aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt167
-rw-r--r--src/cmake/VPPConfig.cmake2
-rw-r--r--src/cmake/api.cmake8
-rw-r--r--src/cmake/ccache.cmake4
-rw-r--r--src/cmake/cpu.cmake129
-rw-r--r--src/cmake/library.cmake60
-rw-r--r--src/cmake/pack.cmake5
-rw-r--r--src/cmake/platform/octeon10.cmake4
-rw-r--r--src/cmake/plugin.cmake12
-rw-r--r--src/examples/handoffdemo/README.md186
-rw-r--r--src/examples/handoffdemo/handoffdemo.c2
-rw-r--r--src/examples/handoffdemo/handoffdemo.rst194
-rw-r--r--src/examples/handoffdemo/node.c11
-rw-r--r--src/examples/sample-plugin/sample/node.c37
-rw-r--r--src/examples/sample-plugin/sample/sample.c2
-rw-r--r--src/examples/sample-plugin/sample_plugin_doc.md66
-rw-r--r--src/examples/sample-plugin/sample_plugin_doc.rst97
-rw-r--r--src/examples/srv6-sample-localsid/srv6_localsid_sample.c3
-rw-r--r--src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md30
-rw-r--r--src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.rst66
-rw-r--r--src/examples/vlib/elog_samples.c2
-rw-r--r--src/examples/vlib/main_stub.c14
-rw-r--r--src/examples/vlib/mc_test.c10
-rw-r--r--src/pkg/CMakeLists.txt19
-rw-r--r--src/pkg/debian/control.in14
-rw-r--r--[-rwxr-xr-x]src/pkg/debian/rules.in9
-rw-r--r--src/plugins/CMakeLists.txt22
-rw-r--r--src/plugins/abf/FEATURE.yaml7
-rw-r--r--src/plugins/abf/abf.api2
-rw-r--r--src/plugins/abf/abf_api.c27
-rw-r--r--src/plugins/abf/abf_itf_attach.c23
-rw-r--r--src/plugins/abf/abf_policy.c102
-rw-r--r--src/plugins/acl/CMakeLists.txt11
-rw-r--r--src/plugins/acl/acl.api42
-rw-r--r--src/plugins/acl/acl.c162
-rw-r--r--src/plugins/acl/acl_hash_lookup_doc.md240
-rw-r--r--src/plugins/acl/acl_hash_lookup_doc.rst243
-rw-r--r--src/plugins/acl/acl_lookup_context.md125
-rw-r--r--src/plugins/acl/acl_lookup_context.rst138
-rw-r--r--src/plugins/acl/acl_multicore_doc.md349
-rw-r--r--src/plugins/acl/acl_multicore_doc.rst354
-rw-r--r--src/plugins/acl/acl_test.c100
-rw-r--r--src/plugins/acl/dataplane_node.c6
-rw-r--r--src/plugins/acl/dataplane_node_nonip.c6
-rw-r--r--src/plugins/acl/exports.h6
-rw-r--r--src/plugins/acl/fa_node.h2
-rw-r--r--src/plugins/acl/hash_lookup.c8
-rw-r--r--src/plugins/acl/public_inlines.h8
-rw-r--r--src/plugins/acl/sess_mgmt_node.c25
-rw-r--r--src/plugins/adl/adl.api2
-rw-r--r--src/plugins/adl/adl.c8
-rw-r--r--src/plugins/adl/adl_api.c1
-rw-r--r--src/plugins/adl/ip4_allowlist.c75
-rw-r--r--src/plugins/adl/setup.pg72
-rw-r--r--src/plugins/af_packet/CMakeLists.txt (renamed from src/plugins/gbp/CMakeLists.txt)54
-rw-r--r--src/plugins/af_packet/FEATURE.yaml (renamed from src/vnet/devices/af_packet/FEATURE.yaml)1
-rw-r--r--src/plugins/af_packet/af_packet.api200
-rw-r--r--src/plugins/af_packet/af_packet.c1054
-rw-r--r--src/plugins/af_packet/af_packet.h182
-rw-r--r--src/plugins/af_packet/af_packet_api.c253
-rw-r--r--src/plugins/af_packet/cli.c (renamed from src/vnet/devices/af_packet/cli.c)123
-rw-r--r--src/plugins/af_packet/device.c793
-rw-r--r--src/plugins/af_packet/dir.dox (renamed from src/vnet/devices/af_packet/dir.dox)0
-rw-r--r--src/plugins/af_packet/node.c832
-rw-r--r--src/plugins/af_packet/plugin.c12
-rw-r--r--src/plugins/af_xdp/CMakeLists.txt34
-rw-r--r--src/plugins/af_xdp/af_xdp.api97
-rw-r--r--src/plugins/af_xdp/af_xdp.h9
-rw-r--r--src/plugins/af_xdp/af_xdp_doc.md129
-rw-r--r--src/plugins/af_xdp/af_xdp_doc.rst164
-rw-r--r--src/plugins/af_xdp/api.c69
-rw-r--r--src/plugins/af_xdp/cli.c8
-rw-r--r--src/plugins/af_xdp/device.c425
-rw-r--r--src/plugins/af_xdp/input.c9
-rw-r--r--src/plugins/af_xdp/output.c51
-rw-r--r--src/plugins/af_xdp/plugin.c2
-rw-r--r--src/plugins/af_xdp/test_api.c105
-rw-r--r--src/plugins/af_xdp/unformat.c2
-rw-r--r--src/plugins/arping/arping.api23
-rw-r--r--src/plugins/arping/arping.c11
-rw-r--r--src/plugins/arping/arping_api.c37
-rw-r--r--src/plugins/arping/arping_test.c19
-rw-r--r--src/plugins/avf/CMakeLists.txt1
-rw-r--r--src/plugins/avf/README.md107
-rw-r--r--src/plugins/avf/README.rst135
-rw-r--r--src/plugins/avf/avf.h10
-rw-r--r--src/plugins/avf/avf_advanced_flow.h408
-rw-r--r--src/plugins/avf/avf_api.c13
-rw-r--r--src/plugins/avf/avf_fdir_lib.c82
-rw-r--r--src/plugins/avf/avf_rss_lib.c2690
-rw-r--r--src/plugins/avf/cli.c62
-rw-r--r--src/plugins/avf/device.c124
-rw-r--r--src/plugins/avf/flow.c317
-rw-r--r--src/plugins/avf/input.c10
-rw-r--r--src/plugins/avf/output.c57
-rw-r--r--src/plugins/avf/plugin.c2
-rw-r--r--src/plugins/avf/virtchnl.h2
-rw-r--r--src/plugins/bpf_trace_filter/CMakeLists.txt45
-rw-r--r--src/plugins/bpf_trace_filter/FEATURE.yaml8
-rw-r--r--src/plugins/bpf_trace_filter/api.c97
-rw-r--r--src/plugins/bpf_trace_filter/bpf_trace_filter.api35
-rw-r--r--src/plugins/bpf_trace_filter/bpf_trace_filter.c112
-rw-r--r--src/plugins/bpf_trace_filter/bpf_trace_filter.h42
-rw-r--r--src/plugins/bpf_trace_filter/bpf_trace_filter.rst4
-rw-r--r--src/plugins/bpf_trace_filter/cli.c99
-rw-r--r--src/plugins/bpf_trace_filter/plugin.c (renamed from src/vpp/api/vpe_all_api_h.h)20
-rw-r--r--src/plugins/bufmon/CMakeLists.txt20
-rw-r--r--src/plugins/bufmon/FEATURE.yaml8
-rw-r--r--src/plugins/bufmon/bufmon.c314
-rw-r--r--src/plugins/bufmon/bufmon_doc.rst33
-rw-r--r--src/plugins/builtinurl/builtins.c69
-rw-r--r--src/plugins/builtinurl/builtinurl.c4
-rw-r--r--src/plugins/cdp/cdp.c4
-rw-r--r--src/plugins/cdp/cdp.pg12
-rw-r--r--src/plugins/cdp/cdp_input.c45
-rw-r--r--src/plugins/cdp/cdp_node.c2
-rw-r--r--src/plugins/cdp/cdp_periodic.c6
-rw-r--r--src/plugins/cnat/CMakeLists.txt1
-rw-r--r--src/plugins/cnat/FEATURE.yaml2
-rw-r--r--src/plugins/cnat/cnat.api8
-rw-r--r--src/plugins/cnat/cnat.rst42
-rw-r--r--src/plugins/cnat/cnat_api.c8
-rw-r--r--src/plugins/cnat/cnat_bihash.h9
-rw-r--r--src/plugins/cnat/cnat_client.c76
-rw-r--r--src/plugins/cnat/cnat_client.h41
-rw-r--r--src/plugins/cnat/cnat_inline.h104
-rw-r--r--src/plugins/cnat/cnat_maglev.c379
-rw-r--r--src/plugins/cnat/cnat_maglev.h21
-rw-r--r--src/plugins/cnat/cnat_node.h485
-rw-r--r--src/plugins/cnat/cnat_node_feature.c20
-rw-r--r--src/plugins/cnat/cnat_node_snat.c10
-rw-r--r--src/plugins/cnat/cnat_node_vip.c8
-rw-r--r--src/plugins/cnat/cnat_scanner.c1
-rw-r--r--src/plugins/cnat/cnat_session.c77
-rw-r--r--src/plugins/cnat/cnat_session.h5
-rw-r--r--src/plugins/cnat/cnat_snat_policy.c15
-rw-r--r--src/plugins/cnat/cnat_snat_policy.h3
-rw-r--r--src/plugins/cnat/cnat_src_policy.c4
-rw-r--r--src/plugins/cnat/cnat_translation.c136
-rw-r--r--src/plugins/cnat/cnat_translation.h22
-rw-r--r--src/plugins/cnat/cnat_types.c25
-rw-r--r--src/plugins/cnat/cnat_types.h52
-rw-r--r--src/plugins/crypto_ipsecmb/CMakeLists.txt10
-rw-r--r--src/plugins/crypto_ipsecmb/ipsecmb.c294
-rw-r--r--src/plugins/crypto_native/CMakeLists.txt14
-rw-r--r--src/plugins/crypto_native/FEATURE.yaml2
-rw-r--r--src/plugins/crypto_native/aes_cbc.c478
-rw-r--r--src/plugins/crypto_native/aes_ctr.c130
-rw-r--r--src/plugins/crypto_native/aes_gcm.c1213
-rw-r--r--src/plugins/crypto_native/crypto_native.h68
-rw-r--r--src/plugins/crypto_native/main.c108
-rw-r--r--src/plugins/crypto_native/sha2.c186
-rw-r--r--src/plugins/crypto_openssl/CMakeLists.txt3
-rw-r--r--src/plugins/crypto_openssl/crypto_openssl.h20
-rw-r--r--src/plugins/crypto_openssl/main.c319
-rw-r--r--src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.api2
-rw-r--r--src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.h18
-rw-r--r--src/plugins/crypto_sw_scheduler/main.c579
-rw-r--r--src/plugins/ct6/ct6.c18
-rw-r--r--src/plugins/ct6/ct6.h2
-rw-r--r--src/plugins/ct6/ct6_in2out.c2
-rw-r--r--src/plugins/ct6/ct6_out2in.c2
-rw-r--r--src/plugins/dev_ena/CMakeLists.txt21
-rw-r--r--src/plugins/dev_ena/aenq.c186
-rw-r--r--src/plugins/dev_ena/aq.c359
-rw-r--r--src/plugins/dev_ena/ena.c265
-rw-r--r--src/plugins/dev_ena/ena.h234
-rw-r--r--src/plugins/dev_ena/ena_admin_defs.h685
-rw-r--r--src/plugins/dev_ena/ena_aenq_defs.h107
-rw-r--r--src/plugins/dev_ena/ena_defs.h25
-rw-r--r--src/plugins/dev_ena/ena_inlines.h40
-rw-r--r--src/plugins/dev_ena/ena_io_defs.h179
-rw-r--r--src/plugins/dev_ena/ena_reg_defs.h150
-rw-r--r--src/plugins/dev_ena/format.c146
-rw-r--r--src/plugins/dev_ena/format_aq.c412
-rw-r--r--src/plugins/dev_ena/port.c96
-rw-r--r--src/plugins/dev_ena/queue.c384
-rw-r--r--src/plugins/dev_ena/reg.c172
-rw-r--r--src/plugins/dev_ena/rx_node.c457
-rw-r--r--src/plugins/dev_ena/tx_node.c514
-rw-r--r--src/plugins/dev_iavf/CMakeLists.txt20
-rw-r--r--src/plugins/dev_iavf/adminq.c485
-rw-r--r--src/plugins/dev_iavf/counters.c128
-rw-r--r--src/plugins/dev_iavf/format.c112
-rw-r--r--src/plugins/dev_iavf/iavf.c307
-rw-r--r--src/plugins/dev_iavf/iavf.h218
-rw-r--r--src/plugins/dev_iavf/iavf_desc.h125
-rw-r--r--src/plugins/dev_iavf/iavf_regs.h364
-rw-r--r--src/plugins/dev_iavf/port.c543
-rw-r--r--src/plugins/dev_iavf/queue.c178
-rw-r--r--src/plugins/dev_iavf/rx_node.c529
-rw-r--r--src/plugins/dev_iavf/tx_node.c517
-rw-r--r--src/plugins/dev_iavf/virtchnl.c372
-rw-r--r--src/plugins/dev_iavf/virtchnl.h570
-rw-r--r--src/plugins/dev_iavf/virtchnl_funcs.h241
-rw-r--r--src/plugins/dev_octeon/CMakeLists.txt42
-rw-r--r--src/plugins/dev_octeon/common.h29
-rw-r--r--src/plugins/dev_octeon/flow.c505
-rw-r--r--src/plugins/dev_octeon/format.c183
-rw-r--r--src/plugins/dev_octeon/hw_defs.h98
-rw-r--r--src/plugins/dev_octeon/init.c312
-rw-r--r--src/plugins/dev_octeon/octeon.h186
-rw-r--r--src/plugins/dev_octeon/port.c493
-rw-r--r--src/plugins/dev_octeon/queue.c311
-rw-r--r--src/plugins/dev_octeon/roc_helper.c181
-rw-r--r--src/plugins/dev_octeon/rx_node.c392
-rw-r--r--src/plugins/dev_octeon/tx_node.c435
-rw-r--r--src/plugins/dhcp/FEATURE.yaml2
-rw-r--r--src/plugins/dhcp/client.c14
-rw-r--r--src/plugins/dhcp/dhcp.api9
-rw-r--r--src/plugins/dhcp/dhcp4_proxy_node.c24
-rw-r--r--src/plugins/dhcp/dhcp6_client_common_dp.c4
-rw-r--r--src/plugins/dhcp/dhcp6_ia_na_client_cp.c18
-rw-r--r--src/plugins/dhcp/dhcp6_ia_na_client_dp.c2
-rw-r--r--src/plugins/dhcp/dhcp6_packet.h16
-rw-r--r--src/plugins/dhcp/dhcp6_pd_client_cp.c26
-rw-r--r--src/plugins/dhcp/dhcp6_pd_client_dp.c2
-rw-r--r--src/plugins/dhcp/dhcp6_pd_doc.md86
-rw-r--r--src/plugins/dhcp/dhcp6_pd_doc.rst113
-rw-r--r--src/plugins/dhcp/dhcp6_proxy_node.c30
-rw-r--r--src/plugins/dhcp/dhcp_api.c47
-rw-r--r--src/plugins/dhcp/dhcp_client_detect.c2
-rw-r--r--src/plugins/dhcp/dhcp_test.c17
-rw-r--r--src/plugins/dispatch-trace/CMakeLists.txt3
-rw-r--r--src/plugins/dispatch-trace/main.c2
-rw-r--r--src/plugins/dma_intel/CMakeLists.txt11
-rw-r--r--src/plugins/dma_intel/dsa.c452
-rw-r--r--src/plugins/dma_intel/dsa_intel.h160
-rw-r--r--src/plugins/dma_intel/format.c15
-rw-r--r--src/plugins/dma_intel/main.c272
-rw-r--r--src/plugins/dns/dns.c231
-rw-r--r--src/plugins/dns/dns_packet.h6
-rw-r--r--src/plugins/dns/reply_node.c2
-rw-r--r--src/plugins/dns/request_node.c15
-rw-r--r--src/plugins/dpdk/CMakeLists.txt20
-rw-r--r--src/plugins/dpdk/buffer.c28
-rw-r--r--src/plugins/dpdk/cryptodev/cryptodev.c432
-rw-r--r--src/plugins/dpdk/cryptodev/cryptodev.h204
-rw-r--r--src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c467
-rw-r--r--src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c428
-rw-r--r--src/plugins/dpdk/device/cli.c44
-rw-r--r--src/plugins/dpdk/device/common.c276
-rw-r--r--src/plugins/dpdk/device/device.c137
-rw-r--r--src/plugins/dpdk/device/dpdk.h287
-rw-r--r--src/plugins/dpdk/device/dpdk_priv.h142
-rw-r--r--src/plugins/dpdk/device/driver.c154
-rw-r--r--src/plugins/dpdk/device/flow.c226
-rw-r--r--src/plugins/dpdk/device/format.c601
-rw-r--r--src/plugins/dpdk/device/init.c1333
-rw-r--r--src/plugins/dpdk/device/node.c150
-rw-r--r--src/plugins/dpdk/main.c13
-rw-r--r--src/plugins/dpdk/thread.c85
-rw-r--r--src/plugins/fateshare/CMakeLists.txt25
-rw-r--r--src/plugins/fateshare/fateshare.c309
-rw-r--r--src/plugins/fateshare/fateshare.h (renamed from src/vnet/lawful-intercept/lawful_intercept.h)40
-rw-r--r--src/plugins/fateshare/vpp_fateshare_monitor.c289
-rw-r--r--src/plugins/flowprobe/FEATURE.yaml9
-rw-r--r--src/plugins/flowprobe/flowprobe.api119
-rw-r--r--src/plugins/flowprobe/flowprobe.c593
-rw-r--r--src/plugins/flowprobe/flowprobe.h19
-rw-r--r--src/plugins/flowprobe/flowprobe_plugin_doc.md13
-rw-r--r--src/plugins/flowprobe/flowprobe_plugin_doc.rst18
-rw-r--r--src/plugins/flowprobe/flowprobe_test.c218
-rw-r--r--src/plugins/flowprobe/node.c327
-rw-r--r--src/plugins/gbp/gbp.api470
-rw-r--r--src/plugins/gbp/gbp.h80
-rw-r--r--src/plugins/gbp/gbp_api.c1154
-rw-r--r--src/plugins/gbp/gbp_bridge_domain.c503
-rw-r--r--src/plugins/gbp/gbp_bridge_domain.h156
-rw-r--r--src/plugins/gbp/gbp_classify.c71
-rw-r--r--src/plugins/gbp/gbp_classify.h94
-rw-r--r--src/plugins/gbp/gbp_classify_node.c628
-rw-r--r--src/plugins/gbp/gbp_contract.c819
-rw-r--r--src/plugins/gbp/gbp_contract.h362
-rw-r--r--src/plugins/gbp/gbp_endpoint.c1597
-rw-r--r--src/plugins/gbp/gbp_endpoint.h376
-rw-r--r--src/plugins/gbp/gbp_endpoint_group.c402
-rw-r--r--src/plugins/gbp/gbp_endpoint_group.h166
-rw-r--r--src/plugins/gbp/gbp_ext_itf.c293
-rw-r--r--src/plugins/gbp/gbp_ext_itf.h92
-rw-r--r--src/plugins/gbp/gbp_fwd.c56
-rw-r--r--src/plugins/gbp/gbp_fwd_dpo.c306
-rw-r--r--src/plugins/gbp/gbp_fwd_dpo.h62
-rw-r--r--src/plugins/gbp/gbp_fwd_node.c163
-rw-r--r--src/plugins/gbp/gbp_itf.c575
-rw-r--r--src/plugins/gbp/gbp_itf.h97
-rw-r--r--src/plugins/gbp/gbp_learn.c76
-rw-r--r--src/plugins/gbp/gbp_learn.h63
-rw-r--r--src/plugins/gbp/gbp_learn_node.c718
-rw-r--r--src/plugins/gbp/gbp_policy.c79
-rw-r--r--src/plugins/gbp/gbp_policy.h57
-rw-r--r--src/plugins/gbp/gbp_policy_dpo.c420
-rw-r--r--src/plugins/gbp/gbp_policy_dpo.h121
-rw-r--r--src/plugins/gbp/gbp_policy_node.c341
-rw-r--r--src/plugins/gbp/gbp_recirc.c292
-rw-r--r--src/plugins/gbp/gbp_recirc.h88
-rw-r--r--src/plugins/gbp/gbp_route_domain.c447
-rw-r--r--src/plugins/gbp/gbp_route_domain.h84
-rw-r--r--src/plugins/gbp/gbp_scanner.c136
-rw-r--r--src/plugins/gbp/gbp_subnet.c598
-rw-r--r--src/plugins/gbp/gbp_subnet.h53
-rw-r--r--src/plugins/gbp/gbp_vxlan.c654
-rw-r--r--src/plugins/gbp/gbp_vxlan.h135
-rw-r--r--src/plugins/gbp/gbp_vxlan_node.c218
-rw-r--r--src/plugins/geneve/decap.c6
-rw-r--r--src/plugins/geneve/encap.c2
-rw-r--r--src/plugins/geneve/geneve.c67
-rw-r--r--src/plugins/geneve/geneve.h2
-rw-r--r--src/plugins/geneve/geneve_api.c12
-rw-r--r--src/plugins/geneve/geneve_test.c14
-rw-r--r--src/plugins/gre/CMakeLists.txt (renamed from src/plugins/l2e/CMakeLists.txt)25
-rw-r--r--src/plugins/gre/FEATURE.yaml (renamed from src/vnet/gre/FEATURE.yaml)0
-rw-r--r--src/plugins/gre/error.def (renamed from src/vnet/gre/error.def)0
-rw-r--r--src/plugins/gre/gre.api (renamed from src/vnet/gre/gre.api)0
-rw-r--r--src/plugins/gre/gre.c (renamed from src/vnet/gre/gre.c)202
-rw-r--r--src/plugins/gre/gre.h (renamed from src/vnet/gre/gre.h)6
-rw-r--r--src/plugins/gre/gre_api.c (renamed from src/vnet/gre/gre_api.c)90
-rw-r--r--src/plugins/gre/interface.c (renamed from src/vnet/gre/interface.c)214
-rw-r--r--src/plugins/gre/node.c (renamed from src/vnet/gre/node.c)227
-rw-r--r--src/plugins/gre/pg.c (renamed from src/vnet/gre/pg.c)14
-rw-r--r--src/plugins/gre/plugin.c (renamed from src/plugins/gbp/gbp_scanner.h)24
-rw-r--r--src/plugins/gtpu/gtpu.api196
-rw-r--r--src/plugins/gtpu/gtpu.c457
-rw-r--r--src/plugins/gtpu/gtpu.h118
-rw-r--r--src/plugins/gtpu/gtpu_api.c248
-rw-r--r--src/plugins/gtpu/gtpu_decap.c1543
-rw-r--r--src/plugins/gtpu/gtpu_encap.c218
-rw-r--r--src/plugins/gtpu/gtpu_error.def2
-rw-r--r--src/plugins/gtpu/gtpu_test.c384
-rw-r--r--src/plugins/hs_apps/CMakeLists.txt9
-rw-r--r--src/plugins/hs_apps/echo_client.c1348
-rw-r--r--src/plugins/hs_apps/echo_client.h116
-rw-r--r--src/plugins/hs_apps/echo_server.c515
-rw-r--r--src/plugins/hs_apps/hs_apps.c2
-rw-r--r--src/plugins/hs_apps/hs_test.h212
-rw-r--r--src/plugins/hs_apps/http_cli.c676
-rw-r--r--src/plugins/hs_apps/http_cli.h (renamed from src/vpp/api/vpe_msg_enum.h)20
-rw-r--r--src/plugins/hs_apps/http_client_cli.c555
-rw-r--r--src/plugins/hs_apps/http_server.c1004
-rw-r--r--src/plugins/hs_apps/http_tps.c839
-rw-r--r--src/plugins/hs_apps/proxy.c265
-rw-r--r--src/plugins/hs_apps/proxy.h29
-rw-r--r--src/plugins/hs_apps/sapi/vpp_echo.c184
-rw-r--r--src/plugins/hs_apps/sapi/vpp_echo_bapi.c40
-rw-r--r--src/plugins/hs_apps/sapi/vpp_echo_common.h15
-rw-r--r--src/plugins/hs_apps/sapi/vpp_echo_proto_quic.c2
-rw-r--r--src/plugins/hs_apps/sapi/vpp_echo_sapi.c330
-rw-r--r--src/plugins/hs_apps/vcl/sock_test_client.c160
-rw-r--r--src/plugins/hs_apps/vcl/sock_test_server.c74
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test.h217
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test_client.c731
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test_protos.c109
-rw-r--r--src/plugins/hs_apps/vcl/vcl_test_server.c103
-rw-r--r--src/plugins/hsi/CMakeLists.txt17
-rw-r--r--src/plugins/hsi/FEATURE.yaml8
-rw-r--r--src/plugins/hsi/hsi.c404
-rw-r--r--src/plugins/hsi/hsi.h29
-rw-r--r--src/plugins/hsi/hsi_error.def (renamed from src/vnet/vxlan-gbp/dir.dox)12
-rw-r--r--src/plugins/http/CMakeLists.txt19
-rw-r--r--src/plugins/http/http.c1513
-rw-r--r--src/plugins/http/http.h288
-rw-r--r--src/plugins/http/http_buffer.c219
-rw-r--r--src/plugins/http/http_buffer.h82
-rw-r--r--src/plugins/http/http_timer.c91
-rw-r--r--src/plugins/http/http_timer.h91
-rw-r--r--src/plugins/http_static/CMakeLists.txt4
-rw-r--r--src/plugins/http_static/FEATURE.yaml20
-rw-r--r--src/plugins/http_static/builtinurl/json_urls.c192
-rw-r--r--src/plugins/http_static/http_cache.c450
-rw-r--r--src/plugins/http_static/http_cache.h78
-rw-r--r--src/plugins/http_static/http_static.c99
-rw-r--r--src/plugins/http_static/http_static.h226
-rw-r--r--src/plugins/http_static/static_server.c1737
-rw-r--r--src/plugins/idpf/CMakeLists.txt28
-rw-r--r--src/plugins/idpf/README.rst59
-rw-r--r--src/plugins/idpf/cli.c135
-rw-r--r--src/plugins/idpf/device.c2265
-rw-r--r--src/plugins/idpf/format.c77
-rw-r--r--src/plugins/idpf/idpf.api80
-rw-r--r--src/plugins/idpf/idpf.h929
-rw-r--r--src/plugins/idpf/idpf_api.c111
-rw-r--r--src/plugins/idpf/idpf_controlq.c890
-rw-r--r--src/plugins/idpf/idpf_test.c169
-rw-r--r--src/plugins/idpf/plugin.c35
-rw-r--r--src/plugins/idpf/virtchnl2.h855
-rw-r--r--src/plugins/idpf/virtchnl2_lan_desc.h610
-rw-r--r--src/plugins/igmp/igmp.c27
-rw-r--r--src/plugins/igmp/igmp.h1
-rw-r--r--src/plugins/igmp/igmp_api.c21
-rw-r--r--src/plugins/igmp/igmp_cli.c18
-rw-r--r--src/plugins/igmp/igmp_config.c4
-rw-r--r--src/plugins/igmp/igmp_group.c10
-rw-r--r--src/plugins/igmp/igmp_input.c8
-rw-r--r--src/plugins/igmp/igmp_pkt.c4
-rw-r--r--src/plugins/igmp/igmp_proxy.c9
-rw-r--r--src/plugins/igmp/igmp_query.c2
-rw-r--r--src/plugins/igmp/igmp_ssm_range.c4
-rw-r--r--src/plugins/igmp/igmp_timer.c2
-rw-r--r--src/plugins/ikev2/CMakeLists.txt9
-rw-r--r--src/plugins/ikev2/ikev2.api74
-rw-r--r--src/plugins/ikev2/ikev2.c872
-rw-r--r--src/plugins/ikev2/ikev2.h11
-rw-r--r--src/plugins/ikev2/ikev2_api.c413
-rw-r--r--src/plugins/ikev2/ikev2_cli.c51
-rw-r--r--src/plugins/ikev2/ikev2_crypto.c6
-rw-r--r--src/plugins/ikev2/ikev2_payload.c42
-rw-r--r--src/plugins/ikev2/ikev2_priv.h64
-rw-r--r--src/plugins/ikev2/ikev2_test.c223
-rw-r--r--src/plugins/ikev2/ikev2_types.api76
-rw-r--r--src/plugins/ila/ila.c12
-rw-r--r--src/plugins/ioam/analyse/ioam_summary_export.c36
-rw-r--r--src/plugins/ioam/analyse/ioam_summary_export.h8
-rw-r--r--src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c4
-rw-r--r--src/plugins/ioam/analyse/ip6/node.c2
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_e2e.c2
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_e2e.h2
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_pot.c2
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_pot.h2
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_trace.c6
-rw-r--r--src/plugins/ioam/encap/ip6_ioam_trace.h2
-rw-r--r--src/plugins/ioam/export-common/ioam_export.h2
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c2
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c2
-rw-r--r--src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c2
-rw-r--r--src/plugins/ioam/export/ioam_export.c2
-rw-r--r--src/plugins/ioam/ioam_plugin_doc.md464
-rw-r--r--src/plugins/ioam/ioam_plugin_doc.rst490
-rw-r--r--src/plugins/ioam/ip6/ioam_cache.h21
-rw-r--r--src/plugins/ioam/ip6/ioam_cache_node.c4
-rw-r--r--src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c6
-rw-r--r--src/plugins/ioam/ipfixcollector/node.c2
-rw-r--r--src/plugins/ioam/lib-e2e/e2e_util.h2
-rw-r--r--src/plugins/ioam/lib-pot/math64.h31
-rw-r--r--src/plugins/ioam/lib-trace/trace_util.c6
-rw-r--r--src/plugins/ioam/lib-trace/trace_util.h2
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c2
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c2
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c2
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c4
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c5
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c15
-rw-r--r--src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c6
-rw-r--r--src/plugins/ioam/udp-ping/udp_ping_export.c34
-rw-r--r--src/plugins/ioam/udp-ping/udp_ping_node.c10
-rw-r--r--src/plugins/ip_session_redirect/CMakeLists.txt27
-rw-r--r--src/plugins/ip_session_redirect/FEATURE.yaml9
-rw-r--r--src/plugins/ip_session_redirect/api.c124
-rw-r--r--src/plugins/ip_session_redirect/ip_session_redirect.api106
-rw-r--r--src/plugins/ip_session_redirect/ip_session_redirect.h33
-rw-r--r--src/plugins/ip_session_redirect/ip_session_redirect_doc.rst42
-rw-r--r--src/plugins/ip_session_redirect/punt_redirect.vpp48
-rw-r--r--src/plugins/ip_session_redirect/redirect.c463
-rw-r--r--src/plugins/ip_session_redirect/test_api.c195
-rw-r--r--src/plugins/l2e/l2e.c198
-rw-r--r--src/plugins/l2e/l2e.h84
-rw-r--r--src/plugins/l2e/l2e_api.c89
-rw-r--r--src/plugins/l2e/l2e_node.c283
-rw-r--r--src/plugins/l2tp/decap.c4
-rw-r--r--src/plugins/l2tp/encap.c2
-rw-r--r--src/plugins/l2tp/l2tp.c34
-rw-r--r--src/plugins/l2tp/l2tp_api.c6
-rw-r--r--src/plugins/l2tp/l2tp_test.c14
-rw-r--r--src/plugins/l2tp/packet.h2
-rw-r--r--src/plugins/l3xc/FEATURE.yaml2
-rw-r--r--src/plugins/l3xc/l3xc.c20
-rw-r--r--src/plugins/l3xc/l3xc_api.c12
-rw-r--r--src/plugins/l3xc/l3xc_node.c2
-rw-r--r--src/plugins/lacp/cli.c6
-rw-r--r--src/plugins/lacp/input.c22
-rw-r--r--src/plugins/lacp/lacp.c32
-rw-r--r--src/plugins/lacp/lacp_api.c16
-rw-r--r--src/plugins/lacp/lacp_doc.md104
-rw-r--r--src/plugins/lacp/lacp_doc.rst109
-rw-r--r--src/plugins/lacp/lacp_test.c2
-rw-r--r--src/plugins/lacp/mux_machine.c2
-rw-r--r--src/plugins/lacp/node.c8
-rw-r--r--src/plugins/lacp/ptx_machine.c2
-rw-r--r--src/plugins/lacp/rx_machine.c4
-rw-r--r--src/plugins/lacp/tx_machine.c2
-rw-r--r--src/plugins/lb/api.c92
-rw-r--r--src/plugins/lb/cli.c33
-rw-r--r--src/plugins/lb/lb.api35
-rw-r--r--src/plugins/lb/lb.c23
-rw-r--r--src/plugins/lb/lb.h18
-rw-r--r--src/plugins/lb/lb_plugin_doc.md192
-rw-r--r--src/plugins/lb/lb_plugin_doc.rst223
-rw-r--r--src/plugins/lb/lb_test.c99
-rw-r--r--src/plugins/lb/lb_types.api10
-rw-r--r--src/plugins/lb/lbhash.h3
-rw-r--r--src/plugins/lb/node.c102
-rw-r--r--src/plugins/linux-cp/CMakeLists.txt17
-rw-r--r--src/plugins/linux-cp/FEATURE.yaml14
-rw-r--r--src/plugins/linux-cp/lcp.api76
-rw-r--r--src/plugins/linux-cp/lcp.c119
-rw-r--r--src/plugins/linux-cp/lcp.h39
-rw-r--r--src/plugins/linux-cp/lcp.rst35
-rw-r--r--src/plugins/linux-cp/lcp_adj.c6
-rw-r--r--src/plugins/linux-cp/lcp_api.c158
-rw-r--r--src/plugins/linux-cp/lcp_cli.c272
-rw-r--r--src/plugins/linux-cp/lcp_interface.c630
-rw-r--r--src/plugins/linux-cp/lcp_interface.h54
-rw-r--r--src/plugins/linux-cp/lcp_interface_sync.c445
-rw-r--r--src/plugins/linux-cp/lcp_mpls_sync.c160
-rw-r--r--src/plugins/linux-cp/lcp_nl.c1043
-rw-r--r--src/plugins/linux-cp/lcp_nl.h161
-rw-r--r--src/plugins/linux-cp/lcp_node.c162
-rw-r--r--src/plugins/linux-cp/lcp_router.c1578
-rw-r--r--src/plugins/lisp/CMakeLists.txt5
-rw-r--r--src/plugins/lisp/lisp-cp/control.c51
-rw-r--r--src/plugins/lisp/lisp-cp/control.h2
-rw-r--r--src/plugins/lisp/lisp-cp/gid_dictionary.c4
-rw-r--r--src/plugins/lisp/lisp-cp/lisp_api.c26
-rw-r--r--src/plugins/lisp/lisp-cp/lisp_cli.c57
-rw-r--r--src/plugins/lisp/lisp-cp/lisp_cp_test.c16
-rw-r--r--src/plugins/lisp/lisp-cp/lisp_msg_serdes.c16
-rw-r--r--src/plugins/lisp/lisp-cp/lisp_types.h7
-rw-r--r--src/plugins/lisp/lisp-cp/one_api.c56
-rw-r--r--src/plugins/lisp/lisp-cp/one_cli.c101
-rw-r--r--src/plugins/lisp/lisp-cp/one_test.c16
-rw-r--r--src/plugins/lisp/lisp-cp/packets.c1
-rw-r--r--src/plugins/lisp/lisp-cp/packets.h1
-rw-r--r--src/plugins/lisp/lisp-gpe/decap.c8
-rw-r--r--src/plugins/lisp/lisp-gpe/interface.c36
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe.c20
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe.h4
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_adjacency.c9
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_api.c10
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_fwd_entry.c10
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_sub_interface.c13
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_tenant.c6
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_test.c16
-rw-r--r--src/plugins/lisp/lisp-gpe/lisp_gpe_tunnel.c4
-rw-r--r--src/plugins/lisp/test/lisp_cp_test.c50
-rw-r--r--src/plugins/lldp/lldp.api76
-rw-r--r--src/plugins/lldp/lldp_api.c59
-rw-r--r--src/plugins/lldp/lldp_cli.c16
-rw-r--r--src/plugins/lldp/lldp_doc.md86
-rw-r--r--src/plugins/lldp/lldp_doc.rst84
-rw-r--r--src/plugins/lldp/lldp_input.c4
-rw-r--r--src/plugins/lldp/lldp_node.c6
-rw-r--r--src/plugins/lldp/lldp_protocol.h6
-rw-r--r--src/plugins/lldp/lldp_test.c24
-rw-r--r--src/plugins/mactime/CMakeLists.txt1
-rw-r--r--src/plugins/mactime/builtins.c21
-rw-r--r--src/plugins/mactime/mactime.c24
-rw-r--r--src/plugins/mactime/mactime_test.c2
-rw-r--r--src/plugins/mactime/mactime_top.c22
-rw-r--r--src/plugins/mactime/node.c4
-rwxr-xr-xsrc/plugins/map/examples/gen-rules.py161
-rwxr-xr-xsrc/plugins/map/examples/test_map.py120
-rwxr-xr-xsrc/plugins/map/gen-rules.py124
-rw-r--r--src/plugins/map/ip4_map.c5
-rw-r--r--src/plugins/map/ip4_map_t.c10
-rw-r--r--src/plugins/map/ip6_map.c6
-rw-r--r--src/plugins/map/ip6_map_t.c13
-rw-r--r--src/plugins/map/lpm.c28
-rw-r--r--src/plugins/map/map.c20
-rw-r--r--src/plugins/map/map.h6
-rw-r--r--src/plugins/map/map_api.c8
-rw-r--r--src/plugins/map/map_doc.md69
-rw-r--r--src/plugins/map/map_doc.rst99
-rw-r--r--src/plugins/marvell/README.md65
-rw-r--r--src/plugins/marvell/README.rst85
-rw-r--r--src/plugins/marvell/plugin.c2
-rw-r--r--src/plugins/marvell/pp2/cli.c4
-rw-r--r--src/plugins/marvell/pp2/format.c1
-rw-r--r--src/plugins/marvell/pp2/input.c9
-rw-r--r--src/plugins/marvell/pp2/pp2.c21
-rw-r--r--src/plugins/marvell/pp2/pp2_api.c11
-rw-r--r--src/plugins/mdata/mdata.c37
-rw-r--r--src/plugins/mdata/mdata_doc.md24
-rw-r--r--src/plugins/mdata/mdata_doc.rst26
-rw-r--r--src/plugins/memif/CMakeLists.txt2
-rw-r--r--src/plugins/memif/cli.c125
-rw-r--r--src/plugins/memif/device.c315
-rw-r--r--src/plugins/memif/memif.api92
-rw-r--r--src/plugins/memif/memif.c477
-rw-r--r--src/plugins/memif/memif_api.c150
-rw-r--r--src/plugins/memif/memif_test.c197
-rw-r--r--src/plugins/memif/node.c873
-rw-r--r--src/plugins/memif/private.h101
-rw-r--r--src/plugins/memif/socket.c6
-rw-r--r--src/plugins/mss_clamp/mss_clamp_node.c30
-rw-r--r--src/plugins/nat/CMakeLists.txt4
-rw-r--r--src/plugins/nat/FEATURE.yaml2
-rw-r--r--src/plugins/nat/det44/det44.api4
-rw-r--r--src/plugins/nat/det44/det44.c56
-rw-r--r--src/plugins/nat/det44/det44.h7
-rw-r--r--src/plugins/nat/det44/det44_api.c16
-rw-r--r--src/plugins/nat/det44/det44_cli.c2
-rw-r--r--src/plugins/nat/det44/det44_in2out.c2
-rw-r--r--src/plugins/nat/det44/det44_inlines.h4
-rw-r--r--src/plugins/nat/det44/det44_out2in.c5
-rw-r--r--src/plugins/nat/dslite/dslite.c4
-rw-r--r--src/plugins/nat/dslite/dslite.h2
-rw-r--r--src/plugins/nat/dslite/dslite_api.c6
-rw-r--r--src/plugins/nat/dslite/dslite_ce_decap.c2
-rw-r--r--src/plugins/nat/dslite/dslite_ce_encap.c2
-rw-r--r--src/plugins/nat/dslite/dslite_cli.c6
-rw-r--r--src/plugins/nat/dslite/dslite_in2out.c4
-rw-r--r--src/plugins/nat/dslite/dslite_out2in.c2
-rw-r--r--src/plugins/nat/extras/nat_100ks.py24
-rw-r--r--src/plugins/nat/extras/nat_10Ms.py24
-rw-r--r--src/plugins/nat/extras/nat_10ks.py24
-rw-r--r--src/plugins/nat/extras/nat_1Ms.py24
-rw-r--r--src/plugins/nat/extras/nat_out2in_100ks.py24
-rw-r--r--src/plugins/nat/extras/nat_out2in_10Ms.py24
-rw-r--r--src/plugins/nat/extras/nat_out2in_10ks.py24
-rw-r--r--src/plugins/nat/extras/nat_out2in_1Ms.py24
-rw-r--r--src/plugins/nat/extras/nat_ses_open.py83
-rwxr-xr-xsrc/plugins/nat/extras/nat_static_gen_cfg.py34
-rw-r--r--src/plugins/nat/extras/nat_test_fast_path.py64
-rw-r--r--src/plugins/nat/extras/nat_test_slow_path.py60
-rw-r--r--src/plugins/nat/extras/nat_test_slow_path_with_latency.py81
-rw-r--r--src/plugins/nat/lib/alloc.h3
-rw-r--r--src/plugins/nat/lib/inlines.h53
-rw-r--r--src/plugins/nat/lib/ipfix_logging.c295
-rw-r--r--src/plugins/nat/lib/ipfix_logging.h18
-rw-r--r--src/plugins/nat/lib/lib.c1
-rw-r--r--src/plugins/nat/lib/lib.h51
-rw-r--r--src/plugins/nat/lib/log.h15
-rw-r--r--src/plugins/nat/lib/nat_proto.h76
-rw-r--r--src/plugins/nat/lib/nat_syslog.c109
-rw-r--r--src/plugins/nat/lib/nat_syslog.h13
-rw-r--r--src/plugins/nat/lib/nat_syslog_constants.h62
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed.api798
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed.c2986
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed.h617
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_affinity.c7
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_api.c771
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_classify.c42
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_cli.c758
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_doc.rst729
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_format.c293
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_handoff.c1
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_in2out.c867
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_inlines.h379
-rw-r--r--src/plugins/nat/nat44-ed/nat44_ed_out2in.c444
-rw-r--r--src/plugins/nat/nat44-ed/tcp_conn_track.rst65
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei.api85
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei.c2589
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei.h142
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_api.c398
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_cli.c249
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_ha.c8
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md70
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_ha_doc.rst88
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_hairpinning.c756
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_hairpinning.h92
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_handoff.c3
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_in2out.c1213
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_inlines.h24
-rw-r--r--src/plugins/nat/nat44-ei/nat44_ei_out2in.c218
-rw-r--r--src/plugins/nat/nat64/nat64.c56
-rw-r--r--src/plugins/nat/nat64/nat64.h2
-rw-r--r--src/plugins/nat/nat64/nat64_api.c6
-rw-r--r--src/plugins/nat/nat64/nat64_cli.c6
-rw-r--r--src/plugins/nat/nat64/nat64_db.c40
-rw-r--r--src/plugins/nat/nat64/nat64_db.h9
-rw-r--r--src/plugins/nat/nat64/nat64_doc.md73
-rw-r--r--src/plugins/nat/nat64/nat64_doc.rst91
-rw-r--r--src/plugins/nat/nat64/nat64_in2out.c14
-rw-r--r--src/plugins/nat/nat64/nat64_out2in.c4
-rw-r--r--src/plugins/nat/nat66/nat66_cli.c10
-rw-r--r--src/plugins/nat/nat66/nat66_in2out.c4
-rw-r--r--src/plugins/nat/nat66/nat66_out2in.c2
-rw-r--r--src/plugins/nat/pnat/pnat.api18
-rw-r--r--src/plugins/nat/pnat/pnat.c4
-rw-r--r--src/plugins/nat/pnat/pnat.md37
-rw-r--r--src/plugins/nat/pnat/pnat.rst45
-rw-r--r--src/plugins/nat/pnat/pnat_api.c31
-rw-r--r--src/plugins/nat/pnat/pnat_cli.c6
-rw-r--r--src/plugins/nat/pnat/pnat_node.h1
-rw-r--r--src/plugins/nat/pnat/tests/pnat_test.c8
-rw-r--r--src/plugins/nat/pnat/tests/pnat_test_stubs.h6
-rwxr-xr-xsrc/plugins/nat/pnat/tests/test_genpackets.py30
-rw-r--r--src/plugins/npt66/CMakeLists.txt17
-rw-r--r--src/plugins/npt66/FEATURE.yaml16
-rw-r--r--src/plugins/npt66/npt66.api40
-rw-r--r--src/plugins/npt66/npt66.c124
-rw-r--r--src/plugins/npt66/npt66.h28
-rw-r--r--src/plugins/npt66/npt66_api.c72
-rw-r--r--src/plugins/npt66/npt66_cli.c121
-rw-r--r--src/plugins/npt66/npt66_node.c372
-rw-r--r--src/plugins/nsh/FEATURE.yaml1
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export.c2
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export_thread.c2
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_node.c2
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/md2_ioam_transit.c4
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam.c3
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_api.c1
-rw-r--r--src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_trace.c6
-rw-r--r--src/plugins/nsh/nsh.c46
-rw-r--r--src/plugins/nsh/nsh_api.c8
-rw-r--r--src/plugins/nsh/nsh_cli.c10
-rw-r--r--src/plugins/nsh/nsh_node.c2
-rw-r--r--src/plugins/nsh/nsh_output.c2
-rw-r--r--src/plugins/nsh/nsh_pop.c1
-rw-r--r--src/plugins/nsim/node.c4
-rw-r--r--src/plugins/nsim/nsim.c81
-rw-r--r--src/plugins/nsim/nsim.h3
-rw-r--r--src/plugins/nsim/nsim_input.c2
-rw-r--r--src/plugins/oddbuf/CMakeLists.txt3
-rw-r--r--src/plugins/oddbuf/node.c2
-rw-r--r--src/plugins/oddbuf/oddbuf.c8
-rw-r--r--src/plugins/perfmon/CMakeLists.txt57
-rw-r--r--src/plugins/perfmon/arm/bundle/branch_pred.c140
-rw-r--r--src/plugins/perfmon/arm/bundle/cache_data.c128
-rw-r--r--src/plugins/perfmon/arm/bundle/cache_data_tlb.c106
-rw-r--r--src/plugins/perfmon/arm/bundle/cache_inst.c103
-rw-r--r--src/plugins/perfmon/arm/bundle/cache_inst_tlb.c105
-rw-r--r--src/plugins/perfmon/arm/bundle/inst_clock.c102
-rw-r--r--src/plugins/perfmon/arm/bundle/mem_access.c88
-rw-r--r--src/plugins/perfmon/arm/bundle/stall.c94
-rw-r--r--src/plugins/perfmon/arm/dispatch_wrapper.c142
-rw-r--r--src/plugins/perfmon/arm/dispatch_wrapper.h (renamed from src/vnet/vxlan-gbp/vxlan_gbp_error.def)9
-rw-r--r--src/plugins/perfmon/arm/events.c227
-rw-r--r--src/plugins/perfmon/arm/events.h130
-rw-r--r--src/plugins/perfmon/cli.c231
-rw-r--r--src/plugins/perfmon/dispatch_wrapper.c170
-rw-r--r--src/plugins/perfmon/intel/bundle/backend_bound_core.c100
-rw-r--r--src/plugins/perfmon/intel/bundle/backend_bound_mem.c102
-rw-r--r--src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c90
-rw-r--r--src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c89
-rw-r--r--src/plugins/perfmon/intel/bundle/frontend_bound_lat.c99
-rw-r--r--src/plugins/perfmon/intel/bundle/iio_bw.c263
-rw-r--r--src/plugins/perfmon/intel/bundle/topdown_icelake.c176
-rw-r--r--src/plugins/perfmon/intel/bundle/topdown_metrics.c232
-rw-r--r--src/plugins/perfmon/intel/bundle/topdown_tremont.c85
-rw-r--r--src/plugins/perfmon/intel/core.c78
-rw-r--r--src/plugins/perfmon/intel/core.h129
-rw-r--r--src/plugins/perfmon/intel/dispatch_wrapper.c160
-rw-r--r--src/plugins/perfmon/intel/dispatch_wrapper.h18
-rw-r--r--src/plugins/perfmon/intel/uncore.c60
-rw-r--r--src/plugins/perfmon/intel/uncore.h52
-rw-r--r--src/plugins/perfmon/linux.c7
-rw-r--r--src/plugins/perfmon/perfmon.c70
-rw-r--r--src/plugins/perfmon/perfmon.h120
-rw-r--r--src/plugins/ping/CMakeLists.txt5
-rw-r--r--src/plugins/ping/ping.api (renamed from src/plugins/l2e/l2e.api)31
-rw-r--r--src/plugins/ping/ping.c330
-rw-r--r--src/plugins/ping/ping.h75
-rw-r--r--src/plugins/ping/ping_api.c155
-rw-r--r--src/plugins/pppoe/pppoe.c29
-rw-r--r--src/plugins/pppoe/pppoe.h4
-rw-r--r--src/plugins/pppoe/pppoe_api.c6
-rw-r--r--src/plugins/pppoe/pppoe_cp.c2
-rw-r--r--src/plugins/pppoe/pppoe_decap.c23
-rw-r--r--src/plugins/prom/CMakeLists.txt21
-rw-r--r--src/plugins/prom/FEATURE.yaml10
-rw-r--r--src/plugins/prom/prom.c436
-rw-r--r--src/plugins/prom/prom.h66
-rw-r--r--src/plugins/prom/prom_cli.c153
-rw-r--r--src/plugins/quic/CMakeLists.txt7
-rw-r--r--src/plugins/quic/quic.c97
-rw-r--r--src/plugins/quic/quic.h3
-rw-r--r--src/plugins/quic/quic_crypto.c116
-rw-r--r--src/plugins/quic/quic_crypto.h13
-rw-r--r--src/plugins/rdma/CMakeLists.txt8
-rw-r--r--src/plugins/rdma/api.c52
-rw-r--r--src/plugins/rdma/cli.c14
-rw-r--r--src/plugins/rdma/device.c71
-rw-r--r--src/plugins/rdma/format.c15
-rw-r--r--src/plugins/rdma/input.c90
-rw-r--r--src/plugins/rdma/output.c45
-rw-r--r--src/plugins/rdma/plugin.c2
-rw-r--r--src/plugins/rdma/rdma.api54
-rw-r--r--src/plugins/rdma/rdma_doc.md75
-rw-r--r--src/plugins/rdma/rdma_doc.rst102
-rw-r--r--src/plugins/rdma/rdma_mlx5dv.h12
-rw-r--r--src/plugins/rdma/test_api.c53
-rw-r--r--src/plugins/snort/cli.c15
-rw-r--r--src/plugins/snort/daq_vpp.c37
-rw-r--r--src/plugins/snort/daq_vpp.h2
-rw-r--r--src/plugins/snort/dequeue.c4
-rw-r--r--src/plugins/snort/enqueue.c15
-rw-r--r--src/plugins/snort/main.c65
-rw-r--r--src/plugins/snort/snort.h10
-rw-r--r--src/plugins/srtp/srtp.c29
-rw-r--r--src/plugins/srtp/srtp_plugin.md72
-rw-r--r--src/plugins/srtp/srtp_plugin.rst82
-rw-r--r--src/plugins/srv6-ad-flow/ad-flow.c10
-rw-r--r--src/plugins/srv6-ad-flow/ad_flow_plugin_doc.md25
-rw-r--r--src/plugins/srv6-ad-flow/ad_flow_plugin_doc.rst31
-rw-r--r--src/plugins/srv6-ad/ad.c4
-rw-r--r--src/plugins/srv6-ad/ad_plugin_doc.md73
-rw-r--r--src/plugins/srv6-ad/ad_plugin_doc.rst86
-rw-r--r--src/plugins/srv6-ad/node.c10
-rw-r--r--src/plugins/srv6-am/am.c4
-rw-r--r--src/plugins/srv6-am/am_plugin_doc.md100
-rw-r--r--src/plugins/srv6-am/am_plugin_doc.rst116
-rw-r--r--src/plugins/srv6-am/node.c7
-rw-r--r--src/plugins/srv6-as/as.c4
-rw-r--r--src/plugins/srv6-as/as_plugin_doc.md152
-rw-r--r--src/plugins/srv6-as/as_plugin_doc.rst172
-rw-r--r--src/plugins/srv6-as/node.c11
-rw-r--r--src/plugins/srv6-mobile/CMakeLists.txt5
-rw-r--r--src/plugins/srv6-mobile/FEATURE.yaml2
-rw-r--r--src/plugins/srv6-mobile/extra/Dockerfile.j21
-rw-r--r--src/plugins/srv6-mobile/extra/Dockerfile.j2.release1
-rwxr-xr-xsrc/plugins/srv6-mobile/extra/runner.py832
-rw-r--r--src/plugins/srv6-mobile/extra/runner_doc.md105
-rw-r--r--src/plugins/srv6-mobile/extra/runner_doc.rst135
-rw-r--r--src/plugins/srv6-mobile/gtp4_d.c142
-rw-r--r--src/plugins/srv6-mobile/gtp4_dt.c40
-rw-r--r--src/plugins/srv6-mobile/gtp4_e.c68
-rw-r--r--src/plugins/srv6-mobile/gtp6_d.c142
-rw-r--r--src/plugins/srv6-mobile/gtp6_d_di.c30
-rw-r--r--src/plugins/srv6-mobile/gtp6_dt.c42
-rw-r--r--src/plugins/srv6-mobile/gtp6_e.c39
-rw-r--r--src/plugins/srv6-mobile/mobile.h85
-rw-r--r--src/plugins/srv6-mobile/mobile_plugin_doc.md201
-rw-r--r--src/plugins/srv6-mobile/mobile_plugin_doc.rst278
-rw-r--r--src/plugins/srv6-mobile/node.c2368
-rw-r--r--src/plugins/srv6-mobile/sr_mobile.api79
-rw-r--r--src/plugins/srv6-mobile/sr_mobile_api.c339
-rw-r--r--src/plugins/srv6-mobile/sr_mobile_api.h72
-rw-r--r--src/plugins/srv6-mobile/sr_mobile_types.api24
-rw-r--r--src/plugins/stn/stn.c8
-rw-r--r--src/plugins/stn/stn_api.c14
-rw-r--r--src/plugins/stn/stn_test.c2
-rw-r--r--src/plugins/svs/svs.c10
-rw-r--r--src/plugins/svs/svs_api.c13
-rw-r--r--src/plugins/tlsmbedtls/tls_mbedtls.c35
-rw-r--r--src/plugins/tlsopenssl/CMakeLists.txt1
-rw-r--r--src/plugins/tlsopenssl/tls_async.c4
-rw-r--r--src/plugins/tlsopenssl/tls_openssl.c386
-rw-r--r--src/plugins/tlsopenssl/tls_openssl.h2
-rw-r--r--src/plugins/tlsopenssl/tls_openssl_api.c1
-rw-r--r--src/plugins/tlspicotls/CMakeLists.txt7
-rw-r--r--src/plugins/tlspicotls/pico_vpp_crypto.c103
-rw-r--r--src/plugins/tlspicotls/tls_picotls.c600
-rw-r--r--src/plugins/tlspicotls/tls_picotls.h9
-rw-r--r--src/plugins/tracedump/CMakeLists.txt6
-rw-r--r--src/plugins/tracedump/graph_api.c2
-rw-r--r--src/plugins/tracedump/graph_cli.c12
-rw-r--r--src/plugins/tracedump/graph_test.c2
-rw-r--r--src/plugins/tracedump/setup.pg52
-rw-r--r--src/plugins/tracedump/tracedump.api68
-rw-r--r--src/plugins/tracedump/tracedump.c217
-rw-r--r--src/plugins/tracedump/tracedump_test.c97
-rw-r--r--src/plugins/tracenode/CMakeLists.txt37
-rw-r--r--src/plugins/tracenode/FEATURE.yaml8
-rw-r--r--src/plugins/tracenode/api.c64
-rw-r--r--src/plugins/tracenode/cli.c72
-rw-r--r--src/plugins/tracenode/node.c145
-rw-r--r--src/plugins/tracenode/plugin.c (renamed from src/plugins/gbp/gbp_types.h)21
-rw-r--r--src/plugins/tracenode/test.c93
-rw-r--r--src/plugins/tracenode/tracenode.api42
-rw-r--r--src/plugins/tracenode/tracenode.c71
-rw-r--r--src/plugins/tracenode/tracenode.h43
-rw-r--r--src/plugins/unittest/CMakeLists.txt11
-rw-r--r--src/plugins/unittest/api_fuzz_test.c10
-rw-r--r--src/plugins/unittest/api_test.c102
-rw-r--r--src/plugins/unittest/bier_test.c54
-rw-r--r--src/plugins/unittest/bihash_test.c48
-rw-r--r--src/plugins/unittest/bitmap_test.c219
-rw-r--r--src/plugins/unittest/counter_test.c37
-rw-r--r--src/plugins/unittest/crypto/aes_cbc.c6
-rw-r--r--src/plugins/unittest/crypto/aes_ctr.c6
-rw-r--r--src/plugins/unittest/crypto/aes_gcm.c2
-rw-r--r--src/plugins/unittest/crypto/aes_gmac.c3029
-rw-r--r--src/plugins/unittest/crypto/chacha20_poly1305.c6
-rw-r--r--src/plugins/unittest/crypto/crypto.h4
-rw-r--r--src/plugins/unittest/crypto/rfc2202_hmac_md5.c14
-rw-r--r--src/plugins/unittest/crypto/rfc2202_hmac_sha1.c16
-rw-r--r--src/plugins/unittest/crypto/rfc4231.c14
-rw-r--r--src/plugins/unittest/crypto_test.c12
-rw-r--r--src/plugins/unittest/fib_test.c309
-rw-r--r--src/plugins/unittest/gso_test.c456
-rw-r--r--src/plugins/unittest/hash_test.c331
-rw-r--r--src/plugins/unittest/interface_test.c2
-rw-r--r--src/plugins/unittest/ip_psh_cksum_test.c266
-rw-r--r--src/plugins/unittest/ipsec_test.c326
-rw-r--r--src/plugins/unittest/llist_test.c8
-rw-r--r--src/plugins/unittest/mactime_test.c2
-rw-r--r--src/plugins/unittest/mfib_test.c216
-rw-r--r--src/plugins/unittest/mpcap_node.c2
-rw-r--r--src/plugins/unittest/pool_test.c38
-rw-r--r--src/plugins/unittest/punt_test.c6
-rw-r--r--src/plugins/unittest/rbtree_test.c2
-rw-r--r--src/plugins/unittest/segment_manager_test.c57
-rw-r--r--src/plugins/unittest/session_test.c114
-rw-r--r--src/plugins/unittest/sparse_vec_test.c2
-rw-r--r--src/plugins/unittest/string_test.c466
-rw-r--r--src/plugins/unittest/svm_fifo_test.c14
-rw-r--r--src/plugins/unittest/tcp_test.c4
-rw-r--r--src/plugins/unittest/test_buffer.c310
-rw-r--r--src/plugins/unittest/unittest.c2
-rw-r--r--src/plugins/unittest/util_test.c6
-rw-r--r--src/plugins/unittest/vlib_test.c6
-rw-r--r--src/plugins/urpf/ip4_urpf.c2
-rw-r--r--src/plugins/urpf/ip6_urpf.c2
-rw-r--r--src/plugins/urpf/urpf.api45
-rw-r--r--src/plugins/urpf/urpf.c125
-rw-r--r--src/plugins/urpf/urpf.h21
-rw-r--r--src/plugins/urpf/urpf_api.c110
-rw-r--r--src/plugins/urpf/urpf_dp.h22
-rw-r--r--src/plugins/vhost/CMakeLists.txt34
-rw-r--r--src/plugins/vhost/FEATURE.yaml13
-rw-r--r--src/plugins/vhost/plugin.c12
-rw-r--r--src/plugins/vhost/vhost_std.h69
-rw-r--r--src/plugins/vhost/vhost_user.api (renamed from src/vnet/devices/virtio/vhost_user.api)0
-rw-r--r--src/plugins/vhost/vhost_user.c (renamed from src/vnet/devices/virtio/vhost_user.c)533
-rw-r--r--src/plugins/vhost/vhost_user.h (renamed from src/vnet/devices/virtio/vhost_user.h)40
-rw-r--r--src/plugins/vhost/vhost_user_api.c (renamed from src/vnet/devices/virtio/vhost_user_api.c)34
-rw-r--r--src/plugins/vhost/vhost_user_inline.h (renamed from src/vnet/devices/virtio/vhost_user_inline.h)25
-rw-r--r--src/plugins/vhost/vhost_user_input.c (renamed from src/vnet/devices/virtio/vhost_user_input.c)55
-rw-r--r--src/plugins/vhost/vhost_user_output.c (renamed from src/vnet/devices/virtio/vhost_user_output.c)149
-rw-r--r--src/plugins/vhost/virtio_std.h188
-rw-r--r--src/plugins/vmxnet3/README.md64
-rw-r--r--src/plugins/vmxnet3/README.rst86
-rw-r--r--src/plugins/vmxnet3/cli.c41
-rw-r--r--src/plugins/vmxnet3/format.c2
-rw-r--r--src/plugins/vmxnet3/input.c15
-rw-r--r--src/plugins/vmxnet3/plugin.c2
-rw-r--r--src/plugins/vmxnet3/vmxnet3.c96
-rw-r--r--src/plugins/vmxnet3/vmxnet3.h19
-rw-r--r--src/plugins/vmxnet3/vmxnet3_api.c19
-rw-r--r--src/plugins/vmxnet3/vmxnet3_test.c2
-rw-r--r--src/plugins/vrrp/node.c60
-rw-r--r--src/plugins/vrrp/setup.pg20
-rw-r--r--src/plugins/vrrp/vrrp.api51
-rw-r--r--src/plugins/vrrp/vrrp.c321
-rw-r--r--src/plugins/vrrp/vrrp.h47
-rw-r--r--src/plugins/vrrp/vrrp_all_api_h.h11
-rw-r--r--src/plugins/vrrp/vrrp_api.c114
-rw-r--r--src/plugins/vrrp/vrrp_cli.c20
-rw-r--r--src/plugins/vrrp/vrrp_msg_enum.h23
-rw-r--r--src/plugins/vrrp/vrrp_packet.c51
-rw-r--r--src/plugins/vrrp/vrrp_packet.h9
-rw-r--r--src/plugins/vrrp/vrrp_periodic.c2
-rw-r--r--src/plugins/vrrp/vrrp_test.c173
-rw-r--r--src/plugins/vxlan/CMakeLists.txt29
-rw-r--r--src/plugins/vxlan/FEATURE.yaml (renamed from src/vnet/vxlan/FEATURE.yaml)0
-rw-r--r--src/plugins/vxlan/decap.c (renamed from src/vnet/vxlan/decap.c)16
-rw-r--r--src/plugins/vxlan/dir.dox (renamed from src/vnet/vxlan/dir.dox)0
-rw-r--r--src/plugins/vxlan/encap.c (renamed from src/vnet/vxlan/encap.c)131
-rw-r--r--src/plugins/vxlan/plugin.c12
-rw-r--r--src/plugins/vxlan/vxlan.api (renamed from src/vnet/vxlan/vxlan.api)12
-rw-r--r--src/plugins/vxlan/vxlan.c (renamed from src/vnet/vxlan/vxlan.c)63
-rw-r--r--src/plugins/vxlan/vxlan.h (renamed from src/vnet/vxlan/vxlan.h)10
-rw-r--r--src/plugins/vxlan/vxlan_api.c (renamed from src/vnet/vxlan/vxlan_api.c)5
-rw-r--r--src/plugins/vxlan/vxlan_error.def (renamed from src/vnet/vxlan/vxlan_error.def)0
-rw-r--r--src/plugins/vxlan/vxlan_packet.h (renamed from src/vnet/vxlan/vxlan_packet.h)0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/CMakeLists.txt11
-rw-r--r--src/plugins/wireguard/FEATURE.yaml3
-rwxr-xr-xsrc/plugins/wireguard/README.md55
-rw-r--r--src/plugins/wireguard/README.rst79
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/blake/blake2-impl.h0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/blake/blake2s.c0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/blake/blake2s.h0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard.api79
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard.c65
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard.h79
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_api.c210
-rw-r--r--src/plugins/wireguard/wireguard_chachapoly.c133
-rw-r--r--src/plugins/wireguard/wireguard_chachapoly.h48
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_cli.c103
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_cookie.c218
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_cookie.h43
-rw-r--r--src/plugins/wireguard/wireguard_handoff.c104
-rw-r--r--src/plugins/wireguard/wireguard_hchacha20.h90
-rw-r--r--src/plugins/wireguard/wireguard_if.c180
-rw-r--r--src/plugins/wireguard/wireguard_if.h59
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_index_table.c14
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_index_table.h7
-rw-r--r--src/plugins/wireguard/wireguard_input.c1111
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_key.c0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_key.h0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_messages.h0
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_noise.c315
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_noise.h97
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_output_tun.c869
-rw-r--r--src/plugins/wireguard/wireguard_peer.c481
-rw-r--r--src/plugins/wireguard/wireguard_peer.h90
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_send.c194
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_send.h7
-rw-r--r--src/plugins/wireguard/wireguard_timer.c50
-rw-r--r--[-rwxr-xr-x]src/plugins/wireguard/wireguard_timer.h14
-rwxr-xr-xsrc/scripts/fts.py224
-rwxr-xr-xsrc/scripts/generate_version_h2
-rwxr-xr-xsrc/scripts/host-stack/cc_plots.py337
-rwxr-xr-xsrc/scripts/remove-rpath2
-rwxr-xr-xsrc/scripts/version4
-rw-r--r--src/scripts/vnet/arp420
-rw-r--r--src/scripts/vnet/arp4-mpls20
-rw-r--r--src/scripts/vnet/arp620
-rw-r--r--src/scripts/vnet/bvi60
-rw-r--r--src/scripts/vnet/dhcp/left-ping-target.sh2
-rw-r--r--src/scripts/vnet/gre96
-rw-r--r--src/scripts/vnet/gre-teb44
-rw-r--r--src/scripts/vnet/icmp20
-rw-r--r--src/scripts/vnet/icmp620
-rw-r--r--src/scripts/vnet/ige18
-rw-r--r--src/scripts/vnet/ip622
-rw-r--r--src/scripts/vnet/ip6-hbh126
-rw-r--r--src/scripts/vnet/ipsec75
-rw-r--r--src/scripts/vnet/ipsec_spd26
-rw-r--r--src/scripts/vnet/ipsec_spd_vrf56
-rw-r--r--src/scripts/vnet/ipsec_tun_protect71
-rw-r--r--src/scripts/vnet/ixge15
-rw-r--r--src/scripts/vnet/l2efpfilter54
-rw-r--r--src/scripts/vnet/l2efpfilter_perf18
-rw-r--r--src/scripts/vnet/l2fib28
-rw-r--r--src/scripts/vnet/l2fib_perf14
-rw-r--r--src/scripts/vnet/l2fib_xc14
-rw-r--r--src/scripts/vnet/l2flood19
-rw-r--r--src/scripts/vnet/l2tp152
-rw-r--r--src/scripts/vnet/l3fwd88
-rw-r--r--src/scripts/vnet/lfib/ip4-to-mpls20
-rw-r--r--src/scripts/vnet/lfib/mpls-pop-to-mpls16
-rw-r--r--src/scripts/vnet/lfib/mpls-to-ip416
-rw-r--r--src/scripts/vnet/lfib/mpls-to-mpls16
-rw-r--r--src/scripts/vnet/mcast/ip420
-rw-r--r--src/scripts/vnet/mpls-o-ethernet/pg18
-rw-r--r--src/scripts/vnet/mpls-to-dot1ad32
-rw-r--r--src/scripts/vnet/mpls-tunnel80
-rw-r--r--src/scripts/vnet/nat4444
-rw-r--r--src/scripts/vnet/nat44_det144
-rw-r--r--src/scripts/vnet/nat44_lb48
-rw-r--r--src/scripts/vnet/nat44_static48
-rw-r--r--src/scripts/vnet/nat44_static_with_port48
-rw-r--r--src/scripts/vnet/nat6448
-rw-r--r--src/scripts/vnet/nat64_static48
-rw-r--r--src/scripts/vnet/pcap20
-rw-r--r--src/scripts/vnet/rewrite54
-rw-r--r--src/scripts/vnet/rpf20
-rw-r--r--src/scripts/vnet/rtt-test20
-rw-r--r--src/scripts/vnet/source_and_port_range_check60
-rw-r--r--src/scripts/vnet/speed20
-rw-r--r--src/scripts/vnet/srp20
-rw-r--r--src/scripts/vnet/tcp24
-rw-r--r--src/scripts/vnet/udp26
-rwxr-xr-xsrc/scripts/vnet/uri/dummy_app.py110
-rw-r--r--src/scripts/vnet/uri/udp26
-rw-r--r--src/scripts/vnet/urpf100
-rw-r--r--src/scripts/vnet/vlan24
-rw-r--r--src/svm/fifo_segment.c72
-rw-r--r--src/svm/fifo_segment.h30
-rw-r--r--src/svm/fifo_types.h4
-rw-r--r--src/svm/message_queue.c47
-rw-r--r--src/svm/message_queue.h25
-rw-r--r--src/svm/queue.c4
-rw-r--r--src/svm/ssvm.c13
-rw-r--r--src/svm/ssvm.h2
-rw-r--r--src/svm/svm.c27
-rw-r--r--src/svm/svm.h17
-rw-r--r--src/svm/svm_common.h6
-rw-r--r--src/svm/svm_fifo.c30
-rw-r--r--src/svm/svm_fifo.h130
-rw-r--r--src/svm/svmdb.c20
-rw-r--r--src/svm/svmdbtool.c5
-rw-r--r--src/svm/svmtool.c4
-rw-r--r--src/tools/appimage/CMakeLists.txt2
-rw-r--r--src/tools/g2/clib.c1
-rw-r--r--src/tools/g2/cpel.c1
-rw-r--r--src/tools/g2/events.c1
-rw-r--r--src/tools/g2/pointsel.c2
-rw-r--r--src/tools/g2/view1.c23
-rw-r--r--src/tools/perftool/c2cpel.c1
-rw-r--r--src/tools/perftool/cpel_util.c3
-rw-r--r--src/tools/perftool/cpelatency.c1
-rw-r--r--src/tools/perftool/cpeldump.c1
-rw-r--r--src/tools/perftool/cpelinreg.c11
-rw-r--r--src/tools/perftool/cpelstate.c1
-rw-r--r--src/tools/perftool/delsvec.c496
-rw-r--r--src/tools/vppapigen/CMakeLists.txt18
-rw-r--r--src/tools/vppapigen/VPPAPI.md346
-rw-r--r--src/tools/vppapigen/VPPAPI.rst597
-rwxr-xr-xsrc/tools/vppapigen/generate_go.py236
-rwxr-xr-xsrc/tools/vppapigen/generate_json.py118
-rwxr-xr-xsrc/tools/vppapigen/test_vppapigen.py120
-rwxr-xr-xsrc/tools/vppapigen/vppapigen.py916
-rwxr-xr-x[-rw-r--r--]src/tools/vppapigen/vppapigen_c.py1983
-rw-r--r--src/tools/vppapigen/vppapigen_crc.py17
-rw-r--r--src/tools/vppapigen/vppapigen_json.py89
l---------src/tools/vppapitrace/vppapitrace1
-rwxr-xr-xsrc/tools/vppapitrace/vppapitrace.py492
-rw-r--r--src/vat/CMakeLists.txt5
-rw-r--r--src/vat/api_format.c886
-rw-r--r--src/vat/ip_types.c29
-rw-r--r--src/vat/json_format.h5
-rw-r--r--src/vat/main.c42
-rw-r--r--src/vat/plugin.c4
-rw-r--r--src/vat/vat.h25
-rw-r--r--src/vat2/CMakeLists.txt18
-rw-r--r--src/vat2/jsonconvert.h105
-rw-r--r--src/vat2/main.c501
-rw-r--r--src/vat2/plugin.c7
-rw-r--r--src/vat2/test/vat2_test.c1
-rw-r--r--src/vat2/vat2.h10
-rw-r--r--src/vat2/vat2_helpers.h4
-rw-r--r--src/vcl/CMakeLists.txt11
-rw-r--r--src/vcl/ldp.c470
-rw-r--r--src/vcl/ldp.h2
-rw-r--r--src/vcl/ldp_glibc_socket.h19
-rw-r--r--src/vcl/ldp_socket_wrapper.c121
-rw-r--r--src/vcl/ldp_socket_wrapper.h38
-rw-r--r--src/vcl/vcl_bapi.c33
-rw-r--r--src/vcl/vcl_cfg.c52
-rw-r--r--src/vcl/vcl_locked.c350
-rw-r--r--src/vcl/vcl_private.c236
-rw-r--r--src/vcl/vcl_private.h85
-rw-r--r--src/vcl/vcl_sapi.c119
-rw-r--r--src/vcl/vppcom.c1136
-rw-r--r--src/vcl/vppcom.h116
-rw-r--r--src/vlib/CMakeLists.txt54
-rw-r--r--src/vlib/buffer.c227
-rw-r--r--src/vlib/buffer.h147
-rw-r--r--src/vlib/buffer_funcs.c245
-rw-r--r--src/vlib/buffer_funcs.h465
-rw-r--r--src/vlib/buffer_node.h107
-rw-r--r--src/vlib/cli.c112
-rw-r--r--src/vlib/cli.h1
-rw-r--r--src/vlib/config.h.in3
-rw-r--r--src/vlib/counter.c106
-rw-r--r--src/vlib/counter.h16
-rw-r--r--src/vlib/dma/cli.c160
-rw-r--r--src/vlib/dma/dma.c82
-rw-r--r--src/vlib/dma/dma.h132
-rw-r--r--src/vlib/dma/dma.rst70
-rw-r--r--src/vlib/drop.c22
-rw-r--r--src/vlib/error.c164
-rw-r--r--src/vlib/error.h5
-rw-r--r--src/vlib/freebsd/pci.c380
-rw-r--r--src/vlib/handoff_trace.c3
-rw-r--r--src/vlib/init.c6
-rw-r--r--src/vlib/init.h2
-rw-r--r--src/vlib/lex.c6
-rw-r--r--src/vlib/linux/pci.c419
-rw-r--r--src/vlib/linux/vfio.c54
-rw-r--r--src/vlib/linux/vfio.h2
-rw-r--r--src/vlib/linux/vmbus.c79
-rw-r--r--src/vlib/log.c78
-rw-r--r--src/vlib/log.h29
-rw-r--r--src/vlib/main.c270
-rw-r--r--src/vlib/main.h32
-rw-r--r--src/vlib/node.c181
-rw-r--r--src/vlib/node.h42
-rw-r--r--src/vlib/node_cli.c51
-rw-r--r--src/vlib/node_format.c2
-rw-r--r--src/vlib/node_funcs.h320
-rw-r--r--src/vlib/pci/pci.c228
-rw-r--r--src/vlib/pci/pci.h137
-rw-r--r--src/vlib/pci/pci_config.h541
-rw-r--r--src/vlib/physmem.c8
-rw-r--r--src/vlib/punt.c14
-rw-r--r--src/vlib/punt_node.c2
-rw-r--r--src/vlib/stat_weak_inlines.h72
-rw-r--r--src/vlib/stats/cli.c121
-rw-r--r--src/vlib/stats/collector.c186
-rw-r--r--src/vlib/stats/format.c21
-rw-r--r--src/vlib/stats/init.c258
-rw-r--r--src/vlib/stats/provider_mem.c68
-rw-r--r--src/vlib/stats/shared.h50
-rw-r--r--src/vlib/stats/stats.c574
-rw-r--r--src/vlib/stats/stats.h164
-rw-r--r--src/vlib/threads.c435
-rw-r--r--src/vlib/threads.h93
-rw-r--r--src/vlib/threads_cli.c15
-rw-r--r--src/vlib/time.c84
-rw-r--r--src/vlib/time.h26
-rw-r--r--src/vlib/trace.c173
-rw-r--r--src/vlib/trace.h39
-rw-r--r--src/vlib/trace_funcs.h9
-rw-r--r--src/vlib/unix/cli.c250
-rw-r--r--src/vlib/unix/input.c9
-rw-r--r--src/vlib/unix/main.c168
-rw-r--r--src/vlib/unix/mc_socket.c12
-rw-r--r--src/vlib/unix/plugin.c20
-rw-r--r--src/vlib/unix/plugin.h20
-rw-r--r--src/vlib/unix/util.c6
-rw-r--r--src/vlibapi/CMakeLists.txt21
-rw-r--r--src/vlibapi/api.h42
-rw-r--r--src/vlibapi/api_common.h120
-rw-r--r--src/vlibapi/api_doc.md352
-rw-r--r--src/vlibapi/api_doc.rst341
-rw-r--r--src/vlibapi/api_format.c39
-rw-r--r--src/vlibapi/api_helper_macros.h608
-rw-r--r--src/vlibapi/api_shared.c693
-rw-r--r--src/vlibapi/memory_shared.c (renamed from src/vlibmemory/memory_shared.c)21
-rw-r--r--src/vlibapi/memory_shared.h (renamed from src/vlibmemory/memory_shared.h)0
-rw-r--r--src/vlibapi/node_serialize.c4
-rw-r--r--src/vlibapi/vat_helper_macros.h16
-rw-r--r--src/vlibmemory/CMakeLists.txt30
-rw-r--r--src/vlibmemory/api.h1
-rw-r--r--src/vlibmemory/memclnt.api68
-rw-r--r--src/vlibmemory/memclnt_api.c780
-rw-r--r--src/vlibmemory/memory_api.c304
-rw-r--r--src/vlibmemory/memory_api.h2
-rw-r--r--src/vlibmemory/memory_client.c82
-rw-r--r--src/vlibmemory/memory_client.h3
-rw-r--r--src/vlibmemory/socket_api.c70
-rw-r--r--src/vlibmemory/socket_client.c94
-rw-r--r--src/vlibmemory/socket_client.h3
-rw-r--r--src/vlibmemory/vlib.api250
-rw-r--r--src/vlibmemory/vlib_api.c830
-rw-r--r--src/vlibmemory/vlib_api_cli.c618
-rw-r--r--src/vlibmemory/vlibapi_test.c470
-rw-r--r--src/vnet/CMakeLists.txt204
-rw-r--r--src/vnet/MTU.md72
-rw-r--r--src/vnet/adj/adj.c19
-rw-r--r--src/vnet/adj/adj.h10
-rw-r--r--src/vnet/adj/adj_bfd.c53
-rw-r--r--src/vnet/adj/adj_dp.h42
-rw-r--r--src/vnet/adj/adj_glean.c135
-rw-r--r--src/vnet/adj/adj_glean.h1
-rw-r--r--src/vnet/adj/adj_internal.h19
-rw-r--r--src/vnet/adj/adj_mcast.c4
-rw-r--r--src/vnet/adj/adj_midchain.c82
-rw-r--r--src/vnet/adj/adj_midchain.h7
-rw-r--r--src/vnet/adj/adj_midchain_delegate.c25
-rw-r--r--src/vnet/adj/adj_midchain_node.c35
-rw-r--r--src/vnet/adj/adj_nbr.c95
-rw-r--r--src/vnet/adj/adj_nsh.c2
-rw-r--r--src/vnet/adj/rewrite.h6
-rw-r--r--src/vnet/api_errno.h179
-rw-r--r--src/vnet/arp/arp.api118
-rw-r--r--src/vnet/arp/arp.c249
-rw-r--r--src/vnet/arp/arp.h27
-rw-r--r--src/vnet/arp/arp_packet.h2
-rw-r--r--src/vnet/arp/arp_proxy.c40
-rw-r--r--src/vnet/arp/arp_test.c4
-rw-r--r--src/vnet/bfd/bfd.api121
-rw-r--r--src/vnet/bfd/bfd_api.c29
-rw-r--r--src/vnet/bfd/bfd_api.h9
-rw-r--r--src/vnet/bfd/bfd_cli.c134
-rw-r--r--src/vnet/bfd/bfd_doc.md374
-rw-r--r--src/vnet/bfd/bfd_doc.rst512
-rw-r--r--src/vnet/bfd/bfd_main.c387
-rw-r--r--src/vnet/bfd/bfd_main.h51
-rw-r--r--src/vnet/bfd/bfd_protocol.h10
-rw-r--r--src/vnet/bfd/bfd_udp.c555
-rw-r--r--src/vnet/bfd/bfd_udp.h14
-rw-r--r--src/vnet/bier/bier_update.c9
-rw-r--r--src/vnet/bonding/bond_api.c16
-rw-r--r--src/vnet/bonding/cli.c108
-rw-r--r--src/vnet/bonding/device.c383
-rw-r--r--src/vnet/bonding/node.c2
-rw-r--r--src/vnet/bonding/node.h3
-rw-r--r--src/vnet/buffer.c21
-rw-r--r--src/vnet/buffer.h50
-rw-r--r--src/vnet/classify/classify.api40
-rw-r--r--src/vnet/classify/classify_api.c137
-rw-r--r--src/vnet/classify/flow_classify.c4
-rw-r--r--src/vnet/classify/flow_classify_node.c8
-rw-r--r--src/vnet/classify/in_out_acl.c132
-rw-r--r--src/vnet/classify/in_out_acl.h16
-rw-r--r--src/vnet/classify/ip_classify.c8
-rw-r--r--src/vnet/classify/pcap_classify.h6
-rw-r--r--src/vnet/classify/policer_classify.c4
-rw-r--r--src/vnet/classify/trace_classify.h5
-rw-r--r--src/vnet/classify/vnet_classify.c174
-rw-r--r--src/vnet/classify/vnet_classify.h343
-rw-r--r--src/vnet/config.c31
-rw-r--r--src/vnet/config.h6
-rw-r--r--src/vnet/crypto/cli.c98
-rw-r--r--src/vnet/crypto/crypto.api21
-rw-r--r--src/vnet/crypto/crypto.c244
-rw-r--r--src/vnet/crypto/crypto.h114
-rw-r--r--src/vnet/crypto/crypto_api.c14
-rw-r--r--src/vnet/crypto/node.c46
-rw-r--r--src/vnet/dev/api.c275
-rw-r--r--src/vnet/dev/api.h68
-rw-r--r--src/vnet/dev/args.c237
-rw-r--r--src/vnet/dev/args.h74
-rw-r--r--src/vnet/dev/cli.c331
-rw-r--r--src/vnet/dev/config.c196
-rw-r--r--src/vnet/dev/counters.c132
-rw-r--r--src/vnet/dev/counters.h128
-rw-r--r--src/vnet/dev/dev.api86
-rw-r--r--src/vnet/dev/dev.c461
-rw-r--r--src/vnet/dev/dev.h753
-rw-r--r--src/vnet/dev/dev_api.c192
-rw-r--r--src/vnet/dev/dev_funcs.h332
-rw-r--r--src/vnet/dev/error.c54
-rw-r--r--src/vnet/dev/errors.h46
-rw-r--r--src/vnet/dev/format.c507
-rw-r--r--src/vnet/dev/handlers.c256
-rw-r--r--src/vnet/dev/log.h22
-rw-r--r--src/vnet/dev/mgmt.h10
-rw-r--r--src/vnet/dev/pci.c458
-rw-r--r--src/vnet/dev/pci.h80
-rw-r--r--src/vnet/dev/port.c748
-rw-r--r--src/vnet/dev/process.c474
-rw-r--r--src/vnet/dev/process.h10
-rw-r--r--src/vnet/dev/queue.c227
-rw-r--r--src/vnet/dev/runtime.c180
-rw-r--r--src/vnet/dev/types.h66
-rw-r--r--src/vnet/devices/af_packet/af_packet.api97
-rw-r--r--src/vnet/devices/af_packet/af_packet.c578
-rw-r--r--src/vnet/devices/af_packet/af_packet.h93
-rw-r--r--src/vnet/devices/af_packet/af_packet_api.c156
-rw-r--r--src/vnet/devices/af_packet/device.c398
-rw-r--r--src/vnet/devices/af_packet/node.c410
-rw-r--r--src/vnet/devices/devices.c50
-rw-r--r--src/vnet/devices/devices.h2
-rw-r--r--src/vnet/devices/netlink.c50
-rw-r--r--src/vnet/devices/netlink.h2
-rw-r--r--src/vnet/devices/pipe/pipe.c34
-rw-r--r--src/vnet/devices/pipe/pipe_api.c2
-rw-r--r--src/vnet/devices/tap/FEATURE.yaml2
-rw-r--r--src/vnet/devices/tap/cli.c24
-rw-r--r--src/vnet/devices/tap/tap.c242
-rw-r--r--src/vnet/devices/tap/tap.h5
-rw-r--r--src/vnet/devices/tap/tapv2.api80
-rw-r--r--src/vnet/devices/tap/tapv2_api.c97
-rw-r--r--src/vnet/devices/virtio/FEATURE.yaml4
-rw-r--r--src/vnet/devices/virtio/cli.c20
-rw-r--r--src/vnet/devices/virtio/device.c233
-rw-r--r--src/vnet/devices/virtio/node.c146
-rw-r--r--src/vnet/devices/virtio/pci.c281
-rw-r--r--src/vnet/devices/virtio/pci.h17
-rw-r--r--src/vnet/devices/virtio/virtio.api2
-rw-r--r--src/vnet/devices/virtio/virtio.c137
-rw-r--r--src/vnet/devices/virtio/virtio.h90
-rw-r--r--src/vnet/devices/virtio/virtio_api.c6
-rw-r--r--src/vnet/devices/virtio/virtio_buffering.h11
-rw-r--r--src/vnet/devices/virtio/virtio_inline.h17
-rw-r--r--src/vnet/devices/virtio/virtio_pci_legacy.c7
-rw-r--r--src/vnet/devices/virtio/virtio_pci_modern.c22
-rw-r--r--src/vnet/devices/virtio/virtio_pre_input.c160
-rw-r--r--src/vnet/devices/virtio/virtio_process.c4
-rw-r--r--src/vnet/devices/virtio/virtio_std.h82
-rw-r--r--src/vnet/dpo/dpo.c4
-rw-r--r--src/vnet/dpo/dpo.h2
-rw-r--r--src/vnet/dpo/dvr_dpo.c9
-rw-r--r--src/vnet/dpo/interface_rx_dpo.c7
-rw-r--r--src/vnet/dpo/interface_tx_dpo.c5
-rw-r--r--src/vnet/dpo/ip6_ll_dpo.c22
-rw-r--r--src/vnet/dpo/l3_proxy_dpo.c5
-rw-r--r--src/vnet/dpo/load_balance.c45
-rw-r--r--src/vnet/dpo/load_balance.h18
-rw-r--r--src/vnet/dpo/load_balance_map.c2
-rw-r--r--src/vnet/dpo/lookup_dpo.c51
-rw-r--r--src/vnet/dpo/mpls_disposition.c58
-rw-r--r--src/vnet/dpo/mpls_label_dpo.c46
-rw-r--r--src/vnet/dpo/receive_dpo.c21
-rw-r--r--src/vnet/dpo/replicate_dpo.c16
-rw-r--r--src/vnet/dpo/replicate_dpo.h8
-rw-r--r--src/vnet/error.c54
-rw-r--r--src/vnet/error.h177
-rw-r--r--src/vnet/ethernet/arp_packet.h12
-rw-r--r--src/vnet/ethernet/ethernet.h34
-rw-r--r--src/vnet/ethernet/init.c4
-rw-r--r--src/vnet/ethernet/interface.c126
-rw-r--r--src/vnet/ethernet/mac_address.c6
-rw-r--r--src/vnet/ethernet/node.c48
-rw-r--r--src/vnet/ethernet/p2p_ethernet.c16
-rw-r--r--src/vnet/ethernet/p2p_ethernet_api.c2
-rw-r--r--src/vnet/ethernet/p2p_ethernet_input.c2
-rw-r--r--src/vnet/ethernet/packet.h2
-rw-r--r--src/vnet/feature/feature.c64
-rw-r--r--src/vnet/feature/feature.h115
-rw-r--r--src/vnet/feature/registration.c2
-rw-r--r--src/vnet/fib/fib.c2
-rw-r--r--src/vnet/fib/fib_api.c36
-rw-r--r--src/vnet/fib/fib_api.h2
-rw-r--r--src/vnet/fib/fib_attached_export.c8
-rw-r--r--src/vnet/fib/fib_bfd.c2
-rw-r--r--src/vnet/fib/fib_entry.c224
-rw-r--r--src/vnet/fib/fib_entry.h11
-rw-r--r--src/vnet/fib/fib_entry_src.c81
-rw-r--r--src/vnet/fib/fib_entry_src.h3
-rw-r--r--src/vnet/fib/fib_entry_src_interface.c81
-rw-r--r--src/vnet/fib/fib_node.c47
-rw-r--r--src/vnet/fib/fib_node.h32
-rw-r--r--src/vnet/fib/fib_path.c78
-rw-r--r--src/vnet/fib/fib_path.h2
-rw-r--r--src/vnet/fib/fib_path_ext.c5
-rw-r--r--src/vnet/fib/fib_path_ext.h2
-rw-r--r--src/vnet/fib/fib_path_list.c25
-rw-r--r--src/vnet/fib/fib_sas.c33
-rw-r--r--src/vnet/fib/fib_table.c89
-rw-r--r--src/vnet/fib/fib_table.h9
-rw-r--r--src/vnet/fib/fib_types.api2
-rw-r--r--src/vnet/fib/fib_types.c86
-rw-r--r--src/vnet/fib/fib_types.h12
-rw-r--r--src/vnet/fib/fib_urpf_list.c14
-rw-r--r--src/vnet/fib/fib_walk.c2
-rw-r--r--src/vnet/fib/ip4_fib.c420
-rw-r--r--src/vnet/fib/ip4_fib.h281
-rw-r--r--src/vnet/fib/ip4_fib_16.c137
-rw-r--r--src/vnet/fib/ip4_fib_16.h106
-rw-r--r--src/vnet/fib/ip4_fib_8.c137
-rw-r--r--src/vnet/fib/ip4_fib_8.h106
-rw-r--r--src/vnet/fib/ip4_fib_hash.c249
-rw-r--r--src/vnet/fib/ip4_fib_hash.h74
-rw-r--r--src/vnet/fib/ip6_fib.c9
-rw-r--r--src/vnet/fib/mpls_fib.c28
-rw-r--r--src/vnet/fib/mpls_fib.h4
-rw-r--r--src/vnet/flow/FEATURE.yaml4
-rw-r--r--src/vnet/flow/flow.api31
-rw-r--r--src/vnet/flow/flow.c2
-rw-r--r--src/vnet/flow/flow.h161
-rw-r--r--src/vnet/flow/flow_api.c97
-rw-r--r--src/vnet/flow/flow_cli.c204
-rw-r--r--src/vnet/flow/flow_types.api109
-rw-r--r--src/vnet/gre/packet.h2
-rw-r--r--src/vnet/gso/FEATURE.yaml2
-rw-r--r--src/vnet/gso/cli.c2
-rw-r--r--src/vnet/gso/gro_func.h60
-rw-r--r--src/vnet/gso/gso.h273
-rw-r--r--src/vnet/gso/gso.rst154
-rw-r--r--src/vnet/gso/hdr_offset_parser.h10
-rw-r--r--src/vnet/gso/node.c207
-rw-r--r--src/vnet/handoff.c130
-rw-r--r--src/vnet/hash/FEATURE.yaml9
-rw-r--r--src/vnet/hash/cli.c33
-rw-r--r--src/vnet/hash/crc32_5tuple.c168
-rw-r--r--src/vnet/hash/handoff_eth.c (renamed from src/vnet/handoff.h)116
-rw-r--r--src/vnet/hash/hash.c76
-rw-r--r--src/vnet/hash/hash.h59
-rw-r--r--src/vnet/hash/hash.rst90
-rw-r--r--src/vnet/hash/hash_eth.c326
-rw-r--r--src/vnet/hdlc/hdlc.c2
-rw-r--r--src/vnet/hdlc/node.c2
-rw-r--r--src/vnet/interface.api150
-rw-r--r--src/vnet/interface.c224
-rw-r--r--src/vnet/interface.h149
-rw-r--r--src/vnet/interface/caps.c63
-rw-r--r--src/vnet/interface/monitor.c121
-rw-r--r--src/vnet/interface/runtime.c130
-rw-r--r--src/vnet/interface/rx_queue.c25
-rw-r--r--src/vnet/interface/rx_queue_funcs.h12
-rw-r--r--src/vnet/interface/stats.c84
-rw-r--r--src/vnet/interface/tx_queue.rst159
-rw-r--r--src/vnet/interface/tx_queue_funcs.h17
-rw-r--r--src/vnet/interface_api.c512
-rw-r--r--src/vnet/interface_cli.c481
-rw-r--r--src/vnet/interface_format.c28
-rw-r--r--src/vnet/interface_funcs.h53
-rw-r--r--src/vnet/interface_output.c420
-rw-r--r--src/vnet/interface_output.h31
-rw-r--r--src/vnet/interface_stats.c2
-rw-r--r--src/vnet/interface_test.c1316
-rw-r--r--src/vnet/ip-neighbor/ip4_neighbor.c75
-rw-r--r--src/vnet/ip-neighbor/ip4_neighbor.h24
-rw-r--r--src/vnet/ip-neighbor/ip6_neighbor.c70
-rw-r--r--src/vnet/ip-neighbor/ip6_neighbor.h23
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor.api115
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor.c270
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor.h8
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor_api.c28
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor_types.c47
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor_types.h30
-rw-r--r--src/vnet/ip-neighbor/ip_neighbor_watch.c10
-rw-r--r--src/vnet/ip/icmp4.c73
-rw-r--r--src/vnet/ip/icmp4.h23
-rw-r--r--src/vnet/ip/icmp46_packet.h4
-rw-r--r--src/vnet/ip/icmp6.c251
-rw-r--r--src/vnet/ip/icmp6.h42
-rw-r--r--src/vnet/ip/ip.api948
-rw-r--r--src/vnet/ip/ip.c19
-rw-r--r--src/vnet/ip/ip.h12
-rw-r--r--src/vnet/ip/ip4.h10
-rw-r--r--src/vnet/ip/ip46_address.h2
-rw-r--r--src/vnet/ip/ip46_cli.c10
-rw-r--r--src/vnet/ip/ip4_error.h108
-rw-r--r--src/vnet/ip/ip4_forward.c333
-rw-r--r--src/vnet/ip/ip4_forward.h59
-rw-r--r--src/vnet/ip/ip4_inlines.h26
-rw-r--r--src/vnet/ip/ip4_input.c13
-rw-r--r--src/vnet/ip/ip4_input.h19
-rw-r--r--src/vnet/ip/ip4_mtrie.c531
-rw-r--r--src/vnet/ip/ip4_mtrie.h148
-rw-r--r--src/vnet/ip/ip4_options.c9
-rw-r--r--src/vnet/ip/ip4_packet.h50
-rw-r--r--src/vnet/ip/ip4_punt_drop.c65
-rw-r--r--src/vnet/ip/ip4_source_and_port_range_check.c30
-rw-r--r--src/vnet/ip/ip4_to_ip6.h2
-rw-r--r--src/vnet/ip/ip6.h6
-rw-r--r--src/vnet/ip/ip6_error.h106
-rw-r--r--src/vnet/ip/ip6_format.c4
-rw-r--r--src/vnet/ip/ip6_forward.c185
-rw-r--r--src/vnet/ip/ip6_hop_by_hop.c18
-rw-r--r--src/vnet/ip/ip6_inlines.h103
-rw-r--r--src/vnet/ip/ip6_input.c12
-rw-r--r--src/vnet/ip/ip6_input.h2
-rw-r--r--src/vnet/ip/ip6_link.c25
-rw-r--r--src/vnet/ip/ip6_ll_table.c40
-rw-r--r--src/vnet/ip/ip6_ll_types.c6
-rw-r--r--src/vnet/ip/ip6_packet.h357
-rw-r--r--src/vnet/ip/ip6_punt_drop.c57
-rw-r--r--src/vnet/ip/ip6_to_ip4.h56
-rw-r--r--src/vnet/ip/ip_api.c368
-rw-r--r--src/vnet/ip/ip_checksum.c2
-rw-r--r--src/vnet/ip/ip_container_proxy.c32
-rw-r--r--src/vnet/ip/ip_flow_hash.h12
-rw-r--r--src/vnet/ip/ip_frag.c82
-rw-r--r--src/vnet/ip/ip_frag.h20
-rw-r--r--src/vnet/ip/ip_in_out_acl.c545
-rw-r--r--src/vnet/ip/ip_init.c2
-rw-r--r--src/vnet/ip/ip_interface.c18
-rw-r--r--src/vnet/ip/ip_interface.h5
-rwxr-xr-x[-rw-r--r--]src/vnet/ip/ip_packet.h108
-rw-r--r--src/vnet/ip/ip_path_mtu.c28
-rw-r--r--src/vnet/ip/ip_path_mtu.h3
-rw-r--r--src/vnet/ip/ip_path_mtu_node.c9
-rw-r--r--src/vnet/ip/ip_psh_cksum.h55
-rw-r--r--src/vnet/ip/ip_punt_drop.c11
-rw-r--r--src/vnet/ip/ip_punt_drop.h5
-rw-r--r--src/vnet/ip/ip_sas.c219
-rw-r--r--src/vnet/ip/ip_sas.h32
-rw-r--r--src/vnet/ip/ip_test.c1578
-rw-r--r--src/vnet/ip/ip_types.c32
-rw-r--r--src/vnet/ip/ip_types.h8
-rw-r--r--src/vnet/ip/lookup.c231
-rw-r--r--src/vnet/ip/lookup.h5
-rw-r--r--src/vnet/ip/punt.c77
-rw-r--r--src/vnet/ip/punt.h6
-rw-r--r--src/vnet/ip/punt_api.c2
-rw-r--r--src/vnet/ip/punt_node.c79
-rw-r--r--src/vnet/ip/reass/ip4_full_reass.c801
-rw-r--r--src/vnet/ip/reass/ip4_full_reass.h3
-rw-r--r--src/vnet/ip/reass/ip4_sv_reass.c535
-rw-r--r--src/vnet/ip/reass/ip4_sv_reass.h1
-rw-r--r--src/vnet/ip/reass/ip6_full_reass.c819
-rw-r--r--src/vnet/ip/reass/ip6_full_reass.h2
-rw-r--r--src/vnet/ip/reass/ip6_sv_reass.c395
-rw-r--r--src/vnet/ip/reass/ip6_sv_reass.h1
-rw-r--r--src/vnet/ip/reass/reassembly.rst221
-rw-r--r--src/vnet/ip/vtep.h6
-rw-r--r--src/vnet/ip6-nd/FEATURE.yaml5
-rw-r--r--src/vnet/ip6-nd/ip6_mld.c14
-rw-r--r--src/vnet/ip6-nd/ip6_nd.api144
-rw-r--r--src/vnet/ip6-nd/ip6_nd.c21
-rw-r--r--src/vnet/ip6-nd/ip6_nd.h1
-rw-r--r--src/vnet/ip6-nd/ip6_nd_api.c205
-rw-r--r--src/vnet/ip6-nd/ip6_nd_inline.h13
-rw-r--r--src/vnet/ip6-nd/ip6_nd_mirror_proxy.c414
-rw-r--r--src/vnet/ip6-nd/ip6_nd_proxy.c8
-rw-r--r--src/vnet/ip6-nd/ip6_nd_test.c65
-rw-r--r--src/vnet/ip6-nd/ip6_ra.c234
-rw-r--r--src/vnet/ip6-nd/ip6_ra.h111
-rw-r--r--src/vnet/ip6-nd/rd_cp.c16
-rw-r--r--src/vnet/ip6-nd/rd_cp_api.c1
-rw-r--r--src/vnet/ipfix-export/flow_api.c257
-rw-r--r--src/vnet/ipfix-export/flow_report.c651
-rw-r--r--src/vnet/ipfix-export/flow_report.h154
-rw-r--r--src/vnet/ipfix-export/flow_report_classify.c46
-rw-r--r--src/vnet/ipfix-export/flow_report_classify.h17
-rw-r--r--src/vnet/ipfix-export/ipfix_doc.md355
-rw-r--r--src/vnet/ipfix-export/ipfix_doc.rst360
-rw-r--r--src/vnet/ipfix-export/ipfix_export.api74
-rw-r--r--src/vnet/ipip/ipip.c70
-rw-r--r--src/vnet/ipip/ipip_api.c53
-rw-r--r--src/vnet/ipip/ipip_cli.c12
-rw-r--r--src/vnet/ipip/node.c2
-rw-r--r--src/vnet/ipip/sixrd.c12
-rw-r--r--src/vnet/ipsec/FEATURE.yaml2
-rw-r--r--src/vnet/ipsec/ah.h57
-rw-r--r--src/vnet/ipsec/ah_decrypt.c122
-rw-r--r--src/vnet/ipsec/ah_encrypt.c49
-rw-r--r--src/vnet/ipsec/esp.h134
-rw-r--r--src/vnet/ipsec/esp_decrypt.c437
-rw-r--r--src/vnet/ipsec/esp_encrypt.c480
-rw-r--r--src/vnet/ipsec/ipsec.api473
-rw-r--r--src/vnet/ipsec/ipsec.c321
-rw-r--r--src/vnet/ipsec/ipsec.h146
-rw-r--r--src/vnet/ipsec/ipsec.rst4
-rw-r--r--src/vnet/ipsec/ipsec_api.c413
-rw-r--r--src/vnet/ipsec/ipsec_cli.c135
-rw-r--r--src/vnet/ipsec/ipsec_format.c220
-rw-r--r--src/vnet/ipsec/ipsec_handoff.c2
-rw-r--r--src/vnet/ipsec/ipsec_input.c562
-rw-r--r--src/vnet/ipsec/ipsec_itf.c43
-rw-r--r--src/vnet/ipsec/ipsec_itf.h2
-rw-r--r--src/vnet/ipsec/ipsec_output.c143
-rw-r--r--src/vnet/ipsec/ipsec_output.h489
-rw-r--r--src/vnet/ipsec/ipsec_punt.h3
-rw-r--r--src/vnet/ipsec/ipsec_sa.c368
-rw-r--r--src/vnet/ipsec/ipsec_sa.h571
-rw-r--r--src/vnet/ipsec/ipsec_spd.c144
-rw-r--r--src/vnet/ipsec/ipsec_spd.h33
-rw-r--r--src/vnet/ipsec/ipsec_spd_fp_lookup.h579
-rw-r--r--src/vnet/ipsec/ipsec_spd_policy.c876
-rw-r--r--src/vnet/ipsec/ipsec_spd_policy.h153
-rw-r--r--src/vnet/ipsec/ipsec_test.c755
-rw-r--r--src/vnet/ipsec/ipsec_tun.c207
-rw-r--r--src/vnet/ipsec/ipsec_tun.h4
-rw-r--r--src/vnet/ipsec/ipsec_tun_in.c135
-rw-r--r--src/vnet/ipsec/ipsec_types.api145
-rw-r--r--src/vnet/l2/feat_bitmap.c2
-rw-r--r--src/vnet/l2/l2.api49
-rw-r--r--src/vnet/l2/l2_api.c51
-rw-r--r--src/vnet/l2/l2_arp_term.c6
-rw-r--r--src/vnet/l2/l2_bd.c65
-rw-r--r--src/vnet/l2/l2_bd.h3
-rw-r--r--src/vnet/l2/l2_bvi.c25
-rw-r--r--src/vnet/l2/l2_classify.h1
-rw-r--r--src/vnet/l2/l2_efp_filter.c4
-rw-r--r--src/vnet/l2/l2_fib.c25
-rw-r--r--src/vnet/l2/l2_fib.h26
-rw-r--r--src/vnet/l2/l2_flood.c4
-rw-r--r--src/vnet/l2/l2_fwd.c9
-rw-r--r--src/vnet/l2/l2_in_out_acl.c6
-rw-r--r--src/vnet/l2/l2_in_out_feat_arc.c7
-rw-r--r--src/vnet/l2/l2_input.c12
-rw-r--r--src/vnet/l2/l2_input.h13
-rw-r--r--src/vnet/l2/l2_input_classify.c14
-rw-r--r--src/vnet/l2/l2_input_node.c17
-rw-r--r--src/vnet/l2/l2_input_vtr.c2
-rw-r--r--src/vnet/l2/l2_learn.c4
-rw-r--r--src/vnet/l2/l2_output.c5
-rw-r--r--src/vnet/l2/l2_output.h3
-rw-r--r--src/vnet/l2/l2_output_classify.c13
-rw-r--r--src/vnet/l2/l2_patch.c10
-rw-r--r--src/vnet/l2/l2_rw.c82
-rw-r--r--src/vnet/l2/l2_rw.h4
-rw-r--r--src/vnet/l2/l2_test.c1435
-rw-r--r--src/vnet/l2/l2_uu_fwd.c2
-rw-r--r--src/vnet/l2/l2_vtr.c4
-rw-r--r--src/vnet/l2/l2_xcrw.c18
-rw-r--r--src/vnet/lawful-intercept/lawful_intercept.c124
-rw-r--r--src/vnet/lawful-intercept/node.c288
-rw-r--r--src/vnet/llc/llc.c2
-rw-r--r--src/vnet/llc/node.c2
-rw-r--r--src/vnet/mfib/.clang-format2
-rw-r--r--src/vnet/mfib/ip4_mfib.c45
-rw-r--r--src/vnet/mfib/ip6_mfib.c13
-rw-r--r--src/vnet/mfib/mfib_entry.c13
-rw-r--r--src/vnet/mfib/mfib_entry_src.h2
-rw-r--r--src/vnet/mfib/mfib_entry_src_rr.c24
-rw-r--r--src/vnet/mfib/mfib_forward.c50
-rw-r--r--src/vnet/mfib/mfib_itf.c10
-rw-r--r--src/vnet/mfib/mfib_table.c10
-rw-r--r--src/vnet/mfib/mfib_table.h2
-rw-r--r--src/vnet/mfib/mfib_types.c4
-rw-r--r--src/vnet/mfib/mfib_types.h34
-rw-r--r--src/vnet/misc.c9
-rw-r--r--src/vnet/mpls/error.def32
-rw-r--r--src/vnet/mpls/interface.c43
-rw-r--r--src/vnet/mpls/mpls.api122
-rw-r--r--src/vnet/mpls/mpls.c15
-rw-r--r--src/vnet/mpls/mpls.h26
-rw-r--r--src/vnet/mpls/mpls_api.c69
-rw-r--r--src/vnet/mpls/mpls_features.c1
-rw-r--r--src/vnet/mpls/mpls_input.c11
-rw-r--r--src/vnet/mpls/mpls_lookup.c244
-rw-r--r--src/vnet/mpls/mpls_output.c253
-rw-r--r--src/vnet/mpls/mpls_tunnel.c17
-rw-r--r--src/vnet/mtu.rst108
-rw-r--r--src/vnet/osi/node.c2
-rw-r--r--src/vnet/pg/cli.c38
-rw-r--r--src/vnet/pg/example.script10
-rw-r--r--src/vnet/pg/input.c36
-rw-r--r--src/vnet/pg/pg.api4
-rw-r--r--src/vnet/pg/pg.h6
-rw-r--r--src/vnet/pg/pg_api.c2
-rw-r--r--src/vnet/pg/stream.c31
-rw-r--r--src/vnet/policer/node_funcs.c80
-rw-r--r--src/vnet/policer/police.h8
-rw-r--r--src/vnet/policer/police_inlines.h2
-rw-r--r--src/vnet/policer/policer.api107
-rw-r--r--src/vnet/policer/policer.c558
-rw-r--r--src/vnet/policer/policer.h22
-rw-r--r--src/vnet/policer/policer.rst217
-rw-r--r--src/vnet/policer/policer_api.c412
-rw-r--r--src/vnet/policer/policer_types.api28
-rw-r--r--src/vnet/policer/xlate.c2
-rw-r--r--src/vnet/policer/xlate.h2
-rw-r--r--src/vnet/ppp/node.c2
-rw-r--r--src/vnet/ppp/ppp.c2
-rw-r--r--src/vnet/qos/FEATURE.yaml2
-rw-r--r--src/vnet/qos/qos.api2
-rw-r--r--src/vnet/qos/qos_egress_map.c10
-rw-r--r--src/vnet/qos/qos_mark.c4
-rw-r--r--src/vnet/qos/qos_mark_node.c2
-rw-r--r--src/vnet/qos/qos_record.c4
-rw-r--r--src/vnet/qos/qos_record_node.c2
-rw-r--r--src/vnet/qos/qos_store.c10
-rw-r--r--src/vnet/qos/qos_store_node.c2
-rw-r--r--src/vnet/session/application.c306
-rw-r--r--src/vnet/session/application.h87
-rw-r--r--src/vnet/session/application_interface.c26
-rw-r--r--src/vnet/session/application_interface.h135
-rw-r--r--src/vnet/session/application_local.c578
-rw-r--r--src/vnet/session/application_local.h3
-rw-r--r--src/vnet/session/application_namespace.c203
-rw-r--r--src/vnet/session/application_namespace.h19
-rw-r--r--src/vnet/session/application_worker.c434
-rw-r--r--src/vnet/session/mma_template.c5
-rw-r--r--src/vnet/session/mma_template.h2
-rw-r--r--src/vnet/session/segment_manager.c399
-rw-r--r--src/vnet/session/segment_manager.h21
-rw-r--r--src/vnet/session/session.api122
-rw-r--r--src/vnet/session/session.c1036
-rw-r--r--src/vnet/session/session.h368
-rw-r--r--src/vnet/session/session_api.c822
-rw-r--r--src/vnet/session/session_cli.c151
-rw-r--r--src/vnet/session/session_debug.c125
-rw-r--r--src/vnet/session/session_debug.h203
-rw-r--r--src/vnet/session/session_input.c343
-rw-r--r--src/vnet/session/session_lookup.c182
-rw-r--r--src/vnet/session/session_lookup.h7
-rw-r--r--src/vnet/session/session_node.c706
-rw-r--r--src/vnet/session/session_rules_table.c23
-rw-r--r--src/vnet/session/session_rules_table.h9
-rw-r--r--src/vnet/session/session_table.c86
-rw-r--r--src/vnet/session/session_table.h5
-rw-r--r--src/vnet/session/session_test.c363
-rw-r--r--src/vnet/session/session_types.h152
-rw-r--r--src/vnet/session/transport.c325
-rw-r--r--src/vnet/session/transport.h44
-rw-r--r--src/vnet/session/transport_types.h54
-rw-r--r--src/vnet/snap/node.c2
-rw-r--r--src/vnet/snap/snap.h2
-rw-r--r--src/vnet/span/node.c4
-rw-r--r--src/vnet/span/span.c9
-rw-r--r--src/vnet/span/span_api.c2
-rw-r--r--src/vnet/span/span_doc.md65
-rw-r--r--src/vnet/span/span_doc.rst84
-rw-r--r--[-rwxr-xr-x]src/vnet/srmpls/dir.dox0
-rw-r--r--src/vnet/srmpls/sr_doc.md121
-rw-r--r--src/vnet/srmpls/sr_doc.rst215
-rw-r--r--src/vnet/srmpls/sr_mpls.h2
-rw-r--r--src/vnet/srmpls/sr_mpls_api.c56
-rw-r--r--src/vnet/srmpls/sr_mpls_policy.c18
-rw-r--r--src/vnet/srmpls/sr_mpls_steering.c8
-rw-r--r--src/vnet/srmpls/sr_mpls_test.c174
-rw-r--r--src/vnet/srp/node.c8
-rw-r--r--src/vnet/srp/packet.h3
-rw-r--r--[-rwxr-xr-x]src/vnet/srv6/dir.dox0
-rw-r--r--src/vnet/srv6/sr.api116
-rw-r--r--src/vnet/srv6/sr.h24
-rw-r--r--src/vnet/srv6/sr_api.c256
-rw-r--r--src/vnet/srv6/sr_doc.md63
-rw-r--r--src/vnet/srv6/sr_doc.rst123
-rw-r--r--src/vnet/srv6/sr_localsid.c22
-rw-r--r--src/vnet/srv6/sr_localsid.md58
-rw-r--r--src/vnet/srv6/sr_localsid.rst90
-rw-r--r--src/vnet/srv6/sr_packet.h18
-rw-r--r--src/vnet/srv6/sr_policy.md60
-rw-r--r--src/vnet/srv6/sr_policy.rst96
-rw-r--r--src/vnet/srv6/sr_policy_rewrite.c257
-rw-r--r--src/vnet/srv6/sr_pt.api59
-rw-r--r--src/vnet/srv6/sr_pt.c281
-rw-r--r--src/vnet/srv6/sr_pt.h89
-rw-r--r--src/vnet/srv6/sr_pt_api.c97
-rw-r--r--src/vnet/srv6/sr_pt_node.c175
-rw-r--r--src/vnet/srv6/sr_steering.c13
-rw-r--r--src/vnet/srv6/sr_steering.md35
-rw-r--r--src/vnet/srv6/sr_steering.rst50
-rw-r--r--src/vnet/srv6/sr_test.c218
-rw-r--r--src/vnet/syslog/sylog_doc.md65
-rw-r--r--src/vnet/syslog/sylog_doc.rst70
-rw-r--r--src/vnet/syslog/syslog.c2
-rw-r--r--src/vnet/syslog/syslog_api.c4
-rw-r--r--src/vnet/tcp/tcp.c140
-rw-r--r--src/vnet/tcp/tcp.h24
-rw-r--r--src/vnet/tcp/tcp_bt.c2
-rw-r--r--src/vnet/tcp/tcp_cli.c23
-rw-r--r--src/vnet/tcp/tcp_cubic.c26
-rw-r--r--src/vnet/tcp/tcp_debug.c4
-rw-r--r--src/vnet/tcp/tcp_debug.h62
-rw-r--r--src/vnet/tcp/tcp_error.def75
-rw-r--r--src/vnet/tcp/tcp_format.c89
-rw-r--r--src/vnet/tcp/tcp_inlines.h37
-rw-r--r--src/vnet/tcp/tcp_input.c653
-rw-r--r--src/vnet/tcp/tcp_newreno.c43
-rw-r--r--src/vnet/tcp/tcp_output.c321
-rw-r--r--src/vnet/tcp/tcp_packet.h97
-rw-r--r--src/vnet/tcp/tcp_pg.c181
-rw-r--r--src/vnet/tcp/tcp_sack.c21
-rw-r--r--src/vnet/tcp/tcp_sack.h2
-rw-r--r--src/vnet/tcp/tcp_syn_filter4.c22
-rw-r--r--src/vnet/tcp/tcp_timer.h53
-rw-r--r--src/vnet/tcp/tcp_types.h2
-rw-r--r--src/vnet/teib/teib.c91
-rw-r--r--src/vnet/teib/teib_cli.c6
-rw-r--r--src/vnet/tls/tls.c380
-rw-r--r--src/vnet/tls/tls.h54
-rw-r--r--src/vnet/tunnel/tunnel.c27
-rw-r--r--src/vnet/tunnel/tunnel_types_api.c9
-rw-r--r--src/vnet/udp/udp.api2
-rw-r--r--src/vnet/udp/udp.c433
-rw-r--r--src/vnet/udp/udp.h123
-rw-r--r--src/vnet/udp/udp_api.c23
-rw-r--r--src/vnet/udp/udp_cli.c233
-rw-r--r--src/vnet/udp/udp_encap.c57
-rw-r--r--src/vnet/udp/udp_encap.h3
-rw-r--r--src/vnet/udp/udp_encap_node.c138
-rw-r--r--src/vnet/udp/udp_error.def23
-rw-r--r--src/vnet/udp/udp_inlines.h107
-rw-r--r--src/vnet/udp/udp_input.c88
-rw-r--r--src/vnet/udp/udp_local.c228
-rw-r--r--src/vnet/udp/udp_output.c254
-rw-r--r--src/vnet/unix/gdb_funcs.c8
-rw-r--r--src/vnet/unix/tuntap.c34
-rw-r--r--src/vnet/util/throttle.c5
-rw-r--r--src/vnet/util/throttle.h19
-rw-r--r--src/vnet/vnet.h2
-rw-r--r--src/vnet/vxlan-gbp/decap.c1050
-rw-r--r--src/vnet/vxlan-gbp/encap.c601
-rw-r--r--src/vnet/vxlan-gbp/vxlan_gbp.api100
-rw-r--r--src/vnet/vxlan-gbp/vxlan_gbp.c1192
-rw-r--r--src/vnet/vxlan-gbp/vxlan_gbp.h250
-rw-r--r--src/vnet/vxlan-gbp/vxlan_gbp_api.c217
-rw-r--r--src/vnet/vxlan-gbp/vxlan_gbp_packet.c60
-rw-r--r--src/vnet/vxlan-gbp/vxlan_gbp_packet.h173
-rw-r--r--src/vnet/vxlan-gpe/decap.c443
-rw-r--r--src/vnet/vxlan-gpe/encap.c51
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe.api59
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe.c141
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe.h34
-rw-r--r--src/vnet/vxlan-gpe/vxlan_gpe_api.c155
-rw-r--r--src/vpp-api/client/client.c26
-rw-r--r--src/vpp-api/client/stat_client.c67
-rw-r--r--src/vpp-api/client/stat_client.h17
-rw-r--r--src/vpp-api/client/test.c20
-rw-r--r--src/vpp-api/python/CMakeLists.txt39
-rw-r--r--src/vpp-api/python/README.rst0
-rw-r--r--src/vpp-api/python/setup.py23
-rw-r--r--src/vpp-api/python/vpp_papi/__init__.py5
-rw-r--r--src/vpp-api/python/vpp_papi/data/memclnt.api.json809
-rw-r--r--src/vpp-api/python/vpp_papi/macaddress.py18
-rw-r--r--src/vpp-api/python/vpp_papi/tests/test_macaddress.py6
-rw-r--r--src/vpp-api/python/vpp_papi/tests/test_vpp_format.py104
-rw-r--r--src/vpp-api/python/vpp_papi/tests/test_vpp_papi.py12
-rwxr-xr-xsrc/vpp-api/python/vpp_papi/tests/test_vpp_serializer.py811
-rw-r--r--src/vpp-api/python/vpp_papi/vpp_format.py216
-rw-r--r--src/vpp-api/python/vpp_papi/vpp_papi.py473
-rw-r--r--src/vpp-api/python/vpp_papi/vpp_serializer.py283
-rwxr-xr-xsrc/vpp-api/python/vpp_papi/vpp_stats.py383
-rw-r--r--src/vpp-api/python/vpp_papi/vpp_transport_socket.py41
-rw-r--r--src/vpp-api/vapi/CMakeLists.txt6
-rw-r--r--src/vpp-api/vapi/fake.api.json2
-rw-r--r--src/vpp-api/vapi/vapi.c1354
-rw-r--r--src/vpp-api/vapi/vapi.h119
-rw-r--r--src/vpp-api/vapi/vapi.hpp138
-rwxr-xr-xsrc/vpp-api/vapi/vapi_c_gen.py983
-rw-r--r--src/vpp-api/vapi/vapi_c_test.c168
-rw-r--r--src/vpp-api/vapi/vapi_common.h59
-rwxr-xr-xsrc/vpp-api/vapi/vapi_cpp_gen.py231
-rw-r--r--src/vpp-api/vapi/vapi_cpp_test.cpp85
-rw-r--r--src/vpp-api/vapi/vapi_doc.md155
-rw-r--r--src/vpp-api/vapi/vapi_doc.rst191
-rw-r--r--src/vpp-api/vapi/vapi_internal.h18
-rw-r--r--src/vpp-api/vapi/vapi_json_parser.py342
-rw-r--r--src/vpp/CMakeLists.txt38
-rw-r--r--src/vpp/api/api.c457
-rw-r--r--src/vpp/api/api_main.c39
-rw-r--r--src/vpp/api/api_test.c99
-rw-r--r--src/vpp/api/gmon.c6
-rw-r--r--src/vpp/api/json_format.h5
-rw-r--r--src/vpp/api/plugin.c4
-rw-r--r--src/vpp/api/test_client.c1538
-rw-r--r--src/vpp/api/test_ha.c245
-rw-r--r--src/vpp/api/types.c49
-rw-r--r--src/vpp/api/types.h5
-rw-r--r--src/vpp/api/vpe.api267
-rw-r--r--src/vpp/api/vpp_get_metrics.c1
-rw-r--r--src/vpp/app/version.c5
-rw-r--r--src/vpp/app/vpe_cli.c2
-rw-r--r--src/vpp/app/vpp_get_stats.c24
-rw-r--r--src/vpp/app/vpp_prometheus_export.c334
-rw-r--r--src/vpp/app/vppctl.c299
-rw-r--r--src/vpp/conf/startup.conf14
-rw-r--r--src/vpp/mem/mem.c2
-rw-r--r--src/vpp/mem/mem.md21
-rw-r--r--src/vpp/mem/mem.rst25
-rw-r--r--src/vpp/stats/stat_segment.c1115
-rw-r--r--src/vpp/stats/stat_segment.h123
-rw-r--r--src/vpp/stats/stat_segment_provider.c198
-rw-r--r--src/vpp/stats/stat_segment_shared.h66
-rw-r--r--src/vpp/stats/stats.md130
-rw-r--r--src/vpp/stats/stats.rst178
-rw-r--r--src/vpp/vnet/config.h.in3
-rw-r--r--src/vpp/vnet/main.c71
-rw-r--r--src/vppinfra/CMakeLists.txt80
-rw-r--r--src/vppinfra/atomics.h2
-rw-r--r--src/vppinfra/bihash_12_4.h89
-rw-r--r--src/vppinfra/bihash_16_8.h9
-rw-r--r--src/vppinfra/bihash_16_8_32.h9
-rw-r--r--src/vppinfra/bihash_24_16.h13
-rw-r--r--src/vppinfra/bihash_24_8.h13
-rw-r--r--src/vppinfra/bihash_32_8.h13
-rw-r--r--src/vppinfra/bihash_40_8.h14
-rw-r--r--src/vppinfra/bihash_48_8.h13
-rw-r--r--src/vppinfra/bihash_8_16.h11
-rw-r--r--src/vppinfra/bihash_8_8.h8
-rw-r--r--src/vppinfra/bihash_8_8_stats.h8
-rw-r--r--src/vppinfra/bihash_doc.h216
-rw-r--r--src/vppinfra/bihash_template.c96
-rw-r--r--src/vppinfra/bihash_template.h28
-rw-r--r--src/vppinfra/bihash_vec8_8.h8
-rw-r--r--src/vppinfra/bitmap.h101
-rw-r--r--src/vppinfra/bitops.h207
-rw-r--r--src/vppinfra/byte_order.h2
-rw-r--r--src/vppinfra/cJSON.c85
-rw-r--r--src/vppinfra/cJSON.h4
-rw-r--r--src/vppinfra/cache.h60
-rw-r--r--src/vppinfra/clib.h163
-rw-r--r--src/vppinfra/config.h.in2
-rw-r--r--src/vppinfra/cpu.c260
-rw-r--r--src/vppinfra/cpu.h212
-rw-r--r--src/vppinfra/crc32.h159
-rw-r--r--src/vppinfra/crypto/aes.h (renamed from src/plugins/crypto_native/aes.h)160
-rw-r--r--src/vppinfra/crypto/aes_cbc.h542
-rw-r--r--src/vppinfra/crypto/aes_ctr.h190
-rw-r--r--src/vppinfra/crypto/aes_gcm.h944
-rw-r--r--src/vppinfra/crypto/ghash.h (renamed from src/plugins/crypto_native/ghash.h)206
-rw-r--r--src/vppinfra/crypto/poly1305.h234
-rw-r--r--src/vppinfra/crypto/sha2.h715
-rw-r--r--src/vppinfra/dlmalloc.c62
-rw-r--r--src/vppinfra/dlmalloc.h2
-rw-r--r--src/vppinfra/elf.c8
-rw-r--r--src/vppinfra/elf.h8
-rw-r--r--src/vppinfra/elf_clib.c25
-rw-r--r--src/vppinfra/elf_clib.h2
-rw-r--r--src/vppinfra/elog.c4
-rw-r--r--src/vppinfra/error.c8
-rw-r--r--src/vppinfra/error.h7
-rw-r--r--src/vppinfra/error_bootstrap.h5
-rw-r--r--src/vppinfra/fifo.c17
-rw-r--r--src/vppinfra/fifo.h103
-rw-r--r--src/vppinfra/file.h2
-rw-r--r--src/vppinfra/format.c2
-rw-r--r--src/vppinfra/format.h55
-rw-r--r--src/vppinfra/format_table.c (renamed from src/plugins/perfmon/table.c)42
-rw-r--r--src/vppinfra/format_table.h (renamed from src/plugins/perfmon/table.h)56
-rw-r--r--src/vppinfra/freebsd/mem.c471
-rw-r--r--src/vppinfra/graph.c182
-rw-r--r--src/vppinfra/graph.h127
-rw-r--r--src/vppinfra/hash.c262
-rw-r--r--src/vppinfra/hash.h40
-rw-r--r--src/vppinfra/heap.c24
-rw-r--r--src/vppinfra/heap.h44
-rw-r--r--src/vppinfra/interrupt.c99
-rw-r--r--src/vppinfra/interrupt.h141
-rw-r--r--src/vppinfra/jsonformat.c (renamed from src/vat2/jsonconvert.c)63
-rw-r--r--src/vppinfra/jsonformat.h114
-rw-r--r--src/vppinfra/lb_hash_hash.h14
-rw-r--r--src/vppinfra/linux/mem.c84
-rw-r--r--src/vppinfra/linux/sysfs.c46
-rw-r--r--src/vppinfra/linux/sysfs.h5
-rw-r--r--src/vppinfra/longjmp.S50
-rw-r--r--src/vppinfra/longjmp.h3
-rw-r--r--src/vppinfra/macros.c9
-rw-r--r--src/vppinfra/mem.h197
-rw-r--r--src/vppinfra/mem_bulk.c10
-rw-r--r--src/vppinfra/mem_dlmalloc.c360
-rw-r--r--src/vppinfra/memcpy.h43
-rw-r--r--src/vppinfra/memcpy_avx2.h249
-rw-r--r--src/vppinfra/memcpy_avx512.h285
-rw-r--r--src/vppinfra/memcpy_sse3.h368
-rw-r--r--src/vppinfra/memcpy_x86_64.h613
-rw-r--r--src/vppinfra/mhash.c6
-rw-r--r--src/vppinfra/mpcap.c2
-rw-r--r--src/vppinfra/pcap.c4
-rw-r--r--src/vppinfra/pcg.h85
-rw-r--r--src/vppinfra/perfmon/bundle_core_power.c48
-rw-r--r--src/vppinfra/perfmon/bundle_default.c61
-rw-r--r--src/vppinfra/perfmon/perfmon.c230
-rw-r--r--src/vppinfra/perfmon/perfmon.h137
-rw-r--r--src/vppinfra/pmalloc.c57
-rw-r--r--src/vppinfra/pool.c90
-rw-r--r--src/vppinfra/pool.h498
-rw-r--r--src/vppinfra/random_buffer.h18
-rw-r--r--src/vppinfra/ring.h26
-rw-r--r--src/vppinfra/sanitizer.c7
-rw-r--r--src/vppinfra/sanitizer.h141
-rw-r--r--src/vppinfra/serialize.c24
-rw-r--r--src/vppinfra/sha2.h637
-rw-r--r--src/vppinfra/socket.c620
-rw-r--r--src/vppinfra/socket.h56
-rw-r--r--src/vppinfra/sparse_vec.h27
-rw-r--r--src/vppinfra/std-formats.c166
-rw-r--r--src/vppinfra/string.h358
-rw-r--r--src/vppinfra/test/aes_cbc.c187
-rw-r--r--src/vppinfra/test/aes_ctr.c481
-rw-r--r--src/vppinfra/test/aes_gcm.c1177
-rw-r--r--src/vppinfra/test/array_mask.c (renamed from src/vppinfra/vector/test/array_mask.c)45
-rw-r--r--src/vppinfra/test/compress.c266
-rw-r--r--src/vppinfra/test/count_equal.c104
-rw-r--r--src/vppinfra/test/crc32c.c54
-rw-r--r--src/vppinfra/test/index_to_ptr.c58
-rw-r--r--src/vppinfra/test/ip_csum.c169
-rw-r--r--src/vppinfra/test/mask_compare.c (renamed from src/vppinfra/vector/test/mask_compare.c)50
-rw-r--r--src/vppinfra/test/memcpy_x86_64.c142
-rw-r--r--src/vppinfra/test/poly1305.c268
-rw-r--r--src/vppinfra/test/sha2.c322
-rw-r--r--src/vppinfra/test/test.c259
-rw-r--r--src/vppinfra/test/test.h125
-rw-r--r--src/vppinfra/test/toeplitz.c514
-rw-r--r--src/vppinfra/test_bihash_template.c72
-rw-r--r--src/vppinfra/test_fifo.c2
-rw-r--r--src/vppinfra/test_fpool.c2
-rw-r--r--src/vppinfra/test_hash.c4
-rw-r--r--src/vppinfra/test_heap.c13
-rw-r--r--src/vppinfra/test_interrupt.c78
-rw-r--r--src/vppinfra/test_longjmp.c26
-rw-r--r--src/vppinfra/test_mheap.c286
-rw-r--r--src/vppinfra/test_pool_alloc.c56
-rw-r--r--src/vppinfra/test_pool_iterate.c2
-rw-r--r--src/vppinfra/test_serialize.c46
-rw-r--r--src/vppinfra/test_socket.c2
-rw-r--r--src/vppinfra/test_tw_timer.c32
-rw-r--r--src/vppinfra/test_vec.c77
-rw-r--r--src/vppinfra/time.c51
-rw-r--r--src/vppinfra/time.h9
-rw-r--r--src/vppinfra/timing_wheel.c20
-rw-r--r--src/vppinfra/tw_timer_template.c4
-rw-r--r--src/vppinfra/types.h62
-rw-r--r--src/vppinfra/unformat.c147
-rw-r--r--src/vppinfra/unix-formats.c19
-rw-r--r--src/vppinfra/unix-misc.c157
-rw-r--r--src/vppinfra/unix.h22
-rw-r--r--src/vppinfra/vec.c281
-rw-r--r--src/vppinfra/vec.h993
-rw-r--r--src/vppinfra/vec_bootstrap.h185
-rw-r--r--src/vppinfra/vector.h125
-rw-r--r--src/vppinfra/vector/array_mask.h117
-rw-r--r--src/vppinfra/vector/compress.h287
-rw-r--r--src/vppinfra/vector/count_equal.h306
-rw-r--r--src/vppinfra/vector/index_to_ptr.h257
-rw-r--r--src/vppinfra/vector/ip_csum.h339
-rw-r--r--src/vppinfra/vector/mask_compare.h207
-rw-r--r--src/vppinfra/vector/test/compress.c81
-rw-r--r--src/vppinfra/vector/test/test.c53
-rw-r--r--src/vppinfra/vector/test/test.h35
-rw-r--r--src/vppinfra/vector/toeplitz.c122
-rw-r--r--src/vppinfra/vector/toeplitz.h513
-rw-r--r--src/vppinfra/vector_avx2.h118
-rw-r--r--src/vppinfra/vector_avx512.h237
-rw-r--r--src/vppinfra/vector_neon.h81
-rw-r--r--src/vppinfra/vector_sse42.h90
2029 files changed, 179828 insertions, 91053 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 49ea0373eb4..68d0a4fe64e 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -11,30 +11,38 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-cmake_minimum_required(VERSION 3.10)
-
-set(CMAKE_C_COMPILER_NAMES
- clang-12
- clang-11
- clang-10
- clang-9
- gcc-10
- gcc-9
- cc
-)
+cmake_minimum_required(VERSION 3.13)
+
+if(DEFINED VPP_PLATFORM AND VPP_PLATFORM STREQUAL "default")
+ unset(VPP_PLATFORM)
+ unset(VPP_PLATFORM CACHE)
+ set(VPP_PLATFORM_NAME "default")
+elseif(DEFINED VPP_PLATFORM)
+ set(platform_file ${CMAKE_SOURCE_DIR}/cmake/platform/${VPP_PLATFORM}.cmake)
+ if(NOT EXISTS ${platform_file})
+ message(FATAL_ERROR "unknown platform ${VPP_PLATFORM}")
+ endif()
+ include(${platform_file})
+ set(VPP_PLATFORM_NAME ${VPP_PLATFORM})
+else()
+ set(VPP_PLATFORM_NAME "default")
+endif()
+
+if (DEFINED VPP_PLATFORM_C_COMPILER_NAMES)
+ set(CMAKE_C_COMPILER_NAMES ${VPP_PLATFORM_C_COMPILER_NAME})
+else()
+ set(CMAKE_C_COMPILER_NAMES clang gcc cc)
+endif()
project(vpp C)
-if(CMAKE_VERSION VERSION_LESS 3.12)
- macro(add_compile_definitions defs)
- foreach(d ${defs})
- add_compile_options(-D${d})
- endforeach()
- endmacro()
+if(NOT DEFINED CMAKE_INSTALL_LIBDIR AND EXISTS "/etc/debian_version")
+ set(CMAKE_INSTALL_LIBDIR "lib/${CMAKE_LIBRARY_ARCHITECTURE}")
endif()
include(CheckCCompilerFlag)
include(CheckIPOSupported)
+include(GNUInstallDirs)
include(cmake/misc.cmake)
include(cmake/cpu.cmake)
include(cmake/ccache.cmake)
@@ -48,35 +56,91 @@ execute_process(
OUTPUT_VARIABLE VPP_VERSION
OUTPUT_STRIP_TRAILING_WHITESPACE
)
+
+if (VPP_PLATFORM)
+ set(VPP_VERSION ${VPP_VERSION}-${VPP_PLATFORM_NAME})
+endif()
+
string(REPLACE "-" ";" VPP_LIB_VERSION ${VPP_VERSION})
list(GET VPP_LIB_VERSION 0 VPP_LIB_VERSION)
##############################################################################
+# compiler specifics
+##############################################################################
+
+set(MIN_SUPPORTED_CLANG_C_COMPILER_VERSION 9.0.0)
+set(MIN_SUPPORTED_GNU_C_COMPILER_VERSION 9.0.0)
+
+if(CMAKE_C_COMPILER_ID STREQUAL "Clang")
+ if (CMAKE_C_COMPILER_VERSION VERSION_LESS MIN_SUPPORTED_CLANG_C_COMPILER_VERSION)
+ set(COMPILER_TOO_OLD TRUE)
+ endif()
+elseif(CMAKE_C_COMPILER_ID STREQUAL "GNU")
+ if (CMAKE_C_COMPILER_VERSION VERSION_LESS MIN_SUPPORTED_GNU_C_COMPILER_VERSION)
+ set(COMPILER_TOO_OLD TRUE)
+ endif()
+ set(GCC_STRING_OVERFLOW_WARNING_DISABLE_VERSION 10.0.0)
+ if (CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL GCC_STRING_OVERFLOW_WARNING_DISABLE_VERSION)
+ add_compile_options(-Wno-stringop-overflow)
+ endif()
+ set(GCC_STRING_OVERREAD_WARNING_DISABLE_VERSION 12.0.0)
+ if (CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL GCC_STRING_OVERREAD_WARNING_DISABLE_VERSION)
+ add_compile_options(-Wno-stringop-overread)
+ endif()
+ set(GCC_ARRAY_BOUNDS_WARNING_DISABLE_VERSION 12.0.0)
+ if (CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL GCC_ARRAY_BOUNDS_WARNING_DISABLE_VERSION)
+ add_compile_options(-Wno-array-bounds)
+ endif()
+else()
+ message(WARNING "WARNING: Unsupported C compiler `${CMAKE_C_COMPILER_ID}` is used")
+ set (PRINT_MIN_C_COMPILER_VER TRUE)
+endif()
+if (COMPILER_TOO_OLD)
+ message(WARNING "WARNING: C compiler version is too old and it's usage may result")
+ message(WARNING " in sub-optimal binaries or lack of support for specific CPU types.")
+ set (PRINT_MIN_C_COMPILER_VER TRUE)
+endif()
+
+if (PRINT_MIN_C_COMPILER_VER)
+ string (APPEND _t "Supported C compilers are ")
+ string (APPEND _t "Clang ${MIN_SUPPORTED_CLANG_C_COMPILER_VERSION} or higher ")
+ string (APPEND _t "and GNU ${MIN_SUPPORTED_GNU_C_COMPILER_VERSION} or higher.")
+ message(WARNING " ${_t}")
+ unset (_t)
+endif()
+
+##############################################################################
# cross compiling
##############################################################################
+
+if (${CMAKE_SYSTEM_NAME} MATCHES "Linux")
+ set(COMPILER_SUFFIX "linux-gnu")
+elseif (${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD")
+ set(COMPILER_SUFFIX "freebsd")
+endif()
+
if(CMAKE_CROSSCOMPILING)
set(CMAKE_IGNORE_PATH
- /usr/lib/${CMAKE_HOST_SYSTEM_PROCESSOR}-linux-gnu/
- /usr/lib/${CMAKE_HOST_SYSTEM_PROCESSOR}-linux-gnu/lib/
+ /usr/lib/${CMAKE_HOST_SYSTEM_PROCESSOR}-${COMPILER_SUFFIX}/
+ /usr/lib/${CMAKE_HOST_SYSTEM_PROCESSOR}-${COMPILER_SUFFIX}/lib/
)
endif()
-set(CMAKE_C_COMPILER_TARGET ${CMAKE_SYSTEM_PROCESSOR}-linux-gnu)
-
+ set(CMAKE_C_COMPILER_TARGET ${CMAKE_SYSTEM_PROCESSOR}-${COMPILER_SUFFIX})
##############################################################################
# build config
##############################################################################
check_c_compiler_flag("-Wno-address-of-packed-member"
compiler_flag_no_address_of_packed_member)
-set(VPP_RUNTIME_DIR "bin" CACHE STRING "Relative runtime directory path")
-set(VPP_LIBRARY_DIR "lib" CACHE STRING "Relative library directory path")
+set(VPP_RUNTIME_DIR ${CMAKE_INSTALL_BINDIR} CACHE STRING "Relative runtime directory path")
+set(VPP_LIBRARY_DIR ${CMAKE_INSTALL_LIBDIR} CACHE STRING "Relative library directory path")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${VPP_RUNTIME_DIR})
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${VPP_LIBRARY_DIR})
set(VPP_BINARY_DIR ${CMAKE_BINARY_DIR}/CMakeFiles)
-set(PYENV PYTHONPYCACHEPREFIX=${CMAKE_BINARY_DIR}/CMakeFile/__pycache__)
+set(PYENV PYTHONPYCACHEPREFIX=${CMAKE_BINARY_DIR}/CMakeFiles/__pycache__)
if (CMAKE_BUILD_TYPE)
- add_compile_options(-g -fPIC -Werror -Wall)
+ add_compile_options(-g -Werror -Wall)
endif()
if (compiler_flag_no_address_of_packed_member)
@@ -92,7 +156,6 @@ set(CMAKE_C_FLAGS_DEBUG "")
if (${CMAKE_BUILD_TYPE_LC} MATCHES "release")
add_compile_options(-O3 -fstack-protector -fno-common)
add_compile_definitions(_FORTIFY_SOURCE=2)
- set(CMAKE_EXE_LINKER_FLAGS_RELEASE "-pie")
elseif (${CMAKE_BUILD_TYPE_LC} MATCHES "debug")
add_compile_options(-O0 -fstack-protector -fno-common)
add_compile_definitions(CLIB_DEBUG)
@@ -102,6 +165,7 @@ elseif (${CMAKE_BUILD_TYPE_LC} MATCHES "coverity")
elseif (${CMAKE_BUILD_TYPE_LC} MATCHES "gcov")
add_compile_options(-O0 -fprofile-arcs -ftest-coverage)
add_compile_definitions(CLIB_DEBUG CLIB_GCOV)
+ link_libraries(gcov)
endif()
set(BUILD_TYPES release debug coverity gcov)
@@ -119,6 +183,10 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
endif()
endif()
+if(VPP_USE_LTO)
+ check_c_compiler_flag("-Wno-stringop-overflow"
+ compiler_flag_no_stringop_overflow)
+endif()
##############################################################################
# sanitizers
##############################################################################
@@ -132,9 +200,7 @@ set(VPP_SANITIZE_ADDR_OPTIONS
if (VPP_ENABLE_SANITIZE_ADDR)
add_compile_options(-fsanitize=address)
- add_compile_definitions(CLIB_SANITIZE_ADDR)
- set(CMAKE_EXE_LINKER_FLAGS "-fsanitize=address ${CMAKE_EXE_LINKER_FLAGS}")
- set(CMAKE_SHARED_LINKER_FLAGS "-fsanitize=address ${CMAKE_SHARED_LINKER_FLAGS}")
+ add_link_options(-fsanitize=address)
endif (VPP_ENABLE_SANITIZE_ADDR)
##############################################################################
@@ -147,11 +213,19 @@ if(VPP_ENABLE_TRAJECTORY_TRACE)
endif()
##############################################################################
+# unittest with clang code coverage
+##############################################################################
+
+if("${CMAKE_VERSION}" VERSION_GREATER_EQUAL "3.13" AND "${CMAKE_C_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang")
+ option(VPP_BUILD_TESTS_WITH_COVERAGE "Build unit tests with code coverage" OFF)
+endif()
+
+##############################################################################
# install config
##############################################################################
option(VPP_SET_RPATH "Set rpath for resulting binaries and libraries." ON)
if(VPP_SET_RPATH)
- set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
+ set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${VPP_LIBRARY_DIR}")
endif()
set(CMAKE_INSTALL_MESSAGE NEVER)
@@ -171,16 +245,39 @@ include(cmake/exec.cmake)
include(cmake/plugin.cmake)
##############################################################################
+# FreeBSD - use epoll-shim
+##############################################################################
+set(EPOLL_LIB "")
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
+ find_path(EPOLL_SHIM_INCLUDE_DIR NAMES sys/epoll.h HINTS /usr/local/include/libepoll-shim)
+ find_library(EPOLL_SHIM_LIB NAMES epoll-shim HINTS /usr/local/lib)
+
+ if(EPOLL_SHIM_INCLUDE_DIR AND EPOLL_SHIM_LIB)
+ message(STATUS "Found epoll-shim in ${EPOLL_SHIM_INCLUDE_DIR}")
+ include_directories(${EPOLL_SHIM_INCLUDE_DIR})
+ string(JOIN " " EPOLL_LIB "${EPOLL_SHIM_LIB}")
+ endif()
+endif()
+
+##############################################################################
# subdirs - order matters
##############################################################################
option(VPP_HOST_TOOLS_ONLY "Build only host tools" OFF)
if(VPP_HOST_TOOLS_ONLY)
set(SUBDIRS tools/vppapigen cmake)
-elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
+ install(
+ PROGRAMS
+ vpp-api/vapi/vapi_c_gen.py
+ vpp-api/vapi/vapi_cpp_gen.py
+ vpp-api/vapi/vapi_json_parser.py
+ DESTINATION ${VPP_RUNTIME_DIR}
+ COMPONENT vpp-dev
+ )
+elseif(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD")
find_package(OpenSSL)
set(SUBDIRS
- vppinfra svm vlib vlibmemory vlibapi vnet vpp vat vat2 vcl plugins
- vpp-api tools/vppapigen tools/g2 tools/perftool cmake pkg
+ vppinfra svm vlib vlibmemory vlibapi vnet vpp vat vat2 vcl vpp-api
+ plugins tools/vppapigen tools/g2 tools/perftool cmake pkg
tools/appimage
)
elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
@@ -255,10 +352,12 @@ mark_as_advanced(CLEAR
# print configuration
##############################################################################
message(STATUS "Configuration:")
+pr("VPP platform" ${VPP_PLATFORM_NAME})
pr("VPP version" ${VPP_VERSION})
pr("VPP library version" ${VPP_LIB_VERSION})
pr("GIT toplevel dir" ${VPP_GIT_TOPLEVEL_DIR})
pr("Build type" ${CMAKE_BUILD_TYPE})
+pr("C compiler" "${CMAKE_C_COMPILER} (${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION})")
pr("C flags" ${CMAKE_C_FLAGS}${CMAKE_C_FLAGS_${CMAKE_BUILD_TYPE_UC}})
pr("Linker flags (apps)" ${CMAKE_EXE_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS_${CMAKE_BUILD_TYPE_UC}})
pr("Linker flags (libs)" ${CMAKE_SHARED_LINKER_FLAGS} ${CMAKE_SHARED_LINKER_FLAGS_${CMAKE_BUILD_TYPE_UC}})
@@ -266,3 +365,5 @@ pr("Host processor" ${CMAKE_HOST_SYSTEM_PROCESSOR})
pr("Target processor" ${CMAKE_SYSTEM_PROCESSOR})
pr("Prefix path" ${CMAKE_PREFIX_PATH})
pr("Install prefix" ${CMAKE_INSTALL_PREFIX})
+pr("Library dir" ${VPP_LIBRARY_DIR})
+pr("Multiarch variants" ${MARCH_VARIANTS_NAMES})
diff --git a/src/cmake/VPPConfig.cmake b/src/cmake/VPPConfig.cmake
index 21da59d68ba..6f1b530625a 100644
--- a/src/cmake/VPPConfig.cmake
+++ b/src/cmake/VPPConfig.cmake
@@ -20,6 +20,7 @@ find_program(VPP_VAPI_CPP_GEN vapi_cpp_gen.py)
if(VPP_INCLUDE_DIR AND VPP_APIGEN)
include_directories (${VPP_INCLUDE_DIR})
+ include_directories (${VPP_INCLUDE_DIR}/vpp_plugins)
else()
message(FATAL_ERROR "VPP headers, libraries and/or tools not found")
endif()
@@ -35,6 +36,7 @@ endif()
set(VPP_RUNTIME_DIR "bin" CACHE STRING "Relative runtime directory path")
set(VPP_LIBRARY_DIR "lib" CACHE STRING "Relative library directory path")
+set(VPP_BINARY_DIR ${CMAKE_BINARY_DIR}/CMakeFiles)
include(${CMAKE_CURRENT_LIST_DIR}/cpu.cmake)
include(${CMAKE_CURRENT_LIST_DIR}/api.cmake)
diff --git a/src/cmake/api.cmake b/src/cmake/api.cmake
index 831c2b1e32d..10e89d77594 100644
--- a/src/cmake/api.cmake
+++ b/src/cmake/api.cmake
@@ -64,13 +64,13 @@ function(vpp_generate_api_json_header file dir component)
add_custom_command (OUTPUT ${output_name}
COMMAND mkdir -p ${output_dir}
COMMAND ${PYENV} ${VPP_APIGEN}
- ARGS ${includedir} --includedir ${CMAKE_SOURCE_DIR} --input ${CMAKE_CURRENT_SOURCE_DIR}/${file} JSON --output ${output_name}
+ ARGS ${includedir} --includedir ${CMAKE_SOURCE_DIR} --input ${CMAKE_CURRENT_SOURCE_DIR}/${file} JSON --outputdir ${output_dir} --output ${output_name}
DEPENDS ${VPP_APIGEN} ${CMAKE_CURRENT_SOURCE_DIR}/${file}
COMMENT "Generating API header ${output_name}"
)
install(
FILES ${output_name}
- DESTINATION share/vpp/api/${dir}/
+ DESTINATION ${CMAKE_INSTALL_DATADIR}/vpp/api/${dir}/
COMPONENT ${component}
)
endfunction()
@@ -101,7 +101,7 @@ function(vpp_generate_vapi_c_header f)
)
install(
FILES ${output_name}
- DESTINATION include/vapi
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vapi
COMPONENT vpp-dev
)
endfunction ()
@@ -128,7 +128,7 @@ function (vpp_generate_vapi_cpp_header f)
)
install(
FILES ${output_name}
- DESTINATION include/vapi
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vapi
COMPONENT vpp-dev
)
endfunction ()
diff --git a/src/cmake/ccache.cmake b/src/cmake/ccache.cmake
index 058a0f3d85a..a7b395bc6f9 100644
--- a/src/cmake/ccache.cmake
+++ b/src/cmake/ccache.cmake
@@ -20,8 +20,8 @@ if(VPP_USE_CCACHE)
message(STATUS "Looking for ccache")
if(CCACHE_FOUND)
message(STATUS "Looking for ccache - found")
- set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache)
- set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache)
+ set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND})
+ set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND})
else(CCACHE_FOUND)
message(STATUS "Looking for ccache - not found")
endif(CCACHE_FOUND)
diff --git a/src/cmake/cpu.cmake b/src/cmake/cpu.cmake
index f4a57a34281..25e966dcf7a 100644
--- a/src/cmake/cpu.cmake
+++ b/src/cmake/cpu.cmake
@@ -11,49 +11,36 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+macro(set_log2_cacheline_size var n)
+ if(${n} EQUAL 128)
+ set(${var} 7)
+ elseif(${n} EQUAL 64)
+ set(${var} 6)
+ else()
+ message(FATAL_ERROR "Cacheline size ${n} not supported")
+ endif()
+endmacro()
+
##############################################################################
-# Cache line size detection
+# Cache line size
##############################################################################
-if(CMAKE_CROSSCOMPILING)
- message(STATUS "Cross-compiling - cache line size detection disabled")
- set(VPP_LOG2_CACHE_LINE_SIZE 6)
-elseif(DEFINED VPP_LOG2_CACHE_LINE_SIZE)
- # Cache line size assigned via cmake args
-elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
- file(READ "/proc/cpuinfo" cpuinfo)
- string(REPLACE "\n" ";" cpuinfo ${cpuinfo})
- foreach(l ${cpuinfo})
- string(REPLACE ":" ";" l ${l})
- list(GET l 0 name)
- list(GET l 1 value)
- string(STRIP ${name} name)
- string(STRIP ${value} value)
- if(${name} STREQUAL "CPU implementer")
- set(CPU_IMPLEMENTER ${value})
- endif()
- if(${name} STREQUAL "CPU part")
- set(CPU_PART ${value})
- endif()
- endforeach()
- # Implementer 0x43 - Cavium
- # Part 0x0af - ThunderX2 is 64B, rest all are 128B
- if (${CPU_IMPLEMENTER} STREQUAL "0x43")
- if (${CPU_PART} STREQUAL "0x0af")
- set(VPP_LOG2_CACHE_LINE_SIZE 6)
- else()
- set(VPP_LOG2_CACHE_LINE_SIZE 7)
- endif()
+
+if(DEFINED VPP_PLATFORM_CACHE_LINE_SIZE)
+ set(VPP_CACHE_LINE_SIZE ${VPP_PLATFORM_CACHE_LINE_SIZE})
+else()
+ if(DEFINED VPP_CACHE_LINE_SIZE)
+ # Cache line size assigned via cmake args
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
+ set(VPP_CACHE_LINE_SIZE 128)
else()
- set(VPP_LOG2_CACHE_LINE_SIZE 6)
+ set(VPP_CACHE_LINE_SIZE 64)
endif()
- math(EXPR VPP_CACHE_LINE_SIZE "1 << ${VPP_LOG2_CACHE_LINE_SIZE}")
- message(STATUS "ARM AArch64 CPU implementer ${CPU_IMPLEMENTER} part ${CPU_PART} cacheline size ${VPP_CACHE_LINE_SIZE}")
-else()
- set(VPP_LOG2_CACHE_LINE_SIZE 6)
+
+ set(VPP_CACHE_LINE_SIZE ${VPP_CACHE_LINE_SIZE}
+ CACHE STRING "Target CPU cache line size")
endif()
-set(VPP_LOG2_CACHE_LINE_SIZE ${VPP_LOG2_CACHE_LINE_SIZE}
- CACHE STRING "Target CPU cache line size (power of 2)")
+set_log2_cacheline_size(VPP_LOG2_CACHE_LINE_SIZE ${VPP_CACHE_LINE_SIZE})
##############################################################################
# Gnu Assembler AVX-512 bug detection
@@ -74,10 +61,15 @@ endif()
##############################################################################
# CPU optimizations and multiarch support
##############################################################################
+
+if(NOT DEFINED VPP_PLATFORM)
+ option(VPP_BUILD_NATIVE_ONLY "Build only for native CPU." OFF)
+endif()
+
macro(add_vpp_march_variant v)
cmake_parse_arguments(ARG
"OFF"
- "N_PREFETCHES"
+ "N_PREFETCHES;CACHE_PREFETCH_BYTES"
"FLAGS"
${ARGN}
)
@@ -98,6 +90,10 @@ macro(add_vpp_march_variant v)
if(ARG_N_PREFETCHES)
string(APPEND fs " -DCLIB_N_PREFETCHES=${ARG_N_PREFETCHES}")
endif()
+ if(ARG_CACHE_PREFETCH_BYTES)
+ set_log2_cacheline_size(log2 ${ARG_CACHE_PREFETCH_BYTES})
+ string(APPEND fs " -DCLIB_LOG2_CACHE_PREFETCH_BYTES=${log2}")
+ endif()
if(flags_ok)
string(TOUPPER ${v} uv)
if(ARG_OFF)
@@ -107,6 +103,7 @@ macro(add_vpp_march_variant v)
endif()
if (VPP_MARCH_VARIANT_${uv})
list(APPEND MARCH_VARIANTS "${v}\;${fs}")
+ list(APPEND MARCH_VARIANTS_NAMES "${v}")
else()
list(APPEND MARCH_VARIANTS_DISABLED "${v}\;${fs}")
endif()
@@ -114,7 +111,26 @@ macro(add_vpp_march_variant v)
endif()
endmacro()
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+if(DEFINED VPP_PLATFORM)
+ if(DEFINED VPP_PLATFORM_MARCH_FLAGS)
+ set(VPP_DEFAULT_MARCH_FLAGS ${VPP_PLATFORM_MARCH_FLAGS})
+ check_c_compiler_flag(${VPP_DEFAULT_MARCH_FLAGS} compiler_flag_march)
+ if(NOT compiler_flag_march)
+ message(FATAL_ERROR "platform build with ${VPP_DEFAULT_MARCH_FLAGS} is not supported by compiler")
+ endif()
+ else()
+ set(VPP_DEFAULT_MARCH_FLAGS "")
+ endif()
+ set(MARCH_VARIANTS_NAMES "platform-only")
+elseif(VPP_BUILD_NATIVE_ONLY)
+ set(VPP_BUILD_NATIVE_ARCH "native" CACHE STRING "native CPU -march= value.")
+ set(VPP_DEFAULT_MARCH_FLAGS -march=${VPP_BUILD_NATIVE_ARCH})
+ check_c_compiler_flag(${VPP_DEFAULT_MARCH_FLAGS} compiler_flag_march)
+ if(NOT compiler_flag_march)
+ message(FATAL_ERROR "Native-only build with ${VPP_DEFAULT_MARCH_FLAGS} is not supported by compiler")
+ endif()
+ set(MARCH_VARIANTS_NAMES "native-only")
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(VPP_DEFAULT_MARCH_FLAGS -march=corei7 -mtune=corei7-avx)
add_vpp_march_variant(hsw
@@ -126,6 +142,21 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
OFF
)
+ add_vpp_march_variant(adl
+ FLAGS -march=alderlake -mtune=alderlake -mprefer-vector-width=256
+ OFF
+ )
+
+ add_vpp_march_variant(scalar
+ FLAGS -march=core2 -mno-mmx -mno-sse
+ OFF
+ )
+
+ add_vpp_march_variant(znver3
+ FLAGS -march=znver3 -mtune=znver3 -mprefer-vector-width=256
+ OFF
+ )
+
if (GNU_ASSEMBLER_AVX512_BUG)
message(WARNING "AVX-512 multiarch variant(s) disabled due to GNU Assembler bug")
else()
@@ -136,6 +167,16 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
add_vpp_march_variant(icl
FLAGS -march=icelake-client -mtune=icelake-client -mprefer-vector-width=512
)
+
+ add_vpp_march_variant(spr
+ FLAGS -march=sapphirerapids -mtune=sapphirerapids -mprefer-vector-width=512
+ OFF
+ )
+
+ add_vpp_march_variant(znver4
+ FLAGS -march=znver4 -mtune=znver4 -mprefer-vector-width=512
+ OFF
+ )
endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
set(VPP_DEFAULT_MARCH_FLAGS -march=armv8-a+crc)
@@ -143,6 +184,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
add_vpp_march_variant(qdf24xx
FLAGS -march=armv8-a+crc+crypto -mtune=qdf24xx
N_PREFETCHES 8
+ CACHE_PREFETCH_BYTES 64
OFF
)
@@ -154,16 +196,25 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
add_vpp_march_variant(thunderx2t99
FLAGS -march=armv8.1-a+crc+crypto -mtune=thunderx2t99
N_PREFETCHES 8
+ CACHE_PREFETCH_BYTES 64
)
add_vpp_march_variant(cortexa72
FLAGS -march=armv8-a+crc+crypto -mtune=cortex-a72
N_PREFETCHES 6
+ CACHE_PREFETCH_BYTES 64
)
add_vpp_march_variant(neoversen1
FLAGS -march=armv8.2-a+crc+crypto -mtune=neoverse-n1
N_PREFETCHES 6
+ CACHE_PREFETCH_BYTES 64
+ )
+ add_vpp_march_variant(neoversen2
+ FLAGS -march=armv9-a+crypto -mtune=neoverse-n2
+ N_PREFETCHES 6
+ CACHE_PREFETCH_BYTES 64
+ OFF
)
endif()
diff --git a/src/cmake/library.cmake b/src/cmake/library.cmake
index 560b2c97d5c..a06a795c69f 100644
--- a/src/cmake/library.cmake
+++ b/src/cmake/library.cmake
@@ -41,8 +41,10 @@ macro(add_vpp_library lib)
endif()
install(
TARGETS ${lib}
- DESTINATION ${VPP_LIBRARY_DIR}
- COMPONENT ${ARG_COMPONENT}
+ LIBRARY
+ DESTINATION ${VPP_LIBRARY_DIR}
+ COMPONENT ${ARG_COMPONENT}
+ NAMELINK_COMPONENT ${ARG_COMPONENT}-dev
)
if (ARG_LTO AND VPP_USE_LTO)
@@ -51,6 +53,9 @@ macro(add_vpp_library lib)
target_compile_options (${lib} PRIVATE "-ffunction-sections")
target_compile_options (${lib} PRIVATE "-fdata-sections")
target_link_libraries (${lib} "-Wl,--gc-sections")
+ if(compiler_flag_no_stringop_overflow)
+ target_link_libraries (${lib} "-Wno-stringop-overflow")
+ endif()
endif()
if(ARG_MULTIARCH_SOURCES)
@@ -67,7 +72,7 @@ macro(add_vpp_library lib)
${CMAKE_CURRENT_BINARY_DIR}/${file}_types.h
${CMAKE_CURRENT_BINARY_DIR}/${file}_tojson.h
${CMAKE_CURRENT_BINARY_DIR}/${file}_fromjson.h
- DESTINATION include/${lib}/${dir}
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${lib}/${dir}
COMPONENT vpp-dev
)
endforeach()
@@ -77,6 +82,10 @@ macro(add_vpp_library lib)
add_dependencies(${lo} api_headers)
endif()
+ if(VPP_EXTERNAL_PROJECT AND ARG_API_FILES)
+ add_dependencies(${lo} ${lib}_api_headers)
+ endif()
+
if(ARG_DEPENDS)
add_dependencies(${lo} ${ARG_DEPENDS})
endif()
@@ -87,7 +96,7 @@ macro(add_vpp_library lib)
get_filename_component(dir ${file} DIRECTORY)
install(
FILES ${file}
- DESTINATION include/${lib}/${dir}
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${lib}/${dir}
COMPONENT ${ARG_COMPONENT}-dev
)
endforeach()
@@ -102,12 +111,42 @@ function (add_vpp_headers path)
get_filename_component(dir ${file} DIRECTORY)
install(
FILES ${file}
- DESTINATION include/${path}/${dir}
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${path}/${dir}
COMPONENT vpp-dev
)
endforeach()
endfunction()
+macro(add_vat_test_library lib)
+ cmake_parse_arguments(TEST
+ ""
+ ""
+ ${ARGN}
+ )
+
+ foreach(file ${ARGN})
+ get_filename_component(name ${file} NAME_WE)
+ set(test_lib ${lib}_${name}_plugin)
+ add_library(${test_lib} SHARED ${file})
+ target_compile_options(${test_lib} PUBLIC ${VPP_DEFAULT_MARCH_FLAGS})
+ if(NOT VPP_EXTERNAL_PROJECT)
+ add_dependencies(${test_lib} api_headers)
+ endif()
+ include_directories(${CMAKE_CURRENT_BINARY_DIR})
+ set_target_properties(${test_lib} PROPERTIES NO_SONAME 1)
+ set_target_properties(${test_lib} PROPERTIES
+ PREFIX ""
+ LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/vpp_api_test_plugins)
+
+ # install .so
+ install(
+ TARGETS ${test_lib}
+ DESTINATION ${VPP_LIBRARY_DIR}/vpp_api_test_plugins
+ COMPONENT ${ARG_COMPONENT}
+ )
+ endforeach()
+endmacro()
+
macro(add_vpp_test_library lib)
cmake_parse_arguments(TEST
""
@@ -129,13 +168,12 @@ macro(add_vpp_test_library lib)
PREFIX ""
LIBRARY_OUTPUT_DIRECTORY ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/vat2_plugins)
- # Later: Install and package
# install .so
- #install(
- # TARGETS ${test_lib}
- # DESTINATION ${VPP_LIBRARY_DIR}/vat2_plugins
- # #COMPONENT ${ARG_COMPONENT}
- # )
+ install(
+ TARGETS ${test_lib}
+ DESTINATION ${VPP_LIBRARY_DIR}/vat2_plugins
+ COMPONENT ${ARG_COMPONENT}
+ )
endforeach()
endmacro()
diff --git a/src/cmake/pack.cmake b/src/cmake/pack.cmake
index a89a90c452c..88b62548211 100644
--- a/src/cmake/pack.cmake
+++ b/src/cmake/pack.cmake
@@ -64,6 +64,11 @@ macro(add_vpp_packaging)
set(CPACK_${type}_PACKAGE_DESCRIPTION "${ARG_DESCRIPTION}")
set(CPACK_${type}_PACKAGE_RELEASE 1)
+ # Pure Debian does not set the "OS_ID_LIKE", it only sets "OS_ID"
+ if (NOT DEFINED OS_ID_LIKE)
+ set(OS_ID_LIKE "${OS_ID}")
+ endif()
+
if(OS_ID_LIKE MATCHES "debian")
set(CPACK_GENERATOR "DEB")
set(type "DEBIAN")
diff --git a/src/cmake/platform/octeon10.cmake b/src/cmake/platform/octeon10.cmake
new file mode 100644
index 00000000000..cc2b292a419
--- /dev/null
+++ b/src/cmake/platform/octeon10.cmake
@@ -0,0 +1,4 @@
+
+set(VPP_PLATFORM_CACHE_LINE_SIZE 64)
+set(VPP_PLATFORM_MARCH_FLAGS -march=armv8.3-a+crypto+sve2-bitperm)
+set(VPP_PLATFORM_BUFFER_ALIGN 128)
diff --git a/src/cmake/plugin.cmake b/src/cmake/plugin.cmake
index b399470db02..f971e5f1401 100644
--- a/src/cmake/plugin.cmake
+++ b/src/cmake/plugin.cmake
@@ -15,9 +15,13 @@ macro(add_vpp_plugin name)
cmake_parse_arguments(PLUGIN
""
"LINK_FLAGS;COMPONENT;DEV_COMPONENT"
- "SOURCES;API_FILES;MULTIARCH_SOURCES;MULTIARCH_FORCE_ON;LINK_LIBRARIES;INSTALL_HEADERS;API_TEST_SOURCES;"
+ "SOURCES;API_FILES;MULTIARCH_SOURCES;MULTIARCH_FORCE_ON;LINK_LIBRARIES;INSTALL_HEADERS;API_TEST_SOURCES;VAT_AUTO_TEST;SUPPORTED_OS_LIST"
${ARGN}
)
+ if (PLUGIN_SUPPORTED_OS_LIST AND NOT ${CMAKE_SYSTEM_NAME} IN_LIST PLUGIN_SUPPORTED_OS_LIST)
+ message(WARNING "unsupported OS - ${name} plugin disabled")
+ return()
+ endif()
set(plugin_name ${name}_plugin)
set(api_includes)
if(NOT PLUGIN_COMPONENT)
@@ -40,7 +44,7 @@ macro(add_vpp_plugin name)
${CMAKE_CURRENT_BINARY_DIR}/${f}.h
${CMAKE_CURRENT_BINARY_DIR}/${f}_enum.h
${CMAKE_CURRENT_BINARY_DIR}/${f}_types.h
- DESTINATION include/vpp_plugins/${name}/${dir}
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vpp_plugins/${name}/${dir}
COMPONENT ${PLUGIN_DEV_COMPONENT}
)
endforeach()
@@ -82,7 +86,7 @@ macro(add_vpp_plugin name)
get_filename_component(dir ${file} DIRECTORY)
install(
FILES ${file}
- DESTINATION include/vpp_plugins/${name}/${dir}
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vpp_plugins/${name}/${dir}
COMPONENT vpp-dev
)
endforeach()
@@ -108,7 +112,7 @@ macro(add_vpp_plugin name)
COMPONENT ${PLUGIN_COMPONENT}
)
endif()
- if (PLUGIN_API_FILES)
+ if (PLUGIN_API_FILES AND NOT PLUGIN_VAT_AUTO_TEST STREQUAL OFF)
add_vpp_test_library(${name}_test_plugin ${PLUGIN_API_FILES})
endif()
diff --git a/src/examples/handoffdemo/README.md b/src/examples/handoffdemo/README.md
deleted file mode 100644
index e38c7b3cca0..00000000000
--- a/src/examples/handoffdemo/README.md
+++ /dev/null
@@ -1,186 +0,0 @@
-# Handoff queue demo plugin {#handoff_queue_demo_plugin}
-
-This plugin provides a simplified example of how to hand off
-packets between threads. I used it to debug packet-tracer handoff
-tracing support.
-
-# Packet generator input script
-
-```
- packet-generator new {
- name x
- limit 5
- size 128-128
- interface local0
- node handoffdemo-1
- data {
- incrementing 30
- }
- }
-```
-# Start vpp with 2 worker threads
-
-The demo plugin hands packets from worker 1 to worker 2.
-
-# Enable tracing, and start the packet generator
-
-```
- trace add pg-input 100
- packet-generator enable
-```
-
-# Sample Run
-
-```
- DBGvpp# ex /tmp/pg_input_script
- DBGvpp# pa en
- DBGvpp# sh err
- Count Node Reason
- 5 handoffdemo-1 packets handed off processed
- 5 handoffdemo-2 completed packets
- DBGvpp# show run
- Thread 1 vpp_wk_0 (lcore 0)
- Time 133.9, average vectors/node 5.00, last 128 main loops 0.00 per node 0.00
- vector rates in 3.7331e-2, out 0.0000e0, drop 0.0000e0, punt 0.0000e0
- Name State Calls Vectors Suspends Clocks Vectors/Call
- handoffdemo-1 active 1 5 0 4.76e3 5.00
- pg-input disabled 2 5 0 5.58e4 2.50
- unix-epoll-input polling 22760 0 0 2.14e7 0.00
- ---------------
- Thread 2 vpp_wk_1 (lcore 2)
- Time 133.9, average vectors/node 5.00, last 128 main loops 0.00 per node 0.00
- vector rates in 0.0000e0, out 0.0000e0, drop 3.7331e-2, punt 0.0000e0
- Name State Calls Vectors Suspends Clocks Vectors/Call
- drop active 1 5 0 1.35e4 5.00
- error-drop active 1 5 0 2.52e4 5.00
- handoffdemo-2 active 1 5 0 2.56e4 5.00
- unix-epoll-input polling 22406 0 0 2.18e7 0.00
-```
-
-Enable the packet tracer and run it again...
-
-```
- DBGvpp# trace add pg-input 100
- DBGvpp# pa en
- DBGvpp# sh trace
- sh trace
- ------------------- Start of thread 0 vpp_main -------------------
- No packets in trace buffer
- ------------------- Start of thread 1 vpp_wk_0 -------------------
- Packet 1
-
- 00:06:50:520688: pg-input
- stream x, 128 bytes, 0 sw_if_index
- current data 0, length 128, buffer-pool 0, ref-count 1, trace handle 0x1000000
- 00000000: 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d0000
- 00000020: 0000000000000000000000000000000000000000000000000000000000000000
- 00000040: 0000000000000000000000000000000000000000000000000000000000000000
- 00000060: 0000000000000000000000000000000000000000000000000000000000000000
- 00:06:50:520762: handoffdemo-1
- HANDOFFDEMO: current thread 1
-
- Packet 2
-
- 00:06:50:520688: pg-input
- stream x, 128 bytes, 0 sw_if_index
- current data 0, length 128, buffer-pool 0, ref-count 1, trace handle 0x1000001
- 00000000: 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d0000
- 00000020: 0000000000000000000000000000000000000000000000000000000000000000
- 00000040: 0000000000000000000000000000000000000000000000000000000000000000
- 00000060: 0000000000000000000000000000000000000000000000000000000000000000
- 00:06:50:520762: handoffdemo-1
- HANDOFFDEMO: current thread 1
-
- Packet 3
-
- 00:06:50:520688: pg-input
- stream x, 128 bytes, 0 sw_if_index
- current data 0, length 128, buffer-pool 0, ref-count 1, trace handle 0x1000002
- 00000000: 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d0000
- 00000020: 0000000000000000000000000000000000000000000000000000000000000000
- 00000040: 0000000000000000000000000000000000000000000000000000000000000000
- 00000060: 0000000000000000000000000000000000000000000000000000000000000000
- 00:06:50:520762: handoffdemo-1
- HANDOFFDEMO: current thread 1
-
- Packet 4
-
- 00:06:50:520688: pg-input
- stream x, 128 bytes, 0 sw_if_index
- current data 0, length 128, buffer-pool 0, ref-count 1, trace handle 0x1000003
- 00000000: 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d0000
- 00000020: 0000000000000000000000000000000000000000000000000000000000000000
- 00000040: 0000000000000000000000000000000000000000000000000000000000000000
- 00000060: 0000000000000000000000000000000000000000000000000000000000000000
- 00:06:50:520762: handoffdemo-1
- HANDOFFDEMO: current thread 1
-
- Packet 5
-
- 00:06:50:520688: pg-input
- stream x, 128 bytes, 0 sw_if_index
- current data 0, length 128, buffer-pool 0, ref-count 1, trace handle 0x1000004
- 00000000: 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d0000
- 00000020: 0000000000000000000000000000000000000000000000000000000000000000
- 00000040: 0000000000000000000000000000000000000000000000000000000000000000
- 00000060: 0000000000000000000000000000000000000000000000000000000000000000
- 00:06:50:520762: handoffdemo-1
- HANDOFFDEMO: current thread 1
-
- ------------------- Start of thread 2 vpp_wk_1 -------------------
- Packet 1
-
- 00:06:50:520796: handoff_trace
- HANDED-OFF: from thread 1 trace index 0
- 00:06:50:520796: handoffdemo-2
- HANDOFFDEMO: current thread 2
- 00:06:50:520867: error-drop
- rx:local0
- 00:06:50:520914: drop
- handoffdemo-2: completed packets
-
- Packet 2
-
- 00:06:50:520796: handoff_trace
- HANDED-OFF: from thread 1 trace index 1
- 00:06:50:520796: handoffdemo-2
- HANDOFFDEMO: current thread 2
- 00:06:50:520867: error-drop
- rx:local0
- 00:06:50:520914: drop
- handoffdemo-2: completed packets
-
- Packet 3
-
- 00:06:50:520796: handoff_trace
- HANDED-OFF: from thread 1 trace index 2
- 00:06:50:520796: handoffdemo-2
- HANDOFFDEMO: current thread 2
- 00:06:50:520867: error-drop
- rx:local0
- 00:06:50:520914: drop
- handoffdemo-2: completed packets
-
- Packet 4
-
- 00:06:50:520796: handoff_trace
- HANDED-OFF: from thread 1 trace index 3
- 00:06:50:520796: handoffdemo-2
- HANDOFFDEMO: current thread 2
- 00:06:50:520867: error-drop
- rx:local0
- 00:06:50:520914: drop
- handoffdemo-2: completed packets
-
- Packet 5
-
- 00:06:50:520796: handoff_trace
- HANDED-OFF: from thread 1 trace index 4
- 00:06:50:520796: handoffdemo-2
- HANDOFFDEMO: current thread 2
- 00:06:50:520867: error-drop
- rx:local0
- 00:06:50:520914: drop
- handoffdemo-2: completed packets
- DBGvpp#
-```
diff --git a/src/examples/handoffdemo/handoffdemo.c b/src/examples/handoffdemo/handoffdemo.c
index a1dad44d28d..03ebe4226f4 100644
--- a/src/examples/handoffdemo/handoffdemo.c
+++ b/src/examples/handoffdemo/handoffdemo.c
@@ -22,13 +22,11 @@
handoffdemo_main_t handoffdemo_main;
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "handoff demo plugin",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/examples/handoffdemo/handoffdemo.rst b/src/examples/handoffdemo/handoffdemo.rst
new file mode 100644
index 00000000000..d44854cc5cc
--- /dev/null
+++ b/src/examples/handoffdemo/handoffdemo.rst
@@ -0,0 +1,194 @@
+.. _handoff_queue_demo_plugin:
+
+Handoff queue in a plugin
+=========================
+
+This plugin provides a simplified example of how to hand off packets
+between threads. I used it to debug packet-tracer handoff tracing
+support.
+
+Packet generator input script
+-----------------------------
+
+::
+
+ packet-generator new {
+ name x
+ limit 5
+ size 128-128
+ interface local0
+ node handoffdemo-1
+ data {
+ incrementing 30
+ }
+ }
+
+Start vpp with 2 worker threads
+-------------------------------
+
+The demo plugin hands packets from worker 1 to worker 2.
+
+Enable tracing, and start the packet generator
+----------------------------------------------
+
+::
+
+ trace add pg-input 100
+ packet-generator enable
+
+Sample Run
+----------
+
+::
+
+ DBGvpp# ex /tmp/pg_input_script
+ DBGvpp# pa en
+ DBGvpp# sh err
+ Count Node Reason
+ 5 handoffdemo-1 packets handed off processed
+ 5 handoffdemo-2 completed packets
+ DBGvpp# show run
+ Thread 1 vpp_wk_0 (lcore 0)
+ Time 133.9, average vectors/node 5.00, last 128 main loops 0.00 per node 0.00
+ vector rates in 3.7331e-2, out 0.0000e0, drop 0.0000e0, punt 0.0000e0
+ Name State Calls Vectors Suspends Clocks Vectors/Call
+ handoffdemo-1 active 1 5 0 4.76e3 5.00
+ pg-input disabled 2 5 0 5.58e4 2.50
+ unix-epoll-input polling 22760 0 0 2.14e7 0.00
+ ---------------
+ Thread 2 vpp_wk_1 (lcore 2)
+ Time 133.9, average vectors/node 5.00, last 128 main loops 0.00 per node 0.00
+ vector rates in 0.0000e0, out 0.0000e0, drop 3.7331e-2, punt 0.0000e0
+ Name State Calls Vectors Suspends Clocks Vectors/Call
+ drop active 1 5 0 1.35e4 5.00
+ error-drop active 1 5 0 2.52e4 5.00
+ handoffdemo-2 active 1 5 0 2.56e4 5.00
+ unix-epoll-input polling 22406 0 0 2.18e7 0.00
+
+Enable the packet tracer and run it again…
+
+::
+
+ DBGvpp# trace add pg-input 100
+ DBGvpp# pa en
+ DBGvpp# sh trace
+ sh trace
+ ------------------- Start of thread 0 vpp_main -------------------
+ No packets in trace buffer
+ ------------------- Start of thread 1 vpp_wk_0 -------------------
+ Packet 1
+
+ 00:06:50:520688: pg-input
+ stream x, 128 bytes, 0 sw_if_index
+ current data 0, length 128, buffer-pool 0, ref-count 1, trace handle 0x1000000
+ 00000000: 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d0000
+ 00000020: 0000000000000000000000000000000000000000000000000000000000000000
+ 00000040: 0000000000000000000000000000000000000000000000000000000000000000
+ 00000060: 0000000000000000000000000000000000000000000000000000000000000000
+ 00:06:50:520762: handoffdemo-1
+ HANDOFFDEMO: current thread 1
+
+ Packet 2
+
+ 00:06:50:520688: pg-input
+ stream x, 128 bytes, 0 sw_if_index
+ current data 0, length 128, buffer-pool 0, ref-count 1, trace handle 0x1000001
+ 00000000: 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d0000
+ 00000020: 0000000000000000000000000000000000000000000000000000000000000000
+ 00000040: 0000000000000000000000000000000000000000000000000000000000000000
+ 00000060: 0000000000000000000000000000000000000000000000000000000000000000
+ 00:06:50:520762: handoffdemo-1
+ HANDOFFDEMO: current thread 1
+
+ Packet 3
+
+ 00:06:50:520688: pg-input
+ stream x, 128 bytes, 0 sw_if_index
+ current data 0, length 128, buffer-pool 0, ref-count 1, trace handle 0x1000002
+ 00000000: 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d0000
+ 00000020: 0000000000000000000000000000000000000000000000000000000000000000
+ 00000040: 0000000000000000000000000000000000000000000000000000000000000000
+ 00000060: 0000000000000000000000000000000000000000000000000000000000000000
+ 00:06:50:520762: handoffdemo-1
+ HANDOFFDEMO: current thread 1
+
+ Packet 4
+
+ 00:06:50:520688: pg-input
+ stream x, 128 bytes, 0 sw_if_index
+ current data 0, length 128, buffer-pool 0, ref-count 1, trace handle 0x1000003
+ 00000000: 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d0000
+ 00000020: 0000000000000000000000000000000000000000000000000000000000000000
+ 00000040: 0000000000000000000000000000000000000000000000000000000000000000
+ 00000060: 0000000000000000000000000000000000000000000000000000000000000000
+ 00:06:50:520762: handoffdemo-1
+ HANDOFFDEMO: current thread 1
+
+ Packet 5
+
+ 00:06:50:520688: pg-input
+ stream x, 128 bytes, 0 sw_if_index
+ current data 0, length 128, buffer-pool 0, ref-count 1, trace handle 0x1000004
+ 00000000: 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d0000
+ 00000020: 0000000000000000000000000000000000000000000000000000000000000000
+ 00000040: 0000000000000000000000000000000000000000000000000000000000000000
+ 00000060: 0000000000000000000000000000000000000000000000000000000000000000
+ 00:06:50:520762: handoffdemo-1
+ HANDOFFDEMO: current thread 1
+
+ ------------------- Start of thread 2 vpp_wk_1 -------------------
+ Packet 1
+
+ 00:06:50:520796: handoff_trace
+ HANDED-OFF: from thread 1 trace index 0
+ 00:06:50:520796: handoffdemo-2
+ HANDOFFDEMO: current thread 2
+ 00:06:50:520867: error-drop
+ rx:local0
+ 00:06:50:520914: drop
+ handoffdemo-2: completed packets
+
+ Packet 2
+
+ 00:06:50:520796: handoff_trace
+ HANDED-OFF: from thread 1 trace index 1
+ 00:06:50:520796: handoffdemo-2
+ HANDOFFDEMO: current thread 2
+ 00:06:50:520867: error-drop
+ rx:local0
+ 00:06:50:520914: drop
+ handoffdemo-2: completed packets
+
+ Packet 3
+
+ 00:06:50:520796: handoff_trace
+ HANDED-OFF: from thread 1 trace index 2
+ 00:06:50:520796: handoffdemo-2
+ HANDOFFDEMO: current thread 2
+ 00:06:50:520867: error-drop
+ rx:local0
+ 00:06:50:520914: drop
+ handoffdemo-2: completed packets
+
+ Packet 4
+
+ 00:06:50:520796: handoff_trace
+ HANDED-OFF: from thread 1 trace index 3
+ 00:06:50:520796: handoffdemo-2
+ HANDOFFDEMO: current thread 2
+ 00:06:50:520867: error-drop
+ rx:local0
+ 00:06:50:520914: drop
+ handoffdemo-2: completed packets
+
+ Packet 5
+
+ 00:06:50:520796: handoff_trace
+ HANDED-OFF: from thread 1 trace index 4
+ 00:06:50:520796: handoffdemo-2
+ HANDOFFDEMO: current thread 2
+ 00:06:50:520867: error-drop
+ rx:local0
+ 00:06:50:520914: drop
+ handoffdemo-2: completed packets
+ DBGvpp#
diff --git a/src/examples/handoffdemo/node.c b/src/examples/handoffdemo/node.c
index 2177ccb9d14..755fd8344fe 100644
--- a/src/examples/handoffdemo/node.c
+++ b/src/examples/handoffdemo/node.c
@@ -107,10 +107,9 @@ handoffdemo_inline (vlib_main_t * vm,
}
/* Enqueue buffers to threads */
- n_enq =
- vlib_buffer_enqueue_to_thread (vm, hmp->frame_queue_index,
- from, thread_indices, frame->n_vectors,
- 1 /* drop on congestion */ );
+ n_enq = vlib_buffer_enqueue_to_thread (
+ vm, node, hmp->frame_queue_index, from, thread_indices,
+ frame->n_vectors, 1 /* drop on congestion */);
if (n_enq < frame->n_vectors)
vlib_node_increment_counter (vm, node->node_index,
HANDOFFDEMO_ERROR_CONGESTION_DROP,
@@ -165,7 +164,6 @@ handoffdemo_node_1_fn (vlib_main_t * vm,
0 /* is_trace */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (handoffdemo_node_1) =
{
.name = "handoffdemo-1",
@@ -184,7 +182,6 @@ VLIB_REGISTER_NODE (handoffdemo_node_1) =
[HANDOFFDEMO_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
uword
handoffdemo_node_2_fn (vlib_main_t * vm,
@@ -198,7 +195,6 @@ handoffdemo_node_2_fn (vlib_main_t * vm,
0 /* is_trace */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (handoffdemo_node_2) =
{
.name = "handoffdemo-2",
@@ -217,7 +213,6 @@ VLIB_REGISTER_NODE (handoffdemo_node_2) =
[HANDOFFDEMO_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
static clib_error_t *
handoffdemo_node_init (vlib_main_t * vm)
diff --git a/src/examples/sample-plugin/sample/node.c b/src/examples/sample-plugin/sample/node.c
index a31c3e86e08..ebbf8ac3eef 100644
--- a/src/examples/sample-plugin/sample/node.c
+++ b/src/examples/sample-plugin/sample/node.c
@@ -291,7 +291,6 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
sample_next_t next_index;
u32 pkts_swapped = 0;
/* Vector shuffle mask to swap src, dst */
- u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -345,8 +344,10 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
src_dst0 = ((u8x16 *) en0)[0];
src_dst1 = ((u8x16 *) en1)[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
- src_dst1 = u8x16_shuffle (src_dst1, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
+ 4, 5, 12, 13, 14, 15);
+ src_dst1 = u8x16_shuffle (src_dst1, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
+ 4, 5, 12, 13, 14, 15);
((u8x16 *) en0)[0] = src_dst0;
((u8x16 *) en1)[0] = src_dst1;
@@ -418,7 +419,8 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
en0 = vlib_buffer_get_current (b0);
src_dst0 = ((u8x16 *) en0)[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
+ 4, 5, 12, 13, 14, 15);
((u8x16 *) en0)[0] = src_dst0;
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
@@ -469,7 +471,6 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
#ifdef VERSION_3
-#define u8x16_shuffle __builtin_shuffle
/* This would normally be a stack local, but since it's a constant... */
static const u16 nexts[VLIB_FRAME_SIZE] = { 0 };
@@ -479,7 +480,6 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 n_left_from, *from;
u32 pkts_swapped = 0;
/* Vector shuffle mask to swap src, dst */
- u8x16 swapmac = { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
/* See comment below about sending all pkts to the same place... */
u16 *next __attribute__ ((unused));
@@ -518,10 +518,14 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
src_dst2 = ((u8x16 *) vlib_buffer_get_current (b[2]))[0];
src_dst3 = ((u8x16 *) vlib_buffer_get_current (b[3]))[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
- src_dst1 = u8x16_shuffle (src_dst1, swapmac);
- src_dst2 = u8x16_shuffle (src_dst2, swapmac);
- src_dst3 = u8x16_shuffle (src_dst3, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
+ src_dst1 = u8x16_shuffle (src_dst1, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
+ src_dst2 = u8x16_shuffle (src_dst2, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
+ src_dst3 = u8x16_shuffle (src_dst3, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
((u8x16 *) vlib_buffer_get_current (b[1]))[0] = src_dst1;
@@ -552,7 +556,8 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u8x16 src_dst0;
src_dst0 = ((u8x16 *) vlib_buffer_get_current (b[0]))[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5,
+ 12, 13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b[0]))[0] = src_dst0;
vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
vnet_buffer (b[0])->sw_if_index[VLIB_RX];
@@ -611,18 +616,14 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
#ifdef VERSION_4
-#define u8x16_shuffle __builtin_shuffle
-
-static u8x16 swapmac =
- { 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12, 13, 14, 15 };
-
/* Final stage in the pipeline, do the mac swap */
static inline u32
last_stage (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b)
{
u8x16 src_dst0;
src_dst0 = ((u8x16 *) vlib_buffer_get_current (b))[0];
- src_dst0 = u8x16_shuffle (src_dst0, swapmac);
+ src_dst0 = u8x16_shuffle (src_dst0, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, 4, 5, 12,
+ 13, 14, 15);
((u8x16 *) vlib_buffer_get_current (b))[0] = src_dst0;
vnet_buffer (b)->sw_if_index[VLIB_TX] =
vnet_buffer (b)->sw_if_index[VLIB_RX];
@@ -687,7 +688,6 @@ VLIB_NODE_FN (sample_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
}
#endif
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sample_node) =
{
.name = "sample",
@@ -705,7 +705,6 @@ VLIB_REGISTER_NODE (sample_node) =
[SAMPLE_NEXT_INTERFACE_OUTPUT] = "interface-output",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/examples/sample-plugin/sample/sample.c b/src/examples/sample-plugin/sample/sample.c
index 4aeb5358e95..d829aaffaf9 100644
--- a/src/examples/sample-plugin/sample/sample.c
+++ b/src/examples/sample-plugin/sample/sample.c
@@ -30,12 +30,10 @@
#define REPLY_MSG_ID_BASE sm->msg_id_base
#include <vlibapi/api_helper_macros.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = SAMPLE_PLUGIN_BUILD_VER,
.description = "Sample of VPP Plugin",
};
-/* *INDENT-ON* */
sample_main_t sample_main;
diff --git a/src/examples/sample-plugin/sample_plugin_doc.md b/src/examples/sample-plugin/sample_plugin_doc.md
deleted file mode 100644
index 9aaaefa0061..00000000000
--- a/src/examples/sample-plugin/sample_plugin_doc.md
+++ /dev/null
@@ -1,66 +0,0 @@
-# Sample plugin for VPP {#sample_plugin_doc}
-
-## Overview
-
-This is the VPP sample plugin demonstrates how to create a new plugin that integrates
-with VPP. The sample code implements a trival macswap algorithim that demonstrates plugin
-runtime integration with the VPP graph hierachy, api and cli.
-
-For deeper dive information see the annotations in the sample code itself. See [sample.c](@ref sample.c)
-
-## How to build and run the sample plugin.
-
-Now (re)build VPP.
-
- $ make wipe
-
-Define environmental variable 'SAMPLE_PLUGIN=yes' with a process scope
-
- $ SAMPLE_PLUGIN=yes make build
-
-or a session scope, and build VPP.
-
- $ export SAMPLE_PLUGIN=yes
- $ make build
-
-Now run VPP and make sure the plugin is loaded.
-
- $ make run
- ...
- load_one_plugin:184: Loaded plugin: memif_plugin.so (Packet Memory Interface (experimetal))
- load_one_plugin:184: Loaded plugin: sample_plugin.so (Sample of VPP Plugin)
- load_one_plugin:184: Loaded plugin: nat_plugin.so (Network Address Translation)
- ...
- DBGvpp#
-
-## How to create a new plugin
-
-To create a new plugin based on the sample plugin, copy and rename the sample plugin directory and automake config.
-
- cp -r src/examples/sample-plugin/sample src/plugins/newplugin
- cp src/examples/sample-plugin/sample.am src/plugins/newplugin.am
-
-Add the following entry to the plugins section of `src/configure.ac`.
-
- PLUGIN_ENABLED(newplugin)
-
-Add the following entry to the plugins section of `src/plugins/Makefile.am`
-
- if ENABLE_NEWPLUGIN
- include newplugin.am
- endif
-
-Now (re)build VPP.
-
- $ make wipe
- $ make build
-
-## Configuration
-
-To enable the sample plugin
-
- sample macswap <interface name>
-
-To disable the sample plugin
-
- sample macswap <interface name> disable
diff --git a/src/examples/sample-plugin/sample_plugin_doc.rst b/src/examples/sample-plugin/sample_plugin_doc.rst
new file mode 100644
index 00000000000..23023e21bfb
--- /dev/null
+++ b/src/examples/sample-plugin/sample_plugin_doc.rst
@@ -0,0 +1,97 @@
+.. _sample_plugin_doc:
+
+Sample plugin for VPP
+=====================
+
+Overview
+--------
+
+This is the VPP sample plugin demonstrates how to create a new plugin
+that integrates with VPP. The sample code implements a trivial macswap
+algorithm that demonstrates plugin runtime integration with the VPP
+graph hierarchy, api and cli.
+
+For deeper dive information see the annotations in the sample code
+itself. See `sample.c <@ref%20sample.c>`__
+
+How to build and run the sample plugin.
+---------------------------------------
+
+Now (re)build VPP.
+
+::
+
+ $ make wipe
+
+Define environmental variable ‘SAMPLE_PLUGIN=yes’ with a process scope
+
+::
+
+ $ SAMPLE_PLUGIN=yes make build
+
+or a session scope, and build VPP.
+
+::
+
+ $ export SAMPLE_PLUGIN=yes
+ $ make build
+
+Now run VPP and make sure the plugin is loaded.
+
+::
+
+ $ make run
+ ...
+ load_one_plugin:184: Loaded plugin: memif_plugin.so (Packet Memory Interface (experimetal))
+ load_one_plugin:184: Loaded plugin: sample_plugin.so (Sample of VPP Plugin)
+ load_one_plugin:184: Loaded plugin: nat_plugin.so (Network Address Translation)
+ ...
+ DBGvpp#
+
+How to create a new plugin
+--------------------------
+
+To create a new plugin based on the sample plugin, copy and rename the
+sample plugin directory and automake config.
+
+::
+
+ cp -r src/examples/sample-plugin/sample src/plugins/newplugin
+ cp src/examples/sample-plugin/sample.am src/plugins/newplugin.am
+
+Add the following entry to the plugins section of ``src/configure.ac``.
+
+::
+
+ PLUGIN_ENABLED(newplugin)
+
+Add the following entry to the plugins section of
+``src/plugins/Makefile.am``
+
+::
+
+ if ENABLE_NEWPLUGIN
+ include newplugin.am
+ endif
+
+Now (re)build VPP.
+
+::
+
+ $ make wipe
+ $ make build
+
+Configuration
+-------------
+
+To enable the sample plugin
+
+::
+
+ sample macswap <interface name>
+
+To disable the sample plugin
+
+::
+
+ sample macswap <interface name> disable
diff --git a/src/examples/srv6-sample-localsid/srv6_localsid_sample.c b/src/examples/srv6-sample-localsid/srv6_localsid_sample.c
index 8ea3218e412..5b33ed6d091 100644
--- a/src/examples/srv6-sample-localsid/srv6_localsid_sample.c
+++ b/src/examples/srv6-sample-localsid/srv6_localsid_sample.c
@@ -91,8 +91,7 @@ unformat_srv6_localsid_sample (unformat_input_t * input, va_list * args)
if (unformat (input, "new_srv6_localsid %u", &table_id))
{
/* Allocate a portion of memory */
- ls_mem = clib_mem_alloc_aligned_at_offset (
- sizeof(srv6_localsid_sample_per_sid_memory_t), 0, 0, 1);
+ ls_mem = clib_mem_alloc (sizeof (srv6_localsid_sample_per_sid_memory_t));
/* Set to zero the memory */
clib_memset (ls_mem, 0, sizeof(srv6_localsid_sample_per_sid_memory_t));
diff --git a/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md b/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md
deleted file mode 100644
index cd717db8135..00000000000
--- a/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# Sample SRv6 LocalSID documentation {#srv6_plugin_doc}
-
-## Introduction
-
-This plugin is an example of how an user can create a new SRv6 LocalSID behavior by using VPP plugins with the appropiate API calls to the existing SR code.
-
-This **example** plugin registers a new localsid behavior, with cli keyword 'new_srv6_localsid' which only takes one parameter, a fib-table. Upon recival of a packet, this plugin will enforce the next IP6 lookup in the specific fib-table specified by the user. (Indeed it will do the lookup in the fib_table n+1 (since for the shake of the example we increment the fib-table.)
-
-Notice that the plugin only 'defines' a new SRv6 LocalSID behavior, but the existing SR code in VNET is the one actually instantiating new LocalSIDs. Notice that there are callback functions such that when you create or remove a LocalSID you can actually setup specific parameters through the functions in this plugin.
-
-## Variables to watch for
-
-* srv6_localsid_name: This variable is the name (used as a unique key) identifying this SR LocalSID plugin.
-* keyword_str: This is the CLI keyword to be used for the plugin. In this example 'new_srv6_localsid'. (i.e. sr localsid address cafe::1 behavior new_srv6_localsid <parameters>)
-* def_str: This is a definition of this SR behavior. This is printed when you do 'show sr localsid behaviors'.
-* params_str: This is a definition of the parameters of this localsid. This is printed when you do 'show sr localsid behaviors'.
-
-## Functions to watch for
-
-* srv6_localsid_creation_fn: This function will be called every time a new SR LocalSID is instantiated with the behavior defined in this plugin.
-* srv6_localsid_removal_fn: This function will be called every time a new SR LocalSID is removed with the behavior defined in this plugin. This function tends to be used for freeing up all the memory created in the previous function.
-* format_srv6_localsid_sample: This function prints nicely the parameters of every SR LocalSID using this behavior.
-* unformat_srv6_localsid_sample: This function parses the CLI command when initialising a new SR LocalSID using this behavior. It parses all the parameters and ensures that the parameters are correct.
-* format_srv6_localsid_sample_dpo: This function formats the 'show ip6 fib' message for the SR LocalSIDs created with this plugin behavior.
-
-## Graph node
-
-The current graph node uses the function 'end_srh_processing' to do the Segment Routing Endpoint behavior. Notice that it does not allow the cleanup of a Segment Routing header (as per the SRv6 behavior specs).
-This function is identical to the one found in /src/vnet/srv6/sr_localsid.c
-In case that by some other reason you want to do decapsulation, or SRH clean_up you can use the functions 'end_decaps_srh_processing' or 'end_psp_srh_processing' respectively.
diff --git a/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.rst b/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.rst
new file mode 100644
index 00000000000..a076cd2a6c7
--- /dev/null
+++ b/src/examples/srv6-sample-localsid/srv6_sample_localsid_doc.rst
@@ -0,0 +1,66 @@
+.. _srv6_plugin_doc:
+
+Sample SRv6 LocalSID documentation
+==================================
+
+Introduction
+------------
+
+This plugin is an example of how an user can create a new SRv6 LocalSID
+behavior by using VPP plugins with the appropriate API calls to the
+existing SR code.
+
+This **example** plugin registers a new localsid behavior, with cli
+keyword ‘new_srv6_localsid’ which only takes one parameter, a fib-table.
+Upon receival of a packet, this plugin will enforce the next IP6 lookup
+in the specific fib-table specified by the user. (Indeed it will do the
+lookup in the fib_table n+1 (since for the shake of the example we
+increment the fib-table.)
+
+Notice that the plugin only ‘defines’ a new SRv6 LocalSID behavior, but
+the existing SR code in VNET is the one actually instantiating new
+LocalSIDs. Notice that there are callback functions such that when you
+create or remove a LocalSID you can actually setup specific parameters
+through the functions in this plugin.
+
+Variables to watch for
+----------------------
+
+- srv6_localsid_name: This variable is the name (used as a unique key)
+ identifying this SR LocalSID plugin.
+- keyword_str: This is the CLI keyword to be used for the plugin. In
+ this example ‘new_srv6_localsid’. (i.e. sr localsid address cafe::1
+ behavior new_srv6_localsid )
+- def_str: This is a definition of this SR behavior. This is printed
+ when you do ‘show sr localsid behaviors’.
+- params_str: This is a definition of the parameters of this localsid.
+ This is printed when you do ‘show sr localsid behaviors’.
+
+Functions to watch for
+----------------------
+
+- srv6_localsid_creation_fn: This function will be called every time a
+ new SR LocalSID is instantiated with the behavior defined in this
+ plugin.
+- srv6_localsid_removal_fn: This function will be called every time a
+ new SR LocalSID is removed with the behavior defined in this plugin.
+ This function tends to be used for freeing up all the memory created
+ in the previous function.
+- format_srv6_localsid_sample: This function prints nicely the
+ parameters of every SR LocalSID using this behavior.
+- unformat_srv6_localsid_sample: This function parses the CLI command
+ when initializing a new SR LocalSID using this behavior. It parses
+ all the parameters and ensures that the parameters are correct.
+- format_srv6_localsid_sample_dpo: This function formats the ‘show ip6
+ fib’ message for the SR LocalSIDs created with this plugin behavior.
+
+Graph node
+----------
+
+The current graph node uses the function ‘end_srh_processing’ to do the
+Segment Routing Endpoint behavior. Notice that it does not allow the
+cleanup of a Segment Routing header (as per the SRv6 behavior specs).
+This function is identical to the one found in
+/src/vnet/srv6/sr_localsid.c In case that by some other reason you want
+to do decapsulation, or SRH clean_up you can use the functions
+‘end_decaps_srh_processing’ or ‘end_psp_srh_processing’ respectively.
diff --git a/src/examples/vlib/elog_samples.c b/src/examples/vlib/elog_samples.c
index a8c800df959..600292119b4 100644
--- a/src/examples/vlib/elog_samples.c
+++ b/src/examples/vlib/elog_samples.c
@@ -105,13 +105,11 @@ test_elog_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_elog_command, static) = {
.path = "test elog sample",
.short_help = "test elog sample",
.function = test_elog_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/examples/vlib/main_stub.c b/src/examples/vlib/main_stub.c
index 3735055d789..be16a2b98bc 100644
--- a/src/examples/vlib/main_stub.c
+++ b/src/examples/vlib/main_stub.c
@@ -30,12 +30,10 @@ main_stub_init (vlib_main_t * vm)
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (main_stub_init) =
{
.runs_after = VLIB_INITS("unix_physmem_init", "unix_cli_init"),
};
-/* *INDENT-ON* */
#if 0
/* Node test code. */
@@ -105,7 +103,6 @@ my_func (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return i;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (my_node1,static) = {
.function = my_func,
.type = VLIB_NODE_TYPE_INPUT,
@@ -117,16 +114,13 @@ VLIB_REGISTER_NODE (my_node1,static) = {
[0] = "my-node2",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (my_node2,static) = {
.function = my_func,
.name = "my-node2",
.scalar_size = sizeof (my_frame_t),
.vector_size = STRUCT_SIZE_OF (my_frame_t, vector[0]),
};
-/* *INDENT-ON* */
#endif
@@ -181,7 +175,7 @@ my_proc (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
vlib_cli_output (vm, "%U %v: completion #%d type %d data 0x%wx",
format_time_interval, "h:m:s:u",
vlib_time_now (vm), node->name, i, type, data[0]);
- _vec_len (data) = 0;
+ vec_set_len (data, 0);
}
vec_free (data);
@@ -209,13 +203,11 @@ my_proc (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return i;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (my_proc_node,static) = {
.function = my_proc,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "my-proc",
};
-/* *INDENT-ON* */
static uword
my_proc_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
@@ -250,13 +242,11 @@ my_proc_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (my_proc_input_node,static) = {
.function = my_proc_input,
.type = VLIB_NODE_TYPE_INPUT,
.name = "my-proc-input",
};
-/* *INDENT-ON* */
static uword
_unformat_farith (unformat_input_t * i, va_list * args)
@@ -384,7 +374,6 @@ bar_command (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bar_command2, static) = {
.path = "bar %decimal_integer",
.short_help = "bar1 command",
@@ -403,7 +392,6 @@ VLIB_CLI_COMMAND (bar_command3, static) = {
.function = bar_command,
.function_arg = 3,
};
-/* *INDENT-ON* */
#endif
diff --git a/src/examples/vlib/mc_test.c b/src/examples/vlib/mc_test.c
index 464d07cc471..8fbd3c7d215 100644
--- a/src/examples/vlib/mc_test.c
+++ b/src/examples/vlib/mc_test.c
@@ -241,16 +241,14 @@ mc_test_process (vlib_main_t * vm,
}
if (event_data)
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (mc_test_process_node, static) =
{
.function = mc_test_process,.type = VLIB_NODE_TYPE_PROCESS,.name =
"mc-test-process",};
-/* *INDENT-ON* */
static clib_error_t *
mc_test_command (vlib_main_t * vm,
@@ -277,12 +275,10 @@ mc_test_command (vlib_main_t * vm,
return unformat_parse_error (input);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_mc_command, static) =
{
.path = "test mc",.short_help = "Test mc command",.function =
mc_test_command,};
-/* *INDENT-ON* */
static clib_error_t *
mc_show_command (vlib_main_t * vm,
@@ -293,12 +289,10 @@ mc_show_command (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_mc_command, static) =
{
.path = "show mc",.short_help = "Show mc command",.function =
mc_show_command,};
-/* *INDENT-ON* */
static clib_error_t *
mc_clear_command (vlib_main_t * vm,
@@ -309,12 +303,10 @@ mc_clear_command (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_mc_command, static) =
{
.path = "clear mc",.short_help = "Clear mc command",.function =
mc_clear_command,};
-/* *INDENT-ON* */
static clib_error_t *
mc_config (vlib_main_t * vm, unformat_input_t * input)
diff --git a/src/pkg/CMakeLists.txt b/src/pkg/CMakeLists.txt
index d447774a1fd..5fa2826ad9a 100644
--- a/src/pkg/CMakeLists.txt
+++ b/src/pkg/CMakeLists.txt
@@ -18,8 +18,23 @@ endif()
get_cmake_property(VPP_COMPONENTS COMPONENTS)
string(REPLACE ";" " " VPP_COMPONENTS "${VPP_COMPONENTS}")
+##############################################################################
+# pinned timestamp for reproducible builds
+##############################################################################
+set(VPP_SOURCE_DATE_EPOCH
+ ""
+ CACHE
+ STRING "Artifact build timestamp for reproducible builds"
+)
+
+if(VPP_SOURCE_DATE_EPOCH STREQUAL "")
+ set(VPP_TIMESTAMP_ARG "")
+else()
+ set(VPP_TIMESTAMP_ARG "--date=@${VPP_SOURCE_DATE_EPOCH}")
+endif()
+
execute_process(
- COMMAND date -R
+ COMMAND date -R ${VPP_TIMESTAMP_ARG}
OUTPUT_VARIABLE TIMESTAMP
OUTPUT_STRIP_TRAILING_WHITESPACE
)
@@ -53,8 +68,6 @@ foreach(f copyright vpp.preinst vpp.postrm vpp.postinst vpp.service)
)
endforeach()
-file(WRITE ${VPP_BINARY_DIR}/debian/compat "10\n")
-
add_custom_target(pkg-deb
COMMENT "Building .deb packages..."
WORKING_DIRECTORY ${VPP_BINARY_DIR}
diff --git a/src/pkg/debian/control.in b/src/pkg/debian/control.in
index 1ce63a7d004..5f597f4ea1e 100644
--- a/src/pkg/debian/control.in
+++ b/src/pkg/debian/control.in
@@ -2,8 +2,7 @@ Source: vpp
Section: net
Priority: extra
Maintainer: fd.io VPP Packaging Team <vpp-dev@fd.io>
-Build-Depends: debhelper (>= 9),
- dh-systemd,
+Build-Depends: debhelper-compat (= 10),
dh-python,
@VPP_DEB_BUILD_DEPENDS@
Standards-Version: 3.9.4
@@ -51,14 +50,19 @@ Description: Vector Packet Processing--runtime libraries
Package: vpp-plugin-core
Architecture: any
Depends: vpp (= ${source:Version}),
- libmbedtls12 | libmbedtls10,
- libmbedx509-0,
- libmbedcrypto3 | libmbedcrypto1 | libmbedcrypto0,
${shlibs:Depends}
Description: Vector Packet Processing--runtime core plugins
This package contains VPP core plugins
.
+Package: vpp-plugin-devtools
+Architecture: any
+Depends: vpp (= ${source:Version}),
+ ${shlibs:Depends}
+Description: Vector Packet Processing--runtime developer tool plugins
+ This package contains VPP developer tool plugins
+ .
+
Package: vpp-plugin-dpdk
Architecture: any
Depends: vpp (= ${source:Version}),
diff --git a/src/pkg/debian/rules.in b/src/pkg/debian/rules.in
index 15f8eb6f810..1958497f5ed 100755..100644
--- a/src/pkg/debian/rules.in
+++ b/src/pkg/debian/rules.in
@@ -23,8 +23,6 @@ build3vers := $(shell py3versions -sv)
override_dh_strip:
dh_strip --dbg-package=vpp-dbg
-DEB_HOST_MULTIARCH = $(shell dpkg-architecture -qDEB_HOST_MULTIARCH)
-
override_dh_install:
@for c in @VPP_COMPONENTS@; do \
@CMAKE_COMMAND@ \
@@ -33,12 +31,7 @@ override_dh_install:
-D CMAKE_INSTALL_PREFIX=@VPP_BINARY_DIR@/debian/$$c \
-P @CMAKE_BINARY_DIR@/cmake_install.cmake 2>&1 \
| grep -v 'Set runtime path of' ; \
- if [ -d debian/$$c/lib ] ; then \
- mv debian/$$c/lib debian/$$c/$(DEB_HOST_MULTIARCH) ; \
- mkdir -p debian/$$c/usr/lib ; \
- mv debian/$$c/$(DEB_HOST_MULTIARCH) debian/$$c/usr/lib ; \
- fi ; \
- for d in bin include share ; do \
+ for d in bin include share lib ; do \
if [ -d debian/$$c/$$d ] ; then \
mkdir -p debian/$$c/usr ; \
mv debian/$$c/$$d debian/$$c/usr/$$d ; \
diff --git a/src/plugins/CMakeLists.txt b/src/plugins/CMakeLists.txt
index e54eaa2c4cb..43ad4cc2a25 100644
--- a/src/plugins/CMakeLists.txt
+++ b/src/plugins/CMakeLists.txt
@@ -23,7 +23,27 @@ FILE(GLOB files RELATIVE
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/*/CMakeLists.txt
)
+
+set(VPP_EXCLUDED_PLUGINS
+ ""
+ CACHE
+ STRING "Comma-separated list of core plugins excluded from packaging and tests"
+)
+
+# create the list of the plugins that we need to exclude from packaging
+SET(excluded_plugins "")
+STRING(REGEX REPLACE "[,]+" ";" exc_plugins "${VPP_EXCLUDED_PLUGINS}")
+foreach (e ${exc_plugins})
+ message(WARNING "Excluding plugin due to VPP_EXCLUDED_PLUGINS: '${e}'")
+ list(APPEND excluded_plugins ${e})
+endforeach()
+
foreach (f ${files})
get_filename_component(dir ${f} DIRECTORY)
- add_subdirectory(${dir})
+
+ # if a plugin is in the list of excluded plugin, do not add that subdirectory
+ LIST(FIND excluded_plugins "${dir}" exc_index)
+ if(${exc_index} EQUAL "-1")
+ add_subdirectory(${dir})
+ endif()
endforeach()
diff --git a/src/plugins/abf/FEATURE.yaml b/src/plugins/abf/FEATURE.yaml
index b9f3285daa3..7902dbe7800 100644
--- a/src/plugins/abf/FEATURE.yaml
+++ b/src/plugins/abf/FEATURE.yaml
@@ -1,9 +1,12 @@
---
name: ACL Based Forwarding
-maintainer: Neale Ranns <nranns@cisco.com>
+maintainer: Neale Ranns <neale@graphiant.com>
features:
- 'Policy Based Routing'
- - ACLs match traffic to be forwarded
+ - ACLs identify how traffic should be forwarded. Packets matching a permit
+ rule are forwarded using ABF policy. Packets matching a deny rule are
+ excluded from ABF handling and continue traversing the input feature arc on
+ the L3 path.
- Each rule in the ACL has an associated 'path' which determines how the
traffic will be forwarded. This path is described as a FIB path, so anything
possible with basic L3 forwarding is possible with ABF (with the exception
diff --git a/src/plugins/abf/abf.api b/src/plugins/abf/abf.api
index 1cd3da7e557..a748de4522b 100644
--- a/src/plugins/abf/abf.api
+++ b/src/plugins/abf/abf.api
@@ -51,7 +51,7 @@ define abf_plugin_get_version_reply
/** \brief A description of an ABF policy
@param policy_id User chosen Identifier for the policy
@param acl_index The ACL that the policy will match against
- @param n_paths Number of paths
+ @param n_paths Number of paths, 1..255
@param paths The set of forwarding paths that are being added or removed.
*/
typedef abf_policy
diff --git a/src/plugins/abf/abf_api.c b/src/plugins/abf/abf_api.c
index cc55b214e35..2330e7b7d21 100644
--- a/src/plugins/abf/abf_api.c
+++ b/src/plugins/abf/abf_api.c
@@ -34,10 +34,11 @@
#include <abf/abf.api_types.h>
/**
- * Base message ID fot the plugin
+ * Base message ID for the plugin
*/
static u32 abf_base_msg_id;
+#define REPLY_MSG_ID_BASE (abf_base_msg_id)
#include <vlibapi/api_helper_macros.h>
static void
@@ -68,6 +69,12 @@ vl_api_abf_policy_add_del_t_handler (vl_api_abf_policy_add_del_t * mp)
int rv = 0;
u8 pi;
+ if (mp->policy.n_paths == 0)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto done;
+ }
+
vec_validate (paths, mp->policy.n_paths - 1);
for (pi = 0; pi < mp->policy.n_paths; pi++)
@@ -93,7 +100,7 @@ vl_api_abf_policy_add_del_t_handler (vl_api_abf_policy_add_del_t * mp)
done:
vec_free (paths);
- REPLY_MACRO (VL_API_ABF_POLICY_ADD_DEL_REPLY + abf_base_msg_id);
+ REPLY_MACRO (VL_API_ABF_POLICY_ADD_DEL_REPLY);
}
static void
@@ -106,19 +113,17 @@ vl_api_abf_itf_attach_add_del_t_handler (vl_api_abf_itf_attach_add_del_t * mp)
if (mp->is_add)
{
- abf_itf_attach (fproto,
- ntohl (mp->attach.policy_id),
- ntohl (mp->attach.priority),
- ntohl (mp->attach.sw_if_index));
+ rv = abf_itf_attach (fproto, ntohl (mp->attach.policy_id),
+ ntohl (mp->attach.priority),
+ ntohl (mp->attach.sw_if_index));
}
else
{
- abf_itf_detach (fproto,
- ntohl (mp->attach.policy_id),
- ntohl (mp->attach.sw_if_index));
+ rv = abf_itf_detach (fproto, ntohl (mp->attach.policy_id),
+ ntohl (mp->attach.sw_if_index));
}
- REPLY_MACRO (VL_API_ABF_ITF_ATTACH_ADD_DEL_REPLY + abf_base_msg_id);
+ REPLY_MACRO (VL_API_ABF_ITF_ATTACH_ADD_DEL_REPLY);
}
typedef struct abf_dump_walk_ctx_t_
@@ -245,12 +250,10 @@ abf_api_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (abf_api_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Access Control List (ACL) Based Forwarding",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/abf/abf_itf_attach.c b/src/plugins/abf/abf_itf_attach.c
index 4f17f720f3b..04e5c4c40c2 100644
--- a/src/plugins/abf/abf_itf_attach.c
+++ b/src/plugins/abf/abf_itf_attach.c
@@ -399,7 +399,6 @@ abf_itf_attach_cmd (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/**
* Attach an ABF policy to an interface.
*/
@@ -409,7 +408,6 @@ VLIB_CLI_COMMAND (abf_itf_attach_cmd_node, static) = {
.short_help = "abf attach <ip4|ip6> [del] policy <value> <interface>",
// this is not MP safe
};
-/* *INDENT-ON* */
static clib_error_t *
abf_show_attach_cmd (vlib_main_t * vm,
@@ -438,7 +436,6 @@ abf_show_attach_cmd (vlib_main_t * vm,
vlib_cli_output (vm, "specify an interface");
}
- /* *INDENT-OFF* */
FOR_EACH_FIB_IP_PROTOCOL(fproto)
{
if (sw_if_index < vec_len(abf_per_itf[fproto]))
@@ -453,31 +450,26 @@ abf_show_attach_cmd (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (abf_show_attach_cmd_node, static) = {
.path = "show abf attach",
.function = abf_show_attach_cmd,
.short_help = "show abf attach <interface>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
void
abf_itf_attach_walk (abf_itf_attach_walk_cb_t cb, void *ctx)
{
u32 aii;
- /* *INDENT-OFF* */
pool_foreach_index (aii, abf_itf_attach_pool)
{
if (!cb(aii, ctx))
break;
}
- /* *INDENT-ON* */
}
typedef enum abf_next_t_
@@ -567,10 +559,11 @@ abf_input_inline (vlib_main_t * vm,
(FIB_PROTOCOL_IP6 == fproto), 1, 0,
&fa_5tuple0);
- if (acl_plugin_match_5tuple_inline
- (acl_plugin.p_acl_main, lc_index, &fa_5tuple0,
- (FIB_PROTOCOL_IP6 == fproto), &action, &match_acl_pos,
- &match_acl_index, &match_rule_index, &trace_bitmap))
+ if (acl_plugin_match_5tuple_inline (
+ acl_plugin.p_acl_main, lc_index, &fa_5tuple0,
+ (FIB_PROTOCOL_IP6 == fproto), &action, &match_acl_pos,
+ &match_acl_index, &match_rule_index, &trace_bitmap) &&
+ action > 0)
{
/*
* match:
@@ -656,7 +649,6 @@ static char *abf_error_strings[] = {
#undef abf_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (abf_ip4_node) =
{
.function = abf_input_ip4,
@@ -702,7 +694,6 @@ VNET_FEATURE_INIT (abf_ip6_feat, static) =
.node_name = "abf-input-ip6",
.runs_after = VNET_FEATURES ("acl-plugin-in-ip6-fa"),
};
-/* *INDENT-ON* */
static fib_node_t *
abf_itf_attach_get_node (fib_node_index_t index)
@@ -760,7 +751,7 @@ static clib_error_t *
abf_itf_bond_init (vlib_main_t * vm)
{
abf_itf_attach_fib_node_type =
- fib_node_register_new_type (&abf_itf_attach_vft);
+ fib_node_register_new_type ("abf-attach", &abf_itf_attach_vft);
clib_error_t *acl_init_res = acl_plugin_exports_init (&acl_plugin);
if (acl_init_res)
return (acl_init_res);
@@ -771,12 +762,10 @@ abf_itf_bond_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (abf_itf_bond_init) =
{
.runs_after = VLIB_INITS("acl_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/abf/abf_policy.c b/src/plugins/abf/abf_policy.c
index 945434bca27..e6dfe4fff55 100644
--- a/src/plugins/abf/abf_policy.c
+++ b/src/plugins/abf/abf_policy.c
@@ -192,50 +192,45 @@ abf_policy_delete (u32 policy_id, const fib_route_path_t * rpaths)
*/
return (VNET_API_ERROR_INVALID_VALUE);
}
- else
- {
- /*
- * update an existing policy.
- * - add the path to the path-list and swap our ancestry
- * - backwalk to poke all attachments to update
- */
- fib_node_index_t old_pl;
- ap = abf_policy_get (api);
- old_pl = ap->ap_pl;
+ /*
+ * update an existing policy.
+ * - add the path to the path-list and swap our ancestry
+ * - backwalk to poke all attachments to update
+ */
+ fib_node_index_t old_pl;
- fib_path_list_lock (old_pl);
- ap->ap_pl =
- fib_path_list_copy_and_path_remove (ap->ap_pl,
- (FIB_PATH_LIST_FLAG_SHARED |
- FIB_PATH_LIST_FLAG_NO_URPF),
- rpaths);
+ ap = abf_policy_get (api);
+ old_pl = ap->ap_pl;
- fib_path_list_child_remove (old_pl, ap->ap_sibling);
- ap->ap_sibling = ~0;
+ fib_path_list_lock (old_pl);
+ ap->ap_pl = fib_path_list_copy_and_path_remove (
+ ap->ap_pl, (FIB_PATH_LIST_FLAG_SHARED | FIB_PATH_LIST_FLAG_NO_URPF),
+ rpaths);
- if (FIB_NODE_INDEX_INVALID == ap->ap_pl)
- {
- /*
- * no more paths on this policy. It's toast
- * remove the CLI/API's lock
- */
- fib_node_unlock (&ap->ap_node);
- }
- else
- {
- ap->ap_sibling = fib_path_list_child_add (ap->ap_pl,
- abf_policy_fib_node_type,
- api);
+ fib_path_list_child_remove (old_pl, ap->ap_sibling);
+ ap->ap_sibling = ~0;
- fib_node_back_walk_ctx_t ctx = {
- .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
- };
+ if (FIB_NODE_INDEX_INVALID == ap->ap_pl)
+ {
+ /*
+ * no more paths on this policy. It's toast
+ * remove the CLI/API's lock
+ */
+ fib_node_unlock (&ap->ap_node);
+ }
+ else
+ {
+ ap->ap_sibling =
+ fib_path_list_child_add (ap->ap_pl, abf_policy_fib_node_type, api);
- fib_walk_sync (abf_policy_fib_node_type, api, &ctx);
- }
- fib_path_list_unlock (old_pl);
+ fib_node_back_walk_ctx_t ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+ };
+
+ fib_walk_sync (abf_policy_fib_node_type, api, &ctx);
}
+ fib_path_list_unlock (old_pl);
return (0);
}
@@ -272,14 +267,25 @@ abf_policy_cmd (vlib_main_t * vm,
unformat_fib_route_path, &rpath, &payload_proto))
vec_add1 (rpaths, rpath);
else
- return (clib_error_return (0, "unknown input '%U'",
- format_unformat_error, line_input));
+ {
+ clib_error_t *err;
+ err = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ return err;
+ }
}
if (INDEX_INVALID == policy_id)
{
vlib_cli_output (vm, "Specify a Policy ID");
- return 0;
+ goto out;
+ }
+
+ if (vec_len (rpaths) == 0)
+ {
+ vlib_cli_output (vm, "Hop path must not be empty");
+ goto out;
}
if (!is_del)
@@ -287,7 +293,7 @@ abf_policy_cmd (vlib_main_t * vm,
if (INDEX_INVALID == acl_index)
{
vlib_cli_output (vm, "ACL index must be set");
- return 0;
+ goto out;
}
rv = abf_policy_update (policy_id, acl_index, rpaths);
@@ -296,7 +302,7 @@ abf_policy_cmd (vlib_main_t * vm,
{
vlib_cli_output (vm,
"ACL index must match existing ACL index in policy");
- return 0;
+ goto out;
}
}
else
@@ -304,11 +310,11 @@ abf_policy_cmd (vlib_main_t * vm,
abf_policy_delete (policy_id, rpaths);
}
+out:
unformat_free (line_input);
return (NULL);
}
-/* *INDENT-OFF* */
/**
* Create an ABF policy.
*/
@@ -318,7 +324,6 @@ VLIB_CLI_COMMAND (abf_policy_cmd_node, static) = {
.short_help = "abf policy [add|del] id <index> acl <index> via ...",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static u8 *
format_abf (u8 * s, va_list * args)
@@ -345,13 +350,11 @@ abf_policy_walk (abf_policy_walk_cb_t cb, void *ctx)
{
u32 api;
- /* *INDENT-OFF* */
pool_foreach_index (api, abf_policy_pool)
{
if (!cb(api, ctx))
break;
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -374,12 +377,10 @@ abf_show_policy_cmd (vlib_main_t * vm,
if (INDEX_INVALID == policy_id)
{
- /* *INDENT-OFF* */
pool_foreach (ap, abf_policy_pool)
{
vlib_cli_output(vm, "%U", format_abf, ap);
}
- /* *INDENT-ON* */
}
else
{
@@ -394,14 +395,12 @@ abf_show_policy_cmd (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (abf_policy_show_policy_cmd_node, static) = {
.path = "show abf policy",
.function = abf_show_policy_cmd,
.short_help = "show abf policy <value>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static fib_node_t *
abf_policy_get_node (fib_node_index_t index)
@@ -456,7 +455,8 @@ static const fib_node_vft_t abf_policy_vft = {
static clib_error_t *
abf_policy_init (vlib_main_t * vm)
{
- abf_policy_fib_node_type = fib_node_register_new_type (&abf_policy_vft);
+ abf_policy_fib_node_type =
+ fib_node_register_new_type ("abf-policy", &abf_policy_vft);
return (NULL);
}
diff --git a/src/plugins/acl/CMakeLists.txt b/src/plugins/acl/CMakeLists.txt
index c43dd23ea51..1bb60d94fbe 100644
--- a/src/plugins/acl/CMakeLists.txt
+++ b/src/plugins/acl/CMakeLists.txt
@@ -30,4 +30,15 @@ add_vpp_plugin(acl
API_TEST_SOURCES
acl_test.c
+
+ INSTALL_HEADERS
+ exports.h
+ exported_types.h
+ acl.h
+ fa_node.h
+ public_inlines.h
+ types.h
+ hash_lookup_types.h
+ lookup_context.h
+ hash_lookup_private.h
)
diff --git a/src/plugins/acl/acl.api b/src/plugins/acl/acl.api
index a4706c3e529..404b512abed 100644
--- a/src/plugins/acl/acl.api
+++ b/src/plugins/acl/acl.api
@@ -19,7 +19,7 @@
used to control the ACL plugin
*/
-option version = "2.0.0";
+option version = "2.0.1";
import "plugins/acl/acl_types.api";
import "vnet/interface_types.api";
@@ -497,3 +497,43 @@ autoreply define acl_stats_intf_counters_enable
bool enable;
option vat_help = "[disable]";
};
+
+/** \brief Enable hash-based ACL lookups (default) or disable them (use linear search)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param enable - whether to enable or disable the usage of hash lookup algorithm
+*/
+
+autoreply define acl_plugin_use_hash_lookup_set
+{
+ option status="in_progress";
+ u32 client_index;
+ u32 context;
+ bool enable;
+};
+
+/** \brief Get if the hash-based ACL lookups are enabled (default) or not (use linear search)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+
+
+define acl_plugin_use_hash_lookup_get
+{
+ option status="in_progress";
+ u32 client_index;
+ u32 context;
+};
+
+
+/** \brief Reply with the previous state of the hash lookup
+ @param context - returned sender context, to match reply w/ request
+ @param prev_enable - previous state of the hash lookup use
+*/
+
+define acl_plugin_use_hash_lookup_get_reply
+{
+ option status="in_progress";
+ u32 context;
+ bool enable;
+};
diff --git a/src/plugins/acl/acl.c b/src/plugins/acl/acl.c
index e8b5877ab21..e52e82fcf28 100644
--- a/src/plugins/acl/acl.c
+++ b/src/plugins/acl/acl.c
@@ -36,7 +36,6 @@
#include <acl/acl.api_enum.h>
#include <acl/acl.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#include "fa_node.h"
#include "public_inlines.h"
@@ -53,12 +52,10 @@ acl_main_t acl_main;
#include <vppinfra/bihash_template.h>
#include <vppinfra/bihash_template.c>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Access Control Lists (ACL)",
};
-/* *INDENT-ON* */
/* methods exported from ACL-as-a-service */
static acl_plugin_methods_t acl_plugin;
@@ -110,12 +107,10 @@ vl_api_acl_plugin_control_ping_t_handler (vl_api_acl_plugin_control_ping_t *
acl_main_t *am = &acl_main;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ACL_PLUGIN_CONTROL_PING_REPLY,
({
rmp->vpe_pid = ntohl (getpid ());
}));
- /* *INDENT-ON* */
}
static void
@@ -310,7 +305,9 @@ static int
acl_api_invalid_prefix (const vl_api_prefix_t * prefix)
{
ip_prefix_t ip_prefix;
- return ip_prefix_decode2 (prefix, &ip_prefix);
+ int valid_af =
+ prefix->address.af == ADDRESS_IP4 || prefix->address.af == ADDRESS_IP6;
+ return (!valid_af) || ip_prefix_decode2 (prefix, &ip_prefix);
}
static int
@@ -339,6 +336,8 @@ acl_add_list (u32 count, vl_api_acl_rule_t rules[],
return VNET_API_ERROR_INVALID_SRC_ADDRESS;
if (acl_api_invalid_prefix (&rules[i].dst_prefix))
return VNET_API_ERROR_INVALID_DST_ADDRESS;
+ if (rules[i].src_prefix.address.af != rules[i].dst_prefix.address.af)
+ return VNET_API_ERROR_INVALID_SRC_ADDRESS;
if (ntohs (rules[i].srcport_or_icmptype_first) >
ntohs (rules[i].srcport_or_icmptype_last))
return VNET_API_ERROR_INVALID_VALUE_2;
@@ -684,7 +683,6 @@ acl_interface_set_inout_acl_list (acl_main_t * am, u32 sw_if_index,
format_bitmap_hex, old_seen_acl_bitmap, format_bitmap_hex,
seen_acl_bitmap, format_bitmap_hex, change_acl_bitmap);
-/* *INDENT-OFF* */
clib_bitmap_foreach (acln, change_acl_bitmap) {
if (clib_bitmap_get(old_seen_acl_bitmap, acln)) {
/* ACL is being removed. */
@@ -698,7 +696,6 @@ acl_interface_set_inout_acl_list (acl_main_t * am, u32 sw_if_index,
vec_add1((*pinout_sw_if_index_vec_by_acl)[acln], sw_if_index);
}
}
-/* *INDENT-ON* */
vec_free ((*pinout_acl_vec_by_sw_if_index)[sw_if_index]);
(*pinout_acl_vec_by_sw_if_index)[sw_if_index] =
@@ -1807,12 +1804,10 @@ vl_api_acl_add_replace_t_handler (vl_api_acl_add_replace_t * mp)
rv = VNET_API_ERROR_INVALID_VALUE;
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_ACL_ADD_REPLACE_REPLY,
({
rmp->acl_index = htonl(acl_list_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1974,13 +1969,11 @@ vl_api_acl_dump_t_handler (vl_api_acl_dump_t * mp)
if (mp->acl_index == ~0)
{
- /* *INDENT-OFF* */
/* Just dump all ACLs */
pool_foreach (acl, am->acls)
{
send_acl_details(am, reg, acl, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -2060,12 +2053,10 @@ vl_api_acl_interface_list_dump_t_handler (vl_api_acl_interface_list_dump_t *
if (mp->sw_if_index == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (swif, im->sw_interfaces)
{
send_acl_interface_list_details(am, reg, swif->sw_if_index, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -2096,12 +2087,10 @@ vl_api_macip_acl_add_t_handler (vl_api_macip_acl_add_t * mp)
rv = VNET_API_ERROR_INVALID_VALUE;
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_MACIP_ACL_ADD_REPLY,
({
rmp->acl_index = htonl(acl_list_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -2123,12 +2112,10 @@ vl_api_macip_acl_add_replace_t_handler (vl_api_macip_acl_add_replace_t * mp)
rv = VNET_API_ERROR_INVALID_VALUE;
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_MACIP_ACL_ADD_REPLACE_REPLY,
({
rmp->acl_index = htonl(acl_list_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -2225,12 +2212,10 @@ vl_api_macip_acl_dump_t_handler (vl_api_macip_acl_dump_t * mp)
if (mp->acl_index == ~0)
{
/* Just dump all ACLs for now, with sw_if_index = ~0 */
- /* *INDENT-OFF* */
pool_foreach (acl, am->macip_acls)
{
send_macip_acl_details (am, reg, acl, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -2434,12 +2419,10 @@ static void
if (mp->sw_if_index == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (swif, im->sw_interfaces)
{
send_acl_interface_etype_whitelist_details(am, reg, swif->sw_if_index, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -2451,6 +2434,45 @@ static void
}
static void
+vl_api_acl_plugin_use_hash_lookup_set_t_handler (
+ vl_api_acl_plugin_use_hash_lookup_set_t *mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_acl_plugin_use_hash_lookup_set_reply_t *rmp;
+ vl_api_registration_t *reg;
+ int rv = 0;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ am->use_hash_acl_matching = mp->enable;
+ REPLY_MACRO (VL_API_ACL_PLUGIN_USE_HASH_LOOKUP_SET_REPLY);
+}
+
+static void
+vl_api_acl_plugin_use_hash_lookup_get_t_handler (
+ vl_api_acl_plugin_use_hash_lookup_get_t *mp)
+{
+ acl_main_t *am = &acl_main;
+ vl_api_acl_plugin_use_hash_lookup_get_reply_t *rmp;
+ int msg_size = sizeof (*rmp);
+ vl_api_registration_t *reg;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ rmp = vl_msg_api_alloc (msg_size);
+ clib_memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id =
+ ntohs (VL_API_ACL_PLUGIN_USE_HASH_LOOKUP_GET_REPLY + am->msg_id_base);
+ rmp->context = mp->context;
+ rmp->enable = am->use_hash_acl_matching;
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
acl_set_timeout_sec (int timeout_type, u32 value)
{
acl_main_t *am = &acl_main;
@@ -2802,6 +2824,7 @@ acl_set_aclplugin_interface_fn (vlib_main_t * vm,
break;
}
+ unformat_free (line_input);
if (~0 == sw_if_index)
return (clib_error_return (0, "invalid interface"));
if (~0 == acl_index)
@@ -2809,7 +2832,6 @@ acl_set_aclplugin_interface_fn (vlib_main_t * vm,
acl_interface_add_del_inout_acl (sw_if_index, is_add, is_input, acl_index);
- unformat_free (line_input);
return (NULL);
}
@@ -2832,6 +2854,7 @@ acl_set_aclplugin_acl_fn (vlib_main_t * vm,
int rv;
int rule_idx = 0;
int n_rules_override = -1;
+ u32 acl_index = ~0;
u32 proto = 0;
u32 port1 = 0;
u32 port2 = 0;
@@ -2845,7 +2868,13 @@ acl_set_aclplugin_acl_fn (vlib_main_t * vm,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "permit+reflect"))
+ if (unformat (line_input, "index %d", &acl_index))
+ {
+ /* operate on this acl index (which must exist),
+ * If not specified, or set to -1, create a new ACL
+ */
+ }
+ else if (unformat (line_input, "permit+reflect"))
{
vec_validate_acl_rules (rules, rule_idx);
rules[rule_idx].is_permit = 2;
@@ -2933,7 +2962,6 @@ acl_set_aclplugin_acl_fn (vlib_main_t * vm,
break;
}
- u32 acl_index = ~0;
if (!tag)
vec_add (tag, "cli", 4);
@@ -2942,6 +2970,7 @@ acl_set_aclplugin_acl_fn (vlib_main_t * vm,
vec_free (rules);
vec_free (tag);
+ unformat_free (line_input);
if (rv)
return (clib_error_return (0, "failed"));
@@ -2951,6 +2980,37 @@ acl_set_aclplugin_acl_fn (vlib_main_t * vm,
}
static clib_error_t *
+acl_delete_aclplugin_acl_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ int rv;
+ u32 acl_index = ~0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "index %d", &acl_index))
+ {
+ /* operate on this acl index (which must exist) */
+ }
+ else
+ break;
+ }
+
+ rv = acl_del_list (acl_index);
+
+ unformat_free (line_input);
+ if (rv)
+ return (clib_error_return (0, "failed"));
+
+ vlib_cli_output (vm, "Deleted ACL index:%d", acl_index);
+ return (NULL);
+}
+
+static clib_error_t *
acl_show_aclplugin_macip_acl_fn (vlib_main_t * vm,
unformat_input_t *
input, vlib_cli_command_t * cmd)
@@ -3270,7 +3330,6 @@ acl_plugin_show_sessions (acl_main_t * am,
vlib_cli_output (vm, " link list id: %u", sess->link_list_id);
}
vlib_cli_output (vm, " connection add/del stats:", wk);
- /* *INDENT-OFF* */
pool_foreach (swif, im->sw_interfaces)
{
u32 sw_if_index = swif->sw_if_index;
@@ -3295,7 +3354,6 @@ acl_plugin_show_sessions (acl_main_t * am,
n_dels,
n_epoch_changes);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, " connection timeout type lists:", wk);
u8 tt = 0;
@@ -3432,6 +3490,8 @@ acl_show_aclplugin_tables_fn (vlib_main_t * vm,
}
vlib_cli_output (vm, "Stats counters enabled for interface ACLs: %d",
acl_main.interface_acl_counters_enabled);
+ vlib_cli_output (vm, "Use hash-based lookup for ACLs: %d",
+ acl_main.use_hash_acl_matching);
if (show_mask_type)
acl_plugin_show_tables_mask_type ();
if (show_acl_hash_info)
@@ -3455,7 +3515,6 @@ acl_clear_aclplugin_fn (vlib_main_t * vm,
return error;
}
- /* *INDENT-OFF* */
VLIB_CLI_COMMAND (aclplugin_set_command, static) = {
.path = "set acl-plugin",
.short_help = "set acl-plugin session timeout {{udp idle}|tcp {idle|transient}} <seconds>",
@@ -3545,26 +3604,45 @@ VLIB_CLI_COMMAND (aclplugin_set_interface_command, static) = {
/*?
* Create an Access Control List (ACL)
- * an ACL is composed of more than one Access control element (ACE). Multiple
+ * If index is not specified, a new one will be created. Otherwise, replace
+ * the one at this index.
+ *
+ * An ACL is composed of more than one Access control element (ACE). Multiple
* ACEs can be specified with this command using a comma separated list.
*
- * Each ACE describes a tuple of src+dst IP prefix, ip protocol, src+dst port ranges.
- * (the ACL plugin also support ICMP types/codes instead of UDP/TCP ports, but
- * this CLI does not).
+ * Each ACE describes a tuple of src+dst IP prefix, ip protocol, src+dst port
+ * ranges. (the ACL plugin also support ICMP types/codes instead of UDP/TCP
+ * ports, but this CLI does not).
*
- * An ACL can optionally be assigned a 'tag' - which is an identifier understood
- * by the client. VPP does not examine it in any way.
+ * An ACL can optionally be assigned a 'tag' - which is an identifier
+ * understood by the client. VPP does not examine it in any way.
*
- * @cliexpar
- * <b><em> set acl-plugin acl <permit|deny> src <PREFIX> dst <PREFIX> proto <TCP|UDP> sport <X-Y> dport <X-Y> [tag FOO] </b></em>
- * @cliexend
+ * @cliexcmd{set acl-plugin acl <permit|deny|permit+reflect> src <PREFIX> dst
+ * <PREFIX> proto <TCP|UDP> sport <X-Y> dport <X-Y> tcpflags <X> mask <X>
+ * [tag FOO]}
?*/
VLIB_CLI_COMMAND (aclplugin_set_acl_command, static) = {
- .path = "set acl-plugin acl",
- .short_help = "set acl-plugin acl <permit|deny> src <PREFIX> dst <PREFIX> proto X sport X-Y dport X-Y [tag FOO] {use comma separated list for multiple rules}",
- .function = acl_set_aclplugin_acl_fn,
+ .path = "set acl-plugin acl",
+ .short_help =
+ "set acl-plugin acl [index <idx>] <permit|deny|permit+reflect> src "
+ "<PREFIX> dst <PREFIX> [proto X] [sport X[-Y]] [dport X[-Y]] [tcpflags "
+ "<int> mask <int>] [tag FOO] {use comma separated list for multiple "
+ "rules}",
+ .function = acl_set_aclplugin_acl_fn,
+};
+
+/*?
+ * Delete an Access Control List (ACL)
+ * Removes an ACL at the specified index, which must exist but not in use by
+ * any interface.
+ *
+ * @cliexcmd{delete acl-plugin acl index <idx>}
+ ?*/
+VLIB_CLI_COMMAND (aclplugin_delete_acl_command, static) = {
+ .path = "delete acl-plugin acl",
+ .short_help = "delete acl-plugin acl index <idx>",
+ .function = acl_delete_aclplugin_acl_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
acl_plugin_config (vlib_main_t * vm, unformat_input_t * input)
@@ -3696,7 +3774,7 @@ acl_init (vlib_main_t * vm)
vec_validate (pw->expired,
ACL_N_TIMEOUTS *
am->fa_max_deleted_sessions_per_interval);
- _vec_len (pw->expired) = 0;
+ vec_set_len (pw->expired, 0);
vec_validate_init_empty (pw->fa_conn_list_head, ACL_N_TIMEOUTS - 1,
FA_SESSION_BOGUS_INDEX);
vec_validate_init_empty (pw->fa_conn_list_tail, ACL_N_TIMEOUTS - 1,
diff --git a/src/plugins/acl/acl_hash_lookup_doc.md b/src/plugins/acl/acl_hash_lookup_doc.md
deleted file mode 100644
index 6b08e1bc953..00000000000
--- a/src/plugins/acl/acl_hash_lookup_doc.md
+++ /dev/null
@@ -1,240 +0,0 @@
-ACL plugin constant-time lookup design {#acl_hash_lookup}
-======================================
-
-The initial implementation of ACL plugin performs a trivial for() cycle,
-going through the assigned ACLs on a per-packet basis. This is not very
-efficient, even if for very short ACLs due to its simplicity it can beat
-more advanced methods.
-
-However, to cover the case of longer ACLs with acceptable performance,
-we need to have a better way of matching. This write-up proposes
-a mechanism to make a lookup from O(M) where M is number of entries
-to O(N) where N is number of different mask combinations.
-
-Preparation of ACL(s)
----------------------
-
-The ACL plugin will maintain a global list of "mask types", i.e. the specific
-configurations of "do not care" bits within the ACEs.
-Upon the creation of a new ACL, a pass will be made through all the
-ACEs, to assign and possibly allocate the "mask type number".
-
-Each ACL has a structure *hash_acl_info_t* representing the "hash-based"
-parts of information related to that ACL, primarily the array of
-*hash_ace_info_t* structures - each of the members of that array
-corresponding to one of the rules (ACEs) in the original ACL,
-for this they have a pair of *(acl_index, ace_index)* to keep track,
-predominantly for debugging.
-
-Why do we need a whole separate structure, and are not adding new fields
-to the existing rule structure? First, encapsulation, to minimize
-the pollution of the main ACL code with the hash-based lookup artifacts.
-Second, one rule may correspond to more than one "hash-based" ACE.
-In fact, most of the rules do correspond to two of those. Why ?
-
-Consider that the current ACL lookup logic is that if a packet
-is not the initial fragment, and there is an L4 entry acting on the packet,
-the comparison will be made only on the L4 protocol field value rather
-than on the protocol and port values. This behavior is governed by
-*l4_match_nonfirst_fragment* flag in the *acl_main*, and is needed to
-maintain the compatibility with the existing software switch implementation.
-
-While for the sequential check in *single_acl_match_5tuple()*
-it is very easy to implement by just breaking out at the right moment,
-in case of hash-based matching this cost us two checks:
-one on full 5-tuple and the flag *pkt.is_nonfirst_fragment* being zero,
-the second on 3-tuple and the flag *pkt.is_nonfirst_fragment* being one,
-with the second check triggered by the *acl_main.l4_match_nonfirst_fragment*
-setting being the default 1. This dictates the necessity of having a "match"
-field in a given *hash_ace_info_t* element, which would reflect the value
-we are supposed to match after applying the mask.
-
-There can be other circumstances when it might be beneficial to expand
-the given rule in the original ACL into multiple - for example, as an
-optimization within the port range handling for small port ranges
-(this is not done as of the time of writing).
-
-Assigning ACLs to an interface
-------------------------------
-
-Once the ACL list is assigned to an interface, or, rather, a new ACL
-is added to the list of the existing ACLs applied to the interface,
-we need to update the bihash accelerating the lookup.
-
-All the entries for the lookups are stored within a single *48_8* bihash,
-which captures the 5-tuple from the packet as well as the miscellaneous
-per-packet information flags, e.g. *l4_valid*, *is_non_first_fragment*,
-and so on. To facilitate the use of the single bihash by all the interfaces,
-the *is_ip6*, *is_input*, *sw_if_index* are part of the key,
-as well as *mask_type_index* - the latter being necessary because
-there can be entries with the same value but different masks, e.g.:
-`permit ::/0, permit::/128`.
-
-At the moment of an ACL being applied to an interface, we need to
-walk the list of *hash_ace_info_t* entries corresponding to that ACL,
-and update the bihash with the keys corresponding to the match
-values in these entries.
-
-The value of the hash match contains the index into a per-*sw_if_index* vector
-of *applied_ace_hash_entry_t* elements, as well as a couple of flags:
-*shadowed* (optimization: if this flag on a matched entry is zero, means
-we can stop the lookup early and declare a match - see below),
-and *need_portrange_check* - meaning that what matched was a superset
-of the actual match, and we need to perform an extra check.
-
-Also, upon insertion, we must keep in mind there can be
-multiple *applied_ace_hash_entry_t* for the same key and must keep
-a list of those. This is necessary to incrementally apply/unapply
-the ACLs as part of the ACL vector: say, two ACLs have
-"permit 2001:db8::1/128 any" - we should be able to retain the entry
-for the second ACL even if we have deleted the first one.
-Also, in case there are two entries with the same key but
-different port ranges, say 0..42 and 142..65535 - we need
-to be able to sequentially match on those if we decide not
-to expand them into individual port-specific entries.
-
-Per-packet lookup
------------------
-
-The simple single-packet lookup is defined in
-*multi_acl_match_get_applied_ace_index*, which returns the index
-of the applied hash ACE if there was a match, or ~0 if there wasn't.
-
-The future optimized per-packet lookup may be batched in three phases:
-
-1. Prepare the keys in the per-worker vector by doing logical AND of
- original 5-tuple record with the elements of the mask vector.
-2. Lookup the keys in the bihash in a batch manner, collecting the
- result with lowest u64 (acl index within vector, ACE index) from
- the hash lookup value, and performing the list walk if necessary
- (for portranges).
-3. Take the action from the ACL record as defined by (ACL#, ACE#) from the
- resulting lookup winner, or, if no match found, then perform default deny.
-
-Shadowed/independent/redundant ACEs
-------------------------------------
-
-During the phase of combining multiple ACLs into one rulebase, when they
-are applied to interface, we also can perform several optimizations.
-
-If a given ACE is a strict subset of another ACE located up in the linear
-search order, we can ignore this ACE completely - because by definition
-it will never match. We will call such an ACE *redundant*. Here is an example:
-
-```
-permit 2001:db8:1::/48 2001:db8:2::/48 (B)
-deny 2001:d8b:1:1::/64 2001:db8:2:1::/64 (A)
-```
-
-A bit more formally, we can define this relationship of an ACE A to ACE B as:
-
-```
-redundant(aceA, aceB) := (contains(protoB, protoA) && contains(srcB, srcA)
- && contains(dstB, dstA) && is_after(A, B))
-```
-
-Here as "contains" we define an operation operating on the sets defined by
-the protocol, (srcIP, srcPortDefinition) and (dstIP, dstPortDefinition)
-respectively, and returning true if all the elements represented by
-the second argument are represented by the first argument. The "is_after"
-is true if A is located below B in the ruleset.
-
-If a given ACE does not intersect at all with any other ACE
-in front of it, we can mark it as such.
-
-Then during the sequence of the lookups the successful hit on this ACE means
-we do not need to look up other mask combinations - thus potentially
-significantly speeding up the match process. Here is an example,
-assuming we have the following ACL:
-
-```
-permit 2001:db8:1::/48 2001:db8:2::/48 (B)
-deny 2001:db8:3::/48 2001:db8:2:1::/64 (A)
-```
-
-In this case if we match the second entry, we do not need to check whether
-we have matched the first one - the source addresses are completely
-different. We call such an ACE *independent* from another.
-
-We can define this as
-
-```
-independent(aceA, aceB) := (!intersect(protoA, protoB) ||
- !intersect(srcA, srcB) ||
- !intersect(dstA, dstB))
-```
-
-where intersect is defined as operation returning true if there are
-elements belonging to the sets of both arguments.
-
-If the entry A is neither redundant nor independent from B, and is below
-B in the ruleset, we call such an entry *shadowed* by B, here is an example:
-
-```
-deny tcp 2001:db8:1::/48 2001:db8:2::/48 (B)
-permit 2001:d8b:1:1::/64 2001:db8:2:1::/64 (A)
-```
-
-This means the earlier rule "carves out" a subset of A, thus leaving
-a "shadow". (Evidently, the action needs to be different for the shadow
-to have an effect, but for for the terminology sake we do not care).
-
-The more formal definition:
-
-```
-shadowed(aceA, aceB) := !redundant(aceA, aceB) &&
- !independent(aceA, aceB) &&
- is_after(aceA, aceB)
-```
-
-Using this terminology, any ruleset can be represented as
-a DAG (Directed Acyclic Graph), with the bottom being the implicit
-"deny any", pointing to the set of rules shadowing it or the ones
-it is redundant for.
-
-These rules may in turn be shadowing each other. There is no cycles in
-this graph because of the natural order of the rules - the rule located
-closer to the end of the ruleset can never shadow or make redundant a rule
-higher up.
-
-The optimization that enables can allow for is to skip matching certain
-masks on a per-lookup basis - if a given rule has matched,
-the only adjustments that can happen is the match with one of
-the shadowing rules.
-
-Also, another avenue for the optimization can be starting the lookup process
-with the mask type that maximizes the chances of the independent ACE match,
-thus resulting in an ACE lookup being a single hash table hit.
-
-
-Plumbing
---------
-
-All the new routines are located in a separate file,
-so we can cleanly experiment with a different approach if this
-does not fit all of the use cases.
-
-The constant-time lookup within the data path has the API with
-the same signature as:
-
-```
-u8
-multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
- int is_ip6, int is_input, u32 * acl_match_p,
- u32 * rule_match_p, u32 * trace_bitmap)
-```
-
-There should be a new upper-level function with the same signature, which
-will make a decision whether to use a linear lookup, or to use the
-constant-time lookup implemented by this work, or to add some other
-optimizations (e.g. by keeping the cache of the last N lookups).
-
-The calls to the routine doing preparatory work should happen
-in `acl_add_list()` after creating the linear-lookup structures,
-and the routine doing the preparatory work populating the hashtable
-should be called from `acl_interface_add_del_inout_acl()` or its callees.
-
-The initial implementation will be geared towards looking up a single
-match at a time, with the subsequent optimizations possible to make
-the lookup for more than one packet.
-
diff --git a/src/plugins/acl/acl_hash_lookup_doc.rst b/src/plugins/acl/acl_hash_lookup_doc.rst
new file mode 100644
index 00000000000..72842af423d
--- /dev/null
+++ b/src/plugins/acl/acl_hash_lookup_doc.rst
@@ -0,0 +1,243 @@
+ACL plugin constant-time lookup
+===============================
+
+The initial implementation of ACL plugin performs a trivial for() cycle,
+going through the assigned ACLs on a per-packet basis. This is not very
+efficient, even if for very short ACLs due to its simplicity it can beat
+more advanced methods.
+
+However, to cover the case of longer ACLs with acceptable performance,
+we need to have a better way of matching. This write-up proposes a
+mechanism to make a lookup from O(M) where M is number of entries to
+O(N) where N is number of different mask combinations.
+
+Preparation of ACL(s)
+---------------------
+
+The ACL plugin will maintain a global list of “mask types”, i.e. the
+specific configurations of “do not care” bits within the ACEs. Upon the
+creation of a new ACL, a pass will be made through all the ACEs, to
+assign and possibly allocate the “mask type number”.
+
+Each ACL has a structure *hash_acl_info_t* representing the “hash-based”
+parts of information related to that ACL, primarily the array of
+*hash_ace_info_t* structures - each of the members of that array
+corresponding to one of the rules (ACEs) in the original ACL, for this
+they have a pair of *(acl_index, ace_index)* to keep track,
+predominantly for debugging.
+
+Why do we need a whole separate structure, and are not adding new fields
+to the existing rule structure? First, encapsulation, to minimize the
+pollution of the main ACL code with the hash-based lookup artifacts.
+Second, one rule may correspond to more than one “hash-based” ACE. In
+fact, most of the rules do correspond to two of those. Why ?
+
+Consider that the current ACL lookup logic is that if a packet is not
+the initial fragment, and there is an L4 entry acting on the packet, the
+comparison will be made only on the L4 protocol field value rather than
+on the protocol and port values. This behavior is governed by
+*l4_match_nonfirst_fragment* flag in the *acl_main*, and is needed to
+maintain the compatibility with the existing software switch
+implementation.
+
+While for the sequential check in *single_acl_match_5tuple()* it is very
+easy to implement by just breaking out at the right moment, in case of
+hash-based matching this cost us two checks: one on full 5-tuple and the
+flag *pkt.is_nonfirst_fragment* being zero, the second on 3-tuple and
+the flag *pkt.is_nonfirst_fragment* being one, with the second check
+triggered by the *acl_main.l4_match_nonfirst_fragment* setting being the
+default 1. This dictates the necessity of having a “match” field in a
+given *hash_ace_info_t* element, which would reflect the value we are
+supposed to match after applying the mask.
+
+There can be other circumstances when it might be beneficial to expand
+the given rule in the original ACL into multiple - for example, as an
+optimization within the port range handling for small port ranges (this
+is not done as of the time of writing).
+
+Assigning ACLs to an interface
+------------------------------
+
+Once the ACL list is assigned to an interface, or, rather, a new ACL is
+added to the list of the existing ACLs applied to the interface, we need
+to update the bihash accelerating the lookup.
+
+All the entries for the lookups are stored within a single *48_8*
+bihash, which captures the 5-tuple from the packet as well as the
+miscellaneous per-packet information flags, e.g. *l4_valid*,
+*is_non_first_fragment*, and so on. To facilitate the use of the single
+bihash by all the interfaces, the *is_ip6*, *is_input*, *sw_if_index*
+are part of the key, as well as *mask_type_index* - the latter being
+necessary because there can be entries with the same value but different
+masks, e.g.: ``permit ::/0, permit::/128``.
+
+At the moment of an ACL being applied to an interface, we need to walk
+the list of *hash_ace_info_t* entries corresponding to that ACL, and
+update the bihash with the keys corresponding to the match values in
+these entries.
+
+The value of the hash match contains the index into a per-*sw_if_index*
+vector of *applied_ace_hash_entry_t* elements, as well as a couple of
+flags: *shadowed* (optimization: if this flag on a matched entry is
+zero, means we can stop the lookup early and declare a match - see
+below), and *need_portrange_check* - meaning that what matched was a
+superset of the actual match, and we need to perform an extra check.
+
+Also, upon insertion, we must keep in mind there can be multiple
+*applied_ace_hash_entry_t* for the same key and must keep a list of
+those. This is necessary to incrementally apply/unapply the ACLs as part
+of the ACL vector: say, two ACLs have “permit 2001:db8::1/128 any” - we
+should be able to retain the entry for the second ACL even if we have
+deleted the first one. Also, in case there are two entries with the same
+key but different port ranges, say 0..42 and 142..65535 - we need to be
+able to sequentially match on those if we decide not to expand them into
+individual port-specific entries.
+
+Per-packet lookup
+-----------------
+
+The simple single-packet lookup is defined in
+*multi_acl_match_get_applied_ace_index*, which returns the index of the
+applied hash ACE if there was a match, or ~0 if there wasn’t.
+
+The future optimized per-packet lookup may be batched in three phases:
+
+1. Prepare the keys in the per-worker vector by doing logical AND of
+ original 5-tuple record with the elements of the mask vector.
+2. Lookup the keys in the bihash in a batch manner, collecting the
+ result with lowest u64 (acl index within vector, ACE index) from the
+ hash lookup value, and performing the list walk if necessary (for
+ portranges).
+3. Take the action from the ACL record as defined by (ACL#, ACE#) from
+ the resulting lookup winner, or, if no match found, then perform
+ default deny.
+
+Shadowed/independent/redundant ACEs
+-----------------------------------
+
+During the phase of combining multiple ACLs into one rulebase, when they
+are applied to interface, we also can perform several optimizations.
+
+If a given ACE is a strict subset of another ACE located up in the
+linear search order, we can ignore this ACE completely - because by
+definition it will never match. We will call such an ACE *redundant*.
+Here is an example:
+
+::
+
+ permit 2001:db8:1::/48 2001:db8:2::/48 (B)
+ deny 2001:d8b:1:1::/64 2001:db8:2:1::/64 (A)
+
+A bit more formally, we can define this relationship of an ACE A to ACE
+B as:
+
+::
+
+ redundant(aceA, aceB) := (contains(protoB, protoA) && contains(srcB, srcA)
+ && contains(dstB, dstA) && is_after(A, B))
+
+Here as “contains” we define an operation operating on the sets defined
+by the protocol, (srcIP, srcPortDefinition) and (dstIP,
+dstPortDefinition) respectively, and returning true if all the elements
+represented by the second argument are represented by the first
+argument. The “is_after” is true if A is located below B in the ruleset.
+
+If a given ACE does not intersect at all with any other ACE in front of
+it, we can mark it as such.
+
+Then during the sequence of the lookups the successful hit on this ACE
+means we do not need to look up other mask combinations - thus
+potentially significantly speeding up the match process. Here is an
+example, assuming we have the following ACL:
+
+::
+
+ permit 2001:db8:1::/48 2001:db8:2::/48 (B)
+ deny 2001:db8:3::/48 2001:db8:2:1::/64 (A)
+
+In this case if we match the second entry, we do not need to check
+whether we have matched the first one - the source addresses are
+completely different. We call such an ACE *independent* from another.
+
+We can define this as
+
+::
+
+ independent(aceA, aceB) := (!intersect(protoA, protoB) ||
+ !intersect(srcA, srcB) ||
+ !intersect(dstA, dstB))
+
+where intersect is defined as operation returning true if there are
+elements belonging to the sets of both arguments.
+
+If the entry A is neither redundant nor independent from B, and is below
+B in the ruleset, we call such an entry *shadowed* by B, here is an
+example:
+
+::
+
+ deny tcp 2001:db8:1::/48 2001:db8:2::/48 (B)
+ permit 2001:d8b:1:1::/64 2001:db8:2:1::/64 (A)
+
+This means the earlier rule “carves out” a subset of A, thus leaving a
+“shadow”. (Evidently, the action needs to be different for the shadow to
+have an effect, but for for the terminology sake we do not care).
+
+The more formal definition:
+
+::
+
+ shadowed(aceA, aceB) := !redundant(aceA, aceB) &&
+ !independent(aceA, aceB) &&
+ is_after(aceA, aceB)
+
+Using this terminology, any ruleset can be represented as a DAG
+(Directed Acyclic Graph), with the bottom being the implicit “deny any”,
+pointing to the set of rules shadowing it or the ones it is redundant
+for.
+
+These rules may in turn be shadowing each other. There is no cycles in
+this graph because of the natural order of the rules - the rule located
+closer to the end of the ruleset can never shadow or make redundant a
+rule higher up.
+
+The optimization that enables can allow for is to skip matching certain
+masks on a per-lookup basis - if a given rule has matched, the only
+adjustments that can happen is the match with one of the shadowing
+rules.
+
+Also, another avenue for the optimization can be starting the lookup
+process with the mask type that maximizes the chances of the independent
+ACE match, thus resulting in an ACE lookup being a single hash table
+hit.
+
+Plumbing
+--------
+
+All the new routines are located in a separate file, so we can cleanly
+experiment with a different approach if this does not fit all of the use
+cases.
+
+The constant-time lookup within the data path has the API with the same
+signature as:
+
+::
+
+ u8
+ multi_acl_match_5tuple (u32 sw_if_index, fa_5tuple_t * pkt_5tuple, int is_l2,
+ int is_ip6, int is_input, u32 * acl_match_p,
+ u32 * rule_match_p, u32 * trace_bitmap)
+
+There should be a new upper-level function with the same signature,
+which will make a decision whether to use a linear lookup, or to use the
+constant-time lookup implemented by this work, or to add some other
+optimizations (e.g. by keeping the cache of the last N lookups).
+
+The calls to the routine doing preparatory work should happen in
+``acl_add_list()`` after creating the linear-lookup structures, and the
+routine doing the preparatory work populating the hashtable should be
+called from ``acl_interface_add_del_inout_acl()`` or its callees.
+
+The initial implementation will be geared towards looking up a single
+match at a time, with the subsequent optimizations possible to make the
+lookup for more than one packet.
diff --git a/src/plugins/acl/acl_lookup_context.md b/src/plugins/acl/acl_lookup_context.md
deleted file mode 100644
index e95f82043f9..00000000000
--- a/src/plugins/acl/acl_lookup_context.md
+++ /dev/null
@@ -1,125 +0,0 @@
-Lookup contexts aka "ACL as a service" {#acl_lookup_context}
-======================================
-
-The initial implementation of the ACL plugin had tightly tied the policy (L3-L4) ACLs
-to ingress/egress processing on an interface.
-
-However, some uses outside of pure traffic control have appeared, for example,
-ACL-based forwarding, etc. Also, improved algorithms of the ACL lookup
-could benefit of the more abstract representation, not coupled to the interfaces.
-
-This describes a way to accommodate these use cases by generalizing the ACL
-lookups into "ACL lookup contexts", not tied to specific interfaces, usable
-by other portions of the code by utilizing the exports.h header file,
-which provides the necessary interface.
-
-
-Why "lookup contexts" and not "match me an ACL#" ?
-================================================
-
-The first reason is the logical grouping of multiple ACLs.
-
-The interface matching code currently allows for matching multiple ACLs
-in a 'first-match' fashion. Some other use cases also fall into a similar
-pattern: they attempt to match a sequence of ACLs, and the first matched ACL
-determines what the outcome is, e.g. where to forward traffic. Thus,
-a match never happens on an ACL in isolation, but always on a group of
-ACLs.
-
-The second reason is potential optimizations in matching.
-
-A naive match on series of ACLs each represented as a vector of ACEs
-does not care about the API level - it could be "match one ACL", or
-"match the set of ACLs" - there will be just a simple loop iterating over
-the ACLs to match, returning the first match. Be it in the ACL code or
-in the user code.
-
-However, for more involved lookup methods, providing a more high-level
-interface of matching over the entire group of ACLs allows for future
-improvements in the algorithms, delivered at once to all the users
-of the API.
-
-What is a "lookup context" ?
-============================
-
-An ACL lookup context is an entity that groups the set of ACL#s
-together for the purposes of a first-match lookup, and may store
-additional internal information needed to optimize the lookups
-for that particular vector of ACLs.
-
-Using ACL contexts in your code
-===============================
-
-In order to use the ACL lookup contexts, you need to include
-plugins/acl/exports.h into your code. This header includes
-all the necessary dependencies required.
-
-As you probably will invoke this code from another plugin,
-the non-inline function calls are implemented via function pointers,
-which you need to initialize by calling acl_plugin_exports_init(&acl_plugin), which,
-if everything succeeds, returns 0 and fills in the acl_plugin structure
-with pointers to the exported methods - else it will return clib_error_t with
-more information about what went wrong.
-
-When you have initialized the symbols, you also need to register yourself
-as a user of the ACL lookups - this allows to track the ACL lookup context
-ownership, as well as make the debug show outputs more user friendly.
-
-To do that, call acl_plugin.register_user_module(caller_module_string, val1_label, val2_label) -
-and record the returned value. This will bethe first parameter that you pass to create a new
-lookup context. The passed strings must be static, and are used as descriptions for the ACL
-contexts themselves, as well as labels for up to two user-supplied u32 labels, used to
-differentiate the lookup contexts for the debugging purposes.
-
-Creating a new context is done by calling acl_plugin.get_lookup_context_index(user_id, val1, val2).
-The first argument is your "user" ID obtained in a registration call earlier, the other two
-arguments are u32s with semantics that you designate. They are used purely for debugging purposes
-in the "show acl lookup context" command.
-
-To set the vector of ACL numbers to be looked up within the context, use the function
-acl_plugin.set_acl_vec_for_context(lc_index, acl_list). The first parameter specifies the context
-that you have created, the second parameter is a vector of u32s, each u32 being the index of the ACL
-which we should be looking up within this context. The command is idempotent, i.e.
-it unapplies the previously applied list of ACLs, and then sets the new list of ACLs.
-
-Subsequent ACL updates for the already applied ACLs will cause the re-application
-on an as-needed basis. Note, that the ACL application is potentially a relatively costly operation,
-so it is only expected that these changes will be done in the control plane, NOT in the datapath.
-
-The matching within the context is done using two functions - acl_plugin.fill_5tuple() and
-acl_plugin.match_5tuple() and their corresponding inline versions, named acl_plugin_fill_5tuple_inline()
-and acl_plugin_match_5tuple_inline(). The inline and non-inline versions have the equivalent functionality,
-in that the non-inline version calls the inline version. These two variants are provided
-for debugging/maintenance reasons.
-
-When you no longer need a particular context, you can return the allocated resources by calling
-acl_plugin.put_lookup_context_index() to mark it as free. The lookup structured associated with
-the vector of ACLs set for the lookup are cleaned up automatically. However, the ACLs themselves
-are not deleted and are available for subsequent reuse by other lookup contexts if needed.
-
-There is one delicate detail that you might want to be aware of.
-When the non-inline functions reference the inline functions,
-they are compiled as part of ACL plugin; whereas when you refer to the inline
-functions from your code, they are compiled as part of your code.
-This makes referring to a single acl_main structure a little trickier.
-
-It is done by having a static p_acl_main within the .h file,
-which points to acl_main of the ACL plugin, and is initialized by a static constructor
-function.
-
-This way the multiple includes and inlines will "just work" as one would expect.
-
-
-Debug CLIs
-==========
-
-To see the state of the ACL lookup contexts, you can issue "show acl-plugin lookup user" to see
-all of the users which registered for the usage of the ACL plugin lookup contexts,
-and "show acl-plugin lookup context" to show the actual contexts created. You will notice
-that the latter command uses the values supplied during the module registration in order to
-make the output more friendly.
-
-The "show acl-plugin acl" and "show acl-plugin interface" commands have also acquired the
-notion of lookup context, but there it is used from the client perspective, since
-with this change the interface ACL lookup itself is a user of ACL lookup contexts.
-
diff --git a/src/plugins/acl/acl_lookup_context.rst b/src/plugins/acl/acl_lookup_context.rst
new file mode 100644
index 00000000000..278e87381f3
--- /dev/null
+++ b/src/plugins/acl/acl_lookup_context.rst
@@ -0,0 +1,138 @@
+ACL Lookup contexts
+===================
+
+The initial implementation of the ACL plugin had tightly tied the policy
+(L3-L4) ACLs to ingress/egress processing on an interface.
+
+However, some uses outside of pure traffic control have appeared, for
+example, ACL-based forwarding, etc. Also, improved algorithms of the ACL
+lookup could benefit of the more abstract representation, not coupled to
+the interfaces.
+
+This describes a way to accommodate these use cases by generalizing the
+ACL lookups into “ACL lookup contexts”, not tied to specific interfaces,
+usable by other portions of the code by utilizing the exports.h header
+file, which provides the necessary interface.
+
+Why “lookup contexts” and not “match me an ACL” ?
+-------------------------------------------------
+
+The first reason is the logical grouping of multiple ACLs.
+
+The interface matching code currently allows for matching multiple ACLs
+in a ‘first-match’ fashion. Some other use cases also fall into a
+similar pattern: they attempt to match a sequence of ACLs, and the first
+matched ACL determines what the outcome is, e.g. where to forward
+traffic. Thus, a match never happens on an ACL in isolation, but always
+on a group of ACLs.
+
+The second reason is potential optimizations in matching.
+
+A naive match on series of ACLs each represented as a vector of ACEs
+does not care about the API level - it could be “match one ACL”, or
+“match the set of ACLs” - there will be just a simple loop iterating
+over the ACLs to match, returning the first match. Be it in the ACL code
+or in the user code.
+
+However, for more involved lookup methods, providing a more high-level
+interface of matching over the entire group of ACLs allows for future
+improvements in the algorithms, delivered at once to all the users of
+the API.
+
+What is a “lookup context” ?
+----------------------------
+
+An ACL lookup context is an entity that groups the set of ACL#s together
+for the purposes of a first-match lookup, and may store additional
+internal information needed to optimize the lookups for that particular
+vector of ACLs.
+
+Using ACL contexts in your code
+-------------------------------
+
+In order to use the ACL lookup contexts, you need to include
+plugins/acl/exports.h into your code. This header includes all the
+necessary dependencies required.
+
+As you probably will invoke this code from another plugin, the
+non-inline function calls are implemented via function pointers, which
+you need to initialize by calling acl_plugin_exports_init(&acl_plugin),
+which, if everything succeeds, returns 0 and fills in the acl_plugin
+structure with pointers to the exported methods - else it will return
+clib_error_t with more information about what went wrong.
+
+When you have initialized the symbols, you also need to register
+yourself as a user of the ACL lookups - this allows to track the ACL
+lookup context ownership, as well as make the debug show outputs more
+user friendly.
+
+To do that, call acl_plugin.register_user_module(caller_module_string,
+val1_label, val2_label) - and record the returned value. This will be the
+first parameter that you pass to create a new lookup context. The passed
+strings must be static, and are used as descriptions for the ACL
+contexts themselves, as well as labels for up to two user-supplied u32
+labels, used to differentiate the lookup contexts for the debugging
+purposes.
+
+Creating a new context is done by calling
+acl_plugin.get_lookup_context_index(user_id, val1, val2). The first
+argument is your “user” ID obtained in a registration call earlier, the
+other two arguments are u32s with semantics that you designate. They are
+used purely for debugging purposes in the “show acl lookup context”
+command.
+
+To set the vector of ACL numbers to be looked up within the context, use
+the function acl_plugin.set_acl_vec_for_context(lc_index, acl_list). The
+first parameter specifies the context that you have created, the second
+parameter is a vector of u32s, each u32 being the index of the ACL which
+we should be looking up within this context. The command is idempotent,
+i.e. it unapplies the previously applied list of ACLs, and then sets the
+new list of ACLs.
+
+Subsequent ACL updates for the already applied ACLs will cause the
+re-application on an as-needed basis. Note, that the ACL application is
+potentially a relatively costly operation, so it is only expected that
+these changes will be done in the control plane, NOT in the datapath.
+
+The matching within the context is done using two functions -
+acl_plugin.fill_5tuple() and acl_plugin.match_5tuple() and their
+corresponding inline versions, named acl_plugin_fill_5tuple_inline() and
+acl_plugin_match_5tuple_inline(). The inline and non-inline versions
+have the equivalent functionality, in that the non-inline version calls
+the inline version. These two variants are provided for
+debugging/maintenance reasons.
+
+When you no longer need a particular context, you can return the
+allocated resources by calling acl_plugin.put_lookup_context_index() to
+mark it as free. The lookup structured associated with the vector of
+ACLs set for the lookup are cleaned up automatically. However, the ACLs
+themselves are not deleted and are available for subsequent reuse by
+other lookup contexts if needed.
+
+There is one delicate detail that you might want to be aware of. When
+the non-inline functions reference the inline functions, they are
+compiled as part of ACL plugin; whereas when you refer to the inline
+functions from your code, they are compiled as part of your code. This
+makes referring to a single acl_main structure a little trickier.
+
+It is done by having a static p_acl_main within the .h file, which
+points to acl_main of the ACL plugin, and is initialized by a static
+constructor function.
+
+This way the multiple includes and inlines will “just work” as one would
+expect.
+
+Debug CLIs
+----------
+
+To see the state of the ACL lookup contexts, you can issue “show
+acl-plugin lookup user” to see all of the users which registered for the
+usage of the ACL plugin lookup contexts, and “show acl-plugin lookup
+context” to show the actual contexts created. You will notice that the
+latter command uses the values supplied during the module registration
+in order to make the output more friendly.
+
+The “show acl-plugin acl” and “show acl-plugin interface” commands have
+also acquired the notion of lookup context, but there it is used from
+the client perspective, since with this change the interface ACL lookup
+itself is a user of ACL lookup contexts.
diff --git a/src/plugins/acl/acl_multicore_doc.md b/src/plugins/acl/acl_multicore_doc.md
deleted file mode 100644
index deec5e9d566..00000000000
--- a/src/plugins/acl/acl_multicore_doc.md
+++ /dev/null
@@ -1,349 +0,0 @@
-Multicore support for ACL plugin {#acl_multicore}
-================================
-
-This captures some considerations and design decisions that I have made,
-both for my own memory later on ("what the hell was I thinking?!?"),
-and for anyone interested to criticize/improve/hack on this code.
-
-One of the factors taken into account while making these decisions,
-was the relative emphasis on the multi-thread vs. single-thread
-use cases: the latter is the vastly more prevalent. But,
-one can not optimize the single-thread performance without
-having a functioning code for multi-thread.
-
-stateless ACLs
-==============
-
-The stateless trivially parallelizes, and the only potential for the
-race between the different threads is during the reconfiguration,
-at the time of replacing the old ACL being checked, with
-the new ACL.
-
-In case an acl_add_replace is being used to replace the rules
-within the existing entry, a reallocation of `am->acls[X].rules`
-vector will happen and potentially a change in count.
-
-acl_match_5tuple() has the following code:
-
-```{.c}
- a = am->acls + acl_index;
- for (i = 0; i < a->count; i++)
- {
- r = a->rules + i;
- . . .
-```
-
-Ideally we should be immune from a->rules changing,
-but the problem arises if the count changes in flight,
-and the new ruleset is smaller - then we will attempt
-to "match" against the free memory.
-
-This can(?) be solved by replacing the for() with while(),
-so the comparison happens at each iteration.
-
-full_acl_match_5tuple(), which iterates over the list
-of ACLs, is a bit less immune, since it takes the pointer
-to the vector to iterate and keeps a local copy of
-that pointer.
-
-This race can be solved by checking the
-current pointer to the vector with the source pointer,
-and seeing if there is an (unlikely) change, and if
-there is, return the "deny" action, or, better,
-restart the check.
-
-Since the check reloads the ACL list on a per-packet basis,
-there is only a window of opportunity of one packet to
-"match" packet against an incorrect rule set.
-The workers also do not change anything, only read.
-Therefore, it looks like building special structures
-to ensure that it does not happen at all might be not
-worth it.
-
-At least not until we have a unit-test able to
-reliably catch this condition and test that
-the measures applied are effective. Adding the code
-which is not possible to exercise is worse than
-not adding any code at all.
-
-So, I opt for "do-nothing" here for the moment.
-
-reflexive ACLs: single-thread
-=============================
-
-Before we talk multi-thread, is worth revisiting the
-design of the reflexive ACLs in the plugin, and
-the history of their evolution.
-
-The very first version of the ACL plugin, shipped in
-1701, mostly did the job using the existing components
-and gluing them together. Because it needed to work
-in bridged forwarding path only, using L2 classifier
-as an insertion point appeared natural, also L2 classifier,
-being a table with sessions, seemed like a good place
-to hold the sessions.
-
-So, the original design had two conceptual nodes:
-one, pointed by the next_miss from the L2 classifier table,
-was checking the actual ACL, and inserting session into
-the L2 classifier table, and the other one, pointed
-to by the next_match within the specific session rule,
-was checking the existing session. The timing out
-of the existing connections was done in the datapath,
-by periodically calling the aging function.
-
-This decision to use the existing components,
-with its attractiveness, did bring a few limitations as well:
-
-* L2 classifier is a simple mask-and-value match, with
-a fixed mask across the table. So, sanely supporting IPv6
-packets with extension headers in that framework was impossible.
-
-* There is no way to get a backpressure from L2 classifier
-depending on memory usage. When it runs out of memory,
-it simply crashes the box. When it runs out of memory ?
-We don't really know. Depends on how it allocates it.
-
-* Since we need to match the *reflected* traffic,
-we had to create *two* full session entries
-in two different directions, which is quite wasteful memory-wise.
-
-* (showstopper): the L2 classifier runs only in
-the bridged data path, so supporting routed data path
-would require creating something else entirely different,
-which would mean much more headaches support-wise going forward.
-
-Because of that, I have moved to a different model of
-creating a session-5-tuple from the packet data - once,
-and then doing all the matching just on that 5-tuple.
-
-This has allowed to add support for skipping IPv6 extension headers.
-
-Also, this new version started to store the sessions in a dedicated
-bihash-per-interface, with the session key data being
-aligned for the ingress packets, and being mirrored for the
-egress packets. This allows of significant savings in memory,
-because now we need to keep only one copy of the session table per
-interface instead of two, and also to only have ONE node for all the lookups,
-(L2/L3 path, in/out, IPv4/IPv6) - significantly reducing the code complexity.
-
-Unfortunately, bihash still has the "lack of backpressure" problem,
-in a sense that if you try to insert too many entries and run out
-of memory in the heap you supplied, you get a crash.
-
-To somewhat workaround against that, there is a "maximum tested number of sessions"
-value, which tracks the currently inserted sessions in the bihash,
-and if this number is being approached, a more aggressive cleanup
-can happen. If this number is reached, two behaviors are possible:
-
-* attempt to do the stateless ACL matching and permit the packet
- if it succeeds
-
-* deny the packet
-
-Currently I have opted for a second one, since it allows for
-a better defined behavior, and if you have to permit
-the traffic in both directions, why using stateful anyway ?
-
-In order to be able to do the cleanup, we need to discriminate between
-the session types, with each session type having its own idle timeout.
-In order to do that, we keep three lists, defined in enum acl_timeout_e:
-ACL_TIMEOUT_UDP_IDLE, ACL_TIMEOUT_TCP_IDLE, ACL_TIMEOUT_TCP_TRANSIENT.
-
-The first one is hopefully obvious - it is just all UDP connections.
-They have an idle timeout of 600 seconds.
-
-The second and third is a bit more subtle. TCP is a complicated protocol,
-and we need to tread the fine line between doing too little and doing
-too much, and triggering the potential compatibility issues because of
-being a "middlebox".
-
-I decided to split the TCP connections into two classes:
-established, and everything else. "Established", means we have seen
-the SYN and ACK from both sides (with PUSH obviously masked out).
-This is the "active" state of any TCP connection and we would like
-to ensure we do not screw it up. So, the connections in this state
-have the default idle timer of 24 hours.
-
-All the rest of the connections have the idle timeout of 2 minutes,
-(inspired by an old value of MSL) and based on the observation
-that the states this class represent are usually very short lived.
-
-Once we have these three baskets of connections, it is trivial to
-imagine a simple cleanup mechanism to deal with this: take a
-TCP transient connection that has been hanging around.
-
-It is debatable whether we want to do discrimination between the
-different TCP transient connections. Assuming we do FIFO (and
-the lists allow us to do just that), it means a given connection
-on the head of the list has been hanging around for longest.
-Thus, if we are short on resources, we might just go ahead and
-reuse it within the datapath.
-
-This is where we are slowly approaching the question
-"Why in the world have not you used timer wheel or such ?"
-
-The answer is simple: within the above constraints, it does
-not buy me much.
-
-Also, timer wheel creates a leaky abstraction with a difficult
-to manage corner case. Which corner case ?
-
-We have a set of objects (sessions) with an event that may
-or may not happen (idle timeout timer firing), and a
-necessity to reset the idle timeout when there is
-activity on the session.
-
-In the worst case, where we had a 10000 of one-packet
-UDP sessions just created 10 minutes ago, we would need
-to deal with a spike of 10000 expired timers.
-
-Of course, if we have the active traffic on all
-of these 10000 connections, then we will not have
-to deal with that ? Right, but we will still have to deal
-with canceling and requeueing the timers.
-
-In the best possible case, requeueing a timer is
-going to be something along the lines of a linked-list
-removal and reinsertion.
-
-However, keep in mind we already need to classify the
-connections for reuse, so therefore we already have
-the linked lists!
-
-And if we just check these linked lists periodically in
-a FIFO fashion, we can get away with a very simple per-packet operation:
-writing back the timestamp of "now" into the connection structure.
-
-Then rather than requeueing the list on a per-packet or per-frame
-basis, we can defer this action until the time this session
-appears on the head of the FIFO list, and the cleaning
-routine makes the decision about whether to discard
-the session (because the interval since last activity is bigger
-than the idle timeout), or to requeue the session back to
-the end of the list (because the last activity was less
-than idle timeout ago).
-
-So, rather than using the timers, we can simply reuse our classification
-FIFOs, with the following heuristic: do not look at the session that was
-enqueued at time X until X+session_timeout. If we enqueue the sessions
-in the order of their initial activity, then we can simply use enqueue
-timestamp of the head session as a decision criterion for when we need
-to get back at looking at it for the timeout purposes.
-
-Since the number of FIFOs is small, we get a slightly worse check
-performance than with timers, but still O(1).
-
-We seemingly do quite a few "useless" operations of requeueing the items
-back to the tail of the list - but, these are the operations we do not
-have to do in the active data path, so overall it is a win.
-
-(Diversion: I believe this problem is congruent to poll vs. epoll or
-events vs. threads, some reading on this subject:
-http://web.archive.org/web/20120225022154/http://sheddingbikes.com/posts/1280829388.html)
-
-We can also can run a TCP-like scheme for adaptively changing
-the wait period in the routine that deals with the connection timeouts:
-we can attempt to check the connections a couple of times per second
-(same as we would advance the timer wheel), and then if we have requeued
-close to a max-per-quantum number of connections, we can half the waiting
-interval, and if we did not requeue any, we can slowly increment the waiting
-interval - which at a steady state should stabilize similar to what the TCP rate
-does.
-
-reflexive ACLs: multi-thread
-=============================
-
-The single-threaded implementation in 1704 used a separate "cleaner" process
-to deal with the timing out of the connections.
-It is all good and great when you know that there is only a single core
-to run everything on, but the existence of the lists proves to be
-a massive difficulty when it comes to operating from multiple threads.
-
-Initial study shows that with a few assumptions (e.g. that the cleaner running in main thread
-and the worker have a demarcation point in time where either one or the other one touches
-the session in the list) it might be possible to make it work, but the resulting
-trickiness of doing it neatly with all the corner cases is quite large.
-
-So, for the multi-threaded scenario, we need to move the connection
-aging back to the same CPU as its creation.
-
-Luckily we can do this with the help of the interrupts.
-
-So, the design is as follows: the aging thread (acl_fa_session_cleaner_process)
-periodically fires the interrupts to the workers interrupt nodes (acl_fa_worker_session_cleaner_process_node.index),
-using vlib_node_set_interrupt_pending(), and
-the interrupt node acl_fa_worker_conn_cleaner_process() calls acl_fa_check_idle_sessions()
-which does the actual job of advancing the lists. And within the actual datapath the only thing we will be
-doing is putting the items onto FIFO, and updating the last active time on the existing connection.
-
-The one "delicate" part is that the worker for one leg of the connection might be different from
-the worker of another leg of the connection - but, even if the "owner" tries to free the connection,
-nothing terrible can happen - worst case the element of the pool (which is nominally free for a short period)
-will get the timestamp updated - same thing about the TCP flags seen.
-
-A slightly trickier issue arises when the packet initially seen by one worker (thus owned by that worker),
-and the return packet processed by another worker, and as a result changes the
-the class of the connection (e.g. becomes TCP_ESTABLISHED from TCP_TRANSIENT or vice versa).
-If the class changes from one with the shorter idle time to the one with the longer idle time,
-then unless we are in the starvation mode where the transient connections are recycled,
-we can simply do nothing and let the normal requeue mechanism kick in. If the class changes from the longer idle
-timer to the shorter idle timer, then we risk keeping the connection around for longer than needed, which
-will affect the resource usage.
-
-One solution to that is to have NxN ring buffers (where N is the number of workers), such that the non-owner
-can signal to the owner the connection# that needs to be requeued out of order.
-
-A simpler solution though, is to ensure that each FIFO's period is equal to that of a shortest timer.
-This way the resource starvation problem is taken care of, at an expense of some additional work.
-
-This all looks sufficiently nice and simple until a skeleton falls out of the closet:
-sometimes we want to clean the connections en masse before they expire.
-
-There few potential scenarios:
-1) removal of an ACL from the interface
-2) removal of an interface
-3) manual action of an operator (in the future).
-
-In order to tackle this, we need to modify the logic which decides whether to requeue the
-connection on the end of the list, or to delete it due to idle timeout:
-
-We define a point in time, and have each worker thread fast-forward through its FIFO,
-in the process looking for sessions that satisfy the criteria, and either keeping them or requeueing them.
-
-To keep the ease of appearance to the outside world, we still process this as an event
-within the connection cleaner thread, but this event handler does as follows:
-1) it creates the bitmap of the sw_if_index values requested to be cleared
-2) for each worker, it waits to ensure there is no cleanup operation in progress (and if there is one,
-it waits), and then makes a copy of the bitmap, sets the per-worker flag of a cleanup operation, and sends an interrupt.
-3) wait until all cleanup operations have completed.
-
-Within the worker interrupt node, we check if the "cleanup in progress" is set,
-and if it is, we check the "fast forward time" value. If unset, we initialize it to value now, and compare the
-requested bitmap of sw_if_index values (pending_clear_sw_if_index_bitmap) with the bitmap of sw_if_index that this worker deals with.
-
-(we set the bit in the bitmap every time we enqueue the packet onto a FIFO - serviced_sw_if_index_bitmap in acl_fa_conn_list_add_session).
-
-If the result of this AND operation is zero - then we can clear the flag of cleanup in progress and return.
-Else we kick off the quantum of cleanup, and make sure we get another interrupt ASAP if that cleanup operation returns non-zero,
-meaning there is more work to do.
-When that operation returns zero, everything has been processed, we can clear the "cleanup-in-progress" flag, and
-zeroize the bitmap of sw_if_index-es requested to be cleaned.
-
-The interrupt node signals its wish to receive an interrupt ASAP by setting interrupt_is_needed
-flag within the per-worker structure. The main thread, while waiting for the
-cleanup operation to complete, checks if there is a request for interrupt,
-and if there is - it sends one.
-
-This approach gives us a way to mass-clean the connections which is reusing the code of the regular idle
-connection cleanup.
-
-One potential inefficiency is the bitmap values set by the session insertion
-in the data path - there is nothing to clear them.
-
-So, if one rearranges the interface placement with the workers, then the cleanups will cause some unnecessary work.
-For now, we consider it an acceptable limitation. It can be resolved by having another per-worker bitmap, which, when set,
-would trigger the cleanup of the bits in the serviced_sw_if_index_bitmap).
-
-=== the end ===
-
diff --git a/src/plugins/acl/acl_multicore_doc.rst b/src/plugins/acl/acl_multicore_doc.rst
new file mode 100644
index 00000000000..142b6b216d2
--- /dev/null
+++ b/src/plugins/acl/acl_multicore_doc.rst
@@ -0,0 +1,354 @@
+Multicore support for ACL plugin
+================================
+
+This captures some considerations and design decisions that I have made,
+both for my own memory later on (“what the hell was I thinking?!?”), and
+for anyone interested to criticize/improve/hack on this code.
+
+One of the factors taken into account while making these decisions, was
+the relative emphasis on the multi-thread vs. single-thread use cases:
+the latter is the vastly more prevalent. But, one can not optimize the
+single-thread performance without having a functioning code for
+multi-thread.
+
+stateless ACLs
+--------------
+
+The stateless trivially parallelizes, and the only potential for the
+race between the different threads is during the reconfiguration, at the
+time of replacing the old ACL being checked, with the new ACL.
+
+In case an acl_add_replace is being used to replace the rules within the
+existing entry, a reallocation of ``am->acls[X].rules`` vector will
+happen and potentially a change in count.
+
+acl_match_5tuple() has the following code:
+
+.. code:: c
+
+ a = am->acls + acl_index;
+ for (i = 0; i < a->count; i++)
+ {
+ r = a->rules + i;
+ . . .
+
+Ideally we should be immune from a->rules changing, but the problem
+arises if the count changes in flight, and the new ruleset is smaller -
+then we will attempt to “match” against the free memory.
+
+This can(?) be solved by replacing the for() with while(), so the
+comparison happens at each iteration.
+
+full_acl_match_5tuple(), which iterates over the list of ACLs, is a bit
+less immune, since it takes the pointer to the vector to iterate and
+keeps a local copy of that pointer.
+
+This race can be solved by checking the current pointer to the vector
+with the source pointer, and seeing if there is an (unlikely) change,
+and if there is, return the “deny” action, or, better, restart the
+check.
+
+Since the check reloads the ACL list on a per-packet basis, there is
+only a window of opportunity of one packet to “match” packet against an
+incorrect rule set. The workers also do not change anything, only read.
+Therefore, it looks like building special structures to ensure that it
+does not happen at all might be not worth it.
+
+At least not until we have a unit-test able to reliably catch this
+condition and test that the measures applied are effective. Adding the
+code which is not possible to exercise is worse than not adding any code
+at all.
+
+So, I opt for “do-nothing” here for the moment.
+
+reflexive ACLs: single-thread
+-----------------------------
+
+Before we talk multi-thread, is worth revisiting the design of the
+reflexive ACLs in the plugin, and the history of their evolution.
+
+The very first version of the ACL plugin, shipped in 1701, mostly did
+the job using the existing components and gluing them together. Because
+it needed to work in bridged forwarding path only, using L2 classifier
+as an insertion point appeared natural, also L2 classifier, being a
+table with sessions, seemed like a good place to hold the sessions.
+
+So, the original design had two conceptual nodes: one, pointed by the
+next_miss from the L2 classifier table, was checking the actual ACL, and
+inserting session into the L2 classifier table, and the other one,
+pointed to by the next_match within the specific session rule, was
+checking the existing session. The timing out of the existing
+connections was done in the datapath, by periodically calling the aging
+function.
+
+This decision to use the existing components, with its attractiveness,
+did bring a few limitations as well:
+
+- L2 classifier is a simple mask-and-value match, with a fixed mask
+ across the table. So, sanely supporting IPv6 packets with extension
+ headers in that framework was impossible.
+
+- There is no way to get a backpressure from L2 classifier depending on
+ memory usage. When it runs out of memory, it simply crashes the box.
+ When it runs out of memory ? We don’t really know. Depends on how it
+ allocates it.
+
+- Since we need to match the *reflected* traffic, we had to create
+ *two* full session entries in two different directions, which is
+ quite wasteful memory-wise.
+
+- (showstopper): the L2 classifier runs only in the bridged data path,
+ so supporting routed data path would require creating something else
+ entirely different, which would mean much more headaches support-wise
+ going forward.
+
+Because of that, I have moved to a different model of creating a
+session-5-tuple from the packet data - once, and then doing all the
+matching just on that 5-tuple.
+
+This has allowed to add support for skipping IPv6 extension headers.
+
+Also, this new version started to store the sessions in a dedicated
+bihash-per-interface, with the session key data being aligned for the
+ingress packets, and being mirrored for the egress packets. This allows
+of significant savings in memory, because now we need to keep only one
+copy of the session table per interface instead of two, and also to only
+have ONE node for all the lookups, (L2/L3 path, in/out, IPv4/IPv6) -
+significantly reducing the code complexity.
+
+Unfortunately, bihash still has the “lack of backpressure” problem, in a
+sense that if you try to insert too many entries and run out of memory
+in the heap you supplied, you get a crash.
+
+To somewhat workaround against that, there is a “maximum tested number
+of sessions” value, which tracks the currently inserted sessions in the
+bihash, and if this number is being approached, a more aggressive
+cleanup can happen. If this number is reached, two behaviors are
+possible:
+
+- attempt to do the stateless ACL matching and permit the packet if it
+ succeeds
+
+- deny the packet
+
+Currently I have opted for a second one, since it allows for a better
+defined behavior, and if you have to permit the traffic in both
+directions, why using stateful anyway ?
+
+In order to be able to do the cleanup, we need to discriminate between
+the session types, with each session type having its own idle timeout.
+In order to do that, we keep three lists, defined in enum acl_timeout_e:
+ACL_TIMEOUT_UDP_IDLE, ACL_TIMEOUT_TCP_IDLE, ACL_TIMEOUT_TCP_TRANSIENT.
+
+The first one is hopefully obvious - it is just all UDP connections.
+They have an idle timeout of 600 seconds.
+
+The second and third is a bit more subtle. TCP is a complicated
+protocol, and we need to tread the fine line between doing too little
+and doing too much, and triggering the potential compatibility issues
+because of being a “middlebox”.
+
+I decided to split the TCP connections into two classes: established,
+and everything else. “Established”, means we have seen the SYN and ACK
+from both sides (with PUSH obviously masked out). This is the “active”
+state of any TCP connection and we would like to ensure we do not screw
+it up. So, the connections in this state have the default idle timer of
+24 hours.
+
+All the rest of the connections have the idle timeout of 2 minutes,
+(inspired by an old value of MSL) and based on the observation that the
+states this class represent are usually very short lived.
+
+Once we have these three baskets of connections, it is trivial to
+imagine a simple cleanup mechanism to deal with this: take a TCP
+transient connection that has been hanging around.
+
+It is debatable whether we want to do discrimination between the
+different TCP transient connections. Assuming we do FIFO (and the lists
+allow us to do just that), it means a given connection on the head of
+the list has been hanging around for longest. Thus, if we are short on
+resources, we might just go ahead and reuse it within the datapath.
+
+This is where we are slowly approaching the question “Why in the world
+have not you used timer wheel or such ?”
+
+The answer is simple: within the above constraints, it does not buy me
+much.
+
+Also, timer wheel creates a leaky abstraction with a difficult to manage
+corner case. Which corner case ?
+
+We have a set of objects (sessions) with an event that may or may not
+happen (idle timeout timer firing), and a necessity to reset the idle
+timeout when there is activity on the session.
+
+In the worst case, where we had a 10000 of one-packet UDP sessions just
+created 10 minutes ago, we would need to deal with a spike of 10000
+expired timers.
+
+Of course, if we have the active traffic on all of these 10000
+connections, then we will not have to deal with that ? Right, but we
+will still have to deal with canceling and requeueing the timers.
+
+In the best possible case, requeueing a timer is going to be something
+along the lines of a linked-list removal and reinsertion.
+
+However, keep in mind we already need to classify the connections for
+reuse, so therefore we already have the linked lists!
+
+And if we just check these linked lists periodically in a FIFO fashion,
+we can get away with a very simple per-packet operation: writing back
+the timestamp of “now” into the connection structure.
+
+Then rather than requeueing the list on a per-packet or per-frame basis,
+we can defer this action until the time this session appears on the head
+of the FIFO list, and the cleaning routine makes the decision about
+whether to discard the session (because the interval since last activity
+is bigger than the idle timeout), or to requeue the session back to the
+end of the list (because the last activity was less than idle timeout
+ago).
+
+So, rather than using the timers, we can simply reuse our classification
+FIFOs, with the following heuristic: do not look at the session that was
+enqueued at time X until X+session_timeout. If we enqueue the sessions
+in the order of their initial activity, then we can simply use enqueue
+timestamp of the head session as a decision criterion for when we need
+to get back at looking at it for the timeout purposes.
+
+Since the number of FIFOs is small, we get a slightly worse check
+performance than with timers, but still O(1).
+
+We seemingly do quite a few “useless” operations of requeueing the items
+back to the tail of the list - but, these are the operations we do not
+have to do in the active data path, so overall it is a win.
+
+(Diversion: I believe this problem is congruent to poll vs. epoll or
+events vs. threads, some reading on this subject:
+http://web.archive.org/web/20120225022154/http://sheddingbikes.com/posts/1280829388.html)
+
+We can also can run a TCP-like scheme for adaptively changing the wait
+period in the routine that deals with the connection timeouts: we can
+attempt to check the connections a couple of times per second (same as
+we would advance the timer wheel), and then if we have requeued close to
+a max-per-quantum number of connections, we can half the waiting
+interval, and if we did not requeue any, we can slowly increment the
+waiting interval - which at a steady state should stabilize similar to
+what the TCP rate does.
+
+reflexive ACLs: multi-thread
+----------------------------
+
+The single-threaded implementation in 1704 used a separate “cleaner”
+process to deal with the timing out of the connections. It is all good
+and great when you know that there is only a single core to run
+everything on, but the existence of the lists proves to be a massive
+difficulty when it comes to operating from multiple threads.
+
+Initial study shows that with a few assumptions (e.g. that the cleaner
+running in main thread and the worker have a demarcation point in time
+where either one or the other one touches the session in the list) it
+might be possible to make it work, but the resulting trickiness of doing
+it neatly with all the corner cases is quite large.
+
+So, for the multi-threaded scenario, we need to move the connection
+aging back to the same CPU as its creation.
+
+Luckily we can do this with the help of the interrupts.
+
+So, the design is as follows: the aging thread
+(acl_fa_session_cleaner_process) periodically fires the interrupts to
+the workers interrupt nodes
+(acl_fa_worker_session_cleaner_process_node.index), using
+vlib_node_set_interrupt_pending(), and the interrupt node
+acl_fa_worker_conn_cleaner_process() calls acl_fa_check_idle_sessions()
+which does the actual job of advancing the lists. And within the actual
+datapath the only thing we will be doing is putting the items onto FIFO,
+and updating the last active time on the existing connection.
+
+The one “delicate” part is that the worker for one leg of the connection
+might be different from the worker of another leg of the connection -
+but, even if the “owner” tries to free the connection, nothing terrible
+can happen - worst case the element of the pool (which is nominally free
+for a short period) will get the timestamp updated - same thing about
+the TCP flags seen.
+
+A slightly trickier issue arises when the packet initially seen by one
+worker (thus owned by that worker), and the return packet processed by
+another worker, and as a result changes the the class of the connection
+(e.g. becomes TCP_ESTABLISHED from TCP_TRANSIENT or vice versa). If the
+class changes from one with the shorter idle time to the one with the
+longer idle time, then unless we are in the starvation mode where the
+transient connections are recycled, we can simply do nothing and let the
+normal requeue mechanism kick in. If the class changes from the longer
+idle timer to the shorter idle timer, then we risk keeping the
+connection around for longer than needed, which will affect the resource
+usage.
+
+One solution to that is to have NxN ring buffers (where N is the number
+of workers), such that the non-owner can signal to the owner the
+connection# that needs to be requeued out of order.
+
+A simpler solution though, is to ensure that each FIFO’s period is equal
+to that of a shortest timer. This way the resource starvation problem is
+taken care of, at an expense of some additional work.
+
+This all looks sufficiently nice and simple until a skeleton falls out
+of the closet: sometimes we want to clean the connections en masse
+before they expire.
+
+There few potential scenarios: 1) removal of an ACL from the interface
+2) removal of an interface 3) manual action of an operator (in the
+future).
+
+In order to tackle this, we need to modify the logic which decides
+whether to requeue the connection on the end of the list, or to delete
+it due to idle timeout:
+
+We define a point in time, and have each worker thread fast-forward
+through its FIFO, in the process looking for sessions that satisfy the
+criteria, and either keeping them or requeueing them.
+
+To keep the ease of appearance to the outside world, we still process
+this as an event within the connection cleaner thread, but this event
+handler does as follows: 1) it creates the bitmap of the sw_if_index
+values requested to be cleared 2) for each worker, it waits to ensure
+there is no cleanup operation in progress (and if there is one, it
+waits), and then makes a copy of the bitmap, sets the per-worker flag of
+a cleanup operation, and sends an interrupt. 3) wait until all cleanup
+operations have completed.
+
+Within the worker interrupt node, we check if the “cleanup in progress”
+is set, and if it is, we check the “fast forward time” value. If unset,
+we initialize it to value now, and compare the requested bitmap of
+sw_if_index values (pending_clear_sw_if_index_bitmap) with the bitmap of
+sw_if_index that this worker deals with.
+
+(we set the bit in the bitmap every time we enqueue the packet onto a
+FIFO - serviced_sw_if_index_bitmap in acl_fa_conn_list_add_session).
+
+If the result of this AND operation is zero - then we can clear the flag
+of cleanup in progress and return. Else we kick off the quantum of
+cleanup, and make sure we get another interrupt ASAP if that cleanup
+operation returns non-zero, meaning there is more work to do. When that
+operation returns zero, everything has been processed, we can clear the
+“cleanup-in-progress” flag, and zeroize the bitmap of sw_if_index-es
+requested to be cleaned.
+
+The interrupt node signals its wish to receive an interrupt ASAP by
+setting interrupt_is_needed flag within the per-worker structure. The
+main thread, while waiting for the cleanup operation to complete, checks
+if there is a request for interrupt, and if there is - it sends one.
+
+This approach gives us a way to mass-clean the connections which is
+reusing the code of the regular idle connection cleanup.
+
+One potential inefficiency is the bitmap values set by the session
+insertion in the data path - there is nothing to clear them.
+
+So, if one rearranges the interface placement with the workers, then the
+cleanups will cause some unnecessary work. For now, we consider it an
+acceptable limitation. It can be resolved by having another per-worker
+bitmap, which, when set, would trigger the cleanup of the bits in the
+serviced_sw_if_index_bitmap).
+
+=== the end ===
diff --git a/src/plugins/acl/acl_test.c b/src/plugins/acl/acl_test.c
index 79058cdc268..8404689dc06 100644
--- a/src/plugins/acl/acl_test.c
+++ b/src/plugins/acl/acl_test.c
@@ -18,6 +18,8 @@
*------------------------------------------------------------------
*/
+#include <byteswap.h>
+
#include <vat/vat.h>
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
@@ -36,8 +38,6 @@ uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
/* Declare message IDs */
#include <acl/acl.api_enum.h>
#include <acl/acl.api_types.h>
-#define vl_print(handle, ...)
-#undef vl_print
#define vl_endianfun /* define message structures */
#include <acl/acl.api.h>
#undef vl_endianfun
@@ -99,6 +99,15 @@ static void vl_api_acl_plugin_get_version_reply_t_handler
vam->result_ready = 1;
}
+ static void
+ vl_api_acl_plugin_use_hash_lookup_get_reply_t_handler (
+ vl_api_acl_plugin_use_hash_lookup_get_reply_t *mp)
+ {
+ vat_main_t *vam = acl_test_main.vat_main;
+ clib_warning ("ACL hash lookups enabled: %d", mp->enable);
+ vam->result_ready = 1;
+ }
+
static void vl_api_acl_interface_list_details_t_handler
(vl_api_acl_interface_list_details_t * mp)
{
@@ -150,9 +159,9 @@ static void vl_api_acl_plugin_get_conn_table_max_entries_reply_t_handler
(vl_api_acl_plugin_get_conn_table_max_entries_reply_t * mp)
{
vat_main_t * vam = acl_test_main.vat_main;
- clib_warning("\nConn table max entries: %d",
- __bswap_64(mp->conn_table_max_entries) );
- vam->result_ready = 1;
+ clib_warning ("\nConn table max entries: %d",
+ clib_net_to_host_u64 (mp->conn_table_max_entries));
+ vam->result_ready = 1;
}
static inline u8 *
@@ -484,10 +493,10 @@ static int api_acl_add_replace (vat_main_t * vam)
if (vec_len(tag) >= sizeof(mp->tag))
{
tag[sizeof(mp->tag)-1] = 0;
- _vec_len(tag) = sizeof(mp->tag);
- }
- clib_memcpy(mp->tag, tag, vec_len(tag));
- vec_free(tag);
+ vec_set_len (tag, sizeof (mp->tag));
+ }
+ clib_memcpy (mp->tag, tag, vec_len (tag));
+ vec_free (tag);
}
mp->acl_index = ntohl(acl_index);
mp->count = htonl(n_rules);
@@ -551,6 +560,63 @@ static int api_acl_stats_intf_counters_enable (vat_main_t * vam)
return ret;
}
+static int
+api_acl_plugin_use_hash_lookup_set (vat_main_t *vam)
+{
+ acl_test_main_t *sm = &acl_test_main;
+ unformat_input_t *i = vam->input;
+ vl_api_acl_plugin_use_hash_lookup_set_t *mp;
+ u32 msg_size = sizeof (*mp);
+ int ret;
+
+ vam->result_ready = 0;
+ mp = vl_msg_api_alloc_as_if_client (msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id =
+ ntohs (VL_API_ACL_PLUGIN_USE_HASH_LOOKUP_SET + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+ mp->enable = 1;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "disable"))
+ mp->enable = 0;
+ else if (unformat (i, "enable"))
+ mp->enable = 1;
+ else
+ break;
+ }
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_acl_plugin_use_hash_lookup_get (vat_main_t *vam)
+{
+ acl_test_main_t *sm = &acl_test_main;
+ vl_api_acl_plugin_use_hash_lookup_set_t *mp;
+ u32 msg_size = sizeof (*mp);
+ int ret;
+
+ vam->result_ready = 0;
+ mp = vl_msg_api_alloc_as_if_client (msg_size);
+ memset (mp, 0, msg_size);
+ mp->_vl_msg_id =
+ ntohs (VL_API_ACL_PLUGIN_USE_HASH_LOOKUP_GET + sm->msg_id_base);
+ mp->client_index = vam->my_client_index;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
/*
* Read the series of ACL entries from file in the following format:
@@ -1267,10 +1333,10 @@ static int api_macip_acl_add (vat_main_t * vam)
if (vec_len(tag) >= sizeof(mp->tag))
{
tag[sizeof(mp->tag)-1] = 0;
- _vec_len(tag) = sizeof(mp->tag);
- }
- clib_memcpy(mp->tag, tag, vec_len(tag));
- vec_free(tag);
+ vec_set_len (tag, sizeof (mp->tag));
+ }
+ clib_memcpy (mp->tag, tag, vec_len (tag));
+ vec_free (tag);
}
mp->count = htonl(n_rules);
@@ -1409,10 +1475,10 @@ static int api_macip_acl_add_replace (vat_main_t * vam)
if (vec_len(tag) >= sizeof(mp->tag))
{
tag[sizeof(mp->tag)-1] = 0;
- _vec_len(tag) = sizeof(mp->tag);
- }
- clib_memcpy(mp->tag, tag, vec_len(tag));
- vec_free(tag);
+ vec_set_len (tag, sizeof (mp->tag));
+ }
+ clib_memcpy (mp->tag, tag, vec_len (tag));
+ vec_free (tag);
}
mp->acl_index = ntohl(acl_index);
diff --git a/src/plugins/acl/dataplane_node.c b/src/plugins/acl/dataplane_node.c
index 1a050f54b14..027afc0f660 100644
--- a/src/plugins/acl/dataplane_node.c
+++ b/src/plugins/acl/dataplane_node.c
@@ -44,7 +44,6 @@ typedef struct
u8 action;
} acl_fa_trace_t;
-/* *INDENT-OFF* */
#define foreach_acl_fa_error \
_(ACL_DROP, "ACL deny packets") \
_(ACL_PERMIT, "ACL permit packets") \
@@ -63,7 +62,6 @@ typedef enum
ACL_FA_N_ERROR,
} acl_fa_error_t;
-/* *INDENT-ON* */
always_inline u16
get_current_policy_epoch (acl_main_t * am, int is_input, u32 sw_if_index0)
@@ -176,7 +174,7 @@ prefetch_session_entry (acl_main_t * am, fa_full_session_id_t f_sess_id)
{
fa_session_t *sess = get_session_ptr_no_check (am, f_sess_id.thread_index,
f_sess_id.session_index);
- CLIB_PREFETCH (sess, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ CLIB_PREFETCH (sess, sizeof (*sess), STORE);
}
always_inline u8
@@ -728,7 +726,6 @@ format_acl_plugin_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
static char *acl_fa_error_strings[] = {
#define _(sym,string) string,
@@ -973,7 +970,6 @@ VNET_FEATURE_INIT (acl_out_ip4_fa_feature, static) = {
.runs_before = VNET_FEATURES ("ip4-dvr-reinject", "interface-output"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/acl/dataplane_node_nonip.c b/src/plugins/acl/dataplane_node_nonip.c
index 9954ea0258e..1126b57343d 100644
--- a/src/plugins/acl/dataplane_node_nonip.c
+++ b/src/plugins/acl/dataplane_node_nonip.c
@@ -45,7 +45,6 @@ typedef struct
u8 action;
} acl_fa_trace_t;
-/* *INDENT-OFF* */
#define foreach_acl_fa_error \
_(ACL_DROP, "ACL deny packets") \
_(ACL_PERMIT, "ACL permit packets") \
@@ -64,7 +63,6 @@ typedef enum
ACL_FA_N_ERROR,
} acl_fa_error_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -109,7 +107,6 @@ _(DROP, "dropped outbound non-whitelisted non-ip packets") \
_(PERMIT, "permitted outbound whitelisted non-ip packets") \
-/* *INDENT-OFF* */
typedef enum
{
@@ -138,7 +135,6 @@ static char *fa_out_nonip_error_strings[] = {
foreach_nonip_out_error
#undef _
};
-/* *INDENT-ON* */
always_inline int
@@ -237,7 +233,6 @@ VLIB_NODE_FN (acl_out_nonip_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (acl_in_nonip_node) =
{
@@ -283,7 +278,6 @@ VNET_FEATURE_INIT (acl_out_l2_nonip_fa_feature, static) =
.runs_before = VNET_FEATURES ("l2-output-feat-arc-end"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/acl/exports.h b/src/plugins/acl/exports.h
index d904ad3bbae..04159aa052d 100644
--- a/src/plugins/acl/exports.h
+++ b/src/plugins/acl/exports.h
@@ -24,8 +24,8 @@
#include <vlib/unix/plugin.h>
-#include <plugins/acl/acl.h>
-#include <plugins/acl/fa_node.h>
-#include <plugins/acl/public_inlines.h>
+#include "acl.h"
+#include "fa_node.h"
+#include "public_inlines.h"
#endif /* included_acl_exports_h */
diff --git a/src/plugins/acl/fa_node.h b/src/plugins/acl/fa_node.h
index c969377ded7..c4a971aada3 100644
--- a/src/plugins/acl/fa_node.h
+++ b/src/plugins/acl/fa_node.h
@@ -5,7 +5,7 @@
#include <vppinfra/bihash_16_8.h>
#include <vppinfra/bihash_40_8.h>
-#include <plugins/acl/exported_types.h>
+#include "exported_types.h"
// #define FA_NODE_VERBOSE_DEBUG 3
diff --git a/src/plugins/acl/hash_lookup.c b/src/plugins/acl/hash_lookup.c
index 85b54b3e8ac..9c3c662a8f1 100644
--- a/src/plugins/acl/hash_lookup.c
+++ b/src/plugins/acl/hash_lookup.c
@@ -261,13 +261,11 @@ static u32
find_mask_type_index(acl_main_t *am, fa_5tuple_t *mask)
{
ace_mask_type_entry_t *mte;
- /* *INDENT-OFF* */
pool_foreach (mte, am->ace_mask_type_pool)
{
if(memcmp(&mte->mask, mask, sizeof(*mask)) == 0)
return (mte - am->ace_mask_type_pool);
}
- /* *INDENT-ON* */
return ~0;
}
@@ -682,7 +680,7 @@ hash_acl_apply(acl_main_t *am, u32 lc_index, int acl_index, u32 acl_position)
if (vec_len(ha->rules) > 0) {
int old_vec_len = vec_len(*applied_hash_aces);
vec_validate((*applied_hash_aces), old_vec_len + vec_len(ha->rules) - 1);
- _vec_len((*applied_hash_aces)) = old_vec_len;
+ vec_set_len ((*applied_hash_aces), old_vec_len);
}
/* add the rules from the ACL to the hash table for lookup and append to the vector*/
@@ -903,7 +901,7 @@ hash_acl_unapply(acl_main_t *am, u32 lc_index, int acl_index)
move_applied_ace_hash_entry(am, lc_index, applied_hash_aces, tail_offset + i, base_offset + i);
}
/* trim the end of the vector */
- _vec_len((*applied_hash_aces)) -= vec_len(ha->rules);
+ vec_dec_len ((*applied_hash_aces), vec_len (ha->rules));
remake_hash_applied_mask_info_vec(am, applied_hash_aces, lc_index);
@@ -1159,7 +1157,6 @@ acl_plugin_show_tables_mask_type (void)
ace_mask_type_entry_t *mte;
vlib_cli_output (vm, "Mask-type entries:");
- /* *INDENT-OFF* */
pool_foreach (mte, am->ace_mask_type_pool)
{
vlib_cli_output(vm, " %3d: %016llx %016llx %016llx %016llx %016llx %016llx refcount %d",
@@ -1167,7 +1164,6 @@ acl_plugin_show_tables_mask_type (void)
mte->mask.kv_40_8.key[0], mte->mask.kv_40_8.key[1], mte->mask.kv_40_8.key[2],
mte->mask.kv_40_8.key[3], mte->mask.kv_40_8.key[4], mte->mask.kv_40_8.value, mte->refcount);
}
- /* *INDENT-ON* */
}
void
diff --git a/src/plugins/acl/public_inlines.h b/src/plugins/acl/public_inlines.h
index 6b69bcef61e..eb9f0de920f 100644
--- a/src/plugins/acl/public_inlines.h
+++ b/src/plugins/acl/public_inlines.h
@@ -19,11 +19,11 @@
#include <stdint.h>
#include <vlib/unix/plugin.h>
-#include <plugins/acl/acl.h>
-#include <plugins/acl/fa_node.h>
-#include <plugins/acl/hash_lookup_private.h>
+#include "acl.h"
+#include "fa_node.h"
+#include "hash_lookup_private.h"
-#include <plugins/acl/exported_types.h>
+#include "exported_types.h"
#define LOAD_SYMBOL_FROM_PLUGIN_TO(p, s, st) \
({ \
diff --git a/src/plugins/acl/sess_mgmt_node.c b/src/plugins/acl/sess_mgmt_node.c
index 3fc4f5e1982..418baef9b6b 100644
--- a/src/plugins/acl/sess_mgmt_node.c
+++ b/src/plugins/acl/sess_mgmt_node.c
@@ -188,8 +188,7 @@ acl_fa_check_idle_sessions (acl_main_t * am, u16 thread_index, u64 now)
}
}
if (pw->wip_session_change_requests)
- _vec_len (pw->wip_session_change_requests) = 0;
-
+ vec_set_len (pw->wip_session_change_requests, 0);
{
u8 tt = 0;
@@ -309,7 +308,7 @@ acl_fa_check_idle_sessions (acl_main_t * am, u16 thread_index, u64 now)
total_expired = vec_len (pw->expired);
/* zero out the vector which we have acted on */
if (pw->expired)
- _vec_len (pw->expired) = 0;
+ vec_set_len (pw->expired, 0);
/* if we were advancing and reached the end
* (no more sessions to recycle), reset the fast-forward timestamp */
@@ -329,7 +328,6 @@ acl_fa_check_idle_sessions (acl_main_t * am, u16 thread_index, u64 now)
*/
-/* *INDENT-OFF* */
#define foreach_acl_fa_cleaner_error \
_(UNKNOWN_EVENT, "unknown event received") \
/* end of errors */
@@ -348,7 +346,6 @@ static char *acl_fa_cleaner_error_strings[] = {
#undef _
};
-/* *INDENT-ON* */
static vlib_node_registration_t acl_fa_session_cleaner_process_node;
static vlib_node_registration_t acl_fa_worker_session_cleaner_process_node;
@@ -374,8 +371,9 @@ send_one_worker_interrupt (vlib_main_t * vm, acl_main_t * am,
}
void
-aclp_post_session_change_request (acl_main_t * am, u32 target_thread,
- u32 target_session, u32 request_type)
+aclp_post_session_change_request (acl_main_t *am, u32 target_thread,
+ u32 target_session,
+ acl_fa_sess_req_t request_type)
{
acl_fa_per_worker_data_t *pw_me =
&am->per_worker_data[os_get_thread_index ()];
@@ -724,6 +722,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
else
{
+ clib_bitmap_free (pw0->pending_clear_sw_if_index_bitmap);
if (clear_all)
{
/* if we need to clear all, then just clear the interfaces that we are servicing */
@@ -788,7 +787,7 @@ acl_fa_session_cleaner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
send_interrupts_to_workers (vm, am);
if (event_data)
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
/*
* If the interrupts were not processed yet, ensure we wait a bit,
@@ -860,10 +859,8 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
{
acl_fa_verify_init_sessions (am);
am->fa_total_enabled_count++;
- void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
ACL_FA_CLEANER_RESCHEDULE, 0);
- clib_mem_set_heap (oldheap);
}
else
{
@@ -874,12 +871,10 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
{
ASSERT (clib_bitmap_get (am->fa_in_acl_on_sw_if_index, sw_if_index) !=
enable_disable);
- void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
vnet_feature_enable_disable ("ip4-unicast", "acl-plugin-in-ip4-fa",
sw_if_index, enable_disable, 0, 0);
vnet_feature_enable_disable ("ip6-unicast", "acl-plugin-in-ip6-fa",
sw_if_index, enable_disable, 0, 0);
- clib_mem_set_heap (oldheap);
am->fa_in_acl_on_sw_if_index =
clib_bitmap_set (am->fa_in_acl_on_sw_if_index, sw_if_index,
enable_disable);
@@ -888,12 +883,10 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
{
ASSERT (clib_bitmap_get (am->fa_out_acl_on_sw_if_index, sw_if_index) !=
enable_disable);
- void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
vnet_feature_enable_disable ("ip4-output", "acl-plugin-out-ip4-fa",
sw_if_index, enable_disable, 0, 0);
vnet_feature_enable_disable ("ip6-output", "acl-plugin-out-ip6-fa",
sw_if_index, enable_disable, 0, 0);
- clib_mem_set_heap (oldheap);
am->fa_out_acl_on_sw_if_index =
clib_bitmap_set (am->fa_out_acl_on_sw_if_index, sw_if_index,
enable_disable);
@@ -905,11 +898,9 @@ acl_fa_enable_disable (u32 sw_if_index, int is_input, int enable_disable)
clib_warning ("ENABLE-DISABLE: clean the connections on interface %d",
sw_if_index);
#endif
- void *oldheap = clib_mem_set_heap (am->vlib_main->heap_base);
vlib_process_signal_event (am->vlib_main, am->fa_cleaner_node_index,
ACL_FA_CLEANER_DELETE_BY_SW_IF_INDEX,
sw_if_index);
- clib_mem_set_heap (oldheap);
}
}
@@ -935,7 +926,6 @@ show_fa_sessions_hash (vlib_main_t * vm, u32 verbose)
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (acl_fa_worker_session_cleaner_process_node, static) = {
.function = acl_fa_worker_conn_cleaner_process,
@@ -955,7 +945,6 @@ VLIB_REGISTER_NODE (acl_fa_session_cleaner_process_node, static) = {
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/adl/adl.api b/src/plugins/adl/adl.api
index cbbb026a77c..b80cdc1c90f 100644
--- a/src/plugins/adl/adl.api
+++ b/src/plugins/adl/adl.api
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-option version = "0.0.1";
+option version = "1.0.1";
import "vnet/interface_types.api";
diff --git a/src/plugins/adl/adl.c b/src/plugins/adl/adl.c
index 832bfd4a982..c6ea57a18be 100644
--- a/src/plugins/adl/adl.c
+++ b/src/plugins/adl/adl.c
@@ -150,21 +150,17 @@ adl_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (adl_init) =
{
.runs_after = VLIB_INITS ("ip4_allowlist_init", "ip6_allowlist_init"),
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (adl, static) =
{
.arc_name = "device-input",
.node_name = "adl-input",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON */
int adl_interface_enable_disable (u32 sw_if_index, int enable_disable)
{
@@ -389,7 +385,6 @@ adl_allowlist_enable_disable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (adl_allowlist_command, static) =
{
.path = "adl allowlist",
@@ -397,15 +392,12 @@ VLIB_CLI_COMMAND (adl_allowlist_command, static) =
"adl allowlist <interface-name> [ip4][ip6][default][fib-id <NN>][disable]",
.function = adl_allowlist_enable_disable_command_fn,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Allow/deny list plugin",
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/adl/adl_api.c b/src/plugins/adl/adl_api.c
index dba3b0cc17d..8bd805d9e06 100644
--- a/src/plugins/adl/adl_api.c
+++ b/src/plugins/adl/adl_api.c
@@ -30,7 +30,6 @@
#include <adl/adl.api_enum.h>
#include <adl/adl.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define REPLY_MSG_ID_BASE am->msg_id_base
#include <vlibapi/api_helper_macros.h>
diff --git a/src/plugins/adl/ip4_allowlist.c b/src/plugins/adl/ip4_allowlist.c
index 316e2cb558b..4c755725ea7 100644
--- a/src/plugins/adl/ip4_allowlist.c
+++ b/src/plugins/adl/ip4_allowlist.c
@@ -82,21 +82,19 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm,
ip4_header_t * ip0, * ip1;
adl_config_main_t * ccm0, * ccm1;
adl_config_data_t * c0, * c1;
- ip4_fib_mtrie_t * mtrie0, * mtrie1;
- ip4_fib_mtrie_leaf_t leaf0, leaf1;
- u32 lb_index0, lb_index1;
- const load_balance_t * lb0, *lb1;
- const dpo_id_t *dpo0, *dpo1;
+ u32 lb_index0, lb_index1;
+ const load_balance_t *lb0, *lb1;
+ const dpo_id_t *dpo0, *dpo1;
- /* Prefetch next iteration. */
- {
- vlib_buffer_t * p2, * p3;
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
clib_prefetch_store (p2->data);
clib_prefetch_store (p3->data);
@@ -121,17 +119,8 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm,
&ccm0->config_main, &adl_buffer (b0)->adl.current_config_index,
&next0, sizeof (c0[0]));
- mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
-
- leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 2);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 3);
-
- lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lb_index0 =
+ ip4_fib_forwarding_lookup (c0->fib_index, &ip0->src_address);
ASSERT (lb_index0
== ip4_fib_table_lookup_lb (ip4_fib_get(c0->fib_index),
@@ -158,17 +147,10 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm,
&adl_buffer (b1)->adl.current_config_index,
&next1,
sizeof (c1[0]));
- mtrie1 = &ip4_fib_get (c1->fib_index)->mtrie;
-
- leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, &ip1->src_address);
-
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
- &ip1->src_address, 2);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1,
- &ip1->src_address, 3);
+ lb_index1 =
+ ip4_fib_forwarding_lookup (c1->fib_index, &ip1->src_address);
- lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
ASSERT (lb_index1
== ip4_fib_table_lookup_lb (ip4_fib_get(c1->fib_index),
&ip1->src_address));
@@ -226,13 +208,11 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm,
ip4_header_t * ip0;
adl_config_main_t *ccm0;
adl_config_data_t *c0;
- ip4_fib_mtrie_t * mtrie0;
- ip4_fib_mtrie_leaf_t leaf0;
- u32 lb_index0;
- const load_balance_t * lb0;
- const dpo_id_t *dpo0;
+ u32 lb_index0;
+ const load_balance_t *lb0;
+ const dpo_id_t *dpo0;
- /* speculatively enqueue b0 to the current next frame */
+ /* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
to_next[0] = bi0;
from += 1;
@@ -253,21 +233,12 @@ VLIB_NODE_FN (ip4_adl_allowlist_node) (vlib_main_t * vm,
&next0,
sizeof (c0[0]));
- mtrie0 = &ip4_fib_get (c0->fib_index)->mtrie;
+ lb_index0 =
+ ip4_fib_forwarding_lookup (c0->fib_index, &ip0->src_address);
- leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 2);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0,
- &ip0->src_address, 3);
-
- lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
-
- ASSERT (lb_index0
- == ip4_fib_table_lookup_lb (ip4_fib_get(c0->fib_index),
- &ip0->src_address));
+ ASSERT (lb_index0 ==
+ ip4_fib_table_lookup_lb (ip4_fib_get (c0->fib_index),
+ &ip0->src_address));
lb0 = load_balance_get (lb_index0);
dpo0 = load_balance_get_bucket_i(lb0, 0);
diff --git a/src/plugins/adl/setup.pg b/src/plugins/adl/setup.pg
index 7f816bc0893..7117225789a 100644
--- a/src/plugins/adl/setup.pg
+++ b/src/plugins/adl/setup.pg
@@ -6,46 +6,46 @@ set int ip6 table loop0 0
set int ip address loop0 2001:db01::1/64
set int state loop0 up
-packet-generator new {
- name ip4
- limit 100
- rate 0
- size 128-128
- interface loop0
- node adl-input
- data { IP4: 1.2.40 -> 3cfd.fed0.b6c8
- UDP: 192.168.1.2-192.168.1.10 -> 192.168.2.1
- UDP: 1234 -> 2345
- incrementing 114
- }
+packet-generator new { \
+ name ip4 \
+ limit 100 \
+ rate 0 \
+ size 128-128 \
+ interface loop0 \
+ node adl-input \
+ data { IP4: 1.2.40 -> 3cfd.fed0.b6c8 \
+ UDP: 192.168.1.2-192.168.1.10 -> 192.168.2.1 \
+ UDP: 1234 -> 2345 \
+ incrementing 114 \
+ } \
}
-packet-generator new {
- name ip6-allow
- limit 50
- rate 0
- size 128-128
- interface loop0
- node adl-input
- data { IP6: 1.2.40 -> 3cfd.fed0.b6c8
- UDP: 2001:db01::2 -> 2001:db01::1
- UDP: 1234 -> 2345
- incrementing 80
- }
+packet-generator new { \
+ name ip6-allow \
+ limit 50 \
+ rate 0 \
+ size 128-128 \
+ interface loop0 \
+ node adl-input \
+ data { IP6: 1.2.40 -> 3cfd.fed0.b6c8 \
+ UDP: 2001:db01::2 -> 2001:db01::1 \
+ UDP: 1234 -> 2345 \
+ incrementing 80 \
+ } \
}
-packet-generator new {
- name ip6-drop
- limit 50
- rate 0
- size 128-128
- interface loop0
- node adl-input
- data { IP6: 1.2.40 -> 3cfd.fed0.b6c8
- UDP: 2001:db01::3 -> 2001:db01::1
- UDP: 1234 -> 2345
- incrementing 80
- }
+packet-generator new { \
+ name ip6-drop \
+ limit 50 \
+ rate 0 \
+ size 128-128 \
+ interface loop0 \
+ node adl-input \
+ data { IP6: 1.2.40 -> 3cfd.fed0.b6c8 \
+ UDP: 2001:db01::3 -> 2001:db01::1 \
+ UDP: 1234 -> 2345 \
+ incrementing 80 \
+ } \
}
ip table 1
diff --git a/src/plugins/gbp/CMakeLists.txt b/src/plugins/af_packet/CMakeLists.txt
index 95f664ff08e..818a03107d7 100644
--- a/src/plugins/gbp/CMakeLists.txt
+++ b/src/plugins/af_packet/CMakeLists.txt
@@ -1,4 +1,5 @@
-# Copyright (c) 2018 Cisco and/or its affiliates.
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2022 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -11,44 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-add_vpp_plugin(gbp
+add_vpp_plugin(af_packet
SOURCES
- gbp_api.c
- gbp_bridge_domain.c
- gbp_classify.c
- gbp_classify_node.c
- gbp_contract.c
- gbp_endpoint.c
- gbp_endpoint_group.c
- gbp_ext_itf.c
- gbp_fwd.c
- gbp_fwd_dpo.c
- gbp_fwd_node.c
- gbp_itf.c
- gbp_learn.c
- gbp_learn_node.c
- gbp_policy.c
- gbp_policy_dpo.c
- gbp_policy_node.c
- gbp_recirc.c
- gbp_route_domain.c
- gbp_scanner.c
- gbp_subnet.c
- gbp_vxlan.c
- gbp_vxlan_node.c
+ plugin.c
+ af_packet.c
+ device.c
+ node.c
+ cli.c
+ af_packet_api.c
MULTIARCH_SOURCES
- gbp_classify_node.c
- gbp_fwd_dpo.c
- gbp_fwd_node.c
- gbp_learn_node.c
- gbp_policy_dpo.c
- gbp_policy_node.c
- gbp_vxlan_node.c
+ node.c
+ device.c
+
+ INSTALL_HEADERS
+ af_packet.h
API_FILES
- gbp.api
+ af_packet.api
- INSTALL_HEADERS
- gbp.h
+ # API_TEST_SOURCES
+ #af_packet_test_api.c
+
+ SUPPORTED_OS_LIST Linux
)
diff --git a/src/vnet/devices/af_packet/FEATURE.yaml b/src/plugins/af_packet/FEATURE.yaml
index 25d8b2b5964..4a11ea2beb5 100644
--- a/src/vnet/devices/af_packet/FEATURE.yaml
+++ b/src/plugins/af_packet/FEATURE.yaml
@@ -3,6 +3,7 @@ name: host-interface Device AF_PACKET
maintainer: Damjan Marion <damarion@cisco.com>
features:
- L4 checksum offload
+ - GSO offload
description: "Create a host interface that will attach to a linux AF_PACKET
interface, one side of a veth pair. The veth pair must
already exist. Once created, a new host interface will
diff --git a/src/plugins/af_packet/af_packet.api b/src/plugins/af_packet/af_packet.api
new file mode 100644
index 00000000000..abc7d1a7206
--- /dev/null
+++ b/src/plugins/af_packet/af_packet.api
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2015-2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "2.0.0";
+
+import "vnet/interface_types.api";
+import "vnet/ethernet/ethernet_types.api";
+
+/** \brief Create host-interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param hw_addr - interface MAC
+ @param use_random_hw_addr - use random generated MAC
+ @param host_if_name - interface name
+*/
+define af_packet_create
+{
+ option deprecated;
+
+ u32 client_index;
+ u32 context;
+
+ vl_api_mac_address_t hw_addr;
+ bool use_random_hw_addr;
+ string host_if_name[64];
+};
+
+/** \brief Create host-interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define af_packet_create_reply
+{
+ option deprecated;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Create host-interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param hw_addr - interface MAC
+ @param use_random_hw_addr - use random generated MAC
+ @param host_if_name - interface name
+ @param rx_frame_size - frame size for RX
+ @param tx_frame_size - frame size for TX
+ @param rx_frames_per_block - frames per block for RX
+ @param tx_frames_per_block - frames per block for TX
+ @param flags - flags for the af_packet interface creation
+ @param num_rx_queues - number of rx queues
+*/
+define af_packet_create_v2
+{
+ option deprecated;
+
+ u32 client_index;
+ u32 context;
+
+ vl_api_mac_address_t hw_addr;
+ bool use_random_hw_addr;
+ string host_if_name[64];
+ u32 rx_frame_size;
+ u32 tx_frame_size;
+ u32 rx_frames_per_block;
+ u32 tx_frames_per_block;
+ u32 flags;
+ u16 num_rx_queues [default=1];
+};
+
+/** \brief Create host-interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define af_packet_create_v2_reply
+{
+ option deprecated;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+enum af_packet_mode {
+ AF_PACKET_API_MODE_ETHERNET = 1, /* mode ethernet */
+ AF_PACKET_API_MODE_IP = 2, /* mode ip */
+};
+
+enum af_packet_flags {
+ AF_PACKET_API_FLAG_QDISC_BYPASS = 1, /* enable the qdisc bypass */
+ AF_PACKET_API_FLAG_CKSUM_GSO = 2, /* enable checksum/gso */
+ AF_PACKET_API_FLAG_VERSION_2 = 8 [backwards_compatible], /* af packet v2, default is v3 */
+};
+
+/** \brief Create host-interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mode - 1 - Ethernet, 2 - IP
+ @param hw_addr - interface MAC
+ @param use_random_hw_addr - use random generated MAC
+ @param host_if_name - interface name
+ @param rx_frame_size - frame size for RX
+ @param tx_frame_size - frame size for TX
+ @param rx_frames_per_block - frames per block for RX
+ @param tx_frames_per_block - frames per block for TX
+ @param flags - flags for the af_packet interface creation
+ @param num_rx_queues - number of rx queues
+ @param num_tx_queues - number of tx queues
+*/
+define af_packet_create_v3
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_af_packet_mode_t mode;
+ vl_api_mac_address_t hw_addr;
+ bool use_random_hw_addr;
+ string host_if_name[64];
+ u32 rx_frame_size;
+ u32 tx_frame_size;
+ u32 rx_frames_per_block;
+ u32 tx_frames_per_block;
+ vl_api_af_packet_flags_t flags;
+ u16 num_rx_queues [default=1];
+ u16 num_tx_queues [default=1];
+};
+
+/** \brief Create host-interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+*/
+define af_packet_create_v3_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Delete host-interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param host_if_name - interface name
+*/
+autoreply define af_packet_delete
+{
+ u32 client_index;
+ u32 context;
+
+ string host_if_name[64];
+};
+
+/** \brief Set l4 offload checksum calculation
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+autoreply define af_packet_set_l4_cksum_offload
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_interface_index_t sw_if_index;
+ bool set;
+};
+
+/** \brief Dump af_packet interfaces request */
+define af_packet_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for af_packet dump request
+ @param sw_if_index - software index of af_packet interface
+ @param host_if_name - interface name
+*/
+define af_packet_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ string host_if_name[64];
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/af_packet/af_packet.c b/src/plugins/af_packet/af_packet.c
new file mode 100644
index 00000000000..69245429918
--- /dev/null
+++ b/src/plugins/af_packet/af_packet.c
@@ -0,0 +1,1054 @@
+/*
+ *------------------------------------------------------------------
+ * af_packet.c - linux kernel packet interface
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <dirent.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <vppinfra/linux/sysfs.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ip/ip.h>
+#include <vnet/devices/netlink.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
+
+#include <af_packet/af_packet.h>
+
+af_packet_main_t af_packet_main;
+
+VNET_HW_INTERFACE_CLASS (af_packet_ip_device_hw_interface_class, static) = {
+ .name = "af-packet-ip-device",
+ .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+};
+
+#define AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK 1024
+#define AF_PACKET_DEFAULT_TX_FRAME_SIZE (2048 * 33) // GSO packet of 64KB
+#define AF_PACKET_TX_BLOCK_NR 1
+
+#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK_V2 1024
+#define AF_PACKET_DEFAULT_RX_FRAME_SIZE_V2 (2048 * 33) // GSO packet of 64KB
+#define AF_PACKET_RX_BLOCK_NR_V2 1
+
+#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK 32
+#define AF_PACKET_DEFAULT_RX_FRAME_SIZE 2048
+#define AF_PACKET_RX_BLOCK_NR 160
+
+/*defined in net/if.h but clashes with dpdk headers */
+unsigned int if_nametoindex (const char *ifname);
+
+#define AF_PACKET_OFFLOAD_FLAG_RXCKSUM (1 << 0)
+#define AF_PACKET_OFFLOAD_FLAG_TXCKSUM (1 << 1)
+#define AF_PACKET_OFFLOAD_FLAG_SG (1 << 2)
+#define AF_PACKET_OFFLOAD_FLAG_TSO (1 << 3)
+#define AF_PACKET_OFFLOAD_FLAG_UFO (1 << 4)
+#define AF_PACKET_OFFLOAD_FLAG_GSO (1 << 5)
+#define AF_PACKET_OFFLOAD_FLAG_GRO (1 << 6)
+
+#define AF_PACKET_OFFLOAD_FLAG_MASK \
+ (AF_PACKET_OFFLOAD_FLAG_RXCKSUM | AF_PACKET_OFFLOAD_FLAG_TXCKSUM | \
+ AF_PACKET_OFFLOAD_FLAG_SG | AF_PACKET_OFFLOAD_FLAG_TSO | \
+ AF_PACKET_OFFLOAD_FLAG_UFO | AF_PACKET_OFFLOAD_FLAG_GSO | \
+ AF_PACKET_OFFLOAD_FLAG_GRO)
+
+#define AF_PACKET_IOCTL(fd, a, ...) \
+ if (ioctl (fd, a, __VA_ARGS__) < 0) \
+ { \
+ err = clib_error_return_unix (0, "ioctl(" #a ")"); \
+ vlib_log_err (af_packet_main.log_class, "%U", format_clib_error, err); \
+ goto done; \
+ }
+
+static u32
+af_packet_get_if_capabilities (u8 *host_if_name)
+{
+ struct ifreq ifr;
+ struct ethtool_value e; // { __u32 cmd; __u32 data; };
+ clib_error_t *err = 0;
+ int ctl_fd = -1;
+ u32 oflags = 0;
+
+ if ((ctl_fd = socket (AF_INET, SOCK_STREAM, 0)) == -1)
+ {
+ clib_warning ("Cannot open control socket");
+ goto done;
+ }
+
+ clib_memset (&ifr, 0, sizeof (ifr));
+ clib_memcpy (ifr.ifr_name, host_if_name,
+ strlen ((const char *) host_if_name));
+ ifr.ifr_data = (void *) &e;
+
+ e.cmd = ETHTOOL_GRXCSUM;
+ AF_PACKET_IOCTL (ctl_fd, SIOCETHTOOL, &ifr);
+ if (e.data)
+ oflags |= AF_PACKET_OFFLOAD_FLAG_RXCKSUM;
+
+ e.cmd = ETHTOOL_GTXCSUM;
+ AF_PACKET_IOCTL (ctl_fd, SIOCETHTOOL, &ifr);
+ if (e.data)
+ oflags |= AF_PACKET_OFFLOAD_FLAG_TXCKSUM;
+
+ e.cmd = ETHTOOL_GTSO;
+ AF_PACKET_IOCTL (ctl_fd, SIOCETHTOOL, &ifr);
+ if (e.data)
+ oflags |= AF_PACKET_OFFLOAD_FLAG_TSO;
+
+ e.cmd = ETHTOOL_GGSO;
+ AF_PACKET_IOCTL (ctl_fd, SIOCETHTOOL, &ifr);
+ if (e.data)
+ oflags |= AF_PACKET_OFFLOAD_FLAG_GSO;
+
+ e.cmd = ETHTOOL_GGRO;
+ AF_PACKET_IOCTL (ctl_fd, SIOCETHTOOL, &ifr);
+ if (e.data)
+ oflags |= AF_PACKET_OFFLOAD_FLAG_GRO;
+
+done:
+ if (ctl_fd != -1)
+ close (ctl_fd);
+
+ return oflags;
+}
+
+static clib_error_t *
+af_packet_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ u32 frame_size)
+{
+ clib_error_t *error, *rv;
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, hi->dev_instance);
+
+ error = vnet_netlink_set_link_mtu (apif->host_if_index,
+ frame_size + hi->frame_overhead);
+
+ if (error)
+ {
+ vlib_log_err (apm->log_class, "netlink failed to change MTU: %U",
+ format_clib_error, error);
+ rv = vnet_error (VNET_ERR_SYSCALL_ERROR_1, "netlink error: %U",
+ format_clib_error, error);
+ clib_error_free (error);
+ return rv;
+ }
+ else
+ apif->host_mtu = frame_size + hi->frame_overhead;
+ return 0;
+}
+
+static int
+af_packet_read_mtu (af_packet_if_t *apif)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ clib_error_t *error;
+ error = vnet_netlink_get_link_mtu (apif->host_if_index, &apif->host_mtu);
+ if (error)
+ {
+ vlib_log_err (apm->log_class, "netlink failed to get MTU: %U",
+ format_clib_error, error);
+ clib_error_free (error);
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ return 0;
+}
+
+static clib_error_t *
+af_packet_fd_read_ready (clib_file_t * uf)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ /* Schedule the rx node */
+ vnet_hw_if_rx_queue_set_int_pending (vnm, uf->private_data);
+ return 0;
+}
+
+static clib_error_t *
+af_packet_fd_error (clib_file_t *uf)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ clib_error_t *err = 0;
+ u64 u64;
+
+ int ret = read (uf->file_descriptor, (char *) &u64, sizeof (u64));
+
+ if (ret < 0)
+ {
+ err = clib_error_return_unix (0, "");
+ vlib_log_notice (apm->log_class, "fd %u %U", uf->file_descriptor,
+ format_clib_error, err);
+ clib_error_free (err);
+ }
+
+ return 0;
+}
+
+static int
+is_bridge (const u8 * host_if_name)
+{
+ u8 *s;
+ DIR *dir = NULL;
+
+ s = format (0, "/sys/class/net/%s/bridge%c", host_if_name, 0);
+ dir = opendir ((char *) s);
+ vec_free (s);
+
+ if (dir)
+ {
+ closedir (dir);
+ return 0;
+ }
+
+ return -1;
+}
+
+static void
+af_packet_set_rx_queues (vlib_main_t *vm, af_packet_if_t *apif)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ af_packet_queue_t *rx_queue;
+
+ vnet_hw_if_set_input_node (vnm, apif->hw_if_index,
+ af_packet_input_node.index);
+
+ vec_foreach (rx_queue, apif->rx_queues)
+ {
+ rx_queue->queue_index = vnet_hw_if_register_rx_queue (
+ vnm, apif->hw_if_index, rx_queue->queue_id, VNET_HW_IF_RXQ_THREAD_ANY);
+
+ {
+ clib_file_t template = { 0 };
+ template.read_function = af_packet_fd_read_ready;
+ template.error_function = af_packet_fd_error;
+ template.file_descriptor = rx_queue->fd;
+ template.private_data = rx_queue->queue_index;
+ template.description =
+ format (0, "%U queue %u", format_af_packet_device_name,
+ apif->dev_instance, rx_queue->queue_id);
+ rx_queue->clib_file_index = clib_file_add (&file_main, &template);
+ }
+ vnet_hw_if_set_rx_queue_file_index (vnm, rx_queue->queue_index,
+ rx_queue->clib_file_index);
+ vnet_hw_if_set_rx_queue_mode (vnm, rx_queue->queue_index,
+ VNET_HW_IF_RX_MODE_INTERRUPT);
+ rx_queue->mode = VNET_HW_IF_RX_MODE_INTERRUPT;
+ }
+ vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index);
+}
+
+static void
+af_packet_set_tx_queues (vlib_main_t *vm, af_packet_if_t *apif)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_queue_t *tx_queue;
+
+ vec_foreach (tx_queue, apif->tx_queues)
+ {
+ tx_queue->queue_index = vnet_hw_if_register_tx_queue (
+ vnm, apif->hw_if_index, tx_queue->queue_id);
+ }
+
+ if (apif->num_txqs == 0)
+ {
+ vlib_log_err (apm->log_class, "Interface %U has 0 txq",
+ format_vnet_hw_if_index_name, vnm, apif->hw_if_index);
+ return;
+ }
+
+ for (u32 j = 0; j < vlib_get_n_threads (); j++)
+ {
+ u32 qi = apif->tx_queues[j % apif->num_txqs].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, j);
+ }
+
+ vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index);
+}
+
+static int
+create_packet_sock (int host_if_index, tpacket_req_u_t *rx_req,
+ tpacket_req_u_t *tx_req, int *fd, af_packet_ring_t *ring,
+ u32 fanout_id, af_packet_if_flags_t *flags, int ver)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ struct sockaddr_ll sll;
+ socklen_t req_sz = sizeof (tpacket_req3_t);
+ int ret;
+ u32 ring_sz = 0;
+
+ if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to create AF_PACKET socket: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ /* bind before rx ring is cfged so we don't receive packets from other interfaces */
+ clib_memset (&sll, 0, sizeof (sll));
+ sll.sll_family = PF_PACKET;
+ sll.sll_protocol = htons (ETH_P_ALL);
+ sll.sll_ifindex = host_if_index;
+ if (bind (*fd, (struct sockaddr *) &sll, sizeof (sll)) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to bind rx packet socket: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ if (setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver)) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to set rx packet interface version: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ int opt = 1;
+ if (setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt)) < 0)
+ {
+ vlib_log_err (
+ apm->log_class,
+ "Failed to set packet tx ring error handling option: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ if (*flags & AF_PACKET_IF_FLAGS_CKSUM_GSO)
+ {
+
+ int opt2 = 1;
+ if (setsockopt (*fd, SOL_PACKET, PACKET_VNET_HDR, &opt2, sizeof (opt2)) <
+ 0)
+ {
+ // remove the flag
+ *flags &= ~AF_PACKET_IF_FLAGS_CKSUM_GSO;
+ vlib_log_debug (apm->log_class,
+ "Failed to set packet vnet hdr error handling "
+ "option: %s (errno %d)",
+ strerror (errno), errno);
+ }
+ }
+
+#if defined(PACKET_QDISC_BYPASS)
+ if (*flags & AF_PACKET_IF_FLAGS_QDISC_BYPASS)
+ /* Introduced with Linux 3.14 so the ifdef should eventually be removed */
+ if (setsockopt (*fd, SOL_PACKET, PACKET_QDISC_BYPASS, &opt, sizeof (opt)) <
+ 0)
+ {
+ // remove the flag
+ *flags &= ~AF_PACKET_IF_FLAGS_QDISC_BYPASS;
+ vlib_log_debug (apm->log_class,
+ "Failed to set qdisc bypass error "
+ "handling option: %s (errno %d)",
+ strerror (errno), errno);
+ }
+#endif
+
+ if (rx_req)
+ {
+ if (*flags & AF_PACKET_IF_FLAGS_FANOUT)
+ {
+ int fanout = ((fanout_id & 0xffff) | ((PACKET_FANOUT_HASH) << 16));
+ if (setsockopt (*fd, SOL_PACKET, PACKET_FANOUT, &fanout,
+ sizeof (fanout)) < 0)
+ {
+ // remove the flag
+ *flags &= ~AF_PACKET_IF_FLAGS_FANOUT;
+ vlib_log_err (apm->log_class,
+ "Failed to set fanout options: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+ }
+ if (ver == TPACKET_V2)
+ {
+ req_sz = sizeof (tpacket_req_t);
+ ring_sz += rx_req->req.tp_block_size * rx_req->req.tp_block_nr;
+ }
+ else
+ ring_sz += rx_req->req3.tp_block_size * rx_req->req3.tp_block_nr;
+ if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to set packet rx ring options: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+ }
+
+ if (tx_req)
+ {
+ if (ver == TPACKET_V2)
+ {
+ req_sz = sizeof (tpacket_req_t);
+ ring_sz += tx_req->req.tp_block_size * tx_req->req.tp_block_nr;
+ }
+ else
+ ring_sz += tx_req->req3.tp_block_size * tx_req->req3.tp_block_nr;
+ if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to set packet tx ring options: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+ }
+ ring->ring_start_addr = mmap (NULL, ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED, *fd, 0);
+ if (ring->ring_start_addr == MAP_FAILED)
+ {
+ vlib_log_err (apm->log_class, "mmap failure: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ ring->ring_size = ring_sz;
+
+ return 0;
+error:
+ if (*fd >= 0)
+ {
+ close (*fd);
+ *fd = -1;
+ }
+ return ret;
+}
+
+int
+af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif,
+ af_packet_create_if_arg_t *arg,
+ af_packet_queue_t *rx_queue, af_packet_queue_t *tx_queue,
+ u8 queue_id)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ tpacket_req_u_t *rx_req = 0;
+ tpacket_req_u_t *tx_req = 0;
+ int ret, fd = -1;
+ af_packet_ring_t ring = { 0 };
+ u8 *ring_addr = 0;
+ u32 rx_frames_per_block, tx_frames_per_block;
+ u32 rx_frame_size, tx_frame_size;
+ u32 i = 0;
+
+ if (rx_queue)
+ {
+ rx_frames_per_block = arg->rx_frames_per_block ?
+ arg->rx_frames_per_block :
+ ((apif->version == TPACKET_V3) ?
+ AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK :
+ AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK_V2);
+
+ rx_frame_size =
+ arg->rx_frame_size ?
+ arg->rx_frame_size :
+ ((apif->version == TPACKET_V3) ? AF_PACKET_DEFAULT_RX_FRAME_SIZE :
+ AF_PACKET_DEFAULT_RX_FRAME_SIZE_V2);
+ vec_validate (rx_queue->rx_req, 0);
+ rx_queue->rx_req->req.tp_block_size =
+ rx_frame_size * rx_frames_per_block;
+ rx_queue->rx_req->req.tp_frame_size = rx_frame_size;
+ rx_queue->rx_req->req.tp_block_nr = (apif->version == TPACKET_V3) ?
+ AF_PACKET_RX_BLOCK_NR :
+ AF_PACKET_RX_BLOCK_NR_V2;
+ rx_queue->rx_req->req.tp_frame_nr =
+ rx_queue->rx_req->req.tp_block_nr * rx_frames_per_block;
+ if (apif->version == TPACKET_V3)
+ {
+ rx_queue->rx_req->req3.tp_retire_blk_tov = 1; // 1 ms block timout
+ rx_queue->rx_req->req3.tp_feature_req_word = 0;
+ rx_queue->rx_req->req3.tp_sizeof_priv = 0;
+ }
+ rx_req = rx_queue->rx_req;
+ }
+ if (tx_queue)
+ {
+ tx_frames_per_block = arg->tx_frames_per_block ?
+ arg->tx_frames_per_block :
+ AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK;
+ tx_frame_size = arg->tx_frame_size ? arg->tx_frame_size :
+ AF_PACKET_DEFAULT_TX_FRAME_SIZE;
+
+ vec_validate (tx_queue->tx_req, 0);
+ tx_queue->tx_req->req.tp_block_size =
+ tx_frame_size * tx_frames_per_block;
+ tx_queue->tx_req->req.tp_frame_size = tx_frame_size;
+ tx_queue->tx_req->req.tp_block_nr = AF_PACKET_TX_BLOCK_NR;
+ tx_queue->tx_req->req.tp_frame_nr =
+ AF_PACKET_TX_BLOCK_NR * tx_frames_per_block;
+ if (apif->version == TPACKET_V3)
+ {
+ tx_queue->tx_req->req3.tp_retire_blk_tov = 0;
+ tx_queue->tx_req->req3.tp_sizeof_priv = 0;
+ tx_queue->tx_req->req3.tp_feature_req_word = 0;
+ }
+ tx_req = tx_queue->tx_req;
+ }
+
+ if (rx_queue || tx_queue)
+ {
+ ret =
+ create_packet_sock (apif->host_if_index, rx_req, tx_req, &fd, &ring,
+ apif->dev_instance, &arg->flags, apif->version);
+
+ if (ret != 0)
+ goto error;
+
+ vec_add1 (apif->fds, fd);
+ vec_add1 (apif->rings, ring);
+ ring_addr = ring.ring_start_addr;
+ }
+
+ if (rx_queue)
+ {
+ rx_queue->fd = fd;
+ vec_validate (rx_queue->rx_ring, rx_queue->rx_req->req.tp_block_nr - 1);
+ vec_foreach_index (i, rx_queue->rx_ring)
+ {
+ rx_queue->rx_ring[i] =
+ ring_addr + i * rx_queue->rx_req->req.tp_block_size;
+ }
+
+ rx_queue->next_rx_block = 0;
+ rx_queue->queue_id = queue_id;
+ rx_queue->is_rx_pending = 0;
+ ring_addr = ring_addr + rx_queue->rx_req->req.tp_block_size *
+ rx_queue->rx_req->req.tp_block_nr;
+ }
+
+ if (tx_queue)
+ {
+ tx_queue->fd = fd;
+ vec_validate (tx_queue->tx_ring, tx_queue->tx_req->req.tp_block_nr - 1);
+ vec_foreach_index (i, tx_queue->tx_ring)
+ {
+ tx_queue->tx_ring[i] =
+ ring_addr + i * tx_queue->tx_req->req.tp_block_size;
+ }
+
+ tx_queue->next_tx_frame = 0;
+ tx_queue->queue_id = queue_id;
+ tx_queue->is_tx_pending = 0;
+ clib_spinlock_init (&tx_queue->lockp);
+ }
+
+ return 0;
+error:
+ vlib_log_err (apm->log_class, "Failed to set queue %u error", queue_id);
+ if (rx_queue)
+ vec_free (rx_queue->rx_req);
+ if (tx_queue)
+ vec_free (tx_queue->tx_req);
+ return ret;
+}
+
+int
+af_packet_device_init (vlib_main_t *vm, af_packet_if_t *apif,
+ af_packet_create_if_arg_t *args)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_queue_t *rx_queue = 0;
+ af_packet_queue_t *tx_queue = 0;
+ u16 nq = clib_min (args->num_rxqs, args->num_txqs);
+ u16 i = 0;
+ int ret = 0;
+
+ // enable fanout feature for multi-rxqs
+ if (args->num_rxqs > 1)
+ args->flags |= AF_PACKET_IF_FLAGS_FANOUT;
+
+ vec_validate (apif->rx_queues, args->num_rxqs - 1);
+ vec_validate (apif->tx_queues, args->num_txqs - 1);
+
+ for (; i < nq; i++)
+ {
+ rx_queue = vec_elt_at_index (apif->rx_queues, i);
+ tx_queue = vec_elt_at_index (apif->tx_queues, i);
+ ret = af_packet_queue_init (vm, apif, args, rx_queue, tx_queue, i);
+ if (ret != 0)
+ goto error;
+ }
+
+ if (args->num_rxqs > args->num_txqs)
+ {
+ for (; i < args->num_rxqs; i++)
+ {
+ rx_queue = vec_elt_at_index (apif->rx_queues, i);
+ ret = af_packet_queue_init (vm, apif, args, rx_queue, 0, i);
+ if (ret != 0)
+ goto error;
+ }
+ }
+ else if (args->num_txqs > args->num_rxqs)
+ {
+ for (; i < args->num_txqs; i++)
+ {
+ tx_queue = vec_elt_at_index (apif->tx_queues, i);
+ ret = af_packet_queue_init (vm, apif, args, 0, tx_queue, i);
+ if (ret != 0)
+ goto error;
+ }
+ }
+
+ apif->num_rxqs = args->num_rxqs;
+ apif->num_txqs = args->num_txqs;
+
+ return 0;
+error:
+ vlib_log_err (apm->log_class, "Failed to init device error");
+ return ret;
+}
+
+int
+af_packet_create_if (af_packet_create_if_arg_t *arg)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vlib_main_t *vm = vlib_get_main ();
+ int fd2 = -1;
+ struct ifreq ifr;
+ af_packet_if_t *apif = 0;
+ u8 hw_addr[6];
+ vnet_sw_interface_t *sw;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_if_caps_t caps = VNET_HW_IF_CAP_INT_MODE;
+ uword *p;
+ uword if_index;
+ u8 *host_if_name_dup = 0;
+ int host_if_index = -1;
+ int ret = 0;
+ u32 oflags = 0, i = 0;
+
+ p = mhash_get (&apm->if_index_by_host_if_name, arg->host_if_name);
+ if (p)
+ {
+ apif = vec_elt_at_index (apm->interfaces, p[0]);
+ arg->sw_if_index = apif->sw_if_index;
+ return VNET_API_ERROR_IF_ALREADY_EXISTS;
+ }
+
+ host_if_name_dup = vec_dup (arg->host_if_name);
+
+ /*
+ * make sure host side of interface is 'UP' before binding AF_PACKET
+ * socket on it.
+ */
+ if ((fd2 = socket (AF_UNIX, SOCK_DGRAM, 0)) < 0)
+ {
+ vlib_log_debug (apm->log_class,
+ "Failed to create AF_UNIX socket: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ clib_memcpy (ifr.ifr_name, (const char *) arg->host_if_name,
+ vec_len (arg->host_if_name));
+ if (ioctl (fd2, SIOCGIFINDEX, &ifr) < 0)
+ {
+ vlib_log_debug (
+ apm->log_class,
+ "Failed to retrieve the interface (%s) index: %s (errno %d)",
+ arg->host_if_name, strerror (errno), errno);
+ ret = VNET_API_ERROR_INVALID_INTERFACE;
+ goto error;
+ }
+
+ host_if_index = ifr.ifr_ifindex;
+ if (ioctl (fd2, SIOCGIFFLAGS, &ifr) < 0)
+ {
+ vlib_log_debug (apm->log_class,
+ "Failed to get the active flag: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+
+ if (!(ifr.ifr_flags & IFF_UP))
+ {
+ ifr.ifr_flags |= IFF_UP;
+ if (ioctl (fd2, SIOCSIFFLAGS, &ifr) < 0)
+ {
+ vlib_log_debug (apm->log_class,
+ "Failed to set the active flag: %s (errno %d)",
+ strerror (errno), errno);
+ ret = VNET_API_ERROR_SYSCALL_ERROR_1;
+ goto error;
+ }
+ }
+
+ if (fd2 > -1)
+ {
+ close (fd2);
+ fd2 = -1;
+ }
+
+ // check the host interface capabilities
+ oflags = af_packet_get_if_capabilities (arg->host_if_name);
+
+ ret = is_bridge (arg->host_if_name);
+ if (ret == 0) /* is a bridge, ignore state */
+ host_if_index = -1;
+
+ /* So far everything looks good, let's create interface */
+ pool_get (apm->interfaces, apif);
+ if_index = apif - apm->interfaces;
+
+ apif->dev_instance = if_index;
+ apif->host_if_index = host_if_index;
+ apif->host_if_name = host_if_name_dup;
+ apif->per_interface_next_index = ~0;
+ apif->mode = arg->mode;
+ apif->host_interface_oflags = oflags;
+
+ if (arg->is_v2)
+ apif->version = TPACKET_V2;
+ else
+ apif->version = TPACKET_V3;
+
+ ret = af_packet_device_init (vm, apif, arg);
+ if (ret != 0)
+ goto error;
+
+ ret = af_packet_read_mtu (apif);
+ if (ret != 0)
+ goto error;
+
+
+ if (apif->mode != AF_PACKET_IF_MODE_IP)
+ {
+ vnet_eth_interface_registration_t eir = {};
+ /*use configured or generate random MAC address */
+ if (arg->hw_addr)
+ clib_memcpy (hw_addr, arg->hw_addr, 6);
+ else
+ {
+ f64 now = vlib_time_now (vm);
+ u32 rnd;
+ rnd = (u32) (now * 1e6);
+ rnd = random_u32 (&rnd);
+
+ clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
+ hw_addr[0] = 2;
+ hw_addr[1] = 0xfe;
+ }
+
+ eir.dev_class_index = af_packet_device_class.index;
+ eir.dev_instance = apif->dev_instance;
+ eir.address = hw_addr;
+ eir.cb.set_max_frame_size = af_packet_eth_set_max_frame_size;
+ apif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+ }
+ else
+ {
+ apif->hw_if_index = vnet_register_interface (
+ vnm, af_packet_device_class.index, apif->dev_instance,
+ af_packet_ip_device_hw_interface_class.index, apif->dev_instance);
+ }
+
+ sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
+ apif->sw_if_index = sw->sw_if_index;
+
+ af_packet_set_rx_queues (vm, apif);
+ af_packet_set_tx_queues (vm, apif);
+
+ if (arg->flags & AF_PACKET_IF_FLAGS_FANOUT)
+ apif->is_fanout_enabled = 1;
+
+ apif->is_qdisc_bypass_enabled =
+ (arg->flags & AF_PACKET_IF_FLAGS_QDISC_BYPASS);
+
+ if (arg->flags & AF_PACKET_IF_FLAGS_CKSUM_GSO)
+ {
+ if (apif->host_interface_oflags & AF_PACKET_OFFLOAD_FLAG_TXCKSUM)
+ {
+ apif->is_cksum_gso_enabled = 1;
+ caps |= VNET_HW_IF_CAP_TX_IP4_CKSUM | VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM;
+ }
+
+ if (apif->host_interface_oflags & AF_PACKET_OFFLOAD_FLAG_GSO)
+ {
+ apif->is_cksum_gso_enabled = 1;
+ caps |= VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_TX_IP4_CKSUM |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
+ }
+ }
+ vnet_hw_if_set_caps (vnm, apif->hw_if_index, caps);
+ vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+
+ mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
+ 0);
+ arg->sw_if_index = apif->sw_if_index;
+
+ return 0;
+
+error:
+ if (fd2 > -1)
+ {
+ close (fd2);
+ fd2 = -1;
+ }
+
+ vec_free (host_if_name_dup);
+
+ if (apif)
+ {
+ vec_foreach_index (i, apif->fds)
+ if (apif->fds[i] != -1)
+ close (apif->fds[i]);
+ vec_free (apif->fds);
+ memset (apif, 0, sizeof (*apif));
+ pool_put (apm->interfaces, apif);
+ }
+ return ret;
+}
+
+static int
+af_packet_rx_queue_free (af_packet_if_t *apif, af_packet_queue_t *rx_queue)
+{
+ clib_file_del_by_index (&file_main, rx_queue->clib_file_index);
+ rx_queue->fd = -1;
+ rx_queue->rx_ring = NULL;
+ vec_free (rx_queue->rx_req);
+ rx_queue->rx_req = NULL;
+ return 0;
+}
+
+static int
+af_packet_tx_queue_free (af_packet_if_t *apif, af_packet_queue_t *tx_queue)
+{
+ tx_queue->fd = -1;
+ clib_spinlock_free (&tx_queue->lockp);
+ tx_queue->tx_ring = NULL;
+ vec_free (tx_queue->tx_req);
+ tx_queue->tx_req = NULL;
+ return 0;
+}
+
+static int
+af_packet_ring_free (af_packet_if_t *apif, af_packet_ring_t *ring)
+{
+ af_packet_main_t *apm = &af_packet_main;
+
+ if (ring)
+ {
+ // FIXME: unmap the memory
+ if (munmap (ring->ring_start_addr, ring->ring_size))
+ vlib_log_warn (apm->log_class,
+ "Host interface %s could not free ring %p of size %u",
+ apif->host_if_name, ring->ring_start_addr,
+ ring->ring_size);
+ else
+ ring->ring_start_addr = 0;
+ }
+
+ return 0;
+}
+
+int
+af_packet_delete_if (u8 *host_if_name)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif;
+ af_packet_queue_t *rx_queue;
+ af_packet_queue_t *tx_queue;
+ af_packet_ring_t *ring;
+ uword *p;
+ u32 i = 0;
+
+ p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
+ if (p == NULL)
+ {
+ vlib_log_warn (apm->log_class, "Host interface %s does not exist",
+ host_if_name);
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ }
+ apif = pool_elt_at_index (apm->interfaces, p[0]);
+
+ /* bring down the interface */
+ vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0);
+ if (apif->mode != AF_PACKET_IF_MODE_IP)
+ ethernet_delete_interface (vnm, apif->hw_if_index);
+ else
+ vnet_delete_hw_interface (vnm, apif->hw_if_index);
+
+ /* clean up */
+ vec_foreach_index (i, apif->fds)
+ if (apif->fds[i] != -1)
+ close (apif->fds[i]);
+ vec_foreach (rx_queue, apif->rx_queues)
+ af_packet_rx_queue_free (apif, rx_queue);
+ vec_foreach (tx_queue, apif->tx_queues)
+ af_packet_tx_queue_free (apif, tx_queue);
+ vec_foreach (ring, apif->rings)
+ af_packet_ring_free (apif, ring);
+
+ vec_free (apif->fds);
+ apif->fds = NULL;
+ vec_free (apif->rx_queues);
+ apif->rx_queues = NULL;
+ vec_free (apif->tx_queues);
+ apif->tx_queues = NULL;
+ vec_free (apif->rings);
+ apif->rings = NULL;
+
+ vec_free (apif->host_if_name);
+ apif->host_if_name = NULL;
+ apif->host_if_index = -1;
+
+ mhash_unset (&apm->if_index_by_host_if_name, host_if_name, p);
+
+ memset (apif, 0, sizeof (*apif));
+ pool_put (apm->interfaces, apif);
+
+ return 0;
+}
+
+int
+af_packet_enable_disable_qdisc_bypass (u32 sw_if_index, u8 enable_disable)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hw;
+ u32 i;
+
+ hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
+
+ if (hw->dev_class_index != af_packet_device_class.index)
+ return VNET_API_ERROR_INVALID_INTERFACE;
+
+ apif = pool_elt_at_index (apm->interfaces, hw->dev_instance);
+
+#if defined(PACKET_QDISC_BYPASS)
+ vec_foreach_index (i, apif->fds)
+ {
+ if (enable_disable)
+ {
+ int opt = 1;
+
+ /* Introduced with Linux 3.14 so the ifdef should eventually be
+ * removed */
+ if (setsockopt (apif->fds[i], SOL_PACKET, PACKET_QDISC_BYPASS, &opt,
+ sizeof (opt)) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to enable qdisc bypass error "
+ "handling option: %s (errno %d)",
+ strerror (errno), errno);
+ }
+ apif->is_qdisc_bypass_enabled = 1;
+ }
+ else
+ {
+ int opt = 0;
+ if (setsockopt (apif->fds[i], SOL_PACKET, PACKET_QDISC_BYPASS, &opt,
+ sizeof (opt)) < 0)
+ {
+ vlib_log_err (apm->log_class,
+ "Failed to disable qdisc bypass error "
+ "handling option: %s (errno %d)",
+ strerror (errno), errno);
+ }
+ apif->is_qdisc_bypass_enabled = 0;
+ }
+ }
+
+#endif
+ return 0;
+}
+
+int
+af_packet_set_l4_cksum_offload (u32 sw_if_index, u8 set)
+{
+ // deprecated ...
+ return 0;
+}
+
+int
+af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif;
+ af_packet_if_detail_t *r_af_packet_ifs = NULL;
+ af_packet_if_detail_t *af_packet_if = NULL;
+
+ pool_foreach (apif, apm->interfaces)
+ {
+ vec_add2 (r_af_packet_ifs, af_packet_if, 1);
+ af_packet_if->sw_if_index = apif->sw_if_index;
+ if (apif->host_if_name)
+ {
+ clib_memcpy (af_packet_if->host_if_name, apif->host_if_name,
+ MIN (ARRAY_LEN (af_packet_if->host_if_name) - 1,
+ strlen ((const char *) apif->host_if_name)));
+ }
+ }
+
+ *out_af_packet_ifs = r_af_packet_ifs;
+
+ return 0;
+}
+
+static clib_error_t *
+af_packet_init (vlib_main_t * vm)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ clib_memset (apm, 0, sizeof (af_packet_main_t));
+
+ mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword));
+
+ vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ apm->log_class = vlib_log_register_class ("af_packet", 0);
+ vlib_log_debug (apm->log_class, "initialized");
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (af_packet_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/af_packet/af_packet.h b/src/plugins/af_packet/af_packet.h
new file mode 100644
index 00000000000..e66a1119ba1
--- /dev/null
+++ b/src/plugins/af_packet/af_packet.h
@@ -0,0 +1,182 @@
+/*
+ *------------------------------------------------------------------
+ * af_packet.h - linux kernel packet interface header file
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <linux/if_packet.h>
+
+#include <vppinfra/lock.h>
+#include <vlib/log.h>
+
+typedef struct tpacket_block_desc block_desc_t;
+typedef struct tpacket_req tpacket_req_t;
+typedef struct tpacket_req3 tpacket_req3_t;
+typedef struct tpacket2_hdr tpacket2_hdr_t;
+typedef struct tpacket3_hdr tpacket3_hdr_t;
+
+typedef union _tpacket_req_u
+{
+ tpacket_req_t req;
+ tpacket_req3_t req3;
+} tpacket_req_u_t;
+
+typedef enum
+{
+ AF_PACKET_IF_MODE_ETHERNET = 1,
+ AF_PACKET_IF_MODE_IP = 2
+} af_packet_if_mode_t;
+
+typedef enum
+{
+ AF_PACKET_IF_FLAGS_QDISC_BYPASS = 1,
+ AF_PACKET_IF_FLAGS_CKSUM_GSO = 2,
+ AF_PACKET_IF_FLAGS_FANOUT = 4,
+ AF_PACKET_IF_FLAGS_VERSION_2 = 8,
+} af_packet_if_flags_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u8 host_if_name[64];
+} af_packet_if_detail_t;
+
+typedef struct
+{
+ u8 *ring_start_addr;
+ u32 ring_size;
+} af_packet_ring_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ clib_spinlock_t lockp;
+ int fd;
+ union
+ {
+ tpacket_req_u_t *rx_req;
+ tpacket_req_u_t *tx_req;
+ };
+
+ union
+ {
+ u8 **rx_ring;
+ u8 **tx_ring;
+ };
+
+ union
+ {
+ u32 next_rx_block;
+ u32 next_rx_frame;
+ u32 next_tx_frame;
+ };
+
+ u16 queue_id;
+ u32 queue_index;
+
+ u32 clib_file_index;
+
+ u32 rx_frame_offset;
+ u16 num_rx_pkts;
+ u8 is_rx_pending;
+ u8 is_tx_pending;
+ vnet_hw_if_rx_mode mode;
+} af_packet_queue_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u32 per_interface_next_index;
+ af_packet_if_mode_t mode;
+ u8 is_admin_up;
+ u8 is_cksum_gso_enabled;
+ u8 version;
+ af_packet_queue_t *rx_queues;
+ af_packet_queue_t *tx_queues;
+
+ u8 num_rxqs;
+ u8 num_txqs;
+
+ u8 *host_if_name;
+ int host_if_index;
+
+ u32 host_mtu;
+ u32 dev_instance;
+
+ af_packet_ring_t *rings;
+ u8 is_qdisc_bypass_enabled;
+ u8 is_fanout_enabled;
+ int *fds;
+ u32 host_interface_oflags;
+} af_packet_if_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ af_packet_if_t *interfaces;
+
+ u32 polling_count;
+ /* rx buffer cache */
+ u32 **rx_buffers;
+
+ /* hash of host interface names */
+ mhash_t if_index_by_host_if_name;
+
+ /** log class */
+ vlib_log_class_t log_class;
+} af_packet_main_t;
+
+typedef struct
+{
+ u8 *host_if_name;
+ u8 *hw_addr;
+ u32 rx_frame_size;
+ u32 tx_frame_size;
+ u32 rx_frames_per_block;
+ u32 tx_frames_per_block;
+ u8 num_rxqs;
+ u8 num_txqs;
+ u8 is_v2;
+ af_packet_if_mode_t mode;
+ af_packet_if_flags_t flags;
+
+ /* return */
+ u32 sw_if_index;
+} af_packet_create_if_arg_t;
+
+extern af_packet_main_t af_packet_main;
+extern vnet_device_class_t af_packet_device_class;
+extern vlib_node_registration_t af_packet_input_node;
+
+int af_packet_create_if (af_packet_create_if_arg_t *arg);
+int af_packet_delete_if (u8 *host_if_name);
+int af_packet_set_l4_cksum_offload (u32 sw_if_index, u8 set);
+int af_packet_enable_disable_qdisc_bypass (u32 sw_if_index, u8 enable_disable);
+int af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs);
+
+format_function_t format_af_packet_device_name;
+
+#define MIN(x,y) (((x)<(y))?(x):(y))
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/af_packet/af_packet_api.c b/src/plugins/af_packet/af_packet_api.c
new file mode 100644
index 00000000000..ede057659e8
--- /dev/null
+++ b/src/plugins/af_packet/af_packet_api.c
@@ -0,0 +1,253 @@
+/*
+ *------------------------------------------------------------------
+ * af_packet_api.c - af-packet api
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <af_packet/af_packet.h>
+
+#include <vnet/format_fns.h>
+#include <af_packet/af_packet.api_enum.h>
+#include <af_packet/af_packet.api_types.h>
+
+#define REPLY_MSG_ID_BASE msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static u16 msg_id_base;
+
+static void
+vl_api_af_packet_create_t_handler (vl_api_af_packet_create_t * mp)
+{
+ af_packet_create_if_arg_t _arg, *arg = &_arg;
+ vl_api_af_packet_create_reply_t *rmp;
+ int rv = 0;
+
+ clib_memset (arg, 0, sizeof (*arg));
+
+ arg->host_if_name = format (0, "%s", mp->host_if_name);
+ vec_add1 (arg->host_if_name, 0);
+
+ arg->hw_addr = mp->use_random_hw_addr ? 0 : mp->hw_addr;
+ arg->mode = AF_PACKET_IF_MODE_ETHERNET;
+ // Default flags
+ arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO;
+ rv = af_packet_create_if (arg);
+
+ vec_free (arg->host_if_name);
+
+ REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_REPLY, ({
+ rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index);
+ }));
+}
+
+static void
+vl_api_af_packet_create_v2_t_handler (vl_api_af_packet_create_v2_t *mp)
+{
+ af_packet_create_if_arg_t _arg, *arg = &_arg;
+ vl_api_af_packet_create_v2_reply_t *rmp;
+ int rv = 0;
+
+ clib_memset (arg, 0, sizeof (*arg));
+
+ arg->host_if_name = format (0, "%s", mp->host_if_name);
+ vec_add1 (arg->host_if_name, 0);
+
+ // Default number of rx/tx queue(s)
+ arg->num_rxqs = 1;
+ arg->num_txqs = 1;
+ arg->rx_frame_size = clib_net_to_host_u32 (mp->rx_frame_size);
+ arg->tx_frame_size = clib_net_to_host_u32 (mp->tx_frame_size);
+ arg->rx_frames_per_block = clib_net_to_host_u32 (mp->rx_frames_per_block);
+ arg->tx_frames_per_block = clib_net_to_host_u32 (mp->tx_frames_per_block);
+ arg->hw_addr = mp->use_random_hw_addr ? 0 : mp->hw_addr;
+ arg->mode = AF_PACKET_IF_MODE_ETHERNET;
+ // Default flags
+ arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO;
+
+ if (mp->num_rx_queues > 1)
+ arg->num_rxqs = clib_net_to_host_u16 (mp->num_rx_queues);
+
+ rv = af_packet_create_if (arg);
+
+ vec_free (arg->host_if_name);
+ REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_V2_REPLY, ({
+ rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index);
+ }));
+}
+
+static void
+vl_api_af_packet_create_v3_t_handler (vl_api_af_packet_create_v3_t *mp)
+{
+ af_packet_create_if_arg_t _arg, *arg = &_arg;
+ vl_api_af_packet_create_v3_reply_t *rmp;
+ int rv = 0;
+
+ clib_memset (arg, 0, sizeof (*arg));
+
+ arg->host_if_name = format (0, "%s", mp->host_if_name);
+ vec_add1 (arg->host_if_name, 0);
+
+ // Default number of rx/tx queue(s)
+ arg->num_rxqs = 1;
+ arg->num_txqs = 1;
+ arg->rx_frame_size = clib_net_to_host_u32 (mp->rx_frame_size);
+ arg->tx_frame_size = clib_net_to_host_u32 (mp->tx_frame_size);
+ arg->rx_frames_per_block = clib_net_to_host_u32 (mp->rx_frames_per_block);
+ arg->tx_frames_per_block = clib_net_to_host_u32 (mp->tx_frames_per_block);
+ arg->hw_addr = mp->use_random_hw_addr ? 0 : mp->hw_addr;
+
+ switch (clib_net_to_host_u32 (mp->mode))
+ {
+ case AF_PACKET_API_MODE_ETHERNET:
+ arg->mode = AF_PACKET_IF_MODE_ETHERNET;
+ break;
+ case AF_PACKET_API_MODE_IP:
+ arg->mode = AF_PACKET_IF_MODE_IP;
+ break;
+ default:
+ arg->sw_if_index = ~0;
+ rv = VNET_ERR_INVALID_VALUE;
+ goto error;
+ }
+
+ STATIC_ASSERT (((int) AF_PACKET_API_FLAG_QDISC_BYPASS ==
+ (int) AF_PACKET_IF_FLAGS_QDISC_BYPASS),
+ "af-packet qdisc-bypass api flag mismatch");
+ STATIC_ASSERT (
+ ((int) AF_PACKET_API_FLAG_CKSUM_GSO == (int) AF_PACKET_IF_FLAGS_CKSUM_GSO),
+ "af-packet checksum/gso offload api flag mismatch");
+
+ STATIC_ASSERT (
+ ((int) AF_PACKET_API_FLAG_VERSION_2 == (int) AF_PACKET_IF_FLAGS_VERSION_2),
+ "af-packet version 2 api flag mismatch");
+
+ // Default flags
+ arg->flags = clib_net_to_host_u32 (mp->flags);
+
+ if (clib_net_to_host_u16 (mp->num_rx_queues) > 1)
+ arg->num_rxqs = clib_net_to_host_u16 (mp->num_rx_queues);
+
+ if (clib_net_to_host_u16 (mp->num_tx_queues) > 1)
+ arg->num_txqs = clib_net_to_host_u16 (mp->num_tx_queues);
+
+ arg->is_v2 = (arg->flags & AF_PACKET_API_FLAG_VERSION_2) ? 1 : 0;
+ rv = af_packet_create_if (arg);
+
+error:
+ vec_free (arg->host_if_name);
+ REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_V3_REPLY, ({
+ rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index);
+ }));
+}
+
+static void
+vl_api_af_packet_delete_t_handler (vl_api_af_packet_delete_t * mp)
+{
+ vl_api_af_packet_delete_reply_t *rmp;
+ int rv = 0;
+ u8 *host_if_name = NULL;
+
+ host_if_name = format (0, "%s", mp->host_if_name);
+ vec_add1 (host_if_name, 0);
+
+ rv = af_packet_delete_if (host_if_name);
+
+ vec_free (host_if_name);
+
+ REPLY_MACRO (VL_API_AF_PACKET_DELETE_REPLY);
+}
+
+static void
+ vl_api_af_packet_set_l4_cksum_offload_t_handler
+ (vl_api_af_packet_set_l4_cksum_offload_t * mp)
+{
+ vl_api_af_packet_delete_reply_t *rmp;
+ int rv = 0;
+
+ rv = af_packet_set_l4_cksum_offload (ntohl (mp->sw_if_index), mp->set);
+ REPLY_MACRO (VL_API_AF_PACKET_SET_L4_CKSUM_OFFLOAD_REPLY);
+}
+
+static void
+af_packet_send_details (vpe_api_main_t * am,
+ vl_api_registration_t * reg,
+ af_packet_if_detail_t * af_packet_if, u32 context)
+{
+ vl_api_af_packet_details_t *mp;
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = htons (REPLY_MSG_ID_BASE + VL_API_AF_PACKET_DETAILS);
+ mp->sw_if_index = htonl (af_packet_if->sw_if_index);
+ clib_memcpy (mp->host_if_name, af_packet_if->host_if_name,
+ MIN (ARRAY_LEN (mp->host_if_name) - 1,
+ strlen ((const char *) af_packet_if->host_if_name)));
+
+ mp->context = context;
+ vl_api_send_msg (reg, (u8 *) mp);
+}
+
+
+static void
+vl_api_af_packet_dump_t_handler (vl_api_af_packet_dump_t * mp)
+{
+ int rv;
+ vpe_api_main_t *am = &vpe_api_main;
+ vl_api_registration_t *reg;
+ af_packet_if_detail_t *out_af_packet_ifs = NULL;
+ af_packet_if_detail_t *af_packet_if = NULL;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ rv = af_packet_dump_ifs (&out_af_packet_ifs);
+ if (rv)
+ return;
+
+ vec_foreach (af_packet_if, out_af_packet_ifs)
+ {
+ af_packet_send_details (am, reg, af_packet_if, mp->context);
+ }
+
+ vec_free (out_af_packet_ifs);
+}
+
+#include <af_packet/af_packet.api.c>
+static clib_error_t *
+af_packet_api_hookup (vlib_main_t * vm)
+{
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (af_packet_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/af_packet/cli.c b/src/plugins/af_packet/cli.c
index e5466c9599d..2af3fb17ee5 100644
--- a/src/vnet/devices/af_packet/cli.c
+++ b/src/plugins/af_packet/cli.c
@@ -30,7 +30,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/af_packet/af_packet.h>
+#include <af_packet/af_packet.h>
/**
* @file
@@ -44,12 +44,22 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- u8 *host_if_name = NULL;
+ af_packet_create_if_arg_t _arg, *arg = &_arg;
+ clib_error_t *error = NULL;
u8 hwaddr[6];
- u8 *hw_addr_ptr = 0;
- u32 sw_if_index;
int r;
- clib_error_t *error = NULL;
+
+ clib_memset (arg, 0, sizeof (*arg));
+
+ // Default mode
+ arg->mode = AF_PACKET_IF_MODE_ETHERNET;
+
+ // Default number of rx/tx queue(s)
+ arg->num_rxqs = 1;
+ arg->num_txqs = 1;
+
+ // Default flags
+ arg->flags = AF_PACKET_IF_FLAGS_QDISC_BYPASS | AF_PACKET_IF_FLAGS_CKSUM_GSO;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -57,12 +67,33 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "name %s", &host_if_name))
+ if (unformat (line_input, "name %s", &arg->host_if_name))
;
- else
- if (unformat
- (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr))
- hw_addr_ptr = hwaddr;
+ else if (unformat (line_input, "rx-size %u", &arg->rx_frame_size))
+ ;
+ else if (unformat (line_input, "tx-size %u", &arg->tx_frame_size))
+ ;
+ else if (unformat (line_input, "rx-per-block %u",
+ &arg->rx_frames_per_block))
+ ;
+ else if (unformat (line_input, "tx-per-block %u",
+ &arg->tx_frames_per_block))
+ ;
+ else if (unformat (line_input, "num-rx-queues %u", &arg->num_rxqs))
+ ;
+ else if (unformat (line_input, "num-tx-queues %u", &arg->num_txqs))
+ ;
+ else if (unformat (line_input, "qdisc-bypass-disable"))
+ arg->flags &= ~AF_PACKET_IF_FLAGS_QDISC_BYPASS;
+ else if (unformat (line_input, "cksum-gso-disable"))
+ arg->flags &= ~AF_PACKET_IF_FLAGS_CKSUM_GSO;
+ else if (unformat (line_input, "mode ip"))
+ arg->mode = AF_PACKET_IF_MODE_IP;
+ else if (unformat (line_input, "v2"))
+ arg->is_v2 = 1;
+ else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address,
+ hwaddr))
+ arg->hw_addr = hwaddr;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -71,13 +102,13 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
}
- if (host_if_name == NULL)
+ if (arg->host_if_name == NULL)
{
error = clib_error_return (0, "missing host interface name");
goto done;
}
- r = af_packet_create_if (vm, host_if_name, hw_addr_ptr, &sw_if_index);
+ r = af_packet_create_if (arg);
if (r == VNET_API_ERROR_SYSCALL_ERROR_1)
{
@@ -98,10 +129,10 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main (),
- sw_if_index);
+ arg->sw_if_index);
done:
- vec_free (host_if_name);
+ vec_free (arg->host_if_name);
unformat_free (line_input);
return error;
@@ -129,13 +160,13 @@ done:
* Once the host interface is created, enable the interface using:
* @cliexcmd{set interface state host-vpp1 up}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (af_packet_create_command, static) = {
.path = "create host-interface",
- .short_help = "create host-interface name <ifname> [hw-addr <mac-addr>]",
+ .short_help = "create host-interface [v2] name <ifname> [num-rx-queues <n>] "
+ "[num-tx-queues <n>] [hw-addr <mac-addr>] [mode ip] "
+ "[qdisc-bypass-disable] [cksum-gso-disable]",
.function = af_packet_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -167,7 +198,7 @@ af_packet_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
goto done;
}
- af_packet_delete_if (vm, host_if_name);
+ af_packet_delete_if (host_if_name);
done:
vec_free (host_if_name);
@@ -186,13 +217,11 @@ done:
* Example of how to delete a host interface named host-vpp1:
* @cliexcmd{delete host-interface name vpp1}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (af_packet_delete_command, static) = {
.path = "delete host-interface",
.short_help = "delete host-interface name <ifname>",
.function = af_packet_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
af_packet_set_l4_cksum_offload_command_fn (vlib_main_t * vm,
@@ -210,8 +239,8 @@ af_packet_set_l4_cksum_offload_command_fn (vlib_main_t * vm,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat
- (line_input, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
;
else if (unformat (line_input, "on"))
set = 1;
@@ -225,7 +254,7 @@ af_packet_set_l4_cksum_offload_command_fn (vlib_main_t * vm,
}
}
- if (af_packet_set_l4_cksum_offload (vm, sw_if_index, set) < 0)
+ if (af_packet_set_l4_cksum_offload (sw_if_index, set) < 0)
error = clib_error_return (0, "not an af_packet interface");
done:
@@ -243,13 +272,57 @@ done:
* @cliexcmd{set host-interface l4-cksum-offload host-vpp0 off}
* @cliexcmd{set host-interface l4-cksum-offload host-vpp0 on}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (af_packet_set_l4_cksum_offload_command, static) = {
.path = "set host-interface l4-cksum-offload",
.short_help = "set host-interface l4-cksum-offload <host-if-name> <on|off>",
.function = af_packet_set_l4_cksum_offload_command_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+af_packet_enable_disable_qdisc_bypass_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 enable_disable = 0;
+ clib_error_t *error = NULL;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "enable"))
+ enable_disable = 1;
+ else if (unformat (line_input, "disable"))
+ enable_disable = 0;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (af_packet_enable_disable_qdisc_bypass (sw_if_index, enable_disable) < 0)
+ error = clib_error_return (0, "not an af_packet interface");
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+
+VLIB_CLI_COMMAND (af_packet_enable_disable_qdisc_bypass_command, static) = {
+ .path = "set host-interface qdisc-bypass",
+ .short_help =
+ "set host-interface qdisc-bypass <host-if-name> <enable|disable>",
+ .function = af_packet_enable_disable_qdisc_bypass_command_fn,
+};
clib_error_t *
af_packet_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/af_packet/device.c b/src/plugins/af_packet/device.c
new file mode 100644
index 00000000000..d76dad3dde0
--- /dev/null
+++ b/src/plugins/af_packet/device.c
@@ -0,0 +1,793 @@
+/*
+ *------------------------------------------------------------------
+ * af_packet.c - linux kernel packet interface
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip_psh_cksum.h>
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/udp/udp_packet.h>
+
+#include <af_packet/af_packet.h>
+#include <vnet/devices/virtio/virtio_std.h>
+#include <vnet/devices/netlink.h>
+
+#define foreach_af_packet_tx_func_error \
+_(FRAME_NOT_READY, "tx frame not ready") \
+_(TXRING_EAGAIN, "tx sendto temporary failure") \
+_(TXRING_FATAL, "tx sendto fatal failure") \
+_(TXRING_OVERRUN, "tx ring overrun")
+
+typedef enum
+{
+#define _(f,s) AF_PACKET_TX_ERROR_##f,
+ foreach_af_packet_tx_func_error
+#undef _
+ AF_PACKET_TX_N_ERROR,
+} af_packet_tx_func_error_t;
+
+static char *af_packet_tx_func_error_strings[] = {
+#define _(n,s) s,
+ foreach_af_packet_tx_func_error
+#undef _
+};
+
+typedef struct
+{
+ u32 buffer_index;
+ u32 hw_if_index;
+ u16 queue_id;
+ u8 is_v2;
+ union
+ {
+ tpacket2_hdr_t tph2;
+ tpacket3_hdr_t tph3;
+ };
+ vnet_virtio_net_hdr_t vnet_hdr;
+ vlib_buffer_t buffer;
+} af_packet_tx_trace_t;
+
+#ifndef CLIB_MARCH_VARIANT
+u8 *
+format_af_packet_device_name (u8 * s, va_list * args)
+{
+ u32 i = va_arg (*args, u32);
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, i);
+
+ s = format (s, "host-%s", apif->host_if_name);
+ return s;
+}
+#endif /* CLIB_MARCH_VARIANT */
+
+static u8 *
+format_af_packet_device (u8 * s, va_list * args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ u32 indent = format_get_indent (s);
+ int __clib_unused verbose = va_arg (*args, int);
+
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, dev_instance);
+ af_packet_queue_t *rx_queue = 0;
+ af_packet_queue_t *tx_queue = 0;
+
+ s = format (s, "Linux PACKET socket interface %s",
+ (apif->version == TPACKET_V2) ? "v2" : "v3");
+ s = format (s, "\n%UFEATURES:", format_white_space, indent);
+ if (apif->is_qdisc_bypass_enabled)
+ s = format (s, "\n%Uqdisc-bpass-enabled", format_white_space, indent + 2);
+ if (apif->is_cksum_gso_enabled)
+ s = format (s, "\n%Ucksum-gso-enabled", format_white_space, indent + 2);
+ if (apif->is_fanout_enabled)
+ s = format (s, "\n%Ufanout-enabled", format_white_space, indent + 2);
+
+ vec_foreach (rx_queue, apif->rx_queues)
+ {
+ u32 rx_block_size = rx_queue->rx_req->req.tp_block_size;
+ u32 rx_frame_size = rx_queue->rx_req->req.tp_frame_size;
+ u32 rx_frame_nr = rx_queue->rx_req->req.tp_frame_nr;
+ u32 rx_block_nr = rx_queue->rx_req->req.tp_block_nr;
+
+ s = format (s, "\n%URX Queue %u:", format_white_space, indent,
+ rx_queue->queue_id);
+ s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d",
+ format_white_space, indent + 2, rx_block_size, rx_block_nr,
+ rx_frame_size, rx_frame_nr);
+ if (apif->version == TPACKET_V2)
+ s = format (s, " next frame:%d", rx_queue->next_rx_frame);
+ else
+ s = format (s, " next block:%d", rx_queue->next_rx_block);
+ if (rx_queue->is_rx_pending)
+ {
+ s = format (
+ s, "\n%UPending Request: num-rx-pkts:%d next-frame-offset:%d",
+ format_white_space, indent + 2, rx_queue->num_rx_pkts,
+ rx_queue->rx_frame_offset);
+ }
+ }
+
+ vec_foreach (tx_queue, apif->tx_queues)
+ {
+ clib_spinlock_lock (&tx_queue->lockp);
+ u32 tx_block_sz = tx_queue->tx_req->req.tp_block_size;
+ u32 tx_frame_sz = tx_queue->tx_req->req.tp_frame_size;
+ u32 tx_frame_nr = tx_queue->tx_req->req.tp_frame_nr;
+ u32 tx_block_nr = tx_queue->tx_req->req.tp_block_nr;
+ int block = 0;
+ int n_send_req = 0, n_avail = 0, n_sending = 0, n_tot = 0, n_wrong = 0;
+ u8 *tx_block_start = tx_queue->tx_ring[block];
+ u32 tx_frame = tx_queue->next_tx_frame;
+ tpacket3_hdr_t *tph3;
+ tpacket2_hdr_t *tph2;
+
+ s = format (s, "\n%UTX Queue %u:", format_white_space, indent,
+ tx_queue->queue_id);
+ s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d",
+ format_white_space, indent + 2, tx_block_sz, tx_block_nr,
+ tx_frame_sz, tx_frame_nr);
+ s = format (s, " next frame:%d", tx_queue->next_tx_frame);
+ if (apif->version & TPACKET_V3)
+ do
+ {
+ tph3 =
+ (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz);
+ tx_frame = (tx_frame + 1) % tx_frame_nr;
+ if (tph3->tp_status == 0)
+ n_avail++;
+ else if (tph3->tp_status & TP_STATUS_SEND_REQUEST)
+ n_send_req++;
+ else if (tph3->tp_status & TP_STATUS_SENDING)
+ n_sending++;
+ else
+ n_wrong++;
+ n_tot++;
+ }
+ while (tx_frame != tx_queue->next_tx_frame);
+ else
+ do
+ {
+ tph2 =
+ (tpacket2_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz);
+ tx_frame = (tx_frame + 1) % tx_frame_nr;
+ if (tph2->tp_status == 0)
+ n_avail++;
+ else if (tph2->tp_status & TP_STATUS_SEND_REQUEST)
+ n_send_req++;
+ else if (tph2->tp_status & TP_STATUS_SENDING)
+ n_sending++;
+ else
+ n_wrong++;
+ n_tot++;
+ }
+ while (tx_frame != tx_queue->next_tx_frame);
+ s =
+ format (s, "\n%Uavailable:%d request:%d sending:%d wrong:%d total:%d",
+ format_white_space, indent + 2, n_avail, n_send_req, n_sending,
+ n_wrong, n_tot);
+ clib_spinlock_unlock (&tx_queue->lockp);
+ }
+ return s;
+}
+
+static u8 *
+format_af_packet_tx_trace (u8 *s, va_list *va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
+ af_packet_tx_trace_t *t = va_arg (*va, af_packet_tx_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "af_packet: hw_if_index %u tx-queue %u", t->hw_if_index,
+ t->queue_id);
+
+ if (t->is_v2)
+ {
+ s = format (
+ s,
+ "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
+ "\n%Usec 0x%x nsec 0x%x vlan %U"
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ " vlan_tpid %u"
+#endif
+ ,
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->tph2.tp_status, t->tph2.tp_len, t->tph2.tp_snaplen, t->tph2.tp_mac,
+ t->tph2.tp_net, format_white_space, indent + 4, t->tph2.tp_sec,
+ t->tph2.tp_nsec, format_ethernet_vlan_tci, t->tph2.tp_vlan_tci
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ ,
+ t->tph2.tp_vlan_tpid
+#endif
+ );
+ }
+ else
+ {
+ s = format (
+ s,
+ "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
+ "\n%Usec 0x%x nsec 0x%x vlan %U"
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ " vlan_tpid %u"
+#endif
+ ,
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->tph3.tp_status, t->tph3.tp_len, t->tph3.tp_snaplen, t->tph3.tp_mac,
+ t->tph3.tp_net, format_white_space, indent + 4, t->tph3.tp_sec,
+ t->tph3.tp_nsec, format_ethernet_vlan_tci, t->tph3.hv1.tp_vlan_tci
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ ,
+ t->tph3.hv1.tp_vlan_tpid
+#endif
+ );
+ }
+ s = format (s,
+ "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u"
+ "\n%Ugso_size %u csum_start %u csum_offset %u",
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->vnet_hdr.flags, t->vnet_hdr.gso_type, t->vnet_hdr.hdr_len,
+ format_white_space, indent + 4, t->vnet_hdr.gso_size,
+ t->vnet_hdr.csum_start, t->vnet_hdr.csum_offset);
+
+ s = format (s, "\n%Ubuffer 0x%x:\n%U%U", format_white_space, indent + 2,
+ t->buffer_index, format_white_space, indent + 4,
+ format_vnet_buffer_no_chain, &t->buffer);
+ s = format (s, "\n%U%U", format_white_space, indent + 2,
+ format_ethernet_header_with_length, t->buffer.pre_data,
+ sizeof (t->buffer.pre_data));
+ return s;
+}
+
+static void
+af_packet_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b0, u32 bi, void *tph,
+ vnet_virtio_net_hdr_t *vnet_hdr, u32 hw_if_index,
+ u16 queue_id, u8 is_v2)
+{
+ af_packet_tx_trace_t *t;
+ t = vlib_add_trace (vm, node, b0, sizeof (t[0]));
+ t->hw_if_index = hw_if_index;
+ t->queue_id = queue_id;
+ t->buffer_index = bi;
+ t->is_v2 = is_v2;
+
+ if (is_v2)
+ clib_memcpy_fast (&t->tph2, (tpacket2_hdr_t *) tph,
+ sizeof (tpacket2_hdr_t));
+ else
+ clib_memcpy_fast (&t->tph3, (tpacket3_hdr_t *) tph,
+ sizeof (tpacket3_hdr_t));
+ clib_memcpy_fast (&t->vnet_hdr, vnet_hdr, sizeof (*vnet_hdr));
+ clib_memcpy_fast (&t->buffer, b0, sizeof (*b0) - sizeof (b0->pre_data));
+ clib_memcpy_fast (t->buffer.pre_data, vlib_buffer_get_current (b0),
+ sizeof (t->buffer.pre_data));
+}
+
+static_always_inline void
+fill_gso_offload (vlib_buffer_t *b0, vnet_virtio_net_hdr_t *vnet_hdr)
+{
+ vnet_buffer_oflags_t oflags = vnet_buffer (b0)->oflags;
+ i16 l4_hdr_offset = vnet_buffer (b0)->l4_hdr_offset - b0->current_data;
+ if (b0->flags & VNET_BUFFER_F_IS_IP4)
+ {
+ ip4_header_t *ip4;
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
+ vnet_hdr->gso_size = vnet_buffer2 (b0)->gso_size;
+ vnet_hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b0)->gso_l4_hdr_sz;
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet_hdr->csum_start = l4_hdr_offset; // 0x22;
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ ip4->checksum = ip4_header_checksum (ip4);
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ tcp->checksum = ip4_pseudo_header_cksum (ip4);
+ }
+ else if (b0->flags & VNET_BUFFER_F_IS_IP6)
+ {
+ ip6_header_t *ip6;
+ vnet_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
+ vnet_hdr->gso_size = vnet_buffer2 (b0)->gso_size;
+ vnet_hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b0)->gso_l4_hdr_sz;
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet_hdr->csum_start = l4_hdr_offset; // 0x36;
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ tcp->checksum = ip6_pseudo_header_cksum (ip6);
+ }
+}
+
+static_always_inline void
+fill_cksum_offload (vlib_buffer_t *b0, vnet_virtio_net_hdr_t *vnet_hdr)
+{
+ vnet_buffer_oflags_t oflags = vnet_buffer (b0)->oflags;
+ i16 l4_hdr_offset = vnet_buffer (b0)->l4_hdr_offset - b0->current_data;
+ if (b0->flags & VNET_BUFFER_F_IS_IP4)
+ {
+ ip4_header_t *ip4;
+ ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ ip4->checksum = ip4_header_checksum (ip4);
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet_hdr->csum_start = l4_hdr_offset;
+ if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+ {
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ tcp->checksum = ip4_pseudo_header_cksum (ip4);
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ vnet_hdr->hdr_len = l4_hdr_offset + tcp_header_bytes (tcp);
+ }
+ else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
+ {
+ udp_header_t *udp =
+ (udp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ udp->checksum = ip4_pseudo_header_cksum (ip4);
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
+ vnet_hdr->hdr_len = l4_hdr_offset + sizeof (udp_header_t);
+ }
+ }
+ else if (b0->flags & VNET_BUFFER_F_IS_IP6)
+ {
+ ip6_header_t *ip6;
+ vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vnet_hdr->csum_start = l4_hdr_offset;
+ ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+ {
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ tcp->checksum = ip6_pseudo_header_cksum (ip6);
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
+ vnet_hdr->hdr_len = l4_hdr_offset + tcp_header_bytes (tcp);
+ }
+ else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
+ {
+ udp_header_t *udp =
+ (udp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
+ udp->checksum = ip6_pseudo_header_cksum (ip6);
+ vnet_hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
+ vnet_hdr->hdr_len = l4_hdr_offset + sizeof (udp_header_t);
+ }
+ }
+}
+
+VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
+ u32 *buffers = vlib_frame_vector_args (frame);
+ u32 n_left = frame->n_vectors;
+ u32 n_sent = 0;
+ vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, rd->dev_instance);
+ u16 queue_id = tf->queue_id;
+ af_packet_queue_t *tx_queue = vec_elt_at_index (apif->tx_queues, queue_id);
+ u32 block = 0, frame_size = 0, frame_num = 0, tx_frame = 0;
+ u8 *block_start = 0;
+ tpacket3_hdr_t *tph3 = 0;
+ tpacket2_hdr_t *tph2 = 0;
+ u32 frame_not_ready = 0;
+ u8 is_cksum_gso_enabled = (apif->is_cksum_gso_enabled == 1) ? 1 : 0;
+ u32 tpacket_align = 0;
+ u8 is_v2 = (apif->version == TPACKET_V2) ? 1 : 0;
+
+ if (tf->shared_queue)
+ clib_spinlock_lock (&tx_queue->lockp);
+
+ frame_size = tx_queue->tx_req->req.tp_frame_size;
+ frame_num = tx_queue->tx_req->req.tp_frame_nr;
+ block_start = tx_queue->tx_ring[block];
+ tx_frame = tx_queue->next_tx_frame;
+ if (is_v2)
+ {
+ tpacket_align = TPACKET_ALIGN (sizeof (tpacket2_hdr_t));
+ while (n_left)
+ {
+ u32 len;
+ vnet_virtio_net_hdr_t *vnet_hdr = 0;
+ u32 offset = 0;
+ vlib_buffer_t *b0 = 0, *b0_first = 0;
+ u32 bi, bi_first;
+
+ bi = bi_first = buffers[0];
+ n_left--;
+ buffers++;
+
+ tph2 = (tpacket2_hdr_t *) (block_start + tx_frame * frame_size);
+ if (PREDICT_FALSE (tph2->tp_status &
+ (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)))
+ {
+ frame_not_ready++;
+ goto nextv2;
+ }
+
+ b0_first = b0 = vlib_get_buffer (vm, bi);
+
+ if (PREDICT_TRUE (is_cksum_gso_enabled))
+ {
+ vnet_hdr =
+ (vnet_virtio_net_hdr_t *) ((u8 *) tph2 + tpacket_align);
+
+ clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t));
+ offset = sizeof (vnet_virtio_net_hdr_t);
+
+ if (b0->flags & VNET_BUFFER_F_GSO)
+ fill_gso_offload (b0, vnet_hdr);
+ else if (b0->flags & VNET_BUFFER_F_OFFLOAD)
+ fill_cksum_offload (b0, vnet_hdr);
+ }
+
+ len = b0->current_length;
+ clib_memcpy_fast ((u8 *) tph2 + tpacket_align + offset,
+ vlib_buffer_get_current (b0), len);
+ offset += len;
+
+ while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b0 = vlib_get_buffer (vm, b0->next_buffer);
+ len = b0->current_length;
+ clib_memcpy_fast ((u8 *) tph2 + tpacket_align + offset,
+ vlib_buffer_get_current (b0), len);
+ offset += len;
+ }
+
+ tph2->tp_len = tph2->tp_snaplen = offset;
+ tph2->tp_status = TP_STATUS_SEND_REQUEST;
+ n_sent++;
+
+ if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ if (PREDICT_TRUE (is_cksum_gso_enabled))
+ af_packet_tx_trace (vm, node, b0_first, bi_first, tph2,
+ vnet_hdr, apif->hw_if_index, queue_id, 1);
+ else
+ {
+ vnet_virtio_net_hdr_t vnet_hdr2 = {};
+ af_packet_tx_trace (vm, node, b0_first, bi_first, tph2,
+ &vnet_hdr2, apif->hw_if_index, queue_id,
+ 1);
+ }
+ }
+ tx_frame = (tx_frame + 1) % frame_num;
+
+ nextv2:
+ /* check if we've exhausted the ring */
+ if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
+ break;
+ }
+ }
+ else
+ {
+ tpacket_align = TPACKET_ALIGN (sizeof (tpacket3_hdr_t));
+
+ while (n_left)
+ {
+ u32 len;
+ vnet_virtio_net_hdr_t *vnet_hdr = 0;
+ u32 offset = 0;
+ vlib_buffer_t *b0 = 0, *b0_first = 0;
+ u32 bi, bi_first;
+
+ bi = bi_first = buffers[0];
+ n_left--;
+ buffers++;
+
+ tph3 = (tpacket3_hdr_t *) (block_start + tx_frame * frame_size);
+ if (PREDICT_FALSE (tph3->tp_status &
+ (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)))
+ {
+ frame_not_ready++;
+ goto nextv3;
+ }
+
+ b0_first = b0 = vlib_get_buffer (vm, bi);
+
+ if (PREDICT_TRUE (is_cksum_gso_enabled))
+ {
+ vnet_hdr =
+ (vnet_virtio_net_hdr_t *) ((u8 *) tph3 + tpacket_align);
+
+ clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t));
+ offset = sizeof (vnet_virtio_net_hdr_t);
+
+ if (b0->flags & VNET_BUFFER_F_GSO)
+ fill_gso_offload (b0, vnet_hdr);
+ else if (b0->flags & VNET_BUFFER_F_OFFLOAD)
+ fill_cksum_offload (b0, vnet_hdr);
+ }
+
+ len = b0->current_length;
+ clib_memcpy_fast ((u8 *) tph3 + tpacket_align + offset,
+ vlib_buffer_get_current (b0), len);
+ offset += len;
+
+ while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b0 = vlib_get_buffer (vm, b0->next_buffer);
+ len = b0->current_length;
+ clib_memcpy_fast ((u8 *) tph3 + tpacket_align + offset,
+ vlib_buffer_get_current (b0), len);
+ offset += len;
+ }
+
+ tph3->tp_len = tph3->tp_snaplen = offset;
+ tph3->tp_status = TP_STATUS_SEND_REQUEST;
+ n_sent++;
+
+ if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ if (PREDICT_TRUE (is_cksum_gso_enabled))
+ af_packet_tx_trace (vm, node, b0_first, bi_first, tph3,
+ vnet_hdr, apif->hw_if_index, queue_id, 0);
+ else
+ {
+ vnet_virtio_net_hdr_t vnet_hdr2 = {};
+ af_packet_tx_trace (vm, node, b0_first, bi_first, tph3,
+ &vnet_hdr2, apif->hw_if_index, queue_id,
+ 0);
+ }
+ }
+ tx_frame = (tx_frame + 1) % frame_num;
+
+ nextv3:
+ /* check if we've exhausted the ring */
+ if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
+ break;
+ }
+ }
+ CLIB_MEMORY_BARRIER ();
+
+ if (PREDICT_TRUE (n_sent || tx_queue->is_tx_pending))
+ {
+ tx_queue->next_tx_frame = tx_frame;
+ tx_queue->is_tx_pending = 0;
+
+ if (PREDICT_FALSE (
+ sendto (tx_queue->fd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1))
+ {
+ /* Uh-oh, drop & move on, but count whether it was fatal or not.
+ * Note that we have no reliable way to properly determine the
+ * disposition of the packets we just enqueued for delivery.
+ */
+ uword counter;
+
+ if (unix_error_is_fatal (errno))
+ {
+ counter = AF_PACKET_TX_ERROR_TXRING_FATAL;
+ }
+ else
+ {
+ counter = AF_PACKET_TX_ERROR_TXRING_EAGAIN;
+ /* non-fatal error: kick again next time
+ * note that you could still end up in a deadlock: if you do not
+ * try to send new packets (ie reschedule this tx node), eg.
+ * because your peer is waiting for the unsent packets to reply
+ * to you but your waiting for its reply etc., you are not going
+ * to kick again, and everybody is waiting for the other to talk
+ * 1st... */
+ tx_queue->is_tx_pending = 1;
+ }
+
+ vlib_error_count (vm, node->node_index, counter, 1);
+ }
+ }
+
+ if (tf->shared_queue)
+ clib_spinlock_unlock (&tx_queue->lockp);
+
+ if (PREDICT_FALSE (frame_not_ready))
+ vlib_error_count (vm, node->node_index,
+ AF_PACKET_TX_ERROR_FRAME_NOT_READY, frame_not_ready);
+
+ if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
+ vlib_error_count (vm, node->node_index, AF_PACKET_TX_ERROR_TXRING_OVERRUN,
+ n_left);
+
+ vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
+ return frame->n_vectors;
+}
+
+static void
+af_packet_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, hw->dev_instance);
+
+ /* Shut off redirection */
+ if (node_index == ~0)
+ {
+ apif->per_interface_next_index = node_index;
+ return;
+ }
+
+ apif->per_interface_next_index =
+ vlib_node_add_next (vlib_get_main (), af_packet_input_node.index,
+ node_index);
+}
+
+static void
+af_packet_clear_hw_interface_counters (u32 instance)
+{
+ /* Nothing for now */
+}
+
+static clib_error_t *
+af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
+ u32 flags)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, hw->dev_instance);
+ u32 hw_flags;
+
+ if (apif->host_if_index < 0)
+ return 0; /* no error */
+
+ apif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ if (apif->is_admin_up)
+ {
+ hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
+ vnet_netlink_set_link_state (apif->host_if_index, 1);
+ }
+ else
+ {
+ hw_flags = 0;
+ vnet_netlink_set_link_state (apif->host_if_index, 0);
+ }
+
+ vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
+
+ return 0; /* no error */
+}
+
+static clib_error_t *af_packet_set_mac_address_function
+ (struct vnet_hw_interface_t *hi, const u8 * old_address, const u8 * address)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_if_t *apif =
+ pool_elt_at_index (apm->interfaces, hi->dev_instance);
+ int rv, fd;
+ struct ifreq ifr;
+
+ if (apif->mode == AF_PACKET_IF_MODE_IP)
+ {
+ vlib_log_warn (apm->log_class, "af_packet_%s interface is in IP mode",
+ apif->host_if_name);
+ return clib_error_return (0,
+ " MAC update failed, interface is in IP mode");
+ }
+
+ fd = socket (AF_UNIX, SOCK_DGRAM, 0);
+ if (0 > fd)
+ {
+ vlib_log_warn (apm->log_class, "af_packet_%s could not open socket",
+ apif->host_if_name);
+ return 0;
+ }
+
+ /* if interface is a bridge ignore */
+ if (apif->host_if_index < 0)
+ goto error; /* no error */
+
+ /* use host_if_index in case host name has changed */
+ ifr.ifr_ifindex = apif->host_if_index;
+ if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0)
+ {
+ vlib_log_warn
+ (apm->log_class,
+ "af_packet_%s ioctl could not retrieve eth name, error: %d",
+ apif->host_if_name, rv);
+ goto error;
+ }
+
+ clib_memcpy (ifr.ifr_hwaddr.sa_data, address, 6);
+ ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
+
+ if ((rv = ioctl (fd, SIOCSIFHWADDR, &ifr)) < 0)
+ {
+ vlib_log_warn (apm->log_class,
+ "af_packet_%s ioctl could not set mac, error: %d",
+ apif->host_if_name, rv);
+ goto error;
+ }
+
+error:
+
+ if (0 <= fd)
+ close (fd);
+
+ return 0; /* no error */
+}
+
+static clib_error_t *
+af_packet_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid,
+ vnet_hw_if_rx_mode mode)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ af_packet_if_t *apif;
+
+ apif = vec_elt_at_index (apm->interfaces, hw->dev_instance);
+
+ if (mode == VNET_HW_IF_RX_MODE_ADAPTIVE)
+ {
+ vlib_log_err (apm->log_class,
+ "af_packet_%s adaptive mode is not supported",
+ apif->host_if_name);
+ return clib_error_return (
+ 0, "af_packet_%s adaptive mode is not supported", apif->host_if_name);
+ }
+
+ af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, qid);
+
+ if (rx_queue->mode != mode)
+ {
+ rx_queue->mode = mode;
+
+ if (mode == VNET_HW_IF_RX_MODE_POLLING)
+ apm->polling_count++;
+ else if (mode == VNET_HW_IF_RX_MODE_INTERRUPT && apm->polling_count > 0)
+ apm->polling_count--;
+ }
+
+ return 0;
+}
+
+VNET_DEVICE_CLASS (af_packet_device_class) = {
+ .name = "af-packet",
+ .format_device_name = format_af_packet_device_name,
+ .format_device = format_af_packet_device,
+ .format_tx_trace = format_af_packet_tx_trace,
+ .tx_function_n_errors = AF_PACKET_TX_N_ERROR,
+ .tx_function_error_strings = af_packet_tx_func_error_strings,
+ .rx_redirect_to_node = af_packet_set_interface_next_node,
+ .clear_counters = af_packet_clear_hw_interface_counters,
+ .admin_up_down_function = af_packet_interface_admin_up_down,
+ .mac_addr_change_function = af_packet_set_mac_address_function,
+ .rx_mode_change_function = af_packet_interface_rx_mode_change,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/af_packet/dir.dox b/src/plugins/af_packet/dir.dox
index 78991c6d97f..78991c6d97f 100644
--- a/src/vnet/devices/af_packet/dir.dox
+++ b/src/plugins/af_packet/dir.dox
diff --git a/src/plugins/af_packet/node.c b/src/plugins/af_packet/node.c
new file mode 100644
index 00000000000..279f11c0183
--- /dev/null
+++ b/src/plugins/af_packet/node.c
@@ -0,0 +1,832 @@
+/*
+ *------------------------------------------------------------------
+ * af_packet.c - linux kernel packet interface
+ *
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <linux/if_packet.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ethernet/packet.h>
+
+#include <af_packet/af_packet.h>
+#include <vnet/devices/virtio/virtio_std.h>
+
+#define foreach_af_packet_input_error \
+ _ (PARTIAL_PKT, "partial packet") \
+ _ (TIMEDOUT_BLK, "timed out block") \
+ _ (TOTAL_RECV_BLK, "total received block")
+typedef enum
+{
+#define _(f,s) AF_PACKET_INPUT_ERROR_##f,
+ foreach_af_packet_input_error
+#undef _
+ AF_PACKET_INPUT_N_ERROR,
+} af_packet_input_error_t;
+
+static char *af_packet_input_error_strings[] = {
+#define _(n,s) s,
+ foreach_af_packet_input_error
+#undef _
+};
+
+typedef struct
+{
+ u32 next_index;
+ u32 hw_if_index;
+ u16 queue_id;
+ int block;
+ u32 pkt_num;
+ void *block_start;
+ block_desc_t bd;
+ union
+ {
+ tpacket3_hdr_t tph3;
+ tpacket2_hdr_t tph2;
+ };
+ vnet_virtio_net_hdr_t vnet_hdr;
+ u8 is_v3;
+} af_packet_input_trace_t;
+
+static u8 *
+format_af_packet_input_trace (u8 * s, va_list * args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "af_packet: hw_if_index %d rx-queue %u next-index %d",
+ t->hw_if_index, t->queue_id, t->next_index);
+
+ if (t->is_v3)
+ {
+ s = format (
+ s, "\n%Ublock %u:\n%Uaddress %p version %u seq_num %lu pkt_num %u",
+ format_white_space, indent + 2, t->block, format_white_space,
+ indent + 4, t->block_start, t->bd.version, t->bd.hdr.bh1.seq_num,
+ t->pkt_num);
+ s = format (
+ s,
+ "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
+ "\n%Usec 0x%x nsec 0x%x vlan %U"
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ " vlan_tpid %u"
+#endif
+ ,
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->tph3.tp_status, t->tph3.tp_len, t->tph3.tp_snaplen, t->tph3.tp_mac,
+ t->tph3.tp_net, format_white_space, indent + 4, t->tph3.tp_sec,
+ t->tph3.tp_nsec, format_ethernet_vlan_tci, t->tph3.hv1.tp_vlan_tci
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ ,
+ t->tph3.hv1.tp_vlan_tpid
+#endif
+ );
+ }
+ else
+ {
+ s = format (
+ s,
+ "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
+ "\n%Usec 0x%x nsec 0x%x vlan %U"
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ " vlan_tpid %u"
+#endif
+ ,
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->tph2.tp_status, t->tph2.tp_len, t->tph2.tp_snaplen, t->tph2.tp_mac,
+ t->tph2.tp_net, format_white_space, indent + 4, t->tph2.tp_sec,
+ t->tph2.tp_nsec, format_ethernet_vlan_tci, t->tph2.tp_vlan_tci
+#ifdef TP_STATUS_VLAN_TPID_VALID
+ ,
+ t->tph2.tp_vlan_tpid
+#endif
+ );
+ }
+
+ s = format (s,
+ "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u"
+ "\n%Ugso_size %u csum_start %u csum_offset %u",
+ format_white_space, indent + 2, format_white_space, indent + 4,
+ t->vnet_hdr.flags, t->vnet_hdr.gso_type, t->vnet_hdr.hdr_len,
+ format_white_space, indent + 4, t->vnet_hdr.gso_size,
+ t->vnet_hdr.csum_start, t->vnet_hdr.csum_offset);
+ return s;
+}
+
+always_inline void
+buffer_add_to_chain (vlib_buffer_t *b, vlib_buffer_t *first_b,
+ vlib_buffer_t *prev_b, u32 bi)
+{
+ /* update first buffer */
+ first_b->total_length_not_including_first_buffer += b->current_length;
+
+ /* update previous buffer */
+ prev_b->next_buffer = bi;
+ prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ /* update current buffer */
+ b->next_buffer = ~0;
+}
+
+static_always_inline void
+fill_gso_offload (vlib_buffer_t *b, u32 gso_size, u8 l4_hdr_sz)
+{
+ b->flags |= VNET_BUFFER_F_GSO;
+ vnet_buffer2 (b)->gso_size = gso_size;
+ vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_sz;
+}
+
+static_always_inline void
+fill_cksum_offload (vlib_buffer_t *b, u8 *l4_hdr_sz, u8 is_ip)
+{
+ vnet_buffer_oflags_t oflags = 0;
+ u16 l2hdr_sz = 0;
+ u16 ethertype = 0;
+ u8 l4_proto = 0;
+
+ if (is_ip)
+ {
+ switch (b->data[0] & 0xf0)
+ {
+ case 0x40:
+ ethertype = ETHERNET_TYPE_IP4;
+ break;
+ case 0x60:
+ ethertype = ETHERNET_TYPE_IP6;
+ break;
+ }
+ }
+ else
+ {
+ ethernet_header_t *eth = (ethernet_header_t *) b->data;
+ ethertype = clib_net_to_host_u16 (eth->type);
+ l2hdr_sz = sizeof (ethernet_header_t);
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eth + 1);
+
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ vlan++;
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ }
+ }
+ }
+
+ vnet_buffer (b)->l2_hdr_offset = 0;
+ vnet_buffer (b)->l3_hdr_offset = l2hdr_sz;
+
+ if (ethertype == ETHERNET_TYPE_IP4)
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) (b->data + l2hdr_sz);
+ vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
+ b->flags |= (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+
+ l4_proto = ip4->protocol;
+ }
+ else if (ethertype == ETHERNET_TYPE_IP6)
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) (b->data + l2hdr_sz);
+ b->flags |= (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+ u16 ip6_hdr_len = sizeof (ip6_header_t);
+
+ if (ip6_ext_hdr (ip6->protocol))
+ {
+ ip6_ext_header_t *p = (void *) (ip6 + 1);
+ ip6_hdr_len += ip6_ext_header_len (p);
+ while (ip6_ext_hdr (p->next_hdr))
+ {
+ ip6_hdr_len += ip6_ext_header_len (p);
+ p = ip6_ext_next_header (p);
+ }
+ l4_proto = p->next_hdr;
+ }
+ else
+ l4_proto = ip6->protocol;
+ vnet_buffer (b)->l4_hdr_offset = l2hdr_sz + ip6_hdr_len;
+ }
+
+ if (l4_proto == IP_PROTOCOL_TCP)
+ {
+ oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
+ *l4_hdr_sz = tcp_header_bytes (tcp);
+ }
+ else if (l4_proto == IP_PROTOCOL_UDP)
+ {
+ oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+ *l4_hdr_sz = sizeof (udp_header_t);
+ }
+
+ if (oflags)
+ vnet_buffer_offload_flags_set (b, oflags);
+}
+
+always_inline uword
+af_packet_v3_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, af_packet_if_t *apif,
+ u16 queue_id, u8 is_cksum_gso_enabled)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id);
+ tpacket3_hdr_t *tph;
+ u32 next_index;
+ u32 n_free_bufs;
+ u32 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u32 timedout_blk = 0;
+ u32 total = 0;
+ u32 *to_next = 0;
+ u32 block = rx_queue->next_rx_block;
+ u32 block_nr = rx_queue->rx_req->req3.tp_block_nr;
+ u8 *block_start = 0;
+ uword n_trace = vlib_get_trace_count (vm, node);
+ u32 thread_index = vm->thread_index;
+ u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
+ u32 min_bufs = rx_queue->rx_req->req3.tp_frame_size / n_buffer_bytes;
+ u32 num_pkts = 0;
+ u32 rx_frame_offset = 0;
+ block_desc_t *bd = 0;
+ u32 sw_if_index = apif->sw_if_index;
+ u8 is_ip = (apif->mode == AF_PACKET_IF_MODE_IP);
+
+ if (is_ip)
+ next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ else
+ next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+
+ if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block]))
+ ->hdr.bh1.block_status &
+ TP_STATUS_USER) != 0)
+ {
+ u32 n_required = 0;
+ bd = (block_desc_t *) block_start;
+
+ if (PREDICT_FALSE (rx_queue->is_rx_pending))
+ {
+ num_pkts = rx_queue->num_rx_pkts;
+ rx_frame_offset = rx_queue->rx_frame_offset;
+ rx_queue->is_rx_pending = 0;
+ }
+ else
+ {
+ num_pkts = bd->hdr.bh1.num_pkts;
+ rx_frame_offset = bd->hdr.bh1.offset_to_first_pkt;
+ total++;
+
+ if (TP_STATUS_BLK_TMO & bd->hdr.bh1.block_status)
+ timedout_blk++;
+ }
+
+ n_required = clib_max (num_pkts, VLIB_FRAME_SIZE);
+ n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
+ if (PREDICT_FALSE (n_free_bufs < n_required))
+ {
+ vec_validate (apm->rx_buffers[thread_index],
+ n_required + n_free_bufs - 1);
+ n_free_bufs += vlib_buffer_alloc (
+ vm, &apm->rx_buffers[thread_index][n_free_bufs], n_required);
+ vec_set_len (apm->rx_buffers[thread_index], n_free_bufs);
+ }
+
+ while (num_pkts && (n_free_bufs >= min_bufs))
+ {
+ u32 next0 = next_index;
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (num_pkts && n_left_to_next && (n_free_bufs >= min_bufs))
+ {
+ tph = (tpacket3_hdr_t *) (block_start + rx_frame_offset);
+
+ if (num_pkts > 1)
+ CLIB_PREFETCH (block_start + rx_frame_offset +
+ tph->tp_next_offset,
+ 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+
+ vlib_buffer_t *b0 = 0, *first_b0 = 0, *prev_b0 = 0;
+ vnet_virtio_net_hdr_t *vnet_hdr = 0;
+ u32 data_len = tph->tp_snaplen;
+ u32 offset = 0;
+ u32 bi0 = ~0, first_bi0 = ~0;
+ u8 l4_hdr_sz = 0;
+
+ if (is_cksum_gso_enabled)
+ vnet_hdr =
+ (vnet_virtio_net_hdr_t *) ((u8 *) tph + tph->tp_mac -
+ sizeof (vnet_virtio_net_hdr_t));
+
+ // save current state and return
+ if (PREDICT_FALSE (((data_len / n_buffer_bytes) + 1) >
+ vec_len (apm->rx_buffers[thread_index])))
+ {
+ rx_queue->rx_frame_offset = rx_frame_offset;
+ rx_queue->num_rx_pkts = num_pkts;
+ rx_queue->is_rx_pending = 1;
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ goto done;
+ }
+
+ while (data_len)
+ {
+ /* grab free buffer */
+ u32 last_empty_buffer =
+ vec_len (apm->rx_buffers[thread_index]) - 1;
+ bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
+ vec_set_len (apm->rx_buffers[thread_index],
+ last_empty_buffer);
+ n_free_bufs--;
+
+ /* copy data */
+ u32 bytes_to_copy =
+ data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
+ u32 vlan_len = 0;
+ u32 bytes_copied = 0;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->current_data = 0;
+
+ /* Kernel removes VLAN headers, so reconstruct VLAN */
+ if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
+ {
+ if (PREDICT_TRUE (offset == 0))
+ {
+ clib_memcpy_fast (vlib_buffer_get_current (b0),
+ (u8 *) tph + tph->tp_mac,
+ sizeof (ethernet_header_t));
+ ethernet_header_t *eth =
+ vlib_buffer_get_current (b0);
+ ethernet_vlan_header_t *vlan =
+ (ethernet_vlan_header_t *) (eth + 1);
+ vlan->priority_cfi_and_id =
+ clib_host_to_net_u16 (tph->hv1.tp_vlan_tci);
+ vlan->type = eth->type;
+ eth->type =
+ clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ vlan_len = sizeof (ethernet_vlan_header_t);
+ bytes_copied = sizeof (ethernet_header_t);
+ }
+ }
+ clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) +
+ bytes_copied + vlan_len,
+ (u8 *) tph + tph->tp_mac + offset +
+ bytes_copied,
+ (bytes_to_copy - bytes_copied));
+
+ /* fill buffer header */
+ b0->current_length = bytes_to_copy + vlan_len;
+
+ if (offset == 0)
+ {
+ b0->total_length_not_including_first_buffer = 0;
+ b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~0;
+ first_b0 = b0;
+ first_bi0 = bi0;
+ if (is_cksum_gso_enabled)
+ {
+ if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ fill_cksum_offload (first_b0, &l4_hdr_sz, is_ip);
+ if (vnet_hdr->gso_type & (VIRTIO_NET_HDR_GSO_TCPV4 |
+ VIRTIO_NET_HDR_GSO_TCPV6))
+ fill_gso_offload (first_b0, vnet_hdr->gso_size,
+ l4_hdr_sz);
+ }
+ }
+ else
+ buffer_add_to_chain (b0, first_b0, prev_b0, bi0);
+
+ prev_b0 = b0;
+ offset += bytes_to_copy;
+ data_len -= bytes_to_copy;
+ }
+ n_rx_packets++;
+ n_rx_bytes += tph->tp_snaplen;
+ to_next[0] = first_bi0;
+ to_next += 1;
+ n_left_to_next--;
+
+ /* drop partial packets */
+ if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen))
+ {
+ next0 = VNET_DEVICE_INPUT_NEXT_DROP;
+ first_b0->error =
+ node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT];
+ }
+ else
+ {
+ if (PREDICT_FALSE (apif->mode == AF_PACKET_IF_MODE_IP))
+ {
+ switch (first_b0->data[0] & 0xf0)
+ {
+ case 0x40:
+ next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ break;
+ case 0x60:
+ next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+ break;
+ default:
+ next0 = VNET_DEVICE_INPUT_NEXT_DROP;
+ break;
+ }
+ if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
+ next0 = apif->per_interface_next_index;
+ }
+ else
+ {
+ next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
+ next0 = apif->per_interface_next_index;
+ /* redirect if feature path enabled */
+ vnet_feature_start_device_input (sw_if_index, &next0,
+ first_b0);
+ }
+ }
+
+ /* trace */
+ if (PREDICT_FALSE (n_trace > 0 &&
+ vlib_trace_buffer (vm, node, next0, first_b0,
+ /* follow_chain */ 0)))
+ {
+ af_packet_input_trace_t *tr;
+ vlib_set_trace_count (vm, node, --n_trace);
+ tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
+ tr->is_v3 = 1;
+ tr->next_index = next0;
+ tr->hw_if_index = apif->hw_if_index;
+ tr->queue_id = queue_id;
+ tr->block = block;
+ tr->block_start = bd;
+ tr->pkt_num = bd->hdr.bh1.num_pkts - num_pkts;
+ clib_memcpy_fast (&tr->bd, bd, sizeof (block_desc_t));
+ clib_memcpy_fast (&tr->tph3, tph, sizeof (tpacket3_hdr_t));
+ if (is_cksum_gso_enabled)
+ clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr,
+ sizeof (vnet_virtio_net_hdr_t));
+ else
+ clib_memset_u8 (&tr->vnet_hdr, 0,
+ sizeof (vnet_virtio_net_hdr_t));
+ }
+
+ /* enque and take next packet */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, first_bi0,
+ next0);
+
+ /* next packet */
+ num_pkts--;
+ rx_frame_offset += tph->tp_next_offset;
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ if (PREDICT_TRUE (num_pkts == 0))
+ {
+ bd->hdr.bh1.block_status = TP_STATUS_KERNEL;
+ block = (block + 1) % block_nr;
+ }
+ else
+ {
+ rx_queue->rx_frame_offset = rx_frame_offset;
+ rx_queue->num_rx_pkts = num_pkts;
+ rx_queue->is_rx_pending = 1;
+ }
+ }
+
+ rx_queue->next_rx_block = block;
+
+done:
+
+ if (apm->polling_count == 0)
+ {
+ if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block]))
+ ->hdr.bh1.block_status &
+ TP_STATUS_USER) != 0)
+ vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_POLLING);
+ else
+ vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_INTERRUPT);
+ }
+
+ vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TOTAL_RECV_BLK,
+ total);
+ vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TIMEDOUT_BLK,
+ timedout_blk);
+
+ vlib_increment_combined_counter
+ (vnet_get_main ()->interface_main.combined_sw_if_counters
+ + VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
+ return n_rx_packets;
+}
+
+always_inline uword
+af_packet_v2_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, af_packet_if_t *apif,
+ u16 queue_id, u8 is_cksum_gso_enabled)
+{
+ af_packet_main_t *apm = &af_packet_main;
+ af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id);
+ tpacket2_hdr_t *tph;
+ u32 next_index;
+ u32 block = 0;
+ u32 rx_frame;
+ u32 n_free_bufs;
+ u32 n_rx_packets = 0;
+ u32 n_rx_bytes = 0;
+ u32 *to_next = 0;
+ u32 frame_size = rx_queue->rx_req->req.tp_frame_size;
+ u32 frame_num = rx_queue->rx_req->req.tp_frame_nr;
+ u8 *block_start = rx_queue->rx_ring[block];
+ uword n_trace = vlib_get_trace_count (vm, node);
+ u32 thread_index = vm->thread_index;
+ u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
+ u32 min_bufs = rx_queue->rx_req->req.tp_frame_size / n_buffer_bytes;
+ u32 sw_if_index = apif->sw_if_index;
+ u8 is_ip = (apif->mode == AF_PACKET_IF_MODE_IP);
+
+ if (is_ip)
+ next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ else
+ next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+
+ n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
+ if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
+ {
+ vec_validate (apm->rx_buffers[thread_index],
+ VLIB_FRAME_SIZE + n_free_bufs - 1);
+ n_free_bufs += vlib_buffer_alloc (
+ vm, &apm->rx_buffers[thread_index][n_free_bufs], VLIB_FRAME_SIZE);
+ vec_set_len (apm->rx_buffers[thread_index], n_free_bufs);
+ }
+
+ rx_frame = rx_queue->next_rx_frame;
+ tph = (tpacket2_hdr_t *) (block_start + rx_frame * frame_size);
+ while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs))
+ {
+ vlib_buffer_t *b0 = 0, *first_b0 = 0, *prev_b0 = 0;
+ u32 next0 = next_index;
+
+ u32 n_left_to_next;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs) &&
+ n_left_to_next)
+ {
+ vnet_virtio_net_hdr_t *vnet_hdr = 0;
+ u32 data_len = tph->tp_snaplen;
+ u32 offset = 0;
+ u32 bi0 = 0, first_bi0 = 0;
+ u8 l4_hdr_sz = 0;
+
+ if (is_cksum_gso_enabled)
+ vnet_hdr =
+ (vnet_virtio_net_hdr_t *) ((u8 *) tph + tph->tp_mac -
+ sizeof (vnet_virtio_net_hdr_t));
+ while (data_len)
+ {
+ /* grab free buffer */
+ u32 last_empty_buffer =
+ vec_len (apm->rx_buffers[thread_index]) - 1;
+ bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
+ b0 = vlib_get_buffer (vm, bi0);
+ vec_set_len (apm->rx_buffers[thread_index], last_empty_buffer);
+ n_free_bufs--;
+
+ /* copy data */
+ u32 bytes_to_copy =
+ data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
+ u32 vlan_len = 0;
+ u32 bytes_copied = 0;
+ b0->current_data = 0;
+ /* Kernel removes VLAN headers, so reconstruct VLAN */
+ if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
+ {
+ if (PREDICT_TRUE (offset == 0))
+ {
+ clib_memcpy_fast (vlib_buffer_get_current (b0),
+ (u8 *) tph + tph->tp_mac,
+ sizeof (ethernet_header_t));
+ ethernet_header_t *eth = vlib_buffer_get_current (b0);
+ ethernet_vlan_header_t *vlan =
+ (ethernet_vlan_header_t *) (eth + 1);
+ vlan->priority_cfi_and_id =
+ clib_host_to_net_u16 (tph->tp_vlan_tci);
+ vlan->type = eth->type;
+ eth->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
+ vlan_len = sizeof (ethernet_vlan_header_t);
+ bytes_copied = sizeof (ethernet_header_t);
+ }
+ }
+ clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) +
+ bytes_copied + vlan_len,
+ (u8 *) tph + tph->tp_mac + offset +
+ bytes_copied,
+ (bytes_to_copy - bytes_copied));
+
+ /* fill buffer header */
+ b0->current_length = bytes_to_copy + vlan_len;
+
+ if (offset == 0)
+ {
+ b0->total_length_not_including_first_buffer = 0;
+ b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~0;
+ first_bi0 = bi0;
+ first_b0 = vlib_get_buffer (vm, first_bi0);
+
+ if (is_cksum_gso_enabled)
+ {
+ if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
+ fill_cksum_offload (first_b0, &l4_hdr_sz, is_ip);
+ if (vnet_hdr->gso_type & (VIRTIO_NET_HDR_GSO_TCPV4 |
+ VIRTIO_NET_HDR_GSO_TCPV6))
+ fill_gso_offload (first_b0, vnet_hdr->gso_size,
+ l4_hdr_sz);
+ }
+ }
+ else
+ buffer_add_to_chain (b0, first_b0, prev_b0, bi0);
+
+ prev_b0 = b0;
+ offset += bytes_to_copy;
+ data_len -= bytes_to_copy;
+ }
+ n_rx_packets++;
+ n_rx_bytes += tph->tp_snaplen;
+ to_next[0] = first_bi0;
+ to_next += 1;
+ n_left_to_next--;
+
+ /* drop partial packets */
+ if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen))
+ {
+ next0 = VNET_DEVICE_INPUT_NEXT_DROP;
+ first_b0->error =
+ node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT];
+ }
+ else
+ {
+ if (PREDICT_FALSE (is_ip))
+ {
+ switch (first_b0->data[0] & 0xf0)
+ {
+ case 0x40:
+ next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
+ break;
+ case 0x60:
+ next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
+ break;
+ default:
+ next0 = VNET_DEVICE_INPUT_NEXT_DROP;
+ break;
+ }
+ if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
+ next0 = apif->per_interface_next_index;
+ }
+ else
+ {
+ next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
+ next0 = apif->per_interface_next_index;
+ /* redirect if feature path enabled */
+ vnet_feature_start_device_input (sw_if_index, &next0,
+ first_b0);
+ }
+ }
+
+ /* trace */
+ if (PREDICT_FALSE (n_trace > 0 &&
+ vlib_trace_buffer (vm, node, next0, first_b0,
+ /* follow_chain */ 0)))
+ {
+ af_packet_input_trace_t *tr;
+ vlib_set_trace_count (vm, node, --n_trace);
+ tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
+ tr->is_v3 = 0;
+ tr->next_index = next0;
+ tr->hw_if_index = apif->hw_if_index;
+ tr->queue_id = queue_id;
+ clib_memcpy_fast (&tr->tph2, tph, sizeof (struct tpacket2_hdr));
+ if (is_cksum_gso_enabled)
+ clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr,
+ sizeof (vnet_virtio_net_hdr_t));
+ else
+ clib_memset_u8 (&tr->vnet_hdr, 0,
+ sizeof (vnet_virtio_net_hdr_t));
+ }
+
+ /* enque and take next packet */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, first_bi0, next0);
+
+ /* next packet */
+ tph->tp_status = TP_STATUS_KERNEL;
+ rx_frame = (rx_frame + 1) % frame_num;
+ tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ rx_queue->next_rx_frame = rx_frame;
+
+ vlib_increment_combined_counter (
+ vnet_get_main ()->interface_main.combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_RX,
+ vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
+
+ vnet_device_increment_rx_packets (thread_index, n_rx_packets);
+ return n_rx_packets;
+}
+
+always_inline uword
+af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, af_packet_if_t *apif,
+ u16 queue_id, u8 is_cksum_gso_enabled)
+
+{
+ if (apif->version == TPACKET_V3)
+ return af_packet_v3_device_input_fn (vm, node, frame, apif, queue_id,
+ is_cksum_gso_enabled);
+ else
+ return af_packet_v2_device_input_fn (vm, node, frame, apif, queue_id,
+ is_cksum_gso_enabled);
+}
+
+VLIB_NODE_FN (af_packet_input_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_rx_packets = 0;
+ af_packet_main_t *apm = &af_packet_main;
+ vnet_hw_if_rxq_poll_vector_t *pv;
+ pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
+ for (int i = 0; i < vec_len (pv); i++)
+ {
+ af_packet_if_t *apif;
+ apif = vec_elt_at_index (apm->interfaces, pv[i].dev_instance);
+ if (apif->is_admin_up)
+ {
+ if (apif->is_cksum_gso_enabled)
+ n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif,
+ pv[i].queue_id, 1);
+ else
+ n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif,
+ pv[i].queue_id, 0);
+ }
+ }
+ return n_rx_packets;
+}
+
+VLIB_REGISTER_NODE (af_packet_input_node) = {
+ .name = "af-packet-input",
+ .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
+ .sibling_of = "device-input",
+ .format_trace = format_af_packet_input_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .n_errors = AF_PACKET_INPUT_N_ERROR,
+ .error_strings = af_packet_input_error_strings,
+};
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/af_packet/plugin.c b/src/plugins/af_packet/plugin.c
new file mode 100644
index 00000000000..ddad52404c7
--- /dev/null
+++ b/src/plugins/af_packet/plugin.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "af-packet",
+};
diff --git a/src/plugins/af_xdp/CMakeLists.txt b/src/plugins/af_xdp/CMakeLists.txt
index cbe96aa59dd..fd7ee4e835b 100644
--- a/src/plugins/af_xdp/CMakeLists.txt
+++ b/src/plugins/af_xdp/CMakeLists.txt
@@ -11,36 +11,37 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-vpp_find_path(BPF_INCLUDE_DIR NAMES bpf/xsk.h)
-if (NOT BPF_INCLUDE_DIR)
- message(WARNING "libbpf headers not found - af_xdp plugin disabled")
+vpp_find_path(XDP_INCLUDE_DIR NAMES xdp/xsk.h)
+if (NOT XDP_INCLUDE_DIR)
+ message(WARNING "libxdp headers not found - af_xdp plugin disabled")
return()
endif()
set_property(GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS TRUE)
+vpp_plugin_find_library(af_xdp XDP_LIB libxdp.a)
vpp_plugin_find_library(af_xdp BPF_LIB libbpf.a)
-vpp_plugin_find_library(af_xdp BPF_ELF_LIB elf)
-vpp_plugin_find_library(af_xdp BPF_Z_LIB z)
-if (NOT BPF_LIB OR NOT BPF_ELF_LIB OR NOT BPF_Z_LIB)
+vpp_plugin_find_library(af_xdp ELF_LIB elf)
+vpp_plugin_find_library(af_xdp Z_LIB z)
+if (NOT XDP_LIB OR NOT BPF_LIB OR NOT ELF_LIB OR NOT Z_LIB)
message(WARNING "af_xdp plugin - missing libraries - af_xdp plugin disabled")
return()
endif()
set(CMAKE_REQUIRED_FLAGS "-fPIC")
-set(CMAKE_REQUIRED_INCLUDES "${BPF_INCLUDE_DIR}")
-set(CMAKE_REQUIRED_LIBRARIES "${BPF_LIB}" "${BPF_ELF_LIB}" "${BPF_Z_LIB}")
+set(CMAKE_REQUIRED_INCLUDES "${XDP_INCLUDE_DIR}")
+set(CMAKE_REQUIRED_LIBRARIES "${XDP_LIB}" "${BPF_LIB}" "${ELF_LIB}" "${Z_LIB}")
CHECK_C_SOURCE_COMPILES("
-#include <bpf/xsk.h>
+#include <xdp/xsk.h>
int main(void)
{
return xsk_socket__create (0, 0, 0, 0, 0, 0, 0);
-}" BPF_COMPILES_CHECK)
-if (NOT BPF_COMPILES_CHECK)
- message(WARNING "af_xdp plugins - no working libbpf found - af_xdp plugin disabled")
+}" XDP_COMPILES_CHECK)
+if (NOT XDP_COMPILES_CHECK)
+message(WARNING "af_xdp plugins - no working libxdp found - af_xdp plugin disabled")
return()
endif()
-include_directories(${BPF_INCLUDE_DIR})
+include_directories(${XDP_INCLUDE_DIR})
add_vpp_plugin(af_xdp
SOURCES
@@ -65,7 +66,10 @@ add_vpp_plugin(af_xdp
test_api.c
LINK_LIBRARIES
+ ${XDP_LIB}
${BPF_LIB}
- ${BPF_ELF_LIB}
- ${BPF_Z_LIB}
+ ${ELF_LIB}
+ ${Z_LIB}
+
+ SUPPORTED_OS_LIST Linux
)
diff --git a/src/plugins/af_xdp/af_xdp.api b/src/plugins/af_xdp/af_xdp.api
index c6716123703..4c2908e2037 100644
--- a/src/plugins/af_xdp/af_xdp.api
+++ b/src/plugins/af_xdp/af_xdp.api
@@ -15,7 +15,7 @@
*------------------------------------------------------------------
*/
-option version = "0.2.0";
+option version = "1.0.0";
import "vnet/interface_types.api";
enum af_xdp_mode
@@ -57,7 +57,39 @@ define af_xdp_create
vl_api_af_xdp_flag_t flags [default=0];
string prog[256];
option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [zero-copy|no-zero-copy] [no-syscall-lock]";
- option status="in_progress";
+ option deprecated;
+};
+
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param host_if - Linux netdev interface name
+ @param name - new af_xdp interface name (optional)
+ @param rxq_num - number of receive queues. 65535 can be used as special value to request all available queues (optional)
+ @param rxq_size - receive queue size (optional)
+ @param txq_size - transmit queue size (optional)
+ @param mode - operation mode (optional)
+ @param flags - flags (optional)
+ @param prog - eBPF program path (optional)
+ @param namespace - netns of nic (optional)
+*/
+
+define af_xdp_create_v2
+{
+ u32 client_index;
+ u32 context;
+
+ string host_if[64];
+ string name[64];
+ u16 rxq_num [default=1];
+ u16 rxq_size [default=0];
+ u16 txq_size [default=0];
+ vl_api_af_xdp_mode_t mode [default=0];
+ vl_api_af_xdp_flag_t flags [default=0];
+ string prog[256];
+ string namespace[64];
+ option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [netns ns] [zero-copy|no-zero-copy] [no-syscall-lock]";
+ option deprecated;
};
/** \brief
@@ -71,7 +103,21 @@ define af_xdp_create_reply
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
- option status="in_progress";
+ option deprecated;
+};
+
+/** \brief
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param sw_if_index - software index for the new af_xdp interface
+*/
+
+define af_xdp_create_v2_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+ option deprecated;
};
/** \brief
@@ -80,6 +126,50 @@ define af_xdp_create_reply
@param sw_if_index - interface index
*/
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param host_if - Linux netdev interface name
+ @param name - new af_xdp interface name (optional)
+ @param rxq_num - number of receive queues. 65535 can be used as special value to request all available queues (optional)
+ @param rxq_size - receive queue size (optional)
+ @param txq_size - transmit queue size (optional)
+ @param mode - operation mode (optional)
+ @param flags - flags (optional)
+ @param prog - eBPF program path (optional)
+ @param netns - netns of nic (optional)
+*/
+
+autoendian define af_xdp_create_v3
+{
+ u32 client_index;
+ u32 context;
+
+ string host_if[64];
+ string name[64];
+ u16 rxq_num [default=1];
+ u16 rxq_size [default=0];
+ u16 txq_size [default=0];
+ vl_api_af_xdp_mode_t mode [default=0];
+ vl_api_af_xdp_flag_t flags [default=0];
+ string prog[256];
+ string netns[64];
+ option vat_help = "<host-if linux-ifname> [name ifname] [rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] [prog pathname] [netns ns] [zero-copy|no-zero-copy] [no-syscall-lock]";
+};
+
+/** \brief
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param sw_if_index - software index for the new af_xdp interface
+*/
+
+autoendian define af_xdp_create_v3_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
autoreply define af_xdp_delete
{
u32 client_index;
@@ -87,7 +177,6 @@ autoreply define af_xdp_delete
vl_api_interface_index_t sw_if_index;
option vat_help = "<sw_if_index index>";
- option status="in_progress";
};
/*
diff --git a/src/plugins/af_xdp/af_xdp.h b/src/plugins/af_xdp/af_xdp.h
index 825a3fb29fd..cf364fc86a8 100644
--- a/src/plugins/af_xdp/af_xdp.h
+++ b/src/plugins/af_xdp/af_xdp.h
@@ -20,7 +20,7 @@
#include <vlib/log.h>
#include <vnet/interface.h>
-#include <bpf/xsk.h>
+#include <xdp/xsk.h>
#define AF_XDP_NUM_RX_QUEUES_ALL ((u16)-1)
@@ -86,6 +86,10 @@ typedef struct
struct xsk_ring_prod tx;
struct xsk_ring_cons cq;
int xsk_fd;
+
+ /* fields below are accessed in control-plane only (cold) */
+
+ u32 queue_index;
} af_xdp_txq_t;
typedef struct
@@ -113,6 +117,8 @@ typedef struct
u8 rxq_num;
+ char *netns;
+
struct xsk_umem **umem;
struct xsk_socket **xsk;
@@ -149,6 +155,7 @@ typedef struct
char *linux_ifname;
char *name;
char *prog;
+ char *netns;
af_xdp_mode_t mode;
af_xdp_create_flag_t flags;
u32 rxq_size;
diff --git a/src/plugins/af_xdp/af_xdp_doc.md b/src/plugins/af_xdp/af_xdp_doc.md
deleted file mode 100644
index f5859dbb901..00000000000
--- a/src/plugins/af_xdp/af_xdp_doc.md
+++ /dev/null
@@ -1,129 +0,0 @@
-# AF_XDP Ethernet driver {#af_xdp_doc}
-
-This driver relies on Linux AF_XDP socket to rx/tx Ethernet packets.
-
-## Maturity level
-Under development: it should work, but has not been thoroughly tested.
-
-## Features
- - copy and zero-copy mode
- - multiqueue
- - API
- - custom eBPF program
- - polling, interrupt and adaptive mode
-
-## Known limitations
-
-### MTU
-Because of AF_XDP restrictions, the MTU is limited to below PAGE_SIZE
-(4096-bytes on most systems) minus 256-bytes, and they are additional
-limitations depending upon specific Linux device drivers.
-As a rule of thumb, a MTU of 3000-bytes or less should be safe.
-
-### Number of buffers
-Furthermore, upon UMEM creation, the kernel allocates a
-physically-contiguous structure, whose size is proportional to the number
-of 4KB pages contained in the UMEM. That allocation might fail when
-the number of buffers allocated by VPP is too high. That number can be
-controlled with the `buffers { buffers-per-numa }` configuration option.
-Finally, note that because of this limitation, this plugin is unlikely
-to be compatible with the use of 1GB hugepages.
-
-### Interrupt mode
-Interrupt and adaptive mode are supported but is limited by default to single
-threaded (no worker) configurations because of a kernel limitation prior to
-5.6. You can bypass the limitation at interface creation time by adding the
-`no-syscall-lock` parameter, but you must be sure that your kernel can
-support it, otherwise you will experience double-frees.
-See
-https://lore.kernel.org/bpf/BYAPR11MB365382C5DB1E5FCC53242609C1549@BYAPR11MB3653.namprd11.prod.outlook.com/
-for more details.
-
-### Mellanox
-When setting the number of queues on Mellanox NIC with `ethtool -L`, you must
-use twice the amount of configured queues: it looks like the Linux driver will
-create separate RX queues and TX queues (but all queues can be used for both
-RX and TX, the NIC will just not sent any packet on "pure" TX queues.
-Confused? So I am.). For example if you set `combined 2` you will effectively
-have to create 4 rx queues in AF_XDP if you want to be sure to receive all
-packets.
-
-## Requirements
-This drivers supports Linux kernel 5.4 and later. Kernels older than 5.4 are
-missing unaligned buffers support.
-
-The Linux kernel interface must be up and have enough queues before
-creating the VPP AF_XDP interface, otherwise Linux will deny creating
-the AF_XDP socket.
-The AF_XDP interface will claim NIC RX queue starting from 0, up to the
-requested number of RX queues (only 1 by default). It means all packets
-destined to NIC RX queue `[0, num_rx_queues[` will be received by the
-AF_XDP interface, and only them. Depending on your configuration, there
-will usually be several RX queues (typically 1 per core) and packets are
-spread accross queues by RSS. In order to receive consistent traffic,
-you **must** program the NIC dispatching accordingly. The simplest way
-to get all the packets is to specify `num-rx-queues all` to grab all
-available queues or to reconfigure the Linux kernel driver to use only
-`num_rx_queues` RX queues (ie all NIC queues will be associated with
-the AF_XDP socket):
-```
-~# ethtool -L <iface> combined <num_rx_queues>
-```
-Additionally, the VPP AF_XDP interface will use a MAC address generated at
-creation time instead of the Linux kernel interface MAC. As Linux kernel
-interface are not in promiscuous mode by default (see below) this will
-results in a useless configuration where the VPP AF_XDP interface only
-receives packets destined to the Linux kernel interface MAC just to drop
-them because the destination MAC does not match VPP AF_XDP interface MAC.
-If you want to use the Linux interface MAC for the VPP AF_XDP interface,
-you can change it afterwards in VPP:
-```
-~# vppctl set int mac address <iface> <mac>
-```
-Finally, if you wish to receive all packets and not only the packets
-destined to the Linux kernel interface MAC you need to set the Linux
-kernel interface in promiscuous mode:
-```
-~# ip link set dev <iface> promisc on
-```
-
-## Security considerations
-When creating an AF_XDP interface, it will receive all packets arriving
-to the NIC RX queue `[0, num_rx_queues[`. You need to configure the Linux
-kernel NIC driver properly to ensure that only intented packets will
-arrive in this queue. There is no way to filter the packets after-the-fact
-using eg. netfilter or eBPF.
-
-## Quickstart
-1. Put the Linux kernel interface up and in promiscuous mode:
-```
-~# ip l set dev enp216s0f0 promisc on up
-```
-2. Create the AF_XDP interface:
-```
-~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues all
-```
-3. Use the interface as usual, eg.:
-```
-~# vppctl set int ip addr enp216s0f0/0 1.1.1.1/24
-~# vppctl set int st enp216s0f0/0 up
-~# vppctl ping 1.1.1.100`
-```
-
-## Custom eBPF XDP program
-This driver relies on libbpf and as such relies on the `xsks_map` eBPF
-map. The default behavior is to use the XDP program already attached
-to the interface if any, otherwise load the default one.
-You can request to load a custom XDP program with the `prog` option when
-creating the interface in VPP:
-```
-~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues 4 prog extras/bpf/af_xdp.bpf.o
-```
-In that case it will replace any previously attached program. A custom
-XDP program example is provided in `extras/bpf/`.
-
-## Performance consideration
-AF_XDP relies on the Linux kernel NIC driver to rx/tx packets. To reach
-high-performance (10's MPPS), the Linux kernel NIC driver must support
-zero-copy mode and its RX path must run on a dedicated core in the NUMA
-where the NIC is physically connected.
diff --git a/src/plugins/af_xdp/af_xdp_doc.rst b/src/plugins/af_xdp/af_xdp_doc.rst
new file mode 100644
index 00000000000..de951340a2d
--- /dev/null
+++ b/src/plugins/af_xdp/af_xdp_doc.rst
@@ -0,0 +1,164 @@
+AF_XDP device driver
+====================
+
+This driver relies on Linux AF_XDP socket to rx/tx Ethernet packets.
+
+Maturity level
+--------------
+
+Under development: it should work, but has not been thoroughly tested.
+
+Features
+--------
+
+- copy and zero-copy mode
+- multiqueue
+- API
+- custom eBPF program
+- polling, interrupt and adaptive mode
+
+Known limitations
+-----------------
+
+MTU
+~~~
+
+Because of AF_XDP restrictions, the MTU is limited to below PAGE_SIZE
+(4096-bytes on most systems) minus 256-bytes, and they are additional
+limitations depending upon specific Linux device drivers. As a rule of
+thumb, a MTU of 3000-bytes or less should be safe.
+
+Number of buffers
+~~~~~~~~~~~~~~~~~
+
+Furthermore, upon UMEM creation, the kernel allocates a
+physically-contiguous structure, whose size is proportional to the
+number of 4KB pages contained in the UMEM. That allocation might fail
+when the number of buffers allocated by VPP is too high. That number can
+be controlled with the ``buffers { buffers-per-numa }`` configuration
+option. Finally, note that because of this limitation, this plugin is
+unlikely to be compatible with the use of 1GB hugepages.
+
+Interrupt mode
+~~~~~~~~~~~~~~
+
+Interrupt and adaptive mode are supported but is limited by default to
+single threaded (no worker) configurations because of a kernel
+limitation prior to 5.6. You can bypass the limitation at interface
+creation time by adding the ``no-syscall-lock`` parameter, but you must
+be sure that your kernel can support it, otherwise you will experience
+double-frees. See
+https://lore.kernel.org/bpf/BYAPR11MB365382C5DB1E5FCC53242609C1549@BYAPR11MB3653.namprd11.prod.outlook.com/
+for more details.
+
+Mellanox
+~~~~~~~~
+
+When setting the number of queues on Mellanox NIC with ``ethtool -L``,
+you must use twice the amount of configured queues: it looks like the
+Linux driver will create separate RX queues and TX queues (but all
+queues can be used for both RX and TX, the NIC will just not sent any
+packet on “pure” TX queues. Confused? So I am.). For example if you set
+``combined 2`` you will effectively have to create 4 rx queues in AF_XDP
+if you want to be sure to receive all packets.
+
+Requirements
+------------
+
+This drivers supports Linux kernel 5.4 and later. Kernels older than 5.4
+are missing unaligned buffers support.
+
+The Linux kernel interface must be up and have enough queues before
+creating the VPP AF_XDP interface, otherwise Linux will deny creating
+the AF_XDP socket. The AF_XDP interface will claim NIC RX queue starting
+from 0, up to the requested number of RX queues (only 1 by default). It
+means all packets destined to NIC RX queue ``[0, num_rx_queues[`` will
+be received by the AF_XDP interface, and only them. Depending on your
+configuration, there will usually be several RX queues (typically 1 per
+core) and packets are spread across queues by RSS. In order to receive
+consistent traffic, you **must** program the NIC dispatching
+accordingly. The simplest way to get all the packets is to specify
+``num-rx-queues all`` to grab all available queues or to reconfigure the
+Linux kernel driver to use only ``num_rx_queues`` RX queues (i.e. all NIC
+queues will be associated with the AF_XDP socket):
+
+::
+
+ ~# ethtool -L <iface> combined <num_rx_queues>
+
+Additionally, the VPP AF_XDP interface will use a MAC address generated
+at creation time instead of the Linux kernel interface MAC. As Linux
+kernel interface are not in promiscuous mode by default (see below) this
+will results in a useless configuration where the VPP AF_XDP interface
+only receives packets destined to the Linux kernel interface MAC just to
+drop them because the destination MAC does not match VPP AF_XDP
+interface MAC. If you want to use the Linux interface MAC for the VPP
+AF_XDP interface, you can change it afterwards in VPP:
+
+::
+
+ ~# vppctl set int mac address <iface> <mac>
+
+Finally, if you wish to receive all packets and not only the packets
+destined to the Linux kernel interface MAC you need to set the Linux
+kernel interface in promiscuous mode:
+
+::
+
+ ~# ip link set dev <iface> promisc on
+
+Security considerations
+-----------------------
+
+When creating an AF_XDP interface, it will receive all packets arriving
+to the NIC RX queue ``[0, num_rx_queues[``. You need to configure the
+Linux kernel NIC driver properly to ensure that only intended packets
+will arrive in this queue. There is no way to filter the packets
+after-the-fact using e.g. netfilter or eBPF.
+
+Quickstart
+----------
+
+1. Put the Linux kernel interface up and in promiscuous mode:
+
+::
+
+ ~# ip l set dev enp216s0f0 promisc on up
+
+2. Create the AF_XDP interface:
+
+::
+
+ ~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues all
+
+3. Use the interface as usual, e.g.:
+
+::
+
+ ~# vppctl set int ip addr enp216s0f0/0 1.1.1.1/24
+ ~# vppctl set int st enp216s0f0/0 up
+ ~# vppctl ping 1.1.1.100`
+
+Custom eBPF XDP program
+-----------------------
+
+This driver relies on libbpf and as such relies on the ``xsks_map`` eBPF
+map. The default behavior is to use the XDP program already attached to
+the interface if any, otherwise load the default one. You can request to
+load a custom XDP program with the ``prog`` option when creating the
+interface in VPP:
+
+::
+
+ ~# vppctl create int af_xdp host-if enp216s0f0 num-rx-queues 4 prog extras/bpf/af_xdp.bpf.o
+
+In that case it will replace any previously attached program. A custom
+XDP program example is provided in ``extras/bpf/``.
+
+Performance consideration
+-------------------------
+
+AF_XDP relies on the Linux kernel NIC driver to rx/tx packets. To reach
+high-performance (10’s MPPS), the Linux kernel NIC driver must support
+zero-copy mode and its RX path must run on a dedicated core in the NUMA
+where the NIC is physically connected.
diff --git a/src/plugins/af_xdp/api.c b/src/plugins/af_xdp/api.c
index 1864c4c2ee9..3e9a3fe2578 100644
--- a/src/plugins/af_xdp/api.c
+++ b/src/plugins/af_xdp/api.c
@@ -27,6 +27,7 @@
#include <af_xdp/af_xdp.api_enum.h>
#include <af_xdp/af_xdp.api_types.h>
+#define REPLY_MSG_ID_BASE (rm->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static af_xdp_mode_t
@@ -78,12 +79,72 @@ vl_api_af_xdp_create_t_handler (vl_api_af_xdp_create_t * mp)
af_xdp_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_AF_XDP_CREATE_REPLY + rm->msg_id_base,
+ REPLY_MACRO2 (VL_API_AF_XDP_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
+}
+
+static void
+vl_api_af_xdp_create_v2_t_handler (vl_api_af_xdp_create_v2_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ af_xdp_main_t *rm = &af_xdp_main;
+ vl_api_af_xdp_create_v2_reply_t *rmp;
+ af_xdp_create_if_args_t args;
+ int rv;
+
+ clib_memset (&args, 0, sizeof (af_xdp_create_if_args_t));
+
+ args.linux_ifname = mp->host_if[0] ? (char *) mp->host_if : 0;
+ args.name = mp->name[0] ? (char *) mp->name : 0;
+ args.prog = mp->prog[0] ? (char *) mp->prog : 0;
+ args.netns = mp->namespace[0] ? (char *) mp->namespace : 0;
+ args.mode = af_xdp_api_mode (mp->mode);
+ args.flags = af_xdp_api_flags (mp->flags);
+ args.rxq_size = ntohs (mp->rxq_size);
+ args.txq_size = ntohs (mp->txq_size);
+ args.rxq_num = ntohs (mp->rxq_num);
+
+ af_xdp_create_if (vm, &args);
+ rv = args.rv;
+
+ /* clang-format off */
+ REPLY_MACRO2 (VL_API_AF_XDP_CREATE_V2_REPLY,
({
rmp->sw_if_index = ntohl (args.sw_if_index);
}));
- /* *INDENT-ON* */
+ /* clang-format on */
+}
+
+static void
+vl_api_af_xdp_create_v3_t_handler (vl_api_af_xdp_create_v3_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ af_xdp_main_t *rm = &af_xdp_main;
+ vl_api_af_xdp_create_v3_reply_t *rmp;
+ af_xdp_create_if_args_t args;
+ int rv;
+
+ clib_memset (&args, 0, sizeof (af_xdp_create_if_args_t));
+
+ args.linux_ifname = mp->host_if[0] ? (char *) mp->host_if : 0;
+ args.name = mp->name[0] ? (char *) mp->name : 0;
+ args.prog = mp->prog[0] ? (char *) mp->prog : 0;
+ args.netns = mp->netns[0] ? (char *) mp->netns : 0;
+ args.mode = af_xdp_api_mode (mp->mode);
+ args.flags = af_xdp_api_flags (mp->flags);
+ args.rxq_size = mp->rxq_size;
+ args.txq_size = mp->txq_size;
+ args.rxq_num = mp->rxq_num;
+
+ af_xdp_create_if (vm, &args);
+ rv = args.rv;
+
+ /* clang-format off */
+ REPLY_MACRO2_END (VL_API_AF_XDP_CREATE_V3_REPLY,
+ ({
+ rmp->sw_if_index = args.sw_if_index;
+ }));
+ /* clang-format on */
}
static void
@@ -111,7 +172,7 @@ vl_api_af_xdp_delete_t_handler (vl_api_af_xdp_delete_t * mp)
af_xdp_delete_if (vm, rd);
reply:
- REPLY_MACRO (VL_API_AF_XDP_DELETE_REPLY + rm->msg_id_base);
+ REPLY_MACRO (VL_API_AF_XDP_DELETE_REPLY);
}
/* set tup the API message handling tables */
diff --git a/src/plugins/af_xdp/cli.c b/src/plugins/af_xdp/cli.c
index 2f3deffaaee..12d3b875a71 100644
--- a/src/plugins/af_xdp/cli.c
+++ b/src/plugins/af_xdp/cli.c
@@ -40,20 +40,20 @@ af_xdp_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
vec_free (args.linux_ifname);
vec_free (args.name);
+ vec_free (args.prog);
+ vec_free (args.netns);
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (af_xdp_create_command, static) = {
.path = "create interface af_xdp",
.short_help =
"create interface af_xdp <host-if linux-ifname> [name ifname] "
"[rx-queue-size size] [tx-queue-size size] [num-rx-queues <num|all>] "
- "[prog pathname] [zero-copy|no-zero-copy] [no-syscall-lock]",
+ "[prog pathname] [netns ns] [zero-copy|no-zero-copy] [no-syscall-lock]",
.function = af_xdp_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
af_xdp_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -98,14 +98,12 @@ af_xdp_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (af_xdp_delete_command, static) = {
.path = "delete interface af_xdp",
.short_help = "delete interface af_xdp "
"{<interface> | sw_if_index <sw_idx>}",
.function = af_xdp_delete_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
af_xdp_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/af_xdp/device.c b/src/plugins/af_xdp/device.c
index 7a10bce4290..63a276ce51e 100644
--- a/src/plugins/af_xdp/device.c
+++ b/src/plugins/af_xdp/device.c
@@ -17,17 +17,27 @@
#include <stdio.h>
#include <net/if.h>
+#include <sys/ioctl.h>
+#include <linux/ethtool.h>
#include <linux/if_link.h>
-#include <bpf/libbpf.h>
+#include <linux/sockios.h>
+#include <linux/limits.h>
+#include <bpf/bpf.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
+#include <vppinfra/linux/netns.h>
#include <vppinfra/linux/sysfs.h>
#include <vppinfra/unix.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
#include "af_xdp.h"
+#ifndef XDP_UMEM_MIN_CHUNK_SIZE
+#define XDP_UMEM_MIN_CHUNK_SIZE 2048
+#endif
+
af_xdp_main_t af_xdp_main;
typedef struct
@@ -62,6 +72,16 @@ af_xdp_mac_change (vnet_hw_interface_t * hw, const u8 * old, const u8 * new)
return 0;
}
+static clib_error_t *
+af_xdp_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+ u32 frame_size)
+{
+ af_xdp_main_t *am = &af_xdp_main;
+ af_xdp_device_t *ad = vec_elt_at_index (am->devices, hw->dev_instance);
+ af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "set mtu not supported yet");
+ return vnet_error (VNET_ERR_UNSUPPORTED, 0);
+}
+
static u32
af_xdp_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
{
@@ -77,15 +97,87 @@ af_xdp_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
af_xdp_log (VLIB_LOG_LEVEL_ERR, ad,
"set promiscuous not supported yet");
return ~0;
- case ETHERNET_INTERFACE_FLAG_MTU:
- af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "set mtu not supported yet");
- return ~0;
}
af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "unknown flag %x requested", flags);
return ~0;
}
+int
+af_xdp_enter_netns (char *netns, int *fds)
+{
+ *fds = *(fds + 1) = -1;
+ if (netns != NULL)
+ {
+ *fds = clib_netns_open (NULL /* self */);
+ if ((*(fds + 1) = clib_netns_open ((u8 *) netns)) == -1)
+ return VNET_API_ERROR_SYSCALL_ERROR_8;
+ if (clib_setns (*(fds + 1)) == -1)
+ return VNET_API_ERROR_SYSCALL_ERROR_9;
+ }
+ return 0;
+}
+
+void
+af_xdp_cleanup_netns (int *fds)
+{
+ if (*fds != -1)
+ close (*fds);
+
+ if (*(fds + 1) != -1)
+ close (*(fds + 1));
+
+ *fds = *(fds + 1) = -1;
+}
+
+int
+af_xdp_exit_netns (char *netns, int *fds)
+{
+ int ret = 0;
+ if (netns != NULL)
+ {
+ if (*fds != -1)
+ ret = clib_setns (*fds);
+
+ af_xdp_cleanup_netns (fds);
+ }
+
+ return ret;
+}
+
+static int
+af_xdp_remove_program (af_xdp_device_t *ad)
+{
+ u32 curr_prog_id = 0;
+ int ret;
+ int ns_fds[2];
+
+ af_xdp_enter_netns (ad->netns, ns_fds);
+ ret = bpf_xdp_query_id (ad->linux_ifindex, XDP_FLAGS_UPDATE_IF_NOEXIST,
+ &curr_prog_id);
+ if (ret != 0)
+ {
+ af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "bpf_xdp_query_id failed\n");
+ goto err0;
+ }
+
+ ret = bpf_xdp_detach (ad->linux_ifindex, XDP_FLAGS_UPDATE_IF_NOEXIST, NULL);
+ if (ret != 0)
+ {
+ af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "bpf_xdp_detach failed\n");
+ goto err0;
+ }
+ af_xdp_exit_netns (ad->netns, ns_fds);
+ if (ad->bpf_obj)
+ bpf_object__close (ad->bpf_obj);
+
+ return 0;
+
+err0:
+ af_xdp_exit_netns (ad->netns, ns_fds);
+ return ret;
+}
+
void
af_xdp_delete_if (vlib_main_t * vm, af_xdp_device_t * ad)
{
@@ -101,9 +193,6 @@ af_xdp_delete_if (vlib_main_t * vm, af_xdp_device_t * ad)
ethernet_delete_interface (vnm, ad->hw_if_index);
}
- for (i = 0; i < ad->rxq_num; i++)
- clib_file_del_by_index (&file_main, vec_elt (ad->rxqs, i).file_index);
-
for (i = 0; i < ad->txq_num; i++)
clib_spinlock_free (&vec_elt (ad->txqs, i).lock);
@@ -113,17 +202,20 @@ af_xdp_delete_if (vlib_main_t * vm, af_xdp_device_t * ad)
vec_foreach (umem, ad->umem)
xsk_umem__delete (*umem);
- if (ad->bpf_obj)
- {
- bpf_set_link_xdp_fd (ad->linux_ifindex, -1, 0);
- bpf_object__unload (ad->bpf_obj);
- }
+ for (i = 0; i < ad->rxq_num; i++)
+ clib_file_del_by_index (&file_main, vec_elt (ad->rxqs, i).file_index);
+
+ if (af_xdp_remove_program (ad) != 0)
+ af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, "Error while removing XDP program.\n");
vec_free (ad->xsk);
vec_free (ad->umem);
vec_free (ad->buffer_template);
vec_free (ad->rxqs);
vec_free (ad->txqs);
+ vec_free (ad->name);
+ vec_free (ad->linux_ifname);
+ vec_free (ad->netns);
clib_error_free (ad->error);
pool_put (axm->devices, ad);
}
@@ -132,44 +224,49 @@ static int
af_xdp_load_program (af_xdp_create_if_args_t * args, af_xdp_device_t * ad)
{
int fd;
+ struct bpf_program *bpf_prog;
+ struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
- ad->linux_ifindex = if_nametoindex (ad->linux_ifname);
- if (!ad->linux_ifindex)
- {
- args->rv = VNET_API_ERROR_INVALID_VALUE;
- args->error =
- clib_error_return_unix (0, "if_nametoindex(%s) failed",
- ad->linux_ifname);
- goto err0;
- }
+ if (setrlimit (RLIMIT_MEMLOCK, &r))
+ af_xdp_log (VLIB_LOG_LEVEL_WARNING, ad,
+ "setrlimit(%s) failed: %s (errno %d)", ad->linux_ifname,
+ strerror (errno), errno);
- if (bpf_prog_load (args->prog, BPF_PROG_TYPE_XDP, &ad->bpf_obj, &fd))
+ ad->bpf_obj = bpf_object__open_file (args->prog, NULL);
+ if (libbpf_get_error (ad->bpf_obj))
{
args->rv = VNET_API_ERROR_SYSCALL_ERROR_5;
- args->error =
- clib_error_return_unix (0, "bpf_prog_load(%s) failed", args->prog);
+ args->error = clib_error_return_unix (
+ 0, "bpf_object__open_file(%s) failed", args->prog);
goto err0;
}
-#ifndef XDP_FLAGS_REPLACE
-#define XDP_FLAGS_REPLACE 0
-#endif
- if (bpf_set_link_xdp_fd (ad->linux_ifindex, fd, XDP_FLAGS_REPLACE))
+ bpf_prog = bpf_object__next_program (ad->bpf_obj, NULL);
+ if (!bpf_prog)
+ goto err1;
+
+ bpf_program__set_type (bpf_prog, BPF_PROG_TYPE_XDP);
+
+ if (bpf_object__load (ad->bpf_obj))
+ goto err1;
+
+ fd = bpf_program__fd (bpf_prog);
+
+ if (bpf_xdp_attach (ad->linux_ifindex, fd, XDP_FLAGS_UPDATE_IF_NOEXIST,
+ NULL))
{
args->rv = VNET_API_ERROR_SYSCALL_ERROR_6;
- args->error =
- clib_error_return_unix (0, "bpf_set_link_xdp_fd(%s) failed",
- ad->linux_ifname);
+ args->error = clib_error_return_unix (0, "bpf_xdp_attach(%s) failed",
+ ad->linux_ifname);
goto err1;
}
return 0;
err1:
- bpf_object__unload (ad->bpf_obj);
+ bpf_object__close (ad->bpf_obj);
ad->bpf_obj = 0;
err0:
- ad->linux_ifindex = ~0;
return -1;
}
@@ -188,16 +285,9 @@ af_xdp_create_queue (vlib_main_t *vm, af_xdp_create_if_args_t *args,
const int is_rx = qid < ad->rxq_num;
const int is_tx = qid < ad->txq_num;
- vec_validate_aligned (ad->umem, qid, CLIB_CACHE_LINE_BYTES);
umem = vec_elt_at_index (ad->umem, qid);
-
- vec_validate_aligned (ad->xsk, qid, CLIB_CACHE_LINE_BYTES);
xsk = vec_elt_at_index (ad->xsk, qid);
-
- vec_validate_aligned (ad->rxqs, qid, CLIB_CACHE_LINE_BYTES);
rxq = vec_elt_at_index (ad->rxqs, qid);
-
- vec_validate_aligned (ad->txqs, qid, CLIB_CACHE_LINE_BYTES);
txq = vec_elt_at_index (ad->txqs, qid);
/*
@@ -221,8 +311,18 @@ af_xdp_create_queue (vlib_main_t *vm, af_xdp_create_if_args_t *args,
(umem, uword_to_pointer (vm->buffer_main->buffer_mem_start, void *),
vm->buffer_main->buffer_mem_size, fq, cq, &umem_config))
{
+ uword sys_page_size = clib_mem_get_page_size ();
args->rv = VNET_API_ERROR_SYSCALL_ERROR_1;
args->error = clib_error_return_unix (0, "xsk_umem__create() failed");
+ /* this should mimic the Linux kernel net/xdp/xdp_umem.c:xdp_umem_reg()
+ * check */
+ if (umem_config.frame_size < XDP_UMEM_MIN_CHUNK_SIZE ||
+ umem_config.frame_size > sys_page_size)
+ args->error = clib_error_return (
+ args->error,
+ "(unsupported data-size? (should be between %d and %d))",
+ XDP_UMEM_MIN_CHUNK_SIZE - sizeof (vlib_buffer_t),
+ sys_page_size - sizeof (vlib_buffer_t));
goto err0;
}
@@ -241,6 +341,8 @@ af_xdp_create_queue (vlib_main_t *vm, af_xdp_create_if_args_t *args,
sock_config.bind_flags |= XDP_ZEROCOPY;
break;
}
+ if (args->prog)
+ sock_config.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD;
if (xsk_socket__create
(xsk, ad->linux_ifname, qid, *umem, rx, tx, &sock_config))
{
@@ -253,10 +355,27 @@ af_xdp_create_queue (vlib_main_t *vm, af_xdp_create_if_args_t *args,
}
fd = xsk_socket__fd (*xsk);
+ if (args->prog)
+ {
+ struct bpf_map *map =
+ bpf_object__find_map_by_name (ad->bpf_obj, "xsks_map");
+ int ret = xsk_socket__update_xskmap (*xsk, bpf_map__fd (map));
+ if (ret)
+ {
+ args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
+ args->error = clib_error_return_unix (
+ 0, "xsk_socket__update_xskmap %s qid %d return %d",
+ ad->linux_ifname, qid, ret);
+ goto err2;
+ }
+ }
optlen = sizeof (opt);
+#ifndef SOL_XDP
+#define SOL_XDP 283
+#endif
if (getsockopt (fd, SOL_XDP, XDP_OPTIONS, &opt, &optlen))
{
- args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
+ args->rv = VNET_API_ERROR_SYSCALL_ERROR_4;
args->error =
clib_error_return_unix (0, "getsockopt(XDP_OPTIONS) failed");
goto err2;
@@ -269,6 +388,7 @@ af_xdp_create_queue (vlib_main_t *vm, af_xdp_create_if_args_t *args,
if (is_tx)
{
txq->xsk_fd = fd;
+ clib_spinlock_init (&txq->lock);
if (is_rx && (ad->flags & AF_XDP_DEVICE_F_SYSCALL_LOCK))
{
/* This is a shared rx+tx queue and we need to lock before syscalls.
@@ -321,6 +441,31 @@ af_xdp_get_numa (const char *ifname)
return numa;
}
+static void
+af_xdp_get_q_count (const char *ifname, int *rxq_num, int *txq_num)
+{
+ struct ethtool_channels ec = { .cmd = ETHTOOL_GCHANNELS };
+ struct ifreq ifr = { .ifr_data = (void *) &ec };
+ int fd, err;
+
+ *rxq_num = *txq_num = 1;
+
+ fd = socket (AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return;
+
+ snprintf (ifr.ifr_name, sizeof (ifr.ifr_name), "%s", ifname);
+ err = ioctl (fd, SIOCETHTOOL, &ifr);
+
+ close (fd);
+
+ if (err)
+ return;
+
+ *rxq_num = clib_max (ec.combined_count, ec.rx_count);
+ *txq_num = clib_max (ec.combined_count, ec.tx_count);
+}
+
static clib_error_t *
af_xdp_device_rxq_read_ready (clib_file_t * f)
{
@@ -361,22 +506,88 @@ af_xdp_device_set_rxq_mode (const af_xdp_device_t *ad, af_xdp_rxq_t *rxq,
return 0;
}
+static u32
+af_xdp_find_rxq_for_thread (vnet_main_t *vnm, const af_xdp_device_t *ad,
+ const u32 thread)
+{
+ u32 i;
+ for (i = 0; i < ad->rxq_num; i++)
+ {
+ const u32 qid = vec_elt (ad->rxqs, i).queue_index;
+ const u32 tid = vnet_hw_if_get_rx_queue (vnm, qid)->thread_index;
+ if (tid == thread)
+ return i;
+ }
+ return ~0;
+}
+
+static clib_error_t *
+af_xdp_finalize_queues (vnet_main_t *vnm, af_xdp_device_t *ad,
+ const int n_vlib_mains)
+{
+ clib_error_t *err = 0;
+ int i;
+
+ for (i = 0; i < ad->rxq_num; i++)
+ {
+ af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, i);
+ rxq->queue_index = vnet_hw_if_register_rx_queue (
+ vnm, ad->hw_if_index, i, VNET_HW_IF_RXQ_THREAD_ANY);
+ u8 *desc = format (0, "%U rxq %d", format_af_xdp_device_name,
+ ad->dev_instance, i);
+ clib_file_t f = {
+ .file_descriptor = rxq->xsk_fd,
+ .private_data = rxq->queue_index,
+ .read_function = af_xdp_device_rxq_read_ready,
+ .description = desc,
+ };
+ rxq->file_index = clib_file_add (&file_main, &f);
+ vnet_hw_if_set_rx_queue_file_index (vnm, rxq->queue_index,
+ rxq->file_index);
+ err = af_xdp_device_set_rxq_mode (ad, rxq, AF_XDP_RXQ_MODE_POLLING);
+ if (err)
+ return err;
+ }
+
+ for (i = 0; i < ad->txq_num; i++)
+ vec_elt (ad->txqs, i).queue_index =
+ vnet_hw_if_register_tx_queue (vnm, ad->hw_if_index, i);
+
+ /* We set the rxq and txq of the same queue pair on the same thread
+ * by default to avoid locking because of the syscall lock. */
+ int last_qid = clib_min (ad->rxq_num, ad->txq_num - 1);
+ for (i = 0; i < n_vlib_mains; i++)
+ {
+ /* search for the 1st rxq assigned on this thread, if any */
+ u32 qid = af_xdp_find_rxq_for_thread (vnm, ad, i);
+ /* if this rxq is combined with a txq, use it. Otherwise, we'll
+ * assign txq in a round-robin fashion. We start from the 1st txq
+ * not shared with a rxq if possible... */
+ qid = qid < ad->txq_num ? qid : (last_qid++ % ad->txq_num);
+ vnet_hw_if_tx_queue_assign_thread (
+ vnm, vec_elt (ad->txqs, qid).queue_index, i);
+ }
+
+ vnet_hw_if_update_runtime_data (vnm, ad->hw_if_index);
+ return 0;
+}
+
void
af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
{
vnet_main_t *vnm = vnet_get_main ();
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vnet_eth_interface_registration_t eir = {};
af_xdp_main_t *am = &af_xdp_main;
af_xdp_device_t *ad;
vnet_sw_interface_t *sw;
- vnet_hw_interface_t *hw;
int rxq_num, txq_num, q_num;
- int i;
+ int ns_fds[2];
+ int i, ret;
args->rxq_size = args->rxq_size ? args->rxq_size : 2 * VLIB_FRAME_SIZE;
args->txq_size = args->txq_size ? args->txq_size : 2 * VLIB_FRAME_SIZE;
- rxq_num = args->rxq_num ? args->rxq_num : 1;
- txq_num = tm->n_vlib_mains;
+ args->rxq_num = args->rxq_num ? args->rxq_num : 1;
if (!args->linux_ifname)
{
@@ -397,6 +608,26 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
goto err0;
}
+ ret = af_xdp_enter_netns (args->netns, ns_fds);
+ if (ret)
+ {
+ args->rv = ret;
+ args->error = clib_error_return (0, "enter netns %s failed, ret %d",
+ args->netns, args->rv);
+ goto err0;
+ }
+
+ af_xdp_get_q_count (args->linux_ifname, &rxq_num, &txq_num);
+ if (args->rxq_num > rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != args->rxq_num)
+ {
+ args->rv = VNET_API_ERROR_INVALID_VALUE;
+ args->error = clib_error_create ("too many rxq requested (%d > %d)",
+ args->rxq_num, rxq_num);
+ goto err1;
+ }
+ rxq_num = clib_min (rxq_num, args->rxq_num);
+ txq_num = clib_min (txq_num, tm->n_vlib_mains);
+
pool_get_zero (am->devices, ad);
if (tm->n_vlib_mains > 1 &&
@@ -406,12 +637,32 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
ad->linux_ifname = (char *) format (0, "%s", args->linux_ifname);
vec_validate (ad->linux_ifname, IFNAMSIZ - 1); /* libbpf expects ifname to be at least IFNAMSIZ */
- if (args->prog && af_xdp_load_program (args, ad))
- goto err1;
+ if (args->netns)
+ ad->netns = (char *) format (0, "%s%c", args->netns, 0);
+
+ ad->linux_ifindex = if_nametoindex (ad->linux_ifname);
+ if (!ad->linux_ifindex)
+ {
+ args->rv = VNET_API_ERROR_INVALID_VALUE;
+ args->error = clib_error_return_unix (0, "if_nametoindex(%s) failed",
+ ad->linux_ifname);
+ ad->linux_ifindex = ~0;
+ goto err1;
+ }
+
+ if (args->prog &&
+ (af_xdp_remove_program (ad) || af_xdp_load_program (args, ad)))
+ goto err2;
q_num = clib_max (rxq_num, txq_num);
ad->rxq_num = rxq_num;
ad->txq_num = txq_num;
+
+ vec_validate_aligned (ad->umem, q_num - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ad->xsk, q_num - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ad->rxqs, q_num - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ad->txqs, q_num - 1, CLIB_CACHE_LINE_BYTES);
+
for (i = 0; i < q_num; i++)
{
if (af_xdp_create_queue (vm, args, ad, i))
@@ -423,6 +674,8 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
* requested 'max'
* we might create less tx queues than workers but this is ok
*/
+ af_xdp_log (VLIB_LOG_LEVEL_DEBUG, ad,
+ "create interface failed to create queue qid=%d", i);
/* fixup vectors length */
vec_set_len (ad->umem, i);
@@ -433,19 +686,14 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
ad->rxq_num = clib_min (i, rxq_num);
ad->txq_num = clib_min (i, txq_num);
- if (i < rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != rxq_num)
+ if (i == 0 ||
+ (i < rxq_num && AF_XDP_NUM_RX_QUEUES_ALL != args->rxq_num))
{
ad->rxq_num = ad->txq_num = 0;
- goto err1; /* failed creating requested rxq: fatal error, bailing
+ goto err2; /* failed creating requested rxq: fatal error, bailing
out */
}
- if (i < txq_num)
- {
- /* we created less txq than threads not an error but initialize lock for shared txq */
- for (i = 0; i < ad->txq_num; i++)
- clib_spinlock_init (&vec_elt (ad->txqs, i).lock);
- }
args->rv = 0;
clib_error_free (args->error);
@@ -453,6 +701,13 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
}
}
+ if (af_xdp_exit_netns (args->netns, ns_fds))
+ {
+ args->rv = VNET_API_ERROR_SYSCALL_ERROR_10;
+ args->error = clib_error_return (0, "exit netns failed");
+ goto err2;
+ }
+
ad->dev_instance = ad - am->devices;
ad->per_interface_next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
ad->pool =
@@ -460,53 +715,43 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
af_xdp_get_numa
(ad->linux_ifname));
if (!args->name)
- ad->name =
- (char *) format (0, "%s/%d", ad->linux_ifname, ad->dev_instance);
+ {
+ char *ifname = ad->linux_ifname;
+ if (args->netns != NULL && strncmp (args->netns, "pid:", 4) == 0)
+ {
+ ad->name =
+ (char *) format (0, "%s/%u", ifname, atoi (args->netns + 4));
+ }
+ else
+ ad->name = (char *) format (0, "%s/%d", ifname, ad->dev_instance);
+ }
else
ad->name = (char *) format (0, "%s", args->name);
ethernet_mac_address_generate (ad->hwaddr);
/* create interface */
- if (ethernet_register_interface (vnm, af_xdp_device_class.index,
- ad->dev_instance, ad->hwaddr,
- &ad->hw_if_index, af_xdp_flag_change))
- {
- args->rv = VNET_API_ERROR_INVALID_INTERFACE;
- args->error =
- clib_error_return (0, "ethernet_register_interface() failed");
- goto err1;
- }
+ eir.dev_class_index = af_xdp_device_class.index;
+ eir.dev_instance = ad->dev_instance;
+ eir.address = ad->hwaddr;
+ eir.cb.flag_change = af_xdp_flag_change;
+ eir.cb.set_max_frame_size = af_xdp_set_max_frame_size;
+ ad->hw_if_index = vnet_eth_register_interface (vnm, &eir);
sw = vnet_get_hw_sw_interface (vnm, ad->hw_if_index);
- hw = vnet_get_hw_interface (vnm, ad->hw_if_index);
args->sw_if_index = ad->sw_if_index = sw->sw_if_index;
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+
+ vnet_hw_if_set_caps (vnm, ad->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
vnet_hw_if_set_input_node (vnm, ad->hw_if_index, af_xdp_input_node.index);
- for (i = 0; i < ad->rxq_num; i++)
+ args->error = af_xdp_finalize_queues (vnm, ad, tm->n_vlib_mains);
+ if (args->error)
{
- af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, i);
- rxq->queue_index = vnet_hw_if_register_rx_queue (
- vnm, ad->hw_if_index, i, VNET_HW_IF_RXQ_THREAD_ANY);
- u8 *desc = format (0, "%U rxq %d", format_af_xdp_device_name,
- ad->dev_instance, i);
- clib_file_t f = {
- .file_descriptor = rxq->xsk_fd,
- .private_data = rxq->queue_index,
- .read_function = af_xdp_device_rxq_read_ready,
- .description = desc,
- };
- rxq->file_index = clib_file_add (&file_main, &f);
- vnet_hw_if_set_rx_queue_file_index (vnm, rxq->queue_index,
- rxq->file_index);
- if (af_xdp_device_set_rxq_mode (ad, rxq, AF_XDP_RXQ_MODE_POLLING))
- goto err1;
+ args->rv = VNET_API_ERROR_SYSCALL_ERROR_7;
+ goto err2;
}
- vnet_hw_if_update_runtime_data (vnm, ad->hw_if_index);
-
/* buffer template */
vec_validate_aligned (ad->buffer_template, 1, CLIB_CACHE_LINE_BYTES);
ad->buffer_template->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -517,8 +762,10 @@ af_xdp_create_if (vlib_main_t * vm, af_xdp_create_if_args_t * args)
return;
-err1:
+err2:
af_xdp_delete_if (vm, ad);
+err1:
+ af_xdp_cleanup_netns (ns_fds);
err0:
vlib_log_err (am->log_class, "%U", format_clib_error, args->error);
}
@@ -610,7 +857,6 @@ af_xdp_clear (u32 dev_instance)
clib_error_free (ad->error);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (af_xdp_device_class) = {
.name = "AF_XDP interface",
.format_device = format_af_xdp_device,
@@ -623,7 +869,6 @@ VNET_DEVICE_CLASS (af_xdp_device_class) = {
.mac_addr_change_function = af_xdp_mac_change,
.clear_counters = af_xdp_clear,
};
-/* *INDENT-ON* */
clib_error_t *
af_xdp_init (vlib_main_t * vm)
diff --git a/src/plugins/af_xdp/input.c b/src/plugins/af_xdp/input.c
index 4f3ac5725a4..9177b3ffc5b 100644
--- a/src/plugins/af_xdp/input.c
+++ b/src/plugins/af_xdp/input.c
@@ -15,7 +15,6 @@
*------------------------------------------------------------------
*/
-#include <poll.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
@@ -89,8 +88,7 @@ af_xdp_device_input_refill_db (vlib_main_t * vm,
if (clib_spinlock_trylock_if_init (&rxq->syscall_lock))
{
- struct pollfd fd = { .fd = rxq->xsk_fd, .events = POLLIN | POLLOUT };
- int ret = poll (&fd, 1, 0);
+ int ret = recvmsg (rxq->xsk_fd, 0, MSG_DONTWAIT);
clib_spinlock_unlock_if_init (&rxq->syscall_lock);
if (PREDICT_FALSE (ret < 0))
{
@@ -198,6 +196,7 @@ af_xdp_device_input_ethernet (vlib_main_t * vm, vlib_node_runtime_t * node,
ef = vlib_frame_scalar_args (f);
ef->sw_if_index = sw_if_index;
ef->hw_if_index = hw_if_index;
+ vlib_frame_no_append (f);
}
static_always_inline u32
@@ -297,7 +296,7 @@ af_xdp_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_copy_template (&bt, ad->buffer_template);
next_index = ad->per_interface_next_index;
if (PREDICT_FALSE (vnet_device_input_have_features (ad->sw_if_index)))
- vnet_feature_start_device_input_x1 (ad->sw_if_index, &next_index, &bt);
+ vnet_feature_start_device_input (ad->sw_if_index, &next_index, &bt);
vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -353,7 +352,6 @@ af_xdp_device_input_refill (af_xdp_device_t *ad)
}
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (af_xdp_input_node) = {
.name = "af_xdp-input",
.sibling_of = "device-input",
@@ -364,7 +362,6 @@ VLIB_REGISTER_NODE (af_xdp_input_node) = {
.error_strings = af_xdp_input_error_strings,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/af_xdp/output.c b/src/plugins/af_xdp/output.c
index 51a56ed866d..a59c01ca6e0 100644
--- a/src/plugins/af_xdp/output.c
+++ b/src/plugins/af_xdp/output.c
@@ -1,5 +1,5 @@
-#include <poll.h>
#include <string.h>
+#include <vppinfra/clib.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>
@@ -101,11 +101,19 @@ af_xdp_device_output_tx_db (vlib_main_t * vm,
if (xsk_ring_prod__needs_wakeup (&txq->tx))
{
- struct pollfd fd = { .fd = txq->xsk_fd, .events = POLLIN | POLLOUT };
- int ret = poll (&fd, 1, 0);
+ const struct msghdr msg = {};
+ int ret;
+ /* On tx, xsk socket will only tx up to TX_BATCH_SIZE, as defined in
+ * kernel net/xdp/xsk.c. Unfortunately we do not know how much this is,
+ * our only option is to retry until everything is sent... */
+ do
+ {
+ ret = sendmsg (txq->xsk_fd, &msg, MSG_DONTWAIT);
+ }
+ while (ret < 0 && EAGAIN == errno);
if (PREDICT_FALSE (ret < 0))
{
- /* something bad is happening */
+ /* not EAGAIN: something bad is happening */
vlib_error_count (vm, node->node_index,
AF_XDP_TX_ERROR_SYSCALL_FAILURES, 1);
af_xdp_device_error (ad, "tx poll() failed");
@@ -147,6 +155,14 @@ wrap_around:
while (n >= 8)
{
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT ||
+ b[1]->flags & VLIB_BUFFER_NEXT_PRESENT ||
+ b[2]->flags & VLIB_BUFFER_NEXT_PRESENT ||
+ b[3]->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ break;
+ }
+
vlib_prefetch_buffer_header (b[4], LOAD);
offset =
(sizeof (vlib_buffer_t) +
@@ -186,6 +202,17 @@ wrap_around:
while (n >= 1)
{
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ if (vlib_buffer_chain_linearize (vm, b[0]) != 1)
+ {
+ af_xdp_log (VLIB_LOG_LEVEL_ERR, ad,
+ "vlib_buffer_chain_linearize failed");
+ vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b[0]));
+ continue;
+ }
+ }
+
offset =
(sizeof (vlib_buffer_t) +
b[0]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT;
@@ -215,9 +242,9 @@ VNET_DEVICE_CLASS_TX_FN (af_xdp_device_class) (vlib_main_t * vm,
af_xdp_main_t *rm = &af_xdp_main;
vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
af_xdp_device_t *ad = pool_elt_at_index (rm->devices, ord->dev_instance);
- u32 thread_index = vm->thread_index;
- af_xdp_txq_t *txq =
- vec_elt_at_index (ad->txqs, (thread_index - 1) % ad->txq_num);
+ const vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
+ const int shared_queue = tf->shared_queue;
+ af_xdp_txq_t *txq = vec_elt_at_index (ad->txqs, tf->queue_id);
u32 *from;
u32 n, n_tx;
int i;
@@ -225,20 +252,22 @@ VNET_DEVICE_CLASS_TX_FN (af_xdp_device_class) (vlib_main_t * vm,
from = vlib_frame_vector_args (frame);
n_tx = frame->n_vectors;
- clib_spinlock_lock_if_init (&txq->lock);
+ if (shared_queue)
+ clib_spinlock_lock (&txq->lock);
for (i = 0, n = 0; i < AF_XDP_TX_RETRIES && n < n_tx; i++)
{
u32 n_enq;
af_xdp_device_output_free (vm, node, txq);
- n_enq = af_xdp_device_output_tx_try (vm, node, ad, txq, n_tx - n, from);
+ n_enq =
+ af_xdp_device_output_tx_try (vm, node, ad, txq, n_tx - n, from + n);
n += n_enq;
- from += n_enq;
}
af_xdp_device_output_tx_db (vm, node, ad, txq, n);
- clib_spinlock_unlock_if_init (&txq->lock);
+ if (shared_queue)
+ clib_spinlock_unlock (&txq->lock);
if (PREDICT_FALSE (n != n_tx))
{
diff --git a/src/plugins/af_xdp/plugin.c b/src/plugins/af_xdp/plugin.c
index 444ee553cbf..7be7afeac83 100644
--- a/src/plugins/af_xdp/plugin.c
+++ b/src/plugins/af_xdp/plugin.c
@@ -19,12 +19,10 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "AF_XDP Device Plugin",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/af_xdp/test_api.c b/src/plugins/af_xdp/test_api.c
index 6dffa29bdd1..581697e341d 100644
--- a/src/plugins/af_xdp/test_api.c
+++ b/src/plugins/af_xdp/test_api.c
@@ -91,6 +91,75 @@ api_af_xdp_create (vat_main_t * vam)
return ret;
}
+/* af_xdp create v2 API */
+static int
+api_af_xdp_create_v2 (vat_main_t *vam)
+{
+ vl_api_af_xdp_create_v2_t *mp;
+ af_xdp_create_if_args_t args;
+ int ret;
+
+ if (!unformat_user (vam->input, unformat_af_xdp_create_if_args, &args))
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error, vam->input);
+ return -99;
+ }
+
+ M (AF_XDP_CREATE, mp);
+
+ snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s",
+ args.linux_ifname ?: "");
+ snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name ?: "");
+ snprintf ((char *) mp->namespace, sizeof (mp->namespace), "%s",
+ args.netns ?: "");
+ mp->rxq_num = clib_host_to_net_u16 (args.rxq_num);
+ mp->rxq_size = clib_host_to_net_u16 (args.rxq_size);
+ mp->txq_size = clib_host_to_net_u16 (args.txq_size);
+ mp->mode = api_af_xdp_mode (args.mode);
+ if (args.flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK)
+ mp->flags |= AF_XDP_API_FLAGS_NO_SYSCALL_LOCK;
+ snprintf ((char *) mp->prog, sizeof (mp->prog), "%s", args.prog ?: "");
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
+/* af_xdp create v2 API */
+static int
+api_af_xdp_create_v3 (vat_main_t *vam)
+{
+ vl_api_af_xdp_create_v3_t *mp;
+ af_xdp_create_if_args_t args;
+ int ret;
+
+ if (!unformat_user (vam->input, unformat_af_xdp_create_if_args, &args))
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error, vam->input);
+ return -99;
+ }
+
+ M (AF_XDP_CREATE, mp);
+
+ snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s",
+ args.linux_ifname ?: "");
+ snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name ?: "");
+ snprintf ((char *) mp->netns, sizeof (mp->netns), "%s", args.netns ?: "");
+ mp->rxq_num = args.rxq_num;
+ mp->rxq_size = args.rxq_size;
+ mp->txq_size = args.txq_size;
+ mp->mode = api_af_xdp_mode (args.mode);
+ if (args.flags & AF_XDP_CREATE_FLAGS_NO_SYSCALL_LOCK)
+ mp->flags |= AF_XDP_API_FLAGS_NO_SYSCALL_LOCK;
+ snprintf ((char *) mp->prog, sizeof (mp->prog), "%s", args.prog ?: "");
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
/* af_xdp-create reply handler */
static void
vl_api_af_xdp_create_reply_t_handler (vl_api_af_xdp_create_reply_t * mp)
@@ -109,6 +178,42 @@ vl_api_af_xdp_create_reply_t_handler (vl_api_af_xdp_create_reply_t * mp)
vam->regenerate_interface_table = 1;
}
+/* af_xdp-create v2 reply handler */
+static void
+vl_api_af_xdp_create_v2_reply_t_handler (vl_api_af_xdp_create_v2_reply_t *mp)
+{
+ vat_main_t *vam = af_xdp_test_main.vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created af_xdp with sw_if_index %d\n",
+ ntohl (mp->sw_if_index));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vam->regenerate_interface_table = 1;
+}
+
+/* af_xdp-create v3 reply handler */
+static void
+vl_api_af_xdp_create_v3_reply_t_handler (vl_api_af_xdp_create_v2_reply_t *mp)
+{
+ vat_main_t *vam = af_xdp_test_main.vat_main;
+ i32 retval = mp->retval;
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created af_xdp with sw_if_index %d\n",
+ mp->sw_if_index);
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vam->regenerate_interface_table = 1;
+}
+
/* af_xdp delete API */
static int
api_af_xdp_delete (vat_main_t * vam)
diff --git a/src/plugins/af_xdp/unformat.c b/src/plugins/af_xdp/unformat.c
index bb4c3048d23..8c0482d83ff 100644
--- a/src/plugins/af_xdp/unformat.c
+++ b/src/plugins/af_xdp/unformat.c
@@ -46,6 +46,8 @@ unformat_af_xdp_create_if_args (unformat_input_t * input, va_list * vargs)
;
else if (unformat (line_input, "prog %s", &args->prog))
;
+ else if (unformat (line_input, "netns %s", &args->netns))
+ ;
else if (unformat (line_input, "no-zero-copy"))
args->mode = AF_XDP_MODE_COPY;
else if (unformat (line_input, "zero-copy"))
diff --git a/src/plugins/arping/arping.api b/src/plugins/arping/arping.api
index f797b8cf3aa..9ec01a9b55d 100644
--- a/src/plugins/arping/arping.api
+++ b/src/plugins/arping/arping.api
@@ -18,6 +18,7 @@
option version = "1.0.0";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
+import "vnet/ethernet/ethernet_types.api";
/** \brief
@param client_index - opaque cookie to identify the sender
@@ -55,6 +56,28 @@ define arping_reply
};
/*
+ * Address Conflict Detection
+ */
+define arping_acd
+{
+ u32 client_index;
+ u32 context;
+ vl_api_address_t address;
+ vl_api_interface_index_t sw_if_index;
+ bool is_garp;
+ u32 repeat [default=1];
+ f64 interval [default=1.0];
+};
+
+define arping_acd_reply
+{
+ u32 context;
+ i32 retval;
+ u32 reply_count;
+ vl_api_mac_address_t mac_address;
+};
+
+/*
* Local Variables:
* eval: (c-set-style "gnu")
* End:
diff --git a/src/plugins/arping/arping.c b/src/plugins/arping/arping.c
index 14f92589ea7..11fb0704dd3 100644
--- a/src/plugins/arping/arping.c
+++ b/src/plugins/arping/arping.c
@@ -500,6 +500,7 @@ arping_neighbor_advertisement (vlib_main_t *vm, arping_args_t *args)
vlib_cli_output (vm, "Sending %u GARP to %U", send_count,
format_ip4_address, &args->address.ip.ip4);
ip4_neighbor_advertise (vm, vnm, args->sw_if_index,
+ vlib_get_thread_index (),
&args->address.ip.ip4);
}
else
@@ -509,6 +510,7 @@ arping_neighbor_advertisement (vlib_main_t *vm, arping_args_t *args)
send_count, format_ip6_address,
&args->address.ip.ip6);
ip6_neighbor_advertise (vm, vnm, args->sw_if_index,
+ vlib_get_thread_index (),
&args->address.ip.ip6);
}
args->repeat--;
@@ -554,7 +556,8 @@ arping_neighbor_probe_dst (vlib_main_t *vm, arping_args_t *args)
arping_intf_t aif;
/* Disallow multiple sends on the same interface for now. Who needs it? */
- if (am->interfaces && (am->interfaces[args->sw_if_index] != 0))
+ if ((vec_len (am->interfaces) > args->sw_if_index) &&
+ (am->interfaces[args->sw_if_index] != 0))
{
error = clib_error_return (
0, "arping command is in progress for the same interface. "
@@ -586,7 +589,8 @@ arping_neighbor_probe_dst (vlib_main_t *vm, arping_args_t *args)
if (args->silence == 0)
vlib_cli_output (vm, "Sending %u ARP Request to %U", send_count,
format_ip4_address, &args->address.ip.ip4);
- ip4_neighbor_probe_dst (args->sw_if_index, &args->address.ip.ip4);
+ ip4_neighbor_probe_dst (args->sw_if_index, vlib_get_thread_index (),
+ &args->address.ip.ip4);
}
else
{
@@ -594,7 +598,8 @@ arping_neighbor_probe_dst (vlib_main_t *vm, arping_args_t *args)
vlib_cli_output (vm, "Sending %u Neighbor Solicitation to %U",
send_count, format_ip6_address,
&args->address.ip.ip6);
- ip6_neighbor_probe_dst (args->sw_if_index, &args->address.ip.ip6);
+ ip6_neighbor_probe_dst (args->sw_if_index, vlib_get_thread_index (),
+ &args->address.ip.ip6);
}
args->repeat--;
if ((args->interval > 0.0) && (args->repeat > 0))
diff --git a/src/plugins/arping/arping_api.c b/src/plugins/arping/arping_api.c
index 015c6148f5e..1b3431f2f39 100644
--- a/src/plugins/arping/arping_api.c
+++ b/src/plugins/arping/arping_api.c
@@ -26,11 +26,13 @@
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
+#include <vnet/ethernet/ethernet_types_api.h>
/* define message IDs */
#include <arping/arping.api_enum.h>
#include <arping/arping.api_types.h>
+#define REPLY_MSG_ID_BASE (am->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static void
@@ -57,10 +59,40 @@ vl_api_arping_t_handler (vl_api_arping_t *mp)
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO2 (VL_API_ARPING_REPLY + am->msg_id_base,
+ REPLY_MACRO2 (VL_API_ARPING_REPLY,
({ rmp->reply_count = ntohl (args.reply_count); }));
}
+static void
+vl_api_arping_acd_t_handler (vl_api_arping_acd_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ arping_main_t *am = &arping_main;
+ vl_api_arping_acd_reply_t *rmp;
+ arping_args_t args = { 0 };
+ int rv;
+
+ if (mp->sw_if_index != ~0)
+ VALIDATE_SW_IF_INDEX (mp);
+
+ ip_address_decode2 (&mp->address, &args.address);
+ args.interval = clib_net_to_host_f64 (mp->interval);
+ args.repeat = ntohl (mp->repeat);
+ args.is_garp = mp->is_garp;
+ args.sw_if_index = ntohl (mp->sw_if_index);
+ args.silence = 1;
+
+ arping_run_command (vm, &args);
+ rv = args.rv;
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO2 (VL_API_ARPING_ACD_REPLY, ({
+ rmp->reply_count = ntohl (args.reply_count);
+ mac_address_encode (&args.recv.from4.mac, rmp->mac_address);
+ }));
+}
+
/* set tup the API message handling tables */
#include <arping/arping.api.c>
clib_error_t *
@@ -73,7 +105,8 @@ arping_plugin_api_hookup (vlib_main_t *vm)
am->msg_id_base = setup_message_id_table ();
/* Mark API as mp safe */
- vam->is_mp_safe[am->msg_id_base + VL_API_ARPING] = 1;
+ vl_api_set_msg_thread_safe (vam, am->msg_id_base + VL_API_ARPING, 1);
+ vl_api_set_msg_thread_safe (vam, am->msg_id_base + VL_API_ARPING_ACD, 1);
return 0;
}
diff --git a/src/plugins/arping/arping_test.c b/src/plugins/arping/arping_test.c
index 9001b7098a7..7cd85912bd2 100644
--- a/src/plugins/arping/arping_test.c
+++ b/src/plugins/arping/arping_test.c
@@ -26,12 +26,12 @@
#define __plugin_msg_base arping_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
+#include <vlibmemory/vlib.api_types.h>
/* declare message IDs */
#include <vnet/format_fns.h>
#include <arping/arping.api_enum.h>
#include <arping/arping.api_types.h>
-#include <vpp/api/vpe.api_types.h>
#include <vnet/ip/ip_types_api.h>
typedef struct
@@ -52,7 +52,6 @@ api_arping (vat_main_t *vam)
arping_args_t args = { 0 };
int ret;
unformat_input_t *input = vam->input;
- vnet_main_t *vnm = vnet_get_main ();
f64 interval = ARPING_DEFAULT_INTERVAL;
vl_api_control_ping_t *mp_ping;
arping_test_main_t *atm = &arping_test_main;
@@ -76,8 +75,7 @@ api_arping (vat_main_t *vam)
return -99;
}
- if (!unformat_user (input, unformat_vnet_sw_interface, vnm,
- &args.sw_if_index))
+ if (!unformat_user (input, api_unformat_sw_if_index, vam, &args.sw_if_index))
{
errmsg ("unknown interface `%U'", format_unformat_error, input);
return -99;
@@ -156,6 +154,19 @@ vl_api_arping_reply_t_handler (vl_api_arping_reply_t *mp)
vam->result_ready = 1;
}
+static int
+api_arping_acd (vat_main_t *vam)
+{
+ // NOT YET IMPLEMENTED
+ return -99;
+}
+
+static void
+vl_api_arping_acd_reply_t_handler (vl_api_arping_reply_t *mp)
+{
+ // NOT YET IMPLEMENTED
+}
+
#include <arping/arping.api_test.c>
/*
diff --git a/src/plugins/avf/CMakeLists.txt b/src/plugins/avf/CMakeLists.txt
index f7900a64958..ca6f2cb6803 100644
--- a/src/plugins/avf/CMakeLists.txt
+++ b/src/plugins/avf/CMakeLists.txt
@@ -23,6 +23,7 @@ add_vpp_plugin(avf
avf_api.c
flow.c
avf_fdir_lib.c
+ avf_rss_lib.c
MULTIARCH_SOURCES
input.c
diff --git a/src/plugins/avf/README.md b/src/plugins/avf/README.md
deleted file mode 100644
index 7aa2661fbba..00000000000
--- a/src/plugins/avf/README.md
+++ /dev/null
@@ -1,107 +0,0 @@
-# Intel AVF device plugin for VPP {#avf_plugin_doc}
-
-##Overview
-This plugins provides native device support for intel Adaptive Virtual
-Function (AVF). AVF is driver specification for current and future
-Intel Virtual Function devices. AVF defines communication channel between
-Physical Functions (PF) and VF.
-In essence, today this driver can be used only with
-Intel XL710 / X710 / XXV710 adapters.
-
-##Prerequisites
- * Driver requires newer i40e PF linux driver to be installed on the system,
-which supports virtualchnl interface. This code is tested with i40e driver
-version 2.4.6.
-
-* Driver requires MSI-X interrupt support, which is not supported by
-uio_pci_generic driver, so vfio-pci needs to be used. On systems without IOMMU
-vfio driver can still be used with recent kernels which support no-iommu mode.
-
-##Known issues
-This driver is still in experimental phase, however it shows very good
-performance numbers.
-
-## Usage
-### System setup
-
-1. load VFIO driver
-```
-sudo modprobe vfio-pci
-```
-
-2. (systems without IOMMU only) enable unsafe NOIOMMU mode
-```
-echo Y | sudo tee /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
-```
-
-3. Create and bind SR-IOV virtual function(s)
-
-Following script creates VF, assigns MAC address and binds VF to vfio-pci
-```bash
-#!/bin/bash
-
-if [ $USER != "root" ] ; then
- echo "Restarting script with sudo..."
- sudo $0 ${*}
- exit
-fi
-
-setup () {
- cd /sys/bus/pci/devices/${1}
- driver=$(basename $(readlink driver))
- if [ "${driver}" != "i40e" ]; then
- echo ${1} | tee driver/unbind
- echo ${1} | tee /sys/bus/pci/drivers/i40e/bind
- fi
- ifname=$(basename net/*)
- echo 0 | tee sriov_numvfs > /dev/null
- echo 1 | tee sriov_numvfs > /dev/null
- ip link set dev ${ifname} vf 0 mac ${2}
- ip link show dev ${ifname}
- vf=$(basename $(readlink virtfn0))
- echo ${vf} | tee virtfn0/driver/unbind
- echo vfio-pci | tee virtfn0/driver_override
- echo ${vf} | sudo tee /sys/bus/pci/drivers/vfio-pci/bind
- echo | tee virtfn0/driver_override
-}
-
-# Setup one VF on PF 0000:3b:00.0 and assign MAC address
-setup 0000:3b:00.0 00:11:22:33:44:00
-# Setup one VF on PF 0000:3b:00.1 and assign MAC address
-setup 0000:3b:00.1 00:11:22:33:44:01
-```
-
-### Promisc mode
-In cases when interface is used in the L2 mode or promisc mode is needed for some other reason,
-trust needs to be set to "on" using the linux "ip link" utility.
-```
-ip link set dev <PF inteface name> vf <VF id> trust on
-```
-
-### L2 spoofing check
-By default Virtual Function is not allowed to send ethernet frames which
-have source MAC address different than address assigned to the VF.
-In some cases it is expected that VPP will send such frames (e.g. L2 bridging,
-bonding, l2 cross-connect) and in such cases spoof chack needs to be turned
-off by issuing following command:
-```
-ip link set dev <PF inteface name> vf <VF id> spoofchk off
-```
-
-### Interface Creation
-Interfaces can be dynamically created by using following CLI:
-```
-create interface avf 0000:3b:02.0
-set int state avf-0/3b/2/0 up
-```
-
-### Interface Deletion
-Interface can be deleted with following CLI:
-```
-delete interface avf <interface name>
-```
-
-### Interface Statistics
-Interface statistics can be displayed with `sh hardware-interface <if-name>`
-command.
-
diff --git a/src/plugins/avf/README.rst b/src/plugins/avf/README.rst
new file mode 100644
index 00000000000..339f5f13c3e
--- /dev/null
+++ b/src/plugins/avf/README.rst
@@ -0,0 +1,135 @@
+Intel AVF device driver
+=======================
+
+Overview
+--------
+
+This plugins provides native device support for intel Adaptive Virtual
+Function (AVF). AVF is driver specification for current and future Intel
+Virtual Function devices. AVF defines communication channel between
+Physical Functions (PF) and VF. In essence, today this driver can be
+used only with Intel XL710 / X710 / XXV710 adapters.
+
+Prerequisites
+-------------
+
+- Driver requires newer i40e PF linux driver to be installed on the
+ system, which supports virtualchnl interface. This code is tested
+ with i40e driver version 2.4.6.
+
+- Driver requires MSI-X interrupt support, which is not supported by
+ uio_pci_generic driver, so vfio-pci needs to be used. On systems
+ without IOMMU vfio driver can still be used with recent kernels which
+ support no-iommu mode.
+
+Known issues
+------------
+
+This driver is still in experimental phase, however it shows very good
+performance numbers.
+
+Usage
+-----
+
+System setup
+~~~~~~~~~~~~
+
+1. load VFIO driver
+
+::
+
+ sudo modprobe vfio-pci
+
+2. (systems without IOMMU only) enable unsafe NOIOMMU mode
+
+::
+
+ echo Y | sudo tee /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
+
+3. Create and bind SR-IOV virtual function(s)
+
+Following script creates VF, assigns MAC address and binds VF to
+vfio-pci
+
+.. code:: bash
+
+ #!/bin/bash
+
+ if [ $USER != "root" ] ; then
+ echo "Restarting script with sudo..."
+ sudo $0 ${*}
+ exit
+ fi
+
+ setup () {
+ cd /sys/bus/pci/devices/${1}
+ driver=$(basename $(readlink driver))
+ if [ "${driver}" != "i40e" ]; then
+ echo ${1} | tee driver/unbind
+ echo ${1} | tee /sys/bus/pci/drivers/i40e/bind
+ fi
+ ifname=$(basename net/*)
+ echo 0 | tee sriov_numvfs > /dev/null
+ echo 1 | tee sriov_numvfs > /dev/null
+ ip link set dev ${ifname} vf 0 mac ${2}
+ ip link show dev ${ifname}
+ vf=$(basename $(readlink virtfn0))
+ echo ${vf} | tee virtfn0/driver/unbind
+ echo vfio-pci | tee virtfn0/driver_override
+ echo ${vf} | sudo tee /sys/bus/pci/drivers/vfio-pci/bind
+ echo | tee virtfn0/driver_override
+ }
+
+ # Setup one VF on PF 0000:3b:00.0 and assign MAC address
+ setup 0000:3b:00.0 00:11:22:33:44:00
+ # Setup one VF on PF 0000:3b:00.1 and assign MAC address
+ setup 0000:3b:00.1 00:11:22:33:44:01
+
+Promisc mode
+~~~~~~~~~~~~
+
+In cases when interface is used in the L2 mode or promisc mode is needed
+for some other reason, trust needs to be set to “on” using the linux “ip
+link” utility.
+
+::
+
+ ip link set dev <PF inteface name> vf <VF id> trust on
+
+L2 spoofing check
+~~~~~~~~~~~~~~~~~
+
+By default Virtual Function is not allowed to send ethernet frames which
+have source MAC address different than address assigned to the VF. In
+some cases it is expected that VPP will send such frames (e.g. L2
+bridging, bonding, l2 cross-connect) and in such cases spoof check needs
+to be turned off by issuing following command:
+
+::
+
+ ip link set dev <PF inteface name> vf <VF id> spoofchk off
+
+Interface Creation
+~~~~~~~~~~~~~~~~~~
+
+Interfaces can be dynamically created by using following CLI:
+
+::
+
+ create interface avf 0000:3b:02.0
+ set int state avf-0/3b/2/0 up
+
+Interface Deletion
+~~~~~~~~~~~~~~~~~~
+
+Interface can be deleted with following CLI:
+
+::
+
+ delete interface avf <interface name>
+
+Interface Statistics
+~~~~~~~~~~~~~~~~~~~~
+
+Interface statistics can be displayed with
+``sh hardware-interface <if-name>`` command.
diff --git a/src/plugins/avf/avf.h b/src/plugins/avf/avf.h
index a1da4c8866b..f6f79cf0e09 100644
--- a/src/plugins/avf/avf.h
+++ b/src/plugins/avf/avf.h
@@ -19,6 +19,7 @@
#define _AVF_H_
#include <avf/virtchnl.h>
+#include <avf/avf_advanced_flow.h>
#include <vppinfra/types.h>
#include <vppinfra/error_bootstrap.h>
@@ -37,6 +38,7 @@
#define AVF_AQ_ENQ_SUSPEND_TIME 50e-6
#define AVF_AQ_ENQ_MAX_WAIT_TIME 250e-3
+#define AVF_AQ_BUF_SIZE 4096
#define AVF_RESET_SUSPEND_TIME 20e-3
#define AVF_RESET_MAX_WAIT_TIME 1
@@ -202,7 +204,9 @@ typedef struct
{
u32 flow_index;
u32 mark;
+ u8 flow_type_flag;
struct avf_fdir_conf *rcfg;
+ struct virtchnl_rss_cfg *rss_cfg;
} avf_flow_entry_t;
typedef struct
@@ -291,6 +295,7 @@ typedef struct
u32 calling_process_index;
u8 eth_addr[6];
int is_add, is_enable;
+ enum virthnl_adv_ops vc_op;
/* below parameters are used for 'program flow' event */
u8 *rule;
@@ -349,7 +354,8 @@ extern vlib_node_registration_t avf_input_node;
extern vlib_node_registration_t avf_process_node;
extern vnet_device_class_t avf_device_class;
-clib_error_t *avf_program_flow (u32 dev_instance, int is_add, u8 *rule,
+clib_error_t *avf_program_flow (u32 dev_instance, int is_add,
+ enum virthnl_adv_ops vc_op, u8 *rule,
u32 rule_len, u8 *program_status,
u32 status_len);
@@ -422,7 +428,7 @@ avf_reg_write (avf_device_t * ad, u32 addr, u32 val)
{
if (ad->flags & AVF_DEVICE_F_ELOG)
avf_elog_reg (ad, addr, val, 0);
- *(volatile u32 *) ((u8 *) ad->bar0 + addr) = val;
+ __atomic_store_n ((u32 *) ((u8 *) ad->bar0 + addr), val, __ATOMIC_RELEASE);
}
static inline u32
diff --git a/src/plugins/avf/avf_advanced_flow.h b/src/plugins/avf/avf_advanced_flow.h
index 42288b7163b..685147a5ed4 100644
--- a/src/plugins/avf/avf_advanced_flow.h
+++ b/src/plugins/avf/avf_advanced_flow.h
@@ -45,6 +45,7 @@
#define AVF_ETHER_TYPE_IPV6 0x86DD /**< IPv6 Protocol. */
#define VIRTCHNL_MAX_NUM_PROTO_HDRS 32
+#define VIRTCHNL_MAX_SIZE_GEN_PACKET 1024
#define PROTO_HDR_SHIFT 5
#define PROTO_HDR_FIELD_START(proto_hdr_type) \
(proto_hdr_type << PROTO_HDR_SHIFT)
@@ -177,6 +178,82 @@
#define AVF_INSET_PFCP_S_FIELD (AVF_PROT_PFCP | AVF_PFCP_S_FIELD)
#define AVF_INSET_PFCP_SEID (AVF_PROT_PFCP | AVF_PFCP_S_FIELD | AVF_PFCP_SEID)
+#define AVF_ETH_RSS_IPV4 BIT_ULL (2)
+#define AVF_ETH_RSS_FRAG_IPV4 BIT_ULL (3)
+#define AVF_ETH_RSS_NONFRAG_IPV4_TCP BIT_ULL (4)
+#define AVF_ETH_RSS_NONFRAG_IPV4_UDP BIT_ULL (5)
+#define AVF_ETH_RSS_NONFRAG_IPV4_SCTP BIT_ULL (6)
+#define AVF_ETH_RSS_NONFRAG_IPV4_OTHER BIT_ULL (7)
+#define AVF_ETH_RSS_IPV6 BIT_ULL (8)
+#define AVF_ETH_RSS_FRAG_IPV6 BIT_ULL (9)
+#define AVF_ETH_RSS_NONFRAG_IPV6_TCP BIT_ULL (10)
+#define AVF_ETH_RSS_NONFRAG_IPV6_UDP BIT_ULL (11)
+#define AVF_ETH_RSS_NONFRAG_IPV6_SCTP BIT_ULL (12)
+#define AVF_ETH_RSS_NONFRAG_IPV6_OTHER BIT_ULL (13)
+#define AVF_ETH_RSS_L2_PAYLOAD BIT_ULL (14)
+#define AVF_ETH_RSS_IPV6_EX BIT_ULL (15)
+#define AVF_ETH_RSS_IPV6_TCP_EX BIT_ULL (16)
+#define AVF_ETH_RSS_IPV6_UDP_EX BIT_ULL (17)
+#define AVF_ETH_RSS_PORT BIT_ULL (18)
+#define AVF_ETH_RSS_VXLAN BIT_ULL (19)
+#define AVF_ETH_RSS_GENEVE BIT_ULL (20)
+#define AVF_ETH_RSS_NVGRE BIT_ULL (21)
+#define AVF_ETH_RSS_GTPU BIT_ULL (23)
+#define AVF_ETH_RSS_ETH BIT_ULL (24)
+#define AVF_ETH_RSS_S_VLAN BIT_ULL (25)
+#define AVF_ETH_RSS_C_VLAN BIT_ULL (26)
+#define AVF_ETH_RSS_ESP BIT_ULL (27)
+#define AVF_ETH_RSS_AH BIT_ULL (28)
+#define AVF_ETH_RSS_L2TPV3 BIT_ULL (29)
+#define AVF_ETH_RSS_PFCP BIT_ULL (30)
+#define AVF_ETH_RSS_PPPOE BIT_ULL (31)
+#define AVF_ETH_RSS_ECPRI BIT_ULL (32)
+#define AVF_ETH_RSS_MPLS BIT_ULL (33)
+#define AVF_ETH_RSS_IPV4_CHKSUM BIT_ULL (34)
+#define AVF_ETH_RSS_L4_CHKSUM BIT_ULL (35)
+#define AVF_ETH_RSS_L2TPV2 BIT_ULL (36)
+#define AVF_ETH_RSS_L3_SRC_ONLY BIT_ULL (63)
+#define AVF_ETH_RSS_L3_DST_ONLY BIT_ULL (62)
+#define AVF_ETH_RSS_L4_SRC_ONLY BIT_ULL (61)
+#define AVF_ETH_RSS_L4_DST_ONLY BIT_ULL (60)
+#define AVF_ETH_RSS_L2_SRC_ONLY BIT_ULL (59)
+#define AVF_ETH_RSS_L2_DST_ONLY BIT_ULL (58)
+#define AVF_ETH_RSS_L3_PRE32 BIT_ULL (57)
+#define AVF_ETH_RSS_L3_PRE40 BIT_ULL (56)
+#define AVF_ETH_RSS_L3_PRE48 BIT_ULL (55)
+#define AVF_ETH_RSS_L3_PRE56 BIT_ULL (54)
+#define AVF_ETH_RSS_L3_PRE64 BIT_ULL (53)
+#define AVF_ETH_RSS_L3_PRE96 BIT_ULL (52)
+
+#define foreach_avf_rss_hf \
+ _ (0, AVF_ETH_RSS_FRAG_IPV4, "ipv4-frag") \
+ _ (1, AVF_ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
+ _ (2, AVF_ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
+ _ (3, AVF_ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
+ _ (4, AVF_ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
+ _ (5, AVF_ETH_RSS_IPV4, "ipv4") \
+ _ (6, AVF_ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
+ _ (7, AVF_ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \
+ _ (8, AVF_ETH_RSS_FRAG_IPV6, "ipv6-frag") \
+ _ (9, AVF_ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
+ _ (10, AVF_ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
+ _ (11, AVF_ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
+ _ (12, AVF_ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
+ _ (13, AVF_ETH_RSS_IPV6_EX, "ipv6-ex") \
+ _ (14, AVF_ETH_RSS_IPV6, "ipv6") \
+ _ (15, AVF_ETH_RSS_L2_PAYLOAD, "l2-payload") \
+ _ (16, AVF_ETH_RSS_PORT, "port") \
+ _ (17, AVF_ETH_RSS_VXLAN, "vxlan") \
+ _ (18, AVF_ETH_RSS_GENEVE, "geneve") \
+ _ (19, AVF_ETH_RSS_NVGRE, "nvgre") \
+ _ (20, AVF_ETH_RSS_GTPU, "gtpu") \
+ _ (21, AVF_ETH_RSS_ESP, "esp") \
+ _ (22, AVF_ETH_RSS_L2TPV3, "l2tpv3") \
+ _ (60, AVF_ETH_RSS_L4_DST_ONLY, "l4-dst-only") \
+ _ (61, AVF_ETH_RSS_L4_SRC_ONLY, "l4-src-only") \
+ _ (62, AVF_ETH_RSS_L3_DST_ONLY, "l3-dst-only") \
+ _ (63, AVF_ETH_RSS_L3_SRC_ONLY, "l3-src-only")
+
/* Protocol header type within a packet segment. A segment consists of one or
* more protocol headers that make up a logical group of protocol headers. Each
* logical group of protocol headers encapsulates or is encapsulated using/by
@@ -202,6 +279,17 @@ enum virtchnl_proto_hdr_type
VIRTCHNL_PROTO_HDR_ESP,
VIRTCHNL_PROTO_HDR_AH,
VIRTCHNL_PROTO_HDR_PFCP,
+ VIRTCHNL_PROTO_HDR_GTPC,
+ VIRTCHNL_PROTO_HDR_ECPRI,
+ VIRTCHNL_PROTO_HDR_L2TPV2,
+ VIRTCHNL_PROTO_HDR_PPP,
+ /* IPv4 and IPv6 Fragment header types are only associated to
+ * VIRTCHNL_PROTO_HDR_IPV4 and VIRTCHNL_PROTO_HDR_IPV6 respectively,
+ * cannot be used independently.
+ */
+ VIRTCHNL_PROTO_HDR_IPV4_FRAG,
+ VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG,
+ VIRTCHNL_PROTO_HDR_GRE,
};
/* Protocol header field within a protocol header. */
@@ -224,6 +312,7 @@ enum virtchnl_proto_hdr_field
VIRTCHNL_PROTO_HDR_IPV4_DSCP,
VIRTCHNL_PROTO_HDR_IPV4_TTL,
VIRTCHNL_PROTO_HDR_IPV4_PROT,
+ VIRTCHNL_PROTO_HDR_IPV4_CHKSUM,
/* IPV6 */
VIRTCHNL_PROTO_HDR_IPV6_SRC =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_IPV6),
@@ -231,18 +320,34 @@ enum virtchnl_proto_hdr_field
VIRTCHNL_PROTO_HDR_IPV6_TC,
VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT,
VIRTCHNL_PROTO_HDR_IPV6_PROT,
+ /* IPV6 Prefix */
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX32_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX32_DST,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX40_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX40_DST,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX48_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX48_DST,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX56_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX56_DST,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX64_DST,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX96_SRC,
+ VIRTCHNL_PROTO_HDR_IPV6_PREFIX96_DST,
/* TCP */
VIRTCHNL_PROTO_HDR_TCP_SRC_PORT =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_TCP),
VIRTCHNL_PROTO_HDR_TCP_DST_PORT,
+ VIRTCHNL_PROTO_HDR_TCP_CHKSUM,
/* UDP */
VIRTCHNL_PROTO_HDR_UDP_SRC_PORT =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_UDP),
VIRTCHNL_PROTO_HDR_UDP_DST_PORT,
+ VIRTCHNL_PROTO_HDR_UDP_CHKSUM,
/* SCTP */
VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_SCTP),
VIRTCHNL_PROTO_HDR_SCTP_DST_PORT,
+ VIRTCHNL_PROTO_HDR_SCTP_CHKSUM,
/* GTPU_IP */
VIRTCHNL_PROTO_HDR_GTPU_IP_TEID =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_GTPU_IP),
@@ -264,6 +369,28 @@ enum virtchnl_proto_hdr_field
VIRTCHNL_PROTO_HDR_PFCP_S_FIELD =
PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_PFCP),
VIRTCHNL_PROTO_HDR_PFCP_SEID,
+ /* GTPC */
+ VIRTCHNL_PROTO_HDR_GTPC_TEID =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_GTPC),
+ /* ECPRI */
+ VIRTCHNL_PROTO_HDR_ECPRI_MSG_TYPE =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_ECPRI),
+ VIRTCHNL_PROTO_HDR_ECPRI_PC_RTC_ID,
+ /* IPv4 Dummy Fragment */
+ VIRTCHNL_PROTO_HDR_IPV4_FRAG_PKID =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_IPV4_FRAG),
+ /* IPv6 Extension Fragment */
+ VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG_PKID =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG),
+ /* GTPU_DWN/UP */
+ VIRTCHNL_PROTO_HDR_GTPU_DWN_QFI =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN),
+ VIRTCHNL_PROTO_HDR_GTPU_UP_QFI =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP),
+ /* L2TPv2 */
+ VIRTCHNL_PROTO_HDR_L2TPV2_SESS_ID =
+ PROTO_HDR_FIELD_START (VIRTCHNL_PROTO_HDR_L2TPV2),
+ VIRTCHNL_PROTO_HDR_L2TPV2_LEN_SESS_ID,
};
struct virtchnl_proto_hdr
@@ -284,14 +411,26 @@ struct virtchnl_proto_hdrs
{
u8 tunnel_level;
/**
- * specify where protocol header start from.
- * 0 - from the outer layer
- * 1 - from the first inner layer
- * 2 - from the second inner layer
+ * specify where protocol header start from. Must be 0 when sending a generic
+ * packet request. 0 - from the outer layer 1 - from the first inner layer 2
+ *- from the second inner layer
* ....
**/
- int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */
- struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS];
+ int count;
+ /**
+ * the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS.
+ * Must be 0 when sending a generic packet request.
+ **/
+ union
+ {
+ struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS];
+ struct
+ {
+ u16 pkt_len;
+ u8 spec[VIRTCHNL_MAX_SIZE_GEN_PACKET];
+ u8 mask[VIRTCHNL_MAX_SIZE_GEN_PACKET];
+ } raw;
+ };
};
VIRTCHNL_CHECK_STRUCT_LEN (2312, virtchnl_proto_hdrs);
@@ -355,6 +494,140 @@ struct virtchnl_rss_cfg
VIRTCHNL_CHECK_STRUCT_LEN (2444, virtchnl_rss_cfg);
+struct avf_pattern_match_item
+{
+ enum avf_flow_item_type *pattern_list;
+ u64 input_set_mask;
+ void *meta;
+};
+
+enum avf_flow_item_type
+{
+ AVF_FLOW_ITEM_TYPE_END,
+ AVF_FLOW_ITEM_TYPE_VOID,
+ AVF_FLOW_ITEM_TYPE_INVERT,
+ AVF_FLOW_ITEM_TYPE_ANY,
+ AVF_FLOW_ITEM_TYPE_PORT_ID,
+ AVF_FLOW_ITEM_TYPE_RAW,
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_SCTP,
+ AVF_FLOW_ITEM_TYPE_VXLAN,
+ AVF_FLOW_ITEM_TYPE_E_TAG,
+ AVF_FLOW_ITEM_TYPE_NVGRE,
+ AVF_FLOW_ITEM_TYPE_MPLS,
+ AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_FUZZY,
+ AVF_FLOW_ITEM_TYPE_GTP,
+ AVF_FLOW_ITEM_TYPE_GTPC,
+ AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_ESP,
+ AVF_FLOW_ITEM_TYPE_GENEVE,
+ AVF_FLOW_ITEM_TYPE_VXLAN_GPE,
+ AVF_FLOW_ITEM_TYPE_ARP_ETH_IPV4,
+ AVF_FLOW_ITEM_TYPE_IPV6_EXT,
+ AVF_FLOW_ITEM_TYPE_ICMP6,
+ AVF_FLOW_ITEM_TYPE_ICMP6_ND_NS,
+ AVF_FLOW_ITEM_TYPE_ICMP6_ND_NA,
+ AVF_FLOW_ITEM_TYPE_ICMP6_ND_OPT,
+ AVF_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH,
+ AVF_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH,
+ AVF_FLOW_ITEM_TYPE_MARK,
+ AVF_FLOW_ITEM_TYPE_META,
+ AVF_FLOW_ITEM_TYPE_GRE_KEY,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC,
+ AVF_FLOW_ITEM_TYPE_PPPOES,
+ AVF_FLOW_ITEM_TYPE_PPPOED,
+ AVF_FLOW_ITEM_TYPE_PPPOE_PROTO_ID,
+ AVF_FLOW_ITEM_TYPE_NSH,
+ AVF_FLOW_ITEM_TYPE_IGMP,
+ AVF_FLOW_ITEM_TYPE_AH,
+ AVF_FLOW_ITEM_TYPE_HIGIG2,
+ AVF_FLOW_ITEM_TYPE_TAG,
+ AVF_FLOW_ITEM_TYPE_L2TPV3OIP,
+ AVF_FLOW_ITEM_TYPE_PFCP,
+ AVF_FLOW_ITEM_TYPE_ECPRI,
+ AVF_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
+ AVF_FLOW_ITEM_TYPE_GENEVE_OPT,
+ AVF_FLOW_ITEM_TYPE_INTEGRITY,
+ AVF_FLOW_ITEM_TYPE_CONNTRACK,
+ AVF_FLOW_ITEM_TYPE_PORT_REPRESENTOR,
+ AVF_FLOW_ITEM_TYPE_REPRESENTED_PORT,
+ AVF_FLOW_ITEM_TYPE_FLEX,
+ AVF_FLOW_ITEM_TYPE_L2TPV2,
+ AVF_FLOW_ITEM_TYPE_PPP,
+ AVF_FLOW_ITEM_TYPE_GRE_OPTION,
+ AVF_FLOW_ITEM_TYPE_MACSEC,
+ AVF_FLOW_ITEM_TYPE_METER_COLOR,
+};
+
+enum avf_flow_action_type
+{
+ AVF_FLOW_ACTION_TYPE_END,
+ AVF_FLOW_ACTION_TYPE_VOID,
+ AVF_FLOW_ACTION_TYPE_PASSTHRU,
+ AVF_FLOW_ACTION_TYPE_JUMP,
+ AVF_FLOW_ACTION_TYPE_MARK,
+ AVF_FLOW_ACTION_TYPE_FLAG,
+ AVF_FLOW_ACTION_TYPE_QUEUE,
+ AVF_FLOW_ACTION_TYPE_DROP,
+ AVF_FLOW_ACTION_TYPE_COUNT,
+ AVF_FLOW_ACTION_TYPE_RSS,
+ AVF_FLOW_ACTION_TYPE_PF,
+ AVF_FLOW_ACTION_TYPE_VF,
+ AVF_FLOW_ACTION_TYPE_PORT_ID,
+ AVF_FLOW_ACTION_TYPE_METER,
+ AVF_FLOW_ACTION_TYPE_SECURITY,
+ AVF_FLOW_ACTION_TYPE_OF_DEC_NW_TTL,
+ AVF_FLOW_ACTION_TYPE_OF_POP_VLAN,
+ AVF_FLOW_ACTION_TYPE_OF_PUSH_VLAN,
+ AVF_FLOW_ACTION_TYPE_OF_SET_VLAN_VID,
+ AVF_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP,
+ AVF_FLOW_ACTION_TYPE_OF_POP_MPLS,
+ AVF_FLOW_ACTION_TYPE_OF_PUSH_MPLS,
+ AVF_FLOW_ACTION_TYPE_VXLAN_ENCAP,
+ AVF_FLOW_ACTION_TYPE_VXLAN_DECAP,
+ AVF_FLOW_ACTION_TYPE_NVGRE_ENCAP,
+ AVF_FLOW_ACTION_TYPE_NVGRE_DECAP,
+ AVF_FLOW_ACTION_TYPE_RAW_ENCAP,
+ AVF_FLOW_ACTION_TYPE_RAW_DECAP,
+ AVF_FLOW_ACTION_TYPE_SET_IPV4_SRC,
+ AVF_FLOW_ACTION_TYPE_SET_IPV4_DST,
+ AVF_FLOW_ACTION_TYPE_SET_IPV6_SRC,
+ AVF_FLOW_ACTION_TYPE_SET_IPV6_DST,
+ AVF_FLOW_ACTION_TYPE_SET_TP_SRC,
+ AVF_FLOW_ACTION_TYPE_SET_TP_DST,
+ AVF_FLOW_ACTION_TYPE_MAC_SWAP,
+ AVF_FLOW_ACTION_TYPE_DEC_TTL,
+ AVF_FLOW_ACTION_TYPE_SET_TTL,
+ AVF_FLOW_ACTION_TYPE_SET_MAC_SRC,
+ AVF_FLOW_ACTION_TYPE_SET_MAC_DST,
+ AVF_FLOW_ACTION_TYPE_INC_TCP_SEQ,
+ AVF_FLOW_ACTION_TYPE_DEC_TCP_SEQ,
+ AVF_FLOW_ACTION_TYPE_INC_TCP_ACK,
+ AVF_FLOW_ACTION_TYPE_DEC_TCP_ACK,
+ AVF_FLOW_ACTION_TYPE_SET_TAG,
+ AVF_FLOW_ACTION_TYPE_SET_META,
+ AVF_FLOW_ACTION_TYPE_SET_IPV4_DSCP,
+ AVF_FLOW_ACTION_TYPE_SET_IPV6_DSCP,
+ AVF_FLOW_ACTION_TYPE_AGE,
+ AVF_FLOW_ACTION_TYPE_SAMPLE,
+ AVF_FLOW_ACTION_TYPE_SHARED,
+ AVF_FLOW_ACTION_TYPE_MODIFY_FIELD,
+ AVF_FLOW_ACTION_TYPE_INDIRECT,
+ AVF_FLOW_ACTION_TYPE_CONNTRACK,
+ AVF_FLOW_ACTION_TYPE_METER_COLOR,
+ AVF_FLOW_ACTION_TYPE_PORT_REPRESENTOR,
+ AVF_FLOW_ACTION_TYPE_REPRESENTED_PORT,
+ AVF_FLOW_ACTION_TYPE_METER_MARK,
+ AVF_FLOW_ACTION_TYPE_SEND_TO_KERNEL,
+};
+
enum virtchnl_action
{
/* action types */
@@ -756,15 +1029,16 @@ struct avf_flow_action_mark
struct avf_flow_action
{
- enum virtchnl_action type; /**< Action type. */
+ enum avf_flow_action_type type; /**< Action type. */
const void *conf; /**< Pointer to action configuration object. */
};
struct avf_flow_item
{
- enum virtchnl_proto_hdr_type type; /**< Item type. */
+ enum avf_flow_item_type type; /**< Item type. */
const void *spec; /**< Pointer to item specification structure. */
const void *mask; /**< Bit-mask applied to spec and last. */
+ int is_generic; /* indicate if this item is for a generic flow pattern. */
};
struct avf_fdir_conf
@@ -783,18 +1057,20 @@ enum virthnl_adv_ops
VIRTCHNL_ADV_OP_ADD_FDIR_FILTER = 0,
VIRTCHNL_ADV_OP_DEL_FDIR_FILTER,
VIRTCHNL_ADV_OP_QUERY_FDIR_FILTER,
+ VIRTCHNL_ADV_OP_ADD_RSS_CFG,
+ VIRTCHNL_ADV_OP_DEL_RSS_CFG,
VIRTCHNL_ADV_OP_MAX
};
/* virtual channel op handler */
-typedef int (*avf_fdir_vc_op_t) (void *vc_hdl, enum virthnl_adv_ops vc_op,
+typedef int (*avf_flow_vc_op_t) (void *vc_hdl, enum virthnl_adv_ops vc_op,
void *in, u32 in_len, void *out, u32 out_len);
/* virtual channel context object */
-struct avf_fdir_vc_ctx
+struct avf_flow_vc_ctx
{
void *vc_hdl; /* virtual channel handler */
- avf_fdir_vc_op_t vc_op;
+ avf_flow_vc_op_t vc_op;
};
/**
@@ -955,7 +1231,7 @@ int avf_fdir_rcfg_act_mark (struct avf_fdir_conf *rcfg, const u32 mark,
* 0 = successful.
* < 0 = failure.
*/
-int avf_fdir_rcfg_validate (struct avf_fdir_vc_ctx *ctx,
+int avf_fdir_rcfg_validate (struct avf_flow_vc_ctx *ctx,
struct avf_fdir_conf *rcfg);
/**
@@ -971,7 +1247,7 @@ int avf_fdir_rcfg_validate (struct avf_fdir_vc_ctx *ctx,
* 0 = successfule.
* < 0 = failure.
*/
-int avf_fdir_rule_create (struct avf_fdir_vc_ctx *ctx,
+int avf_fdir_rule_create (struct avf_flow_vc_ctx *ctx,
struct avf_fdir_conf *rcfg);
/**
@@ -986,7 +1262,7 @@ int avf_fdir_rule_create (struct avf_fdir_vc_ctx *ctx,
* 0 = successfule.
* < 0 = failure.
*/
-int avf_fdir_rule_destroy (struct avf_fdir_vc_ctx *ctx,
+int avf_fdir_rule_destroy (struct avf_flow_vc_ctx *ctx,
struct avf_fdir_conf *rcfg);
/*
@@ -1008,6 +1284,24 @@ int avf_fdir_parse_pattern (struct avf_fdir_conf *rcfg,
struct avf_flow_error *error);
/*
+ * Parse avf patterns for generic flow and set pattern fields.
+ *
+ * @param rcfg
+ * flow config
+ * @param avf_items
+ * pattern items
+ * @param error
+ * save error cause
+ *
+ * @return
+ * 0 = successful.
+ * < 0 = failure
+ */
+int avf_fdir_parse_generic_pattern (struct avf_fdir_conf *rcfg,
+ struct avf_flow_item avf_items[],
+ struct avf_flow_error *error);
+
+/*
* Parse flow actions, set actions.
*
* @param actions
@@ -1025,6 +1319,92 @@ int avf_fdir_parse_action (const struct avf_flow_action actions[],
struct avf_fdir_conf *rcfg,
struct avf_flow_error *error);
+/*
+ * Parse flow patterns and rss actions, set rss config.
+ *
+ * @param avf_items
+ * flow pattern
+ * @param avf_actions
+ * flow actions
+ * @param rss_cfg
+ * rss config
+ * @param error
+ * save error cause
+ *
+ * @return
+ * 0 = successful.
+ * < 0 = failure
+ */
+int avf_rss_parse_pattern_action (struct avf_flow_item avf_items[],
+ struct avf_flow_action avf_actions[],
+ struct virtchnl_rss_cfg *rss_cfg,
+ struct avf_flow_error *error);
+
+/**
+ * Create a RSS rule cfg object.
+ *
+ * @param rss_cfg
+ * created rule cfg object.
+ * @param tunnel
+ * tunnel level where protocol header start from
+ * 0 from moster outer layer.
+ * 1 from first inner layer.
+ * 2 form second inner layer.
+ * Must be 0 for generic flow.
+ *
+ * @return
+ * 0 = successful.
+ * < 0 = failure.
+ */
+int avf_rss_cfg_create (struct virtchnl_rss_cfg **rss_cfg, int tunnel_level);
+
+int avf_rss_rcfg_destroy (struct virtchnl_rss_cfg *rss_cfg);
+
+/**
+ * Create a RSS flow rule
+ *
+ * @param ctx
+ * virtual channel context
+ * @param rss_cfg
+ * rule cfg object.
+ *
+ * @return
+ * 0 = successfule.
+ * < 0 = failure.
+ */
+int avf_rss_rule_create (struct avf_flow_vc_ctx *ctx,
+ struct virtchnl_rss_cfg *rss_cfg);
+
+/**
+ * Destroy a RSS flow rule
+ *
+ * @param ctx
+ * virtual channel context
+ * @param rss_cfg
+ * rule cfg object.
+ *
+ * @return
+ * 0 = successfule.
+ * < 0 = failure.
+ */
+int avf_rss_rule_destroy (struct avf_flow_vc_ctx *ctx,
+ struct virtchnl_rss_cfg *rss_cfg);
+
+/**
+ * Parse generic flow pattern to get spec and mask
+ *
+ * @param item
+ * flow item
+ * @param pkt_buf
+ * spec buffer.
+ * @param msk_buf
+ * mask buffer .
+ * @param spec_len
+ * length of spec.
+ */
+void avf_parse_generic_pattern (struct avf_flow_item *item, u8 *pkt_buf,
+ u8 *msk_buf, u16 spec_len);
+
/**
* Initialize flow error structure.
*
diff --git a/src/plugins/avf/avf_api.c b/src/plugins/avf/avf_api.c
index 883b374331f..ee39c87e666 100644
--- a/src/plugins/avf/avf_api.c
+++ b/src/plugins/avf/avf_api.c
@@ -29,6 +29,7 @@
#include <avf/avf.api_enum.h>
#include <avf/avf.api_types.h>
+#define REPLY_MSG_ID_BASE (am->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static void
@@ -51,12 +52,8 @@ vl_api_avf_create_t_handler (vl_api_avf_create_t * mp)
avf_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_AVF_CREATE_REPLY + am->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_AVF_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -82,7 +79,7 @@ vl_api_avf_delete_t_handler (vl_api_avf_delete_t * mp)
AVF_PROCESS_EVENT_DELETE_IF, hw->dev_instance);
reply:
- REPLY_MACRO (VL_API_AVF_DELETE_REPLY + am->msg_id_base);
+ REPLY_MACRO (VL_API_AVF_DELETE_REPLY);
}
/* set tup the API message handling tables */
@@ -96,7 +93,7 @@ avf_plugin_api_hookup (vlib_main_t * vm)
/* ask for a correctly-sized block of API message decode slots */
avm->msg_id_base = setup_message_id_table ();
- am->is_mp_safe[avm->msg_id_base + VL_API_AVF_DELETE] = 1;
+ vl_api_set_msg_thread_safe (am, avm->msg_id_base + VL_API_AVF_DELETE, 1);
return 0;
}
diff --git a/src/plugins/avf/avf_fdir_lib.c b/src/plugins/avf/avf_fdir_lib.c
index f38614e87ec..24b796dc91d 100644
--- a/src/plugins/avf/avf_fdir_lib.c
+++ b/src/plugins/avf/avf_fdir_lib.c
@@ -28,7 +28,7 @@
static inline int
fls_u32 (u32 x)
{
- return (x == 0) ? 0 : 32 - count_leading_zeros (x);
+ return (x == 0) ? 0 : 64 - count_leading_zeros (x);
}
static inline int
@@ -100,7 +100,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
const struct avf_flow_eth_hdr *eth_spec, *eth_mask;
struct virtchnl_proto_hdr *hdr;
- enum virtchnl_proto_hdr_type type;
+ enum avf_flow_item_type type;
u16 ether_type;
int ret = 0;
@@ -112,7 +112,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
switch (type)
{
- case VIRTCHNL_PROTO_HDR_ETH:
+ case AVF_FLOW_ITEM_TYPE_ETH:
eth_spec = item->spec;
eth_mask = item->mask;
@@ -160,7 +160,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_IPV4:
+ case AVF_FLOW_ITEM_TYPE_IPV4:
ipv4_spec = item->spec;
ipv4_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_IPV4;
@@ -211,7 +211,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_IPV6:
+ case AVF_FLOW_ITEM_TYPE_IPV6:
ipv6_spec = item->spec;
ipv6_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_IPV6;
@@ -257,14 +257,14 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
{
rcfg->input_set |= AVF_INSET_IPV6_DST;
VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT (hdr, IPV6, DST);
-
- clib_memcpy (hdr->buffer, ipv6_spec, sizeof (*ipv6_spec));
}
+
+ clib_memcpy (hdr->buffer, ipv6_spec, sizeof (*ipv6_spec));
}
break;
- case VIRTCHNL_PROTO_HDR_UDP:
+ case AVF_FLOW_ITEM_TYPE_UDP:
udp_spec = item->spec;
udp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_UDP;
@@ -295,7 +295,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_TCP:
+ case AVF_FLOW_ITEM_TYPE_TCP:
tcp_spec = item->spec;
tcp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_TCP;
@@ -329,7 +329,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
break;
- case VIRTCHNL_PROTO_HDR_SCTP:
+ case AVF_FLOW_ITEM_TYPE_SCTP:
sctp_spec = item->spec;
sctp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_SCTP;
@@ -360,7 +360,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_GTPU_IP:
+ case AVF_FLOW_ITEM_TYPE_GTPU:
gtp_spec = item->spec;
gtp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_GTPU_IP;
@@ -387,7 +387,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
break;
- case VIRTCHNL_PROTO_HDR_GTPU_EH:
+ case AVF_FLOW_ITEM_TYPE_GTP_PSC:
gtp_psc_spec = item->spec;
gtp_psc_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_GTPU_EH;
@@ -405,7 +405,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
break;
- case VIRTCHNL_PROTO_HDR_L2TPV3:
+ case AVF_FLOW_ITEM_TYPE_L2TPV3OIP:
l2tpv3oip_spec = item->spec;
l2tpv3oip_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_L2TPV3;
@@ -422,7 +422,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_ESP:
+ case AVF_FLOW_ITEM_TYPE_ESP:
esp_spec = item->spec;
esp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_ESP;
@@ -439,7 +439,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_AH:
+ case AVF_FLOW_ITEM_TYPE_AH:
ah_spec = item->spec;
ah_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_AH;
@@ -456,7 +456,7 @@ avf_fdir_rcfg_set_field (struct avf_fdir_conf *rcfg, int layer,
}
break;
- case VIRTCHNL_PROTO_HDR_PFCP:
+ case AVF_FLOW_ITEM_TYPE_PFCP:
pfcp_spec = item->spec;
pfcp_mask = item->mask;
hdr->type = VIRTCHNL_PROTO_HDR_PFCP;
@@ -591,7 +591,7 @@ avf_fdir_rcfg_act_mark (struct avf_fdir_conf *rcfg, const u32 mark,
}
int
-avf_fdir_rcfg_validate (struct avf_fdir_vc_ctx *ctx,
+avf_fdir_rcfg_validate (struct avf_flow_vc_ctx *ctx,
struct avf_fdir_conf *rcfg)
{
int ret;
@@ -617,7 +617,7 @@ avf_fdir_rcfg_validate (struct avf_fdir_vc_ctx *ctx,
}
int
-avf_fdir_rule_create (struct avf_fdir_vc_ctx *ctx, struct avf_fdir_conf *rcfg)
+avf_fdir_rule_create (struct avf_flow_vc_ctx *ctx, struct avf_fdir_conf *rcfg)
{
int ret;
rcfg->add_fltr.vsi_id = rcfg->vsi;
@@ -644,7 +644,7 @@ avf_fdir_rule_create (struct avf_fdir_vc_ctx *ctx, struct avf_fdir_conf *rcfg)
}
int
-avf_fdir_rule_destroy (struct avf_fdir_vc_ctx *ctx, struct avf_fdir_conf *rcfg)
+avf_fdir_rule_destroy (struct avf_flow_vc_ctx *ctx, struct avf_fdir_conf *rcfg)
{
int ret;
struct virtchnl_fdir_del fdir_ret;
@@ -683,18 +683,18 @@ avf_fdir_parse_action (const struct avf_flow_action actions[],
struct virtchnl_fdir_rule *rule_cfg = &rcfg->add_fltr.rule_cfg;
- for (; actions->type != VIRTCHNL_ACTION_NONE; actions++, act_idx++)
+ for (; actions->type != AVF_FLOW_ACTION_TYPE_END; actions++, act_idx++)
{
switch (actions->type)
{
- case VIRTCHNL_ACTION_PASSTHRU:
+ case AVF_FLOW_ACTION_TYPE_PASSTHRU:
dest_num++;
filter_action = &rule_cfg->action_set.actions[act_idx];
filter_action->type = VIRTCHNL_ACTION_PASSTHRU;
rule_cfg->action_set.count++;
break;
- case VIRTCHNL_ACTION_DROP:
+ case AVF_FLOW_ACTION_TYPE_DROP:
dest_num++;
ret = avf_fdir_rcfg_act_drop (rcfg, act_idx);
if (ret)
@@ -703,7 +703,7 @@ avf_fdir_parse_action (const struct avf_flow_action actions[],
rule_cfg->action_set.count++;
break;
- case VIRTCHNL_ACTION_QUEUE:
+ case AVF_FLOW_ACTION_TYPE_QUEUE:
dest_num++;
act_q = actions->conf;
@@ -722,7 +722,7 @@ avf_fdir_parse_action (const struct avf_flow_action actions[],
rule_cfg->action_set.count++;
break;
- case VIRTCHNL_ACTION_Q_REGION:
+ case AVF_FLOW_ACTION_TYPE_RSS:
dest_num++;
filter_action = &rule_cfg->action_set.actions[act_idx];
ret = avf_fdir_parse_action_qregion (rcfg, actions, act_idx, error);
@@ -732,7 +732,7 @@ avf_fdir_parse_action (const struct avf_flow_action actions[],
rule_cfg->action_set.count++;
break;
- case VIRTCHNL_ACTION_MARK:
+ case AVF_FLOW_ACTION_TYPE_MARK:
mark_num++;
act_msk = actions->conf;
rcfg->mark_flag = 1;
@@ -786,6 +786,36 @@ avf_fdir_parse_action (const struct avf_flow_action actions[],
}
int
+avf_fdir_parse_generic_pattern (struct avf_fdir_conf *rcfg,
+ struct avf_flow_item avf_items[],
+ struct avf_flow_error *error)
+{
+ struct avf_flow_item *item = avf_items;
+ u8 *pkt_buf, *msk_buf;
+ u16 spec_len, pkt_len;
+
+ spec_len = clib_strnlen (item->spec, VIRTCHNL_MAX_SIZE_GEN_PACKET);
+ pkt_len = spec_len / 2;
+
+ pkt_buf = clib_mem_alloc (pkt_len);
+ msk_buf = clib_mem_alloc (pkt_len);
+
+ avf_parse_generic_pattern (item, pkt_buf, msk_buf, spec_len);
+
+ clib_memcpy (rcfg->add_fltr.rule_cfg.proto_hdrs.raw.spec, pkt_buf, pkt_len);
+ clib_memcpy (rcfg->add_fltr.rule_cfg.proto_hdrs.raw.mask, msk_buf, pkt_len);
+
+ rcfg->add_fltr.rule_cfg.proto_hdrs.count = 0;
+ rcfg->add_fltr.rule_cfg.proto_hdrs.tunnel_level = 0;
+ rcfg->add_fltr.rule_cfg.proto_hdrs.raw.pkt_len = pkt_len;
+
+ clib_mem_free (pkt_buf);
+ clib_mem_free (msk_buf);
+
+ return 0;
+}
+
+int
avf_fdir_parse_pattern (struct avf_fdir_conf *rcfg,
struct avf_flow_item avf_items[],
struct avf_flow_error *error)
@@ -794,7 +824,7 @@ avf_fdir_parse_pattern (struct avf_fdir_conf *rcfg,
int ret = 0;
struct avf_flow_item *item;
- for (item = avf_items; item->type != VIRTCHNL_PROTO_HDR_NONE; item++)
+ for (item = avf_items; item->type != AVF_FLOW_ITEM_TYPE_END; item++)
{
ret = avf_fdir_rcfg_set_field (rcfg, layer, item, error);
if (ret)
diff --git a/src/plugins/avf/avf_rss_lib.c b/src/plugins/avf/avf_rss_lib.c
new file mode 100644
index 00000000000..23b0b59b7cc
--- /dev/null
+++ b/src/plugins/avf/avf_rss_lib.c
@@ -0,0 +1,2690 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vppinfra/mem.h>
+#include "avf_advanced_flow.h"
+
+#define AVF_PHINT_NONE 0
+#define AVF_PHINT_GTPU BIT_ULL (0)
+#define AVF_PHINT_GTPU_EH BIT_ULL (1)
+#define AVF_PHINT_GTPU_EH_DWN BIT_ULL (2)
+#define AVF_PHINT_GTPU_EH_UP BIT_ULL (3)
+#define AVF_PHINT_OUTER_IPV4 BIT_ULL (4)
+#define AVF_PHINT_OUTER_IPV6 BIT_ULL (5)
+#define AVF_PHINT_GRE BIT_ULL (6)
+/* the second IP header of GTPoGRE */
+#define AVF_PHINT_MID_IPV4 BIT_ULL (7)
+#define AVF_PHINT_MID_IPV6 BIT_ULL (8)
+/* L2TPv2 */
+#define AVF_PHINT_L2TPV2 BIT_ULL (9)
+#define AVF_PHINT_L2TPV2_LEN BIT_ULL (10)
+/* Raw */
+#define AVF_PHINT_RAW BIT_ULL (11)
+
+#define AVF_PHINT_GTPU_MSK \
+ (AVF_PHINT_GTPU | AVF_PHINT_GTPU_EH | AVF_PHINT_GTPU_EH_DWN | \
+ AVF_PHINT_GTPU_EH_UP)
+
+#define AVF_PHINT_LAYERS_MSK (AVF_PHINT_OUTER_IPV4 | AVF_PHINT_OUTER_IPV6)
+
+#define AVF_GTPU_EH_DWNLINK 0
+#define AVF_GTPU_EH_UPLINK 1
+
+#define FIELD_SELECTOR(proto_hdr_field) \
+ (1UL << ((proto_hdr_field) &PROTO_HDR_FIELD_MASK))
+#define BUFF_NOUSED 0
+
+#define REFINE_PROTO_FLD(op, fld) \
+ VIRTCHNL_##op##_PROTO_HDR_FIELD (hdr, VIRTCHNL_PROTO_HDR_##fld)
+#define REPALCE_PROTO_FLD(fld_1, fld_2) \
+ do \
+ { \
+ REFINE_PROTO_FLD (DEL, fld_1); \
+ REFINE_PROTO_FLD (ADD, fld_2); \
+ } \
+ while (0)
+
+#define proto_hdr_eth \
+ { \
+ VIRTCHNL_PROTO_HDR_ETH, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_ETH_SRC) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_ETH_DST), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_svlan \
+ { \
+ VIRTCHNL_PROTO_HDR_S_VLAN, FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_S_VLAN_ID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_cvlan \
+ { \
+ VIRTCHNL_PROTO_HDR_C_VLAN, FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_C_VLAN_ID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ipv4 \
+ { \
+ VIRTCHNL_PROTO_HDR_IPV4, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV4_SRC) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV4_DST), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ipv4_with_prot \
+ { \
+ VIRTCHNL_PROTO_HDR_IPV4, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV4_SRC) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV4_DST) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV4_PROT), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ipv6 \
+ { \
+ VIRTCHNL_PROTO_HDR_IPV6, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_SRC) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_DST), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ipv6_frag \
+ { \
+ VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG_PKID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ipv6_with_prot \
+ { \
+ VIRTCHNL_PROTO_HDR_IPV6, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_SRC) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_DST) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_IPV6_PROT), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_udp \
+ { \
+ VIRTCHNL_PROTO_HDR_UDP, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_UDP_DST_PORT), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_tcp \
+ { \
+ VIRTCHNL_PROTO_HDR_TCP, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_TCP_DST_PORT), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_sctp \
+ { \
+ VIRTCHNL_PROTO_HDR_SCTP, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_esp \
+ { \
+ VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_ESP_SPI), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ah \
+ { \
+ VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_AH_SPI), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_l2tpv3 \
+ { \
+ VIRTCHNL_PROTO_HDR_L2TPV3, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_pfcp \
+ { \
+ VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_PFCP_SEID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_gtpc \
+ { \
+ VIRTCHNL_PROTO_HDR_GTPC, 0, { BUFF_NOUSED } \
+ }
+
+#define proto_hdr_ecpri \
+ { \
+ VIRTCHNL_PROTO_HDR_ECPRI, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_ECPRI_PC_RTC_ID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_l2tpv2 \
+ { \
+ VIRTCHNL_PROTO_HDR_L2TPV2, \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_L2TPV2_SESS_ID) | \
+ FIELD_SELECTOR (VIRTCHNL_PROTO_HDR_L2TPV2_LEN_SESS_ID), \
+ { \
+ BUFF_NOUSED \
+ } \
+ }
+
+#define proto_hdr_ppp \
+ { \
+ VIRTCHNL_PROTO_HDR_PPP, 0, { BUFF_NOUSED } \
+ }
+
+#define TUNNEL_LEVEL_OUTER 0
+#define TUNNEL_LEVEL_INNER 1
+
+/* proto_hdrs template */
+struct virtchnl_proto_hdrs outer_ipv4_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 4,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan, proto_hdr_ipv4 } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv4_udp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan,
+ proto_hdr_ipv4_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv4_tcp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan,
+ proto_hdr_ipv4_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv4_sctp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan, proto_hdr_ipv4,
+ proto_hdr_sctp } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv6_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 4,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan, proto_hdr_ipv6 } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv6_frag_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan, proto_hdr_ipv6,
+ proto_hdr_ipv6_frag } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv6_udp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan,
+ proto_hdr_ipv6_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv6_tcp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan,
+ proto_hdr_ipv6_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs outer_ipv6_sctp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_svlan, proto_hdr_cvlan, proto_hdr_ipv6,
+ proto_hdr_sctp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv4_tmplt = { TUNNEL_LEVEL_INNER,
+ 1,
+ { { proto_hdr_ipv4 } } };
+
+struct virtchnl_proto_hdrs inner_ipv4_udp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv4_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv4_tcp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv4_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv4_tmplt = {
+ 2, 1, { { proto_hdr_ipv4 } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv4_udp_tmplt = {
+ 2, 2, { { proto_hdr_ipv4_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv4_tcp_tmplt = {
+ 2, 2, { { proto_hdr_ipv4_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv6_tmplt = {
+ 2, 1, { { proto_hdr_ipv6 } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv6_udp_tmplt = {
+ 2, 2, { { proto_hdr_ipv6_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs second_inner_ipv6_tcp_tmplt = {
+ 2, 2, { { proto_hdr_ipv6_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv4_sctp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv4, proto_hdr_sctp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv6_tmplt = { TUNNEL_LEVEL_INNER,
+ 1,
+ { { proto_hdr_ipv6 } } };
+
+struct virtchnl_proto_hdrs inner_ipv6_udp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv6_with_prot, proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv6_tcp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv6_with_prot, proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs inner_ipv6_sctp_tmplt = {
+ TUNNEL_LEVEL_INNER, 2, { { proto_hdr_ipv6, proto_hdr_sctp } }
+};
+
+struct virtchnl_proto_hdrs ipv4_esp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv4, proto_hdr_esp } }
+};
+
+struct virtchnl_proto_hdrs ipv4_udp_esp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 3, { { proto_hdr_ipv4, proto_hdr_udp, proto_hdr_esp } }
+};
+
+struct virtchnl_proto_hdrs ipv4_ah_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv4, proto_hdr_ah } }
+};
+
+struct virtchnl_proto_hdrs ipv6_esp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv6, proto_hdr_esp } }
+};
+
+struct virtchnl_proto_hdrs ipv6_udp_esp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 3, { { proto_hdr_ipv6, proto_hdr_udp, proto_hdr_esp } }
+};
+
+struct virtchnl_proto_hdrs ipv6_ah_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv6, proto_hdr_ah } }
+};
+
+struct virtchnl_proto_hdrs ipv4_l2tpv3_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv4, proto_hdr_l2tpv3 } }
+};
+
+struct virtchnl_proto_hdrs ipv6_l2tpv3_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv6, proto_hdr_l2tpv3 } }
+};
+
+struct virtchnl_proto_hdrs ipv4_pfcp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv4, proto_hdr_pfcp } }
+};
+
+struct virtchnl_proto_hdrs ipv6_pfcp_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_ipv6, proto_hdr_pfcp } }
+};
+
+struct virtchnl_proto_hdrs ipv4_udp_gtpc_tmplt = {
+ TUNNEL_LEVEL_OUTER, 3, { { proto_hdr_ipv4, proto_hdr_udp, proto_hdr_gtpc } }
+};
+
+struct virtchnl_proto_hdrs ipv6_udp_gtpc_tmplt = {
+ TUNNEL_LEVEL_OUTER, 3, { { proto_hdr_ipv6, proto_hdr_udp, proto_hdr_gtpc } }
+};
+
+struct virtchnl_proto_hdrs eth_ecpri_tmplt = {
+ TUNNEL_LEVEL_OUTER, 2, { { proto_hdr_eth, proto_hdr_ecpri } }
+};
+
+struct virtchnl_proto_hdrs ipv4_ecpri_tmplt = {
+ TUNNEL_LEVEL_OUTER, 3, { { proto_hdr_ipv4, proto_hdr_udp, proto_hdr_ecpri } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv4_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 3,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv4 } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv6_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 3,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv6 } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv4_udp_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 4,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv4_with_prot,
+ proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv4_tcp_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 4,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv4_with_prot,
+ proto_hdr_tcp } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv6_udp_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 4,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv6_with_prot,
+ proto_hdr_udp } }
+};
+
+struct virtchnl_proto_hdrs udp_l2tpv2_ppp_ipv6_tcp_tmplt = {
+ TUNNEL_LEVEL_INNER,
+ 4,
+ { { proto_hdr_l2tpv2, proto_hdr_ppp, proto_hdr_ipv6_with_prot,
+ proto_hdr_tcp } }
+
+};
+
+struct virtchnl_proto_hdrs ipv4_l2tpv2_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 4,
+ { { proto_hdr_eth, proto_hdr_ipv4, proto_hdr_udp, proto_hdr_l2tpv2 } }
+};
+
+struct virtchnl_proto_hdrs ipv6_l2tpv2_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 4,
+ { { proto_hdr_eth, proto_hdr_ipv6, proto_hdr_udp, proto_hdr_l2tpv2 } }
+};
+
+struct virtchnl_proto_hdrs ipv4_l2tpv2_ppp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_ipv4, proto_hdr_udp, proto_hdr_l2tpv2,
+ proto_hdr_ppp } }
+};
+
+struct virtchnl_proto_hdrs ipv6_l2tpv2_ppp_tmplt = {
+ TUNNEL_LEVEL_OUTER,
+ 5,
+ { { proto_hdr_eth, proto_hdr_ipv6, proto_hdr_udp, proto_hdr_l2tpv2,
+ proto_hdr_ppp } }
+};
+
+/* rss type super set */
+
+#define AVF_INSET_NONE 0ULL
+
+/* IPv4 outer */
+#define AVF_RSS_TYPE_OUTER_IPV4 \
+ (AVF_ETH_RSS_ETH | AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_FRAG_IPV4 | \
+ AVF_ETH_RSS_IPV4_CHKSUM)
+#define AVF_RSS_TYPE_OUTER_IPV4_UDP \
+ (AVF_RSS_TYPE_OUTER_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_UDP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+#define AVF_RSS_TYPE_OUTER_IPV4_TCP \
+ (AVF_RSS_TYPE_OUTER_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_TCP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+#define AVF_RSS_TYPE_OUTER_IPV4_SCTP \
+ (AVF_RSS_TYPE_OUTER_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_SCTP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+/* IPv6 outer */
+#define AVF_RSS_TYPE_OUTER_IPV6 (AVF_ETH_RSS_ETH | AVF_ETH_RSS_IPV6)
+#define AVF_RSS_TYPE_OUTER_IPV6_FRAG \
+ (AVF_RSS_TYPE_OUTER_IPV6 | AVF_ETH_RSS_FRAG_IPV6)
+#define AVF_RSS_TYPE_OUTER_IPV6_UDP \
+ (AVF_RSS_TYPE_OUTER_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_UDP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+#define AVF_RSS_TYPE_OUTER_IPV6_TCP \
+ (AVF_RSS_TYPE_OUTER_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_TCP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+#define AVF_RSS_TYPE_OUTER_IPV6_SCTP \
+ (AVF_RSS_TYPE_OUTER_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_SCTP | \
+ AVF_ETH_RSS_L4_CHKSUM)
+/* VLAN IPV4 */
+#define AVF_RSS_TYPE_VLAN_IPV4 \
+ (AVF_RSS_TYPE_OUTER_IPV4 | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV4_UDP \
+ (AVF_RSS_TYPE_OUTER_IPV4_UDP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV4_TCP \
+ (AVF_RSS_TYPE_OUTER_IPV4_TCP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV4_SCTP \
+ (AVF_RSS_TYPE_OUTER_IPV4_SCTP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+/* VLAN IPv6 */
+#define AVF_RSS_TYPE_VLAN_IPV6 \
+ (AVF_RSS_TYPE_OUTER_IPV6 | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV6_FRAG \
+ (AVF_RSS_TYPE_OUTER_IPV6_FRAG | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV6_UDP \
+ (AVF_RSS_TYPE_OUTER_IPV6_UDP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV6_TCP \
+ (AVF_RSS_TYPE_OUTER_IPV6_TCP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+#define AVF_RSS_TYPE_VLAN_IPV6_SCTP \
+ (AVF_RSS_TYPE_OUTER_IPV6_SCTP | AVF_ETH_RSS_S_VLAN | AVF_ETH_RSS_C_VLAN)
+/* IPv4 inner */
+#define AVF_RSS_TYPE_INNER_IPV4 AVF_ETH_RSS_IPV4
+#define AVF_RSS_TYPE_INNER_IPV4_UDP \
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_UDP)
+#define AVF_RSS_TYPE_INNER_IPV4_TCP \
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_TCP)
+#define AVF_RSS_TYPE_INNER_IPV4_SCTP \
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_SCTP)
+/* IPv6 inner */
+#define AVF_RSS_TYPE_INNER_IPV6 AVF_ETH_RSS_IPV6
+#define AVF_RSS_TYPE_INNER_IPV6_UDP \
+ (AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_UDP)
+#define AVF_RSS_TYPE_INNER_IPV6_TCP \
+ (AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_TCP)
+#define AVF_RSS_TYPE_INNER_IPV6_SCTP \
+ (AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_SCTP)
+/* GTPU IPv4 */
+#define AVF_RSS_TYPE_GTPU_IPV4 (AVF_RSS_TYPE_INNER_IPV4 | AVF_ETH_RSS_GTPU)
+#define AVF_RSS_TYPE_GTPU_IPV4_UDP \
+ (AVF_RSS_TYPE_INNER_IPV4_UDP | AVF_ETH_RSS_GTPU)
+#define AVF_RSS_TYPE_GTPU_IPV4_TCP \
+ (AVF_RSS_TYPE_INNER_IPV4_TCP | AVF_ETH_RSS_GTPU)
+/* GTPU IPv6 */
+#define AVF_RSS_TYPE_GTPU_IPV6 (AVF_RSS_TYPE_INNER_IPV6 | AVF_ETH_RSS_GTPU)
+#define AVF_RSS_TYPE_GTPU_IPV6_UDP \
+ (AVF_RSS_TYPE_INNER_IPV6_UDP | AVF_ETH_RSS_GTPU)
+#define AVF_RSS_TYPE_GTPU_IPV6_TCP \
+ (AVF_RSS_TYPE_INNER_IPV6_TCP | AVF_ETH_RSS_GTPU)
+/* ESP, AH, L2TPV3 and PFCP */
+#define AVF_RSS_TYPE_IPV4_ESP (AVF_ETH_RSS_ESP | AVF_ETH_RSS_IPV4)
+#define AVF_RSS_TYPE_IPV4_AH (AVF_ETH_RSS_AH | AVF_ETH_RSS_IPV4)
+#define AVF_RSS_TYPE_IPV6_ESP (AVF_ETH_RSS_ESP | AVF_ETH_RSS_IPV6)
+#define AVF_RSS_TYPE_IPV6_AH (AVF_ETH_RSS_AH | AVF_ETH_RSS_IPV6)
+#define AVF_RSS_TYPE_IPV4_L2TPV3 (AVF_ETH_RSS_L2TPV3 | AVF_ETH_RSS_IPV4)
+#define AVF_RSS_TYPE_IPV6_L2TPV3 (AVF_ETH_RSS_L2TPV3 | AVF_ETH_RSS_IPV6)
+#define AVF_RSS_TYPE_IPV4_PFCP (AVF_ETH_RSS_PFCP | AVF_ETH_RSS_IPV4)
+#define AVF_RSS_TYPE_IPV6_PFCP (AVF_ETH_RSS_PFCP | AVF_ETH_RSS_IPV6)
+
+/* L2TPv2 */
+#define AVF_RSS_TYPE_ETH_L2TPV2 (AVF_ETH_RSS_ETH | AVF_ETH_RSS_L2TPV2)
+
+#define VALID_RSS_IPV4_L4 \
+ (AVF_ETH_RSS_NONFRAG_IPV4_UDP | AVF_ETH_RSS_NONFRAG_IPV4_TCP | \
+ AVF_ETH_RSS_NONFRAG_IPV4_SCTP)
+
+#define VALID_RSS_IPV6_L4 \
+ (AVF_ETH_RSS_NONFRAG_IPV6_UDP | AVF_ETH_RSS_NONFRAG_IPV6_TCP | \
+ AVF_ETH_RSS_NONFRAG_IPV6_SCTP)
+
+#define VALID_RSS_IPV4 \
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_FRAG_IPV4 | VALID_RSS_IPV4_L4)
+#define VALID_RSS_IPV6 \
+ (AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_FRAG_IPV6 | VALID_RSS_IPV6_L4)
+#define VALID_RSS_L3 (VALID_RSS_IPV4 | VALID_RSS_IPV6)
+#define VALID_RSS_L4 (VALID_RSS_IPV4_L4 | VALID_RSS_IPV6_L4)
+
+#define VALID_RSS_ATTR \
+ (AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY | \
+ AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY | \
+ AVF_ETH_RSS_L2_SRC_ONLY | AVF_ETH_RSS_L2_DST_ONLY | AVF_ETH_RSS_L3_PRE64)
+
+#define INVALID_RSS_ATTR \
+ (AVF_ETH_RSS_L3_PRE32 | AVF_ETH_RSS_L3_PRE40 | AVF_ETH_RSS_L3_PRE48 | \
+ AVF_ETH_RSS_L3_PRE56 | AVF_ETH_RSS_L3_PRE96)
+
+static u64 invalid_rss_comb[] = {
+ AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_UDP,
+ AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_NONFRAG_IPV4_TCP,
+ AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_UDP,
+ AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_NONFRAG_IPV6_TCP,
+ AVF_ETH_RSS_L3_PRE32 | AVF_ETH_RSS_L3_PRE40 | AVF_ETH_RSS_L3_PRE48 |
+ AVF_ETH_RSS_L3_PRE56 | AVF_ETH_RSS_L3_PRE96
+};
+
+struct rss_attr_type
+{
+ u64 attr;
+ u64 type;
+};
+
+static struct rss_attr_type rss_attr_to_valid_type[] = {
+ { AVF_ETH_RSS_L2_SRC_ONLY | AVF_ETH_RSS_L2_DST_ONLY, AVF_ETH_RSS_ETH },
+ { AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY, VALID_RSS_L3 },
+ { AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY, VALID_RSS_L4 },
+ /* current ipv6 prefix only supports prefix 64 bits*/
+ { AVF_ETH_RSS_L3_PRE64, VALID_RSS_IPV6 },
+ { INVALID_RSS_ATTR, 0 }
+};
+
+/* raw */
+enum avf_flow_item_type avf_pattern_raw[] = {
+ AVF_FLOW_ITEM_TYPE_RAW,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* empty */
+enum avf_flow_item_type avf_pattern_empty[] = {
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* L2 */
+enum avf_flow_item_type avf_pattern_ethertype[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_ethertype_vlan[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_ethertype_qinq[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* ARP */
+enum avf_flow_item_type avf_pattern_eth_arp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_ARP_ETH_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* non-tunnel IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_SCTP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv4_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_SCTP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv4_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_SCTP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* non-tunnel IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_frag_ext[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6_frag_ext[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6_frag_ext[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_IPV6_FRAG_EXT, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_SCTP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_SCTP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6_sctp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_SCTP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_icmp6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ICMP6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_vlan_ipv6_icmp6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ICMP6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_qinq_ipv6_icmp6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_VLAN, AVF_FLOW_ITEM_TYPE_VLAN,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_ICMP6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPv4 GTPC */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpc[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GTPU (EH) */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPv6 GTPC */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpc[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GTPU (EH) */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GTPU IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv4 UDP GTPU IPv4*/
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv4 UDP GTPU IPv6*/
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv6 UDP GTPU IPv4*/
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv6 UDP GTPU IPv6*/
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv4 UDP GTPU IPv4*/
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv4 UDP GTPU IPv6*/
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv6 UDP GTPU IPv4*/
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv6 UDP GTPU IPv6*/
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv4 UDP GTPU EH IPv4*/
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv4 UDP GTPU IPv6*/
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv6 UDP GTPU EH IPv4*/
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv6 UDP GTPU EH IPv6*/
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv4 UDP GTPU EH IPv4*/
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GRE IPv4 UDP GTPU EH IPv6*/
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv6 UDP GTPU EH IPv4*/
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GRE IPv6 UDP GTPU EH IPv6*/
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_GTPU,
+ AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GTPU IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_ipv6_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GTPU IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GTPU IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_ipv6_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_ICMP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GTPU EH IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV4 GTPU EH IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gtpu_eh_ipv6_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GTPU EH IPv4 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv4_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* IPV6 GTPU EH IPv6 */
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gtpu_eh_ipv6_icmp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_GTPU, AVF_FLOW_ITEM_TYPE_GTP_PSC, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ICMP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* ESP */
+enum avf_flow_item_type avf_pattern_eth_ipv4_esp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_ESP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_esp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_ESP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_esp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_ESP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_esp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_ESP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* AH */
+enum avf_flow_item_type avf_pattern_eth_ipv4_ah[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_AH,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_ah[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_AH,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* L2TPV3 */
+enum avf_flow_item_type avf_pattern_eth_ipv4_l2tpv3[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_L2TPV3OIP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_l2tpv3[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_L2TPV3OIP,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* PFCP */
+enum avf_flow_item_type avf_pattern_eth_ipv4_pfcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_PFCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_pfcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_PFCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* ECPRI */
+enum avf_flow_item_type avf_pattern_eth_ecpri[] = {
+ AVF_FLOW_ITEM_TYPE_ETH,
+ AVF_FLOW_ITEM_TYPE_ECPRI,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_ecpri[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_ECPRI, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* GRE */
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_gre_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_gre_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_GRE,
+ AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+/* PPPoL2TPv2oUDP */
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV4, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV4,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6_udp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_UDP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+enum avf_flow_item_type avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6_tcp[] = {
+ AVF_FLOW_ITEM_TYPE_ETH, AVF_FLOW_ITEM_TYPE_IPV6, AVF_FLOW_ITEM_TYPE_UDP,
+ AVF_FLOW_ITEM_TYPE_L2TPV2, AVF_FLOW_ITEM_TYPE_PPP, AVF_FLOW_ITEM_TYPE_IPV6,
+ AVF_FLOW_ITEM_TYPE_TCP, AVF_FLOW_ITEM_TYPE_END,
+};
+
+static struct avf_pattern_match_item avf_hash_pattern_list[] = {
+ /* IPv4 */
+ { avf_pattern_raw, AVF_INSET_NONE, NULL },
+ { avf_pattern_eth_ipv4, AVF_RSS_TYPE_OUTER_IPV4, &outer_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_udp, AVF_RSS_TYPE_OUTER_IPV4_UDP,
+ &outer_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_tcp, AVF_RSS_TYPE_OUTER_IPV4_TCP,
+ &outer_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_sctp, AVF_RSS_TYPE_OUTER_IPV4_SCTP,
+ &outer_ipv4_sctp_tmplt },
+ { avf_pattern_eth_vlan_ipv4, AVF_RSS_TYPE_VLAN_IPV4, &outer_ipv4_tmplt },
+ { avf_pattern_eth_vlan_ipv4_udp, AVF_RSS_TYPE_VLAN_IPV4_UDP,
+ &outer_ipv4_udp_tmplt },
+ { avf_pattern_eth_vlan_ipv4_tcp, AVF_RSS_TYPE_VLAN_IPV4_TCP,
+ &outer_ipv4_tcp_tmplt },
+ { avf_pattern_eth_vlan_ipv4_sctp, AVF_RSS_TYPE_VLAN_IPV4_SCTP,
+ &outer_ipv4_sctp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu, AVF_ETH_RSS_IPV4, &outer_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4, AVF_RSS_TYPE_GTPU_IPV4,
+ &second_inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4_udp, AVF_RSS_TYPE_GTPU_IPV4_UDP,
+ &second_inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv4_tcp, AVF_RSS_TYPE_GTPU_IPV4_TCP,
+ &second_inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_esp, AVF_RSS_TYPE_IPV4_ESP, &ipv4_esp_tmplt },
+ { avf_pattern_eth_ipv4_udp_esp, AVF_RSS_TYPE_IPV4_ESP, &ipv4_udp_esp_tmplt },
+ { avf_pattern_eth_ipv4_ah, AVF_RSS_TYPE_IPV4_AH, &ipv4_ah_tmplt },
+ { avf_pattern_eth_ipv4_l2tpv3, AVF_RSS_TYPE_IPV4_L2TPV3,
+ &ipv4_l2tpv3_tmplt },
+ { avf_pattern_eth_ipv4_pfcp, AVF_RSS_TYPE_IPV4_PFCP, &ipv4_pfcp_tmplt },
+ { avf_pattern_eth_ipv4_gtpc, AVF_ETH_RSS_IPV4, &ipv4_udp_gtpc_tmplt },
+ { avf_pattern_eth_ecpri, AVF_ETH_RSS_ECPRI, &eth_ecpri_tmplt },
+ { avf_pattern_eth_ipv4_ecpri, AVF_ETH_RSS_ECPRI, &ipv4_ecpri_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4, AVF_RSS_TYPE_INNER_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4, AVF_RSS_TYPE_INNER_IPV4,
+ &inner_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_tcp, AVF_RSS_TYPE_INNER_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_tcp, AVF_RSS_TYPE_INNER_IPV4_TCP,
+ &inner_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_udp, AVF_RSS_TYPE_INNER_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_udp, AVF_RSS_TYPE_INNER_IPV4_UDP,
+ &inner_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2, AVF_RSS_TYPE_ETH_L2TPV2,
+ &ipv4_l2tpv2_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp, AVF_RSS_TYPE_ETH_L2TPV2,
+ &ipv4_l2tpv2_ppp_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4, AVF_RSS_TYPE_INNER_IPV4,
+ &udp_l2tpv2_ppp_ipv4_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4_udp, AVF_RSS_TYPE_INNER_IPV4_UDP,
+ &udp_l2tpv2_ppp_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv4_tcp, AVF_RSS_TYPE_INNER_IPV4_TCP,
+ &udp_l2tpv2_ppp_ipv4_tcp_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4, AVF_RSS_TYPE_INNER_IPV4,
+ &udp_l2tpv2_ppp_ipv4_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4_udp, AVF_RSS_TYPE_INNER_IPV4_UDP,
+ &udp_l2tpv2_ppp_ipv4_udp_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv4_tcp, AVF_RSS_TYPE_INNER_IPV4_TCP,
+ &udp_l2tpv2_ppp_ipv4_tcp_tmplt },
+
+ /* IPv6 */
+ { avf_pattern_eth_ipv6, AVF_RSS_TYPE_OUTER_IPV6, &outer_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_frag_ext, AVF_RSS_TYPE_OUTER_IPV6_FRAG,
+ &outer_ipv6_frag_tmplt },
+ { avf_pattern_eth_ipv6_udp, AVF_RSS_TYPE_OUTER_IPV6_UDP,
+ &outer_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_tcp, AVF_RSS_TYPE_OUTER_IPV6_TCP,
+ &outer_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_sctp, AVF_RSS_TYPE_OUTER_IPV6_SCTP,
+ &outer_ipv6_sctp_tmplt },
+ { avf_pattern_eth_vlan_ipv6, AVF_RSS_TYPE_VLAN_IPV6, &outer_ipv6_tmplt },
+ { avf_pattern_eth_vlan_ipv6_frag_ext, AVF_RSS_TYPE_OUTER_IPV6_FRAG,
+ &outer_ipv6_frag_tmplt },
+ { avf_pattern_eth_vlan_ipv6_udp, AVF_RSS_TYPE_VLAN_IPV6_UDP,
+ &outer_ipv6_udp_tmplt },
+ { avf_pattern_eth_vlan_ipv6_tcp, AVF_RSS_TYPE_VLAN_IPV6_TCP,
+ &outer_ipv6_tcp_tmplt },
+ { avf_pattern_eth_vlan_ipv6_sctp, AVF_RSS_TYPE_VLAN_IPV6_SCTP,
+ &outer_ipv6_sctp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu, AVF_ETH_RSS_IPV6, &outer_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv4_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv4_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6, AVF_RSS_TYPE_GTPU_IPV6,
+ &second_inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6_udp, AVF_RSS_TYPE_GTPU_IPV6_UDP,
+ &second_inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_gtpu_eh_ipv6_tcp, AVF_RSS_TYPE_GTPU_IPV6_TCP,
+ &second_inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_esp, AVF_RSS_TYPE_IPV6_ESP, &ipv6_esp_tmplt },
+ { avf_pattern_eth_ipv6_udp_esp, AVF_RSS_TYPE_IPV6_ESP, &ipv6_udp_esp_tmplt },
+ { avf_pattern_eth_ipv6_ah, AVF_RSS_TYPE_IPV6_AH, &ipv6_ah_tmplt },
+ { avf_pattern_eth_ipv6_l2tpv3, AVF_RSS_TYPE_IPV6_L2TPV3,
+ &ipv6_l2tpv3_tmplt },
+ { avf_pattern_eth_ipv6_pfcp, AVF_RSS_TYPE_IPV6_PFCP, &ipv6_pfcp_tmplt },
+ { avf_pattern_eth_ipv6_gtpc, AVF_ETH_RSS_IPV6, &ipv6_udp_gtpc_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6, AVF_RSS_TYPE_INNER_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6, AVF_RSS_TYPE_INNER_IPV6,
+ &inner_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_tcp, AVF_RSS_TYPE_INNER_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_tcp, AVF_RSS_TYPE_INNER_IPV6_TCP,
+ &inner_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv4_gre_ipv6_udp, AVF_RSS_TYPE_INNER_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_gre_ipv6_udp, AVF_RSS_TYPE_INNER_IPV6_UDP,
+ &inner_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2, AVF_RSS_TYPE_ETH_L2TPV2,
+ &ipv6_l2tpv2_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp, AVF_RSS_TYPE_ETH_L2TPV2,
+ &ipv6_l2tpv2_ppp_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6, AVF_RSS_TYPE_INNER_IPV6,
+ &udp_l2tpv2_ppp_ipv6_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6_udp, AVF_RSS_TYPE_INNER_IPV6_UDP,
+ &udp_l2tpv2_ppp_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv4_udp_l2tpv2_ppp_ipv6_tcp, AVF_RSS_TYPE_INNER_IPV6_TCP,
+ &udp_l2tpv2_ppp_ipv6_tcp_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6, AVF_RSS_TYPE_INNER_IPV6,
+ &udp_l2tpv2_ppp_ipv6_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6_udp, AVF_RSS_TYPE_INNER_IPV6_UDP,
+ &udp_l2tpv2_ppp_ipv6_udp_tmplt },
+ { avf_pattern_eth_ipv6_udp_l2tpv2_ppp_ipv6_tcp, AVF_RSS_TYPE_INNER_IPV6_TCP,
+ &udp_l2tpv2_ppp_ipv6_tcp_tmplt },
+
+};
+
+static inline u64
+avf_eth_rss_hf_refine (u64 rss_hf)
+{
+ if ((rss_hf & AVF_ETH_RSS_L3_SRC_ONLY) && (rss_hf & AVF_ETH_RSS_L3_DST_ONLY))
+ rss_hf &= ~(AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY);
+
+ if ((rss_hf & AVF_ETH_RSS_L4_SRC_ONLY) && (rss_hf & AVF_ETH_RSS_L4_DST_ONLY))
+ rss_hf &= ~(AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY);
+
+ return rss_hf;
+}
+
+static int
+avf_any_invalid_rss_type (enum avf_eth_hash_function rss_func, u64 rss_type,
+ u64 allow_rss_type)
+{
+ u32 i;
+
+ /**
+ * Check if l3/l4 SRC/DST_ONLY is set for SYMMETRIC_TOEPLITZ
+ * hash function.
+ */
+ if (rss_func == AVF_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ)
+ {
+ if (rss_type & (AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY |
+ AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY))
+ return 1;
+
+ if (!(rss_type &
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_IPV6 |
+ AVF_ETH_RSS_NONFRAG_IPV4_UDP | AVF_ETH_RSS_NONFRAG_IPV6_UDP |
+ AVF_ETH_RSS_NONFRAG_IPV4_TCP | AVF_ETH_RSS_NONFRAG_IPV6_TCP |
+ AVF_ETH_RSS_NONFRAG_IPV4_SCTP | AVF_ETH_RSS_NONFRAG_IPV6_SCTP)))
+ return 1;
+ }
+
+ /* check invalid combination */
+ for (i = 0; i < _vec_len (invalid_rss_comb); i++)
+ {
+ if (__builtin_popcountll (rss_type & invalid_rss_comb[i]) > 1)
+ return 1;
+ }
+
+ /* check invalid RSS attribute */
+ for (i = 0; i < _vec_len (rss_attr_to_valid_type); i++)
+ {
+ struct rss_attr_type *rat = &rss_attr_to_valid_type[i];
+
+ if (rat->attr & rss_type && !(rat->type & rss_type))
+ return 1;
+ }
+
+ /* check not allowed RSS type */
+ rss_type &= ~VALID_RSS_ATTR;
+
+ return ((rss_type & allow_rss_type) != rss_type);
+}
+
+int
+avf_rss_cfg_create (struct virtchnl_rss_cfg **rss_cfg, int tunnel_level)
+{
+ *rss_cfg = clib_mem_alloc (sizeof (**rss_cfg));
+
+ clib_memset (*rss_cfg, 0, sizeof (**rss_cfg));
+
+ (*rss_cfg)->proto_hdrs.tunnel_level = tunnel_level;
+
+ return 0;
+}
+
+int
+avf_rss_rcfg_destroy (struct virtchnl_rss_cfg *rss_cfg)
+{
+ clib_mem_free (rss_cfg);
+
+ return 0;
+}
+
+/* refine proto hdrs base on gtpu rss type */
+static void
+avf_refine_proto_hdrs_gtpu (struct virtchnl_proto_hdrs *proto_hdrs,
+ u64 rss_type)
+{
+ struct virtchnl_proto_hdr *hdr;
+ int i;
+
+ if (!(rss_type & AVF_ETH_RSS_GTPU))
+ return;
+
+ for (i = 0; i < proto_hdrs->count; i++)
+ {
+ hdr = &proto_hdrs->proto_hdr[i];
+ switch (hdr->type)
+ {
+ case VIRTCHNL_PROTO_HDR_GTPU_IP:
+ REFINE_PROTO_FLD (ADD, GTPU_IP_TEID);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void
+avf_hash_add_fragment_hdr (struct virtchnl_proto_hdrs *hdrs, int layer)
+{
+ struct virtchnl_proto_hdr *hdr1;
+ struct virtchnl_proto_hdr *hdr2;
+ int i;
+
+ if (layer < 0 || layer > hdrs->count)
+ return;
+
+ /* shift headers layer */
+ for (i = hdrs->count; i >= layer; i--)
+ {
+ hdr1 = &hdrs->proto_hdr[i];
+ hdr2 = &hdrs->proto_hdr[i - 1];
+ *hdr1 = *hdr2;
+ }
+
+ /* adding dummy fragment header */
+ hdr1 = &hdrs->proto_hdr[layer];
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, IPV4_FRAG);
+ hdrs->count = ++layer;
+}
+
+/* refine proto hdrs base on l2, l3, l4 rss type */
+static void
+avf_refine_proto_hdrs_l234 (struct virtchnl_proto_hdrs *proto_hdrs,
+ u64 rss_type)
+{
+ struct virtchnl_proto_hdr *hdr;
+ int i;
+
+ for (i = 0; i < proto_hdrs->count; i++)
+ {
+ hdr = &proto_hdrs->proto_hdr[i];
+ switch (hdr->type)
+ {
+ case VIRTCHNL_PROTO_HDR_ETH:
+ if (!(rss_type & AVF_ETH_RSS_ETH))
+ hdr->field_selector = 0;
+ else if (rss_type & AVF_ETH_RSS_L2_SRC_ONLY)
+ REFINE_PROTO_FLD (DEL, ETH_DST);
+ else if (rss_type & AVF_ETH_RSS_L2_DST_ONLY)
+ REFINE_PROTO_FLD (DEL, ETH_SRC);
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV4:
+ if (rss_type &
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_FRAG_IPV4 |
+ AVF_ETH_RSS_NONFRAG_IPV4_UDP | AVF_ETH_RSS_NONFRAG_IPV4_TCP |
+ AVF_ETH_RSS_NONFRAG_IPV4_SCTP))
+ {
+ if (rss_type & AVF_ETH_RSS_FRAG_IPV4)
+ {
+ avf_hash_add_fragment_hdr (proto_hdrs, i + 1);
+ }
+ else if (rss_type & AVF_ETH_RSS_L3_SRC_ONLY)
+ {
+ REFINE_PROTO_FLD (DEL, IPV4_DST);
+ }
+ else if (rss_type & AVF_ETH_RSS_L3_DST_ONLY)
+ {
+ REFINE_PROTO_FLD (DEL, IPV4_SRC);
+ }
+ else if (rss_type &
+ (AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY))
+ {
+ REFINE_PROTO_FLD (DEL, IPV4_DST);
+ REFINE_PROTO_FLD (DEL, IPV4_SRC);
+ }
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+
+ if (rss_type & AVF_ETH_RSS_IPV4_CHKSUM)
+ REFINE_PROTO_FLD (ADD, IPV4_CHKSUM);
+
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV4_FRAG:
+ if (rss_type &
+ (AVF_ETH_RSS_IPV4 | AVF_ETH_RSS_FRAG_IPV4 |
+ AVF_ETH_RSS_NONFRAG_IPV4_UDP | AVF_ETH_RSS_NONFRAG_IPV4_TCP |
+ AVF_ETH_RSS_NONFRAG_IPV4_SCTP))
+ {
+ if (rss_type & AVF_ETH_RSS_FRAG_IPV4)
+ REFINE_PROTO_FLD (ADD, IPV4_FRAG_PKID);
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+
+ if (rss_type & AVF_ETH_RSS_IPV4_CHKSUM)
+ REFINE_PROTO_FLD (ADD, IPV4_CHKSUM);
+
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV6:
+ if (rss_type &
+ (AVF_ETH_RSS_IPV6 | AVF_ETH_RSS_FRAG_IPV6 |
+ AVF_ETH_RSS_NONFRAG_IPV6_UDP | AVF_ETH_RSS_NONFRAG_IPV6_TCP |
+ AVF_ETH_RSS_NONFRAG_IPV6_SCTP))
+ {
+ if (rss_type & AVF_ETH_RSS_L3_SRC_ONLY)
+ {
+ REFINE_PROTO_FLD (DEL, IPV6_DST);
+ }
+ else if (rss_type & AVF_ETH_RSS_L3_DST_ONLY)
+ {
+ REFINE_PROTO_FLD (DEL, IPV6_SRC);
+ }
+ else if (rss_type &
+ (AVF_ETH_RSS_L4_SRC_ONLY | AVF_ETH_RSS_L4_DST_ONLY))
+ {
+ REFINE_PROTO_FLD (DEL, IPV6_DST);
+ REFINE_PROTO_FLD (DEL, IPV6_SRC);
+ }
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+ if (rss_type & AVF_ETH_RSS_L3_PRE64)
+ {
+ if (REFINE_PROTO_FLD (TEST, IPV6_SRC))
+ REPALCE_PROTO_FLD (IPV6_SRC, IPV6_PREFIX64_SRC);
+ if (REFINE_PROTO_FLD (TEST, IPV6_DST))
+ REPALCE_PROTO_FLD (IPV6_DST, IPV6_PREFIX64_DST);
+ }
+ break;
+ case VIRTCHNL_PROTO_HDR_IPV6_EH_FRAG:
+ if (rss_type & AVF_ETH_RSS_FRAG_IPV6)
+ REFINE_PROTO_FLD (ADD, IPV6_EH_FRAG_PKID);
+ else
+ hdr->field_selector = 0;
+
+ break;
+ case VIRTCHNL_PROTO_HDR_UDP:
+ if (rss_type &
+ (AVF_ETH_RSS_NONFRAG_IPV4_UDP | AVF_ETH_RSS_NONFRAG_IPV6_UDP))
+ {
+ if (rss_type & AVF_ETH_RSS_L4_SRC_ONLY)
+ REFINE_PROTO_FLD (DEL, UDP_DST_PORT);
+ else if (rss_type & AVF_ETH_RSS_L4_DST_ONLY)
+ REFINE_PROTO_FLD (DEL, UDP_SRC_PORT);
+ else if (rss_type &
+ (AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY))
+ hdr->field_selector = 0;
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+
+ if (rss_type & AVF_ETH_RSS_L4_CHKSUM)
+ REFINE_PROTO_FLD (ADD, UDP_CHKSUM);
+ break;
+ case VIRTCHNL_PROTO_HDR_TCP:
+ if (rss_type &
+ (AVF_ETH_RSS_NONFRAG_IPV4_TCP | AVF_ETH_RSS_NONFRAG_IPV6_TCP))
+ {
+ if (rss_type & AVF_ETH_RSS_L4_SRC_ONLY)
+ REFINE_PROTO_FLD (DEL, TCP_DST_PORT);
+ else if (rss_type & AVF_ETH_RSS_L4_DST_ONLY)
+ REFINE_PROTO_FLD (DEL, TCP_SRC_PORT);
+ else if (rss_type &
+ (AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY))
+ hdr->field_selector = 0;
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+
+ if (rss_type & AVF_ETH_RSS_L4_CHKSUM)
+ REFINE_PROTO_FLD (ADD, TCP_CHKSUM);
+ break;
+ case VIRTCHNL_PROTO_HDR_SCTP:
+ if (rss_type &
+ (AVF_ETH_RSS_NONFRAG_IPV4_SCTP | AVF_ETH_RSS_NONFRAG_IPV6_SCTP))
+ {
+ if (rss_type & AVF_ETH_RSS_L4_SRC_ONLY)
+ REFINE_PROTO_FLD (DEL, SCTP_DST_PORT);
+ else if (rss_type & AVF_ETH_RSS_L4_DST_ONLY)
+ REFINE_PROTO_FLD (DEL, SCTP_SRC_PORT);
+ else if (rss_type &
+ (AVF_ETH_RSS_L3_SRC_ONLY | AVF_ETH_RSS_L3_DST_ONLY))
+ hdr->field_selector = 0;
+ }
+ else
+ {
+ hdr->field_selector = 0;
+ }
+
+ if (rss_type & AVF_ETH_RSS_L4_CHKSUM)
+ REFINE_PROTO_FLD (ADD, SCTP_CHKSUM);
+ break;
+ case VIRTCHNL_PROTO_HDR_S_VLAN:
+ if (!(rss_type & AVF_ETH_RSS_S_VLAN))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_C_VLAN:
+ if (!(rss_type & AVF_ETH_RSS_C_VLAN))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_L2TPV3:
+ if (!(rss_type & AVF_ETH_RSS_L2TPV3))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_ESP:
+ if (!(rss_type & AVF_ETH_RSS_ESP))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_AH:
+ if (!(rss_type & AVF_ETH_RSS_AH))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_PFCP:
+ if (!(rss_type & AVF_ETH_RSS_PFCP))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_ECPRI:
+ if (!(rss_type & AVF_ETH_RSS_ECPRI))
+ hdr->field_selector = 0;
+ break;
+ case VIRTCHNL_PROTO_HDR_L2TPV2:
+ if (!(rss_type & AVF_ETH_RSS_L2TPV2))
+ hdr->field_selector = 0;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void
+avf_refine_proto_hdrs_by_pattern (struct virtchnl_proto_hdrs *proto_hdrs,
+ u64 phint)
+{
+ struct virtchnl_proto_hdr *hdr1;
+ struct virtchnl_proto_hdr *hdr2;
+ int i, shift_count = 1;
+ int tun_lvl = proto_hdrs->tunnel_level;
+
+ if (!(phint & AVF_PHINT_GTPU_MSK) && !(phint & AVF_PHINT_GRE))
+ return;
+
+ while (tun_lvl)
+ {
+ if (phint & AVF_PHINT_LAYERS_MSK)
+ shift_count = 2;
+
+ /* shift headers layer */
+ for (i = proto_hdrs->count - 1 + shift_count; i > shift_count - 1; i--)
+ {
+ hdr1 = &proto_hdrs->proto_hdr[i];
+ hdr2 = &proto_hdrs->proto_hdr[i - shift_count];
+ *hdr1 = *hdr2;
+ }
+
+ if (shift_count == 1)
+ {
+ /* adding tunnel header at layer 0 */
+ hdr1 = &proto_hdrs->proto_hdr[0];
+ }
+ else
+ {
+ /* adding tunnel header and outer ip header */
+ hdr1 = &proto_hdrs->proto_hdr[1];
+ hdr2 = &proto_hdrs->proto_hdr[0];
+ hdr2->field_selector = 0;
+ proto_hdrs->count++;
+ tun_lvl--;
+
+ if (tun_lvl == TUNNEL_LEVEL_OUTER)
+ {
+ if (phint & AVF_PHINT_OUTER_IPV4)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr2, IPV4);
+ else if (phint & AVF_PHINT_OUTER_IPV6)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr2, IPV6);
+ }
+ else if (tun_lvl == TUNNEL_LEVEL_INNER)
+ {
+ if (phint & AVF_PHINT_MID_IPV4)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr2, IPV4);
+ else if (phint & AVF_PHINT_MID_IPV6)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr2, IPV6);
+ }
+ }
+
+ hdr1->field_selector = 0;
+ proto_hdrs->count++;
+
+ if (phint & AVF_PHINT_GTPU_EH_DWN)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GTPU_EH_PDU_DWN);
+ else if (phint & AVF_PHINT_GTPU_EH_UP)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GTPU_EH_PDU_UP);
+ else if (phint & AVF_PHINT_GTPU_EH)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GTPU_EH);
+ else if (phint & AVF_PHINT_GTPU)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GTPU_IP);
+
+ if (phint & AVF_PHINT_GRE)
+ {
+ if (phint & AVF_PHINT_GTPU)
+ {
+ /* if GTPoGRE, add GRE header at the outer tunnel */
+ if (tun_lvl == TUNNEL_LEVEL_OUTER)
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GRE);
+ }
+ else
+ {
+ VIRTCHNL_SET_PROTO_HDR_TYPE (hdr1, GRE);
+ }
+ }
+ }
+ proto_hdrs->tunnel_level = tun_lvl;
+}
+
+static void
+avf_refine_proto_hdrs (struct virtchnl_proto_hdrs *proto_hdrs, u64 rss_type,
+ u64 phint)
+{
+ avf_refine_proto_hdrs_l234 (proto_hdrs, rss_type);
+ avf_refine_proto_hdrs_by_pattern (proto_hdrs, phint);
+ avf_refine_proto_hdrs_gtpu (proto_hdrs, rss_type);
+}
+
+static int
+avf_rss_parse_action (const struct avf_flow_action actions[],
+ struct virtchnl_rss_cfg *rss_cfg,
+ struct avf_pattern_match_item *match_item, u64 phint,
+ struct avf_flow_error *error)
+{
+ const struct avf_flow_action_rss *rss;
+ const struct avf_flow_action *action;
+ u64 rss_type;
+ int ret;
+
+ for (action = actions; action->type != AVF_FLOW_ACTION_TYPE_END; action++)
+ {
+ switch (action->type)
+ {
+ case AVF_FLOW_ACTION_TYPE_RSS:
+ rss = action->conf;
+
+ if (rss->func == AVF_ETH_HASH_FUNCTION_SIMPLE_XOR)
+ {
+ rss_cfg->rss_algorithm = VIRTCHNL_RSS_ALG_XOR_ASYMMETRIC;
+ ret = avf_flow_error_set (error, AVF_FAILURE,
+ AVF_FLOW_ERROR_TYPE_ACTION, actions,
+ "simple xor is not supported.");
+ return ret;
+ }
+ else if (rss->func == AVF_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ)
+ {
+ rss_cfg->rss_algorithm = VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC;
+ }
+ else
+ {
+ rss_cfg->rss_algorithm = VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC;
+ }
+
+ if (rss->level)
+ return avf_flow_error_set (
+ error, AVF_FAILURE, AVF_FLOW_ERROR_TYPE_ACTION, actions,
+ "a nonzero RSS encapsulation level is not supported");
+
+ if (rss->key_len)
+ return avf_flow_error_set (
+ error, AVF_FAILURE, AVF_FLOW_ERROR_TYPE_ACTION, actions,
+ "a nonzero RSS key_len is not supported");
+
+ if (rss->queue_num)
+ return avf_flow_error_set (
+ error, AVF_FAILURE, AVF_FLOW_ERROR_TYPE_ACTION, actions,
+ "a non-NULL RSS queue is not supported");
+
+ if (phint == AVF_PHINT_RAW)
+ break;
+
+ rss_type = avf_eth_rss_hf_refine (rss->types);
+
+ if (avf_any_invalid_rss_type (rss->func, rss_type,
+ match_item->input_set_mask))
+ return avf_flow_error_set (error, AVF_FAILURE,
+ AVF_FLOW_ERROR_TYPE_ACTION, actions,
+ "RSS type not supported");
+
+ memcpy (&rss_cfg->proto_hdrs, match_item->meta,
+ sizeof (struct virtchnl_proto_hdrs));
+
+ avf_refine_proto_hdrs (&rss_cfg->proto_hdrs, rss_type, phint);
+
+ break;
+
+ default:
+ return avf_flow_error_set (error, AVF_FAILURE,
+ AVF_FLOW_ERROR_TYPE_ACTION, actions,
+ "Invalid action.");
+ }
+ }
+
+ return 0;
+}
+
+static int
+avf_rss_parse_generic_pattern (struct virtchnl_rss_cfg *rss_cfg,
+ struct avf_flow_item avf_items[],
+ struct avf_flow_error *error)
+{
+ struct avf_flow_item *item = avf_items;
+ u8 *pkt_buf, *msk_buf;
+ u16 spec_len, pkt_len;
+
+ spec_len = clib_strnlen (item->spec, VIRTCHNL_MAX_SIZE_GEN_PACKET);
+ pkt_len = spec_len / 2;
+
+ pkt_buf = clib_mem_alloc (pkt_len);
+ msk_buf = clib_mem_alloc (pkt_len);
+
+ avf_parse_generic_pattern (item, pkt_buf, msk_buf, spec_len);
+
+ clib_memcpy (rss_cfg->proto_hdrs.raw.spec, pkt_buf, pkt_len);
+ clib_memcpy (rss_cfg->proto_hdrs.raw.mask, msk_buf, pkt_len);
+
+ rss_cfg->proto_hdrs.count = 0;
+ rss_cfg->proto_hdrs.tunnel_level = 0;
+ rss_cfg->proto_hdrs.raw.pkt_len = pkt_len;
+
+ clib_mem_free (pkt_buf);
+ clib_mem_free (msk_buf);
+
+ return 0;
+}
+
+/* Find the first VOID or non-VOID item pointer */
+static const struct avf_flow_item *
+avf_find_first_item (const struct avf_flow_item *item, int is_void)
+{
+ int is_find;
+
+ while (item->type != AVF_FLOW_ITEM_TYPE_END)
+ {
+ if (is_void)
+ is_find = item->type == AVF_FLOW_ITEM_TYPE_VOID;
+ else
+ is_find = item->type != AVF_FLOW_ITEM_TYPE_VOID;
+ if (is_find)
+ break;
+ item++;
+ }
+ return item;
+}
+
+/* Skip all VOID items of the pattern */
+static void
+avf_pattern_skip_void_item (struct avf_flow_item *items,
+ const struct avf_flow_item *pattern)
+{
+ u32 cpy_count = 0;
+ const struct avf_flow_item *pb = pattern, *pe = pattern;
+
+ for (;;)
+ {
+ /* Find a non-void item first */
+ pb = avf_find_first_item (pb, 0);
+ if (pb->type == AVF_FLOW_ITEM_TYPE_END)
+ {
+ pe = pb;
+ break;
+ }
+
+ /* Find a void item */
+ pe = avf_find_first_item (pb + 1, 1);
+
+ cpy_count = pe - pb;
+ clib_memcpy (items, pb, sizeof (struct avf_flow_item) * cpy_count);
+
+ items += cpy_count;
+
+ if (pe->type == AVF_FLOW_ITEM_TYPE_END)
+ break;
+
+ pb = pe + 1;
+ }
+ /* Copy the END item. */
+ clib_memcpy (items, pe, sizeof (struct avf_flow_item));
+}
+
+/* Check if the pattern matches a supported item type array */
+static int
+avf_match_pattern (enum avf_flow_item_type *item_array,
+ const struct avf_flow_item *pattern)
+{
+ const struct avf_flow_item *item = pattern;
+
+ while ((*item_array == item->type) &&
+ (*item_array != AVF_FLOW_ITEM_TYPE_END))
+ {
+ item_array++;
+ item++;
+ }
+
+ return (*item_array == AVF_FLOW_ITEM_TYPE_END &&
+ item->type == AVF_FLOW_ITEM_TYPE_END);
+}
+
+static int
+avf_rss_search_pattern_match_item (const struct avf_flow_item pattern[],
+ struct avf_pattern_match_item **match_item,
+ struct avf_flow_error *error)
+{
+ u16 i = 0;
+ struct avf_pattern_match_item *array = avf_hash_pattern_list;
+ u32 array_len =
+ sizeof (avf_hash_pattern_list) / sizeof (avf_hash_pattern_list[0]);
+ /* need free by each filter */
+ struct avf_flow_item *items; /* used for pattern without VOID items */
+ u32 item_num = 0; /* non-void item number */
+
+ /* Get the non-void item number of pattern */
+ while ((pattern + i)->type != AVF_FLOW_ITEM_TYPE_END)
+ {
+ if ((pattern + i)->type != AVF_FLOW_ITEM_TYPE_VOID)
+ item_num++;
+ i++;
+ }
+ item_num++;
+
+ items = clib_mem_alloc (item_num * sizeof (struct avf_flow_item));
+ avf_pattern_skip_void_item (items, pattern);
+
+ for (i = 0; i < array_len; i++)
+ if (avf_match_pattern (array[i].pattern_list, items))
+ {
+ *match_item = &array[i];
+ clib_mem_free (items);
+ return 0;
+ }
+ avf_flow_error_set (error, AVF_FAILURE, AVF_FLOW_ERROR_TYPE_ITEM, pattern,
+ "Unsupported pattern");
+
+ *match_item = NULL;
+ clib_mem_free (items);
+ return -1;
+}
+
+static void
+avf_rss_parse_pattern (const struct avf_flow_item pattern[], u64 *phint)
+{
+ const struct avf_flow_item *item = pattern;
+ const struct avf_gtp_psc_hdr *psc;
+
+ for (item = pattern; item->type != AVF_FLOW_ITEM_TYPE_END; item++)
+ {
+
+ switch (item->type)
+ {
+ case AVF_FLOW_ITEM_TYPE_RAW:
+ *phint |= AVF_PHINT_RAW;
+ break;
+ case AVF_FLOW_ITEM_TYPE_IPV4:
+ if (!(*phint & AVF_PHINT_GTPU_MSK) && !(*phint & AVF_PHINT_GRE) &&
+ !(*phint & AVF_PHINT_L2TPV2))
+ *phint |= AVF_PHINT_OUTER_IPV4;
+ if ((*phint & AVF_PHINT_GRE) && !(*phint & AVF_PHINT_GTPU_MSK))
+ *phint |= AVF_PHINT_MID_IPV4;
+ break;
+ case AVF_FLOW_ITEM_TYPE_IPV6:
+ if (!(*phint & AVF_PHINT_GTPU_MSK) && !(*phint & AVF_PHINT_GRE) &&
+ !(*phint & AVF_PHINT_L2TPV2))
+ *phint |= AVF_PHINT_OUTER_IPV6;
+ if ((*phint & AVF_PHINT_GRE) && !(*phint & AVF_PHINT_GTPU_MSK))
+ *phint |= AVF_PHINT_MID_IPV6;
+ break;
+ case AVF_FLOW_ITEM_TYPE_GTPU:
+ *phint |= AVF_PHINT_GTPU;
+ break;
+ case AVF_FLOW_ITEM_TYPE_GTP_PSC:
+ *phint |= AVF_PHINT_GTPU_EH;
+ psc = item->spec;
+ if (!psc)
+ break;
+ else if (psc->pdu_type == AVF_GTPU_EH_UPLINK)
+ *phint |= AVF_PHINT_GTPU_EH_UP;
+ else if (psc->pdu_type == AVF_GTPU_EH_DWNLINK)
+ *phint |= AVF_PHINT_GTPU_EH_DWN;
+ break;
+ case AVF_FLOW_ITEM_TYPE_GRE:
+ *phint |= AVF_PHINT_GRE;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+int
+avf_rss_parse_pattern_action (struct avf_flow_item avf_items[],
+ struct avf_flow_action avf_actions[],
+ struct virtchnl_rss_cfg *rss_cfg,
+ struct avf_flow_error *error)
+{
+ struct avf_pattern_match_item *match_item = NULL;
+ u64 pattern_hint = 0;
+ int ret = 0;
+
+ ret = avf_rss_search_pattern_match_item (avf_items, &match_item, error);
+ if (ret)
+ return ret;
+
+ avf_rss_parse_pattern (avf_items, &pattern_hint);
+
+ if (pattern_hint == AVF_PHINT_RAW)
+ {
+ ret = avf_rss_parse_generic_pattern (rss_cfg, avf_items, error);
+ if (ret)
+ return ret;
+ }
+
+ ret = avf_rss_parse_action (avf_actions, rss_cfg, match_item, pattern_hint,
+ error);
+ return ret;
+}
+
+int
+avf_rss_rule_create (struct avf_flow_vc_ctx *ctx,
+ struct virtchnl_rss_cfg *rss_cfg)
+{
+ int ret;
+
+ ret = ctx->vc_op (ctx->vc_hdl, VIRTCHNL_ADV_OP_ADD_RSS_CFG, rss_cfg,
+ sizeof (*rss_cfg), 0, 0);
+
+ return ret;
+}
+
+int
+avf_rss_rule_destroy (struct avf_flow_vc_ctx *ctx,
+ struct virtchnl_rss_cfg *rss_cfg)
+{
+ int ret;
+
+ ret = ctx->vc_op (ctx->vc_hdl, VIRTCHNL_ADV_OP_DEL_RSS_CFG, rss_cfg,
+ sizeof (*rss_cfg), 0, 0);
+
+ return ret;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/avf/cli.c b/src/plugins/avf/cli.c
index 47b4b9236f2..391ff25567a 100644
--- a/src/plugins/avf/cli.c
+++ b/src/plugins/avf/cli.c
@@ -30,37 +30,31 @@ static clib_error_t *
avf_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- unformat_input_t _line_input, *line_input = &_line_input;
avf_create_if_args_t args;
u32 tmp;
clib_memset (&args, 0, sizeof (avf_create_if_args_t));
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "%U", unformat_vlib_pci_addr, &args.addr))
+ if (unformat (input, "%U", unformat_vlib_pci_addr, &args.addr))
;
- else if (unformat (line_input, "elog"))
+ else if (unformat (input, "elog"))
args.enable_elog = 1;
- else if (unformat (line_input, "rx-queue-size %u", &tmp))
+ else if (unformat (input, "rx-queue-size %u", &tmp))
args.rxq_size = tmp;
- else if (unformat (line_input, "tx-queue-size %u", &tmp))
+ else if (unformat (input, "tx-queue-size %u", &tmp))
args.txq_size = tmp;
- else if (unformat (line_input, "num-rx-queues %u", &tmp))
+ else if (unformat (input, "num-rx-queues %u", &tmp))
args.rxq_num = tmp;
- else if (unformat (line_input, "num-tx-queues %u", &tmp))
+ else if (unformat (input, "num-tx-queues %u", &tmp))
args.txq_num = tmp;
- else if (unformat (line_input, "name %s", &args.name))
+ else if (unformat (input, "name %s", &args.name))
;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
- unformat_free (line_input);
avf_create_if (vm, &args);
@@ -69,7 +63,6 @@ avf_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (avf_create_command, static) = {
.path = "create interface avf",
.short_help = "create interface avf <pci-address> "
@@ -77,33 +70,26 @@ VLIB_CLI_COMMAND (avf_create_command, static) = {
"[num-rx-queues <size>]",
.function = avf_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
avf_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- unformat_input_t _line_input, *line_input = &_line_input;
u32 sw_if_index = ~0;
vnet_hw_interface_t *hw;
vnet_main_t *vnm = vnet_get_main ();
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ if (unformat (input, "sw_if_index %d", &sw_if_index))
;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
- vnm, &sw_if_index))
+ else if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
- unformat_free (line_input);
if (sw_if_index == ~0)
return clib_error_return (0,
@@ -119,7 +105,6 @@ avf_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (avf_delete_command, static) = {
.path = "delete interface avf",
.short_help = "delete interface avf "
@@ -127,41 +112,34 @@ VLIB_CLI_COMMAND (avf_delete_command, static) = {
.function = avf_delete_command_fn,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
avf_test_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- unformat_input_t _line_input, *line_input = &_line_input;
u32 sw_if_index = ~0;
vnet_hw_interface_t *hw;
avf_device_t *ad;
vnet_main_t *vnm = vnet_get_main ();
int test_irq = 0, enable_elog = 0, disable_elog = 0;
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ if (unformat (input, "sw_if_index %d", &sw_if_index))
;
- else if (unformat (line_input, "irq"))
+ else if (unformat (input, "irq"))
test_irq = 1;
- else if (unformat (line_input, "elog-on"))
+ else if (unformat (input, "elog-on"))
enable_elog = 1;
- else if (unformat (line_input, "elog-off"))
+ else if (unformat (input, "elog-off"))
disable_elog = 1;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
- vnm, &sw_if_index))
+ else if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
- unformat_free (line_input);
if (sw_if_index == ~0)
return clib_error_return (0,
@@ -185,14 +163,12 @@ avf_test_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (avf_test_command, static) = {
.path = "test avf",
.short_help = "test avf [<interface> | sw_if_index <sw_idx>] [irq] "
"[elog-on] [elog-off]",
.function = avf_test_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
avf_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/avf/device.c b/src/plugins/avf/device.c
index dbaf4a4a7f2..1618800c432 100644
--- a/src/plugins/avf/device.c
+++ b/src/plugins/avf/device.c
@@ -156,7 +156,6 @@ avf_aq_desc_enq (vlib_main_t * vm, avf_device_t * ad, avf_aq_desc_t * dt,
if (ad->flags & AVF_DEVICE_F_ELOG)
clib_memcpy_fast (&dc, d, sizeof (avf_aq_desc_t));
- CLIB_MEMORY_BARRIER ();
ad->atq_next_slot = (ad->atq_next_slot + 1) % AVF_MBOX_LEN;
avf_reg_write (ad, AVF_ATQT, ad->atq_next_slot);
avf_reg_flush (ad);
@@ -610,18 +609,25 @@ avf_op_config_rss_lut (vlib_main_t * vm, avf_device_t * ad)
clib_error_t *
avf_op_config_rss_key (vlib_main_t * vm, avf_device_t * ad)
{
+ /* from DPDK i40e... */
+ static uint32_t rss_key_default[] = { 0x6b793944, 0x23504cb5, 0x5bea75b6,
+ 0x309f4f12, 0x3dc0a2b8, 0x024ddcdf,
+ 0x339b8ca0, 0x4c4af64a, 0x34fac605,
+ 0x55d85839, 0x3a58997d, 0x2ec938e1,
+ 0x66031581 };
int msg_len = sizeof (virtchnl_rss_key_t) + ad->rss_key_size - 1;
- int i;
u8 msg[msg_len];
virtchnl_rss_key_t *rk;
+ if (sizeof (rss_key_default) != ad->rss_key_size)
+ return clib_error_create ("unsupported RSS key size (expected %d, got %d)",
+ sizeof (rss_key_default), ad->rss_key_size);
+
clib_memset (msg, 0, msg_len);
rk = (virtchnl_rss_key_t *) msg;
rk->vsi_id = ad->vsi_id;
rk->key_len = ad->rss_key_size;
- u32 seed = random_default_seed ();
- for (i = 0; i < ad->rss_key_size; i++)
- rk->key[i] = (u8) random_u32 (&seed);
+ memcpy_s (rk->key, rk->key_len, rss_key_default, sizeof (rss_key_default));
avf_log_debug (ad, "config_rss_key: vsi_id %u rss_key_size %u key 0x%U",
rk->vsi_id, rk->key_len, format_hex_bytes_no_wrap, rk->key,
@@ -1031,6 +1037,12 @@ avf_device_init (vlib_main_t * vm, avf_main_t * am, avf_device_t * ad,
outer = vc.offloads.stripping_support.outer & mask;
inner = vc.offloads.stripping_support.inner & mask;
+ /* Check for ability to modify the VLAN setting */
+ outer =
+ vc.offloads.stripping_support.outer & VIRTCHNL_VLAN_TOGGLE ? outer : 0;
+ inner =
+ vc.offloads.stripping_support.inner & VIRTCHNL_VLAN_TOGGLE ? inner : 0;
+
if ((outer || inner) &&
(error = avf_op_disable_vlan_stripping_v2 (vm, ad, outer, inner)))
return error;
@@ -1119,7 +1131,6 @@ avf_process_one_device (vlib_main_t * vm, avf_device_t * ad, int is_irq)
if (is_irq == 0)
avf_op_get_stats (vm, ad, &ad->eth_stats);
- /* *INDENT-OFF* */
vec_foreach (e, ad->events)
{
avf_log_debug (ad, "event: %s (%u) sev %d",
@@ -1162,8 +1173,9 @@ avf_process_one_device (vlib_main_t * vm, avf_device_t * ad, int is_irq)
flags |= (VNET_HW_INTERFACE_FLAG_FULL_DUPLEX |
VNET_HW_INTERFACE_FLAG_LINK_UP);
vnet_hw_interface_set_flags (vnm, ad->hw_if_index, flags);
- vnet_hw_interface_set_link_speed (vnm, ad->hw_if_index,
- mbps * 1000);
+ vnet_hw_interface_set_link_speed (
+ vnm, ad->hw_if_index,
+ (mbps == UINT32_MAX) ? UINT32_MAX : mbps * 1000);
ad->link_speed = mbps;
}
else if (!link_up && (ad->flags & AVF_DEVICE_F_LINK_UP) != 0)
@@ -1214,7 +1226,6 @@ avf_process_one_device (vlib_main_t * vm, avf_device_t * ad, int is_irq)
}
}
}
- /* *INDENT-ON* */
vec_reset_length (ad->events);
return;
@@ -1227,16 +1238,32 @@ error:
clib_error_t *
avf_op_program_flow (vlib_main_t *vm, avf_device_t *ad, int is_create,
- u8 *rule, u32 rule_len, u8 *program_status,
- u32 status_len)
+ enum virthnl_adv_ops vc_op, u8 *rule, u32 rule_len,
+ u8 *program_status, u32 status_len)
{
+ virtchnl_ops_t op;
+
avf_log_debug (ad, "avf_op_program_flow: vsi_id %u is_create %u", ad->vsi_id,
is_create);
- return avf_send_to_pf (vm, ad,
- is_create ? VIRTCHNL_OP_ADD_FDIR_FILTER :
- VIRTCHNL_OP_DEL_FDIR_FILTER,
- rule, rule_len, program_status, status_len);
+ switch (vc_op)
+ {
+ case VIRTCHNL_ADV_OP_ADD_FDIR_FILTER:
+ case VIRTCHNL_ADV_OP_DEL_FDIR_FILTER:
+ op =
+ is_create ? VIRTCHNL_OP_ADD_FDIR_FILTER : VIRTCHNL_OP_DEL_FDIR_FILTER;
+ break;
+ case VIRTCHNL_ADV_OP_ADD_RSS_CFG:
+ case VIRTCHNL_ADV_OP_DEL_RSS_CFG:
+ op = is_create ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG;
+ break;
+ default:
+ return clib_error_return (0, "invalid virtchnl opcode");
+ ;
+ }
+
+ return avf_send_to_pf (vm, ad, op, rule, rule_len, program_status,
+ status_len);
}
static void
@@ -1250,9 +1277,9 @@ avf_process_handle_request (vlib_main_t * vm, avf_process_req_t * req)
else if (req->type == AVF_PROCESS_REQ_CONFIG_PROMISC_MDDE)
req->error = avf_op_config_promisc_mode (vm, ad, req->is_enable);
else if (req->type == AVF_PROCESS_REQ_PROGRAM_FLOW)
- req->error =
- avf_op_program_flow (vm, ad, req->is_add, req->rule, req->rule_len,
- req->program_status, req->status_len);
+ req->error = avf_op_program_flow (vm, ad, req->is_add, req->vc_op,
+ req->rule, req->rule_len,
+ req->program_status, req->status_len);
else
clib_panic ("BUG: unknown avf proceess request type");
@@ -1374,7 +1401,6 @@ avf_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
/* create local list of device pointers as device pool may grow
* during suspend */
vec_reset_length (dev_pointers);
- /* *INDENT-OFF* */
pool_foreach_index (i, am->devices)
{
vec_add1 (dev_pointers, avf_get_device (i));
@@ -1384,19 +1410,16 @@ avf_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
{
avf_process_one_device (vm, dev_pointers[i], irq);
};
- /* *INDENT-ON* */
last_run_duration = vlib_time_now (vm) - last_periodic_time;
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (avf_process_node) = {
.function = avf_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "avf-process",
};
-/* *INDENT-ON* */
static void
avf_irq_0_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
@@ -1409,13 +1432,11 @@ avf_irq_0_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
if (ad->flags & AVF_DEVICE_F_ELOG)
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (el) =
{
.format = "avf[%d] irq 0: icr0 0x%x",
.format_args = "i4i4",
};
- /* *INDENT-ON* */
struct
{
u32 dev_instance;
@@ -1445,13 +1466,11 @@ avf_irq_n_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
if (ad->flags & AVF_DEVICE_F_ELOG)
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (el) =
{
.format = "avf[%d] irq %d: received",
.format_args = "i4i2",
};
- /* *INDENT-ON* */
struct
{
u32 dev_instance;
@@ -1497,7 +1516,6 @@ avf_delete_if (vlib_main_t * vm, avf_device_t * ad, int with_barrier)
vlib_physmem_free (vm, ad->atq_bufs);
vlib_physmem_free (vm, ad->arq_bufs);
- /* *INDENT-OFF* */
vec_foreach_index (i, ad->rxqs)
{
avf_rxq_t *rxq = vec_elt_at_index (ad->rxqs, i);
@@ -1507,10 +1525,8 @@ avf_delete_if (vlib_main_t * vm, avf_device_t * ad, int with_barrier)
rxq->n_enqueued);
vec_free (rxq->bufs);
}
- /* *INDENT-ON* */
vec_free (ad->rxqs);
- /* *INDENT-OFF* */
vec_foreach_index (i, ad->txqs)
{
avf_txq_t *txq = vec_elt_at_index (ad->txqs, i);
@@ -1530,7 +1546,6 @@ avf_delete_if (vlib_main_t * vm, avf_device_t * ad, int with_barrier)
vec_free (txq->tmp_descs);
clib_spinlock_free (&txq->lock);
}
- /* *INDENT-ON* */
vec_free (ad->txqs);
vec_free (ad->name);
@@ -1582,6 +1597,7 @@ void
avf_create_if (vlib_main_t * vm, avf_create_if_args_t * args)
{
vnet_main_t *vnm = vnet_get_main ();
+ vnet_eth_interface_registration_t eir = {};
avf_main_t *am = &avf_main;
avf_device_t *ad, **adp;
vlib_pci_dev_handle_t h;
@@ -1592,7 +1608,6 @@ avf_create_if (vlib_main_t * vm, avf_create_if_args_t * args)
if (avf_validate_queue_size (args) != 0)
return;
- /* *INDENT-OFF* */
pool_foreach (adp, am->devices) {
if ((*adp)->pci_addr.as_u32 == args->addr.as_u32)
{
@@ -1603,7 +1618,6 @@ avf_create_if (vlib_main_t * vm, avf_create_if_args_t * args)
return;
}
}
- /* *INDENT-ON* */
pool_get (am->devices, adp);
adp[0] = ad = clib_mem_alloc_aligned (sizeof (avf_device_t),
@@ -1715,29 +1729,25 @@ avf_create_if (vlib_main_t * vm, avf_create_if_args_t * args)
goto error;
/* create interface */
- error = ethernet_register_interface (vnm, avf_device_class.index,
- ad->dev_instance, ad->hwaddr,
- &ad->hw_if_index, avf_flag_change);
+ eir.dev_class_index = avf_device_class.index;
+ eir.dev_instance = ad->dev_instance;
+ eir.address = ad->hwaddr;
+ eir.cb.flag_change = avf_flag_change;
+ ad->hw_if_index = vnet_eth_register_interface (vnm, &eir);
- if (error)
- goto error;
-
- /* Indicate ability to support L3 DMAC filtering and
- * initialize interface to L3 non-promisc mode */
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, ad->hw_if_index);
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER |
- VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO;
ethernet_set_flags (vnm, ad->hw_if_index,
ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, ad->hw_if_index);
args->sw_if_index = ad->sw_if_index = sw->sw_if_index;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, ad->hw_if_index);
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
vnet_hw_if_set_input_node (vnm, ad->hw_if_index, avf_input_node.index);
+ /* set hw interface caps */
+ vnet_hw_if_set_caps (vnm, ad->hw_if_index,
+ VNET_HW_IF_CAP_INT_MODE | VNET_HW_IF_CAP_MAC_FILTER |
+ VNET_HW_IF_CAP_TX_CKSUM | VNET_HW_IF_CAP_TCP_GSO);
+
for (i = 0; i < ad->n_rx_queues; i++)
{
u32 qi, fi;
@@ -1883,8 +1893,8 @@ avf_clear_hw_interface_counters (u32 instance)
}
clib_error_t *
-avf_program_flow (u32 dev_instance, int is_add, u8 *rule, u32 rule_len,
- u8 *program_status, u32 status_len)
+avf_program_flow (u32 dev_instance, int is_add, enum virthnl_adv_ops vc_op,
+ u8 *rule, u32 rule_len, u8 *program_status, u32 status_len)
{
vlib_main_t *vm = vlib_get_main ();
avf_process_req_t req;
@@ -1892,6 +1902,7 @@ avf_program_flow (u32 dev_instance, int is_add, u8 *rule, u32 rule_len,
req.dev_instance = dev_instance;
req.type = AVF_PROCESS_REQ_PROGRAM_FLOW;
req.is_add = is_add;
+ req.vc_op = vc_op;
req.rule = rule;
req.rule_len = rule_len;
req.program_status = program_status;
@@ -1900,7 +1911,6 @@ avf_program_flow (u32 dev_instance, int is_add, u8 *rule, u32 rule_len,
return avf_process_request (vm, &req);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (avf_device_class, ) = {
.name = "Adaptive Virtual Function (AVF) interface",
.clear_counters = avf_clear_hw_interface_counters,
@@ -1914,7 +1924,6 @@ VNET_DEVICE_CLASS (avf_device_class, ) = {
.tx_function_error_strings = avf_tx_func_error_strings,
.flow_ops_function = avf_flow_ops_fn,
};
-/* *INDENT-ON* */
clib_error_t *
avf_init (vlib_main_t * vm)
@@ -1928,17 +1937,4 @@ avf_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION (avf_init) =
-{
- .runs_after = VLIB_INITS ("pci_bus_init"),
-};
-/* *INDENT-OFF* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+VLIB_INIT_FUNCTION (avf_init);
diff --git a/src/plugins/avf/flow.c b/src/plugins/avf/flow.c
index e0d21cd96a2..91c5e018c71 100644
--- a/src/plugins/avf/flow.c
+++ b/src/plugins/avf/flow.c
@@ -44,6 +44,8 @@
(f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \
(f->type == VNET_FLOW_TYPE_IP6_VXLAN))
+#define FLOW_IS_GENERIC_CLASS(f) (f->type == VNET_FLOW_TYPE_GENERIC)
+
/* check if flow is L3 type */
#define FLOW_IS_L3_TYPE(f) \
((f->type == VNET_FLOW_TYPE_IP4) || (f->type == VNET_FLOW_TYPE_IP6))
@@ -62,8 +64,25 @@
(f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
(f->type == VNET_FLOW_TYPE_IP4_GTPU))
+static inline void
+avf_flow_convert_rss_types (u64 type, u64 *avf_rss_type)
+{
+#define BIT_IS_SET(v, b) ((v) & (u64) 1 << (b))
+
+ *avf_rss_type = 0;
+
+#undef _
+#define _(n, f, s) \
+ if (n != -1 && BIT_IS_SET (type, n)) \
+ *avf_rss_type |= f;
+
+ foreach_avf_rss_hf
+#undef _
+ return;
+}
+
int
-avf_fdir_vc_op_callback (void *vc_hdl, enum virthnl_adv_ops vc_op, void *in,
+avf_flow_vc_op_callback (void *vc_hdl, enum virthnl_adv_ops vc_op, void *in,
u32 in_len, void *out, u32 out_len)
{
u32 dev_instance = *(u32 *) vc_hdl;
@@ -79,9 +98,11 @@ avf_fdir_vc_op_callback (void *vc_hdl, enum virthnl_adv_ops vc_op, void *in,
switch (vc_op)
{
case VIRTCHNL_ADV_OP_ADD_FDIR_FILTER:
+ case VIRTCHNL_ADV_OP_ADD_RSS_CFG:
is_add = 1;
break;
case VIRTCHNL_ADV_OP_DEL_FDIR_FILTER:
+ case VIRTCHNL_ADV_OP_DEL_RSS_CFG:
is_add = 0;
break;
default:
@@ -90,18 +111,114 @@ avf_fdir_vc_op_callback (void *vc_hdl, enum virthnl_adv_ops vc_op, void *in,
return -1;
}
- err = avf_program_flow (dev_instance, is_add, in, in_len, out, out_len);
+ err =
+ avf_program_flow (dev_instance, is_add, vc_op, in, in_len, out, out_len);
if (err != 0)
{
- avf_log_err (ad, "avf fdir program failed: %U", format_clib_error, err);
+ avf_log_err (ad, "avf flow program failed: %U", format_clib_error, err);
clib_error_free (err);
return -1;
}
- avf_log_debug (ad, "avf fdir program success");
+ avf_log_debug (ad, "avf flow program success");
return 0;
}
+static inline enum avf_eth_hash_function
+avf_flow_convert_rss_func (vnet_rss_function_t func)
+{
+ enum avf_eth_hash_function rss_func;
+
+ switch (func)
+ {
+ case VNET_RSS_FUNC_DEFAULT:
+ rss_func = AVF_ETH_HASH_FUNCTION_DEFAULT;
+ break;
+ case VNET_RSS_FUNC_TOEPLITZ:
+ rss_func = AVF_ETH_HASH_FUNCTION_TOEPLITZ;
+ break;
+ case VNET_RSS_FUNC_SIMPLE_XOR:
+ rss_func = AVF_ETH_HASH_FUNCTION_SIMPLE_XOR;
+ break;
+ case VNET_RSS_FUNC_SYMMETRIC_TOEPLITZ:
+ rss_func = AVF_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ;
+ break;
+ default:
+ rss_func = AVF_ETH_HASH_FUNCTION_MAX;
+ break;
+ }
+
+ return rss_func;
+}
+
+/** Maximum number of queue indices in struct avf_flow_action_rss. */
+#define ACTION_RSS_QUEUE_NUM 128
+
+static inline void
+avf_flow_convert_rss_queues (u32 queue_index, u32 queue_num,
+ struct avf_flow_action_rss *act_rss)
+{
+ u16 *queues = clib_mem_alloc (sizeof (*queues) * ACTION_RSS_QUEUE_NUM);
+ int i;
+
+ for (i = 0; i < queue_num; i++)
+ queues[i] = queue_index++;
+
+ act_rss->queue_num = queue_num;
+ act_rss->queue = queues;
+
+ return;
+}
+
+void
+avf_parse_generic_pattern (struct avf_flow_item *item, u8 *pkt_buf,
+ u8 *msk_buf, u16 spec_len)
+{
+ u8 *raw_spec, *raw_mask;
+ u8 tmp_val = 0;
+ u8 tmp_c = 0;
+ int i, j;
+
+ raw_spec = (u8 *) item->spec;
+ raw_mask = (u8 *) item->mask;
+
+ /* convert string to int array */
+ for (i = 0, j = 0; i < spec_len; i += 2, j++)
+ {
+ tmp_c = raw_spec[i];
+ if (tmp_c >= 'a' && tmp_c <= 'f')
+ tmp_val = tmp_c - 'a' + 10;
+ if (tmp_c >= 'A' && tmp_c <= 'F')
+ tmp_val = tmp_c - 'A' + 10;
+ if (tmp_c >= '0' && tmp_c <= '9')
+ tmp_val = tmp_c - '0';
+
+ tmp_c = raw_spec[i + 1];
+ if (tmp_c >= 'a' && tmp_c <= 'f')
+ pkt_buf[j] = tmp_val * 16 + tmp_c - 'a' + 10;
+ if (tmp_c >= 'A' && tmp_c <= 'F')
+ pkt_buf[j] = tmp_val * 16 + tmp_c - 'A' + 10;
+ if (tmp_c >= '0' && tmp_c <= '9')
+ pkt_buf[j] = tmp_val * 16 + tmp_c - '0';
+
+ tmp_c = raw_mask[i];
+ if (tmp_c >= 'a' && tmp_c <= 'f')
+ tmp_val = tmp_c - 0x57;
+ if (tmp_c >= 'A' && tmp_c <= 'F')
+ tmp_val = tmp_c - 0x37;
+ if (tmp_c >= '0' && tmp_c <= '9')
+ tmp_val = tmp_c - '0';
+
+ tmp_c = raw_mask[i + 1];
+ if (tmp_c >= 'a' && tmp_c <= 'f')
+ msk_buf[j] = tmp_val * 16 + tmp_c - 'a' + 10;
+ if (tmp_c >= 'A' && tmp_c <= 'F')
+ msk_buf[j] = tmp_val * 16 + tmp_c - 'A' + 10;
+ if (tmp_c >= '0' && tmp_c <= '9')
+ msk_buf[j] = tmp_val * 16 + tmp_c - '0';
+ }
+}
+
static int
avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
{
@@ -112,13 +229,15 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
u16 src_port_mask = 0, dst_port_mask = 0;
u8 protocol = IP_PROTOCOL_RESERVED;
bool fate = false;
+ bool is_fdir = true;
struct avf_flow_error error;
int layer = 0;
int action_count = 0;
- struct avf_fdir_vc_ctx vc_ctx;
+ struct avf_flow_vc_ctx vc_ctx;
struct avf_fdir_conf *filter;
+ struct virtchnl_rss_cfg *rss_cfg;
struct avf_flow_item avf_items[VIRTCHNL_MAX_NUM_PROTO_HDRS];
struct avf_flow_action avf_actions[VIRTCHNL_MAX_NUM_ACTIONS];
@@ -133,6 +252,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
struct avf_flow_action_queue act_q = {};
struct avf_flow_action_mark act_msk = {};
+ struct avf_flow_action_rss act_rss = {};
enum
{
@@ -140,6 +260,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
FLOW_ETHERNET_CLASS,
FLOW_IPV4_CLASS,
FLOW_IPV6_CLASS,
+ FLOW_GENERIC_CLASS,
} flow_class = FLOW_UNKNOWN_CLASS;
if (FLOW_IS_ETHERNET_CLASS (f))
@@ -148,6 +269,8 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
flow_class = FLOW_IPV4_CLASS;
else if (FLOW_IS_IPV6_CLASS (f))
flow_class = FLOW_IPV6_CLASS;
+ else if (FLOW_IS_GENERIC_CLASS (f))
+ flow_class = FLOW_GENERIC_CLASS;
else
return VNET_FLOW_ERROR_NOT_SUPPORTED;
@@ -158,15 +281,35 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
goto done;
}
+ ret = avf_rss_cfg_create (&rss_cfg, 0);
+ if (ret)
+ {
+ rv = VNET_FLOW_ERROR_INTERNAL;
+ goto done;
+ }
+
/* init a virtual channel context */
vc_ctx.vc_hdl = &dev_instance;
- vc_ctx.vc_op = avf_fdir_vc_op_callback;
+ vc_ctx.vc_op = avf_flow_vc_op_callback;
clib_memset (avf_items, 0, sizeof (avf_actions));
clib_memset (avf_actions, 0, sizeof (avf_actions));
+ /* Handle generic flow first */
+ if (flow_class == FLOW_GENERIC_CLASS)
+ {
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_RAW;
+ avf_items[layer].is_generic = true;
+ avf_items[layer].spec = f->generic.pattern.spec;
+ avf_items[layer].mask = f->generic.pattern.mask;
+
+ layer++;
+
+ goto pattern_end;
+ }
+
/* Ethernet Layer */
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_ETH;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_ETH;
avf_items[layer].spec = NULL;
avf_items[layer].mask = NULL;
layer++;
@@ -176,7 +319,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
vnet_flow_ip4_t *ip4_ptr = &f->ip4;
/* IPv4 Layer */
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_IPV4;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_IPV4;
avf_items[layer].spec = &ip4_spec;
avf_items[layer].mask = &ip4_mask;
layer++;
@@ -215,7 +358,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
vnet_flow_ip6_t *ip6_ptr = &f->ip6;
/* IPv6 Layer */
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_IPV6;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_IPV6;
avf_items[layer].spec = &ip6_spec;
avf_items[layer].mask = &ip6_mask;
layer++;
@@ -260,7 +403,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
switch (protocol)
{
case IP_PROTOCOL_L2TP:
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_L2TPV3;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_L2TPV3OIP;
avf_items[layer].spec = &l2tpv3_spec;
avf_items[layer].mask = &l2tpv3_mask;
layer++;
@@ -271,7 +414,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
break;
case IP_PROTOCOL_IPSEC_ESP:
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_ESP;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_ESP;
avf_items[layer].spec = &esp_spec;
avf_items[layer].mask = &esp_mask;
layer++;
@@ -282,7 +425,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
break;
case IP_PROTOCOL_IPSEC_AH:
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_AH;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_AH;
avf_items[layer].spec = &ah_spec;
avf_items[layer].mask = &ah_mask;
layer++;
@@ -293,7 +436,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
break;
case IP_PROTOCOL_TCP:
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_TCP;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_TCP;
avf_items[layer].spec = &tcp_spec;
avf_items[layer].mask = &tcp_mask;
layer++;
@@ -311,7 +454,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
break;
case IP_PROTOCOL_UDP:
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_UDP;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_UDP;
avf_items[layer].spec = &udp_spec;
avf_items[layer].mask = &udp_mask;
layer++;
@@ -330,7 +473,7 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
/* handle the UDP tunnels */
if (f->type == VNET_FLOW_TYPE_IP4_GTPU)
{
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_GTPU_IP;
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_GTPU;
avf_items[layer].spec = &gtp_spec;
avf_items[layer].mask = &gtp_mask;
layer++;
@@ -348,30 +491,56 @@ avf_flow_add (u32 dev_instance, vnet_flow_t *f, avf_flow_entry_t *fe)
pattern_end:
/* pattern end flag */
- avf_items[layer].type = VIRTCHNL_PROTO_HDR_NONE;
- ret = avf_fdir_parse_pattern (filter, avf_items, &error);
- if (ret)
- {
- avf_log_err (ad, "avf fdir parse pattern failed: %s", error.message);
- rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
- goto done;
- }
+ avf_items[layer].type = AVF_FLOW_ITEM_TYPE_END;
/* Action */
/* Only one 'fate' can be assigned */
+ if (f->actions & VNET_FLOW_ACTION_RSS)
+ {
+ is_fdir = false;
+ avf_actions[action_count].conf = &act_rss;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_RSS;
+
+ avf_flow_convert_rss_types (f->rss_types, &act_rss.types);
+
+ if ((act_rss.func = avf_flow_convert_rss_func (f->rss_fun)) ==
+ AVF_ETH_HASH_FUNCTION_MAX)
+ {
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto done;
+ }
+
+ if (f->queue_num)
+ {
+ /* convert rss queues to array */
+ avf_flow_convert_rss_queues (f->queue_index, f->queue_num, &act_rss);
+ is_fdir = true;
+ }
+
+ fate = true;
+ action_count++;
+ }
+
if (f->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
{
- avf_actions[action_count].type = VIRTCHNL_ACTION_QUEUE;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_QUEUE;
avf_actions[action_count].conf = &act_q;
act_q.index = f->redirect_queue;
- fate = true;
+ if (fate == true)
+ {
+ rv = VNET_FLOW_ERROR_INTERNAL;
+ goto done;
+ }
+ else
+ fate = true;
+
action_count++;
}
if (f->actions & VNET_FLOW_ACTION_DROP)
{
- avf_actions[action_count].type = VIRTCHNL_ACTION_DROP;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_DROP;
avf_actions[action_count].conf = NULL;
if (fate == true)
@@ -381,13 +550,12 @@ pattern_end:
}
else
fate = true;
-
action_count++;
}
if (fate == false)
{
- avf_actions[action_count].type = VIRTCHNL_ACTION_PASSTHRU;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_PASSTHRU;
avf_actions[action_count].conf = NULL;
fate = true;
@@ -396,7 +564,7 @@ pattern_end:
if (f->actions & VNET_FLOW_ACTION_MARK)
{
- avf_actions[action_count].type = VIRTCHNL_ACTION_MARK;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_MARK;
avf_actions[action_count].conf = &act_msk;
action_count++;
@@ -404,16 +572,41 @@ pattern_end:
}
/* action end flag */
- avf_actions[action_count].type = VIRTCHNL_ACTION_NONE;
+ avf_actions[action_count].type = AVF_FLOW_ACTION_TYPE_END;
- /* parse action */
- ret = avf_fdir_parse_action (avf_actions, filter, &error);
- if (ret)
+ /* parse pattern and actions */
+ if (is_fdir)
{
- avf_log_err (ad, "avf fdir parse action failed: %s", error.message);
- rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
- goto done;
- }
+ if (flow_class == FLOW_GENERIC_CLASS)
+ {
+ ret = avf_fdir_parse_generic_pattern (filter, avf_items, &error);
+ if (ret)
+ {
+ avf_log_err (ad, "avf fdir parse generic pattern failed: %s",
+ error.message);
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto done;
+ }
+ }
+ else
+ {
+ ret = avf_fdir_parse_pattern (filter, avf_items, &error);
+ if (ret)
+ {
+ avf_log_err (ad, "avf fdir parse pattern failed: %s",
+ error.message);
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto done;
+ }
+ }
+
+ ret = avf_fdir_parse_action (avf_actions, filter, &error);
+ if (ret)
+ {
+ avf_log_err (ad, "avf fdir parse action failed: %s", error.message);
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto done;
+ }
/* create flow rule, save rule */
ret = avf_fdir_rule_create (&vc_ctx, filter);
@@ -428,7 +621,36 @@ pattern_end:
else
{
fe->rcfg = filter;
+ fe->flow_type_flag = 1;
+ }
}
+ else
+ {
+ ret =
+ avf_rss_parse_pattern_action (avf_items, avf_actions, rss_cfg, &error);
+ if (ret)
+ {
+ avf_log_err (ad, "avf rss parse pattern action failed: %s",
+ error.message);
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto done;
+ }
+ /* create flow rule, save rule */
+ ret = avf_rss_rule_create (&vc_ctx, rss_cfg);
+
+ if (ret)
+ {
+ avf_log_err (ad, "avf rss rule create failed");
+ rv = VNET_FLOW_ERROR_INTERNAL;
+ goto done;
+ }
+ else
+ {
+ fe->rss_cfg = rss_cfg;
+ fe->flow_type_flag = 0;
+ }
+ }
+
done:
return rv;
@@ -495,6 +717,7 @@ avf_flow_ops_fn (vnet_main_t *vm, vnet_flow_dev_op_t op, u32 dev_instance,
case VNET_FLOW_TYPE_IP4_L2TPV3OIP:
case VNET_FLOW_TYPE_IP4_IPSEC_ESP:
case VNET_FLOW_TYPE_IP4_IPSEC_AH:
+ case VNET_FLOW_TYPE_GENERIC:
if ((rv = avf_flow_add (dev_instance, flow, fe)))
goto done;
break;
@@ -509,13 +732,22 @@ avf_flow_ops_fn (vnet_main_t *vm, vnet_flow_dev_op_t op, u32 dev_instance,
{
fe = vec_elt_at_index (ad->flow_entries, *private_data);
- struct avf_fdir_vc_ctx ctx;
+ struct avf_flow_vc_ctx ctx;
ctx.vc_hdl = &dev_instance;
- ctx.vc_op = avf_fdir_vc_op_callback;
+ ctx.vc_op = avf_flow_vc_op_callback;
- rv = avf_fdir_rule_destroy (&ctx, fe->rcfg);
- if (rv)
- return VNET_FLOW_ERROR_INTERNAL;
+ if (fe->flow_type_flag)
+ {
+ rv = avf_fdir_rule_destroy (&ctx, fe->rcfg);
+ if (rv)
+ return VNET_FLOW_ERROR_INTERNAL;
+ }
+ else
+ {
+ rv = avf_rss_rule_destroy (&ctx, fe->rss_cfg);
+ if (rv)
+ return VNET_FLOW_ERROR_INTERNAL;
+ }
if (fe->mark)
{
@@ -525,6 +757,7 @@ avf_flow_ops_fn (vnet_main_t *vm, vnet_flow_dev_op_t op, u32 dev_instance,
}
(void) avf_fdir_rcfg_destroy (fe->rcfg);
+ (void) avf_rss_rcfg_destroy (fe->rss_cfg);
clib_memset (fe, 0, sizeof (*fe));
pool_put (ad->flow_entries, fe);
goto disable_rx_offload;
diff --git a/src/plugins/avf/input.c b/src/plugins/avf/input.c
index 1406d789e0b..06007db540d 100644
--- a/src/plugins/avf/input.c
+++ b/src/plugins/avf/input.c
@@ -125,6 +125,9 @@ avf_rxq_refill (vlib_main_t * vm, vlib_node_runtime_t * node, avf_rxq_t * rxq,
n_alloc -= 8;
}
+ /* RXQ can be smaller than 256 packets, especially if jumbo. */
+ rxq->descs[slot].qword[1] = 0;
+
avf_tail_write (rxq->qrx_tail, slot);
}
@@ -296,7 +299,7 @@ avf_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
next_index = ad->per_interface_next_index;
if (PREDICT_FALSE (vnet_device_input_have_features (ad->sw_if_index)))
- vnet_feature_start_device_input_x1 (ad->sw_if_index, &next_index, bt);
+ vnet_feature_start_device_input (ad->sw_if_index, &next_index, bt);
vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -423,9 +426,6 @@ no_more_desc:
rxq->next = next;
rxq->n_enqueued -= n_rx_packets + n_tail_desc;
- /* avoid eating our own tail */
- rxq->descs[(next + rxq->n_enqueued) & mask].qword[1] = 0;
-
#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
or_qw1 |= or_q1x4[0] | or_q1x4[1] | or_q1x4[2] | or_q1x4[3];
#endif
@@ -566,7 +566,6 @@ VLIB_NODE_FN (avf_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (avf_input_node) = {
.name = "avf-input",
.sibling_of = "device-input",
@@ -578,7 +577,6 @@ VLIB_REGISTER_NODE (avf_input_node) = {
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/avf/output.c b/src/plugins/avf/output.c
index 4cc9d5a49c1..daa86ae86b2 100644
--- a/src/plugins/avf/output.c
+++ b/src/plugins/avf/output.c
@@ -19,6 +19,7 @@
#include <vlib/unix/unix.h>
#include <vlib/pci/pci.h>
#include <vppinfra/ring.h>
+#include <vppinfra/vector/ip_csum.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip4_packet.h>
@@ -110,7 +111,7 @@ avf_tx_prepare_cksum (vlib_buffer_t * b, u8 is_tso)
is_tso ? 0 :
clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
(l4_hdr_offset - l3_hdr_offset));
- sum = ~ip_csum (&psh, sizeof (psh));
+ sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
}
else
{
@@ -119,11 +120,9 @@ avf_tx_prepare_cksum (vlib_buffer_t * b, u8 is_tso)
psh.dst = ip6->dst_address;
psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
psh.l4len = is_tso ? 0 : ip6->payload_length;
- sum = ~ip_csum (&psh, sizeof (psh));
+ sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
}
- /* ip_csum does a byte swap for some reason... */
- sum = clib_net_to_host_u16 (sum);
if (is_tcp)
tcp->checksum = sum;
else
@@ -231,7 +230,11 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq,
{
const u64 cmd_eop = AVF_TXD_CMD_EOP;
u16 n_free_desc, n_desc_left, n_packets_left = n_packets;
+#if defined CLIB_HAVE_VEC512
+ vlib_buffer_t *b[8];
+#else
vlib_buffer_t *b[4];
+#endif
avf_tx_desc_t *d = txq->tmp_descs;
u32 *tb = txq->tmp_bufs;
@@ -242,11 +245,30 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq,
while (n_packets_left && n_desc_left)
{
+#if defined CLIB_HAVE_VEC512
+ u32 flags;
+ u64x8 or_flags_vec512;
+ u64x8 flags_mask_vec512;
+#else
u32 flags, or_flags;
+#endif
+#if defined CLIB_HAVE_VEC512
+ if (n_packets_left < 8 || n_desc_left < 8)
+#else
if (n_packets_left < 8 || n_desc_left < 4)
+#endif
goto one_by_one;
+#if defined CLIB_HAVE_VEC512
+ u64x8 base_ptr = u64x8_splat (vm->buffer_main->buffer_mem_start);
+ u32x8 buf_indices = u32x8_load_unaligned (buffers);
+
+ *(u64x8 *) &b = base_ptr + u64x8_from_u32x8 (
+ buf_indices << CLIB_LOG2_CACHE_LINE_BYTES);
+
+ or_flags_vec512 = u64x8_i64gather (u64x8_load_unaligned (b), 0, 1);
+#else
vlib_prefetch_buffer_with_index (vm, buffers[4], LOAD);
vlib_prefetch_buffer_with_index (vm, buffers[5], LOAD);
vlib_prefetch_buffer_with_index (vm, buffers[6], LOAD);
@@ -258,12 +280,37 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq,
b[3] = vlib_get_buffer (vm, buffers[3]);
or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
+#endif
+#if defined CLIB_HAVE_VEC512
+ flags_mask_vec512 = u64x8_splat (
+ VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD | VNET_BUFFER_F_GSO);
+ if (PREDICT_FALSE (
+ !u64x8_is_all_zero (or_flags_vec512 & flags_mask_vec512)))
+#else
if (PREDICT_FALSE (or_flags &
(VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD |
VNET_BUFFER_F_GSO)))
+#endif
goto one_by_one;
+#if defined CLIB_HAVE_VEC512
+ vlib_buffer_copy_indices (tb, buffers, 8);
+ avf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 1, b[1], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 2, b[2], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 3, b[3], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 4, b[4], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 5, b[5], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 6, b[6], cmd_eop, use_va_dma);
+ avf_tx_fill_data_desc (vm, d + 7, b[7], cmd_eop, use_va_dma);
+
+ buffers += 8;
+ n_packets_left -= 8;
+ n_desc_left -= 8;
+ d += 8;
+ tb += 8;
+#else
vlib_buffer_copy_indices (tb, buffers, 4);
avf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma);
@@ -276,6 +323,8 @@ avf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node, avf_txq_t *txq,
n_desc_left -= 4;
d += 4;
tb += 4;
+#endif
+
continue;
one_by_one:
diff --git a/src/plugins/avf/plugin.c b/src/plugins/avf/plugin.c
index b5123175fe7..928dee543a6 100644
--- a/src/plugins/avf/plugin.c
+++ b/src/plugins/avf/plugin.c
@@ -19,12 +19,10 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Intel Adaptive Virtual Function (AVF) Device Driver",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/avf/virtchnl.h b/src/plugins/avf/virtchnl.h
index ae4fe4a5e3c..98d6f4adf8d 100644
--- a/src/plugins/avf/virtchnl.h
+++ b/src/plugins/avf/virtchnl.h
@@ -97,6 +97,8 @@ enum
_ (31, DISABLE_CHANNELS) \
_ (32, ADD_CLOUD_FILTER) \
_ (33, DEL_CLOUD_FILTER) \
+ _ (45, ADD_RSS_CFG) \
+ _ (46, DEL_RSS_CFG) \
_ (47, ADD_FDIR_FILTER) \
_ (48, DEL_FDIR_FILTER) \
_ (49, QUERY_FDIR_FILTER) \
diff --git a/src/plugins/bpf_trace_filter/CMakeLists.txt b/src/plugins/bpf_trace_filter/CMakeLists.txt
new file mode 100644
index 00000000000..4268022c281
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/CMakeLists.txt
@@ -0,0 +1,45 @@
+# Copyright (c) 2023 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+vpp_find_path(PCAP_INCLUDE_DIR NAMES pcap.h)
+if (NOT PCAP_INCLUDE_DIR)
+ message(WARNING "libpcap headers not found - bpf_trace_filter plugin disabled")
+ return()
+endif()
+
+vpp_plugin_find_library(bpf_trace_filter PCAP_LIB libpcap.so)
+
+if (NOT PCAP_LIB)
+ message(WARNING "bpf_trace_filter plugin - missing libraries - bpf_trace_filter plugin disabled")
+ return()
+endif()
+
+set(CMAKE_REQUIRED_FLAGS "-fPIC")
+set(CMAKE_REQUIRED_INCLUDES "${PCAP_INCLUDE_DIR}")
+set(CMAKE_REQUIRED_LIBRARIES "${PCAP_LIB}")
+
+include_directories(${PCAP_INCLUDE_DIR})
+
+add_vpp_plugin(bpf_trace_filter
+ SOURCES
+ cli.c
+ plugin.c
+ bpf_trace_filter.c
+ api.c
+
+ API_FILES
+ bpf_trace_filter.api
+
+ LINK_LIBRARIES
+ ${PCAP_LIB}
+)
diff --git a/src/plugins/bpf_trace_filter/FEATURE.yaml b/src/plugins/bpf_trace_filter/FEATURE.yaml
new file mode 100644
index 00000000000..101572f731d
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/FEATURE.yaml
@@ -0,0 +1,8 @@
+---
+name: BPF Trace Filter
+maintainer: Mohammed Hawari <mohammed@hawari.fr>
+features:
+ - BPF Trace Filtering
+description: "BPF Trace Filtering"
+state: experimental
+properties: [CLI, API]
diff --git a/src/plugins/bpf_trace_filter/api.c b/src/plugins/bpf_trace_filter/api.c
new file mode 100644
index 00000000000..30beaddd201
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/api.c
@@ -0,0 +1,97 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <bpf_trace_filter/bpf_trace_filter.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <bpf_trace_filter/bpf_trace_filter.api_enum.h>
+#include <bpf_trace_filter/bpf_trace_filter.api_types.h>
+
+#define REPLY_MSG_ID_BASE (bm->msg_id_base)
+#include <vlibapi/api_helper_macros.h>
+
+static void
+vl_api_bpf_trace_filter_set_t_handler (vl_api_bpf_trace_filter_set_t *mp)
+{
+ bpf_trace_filter_main_t *bm = &bpf_trace_filter_main;
+ vl_api_bpf_trace_filter_set_reply_t *rmp;
+ clib_error_t *err = 0;
+ int rv = 0;
+ u8 is_del = !mp->is_add;
+ char *bpf_expr;
+
+ bpf_expr = vl_api_from_api_to_new_c_string (&mp->filter);
+ err = bpf_trace_filter_set_unset (bpf_expr, is_del, 0);
+
+ if (err)
+ {
+ rv = -1;
+ clib_error_report (err);
+ }
+ vec_free (bpf_expr);
+
+ REPLY_MACRO (VL_API_BPF_TRACE_FILTER_SET_REPLY);
+}
+
+static void
+vl_api_bpf_trace_filter_set_v2_t_handler (vl_api_bpf_trace_filter_set_v2_t *mp)
+{
+ bpf_trace_filter_main_t *bm = &bpf_trace_filter_main;
+ vl_api_bpf_trace_filter_set_v2_reply_t *rmp;
+ clib_error_t *err = 0;
+ int rv = 0;
+ u8 is_del = !mp->is_add;
+ u8 optimize = !!mp->optimize;
+ char *bpf_expr;
+
+ bpf_expr = vl_api_from_api_to_new_c_string (&mp->filter);
+ err = bpf_trace_filter_set_unset (bpf_expr, is_del, optimize);
+
+ if (err)
+ {
+ rv = -1;
+ clib_error_report (err);
+ }
+ vec_free (bpf_expr);
+
+ REPLY_MACRO (VL_API_BPF_TRACE_FILTER_SET_V2_REPLY);
+}
+
+#include <bpf_trace_filter/bpf_trace_filter.api.c>
+
+static clib_error_t *
+bpf_trace_filter_plugin_api_hookup (vlib_main_t *vm)
+{
+ bpf_trace_filter_main_t *bm = &bpf_trace_filter_main;
+
+ /* ask for a correctly-sized block of API message decode slots */
+ bm->msg_id_base = setup_message_id_table ();
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (bpf_trace_filter_plugin_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/plugins/bpf_trace_filter/bpf_trace_filter.api b/src/plugins/bpf_trace_filter/bpf_trace_filter.api
new file mode 100644
index 00000000000..c2d47c8b3bf
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/bpf_trace_filter.api
@@ -0,0 +1,35 @@
+/*
+ * bpf_trace_filter.api - BPF Trace filter API
+ *
+ * Copyright (c) 2023 Cisco and/or its affiliates
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ option version = "0.1.0";
+
+ autoreply define bpf_trace_filter_set
+ {
+ u32 client_index;
+ u32 context;
+ bool is_add [default = true];
+ string filter[];
+ };
+
+ autoreply define bpf_trace_filter_set_v2
+ {
+ u32 client_index;
+ u32 context;
+ bool is_add [default = true];
+ bool optimize [default = true];
+ string filter[];
+ }; \ No newline at end of file
diff --git a/src/plugins/bpf_trace_filter/bpf_trace_filter.c b/src/plugins/bpf_trace_filter/bpf_trace_filter.c
new file mode 100644
index 00000000000..9d86c8483a6
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/bpf_trace_filter.c
@@ -0,0 +1,112 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <bpf_trace_filter/bpf_trace_filter.h>
+
+clib_error_t *
+bpf_trace_filter_init (vlib_main_t *vm)
+{
+ bpf_trace_filter_main_t *btm = &bpf_trace_filter_main;
+ btm->pcap = pcap_open_dead (DLT_EN10MB, 65535);
+
+ return 0;
+}
+
+int vnet_is_packet_traced (vlib_buffer_t *b, u32 classify_table_index,
+ int func);
+
+u8 *
+format_bpf_trace_filter (u8 *s, va_list *a)
+{
+ bpf_trace_filter_main_t *btm = va_arg (*a, bpf_trace_filter_main_t *);
+ struct bpf_insn *insn;
+
+ if (!btm->prog_set)
+ return format (s, "bpf trace filter is not set");
+
+ insn = btm->prog.bf_insns;
+ for (int i = 0; i < btm->prog.bf_len; insn++, i++)
+ s = format (s, "%s\n", bpf_image (insn, i));
+
+ return s;
+}
+
+clib_error_t *
+bpf_trace_filter_set_unset (const char *bpf_expr, u8 is_del, u8 optimize)
+{
+ bpf_trace_filter_main_t *btm = &bpf_trace_filter_main;
+ if (is_del)
+ {
+ if (btm->prog_set)
+ {
+ btm->prog_set = 0;
+ pcap_freecode (&btm->prog);
+ }
+ }
+ else if (bpf_expr)
+ {
+ if (btm->prog_set)
+ pcap_freecode (&btm->prog);
+ btm->prog_set = 0;
+ if (pcap_compile (btm->pcap, &btm->prog, (char *) bpf_expr, optimize,
+ PCAP_NETMASK_UNKNOWN))
+ {
+ return clib_error_return (0, "Failed pcap_compile of %s", bpf_expr);
+ }
+ btm->prog_set = 1;
+ }
+ return 0;
+};
+
+int
+bpf_is_packet_traced (vlib_buffer_t *b, u32 classify_table_index, int func)
+{
+ bpf_trace_filter_main_t *bfm = &bpf_trace_filter_main;
+ struct pcap_pkthdr phdr = { 0 };
+ int res;
+ int res1;
+
+ if (classify_table_index != ~0 &&
+ (res1 = vnet_is_packet_traced (b, classify_table_index, 0)) != 1)
+ return res1;
+
+ if (!bfm->prog_set)
+ return 1;
+
+ phdr.caplen = b->current_length;
+ phdr.len = b->current_length;
+ res = pcap_offline_filter (&bfm->prog, &phdr, vlib_buffer_get_current (b));
+ return res != 0;
+}
+
+VLIB_REGISTER_TRACE_FILTER_FUNCTION (bpf_trace_filter_fn, static) = {
+ .name = "bpf_trace_filter",
+ .description = "bpf based trace filter",
+ .priority = 10,
+ .function = bpf_is_packet_traced
+};
+
+VLIB_INIT_FUNCTION (bpf_trace_filter_init);
+bpf_trace_filter_main_t bpf_trace_filter_main;
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/bpf_trace_filter/bpf_trace_filter.h b/src/plugins/bpf_trace_filter/bpf_trace_filter.h
new file mode 100644
index 00000000000..52413ebe0ad
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/bpf_trace_filter.h
@@ -0,0 +1,42 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _BPF_TRACE_FILTER_H_
+#define _BPF_TRACE_FILTER_H_
+#include <vlib/vlib.h>
+#include <pcap.h>
+typedef struct
+{
+ pcap_t *pcap;
+ u16 msg_id_base;
+ u8 prog_set;
+ struct bpf_program prog;
+} bpf_trace_filter_main_t;
+
+extern bpf_trace_filter_main_t bpf_trace_filter_main;
+clib_error_t *bpf_trace_filter_set_unset (const char *bpf_expr, u8 is_del,
+ u8 optimize);
+u8 *format_bpf_trace_filter (u8 *s, va_list *a);
+#endif /* _BPF_TRACE_FILTER_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/plugins/bpf_trace_filter/bpf_trace_filter.rst b/src/plugins/bpf_trace_filter/bpf_trace_filter.rst
new file mode 100644
index 00000000000..63deddbc5ab
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/bpf_trace_filter.rst
@@ -0,0 +1,4 @@
+BPF Trace Filter Function
+============================
+This plugin provides a trace filter function that relies on a BPF interpreter to select which packets
+must be traced. \ No newline at end of file
diff --git a/src/plugins/bpf_trace_filter/cli.c b/src/plugins/bpf_trace_filter/cli.c
new file mode 100644
index 00000000000..f340b1667e1
--- /dev/null
+++ b/src/plugins/bpf_trace_filter/cli.c
@@ -0,0 +1,99 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <stdint.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <inttypes.h>
+
+#include <vlib/vlib.h>
+#include <bpf_trace_filter/bpf_trace_filter.h>
+
+static clib_error_t *
+set_bpf_trace_filter_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 *bpf_expr = 0;
+ u8 is_del = 0;
+ u8 optimize = 1;
+ clib_error_t *err = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_del = 1;
+ else if (unformat (line_input, "no-optimize"))
+ optimize = 0;
+ else if (unformat (line_input, "%s", &bpf_expr))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+ unformat_free (line_input);
+
+ if (err != 0)
+ return err;
+
+ err = bpf_trace_filter_set_unset ((char *) bpf_expr, is_del, optimize);
+
+ return err;
+}
+
+VLIB_CLI_COMMAND (set_bpf_trace_filter, static) = {
+ .path = "set bpf trace filter",
+ .short_help = "set bpf trace filter [del] [no-optimize] {<pcap string>}",
+ .function = set_bpf_trace_filter_command_fn,
+};
+
+static clib_error_t *
+show_bpf_trace_filter_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ bpf_trace_filter_main_t *btm = &bpf_trace_filter_main;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ return (clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input));
+ }
+
+ vlib_cli_output (vm, "%U", format_bpf_trace_filter, btm);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_bpf_trace_filter, static) = {
+ .path = "show bpf trace filter",
+ .short_help = "show bpf trace filter",
+ .function = show_bpf_trace_filter_command_fn,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vpp/api/vpe_all_api_h.h b/src/plugins/bpf_trace_filter/plugin.c
index 7559208d997..db5d6111d85 100644
--- a/src/vpp/api/vpe_all_api_h.h
+++ b/src/plugins/bpf_trace_filter/plugin.c
@@ -1,5 +1,6 @@
/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -11,18 +12,17 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
-/*
- * Add to the bottom of the #include list, or elves will steal your
- * keyboard in the middle of the night!
+ *------------------------------------------------------------------
*/
-/* Include the (first) vlib-api API definition layer */
-#include <vlibmemory/vl_memory_api_h.h>
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
-/* Include the current layer (third) vpp API definition layer */
-#include <vpp/api/vpe_types.api.h>
-#include <vpp/api/vpe.api.h>
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "BPF Trace Filter Plugin",
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/bufmon/CMakeLists.txt b/src/plugins/bufmon/CMakeLists.txt
new file mode 100644
index 00000000000..b20ccff8075
--- /dev/null
+++ b/src/plugins/bufmon/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Copyright (c) 2020 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(bufmon
+ SOURCES
+ bufmon.c
+
+ COMPONENT
+ vpp-plugin-devtools
+)
diff --git a/src/plugins/bufmon/FEATURE.yaml b/src/plugins/bufmon/FEATURE.yaml
new file mode 100644
index 00000000000..81dc6e8fa5a
--- /dev/null
+++ b/src/plugins/bufmon/FEATURE.yaml
@@ -0,0 +1,8 @@
+---
+name: Buffers monitoring plugin
+maintainer: Benoît Ganne <bganne@cisco.com>
+features:
+ - monitor buffer utilization in VPP graph nodes
+description: "monitor buffer utilization in VPP graph nodes"
+state: production
+properties: [CLI, MULTITHREAD]
diff --git a/src/plugins/bufmon/bufmon.c b/src/plugins/bufmon/bufmon.c
new file mode 100644
index 00000000000..30cf6576e7a
--- /dev/null
+++ b/src/plugins/bufmon/bufmon.c
@@ -0,0 +1,314 @@
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+typedef struct
+{
+ u64 in;
+ u64 out;
+ u64 alloc;
+ u64 free;
+} bufmon_per_node_data_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ bufmon_per_node_data_t *pnd;
+ u32 cur_node;
+} bufmon_per_thread_data_t;
+
+typedef struct
+{
+ bufmon_per_thread_data_t *ptd;
+ int enabled;
+} bufmon_main_t;
+
+static bufmon_main_t bufmon_main;
+
+static u32
+bufmon_alloc_free_callback (vlib_main_t *vm, u32 n_buffers, const int is_free)
+{
+ bufmon_main_t *bm = &bufmon_main;
+ bufmon_per_thread_data_t *ptd;
+ bufmon_per_node_data_t *pnd;
+ u32 cur_node;
+
+ if (PREDICT_FALSE (vm->thread_index >= vec_len (bm->ptd)))
+ {
+ clib_warning ("bufmon: thread index %d unknown for buffer %s (%d)",
+ vm->thread_index, is_free ? "free" : "alloc", n_buffers);
+ return n_buffers;
+ }
+
+ ptd = vec_elt_at_index (bm->ptd, vm->thread_index);
+
+ cur_node = ptd->cur_node;
+ if (cur_node >= vec_len (ptd->pnd))
+ {
+ cur_node = vlib_get_current_process_node_index (vm);
+ vec_validate_aligned (ptd->pnd, cur_node, CLIB_CACHE_LINE_BYTES);
+ }
+
+ pnd = vec_elt_at_index (ptd->pnd, cur_node);
+
+ if (is_free)
+ pnd->free += n_buffers;
+ else
+ pnd->alloc += n_buffers;
+
+ return n_buffers;
+}
+
+static u32
+bufmon_alloc_callback (vlib_main_t *vm, u8 buffer_pool_index, u32 *buffers,
+ u32 n_buffers)
+{
+ return bufmon_alloc_free_callback (vm, n_buffers, 0 /* is_free */);
+}
+
+static u32
+bufmon_free_callback (vlib_main_t *vm, u8 buffer_pool_index, u32 *buffers,
+ u32 n_buffers)
+{
+ return bufmon_alloc_free_callback (vm, n_buffers, 1 /* is_free */);
+}
+
+static u32
+bufmon_count_buffers (vlib_main_t *vm, vlib_frame_t *frame)
+{
+ vlib_buffer_t *b[VLIB_FRAME_SIZE];
+ u32 *from = vlib_frame_vector_args (frame);
+ const u32 n = frame->n_vectors;
+ u32 nc = 0;
+ u32 i;
+
+ vlib_get_buffers (vm, from, b, n);
+
+ for (i = 0; i < n; i++)
+ {
+ const vlib_buffer_t *cb = b[i];
+ while (cb->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ nc++;
+ cb = vlib_get_buffer (vm, cb->next_buffer);
+ }
+ }
+
+ return n + nc;
+}
+
+static uword
+bufmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ bufmon_main_t *bm = &bufmon_main;
+ bufmon_per_thread_data_t *ptd;
+ bufmon_per_node_data_t *pnd;
+ int pending_frames;
+ uword rv;
+
+ ptd = vec_elt_at_index (bm->ptd, vm->thread_index);
+ vec_validate_aligned (ptd->pnd, node->node_index, CLIB_CACHE_LINE_BYTES);
+ pnd = vec_elt_at_index (ptd->pnd, node->node_index);
+
+ if (frame)
+ pnd->in += bufmon_count_buffers (vm, frame);
+
+ pending_frames = vec_len (nm->pending_frames);
+ ptd->cur_node = node->node_index;
+
+ rv = node->function (vm, node, frame);
+
+ ptd->cur_node = ~0;
+ for (; pending_frames < vec_len (nm->pending_frames); pending_frames++)
+ {
+ vlib_pending_frame_t *p =
+ vec_elt_at_index (nm->pending_frames, pending_frames);
+ pnd->out += bufmon_count_buffers (vm, vlib_get_frame (vm, p->frame));
+ }
+
+ return rv;
+}
+
+static void
+bufmon_unregister_callbacks (vlib_main_t *vm)
+{
+ vlib_buffer_set_alloc_free_callback (vm, 0, 0);
+ foreach_vlib_main ()
+ vlib_node_set_dispatch_wrapper (this_vlib_main, 0);
+}
+
+static clib_error_t *
+bufmon_register_callbacks (vlib_main_t *vm)
+{
+ if (vlib_buffer_set_alloc_free_callback (vm, bufmon_alloc_callback,
+ bufmon_free_callback))
+ goto err0;
+
+ foreach_vlib_main ()
+ if (vlib_node_set_dispatch_wrapper (this_vlib_main,
+ bufmon_dispatch_wrapper))
+ goto err1;
+
+ vec_validate_aligned (bufmon_main.ptd, vlib_thread_main.n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+ return 0;
+
+err1:
+ foreach_vlib_main ()
+ vlib_node_set_dispatch_wrapper (this_vlib_main, 0);
+err0:
+ vlib_buffer_set_alloc_free_callback (vm, 0, 0);
+ return clib_error_return (0, "failed to register callback");
+}
+
+static clib_error_t *
+bufmon_enable_disable (vlib_main_t *vm, int enable)
+{
+ bufmon_main_t *bm = &bufmon_main;
+
+ if (enable)
+ {
+ if (bm->enabled)
+ return 0;
+ clib_error_t *error = bufmon_register_callbacks (vm);
+ if (error)
+ return error;
+ bm->enabled = 1;
+ }
+ else
+ {
+ if (!bm->enabled)
+ return 0;
+ bufmon_unregister_callbacks (vm);
+ bm->enabled = 0;
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+set_buffer_traces (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ int on = 1;
+
+ if (unformat_user (input, unformat_line_input, line_input))
+ {
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "on"))
+ on = 1;
+ else if (unformat (line_input, "off"))
+ on = 0;
+ else
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ }
+ unformat_free (line_input);
+ }
+
+ return bufmon_enable_disable (vm, on);
+}
+
+VLIB_CLI_COMMAND (set_buffer_traces_command, static) = {
+ .path = "set buffer traces",
+ .short_help = "set buffer traces [on|off]",
+ .function = set_buffer_traces,
+};
+
+static clib_error_t *
+show_buffer_traces (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ const bufmon_main_t *bm = &bufmon_main;
+ const bufmon_per_thread_data_t *ptd;
+ const bufmon_per_node_data_t *pnd;
+ int verbose = 0;
+ int status = 0;
+
+ if (unformat_user (input, unformat_line_input, line_input))
+ {
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "verbose"))
+ verbose = 1;
+ else if (unformat (line_input, "status"))
+ status = 1;
+ else
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ }
+ unformat_free (line_input);
+ }
+
+ if (status)
+ {
+ vlib_cli_output (vm, "buffers tracing is %s",
+ bm->enabled ? "on" : "off");
+ return 0;
+ }
+
+ vlib_cli_output (vm, "%U\n\n", format_vlib_buffer_pool_all, vm);
+ vlib_cli_output (vm, "%30s%20s%20s%20s%20s%20s", "Node", "Allocated",
+ "Freed", "In", "Out", "Buffered");
+ vec_foreach (ptd, bm->ptd)
+ {
+ vec_foreach (pnd, ptd->pnd)
+ {
+ const u64 in = pnd->alloc + pnd->in;
+ const u64 out = pnd->free + pnd->out;
+ const i64 buffered = in - out;
+ if (0 == in && 0 == out)
+ continue; /* skip nodes w/o activity */
+ if (0 == buffered && !verbose)
+ continue; /* if not verbose, skip nodes w/o buffered buffers */
+ vlib_cli_output (vm, "%30U%20lu%20lu%20lu%20lu%20ld",
+ format_vlib_node_name, vm, pnd - ptd->pnd,
+ pnd->alloc, pnd->free, pnd->in, pnd->out, buffered);
+ }
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_buffer_traces_command, static) = {
+ .path = "show buffer traces",
+ .short_help = "show buffer traces [status|verbose]",
+ .function = show_buffer_traces,
+};
+
+static clib_error_t *
+clear_buffer_traces (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ const bufmon_main_t *bm = &bufmon_main;
+ const bufmon_per_thread_data_t *ptd;
+ const bufmon_per_node_data_t *pnd;
+
+ vec_foreach (ptd, bm->ptd)
+ vec_foreach (pnd, ptd->pnd)
+ vec_reset_length (pnd);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (clear_buffers_trace_command, static) = {
+ .path = "clear buffer traces",
+ .short_help = "clear buffer traces",
+ .function = clear_buffer_traces,
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Buffers monitoring plugin",
+};
diff --git a/src/plugins/bufmon/bufmon_doc.rst b/src/plugins/bufmon/bufmon_doc.rst
new file mode 100644
index 00000000000..34d5bd35474
--- /dev/null
+++ b/src/plugins/bufmon/bufmon_doc.rst
@@ -0,0 +1,33 @@
+.. _bufmon_doc:
+
+Buffers monitoring plugin
+=========================
+
+This plugin enables to track buffer utilization in the VPP graph nodes.
+The main use is to detect buffer leakage. It works by keeping track of
+number of buffer allocations and free in graph nodes and also of number
+of buffers received in input frames and in output frames. The formula to
+compute the number of “buffered” buffers in a node is simply: #buffered
+= #alloc + #input - #free - #output Note: monitoring will impact
+performances.
+
+Basic usage
+-----------
+
+1. Turn buffer traces on:
+
+::
+
+ ~# vppctl set buffer traces on
+
+2. Monitor buffer usage:
+
+::
+
+ ~# vppctl show buffer traces verbose
+
+3. Turn buffer traces off:
+
+::
+
+ ~# vppctl set buffer traces off
diff --git a/src/plugins/builtinurl/builtins.c b/src/plugins/builtinurl/builtins.c
index 04567c0f8c5..b04e9dd5c7c 100644
--- a/src/plugins/builtinurl/builtins.c
+++ b/src/plugins/builtinurl/builtins.c
@@ -18,9 +18,8 @@
#include <http_static/http_static.h>
#include <vpp/app/version.h>
-int
-handle_get_version (http_builtin_method_type_t reqtype,
- u8 * request, http_session_t * hs)
+hss_url_handler_rc_t
+handle_get_version (hss_url_handler_args_t *args)
{
u8 *s = 0;
@@ -29,11 +28,10 @@ handle_get_version (http_builtin_method_type_t reqtype,
s = format (s, " \"version\": \"%s\",", VPP_BUILD_VER);
s = format (s, " \"build_date\": \"%s\"}}\r\n", VPP_BUILD_DATE);
- hs->data = s;
- hs->data_offset = 0;
- hs->cache_pool_index = ~0;
- hs->free_data = 1;
- return 0;
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ return HSS_URL_HANDLER_OK;
}
void
@@ -56,16 +54,15 @@ trim_path_from_request (u8 * s, char *path)
* like a c-string.
*/
*cp = 0;
- _vec_len (s) = cp - s;
+ vec_set_len (s, cp - s);
break;
}
cp++;
}
}
-int
-handle_get_interface_stats (http_builtin_method_type_t reqtype,
- u8 * request, http_session_t * hs)
+hss_url_handler_rc_t
+handle_get_interface_stats (hss_url_handler_args_t *args)
{
u8 *s = 0, *stats = 0;
uword *p;
@@ -81,16 +78,16 @@ handle_get_interface_stats (http_builtin_method_type_t reqtype,
vnet_interface_main_t *im = &vnm->interface_main;
/* Get stats for a single interface via http POST */
- if (reqtype == HTTP_BUILTIN_METHOD_POST)
+ if (args->reqtype == HTTP_REQ_POST)
{
- trim_path_from_request (request, "interface_stats.json");
+ trim_path_from_request (args->request, "interface_stats.json");
/* Find the sw_if_index */
- p = hash_get (im->hw_interface_by_name, request);
+ p = hash_get (im->hw_interface_by_name, args->request);
if (!p)
{
s = format (s, "{\"interface_stats\": {[\n");
- s = format (s, " \"name\": \"%s\",", request);
+ s = format (s, " \"name\": \"%s\",", args->request);
s = format (s, " \"error\": \"%s\"", "UnknownInterface");
s = format (s, "]}\n");
goto out;
@@ -100,12 +97,10 @@ handle_get_interface_stats (http_builtin_method_type_t reqtype,
}
else /* default, HTTP_BUILTIN_METHOD_GET */
{
- /* *INDENT-OFF* */
pool_foreach (hi, im->hw_interfaces)
{
vec_add1 (sw_if_indices, hi->sw_if_index);
}
- /* *INDENT-ON* */
}
s = format (s, "{%sinterface_stats%s: [\n", q, q);
@@ -133,18 +128,16 @@ handle_get_interface_stats (http_builtin_method_type_t reqtype,
s = format (s, "]}\n");
out:
- hs->data = s;
- hs->data_offset = 0;
- hs->cache_pool_index = ~0;
- hs->free_data = 1;
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
vec_free (sw_if_indices);
vec_free (stats);
- return 0;
+ return HSS_URL_HANDLER_OK;
}
-int
-handle_get_interface_list (http_builtin_method_type_t reqtype,
- u8 * request, http_session_t * hs)
+hss_url_handler_rc_t
+handle_get_interface_list (hss_url_handler_args_t *args)
{
u8 *s = 0;
int i;
@@ -155,14 +148,12 @@ handle_get_interface_list (http_builtin_method_type_t reqtype,
int need_comma = 0;
/* Construct vector of active hw_if_indexes ... */
- /* *INDENT-OFF* */
pool_foreach (hi, im->hw_interfaces)
{
/* No point in mentioning "local0"... */
if (hi - im->hw_interfaces)
vec_add1 (hw_if_indices, hi - im->hw_interfaces);
}
- /* *INDENT-ON* */
/* Build answer */
s = format (s, "{\"interface_list\": [\n");
@@ -177,25 +168,23 @@ handle_get_interface_list (http_builtin_method_type_t reqtype,
s = format (s, "]}\n");
vec_free (hw_if_indices);
- hs->data = s;
- hs->data_offset = 0;
- hs->cache_pool_index = ~0;
- hs->free_data = 1;
- return 0;
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ return HSS_URL_HANDLER_OK;
}
void
builtinurl_handler_init (builtinurl_main_t * bm)
{
- bm->register_handler (handle_get_version, "version.json",
- HTTP_BUILTIN_METHOD_GET);
+ bm->register_handler (handle_get_version, "version.json", HTTP_REQ_GET);
bm->register_handler (handle_get_interface_list, "interface_list.json",
- HTTP_BUILTIN_METHOD_GET);
- bm->register_handler (handle_get_interface_stats,
- "interface_stats.json", HTTP_BUILTIN_METHOD_GET);
- bm->register_handler (handle_get_interface_stats,
- "interface_stats.json", HTTP_BUILTIN_METHOD_POST);
+ HTTP_REQ_GET);
+ bm->register_handler (handle_get_interface_stats, "interface_stats.json",
+ HTTP_REQ_GET);
+ bm->register_handler (handle_get_interface_stats, "interface_stats.json",
+ HTTP_REQ_POST);
}
/*
diff --git a/src/plugins/builtinurl/builtinurl.c b/src/plugins/builtinurl/builtinurl.c
index 8782906a8d2..749a2c93b8a 100644
--- a/src/plugins/builtinurl/builtinurl.c
+++ b/src/plugins/builtinurl/builtinurl.c
@@ -85,14 +85,12 @@ builtinurl_enable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (builtinurl_enable_command, static) =
{
.path = "builtinurl enable",
.short_help = "Turn on builtin http/https GET and POST urls",
.function = builtinurl_enable_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_builtinurl_enable_t_handler
@@ -124,13 +122,11 @@ builtinurl_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (builtinurl_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "vpp built-in URL support",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/cdp/cdp.c b/src/plugins/cdp/cdp.c
index 1fe557fe82e..00784ccd0bc 100644
--- a/src/plugins/cdp/cdp.c
+++ b/src/plugins/cdp/cdp.c
@@ -86,14 +86,12 @@ cdp_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cdp_command, static) =
{
.path = "cdp",
.short_help = "cdp enable | disable",
.function = cdp_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_cdp_enable_disable_t_handler
@@ -124,13 +122,11 @@ cdp_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (cdp_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Cisco Discovery Protocol (CDP)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/cdp/cdp.pg b/src/plugins/cdp/cdp.pg
index b6ba18656c2..32700463ed0 100644
--- a/src/plugins/cdp/cdp.pg
+++ b/src/plugins/cdp/cdp.pg
@@ -1,7 +1,7 @@
-packet-generator new {
- name cdp
- limit 1
- node cdp-input
- size 374-374
- data { hex 0x02b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2043333735304520536f66747761726520284333373530452d554e4956455253414c2d4d292c2056657273696f6e2031322e32283335295345352c2052454c4541534520534f4654574152452028666331290a436f707972696768742028632920313938362d3230303720627920436973636f2053797374656d732c20496e632e0a436f6d70696c6564205468752031392d4a756c2d30372031363a3137206279206e616368656e00060018636973636f2057532d4333373530452d3234544400020011000000010101cc0004000000000003001b54656e4769676162697445746865726e6574312f302f3100040008000000280008002400000c011200000000ffffffff010221ff000000000000001e7a50f000ff000000090004000a00060001000b0005010012000500001300050000160011000000010101cc000400000000001a00100000000100000000ffffffff }
+packet-generator new { \
+ name cdp \
+ limit 1 \
+ node cdp-input \
+ size 374-374 \
+ data { hex 0x02b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2043333735304520536f66747761726520284333373530452d554e4956455253414c2d4d292c2056657273696f6e2031322e32283335295345352c2052454c4541534520534f4654574152452028666331290a436f707972696768742028632920313938362d3230303720627920436973636f2053797374656d732c20496e632e0a436f6d70696c6564205468752031392d4a756c2d30372031363a3137206279206e616368656e00060018636973636f2057532d4333373530452d3234544400020011000000010101cc0004000000000003001b54656e4769676162697445746865726e6574312f302f3100040008000000280008002400000c011200000000ffffffff010221ff000000000000001e7a50f000ff000000090004000a00060001000b0005010012000500001300050000160011000000010101cc000400000000001a00100000000100000000ffffffff } \
}
diff --git a/src/plugins/cdp/cdp_input.c b/src/plugins/cdp/cdp_input.c
index 76a3d70a292..914d4dec66f 100644
--- a/src/plugins/cdp/cdp_input.c
+++ b/src/plugins/cdp/cdp_input.c
@@ -167,25 +167,24 @@ _(version,DEBUG_TLV_DUMP) \
_(platform,DEBUG_TLV_DUMP) \
_(port_id,DEBUG_TLV_DUMP)
-#define _(z,dbg) \
-static \
-cdp_error_t process_##z##_tlv (cdp_main_t *cm, cdp_neighbor_t *n, \
- cdp_tlv_t *t) \
-{ \
- int i; \
- if (dbg) \
- fformat(stdout, "%U\n", format_text_tlv, t); \
- \
- if (n->z) \
- _vec_len(n->z) = 0; \
- \
- for (i = 0; i < (t->l - sizeof (*t)); i++) \
- vec_add1(n->z, t->v[i]); \
- \
- vec_add1(n->z, 0); \
- \
- return CDP_ERROR_NONE; \
-}
+#define _(z, dbg) \
+ static cdp_error_t process_##z##_tlv (cdp_main_t *cm, cdp_neighbor_t *n, \
+ cdp_tlv_t *t) \
+ { \
+ int i; \
+ if (dbg) \
+ fformat (stdout, "%U\n", format_text_tlv, t); \
+ \
+ if (n->z) \
+ vec_set_len (n->z, 0); \
+ \
+ for (i = 0; i < (t->l - sizeof (*t)); i++) \
+ vec_add1 (n->z, t->v[i]); \
+ \
+ vec_add1 (n->z, 0); \
+ \
+ return CDP_ERROR_NONE; \
+ }
foreach_text_to_struct_tlv
#undef _
@@ -354,7 +353,7 @@ cdp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
*/
if (n->last_rx_pkt)
- _vec_len (n->last_rx_pkt) = 0;
+ vec_set_len (n->last_rx_pkt, 0);
/* cdp disabled on this interface, we're done */
if (n->disabled)
@@ -417,12 +416,10 @@ cdp_input_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (cdp_input_init) =
{
.runs_after = VLIB_INITS("cdp_periodic_init"),
};
-/* *INDENT-ON* */
static u8 *
@@ -438,7 +435,6 @@ format_cdp_neighbors (u8 * s, va_list * va)
"%=25s %=25s %=25s %=10s\n",
"Our Port", "Peer System", "Peer Port", "Last Heard");
- /* *INDENT-OFF* */
pool_foreach (n, cm->neighbors)
{
hw = vnet_get_sup_hw_interface (vnm, n->sw_if_index);
@@ -448,7 +444,6 @@ format_cdp_neighbors (u8 * s, va_list * va)
hw->name, n->device_name, n->port_id,
n->last_heard);
}
- /* *INDENT-ON* */
return s;
}
@@ -466,13 +461,11 @@ show_cdp (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_cdp_command, static) = {
.path = "show cdp",
.short_help = "Show cdp command",
.function = show_cdp,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/cdp/cdp_node.c b/src/plugins/cdp/cdp_node.c
index f9ee251c022..49b1e3844e4 100644
--- a/src/plugins/cdp/cdp_node.c
+++ b/src/plugins/cdp/cdp_node.c
@@ -100,7 +100,6 @@ cdp_node_fn (vlib_main_t * vm,
/*
* cdp input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (cdp_input_node, static) = {
.function = cdp_node_fn,
.name = "cdp-input",
@@ -117,7 +116,6 @@ VLIB_REGISTER_NODE (cdp_input_node, static) = {
[CDP_INPUT_NEXT_NORMAL] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* cdp periodic function
diff --git a/src/plugins/cdp/cdp_periodic.c b/src/plugins/cdp/cdp_periodic.c
index c73d86be316..03a2de0d9ab 100644
--- a/src/plugins/cdp/cdp_periodic.c
+++ b/src/plugins/cdp/cdp_periodic.c
@@ -357,12 +357,10 @@ cdp_periodic (vlib_main_t * vm)
int i;
static cdp_neighbor_t **n_list = 0;
- /* *INDENT-OFF* */
pool_foreach (n, cm->neighbors)
{
vec_add1 (n_list, n);
}
- /* *INDENT-ON* */
/* Across all cdp neighbors known to the system */
for (i = 0; i < vec_len (n_list); i++)
@@ -394,9 +392,9 @@ cdp_periodic (vlib_main_t * vm)
delete_neighbor (cm, n, 1);
}
if (delete_list)
- _vec_len (delete_list) = 0;
+ vec_set_len (delete_list, 0);
if (n_list)
- _vec_len (n_list) = 0;
+ vec_set_len (n_list, 0);
}
static clib_error_t *
diff --git a/src/plugins/cnat/CMakeLists.txt b/src/plugins/cnat/CMakeLists.txt
index cfb55661a78..e99bf056a35 100644
--- a/src/plugins/cnat/CMakeLists.txt
+++ b/src/plugins/cnat/CMakeLists.txt
@@ -24,6 +24,7 @@ add_vpp_plugin(cnat
cnat_types.c
cnat_snat_policy.c
cnat_src_policy.c
+ cnat_maglev.c
API_FILES
cnat.api
diff --git a/src/plugins/cnat/FEATURE.yaml b/src/plugins/cnat/FEATURE.yaml
index 9deda2e94cc..880d713b63f 100644
--- a/src/plugins/cnat/FEATURE.yaml
+++ b/src/plugins/cnat/FEATURE.yaml
@@ -9,7 +9,7 @@ description: "This plugin is intended to complement the VPP's plugin_nat for
Cloud use-cases. It allows for source/destination address/port
translation based on multiple criterias. It is intended to be modular
enough so that one could write a use-case optimised translation function
- without having to deal with actually re-writing packets or maintining
+ without having to deal with actually re-writing packets or maintaining
sessions.
This plugin supports multithreading. Workers share a unique bihash where
sessions are stored."
diff --git a/src/plugins/cnat/cnat.api b/src/plugins/cnat/cnat.api
index e253084e74e..e6ad37dd6eb 100644
--- a/src/plugins/cnat/cnat.api
+++ b/src/plugins/cnat/cnat.api
@@ -1,6 +1,6 @@
/* Hey Emacs use -*- mode: C -*- */
/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2023 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -19,14 +19,16 @@
used to control the ABF plugin
*/
-option version = "0.2.0";
+option version = "0.3.0";
import "vnet/ip/ip_types.api";
import "vnet/fib/fib_types.api";
import "vnet/interface_types.api";
+import "vnet/ip/ip.api";
enum cnat_translation_flags:u8
{
CNAT_TRANSLATION_ALLOC_PORT = 1,
+ CNAT_TRANSLATION_NO_RETURN_SESSION = 4,
};
enum cnat_endpoint_tuple_flags:u8
@@ -70,6 +72,7 @@ typedef cnat_translation
u8 flags;
vl_api_cnat_lb_type_t lb_type;
u32 n_paths;
+ vl_api_ip_flow_hash_config_v2_t flow_hash_config;
vl_api_cnat_endpoint_tuple_t paths[n_paths];
};
@@ -172,6 +175,7 @@ enum cnat_snat_policy_table:u8
CNAT_POLICY_INCLUDE_V4 = 0,
CNAT_POLICY_INCLUDE_V6 = 1,
CNAT_POLICY_POD = 2,
+ CNAT_POLICY_HOST = 3,
};
autoreply define cnat_snat_policy_add_del_if
diff --git a/src/plugins/cnat/cnat.rst b/src/plugins/cnat/cnat.rst
index 8781f405a23..b0426f35373 100644
--- a/src/plugins/cnat/cnat.rst
+++ b/src/plugins/cnat/cnat.rst
@@ -9,7 +9,7 @@ Overview
________
This plugin covers specific NAT use-cases that come mostly
-from the container networking world. On the contraty of the
+from the container networking world. On the contrary of the
NAT concepts used for e.g. a home gateway, there is no notion
of 'outside' and 'inside'. We handle Virtual (or Real) IPs and
translations of the packets destined to them
@@ -33,9 +33,9 @@ that will store the packet rewrite to do and the one to undo
until the flow is reset or a timeout is reached
A ``session`` is a fully resolved 9-tuple of ``src_ip, src_port, dest_ip, dest_port, proto``
-to match incoming packets, and their new attributes ``new_src_ip, new_src_port, new_dest_ip, new_dest_port``. It allows for ``backend`` stickyness and a fast-path for established connections.
+to match incoming packets, and their new attributes ``new_src_ip, new_src_port, new_dest_ip, new_dest_port``. It allows for ``backend`` stickiness and a fast-path for established connections.
-These ``sessions`` expire after 30s for regular ``sessions`` and 1h for estabished
+These ``sessions`` expire after 30s for regular ``sessions`` and 1h for established
TCP connections. These can be changed in vpp's configuration file
.. code-block:: console
@@ -64,7 +64,7 @@ assigned to an interface
If ``30.0.0.2`` is the address of an interface, we can use the following
-to do the same translation, and additionnaly change the source.
+to do the same translation, and additionally change the source.
address with ``1.2.3.4``
.. code-block:: console
@@ -75,17 +75,17 @@ To show existing translations and sessions you can use
.. code-block:: console
- cnat show session verbose
- cant show translation
+ show cnat session verbose
+ show cnat translation
SourceNATing outgoing traffic
-----------------------------
-A independant part of the plugin allows changing the source address
+A independent part of the plugin allows changing the source address
of outgoing traffic on a per-interface basis.
-In the following example, all traffic comming from ``tap0`` and NOT
+In the following example, all traffic coming from ``tap0`` and NOT
going to ``20.0.0.0/24`` will be source NAT-ed with ``30.0.0.1``.
On the way back the translation will be undone.
@@ -94,10 +94,18 @@ address assigned to an interface)
.. code-block:: console
- cnat snat with 30.0.0.1
- cnat snat exclude 20.0.0.0/24
+ set cnat snat-policy addr 30.0.0.1
+ set cnat snat-policy if-pfx
+ set cnat snat-policy if table include-v4 tap0
+ set cnat snat-policy prefix 20.0.0.0/24
set interface feature tap0 cnat-snat-ip4 arc ip4-unicast
+To show the enforced snat policies:
+
+.. code-block:: console
+
+ show cnat snat-policy
+
Other parameters
----------------
@@ -105,7 +113,7 @@ In vpp's startup file, you can also configure the bihash sizes for
* the translation bihash ``(proto, port) -> translation``
* the session bihash ``src_ip, src_port, dest_ip, dest_port, proto -> new_src_ip, new_src_port, new_dest_ip, new_dest_port``
-* the snat bihash for searching ``snat exclude`` prefixes
+* the snat bihash for searching ``snat-policy`` excluded prefixes
.. code-block:: console
@@ -126,19 +134,19 @@ This plugin is built to be extensible. For now two NAT types are defined, ``cnat
* Session lookup : ``rv`` will be set to ``0`` if a session was found
* Translation primitives ``cnat_translation_ip4`` based on sessions
* A session creation primitive ``cnat_session_create``
+* A reverse session creation primitive ``cnat_rsession_create``
-Creating a session will also create a reverse session (for matching return traffic),
-and call a NAT node back that will perform the translation.
+Creating a session will also create reverse session matching return traffic unless told otherwise by setting ``CNAT_TR_FLAG_NO_RETURN_SESSION`` on the translation. This will call the NAT nodes on the return flow and perform the inverse translation.
Known limitations
_________________
-This plugin is still under developpment, it lacks the following features :
+This plugin is still under development, it lacks the following features :
* Load balancing doesn't support parametric probabilities
-* VRFs aren't supported. All rules apply to fib table 0 only
+* VRFs are not supported, all rules apply regardless of the FIB table.
* Programmatic session handling (deletion, lifetime updates) aren't supported
-* ICMP is not yet supported
-* Traffic matching is only done based on ``(proto, dst_addr, dst_port)`` source matching isn't supported
+* translations (i.e. rewriting the destination address) only match on the three
+tuple ``(proto, dst_addr, dst_port)`` other matches are not supported
* Statistics & session tracking are still rudimentary.
diff --git a/src/plugins/cnat/cnat_api.c b/src/plugins/cnat/cnat_api.c
index ea4b3aeaaef..c578e303499 100644
--- a/src/plugins/cnat/cnat_api.c
+++ b/src/plugins/cnat/cnat_api.c
@@ -81,7 +81,7 @@ cnat_endpoint_encode (const cnat_endpoint_t * in,
if (in->ce_flags & CNAT_EP_FLAG_RESOLVED)
ip_address_encode2 (&in->ce_ip, &out->addr);
else
- clib_memset ((void *) &in->ce_ip, 0, sizeof (in->ce_ip));
+ clib_memset (&out->addr, 0, sizeof (out->addr));
}
static void
@@ -97,6 +97,7 @@ vl_api_cnat_translation_update_t_handler (vl_api_cnat_translation_update_t
int rv = 0;
u32 pi, n_paths;
cnat_lb_type_t lb_type;
+ flow_hash_config_t flow_hash_config = 0;
rv = ip_proto_decode (mp->translation.ip_proto, &ip_proto);
@@ -123,7 +124,10 @@ vl_api_cnat_translation_update_t_handler (vl_api_cnat_translation_update_t
flags |= CNAT_FLAG_EXCLUSIVE;
lb_type = (cnat_lb_type_t) mp->translation.lb_type;
- id = cnat_translation_update (&vip, ip_proto, paths, flags, lb_type);
+ flow_hash_config = (flow_hash_config_t) clib_net_to_host_u32 (
+ mp->translation.flow_hash_config);
+ id = cnat_translation_update (&vip, ip_proto, paths, flags, lb_type,
+ flow_hash_config);
vec_free (paths);
diff --git a/src/plugins/cnat/cnat_bihash.h b/src/plugins/cnat/cnat_bihash.h
index c488e61a07d..75099f6bfdb 100644
--- a/src/plugins/cnat/cnat_bihash.h
+++ b/src/plugins/cnat/cnat_bihash.h
@@ -44,11 +44,16 @@ typedef struct
u64 value[7];
} clib_bihash_kv_40_56_t;
+static inline void
+clib_bihash_mark_free_40_56 (clib_bihash_kv_40_56_t *v)
+{
+ v->value[0] = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_40_56 (const clib_bihash_kv_40_56_t *v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value[0] == ~0ULL)
+ if (v->value[0] == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
diff --git a/src/plugins/cnat/cnat_client.c b/src/plugins/cnat/cnat_client.c
index b8fcb9add64..a28896a4c12 100644
--- a/src/plugins/cnat/cnat_client.c
+++ b/src/plugins/cnat/cnat_client.c
@@ -20,10 +20,9 @@
#include <cnat/cnat_translation.h>
cnat_client_t *cnat_client_pool;
-
cnat_client_db_t cnat_client_db;
-
dpo_type_t cnat_client_dpo;
+fib_source_t cnat_fib_source;
static_always_inline u8
cnat_client_is_clone (cnat_client_t * cc)
@@ -34,10 +33,42 @@ cnat_client_is_clone (cnat_client_t * cc)
static void
cnat_client_db_remove (cnat_client_t * cc)
{
+ clib_bihash_kv_16_8_t bkey;
+ if (ip_addr_version (&cc->cc_ip) == AF_IP4)
+ {
+ bkey.key[0] = ip_addr_v4 (&cc->cc_ip).as_u32;
+ bkey.key[1] = 0;
+ }
+ else
+ {
+ bkey.key[0] = ip_addr_v6 (&cc->cc_ip).as_u64[0];
+ bkey.key[1] = ip_addr_v6 (&cc->cc_ip).as_u64[1];
+ }
+
+ clib_bihash_add_del_16_8 (&cnat_client_db.cc_ip_id_hash, &bkey, 0 /* del */);
+}
+
+static void
+cnat_client_db_add (cnat_client_t *cc)
+{
+ index_t cci;
+
+ cci = cc - cnat_client_pool;
+
+ clib_bihash_kv_16_8_t bkey;
+ bkey.value = cci;
if (ip_addr_version (&cc->cc_ip) == AF_IP4)
- hash_unset (cnat_client_db.crd_cip4, ip_addr_v4 (&cc->cc_ip).as_u32);
+ {
+ bkey.key[0] = ip_addr_v4 (&cc->cc_ip).as_u32;
+ bkey.key[1] = 0;
+ }
else
- hash_unset_mem_free (&cnat_client_db.crd_cip6, &ip_addr_v6 (&cc->cc_ip));
+ {
+ bkey.key[0] = ip_addr_v6 (&cc->cc_ip).as_u64[0];
+ bkey.key[1] = ip_addr_v6 (&cc->cc_ip).as_u64[1];
+ }
+
+ clib_bihash_add_del_16_8 (&cnat_client_db.cc_ip_id_hash, &bkey, 1 /* add */);
}
static void
@@ -118,21 +149,6 @@ cnat_client_translation_deleted (index_t cci)
cnat_client_destroy (cc);
}
-static void
-cnat_client_db_add (cnat_client_t * cc)
-{
- index_t cci;
-
- cci = cc - cnat_client_pool;
-
- if (ip_addr_version (&cc->cc_ip) == AF_IP4)
- hash_set (cnat_client_db.crd_cip4, ip_addr_v4 (&cc->cc_ip).as_u32, cci);
- else
- hash_set_mem_alloc (&cnat_client_db.crd_cip6,
- &ip_addr_v6 (&cc->cc_ip), cci);
-}
-
-
index_t
cnat_client_add (const ip_address_t * ip, u8 flags)
{
@@ -228,12 +244,6 @@ int
cnat_client_purge (void)
{
int rv = 0, rrv = 0;
- if ((rv = hash_elts (cnat_client_db.crd_cip6)))
- clib_warning ("len(crd_cip6) isnt 0 but %d", rv);
- rrv |= rv;
- if ((rv = hash_elts (cnat_client_db.crd_cip4)))
- clib_warning ("len(crd_cip4) isnt 0 but %d", rv);
- rrv |= rv;
if ((rv = pool_elts (cnat_client_pool)))
clib_warning ("len(cnat_client_pool) isnt 0 but %d", rv);
rrv |= rv;
@@ -251,9 +261,9 @@ format_cnat_client (u8 * s, va_list * args)
cnat_client_t *cc = pool_elt_at_index (cnat_client_pool, cci);
- s = format (s, "[%d] cnat-client:[%U] tr:%d sess:%d", cci,
- format_ip_address, &cc->cc_ip,
- cc->tr_refcnt, cc->session_refcnt);
+ s = format (s, "[%d] cnat-client:[%U] tr:%d sess:%d locks:%u", cci,
+ format_ip_address, &cc->cc_ip, cc->tr_refcnt, cc->session_refcnt,
+ cc->cc_locks);
if (cc->flags & CNAT_FLAG_EXCLUSIVE)
s = format (s, " exclusive");
@@ -291,7 +301,6 @@ cnat_client_show (vlib_main_t * vm,
vlib_cli_output(vm, "%U", format_cnat_client, cci, 0);
vlib_cli_output (vm, "%d clients", pool_elts (cnat_client_pool));
- vlib_cli_output (vm, "%d timestamps", pool_elts (cnat_timestamps));
}
else
{
@@ -371,12 +380,15 @@ const static dpo_vft_t cnat_client_dpo_vft = {
static clib_error_t *
cnat_client_init (vlib_main_t * vm)
{
+ cnat_main_t *cm = &cnat_main;
cnat_client_dpo = dpo_register_new_type (&cnat_client_dpo_vft,
cnat_client_dpo_nodes);
- cnat_client_db.crd_cip6 = hash_create_mem (0,
- sizeof (ip6_address_t),
- sizeof (uword));
+ clib_bihash_init_16_8 (&cnat_client_db.cc_ip_id_hash, "CNat client DB",
+ cm->client_hash_buckets, cm->client_hash_memory);
+
+ cnat_fib_source = fib_source_allocate ("cnat", CNAT_FIB_SOURCE_PRIORITY,
+ FIB_SOURCE_BH_SIMPLE);
clib_spinlock_init (&cnat_client_db.throttle_lock);
cnat_client_db.throttle_mem =
diff --git a/src/plugins/cnat/cnat_client.h b/src/plugins/cnat/cnat_client.h
index d6e3631d868..4dc6b754b2f 100644
--- a/src/plugins/cnat/cnat_client.h
+++ b/src/plugins/cnat/cnat_client.h
@@ -17,6 +17,7 @@
#define __CNAT_CLIENT_H__
#include <cnat/cnat_types.h>
+#include <vppinfra/bihash_16_8.h>
/**
* A client is a representation of an IP address behind the NAT.
@@ -85,8 +86,6 @@ extern void cnat_client_free_by_ip (ip46_address_t * addr, u8 af);
extern cnat_client_t *cnat_client_pool;
extern dpo_type_t cnat_client_dpo;
-#define CC_INDEX_INVALID ((u32)(~0))
-
static_always_inline cnat_client_t *
cnat_client_get (index_t i)
{
@@ -132,8 +131,7 @@ extern void cnat_client_throttle_pool_process ();
*/
typedef struct cnat_client_db_t_
{
- uword *crd_cip4;
- uword *crd_cip6;
+ clib_bihash_16_8_t cc_ip_id_hash;
/* Pool of addresses that have been throttled
and need to be refcounted before calling
cnat_client_free_by_ip */
@@ -149,27 +147,15 @@ extern cnat_client_db_t cnat_client_db;
static_always_inline cnat_client_t *
cnat_client_ip4_find (const ip4_address_t * ip)
{
- uword *p;
-
- p = hash_get (cnat_client_db.crd_cip4, ip->as_u32);
-
- if (p)
- return (pool_elt_at_index (cnat_client_pool, p[0]));
-
- return (NULL);
-}
-
-static_always_inline u32
-cnat_client_ip4_find_index (const ip4_address_t * ip)
-{
- uword *p;
+ clib_bihash_kv_16_8_t bkey, bval;
- p = hash_get (cnat_client_db.crd_cip4, ip->as_u32);
+ bkey.key[0] = ip->as_u32;
+ bkey.key[1] = 0;
- if (p)
- return p[0];
+ if (clib_bihash_search_16_8 (&cnat_client_db.cc_ip_id_hash, &bkey, &bval))
+ return (NULL);
- return -1;
+ return (pool_elt_at_index (cnat_client_pool, bval.value));
}
/**
@@ -178,14 +164,15 @@ cnat_client_ip4_find_index (const ip4_address_t * ip)
static_always_inline cnat_client_t *
cnat_client_ip6_find (const ip6_address_t * ip)
{
- uword *p;
+ clib_bihash_kv_16_8_t bkey, bval;
- p = hash_get_mem (cnat_client_db.crd_cip6, ip);
+ bkey.key[0] = ip->as_u64[0];
+ bkey.key[1] = ip->as_u64[1];
- if (p)
- return (pool_elt_at_index (cnat_client_pool, p[0]));
+ if (clib_bihash_search_16_8 (&cnat_client_db.cc_ip_id_hash, &bkey, &bval))
+ return (NULL);
- return (NULL);
+ return (pool_elt_at_index (cnat_client_pool, bval.value));
}
/**
diff --git a/src/plugins/cnat/cnat_inline.h b/src/plugins/cnat/cnat_inline.h
index 5a55ecbf3c0..2986b3497a9 100644
--- a/src/plugins/cnat/cnat_inline.h
+++ b/src/plugins/cnat/cnat_inline.h
@@ -19,72 +19,122 @@
#include <cnat/cnat_types.h>
+always_inline int
+cnat_ts_is_free_index (u32 index)
+{
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ return pool_is_free_index (cnat_timestamps.ts_pools[pidx], index);
+}
+
+always_inline cnat_timestamp_t *
+cnat_timestamp_get (u32 index)
+{
+ /* 6 top bits for choosing pool */
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ return pool_elt_at_index (cnat_timestamps.ts_pools[pidx], index);
+}
+
+always_inline cnat_timestamp_t *
+cnat_timestamp_get_if_valid (u32 index)
+{
+ /* 6 top bits for choosing pool */
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ if (pidx >= cnat_timestamps.next_empty_pool_idx)
+ return (NULL);
+ if (pool_is_free_index (cnat_timestamps.ts_pools[pidx], index))
+ return (NULL);
+ return pool_elt_at_index (cnat_timestamps.ts_pools[pidx], index);
+}
+
+always_inline index_t
+cnat_timestamp_alloc ()
+{
+ cnat_timestamp_t *ts;
+ u32 index, pool_sz;
+ uword pidx;
+
+ clib_spinlock_lock (&cnat_timestamps.ts_lock);
+ pidx = clib_bitmap_first_set (cnat_timestamps.ts_free);
+ pool_sz = 1 << (CNAT_TS_BASE_SIZE + pidx);
+ ASSERT (pidx <= cnat_timestamps.next_empty_pool_idx);
+ if (pidx == cnat_timestamps.next_empty_pool_idx)
+ pool_init_fixed (
+ cnat_timestamps.ts_pools[cnat_timestamps.next_empty_pool_idx++],
+ pool_sz);
+ pool_get (cnat_timestamps.ts_pools[pidx], ts);
+ if (pool_elts (cnat_timestamps.ts_pools[pidx]) == pool_sz)
+ clib_bitmap_set (cnat_timestamps.ts_free, pidx, 0);
+ clib_spinlock_unlock (&cnat_timestamps.ts_lock);
+
+ index = (u32) pidx << (32 - CNAT_TS_MPOOL_BITS);
+ return index | (ts - cnat_timestamps.ts_pools[pidx]);
+}
+
+always_inline void
+cnat_timestamp_destroy (u32 index)
+{
+ u32 pidx = index >> (32 - CNAT_TS_MPOOL_BITS);
+ index = index & (0xffffffff >> CNAT_TS_MPOOL_BITS);
+ clib_spinlock_lock (&cnat_timestamps.ts_lock);
+ pool_put_index (cnat_timestamps.ts_pools[pidx], index);
+ clib_bitmap_set (cnat_timestamps.ts_free, pidx, 1);
+ clib_spinlock_unlock (&cnat_timestamps.ts_lock);
+}
+
always_inline u32
cnat_timestamp_new (f64 t)
{
- u32 index;
- cnat_timestamp_t *ts;
- clib_rwlock_writer_lock (&cnat_main.ts_lock);
- pool_get (cnat_timestamps, ts);
+ index_t index = cnat_timestamp_alloc ();
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->last_seen = t;
ts->lifetime = cnat_main.session_max_age;
ts->refcnt = CNAT_TIMESTAMP_INIT_REFCNT;
- index = ts - cnat_timestamps;
- clib_rwlock_writer_unlock (&cnat_main.ts_lock);
return index;
}
always_inline void
cnat_timestamp_inc_refcnt (u32 index)
{
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
- ts->refcnt++;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
+ clib_atomic_add_fetch (&ts->refcnt, 1);
}
always_inline void
cnat_timestamp_update (u32 index, f64 t)
{
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->last_seen = t;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
}
always_inline void
cnat_timestamp_set_lifetime (u32 index, u16 lifetime)
{
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
+ cnat_timestamp_t *ts = cnat_timestamp_get (index);
ts->lifetime = lifetime;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
}
always_inline f64
cnat_timestamp_exp (u32 index)
{
f64 t;
- if (INDEX_INVALID == index)
+ cnat_timestamp_t *ts = cnat_timestamp_get_if_valid (index);
+ if (NULL == ts)
return -1;
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
t = ts->last_seen + (f64) ts->lifetime;
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
return t;
}
always_inline void
cnat_timestamp_free (u32 index)
{
- if (INDEX_INVALID == index)
+ cnat_timestamp_t *ts = cnat_timestamp_get_if_valid (index);
+ if (NULL == ts)
return;
- clib_rwlock_writer_lock (&cnat_main.ts_lock);
- cnat_timestamp_t *ts = pool_elt_at_index (cnat_timestamps, index);
- ts->refcnt--;
- if (0 == ts->refcnt)
- pool_put (cnat_timestamps, ts);
- clib_rwlock_writer_unlock (&cnat_main.ts_lock);
+ if (0 == clib_atomic_sub_fetch (&ts->refcnt, 1))
+ cnat_timestamp_destroy (index);
}
/*
diff --git a/src/plugins/cnat/cnat_maglev.c b/src/plugins/cnat/cnat_maglev.c
new file mode 100644
index 00000000000..2cdb868b3d7
--- /dev/null
+++ b/src/plugins/cnat/cnat_maglev.c
@@ -0,0 +1,379 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <cnat/cnat_maglev.h>
+
+static int
+cnat_maglev_perm_compare (void *_a, void *_b)
+{
+ return *(u64 *) _b - *(u64 *) _a;
+}
+
+/**
+ * Maglev algorithm implementation. This takes permutation as input,
+ * with the values of offset & skip for the backends.
+ * It fills buckets matching the permuntations, provided buckets is
+ * already of length at least M
+ */
+static void
+cnat_maglev_shuffle (cnat_maglev_perm_t *permutation, u32 *buckets)
+{
+ u32 N, M, i, done = 0;
+ u32 *next = 0;
+
+ N = vec_len (permutation);
+ if (N == 0)
+ return;
+
+ M = vec_len (buckets);
+ if (M == 0)
+ return;
+ vec_set (buckets, -1);
+
+ vec_validate (next, N - 1);
+ vec_zero (next);
+
+ while (1)
+ {
+ for (i = 0; i < N; i++)
+ {
+ u32 c = (permutation[i].offset + next[i] * permutation[i].skip) % M;
+ while (buckets[c] != (u32) -1)
+ {
+ next[i]++;
+ c = (permutation[i].offset + next[i] * permutation[i].skip) % M;
+ }
+
+ buckets[c] = permutation[i].index;
+ next[i]++;
+ done++;
+
+ if (done == M)
+ {
+ vec_free (next);
+ return;
+ }
+ }
+ }
+}
+
+void
+cnat_translation_init_maglev (cnat_translation_t *ct)
+{
+ cnat_maglev_perm_t *permutations = NULL;
+ cnat_main_t *cm = &cnat_main;
+ cnat_ep_trk_t *trk;
+ u32 backend_index = 0;
+
+ if (vec_len (ct->ct_active_paths) == 0)
+ return;
+
+ vec_foreach (trk, ct->ct_active_paths)
+ {
+ cnat_maglev_perm_t permutation;
+ u32 h1, h2;
+
+ if (AF_IP4 == ip_addr_version (&trk->ct_ep[VLIB_TX].ce_ip))
+ {
+ u32 a, b, c;
+ a = ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32;
+ b = (u64) trk->ct_ep[VLIB_TX].ce_port;
+ c = 0;
+ hash_v3_mix32 (a, b, c);
+ hash_v3_finalize32 (a, b, c);
+ h1 = c;
+ h2 = b;
+ }
+ else
+ {
+ u64 a, b, c;
+ a = ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[0];
+ b = ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[1];
+ c = (u64) trk->ct_ep[VLIB_TX].ce_port;
+ hash_mix64 (a, b, c);
+ h1 = c;
+ h2 = b;
+ }
+
+ permutation.offset = h1 % cm->maglev_len;
+ permutation.skip = h2 % (cm->maglev_len - 1) + 1;
+ permutation.index = backend_index++;
+
+ if (trk->ct_flags & CNAT_TRK_FLAG_TEST_DISABLED)
+ continue;
+
+ vec_add1 (permutations, permutation);
+ }
+
+ vec_sort_with_function (permutations, cnat_maglev_perm_compare);
+
+ vec_validate (ct->lb_maglev, cm->maglev_len - 1);
+
+ cnat_maglev_shuffle (permutations, ct->lb_maglev);
+
+ vec_free (permutations);
+}
+
+static int
+cnat_u32_vec_contains (u32 *v, u32 e)
+{
+ int i;
+
+ vec_foreach_index (i, v)
+ if (v[i] == e)
+ return 1;
+
+ return 0;
+}
+
+static void
+cnat_maglev_print_changes (vlib_main_t *vm, u32 *changed_bk_indices,
+ u32 *old_maglev_lb, u32 *new_maglev_lb)
+{
+ u32 good_flow_buckets = 0, reset_flow_buckets = 0, stable_to_reset = 0;
+ u32 reset_to_stable = 0, switched_stable = 0;
+ if (vec_len (new_maglev_lb) == 0)
+ return;
+ for (u32 i = 0; i < vec_len (new_maglev_lb); i++)
+ {
+ u8 is_new_changed =
+ cnat_u32_vec_contains (changed_bk_indices, new_maglev_lb[i]);
+ u8 is_old_changed =
+ cnat_u32_vec_contains (changed_bk_indices, old_maglev_lb[i]);
+ if (new_maglev_lb[i] == old_maglev_lb[i])
+ {
+ if (is_new_changed)
+ reset_flow_buckets++;
+ else
+ good_flow_buckets++;
+ }
+ else
+ {
+ if (is_new_changed)
+ stable_to_reset++;
+ else if (is_old_changed)
+ reset_to_stable++;
+ else
+ switched_stable++;
+ }
+ }
+ vlib_cli_output (vm,
+ "good B->B:%d | lost A->A':%d A->B:%d ~%0.2f%% | bad "
+ "B->A':%d B->C:%d ~%0.2f%%",
+ good_flow_buckets, reset_flow_buckets, reset_to_stable,
+ (f64) (reset_flow_buckets + reset_to_stable) /
+ vec_len (new_maglev_lb) * 100.0,
+ stable_to_reset, switched_stable,
+ (f64) (stable_to_reset + switched_stable) /
+ vec_len (new_maglev_lb) * 100.0);
+}
+
+static u8 *
+format_cnat_maglev_buckets (u8 *s, va_list *args)
+{
+ u32 *buckets = va_arg (*args, u32 *);
+ u32 backend_idx = va_arg (*args, u32);
+ u32 count = va_arg (*args, u32);
+
+ for (u32 ii = 0; ii < vec_len (buckets); ii++)
+ if (buckets[ii] == backend_idx)
+ {
+ s = format (s, "%d,", ii);
+ if (--count == 0)
+ return (s);
+ }
+ return (s);
+}
+
+static clib_error_t *
+cnat_translation_test_init_maglev (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ cnat_translation_t *trs = 0, *ct;
+ u64 num_backends = 0, n_tests = 0;
+ cnat_main_t *cm = &cnat_main;
+ cnat_ep_trk_t *trk;
+ u32 rnd;
+ u32 n_changes = 0, n_remove = 0, verbose = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "tests %d", &n_tests))
+ ;
+ else if (unformat (input, "backends %d", &num_backends))
+ ;
+ else if (unformat (input, "len %d", &cm->maglev_len))
+ ;
+ else if (unformat (input, "change %d", &n_changes))
+ ;
+ else if (unformat (input, "rm %d", &n_remove))
+ ;
+ else if (unformat (input, "verbose %d", &verbose))
+ ;
+ else
+ return (clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input));
+ }
+
+ if (num_backends == 0 || n_tests == 0)
+ return (clib_error_return (0, "No backends / tests to run"));
+ ;
+
+ vlib_cli_output (vm, "generating random backends...");
+ rnd = random_default_seed ();
+
+ vec_validate (trs, n_tests - 1);
+ vec_foreach (ct, trs)
+ {
+ vec_validate (ct->ct_active_paths, num_backends - 1);
+ vec_foreach (trk, ct->ct_active_paths)
+ {
+ trk->ct_flags = 0;
+ ip_addr_version (&trk->ct_ep[VLIB_TX].ce_ip) = AF_IP4;
+ ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32 = random_u32 (&rnd);
+ trk->ct_ep[VLIB_TX].ce_port = random_u32 (&rnd);
+ }
+ }
+
+ vlib_cli_output (vm, "testing...");
+ f64 start_time = vlib_time_now (vm);
+ vec_foreach (ct, trs)
+ cnat_translation_init_maglev (ct);
+ f64 d = vlib_time_now (vm) - start_time;
+
+ vlib_cli_output (vm, "Test took : %U", format_duration, d);
+ vlib_cli_output (vm, "Per pool : %U", format_duration, d / n_tests);
+
+ /* sanity checking of the output */
+ u32 *backend_freqs = 0;
+ vec_validate (backend_freqs, num_backends - 1);
+ vec_foreach (ct, trs)
+ {
+ if (vec_len (ct->lb_maglev) != cm->maglev_len)
+ vlib_cli_output (vm, "Unexpected bucket length %d",
+ vec_len (ct->lb_maglev));
+
+ vec_zero (backend_freqs);
+ for (u32 i = 0; i < vec_len (ct->lb_maglev); i++)
+ {
+ if (ct->lb_maglev[i] >= num_backends)
+ clib_warning ("out of bound backend");
+ backend_freqs[ct->lb_maglev[i]]++;
+ }
+ u32 fmin = ~0, fmax = 0;
+ for (u32 i = 0; i < num_backends; i++)
+ {
+ if (backend_freqs[i] > fmax)
+ fmax = backend_freqs[i];
+ if (backend_freqs[i] < fmin)
+ fmin = backend_freqs[i];
+ }
+ f64 fdiff = (fmax - fmin);
+ if (fdiff / vec_len (ct->lb_maglev) - 1 > 0.02)
+ vlib_cli_output (vm, "More than 2%% frequency diff (min %d max %d)",
+ fmin, fmax);
+ }
+ vec_free (backend_freqs);
+
+ int i = 0;
+ if (verbose)
+ vec_foreach (ct, trs)
+ {
+ vlib_cli_output (vm, "Translation %d", i++);
+ for (u32 i = 0; i < verbose; i++)
+ {
+ u32 j = random_u32 (&rnd) % vec_len (ct->ct_active_paths);
+ trk = &ct->ct_active_paths[j];
+ vlib_cli_output (
+ vm, "[%03d] %U:%d buckets:%U", j, format_ip_address,
+ &trk->ct_ep[VLIB_TX].ce_ip, trk->ct_ep[VLIB_TX].ce_port,
+ format_cnat_maglev_buckets, ct->lb_maglev, j, verbose);
+ }
+ }
+
+ if (n_remove != 0)
+ {
+ vlib_cli_output (
+ vm, "Removing %d entries (refered to as A), others (B,C) stay same",
+ n_remove);
+ vec_foreach (ct, trs)
+ {
+ u32 *old_maglev_lb = 0;
+ u32 *changed_bk_indices = 0;
+ if (vec_len (ct->lb_maglev) != cm->maglev_len)
+ vlib_cli_output (vm, "Unexpected bucket length %d",
+ vec_len (ct->lb_maglev));
+
+ vec_validate (changed_bk_indices, n_remove - 1);
+ for (u32 i = 0; i < n_remove; i++)
+ {
+ /* remove n_remove backends from the LB set */
+ changed_bk_indices[i] =
+ random_u32 (&rnd) % vec_len (ct->ct_active_paths);
+ trk = &ct->ct_active_paths[changed_bk_indices[i]];
+ trk->ct_flags |= CNAT_TRK_FLAG_TEST_DISABLED;
+ }
+
+ old_maglev_lb = vec_dup (ct->lb_maglev);
+ cnat_translation_init_maglev (ct);
+
+ cnat_maglev_print_changes (vm, changed_bk_indices, old_maglev_lb,
+ ct->lb_maglev);
+
+ vec_free (changed_bk_indices);
+ vec_free (old_maglev_lb);
+ }
+ }
+
+ /* Reshuffle and check changes */
+ if (n_changes != 0)
+ {
+ vlib_cli_output (
+ vm,
+ "Changing %d entries (refered to as A->A'), others (B,C) stay same",
+ n_changes);
+ vec_foreach (ct, trs)
+ {
+ if (vec_len (ct->lb_maglev) != cm->maglev_len)
+ vlib_cli_output (vm, "Unexpected bucket length %d",
+ vec_len (ct->lb_maglev));
+
+ u32 *old_maglev_lb = 0;
+ u32 *changed_bk_indices = 0;
+
+ vec_validate (changed_bk_indices, n_changes - 1);
+ for (u32 i = 0; i < n_changes; i++)
+ {
+ /* Change n_changes backends in the LB set */
+ changed_bk_indices[i] =
+ random_u32 (&rnd) % vec_len (ct->ct_active_paths);
+ trk = &ct->ct_active_paths[changed_bk_indices[i]];
+ ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32 =
+ random_u32 (&rnd);
+ trk->ct_ep[VLIB_TX].ce_port = random_u32 (&rnd) & 0xffff;
+ }
+ old_maglev_lb = vec_dup (ct->lb_maglev);
+
+ cnat_translation_init_maglev (ct);
+ cnat_maglev_print_changes (vm, changed_bk_indices, old_maglev_lb,
+ ct->lb_maglev);
+
+ vec_free (changed_bk_indices);
+ vec_free (old_maglev_lb);
+ }
+ }
+
+ vec_foreach (ct, trs)
+ vec_free (ct->ct_active_paths);
+ vec_free (trs);
+
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (cnat_translation_test_init_maglev_cmd, static) = {
+ .path = "test cnat maglev",
+ .short_help = "test cnat maglev tests [n_tests] backends [num_backends] len "
+ "[maglev_len]",
+ .function = cnat_translation_test_init_maglev,
+};
diff --git a/src/plugins/cnat/cnat_maglev.h b/src/plugins/cnat/cnat_maglev.h
new file mode 100644
index 00000000000..a71dd3ce796
--- /dev/null
+++ b/src/plugins/cnat/cnat_maglev.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#ifndef __CNAT_MAGLEV_H__
+#define __CNAT_MAGLEV_H__
+
+#include <cnat/cnat_types.h>
+#include <cnat/cnat_translation.h>
+
+typedef struct
+{
+ /* offset & skip used for sorting, should be first */
+ u32 offset;
+ u32 skip;
+ u32 index;
+} cnat_maglev_perm_t;
+
+extern void cnat_translation_init_maglev (cnat_translation_t *ct);
+
+#endif \ No newline at end of file
diff --git a/src/plugins/cnat/cnat_node.h b/src/plugins/cnat/cnat_node.h
index 246fdb8ba57..d81f6745bc4 100644
--- a/src/plugins/cnat/cnat_node.h
+++ b/src/plugins/cnat/cnat_node.h
@@ -19,6 +19,7 @@
#include <vlibmemory/api.h>
#include <vnet/dpo/load_balance.h>
#include <vnet/dpo/load_balance_map.h>
+#include <vnet/ip/ip_psh_cksum.h>
#include <cnat/cnat_session.h>
#include <cnat/cnat_client.h>
@@ -169,86 +170,92 @@ cmp_ip6_address (const ip6_address_t * a1, const ip6_address_t * a2)
* Inline translation functions
*/
-static_always_inline u8
-has_ip6_address (ip6_address_t * a)
+static_always_inline u16
+ip4_pseudo_header_cksum2 (ip4_header_t *ip4, ip4_address_t address[VLIB_N_DIR])
{
- return ((0 != a->as_u64[0]) || (0 != a->as_u64[1]));
+ ip4_psh_t psh = { 0 };
+ psh.src = address[VLIB_RX];
+ psh.dst = address[VLIB_TX];
+ psh.proto = ip4->protocol;
+ psh.l4len = clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
+ sizeof (ip4_header_t));
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t)));
}
static_always_inline void
-cnat_ip4_translate_l4 (ip4_header_t * ip4, udp_header_t * udp,
- ip_csum_t * sum,
+cnat_ip4_translate_l4 (ip4_header_t *ip4, udp_header_t *udp, ip_csum_t *sum,
ip4_address_t new_addr[VLIB_N_DIR],
- u16 new_port[VLIB_N_DIR])
+ u16 new_port[VLIB_N_DIR], u32 oflags)
{
u16 old_port[VLIB_N_DIR];
- ip4_address_t old_addr[VLIB_N_DIR];
+ old_port[VLIB_TX] = udp->dst_port;
+ old_port[VLIB_RX] = udp->src_port;
- /* Fastpath no checksum */
- if (PREDICT_TRUE (0 == *sum))
+ udp->dst_port = new_port[VLIB_TX];
+ udp->src_port = new_port[VLIB_RX];
+
+ if (oflags &
+ (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
{
- udp->dst_port = new_port[VLIB_TX];
- udp->src_port = new_port[VLIB_RX];
+ *sum = ip4_pseudo_header_cksum2 (ip4, new_addr);
return;
}
- old_port[VLIB_TX] = udp->dst_port;
- old_port[VLIB_RX] = udp->src_port;
- old_addr[VLIB_TX] = ip4->dst_address;
- old_addr[VLIB_RX] = ip4->src_address;
+ *sum = ip_csum_update (*sum, ip4->dst_address.as_u32,
+ new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
+ *sum = ip_csum_update (*sum, ip4->src_address.as_u32,
+ new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
- if (new_addr[VLIB_TX].as_u32)
+ *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
+ udp_header_t, dst_port);
+ *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
+ udp_header_t, src_port);
+}
+
+static_always_inline void
+cnat_ip4_translate_sctp (ip4_header_t *ip4, sctp_header_t *sctp,
+ u16 new_port[VLIB_N_DIR])
+{
+ /* Fastpath no checksum */
+ if (PREDICT_TRUE (0 == sctp->checksum))
{
- *sum =
- ip_csum_update (*sum, old_addr[VLIB_TX].as_u32,
- new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
+ sctp->dst_port = new_port[VLIB_TX];
+ sctp->src_port = new_port[VLIB_RX];
+ return;
}
+
if (new_port[VLIB_TX])
- {
- udp->dst_port = new_port[VLIB_TX];
- *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- }
- if (new_addr[VLIB_RX].as_u32)
- {
- *sum =
- ip_csum_update (*sum, old_addr[VLIB_RX].as_u32,
- new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
- }
+ sctp->dst_port = new_port[VLIB_TX];
if (new_port[VLIB_RX])
- {
- udp->src_port = new_port[VLIB_RX];
- *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- }
+ sctp->src_port = new_port[VLIB_RX];
+
+ sctp->checksum = 0;
+ sctp->checksum = clib_host_to_little_u32 (~clib_crc32c_with_init (
+ (u8 *) sctp, ntohs (ip4->length) - sizeof (ip4_header_t),
+ ~0 /* init value */));
}
static_always_inline void
-cnat_ip4_translate_l3 (ip4_header_t * ip4, ip4_address_t new_addr[VLIB_N_DIR])
+cnat_ip4_translate_l3 (ip4_header_t *ip4, ip4_address_t new_addr[VLIB_N_DIR],
+ u32 oflags)
{
ip4_address_t old_addr[VLIB_N_DIR];
ip_csum_t sum;
-
old_addr[VLIB_TX] = ip4->dst_address;
old_addr[VLIB_RX] = ip4->src_address;
+ ip4->dst_address = new_addr[VLIB_TX];
+ ip4->src_address = new_addr[VLIB_RX];
+
+ // We always compute the IP checksum even if oflags &
+ // VNET_BUFFER_OFFLOAD_F_IP_CKSUM is set as this is relatively inexpensive
+ // and will allow avoiding issues in driver that do not behave properly
+ // downstream.
sum = ip4->checksum;
- if (new_addr[VLIB_TX].as_u32)
- {
- ip4->dst_address = new_addr[VLIB_TX];
- sum =
- ip_csum_update (sum, old_addr[VLIB_TX].as_u32,
+ sum = ip_csum_update (sum, old_addr[VLIB_TX].as_u32,
new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
- }
- if (new_addr[VLIB_RX].as_u32)
- {
- ip4->src_address = new_addr[VLIB_RX];
- sum =
- ip_csum_update (sum, old_addr[VLIB_RX].as_u32,
+ sum = ip_csum_update (sum, old_addr[VLIB_RX].as_u32,
new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
- }
ip4->checksum = ip_csum_fold (sum);
}
@@ -257,48 +264,40 @@ cnat_tcp_update_session_lifetime (tcp_header_t * tcp, u32 index)
{
cnat_main_t *cm = &cnat_main;
if (PREDICT_FALSE (tcp_fin (tcp)))
- {
- cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
- }
+ cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
if (PREDICT_FALSE (tcp_rst (tcp)))
- {
- cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
- }
+ cnat_timestamp_set_lifetime (index, CNAT_DEFAULT_TCP_RST_TIMEOUT);
if (PREDICT_FALSE (tcp_syn (tcp) && tcp_ack (tcp)))
- {
- cnat_timestamp_set_lifetime (index, cm->tcp_max_age);
- }
+ cnat_timestamp_set_lifetime (index, cm->tcp_max_age);
}
static_always_inline void
-cnat_translation_icmp4_echo (ip4_header_t * ip4, icmp46_header_t * icmp,
+cnat_translation_icmp4_echo (ip4_header_t *ip4, icmp46_header_t *icmp,
ip4_address_t new_addr[VLIB_N_DIR],
- u16 new_port[VLIB_N_DIR])
+ u16 new_port[VLIB_N_DIR], u32 oflags)
{
ip_csum_t sum;
u16 old_port;
cnat_echo_header_t *echo = (cnat_echo_header_t *) (icmp + 1);
- cnat_ip4_translate_l3 (ip4, new_addr);
+ cnat_ip4_translate_l3 (ip4, new_addr, oflags);
old_port = echo->identifier;
echo->identifier = new_port[VLIB_RX];
sum = icmp->checksum;
- sum = ip_csum_update (sum, old_port, new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
+ sum =
+ ip_csum_update (sum, old_port, new_port[VLIB_RX], udp_header_t, src_port);
icmp->checksum = ip_csum_fold (sum);
}
static_always_inline void
-cnat_translation_icmp4_error (ip4_header_t * outer_ip4,
- icmp46_header_t * icmp,
+cnat_translation_icmp4_error (ip4_header_t *outer_ip4, icmp46_header_t *icmp,
ip4_address_t outer_new_addr[VLIB_N_DIR],
- u16 outer_new_port[VLIB_N_DIR],
- u8 snat_outer_ip)
+ u16 outer_new_port[VLIB_N_DIR], u8 snat_outer_ip,
+ u32 oflags)
{
ip4_address_t new_addr[VLIB_N_DIR];
ip4_address_t old_addr[VLIB_N_DIR];
@@ -327,18 +326,20 @@ cnat_translation_icmp4_error (ip4_header_t * outer_ip4,
/* translate outer ip. */
if (!snat_outer_ip)
outer_new_addr[VLIB_RX] = outer_ip4->src_address;
- cnat_ip4_translate_l3 (outer_ip4, outer_new_addr);
+ cnat_ip4_translate_l3 (outer_ip4, outer_new_addr, oflags);
if (ip4->protocol == IP_PROTOCOL_TCP)
{
inner_l4_old_sum = inner_l4_sum = tcp->checksum;
- cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
+ cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port,
+ 0 /* flags */);
tcp->checksum = ip_csum_fold (inner_l4_sum);
}
else if (ip4->protocol == IP_PROTOCOL_UDP)
{
inner_l4_old_sum = inner_l4_sum = udp->checksum;
- cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port);
+ cnat_ip4_translate_l4 (ip4, udp, &inner_l4_sum, new_addr, new_port,
+ 0 /* flags */);
udp->checksum = ip_csum_fold (inner_l4_sum);
}
else
@@ -351,37 +352,30 @@ cnat_translation_icmp4_error (ip4_header_t * outer_ip4,
/* UDP/TCP Ports changed */
if (old_port[VLIB_TX] && new_port[VLIB_TX])
sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
+ udp_header_t, dst_port);
if (old_port[VLIB_RX] && new_port[VLIB_RX])
sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
-
+ udp_header_t, src_port);
- cnat_ip4_translate_l3 (ip4, new_addr);
+ cnat_ip4_translate_l3 (ip4, new_addr, 0 /* oflags */);
ip_csum_t new_ip_sum = ip4->checksum;
/* IP checksum changed */
sum = ip_csum_update (sum, old_ip_sum, new_ip_sum, ip4_header_t, checksum);
/* IP src/dst addr changed */
- if (new_addr[VLIB_TX].as_u32)
- sum =
- ip_csum_update (sum, old_addr[VLIB_TX].as_u32, new_addr[VLIB_TX].as_u32,
- ip4_header_t, dst_address);
+ sum = ip_csum_update (sum, old_addr[VLIB_TX].as_u32,
+ new_addr[VLIB_TX].as_u32, ip4_header_t, dst_address);
- if (new_addr[VLIB_RX].as_u32)
- sum =
- ip_csum_update (sum, old_addr[VLIB_RX].as_u32, new_addr[VLIB_RX].as_u32,
- ip4_header_t, src_address);
+ sum = ip_csum_update (sum, old_addr[VLIB_RX].as_u32,
+ new_addr[VLIB_RX].as_u32, ip4_header_t, src_address);
icmp->checksum = ip_csum_fold (sum);
}
static_always_inline void
-cnat_translation_ip4 (const cnat_session_t * session,
- ip4_header_t * ip4, udp_header_t * udp)
+cnat_translation_ip4 (const cnat_session_t *session, ip4_header_t *ip4,
+ udp_header_t *udp, u32 oflags)
{
tcp_header_t *tcp = (tcp_header_t *) udp;
ip4_address_t new_addr[VLIB_N_DIR];
@@ -395,17 +389,23 @@ cnat_translation_ip4 (const cnat_session_t * session,
if (ip4->protocol == IP_PROTOCOL_TCP)
{
ip_csum_t sum = tcp->checksum;
- cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
+ cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port, oflags);
tcp->checksum = ip_csum_fold (sum);
- cnat_ip4_translate_l3 (ip4, new_addr);
+ cnat_ip4_translate_l3 (ip4, new_addr, oflags);
cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
}
else if (ip4->protocol == IP_PROTOCOL_UDP)
{
ip_csum_t sum = udp->checksum;
- cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port);
+ cnat_ip4_translate_l4 (ip4, udp, &sum, new_addr, new_port, oflags);
udp->checksum = ip_csum_fold (sum);
- cnat_ip4_translate_l3 (ip4, new_addr);
+ cnat_ip4_translate_l3 (ip4, new_addr, oflags);
+ }
+ else if (ip4->protocol == IP_PROTOCOL_SCTP)
+ {
+ sctp_header_t *sctp = (sctp_header_t *) udp;
+ cnat_ip4_translate_sctp (ip4, sctp, new_port);
+ cnat_ip4_translate_l3 (ip4, new_addr, oflags);
}
else if (ip4->protocol == IP_PROTOCOL_ICMP)
{
@@ -417,74 +417,65 @@ cnat_translation_ip4 (const cnat_session_t * session,
(ip4->src_address.as_u32 ==
session->key.cs_ip[VLIB_RX].ip4.as_u32);
cnat_translation_icmp4_error (ip4, icmp, new_addr, new_port,
- snat_outer_ip);
+ snat_outer_ip, oflags);
}
else if (icmp_type_is_echo (icmp->type))
- cnat_translation_icmp4_echo (ip4, icmp, new_addr, new_port);
+ cnat_translation_icmp4_echo (ip4, icmp, new_addr, new_port, oflags);
}
}
static_always_inline void
cnat_ip6_translate_l3 (ip6_header_t * ip6, ip6_address_t new_addr[VLIB_N_DIR])
{
- if (has_ip6_address (&new_addr[VLIB_TX]))
- ip6_address_copy (&ip6->dst_address, &new_addr[VLIB_TX]);
- if (has_ip6_address (&new_addr[VLIB_RX]))
- ip6_address_copy (&ip6->src_address, &new_addr[VLIB_RX]);
+ ip6_address_copy (&ip6->dst_address, &new_addr[VLIB_TX]);
+ ip6_address_copy (&ip6->src_address, &new_addr[VLIB_RX]);
+}
+
+static_always_inline u16
+ip6_pseudo_header_cksum2 (ip6_header_t *ip6, ip6_address_t address[VLIB_N_DIR])
+{
+ ip6_psh_t psh = { 0 };
+ psh.src = address[VLIB_RX];
+ psh.dst = address[VLIB_TX];
+ psh.l4len = ip6->payload_length;
+ psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t)));
}
static_always_inline void
-cnat_ip6_translate_l4 (ip6_header_t * ip6, udp_header_t * udp,
- ip_csum_t * sum,
+cnat_ip6_translate_l4 (ip6_header_t *ip6, udp_header_t *udp, ip_csum_t *sum,
ip6_address_t new_addr[VLIB_N_DIR],
- u16 new_port[VLIB_N_DIR])
+ u16 new_port[VLIB_N_DIR], u32 oflags)
{
u16 old_port[VLIB_N_DIR];
- ip6_address_t old_addr[VLIB_N_DIR];
+ old_port[VLIB_TX] = udp->dst_port;
+ old_port[VLIB_RX] = udp->src_port;
- /* Fastpath no checksum */
- if (PREDICT_TRUE (0 == *sum))
+ udp->dst_port = new_port[VLIB_TX];
+ udp->src_port = new_port[VLIB_RX];
+
+ if (oflags &
+ (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM))
{
- udp->dst_port = new_port[VLIB_TX];
- udp->src_port = new_port[VLIB_RX];
+ *sum = ip6_pseudo_header_cksum2 (ip6, new_addr);
return;
}
- old_port[VLIB_TX] = udp->dst_port;
- old_port[VLIB_RX] = udp->src_port;
- ip6_address_copy (&old_addr[VLIB_TX], &ip6->dst_address);
- ip6_address_copy (&old_addr[VLIB_RX], &ip6->src_address);
+ *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[0]);
+ *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[1]);
+ *sum = ip_csum_sub_even (*sum, ip6->dst_address.as_u64[0]);
+ *sum = ip_csum_sub_even (*sum, ip6->dst_address.as_u64[1]);
- if (has_ip6_address (&new_addr[VLIB_TX]))
- {
- *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[0]);
- *sum = ip_csum_add_even (*sum, new_addr[VLIB_TX].as_u64[1]);
- *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[0]);
- *sum = ip_csum_sub_even (*sum, old_addr[VLIB_TX].as_u64[1]);
- }
+ *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[0]);
+ *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[1]);
+ *sum = ip_csum_sub_even (*sum, ip6->src_address.as_u64[0]);
+ *sum = ip_csum_sub_even (*sum, ip6->src_address.as_u64[1]);
- if (new_port[VLIB_TX])
- {
- udp->dst_port = new_port[VLIB_TX];
- *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- }
- if (has_ip6_address (&new_addr[VLIB_RX]))
- {
- *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[0]);
- *sum = ip_csum_add_even (*sum, new_addr[VLIB_RX].as_u64[1]);
- *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[0]);
- *sum = ip_csum_sub_even (*sum, old_addr[VLIB_RX].as_u64[1]);
- }
+ *sum = ip_csum_update (*sum, old_port[VLIB_TX], new_port[VLIB_TX],
+ udp_header_t, dst_port);
- if (new_port[VLIB_RX])
- {
- udp->src_port = new_port[VLIB_RX];
- *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- }
+ *sum = ip_csum_update (*sum, old_port[VLIB_RX], new_port[VLIB_RX],
+ udp_header_t, src_port);
}
static_always_inline void
@@ -503,26 +494,20 @@ cnat_translation_icmp6_echo (ip6_header_t * ip6, icmp46_header_t * icmp,
sum = icmp->checksum;
cnat_ip6_translate_l3 (ip6, new_addr);
- if (has_ip6_address (&new_addr[VLIB_TX]))
- {
- sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
- }
- if (has_ip6_address (&new_addr[VLIB_RX]))
- {
- sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
- }
+ sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
+
+ sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
echo->identifier = new_port[VLIB_RX];
- sum = ip_csum_update (sum, old_port, new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
+ sum =
+ ip_csum_update (sum, old_port, new_port[VLIB_RX], udp_header_t, src_port);
icmp->checksum = ip_csum_fold (sum);
}
@@ -566,79 +551,64 @@ cnat_translation_icmp6_error (ip6_header_t * outer_ip6,
if (!snat_outer_ip)
ip6_address_copy (&outer_new_addr[VLIB_RX], &outer_ip6->src_address);
cnat_ip6_translate_l3 (outer_ip6, outer_new_addr);
- if (has_ip6_address (&outer_new_addr[VLIB_TX]))
- {
- sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[1]);
- sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[1]);
- }
- if (has_ip6_address (&outer_new_addr[VLIB_RX]))
- {
- sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[1]);
- sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[1]);
- }
+ sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_add_even (sum, outer_new_addr[VLIB_TX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_TX].as_u64[1]);
+
+ sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_add_even (sum, outer_new_addr[VLIB_RX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, outer_old_addr[VLIB_RX].as_u64[1]);
/* Translate inner TCP / UDP */
if (ip6->protocol == IP_PROTOCOL_TCP)
{
inner_l4_old_sum = inner_l4_sum = tcp->checksum;
- cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
+ cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port,
+ 0 /* oflags */);
tcp->checksum = ip_csum_fold (inner_l4_sum);
}
else if (ip6->protocol == IP_PROTOCOL_UDP)
{
inner_l4_old_sum = inner_l4_sum = udp->checksum;
- cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port);
+ cnat_ip6_translate_l4 (ip6, udp, &inner_l4_sum, new_addr, new_port,
+ 0 /* oflags */);
udp->checksum = ip_csum_fold (inner_l4_sum);
}
else
return;
/* UDP/TCP checksum changed */
- sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum,
- ip4_header_t /* cheat */ ,
+ sum = ip_csum_update (sum, inner_l4_old_sum, inner_l4_sum, ip4_header_t,
checksum);
/* UDP/TCP Ports changed */
- if (old_port[VLIB_TX] && new_port[VLIB_TX])
- sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
-
- if (old_port[VLIB_RX] && new_port[VLIB_RX])
- sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
+ sum = ip_csum_update (sum, old_port[VLIB_TX], new_port[VLIB_TX],
+ udp_header_t, dst_port);
+ sum = ip_csum_update (sum, old_port[VLIB_RX], new_port[VLIB_RX],
+ udp_header_t, src_port);
cnat_ip6_translate_l3 (ip6, new_addr);
/* IP src/dst addr changed */
- if (has_ip6_address (&new_addr[VLIB_TX]))
- {
- sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
- }
+ sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_add_even (sum, new_addr[VLIB_TX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_TX].as_u64[1]);
- if (has_ip6_address (&new_addr[VLIB_RX]))
- {
- sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
- sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
- }
+ sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_add_even (sum, new_addr[VLIB_RX].as_u64[1]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[0]);
+ sum = ip_csum_sub_even (sum, old_addr[VLIB_RX].as_u64[1]);
icmp->checksum = ip_csum_fold (sum);
}
static_always_inline void
-cnat_translation_ip6 (const cnat_session_t * session,
- ip6_header_t * ip6, udp_header_t * udp)
+cnat_translation_ip6 (const cnat_session_t *session, ip6_header_t *ip6,
+ udp_header_t *udp, u32 oflags)
{
tcp_header_t *tcp = (tcp_header_t *) udp;
ip6_address_t new_addr[VLIB_N_DIR];
@@ -652,7 +622,7 @@ cnat_translation_ip6 (const cnat_session_t * session,
if (ip6->protocol == IP_PROTOCOL_TCP)
{
ip_csum_t sum = tcp->checksum;
- cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
+ cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port, oflags);
tcp->checksum = ip_csum_fold (sum);
cnat_ip6_translate_l3 (ip6, new_addr);
cnat_tcp_update_session_lifetime (tcp, session->value.cs_ts_index);
@@ -660,7 +630,7 @@ cnat_translation_ip6 (const cnat_session_t * session,
else if (ip6->protocol == IP_PROTOCOL_UDP)
{
ip_csum_t sum = udp->checksum;
- cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port);
+ cnat_ip6_translate_l4 (ip6, udp, &sum, new_addr, new_port, oflags);
udp->checksum = ip_csum_fold (sum);
cnat_ip6_translate_l3 (ip6, new_addr);
}
@@ -743,6 +713,18 @@ cnat_session_make_key (vlib_buffer_t *b, ip_address_family_t af,
session->key.cs_port[VLIB_RX] = udp->src_port;
session->key.cs_port[VLIB_TX] = udp->dst_port;
}
+ else if (ip4->protocol == IP_PROTOCOL_SCTP)
+ {
+ sctp_header_t *sctp;
+ sctp = (sctp_header_t *) (ip4 + 1);
+ ip46_address_set_ip4 (&session->key.cs_ip[VLIB_TX],
+ &ip4->dst_address);
+ ip46_address_set_ip4 (&session->key.cs_ip[VLIB_RX],
+ &ip4->src_address);
+ session->key.cs_proto = ip4->protocol;
+ session->key.cs_port[VLIB_RX] = sctp->src_port;
+ session->key.cs_port[VLIB_TX] = sctp->dst_port;
+ }
else
goto error;
}
@@ -837,20 +819,74 @@ cnat_load_balance (const cnat_translation_t *ct, ip_address_family_t af,
* rsession_location is the location the (return) session will be
* matched at
*/
+
+static_always_inline void
+cnat_session_create (cnat_session_t *session, cnat_node_ctx_t *ctx)
+{
+ cnat_bihash_kv_t *bkey = (cnat_bihash_kv_t *) session;
+
+ session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
+ cnat_bihash_add_del (&cnat_session_db, bkey, 1);
+}
+
static_always_inline void
-cnat_session_create (cnat_session_t *session, cnat_node_ctx_t *ctx,
- cnat_session_location_t rsession_location,
- u8 rsession_flags)
+cnat_rsession_create (cnat_session_t *session, cnat_node_ctx_t *ctx,
+ cnat_session_location_t rsession_location,
+ cnat_session_flag_t rsession_flags)
{
cnat_client_t *cc;
cnat_bihash_kv_t rkey;
cnat_session_t *rsession = (cnat_session_t *) & rkey;
cnat_bihash_kv_t *bkey = (cnat_bihash_kv_t *) session;
- cnat_bihash_kv_t rvalue;
- int rv;
+ int rv, n_retries = 0;
+ static u32 sport_seed = 0;
- session->value.cs_ts_index = cnat_timestamp_new (ctx->now);
- cnat_bihash_add_del (&cnat_session_db, bkey, 1);
+ cnat_timestamp_inc_refcnt (session->value.cs_ts_index);
+
+ /* First create the return session */
+ ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
+ &session->value.cs_ip[VLIB_TX]);
+ ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
+ &session->value.cs_ip[VLIB_RX]);
+ rsession->key.cs_proto = session->key.cs_proto;
+ rsession->key.cs_loc = rsession_location;
+ rsession->key.__cs_pad = 0;
+ rsession->key.cs_af = ctx->af;
+ rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
+ rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
+
+ ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
+ &session->key.cs_ip[VLIB_TX]);
+ ip46_address_copy (&rsession->value.cs_ip[VLIB_TX],
+ &session->key.cs_ip[VLIB_RX]);
+ rsession->value.cs_ts_index = session->value.cs_ts_index;
+ rsession->value.cs_lbi = INDEX_INVALID;
+ rsession->value.flags = rsession_flags | CNAT_SESSION_IS_RETURN;
+ rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
+ rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
+
+retry_add_ression:
+ rv = cnat_bihash_add_del (&cnat_session_db, &rkey,
+ 2 /* add but don't overwrite */);
+ if (rv)
+ {
+ if (!(rsession_flags & CNAT_SESSION_RETRY_SNAT))
+ return;
+
+ /* return session add failed pick an new random src port */
+ rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX] =
+ random_u32 (&sport_seed);
+ if (n_retries++ < 100)
+ goto retry_add_ression;
+ else
+ {
+ clib_warning ("Could not find a free port after 100 tries");
+ /* translate this packet, but don't create state */
+ return;
+ }
+ }
+
+ cnat_bihash_add_del (&cnat_session_db, bkey, 1 /* add */);
if (!(rsession_flags & CNAT_SESSION_FLAG_NO_CLIENT))
{
@@ -894,39 +930,6 @@ cnat_session_create (cnat_session_t *session, cnat_node_ctx_t *ctx,
}
}
- /* create the reverse flow key */
- ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
- &session->value.cs_ip[VLIB_TX]);
- ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
- &session->value.cs_ip[VLIB_RX]);
- rsession->key.cs_proto = session->key.cs_proto;
- rsession->key.cs_loc = rsession_location;
- rsession->key.__cs_pad = 0;
- rsession->key.cs_af = ctx->af;
- rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
- rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
-
- /* First search for existing reverse session */
- rv = cnat_bihash_search_i2 (&cnat_session_db, &rkey, &rvalue);
- if (!rv)
- {
- /* Reverse session already exists
- cleanup before creating for refcnts */
- cnat_session_t *found_rsession = (cnat_session_t *) & rvalue;
- cnat_session_free (found_rsession);
- }
- /* add the reverse flow */
- ip46_address_copy (&rsession->value.cs_ip[VLIB_RX],
- &session->key.cs_ip[VLIB_TX]);
- ip46_address_copy (&rsession->value.cs_ip[VLIB_TX],
- &session->key.cs_ip[VLIB_RX]);
- rsession->value.cs_ts_index = session->value.cs_ts_index;
- rsession->value.cs_lbi = INDEX_INVALID;
- rsession->value.flags = rsession_flags | CNAT_SESSION_IS_RETURN;
- rsession->value.cs_port[VLIB_TX] = session->key.cs_port[VLIB_RX];
- rsession->value.cs_port[VLIB_RX] = session->key.cs_port[VLIB_TX];
-
- cnat_bihash_add_del (&cnat_session_db, &rkey, 1);
}
always_inline uword
diff --git a/src/plugins/cnat/cnat_node_feature.c b/src/plugins/cnat/cnat_node_feature.c
index aced4cd0a15..9b2c0c2fe06 100644
--- a/src/plugins/cnat/cnat_node_feature.c
+++ b/src/plugins/cnat/cnat_node_feature.c
@@ -143,7 +143,10 @@ cnat_input_feature_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
/* refcnt session in current client */
cnat_client_cnt_session (cc);
- cnat_session_create (session, ctx, CNAT_LOCATION_OUTPUT, rsession_flags);
+ cnat_session_create (session, ctx);
+ if (!(ct->flags & CNAT_TR_FLAG_NO_RETURN_SESSION))
+ cnat_rsession_create (session, ctx, CNAT_LOCATION_OUTPUT,
+ rsession_flags);
trace_flags |= CNAT_TRACE_SESSION_CREATED;
}
@@ -156,9 +159,9 @@ cnat_input_feature_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
}
if (AF_IP4 == ctx->af)
- cnat_translation_ip4 (session, ip4, udp0);
+ cnat_translation_ip4 (session, ip4, udp0, vnet_buffer (b)->oflags);
else
- cnat_translation_ip6 (session, ip6, udp0);
+ cnat_translation_ip6 (session, ip6, udp0, vnet_buffer (b)->oflags);
if (NULL != ct)
{
@@ -320,14 +323,17 @@ cnat_output_feature_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
CNAT_SESSION_FLAG_NO_CLIENT | CNAT_SESSION_FLAG_ALLOC_PORT;
trace_flags |= CNAT_TRACE_SESSION_CREATED;
- cnat_session_create (session, ctx, CNAT_LOCATION_INPUT,
- CNAT_SESSION_FLAG_NO_CLIENT);
+
+ cnat_session_create (session, ctx);
+ cnat_rsession_create (session, ctx, CNAT_LOCATION_INPUT,
+ CNAT_SESSION_FLAG_NO_CLIENT |
+ CNAT_SESSION_RETRY_SNAT);
}
if (AF_IP4 == ctx->af)
- cnat_translation_ip4 (session, ip4, udp0);
+ cnat_translation_ip4 (session, ip4, udp0, vnet_buffer (b)->oflags);
else
- cnat_translation_ip6 (session, ip6, udp0);
+ cnat_translation_ip6 (session, ip6, udp0, vnet_buffer (b)->oflags);
trace:
if (PREDICT_FALSE (ctx->do_trace))
diff --git a/src/plugins/cnat/cnat_node_snat.c b/src/plugins/cnat/cnat_node_snat.c
index 9212d67ead6..57530eb397d 100644
--- a/src/plugins/cnat/cnat_node_snat.c
+++ b/src/plugins/cnat/cnat_node_snat.c
@@ -129,15 +129,15 @@ cnat_snat_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
CNAT_SESSION_FLAG_NO_CLIENT | CNAT_SESSION_FLAG_ALLOC_PORT;
trace_flags |= CNAT_TRACE_SESSION_CREATED;
- cnat_session_create (session, ctx, CNAT_LOCATION_FIB,
- CNAT_SESSION_FLAG_HAS_SNAT);
+ cnat_session_create (session, ctx);
+ cnat_rsession_create (session, ctx, CNAT_LOCATION_FIB,
+ CNAT_SESSION_FLAG_HAS_SNAT);
}
-
if (AF_IP4 == ctx->af)
- cnat_translation_ip4 (session, ip4, udp0);
+ cnat_translation_ip4 (session, ip4, udp0, vnet_buffer (b)->oflags);
else
- cnat_translation_ip6 (session, ip6, udp0);
+ cnat_translation_ip6 (session, ip6, udp0, vnet_buffer (b)->oflags);
trace:
if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
diff --git a/src/plugins/cnat/cnat_node_vip.c b/src/plugins/cnat/cnat_node_vip.c
index f166bd4f194..d320746c5fa 100644
--- a/src/plugins/cnat/cnat_node_vip.c
+++ b/src/plugins/cnat/cnat_node_vip.c
@@ -168,7 +168,9 @@ cnat_vip_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b,
/* refcnt session in current client */
cnat_client_cnt_session (cc);
- cnat_session_create (session, ctx, CNAT_LOCATION_FIB, rsession_flags);
+ cnat_session_create (session, ctx);
+ if (!(ct->flags & CNAT_TR_FLAG_NO_RETURN_SESSION))
+ cnat_rsession_create (session, ctx, CNAT_LOCATION_FIB, rsession_flags);
trace_flags |= CNAT_TRACE_SESSION_CREATED;
next0 = ct->ct_lb.dpoi_next_node;
@@ -176,9 +178,9 @@ cnat_vip_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b,
}
if (AF_IP4 == ctx->af)
- cnat_translation_ip4 (session, ip4, udp0);
+ cnat_translation_ip4 (session, ip4, udp0, vnet_buffer (b)->oflags);
else
- cnat_translation_ip6 (session, ip6, udp0);
+ cnat_translation_ip6 (session, ip6, udp0, vnet_buffer (b)->oflags);
if (NULL != ct)
{
diff --git a/src/plugins/cnat/cnat_scanner.c b/src/plugins/cnat/cnat_scanner.c
index b3591f7e8b0..2f982711581 100644
--- a/src/plugins/cnat/cnat_scanner.c
+++ b/src/plugins/cnat/cnat_scanner.c
@@ -14,6 +14,7 @@
*/
#include <cnat/cnat_session.h>
+#include <vlibmemory/api.h>
#include <cnat/cnat_client.h>
static uword
diff --git a/src/plugins/cnat/cnat_session.c b/src/plugins/cnat/cnat_session.c
index 216d2575c37..0f1cd43f501 100644
--- a/src/plugins/cnat/cnat_session.c
+++ b/src/plugins/cnat/cnat_session.c
@@ -94,7 +94,8 @@ format_cnat_session (u8 * s, va_list * args)
cnat_session_t *sess = va_arg (*args, cnat_session_t *);
CLIB_UNUSED (int verbose) = va_arg (*args, int);
f64 ts = 0;
- if (!pool_is_free_index (cnat_timestamps, sess->value.cs_ts_index))
+
+ if (!cnat_ts_is_free_index (sess->value.cs_ts_index))
ts = cnat_timestamp_exp (sess->value.cs_ts_index);
s = format (
@@ -172,15 +173,43 @@ cnat_session_purge (void)
return (0);
}
+void
+cnat_reverse_session_free (cnat_session_t *session)
+{
+ cnat_bihash_kv_t bkey, bvalue;
+ cnat_session_t *rsession = (cnat_session_t *) &bkey;
+ int rv;
+
+ ip46_address_copy (&rsession->key.cs_ip[VLIB_RX],
+ &session->value.cs_ip[VLIB_TX]);
+ ip46_address_copy (&rsession->key.cs_ip[VLIB_TX],
+ &session->value.cs_ip[VLIB_RX]);
+ rsession->key.cs_proto = session->key.cs_proto;
+ rsession->key.cs_loc = session->key.cs_loc == CNAT_LOCATION_OUTPUT ?
+ CNAT_LOCATION_INPUT :
+ CNAT_LOCATION_OUTPUT;
+ rsession->key.__cs_pad = 0;
+ rsession->key.cs_af = session->key.cs_af;
+ rsession->key.cs_port[VLIB_RX] = session->value.cs_port[VLIB_TX];
+ rsession->key.cs_port[VLIB_TX] = session->value.cs_port[VLIB_RX];
+
+ rv = cnat_bihash_search_i2 (&cnat_session_db, &bkey, &bvalue);
+ if (!rv)
+ {
+ /* other session is in bihash */
+ cnat_session_t *rsession = (cnat_session_t *) &bvalue;
+ cnat_session_free (rsession);
+ }
+}
+
u64
cnat_session_scan (vlib_main_t * vm, f64 start_time, int i)
{
BVT (clib_bihash) * h = &cnat_session_db;
int j, k;
- /* Don't scan the l2 fib if it hasn't been instantiated yet */
if (alloc_arena (h) == 0)
- return 0.0;
+ return 0;
for ( /* caller saves starting point */ ; i < h->nbuckets; i++)
{
@@ -210,7 +239,7 @@ cnat_session_scan (vlib_main_t * vm, f64 start_time, int i)
{
for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
{
- if (v->kvp[k].key[0] == ~0ULL && v->kvp[k].value[0] == ~0ULL)
+ if (BV (clib_bihash_is_free) (&v->kvp[k]))
continue;
cnat_session_t *session = (cnat_session_t *) & v->kvp[k];
@@ -219,6 +248,9 @@ cnat_session_scan (vlib_main_t * vm, f64 start_time, int i)
cnat_timestamp_exp (session->value.cs_ts_index))
{
/* age it */
+ cnat_reverse_session_free (session);
+ /* this should be last as deleting the session memset it to
+ * 0xff */
cnat_session_free (session);
/*
@@ -248,6 +280,12 @@ cnat_session_init (vlib_main_t * vm)
cm->session_hash_memory);
BV (clib_bihash_set_kvp_format_fn) (&cnat_session_db, format_cnat_session);
+ cnat_timestamps.next_empty_pool_idx = 0;
+ clib_bitmap_alloc (cnat_timestamps.ts_free, 1 << CNAT_TS_MPOOL_BITS);
+ clib_bitmap_set_region (cnat_timestamps.ts_free, 0, 1,
+ 1 << CNAT_TS_MPOOL_BITS);
+ clib_spinlock_init (&cnat_timestamps.ts_lock);
+
return (NULL);
}
@@ -258,21 +296,38 @@ cnat_timestamp_show (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
cnat_timestamp_t *ts;
- clib_rwlock_reader_lock (&cnat_main.ts_lock);
- pool_foreach (ts, cnat_timestamps)
+ int ts_cnt = 0, cnt;
+ u8 verbose = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u",
- ts - cnat_timestamps, ts->last_seen, ts->lifetime,
- ts->refcnt);
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else
+ return (clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input));
+ }
+
+ for (int i = 0; i < cnat_timestamps.next_empty_pool_idx; i++)
+ {
+ cnt = pool_elts (cnat_timestamps.ts_pools[i]);
+ ts_cnt += cnt;
+ vlib_cli_output (vm, "-- Pool %d [%d/%d]", i, cnt,
+ pool_header (cnat_timestamps.ts_pools[i])->max_elts);
+ if (!verbose)
+ continue;
+ pool_foreach (ts, cnat_timestamps.ts_pools[i])
+ vlib_cli_output (vm, "[%d] last_seen:%f lifetime:%u ref:%u",
+ ts - cnat_timestamps.ts_pools[i], ts->last_seen,
+ ts->lifetime, ts->refcnt);
}
- clib_rwlock_reader_unlock (&cnat_main.ts_lock);
+ vlib_cli_output (vm, "Total timestamps %d", ts_cnt);
return (NULL);
}
VLIB_CLI_COMMAND (cnat_timestamp_show_cmd, static) = {
.path = "show cnat timestamp",
.function = cnat_timestamp_show,
- .short_help = "show cnat timestamp",
+ .short_help = "show cnat timestamp [verbose]",
.is_mp_safe = 1,
};
diff --git a/src/plugins/cnat/cnat_session.h b/src/plugins/cnat/cnat_session.h
index 072bb10f96f..a0a28c9a818 100644
--- a/src/plugins/cnat/cnat_session.h
+++ b/src/plugins/cnat/cnat_session.h
@@ -129,6 +129,11 @@ typedef enum cnat_session_flag_t_
/* Debug flag marking return sessions */
CNAT_SESSION_IS_RETURN = (1 << 4),
+
+ /** On conflicts when adding the return session, try to sNAT the
+ * forward session, and dNAT the return session with a random port */
+ CNAT_SESSION_RETRY_SNAT = (1 << 5),
+
} cnat_session_flag_t;
typedef enum cnat_session_location_t_
diff --git a/src/plugins/cnat/cnat_snat_policy.c b/src/plugins/cnat/cnat_snat_policy.c
index d59156f34c8..cd9bfef492a 100644
--- a/src/plugins/cnat/cnat_snat_policy.c
+++ b/src/plugins/cnat/cnat_snat_policy.c
@@ -29,6 +29,8 @@ unformat_cnat_snat_interface_map_type (unformat_input_t *input, va_list *args)
*a = CNAT_SNAT_IF_MAP_INCLUDE_V6;
else if (unformat (input, "k8s"))
*a = CNAT_SNAT_IF_MAP_INCLUDE_POD;
+ else if (unformat (input, "host"))
+ *a = CNAT_SNAT_IF_MAP_INCLUDE_HOST;
else
return 0;
return 1;
@@ -49,6 +51,9 @@ format_cnat_snat_interface_map_type (u8 *s, va_list *args)
case CNAT_SNAT_IF_MAP_INCLUDE_POD:
s = format (s, "k8s pod");
break;
+ case CNAT_SNAT_IF_MAP_INCLUDE_HOST:
+ s = format (s, "k8s host");
+ break;
default:
s = format (s, "(unknown)");
break;
@@ -108,7 +113,7 @@ cnat_snat_policy_add_del_if_command_fn (vlib_main_t *vm,
vnet_main_t *vnm = vnet_get_main ();
int is_add = 1;
u32 sw_if_index = ~0;
- u32 table;
+ u32 table = 0;
int rv;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
@@ -296,6 +301,14 @@ cnat_snat_policy_k8s (vlib_buffer_t *b, cnat_session_t *session)
u32 in_if = vnet_buffer (b)->sw_if_index[VLIB_RX];
u32 out_if = vnet_buffer (b)->sw_if_index[VLIB_TX];
+ /* we should never snat traffic that we punt to the host, pass traffic as it
+ * is for us */
+ if (clib_bitmap_get (cpm->interface_maps[CNAT_SNAT_IF_MAP_INCLUDE_HOST],
+ out_if))
+ {
+ return 0;
+ }
+
/* source nat for outgoing connections */
if (cnat_snat_policy_interface_enabled (in_if, af))
if (cnat_search_snat_prefix (dst_addr, af))
diff --git a/src/plugins/cnat/cnat_snat_policy.h b/src/plugins/cnat/cnat_snat_policy.h
index 987ae494e16..61c2382602f 100644
--- a/src/plugins/cnat/cnat_snat_policy.h
+++ b/src/plugins/cnat/cnat_snat_policy.h
@@ -45,6 +45,9 @@ typedef enum cnat_snat_interface_map_type_t_
CNAT_SNAT_IF_MAP_INCLUDE_V4 = AF_IP4,
CNAT_SNAT_IF_MAP_INCLUDE_V6 = AF_IP6,
CNAT_SNAT_IF_MAP_INCLUDE_POD,
+ /* CNAT_SNAT_IF_MAP_INCLUDE_HOST is used for interfaces used for punt,
+ replicating uplink */
+ CNAT_SNAT_IF_MAP_INCLUDE_HOST,
CNAT_N_SNAT_IF_MAP,
} cnat_snat_interface_map_type_t;
diff --git a/src/plugins/cnat/cnat_src_policy.c b/src/plugins/cnat/cnat_src_policy.c
index cac24b7742c..8f3f3375148 100644
--- a/src/plugins/cnat/cnat_src_policy.c
+++ b/src/plugins/cnat/cnat_src_policy.c
@@ -59,8 +59,8 @@ cnat_vip_default_source_policy (vlib_main_t * vm,
u16 sport;
sport = udp0->src_port;
/* Allocate a port only if asked and if we actually sNATed */
- if ((ct->flags & CNAT_TRANSLATION_FLAG_ALLOCATE_PORT)
- && (*rsession_flags & CNAT_SESSION_FLAG_HAS_SNAT))
+ if ((ct->flags & CNAT_TR_FLAG_ALLOCATE_PORT) &&
+ (*rsession_flags & CNAT_SESSION_FLAG_HAS_SNAT))
{
sport = 0; /* force allocation */
session->value.flags |= CNAT_SESSION_FLAG_ALLOC_PORT;
diff --git a/src/plugins/cnat/cnat_translation.c b/src/plugins/cnat/cnat_translation.c
index 049809a8684..513cedf0446 100644
--- a/src/plugins/cnat/cnat_translation.c
+++ b/src/plugins/cnat/cnat_translation.c
@@ -18,8 +18,10 @@
#include <vnet/fib/fib_entry_track.h>
#include <vnet/dpo/load_balance.h>
#include <vnet/dpo/drop_dpo.h>
+#include <vnet/dpo/dpo.h>
#include <cnat/cnat_translation.h>
+#include <cnat/cnat_maglev.h>
#include <cnat/cnat_session.h>
#include <cnat/cnat_client.h>
@@ -82,6 +84,7 @@ cnat_tracker_release (cnat_ep_trk_t * trk)
/* We only track fully resolved endpoints */
if (!(trk->ct_flags & CNAT_TRK_ACTIVE))
return;
+ dpo_reset (&trk->ct_dpo); // undo fib_entry_contribute_forwarding
fib_entry_untrack (trk->ct_fei, trk->ct_sibling);
}
@@ -200,110 +203,7 @@ cnat_remove_translation_from_db (index_t cci, cnat_endpoint_t * vip,
clib_bihash_add_del_8_8 (&cnat_translation_db, &bkey, 0);
}
-typedef struct
-{
- cnat_ep_trk_t *trk;
- u32 index;
- u32 offset;
- u32 skip;
-} cnat_maglev_entry_t;
-static int
-cnat_maglev_entry_compare (void *_a, void *_b)
-{
- cnat_ep_trk_t *a = ((cnat_maglev_entry_t *) _a)->trk;
- cnat_ep_trk_t *b = ((cnat_maglev_entry_t *) _b)->trk;
- int rv = 0;
- if ((rv =
- ip_address_cmp (&a->ct_ep[VLIB_TX].ce_ip, &b->ct_ep[VLIB_TX].ce_ip)))
- return rv;
- if ((rv = a->ct_ep[VLIB_TX].ce_port - a->ct_ep[VLIB_TX].ce_port))
- return rv;
- if ((rv =
- ip_address_cmp (&a->ct_ep[VLIB_RX].ce_ip, &b->ct_ep[VLIB_RX].ce_ip)))
- return rv;
- if ((rv = a->ct_ep[VLIB_RX].ce_port - a->ct_ep[VLIB_RX].ce_port))
- return rv;
- return 0;
-}
-
-static void
-cnat_translation_init_maglev (cnat_translation_t *ct)
-{
- cnat_maglev_entry_t *backends = NULL, *bk;
- cnat_main_t *cm = &cnat_main;
- u32 done = 0;
- cnat_ep_trk_t *trk;
- int ep_idx = 0;
-
- vec_foreach (trk, ct->ct_active_paths)
- {
- cnat_maglev_entry_t bk;
- u32 h1, h2;
-
- if (AF_IP4 == ip_addr_version (&trk->ct_ep[VLIB_TX].ce_ip))
- {
- u32 a, b, c;
- a = ip_addr_v4 (&trk->ct_ep[VLIB_TX].ce_ip).data_u32;
- b = (u64) trk->ct_ep[VLIB_TX].ce_port << 16 |
- (u64) trk->ct_ep[VLIB_RX].ce_port;
- c = ip_addr_v4 (&trk->ct_ep[VLIB_RX].ce_ip).data_u32;
- hash_v3_mix32 (a, b, c);
- hash_v3_finalize32 (a, b, c);
- h1 = c;
- h2 = b;
- }
- else
- {
- u64 a, b, c;
- a = ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[0] ^
- ip_addr_v6 (&trk->ct_ep[VLIB_TX].ce_ip).as_u64[1];
- b = (u64) trk->ct_ep[VLIB_TX].ce_port << 16 |
- (u64) trk->ct_ep[VLIB_RX].ce_port;
- c = ip_addr_v6 (&trk->ct_ep[VLIB_RX].ce_ip).as_u64[0] ^
- ip_addr_v6 (&trk->ct_ep[VLIB_RX].ce_ip).as_u64[1];
- hash_mix64 (a, b, c);
- h1 = c;
- h2 = b;
- }
-
- bk.offset = h1 % cm->maglev_len;
- bk.skip = h2 % (cm->maglev_len - 1) + 1;
- bk.index = ep_idx++;
- bk.trk = trk;
- vec_add1 (backends, bk);
- }
-
- if (0 == ep_idx)
- return;
-
- vec_sort_with_function (backends, cnat_maglev_entry_compare);
-
- /* Don't free if previous vector exists, just zero */
- vec_validate (ct->lb_maglev, cm->maglev_len);
- vec_set (ct->lb_maglev, -1);
-
- while (1)
- {
- vec_foreach (bk, backends)
- {
- u32 next = 0;
- u32 c = (bk->offset + next * bk->skip) % cm->maglev_len;
- while (ct->lb_maglev[c] != (u32) -1)
- {
- next++;
- c = (bk->offset + next * bk->skip) % cm->maglev_len;
- }
- ct->lb_maglev[c] = bk->index;
- done++;
- if (done == cm->maglev_len)
- goto finished;
- }
- }
-
-finished:
- vec_free (backends);
-}
static void
cnat_translation_stack (cnat_translation_t * ct)
@@ -323,8 +223,11 @@ cnat_translation_stack (cnat_translation_t * ct)
if (trk->ct_flags & CNAT_TRK_ACTIVE)
vec_add1 (ct->ct_active_paths, *trk);
+ flow_hash_config_t fhc = IP_FLOW_HASH_DEFAULT;
+ if (ct->fhc != 0)
+ fhc = ct->fhc;
lbi = load_balance_create (vec_len (ct->ct_active_paths),
- fib_proto_to_dpo (fproto), IP_FLOW_HASH_DEFAULT);
+ fib_proto_to_dpo (fproto), fhc);
ep_idx = 0;
vec_foreach (trk, ct->ct_active_paths)
@@ -335,7 +238,7 @@ cnat_translation_stack (cnat_translation_t * ct)
dpo_set (&ct->ct_lb, DPO_LOAD_BALANCE, dproto, lbi);
dpo_stack (cnat_client_dpo, dproto, &ct->ct_lb, &ct->ct_lb);
- ct->flags |= CNAT_TRANSLATION_STACKED;
+ ct->flags |= CNAT_TR_FLAG_STACKED;
}
int
@@ -365,8 +268,9 @@ cnat_translation_delete (u32 id)
u32
cnat_translation_update (cnat_endpoint_t *vip, ip_protocol_t proto,
cnat_endpoint_tuple_t *paths, u8 flags,
- cnat_lb_type_t lb_type)
+ cnat_lb_type_t lb_type, flow_hash_config_t fhc)
{
+ const dpo_id_t tmp = DPO_INVALID;
cnat_endpoint_tuple_t *path;
const cnat_client_t *cc;
cnat_translation_t *ct;
@@ -398,6 +302,7 @@ cnat_translation_update (cnat_endpoint_t *vip, ip_protocol_t proto,
ct->ct_cci = cci;
ct->index = ct - cnat_translation_pool;
ct->lb_type = lb_type;
+ ct->fhc = fhc;
cnat_add_translation_to_db (cci, vip, proto, ct->index);
cnat_client_translation_added (cci);
@@ -417,7 +322,7 @@ cnat_translation_update (cnat_endpoint_t *vip, ip_protocol_t proto,
}
vec_reset_length (ct->ct_paths);
- ct->flags &= ~CNAT_TRANSLATION_STACKED;
+ ct->flags &= ~CNAT_TR_FLAG_STACKED;
u64 path_idx = 0;
vec_foreach (path, paths)
@@ -438,6 +343,7 @@ cnat_translation_update (cnat_endpoint_t *vip, ip_protocol_t proto,
clib_memcpy (&trk->ct_ep[VLIB_RX], &path->src_ep,
sizeof (trk->ct_ep[VLIB_RX]));
trk->ct_flags = path->ep_flags;
+ trk->ct_dpo = tmp;
cnat_tracker_track (ct->index, trk);
}
@@ -486,6 +392,11 @@ format_cnat_translation (u8 * s, va_list * args)
format_ip_protocol, ct->ct_proto);
s = format (s, "lb:%U ", format_cnat_lb_type, ct->lb_type);
+ if ((ct->fhc == 0) || (ct->fhc == IP_FLOW_HASH_DEFAULT))
+ s = format (s, "fhc:0x%x(default)", IP_FLOW_HASH_DEFAULT);
+ else
+ s = format (s, "fhc:0x%x", ct->fhc);
+
vec_foreach (ck, ct->ct_paths)
s = format (s, "\n%U", format_cnat_ep_trk, ck, 2);
@@ -615,7 +526,7 @@ cnat_translation_back_walk_notify (fib_node_t * node,
/* If we have more than FIB_PATH_LIST_POPULAR paths
* we might get called during path tracking
* (cnat_tracker_track) */
- if (!(ct->flags & CNAT_TRANSLATION_STACKED))
+ if (!(ct->flags & CNAT_TR_FLAG_STACKED))
return (FIB_NODE_BACK_WALK_CONTINUE);
cnat_translation_stack (ct);
@@ -678,8 +589,9 @@ cnat_translation_cli_add_del (vlib_main_t * vm,
}
}
+ flow_hash_config_t fhc = 0;
if (INDEX_INVALID == del_index)
- cnat_translation_update (&vip, proto, paths, flags, lb_type);
+ cnat_translation_update (&vip, proto, paths, flags, lb_type, fhc);
else
cnat_translation_delete (del_index);
@@ -764,11 +676,11 @@ cnat_if_addr_add_del_backend_cb (addr_resolution_t * ar,
ep->ce_flags |= CNAT_EP_FLAG_RESOLVED;
}
- ct->flags &= ~CNAT_TRANSLATION_STACKED;
+ ct->flags &= ~CNAT_TR_FLAG_STACKED;
cnat_tracker_track (ar->cti, trk);
cnat_translation_stack (ct);
- ct->flags |= CNAT_TRANSLATION_STACKED;
+ ct->flags |= CNAT_TR_FLAG_STACKED;
}
static void
@@ -825,7 +737,7 @@ cnat_translation_init (vlib_main_t * vm)
ip6_main_t *i6m = &ip6_main;
cnat_main_t *cm = &cnat_main;
cnat_translation_fib_node_type =
- fib_node_register_new_type (&cnat_translation_vft);
+ fib_node_register_new_type ("cnat-translation", &cnat_translation_vft);
clib_bihash_init_8_8 (&cnat_translation_db, "CNat translation DB",
cm->translation_hash_buckets,
diff --git a/src/plugins/cnat/cnat_translation.h b/src/plugins/cnat/cnat_translation.h
index 97b0c908b42..9bb3455d9fe 100644
--- a/src/plugins/cnat/cnat_translation.h
+++ b/src/plugins/cnat/cnat_translation.h
@@ -60,12 +60,14 @@ typedef struct cnat_ep_trk_t_
typedef enum cnat_translation_flag_t_
{
/* Do allocate a source port */
- CNAT_TRANSLATION_FLAG_ALLOCATE_PORT = (1 << 0),
+ CNAT_TR_FLAG_ALLOCATE_PORT = (1 << 0),
/* Has this translation been satcked ?
* this allow not being called twice when
* with more then FIB_PATH_LIST_POPULAR backends */
- CNAT_TRANSLATION_STACKED = (1 << 1),
-} cnat_translation_flag_t;
+ CNAT_TR_FLAG_STACKED = (1 << 1),
+ /* Do not create a return session */
+ CNAT_TR_FLAG_NO_RETURN_SESSION = (1 << 2),
+} __clib_packed cnat_translation_flag_t;
typedef enum
{
@@ -76,11 +78,11 @@ typedef enum
CNAT_ADDR_N_RESOLUTIONS,
} cnat_addr_resol_type_t;
-typedef enum __attribute__ ((__packed__))
+typedef enum
{
CNAT_LB_DEFAULT,
CNAT_LB_MAGLEV,
-} cnat_lb_type_t;
+} __clib_packed cnat_lb_type_t;
/**
* Entry used to account for a translation's backend
@@ -160,13 +162,18 @@ typedef struct cnat_translation_t_
/**
* Translation flags
*/
- u8 flags;
+ cnat_translation_flag_t flags;
/**
* Type of load balancing
*/
cnat_lb_type_t lb_type;
+ /**
+ * Type of flow hash config
+ */
+ flow_hash_config_t fhc;
+
union
{
u32 *lb_maglev;
@@ -189,7 +196,8 @@ extern u8 *format_cnat_translation (u8 * s, va_list * args);
extern u32 cnat_translation_update (cnat_endpoint_t *vip,
ip_protocol_t ip_proto,
cnat_endpoint_tuple_t *backends, u8 flags,
- cnat_lb_type_t lb_type);
+ cnat_lb_type_t lb_type,
+ flow_hash_config_t fhc);
/**
* Delete a translation
diff --git a/src/plugins/cnat/cnat_types.c b/src/plugins/cnat/cnat_types.c
index 9b164c6069d..084a03da968 100644
--- a/src/plugins/cnat/cnat_types.c
+++ b/src/plugins/cnat/cnat_types.c
@@ -16,8 +16,7 @@
#include <cnat/cnat_types.h>
cnat_main_t cnat_main;
-fib_source_t cnat_fib_source;
-cnat_timestamp_t *cnat_timestamps;
+cnat_timestamp_mpool_t cnat_timestamps;
char *cnat_error_strings[] = {
#define cnat_error(n,s) s,
@@ -152,19 +151,6 @@ format_cnat_endpoint (u8 * s, va_list * args)
return (s);
}
-static clib_error_t *
-cnat_types_init (vlib_main_t * vm)
-{
- cnat_fib_source = fib_source_allocate ("cnat",
- CNAT_FIB_SOURCE_PRIORITY,
- FIB_SOURCE_BH_SIMPLE);
-
-
- clib_rwlock_init (&cnat_main.ts_lock);
-
- return (NULL);
-}
-
void
cnat_enable_disable_scanner (cnat_scanner_cmd_t event_type)
{
@@ -191,6 +177,8 @@ cnat_config (vlib_main_t * vm, unformat_input_t * input)
cm->session_hash_buckets = CNAT_DEFAULT_SESSION_BUCKETS;
cm->translation_hash_memory = CNAT_DEFAULT_TRANSLATION_MEMORY;
cm->translation_hash_buckets = CNAT_DEFAULT_TRANSLATION_BUCKETS;
+ cm->client_hash_memory = CNAT_DEFAULT_CLIENT_MEMORY;
+ cm->client_hash_buckets = CNAT_DEFAULT_CLIENT_BUCKETS;
cm->snat_hash_memory = CNAT_DEFAULT_SNAT_MEMORY;
cm->snat_hash_buckets = CNAT_DEFAULT_SNAT_BUCKETS;
cm->snat_if_map_length = CNAT_DEFAULT_SNAT_IF_MAP_LEN;
@@ -215,6 +203,12 @@ cnat_config (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "translation-db-memory %U",
unformat_memory_size, &cm->translation_hash_memory))
;
+ else if (unformat (input, "client-db-buckets %u",
+ &cm->client_hash_buckets))
+ ;
+ else if (unformat (input, "client-db-memory %U", unformat_memory_size,
+ &cm->client_hash_memory))
+ ;
else if (unformat (input, "snat-db-buckets %u", &cm->snat_hash_buckets))
;
else if (unformat (input, "snat-if-map-len %u", &cm->snat_if_map_length))
@@ -250,7 +244,6 @@ cnat_get_main ()
}
VLIB_EARLY_CONFIG_FUNCTION (cnat_config, "cnat");
-VLIB_INIT_FUNCTION (cnat_types_init);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/cnat/cnat_types.h b/src/plugins/cnat/cnat_types.h
index c3ec74c345f..d229d21adae 100644
--- a/src/plugins/cnat/cnat_types.h
+++ b/src/plugins/cnat/cnat_types.h
@@ -36,12 +36,14 @@
#define CNAT_DEFAULT_SESSION_BUCKETS 1024
#define CNAT_DEFAULT_TRANSLATION_BUCKETS 1024
+#define CNAT_DEFAULT_CLIENT_BUCKETS 1024
#define CNAT_DEFAULT_SNAT_BUCKETS 1024
#define CNAT_DEFAULT_SNAT_IF_MAP_LEN 4096
#define CNAT_DEFAULT_SESSION_MEMORY (1 << 20)
#define CNAT_DEFAULT_TRANSLATION_MEMORY (256 << 10)
-#define CNAT_DEFAULT_SNAT_MEMORY (64 << 20)
+#define CNAT_DEFAULT_CLIENT_MEMORY (256 << 10)
+#define CNAT_DEFAULT_SNAT_MEMORY (64 << 10)
/* Should be prime >~ 100 * numBackends */
#define CNAT_DEFAULT_MAGLEV_LEN 1009
@@ -50,11 +52,24 @@
* from fib_source.h */
#define CNAT_FIB_SOURCE_PRIORITY 0x02
-/* Initial refcnt for timestamps (2 : session & rsession) */
-#define CNAT_TIMESTAMP_INIT_REFCNT 2
+/* Initial number of timestamps for a session
+ * this will be incremented when adding the reverse
+ * session in cnat_rsession_create */
+#define CNAT_TIMESTAMP_INIT_REFCNT 1
#define MIN_SRC_PORT ((u16) 0xC000)
+typedef struct
+{
+ /* Source and destination port. */
+ u16 src_port, dst_port;
+
+ /* Random value to distinguish connections. */
+ u32 verification_tag;
+
+ u32 checksum;
+} sctp_header_t;
+
typedef enum cnat_trk_flag_t_
{
/* Endpoint is active (static or dhcp resolved) */
@@ -62,6 +77,8 @@ typedef enum cnat_trk_flag_t_
/* Don't translate this endpoint, but still
* forward. Used by maglev for DSR */
CNAT_TRK_FLAG_NO_NAT = (1 << 1),
+ /* */
+ CNAT_TRK_FLAG_TEST_DISABLED = (1 << 7),
} cnat_trk_flag_t;
typedef enum
@@ -105,6 +122,12 @@ typedef struct cnat_main_
/* Number of buckets of the translation bihash */
u32 translation_hash_buckets;
+ /* Memory size of the client bihash */
+ uword client_hash_memory;
+
+ /* Number of buckets of the client bihash */
+ u32 client_hash_buckets;
+
/* Memory size of the source NAT prefix bihash */
uword snat_hash_memory;
@@ -125,9 +148,6 @@ typedef struct cnat_main_
/* delay in seconds between two scans of session/clients tables */
f64 scanner_timeout;
- /* Lock for the timestamp pool */
- clib_rwlock_t ts_lock;
-
/* Index of the scanner process node */
uword scanner_node_index;
@@ -152,6 +172,23 @@ typedef struct cnat_timestamp_t_
u16 refcnt;
} cnat_timestamp_t;
+/* Create the first pool with 1 << CNAT_TS_BASE_SIZE elts */
+#define CNAT_TS_BASE_SIZE (8)
+/* reserve the top CNAT_TS_MPOOL_BITS bits for finding the pool */
+#define CNAT_TS_MPOOL_BITS (6)
+
+typedef struct cnat_timestamp_mpool_t_
+{
+ /* Increasing fixed size pools of timestamps */
+ cnat_timestamp_t *ts_pools[1 << CNAT_TS_MPOOL_BITS];
+ /* Bitmap of pools with free space */
+ uword *ts_free;
+ /* Index of next pool to init */
+ u8 next_empty_pool_idx;
+ /* ts creation lock */
+ clib_spinlock_t ts_lock;
+} cnat_timestamp_mpool_t;
+
typedef struct cnat_node_ctx_
{
f64 now;
@@ -165,8 +202,7 @@ extern u8 *format_cnat_endpoint (u8 * s, va_list * args);
extern uword unformat_cnat_ep_tuple (unformat_input_t * input,
va_list * args);
extern uword unformat_cnat_ep (unformat_input_t * input, va_list * args);
-extern cnat_timestamp_t *cnat_timestamps;
-extern fib_source_t cnat_fib_source;
+extern cnat_timestamp_mpool_t cnat_timestamps;
extern cnat_main_t cnat_main;
extern char *cnat_error_strings[];
diff --git a/src/plugins/crypto_ipsecmb/CMakeLists.txt b/src/plugins/crypto_ipsecmb/CMakeLists.txt
index 981a045262e..429343a9f3b 100644
--- a/src/plugins/crypto_ipsecmb/CMakeLists.txt
+++ b/src/plugins/crypto_ipsecmb/CMakeLists.txt
@@ -33,6 +33,16 @@ if(IPSECMB_INCLUDE_DIR AND IPSECMB_LIB)
${IPSECMB_LINK_FLAGS}
)
+ file(READ "${IPSECMB_INCLUDE_DIR}/intel-ipsec-mb.h" ipsecmb_header)
+ string(REGEX MATCH "IMB_VERSION_STR (\"+[0-9]+\\.[0-9]+\\.[0-9]+\")" _ ${ipsecmb_header})
+ string(REPLACE "\"" "" IPSECMB_VERSION ${CMAKE_MATCH_1})
+
+ if (${IPSECMB_VERSION} VERSION_GREATER "0.54.0")
+ add_definitions(-DHAVE_IPSECMB_CHACHA_POLY)
+ else()
+ message(STATUS "Intel IPSecMB ${IPSECMB_VERSION} does not support chacha20-poly1305. Disabled")
+ endif()
+
target_compile_options(crypto_ipsecmb_plugin PRIVATE "-march=silvermont" "-maes")
message(STATUS "Intel IPSecMB found: ${IPSECMB_INCLUDE_DIR}")
else()
diff --git a/src/plugins/crypto_ipsecmb/ipsecmb.c b/src/plugins/crypto_ipsecmb/ipsecmb.c
index ad5f7bfe006..064c129ba12 100644
--- a/src/plugins/crypto_ipsecmb/ipsecmb.c
+++ b/src/plugins/crypto_ipsecmb/ipsecmb.c
@@ -25,14 +25,16 @@
#include <vnet/crypto/crypto.h>
#include <vppinfra/cpu.h>
-#define HMAC_MAX_BLOCK_SIZE SHA_512_BLOCK_SIZE
+#define HMAC_MAX_BLOCK_SIZE IMB_SHA_512_BLOCK_SIZE
#define EXPANDED_KEY_N_BYTES (16 * 15)
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- MB_MGR *mgr;
- __m128i cbc_iv;
+ IMB_MGR *mgr;
+#if IMB_VERSION_NUM >= IMB_VERSION(1, 3, 0)
+ IMB_JOB burst_jobs[IMB_MAX_BURST_SIZE];
+#endif
} ipsecmb_per_thread_data_t;
typedef struct
@@ -60,11 +62,12 @@ typedef struct
static ipsecmb_main_t ipsecmb_main = { };
+/* clang-format off */
/*
* (Alg, JOB_HASH_ALG, fn, block-size-bytes, hash-size-bytes, digest-size-bytes)
*/
#define foreach_ipsecmb_hmac_op \
- _(SHA1, SHA1, sha1, 64, 20, 20) \
+ _(SHA1, SHA_1, sha1, 64, 20, 20) \
_(SHA224, SHA_224, sha224, 64, 32, 28) \
_(SHA256, SHA_256, sha256, 64, 32, 32) \
_(SHA384, SHA_384, sha384, 128, 64, 48) \
@@ -88,21 +91,21 @@ static ipsecmb_main_t ipsecmb_main = { };
_(AES_128_GCM, 128) \
_(AES_192_GCM, 192) \
_(AES_256_GCM, 256)
-
+/* clang-format on */
static_always_inline vnet_crypto_op_status_t
-ipsecmb_status_job (JOB_STS status)
+ipsecmb_status_job (IMB_STATUS status)
{
switch (status)
{
- case STS_COMPLETED:
+ case IMB_STATUS_COMPLETED:
return VNET_CRYPTO_OP_STATUS_COMPLETED;
- case STS_BEING_PROCESSED:
- case STS_COMPLETED_AES:
- case STS_COMPLETED_HMAC:
+ case IMB_STATUS_BEING_PROCESSED:
+ case IMB_STATUS_COMPLETED_CIPHER:
+ case IMB_STATUS_COMPLETED_AUTH:
return VNET_CRYPTO_OP_STATUS_WORK_IN_PROGRESS;
- case STS_INVALID_ARGS:
- case STS_INTERNAL_ERROR:
- case STS_ERROR:
+ case IMB_STATUS_INVALID_ARGS:
+ case IMB_STATUS_INTERNAL_ERROR:
+ case IMB_STATUS_ERROR:
return VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR;
}
ASSERT (0);
@@ -110,12 +113,12 @@ ipsecmb_status_job (JOB_STS status)
}
always_inline void
-ipsecmb_retire_hmac_job (JOB_AES_HMAC * job, u32 * n_fail, u32 digest_size)
+ipsecmb_retire_hmac_job (IMB_JOB *job, u32 *n_fail, u32 digest_size)
{
vnet_crypto_op_t *op = job->user_data;
u32 len = op->digest_len ? op->digest_len : digest_size;
- if (PREDICT_FALSE (STS_COMPLETED != job->status))
+ if (PREDICT_FALSE (IMB_STATUS_COMPLETED != job->status))
{
op->status = ipsecmb_status_job (job->status);
*n_fail = *n_fail + 1;
@@ -139,15 +142,71 @@ ipsecmb_retire_hmac_job (JOB_AES_HMAC * job, u32 * n_fail, u32 digest_size)
op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
}
+#if IMB_VERSION_NUM >= IMB_VERSION(1, 3, 0)
+static_always_inline u32
+ipsecmb_ops_hmac_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
+ u32 block_size, u32 hash_size, u32 digest_size,
+ IMB_HASH_ALG alg)
+{
+ ipsecmb_main_t *imbm = &ipsecmb_main;
+ ipsecmb_per_thread_data_t *ptd =
+ vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
+ IMB_JOB *job;
+ u32 i, n_fail = 0, ops_index = 0;
+ u8 scratch[n_ops][digest_size];
+ const u32 burst_sz =
+ (n_ops > IMB_MAX_BURST_SIZE) ? IMB_MAX_BURST_SIZE : n_ops;
+
+ while (n_ops)
+ {
+ const u32 n = (n_ops > burst_sz) ? burst_sz : n_ops;
+ /*
+ * configure all the jobs first ...
+ */
+ for (i = 0; i < n; i++, ops_index++)
+ {
+ vnet_crypto_op_t *op = ops[ops_index];
+ const u8 *kd = (u8 *) imbm->key_data[op->key_index];
+
+ job = &ptd->burst_jobs[i];
+
+ job->src = op->src;
+ job->hash_start_src_offset_in_bytes = 0;
+ job->msg_len_to_hash_in_bytes = op->len;
+ job->auth_tag_output_len_in_bytes = digest_size;
+ job->auth_tag_output = scratch[ops_index];
+
+ job->u.HMAC._hashed_auth_key_xor_ipad = kd;
+ job->u.HMAC._hashed_auth_key_xor_opad = kd + hash_size;
+ job->user_data = op;
+ }
+
+ /*
+ * submit all jobs to be processed and retire completed jobs
+ */
+ IMB_SUBMIT_HASH_BURST_NOCHECK (ptd->mgr, ptd->burst_jobs, n, alg);
+
+ for (i = 0; i < n; i++)
+ {
+ job = &ptd->burst_jobs[i];
+ ipsecmb_retire_hmac_job (job, &n_fail, digest_size);
+ }
+
+ n_ops -= n;
+ }
+
+ return ops_index - n_fail;
+}
+#else
static_always_inline u32
-ipsecmb_ops_hmac_inline (vlib_main_t * vm, vnet_crypto_op_t * ops[],
- u32 n_ops, u32 block_size, u32 hash_size,
- u32 digest_size, JOB_HASH_ALG alg)
+ipsecmb_ops_hmac_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
+ u32 block_size, u32 hash_size, u32 digest_size,
+ JOB_HASH_ALG alg)
{
ipsecmb_main_t *imbm = &ipsecmb_main;
- ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data,
- vm->thread_index);
- JOB_AES_HMAC *job;
+ ipsecmb_per_thread_data_t *ptd =
+ vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
+ IMB_JOB *job;
u32 i, n_fail = 0;
u8 scratch[n_ops][digest_size];
@@ -168,9 +227,9 @@ ipsecmb_ops_hmac_inline (vlib_main_t * vm, vnet_crypto_op_t * ops[],
job->auth_tag_output_len_in_bytes = digest_size;
job->auth_tag_output = scratch[i];
- job->cipher_mode = NULL_CIPHER;
- job->cipher_direction = DECRYPT;
- job->chain_order = HASH_CIPHER;
+ job->cipher_mode = IMB_CIPHER_NULL;
+ job->cipher_direction = IMB_DIR_DECRYPT;
+ job->chain_order = IMB_ORDER_HASH_CIPHER;
job->u.HMAC._hashed_auth_key_xor_ipad = kd;
job->u.HMAC._hashed_auth_key_xor_opad = kd + hash_size;
@@ -187,23 +246,27 @@ ipsecmb_ops_hmac_inline (vlib_main_t * vm, vnet_crypto_op_t * ops[],
return n_ops - n_fail;
}
+#endif
+/* clang-format off */
#define _(a, b, c, d, e, f) \
static_always_inline u32 \
ipsecmb_ops_hmac_##a (vlib_main_t * vm, \
vnet_crypto_op_t * ops[], \
u32 n_ops) \
-{ return ipsecmb_ops_hmac_inline (vm, ops, n_ops, d, e, f, b); } \
+{ return ipsecmb_ops_hmac_inline (vm, ops, n_ops, d, e, f, \
+ IMB_AUTH_HMAC_##b); } \
foreach_ipsecmb_hmac_op;
#undef _
+/* clang-format on */
always_inline void
-ipsecmb_retire_cipher_job (JOB_AES_HMAC * job, u32 * n_fail)
+ipsecmb_retire_cipher_job (IMB_JOB *job, u32 *n_fail)
{
vnet_crypto_op_t *op = job->user_data;
- if (PREDICT_FALSE (STS_COMPLETED != job->status))
+ if (PREDICT_FALSE (IMB_STATUS_COMPLETED != job->status))
{
op->status = ipsecmb_status_job (job->status);
*n_fail = *n_fail + 1;
@@ -212,6 +275,62 @@ ipsecmb_retire_cipher_job (JOB_AES_HMAC * job, u32 * n_fail)
op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
}
+#if IMB_VERSION_NUM >= IMB_VERSION(1, 3, 0)
+static_always_inline u32
+ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ u32 n_ops, u32 key_len,
+ IMB_CIPHER_DIRECTION direction,
+ IMB_CIPHER_MODE cipher_mode)
+{
+ ipsecmb_main_t *imbm = &ipsecmb_main;
+ ipsecmb_per_thread_data_t *ptd =
+ vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
+ IMB_JOB *job;
+ u32 i, n_fail = 0, ops_index = 0;
+ const u32 burst_sz =
+ (n_ops > IMB_MAX_BURST_SIZE) ? IMB_MAX_BURST_SIZE : n_ops;
+
+ while (n_ops)
+ {
+ const u32 n = (n_ops > burst_sz) ? burst_sz : n_ops;
+
+ for (i = 0; i < n; i++)
+ {
+ ipsecmb_aes_key_data_t *kd;
+ vnet_crypto_op_t *op = ops[ops_index++];
+ kd = (ipsecmb_aes_key_data_t *) imbm->key_data[op->key_index];
+
+ job = &ptd->burst_jobs[i];
+
+ job->src = op->src;
+ job->dst = op->dst;
+ job->msg_len_to_cipher_in_bytes = op->len;
+ job->cipher_start_src_offset_in_bytes = 0;
+
+ job->hash_alg = IMB_AUTH_NULL;
+
+ job->enc_keys = kd->enc_key_exp;
+ job->dec_keys = kd->dec_key_exp;
+ job->iv = op->iv;
+ job->iv_len_in_bytes = IMB_AES_BLOCK_SIZE;
+
+ job->user_data = op;
+ }
+
+ IMB_SUBMIT_CIPHER_BURST_NOCHECK (ptd->mgr, ptd->burst_jobs, n,
+ cipher_mode, direction, key_len / 8);
+ for (i = 0; i < n; i++)
+ {
+ job = &ptd->burst_jobs[i];
+ ipsecmb_retire_cipher_job (job, &n_fail);
+ }
+
+ n_ops -= n;
+ }
+
+ return ops_index - n_fail;
+}
+#else
static_always_inline u32
ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
u32 n_ops, u32 key_len,
@@ -219,9 +338,9 @@ ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
JOB_CIPHER_MODE cipher_mode)
{
ipsecmb_main_t *imbm = &ipsecmb_main;
- ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data,
- vm->thread_index);
- JOB_AES_HMAC *job;
+ ipsecmb_per_thread_data_t *ptd =
+ vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
+ IMB_JOB *job;
u32 i, n_fail = 0;
for (i = 0; i < n_ops; i++)
@@ -229,7 +348,6 @@ ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
ipsecmb_aes_key_data_t *kd;
vnet_crypto_op_t *op = ops[i];
kd = (ipsecmb_aes_key_data_t *) imbm->key_data[op->key_index];
- __m128i iv;
job = IMB_GET_NEXT_JOB (ptd->mgr);
@@ -238,23 +356,18 @@ ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
job->msg_len_to_cipher_in_bytes = op->len;
job->cipher_start_src_offset_in_bytes = 0;
- job->hash_alg = NULL_HASH;
+ job->hash_alg = IMB_AUTH_NULL;
job->cipher_mode = cipher_mode;
job->cipher_direction = direction;
- job->chain_order = (direction == ENCRYPT ? CIPHER_HASH : HASH_CIPHER);
-
- if ((direction == ENCRYPT) && (op->flags & VNET_CRYPTO_OP_FLAG_INIT_IV))
- {
- iv = ptd->cbc_iv;
- _mm_storeu_si128 ((__m128i *) op->iv, iv);
- ptd->cbc_iv = _mm_aesenc_si128 (iv, iv);
- }
+ job->chain_order =
+ (direction == IMB_DIR_ENCRYPT ? IMB_ORDER_CIPHER_HASH :
+ IMB_ORDER_HASH_CIPHER);
job->aes_key_len_in_bytes = key_len / 8;
- job->aes_enc_key_expanded = kd->enc_key_exp;
- job->aes_dec_key_expanded = kd->dec_key_exp;
+ job->enc_keys = kd->enc_key_exp;
+ job->dec_keys = kd->dec_key_exp;
job->iv = op->iv;
- job->iv_len_in_bytes = AES_BLOCK_SIZE;
+ job->iv_len_in_bytes = IMB_AES_BLOCK_SIZE;
job->user_data = op;
@@ -269,18 +382,22 @@ ipsecmb_ops_aes_cipher_inline (vlib_main_t *vm, vnet_crypto_op_t *ops[],
return n_ops - n_fail;
}
+#endif
+/* clang-format off */
#define _(a, b, c) \
static_always_inline u32 ipsecmb_ops_cipher_enc_##a ( \
vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \
{ \
- return ipsecmb_ops_aes_cipher_inline (vm, ops, n_ops, b, ENCRYPT, c); \
+ return ipsecmb_ops_aes_cipher_inline ( \
+ vm, ops, n_ops, b, IMB_DIR_ENCRYPT, IMB_CIPHER_##c); \
} \
\
static_always_inline u32 ipsecmb_ops_cipher_dec_##a ( \
vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \
{ \
- return ipsecmb_ops_aes_cipher_inline (vm, ops, n_ops, b, DECRYPT, c); \
+ return ipsecmb_ops_aes_cipher_inline ( \
+ vm, ops, n_ops, b, IMB_DIR_DECRYPT, IMB_CIPHER_##c); \
}
foreach_ipsecmb_cipher_op;
@@ -294,7 +411,7 @@ ipsecmb_ops_gcm_cipher_enc_##a##_chained (vlib_main_t * vm, \
ipsecmb_main_t *imbm = &ipsecmb_main; \
ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data, \
vm->thread_index); \
- MB_MGR *m = ptd->mgr; \
+ IMB_MGR *m = ptd->mgr; \
vnet_crypto_op_chunk_t *chp; \
u32 i, j; \
\
@@ -329,7 +446,7 @@ ipsecmb_ops_gcm_cipher_enc_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], \
ipsecmb_main_t *imbm = &ipsecmb_main; \
ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data, \
vm->thread_index); \
- MB_MGR *m = ptd->mgr; \
+ IMB_MGR *m = ptd->mgr; \
u32 i; \
\
for (i = 0; i < n_ops; i++) \
@@ -355,7 +472,7 @@ ipsecmb_ops_gcm_cipher_dec_##a##_chained (vlib_main_t * vm, \
ipsecmb_main_t *imbm = &ipsecmb_main; \
ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data, \
vm->thread_index); \
- MB_MGR *m = ptd->mgr; \
+ IMB_MGR *m = ptd->mgr; \
vnet_crypto_op_chunk_t *chp; \
u32 i, j, n_failed = 0; \
\
@@ -397,7 +514,7 @@ ipsecmb_ops_gcm_cipher_dec_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], \
ipsecmb_main_t *imbm = &ipsecmb_main; \
ipsecmb_per_thread_data_t *ptd = vec_elt_at_index (imbm->per_thread_data, \
vm->thread_index); \
- MB_MGR *m = ptd->mgr; \
+ IMB_MGR *m = ptd->mgr; \
u32 i, n_failed = 0; \
\
for (i = 0; i < n_ops; i++) \
@@ -422,17 +539,18 @@ ipsecmb_ops_gcm_cipher_dec_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], \
\
return n_ops - n_failed; \
}
-
+/* clang-format on */
foreach_ipsecmb_gcm_cipher_op;
#undef _
+#ifdef HAVE_IPSECMB_CHACHA_POLY
always_inline void
-ipsecmb_retire_aead_job (JOB_AES_HMAC *job, u32 *n_fail)
+ipsecmb_retire_aead_job (IMB_JOB *job, u32 *n_fail)
{
vnet_crypto_op_t *op = job->user_data;
u32 len = op->tag_len;
- if (PREDICT_FALSE (STS_COMPLETED != job->status))
+ if (PREDICT_FALSE (IMB_STATUS_COMPLETED != job->status))
{
op->status = ipsecmb_status_job (job->status);
*n_fail = *n_fail + 1;
@@ -462,16 +580,14 @@ ipsecmb_ops_chacha_poly (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
ipsecmb_per_thread_data_t *ptd =
vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
struct IMB_JOB *job;
- MB_MGR *m = ptd->mgr;
+ IMB_MGR *m = ptd->mgr;
u32 i, n_fail = 0, last_key_index = ~0;
u8 scratch[VLIB_FRAME_SIZE][16];
- u8 iv_data[16];
u8 *key = 0;
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- __m128i iv;
job = IMB_GET_NEXT_JOB (m);
if (last_key_index != op->key_index)
@@ -494,15 +610,6 @@ ipsecmb_ops_chacha_poly (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
job->src = op->src;
job->dst = op->dst;
- if ((dir == IMB_DIR_ENCRYPT) &&
- (op->flags & VNET_CRYPTO_OP_FLAG_INIT_IV))
- {
- iv = ptd->cbc_iv;
- _mm_storeu_si128 ((__m128i *) iv_data, iv);
- clib_memcpy_fast (op->iv, iv_data, 12);
- ptd->cbc_iv = _mm_aesenc_si128 (iv, iv);
- }
-
job->iv = op->iv;
job->iv_len_in_bytes = 12;
job->msg_len_to_cipher_in_bytes = job->msg_len_to_hash_in_bytes =
@@ -550,9 +657,8 @@ ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[],
ipsecmb_main_t *imbm = &ipsecmb_main;
ipsecmb_per_thread_data_t *ptd =
vec_elt_at_index (imbm->per_thread_data, vm->thread_index);
- MB_MGR *m = ptd->mgr;
+ IMB_MGR *m = ptd->mgr;
u32 i, n_fail = 0, last_key_index = ~0;
- u8 iv_data[16];
u8 *key = 0;
if (dir == IMB_DIR_ENCRYPT)
@@ -562,7 +668,6 @@ ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_t *op = ops[i];
struct chacha20_poly1305_context_data ctx;
vnet_crypto_op_chunk_t *chp;
- __m128i iv;
u32 j;
ASSERT (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS);
@@ -575,14 +680,6 @@ ipsecmb_ops_chacha_poly_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[],
last_key_index = op->key_index;
}
- if (op->flags & VNET_CRYPTO_OP_FLAG_INIT_IV)
- {
- iv = ptd->cbc_iv;
- _mm_storeu_si128 ((__m128i *) iv_data, iv);
- clib_memcpy_fast (op->iv, iv_data, 12);
- ptd->cbc_iv = _mm_aesenc_si128 (iv, iv);
- }
-
IMB_CHACHA20_POLY1305_INIT (m, key, &ctx, op->iv, op->aad,
op->aad_len);
@@ -662,30 +759,7 @@ ipsec_mb_ops_chacha_poly_dec_chained (vlib_main_t *vm, vnet_crypto_op_t *ops[],
return ipsecmb_ops_chacha_poly_chained (vm, ops, chunks, n_ops,
IMB_DIR_DECRYPT);
}
-
-clib_error_t *
-crypto_ipsecmb_iv_init (ipsecmb_main_t * imbm)
-{
- ipsecmb_per_thread_data_t *ptd;
- clib_error_t *err = 0;
- int fd;
-
- if ((fd = open ("/dev/urandom", O_RDONLY)) < 0)
- return clib_error_return_unix (0, "failed to open '/dev/urandom'");
-
- vec_foreach (ptd, imbm->per_thread_data)
- {
- if (read (fd, &ptd->cbc_iv, sizeof (ptd->cbc_iv)) != sizeof (ptd->cbc_iv))
- {
- err = clib_error_return_unix (0, "'/dev/urandom' read failure");
- close (fd);
- return (err);
- }
- }
-
- close (fd);
- return (NULL);
-}
+#endif
static void
crypto_ipsecmb_key_handler (vlib_main_t * vm, vnet_crypto_key_op_t kop,
@@ -773,8 +847,7 @@ crypto_ipsecmb_init (vlib_main_t * vm)
ipsecmb_alg_data_t *ad;
ipsecmb_per_thread_data_t *ptd;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- clib_error_t *error;
- MB_MGR *m = 0;
+ IMB_MGR *m = 0;
u32 eidx;
u8 *name;
@@ -791,13 +864,16 @@ crypto_ipsecmb_init (vlib_main_t * vm)
vec_validate_aligned (imbm->per_thread_data, tm->n_vlib_mains - 1,
CLIB_CACHE_LINE_BYTES);
- /* *INDENT-OFF* */
vec_foreach (ptd, imbm->per_thread_data)
{
ptd->mgr = alloc_mb_mgr (0);
- if (clib_cpu_supports_avx512f ())
+#if IMB_VERSION_NUM >= IMB_VERSION(1, 3, 0)
+ clib_memset_u8 (ptd->burst_jobs, 0,
+ sizeof (IMB_JOB) * IMB_MAX_BURST_SIZE);
+#endif
+ if (clib_cpu_supports_avx512f ())
init_mb_mgr_avx512 (ptd->mgr);
- else if (clib_cpu_supports_avx2 ())
+ else if (clib_cpu_supports_avx2 () && clib_cpu_supports_bmi2 ())
init_mb_mgr_avx2 (ptd->mgr);
else
init_mb_mgr_sse (ptd->mgr);
@@ -805,10 +881,6 @@ crypto_ipsecmb_init (vlib_main_t * vm)
if (ptd == imbm->per_thread_data)
m = ptd->mgr;
}
- /* *INDENT-ON* */
-
- if (clib_cpu_supports_x86_aes () && (error = crypto_ipsecmb_iv_init (imbm)))
- return (error);
#define _(a, b, c, d, e, f) \
vnet_crypto_register_ops_handler (vm, eidx, VNET_CRYPTO_OP_##a##_HMAC, \
@@ -850,6 +922,7 @@ crypto_ipsecmb_init (vlib_main_t * vm)
foreach_ipsecmb_gcm_cipher_op;
#undef _
+#ifdef HAVE_IPSECMB_CHACHA_POLY
vnet_crypto_register_ops_handler (vm, eidx,
VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC,
ipsecmb_ops_chacha_poly_enc);
@@ -864,25 +937,22 @@ crypto_ipsecmb_init (vlib_main_t * vm)
ipsec_mb_ops_chacha_poly_dec_chained);
ad = imbm->alg_data + VNET_CRYPTO_ALG_CHACHA20_POLY1305;
ad->data_size = 0;
+#endif
vnet_crypto_register_key_handler (vm, eidx, crypto_ipsecmb_key_handler);
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (crypto_ipsecmb_init) =
{
.runs_after = VLIB_INITS ("vnet_crypto_init"),
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Intel IPSEC Multi-buffer Crypto Engine",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/crypto_native/CMakeLists.txt b/src/plugins/crypto_native/CMakeLists.txt
index 688a8c95baf..5499ed4608a 100644
--- a/src/plugins/crypto_native/CMakeLists.txt
+++ b/src/plugins/crypto_native/CMakeLists.txt
@@ -12,24 +12,26 @@
# limitations under the License.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
- list(APPEND VARIANTS "slm\;-march=silvermont")
- list(APPEND VARIANTS "hsw\;-march=haswell")
+ list(APPEND VARIANTS "slm\;-march=silvermont -maes")
+ list(APPEND VARIANTS "hsw\;-march=haswell -maes")
if(compiler_flag_march_skylake_avx512 AND compiler_flag_mprefer_vector_width_256)
list(APPEND VARIANTS "skx\;-march=skylake-avx512 -mprefer-vector-width=256")
endif()
if(compiler_flag_march_icelake_client AND compiler_flag_mprefer_vector_width_512)
list(APPEND VARIANTS "icl\;-march=icelake-client -mprefer-vector-width=512")
endif()
- set (COMPILE_FILES aes_cbc.c aes_gcm.c)
- set (COMPILE_OPTS -Wall -fno-common -maes)
+ if(compiler_flag_march_alderlake)
+ list(APPEND VARIANTS "adl\;-march=alderlake -mprefer-vector-width=256")
+ endif()
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
list(APPEND VARIANTS "armv8\;-march=armv8.1-a+crc+crypto")
- set (COMPILE_FILES aes_cbc.c aes_gcm.c)
- set (COMPILE_OPTS -Wall -fno-common)
endif()
+set (COMPILE_FILES aes_cbc.c aes_gcm.c aes_ctr.c sha2.c)
+set (COMPILE_OPTS -Wall -fno-common)
+
if (NOT VARIANTS)
return()
endif()
diff --git a/src/plugins/crypto_native/FEATURE.yaml b/src/plugins/crypto_native/FEATURE.yaml
index 206caceb2d4..06f26d4a8cf 100644
--- a/src/plugins/crypto_native/FEATURE.yaml
+++ b/src/plugins/crypto_native/FEATURE.yaml
@@ -5,6 +5,6 @@ features:
- CBC(128, 192, 256)
- GCM(128, 192, 256)
-description: "An implentation of a native crypto-engine"
+description: "An implementation of a native crypto-engine"
state: production
properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/crypto_native/aes_cbc.c b/src/plugins/crypto_native/aes_cbc.c
index c8ec37d152d..dd7ca3f1cf1 100644
--- a/src/plugins/crypto_native/aes_cbc.c
+++ b/src/plugins/crypto_native/aes_cbc.c
@@ -19,214 +19,30 @@
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
#include <crypto_native/crypto_native.h>
-#include <crypto_native/aes.h>
+#include <vppinfra/crypto/aes_cbc.h>
#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
#pragma GCC optimize ("O3")
#endif
-typedef struct
-{
- u8x16 encrypt_key[15];
-#if __VAES__
- u8x64 decrypt_key[15];
-#else
- u8x16 decrypt_key[15];
-#endif
-} aes_cbc_key_data_t;
-
-
-static_always_inline void __clib_unused
-aes_cbc_dec (u8x16 * k, u8x16u * src, u8x16u * dst, u8x16u * iv, int count,
- int rounds)
-{
- u8x16 r[4], c[4], f;
-
- f = iv[0];
- while (count >= 64)
- {
- clib_prefetch_load (src + 8);
- clib_prefetch_load (dst + 8);
-
- c[0] = r[0] = src[0];
- c[1] = r[1] = src[1];
- c[2] = r[2] = src[2];
- c[3] = r[3] = src[3];
-
-#if __x86_64__
- r[0] ^= k[0];
- r[1] ^= k[0];
- r[2] ^= k[0];
- r[3] ^= k[0];
-
- for (int i = 1; i < rounds; i++)
- {
- r[0] = aes_dec_round (r[0], k[i]);
- r[1] = aes_dec_round (r[1], k[i]);
- r[2] = aes_dec_round (r[2], k[i]);
- r[3] = aes_dec_round (r[3], k[i]);
- }
-
- r[0] = aes_dec_last_round (r[0], k[rounds]);
- r[1] = aes_dec_last_round (r[1], k[rounds]);
- r[2] = aes_dec_last_round (r[2], k[rounds]);
- r[3] = aes_dec_last_round (r[3], k[rounds]);
-#else
- for (int i = 0; i < rounds - 1; i++)
- {
- r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
- r[1] = vaesimcq_u8 (vaesdq_u8 (r[1], k[i]));
- r[2] = vaesimcq_u8 (vaesdq_u8 (r[2], k[i]));
- r[3] = vaesimcq_u8 (vaesdq_u8 (r[3], k[i]));
- }
- r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
- r[1] = vaesdq_u8 (r[1], k[rounds - 1]) ^ k[rounds];
- r[2] = vaesdq_u8 (r[2], k[rounds - 1]) ^ k[rounds];
- r[3] = vaesdq_u8 (r[3], k[rounds - 1]) ^ k[rounds];
-#endif
- dst[0] = r[0] ^ f;
- dst[1] = r[1] ^ c[0];
- dst[2] = r[2] ^ c[1];
- dst[3] = r[3] ^ c[2];
- f = c[3];
-
- count -= 64;
- src += 4;
- dst += 4;
- }
-
- while (count > 0)
- {
- c[0] = r[0] = src[0];
-#if __x86_64__
- r[0] ^= k[0];
- for (int i = 1; i < rounds; i++)
- r[0] = aes_dec_round (r[0], k[i]);
- r[0] = aes_dec_last_round (r[0], k[rounds]);
-#else
- c[0] = r[0] = src[0];
- for (int i = 0; i < rounds - 1; i++)
- r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
- r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
-#endif
- dst[0] = r[0] ^ f;
- f = c[0];
-
- count -= 16;
- src += 1;
- dst += 1;
- }
-}
-
-#if __x86_64__
-#ifdef __VAES__
-
-static_always_inline u8x64
-aes_block_load_x4 (u8 * src[], int i)
-{
- u8x64 r = { };
- r = u8x64_insert_u8x16 (r, aes_block_load (src[0] + i), 0);
- r = u8x64_insert_u8x16 (r, aes_block_load (src[1] + i), 1);
- r = u8x64_insert_u8x16 (r, aes_block_load (src[2] + i), 2);
- r = u8x64_insert_u8x16 (r, aes_block_load (src[3] + i), 3);
- return r;
-}
-
-static_always_inline void
-aes_block_store_x4 (u8 * dst[], int i, u8x64 r)
-{
- aes_block_store (dst[0] + i, u8x64_extract_u8x16 (r, 0));
- aes_block_store (dst[1] + i, u8x64_extract_u8x16 (r, 1));
- aes_block_store (dst[2] + i, u8x64_extract_u8x16 (r, 2));
- aes_block_store (dst[3] + i, u8x64_extract_u8x16 (r, 3));
-}
-
-static_always_inline u8x64
-aes_cbc_dec_permute (u8x64 a, u8x64 b)
-{
- __m512i perm = { 6, 7, 8, 9, 10, 11, 12, 13 };
- return (u8x64) _mm512_permutex2var_epi64 ((__m512i) a, perm, (__m512i) b);
-}
-
-static_always_inline void
-vaes_cbc_dec (u8x64 * k, u8x64u * src, u8x64u * dst, u8x16 * iv, int count,
- aes_key_size_t rounds)
-{
- u8x64 f, r[4], c[4] = { };
- __mmask8 m;
- int i, n_blocks = count >> 4;
-
- f = (u8x64) _mm512_mask_loadu_epi64 (_mm512_setzero_si512 (), 0xc0,
- (__m512i *) (iv - 3));
-
- while (n_blocks >= 16)
- {
- c[0] = src[0];
- c[1] = src[1];
- c[2] = src[2];
- c[3] = src[3];
-
- r[0] = c[0] ^ k[0];
- r[1] = c[1] ^ k[0];
- r[2] = c[2] ^ k[0];
- r[3] = c[3] ^ k[0];
-
- for (i = 1; i < rounds; i++)
- {
- r[0] = aes_dec_round_x4 (r[0], k[i]);
- r[1] = aes_dec_round_x4 (r[1], k[i]);
- r[2] = aes_dec_round_x4 (r[2], k[i]);
- r[3] = aes_dec_round_x4 (r[3], k[i]);
- }
-
- r[0] = aes_dec_last_round_x4 (r[0], k[i]);
- r[1] = aes_dec_last_round_x4 (r[1], k[i]);
- r[2] = aes_dec_last_round_x4 (r[2], k[i]);
- r[3] = aes_dec_last_round_x4 (r[3], k[i]);
-
- dst[0] = r[0] ^= aes_cbc_dec_permute (f, c[0]);
- dst[1] = r[1] ^= aes_cbc_dec_permute (c[0], c[1]);
- dst[2] = r[2] ^= aes_cbc_dec_permute (c[1], c[2]);
- dst[4] = r[3] ^= aes_cbc_dec_permute (c[2], c[3]);
- f = c[3];
-
- n_blocks -= 16;
- src += 4;
- dst += 4;
- }
-
- while (n_blocks > 0)
- {
- m = (1 << (n_blocks * 2)) - 1;
- c[0] = (u8x64) _mm512_mask_loadu_epi64 ((__m512i) c[0], m,
- (__m512i *) src);
- f = aes_cbc_dec_permute (f, c[0]);
- r[0] = c[0] ^ k[0];
- for (i = 1; i < rounds; i++)
- r[0] = aes_dec_round_x4 (r[0], k[i]);
- r[0] = aes_dec_last_round_x4 (r[0], k[i]);
- _mm512_mask_storeu_epi64 ((__m512i *) dst, m, (__m512i) (r[0] ^ f));
- f = c[0];
- n_blocks -= 4;
- src += 1;
- dst += 1;
- }
-}
-#endif
-#endif
-
-#ifdef __VAES__
-#define N 16
-#define u32xN u32x16
-#define u32xN_min_scalar u32x16_min_scalar
+#if defined(__VAES__) && defined(__AVX512F__)
+#define u8xN u8x64
+#define u32xN u32x16
+#define u32xN_min_scalar u32x16_min_scalar
#define u32xN_is_all_zero u32x16_is_all_zero
-#define u32xN_splat u32x16_splat
+#define u32xN_splat u32x16_splat
+#elif defined(__VAES__)
+#define u8xN u8x32
+#define u32xN u32x8
+#define u32xN_min_scalar u32x8_min_scalar
+#define u32xN_is_all_zero u32x8_is_all_zero
+#define u32xN_splat u32x8_splat
#else
-#define N 4
-#define u32xN u32x4
-#define u32xN_min_scalar u32x4_min_scalar
+#define u8xN u8x16
+#define u32xN u32x4
+#define u32xN_min_scalar u32x4_min_scalar
#define u32xN_is_all_zero u32x4_is_all_zero
-#define u32xN_splat u32x4_splat
+#define u32xN_splat u32x4_splat
#endif
static_always_inline u32
@@ -234,30 +50,22 @@ aes_ops_enc_aes_cbc (vlib_main_t * vm, vnet_crypto_op_t * ops[],
u32 n_ops, aes_key_size_t ks)
{
crypto_native_main_t *cm = &crypto_native_main;
- crypto_native_per_thread_data_t *ptd =
- vec_elt_at_index (cm->per_thread_data, vm->thread_index);
int rounds = AES_KEY_ROUNDS (ks);
u8 placeholder[8192];
u32 i, j, count, n_left = n_ops;
u32xN placeholder_mask = { };
u32xN len = { };
- vnet_crypto_key_index_t key_index[N];
- u8 *src[N] = { };
- u8 *dst[N] = { };
-#if __VAES__
- u8x64 r[N / 4] = { };
- u8x64 k[15][N / 4] = { };
- u8x16 *kq, *rq = (u8x16 *) r;
-#else
- u8x16 r[N] = { };
- u8x16 k[15][N] = { };
-#endif
+ vnet_crypto_key_index_t key_index[4 * N_AES_LANES];
+ u8 *src[4 * N_AES_LANES] = {};
+ u8 *dst[4 * N_AES_LANES] = {};
+ u8xN r[4] = {};
+ u8xN k[15][4] = {};
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4 * N_AES_LANES; i++)
key_index[i] = ~0;
more:
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4 * N_AES_LANES; i++)
if (len[i] == 0)
{
if (n_left == 0)
@@ -269,20 +77,8 @@ more:
}
else
{
- u8x16 t;
- if (ops[0]->flags & VNET_CRYPTO_OP_FLAG_INIT_IV)
- {
- t = ptd->cbc_iv[i];
- *(u8x16u *) ops[0]->iv = t;
- ptd->cbc_iv[i] = aes_enc_round (t, t);
- }
- else
- t = aes_block_load (ops[0]->iv);
-#if __VAES__
- rq[i] = t;
-#else
- r[i] = t;
-#endif
+ u8x16 t = aes_block_load (ops[0]->iv);
+ ((u8x16 *) r)[i] = t;
src[i] = ops[0]->src;
dst[i] = ops[0]->dst;
@@ -294,14 +90,7 @@ more:
key_index[i] = ops[0]->key_index;
kd = (aes_cbc_key_data_t *) cm->key_data[key_index[i]];
for (j = 0; j < rounds + 1; j++)
- {
-#if __VAES__
- kq = (u8x16 *) k[j];
- kq[i] = kd->encrypt_key[j];
-#else
- k[j][i] = kd->encrypt_key[j];
-#endif
- }
+ ((u8x16 *) k[j])[i] = kd->encrypt_key[j];
}
ops[0]->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
n_left--;
@@ -315,11 +104,11 @@ more:
for (i = 0; i < count; i += 16)
{
-#ifdef __VAES__
+#if defined(__VAES__) && defined(__AVX512F__)
r[0] = u8x64_xor3 (r[0], aes_block_load_x4 (src, i), k[0][0]);
- r[1] = u8x64_xor3 (r[1], aes_block_load_x4 (src, i), k[0][1]);
- r[2] = u8x64_xor3 (r[2], aes_block_load_x4 (src, i), k[0][2]);
- r[3] = u8x64_xor3 (r[3], aes_block_load_x4 (src, i), k[0][3]);
+ r[1] = u8x64_xor3 (r[1], aes_block_load_x4 (src + 4, i), k[0][1]);
+ r[2] = u8x64_xor3 (r[2], aes_block_load_x4 (src + 8, i), k[0][2]);
+ r[3] = u8x64_xor3 (r[3], aes_block_load_x4 (src + 12, i), k[0][3]);
for (j = 1; j < rounds; j++)
{
@@ -337,6 +126,28 @@ more:
aes_block_store_x4 (dst + 4, i, r[1]);
aes_block_store_x4 (dst + 8, i, r[2]);
aes_block_store_x4 (dst + 12, i, r[3]);
+#elif defined(__VAES__)
+ r[0] = u8x32_xor3 (r[0], aes_block_load_x2 (src, i), k[0][0]);
+ r[1] = u8x32_xor3 (r[1], aes_block_load_x2 (src + 2, i), k[0][1]);
+ r[2] = u8x32_xor3 (r[2], aes_block_load_x2 (src + 4, i), k[0][2]);
+ r[3] = u8x32_xor3 (r[3], aes_block_load_x2 (src + 6, i), k[0][3]);
+
+ for (j = 1; j < rounds; j++)
+ {
+ r[0] = aes_enc_round_x2 (r[0], k[j][0]);
+ r[1] = aes_enc_round_x2 (r[1], k[j][1]);
+ r[2] = aes_enc_round_x2 (r[2], k[j][2]);
+ r[3] = aes_enc_round_x2 (r[3], k[j][3]);
+ }
+ r[0] = aes_enc_last_round_x2 (r[0], k[j][0]);
+ r[1] = aes_enc_last_round_x2 (r[1], k[j][1]);
+ r[2] = aes_enc_last_round_x2 (r[2], k[j][2]);
+ r[3] = aes_enc_last_round_x2 (r[3], k[j][3]);
+
+ aes_block_store_x2 (dst, i, r[0]);
+ aes_block_store_x2 (dst + 2, i, r[1]);
+ aes_block_store_x2 (dst + 4, i, r[2]);
+ aes_block_store_x2 (dst + 6, i, r[3]);
#else
#if __x86_64__
r[0] = u8x16_xor3 (r[0], aes_block_load (src[0] + i), k[0][0]);
@@ -346,16 +157,16 @@ more:
for (j = 1; j < rounds; j++)
{
- r[0] = aes_enc_round (r[0], k[j][0]);
- r[1] = aes_enc_round (r[1], k[j][1]);
- r[2] = aes_enc_round (r[2], k[j][2]);
- r[3] = aes_enc_round (r[3], k[j][3]);
+ r[0] = aes_enc_round_x1 (r[0], k[j][0]);
+ r[1] = aes_enc_round_x1 (r[1], k[j][1]);
+ r[2] = aes_enc_round_x1 (r[2], k[j][2]);
+ r[3] = aes_enc_round_x1 (r[3], k[j][3]);
}
- r[0] = aes_enc_last_round (r[0], k[j][0]);
- r[1] = aes_enc_last_round (r[1], k[j][1]);
- r[2] = aes_enc_last_round (r[2], k[j][2]);
- r[3] = aes_enc_last_round (r[3], k[j][3]);
+ r[0] = aes_enc_last_round_x1 (r[0], k[j][0]);
+ r[1] = aes_enc_last_round_x1 (r[1], k[j][1]);
+ r[2] = aes_enc_last_round_x1 (r[2], k[j][2]);
+ r[3] = aes_enc_last_round_x1 (r[3], k[j][3]);
aes_block_store (dst[0] + i, r[0]);
aes_block_store (dst[1] + i, r[1]);
@@ -387,7 +198,7 @@ more:
len -= u32xN_splat (count);
- for (i = 0; i < N; i++)
+ for (i = 0; i < 4 * N_AES_LANES; i++)
{
src[i] += count;
dst[i] += count;
@@ -416,8 +227,11 @@ aes_ops_dec_aes_cbc (vlib_main_t * vm, vnet_crypto_op_t * ops[],
ASSERT (n_ops >= 1);
decrypt:
-#ifdef __VAES__
- vaes_cbc_dec (kd->decrypt_key, (u8x64u *) op->src, (u8x64u *) op->dst,
+#if defined(__VAES__) && defined(__AVX512F__)
+ aes4_cbc_dec (kd->decrypt_key, (u8x64u *) op->src, (u8x64u *) op->dst,
+ (u8x16u *) op->iv, op->len, rounds);
+#elif defined(__VAES__)
+ aes2_cbc_dec (kd->decrypt_key, (u8x32u *) op->src, (u8x32u *) op->dst,
(u8x16u *) op->iv, op->len, rounds);
#else
aes_cbc_dec (kd->decrypt_key, (u8x16u *) op->src, (u8x16u *) op->dst,
@@ -435,99 +249,91 @@ decrypt:
return n_ops;
}
-static_always_inline void *
-aes_cbc_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks)
+static int
+aes_cbc_cpu_probe ()
+{
+#if defined(__VAES__) && defined(__AVX512F__)
+ if (clib_cpu_supports_vaes () && clib_cpu_supports_avx512f ())
+ return 50;
+#elif defined(__VAES__)
+ if (clib_cpu_supports_vaes ())
+ return 40;
+#elif defined(__AVX512F__)
+ if (clib_cpu_supports_avx512f ())
+ return 30;
+#elif defined(__AVX2__)
+ if (clib_cpu_supports_avx2 ())
+ return 20;
+#elif __AES__
+ if (clib_cpu_supports_aes ())
+ return 10;
+#elif __aarch64__
+ if (clib_cpu_supports_aarch64_aes ())
+ return 10;
+#endif
+ return -1;
+}
+
+static void *
+aes_cbc_key_exp_128 (vnet_crypto_key_t *key)
{
- u8x16 e[15], d[15];
aes_cbc_key_data_t *kd;
kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
- aes_key_expand (e, key->data, ks);
- aes_key_enc_to_dec (e, d, ks);
- for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
- {
-#if __VAES__
- kd->decrypt_key[i] = (u8x64) _mm512_broadcast_i64x2 ((__m128i) d[i]);
-#else
- kd->decrypt_key[i] = d[i];
-#endif
- kd->encrypt_key[i] = e[i];
- }
+ clib_aes128_cbc_key_expand (kd, key->data);
return kd;
}
-#define foreach_aes_cbc_handler_type _(128) _(192) _(256)
-
-#define _(x) \
-static u32 aes_ops_dec_aes_cbc_##x \
-(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aes_ops_dec_aes_cbc (vm, ops, n_ops, AES_KEY_##x); } \
-static u32 aes_ops_enc_aes_cbc_##x \
-(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aes_ops_enc_aes_cbc (vm, ops, n_ops, AES_KEY_##x); } \
-static void * aes_cbc_key_exp_##x (vnet_crypto_key_t *key) \
-{ return aes_cbc_key_exp (key, AES_KEY_##x); }
-
-foreach_aes_cbc_handler_type;
-#undef _
-
-#include <fcntl.h>
+static void *
+aes_cbc_key_exp_192 (vnet_crypto_key_t *key)
+{
+ aes_cbc_key_data_t *kd;
+ kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
+ clib_aes192_cbc_key_expand (kd, key->data);
+ return kd;
+}
-clib_error_t *
-#ifdef __VAES__
-crypto_native_aes_cbc_init_icl (vlib_main_t * vm)
-#elif __AVX512F__
-crypto_native_aes_cbc_init_skx (vlib_main_t * vm)
-#elif __aarch64__
-crypto_native_aes_cbc_init_neon (vlib_main_t * vm)
-#elif __AVX2__
-crypto_native_aes_cbc_init_hsw (vlib_main_t * vm)
-#else
-crypto_native_aes_cbc_init_slm (vlib_main_t * vm)
-#endif
+static void *
+aes_cbc_key_exp_256 (vnet_crypto_key_t *key)
{
- crypto_native_main_t *cm = &crypto_native_main;
- crypto_native_per_thread_data_t *ptd;
- clib_error_t *err = 0;
- int fd;
+ aes_cbc_key_data_t *kd;
+ kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
+ clib_aes256_cbc_key_expand (kd, key->data);
+ return kd;
+}
- if ((fd = open ("/dev/urandom", O_RDONLY)) < 0)
- return clib_error_return_unix (0, "failed to open '/dev/urandom'");
+#define foreach_aes_cbc_handler_type _ (128) _ (192) _ (256)
+
+#define _(x) \
+ static u32 aes_ops_enc_aes_cbc_##x (vlib_main_t *vm, \
+ vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return aes_ops_enc_aes_cbc (vm, ops, n_ops, AES_KEY_##x); \
+ } \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##x##_cbc_enc) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##x##_CBC_ENC, \
+ .fn = aes_ops_enc_aes_cbc_##x, \
+ .probe = aes_cbc_cpu_probe, \
+ }; \
+ \
+ static u32 aes_ops_dec_aes_cbc_##x (vlib_main_t *vm, \
+ vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return aes_ops_dec_aes_cbc (vm, ops, n_ops, AES_KEY_##x); \
+ } \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##x##_cbc_dec) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##x##_CBC_DEC, \
+ .fn = aes_ops_dec_aes_cbc_##x, \
+ .probe = aes_cbc_cpu_probe, \
+ }; \
+ \
+ CRYPTO_NATIVE_KEY_HANDLER (aes_##x##_cbc) = { \
+ .alg_id = VNET_CRYPTO_ALG_AES_##x##_CBC, \
+ .key_fn = aes_cbc_key_exp_##x, \
+ .probe = aes_cbc_cpu_probe, \
+ };
- /* *INDENT-OFF* */
- vec_foreach (ptd, cm->per_thread_data)
- {
- for (int i = 0; i < 4; i++)
- {
- if (read(fd, ptd->cbc_iv, sizeof (ptd->cbc_iv)) !=
- sizeof (ptd->cbc_iv))
- {
- err = clib_error_return_unix (0, "'/dev/urandom' read failure");
- goto error;
- }
- }
- }
- /* *INDENT-ON* */
-
-#define _(x) \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_CBC_ENC, \
- aes_ops_enc_aes_cbc_##x); \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_CBC_DEC, \
- aes_ops_dec_aes_cbc_##x); \
- cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_CBC] = aes_cbc_key_exp_##x;
- foreach_aes_cbc_handler_type;
+foreach_aes_cbc_handler_type;
#undef _
-error:
- close (fd);
- return err;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/crypto_native/aes_ctr.c b/src/plugins/crypto_native/aes_ctr.c
new file mode 100644
index 00000000000..d02a7b69b9d
--- /dev/null
+++ b/src/plugins/crypto_native/aes_ctr.c
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/crypto/crypto.h>
+#include <crypto_native/crypto_native.h>
+#include <vppinfra/crypto/aes_ctr.h>
+
+#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
+#pragma GCC optimize("O3")
+#endif
+
+static_always_inline u32
+aes_ops_aes_ctr (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
+ vnet_crypto_op_chunk_t *chunks, aes_key_size_t ks,
+ int maybe_chained)
+{
+ crypto_native_main_t *cm = &crypto_native_main;
+ vnet_crypto_op_t *op = ops[0];
+ aes_ctr_key_data_t *kd;
+ aes_ctr_ctx_t ctx;
+ u32 n_left = n_ops;
+
+next:
+ kd = (aes_ctr_key_data_t *) cm->key_data[op->key_index];
+
+ clib_aes_ctr_init (&ctx, kd, op->iv, ks);
+ if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
+ {
+ vnet_crypto_op_chunk_t *chp = chunks + op->chunk_index;
+ for (int j = 0; j < op->n_chunks; j++, chp++)
+ clib_aes_ctr_transform (&ctx, chp->src, chp->dst, chp->len, ks);
+ }
+ else
+ clib_aes_ctr_transform (&ctx, op->src, op->dst, op->len, ks);
+
+ op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
+
+ if (--n_left)
+ {
+ op += 1;
+ goto next;
+ }
+
+ return n_ops;
+}
+
+static_always_inline void *
+aes_ctr_key_exp (vnet_crypto_key_t *key, aes_key_size_t ks)
+{
+ aes_ctr_key_data_t *kd;
+
+ kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
+
+ clib_aes_ctr_key_expand (kd, key->data, ks);
+
+ return kd;
+}
+
+#define foreach_aes_ctr_handler_type _ (128) _ (192) _ (256)
+
+#define _(x) \
+ static u32 aes_ops_aes_ctr_##x (vlib_main_t *vm, vnet_crypto_op_t *ops[], \
+ u32 n_ops) \
+ { \
+ return aes_ops_aes_ctr (vm, ops, n_ops, 0, AES_KEY_##x, 0); \
+ } \
+ static u32 aes_ops_aes_ctr_##x##_chained ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ return aes_ops_aes_ctr (vm, ops, n_ops, chunks, AES_KEY_##x, 1); \
+ } \
+ static void *aes_ctr_key_exp_##x (vnet_crypto_key_t *key) \
+ { \
+ return aes_ctr_key_exp (key, AES_KEY_##x); \
+ }
+
+foreach_aes_ctr_handler_type;
+#undef _
+
+static int
+probe ()
+{
+#if defined(__VAES__) && defined(__AVX512F__)
+ if (clib_cpu_supports_vaes () && clib_cpu_supports_avx512f ())
+ return 50;
+#elif defined(__VAES__)
+ if (clib_cpu_supports_vaes ())
+ return 40;
+#elif defined(__AVX512F__)
+ if (clib_cpu_supports_avx512f ())
+ return 30;
+#elif defined(__AVX2__)
+ if (clib_cpu_supports_avx2 ())
+ return 20;
+#elif __AES__
+ if (clib_cpu_supports_aes ())
+ return 10;
+#elif __aarch64__
+ if (clib_cpu_supports_aarch64_aes ())
+ return 10;
+#endif
+ return -1;
+}
+
+#define _(b) \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_ctr_enc) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_CTR_ENC, \
+ .fn = aes_ops_aes_ctr_##b, \
+ .cfn = aes_ops_aes_ctr_##b##_chained, \
+ .probe = probe, \
+ }; \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_ctr_dec) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_CTR_DEC, \
+ .fn = aes_ops_aes_ctr_##b, \
+ .cfn = aes_ops_aes_ctr_##b##_chained, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_KEY_HANDLER (aes_##b##_ctr) = { \
+ .alg_id = VNET_CRYPTO_ALG_AES_##b##_CTR, \
+ .key_fn = aes_ctr_key_exp_##b, \
+ .probe = probe, \
+ };
+
+_ (128) _ (192) _ (256)
+#undef _
diff --git a/src/plugins/crypto_native/aes_gcm.c b/src/plugins/crypto_native/aes_gcm.c
index e0c1e6c12c3..220788d4e97 100644
--- a/src/plugins/crypto_native/aes_gcm.c
+++ b/src/plugins/crypto_native/aes_gcm.c
@@ -19,1100 +19,26 @@
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
#include <crypto_native/crypto_native.h>
-#include <crypto_native/aes.h>
-#include <crypto_native/ghash.h>
+#include <vppinfra/crypto/aes_gcm.h>
-#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
-#pragma GCC optimize ("O3")
+#if __GNUC__ > 4 && !__clang__ && CLIB_DEBUG == 0
+#pragma GCC optimize("O3")
#endif
-#ifdef __VAES__
-#define NUM_HI 32
-#else
-#define NUM_HI 8
-#endif
-
-typedef struct
-{
- /* pre-calculated hash key values */
- const u8x16 Hi[NUM_HI];
- /* extracted AES key */
- const u8x16 Ke[15];
-#ifdef __VAES__
- const u8x64 Ke4[15];
-#endif
-} aes_gcm_key_data_t;
-
-typedef struct
-{
- u32 counter;
- union
- {
- u32x4 Y;
- u32x16 Y4;
- };
-} aes_gcm_counter_t;
-
-typedef enum
-{
- AES_GCM_F_WITH_GHASH = (1 << 0),
- AES_GCM_F_LAST_ROUND = (1 << 1),
- AES_GCM_F_ENCRYPT = (1 << 2),
- AES_GCM_F_DECRYPT = (1 << 3),
-} aes_gcm_flags_t;
-
-static const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
-
-#ifndef __VAES__
-static_always_inline void
-aes_gcm_enc_first_round (u8x16 * r, aes_gcm_counter_t * ctr, u8x16 k,
- int n_blocks)
-{
- if (PREDICT_TRUE ((u8) ctr->counter < (256 - 2 * n_blocks)))
- {
- for (int i = 0; i < n_blocks; i++)
- {
- r[i] = k ^ (u8x16) ctr->Y;
- ctr->Y += ctr_inv_1;
- }
- ctr->counter += n_blocks;
- }
- else
- {
- for (int i = 0; i < n_blocks; i++)
- {
- r[i] = k ^ (u8x16) ctr->Y;
- ctr->counter++;
- ctr->Y[3] = clib_host_to_net_u32 (ctr->counter + 1);
- }
- }
-}
-
-static_always_inline void
-aes_gcm_enc_round (u8x16 * r, u8x16 k, int n_blocks)
-{
- for (int i = 0; i < n_blocks; i++)
- r[i] = aes_enc_round (r[i], k);
-}
-
-static_always_inline void
-aes_gcm_enc_last_round (u8x16 * r, u8x16 * d, u8x16 const *k,
- int rounds, int n_blocks)
-{
-
- /* additional ronuds for AES-192 and AES-256 */
- for (int i = 10; i < rounds; i++)
- aes_gcm_enc_round (r, k[i], n_blocks);
-
- for (int i = 0; i < n_blocks; i++)
- d[i] ^= aes_enc_last_round (r[i], k[rounds]);
-}
-#endif
-
-static_always_inline u8x16
-aes_gcm_ghash_blocks (u8x16 T, aes_gcm_key_data_t * kd,
- u8x16u * in, int n_blocks)
-{
- ghash_data_t _gd, *gd = &_gd;
- u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
- ghash_mul_first (gd, u8x16_reflect (in[0]) ^ T, Hi[0]);
- for (int i = 1; i < n_blocks; i++)
- ghash_mul_next (gd, u8x16_reflect ((in[i])), Hi[i]);
- ghash_reduce (gd);
- ghash_reduce2 (gd);
- return ghash_final (gd);
-}
-
-static_always_inline u8x16
-aes_gcm_ghash (u8x16 T, aes_gcm_key_data_t * kd, u8x16u * in, u32 n_left)
-{
-
- while (n_left >= 128)
- {
- T = aes_gcm_ghash_blocks (T, kd, in, 8);
- n_left -= 128;
- in += 8;
- }
-
- if (n_left >= 64)
- {
- T = aes_gcm_ghash_blocks (T, kd, in, 4);
- n_left -= 64;
- in += 4;
- }
-
- if (n_left >= 32)
- {
- T = aes_gcm_ghash_blocks (T, kd, in, 2);
- n_left -= 32;
- in += 2;
- }
-
- if (n_left >= 16)
- {
- T = aes_gcm_ghash_blocks (T, kd, in, 1);
- n_left -= 16;
- in += 1;
- }
-
- if (n_left)
- {
- u8x16 r = aes_load_partial (in, n_left);
- T = ghash_mul (u8x16_reflect (r) ^ T, kd->Hi[NUM_HI - 1]);
- }
- return T;
-}
-
-#ifndef __VAES__
-static_always_inline u8x16
-aes_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
- aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
- int rounds, int n, int last_block_bytes, aes_gcm_flags_t f)
-{
- u8x16 r[n];
- ghash_data_t _gd = { }, *gd = &_gd;
- const u8x16 *rk = (u8x16 *) kd->Ke;
- int ghash_blocks = (f & AES_GCM_F_ENCRYPT) ? 4 : n, gc = 1;
- u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - ghash_blocks;
-
- clib_prefetch_load (inv + 4);
-
- /* AES rounds 0 and 1 */
- aes_gcm_enc_first_round (r, ctr, rk[0], n);
- aes_gcm_enc_round (r, rk[1], n);
-
- /* load data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- {
- for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- d[i] = inv[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes);
- }
-
- /* GHASH multiply block 1 */
- if (f & AES_GCM_F_WITH_GHASH)
- ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
-
- /* AES rounds 2 and 3 */
- aes_gcm_enc_round (r, rk[2], n);
- aes_gcm_enc_round (r, rk[3], n);
-
- /* GHASH multiply block 2 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
-
- /* AES rounds 4 and 5 */
- aes_gcm_enc_round (r, rk[4], n);
- aes_gcm_enc_round (r, rk[5], n);
-
- /* GHASH multiply block 3 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
-
- /* AES rounds 6 and 7 */
- aes_gcm_enc_round (r, rk[6], n);
- aes_gcm_enc_round (r, rk[7], n);
-
- /* GHASH multiply block 4 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
-
- /* AES rounds 8 and 9 */
- aes_gcm_enc_round (r, rk[8], n);
- aes_gcm_enc_round (r, rk[9], n);
-
- /* GHASH reduce 1st step */
- if (f & AES_GCM_F_WITH_GHASH)
- ghash_reduce (gd);
-
- /* load data - encrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- {
- for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- d[i] = inv[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- d[n - 1] = aes_load_partial (inv + n - 1, last_block_bytes);
- }
-
- /* GHASH reduce 2nd step */
- if (f & AES_GCM_F_WITH_GHASH)
- ghash_reduce2 (gd);
-
- /* AES last round(s) */
- aes_gcm_enc_last_round (r, d, rk, rounds, n);
-
- /* store data */
- for (int i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- outv[i] = d[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- aes_store_partial (outv + n - 1, d[n - 1], last_block_bytes);
-
- /* GHASH final step */
- if (f & AES_GCM_F_WITH_GHASH)
- T = ghash_final (gd);
-
- return T;
-}
-
-static_always_inline u8x16
-aes_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
- aes_gcm_counter_t * ctr, u8x16u * inv, u8x16u * outv,
- int rounds, aes_gcm_flags_t f)
-{
- u8x16 r[4];
- ghash_data_t _gd, *gd = &_gd;
- const u8x16 *rk = (u8x16 *) kd->Ke;
- u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - 8;
-
- /* AES rounds 0 and 1 */
- aes_gcm_enc_first_round (r, ctr, rk[0], 4);
- aes_gcm_enc_round (r, rk[1], 4);
-
- /* load 4 blocks of data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- {
- d[0] = inv[0];
- d[1] = inv[1];
- d[2] = inv[2];
- d[3] = inv[3];
- }
-
- /* GHASH multiply block 0 */
- ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
-
- /* AES rounds 2 and 3 */
- aes_gcm_enc_round (r, rk[2], 4);
- aes_gcm_enc_round (r, rk[3], 4);
-
- /* GHASH multiply block 1 */
- ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
-
- /* AES rounds 4 and 5 */
- aes_gcm_enc_round (r, rk[4], 4);
- aes_gcm_enc_round (r, rk[5], 4);
-
- /* GHASH multiply block 2 */
- ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
-
- /* AES rounds 6 and 7 */
- aes_gcm_enc_round (r, rk[6], 4);
- aes_gcm_enc_round (r, rk[7], 4);
-
- /* GHASH multiply block 3 */
- ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
-
- /* AES rounds 8 and 9 */
- aes_gcm_enc_round (r, rk[8], 4);
- aes_gcm_enc_round (r, rk[9], 4);
-
- /* load 4 blocks of data - encrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- {
- d[0] = inv[0];
- d[1] = inv[1];
- d[2] = inv[2];
- d[3] = inv[3];
- }
-
- /* AES last round(s) */
- aes_gcm_enc_last_round (r, d, rk, rounds, 4);
-
- /* store 4 blocks of data */
- outv[0] = d[0];
- outv[1] = d[1];
- outv[2] = d[2];
- outv[3] = d[3];
-
- /* load next 4 blocks of data data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- {
- d[0] = inv[4];
- d[1] = inv[5];
- d[2] = inv[6];
- d[3] = inv[7];
- }
-
- /* GHASH multiply block 4 */
- ghash_mul_next (gd, u8x16_reflect (d[0]), Hi[4]);
-
- /* AES rounds 0, 1 and 2 */
- aes_gcm_enc_first_round (r, ctr, rk[0], 4);
- aes_gcm_enc_round (r, rk[1], 4);
- aes_gcm_enc_round (r, rk[2], 4);
-
- /* GHASH multiply block 5 */
- ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[5]);
-
- /* AES rounds 3 and 4 */
- aes_gcm_enc_round (r, rk[3], 4);
- aes_gcm_enc_round (r, rk[4], 4);
-
- /* GHASH multiply block 6 */
- ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[6]);
-
- /* AES rounds 5 and 6 */
- aes_gcm_enc_round (r, rk[5], 4);
- aes_gcm_enc_round (r, rk[6], 4);
-
- /* GHASH multiply block 7 */
- ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[7]);
-
- /* AES rounds 7 and 8 */
- aes_gcm_enc_round (r, rk[7], 4);
- aes_gcm_enc_round (r, rk[8], 4);
-
- /* GHASH reduce 1st step */
- ghash_reduce (gd);
-
- /* AES round 9 */
- aes_gcm_enc_round (r, rk[9], 4);
-
- /* load data - encrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- {
- d[0] = inv[4];
- d[1] = inv[5];
- d[2] = inv[6];
- d[3] = inv[7];
- }
-
- /* GHASH reduce 2nd step */
- ghash_reduce2 (gd);
-
- /* AES last round(s) */
- aes_gcm_enc_last_round (r, d, rk, rounds, 4);
-
- /* store data */
- outv[4] = d[0];
- outv[5] = d[1];
- outv[6] = d[2];
- outv[7] = d[3];
-
- /* GHASH final step */
- return ghash_final (gd);
-}
-
-static_always_inline u8x16
-aes_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x16 * d,
- int n_blocks, int n_bytes)
-{
- ghash_data_t _gd, *gd = &_gd;
- u8x16 *Hi = (u8x16 *) kd->Hi + NUM_HI - n_blocks;
-
- if (n_bytes)
- d[n_blocks - 1] = aes_byte_mask (d[n_blocks - 1], n_bytes);
-
- ghash_mul_first (gd, u8x16_reflect (d[0]) ^ T, Hi[0]);
- if (n_blocks > 1)
- ghash_mul_next (gd, u8x16_reflect (d[1]), Hi[1]);
- if (n_blocks > 2)
- ghash_mul_next (gd, u8x16_reflect (d[2]), Hi[2]);
- if (n_blocks > 3)
- ghash_mul_next (gd, u8x16_reflect (d[3]), Hi[3]);
- ghash_reduce (gd);
- ghash_reduce2 (gd);
- return ghash_final (gd);
-}
-#endif
-
-#ifdef __VAES__
-static const u32x16 ctr_inv_1234 = {
- 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
-};
-
-static const u32x16 ctr_inv_4444 = {
- 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24
-};
-
-static const u32x16 ctr_1234 = {
- 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0,
-};
-
-static_always_inline void
-aes4_gcm_enc_first_round (u8x64 * r, aes_gcm_counter_t * ctr, u8x64 k, int n)
-{
- u8 last_byte = (u8) ctr->counter;
- int i = 0;
-
- /* As counter is stored in network byte order for performance reasons we
- are incrementing least significant byte only except in case where we
- overlow. As we are processing four 512-blocks in parallel except the
- last round, overflow can happen only when n == 4 */
-
- if (n == 4)
- for (; i < 2; i++)
- {
- r[i] = k ^ (u8x64) ctr->Y4;
- ctr->Y4 += ctr_inv_4444;
- }
-
- if (n == 4 && PREDICT_TRUE (last_byte == 241))
- {
- u32x16 Yc, Yr = (u32x16) u8x64_reflect_u8x16 ((u8x64) ctr->Y4);
-
- for (; i < n; i++)
- {
- r[i] = k ^ (u8x64) ctr->Y4;
- Yc = u32x16_splat (ctr->counter + 4 * (i + 1)) + ctr_1234;
- Yr = (u32x16) u32x16_mask_blend (Yr, Yc, 0x1111);
- ctr->Y4 = (u32x16) u8x64_reflect_u8x16 ((u8x64) Yr);
- }
- }
- else
- {
- for (; i < n; i++)
- {
- r[i] = k ^ (u8x64) ctr->Y4;
- ctr->Y4 += ctr_inv_4444;
- }
- }
- ctr->counter += n * 4;
-}
-
-static_always_inline void
-aes4_gcm_enc_round (u8x64 * r, u8x64 k, int n_blocks)
-{
- for (int i = 0; i < n_blocks; i++)
- r[i] = aes_enc_round_x4 (r[i], k);
-}
-
-static_always_inline void
-aes4_gcm_enc_last_round (u8x64 * r, u8x64 * d, u8x64 const *k,
- int rounds, int n_blocks)
-{
-
- /* additional ronuds for AES-192 and AES-256 */
- for (int i = 10; i < rounds; i++)
- aes4_gcm_enc_round (r, k[i], n_blocks);
-
- for (int i = 0; i < n_blocks; i++)
- d[i] ^= aes_enc_last_round_x4 (r[i], k[rounds]);
-}
-
-static_always_inline u8x16
-aes4_gcm_calc (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
- aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out,
- int rounds, int n, int last_4block_bytes, aes_gcm_flags_t f)
-{
- ghash4_data_t _gd, *gd = &_gd;
- const u8x64 *rk = (u8x64 *) kd->Ke4;
- int i, ghash_blocks, gc = 1;
- u8x64u *Hi4, *inv = (u8x64u *) in, *outv = (u8x64u *) out;
- u8x64 r[4];
- u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
-
- if (f & AES_GCM_F_ENCRYPT)
- {
- /* during encryption we either hash four 512-bit blocks from previous
- round or we don't hash at all */
- ghash_blocks = 4;
- Hi4 = (u8x64u *) (kd->Hi + NUM_HI - ghash_blocks * 4);
- }
- else
- {
- /* during deccryption we hash 1..4 512-bit blocks from current round */
- ghash_blocks = n;
- int n_128bit_blocks = n * 4;
- /* if this is last round of decryption, we may have less than 4
- 128-bit blocks in the last 512-bit data block, so we need to adjust
- Hi4 pointer accordingly */
- if (f & AES_GCM_F_LAST_ROUND)
- n_128bit_blocks += ((last_4block_bytes + 15) >> 4) - 4;
- Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
- }
-
- /* AES rounds 0 and 1 */
- aes4_gcm_enc_first_round (r, ctr, rk[0], n);
- aes4_gcm_enc_round (r, rk[1], n);
-
- /* load 4 blocks of data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- {
- for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- d[i] = inv[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);
- }
-
- /* GHASH multiply block 0 */
- if (f & AES_GCM_F_WITH_GHASH)
- ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
- u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);
-
- /* AES rounds 2 and 3 */
- aes4_gcm_enc_round (r, rk[2], n);
- aes4_gcm_enc_round (r, rk[3], n);
-
- /* GHASH multiply block 1 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);
-
- /* AES rounds 4 and 5 */
- aes4_gcm_enc_round (r, rk[4], n);
- aes4_gcm_enc_round (r, rk[5], n);
-
- /* GHASH multiply block 2 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);
-
- /* AES rounds 6 and 7 */
- aes4_gcm_enc_round (r, rk[6], n);
- aes4_gcm_enc_round (r, rk[7], n);
-
- /* GHASH multiply block 3 */
- if ((f & AES_GCM_F_WITH_GHASH) && gc++ < ghash_blocks)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);
-
- /* load 4 blocks of data - decrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- {
- for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- d[i] = inv[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- d[i] = u8x64_mask_load (u8x64_splat (0), inv + i, byte_mask);
- }
-
- /* AES rounds 8 and 9 */
- aes4_gcm_enc_round (r, rk[8], n);
- aes4_gcm_enc_round (r, rk[9], n);
-
- /* AES last round(s) */
- aes4_gcm_enc_last_round (r, d, rk, rounds, n);
-
- /* store 4 blocks of data */
- for (i = 0; i < n - ((f & AES_GCM_F_LAST_ROUND) != 0); i++)
- outv[i] = d[i];
-
- if (f & AES_GCM_F_LAST_ROUND)
- u8x64_mask_store (d[i], outv + i, byte_mask);
-
- /* GHASH reduce 1st step */
- ghash4_reduce (gd);
-
- /* GHASH reduce 2nd step */
- ghash4_reduce2 (gd);
-
- /* GHASH final step */
- return ghash4_final (gd);
-}
-
-static_always_inline u8x16
-aes4_gcm_calc_double (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
- aes_gcm_counter_t * ctr, u8x16u * in, u8x16u * out,
- int rounds, aes_gcm_flags_t f)
-{
- u8x64 r[4];
- ghash4_data_t _gd, *gd = &_gd;
- const u8x64 *rk = (u8x64 *) kd->Ke4;
- u8x64 *Hi4 = (u8x64 *) (kd->Hi + NUM_HI - 32);
- u8x64u *inv = (u8x64u *) in, *outv = (u8x64u *) out;
-
- /* AES rounds 0 and 1 */
- aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
- aes4_gcm_enc_round (r, rk[1], 4);
-
- /* load 4 blocks of data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- for (int i = 0; i < 4; i++)
- d[i] = inv[i];
-
- /* GHASH multiply block 0 */
- ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
- u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);
-
- /* AES rounds 2 and 3 */
- aes4_gcm_enc_round (r, rk[2], 4);
- aes4_gcm_enc_round (r, rk[3], 4);
-
- /* GHASH multiply block 1 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);
-
- /* AES rounds 4 and 5 */
- aes4_gcm_enc_round (r, rk[4], 4);
- aes4_gcm_enc_round (r, rk[5], 4);
-
- /* GHASH multiply block 2 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);
-
- /* AES rounds 6 and 7 */
- aes4_gcm_enc_round (r, rk[6], 4);
- aes4_gcm_enc_round (r, rk[7], 4);
-
- /* GHASH multiply block 3 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);
-
- /* AES rounds 8 and 9 */
- aes4_gcm_enc_round (r, rk[8], 4);
- aes4_gcm_enc_round (r, rk[9], 4);
-
- /* load 4 blocks of data - encrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- for (int i = 0; i < 4; i++)
- d[i] = inv[i];
-
- /* AES last round(s) */
- aes4_gcm_enc_last_round (r, d, rk, rounds, 4);
-
- /* store 4 blocks of data */
- for (int i = 0; i < 4; i++)
- outv[i] = d[i];
-
- /* load 4 blocks of data - decrypt round */
- if (f & AES_GCM_F_DECRYPT)
- for (int i = 0; i < 4; i++)
- d[i] = inv[i + 4];
-
- /* GHASH multiply block 3 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[0]), Hi4[4]);
-
- /* AES rounds 0 and 1 */
- aes4_gcm_enc_first_round (r, ctr, rk[0], 4);
- aes4_gcm_enc_round (r, rk[1], 4);
-
- /* GHASH multiply block 5 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[5]);
-
- /* AES rounds 2 and 3 */
- aes4_gcm_enc_round (r, rk[2], 4);
- aes4_gcm_enc_round (r, rk[3], 4);
-
- /* GHASH multiply block 6 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[6]);
-
- /* AES rounds 4 and 5 */
- aes4_gcm_enc_round (r, rk[4], 4);
- aes4_gcm_enc_round (r, rk[5], 4);
-
- /* GHASH multiply block 7 */
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[7]);
-
- /* AES rounds 6 and 7 */
- aes4_gcm_enc_round (r, rk[6], 4);
- aes4_gcm_enc_round (r, rk[7], 4);
-
- /* GHASH reduce 1st step */
- ghash4_reduce (gd);
-
- /* AES rounds 8 and 9 */
- aes4_gcm_enc_round (r, rk[8], 4);
- aes4_gcm_enc_round (r, rk[9], 4);
-
- /* GHASH reduce 2nd step */
- ghash4_reduce2 (gd);
-
- /* load 4 blocks of data - encrypt round */
- if (f & AES_GCM_F_ENCRYPT)
- for (int i = 0; i < 4; i++)
- d[i] = inv[i + 4];
-
- /* AES last round(s) */
- aes4_gcm_enc_last_round (r, d, rk, rounds, 4);
-
- /* store 4 blocks of data */
- for (int i = 0; i < 4; i++)
- outv[i + 4] = d[i];
-
- /* GHASH final step */
- return ghash4_final (gd);
-}
-
-static_always_inline u8x16
-aes4_gcm_ghash_last (u8x16 T, aes_gcm_key_data_t * kd, u8x64 * d,
- int n, int last_4block_bytes)
-{
- ghash4_data_t _gd, *gd = &_gd;
- u8x64u *Hi4;
- int n_128bit_blocks;
- u64 byte_mask = _bextr_u64 (-1LL, 0, last_4block_bytes);
- n_128bit_blocks = (n - 1) * 4 + ((last_4block_bytes + 15) >> 4);
- Hi4 = (u8x64u *) (kd->Hi + NUM_HI - n_128bit_blocks);
-
- d[n - 1] = u8x64_mask_blend (u8x64_splat (0), d[n - 1], byte_mask);
- ghash4_mul_first (gd, u8x64_reflect_u8x16 (d[0]) ^
- u8x64_insert_u8x16 (u8x64_splat (0), T, 0), Hi4[0]);
- if (n > 1)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[1]), Hi4[1]);
- if (n > 2)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[2]), Hi4[2]);
- if (n > 3)
- ghash4_mul_next (gd, u8x64_reflect_u8x16 (d[3]), Hi4[3]);
- ghash4_reduce (gd);
- ghash4_reduce2 (gd);
- return ghash4_final (gd);
-}
-#endif
-
-static_always_inline u8x16
-aes_gcm_enc (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
- u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
-{
- aes_gcm_flags_t f = AES_GCM_F_ENCRYPT;
-
- if (n_left == 0)
- return T;
-
-#if __VAES__
- u8x64 d4[4];
- if (n_left < 256)
- {
- f |= AES_GCM_F_LAST_ROUND;
- if (n_left > 192)
- {
- n_left -= 192;
- aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
- }
- else if (n_left > 128)
- {
- n_left -= 128;
- aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
- }
- else if (n_left > 64)
- {
- n_left -= 64;
- aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
- }
- else
- {
- aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);
- }
- }
-
- aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 256;
- outv += 16;
- inv += 16;
-
- f |= AES_GCM_F_WITH_GHASH;
-
- while (n_left >= 512)
- {
- T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);
-
- /* next */
- n_left -= 512;
- outv += 32;
- inv += 32;
- }
-
- while (n_left >= 256)
- {
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 256;
- outv += 16;
- inv += 16;
- }
-
- if (n_left == 0)
- return aes4_gcm_ghash_last (T, kd, d4, 4, 64);
-
- f |= AES_GCM_F_LAST_ROUND;
-
- if (n_left > 192)
- {
- n_left -= 192;
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 4, n_left);
- }
-
- if (n_left > 128)
- {
- n_left -= 128;
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 3, n_left);
- }
-
- if (n_left > 64)
- {
- n_left -= 64;
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 2, n_left);
- }
-
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
- return aes4_gcm_ghash_last (T, kd, d4, 1, n_left);
-#else
- u8x16 d[4];
- if (n_left < 64)
- {
- f |= AES_GCM_F_LAST_ROUND;
- if (n_left > 48)
- {
- n_left -= 48;
- aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 4, n_left);
- }
- else if (n_left > 32)
- {
- n_left -= 32;
- aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 3, n_left);
- }
- else if (n_left > 16)
- {
- n_left -= 16;
- aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 2, n_left);
- }
- else
- {
- aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 1, n_left);
- }
- }
-
- aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 64;
- outv += 4;
- inv += 4;
-
- f |= AES_GCM_F_WITH_GHASH;
-
- while (n_left >= 128)
- {
- T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f);
-
- /* next */
- n_left -= 128;
- outv += 8;
- inv += 8;
- }
-
- if (n_left >= 64)
- {
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 64;
- outv += 4;
- inv += 4;
- }
-
- if (n_left == 0)
- return aes_gcm_ghash_last (T, kd, d, 4, 0);
-
- f |= AES_GCM_F_LAST_ROUND;
-
- if (n_left > 48)
- {
- n_left -= 48;
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 4, n_left);
- }
-
- if (n_left > 32)
- {
- n_left -= 32;
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 3, n_left);
- }
-
- if (n_left > 16)
- {
- n_left -= 16;
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 2, n_left);
- }
-
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
- return aes_gcm_ghash_last (T, kd, d, 1, n_left);
-#endif
-}
-
-static_always_inline u8x16
-aes_gcm_dec (u8x16 T, aes_gcm_key_data_t * kd, aes_gcm_counter_t * ctr,
- u8x16u * inv, u8x16u * outv, u32 n_left, int rounds)
-{
- aes_gcm_flags_t f = AES_GCM_F_WITH_GHASH | AES_GCM_F_DECRYPT;
-#ifdef __VAES__
- u8x64 d4[4] = { };
-
- while (n_left >= 512)
- {
- T = aes4_gcm_calc_double (T, kd, d4, ctr, inv, outv, rounds, f);
-
- /* next */
- n_left -= 512;
- outv += 32;
- inv += 32;
- }
-
- while (n_left >= 256)
- {
- T = aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 256;
- outv += 16;
- inv += 16;
- }
-
- if (n_left == 0)
- return T;
-
- f |= AES_GCM_F_LAST_ROUND;
-
- if (n_left > 192)
- return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 4,
- n_left - 192, f);
- if (n_left > 128)
- return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 3,
- n_left - 128, f);
- if (n_left > 64)
- return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 2,
- n_left - 64, f);
- return aes4_gcm_calc (T, kd, d4, ctr, inv, outv, rounds, 1, n_left, f);
-#else
- u8x16 d[4];
- while (n_left >= 128)
- {
- T = aes_gcm_calc_double (T, kd, d, ctr, inv, outv, rounds, f);
-
- /* next */
- n_left -= 128;
- outv += 8;
- inv += 8;
- }
-
- if (n_left >= 64)
- {
- T = aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, 0, f);
-
- /* next */
- n_left -= 64;
- outv += 4;
- inv += 4;
- }
-
- if (n_left == 0)
- return T;
-
- f |= AES_GCM_F_LAST_ROUND;
-
- if (n_left > 48)
- return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 4, n_left - 48, f);
-
- if (n_left > 32)
- return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 3, n_left - 32, f);
-
- if (n_left > 16)
- return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 2, n_left - 16, f);
-
- return aes_gcm_calc (T, kd, d, ctr, inv, outv, rounds, 1, n_left, f);
-#endif
-}
-
-static_always_inline int
-aes_gcm (u8x16u * in, u8x16u * out, u8x16u * addt, u8x16u * iv, u8x16u * tag,
- u32 data_bytes, u32 aad_bytes, u8 tag_len, aes_gcm_key_data_t * kd,
- int aes_rounds, int is_encrypt)
-{
- int i;
- u8x16 r, T = { };
- u32x4 Y0;
- ghash_data_t _gd, *gd = &_gd;
- aes_gcm_counter_t _ctr, *ctr = &_ctr;
-
- clib_prefetch_load (iv);
- clib_prefetch_load (in);
- clib_prefetch_load (in + 4);
-
- /* calculate ghash for AAD - optimized for ipsec common cases */
- if (aad_bytes == 8)
- T = aes_gcm_ghash (T, kd, addt, 8);
- else if (aad_bytes == 12)
- T = aes_gcm_ghash (T, kd, addt, 12);
- else
- T = aes_gcm_ghash (T, kd, addt, aad_bytes);
-
- /* initalize counter */
- ctr->counter = 1;
- Y0 = (u32x4) aes_load_partial (iv, 12) + ctr_inv_1;
-#ifdef __VAES__
- ctr->Y4 = u32x16_splat_u32x4 (Y0) + ctr_inv_1234;
-#else
- ctr->Y = Y0 + ctr_inv_1;
-#endif
-
- /* ghash and encrypt/edcrypt */
- if (is_encrypt)
- T = aes_gcm_enc (T, kd, ctr, in, out, data_bytes, aes_rounds);
- else
- T = aes_gcm_dec (T, kd, ctr, in, out, data_bytes, aes_rounds);
-
- clib_prefetch_load (tag);
-
- /* Finalize ghash - data bytes and aad bytes converted to bits */
- /* *INDENT-OFF* */
- r = (u8x16) ((u64x2) {data_bytes, aad_bytes} << 3);
- /* *INDENT-ON* */
-
- /* interleaved computation of final ghash and E(Y0, k) */
- ghash_mul_first (gd, r ^ T, kd->Hi[NUM_HI - 1]);
- r = kd->Ke[0] ^ (u8x16) Y0;
- for (i = 1; i < 5; i += 1)
- r = aes_enc_round (r, kd->Ke[i]);
- ghash_reduce (gd);
- ghash_reduce2 (gd);
- for (; i < 9; i += 1)
- r = aes_enc_round (r, kd->Ke[i]);
- T = ghash_final (gd);
- for (; i < aes_rounds; i += 1)
- r = aes_enc_round (r, kd->Ke[i]);
- r = aes_enc_last_round (r, kd->Ke[aes_rounds]);
- T = u8x16_reflect (T) ^ r;
-
- /* tag_len 16 -> 0 */
- tag_len &= 0xf;
-
- if (is_encrypt)
- {
- /* store tag */
- if (tag_len)
- aes_store_partial (tag, T, tag_len);
- else
- tag[0] = T;
- }
- else
- {
- /* check tag */
- u16 tag_mask = tag_len ? (1 << tag_len) - 1 : 0xffff;
- if ((u8x16_msb_mask (tag[0] == T) & tag_mask) != tag_mask)
- return 0;
- }
- return 1;
-}
-
static_always_inline u32
-aes_ops_enc_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[],
- u32 n_ops, aes_key_size_t ks)
+aes_ops_enc_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
+ aes_key_size_t ks)
{
crypto_native_main_t *cm = &crypto_native_main;
vnet_crypto_op_t *op = ops[0];
aes_gcm_key_data_t *kd;
u32 n_left = n_ops;
-
next:
kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index];
- aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad,
- (u8x16u *) op->iv, (u8x16u *) op->tag, op->len, op->aad_len,
- op->tag_len, kd, AES_KEY_ROUNDS (ks), /* is_encrypt */ 1);
+ aes_gcm (op->src, op->dst, op->aad, (u8 *) op->iv, op->tag, op->len,
+ op->aad_len, op->tag_len, kd, AES_KEY_ROUNDS (ks),
+ AES_GCM_OP_ENCRYPT);
op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
if (--n_left)
@@ -1125,7 +51,7 @@ next:
}
static_always_inline u32
-aes_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops,
+aes_ops_dec_aes_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops,
aes_key_size_t ks)
{
crypto_native_main_t *cm = &crypto_native_main;
@@ -1136,10 +62,9 @@ aes_ops_dec_aes_gcm (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops,
next:
kd = (aes_gcm_key_data_t *) cm->key_data[op->key_index];
- rv = aes_gcm ((u8x16u *) op->src, (u8x16u *) op->dst, (u8x16u *) op->aad,
- (u8x16u *) op->iv, (u8x16u *) op->tag, op->len,
+ rv = aes_gcm (op->src, op->dst, op->aad, (u8 *) op->iv, op->tag, op->len,
op->aad_len, op->tag_len, kd, AES_KEY_ROUNDS (ks),
- /* is_encrypt */ 0);
+ AES_GCM_OP_DECRYPT);
if (rv)
{
@@ -1161,75 +86,81 @@ next:
}
static_always_inline void *
-aes_gcm_key_exp (vnet_crypto_key_t * key, aes_key_size_t ks)
+aes_gcm_key_exp (vnet_crypto_key_t *key, aes_key_size_t ks)
{
aes_gcm_key_data_t *kd;
- u8x16 H;
kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
- /* expand AES key */
- aes_key_expand ((u8x16 *) kd->Ke, key->data, ks);
+ clib_aes_gcm_key_expand (kd, key->data, ks);
- /* pre-calculate H */
- H = aes_encrypt_block (u8x16_splat (0), kd->Ke, ks);
- H = u8x16_reflect (H);
- ghash_precompute (H, (u8x16 *) kd->Hi, NUM_HI);
-#ifdef __VAES__
- u8x64 *Ke4 = (u8x64 *) kd->Ke4;
- for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
- Ke4[i] = u8x64_splat_u8x16 (kd->Ke[i]);
-#endif
return kd;
}
-#define foreach_aes_gcm_handler_type _(128) _(192) _(256)
-
-#define _(x) \
-static u32 aes_ops_dec_aes_gcm_##x \
-(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
-static u32 aes_ops_enc_aes_gcm_##x \
-(vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); } \
-static void * aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \
-{ return aes_gcm_key_exp (key, AES_KEY_##x); }
+#define foreach_aes_gcm_handler_type _ (128) _ (192) _ (256)
+
+#define _(x) \
+ static u32 aes_ops_dec_aes_gcm_##x (vlib_main_t *vm, \
+ vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return aes_ops_dec_aes_gcm (vm, ops, n_ops, AES_KEY_##x); \
+ } \
+ static u32 aes_ops_enc_aes_gcm_##x (vlib_main_t *vm, \
+ vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return aes_ops_enc_aes_gcm (vm, ops, n_ops, AES_KEY_##x); \
+ } \
+ static void *aes_gcm_key_exp_##x (vnet_crypto_key_t *key) \
+ { \
+ return aes_gcm_key_exp (key, AES_KEY_##x); \
+ }
foreach_aes_gcm_handler_type;
#undef _
-clib_error_t *
-#ifdef __VAES__
-crypto_native_aes_gcm_init_icl (vlib_main_t * vm)
-#elif __AVX512F__
-crypto_native_aes_gcm_init_skx (vlib_main_t * vm)
-#elif __AVX2__
-crypto_native_aes_gcm_init_hsw (vlib_main_t * vm)
+static int
+probe ()
+{
+#if defined(__VAES__) && defined(__AVX512F__)
+ if (clib_cpu_supports_vpclmulqdq () && clib_cpu_supports_vaes () &&
+ clib_cpu_supports_avx512f ())
+ return 50;
+#elif defined(__VAES__)
+ if (clib_cpu_supports_vpclmulqdq () && clib_cpu_supports_vaes ())
+ return 40;
+#elif defined(__AVX512F__)
+ if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_avx512f ())
+ return 30;
+#elif defined(__AVX2__)
+ if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_avx2 ())
+ return 20;
+#elif __AES__
+ if (clib_cpu_supports_pclmulqdq () && clib_cpu_supports_aes ())
+ return 10;
#elif __aarch64__
-crypto_native_aes_gcm_init_neon (vlib_main_t * vm)
-#else
-crypto_native_aes_gcm_init_slm (vlib_main_t * vm)
+ if (clib_cpu_supports_aarch64_aes ())
+ return 10;
#endif
-{
- crypto_native_main_t *cm = &crypto_native_main;
+ return -1;
+}
+
+#define _(b) \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_enc) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_GCM_ENC, \
+ .fn = aes_ops_enc_aes_gcm_##b, \
+ .probe = probe, \
+ }; \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (aes_##b##_gcm_dec) = { \
+ .op_id = VNET_CRYPTO_OP_AES_##b##_GCM_DEC, \
+ .fn = aes_ops_dec_aes_gcm_##b, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_KEY_HANDLER (aes_##b##_gcm) = { \
+ .alg_id = VNET_CRYPTO_ALG_AES_##b##_GCM, \
+ .key_fn = aes_gcm_key_exp_##b, \
+ .probe = probe, \
+ };
-#define _(x) \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_GCM_ENC, \
- aes_ops_enc_aes_gcm_##x); \
- vnet_crypto_register_ops_handler (vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_AES_##x##_GCM_DEC, \
- aes_ops_dec_aes_gcm_##x); \
- cm->key_fn[VNET_CRYPTO_ALG_AES_##x##_GCM] = aes_gcm_key_exp_##x;
- foreach_aes_gcm_handler_type;
+_ (128) _ (192) _ (256)
#undef _
- return 0;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/crypto_native/crypto_native.h b/src/plugins/crypto_native/crypto_native.h
index d5c33daa1a6..3d18e8cabd0 100644
--- a/src/plugins/crypto_native/crypto_native.h
+++ b/src/plugins/crypto_native/crypto_native.h
@@ -19,38 +19,66 @@
#define __crypto_native_h__
typedef void *(crypto_native_key_fn_t) (vnet_crypto_key_t * key);
+typedef int (crypto_native_variant_probe_t) ();
-typedef struct
+typedef struct crypto_native_op_handler
+{
+ struct crypto_native_op_handler *next;
+ vnet_crypto_op_id_t op_id;
+ vnet_crypto_ops_handler_t *fn;
+ vnet_crypto_chained_ops_handler_t *cfn;
+ crypto_native_variant_probe_t *probe;
+ int priority;
+} crypto_native_op_handler_t;
+
+typedef struct crypto_native_key_handler
{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u8x16 cbc_iv[16];
-} crypto_native_per_thread_data_t;
+ struct crypto_native_key_handler *next;
+ vnet_crypto_alg_t alg_id;
+ crypto_native_key_fn_t *key_fn;
+ crypto_native_variant_probe_t *probe;
+ int priority;
+} crypto_native_key_handler_t;
typedef struct
{
u32 crypto_engine_index;
- crypto_native_per_thread_data_t *per_thread_data;
crypto_native_key_fn_t *key_fn[VNET_CRYPTO_N_ALGS];
void **key_data;
+ crypto_native_op_handler_t *op_handlers;
+ crypto_native_key_handler_t *key_handlers;
} crypto_native_main_t;
extern crypto_native_main_t crypto_native_main;
-#define foreach_crypto_native_march_variant _(slm) _(hsw) _(skx) _(icl) _(neon)
-
-#define _(v) \
-clib_error_t __clib_weak *crypto_native_aes_cbc_init_##v (vlib_main_t * vm); \
-clib_error_t __clib_weak *crypto_native_aes_gcm_init_##v (vlib_main_t * vm); \
-
-foreach_crypto_native_march_variant;
-#undef _
+#define CRYPTO_NATIVE_OP_HANDLER(x) \
+ static crypto_native_op_handler_t __crypto_native_op_handler_##x; \
+ static void __clib_constructor __crypto_native_op_handler_cb_##x (void) \
+ { \
+ crypto_native_main_t *cm = &crypto_native_main; \
+ int priority = __crypto_native_op_handler_##x.probe (); \
+ if (priority >= 0) \
+ { \
+ __crypto_native_op_handler_##x.priority = priority; \
+ __crypto_native_op_handler_##x.next = cm->op_handlers; \
+ cm->op_handlers = &__crypto_native_op_handler_##x; \
+ } \
+ } \
+ static crypto_native_op_handler_t __crypto_native_op_handler_##x
+#define CRYPTO_NATIVE_KEY_HANDLER(x) \
+ static crypto_native_key_handler_t __crypto_native_key_handler_##x; \
+ static void __clib_constructor __crypto_native_key_handler_cb_##x (void) \
+ { \
+ crypto_native_main_t *cm = &crypto_native_main; \
+ int priority = __crypto_native_key_handler_##x.probe (); \
+ if (priority >= 0) \
+ { \
+ __crypto_native_key_handler_##x.priority = priority; \
+ __crypto_native_key_handler_##x.next = cm->key_handlers; \
+ cm->key_handlers = &__crypto_native_key_handler_##x; \
+ } \
+ } \
+ static crypto_native_key_handler_t __crypto_native_key_handler_##x
#endif /* __crypto_native_h__ */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/crypto_native/main.c b/src/plugins/crypto_native/main.c
index 32bbbb13652..2bc0d98f196 100644
--- a/src/plugins/crypto_native/main.c
+++ b/src/plugins/crypto_native/main.c
@@ -63,100 +63,66 @@ clib_error_t *
crypto_native_init (vlib_main_t * vm)
{
crypto_native_main_t *cm = &crypto_native_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- clib_error_t *error = 0;
- if (clib_cpu_supports_x86_aes () == 0 &&
- clib_cpu_supports_aarch64_aes () == 0)
+ if (cm->op_handlers == 0)
return 0;
- vec_validate_aligned (cm->per_thread_data, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
-
cm->crypto_engine_index =
vnet_crypto_register_engine (vm, "native", 100,
"Native ISA Optimized Crypto");
- if (0);
-#if __x86_64__
- else if (crypto_native_aes_cbc_init_icl && clib_cpu_supports_vaes ())
- error = crypto_native_aes_cbc_init_icl (vm);
- else if (crypto_native_aes_cbc_init_skx && clib_cpu_supports_avx512f ())
- error = crypto_native_aes_cbc_init_skx (vm);
- else if (crypto_native_aes_cbc_init_hsw && clib_cpu_supports_avx2 ())
- error = crypto_native_aes_cbc_init_hsw (vm);
- else if (crypto_native_aes_cbc_init_slm)
- error = crypto_native_aes_cbc_init_slm (vm);
-#endif
-#if __aarch64__
- else if (crypto_native_aes_cbc_init_neon)
- error = crypto_native_aes_cbc_init_neon (vm);
-#endif
- else
- error = clib_error_return (0, "No AES CBC implemenation available");
-
- if (error)
- goto error;
-
-#if __x86_64__
- if (clib_cpu_supports_pclmulqdq ())
+ crypto_native_op_handler_t *oh = cm->op_handlers;
+ crypto_native_key_handler_t *kh = cm->key_handlers;
+ crypto_native_op_handler_t **best_by_op_id = 0;
+ crypto_native_key_handler_t **best_by_alg_id = 0;
+
+ while (oh)
{
- if (crypto_native_aes_gcm_init_icl && clib_cpu_supports_vaes ())
- error = crypto_native_aes_gcm_init_icl (vm);
- else if (crypto_native_aes_gcm_init_skx && clib_cpu_supports_avx512f ())
- error = crypto_native_aes_gcm_init_skx (vm);
- else if (crypto_native_aes_gcm_init_hsw && clib_cpu_supports_avx2 ())
- error = crypto_native_aes_gcm_init_hsw (vm);
- else if (crypto_native_aes_gcm_init_slm)
- error = crypto_native_aes_gcm_init_slm (vm);
- else
- error = clib_error_return (0, "No AES GCM implemenation available");
-
- if (error)
- goto error;
+ vec_validate (best_by_op_id, oh->op_id);
+
+ if (best_by_op_id[oh->op_id] == 0 ||
+ best_by_op_id[oh->op_id]->priority < oh->priority)
+ best_by_op_id[oh->op_id] = oh;
+
+ oh = oh->next;
}
-#endif
-#if __aarch64__
- if (crypto_native_aes_gcm_init_neon)
- error = crypto_native_aes_gcm_init_neon (vm);
- else
- error = clib_error_return (0, "No AES GCM implemenation available");
- if (error)
- goto error;
-#endif
+ while (kh)
+ {
+ vec_validate (best_by_alg_id, kh->alg_id);
- vnet_crypto_register_key_handler (vm, cm->crypto_engine_index,
- crypto_native_key_handler);
+ if (best_by_alg_id[kh->alg_id] == 0 ||
+ best_by_alg_id[kh->alg_id]->priority < kh->priority)
+ best_by_alg_id[kh->alg_id] = kh;
+
+ kh = kh->next;
+ }
+
+ vec_foreach_pointer (oh, best_by_op_id)
+ if (oh)
+ vnet_crypto_register_ops_handlers (vm, cm->crypto_engine_index,
+ oh->op_id, oh->fn, oh->cfn);
+ vec_foreach_pointer (kh, best_by_alg_id)
+ if (kh)
+ cm->key_fn[kh->alg_id] = kh->key_fn;
-error:
- if (error)
- vec_free (cm->per_thread_data);
+ vec_free (best_by_op_id);
+ vec_free (best_by_alg_id);
- return error;
+ vnet_crypto_register_key_handler (vm, cm->crypto_engine_index,
+ crypto_native_key_handler);
+ return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (crypto_native_init) =
{
.runs_after = VLIB_INITS ("vnet_crypto_init"),
};
-/* *INDENT-ON* */
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
- .description = "Intel IA32 Software Crypto Engine",
+ .description = "Native Crypto Engine",
};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/crypto_native/sha2.c b/src/plugins/crypto_native/sha2.c
new file mode 100644
index 00000000000..459ce6d8e79
--- /dev/null
+++ b/src/plugins/crypto_native/sha2.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/crypto/crypto.h>
+#include <crypto_native/crypto_native.h>
+#include <vppinfra/crypto/sha2.h>
+
+static_always_inline u32
+crypto_native_ops_hash_sha2 (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ u32 n_ops, vnet_crypto_op_chunk_t *chunks,
+ clib_sha2_type_t type, int maybe_chained)
+{
+ vnet_crypto_op_t *op = ops[0];
+ clib_sha2_ctx_t ctx;
+ u32 n_left = n_ops;
+
+next:
+ if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
+ {
+ vnet_crypto_op_chunk_t *chp = chunks + op->chunk_index;
+ clib_sha2_init (&ctx, type);
+ for (int j = 0; j < op->n_chunks; j++, chp++)
+ clib_sha2_update (&ctx, chp->src, chp->len);
+ clib_sha2_final (&ctx, op->digest);
+ }
+ else
+ clib_sha2 (type, op->src, op->len, op->digest);
+
+ op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
+
+ if (--n_left)
+ {
+ op += 1;
+ goto next;
+ }
+
+ return n_ops;
+}
+
+static_always_inline u32
+crypto_native_ops_hmac_sha2 (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ u32 n_ops, vnet_crypto_op_chunk_t *chunks,
+ clib_sha2_type_t type)
+{
+ crypto_native_main_t *cm = &crypto_native_main;
+ vnet_crypto_op_t *op = ops[0];
+ u32 n_left = n_ops;
+ clib_sha2_hmac_ctx_t ctx;
+ u8 buffer[64];
+ u32 sz, n_fail = 0;
+
+ for (; n_left; n_left--, op++)
+ {
+ clib_sha2_hmac_init (
+ &ctx, type, (clib_sha2_hmac_key_data_t *) cm->key_data[op->key_index]);
+ if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
+ {
+ vnet_crypto_op_chunk_t *chp = chunks + op->chunk_index;
+ for (int j = 0; j < op->n_chunks; j++, chp++)
+ clib_sha2_hmac_update (&ctx, chp->src, chp->len);
+ }
+ else
+ clib_sha2_hmac_update (&ctx, op->src, op->len);
+
+ clib_sha2_hmac_final (&ctx, buffer);
+
+ if (op->digest_len)
+ {
+ sz = op->digest_len;
+ if (op->flags & VNET_CRYPTO_OP_FLAG_HMAC_CHECK)
+ {
+ if ((memcmp (op->digest, buffer, sz)))
+ {
+ n_fail++;
+ op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
+ continue;
+ }
+ }
+ else
+ clib_memcpy_fast (op->digest, buffer, sz);
+ }
+ else
+ {
+ sz = clib_sha2_variants[type].digest_size;
+ if (op->flags & VNET_CRYPTO_OP_FLAG_HMAC_CHECK)
+ {
+ if ((memcmp (op->digest, buffer, sz)))
+ {
+ n_fail++;
+ op->status = VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
+ continue;
+ }
+ }
+ else
+ clib_memcpy_fast (op->digest, buffer, sz);
+ }
+
+ op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
+ }
+
+ return n_ops - n_fail;
+}
+
+static void *
+sha2_key_add (vnet_crypto_key_t *key, clib_sha2_type_t type)
+{
+ clib_sha2_hmac_key_data_t *kd;
+
+ kd = clib_mem_alloc_aligned (sizeof (*kd), CLIB_CACHE_LINE_BYTES);
+ clib_sha2_hmac_key_data (type, key->data, vec_len (key->data), kd);
+
+ return kd;
+}
+
+static int
+probe ()
+{
+#if defined(__SHA__) && defined(__x86_64__)
+ if (clib_cpu_supports_sha ())
+ return 50;
+#elif defined(__ARM_FEATURE_SHA2)
+ if (clib_cpu_supports_sha2 ())
+ return 10;
+#endif
+ return -1;
+}
+
+#define _(b) \
+ static u32 crypto_native_ops_hash_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return crypto_native_ops_hash_sha2 (vm, ops, n_ops, 0, CLIB_SHA2_##b, 0); \
+ } \
+ \
+ static u32 crypto_native_ops_chained_hash_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ return crypto_native_ops_hash_sha2 (vm, ops, n_ops, chunks, \
+ CLIB_SHA2_##b, 1); \
+ } \
+ \
+ static u32 crypto_native_ops_hmac_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], u32 n_ops) \
+ { \
+ return crypto_native_ops_hmac_sha2 (vm, ops, n_ops, 0, CLIB_SHA2_##b); \
+ } \
+ \
+ static u32 crypto_native_ops_chained_hmac_sha##b ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ return crypto_native_ops_hmac_sha2 (vm, ops, n_ops, chunks, \
+ CLIB_SHA2_##b); \
+ } \
+ \
+ static void *sha2_##b##_key_add (vnet_crypto_key_t *k) \
+ { \
+ return sha2_key_add (k, CLIB_SHA2_##b); \
+ } \
+ \
+ CRYPTO_NATIVE_OP_HANDLER (crypto_native_hash_sha##b) = { \
+ .op_id = VNET_CRYPTO_OP_SHA##b##_HASH, \
+ .fn = crypto_native_ops_hash_sha##b, \
+ .cfn = crypto_native_ops_chained_hash_sha##b, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_OP_HANDLER (crypto_native_hmac_sha##b) = { \
+ .op_id = VNET_CRYPTO_OP_SHA##b##_HMAC, \
+ .fn = crypto_native_ops_hmac_sha##b, \
+ .cfn = crypto_native_ops_chained_hmac_sha##b, \
+ .probe = probe, \
+ }; \
+ CRYPTO_NATIVE_KEY_HANDLER (crypto_native_hmac_sha##b) = { \
+ .alg_id = VNET_CRYPTO_ALG_HMAC_SHA##b, \
+ .key_fn = sha2_##b##_key_add, \
+ .probe = probe, \
+ };
+
+_ (224)
+_ (256)
+
+#undef _
diff --git a/src/plugins/crypto_openssl/CMakeLists.txt b/src/plugins/crypto_openssl/CMakeLists.txt
index d014144eca8..472b0ef3243 100644
--- a/src/plugins/crypto_openssl/CMakeLists.txt
+++ b/src/plugins/crypto_openssl/CMakeLists.txt
@@ -16,11 +16,12 @@ if(NOT OPENSSL_FOUND)
endif()
include_directories(${OPENSSL_INCLUDE_DIR})
+add_compile_definitions(OPENSSL_SUPPRESS_DEPRECATED)
add_vpp_plugin(crypto_openssl
SOURCES
main.c
LINK_LIBRARIES
- ${OPENSSL_LIBRARIES}
+ ${OPENSSL_CRYPTO_LIBRARIES}
)
diff --git a/src/plugins/crypto_openssl/crypto_openssl.h b/src/plugins/crypto_openssl/crypto_openssl.h
new file mode 100644
index 00000000000..e16429fb5dd
--- /dev/null
+++ b/src/plugins/crypto_openssl/crypto_openssl.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 ARM Ltd and/or its affiliates.
+ */
+
+#ifndef __crypto_openssl_h__
+#define __crypto_openssl_h__
+
+typedef void *(crypto_openssl_ctx_fn_t) (vnet_crypto_key_t *key,
+ vnet_crypto_key_op_t kop,
+ vnet_crypto_key_index_t idx);
+
+typedef struct
+{
+ u32 crypto_engine_index;
+ crypto_openssl_ctx_fn_t *ctx_fn[VNET_CRYPTO_N_ALGS];
+} crypto_openssl_main_t;
+
+extern crypto_openssl_main_t crypto_openssl_main;
+
+#endif /* __crypto_openssl_h__ */
diff --git a/src/plugins/crypto_openssl/main.c b/src/plugins/crypto_openssl/main.c
index 48846b14483..b070cf336a5 100644
--- a/src/plugins/crypto_openssl/main.c
+++ b/src/plugins/crypto_openssl/main.c
@@ -15,6 +15,8 @@
*------------------------------------------------------------------
*/
+#include <sys/syscall.h>
+
#include <openssl/evp.h>
#include <openssl/hmac.h>
#include <openssl/rand.h>
@@ -24,12 +26,14 @@
#include <vnet/plugin/plugin.h>
#include <vnet/crypto/crypto.h>
#include <vpp/app/version.h>
+#include <crypto_openssl/crypto_openssl.h>
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- EVP_CIPHER_CTX *evp_cipher_ctx;
- HMAC_CTX *hmac_ctx;
+ EVP_CIPHER_CTX **evp_cipher_enc_ctx;
+ EVP_CIPHER_CTX **evp_cipher_dec_ctx;
+ HMAC_CTX **hmac_ctx;
EVP_MD_CTX *hash_ctx;
#if OPENSSL_VERSION_NUMBER < 0x10100000L
HMAC_CTX _hmac_ctx;
@@ -49,7 +53,10 @@ static openssl_per_thread_data_t *per_thread_data = 0;
_ (gcm, AES_256_GCM, EVP_aes_256_gcm, 8) \
_ (cbc, AES_128_CTR, EVP_aes_128_ctr, 8) \
_ (cbc, AES_192_CTR, EVP_aes_192_ctr, 8) \
- _ (cbc, AES_256_CTR, EVP_aes_256_ctr, 8)
+ _ (cbc, AES_256_CTR, EVP_aes_256_ctr, 8) \
+ _ (null_gmac, AES_128_NULL_GMAC, EVP_aes_128_gcm, 8) \
+ _ (null_gmac, AES_192_NULL_GMAC, EVP_aes_192_gcm, 8) \
+ _ (null_gmac, AES_256_NULL_GMAC, EVP_aes_256_gcm, 8)
#define foreach_openssl_chacha20_evp_op \
_ (chacha20_poly1305, CHACHA20_POLY1305, EVP_chacha20_poly1305, 8)
@@ -84,6 +91,8 @@ static openssl_per_thread_data_t *per_thread_data = 0;
_(SHA384, EVP_sha384) \
_(SHA512, EVP_sha512)
+crypto_openssl_main_t crypto_openssl_main;
+
static_always_inline u32
openssl_ops_enc_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_chunk_t *chunks, u32 n_ops,
@@ -91,7 +100,7 @@ openssl_ops_enc_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
{
openssl_per_thread_data_t *ptd = vec_elt_at_index (per_thread_data,
vm->thread_index);
- EVP_CIPHER_CTX *ctx = ptd->evp_cipher_ctx;
+ EVP_CIPHER_CTX *ctx;
vnet_crypto_op_chunk_t *chp;
u32 i, j, curr_len = 0;
u8 out_buf[VLIB_BUFFER_DEFAULT_DATA_SIZE * 5];
@@ -99,16 +108,10 @@ openssl_ops_enc_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- vnet_crypto_key_t *key = vnet_crypto_get_key (op->key_index);
int out_len = 0;
- if (op->flags & VNET_CRYPTO_OP_FLAG_INIT_IV)
- RAND_bytes (op->iv, iv_len);
-
- EVP_EncryptInit_ex (ctx, cipher, NULL, key->data, op->iv);
-
- if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
- EVP_CIPHER_CTX_set_padding (ctx, 0);
+ ctx = ptd->evp_cipher_enc_ctx[op->key_index];
+ EVP_EncryptInit_ex (ctx, NULL, NULL, NULL, op->iv);
if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
{
@@ -152,7 +155,7 @@ openssl_ops_dec_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
{
openssl_per_thread_data_t *ptd = vec_elt_at_index (per_thread_data,
vm->thread_index);
- EVP_CIPHER_CTX *ctx = ptd->evp_cipher_ctx;
+ EVP_CIPHER_CTX *ctx;
vnet_crypto_op_chunk_t *chp;
u32 i, j, curr_len = 0;
u8 out_buf[VLIB_BUFFER_DEFAULT_DATA_SIZE * 5];
@@ -160,13 +163,10 @@ openssl_ops_dec_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- vnet_crypto_key_t *key = vnet_crypto_get_key (op->key_index);
int out_len = 0;
- EVP_DecryptInit_ex (ctx, cipher, NULL, key->data, op->iv);
-
- if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
- EVP_CIPHER_CTX_set_padding (ctx, 0);
+ ctx = ptd->evp_cipher_dec_ctx[op->key_index];
+ EVP_DecryptInit_ex (ctx, NULL, NULL, NULL, op->iv);
if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
{
@@ -206,26 +206,21 @@ openssl_ops_dec_cbc (vlib_main_t *vm, vnet_crypto_op_t *ops[],
static_always_inline u32
openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_chunk_t *chunks, u32 n_ops,
- const EVP_CIPHER *cipher, int is_gcm, const int iv_len)
+ const EVP_CIPHER *cipher, int is_gcm, int is_gmac,
+ const int iv_len)
{
openssl_per_thread_data_t *ptd = vec_elt_at_index (per_thread_data,
vm->thread_index);
- EVP_CIPHER_CTX *ctx = ptd->evp_cipher_ctx;
+ EVP_CIPHER_CTX *ctx;
vnet_crypto_op_chunk_t *chp;
u32 i, j;
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- vnet_crypto_key_t *key = vnet_crypto_get_key (op->key_index);
int len = 0;
- if (op->flags & VNET_CRYPTO_OP_FLAG_INIT_IV)
- RAND_bytes (op->iv, 8);
-
- EVP_EncryptInit_ex (ctx, cipher, 0, 0, 0);
- if (is_gcm)
- EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, NULL);
- EVP_EncryptInit_ex (ctx, 0, 0, key->data, op->iv);
+ ctx = ptd->evp_cipher_enc_ctx[op->key_index];
+ EVP_EncryptInit_ex (ctx, 0, 0, NULL, op->iv);
if (op->aad_len)
EVP_EncryptUpdate (ctx, NULL, &len, op->aad, op->aad_len);
if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
@@ -233,13 +228,14 @@ openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
chp = chunks + op->chunk_index;
for (j = 0; j < op->n_chunks; j++)
{
- EVP_EncryptUpdate (ctx, chp->dst, &len, chp->src, chp->len);
+ EVP_EncryptUpdate (ctx, is_gmac ? 0 : chp->dst, &len, chp->src,
+ chp->len);
chp += 1;
}
}
else
- EVP_EncryptUpdate (ctx, op->dst, &len, op->src, op->len);
- EVP_EncryptFinal_ex (ctx, op->dst + len, &len);
+ EVP_EncryptUpdate (ctx, is_gmac ? 0 : op->dst, &len, op->src, op->len);
+ EVP_EncryptFinal_ex (ctx, is_gmac ? 0 : op->dst + len, &len);
EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_AEAD_GET_TAG, op->tag_len, op->tag);
op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
}
@@ -247,12 +243,21 @@ openssl_ops_enc_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
}
static_always_inline u32
+openssl_ops_enc_null_gmac (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ vnet_crypto_op_chunk_t *chunks, u32 n_ops,
+ const EVP_CIPHER *cipher, const int iv_len)
+{
+ return openssl_ops_enc_aead (vm, ops, chunks, n_ops, cipher,
+ /* is_gcm */ 1, /* is_gmac */ 1, iv_len);
+}
+
+static_always_inline u32
openssl_ops_enc_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_chunk_t *chunks, u32 n_ops,
const EVP_CIPHER *cipher, const int iv_len)
{
return openssl_ops_enc_aead (vm, ops, chunks, n_ops, cipher,
- /* is_gcm */ 1, iv_len);
+ /* is_gcm */ 1, /* is_gmac */ 0, iv_len);
}
static_always_inline __clib_unused u32
@@ -261,29 +266,27 @@ openssl_ops_enc_chacha20_poly1305 (vlib_main_t *vm, vnet_crypto_op_t *ops[],
const EVP_CIPHER *cipher, const int iv_len)
{
return openssl_ops_enc_aead (vm, ops, chunks, n_ops, cipher,
- /* is_gcm */ 0, iv_len);
+ /* is_gcm */ 0, /* is_gmac */ 0, iv_len);
}
static_always_inline u32
openssl_ops_dec_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_chunk_t *chunks, u32 n_ops,
- const EVP_CIPHER *cipher, int is_gcm, const int iv_len)
+ const EVP_CIPHER *cipher, int is_gcm, int is_gmac,
+ const int iv_len)
{
openssl_per_thread_data_t *ptd = vec_elt_at_index (per_thread_data,
vm->thread_index);
- EVP_CIPHER_CTX *ctx = ptd->evp_cipher_ctx;
+ EVP_CIPHER_CTX *ctx;
vnet_crypto_op_chunk_t *chp;
u32 i, j, n_fail = 0;
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- vnet_crypto_key_t *key = vnet_crypto_get_key (op->key_index);
int len = 0;
- EVP_DecryptInit_ex (ctx, cipher, 0, 0, 0);
- if (is_gcm)
- EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, 0);
- EVP_DecryptInit_ex (ctx, 0, 0, key->data, op->iv);
+ ctx = ptd->evp_cipher_dec_ctx[op->key_index];
+ EVP_DecryptInit_ex (ctx, 0, 0, NULL, op->iv);
if (op->aad_len)
EVP_DecryptUpdate (ctx, 0, &len, op->aad, op->aad_len);
if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
@@ -291,15 +294,19 @@ openssl_ops_dec_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
chp = chunks + op->chunk_index;
for (j = 0; j < op->n_chunks; j++)
{
- EVP_DecryptUpdate (ctx, chp->dst, &len, chp->src, chp->len);
+ EVP_DecryptUpdate (ctx, is_gmac ? 0 : chp->dst, &len, chp->src,
+ chp->len);
chp += 1;
}
}
else
- EVP_DecryptUpdate (ctx, op->dst, &len, op->src, op->len);
+ {
+ EVP_DecryptUpdate (ctx, is_gmac ? 0 : op->dst, &len, op->src,
+ op->len);
+ }
EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_AEAD_SET_TAG, op->tag_len, op->tag);
- if (EVP_DecryptFinal_ex (ctx, op->dst + len, &len) > 0)
+ if (EVP_DecryptFinal_ex (ctx, is_gmac ? 0 : op->dst + len, &len) > 0)
op->status = VNET_CRYPTO_OP_STATUS_COMPLETED;
else
{
@@ -311,12 +318,21 @@ openssl_ops_dec_aead (vlib_main_t *vm, vnet_crypto_op_t *ops[],
}
static_always_inline u32
+openssl_ops_dec_null_gmac (vlib_main_t *vm, vnet_crypto_op_t *ops[],
+ vnet_crypto_op_chunk_t *chunks, u32 n_ops,
+ const EVP_CIPHER *cipher, const int iv_len)
+{
+ return openssl_ops_dec_aead (vm, ops, chunks, n_ops, cipher,
+ /* is_gcm */ 1, /* is_gmac */ 1, iv_len);
+}
+
+static_always_inline u32
openssl_ops_dec_gcm (vlib_main_t *vm, vnet_crypto_op_t *ops[],
vnet_crypto_op_chunk_t *chunks, u32 n_ops,
const EVP_CIPHER *cipher, const int iv_len)
{
return openssl_ops_dec_aead (vm, ops, chunks, n_ops, cipher,
- /* is_gcm */ 1, iv_len);
+ /* is_gcm */ 1, /* is_gmac */ 0, iv_len);
}
static_always_inline __clib_unused u32
@@ -325,7 +341,7 @@ openssl_ops_dec_chacha20_poly1305 (vlib_main_t *vm, vnet_crypto_op_t *ops[],
const EVP_CIPHER *cipher, const int iv_len)
{
return openssl_ops_dec_aead (vm, ops, chunks, n_ops, cipher,
- /* is_gcm */ 0, iv_len);
+ /* is_gcm */ 0, /* is_gmac */ 0, iv_len);
}
static_always_inline u32
@@ -370,17 +386,17 @@ openssl_ops_hmac (vlib_main_t * vm, vnet_crypto_op_t * ops[],
u8 buffer[64];
openssl_per_thread_data_t *ptd = vec_elt_at_index (per_thread_data,
vm->thread_index);
- HMAC_CTX *ctx = ptd->hmac_ctx;
+ HMAC_CTX *ctx;
vnet_crypto_op_chunk_t *chp;
u32 i, j, n_fail = 0;
for (i = 0; i < n_ops; i++)
{
vnet_crypto_op_t *op = ops[i];
- vnet_crypto_key_t *key = vnet_crypto_get_key (op->key_index);
unsigned int out_len = 0;
size_t sz = op->digest_len ? op->digest_len : EVP_MD_size (md);
- HMAC_Init_ex (ctx, key->data, vec_len (key->data), md, NULL);
+ ctx = ptd->hmac_ctx[op->key_index];
+ HMAC_Init_ex (ctx, NULL, 0, NULL, NULL);
if (op->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS)
{
chp = chunks + op->chunk_index;
@@ -410,6 +426,131 @@ openssl_ops_hmac (vlib_main_t * vm, vnet_crypto_op_t * ops[],
return n_ops - n_fail;
}
+static_always_inline void *
+openssl_ctx_cipher (vnet_crypto_key_t *key, vnet_crypto_key_op_t kop,
+ vnet_crypto_key_index_t idx, const EVP_CIPHER *cipher,
+ int is_gcm)
+{
+ EVP_CIPHER_CTX *ctx;
+ openssl_per_thread_data_t *ptd;
+
+ if (VNET_CRYPTO_KEY_OP_ADD == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ vec_validate_aligned (ptd->evp_cipher_enc_ctx, idx,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ptd->evp_cipher_dec_ctx, idx,
+ CLIB_CACHE_LINE_BYTES);
+
+ ctx = EVP_CIPHER_CTX_new ();
+ EVP_CIPHER_CTX_set_padding (ctx, 0);
+ EVP_EncryptInit_ex (ctx, cipher, NULL, NULL, NULL);
+ if (is_gcm)
+ EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, NULL);
+ EVP_EncryptInit_ex (ctx, 0, 0, key->data, 0);
+ ptd->evp_cipher_enc_ctx[idx] = ctx;
+
+ ctx = EVP_CIPHER_CTX_new ();
+ EVP_CIPHER_CTX_set_padding (ctx, 0);
+ EVP_DecryptInit_ex (ctx, cipher, 0, 0, 0);
+ if (is_gcm)
+ EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, 0);
+ EVP_DecryptInit_ex (ctx, 0, 0, key->data, 0);
+ ptd->evp_cipher_dec_ctx[idx] = ctx;
+ }
+ }
+ else if (VNET_CRYPTO_KEY_OP_MODIFY == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ ctx = ptd->evp_cipher_enc_ctx[idx];
+ EVP_EncryptInit_ex (ctx, cipher, NULL, NULL, NULL);
+ if (is_gcm)
+ EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, NULL);
+ EVP_EncryptInit_ex (ctx, 0, 0, key->data, 0);
+
+ ctx = ptd->evp_cipher_dec_ctx[idx];
+ EVP_DecryptInit_ex (ctx, cipher, 0, 0, 0);
+ if (is_gcm)
+ EVP_CIPHER_CTX_ctrl (ctx, EVP_CTRL_GCM_SET_IVLEN, 12, 0);
+ EVP_DecryptInit_ex (ctx, 0, 0, key->data, 0);
+ }
+ }
+ else if (VNET_CRYPTO_KEY_OP_DEL == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ ctx = ptd->evp_cipher_enc_ctx[idx];
+ EVP_CIPHER_CTX_free (ctx);
+ ptd->evp_cipher_enc_ctx[idx] = NULL;
+
+ ctx = ptd->evp_cipher_dec_ctx[idx];
+ EVP_CIPHER_CTX_free (ctx);
+ ptd->evp_cipher_dec_ctx[idx] = NULL;
+ }
+ }
+ return NULL;
+}
+
+static_always_inline void *
+openssl_ctx_hmac (vnet_crypto_key_t *key, vnet_crypto_key_op_t kop,
+ vnet_crypto_key_index_t idx, const EVP_MD *md)
+{
+ HMAC_CTX *ctx;
+ openssl_per_thread_data_t *ptd;
+ if (VNET_CRYPTO_KEY_OP_ADD == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ vec_validate_aligned (ptd->hmac_ctx, idx, CLIB_CACHE_LINE_BYTES);
+#if OPENSSL_VERSION_NUMBER >= 0x10100000L
+ ctx = HMAC_CTX_new ();
+ HMAC_Init_ex (ctx, key->data, vec_len (key->data), md, NULL);
+ ptd->hmac_ctx[idx] = ctx;
+#else
+ HMAC_CTX_init (&(ptd->_hmac_ctx));
+ ptd->hmac_ctx[idx] = &ptd->_hmac_ctx;
+#endif
+ }
+ }
+ else if (VNET_CRYPTO_KEY_OP_MODIFY == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ ctx = ptd->hmac_ctx[idx];
+ HMAC_Init_ex (ctx, key->data, vec_len (key->data), md, NULL);
+ }
+ }
+ else if (VNET_CRYPTO_KEY_OP_DEL == kop)
+ {
+ vec_foreach (ptd, per_thread_data)
+ {
+ ctx = ptd->hmac_ctx[idx];
+ HMAC_CTX_free (ctx);
+ ptd->hmac_ctx[idx] = NULL;
+ }
+ }
+ return NULL;
+}
+
+static void
+crypto_openssl_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop,
+ vnet_crypto_key_index_t idx)
+{
+ vnet_crypto_key_t *key = vnet_crypto_get_key (idx);
+ crypto_openssl_main_t *cm = &crypto_openssl_main;
+
+ /** TODO: add linked alg support **/
+ if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
+ return;
+
+ if (cm->ctx_fn[key->alg] == 0)
+ return;
+
+ cm->ctx_fn[key->alg](key, kop, idx);
+}
+
#define _(m, a, b, iv) \
static u32 openssl_ops_enc_##a (vlib_main_t *vm, vnet_crypto_op_t *ops[], \
u32 n_ops) \
@@ -435,6 +576,16 @@ openssl_ops_hmac (vlib_main_t * vm, vnet_crypto_op_t * ops[],
u32 n_ops) \
{ \
return openssl_ops_dec_##m (vm, ops, chunks, n_ops, b (), iv); \
+ } \
+ static void *openssl_ctx_##a (vnet_crypto_key_t *key, \
+ vnet_crypto_key_op_t kop, \
+ vnet_crypto_key_index_t idx) \
+ { \
+ int is_gcm = ((VNET_CRYPTO_ALG_AES_128_GCM <= key->alg) && \
+ (VNET_CRYPTO_ALG_AES_256_NULL_GMAC >= key->alg)) ? \
+ 1 : \
+ 0; \
+ return openssl_ctx_cipher (key, kop, idx, b (), is_gcm); \
}
foreach_openssl_evp_op;
@@ -456,29 +607,43 @@ foreach_openssl_evp_op;
foreach_openssl_hash_op;
#undef _
-#define _(a, b) \
-static u32 \
-openssl_ops_hmac_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], u32 n_ops) \
-{ return openssl_ops_hmac (vm, ops, 0, n_ops, b ()); } \
-static u32 \
-openssl_ops_hmac_chained_##a (vlib_main_t * vm, vnet_crypto_op_t * ops[], \
- vnet_crypto_op_chunk_t *chunks, u32 n_ops) \
-{ return openssl_ops_hmac (vm, ops, chunks, n_ops, b ()); } \
+#define _(a, b) \
+ static u32 openssl_ops_hmac_##a (vlib_main_t *vm, vnet_crypto_op_t *ops[], \
+ u32 n_ops) \
+ { \
+ return openssl_ops_hmac (vm, ops, 0, n_ops, b ()); \
+ } \
+ static u32 openssl_ops_hmac_chained_##a ( \
+ vlib_main_t *vm, vnet_crypto_op_t *ops[], vnet_crypto_op_chunk_t *chunks, \
+ u32 n_ops) \
+ { \
+ return openssl_ops_hmac (vm, ops, chunks, n_ops, b ()); \
+ } \
+ static void *openssl_ctx_hmac_##a (vnet_crypto_key_t *key, \
+ vnet_crypto_key_op_t kop, \
+ vnet_crypto_key_index_t idx) \
+ { \
+ return openssl_ctx_hmac (key, kop, idx, b ()); \
+ }
foreach_openssl_hmac_op;
#undef _
-
clib_error_t *
crypto_openssl_init (vlib_main_t * vm)
{
+ crypto_openssl_main_t *cm = &crypto_openssl_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
openssl_per_thread_data_t *ptd;
- u8 *seed_data = 0;
- time_t t;
- pid_t pid;
+ u8 seed[32];
+
+ if (syscall (SYS_getrandom, &seed, sizeof (seed), 0) != sizeof (seed))
+ return clib_error_return_unix (0, "getrandom() failed");
+
+ RAND_seed (seed, sizeof (seed));
u32 eidx = vnet_crypto_register_engine (vm, "openssl", 50, "OpenSSL");
+ cm->crypto_engine_index = eidx;
#define _(m, a, b, iv) \
vnet_crypto_register_ops_handlers (vm, eidx, VNET_CRYPTO_OP_##a##_ENC, \
@@ -486,15 +651,17 @@ crypto_openssl_init (vlib_main_t * vm)
openssl_ops_enc_chained_##a); \
vnet_crypto_register_ops_handlers (vm, eidx, VNET_CRYPTO_OP_##a##_DEC, \
openssl_ops_dec_##a, \
- openssl_ops_dec_chained_##a);
+ openssl_ops_dec_chained_##a); \
+ cm->ctx_fn[VNET_CRYPTO_ALG_##a] = openssl_ctx_##a;
foreach_openssl_evp_op;
#undef _
-#define _(a, b) \
- vnet_crypto_register_ops_handlers (vm, eidx, VNET_CRYPTO_OP_##a##_HMAC, \
- openssl_ops_hmac_##a, \
- openssl_ops_hmac_chained_##a); \
+#define _(a, b) \
+ vnet_crypto_register_ops_handlers (vm, eidx, VNET_CRYPTO_OP_##a##_HMAC, \
+ openssl_ops_hmac_##a, \
+ openssl_ops_hmac_chained_##a); \
+ cm->ctx_fn[VNET_CRYPTO_ALG_HMAC_##a] = openssl_ctx_hmac_##a;
foreach_openssl_hmac_op;
#undef _
@@ -512,43 +679,25 @@ crypto_openssl_init (vlib_main_t * vm)
vec_foreach (ptd, per_thread_data)
{
- ptd->evp_cipher_ctx = EVP_CIPHER_CTX_new ();
#if OPENSSL_VERSION_NUMBER >= 0x10100000L
- ptd->hmac_ctx = HMAC_CTX_new ();
ptd->hash_ctx = EVP_MD_CTX_create ();
-#else
- HMAC_CTX_init (&(ptd->_hmac_ctx));
- ptd->hmac_ctx = &ptd->_hmac_ctx;
#endif
}
-
- t = time (NULL);
- pid = getpid ();
- vec_add (seed_data, &t, sizeof (t));
- vec_add (seed_data, &pid, sizeof (pid));
- vec_add (seed_data, seed_data, sizeof (seed_data));
-
- RAND_seed ((const void *) seed_data, vec_len (seed_data));
-
- vec_free (seed_data);
-
+ vnet_crypto_register_key_handler (vm, cm->crypto_engine_index,
+ crypto_openssl_key_handler);
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (crypto_openssl_init) =
{
.runs_after = VLIB_INITS ("vnet_crypto_init"),
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "OpenSSL Crypto Engine",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.api b/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.api
index f1741286d73..8ee8a15f48b 100644
--- a/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.api
+++ b/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.api
@@ -19,7 +19,7 @@
used to control the crypto SW scheduler plugin
*/
-option version = "0.1.0";
+option version = "1.1.0";
/** \brief crypto sw scheduler: Enable or disable workers
diff --git a/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.h b/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.h
index 50dd6c11830..e74dfdd2c2a 100644
--- a/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.h
+++ b/src/plugins/crypto_sw_scheduler/crypto_sw_scheduler.h
@@ -21,18 +21,32 @@
#define CRYPTO_SW_SCHEDULER_QUEUE_SIZE 64
#define CRYPTO_SW_SCHEDULER_QUEUE_MASK (CRYPTO_SW_SCHEDULER_QUEUE_SIZE - 1)
+STATIC_ASSERT ((0 == (CRYPTO_SW_SCHEDULER_QUEUE_SIZE &
+ (CRYPTO_SW_SCHEDULER_QUEUE_SIZE - 1))),
+ "CRYPTO_SW_SCHEDULER_QUEUE_SIZE is not pow2");
+
+typedef enum crypto_sw_scheduler_queue_type_t_
+{
+ CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT = 0,
+ CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT,
+ CRYPTO_SW_SCHED_QUEUE_N_TYPES
+} crypto_sw_scheduler_queue_type_t;
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u32 head;
u32 tail;
- vnet_crypto_async_frame_t *jobs[0];
+ vnet_crypto_async_frame_t **jobs;
} crypto_sw_scheduler_queue_t;
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- crypto_sw_scheduler_queue_t *queues[VNET_CRYPTO_ASYNC_OP_N_IDS];
+ crypto_sw_scheduler_queue_t queue[CRYPTO_SW_SCHED_QUEUE_N_TYPES];
+ u32 last_serve_lcore_id;
+ u8 last_serve_encrypt;
+ u8 last_return_queue;
vnet_crypto_op_t *crypto_ops;
vnet_crypto_op_t *integ_ops;
vnet_crypto_op_t *chained_crypto_ops;
diff --git a/src/plugins/crypto_sw_scheduler/main.c b/src/plugins/crypto_sw_scheduler/main.c
index b0548fa297a..73a158e86b2 100644
--- a/src/plugins/crypto_sw_scheduler/main.c
+++ b/src/plugins/crypto_sw_scheduler/main.c
@@ -25,14 +25,14 @@ crypto_sw_scheduler_set_worker_crypto (u32 worker_idx, u8 enabled)
crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
crypto_sw_scheduler_per_thread_data_t *ptd = 0;
- u32 count = 0, i = vlib_num_workers () > 0;
+ u32 count = 0, i;
if (worker_idx >= vlib_num_workers ())
{
return VNET_API_ERROR_INVALID_VALUE;
}
- for (; i < tm->n_vlib_mains; i++)
+ for (i = 0; i < tm->n_vlib_mains; i++)
{
ptd = cm->per_thread_data + i;
count += ptd->self_crypto_enabled;
@@ -74,68 +74,45 @@ crypto_sw_scheduler_key_handler (vlib_main_t * vm, vnet_crypto_key_op_t kop,
}
static int
-crypto_sw_scheduler_frame_enqueue (vlib_main_t * vm,
- vnet_crypto_async_frame_t * frame)
+crypto_sw_scheduler_frame_enqueue (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame, u8 is_enc)
{
crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main;
- crypto_sw_scheduler_per_thread_data_t *ptd
- = vec_elt_at_index (cm->per_thread_data, vm->thread_index);
- crypto_sw_scheduler_queue_t *q = ptd->queues[frame->op];
- u64 head = q->head;
-
- if (q->jobs[head & CRYPTO_SW_SCHEDULER_QUEUE_MASK])
+ crypto_sw_scheduler_per_thread_data_t *ptd =
+ vec_elt_at_index (cm->per_thread_data, vm->thread_index);
+ crypto_sw_scheduler_queue_t *current_queue =
+ is_enc ? &ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT] :
+ &ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT];
+ u64 head = current_queue->head;
+
+ if (current_queue->jobs[head & CRYPTO_SW_SCHEDULER_QUEUE_MASK])
{
u32 n_elts = frame->n_elts, i;
for (i = 0; i < n_elts; i++)
frame->elts[i].status = VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR;
return -1;
}
- q->jobs[head & CRYPTO_SW_SCHEDULER_QUEUE_MASK] = frame;
+
+ current_queue->jobs[head & CRYPTO_SW_SCHEDULER_QUEUE_MASK] = frame;
head += 1;
CLIB_MEMORY_STORE_BARRIER ();
- q->head = head;
+ current_queue->head = head;
return 0;
}
-static_always_inline vnet_crypto_async_frame_t *
-crypto_sw_scheduler_get_pending_frame (crypto_sw_scheduler_queue_t * q)
+static int
+crypto_sw_scheduler_frame_enqueue_decrypt (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
{
- vnet_crypto_async_frame_t *f;
- u32 i;
- u32 tail = q->tail;
- u32 head = q->head;
-
- for (i = tail; i < head; i++)
- {
- f = q->jobs[i & CRYPTO_SW_SCHEDULER_QUEUE_MASK];
- if (!f)
- continue;
- if (clib_atomic_bool_cmp_and_swap
- (&f->state, VNET_CRYPTO_FRAME_STATE_PENDING,
- VNET_CRYPTO_FRAME_STATE_WORK_IN_PROGRESS))
- {
- return f;
- }
+ return crypto_sw_scheduler_frame_enqueue (vm, frame, 0);
}
- return NULL;
-}
-
-static_always_inline vnet_crypto_async_frame_t *
-crypto_sw_scheduler_get_completed_frame (crypto_sw_scheduler_queue_t * q)
-{
- vnet_crypto_async_frame_t *f = 0;
- if (q->jobs[q->tail & CRYPTO_SW_SCHEDULER_QUEUE_MASK]
- && q->jobs[q->tail & CRYPTO_SW_SCHEDULER_QUEUE_MASK]->state
- >= VNET_CRYPTO_FRAME_STATE_SUCCESS)
+ static int
+ crypto_sw_scheduler_frame_enqueue_encrypt (
+ vlib_main_t *vm, vnet_crypto_async_frame_t *frame)
{
- u32 tail = q->tail;
- CLIB_MEMORY_STORE_BARRIER ();
- q->tail++;
- f = q->jobs[tail & CRYPTO_SW_SCHEDULER_QUEUE_MASK];
- q->jobs[tail & CRYPTO_SW_SCHEDULER_QUEUE_MASK] = 0;
+
+ return crypto_sw_scheduler_frame_enqueue (vm, frame, 1);
}
- return f;
-}
static_always_inline void
cryptodev_sw_scheduler_sgl (vlib_main_t *vm,
@@ -267,7 +244,7 @@ crypto_sw_scheduler_convert_link_crypto (vlib_main_t * vm,
integ_op->digest = fe->digest;
integ_op->digest_len = digest_len;
integ_op->key_index = key->index_integ;
- integ_op->flags = fe->flags & ~VNET_CRYPTO_OP_FLAG_INIT_IV;
+ integ_op->flags = fe->flags;
crypto_op->user_data = integ_op->user_data = index;
}
@@ -283,17 +260,22 @@ process_ops (vlib_main_t * vm, vnet_crypto_async_frame_t * f,
n_fail = n_ops - vnet_crypto_process_ops (vm, op, n_ops);
- while (n_fail)
+ /*
+ * If we had a failure in the ops then we need to walk all the ops
+ * and set the status in the corresponding frame. This status is
+ * not set in the case with no failures, as in that case the overall
+ * frame status is success.
+ */
+ if (n_fail)
{
- ASSERT (op - ops < n_ops);
-
- if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ for (int i = 0; i < n_ops; i++)
{
+ ASSERT (op - ops < n_ops);
+
f->elts[op->user_data].status = op->status;
- *state = VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
- n_fail--;
+ op++;
}
- op++;
+ *state = VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
}
}
@@ -310,170 +292,287 @@ process_chained_ops (vlib_main_t * vm, vnet_crypto_async_frame_t * f,
n_fail = n_ops - vnet_crypto_process_chained_ops (vm, op, chunks, n_ops);
- while (n_fail)
+ /*
+ * If we had a failure in the ops then we need to walk all the ops
+ * and set the status in the corresponding frame. This status is
+ * not set in the case with no failures, as in that case the overall
+ * frame status is success.
+ */
+ if (n_fail)
{
- ASSERT (op - ops < n_ops);
-
- if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ for (int i = 0; i < n_ops; i++)
{
+ ASSERT (op - ops < n_ops);
+
f->elts[op->user_data].status = op->status;
- *state = VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
- n_fail--;
+ op++;
}
- op++;
+ *state = VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
}
}
-static_always_inline vnet_crypto_async_frame_t *
-crypto_sw_scheduler_dequeue_aead (vlib_main_t * vm,
- vnet_crypto_async_op_id_t async_op_id,
- vnet_crypto_op_id_t sync_op_id, u8 tag_len,
- u8 aad_len, u32 * nb_elts_processed,
- u32 * enqueue_thread_idx)
+static_always_inline void
+crypto_sw_scheduler_process_aead (vlib_main_t *vm,
+ crypto_sw_scheduler_per_thread_data_t *ptd,
+ vnet_crypto_async_frame_t *f, u32 aead_op,
+ u32 aad_len, u32 digest_len)
{
- crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main;
- crypto_sw_scheduler_per_thread_data_t *ptd = 0;
- crypto_sw_scheduler_queue_t *q = 0;
- vnet_crypto_async_frame_t *f = 0;
vnet_crypto_async_frame_elt_t *fe;
u32 *bi;
- u32 n_elts;
- int i = 0;
+ u32 n_elts = f->n_elts;
u8 state = VNET_CRYPTO_FRAME_STATE_SUCCESS;
- if (cm->per_thread_data[vm->thread_index].self_crypto_enabled)
- {
- /* *INDENT-OFF* */
- vec_foreach_index (i, cm->per_thread_data)
- {
- ptd = cm->per_thread_data + i;
- q = ptd->queues[async_op_id];
- f = crypto_sw_scheduler_get_pending_frame (q);
- if (f)
- break;
- }
- /* *INDENT-ON* */
- }
+ vec_reset_length (ptd->crypto_ops);
+ vec_reset_length (ptd->integ_ops);
+ vec_reset_length (ptd->chained_crypto_ops);
+ vec_reset_length (ptd->chained_integ_ops);
+ vec_reset_length (ptd->chunks);
- ptd = cm->per_thread_data + vm->thread_index;
+ fe = f->elts;
+ bi = f->buffer_indices;
- if (f)
+ while (n_elts--)
{
- *nb_elts_processed = n_elts = f->n_elts;
- fe = f->elts;
- bi = f->buffer_indices;
-
- vec_reset_length (ptd->crypto_ops);
- vec_reset_length (ptd->chained_crypto_ops);
- vec_reset_length (ptd->chunks);
-
- while (n_elts--)
- {
- if (n_elts > 1)
- clib_prefetch_load (fe + 1);
+ if (n_elts > 1)
+ clib_prefetch_load (fe + 1);
- crypto_sw_scheduler_convert_aead (vm, ptd, fe, fe - f->elts, bi[0],
- sync_op_id, aad_len, tag_len);
- bi++;
- fe++;
- }
+ crypto_sw_scheduler_convert_aead (vm, ptd, fe, fe - f->elts, bi[0],
+ aead_op, aad_len, digest_len);
+ bi++;
+ fe++;
+ }
process_ops (vm, f, ptd->crypto_ops, &state);
process_chained_ops (vm, f, ptd->chained_crypto_ops, ptd->chunks,
&state);
f->state = state;
- *enqueue_thread_idx = f->enqueue_thread_index;
- }
-
- return crypto_sw_scheduler_get_completed_frame (ptd->queues[async_op_id]);
}
-static_always_inline vnet_crypto_async_frame_t *
-crypto_sw_scheduler_dequeue_link (vlib_main_t * vm,
- vnet_crypto_async_op_id_t async_op_id,
- vnet_crypto_op_id_t sync_crypto_op_id,
- vnet_crypto_op_id_t sync_integ_op_id,
- u16 digest_len, u8 is_enc,
- u32 * nb_elts_processed,
- u32 * enqueue_thread_idx)
+static_always_inline void
+crypto_sw_scheduler_process_link (vlib_main_t *vm,
+ crypto_sw_scheduler_main_t *cm,
+ crypto_sw_scheduler_per_thread_data_t *ptd,
+ vnet_crypto_async_frame_t *f, u32 crypto_op,
+ u32 auth_op, u16 digest_len, u8 is_enc)
{
- crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main;
- crypto_sw_scheduler_per_thread_data_t *ptd = 0;
- crypto_sw_scheduler_queue_t *q = 0;
- vnet_crypto_async_frame_t *f = 0;
vnet_crypto_async_frame_elt_t *fe;
u32 *bi;
- u32 n_elts;
- int i = 0;
+ u32 n_elts = f->n_elts;
u8 state = VNET_CRYPTO_FRAME_STATE_SUCCESS;
- if (cm->per_thread_data[vm->thread_index].self_crypto_enabled)
+ vec_reset_length (ptd->crypto_ops);
+ vec_reset_length (ptd->integ_ops);
+ vec_reset_length (ptd->chained_crypto_ops);
+ vec_reset_length (ptd->chained_integ_ops);
+ vec_reset_length (ptd->chunks);
+ fe = f->elts;
+ bi = f->buffer_indices;
+
+ while (n_elts--)
+ {
+ if (n_elts > 1)
+ clib_prefetch_load (fe + 1);
+
+ crypto_sw_scheduler_convert_link_crypto (
+ vm, ptd, cm->keys + fe->key_index, fe, fe - f->elts, bi[0], crypto_op,
+ auth_op, digest_len, is_enc);
+ bi++;
+ fe++;
+ }
+
+ if (is_enc)
+ {
+ process_ops (vm, f, ptd->crypto_ops, &state);
+ process_chained_ops (vm, f, ptd->chained_crypto_ops, ptd->chunks,
+ &state);
+ process_ops (vm, f, ptd->integ_ops, &state);
+ process_chained_ops (vm, f, ptd->chained_integ_ops, ptd->chunks, &state);
+ }
+ else
{
- /* *INDENT-OFF* */
- vec_foreach_index (i, cm->per_thread_data)
- {
- ptd = cm->per_thread_data + i;
- q = ptd->queues[async_op_id];
- f = crypto_sw_scheduler_get_pending_frame (q);
- if (f)
- break;
- }
- /* *INDENT-ON* */
+ process_ops (vm, f, ptd->integ_ops, &state);
+ process_chained_ops (vm, f, ptd->chained_integ_ops, ptd->chunks, &state);
+ process_ops (vm, f, ptd->crypto_ops, &state);
+ process_chained_ops (vm, f, ptd->chained_crypto_ops, ptd->chunks,
+ &state);
}
- ptd = cm->per_thread_data + vm->thread_index;
+ f->state = state;
+}
- if (f)
+static_always_inline int
+convert_async_crypto_id (vnet_crypto_async_op_id_t async_op_id, u32 *crypto_op,
+ u32 *auth_op_or_aad_len, u16 *digest_len, u8 *is_enc)
+{
+ switch (async_op_id)
{
- vec_reset_length (ptd->crypto_ops);
- vec_reset_length (ptd->integ_ops);
- vec_reset_length (ptd->chained_crypto_ops);
- vec_reset_length (ptd->chained_integ_ops);
- vec_reset_length (ptd->chunks);
+#define _(n, s, k, t, a) \
+ case VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC: \
+ *crypto_op = VNET_CRYPTO_OP_##n##_ENC; \
+ *auth_op_or_aad_len = a; \
+ *digest_len = t; \
+ *is_enc = 1; \
+ return 1; \
+ case VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC: \
+ *crypto_op = VNET_CRYPTO_OP_##n##_DEC; \
+ *auth_op_or_aad_len = a; \
+ *digest_len = t; \
+ *is_enc = 0; \
+ return 1;
+ foreach_crypto_aead_async_alg
+#undef _
- *nb_elts_processed = n_elts = f->n_elts;
- fe = f->elts;
- bi = f->buffer_indices;
+#define _(c, h, s, k, d) \
+ case VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC: \
+ *crypto_op = VNET_CRYPTO_OP_##c##_ENC; \
+ *auth_op_or_aad_len = VNET_CRYPTO_OP_##h##_HMAC; \
+ *digest_len = d; \
+ *is_enc = 1; \
+ return 0; \
+ case VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC: \
+ *crypto_op = VNET_CRYPTO_OP_##c##_DEC; \
+ *auth_op_or_aad_len = VNET_CRYPTO_OP_##h##_HMAC; \
+ *digest_len = d; \
+ *is_enc = 0; \
+ return 0;
+ foreach_crypto_link_async_alg
+#undef _
- while (n_elts--)
- {
- if (n_elts > 1)
- clib_prefetch_load (fe + 1);
-
- crypto_sw_scheduler_convert_link_crypto (vm, ptd,
- cm->keys + fe->key_index,
- fe, fe - f->elts, bi[0],
- sync_crypto_op_id,
- sync_integ_op_id,
- digest_len, is_enc);
- bi++;
- fe++;
- }
+ default : return -1;
+ }
- if (is_enc)
- {
- process_ops (vm, f, ptd->crypto_ops, &state);
- process_chained_ops (vm, f, ptd->chained_crypto_ops, ptd->chunks,
- &state);
- process_ops (vm, f, ptd->integ_ops, &state);
- process_chained_ops (vm, f, ptd->chained_integ_ops, ptd->chunks,
- &state);
- }
- else
+ return -1;
+}
+
+static_always_inline vnet_crypto_async_frame_t *
+crypto_sw_scheduler_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
+ u32 *enqueue_thread_idx)
+{
+ crypto_sw_scheduler_main_t *cm = &crypto_sw_scheduler_main;
+ crypto_sw_scheduler_per_thread_data_t *ptd =
+ cm->per_thread_data + vm->thread_index;
+ vnet_crypto_async_frame_t *f = 0;
+ crypto_sw_scheduler_queue_t *current_queue = 0;
+ u32 tail, head;
+ u8 found = 0;
+ u8 recheck_queues = 1;
+
+run_next_queues:
+ /* get a pending frame to process */
+ if (ptd->self_crypto_enabled)
+ {
+ u32 i = ptd->last_serve_lcore_id + 1;
+
+ while (1)
{
- process_ops (vm, f, ptd->integ_ops, &state);
- process_chained_ops (vm, f, ptd->chained_integ_ops, ptd->chunks,
- &state);
- process_ops (vm, f, ptd->crypto_ops, &state);
- process_chained_ops (vm, f, ptd->chained_crypto_ops, ptd->chunks,
- &state);
+ crypto_sw_scheduler_per_thread_data_t *st;
+ u32 j;
+
+ if (i >= vec_len (cm->per_thread_data))
+ i = 0;
+
+ st = cm->per_thread_data + i;
+
+ if (ptd->last_serve_encrypt)
+ current_queue = &st->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT];
+ else
+ current_queue = &st->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT];
+
+ tail = current_queue->tail;
+ head = current_queue->head;
+
+ /* Skip this queue unless tail < head or head has overflowed
+ * and tail has not. At the point where tail overflows (== 0),
+ * the largest possible value of head is (queue size - 1).
+ * Prior to that, the largest possible value of head is
+ * (queue size - 2).
+ */
+ if ((tail > head) && (head >= CRYPTO_SW_SCHEDULER_QUEUE_MASK))
+ goto skip_queue;
+
+ for (j = tail; j != head; j++)
+ {
+
+ f = current_queue->jobs[j & CRYPTO_SW_SCHEDULER_QUEUE_MASK];
+
+ if (!f)
+ continue;
+
+ if (clib_atomic_bool_cmp_and_swap (
+ &f->state, VNET_CRYPTO_FRAME_STATE_PENDING,
+ VNET_CRYPTO_FRAME_STATE_WORK_IN_PROGRESS))
+ {
+ found = 1;
+ break;
+ }
+ }
+
+ skip_queue:
+ if (found || i == ptd->last_serve_lcore_id)
+ {
+ CLIB_MEMORY_STORE_BARRIER ();
+ ptd->last_serve_encrypt = !ptd->last_serve_encrypt;
+ break;
+ }
+
+ i++;
}
- f->state = state;
+ ptd->last_serve_lcore_id = i;
+ }
+
+ if (found)
+ {
+ u32 crypto_op, auth_op_or_aad_len;
+ u16 digest_len;
+ u8 is_enc;
+ int ret;
+
+ ret = convert_async_crypto_id (f->op, &crypto_op, &auth_op_or_aad_len,
+ &digest_len, &is_enc);
+
+ if (ret == 1)
+ crypto_sw_scheduler_process_aead (vm, ptd, f, crypto_op,
+ auth_op_or_aad_len, digest_len);
+ else if (ret == 0)
+ crypto_sw_scheduler_process_link (
+ vm, cm, ptd, f, crypto_op, auth_op_or_aad_len, digest_len, is_enc);
+
*enqueue_thread_idx = f->enqueue_thread_index;
+ *nb_elts_processed = f->n_elts;
+ }
+
+ if (ptd->last_return_queue)
+ {
+ current_queue = &ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT];
+ ptd->last_return_queue = 0;
+ }
+ else
+ {
+ current_queue = &ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT];
+ ptd->last_return_queue = 1;
}
- return crypto_sw_scheduler_get_completed_frame (ptd->queues[async_op_id]);
+ tail = current_queue->tail & CRYPTO_SW_SCHEDULER_QUEUE_MASK;
+
+ if (current_queue->jobs[tail] &&
+ current_queue->jobs[tail]->state >= VNET_CRYPTO_FRAME_STATE_SUCCESS)
+ {
+
+ CLIB_MEMORY_STORE_BARRIER ();
+ current_queue->tail++;
+ f = current_queue->jobs[tail];
+ current_queue->jobs[tail] = 0;
+
+ return f;
+ }
+
+ if (!found && recheck_queues)
+ {
+ recheck_queues = 0;
+ goto run_next_queues;
+ }
+ return 0;
}
static clib_error_t *
@@ -533,14 +632,12 @@ sw_scheduler_set_worker_crypto (vlib_main_t * vm, unformat_input_t * input,
* @cliexstart{set sw_scheduler worker 0 crypto off}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_sw_scheduler_worker_crypto, static) = {
.path = "set sw_scheduler",
.short_help = "set sw_scheduler worker <idx> crypto <on|off>",
.function = sw_scheduler_set_worker_crypto,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
sw_scheduler_show_workers (vlib_main_t * vm, unformat_input_t * input,
@@ -569,14 +666,12 @@ sw_scheduler_show_workers (vlib_main_t * vm, unformat_input_t * input,
* @cliexstart{show sw_scheduler workers}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_show_sw_scheduler_workers, static) = {
.path = "show sw_scheduler workers",
.short_help = "show sw_scheduler workers",
.function = sw_scheduler_show_workers,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
sw_scheduler_cli_init (vlib_main_t * vm)
@@ -586,50 +681,6 @@ sw_scheduler_cli_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (sw_scheduler_cli_init);
-/* *INDENT-OFF* */
-#define _(n, s, k, t, a) \
- static vnet_crypto_async_frame_t \
- *crypto_sw_scheduler_frame_dequeue_##n##_TAG_##t##_AAD_##a##_enc ( \
- vlib_main_t *vm, u32 *nb_elts_processed, u32 * thread_idx) \
- { \
- return crypto_sw_scheduler_dequeue_aead ( \
- vm, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \
- VNET_CRYPTO_OP_##n##_ENC, t, a, nb_elts_processed, thread_idx); \
- } \
- static vnet_crypto_async_frame_t \
- *crypto_sw_scheduler_frame_dequeue_##n##_TAG_##t##_AAD_##a##_dec ( \
- vlib_main_t *vm, u32 *nb_elts_processed, u32 * thread_idx) \
- { \
- return crypto_sw_scheduler_dequeue_aead ( \
- vm, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, \
- VNET_CRYPTO_OP_##n##_DEC, t, a, nb_elts_processed, thread_idx); \
- }
-foreach_crypto_aead_async_alg
-#undef _
-
-#define _(c, h, s, k, d) \
- static vnet_crypto_async_frame_t \
- *crypto_sw_scheduler_frame_dequeue_##c##_##h##_TAG##d##_enc ( \
- vlib_main_t *vm, u32 *nb_elts_processed, u32 * thread_idx) \
- { \
- return crypto_sw_scheduler_dequeue_link ( \
- vm, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \
- VNET_CRYPTO_OP_##c##_ENC, VNET_CRYPTO_OP_##h##_HMAC, d, 1, \
- nb_elts_processed, thread_idx); \
- } \
- static vnet_crypto_async_frame_t \
- *crypto_sw_scheduler_frame_dequeue_##c##_##h##_TAG##d##_dec ( \
- vlib_main_t *vm, u32 *nb_elts_processed, u32 * thread_idx) \
- { \
- return crypto_sw_scheduler_dequeue_link ( \
- vm, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, \
- VNET_CRYPTO_OP_##c##_DEC, VNET_CRYPTO_OP_##h##_HMAC, d, 0, \
- nb_elts_processed, thread_idx); \
- }
- foreach_crypto_link_async_alg
-#undef _
- /* *INDENT-ON* */
-
crypto_sw_scheduler_main_t crypto_sw_scheduler_main;
clib_error_t *
crypto_sw_scheduler_init (vlib_main_t * vm)
@@ -638,26 +689,33 @@ crypto_sw_scheduler_init (vlib_main_t * vm)
vlib_thread_main_t *tm = vlib_get_thread_main ();
clib_error_t *error = 0;
crypto_sw_scheduler_per_thread_data_t *ptd;
-
- u32 queue_size = CRYPTO_SW_SCHEDULER_QUEUE_SIZE * sizeof (void *)
- + sizeof (crypto_sw_scheduler_queue_t);
+ u32 i;
vec_validate_aligned (cm->per_thread_data, tm->n_vlib_mains - 1,
CLIB_CACHE_LINE_BYTES);
- vec_foreach (ptd, cm->per_thread_data)
- {
- ptd->self_crypto_enabled = 1;
- u32 i;
- for (i = 0; i < VNET_CRYPTO_ASYNC_OP_N_IDS; i++)
- {
- crypto_sw_scheduler_queue_t *q
- = clib_mem_alloc_aligned (queue_size, CLIB_CACHE_LINE_BYTES);
- ASSERT (q != 0);
- ptd->queues[i] = q;
- clib_memset_u8 (q, 0, queue_size);
- }
- }
+ for (i = 0; i < tm->n_vlib_mains; i++)
+ {
+ ptd = cm->per_thread_data + i;
+ ptd->self_crypto_enabled = i > 0 || vlib_num_workers () < 1;
+
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT].head = 0;
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT].tail = 0;
+
+ vec_validate_aligned (
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_DECRYPT].jobs,
+ CRYPTO_SW_SCHEDULER_QUEUE_SIZE - 1, CLIB_CACHE_LINE_BYTES);
+
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT].head = 0;
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT].tail = 0;
+
+ ptd->last_serve_encrypt = 0;
+ ptd->last_return_queue = 0;
+
+ vec_validate_aligned (
+ ptd->queue[CRYPTO_SW_SCHED_QUEUE_TYPE_ENCRYPT].jobs,
+ CRYPTO_SW_SCHEDULER_QUEUE_SIZE - 1, CLIB_CACHE_LINE_BYTES);
+ }
cm->crypto_engine_index =
vnet_crypto_register_engine (vm, "sw_scheduler", 100,
@@ -668,33 +726,28 @@ crypto_sw_scheduler_init (vlib_main_t * vm)
crypto_sw_scheduler_api_init (vm);
- /* *INDENT-OFF* */
#define _(n, s, k, t, a) \
- vnet_crypto_register_async_handler ( \
- vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \
- crypto_sw_scheduler_frame_enqueue, \
- crypto_sw_scheduler_frame_dequeue_##n##_TAG_##t##_AAD_##a##_enc); \
- vnet_crypto_register_async_handler ( \
- vm, cm->crypto_engine_index, \
- VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, \
- crypto_sw_scheduler_frame_enqueue, \
- crypto_sw_scheduler_frame_dequeue_##n##_TAG_##t##_AAD_##a##_dec);
+ vnet_crypto_register_enqueue_handler ( \
+ vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_ENC, \
+ crypto_sw_scheduler_frame_enqueue_encrypt); \
+ vnet_crypto_register_enqueue_handler ( \
+ vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##n##_TAG##t##_AAD##a##_DEC, \
+ crypto_sw_scheduler_frame_enqueue_decrypt);
foreach_crypto_aead_async_alg
#undef _
#define _(c, h, s, k, d) \
- vnet_crypto_register_async_handler ( \
- vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \
- crypto_sw_scheduler_frame_enqueue, \
- crypto_sw_scheduler_frame_dequeue_##c##_##h##_TAG##d##_enc); \
- vnet_crypto_register_async_handler ( \
- vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, \
- crypto_sw_scheduler_frame_enqueue, \
- crypto_sw_scheduler_frame_dequeue_##c##_##h##_TAG##d##_dec);
- foreach_crypto_link_async_alg
+ vnet_crypto_register_enqueue_handler ( \
+ vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC, \
+ crypto_sw_scheduler_frame_enqueue_encrypt); \
+ vnet_crypto_register_enqueue_handler ( \
+ vm, cm->crypto_engine_index, VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC, \
+ crypto_sw_scheduler_frame_enqueue_decrypt);
+ foreach_crypto_link_async_alg
#undef _
- /* *INDENT-ON* */
+
+ vnet_crypto_register_dequeue_handler (vm, cm->crypto_engine_index,
+ crypto_sw_scheduler_dequeue);
if (error)
vec_free (cm->per_thread_data);
@@ -702,7 +755,6 @@ crypto_sw_scheduler_init (vlib_main_t * vm)
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (crypto_sw_scheduler_init) = {
.runs_after = VLIB_INITS ("vnet_crypto_init"),
};
@@ -711,7 +763,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "SW Scheduler Crypto Async Engine plugin",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ct6/ct6.c b/src/plugins/ct6/ct6.c
index 205cd3f50ef..e5c69be2c9d 100644
--- a/src/plugins/ct6/ct6.c
+++ b/src/plugins/ct6/ct6.c
@@ -153,7 +153,6 @@ set_ct6_enable_disable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ct6_command, static) =
{
.path = "set ct6",
@@ -161,7 +160,6 @@ VLIB_CLI_COMMAND (set_ct6_command, static) =
"set ct6 [inside|outside] <interface-name> [disable]",
.function = set_ct6_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_ct6_enable_disable_t_handler
@@ -216,30 +214,24 @@ ct6_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (ct6_init);
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ct6out2in, static) =
{
.arc_name = "ip6-unicast",
.node_name = "ct6-out2in",
.runs_before = VNET_FEATURES ("ip6-lookup"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ct6in2out, static) = {
.arc_name = "interface-output",
.node_name = "ct6-in2out",
.runs_before = VNET_FEATURES ("interface-output-arc-end"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "IPv6 Connection Tracker",
};
-/* *INDENT-ON* */
u8 *
format_ct6_session (u8 * s, va_list * args)
@@ -320,26 +312,22 @@ show_ct6_command_fn_command_fn (vlib_main_t * vm,
format (s, "%U", format_ct6_session, cmp,
0 /* pool */ , 0 /* header */ , verbose);
- /* *INDENT-OFF* */
pool_foreach (s0, cmp->sessions[i])
{
s = format (s, "%U", format_ct6_session, cmp, i, s0, verbose);
}
- /* *INDENT-ON* */
}
vlib_cli_output (cmp->vlib_main, "%v", s);
vec_free (s);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ct6_command_fn_command, static) =
{
.path = "show ip6 connection-tracker",
.short_help = "show ip6 connection-tracker",
.function = show_ct6_command_fn_command_fn,
};
-/* *INDENT-ON* */
static void
increment_v6_address (ip6_address_t * a)
@@ -429,12 +417,10 @@ test_ct6_command_fn_command_fn (vlib_main_t * vm,
created = 0;
}
- /* *INDENT-OFF* */
pool_foreach (s0, cmp->sessions[0])
{
s = format (s, "%U", format_ct6_session, cmp, 0, s0, 1 /* verbose */);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "\nEnd state: first index %d last index %d\n%v",
cmp->first_index[0], cmp->last_index[0], s);
@@ -449,12 +435,10 @@ test_ct6_command_fn_command_fn (vlib_main_t * vm,
ct6_update_session_hit (cmp, s0, 234.0);
- /* *INDENT-OFF* */
pool_foreach (s0, cmp->sessions[0])
{
s = format (s, "%U", format_ct6_session, cmp, 0, s0, 1 /* verbose */);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "\nEnd state: first index %d last index %d\n%v",
cmp->first_index[0], cmp->last_index[0], s);
@@ -464,14 +448,12 @@ test_ct6_command_fn_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_ct6_command_fn_command, static) =
{
.path = "test ip6 connection-tracker",
.short_help = "test ip6 connection-tracker",
.function = test_ct6_command_fn_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ct6_config (vlib_main_t * vm, unformat_input_t * input)
diff --git a/src/plugins/ct6/ct6.h b/src/plugins/ct6/ct6.h
index 534534f5c99..0b7deb07839 100644
--- a/src/plugins/ct6/ct6.h
+++ b/src/plugins/ct6/ct6.h
@@ -26,7 +26,6 @@
#include <vppinfra/hash.h>
#include <vppinfra/error.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
union
@@ -43,7 +42,6 @@ typedef CLIB_PACKED (struct
u64 as_u64[6];
};
}) ct6_session_key_t;
-/* *INDENT-ON* */
typedef struct
{
diff --git a/src/plugins/ct6/ct6_in2out.c b/src/plugins/ct6/ct6_in2out.c
index b8bda18370c..c5d26c8caa7 100644
--- a/src/plugins/ct6/ct6_in2out.c
+++ b/src/plugins/ct6/ct6_in2out.c
@@ -344,7 +344,6 @@ VLIB_NODE_FN (ct6_in2out_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return ct6_in2out_inline (vm, node, frame, 0 /* is_trace */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (ct6_in2out_node) =
{
@@ -365,7 +364,6 @@ VLIB_REGISTER_NODE (ct6_in2out_node) =
.unformat_buffer = unformat_ethernet_header,
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ct6/ct6_out2in.c b/src/plugins/ct6/ct6_out2in.c
index ebb6da56134..a94ae38f0c5 100644
--- a/src/plugins/ct6/ct6_out2in.c
+++ b/src/plugins/ct6/ct6_out2in.c
@@ -246,7 +246,6 @@ VLIB_NODE_FN (ct6_out2in_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return ct6_out2in_inline (vm, node, frame, 0 /* is_trace */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (ct6_out2in_node) =
{
@@ -266,7 +265,6 @@ VLIB_REGISTER_NODE (ct6_out2in_node) =
},
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dev_ena/CMakeLists.txt b/src/plugins/dev_ena/CMakeLists.txt
new file mode 100644
index 00000000000..d9224d6fd9b
--- /dev/null
+++ b/src/plugins/dev_ena/CMakeLists.txt
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2022 Cisco Systems, Inc.
+
+add_vpp_plugin(dev_ena
+ SOURCES
+ aq.c
+ aenq.c
+ ena.c
+ format.c
+ format_aq.c
+ port.c
+ queue.c
+ rx_node.c
+ tx_node.c
+ reg.c
+
+ MULTIARCH_SOURCES
+ rx_node.c
+ tx_node.c
+)
+
diff --git a/src/plugins/dev_ena/aenq.c b/src/plugins/dev_ena/aenq.c
new file mode 100644
index 00000000000..64be3c4af3a
--- /dev/null
+++ b/src/plugins/dev_ena/aenq.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+#define ENA_AENQ_POLL_INTERVAL 0.2
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "aenq",
+};
+
+void
+ena_aenq_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+
+ log_debug (dev, "");
+
+ ASSERT (ed->aenq_started == 0);
+
+ vnet_dev_dma_mem_free (vm, dev, ed->aenq.entries);
+ ed->aenq.entries = 0;
+ ed->aenq.depth = 0;
+}
+
+vnet_dev_rv_t
+ena_aenq_olloc (vlib_main_t *vm, vnet_dev_t *dev, u16 depth)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u32 alloc_sz = sizeof (ena_aenq_entry_t) * depth;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "");
+
+ ASSERT (ed->aenq.entries == 0);
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, alloc_sz, 0,
+ (void **) &ed->aenq.entries)))
+ goto err;
+
+ ed->aenq.depth = depth;
+
+ return VNET_DEV_OK;
+err:
+ ena_aenq_free (vm, dev);
+ return rv;
+}
+
+static ena_aenq_entry_t *
+ena_get_next_aenq_entry (vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u16 index = ed->aenq.head & pow2_mask (ENA_ASYNC_QUEUE_LOG2_DEPTH);
+ u16 phase = 1 & (ed->aenq.head >> ENA_ASYNC_QUEUE_LOG2_DEPTH);
+ ena_aenq_entry_t *e = ed->aenq.entries + index;
+
+ if (e->phase != phase)
+ return 0;
+
+ ed->aenq.head++;
+
+ return e;
+}
+
+static void
+ena_aenq_poll (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_aenq_entry_t *ae;
+
+ while ((ae = ena_get_next_aenq_entry (dev)))
+ {
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ vnet_dev_port_state_changes_t changes = {};
+
+ log_debug (dev, "aenq: group %u syndrome %u phase %u timestamp %lu",
+ ae->group, ae->syndrome, ae->phase, ae->timestamp);
+
+ switch (ae->group)
+ {
+ case ENA_AENQ_GROUP_LINK_CHANGE:
+ log_debug (dev, "link_change: status %u",
+ ae->link_change.link_status);
+ changes.link_state = 1;
+ changes.change.link_state = 1;
+ foreach_vnet_dev_port (p, dev)
+ vnet_dev_port_state_change (vm, p, changes);
+ break;
+
+ case ENA_AENQ_GROUP_NOTIFICATION:
+ log_warn (dev, "unhandled AENQ notification received [syndrome %u]",
+ ae->syndrome);
+ break;
+
+ case ENA_AENQ_GROUP_KEEP_ALIVE:
+ if (ae->keep_alive.rx_drops || ae->keep_alive.tx_drops)
+ log_debug (dev, "keep_alive: rx_drops %lu tx_drops %lu",
+ ae->keep_alive.rx_drops, ae->keep_alive.tx_drops);
+ ed->aenq.rx_drops = ae->keep_alive.rx_drops - ed->aenq.rx_drops0;
+ ed->aenq.tx_drops = ae->keep_alive.tx_drops - ed->aenq.tx_drops0;
+ ed->aenq.last_keepalive = vlib_time_now (vm);
+ break;
+
+ default:
+ log_debug (dev, "unknown aenq entry (group %u) %U", ae->group,
+ format_hexdump, ae, sizeof (*ae));
+ };
+ }
+}
+
+vnet_dev_rv_t
+ena_aenq_start (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u16 depth = ed->aenq.depth;
+ u32 alloc_sz = sizeof (ena_aenq_entry_t) * depth;
+
+ ASSERT (ed->aenq_started == 0);
+ ASSERT (ed->aq_started == 1);
+
+ ena_reg_aenq_caps_t aenq_caps = {
+ .depth = depth,
+ .entry_size = sizeof (ena_aenq_entry_t),
+ };
+
+ if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG))
+ {
+ ena_aq_feat_aenq_config_t aenq;
+ vnet_dev_rv_t rv;
+
+ if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG,
+ &aenq)))
+ {
+ log_err (dev, "aenq_start: get_Feature(AENQ_CONFIG) failed");
+ return rv;
+ }
+
+ aenq.enabled_groups.link_change = 1;
+ aenq.enabled_groups.fatal_error = 1;
+ aenq.enabled_groups.warning = 1;
+ aenq.enabled_groups.notification = 1;
+ aenq.enabled_groups.keep_alive = 1;
+ aenq.enabled_groups.as_u32 &= aenq.supported_groups.as_u32;
+ aenq.supported_groups.as_u32 = 0;
+
+ if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_AENQ_CONFIG,
+ &aenq)))
+ {
+ log_err (dev, "aenq_start: set_Feature(AENQ_CONFIG) failed");
+ return rv;
+ }
+ }
+
+ clib_memset (ed->aenq.entries, 0, alloc_sz);
+ ed->aenq.head = depth;
+
+ ena_reg_set_dma_addr (vm, dev, ENA_REG_AENQ_BASE_LO, ENA_REG_AENQ_BASE_HI,
+ ed->aenq.entries);
+
+ ena_reg_write (dev, ENA_REG_AENQ_CAPS, &aenq_caps);
+ ena_reg_write (dev, ENA_REG_AENQ_HEAD_DB, &(u32){ depth });
+
+ ed->aenq_started = 1;
+
+ vnet_dev_poll_dev_add (vm, dev, ENA_AENQ_POLL_INTERVAL, ena_aenq_poll);
+
+ return VNET_DEV_OK;
+}
+
+void
+ena_aenq_stop (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ if (ed->aenq_started == 1)
+ {
+ ena_reg_aenq_caps_t aenq_caps = {};
+ vnet_dev_poll_dev_remove (vm, dev, ena_aenq_poll);
+ ena_reg_write (dev, ENA_REG_AENQ_CAPS, &aenq_caps);
+ ed->aenq_started = 0;
+ }
+}
diff --git a/src/plugins/dev_ena/aq.c b/src/plugins/dev_ena/aq.c
new file mode 100644
index 00000000000..290d5bd52c6
--- /dev/null
+++ b/src/plugins/dev_ena/aq.c
@@ -0,0 +1,359 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "admin",
+};
+
+VLIB_REGISTER_LOG_CLASS (ena_stats_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "admin-stats",
+};
+
+ena_aq_feat_info_t feat_info[] = {
+#define _(v, ver, gt, st, n, s) \
+ [v] = { .name = #n, \
+ .version = (ver), \
+ .data_sz = sizeof (s), \
+ .get = (gt), \
+ .set = (st) },
+ foreach_ena_aq_feature_id
+#undef _
+};
+
+ena_aq_feat_info_t *
+ena_aq_get_feat_info (ena_aq_feature_id_t id)
+{
+ if (id >= ARRAY_LEN (feat_info) || feat_info[id].data_sz == 0)
+ return 0;
+
+ return feat_info + id;
+}
+
+void
+ena_aq_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ vnet_dev_dma_mem_free (vm, dev, ed->aq.cq_entries);
+ vnet_dev_dma_mem_free (vm, dev, ed->aq.sq_entries);
+ ed->aq.depth = 0;
+}
+
+vnet_dev_rv_t
+ena_aq_olloc (vlib_main_t *vm, vnet_dev_t *dev, u16 depth)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ vnet_dev_dma_mem_free (vm, dev, ed->aq.cq_entries);
+ vnet_dev_dma_mem_free (vm, dev, ed->aq.sq_entries);
+ u32 sq_alloc_sz = sizeof (ena_aq_sq_entry_t) * depth;
+ u32 cq_alloc_sz = sizeof (ena_aq_cq_entry_t) * depth;
+ vnet_dev_rv_t rv;
+
+ ASSERT (ed->aq.sq_entries == 0);
+ ASSERT (ed->aq.cq_entries == 0);
+
+ rv = vnet_dev_dma_mem_alloc (vm, dev, sq_alloc_sz, 0,
+ (void **) &ed->aq.sq_entries);
+ if (rv != VNET_DEV_OK)
+ goto err;
+
+ rv = vnet_dev_dma_mem_alloc (vm, dev, cq_alloc_sz, 0,
+ (void **) &ed->aq.cq_entries);
+ if (rv != VNET_DEV_OK)
+ goto err;
+
+ ed->aq.depth = depth;
+
+ return VNET_DEV_OK;
+err:
+ ena_aq_free (vm, dev);
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_start (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u16 depth = ed->aq.depth;
+ u32 sq_alloc_sz = sizeof (ena_aq_sq_entry_t) * depth;
+ u32 cq_alloc_sz = sizeof (ena_aq_cq_entry_t) * depth;
+
+ ASSERT (ed->aq_started == 0);
+
+ ena_reg_aq_caps_t aq_caps = {
+ .depth = depth,
+ .entry_size = sizeof (ena_aq_sq_entry_t),
+ };
+
+ ena_reg_acq_caps_t acq_caps = {
+ .depth = depth,
+ .entry_size = sizeof (ena_aq_cq_entry_t),
+ };
+
+ clib_memset (ed->aq.sq_entries, 0, sq_alloc_sz);
+ clib_memset (ed->aq.cq_entries, 0, cq_alloc_sz);
+
+ ed->aq.sq_next = 0;
+ ed->aq.cq_head = 0;
+
+ ena_reg_set_dma_addr (vm, dev, ENA_REG_AQ_BASE_LO, ENA_REG_AQ_BASE_HI,
+ ed->aq.sq_entries);
+ ena_reg_set_dma_addr (vm, dev, ENA_REG_ACQ_BASE_LO, ENA_REG_ACQ_BASE_HI,
+ ed->aq.cq_entries);
+
+ ena_reg_write (dev, ENA_REG_AQ_CAPS, &aq_caps);
+ ena_reg_write (dev, ENA_REG_ACQ_CAPS, &acq_caps);
+
+ ed->aq_started = 1;
+
+ return VNET_DEV_OK;
+}
+
+void
+ena_aq_stop (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ ena_reg_aq_caps_t aq_caps = {};
+ ena_reg_acq_caps_t acq_caps = {};
+
+ if (ed->aq_started)
+ {
+ ena_reg_write (dev, ENA_REG_AQ_CAPS, &aq_caps);
+ ena_reg_write (dev, ENA_REG_ACQ_CAPS, &acq_caps);
+ ed->aq_started = 0;
+ }
+}
+vnet_dev_rv_t
+ena_aq_req (vlib_main_t *vm, vnet_dev_t *dev, ena_aq_opcode_t opcode,
+ void *sqe_data, u8 sqe_data_sz, void *cqe_data, u8 cqe_data_sz)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u32 next = ed->aq.sq_next++;
+ u32 index = next & pow2_mask (ENA_ADMIN_QUEUE_LOG2_DEPTH);
+ u8 phase = 1 & (~(next >> ENA_ADMIN_QUEUE_LOG2_DEPTH));
+ ena_aq_sq_entry_t *sqe = ed->aq.sq_entries + index;
+ ena_aq_cq_entry_t *cqe = ed->aq.cq_entries + index;
+ f64 suspend_time = 1e-6;
+
+ clib_memcpy_fast (&sqe->data, sqe_data, sqe_data_sz);
+ sqe->opcode = opcode;
+ sqe->command_id = index;
+ sqe->phase = phase;
+
+ ena_reg_write (dev, ENA_REG_AQ_DB, &ed->aq.sq_next);
+
+ while (cqe->phase != phase)
+ {
+ vlib_process_suspend (vm, suspend_time);
+ suspend_time *= 2;
+ if (suspend_time > 1e-3)
+ {
+ log_err (dev, "admin queue timeout (opcode %U)",
+ format_ena_aq_opcode, opcode);
+ return VNET_DEV_ERR_TIMEOUT;
+ }
+ }
+
+ if (cqe->status != ENA_ADMIN_COMPL_STATUS_SUCCESS)
+ {
+ log_err (dev,
+ "cqe[%u]: opcode %U status %U ext_status %u sq_head_idx %u",
+ cqe - ed->aq.cq_entries, format_ena_aq_opcode, opcode,
+ format_ena_aq_status, cqe->status, cqe->extended_status,
+ cqe->sq_head_indx);
+ return VNET_DEV_ERR_DEVICE_NO_REPLY;
+ }
+
+ log_debug (dev, "cqe: status %u ext_status %u sq_head_idx %u", cqe->status,
+ cqe->extended_status, cqe->sq_head_indx);
+
+ if (cqe_data && cqe_data_sz)
+ clib_memcpy_fast (cqe_data, &cqe->data, cqe_data_sz);
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+ena_aq_set_feature (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_feature_id_t feat_id, void *data)
+{
+ vnet_dev_rv_t rv;
+
+ struct
+ {
+ ena_aq_aq_ctrl_buff_info_t control_buffer;
+ ena_aq_get_set_feature_common_desc_t feat_common;
+ u32 data[11];
+ } fd = {
+ .feat_common.feature_id = feat_id,
+ .feat_common.feature_version = feat_info[feat_id].version,
+ };
+
+ log_debug (dev, "set_feature(%s):\n %U", feat_info[feat_id].name,
+ format_ena_aq_feat_desc, feat_id, data);
+
+ ASSERT (feat_info[feat_id].data_sz > 1);
+ clib_memcpy (&fd.data, data, feat_info[feat_id].data_sz);
+
+ rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_SET_FEATURE, &fd, sizeof (fd), 0, 0);
+
+ if (rv != VNET_DEV_OK)
+ log_err (dev, "get_feature(%U) failed", format_ena_aq_feat_name, feat_id);
+
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_get_feature (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_feature_id_t feat_id, void *data)
+{
+ vnet_dev_rv_t rv;
+
+ struct
+ {
+ ena_aq_aq_ctrl_buff_info_t control_buffer;
+ ena_aq_get_set_feature_common_desc_t feat_common;
+ u32 data[11];
+ } fd = {
+ .feat_common.feature_id = feat_id,
+ .feat_common.feature_version = feat_info[feat_id].version,
+ };
+
+ rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_GET_FEATURE, &fd, sizeof (fd), data,
+ feat_info[feat_id].data_sz);
+
+ if (rv != VNET_DEV_OK)
+ {
+ log_err (dev, "get_feature(%U) failed", format_ena_aq_feat_name,
+ feat_id);
+ return rv;
+ }
+
+ ASSERT (feat_info[feat_id].data_sz > 1);
+
+ log_debug (dev, "get_feature(%s):\n %U", feat_info[feat_id].name,
+ format_ena_aq_feat_desc, feat_id, data);
+
+ return 0;
+}
+
+vnet_dev_rv_t
+ena_aq_create_sq (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_create_sq_cmd_t *cmd, ena_aq_create_sq_resp_t *resp)
+{
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "create_sq_cmd_req:\n %U", format_ena_aq_create_sq_cmd,
+ cmd);
+
+ rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_CREATE_SQ, cmd, sizeof (*cmd), resp,
+ sizeof (*resp));
+
+ if (rv != VNET_DEV_OK)
+ log_debug (dev, "create_sq_cmd_resp:\n %U", format_ena_aq_create_sq_resp,
+ resp);
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_create_cq (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_create_cq_cmd_t *cmd, ena_aq_create_cq_resp_t *resp)
+{
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "create_cq_cmd_req:\n %U", format_ena_aq_create_cq_cmd,
+ cmd);
+
+ rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_CREATE_CQ, cmd, sizeof (*cmd), resp,
+ sizeof (*resp));
+
+ if (rv != VNET_DEV_OK)
+ log_debug (dev, "create_cq_cmd_resp:\n %U", format_ena_aq_create_cq_resp,
+ resp);
+
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_aq_destroy_sq (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_destroy_sq_cmd_t *cmd)
+{
+ log_debug (dev, "destroy_sq_cmd_req:\n %U", format_ena_aq_destroy_sq_cmd,
+ cmd);
+
+ return ena_aq_req (vm, dev, ENA_AQ_OPCODE_DESTROY_SQ, cmd, sizeof (*cmd), 0,
+ 0);
+}
+
+vnet_dev_rv_t
+ena_aq_destroy_cq (vlib_main_t *vm, vnet_dev_t *dev,
+ ena_aq_destroy_cq_cmd_t *cmd)
+{
+ log_debug (dev, "destroy_cq_cmd_req:\n %U", format_ena_aq_destroy_cq_cmd,
+ cmd);
+
+ return ena_aq_req (vm, dev, ENA_AQ_OPCODE_DESTROY_CQ, cmd, sizeof (*cmd), 0,
+ 0);
+}
+
+vnet_dev_rv_t
+ena_aq_get_stats (vlib_main_t *vm, vnet_dev_t *dev, ena_aq_stats_type_t type,
+ ena_aq_stats_scope_t scope, u16 queue_idx, void *data)
+{
+ vnet_dev_rv_t rv;
+ format_function_t *ff = 0;
+ u8 data_sz[] = {
+ [ENA_ADMIN_STATS_TYPE_BASIC] = sizeof (ena_aq_basic_stats_t),
+ [ENA_ADMIN_STATS_TYPE_EXTENDED] = 0,
+ [ENA_ADMIN_STATS_TYPE_ENI] = sizeof (ena_aq_eni_stats_t),
+ };
+
+ char *type_str[] = {
+#define _(n, s) [n] = #s,
+ foreach_ena_aq_stats_type
+#undef _
+ };
+
+ char *scope_str[] = {
+#define _(n, s) [n] = #s,
+ foreach_ena_aq_stats_scope
+#undef _
+ };
+
+ ena_aq_get_stats_cmd_t cmd = {
+ .type = type,
+ .scope = scope,
+ .queue_idx = scope == ENA_ADMIN_STATS_SCOPE_SPECIFIC_QUEUE ? queue_idx : 0,
+ .device_id = 0xffff,
+ };
+
+ if ((rv = ena_aq_req (vm, dev, ENA_AQ_OPCODE_GET_STATS, &cmd, sizeof (cmd),
+ data, data_sz[type])))
+ {
+ ena_stats_log_err (dev, "get_stats(%s, %s) failed", type_str[type],
+ scope_str[scope]);
+ return rv;
+ }
+
+ if (type == ENA_ADMIN_STATS_TYPE_BASIC)
+ ff = format_ena_aq_basic_stats;
+ else if (type == ENA_ADMIN_STATS_TYPE_ENI)
+ ff = format_ena_aq_eni_stats;
+
+ if (ff)
+ ena_stats_log_debug (dev, "get_stats(%s, %s, %u):\n %U", type_str[type],
+ scope_str[scope], queue_idx, ff, data);
+ else
+ ena_stats_log_debug (dev, "get_stats(%s, %s, %u): unknown data",
+ type_str[type], scope_str[scope], queue_idx);
+
+ return VNET_DEV_OK;
+}
diff --git a/src/plugins/dev_ena/ena.c b/src/plugins/dev_ena/ena.c
new file mode 100644
index 00000000000..ead090839c7
--- /dev/null
+++ b/src/plugins/dev_ena/ena.c
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+static ena_aq_host_info_t host_info = {
+ .os_type = 3, /* DPDK */
+ .kernel_ver_str = VPP_BUILD_VER,
+ .os_dist_str = VPP_BUILD_VER,
+ .driver_version = {
+ .major = 16,
+ .minor = 0,
+ .sub_minor = 0,
+ },
+ .ena_spec_version = {
+ .major = 2,
+ .minor = 0,
+ },
+ .driver_supported_features = {
+ .rx_offset = 1,
+ .rss_configurable_function_key = 1,
+ }
+};
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "init",
+};
+
+#define _(f, n, s, d) \
+ { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s },
+
+static vlib_error_desc_t ena_rx_node_counters[] = {
+ foreach_ena_rx_node_counter
+};
+static vlib_error_desc_t ena_tx_node_counters[] = {
+ foreach_ena_tx_node_counter
+};
+#undef _
+
+vnet_dev_node_t ena_rx_node = {
+ .error_counters = ena_rx_node_counters,
+ .n_error_counters = ARRAY_LEN (ena_rx_node_counters),
+ .format_trace = format_ena_rx_trace,
+};
+
+vnet_dev_node_t ena_tx_node = {
+ .error_counters = ena_tx_node_counters,
+ .n_error_counters = ARRAY_LEN (ena_tx_node_counters),
+};
+
+static void
+ena_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_aenq_stop (vm, dev);
+ ena_aq_stop (vm, dev);
+}
+
+static vnet_dev_rv_t
+ena_alloc (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ vnet_dev_rv_t rv;
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, 4096, 4096,
+ (void **) &ed->host_info)))
+ return rv;
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (ena_mmio_resp_t), 0,
+ (void **) &ed->mmio_resp)))
+ return rv;
+
+ if ((rv = ena_aq_olloc (vm, dev, ENA_ADMIN_QUEUE_DEPTH)))
+ return rv;
+
+ if ((rv = ena_aenq_olloc (vm, dev, ENA_ASYNC_QUEUE_DEPTH)))
+ return rv;
+
+ return VNET_DEV_OK;
+}
+
+static void
+ena_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+
+ ena_aenq_free (vm, dev);
+ ena_aq_free (vm, dev);
+
+ vnet_dev_dma_mem_free (vm, dev, ed->host_info);
+ vnet_dev_dma_mem_free (vm, dev, ed->mmio_resp);
+}
+
+static vnet_dev_rv_t
+ena_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ ena_aq_feat_host_attr_config_t host_attr = {};
+ vlib_pci_config_hdr_t pci_cfg_hdr;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ vnet_dev_port_add_args_t port = {
+ .port = {
+ .attr = {
+ .type = VNET_DEV_PORT_TYPE_ETHERNET,
+ },
+ .ops = {
+ .init = ena_port_init,
+ .start = ena_port_start,
+ .stop = ena_port_stop,
+ .config_change = ena_port_cfg_change,
+ .config_change_validate = ena_port_cfg_change_validate,
+ },
+ .data_size = sizeof (ena_port_t),
+ },
+ .rx_node = &ena_rx_node,
+ .tx_node = &ena_tx_node,
+ .rx_queue = {
+ .config = {
+ .data_size = sizeof (ena_rxq_t),
+ .default_size = 512,
+ .min_size = 32,
+ .size_is_power_of_two = 1,
+ },
+ .ops = {
+ .alloc = ena_rx_queue_alloc,
+ .start = ena_rx_queue_start,
+ .stop = ena_rx_queue_stop,
+ .free = ena_rx_queue_free,
+ },
+ },
+ .tx_queue = {
+ .config = {
+ .data_size = sizeof (ena_txq_t),
+ .default_size = 512,
+ .min_size = 32,
+ .size_is_power_of_two = 1,
+ },
+ .ops = {
+ .alloc = ena_tx_queue_alloc,
+ .start = ena_tx_queue_start,
+ .stop = ena_tx_queue_stop,
+ .free = ena_tx_queue_free,
+ },
+ },
+ };
+
+ if ((rv = vnet_dev_pci_read_config_header (vm, dev, &pci_cfg_hdr)))
+ goto err;
+
+ log_debug (dev, "revision_id 0x%x", pci_cfg_hdr.revision_id);
+
+ ed->readless = (pci_cfg_hdr.revision_id & 1) == 0;
+
+ if ((rv = vnet_dev_pci_map_region (vm, dev, 0, &ed->reg_bar)))
+ goto err;
+
+ if ((rv = ena_reg_reset (vm, dev, ENA_RESET_REASON_NORMAL)))
+ goto err;
+
+ if ((rv = ena_aq_start (vm, dev)))
+ goto err;
+
+ *ed->host_info = host_info;
+ ed->host_info->num_cpus = vlib_get_n_threads ();
+ ena_set_mem_addr (vm, dev, &host_attr.os_info_ba, ed->host_info);
+
+ if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_HOST_ATTR_CONFIG,
+ &host_attr)))
+ return rv;
+
+ if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_DEVICE_ATTRIBUTES,
+ &ed->dev_attr)))
+ return rv;
+
+ if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT))
+ {
+ ena_aq_feat_max_queue_ext_t max_q_ext;
+ if ((rv = ena_aq_get_feature (vm, dev, ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT,
+ &max_q_ext)))
+ goto err;
+ port.port.attr.max_rx_queues =
+ clib_min (max_q_ext.max_rx_cq_num, max_q_ext.max_rx_sq_num);
+ port.port.attr.max_tx_queues =
+ clib_min (max_q_ext.max_tx_cq_num, max_q_ext.max_tx_sq_num);
+ port.rx_queue.config.max_size =
+ clib_min (max_q_ext.max_rx_cq_depth, max_q_ext.max_rx_sq_depth);
+ port.tx_queue.config.max_size =
+ clib_min (max_q_ext.max_tx_cq_depth, max_q_ext.max_tx_sq_depth);
+ }
+ else
+ {
+ log_err (dev, "device doesn't support MAX_QUEUES_EXT");
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE_VER;
+ }
+
+ if ((rv = ena_aenq_start (vm, dev)))
+ goto err;
+
+ port.port.attr.max_supported_rx_frame_size = ed->dev_attr.max_mtu;
+
+ if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MTU))
+ port.port.attr.caps.change_max_rx_frame_size = 1;
+
+ vnet_dev_set_hw_addr_eth_mac (&port.port.attr.hw_addr,
+ ed->dev_attr.mac_addr);
+
+ return vnet_dev_port_add (vm, dev, 0, &port);
+
+err:
+ ena_free (vm, dev);
+ return rv;
+}
+
+static u8 *
+ena_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info)
+{
+ vnet_dev_bus_pci_device_info_t *di = dev_info;
+ const struct
+ {
+ u16 device_id;
+ char *description;
+ } ena_dev_types[] = {
+ { .device_id = 0x0ec2, .description = "Elastic Network Adapter (ENA) PF" },
+ { .device_id = 0xec20, .description = "Elastic Network Adapter (ENA) VF" },
+ };
+
+ if (di->vendor_id != 0x1d0f) /* AMAZON */
+ return 0;
+
+ FOREACH_ARRAY_ELT (dt, ena_dev_types)
+ {
+ if (dt->device_id == di->device_id)
+ return format (0, "%s", dt->description);
+ }
+
+ return 0;
+}
+
+VNET_DEV_REGISTER_DRIVER (ena) = {
+ .name = "ena",
+ .bus = "pci",
+ .device_data_sz = sizeof (ena_device_t),
+ .ops = {
+ .alloc = ena_alloc,
+ .init = ena_init,
+ .deinit = ena_deinit,
+ .free = ena_free,
+ .format_info = format_ena_dev_info,
+ .probe = ena_probe,
+ },
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "dev_ena",
+};
diff --git a/src/plugins/dev_ena/ena.h b/src/plugins/dev_ena/ena.h
new file mode 100644
index 00000000000..4acb8d9625a
--- /dev/null
+++ b/src/plugins/dev_ena/ena.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_H_
+#define _ENA_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+#include <dev_ena/ena_defs.h>
+
+#define ENA_ADMIN_QUEUE_LOG2_DEPTH 2
+#define ENA_ASYNC_QUEUE_LOG2_DEPTH 5
+#define ENA_ADMIN_QUEUE_DEPTH (1 << ENA_ADMIN_QUEUE_LOG2_DEPTH)
+#define ENA_ASYNC_QUEUE_DEPTH (1 << ENA_ASYNC_QUEUE_LOG2_DEPTH)
+
+typedef struct
+{
+ u8 readless : 1;
+ u8 aq_started : 1;
+ u8 aenq_started : 1;
+ u8 llq : 1;
+
+ void *reg_bar;
+
+ /* mmio */
+ ena_mmio_resp_t *mmio_resp;
+
+ /* admin queue */
+ struct
+ {
+ ena_aq_sq_entry_t *sq_entries;
+ ena_aq_cq_entry_t *cq_entries;
+ u16 sq_next;
+ u16 cq_head;
+ u16 depth;
+ } aq;
+
+ /* host info */
+ ena_aq_host_info_t *host_info;
+
+ /* device info */
+ ena_aq_feat_device_attr_t dev_attr;
+
+ /* async event notification */
+ struct
+ {
+ ena_aenq_entry_t *entries;
+ u16 head;
+ u16 depth;
+ f64 last_keepalive;
+ u64 tx_drops, tx_drops0;
+ u64 rx_drops, rx_drops0;
+ } aenq;
+
+} ena_device_t;
+
+typedef struct
+{
+} ena_port_t;
+
+typedef struct
+{
+ u32 *buffer_indices;
+ u16 *compl_sqe_indices;
+ ena_rx_desc_t *sqes;
+ ena_rx_cdesc_t *cqes;
+ u32 *sq_db;
+ u32 sq_next;
+ u32 cq_next;
+ u16 cq_idx;
+ u16 sq_idx;
+ u16 n_compl_sqes;
+ u8 cq_created : 1;
+ u8 sq_created : 1;
+} ena_rxq_t;
+
+typedef struct
+{
+ u32 *buffer_indices;
+ ena_tx_desc_t *sqes;
+ ena_tx_llq_desc128_t *llq_descs;
+ ena_tx_cdesc_t *cqes;
+ u64 *sqe_templates;
+ u32 *sq_db;
+ u32 sq_tail;
+ u32 sq_head;
+ u32 cq_next;
+ u16 cq_idx;
+ u16 sq_idx;
+ u8 cq_created : 1;
+ u8 sq_created : 1;
+ u8 llq : 1;
+} ena_txq_t;
+
+typedef struct
+{
+ u16 qid;
+ u16 next_index;
+ u32 hw_if_index;
+ ena_rx_cdesc_status_t status;
+ u16 length;
+ u16 n_desc;
+ u16 req_id;
+} ena_rx_trace_t;
+
+/* admin.c */
+typedef struct
+{
+ char *name;
+ u8 version;
+ u8 data_sz;
+ u8 get;
+ u8 set;
+} ena_aq_feat_info_t;
+
+ena_aq_feat_info_t *ena_aq_get_feat_info (ena_aq_feature_id_t);
+vnet_dev_rv_t ena_aq_olloc (vlib_main_t *, vnet_dev_t *, u16);
+vnet_dev_rv_t ena_aq_start (vlib_main_t *, vnet_dev_t *);
+void ena_aq_stop (vlib_main_t *, vnet_dev_t *);
+void ena_aq_free (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t ena_aq_create_sq (vlib_main_t *, vnet_dev_t *,
+ ena_aq_create_sq_cmd_t *,
+ ena_aq_create_sq_resp_t *);
+vnet_dev_rv_t ena_aq_create_cq (vlib_main_t *, vnet_dev_t *,
+ ena_aq_create_cq_cmd_t *,
+ ena_aq_create_cq_resp_t *);
+vnet_dev_rv_t ena_aq_destroy_sq (vlib_main_t *, vnet_dev_t *,
+ ena_aq_destroy_sq_cmd_t *);
+vnet_dev_rv_t ena_aq_destroy_cq (vlib_main_t *, vnet_dev_t *,
+ ena_aq_destroy_cq_cmd_t *);
+vnet_dev_rv_t ena_aq_set_feature (vlib_main_t *, vnet_dev_t *,
+ ena_aq_feature_id_t, void *);
+vnet_dev_rv_t ena_aq_get_feature (vlib_main_t *, vnet_dev_t *,
+ ena_aq_feature_id_t, void *);
+vnet_dev_rv_t ena_aq_get_stats (vlib_main_t *, vnet_dev_t *,
+ ena_aq_stats_type_t, ena_aq_stats_scope_t, u16,
+ void *);
+
+/* aenq.c */
+vnet_dev_rv_t ena_aenq_olloc (vlib_main_t *, vnet_dev_t *, u16);
+vnet_dev_rv_t ena_aenq_start (vlib_main_t *, vnet_dev_t *);
+void ena_aenq_stop (vlib_main_t *, vnet_dev_t *);
+void ena_aenq_free (vlib_main_t *, vnet_dev_t *);
+
+/* reg.c */
+void ena_reg_write (vnet_dev_t *, ena_reg_t, void *);
+void ena_reg_read (vnet_dev_t *, ena_reg_t, const void *);
+void ena_reg_set_dma_addr (vlib_main_t *, vnet_dev_t *, u32, u32, void *);
+vnet_dev_rv_t ena_reg_reset (vlib_main_t *, vnet_dev_t *, ena_reset_reason_t);
+
+/* port.c */
+vnet_dev_rv_t ena_port_init (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t ena_port_start (vlib_main_t *, vnet_dev_port_t *);
+void ena_port_stop (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t ena_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t ena_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+
+/* queue.c */
+vnet_dev_rv_t ena_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t ena_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *);
+void ena_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void ena_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t ena_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t ena_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *);
+void ena_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *);
+void ena_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* format.c */
+format_function_t format_ena_dev_info;
+format_function_t format_ena_mem_addr;
+format_function_t format_ena_tx_desc;
+format_function_t format_ena_rx_trace;
+
+/* format_admin.c */
+format_function_t format_ena_aq_feat_desc;
+format_function_t format_ena_aq_feat_name;
+format_function_t format_ena_aq_opcode;
+format_function_t format_ena_aq_status;
+format_function_t format_ena_aq_feat_id_bitmap;
+format_function_t format_ena_aq_create_sq_cmd;
+format_function_t format_ena_aq_create_cq_cmd;
+format_function_t format_ena_aq_create_sq_resp;
+format_function_t format_ena_aq_create_cq_resp;
+format_function_t format_ena_aq_destroy_sq_cmd;
+format_function_t format_ena_aq_destroy_cq_cmd;
+format_function_t format_ena_aq_basic_stats;
+format_function_t format_ena_aq_eni_stats;
+
+#define foreach_ena_rx_node_counter \
+ _ (BUFFER_ALLOC, buffer_alloc, ERROR, "buffer alloc error")
+
+typedef enum
+{
+#define _(f, lf, t, s) ENA_RX_NODE_CTR_##f,
+ foreach_ena_rx_node_counter
+#undef _
+ ENA_RX_NODE_N_CTRS,
+} ena_rx_node_ctr_t;
+
+#define foreach_ena_tx_node_counter \
+ _ (CHAIN_TOO_LONG, chain_too_long, ERROR, "buffer chain too long") \
+ _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots")
+
+typedef enum
+{
+#define _(f, lf, t, s) ENA_TX_NODE_CTR_##f,
+ foreach_ena_tx_node_counter
+#undef _
+ ENA_TX_NODE_N_CTRS,
+} ena_tx_node_ctr_t;
+
+#define log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, ena_log.class, "%U" f, format_vnet_dev_log, \
+ (dev), clib_string_skip_prefix (__func__, "ena_"), ##__VA_ARGS__)
+#define log_info(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_INFO, ena_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_notice(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_NOTICE, ena_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, ena_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, ena_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+
+#endif /* _ENA_H_ */
diff --git a/src/plugins/dev_ena/ena_admin_defs.h b/src/plugins/dev_ena/ena_admin_defs.h
new file mode 100644
index 00000000000..6433a1563b8
--- /dev/null
+++ b/src/plugins/dev_ena/ena_admin_defs.h
@@ -0,0 +1,685 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_ADMIN_DEFS_H_
+#define _ENA_ADMIN_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+
+#define foreach_ena_aq_opcode \
+ _ (1, CREATE_SQ) \
+ _ (2, DESTROY_SQ) \
+ _ (3, CREATE_CQ) \
+ _ (4, DESTROY_CQ) \
+ _ (8, GET_FEATURE) \
+ _ (9, SET_FEATURE) \
+ _ (11, GET_STATS)
+
+typedef enum
+{
+#define _(v, n) ENA_AQ_OPCODE_##n = (v),
+ foreach_ena_aq_opcode
+#undef _
+} __clib_packed ena_aq_opcode_t;
+
+#define foreach_ena_aq_compl_status \
+ _ (0, SUCCESS) \
+ _ (1, RESOURCE_ALLOCATION_FAILURE) \
+ _ (2, BAD_OPCODE) \
+ _ (3, UNSUPPORTED_OPCODE) \
+ _ (4, MALFORMED_REQUEST) \
+ _ (5, ILLEGAL_PARAMETER) \
+ _ (6, UNKNOWN_ERROR) \
+ _ (7, RESOURCE_BUSY)
+
+typedef enum
+{
+#define _(v, n) ENA_ADMIN_COMPL_STATUS_##n = (v),
+ foreach_ena_aq_compl_status
+#undef _
+} __clib_packed ena_aq_compl_status_t;
+
+/* id, versiom, get, set, name, struct */
+#define foreach_ena_aq_feature_id \
+ _ (1, 0, 1, 0, DEVICE_ATTRIBUTES, ena_aq_feat_device_attr_t) \
+ _ (2, 0, 1, 0, MAX_QUEUES_NUM, ena_aq_feat_max_queue_num_t) \
+ _ (3, 0, 1, 0, HW_HINTS, ena_aq_feat_hw_hints_t) \
+ _ (4, 0, 1, 1, LLQ, ena_aq_feat_llq_t) \
+ _ (5, 0, 1, 0, EXTRA_PROPERTIES_STRINGS, \
+ ena_aq_feat_extra_properties_strings_t) \
+ _ (6, 0, 1, 0, EXTRA_PROPERTIES_FLAGS, \
+ ena_aq_feat_extra_properties_flags_t) \
+ _ (7, 1, 1, 0, MAX_QUEUES_EXT, ena_aq_feat_max_queue_ext_t) \
+ _ (10, 0, 1, 1, RSS_HASH_FUNCTION, ena_aq_feat_rss_hash_function_t) \
+ _ (11, 0, 1, 0, STATELESS_OFFLOAD_CONFIG, \
+ ena_aq_feat_stateless_offload_config_t) \
+ _ (12, 0, 1, 1, RSS_INDIRECTION_TABLE_CONFIG, \
+ ena_aq_feat_rss_ind_table_config_t) \
+ _ (14, 0, 0, 1, MTU, ena_aq_feat_mtu_t) \
+ _ (18, 0, 1, 1, RSS_HASH_INPUT, ena_aq_feat_rss_hash_input_t) \
+ _ (20, 0, 1, 0, INTERRUPT_MODERATION, ena_aq_feat_intr_moder_t) \
+ _ (26, 0, 1, 1, AENQ_CONFIG, ena_aq_feat_aenq_config_t) \
+ _ (27, 0, 1, 0, LINK_CONFIG, ena_aq_feat_link_config_t) \
+ _ (28, 0, 0, 1, HOST_ATTR_CONFIG, ena_aq_feat_host_attr_config_t) \
+ _ (29, 0, 1, 1, PHC_CONFIG, ena_aq_feat_phc_config_t)
+
+typedef enum
+{
+#define _(v, ver, r, w, n, s) ENA_ADMIN_FEAT_ID_##n = (v),
+ foreach_ena_aq_feature_id
+#undef _
+} __clib_packed ena_aq_feature_id_t;
+
+#define foreach_ena_aq_stats_type \
+ _ (0, BASIC) \
+ _ (1, EXTENDED) \
+ _ (2, ENI)
+
+#define foreach_ena_aq_stats_scope \
+ _ (0, SPECIFIC_QUEUE) \
+ _ (1, ETH_TRAFFIC)
+
+typedef enum
+{
+#define _(v, n) ENA_ADMIN_STATS_TYPE_##n = (v),
+ foreach_ena_aq_stats_type
+#undef _
+} __clib_packed ena_aq_stats_type_t;
+
+typedef enum
+{
+#define _(v, n) ENA_ADMIN_STATS_SCOPE_##n = (v),
+ foreach_ena_aq_stats_scope
+#undef _
+} __clib_packed ena_aq_stats_scope_t;
+
+typedef struct
+{
+ u32 addr_lo;
+ u16 addr_hi;
+ u16 _reserved_16;
+} ena_mem_addr_t;
+
+#define foreach_ena_aq_aenq_groups \
+ _ (link_change) \
+ _ (fatal_error) \
+ _ (warning) \
+ _ (notification) \
+ _ (keep_alive) \
+ _ (refresh_capabilities) \
+ _ (conf_notifications)
+
+typedef union
+{
+ struct
+ {
+#define _(g) u32 g : 1;
+ foreach_ena_aq_aenq_groups
+#undef _
+ };
+ u32 as_u32;
+} ena_aq_aenq_groups_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_aenq_groups_t, 4);
+
+typedef struct
+{
+ u32 length;
+ ena_mem_addr_t addr;
+} ena_aq_aq_ctrl_buff_info_t;
+
+typedef struct
+{
+ u32 impl_id;
+ u32 device_version;
+ u32 supported_features;
+ u32 _reserved3;
+ u32 phys_addr_width;
+ u32 virt_addr_width;
+ u8 mac_addr[6];
+ u8 _reserved7[2];
+ u32 max_mtu;
+} ena_aq_feat_device_attr_t;
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u16 l3_sort : 1;
+ u16 l4_sort : 1;
+ };
+ u16 supported_input_sort;
+ };
+ union
+ {
+ struct
+ {
+ u16 enable_l3_sort : 1;
+ u16 enable_l4_sort : 1;
+ };
+ u16 enabled_input_sort;
+ };
+} ena_aq_feat_rss_hash_input_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_feat_rss_hash_input_t, 4);
+
+typedef struct
+{
+ u16 intr_delay_resolution;
+ u16 reserved;
+} ena_aq_feat_intr_moder_t;
+
+typedef struct
+{
+ ena_aq_aenq_groups_t supported_groups;
+ ena_aq_aenq_groups_t enabled_groups;
+} ena_aq_feat_aenq_config_t;
+
+#define foreach_ena_aq_link_types \
+ _ (0, 1000, 1G) \
+ _ (1, 2500, 2_5G) \
+ _ (2, 5000, 5G) \
+ _ (3, 10000, 10G) \
+ _ (4, 25000, 25G) \
+ _ (5, 40000, 40G) \
+ _ (6, 50000, 50G) \
+ _ (7, 100000, 100G) \
+ _ (8, 200000, 200G) \
+ _ (9, 400000, 400G)
+
+typedef enum
+{
+#define _(b, v, n) ENA_ADMIN_LINK_TYPE_##n = (1U << b),
+ foreach_ena_aq_link_types
+#undef _
+} ena_aq_link_types_t;
+
+typedef struct
+{
+ u32 speed;
+ ena_aq_link_types_t supported;
+ u32 autoneg : 1;
+ u32 duplex : 1;
+} ena_aq_feat_link_config_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_feat_link_config_t, 12);
+
+typedef struct
+{
+ u32 tx;
+ u32 rx_supported;
+ u32 rx_enabled;
+} ena_aq_feat_stateless_offload_config_t;
+
+typedef struct
+{
+ u16 cq_idx;
+ u16 reserved;
+} ena_aq_feat_rss_ind_table_entry_t;
+
+typedef struct
+{
+ u16 min_size;
+ u16 max_size;
+ u16 size;
+ u8 one_entry_update : 1;
+ u8 reserved;
+ u32 inline_index;
+ ena_aq_feat_rss_ind_table_entry_t inline_entry;
+} ena_aq_feat_rss_ind_table_config_t;
+
+typedef struct
+{
+ u32 mtu;
+} ena_aq_feat_mtu_t;
+
+typedef struct
+{
+ u32 count;
+} ena_aq_feat_extra_properties_strings_t;
+
+typedef struct
+{
+ u32 flags;
+} ena_aq_feat_extra_properties_flags_t;
+
+typedef struct
+{
+ u32 max_sq_num;
+ u32 max_sq_depth;
+ u32 max_cq_num;
+ u32 max_cq_depth;
+ u32 max_legacy_llq_num;
+ u32 max_legacy_llq_depth;
+ u32 max_header_size;
+ u16 max_packet_tx_descs;
+ u16 max_packet_rx_descs;
+} ena_aq_feat_max_queue_num_t;
+
+typedef struct
+{
+ u16 mmio_read_timeout;
+ u16 driver_watchdog_timeout;
+ u16 missing_tx_completion_timeout;
+ u16 missed_tx_completion_count_threshold_to_reset;
+ u16 admin_completion_tx_timeout;
+ u16 netdev_wd_timeout;
+ u16 max_tx_sgl_size;
+ u16 max_rx_sgl_size;
+ u16 reserved[8];
+} ena_aq_feat_hw_hints_t;
+
+typedef struct
+{
+ u8 version;
+ u8 _reserved1[3];
+ u32 max_tx_sq_num;
+ u32 max_tx_cq_num;
+ u32 max_rx_sq_num;
+ u32 max_rx_cq_num;
+ u32 max_tx_sq_depth;
+ u32 max_tx_cq_depth;
+ u32 max_rx_sq_depth;
+ u32 max_rx_cq_depth;
+ u32 max_tx_header_size;
+ u16 max_per_packet_tx_descs;
+ u16 max_per_packet_rx_descs;
+} ena_aq_feat_max_queue_ext_t;
+
+typedef struct
+{
+ u32 supported_func;
+ u32 selected_func;
+ u32 init_val;
+} ena_aq_feat_rss_hash_function_t;
+
+typedef struct
+{
+ ena_mem_addr_t os_info_ba;
+ ena_mem_addr_t debug_ba;
+ u32 debug_area_size;
+} ena_aq_feat_host_attr_config_t;
+
+typedef struct
+{
+ u8 type;
+ u8 reserved1[3];
+ u32 doorbell_offset;
+ u32 expire_timeout_usec;
+ u32 block_timeout_usec;
+ ena_mem_addr_t output_address;
+ u32 output_length;
+} ena_aq_feat_phc_config_t;
+
+typedef struct
+{
+ u32 max_llq_num;
+ u32 max_llq_depth;
+ u16 header_location_ctrl_supported;
+ u16 header_location_ctrl_enabled;
+ u16 entry_size_ctrl_supported;
+ u16 entry_size_ctrl_enabled;
+ u16 desc_num_before_header_supported;
+ u16 desc_num_before_header_enabled;
+ u16 descriptors_stride_ctrl_supported;
+ u16 descriptors_stride_ctrl_enabled;
+ union
+ {
+ struct
+ {
+ u16 supported_flags;
+ u16 max_tx_burst_size;
+ } get;
+ struct
+ {
+ u16 enabled_flags;
+ } set;
+ } accel_mode;
+} ena_aq_feat_llq_t;
+
+typedef struct
+{
+ /* feat common */
+ u8 flags;
+ ena_aq_feature_id_t feature_id;
+ u8 feature_version;
+ u8 _reserved;
+} ena_aq_get_set_feature_common_desc_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_get_set_feature_common_desc_t, 4);
+
+typedef struct
+{
+ ena_aq_aq_ctrl_buff_info_t control_buffer;
+ ena_aq_stats_type_t type;
+ ena_aq_stats_scope_t scope;
+ u16 _reserved3;
+ u16 queue_idx;
+ u16 device_id;
+} ena_aq_get_stats_cmd_t;
+STATIC_ASSERT_SIZEOF (ena_aq_get_stats_cmd_t, 20);
+
+typedef enum
+{
+ ENA_ADMIN_SQ_DIRECTION_TX = 1,
+ ENA_ADMIN_SQ_DIRECTION_RX = 2,
+} ena_aq_sq_direction_t;
+
+typedef enum
+{
+ ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST = 1,
+ ENA_ADMIN_SQ_PLACEMENT_POLICY_DEVICE = 3,
+} ena_aq_sq_placement_policy_t;
+
+typedef enum
+{
+ ENA_ADMIN_SQ_COMPLETION_POLICY_DESC = 0,
+ ENA_ADMIN_SQ_COMPLETION_POLICY_DESC_ON_DEMAND = 1,
+ ENA_ADMIN_SQ_COMPLETION_POLICY_HEAD_ON_DEMAND = 2,
+ ENA_ADMIN_SQ_COMPLETION_POLICY_HEAD = 3,
+} ena_aq_completion_policy_t;
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u8 _reserved0_0 : 5;
+ u8 sq_direction : 3; /* ena_aq_sq_direction_t */
+ };
+ u8 sq_identity;
+ };
+
+ u8 _reserved1;
+
+ union
+ {
+ struct
+ {
+ u8 placement_policy : 4; /* ena_aq_sq_placement_policy_t */
+ u8 completion_policy : 3; /* ena_aq_completion_policy_t */
+ u8 _reserved2_7 : 1;
+ };
+ u8 sq_caps_2;
+ };
+
+ union
+ {
+ struct
+ {
+ u8 is_physically_contiguous : 1;
+ u8 _reserved3_1 : 7;
+ };
+ u8 sq_caps_3;
+ };
+
+ u16 cq_idx;
+ u16 sq_depth;
+ ena_mem_addr_t sq_ba;
+ ena_mem_addr_t sq_head_writeback; /* used if completion_policy is 2 or 3 */
+ u32 _reserved0_w7;
+ u32 _reserved0_w8;
+} ena_aq_create_sq_cmd_t;
+
+typedef struct
+{
+ u16 sq_idx;
+ u16 _reserved;
+ u32 sq_doorbell_offset; /* REG BAR offset of queue dorbell */
+ u32 llq_descriptors_offset; /* LLQ MEM BAR offset of descriptors */
+ u32 llq_headers_offset; /* LLQ MEM BAR offset of headers mem */
+} ena_aq_create_sq_resp_t;
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u8 _reserved0_0 : 5;
+ u8 interrupt_mode_enabled : 1;
+ u8 _reserved0_6 : 2;
+ };
+ u8 cq_caps_1;
+ };
+
+ union
+ {
+ struct
+ {
+ u8 cq_entry_size_words : 4;
+ u8 _reserved1_4 : 4;
+ };
+ u8 cq_caps_2;
+ };
+
+ u16 cq_depth;
+ u32 msix_vector;
+ ena_mem_addr_t cq_ba;
+} ena_aq_create_cq_cmd_t;
+
+typedef struct
+{
+ u16 cq_idx;
+ u16 cq_actual_depth;
+ u32 numa_node_register_offset;
+ u32 cq_head_db_register_offset;
+ u32 cq_interrupt_unmask_register_offset;
+} ena_aq_create_cq_resp_t;
+
+typedef struct
+{
+ u16 sq_idx;
+ union
+ {
+ struct
+ {
+ u8 _reserved : 5;
+ u8 sq_direction : 3; /* ena_aq_sq_direction_t */
+ };
+ u8 sq_identity;
+ };
+ u8 _reserved1;
+} ena_aq_destroy_sq_cmd_t;
+
+typedef struct
+{
+ u16 cq_idx;
+ u16 _reserved1;
+} ena_aq_destroy_cq_cmd_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_create_sq_cmd_t, 32);
+STATIC_ASSERT_SIZEOF (ena_aq_create_sq_resp_t, 16);
+STATIC_ASSERT_SIZEOF (ena_aq_create_cq_cmd_t, 16);
+STATIC_ASSERT_SIZEOF (ena_aq_create_cq_resp_t, 16);
+STATIC_ASSERT_SIZEOF (ena_aq_destroy_sq_cmd_t, 4);
+STATIC_ASSERT_SIZEOF (ena_aq_destroy_cq_cmd_t, 4);
+
+typedef struct
+{
+ /* common desc */
+ u16 command_id;
+ ena_aq_opcode_t opcode;
+
+ union
+ {
+ struct
+ {
+ u8 phase : 1;
+ u8 ctrl_data : 1;
+ u8 ctrl_data_indirect : 1;
+ u8 _reserved_3_3 : 5;
+ };
+ u8 flags;
+ };
+
+ u32 data[15];
+} ena_aq_sq_entry_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_sq_entry_t, 64);
+
+typedef struct
+{
+ u32 os_type;
+ u8 os_dist_str[128];
+ u32 os_dist;
+ u8 kernel_ver_str[32];
+ u32 kernel_ver;
+
+ struct
+ {
+ u8 major;
+ u8 minor;
+ u8 sub_minor;
+ u8 module_type;
+ } driver_version;
+
+ u32 supported_network_features[2];
+
+ struct
+ {
+ u16 minor : 8;
+ u16 major : 8;
+ } ena_spec_version;
+
+ struct
+ {
+ u16 function : 3;
+ u16 device : 5;
+ u16 bus : 8;
+ } bdf;
+
+ u16 num_cpus;
+ u16 _reserved;
+
+ union
+ {
+ struct
+ {
+ u32 _reserved0 : 1;
+ u32 rx_offset : 1;
+ u32 interrupt_moderation : 1;
+ u32 rx_buf_mirroring : 1;
+ u32 rss_configurable_function_key : 1;
+ u32 _reserved5 : 1;
+ u32 rx_page_reuse : 1;
+ u32 tx_ipv6_csum_offload : 1;
+ u32 _reserved8 : 24;
+ };
+ u32 as_u32;
+ } driver_supported_features;
+
+} ena_aq_host_info_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_host_info_t, 196);
+
+typedef struct
+{
+ union
+ {
+ u64 tx_bytes;
+ struct
+ {
+ u32 tx_bytes_low;
+ u32 tx_bytes_high;
+ };
+ };
+ union
+ {
+ u64 tx_pkts;
+ struct
+ {
+ u32 tx_pkts_low;
+ u32 tx_pkts_high;
+ };
+ };
+ union
+ {
+ u64 rx_bytes;
+ struct
+ {
+ u32 rx_bytes_low;
+ u32 rx_bytes_high;
+ };
+ };
+ union
+ {
+ u64 rx_pkts;
+ struct
+ {
+ u32 rx_pkts_low;
+ u32 rx_pkts_high;
+ };
+ };
+ union
+ {
+ u64 rx_drops;
+ struct
+ {
+ u32 rx_drops_low;
+ u32 rx_drops_high;
+ };
+ };
+ union
+ {
+ u64 tx_drops;
+ struct
+ {
+ u32 tx_drops_low;
+ u32 tx_drops_high;
+ };
+ };
+} ena_aq_basic_stats_t;
+
+#define foreach_ena_aq_basic_counter \
+ _ (rx_pkts, "RX Packets") \
+ _ (tx_pkts, "TX Packets") \
+ _ (rx_bytes, "RX Bytes") \
+ _ (tx_bytes, "TX Bytes") \
+ _ (rx_drops, "RX Packet Drops") \
+ _ (tx_drops, "TX Packet Drops")
+
+typedef struct
+{
+ u64 bw_in_allowance_exceeded;
+ u64 bw_out_allowance_exceeded;
+ u64 pps_allowance_exceeded;
+ u64 conntrack_allowance_exceeded;
+ u64 linklocal_allowance_exceeded;
+} ena_aq_eni_stats_t;
+
+#define foreach_ena_aq_eni_counter \
+ _ (bw_in_allowance_exceeded, "Input BW Allowance Exceeded") \
+ _ (bw_out_allowance_exceeded, "Output BW Allowance Exceeded") \
+ _ (pps_allowance_exceeded, "PPS Allowance Exceeded") \
+ _ (conntrack_allowance_exceeded, "ConnTrack Allowance Exceeded") \
+ _ (linklocal_allowance_exceeded, "LinkLocal Allowance Exceeded")
+
+typedef struct
+{
+ /* common desc */
+ u16 command;
+ ena_aq_compl_status_t status;
+ union
+ {
+ struct
+ {
+ u8 phase : 1;
+ u8 _reserved3_1 : 7;
+ };
+ u8 flags;
+ };
+ u16 extended_status;
+ u16 sq_head_indx;
+
+ u32 data[14];
+} ena_aq_cq_entry_t;
+
+STATIC_ASSERT_SIZEOF (ena_aq_cq_entry_t, 64);
+
+#endif /* _ENA_ADMIN_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_aenq_defs.h b/src/plugins/dev_ena/ena_aenq_defs.h
new file mode 100644
index 00000000000..4530f5e7a42
--- /dev/null
+++ b/src/plugins/dev_ena/ena_aenq_defs.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_AENQ_DEFS_H_
+#define _ENA_AENQ_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+
+#define foreach_aenq_group \
+ _ (0, LINK_CHANGE) \
+ _ (1, FATAL_ERROR) \
+ _ (2, WARNING) \
+ _ (3, NOTIFICATION) \
+ _ (4, KEEP_ALIVE) \
+ _ (5, REFRESH_CAPABILITIES) \
+ _ (6, CONF_NOTIFICATIONS)
+
+#define foreach_aenq_syndrome \
+ _ (0, SUSPEND) \
+ _ (1, RESUME) \
+ _ (2, UPDATE_HINTS)
+
+typedef enum
+{
+#define _(v, n) ENA_AENQ_GROUP_##n = (v),
+ foreach_aenq_group
+#undef _
+} ena_aenq_group_t;
+
+typedef enum
+{
+#define _(v, n) ENA_AENQ_SYNDROME_##n = (v),
+ foreach_aenq_syndrome
+#undef _
+} ena_aenq_syndrome_t;
+
+typedef struct
+{
+ ena_aenq_group_t group : 16;
+ ena_aenq_syndrome_t syndrome : 16;
+
+ union
+ {
+ struct
+ {
+ u8 phase : 1;
+ };
+ u8 flags;
+ };
+ u8 reserved1[3];
+
+ union
+ {
+ u64 timestamp;
+ struct
+ {
+ u32 timestamp_low;
+ u32 timestamp_high;
+ };
+ };
+
+ union
+ {
+ u32 data[12];
+
+ struct
+ {
+ union
+ {
+ struct
+ {
+ u32 link_status : 1;
+ };
+ u32 flags;
+ };
+ } link_change;
+
+ struct
+ {
+ union
+ {
+ u64 rx_drops;
+ struct
+ {
+ u32 rx_drops_low;
+ u32 rx_drops_high;
+ };
+ };
+
+ union
+ {
+ u64 tx_drops;
+ struct
+ {
+ u32 tx_drops_low;
+ u32 tx_drops_high;
+ };
+ };
+ } keep_alive;
+ };
+} __clib_packed ena_aenq_entry_t;
+
+STATIC_ASSERT_SIZEOF (ena_aenq_entry_t, 64);
+
+#endif /* _ENA_AENQ_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_defs.h b/src/plugins/dev_ena/ena_defs.h
new file mode 100644
index 00000000000..1e52ed4e05b
--- /dev/null
+++ b/src/plugins/dev_ena/ena_defs.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_DEFS_H_
+#define _ENA_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <dev_ena/ena_reg_defs.h>
+#include <dev_ena/ena_admin_defs.h>
+#include <dev_ena/ena_aenq_defs.h>
+#include <dev_ena/ena_io_defs.h>
+
+/*
+ * MMIO Response
+ */
+typedef struct
+{
+ u16 req_id;
+ u16 reg_off;
+ u32 reg_val;
+} ena_mmio_resp_t;
+
+#endif /* _ENA_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_inlines.h b/src/plugins/dev_ena/ena_inlines.h
new file mode 100644
index 00000000000..106bd5eaa21
--- /dev/null
+++ b/src/plugins/dev_ena/ena_inlines.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_INLINES_H_
+#define _ENA_INLINES_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <dev_ena/ena.h>
+
+#define ena_log_is_debug() \
+ vlib_log_is_enabled (VLIB_LOG_LEVEL_DEBUG, ena_log.class)
+
+#define ena_stats_log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, ena_stats_log.class, "%U: " f, \
+ format_vnet_dev_addr, dev, ##__VA_ARGS__)
+
+#define ena_stats_log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, ena_stats_log.class, "%U: " f, \
+ format_vnet_dev_addr, dev, ##__VA_ARGS__)
+
+#define ena_stats_log_is_debug() \
+ vlib_log_is_enabled (VLIB_LOG_LEVEL_DEBUG, ena_stats_log.class)
+
+static_always_inline void
+ena_set_mem_addr (vlib_main_t *vm, vnet_dev_t *dev, ena_mem_addr_t *m, void *p)
+{
+ u64 pa = vnet_dev_get_dma_addr (vm, dev, p);
+ *m = (ena_mem_addr_t){ .addr_lo = (u32) pa, .addr_hi = (u16) (pa >> 32) };
+}
+
+static_always_inline int
+ena_aq_feature_is_supported (vnet_dev_t *dev, ena_aq_feature_id_t feat_id)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ return (ed->dev_attr.supported_features & (1U << feat_id)) != 0;
+}
+
+#endif /* ENA_INLINES_H */
diff --git a/src/plugins/dev_ena/ena_io_defs.h b/src/plugins/dev_ena/ena_io_defs.h
new file mode 100644
index 00000000000..89ca2ac6498
--- /dev/null
+++ b/src/plugins/dev_ena/ena_io_defs.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_IO_DEFS_H_
+#define _ENA_IO_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/vector.h>
+
+typedef struct
+{
+ u16 length; /* 0 = 64K */
+ u8 reserved2;
+ union
+ {
+ struct
+ {
+ u8 phase : 1;
+ u8 reserved1 : 1;
+ u8 first : 1; /* first descriptor in transaction */
+ u8 last : 1; /* last descriptor in transaction */
+ u8 comp_req : 1; /* should completion be posted? */
+ u8 reserved5 : 1;
+ u8 reserved67 : 2;
+ };
+ u8 ctrl;
+ };
+ u16 req_id;
+ u16 reserved6;
+} ena_rx_desc_lo_t;
+
+STATIC_ASSERT_SIZEOF (ena_rx_desc_lo_t, 8);
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ ena_rx_desc_lo_t lo;
+ u32 buff_addr_lo;
+ u16 buff_addr_hi;
+ u16 reserved16_w3;
+ };
+ u64x2 as_u64x2;
+ };
+} ena_rx_desc_t;
+
+STATIC_ASSERT_SIZEOF (ena_rx_desc_t, 16);
+
+#define foreach_ena_rx_cdesc_status \
+ _ (5, l3_proto_idx) \
+ _ (2, src_vlan_cnt) \
+ _ (1, _reserved7) \
+ _ (5, l4_proto_idx) \
+ _ (1, l3_csum_err) \
+ _ (1, l4_csum_err) \
+ _ (1, ipv4_frag) \
+ _ (1, l4_csum_checked) \
+ _ (7, _reserved17) \
+ _ (1, phase) \
+ _ (1, l3_csum2) \
+ _ (1, first) \
+ _ (1, last) \
+ _ (2, _reserved28) \
+ _ (1, buffer) \
+ _ (1, _reserved31)
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+#define _(b, n) u32 n : (b);
+ foreach_ena_rx_cdesc_status
+#undef _
+ };
+ u32 as_u32;
+ };
+} ena_rx_cdesc_status_t;
+
+typedef struct
+{
+ ena_rx_cdesc_status_t status;
+ u16 length;
+ u16 req_id;
+ u32 hash;
+ u16 sub_qid;
+ u8 offset;
+ u8 reserved;
+} ena_rx_cdesc_t;
+
+STATIC_ASSERT_SIZEOF (ena_rx_cdesc_t, 16);
+
+#define foreach_ena_tx_desc \
+ /* len_ctrl */ \
+ _ (16, length) \
+ _ (6, req_id_hi) \
+ _ (1, _reserved0_22) \
+ _ (1, meta_desc) \
+ _ (1, phase) \
+ _ (1, _reserved0_25) \
+ _ (1, first) \
+ _ (1, last) \
+ _ (1, comp_req) \
+ _ (2, _reserved0_29) \
+ _ (1, _reserved0_31) \
+ /* meta_ctrl */ \
+ _ (4, l3_proto_idx) \
+ _ (1, df) \
+ _ (2, _reserved1_5) \
+ _ (1, tso_en) \
+ _ (5, l4_proto_idx) \
+ _ (1, l3_csum_en) \
+ _ (1, l4_csum_en) \
+ _ (1, ethernet_fcs_dis) \
+ _ (1, _reserved1_16) \
+ _ (1, l4_csum_partial) \
+ _ (3, _reserved_1_18) \
+ _ (1, _reserved_1_21) \
+ _ (10, req_id_lo)
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+#define _(b, n) u32 n : (b);
+ foreach_ena_tx_desc
+#undef _
+ u32 buff_addr_lo;
+ u16 buff_addr_hi;
+ u8 _reserved3_16;
+ u8 header_length;
+ };
+
+ u16x8 as_u16x8;
+ u32x4 as_u32x4;
+ u64x2 as_u64x2;
+ };
+} ena_tx_desc_t;
+
+STATIC_ASSERT_SIZEOF (ena_tx_desc_t, 16);
+
+typedef struct
+{
+ ena_tx_desc_t desc[2];
+ u8 data[96];
+} __clib_aligned (128)
+ena_tx_llq_desc128_t;
+STATIC_ASSERT_SIZEOF (ena_tx_llq_desc128_t, 128);
+
+typedef union
+{
+ struct
+ {
+ u16 req_id;
+ u8 status;
+ union
+ {
+ struct
+ {
+ u8 phase : 1;
+ };
+ u8 flags;
+ };
+ u16 sub_qid;
+ u16 sq_head_idx;
+ };
+ u64 as_u64;
+} ena_tx_cdesc_t;
+
+STATIC_ASSERT_SIZEOF (ena_tx_cdesc_t, 8);
+
+#endif /* _ENA_IO_DEFS_H_ */
diff --git a/src/plugins/dev_ena/ena_reg_defs.h b/src/plugins/dev_ena/ena_reg_defs.h
new file mode 100644
index 00000000000..11d458e21ac
--- /dev/null
+++ b/src/plugins/dev_ena/ena_reg_defs.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _ENA_REG_DEFS_H_
+#define _ENA_REG_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+
+#define ena_reg_version_t_fields \
+ __ (8, minor) \
+ __ (8, major)
+
+#define ena_reg_controller_version_t_fields \
+ __ (8, subminor) \
+ __ (8, minor) \
+ __ (8, major) \
+ __ (8, impl_id)
+
+#define ena_reg_caps_t_fields \
+ __ (1, contiguous_queue_required) \
+ __ (5, reset_timeout) \
+ __ (2, _unused) \
+ __ (8, dma_addr_width) \
+ __ (4, admin_cmd_to)
+
+#define ena_reg_aq_caps_t_fields \
+ __ (16, depth) \
+ __ (16, entry_size)
+
+#define ena_reg_acq_caps_t_fields \
+ __ (16, depth) \
+ __ (16, entry_size)
+
+#define ena_reg_aenq_caps_t_fields \
+ __ (16, depth) \
+ __ (16, entry_size)
+
+#define ena_reg_dev_ctl_t_fields \
+ __ (1, dev_reset) \
+ __ (1, aq_restart) \
+ __ (1, quiescent) \
+ __ (1, io_resume) \
+ __ (24, _unused) \
+ __ (4, reset_reason)
+
+#define ena_reg_dev_sts_t_fields \
+ __ (1, ready) \
+ __ (1, aq_restart_in_progress) \
+ __ (1, aq_restart_finished) \
+ __ (1, reset_in_progress) \
+ __ (1, reset_finished) \
+ __ (1, fatal_error) \
+ __ (1, quiescent_state_in_progress) \
+ __ (1, quiescent_state_achieved)
+
+#define ena_reg_mmio_reg_read_t_fields \
+ __ (16, req_id) \
+ __ (16, reg_off)
+
+#define ena_reg_rss_ind_entry_update_t_fields \
+ __ (16, index) \
+ __ (16, cx_idx)
+
+#define __(l, f) u32 f : l;
+#define _(n) \
+ typedef union \
+ { \
+ struct \
+ { \
+ n##_fields; \
+ }; \
+ u32 as_u32; \
+ } n;
+
+_ (ena_reg_version_t)
+_ (ena_reg_controller_version_t)
+_ (ena_reg_caps_t)
+_ (ena_reg_aq_caps_t)
+_ (ena_reg_acq_caps_t)
+_ (ena_reg_aenq_caps_t)
+_ (ena_reg_dev_ctl_t)
+_ (ena_reg_dev_sts_t)
+_ (ena_reg_mmio_reg_read_t)
+_ (ena_reg_rss_ind_entry_update_t)
+#undef _
+#undef __
+
+#define foreach_ena_reg \
+ _ (0x00, 1, VERSION, ena_reg_version_t_fields) \
+ _ (0x04, 1, CONTROLLER_VERSION, ena_reg_controller_version_t_fields) \
+ _ (0x08, 1, CAPS, ena_reg_caps_t_fields) \
+ _ (0x0c, 1, EXT_CAPS, ) \
+ _ (0x10, 1, AQ_BASE_LO, ) \
+ _ (0x14, 1, AQ_BASE_HI, ) \
+ _ (0x18, 1, AQ_CAPS, ena_reg_aq_caps_t_fields) \
+ _ (0x20, 1, ACQ_BASE_LO, ) \
+ _ (0x24, 1, ACQ_BASE_HI, ) \
+ _ (0x28, 1, ACQ_CAPS, ena_reg_acq_caps_t_fields) \
+ _ (0x2c, 0, AQ_DB, ) \
+ _ (0x30, 0, ACQ_TAIL, ) \
+ _ (0x34, 1, AENQ_CAPS, ena_reg_aenq_caps_t_fields) \
+ _ (0x38, 0, AENQ_BASE_LO, ) \
+ _ (0x3c, 0, AENQ_BASE_HI, ) \
+ _ (0x40, 0, AENQ_HEAD_DB, ) \
+ _ (0x44, 0, AENQ_TAIL, ) \
+ _ (0x4c, 1, INTR_MASK, ) \
+ _ (0x54, 0, DEV_CTL, ena_reg_dev_ctl_t_fields) \
+ _ (0x58, 1, DEV_STS, ena_reg_dev_sts_t_fields) \
+ _ (0x5c, 0, MMIO_REG_READ, ena_reg_mmio_reg_read_t_fields) \
+ _ (0x60, 0, MMIO_RESP_LO, ) \
+ _ (0x64, 0, MMIO_RESP_HI, ) \
+ _ (0x68, 0, RSS_IND_ENTRY_UPDATE, ena_reg_rss_ind_entry_update_t_fields)
+
+typedef enum
+{
+#define _(o, r, n, f) ENA_REG_##n = o,
+ foreach_ena_reg
+#undef _
+} ena_reg_t;
+
+#define foreach_ena_reset_reason \
+ _ (0, NORMAL) \
+ _ (1, KEEP_ALIVE_TO) \
+ _ (2, ADMIN_TO) \
+ _ (3, MISS_TX_CMPL) \
+ _ (4, INV_RX_REQ_ID) \
+ _ (5, INV_TX_REQ_ID) \
+ _ (6, TOO_MANY_RX_DESCS) \
+ _ (7, INIT_ERR) \
+ _ (8, DRIVER_INVALID_STATE) \
+ _ (9, OS_TRIGGER) \
+ _ (10, OS_NETDEV_WD) \
+ _ (11, SHUTDOWN) \
+ _ (12, USER_TRIGGER) \
+ _ (13, GENERIC) \
+ _ (14, MISS_INTERRUPT) \
+ _ (15, SUSPECTED_POLL_STARVATION) \
+ _ (16, RX_DESCRIPTOR_MALFORMED) \
+ _ (17, TX_DESCRIPTOR_MALFORMED)
+
+typedef enum
+{
+#define _(o, n) ENA_RESET_REASON_##n = o,
+ foreach_ena_reset_reason
+#undef _
+} ena_reset_reason_t;
+
+#endif /* _ENA_REG_DEFS_H_ */
diff --git a/src/plugins/dev_ena/format.c b/src/plugins/dev_ena/format.c
new file mode 100644
index 00000000000..2db52b50f66
--- /dev/null
+++ b/src/plugins/dev_ena/format.c
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vlib/pci/pci.h"
+#include "vnet/error.h"
+#include "vppinfra/error.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_defs.h>
+
+u8 *
+format_ena_dev_info (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u32 indent = format_get_indent (s) + 2;
+
+ format (s, "Elastic Network Adapter:");
+ format (s, "\n%UDevice version is %u, implementation id is %u",
+ format_white_space, indent, ed->dev_attr.device_version,
+ ed->dev_attr.impl_id);
+ format (s, "\n%Urx drops %lu, tx drops %lu", format_white_space, indent,
+ ed->aenq.rx_drops, ed->aenq.tx_drops);
+ format (s, "\n%ULast keepalive arrived ", format_white_space, indent);
+ if (ed->aenq.last_keepalive != 0.0)
+ format (s, "%.2f seconds ago",
+ vlib_time_now (vm) - ed->aenq.last_keepalive);
+ else
+ format (s, "never");
+ return s;
+}
+
+u8 *
+format_ena_mem_addr (u8 *s, va_list *args)
+{
+ ena_mem_addr_t *ema = va_arg (*args, ena_mem_addr_t *);
+ return format (s, "0x%lx", (u64) ema->addr_hi << 32 | ema->addr_lo);
+}
+
+u8 *
+format_ena_tx_desc (u8 *s, va_list *args)
+{
+ ena_tx_desc_t *d = va_arg (*args, ena_tx_desc_t *);
+ s =
+ format (s, "addr 0x%012lx", (u64) d->buff_addr_hi << 32 | d->buff_addr_lo);
+ s = format (s, " len %u", d->length);
+ s = format (s, " req_id 0x%x", d->req_id_lo | d->req_id_hi << 10);
+ if (d->header_length)
+ s = format (s, " hdr_len %u", d->header_length);
+#define _(v, n) \
+ if ((v) < 6 && #n[0] != '_' && d->n) \
+ s = format (s, " " #n " %u", d->n);
+ foreach_ena_tx_desc
+#undef _
+ return s;
+}
+
+u8 *
+format_ena_rx_desc_status (u8 *s, va_list *args)
+{
+ ena_rx_cdesc_status_t st = va_arg (*args, ena_rx_cdesc_status_t);
+ s = format (s, "0x%x", st.as_u32);
+ if (st.as_u32 != 0)
+ {
+ int not_first_line = 0;
+ s = format (s, " -> ");
+#define _(b, n) \
+ if (st.n) \
+ s = format (s, "%s%s %u", not_first_line++ ? ", " : "", #n, st.n);
+ foreach_ena_rx_cdesc_status
+#undef _
+ }
+ return s;
+}
+
+u8 *
+format_ena_rx_trace (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ vlib_node_t *node = va_arg (*args, vlib_node_t *);
+ ena_rx_trace_t *t = va_arg (*args, ena_rx_trace_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
+ u32 indent = format_get_indent (s);
+
+ s = format (
+ s, "ena: %v (%d) qid %u next-node %U length %u req-id 0x%x n-desc %u",
+ hi->name, t->hw_if_index, t->qid, format_vlib_next_node_name, vm,
+ node->index, t->next_index, t->length, t->req_id, t->n_desc);
+ s = format (s, "\n%Ustatus: %U", format_white_space, indent + 2,
+ format_ena_rx_desc_status, t->status);
+ return s;
+}
+
+u8 *
+format_ena_regs (u8 *s, va_list *args)
+{
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ int offset = va_arg (*args, int);
+ u32 indent = format_get_indent (s);
+ u32 rv = 0, f, v;
+ u8 *s2 = 0;
+
+#define _(o, r, rn, m) \
+ if ((offset == -1 || offset == o) && r == 1) \
+ { \
+ s = format (s, "\n%U", format_white_space, indent); \
+ vec_reset_length (s2); \
+ s2 = format (s2, "[0x%02x] %s:", o, #rn); \
+ ena_reg_read (dev, o, &rv); \
+ s = format (s, "%-34v = 0x%08x", s2, rv); \
+ f = 0; \
+ m \
+ }
+
+#define __(l, fn) \
+ if (#fn[0] != '_') \
+ { \
+ vec_reset_length (s2); \
+ s2 = format (s2, "\n%U", format_white_space, indent); \
+ s2 = format (s2, " [%2u:%2u] %s", f + l - 1, f, #fn); \
+ s = format (s, " %-35v = ", s2); \
+ v = (rv >> f) & pow2_mask (l); \
+ if (l < 3) \
+ s = format (s, "%u", v); \
+ else if (l <= 8) \
+ s = format (s, "0x%02x (%u)", v, v); \
+ else if (l <= 16) \
+ s = format (s, "0x%04x", v); \
+ else \
+ s = format (s, "0x%08x", v); \
+ } \
+ f += l;
+
+ foreach_ena_reg;
+#undef _
+
+ vec_free (s2);
+
+ return s;
+}
diff --git a/src/plugins/dev_ena/format_aq.c b/src/plugins/dev_ena/format_aq.c
new file mode 100644
index 00000000000..18bad1e050b
--- /dev/null
+++ b/src/plugins/dev_ena/format_aq.c
@@ -0,0 +1,412 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+
+static char *opcode_names[] = {
+#define _(v, s) [v] = #s,
+ foreach_ena_aq_opcode
+#undef _
+};
+
+static char *status_names[] = {
+#define _(v, s) [v] = #s,
+ foreach_ena_aq_compl_status
+#undef _
+};
+
+#define __maxval(s, f) (u64) (((typeof ((s)[0])){ .f = -1LL }).f)
+
+#define __name(s, n) \
+ { \
+ s = format (s, "%s%U%-32s: ", line ? "\n" : "", format_white_space, \
+ line ? indent : 0, #n); \
+ line++; \
+ }
+
+#define _format_number(s, d, n, ...) \
+ { \
+ __name (s, n); \
+ if (d->n < 10) \
+ s = format (s, "%u", d->n); \
+ else if (__maxval (d, n) <= 255) \
+ s = format (s, "0x%02x (%u)", d->n, d->n); \
+ else if (__maxval (d, n) <= 65535) \
+ s = format (s, "0x%04x (%u)", d->n, d->n); \
+ else \
+ s = format (s, "0x%08x (%u)", d->n, d->n); \
+ }
+
+#define _format_with_fn_and_ptr(s, c, n, f) \
+ { \
+ __name (s, n); \
+ s = format (s, "%U", f, &((c)->n)); \
+ }
+
+#define _format_with_fn_and_val(s, c, n, f) \
+ { \
+ __name (s, n); \
+ s = format (s, "%U", f, (c)->n); \
+ }
+#define _format_ena_memory(s, c, n) \
+ _format_with_fn_and_ptr (s, c, n, format_ena_mem_addr)
+
+u8 *
+format_ena_aq_opcode (u8 *s, va_list *args)
+{
+ u32 opcode = va_arg (*args, u32);
+
+ if (opcode >= ARRAY_LEN (opcode_names) || opcode_names[opcode] == 0)
+ return format (s, "UNKNOWN(%u)", opcode);
+ return format (s, "%s", opcode_names[opcode]);
+}
+
+u8 *
+format_ena_aq_status (u8 *s, va_list *args)
+{
+ u32 status = va_arg (*args, u32);
+
+ if (status >= ARRAY_LEN (status_names) || status_names[status] == 0)
+ return format (s, "UNKNOWN(%u)", status);
+ return format (s, "%s", status_names[status]);
+}
+
+u8 *
+format_ena_aq_aenq_groups (u8 *s, va_list *args)
+{
+ ena_aq_aenq_groups_t g = va_arg (*args, ena_aq_aenq_groups_t);
+ u32 i, not_first = 0;
+ u32 indent = format_get_indent (s);
+
+#define _(x) \
+ if (g.x) \
+ { \
+ if (format_get_indent (s) > 80) \
+ s = format (s, "\n%U", format_white_space, indent); \
+ s = format (s, "%s%s", not_first++ ? " " : "", #x); \
+ g.x = 0; \
+ }
+ foreach_ena_aq_aenq_groups;
+#undef _
+
+ foreach_set_bit_index (i, g.as_u32)
+ s = format (s, "%sunknown-%u", not_first++ ? " " : "", i);
+
+ return s;
+}
+
+u8 *
+format_ena_aq_feat_id_bitmap (u8 *s, va_list *args)
+{
+ u32 bmp = va_arg (*args, u32);
+ int i, line = 0;
+ u32 indent = format_get_indent (s);
+
+ foreach_set_bit_index (i, bmp)
+ {
+ ena_aq_feat_info_t *info = ena_aq_get_feat_info (i);
+ if (line++)
+ s = format (s, ", ");
+ if (format_get_indent (s) > 80)
+ s = format (s, "\n%U", format_white_space, indent);
+ if (info)
+ s = format (s, "%s", info->name);
+ else
+ s = format (s, "unknown-%u", i);
+ }
+
+ return s;
+}
+
+u8 *
+format_ena_aq_feat_name (u8 *s, va_list *args)
+{
+ ena_aq_feature_id_t feat_id = va_arg (*args, int);
+ char *feat_names[] = {
+#define _(v, r, gt, st, s, u) [v] = #s,
+ foreach_ena_aq_feature_id
+#undef _
+ };
+
+ if (feat_id >= ARRAY_LEN (feat_names) || feat_names[feat_id] == 0)
+ return format (s, "UNKNOWN(%u)", feat_id);
+ return format (s, "%s", feat_names[feat_id]);
+}
+
+u8 *
+format_ena_aq_feat_desc (u8 *s, va_list *args)
+{
+ ena_aq_feature_id_t feat_id = va_arg (*args, int);
+ void *data = va_arg (*args, void *);
+ ena_aq_feat_info_t *info = ena_aq_get_feat_info (feat_id);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ switch (feat_id)
+ {
+ case ENA_ADMIN_FEAT_ID_DEVICE_ATTRIBUTES:
+ {
+ ena_aq_feat_device_attr_t *d = data;
+ _format_number (s, d, impl_id);
+ _format_number (s, d, device_version);
+ _format_number (s, d, phys_addr_width);
+ _format_number (s, d, virt_addr_width);
+ _format_with_fn_and_val (s, d, mac_addr, format_ethernet_address);
+ _format_number (s, d, max_mtu);
+ _format_with_fn_and_val (s, d, supported_features,
+ format_ena_aq_feat_id_bitmap);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_AENQ_CONFIG:
+ {
+ ena_aq_feat_aenq_config_t *d = data;
+ _format_with_fn_and_val (s, d, supported_groups,
+ format_ena_aq_aenq_groups);
+ _format_with_fn_and_val (s, d, enabled_groups,
+ format_ena_aq_aenq_groups);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_INTERRUPT_MODERATION:
+ {
+ ena_aq_feat_intr_moder_t *d = data;
+ _format_number (s, d, intr_delay_resolution);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_STATELESS_OFFLOAD_CONFIG:
+ {
+ ena_aq_feat_stateless_offload_config_t *d = data;
+ _format_number (s, d, rx_supported);
+ _format_number (s, d, rx_enabled);
+ _format_number (s, d, tx);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_RSS_INDIRECTION_TABLE_CONFIG:
+ {
+ ena_aq_feat_rss_ind_table_config_t *d = data;
+ _format_number (s, d, min_size);
+ _format_number (s, d, max_size);
+ _format_number (s, d, size);
+ _format_number (s, d, one_entry_update);
+ _format_number (s, d, inline_index);
+ _format_number (s, d, inline_entry.cq_idx);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_MAX_QUEUES_NUM:
+ {
+ ena_aq_feat_max_queue_num_t *d = data;
+ _format_number (s, d, max_sq_num);
+ _format_number (s, d, max_sq_depth);
+ _format_number (s, d, max_cq_num);
+ _format_number (s, d, max_cq_depth);
+ _format_number (s, d, max_legacy_llq_num);
+ _format_number (s, d, max_legacy_llq_depth);
+ _format_number (s, d, max_header_size);
+ _format_number (s, d, max_packet_tx_descs);
+ _format_number (s, d, max_packet_rx_descs);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_MAX_QUEUES_EXT:
+ {
+ ena_aq_feat_max_queue_ext_t *d = data;
+ _format_number (s, d, max_rx_sq_num);
+ _format_number (s, d, max_rx_cq_num);
+ _format_number (s, d, max_tx_sq_num);
+ _format_number (s, d, max_tx_cq_num);
+ _format_number (s, d, max_rx_sq_depth);
+ _format_number (s, d, max_rx_cq_depth);
+ _format_number (s, d, max_tx_sq_depth);
+ _format_number (s, d, max_tx_cq_depth);
+ _format_number (s, d, version);
+ _format_number (s, d, max_tx_header_size);
+ _format_number (s, d, max_per_packet_tx_descs);
+ _format_number (s, d, max_per_packet_rx_descs);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_RSS_HASH_FUNCTION:
+ {
+ ena_aq_feat_rss_hash_function_t *d = data;
+ _format_number (s, d, supported_func);
+ _format_number (s, d, selected_func);
+ _format_number (s, d, init_val);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_LLQ:
+ {
+ ena_aq_feat_llq_t *d = data;
+ _format_number (s, d, max_llq_num);
+ _format_number (s, d, max_llq_depth);
+ _format_number (s, d, header_location_ctrl_supported);
+ _format_number (s, d, header_location_ctrl_enabled);
+ _format_number (s, d, entry_size_ctrl_supported);
+ _format_number (s, d, entry_size_ctrl_enabled);
+ _format_number (s, d, desc_num_before_header_supported);
+ _format_number (s, d, desc_num_before_header_enabled);
+ _format_number (s, d, descriptors_stride_ctrl_supported);
+ _format_number (s, d, descriptors_stride_ctrl_enabled);
+ _format_number (s, d, accel_mode.get.supported_flags);
+ _format_number (s, d, accel_mode.get.max_tx_burst_size);
+ _format_number (s, d, accel_mode.set.enabled_flags);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_EXTRA_PROPERTIES_STRINGS:
+ {
+ ena_aq_feat_extra_properties_strings_t *d = data;
+ _format_number (s, d, count);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_EXTRA_PROPERTIES_FLAGS:
+ {
+ ena_aq_feat_extra_properties_flags_t *d = data;
+ _format_number (s, d, flags);
+ }
+ break;
+
+ case ENA_ADMIN_FEAT_ID_HOST_ATTR_CONFIG:
+ {
+ ena_aq_feat_host_attr_config_t *d = data;
+ _format_ena_memory (s, d, os_info_ba);
+ _format_ena_memory (s, d, debug_ba);
+ _format_number (s, d, debug_area_size);
+ }
+ break;
+
+ default:
+ if (info)
+ s = format (s, "%U", format_hexdump, data, info->data_sz);
+ break;
+ }
+
+ return s;
+}
+
+u8 *
+format_ena_aq_create_sq_cmd (u8 *s, va_list *args)
+{
+ ena_aq_create_sq_cmd_t *cmd = va_arg (*args, ena_aq_create_sq_cmd_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, cmd, sq_direction);
+ _format_number (s, cmd, placement_policy);
+ _format_number (s, cmd, completion_policy);
+ _format_number (s, cmd, is_physically_contiguous);
+ _format_number (s, cmd, cq_idx);
+ _format_number (s, cmd, sq_depth);
+ _format_ena_memory (s, cmd, sq_ba);
+ _format_ena_memory (s, cmd, sq_head_writeback);
+ return s;
+}
+
+u8 *
+format_ena_aq_create_cq_cmd (u8 *s, va_list *args)
+{
+ ena_aq_create_cq_cmd_t *cmd = va_arg (*args, ena_aq_create_cq_cmd_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, cmd, interrupt_mode_enabled);
+ _format_number (s, cmd, cq_entry_size_words);
+ _format_number (s, cmd, cq_depth);
+ _format_number (s, cmd, msix_vector);
+ _format_ena_memory (s, cmd, cq_ba);
+ return s;
+}
+
+u8 *
+format_ena_aq_create_sq_resp (u8 *s, va_list *args)
+{
+ ena_aq_create_sq_resp_t *resp = va_arg (*args, ena_aq_create_sq_resp_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, resp, sq_idx);
+ _format_number (s, resp, sq_doorbell_offset);
+ _format_number (s, resp, llq_descriptors_offset);
+ _format_number (s, resp, llq_headers_offset);
+ return s;
+}
+
+u8 *
+format_ena_aq_create_cq_resp (u8 *s, va_list *args)
+{
+ ena_aq_create_cq_resp_t *resp = va_arg (*args, ena_aq_create_cq_resp_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, resp, cq_idx);
+ _format_number (s, resp, cq_actual_depth);
+ _format_number (s, resp, numa_node_register_offset);
+ _format_number (s, resp, cq_head_db_register_offset);
+ _format_number (s, resp, cq_interrupt_unmask_register_offset);
+ return s;
+}
+
+u8 *
+format_ena_aq_destroy_sq_cmd (u8 *s, va_list *args)
+{
+ ena_aq_destroy_sq_cmd_t *cmd = va_arg (*args, ena_aq_destroy_sq_cmd_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, cmd, sq_idx);
+ _format_number (s, cmd, sq_direction);
+ return s;
+}
+
+u8 *
+format_ena_aq_destroy_cq_cmd (u8 *s, va_list *args)
+{
+ ena_aq_destroy_cq_cmd_t *cmd = va_arg (*args, ena_aq_destroy_cq_cmd_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, cmd, cq_idx);
+ return s;
+}
+
+u8 *
+format_ena_aq_basic_stats (u8 *s, va_list *args)
+{
+ ena_aq_basic_stats_t *st = va_arg (*args, ena_aq_basic_stats_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, st, tx_bytes);
+ _format_number (s, st, tx_pkts);
+ _format_number (s, st, rx_bytes);
+ _format_number (s, st, rx_pkts);
+ _format_number (s, st, rx_drops);
+ _format_number (s, st, tx_drops);
+ return s;
+}
+
+u8 *
+format_ena_aq_eni_stats (u8 *s, va_list *args)
+{
+ ena_aq_eni_stats_t *st = va_arg (*args, ena_aq_eni_stats_t *);
+ u32 indent = format_get_indent (s);
+ u32 line = 0;
+
+ _format_number (s, st, bw_in_allowance_exceeded);
+ _format_number (s, st, bw_out_allowance_exceeded);
+ _format_number (s, st, pps_allowance_exceeded);
+ _format_number (s, st, conntrack_allowance_exceeded);
+ _format_number (s, st, linklocal_allowance_exceeded);
+ return s;
+}
diff --git a/src/plugins/dev_ena/port.c b/src/plugins/dev_ena/port.c
new file mode 100644
index 00000000000..2b26fefc5e3
--- /dev/null
+++ b/src/plugins/dev_ena/port.c
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "port",
+};
+
+vnet_dev_rv_t
+ena_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+
+ log_debug (dev, "port %u", port->port_id);
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+ena_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "port start: port %u", port->port_id);
+
+ if (ena_aq_feature_is_supported (dev, ENA_ADMIN_FEAT_ID_MTU))
+ {
+ ena_aq_feat_mtu_t mtu = { .mtu = port->max_rx_frame_size };
+
+ if ((rv = ena_aq_set_feature (vm, dev, ENA_ADMIN_FEAT_ID_MTU, &mtu)))
+ return rv;
+ }
+
+ if ((rv = vnet_dev_port_start_all_rx_queues (vm, port)))
+ return rv;
+
+ if ((rv = vnet_dev_port_start_all_tx_queues (vm, port)))
+ return rv;
+
+ return VNET_DEV_OK;
+}
+
+void
+ena_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ log_debug (port->dev, "port stop: port %u", port->port_id);
+}
+
+vnet_dev_rv_t
+ena_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ if (port->started)
+ rv = VNET_DEV_ERR_PORT_STARTED;
+ break;
+
+ default:
+ rv = VNET_DEV_ERR_NOT_SUPPORTED;
+ };
+
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ break;
+
+ default:
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ };
+
+ return rv;
+}
diff --git a/src/plugins/dev_ena/queue.c b/src/plugins/dev_ena/queue.c
new file mode 100644
index 00000000000..08c763c8461
--- /dev/null
+++ b/src/plugins/dev_ena/queue.c
@@ -0,0 +1,384 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "queue",
+};
+
+void
+ena_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+
+ ASSERT (rxq->started == 0);
+ ASSERT (eq->cq_created == 0);
+ ASSERT (eq->sq_created == 0);
+
+ log_debug (dev, "queue %u", rxq->queue_id);
+
+ foreach_pointer (p, eq->buffer_indices, eq->compl_sqe_indices)
+ if (p)
+ clib_mem_free (p);
+
+ foreach_pointer (p, eq->cqes, eq->sqes)
+ vnet_dev_dma_mem_free (vm, dev, p);
+}
+
+vnet_dev_rv_t
+ena_rx_queue_alloc (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+ ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+ u16 size = rxq->size;
+ vnet_dev_rv_t rv;
+
+ ASSERT (eq->buffer_indices == 0);
+ ASSERT (eq->compl_sqe_indices == 0);
+ ASSERT (eq->cqes == 0);
+ ASSERT (eq->sqes == 0);
+
+ log_debug (dev, "queue %u", rxq->queue_id);
+
+ eq->buffer_indices = clib_mem_alloc_aligned (
+ sizeof (eq->buffer_indices[0]) * size, CLIB_CACHE_LINE_BYTES);
+
+ eq->compl_sqe_indices = clib_mem_alloc_aligned (
+ sizeof (eq->compl_sqe_indices[0]) * size, CLIB_CACHE_LINE_BYTES);
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->cqes[0]) * size, 0,
+ (void **) &eq->cqes)))
+ goto err;
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->sqes[0]) * size, 0,
+ (void **) &eq->sqes)))
+ goto err;
+
+ return VNET_DEV_OK;
+
+err:
+ ena_rx_queue_free (vm, rxq);
+ return rv;
+}
+
+void
+ena_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+
+ ASSERT (txq->started == 0);
+
+ log_debug (dev, "queue %u", txq->queue_id);
+
+ foreach_pointer (p, eq->buffer_indices, eq->sqe_templates)
+ if (p)
+ clib_mem_free (p);
+
+ foreach_pointer (p, eq->cqes, eq->sqes)
+ vnet_dev_dma_mem_free (vm, dev, p);
+}
+
+vnet_dev_rv_t
+ena_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+ ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+ u16 size = txq->size;
+ vnet_dev_rv_t rv;
+
+ ASSERT (eq->buffer_indices == 0);
+ ASSERT (eq->sqe_templates == 0);
+ ASSERT (eq->cqes == 0);
+ ASSERT (eq->sqes == 0);
+
+ log_debug (dev, "queue %u", txq->queue_id);
+
+ eq->buffer_indices = clib_mem_alloc_aligned (
+ sizeof (eq->buffer_indices[0]) * size, CLIB_CACHE_LINE_BYTES);
+ eq->sqe_templates = clib_mem_alloc_aligned (
+ sizeof (eq->sqe_templates[0]) * size, CLIB_CACHE_LINE_BYTES);
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->cqes[0]) * size, 0,
+ (void **) &eq->cqes)))
+ goto err;
+
+ if ((rv = vnet_dev_dma_mem_alloc (vm, dev, sizeof (eq->sqes[0]) * size, 0,
+ (void **) &eq->sqes)))
+ goto err;
+
+ return VNET_DEV_OK;
+
+err:
+ ena_tx_queue_free (vm, txq);
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u16 buffer_size = vnet_dev_get_rx_queue_buffer_data_size (vm, rxq);
+ u16 size = rxq->size;
+ vnet_dev_rv_t rv;
+
+ /* Create Completion Queue */
+ ena_aq_create_cq_resp_t cqresp;
+ ena_aq_create_cq_cmd_t cqcmd = {
+ .interrupt_mode_enabled = 1,
+ .cq_entry_size_words = sizeof (ena_rx_cdesc_t) / 4,
+ .cq_depth = size,
+ .msix_vector = ~0,
+ };
+
+ ena_set_mem_addr (vm, dev, &cqcmd.cq_ba, eq->cqes);
+ if ((rv = ena_aq_create_cq (vm, dev, &cqcmd, &cqresp)))
+ {
+ log_err (dev, "queue %u cq creation failed", rxq->queue_id);
+ goto error;
+ }
+
+ eq->cq_idx = cqresp.cq_idx;
+ eq->cq_created = 1;
+
+ log_debug (dev, "queue %u cq %u created", rxq->queue_id, eq->cq_idx);
+
+ /* Create Submission Queue */
+ ena_aq_create_sq_resp_t sqresp;
+ ena_aq_create_sq_cmd_t sqcmd = {
+ .sq_direction = ENA_ADMIN_SQ_DIRECTION_RX,
+ .placement_policy = ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST,
+ .completion_policy = ENA_ADMIN_SQ_COMPLETION_POLICY_DESC,
+ .is_physically_contiguous = 1,
+ .sq_depth = size,
+ .cq_idx = cqresp.cq_idx,
+ };
+
+ ena_set_mem_addr (vm, dev, &sqcmd.sq_ba, eq->sqes);
+ if ((rv = ena_aq_create_sq (vm, dev, &sqcmd, &sqresp)))
+ {
+ log_err (dev, "queue %u sq creation failed", rxq->queue_id);
+ goto error;
+ }
+
+ eq->sq_idx = sqresp.sq_idx;
+ eq->sq_db = (u32 *) ((u8 *) ed->reg_bar + sqresp.sq_doorbell_offset);
+ eq->sq_created = 1;
+
+ log_debug (dev, "queue %u sq %u created, sq_db %p", rxq->queue_id,
+ eq->sq_idx, eq->sq_db);
+
+ for (int i = 0; i < size; i++)
+ {
+ eq->sqes[i] = (ena_rx_desc_t){
+ .lo = {
+ .length = buffer_size,
+ .comp_req = 1,
+ .first = 1,
+ .last = 1,
+ .reserved5 = 1, /* ena_com says MBO */
+ .req_id = i,
+ },
+ };
+ eq->buffer_indices[i] = VLIB_BUFFER_INVALID_INDEX;
+ eq->compl_sqe_indices[i] = i;
+ }
+
+ eq->sq_next = 0;
+ eq->n_compl_sqes = size;
+
+ return VNET_DEV_OK;
+
+error:
+ ena_rx_queue_stop (vm, rxq);
+ return rv;
+}
+
+vnet_dev_rv_t
+ena_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u16 size = txq->size;
+ vnet_dev_rv_t rv;
+
+ /* Create Completion Queue */
+ ena_aq_create_cq_resp_t cqresp;
+ ena_aq_create_cq_cmd_t cqcmd = {
+ .interrupt_mode_enabled = 1,
+ .cq_entry_size_words = sizeof (ena_tx_cdesc_t) / 4,
+ .cq_depth = size,
+ .msix_vector = ~0,
+ };
+
+ ena_set_mem_addr (vm, dev, &cqcmd.cq_ba, eq->cqes);
+ if ((rv = ena_aq_create_cq (vm, dev, &cqcmd, &cqresp)))
+ {
+ log_err (dev, "queue %u cq creation failed", txq->queue_id);
+ goto error;
+ }
+
+ eq->cq_idx = cqresp.cq_idx;
+ eq->cq_created = 1;
+
+ log_debug (dev, "queue %u cq %u created", txq->queue_id, eq->cq_idx);
+
+ /* Create Submission Queue */
+ ena_aq_create_sq_resp_t sqresp;
+ ena_aq_create_sq_cmd_t sqcmd = {
+ .sq_direction = ENA_ADMIN_SQ_DIRECTION_TX,
+ .placement_policy = eq->llq ? ENA_ADMIN_SQ_PLACEMENT_POLICY_DEVICE :
+ ENA_ADMIN_SQ_PLACEMENT_POLICY_HOST,
+ .completion_policy = ENA_ADMIN_SQ_COMPLETION_POLICY_DESC,
+ .is_physically_contiguous = 1,
+ .sq_depth = size,
+ .cq_idx = cqresp.cq_idx,
+ };
+
+ if (eq->llq == 0)
+ ena_set_mem_addr (vm, dev, &sqcmd.sq_ba, eq->sqes);
+ if ((rv = ena_aq_create_sq (vm, dev, &sqcmd, &sqresp)))
+ {
+ log_err (dev, "queue %u sq creation failed", txq->queue_id);
+ goto error;
+ }
+
+ eq->sq_idx = sqresp.sq_idx;
+ eq->sq_db = (u32 *) ((u8 *) ed->reg_bar + sqresp.sq_doorbell_offset);
+ eq->sq_created = 1;
+
+ log_debug (dev, "queue %u sq %u created, sq_db %p", txq->queue_id,
+ eq->sq_idx, eq->sq_db);
+
+ for (u32 i = 0; i < size; i++)
+ {
+ eq->sqe_templates[i] =
+ (ena_tx_desc_t){ .req_id_lo = i, .req_id_hi = i >> 10, .comp_req = 1 }
+ .as_u64x2[0];
+
+ eq->buffer_indices[i] = VLIB_BUFFER_INVALID_INDEX;
+ }
+
+ eq->sq_head = 0;
+ eq->sq_tail = 0;
+ eq->cq_next = 0;
+
+#if 0
+ if (txq->llq)
+ txq->llq_descs =
+ (ena_tx_llq_desc128_t *) ((u8 *) ed->mem_bar +
+ sqresp.llq_descriptors_offset);
+#endif
+
+ log_debug (dev, "queue %u sq %u created, sq_db %p llq_desc %p",
+ txq->queue_id, eq->sq_idx, eq->sq_db,
+ eq->llq ? eq->llq_descs : 0);
+ return VNET_DEV_OK;
+
+error:
+ ena_tx_queue_stop (vm, txq);
+ return rv;
+}
+
+static void
+ena_free_sq_buffer_indices (vlib_main_t *vm, u32 *sq_buffer_indices,
+ u32 n_desc)
+{
+ u32 *to = sq_buffer_indices;
+
+ for (u32 *from = to; from < sq_buffer_indices + n_desc; from++)
+ if (from[0] != VLIB_BUFFER_INVALID_INDEX)
+ to++[0] = from[0];
+
+ if (to - sq_buffer_indices > 0)
+ vlib_buffer_free (vm, sq_buffer_indices, to - sq_buffer_indices);
+}
+
+void
+ena_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ ena_rxq_t *eq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_t *dev = rxq->port->dev;
+ vnet_dev_rv_t rv;
+
+ if (eq->sq_created)
+ {
+ ena_aq_destroy_sq_cmd_t cmd = {
+ .sq_idx = eq->sq_idx,
+ .sq_direction = ENA_ADMIN_SQ_DIRECTION_TX,
+ };
+
+ if ((rv = ena_aq_destroy_sq (vm, dev, &cmd)))
+ log_err (dev, "queue %u failed to destroy sq %u", rxq->queue_id,
+ eq->sq_idx);
+ eq->sq_created = 0;
+ };
+
+ if (eq->cq_created)
+ {
+ ena_aq_destroy_cq_cmd_t cmd = {
+ .cq_idx = eq->cq_idx,
+ };
+
+ if ((rv = ena_aq_destroy_cq (vm, dev, &cmd)))
+ log_err (dev, "queue %u failed to destroy cq %u", rxq->queue_id,
+ eq->cq_idx);
+ eq->cq_created = 0;
+ };
+
+ if (eq->n_compl_sqes < rxq->size)
+ ena_free_sq_buffer_indices (vm, eq->buffer_indices, rxq->size);
+}
+
+void
+ena_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_t *dev = txq->port->dev;
+ vnet_dev_rv_t rv;
+
+ if (eq->sq_created)
+ {
+ ena_aq_destroy_sq_cmd_t cmd = {
+ .sq_idx = eq->sq_idx,
+ .sq_direction = ENA_ADMIN_SQ_DIRECTION_TX,
+ };
+
+ if ((rv = ena_aq_destroy_sq (vm, dev, &cmd)))
+ log_err (dev, "queue %u failed to destroy sq %u", txq->queue_id,
+ eq->sq_idx);
+ eq->sq_created = 0;
+ };
+
+ if (eq->cq_created)
+ {
+ ena_aq_destroy_cq_cmd_t cmd = {
+ .cq_idx = eq->cq_idx,
+ };
+
+ if ((rv = ena_aq_destroy_cq (vm, dev, &cmd)))
+ log_err (dev, "queue %u failed to destroy cq %u", txq->queue_id,
+ eq->cq_idx);
+ eq->cq_created = 0;
+ };
+
+ if (eq->sq_head != eq->sq_tail)
+ ena_free_sq_buffer_indices (vm, eq->buffer_indices, txq->size);
+}
diff --git a/src/plugins/dev_ena/reg.c b/src/plugins/dev_ena/reg.c
new file mode 100644
index 00000000000..7f2cc0f8aba
--- /dev/null
+++ b/src/plugins/dev_ena/reg.c
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+VLIB_REGISTER_LOG_CLASS (ena_log, static) = {
+ .class_name = "ena",
+ .subclass_name = "reg",
+};
+
+static vnet_dev_rv_t
+ena_err (vnet_dev_t *dev, vnet_dev_rv_t rv, char *fmt, ...)
+{
+ va_list va;
+ u8 *s;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+ log_err (dev, "%v", s);
+ vec_free (s);
+ return rv;
+}
+
+static u8 *
+format_ena_reg_name (u8 *s, va_list *args)
+{
+ int offset = va_arg (*args, int);
+
+ char *reg_names[] = {
+#define _(o, r, rn, m) [(o) >> 2] = #rn,
+ foreach_ena_reg
+#undef _
+ };
+
+ offset >>= 2;
+
+ if (offset < 0 || offset >= ARRAY_LEN (reg_names) || reg_names[offset] == 0)
+ return format (s, "(unknown)");
+ return format (s, "%s", reg_names[offset]);
+}
+
+void
+ena_reg_write (vnet_dev_t *dev, ena_reg_t reg, void *v)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ u32 *p = (u32 *) ((u8 *) ed->reg_bar + reg);
+ u32 val = *(u32 *) v;
+ log_debug (dev, "%s: reg %U (0x%02x) value 0x%08x", __func__,
+ format_ena_reg_name, reg, reg, val);
+ __atomic_store_n (p, val, __ATOMIC_RELEASE);
+}
+
+void
+ena_reg_set_dma_addr (vlib_main_t *vm, vnet_dev_t *dev, u32 rlo, u32 rhi,
+ void *p)
+{
+ uword pa = vnet_dev_get_dma_addr (vm, dev, p);
+ u32 reg = (u32) pa;
+ ena_reg_write (dev, rlo, &reg);
+ reg = pa >> 32;
+ ena_reg_write (dev, rhi, &reg);
+}
+
+void
+ena_reg_read (vnet_dev_t *dev, ena_reg_t reg, const void *v)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ vlib_main_t *vm = vlib_get_main ();
+ u32 rv;
+ f64 dt = 0, t0;
+
+ if (ed->readless == 0)
+ {
+ rv =
+ __atomic_load_n ((u32 *) ((u8 *) ed->reg_bar + reg), __ATOMIC_SEQ_CST);
+ }
+ else
+ {
+ u32 *p = (u32 *) ((u8 *) ed->reg_bar + ENA_REG_MMIO_REG_READ);
+
+ ena_reg_mmio_reg_read_t rr = { .reg_off = reg, .req_id = 1 };
+ ed->mmio_resp->req_id = 0;
+ ed->mmio_resp->reg_val = ~0;
+
+ __atomic_store_n (p, rr.as_u32, __ATOMIC_RELEASE);
+
+ t0 = vlib_time_now (vm);
+ while (ed->mmio_resp->req_id == 0 && dt < 0.2)
+ {
+ CLIB_PAUSE ();
+ dt = vlib_time_now (vm) - t0;
+ }
+
+ rv = ed->mmio_resp->reg_val;
+ }
+
+ log_debug (dev, "%s: reg %U (0x%02x) value 0x%08x dt %.3fs", __func__,
+ format_ena_reg_name, reg, reg, rv, dt);
+ *(u32 *) v = rv;
+}
+
+vnet_dev_rv_t
+ena_reg_reset (vlib_main_t *vm, vnet_dev_t *dev, ena_reset_reason_t reason)
+{
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ ena_reg_version_t ver;
+ ena_reg_controller_version_t ctrl_ver;
+ ena_reg_caps_t caps = {};
+ ena_reg_dev_sts_t dev_sts = {};
+ ena_reg_dev_ctl_t reset_start = { .dev_reset = 1, .reset_reason = reason };
+
+ if (ed->readless)
+ ena_reg_set_dma_addr (vm, dev, ENA_REG_MMIO_RESP_LO, ENA_REG_MMIO_RESP_HI,
+ ed->mmio_resp);
+
+ ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts);
+ ena_reg_read (dev, ENA_REG_CAPS, &caps);
+
+ if (caps.as_u32 == ~0 && dev_sts.as_u32 == ~0)
+ return ena_err (dev, VNET_DEV_ERR_BUS, "failed to read regs");
+
+ if (dev_sts.ready == 0)
+ return VNET_DEV_ERR_NOT_READY;
+
+ log_debug (dev, "reg_reset: reset timeout is %u", caps.reset_timeout);
+
+ ena_reg_write (dev, ENA_REG_DEV_CTL, &reset_start);
+
+ if (ed->readless)
+ ena_reg_set_dma_addr (vm, dev, ENA_REG_MMIO_RESP_LO, ENA_REG_MMIO_RESP_HI,
+ ed->mmio_resp);
+
+ while (1)
+ {
+ int i = 0;
+ ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts);
+ if (dev_sts.reset_in_progress)
+ break;
+ if (i++ == 20)
+ return ena_err (dev, VNET_DEV_ERR_BUS, "failed to initiate reset");
+ vlib_process_suspend (vm, 0.001);
+ }
+
+ ena_reg_write (dev, ENA_REG_DEV_CTL, &(ena_reg_dev_ctl_t){});
+
+ return 0;
+ while (1)
+ {
+ int i = 0;
+ ena_reg_read (dev, ENA_REG_DEV_STS, &dev_sts);
+ if (dev_sts.reset_in_progress == 0)
+ break;
+ if (i++ == 20)
+ return ena_err (dev, VNET_DEV_ERR_BUS, "failed to complete reset");
+ vlib_process_suspend (vm, 0.001);
+ }
+
+ ena_reg_read (dev, ENA_REG_VERSION, &ver);
+ ena_reg_read (dev, ENA_REG_CONTROLLER_VERSION, &ctrl_ver);
+
+ log_info (dev, "version %u.%u controller_version %u.%u.%u impl_id %u\n",
+ ver.major, ver.minor, ctrl_ver.major, ctrl_ver.minor,
+ ctrl_ver.subminor, ctrl_ver.impl_id);
+
+ return 0;
+}
diff --git a/src/plugins/dev_ena/rx_node.c b/src/plugins/dev_ena/rx_node.c
new file mode 100644
index 00000000000..41fc5b8c943
--- /dev/null
+++ b/src/plugins/dev_ena/rx_node.c
@@ -0,0 +1,457 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/vector/mask_compare.h>
+#include <vppinfra/vector/compress.h>
+
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+#define ENA_RX_REFILL_BATCH 32
+
+typedef struct
+{
+ u16 phase_bit;
+ u16 size;
+ u32 mask;
+ ena_rx_cdesc_status_t st_or;
+ ena_rx_cdesc_status_t st_and;
+ u16 *comp_sqe_indices;
+ u32 *sq_buffer_indices;
+} ena_rx_ctx_t;
+
+static_always_inline void
+ena_device_input_status_to_flags (ena_rx_cdesc_status_t *statuses, u32 *flags,
+ u32 n_desc, vlib_frame_bitmap_t first_bmp,
+ int maybe_chained)
+{
+ const ena_rx_cdesc_status_t mask_first = { .first = 1 },
+ match_first1 = { .first = 1 };
+
+ const ena_rx_cdesc_status_t mask_last = { .last = 1 },
+ match_last0 = { .last = 0 };
+
+ const ena_rx_cdesc_status_t mask_l4_csum = { .ipv4_frag = 1,
+ .l4_csum_checked = 1,
+ .l4_csum_err = 1 },
+ match_l4_csum_ok = { .l4_csum_checked = 1 };
+
+ clib_memset_u32 (statuses + n_desc, 0, 8);
+#if defined(CLIB_HAVE_VEC128)
+
+#if defined(CxLIB_HAVE_VEC512)
+#define N 16
+#define u32xN u32x16
+#define u32xNu u32x16u
+#define u32xN_splat u32x16_splat
+#elif defined(CxLIB_HAVE_VEC256)
+#define N 8
+#define u32xN u32x8
+#define u32xNu u32x8u
+#define u32xN_splat u32x8_splat
+#else
+#define N 4
+#define u32xN u32x4
+#define u32xNu u32x4u
+#define u32xN_splat u32x4_splat
+#endif
+
+ const u32xN st_mask_first = u32xN_splat (mask_first.as_u32);
+ const u32xN st_match_first1 = u32xN_splat (match_first1.as_u32);
+ const u32xN st_mask_last = u32xN_splat (mask_last.as_u32);
+ const u32xN st_match_last0 = u32xN_splat (match_last0.as_u32);
+ const u32xN st_mask_l4_csum = u32xN_splat (mask_l4_csum.as_u32);
+ const u32xN st_match_l4_csum_ok = u32xN_splat (match_l4_csum_ok.as_u32);
+ const u32xN f_total_len_valid = u32xN_splat (VLIB_BUFFER_TOTAL_LENGTH_VALID);
+ const u32xN f_next_preset = u32xN_splat (VLIB_BUFFER_NEXT_PRESENT);
+ const u32xN f_l4_csum = u32xN_splat (VNET_BUFFER_F_L4_CHECKSUM_CORRECT |
+ VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
+
+ for (u32 i = 0; i < round_pow2 (n_desc, 2 * N); i += 2 * N)
+ {
+ uword msk = 0;
+ u32xN f0, f1, r0, r1;
+ u32xN s0 = ((u32xNu *) (statuses + i))[0];
+ u32xN s1 = ((u32xNu *) (statuses + i))[1];
+
+ r0 = (s0 & st_mask_first) == st_match_first1;
+ r1 = (s1 & st_mask_first) == st_match_first1;
+ f0 = r0 & f_total_len_valid;
+ f1 = r1 & f_total_len_valid;
+
+ if (maybe_chained)
+ {
+#if defined(CxLIB_HAVE_VEC512)
+ u64 msb_mask = 0x1111111111111111;
+ msk = bit_extract_u64 (u8x64_msb_mask ((u8x64) r0), msb_mask);
+ msk |= bit_extract_u64 (u8x64_msb_mask ((u8x64) r1), msb_mask) << 16;
+#elif defined(CxLIB_HAVE_VEC256)
+ msk = u8x32_msb_mask ((u8x32) r0);
+ msk |= (u64) u8x32_msb_mask ((u8x32) r1) << 32;
+ msk = bit_extract_u64 (msk, 0x1111111111111111);
+#else
+ msk = u8x16_msb_mask ((u8x16) r0);
+ msk |= (u32) u8x16_msb_mask ((u8x16) r1) << 16;
+ msk = bit_extract_u32 (msk, 0x11111111);
+#endif
+ first_bmp[i / uword_bits] |= msk << (i % uword_bits);
+ }
+
+ f0 |= ((s0 & st_mask_last) == st_match_last0) & f_next_preset;
+ f1 |= ((s1 & st_mask_last) == st_match_last0) & f_next_preset;
+
+ f0 |= ((s0 & st_mask_l4_csum) == st_match_l4_csum_ok) & f_l4_csum;
+ f1 |= ((s1 & st_mask_l4_csum) == st_match_l4_csum_ok) & f_l4_csum;
+
+ ((u32xNu *) (flags + i))[0] = f0;
+ ((u32xNu *) (flags + i))[1] = f1;
+ }
+#else
+ while (n_left)
+ {
+ u16 f = 0;
+ ena_rx_cdesc_status_t st = statuses++[0];
+
+ if ((st.as_u32 & mask_first.as_u32) == match_first1.as_u32)
+ f |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ if ((st.as_u32 & mask_last.as_u32) == match_last0.as_u32)
+ f |= VLIB_BUFFER_NEXT_PRESENT;
+
+ if ((st.as_u32 & mask_l4_csum.as_u32) == match_l4_csum_ok.as_u32)
+ f |= VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
+ VNET_BUFFER_F_L4_CHECKSUM_CORRECT;
+
+ flags++[0] = f;
+ n_left--;
+ }
+#endif
+}
+
+static_always_inline u16
+ena_device_input_cq_dequeue_no_wrap (ena_rx_ctx_t *ctx, ena_rxq_t *q,
+ ena_rx_cdesc_status_t *statuses,
+ u16 *lengths, u16 *csi)
+{
+ u32 next = q->cq_next;
+ ena_rx_cdesc_t *cqes = q->cqes;
+ u32 phase = (next & ctx->size << 1) != 0;
+ u16 index = next & ctx->mask;
+ ena_rx_cdesc_t *cd = cqes + index;
+ ena_rx_cdesc_status_t st;
+ u32 n_to_check, i = 0;
+
+ st = cd->status;
+ if (st.phase == phase)
+ return 0;
+
+ n_to_check = clib_min (VLIB_FRAME_SIZE, ctx->size - index);
+
+ ctx->st_or.as_u32 |= st.as_u32;
+ ctx->st_and.as_u32 &= st.as_u32;
+ statuses[i] = st;
+ lengths[i] = cd->length;
+ csi[i] = cd->req_id;
+ i++;
+ cd++;
+
+more:
+ for (st = cd->status; i < n_to_check && st.phase != phase;
+ i++, st = (++cd)->status)
+ {
+ ctx->st_or.as_u32 |= st.as_u32;
+ ctx->st_and.as_u32 &= st.as_u32;
+ statuses[i] = st;
+ lengths[i] = cd->length;
+ csi[i] = cd->req_id;
+ }
+
+ if (i == n_to_check)
+ {
+ n_to_check = VLIB_FRAME_SIZE - n_to_check;
+ if (n_to_check)
+ {
+ phase ^= 1;
+ cd = cqes;
+ goto more;
+ }
+ }
+
+ /* revert incomplete */
+ if (PREDICT_FALSE (statuses[i - 1].last == 0))
+ {
+ i--;
+ while (i && statuses[i - 1].last == 0)
+ i--;
+ }
+
+ return i;
+}
+
+static_always_inline void
+ena_device_input_refill (vlib_main_t *vm, ena_rx_ctx_t *ctx,
+ vnet_dev_rx_queue_t *rxq, int use_va)
+{
+ ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq);
+ const u64x2 flip_phase = (ena_rx_desc_t){ .lo.phase = 1 }.as_u64x2;
+ u32 buffer_indices[ENA_RX_REFILL_BATCH];
+ uword dma_addr[ENA_RX_REFILL_BATCH];
+ u32 n_alloc, n_compl_sqes = q->n_compl_sqes;
+ u16 *csi = ctx->comp_sqe_indices;
+ ena_rx_desc_t *sqes = q->sqes;
+
+ while (n_compl_sqes > 0)
+ {
+ n_alloc = vlib_buffer_alloc_from_pool (
+ vm, buffer_indices, clib_min (ENA_RX_REFILL_BATCH, n_compl_sqes),
+ vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+
+ if (PREDICT_FALSE (n_alloc == 0))
+ break;
+
+ vlib_get_buffers_with_offset (vm, buffer_indices, (void **) dma_addr,
+ ENA_RX_REFILL_BATCH,
+ STRUCT_OFFSET_OF (vlib_buffer_t, data));
+
+ if (!use_va)
+ for (u32 i = 0; i < n_alloc; i++)
+ dma_addr[i] = vlib_physmem_get_pa (vm, (void *) dma_addr[i]);
+
+ for (u32 i = 0; i < n_alloc; i++)
+ {
+ u16 slot = csi[i];
+ u64x2 r = sqes[slot].as_u64x2 ^ flip_phase;
+ ctx->sq_buffer_indices[slot] = buffer_indices[i];
+ r[1] = dma_addr[i];
+ sqes[slot].as_u64x2 = r; /* write SQE as single 16-byte store */
+ }
+
+ csi += n_alloc;
+ n_compl_sqes -= n_alloc;
+ }
+
+ if (n_compl_sqes == q->n_compl_sqes)
+ return;
+
+ q->sq_next += q->n_compl_sqes - n_compl_sqes;
+ __atomic_store_n (q->sq_db, q->sq_next, __ATOMIC_RELEASE);
+
+ if (PREDICT_FALSE (n_compl_sqes))
+ clib_memmove (ctx->comp_sqe_indices, csi, n_compl_sqes * sizeof (csi[0]));
+
+ q->n_compl_sqes = n_compl_sqes;
+}
+
+static_always_inline uword
+ena_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_dev_rx_queue_t *rxq)
+{
+ ena_rxq_t *q = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_port_t *port = rxq->port;
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_buffer_t *buffers[VLIB_FRAME_SIZE], **b;
+ ena_rx_cdesc_status_t statuses[VLIB_FRAME_SIZE + 8];
+ u16 lengths[VLIB_FRAME_SIZE + 8], *l;
+ u32 flags[VLIB_FRAME_SIZE + 8], *f;
+ u16 *csi;
+ uword n_rx_packets = 0, n_rx_bytes = 0;
+ vlib_frame_bitmap_t head_bmp = {};
+ u32 sw_if_index = port->intf.sw_if_index;
+ u32 hw_if_index = port->intf.hw_if_index;
+ u32 n_trace, n_deq, n_left;
+ u32 cq_next = q->cq_next;
+ u32 next_index = rxq->next_index;
+ vlib_frame_t *next_frame;
+ vlib_buffer_template_t bt = rxq->buffer_template;
+ u32 *bi;
+ int maybe_chained;
+
+ ASSERT (count_set_bits (rxq->size) == 1);
+ ena_rx_ctx_t ctx = {
+ .size = rxq->size,
+ .mask = rxq->size - 1,
+ .st_and.as_u32 = ~0,
+ .comp_sqe_indices = q->compl_sqe_indices,
+ .sq_buffer_indices = q->buffer_indices,
+ };
+
+ /* we may have completed SQE indices from previous run */
+ csi = ctx.comp_sqe_indices + q->n_compl_sqes;
+
+ n_deq =
+ ena_device_input_cq_dequeue_no_wrap (&ctx, q, statuses, lengths, csi);
+
+ if (n_deq == 0)
+ goto refill;
+
+ q->n_compl_sqes += n_deq;
+
+ maybe_chained = ctx.st_and.first && ctx.st_and.last ? 0 : 1;
+
+ next_frame =
+ vlib_get_next_frame_internal (vm, node, next_index, /* new frame */ 1);
+ bi = vlib_frame_vector_args (next_frame);
+
+ /* move buffer indices from the ring */
+ for (u32 i = 0; i < n_deq; i++)
+ {
+ u32 slot = csi[i];
+ bi[i] = ctx.sq_buffer_indices[slot];
+ ctx.sq_buffer_indices[slot] = VLIB_BUFFER_INVALID_INDEX;
+ }
+
+ vlib_get_buffers (vm, bi, buffers, n_deq);
+
+ if (PREDICT_FALSE (maybe_chained))
+ ena_device_input_status_to_flags (statuses, flags, n_deq, head_bmp, 1);
+ else
+ ena_device_input_status_to_flags (statuses, flags, n_deq, head_bmp, 0);
+
+ for (b = buffers, l = lengths, f = flags, n_left = n_deq; n_left >= 8;
+ b += 4, f += 4, l += 4, n_left -= 4)
+ {
+ clib_prefetch_store (b[4]);
+ clib_prefetch_store (b[5]);
+ clib_prefetch_store (b[6]);
+ clib_prefetch_store (b[7]);
+ b[0]->template = bt;
+ n_rx_bytes += b[0]->current_length = l[0];
+ b[0]->flags = f[0];
+ b[1]->template = bt;
+ n_rx_bytes += b[1]->current_length = l[1];
+ b[1]->flags = f[1];
+ b[2]->template = bt;
+ n_rx_bytes += b[2]->current_length = l[2];
+ b[2]->flags = f[2];
+ b[3]->template = bt;
+ n_rx_bytes += b[3]->current_length = l[3];
+ b[3]->flags = f[3];
+ }
+
+ for (; n_left > 0; b += 1, f += 1, l += 1, n_left -= 1)
+ {
+ b[0]->template = bt;
+ n_rx_bytes += b[0]->current_length = l[0];
+ b[0]->flags = f[0];
+ }
+
+ if (maybe_chained)
+ {
+ vlib_buffer_t *hb = 0;
+ vlib_frame_bitmap_t tail_buf_bmp = {};
+ u32 i, total_len = 0, head_flags = 0, tail_flags = 0;
+ n_rx_packets = vlib_frame_bitmap_count_set_bits (head_bmp);
+
+ vlib_frame_bitmap_init (tail_buf_bmp, n_deq);
+ vlib_frame_bitmap_xor (tail_buf_bmp, head_bmp);
+
+ foreach_vlib_frame_bitmap_set_bit_index (i, tail_buf_bmp)
+ {
+ vlib_buffer_t *pb = buffers[i - 1];
+ /* only store opertations here */
+ pb->next_buffer = bi[i];
+ if (vlib_frame_bitmap_is_bit_set (tail_buf_bmp, i - 1) == 0)
+ {
+ if (hb)
+ {
+ hb->total_length_not_including_first_buffer = total_len;
+ /* tail descriptor contains protocol info so we need to
+ * combine head and tail buffer flags */
+ hb->flags = head_flags | tail_flags;
+ }
+ head_flags = flags[i - 1];
+ total_len = 0;
+ hb = pb;
+ }
+ total_len += lengths[i];
+ tail_flags = flags[i];
+ }
+
+ hb->total_length_not_including_first_buffer = total_len;
+ hb->flags = head_flags | tail_flags;
+ }
+ else
+ n_rx_packets = n_deq;
+
+ /* packet tracing */
+ if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
+ {
+ u32 i;
+ if (!maybe_chained)
+ vlib_frame_bitmap_init (head_bmp, n_deq);
+ foreach_vlib_frame_bitmap_set_bit_index (i, head_bmp)
+ {
+ vlib_buffer_t *b = buffers[i];
+ if (vlib_trace_buffer (vm, node, next_index, b, 0))
+ {
+ u32 j = i;
+ ena_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+ tr->next_index = next_index;
+ tr->qid = rxq->queue_id;
+ tr->hw_if_index = hw_if_index;
+ tr->n_desc = 1;
+ tr->length = lengths[i];
+ tr->req_id = csi[i];
+ tr->status = statuses[i];
+ while (statuses[j].last == 0)
+ {
+ j++;
+ tr->n_desc++;
+ tr->length += lengths[j];
+ }
+ tr->status = statuses[j];
+
+ if (-n_trace)
+ goto trace_done;
+ }
+ }
+ trace_done:
+ vlib_set_trace_count (vm, node, n_trace);
+ }
+
+ if (PREDICT_FALSE (maybe_chained))
+ clib_compress_u32 (bi, bi, head_bmp, n_deq);
+
+ if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
+ {
+ ethernet_input_frame_t *ef;
+ next_frame->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (next_frame);
+ ef->sw_if_index = sw_if_index;
+ ef->hw_if_index = hw_if_index;
+
+ if (ctx.st_or.l3_csum_err == 0)
+ next_frame->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
+ vlib_frame_no_append (next_frame);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, VLIB_FRAME_SIZE - n_rx_packets);
+
+ vlib_increment_combined_counter (
+ vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ vm->thread_index, hw_if_index, n_rx_packets, n_rx_bytes);
+
+ q->cq_next = cq_next + n_deq;
+
+refill:
+ if (rxq->port->dev->va_dma)
+ ena_device_input_refill (vm, &ctx, rxq, 1);
+ else
+ ena_device_input_refill (vm, &ctx, rxq, 0);
+
+ return n_rx_packets;
+}
+
+VNET_DEV_NODE_FN (ena_rx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ u32 n_rx = 0;
+ foreach_vnet_dev_rx_queue_runtime (rxq, node)
+ n_rx += ena_device_input_inline (vm, node, rxq);
+ return n_rx;
+}
diff --git a/src/plugins/dev_ena/tx_node.c b/src/plugins/dev_ena/tx_node.c
new file mode 100644
index 00000000000..ae1b852c036
--- /dev/null
+++ b/src/plugins/dev_ena/tx_node.c
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_ena/ena.h>
+#include <vnet/ethernet/ethernet.h>
+#include <dev_ena/ena.h>
+#include <dev_ena/ena_inlines.h>
+
+#define ENA_TX_ENQ_BATCH_SZ 64
+#define ENA_MAX_LOG2_TXQ_SIZE 11
+#define ENA_TX_MAX_TAIL_LEN 5
+
+typedef struct
+{
+ u32 n_bytes;
+ ena_device_t *ed;
+ u16 n_desc;
+ u32 mask;
+ u16 n_packets_left;
+ u16 n_free_slots;
+ u32 *from;
+ u32 *sq_buffer_indices;
+ u32 tmp_bi[VLIB_FRAME_SIZE];
+ ena_tx_desc_t *sqes;
+ u64 *sqe_templates;
+ u16 n_dropped_chain_too_long;
+ u8 llq;
+ void *bd;
+} ena_tx_ctx_t;
+
+/* bits inside req_id which represent SQE index */
+static const u16 reqid_sqe_idx_mask = (1U << ENA_MAX_LOG2_TXQ_SIZE) - 1;
+
+static_always_inline void
+ena_txq_adv_sq_tail (ena_tx_ctx_t *ctx, ena_txq_t *eq)
+{
+ /* CQEs can arrive out of order, so we cannot blindly advance SQ tail for
+ * number of free slots, instead we need to check if slot contains invalid
+ * buffer index */
+
+ u32 sq_head = eq->sq_head;
+ u32 sq_tail = eq->sq_tail;
+ u16 n, offset = sq_tail & ctx->mask;
+ u32 *bi = ctx->sq_buffer_indices + offset;
+ u16 n_to_check = clib_min (sq_head - sq_tail, ctx->n_desc - offset);
+
+advance_sq_tail:
+ n = n_to_check;
+
+#ifdef CLIB_HAVE_VEC256
+ for (; n >= 8; n -= 8, bi += 8)
+ if (!u32x8_is_all_equal (*(u32x8u *) bi, VLIB_BUFFER_INVALID_INDEX))
+ break;
+#elif defined(CLIB_HAVE_VEC128)
+ for (; n >= 4; n -= 4, bi += 4)
+ if (!u32x4_is_all_equal (*(u32x4u *) bi, VLIB_BUFFER_INVALID_INDEX))
+ break;
+#endif
+
+ for (; n > 0; n -= 1, bi += 1)
+ if (bi[0] != VLIB_BUFFER_INVALID_INDEX)
+ break;
+
+ sq_tail += n_to_check - n;
+
+ if (n == 0 && sq_tail < sq_head)
+ {
+ n_to_check = sq_head - sq_tail;
+ bi = ctx->sq_buffer_indices;
+ goto advance_sq_tail;
+ }
+
+ eq->sq_tail = sq_tail;
+}
+
+static_always_inline void
+ena_txq_deq (vlib_main_t *vm, ena_tx_ctx_t *ctx, ena_txq_t *txq)
+{
+ /* dequeue CQ, extract SQ slot and number of chained buffers from
+ * req_id, move completed buffer indices to temp array */
+ const ena_tx_cdesc_t mask_phase = { .phase = 1 };
+ ena_tx_cdesc_t *cqes = txq->cqes, *cd, match_phase = {};
+ u32 cq_next = txq->cq_next;
+ u32 offset, n = 0;
+ u32 n_to_check;
+ u32 *buffers_to_free = ctx->tmp_bi;
+ u32 n_buffers_to_free = 0;
+
+ offset = cq_next & ctx->mask;
+ cd = cqes + offset;
+ n_to_check = ctx->n_desc - offset;
+ match_phase.phase = ~(cq_next & (ctx->n_desc << 1)) != 0;
+
+#ifdef CLIB_HAVE_VEC256
+ const u16 reqid_nic1 = 1U << ENA_MAX_LOG2_TXQ_SIZE;
+ const ena_tx_cdesc_t mask_reqid = { .req_id = reqid_sqe_idx_mask },
+ match_ph0_nic1 = { .req_id = reqid_nic1, .phase = 0 },
+ match_ph1_nic1 = { .req_id = reqid_nic1, .phase = 1 },
+ mask_ph_nic = { .req_id = ~reqid_sqe_idx_mask,
+ .phase = 1 };
+ /* both phase and req_id are in lower 32 bits */
+ u32x8 mask_ph_nic_x8 = u32x8_splat (mask_ph_nic.as_u64);
+ u32x8 mask_reqid_x8 = u32x8_splat (mask_reqid.as_u64);
+ u32x8 match_ph_nic1_x8 = u32x8_splat (
+ match_phase.phase ? match_ph1_nic1.as_u64 : match_ph0_nic1.as_u64);
+ u32x8 buf_inv_idx_x8 = u32x8_splat (VLIB_BUFFER_INVALID_INDEX);
+#endif
+
+more:
+ while (n < n_to_check)
+ {
+ u16 req_id, n_in_chain;
+
+#ifdef CLIB_HAVE_VEC256
+ while (n + 7 < n_to_check)
+ {
+ u32x8 r, v;
+
+ /* load lower 32-bits of 8 CQEs in 256-bit register */
+ r = u32x8_shuffle2 (*(u32x8u *) cd, *(u32x8u *) (cd + 4), 0, 2, 4, 6,
+ 8, 10, 12, 14);
+
+ /* check if all 8 CQEs are completed and there is no chained bufs */
+ if (u32x8_is_equal (r & mask_ph_nic_x8, match_ph_nic1_x8) == 0)
+ goto one_by_one;
+
+ r &= mask_reqid_x8;
+
+ /* take consumed buffer indices from ring */
+ v = u32x8_gather_u32 (ctx->sq_buffer_indices, r,
+ sizeof (ctx->sq_buffer_indices[0]));
+ u32x8_scatter_u32 (ctx->sq_buffer_indices, r, buf_inv_idx_x8,
+ sizeof (ctx->sq_buffer_indices[0]));
+ *(u32x8u *) (buffers_to_free + n_buffers_to_free) = v;
+ n_buffers_to_free += 8;
+
+ n += 8;
+ cd += 8;
+ continue;
+ }
+ one_by_one:
+#endif
+
+ if ((cd->as_u64 & mask_phase.as_u64) != match_phase.as_u64)
+ goto done;
+
+ req_id = cd->req_id;
+ n_in_chain = req_id >> ENA_MAX_LOG2_TXQ_SIZE;
+ req_id &= reqid_sqe_idx_mask;
+
+ buffers_to_free[n_buffers_to_free++] = ctx->sq_buffer_indices[req_id];
+ ctx->sq_buffer_indices[req_id] = VLIB_BUFFER_INVALID_INDEX;
+
+ if (PREDICT_FALSE (n_in_chain > 1))
+ while (n_in_chain-- > 1)
+ {
+ req_id = (req_id + 1) & ctx->mask;
+ buffers_to_free[n_buffers_to_free++] =
+ ctx->sq_buffer_indices[req_id];
+ ctx->sq_buffer_indices[req_id] = VLIB_BUFFER_INVALID_INDEX;
+ }
+
+ n++;
+ cd++;
+ }
+
+ if (PREDICT_FALSE (n == n_to_check))
+ {
+ cq_next += n;
+ n = 0;
+ cd = cqes;
+ match_phase.phase ^= 1;
+#ifdef CLIB_HAVE_VEC256
+ match_ph_nic1_x8 ^= u32x8_splat (mask_phase.as_u64);
+#endif
+ n_to_check = ctx->n_desc;
+ goto more;
+ }
+
+done:
+
+ if (n_buffers_to_free)
+ {
+ cq_next += n;
+
+ /* part two - free buffers stored in temporary array */
+ vlib_buffer_free_no_next (vm, buffers_to_free, n_buffers_to_free);
+ txq->cq_next = cq_next;
+
+ ena_txq_adv_sq_tail (ctx, txq);
+ }
+}
+
+static_always_inline u16
+ena_txq_wr_sqe (vlib_main_t *vm, vlib_buffer_t *b, int use_iova,
+ ena_tx_desc_t *dp, u32 n_in_chain, ena_tx_desc_t desc)
+{
+ uword dma_addr = use_iova ? vlib_buffer_get_current_va (b) :
+ vlib_buffer_get_current_pa (vm, b);
+ u16 len = b->current_length;
+
+ desc.req_id_hi = n_in_chain << (ENA_MAX_LOG2_TXQ_SIZE - 10);
+ desc.as_u16x8[0] = len;
+ ASSERT (dma_addr < 0xffffffffffff); /* > 48bit - should never happen */
+ desc.as_u64x2[1] = dma_addr; /* this also overwrites header_length */
+
+ /* write descriptor as single 128-bit store */
+ dp->as_u64x2 = desc.as_u64x2;
+ return len;
+}
+
+static_always_inline void
+ena_txq_copy_sqes (ena_tx_ctx_t *ctx, u32 off, ena_tx_desc_t *s, u32 n_desc)
+{
+ const u64 temp_phase_xor = (ena_tx_desc_t){ .phase = 1 }.as_u64x2[0];
+ u32 n = 0;
+
+ if (ctx->llq)
+ {
+ ena_tx_llq_desc128_t *llq_descs = (ena_tx_llq_desc128_t *) ctx->sqes;
+ for (; n < n_desc; n += 1, s += 1, off += 1)
+ {
+ ena_tx_llq_desc128_t td = {};
+ u64 t = ctx->sqe_templates[off];
+ u64x2 v = { t, 0 };
+ ctx->sqe_templates[off] = t ^ temp_phase_xor;
+ td.desc[0].as_u64x2 = v | s->as_u64x2;
+ td.desc[0].phase = 1;
+ td.desc[0].header_length = 96;
+ td.desc[0].length -= 96;
+ td.desc[0].buff_addr_lo += 96;
+ vlib_buffer_t *b =
+ vlib_get_buffer (vlib_get_main (), ctx->sq_buffer_indices[off]);
+ clib_memcpy_fast (td.data, vlib_buffer_get_current (b), 96);
+ fformat (stderr, "%U\n", format_hexdump_u32, &td, 32);
+ fformat (stderr, "%U\n", format_ena_tx_desc, &td);
+ clib_memcpy_fast (llq_descs + off, &td, 128);
+ }
+ return;
+ }
+
+#ifdef CLIB_HAVE_VEC512
+ u64x8 temp_phase_xor_x8 = u64x8_splat (temp_phase_xor);
+ for (; n + 7 < n_desc; n += 8, s += 8, off += 8)
+ {
+ u64x8 t8 = *(u64x8u *) (ctx->sqe_templates + off);
+ *(u64x8u *) (ctx->sqe_templates + off) = t8 ^ temp_phase_xor_x8;
+ u64x8 r0 = *(u64x8u *) s;
+ u64x8 r1 = *(u64x8u *) (s + 4);
+ r0 |= u64x8_shuffle2 (t8, (u64x8){}, 0, 9, 1, 11, 2, 13, 3, 15);
+ r1 |= u64x8_shuffle2 (t8, (u64x8){}, 4, 9, 5, 11, 6, 13, 7, 15);
+ *((u64x8u *) (ctx->sqes + off)) = r0;
+ *((u64x8u *) (ctx->sqes + off + 4)) = r1;
+ }
+#elif defined(CLIB_HAVE_VEC256)
+ u64x4 temp_phase_xor_x4 = u64x4_splat (temp_phase_xor);
+ for (; n + 3 < n_desc; n += 4, s += 4, off += 4)
+ {
+ u64x4 t4 = *(u64x4u *) (ctx->sqe_templates + off);
+ *(u64x4u *) (ctx->sqe_templates + off) = t4 ^ temp_phase_xor_x4;
+ u64x4 r0 = *(u64x4u *) s;
+ u64x4 r1 = *(u64x4u *) (s + 2);
+ r0 |= u64x4_shuffle2 (t4, (u64x4){}, 0, 5, 1, 7);
+ r1 |= u64x4_shuffle2 (t4, (u64x4){}, 2, 5, 3, 7);
+ *((u64x4u *) (ctx->sqes + off)) = r0;
+ *((u64x4u *) (ctx->sqes + off + 2)) = r1;
+ }
+#endif
+
+ for (; n < n_desc; n += 1, s += 1, off += 1)
+ {
+ u64 t = ctx->sqe_templates[off];
+ u64x2 v = { t, 0 };
+ ctx->sqe_templates[off] = t ^ temp_phase_xor;
+ ctx->sqes[off].as_u64x2 = v | s->as_u64x2;
+ }
+}
+
+static_always_inline u32
+ena_txq_enq_one (vlib_main_t *vm, ena_tx_ctx_t *ctx, vlib_buffer_t *b0,
+ ena_tx_desc_t *d, u16 n_free_desc, u32 *f, int use_iova)
+{
+ const ena_tx_desc_t single = { .first = 1, .last = 1 };
+ vlib_buffer_t *b;
+ u32 i, n;
+
+ /* non-chained buffer */
+ if ((b0->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)
+ {
+ ctx->n_bytes += ena_txq_wr_sqe (vm, b0, use_iova, d, 1, single);
+ f[0] = ctx->from[0];
+ ctx->from += 1;
+ ctx->n_packets_left -= 1;
+ return 1;
+ }
+
+ /* count number of buffers in chain */
+ for (n = 1, b = b0; b->flags & VLIB_BUFFER_NEXT_PRESENT; n++)
+ b = vlib_get_buffer (vm, b->next_buffer);
+
+ /* if chain is too long, drop packet */
+ if (n > ENA_TX_MAX_TAIL_LEN + 1)
+ {
+ vlib_buffer_free_one (vm, ctx->from[0]);
+ ctx->from += 1;
+ ctx->n_packets_left -= 1;
+ ctx->n_dropped_chain_too_long++;
+ return 0;
+ }
+
+ /* no enough descriptors to accomodate? */
+ if (n > n_free_desc)
+ return 0;
+
+ /* first */
+ f++[0] = ctx->from[0];
+ ctx->from += 1;
+ ctx->n_packets_left -= 1;
+ ctx->n_bytes +=
+ ena_txq_wr_sqe (vm, b0, use_iova, d++, n, (ena_tx_desc_t){ .first = 1 });
+
+ /* mid */
+ for (i = 1, b = b0; i < n - 1; i++)
+ {
+ f++[0] = b->next_buffer;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ ctx->n_bytes +=
+ ena_txq_wr_sqe (vm, b, use_iova, d++, 0, (ena_tx_desc_t){});
+ }
+
+ /* last */
+ f[0] = b->next_buffer;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ ctx->n_bytes +=
+ ena_txq_wr_sqe (vm, b, use_iova, d, 0, (ena_tx_desc_t){ .last = 1 });
+
+ return n;
+}
+
+static_always_inline uword
+ena_txq_enq (vlib_main_t *vm, ena_tx_ctx_t *ctx, ena_txq_t *txq, int use_iova)
+{
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ u32 *f = ctx->tmp_bi;
+ ena_tx_desc_t desc[ENA_TX_ENQ_BATCH_SZ], *d = desc;
+ const ena_tx_desc_t single = { .first = 1, .last = 1 };
+ u32 n_desc_left, n;
+
+ if (ctx->n_packets_left == 0)
+ return 0;
+
+ if (ctx->n_free_slots == 0)
+ return 0;
+
+ n_desc_left = clib_min (ENA_TX_ENQ_BATCH_SZ, ctx->n_free_slots);
+
+ while (n_desc_left >= 4 && ctx->n_packets_left >= 8)
+ {
+ clib_prefetch_load (vlib_get_buffer (vm, ctx->from[4]));
+ b0 = vlib_get_buffer (vm, ctx->from[0]);
+ clib_prefetch_load (vlib_get_buffer (vm, ctx->from[5]));
+ b1 = vlib_get_buffer (vm, ctx->from[1]);
+ clib_prefetch_load (vlib_get_buffer (vm, ctx->from[6]));
+ b2 = vlib_get_buffer (vm, ctx->from[2]);
+ clib_prefetch_load (vlib_get_buffer (vm, ctx->from[7]));
+ b3 = vlib_get_buffer (vm, ctx->from[3]);
+
+ if (PREDICT_FALSE (((b0->flags | b1->flags | b2->flags | b3->flags) &
+ VLIB_BUFFER_NEXT_PRESENT) == 0))
+ {
+ ctx->n_bytes += ena_txq_wr_sqe (vm, b0, use_iova, d++, 1, single);
+ ctx->n_bytes += ena_txq_wr_sqe (vm, b1, use_iova, d++, 1, single);
+ ctx->n_bytes += ena_txq_wr_sqe (vm, b2, use_iova, d++, 1, single);
+ ctx->n_bytes += ena_txq_wr_sqe (vm, b3, use_iova, d++, 1, single);
+ vlib_buffer_copy_indices (f, ctx->from, 4);
+ ctx->from += 4;
+ ctx->n_packets_left -= 4;
+
+ n_desc_left -= 4;
+ f += 4;
+ }
+ else
+ {
+ n = ena_txq_enq_one (vm, ctx, b0, d, n_desc_left, f, use_iova);
+ if (n == 0)
+ break;
+ n_desc_left -= n;
+ f += n;
+ d += n;
+ }
+ }
+
+ while (n_desc_left > 0 && ctx->n_packets_left > 0)
+ {
+ vlib_buffer_t *b0;
+
+ b0 = vlib_get_buffer (vm, ctx->from[0]);
+ n = ena_txq_enq_one (vm, ctx, b0, d, n_desc_left, f, use_iova);
+ if (n == 0)
+ break;
+ n_desc_left -= n;
+ f += n;
+ d += n;
+ }
+
+ n = d - desc;
+
+ if (n)
+ {
+ u32 head = txq->sq_head;
+ u32 offset = head & ctx->mask;
+ u32 n_before_wrap = ctx->n_desc - offset;
+ u32 n_copy;
+
+ d = desc;
+ f = ctx->tmp_bi;
+
+ if (n_before_wrap >= n)
+ {
+ n_copy = n;
+ vlib_buffer_copy_indices (ctx->sq_buffer_indices + offset, f,
+ n_copy);
+ ena_txq_copy_sqes (ctx, offset, d, n_copy);
+ }
+ else
+ {
+ n_copy = n_before_wrap;
+ vlib_buffer_copy_indices (ctx->sq_buffer_indices + offset, f,
+ n_copy);
+ ena_txq_copy_sqes (ctx, offset, d, n_copy);
+
+ n_copy = n - n_before_wrap;
+ vlib_buffer_copy_indices (ctx->sq_buffer_indices, f + n_before_wrap,
+ n_copy);
+ ena_txq_copy_sqes (ctx, 0, d + n_before_wrap, n_copy);
+ }
+
+ head += n;
+ __atomic_store_n (txq->sq_db, head, __ATOMIC_RELEASE);
+ txq->sq_head = head;
+ ctx->n_free_slots -= n;
+
+ return n;
+ }
+ return 0;
+}
+
+VNET_DEV_NODE_FN (ena_tx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ vnet_dev_tx_node_runtime_t *tnr = vnet_dev_get_tx_node_runtime (node);
+ vnet_dev_tx_queue_t *txq = tnr->tx_queue;
+ vnet_dev_t *dev = txq->port->dev;
+ ena_device_t *ed = vnet_dev_get_data (dev);
+ ena_txq_t *eq = vnet_dev_get_tx_queue_data (txq);
+ u32 n_pkts = 0;
+
+ ena_tx_ctx_t ctx = { .mask = txq->size - 1,
+ .n_desc = txq->size,
+ .n_packets_left = frame->n_vectors,
+ .from = vlib_frame_vector_args (frame),
+ .sqe_templates = eq->sqe_templates,
+ .sqes = eq->sqes,
+ .sq_buffer_indices = eq->buffer_indices,
+ .llq = ed->llq };
+
+ vnet_dev_tx_queue_lock_if_needed (txq);
+
+ /* try 3 times to enquee packets by first freeing consumed from the ring
+ * and then trying to enqueue as much as possible */
+ for (int i = 0; i < 3; i++)
+ {
+ /* free buffers consumed by ENA */
+ if (eq->sq_head != eq->sq_tail)
+ ena_txq_deq (vm, &ctx, eq);
+
+ /* enqueue new buffers, try until last attempt enqueues 0 packets */
+ ctx.n_free_slots = ctx.n_desc - (eq->sq_head - eq->sq_tail);
+
+ if (dev->va_dma)
+ while (ena_txq_enq (vm, &ctx, eq, /* va */ 1) > 0)
+ ;
+ else
+ while (ena_txq_enq (vm, &ctx, eq, /* va */ 0) > 0)
+ ;
+
+ if (ctx.n_packets_left == 0)
+ break;
+ }
+
+ vnet_dev_tx_queue_unlock_if_needed (txq);
+
+ if (ctx.n_dropped_chain_too_long)
+ vlib_error_count (vm, node->node_index, ENA_TX_NODE_CTR_CHAIN_TOO_LONG,
+ ctx.n_dropped_chain_too_long);
+
+ n_pkts = frame->n_vectors - ctx.n_packets_left;
+ vlib_increment_combined_counter (
+ vnet_get_main ()->interface_main.combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX,
+ vm->thread_index, tnr->hw_if_index, n_pkts, ctx.n_bytes);
+
+ if (ctx.n_packets_left)
+ {
+ vlib_buffer_free (vm, ctx.from, ctx.n_packets_left);
+ vlib_error_count (vm, node->node_index, ENA_TX_NODE_CTR_NO_FREE_SLOTS,
+ ctx.n_packets_left);
+ }
+
+ return n_pkts;
+}
diff --git a/src/plugins/dev_iavf/CMakeLists.txt b/src/plugins/dev_iavf/CMakeLists.txt
new file mode 100644
index 00000000000..8fa89b7a677
--- /dev/null
+++ b/src/plugins/dev_iavf/CMakeLists.txt
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2022 Cisco Systems, Inc.
+
+add_vpp_plugin(dev_iavf
+ SOURCES
+ adminq.c
+ counters.c
+ format.c
+ iavf.c
+ port.c
+ queue.c
+ rx_node.c
+ tx_node.c
+ virtchnl.c
+
+ MULTIARCH_SOURCES
+ rx_node.c
+ tx_node.c
+)
+
diff --git a/src/plugins/dev_iavf/adminq.c b/src/plugins/dev_iavf/adminq.c
new file mode 100644
index 00000000000..c12dc8aa2f6
--- /dev/null
+++ b/src/plugins/dev_iavf/adminq.c
@@ -0,0 +1,485 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <ctype.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/iavf_regs.h>
+#include <dev_iavf/virtchnl.h>
+#include <vnet/ethernet/ethernet.h>
+
+#define IIAVF_AQ_LARGE_BUF 512
+#define IIAVF_AQ_ATQ_LEN 4
+#define IIAVF_AQ_ARQ_LEN 16
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "adminq",
+};
+
+struct iavf_adminq_dma_mem
+{
+ iavf_aq_desc_t atq[IIAVF_AQ_ATQ_LEN];
+ iavf_aq_desc_t arq[IIAVF_AQ_ARQ_LEN];
+ struct
+ {
+ u8 data[IIAVF_AQ_BUF_SIZE];
+ } atq_bufs[IIAVF_AQ_ATQ_LEN];
+ struct
+ {
+ u8 data[IIAVF_AQ_BUF_SIZE];
+ } arq_bufs[IIAVF_AQ_ARQ_LEN];
+};
+
+static const iavf_dyn_ctl dyn_ctl0_disable = {
+ .itr_indx = 3,
+};
+
+static const iavf_dyn_ctl dyn_ctl0_enable = {
+ .intena = 1,
+ .clearpba = 1,
+ .itr_indx = 3,
+};
+
+static const iavf_vfint_icr0_ena1 icr0_ena1_aq_enable = {
+ .adminq = 1,
+};
+
+static inline void
+iavf_irq_0_disable (iavf_device_t *ad)
+{
+ iavf_reg_write (ad, IAVF_VFINT_ICR0_ENA1, 0);
+ iavf_reg_write (ad, IAVF_VFINT_DYN_CTL0, dyn_ctl0_disable.as_u32);
+ iavf_reg_flush (ad);
+}
+
+static inline void
+iavf_irq_0_enable (iavf_device_t *ad)
+{
+ iavf_reg_write (ad, IAVF_VFINT_ICR0_ENA1, icr0_ena1_aq_enable.as_u32);
+ iavf_reg_write (ad, IAVF_VFINT_DYN_CTL0, dyn_ctl0_enable.as_u32);
+ iavf_reg_flush (ad);
+}
+
+static_always_inline int
+iavf_aq_desc_is_done (iavf_aq_desc_t *d)
+{
+ iavf_aq_desc_flags_t flags;
+ flags.as_u16 = __atomic_load_n (&d->flags.as_u16, __ATOMIC_ACQUIRE);
+ return flags.dd;
+}
+
+static u8 *
+format_iavf_aq_desc_flags (u8 *s, va_list *args)
+{
+ iavf_aq_desc_flags_t f = va_arg (*args, iavf_aq_desc_flags_t);
+ int i = 0;
+
+#define _(n, v) \
+ if (f.v) \
+ { \
+ char str[] = #v, *sp = str; \
+ if (i++) \
+ { \
+ vec_add1 (s, ','); \
+ vec_add1 (s, ' '); \
+ } \
+ while (sp[0]) \
+ vec_add1 (s, (u8) toupper (sp++[0])); \
+ }
+ foreach_iavf_aq_desc_flag
+#undef _
+ return s;
+}
+
+static u8 *
+format_iavf_aq_desc_retval (u8 *s, va_list *args)
+{
+ iavf_aq_desc_retval_t rv = va_arg (*args, u32);
+
+ char *retvals[] = {
+#define _(a, b) [a] = #b,
+ foreach_iavf_aq_desc_retval
+#undef _
+ };
+
+ if (rv >= ARRAY_LEN (retvals) || retvals[rv] == 0)
+ return format (s, "UNKNOWN(%d)", rv);
+
+ return format (s, "%s", retvals[rv]);
+}
+
+static u8 *
+format_iavf_aq_desc (u8 *s, va_list *args)
+{
+ iavf_aq_desc_t *d = va_arg (*args, iavf_aq_desc_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "opcode 0x%04x datalen %u retval %U (%u) flags %U", d->opcode,
+ d->datalen, format_iavf_aq_desc_retval, d->retval, d->retval,
+ format_iavf_aq_desc_flags, d->flags);
+
+ if (d->opcode == IIAVF_AQ_DESC_OP_SEND_TO_PF ||
+ d->opcode == IIAVF_AQ_DESC_OP_MESSAGE_FROM_PF)
+ {
+ s =
+ format (s, "\n%Uv_opcode %U (%u) v_retval %U (%d) buf_dma_addr 0x%lx",
+ format_white_space, indent, format_virtchnl_op_name,
+ d->v_opcode, d->v_opcode, format_virtchnl_status, d->v_retval,
+ d->v_retval, (uword) d->param2 << 32 | d->param3);
+ }
+ else
+ {
+ s = format (
+ s, "\n%Ucookie_hi 0x%x cookie_lo 0x%x params %08x %08x %08x %08x",
+ format_white_space, indent, d->cookie_hi, d->cookie_lo, d->param0,
+ d->param1, d->param2, d->param3);
+ }
+ return s;
+}
+
+vnet_dev_rv_t
+iavf_aq_alloc (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ return vnet_dev_dma_mem_alloc (vm, dev, sizeof (iavf_adminq_dma_mem_t), 0,
+ (void **) &ad->aq_mem);
+}
+
+void
+iavf_aq_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ vnet_dev_dma_mem_free (vm, dev, ad->aq_mem);
+}
+
+static void
+iavf_aq_arq_slot_init (vlib_main_t *vm, vnet_dev_t *dev, u16 slot)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ u64 pa = vnet_dev_get_dma_addr (vm, dev, ad->aq_mem->arq_bufs + slot);
+ ad->aq_mem->arq[slot] = (iavf_aq_desc_t){
+ .flags.buf = 1,
+ .flags.lb = IIAVF_AQ_BUF_SIZE > IIAVF_AQ_LARGE_BUF,
+ .datalen = sizeof (ad->aq_mem->arq_bufs[0].data),
+ .addr_hi = (u32) (pa >> 32),
+ .addr_lo = (u32) pa,
+ };
+}
+
+static void
+iavf_aq_poll (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_aq_desc_t *d;
+ u8 *b;
+
+ while (iavf_aq_arq_next_acq (vm, dev, &d, &b, 0))
+ {
+
+ log_debug (dev, "poll[%u] flags %x %U op %u v_op %u", ad->arq_next_slot,
+ d->flags.as_u16, format_iavf_aq_desc_flags, d->flags,
+ d->opcode, d->v_opcode);
+ if ((d->datalen != sizeof (virtchnl_pf_event_t)) ||
+ ((d->flags.buf) == 0))
+ {
+ log_err (dev, "event message error");
+ }
+
+ vec_add1 (ad->events, *(virtchnl_pf_event_t *) b);
+ iavf_aq_arq_next_rel (vm, dev);
+ }
+
+ if (vec_len (ad->events))
+ {
+ virtchnl_pf_event_t *e;
+ char *virtchnl_event_names[] = {
+#define _(v, n) [v] = #n,
+ foreach_virtchnl_event_code
+#undef _
+ };
+
+ vec_foreach (e, ad->events)
+ {
+ log_debug (dev, "event %s (%u) sev %d",
+ virtchnl_event_names[e->event], e->event, e->severity);
+
+ if (e->event == VIRTCHNL_EVENT_LINK_CHANGE)
+ {
+ vnet_dev_port_state_changes_t changes = {};
+ vnet_dev_port_t *port = vnet_dev_get_port_by_id (dev, 0);
+
+ if (port)
+ {
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ int link_up;
+ u32 speed = 0;
+
+ if (ap->vf_cap_flags & VIRTCHNL_VF_CAP_ADV_LINK_SPEED)
+ {
+ link_up = e->event_data.link_event_adv.link_status;
+ speed = e->event_data.link_event_adv.link_speed;
+ }
+ else
+ {
+ const u32 speed_table[8] = { 100, 1000, 10000, 40000,
+ 20000, 25000, 2500, 5000 };
+
+ link_up = e->event_data.link_event.link_status;
+ speed = e->event_data.link_event.link_speed;
+
+ if (count_set_bits (speed) == 1 && speed &&
+ pow2_mask (8))
+ speed = speed_table[get_lowest_set_bit_index (speed)];
+ else
+ {
+ if (link_up)
+ log_warn (dev,
+ "unsupported link speed value "
+ "received (0x%x)",
+ speed);
+ speed = 0;
+ }
+ }
+
+ log_debug (dev, "LINK_CHANGE speed %u state %u", speed,
+ link_up);
+
+ if (port->link_up != link_up)
+ {
+ changes.change.link_state = 1;
+ changes.link_state = link_up;
+ log_debug (dev, "link state changed to %s",
+ link_up ? "up" : "down");
+ }
+
+ if (port->speed != speed * 1000)
+ {
+ changes.change.link_speed = 1;
+ changes.link_speed = speed * 1000;
+ log_debug (dev, "link speed changed to %u Mbps", speed);
+ }
+
+ if (changes.change.any)
+ vnet_dev_port_state_change (vm, port, changes);
+ }
+ }
+ }
+ vec_reset_length (ad->events);
+ }
+}
+
+static void
+iavf_adminq_msix_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 line)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_reg_write (ad, IAVF_VFINT_DYN_CTL0, dyn_ctl0_enable.as_u32);
+ log_debug (dev, "MSI-X interrupt %u received", line);
+ vnet_dev_process_call_op_no_wait (vm, dev, iavf_aq_poll);
+}
+
+static void
+iavf_adminq_intx_handler (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_adminq_msix_handler (vm, dev, 0);
+}
+
+void
+iavf_aq_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ uword pa;
+ u32 len;
+
+ /* disable both tx and rx adminq queue */
+ iavf_reg_write (ad, IAVF_ATQLEN, 0);
+ iavf_reg_write (ad, IAVF_ARQLEN, 0);
+
+ len = IIAVF_AQ_ATQ_LEN;
+ pa = vnet_dev_get_dma_addr (vm, dev, &ad->aq_mem->atq);
+ iavf_reg_write (ad, IAVF_ATQT, 0); /* Tail */
+ iavf_reg_write (ad, IAVF_ATQH, 0); /* Head */
+ iavf_reg_write (ad, IAVF_ATQBAL, (u32) pa); /* Base Address Low */
+ iavf_reg_write (ad, IAVF_ATQBAH, (u32) (pa >> 32)); /* Base Address High */
+ iavf_reg_write (ad, IAVF_ATQLEN, len | (1ULL << 31)); /* len & ena */
+
+ len = IIAVF_AQ_ARQ_LEN;
+ pa = vnet_dev_get_dma_addr (vm, dev, ad->aq_mem->arq);
+ iavf_reg_write (ad, IAVF_ARQT, 0); /* Tail */
+ iavf_reg_write (ad, IAVF_ARQH, 0); /* Head */
+ iavf_reg_write (ad, IAVF_ARQBAL, (u32) pa); /* Base Address Low */
+ iavf_reg_write (ad, IAVF_ARQBAH, (u32) (pa >> 32)); /* Base Address High */
+ iavf_reg_write (ad, IAVF_ARQLEN, len | (1ULL << 31)); /* len & ena */
+
+ for (int i = 0; i < len; i++)
+ iavf_aq_arq_slot_init (vm, dev, i);
+ iavf_reg_write (ad, IAVF_ARQT, len - 1); /* Tail */
+
+ ad->atq_next_slot = 0;
+ ad->arq_next_slot = 0;
+ ad->adminq_active = 1;
+}
+
+void
+iavf_aq_poll_on (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+
+ vnet_dev_poll_dev_add (vm, dev, IIAVF_AQ_POLL_INTERVAL, iavf_aq_poll);
+
+ if (vnet_dev_get_pci_n_msix_interrupts (dev) > 0)
+ {
+ vnet_dev_pci_msix_add_handler (vm, dev, iavf_adminq_msix_handler, 0, 1);
+ vnet_dev_pci_msix_enable (vm, dev, 0, 1);
+ }
+ else
+ vnet_dev_pci_intx_add_handler (vm, dev, iavf_adminq_intx_handler);
+
+ iavf_irq_0_enable (ad);
+}
+
+void
+iavf_aq_poll_off (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+
+ iavf_irq_0_disable (ad);
+
+ vnet_dev_poll_dev_remove (vm, dev, iavf_aq_poll);
+
+ if (vnet_dev_get_pci_n_msix_interrupts (dev) > 0)
+ {
+ vnet_dev_pci_msix_disable (vm, dev, 0, 1);
+ vnet_dev_pci_msix_remove_handler (vm, dev, 0, 1);
+ }
+ else
+ vnet_dev_pci_intx_remove_handler (vm, dev);
+}
+
+vnet_dev_rv_t
+iavf_aq_atq_enq (vlib_main_t *vm, vnet_dev_t *dev, iavf_aq_desc_t *desc,
+ const u8 *data, u16 len, f64 timeout)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_aq_desc_t *d = ad->aq_mem->atq + ad->atq_next_slot;
+ u8 *buf = ad->aq_mem->atq_bufs[ad->atq_next_slot].data;
+
+ ASSERT (len <= IIAVF_AQ_BUF_SIZE);
+
+ *d = *desc;
+
+ if (len)
+ {
+ u64 pa = vnet_dev_get_dma_addr (vm, dev, buf);
+ d->datalen = len;
+ d->addr_hi = (u32) (pa >> 32);
+ d->addr_lo = (u32) pa;
+ d->flags.buf = 1;
+ d->flags.rd = 1;
+ d->flags.lb = len > IIAVF_AQ_LARGE_BUF;
+ clib_memcpy_fast (buf, data, len);
+ }
+
+ log_debug (dev, "slot %u\n %U", ad->atq_next_slot, format_iavf_aq_desc, d);
+
+ ad->atq_next_slot = (ad->atq_next_slot + 1) % IIAVF_AQ_ATQ_LEN;
+ iavf_reg_write (ad, IAVF_ATQT, ad->atq_next_slot);
+ iavf_reg_flush (ad);
+
+ if (timeout > 0)
+ {
+ f64 suspend_time = timeout / 62;
+ f64 t0 = vlib_time_now (vm);
+ iavf_aq_desc_flags_t flags;
+
+ while (1)
+ {
+ flags.as_u16 = __atomic_load_n (&d->flags.as_u16, __ATOMIC_ACQUIRE);
+
+ if (flags.err)
+ {
+ log_err (dev, "adminq enqueue error [opcode 0x%x, retval %d]",
+ d->opcode, d->retval);
+ return VNET_DEV_ERR_BUG;
+ }
+
+ if (flags.dd && flags.cmp)
+ return VNET_DEV_OK;
+
+ if (vlib_time_now (vm) - t0 > timeout)
+ {
+ log_err (dev, "adminq enqueue timeout [opcode 0x%x]", d->opcode);
+ return VNET_DEV_ERR_TIMEOUT;
+ }
+
+ vlib_process_suspend (vm, suspend_time);
+ suspend_time *= 2;
+ }
+ }
+
+ return VNET_DEV_OK;
+}
+
+void
+iavf_aq_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ if (ad->adminq_active)
+ {
+ iavf_aq_desc_t d = {
+ .opcode = IIAVF_AQ_DESC_OP_QUEUE_SHUTDOWN,
+ .driver_unloading = 1,
+ .flags = { .si = 1 },
+ };
+ log_debug (dev, "adminq queue shutdown");
+ iavf_aq_atq_enq (vm, dev, &d, 0, 0, 0);
+ ad->adminq_active = 0;
+ }
+}
+
+int
+iavf_aq_arq_next_acq (vlib_main_t *vm, vnet_dev_t *dev, iavf_aq_desc_t **dp,
+ u8 **bp, f64 timeout)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_aq_desc_t *d = ad->aq_mem->arq + ad->arq_next_slot;
+
+ if (timeout)
+ {
+ f64 suspend_time = timeout / 62;
+ f64 t0 = vlib_time_now (vm);
+
+ while (!iavf_aq_desc_is_done (d))
+ {
+ if (vlib_time_now (vm) - t0 > timeout)
+ return 0;
+
+ vlib_process_suspend (vm, suspend_time);
+
+ suspend_time *= 2;
+ }
+ }
+ else if (!iavf_aq_desc_is_done (d))
+ return 0;
+
+ log_debug (dev, "arq desc acquired in slot %u\n %U", ad->arq_next_slot,
+ format_iavf_aq_desc, d);
+ *dp = d;
+ *bp = ad->aq_mem->arq_bufs[ad->arq_next_slot].data;
+ return 1;
+}
+
+void
+iavf_aq_arq_next_rel (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ ASSERT (iavf_aq_desc_is_done (ad->aq_mem->arq + ad->arq_next_slot));
+ iavf_aq_arq_slot_init (vm, dev, ad->arq_next_slot);
+ iavf_reg_write (ad, IAVF_ARQT, ad->arq_next_slot);
+ iavf_reg_flush (ad);
+ ad->arq_next_slot = (ad->arq_next_slot + 1) % IIAVF_AQ_ARQ_LEN;
+}
diff --git a/src/plugins/dev_iavf/counters.c b/src/plugins/dev_iavf/counters.c
new file mode 100644
index 00000000000..6dcd01141f0
--- /dev/null
+++ b/src/plugins/dev_iavf/counters.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "counters",
+};
+
+typedef enum
+{
+ IIAVF_PORT_CTR_RX_BYTES,
+ IIAVF_PORT_CTR_TX_BYTES,
+ IIAVF_PORT_CTR_RX_PACKETS,
+ IIAVF_PORT_CTR_TX_PACKETS,
+ IIAVF_PORT_CTR_RX_DROPS,
+ IIAVF_PORT_CTR_TX_DROPS,
+ IIAVF_PORT_CTR_RX_UCAST,
+ IIAVF_PORT_CTR_TX_UCAST,
+ IIAVF_PORT_CTR_RX_MCAST,
+ IIAVF_PORT_CTR_TX_MCAST,
+ IIAVF_PORT_CTR_RX_BCAST,
+ IIAVF_PORT_CTR_TX_BCAST,
+ IIAVF_PORT_CTR_RX_UNKNOWN_PROTOCOL,
+ IIAVF_PORT_CTR_TX_ERRORS,
+} iavf_port_counter_id_t;
+
+vnet_dev_counter_t iavf_port_counters[] = {
+ VNET_DEV_CTR_RX_BYTES (IIAVF_PORT_CTR_RX_BYTES),
+ VNET_DEV_CTR_RX_PACKETS (IIAVF_PORT_CTR_RX_PACKETS),
+ VNET_DEV_CTR_RX_DROPS (IIAVF_PORT_CTR_RX_DROPS),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_UCAST, RX, PACKETS, "unicast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_MCAST, RX, PACKETS, "multicast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_BCAST, RX, PACKETS, "broadcast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_RX_UNKNOWN_PROTOCOL, RX, PACKETS,
+ "unknown protocol"),
+
+ VNET_DEV_CTR_TX_BYTES (IIAVF_PORT_CTR_TX_BYTES),
+ VNET_DEV_CTR_TX_PACKETS (IIAVF_PORT_CTR_TX_PACKETS),
+ VNET_DEV_CTR_TX_DROPS (IIAVF_PORT_CTR_TX_DROPS),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_UCAST, TX, PACKETS, "unicast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_MCAST, TX, PACKETS, "multicast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_BCAST, TX, PACKETS, "broadcast"),
+ VNET_DEV_CTR_VENDOR (IIAVF_PORT_CTR_TX_ERRORS, TX, PACKETS, "errors"),
+};
+
+void
+iavf_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_port_add_counters (vm, port, iavf_port_counters,
+ ARRAY_LEN (iavf_port_counters));
+}
+
+void
+iavf_port_poll_stats (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_rv_t rv;
+ vnet_dev_t *dev = port->dev;
+ virtchnl_eth_stats_t stats;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ virtchnl_queue_select_t qs = { .vsi_id = ap->vsi_id };
+
+ rv = iavf_vc_op_get_stats (vm, dev, &qs, &stats);
+
+ if (rv != VNET_DEV_OK)
+ return;
+
+ foreach_vnet_dev_counter (c, port->counter_main)
+ {
+ switch (c->user_data)
+ {
+ case IIAVF_PORT_CTR_RX_BYTES:
+ vnet_dev_counter_value_update (vm, c, stats.rx_bytes);
+ break;
+ case IIAVF_PORT_CTR_TX_BYTES:
+ vnet_dev_counter_value_update (vm, c, stats.tx_bytes);
+ break;
+ case IIAVF_PORT_CTR_RX_PACKETS:
+ vnet_dev_counter_value_update (
+ vm, c, stats.rx_unicast + stats.rx_broadcast + stats.rx_multicast);
+ break;
+ case IIAVF_PORT_CTR_TX_PACKETS:
+ vnet_dev_counter_value_update (
+ vm, c, stats.tx_unicast + stats.tx_broadcast + stats.tx_multicast);
+ break;
+ case IIAVF_PORT_CTR_RX_DROPS:
+ vnet_dev_counter_value_update (vm, c, stats.rx_discards);
+ break;
+ case IIAVF_PORT_CTR_TX_DROPS:
+ vnet_dev_counter_value_update (vm, c, stats.tx_discards);
+ break;
+ case IIAVF_PORT_CTR_RX_UCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_unicast);
+ break;
+ case IIAVF_PORT_CTR_TX_UCAST:
+ vnet_dev_counter_value_update (vm, c, stats.tx_unicast);
+ break;
+ case IIAVF_PORT_CTR_RX_MCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_multicast);
+ break;
+ case IIAVF_PORT_CTR_TX_MCAST:
+ vnet_dev_counter_value_update (vm, c, stats.tx_multicast);
+ break;
+ case IIAVF_PORT_CTR_RX_BCAST:
+ vnet_dev_counter_value_update (vm, c, stats.rx_broadcast);
+ break;
+ case IIAVF_PORT_CTR_TX_BCAST:
+ vnet_dev_counter_value_update (vm, c, stats.tx_broadcast);
+ break;
+ case IIAVF_PORT_CTR_RX_UNKNOWN_PROTOCOL:
+ vnet_dev_counter_value_update (vm, c, stats.rx_unknown_protocol);
+ break;
+ case IIAVF_PORT_CTR_TX_ERRORS:
+ vnet_dev_counter_value_update (vm, c, stats.tx_errors);
+ break;
+ default:
+ ASSERT (0);
+ }
+ }
+}
diff --git a/src/plugins/dev_iavf/format.c b/src/plugins/dev_iavf/format.c
new file mode 100644
index 00000000000..9a3dde47ee9
--- /dev/null
+++ b/src/plugins/dev_iavf/format.c
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+
+u8 *
+format_iavf_vf_cap_flags (u8 *s, va_list *args)
+{
+ u32 flags = va_arg (*args, u32);
+ int not_first = 0;
+
+ char *strs[32] = {
+#define _(a, b, c) [a] = c,
+ foreach_iavf_vf_cap_flag
+#undef _
+ };
+
+ for (int i = 0; i < 32; i++)
+ {
+ if ((flags & (1 << i)) == 0)
+ continue;
+ if (not_first)
+ s = format (s, " ");
+ if (strs[i])
+ s = format (s, "%s", strs[i]);
+ else
+ s = format (s, "unknown(%u)", i);
+ not_first = 1;
+ }
+ return s;
+}
+
+u8 *
+format_iavf_rx_desc_qw1 (u8 *s, va_list *args)
+{
+ iavf_rx_desc_qw1_t *qw1 = va_arg (*args, iavf_rx_desc_qw1_t *);
+ s = format (s, "len %u ptype %u ubmcast %u fltstat %u flags", qw1->length,
+ qw1->ptype, qw1->ubmcast, qw1->fltstat);
+
+#define _(f) \
+ if (qw1->f) \
+ s = format (s, " " #f)
+
+ _ (dd);
+ _ (eop);
+ _ (l2tag1p);
+ _ (l3l4p);
+ _ (crcp);
+ _ (flm);
+ _ (lpbk);
+ _ (ipv6exadd);
+ _ (int_udp_0);
+ _ (ipe);
+ _ (l4e);
+ _ (oversize);
+#undef _
+ return s;
+}
+
+u8 *
+format_iavf_rx_trace (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ vlib_node_t *node = va_arg (*args, vlib_node_t *);
+ iavf_rx_trace_t *t = va_arg (*args, iavf_rx_trace_t *);
+ iavf_rx_desc_qw1_t *qw1;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
+ u32 indent = format_get_indent (s);
+ int i = 0;
+
+ s = format (s, "avf: %v (%d) qid %u next-node %U flow-id %u", hi->name,
+ t->hw_if_index, t->qid, format_vlib_next_node_name, vm,
+ node->index, t->next_index, t->flow_id);
+
+ qw1 = (iavf_rx_desc_qw1_t *) t->qw1s;
+
+ do
+ s = format (s, "\n%Udesc %u: %U", format_white_space, indent + 2, i,
+ format_iavf_rx_desc_qw1, qw1 + i);
+ while ((qw1[i++].eop) == 0 && i < IAVF_RX_MAX_DESC_IN_CHAIN);
+
+ return s;
+}
+
+u8 *
+format_iavf_port_status (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "caps: %U", format_iavf_vf_cap_flags, ap->vf_cap_flags);
+ s = format (s, "\n%Uvsi is %u, RSS key size is %u, RSS lut size is %u",
+ format_white_space, indent, ap->vsi_id, ap->rss_key_size,
+ ap->rss_lut_size);
+ s = format (s, "\n%Uflow offload ", format_white_space, indent);
+ if (ap->flow_offload)
+ s = format (s, "enabled, %u flows configured",
+ vec_len (ap->flow_lookup_entries));
+ else
+ s = format (s, "disabled");
+ return s;
+}
diff --git a/src/plugins/dev_iavf/iavf.c b/src/plugins/dev_iavf/iavf.c
new file mode 100644
index 00000000000..d1c2b9edc63
--- /dev/null
+++ b/src/plugins/dev_iavf/iavf.c
@@ -0,0 +1,307 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <vppinfra/ring.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "init",
+};
+
+#define IAVF_MAX_QPAIRS 32
+
+static const u32 driver_cap_flags =
+ /**/ VIRTCHNL_VF_CAP_ADV_LINK_SPEED |
+ /**/ VIRTCHNL_VF_LARGE_NUM_QPAIRS |
+ /**/ VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF |
+ /**/ VIRTCHNL_VF_OFFLOAD_FDIR_PF |
+ /**/ VIRTCHNL_VF_OFFLOAD_L2 |
+ /**/ VIRTCHNL_VF_OFFLOAD_REQ_QUEUES |
+ /**/ VIRTCHNL_VF_OFFLOAD_RSS_PF |
+ /**/ VIRTCHNL_VF_OFFLOAD_RX_POLLING |
+ /**/ VIRTCHNL_VF_OFFLOAD_VLAN |
+ /**/ VIRTCHNL_VF_OFFLOAD_VLAN_V2 |
+ /**/ VIRTCHNL_VF_OFFLOAD_WB_ON_ITR |
+ /**/ 0;
+
+static const virtchnl_version_info_t driver_virtchnl_version = {
+ .major = VIRTCHNL_VERSION_MAJOR,
+ .minor = VIRTCHNL_VERSION_MINOR,
+};
+
+#define _(f, n, s, d) \
+ { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s },
+
+vlib_error_desc_t iavf_rx_node_counters[] = { foreach_iavf_rx_node_counter };
+vlib_error_desc_t iavf_tx_node_counters[] = { foreach_iavf_tx_node_counter };
+#undef _
+
+vnet_dev_node_t iavf_rx_node = {
+ .error_counters = iavf_rx_node_counters,
+ .n_error_counters = ARRAY_LEN (iavf_rx_node_counters),
+ .format_trace = format_iavf_rx_trace,
+};
+
+vnet_dev_node_t iavf_tx_node = {
+ .error_counters = iavf_tx_node_counters,
+ .n_error_counters = ARRAY_LEN (iavf_tx_node_counters),
+};
+
+static struct
+{
+ u16 device_id;
+ char *desc;
+} iavf_dev_types[] = {
+ { 0x1889, "Intel(R) Adaptive Virtual Function" },
+ { 0x154c, "Intel(R) X710 Virtual Function" },
+ { 0x37cd, "Intel(R) X722 Virtual Function" },
+};
+
+static u8 *
+iavf_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info)
+{
+ vnet_dev_bus_pci_device_info_t *di = dev_info;
+
+ if (di->vendor_id != 0x8086)
+ return 0;
+
+ FOREACH_ARRAY_ELT (dt, iavf_dev_types)
+ {
+ if (dt->device_id == di->device_id)
+ return format (0, "%s", dt->desc);
+ }
+
+ return 0;
+}
+
+static vnet_dev_rv_t
+iavf_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ u32 n_tries = 50;
+
+ iavf_aq_init (vm, dev);
+ iavf_vc_op_reset_vf (vm, dev);
+
+ do
+ {
+ if (n_tries-- == 0)
+ return VNET_DEV_ERR_TIMEOUT;
+ vlib_process_suspend (vm, 0.02);
+ }
+ while ((iavf_reg_read (ad, IAVF_VFGEN_RSTAT) & 3) != 2);
+
+ iavf_aq_init (vm, dev);
+ iavf_aq_poll_on (vm, dev);
+ return (VNET_DEV_OK);
+}
+
+static vnet_dev_rv_t
+iavf_alloc (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ log_debug (dev, "alloc");
+ return iavf_aq_alloc (vm, dev);
+}
+
+static vnet_dev_rv_t
+iavf_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ virtchnl_version_info_t ver;
+ virtchnl_vf_resource_t res;
+ u32 n_threads = vlib_get_n_threads ();
+ u16 max_frame_sz;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "init");
+
+ if ((rv = vnet_dev_pci_map_region (vm, dev, 0, &ad->bar0)))
+ return rv;
+
+ if ((rv = vnet_dev_pci_bus_master_enable (vm, dev)))
+ return rv;
+
+ if ((rv = iavf_reset (vm, dev)))
+ return rv;
+
+ if ((rv = iavf_vc_op_version (vm, dev, &driver_virtchnl_version, &ver)))
+ return rv;
+
+ if (ver.major != driver_virtchnl_version.major ||
+ ver.minor != driver_virtchnl_version.minor)
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE_VER;
+
+ if ((rv = iavf_vc_op_get_vf_resources (vm, dev, &driver_cap_flags, &res)))
+ return rv;
+
+ if (res.num_vsis != 1 || res.vsi_res[0].vsi_type != VIRTCHNL_VSI_SRIOV)
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+
+ if (res.max_mtu == 0)
+ {
+ log_warn (dev, "PF driver is reporting invalid value of 0 for max_mtu, "
+ "consider upgrade");
+ max_frame_sz = ETHERNET_MAX_PACKET_BYTES;
+ }
+ else
+ /* reverse of PF driver MTU calculation */
+ max_frame_sz = res.max_mtu + 14 /* ethernet header */ + 4 /* FCS */ +
+ 2 * 4 /* two VLAN tags */;
+
+ iavf_port_t iavf_port = {
+ .vf_cap_flags = res.vf_cap_flags,
+ .rss_key_size = res.rss_key_size,
+ .rss_lut_size = res.rss_lut_size,
+ .max_vectors = res.max_vectors,
+ .vsi_id = res.vsi_res[0].vsi_id,
+ .num_qp = clib_min (IAVF_MAX_QPAIRS, res.vsi_res[0].num_queue_pairs),
+ };
+
+ vnet_dev_port_add_args_t port_add_args = {
+ .port = {
+ .attr = {
+ .type = VNET_DEV_PORT_TYPE_ETHERNET,
+ .max_rx_queues = clib_min (IAVF_MAX_QPAIRS, res.num_queue_pairs),
+ .max_tx_queues = clib_min (IAVF_MAX_QPAIRS, res.num_queue_pairs),
+ .max_supported_rx_frame_size = max_frame_sz,
+ .caps = {
+ .change_max_rx_frame_size = 1,
+ .interrupt_mode = 1,
+ .rss = 1,
+ .mac_filter = 1,
+ },
+ .rx_offloads = {
+ .ip4_cksum = 1,
+ },
+ .tx_offloads = {
+ .ip4_cksum = 1,
+ .tcp_gso = 1,
+ },
+ },
+ .ops = {
+ .init = iavf_port_init,
+ .start = iavf_port_start,
+ .stop = iavf_port_stop,
+ .config_change = iavf_port_cfg_change,
+ .config_change_validate = iavf_port_cfg_change_validate,
+ .format_status = format_iavf_port_status,
+ },
+ .data_size = sizeof (iavf_port_t),
+ .initial_data = &iavf_port,
+ },
+ .rx_node = &iavf_rx_node,
+ .tx_node = &iavf_tx_node,
+ .rx_queue = {
+ .config = {
+ .data_size = sizeof (iavf_rxq_t),
+ .default_size = 512,
+ .multiplier = 32,
+ .min_size = 32,
+ .max_size = 4096,
+ .size_is_power_of_two = 1,
+ },
+ .ops = {
+ .alloc = iavf_rx_queue_alloc,
+ .free = iavf_rx_queue_free,
+ },
+ },
+ .tx_queue = {
+ .config = {
+ .data_size = sizeof (iavf_txq_t),
+ .default_size = 512,
+ .multiplier = 32,
+ .min_size = 32,
+ .max_size = 4096,
+ .size_is_power_of_two = 1,
+ },
+ .ops = {
+ .alloc = iavf_tx_queue_alloc,
+ .free = iavf_tx_queue_free,
+ },
+ },
+ };
+
+ vnet_dev_set_hw_addr_eth_mac (&port_add_args.port.attr.hw_addr,
+ res.vsi_res[0].default_mac_addr);
+
+ log_info (dev, "MAC address is %U", format_ethernet_address,
+ res.vsi_res[0].default_mac_addr);
+
+ if (n_threads <= vnet_dev_get_pci_n_msix_interrupts (dev) - 1)
+ {
+ port_add_args.port.attr.caps.interrupt_mode = 1;
+ iavf_port.n_rx_vectors = n_threads;
+ }
+ else
+ {
+ log_notice (
+ dev,
+ "number of threads (%u) bigger than number of interrupt lines "
+ "(%u), interrupt mode disabled",
+ vlib_get_n_threads (), res.max_vectors);
+ iavf_port.n_rx_vectors = 1;
+ }
+
+ if (res.vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF)
+ {
+ if (res.rss_key_size < IAVF_MAX_RSS_KEY_SIZE)
+ {
+ log_notice (
+ dev, "unsupported RSS config provided by device, RSS disabled");
+ }
+ else
+ {
+ port_add_args.port.attr.caps.rss = 1;
+ if (res.rss_lut_size > IAVF_MAX_RSS_LUT_SIZE)
+ log_notice (dev, "device supports bigger RSS LUT than driver");
+ }
+ }
+
+ return vnet_dev_port_add (vm, dev, 0, &port_add_args);
+}
+
+static void
+iavf_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ log_debug (dev, "deinit");
+ iavf_aq_poll_off (vm, dev);
+ iavf_aq_deinit (vm, dev);
+ iavf_aq_free (vm, dev);
+}
+
+static void
+iavf_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ log_debug (dev, "free");
+ iavf_aq_free (vm, dev);
+}
+
+VNET_DEV_REGISTER_DRIVER (avf) = {
+ .name = "iavf",
+ .bus = "pci",
+ .device_data_sz = sizeof (iavf_device_t),
+ .runtime_temp_space_sz = sizeof (iavf_rt_data_t),
+ .ops = {
+ .alloc = iavf_alloc,
+ .init = iavf_init,
+ .deinit = iavf_deinit,
+ .free = iavf_free,
+ .probe = iavf_probe,
+ },
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "dev_iavf",
+};
diff --git a/src/plugins/dev_iavf/iavf.h b/src/plugins/dev_iavf/iavf.h
new file mode 100644
index 00000000000..39f92741a63
--- /dev/null
+++ b/src/plugins/dev_iavf/iavf.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_H_
+#define _IIAVF_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_iavf/iavf_desc.h>
+#include <dev_iavf/virtchnl.h>
+
+#define IAVF_ITR_INT 250
+#define IAVF_RX_MAX_DESC_IN_CHAIN 5
+#define IAVF_MAX_RSS_KEY_SIZE 52
+#define IAVF_MAX_RSS_LUT_SIZE 64
+#define IIAVF_AQ_POLL_INTERVAL 0.2
+#define IIAVF_AQ_BUF_SIZE 4096
+
+typedef struct iavf_adminq_dma_mem iavf_adminq_dma_mem_t;
+
+typedef struct
+{
+ u8 adminq_active : 1;
+ void *bar0;
+
+ /* Admin queues */
+ iavf_adminq_dma_mem_t *aq_mem;
+ u16 atq_next_slot;
+ u16 arq_next_slot;
+ virtchnl_pf_event_t *events;
+} iavf_device_t;
+
+typedef struct
+{
+ u32 flow_id;
+ u16 next_index;
+ i16 buffer_advance;
+} iavf_flow_lookup_entry_t;
+
+typedef struct
+{
+ u8 admin_up : 1;
+ u8 flow_offload : 1;
+ iavf_flow_lookup_entry_t *flow_lookup_entries;
+ u64 intr_mode_per_rxq_bitmap;
+ u32 vf_cap_flags;
+ u16 vsi_id;
+ u16 rss_key_size;
+ u16 rss_lut_size;
+ u16 num_qp;
+ u16 max_vectors;
+ u16 n_rx_vectors;
+} iavf_port_t;
+
+typedef struct
+{
+ u32 *qtx_tail;
+ u32 *buffer_indices;
+ iavf_tx_desc_t *descs;
+ u16 next;
+ u16 n_enqueued;
+ u16 *rs_slots;
+ iavf_tx_desc_t *tmp_descs;
+ u32 *tmp_bufs;
+ u32 *ph_bufs;
+} iavf_txq_t;
+
+typedef struct
+{
+ u32 *qrx_tail;
+ u32 *buffer_indices;
+ iavf_rx_desc_t *descs;
+ u16 next;
+ u16 n_enqueued;
+} iavf_rxq_t;
+
+typedef struct
+{
+ u16 qid;
+ u16 next_index;
+ u32 hw_if_index;
+ u32 flow_id;
+ u64 qw1s[IAVF_RX_MAX_DESC_IN_CHAIN];
+} iavf_rx_trace_t;
+
+/* adminq.c */
+vnet_dev_rv_t iavf_aq_alloc (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_init (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_poll_on (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_poll_off (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_deinit (vlib_main_t *, vnet_dev_t *);
+void iavf_aq_free (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t iavf_aq_atq_enq (vlib_main_t *, vnet_dev_t *, iavf_aq_desc_t *,
+ const u8 *, u16, f64);
+int iavf_aq_arq_next_acq (vlib_main_t *, vnet_dev_t *, iavf_aq_desc_t **,
+ u8 **, f64);
+void iavf_aq_arq_next_rel (vlib_main_t *, vnet_dev_t *);
+format_function_t format_virtchnl_op_name;
+format_function_t format_virtchnl_status;
+
+/* format.c */
+format_function_t format_iavf_vf_cap_flags;
+format_function_t format_iavf_rx_trace;
+format_function_t format_iavf_port_status;
+
+/* port.c */
+vnet_dev_rv_t iavf_port_init (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t iavf_port_start (vlib_main_t *, vnet_dev_port_t *);
+void iavf_port_stop (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t iavf_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t iavf_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+
+/* queue.c */
+vnet_dev_rv_t iavf_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t iavf_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t iavf_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t iavf_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *);
+void iavf_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *);
+void iavf_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *);
+void iavf_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void iavf_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* counter.c */
+void iavf_port_poll_stats (vlib_main_t *, vnet_dev_port_t *);
+void iavf_port_add_counters (vlib_main_t *, vnet_dev_port_t *);
+
+/* inline funcs */
+
+static inline u32
+iavf_get_u32 (void *start, int offset)
+{
+ return *(u32 *) (((u8 *) start) + offset);
+}
+
+static inline void
+iavf_reg_write (iavf_device_t *ad, u32 addr, u32 val)
+{
+ __atomic_store_n ((u32 *) ((u8 *) ad->bar0 + addr), val, __ATOMIC_RELEASE);
+}
+
+static inline u32
+iavf_reg_read (iavf_device_t *ad, u32 addr)
+{
+ return __atomic_load_n ((u32 *) (ad->bar0 + addr), __ATOMIC_RELAXED);
+ ;
+}
+
+static inline void
+iavf_reg_flush (iavf_device_t *ad)
+{
+ iavf_reg_read (ad, IAVF_VFGEN_RSTAT);
+ asm volatile("" ::: "memory");
+}
+
+#define log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, iavf_log.class, "%U" f, \
+ format_vnet_dev_log, (dev), \
+ clib_string_skip_prefix (__func__, "iavf_"), ##__VA_ARGS__)
+#define log_info(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_INFO, iavf_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_notice(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_NOTICE, iavf_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, iavf_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, iavf_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+
+/* temp */
+#define IAVF_RX_VECTOR_SZ VLIB_FRAME_SIZE
+
+typedef struct
+{
+ u64 qw1s[IAVF_RX_MAX_DESC_IN_CHAIN - 1];
+ u32 buffers[IAVF_RX_MAX_DESC_IN_CHAIN - 1];
+} iavf_rx_tail_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vlib_buffer_t *bufs[IAVF_RX_VECTOR_SZ];
+ u16 next[IAVF_RX_VECTOR_SZ];
+ u64 qw1s[IAVF_RX_VECTOR_SZ];
+ u32 flow_ids[IAVF_RX_VECTOR_SZ];
+ iavf_rx_tail_t tails[IAVF_RX_VECTOR_SZ];
+} iavf_rt_data_t;
+
+#define foreach_iavf_tx_node_counter \
+ _ (SEG_SZ_EXCEEDED, seg_sz_exceeded, ERROR, "segment size exceeded") \
+ _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots")
+
+typedef enum
+{
+#define _(f, n, s, d) IAVF_TX_NODE_CTR_##f,
+ foreach_iavf_tx_node_counter
+#undef _
+} iavf_tx_node_counter_t;
+
+#define foreach_iavf_rx_node_counter \
+ _ (BUFFER_ALLOC, buffer_alloc, ERROR, "buffer alloc error")
+
+typedef enum
+{
+#define _(f, n, s, d) IAVF_RX_NODE_CTR_##f,
+ foreach_iavf_rx_node_counter
+#undef _
+} iavf_rx_node_counter_t;
+
+#endif /* _IIAVF_H_ */
diff --git a/src/plugins/dev_iavf/iavf_desc.h b/src/plugins/dev_iavf/iavf_desc.h
new file mode 100644
index 00000000000..053013ed9b0
--- /dev/null
+++ b/src/plugins/dev_iavf/iavf_desc.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_DESC_H_
+#define _IIAVF_DESC_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_iavf/virtchnl.h>
+
+#define IAVF_RX_MAX_DESC_IN_CHAIN 5
+
+#define IAVF_TXD_CMD(x) (1 << (x + 4))
+#define IAVF_TXD_CMD_EXT(x, val) ((u64) val << (x + 4))
+#define IAVF_TXD_CMD_EOP IAVF_TXD_CMD (0)
+#define IAVF_TXD_CMD_RS IAVF_TXD_CMD (1)
+#define IAVF_TXD_CMD_RSV IAVF_TXD_CMD (2)
+#define IAVF_TXD_CMD_IIPT_NONE IAVF_TXD_CMD_EXT (5, 0)
+#define IAVF_TXD_CMD_IIPT_IPV6 IAVF_TXD_CMD_EXT (5, 1)
+#define IAVF_TXD_CMD_IIPT_IPV4_NO_CSUM IAVF_TXD_CMD_EXT (5, 2)
+#define IAVF_TXD_CMD_IIPT_IPV4 IAVF_TXD_CMD_EXT (5, 3)
+#define IAVF_TXD_CMD_L4T_UNKNOWN IAVF_TXD_CMD_EXT (8, 0)
+#define IAVF_TXD_CMD_L4T_TCP IAVF_TXD_CMD_EXT (8, 1)
+#define IAVF_TXD_CMD_L4T_SCTP IAVF_TXD_CMD_EXT (8, 2)
+#define IAVF_TXD_CMD_L4T_UDP IAVF_TXD_CMD_EXT (8, 3)
+#define IAVF_TXD_OFFSET(x, factor, val) \
+ (((u64) val / (u64) factor) << (16 + x))
+#define IAVF_TXD_OFFSET_MACLEN(val) IAVF_TXD_OFFSET (0, 2, val)
+#define IAVF_TXD_OFFSET_IPLEN(val) IAVF_TXD_OFFSET (7, 4, val)
+#define IAVF_TXD_OFFSET_L4LEN(val) IAVF_TXD_OFFSET (14, 4, val)
+#define IAVF_TXD_DTYP_CTX 0x1ULL
+#define IAVF_TXD_CTX_CMD_TSO IAVF_TXD_CMD (0)
+#define IAVF_TXD_CTX_SEG(val, x) (((u64) val) << (30 + x))
+#define IAVF_TXD_CTX_SEG_TLEN(val) IAVF_TXD_CTX_SEG (val, 0)
+#define IAVF_TXD_CTX_SEG_MSS(val) IAVF_TXD_CTX_SEG (val, 20)
+
+typedef union
+{
+ struct
+ {
+ u32 mirr : 13;
+ u32 _reserved1 : 3;
+ u32 l2tag1 : 16;
+ u32 filter_status;
+ };
+ u64 as_u64;
+} iavf_rx_desc_qw0_t;
+
+typedef union
+{
+ struct
+ {
+ /* status */
+ u64 dd : 1;
+ u64 eop : 1;
+ u64 l2tag1p : 1;
+ u64 l3l4p : 1;
+ u64 crcp : 1;
+ u64 _reserved2 : 4;
+ u64 ubmcast : 2;
+ u64 flm : 1;
+ u64 fltstat : 2;
+ u64 lpbk : 1;
+ u64 ipv6exadd : 1;
+ u64 _reserved3 : 2;
+ u64 int_udp_0 : 1;
+
+ /* error */
+ u64 _reserved_err0 : 3;
+ u64 ipe : 1;
+ u64 l4e : 1;
+ u64 _reserved_err5 : 1;
+ u64 oversize : 1;
+ u64 _reserved_err7 : 1;
+
+ u64 rsv2 : 3;
+ u64 ptype : 8;
+ u64 length : 26;
+ };
+ u64 as_u64;
+} iavf_rx_desc_qw1_t;
+
+STATIC_ASSERT_SIZEOF (iavf_rx_desc_qw0_t, 8);
+STATIC_ASSERT_SIZEOF (iavf_rx_desc_qw1_t, 8);
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ iavf_rx_desc_qw0_t qw0;
+ iavf_rx_desc_qw0_t qw1;
+ u64 rsv3 : 64;
+ u32 flex_lo;
+ u32 fdid_flex_hi;
+ };
+ u64 qword[4];
+ u64 addr;
+#ifdef CLIB_HAVE_VEC256
+ u64x4 as_u64x4;
+#endif
+ };
+} iavf_rx_desc_t;
+
+STATIC_ASSERT_SIZEOF (iavf_rx_desc_t, 32);
+
+typedef struct
+{
+ union
+ {
+ u64 qword[2];
+#ifdef CLIB_HAVE_VEC128
+ u64x2 as_u64x2;
+#endif
+ };
+} iavf_tx_desc_t;
+
+STATIC_ASSERT_SIZEOF (iavf_tx_desc_t, 16);
+
+#endif /* _IIAVF_DESC_H_ */
diff --git a/src/plugins/dev_iavf/iavf_regs.h b/src/plugins/dev_iavf/iavf_regs.h
new file mode 100644
index 00000000000..be3070b05e5
--- /dev/null
+++ b/src/plugins/dev_iavf/iavf_regs.h
@@ -0,0 +1,364 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_REGS_H_
+#define _IIAVF_REGS_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+
+#define iavf_reg_ctrl_t_fields \
+ __ (1, full_duplex) \
+ __ (1, _reserved1) \
+ __ (1, gio_master_disable) \
+ __ (3, _reserved3) \
+ __ (1, set_link_up) \
+ __ (9, _reserved7) \
+ __ (1, sdp0_gpien) \
+ __ (1, sdp1_gpien) \
+ __ (1, sdp0_data) \
+ __ (1, sdp1_data) \
+ __ (1, adww3wuc) \
+ __ (1, sdp0_wde) \
+ __ (1, sdp0_iodir) \
+ __ (1, sdp1_iodir) \
+ __ (2, _reserved24) \
+ __ (1, port_sw_reset) \
+ __ (1, rx_flow_ctl_en) \
+ __ (1, tx_flow_ctl_en) \
+ __ (1, device_reset) \
+ __ (1, vlan_mode_enable) \
+ __ (1, phy_reset)
+
+#define iavf_reg_status_t_fields \
+ __ (1, full_duplex) \
+ __ (1, link_up) \
+ __ (2, _reserved2) \
+ __ (1, tx_off) \
+ __ (1, _reserved5) \
+ __ (2, speed) \
+ __ (2, asdv) \
+ __ (1, phy_reset_asserted) \
+ __ (8, _reserved11) \
+ __ (1, gio_master_en_sts) \
+ __ (1, dev_rst_set) \
+ __ (1, rst_done) \
+ __ (1, speed_2p5) \
+ __ (7, _reserved23) \
+ __ (1, lpi_ignore) \
+ __ (1, _reserved31)
+
+#define iavf_reg_ctrl_ext_t_fields \
+ __ (2, _reserved0) \
+ __ (1, sdp2_gpien) \
+ __ (1, sdp3_gpien) \
+ __ (2, _reserved4) \
+ __ (1, sdp2_data) \
+ __ (1, sdp3_data) \
+ __ (2, _reserved8) \
+ __ (1, sdp2_iodir) \
+ __ (1, sdp3_iodir) \
+ __ (1, _reserved12) \
+ __ (1, eeprom_block_rst) \
+ __ (2, _reserved14) \
+ __ (1, no_snoop_dis) \
+ __ (1, relaxed_ordering_dis) \
+ __ (2, _reserved18) \
+ __ (1, phy_power_down_ena) \
+ __ (5, _reserved121) \
+ __ (1, ext_vlan_ena) \
+ __ (1, _reserved127) \
+ __ (1, driver_loaded) \
+ __ (3, _reserved29)
+
+#define iavf_reg_mdic_t_fields \
+ __ (16, data) \
+ __ (5, regadd) \
+ __ (5, _reserved21) \
+ __ (2, opcode) \
+ __ (1, ready) \
+ __ (1, mid_ie) \
+ __ (1, mid_err) \
+ __ (1, _reserved31)
+
+#define iavf_reg_rctl_t_fields \
+ __ (1, _reserved0) \
+ __ (1, rx_enable) \
+ __ (1, store_bad_packets) \
+ __ (1, uc_promisc_ena) \
+ __ (1, mc_promisc_ena) \
+ __ (1, long_pkt_reception_ena) \
+ __ (2, loopback_mode) \
+ __ (2, hash_select) \
+ __ (2, _reserved10) \
+ __ (2, mc_uc_tbl_off) \
+ __ (1, _reserved14) \
+ __ (1, bcast_accept_mode) \
+ __ (2, rx_buf_sz) \
+ __ (1, vlan_filter_ena) \
+ __ (1, cannonical_form_ind_ena) \
+ __ (1, cannonical_form_ind_bit_val) \
+ __ (1, pad_small_rx_pkts) \
+ __ (1, discard_pause_frames) \
+ __ (1, pass_mac_ctrl_frames) \
+ __ (2, _reserved24) \
+ __ (1, strip_eth_crc) \
+ __ (5, _reserved26)
+
+#define iavf_reg_tctl_t_fields \
+ __ (1, _reserved0) \
+ __ (1, tx_enable) \
+ __ (1, _reserved2) \
+ __ (1, pad_short_pkts) \
+ __ (8, collision_threshold) \
+ __ (10, backoff_slot_time) \
+ __ (1, sw_xoff_tx) \
+ __ (1, _reserved23) \
+ __ (1, retransmit_on_late_colision) \
+ __ (7, reserved25)
+
+#define iavf_reg_phpm_t_fields \
+ __ (1, _reserved0) \
+ __ (1, restart_autoneg) \
+ __ (1, _reserved2) \
+ __ (1, dis_1000_in_non_d0a) \
+ __ (1, link_energy_detect) \
+ __ (1, go_link_disc) \
+ __ (1, disable_1000) \
+ __ (1, spd_b2b_en) \
+ __ (1, rst_compl) \
+ __ (1, dis_100_in_non_d0a) \
+ __ (1, ulp_req) \
+ __ (1, disable_2500) \
+ __ (1, dis_2500_in_non_d0a) \
+ __ (1, ulp_trig) \
+ __ (2, ulp_delay) \
+ __ (1, link_enery_en) \
+ __ (1, dev_off_en) \
+ __ (1, dev_off_state) \
+ __ (1, ulp_en) \
+ __ (12, _reserved20)
+
+#define iavf_reg_manc_t_fields \
+ __ (1, flow_ctrl_discard) \
+ __ (1, ncsi_discard) \
+ __ (12, _reserved2) \
+ __ (1, fw_reset) \
+ __ (1, tco_isolate) \
+ __ (1, tco_reset) \
+ __ (1, rcv_tco_en) \
+ __ (1, keep_phy_link_up) \
+ __ (1, rcv_all) \
+ __ (1, inhibit_ulp) \
+ __ (2, _reserved21) \
+ __ (1, en_xsum_filter) \
+ __ (1, en_ipv4_filter) \
+ __ (1, fixed_net_type) \
+ __ (1, net_type) \
+ __ (1, ipv6_adv_only) \
+ __ (1, en_bmc2os) \
+ __ (1, en_bmc2net) \
+ __ (1, mproxye) \
+ __ (1, mproxya)
+
+#define iavf_reg_swsm_t_fields \
+ __ (1, smbi) \
+ __ (1, swesmbi) \
+ __ (30, _reserved2)
+
+#define iavf_reg_fwsm_t_fields \
+ __ (1, eep_fw_semaphore) \
+ __ (3, fw_mode) \
+ __ (2, _reserved4) \
+ __ (1, eep_reload_ind) \
+ __ (8, _reserved7) \
+ __ (1, fw_val_bit) \
+ __ (3, reset_ctr) \
+ __ (6, ext_err_ind) \
+ __ (1, pcie_config_err_ind) \
+ __ (5, _reserved26) \
+ __ (1, factory_mac_addr_restored)
+
+#define iavf_reg_sw_fw_sync_t_fields \
+ __ (1, sw_flash_sm) \
+ __ (1, sw_phy_sm) \
+ __ (1, sw_i2c_sm) \
+ __ (1, sw_mac_csr_sm) \
+ __ (3, _reserved4) \
+ __ (1, sw_svr_sm) \
+ __ (1, sw_mb_sm) \
+ __ (1, _reserved9) \
+ __ (1, sw_mng_sm) \
+ __ (5, _reserved11) \
+ __ (1, fw_flash_sm) \
+ __ (1, fw_phy_sm) \
+ __ (1, fw_i2c_sm) \
+ __ (1, fw_mac_csr_sm) \
+ __ (3, _reserved20) \
+ __ (1, fw_svr_sm) \
+ __ (8, _reserved24)
+
+#define iavf_reg_srrctl_t_fields \
+ __ (7, bsizepacket) \
+ __ (1, _reserved7) \
+ __ (6, bsizeheader) \
+ __ (2, timer1_sel) \
+ __ (1, _reserved16) \
+ __ (2, timer0_sel) \
+ __ (1, use_domain) \
+ __ (5, rdmts) \
+ __ (3, desc_type) \
+ __ (2, _reserved28) \
+ __ (1, timestamp) \
+ __ (1, drop_en)
+
+#define iavf_reg_rxdctl_t_fields \
+ __ (5, pthresh) \
+ __ (3, _reserved5) \
+ __ (5, hthresh) \
+ __ (3, _reserved13) \
+ __ (5, wthresh) \
+ __ (4, _reserved21) \
+ __ (1, enable) \
+ __ (1, swflush) \
+ __ (5, _reserved27)
+
+#define iavf_reg_eec_t_fields \
+ __ (6, _reserved0) \
+ __ (1, flash_in_use) \
+ __ (1, _reserved7) \
+ __ (1, ee_pres) \
+ __ (1, auto_rd) \
+ __ (1, _reservedxi10) \
+ __ (4, ee_size) \
+ __ (4, pci_ana_done) \
+ __ (1, flash_detected) \
+ __ (2, _reserved20) \
+ __ (1, shadow_modified) \
+ __ (1, flupd) \
+ __ (1, _reserved24) \
+ __ (1, sec1val) \
+ __ (1, fludone) \
+ __ (5, _reserved27)
+
+#define iavf_reg_eemngctl_t_fields \
+ __ (11, addr) \
+ __ (4, reserved11) \
+ __ (1, cmd_valid) \
+ __ (1, write) \
+ __ (1, eebusy) \
+ __ (1, cfg_done) \
+ __ (12, _reserved19) \
+ __ (1, done)
+
+#define IAVF_REG_STRUCT(n) \
+ typedef union \
+ { \
+ struct \
+ { \
+ n##_fields; \
+ }; \
+ u32 as_u32; \
+ } n; \
+ STATIC_ASSERT_SIZEOF (n, 4);
+
+#define __(n, f) u32 f : n;
+IAVF_REG_STRUCT (iavf_reg_status_t);
+IAVF_REG_STRUCT (iavf_reg_ctrl_t);
+IAVF_REG_STRUCT (iavf_reg_ctrl_ext_t);
+IAVF_REG_STRUCT (iavf_reg_mdic_t);
+IAVF_REG_STRUCT (iavf_reg_rctl_t);
+IAVF_REG_STRUCT (iavf_reg_tctl_t);
+IAVF_REG_STRUCT (iavf_reg_phpm_t);
+IAVF_REG_STRUCT (iavf_reg_manc_t);
+IAVF_REG_STRUCT (iavf_reg_swsm_t);
+IAVF_REG_STRUCT (iavf_reg_fwsm_t);
+IAVF_REG_STRUCT (iavf_reg_sw_fw_sync_t);
+IAVF_REG_STRUCT (iavf_reg_srrctl_t);
+IAVF_REG_STRUCT (iavf_reg_rxdctl_t);
+IAVF_REG_STRUCT (iavf_reg_eec_t);
+IAVF_REG_STRUCT (iavf_reg_eemngctl_t);
+#undef __
+
+#define foreach_iavf_reg \
+ _ (0x00000, CTRL, iavf_reg_ctrl_t_fields) \
+ _ (0x00008, STATUS, iavf_reg_status_t_fields) \
+ _ (0x00018, CTRL_EXT, iavf_reg_ctrl_ext_t_fields) \
+ _ (0x00020, MDIC, iavf_reg_mdic_t_fields) \
+ _ (0x00100, RCTL, iavf_reg_rctl_t_fields) \
+ _ (0x00400, TCTL, iavf_reg_tctl_t_fields) \
+ _ (0x00404, TCTL_EXT, ) \
+ _ (0x00e14, PHPM, iavf_reg_phpm_t_fields) \
+ _ (0x01500, ICR, ) \
+ _ (0x0150c, IMC, ) \
+ _ (0x05400, RAL0, ) \
+ _ (0x05404, RAH0, ) \
+ _ (0x05820, MANC, iavf_reg_manc_t_fields) \
+ _ (0x05b50, SWSM, iavf_reg_swsm_t_fields) \
+ _ (0x05b54, FWSM, iavf_reg_fwsm_t_fields) \
+ _ (0x05b5c, SW_FW_SYNC, iavf_reg_sw_fw_sync_t_fields) \
+ _ (0x0c000, RDBAL0, ) \
+ _ (0x0c004, RDBAH0, ) \
+ _ (0x0c008, RDLEN0, ) \
+ _ (0x0c00c, SRRCTL0, iavf_reg_srrctl_t_fields) \
+ _ (0x0c010, RDH0, ) \
+ _ (0x0c018, RDT0, ) \
+ _ (0x0c028, RXDCTL0, iavf_reg_rxdctl_t_fields) \
+ _ (0x12010, EEC, iavf_reg_eec_t_fields) \
+ _ (0x12030, EEMNGCTL, iavf_reg_eemngctl_t_fields)
+
+#define IAVF_REG_RDBAL(n) (IAVF_REG_RDBAL0 + (n) *0x40)
+#define IAVF_REG_RDBAH(n) (IAVF_REG_RDBAH0 + (n) *0x40)
+#define IAVF_REG_RDLEN(n) (IAVF_REG_RDLEN0 + (n) *0x40)
+#define IAVF_REG_SRRCTL(n) (IAVF_REG_SRRCTL0 + (n) *0x40)
+#define IAVF_REG_RDH(n) (IAVF_REG_RDH0 + (n) *0x40)
+#define IAVF_REG_RDT(n) (IAVF_REG_RDT0 + (n) *0x40)
+#define IAVF_REG_RXDCTL(n) (IAVF_REG_RXDCTL0 + (n) *0x40)
+#define IAVF_REG_SRRCTL(n) (IAVF_REG_SRRCTL0 + (n) *0x40)
+
+typedef enum
+{
+#define _(o, n, f) IAVF_REG_##n = (o),
+ foreach_iavf_reg
+#undef _
+} iavf_reg_t;
+
+typedef union
+{
+ struct
+ {
+ u32 intena : 1;
+ u32 clearpba : 1;
+ u32 swint_trig : 1;
+ u32 itr_indx : 2;
+ u32 interval : 12;
+ u32 _rsvd23 : 7;
+ u32 sw_itr_indx_ena : 1;
+ u32 sw_itr_indx : 2;
+ u32 _rsvd29 : 3;
+ u32 wb_on_itr : 1;
+ u32 intena_msk : 1;
+ };
+ u32 as_u32;
+} iavf_dyn_ctl;
+
+STATIC_ASSERT_SIZEOF (iavf_dyn_ctl, 4);
+
+typedef union
+{
+ struct
+ {
+ u32 _reserved0 : 30;
+ u32 adminq : 1;
+ u32 _reserved31 : 1;
+ };
+ u32 as_u32;
+} iavf_vfint_icr0_ena1;
+
+STATIC_ASSERT_SIZEOF (iavf_vfint_icr0_ena1, 4);
+
+#endif /* _IIAVF_REGS_H_ */
diff --git a/src/plugins/dev_iavf/port.c b/src/plugins/dev_iavf/port.c
new file mode 100644
index 00000000000..982436d9b45
--- /dev/null
+++ b/src/plugins/dev_iavf/port.c
@@ -0,0 +1,543 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/iavf_regs.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "port",
+};
+
+static const u8 default_rss_key[] = {
+ 0x44, 0x39, 0x79, 0x6b, 0xb5, 0x4c, 0x50, 0x23, 0xb6, 0x75, 0xea, 0x5b, 0x12,
+ 0x4f, 0x9f, 0x30, 0xb8, 0xa2, 0xc0, 0x3d, 0xdf, 0xdc, 0x4d, 0x02, 0xa0, 0x8c,
+ 0x9b, 0x33, 0x4a, 0xf6, 0x4a, 0x4c, 0x05, 0xc6, 0xfa, 0x34, 0x39, 0x58, 0xd8,
+ 0x55, 0x7d, 0x99, 0x58, 0x3a, 0xe1, 0x38, 0xc9, 0x2e, 0x81, 0x15, 0x03, 0x66,
+};
+
+const static iavf_dyn_ctl dyn_ctln_disabled = {};
+const static iavf_dyn_ctl dyn_ctln_enabled = {
+ .intena = 1,
+ .clearpba = 1,
+ .interval = IAVF_ITR_INT / 2,
+};
+const static iavf_dyn_ctl dyn_ctln_wb_on_itr = {
+ .itr_indx = 1,
+ .interval = 2,
+ .wb_on_itr = 1,
+};
+
+vnet_dev_rv_t
+iavf_port_vlan_strip_disable (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ virtchnl_vlan_caps_t vc;
+ vnet_dev_rv_t rv;
+ u32 outer, inner;
+ const u32 mask = VIRTCHNL_VLAN_ETHERTYPE_8100;
+
+ if ((ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN_V2) == 0)
+ return iavf_vc_op_disable_vlan_stripping (vm, dev);
+
+ if ((rv = iavf_vc_op_get_offload_vlan_v2_caps (vm, dev, &vc)))
+ return rv;
+
+ outer = vc.offloads.stripping_support.outer;
+ inner = vc.offloads.stripping_support.inner;
+
+ outer = outer & VIRTCHNL_VLAN_TOGGLE ? outer & mask : 0;
+ inner = inner & VIRTCHNL_VLAN_TOGGLE ? inner & mask : 0;
+
+ virtchnl_vlan_setting_t vs = {
+ .vport_id = ap->vsi_id,
+ .outer_ethertype_setting = outer,
+ .inner_ethertype_setting = inner,
+ };
+
+ return iavf_vc_op_disable_vlan_stripping_v2 (vm, dev, &vs);
+}
+
+vnet_dev_rv_t
+iavf_port_init_rss (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ u16 keylen = clib_min (sizeof (default_rss_key), ap->rss_key_size);
+ u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_rss_key_t, key, keylen)];
+ virtchnl_rss_key_t *key = (virtchnl_rss_key_t *) buffer;
+
+ if (!port->attr.caps.rss)
+ return VNET_DEV_OK;
+
+ /* config RSS key */
+ *key = (virtchnl_rss_key_t){
+ .vsi_id = ap->vsi_id,
+ .key_len = keylen,
+ };
+
+ clib_memcpy (key->key, default_rss_key, sizeof (default_rss_key));
+
+ return iavf_vc_op_config_rss_key (vm, dev, key);
+}
+
+vnet_dev_rv_t
+iavf_port_update_rss_lut (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ u16 lut_size = clib_min (IAVF_MAX_RSS_LUT_SIZE, ap->rss_lut_size);
+ u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_rss_lut_t, lut, lut_size)];
+ virtchnl_rss_lut_t *lut = (virtchnl_rss_lut_t *) buffer;
+ u32 enabled_rxq_bmp = 0;
+
+ if (!port->attr.caps.rss)
+ return VNET_DEV_OK;
+
+ *lut = (virtchnl_rss_lut_t){
+ .vsi_id = ap->vsi_id,
+ .lut_entries = lut_size,
+ };
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->enabled)
+ enabled_rxq_bmp |= 1ULL << q->queue_id;
+
+ /* config RSS LUT */
+ for (u32 i = 0, j; i < lut->lut_entries;)
+ foreach_set_bit_index (j, enabled_rxq_bmp)
+ {
+ lut->lut[i++] = j;
+ if (i >= lut->lut_entries)
+ break;
+ }
+
+ return iavf_vc_op_config_rss_lut (vm, dev, lut);
+}
+
+vnet_dev_rv_t
+iavf_port_init_vsi_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ virtchnl_queue_pair_info_t *qpi;
+ u16 vsi_id = ap->vsi_id;
+ u16 data_size = vlib_buffer_get_default_data_size (vm);
+ u16 max_frame_size = port->max_rx_frame_size;
+ u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_vsi_queue_config_info_t, qpair,
+ ap->num_qp)];
+ virtchnl_vsi_queue_config_info_t *ci =
+ (virtchnl_vsi_queue_config_info_t *) buffer;
+
+ *ci = (virtchnl_vsi_queue_config_info_t){
+ .num_queue_pairs = ap->num_qp,
+ .vsi_id = vsi_id,
+ };
+
+ for (u16 i = 0; i < ap->num_qp; i++)
+ ci->qpair[i] = (virtchnl_queue_pair_info_t){
+ .rxq = { .vsi_id = vsi_id,
+ .queue_id = i,
+ .max_pkt_size = ETHERNET_MIN_PACKET_BYTES },
+ .txq = { .vsi_id = vsi_id, .queue_id = i },
+ };
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (q);
+ qpi = ci->qpair + q->queue_id;
+ qpi->rxq.ring_len = q->size;
+ qpi->rxq.databuffer_size = data_size;
+ qpi->rxq.dma_ring_addr = vnet_dev_get_dma_addr (vm, dev, arq->descs);
+ qpi->rxq.max_pkt_size = max_frame_size;
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (q);
+ qpi = ci->qpair + q->queue_id;
+ qpi->txq.ring_len = q->size;
+ qpi->txq.dma_ring_addr = vnet_dev_get_dma_addr (vm, dev, atq->descs);
+ }
+
+ return iavf_vc_op_config_vsi_queues (vm, dev, ci);
+}
+
+vnet_dev_rv_t
+iavf_port_rx_irq_config (vlib_main_t *vm, vnet_dev_port_t *port, int enable)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ u16 n_rx_vectors = ap->n_rx_vectors;
+ u8 buffer[VIRTCHNL_MSG_SZ (virtchnl_irq_map_info_t, vecmap, n_rx_vectors)];
+ u8 n_intr_mode_queues_per_vector[n_rx_vectors];
+ u8 n_queues_per_vector[n_rx_vectors];
+ virtchnl_irq_map_info_t *im = (virtchnl_irq_map_info_t *) buffer;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "intr mode per queue bitmap 0x%x",
+ ap->intr_mode_per_rxq_bitmap);
+
+ for (u32 i = 0; i < n_rx_vectors; i++)
+ n_intr_mode_queues_per_vector[i] = n_queues_per_vector[i] = 0;
+
+ *im = (virtchnl_irq_map_info_t){
+ .num_vectors = n_rx_vectors,
+ };
+
+ if (port->attr.caps.interrupt_mode)
+ {
+ for (u16 i = 0; i < im->num_vectors; i++)
+ im->vecmap[i] = (virtchnl_vector_map_t){
+ .vsi_id = ap->vsi_id,
+ .vector_id = i + 1,
+ };
+ if (enable)
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ if (rxq->enabled)
+ {
+ u32 i = rxq->rx_thread_index;
+ im->vecmap[i].rxq_map |= 1 << rxq->queue_id;
+ n_queues_per_vector[i]++;
+ n_intr_mode_queues_per_vector[i] +=
+ u64_is_bit_set (ap->intr_mode_per_rxq_bitmap, rxq->queue_id);
+ }
+ }
+ else
+ {
+ im->vecmap[0] = (virtchnl_vector_map_t){
+ .vsi_id = ap->vsi_id,
+ .vector_id = 1,
+ };
+ if (enable)
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ if (rxq->enabled)
+ im->vecmap[0].rxq_map |= 1 << rxq->queue_id;
+ }
+
+ if ((rv = iavf_vc_op_config_irq_map (vm, dev, im)))
+ return rv;
+
+ for (int i = 0; i < n_rx_vectors; i++)
+ {
+ u32 val;
+
+ if (enable == 0 || n_queues_per_vector[i] == 0)
+ val = dyn_ctln_disabled.as_u32;
+ else if (ap->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_WB_ON_ITR &&
+ n_intr_mode_queues_per_vector[i] == 0)
+ val = dyn_ctln_wb_on_itr.as_u32;
+ else
+ val = dyn_ctln_enabled.as_u32;
+
+ iavf_reg_write (ad, IAVF_VFINT_DYN_CTLN (i), val);
+ log_debug (dev, "VFINT_DYN_CTLN(%u) set to 0x%x", i, val);
+ }
+
+ return rv;
+}
+
+static void
+avf_msix_n_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 line)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ vnet_dev_port_t *port = vnet_dev_get_port_by_id (dev, 0);
+
+ line--;
+
+ iavf_reg_write (ad, IAVF_VFINT_DYN_CTLN (line), dyn_ctln_enabled.as_u32);
+ vlib_node_set_interrupt_pending (vlib_get_main_by_index (line),
+ port->intf.rx_node_index);
+}
+
+vnet_dev_rv_t
+iavf_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ vnet_dev_rv_t rv;
+
+ log_debug (port->dev, "port %u", port->port_id);
+
+ ap->intr_mode_per_rxq_bitmap = 0;
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->interrupt_mode)
+ u64_bit_set (&ap->intr_mode_per_rxq_bitmap, q->queue_id, 1);
+
+ if ((rv = iavf_port_vlan_strip_disable (vm, port)))
+ return rv;
+
+ if ((rv = iavf_port_init_rss (vm, port)))
+ return rv;
+
+ vnet_dev_pci_msix_add_handler (vm, dev, &avf_msix_n_handler, 1,
+ ap->n_rx_vectors);
+ vnet_dev_pci_msix_enable (vm, dev, 1, ap->n_rx_vectors);
+ for (u32 i = 1; i < ap->n_rx_vectors; i++)
+ vnet_dev_pci_msix_set_polling_thread (vm, dev, i + 1, i);
+
+ if (port->dev->poll_stats)
+ iavf_port_add_counters (vm, port);
+
+ return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+iavf_enable_disable_queues (vlib_main_t *vm, vnet_dev_port_t *port, int enable)
+{
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+
+ virtchnl_queue_select_t qs = {
+ .vsi_id = ap->vsi_id,
+ };
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if ((enable && q->enabled) || (!enable && q->started))
+ qs.rx_queues |= 1ULL << q->queue_id;
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if ((enable && q->enabled) || (!enable && q->started))
+ qs.tx_queues |= 1ULL << q->queue_id;
+
+ return enable ? iavf_vc_op_enable_queues (vm, port->dev, &qs) :
+ iavf_vc_op_disable_queues (vm, port->dev, &qs);
+}
+
+vnet_dev_rv_t
+iavf_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_rv_t rv;
+
+ log_debug (port->dev, "port %u", port->port_id);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->enabled)
+ if ((rv = iavf_rx_queue_start (vm, q)))
+ goto done;
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if ((rv = iavf_tx_queue_start (vm, q)))
+ goto done;
+
+ if ((rv = iavf_port_update_rss_lut (vm, port)))
+ goto done;
+
+ /* configure qpairs */
+ if ((rv = iavf_port_init_vsi_queues (vm, port)))
+ goto done;
+
+ if ((rv = iavf_port_rx_irq_config (vm, port, /* enable */ 1)))
+ goto done;
+
+ if ((rv = iavf_enable_disable_queues (vm, port, 1)))
+ goto done;
+
+ if (port->dev->poll_stats)
+ vnet_dev_poll_port_add (vm, port, 1, iavf_port_poll_stats);
+
+done:
+ if (rv)
+ {
+ foreach_vnet_dev_port_rx_queue (q, port)
+ iavf_rx_queue_stop (vm, q);
+ foreach_vnet_dev_port_tx_queue (q, port)
+ iavf_tx_queue_stop (vm, q);
+ }
+ return rv;
+}
+
+void
+iavf_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ log_debug (port->dev, "port %u", port->port_id);
+
+ iavf_enable_disable_queues (vm, port, /* enable */ 0);
+ iavf_port_rx_irq_config (vm, port, /* disable */ 0);
+
+ if (port->dev->poll_stats)
+ vnet_dev_poll_port_remove (vm, port, iavf_port_poll_stats);
+
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ iavf_rx_queue_stop (vm, rxq);
+
+ foreach_vnet_dev_port_tx_queue (txq, port)
+ iavf_tx_queue_stop (vm, txq);
+
+ vnet_dev_port_state_change (vm, port,
+ (vnet_dev_port_state_changes_t){
+ .change.link_state = 1,
+ .change.link_speed = 1,
+ .link_speed = 0,
+ .link_state = 0,
+ });
+}
+
+vnet_dev_rv_t
+iavf_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ if (port->started)
+ rv = VNET_DEV_ERR_PORT_STARTED;
+ break;
+
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ break;
+
+ default:
+ rv = VNET_DEV_ERR_NOT_SUPPORTED;
+ };
+
+ return rv;
+}
+
+static vnet_dev_rv_t
+iavf_port_add_del_eth_addr (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_hw_addr_t *addr, int is_add,
+ int is_primary)
+{
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ virtchnl_ether_addr_list_t al = {
+ .vsi_id = ap->vsi_id,
+ .num_elements = 1,
+ .list[0].primary = is_primary ? 1 : 0,
+ .list[0].extra = is_primary ? 0 : 1,
+ };
+
+ clib_memcpy (al.list[0].addr, addr, sizeof (al.list[0].addr));
+
+ return is_add ? iavf_vc_op_add_eth_addr (vm, port->dev, &al) :
+ iavf_vc_op_del_eth_addr (vm, port->dev, &al);
+}
+
+static vnet_dev_rv_t
+iavf_port_cfg_rxq_int_mode_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ u16 qid, u8 state, u8 all)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ vnet_dev_t *dev = port->dev;
+ char *ed = state ? "ena" : "disa";
+ char qstr[16];
+ u64 old, new = 0;
+
+ state = state != 0;
+ old = ap->intr_mode_per_rxq_bitmap;
+
+ if (all)
+ {
+ snprintf (qstr, sizeof (qstr), "all queues");
+ if (state)
+ foreach_vnet_dev_port_rx_queue (q, port)
+ u64_bit_set (&new, q->queue_id, 1);
+ }
+ else
+ {
+ snprintf (qstr, sizeof (qstr), "queue %u", qid);
+ new = old;
+ u64_bit_set (&new, qid, state);
+ }
+
+ if (new == old)
+ {
+ log_warn (dev, "interrupt mode already %sbled on %s", ed, qstr);
+ return rv;
+ }
+
+ ap->intr_mode_per_rxq_bitmap = new;
+
+ if (port->started)
+ {
+ if ((rv = iavf_port_rx_irq_config (vm, port, 1)))
+ {
+ ap->intr_mode_per_rxq_bitmap = old;
+ log_err (dev, "failed to %sble interrupt mode on %s", ed, qstr);
+ return rv;
+ }
+ }
+
+ log_debug (dev, "interrupt mode %sbled on %s, new bitmap is 0x%x", ed, qstr,
+ new);
+ return rv;
+}
+
+vnet_dev_rv_t
+iavf_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_t *dev = port->dev;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ {
+ virtchnl_promisc_info_t pi = {
+ .vsi_id = ap->vsi_id,
+ .unicast_promisc = req->promisc,
+ .multicast_promisc = req->promisc,
+ };
+
+ rv = iavf_vc_op_config_promisc_mode (vm, dev, &pi);
+ }
+ break;
+
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ rv = iavf_port_add_del_eth_addr (vm, port, &port->primary_hw_addr,
+ /* is_add */ 0,
+ /* is_primary */ 1);
+ if (rv == VNET_DEV_OK)
+ rv = iavf_port_add_del_eth_addr (vm, port, &req->addr,
+ /* is_add */ 1,
+ /* is_primary */ 1);
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ rv = iavf_port_add_del_eth_addr (vm, port, &req->addr,
+ /* is_add */ 1,
+ /* is_primary */ 0);
+ break;
+
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ rv = iavf_port_add_del_eth_addr (vm, port, &req->addr,
+ /* is_add */ 0,
+ /* is_primary */ 0);
+ break;
+
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ break;
+
+ case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE:
+ rv = iavf_port_cfg_rxq_int_mode_change (vm, port, req->queue_id, 1,
+ req->all_queues);
+ break;
+
+ case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE:
+ rv = iavf_port_cfg_rxq_int_mode_change (vm, port, req->queue_id, 0,
+ req->all_queues);
+ break;
+
+ default:
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ };
+
+ return rv;
+}
diff --git a/src/plugins/dev_iavf/queue.c b/src/plugins/dev_iavf/queue.c
new file mode 100644
index 00000000000..113c0dbdfc7
--- /dev/null
+++ b/src/plugins/dev_iavf/queue.c
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <vppinfra/ring.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "queue",
+};
+
+vnet_dev_rv_t
+iavf_rx_queue_alloc (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_rv_t rv;
+
+ arq->buffer_indices = clib_mem_alloc_aligned (
+ rxq->size * sizeof (arq->buffer_indices[0]), CLIB_CACHE_LINE_BYTES);
+
+ if ((rv =
+ vnet_dev_dma_mem_alloc (vm, dev, sizeof (iavf_rx_desc_t) * rxq->size,
+ 0, (void **) &arq->descs)))
+ return rv;
+
+ arq->qrx_tail = ad->bar0 + IAVF_QRX_TAIL (rxq->queue_id);
+
+ log_debug (dev, "queue %u alocated", rxq->queue_id);
+ return rv;
+}
+
+void
+iavf_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_t *dev = rxq->port->dev;
+ iavf_rxq_t *aq = vnet_dev_get_rx_queue_data (rxq);
+
+ log_debug (dev, "queue %u", rxq->queue_id);
+
+ vnet_dev_dma_mem_free (vm, dev, aq->descs);
+
+ foreach_pointer (p, aq->buffer_indices)
+ if (p)
+ clib_mem_free (p);
+}
+
+vnet_dev_rv_t
+iavf_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_t *dev = txq->port->dev;
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_rv_t rv;
+
+ if ((rv =
+ vnet_dev_dma_mem_alloc (vm, dev, sizeof (iavf_tx_desc_t) * txq->size,
+ 0, (void **) &atq->descs)))
+ return rv;
+
+ clib_ring_new_aligned (atq->rs_slots, 32, CLIB_CACHE_LINE_BYTES);
+ atq->buffer_indices = clib_mem_alloc_aligned (
+ txq->size * sizeof (atq->buffer_indices[0]), CLIB_CACHE_LINE_BYTES);
+ atq->tmp_descs = clib_mem_alloc_aligned (
+ sizeof (atq->tmp_descs[0]) * txq->size, CLIB_CACHE_LINE_BYTES);
+ atq->tmp_bufs = clib_mem_alloc_aligned (
+ sizeof (atq->tmp_bufs[0]) * txq->size, CLIB_CACHE_LINE_BYTES);
+
+ atq->qtx_tail = ad->bar0 + IAVF_QTX_TAIL (txq->queue_id);
+
+ log_debug (dev, "queue %u alocated", txq->queue_id);
+ return VNET_DEV_OK;
+}
+
+void
+iavf_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_t *dev = txq->port->dev;
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+ iavf_txq_t *aq = vnet_dev_get_tx_queue_data (txq);
+
+ log_debug (dev, "queue %u", txq->queue_id);
+ vnet_dev_dma_mem_free (vm, dev, aq->descs);
+ clib_ring_free (atq->rs_slots);
+
+ foreach_pointer (p, aq->tmp_descs, aq->tmp_bufs, aq->buffer_indices)
+ if (p)
+ clib_mem_free (p);
+}
+
+vnet_dev_rv_t
+iavf_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_t *dev = rxq->port->dev;
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+ iavf_rx_desc_t *d = arq->descs;
+ u32 n_enq, *bi = arq->buffer_indices;
+ u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+
+ n_enq = vlib_buffer_alloc_from_pool (vm, bi, rxq->size - 8, bpi);
+
+ if (n_enq < 8)
+ {
+ if (n_enq)
+ vlib_buffer_free (vm, bi, n_enq);
+ return VNET_DEV_ERR_BUFFER_ALLOC_FAIL;
+ }
+
+ for (u32 i = 0; i < n_enq; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi[i]);
+ u64 dma_addr = vnet_dev_get_dma_addr (vm, dev, b->data);
+ d[i] = (iavf_rx_desc_t){ .addr = dma_addr };
+ }
+
+ arq->n_enqueued = n_enq;
+ arq->next = 0;
+ __atomic_store_n (arq->qrx_tail, n_enq, __ATOMIC_RELEASE);
+ return VNET_DEV_OK;
+}
+
+void
+iavf_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+
+ __atomic_store_n (arq->qrx_tail, 0, __ATOMIC_RELAXED);
+ if (arq->n_enqueued)
+ {
+ vlib_buffer_free_from_ring_no_next (vm, arq->buffer_indices, arq->next,
+ rxq->size, arq->n_enqueued);
+ log_debug (rxq->port->dev, "%u buffers freed from rx queue %u",
+ arq->n_enqueued, rxq->queue_id);
+ }
+ arq->n_enqueued = arq->next = 0;
+}
+
+vnet_dev_rv_t
+iavf_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+ atq->next = 0;
+ atq->n_enqueued = 0;
+ clib_ring_reset (atq->rs_slots);
+ __atomic_store_n (atq->qtx_tail, 0, __ATOMIC_RELAXED);
+ return VNET_DEV_OK;
+}
+
+void
+iavf_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+
+ log_debug (txq->port->dev, "queue %u", txq->queue_id);
+
+ __atomic_store_n (atq->qtx_tail, 0, __ATOMIC_RELAXED);
+ if (atq->n_enqueued)
+ {
+ vlib_buffer_free_from_ring_no_next (vm, atq->buffer_indices,
+ atq->next - atq->n_enqueued,
+ txq->size, atq->n_enqueued);
+ log_debug (txq->port->dev, "%u buffers freed from tx queue %u",
+ atq->n_enqueued, txq->queue_id);
+ }
+ atq->n_enqueued = atq->next = 0;
+}
diff --git a/src/plugins/dev_iavf/rx_node.c b/src/plugins/dev_iavf/rx_node.c
new file mode 100644
index 00000000000..ee6d7e8def0
--- /dev/null
+++ b/src/plugins/dev_iavf/rx_node.c
@@ -0,0 +1,529 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <dev_iavf/iavf.h>
+
+#define IAVF_RX_REFILL_TRESHOLD 32
+
+static const iavf_rx_desc_qw1_t mask_eop = { .eop = 1 };
+static const iavf_rx_desc_qw1_t mask_flm = { .flm = 1 };
+static const iavf_rx_desc_qw1_t mask_dd = { .dd = 1 };
+static const iavf_rx_desc_qw1_t mask_ipe = { .ipe = 1 };
+static const iavf_rx_desc_qw1_t mask_dd_eop = { .dd = 1, .eop = 1 };
+
+static_always_inline int
+iavf_rxd_is_not_eop (iavf_rx_desc_t *d)
+{
+ return (d->qw1.as_u64 & mask_eop.as_u64) == 0;
+}
+
+static_always_inline int
+iavf_rxd_is_not_dd (iavf_rx_desc_t *d)
+{
+ return (d->qw1.as_u64 & mask_dd.as_u64) == 0;
+}
+
+static_always_inline void
+iavf_rx_desc_write (iavf_rx_desc_t *d, u64 addr)
+{
+#ifdef CLIB_HAVE_VEC256
+ *(u64x4 *) d = (u64x4){ addr, 0, 0, 0 };
+#else
+ d->qword[0] = addr;
+ d->qword[1] = 0;
+#endif
+}
+
+static_always_inline void
+iavf_rxq_refill (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_dev_rx_queue_t *rxq, int use_va_dma)
+{
+ u16 n_refill, mask, n_alloc, slot, size;
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+ vlib_buffer_t *b[8];
+ iavf_rx_desc_t *d, *first_d;
+ void *p[8];
+
+ size = rxq->size;
+ mask = size - 1;
+ n_refill = mask - arq->n_enqueued;
+ if (PREDICT_TRUE (n_refill <= IAVF_RX_REFILL_TRESHOLD))
+ return;
+
+ slot = (arq->next - n_refill - 1) & mask;
+
+ n_refill &= ~7; /* round to 8 */
+ n_alloc = vlib_buffer_alloc_to_ring_from_pool (
+ vm, arq->buffer_indices, slot, size, n_refill,
+ vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+
+ if (PREDICT_FALSE (n_alloc != n_refill))
+ {
+ vlib_error_count (vm, node->node_index, IAVF_RX_NODE_CTR_BUFFER_ALLOC,
+ 1);
+ if (n_alloc)
+ vlib_buffer_free_from_ring (vm, arq->buffer_indices, slot, size,
+ n_alloc);
+ return;
+ }
+
+ arq->n_enqueued += n_alloc;
+ first_d = arq->descs;
+
+ ASSERT (slot % 8 == 0);
+
+ while (n_alloc >= 8)
+ {
+ d = first_d + slot;
+
+ if (use_va_dma)
+ {
+ vlib_get_buffers_with_offset (vm, arq->buffer_indices + slot, p, 8,
+ sizeof (vlib_buffer_t));
+ iavf_rx_desc_write (d + 0, pointer_to_uword (p[0]));
+ iavf_rx_desc_write (d + 1, pointer_to_uword (p[1]));
+ iavf_rx_desc_write (d + 2, pointer_to_uword (p[2]));
+ iavf_rx_desc_write (d + 3, pointer_to_uword (p[3]));
+ iavf_rx_desc_write (d + 4, pointer_to_uword (p[4]));
+ iavf_rx_desc_write (d + 5, pointer_to_uword (p[5]));
+ iavf_rx_desc_write (d + 6, pointer_to_uword (p[6]));
+ iavf_rx_desc_write (d + 7, pointer_to_uword (p[7]));
+ }
+ else
+ {
+ vlib_get_buffers (vm, arq->buffer_indices + slot, b, 8);
+ iavf_rx_desc_write (d + 0, vlib_buffer_get_pa (vm, b[0]));
+ iavf_rx_desc_write (d + 1, vlib_buffer_get_pa (vm, b[1]));
+ iavf_rx_desc_write (d + 2, vlib_buffer_get_pa (vm, b[2]));
+ iavf_rx_desc_write (d + 3, vlib_buffer_get_pa (vm, b[3]));
+ iavf_rx_desc_write (d + 4, vlib_buffer_get_pa (vm, b[4]));
+ iavf_rx_desc_write (d + 5, vlib_buffer_get_pa (vm, b[5]));
+ iavf_rx_desc_write (d + 6, vlib_buffer_get_pa (vm, b[6]));
+ iavf_rx_desc_write (d + 7, vlib_buffer_get_pa (vm, b[7]));
+ }
+
+ /* next */
+ slot = (slot + 8) & mask;
+ n_alloc -= 8;
+ }
+
+ /* RXQ can be smaller than 256 packets, especially if jumbo. */
+ arq->descs[slot].qword[1] = 0;
+
+ __atomic_store_n (arq->qrx_tail, slot, __ATOMIC_RELEASE);
+}
+
+static_always_inline uword
+iavf_rx_attach_tail (vlib_main_t *vm, vlib_buffer_template_t *bt,
+ vlib_buffer_t *b, u64 qw1, iavf_rx_tail_t *t)
+{
+ vlib_buffer_t *hb = b;
+ u32 tlnifb = 0, i = 0;
+
+ if (qw1 & mask_eop.as_u64)
+ return 0;
+
+ while ((qw1 & mask_eop.as_u64) == 0)
+ {
+ ASSERT (i < IAVF_RX_MAX_DESC_IN_CHAIN - 1);
+ ASSERT (qw1 & mask_dd.as_u64);
+ qw1 = t->qw1s[i];
+ b->next_buffer = t->buffers[i];
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ b->template = *bt;
+ tlnifb += b->current_length = ((iavf_rx_desc_qw1_t) qw1).length;
+ i++;
+ }
+
+ hb->total_length_not_including_first_buffer = tlnifb;
+ hb->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ return tlnifb;
+}
+
+static_always_inline void
+iavf_process_flow_offload (vnet_dev_port_t *port, iavf_rt_data_t *rtd,
+ uword n_rx_packets)
+{
+ uword n;
+ iavf_flow_lookup_entry_t fle;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+
+ for (n = 0; n < n_rx_packets; n++)
+ {
+ if ((rtd->qw1s[n] & mask_flm.as_u64) == 0)
+ continue;
+
+ fle = *pool_elt_at_index (ap->flow_lookup_entries, rtd->flow_ids[n]);
+
+ if (fle.next_index != (u16) ~0)
+ rtd->next[n] = fle.next_index;
+
+ if (fle.flow_id != ~0)
+ rtd->bufs[n]->flow_id = fle.flow_id;
+
+ if (fle.buffer_advance != ~0)
+ vlib_buffer_advance (rtd->bufs[n], fle.buffer_advance);
+ }
+}
+
+static_always_inline uword
+iavf_process_rx_burst (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_dev_rx_queue_t *rxq, iavf_rt_data_t *rtd,
+ vlib_buffer_template_t *bt, u32 n_left,
+ int maybe_multiseg)
+{
+ vlib_buffer_t **b = rtd->bufs;
+ u64 *qw1 = rtd->qw1s;
+ iavf_rx_tail_t *tail = rtd->tails;
+ uword n_rx_bytes = 0;
+
+ while (n_left >= 4)
+ {
+ if (n_left >= 12)
+ {
+ vlib_prefetch_buffer_header (b[8], LOAD);
+ vlib_prefetch_buffer_header (b[9], LOAD);
+ vlib_prefetch_buffer_header (b[10], LOAD);
+ vlib_prefetch_buffer_header (b[11], LOAD);
+ }
+
+ b[0]->template = *bt;
+ b[1]->template = *bt;
+ b[2]->template = *bt;
+ b[3]->template = *bt;
+
+ n_rx_bytes += b[0]->current_length =
+ ((iavf_rx_desc_qw1_t) qw1[0]).length;
+ n_rx_bytes += b[1]->current_length =
+ ((iavf_rx_desc_qw1_t) qw1[1]).length;
+ n_rx_bytes += b[2]->current_length =
+ ((iavf_rx_desc_qw1_t) qw1[2]).length;
+ n_rx_bytes += b[3]->current_length =
+ ((iavf_rx_desc_qw1_t) qw1[3]).length;
+
+ if (maybe_multiseg)
+ {
+ n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0);
+ n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[1], qw1[1], tail + 1);
+ n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[2], qw1[2], tail + 2);
+ n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[3], qw1[3], tail + 3);
+ }
+
+ /* next */
+ qw1 += 4;
+ tail += 4;
+ b += 4;
+ n_left -= 4;
+ }
+
+ while (n_left)
+ {
+ b[0]->template = *bt;
+
+ n_rx_bytes += b[0]->current_length =
+ ((iavf_rx_desc_qw1_t) qw1[0]).length;
+
+ if (maybe_multiseg)
+ n_rx_bytes += iavf_rx_attach_tail (vm, bt, b[0], qw1[0], tail + 0);
+
+ /* next */
+ qw1 += 1;
+ tail += 1;
+ b += 1;
+ n_left -= 1;
+ }
+ return n_rx_bytes;
+}
+
+static_always_inline uword
+iavf_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, vnet_dev_port_t *port,
+ vnet_dev_rx_queue_t *rxq, int with_flows)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 thr_idx = vlib_get_thread_index ();
+ iavf_rt_data_t *rtd = vnet_dev_get_rt_temp_space (vm);
+ iavf_rxq_t *arq = vnet_dev_get_rx_queue_data (rxq);
+ vlib_buffer_template_t bt = rxq->buffer_template;
+ u32 n_trace, n_rx_packets = 0, n_rx_bytes = 0;
+ u16 n_tail_desc = 0;
+ u64 or_qw1 = 0;
+ u32 *bi, *to_next, n_left_to_next;
+ u32 next_index = rxq->next_index;
+ u32 sw_if_index = port->intf.sw_if_index;
+ u32 hw_if_index = port->intf.hw_if_index;
+ u16 next = arq->next;
+ u16 size = rxq->size;
+ u16 mask = size - 1;
+ iavf_rx_desc_t *d, *descs = arq->descs;
+#ifdef CLIB_HAVE_VEC256
+ u64x4 q1x4, or_q1x4 = { 0 };
+ u32x4 fdidx4;
+ u64x4 dd_eop_mask4 = u64x4_splat (mask_dd_eop.as_u64);
+#elif defined(CLIB_HAVE_VEC128)
+ u32x4 q1x4_lo, q1x4_hi, or_q1x4 = { 0 };
+ u32x4 fdidx4;
+ u32x4 dd_eop_mask4 = u32x4_splat (mask_dd_eop.as_u64);
+#endif
+ int single_next = 1;
+
+ /* is there anything on the ring */
+ d = descs + next;
+ if ((d->qword[1] & mask_dd.as_u64) == 0)
+ goto done;
+
+ vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ /* fetch up to IAVF_RX_VECTOR_SZ from the rx ring, unflatten them and
+ copy needed data from descriptor to rx vector */
+ bi = to_next;
+
+ while (n_rx_packets < IAVF_RX_VECTOR_SZ)
+ {
+ if (next + 11 < size)
+ {
+ int stride = 8;
+ clib_prefetch_load ((void *) (descs + (next + stride)));
+ clib_prefetch_load ((void *) (descs + (next + stride + 1)));
+ clib_prefetch_load ((void *) (descs + (next + stride + 2)));
+ clib_prefetch_load ((void *) (descs + (next + stride + 3)));
+ }
+
+#ifdef CLIB_HAVE_VEC256
+ if (n_rx_packets >= IAVF_RX_VECTOR_SZ - 4 || next >= size - 4)
+ goto one_by_one;
+
+ q1x4 = u64x4_gather ((void *) &d[0].qword[1], (void *) &d[1].qword[1],
+ (void *) &d[2].qword[1], (void *) &d[3].qword[1]);
+
+ /* not all packets are ready or at least one of them is chained */
+ if (!u64x4_is_equal (q1x4 & dd_eop_mask4, dd_eop_mask4))
+ goto one_by_one;
+
+ or_q1x4 |= q1x4;
+
+ u64x4_store_unaligned (q1x4, rtd->qw1s + n_rx_packets);
+#elif defined(CLIB_HAVE_VEC128)
+ if (n_rx_packets >= IAVF_RX_VECTOR_SZ - 4 || next >= size - 4)
+ goto one_by_one;
+
+ q1x4_lo =
+ u32x4_gather ((void *) &d[0].qword[1], (void *) &d[1].qword[1],
+ (void *) &d[2].qword[1], (void *) &d[3].qword[1]);
+
+ /* not all packets are ready or at least one of them is chained */
+ if (!u32x4_is_equal (q1x4_lo & dd_eop_mask4, dd_eop_mask4))
+ goto one_by_one;
+
+ q1x4_hi = u32x4_gather (
+ (void *) &d[0].qword[1] + 4, (void *) &d[1].qword[1] + 4,
+ (void *) &d[2].qword[1] + 4, (void *) &d[3].qword[1] + 4);
+
+ or_q1x4 |= q1x4_lo;
+ rtd->qw1s[n_rx_packets + 0] = (u64) q1x4_hi[0] << 32 | (u64) q1x4_lo[0];
+ rtd->qw1s[n_rx_packets + 1] = (u64) q1x4_hi[1] << 32 | (u64) q1x4_lo[1];
+ rtd->qw1s[n_rx_packets + 2] = (u64) q1x4_hi[2] << 32 | (u64) q1x4_lo[2];
+ rtd->qw1s[n_rx_packets + 3] = (u64) q1x4_hi[3] << 32 | (u64) q1x4_lo[3];
+#endif
+#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
+
+ if (with_flows)
+ {
+ fdidx4 = u32x4_gather (
+ (void *) &d[0].fdid_flex_hi, (void *) &d[1].fdid_flex_hi,
+ (void *) &d[2].fdid_flex_hi, (void *) &d[3].fdid_flex_hi);
+ u32x4_store_unaligned (fdidx4, rtd->flow_ids + n_rx_packets);
+ }
+
+ vlib_buffer_copy_indices (bi, arq->buffer_indices + next, 4);
+
+ /* next */
+ next = (next + 4) & mask;
+ d = descs + next;
+ n_rx_packets += 4;
+ bi += 4;
+ continue;
+ one_by_one:
+#endif
+ clib_prefetch_load ((void *) (descs + ((next + 8) & mask)));
+
+ if (iavf_rxd_is_not_dd (d))
+ break;
+
+ bi[0] = arq->buffer_indices[next];
+
+ /* deal with chained buffers */
+ if (PREDICT_FALSE (iavf_rxd_is_not_eop (d)))
+ {
+ u16 tail_desc = 0;
+ u16 tail_next = next;
+ iavf_rx_tail_t *tail = rtd->tails + n_rx_packets;
+ iavf_rx_desc_t *td;
+ do
+ {
+ tail_next = (tail_next + 1) & mask;
+ td = descs + tail_next;
+
+ /* bail out in case of incomplete transaction */
+ if (iavf_rxd_is_not_dd (td))
+ goto no_more_desc;
+
+ or_qw1 |= tail->qw1s[tail_desc] = td[0].qword[1];
+ tail->buffers[tail_desc] = arq->buffer_indices[tail_next];
+ tail_desc++;
+ }
+ while (iavf_rxd_is_not_eop (td));
+ next = tail_next;
+ n_tail_desc += tail_desc;
+ }
+
+ or_qw1 |= rtd->qw1s[n_rx_packets] = d[0].qword[1];
+ if (PREDICT_FALSE (with_flows))
+ {
+ rtd->flow_ids[n_rx_packets] = d[0].fdid_flex_hi;
+ }
+
+ /* next */
+ next = (next + 1) & mask;
+ d = descs + next;
+ n_rx_packets++;
+ bi++;
+ }
+no_more_desc:
+
+ if (n_rx_packets == 0)
+ goto done;
+
+ arq->next = next;
+ arq->n_enqueued -= n_rx_packets + n_tail_desc;
+
+#if defined(CLIB_HAVE_VEC256) || defined(CLIB_HAVE_VEC128)
+ or_qw1 |= or_q1x4[0] | or_q1x4[1] | or_q1x4[2] | or_q1x4[3];
+#endif
+
+ vlib_get_buffers (vm, to_next, rtd->bufs, n_rx_packets);
+
+ n_rx_bytes =
+ n_tail_desc ?
+ iavf_process_rx_burst (vm, node, rxq, rtd, &bt, n_rx_packets, 1) :
+ iavf_process_rx_burst (vm, node, rxq, rtd, &bt, n_rx_packets, 0);
+
+ /* the MARKed packets may have different next nodes */
+ if (PREDICT_FALSE (with_flows && (or_qw1 & mask_flm.as_u64)))
+ {
+ u32 n;
+ single_next = 0;
+ for (n = 0; n < n_rx_packets; n++)
+ rtd->next[n] = next_index;
+
+ iavf_process_flow_offload (port, rtd, n_rx_packets);
+ }
+
+ /* packet trace if enabled */
+ if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
+ {
+ u32 n_left = n_rx_packets;
+ u32 i, j;
+ u16 *next_indices = rtd->next;
+
+ i = 0;
+ while (n_trace && n_left)
+ {
+ vlib_buffer_t *b = rtd->bufs[i];
+ if (PREDICT_FALSE (single_next == 0))
+ next_index = next_indices[0];
+
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b,
+ /* follow_chain */ 0)))
+ {
+ iavf_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+ tr->next_index = next_index;
+ tr->qid = rxq->queue_id;
+ tr->hw_if_index = hw_if_index;
+ tr->qw1s[0] = rtd->qw1s[i];
+ tr->flow_id =
+ (tr->qw1s[0] & mask_flm.as_u64) ? rtd->flow_ids[i] : 0;
+ for (j = 1; j < IAVF_RX_MAX_DESC_IN_CHAIN; j++)
+ tr->qw1s[j] = rtd->tails[i].qw1s[j - 1];
+
+ n_trace--;
+ }
+
+ /* next */
+ n_left--;
+ i++;
+ next_indices++;
+ }
+ vlib_set_trace_count (vm, node, n_trace);
+ }
+
+ /* enqueu the packets to the next nodes */
+ if (PREDICT_FALSE (with_flows && (or_qw1 & mask_flm.as_u64)))
+ {
+ /* release next node's frame vector, in this case we use
+ vlib_buffer_enqueue_to_next to place the packets
+ */
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+ /* enqueue buffers to the next node */
+ vlib_buffer_enqueue_to_next (vm, node, to_next, rtd->next, n_rx_packets);
+ }
+ else
+ {
+ if (PREDICT_TRUE (next_index == VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT))
+ {
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+ nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = sw_if_index;
+ ef->hw_if_index = hw_if_index;
+
+ if ((or_qw1 & mask_ipe.as_u64) == 0)
+ f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
+ vlib_frame_no_append (f);
+ }
+
+ n_left_to_next -= n_rx_packets;
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_increment_combined_counter (
+ vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thr_idx, hw_if_index, n_rx_packets, n_rx_bytes);
+
+done:
+ return n_rx_packets;
+}
+
+VNET_DEV_NODE_FN (iavf_rx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ u32 n_rx = 0;
+ foreach_vnet_dev_rx_queue_runtime (rxq, node)
+ {
+ vnet_dev_port_t *port = rxq->port;
+ iavf_port_t *ap = vnet_dev_get_port_data (port);
+ if (PREDICT_FALSE (ap->flow_offload))
+ n_rx += iavf_device_input_inline (vm, node, frame, port, rxq, 1);
+ else
+ n_rx += iavf_device_input_inline (vm, node, frame, port, rxq, 0);
+
+ /* refill rx ring */
+ if (rxq->port->dev->va_dma)
+ iavf_rxq_refill (vm, node, rxq, 1 /* use_va_dma */);
+ else
+ iavf_rxq_refill (vm, node, rxq, 0 /* use_va_dma */);
+ }
+
+ return n_rx;
+}
diff --git a/src/plugins/dev_iavf/tx_node.c b/src/plugins/dev_iavf/tx_node.c
new file mode 100644
index 00000000000..451db80d286
--- /dev/null
+++ b/src/plugins/dev_iavf/tx_node.c
@@ -0,0 +1,517 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/ring.h>
+#include <vppinfra/vector/ip_csum.h>
+
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+
+#include <dev_iavf/iavf.h>
+
+static_always_inline u8
+iavf_tx_desc_get_dtyp (iavf_tx_desc_t *d)
+{
+ return d->qword[1] & 0x0f;
+}
+
+struct iavf_ip4_psh
+{
+ u32 src;
+ u32 dst;
+ u8 zero;
+ u8 proto;
+ u16 l4len;
+};
+
+struct iavf_ip6_psh
+{
+ ip6_address_t src;
+ ip6_address_t dst;
+ u32 l4len;
+ u32 proto;
+};
+
+static_always_inline u64
+iavf_tx_prepare_cksum (vlib_buffer_t *b, u8 is_tso)
+{
+ u64 flags = 0;
+ if (!is_tso && !(b->flags & VNET_BUFFER_F_OFFLOAD))
+ return 0;
+
+ vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+ u32 is_tcp = is_tso || oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
+ u32 is_udp = !is_tso && oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+
+ if (!is_tcp && !is_udp)
+ return 0;
+
+ u32 is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
+ u32 is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;
+
+ ASSERT (!(is_tcp && is_udp));
+ ASSERT (is_ip4 || is_ip6);
+ i16 l2_hdr_offset = b->current_data;
+ i16 l3_hdr_offset = vnet_buffer (b)->l3_hdr_offset;
+ i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset;
+ u16 l2_len = l3_hdr_offset - l2_hdr_offset;
+ u16 l3_len = l4_hdr_offset - l3_hdr_offset;
+ ip4_header_t *ip4 = (void *) (b->data + l3_hdr_offset);
+ ip6_header_t *ip6 = (void *) (b->data + l3_hdr_offset);
+ tcp_header_t *tcp = (void *) (b->data + l4_hdr_offset);
+ udp_header_t *udp = (void *) (b->data + l4_hdr_offset);
+ u16 l4_len = is_tcp ? tcp_header_bytes (tcp) : sizeof (udp_header_t);
+ u16 sum = 0;
+
+ flags |= IAVF_TXD_OFFSET_MACLEN (l2_len) | IAVF_TXD_OFFSET_IPLEN (l3_len) |
+ IAVF_TXD_OFFSET_L4LEN (l4_len);
+ flags |= is_ip4 ? IAVF_TXD_CMD_IIPT_IPV4 : IAVF_TXD_CMD_IIPT_IPV6;
+ flags |= is_tcp ? IAVF_TXD_CMD_L4T_TCP : IAVF_TXD_CMD_L4T_UDP;
+
+ if (is_ip4)
+ ip4->checksum = 0;
+
+ if (is_tso)
+ {
+ if (is_ip4)
+ ip4->length = 0;
+ else
+ ip6->payload_length = 0;
+ }
+
+ if (is_ip4)
+ {
+ struct iavf_ip4_psh psh = { 0 };
+ psh.src = ip4->src_address.as_u32;
+ psh.dst = ip4->dst_address.as_u32;
+ psh.proto = ip4->protocol;
+ psh.l4len = is_tso ?
+ 0 :
+ clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
+ (l4_hdr_offset - l3_hdr_offset));
+ sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
+ }
+ else
+ {
+ struct iavf_ip6_psh psh = { 0 };
+ psh.src = ip6->src_address;
+ psh.dst = ip6->dst_address;
+ psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
+ psh.l4len = is_tso ? 0 : ip6->payload_length;
+ sum = ~clib_ip_csum ((u8 *) &psh, sizeof (psh));
+ }
+
+ if (is_tcp)
+ tcp->checksum = sum;
+ else
+ udp->checksum = sum;
+ return flags;
+}
+
+static_always_inline u32
+iavf_tx_fill_ctx_desc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq,
+ iavf_tx_desc_t *d, vlib_buffer_t *b)
+{
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+ vlib_buffer_t *ctx_ph;
+ u32 *bi = atq->ph_bufs;
+
+next:
+ ctx_ph = vlib_get_buffer (vm, bi[0]);
+ if (PREDICT_FALSE (ctx_ph->ref_count == 255))
+ {
+ bi++;
+ goto next;
+ }
+
+ /* Acquire a reference on the placeholder buffer */
+ ctx_ph->ref_count++;
+
+ u16 l234hdr_sz = vnet_buffer (b)->l4_hdr_offset - b->current_data +
+ vnet_buffer2 (b)->gso_l4_hdr_sz;
+ u16 tlen = vlib_buffer_length_in_chain (vm, b) - l234hdr_sz;
+ d[0].qword[0] = 0;
+ d[0].qword[1] = IAVF_TXD_DTYP_CTX | IAVF_TXD_CTX_CMD_TSO |
+ IAVF_TXD_CTX_SEG_MSS (vnet_buffer2 (b)->gso_size) |
+ IAVF_TXD_CTX_SEG_TLEN (tlen);
+ return bi[0];
+}
+
+static_always_inline void
+iavf_tx_copy_desc (iavf_tx_desc_t *d, iavf_tx_desc_t *s, u32 n_descs)
+{
+#if defined CLIB_HAVE_VEC512
+ while (n_descs >= 8)
+ {
+ u64x8u *dv = (u64x8u *) d;
+ u64x8u *sv = (u64x8u *) s;
+
+ dv[0] = sv[0];
+ dv[1] = sv[1];
+
+ /* next */
+ d += 8;
+ s += 8;
+ n_descs -= 8;
+ }
+#elif defined CLIB_HAVE_VEC256
+ while (n_descs >= 4)
+ {
+ u64x4u *dv = (u64x4u *) d;
+ u64x4u *sv = (u64x4u *) s;
+
+ dv[0] = sv[0];
+ dv[1] = sv[1];
+
+ /* next */
+ d += 4;
+ s += 4;
+ n_descs -= 4;
+ }
+#elif defined CLIB_HAVE_VEC128
+ while (n_descs >= 2)
+ {
+ u64x2u *dv = (u64x2u *) d;
+ u64x2u *sv = (u64x2u *) s;
+
+ dv[0] = sv[0];
+ dv[1] = sv[1];
+
+ /* next */
+ d += 2;
+ s += 2;
+ n_descs -= 2;
+ }
+#endif
+ while (n_descs)
+ {
+ d[0].qword[0] = s[0].qword[0];
+ d[0].qword[1] = s[0].qword[1];
+ d++;
+ s++;
+ n_descs--;
+ }
+}
+
+static_always_inline void
+iavf_tx_fill_data_desc (vlib_main_t *vm, iavf_tx_desc_t *d, vlib_buffer_t *b,
+ u64 cmd, int use_va_dma)
+{
+ if (use_va_dma)
+ d->qword[0] = vlib_buffer_get_current_va (b);
+ else
+ d->qword[0] = vlib_buffer_get_current_pa (vm, b);
+ d->qword[1] = (((u64) b->current_length) << 34 | cmd | IAVF_TXD_CMD_RSV);
+}
+static_always_inline u16
+iavf_tx_prepare (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_dev_tx_queue_t *txq, u32 *buffers, u32 n_packets,
+ u16 *n_enq_descs, int use_va_dma)
+{
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+ const u64 cmd_eop = IAVF_TXD_CMD_EOP;
+ u16 n_free_desc, n_desc_left, n_packets_left = n_packets;
+#if defined CLIB_HAVE_VEC512
+ vlib_buffer_t *b[8];
+#else
+ vlib_buffer_t *b[4];
+#endif
+ iavf_tx_desc_t *d = atq->tmp_descs;
+ u32 *tb = atq->tmp_bufs;
+
+ n_free_desc = n_desc_left = txq->size - atq->n_enqueued - 8;
+
+ if (n_desc_left == 0)
+ return 0;
+
+ while (n_packets_left && n_desc_left)
+ {
+#if defined CLIB_HAVE_VEC512
+ u32 flags;
+ u64x8 or_flags_vec512;
+ u64x8 flags_mask_vec512;
+#else
+ u32 flags, or_flags;
+#endif
+
+#if defined CLIB_HAVE_VEC512
+ if (n_packets_left < 8 || n_desc_left < 8)
+#else
+ if (n_packets_left < 8 || n_desc_left < 4)
+#endif
+ goto one_by_one;
+
+#if defined CLIB_HAVE_VEC512
+ u64x8 base_ptr = u64x8_splat (vm->buffer_main->buffer_mem_start);
+ u32x8 buf_indices = u32x8_load_unaligned (buffers);
+
+ *(u64x8 *) &b = base_ptr + u64x8_from_u32x8 (
+ buf_indices << CLIB_LOG2_CACHE_LINE_BYTES);
+
+ or_flags_vec512 = u64x8_i64gather (u64x8_load_unaligned (b), 0, 1);
+#else
+ vlib_prefetch_buffer_with_index (vm, buffers[4], LOAD);
+ vlib_prefetch_buffer_with_index (vm, buffers[5], LOAD);
+ vlib_prefetch_buffer_with_index (vm, buffers[6], LOAD);
+ vlib_prefetch_buffer_with_index (vm, buffers[7], LOAD);
+
+ b[0] = vlib_get_buffer (vm, buffers[0]);
+ b[1] = vlib_get_buffer (vm, buffers[1]);
+ b[2] = vlib_get_buffer (vm, buffers[2]);
+ b[3] = vlib_get_buffer (vm, buffers[3]);
+
+ or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
+#endif
+
+#if defined CLIB_HAVE_VEC512
+ flags_mask_vec512 = u64x8_splat (
+ VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD | VNET_BUFFER_F_GSO);
+ if (PREDICT_FALSE (
+ !u64x8_is_all_zero (or_flags_vec512 & flags_mask_vec512)))
+#else
+ if (PREDICT_FALSE (or_flags &
+ (VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD |
+ VNET_BUFFER_F_GSO)))
+#endif
+ goto one_by_one;
+
+#if defined CLIB_HAVE_VEC512
+ vlib_buffer_copy_indices (tb, buffers, 8);
+ iavf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 1, b[1], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 2, b[2], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 3, b[3], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 4, b[4], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 5, b[5], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 6, b[6], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 7, b[7], cmd_eop, use_va_dma);
+
+ buffers += 8;
+ n_packets_left -= 8;
+ n_desc_left -= 8;
+ d += 8;
+ tb += 8;
+#else
+ vlib_buffer_copy_indices (tb, buffers, 4);
+
+ iavf_tx_fill_data_desc (vm, d + 0, b[0], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 1, b[1], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 2, b[2], cmd_eop, use_va_dma);
+ iavf_tx_fill_data_desc (vm, d + 3, b[3], cmd_eop, use_va_dma);
+
+ buffers += 4;
+ n_packets_left -= 4;
+ n_desc_left -= 4;
+ d += 4;
+ tb += 4;
+#endif
+
+ continue;
+
+ one_by_one:
+ tb[0] = buffers[0];
+ b[0] = vlib_get_buffer (vm, buffers[0]);
+ flags = b[0]->flags;
+
+ /* No chained buffers or TSO case */
+ if (PREDICT_TRUE (
+ (flags & (VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_GSO)) == 0))
+ {
+ u64 cmd = cmd_eop;
+
+ if (PREDICT_FALSE (flags & VNET_BUFFER_F_OFFLOAD))
+ cmd |= iavf_tx_prepare_cksum (b[0], 0 /* is_tso */);
+
+ iavf_tx_fill_data_desc (vm, d, b[0], cmd, use_va_dma);
+ }
+ else
+ {
+ u16 n_desc_needed = 1;
+ u64 cmd = 0;
+
+ if (flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ vlib_buffer_t *next = vlib_get_buffer (vm, b[0]->next_buffer);
+ n_desc_needed = 2;
+ while (next->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ next = vlib_get_buffer (vm, next->next_buffer);
+ n_desc_needed++;
+ }
+ }
+
+ if (flags & VNET_BUFFER_F_GSO)
+ {
+ n_desc_needed++;
+ }
+ else if (PREDICT_FALSE (n_desc_needed > 8))
+ {
+ vlib_buffer_free_one (vm, buffers[0]);
+ vlib_error_count (vm, node->node_index,
+ IAVF_TX_NODE_CTR_SEG_SZ_EXCEEDED, 1);
+ n_packets_left -= 1;
+ buffers += 1;
+ continue;
+ }
+
+ if (PREDICT_FALSE (n_desc_left < n_desc_needed))
+ break;
+
+ if (flags & VNET_BUFFER_F_GSO)
+ {
+ /* Enqueue a context descriptor */
+ tb[1] = tb[0];
+ tb[0] = iavf_tx_fill_ctx_desc (vm, txq, d, b[0]);
+ n_desc_left -= 1;
+ d += 1;
+ tb += 1;
+ cmd = iavf_tx_prepare_cksum (b[0], 1 /* is_tso */);
+ }
+ else if (flags & VNET_BUFFER_F_OFFLOAD)
+ {
+ cmd = iavf_tx_prepare_cksum (b[0], 0 /* is_tso */);
+ }
+
+ /* Deal with chain buffer if present */
+ while (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ iavf_tx_fill_data_desc (vm, d, b[0], cmd, use_va_dma);
+
+ n_desc_left -= 1;
+ d += 1;
+ tb += 1;
+
+ tb[0] = b[0]->next_buffer;
+ b[0] = vlib_get_buffer (vm, b[0]->next_buffer);
+ }
+
+ iavf_tx_fill_data_desc (vm, d, b[0], cmd_eop | cmd, use_va_dma);
+ }
+
+ buffers += 1;
+ n_packets_left -= 1;
+ n_desc_left -= 1;
+ d += 1;
+ tb += 1;
+ }
+
+ *n_enq_descs = n_free_desc - n_desc_left;
+ return n_packets - n_packets_left;
+}
+
+VNET_DEV_NODE_FN (iavf_tx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node);
+ vnet_dev_tx_queue_t *txq = rt->tx_queue;
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+ iavf_txq_t *atq = vnet_dev_get_tx_queue_data (txq);
+ u16 next;
+ u16 mask = txq->size - 1;
+ u32 *buffers = vlib_frame_vector_args (frame);
+ u16 n_enq, n_left, n_desc, *slot;
+ u16 n_retry = 2;
+
+ n_left = frame->n_vectors;
+
+ vnet_dev_tx_queue_lock_if_needed (txq);
+
+retry:
+ next = atq->next;
+ /* release consumed bufs */
+ if (atq->n_enqueued)
+ {
+ i32 complete_slot = -1;
+ while (1)
+ {
+ u16 *slot = clib_ring_get_first (atq->rs_slots);
+
+ if (slot == 0)
+ break;
+
+ if (iavf_tx_desc_get_dtyp (atq->descs + slot[0]) != 0x0F)
+ break;
+
+ complete_slot = slot[0];
+
+ clib_ring_deq (atq->rs_slots);
+ }
+
+ if (complete_slot >= 0)
+ {
+ u16 first, mask, n_free;
+ mask = txq->size - 1;
+ first = (atq->next - atq->n_enqueued) & mask;
+ n_free = (complete_slot + 1 - first) & mask;
+
+ atq->n_enqueued -= n_free;
+ vlib_buffer_free_from_ring_no_next (vm, atq->buffer_indices, first,
+ txq->size, n_free);
+ }
+ }
+
+ n_desc = 0;
+ if (dev->va_dma)
+ n_enq = iavf_tx_prepare (vm, node, txq, buffers, n_left, &n_desc, 1);
+ else
+ n_enq = iavf_tx_prepare (vm, node, txq, buffers, n_left, &n_desc, 0);
+
+ if (n_desc)
+ {
+ if (PREDICT_TRUE (next + n_desc <= txq->size))
+ {
+ /* no wrap */
+ iavf_tx_copy_desc (atq->descs + next, atq->tmp_descs, n_desc);
+ vlib_buffer_copy_indices (atq->buffer_indices + next, atq->tmp_bufs,
+ n_desc);
+ }
+ else
+ {
+ /* wrap */
+ u32 n_not_wrap = txq->size - next;
+ iavf_tx_copy_desc (atq->descs + next, atq->tmp_descs, n_not_wrap);
+ iavf_tx_copy_desc (atq->descs, atq->tmp_descs + n_not_wrap,
+ n_desc - n_not_wrap);
+ vlib_buffer_copy_indices (atq->buffer_indices + next, atq->tmp_bufs,
+ n_not_wrap);
+ vlib_buffer_copy_indices (atq->buffer_indices,
+ atq->tmp_bufs + n_not_wrap,
+ n_desc - n_not_wrap);
+ }
+
+ next += n_desc;
+ if ((slot = clib_ring_enq (atq->rs_slots)))
+ {
+ u16 rs_slot = slot[0] = (next - 1) & mask;
+ atq->descs[rs_slot].qword[1] |= IAVF_TXD_CMD_RS;
+ }
+
+ atq->next = next & mask;
+ __atomic_store_n (atq->qtx_tail, atq->next, __ATOMIC_RELEASE);
+ atq->n_enqueued += n_desc;
+ n_left -= n_enq;
+ }
+
+ if (n_left)
+ {
+ buffers += n_enq;
+
+ if (n_retry--)
+ goto retry;
+
+ vlib_buffer_free (vm, buffers, n_left);
+ vlib_error_count (vm, node->node_index, IAVF_TX_NODE_CTR_NO_FREE_SLOTS,
+ n_left);
+ }
+
+ vnet_dev_tx_queue_unlock_if_needed (txq);
+
+ return frame->n_vectors - n_left;
+}
diff --git a/src/plugins/dev_iavf/virtchnl.c b/src/plugins/dev_iavf/virtchnl.c
new file mode 100644
index 00000000000..eca48106ce3
--- /dev/null
+++ b/src/plugins/dev_iavf/virtchnl.c
@@ -0,0 +1,372 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_iavf/iavf.h>
+#include <dev_iavf/virtchnl.h>
+#include <dev_iavf/virtchnl_funcs.h>
+#include <vnet/ethernet/ethernet.h>
+
+VLIB_REGISTER_LOG_CLASS (iavf_log, static) = {
+ .class_name = "iavf",
+ .subclass_name = "virtchnl",
+};
+
+u8 *
+format_virtchnl_op_name (u8 *s, va_list *args)
+{
+ virtchnl_op_t op = va_arg (*args, virtchnl_op_t);
+ char *op_names[] = {
+#define _(a, b) [a] = #b,
+ foreach_virtchnl_op
+#undef _
+ };
+
+ if (op >= ARRAY_LEN (op_names) || op_names[op] == 0)
+ return format (s, "UNKNOWN(%u)", op);
+
+ return format (s, "%s", op_names[op]);
+}
+
+u8 *
+format_virtchnl_status (u8 *s, va_list *args)
+{
+ virtchnl_status_t c = va_arg (*args, virtchnl_status_t);
+
+ if (0)
+ ;
+#define _(a, b) else if (c == a) return format (s, #b);
+ foreach_virtchnl_status
+#undef _
+ return format (s, "UNKNOWN(%d)", c);
+}
+
+static u8 *
+format_virtchnl_vlan_support_caps (u8 *s, va_list *args)
+{
+ virtchnl_vlan_support_caps_t v = va_arg (*args, u32);
+ int not_first = 0;
+
+ char *strs[32] = {
+#define _(a, b, c) [a] = c,
+ foreach_virtchnl_vlan_support_bit
+#undef _
+ };
+
+ if (v == VIRTCHNL_VLAN_UNSUPPORTED)
+ return format (s, "unsupported");
+
+ for (int i = 0; i < 32; i++)
+ {
+ if ((v & (1 << i)) == 0)
+ continue;
+ if (not_first)
+ s = format (s, " ");
+ if (strs[i])
+ s = format (s, "%s", strs[i]);
+ else
+ s = format (s, "unknown(%u)", i);
+ not_first = 1;
+ }
+ return s;
+}
+
+static u8 *
+format_virtchnl_op_req (u8 *s, va_list *args)
+{
+ virtchnl_op_t op = va_arg (*args, virtchnl_op_t);
+ void *p = va_arg (*args, void *);
+ u32 indent = format_get_indent (s);
+
+ if (p == 0)
+ return format (s, "no data");
+
+ switch (op)
+ {
+ case VIRTCHNL_OP_VERSION:
+ {
+ virtchnl_version_info_t *r = p;
+ s = format (s, "version: %u.%u", r->major, r->minor);
+ }
+ break;
+ case VIRTCHNL_OP_GET_VF_RESOURCES:
+ {
+ u32 *r = p;
+ s = format (s, "%U", format_iavf_vf_cap_flags, *r);
+ }
+ break;
+ case VIRTCHNL_OP_ENABLE_QUEUES:
+ case VIRTCHNL_OP_DISABLE_QUEUES:
+ case VIRTCHNL_OP_GET_STATS:
+ {
+ virtchnl_queue_select_t *r = p;
+ s = format (s, "vsi %u rx 0x%x tx 0x%x", r->vsi_id, r->rx_queues,
+ r->tx_queues);
+ }
+ break;
+ case VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+ {
+ virtchnl_vsi_queue_config_info_t *r = p;
+ s = format (s, "vsi %u num_qp %u", r->vsi_id, r->num_queue_pairs);
+ for (int i = 0; i < r->num_queue_pairs; i++)
+ {
+ virtchnl_rxq_info_t *ri = &r->qpair[i].rxq;
+ virtchnl_txq_info_t *ti = &r->qpair[i].txq;
+
+ s = format (s, "\n%U qpair %u", format_white_space, indent + 2, i);
+ s = format (s,
+ "\n%U rx vsi %u queue %u dma_ring_addr 0x%lx "
+ "ring_len %u data_sz %u max_pkt_sz %u",
+ format_white_space, indent + 4, ri->vsi_id,
+ ri->queue_id, ri->dma_ring_addr, ri->ring_len,
+ ri->databuffer_size, ri->max_pkt_size);
+ s = format (
+ s, "\n%U tx vsi %u queue %u dma_ring_addr 0x%lx ring_len %u",
+ format_white_space, indent + 4, ti->vsi_id, ti->queue_id,
+ ti->dma_ring_addr, ti->ring_len);
+ }
+ }
+ break;
+ case VIRTCHNL_OP_CONFIG_IRQ_MAP:
+ {
+ virtchnl_irq_map_info_t *r = p;
+ s = format (s, "num_vectors %u", r->num_vectors);
+ for (int i = 0; i < r->num_vectors; i++)
+ {
+ virtchnl_vector_map_t *vecmap = r->vecmap + i;
+ s = format (s,
+ "\n%Uvsi %u vector_id %u rxq_map 0x%04x txq_map "
+ "0x%04x rxitr_idx %u txitr_idx %u",
+ format_white_space, indent + 2, vecmap->vsi_id,
+ vecmap->vector_id, vecmap->rxq_map, vecmap->txq_map,
+ vecmap->rxitr_idx, vecmap->txitr_idx);
+ }
+ }
+ break;
+ case VIRTCHNL_OP_CONFIG_RSS_LUT:
+ {
+ virtchnl_rss_lut_t *r = p;
+ s = format (s, "vsi %u entries %u lut", r->vsi_id, r->lut_entries);
+ for (int i = 0; i < r->lut_entries; i++)
+ s = format (s, " %u", r->lut[i]);
+ }
+ break;
+ case VIRTCHNL_OP_CONFIG_RSS_KEY:
+ {
+ virtchnl_rss_key_t *r = p;
+ s = format (s, "vsi %u len %u key ", r->vsi_id, r->key_len);
+ for (int i = 0; i < r->key_len; i++)
+ s = format (s, "%02x", r->key[i]);
+ }
+ break;
+ case VIRTCHNL_OP_ADD_ETH_ADDR:
+ case VIRTCHNL_OP_DEL_ETH_ADDR:
+ {
+ virtchnl_ether_addr_list_t *r = p;
+ s = format (s, "vsi %u num_elements %u elts: ", r->vsi_id,
+ r->num_elements);
+ for (int i = 0; i < r->num_elements; i++)
+ s = format (s, "%s%U%s%s", i ? ", " : "", format_ethernet_address,
+ r->list[i].addr, r->list[i].primary ? " primary" : "",
+ r->list[i].extra ? " extra" : "");
+ }
+ break;
+ case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+ {
+ virtchnl_promisc_info_t *r = p;
+ s = format (
+ s, "promisc_info: vsi %u flags 0x%x (unicast %s multicast %s)",
+ r->vsi_id, r->flags,
+ r->flags & FLAG_VF_UNICAST_PROMISC ? "on" : "off",
+ r->flags & FLAG_VF_MULTICAST_PROMISC ? "on" : "off");
+ }
+ break;
+ case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2:
+ {
+ virtchnl_vlan_setting_t *r = p;
+ s = format (s,
+ "vport %u outer_ethertype_setting 0x%x [%U] "
+ "inner_ethertype_setting 0x%x [%U]",
+ r->vport_id, r->outer_ethertype_setting,
+ format_virtchnl_vlan_support_caps,
+ r->outer_ethertype_setting, r->inner_ethertype_setting,
+ format_virtchnl_vlan_support_caps,
+ r->inner_ethertype_setting);
+ }
+ break;
+ default:
+ s = format (s, "unknown op 0x%04x", op);
+ break;
+ };
+ return s;
+}
+static u8 *
+format_virtchnl_op_resp (u8 *s, va_list *args)
+{
+ virtchnl_op_t op = va_arg (*args, virtchnl_op_t);
+ void *p = va_arg (*args, void *);
+ u32 indent = format_get_indent (s);
+
+ if (p == 0)
+ return format (s, "no data");
+
+ switch (op)
+ {
+ case VIRTCHNL_OP_VERSION:
+ {
+ virtchnl_version_info_t *r = p;
+ s = format (s, "version %u.%u", r->major, r->minor);
+ }
+ break;
+ case VIRTCHNL_OP_GET_VF_RESOURCES:
+ {
+ virtchnl_vf_resource_t *r = p;
+ s =
+ format (s,
+ "vf_resource: num_vsis %u num_queue_pairs %u "
+ "max_vectors %u max_mtu %u rss_key_size %u rss_lut_size %u",
+ r->num_vsis, r->num_queue_pairs, r->max_vectors, r->max_mtu,
+ r->rss_key_size, r->rss_lut_size);
+ s = format (s, "\n%Uvf_cap_flags 0x%x (%U)", format_white_space,
+ indent + 2, r->vf_cap_flags, format_iavf_vf_cap_flags,
+ r->vf_cap_flags);
+ for (int i = 0; i < r->num_vsis; i++)
+ s = format (s,
+ "\n%Uvsi_resource[%u]: vsi %u num_qp %u vsi_type %u "
+ "qset_handle %u default_mac_addr %U",
+ format_white_space, indent + 2, i, r->vsi_res[i].vsi_id,
+ r->vsi_res[i].num_queue_pairs, r->vsi_res[i].vsi_type,
+ r->vsi_res[i].qset_handle, format_ethernet_address,
+ r->vsi_res[i].default_mac_addr);
+ }
+ break;
+ case VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS:
+ {
+ virtchnl_vlan_caps_t *r = p;
+ s = format (s, "filtering: ethertype_init 0x%x max_filters %u",
+ r->filtering.ethertype_init, r->filtering.max_filters);
+ s = format (s, "\n%U outer [%U] inner [%U]", format_white_space,
+ indent, format_virtchnl_vlan_support_caps,
+ r->filtering.filtering_support.outer,
+ format_virtchnl_vlan_support_caps,
+ r->filtering.filtering_support.inner);
+ s = format (s, "\n%Uoffloads: ethertype_init 0x%x ethertype_match %u",
+ format_white_space, indent, r->offloads.ethertype_init,
+ r->offloads.ethertype_match);
+ s = format (s, "\n%U stripping outer [%U] stripping inner [%U]",
+ format_white_space, indent,
+ format_virtchnl_vlan_support_caps,
+ r->offloads.stripping_support.outer,
+ format_virtchnl_vlan_support_caps,
+ r->offloads.stripping_support.inner);
+ s = format (s, "\n%U insertion outer [%U] inserion inner [%U]",
+ format_white_space, indent,
+ format_virtchnl_vlan_support_caps,
+ r->offloads.insertion_support.outer,
+ format_virtchnl_vlan_support_caps,
+ r->offloads.insertion_support.inner);
+ }
+ break;
+ case VIRTCHNL_OP_GET_STATS:
+ {
+ virtchnl_eth_stats_t *r = p;
+ s = format (s,
+ "rx: bytes %lu, unicast %lu, multicast %lu, broadcast "
+ "%lu, discards %lu unknown_protocol %lu",
+ r->rx_bytes, r->rx_unicast, r->rx_multicast,
+ r->rx_broadcast, r->rx_discards, r->rx_unknown_protocol);
+ s = format (s, "\n%U", format_white_space, indent);
+ s = format (s,
+ "tx: bytes %lu, unicast %lu, multicast %lu, broadcast "
+ "%lu, discards %lu errors %lu",
+ r->tx_bytes, r->tx_unicast, r->tx_multicast,
+ r->tx_broadcast, r->tx_discards, r->tx_errors);
+ }
+ break;
+ default:
+ s = format (s, "unknown op 0x%04x", op);
+ break;
+ };
+ return s;
+}
+
+vnet_dev_rv_t
+iavf_virtchnl_req (vlib_main_t *vm, vnet_dev_t *dev, iavf_virtchnl_req_t *r)
+{
+ iavf_device_t *ad = vnet_dev_get_data (dev);
+ vnet_dev_rv_t rv;
+ iavf_aq_desc_t *d;
+ u8 *b;
+
+ log_debug (dev, "%U req:\n %U", format_virtchnl_op_name, r->op,
+ format_virtchnl_op_req, r->op, r->req);
+
+ iavf_aq_desc_t txd = {
+ .opcode = IIAVF_AQ_DESC_OP_SEND_TO_PF,
+ .v_opcode = r->op,
+ .flags = { .si = 1 },
+ };
+
+ rv = iavf_aq_atq_enq (vm, dev, &txd, r->req, r->req_sz, 0.5);
+
+ if (rv != VNET_DEV_OK)
+ return rv;
+
+ if (r->no_reply)
+ return VNET_DEV_OK;
+
+retry:
+ if (!iavf_aq_arq_next_acq (vm, dev, &d, &b, 1.0))
+ {
+ log_err (ad, "timeout waiting for virtchnl response");
+ return VNET_DEV_ERR_TIMEOUT;
+ }
+
+ if (d->v_opcode == VIRTCHNL_OP_EVENT)
+ {
+ if ((d->datalen != sizeof (virtchnl_pf_event_t)) ||
+ ((d->flags.buf) == 0))
+ {
+ log_err (dev, "event message error");
+ return VNET_DEV_ERR_BUG;
+ }
+
+ vec_add1 (ad->events, *(virtchnl_pf_event_t *) b);
+ iavf_aq_arq_next_rel (vm, dev);
+ goto retry;
+ }
+
+ if (d->v_opcode != r->op)
+ {
+ log_err (dev,
+ "unexpected response received [v_opcode = %u, expected %u, "
+ "v_retval %d]",
+ d->v_opcode, r->op, d->v_retval);
+ rv = VNET_DEV_ERR_BUG;
+ goto done;
+ }
+
+ r->status = d->v_retval;
+
+ if (d->v_retval)
+ {
+ log_err (dev, "error [v_opcode = %u, v_retval %d]", d->v_opcode,
+ d->v_retval);
+ rv = VNET_DEV_ERR_BUG;
+ goto done;
+ }
+
+ if (r->resp_sz && d->flags.buf)
+ clib_memcpy_fast (r->resp, b, r->resp_sz);
+
+done:
+ iavf_aq_arq_next_rel (vm, dev);
+ if (rv == VNET_DEV_OK)
+ log_debug (dev, "%U resp:\n %U", format_virtchnl_op_name, r->op,
+ format_virtchnl_op_resp, r->op, r->resp);
+ return rv;
+}
diff --git a/src/plugins/dev_iavf/virtchnl.h b/src/plugins/dev_iavf/virtchnl.h
new file mode 100644
index 00000000000..2099104c8ad
--- /dev/null
+++ b/src/plugins/dev_iavf/virtchnl.h
@@ -0,0 +1,570 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_VIRTCHNL_H_
+#define _IIAVF_VIRTCHNL_H_
+
+#define VIRTCHNL_VERSION_MAJOR 1
+#define VIRTCHNL_VERSION_MINOR 1
+
+#define foreach_iavf_promisc_flags \
+ _ (0, UNICAST_PROMISC, "unicast") \
+ _ (1, MULTICAST_PROMISC, "multicast")
+
+enum
+{
+#define _(a, b, c) FLAG_VF_##b = (1 << a),
+ foreach_iavf_promisc_flags
+#undef _
+};
+
+#define IAVF_VFINT_DYN_CTLN(x) (0x00003800 + (0x4 * x))
+#define IAVF_VFINT_ICR0 0x00004800
+#define IAVF_VFINT_ICR0_ENA1 0x00005000
+#define IAVF_VFINT_DYN_CTL0 0x00005C00
+#define IAVF_ARQBAH 0x00006000
+#define IAVF_ATQH 0x00006400
+#define IAVF_ATQLEN 0x00006800
+#define IAVF_ARQBAL 0x00006C00
+#define IAVF_ARQT 0x00007000
+#define IAVF_ARQH 0x00007400
+#define IAVF_ATQBAH 0x00007800
+#define IAVF_ATQBAL 0x00007C00
+#define IAVF_ARQLEN 0x00008000
+#define IAVF_ATQT 0x00008400
+#define IAVF_VFGEN_RSTAT 0x00008800
+#define IAVF_QTX_TAIL(q) (0x00000000 + (0x4 * q))
+#define IAVF_QRX_TAIL(q) (0x00002000 + (0x4 * q))
+
+#define foreach_virtchnl_op \
+ _ (0, UNKNOWN) \
+ _ (1, VERSION) \
+ _ (2, RESET_VF) \
+ _ (3, GET_VF_RESOURCES) \
+ _ (4, CONFIG_TX_QUEUE) \
+ _ (5, CONFIG_RX_QUEUE) \
+ _ (6, CONFIG_VSI_QUEUES) \
+ _ (7, CONFIG_IRQ_MAP) \
+ _ (8, ENABLE_QUEUES) \
+ _ (9, DISABLE_QUEUES) \
+ _ (10, ADD_ETH_ADDR) \
+ _ (11, DEL_ETH_ADDR) \
+ _ (12, ADD_VLAN) \
+ _ (13, DEL_VLAN) \
+ _ (14, CONFIG_PROMISCUOUS_MODE) \
+ _ (15, GET_STATS) \
+ _ (16, RSVD) \
+ _ (17, EVENT) \
+ _ (18, UNDEF_18) \
+ _ (19, UNDEF_19) \
+ _ (20, IWARP) \
+ _ (21, CONFIG_IWARP_IRQ_MAP) \
+ _ (22, RELEASE_IWARP_IRQ_MAP) \
+ _ (23, CONFIG_RSS_KEY) \
+ _ (24, CONFIG_RSS_LUT) \
+ _ (25, GET_RSS_HENA_CAPS) \
+ _ (26, SET_RSS_HENA) \
+ _ (27, ENABLE_VLAN_STRIPPING) \
+ _ (28, DISABLE_VLAN_STRIPPING) \
+ _ (29, REQUEST_QUEUES) \
+ _ (30, ENABLE_CHANNELS) \
+ _ (31, DISABLE_CHANNELS) \
+ _ (32, ADD_CLOUD_FILTER) \
+ _ (33, DEL_CLOUD_FILTER) \
+ _ (45, ADD_RSS_CFG) \
+ _ (46, DEL_RSS_CFG) \
+ _ (47, ADD_FDIR_FILTER) \
+ _ (48, DEL_FDIR_FILTER) \
+ _ (49, QUERY_FDIR_FILTER) \
+ _ (50, GET_MAX_RSS_QREGION) \
+ _ (51, GET_OFFLOAD_VLAN_V2_CAPS) \
+ _ (52, ADD_VLAN_V2) \
+ _ (53, DEL_VLAN_V2) \
+ _ (54, ENABLE_VLAN_STRIPPING_V2) \
+ _ (55, DISABLE_VLAN_STRIPPING_V2) \
+ _ (56, ENABLE_VLAN_INSERTION_V2) \
+ _ (57, DISABLE_VLAN_INSERTION_V2) \
+ _ (58, ENABLE_VLAN_FILTERING_V2) \
+ _ (59, DISABLE_VLAN_FILTERING_V2) \
+ _ (107, ENABLE_QUEUES_V2) \
+ _ (108, DISABLE_QUEUES_V2) \
+ _ (111, MAP_QUEUE_VECTOR)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL_OP_##n = v,
+ foreach_virtchnl_op
+#undef _
+ VIRTCHNL_N_OPS,
+} virtchnl_op_t;
+
+#define foreach_virtchnl_status \
+ _ (0, SUCCESS) \
+ _ (-5, ERR_PARAM) \
+ _ (-18, ERR_NO_MEMORY) \
+ _ (-38, ERR_OPCODE_MISMATCH) \
+ _ (-39, ERR_CQP_COMPL_ERROR) \
+ _ (-40, ERR_INVALID_VF_ID) \
+ _ (-53, ERR_ADMIN_QUEUE_ERROR) \
+ _ (-64, NOT_SUPPORTED)
+
+typedef enum
+{
+#define _(a, b) VIRTCHNL_STATUS_##b = a,
+ foreach_virtchnl_status
+#undef _
+} virtchnl_status_t;
+
+#define foreach_iavf_vf_cap_flag \
+ _ (0, OFFLOAD_L2, "l2") \
+ _ (1, OFFLOAD_IWARP, "iwarp") \
+ _ (2, OFFLOAD_RSVD, "rsvd") \
+ _ (3, OFFLOAD_RSS_AQ, "rss-aq") \
+ _ (4, OFFLOAD_RSS_REG, "rss-reg") \
+ _ (5, OFFLOAD_WB_ON_ITR, "wb-on-itr") \
+ _ (6, OFFLOAD_REQ_QUEUES, "req-queues") \
+ _ (7, CAP_ADV_LINK_SPEED, "adv-link-speed") \
+ _ (9, LARGE_NUM_QPAIRS, "large-num-qpairs") \
+ _ (15, OFFLOAD_VLAN_V2, "vlan-v2") \
+ _ (16, OFFLOAD_VLAN, "vlan") \
+ _ (17, OFFLOAD_RX_POLLING, "rx-polling") \
+ _ (18, OFFLOAD_RSS_PCTYPE_V2, "rss-pctype-v2") \
+ _ (19, OFFLOAD_RSS_PF, "rss-pf") \
+ _ (20, OFFLOAD_ENCAP, "encap") \
+ _ (21, OFFLOAD_ENCAP_CSUM, "encap-csum") \
+ _ (22, OFFLOAD_RX_ENCAP_CSUM, "rx-encap-csum") \
+ _ (23, OFFLOAD_ADQ, "offload-adq") \
+ _ (24, OFFLOAD_ADQ_v2, "offload-adq-v2") \
+ _ (25, OFFLOAD_USO, "offload-uso") \
+ _ (26, OFFLOAD_RX_FLEX_DESC, "offload-rx-flex-desc") \
+ _ (27, OFFLOAD_ADV_RSS_PF, "offload-adv-rss-pf") \
+ _ (28, OFFLOAD_FDIR_PF, "offload-fdir-pf") \
+ _ (30, CAP_DCF, "dcf")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL_VF_##b = (1 << a),
+ foreach_iavf_vf_cap_flag
+#undef _
+} iavf_vf_cap_flag_t;
+
+typedef enum
+{
+ VIRTCHNL_VSI_TYPE_INVALID = 0,
+ VIRTCHNL_VSI_SRIOV = 6,
+} virtchnl_vsi_type_t;
+
+typedef enum
+{
+ VIRTCHNL_VFR_INPROGRESS = 0,
+ VIRTCHNL_VFR_COMPLETED,
+ VIRTCHNL_VFR_VFACTIVE,
+} virtchnl_vfr_states_t;
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 num_queue_pairs;
+ virtchnl_vsi_type_t vsi_type;
+ u16 qset_handle;
+ u8 default_mac_addr[6];
+} virtchnl_vsi_resource_t;
+
+typedef struct
+{
+ u16 num_vsis;
+ u16 num_queue_pairs;
+ u16 max_vectors;
+ u16 max_mtu;
+ u32 vf_cap_flags;
+ u32 rss_key_size;
+ u32 rss_lut_size;
+ virtchnl_vsi_resource_t vsi_res[1];
+} virtchnl_vf_resource_t;
+
+#define foreach_virtchnl_event_code \
+ _ (0, UNKNOWN) \
+ _ (1, LINK_CHANGE) \
+ _ (2, RESET_IMPENDING) \
+ _ (3, PF_DRIVER_CLOSE)
+
+typedef enum
+{
+#define _(a, b) VIRTCHNL_EVENT_##b = (a),
+ foreach_virtchnl_event_code
+#undef _
+} virtchnl_event_codes_t;
+
+#define foreach_virtchnl_link_speed \
+ _ (0, 2_5GB, "2.5 Gbps") \
+ _ (1, 100MB, "100 Mbps") \
+ _ (2, 1GB, "1 Gbps") \
+ _ (3, 10GB, "10 Gbps") \
+ _ (4, 40GB, "40 Gbps") \
+ _ (5, 20GB, "20 Gbps") \
+ _ (6, 25GB, "25 Gbps") \
+ _ (7, 5GB, "5 Gbps")
+
+typedef enum
+{
+ VIRTCHNL_LINK_SPEED_UNKNOWN = 0,
+#define _(a, b, c) VIRTCHNL_LINK_SPEED_##b = (1 << a),
+ foreach_virtchnl_link_speed
+#undef _
+} virtchnl_link_speed_t;
+
+typedef struct
+{
+ virtchnl_event_codes_t event;
+ union
+ {
+ struct
+ {
+ virtchnl_link_speed_t link_speed;
+ u8 link_status;
+ } link_event;
+ struct
+ {
+ u32 link_speed;
+ u8 link_status;
+ } link_event_adv;
+ } event_data;
+ int severity;
+} virtchnl_pf_event_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_pf_event_t, 16);
+
+typedef struct
+{
+ u32 major;
+ u32 minor;
+} virtchnl_version_info_t;
+
+#define foreach_iavf_aq_desc_flag \
+ _ (1, dd) \
+ _ (1, cmp) \
+ _ (1, err) \
+ _ (1, vfe) \
+ _ (5, reserved) \
+ _ (1, lb) \
+ _ (1, rd) \
+ _ (1, vfc) \
+ _ (1, buf) \
+ _ (1, si) \
+ _ (1, ie) \
+ _ (1, fe)
+
+typedef union
+{
+ struct
+ {
+#define _(n, s) u16 s : n;
+ foreach_iavf_aq_desc_flag
+#undef _
+ };
+ u16 as_u16;
+} iavf_aq_desc_flags_t;
+
+STATIC_ASSERT_SIZEOF (iavf_aq_desc_flags_t, 2);
+
+typedef enum
+{
+ IIAVF_AQ_DESC_OP_QUEUE_SHUTDOWN = 0x0003,
+ IIAVF_AQ_DESC_OP_SEND_TO_PF = 0x0801,
+ IIAVF_AQ_DESC_OP_MESSAGE_FROM_PF = 0x0802,
+} __clib_packed iavf_aq_desc_op_t;
+
+#define foreach_iavf_aq_desc_retval \
+ _ (0, OK) \
+ _ (1, EPERM) \
+ _ (2, ENOENT) \
+ _ (3, ESRCH) \
+ _ (4, EINTR) \
+ _ (5, EIO) \
+ _ (6, ENXIO) \
+ _ (7, E2BIG) \
+ _ (8, EAGAIN) \
+ _ (9, ENOMEM) \
+ _ (10, EACCES) \
+ _ (11, EFAULT) \
+ _ (12, EBUSY) \
+ _ (13, EEXIST) \
+ _ (14, EINVAL) \
+ _ (15, ENOTTY) \
+ _ (16, ENOSPC) \
+ _ (17, ENOSYS) \
+ _ (18, ERANGE) \
+ _ (19, EFLUSHED) \
+ _ (20, BAD_ADDR) \
+ _ (21, EMODE) \
+ _ (22, EFBIG) \
+ _ (23, ESBCOMP) \
+ _ (24, ENOSEC) \
+ _ (25, EBADSIG) \
+ _ (26, ESVN) \
+ _ (27, EBADMAN) \
+ _ (28, EBADBUF) \
+ _ (29, EACCES_BMCU)
+
+typedef enum
+{
+#define _(a, b) IIAVF_AQ_DESC_RETVAL_##b = a,
+ foreach_iavf_aq_desc_retval
+#undef _
+} __clib_packed iavf_aq_desc_retval_t;
+
+typedef struct
+{
+ iavf_aq_desc_flags_t flags;
+ iavf_aq_desc_op_t opcode;
+ u16 datalen;
+ u16 retval;
+ union
+ {
+ u32 cookie_hi;
+ virtchnl_op_t v_opcode;
+ };
+ union
+ {
+ u32 cookie_lo;
+ virtchnl_status_t v_retval;
+ };
+ union
+ {
+ u8 driver_unloading : 1;
+ u32 param0;
+ };
+ u32 param1;
+ union
+ {
+ u32 param2;
+ u32 addr_hi;
+ };
+ union
+ {
+ u32 param3;
+ u32 addr_lo;
+ };
+} iavf_aq_desc_t;
+
+STATIC_ASSERT_SIZEOF (iavf_aq_desc_t, 32);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 queue_id;
+ u16 ring_len;
+ u64 dma_ring_addr;
+ u64 dma_headwb_addr;
+} virtchnl_txq_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_txq_info_t, 24);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 queue_id;
+ u32 ring_len;
+ u16 hdr_size;
+ u16 splithdr_enabled;
+ u32 databuffer_size;
+ u32 max_pkt_size;
+ u8 crc_disable;
+ u8 rxdid;
+ u8 pad[2];
+ u64 dma_ring_addr;
+ i32 rx_split_pos;
+ u32 pad2;
+} virtchnl_rxq_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_rxq_info_t, 40);
+
+typedef struct
+{
+ virtchnl_txq_info_t txq;
+ virtchnl_rxq_info_t rxq;
+} virtchnl_queue_pair_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_queue_pair_info_t, 64);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 num_queue_pairs;
+ u32 pad;
+ virtchnl_queue_pair_info_t qpair[1];
+} virtchnl_vsi_queue_config_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_vsi_queue_config_info_t, 72);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 pad;
+ u32 rx_queues;
+ u32 tx_queues;
+} virtchnl_queue_select_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_queue_select_t, 12);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 vector_id;
+ u16 rxq_map;
+ u16 txq_map;
+ u16 rxitr_idx;
+ u16 txitr_idx;
+} virtchnl_vector_map_t;
+
+typedef struct
+{
+ u16 num_vectors;
+ virtchnl_vector_map_t vecmap[1];
+} virtchnl_irq_map_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_irq_map_info_t, 14);
+
+typedef struct
+{
+ u8 addr[6];
+ union
+ {
+ struct
+ {
+ u8 primary : 1;
+ u8 extra : 1;
+ };
+ u8 type;
+ };
+ u8 pad[1];
+} virtchnl_ether_addr_t;
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 num_elements;
+ virtchnl_ether_addr_t list[1];
+} virtchnl_ether_addr_list_t;
+
+#define foreach_virtchnl_eth_stats \
+ _ (rx_bytes) \
+ _ (rx_unicast) \
+ _ (rx_multicast) \
+ _ (rx_broadcast) \
+ _ (rx_discards) \
+ _ (rx_unknown_protocol) \
+ _ (tx_bytes) \
+ _ (tx_unicast) \
+ _ (tx_multicast) \
+ _ (tx_broadcast) \
+ _ (tx_discards) \
+ _ (tx_errors)
+
+typedef struct
+{
+#define _(s) u64 s;
+ foreach_virtchnl_eth_stats
+#undef _
+} virtchnl_eth_stats_t;
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 key_len;
+ u8 key[1];
+} virtchnl_rss_key_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_rss_key_t, 6);
+
+typedef struct
+{
+ u16 vsi_id;
+ u16 lut_entries;
+ u8 lut[1];
+} virtchnl_rss_lut_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_rss_lut_t, 6);
+
+/* VIRTCHNL_OP_REQUEST_QUEUES */
+typedef struct
+{
+ u16 num_queue_pairs;
+} virtchnl_vf_res_request_t;
+
+typedef struct
+{
+ u32 outer;
+ u32 inner;
+} virtchnl_vlan_supported_caps_t;
+
+typedef struct
+{
+ virtchnl_vlan_supported_caps_t filtering_support;
+ u32 ethertype_init;
+ u16 max_filters;
+ u8 pad[2];
+} virtchnl_vlan_filtering_caps_t;
+
+typedef struct virtchnl_vlan_offload_caps
+{
+ virtchnl_vlan_supported_caps_t stripping_support;
+ virtchnl_vlan_supported_caps_t insertion_support;
+ u32 ethertype_init;
+ u8 ethertype_match;
+ u8 pad[3];
+} virtchnl_vlan_offload_caps_t;
+
+typedef struct
+{
+ virtchnl_vlan_filtering_caps_t filtering;
+ virtchnl_vlan_offload_caps_t offloads;
+} virtchnl_vlan_caps_t;
+
+#define foreach_virtchnl_vlan_support_bit \
+ _ (0, ETHERTYPE_8100, "dot1Q") \
+ _ (1, ETHERTYPE_88A8, "dot1AD") \
+ _ (2, ETHERTYPE_9100, "QinQ") \
+ _ (8, TAG_LOCATION_L2TAG1, "l2tag1") \
+ _ (9, TAG_LOCATION_L2TAG2, "l2tag2") \
+ _ (10, TAG_LOCATION_L2TAG2_2, "l2tag2_2") \
+ _ (24, PRIO, "prio") \
+ _ (28, FILTER_MASK, "filter-mask") \
+ _ (29, ETHERTYPE_AND, "etype-and") \
+ _ (30, ETHERTYPE_XOR, "etype-xor") \
+ _ (31, TOGGLE, "toggle")
+
+typedef enum
+{
+ VIRTCHNL_VLAN_UNSUPPORTED = 0,
+#define _(a, b, c) VIRTCHNL_VLAN_##b = (1 << a),
+ foreach_virtchnl_vlan_support_bit
+#undef _
+} virtchnl_vlan_support_caps_t;
+
+typedef struct
+{
+ u32 outer_ethertype_setting;
+ u32 inner_ethertype_setting;
+ u16 vport_id;
+ u8 pad[6];
+} virtchnl_vlan_setting_t;
+
+typedef struct
+{
+ u16 vsi_id;
+ union
+ {
+ struct
+ {
+ u16 unicast_promisc : 1;
+ u16 multicast_promisc : 1;
+ };
+ u16 flags;
+ };
+} virtchnl_promisc_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl_promisc_info_t, 4);
+
+#endif /* IAVF_VIRTCHNL_H */
diff --git a/src/plugins/dev_iavf/virtchnl_funcs.h b/src/plugins/dev_iavf/virtchnl_funcs.h
new file mode 100644
index 00000000000..e7f3901e0ee
--- /dev/null
+++ b/src/plugins/dev_iavf/virtchnl_funcs.h
@@ -0,0 +1,241 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _IIAVF_VIRTCHNL_FUNCS_H_
+#define _IIAVF_VIRTCHNL_FUNCS_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/dev/dev.h>
+#include <dev_iavf/iavf.h>
+
+#define VIRTCHNL_MSG_SZ(s, e, n) STRUCT_OFFSET_OF (s, e[(n) + 1])
+
+typedef struct
+{
+ virtchnl_op_t op;
+ u8 no_reply : 1;
+ u16 req_sz;
+ u16 resp_sz;
+ virtchnl_status_t status;
+ const void *req;
+ void *resp;
+} iavf_virtchnl_req_t;
+
+vnet_dev_rv_t iavf_virtchnl_req (vlib_main_t *, vnet_dev_t *,
+ iavf_virtchnl_req_t *);
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_version (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_version_info_t *req,
+ virtchnl_version_info_t *resp)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_VERSION,
+ .req = req,
+ .req_sz = sizeof (*req),
+ .resp = resp,
+ .resp_sz = sizeof (*resp),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_reset_vf (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_RESET_VF,
+ .no_reply = 1,
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_get_vf_resources (vlib_main_t *vm, vnet_dev_t *dev, const u32 *req,
+ virtchnl_vf_resource_t *resp)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_GET_VF_RESOURCES,
+ .req = req,
+ .req_sz = sizeof (*req),
+ .resp = resp,
+ .resp_sz = sizeof (*resp),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_enable_queues (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_queue_select_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_ENABLE_QUEUES,
+ .req = req,
+ .req_sz = sizeof (*req),
+ };
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_disable_queues (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_queue_select_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_DISABLE_QUEUES,
+ .req = req,
+ .req_sz = sizeof (*req),
+ };
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_vsi_queues (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_vsi_queue_config_info_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ .req = req,
+ .req_sz = VIRTCHNL_MSG_SZ (virtchnl_vsi_queue_config_info_t, qpair,
+ req->num_queue_pairs),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_irq_map (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_irq_map_info_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_CONFIG_IRQ_MAP,
+ .req = req,
+ .req_sz =
+ VIRTCHNL_MSG_SZ (virtchnl_irq_map_info_t, vecmap, req->num_vectors),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_rss_lut (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_rss_lut_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_CONFIG_RSS_LUT,
+ .req = req,
+ .req_sz = VIRTCHNL_MSG_SZ (virtchnl_rss_lut_t, lut, req->lut_entries),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_rss_key (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_rss_key_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_CONFIG_RSS_KEY,
+ .req = req,
+ .req_sz = VIRTCHNL_MSG_SZ (virtchnl_rss_key_t, key, req->key_len),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_promisc_info_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+ .req = req,
+ .req_sz = sizeof (*req),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_disable_vlan_stripping (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING,
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_add_eth_addr (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_ether_addr_list_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_ADD_ETH_ADDR,
+ .req = req,
+ .req_sz =
+ VIRTCHNL_MSG_SZ (virtchnl_ether_addr_list_t, list, req->num_elements),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_del_eth_addr (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_ether_addr_list_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_DEL_ETH_ADDR,
+ .req = req,
+ .req_sz =
+ VIRTCHNL_MSG_SZ (virtchnl_ether_addr_list_t, list, req->num_elements),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_get_offload_vlan_v2_caps (vlib_main_t *vm, vnet_dev_t *dev,
+ virtchnl_vlan_caps_t *resp)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS,
+ .resp = resp,
+ .resp_sz = sizeof (*resp),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_get_stats (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_queue_select_t *req,
+ virtchnl_eth_stats_t *resp)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_GET_STATS,
+ .req = req,
+ .req_sz = sizeof (*req),
+ .resp = resp,
+ .resp_sz = sizeof (*resp),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+static_always_inline vnet_dev_rv_t
+iavf_vc_op_disable_vlan_stripping_v2 (vlib_main_t *vm, vnet_dev_t *dev,
+ const virtchnl_vlan_setting_t *req)
+{
+ iavf_virtchnl_req_t vr = {
+ .op = VIRTCHNL_OP_DISABLE_VLAN_STRIPPING_V2,
+ .req = req,
+ .req_sz = sizeof (*req),
+ };
+
+ return iavf_virtchnl_req (vm, dev, &vr);
+}
+
+#endif /* _IIAVF_VIRTCHNL_FUNCS_H_ */
diff --git a/src/plugins/dev_octeon/CMakeLists.txt b/src/plugins/dev_octeon/CMakeLists.txt
new file mode 100644
index 00000000000..e8abf1a3389
--- /dev/null
+++ b/src/plugins/dev_octeon/CMakeLists.txt
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2022 Cisco Systems, Inc.
+
+if (NOT VPP_PLATFORM_NAME STREQUAL "octeon10")
+ return()
+endif()
+
+# Find OCTEON roc files
+vpp_find_path(OCTEON_ROC_DIR PATH_SUFFIXES octeon-roc NAMES platform.h)
+vpp_plugin_find_library(dev-octeon OCTEON_ROC_LIB "libocteon-roc.a")
+
+if (NOT OCTEON_ROC_DIR)
+ message("OCTEON ROC files not found - Marvell OCTEON device plugin disabled")
+ return()
+endif()
+
+if (NOT OCTEON_ROC_LIB)
+ message("OCTEON ROC library (libocteon-roc.a) not found - Marvell OCTEON device plugin disabled")
+ return ()
+endif()
+
+include_directories (${OCTEON_ROC_DIR}/)
+
+add_vpp_plugin(dev_octeon
+ SOURCES
+ init.c
+ format.c
+ port.c
+ queue.c
+ roc_helper.c
+ rx_node.c
+ tx_node.c
+ flow.c
+
+ MULTIARCH_SOURCES
+ rx_node.c
+ tx_node.c
+
+ LINK_LIBRARIES
+ ${OCTEON_ROC_LIB}
+)
+
diff --git a/src/plugins/dev_octeon/common.h b/src/plugins/dev_octeon/common.h
new file mode 100644
index 00000000000..a7a051526d2
--- /dev/null
+++ b/src/plugins/dev_octeon/common.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _OCT_COMMON_H_
+#define _OCT_COMMON_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <base/roc_api.h>
+
+static_always_inline u32
+oct_aura_free_all_buffers (vlib_main_t *vm, u64 aura_handle, u16 hdr_off)
+{
+ u32 n = 0;
+ u64 iova;
+
+ while ((iova = roc_npa_aura_op_alloc (aura_handle, 0)))
+ {
+ vlib_buffer_t *b = (void *) iova + hdr_off;
+ vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b));
+ n++;
+ }
+ return n;
+}
+
+#endif /* _OCT_COMMON_H_ */
diff --git a/src/plugins/dev_octeon/flow.c b/src/plugins/dev_octeon/flow.c
new file mode 100644
index 00000000000..1c367a036ab
--- /dev/null
+++ b/src/plugins/dev_octeon/flow.c
@@ -0,0 +1,505 @@
+/*
+ * Copyright (c) 2024 Marvell.
+ * SPDX-License-Identifier: Apache-2.0
+ * https://spdx.org/licenses/Apache-2.0.html
+ */
+
+#include <dev_octeon/octeon.h>
+#include <base/roc_npc_priv.h>
+
+VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
+ .class_name = "octeon",
+ .subclass_name = "flow",
+};
+
+#define FLOW_IS_ETHERNET_CLASS(f) (f->type == VNET_FLOW_TYPE_ETHERNET)
+
+#define FLOW_IS_IPV4_CLASS(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPU) || \
+ (f->type == VNET_FLOW_TYPE_IP4_L2TPV3OIP) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IPSEC_AH))
+
+#define FLOW_IS_IPV6_CLASS(f) \
+ ((f->type == VNET_FLOW_TYPE_IP6) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP6_VXLAN))
+
+#define FLOW_IS_L3_TYPE(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4) || (f->type == VNET_FLOW_TYPE_IP6))
+
+#define FLOW_IS_L4_TYPE(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED))
+
+#define FLOW_IS_L4_TUNNEL_TYPE(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP6_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPU))
+
+#define OCT_FLOW_UNSUPPORTED_ACTIONS(f) \
+ ((f->actions == VNET_FLOW_ACTION_BUFFER_ADVANCE) || \
+ (f->actions == VNET_FLOW_ACTION_REDIRECT_TO_NODE))
+
+/* Keep values in sync with vnet/flow.h */
+#define foreach_oct_flow_rss_types \
+ _ (1, FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_TCP, "ipv4-tcp") \
+ _ (2, FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_UDP, "ipv4-udp") \
+ _ (3, FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_SCTP, "ipv4-sctp") \
+ _ (5, FLOW_KEY_TYPE_IPV4, "ipv4") \
+ _ (9, FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_TCP, "ipv6-tcp") \
+ _ (10, FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_UDP, "ipv6-udp") \
+ _ (11, FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_SCTP, "ipv6-sctp") \
+ _ (13, FLOW_KEY_TYPE_IPV6_EXT, "ipv6-ex") \
+ _ (14, FLOW_KEY_TYPE_IPV6, "ipv6") \
+ _ (16, FLOW_KEY_TYPE_PORT, "port") \
+ _ (17, FLOW_KEY_TYPE_VXLAN, "vxlan") \
+ _ (18, FLOW_KEY_TYPE_GENEVE, "geneve") \
+ _ (19, FLOW_KEY_TYPE_NVGRE, "nvgre") \
+ _ (20, FLOW_KEY_TYPE_GTPU, "gtpu") \
+ _ (60, FLOW_KEY_TYPE_L4_DST, "l4-dst-only") \
+ _ (61, FLOW_KEY_TYPE_L4_SRC, "l4-src-only") \
+ _ (62, FLOW_KEY_TYPE_L3_DST, "l3-dst-only") \
+ _ (63, FLOW_KEY_TYPE_L3_SRC, "l3-src-only")
+
+typedef struct
+{
+ u16 src_port;
+ u16 dst_port;
+ u32 verification_tag;
+ u32 cksum;
+} sctp_header_t;
+
+typedef struct
+{
+ u8 ver_flags;
+ u8 type;
+ u16 length;
+ u32 teid;
+} gtpu_header_t;
+
+static void
+oct_flow_convert_rss_types (u64 *key, u64 rss_types)
+{
+#define _(a, b, c) \
+ if (rss_types & (1UL << a)) \
+ *key |= b;
+
+ foreach_oct_flow_rss_types
+#undef _
+
+ return;
+}
+
+vnet_dev_rv_t
+oct_flow_validate_params (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_type_t type, u32 flow_index,
+ uword *priv_data)
+{
+ vnet_flow_t *flow = vnet_get_flow (flow_index);
+ u32 last_queue;
+ u32 qid;
+
+ if (type == VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER ||
+ type == VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER)
+ {
+ log_err (port->dev, "Unsupported request type");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ if (OCT_FLOW_UNSUPPORTED_ACTIONS (flow))
+ {
+ log_err (port->dev, "Unsupported flow action");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
+ {
+ qid = flow->redirect_queue;
+ if (qid > port->intf.num_rx_queues - 1 || qid < 0)
+ {
+ log_err (port->dev,
+ "Given Q(%d) is invalid, supported range is %d-%d", qid, 0,
+ port->intf.num_rx_queues - 1);
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ }
+
+ if (flow->actions & VNET_FLOW_ACTION_RSS)
+ {
+ last_queue = flow->queue_index + flow->queue_num;
+ if (last_queue > port->intf.num_rx_queues - 1)
+ {
+ log_err (port->dev,
+ "Given Q range(%d-%d) is invalid, supported range is %d-%d",
+ flow->queue_index, flow->queue_index + flow->queue_num, 0,
+ port->intf.num_rx_queues - 1);
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ }
+ return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+oct_flow_rule_create (vnet_dev_port_t *port, struct roc_npc_action *actions,
+ struct roc_npc_item_info *item_info, vnet_flow_t *flow,
+ uword *private_data)
+{
+ oct_port_t *oct_port = vnet_dev_get_port_data (port);
+ struct roc_npc_attr attr = { .priority = 1, .ingress = 1 };
+ struct roc_npc_flow *npc_flow;
+ oct_flow_entry_t *flow_entry;
+ struct roc_npc *npc;
+ int rv = 0;
+
+ npc = &oct_port->npc;
+
+ npc_flow =
+ roc_npc_flow_create (npc, &attr, item_info, actions, npc->pf_func, &rv);
+ if (rv)
+ {
+ log_err (port->dev, "roc_npc_flow_create failed with '%s' error",
+ roc_error_msg_get (rv));
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ roc_npc_mcam_clear_counter (npc, npc_flow->ctr_id);
+
+ pool_get_zero (oct_port->flow_entries, flow_entry);
+ flow_entry->index = flow_entry - oct_port->flow_entries;
+ flow_entry->vnet_flow_index = flow->index;
+ flow_entry->npc_flow = npc_flow;
+
+ *private_data = flow_entry->index;
+
+ return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+oct_flow_add (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
+ uword *private_data)
+{
+ struct roc_npc_item_info item_info[ROC_NPC_ITEM_TYPE_END] = {};
+ struct roc_npc_action actions[ROC_NPC_ITEM_TYPE_END] = {};
+ oct_port_t *oct_port = vnet_dev_get_port_data (port);
+ u16 l4_src_port = 0, l4_dst_port = 0;
+ u16 l4_src_mask = 0, l4_dst_mask = 0;
+ struct roc_npc_action_rss rss_conf = {};
+ struct roc_npc_action_queue conf = {};
+ struct roc_npc_action_mark mark = {};
+ struct roc_npc *npc = &oct_port->npc;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ int layer = 0, index = 0;
+ u16 *queues = NULL;
+ u64 flow_key = 0;
+ u8 proto = 0;
+ u16 action = 0;
+
+ if (FLOW_IS_ETHERNET_CLASS (flow))
+ {
+ ethernet_header_t eth_spec = { .type = clib_host_to_net_u16 (
+ flow->ethernet.eth_hdr.type) },
+ eth_mask = { .type = 0xFFFF };
+
+ item_info[layer].spec = (void *) &eth_spec;
+ item_info[layer].mask = (void *) &eth_mask;
+ item_info[layer].size = sizeof (ethernet_header_t);
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_ETH;
+ layer++;
+ }
+
+ else if (FLOW_IS_IPV4_CLASS (flow))
+ {
+ vnet_flow_ip4_t *ip4_hdr = &flow->ip4;
+ proto = ip4_hdr->protocol.prot;
+ ip4_header_t ip4_spec = { .src_address = ip4_hdr->src_addr.addr,
+ .dst_address = ip4_hdr->dst_addr.addr },
+ ip4_mask = { .src_address = ip4_hdr->src_addr.mask,
+ .dst_address = ip4_hdr->dst_addr.mask };
+
+ item_info[layer].spec = (void *) &ip4_spec;
+ item_info[layer].mask = (void *) &ip4_mask;
+ item_info[layer].size = sizeof (ip4_header_t);
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_IPV4;
+ layer++;
+
+ if (FLOW_IS_L4_TYPE (flow))
+ {
+ vnet_flow_ip4_n_tuple_t *ip4_tuple_hdr = &flow->ip4_n_tuple;
+
+ l4_src_port = clib_host_to_net_u16 (ip4_tuple_hdr->src_port.port);
+ l4_dst_port = clib_host_to_net_u16 (ip4_tuple_hdr->dst_port.port);
+ l4_src_mask = clib_host_to_net_u16 (ip4_tuple_hdr->src_port.mask);
+ l4_dst_mask = clib_host_to_net_u16 (ip4_tuple_hdr->dst_port.mask);
+ }
+ }
+ else if (FLOW_IS_IPV6_CLASS (flow))
+ {
+ vnet_flow_ip6_t *ip6_hdr = &flow->ip6;
+ proto = ip6_hdr->protocol.prot;
+ ip6_header_t ip6_spec = { .src_address = ip6_hdr->src_addr.addr,
+ .dst_address = ip6_hdr->dst_addr.addr },
+ ip6_mask = { .src_address = ip6_hdr->src_addr.mask,
+ .dst_address = ip6_hdr->dst_addr.mask };
+
+ item_info[layer].spec = (void *) &ip6_spec;
+ item_info[layer].mask = (void *) &ip6_mask;
+ item_info[layer].size = sizeof (ip6_header_t);
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_IPV6;
+ layer++;
+
+ if (FLOW_IS_L4_TYPE (flow))
+ {
+ vnet_flow_ip6_n_tuple_t *ip6_tuple_hdr = &flow->ip6_n_tuple;
+
+ l4_src_port = clib_host_to_net_u16 (ip6_tuple_hdr->src_port.port);
+ l4_dst_port = clib_host_to_net_u16 (ip6_tuple_hdr->dst_port.port);
+ l4_src_mask = clib_host_to_net_u16 (ip6_tuple_hdr->src_port.mask);
+ l4_dst_mask = clib_host_to_net_u16 (ip6_tuple_hdr->dst_port.mask);
+ }
+ }
+
+ if (!proto)
+ goto end_item_info;
+
+ switch (proto)
+ {
+ case IP_PROTOCOL_UDP:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_UDP;
+
+ udp_header_t udp_spec = { .src_port = l4_src_port,
+ .dst_port = l4_dst_port },
+ udp_mask = { .src_port = l4_src_mask,
+ .dst_port = l4_dst_mask };
+
+ item_info[layer].spec = (void *) &udp_spec;
+ item_info[layer].mask = (void *) &udp_mask;
+ item_info[layer].size = sizeof (udp_header_t);
+ layer++;
+
+ if (FLOW_IS_L4_TUNNEL_TYPE (flow))
+ {
+ switch (flow->type)
+ {
+ case VNET_FLOW_TYPE_IP4_GTPU:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_GTPU;
+ gtpu_header_t gtpu_spec = { .teid = clib_host_to_net_u32 (
+ flow->ip4_gtpu.teid) },
+ gtpu_mask = { .teid = 0XFFFFFFFF };
+
+ item_info[layer].spec = (void *) &gtpu_spec;
+ item_info[layer].mask = (void *) &gtpu_mask;
+ item_info[layer].size = sizeof (gtpu_header_t);
+ layer++;
+ break;
+
+ default:
+ log_err (port->dev, "Unsupported L4 tunnel type");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ } /* FLOW_IS_L4_TUNNEL_TYPE */
+ break;
+
+ case IP_PROTOCOL_TCP:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_TCP;
+
+ tcp_header_t tcp_spec = { .src_port = l4_src_port,
+ .dst_port = l4_dst_port },
+ tcp_mask = { .src_port = l4_src_mask,
+ .dst_port = l4_dst_mask };
+
+ item_info[layer].spec = (void *) &tcp_spec;
+ item_info[layer].mask = (void *) &tcp_mask;
+ item_info[layer].size = sizeof (tcp_header_t);
+ layer++;
+ break;
+
+ case IP_PROTOCOL_SCTP:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_SCTP;
+
+ sctp_header_t sctp_spec = { .src_port = l4_src_port,
+ .dst_port = l4_dst_port },
+ sctp_mask = { .src_port = l4_src_mask,
+ .dst_port = l4_dst_mask };
+
+ item_info[layer].spec = (void *) &sctp_spec;
+ item_info[layer].mask = (void *) &sctp_mask;
+ item_info[layer].size = sizeof (sctp_header_t);
+ layer++;
+ break;
+
+ case IP_PROTOCOL_IPSEC_ESP:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_ESP;
+ esp_header_t esp_spec = { .spi = clib_host_to_net_u32 (
+ flow->ip4_ipsec_esp.spi) },
+ esp_mask = { .spi = 0xFFFFFFFF };
+
+ item_info[layer].spec = (void *) &esp_spec;
+ item_info[layer].mask = (void *) &esp_mask;
+ item_info[layer].size = sizeof (u32);
+ layer++;
+ break;
+
+ default:
+ log_err (port->dev, "Unsupported IP protocol '%U'", format_ip_protocol,
+ proto);
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+end_item_info:
+ item_info[layer].type = ROC_NPC_ITEM_TYPE_END;
+
+ if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_QUEUE)
+ {
+ conf.index = flow->redirect_queue;
+ actions[action].type = ROC_NPC_ACTION_TYPE_QUEUE;
+ actions[action].conf = &conf;
+ action++;
+ }
+
+ else if (flow->actions & VNET_FLOW_ACTION_DROP)
+ {
+ actions[action].type = ROC_NPC_ACTION_TYPE_DROP;
+ action++;
+ }
+
+ else if (flow->actions & VNET_FLOW_ACTION_RSS)
+ {
+ if (!flow->queue_num)
+ {
+ log_err (port->dev, "RSS action has no queues");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ queues = clib_mem_alloc (sizeof (u16) * port->intf.num_rx_queues);
+
+ for (index = 0; index < flow->queue_num; index++)
+ queues[index] = flow->queue_index++;
+
+ oct_flow_convert_rss_types (&flow_key, flow->rss_types);
+ if (!flow_key)
+ {
+ log_err (port->dev, "Invalid RSS hash function");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ npc->flowkey_cfg_state = flow_key;
+ rss_conf.queue_num = flow->queue_num;
+ rss_conf.queue = queues;
+
+ actions[action].type = ROC_NPC_ACTION_TYPE_RSS;
+ actions[action].conf = &rss_conf;
+ action++;
+ }
+
+ if (flow->actions & VNET_FLOW_ACTION_MARK)
+ {
+ if (flow->mark_flow_id == 0 ||
+ flow->mark_flow_id > (NPC_FLOW_FLAG_VAL - 2))
+ {
+ log_err (port->dev, "mark flow id must be > 0 and < 0xfffe");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ /* RoC library adds 1 to id, so subtract 1 */
+ mark.id = flow->mark_flow_id - 1;
+ actions[action].type = ROC_NPC_ACTION_TYPE_MARK;
+ actions[action].conf = &mark;
+ action++;
+ }
+
+ /* make count as default action */
+ actions[action].type = ROC_NPC_ACTION_TYPE_COUNT;
+ actions[action + 1].type = ROC_NPC_ACTION_TYPE_END;
+
+ rv = oct_flow_rule_create (port, actions, item_info, flow, private_data);
+
+ if (queues)
+ clib_mem_free (queues);
+
+ return rv;
+}
+
+static vnet_dev_rv_t
+oct_flow_del (vlib_main_t *vm, vnet_dev_port_t *port, vnet_flow_t *flow,
+ uword *private_data)
+{
+ oct_port_t *oct_port = vnet_dev_get_port_data (port);
+ struct roc_npc *npc = &oct_port->npc;
+ struct roc_npc_flow *npc_flow;
+ oct_flow_entry_t *flow_entry;
+ int rv = 0, index;
+
+ index = *private_data;
+ flow_entry = pool_elt_at_index (oct_port->flow_entries, index);
+ npc_flow = flow_entry->npc_flow;
+ rv = roc_npc_flow_destroy (npc, npc_flow);
+ if (rv)
+ {
+ log_err (port->dev, "roc_npc_flow_destroy failed with '%s' error",
+ roc_error_msg_get (rv));
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+ pool_put (oct_port->flow_entries, flow_entry);
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+oct_flow_query (vlib_main_t *vm, vnet_dev_port_t *port, u32 flow_index,
+ uword private_data, u64 *hits)
+{
+ oct_port_t *oct_port = vnet_dev_get_port_data (port);
+ struct roc_npc *npc = &oct_port->npc;
+ struct roc_npc_flow *npc_flow;
+ oct_flow_entry_t *flow_entry;
+ i32 flow_count;
+ int rv = 0;
+
+ flow_count = pool_elts (oct_port->flow_entries);
+ if (!flow_count)
+ {
+ log_err (port->dev, "Flow entry pool is empty");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ flow_entry = pool_elt_at_index (oct_port->flow_entries, private_data);
+ npc_flow = flow_entry->npc_flow;
+ if (npc_flow->ctr_id == NPC_COUNTER_NONE)
+ {
+ log_err (port->dev, "Counters are not available for given flow id (%u)",
+ flow_index);
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ rv = roc_npc_mcam_read_counter (npc, npc_flow->ctr_id, hits);
+ if (rv != 0)
+ {
+ log_err (port->dev, "Error reading flow counter for given flow id (%u)",
+ flow_index);
+ return VNET_DEV_ERR_INTERNAL;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+oct_flow_ops_fn (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_type_t type, u32 flow_index,
+ uword *priv_data)
+{
+ vnet_flow_t *flow = vnet_get_flow (flow_index);
+
+ if (type == VNET_DEV_PORT_CFG_ADD_RX_FLOW)
+ return oct_flow_add (vm, port, flow, priv_data);
+
+ if (type == VNET_DEV_PORT_CFG_DEL_RX_FLOW)
+ return oct_flow_del (vm, port, flow, priv_data);
+
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+}
diff --git a/src/plugins/dev_octeon/format.c b/src/plugins/dev_octeon/format.c
new file mode 100644
index 00000000000..e624b84f54e
--- /dev/null
+++ b/src/plugins/dev_octeon/format.c
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vlib/pci/pci.h"
+#include "vnet/error.h"
+#include "vppinfra/error.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <dev_octeon/octeon.h>
+
+u8 *
+format_oct_port_status (u8 *s, va_list *args)
+{
+ return s;
+}
+
+u8 *
+format_oct_nix_rx_cqe_desc (u8 *s, va_list *args)
+{
+ oct_nix_rx_cqe_desc_t *d = va_arg (*args, oct_nix_rx_cqe_desc_t *);
+ u32 indent = format_get_indent (s);
+ typeof (d->hdr) *h = &d->hdr;
+ typeof (d->parse.f) *p = &d->parse.f;
+ typeof (d->sg0) *sg0 = &d->sg0;
+ typeof (d->sg0) *sg1 = &d->sg1;
+
+ s = format (s, "hdr: cqe_type %u nude %u q %u tag 0x%x", h->cqe_type,
+ h->node, h->q, h->tag);
+ s = format (s, "\n%Uparse:", format_white_space, indent);
+#define _(n, f) s = format (s, " " #n " " f, p->n)
+ _ (chan, "%u");
+ _ (errcode, "%u");
+ _ (errlev, "%u");
+ _ (desc_sizem1, "%u");
+ _ (pkt_lenm1, "%u");
+ _ (pkind, "%u");
+ s = format (s, "\n%U ", format_white_space, indent);
+ _ (nix_idx, "%u");
+ _ (color, "%u");
+ _ (flow_key_alg, "%u");
+ _ (eoh_ptr, "%u");
+ _ (match_id, "0x%x");
+ s = format (s, "\n%U ", format_white_space, indent);
+ _ (wqe_aura, "0x%x");
+ _ (pb_aura, "0x%x");
+ _ (imm_copy, "%u");
+ _ (express, "%u");
+ _ (wqwd, "%u");
+ _ (l2m, "%u");
+ _ (l2b, "%u");
+ _ (l3m, "%u");
+ _ (l3b, "%u");
+#undef _
+ s = format (s, "\n%U ", format_white_space, indent);
+ s = format (s, "layer: a b c d e f g h");
+ s = format (s, "\n%U ", format_white_space, indent);
+ s = format (s, "type: %3u %3u %3u %3u %3u %3u %3u %3u", p->latype,
+ p->lbtype, p->lctype, p->ldtype, p->letype, p->lftype, p->lgtype,
+ p->lhtype);
+ s = format (s, "\n%U ", format_white_space, indent);
+ s = format (
+ s, "flags: 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x",
+ p->laflags, p->lbflags, p->lcflags, p->ldflags, p->leflags, p->lfflags,
+ p->lgflags, p->lhflags);
+ s = format (s, "\n%U ", format_white_space, indent);
+ s = format (s, "ptr: %3u %3u %3u %3u %3u %3u %3u %3u", p->laptr,
+ p->lbptr, p->lcptr, p->ldptr, p->leptr, p->lfptr, p->lgptr,
+ p->lhptr);
+
+ if (sg0->subdc != 0x4)
+ return format (s, "\n%Usg0: unexpected subdc %x", format_white_space,
+ indent, sg0->subdc);
+
+ s = format (s,
+ "\n%Usg0: segs %u seg1_sz %u seg2_sz %u seg3_sz %u seg1 "
+ "%p seg2 %p seg3 %p",
+ format_white_space, indent, sg0->segs, sg0->seg1_size,
+ sg0->seg2_size, sg0->seg3_size, d->segs0[0], d->segs0[1],
+ d->segs0[2]);
+
+ if (sg1->subdc != 0x4 && sg1->subdc != 0)
+ return format (s, "\n%Usg1: unexpected subdc %x", format_white_space,
+ indent, sg1->subdc);
+
+ if (sg1->subdc == 4)
+ s = format (s,
+ "\n%Usg1: segs %u seg1_sz %u seg2_sz %u seg3_sz %u seg1 "
+ "%p seg2 %p seg3 %p",
+ format_white_space, indent, sg1->segs, sg1->seg1_size,
+ sg1->seg2_size, sg1->seg3_size, d->segs1[0], d->segs1[1],
+ d->segs1[2]);
+
+ return s;
+}
+
+u8 *
+format_oct_rx_trace (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ vlib_node_t *node = va_arg (*args, vlib_node_t *);
+ oct_rx_trace_t *t = va_arg (*args, oct_rx_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "octeon-rx: next-node %U sw_if_index %u",
+ format_vlib_next_node_name, vm, node->index, t->next_index,
+ t->sw_if_index);
+ s = format (s, "\n%U%U", format_white_space, indent + 2,
+ format_oct_nix_rx_cqe_desc, &t->desc);
+ return s;
+}
+
+u8 *
+format_oct_tx_trace (u8 *s, va_list *args)
+{
+ va_arg (*args, vlib_main_t *);
+ va_arg (*args, vlib_node_t *);
+ oct_tx_trace_t *t = va_arg (*args, oct_tx_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "octeon-tx: sw_if_index %u", t->sw_if_index);
+ s = format (s, "\n%Uhdr[0]:", format_white_space, indent + 2);
+#define _(n, f) s = format (s, " " #n " " f, t->desc.hdr_w0.n)
+ _ (total, "%u");
+ _ (df, "%u");
+ _ (aura, "0x%x");
+ _ (sizem1, "%u");
+ _ (pnc, "%u");
+ _ (sq, "%u");
+#undef _
+ s = format (s, "\n%Uhdr[1]:", format_white_space, indent + 2);
+#define _(n, f) s = format (s, " " #n " " f, t->desc.hdr_w1.n)
+ _ (ol3ptr, "%u");
+ _ (ol4ptr, "%u");
+ _ (il3ptr, "%u");
+ _ (il4ptr, "%u");
+ _ (ol3type, "%u");
+ _ (ol4type, "%u");
+ _ (il3type, "%u");
+ _ (il4type, "%u");
+ _ (sqe_id, "%u");
+#undef _
+
+ foreach_int (j, 0, 4)
+ {
+ s = format (s, "\n%Usg[%u]:", format_white_space, indent + 2, j);
+#define _(n, f) s = format (s, " " #n " " f, t->desc.sg[j].n)
+ _ (subdc, "%u");
+ _ (segs, "%u");
+ _ (seg1_size, "%u");
+ _ (seg2_size, "%u");
+ _ (seg3_size, "%u");
+ _ (i1, "%u");
+ _ (i2, "%u");
+ _ (i3, "%u");
+ _ (ld_type, "%u");
+#undef _
+ for (int i = 1; i < 4; i++)
+ s = format (s, "\n%Usg[%u]: %p", format_white_space, indent + 2, i + j,
+ t->desc.sg[i + j]);
+ }
+
+ return s;
+}
+
+u8 *
+format_oct_port_flow (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+ u32 flow_index = va_arg (*args, u32);
+ uword private_data = va_arg (*args, uword);
+ u64 hits;
+
+ if (flow_index == ~0)
+ return s;
+
+ if (oct_flow_query (vm, port, flow_index, private_data, &hits) ==
+ VNET_DEV_OK)
+ s = format (s, "flow (%u) hit count: %lu", flow_index, hits);
+
+ return s;
+}
diff --git a/src/plugins/dev_octeon/hw_defs.h b/src/plugins/dev_octeon/hw_defs.h
new file mode 100644
index 00000000000..ab0fc7bd8da
--- /dev/null
+++ b/src/plugins/dev_octeon/hw_defs.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _OCT_HW_DEFS_H_
+#define _OCT_HW_DEFS_H_
+
+#include <vppinfra/clib.h>
+#include <base/roc_api.h>
+
+typedef union
+{
+ struct
+ {
+ u64 tail : 20;
+ u64 head : 20;
+ u64 resv40 : 6;
+ u64 cq_err : 1;
+ u64 resv47 : 16;
+ u64 op_err : 1;
+ };
+ u64 as_u64;
+} oct_nix_lf_cq_op_status_t;
+
+STATIC_ASSERT_SIZEOF (oct_nix_lf_cq_op_status_t, 8);
+
+typedef union
+{
+ struct
+ {
+ u64 aura : 20;
+ u64 _reseved20 : 12;
+ u64 count_eot : 1;
+ u64 _reserved33 : 30;
+ u64 fabs : 1;
+ };
+ u64 as_u64;
+} oct_npa_lf_aura_batch_free0_t;
+
+STATIC_ASSERT_SIZEOF (oct_npa_lf_aura_batch_free0_t, 8);
+
+typedef struct
+{
+ oct_npa_lf_aura_batch_free0_t w0;
+ u64 data[15];
+} oct_npa_lf_aura_batch_free_line_t;
+
+STATIC_ASSERT_SIZEOF (oct_npa_lf_aura_batch_free_line_t, 128);
+
+typedef union
+{
+ struct npa_batch_alloc_compare_s compare_s;
+ u64 as_u64;
+} oct_npa_batch_alloc_compare_t;
+
+typedef union
+{
+ struct
+ {
+ union nix_send_hdr_w0_u hdr_w0;
+ union nix_send_hdr_w1_u hdr_w1;
+ union nix_send_sg_s sg[8];
+ };
+ u128 as_u128[5];
+} oct_tx_desc_t;
+
+STATIC_ASSERT_SIZEOF (oct_tx_desc_t, 80);
+
+typedef union
+{
+ u128 dwords[8];
+ u64 words[16];
+} lmt_line_t;
+
+STATIC_ASSERT_SIZEOF (lmt_line_t, 1 << ROC_LMT_LINE_SIZE_LOG2);
+
+typedef union
+{
+ union nix_rx_parse_u f;
+ u64 w[7];
+} oct_nix_rx_parse_t;
+
+STATIC_ASSERT_SIZEOF (oct_nix_rx_parse_t, 56);
+
+typedef struct
+{
+ CLIB_ALIGN_MARK (desc, 128);
+ struct nix_cqe_hdr_s hdr;
+ oct_nix_rx_parse_t parse;
+ struct nix_rx_sg_s sg0;
+ void *segs0[3];
+ struct nix_rx_sg_s sg1;
+ void *segs1[3];
+} oct_nix_rx_cqe_desc_t;
+
+STATIC_ASSERT_SIZEOF (oct_nix_rx_cqe_desc_t, 128);
+
+#endif /* _OCT_HW_DEFS_H_ */
diff --git a/src/plugins/dev_octeon/init.c b/src/plugins/dev_octeon/init.c
new file mode 100644
index 00000000000..8c5ed95b062
--- /dev/null
+++ b/src/plugins/dev_octeon/init.c
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <dev_octeon/octeon.h>
+
+#include <base/roc_api.h>
+#include <common.h>
+
+struct roc_model oct_model;
+
+VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
+ .class_name = "octeon",
+ .subclass_name = "init",
+};
+
+#define _(f, n, s, d) \
+ { .name = #n, .desc = d, .severity = VL_COUNTER_SEVERITY_##s },
+
+vlib_error_desc_t oct_tx_node_counters[] = { foreach_oct_tx_node_counter };
+#undef _
+
+vnet_dev_node_t oct_rx_node = {
+ .format_trace = format_oct_rx_trace,
+};
+
+vnet_dev_node_t oct_tx_node = {
+ .format_trace = format_oct_tx_trace,
+ .error_counters = oct_tx_node_counters,
+ .n_error_counters = ARRAY_LEN (oct_tx_node_counters),
+};
+
+static struct
+{
+ u16 device_id;
+ oct_device_type_t type;
+ char *description;
+} oct_dev_types[] = {
+
+#define _(id, device_type, desc) \
+ { \
+ .device_id = (id), .type = OCT_DEVICE_TYPE_##device_type, \
+ .description = (desc) \
+ }
+
+ _ (0xa063, RVU_PF, "Marvell Octeon Resource Virtualization Unit PF"),
+ _ (0xa0f8, RVU_VF, "Marvell Octeon Resource Virtualization Unit VF"),
+ _ (0xa0f7, SDP_VF, "Marvell Octeon System DPI Packet Interface Unit VF"),
+ _ (0xa0f3, CPT_VF, "Marvell Octeon Cryptographic Accelerator Unit VF"),
+#undef _
+};
+
+static u8 *
+oct_probe (vlib_main_t *vm, vnet_dev_bus_index_t bus_index, void *dev_info)
+{
+ vnet_dev_bus_pci_device_info_t *di = dev_info;
+
+ if (di->vendor_id != 0x177d) /* Cavium */
+ return 0;
+
+ FOREACH_ARRAY_ELT (dt, oct_dev_types)
+ {
+ if (dt->device_id == di->device_id)
+ return format (0, "%s", dt->description);
+ }
+
+ return 0;
+}
+
+vnet_dev_rv_t
+cnx_return_roc_err (vnet_dev_t *dev, int rrv, char *fmt, ...)
+{
+ va_list va;
+ va_start (va, fmt);
+ u8 *s = va_format (0, fmt, &va);
+ va_end (va);
+
+ log_err (dev, "%v: %s [%d]", s, roc_error_msg_get (rrv), rrv);
+ vec_free (s);
+
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+}
+
+static vnet_dev_rv_t
+oct_alloc (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ cd->nix =
+ clib_mem_alloc_aligned (sizeof (struct roc_nix), CLIB_CACHE_LINE_BYTES);
+ return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+oct_init_nix (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ u8 mac_addr[6];
+ int rrv;
+ oct_port_t oct_port = {};
+
+ *cd->nix = (struct roc_nix){
+ .reta_sz = ROC_NIX_RSS_RETA_SZ_256,
+ .max_sqb_count = 512,
+ .pci_dev = &cd->plt_pci_dev,
+ };
+
+ if ((rrv = roc_nix_dev_init (cd->nix)))
+ return cnx_return_roc_err (dev, rrv, "roc_nix_dev_init");
+
+ if ((rrv = roc_nix_npc_mac_addr_get (cd->nix, mac_addr)))
+ return cnx_return_roc_err (dev, rrv, "roc_nix_npc_mac_addr_get");
+
+ vnet_dev_port_add_args_t port_add_args = {
+ .port = {
+ .attr = {
+ .type = VNET_DEV_PORT_TYPE_ETHERNET,
+ .max_rx_queues = 64,
+ .max_tx_queues = 64,
+ .max_supported_rx_frame_size = roc_nix_max_pkt_len (cd->nix),
+ .caps = {
+ .rss = 1,
+ },
+ .rx_offloads = {
+ .ip4_cksum = 1,
+ },
+ },
+ .ops = {
+ .init = oct_port_init,
+ .deinit = oct_port_deinit,
+ .start = oct_port_start,
+ .stop = oct_port_stop,
+ .config_change = oct_port_cfg_change,
+ .config_change_validate = oct_port_cfg_change_validate,
+ .format_status = format_oct_port_status,
+ .format_flow = format_oct_port_flow,
+ },
+ .data_size = sizeof (oct_port_t),
+ .initial_data = &oct_port,
+ },
+ .rx_node = &oct_rx_node,
+ .tx_node = &oct_tx_node,
+ .rx_queue = {
+ .config = {
+ .data_size = sizeof (oct_rxq_t),
+ .default_size = 1024,
+ .multiplier = 32,
+ .min_size = 256,
+ .max_size = 16384,
+ },
+ .ops = {
+ .alloc = oct_rx_queue_alloc,
+ .free = oct_rx_queue_free,
+ .format_info = format_oct_rxq_info,
+ },
+ },
+ .tx_queue = {
+ .config = {
+ .data_size = sizeof (oct_txq_t),
+ .default_size = 1024,
+ .multiplier = 32,
+ .min_size = 256,
+ .max_size = 16384,
+ },
+ .ops = {
+ .alloc = oct_tx_queue_alloc,
+ .free = oct_tx_queue_free,
+ .format_info = format_oct_txq_info,
+ },
+ },
+ };
+
+ vnet_dev_set_hw_addr_eth_mac (&port_add_args.port.attr.hw_addr, mac_addr);
+
+ log_info (dev, "MAC address is %U", format_ethernet_address, mac_addr);
+
+ return vnet_dev_port_add (vm, dev, 0, &port_add_args);
+}
+
+static vnet_dev_rv_t
+oct_init_cpt (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ int rrv;
+ struct roc_cpt cpt = {
+ .pci_dev = &cd->plt_pci_dev,
+ };
+
+ if ((rrv = roc_cpt_dev_init (&cpt)))
+ return cnx_return_roc_err (dev, rrv, "roc_cpt_dev_init");
+ return VNET_DEV_OK;
+}
+
+static vnet_dev_rv_t
+oct_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ vlib_pci_config_hdr_t pci_hdr;
+ vnet_dev_rv_t rv;
+
+ rv = vnet_dev_pci_read_config_header (vm, dev, &pci_hdr);
+ if (rv != VNET_DEV_OK)
+ return rv;
+
+ if (pci_hdr.vendor_id != 0x177d)
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+
+ FOREACH_ARRAY_ELT (dt, oct_dev_types)
+ {
+ if (dt->device_id == pci_hdr.device_id)
+ cd->type = dt->type;
+ }
+
+ if (cd->type == OCT_DEVICE_TYPE_UNKNOWN)
+ return rv;
+
+ rv = VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+
+ cd->plt_pci_dev = (struct plt_pci_device){
+ .id.vendor_id = pci_hdr.vendor_id,
+ .id.device_id = pci_hdr.device_id,
+ .id.class_id = pci_hdr.class << 16 | pci_hdr.subclass,
+ .pci_handle = vnet_dev_get_pci_handle (dev),
+ };
+
+ foreach_int (i, 2, 4)
+ {
+ rv = vnet_dev_pci_map_region (vm, dev, i,
+ &cd->plt_pci_dev.mem_resource[i].addr);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+
+ strncpy ((char *) cd->plt_pci_dev.name, dev->device_id,
+ sizeof (cd->plt_pci_dev.name) - 1);
+
+ switch (cd->type)
+ {
+ case OCT_DEVICE_TYPE_RVU_PF:
+ case OCT_DEVICE_TYPE_RVU_VF:
+ case OCT_DEVICE_TYPE_SDP_VF:
+ return oct_init_nix (vm, dev);
+
+ case OCT_DEVICE_TYPE_CPT_VF:
+ return oct_init_cpt (vm, dev);
+
+ default:
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+ }
+
+ return 0;
+}
+
+static void
+oct_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+
+ if (cd->nix_initialized)
+ roc_nix_dev_fini (cd->nix);
+}
+
+static void
+oct_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ oct_device_t *cd = vnet_dev_get_data (dev);
+
+ if (cd->nix_initialized)
+ roc_nix_dev_fini (cd->nix);
+}
+
+VNET_DEV_REGISTER_DRIVER (octeon) = {
+ .name = "octeon",
+ .bus = "pci",
+ .device_data_sz = sizeof (oct_device_t),
+ .ops = {
+ .alloc = oct_alloc,
+ .init = oct_init,
+ .deinit = oct_deinit,
+ .free = oct_free,
+ .probe = oct_probe,
+ },
+};
+
+static clib_error_t *
+oct_plugin_init (vlib_main_t *vm)
+{
+ int rv;
+ extern oct_plt_init_param_t oct_plt_init_param;
+
+ rv = oct_plt_init (&oct_plt_init_param);
+ if (rv)
+ return clib_error_return (0, "oct_plt_init failed");
+
+ rv = roc_model_init (&oct_model);
+ if (rv)
+ return clib_error_return (0, "roc_model_init failed");
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (oct_plugin_init);
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "dev_octeon",
+};
diff --git a/src/plugins/dev_octeon/octeon.h b/src/plugins/dev_octeon/octeon.h
new file mode 100644
index 00000000000..92ec953ed23
--- /dev/null
+++ b/src/plugins/dev_octeon/octeon.h
@@ -0,0 +1,186 @@
+
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+#ifndef _OCTEON_H_
+#define _OCTEON_H_
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/flow/flow.h>
+#include <vnet/udp/udp.h>
+#include <vnet/ipsec/esp.h>
+#include <base/roc_api.h>
+#include <dev_octeon/hw_defs.h>
+
+#define OCT_BATCH_ALLOC_IOVA0_MASK 0xFFFFFFFFFFFFFF80
+
+typedef enum
+{
+ OCT_DEVICE_TYPE_UNKNOWN = 0,
+ OCT_DEVICE_TYPE_RVU_PF,
+ OCT_DEVICE_TYPE_RVU_VF,
+ OCT_DEVICE_TYPE_SDP_VF,
+ OCT_DEVICE_TYPE_CPT_VF,
+} __clib_packed oct_device_type_t;
+
+typedef struct
+{
+ oct_device_type_t type;
+ u8 nix_initialized : 1;
+ u8 status : 1;
+ u8 full_duplex : 1;
+ u32 speed;
+ struct plt_pci_device plt_pci_dev;
+ struct roc_cpt cpt;
+ struct roc_nix *nix;
+} oct_device_t;
+
+typedef struct
+{
+ /* vnet flow index */
+ u32 vnet_flow_index;
+
+ u32 index;
+ /* Internal flow object */
+ struct roc_npc_flow *npc_flow;
+} oct_flow_entry_t;
+
+typedef struct
+{
+ u8 lf_allocated : 1;
+ u8 tm_initialized : 1;
+ u8 npc_initialized : 1;
+ struct roc_npc npc;
+ oct_flow_entry_t *flow_entries;
+} oct_port_t;
+
+typedef struct
+{
+ u8 npa_pool_initialized : 1;
+ u8 cq_initialized : 1;
+ u8 rq_initialized : 1;
+ u16 hdr_off;
+ u32 n_enq;
+ u64 aura_handle;
+ u64 aura_batch_free_ioaddr;
+ u64 lmt_base_addr;
+ CLIB_CACHE_LINE_ALIGN_MARK (data0);
+ struct roc_nix_cq cq;
+ struct roc_nix_rq rq;
+} oct_rxq_t;
+
+typedef struct
+{
+ CLIB_ALIGN_MARK (cl, 128);
+ u64 iova[16];
+} oct_npa_batch_alloc_cl128_t;
+
+typedef union
+{
+ struct npa_batch_alloc_status_s status;
+ u64 as_u64;
+} oct_npa_batch_alloc_status_t;
+
+STATIC_ASSERT_SIZEOF (oct_npa_batch_alloc_cl128_t, 128);
+
+typedef struct
+{
+ u8 sq_initialized : 1;
+ u8 npa_pool_initialized : 1;
+ u16 hdr_off;
+ u32 n_enq;
+ u64 aura_handle;
+ u64 io_addr;
+ void *lmt_addr;
+
+ oct_npa_batch_alloc_cl128_t *ba_buffer;
+ u8 ba_first_cl;
+ u8 ba_num_cl;
+ CLIB_CACHE_LINE_ALIGN_MARK (data0);
+ struct roc_nix_sq sq;
+} oct_txq_t;
+
+/* format.c */
+format_function_t format_oct_port_status;
+format_function_t format_oct_rx_trace;
+format_function_t format_oct_tx_trace;
+format_function_t format_oct_port_flow;
+
+/* port.c */
+vnet_dev_rv_t oct_port_init (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t oct_port_start (vlib_main_t *, vnet_dev_port_t *);
+void oct_port_stop (vlib_main_t *, vnet_dev_port_t *);
+void oct_port_deinit (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t oct_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t oct_port_cfg_change_validate (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+
+/* queue.c */
+vnet_dev_rv_t oct_rx_queue_alloc (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t oct_tx_queue_alloc (vlib_main_t *, vnet_dev_tx_queue_t *);
+void oct_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void oct_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t oct_rxq_init (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t oct_txq_init (vlib_main_t *, vnet_dev_tx_queue_t *);
+void oct_rxq_deinit (vlib_main_t *, vnet_dev_rx_queue_t *);
+void oct_txq_deinit (vlib_main_t *, vnet_dev_tx_queue_t *);
+format_function_t format_oct_rxq_info;
+format_function_t format_oct_txq_info;
+
+/* flow.c */
+vnet_dev_rv_t oct_flow_ops_fn (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_type_t, u32, uword *);
+vnet_dev_rv_t oct_flow_validate_params (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_type_t, u32,
+ uword *);
+vnet_dev_rv_t oct_flow_query (vlib_main_t *, vnet_dev_port_t *, u32, uword,
+ u64 *);
+
+#define log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, oct_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_info(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_INFO, oct_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_notice(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_NOTICE, oct_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, oct_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+#define log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, oct_log.class, "%U: " f, \
+ format_vnet_dev_addr, (dev), ##__VA_ARGS__)
+
+#define foreach_oct_tx_node_counter \
+ _ (CHAIN_TOO_LONG, chain_too_long, ERROR, "drop due to buffer chain > 6") \
+ _ (NO_FREE_SLOTS, no_free_slots, ERROR, "no free tx slots") \
+ _ (AURA_BATCH_ALLOC_ISSUE_FAIL, aura_batch_alloc_issue_fail, ERROR, \
+ "aura batch alloc issue failed") \
+ _ (AURA_BATCH_ALLOC_NOT_READY, aura_batch_alloc_not_ready, ERROR, \
+ "aura batch alloc not ready")
+
+typedef enum
+{
+#define _(f, n, s, d) OCT_TX_NODE_CTR_##f,
+ foreach_oct_tx_node_counter
+#undef _
+} oct_tx_node_counter_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_index;
+ oct_nix_rx_cqe_desc_t desc;
+} oct_rx_trace_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ oct_tx_desc_t desc;
+} oct_tx_trace_t;
+#endif /* _OCTEON_H_ */
diff --git a/src/plugins/dev_octeon/port.c b/src/plugins/dev_octeon/port.c
new file mode 100644
index 00000000000..d5f78301adf
--- /dev/null
+++ b/src/plugins/dev_octeon/port.c
@@ -0,0 +1,493 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_octeon/octeon.h>
+#include <dev_octeon/common.h>
+#include <vnet/ethernet/ethernet.h>
+
+#define OCT_FLOW_PREALLOC_SIZE 1
+#define OCT_FLOW_MAX_PRIORITY 7
+#define OCT_ETH_LINK_SPEED_100G 100000 /**< 100 Gbps */
+
+VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
+ .class_name = "octeon",
+ .subclass_name = "port",
+};
+
+static const u8 default_rss_key[] = {
+ 0xfe, 0xed, 0x0b, 0xad, 0xfe, 0xed, 0x0b, 0xad, 0xad, 0x0b, 0xed, 0xfe,
+ 0xad, 0x0b, 0xed, 0xfe, 0x13, 0x57, 0x9b, 0xef, 0x24, 0x68, 0xac, 0x0e,
+ 0x91, 0x72, 0x53, 0x11, 0x82, 0x64, 0x20, 0x44, 0x12, 0xef, 0x34, 0xcd,
+ 0x56, 0xbc, 0x78, 0x9a, 0x9a, 0x78, 0xbc, 0x56, 0xcd, 0x34, 0xef, 0x12
+};
+
+static const u32 default_rss_flowkey =
+ (FLOW_KEY_TYPE_IPV4 | FLOW_KEY_TYPE_IPV6 | FLOW_KEY_TYPE_TCP |
+ FLOW_KEY_TYPE_UDP | FLOW_KEY_TYPE_SCTP);
+
+static const u64 rxq_cfg =
+ ROC_NIX_LF_RX_CFG_DIS_APAD | ROC_NIX_LF_RX_CFG_IP6_UDP_OPT |
+ ROC_NIX_LF_RX_CFG_L2_LEN_ERR | ROC_NIX_LF_RX_CFG_DROP_RE |
+ ROC_NIX_LF_RX_CFG_CSUM_OL4 | ROC_NIX_LF_RX_CFG_CSUM_IL4 |
+ ROC_NIX_LF_RX_CFG_LEN_OL3 | ROC_NIX_LF_RX_CFG_LEN_OL4 |
+ ROC_NIX_LF_RX_CFG_LEN_IL3 | ROC_NIX_LF_RX_CFG_LEN_IL4;
+
+static vnet_dev_rv_t
+oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...)
+{
+ u8 *s = 0;
+ va_list va;
+
+ va_start (va, fmt);
+ s = va_format (s, fmt, &va);
+ va_end (va);
+
+ log_err (dev, "%v - ROC error %s (%d)", s, roc_error_msg_get (rv), rv);
+
+ vec_free (s);
+ return VNET_DEV_ERR_INTERNAL;
+}
+
+vnet_dev_rv_t
+oct_port_init (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ oct_port_t *cp = vnet_dev_get_port_data (port);
+ struct roc_nix *nix = cd->nix;
+ vnet_dev_rv_t rv;
+ int rrv;
+
+ log_debug (dev, "port init: port %u", port->port_id);
+
+ if ((rrv = roc_nix_lf_alloc (nix, port->intf.num_rx_queues,
+ port->intf.num_tx_queues, rxq_cfg)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (
+ dev, rrv,
+ "roc_nix_lf_alloc(nb_rxq = %u, nb_txq = %d, rxq_cfg=0x%lx) failed",
+ port->intf.num_rx_queues, port->intf.num_tx_queues, rxq_cfg);
+ }
+ cp->lf_allocated = 1;
+
+ if ((rrv = roc_nix_tm_init (nix)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (dev, rrv, "roc_nix_tm_init() failed");
+ }
+ cp->tm_initialized = 1;
+
+ if ((rrv = roc_nix_tm_hierarchy_enable (nix, ROC_NIX_TM_DEFAULT,
+ /* xmit_enable */ 0)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (dev, rrv, "roc_nix_tm_hierarchy_enable() failed");
+ }
+
+ if ((rrv = roc_nix_rss_default_setup (nix, default_rss_flowkey)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (dev, rrv, "roc_nix_rss_default_setup() failed");
+ }
+
+ roc_nix_rss_key_set (nix, default_rss_key);
+
+ cp->npc.roc_nix = nix;
+ cp->npc.flow_prealloc_size = OCT_FLOW_PREALLOC_SIZE;
+ cp->npc.flow_max_priority = OCT_FLOW_MAX_PRIORITY;
+ if ((rrv = roc_npc_init (&cp->npc)))
+ {
+ oct_port_deinit (vm, port);
+ return oct_roc_err (dev, rrv, "roc_npc_init() failed");
+ }
+ cp->npc_initialized = 1;
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->enabled)
+ if ((rv = oct_rxq_init (vm, q)))
+ {
+ oct_port_deinit (vm, port);
+ return rv;
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if (q->enabled)
+ if ((rv = oct_txq_init (vm, q)))
+ {
+ oct_port_deinit (vm, port);
+ return rv;
+ }
+
+ return VNET_DEV_OK;
+}
+
+void
+oct_port_deinit (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ oct_port_t *cp = vnet_dev_get_port_data (port);
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ oct_rxq_deinit (vm, q);
+ foreach_vnet_dev_port_tx_queue (q, port)
+ oct_txq_deinit (vm, q);
+
+ if (cp->npc_initialized)
+ {
+ if ((rrv = roc_npc_fini (&cp->npc)))
+ oct_roc_err (dev, rrv, "roc_npc_fini() failed");
+ cp->npc_initialized = 0;
+ }
+
+ if (cp->tm_initialized)
+ {
+ roc_nix_tm_fini (nix);
+ cp->tm_initialized = 0;
+ }
+
+ if (cp->lf_allocated)
+ {
+ if ((rrv = roc_nix_lf_free (nix)))
+ oct_roc_err (dev, rrv, "roc_nix_lf_free() failed");
+ cp->lf_allocated = 0;
+ }
+}
+
+void
+oct_port_poll (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ struct roc_nix_link_info link_info = {};
+ vnet_dev_port_state_changes_t changes = {};
+ int rrv;
+
+ if (roc_nix_is_lbk (nix))
+ {
+ link_info.status = 1;
+ link_info.full_duplex = 1;
+ link_info.autoneg = 0;
+ link_info.speed = OCT_ETH_LINK_SPEED_100G;
+ }
+ else
+ {
+ rrv = roc_nix_mac_link_info_get (nix, &link_info);
+ if (rrv)
+ return;
+ }
+
+ if (cd->status != link_info.status)
+ {
+ changes.change.link_state = 1;
+ changes.link_state = link_info.status;
+ cd->status = link_info.status;
+ }
+
+ if (cd->full_duplex != link_info.full_duplex)
+ {
+ changes.change.link_duplex = 1;
+ changes.full_duplex = link_info.full_duplex;
+ cd->full_duplex = link_info.full_duplex;
+ }
+
+ if (cd->speed != link_info.speed)
+ {
+ changes.change.link_speed = 1;
+ changes.link_speed = link_info.speed;
+ cd->speed = link_info.speed;
+ }
+
+ if (changes.change.any == 0)
+ return;
+
+ log_debug (dev,
+ "status %u full_duplex %u speed %u port %u lmac_type_id %u "
+ "fec %u aautoneg %u",
+ link_info.status, link_info.full_duplex, link_info.speed,
+ link_info.port, link_info.lmac_type_id, link_info.fec,
+ link_info.autoneg);
+ vnet_dev_port_state_change (vm, port, changes);
+}
+
+vnet_dev_rv_t
+oct_rxq_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_t *dev = rxq->port->dev;
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ u32 buffer_indices[rxq->size], n_alloc;
+ u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+ int rrv;
+
+ n_alloc = vlib_buffer_alloc_from_pool (vm, buffer_indices, rxq->size, bpi);
+
+ for (int i = 0; i < n_alloc; i++)
+ roc_npa_aura_op_free (
+ crq->aura_handle, 0,
+ pointer_to_uword (vlib_get_buffer (vm, buffer_indices[i])) -
+ crq->hdr_off);
+
+ crq->n_enq = n_alloc;
+
+ if (roc_npa_aura_op_available (crq->aura_handle) != rxq->size)
+ log_warn (rxq->port->dev, "rx queue %u aura not filled completelly",
+ rxq->queue_id);
+
+ if ((rrv = roc_nix_rq_ena_dis (&crq->rq, 1)))
+ return oct_roc_err (dev, rrv, "roc_nix_rq_ena_dis() failed");
+
+ return VNET_DEV_OK;
+}
+void
+oct_rxq_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_t *dev = rxq->port->dev;
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ int rrv;
+ u32 n;
+
+ if ((rrv = roc_nix_rq_ena_dis (&crq->rq, 0)))
+ oct_roc_err (dev, rrv, "roc_nix_rq_ena_dis() failed");
+
+ n = oct_aura_free_all_buffers (vm, crq->aura_handle, crq->hdr_off);
+
+ if (crq->n_enq - n > 0)
+ log_err (dev, "%u buffers leaked on rx queue %u stop", crq->n_enq - n,
+ rxq->queue_id);
+ else
+ log_debug (dev, "%u buffers freed from rx queue %u", n, rxq->queue_id);
+
+ crq->n_enq = 0;
+}
+
+void
+oct_txq_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_t *dev = txq->port->dev;
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ oct_npa_batch_alloc_cl128_t *cl;
+ u32 n, off = ctq->hdr_off;
+
+ n = oct_aura_free_all_buffers (vm, ctq->aura_handle, off);
+ ctq->n_enq -= n;
+
+ if (ctq->n_enq > 0 && ctq->ba_num_cl > 0)
+ for (n = ctq->ba_num_cl, cl = ctq->ba_buffer + ctq->ba_first_cl; n;
+ cl++, n--)
+ {
+ oct_npa_batch_alloc_status_t st;
+
+ st.as_u64 = __atomic_load_n (cl->iova, __ATOMIC_ACQUIRE);
+ if (st.status.ccode != ALLOC_CCODE_INVAL)
+ for (u32 i = 0; i < st.status.count; i++)
+ {
+ vlib_buffer_t *b = (vlib_buffer_t *) (cl->iova[i] + off);
+ vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b));
+ ctq->n_enq--;
+ }
+ }
+
+ if (ctq->n_enq > 0)
+ log_err (dev, "%u buffers leaked on tx queue %u stop", ctq->n_enq,
+ txq->queue_id);
+ else
+ log_debug (dev, "%u buffers freed from tx queue %u", n, txq->queue_id);
+
+ ctq->n_enq = 0;
+}
+
+vnet_dev_rv_t
+oct_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ struct roc_nix_eeprom_info eeprom_info = {};
+ vnet_dev_rv_t rv;
+ int rrv;
+
+ log_debug (port->dev, "port start: port %u", port->port_id);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if ((rv = oct_rxq_start (vm, q)) != VNET_DEV_OK)
+ goto done;
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (q);
+ ctq->n_enq = 0;
+ }
+
+ if ((rrv = roc_nix_mac_mtu_set (nix, 9200)))
+ {
+ rv = oct_roc_err (dev, rrv, "roc_nix_mac_mtu_set() failed");
+ goto done;
+ }
+
+ if ((rrv = roc_nix_npc_rx_ena_dis (nix, true)))
+ {
+ rv = oct_roc_err (dev, rrv, "roc_nix_npc_rx_ena_dis() failed");
+ goto done;
+ }
+
+ vnet_dev_poll_port_add (vm, port, 0.5, oct_port_poll);
+
+ if (roc_nix_eeprom_info_get (nix, &eeprom_info) == 0)
+ {
+ log_debug (dev, "sff_id %u data %U", eeprom_info.sff_id, format_hexdump,
+ eeprom_info.buf, sizeof (eeprom_info.buf));
+ }
+done:
+ if (rv != VNET_DEV_OK)
+ oct_port_stop (vm, port);
+ return VNET_DEV_OK;
+}
+
+void
+oct_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+
+ log_debug (port->dev, "port stop: port %u", port->port_id);
+
+ vnet_dev_poll_port_remove (vm, port, oct_port_poll);
+
+ rrv = roc_nix_npc_rx_ena_dis (nix, false);
+ if (rrv)
+ {
+ oct_roc_err (dev, rrv, "roc_nix_npc_rx_ena_dis() failed");
+ return;
+ }
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ oct_rxq_stop (vm, q);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ oct_txq_stop (vm, q);
+}
+
+vnet_dev_rv_t
+oct_validate_config_promisc_mode (vnet_dev_port_t *port, int enable)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+
+ if (roc_nix_is_vf_or_sdp (nix))
+ return VNET_DEV_ERR_UNSUPPORTED_DEVICE;
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+oct_op_config_promisc_mode (vlib_main_t *vm, vnet_dev_port_t *port, int enable)
+{
+ vnet_dev_t *dev = port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ int rv;
+
+ rv = roc_nix_npc_promisc_ena_dis (nix, enable);
+ if (rv)
+ {
+ return oct_roc_err (dev, rv, "roc_nix_npc_promisc_ena_dis failed");
+ }
+
+ rv = roc_nix_mac_promisc_mode_enable (nix, enable);
+ if (rv)
+ {
+ return oct_roc_err (dev, rv,
+ "roc_nix_mac_promisc_mode_enable(%s) failed",
+ enable ? "true" : "false");
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+oct_port_cfg_change_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ if (port->started)
+ rv = VNET_DEV_ERR_PORT_STARTED;
+ break;
+
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ rv = oct_validate_config_promisc_mode (port, req->promisc);
+ break;
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_RX_FLOW:
+ case VNET_DEV_PORT_CFG_DEL_RX_FLOW:
+ case VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER:
+ case VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER:
+ rv = oct_flow_validate_params (vm, port, req->type, req->flow_index,
+ req->private_data);
+ break;
+
+ default:
+ rv = VNET_DEV_ERR_NOT_SUPPORTED;
+ };
+
+ return rv;
+}
+
+vnet_dev_rv_t
+oct_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ rv = oct_op_config_promisc_mode (vm, port, req->promisc);
+ break;
+
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ break;
+
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ break;
+
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_RX_FLOW:
+ case VNET_DEV_PORT_CFG_DEL_RX_FLOW:
+ case VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER:
+ case VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER:
+ rv = oct_flow_ops_fn (vm, port, req->type, req->flow_index,
+ req->private_data);
+
+ break;
+
+ default:
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ };
+
+ return rv;
+}
diff --git a/src/plugins/dev_octeon/queue.c b/src/plugins/dev_octeon/queue.c
new file mode 100644
index 00000000000..d6ae794fb8d
--- /dev/null
+++ b/src/plugins/dev_octeon/queue.c
@@ -0,0 +1,311 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/counters.h>
+#include <dev_octeon/octeon.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_REGISTER_LOG_CLASS (oct_log, static) = {
+ .class_name = "octeon",
+ .subclass_name = "queue",
+};
+
+static vnet_dev_rv_t
+oct_roc_err (vnet_dev_t *dev, int rv, char *fmt, ...)
+{
+ u8 *s = 0;
+ va_list va;
+
+ va_start (va, fmt);
+ s = va_format (s, fmt, &va);
+ va_end (va);
+
+ log_err (dev, "%v - ROC error %s (%d)", s, roc_error_msg_get (rv), rv);
+
+ vec_free (s);
+ return VNET_DEV_ERR_INTERNAL;
+}
+
+vnet_dev_rv_t
+oct_rx_queue_alloc (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+
+ log_debug (dev, "rx_queue_alloc: queue %u alocated", rxq->queue_id);
+ return VNET_DEV_OK;
+}
+
+void
+oct_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+
+ log_debug (dev, "rx_queue_free: queue %u", rxq->queue_id);
+}
+
+vnet_dev_rv_t
+oct_tx_queue_alloc (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+ u32 sz = sizeof (void *) * ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev, "tx_queue_alloc: queue %u alocated", txq->queue_id);
+
+ rv = vnet_dev_dma_mem_alloc (vm, dev, sz, 128, (void **) &ctq->ba_buffer);
+
+ if (rv != VNET_DEV_OK)
+ return rv;
+
+ clib_memset_u64 (ctq->ba_buffer, OCT_BATCH_ALLOC_IOVA0_MASK,
+ ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS);
+
+ return rv;
+}
+
+void
+oct_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+
+ log_debug (dev, "tx_queue_free: queue %u", txq->queue_id);
+
+ vnet_dev_dma_mem_free (vm, dev, ctq->ba_buffer);
+}
+
+vnet_dev_rv_t
+oct_rxq_init (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_t *dev = rxq->port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ vlib_buffer_pool_t *bp =
+ vlib_get_buffer_pool (vm, vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+ struct roc_nix *nix = cd->nix;
+ int rrv;
+
+ struct npa_aura_s aura = {};
+ struct npa_pool_s npapool = { .nat_align = 1 };
+
+ if ((rrv = roc_npa_pool_create (&crq->aura_handle, bp->alloc_size, rxq->size,
+ &aura, &npapool, 0)))
+ {
+ oct_rxq_deinit (vm, rxq);
+ return oct_roc_err (dev, rrv, "roc_npa_pool_create() failed");
+ }
+
+ crq->npa_pool_initialized = 1;
+ log_notice (dev, "NPA pool created, aura_handle = 0x%lx", crq->aura_handle);
+
+ crq->cq = (struct roc_nix_cq){
+ .nb_desc = rxq->size,
+ .qid = rxq->queue_id,
+ };
+
+ if ((rrv = roc_nix_cq_init (nix, &crq->cq)))
+ {
+ oct_rxq_deinit (vm, rxq);
+ return oct_roc_err (dev, rrv,
+ "roc_nix_cq_init(qid = %u, nb_desc = %u) failed",
+ crq->cq.nb_desc, crq->cq.nb_desc);
+ }
+
+ crq->cq_initialized = 1;
+ log_debug (dev, "CQ %u initialised (qmask 0x%x wdata 0x%lx)", crq->cq.qid,
+ crq->cq.qmask, crq->cq.wdata);
+
+ crq->hdr_off = vm->buffer_main->ext_hdr_size;
+
+ crq->rq = (struct roc_nix_rq){
+ .qid = rxq->queue_id,
+ .cqid = crq->cq.qid,
+ .aura_handle = crq->aura_handle,
+ .first_skip = crq->hdr_off + sizeof (vlib_buffer_t),
+ .later_skip = crq->hdr_off + sizeof (vlib_buffer_t),
+ .lpb_size = bp->data_size + crq->hdr_off + sizeof (vlib_buffer_t),
+ .flow_tag_width = 32,
+ };
+
+ if ((rrv = roc_nix_rq_init (nix, &crq->rq, 1 /* disable */)))
+ {
+ oct_rxq_deinit (vm, rxq);
+ return oct_roc_err (dev, rrv, "roc_nix_rq_init(qid = %u) failed",
+ crq->rq.qid);
+ }
+
+ crq->rq_initialized = 1;
+ crq->lmt_base_addr = roc_idev_lmt_base_addr_get ();
+ crq->aura_batch_free_ioaddr =
+ (roc_npa_aura_handle_to_base (crq->aura_handle) +
+ NPA_LF_AURA_BATCH_FREE0) |
+ (0x7 << 4);
+
+ log_debug (dev, "RQ %u initialised", crq->cq.qid);
+
+ return VNET_DEV_OK;
+}
+
+void
+oct_rxq_deinit (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ vnet_dev_t *dev = rxq->port->dev;
+ int rrv;
+
+ if (crq->rq_initialized)
+ {
+ rrv = roc_nix_rq_fini (&crq->rq);
+ if (rrv)
+ oct_roc_err (dev, rrv, "roc_nix_rq_fini() failed");
+ crq->rq_initialized = 0;
+ }
+
+ if (crq->cq_initialized)
+ {
+ rrv = roc_nix_cq_fini (&crq->cq);
+ if (rrv)
+ oct_roc_err (dev, rrv, "roc_nix_cq_fini() failed");
+ crq->cq_initialized = 0;
+ }
+
+ if (crq->npa_pool_initialized)
+ {
+ rrv = roc_npa_pool_destroy (crq->aura_handle);
+ if (rrv)
+ oct_roc_err (dev, rrv, "roc_npa_pool_destroy() failed");
+ crq->npa_pool_initialized = 0;
+ }
+}
+
+vnet_dev_rv_t
+oct_txq_init (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_t *dev = txq->port->dev;
+ oct_device_t *cd = vnet_dev_get_data (dev);
+ struct roc_nix *nix = cd->nix;
+ struct npa_aura_s aura = {};
+ struct npa_pool_s npapool = { .nat_align = 1 };
+ int rrv;
+ vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, 0);
+
+ if ((rrv = roc_npa_pool_create (
+ &ctq->aura_handle, bp->alloc_size,
+ txq->size * 6 /* worst case - two SG with 3 segs each = 6 */, &aura,
+ &npapool, 0)))
+ {
+ oct_txq_deinit (vm, txq);
+ return oct_roc_err (dev, rrv, "roc_npa_pool_create() failed");
+ }
+
+ ctq->npa_pool_initialized = 1;
+ log_notice (dev, "NPA pool created, aura_handle = 0x%lx", ctq->aura_handle);
+
+ ctq->sq = (struct roc_nix_sq){
+ .nb_desc = txq->size,
+ .qid = txq->queue_id,
+ .max_sqe_sz = NIX_MAXSQESZ_W16,
+ };
+
+ if ((rrv = roc_nix_sq_init (nix, &ctq->sq)))
+ {
+ oct_txq_deinit (vm, txq);
+ return oct_roc_err (
+ dev, rrv,
+ "roc_nix_sq_init(qid = %u, nb_desc = %u, max_sqe_sz = %u) failed",
+ ctq->sq.nb_desc, ctq->sq.max_sqe_sz);
+ }
+
+ ctq->sq_initialized = 1;
+ log_debug (dev, "SQ initialised, qid %u, nb_desc %u, max_sqe_sz %u",
+ ctq->sq.qid, ctq->sq.nb_desc, ctq->sq.max_sqe_sz);
+
+ ctq->hdr_off = vm->buffer_main->ext_hdr_size;
+
+ if (ctq->sq.lmt_addr == 0)
+ ctq->sq.lmt_addr = (void *) nix->lmt_base;
+ ctq->io_addr = ctq->sq.io_addr & ~0x7fULL;
+ ctq->lmt_addr = ctq->sq.lmt_addr;
+
+ return VNET_DEV_OK;
+}
+
+void
+oct_txq_deinit (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ vnet_dev_t *dev = txq->port->dev;
+ int rrv;
+
+ if (ctq->sq_initialized)
+ {
+ rrv = roc_nix_sq_fini (&ctq->sq);
+ if (rrv)
+ oct_roc_err (dev, rrv, "roc_nix_sq_fini() failed");
+ ctq->sq_initialized = 0;
+ }
+
+ if (ctq->npa_pool_initialized)
+ {
+ rrv = roc_npa_pool_destroy (ctq->aura_handle);
+ if (rrv)
+ oct_roc_err (dev, rrv, "roc_npa_pool_destroy() failed");
+ ctq->npa_pool_initialized = 0;
+ }
+}
+
+u8 *
+format_oct_rxq_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_rx_queue_t *rxq = va_arg (*args, vnet_dev_rx_queue_t *);
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ u32 indent = format_get_indent (s);
+
+ if (a->debug)
+ {
+ s = format (s, "n_enq %u cq_nb_desc %u", crq->n_enq, crq->cq.nb_desc);
+ s = format (s, "\n%Uaura: id 0x%x count %u limit %u avail %u",
+ format_white_space, indent,
+ roc_npa_aura_handle_to_aura (crq->aura_handle),
+ roc_npa_aura_op_cnt_get (crq->aura_handle),
+ roc_npa_aura_op_limit_get (crq->aura_handle),
+ roc_npa_aura_op_available (crq->aura_handle));
+ }
+ return s;
+}
+
+u8 *
+format_oct_txq_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_tx_queue_t *txq = va_arg (*args, vnet_dev_tx_queue_t *);
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ u32 indent = format_get_indent (s);
+
+ if (a->debug)
+ {
+ s = format (s, "n_enq %u sq_nb_desc %u io_addr %p lmt_addr %p",
+ ctq->n_enq, ctq->sq.nb_desc, ctq->io_addr, ctq->lmt_addr);
+ s = format (s, "\n%Uaura: id 0x%x count %u limit %u avail %u",
+ format_white_space, indent,
+ roc_npa_aura_handle_to_aura (ctq->aura_handle),
+ roc_npa_aura_op_cnt_get (ctq->aura_handle),
+ roc_npa_aura_op_limit_get (ctq->aura_handle),
+ roc_npa_aura_op_available (ctq->aura_handle));
+ }
+
+ return s;
+}
diff --git a/src/plugins/dev_octeon/roc_helper.c b/src/plugins/dev_octeon/roc_helper.c
new file mode 100644
index 00000000000..f10c2cb578b
--- /dev/null
+++ b/src/plugins/dev_octeon/roc_helper.c
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2023 Marvell.
+ * SPDX-License-Identifier: Apache-2.0
+ * https://spdx.org/licenses/Apache-2.0.html
+ */
+
+#include <vnet/vnet.h>
+#include <vlib/pci/pci.h>
+#include <vlib/linux/vfio.h>
+#include <base/roc_api.h>
+#include <common.h>
+
+static oct_plt_memzone_list_t memzone_list;
+
+static inline void
+oct_plt_log (oct_plt_log_level_t level, oct_plt_log_class_t cls, char *fmt,
+ ...)
+{
+ vlib_log ((vlib_log_level_t) level, cls, fmt);
+}
+
+static inline void
+oct_plt_spinlock_init (oct_plt_spinlock_t *p)
+{
+ clib_spinlock_init ((clib_spinlock_t *) p);
+}
+
+static void
+oct_plt_spinlock_lock (oct_plt_spinlock_t *p)
+{
+ clib_spinlock_lock ((clib_spinlock_t *) p);
+}
+
+static void
+oct_plt_spinlock_unlock (oct_plt_spinlock_t *p)
+{
+ clib_spinlock_unlock ((clib_spinlock_t *) p);
+}
+
+static int
+oct_plt_spinlock_trylock (oct_plt_spinlock_t *p)
+{
+ return clib_spinlock_trylock ((clib_spinlock_t *) p);
+}
+
+static u64
+oct_plt_get_thread_index (void)
+{
+ return __os_thread_index;
+}
+
+static void
+oct_drv_physmem_free (vlib_main_t *vm, void *mem)
+{
+ if (!mem)
+ {
+ clib_warning ("Invalid address %p", mem);
+ return;
+ }
+
+ vlib_physmem_free (vm, mem);
+}
+
+static void *
+oct_drv_physmem_alloc (vlib_main_t *vm, u32 size, u32 align)
+{
+ clib_error_t *error = NULL;
+ uword *mem = NULL;
+
+ if (align)
+ {
+ /* Force cache line alloc in case alignment is less than cache line */
+ align = align < CLIB_CACHE_LINE_BYTES ? CLIB_CACHE_LINE_BYTES : align;
+ mem = vlib_physmem_alloc_aligned_on_numa (vm, size, align, 0);
+ }
+ else
+ mem =
+ vlib_physmem_alloc_aligned_on_numa (vm, size, CLIB_CACHE_LINE_BYTES, 0);
+ if (!mem)
+ return NULL;
+
+ error = vfio_map_physmem_page (vm, mem);
+ if (error)
+ goto report_error;
+
+ clib_memset (mem, 0, size);
+ return mem;
+
+report_error:
+ clib_error_report (error);
+ oct_drv_physmem_free (vm, mem);
+
+ return NULL;
+}
+
+static void
+oct_plt_free (void *addr)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ oct_drv_physmem_free ((void *) vm, addr);
+}
+
+static void *
+oct_plt_zmalloc (u32 size, u32 align)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ return oct_drv_physmem_alloc (vm, size, align);
+}
+
+static oct_plt_memzone_t *
+memzone_get (u32 index)
+{
+ if (index == ((u32) ~0))
+ return 0;
+
+ return pool_elt_at_index (memzone_list.mem_pool, index);
+}
+
+static int
+oct_plt_memzone_free (const oct_plt_memzone_t *name)
+{
+ uword *p;
+ p = hash_get_mem (memzone_list.memzone_by_name, name);
+
+ if (p[0] == ((u32) ~0))
+ return -EINVAL;
+
+ hash_unset_mem (memzone_list.memzone_by_name, name);
+
+ pool_put_index (memzone_list.mem_pool, p[0]);
+
+ return 0;
+}
+
+static oct_plt_memzone_t *
+oct_plt_memzone_lookup (const char *name)
+{
+ uword *p;
+ p = hash_get_mem (memzone_list.memzone_by_name, name);
+ if (p)
+ return memzone_get (p[0]);
+
+ return 0;
+}
+
+static oct_plt_memzone_t *
+oct_plt_memzone_reserve_aligned (const char *name, u64 len, u8 socket,
+ u32 flags, u32 align)
+{
+ oct_plt_memzone_t *mem_pool;
+ void *p = NULL;
+
+ pool_get_zero (memzone_list.mem_pool, mem_pool);
+
+ p = oct_plt_zmalloc (len, align);
+ if (!p)
+ return NULL;
+
+ mem_pool->addr = p;
+ mem_pool->index = mem_pool - memzone_list.mem_pool;
+ hash_set_mem (memzone_list.memzone_by_name, name, mem_pool->index);
+
+ return mem_pool;
+}
+
+oct_plt_init_param_t oct_plt_init_param = {
+ .oct_plt_log_reg_class = vlib_log_register_class,
+ .oct_plt_log = oct_plt_log,
+ .oct_plt_free = oct_plt_free,
+ .oct_plt_zmalloc = oct_plt_zmalloc,
+ .oct_plt_memzone_free = oct_plt_memzone_free,
+ .oct_plt_memzone_lookup = oct_plt_memzone_lookup,
+ .oct_plt_memzone_reserve_aligned = oct_plt_memzone_reserve_aligned,
+ .oct_plt_spinlock_init = oct_plt_spinlock_init,
+ .oct_plt_spinlock_lock = oct_plt_spinlock_lock,
+ .oct_plt_spinlock_unlock = oct_plt_spinlock_unlock,
+ .oct_plt_spinlock_trylock = oct_plt_spinlock_trylock,
+ .oct_plt_get_thread_index = oct_plt_get_thread_index,
+};
diff --git a/src/plugins/dev_octeon/rx_node.c b/src/plugins/dev_octeon/rx_node.c
new file mode 100644
index 00000000000..997f1356199
--- /dev/null
+++ b/src/plugins/dev_octeon/rx_node.c
@@ -0,0 +1,392 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <dev_octeon/octeon.h>
+#include <dev_octeon/hw_defs.h>
+
+typedef struct
+{
+ u32 next_index;
+ u32 sw_if_index;
+ u32 hw_if_index;
+ u32 trace_count;
+ u32 n_traced;
+ oct_nix_rx_cqe_desc_t *next_desc;
+ u64 parse_w0_or;
+ u32 n_left_to_next;
+ u32 *to_next;
+ u32 n_rx_pkts;
+ u32 n_rx_bytes;
+ u32 n_segs;
+} oct_rx_node_ctx_t;
+
+static_always_inline vlib_buffer_t *
+oct_seg_to_bp (void *p)
+{
+ return (vlib_buffer_t *) p - 1;
+}
+
+static_always_inline void
+oct_rx_attach_tail (vlib_main_t *vm, oct_rx_node_ctx_t *ctx, vlib_buffer_t *h,
+ oct_nix_rx_cqe_desc_t *d)
+{
+ u32 tail_sz = 0, n_tail_segs = 0;
+ vlib_buffer_t *p, *b;
+ u8 segs0 = d->sg0.segs, segs1 = 0;
+
+ if (segs0 < 2)
+ return;
+
+ b = oct_seg_to_bp (d->segs0[1]);
+ h->next_buffer = vlib_get_buffer_index (vm, b);
+ tail_sz += b->current_length = d->sg0.seg2_size;
+ n_tail_segs++;
+
+ if (segs0 == 2)
+ goto done;
+
+ p = b;
+ p->flags = VLIB_BUFFER_NEXT_PRESENT;
+ b = oct_seg_to_bp (d->segs0[2]);
+ p->next_buffer = vlib_get_buffer_index (vm, b);
+ tail_sz += b->current_length = d->sg0.seg3_size;
+ n_tail_segs++;
+
+ if (d->sg1.subdc != NIX_SUBDC_SG)
+ goto done;
+
+ segs1 = d->sg1.segs;
+ if (segs1 == 0)
+ goto done;
+
+ p = b;
+ p->flags = VLIB_BUFFER_NEXT_PRESENT;
+ b = oct_seg_to_bp (d->segs1[0]);
+ p->next_buffer = vlib_get_buffer_index (vm, b);
+ tail_sz += b->current_length = d->sg1.seg1_size;
+ n_tail_segs++;
+
+ if (segs1 == 1)
+ goto done;
+
+ p = b;
+ p->flags = VLIB_BUFFER_NEXT_PRESENT;
+ b = oct_seg_to_bp (d->segs1[1]);
+ p->next_buffer = vlib_get_buffer_index (vm, b);
+ tail_sz += b->current_length = d->sg1.seg2_size;
+ n_tail_segs++;
+
+ if (segs1 == 2)
+ goto done;
+
+ p = b;
+ p->flags = VLIB_BUFFER_NEXT_PRESENT;
+ b = oct_seg_to_bp (d->segs1[2]);
+ p->next_buffer = vlib_get_buffer_index (vm, b);
+ tail_sz += b->current_length = d->sg1.seg3_size;
+ n_tail_segs++;
+
+done:
+ b->flags = 0;
+ h->total_length_not_including_first_buffer = tail_sz;
+ h->flags |= VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ ctx->n_rx_bytes += tail_sz;
+ ctx->n_segs += n_tail_segs;
+}
+
+static_always_inline u32
+oct_rx_batch (vlib_main_t *vm, oct_rx_node_ctx_t *ctx,
+ vnet_dev_rx_queue_t *rxq, u32 n)
+{
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ vlib_buffer_template_t bt = rxq->buffer_template;
+ u32 n_left;
+ oct_nix_rx_cqe_desc_t *d = ctx->next_desc;
+ vlib_buffer_t *b[4];
+
+ for (n_left = n; n_left >= 8; d += 4, n_left -= 4, ctx->to_next += 4)
+ {
+ u32 segs = 0;
+ clib_prefetch_store (oct_seg_to_bp (d[4].segs0[0]));
+ clib_prefetch_store (oct_seg_to_bp (d[5].segs0[0]));
+ b[0] = oct_seg_to_bp (d[0].segs0[0]);
+ clib_prefetch_store (oct_seg_to_bp (d[6].segs0[0]));
+ b[1] = oct_seg_to_bp (d[1].segs0[0]);
+ clib_prefetch_store (oct_seg_to_bp (d[7].segs0[0]));
+ b[2] = oct_seg_to_bp (d[2].segs0[0]);
+ b[3] = oct_seg_to_bp (d[3].segs0[0]);
+ ctx->to_next[0] = vlib_get_buffer_index (vm, b[0]);
+ ctx->to_next[1] = vlib_get_buffer_index (vm, b[1]);
+ ctx->to_next[2] = vlib_get_buffer_index (vm, b[2]);
+ ctx->to_next[3] = vlib_get_buffer_index (vm, b[3]);
+ b[0]->template = bt;
+ b[1]->template = bt;
+ b[2]->template = bt;
+ b[3]->template = bt;
+ ctx->n_rx_bytes += b[0]->current_length = d[0].sg0.seg1_size;
+ ctx->n_rx_bytes += b[1]->current_length = d[1].sg0.seg1_size;
+ ctx->n_rx_bytes += b[2]->current_length = d[2].sg0.seg1_size;
+ ctx->n_rx_bytes += b[3]->current_length = d[3].sg0.seg1_size;
+ b[0]->flow_id = d[0].parse.w[3] >> 48;
+ b[1]->flow_id = d[1].parse.w[3] >> 48;
+ b[2]->flow_id = d[2].parse.w[3] >> 48;
+ b[3]->flow_id = d[3].parse.w[3] >> 48;
+ ctx->n_segs += 4;
+ segs = d[0].sg0.segs + d[1].sg0.segs + d[2].sg0.segs + d[3].sg0.segs;
+
+ if (PREDICT_FALSE (segs > 4))
+ {
+ oct_rx_attach_tail (vm, ctx, b[0], d + 0);
+ oct_rx_attach_tail (vm, ctx, b[1], d + 1);
+ oct_rx_attach_tail (vm, ctx, b[2], d + 2);
+ oct_rx_attach_tail (vm, ctx, b[3], d + 3);
+ }
+ }
+
+ for (; n_left; d += 1, n_left -= 1, ctx->to_next += 1)
+ {
+ b[0] = (vlib_buffer_t *) d->segs0[0] - 1;
+ ctx->to_next[0] = vlib_get_buffer_index (vm, b[0]);
+ b[0]->template = bt;
+ ctx->n_rx_bytes += b[0]->current_length = d[0].sg0.seg1_size;
+ b[0]->flow_id = d[0].parse.w[3] >> 48;
+ ctx->n_segs += 1;
+ if (d[0].sg0.segs > 1)
+ oct_rx_attach_tail (vm, ctx, b[0], d + 0);
+ }
+
+ plt_write64 ((crq->cq.wdata | n), crq->cq.door);
+ ctx->n_rx_pkts += n;
+ ctx->n_left_to_next -= n;
+ return n;
+}
+
+static_always_inline void
+oct_rxq_refill_batch (vlib_main_t *vm, u64 lmt_id, u64 addr,
+ oct_npa_lf_aura_batch_free_line_t *lines, u32 *bi,
+ oct_npa_lf_aura_batch_free0_t w0, u64 n_lines)
+{
+ u64 data;
+
+ for (u32 i = 0; i < n_lines; i++, bi += 15)
+ {
+ lines[i].w0 = w0;
+ vlib_get_buffers (vm, bi, (vlib_buffer_t **) lines[i].data, 15);
+ }
+
+ data = lmt_id | ((n_lines - 1) << 12) | ((1ULL << (n_lines * 3)) - 1) << 19;
+ roc_lmt_submit_steorl (data, addr);
+
+ /* Data Store Memory Barrier - outer shareable domain */
+ asm volatile("dmb oshst" ::: "memory");
+}
+
+static_always_inline u32
+oct_rxq_refill (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq, u16 n_refill)
+{
+ const u32 batch_max_lines = 16;
+ const u32 bufs_per_line = 15;
+ const u32 batch_max_bufs = 15 * 16;
+
+ u32 batch_bufs, n_lines, n_alloc;
+ u32 buffer_indices[batch_max_bufs];
+ u64 lmt_addr, lmt_id, addr, n_enq = 0;
+ u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ oct_npa_lf_aura_batch_free_line_t *lines;
+
+ if (n_refill < bufs_per_line)
+ return 0;
+
+ n_lines = n_refill / bufs_per_line;
+
+ addr = crq->aura_batch_free_ioaddr;
+ lmt_addr = crq->lmt_base_addr;
+ lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2;
+ lmt_addr += lmt_id << ROC_LMT_LINE_SIZE_LOG2;
+ lines = (oct_npa_lf_aura_batch_free_line_t *) lmt_addr;
+
+ oct_npa_lf_aura_batch_free0_t w0 = {
+ .aura = roc_npa_aura_handle_to_aura (crq->aura_handle),
+ .count_eot = 1,
+ };
+
+ while (n_lines >= batch_max_lines)
+ {
+ n_alloc =
+ vlib_buffer_alloc_from_pool (vm, buffer_indices, batch_max_bufs, bpi);
+ if (PREDICT_FALSE (n_alloc < batch_max_bufs))
+ goto alloc_fail;
+ oct_rxq_refill_batch (vm, lmt_id, addr, lines, buffer_indices, w0,
+ batch_max_lines);
+ n_lines -= batch_max_lines;
+ n_enq += batch_max_bufs;
+ }
+
+ if (n_lines == 0)
+ return n_enq;
+
+ batch_bufs = n_lines * bufs_per_line;
+ n_alloc = vlib_buffer_alloc_from_pool (vm, buffer_indices, batch_bufs, bpi);
+
+ if (PREDICT_FALSE (n_alloc < batch_bufs))
+ {
+ alloc_fail:
+ if (n_alloc >= bufs_per_line)
+ {
+ u32 n_unalloc;
+ n_lines = n_alloc / bufs_per_line;
+ batch_bufs = n_lines * bufs_per_line;
+ n_unalloc = n_alloc - batch_bufs;
+
+ if (n_unalloc)
+ vlib_buffer_unalloc_to_pool (vm, buffer_indices + batch_bufs,
+ n_unalloc, bpi);
+ }
+ else
+ {
+ if (n_alloc)
+ vlib_buffer_unalloc_to_pool (vm, buffer_indices, n_alloc, bpi);
+ return n_enq;
+ }
+ }
+
+ oct_rxq_refill_batch (vm, lmt_id, addr, lines, buffer_indices, w0, n_lines);
+ n_enq += batch_bufs;
+
+ return n_enq;
+}
+
+static_always_inline void
+oct_rx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ oct_rx_node_ctx_t *ctx, oct_nix_rx_cqe_desc_t *d, u32 n_desc)
+{
+ u32 i = 0;
+ if (PREDICT_TRUE (ctx->trace_count == 0))
+ return;
+
+ while (ctx->n_traced < ctx->trace_count && i < n_desc)
+ {
+ vlib_buffer_t *b = (vlib_buffer_t *) d[i].segs0[0] - 1;
+
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, node, ctx->next_index, b,
+ /* follow_chain */ 0)))
+ {
+ oct_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+ tr->next_index = ctx->next_index;
+ tr->sw_if_index = ctx->sw_if_index;
+ tr->desc = d[i];
+ ctx->n_traced++;
+ }
+ i++;
+ }
+}
+
+static_always_inline uword
+oct_rx_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, vnet_dev_port_t *port,
+ vnet_dev_rx_queue_t *rxq, int with_flows)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 thr_idx = vlib_get_thread_index ();
+ oct_rxq_t *crq = vnet_dev_get_rx_queue_data (rxq);
+ u32 n_desc, head, n, n_enq;
+ u32 cq_size = crq->cq.nb_desc;
+ u32 cq_mask = crq->cq.qmask;
+ oct_nix_rx_cqe_desc_t *descs = crq->cq.desc_base;
+ oct_nix_lf_cq_op_status_t status;
+ oct_rx_node_ctx_t _ctx = {
+ .next_index = rxq->next_index,
+ .sw_if_index = port->intf.sw_if_index,
+ .hw_if_index = port->intf.hw_if_index,
+ }, *ctx = &_ctx;
+
+ /* get head and tail from NIX_LF_CQ_OP_STATUS */
+ status.as_u64 = roc_atomic64_add_sync (crq->cq.wdata, crq->cq.status);
+ if (status.cq_err || status.op_err)
+ return 0;
+
+ head = status.head;
+ n_desc = (status.tail - head) & cq_mask;
+
+ if (n_desc == 0)
+ goto refill;
+
+ vlib_get_new_next_frame (vm, node, ctx->next_index, ctx->to_next,
+ ctx->n_left_to_next);
+
+ ctx->trace_count = vlib_get_trace_count (vm, node);
+
+ while (1)
+ {
+ ctx->next_desc = descs + head;
+ n = clib_min (cq_size - head, clib_min (n_desc, ctx->n_left_to_next));
+ n = oct_rx_batch (vm, ctx, rxq, n);
+ oct_rx_trace (vm, node, ctx, descs + head, n);
+
+ if (ctx->n_left_to_next == 0)
+ break;
+
+ status.as_u64 = roc_atomic64_add_sync (crq->cq.wdata, crq->cq.status);
+ if (status.cq_err || status.op_err)
+ break;
+
+ head = status.head;
+ n_desc = (status.tail - head) & cq_mask;
+ if (n_desc == 0)
+ break;
+ }
+
+ if (ctx->n_traced)
+ vlib_set_trace_count (vm, node, ctx->trace_count - ctx->n_traced);
+
+ if (PREDICT_TRUE (ctx->next_index == VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT))
+ {
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+ oct_nix_rx_parse_t p = { .w[0] = ctx->parse_w0_or };
+ nf = vlib_node_runtime_get_next_frame (vm, node, ctx->next_index);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = ctx->sw_if_index;
+ ef->hw_if_index = ctx->hw_if_index;
+
+ if (p.f.errcode == 0 && p.f.errlev == 0)
+ f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
+
+ vlib_frame_no_append (f);
+ }
+
+ vlib_put_next_frame (vm, node, ctx->next_index, ctx->n_left_to_next);
+
+ vlib_increment_combined_counter (
+ vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thr_idx, ctx->hw_if_index, ctx->n_rx_pkts, ctx->n_rx_bytes);
+
+refill:
+ n_enq = crq->n_enq - ctx->n_segs;
+ n_enq += oct_rxq_refill (vm, rxq, rxq->size - n_enq);
+ crq->n_enq = n_enq;
+
+ return ctx->n_rx_pkts;
+}
+
+VNET_DEV_NODE_FN (oct_rx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ u32 n_rx = 0;
+ foreach_vnet_dev_rx_queue_runtime (rxq, node)
+ {
+ vnet_dev_port_t *port = rxq->port;
+ n_rx += oct_rx_node_inline (vm, node, frame, port, rxq, 0);
+ }
+
+ return n_rx;
+}
diff --git a/src/plugins/dev_octeon/tx_node.c b/src/plugins/dev_octeon/tx_node.c
new file mode 100644
index 00000000000..0dbf8759d35
--- /dev/null
+++ b/src/plugins/dev_octeon/tx_node.c
@@ -0,0 +1,435 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/ring.h>
+#include <vppinfra/vector/ip_csum.h>
+
+#include <vnet/dev/dev.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+
+#include <dev_octeon/octeon.h>
+
+typedef struct
+{
+ union nix_send_hdr_w0_u hdr_w0_teplate;
+ vlib_node_runtime_t *node;
+ u32 n_tx_bytes;
+ u32 n_drop;
+ vlib_buffer_t *drop[VLIB_FRAME_SIZE];
+ u32 batch_alloc_not_ready;
+ u32 batch_alloc_issue_fail;
+ u16 lmt_id;
+ u64 lmt_ioaddr;
+ lmt_line_t *lmt_lines;
+} oct_tx_ctx_t;
+
+static_always_inline u32
+oct_batch_free (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq)
+{
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ u8 num_cl;
+ u64 ah;
+ u32 n_freed = 0, n;
+ oct_npa_batch_alloc_cl128_t *cl;
+
+ num_cl = ctq->ba_num_cl;
+ if (num_cl)
+ {
+ u16 off = ctq->hdr_off;
+ u32 *bi = (u32 *) ctq->ba_buffer;
+
+ for (cl = ctq->ba_buffer + ctq->ba_first_cl; num_cl > 0; num_cl--, cl++)
+ {
+ oct_npa_batch_alloc_status_t st;
+
+ if ((st.as_u64 = __atomic_load_n (cl->iova, __ATOMIC_RELAXED)) ==
+ OCT_BATCH_ALLOC_IOVA0_MASK + ALLOC_CCODE_INVAL)
+ {
+ cl_not_ready:
+ ctx->batch_alloc_not_ready++;
+ n_freed = bi - (u32 *) ctq->ba_buffer;
+ if (n_freed > 0)
+ {
+ vlib_buffer_free_no_next (vm, (u32 *) ctq->ba_buffer,
+ n_freed);
+ ctq->ba_num_cl = num_cl;
+ ctq->ba_first_cl = cl - ctq->ba_buffer;
+ return n_freed;
+ }
+
+ return 0;
+ }
+
+ if (st.status.count > 8 &&
+ __atomic_load_n (cl->iova + 8, __ATOMIC_RELAXED) ==
+ OCT_BATCH_ALLOC_IOVA0_MASK)
+ goto cl_not_ready;
+
+#if (CLIB_DEBUG > 0)
+ cl->iova[0] &= OCT_BATCH_ALLOC_IOVA0_MASK;
+#endif
+ if (PREDICT_TRUE (st.status.count == 16))
+ {
+ /* optimize for likely case where cacheline is full */
+ vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi, 16,
+ off);
+ bi += 16;
+ }
+ else
+ {
+ vlib_get_buffer_indices_with_offset (vm, (void **) cl, bi,
+ st.status.count, off);
+ bi += st.status.count;
+ }
+ }
+
+ n_freed = bi - (u32 *) ctq->ba_buffer;
+ if (n_freed > 0)
+ vlib_buffer_free_no_next (vm, (u32 *) ctq->ba_buffer, n_freed);
+
+ /* clear status bits in each cacheline */
+ n = cl - ctq->ba_buffer;
+ for (u32 i = 0; i < n; i++)
+ ctq->ba_buffer[i].iova[0] = ctq->ba_buffer[i].iova[8] =
+ OCT_BATCH_ALLOC_IOVA0_MASK;
+
+ ctq->ba_num_cl = ctq->ba_first_cl = 0;
+ }
+
+ ah = ctq->aura_handle;
+
+ if ((n = roc_npa_aura_op_available (ah)) >= 32)
+ {
+ u64 addr, res;
+
+ n = clib_min (n, ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS);
+
+ oct_npa_batch_alloc_compare_t cmp = {
+ .compare_s = { .aura = roc_npa_aura_handle_to_aura (ah),
+ .stype = ALLOC_STYPE_STF,
+ .count = n }
+ };
+
+ addr = roc_npa_aura_handle_to_base (ah) + NPA_LF_AURA_BATCH_ALLOC;
+ res = roc_atomic64_casl (cmp.as_u64, (uint64_t) ctq->ba_buffer,
+ (i64 *) addr);
+ if (res == ALLOC_RESULT_ACCEPTED || res == ALLOC_RESULT_NOCORE)
+ {
+ ctq->ba_num_cl = (n + 15) / 16;
+ ctq->ba_first_cl = 0;
+ }
+ else
+ ctx->batch_alloc_issue_fail++;
+ }
+
+ return n_freed;
+}
+
+static_always_inline u8
+oct_tx_enq1 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vlib_buffer_t *b,
+ lmt_line_t *line, u32 flags, int simple, int trace)
+{
+ u8 n_dwords = 2;
+ u32 total_len = 0;
+ oct_tx_desc_t d = {
+ .hdr_w0 = ctx->hdr_w0_teplate,
+ .sg[0] = {
+ .segs = 1,
+ .subdc = NIX_SUBDC_SG,
+ },
+ .sg[4] = {
+ .subdc = NIX_SUBDC_SG,
+ },
+ };
+
+ if (!simple && flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ u8 n_tail_segs = 0;
+ vlib_buffer_t *tail_segs[5], *t = b;
+
+ while (t->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ t = vlib_get_buffer (vm, t->next_buffer);
+ tail_segs[n_tail_segs++] = t;
+ if (n_tail_segs > 5)
+ {
+ ctx->drop[ctx->n_drop++] = t;
+ return 0;
+ }
+ }
+
+ switch (n_tail_segs)
+ {
+ case 5:
+ d.sg[7].u = (u64) vlib_buffer_get_current (tail_segs[4]);
+ total_len += d.sg[4].seg3_size = tail_segs[4]->current_length;
+ d.sg[4].segs++;
+ case 4:
+ d.sg[6].u = (u64) vlib_buffer_get_current (tail_segs[3]);
+ total_len += d.sg[4].seg2_size = tail_segs[3]->current_length;
+ d.sg[4].segs++;
+ n_dwords++;
+ case 3:
+ d.sg[5].u = (u64) vlib_buffer_get_current (tail_segs[2]);
+ total_len += d.sg[4].seg1_size = tail_segs[2]->current_length;
+ d.sg[4].segs++;
+ n_dwords++;
+ case 2:
+ d.sg[3].u = (u64) vlib_buffer_get_current (tail_segs[1]);
+ total_len += d.sg[0].seg3_size = tail_segs[1]->current_length;
+ d.sg[0].segs++;
+ case 1:
+ d.sg[2].u = (u64) vlib_buffer_get_current (tail_segs[0]);
+ total_len += d.sg[0].seg2_size = tail_segs[0]->current_length;
+ d.sg[0].segs++;
+ n_dwords++;
+ default:
+ break;
+ };
+ d.hdr_w0.sizem1 = n_dwords - 1;
+ }
+
+ if (!simple && flags & VNET_BUFFER_F_OFFLOAD)
+ {
+ vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+ if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ {
+ d.hdr_w1.ol3type = NIX_SENDL3TYPE_IP4_CKSUM;
+ d.hdr_w1.ol3ptr = vnet_buffer (b)->l3_hdr_offset;
+ d.hdr_w1.ol4ptr =
+ vnet_buffer (b)->l3_hdr_offset + sizeof (ip4_header_t);
+ }
+ if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
+ {
+ d.hdr_w1.ol4type = NIX_SENDL4TYPE_UDP_CKSUM;
+ d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset;
+ }
+ else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+ {
+ d.hdr_w1.ol4type = NIX_SENDL4TYPE_TCP_CKSUM;
+ d.hdr_w1.ol4ptr = vnet_buffer (b)->l4_hdr_offset;
+ }
+ }
+
+ total_len += d.sg[0].seg1_size = b->current_length;
+ d.hdr_w0.total = total_len;
+ d.sg[1].u = (u64) vlib_buffer_get_current (b);
+
+ if (trace && flags & VLIB_BUFFER_IS_TRACED)
+ {
+ oct_tx_trace_t *t = vlib_add_trace (vm, ctx->node, b, sizeof (*t));
+ t->desc = d;
+ t->sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
+ }
+
+ for (u32 i = 0; i < n_dwords; i++)
+ line->dwords[i] = d.as_u128[i];
+
+ return n_dwords;
+}
+
+static_always_inline u32
+oct_tx_enq16 (vlib_main_t *vm, oct_tx_ctx_t *ctx, vnet_dev_tx_queue_t *txq,
+ vlib_buffer_t **b, u32 n_pkts, int trace)
+{
+ u8 dwords_per_line[16], *dpl = dwords_per_line;
+ u64 lmt_arg, ioaddr, n_lines;
+ u32 n_left, or_flags_16 = 0;
+ const u32 not_simple_flags =
+ VLIB_BUFFER_NEXT_PRESENT | VNET_BUFFER_F_OFFLOAD;
+ lmt_line_t *l = ctx->lmt_lines;
+
+ /* Data Store Memory Barrier - outer shareable domain */
+ asm volatile("dmb oshst" ::: "memory");
+
+ for (n_left = n_pkts; n_left >= 8; n_left -= 8, b += 8, l += 8)
+ {
+ u32 f0, f1, f2, f3, f4, f5, f6, f7, or_f = 0;
+ vlib_prefetch_buffer_header (b[8], LOAD);
+ or_f |= f0 = b[0]->flags;
+ or_f |= f1 = b[1]->flags;
+ vlib_prefetch_buffer_header (b[9], LOAD);
+ or_f |= f2 = b[2]->flags;
+ or_f |= f3 = b[3]->flags;
+ vlib_prefetch_buffer_header (b[10], LOAD);
+ or_f |= f4 = b[4]->flags;
+ or_f |= f5 = b[5]->flags;
+ vlib_prefetch_buffer_header (b[11], LOAD);
+ or_f |= f6 = b[6]->flags;
+ or_f |= f7 = b[7]->flags;
+ vlib_prefetch_buffer_header (b[12], LOAD);
+ or_flags_16 |= or_f;
+
+ if ((or_f & not_simple_flags) == 0)
+ {
+ int simple = 1;
+ oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace);
+ vlib_prefetch_buffer_header (b[13], LOAD);
+ oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace);
+ vlib_prefetch_buffer_header (b[14], LOAD);
+ oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace);
+ vlib_prefetch_buffer_header (b[15], LOAD);
+ oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace);
+ oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace);
+ dpl[0] = dpl[1] = dpl[2] = dpl[3] = 2;
+ dpl[4] = dpl[5] = dpl[6] = dpl[7] = 2;
+ }
+ else
+ {
+ int simple = 0;
+ dpl[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, simple, trace);
+ dpl[1] = oct_tx_enq1 (vm, ctx, b[1], l + 1, f1, simple, trace);
+ vlib_prefetch_buffer_header (b[13], LOAD);
+ dpl[2] = oct_tx_enq1 (vm, ctx, b[2], l + 2, f2, simple, trace);
+ dpl[3] = oct_tx_enq1 (vm, ctx, b[3], l + 3, f3, simple, trace);
+ vlib_prefetch_buffer_header (b[14], LOAD);
+ dpl[4] = oct_tx_enq1 (vm, ctx, b[4], l + 4, f4, simple, trace);
+ dpl[5] = oct_tx_enq1 (vm, ctx, b[5], l + 5, f5, simple, trace);
+ vlib_prefetch_buffer_header (b[15], LOAD);
+ dpl[6] = oct_tx_enq1 (vm, ctx, b[6], l + 6, f6, simple, trace);
+ dpl[7] = oct_tx_enq1 (vm, ctx, b[7], l + 7, f7, simple, trace);
+ }
+ dpl += 8;
+ }
+
+ for (; n_left > 0; n_left -= 1, b += 1, l += 1)
+ {
+ u32 f0 = b[0]->flags;
+ dpl++[0] = oct_tx_enq1 (vm, ctx, b[0], l, f0, 0, trace);
+ or_flags_16 |= f0;
+ }
+
+ lmt_arg = ctx->lmt_id;
+ ioaddr = ctx->lmt_ioaddr;
+ n_lines = n_pkts;
+
+ if (PREDICT_FALSE (or_flags_16 & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ dpl = dwords_per_line;
+ ioaddr |= (dpl[0] - 1) << 4;
+
+ if (n_lines > 1)
+ {
+ lmt_arg |= (--n_lines) << 12;
+
+ for (u8 bit_off = 19; n_lines; n_lines--, bit_off += 3, dpl++)
+ lmt_arg |= ((u64) dpl[1] - 1) << bit_off;
+ }
+ }
+ else
+ {
+ const u64 n_dwords = 2;
+ ioaddr |= (n_dwords - 1) << 4;
+
+ if (n_lines > 1)
+ {
+ lmt_arg |= (--n_lines) << 12;
+
+ for (u8 bit_off = 19; n_lines; n_lines--, bit_off += 3)
+ lmt_arg |= (n_dwords - 1) << bit_off;
+ }
+ }
+
+ roc_lmt_submit_steorl (lmt_arg, ioaddr);
+
+ return n_pkts;
+}
+
+VNET_DEV_NODE_FN (oct_tx_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ vnet_dev_tx_node_runtime_t *rt = vnet_dev_get_tx_node_runtime (node);
+ vnet_dev_tx_queue_t *txq = rt->tx_queue;
+ oct_txq_t *ctq = vnet_dev_get_tx_queue_data (txq);
+ u32 node_index = node->node_index;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n, n_enq, n_left, n_pkts = frame->n_vectors;
+ vlib_buffer_t *buffers[VLIB_FRAME_SIZE + 8], **b = buffers;
+ u64 lmt_id = vm->thread_index << ROC_LMT_LINES_PER_CORE_LOG2;
+
+ oct_tx_ctx_t ctx = {
+ .node = node,
+ .hdr_w0_teplate = {
+ .aura = roc_npa_aura_handle_to_aura (ctq->aura_handle),
+ .sq = ctq->sq.qid,
+ .sizem1 = 1,
+ },
+ .lmt_id = lmt_id,
+ .lmt_ioaddr = ctq->io_addr,
+ .lmt_lines = ctq->lmt_addr + (lmt_id << ROC_LMT_LINE_SIZE_LOG2),
+ };
+
+ vlib_get_buffers (vm, vlib_frame_vector_args (frame), b, n_pkts);
+ for (int i = 0; i < 8; i++)
+ b[n_pkts + i] = b[n_pkts - 1];
+
+ vnet_dev_tx_queue_lock_if_needed (txq);
+
+ n_enq = ctq->n_enq;
+ n_enq -= oct_batch_free (vm, &ctx, txq);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ {
+ for (n_left = clib_min (n_pkts, txq->size - n_enq), n = 0; n_left >= 16;
+ n_left -= 16, b += 16)
+ n += oct_tx_enq16 (vm, &ctx, txq, b, 16, /* trace */ 1);
+
+ if (n_left)
+ n += oct_tx_enq16 (vm, &ctx, txq, b, n_left, /* trace */ 1);
+ }
+ else
+ {
+ for (n_left = clib_min (n_pkts, txq->size - n_enq), n = 0; n_left >= 16;
+ n_left -= 16, b += 16)
+ n += oct_tx_enq16 (vm, &ctx, txq, b, 16, /* trace */ 0);
+
+ if (n_left)
+ n += oct_tx_enq16 (vm, &ctx, txq, b, n_left, /* trace */ 0);
+ }
+
+ ctq->n_enq = n_enq + n;
+
+ if (n < n_pkts)
+ {
+ u32 n_free = n_pkts - n;
+ vlib_buffer_free (vm, from + n, n_free);
+ vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_NO_FREE_SLOTS,
+ n_free);
+ n_pkts -= n_free;
+ }
+
+ if (ctx.n_drop)
+ vlib_error_count (vm, node->node_index, OCT_TX_NODE_CTR_CHAIN_TOO_LONG,
+ ctx.n_drop);
+
+ if (ctx.batch_alloc_not_ready)
+ vlib_error_count (vm, node_index,
+ OCT_TX_NODE_CTR_AURA_BATCH_ALLOC_NOT_READY,
+ ctx.batch_alloc_not_ready);
+
+ if (ctx.batch_alloc_issue_fail)
+ vlib_error_count (vm, node_index,
+ OCT_TX_NODE_CTR_AURA_BATCH_ALLOC_ISSUE_FAIL,
+ ctx.batch_alloc_issue_fail);
+
+ vnet_dev_tx_queue_unlock_if_needed (txq);
+
+ if (ctx.n_drop)
+ {
+ u32 bi[VLIB_FRAME_SIZE];
+ vlib_get_buffer_indices (vm, ctx.drop, bi, ctx.n_drop);
+ vlib_buffer_free (vm, bi, ctx.n_drop);
+ n_pkts -= ctx.n_drop;
+ }
+
+ return n_pkts;
+}
diff --git a/src/plugins/dhcp/FEATURE.yaml b/src/plugins/dhcp/FEATURE.yaml
index 469af29cee5..a517cb849de 100644
--- a/src/plugins/dhcp/FEATURE.yaml
+++ b/src/plugins/dhcp/FEATURE.yaml
@@ -6,6 +6,6 @@ features:
- DHCPv6 prefix delegation
- DHCP Proxy / Option 82
-description: "An implemenation of the Dynamic Host Configuration Protocol (DHCP) client"
+description: "An implementation of the Dynamic Host Configuration Protocol (DHCP) client"
state: production
properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/dhcp/client.c b/src/plugins/dhcp/client.c
index f93643390e9..8fa67c616b2 100644
--- a/src/plugins/dhcp/client.c
+++ b/src/plugins/dhcp/client.c
@@ -149,7 +149,6 @@ dhcp_client_acquire_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
.ip4 = c->learned.router_address,
};
- /* *INDENT-OFF* */
fib_table_entry_path_add (
fib_table_get_index_for_sw_if_index (
FIB_PROTOCOL_IP4,
@@ -161,7 +160,6 @@ dhcp_client_acquire_address (dhcp_client_main_t * dcm, dhcp_client_t * c)
&nh, c->sw_if_index,
~0, 1, NULL, // no label stack
FIB_ROUTE_PATH_FLAG_NONE);
- /* *INDENT-ON* */
}
}
clib_memcpy (&c->installed, &c->learned, sizeof (c->installed));
@@ -870,7 +868,6 @@ dhcp_client_process (vlib_main_t * vm,
case ~0:
if (pool_elts (dcm->clients))
{
- /* *INDENT-OFF* */
next_expire_time = 1e70;
pool_foreach (c, dcm->clients)
{
@@ -886,7 +883,6 @@ dhcp_client_process (vlib_main_t * vm,
clib_warning ("BUG");
timeout = 1.13;
}
- /* *INDENT-ON* */
}
else
timeout = 1000.0;
@@ -900,7 +896,6 @@ dhcp_client_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp_client_process_node,static) = {
.function = dhcp_client_process,
.type = VLIB_NODE_TYPE_PROCESS,
@@ -909,7 +904,6 @@ VLIB_REGISTER_NODE (dhcp_client_process_node,static) = {
.n_errors = ARRAY_LEN(dhcp_client_process_stat_strings),
.error_strings = dhcp_client_process_stat_strings,
};
-/* *INDENT-ON* */
static clib_error_t *
show_dhcp_client_command_fn (vlib_main_t * vm,
@@ -943,25 +937,21 @@ show_dhcp_client_command_fn (vlib_main_t * vm,
return 0;
}
- /* *INDENT-OFF* */
pool_foreach (c, dcm->clients)
{
vlib_cli_output (vm, "%U",
format_dhcp_client, dcm,
c, verbose);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_dhcp_client_command, static) = {
.path = "show dhcp client",
.short_help = "show dhcp client [intfc <intfc>][verbose]",
.function = show_dhcp_client_command_fn,
};
-/* *INDENT-ON* */
int
@@ -1118,13 +1108,11 @@ dhcp_client_walk (dhcp_client_walk_cb_t cb, void *ctx)
dhcp_client_main_t *dcm = &dhcp_client_main;
dhcp_client_t *c;
- /* *INDENT-OFF* */
pool_foreach (c, dcm->clients)
{
if (!cb(c, ctx))
break;
}
- /* *INDENT-ON* */
}
@@ -1229,13 +1217,11 @@ dhcp_client_set_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_client_set_command, static) = {
.path = "set dhcp client",
.short_help = "set dhcp client [del] intfc <interface> [hostname <name>]",
.function = dhcp_client_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp_client_init (vlib_main_t * vm)
diff --git a/src/plugins/dhcp/dhcp.api b/src/plugins/dhcp/dhcp.api
index 4611d5fadd8..7f559128353 100644
--- a/src/plugins/dhcp/dhcp.api
+++ b/src/plugins/dhcp/dhcp.api
@@ -278,6 +278,15 @@ define dhcp_proxy_details
vl_api_dhcp_server_t servers[count];
};
+autoreply define dhcp_client_detect_enable_disable
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_interface_index_t sw_if_index;
+ bool enable;
+};
+
/** \brief Set DHCPv6 DUID-LL
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/plugins/dhcp/dhcp4_proxy_node.c b/src/plugins/dhcp/dhcp4_proxy_node.c
index 2ddad25bb11..2b49d49bb7f 100644
--- a/src/plugins/dhcp/dhcp4_proxy_node.c
+++ b/src/plugins/dhcp/dhcp4_proxy_node.c
@@ -463,7 +463,6 @@ dhcp_proxy_to_server_input (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp_proxy_to_server_node, static) = {
.function = dhcp_proxy_to_server_input,
.name = "dhcp-proxy-to-server",
@@ -486,7 +485,6 @@ VLIB_REGISTER_NODE (dhcp_proxy_to_server_node, static) = {
.unformat_buffer = unformat_dhcp_proxy_header,
#endif
};
-/* *INDENT-ON* */
typedef enum
{
@@ -783,7 +781,6 @@ dhcp_proxy_to_client_input (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp_proxy_to_client_node, static) = {
.function = dhcp_proxy_to_client_input,
.name = "dhcp-proxy-to-client",
@@ -803,7 +800,6 @@ VLIB_REGISTER_NODE (dhcp_proxy_to_client_node, static) = {
[DHCP4_PROXY_NEXT_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
void
dhcp_maybe_register_udp_ports (dhcp_port_reg_flags_t ports)
@@ -956,13 +952,11 @@ dhcp4_proxy_set_command_fn (vlib_main_t * vm,
format_unformat_error, input);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_proxy_set_command, static) = {
.path = "set dhcp proxy",
.short_help = "set dhcp proxy [del] server <ip-addr> src-address <ip-addr> [server-fib-id <n>] [rx-fib-id <n>]",
.function = dhcp4_proxy_set_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_dhcp4_proxy_server (u8 * s, va_list * args)
@@ -980,16 +974,14 @@ format_dhcp4_proxy_server (u8 * s, va_list * args)
rx_fib = ip4_fib_get (proxy->rx_fib_index);
- s = format (s, "%=14u%=16U",
- rx_fib->table_id,
- format_ip46_address, &proxy->dhcp_src_address, IP46_TYPE_ANY);
+ s = format (s, "%=14u%=16U", rx_fib->hash.table_id, format_ip46_address,
+ &proxy->dhcp_src_address, IP46_TYPE_ANY);
vec_foreach (server, proxy->dhcp_servers)
{
server_fib = ip4_fib_get (server->server_fib_index);
- s = format (s, "%u,%U ",
- server_fib->table_id,
- format_ip46_address, &server->dhcp_server, IP46_TYPE_ANY);
+ s = format (s, "%u,%U ", server_fib->hash.table_id, format_ip46_address,
+ &server->dhcp_server, IP46_TYPE_ANY);
}
return s;
}
@@ -1017,13 +1009,11 @@ dhcp4_proxy_show_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_proxy_show_command, static) = {
.path = "show dhcp proxy",
.short_help = "Display dhcp proxy server info",
.function = dhcp4_proxy_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp_option_82_vss_fn (vlib_main_t * vm,
@@ -1068,13 +1058,11 @@ dhcp_option_82_vss_fn (vlib_main_t * vm,
}
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_proxy_vss_command,static) = {
.path = "set dhcp option-82 vss",
.short_help = "set dhcp option-82 vss [del] table <table id> [oui <n> vpn-id <n> | vpn-ascii-id <text>]",
.function = dhcp_option_82_vss_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp_vss_show_command_fn (vlib_main_t * vm,
@@ -1085,13 +1073,11 @@ dhcp_vss_show_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_proxy_vss_show_command, static) = {
.path = "show dhcp vss",
.short_help = "show dhcp VSS",
.function = dhcp_vss_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp_option_82_address_show_command_fn (vlib_main_t * vm,
@@ -1134,13 +1120,11 @@ dhcp_option_82_address_show_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp_proxy_address_show_command,static) = {
.path = "show dhcp option-82-address interface",
.short_help = "show dhcp option-82-address interface <interface>",
.function = dhcp_option_82_address_show_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dhcp/dhcp6_client_common_dp.c b/src/plugins/dhcp/dhcp6_client_common_dp.c
index da6f61aa2d5..40c4ba94c24 100644
--- a/src/plugins/dhcp/dhcp6_client_common_dp.c
+++ b/src/plugins/dhcp/dhcp6_client_common_dp.c
@@ -61,14 +61,12 @@ generate_client_duid (void)
vnet_hw_interface_t *hi;
ethernet_interface_t *eth_if = 0;
- /* *INDENT-OFF* */
pool_foreach (hi, im->hw_interfaces)
{
eth_if = ethernet_get_interface (&ethernet_main, hi->hw_if_index);
if (eth_if)
break;
}
- /* *INDENT-ON* */
if (eth_if)
clib_memcpy (client_duid.lla, &eth_if->address, 6);
@@ -425,7 +423,6 @@ dhcpv6_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcpv6_client_node, static) = {
.function = dhcpv6_client_node_fn,
.name = "dhcpv6-client",
@@ -442,7 +439,6 @@ VLIB_REGISTER_NODE (dhcpv6_client_node, static) = {
.format_trace = format_dhcpv6_client_trace,
};
-/* *INDENT-ON* */
void
dhcp6_clients_enable_disable (u8 enable)
diff --git a/src/plugins/dhcp/dhcp6_ia_na_client_cp.c b/src/plugins/dhcp/dhcp6_ia_na_client_cp.c
index 4a1156f6e8a..ddaf92c6e42 100644
--- a/src/plugins/dhcp/dhcp6_ia_na_client_cp.c
+++ b/src/plugins/dhcp/dhcp6_ia_na_client_cp.c
@@ -271,7 +271,6 @@ dhcp6_reply_event_handler (vl_api_dhcp6_reply_event_t * mp)
continue;
u8 address_already_present = 0;
- /* *INDENT-OFF* */
pool_foreach (address_info, rm->address_pool)
{
if (address_info->sw_if_index != sw_if_index)
@@ -284,7 +283,6 @@ dhcp6_reply_event_handler (vl_api_dhcp6_reply_event_t * mp)
goto address_pool_foreach_out;
}
}
- /* *INDENT-ON* */
address_pool_foreach_out:
if (address_already_present)
@@ -344,7 +342,6 @@ create_address_list (u32 sw_if_index)
dhcp6_client_cp_main_t *rm = &dhcp6_client_cp_main;
address_info_t *address_info, *address_list = 0;;
- /* *INDENT-OFF* */
pool_foreach (address_info, rm->address_pool)
{
if (address_info->sw_if_index == sw_if_index)
@@ -354,7 +351,6 @@ create_address_list (u32 sw_if_index)
clib_memcpy (&address_list[pos], address_info, sizeof (*address_info));
}
}
- /* *INDENT-ON* */
return address_list;
}
@@ -393,7 +389,6 @@ dhcp6_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
do
{
due_time = current_time + 1e9;
- /* *INDENT-OFF* */
pool_foreach (address_info, rm->address_pool)
{
if (address_info->due_time > current_time)
@@ -423,7 +418,6 @@ dhcp6_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
}
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (rm->client_state_by_sw_if_index); i++)
{
client_state_t *cs = &rm->client_state_by_sw_if_index[i];
@@ -473,13 +467,11 @@ dhcp6_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp6_client_cp_process_node) = {
.function = dhcp6_client_cp_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dhcp6-client-cp-process",
};
-/* *INDENT-ON* */
static void
interrupt_process (void)
@@ -524,7 +516,6 @@ dhcp6_addresses_show_command_function (vlib_main_t * vm,
address_info_t *address_info;
f64 current_time = vlib_time_now (vm);
- /* *INDENT-OFF* */
pool_foreach (address_info, dm->address_pool)
{
vlib_cli_output (vm, "address: %U, "
@@ -534,18 +525,15 @@ dhcp6_addresses_show_command_function (vlib_main_t * vm,
address_info->preferred_lt, address_info->valid_lt,
address_info->due_time - current_time);
}
- /* *INDENT-ON* */
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp6_addresses_show_command, static) = {
.path = "show dhcp6 addresses",
.short_help = "show dhcp6 addresses",
.function = dhcp6_addresses_show_command_function,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp6_clients_show_command_function (vlib_main_t * vm,
@@ -601,13 +589,11 @@ dhcp6_clients_show_command_function (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp6_clients_show_command, static) = {
.path = "show dhcp6 clients",
.short_help = "show dhcp6 clients",
.function = dhcp6_clients_show_command_function,
};
-/* *INDENT-ON* */
int
dhcp6_client_enable_disable (u32 sw_if_index, u8 enable)
@@ -659,7 +645,6 @@ dhcp6_client_enable_disable (u32 sw_if_index, u8 enable)
disable_process ();
}
- /* *INDENT-OFF* */
pool_foreach (address_info, rm->address_pool)
{
if (address_info->sw_if_index == sw_if_index)
@@ -680,7 +665,6 @@ dhcp6_client_enable_disable (u32 sw_if_index, u8 enable)
pool_put (rm->address_pool, address_info);
}
}
- /* *INDENT-ON* */
}
if (!enable)
@@ -745,13 +729,11 @@ done:
* @cliexcmd{dhcp6 client GigabitEthernet2/0/0 disable}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp6_client_enable_disable_command, static) = {
.path = "dhcp6 client",
.short_help = "dhcp6 client <interface> [disable]",
.function = dhcp6_client_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp_ia_na_client_cp_init (vlib_main_t * vm)
diff --git a/src/plugins/dhcp/dhcp6_ia_na_client_dp.c b/src/plugins/dhcp/dhcp6_ia_na_client_dp.c
index c240beb3eb3..e957f88884a 100644
--- a/src/plugins/dhcp/dhcp6_ia_na_client_dp.c
+++ b/src/plugins/dhcp/dhcp6_ia_na_client_dp.c
@@ -346,13 +346,11 @@ send_dhcp6_client_message_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (send_dhcp6_client_message_process_node, static) = {
.function = send_dhcp6_client_message_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "send-dhcp6-client-message-process",
};
-/* *INDENT-ON* */
void
dhcp6_send_client_message (vlib_main_t * vm, u32 sw_if_index, u8 stop,
diff --git a/src/plugins/dhcp/dhcp6_packet.h b/src/plugins/dhcp/dhcp6_packet.h
index d5467952a64..78a665f926d 100644
--- a/src/plugins/dhcp/dhcp6_packet.h
+++ b/src/plugins/dhcp/dhcp6_packet.h
@@ -119,7 +119,6 @@ typedef struct dhcpv6_hdr_
u8 data[0];
} dhcpv6_header_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct dhcpv6_relay_ctx_ {
dhcpv6_header_t *pkt;
u32 pkt_len;
@@ -130,10 +129,8 @@ typedef CLIB_PACKED (struct dhcpv6_relay_ctx_ {
char ctx_name[32+1];
u8 dhcp_msg_type;
}) dhcpv6_relay_ctx_t;
-/* *INDENT-ON* */
//Structure for DHCPv6 RELAY-FORWARD and DHCPv6 RELAY-REPLY pkts
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct dhcpv6_relay_hdr_ {
u8 msg_type;
u8 hop_count;
@@ -141,7 +138,6 @@ typedef CLIB_PACKED (struct dhcpv6_relay_hdr_ {
ip6_address_t peer_addr;
u8 data[0];
}) dhcpv6_relay_hdr_t;
-/* *INDENT-ON* */
typedef enum dhcp_stats_action_type_
{
@@ -171,51 +167,39 @@ typedef enum dhcpv6_stats_drop_reason_
#define dhcpv6_optlen(opt) clib_net_to_host_u16((opt)->length)
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u16 option;
u16 length;
u8 data[0];
}) dhcpv6_option_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
dhcpv6_option_t opt;
u16 status_code;
}) dhcpv6_status_code_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
dhcpv6_option_t opt;
u32 int_idx;
}) dhcpv6_int_id_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
dhcpv6_option_t opt;
u8 vss_type;
u8 data[0];
}) dhcpv6_vss_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
dhcpv6_option_t opt;
u32 ent_num;
u32 rmt_id;
}) dhcpv6_rmt_id_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
dhcpv6_option_t opt;
u16 link_type;
u8 data[6]; // data[0]:data[5]: MAC address
}) dhcpv6_client_mac_t;
-/* *INDENT-ON* */
typedef CLIB_PACKED (struct
{
diff --git a/src/plugins/dhcp/dhcp6_pd_client_cp.c b/src/plugins/dhcp/dhcp6_pd_client_cp.c
index f6d30fa0c7d..b30f7c0af79 100644
--- a/src/plugins/dhcp/dhcp6_pd_client_cp.c
+++ b/src/plugins/dhcp/dhcp6_pd_client_cp.c
@@ -371,12 +371,10 @@ dhcp6_pd_reply_event_handler (vl_api_dhcp6_pd_reply_event_t * mp)
* We're going to loop through the pool multiple times,
* so collect active indices.
*/
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
vec_add1 (pm->indices, prefix_info - pm->prefix_pool);
}
- /* *INDENT-ON* */
for (i = 0; i < n_prefixes; i++)
{
@@ -480,7 +478,6 @@ create_prefix_list (u32 sw_if_index)
ip6_prefix_main_t *pm = &ip6_prefix_main;
prefix_info_t *prefix_info, *prefix_list = 0;;
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
if (is_dhcpv6_pd_prefix (prefix_info) &&
@@ -491,7 +488,6 @@ create_prefix_list (u32 sw_if_index)
clib_memcpy (&prefix_list[pos], prefix_info, sizeof (*prefix_info));
}
}
- /* *INDENT-ON* */
return prefix_list;
}
@@ -530,7 +526,6 @@ dhcp6_pd_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
do
{
due_time = current_time + 1e9;
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
if (is_dhcpv6_pd_prefix (prefix_info))
@@ -559,7 +554,6 @@ dhcp6_pd_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
}
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (rm->client_state_by_sw_if_index); i++)
{
client_state_t *cs = &rm->client_state_by_sw_if_index[i];
@@ -608,13 +602,11 @@ dhcp6_pd_client_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp6_pd_client_cp_process_node) = {
.function = dhcp6_pd_client_cp_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dhcp6-pd-client-cp-process",
};
-/* *INDENT-ON* */
static void
interrupt_process (void)
@@ -787,14 +779,12 @@ cp_ip6_address_find_new_active_prefix (u32 prefix_group_index,
ip6_prefix_main_t *pm = &ip6_prefix_main;
prefix_info_t *prefix_info;
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
if (prefix_info->prefix_group_index == prefix_group_index &&
prefix_info - pm->prefix_pool != ignore_prefix_index)
return prefix_info - pm->prefix_pool;
}
- /* *INDENT-ON* */
return ~0;
}
@@ -992,7 +982,7 @@ dhcp6_cp_ip6_address_add_del (u32 sw_if_index, const u8 * prefix_group,
return VNET_API_ERROR_DUPLICATE_IF_ADDRESS;
cp_ip6_address_add_del_now (address_info, 0 /* del */ );
*address_info = apm->addresses[n - 1];
- _vec_len (apm->addresses) = n - 1;
+ vec_set_len (apm->addresses, n - 1);
return 0;
}
}
@@ -1080,14 +1070,12 @@ done:
* prefix group my-prefix-group ::7/64 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_address_add_del_command, static) = {
.path = "set ip6 address",
.short_help = "set ip6 address <interface> [prefix group <string>] "
"<address> [del]",
.function = cp_ip6_address_add_del_command_function,
};
-/* *INDENT-ON* */
static clib_error_t *
cp_ip6_addresses_show_command_function (vlib_main_t * vm,
@@ -1119,13 +1107,11 @@ cp_ip6_addresses_show_command_function (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_addresses_show_command, static) = {
.path = "show ip6 addresses",
.short_help = "show ip6 addresses",
.function = cp_ip6_addresses_show_command_function,
};
-/* *INDENT-ON* */
static clib_error_t *
cp_ip6_prefixes_show_command_function (vlib_main_t * vm,
@@ -1138,7 +1124,6 @@ cp_ip6_prefixes_show_command_function (vlib_main_t * vm,
const u8 *prefix_group;
f64 current_time = vlib_time_now (vm);
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
prefix_group =
@@ -1152,18 +1137,15 @@ cp_ip6_prefixes_show_command_function (vlib_main_t * vm,
prefix_info->preferred_lt, prefix_info->valid_lt,
prefix_info->due_time - current_time);
}
- /* *INDENT-ON* */
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_prefixes_show_command, static) = {
.path = "show ip6 prefixes",
.short_help = "show ip6 prefixes",
.function = cp_ip6_prefixes_show_command_function,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_pd_clients_show_command_function (vlib_main_t * vm,
@@ -1224,13 +1206,11 @@ ip6_pd_clients_show_command_function (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_pd_clients_show_command, static) = {
.path = "show ip6 pd clients",
.short_help = "show ip6 pd clients",
.function = ip6_pd_clients_show_command_function,
};
-/* *INDENT-ON* */
@@ -1304,7 +1284,6 @@ dhcp6_pd_client_enable_disable (u32 sw_if_index,
vec_validate (prefix_list, 0);
- /* *INDENT-OFF* */
pool_foreach (prefix_info, pm->prefix_pool)
{
if (is_dhcpv6_pd_prefix (prefix_info) &&
@@ -1325,7 +1304,6 @@ dhcp6_pd_client_enable_disable (u32 sw_if_index,
pool_put (pm->prefix_pool, prefix_info);
}
}
- /* *INDENT-ON* */
vec_free (prefix_list);
@@ -1398,13 +1376,11 @@ done:
* @cliexcmd{dhcp6 pd client GigabitEthernet2/0/0 disable}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcp6_pd_client_enable_disable_command, static) = {
.path = "dhcp6 pd client",
.short_help = "dhcp6 pd client <interface> (prefix group <string> | disable)",
.function = dhcp6_pd_client_enable_disable_command_fn,
};
-/* *INDENT-ON* */
#include <vlib/unix/plugin.h>
diff --git a/src/plugins/dhcp/dhcp6_pd_client_dp.c b/src/plugins/dhcp/dhcp6_pd_client_dp.c
index b43e5a4754d..340930c913e 100644
--- a/src/plugins/dhcp/dhcp6_pd_client_dp.c
+++ b/src/plugins/dhcp/dhcp6_pd_client_dp.c
@@ -352,13 +352,11 @@ send_dhcp6_pd_client_message_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (send_dhcp6_pd_client_message_process_node, static) = {
.function = send_dhcp6_pd_client_message_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "send-dhcp6-pd-client-message-process",
};
-/* *INDENT-ON* */
void
dhcp6_pd_send_client_message (vlib_main_t * vm, u32 sw_if_index, u8 stop,
diff --git a/src/plugins/dhcp/dhcp6_pd_doc.md b/src/plugins/dhcp/dhcp6_pd_doc.md
deleted file mode 100644
index 0d0e0865f1b..00000000000
--- a/src/plugins/dhcp/dhcp6_pd_doc.md
+++ /dev/null
@@ -1,86 +0,0 @@
-# DHCPv6 prefix delegation {#dhcp6_pd_doc}
-
-DHCPv6 prefix delegation client implementation is split between Control Plane and Data Plane.
-Data Plane can also be used alone by external application (external Control Plane) using Data Plane Binary API.
-
-Number of different IA\_PDs managed by VPP is currently limited to 1 (and corresponding IAID has value 1).
-Client ID is of type DUID-LLT (Link Layer address plus time) and is created on VPP startup from avaliable interfaces (or chosen at random for debugging purposes).
-Server ID is only visible to Data Plane. Control Plane identifies servers by a 32-bit handle (server\_index) mapped to Server ID by Data Plane.
-
-## Control Plane
-
-DHCPv6 PD clients are configured per interface.
-When configuring a PD client we have to choose a name of a prefix group for that client.
-Each prefix obtained through this client will be flagged as belonging to specified prefix group.
-The prefix groups are used as a filter by prefix consumers.
-
-To enable client on particular interface call Binary API function dhcp6\_pd\_client\_enable\_disable with param 'sw\_if\_index' set to that interface,
-'prefix\_group' set to prefix group name and 'enable' set to true.
-Format of corresponding Debug CLI command is: "dhcp6 pd client <interface> [disable]"
-
-To add/delete IPv6 address potentially using available prefix from specified prefix group call Binary API command ip6\_add\_del\_address\_using\_prefix with parameters:
-> sw\_if\_index - software interface index of interface to add/delete address to/from
-> prefix\_group - name of prefix group, prefix\_group[0] == '\0' means no prefix should be used
-> address - address or suffix to be used with a prefix from selected group
-> prefix\_length - subnet prefix for the address
-> is\_add - 1 for add, 0 for remove
-or Debug CLI command with format: "set ip6 addresses <interface> [prefix group <n>] <address> [del]"
-
-When no prefix is avaliable, no address is physically added, but is added once a prefix becomes avaliable.
-Address is removed when all available prefixes are removed.
-When a used prefix is removed and there is other available prefix, the address that used the prefix is reconfigured using the available prefix.
-
-There are three debug CLI commands (with no parameters) used to show the state of clients, prefixes and addresses:
- show ip6 pd clients
- show ip6 prefixes
- show ip6 addresses
-
-### Example configuration
-
-set int state GigabitEthernet0/8/0 up
-dhcp6 pd client GigabitEthernet0/8/0 prefix group my-dhcp6-pd-group
-set ip6 address GigabitEthernet0/8/0 prefix group my-dhcp6-pd-group ::7/64
-
-## Data Plane
-
-First API message to be called is dhcp6\_clients\_enable\_disable with enable parameter set to 1.
-It enables DHCPv6 client subsystem to receive UDP messages containing DHCPv6 client port (sets the router to DHCPv6 client mode).
-This is to ensure client subsystem gets the messages instead of DHCPv6 proxy subsystem.
-
-There is one common Binary API call for sending DHCPv6 client messages (dhcp6\_pd\_send\_client\_message) with these fields:
-> msg\_type - message type (e.g. Solicit)
-> sw\_if\_index - index of TX interface
-> server\_index - used to dentify DHCPv6 server,
- unique for each DHCPv6 server on the link,
- value obrtained from dhcp6\_pd\_reply\_event API message,
- use ~0 to send message to all DHCPv6 servers
-> param irt - initial retransmission time
-> param mrt - maximum retransmission time
-> param mrc - maximum retransmission count
-> param mrd - maximum retransmission duration for sending the message
-> stop - if non-zero then stop resending the message, otherwise start sending the message
-> T1 - value of T1 in IA\_PD option
-> T2 - value of T2 in IA\_PD option
-> prefixes - list of prefixes in IA\_PD option
-
-The message is automatically resent by Data Plane based on parameters 'irt', 'mrt', 'mrc' and 'mrd'.
-To stop the resending call the same function (same msg\_type is sufficient) with 'stop' set to 1.
-
-To subscribe for notifications of DHCPv6 messages from server call Binary API function
-want\_dhcp6\_pd\_reply\_events with enable\_disable set to 1
-Notification (dhcp6\_pd\_reply\_event) fileds are:
-> sw\_if\_index - index of RX interface
-> server\_index - used to dentify DHCPv6 server, unique for each DHCPv6 server on the link
-> msg\_type - message type
-> T1 - value of T1 in IA\_PD option
-> T2 - value of T2 in IA\_PD option
-> inner\_status\_code - value of status code inside IA\_PD option
-> status\_code - value of status code
-> preference - value of preference option in reply message
-> prefixes - list of prefixes in IA\_PD option
-
-Prefix is a struct with with these fields:
-> prefix - prefix bytes
-> prefix\_length - prefix length
-> valid\_time - valid lifetime
-> preferred\_time - preferred lifetime
diff --git a/src/plugins/dhcp/dhcp6_pd_doc.rst b/src/plugins/dhcp/dhcp6_pd_doc.rst
new file mode 100644
index 00000000000..349abe215e1
--- /dev/null
+++ b/src/plugins/dhcp/dhcp6_pd_doc.rst
@@ -0,0 +1,113 @@
+DHCPv6 prefix delegation
+========================
+
+| DHCPv6 prefix delegation client implementation is split between
+ Control Plane and Data Plane.
+| Data Plane can also be used alone by external application (external
+ Control Plane) using Data Plane Binary API.
+
+| Number of different IA_PDs managed by VPP is currently limited to 1
+ (and corresponding IAID has value 1).
+| Client ID is of type DUID-LLT (Link Layer address plus time) and is
+ created on VPP startup from avaliable interfaces (or chosen at random
+ for debugging purposes).
+| Server ID is only visible to Data Plane. Control Plane identifies
+ servers by a 32-bit handle (server_index) mapped to Server ID by Data
+ Plane.
+
+Control Plane
+-------------
+
+| DHCPv6 PD clients are configured per interface.
+| When configuring a PD client we have to choose a name of a prefix
+ group for that client.
+| Each prefix obtained through this client will be flagged as belonging
+ to specified prefix group.
+| The prefix groups are used as a filter by prefix consumers.
+
+| To enable client on particular interface call Binary API function
+ dhcp6_pd_client_enable_disable with param ‘sw_if_index’ set to that
+ interface, ‘prefix_group’ set to prefix group name and ‘enable’ set to
+ true.
+| Format of corresponding Debug CLI command is: “dhcp6 pd client
+ [disable]”
+
+To add/delete IPv6 address potentially using available prefix from
+specified prefix group call Binary API command
+ip6_add_del_address_using_prefix with parameters:
+> sw_if_index - software interface index of interface to add/delete
+address to/from > prefix_group - name of prefix group, prefix_group[0]
+== ‘\\0’ means no prefix should be used > address - address or suffix to
+be used with a prefix from selected group > prefix_length - subnet
+prefix for the address > is_add - 1 for add, 0 for remove or Debug CLI
+command with format: “set ip6 addresses [prefix group ]
+
+.. raw:: html
+
+ <address>
+
+[del]”
+
+| When no prefix is avaliable, no address is physically added, but is
+ added once a prefix becomes avaliable.
+| Address is removed when all available prefixes are removed.
+| When a used prefix is removed and there is other available prefix, the
+ address that used the prefix is reconfigured using the available
+ prefix.
+
+| There are three debug CLI commands (with no parameters) used to show
+ the state of clients, prefixes and addresses:
+| show ip6 pd clients
+| show ip6 prefixes
+| show ip6 addresses
+
+Example configuration
+~~~~~~~~~~~~~~~~~~~~~
+
+set int state GigabitEthernet0/8/0 up dhcp6 pd client
+GigabitEthernet0/8/0 prefix group my-dhcp6-pd-group set ip6 address
+GigabitEthernet0/8/0 prefix group my-dhcp6-pd-group ::7/64
+
+Data Plane
+----------
+
+| First API message to be called is dhcp6_clients_enable_disable with
+ enable parameter set to 1.
+| It enables DHCPv6 client subsystem to receive UDP messages containing
+ DHCPv6 client port (sets the router to DHCPv6 client mode).
+| This is to ensure client subsystem gets the messages instead of DHCPv6
+ proxy subsystem.
+
+| There is one common Binary API call for sending DHCPv6 client messages
+ (dhcp6_pd_send_client_message) with these fields:
+| > msg_type - message type (e.g. Solicit) > sw_if_index - index of TX
+ interface > server_index - used to dentify DHCPv6 server, unique for
+ each DHCPv6 server on the link, value obrtained from
+ dhcp6_pd_reply_event API message, use ~0 to send message to all DHCPv6
+ servers > param irt - initial retransmission time > param mrt -
+ maximum retransmission time > param mrc - maximum retransmission count
+ > param mrd - maximum retransmission duration for sending the message
+ > stop - if non-zero then stop resending the message, otherwise start
+ sending the message > T1 - value of T1 in IA_PD option > T2 - value of
+ T2 in IA_PD option > prefixes - list of prefixes in IA_PD option
+
+| The message is automatically resent by Data Plane based on parameters
+ ‘irt’, ‘mrt’, ‘mrc’ and ‘mrd’.
+| To stop the resending call the same function (same msg_type is
+ sufficient) with ‘stop’ set to 1.
+
+| To subscribe for notifications of DHCPv6 messages from server call
+ Binary API function
+| want_dhcp6_pd_reply_events with enable_disable set to 1
+| Notification (dhcp6_pd_reply_event) fileds are:
+| > sw_if_index - index of RX interface > server_index - used to dentify
+ DHCPv6 server, unique for each DHCPv6 server on the link > msg_type -
+ message type > T1 - value of T1 in IA_PD option > T2 - value of T2 in
+ IA_PD option > inner_status_code - value of status code inside IA_PD
+ option > status_code - value of status code > preference - value of
+ preference option in reply message > prefixes - list of prefixes in
+ IA_PD option
+
+| Prefix is a struct with with these fields:
+| > prefix - prefix bytes > prefix_length - prefix length > valid_time -
+ valid lifetime > preferred_time - preferred lifetime
diff --git a/src/plugins/dhcp/dhcp6_proxy_node.c b/src/plugins/dhcp/dhcp6_proxy_node.c
index 33fb8a37992..a1d41e83b0f 100644
--- a/src/plugins/dhcp/dhcp6_proxy_node.c
+++ b/src/plugins/dhcp/dhcp6_proxy_node.c
@@ -105,7 +105,6 @@ ip6_interface_first_global_or_site_address (ip6_main_t * im, u32 sw_if_index)
ip_interface_address_t *ia = 0;
ip6_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -116,7 +115,6 @@ ip6_interface_first_global_or_site_address (ip6_main_t * im, u32 sw_if_index)
break;
}
}));
- /* *INDENT-ON* */
return result;
}
@@ -136,8 +134,8 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
dhcp_proxy_main_t *dpm = &dhcp_proxy_main;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
- u32 pkts_to_server = 0, pkts_to_client = 0, pkts_no_server = 0;
- u32 pkts_no_interface_address = 0, pkts_no_exceeding_max_hop = 0;
+ u32 pkts_to_server = 0, pkts_to_client = 0;
+ u32 pkts_no_interface_address = 0;
u32 pkts_no_src_address = 0;
u32 pkts_wrong_msg_type = 0;
u32 pkts_too_big = 0;
@@ -236,7 +234,6 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
{
error0 = DHCPV6_PROXY_ERROR_NO_SERVER;
next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
- pkts_no_server++;
goto do_trace;
}
@@ -274,7 +271,6 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
{
error0 = DHCPV6_RELAY_PKT_DROP_MAX_HOPS;
next0 = DHCPV6_PROXY_TO_SERVER_INPUT_NEXT_DROP;
- pkts_no_exceeding_max_hop++;
goto do_trace;
}
@@ -534,7 +530,6 @@ dhcpv6_proxy_to_server_input (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcpv6_proxy_to_server_node, static) = {
.function = dhcpv6_proxy_to_server_input,
.name = "dhcpv6-proxy-to-server",
@@ -557,7 +552,6 @@ VLIB_REGISTER_NODE (dhcpv6_proxy_to_server_node, static) = {
.unformat_buffer = unformat_dhcpv6_proxy_header,
#endif
};
-/* *INDENT-ON* */
static uword
dhcpv6_proxy_to_client_input (vlib_main_t * vm,
@@ -583,7 +577,7 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm,
udp_header_t *u0, *u1 = 0;
dhcpv6_relay_hdr_t *h0;
ip6_header_t *ip1 = 0, *ip0;
- ip6_address_t _ia0, *ia0 = &_ia0;
+ ip6_address_t *ia0 = 0;
ip6_address_t client_address;
ethernet_interface_t *ei0;
ethernet_header_t *mac0;
@@ -828,7 +822,6 @@ dhcpv6_proxy_to_client_input (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcpv6_proxy_to_client_node, static) = {
.function = dhcpv6_proxy_to_client_input,
.name = "dhcpv6-proxy-to-client",
@@ -843,7 +836,6 @@ VLIB_REGISTER_NODE (dhcpv6_proxy_to_client_node, static) = {
.unformat_buffer = unformat_dhcpv6_proxy_header,
#endif
};
-/* *INDENT-ON* */
static clib_error_t *
dhcp6_proxy_init (vlib_main_t * vm)
@@ -927,9 +919,9 @@ dhcp6_proxy_set_server (ip46_address_t * addr,
if (dhcp_proxy_server_add (FIB_PROTOCOL_IP6, addr, src_addr,
rx_fib_index, server_table_id))
{
- mfib_table_entry_path_update (rx_fib_index,
- &all_dhcp_servers,
- MFIB_SOURCE_DHCP, &path_for_us);
+ mfib_table_entry_path_update (rx_fib_index, &all_dhcp_servers,
+ MFIB_SOURCE_DHCP, MFIB_ENTRY_FLAG_NONE,
+ &path_for_us);
/*
* Each interface that is enabled in this table, needs to be added
* as an accepting interface, but this is not easily doable in VPP.
@@ -1018,14 +1010,12 @@ dhcpv6_proxy_set_command_fn (vlib_main_t * vm,
format_unformat_error, input);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcpv6_proxy_set_command, static) = {
.path = "set dhcpv6 proxy",
.short_help = "set dhcpv6 proxy [del] server <ipv6-addr> src-address <ipv6-addr> "
"[server-fib-id <fib-id>] [rx-fib-id <fib-id>] ",
.function = dhcpv6_proxy_set_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_dhcp6_proxy_server (u8 * s, va_list * args)
@@ -1082,13 +1072,11 @@ dhcpv6_proxy_show_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcpv6_proxy_show_command, static) = {
.path = "show dhcpv6 proxy",
.short_help = "Display dhcpv6 proxy info",
.function = dhcpv6_proxy_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcpv6_vss_command_fn (vlib_main_t * vm,
@@ -1131,13 +1119,11 @@ dhcpv6_vss_command_fn (vlib_main_t * vm,
}
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcpv6_proxy_vss_command, static) = {
.path = "set dhcpv6 vss",
.short_help = "set dhcpv6 vss table <table-id> [oui <n> vpn-id <n> | vpn-ascii-id <text>]",
.function = dhcpv6_vss_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcpv6_vss_show_command_fn (vlib_main_t * vm,
@@ -1149,13 +1135,11 @@ dhcpv6_vss_show_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcpv6_proxy_vss_show_command, static) = {
.path = "show dhcpv6 vss",
.short_help = "show dhcpv6 VSS",
.function = dhcpv6_vss_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
dhcpv6_link_address_show_command_fn (vlib_main_t * vm,
@@ -1197,13 +1181,11 @@ dhcpv6_link_address_show_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dhcpv6_proxy_address_show_command, static) = {
.path = "show dhcpv6 link-address interface",
.short_help = "show dhcpv6 link-address interface <interface>",
.function = dhcpv6_link_address_show_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dhcp/dhcp_api.c b/src/plugins/dhcp/dhcp_api.c
index 2b8d41a10c1..1458db3527d 100644
--- a/src/plugins/dhcp/dhcp_api.c
+++ b/src/plugins/dhcp/dhcp_api.c
@@ -76,12 +76,10 @@ vl_api_dhcp_plugin_control_ping_t_handler (vl_api_dhcp_plugin_control_ping_t *
vl_api_dhcp_plugin_control_ping_reply_t *rmp;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DHCP_PLUGIN_CONTROL_PING_REPLY,
({
rmp->vpe_pid = ntohl (getpid ());
}));
- /* *INDENT-ON* */
}
static void
@@ -92,7 +90,7 @@ vl_api_dhcp6_duid_ll_set_t_handler (vl_api_dhcp6_duid_ll_set_t * mp)
int rv = 0;
duid = (dhcpv6_duid_ll_string_t *) mp->duid_ll;
- if (duid->duid_type != htonl (DHCPV6_DUID_LL))
+ if (duid->duid_type != htons (DHCPV6_DUID_LL))
{
rv = VNET_API_ERROR_INVALID_VALUE;
goto reply;
@@ -279,8 +277,8 @@ dhcp_client_lease_encode (vl_api_dhcp_lease_t * lease,
lease->count = vec_len (client->domain_server_address);
for (i = 0; i < lease->count; i++)
- clib_memcpy (&lease->domain_server[i].address,
- (u8 *) & client->domain_server_address[i],
+ clib_memcpy (&lease->domain_server[i].address.un.ip4,
+ (u8 *) &client->domain_server_address[i],
sizeof (ip4_address_t));
clib_memcpy (&lease->host_mac[0], client->client_hardware_address, 6);
@@ -321,7 +319,9 @@ dhcp_compl_event_callback (u32 client_index, const dhcp_client_t * client)
if (!reg)
return;
- mp = vl_msg_api_alloc (sizeof (*mp));
+ mp = vl_msg_api_alloc (sizeof (*mp) +
+ sizeof (vl_api_domain_server_t) *
+ vec_len (client->domain_server_address));
mp->client_index = client_index;
mp->pid = client->pid;
dhcp_client_lease_encode (&mp->lease, client);
@@ -643,6 +643,31 @@ call_dhcp6_reply_event_callbacks (void *data,
return error;
}
+static void
+vl_api_dhcp_client_detect_enable_disable_t_handler (
+ vl_api_dhcp_client_detect_enable_disable_t *mp)
+{
+ vl_api_dhcp_client_detect_enable_disable_reply_t *rmp;
+ int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
+ if (mp->enable)
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-dhcp-client-detect",
+ clib_net_to_host_u32 (mp->sw_if_index),
+ 1 /* enable */, 0, 0);
+ }
+ else
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "ip4-dhcp-client-detect",
+ clib_net_to_host_u32 (mp->sw_if_index),
+ 0 /* disable */, 0, 0);
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_DHCP_CLIENT_DETECT_ENABLE_DISABLE_REPLY);
+}
static uword
dhcp6_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
vlib_frame_t * f)
@@ -699,7 +724,6 @@ dhcp6_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
call_dhcp6_reply_event_callbacks (event, dcpm->functions);
vpe_client_registration_t *reg;
- /* *INDENT-OFF* */
pool_foreach (reg, vpe_api_main.dhcp6_reply_events_registrations)
{
vl_api_registration_t *vl_reg;
@@ -716,7 +740,6 @@ dhcp6_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
vl_api_send_msg (vl_reg, (u8 *) msg);
}
}
- /* *INDENT-ON* */
clib_mem_free (event);
}
@@ -727,13 +750,11 @@ dhcp6_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp6_reply_process_node) = {
.function = dhcp6_reply_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dhcp6-reply-publisher-process",
};
-/* *INDENT-ON* */
static clib_error_t *
call_dhcp6_pd_reply_event_callbacks (void *data,
@@ -811,7 +832,6 @@ dhcp6_pd_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
call_dhcp6_pd_reply_event_callbacks (event, dpcpm->functions);
vpe_client_registration_t *reg;
- /* *INDENT-OFF* */
pool_foreach (reg, vpe_api_main.dhcp6_pd_reply_events_registrations)
{
vl_api_registration_t *vl_reg;
@@ -828,7 +848,6 @@ dhcp6_pd_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
vl_api_send_msg (vl_reg, (u8 *) msg);
}
}
- /* *INDENT-ON* */
clib_mem_free (event);
}
@@ -839,13 +858,11 @@ dhcp6_pd_reply_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp6_pd_reply_process_node) = {
.function = dhcp6_pd_reply_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dhcp6-pd-reply-publisher-process",
};
-/* *INDENT-ON* */
/*
* dhcp_api_hookup
@@ -877,12 +894,10 @@ VLIB_API_INIT_FUNCTION (dhcp_api_hookup);
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Dynamic Host Configuration Protocol (DHCP)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dhcp/dhcp_client_detect.c b/src/plugins/dhcp/dhcp_client_detect.c
index 598bd16cf8d..c02693f2ccf 100644
--- a/src/plugins/dhcp/dhcp_client_detect.c
+++ b/src/plugins/dhcp/dhcp_client_detect.c
@@ -286,7 +286,6 @@ format_dhcp_client_detect_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dhcp_client_detect_node) = {
.name = "ip4-dhcp-client-detect",
.vector_size = sizeof (u32),
@@ -313,7 +312,6 @@ VNET_FEATURE_INIT (ip4_dvr_reinject_feat_node, static) =
.runs_before = VNET_FEATURES ("ip4-not-enabled"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dhcp/dhcp_test.c b/src/plugins/dhcp/dhcp_test.c
index c1894ec01ea..7820f51d442 100644
--- a/src/plugins/dhcp/dhcp_test.c
+++ b/src/plugins/dhcp/dhcp_test.c
@@ -39,13 +39,11 @@ dhcp_test_main_t dhcp_test_main;
#define __plugin_msg_base dhcp_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static int
api_dhcp_proxy_config (vat_main_t * vam)
@@ -423,6 +421,11 @@ api_dhcp_plugin_get_version (vat_main_t * vam)
{
return -1;
}
+static int
+api_dhcp_client_detect_enable_disable (vat_main_t *vam)
+{
+ return -1;
+}
static void
vl_api_dhcp_plugin_get_version_reply_t_handler
diff --git a/src/plugins/dispatch-trace/CMakeLists.txt b/src/plugins/dispatch-trace/CMakeLists.txt
index 5ba47f9aba3..fd7c62396df 100644
--- a/src/plugins/dispatch-trace/CMakeLists.txt
+++ b/src/plugins/dispatch-trace/CMakeLists.txt
@@ -14,4 +14,7 @@
add_vpp_plugin(dispatch_trace
SOURCES
main.c
+
+ COMPONENT
+ vpp-plugin-devtools
)
diff --git a/src/plugins/dispatch-trace/main.c b/src/plugins/dispatch-trace/main.c
index ce662dc0eea..40a87dde0f3 100644
--- a/src/plugins/dispatch-trace/main.c
+++ b/src/plugins/dispatch-trace/main.c
@@ -480,7 +480,7 @@ dispatch_trace_command_fn (vlib_main_t *vm, unformat_input_t *input,
* pcap dispatch capture on...
* @cliexend
* Example of how to display the status of a tx packet capture in progress:
- * @cliexstart{pcap tx trace status}
+ * @cliexstart{pcap trace tx status}
* max is 35, dispatch trace to file /tmp/vppTest.pcap
* pcap tx capture is on: 20 of 35 pkts...
* @cliexend
diff --git a/src/plugins/dma_intel/CMakeLists.txt b/src/plugins/dma_intel/CMakeLists.txt
new file mode 100644
index 00000000000..b683036f7e3
--- /dev/null
+++ b/src/plugins/dma_intel/CMakeLists.txt
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2022 Cisco Systems, Inc.
+
+add_vpp_plugin(dma_intel
+ SOURCES
+ dsa.c
+ format.c
+ main.c
+
+ SUPPORTED_OS_LIST Linux
+)
diff --git a/src/plugins/dma_intel/dsa.c b/src/plugins/dma_intel/dsa.c
new file mode 100644
index 00000000000..473f2efa93e
--- /dev/null
+++ b/src/plugins/dma_intel/dsa.c
@@ -0,0 +1,452 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/dma/dma.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/atomics.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <dma_intel/dsa_intel.h>
+
+extern vlib_node_registration_t intel_dsa_node;
+
+VLIB_REGISTER_LOG_CLASS (intel_dsa_log, static) = {
+ .class_name = "intel_dsa",
+ .subclass_name = "dsa",
+};
+
+static void
+intel_dsa_channel_lock (intel_dsa_channel_t *ch)
+{
+ u8 expected = 0;
+ if (ch->n_threads < 2)
+ return;
+
+ /* channel is used by multiple threads so we need to lock it */
+ while (!__atomic_compare_exchange_n (&ch->lock, &expected,
+ /* desired */ 1, /* weak */ 0,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+ {
+ while (__atomic_load_n (&ch->lock, __ATOMIC_RELAXED))
+ CLIB_PAUSE ();
+ expected = 0;
+ }
+}
+
+static void
+intel_dsa_channel_unlock (intel_dsa_channel_t *ch)
+{
+ if (ch->n_threads < 2)
+ return;
+
+ __atomic_store_n (&ch->lock, 0, __ATOMIC_RELEASE);
+}
+
+static vlib_dma_batch_t *
+intel_dsa_batch_new (vlib_main_t *vm, struct vlib_dma_config_data *cd)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_config_t *idc;
+ intel_dsa_batch_t *b;
+
+ idc = vec_elt_at_index (idm->dsa_config_heap,
+ cd->private_data + vm->thread_index);
+
+ if (vec_len (idc->freelist) > 0)
+ b = vec_pop (idc->freelist);
+ else
+ {
+ clib_spinlock_lock (&idm->lock);
+ b = vlib_physmem_alloc (vm, idc->alloc_size);
+ clib_spinlock_unlock (&idm->lock);
+ /* if no free space in physmem, force quit */
+ ASSERT (b != NULL);
+ *b = idc->batch_template;
+ b->max_transfers = idc->max_transfers;
+
+ u32 def_flags = (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) |
+ INTEL_DSA_FLAG_CACHE_CONTROL;
+ if (b->ch->block_on_fault)
+ def_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT;
+ for (int i = 0; i < idc->max_transfers; i++)
+ {
+ intel_dsa_desc_t *dsa_desc = b->descs + i;
+ dsa_desc->op_flags = def_flags;
+ }
+ }
+
+ return &b->batch;
+}
+
+#if defined(__x86_64__) || defined(i386)
+static_always_inline void
+__movdir64b (volatile void *dst, const void *src)
+{
+ asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+ :
+ : "a"(dst), "d"(src)
+ : "memory");
+}
+#endif
+
+static_always_inline void
+intel_dsa_batch_fallback (vlib_main_t *vm, intel_dsa_batch_t *b,
+ intel_dsa_channel_t *ch)
+{
+ for (u16 i = 0; i < b->batch.n_enq; i++)
+ {
+ intel_dsa_desc_t *desc = &b->descs[i];
+ clib_memcpy_fast (desc->dst, desc->src, desc->size);
+ }
+ b->status = INTEL_DSA_STATUS_CPU_SUCCESS;
+ ch->submitted++;
+ return;
+}
+
+int
+intel_dsa_batch_submit (vlib_main_t *vm, struct vlib_dma_batch *vb)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_batch_t *b = (intel_dsa_batch_t *) vb;
+ intel_dsa_channel_t *ch = b->ch;
+ if (PREDICT_FALSE (vb->n_enq == 0))
+ {
+ vec_add1 (idm->dsa_config_heap[b->config_heap_index].freelist, b);
+ return 0;
+ }
+
+ intel_dsa_channel_lock (ch);
+ if (ch->n_enq >= ch->size)
+ {
+ if (!b->sw_fallback)
+ {
+ intel_dsa_channel_unlock (ch);
+ return 0;
+ }
+ /* skip channel limitation if first pending finished */
+ intel_dsa_batch_t *lb = NULL;
+ u32 n_pendings =
+ vec_len (idm->dsa_threads[vm->thread_index].pending_batches);
+ if (n_pendings)
+ lb =
+ idm->dsa_threads[vm->thread_index].pending_batches[n_pendings - 1];
+
+ if (!lb || lb->status != INTEL_DSA_STATUS_SUCCESS)
+ {
+ intel_dsa_batch_fallback (vm, b, ch);
+ goto done;
+ }
+ }
+
+ b->status = INTEL_DSA_STATUS_BUSY;
+ if (PREDICT_FALSE (vb->n_enq == 1))
+ {
+ intel_dsa_desc_t *desc = &b->descs[0];
+ desc->completion = (u64) &b->completion_cl;
+ desc->op_flags |= INTEL_DSA_FLAG_COMPLETION_ADDR_VALID |
+ INTEL_DSA_FLAG_REQUEST_COMPLETION;
+#if defined(__x86_64__) || defined(i386)
+ _mm_sfence (); /* fence before writing desc to device */
+ __movdir64b (ch->portal, (void *) desc);
+#endif
+ }
+ else
+ {
+ intel_dsa_desc_t *batch_desc = &b->descs[b->max_transfers];
+ batch_desc->op_flags = (INTEL_DSA_OP_BATCH << INTEL_DSA_OP_SHIFT) |
+ INTEL_DSA_FLAG_COMPLETION_ADDR_VALID |
+ INTEL_DSA_FLAG_REQUEST_COMPLETION;
+ batch_desc->desc_addr = (void *) (b->descs);
+ batch_desc->size = vb->n_enq;
+ batch_desc->completion = (u64) &b->completion_cl;
+#if defined(__x86_64__) || defined(i386)
+ _mm_sfence (); /* fence before writing desc to device */
+ __movdir64b (ch->portal, (void *) batch_desc);
+#endif
+ }
+
+ ch->submitted++;
+ ch->n_enq++;
+
+done:
+ intel_dsa_channel_unlock (ch);
+ vec_add1 (idm->dsa_threads[vm->thread_index].pending_batches, b);
+ vlib_node_set_interrupt_pending (vm, intel_dsa_node.index);
+ return 1;
+}
+
+static int
+intel_dsa_check_channel (intel_dsa_channel_t *ch, vlib_dma_config_data_t *cd)
+{
+ if (!ch)
+ {
+ dsa_log_error ("no available dsa channel");
+ return 1;
+ }
+ vlib_dma_config_t supported_cfg = {
+ .barrier_before_last = 1,
+ .sw_fallback = 1,
+ };
+
+ if (cd->cfg.features & ~supported_cfg.features)
+ {
+ dsa_log_error ("unsupported feature requested");
+ return 1;
+ }
+
+ if (cd->cfg.max_transfers > ch->max_transfers)
+ {
+ dsa_log_error ("transfer number (%u) too big", cd->cfg.max_transfers);
+ return 1;
+ }
+
+ if (cd->cfg.max_transfer_size > ch->max_transfer_size)
+ {
+ dsa_log_error ("transfer size (%u) too big", cd->cfg.max_transfer_size);
+ return 1;
+ }
+ return 0;
+}
+
+static_always_inline void
+intel_dsa_alloc_dma_batch (vlib_main_t *vm, intel_dsa_config_t *idc)
+{
+ intel_dsa_batch_t *b;
+ b = vlib_physmem_alloc (vm, idc->alloc_size);
+ /* if no free space in physmem, force quit */
+ ASSERT (b != NULL);
+ *b = idc->batch_template;
+ b->max_transfers = idc->max_transfers;
+
+ u32 def_flags = (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) |
+ INTEL_DSA_FLAG_CACHE_CONTROL;
+ if (b->ch->block_on_fault)
+ def_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT;
+
+ for (int i = 0; i < idc->max_transfers; i++)
+ {
+ intel_dsa_desc_t *dsa_desc = b->descs + i;
+ dsa_desc->op_flags = def_flags;
+ }
+ vec_add1 (idc->freelist, b);
+}
+
+static int
+intel_dsa_config_add_fn (vlib_main_t *vm, vlib_dma_config_data_t *cd)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_config_t *idc;
+ u32 index, n_threads = vlib_get_n_threads ();
+
+ vec_validate (idm->dsa_config_heap_handle_by_config_index, cd->config_index);
+ index = heap_alloc_aligned (
+ idm->dsa_config_heap, n_threads, CLIB_CACHE_LINE_BYTES,
+ idm->dsa_config_heap_handle_by_config_index[cd->config_index]);
+
+ cd->batch_new_fn = intel_dsa_batch_new;
+ cd->private_data = index;
+
+ for (u32 thread = 0; thread < n_threads; thread++)
+ {
+ intel_dsa_batch_t *idb;
+ vlib_dma_batch_t *b;
+ idc = vec_elt_at_index (idm->dsa_config_heap, index + thread);
+
+ /* size of physmem allocation for this config */
+ idc->max_transfers = cd->cfg.max_transfers;
+ idc->alloc_size = sizeof (intel_dsa_batch_t) +
+ sizeof (intel_dsa_desc_t) * (idc->max_transfers + 1);
+ /* fill batch template */
+ idb = &idc->batch_template;
+ idb->ch = idm->dsa_threads[thread].ch;
+ if (intel_dsa_check_channel (idb->ch, cd))
+ return 0;
+
+ dsa_log_debug ("config %d in thread %d using channel %u/%u",
+ cd->config_index, thread, idb->ch->did, idb->ch->qid);
+ idb->config_heap_index = index + thread;
+ idb->config_index = cd->config_index;
+ idb->batch.callback_fn = cd->cfg.callback_fn;
+ idb->features = cd->cfg.features;
+ b = &idb->batch;
+ b->stride = sizeof (intel_dsa_desc_t);
+ b->src_ptr_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].src);
+ b->dst_ptr_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].dst);
+ b->size_off = STRUCT_OFFSET_OF (intel_dsa_batch_t, descs[0].size);
+ b->submit_fn = intel_dsa_batch_submit;
+ dsa_log_debug (
+ "config %d in thread %d stride %d src/dst/size offset %d-%d-%d",
+ cd->config_index, thread, b->stride, b->src_ptr_off, b->dst_ptr_off,
+ b->size_off);
+
+ /* allocate dma batch in advance */
+ for (u32 index = 0; index < cd->cfg.max_batches; index++)
+ intel_dsa_alloc_dma_batch (vm, idc);
+ }
+
+ dsa_log_info ("config %u added", cd->private_data);
+
+ return 1;
+}
+
+static void
+intel_dsa_config_del_fn (vlib_main_t *vm, vlib_dma_config_data_t *cd)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_thread_t *t =
+ vec_elt_at_index (idm->dsa_threads, vm->thread_index);
+ u32 n_pending, n_threads, config_heap_index, n = 0;
+ n_threads = vlib_get_n_threads ();
+
+ if (!t->pending_batches)
+ goto free_heap;
+
+ n_pending = vec_len (t->pending_batches);
+ intel_dsa_batch_t *b;
+
+ /* clean pending list and free list */
+ for (u32 i = 0; i < n_pending; i++)
+ {
+ b = t->pending_batches[i];
+ if (b->config_index == cd->config_index)
+ {
+ vec_add1 (idm->dsa_config_heap[b->config_heap_index].freelist, b);
+ if (b->status == INTEL_DSA_STATUS_SUCCESS ||
+ b->status == INTEL_DSA_STATUS_BUSY)
+ b->ch->n_enq--;
+ }
+ else
+ t->pending_batches[n++] = b;
+ }
+
+ vec_set_len (t->pending_batches, n);
+
+free_heap:
+ for (u32 thread = 0; thread < n_threads; thread++)
+ {
+ config_heap_index = cd->private_data + thread;
+ while (vec_len (idm->dsa_config_heap[config_heap_index].freelist) > 0)
+ {
+ b = vec_pop (idm->dsa_config_heap[config_heap_index].freelist);
+ vlib_physmem_free (vm, b);
+ }
+ }
+
+ heap_dealloc (idm->dsa_config_heap,
+ idm->dsa_config_heap_handle_by_config_index[cd->config_index]);
+
+ dsa_log_debug ("config %u removed", cd->private_data);
+}
+
+static uword
+intel_dsa_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_thread_t *t =
+ vec_elt_at_index (idm->dsa_threads, vm->thread_index);
+ u32 n_pending = 0, n = 0;
+ u8 glitch = 0, status;
+
+ if (!t->pending_batches)
+ return 0;
+
+ n_pending = vec_len (t->pending_batches);
+
+ for (u32 i = 0; i < n_pending; i++)
+ {
+ intel_dsa_batch_t *b = t->pending_batches[i];
+ intel_dsa_channel_t *ch = b->ch;
+
+ status = b->status;
+ if ((status == INTEL_DSA_STATUS_SUCCESS ||
+ status == INTEL_DSA_STATUS_CPU_SUCCESS) &&
+ !glitch)
+ {
+ /* callback */
+ if (b->batch.callback_fn)
+ b->batch.callback_fn (vm, &b->batch);
+
+ /* restore last descriptor fields */
+ if (b->batch.n_enq == 1)
+ {
+ b->descs[0].completion = 0;
+ b->descs[0].op_flags =
+ (INTEL_DSA_OP_MEMMOVE << INTEL_DSA_OP_SHIFT) |
+ INTEL_DSA_FLAG_CACHE_CONTROL;
+ if (b->ch->block_on_fault)
+ b->descs[0].op_flags |= INTEL_DSA_FLAG_BLOCK_ON_FAULT;
+ }
+ /* add to freelist */
+ vec_add1 (idm->dsa_config_heap[b->config_heap_index].freelist, b);
+
+ intel_dsa_channel_lock (ch);
+ if (status == INTEL_DSA_STATUS_SUCCESS)
+ {
+ ch->n_enq--;
+ ch->completed++;
+ }
+ else
+ ch->sw_fallback++;
+ intel_dsa_channel_unlock (ch);
+
+ b->batch.n_enq = 0;
+ b->status = INTEL_DSA_STATUS_IDLE;
+ }
+ else if (status == INTEL_DSA_STATUS_BUSY)
+ {
+ glitch = 1 & b->barrier_before_last;
+ t->pending_batches[n++] = b;
+ }
+ else if (!glitch)
+ {
+ /* fallback to software if exception happened */
+ intel_dsa_batch_fallback (vm, b, ch);
+ glitch = 1 & b->barrier_before_last;
+ }
+ else
+ {
+ t->pending_batches[n++] = b;
+ }
+ }
+ vec_set_len (t->pending_batches, n);
+
+ if (n)
+ {
+ vlib_node_set_interrupt_pending (vm, intel_dsa_node.index);
+ }
+
+ return n_pending - n;
+}
+
+u8 *
+format_dsa_info (u8 *s, va_list *args)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ intel_dsa_channel_t *ch;
+ ch = idm->dsa_threads[vm->thread_index].ch;
+ s = format (s, "thread %d dma %u/%u request %-16lld hw %-16lld cpu %-16lld",
+ vm->thread_index, ch->did, ch->qid, ch->submitted, ch->completed,
+ ch->sw_fallback);
+ return s;
+}
+
+VLIB_REGISTER_NODE (intel_dsa_node) = {
+ .function = intel_dsa_node_fn,
+ .name = "intel-dsa",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+ .vector_size = 4,
+};
+
+vlib_dma_backend_t intel_dsa_backend = {
+ .name = "Intel DSA",
+ .config_add_fn = intel_dsa_config_add_fn,
+ .config_del_fn = intel_dsa_config_del_fn,
+ .info_fn = format_dsa_info,
+};
diff --git a/src/plugins/dma_intel/dsa_intel.h b/src/plugins/dma_intel/dsa_intel.h
new file mode 100644
index 00000000000..a52d4bff323
--- /dev/null
+++ b/src/plugins/dma_intel/dsa_intel.h
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ */
+
+#ifndef __dma_intel_dsa_intel_h__
+#define __dma_intel_dsa_intel_h__
+
+#include <vlib/vlib.h>
+#include <vlib/dma/dma.h>
+#include <vlib/pci/pci.h>
+#include <vppinfra/format.h>
+typedef struct
+{
+ u32 pasid;
+ u32 op_flags;
+ u64 completion;
+ union
+ {
+ void *src;
+ void *desc_addr;
+ };
+ void *dst;
+ u32 size;
+ u16 intr_handle;
+ /* remaining 26 bytes are reserved */
+ u16 __reserved[13];
+} intel_dsa_desc_t;
+
+STATIC_ASSERT_SIZEOF (intel_dsa_desc_t, 64);
+
+#define DSA_DEV_PATH "/dev/dsa"
+#define SYS_DSA_PATH "/sys/bus/dsa/devices"
+
+typedef enum
+{
+ INTEL_DSA_DEVICE_TYPE_UNKNOWN,
+ INTEL_DSA_DEVICE_TYPE_KERNEL,
+ INTEL_DSA_DEVICE_TYPE_USER,
+ INTEL_DSA_DEVICE_TYPE_MDEV,
+} intel_dsa_wq_type_t;
+
+enum dsa_ops
+{
+ INTEL_DSA_OP_NOP = 0,
+ INTEL_DSA_OP_BATCH,
+ INTEL_DSA_OP_DRAIN,
+ INTEL_DSA_OP_MEMMOVE,
+ INTEL_DSA_OP_FILL
+};
+#define INTEL_DSA_OP_SHIFT 24
+#define INTEL_DSA_FLAG_FENCE (1 << 0)
+#define INTEL_DSA_FLAG_BLOCK_ON_FAULT (1 << 1)
+#define INTEL_DSA_FLAG_COMPLETION_ADDR_VALID (1 << 2)
+#define INTEL_DSA_FLAG_REQUEST_COMPLETION (1 << 3)
+#define INTEL_DSA_FLAG_CACHE_CONTROL (1 << 8)
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile void *portal; /* portal exposed by dedicated work queue */
+ u64 submitted;
+ u64 completed;
+ u64 sw_fallback;
+ u32 max_transfer_size; /* maximum size of each transfer */
+ u16 max_transfers; /* maximum number referenced in a batch */
+ u16 n_threads; /* number of threads using this channel */
+ u16 n_enq; /* number of batches currently enqueued */
+ union
+ {
+ u16 wq_control;
+ struct
+ {
+ u16 type : 2;
+ u16 state : 1;
+ u16 ats_disable : 1;
+ u16 block_on_fault : 1;
+ u16 mode : 1;
+ };
+ };
+ u8 lock; /* spinlock, only used if m_threads > 1 */
+ u8 numa; /* numa node */
+ u8 size; /* size of work queue */
+ u8 did; /* dsa device id */
+ u8 qid; /* work queue id */
+} intel_dsa_channel_t;
+
+typedef struct intel_dsa_batch
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (start);
+ vlib_dma_batch_t batch; /* must be first */
+ intel_dsa_channel_t *ch;
+ u32 config_heap_index;
+ u32 max_transfers;
+ u32 config_index;
+ union
+ {
+ struct
+ {
+ u32 barrier_before_last : 1;
+ u32 sw_fallback : 1;
+ };
+ u32 features;
+ };
+ CLIB_CACHE_LINE_ALIGN_MARK (completion_cl);
+#define INTEL_DSA_STATUS_IDLE 0x0
+#define INTEL_DSA_STATUS_SUCCESS 0x1
+#define INTEL_DSA_STATUS_BUSY 0xa
+#define INTEL_DSA_STATUS_CPU_SUCCESS 0xb
+ u8 status;
+ /* to avoid read-modify-write completion is written as 64-byte
+ * DMA FILL operation */
+ CLIB_CACHE_LINE_ALIGN_MARK (descriptors);
+ intel_dsa_desc_t descs[0];
+} intel_dsa_batch_t;
+
+STATIC_ASSERT_OFFSET_OF (intel_dsa_batch_t, batch, 0);
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ intel_dsa_batch_t batch_template;
+ u32 alloc_size;
+ u32 max_transfers;
+ intel_dsa_batch_t **freelist;
+} intel_dsa_config_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ intel_dsa_channel_t *ch; /* channel used by this thread */
+ intel_dsa_batch_t **pending_batches;
+} intel_dsa_thread_t;
+
+typedef struct
+{
+ intel_dsa_channel_t ***channels;
+ intel_dsa_thread_t *dsa_threads;
+ intel_dsa_config_t *dsa_config_heap;
+ uword *dsa_config_heap_handle_by_config_index;
+ /* spin lock protect pmem */
+ clib_spinlock_t lock;
+} intel_dsa_main_t;
+
+extern intel_dsa_main_t intel_dsa_main;
+extern vlib_dma_backend_t intel_dsa_backend;
+format_function_t format_intel_dsa_addr;
+
+#define dsa_log_debug(f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, intel_dsa_log.class, "%s: " f, __func__, \
+ ##__VA_ARGS__)
+
+#define dsa_log_info(f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_INFO, intel_dsa_log.class, "%s: " f, __func__, \
+ ##__VA_ARGS__)
+
+#define dsa_log_error(f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, intel_dsa_log.class, "%s: " f, __func__, \
+ ##__VA_ARGS__)
+
+#endif
diff --git a/src/plugins/dma_intel/format.c b/src/plugins/dma_intel/format.c
new file mode 100644
index 00000000000..b05a06fb3b1
--- /dev/null
+++ b/src/plugins/dma_intel/format.c
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ */
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/dma/dma.h>
+#include <vnet/plugin/plugin.h>
+#include <dma_intel/dsa_intel.h>
+
+u8 *
+format_intel_dsa_addr (u8 *s, va_list *va)
+{
+ intel_dsa_channel_t *ch = va_arg (*va, intel_dsa_channel_t *);
+ return format (s, "wq%d.%d", ch->did, ch->qid);
+}
diff --git a/src/plugins/dma_intel/main.c b/src/plugins/dma_intel/main.c
new file mode 100644
index 00000000000..0f9ac185b9d
--- /dev/null
+++ b/src/plugins/dma_intel/main.c
@@ -0,0 +1,272 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/dma/dma.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <vppinfra/linux/sysfs.h>
+#include <dma_intel/dsa_intel.h>
+
+VLIB_REGISTER_LOG_CLASS (intel_dsa_log, static) = {
+ .class_name = "intel_dsa",
+};
+
+intel_dsa_main_t intel_dsa_main;
+
+void
+intel_dsa_assign_channels (vlib_main_t *vm)
+{
+ intel_dsa_main_t *idm = &intel_dsa_main;
+ intel_dsa_channel_t *ch, **chv = 0;
+ u16 n_threads;
+ int n;
+
+ vec_foreach_index (n, idm->channels)
+ vec_append (chv, idm->channels[n]);
+
+ vec_validate (idm->dsa_threads, vlib_get_n_threads () - 1);
+
+ if (vec_len (chv) == 0)
+ {
+ dsa_log_debug ("No DSA channels found");
+ goto done;
+ }
+
+ if (vec_len (chv) >= vlib_get_n_threads ())
+ n_threads = 1;
+ else
+ n_threads = vlib_get_n_threads () % vec_len (chv) ?
+ vlib_get_n_threads () / vec_len (chv) + 1 :
+ vlib_get_n_threads () / vec_len (chv);
+
+ for (int i = 0; i < vlib_get_n_threads (); i++)
+ {
+ vlib_main_t *tvm = vlib_get_main_by_index (i);
+ ch = *vec_elt_at_index (chv, i / n_threads);
+ idm->dsa_threads[i].ch = ch;
+ ch->n_threads = n_threads;
+ dsa_log_debug ("Assigning channel %u/%u to thread %u (numa %u)", ch->did,
+ ch->qid, i, tvm->numa_node);
+ }
+
+done:
+ /* free */
+ vec_free (chv);
+}
+
+static clib_error_t *
+intel_dsa_map_region (intel_dsa_channel_t *ch)
+{
+ static clib_error_t *error = NULL;
+ /* map one page */
+ uword size = 0x1000;
+ uword offset = 0;
+ char path[256] = { 0 };
+
+ snprintf (path, sizeof (path), "%s/wq%d.%d", DSA_DEV_PATH, ch->did, ch->qid);
+ int fd = open (path, O_RDWR);
+ if (fd < 0)
+ return clib_error_return (0, "failed to open dsa device %s", path);
+
+ ch->portal =
+ clib_mem_vm_map_shared (0, size, fd, offset, "%s", (char *) path);
+ if (ch->portal == CLIB_MEM_VM_MAP_FAILED)
+ {
+ error = clib_error_return (0, "mmap portal %s failed", path);
+ close (fd);
+ return error;
+ }
+
+ return NULL;
+}
+
+static clib_error_t *
+intel_dsa_get_info (intel_dsa_channel_t *ch, clib_error_t **error)
+{
+ clib_error_t *err;
+ u8 *tmpstr;
+ u8 *dev_dir_name = 0, *wq_dir_name = 0;
+
+ u8 *f = 0;
+ dev_dir_name = format (0, "%s/dsa%d", SYS_DSA_PATH, ch->did);
+
+ vec_reset_length (f);
+ f = format (f, "%v/numa_node%c", dev_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->numa = atoi ((char *) tmpstr);
+
+ wq_dir_name = format (0, "%s/%U", SYS_DSA_PATH, format_intel_dsa_addr, ch);
+
+ vec_reset_length (f);
+ f = format (f, "%v/max_transfer_size%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->max_transfer_size = atoi ((char *) tmpstr);
+
+ vec_reset_length (f);
+ f = format (f, "%v/max_batch_size%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->max_transfers = atoi ((char *) tmpstr);
+
+ vec_reset_length (f);
+ f = format (f, "%v/size%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->size = atoi ((char *) tmpstr);
+
+ vec_reset_length (f);
+ f = format (f, "%v/type%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ if (tmpstr)
+ {
+ if (!clib_strcmp ((char *) tmpstr, "enabled"))
+ ch->type = INTEL_DSA_DEVICE_TYPE_UNKNOWN;
+ else if (!clib_strcmp ((char *) tmpstr, "user"))
+ ch->type = INTEL_DSA_DEVICE_TYPE_USER;
+ else if (!clib_strcmp ((char *) tmpstr, "mdev"))
+ ch->type = INTEL_DSA_DEVICE_TYPE_KERNEL;
+ else
+ ch->type = INTEL_DSA_DEVICE_TYPE_UNKNOWN;
+ vec_free (tmpstr);
+ }
+
+ vec_reset_length (f);
+ f = format (f, "%v/state%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ if (tmpstr)
+ {
+ if (!clib_strcmp ((char *) tmpstr, "enabled"))
+ ch->state = 1;
+ else
+ ch->state = 0;
+ vec_free (tmpstr);
+ }
+
+ vec_reset_length (f);
+ f = format (f, "%v/ats_disable%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->ats_disable = atoi ((char *) tmpstr);
+
+ vec_reset_length (f);
+ f = format (f, "%v/block_on_fault%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ ch->block_on_fault = atoi ((char *) tmpstr);
+
+ vec_reset_length (f);
+ f = format (f, "%v/mode%c", wq_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "%s", &tmpstr);
+ if (err)
+ goto error;
+ if (tmpstr)
+ {
+ if (!clib_strcmp ((char *) tmpstr, "dedicated"))
+ ch->mode = 1;
+ else
+ ch->mode = 0;
+ vec_free (tmpstr);
+ }
+
+ vec_free (f);
+ vec_free (dev_dir_name);
+ vec_free (wq_dir_name);
+ return NULL;
+
+error:
+ vec_free (f);
+ vec_free (dev_dir_name);
+ vec_free (wq_dir_name);
+
+ return err;
+}
+
+clib_error_t *
+intel_dsa_add_channel (vlib_main_t *vm, intel_dsa_channel_t *ch)
+{
+ intel_dsa_main_t *dm = &intel_dsa_main;
+ clib_error_t *err = 0;
+
+ if (intel_dsa_map_region (ch))
+ return clib_error_return (0, "dsa open device failed");
+
+ if (intel_dsa_get_info (ch, &err))
+ return clib_error_return (err, "dsa info not scanned");
+
+ vec_validate (dm->channels, ch->numa);
+ vec_add1 (dm->channels[ch->numa], ch);
+
+ return err;
+}
+
+static clib_error_t *
+dsa_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ clib_error_t *error = 0;
+ intel_dsa_channel_t *ch;
+ u32 did, qid;
+
+ if (intel_dsa_main.lock == 0)
+ clib_spinlock_init (&(intel_dsa_main.lock));
+
+ if ((error = vlib_dma_register_backend (vm, &intel_dsa_backend)))
+ goto done;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "dev wq%d.%d", &did, &qid))
+ {
+ ch = clib_mem_alloc_aligned (sizeof (*ch), CLIB_CACHE_LINE_BYTES);
+ clib_memset (ch, 0, sizeof (*ch));
+ ch->did = did;
+ ch->qid = qid;
+ if (intel_dsa_add_channel (vm, ch))
+ clib_mem_free (ch);
+ }
+ else if (unformat_skip_white_space (input))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+done:
+ return error;
+}
+
+VLIB_CONFIG_FUNCTION (dsa_config, "dsa");
+
+clib_error_t *
+intel_dsa_num_workers_change (vlib_main_t *vm)
+{
+ intel_dsa_assign_channels (vm);
+ return 0;
+}
+
+VLIB_NUM_WORKERS_CHANGE_FN (intel_dsa_num_workers_change);
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Intel DSA Backend",
+};
diff --git a/src/plugins/dns/dns.c b/src/plugins/dns/dns.c
index 0801681b8b3..3cecf942d55 100644
--- a/src/plugins/dns/dns.c
+++ b/src/plugins/dns/dns.c
@@ -16,9 +16,8 @@
#include <vnet/vnet.h>
#include <vnet/udp/udp_local.h>
#include <vnet/plugin/plugin.h>
-#include <vnet/fib/fib_table.h>
#include <dns/dns.h>
-
+#include <vnet/ip/ip_sas.h>
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
#include <vpp/app/version.h>
@@ -31,16 +30,27 @@
#define REPLY_MSG_ID_BASE dm->msg_id_base
#include <vlibapi/api_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
dns_main_t dns_main;
+/* the cache hashtable expects a NULL-terminated C-string but everywhere else
+ * expects a non-NULL terminated vector... The pattern of adding \0 but hiding
+ * it away drives AddressSanitizer crazy, this helper tries to bring some of
+ * its sanity back
+ */
+static_always_inline void
+dns_terminate_c_string (u8 **v)
+{
+ vec_add1 (*v, 0);
+ vec_dec_len (*v, 1);
+ clib_mem_unpoison (vec_end (*v), 1);
+}
+
static int
dns_cache_clear (dns_main_t * dm)
{
@@ -51,13 +61,11 @@ dns_cache_clear (dns_main_t * dm)
dns_cache_lock (dm, 1);
- /* *INDENT-OFF* */
pool_foreach (ep, dm->entries)
{
vec_free (ep->name);
vec_free (ep->pending_requests);
}
- /* *INDENT-ON* */
pool_free (dm->entries);
hash_free (dm->cache_entry_by_name);
@@ -225,66 +233,16 @@ vnet_dns_send_dns4_request (vlib_main_t * vm, dns_main_t * dm,
u32 bi;
vlib_buffer_t *b;
ip4_header_t *ip;
- fib_prefix_t prefix;
- fib_node_index_t fei;
- u32 sw_if_index, fib_index;
udp_header_t *udp;
- ip4_main_t *im4 = &ip4_main;
- ip_lookup_main_t *lm4 = &im4->lookup_main;
- ip_interface_address_t *ia = 0;
- ip4_address_t *src_address;
+ ip4_address_t src_address;
u8 *dns_request;
vlib_frame_t *f;
u32 *to_next;
ASSERT (ep->dns_request);
- /* Find a FIB path to the server */
- clib_memcpy (&prefix.fp_addr.ip4, server, sizeof (*server));
- prefix.fp_proto = FIB_PROTOCOL_IP4;
- prefix.fp_len = 32;
-
- fib_index = fib_table_find (prefix.fp_proto, 0 /* default VRF for now */ );
- if (fib_index == (u32) ~ 0)
- {
- if (0)
- clib_warning ("no fib table");
- return;
- }
-
- fei = fib_table_lookup (fib_index, &prefix);
-
- /* Couldn't find route to destination. Bail out. */
- if (fei == FIB_NODE_INDEX_INVALID)
- {
- if (0)
- clib_warning ("no route to DNS server");
- return;
- }
-
- sw_if_index = fib_entry_get_resolving_interface (fei);
-
- if (sw_if_index == ~0)
- {
- if (0)
- clib_warning
- ("route to %U exists, fei %d, get_resolving_interface returned"
- " ~0", format_ip4_address, &prefix.fp_addr, fei);
- return;
- }
-
- /* *INDENT-OFF* */
- foreach_ip_interface_address(lm4, ia, sw_if_index, 1 /* honor unnumbered */,
- ({
- src_address = ip_interface_address_get_address (lm4, ia);
- goto found_src_address;
- }));
- /* *INDENT-ON* */
-
- clib_warning ("FIB BUG");
- return;
-
-found_src_address:
+ if (!ip4_sas (0 /* default VRF for now */, ~0, server, &src_address))
+ return;
/* Go get a buffer */
if (vlib_buffer_alloc (vm, &bi, 1) != 1)
@@ -311,7 +269,7 @@ found_src_address:
ip->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
ip->ttl = 255;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
+ ip->src_address.as_u32 = src_address.as_u32;
ip->dst_address.as_u32 = server->as_u32;
ip->checksum = ip4_header_checksum (ip);
@@ -343,14 +301,8 @@ vnet_dns_send_dns6_request (vlib_main_t * vm, dns_main_t * dm,
u32 bi;
vlib_buffer_t *b;
ip6_header_t *ip;
- fib_prefix_t prefix;
- fib_node_index_t fei;
- u32 sw_if_index, fib_index;
udp_header_t *udp;
- ip6_main_t *im6 = &ip6_main;
- ip_lookup_main_t *lm6 = &im6->lookup_main;
- ip_interface_address_t *ia = 0;
- ip6_address_t *src_address;
+ ip6_address_t src_address;
u8 *dns_request;
vlib_frame_t *f;
u32 *to_next;
@@ -358,41 +310,8 @@ vnet_dns_send_dns6_request (vlib_main_t * vm, dns_main_t * dm,
ASSERT (ep->dns_request);
- /* Find a FIB path to the server */
- clib_memcpy (&prefix.fp_addr, server, sizeof (*server));
- prefix.fp_proto = FIB_PROTOCOL_IP6;
- prefix.fp_len = 32;
-
- fib_index = fib_table_find (prefix.fp_proto, 0 /* default VRF for now */ );
- if (fib_index == (u32) ~ 0)
- {
- if (0)
- clib_warning ("no fib table");
- return;
- }
-
- fei = fib_table_lookup (fib_index, &prefix);
-
- /* Couldn't find route to destination. Bail out. */
- if (fei == FIB_NODE_INDEX_INVALID)
- {
- clib_warning ("no route to DNS server");
- }
-
- sw_if_index = fib_entry_get_resolving_interface (fei);
-
- /* *INDENT-OFF* */
- foreach_ip_interface_address(lm6, ia, sw_if_index, 1 /* honor unnumbered */,
- ({
- src_address = ip_interface_address_get_address (lm6, ia);
- goto found_src_address;
- }));
- /* *INDENT-ON* */
-
- clib_warning ("FIB BUG");
- return;
-
-found_src_address:
+ if (!ip6_sas (0 /* default VRF for now */, ~0, server, &src_address))
+ return;
/* Go get a buffer */
if (vlib_buffer_alloc (vm, &bi, 1) != 1)
@@ -421,7 +340,7 @@ found_src_address:
- sizeof (ip6_header_t));
ip->hop_limit = 255;
ip->protocol = IP_PROTOCOL_UDP;
- clib_memcpy (&ip->src_address, src_address, sizeof (ip6_address_t));
+ ip6_address_copy (&ip->src_address, &src_address);
clib_memcpy (&ip->dst_address, server, sizeof (ip6_address_t));
/* UDP header */
@@ -918,8 +837,8 @@ re_resolve:
pool_get (dm->entries, ep);
clib_memset (ep, 0, sizeof (*ep));
- ep->name = format (0, "%s%c", name, 0);
- _vec_len (ep->name) = vec_len (ep->name) - 1;
+ ep->name = format (0, "%s", name);
+ dns_terminate_c_string (&ep->name);
hash_set_mem (dm->cache_entry_by_name, ep->name, ep - dm->entries);
@@ -1077,8 +996,7 @@ found_last_request:
now = vlib_time_now (vm);
cname = vnet_dns_labels_to_name (rr->rdata, reply, &pos2);
/* Save the cname */
- vec_add1 (cname, 0);
- _vec_len (cname) -= 1;
+ dns_terminate_c_string (&cname);
ep = pool_elt_at_index (dm->entries, ep_index);
ep->cname = cname;
ep->flags |= (DNS_CACHE_ENTRY_FLAG_CNAME | DNS_CACHE_ENTRY_FLAG_VALID);
@@ -1096,8 +1014,7 @@ found_last_request:
clib_memset (next_ep, 0, sizeof (*next_ep));
next_ep->name = vec_dup (cname);
- vec_add1 (next_ep->name, 0);
- _vec_len (next_ep->name) -= 1;
+ dns_terminate_c_string (&next_ep->name);
hash_set_mem (dm->cache_entry_by_name, next_ep->name,
next_ep - dm->entries);
@@ -1460,7 +1377,7 @@ vl_api_dns_resolve_name_t_handler (vl_api_dns_resolve_name_t * mp)
dns_main_t *dm = &dns_main;
vl_api_dns_resolve_name_reply_t *rmp;
dns_cache_entry_t *ep = 0;
- dns_pending_request_t _t0, *t0 = &_t0;
+ dns_pending_request_t _t0 = { 0 }, *t0 = &_t0;
int rv;
dns_resolve_name_t rn;
@@ -1484,7 +1401,6 @@ vl_api_dns_resolve_name_t_handler (vl_api_dns_resolve_name_t * mp)
if (ep == 0)
return;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DNS_RESOLVE_NAME_REPLY, ({
ip_address_copy_addr (rmp->ip4_address, &rn.address);
if (ip_addr_version (&rn.address) == AF_IP4)
@@ -1492,7 +1408,6 @@ vl_api_dns_resolve_name_t_handler (vl_api_dns_resolve_name_t * mp)
else
rmp->ip6_set = 1;
}));
- /* *INDENT-ON* */
}
static void
@@ -1506,7 +1421,7 @@ vl_api_dns_resolve_ip_t_handler (vl_api_dns_resolve_ip_t * mp)
int i, len;
u8 *lookup_name = 0;
u8 digit, nybble;
- dns_pending_request_t _t0, *t0 = &_t0;
+ dns_pending_request_t _t0 = { 0 }, *t0 = &_t0;
if (mp->is_ip6)
{
@@ -1561,13 +1476,11 @@ vl_api_dns_resolve_ip_t_handler (vl_api_dns_resolve_ip_t * mp)
if (ep == 0)
return;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_DNS_RESOLVE_IP_REPLY,
({
rv = vnet_dns_response_to_name (ep->dns_response, rmp, 0 /* ttl-ptr */);
rmp->retval = clib_host_to_net_u32 (rv);
}));
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -2177,7 +2090,6 @@ format_dns_cache (u8 * s, va_list * args)
if (verbose > 0)
{
- /* *INDENT-OFF* */
pool_foreach (ep, dm->entries)
{
if (ep->flags & DNS_CACHE_ENTRY_FLAG_VALID)
@@ -2216,7 +2128,6 @@ format_dns_cache (u8 * s, va_list * args)
}
vec_add1 (s, '\n');
}
- /* *INDENT-ON* */
}
dns_cache_unlock (dm);
@@ -2251,14 +2162,12 @@ show_dns_cache_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_dns_cache_command) =
{
.path = "show dns cache",
.short_help = "show dns cache [verbose [nn]]",
.function = show_dns_cache_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_dns_servers_command_fn (vlib_main_t * vm,
@@ -2288,14 +2197,12 @@ show_dns_servers_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_dns_server_command) =
{
.path = "show dns servers",
.short_help = "show dns servers",
.function = show_dns_servers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -2390,14 +2297,12 @@ dns_cache_add_del_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dns_cache_add_del_command) =
{
.path = "dns cache",
.short_help = "dns cache [add|del|clear] <name> [ip4][ip6]",
.function = dns_cache_add_del_command_fn,
};
-/* *INDENT-ON* */
#define DNS_FORMAT_TEST 1
@@ -2638,14 +2543,12 @@ test_dns_fmt_command_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_dns_fmt_command) =
{
.path = "test dns format",
.short_help = "test dns format",
.function = test_dns_fmt_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_dns_unfmt_command_fn (vlib_main_t * vm,
@@ -2678,14 +2581,12 @@ test_dns_unfmt_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_dns_unfmt_command) =
{
.path = "test dns unformat",
.short_help = "test dns unformat <name> [ip4][ip6]",
.function = test_dns_unfmt_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_dns_expire_command_fn (vlib_main_t * vm,
@@ -2699,10 +2600,7 @@ test_dns_expire_command_fn (vlib_main_t * vm,
dns_cache_entry_t *ep;
if (unformat (input, "%v", &name))
- {
- vec_add1 (name, 0);
- _vec_len (name) -= 1;
- }
+ dns_terminate_c_string (&name);
else
return clib_error_return (0, "no name provided");
@@ -2724,14 +2622,12 @@ test_dns_expire_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_dns_expire_command) =
{
.path = "test dns expire",
.short_help = "test dns expire <name>",
.function = test_dns_expire_command_fn,
};
-/* *INDENT-ON* */
#endif
void
@@ -2749,13 +2645,7 @@ vnet_send_dns4_reply (vlib_main_t * vm, dns_main_t * dm,
vlib_buffer_t * b0)
{
u32 bi = 0;
- fib_prefix_t prefix;
- fib_node_index_t fei;
- u32 sw_if_index, fib_index;
- ip4_main_t *im4 = &ip4_main;
- ip_lookup_main_t *lm4 = &im4->lookup_main;
- ip_interface_address_t *ia = 0;
- ip4_address_t *src_address;
+ ip4_address_t src_address;
ip4_header_t *ip;
udp_header_t *udp;
dns_header_t *dh;
@@ -2839,50 +2729,9 @@ vnet_send_dns4_reply (vlib_main_t * vm, dns_main_t * dm,
vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; /* "local0" */
vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; /* default VRF for now */
- /* Find a FIB path to the peer we're trying to answer */
- clib_memcpy (&prefix.fp_addr.ip4, pr->dst_address, sizeof (ip4_address_t));
- prefix.fp_proto = FIB_PROTOCOL_IP4;
- prefix.fp_len = 32;
-
- fib_index = fib_table_find (prefix.fp_proto, 0 /* default VRF for now */ );
- if (fib_index == (u32) ~ 0)
- {
- clib_warning ("no fib table");
- return;
- }
-
- fei = fib_table_lookup (fib_index, &prefix);
-
- /* Couldn't find route to destination. Bail out. */
- if (fei == FIB_NODE_INDEX_INVALID)
- {
- clib_warning ("no route to DNS server");
- return;
- }
-
- sw_if_index = fib_entry_get_resolving_interface (fei);
-
- if (sw_if_index == ~0)
- {
- clib_warning (
- "route to %U exists, fei %d, get_resolving_interface returned"
- " ~0",
- format_ip4_address, &prefix.fp_addr, fei);
- return;
- }
-
- /* *INDENT-OFF* */
- foreach_ip_interface_address(lm4, ia, sw_if_index, 1 /* honor unnumbered */,
- ({
- src_address = ip_interface_address_get_address (lm4, ia);
- goto found_src_address;
- }));
- /* *INDENT-ON* */
-
- clib_warning ("FIB BUG");
- return;
-
-found_src_address:
+ if (!ip4_sas (0 /* default VRF for now */, ~0,
+ (const ip4_address_t *) &pr->dst_address, &src_address))
+ return;
ip = vlib_buffer_get_current (b0);
udp = (udp_header_t *) (ip + 1);
@@ -2975,7 +2824,7 @@ found_src_address:
ip->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
ip->ttl = 255;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
+ ip->src_address.as_u32 = src_address.as_u32;
clib_memcpy (ip->dst_address.as_u8, pr->dst_address,
sizeof (ip4_address_t));
ip->checksum = ip4_header_checksum (ip);
@@ -3020,7 +2869,6 @@ dns_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (dns_init) = {
.init_order = VLIB_INITS ("flow_classify_init", "dns_init"),
};
@@ -3030,7 +2878,6 @@ VLIB_PLUGIN_REGISTER () =
.version = VPP_BUILD_VER,
.description = "Simple DNS name resolver",
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/dns/dns_packet.h b/src/plugins/dns/dns_packet.h
index da5ddfa64fe..13daf7849de 100644
--- a/src/plugins/dns/dns_packet.h
+++ b/src/plugins/dns/dns_packet.h
@@ -20,7 +20,6 @@
* DNS packet header format
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u16 id; /**< transaction ID */
u16 flags; /**< flags */
@@ -29,7 +28,6 @@ typedef CLIB_PACKED (struct {
u16 nscount; /**< number of name servers */
u16 arcount; /**< number of additional records */
}) dns_header_t;
-/* *INDENT-ON* */
#define DNS_RCODE_MASK (0xf)
#define DNS_RCODE_NO_ERROR 0
@@ -99,17 +97,14 @@ typedef CLIB_PACKED (struct {
/**
* DNS "question" fixed header.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u16 type; /**< record type requested */
u16 class; /**< class, 1 = internet */
}) dns_query_t;
-/* *INDENT-ON* */
/**
* DNS RR fixed header.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u16 type; /**< record type */
u16 class; /**< class, 1 = internet */
@@ -118,7 +113,6 @@ typedef CLIB_PACKED (struct {
/**< length of r */
u8 rdata[0];
}) dns_rr_t;
-/* *INDENT-ON* */
/*
* There are quite a number of DNS record types
diff --git a/src/plugins/dns/reply_node.c b/src/plugins/dns/reply_node.c
index b15943a6448..cc9f6065474 100644
--- a/src/plugins/dns/reply_node.c
+++ b/src/plugins/dns/reply_node.c
@@ -200,7 +200,6 @@ dns46_reply_node_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dns46_reply_node) =
{
.function = dns46_reply_node_fn,
@@ -216,7 +215,6 @@ VLIB_REGISTER_NODE (dns46_reply_node) =
[DNS46_REPLY_NEXT_PUNT] = "error-punt",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dns/request_node.c b/src/plugins/dns/request_node.c
index 72a76d12e4f..13ebc4c2c13 100644
--- a/src/plugins/dns/request_node.c
+++ b/src/plugins/dns/request_node.c
@@ -208,13 +208,7 @@ dns46_request_inline (vlib_main_t * vm,
label0 = (u8 *) (d0 + 1);
- /*
- * vnet_dns_labels_to_name produces a non NULL terminated vector
- * vnet_dns_resolve_name expects a C-string.
- */
name0 = vnet_dns_labels_to_name (label0, (u8 *) d0, (u8 **) & q0);
- vec_add1 (name0, 0);
- _vec_len (name0) -= 1;
t0->request_type = DNS_PEER_PENDING_NAME_TO_IP;
@@ -242,6 +236,11 @@ dns46_request_inline (vlib_main_t * vm,
clib_memcpy_fast (t0->dst_address, ip40->src_address.as_u8,
sizeof (ip4_address_t));
+ /*
+ * vnet_dns_labels_to_name produces a non NULL terminated vector
+ * vnet_dns_resolve_name expects a C-string.
+ */
+ vec_add1 (name0, 0);
vnet_dns_resolve_name (vm, dm, name0, t0, &ep0);
if (ep0)
@@ -289,7 +288,6 @@ dns4_request_node_fn (vlib_main_t * vm,
return dns46_request_inline (vm, node, frame, 0 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dns4_request_node) =
{
.function = dns4_request_node_fn,
@@ -306,7 +304,6 @@ VLIB_REGISTER_NODE (dns4_request_node) =
[DNS46_REQUEST_NEXT_IP_LOOKUP] = "ip4-lookup",
},
};
-/* *INDENT-ON* */
static uword
dns6_request_node_fn (vlib_main_t * vm,
@@ -316,7 +313,6 @@ dns6_request_node_fn (vlib_main_t * vm,
return dns46_request_inline (vm, node, frame, 1 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dns6_request_node) =
{
.function = dns6_request_node_fn,
@@ -333,7 +329,6 @@ VLIB_REGISTER_NODE (dns6_request_node) =
[DNS46_REQUEST_NEXT_IP_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/CMakeLists.txt b/src/plugins/dpdk/CMakeLists.txt
index 5de75e76289..48c56f35282 100644
--- a/src/plugins/dpdk/CMakeLists.txt
+++ b/src/plugins/dpdk/CMakeLists.txt
@@ -90,8 +90,10 @@ else()
##############################################################################
# libnuma
##############################################################################
- vpp_plugin_find_library(dpdk NUMA_LIB "numa")
- list(APPEND DPDK_LINK_LIBRARIES ${NUMA_LIB})
+ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
+ vpp_plugin_find_library(dpdk NUMA_LIB "numa")
+ list(APPEND DPDK_LINK_LIBRARIES ${NUMA_LIB})
+ endif()
##############################################################################
# Mellanox libraries
@@ -103,12 +105,10 @@ else()
list(APPEND DPDK_LINK_LIBRARIES "${MNL_LIB}")
else()
message(WARNING "EXPERIMENTAL: DPDK plugin without dlopen mode")
- vpp_plugin_find_library(dpdk IBVERBS_LIB "libibverbs.a")
- vpp_plugin_find_library(dpdk MLX5_LIB "libmlx5.a")
- vpp_plugin_find_library(dpdk MLX4_LIB "libmlx4.a")
- vpp_plugin_find_library(dpdk CCAN_LIB "libccan.a")
- vpp_plugin_find_library(dpdk RDMA_UTIL_LIB "rdma_util")
- string_append(DPDK_LINK_FLAGS "-Wl,--whole-archive,${IBVERBS_LIB},${MLX5_LIB},${MLX4_LIB},${CCAN_LIB},${RDMA_UTIL_LIB},--no-whole-archive")
+ vpp_plugin_find_library(dpdk IBVERBS_LIB "libibverbs.a")
+ vpp_plugin_find_library(dpdk MLX5_LIB "libmlx5.a")
+ vpp_plugin_find_library(dpdk MLX4_LIB "libmlx4.a")
+ string_append(DPDK_LINK_FLAGS "-Wl,--whole-archive,${IBVERBS_LIB},${MLX5_LIB},${MLX4_LIB} -Wl,--no-whole-archive,--exclude-libs,ALL")
endif()
endif()
endif()
@@ -131,10 +131,10 @@ add_vpp_plugin(dpdk
SOURCES
buffer.c
main.c
- thread.c
device/cli.c
device/common.c
device/device.c
+ device/driver.c
device/flow.c
device/format.c
device/init.c
@@ -158,7 +158,7 @@ add_vpp_plugin(dpdk
${DPDK_LINK_LIBRARIES}
LINK_LIBRARIES
- ${OPENSSL_LIBRARIES}
+ ${OPENSSL_CRYPTO_LIBRARIES}
COMPONENT
vpp-plugin-dpdk
diff --git a/src/plugins/dpdk/buffer.c b/src/plugins/dpdk/buffer.c
index 97184519428..f3137a996d6 100644
--- a/src/plugins/dpdk/buffer.c
+++ b/src/plugins/dpdk/buffer.c
@@ -19,6 +19,7 @@
#include <rte_config.h>
#include <rte_mbuf.h>
#include <rte_ethdev.h>
+#include <rte_cryptodev.h>
#include <rte_vfio.h>
#include <rte_version.h>
@@ -115,6 +116,9 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp)
mp->populated_size++;
nmp->populated_size++;
}
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 3, 0, 0)
+ mp->flags &= ~RTE_MEMPOOL_F_NON_IO;
+#endif
/* call the object initializers */
rte_mempool_obj_iter (mp, rte_pktmbuf_init, 0);
@@ -131,11 +135,11 @@ dpdk_buffer_pool_init (vlib_main_t * vm, vlib_buffer_pool_t * bp)
{
vlib_buffer_t *b;
b = vlib_buffer_ptr_from_index (buffer_mem_start, bp->buffers[i], 0);
- vlib_buffer_copy_template (b, &bp->buffer_template);
+ b->template = bp->buffer_template;
}
/* map DMA pages if at least one physical device exists */
- if (rte_eth_dev_count_avail ())
+ if (rte_eth_dev_count_avail () || rte_cryptodev_count ())
{
uword i;
size_t page_sz;
@@ -193,7 +197,7 @@ dpdk_ops_vpp_free (struct rte_mempool *mp)
#endif
static_always_inline void
-dpdk_ops_vpp_enqueue_one (vlib_buffer_t * bt, void *obj)
+dpdk_ops_vpp_enqueue_one (vlib_buffer_template_t *bt, void *obj)
{
/* Only non-replicated packets (b->ref_count == 1) expected */
@@ -201,7 +205,7 @@ dpdk_ops_vpp_enqueue_one (vlib_buffer_t * bt, void *obj)
vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
ASSERT (b->ref_count == 1);
ASSERT (b->buffer_pool_index == bt->buffer_pool_index);
- vlib_buffer_copy_template (b, bt);
+ b->template = *bt;
}
int
@@ -210,14 +214,14 @@ CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue) (struct rte_mempool * mp,
{
const int batch_size = 32;
vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_t bt;
+ vlib_buffer_template_t bt;
u8 buffer_pool_index = mp->pool_id;
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
u32 bufs[batch_size];
u32 n_left = n;
void *const *obj = obj_table;
- vlib_buffer_copy_template (&bt, &bp->buffer_template);
+ bt = bp->buffer_template;
while (n_left >= 4)
{
@@ -259,9 +263,9 @@ CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue) (struct rte_mempool * mp,
CLIB_MARCH_FN_REGISTRATION (dpdk_ops_vpp_enqueue);
static_always_inline void
-dpdk_ops_vpp_enqueue_no_cache_one (vlib_main_t * vm, struct rte_mempool *old,
+dpdk_ops_vpp_enqueue_no_cache_one (vlib_main_t *vm, struct rte_mempool *old,
struct rte_mempool *new, void *obj,
- vlib_buffer_t * bt)
+ vlib_buffer_template_t *bt)
{
struct rte_mbuf *mb = obj;
vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
@@ -269,7 +273,7 @@ dpdk_ops_vpp_enqueue_no_cache_one (vlib_main_t * vm, struct rte_mempool *old,
if (clib_atomic_sub_fetch (&b->ref_count, 1) == 0)
{
u32 bi = vlib_get_buffer_index (vm, b);
- vlib_buffer_copy_template (b, bt);
+ b->template = *bt;
vlib_buffer_pool_put (vm, bt->buffer_pool_index, &bi, 1);
return;
}
@@ -281,12 +285,12 @@ CLIB_MULTIARCH_FN (dpdk_ops_vpp_enqueue_no_cache) (struct rte_mempool * cmp,
unsigned n)
{
vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_t bt;
+ vlib_buffer_template_t bt;
struct rte_mempool *mp;
mp = dpdk_mempool_by_buffer_pool_index[cmp->pool_id];
u8 buffer_pool_index = cmp->pool_id;
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
- vlib_buffer_copy_template (&bt, &bp->buffer_template);
+ bt = bp->buffer_template;
while (n >= 4)
{
@@ -456,11 +460,9 @@ dpdk_buffer_pools_create (vlib_main_t * vm)
ops.dequeue = dpdk_ops_vpp_dequeue_no_cache;
rte_mempool_register_ops (&ops);
- /* *INDENT-OFF* */
vec_foreach (bp, vm->buffer_main->buffer_pools)
if (bp->start && (err = dpdk_buffer_pool_init (vm, bp)))
return err;
- /* *INDENT-ON* */
return 0;
}
diff --git a/src/plugins/dpdk/cryptodev/cryptodev.c b/src/plugins/dpdk/cryptodev/cryptodev.c
index d52fa407ec5..43c2c879aab 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev.c
@@ -29,7 +29,6 @@
#include <rte_cryptodev.h>
#include <rte_crypto_sym.h>
#include <rte_crypto.h>
-#include <rte_cryptodev_pmd.h>
#include <rte_config.h>
#include "cryptodev.h"
@@ -52,12 +51,19 @@ prepare_aead_xform (struct rte_crypto_sym_xform *xform,
xform->type = RTE_CRYPTO_SYM_XFORM_AEAD;
xform->next = 0;
- if (key->alg != VNET_CRYPTO_ALG_AES_128_GCM &&
- key->alg != VNET_CRYPTO_ALG_AES_192_GCM &&
- key->alg != VNET_CRYPTO_ALG_AES_256_GCM)
+ if (key->alg == VNET_CRYPTO_ALG_AES_128_GCM ||
+ key->alg == VNET_CRYPTO_ALG_AES_192_GCM ||
+ key->alg == VNET_CRYPTO_ALG_AES_256_GCM)
+ {
+ aead_xform->algo = RTE_CRYPTO_AEAD_AES_GCM;
+ }
+ else if (key->alg == VNET_CRYPTO_ALG_CHACHA20_POLY1305)
+ {
+ aead_xform->algo = RTE_CRYPTO_AEAD_CHACHA20_POLY1305;
+ }
+ else
return -1;
- aead_xform->algo = RTE_CRYPTO_AEAD_AES_GCM;
aead_xform->op = (op_type == CRYPTODEV_OP_TYPE_ENCRYPT) ?
RTE_CRYPTO_AEAD_OP_ENCRYPT : RTE_CRYPTO_AEAD_OP_DECRYPT;
aead_xform->aad_length = aad_len;
@@ -135,7 +141,7 @@ prepare_linked_xform (struct rte_crypto_sym_xform *xforms,
}
static_always_inline void
-cryptodev_session_del (struct rte_cryptodev_sym_session *sess)
+cryptodev_session_del (cryptodev_session_t *sess)
{
u32 n_devs, i;
@@ -145,9 +151,14 @@ cryptodev_session_del (struct rte_cryptodev_sym_session *sess)
n_devs = rte_cryptodev_count ();
for (i = 0; i < n_devs; i++)
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (rte_cryptodev_sym_session_free (i, sess) == 0)
+ break;
+#else
rte_cryptodev_sym_session_clear (i, sess);
rte_cryptodev_sym_session_free (sess);
+#endif
}
static int
@@ -310,7 +321,7 @@ cryptodev_sess_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop,
if (cryptodev_check_supported_vnet_alg (key) == 0)
return;
- vec_validate (ckey->keys, idx);
+ vec_validate (ckey->keys, vec_len (cmt->per_numa_data) - 1);
vec_foreach_index (i, ckey->keys)
vec_validate (ckey->keys[i], CRYPTODEV_N_OP_TYPES - 1);
}
@@ -322,6 +333,59 @@ cryptodev_key_handler (vlib_main_t *vm, vnet_crypto_key_op_t kop,
cryptodev_sess_handler (vm, kop, idx, 8);
}
+clib_error_t *
+allocate_session_pools (u32 numa_node,
+ cryptodev_session_pool_t *sess_pools_elt, u32 len)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ u8 *name;
+ clib_error_t *error = NULL;
+
+ name = format (0, "vcrypto_sess_pool_%u_%04x%c", numa_node, len, 0);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ sess_pools_elt->sess_pool = rte_cryptodev_sym_session_pool_create (
+ (char *) name, CRYPTODEV_NB_SESSION, cmt->sess_sz, 0, 0, numa_node);
+#else
+ sess_pools_elt->sess_pool = rte_cryptodev_sym_session_pool_create (
+ (char *) name, CRYPTODEV_NB_SESSION, 0, 0, 0, numa_node);
+#endif
+
+ if (!sess_pools_elt->sess_pool)
+ {
+ error = clib_error_return (0, "Not enough memory for mp %s", name);
+ goto clear_mempools;
+ }
+ vec_free (name);
+
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ name = format (0, "crypto_sess_pool_%u_%04x%c", numa_node, len, 0);
+ sess_pools_elt->sess_priv_pool = rte_mempool_create (
+ (char *) name, CRYPTODEV_NB_SESSION * (cmt->drivers_cnt), cmt->sess_sz, 0,
+ 0, NULL, NULL, NULL, NULL, numa_node, 0);
+
+ if (!sess_pools_elt->sess_priv_pool)
+ {
+ error = clib_error_return (0, "Not enough memory for mp %s", name);
+ goto clear_mempools;
+ }
+ vec_free (name);
+#endif
+
+clear_mempools:
+ if (error)
+ {
+ vec_free (name);
+ if (sess_pools_elt->sess_pool)
+ rte_mempool_free (sess_pools_elt->sess_pool);
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ if (sess_pools_elt->sess_priv_pool)
+ rte_mempool_free (sess_pools_elt->sess_priv_pool);
+#endif
+ return error;
+ }
+ return 0;
+}
+
int
cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
u32 aad_len)
@@ -330,52 +394,106 @@ cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
cryptodev_numa_data_t *numa_data;
cryptodev_inst_t *dev_inst;
vnet_crypto_key_t *key = vnet_crypto_get_key (idx);
- struct rte_mempool *sess_pool, *sess_priv_pool;
+ struct rte_mempool *sess_pool;
+ cryptodev_session_pool_t *sess_pools_elt;
cryptodev_key_t *ckey = vec_elt_at_index (cmt->keys, idx);
struct rte_crypto_sym_xform xforms_enc[2] = { { 0 } };
struct rte_crypto_sym_xform xforms_dec[2] = { { 0 } };
- struct rte_cryptodev_sym_session *sessions[CRYPTODEV_N_OP_TYPES] = { 0 };
+ cryptodev_session_t *sessions[CRYPTODEV_N_OP_TYPES] = { 0 };
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ struct rte_mempool *sess_priv_pool;
+ struct rte_cryptodev_info dev_info;
+#endif
u32 numa_node = vm->numa_node;
- int ret;
+ clib_error_t *error;
+ int ret = 0;
+ u8 found = 0;
numa_data = vec_elt_at_index (cmt->per_numa_data, numa_node);
- sess_pool = numa_data->sess_pool;
- sess_priv_pool = numa_data->sess_priv_pool;
- sessions[CRYPTODEV_OP_TYPE_ENCRYPT] =
- rte_cryptodev_sym_session_create (sess_pool);
- if (!sessions[CRYPTODEV_OP_TYPE_ENCRYPT])
+ clib_spinlock_lock (&cmt->tlock);
+ vec_foreach (sess_pools_elt, numa_data->sess_pools)
{
- ret = -1;
- goto clear_key;
+ if (sess_pools_elt->sess_pool == NULL)
+ {
+ error = allocate_session_pools (numa_node, sess_pools_elt,
+ vec_len (numa_data->sess_pools) - 1);
+ if (error)
+ {
+ ret = -1;
+ goto clear_key;
+ }
+ }
+ if (rte_mempool_avail_count (sess_pools_elt->sess_pool) >= 2)
+ {
+ found = 1;
+ break;
+ }
}
- sessions[CRYPTODEV_OP_TYPE_DECRYPT] =
- rte_cryptodev_sym_session_create (sess_pool);
- if (!sessions[CRYPTODEV_OP_TYPE_DECRYPT])
+ if (found == 0)
{
- ret = -1;
- goto clear_key;
+ vec_add2 (numa_data->sess_pools, sess_pools_elt, 1);
+ error = allocate_session_pools (numa_node, sess_pools_elt,
+ vec_len (numa_data->sess_pools) - 1);
+ if (error)
+ {
+ ret = -1;
+ goto clear_key;
+ }
}
+ sess_pool = sess_pools_elt->sess_pool;
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ sess_priv_pool = sess_pools_elt->sess_priv_pool;
+
+ sessions[CRYPTODEV_OP_TYPE_ENCRYPT] =
+ rte_cryptodev_sym_session_create (sess_pool);
+
+ sessions[CRYPTODEV_OP_TYPE_DECRYPT] =
+ rte_cryptodev_sym_session_create (sess_pool);
+#endif
+
if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
ret = prepare_linked_xform (xforms_enc, CRYPTODEV_OP_TYPE_ENCRYPT, key);
else
ret =
prepare_aead_xform (xforms_enc, CRYPTODEV_OP_TYPE_ENCRYPT, key, aad_len);
if (ret)
- return 0;
+ {
+ ret = -1;
+ goto clear_key;
+ }
if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
prepare_linked_xform (xforms_dec, CRYPTODEV_OP_TYPE_DECRYPT, key);
else
prepare_aead_xform (xforms_dec, CRYPTODEV_OP_TYPE_DECRYPT, key, aad_len);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ dev_inst = vec_elt_at_index (cmt->cryptodev_inst, 0);
+ u32 dev_id = dev_inst->dev_id;
+ sessions[CRYPTODEV_OP_TYPE_ENCRYPT] =
+ rte_cryptodev_sym_session_create (dev_id, xforms_enc, sess_pool);
+ sessions[CRYPTODEV_OP_TYPE_DECRYPT] =
+ rte_cryptodev_sym_session_create (dev_id, xforms_dec, sess_pool);
+ if (!sessions[CRYPTODEV_OP_TYPE_ENCRYPT] ||
+ !sessions[CRYPTODEV_OP_TYPE_DECRYPT])
+ {
+ ret = -1;
+ goto clear_key;
+ }
+
+ rte_cryptodev_sym_session_opaque_data_set (
+ sessions[CRYPTODEV_OP_TYPE_ENCRYPT], aad_len);
+ rte_cryptodev_sym_session_opaque_data_set (
+ sessions[CRYPTODEV_OP_TYPE_DECRYPT], aad_len);
+#else
vec_foreach (dev_inst, cmt->cryptodev_inst)
{
u32 dev_id = dev_inst->dev_id;
- struct rte_cryptodev *cdev = rte_cryptodev_pmd_get_dev (dev_id);
- u32 driver_id = cdev->driver_id;
+ rte_cryptodev_info_get (dev_id, &dev_info);
+ u32 driver_id = dev_info.driver_id;
/* if the session is already configured for the driver type, avoid
configuring it again to increase the session data's refcnt */
@@ -390,11 +508,12 @@ cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
dev_id, sessions[CRYPTODEV_OP_TYPE_DECRYPT], xforms_dec,
sess_priv_pool);
if (ret < 0)
- return ret;
+ goto clear_key;
}
sessions[CRYPTODEV_OP_TYPE_ENCRYPT]->opaque_data = aad_len;
sessions[CRYPTODEV_OP_TYPE_DECRYPT]->opaque_data = aad_len;
+#endif
CLIB_MEMORY_STORE_BARRIER ();
ckey->keys[numa_node][CRYPTODEV_OP_TYPE_ENCRYPT] =
@@ -408,6 +527,7 @@ clear_key:
cryptodev_session_del (sessions[CRYPTODEV_OP_TYPE_ENCRYPT]);
cryptodev_session_del (sessions[CRYPTODEV_OP_TYPE_DECRYPT]);
}
+ clib_spinlock_unlock (&cmt->tlock);
return ret;
}
@@ -459,14 +579,14 @@ cryptodev_assign_resource (cryptodev_engine_thread_t * cet,
return -EBUSY;
vec_foreach_index (idx, cmt->cryptodev_inst)
- {
- cinst = cmt->cryptodev_inst + idx;
- if (cinst->dev_id == cet->cryptodev_id &&
- cinst->q_id == cet->cryptodev_q)
- break;
- }
+ {
+ cinst = cmt->cryptodev_inst + idx;
+ if (cinst->dev_id == cet->cryptodev_id &&
+ cinst->q_id == cet->cryptodev_q)
+ break;
+ }
/* invalid existing worker resource assignment */
- if (idx == vec_len (cmt->cryptodev_inst))
+ if (idx >= vec_len (cmt->cryptodev_inst))
return -EINVAL;
clib_spinlock_lock (&cmt->tlock);
clib_bitmap_set_no_check (cmt->active_cdev_inst_mask, idx, 0);
@@ -547,6 +667,90 @@ VLIB_CLI_COMMAND (show_cryptodev_assignment, static) = {
};
static clib_error_t *
+cryptodev_show_cache_rings_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ u32 thread_index = 0;
+ u16 i;
+ vec_foreach_index (thread_index, cmt->per_thread_data)
+ {
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 head = ring->head;
+ u16 tail = ring->tail;
+ u16 n_cached = (CRYPTODEV_CACHE_QUEUE_SIZE - tail + head) &
+ CRYPTODEV_CACHE_QUEUE_MASK;
+
+ u16 enq_head = ring->enq_head;
+ u16 deq_tail = ring->deq_tail;
+ u16 n_frames_inflight =
+ (enq_head == deq_tail) ?
+ 0 :
+ ((CRYPTODEV_CACHE_QUEUE_SIZE + enq_head - deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK);
+ /* even if some elements of dequeued frame are still pending for deq
+ * we consider the frame as processed */
+ u16 n_frames_processed =
+ ((tail == deq_tail) && (ring->frames[deq_tail].f == 0)) ?
+ 0 :
+ ((CRYPTODEV_CACHE_QUEUE_SIZE - tail + deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK) +
+ 1;
+ /* even if some elements of enqueued frame are still pending for enq
+ * we consider the frame as enqueued */
+ u16 n_frames_pending =
+ (head == enq_head) ? 0 :
+ ((CRYPTODEV_CACHE_QUEUE_SIZE - enq_head + head) &
+ CRYPTODEV_CACHE_QUEUE_MASK) -
+ 1;
+
+ u16 elts_to_enq =
+ (ring->frames[enq_head].n_elts - ring->frames[enq_head].enq_elts_head);
+ u16 elts_to_deq =
+ (ring->frames[deq_tail].n_elts - ring->frames[deq_tail].deq_elts_tail);
+
+ u32 elts_total = 0;
+
+ for (i = 0; i < CRYPTODEV_CACHE_QUEUE_SIZE; i++)
+ elts_total += ring->frames[i].n_elts;
+
+ if (vlib_num_workers () > 0 && thread_index == 0)
+ continue;
+
+ vlib_cli_output (vm, "\n\n");
+ vlib_cli_output (vm, "Frames cached in the ring: %u", n_cached);
+ vlib_cli_output (vm, "Frames cached but not processed: %u",
+ n_frames_pending);
+ vlib_cli_output (vm, "Frames inflight: %u", n_frames_inflight);
+ vlib_cli_output (vm, "Frames processed: %u", n_frames_processed);
+ vlib_cli_output (vm, "Elements total: %u", elts_total);
+ vlib_cli_output (vm, "Elements inflight: %u", cet->inflight);
+ vlib_cli_output (vm, "Head index: %u", head);
+ vlib_cli_output (vm, "Tail index: %u", tail);
+ vlib_cli_output (vm, "Current frame index beeing enqueued: %u",
+ enq_head);
+ vlib_cli_output (vm, "Current frame index being dequeued: %u", deq_tail);
+ vlib_cli_output (vm,
+ "Elements in current frame to be enqueued: %u, waiting "
+ "to be enqueued: %u",
+ ring->frames[enq_head].n_elts, elts_to_enq);
+ vlib_cli_output (vm,
+ "Elements in current frame to be dequeued: %u, waiting "
+ "to be dequeued: %u",
+ ring->frames[deq_tail].n_elts, elts_to_deq);
+ vlib_cli_output (vm, "\n\n");
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_cryptodev_sw_rings, static) = {
+ .path = "show cryptodev cache status",
+ .short_help = "show status of all cryptodev cache rings",
+ .function = cryptodev_show_cache_rings_fn,
+};
+
+static clib_error_t *
cryptodev_set_assignment_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
@@ -643,6 +847,15 @@ cryptodev_configure (vlib_main_t *vm, u32 cryptodev_id)
rte_cryptodev_info_get (cryptodev_id, &info);
+ /* Starting from DPDK 22.11, VPP does not allow heterogeneous crypto devices
+ anymore. Only devices that have the same driver type as the first
+ initialized device can be initialized.
+ */
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (cmt->drivers_cnt == 1 && cmt->driver_id != info.driver_id)
+ return -1;
+#endif
+
if (!(info.feature_flags & RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO))
return -1;
@@ -656,7 +869,9 @@ cryptodev_configure (vlib_main_t *vm, u32 cryptodev_id)
struct rte_cryptodev_qp_conf qp_cfg;
qp_cfg.mp_session = 0;
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
qp_cfg.mp_session_private = 0;
+#endif
qp_cfg.nb_descriptors = CRYPTODEV_NB_CRYPTO_OPS;
ret = rte_cryptodev_queue_pair_setup (cryptodev_id, i, &qp_cfg,
@@ -675,16 +890,30 @@ cryptodev_configure (vlib_main_t *vm, u32 cryptodev_id)
/* start the device */
rte_cryptodev_start (cryptodev_id);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (cmt->drivers_cnt == 0)
+ {
+ cmt->drivers_cnt = 1;
+ cmt->driver_id = info.driver_id;
+ cmt->sess_sz = rte_cryptodev_sym_get_private_session_size (cryptodev_id);
+ }
+#endif
+
for (i = 0; i < info.max_nb_queue_pairs; i++)
{
cryptodev_inst_t *cdev_inst;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ const char *dev_name = rte_dev_name (info.device);
+#else
+ const char *dev_name = info.device->name;
+#endif
vec_add2(cmt->cryptodev_inst, cdev_inst, 1);
- cdev_inst->desc = vec_new (char, strlen (info.device->name) + 10);
+ cdev_inst->desc = vec_new (char, strlen (dev_name) + 10);
cdev_inst->dev_id = cryptodev_id;
cdev_inst->q_id = i;
- snprintf (cdev_inst->desc, strlen (info.device->name) + 9,
- "%s_q%u", info.device->name, i);
+ snprintf (cdev_inst->desc, strlen (dev_name) + 9, "%s_q%u",
+ info.device->name, i);
}
return 0;
@@ -1016,46 +1245,26 @@ cryptodev_probe (vlib_main_t *vm, u32 n_workers)
return 0;
}
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
static void
-cryptodev_get_max_sz (u32 *max_sess_sz, u32 *max_dp_sz)
+is_drv_unique (u32 driver_id, u32 **unique_drivers)
{
- cryptodev_main_t *cmt = &cryptodev_main;
- cryptodev_inst_t *cinst;
- u32 max_sess = 0, max_dp = 0;
+ u32 *unique_elt;
+ u8 found = 0;
- vec_foreach (cinst, cmt->cryptodev_inst)
+ vec_foreach (unique_elt, *unique_drivers)
{
- u32 sess_sz = rte_cryptodev_sym_get_private_session_size (cinst->dev_id);
- u32 dp_sz = rte_cryptodev_get_raw_dp_ctx_size (cinst->dev_id);
-
- max_sess = clib_max (sess_sz, max_sess);
- max_dp = clib_max (dp_sz, max_dp);
+ if (*unique_elt == driver_id)
+ {
+ found = 1;
+ break;
+ }
}
- *max_sess_sz = max_sess;
- *max_dp_sz = max_dp;
-}
-
-static void
-dpdk_disable_cryptodev_engine (vlib_main_t *vm)
-{
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- cryptodev_main_t *cmt = &cryptodev_main;
- u32 i;
-
- for (i = (vlib_num_workers () > 0); i < tm->n_vlib_mains; i++)
- {
- u32 numa = vlib_get_main_by_index (i)->numa_node;
- cryptodev_numa_data_t *numa_data;
-
- vec_validate (cmt->per_numa_data, numa);
- numa_data = cmt->per_numa_data + numa;
- if (numa_data->sess_pool)
- rte_mempool_free (numa_data->sess_pool);
- if (numa_data->sess_priv_pool)
- rte_mempool_free (numa_data->sess_priv_pool);
- }
+ if (!found)
+ vec_add1 (*unique_drivers, driver_id);
}
+#endif
clib_error_t *
dpdk_cryptodev_init (vlib_main_t * vm)
@@ -1064,30 +1273,53 @@ dpdk_cryptodev_init (vlib_main_t * vm)
vlib_thread_main_t *tm = vlib_get_thread_main ();
cryptodev_engine_thread_t *cet;
cryptodev_numa_data_t *numa_data;
- struct rte_mempool *mp;
+ u32 node;
+ u8 nodes = 0;
u32 skip_master = vlib_num_workers () > 0;
u32 n_workers = tm->n_vlib_mains - skip_master;
- u32 numa = vm->numa_node;
- u32 sess_sz, dp_sz;
u32 eidx;
u32 i;
- u8 *name = 0;
clib_error_t *error;
cmt->iova_mode = rte_eal_iova_mode ();
- vec_validate (cmt->per_numa_data, vm->numa_node);
+ clib_bitmap_foreach (node, tm->cpu_socket_bitmap)
+ {
+ if (node >= nodes)
+ nodes = node;
+ }
+
+ vec_validate (cmt->per_numa_data, nodes);
+ vec_foreach (numa_data, cmt->per_numa_data)
+ {
+ vec_validate (numa_data->sess_pools, 0);
+ }
/* probe all cryptodev devices and get queue info */
if (cryptodev_probe (vm, n_workers) < 0)
+ return 0;
+
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
+ struct rte_cryptodev_info dev_info;
+ cryptodev_inst_t *dev_inst;
+ u32 *unique_drivers = 0;
+ vec_foreach (dev_inst, cmt->cryptodev_inst)
{
- error = clib_error_return (0, "Failed to configure cryptodev");
- goto err_handling;
+ u32 dev_id = dev_inst->dev_id;
+ rte_cryptodev_info_get (dev_id, &dev_info);
+ u32 driver_id = dev_info.driver_id;
+ is_drv_unique (driver_id, &unique_drivers);
+
+ u32 sess_sz =
+ rte_cryptodev_sym_get_private_session_size (dev_inst->dev_id);
+ cmt->sess_sz = clib_max (cmt->sess_sz, sess_sz);
}
- cryptodev_get_max_sz (&sess_sz, &dp_sz);
+ cmt->drivers_cnt = vec_len (unique_drivers);
+ vec_free (unique_drivers);
+#endif
- clib_bitmap_vec_validate (cmt->active_cdev_inst_mask, tm->n_vlib_mains);
+ clib_bitmap_vec_validate (cmt->active_cdev_inst_mask, n_workers);
clib_spinlock_init (&cmt->tlock);
vec_validate_aligned(cmt->per_thread_data, tm->n_vlib_mains - 1,
@@ -1095,46 +1327,13 @@ dpdk_cryptodev_init (vlib_main_t * vm)
for (i = skip_master; i < tm->n_vlib_mains; i++)
{
cet = cmt->per_thread_data + i;
- numa = vlib_get_main_by_index (i)->numa_node;
- vec_validate (cmt->per_numa_data, numa);
- numa_data = vec_elt_at_index (cmt->per_numa_data, numa);
-
- if (!numa_data->sess_pool)
+ if (cryptodev_assign_resource (cet, 0, CRYPTODEV_RESOURCE_ASSIGN_AUTO) <
+ 0)
{
- /* create session pool for the numa node */
- name = format (0, "vcryptodev_sess_pool_%u%c", numa, 0);
- mp = rte_cryptodev_sym_session_pool_create (
- (char *) name, CRYPTODEV_NB_SESSION, 0, 0, 0, numa);
- if (!mp)
- {
- error =
- clib_error_return (0, "Not enough memory for mp %s", name);
- goto err_handling;
- }
- vec_free (name);
-
- numa_data->sess_pool = mp;
-
- /* create session private pool for the numa node */
- name = format (0, "cryptodev_sess_pool_%u%c", numa, 0);
- mp =
- rte_mempool_create ((char *) name, CRYPTODEV_NB_SESSION, sess_sz,
- 0, 0, NULL, NULL, NULL, NULL, numa, 0);
- if (!mp)
- {
- error =
- clib_error_return (0, "Not enough memory for mp %s", name);
- vec_free (name);
- goto err_handling;
- }
-
- vec_free (name);
-
- numa_data->sess_priv_pool = mp;
+ error = clib_error_return (0, "Failed to configure cryptodev");
+ goto err_handling;
}
-
- cryptodev_assign_resource (cet, 0, CRYPTODEV_RESOURCE_ASSIGN_AUTO);
}
/* register handler */
@@ -1154,13 +1353,10 @@ dpdk_cryptodev_init (vlib_main_t * vm)
/* this engine is only enabled when cryptodev device(s) are presented in
* startup.conf. Assume it is wanted to be used, turn on async mode here.
*/
- vnet_crypto_request_async_mode (1);
ipsec_set_async_mode (1);
return 0;
err_handling:
- dpdk_disable_cryptodev_engine (vm);
-
return error;
}
diff --git a/src/plugins/dpdk/cryptodev/cryptodev.h b/src/plugins/dpdk/cryptodev/cryptodev.h
index 3b47b43f538..7cd525dac56 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev.h
+++ b/src/plugins/dpdk/cryptodev/cryptodev.h
@@ -26,11 +26,13 @@
#define CRYPTODEV_CACHE_QUEUE_MASK (VNET_CRYPTO_FRAME_POOL_SIZE - 1)
#define CRYPTODEV_MAX_INFLIGHT (CRYPTODEV_NB_CRYPTO_OPS - 1)
#define CRYPTODEV_AAD_MASK (CRYPTODEV_NB_CRYPTO_OPS - 1)
-#define CRYPTODEV_DEQ_CACHE_SZ 32
-#define CRYPTODEV_NB_SESSION 10240
+#define CRYPTODE_ENQ_MAX 64
+#define CRYPTODE_DEQ_MAX 64
+#define CRYPTODEV_NB_SESSION 4096
#define CRYPTODEV_MAX_IV_SIZE 16
#define CRYPTODEV_MAX_AAD_SIZE 16
#define CRYPTODEV_MAX_N_SGL 8 /**< maximum number of segments */
+#define CRYPTODEV_MAX_PROCESED_IN_CACHE_QUEUE 8
#define CRYPTODEV_IV_OFFSET (offsetof (cryptodev_op_t, iv))
#define CRYPTODEV_AAD_OFFSET (offsetof (cryptodev_op_t, aad))
@@ -43,7 +45,10 @@
_ (AES_192_GCM, AEAD, AES_GCM, 12, 16, 8, 24) \
_ (AES_192_GCM, AEAD, AES_GCM, 12, 16, 12, 24) \
_ (AES_256_GCM, AEAD, AES_GCM, 12, 16, 8, 32) \
- _ (AES_256_GCM, AEAD, AES_GCM, 12, 16, 12, 32)
+ _ (AES_256_GCM, AEAD, AES_GCM, 12, 16, 12, 32) \
+ _ (CHACHA20_POLY1305, AEAD, CHACHA20_POLY1305, 12, 16, 0, 32) \
+ _ (CHACHA20_POLY1305, AEAD, CHACHA20_POLY1305, 12, 16, 8, 32) \
+ _ (CHACHA20_POLY1305, AEAD, CHACHA20_POLY1305, 12, 16, 12, 32)
/**
* crypto (alg, cryptodev_alg, key_size), hash (alg, digest-size)
@@ -66,7 +71,10 @@
_ (AES_256_CBC, AES_CBC, 32, SHA384, 24) \
_ (AES_128_CBC, AES_CBC, 16, SHA512, 32) \
_ (AES_192_CBC, AES_CBC, 24, SHA512, 32) \
- _ (AES_256_CBC, AES_CBC, 32, SHA512, 32)
+ _ (AES_256_CBC, AES_CBC, 32, SHA512, 32) \
+ _ (AES_128_CTR, AES_CTR, 16, SHA1, 12) \
+ _ (AES_192_CTR, AES_CTR, 24, SHA1, 12) \
+ _ (AES_256_CTR, AES_CTR, 32, SHA1, 12)
typedef enum
{
@@ -75,10 +83,16 @@ typedef enum
CRYPTODEV_N_OP_TYPES,
} cryptodev_op_type_t;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+typedef void cryptodev_session_t;
+#else
+typedef struct rte_cryptodev_sym_session cryptodev_session_t;
+#endif
+
/* Cryptodev session data, one data per direction per numa */
typedef struct
{
- struct rte_cryptodev_sym_session ***keys;
+ cryptodev_session_t ***keys;
} cryptodev_key_t;
/* Replicate DPDK rte_cryptodev_sym_capability structure with key size ranges
@@ -119,7 +133,14 @@ typedef struct
typedef struct
{
struct rte_mempool *sess_pool;
+#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
struct rte_mempool *sess_priv_pool;
+#endif
+} cryptodev_session_pool_t;
+
+typedef struct
+{
+ cryptodev_session_pool_t *sess_pools;
} cryptodev_numa_data_t;
typedef struct
@@ -135,26 +156,71 @@ typedef struct
typedef struct
{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- vlib_buffer_t *b[VNET_CRYPTO_FRAME_SIZE];
+ vnet_crypto_async_frame_t *f;
union
{
struct
{
- cryptodev_op_t **cops;
- struct rte_mempool *cop_pool;
- struct rte_ring *ring;
+ /* index of frame elt where enque to
+ * the crypto engine is happening */
+ u8 enq_elts_head;
+ /* index of the frame elt where dequeue
+ * from the crypto engine is happening */
+ u8 deq_elts_tail;
+ u8 elts_inflight;
+
+ u8 op_type;
+ u8 aad_len;
+ u8 n_elts;
+ u16 reserved;
};
+ u64 raw;
+ };
+
+ u64 frame_elts_errs_mask;
+} cryptodev_cache_ring_elt_t;
+
+typedef struct
+{
+ cryptodev_cache_ring_elt_t frames[VNET_CRYPTO_FRAME_POOL_SIZE];
+
+ union
+ {
+ struct
+ {
+ /* head of the cache ring */
+ u16 head;
+ /* tail of the cache ring */
+ u16 tail;
+ /* index of the frame where enqueue
+ * to the crypto engine is happening */
+ u16 enq_head;
+ /* index of the frame where dequeue
+ * from the crypto engine is happening */
+ u16 deq_tail;
+ };
+ u64 raw;
+ };
+} cryptodev_cache_ring_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vlib_buffer_t *b[VNET_CRYPTO_FRAME_SIZE];
+ union
+ {
+ struct rte_mempool *cop_pool;
struct
{
struct rte_crypto_raw_dp_ctx *ctx;
- struct rte_ring *cached_frame;
u16 aad_index;
u8 *aad_buf;
u64 aad_phy_addr;
- struct rte_cryptodev_sym_session *reset_sess;
+ cryptodev_session_t *reset_sess;
};
};
+
+ cryptodev_cache_ring_t cache_ring;
u16 cryptodev_id;
u16 cryptodev_q;
u16 inflight;
@@ -170,20 +236,122 @@ typedef struct
clib_bitmap_t *active_cdev_inst_mask;
clib_spinlock_t tlock;
cryptodev_capability_t *supported_caps;
+ u32 sess_sz;
+ u32 drivers_cnt;
u8 is_raw_api;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ u8 driver_id;
+#endif
} cryptodev_main_t;
extern cryptodev_main_t cryptodev_main;
+#define CRYPTODEV_CACHE_RING_GET_FRAME(r, i) \
+ ((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].f)
+
+#define CRYPTODEV_CACHE_RING_GET_ERR_MASK(r, i) \
+ ((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].frame_elts_errs_mask)
+
+#define CRYPTODEV_CACHE_RING_GET_FRAME_ELTS_INFLIGHT(r, i) \
+ (((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].enq_elts_head) - \
+ ((r)->frames[(i) &CRYPTODEV_CACHE_QUEUE_MASK].deq_elts_tail))
+
static_always_inline void
-cryptodev_mark_frame_err_status (vnet_crypto_async_frame_t *f,
- vnet_crypto_op_status_t s)
+cryptodev_cache_ring_update_enq_head (cryptodev_cache_ring_t *r,
+ vnet_crypto_async_frame_t *f)
+{
+ if (r->frames[r->enq_head].enq_elts_head == f->n_elts)
+ {
+ r->enq_head++;
+ r->enq_head &= CRYPTODEV_CACHE_QUEUE_MASK;
+ f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
+ }
+}
+
+static_always_inline bool
+cryptodev_cache_ring_update_deq_tail (cryptodev_cache_ring_t *r,
+ u16 *const deq)
+{
+ if (r->frames[*deq].deq_elts_tail == r->frames[*deq].n_elts)
+ {
+ *deq += 1;
+ *deq &= CRYPTODEV_CACHE_QUEUE_MASK;
+ return 1;
+ }
+
+ return 0;
+}
+static_always_inline u64
+cryptodev_mark_frame_fill_err (vnet_crypto_async_frame_t *f, u64 current_err,
+ u16 index, u16 n, vnet_crypto_op_status_t op_s)
+{
+ u64 err = current_err;
+ u16 i;
+
+ ERROR_ASSERT (index + n <= VNET_CRYPTO_FRAME_SIZE);
+ ERROR_ASSERT (op_s != VNET_CRYPTO_OP_STATUS_COMPLETED);
+
+ for (i = index; i < (index + n); i++)
+ f->elts[i].status = op_s;
+
+ err |= (~(~(0ull) << n) << index);
+
+ return err;
+}
+
+static_always_inline cryptodev_cache_ring_elt_t *
+cryptodev_cache_ring_push (cryptodev_cache_ring_t *r,
+ vnet_crypto_async_frame_t *f)
+{
+ u16 head = r->head;
+ u16 tail = r->tail;
+
+ cryptodev_cache_ring_elt_t *ring_elt = &r->frames[head];
+ /**
+ * in debug mode we do the ring sanity test when a frame is enqueued to
+ * the ring.
+ **/
+#if CLIB_DEBUG > 0
+ u16 n_cached = (head >= tail) ? (head - tail) :
+ (CRYPTODEV_CACHE_QUEUE_MASK - tail + head);
+ ERROR_ASSERT (n_cached < CRYPTODEV_CACHE_QUEUE_SIZE);
+ ERROR_ASSERT (r->raw == 0 && r->frames[head].raw == 0 &&
+ r->frames[head].f == 0);
+#endif
+ /*the ring capacity is CRYPTODEV_CACHE_QUEUE_SIZE - 1*/
+ if (PREDICT_FALSE (head + 1) == tail)
+ return 0;
+
+ ring_elt->f = f;
+ ring_elt->n_elts = f->n_elts;
+ /* update head */
+ r->head++;
+ r->head &= CRYPTODEV_CACHE_QUEUE_MASK;
+ return ring_elt;
+}
+
+static_always_inline vnet_crypto_async_frame_t *
+cryptodev_cache_ring_pop (cryptodev_cache_ring_t *r)
{
- u32 n_elts = f->n_elts, i;
+ vnet_crypto_async_frame_t *f;
+ u16 tail = r->tail;
+ cryptodev_cache_ring_elt_t *ring_elt = &r->frames[tail];
+
+ ERROR_ASSERT (r->frames[r->head].raw == 0 ? r->head != tail : 1);
+ ERROR_ASSERT (r->frames[tail].raw != 0);
+ ERROR_ASSERT (ring_elt->deq_elts_tail == ring_elt->enq_elts_head &&
+ ring_elt->deq_elts_tail == ring_elt->n_elts);
+
+ f = CRYPTODEV_CACHE_RING_GET_FRAME (r, tail);
+ f->state = CRYPTODEV_CACHE_RING_GET_ERR_MASK (r, r->tail) == 0 ?
+ VNET_CRYPTO_FRAME_STATE_SUCCESS :
+ VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
+
+ clib_memset (ring_elt, 0, sizeof (*ring_elt));
+ r->tail++;
+ r->tail &= CRYPTODEV_CACHE_QUEUE_MASK;
- for (i = 0; i < n_elts; i++)
- f->elts[i].status = s;
- f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
+ return f;
}
int cryptodev_session_create (vlib_main_t *vm, vnet_crypto_key_index_t idx,
diff --git a/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c b/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
index 4545e24fc83..8d55e4fbf0f 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev_op_data_path.c
@@ -27,7 +27,6 @@
#include <rte_cryptodev.h>
#include <rte_crypto_sym.h>
#include <rte_crypto.h>
-#include <rte_cryptodev_pmd.h>
#include <rte_ring_peek_zc.h>
#include <rte_config.h>
@@ -68,6 +67,23 @@ cryptodev_get_iova (clib_pmalloc_main_t *pm, enum rte_iova_mode mode,
}
static_always_inline void
+cryptodev_validate_mbuf (struct rte_mbuf *mb, vlib_buffer_t *b)
+{
+ /* on vnet side vlib_buffer current_length is updated by cipher padding and
+ * icv_sh. mbuf needs to be sync with these changes */
+ u16 data_len = b->current_length +
+ (b->data + b->current_data - rte_pktmbuf_mtod (mb, u8 *));
+
+ /* for input nodes that are not dpdk-input, it is possible the mbuf
+ * was updated before as one of the chained mbufs. Setting nb_segs
+ * to 1 here to prevent the cryptodev PMD to access potentially
+ * invalid m_src->next pointers.
+ */
+ mb->nb_segs = 1;
+ mb->pkt_len = mb->data_len = data_len;
+}
+
+static_always_inline void
cryptodev_validate_mbuf_chain (vlib_main_t *vm, struct rte_mbuf *mb,
vlib_buffer_t *b)
{
@@ -125,39 +141,66 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
cryptodev_op_type_t op_type)
{
cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = 1;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline void
+cryptodev_frame_linked_algs_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const enq = &ring->enq_head;
vnet_crypto_async_frame_elt_t *fe;
- struct rte_cryptodev_sym_session *sess = 0;
- cryptodev_op_t **cop;
- u32 *bi;
+ cryptodev_session_t *sess = 0;
+ cryptodev_op_t *cops[CRYPTODE_ENQ_MAX] = {};
+ cryptodev_op_t **cop = cops;
+ u32 *bi = 0;
u32 n_enqueue, n_elts;
u32 last_key_index = ~0;
+ u32 max_to_enq;
if (PREDICT_FALSE (frame == 0 || frame->n_elts == 0))
- return -1;
- n_elts = frame->n_elts;
+ return;
- if (PREDICT_FALSE (CRYPTODEV_NB_CRYPTO_OPS - cet->inflight < n_elts))
- {
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
- }
+ max_to_enq = clib_min (CRYPTODE_ENQ_MAX,
+ frame->n_elts - ring->frames[*enq].enq_elts_head);
+
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
+ return;
+
+ n_elts = max_to_enq;
if (PREDICT_FALSE (
- rte_mempool_get_bulk (cet->cop_pool, (void **) cet->cops, n_elts) < 0))
+ rte_mempool_get_bulk (cet->cop_pool, (void **) cops, n_elts) < 0))
{
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
}
- cop = cet->cops;
- fe = frame->elts;
- bi = frame->buffer_indices;
- cop[0]->frame = frame;
- cop[0]->n_elts = n_elts;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
+ bi = frame->buffer_indices + ring->frames[*enq].enq_elts_head;
while (n_elts)
{
@@ -169,8 +212,8 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (n_elts > 2)
{
- CLIB_PREFETCH (cop[1], CLIB_CACHE_LINE_BYTES * 3, STORE);
- CLIB_PREFETCH (cop[2], CLIB_CACHE_LINE_BYTES * 3, STORE);
+ CLIB_PREFETCH (cop[1], sizeof (*cop[1]), STORE);
+ CLIB_PREFETCH (cop[2], sizeof (*cop[2]), STORE);
clib_prefetch_load (&fe[1]);
clib_prefetch_load (&fe[2]);
}
@@ -184,9 +227,11 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (
cryptodev_session_create (vm, last_key_index, 0) < 0))
{
- cryptodev_mark_frame_err_status (
- frame, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ goto error_exit;
}
}
sess = key->keys[vm->numa_node][op_type];
@@ -216,26 +261,29 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (fe->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS))
cryptodev_validate_mbuf_chain (vm, sop->m_src, b);
else
- /* for input nodes that are not dpdk-input, it is possible the mbuf
- * was updated before as one of the chained mbufs. Setting nb_segs
- * to 1 here to prevent the cryptodev PMD to access potentially
- * invalid m_src->next pointers.
- */
- sop->m_src->nb_segs = 1;
+ cryptodev_validate_mbuf (sop->m_src, b);
+
clib_memcpy_fast (cop[0]->iv, fe->iv, 16);
+ ring->frames[*enq].enq_elts_head++;
cop++;
bi++;
fe++;
n_elts--;
}
- n_enqueue = rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
- (struct rte_crypto_op **) cet->cops,
- frame->n_elts);
- ASSERT (n_enqueue == frame->n_elts);
- cet->inflight += n_enqueue;
+ n_enqueue =
+ rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
+ (struct rte_crypto_op **) cops, max_to_enq);
+ ERROR_ASSERT (n_enqueue == max_to_enq);
+ cet->inflight += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
- return 0;
+error_exit:
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ rte_mempool_put_bulk (cet->cop_pool, (void **) cops, max_to_enq);
}
static_always_inline int
@@ -244,39 +292,64 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
cryptodev_op_type_t op_type, u8 aad_len)
{
cryptodev_main_t *cmt = &cryptodev_main;
- clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = aad_len;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline int
+cryptodev_aead_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type, u8 aad_len)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const enq = &ring->enq_head;
+ clib_pmalloc_main_t *pm = vm->physmem_main.pmalloc_main;
vnet_crypto_async_frame_elt_t *fe;
- struct rte_cryptodev_sym_session *sess = 0;
- cryptodev_op_t **cop;
- u32 *bi;
+ cryptodev_session_t *sess = 0;
+ cryptodev_op_t *cops[CRYPTODE_ENQ_MAX] = {};
+ cryptodev_op_t **cop = cops;
+ u32 *bi = 0;
u32 n_enqueue = 0, n_elts;
u32 last_key_index = ~0;
+ u16 left_to_enq = frame->n_elts - ring->frames[*enq].enq_elts_head;
+ const u16 max_to_enq = clib_min (CRYPTODE_ENQ_MAX, left_to_enq);
if (PREDICT_FALSE (frame == 0 || frame->n_elts == 0))
return -1;
- n_elts = frame->n_elts;
- if (PREDICT_FALSE (CRYPTODEV_MAX_INFLIGHT - cet->inflight < n_elts))
- {
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
- }
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
+ return -1;
+
+ n_elts = max_to_enq;
if (PREDICT_FALSE (
- rte_mempool_get_bulk (cet->cop_pool, (void **) cet->cops, n_elts) < 0))
+ rte_mempool_get_bulk (cet->cop_pool, (void **) cops, n_elts) < 0))
{
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
return -1;
}
- cop = cet->cops;
- fe = frame->elts;
- bi = frame->buffer_indices;
- cop[0]->frame = frame;
- cop[0]->n_elts = n_elts;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
+ bi = frame->buffer_indices + ring->frames[*enq].enq_elts_head;
while (n_elts)
{
@@ -286,8 +359,8 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
if (n_elts > 2)
{
- CLIB_PREFETCH (cop[1], CLIB_CACHE_LINE_BYTES * 3, STORE);
- CLIB_PREFETCH (cop[2], CLIB_CACHE_LINE_BYTES * 3, STORE);
+ CLIB_PREFETCH (cop[1], sizeof (*cop[1]), STORE);
+ CLIB_PREFETCH (cop[2], sizeof (*cop[2]), STORE);
clib_prefetch_load (&fe[1]);
clib_prefetch_load (&fe[2]);
}
@@ -301,23 +374,32 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (cryptodev_session_create (vm, last_key_index,
aad_len) < 0))
{
- cryptodev_mark_frame_err_status (
- frame, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ goto error_exit;
}
}
else if (PREDICT_FALSE (
- key->keys[vm->numa_node][op_type]->opaque_data !=
- aad_len))
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ rte_cryptodev_sym_session_opaque_data_get (
+ key->keys[vm->numa_node][op_type]) != (u64) aad_len
+#else
+ key->keys[vm->numa_node][op_type]->opaque_data != aad_len
+#endif
+ ))
{
cryptodev_sess_handler (vm, VNET_CRYPTO_KEY_OP_DEL,
fe->key_index, aad_len);
if (PREDICT_FALSE (cryptodev_session_create (vm, last_key_index,
aad_len) < 0))
{
- cryptodev_mark_frame_err_status (
- frame, VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ cryptodev_mark_frame_fill_err (
+ frame, ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ goto error_exit;
}
}
@@ -348,117 +430,179 @@ cryptodev_frame_aead_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (fe->flags & VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS))
cryptodev_validate_mbuf_chain (vm, sop->m_src, b);
else
- /* for input nodes that are not dpdk-input, it is possible the mbuf
- * was updated before as one of the chained mbufs. Setting nb_segs
- * to 1 here to prevent the cryptodev PMD to access potentially
- * invalid m_src->next pointers.
- */
- sop->m_src->nb_segs = 1;
+ cryptodev_validate_mbuf (sop->m_src, b);
+
clib_memcpy_fast (cop[0]->iv, fe->iv, 12);
clib_memcpy_fast (cop[0]->aad, fe->aad, aad_len);
+
cop++;
bi++;
fe++;
n_elts--;
}
- n_enqueue = rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
- (struct rte_crypto_op **) cet->cops,
- frame->n_elts);
- ASSERT (n_enqueue == frame->n_elts);
- cet->inflight += n_enqueue;
+ n_enqueue =
+ rte_cryptodev_enqueue_burst (cet->cryptodev_id, cet->cryptodev_q,
+ (struct rte_crypto_op **) cops, max_to_enq);
+ ERROR_ASSERT (n_enqueue == max_to_enq);
+ cet->inflight += max_to_enq;
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
return 0;
-}
-
-static_always_inline u16
-cryptodev_ring_deq (struct rte_ring *r, cryptodev_op_t **cops)
-{
- u16 n, n_elts = 0;
- n = rte_ring_dequeue_bulk_start (r, (void **) cops, 1, 0);
- rte_ring_dequeue_finish (r, 0);
- if (!n)
- return 0;
-
- n = cops[0]->n_elts;
- if (rte_ring_count (r) < n)
- return 0;
-
- n_elts = rte_ring_sc_dequeue_bulk (r, (void **) cops, n, 0);
- ASSERT (n_elts == n);
+error_exit:
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ rte_mempool_put_bulk (cet->cop_pool, (void **) cops, max_to_enq);
- return n_elts;
+ return -1;
}
-static_always_inline vnet_crypto_async_frame_t *
-cryptodev_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
- u32 *enqueue_thread_idx)
+static_always_inline u8
+cryptodev_frame_dequeue_internal (vlib_main_t *vm, u32 *enqueue_thread_idx)
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
- cryptodev_op_t **cop = cet->cops;
+ vnet_crypto_async_frame_t *frame = NULL;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const deq = &ring->deq_tail;
+ u16 n_deq, left_to_deq;
+ u16 max_to_deq = 0;
+ u16 inflight = cet->inflight;
+ u8 dequeue_more = 0;
+ cryptodev_op_t *cops[CRYPTODE_DEQ_MAX] = {};
+ cryptodev_op_t **cop = cops;
vnet_crypto_async_frame_elt_t *fe;
- vnet_crypto_async_frame_t *frame;
- u32 n_elts, n_completed_ops = rte_ring_count (cet->ring);
- u32 ss0 = 0, ss1 = 0, ss2 = 0, ss3 = 0; /* sum of status */
-
- if (cet->inflight)
- {
- n_elts = rte_cryptodev_dequeue_burst (
- cet->cryptodev_id, cet->cryptodev_q,
- (struct rte_crypto_op **) cet->cops, VNET_CRYPTO_FRAME_SIZE);
+ u32 n_elts, n;
+ u64 err0 = 0, err1 = 0, err2 = 0, err3 = 0; /* partial errors mask */
- if (n_elts)
- {
- cet->inflight -= n_elts;
- n_completed_ops += n_elts;
+ left_to_deq =
+ ring->frames[*deq].f->n_elts - ring->frames[*deq].deq_elts_tail;
+ max_to_deq = clib_min (left_to_deq, CRYPTODE_DEQ_MAX);
- rte_ring_sp_enqueue_burst (cet->ring, (void **) cet->cops, n_elts,
- NULL);
- }
- }
+ /* deq field can be used to track frame that is currently dequeued
+ based on that you can specify the amount of elements to deq for the frame */
+ n_deq =
+ rte_cryptodev_dequeue_burst (cet->cryptodev_id, cet->cryptodev_q,
+ (struct rte_crypto_op **) cops, max_to_deq);
- if (PREDICT_FALSE (n_completed_ops == 0))
- return 0;
+ if (n_deq == 0)
+ return dequeue_more;
- n_elts = cryptodev_ring_deq (cet->ring, cop);
- if (!n_elts)
- return 0;
+ frame = ring->frames[*deq].f;
+ fe = frame->elts + ring->frames[*deq].deq_elts_tail;
- frame = cop[0]->frame;
- fe = frame->elts;
+ n_elts = n_deq;
+ n = ring->frames[*deq].deq_elts_tail;
while (n_elts > 4)
{
- ss0 |= fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
- ss1 |= fe[1].status = cryptodev_status_conversion[cop[1]->op.status];
- ss2 |= fe[2].status = cryptodev_status_conversion[cop[2]->op.status];
- ss3 |= fe[3].status = cryptodev_status_conversion[cop[3]->op.status];
+ fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
+ fe[1].status = cryptodev_status_conversion[cop[1]->op.status];
+ fe[2].status = cryptodev_status_conversion[cop[2]->op.status];
+ fe[3].status = cryptodev_status_conversion[cop[3]->op.status];
+
+ err0 |= ((u64) (fe[0].status == VNET_CRYPTO_OP_STATUS_COMPLETED)) << n;
+ err1 |= ((u64) (fe[1].status == VNET_CRYPTO_OP_STATUS_COMPLETED))
+ << (n + 1);
+ err2 |= ((u64) (fe[2].status == VNET_CRYPTO_OP_STATUS_COMPLETED))
+ << (n + 2);
+ err3 |= ((u64) (fe[3].status == VNET_CRYPTO_OP_STATUS_COMPLETED))
+ << (n + 3);
cop += 4;
fe += 4;
n_elts -= 4;
+ n += 4;
}
while (n_elts)
{
- ss0 |= fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
+ fe[0].status = cryptodev_status_conversion[cop[0]->op.status];
+ err0 |= ((u64) (fe[0].status == VNET_CRYPTO_OP_STATUS_COMPLETED)) << n;
+ n++;
fe++;
cop++;
n_elts--;
}
- frame->state = (ss0 | ss1 | ss2 | ss3) == VNET_CRYPTO_OP_STATUS_COMPLETED ?
- VNET_CRYPTO_FRAME_STATE_SUCCESS :
- VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
+ ring->frames[*deq].frame_elts_errs_mask |= (err0 | err1 | err2 | err3);
- rte_mempool_put_bulk (cet->cop_pool, (void **) cet->cops, frame->n_elts);
- *nb_elts_processed = frame->n_elts;
- *enqueue_thread_idx = frame->enqueue_thread_index;
- return frame;
+ rte_mempool_put_bulk (cet->cop_pool, (void **) cops, n_deq);
+
+ inflight -= n_deq;
+ ring->frames[*deq].deq_elts_tail += n_deq;
+ if (cryptodev_cache_ring_update_deq_tail (ring, deq))
+ {
+ u32 fr_processed =
+ (CRYPTODEV_CACHE_QUEUE_SIZE - ring->tail + ring->deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK;
+
+ *enqueue_thread_idx = frame->enqueue_thread_index;
+ dequeue_more = (fr_processed < CRYPTODEV_MAX_PROCESED_IN_CACHE_QUEUE);
+ }
+
+ cet->inflight = inflight;
+ return dequeue_more;
}
+static_always_inline void
+cryptodev_enqueue_frame (vlib_main_t *vm, cryptodev_cache_ring_elt_t *ring_elt)
+{
+ cryptodev_op_type_t op_type = (cryptodev_op_type_t) ring_elt->op_type;
+ u8 linked_or_aad_len = ring_elt->aad_len;
+
+ if (linked_or_aad_len == 1)
+ cryptodev_frame_linked_algs_enqueue_internal (vm, ring_elt->f, op_type);
+ else
+ cryptodev_aead_enqueue_internal (vm, ring_elt->f, op_type,
+ linked_or_aad_len);
+}
+
+static_always_inline vnet_crypto_async_frame_t *
+cryptodev_frame_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
+ u32 *enqueue_thread_idx)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ vnet_crypto_main_t *cm = &crypto_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ cryptodev_cache_ring_elt_t *ring_elt = &ring->frames[ring->tail];
+
+ vnet_crypto_async_frame_t *ret_frame = 0;
+ u8 dequeue_more = 1;
+
+ while (cet->inflight > 0 && dequeue_more)
+ {
+ dequeue_more = cryptodev_frame_dequeue_internal (vm, enqueue_thread_idx);
+ }
+
+ if (PREDICT_TRUE (ring->frames[ring->enq_head].f != 0))
+ cryptodev_enqueue_frame (vm, &ring->frames[ring->enq_head]);
+
+ if (PREDICT_TRUE (ring_elt->f != 0))
+ {
+ if (ring_elt->n_elts == ring_elt->deq_elts_tail)
+ {
+ *nb_elts_processed = ring_elt->n_elts;
+ vlib_node_set_interrupt_pending (
+ vlib_get_main_by_index (vm->thread_index), cm->crypto_node_index);
+ ret_frame = cryptodev_cache_ring_pop (ring);
+ return ret_frame;
+ }
+ }
+
+ return ret_frame;
+}
+static_always_inline int
+cryptodev_enqueue_aead_aad_0_enc (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_frame_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_ENCRYPT,
+ 0);
+}
static_always_inline int
cryptodev_enqueue_aead_aad_8_enc (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame)
@@ -475,6 +619,13 @@ cryptodev_enqueue_aead_aad_12_enc (vlib_main_t *vm,
}
static_always_inline int
+cryptodev_enqueue_aead_aad_0_dec (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_frame_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_DECRYPT,
+ 0);
+}
+static_always_inline int
cryptodev_enqueue_aead_aad_8_dec (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame)
{
@@ -515,6 +666,7 @@ cryptodev_register_cop_hdl (vlib_main_t *vm, u32 eidx)
struct rte_cryptodev_sym_capability_idx cap_aead_idx;
u8 *name;
clib_error_t *error = 0;
+ u32 ref_cnt = 0;
vec_foreach (cet, cmt->per_thread_data)
{
@@ -525,43 +677,28 @@ cryptodev_register_cop_hdl (vlib_main_t *vm, u32 eidx)
(char *) name, CRYPTODEV_NB_CRYPTO_OPS, sizeof (cryptodev_op_t), 0,
sizeof (struct rte_crypto_op_pool_private), NULL, NULL, crypto_op_init,
NULL, vm->numa_node, 0);
- if (!cet->cop_pool)
- {
- error = clib_error_return (
- 0, "Failed to create cryptodev op pool %s", name);
-
- goto error_exit;
- }
vec_free (name);
-
- name = format (0, "frames_ring_%u_%u", numa, thread_index);
- cet->ring =
- rte_ring_create ((char *) name, CRYPTODEV_NB_CRYPTO_OPS, vm->numa_node,
- RING_F_SP_ENQ | RING_F_SC_DEQ);
- if (!cet->ring)
+ if (!cet->cop_pool)
{
error = clib_error_return (
0, "Failed to create cryptodev op pool %s", name);
goto error_exit;
}
- vec_free (name);
-
- vec_validate (cet->cops, VNET_CRYPTO_FRAME_SIZE - 1);
}
- /** INDENT-OFF **/
#define _(a, b, c, d, e, f, g) \
cap_aead_idx.type = RTE_CRYPTO_SYM_XFORM_AEAD; \
cap_aead_idx.algo.aead = RTE_CRYPTO_##b##_##c; \
if (cryptodev_check_cap_support (&cap_aead_idx, g, e, f)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_ENC, \
- cryptodev_enqueue_aead_aad_##f##_enc, cryptodev_frame_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_enqueue_aead_aad_##f##_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_DEC, \
- cryptodev_enqueue_aead_aad_##f##_dec, cryptodev_frame_dequeue); \
+ cryptodev_enqueue_aead_aad_##f##_dec); \
+ ref_cnt++; \
}
foreach_vnet_aead_crypto_conversion
#undef _
@@ -574,25 +711,25 @@ cryptodev_register_cop_hdl (vlib_main_t *vm, u32 eidx)
if (cryptodev_check_cap_support (&cap_cipher_idx, c, -1, -1) && \
cryptodev_check_cap_support (&cap_auth_idx, -1, e, -1)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_ENC, \
- cryptodev_enqueue_linked_alg_enc, cryptodev_frame_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_enqueue_linked_alg_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_DEC, \
- cryptodev_enqueue_linked_alg_dec, cryptodev_frame_dequeue); \
+ cryptodev_enqueue_linked_alg_dec); \
+ ref_cnt++; \
}
foreach_cryptodev_link_async_alg
#undef _
- /** INDENT-ON **/
+
+ if (ref_cnt)
+ vnet_crypto_register_dequeue_handler (vm, eidx, cryptodev_frame_dequeue);
return 0;
error_exit:
vec_foreach (cet, cmt->per_thread_data)
{
- if (cet->ring)
- rte_ring_free (cet->ring);
-
if (cet->cop_pool)
rte_mempool_free (cet->cop_pool);
}
diff --git a/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c b/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
index 41a1e0c2a09..67ab9c89e67 100644
--- a/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
+++ b/src/plugins/dpdk/cryptodev/cryptodev_raw_data_path.c
@@ -29,7 +29,7 @@
#include <rte_cryptodev.h>
#include <rte_crypto_sym.h>
#include <rte_crypto.h>
-#include <rte_cryptodev_pmd.h>
+#include <rte_malloc.h>
#include <rte_config.h>
#include "cryptodev.h"
@@ -96,7 +96,7 @@ cryptodev_reset_ctx (cryptodev_engine_thread_t *cet)
{
union rte_cryptodev_session_ctx sess_ctx;
- ASSERT (cet->reset_sess != 0);
+ ERROR_ASSERT (cet->reset_sess != 0);
sess_ctx.crypto_sess = cet->reset_sess;
@@ -112,30 +112,51 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = 1;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline void
+cryptodev_frame_linked_algs_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
vnet_crypto_async_frame_elt_t *fe;
vlib_buffer_t **b;
struct rte_crypto_vec vec[CRYPTODEV_MAX_N_SGL];
struct rte_crypto_va_iova_ptr iv_vec, digest_vec;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const enq = &ring->enq_head;
u32 n_elts;
u32 last_key_index = ~0;
i16 min_ofs;
u32 max_end;
+ u32 max_to_enq = clib_min (CRYPTODE_ENQ_MAX,
+ frame->n_elts - ring->frames[*enq].enq_elts_head);
u8 is_update = 0;
int status;
- n_elts = frame->n_elts;
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
+ return;
- if (PREDICT_FALSE (CRYPTODEV_MAX_INFLIGHT - cet->inflight < n_elts))
- {
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
- }
+ n_elts = max_to_enq;
vlib_get_buffers (vm, frame->buffer_indices, cet->b, frame->n_elts);
- b = cet->b;
- fe = frame->elts;
+ b = cet->b + ring->frames[*enq].enq_elts_head;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
while (n_elts)
{
@@ -215,26 +236,31 @@ cryptodev_frame_linked_algs_enqueue (vlib_main_t *vm,
if (PREDICT_FALSE (status < 0))
goto error_exit;
+ ring->frames[*enq].enq_elts_head += 1;
b++;
fe++;
n_elts--;
}
- status = rte_cryptodev_raw_enqueue_done (cet->ctx, frame->n_elts);
+ status = rte_cryptodev_raw_enqueue_done (cet->ctx, max_to_enq);
if (PREDICT_FALSE (status < 0))
- {
- cryptodev_reset_ctx (cet);
- return -1;
- }
+ goto error_exit;
- cet->inflight += frame->n_elts;
- return 0;
+ cet->inflight += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
error_exit:
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ cryptodev_mark_frame_fill_err (frame,
+ ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
cryptodev_reset_ctx (cet);
- return -1;
+
+ return;
}
static_always_inline int
@@ -243,6 +269,28 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ ERROR_ASSERT (frame != 0);
+ ERROR_ASSERT (frame->n_elts > 0);
+ cryptodev_cache_ring_elt_t *ring_elt =
+ cryptodev_cache_ring_push (ring, frame);
+
+ if (PREDICT_FALSE (ring_elt == NULL))
+ return -1;
+
+ ring_elt->aad_len = aad_len;
+ ring_elt->op_type = (u8) op_type;
+ return 0;
+}
+
+static_always_inline void
+cryptodev_raw_aead_enqueue_internal (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame,
+ cryptodev_op_type_t op_type, u8 aad_len)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
vnet_crypto_async_frame_elt_t *fe;
vlib_buffer_t **b;
u32 n_elts;
@@ -250,22 +298,23 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
struct rte_crypto_vec vec[CRYPTODEV_MAX_N_SGL];
struct rte_crypto_va_iova_ptr iv_vec, digest_vec, aad_vec;
u32 last_key_index = ~0;
+ u16 *const enq = &ring->enq_head;
+ u16 left_to_enq = frame->n_elts - ring->frames[*enq].enq_elts_head;
+ u16 max_to_enq = clib_min (CRYPTODE_ENQ_MAX, left_to_enq);
u8 is_update = 0;
int status;
- n_elts = frame->n_elts;
-
- if (PREDICT_FALSE (CRYPTODEV_MAX_INFLIGHT - cet->inflight < n_elts))
+ if (cet->inflight + max_to_enq > CRYPTODEV_MAX_INFLIGHT)
{
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
- return -1;
+ return;
}
+ n_elts = max_to_enq;
+
vlib_get_buffers (vm, frame->buffer_indices, cet->b, frame->n_elts);
- fe = frame->elts;
- b = cet->b;
+ fe = frame->elts + ring->frames[*enq].enq_elts_head;
+ b = cet->b + ring->frames[*enq].enq_elts_head;
cofs.raw = 0;
while (n_elts)
@@ -292,8 +341,13 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
}
if (PREDICT_FALSE (
- (u8) key->keys[vm->numa_node][op_type]->opaque_data !=
- aad_len))
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ rte_cryptodev_sym_session_opaque_data_get (
+ key->keys[vm->numa_node][op_type]) != (u64) aad_len
+#else
+ (u8) key->keys[vm->numa_node][op_type]->opaque_data != aad_len
+#endif
+ ))
{
cryptodev_sess_handler (vm, VNET_CRYPTO_KEY_OP_DEL,
fe->key_index, aad_len);
@@ -349,7 +403,7 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
if (aad_len == 8)
*(u64 *) (cet->aad_buf + aad_offset) = *(u64 *) fe->aad;
- else
+ else if (aad_len != 0)
{
/* aad_len == 12 */
*(u64 *) (cet->aad_buf + aad_offset) = *(u64 *) fe->aad;
@@ -373,31 +427,30 @@ cryptodev_raw_aead_enqueue (vlib_main_t *vm, vnet_crypto_async_frame_t *frame,
if (PREDICT_FALSE (status < 0))
goto error_exit;
+ ring->frames[*enq].enq_elts_head += 1;
fe++;
b++;
n_elts--;
}
- status = rte_cryptodev_raw_enqueue_done (cet->ctx, frame->n_elts);
+ status = rte_cryptodev_raw_enqueue_done (cet->ctx, max_to_enq);
if (PREDICT_FALSE (status < 0))
goto error_exit;
- cet->inflight += frame->n_elts;
-
- return 0;
+ cet->inflight += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
+ return;
error_exit:
- cryptodev_mark_frame_err_status (frame,
- VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ cryptodev_mark_frame_fill_err (frame,
+ ring->frames[*enq].frame_elts_errs_mask,
+ ring->frames[*enq].enq_elts_head, max_to_enq,
+ VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR);
+ ring->frames[*enq].enq_elts_head += max_to_enq;
+ ring->frames[*enq].deq_elts_tail += max_to_enq;
+ cryptodev_cache_ring_update_enq_head (ring, frame);
cryptodev_reset_ctx (cet);
- return -1;
-}
-
-static_always_inline u32
-cryptodev_get_frame_n_elts (void *frame)
-{
- vnet_crypto_async_frame_t *f = (vnet_crypto_async_frame_t *) frame;
- return f->n_elts;
+ return;
}
static_always_inline void
@@ -409,180 +462,114 @@ cryptodev_post_dequeue (void *frame, u32 index, u8 is_op_success)
VNET_CRYPTO_OP_STATUS_FAIL_BAD_HMAC;
}
-#define GET_RING_OBJ(r, pos, f) \
- do \
- { \
- vnet_crypto_async_frame_t **ring = (void *) &r[1]; \
- f = ring[(r->cons.head + pos) & r->mask]; \
- } \
- while (0)
-
-static_always_inline vnet_crypto_async_frame_t *
-cryptodev_raw_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
- u32 *enqueue_thread_idx)
+static_always_inline u8
+cryptodev_raw_dequeue_internal (vlib_main_t *vm, u32 *enqueue_thread_idx)
{
cryptodev_main_t *cmt = &cryptodev_main;
cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
- vnet_crypto_async_frame_t *frame, *frame_ret = 0;
- u32 n_deq, n_success;
- u32 n_cached_frame = rte_ring_count (cet->cached_frame), n_room_left;
- u8 no_job_to_deq = 0;
+ vnet_crypto_async_frame_t *frame;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ u16 *const deq = &ring->deq_tail;
+ u32 n_success;
+ u16 n_deq, i, left_to_deq;
+ u16 max_to_deq = 0;
u16 inflight = cet->inflight;
+ u8 dequeue_more = 0;
int dequeue_status;
- n_room_left = CRYPTODEV_DEQ_CACHE_SZ - n_cached_frame - 1;
+ left_to_deq = ring->frames[*deq].n_elts - ring->frames[*deq].deq_elts_tail;
+ max_to_deq = clib_min (left_to_deq, CRYPTODE_DEQ_MAX);
- if (n_cached_frame)
- {
- u32 i;
- for (i = 0; i < n_cached_frame; i++)
- {
- vnet_crypto_async_frame_t *f;
- void *f_ret;
- enum rte_crypto_op_status op_status;
- u8 n_left, err, j;
+ /* deq field can be used to track frame that is currently dequeued */
+ /* based on thatthe amount of elements to deq for the frame can be specified
+ */
- GET_RING_OBJ (cet->cached_frame, i, f);
-
- if (i < n_cached_frame - 2)
- {
- vnet_crypto_async_frame_t *f1, *f2;
- GET_RING_OBJ (cet->cached_frame, i + 1, f1);
- GET_RING_OBJ (cet->cached_frame, i + 2, f2);
- clib_prefetch_load (f1);
- clib_prefetch_load (f2);
- }
-
- n_left = f->state & 0x7f;
- err = f->state & 0x80;
-
- for (j = f->n_elts - n_left; j < f->n_elts && inflight; j++)
- {
- int ret;
- f_ret = rte_cryptodev_raw_dequeue (cet->ctx, &ret, &op_status);
-
- if (!f_ret)
- break;
-
- switch (op_status)
- {
- case RTE_CRYPTO_OP_STATUS_SUCCESS:
- f->elts[j].status = VNET_CRYPTO_OP_STATUS_COMPLETED;
- break;
- default:
- f->elts[j].status = VNET_CRYPTO_OP_STATUS_FAIL_ENGINE_ERR;
- err |= 1 << 7;
- }
-
- inflight--;
- }
+ n_deq = rte_cryptodev_raw_dequeue_burst (
+ cet->ctx, NULL, max_to_deq, cryptodev_post_dequeue, (void **) &frame, 0,
+ &n_success, &dequeue_status);
- if (j == f->n_elts)
- {
- if (i == 0)
- {
- frame_ret = f;
- f->state = err ? VNET_CRYPTO_FRAME_STATE_ELT_ERROR :
- VNET_CRYPTO_FRAME_STATE_SUCCESS;
- }
- else
- {
- f->state = f->n_elts - j;
- f->state |= err;
- }
- if (inflight)
- continue;
- }
+ if (n_deq == 0)
+ return dequeue_more;
- /* to here f is not completed dequeued and no more job can be
- * dequeued
- */
- f->state = f->n_elts - j;
- f->state |= err;
- no_job_to_deq = 1;
- break;
- }
+ inflight -= n_deq;
+ if (PREDICT_FALSE (n_success < n_deq))
+ {
+ u16 idx = ring->frames[*deq].deq_elts_tail;
- if (frame_ret)
+ for (i = 0; i < n_deq; i++)
{
- rte_ring_sc_dequeue (cet->cached_frame, (void **) &frame_ret);
- n_room_left++;
+ if (frame->elts[idx + i].status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ ring->frames[*deq].frame_elts_errs_mask |= 1 << (idx + i);
}
}
+ ring->frames[*deq].deq_elts_tail += n_deq;
- /* no point to dequeue further */
- if (!inflight || no_job_to_deq || !n_room_left)
- goto end_deq;
+ if (cryptodev_cache_ring_update_deq_tail (ring, deq))
+ {
+ u32 fr_processed =
+ (CRYPTODEV_CACHE_QUEUE_SIZE - ring->tail + ring->deq_tail) &
+ CRYPTODEV_CACHE_QUEUE_MASK;
-#if RTE_VERSION >= RTE_VERSION_NUM(21, 5, 0, 0)
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, 0, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#else
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#endif
+ *enqueue_thread_idx = frame->enqueue_thread_index;
+ dequeue_more = (fr_processed < CRYPTODEV_MAX_PROCESED_IN_CACHE_QUEUE);
+ }
- if (!n_deq)
- goto end_deq;
+ int res =
+ rte_cryptodev_raw_dequeue_done (cet->ctx, cet->inflight - inflight);
+ ERROR_ASSERT (res == 0);
+ cet->inflight = inflight;
+ return dequeue_more;
+}
- inflight -= n_deq;
- no_job_to_deq = n_deq < frame->n_elts;
- /* we have to cache the frame */
- if (frame_ret || n_cached_frame || no_job_to_deq)
- {
- frame->state = frame->n_elts - n_deq;
- frame->state |= ((n_success < n_deq) << 7);
- rte_ring_sp_enqueue (cet->cached_frame, (void *) frame);
- n_room_left--;
- }
+static_always_inline void
+cryptodev_enqueue_frame_to_qat (vlib_main_t *vm,
+ cryptodev_cache_ring_elt_t *ring_elt)
+{
+ cryptodev_op_type_t op_type = (cryptodev_op_type_t) ring_elt->op_type;
+ u8 linked_or_aad_len = ring_elt->aad_len;
+
+ if (linked_or_aad_len == 1)
+ cryptodev_frame_linked_algs_enqueue_internal (vm, ring_elt->f, op_type);
else
- {
- frame->state = n_success == frame->n_elts ?
- VNET_CRYPTO_FRAME_STATE_SUCCESS :
- VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
- frame_ret = frame;
- }
+ cryptodev_raw_aead_enqueue_internal (vm, ring_elt->f, op_type,
+ linked_or_aad_len);
+}
- /* see if we can dequeue more */
- while (inflight && n_room_left && !no_job_to_deq)
- {
-#if RTE_VERSION >= RTE_VERSION_NUM(21, 5, 0, 0)
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, 0, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#else
- n_deq = rte_cryptodev_raw_dequeue_burst (
- cet->ctx, cryptodev_get_frame_n_elts, cryptodev_post_dequeue,
- (void **) &frame, 0, &n_success, &dequeue_status);
-#endif
- if (!n_deq)
- break;
- inflight -= n_deq;
- no_job_to_deq = n_deq < frame->n_elts;
- frame->state = frame->n_elts - n_deq;
- frame->state |= ((n_success < n_deq) << 7);
- rte_ring_sp_enqueue (cet->cached_frame, (void *) frame);
- n_room_left--;
- }
+static_always_inline vnet_crypto_async_frame_t *
+cryptodev_raw_dequeue (vlib_main_t *vm, u32 *nb_elts_processed,
+ u32 *enqueue_thread_idx)
+{
+ cryptodev_main_t *cmt = &cryptodev_main;
+ vnet_crypto_main_t *cm = &crypto_main;
+ cryptodev_engine_thread_t *cet = cmt->per_thread_data + vm->thread_index;
+ cryptodev_cache_ring_t *ring = &cet->cache_ring;
+ cryptodev_cache_ring_elt_t *ring_elt = &ring->frames[ring->tail];
+ vnet_crypto_async_frame_t *ret_frame = 0;
+ u8 dequeue_more = 1;
-end_deq:
- if (inflight < cet->inflight)
- {
- int res =
- rte_cryptodev_raw_dequeue_done (cet->ctx, cet->inflight - inflight);
- ASSERT (res == 0);
- cet->inflight = inflight;
- }
+ while (cet->inflight > 0 && dequeue_more)
+ dequeue_more = cryptodev_raw_dequeue_internal (vm, enqueue_thread_idx);
+
+ if (PREDICT_TRUE (ring->frames[ring->enq_head].f != 0))
+ cryptodev_enqueue_frame_to_qat (vm, &ring->frames[ring->enq_head]);
- if (frame_ret)
+ if (PREDICT_TRUE (ring_elt->f != 0) &&
+ (ring_elt->n_elts == ring_elt->deq_elts_tail))
{
- *nb_elts_processed = frame_ret->n_elts;
- *enqueue_thread_idx = frame_ret->enqueue_thread_index;
+ *nb_elts_processed = ring_elt->n_elts;
+ vlib_node_set_interrupt_pending (
+ vlib_get_main_by_index (vm->thread_index), cm->crypto_node_index);
+ ret_frame = cryptodev_cache_ring_pop (ring);
}
- return frame_ret;
+ return ret_frame;
+}
+
+static_always_inline int
+cryptodev_raw_enq_aead_aad_0_enc (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_raw_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_ENCRYPT, 0);
}
static_always_inline int
@@ -599,6 +586,13 @@ cryptodev_raw_enq_aead_aad_12_enc (vlib_main_t *vm,
}
static_always_inline int
+cryptodev_raw_enq_aead_aad_0_dec (vlib_main_t *vm,
+ vnet_crypto_async_frame_t *frame)
+{
+ return cryptodev_raw_aead_enqueue (vm, frame, CRYPTODEV_OP_TYPE_DECRYPT, 0);
+}
+
+static_always_inline int
cryptodev_raw_enq_aead_aad_8_dec (vlib_main_t *vm,
vnet_crypto_async_frame_t *frame)
{
@@ -639,6 +633,7 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
struct rte_cryptodev_sym_capability_idx cap_aead_idx;
u32 support_raw_api = 1, max_ctx_size = 0;
clib_error_t *error = 0;
+ u8 ref_cnt = 0;
vec_foreach (cinst, cmt->cryptodev_inst)
{
@@ -661,11 +656,7 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
{
u32 thread_id = cet - cmt->per_thread_data;
u32 numa = vlib_get_main_by_index (thread_id)->numa_node;
- u8 *name = format (0, "cache_frame_ring_%u_%u", numa, thread_id);
-
- cet->cached_frame =
- rte_ring_create ((char *) name, CRYPTODEV_DEQ_CACHE_SZ, numa,
- RING_F_SC_DEQ | RING_F_SP_ENQ);
+ u8 *name = format (0, "cache_cache_ring_%u_%u", numa, thread_id);
cet->aad_buf = rte_zmalloc_socket (
0, CRYPTODEV_NB_CRYPTO_OPS * CRYPTODEV_MAX_AAD_SIZE,
@@ -684,28 +675,21 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
error = clib_error_return (0, "Failed to alloc raw dp ctx");
goto err_handling;
}
-
- if (cet->cached_frame == 0)
- {
- error = clib_error_return (0, "Failed to alloc frame ring %s", name);
- goto err_handling;
- }
-
vec_free (name);
}
-/** INDENT-OFF **/
#define _(a, b, c, d, e, f, g) \
cap_aead_idx.type = RTE_CRYPTO_SYM_XFORM_AEAD; \
cap_aead_idx.algo.aead = RTE_CRYPTO_##b##_##c; \
if (cryptodev_check_cap_support (&cap_aead_idx, g, e, f)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_ENC, \
- cryptodev_raw_enq_aead_aad_##f##_enc, cryptodev_raw_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_raw_enq_aead_aad_##f##_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_TAG##e##_AAD##f##_DEC, \
- cryptodev_raw_enq_aead_aad_##f##_dec, cryptodev_raw_dequeue); \
+ cryptodev_raw_enq_aead_aad_##f##_dec); \
+ ref_cnt++; \
}
foreach_vnet_aead_crypto_conversion
#undef _
@@ -718,26 +702,24 @@ cryptodev_register_raw_hdl (vlib_main_t *vm, u32 eidx)
if (cryptodev_check_cap_support (&cap_cipher_idx, c, -1, -1) && \
cryptodev_check_cap_support (&cap_auth_idx, -1, e, -1)) \
{ \
- vnet_crypto_register_async_handler ( \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_ENC, \
- cryptodev_raw_enq_linked_alg_enc, cryptodev_raw_dequeue); \
- vnet_crypto_register_async_handler ( \
+ cryptodev_raw_enq_linked_alg_enc); \
+ vnet_crypto_register_enqueue_handler ( \
vm, eidx, VNET_CRYPTO_OP_##a##_##d##_TAG##e##_DEC, \
- cryptodev_raw_enq_linked_alg_dec, cryptodev_raw_dequeue); \
+ cryptodev_raw_enq_linked_alg_dec); \
+ ref_cnt++; \
}
foreach_cryptodev_link_async_alg
#undef _
- cmt->is_raw_api = 1;
+ if (ref_cnt)
+ vnet_crypto_register_dequeue_handler (vm, eidx, cryptodev_raw_dequeue);
+
+ cmt->is_raw_api = 1;
return 0;
err_handling:
- vec_foreach (cet, cmt->per_thread_data)
- {
- if (cet->cached_frame)
- rte_ring_free (cet->cached_frame);
- }
-
return error;
}
diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c
index 0f771c6ba77..c838800deb4 100644
--- a/src/plugins/dpdk/device/cli.c
+++ b/src/plugins/dpdk/device/cli.c
@@ -77,14 +77,12 @@ show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
* name="mbuf_pool_socket0" available = 15104 allocated = 1280 total = 16384
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_show_dpdk_buffer,static) = {
.path = "show dpdk buffer",
.short_help = "show dpdk buffer",
.function = show_dpdk_buffer,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input,
@@ -142,7 +140,7 @@ show_dpdk_physmem (vlib_main_t * vm, unformat_input_t * input,
err = clib_error_return_unix (0, "read");
goto error;
}
- _vec_len (s) = len + (n < 0 ? 0 : n);
+ vec_set_len (s, len + (n < 0 ? 0 : n));
}
vlib_cli_output (vm, "%v", s);
@@ -162,14 +160,12 @@ error:
* @cliexstart{show dpdk physmem}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_show_dpdk_physmem,static) = {
.path = "show dpdk physmem",
.short_help = "show dpdk physmem",
.function = show_dpdk_physmem,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
@@ -198,7 +194,7 @@ test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
first = vec_len (allocated_buffers) - n_free;
vlib_buffer_free (vm, allocated_buffers + first, n_free);
- _vec_len (allocated_buffers) = first;
+ vec_set_len (allocated_buffers, first);
}
if (n_alloc)
{
@@ -208,7 +204,7 @@ test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first,
n_alloc);
- _vec_len (allocated_buffers) = first + actual_alloc;
+ vec_set_len (allocated_buffers, first + actual_alloc);
if (actual_alloc < n_alloc)
vlib_cli_output (vm, "WARNING: only allocated %d buffers",
@@ -250,14 +246,12 @@ test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = {
.path = "test dpdk buffer",
.short_help = "test dpdk buffer [allocate <nn>] [free <nn>]",
.function = test_dpdk_buffer,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
@@ -265,6 +259,7 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
{
unformat_input_t _line_input, *line_input = &_line_input;
dpdk_main_t *dm = &dpdk_main;
+ vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hw;
dpdk_device_t *xd;
u32 hw_if_index = (u32) ~ 0;
@@ -277,9 +272,8 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
+ if (unformat (line_input, "%U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
;
else if (unformat (line_input, "tx %d", &nb_tx_desc))
;
@@ -299,30 +293,21 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
goto done;
}
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
+ hw = vnet_get_hw_interface (vnm, hw_if_index);
xd = vec_elt_at_index (dm->devices, hw->dev_instance);
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- {
- error =
- clib_error_return (0,
- "number of descriptors can be set only for "
- "physical devices");
- goto done;
- }
-
- if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) &&
- (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc))
+ if ((nb_rx_desc == (u32) ~0 || nb_rx_desc == xd->conf.n_rx_desc) &&
+ (nb_tx_desc == (u32) ~0 || nb_tx_desc == xd->conf.n_tx_desc))
{
error = clib_error_return (0, "nothing changed");
goto done;
}
if (nb_rx_desc != (u32) ~ 0)
- xd->nb_rx_desc = nb_rx_desc;
+ xd->conf.n_rx_desc = nb_rx_desc;
if (nb_tx_desc != (u32) ~ 0)
- xd->nb_tx_desc = nb_tx_desc;
+ xd->conf.n_tx_desc = nb_tx_desc;
dpdk_device_setup (xd);
@@ -345,13 +330,11 @@ done:
* Example of how to set the DPDK interface descriptors:
* @cliexcmd{set dpdk interface descriptors GigabitEthernet0/8/0 rx 512 tx 512}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = {
.path = "set dpdk interface descriptors",
.short_help = "set dpdk interface descriptors <interface> [rx <nn>] [tx <nn>]",
.function = set_dpdk_if_desc,
};
-/* *INDENT-ON* */
static clib_error_t *
show_dpdk_version_command_fn (vlib_main_t * vm,
@@ -373,16 +356,15 @@ show_dpdk_version_command_fn (vlib_main_t * vm,
* Example of how to display how many DPDK buffer test command has allocated:
* @cliexstart{show dpdk version}
* DPDK Version: DPDK 16.11.0
- * DPDK EAL init args: -c 1 -n 4 --huge-dir /run/vpp/hugepages --file-prefix vpp -w 0000:00:08.0 -w 0000:00:09.0 --master-lcore 0 --socket-mem 256
+ * DPDK EAL init args: --in-memory --no-telemetry --file-prefix vpp
+ * -w 0000:00:08.0 -w 0000:00:09.0
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vpe_version_command, static) = {
.path = "show dpdk version",
.short_help = "show dpdk version",
.function = show_dpdk_version_command_fn,
};
-/* *INDENT-ON* */
/* Dummy function to get us linked in. */
void
diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c
index 89046d1a8c0..7a49c5aaef2 100644
--- a/src/plugins/dpdk/device/common.c
+++ b/src/plugins/dpdk/device/common.c
@@ -29,11 +29,26 @@
#include <dpdk/device/dpdk_priv.h>
#include <vppinfra/error.h>
+/* DPDK TX offload to vnet hw interface caps mapppings */
+static struct
+{
+ u64 offload;
+ vnet_hw_if_caps_t caps;
+} tx_off_caps_map[] = {
+ { RTE_ETH_TX_OFFLOAD_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_TCP_CKSUM, VNET_HW_IF_CAP_TX_TCP_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_OUTER_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_OUTER_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_TCP_TSO, VNET_HW_IF_CAP_TCP_GSO },
+ { RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO, VNET_HW_IF_CAP_VXLAN_TNL_GSO }
+};
+
void
dpdk_device_error (dpdk_device_t * xd, char *str, int rv)
{
- dpdk_log_err ("Interface %U error %d: %s",
- format_dpdk_device_name, xd->port_id, rv, rte_strerror (rv));
+ dpdk_log_err ("Interface %U error %d: %s", format_dpdk_device_name,
+ xd->device_index, rv, rte_strerror (rv));
xd->errors = clib_error_return (xd->errors, "%s[port:%d, errno:%d]: %s",
str, xd->port_id, rv, rte_strerror (rv));
}
@@ -41,14 +56,16 @@ dpdk_device_error (dpdk_device_t * xd, char *str, int rv)
void
dpdk_device_setup (dpdk_device_t * xd)
{
- dpdk_main_t *dm = &dpdk_main;
vlib_main_t *vm = vlib_get_main ();
vnet_main_t *vnm = vnet_get_main ();
- vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index);
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+ u16 buf_sz = vlib_buffer_get_default_data_size (vm);
+ vnet_hw_if_caps_change_t caps = {};
struct rte_eth_dev_info dev_info;
- u64 bitmap;
+ struct rte_eth_conf conf = {};
+ u64 rxo, txo;
+ u32 max_frame_size;
int rv;
int j;
@@ -59,70 +76,152 @@ dpdk_device_setup (dpdk_device_t * xd)
if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
{
- vnet_hw_interface_set_flags (dm->vnet_main, xd->hw_if_index, 0);
+ vnet_hw_interface_set_flags (vnm, xd->hw_if_index, 0);
dpdk_device_stop (xd);
}
- /* Enable flow director when flows exist */
- if (xd->pmd == VNET_DPDK_PMD_I40E)
+ rte_eth_dev_info_get (xd->port_id, &dev_info);
+
+ dpdk_log_debug ("[%u] configuring device %U", xd->port_id,
+ format_dpdk_rte_device, dev_info.device);
+
+ /* create rx and tx offload wishlist */
+ rxo = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
+ txo = 0;
+
+ if (xd->conf.enable_tcp_udp_checksum)
+ rxo |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
+
+ if (xd->conf.disable_tx_checksum_offload == 0 &&
+ xd->conf.enable_outer_checksum_offload)
+ txo |=
+ RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
+
+ if (xd->conf.disable_tx_checksum_offload == 0)
+ txo |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
+ RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
+
+ if (xd->conf.disable_multi_seg == 0)
{
- if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0)
- xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_PERFECT;
- else
- xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_NONE;
+ txo |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
+ rxo |= RTE_ETH_RX_OFFLOAD_SCATTER;
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ rxo |= DEV_RX_OFFLOAD_JUMBO_FRAME;
+#endif
}
- rte_eth_dev_info_get (xd->port_id, &dev_info);
-
- bitmap = xd->port_conf.txmode.offloads & ~dev_info.tx_offload_capa;
- if (bitmap)
+ if (xd->conf.enable_lro)
+ rxo |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
+
+ /* per-device offload config */
+ if (xd->conf.enable_tso)
+ txo |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_TSO |
+ RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
+
+ if (xd->conf.disable_rx_scatter)
+ rxo &= ~RTE_ETH_RX_OFFLOAD_SCATTER;
+
+ /* mask unsupported offloads */
+ rxo &= dev_info.rx_offload_capa;
+ txo &= dev_info.tx_offload_capa;
+
+ dpdk_log_debug ("[%u] Supported RX offloads: %U", xd->port_id,
+ format_dpdk_rx_offload_caps, dev_info.rx_offload_capa);
+ dpdk_log_debug ("[%u] Configured RX offloads: %U", xd->port_id,
+ format_dpdk_rx_offload_caps, rxo);
+ dpdk_log_debug ("[%u] Supported TX offloads: %U", xd->port_id,
+ format_dpdk_tx_offload_caps, dev_info.tx_offload_capa);
+ dpdk_log_debug ("[%u] Configured TX offloads: %U", xd->port_id,
+ format_dpdk_tx_offload_caps, txo);
+
+ /* finalize configuration */
+ conf.rxmode.offloads = rxo;
+ conf.txmode.offloads = txo;
+ if (rxo & RTE_ETH_RX_OFFLOAD_TCP_LRO)
+ conf.rxmode.max_lro_pkt_size = xd->conf.max_lro_pkt_size;
+
+ if (xd->conf.enable_lsc_int)
+ conf.intr_conf.lsc = 1;
+ if (xd->conf.enable_rxq_int)
+ conf.intr_conf.rxq = 1;
+
+ conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
+ if (xd->conf.n_rx_queues > 1)
{
- dpdk_log_warn ("unsupported tx offloads requested on port %u: %U",
- xd->port_id, format_dpdk_tx_offload_caps, bitmap);
- xd->port_conf.txmode.offloads ^= bitmap;
+ if (xd->conf.disable_rss == 0)
+ {
+ conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
+ conf.rx_adv_conf.rss_conf.rss_hf = xd->conf.rss_hf;
+ }
}
- bitmap = xd->port_conf.rxmode.offloads & ~dev_info.rx_offload_capa;
- if (bitmap)
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ if (rxo & DEV_RX_OFFLOAD_JUMBO_FRAME)
+ {
+ conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
+ xd->max_supported_frame_size = dev_info.max_rx_pktlen;
+ }
+ else
{
- dpdk_log_warn ("unsupported rx offloads requested on port %u: %U",
- xd->port_id, format_dpdk_rx_offload_caps, bitmap);
- xd->port_conf.rxmode.offloads ^= bitmap;
+ xd->max_supported_frame_size =
+ clib_min (1500 + xd->driver_frame_overhead, buf_sz);
}
+#else
+ if (xd->conf.disable_multi_seg)
+ xd->max_supported_frame_size = clib_min (dev_info.max_rx_pktlen, buf_sz);
+ else
+ xd->max_supported_frame_size = dev_info.max_rx_pktlen;
+#endif
+
+ max_frame_size = clib_min (xd->max_supported_frame_size,
+ ethernet_main.default_mtu + hi->frame_overhead);
- rv = rte_eth_dev_configure (xd->port_id, xd->rx_q_used,
- xd->tx_q_used, &xd->port_conf);
+#if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0)
+ conf.rxmode.mtu = max_frame_size - xd->driver_frame_overhead;
+#endif
- if (rv < 0)
+retry:
+ rv = rte_eth_dev_configure (xd->port_id, xd->conf.n_rx_queues,
+ xd->conf.n_tx_queues, &conf);
+ if (rv < 0 && conf.intr_conf.rxq)
{
- dpdk_device_error (xd, "rte_eth_dev_configure", rv);
- goto error;
+ conf.intr_conf.rxq = 0;
+ goto retry;
}
- vec_validate_aligned (xd->tx_queues, xd->tx_q_used - 1,
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ rte_eth_dev_set_mtu (xd->port_id,
+ max_frame_size - xd->driver_frame_overhead);
+#endif
+
+ hi->max_frame_size = 0;
+ vnet_hw_interface_set_max_frame_size (vnm, xd->hw_if_index, max_frame_size);
+ dpdk_log_debug ("[%u] max_frame_size %u max max_frame_size %u "
+ "driver_frame_overhead %u",
+ xd->port_id, hi->max_frame_size,
+ xd->max_supported_frame_size, xd->driver_frame_overhead);
+
+ vec_validate_aligned (xd->tx_queues, xd->conf.n_tx_queues - 1,
CLIB_CACHE_LINE_BYTES);
- for (j = 0; j < xd->tx_q_used; j++)
+ for (j = 0; j < xd->conf.n_tx_queues; j++)
{
- rv =
- rte_eth_tx_queue_setup (xd->port_id, j, xd->nb_tx_desc,
- xd->cpu_socket, &xd->tx_conf);
+ rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc,
+ xd->cpu_socket, 0);
/* retry with any other CPU socket */
if (rv < 0)
- rv =
- rte_eth_tx_queue_setup (xd->port_id, j,
- xd->nb_tx_desc, SOCKET_ID_ANY,
- &xd->tx_conf);
+ rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc,
+ SOCKET_ID_ANY, 0);
if (rv < 0)
dpdk_device_error (xd, "rte_eth_tx_queue_setup", rv);
- if (xd->tx_q_used < tm->n_vlib_mains)
- clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock);
+ clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock);
}
- vec_validate_aligned (xd->rx_queues, xd->rx_q_used - 1,
+ vec_validate_aligned (xd->rx_queues, xd->conf.n_rx_queues - 1,
CLIB_CACHE_LINE_BYTES);
- for (j = 0; j < xd->rx_q_used; j++)
+
+ for (j = 0; j < xd->conf.n_rx_queues; j++)
{
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, j);
u8 bpidx = vlib_buffer_pool_get_default_for_numa (
@@ -130,12 +229,12 @@ dpdk_device_setup (dpdk_device_t * xd)
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, bpidx);
struct rte_mempool *mp = dpdk_mempool_by_buffer_pool_index[bpidx];
- rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc,
+ rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc,
xd->cpu_socket, 0, mp);
/* retry with any other CPU socket */
if (rv < 0)
- rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc,
+ rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc,
SOCKET_ID_ANY, 0, mp);
rxq->buffer_pool_index = bp->index;
@@ -147,7 +246,40 @@ dpdk_device_setup (dpdk_device_t * xd)
if (vec_len (xd->errors))
goto error;
- rte_eth_dev_set_mtu (xd->port_id, hi->max_packet_bytes);
+ xd->buffer_flags =
+ (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID);
+
+ if ((rxo & (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM)) ==
+ (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM))
+ xd->buffer_flags |=
+ (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_RX_IP4_CKSUM,
+ rxo & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM);
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_MAYBE_MULTISEG,
+ rxo & RTE_ETH_RX_OFFLOAD_SCATTER);
+ dpdk_device_flag_set (
+ xd, DPDK_DEVICE_FLAG_TX_OFFLOAD,
+ (txo & (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) ==
+ (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM));
+
+ /* unconditionally set mac filtering cap */
+ caps.val = caps.mask = VNET_HW_IF_CAP_MAC_FILTER;
+
+ ethernet_set_flags (vnm, xd->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+
+ for (int i = 0; i < ARRAY_LEN (tx_off_caps_map); i++)
+ {
+ __typeof__ (tx_off_caps_map[0]) *v = tx_off_caps_map + i;
+ caps.mask |= v->caps;
+ if ((v->offload & txo) == v->offload)
+ caps.val |= v->caps;
+ }
+
+ vnet_hw_if_change_caps (vnm, xd->hw_if_index, &caps);
+ xd->enabled_rx_off = rxo;
+ xd->enabled_tx_off = txo;
if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
dpdk_device_start (xd);
@@ -187,17 +319,18 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+ int int_mode = 0;
if (!hi)
return;
- if (!xd->port_conf.intr_conf.rxq)
+ if (!xd->conf.enable_rxq_int)
return;
/* Probe for interrupt support */
if (rte_eth_dev_rx_intr_enable (xd->port_id, 0))
{
dpdk_log_info ("probe for interrupt mode for device %U. Failed.\n",
- format_dpdk_device_name, xd->port_id);
+ format_dpdk_device_name, xd->device_index);
}
else
{
@@ -205,13 +338,13 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
if (!(xd->flags & DPDK_DEVICE_FLAG_INT_UNMASKABLE))
rte_eth_dev_rx_intr_disable (xd->port_id, 0);
dpdk_log_info ("Probe for interrupt mode for device %U. Success.\n",
- format_dpdk_device_name, xd->port_id);
+ format_dpdk_device_name, xd->device_index);
}
if (xd->flags & DPDK_DEVICE_FLAG_INT_SUPPORTED)
{
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
- for (int q = 0; q < xd->rx_q_used; q++)
+ int_mode = 1;
+ for (int q = 0; q < xd->conf.n_rx_queues; q++)
{
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
clib_file_t f = { 0 };
@@ -219,15 +352,15 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
if (rxq->efd < 0)
{
xd->flags &= ~DPDK_DEVICE_FLAG_INT_SUPPORTED;
- hi->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ int_mode = 0;
break;
}
f.read_function = dpdk_rx_read_ready;
f.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
f.file_descriptor = rxq->efd;
f.private_data = rxq->queue_index;
- f.description =
- format (0, "%U queue %u", format_dpdk_device_name, xd->port_id, q);
+ f.description = format (0, "%U queue %u", format_dpdk_device_name,
+ xd->device_index, q);
rxq->clib_file_index = clib_file_add (&file_main, &f);
vnet_hw_if_set_rx_queue_file_index (vnm, rxq->queue_index,
rxq->clib_file_index);
@@ -240,6 +373,11 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
}
}
}
+
+ if (int_mode)
+ vnet_hw_if_set_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
+ else
+ vnet_hw_if_unset_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
vnet_hw_if_update_runtime_data (vnm, xd->hw_if_index);
}
@@ -259,6 +397,11 @@ dpdk_device_start (dpdk_device_t * xd)
return;
}
+ dpdk_log_debug ("[%u] RX burst function: %U", xd->port_id,
+ format_dpdk_burst_fn, xd, VLIB_RX);
+ dpdk_log_debug ("[%u] TX burst function: %U", xd->port_id,
+ format_dpdk_burst_fn, xd, VLIB_TX);
+
dpdk_setup_interrupts (xd);
if (xd->default_mac_address)
@@ -275,8 +418,8 @@ dpdk_device_start (dpdk_device_t * xd)
rte_eth_allmulticast_enable (xd->port_id);
- dpdk_log_info ("Interface %U started",
- format_dpdk_device_name, xd->port_id);
+ dpdk_log_info ("Interface %U started", format_dpdk_device_name,
+ xd->device_index);
}
void
@@ -289,8 +432,8 @@ dpdk_device_stop (dpdk_device_t * xd)
rte_eth_dev_stop (xd->port_id);
clib_memset (&xd->link, 0, sizeof (struct rte_eth_link));
- dpdk_log_info ("Interface %U stopped",
- format_dpdk_device_name, xd->port_id);
+ dpdk_log_info ("Interface %U stopped", format_dpdk_device_name,
+ xd->device_index);
}
void vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
@@ -311,10 +454,11 @@ dpdk_port_state_callback_inline (dpdk_portid_t port_id,
rte_eth_link_get_nowait (port_id, &link);
u8 link_up = link.link_status;
if (link_up)
- dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s",
- port_id, (unsigned) link.link_speed,
- (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
- "full-duplex" : "half-duplex");
+ dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s", port_id,
+ (unsigned) link.link_speed,
+ (link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ?
+ "full-duplex" :
+ "half-duplex");
else
dpdk_log_info ("Port %d Link Down\n\n", port_id);
@@ -337,12 +481,17 @@ dpdk_get_pci_device (const struct rte_eth_dev_info *info)
const struct rte_bus *bus;
bus = rte_bus_find_by_device (info->device);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (bus && !strcmp (rte_bus_name (bus), "pci"))
+#else
if (bus && !strcmp (bus->name, "pci"))
+#endif
return RTE_DEV_TO_PCI (info->device);
else
return NULL;
}
+#ifdef __linux__
/* If this device is VMBUS return pointer to info, otherwise NULL */
struct rte_vmbus_device *
dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
@@ -350,11 +499,16 @@ dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
const struct rte_bus *bus;
bus = rte_bus_find_by_device (info->device);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (bus && !strcmp (rte_bus_name (bus), "vmbus"))
+#else
if (bus && !strcmp (bus->name, "vmbus"))
+#endif
return container_of (info->device, struct rte_vmbus_device, device);
else
return NULL;
}
+#endif /* __linux__ */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c
index 7c083e1dcf4..0ba59562838 100644
--- a/src/plugins/dpdk/device/device.c
+++ b/src/plugins/dpdk/device/device.c
@@ -25,7 +25,6 @@
#include <vlib/unix/unix.h>
#define foreach_dpdk_tx_func_error \
- _(BAD_RETVAL, "DPDK tx function returned an error") \
_(PKT_DROP, "Tx packet drops (dpdk tx failure)")
typedef enum
@@ -153,52 +152,30 @@ dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b,
* support multiple queues. It returns the number of packets untransmitted
* If all packets are transmitted (the normal case), the function returns 0.
*/
-static_always_inline
- u32 tx_burst_vector_internal (vlib_main_t * vm,
- dpdk_device_t * xd,
- struct rte_mbuf **mb, u32 n_left)
+static_always_inline u32
+tx_burst_vector_internal (vlib_main_t *vm, dpdk_device_t *xd,
+ struct rte_mbuf **mb, u32 n_left, int queue_id,
+ u8 is_shared)
{
- dpdk_main_t *dm = &dpdk_main;
dpdk_tx_queue_t *txq;
u32 n_retry;
int n_sent = 0;
- int queue_id;
n_retry = 16;
- queue_id = vm->thread_index % xd->tx_q_used;
txq = vec_elt_at_index (xd->tx_queues, queue_id);
do
{
- clib_spinlock_lock_if_init (&txq->lock);
+ if (is_shared)
+ clib_spinlock_lock (&txq->lock);
- if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD))
- {
- /* no wrap, transmit in one burst */
- n_sent = rte_eth_tx_burst (xd->port_id, queue_id, mb, n_left);
- n_retry--;
- }
- else
- {
- ASSERT (0);
- n_sent = 0;
- }
-
- clib_spinlock_unlock_if_init (&txq->lock);
-
- if (PREDICT_FALSE (n_sent < 0))
- {
- // emit non-fatal message, bump counter
- vnet_main_t *vnm = dm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- u32 node_index;
+ /* no wrap, transmit in one burst */
+ n_sent = rte_eth_tx_burst (xd->port_id, queue_id, mb, n_left);
- node_index = vec_elt_at_index (im->hw_interfaces,
- xd->hw_if_index)->tx_node_index;
+ if (is_shared)
+ clib_spinlock_unlock (&txq->lock);
- vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1);
- return n_left; // untransmitted packets
- }
+ n_retry--;
n_left -= n_sent;
mb += n_sent;
}
@@ -221,7 +198,8 @@ dpdk_buffer_tx_offload (dpdk_device_t * xd, vlib_buffer_t * b,
{
int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
u32 tso = b->flags & VNET_BUFFER_F_GSO, max_pkt_len;
- u32 ip_cksum, tcp_cksum, udp_cksum;
+ u32 ip_cksum, tcp_cksum, udp_cksum, outer_hdr_len = 0;
+ u32 outer_ip_cksum, vxlan_tunnel;
u64 ol_flags;
vnet_buffer_oflags_t oflags = 0;
@@ -233,25 +211,49 @@ dpdk_buffer_tx_offload (dpdk_device_t * xd, vlib_buffer_t * b,
ip_cksum = oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
tcp_cksum = oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
udp_cksum = oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+ outer_ip_cksum = oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM;
+ vxlan_tunnel = oflags & VNET_BUFFER_OFFLOAD_F_TNL_VXLAN;
- mb->l2_len = vnet_buffer (b)->l3_hdr_offset - b->current_data;
- mb->l3_len = vnet_buffer (b)->l4_hdr_offset -
- vnet_buffer (b)->l3_hdr_offset;
- mb->outer_l3_len = 0;
- mb->outer_l2_len = 0;
- ol_flags = is_ip4 ? PKT_TX_IPV4 : PKT_TX_IPV6;
- ol_flags |= ip_cksum ? PKT_TX_IP_CKSUM : 0;
- ol_flags |= tcp_cksum ? PKT_TX_TCP_CKSUM : 0;
- ol_flags |= udp_cksum ? PKT_TX_UDP_CKSUM : 0;
+ ol_flags = is_ip4 ? RTE_MBUF_F_TX_IPV4 : RTE_MBUF_F_TX_IPV6;
+ ol_flags |= ip_cksum ? RTE_MBUF_F_TX_IP_CKSUM : 0;
+ ol_flags |= tcp_cksum ? RTE_MBUF_F_TX_TCP_CKSUM : 0;
+ ol_flags |= udp_cksum ? RTE_MBUF_F_TX_UDP_CKSUM : 0;
+
+ if (vxlan_tunnel)
+ {
+ ol_flags |= outer_ip_cksum ?
+ RTE_MBUF_F_TX_OUTER_IPV4 | RTE_MBUF_F_TX_OUTER_IP_CKSUM :
+ RTE_MBUF_F_TX_OUTER_IPV6;
+ ol_flags |= RTE_MBUF_F_TX_TUNNEL_VXLAN;
+ mb->l2_len =
+ vnet_buffer (b)->l3_hdr_offset - vnet_buffer2 (b)->outer_l4_hdr_offset;
+ mb->l3_len =
+ vnet_buffer (b)->l4_hdr_offset - vnet_buffer (b)->l3_hdr_offset;
+ mb->outer_l2_len =
+ vnet_buffer2 (b)->outer_l3_hdr_offset - b->current_data;
+ mb->outer_l3_len = vnet_buffer2 (b)->outer_l4_hdr_offset -
+ vnet_buffer2 (b)->outer_l3_hdr_offset;
+ outer_hdr_len = mb->outer_l2_len + mb->outer_l3_len;
+ }
+ else
+ {
+ mb->l2_len = vnet_buffer (b)->l3_hdr_offset - b->current_data;
+ mb->l3_len =
+ vnet_buffer (b)->l4_hdr_offset - vnet_buffer (b)->l3_hdr_offset;
+ mb->outer_l2_len = 0;
+ mb->outer_l3_len = 0;
+ }
if (tso)
{
mb->l4_len = vnet_buffer2 (b)->gso_l4_hdr_sz;
mb->tso_segsz = vnet_buffer2 (b)->gso_size;
/* ensure packet is large enough to require tso */
- max_pkt_len = mb->l2_len + mb->l3_len + mb->l4_len + mb->tso_segsz;
+ max_pkt_len =
+ outer_hdr_len + mb->l2_len + mb->l3_len + mb->l4_len + mb->tso_segsz;
if (mb->tso_segsz != 0 && mb->pkt_len > max_pkt_len)
- ol_flags |= (tcp_cksum ? PKT_TX_TCP_SEG : PKT_TX_UDP_SEG);
+ ol_flags |=
+ (tcp_cksum ? RTE_MBUF_F_TX_TCP_SEG : RTE_MBUF_F_TX_UDP_SEG);
}
mb->ol_flags |= ol_flags;
@@ -274,11 +276,13 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
dpdk_main_t *dm = &dpdk_main;
vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance);
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (f);
u32 n_packets = f->n_vectors;
u32 n_left;
u32 thread_index = vm->thread_index;
- int queue_id = thread_index;
- u32 tx_pkts = 0, all_or_flags = 0;
+ int queue_id = tf->queue_id;
+ u8 is_shared = tf->shared_queue;
+ u32 tx_pkts = 0;
dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data,
thread_index);
struct rte_mbuf **mb;
@@ -310,7 +314,6 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
b[3] = vlib_buffer_from_rte_mbuf (mb[3]);
or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
- all_or_flags |= or_flags;
if (or_flags & VLIB_BUFFER_NEXT_PRESENT)
{
@@ -368,7 +371,6 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
b[1] = vlib_buffer_from_rte_mbuf (mb[1]);
or_flags = b[0]->flags | b[1]->flags;
- all_or_flags |= or_flags;
if (or_flags & VLIB_BUFFER_NEXT_PRESENT)
{
@@ -404,7 +406,6 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
while (n_left > 0)
{
b[0] = vlib_buffer_from_rte_mbuf (mb[0]);
- all_or_flags |= b[0]->flags;
dpdk_validate_rte_mbuf (vm, b[0], 1);
dpdk_buffer_tx_offload (xd, b[0], mb[0]);
@@ -419,7 +420,8 @@ VNET_DEVICE_CLASS_TX_FN (dpdk_device_class) (vlib_main_t * vm,
/* transmit as many packets as possible */
tx_pkts = n_packets = mb - ptd->mbufs;
- n_left = tx_burst_vector_internal (vm, xd, ptd->mbufs, n_packets);
+ n_left = tx_burst_vector_internal (vm, xd, ptd->mbufs, n_packets, queue_id,
+ is_shared);
{
/* If there is no callback then drop any non-transmitted packets */
@@ -475,7 +477,7 @@ dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
if (vec_len (xd->errors))
return clib_error_create ("Interface start failed");
xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP;
- f64 now = vlib_time_now (dm->vlib_main);
+ f64 now = vlib_time_now (vlib_get_main ());
dpdk_update_counters (xd, now);
dpdk_update_link_state (xd, now);
}
@@ -511,7 +513,7 @@ dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
}
xd->per_interface_next_index =
- vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index);
+ vlib_node_add_next (vlib_get_main (), dpdk_input_node.index, node_index);
}
@@ -533,11 +535,8 @@ dpdk_subif_add_del_function (vnet_main_t * vnm,
else if (xd->num_subifs)
xd->num_subifs--;
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- goto done;
-
/* currently we program VLANS only for IXGBE VF */
- if (xd->pmd != VNET_DPDK_PMD_IXGBEVF)
+ if (xd->driver->program_vlans == 0)
goto done;
if (t->sub.eth.flags.no_tags == 1)
@@ -551,7 +550,7 @@ dpdk_subif_add_del_function (vnet_main_t * vnm,
}
vlan_offload = rte_eth_dev_get_vlan_offload (xd->port_id);
- vlan_offload |= ETH_VLAN_FILTER_OFFLOAD;
+ vlan_offload |= RTE_ETH_VLAN_FILTER_OFFLOAD;
if ((r = rte_eth_dev_set_vlan_offload (xd->port_id, vlan_offload)))
{
@@ -625,7 +624,6 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
clib_memset (reta, 0, dev_info.reta_size * sizeof (*reta));
valid_queue_count = 0;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, bitmap) {
if (i >= dev_info.nb_rx_queues)
{
@@ -634,7 +632,6 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
}
reta[valid_queue_count++] = i;
}
- /* *INDENT-ON* */
/* check valid_queue_count not zero, make coverity happy */
if (valid_queue_count == 0)
@@ -651,10 +648,8 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
}
/* update reta table */
- reta_conf =
- (struct rte_eth_rss_reta_entry64 *) clib_mem_alloc (dev_info.reta_size /
- RTE_RETA_GROUP_SIZE *
- sizeof (*reta_conf));
+ reta_conf = (struct rte_eth_rss_reta_entry64 *) clib_mem_alloc (
+ dev_info.reta_size / RTE_ETH_RETA_GROUP_SIZE * sizeof (*reta_conf));
if (reta_conf == NULL)
{
err = clib_error_return (0, "clib_mem_alloc failed");
@@ -662,13 +657,13 @@ dpdk_interface_set_rss_queues (struct vnet_main_t *vnm,
}
clib_memset (reta_conf, 0,
- dev_info.reta_size / RTE_RETA_GROUP_SIZE *
- sizeof (*reta_conf));
+ dev_info.reta_size / RTE_ETH_RETA_GROUP_SIZE *
+ sizeof (*reta_conf));
for (i = 0; i < dev_info.reta_size; i++)
{
- uint32_t reta_id = i / RTE_RETA_GROUP_SIZE;
- uint32_t reta_pos = i % RTE_RETA_GROUP_SIZE;
+ uint32_t reta_id = i / RTE_ETH_RETA_GROUP_SIZE;
+ uint32_t reta_pos = i % RTE_ETH_RETA_GROUP_SIZE;
reta_conf[reta_id].mask = UINT64_MAX;
reta_conf[reta_id].reta[reta_pos] = reta[i];
@@ -726,7 +721,6 @@ dpdk_interface_rx_mode_change (vnet_main_t *vnm, u32 hw_if_index, u32 qid,
return 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (dpdk_device_class) = {
.name = "dpdk",
.tx_function_n_errors = DPDK_TX_FUNC_N_ERROR,
@@ -745,7 +739,6 @@ VNET_DEVICE_CLASS (dpdk_device_class) = {
.set_rss_queues_function = dpdk_interface_set_rss_queues,
.rx_mode_change_function = dpdk_interface_rx_mode_change,
};
-/* *INDENT-ON* */
#define UP_DOWN_FLAG_EVENT 1
@@ -792,14 +785,12 @@ admin_up_down_process (vlib_main_t * vm,
return 0; /* or not */
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (admin_up_down_process_node) = {
.function = admin_up_down_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "admin-up-down-process",
.process_log2_n_stack_bytes = 17, // 256KB
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index 66794a4e67c..88a4d9ff618 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -22,27 +22,25 @@
#include <rte_config.h>
-#include <rte_common.h>
-#include <rte_dev.h>
-#include <rte_memory.h>
#include <rte_eal.h>
-#include <rte_per_lcore.h>
-#include <rte_cycles.h>
-#include <rte_lcore.h>
-#include <rte_per_lcore.h>
-#include <rte_interrupts.h>
-#include <rte_pci.h>
+#include <rte_bus_pci.h>
+#ifdef __linux__
#include <rte_bus_vmbus.h>
-#include <rte_ether.h>
+#endif /* __linux__ */
#include <rte_ethdev.h>
-#include <rte_ring.h>
-#include <rte_mempool.h>
-#include <rte_mbuf.h>
#include <rte_version.h>
-#include <rte_sched.h>
#include <rte_net.h>
-#include <rte_bus_pci.h>
-#include <rte_flow.h>
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+#include <rte_bus.h>
+#include <rte_pci.h>
+#include <ctype.h>
+
+#include <bus_driver.h>
+#include <bus_pci_driver.h>
+#ifdef __linux__
+#include <bus_vmbus_driver.h>
+#endif /* __linux__ */
+#endif
#include <vnet/devices/devices.h>
@@ -60,79 +58,11 @@ extern vnet_device_class_t dpdk_device_class;
extern vlib_node_registration_t dpdk_input_node;
extern vlib_node_registration_t admin_up_down_process_node;
-#if RTE_VERSION < RTE_VERSION_NUM(20, 8, 0, 0)
-#define DPDK_MLX5_PMD_NAME "net_mlx5"
-#else
-#define DPDK_MLX5_PMD_NAME "mlx5_pci"
-#endif
-
-#define foreach_dpdk_pmd \
- _ ("net_thunderx", THUNDERX) \
- _ ("net_e1000_em", E1000EM) \
- _ ("net_e1000_igb", IGB) \
- _ ("net_e1000_igb_vf", IGBVF) \
- _ ("net_ixgbe", IXGBE) \
- _ ("net_ixgbe_vf", IXGBEVF) \
- _ ("net_i40e", I40E) \
- _ ("net_i40e_vf", I40EVF) \
- _ ("net_ice", ICE) \
- _ ("net_iavf", IAVF) \
- _ ("net_igc", IGC) \
- _ ("net_virtio", VIRTIO) \
- _ ("net_enic", ENIC) \
- _ ("net_vmxnet3", VMXNET3) \
- _ ("AF_PACKET PMD", AF_PACKET) \
- _ ("net_fm10k", FM10K) \
- _ ("net_cxgbe", CXGBE) \
- _ ("net_mlx4", MLX4) \
- _ (DPDK_MLX5_PMD_NAME, MLX5) \
- _ ("net_dpaa2", DPAA2) \
- _ ("net_virtio_user", VIRTIO_USER) \
- _ ("net_vhost", VHOST_ETHER) \
- _ ("net_ena", ENA) \
- _ ("net_failsafe", FAILSAFE) \
- _ ("net_liovf", LIOVF_ETHER) \
- _ ("net_qede", QEDE) \
- _ ("net_netvsc", NETVSC) \
- _ ("net_bnxt", BNXT)
-
-typedef enum
-{
- VNET_DPDK_PMD_NONE,
-#define _(s,f) VNET_DPDK_PMD_##f,
- foreach_dpdk_pmd
-#undef _
- VNET_DPDK_PMD_UNKNOWN, /* must be last */
-} dpdk_pmd_t;
-
-typedef enum
-{
- VNET_DPDK_PORT_TYPE_ETH_1G,
- VNET_DPDK_PORT_TYPE_ETH_2_5G,
- VNET_DPDK_PORT_TYPE_ETH_5G,
- VNET_DPDK_PORT_TYPE_ETH_10G,
- VNET_DPDK_PORT_TYPE_ETH_20G,
- VNET_DPDK_PORT_TYPE_ETH_25G,
- VNET_DPDK_PORT_TYPE_ETH_40G,
- VNET_DPDK_PORT_TYPE_ETH_50G,
- VNET_DPDK_PORT_TYPE_ETH_56G,
- VNET_DPDK_PORT_TYPE_ETH_100G,
- VNET_DPDK_PORT_TYPE_ETH_SWITCH,
- VNET_DPDK_PORT_TYPE_AF_PACKET,
- VNET_DPDK_PORT_TYPE_ETH_VF,
- VNET_DPDK_PORT_TYPE_VIRTIO_USER,
- VNET_DPDK_PORT_TYPE_VHOST_ETHER,
- VNET_DPDK_PORT_TYPE_FAILSAFE,
- VNET_DPDK_PORT_TYPE_NETVSC,
- VNET_DPDK_PORT_TYPE_UNKNOWN,
-} dpdk_port_type_t;
-
typedef uint16_t dpdk_portid_t;
#define foreach_dpdk_device_flags \
_ (0, ADMIN_UP, "admin-up") \
_ (1, PROMISC, "promisc") \
- _ (2, PMD, "pmd") \
_ (3, PMD_INIT_FAIL, "pmd-init-fail") \
_ (4, MAYBE_MULTISEG, "maybe-multiseg") \
_ (5, HAVE_SUBIF, "subif") \
@@ -143,12 +73,12 @@ typedef uint16_t dpdk_portid_t;
_ (13, INT_SUPPORTED, "int-supported") \
_ (14, INT_UNMASKABLE, "int-unmaskable")
-enum
+typedef enum
{
#define _(a, b, c) DPDK_DEVICE_FLAG_##b = (1 << a),
foreach_dpdk_device_flags
#undef _
-};
+} dpdk_device_flag_t;
typedef struct
{
@@ -177,10 +107,62 @@ typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
clib_spinlock_t lock;
+ u32 queue_index;
} dpdk_tx_queue_t;
typedef struct
{
+ const char *name;
+ const char *desc;
+} dpdk_driver_name_t;
+
+typedef struct
+{
+ dpdk_driver_name_t *drivers;
+ const char *interface_name_prefix;
+ u16 n_rx_desc;
+ u16 n_tx_desc;
+ u32 supported_flow_actions;
+ u32 enable_lsc_int : 1;
+ u32 enable_rxq_int : 1;
+ u32 disable_rx_scatter : 1;
+ u32 program_vlans : 1;
+ u32 mq_mode_none : 1;
+ u32 interface_number_from_port_id : 1;
+ u32 use_intel_phdr_cksum : 1;
+ u32 int_unmaskable : 1;
+} dpdk_driver_t;
+
+dpdk_driver_t *dpdk_driver_find (const char *name, const char **desc);
+
+typedef union
+{
+ struct
+ {
+ u16 disable_multi_seg : 1;
+ u16 enable_lro : 1;
+ u16 enable_tso : 1;
+ u16 enable_tcp_udp_checksum : 1;
+ u16 enable_outer_checksum_offload : 1;
+ u16 enable_lsc_int : 1;
+ u16 enable_rxq_int : 1;
+ u16 disable_tx_checksum_offload : 1;
+ u16 disable_rss : 1;
+ u16 disable_rx_scatter : 1;
+ u16 n_rx_queues;
+ u16 n_tx_queues;
+ u16 n_rx_desc;
+ u16 n_tx_desc;
+ u32 max_lro_pkt_size;
+ u64 rss_hf;
+ };
+ u64 as_u64[3];
+} dpdk_port_conf_t;
+
+STATIC_ASSERT_SIZEOF (dpdk_port_conf_t, 24);
+
+typedef struct
+{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
dpdk_rx_queue_t *rx_queues;
@@ -191,33 +173,28 @@ typedef struct
u32 hw_if_index;
u32 sw_if_index;
+ u32 buffer_flags;
/* next node index if we decide to steal the rx graph arc */
u32 per_interface_next_index;
- u16 rx_q_used;
- u16 tx_q_used;
u16 flags;
/* DPDK device port number */
dpdk_portid_t port_id;
- dpdk_pmd_t pmd:8;
i8 cpu_socket;
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
- u16 nb_tx_desc;
- u16 nb_rx_desc;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ u64 enabled_tx_off;
+ u64 enabled_rx_off;
+ dpdk_driver_t *driver;
u8 *name;
- u8 *interface_name_suffix;
+ const char *if_desc;
/* number of sub-interfaces */
u16 num_subifs;
- /* PMD related */
- struct rte_eth_conf port_conf;
- struct rte_eth_txconf tx_conf;
-
/* flow related */
u32 supported_flow_actions;
dpdk_flow_entry_t *flow_entries; /* pool */
@@ -226,9 +203,6 @@ typedef struct
u32 parked_loop_count;
struct rte_flow_error last_flow_error;
- /* af_packet instance number */
- u16 af_packet_instance_num;
-
struct rte_eth_link link;
f64 time_last_link_update;
@@ -236,26 +210,36 @@ typedef struct
struct rte_eth_stats last_stats;
struct rte_eth_xstat *xstats;
f64 time_last_stats_update;
- dpdk_port_type_t port_type;
/* mac address */
u8 *default_mac_address;
+ /* maximum supported max frame size */
+ u32 max_supported_frame_size;
+
+ /* due to lack of API to get ethernet max_frame_size we store information
+ * deducted from device info */
+ u8 driver_frame_overhead;
+
/* error string */
clib_error_t *errors;
+ dpdk_port_conf_t conf;
} dpdk_device_t;
+#define DPDK_MIN_POLL_INTERVAL (0.001) /* 1msec */
+
#define DPDK_STATS_POLL_INTERVAL (10.0)
-#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */
+#define DPDK_MIN_STATS_POLL_INTERVAL DPDK_MIN_POLL_INTERVAL
#define DPDK_LINK_POLL_INTERVAL (3.0)
-#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */
-
-#define foreach_dpdk_device_config_item \
- _ (num_rx_queues) \
- _ (num_tx_queues) \
- _ (num_rx_desc) \
- _ (num_tx_desc) \
+#define DPDK_MIN_LINK_POLL_INTERVAL DPDK_MIN_POLL_INTERVAL
+
+#define foreach_dpdk_device_config_item \
+ _ (num_rx_queues) \
+ _ (num_tx_queues) \
+ _ (num_rx_desc) \
+ _ (num_tx_desc) \
+ _ (max_lro_pkt_size) \
_ (rss_fn)
typedef enum
@@ -274,11 +258,8 @@ typedef struct
};
dpdk_device_addr_type_t dev_addr_type;
u8 *name;
+ u8 *tag;
u8 is_blacklisted;
- u8 vlan_strip_offload;
-#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0
-#define DPDK_DEVICE_VLAN_STRIP_OFF 1
-#define DPDK_DEVICE_VLAN_STRIP_ON 2
#define _(x) uword x;
foreach_dpdk_device_config_item
@@ -300,9 +281,7 @@ typedef struct
u8 **eal_init_args;
u8 *eal_init_args_str;
u8 *uio_driver_name;
- u8 no_multi_seg;
- u8 enable_tcp_udp_checksum;
- u8 no_tx_checksum_offload;
+ u8 uio_bind_force;
u8 enable_telemetry;
u16 max_simd_bitwidth;
@@ -310,13 +289,6 @@ typedef struct
#define DPDK_MAX_SIMD_BITWIDTH_256 256
#define DPDK_MAX_SIMD_BITWIDTH_512 512
- /* Required config parameters */
- u8 coremask_set_manually;
- u8 nchannels_set_manually;
- u32 coremask;
- u32 nchannels;
- u32 num_crypto_mbufs;
-
/*
* format interface names ala xxxEthernet%d/%d/%d instead of
* xxxEthernet%x/%x/%x.
@@ -347,20 +319,16 @@ typedef struct
u32 buffers[DPDK_RX_BURST_SZ];
u16 next[DPDK_RX_BURST_SZ];
u16 etype[DPDK_RX_BURST_SZ];
- u16 flags[DPDK_RX_BURST_SZ];
+ u32 flags[DPDK_RX_BURST_SZ];
vlib_buffer_t buffer_template;
} dpdk_per_thread_data_t;
typedef struct
{
-
/* Devices */
dpdk_device_t *devices;
dpdk_per_thread_data_t *per_thread_data;
- /* buffer flags template, configurable to enable/disable tcp / udp cksum */
- u32 buffer_flags_template;
-
/*
* flag indicating that a posted admin up/down
* (via post_sw_interface_set_flags) is in progress
@@ -371,10 +339,8 @@ typedef struct
f64 link_state_poll_interval;
f64 stat_poll_interval;
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
dpdk_config_main_t *conf;
+ dpdk_port_conf_t default_port_conf;
/* API message ID base */
u16 msg_id_base;
@@ -382,7 +348,6 @@ typedef struct
/* logging */
vlib_log_class_t log_default;
vlib_log_class_t log_cryptodev;
- vlib_log_class_t log_ipsec;
} dpdk_main_t;
extern dpdk_main_t dpdk_main;
@@ -440,35 +405,39 @@ typedef enum
vlib_log(VLIB_LOG_LEVEL_NOTICE, dpdk_main.log_default, __VA_ARGS__)
#define dpdk_log_info(...) \
vlib_log(VLIB_LOG_LEVEL_INFO, dpdk_main.log_default, __VA_ARGS__)
+#define dpdk_log_debug(...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, dpdk_main.log_default, __VA_ARGS__)
void dpdk_update_link_state (dpdk_device_t * xd, f64 now);
-#define foreach_dpdk_rss_hf \
- _(0, ETH_RSS_FRAG_IPV4, "ipv4-frag") \
- _(1, ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
- _(2, ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
- _(3, ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
- _(4, ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
- _(5, ETH_RSS_IPV4, "ipv4") \
- _(6, ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
- _(7, ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \
- _(8, ETH_RSS_FRAG_IPV6, "ipv6-frag") \
- _(9, ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
- _(10, ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
- _(11, ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
- _(12, ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
- _(13, ETH_RSS_IPV6_EX, "ipv6-ex") \
- _(14, ETH_RSS_IPV6, "ipv6") \
- _(15, ETH_RSS_L2_PAYLOAD, "l2-payload") \
- _(16, ETH_RSS_PORT, "port") \
- _(17, ETH_RSS_VXLAN, "vxlan") \
- _(18, ETH_RSS_GENEVE, "geneve") \
- _(19, ETH_RSS_NVGRE, "nvgre") \
- _(20, ETH_RSS_GTPU, "gtpu") \
- _(60, ETH_RSS_L4_DST_ONLY, "l4-dst-only") \
- _(61, ETH_RSS_L4_SRC_ONLY, "l4-src-only") \
- _(62, ETH_RSS_L3_DST_ONLY, "l3-dst-only") \
- _(63, ETH_RSS_L3_SRC_ONLY, "l3-src-only")
+#define foreach_dpdk_rss_hf \
+ _ (0, RTE_ETH_RSS_FRAG_IPV4, "ipv4-frag") \
+ _ (1, RTE_ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
+ _ (2, RTE_ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
+ _ (3, RTE_ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
+ _ (4, RTE_ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
+ _ (5, RTE_ETH_RSS_IPV4, "ipv4") \
+ _ (6, RTE_ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
+ _ (7, RTE_ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \
+ _ (8, RTE_ETH_RSS_FRAG_IPV6, "ipv6-frag") \
+ _ (9, RTE_ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
+ _ (10, RTE_ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
+ _ (11, RTE_ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
+ _ (12, RTE_ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
+ _ (13, RTE_ETH_RSS_IPV6_EX, "ipv6-ex") \
+ _ (14, RTE_ETH_RSS_IPV6, "ipv6") \
+ _ (15, RTE_ETH_RSS_L2_PAYLOAD, "l2-payload") \
+ _ (16, RTE_ETH_RSS_PORT, "port") \
+ _ (17, RTE_ETH_RSS_VXLAN, "vxlan") \
+ _ (18, RTE_ETH_RSS_GENEVE, "geneve") \
+ _ (19, RTE_ETH_RSS_NVGRE, "nvgre") \
+ _ (20, RTE_ETH_RSS_GTPU, "gtpu") \
+ _ (21, RTE_ETH_RSS_ESP, "esp") \
+ _ (22, RTE_ETH_RSS_L2TPV3, "l2tpv3") \
+ _ (60, RTE_ETH_RSS_L4_DST_ONLY, "l4-dst-only") \
+ _ (61, RTE_ETH_RSS_L4_SRC_ONLY, "l4-src-only") \
+ _ (62, RTE_ETH_RSS_L3_DST_ONLY, "l3-dst-only") \
+ _ (63, RTE_ETH_RSS_L3_SRC_ONLY, "l3-src-only")
format_function_t format_dpdk_device_name;
format_function_t format_dpdk_device;
@@ -481,6 +450,8 @@ format_function_t format_dpdk_flow;
format_function_t format_dpdk_rss_hf_name;
format_function_t format_dpdk_rx_offload_caps;
format_function_t format_dpdk_tx_offload_caps;
+format_function_t format_dpdk_burst_fn;
+format_function_t format_dpdk_rte_device;
vnet_flow_dev_ops_function_t dpdk_flow_ops_fn;
clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn);
diff --git a/src/plugins/dpdk/device/dpdk_priv.h b/src/plugins/dpdk/device/dpdk_priv.h
index a5a8a2ad57d..cb7b185c112 100644
--- a/src/plugins/dpdk/device/dpdk_priv.h
+++ b/src/plugins/dpdk/device/dpdk_priv.h
@@ -15,15 +15,7 @@
#define DPDK_NB_RX_DESC_DEFAULT 1024
#define DPDK_NB_TX_DESC_DEFAULT 1024
-#define DPDK_NB_RX_DESC_VIRTIO 256
-#define DPDK_NB_TX_DESC_VIRTIO 256
-
-#define I40E_DEV_ID_SFP_XL710 0x1572
-#define I40E_DEV_ID_QSFP_A 0x1583
-#define I40E_DEV_ID_QSFP_B 0x1584
-#define I40E_DEV_ID_QSFP_C 0x1585
-#define I40E_DEV_ID_10G_BASE_T 0x1586
-#define I40E_DEV_ID_VF 0x154C
+#define DPDK_MAX_LRO_SIZE_DEFAULT 65536
/* These args appear by themselves */
#define foreach_eal_double_hyphen_predicate_arg \
@@ -32,10 +24,6 @@ _(no-hpet) \
_(no-huge) \
_(vmware-tsc-map)
-#define foreach_eal_single_hyphen_mandatory_arg \
-_(coremask, c) \
-_(nchannels, n) \
-
#define foreach_eal_single_hyphen_arg \
_(mem-alloc-request, m) \
_(force-ranks, r)
@@ -48,10 +36,17 @@ _(proc-type) \
_(file-prefix) \
_(vdev) \
_(log-level) \
+_(block) \
_(iova-mode) \
_(base-virtaddr)
/* clang-format on */
+static_always_inline void
+dpdk_device_flag_set (dpdk_device_t *xd, __typeof__ (xd->flags) flag, int val)
+{
+ xd->flags = val ? xd->flags | flag : xd->flags & ~flag;
+}
+
static inline void
dpdk_get_xstats (dpdk_device_t * xd)
{
@@ -69,11 +64,11 @@ dpdk_get_xstats (dpdk_device_t * xd)
ret = rte_eth_xstats_get (xd->port_id, xd->xstats, len);
if (ret < 0 || ret > len)
{
- _vec_len (xd->xstats) = 0;
+ vec_set_len (xd->xstats, 0);
return;
}
- _vec_len (xd->xstats) = len;
+ vec_set_len (xd->xstats, len);
}
#define DPDK_UPDATE_COUNTER(vnm, tidx, xd, stat, cnt) \
@@ -100,10 +95,6 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now)
vnet_main_t *vnm = vnet_get_main ();
u32 thread_index = vlib_get_thread_index ();
- /* only update counters for PMD interfaces */
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- return;
-
xd->time_last_stats_update = now ? now : xd->time_last_stats_update;
clib_memcpy_fast (&xd->last_stats, &xd->stats, sizeof (xd->last_stats));
rte_eth_stats_get (xd->port_id, &xd->stats);
@@ -119,6 +110,119 @@ dpdk_update_counters (dpdk_device_t * xd, f64 now)
dpdk_get_xstats (xd);
}
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+#define RTE_MBUF_F_RX_FDIR PKT_RX_FDIR
+#define RTE_MBUF_F_RX_FDIR_FLX PKT_RX_FDIR_FLX
+#define RTE_MBUF_F_RX_FDIR_ID PKT_RX_FDIR_ID
+#define RTE_MBUF_F_RX_IEEE1588_PTP PKT_RX_IEEE1588_PTP
+#define RTE_MBUF_F_RX_IEEE1588_TMST PKT_RX_IEEE1588_TMST
+#define RTE_MBUF_F_RX_IP_CKSUM_BAD PKT_RX_IP_CKSUM_BAD
+#define RTE_MBUF_F_RX_IP_CKSUM_GOOD PKT_RX_IP_CKSUM_GOOD
+#define RTE_MBUF_F_RX_IP_CKSUM_NONE PKT_RX_IP_CKSUM_GOOD
+#define RTE_MBUF_F_RX_L4_CKSUM_BAD PKT_RX_L4_CKSUM_BAD
+#define RTE_MBUF_F_RX_L4_CKSUM_GOOD PKT_RX_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_L4_CKSUM_NONE PKT_RX_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_LRO PKT_RX_LRO
+#define RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD PKT_RX_OUTER_IP_CKSUM_BAD
+#define RTE_MBUF_F_RX_OUTER_L4_CKSUM_BAD PKT_RX_OUTER_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_OUTER_L4_CKSUM_GOOD PKT_RX_OUTER_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_OUTER_L4_CKSUM_NONE PKT_RX_OUTER_L4_CKSUM_GOOD
+#define RTE_MBUF_F_RX_QINQ PKT_RX_QINQ
+#define RTE_MBUF_F_RX_QINQ_STRIPPED PKT_RX_QINQ_STRIPPED
+#define RTE_MBUF_F_RX_RSS_HASH PKT_RX_RSS_HASH
+#define RTE_MBUF_F_RX_SEC_OFFLOAD PKT_RX_SEC_OFFLOAD
+#define RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED PKT_RX_SEC_OFFLOAD_FAILED
+#define RTE_MBUF_F_RX_VLAN PKT_RX_VLAN
+#define RTE_MBUF_F_RX_VLAN_STRIPPED PKT_RX_VLAN_STRIPPED
+#define RTE_MBUF_F_TX_IEEE1588_TMST PKT_TX_IEEE1588_TMST
+#define RTE_MBUF_F_TX_IPV4 PKT_TX_IPV4
+#define RTE_MBUF_F_TX_IPV6 PKT_TX_IPV6
+#define RTE_MBUF_F_TX_IP_CKSUM PKT_TX_IP_CKSUM
+#define RTE_MBUF_F_TX_MACSEC PKT_TX_MACSEC
+#define RTE_MBUF_F_TX_OUTER_IPV4 PKT_TX_OUTER_IPV4
+#define RTE_MBUF_F_TX_OUTER_IPV6 PKT_TX_OUTER_IPV6
+#define RTE_MBUF_F_TX_OUTER_IP_CKSUM PKT_TX_OUTER_IP_CKSUM
+#define RTE_MBUF_F_TX_OUTER_UDP_CKSUM PKT_TX_OUTER_UDP_CKSUM
+#define RTE_MBUF_F_TX_QINQ PKT_TX_QINQ
+#define RTE_MBUF_F_TX_SCTP_CKSUM PKT_TX_SCTP_CKSUM
+#define RTE_MBUF_F_TX_SEC_OFFLOAD PKT_TX_SEC_OFFLOAD
+#define RTE_MBUF_F_TX_TCP_CKSUM PKT_TX_TCP_CKSUM
+#define RTE_MBUF_F_TX_TCP_SEG PKT_TX_TCP_SEG
+#define RTE_MBUF_F_TX_TUNNEL_GENEVE PKT_TX_TUNNEL_GENEVE
+#define RTE_MBUF_F_TX_TUNNEL_GRE PKT_TX_TUNNEL_GRE
+#define RTE_MBUF_F_TX_TUNNEL_GTP PKT_TX_TUNNEL_GTP
+#define RTE_MBUF_F_TX_TUNNEL_IP PKT_TX_TUNNEL_IP
+#define RTE_MBUF_F_TX_TUNNEL_IPIP PKT_TX_TUNNEL_IPIP
+#define RTE_MBUF_F_TX_TUNNEL_MPLSINUDP PKT_TX_TUNNEL_MPLSINUDP
+#define RTE_MBUF_F_TX_TUNNEL_UDP PKT_TX_TUNNEL_UDP
+#define RTE_MBUF_F_TX_TUNNEL_VXLAN PKT_TX_TUNNEL_VXLAN
+#define RTE_MBUF_F_TX_TUNNEL_VXLAN_GPE PKT_TX_TUNNEL_VXLAN_GPE
+#define RTE_MBUF_F_TX_UDP_CKSUM PKT_TX_UDP_CKSUM
+#define RTE_MBUF_F_TX_UDP_SEG PKT_TX_UDP_SEG
+#define RTE_MBUF_F_TX_VLAN PKT_TX_VLAN
+#define RTE_ETH_RSS_FRAG_IPV4 ETH_RSS_FRAG_IPV4
+#define RTE_ETH_RSS_NONFRAG_IPV4_TCP ETH_RSS_NONFRAG_IPV4_TCP
+#define RTE_ETH_RSS_NONFRAG_IPV4_UDP ETH_RSS_NONFRAG_IPV4_UDP
+#define RTE_ETH_RSS_NONFRAG_IPV4_SCTP ETH_RSS_NONFRAG_IPV4_SCTP
+#define RTE_ETH_RSS_NONFRAG_IPV4_OTHER ETH_RSS_NONFRAG_IPV4_OTHER
+#define RTE_ETH_RSS_IPV4 ETH_RSS_IPV4
+#define RTE_ETH_RSS_IPV6_TCP_EX ETH_RSS_IPV6_TCP_EX
+#define RTE_ETH_RSS_IPV6_UDP_EX ETH_RSS_IPV6_UDP_EX
+#define RTE_ETH_RSS_FRAG_IPV6 ETH_RSS_FRAG_IPV6
+#define RTE_ETH_RSS_NONFRAG_IPV6_TCP ETH_RSS_NONFRAG_IPV6_TCP
+#define RTE_ETH_RSS_NONFRAG_IPV6_UDP ETH_RSS_NONFRAG_IPV6_UDP
+#define RTE_ETH_RSS_NONFRAG_IPV6_SCTP ETH_RSS_NONFRAG_IPV6_SCTP
+#define RTE_ETH_RSS_NONFRAG_IPV6_OTHER ETH_RSS_NONFRAG_IPV6_OTHER
+#define RTE_ETH_RSS_IPV6_EX ETH_RSS_IPV6_EX
+#define RTE_ETH_RSS_IPV6 ETH_RSS_IPV6
+#define RTE_ETH_RSS_L2_PAYLOAD ETH_RSS_L2_PAYLOAD
+#define RTE_ETH_RSS_PORT ETH_RSS_PORT
+#define RTE_ETH_RSS_VXLAN ETH_RSS_VXLAN
+#define RTE_ETH_RSS_GENEVE ETH_RSS_GENEVE
+#define RTE_ETH_RSS_NVGRE ETH_RSS_NVGRE
+#define RTE_ETH_RSS_GTPU ETH_RSS_GTPU
+#define RTE_ETH_RSS_ESP ETH_RSS_ESP
+#define RTE_ETH_RSS_L4_DST_ONLY ETH_RSS_L4_DST_ONLY
+#define RTE_ETH_RSS_L4_SRC_ONLY ETH_RSS_L4_SRC_ONLY
+#define RTE_ETH_RSS_L3_DST_ONLY ETH_RSS_L3_DST_ONLY
+#define RTE_ETH_RSS_L3_SRC_ONLY ETH_RSS_L3_SRC_ONLY
+#define RTE_ETH_RETA_GROUP_SIZE RTE_RETA_GROUP_SIZE
+#define RTE_ETH_TX_OFFLOAD_IPV4_CKSUM DEV_TX_OFFLOAD_IPV4_CKSUM
+#define RTE_ETH_TX_OFFLOAD_TCP_CKSUM DEV_TX_OFFLOAD_TCP_CKSUM
+#define RTE_ETH_TX_OFFLOAD_UDP_CKSUM DEV_TX_OFFLOAD_UDP_CKSUM
+#define RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM
+#define RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM DEV_TX_OFFLOAD_OUTER_UDP_CKSUM
+#define RTE_ETH_TX_OFFLOAD_TCP_TSO DEV_TX_OFFLOAD_TCP_TSO
+#define RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO DEV_TX_OFFLOAD_VXLAN_TNL_TSO
+#define RTE_ETH_TX_OFFLOAD_MULTI_SEGS DEV_TX_OFFLOAD_MULTI_SEGS
+#define RTE_ETH_RX_OFFLOAD_IPV4_CKSUM DEV_RX_OFFLOAD_IPV4_CKSUM
+#define RTE_ETH_RX_OFFLOAD_SCATTER DEV_RX_OFFLOAD_SCATTER
+#define RTE_ETH_RX_OFFLOAD_TCP_LRO DEV_RX_OFFLOAD_TCP_LRO
+#define RTE_ETH_MQ_RX_RSS ETH_MQ_RX_RSS
+#define RTE_ETH_RX_OFFLOAD_TCP_CKSUM DEV_RX_OFFLOAD_TCP_CKSUM
+#define RTE_ETH_RX_OFFLOAD_UDP_CKSUM DEV_RX_OFFLOAD_UDP_CKSUM
+#define RTE_ETH_MQ_RX_NONE ETH_MQ_RX_NONE
+#define RTE_ETH_LINK_FULL_DUPLEX ETH_LINK_FULL_DUPLEX
+#define RTE_ETH_LINK_HALF_DUPLEX ETH_LINK_HALF_DUPLEX
+#define RTE_ETH_VLAN_STRIP_OFFLOAD ETH_VLAN_STRIP_OFFLOAD
+#define RTE_ETH_VLAN_FILTER_OFFLOAD ETH_VLAN_FILTER_OFFLOAD
+#define RTE_ETH_VLAN_EXTEND_OFFLOAD ETH_VLAN_EXTEND_OFFLOAD
+#define RTE_ETH_LINK_SPEED_200G ETH_LINK_SPEED_200G
+#define RTE_ETH_LINK_SPEED_100G ETH_LINK_SPEED_100G
+#define RTE_ETH_LINK_SPEED_56G ETH_LINK_SPEED_56G
+#define RTE_ETH_LINK_SPEED_50G ETH_LINK_SPEED_50G
+#define RTE_ETH_LINK_SPEED_40G ETH_LINK_SPEED_40G
+#define RTE_ETH_LINK_SPEED_25G ETH_LINK_SPEED_25G
+#define RTE_ETH_LINK_SPEED_20G ETH_LINK_SPEED_20G
+#define RTE_ETH_LINK_SPEED_10G ETH_LINK_SPEED_10G
+#define RTE_ETH_LINK_SPEED_5G ETH_LINK_SPEED_5G
+#define RTE_ETH_LINK_SPEED_2_5G ETH_LINK_SPEED_2_5G
+#define RTE_ETH_LINK_SPEED_1G ETH_LINK_SPEED_1G
+#define RTE_ETH_RSS_IP ETH_RSS_IP
+#define RTE_ETH_RSS_UDP ETH_RSS_UDP
+#define RTE_ETH_RSS_TCP ETH_RSS_TCP
+#endif
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/dpdk/device/driver.c b/src/plugins/dpdk/device/driver.c
new file mode 100644
index 00000000000..9c368dd9038
--- /dev/null
+++ b/src/plugins/dpdk/device/driver.c
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+
+#include <dpdk/device/dpdk.h>
+
+static const u32 supported_flow_actions_intel =
+ (VNET_FLOW_ACTION_MARK | VNET_FLOW_ACTION_REDIRECT_TO_NODE |
+ VNET_FLOW_ACTION_REDIRECT_TO_QUEUE | VNET_FLOW_ACTION_BUFFER_ADVANCE |
+ VNET_FLOW_ACTION_COUNT | VNET_FLOW_ACTION_DROP | VNET_FLOW_ACTION_RSS);
+
+#define DPDK_DRIVERS(...) \
+ (dpdk_driver_name_t[]) \
+ { \
+ __VA_ARGS__, {} \
+ }
+
+static dpdk_driver_t dpdk_drivers[] = {
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ice", "Intel E810 Family" },
+ { "net_igc", "Intel I225 2.5G Family" },
+ { "net_e1000_igb", "Intel e1000" },
+ { "net_e1000_em", "Intel 82540EM (e1000)" }),
+ .enable_rxq_int = 1,
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ixgbe", "Intel 82599" }),
+ .enable_rxq_int = 1,
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_i40e", "Intel X710/XL710 Family" }),
+ .enable_rxq_int = 1,
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ .int_unmaskable = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_liovf", "Cavium Lio VF" },
+ { "net_thunderx", "Cavium ThunderX" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_iavf", "Intel iAVF" },
+ { "net_i40e_vf", "Intel X710/XL710 Family VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .supported_flow_actions = supported_flow_actions_intel,
+ .use_intel_phdr_cksum = 1,
+ .int_unmaskable = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_e1000_igb_vf", "Intel e1000 VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ixgbe_vf", "Intel 82599 VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .use_intel_phdr_cksum = 1,
+ .program_vlans = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_dpaa2", "NXP DPAA2 Mac" }),
+ .interface_name_prefix = "TenGigabitEthernet",
+ },
+ {
+ .drivers =
+ DPDK_DRIVERS ({ "net_fm10k", "Intel FM10000 Family Ethernet Switch" }),
+ .interface_name_prefix = "EthernetSwitch",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_cxgbe", "Chelsio T4/T5" }),
+ .interface_number_from_port_id = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_mlx4", "Mellanox ConnectX-3 Family" },
+ { "net_qede", "Cavium QLogic FastLinQ QL4xxxx" },
+ { "net_bnxt", "Broadcom NetXtreme E/S-Series" }),
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_virtio_user", "Virtio User" }),
+ .interface_name_prefix = "VirtioUser",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_vhost", "VhostEthernet" }),
+ .interface_name_prefix = "VhostEthernet",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "mlx5_pci", "Mellanox ConnectX-4/5/6 Family" },
+ { "net_enic", "Cisco VIC" }),
+ .use_intel_phdr_cksum = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_failsafe", "FailsafeEthernet" }),
+ .interface_name_prefix = "FailsafeEthernet",
+ .enable_lsc_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "AF_PACKET PMD", "af_packet" }),
+ .interface_name_prefix = "af_packet",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_netvsc", "Microsoft Hyper-V Netvsc" }),
+ .interface_name_prefix = "NetVSC",
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_ena", "AWS ENA VF" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ .enable_rxq_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_vmxnet3", "VMware VMXNET3" }),
+ .interface_name_prefix = "GigabitEthernet",
+ .enable_rxq_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_virtio", "Red Hat Virtio" }),
+ .interface_name_prefix = "GigabitEthernet",
+ .n_rx_desc = 256,
+ .n_tx_desc = 256,
+ .mq_mode_none = 1,
+ .enable_rxq_int = 1,
+ },
+ {
+ .drivers = DPDK_DRIVERS ({ "net_gve", "Google vNIC" }),
+ .interface_name_prefix = "VirtualFunctionEthernet",
+ }
+};
+
+dpdk_driver_t *
+dpdk_driver_find (const char *name, const char **desc)
+{
+ for (int i = 0; i < ARRAY_LEN (dpdk_drivers); i++)
+ {
+ dpdk_driver_t *dr = dpdk_drivers + i;
+ dpdk_driver_name_t *dn = dr->drivers;
+
+ while (dn->name)
+ {
+ if (name && !strcmp (name, dn->name))
+ {
+ *desc = dn->desc;
+ return dr;
+ }
+ dn++;
+ }
+ }
+ return 0;
+}
diff --git a/src/plugins/dpdk/device/flow.c b/src/plugins/dpdk/device/flow.c
index a090ec0e930..635f6f37ebf 100644
--- a/src/plugins/dpdk/device/flow.c
+++ b/src/plugins/dpdk/device/flow.c
@@ -21,7 +21,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/arp_packet.h>
-#include <vnet/vxlan/vxlan.h>
+#include <vxlan/vxlan.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/device/dpdk_priv.h>
#include <vppinfra/error.h>
@@ -29,22 +29,30 @@
#define FLOW_IS_ETHERNET_CLASS(f) \
(f->type == VNET_FLOW_TYPE_ETHERNET)
-#define FLOW_IS_IPV4_CLASS(f) \
- ((f->type == VNET_FLOW_TYPE_IP4) || \
- (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
- (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
- (f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
- (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
- (f->type == VNET_FLOW_TYPE_IP4_GTPU) || \
- (f->type == VNET_FLOW_TYPE_IP4_L2TPV3OIP) || \
- (f->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) || \
- (f->type == VNET_FLOW_TYPE_IP4_IPSEC_AH))
-
-#define FLOW_IS_IPV6_CLASS(f) \
- ((f->type == VNET_FLOW_TYPE_IP6) || \
- (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
- (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \
- (f->type == VNET_FLOW_TYPE_IP6_VXLAN))
+#define FLOW_IS_IPV4_CLASS(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP4_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
+ (f->type == VNET_FLOW_TYPE_IP4_GTPU) || \
+ (f->type == VNET_FLOW_TYPE_IP4_L2TPV3OIP) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IPSEC_ESP) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IPSEC_AH) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP4) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP6) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE))
+
+#define FLOW_IS_IPV6_CLASS(f) \
+ ((f->type == VNET_FLOW_TYPE_IP6) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED) || \
+ (f->type == VNET_FLOW_TYPE_IP6_VXLAN) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP4) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP6) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE))
/* check if flow is VLAN sensitive */
#define FLOW_HAS_VLAN_TAG(f) \
@@ -70,6 +78,13 @@
(f->type == VNET_FLOW_TYPE_IP4_GTPC) || \
(f->type == VNET_FLOW_TYPE_IP4_GTPU))
+/* check if flow has a inner TCP/UDP header */
+#define FLOW_HAS_INNER_N_TUPLE(f) \
+ ((f->type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE) || \
+ (f->type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE))
+
/* constant structs */
static const struct rte_flow_attr ingress = {.ingress = 1 };
@@ -103,6 +118,25 @@ dpdk_flow_convert_rss_types (u64 type, u64 * dpdk_rss_type)
return;
}
+/** Maximum number of queue indices in struct rte_flow_action_rss. */
+#define ACTION_RSS_QUEUE_NUM 128
+
+static inline void
+dpdk_flow_convert_rss_queues (u32 queue_index, u32 queue_num,
+ struct rte_flow_action_rss *rss)
+{
+ u16 *queues = clib_mem_alloc (sizeof (*queues) * ACTION_RSS_QUEUE_NUM);
+ int i;
+
+ for (i = 0; i < queue_num; i++)
+ queues[i] = queue_index++;
+
+ rss->queue_num = queue_num;
+ rss->queue = queues;
+
+ return;
+}
+
static inline enum rte_eth_hash_function
dpdk_flow_convert_rss_func (vnet_rss_function_t func)
{
@@ -134,14 +168,15 @@ static int
dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
{
struct rte_flow_item_eth eth[2] = { };
- struct rte_flow_item_ipv4 ip4[2] = { };
- struct rte_flow_item_ipv6 ip6[2] = { };
- struct rte_flow_item_udp udp[2] = { };
- struct rte_flow_item_tcp tcp[2] = { };
+ struct rte_flow_item_ipv4 ip4[2] = {}, in_ip4[2] = {};
+ struct rte_flow_item_ipv6 ip6[2] = {}, in_ip6[2] = {};
+ struct rte_flow_item_udp udp[2] = {}, in_UDP[2] = {};
+ struct rte_flow_item_tcp tcp[2] = {}, in_TCP[2] = {};
struct rte_flow_item_gtp gtp[2] = { };
struct rte_flow_item_l2tpv3oip l2tp[2] = { };
struct rte_flow_item_esp esp[2] = { };
struct rte_flow_item_ah ah[2] = { };
+ struct rte_flow_item_raw generic[2] = {};
struct rte_flow_action_mark mark = { 0 };
struct rte_flow_action_queue queue = { 0 };
struct rte_flow_action_rss rss = { 0 };
@@ -165,6 +200,20 @@ dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
u8 protocol = IP_PROTOCOL_RESERVED;
int rv = 0;
+ /* Handle generic flow first */
+ if (f->type == VNET_FLOW_TYPE_GENERIC)
+ {
+ generic[0].pattern = f->generic.pattern.spec;
+ generic[1].pattern = f->generic.pattern.mask;
+
+ vec_add2 (items, item, 1);
+ item->type = RTE_FLOW_ITEM_TYPE_RAW;
+ item->spec = generic;
+ item->mask = generic + 1;
+
+ goto pattern_end;
+ }
+
enum
{
FLOW_UNKNOWN_CLASS,
@@ -285,7 +334,8 @@ dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
if ((ip6_ptr->src_addr.mask.as_u64[0] == 0) &&
(ip6_ptr->src_addr.mask.as_u64[1] == 0) &&
- (!ip6_ptr->protocol.mask))
+ (ip6_ptr->dst_addr.mask.as_u64[0] == 0) &&
+ (ip6_ptr->dst_addr.mask.as_u64[1] == 0) && (!ip6_ptr->protocol.mask))
{
item->spec = NULL;
item->mask = NULL;
@@ -437,13 +487,127 @@ dpdk_flow_add (dpdk_device_t * xd, vnet_flow_t * f, dpdk_flow_entry_t * fe)
item->mask = raw + 1;
}
break;
+ case IP_PROTOCOL_IPV6:
+ item->type = RTE_FLOW_ITEM_TYPE_IPV6;
+#define fill_inner_ip6_with_outer_ipv(OUTER_IP_VER) \
+ if (f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP6 || \
+ f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP6_N_TUPLE) \
+ { \
+ vnet_flow_ip##OUTER_IP_VER##_ip6_t *ptr = &f->ip##OUTER_IP_VER##_ip6; \
+ if ((ptr->in_src_addr.mask.as_u64[0] == 0) && \
+ (ptr->in_src_addr.mask.as_u64[1] == 0) && \
+ (ptr->in_dst_addr.mask.as_u64[0] == 0) && \
+ (ptr->in_dst_addr.mask.as_u64[1] == 0) && (!ptr->in_protocol.mask)) \
+ { \
+ item->spec = NULL; \
+ item->mask = NULL; \
+ } \
+ else \
+ { \
+ clib_memcpy (in_ip6[0].hdr.src_addr, &ptr->in_src_addr.addr, \
+ ARRAY_LEN (ptr->in_src_addr.addr.as_u8)); \
+ clib_memcpy (in_ip6[1].hdr.src_addr, &ptr->in_src_addr.mask, \
+ ARRAY_LEN (ptr->in_src_addr.mask.as_u8)); \
+ clib_memcpy (in_ip6[0].hdr.dst_addr, &ptr->in_dst_addr.addr, \
+ ARRAY_LEN (ptr->in_dst_addr.addr.as_u8)); \
+ clib_memcpy (in_ip6[1].hdr.dst_addr, &ptr->in_dst_addr.mask, \
+ ARRAY_LEN (ptr->in_dst_addr.mask.as_u8)); \
+ item->spec = in_ip6; \
+ item->mask = in_ip6 + 1; \
+ } \
+ }
+ fill_inner_ip6_with_outer_ipv (6) fill_inner_ip6_with_outer_ipv (4)
+#undef fill_inner_ip6_with_outer_ipv
+ break;
+ case IP_PROTOCOL_IP_IN_IP:
+ item->type = RTE_FLOW_ITEM_TYPE_IPV4;
+
+#define fill_inner_ip4_with_outer_ipv(OUTER_IP_VER) \
+ if (f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP4 || \
+ f->type == VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP4_N_TUPLE) \
+ { \
+ vnet_flow_ip##OUTER_IP_VER##_ip4_t *ptr = &f->ip##OUTER_IP_VER##_ip4; \
+ if ((!ptr->in_src_addr.mask.as_u32) && \
+ (!ptr->in_dst_addr.mask.as_u32) && (!ptr->in_protocol.mask)) \
+ { \
+ item->spec = NULL; \
+ item->mask = NULL; \
+ } \
+ else \
+ { \
+ in_ip4[0].hdr.src_addr = ptr->in_src_addr.addr.as_u32; \
+ in_ip4[1].hdr.src_addr = ptr->in_src_addr.mask.as_u32; \
+ in_ip4[0].hdr.dst_addr = ptr->in_dst_addr.addr.as_u32; \
+ in_ip4[1].hdr.dst_addr = ptr->in_dst_addr.mask.as_u32; \
+ item->spec = in_ip4; \
+ item->mask = in_ip4 + 1; \
+ } \
+ }
+ fill_inner_ip4_with_outer_ipv (6) fill_inner_ip4_with_outer_ipv (4)
+#undef fill_inner_ip4_with_outer_ipv
+ break;
default:
rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
goto done;
}
+ if (FLOW_HAS_INNER_N_TUPLE (f))
+ {
+ vec_add2 (items, item, 1);
+
+#define fill_inner_n_tuple_of(proto) \
+ item->type = RTE_FLOW_ITEM_TYPE_##proto; \
+ if ((ptr->in_src_port.mask == 0) && (ptr->in_dst_port.mask == 0)) \
+ { \
+ item->spec = NULL; \
+ item->mask = NULL; \
+ } \
+ else \
+ { \
+ in_##proto[0].hdr.src_port = \
+ clib_host_to_net_u16 (ptr->in_src_port.port); \
+ in_##proto[1].hdr.src_port = \
+ clib_host_to_net_u16 (ptr->in_src_port.mask); \
+ in_##proto[0].hdr.dst_port = \
+ clib_host_to_net_u16 (ptr->in_dst_port.port); \
+ in_##proto[1].hdr.dst_port = \
+ clib_host_to_net_u16 (ptr->in_dst_port.mask); \
+ item->spec = in_##proto; \
+ item->mask = in_##proto + 1; \
+ }
+
+#define fill_inner_n_tuple(OUTER_IP_VER, INNER_IP_VER) \
+ if (f->type == \
+ VNET_FLOW_TYPE_IP##OUTER_IP_VER##_IP##INNER_IP_VER##_N_TUPLE) \
+ { \
+ vnet_flow_ip##OUTER_IP_VER##_ip##INNER_IP_VER##_n_tuple_t *ptr = \
+ &f->ip##OUTER_IP_VER##_ip##INNER_IP_VER##_n_tuple; \
+ switch (ptr->in_protocol.prot) \
+ { \
+ case IP_PROTOCOL_UDP: \
+ fill_inner_n_tuple_of (UDP) break; \
+ case IP_PROTOCOL_TCP: \
+ fill_inner_n_tuple_of (TCP) break; \
+ default: \
+ break; \
+ } \
+ }
+ fill_inner_n_tuple (6, 4) fill_inner_n_tuple (4, 4)
+ fill_inner_n_tuple (6, 6) fill_inner_n_tuple (4, 6)
+#undef fill_inner_n_tuple
+#undef fill_inner_n_tuple_of
+ }
+
pattern_end:
+ if ((f->actions & VNET_FLOW_ACTION_RSS) &&
+ (f->rss_types & (1ULL << VNET_FLOW_RSS_TYPES_ESP)))
+ {
+
+ vec_add2 (items, item, 1);
+ item->type = RTE_FLOW_ITEM_TYPE_ESP;
+ }
+
vec_add2 (items, item, 1);
item->type = RTE_FLOW_ITEM_TYPE_END;
@@ -482,6 +646,10 @@ pattern_end:
/* convert types to DPDK rss bitmask */
dpdk_flow_convert_rss_types (f->rss_types, &rss_type);
+ if (f->queue_num)
+ /* convert rss queues to array */
+ dpdk_flow_convert_rss_queues (f->queue_index, f->queue_num, &rss);
+
rss.types = rss_type;
if ((rss.func = dpdk_flow_convert_rss_func (f->rss_fun)) ==
RTE_ETH_HASH_FUNCTION_MAX)
@@ -547,6 +715,7 @@ int
dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
u32 flow_index, uword * private_data)
{
+ vlib_main_t *vm = vlib_get_main ();
dpdk_main_t *dm = &dpdk_main;
vnet_flow_t *flow = vnet_get_flow (flow_index);
dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
@@ -557,7 +726,7 @@ dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
/* recycle old flow lookup entries only after the main loop counter
increases - i.e. previously DMA'ed packets were handled */
if (vec_len (xd->parked_lookup_indexes) > 0 &&
- xd->parked_loop_count != dm->vlib_main->main_loop_count)
+ xd->parked_loop_count != vm->main_loop_count)
{
u32 *fl_index;
@@ -580,7 +749,7 @@ dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
fle = pool_elt_at_index (xd->flow_lookup_entries, fe->mark);
clib_memset (fle, -1, sizeof (*fle));
vec_add1 (xd->parked_lookup_indexes, fe->mark);
- xd->parked_loop_count = dm->vlib_main->main_loop_count;
+ xd->parked_loop_count = vm->main_loop_count;
}
clib_memset (fe, 0, sizeof (*fe));
@@ -644,6 +813,15 @@ dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
case VNET_FLOW_TYPE_IP4_L2TPV3OIP:
case VNET_FLOW_TYPE_IP4_IPSEC_ESP:
case VNET_FLOW_TYPE_IP4_IPSEC_AH:
+ case VNET_FLOW_TYPE_IP4_IP4:
+ case VNET_FLOW_TYPE_IP4_IP4_N_TUPLE:
+ case VNET_FLOW_TYPE_IP4_IP6:
+ case VNET_FLOW_TYPE_IP4_IP6_N_TUPLE:
+ case VNET_FLOW_TYPE_IP6_IP4:
+ case VNET_FLOW_TYPE_IP6_IP4_N_TUPLE:
+ case VNET_FLOW_TYPE_IP6_IP6:
+ case VNET_FLOW_TYPE_IP6_IP6_N_TUPLE:
+ case VNET_FLOW_TYPE_GENERIC:
if ((rv = dpdk_flow_add (xd, flow, fe)))
goto done;
break;
diff --git a/src/plugins/dpdk/device/format.c b/src/plugins/dpdk/device/format.c
index 24994aa9426..c4170c20329 100644
--- a/src/plugins/dpdk/device/format.c
+++ b/src/plugins/dpdk/device/format.c
@@ -17,9 +17,6 @@
#include <vppinfra/format.h>
#include <assert.h>
-#define __USE_GNU
-#include <dlfcn.h>
-
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/sfp.h>
#include <dpdk/device/dpdk.h>
@@ -49,18 +46,28 @@
#endif
#define foreach_dpdk_pkt_rx_offload_flag \
- _ (PKT_RX_VLAN, "RX packet is a 802.1q VLAN packet") \
- _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \
- _ (PKT_RX_FDIR, "RX packet with FDIR infos") \
- _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
- _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
- _ (PKT_RX_OUTER_IP_CKSUM_BAD, "External IP header checksum error") \
- _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \
- _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \
- _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid") \
- _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
- _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \
- _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped")
+ _ (RX_FDIR, "RX packet with FDIR infos") \
+ _ (RX_FDIR_FLX, "RX packet with FDIR_FLX info") \
+ _ (RX_FDIR_ID, "RX packet with FDIR_ID info") \
+ _ (RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
+ _ (RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \
+ _ (RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
+ _ (RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \
+ _ (RX_IP_CKSUM_NONE, "no IP cksum of RX pkt.") \
+ _ (RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
+ _ (RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid") \
+ _ (RX_L4_CKSUM_NONE, "no L4 cksum of RX pkt.") \
+ _ (RX_LRO, "LRO packet") \
+ _ (RX_OUTER_IP_CKSUM_BAD, "External IP header checksum error") \
+ _ (RX_OUTER_L4_CKSUM_BAD, "External L4 header checksum error") \
+ _ (RX_OUTER_L4_CKSUM_GOOD, "External L4 header checksum OK") \
+ _ (RX_QINQ, "RX packet with QinQ tags") \
+ _ (RX_QINQ_STRIPPED, "RX packet QinQ tags stripped") \
+ _ (RX_RSS_HASH, "RX packet with RSS hash result") \
+ _ (RX_SEC_OFFLOAD, "RX packet with security offload") \
+ _ (RX_SEC_OFFLOAD_FAILED, "RX packet with security offload failed") \
+ _ (RX_VLAN, "RX packet is a 802.1q VLAN packet") \
+ _ (RX_VLAN_STRIPPED, "RX packet VLAN tag stripped")
#define foreach_dpdk_pkt_type \
_ (L2, ETHER, "Ethernet packet") \
@@ -103,14 +110,32 @@
_ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet")
#define foreach_dpdk_pkt_tx_offload_flag \
- _ (PKT_TX_VLAN_PKT, "TX packet is a 802.1q VLAN packet") \
- _ (PKT_TX_TUNNEL_VXLAN, "TX packet is a VXLAN packet") \
- _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_OUTER_IP_CKSUM, "Outer IP cksum of Tx pkt. computed by NIC") \
- _ (PKT_TX_TCP_SEG, "TSO of TX pkt. done by NIC") \
- _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp")
+ _ (TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp") \
+ _ (TX_IPV4, "TX IPV4") \
+ _ (TX_IPV6, "TX IPV6") \
+ _ (TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \
+ _ (TX_MACSEC, "TX MACSEC") \
+ _ (TX_OUTER_IPV4, "TX outer IPV4") \
+ _ (TX_OUTER_IPV6, "TX outer IPV6") \
+ _ (TX_OUTER_IP_CKSUM, "Outer IP cksum of Tx pkt. computed by NIC") \
+ _ (TX_OUTER_UDP_CKSUM, "TX outer UDP cksum") \
+ _ (TX_QINQ, "TX QINQ") \
+ _ (TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \
+ _ (TX_SEC_OFFLOAD, "TX SEC OFFLOAD") \
+ _ (TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \
+ _ (TX_TCP_SEG, "TSO of TX pkt. done by NIC") \
+ _ (TX_TUNNEL_GENEVE, "TX tunnel GENEVE") \
+ _ (TX_TUNNEL_GRE, "TX tunnel GRE") \
+ _ (TX_TUNNEL_GTP, "TX tunnel GTP") \
+ _ (TX_TUNNEL_IP, "TX tunnel IP") \
+ _ (TX_TUNNEL_IPIP, "TX tunnel IPIP") \
+ _ (TX_TUNNEL_MPLSINUDP, "TX tunnel MPLSinUDP") \
+ _ (TX_TUNNEL_UDP, "TX tunnel UDP") \
+ _ (TX_TUNNEL_VXLAN, "TX packet is a VXLAN packet") \
+ _ (TX_TUNNEL_VXLAN_GPE, "TX tunnel VXLAN GPE") \
+ _ (TX_UDP_CKSUM, "TX UDP cksum") \
+ _ (TX_UDP_SEG, "TX UDP SEG") \
+ _ (TX_VLAN, "TX packet is a 802.1q VLAN packet")
#define foreach_dpdk_pkt_offload_flag \
foreach_dpdk_pkt_rx_offload_flag \
@@ -123,105 +148,10 @@ u8 *
format_dpdk_device_name (u8 * s, va_list * args)
{
dpdk_main_t *dm = &dpdk_main;
- char *devname_format;
- char *device_name;
u32 i = va_arg (*args, u32);
dpdk_device_t *xd = vec_elt_at_index (dm->devices, i);
- struct rte_eth_dev_info dev_info;
- struct rte_pci_device *pci_dev;
- u8 *ret;
-
- if (xd->name)
- return format (s, "%s", xd->name);
-
- if (dm->conf->interface_name_format_decimal)
- devname_format = "%s%d/%d/%d";
- else
- devname_format = "%s%x/%x/%x";
-
- switch (xd->port_type)
- {
- case VNET_DPDK_PORT_TYPE_ETH_1G:
- device_name = "GigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_2_5G:
- device_name = "Two_FiveGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_5G:
- device_name = "FiveGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_10G:
- device_name = "TenGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_20G:
- device_name = "TwentyGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_25G:
- device_name = "TwentyFiveGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_40G:
- device_name = "FortyGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_50G:
- device_name = "FiftyGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_56G:
- device_name = "FiftySixGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_100G:
- device_name = "HundredGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_SWITCH:
- device_name = "EthernetSwitch";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_VF:
- device_name = "VirtualFunctionEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_AF_PACKET:
- return format (s, "af_packet%d", xd->af_packet_instance_num);
-
- case VNET_DPDK_PORT_TYPE_VIRTIO_USER:
- device_name = "VirtioUser";
- break;
-
- case VNET_DPDK_PORT_TYPE_VHOST_ETHER:
- device_name = "VhostEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_FAILSAFE:
- device_name = "FailsafeEthernet";
- break;
-
- default:
- case VNET_DPDK_PORT_TYPE_UNKNOWN:
- device_name = "UnknownEthernet";
- break;
- }
-
- rte_eth_dev_info_get (xd->port_id, &dev_info);
- pci_dev = dpdk_get_pci_device (&dev_info);
-
- if (pci_dev && xd->port_type != VNET_DPDK_PORT_TYPE_FAILSAFE)
- ret = format (s, devname_format, device_name, pci_dev->addr.bus,
- pci_dev->addr.devid, pci_dev->addr.function);
- else
- ret = format (s, "%s%d", device_name, xd->port_id);
- if (xd->interface_name_suffix)
- return format (ret, "/%s", xd->interface_name_suffix);
- return ret;
+ return format (s, "%v", xd->name);
}
u8 *
@@ -243,126 +173,12 @@ static u8 *
format_dpdk_device_type (u8 * s, va_list * args)
{
dpdk_main_t *dm = &dpdk_main;
- char *dev_type;
u32 i = va_arg (*args, u32);
- switch (dm->devices[i].pmd)
- {
- case VNET_DPDK_PMD_E1000EM:
- dev_type = "Intel 82540EM (e1000)";
- break;
-
- case VNET_DPDK_PMD_IGB:
- dev_type = "Intel e1000";
- break;
-
- case VNET_DPDK_PMD_I40E:
- dev_type = "Intel X710/XL710 Family";
- break;
-
- case VNET_DPDK_PMD_I40EVF:
- dev_type = "Intel X710/XL710 Family VF";
- break;
-
- case VNET_DPDK_PMD_ICE:
- dev_type = "Intel E810 Family";
- break;
-
- case VNET_DPDK_PMD_IAVF:
- dev_type = "Intel iAVF";
- break;
-
- case VNET_DPDK_PMD_FM10K:
- dev_type = "Intel FM10000 Family Ethernet Switch";
- break;
-
- case VNET_DPDK_PMD_IGBVF:
- dev_type = "Intel e1000 VF";
- break;
-
- case VNET_DPDK_PMD_VIRTIO:
- dev_type = "Red Hat Virtio";
- break;
-
- case VNET_DPDK_PMD_IXGBEVF:
- dev_type = "Intel 82599 VF";
- break;
-
- case VNET_DPDK_PMD_IXGBE:
- dev_type = "Intel 82599";
- break;
-
- case VNET_DPDK_PMD_ENIC:
- dev_type = "Cisco VIC";
- break;
-
- case VNET_DPDK_PMD_CXGBE:
- dev_type = "Chelsio T4/T5";
- break;
-
- case VNET_DPDK_PMD_MLX4:
- dev_type = "Mellanox ConnectX-3 Family";
- break;
-
- case VNET_DPDK_PMD_MLX5:
- dev_type = "Mellanox ConnectX-4 Family";
- break;
-
- case VNET_DPDK_PMD_VMXNET3:
- dev_type = "VMware VMXNET3";
- break;
-
- case VNET_DPDK_PMD_AF_PACKET:
- dev_type = "af_packet";
- break;
-
- case VNET_DPDK_PMD_DPAA2:
- dev_type = "NXP DPAA2 Mac";
- break;
-
- case VNET_DPDK_PMD_VIRTIO_USER:
- dev_type = "Virtio User";
- break;
-
- case VNET_DPDK_PMD_THUNDERX:
- dev_type = "Cavium ThunderX";
- break;
-
- case VNET_DPDK_PMD_VHOST_ETHER:
- dev_type = "VhostEthernet";
- break;
-
- case VNET_DPDK_PMD_ENA:
- dev_type = "AWS ENA VF";
- break;
-
- case VNET_DPDK_PMD_FAILSAFE:
- dev_type = "FailsafeEthernet";
- break;
-
- case VNET_DPDK_PMD_LIOVF_ETHER:
- dev_type = "Cavium Lio VF";
- break;
-
- case VNET_DPDK_PMD_QEDE:
- dev_type = "Cavium QLogic FastLinQ QL4xxxx";
- break;
-
- case VNET_DPDK_PMD_NETVSC:
- dev_type = "Microsoft Hyper-V Netvsc";
- break;
-
- case VNET_DPDK_PMD_BNXT:
- dev_type = "Broadcom NetXtreme E/S-Series";
- break;
-
- default:
- case VNET_DPDK_PMD_UNKNOWN:
- dev_type = "### UNKNOWN ###";
- break;
- }
-
- return format (s, dev_type);
+ if (dm->devices[i].if_desc)
+ return format (s, dm->devices[i].if_desc);
+ else
+ return format (s, "### UNKNOWN ###");
}
static u8 *
@@ -378,10 +194,11 @@ format_dpdk_link_status (u8 * s, va_list * args)
{
u32 promisc = rte_eth_promiscuous_get (xd->port_id);
- s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ?
- "full" : "half");
- s = format (s, "mtu %d %s\n", hi->max_packet_bytes, promisc ?
- " promisc" : "");
+ s = format (s, "%s duplex ",
+ (l->link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ? "full" :
+ "half");
+ s = format (s, "max-frame-size %d %s\n", hi->max_frame_size,
+ promisc ? " promisc" : "");
}
else
s = format (s, "\n");
@@ -419,8 +236,6 @@ format_offload (u8 * s, va_list * va)
uword i, l;
l = ~0;
- if (clib_mem_is_vec (id))
- l = vec_len (id);
if (id)
for (i = 0; id[i] != 0 && i < l; i++)
@@ -523,15 +338,29 @@ format_dpdk_device_module_info (u8 * s, va_list * args)
return s;
}
-static const char *
-ptr2sname (void *p)
+u8 *
+format_dpdk_burst_fn (u8 *s, va_list *args)
{
- Dl_info info = { 0 };
+ dpdk_device_t *xd = va_arg (*args, dpdk_device_t *);
+ vlib_rx_or_tx_t dir = va_arg (*args, vlib_rx_or_tx_t);
+ void *p;
+ clib_elf_symbol_t sym;
- if (dladdr (p, &info) == 0)
- return 0;
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+#define rte_eth_fp_ops rte_eth_devices
+#endif
+
+ p = (dir == VLIB_TX) ? rte_eth_fp_ops[xd->port_id].tx_pkt_burst :
+ rte_eth_fp_ops[xd->port_id].rx_pkt_burst;
- return info.dli_sname;
+ if (clib_elf_symbol_by_address (pointer_to_uword (p), &sym))
+ {
+ return format (s, "%s", clib_elf_symbol_name (&sym));
+ }
+ else
+ {
+ return format (s, "(not available)");
+ }
}
static u8 *
@@ -549,16 +378,51 @@ format_switch_info (u8 * s, va_list * args)
}
u8 *
+format_dpdk_rte_device (u8 *s, va_list *args)
+{
+ struct rte_device *d = va_arg (*args, struct rte_device *);
+
+ if (!d)
+ return format (s, "not available");
+
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ s =
+ format (s, "name: %s, numa: %d", rte_dev_name (d), rte_dev_numa_node (d));
+
+ if (rte_dev_driver (d))
+ s = format (s, ", driver: %s", rte_driver_name (rte_dev_driver (d)));
+
+ if (rte_dev_bus (d))
+ s = format (s, ", bus: %s", rte_bus_name (rte_dev_bus (d)));
+#else
+ s = format (s, "name: %s, numa: %d", d->name, d->numa_node);
+
+ if (d->driver)
+ s = format (s, ", driver: %s", d->driver->name);
+
+ if (d->bus)
+ s = format (s, ", bus: %s", d->bus->name);
+#endif
+
+ return s;
+}
+
+u8 *
format_dpdk_device (u8 * s, va_list * args)
{
u32 dev_instance = va_arg (*args, u32);
int verbose = va_arg (*args, int);
dpdk_main_t *dm = &dpdk_main;
+ vlib_main_t *vm = vlib_get_main ();
dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
u32 indent = format_get_indent (s);
- f64 now = vlib_time_now (dm->vlib_main);
+ f64 now = vlib_time_now (vm);
struct rte_eth_dev_info di;
struct rte_eth_burst_mode mode;
+ struct rte_pci_device *pci;
+ struct rte_eth_rss_conf rss_conf;
+ int vlan_off;
+ int retval;
dpdk_update_counters (xd, now);
dpdk_update_link_state (xd, now);
@@ -569,126 +433,114 @@ format_dpdk_device (u8 * s, va_list * args)
format_white_space, indent + 2, format_dpdk_link_status, xd);
s = format (s, "%Uflags: %U\n",
format_white_space, indent + 2, format_dpdk_device_flags, xd);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (rte_dev_devargs (di.device) && rte_dev_devargs (di.device)->args)
+ s = format (s, "%UDevargs: %s\n", format_white_space, indent + 2,
+ rte_dev_devargs (di.device)->args);
+#else
if (di.device->devargs && di.device->devargs->args)
s = format (s, "%UDevargs: %s\n",
format_white_space, indent + 2, di.device->devargs->args);
- s = format (s, "%Urx: queues %d (max %d), desc %d "
+#endif
+ s = format (s,
+ "%Urx: queues %d (max %d), desc %d "
"(min %d max %d align %d)\n",
- format_white_space, indent + 2, xd->rx_q_used, di.max_rx_queues,
- xd->nb_rx_desc, di.rx_desc_lim.nb_min, di.rx_desc_lim.nb_max,
- di.rx_desc_lim.nb_align);
- s = format (s, "%Utx: queues %d (max %d), desc %d "
+ format_white_space, indent + 2, xd->conf.n_rx_queues,
+ di.max_rx_queues, xd->conf.n_rx_desc, di.rx_desc_lim.nb_min,
+ di.rx_desc_lim.nb_max, di.rx_desc_lim.nb_align);
+ s = format (s,
+ "%Utx: queues %d (max %d), desc %d "
"(min %d max %d align %d)\n",
- format_white_space, indent + 2, xd->tx_q_used, di.max_tx_queues,
- xd->nb_tx_desc, di.tx_desc_lim.nb_min, di.tx_desc_lim.nb_max,
- di.tx_desc_lim.nb_align);
+ format_white_space, indent + 2, xd->conf.n_tx_queues,
+ di.max_tx_queues, xd->conf.n_tx_desc, di.tx_desc_lim.nb_min,
+ di.tx_desc_lim.nb_max, di.tx_desc_lim.nb_align);
- if (xd->flags & DPDK_DEVICE_FLAG_PMD)
- {
- struct rte_pci_device *pci;
- struct rte_eth_rss_conf rss_conf;
- int vlan_off;
- int retval;
+ rss_conf.rss_key = 0;
+ rss_conf.rss_hf = 0;
+ retval = rte_eth_dev_rss_hash_conf_get (xd->port_id, &rss_conf);
+ if (retval < 0)
+ clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval);
- rss_conf.rss_key = 0;
- rss_conf.rss_hf = 0;
- retval = rte_eth_dev_rss_hash_conf_get (xd->port_id, &rss_conf);
- if (retval < 0)
- clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval);
+ pci = dpdk_get_pci_device (&di);
- pci = dpdk_get_pci_device (&di);
+ if (pci)
+ {
+ u8 *s2;
+ if (xd->cpu_socket > -1)
+ s2 = format (0, "%d", xd->cpu_socket);
+ else
+ s2 = format (0, "unknown");
+ s = format (s,
+ "%Upci: device %04x:%04x subsystem %04x:%04x "
+ "address %04x:%02x:%02x.%02x numa %v\n",
+ format_white_space, indent + 2, pci->id.vendor_id,
+ pci->id.device_id, pci->id.subsystem_vendor_id,
+ pci->id.subsystem_device_id, pci->addr.domain, pci->addr.bus,
+ pci->addr.devid, pci->addr.function, s2);
+ vec_free (s2);
+ }
- if (pci)
- {
- u8 *s2;
- if (xd->cpu_socket > -1)
- s2 = format (0, "%d", xd->cpu_socket);
- else
- s2 = format (0, "unknown");
- s = format (s, "%Upci: device %04x:%04x subsystem %04x:%04x "
- "address %04x:%02x:%02x.%02x numa %v\n",
- format_white_space, indent + 2, pci->id.vendor_id,
- pci->id.device_id, pci->id.subsystem_vendor_id,
- pci->id.subsystem_device_id, pci->addr.domain,
- pci->addr.bus, pci->addr.devid, pci->addr.function, s2);
- vec_free (s2);
- }
+ if (di.switch_info.domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
+ {
+ s = format (s, "%Uswitch info: %U\n", format_white_space, indent + 2,
+ format_switch_info, &di.switch_info);
+ }
- if (di.switch_info.domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
- {
- s =
- format (s, "%Uswitch info: %U\n", format_white_space, indent + 2,
- format_switch_info, &di.switch_info);
- }
+ if (1 < verbose)
+ {
+ s = format (s, "%Umodule: %U\n", format_white_space, indent + 2,
+ format_dpdk_device_module_info, xd);
+ }
- if (1 < verbose)
- {
- s = format (s, "%Umodule: %U\n", format_white_space, indent + 2,
- format_dpdk_device_module_info, xd);
- }
+ s = format (s, "%Umax rx packet len: %d\n", format_white_space, indent + 2,
+ di.max_rx_pktlen);
+ s = format (s, "%Upromiscuous: unicast %s all-multicast %s\n",
+ format_white_space, indent + 2,
+ rte_eth_promiscuous_get (xd->port_id) ? "on" : "off",
+ rte_eth_allmulticast_get (xd->port_id) ? "on" : "off");
+ vlan_off = rte_eth_dev_get_vlan_offload (xd->port_id);
+ s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n",
+ format_white_space, indent + 2,
+ vlan_off & RTE_ETH_VLAN_STRIP_OFFLOAD ? "on" : "off",
+ vlan_off & RTE_ETH_VLAN_FILTER_OFFLOAD ? "on" : "off",
+ vlan_off & RTE_ETH_VLAN_EXTEND_OFFLOAD ? "on" : "off");
+ s = format (s, "%Urx offload avail: %U\n", format_white_space, indent + 2,
+ format_dpdk_rx_offload_caps, di.rx_offload_capa);
+ s = format (s, "%Urx offload active: %U\n", format_white_space, indent + 2,
+ format_dpdk_rx_offload_caps, xd->enabled_rx_off);
+ s = format (s, "%Utx offload avail: %U\n", format_white_space, indent + 2,
+ format_dpdk_tx_offload_caps, di.tx_offload_capa);
+ s = format (s, "%Utx offload active: %U\n", format_white_space, indent + 2,
+ format_dpdk_tx_offload_caps, xd->enabled_tx_off);
+ s = format (s,
+ "%Urss avail: %U\n"
+ "%Urss active: %U\n",
+ format_white_space, indent + 2, format_dpdk_rss_hf_name,
+ di.flow_type_rss_offloads, format_white_space, indent + 2,
+ format_dpdk_rss_hf_name, rss_conf.rss_hf);
+
+ if (rte_eth_tx_burst_mode_get (xd->port_id, 0, &mode) == 0)
+ {
+ s = format (s, "%Utx burst mode: %s%s\n", format_white_space, indent + 2,
+ mode.info,
+ mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ? " (per queue)" :
+ "");
+ }
- s = format (s, "%Umax rx packet len: %d\n", format_white_space,
- indent + 2, di.max_rx_pktlen);
- s = format (s, "%Upromiscuous: unicast %s all-multicast %s\n",
- format_white_space, indent + 2,
- rte_eth_promiscuous_get (xd->port_id) ? "on" : "off",
- rte_eth_allmulticast_get (xd->port_id) ? "on" : "off");
- vlan_off = rte_eth_dev_get_vlan_offload (xd->port_id);
- s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n",
- format_white_space, indent + 2,
- vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off",
- vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off",
- vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? "on" : "off");
- s = format (s, "%Urx offload avail: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rx_offload_caps, di.rx_offload_capa);
- s = format (s, "%Urx offload active: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rx_offload_caps, xd->port_conf.rxmode.offloads);
- s = format (s, "%Utx offload avail: %U\n",
- format_white_space, indent + 2,
- format_dpdk_tx_offload_caps, di.tx_offload_capa);
- s = format (s, "%Utx offload active: %U\n",
- format_white_space, indent + 2,
- format_dpdk_tx_offload_caps, xd->port_conf.txmode.offloads);
- s = format (s, "%Urss avail: %U\n"
- "%Urss active: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rss_hf_name, di.flow_type_rss_offloads,
- format_white_space, indent + 2,
- format_dpdk_rss_hf_name, rss_conf.rss_hf);
-
- if (rte_eth_tx_burst_mode_get (xd->port_id, 0, &mode) == 0)
- {
- s = format (s, "%Utx burst mode: %s%s\n",
- format_white_space, indent + 2,
- mode.info,
- mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ?
- " (per queue)" : "");
- }
- else
- {
- s = format (s, "%Utx burst function: %s\n",
- format_white_space, indent + 2,
- ptr2sname (rte_eth_devices[xd->port_id].tx_pkt_burst));
- }
+ s = format (s, "%Utx burst function: %U\n", format_white_space, indent + 2,
+ format_dpdk_burst_fn, xd, VLIB_TX);
- if (rte_eth_rx_burst_mode_get (xd->port_id, 0, &mode) == 0)
- {
- s = format (s, "%Urx burst mode: %s%s\n",
- format_white_space, indent + 2,
- mode.info,
- mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ?
- " (per queue)" : "");
- }
- else
- {
- s = format (s, "%Urx burst function: %s\n",
- format_white_space, indent + 2,
- ptr2sname (rte_eth_devices[xd->port_id].rx_pkt_burst));
- }
+ if (rte_eth_rx_burst_mode_get (xd->port_id, 0, &mode) == 0)
+ {
+ s = format (s, "%Urx burst mode: %s%s\n", format_white_space, indent + 2,
+ mode.info,
+ mode.flags & RTE_ETH_BURST_FLAG_PER_QUEUE ? " (per queue)" :
+ "");
}
+ s = format (s, "%Urx burst function: %U\n", format_white_space, indent + 2,
+ format_dpdk_burst_fn, xd, VLIB_RX);
+
/* $$$ MIB counters */
{
#define _(N, V) \
@@ -713,7 +565,6 @@ format_dpdk_device (u8 * s, va_list * args)
if (ret >= 0 && ret <= len)
{
- /* *INDENT-OFF* */
vec_foreach_index(i, xd->xstats)
{
xstat = vec_elt_at_index(xd->xstats, i);
@@ -725,7 +576,6 @@ format_dpdk_device (u8 * s, va_list * args)
xstat->value);
}
}
- /* *INDENT-ON* */
vec_free (xstat_names);
}
@@ -756,14 +606,12 @@ format_dpdk_tx_trace (u8 * s, va_list * va)
dpdk_main_t *dm = &dpdk_main;
dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index);
u32 indent = format_get_indent (s);
- vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index);
- s = format (s, "%U tx queue %d",
- format_vnet_sw_interface_name, vnm, sw, t->queue_index);
+ s = format (s, "%U tx queue %d", format_vnet_sw_if_index_name, vnm,
+ xd->sw_if_index, t->queue_index);
- s = format (s, "\n%Ubuffer 0x%x: %U",
- format_white_space, indent,
- t->buffer_index, format_vnet_buffer, &t->buffer);
+ s = format (s, "\n%Ubuffer 0x%x: %U", format_white_space, indent,
+ t->buffer_index, format_vnet_buffer_no_chain, &t->buffer);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -787,14 +635,12 @@ format_dpdk_rx_trace (u8 * s, va_list * va)
dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index);
format_function_t *f;
u32 indent = format_get_indent (s);
- vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index);
- s = format (s, "%U rx queue %d",
- format_vnet_sw_interface_name, vnm, sw, t->queue_index);
+ s = format (s, "%U rx queue %d", format_vnet_sw_if_index_name, vnm,
+ xd->sw_if_index, t->queue_index);
- s = format (s, "\n%Ubuffer 0x%x: %U",
- format_white_space, indent,
- t->buffer_index, format_vnet_buffer, &t->buffer);
+ s = format (s, "\n%Ubuffer 0x%x: %U", format_white_space, indent,
+ t->buffer_index, format_vnet_buffer_no_chain, &t->buffer);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -855,11 +701,11 @@ format_dpdk_pkt_offload_flags (u8 * s, va_list * va)
s = format (s, "Packet Offload Flags");
-#define _(F, S) \
- if (*ol_flags & F) \
- { \
- s = format (s, "\n%U%s (0x%04x) %s", \
- format_white_space, indent, #F, F, S); \
+#define _(F, S) \
+ if ((*ol_flags & RTE_MBUF_F_##F) == RTE_MBUF_F_##F) \
+ { \
+ s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \
+ "PKT_" #F, RTE_MBUF_F_##F, S); \
}
foreach_dpdk_pkt_offload_flag
@@ -887,7 +733,7 @@ u8 *
format_dpdk_rte_mbuf_tso (u8 *s, va_list *va)
{
struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *);
- if (mb->ol_flags & PKT_TX_TCP_SEG)
+ if (mb->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
{
s = format (s, "l4_len %u tso_segsz %u", mb->l4_len, mb->tso_segsz);
}
@@ -940,8 +786,9 @@ format_dpdk_rte_mbuf (u8 * s, va_list * va)
s = format (s, "\n%U%U", format_white_space, indent,
format_dpdk_pkt_offload_flags, &mb->ol_flags);
- if ((mb->ol_flags & PKT_RX_VLAN) &&
- ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0))
+ if ((mb->ol_flags & RTE_MBUF_F_RX_VLAN) &&
+ ((mb->ol_flags &
+ (RTE_MBUF_F_RX_VLAN_STRIPPED | RTE_MBUF_F_RX_QINQ_STRIPPED)) == 0))
{
ethernet_vlan_header_tv_t *vlan_hdr =
((ethernet_vlan_header_tv_t *) & (eth_hdr->type));
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index aebbb64dd76..2d038b907bf 100644
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -21,8 +21,10 @@
#include <vlib/unix/unix.h>
#include <vlib/log.h>
+#include <vnet/vnet.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
#include <dpdk/buffer.h>
#include <dpdk/device/dpdk.h>
#include <dpdk/cryptodev/cryptodev.h>
@@ -43,71 +45,69 @@
#include <dpdk/device/dpdk_priv.h>
-#define ETHER_MAX_LEN 1518 /**< Maximum frame len, including CRC. */
-
dpdk_main_t dpdk_main;
dpdk_config_main_t dpdk_config_main;
#define LINK_STATE_ELOGS 0
-/* Port configuration, mildly modified Intel app values */
+/* dev_info.speed_capa -> interface name mapppings */
+const struct
+{
+ u32 link_speed;
+ const char *pfx;
+} if_name_prefixes[] = {
+ /* sorted, higher speed first */
+ { RTE_ETH_LINK_SPEED_200G, "TwoHundredGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_100G, "HundredGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_56G, "FiftySixGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_50G, "FiftyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_40G, "FortyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_25G, "TwentyFiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_20G, "TwentyGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_10G, "TenGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_5G, "FiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_2_5G, "TwoDotFiveGigabitEthernet" },
+ { RTE_ETH_LINK_SPEED_1G, "GigabitEthernet" },
+};
-static dpdk_port_type_t
-port_type_from_speed_capa (struct rte_eth_dev_info *dev_info)
+static clib_error_t *
+dpdk_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ u32 frame_size)
{
+ dpdk_main_t *dm = &dpdk_main;
+ dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
+ int rv;
+ u32 mtu;
- if (dev_info->speed_capa & ETH_LINK_SPEED_100G)
- return VNET_DPDK_PORT_TYPE_ETH_100G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_56G)
- return VNET_DPDK_PORT_TYPE_ETH_56G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_50G)
- return VNET_DPDK_PORT_TYPE_ETH_50G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_40G)
- return VNET_DPDK_PORT_TYPE_ETH_40G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_25G)
- return VNET_DPDK_PORT_TYPE_ETH_25G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_20G)
- return VNET_DPDK_PORT_TYPE_ETH_20G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_10G)
- return VNET_DPDK_PORT_TYPE_ETH_10G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_5G)
- return VNET_DPDK_PORT_TYPE_ETH_5G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_2_5G)
- return VNET_DPDK_PORT_TYPE_ETH_2_5G;
- else if (dev_info->speed_capa & ETH_LINK_SPEED_1G)
- return VNET_DPDK_PORT_TYPE_ETH_1G;
-
- return VNET_DPDK_PORT_TYPE_UNKNOWN;
-}
+ mtu = frame_size - xd->driver_frame_overhead;
-static dpdk_port_type_t
-port_type_from_link_speed (u32 link_speed)
-{
- switch (link_speed)
+ rv = rte_eth_dev_set_mtu (xd->port_id, mtu);
+
+ if (rv < 0)
{
- case ETH_SPEED_NUM_1G:
- return VNET_DPDK_PORT_TYPE_ETH_1G;
- case ETH_SPEED_NUM_2_5G:
- return VNET_DPDK_PORT_TYPE_ETH_2_5G;
- case ETH_SPEED_NUM_5G:
- return VNET_DPDK_PORT_TYPE_ETH_5G;
- case ETH_SPEED_NUM_10G:
- return VNET_DPDK_PORT_TYPE_ETH_10G;
- case ETH_SPEED_NUM_20G:
- return VNET_DPDK_PORT_TYPE_ETH_20G;
- case ETH_SPEED_NUM_25G:
- return VNET_DPDK_PORT_TYPE_ETH_25G;
- case ETH_SPEED_NUM_40G:
- return VNET_DPDK_PORT_TYPE_ETH_40G;
- case ETH_SPEED_NUM_50G:
- return VNET_DPDK_PORT_TYPE_ETH_50G;
- case ETH_SPEED_NUM_56G:
- return VNET_DPDK_PORT_TYPE_ETH_56G;
- case ETH_SPEED_NUM_100G:
- return VNET_DPDK_PORT_TYPE_ETH_100G;
- default:
- return VNET_DPDK_PORT_TYPE_UNKNOWN;
+ dpdk_log_err ("[%u] rte_eth_dev_set_mtu failed (mtu %u, rv %d)",
+ xd->port_id, mtu, rv);
+ switch (rv)
+ {
+ case -ENOTSUP:
+ return vnet_error (VNET_ERR_UNSUPPORTED,
+ "dpdk driver doesn't support MTU change");
+ case -EBUSY:
+ return vnet_error (VNET_ERR_BUSY, "port is running");
+ case -EINVAL:
+ return vnet_error (VNET_ERR_INVALID_VALUE, "invalid MTU");
+ default:
+ return vnet_error (VNET_ERR_BUG,
+ "unexpected return value %d returned from "
+ "rte_eth_dev_set_mtu(...)",
+ rv);
+ }
}
+ else
+ dpdk_log_debug ("[%u] max_frame_size set to %u by setting MTU to %u",
+ xd->port_id, frame_size, mtu);
+
+ return 0;
}
static u32
@@ -121,15 +121,11 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
{
case ETHERNET_INTERFACE_FLAG_DEFAULT_L3:
/* set to L3/non-promisc mode */
- xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC;
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_PROMISC, 0);
break;
case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
- xd->flags |= DPDK_DEVICE_FLAG_PROMISC;
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_PROMISC, 1);
break;
- case ETHERNET_INTERFACE_FLAG_MTU:
- xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes;
- dpdk_device_setup (xd);
- return 0;
default:
return ~0;
}
@@ -145,12 +141,6 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
return old;
}
-static int
-dpdk_port_crc_strip_enabled (dpdk_device_t * xd)
-{
- return !(xd->port_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC);
-}
-
/* The function check_l3cache helps check if Level 3 cache exists or not on current CPUs
return value 1: exist.
return value 0: not exist.
@@ -192,701 +182,360 @@ check_l3cache ()
return 0;
}
-static void
-dpdk_enable_l4_csum_offload (dpdk_device_t * xd)
+static dpdk_device_config_t *
+dpdk_find_startup_config (struct rte_eth_dev_info *di)
{
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |= DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
+ dpdk_main_t *dm = &dpdk_main;
+ struct rte_pci_device *pci_dev;
+ vlib_pci_addr_t pci_addr;
+#ifdef __linux__
+ struct rte_vmbus_device *vmbus_dev;
+ vlib_vmbus_addr_t vmbus_addr;
+#endif /* __linux__ */
+ uword *p = 0;
+
+ if ((pci_dev = dpdk_get_pci_device (di)))
+ {
+ pci_addr.domain = pci_dev->addr.domain;
+ pci_addr.bus = pci_dev->addr.bus;
+ pci_addr.slot = pci_dev->addr.devid;
+ pci_addr.function = pci_dev->addr.function;
+ p =
+ hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
+ }
+
+#ifdef __linux__
+ if ((vmbus_dev = dpdk_get_vmbus_device (di)))
+ {
+ unformat_input_t input_vmbus;
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ const char *dev_name = rte_dev_name (di->device);
+#else
+ const char *dev_name = di->device->name;
+#endif
+ unformat_init_string (&input_vmbus, dev_name, strlen (dev_name));
+ if (unformat (&input_vmbus, "%U", unformat_vlib_vmbus_addr, &vmbus_addr))
+ p = mhash_get (&dm->conf->device_config_index_by_vmbus_addr,
+ &vmbus_addr);
+ unformat_free (&input_vmbus);
+ }
+#endif /* __linux__ */
+
+ if (p)
+ return pool_elt_at_index (dm->conf->dev_confs, p[0]);
+ return &dm->conf->default_devconf;
}
static clib_error_t *
dpdk_lib_init (dpdk_main_t * dm)
{
- u32 nports;
- u32 mtu, max_rx_frame;
- int i;
- clib_error_t *error;
- vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ u16 port_id;
vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_device_main_t *vdm = &vnet_device_main;
vnet_sw_interface_t *sw;
vnet_hw_interface_t *hi;
dpdk_device_t *xd;
- vlib_pci_addr_t last_pci_addr;
- u32 last_pci_addr_port = 0;
- u8 af_packet_instance_num = 0;
- last_pci_addr.as_u32 = ~0;
-
- nports = rte_eth_dev_count_avail ();
-
- if (nports < 1)
- {
- dpdk_log_notice ("DPDK drivers found no Ethernet devices...");
- }
-
- if (CLIB_DEBUG > 0)
- dpdk_log_notice ("DPDK drivers found %d ports...", nports);
-
- if (dm->conf->enable_tcp_udp_checksum)
- dm->buffer_flags_template &= ~(VNET_BUFFER_F_L4_CHECKSUM_CORRECT
- | VNET_BUFFER_F_L4_CHECKSUM_COMPUTED);
+ char *if_num_fmt;
/* vlib_buffer_t template */
vec_validate_aligned (dm->per_thread_data, tm->n_vlib_mains - 1,
CLIB_CACHE_LINE_BYTES);
- for (i = 0; i < tm->n_vlib_mains; i++)
+ for (int i = 0; i < tm->n_vlib_mains; i++)
{
dpdk_per_thread_data_t *ptd = vec_elt_at_index (dm->per_thread_data, i);
clib_memset (&ptd->buffer_template, 0, sizeof (vlib_buffer_t));
- ptd->buffer_template.flags = dm->buffer_flags_template;
vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_TX] = (u32) ~ 0;
}
- /* *INDENT-OFF* */
- RTE_ETH_FOREACH_DEV(i)
+ if_num_fmt =
+ dm->conf->interface_name_format_decimal ? "%d/%d/%d" : "%x/%x/%x";
+
+ /* device config defaults */
+ dm->default_port_conf.n_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
+ dm->default_port_conf.n_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
+ dm->default_port_conf.n_rx_queues = 1;
+ dm->default_port_conf.n_tx_queues = tm->n_vlib_mains;
+ dm->default_port_conf.rss_hf =
+ RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP;
+ dm->default_port_conf.max_lro_pkt_size = DPDK_MAX_LRO_SIZE_DEFAULT;
+
+ if ((clib_mem_get_default_hugepage_size () == 2 << 20) &&
+ check_l3cache () == 0)
+ dm->default_port_conf.n_rx_desc = dm->default_port_conf.n_tx_desc = 512;
+
+ RTE_ETH_FOREACH_DEV (port_id)
{
u8 addr[6];
- int vlan_off;
- struct rte_eth_dev_info dev_info;
- struct rte_pci_device *pci_dev;
- struct rte_vmbus_device *vmbus_dev;
- dpdk_portid_t next_port_id;
+ int rv, q;
+ struct rte_eth_dev_info di;
dpdk_device_config_t *devconf = 0;
- vlib_pci_addr_t pci_addr;
- vlib_vmbus_addr_t vmbus_addr;
- uword *p = 0;
+ vnet_eth_interface_registration_t eir = {};
+ dpdk_driver_t *dr;
+ i8 numa_node;
- if (!rte_eth_dev_is_valid_port(i))
+ if (!rte_eth_dev_is_valid_port (port_id))
continue;
- rte_eth_dev_info_get (i, &dev_info);
-
- if (dev_info.device == 0)
+ if ((rv = rte_eth_dev_info_get (port_id, &di)) != 0)
{
- dpdk_log_notice ("DPDK bug: missing device info. Skipping %s device",
- dev_info.driver_name);
+ dpdk_log_warn ("[%u] failed to get device info. skipping device.",
+ port_id);
continue;
}
- pci_dev = dpdk_get_pci_device (&dev_info);
-
- if (pci_dev)
+ if (di.device == 0)
{
- pci_addr.domain = pci_dev->addr.domain;
- pci_addr.bus = pci_dev->addr.bus;
- pci_addr.slot = pci_dev->addr.devid;
- pci_addr.function = pci_dev->addr.function;
- p = hash_get (dm->conf->device_config_index_by_pci_addr,
- pci_addr.as_u32);
+ dpdk_log_warn ("[%u] missing device info. Skipping '%s' device",
+ port_id, di.driver_name);
+ continue;
}
- vmbus_dev = dpdk_get_vmbus_device (&dev_info);
+ devconf = dpdk_find_startup_config (&di);
- if (vmbus_dev)
+ /* If device is blacklisted, we should skip it */
+ if (devconf->is_blacklisted)
{
- unformat_input_t input_vmbus;
-
- unformat_init_vector (&input_vmbus, (u8 *) dev_info.device->name);
- if (unformat (&input_vmbus, "%U", unformat_vlib_vmbus_addr,
- &vmbus_addr))
- {
- p = mhash_get (&dm->conf->device_config_index_by_vmbus_addr,
- &vmbus_addr);
- }
+ dpdk_log_notice ("[%d] Device %s blacklisted. Skipping...", port_id,
+ di.driver_name);
+ continue;
}
- if (p)
+ vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES);
+ xd->port_id = port_id;
+ xd->device_index = xd - dm->devices;
+ xd->per_interface_next_index = ~0;
+
+ clib_memcpy (&xd->conf, &dm->default_port_conf,
+ sizeof (dpdk_port_conf_t));
+
+ /* find driver datea for this PMD */
+ if ((dr = dpdk_driver_find (di.driver_name, &xd->if_desc)))
{
- devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
- /* If device is blacklisted, we should skip it */
- if (devconf->is_blacklisted)
- {
- continue;
- }
+ xd->driver = dr;
+ xd->supported_flow_actions = dr->supported_flow_actions;
+ xd->conf.disable_rss = dr->mq_mode_none;
+ xd->conf.disable_rx_scatter = dr->disable_rx_scatter;
+ xd->conf.enable_rxq_int = dr->enable_rxq_int;
+ if (dr->use_intel_phdr_cksum)
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM, 1);
+ if (dr->int_unmaskable)
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_INT_UNMASKABLE, 1);
}
else
- devconf = &dm->conf->default_devconf;
+ dpdk_log_warn ("[%u] unknown driver '%s'", port_id, di.driver_name);
- /* Create vnet interface */
- vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES);
- xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
- xd->cpu_socket = (i8) rte_eth_dev_socket_id (i);
- if (p)
+ if (devconf->name)
{
xd->name = devconf->name;
}
-
- /* Handle representor devices that share the same PCI ID */
- if (dev_info.switch_info.domain_id != RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID)
- {
- if (dev_info.switch_info.port_id != (uint16_t)-1)
- xd->interface_name_suffix = format (0, "%d", dev_info.switch_info.port_id);
- }
- /* Handle interface naming for devices with multiple ports sharing same PCI ID */
- else if (pci_dev &&
- ((next_port_id = rte_eth_find_next (i + 1)) != RTE_MAX_ETHPORTS))
+ else
{
- struct rte_eth_dev_info di = { 0 };
- struct rte_pci_device *next_pci_dev;
- rte_eth_dev_info_get (next_port_id, &di);
- next_pci_dev = di.device ? RTE_DEV_TO_PCI (di.device) : 0;
- if (next_pci_dev &&
- pci_addr.as_u32 != last_pci_addr.as_u32 &&
- memcmp (&pci_dev->addr, &next_pci_dev->addr,
- sizeof (struct rte_pci_addr)) == 0)
+ struct rte_pci_device *pci_dev;
+ if (dr && dr->interface_name_prefix)
{
- xd->interface_name_suffix = format (0, "0");
- last_pci_addr.as_u32 = pci_addr.as_u32;
- last_pci_addr_port = i;
- }
- else if (pci_addr.as_u32 == last_pci_addr.as_u32)
- {
- xd->interface_name_suffix =
- format (0, "%u", i - last_pci_addr_port);
+ /* prefix override by driver */
+ xd->name = format (xd->name, "%s", dr->interface_name_prefix);
}
else
{
- last_pci_addr.as_u32 = ~0;
+ /* interface name prefix from speed_capa */
+ u64 mask = ~((if_name_prefixes[0].link_speed << 1) - 1);
+
+ if (di.speed_capa & mask)
+ dpdk_log_warn ("[%u] unknown speed capability 0x%x reported",
+ xd->port_id, di.speed_capa & mask);
+
+ for (int i = 0; i < ARRAY_LEN (if_name_prefixes); i++)
+ if (if_name_prefixes[i].link_speed & di.speed_capa)
+ {
+ xd->name =
+ format (xd->name, "%s", if_name_prefixes[i].pfx);
+ break;
+ }
+ if (xd->name == 0)
+ xd->name = format (xd->name, "Ethernet");
}
- }
- else
- last_pci_addr.as_u32 = ~0;
- clib_memcpy (&xd->tx_conf, &dev_info.default_txconf,
- sizeof (struct rte_eth_txconf));
+ if (dr && dr->interface_number_from_port_id)
+ xd->name = format (xd->name, "%u", port_id);
+ else if ((pci_dev = dpdk_get_pci_device (&di)))
+ xd->name = format (xd->name, if_num_fmt, pci_dev->addr.bus,
+ pci_dev->addr.devid, pci_dev->addr.function);
+ else
+ xd->name = format (xd->name, "%u", port_id);
- if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_IPV4_CKSUM)
- {
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_IPV4_CKSUM;
- xd->flags |= DPDK_DEVICE_FLAG_RX_IP4_CKSUM;
+ /* Handle representor devices that share the same PCI ID */
+ if ((di.switch_info.domain_id !=
+ RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID) &&
+ (di.switch_info.port_id != (uint16_t) -1))
+ xd->name = format (xd->name, "/%d", di.switch_info.port_id);
}
- if (dm->conf->enable_tcp_udp_checksum)
+ /* number of RX and TX queues */
+ if (devconf->num_tx_queues > 0)
{
- if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_UDP_CKSUM)
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_UDP_CKSUM;
- if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_CKSUM)
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_TCP_CKSUM;
+ if (di.max_tx_queues < devconf->num_tx_queues)
+ dpdk_log_warn ("[%u] Configured number of TX queues (%u) is "
+ "bigger than maximum supported (%u)",
+ port_id, devconf->num_tx_queues, di.max_tx_queues);
+ xd->conf.n_tx_queues = devconf->num_tx_queues;
}
- if (dm->conf->no_multi_seg)
- {
- xd->port_conf.txmode.offloads &= ~DEV_TX_OFFLOAD_MULTI_SEGS;
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_JUMBO_FRAME;
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_SCATTER;
- }
- else
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME;
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_SCATTER;
- xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG;
- }
-
- xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains);
-
- if (devconf->num_tx_queues > 0
- && devconf->num_tx_queues < xd->tx_q_used)
- xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues);
+ xd->conf.n_tx_queues = clib_min (di.max_tx_queues, xd->conf.n_tx_queues);
- if (devconf->num_rx_queues > 1
- && dev_info.max_rx_queues >= devconf->num_rx_queues)
+ if (devconf->num_rx_queues > 1 &&
+ di.max_rx_queues >= devconf->num_rx_queues)
{
- xd->rx_q_used = devconf->num_rx_queues;
- xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
- if (devconf->rss_fn == 0)
- xd->port_conf.rx_adv_conf.rss_conf.rss_hf =
- ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
- else
+ xd->conf.n_rx_queues = devconf->num_rx_queues;
+ if (devconf->rss_fn)
{
u64 unsupported_bits;
- xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn;
- unsupported_bits = xd->port_conf.rx_adv_conf.rss_conf.rss_hf;
- unsupported_bits &= ~dev_info.flow_type_rss_offloads;
+ xd->conf.rss_hf = devconf->rss_fn;
+ unsupported_bits = xd->conf.rss_hf;
+ unsupported_bits &= ~di.flow_type_rss_offloads;
if (unsupported_bits)
dpdk_log_warn ("Unsupported RSS hash functions: %U",
format_dpdk_rss_hf_name, unsupported_bits);
}
- xd->port_conf.rx_adv_conf.rss_conf.rss_hf &=
- dev_info.flow_type_rss_offloads;
+ xd->conf.rss_hf &= di.flow_type_rss_offloads;
+ dpdk_log_debug ("[%u] rss_hf: %U", port_id, format_dpdk_rss_hf_name,
+ xd->conf.rss_hf);
}
- else
- xd->rx_q_used = 1;
- vec_validate_aligned (xd->rx_queues, xd->rx_q_used - 1,
+#ifndef RTE_VLAN_HLEN
+#define RTE_VLAN_HLEN 4
+#endif
+ xd->driver_frame_overhead =
+ RTE_ETHER_HDR_LEN + 2 * RTE_VLAN_HLEN + RTE_ETHER_CRC_LEN;
+#if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0)
+ q = di.max_rx_pktlen - di.max_mtu;
+
+ /* attempt to protect from bogus value provided by pmd */
+ if (q < (2 * xd->driver_frame_overhead) && q > 0 &&
+ di.max_mtu != UINT16_MAX)
+ xd->driver_frame_overhead = q;
+ dpdk_log_debug ("[%u] min_mtu: %u, max_mtu: %u, min_rx_bufsize: %u, "
+ "max_rx_pktlen: %u, max_lro_pkt_size: %u",
+ xd->port_id, di.min_mtu, di.max_mtu, di.min_rx_bufsize,
+ di.max_rx_pktlen, di.max_lro_pkt_size);
+#endif
+ dpdk_log_debug ("[%u] driver frame overhead is %u", port_id,
+ xd->driver_frame_overhead);
+
+ /* number of RX and TX tescriptors */
+ if (devconf->num_rx_desc)
+ xd->conf.n_rx_desc = devconf->num_rx_desc;
+ else if (dr && dr->n_rx_desc)
+ xd->conf.n_rx_desc = dr->n_rx_desc;
+
+ if (devconf->num_tx_desc)
+ xd->conf.n_tx_desc = devconf->num_tx_desc;
+ else if (dr && dr->n_tx_desc)
+ xd->conf.n_tx_desc = dr->n_tx_desc;
+
+ dpdk_log_debug (
+ "[%u] n_rx_queues: %u n_tx_queues: %u n_rx_desc: %u n_tx_desc: %u",
+ port_id, xd->conf.n_rx_queues, xd->conf.n_tx_queues,
+ xd->conf.n_rx_desc, xd->conf.n_tx_desc);
+
+ vec_validate_aligned (xd->rx_queues, xd->conf.n_rx_queues - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (xd->tx_queues, xd->conf.n_tx_queues - 1,
CLIB_CACHE_LINE_BYTES);
- xd->flags |= DPDK_DEVICE_FLAG_PMD;
-
- /* workaround for drivers not setting driver_name */
- if ((!dev_info.driver_name) && (pci_dev))
- dev_info.driver_name = pci_dev->driver->driver.name;
-
- ASSERT (dev_info.driver_name);
-
- if (!xd->pmd)
- {
-
-
-#define _(s,f) else if (dev_info.driver_name && \
- !strcmp(dev_info.driver_name, s)) \
- xd->pmd = VNET_DPDK_PMD_##f;
- if (0)
- ;
- foreach_dpdk_pmd
-#undef _
- else
- xd->pmd = VNET_DPDK_PMD_UNKNOWN;
-
- xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
- xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
-
- switch (xd->pmd)
- {
- /* Drivers with valid speed_capa set */
- case VNET_DPDK_PMD_I40E:
- xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
- case VNET_DPDK_PMD_E1000EM:
- case VNET_DPDK_PMD_IGB:
- case VNET_DPDK_PMD_IGC:
- case VNET_DPDK_PMD_IXGBE:
- case VNET_DPDK_PMD_ICE:
- xd->port_type = port_type_from_speed_capa (&dev_info);
- xd->supported_flow_actions = VNET_FLOW_ACTION_MARK |
- VNET_FLOW_ACTION_REDIRECT_TO_NODE |
- VNET_FLOW_ACTION_REDIRECT_TO_QUEUE |
- VNET_FLOW_ACTION_BUFFER_ADVANCE |
- VNET_FLOW_ACTION_COUNT | VNET_FLOW_ACTION_DROP |
- VNET_FLOW_ACTION_RSS;
-
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |=
- DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
- }
-
- xd->port_conf.intr_conf.rxq = 1;
- break;
- case VNET_DPDK_PMD_CXGBE:
- case VNET_DPDK_PMD_MLX4:
- case VNET_DPDK_PMD_MLX5:
- case VNET_DPDK_PMD_QEDE:
- case VNET_DPDK_PMD_BNXT:
- xd->port_type = port_type_from_speed_capa (&dev_info);
- break;
-
- /* SR-IOV VFs */
- case VNET_DPDK_PMD_I40EVF:
- xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
- case VNET_DPDK_PMD_IGBVF:
- case VNET_DPDK_PMD_IXGBEVF:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |=
- DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
- }
- /* DPDK bug in multiqueue... */
- /* xd->port_conf.intr_conf.rxq = 1; */
- break;
-
- /* iAVF */
- case VNET_DPDK_PMD_IAVF:
- xd->flags |= DPDK_DEVICE_FLAG_INT_UNMASKABLE;
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- xd->supported_flow_actions =
- VNET_FLOW_ACTION_MARK | VNET_FLOW_ACTION_REDIRECT_TO_NODE |
- VNET_FLOW_ACTION_REDIRECT_TO_QUEUE |
- VNET_FLOW_ACTION_BUFFER_ADVANCE | VNET_FLOW_ACTION_COUNT |
- VNET_FLOW_ACTION_DROP | VNET_FLOW_ACTION_RSS;
-
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |=
- DPDK_DEVICE_FLAG_TX_OFFLOAD |
- DPDK_DEVICE_FLAG_INTEL_PHDR_CKSUM;
- }
- /* DPDK bug in multiqueue... */
- /* xd->port_conf.intr_conf.rxq = 1; */
- break;
-
- case VNET_DPDK_PMD_THUNDERX:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
-
- if (dm->conf->no_tx_checksum_offload == 0)
- {
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_TCP_CKSUM;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_UDP_CKSUM;
- xd->flags |= DPDK_DEVICE_FLAG_TX_OFFLOAD;
- }
- break;
-
- case VNET_DPDK_PMD_ENA:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_SCATTER;
- xd->port_conf.intr_conf.rxq = 1;
- break;
-
- case VNET_DPDK_PMD_DPAA2:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
- break;
-
- /* Cisco VIC */
- case VNET_DPDK_PMD_ENIC:
- {
- struct rte_eth_link l;
- rte_eth_link_get_nowait (i, &l);
- xd->port_type = port_type_from_link_speed (l.link_speed);
- if (dm->conf->enable_tcp_udp_checksum)
- dpdk_enable_l4_csum_offload (xd);
- }
- break;
-
- /* Intel Red Rock Canyon */
- case VNET_DPDK_PMD_FM10K:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH;
- break;
-
- /* virtio */
- case VNET_DPDK_PMD_VIRTIO:
- xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_NONE;
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
- xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO;
- /*
- * Enable use of RX interrupts if supported.
- *
- * There is no device flag or capability for this, so
- * use the same check that the virtio driver does.
- */
- if (pci_dev && rte_intr_cap_multiple (&pci_dev->intr_handle))
- xd->port_conf.intr_conf.rxq = 1;
- break;
-
- /* vmxnet3 */
- case VNET_DPDK_PMD_VMXNET3:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
- xd->port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MULTI_SEGS;
- break;
-
- case VNET_DPDK_PMD_AF_PACKET:
- xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET;
- xd->af_packet_instance_num = af_packet_instance_num++;
- break;
-
- case VNET_DPDK_PMD_VIRTIO_USER:
- xd->port_type = VNET_DPDK_PORT_TYPE_VIRTIO_USER;
- break;
-
- case VNET_DPDK_PMD_VHOST_ETHER:
- xd->port_type = VNET_DPDK_PORT_TYPE_VHOST_ETHER;
- break;
-
- case VNET_DPDK_PMD_LIOVF_ETHER:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- break;
-
- case VNET_DPDK_PMD_FAILSAFE:
- xd->port_type = VNET_DPDK_PORT_TYPE_FAILSAFE;
- xd->port_conf.intr_conf.lsc = 1;
- break;
-
- case VNET_DPDK_PMD_NETVSC:
- {
- struct rte_eth_link l;
- rte_eth_link_get_nowait (i, &l);
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_VF;
- }
- break;
-
- default:
- xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
- }
-
- if (devconf->num_rx_desc)
- xd->nb_rx_desc = devconf->num_rx_desc;
- else {
-
- /* If num_rx_desc is not specified by VPP user, the current CPU is working
- with 2M page and has no L3 cache, default num_rx_desc is changed to 512
- from original 1024 to help reduce TLB misses.
- */
- if ((clib_mem_get_default_hugepage_size () == 2 << 20)
- && check_l3cache() == 0)
- xd->nb_rx_desc = 512;
- }
-
- if (devconf->num_tx_desc)
- xd->nb_tx_desc = devconf->num_tx_desc;
- else {
-
- /* If num_tx_desc is not specified by VPP user, the current CPU is working
- with 2M page and has no L3 cache, default num_tx_desc is changed to 512
- from original 1024 to help reduce TLB misses.
- */
- if ((clib_mem_get_default_hugepage_size () == 2 << 20)
- && check_l3cache() == 0)
- xd->nb_tx_desc = 512;
- }
- }
-
- if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
- {
- f64 now = vlib_time_now (vm);
- u32 rnd;
- rnd = (u32) (now * 1e6);
- rnd = random_u32 (&rnd);
- clib_memcpy (addr + 2, &rnd, sizeof (rnd));
- addr[0] = 2;
- addr[1] = 0xfe;
- }
- else
- rte_eth_macaddr_get (i, (void *) addr);
-
- xd->port_id = i;
- xd->device_index = xd - dm->devices;
- xd->per_interface_next_index = ~0;
-
- /* assign interface to input thread */
- int q;
-
- error = ethernet_register_interface
- (dm->vnet_main, dpdk_device_class.index, xd->device_index,
- /* ethernet address */ addr,
- &xd->hw_if_index, dpdk_flag_change);
- if (error)
- return error;
-
- /*
- * Ensure default mtu is not > the mtu read from the hardware.
- * Otherwise rte_eth_dev_configure() will fail and the port will
- * not be available.
- * Calculate max_frame_size and mtu supported by NIC
- */
- if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
- {
- /*
- * This device does not support the platforms's max frame
- * size. Use it's advertised mru instead.
- */
- max_rx_frame = dev_info.max_rx_pktlen;
- mtu = dev_info.max_rx_pktlen - sizeof (ethernet_header_t);
- }
+ rte_eth_macaddr_get (port_id, (void *) addr);
+
+ /* create interface */
+ eir.dev_class_index = dpdk_device_class.index;
+ eir.dev_instance = xd->device_index;
+ eir.address = addr;
+ eir.cb.flag_change = dpdk_flag_change;
+ eir.cb.set_max_frame_size = dpdk_set_max_frame_size;
+ xd->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+ hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+ numa_node = (i8) rte_eth_dev_socket_id (port_id);
+ if (numa_node == SOCKET_ID_ANY)
+ /* numa_node is not set, default to 0 */
+ hi->numa_node = xd->cpu_socket = 0;
else
- {
- /* VPP treats MTU and max_rx_pktlen both equal to
- * ETHERNET_MAX_PACKET_BYTES, if dev_info.max_rx_pktlen >=
- * ETHERNET_MAX_PACKET_BYTES + sizeof(ethernet_header_t)
- */
- if (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES +
- sizeof (ethernet_header_t)))
- {
- mtu = ETHERNET_MAX_PACKET_BYTES;
- max_rx_frame = ETHERNET_MAX_PACKET_BYTES;
-
- /*
- * Some platforms do not account for Ethernet FCS (4 bytes) in
- * MTU calculations. To interop with them increase mru but only
- * if the device's settings can support it.
- */
- if (dpdk_port_crc_strip_enabled (xd) &&
- (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES +
- sizeof (ethernet_header_t) +
- 4)))
- {
- max_rx_frame += 4;
- }
- }
- else
- {
- max_rx_frame = ETHERNET_MAX_PACKET_BYTES;
- mtu = ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t);
-
- if (dpdk_port_crc_strip_enabled (xd) &&
- (dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)))
- {
- max_rx_frame += 4;
- }
- }
- }
-
- if (xd->pmd == VNET_DPDK_PMD_FAILSAFE)
- {
- /* failsafe device numerables are reported with active device only,
- * need to query the mtu for current device setup to overwrite
- * reported value.
- */
- uint16_t dev_mtu;
- if (!rte_eth_dev_get_mtu (i, &dev_mtu))
- {
- mtu = dev_mtu;
- max_rx_frame = mtu + sizeof (ethernet_header_t);
-
- if (dpdk_port_crc_strip_enabled (xd))
- {
- max_rx_frame += 4;
- }
- }
- }
+ hi->numa_node = xd->cpu_socket = numa_node;
+ sw = vnet_get_hw_sw_interface (vnm, xd->hw_if_index);
+ xd->sw_if_index = sw->sw_if_index;
+ dpdk_log_debug ("[%u] interface %v created", port_id, hi->name);
- /*Set port rxmode config */
- xd->port_conf.rxmode.max_rx_pkt_len = max_rx_frame;
+ if (devconf->tag)
+ vnet_set_sw_interface_tag (vnm, devconf->tag, sw->sw_if_index);
- sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->hw_if_index);
- xd->sw_if_index = sw->sw_if_index;
- vnet_hw_if_set_input_node (dm->vnet_main, xd->hw_if_index,
- dpdk_input_node.index);
+ ethernet_set_flags (vnm, xd->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+ /* assign worker threads */
+ vnet_hw_if_set_input_node (vnm, xd->hw_if_index, dpdk_input_node.index);
if (devconf->workers)
{
- int i;
+ int j;
q = 0;
- clib_bitmap_foreach (i, devconf->workers) {
+ clib_bitmap_foreach (j, devconf->workers)
+ {
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
rxq->queue_index = vnet_hw_if_register_rx_queue (
- dm->vnet_main, xd->hw_if_index, q++,
- vdm->first_worker_thread_index + i);
- }
+ vnm, xd->hw_if_index, q++, vdm->first_worker_thread_index + j);
+ }
}
else
- for (q = 0; q < xd->rx_q_used; q++)
+ for (q = 0; q < xd->conf.n_rx_queues; q++)
{
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
rxq->queue_index = vnet_hw_if_register_rx_queue (
- dm->vnet_main, xd->hw_if_index, q, VNET_HW_IF_RXQ_THREAD_ANY);
+ vnm, xd->hw_if_index, q, VNET_HW_IF_RXQ_THREAD_ANY);
}
- vnet_hw_if_update_runtime_data (dm->vnet_main, xd->hw_if_index);
-
- /*Get vnet hardware interface */
- hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index);
+ for (q = 0; q < xd->conf.n_tx_queues; q++)
+ {
+ dpdk_tx_queue_t *txq = vec_elt_at_index (xd->tx_queues, q);
+ txq->queue_index =
+ vnet_hw_if_register_tx_queue (vnm, xd->hw_if_index, q);
+ }
- /*Override default max_packet_bytes and max_supported_bytes set in
- * ethernet_register_interface() above*/
- if (hi)
+ for (q = 0; q < tm->n_vlib_mains; q++)
{
- hi->max_packet_bytes = mtu;
- hi->max_supported_packet_bytes = max_rx_frame;
- hi->numa_node = xd->cpu_socket;
-
- /* Indicate ability to support L3 DMAC filtering and
- * initialize interface to L3 non-promisc mode */
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER;
- ethernet_set_flags (dm->vnet_main, xd->hw_if_index,
- ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+ u32 qi = xd->tx_queues[q % xd->conf.n_tx_queues].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, q);
}
- if (dm->conf->no_tx_checksum_offload == 0)
- if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD && hi != NULL)
- {
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TX_IP4_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM;
- }
- if (devconf->tso == DPDK_DEVICE_TSO_ON && hi != NULL)
+ if (devconf->tso == DPDK_DEVICE_TSO_ON)
{
/*tcp_udp checksum must be enabled*/
- if ((dm->conf->enable_tcp_udp_checksum) &&
- (hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TX_CKSUM))
- {
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_UDP_GSO;
- xd->port_conf.txmode.offloads |=
- DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_UDP_TSO;
- }
+ if (xd->conf.enable_tcp_udp_checksum == 0)
+ dpdk_log_warn ("[%u] TCP/UDP checksum offload must be enabled",
+ xd->port_id);
+ else if ((di.tx_offload_capa & RTE_ETH_TX_OFFLOAD_TCP_TSO) == 0)
+ dpdk_log_warn ("[%u] TSO not supported by device", xd->port_id);
else
- clib_warning ("%s: TCP/UDP checksum offload must be enabled",
- hi->name);
+ xd->conf.enable_tso = 1;
}
+ if (devconf->max_lro_pkt_size)
+ xd->conf.max_lro_pkt_size = devconf->max_lro_pkt_size;
+
dpdk_device_setup (xd);
/* rss queues should be configured after dpdk_device_setup() */
- if ((hi != NULL) && (devconf->rss_queues != NULL))
- {
- if (vnet_hw_interface_set_rss_queues
- (vnet_get_main (), hi, devconf->rss_queues))
- {
- clib_warning ("%s: Failed to set rss queues", hi->name);
- }
- }
+ if (devconf->rss_queues)
+ {
+ if (vnet_hw_interface_set_rss_queues (vnet_get_main (), hi,
+ devconf->rss_queues))
+ dpdk_log_warn ("[%u] Failed to set rss queues", port_id);
+ }
if (vec_len (xd->errors))
- dpdk_log_err ("setup failed for device %U. Errors:\n %U",
- format_dpdk_device_name, i,
- format_dpdk_device_errors, xd);
-
- /*
- * A note on Cisco VIC (PMD_ENIC) and VLAN:
- *
- * With Cisco VIC vNIC, every ingress packet is tagged. On a
- * trunk vNIC (C series "standalone" server), packets on no VLAN
- * are tagged with vlan 0. On an access vNIC (standalone or B
- * series "blade" server), packets on the default/native VLAN
- * are tagged with that vNIC's VLAN. VPP expects these packets
- * to be untagged, and previously enabled VLAN strip on VIC by
- * default. But it also broke vlan sub-interfaces.
- *
- * The VIC adapter has "untag default vlan" ingress VLAN rewrite
- * mode, which removes tags from these packets. VPP now includes
- * a local patch for the enic driver to use this untag mode, so
- * enabling vlan stripping is no longer needed. In future, the
- * driver + dpdk will have an API to set the mode after
- * rte_eal_init. Then, this note and local patch will be
- * removed.
- */
-
- /*
- * VLAN stripping: default to VLAN strip disabled, unless specified
- * otherwise in the startup config.
- */
-
- vlan_off = rte_eth_dev_get_vlan_offload (xd->port_id);
- if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
- {
- vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
- if (rte_eth_dev_set_vlan_offload (xd->port_id, vlan_off) >= 0)
- dpdk_log_info ("VLAN strip enabled for interface\n");
- else
- dpdk_log_warn ("VLAN strip cannot be supported by interface\n");
- xd->port_conf.rxmode.offloads |= DEV_RX_OFFLOAD_VLAN_STRIP;
- }
- else
- {
- if (vlan_off & ETH_VLAN_STRIP_OFFLOAD)
- {
- vlan_off &= ~ETH_VLAN_STRIP_OFFLOAD;
- if (rte_eth_dev_set_vlan_offload (xd->port_id, vlan_off) >= 0)
- dpdk_log_warn ("set VLAN offload failed\n");
- }
- xd->port_conf.rxmode.offloads &= ~DEV_RX_OFFLOAD_VLAN_STRIP;
- }
-
- if (hi)
- hi->max_packet_bytes = xd->port_conf.rxmode.max_rx_pkt_len
- - sizeof (ethernet_header_t);
- else
- dpdk_log_warn ("hi NULL");
-
- if (dm->conf->no_multi_seg)
- mtu = mtu > ETHER_MAX_LEN ? ETHER_MAX_LEN : mtu;
-
- rte_eth_dev_set_mtu (xd->port_id, mtu);
-}
+ dpdk_log_err ("[%u] setup failed Errors:\n %U", port_id,
+ format_dpdk_device_errors, xd);
+ }
- /* *INDENT-ON* */
+ for (int i = 0; i < vec_len (dm->devices); i++)
+ vnet_hw_if_update_runtime_data (vnm, dm->devices[i].hw_if_index);
return 0;
}
@@ -903,7 +552,6 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
int i;
addrs = vlib_pci_get_all_dev_addrs ();
- /* *INDENT-OFF* */
vec_foreach (addr, addrs)
{
dpdk_device_config_t * devconf = 0;
@@ -922,8 +570,18 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
continue;
}
+#ifdef __FreeBSD__
+ /*
+ * The defines for the PCI_CLASS_* types are platform specific and differ
+ * on FreeBSD.
+ */
+ if (d->device_class != PCI_CLASS_NETWORK &&
+ d->device_class != PCI_CLASS_PROCESSOR_CO)
+ continue;
+#else
if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO)
continue;
+#endif /* __FreeBSD__ */
if (num_whitelisted)
{
@@ -991,9 +649,13 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_NETWORK_ETHERNET)
;
/* all Intel QAT devices VFs */
- else if (d->vendor_id == 0x8086 && d->device_class == PCI_CLASS_PROCESSOR_CO &&
- (d->device_id == 0x0443 || d->device_id == 0x18a1 || d->device_id == 0x19e3 ||
- d->device_id == 0x37c9 || d->device_id == 0x6f55))
+ else if (d->vendor_id == 0x8086 &&
+ d->device_class == PCI_CLASS_PROCESSOR_CO &&
+ (d->device_id == 0x0443 || d->device_id == 0x18a1 ||
+ d->device_id == 0x19e3 || d->device_id == 0x37c9 ||
+ d->device_id == 0x6f55 || d->device_id == 0x18ef ||
+ d->device_id == 0x4941 || d->device_id == 0x4943 ||
+ d->device_id == 0x4945))
;
/* Cisco VIC */
else if (d->vendor_id == 0x1137 &&
@@ -1021,10 +683,28 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
{
continue;
}
- /* Mellanox CX6, CX6VF, CX6DX, CX6DXVF */
- else if (d->vendor_id == 0x15b3 && d->device_id >= 0x101b && d->device_id <= 0x101e)
+ /* Mellanox CX6, CX6VF, CX6DX, CX6DXVF, CX6LX */
+ else if (d->vendor_id == 0x15b3 &&
+ (d->device_id >= 0x101b && d->device_id <= 0x101f))
{
- continue;
+ continue;
+ }
+ /* Mellanox CX7 */
+ else if (d->vendor_id == 0x15b3 && d->device_id == 0x1021)
+ {
+ continue;
+ }
+ /* Mellanox BF, BFVF */
+ else if (d->vendor_id == 0x15b3 &&
+ (d->device_id >= 0xa2d2 && d->device_id <= 0Xa2d3))
+ {
+ continue;
+ }
+ /* Mellanox BF2, BF3 */
+ else if (d->vendor_id == 0x15b3 &&
+ (d->device_id == 0xa2d6 || d->device_id == 0xa2dc))
+ {
+ continue;
}
/* Broadcom NetXtreme S, and E series only */
else if (d->vendor_id == 0x14e4 &&
@@ -1039,6 +719,9 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
d->device_id == 0x1614 || d->device_id == 0x1606 ||
d->device_id == 0x1609 || d->device_id == 0x1614)))
;
+ /* Google vNIC */
+ else if (d->vendor_id == 0x1ae0 && d->device_id == 0x0042)
+ ;
else
{
dpdk_log_warn ("Unsupported PCI device 0x%04x:0x%04x found "
@@ -1047,7 +730,8 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
continue;
}
- error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name);
+ error = vlib_pci_bind_to_uio (vm, addr, (char *) conf->uio_driver_name,
+ conf->uio_bind_force);
if (error)
{
@@ -1063,7 +747,6 @@ dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
clib_error_report (error);
}
}
- /* *INDENT-ON* */
vec_free (pci_addr);
vlib_pci_free_device_info (d);
}
@@ -1078,7 +761,6 @@ dpdk_bind_vmbus_devices_to_uio (dpdk_config_main_t * conf)
addrs = vlib_vmbus_get_all_dev_addrs ();
- /* *INDENT-OFF* */
vec_foreach (addr, addrs)
{
dpdk_device_config_t *devconf = 0;
@@ -1143,7 +825,6 @@ dpdk_bind_vmbus_devices_to_uio (dpdk_config_main_t * conf)
clib_error_report (error);
}
}
- /* *INDENT-ON* */
}
uword
@@ -1240,7 +921,9 @@ dpdk_device_config (dpdk_config_main_t *conf, void *addr,
;
else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
;
- else if (unformat (input, "name %s", &devconf->name))
+ else if (unformat (input, "name %v", &devconf->name))
+ ;
+ else if (unformat (input, "tag %s", &devconf->tag))
;
else if (unformat (input, "workers %U", unformat_bitmap_list,
&devconf->workers))
@@ -1253,10 +936,6 @@ dpdk_device_config (dpdk_config_main_t *conf, void *addr,
if (error)
break;
}
- else if (unformat (input, "vlan-strip-offload off"))
- devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF;
- else if (unformat (input, "vlan-strip-offload on"))
- devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON;
else if (unformat (input, "tso on"))
{
devconf->tso = DPDK_DEVICE_TSO_ON;
@@ -1270,6 +949,9 @@ dpdk_device_config (dpdk_config_main_t *conf, void *addr,
else if (unformat (input, "rss-queues %U",
unformat_bitmap_list, &devconf->rss_queues))
;
+ else if (unformat (input, "max-lro-pkt-size %u",
+ &devconf->max_lro_pkt_size))
+ ;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -1310,14 +992,26 @@ dpdk_log_read_ready (clib_file_t * uf)
n = read (uf->file_descriptor, s + len, n_try);
if (n < 0 && errno != EAGAIN)
return clib_error_return_unix (0, "read");
- _vec_len (s) = len + (n < 0 ? 0 : n);
+ vec_set_len (s, len + (n < 0 ? 0 : n));
}
unformat_init_vector (&input, s);
while (unformat_user (&input, unformat_line, &line))
{
- dpdk_log_notice ("%v", line);
+ int skip = 0;
+ vec_add1 (line, 0);
+
+ /* unfortunatelly DPDK polutes log with this error messages
+ * even when we pass --in-memory which means no secondary process */
+ if (strstr ((char *) line, "WARNING! Base virtual address hint"))
+ skip = 1;
+ else if (strstr ((char *) line, "This may cause issues with mapping "
+ "memory into secondary processes"))
+ skip = 1;
+ vec_pop (line);
+ if (!skip)
+ dpdk_log_notice ("%v", line);
vec_free (line);
}
@@ -1326,8 +1020,29 @@ dpdk_log_read_ready (clib_file_t * uf)
}
static clib_error_t *
+dpdk_set_stat_poll_interval (f64 interval)
+{
+ if (interval < DPDK_MIN_STATS_POLL_INTERVAL)
+ return clib_error_return (0, "wrong stats-poll-interval value");
+
+ dpdk_main.stat_poll_interval = interval;
+ return 0;
+}
+
+static clib_error_t *
+dpdk_set_link_state_poll_interval (f64 interval)
+{
+ if (interval < DPDK_MIN_LINK_POLL_INTERVAL)
+ return clib_error_return (0, "wrong link-state-poll-interval value");
+
+ dpdk_main.link_state_poll_interval = interval;
+ return 0;
+}
+
+static clib_error_t *
dpdk_config (vlib_main_t * vm, unformat_input_t * input)
{
+ dpdk_main_t *dm = &dpdk_main;
clib_error_t *error = 0;
dpdk_config_main_t *conf = &dpdk_config_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
@@ -1344,11 +1059,10 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
u8 no_vmbus = 0;
u8 file_prefix = 0;
u8 *socket_mem = 0;
- u8 *huge_dir_path = 0;
u32 vendor, device, domain, bus, func;
-
- huge_dir_path =
- format (0, "%s/hugepages%c", vlib_unix_get_runtime_dir (), 0);
+ void *fmt_func;
+ void *fmt_addr;
+ f64 poll_interval;
conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
mhash_init (&conf->device_config_index_by_vmbus_addr, sizeof (uword),
@@ -1366,19 +1080,36 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
conf->enable_telemetry = 1;
else if (unformat (input, "enable-tcp-udp-checksum"))
- conf->enable_tcp_udp_checksum = 1;
-
+ {
+ dm->default_port_conf.enable_tcp_udp_checksum = 1;
+ if (unformat (input, "enable-outer-checksum-offload"))
+ dm->default_port_conf.enable_outer_checksum_offload = 1;
+ }
else if (unformat (input, "no-tx-checksum-offload"))
- conf->no_tx_checksum_offload = 1;
+ dm->default_port_conf.disable_tx_checksum_offload = 1;
else if (unformat (input, "decimal-interface-names"))
conf->interface_name_format_decimal = 1;
else if (unformat (input, "no-multi-seg"))
- conf->no_multi_seg = 1;
+ dm->default_port_conf.disable_multi_seg = 1;
+ else if (unformat (input, "enable-lro"))
+ dm->default_port_conf.enable_lro = 1;
else if (unformat (input, "max-simd-bitwidth %U",
unformat_max_simd_bitwidth, &conf->max_simd_bitwidth))
;
+ else if (unformat (input, "link-state-poll-interval %f", &poll_interval))
+ {
+ error = dpdk_set_link_state_poll_interval (poll_interval);
+ if (error != 0)
+ return error;
+ }
+ else if (unformat (input, "stats-poll-interval %f", &poll_interval))
+ {
+ error = dpdk_set_stat_poll_interval (poll_interval);
+ if (error != 0)
+ return error;
+ }
else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input,
&sub_input))
{
@@ -1433,13 +1164,10 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
num_whitelisted++;
}
- else if (unformat (input, "num-mem-channels %d", &conf->nchannels))
- conf->nchannels_set_manually = 0;
- else if (unformat (input, "num-crypto-mbufs %d",
- &conf->num_crypto_mbufs))
- ;
else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
;
+ else if (unformat (input, "uio-bind-force"))
+ conf->uio_bind_force = 1;
else if (unformat (input, "socket-mem %s", &socket_mem))
;
else if (unformat (input, "no-pci"))
@@ -1514,28 +1242,13 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
}
foreach_eal_single_hyphen_arg
#undef _
-#define _(a,b) \
- else if (unformat(input, #a " %s", &s)) \
- { \
- tmp = format (0, "-%s%c", #b, 0); \
- vec_add1 (conf->eal_init_args, tmp); \
- vec_add1 (s, 0); \
- vec_add1 (conf->eal_init_args, s); \
- conf->a##_set_manually = 1; \
- }
- foreach_eal_single_hyphen_mandatory_arg
-#undef _
else if (unformat (input, "default"))
;
else if (unformat_skip_white_space (input))
;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
+ else return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
- goto done;
- }
}
if (!conf->uio_driver_name)
@@ -1547,7 +1260,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
default_hugepage_sz = clib_mem_get_default_hugepage_size ();
- /* *INDENT-OFF* */
clib_bitmap_foreach (x, tm->cpu_socket_bitmap)
{
clib_error_t *e;
@@ -1560,7 +1272,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
if ((e = clib_sysfs_prealloc_hugepages(x, 0, n_pages)))
clib_error_report (e);
}
- /* *INDENT-ON* */
}
/* on/off dpdk's telemetry thread */
@@ -1577,41 +1288,6 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
vec_add1 (conf->eal_init_args, tmp);
}
- if (error)
- return error;
-
- /* I'll bet that -c and -n must be the first and second args... */
- if (!conf->coremask_set_manually)
- {
- vlib_thread_registration_t *tr;
- uword *coremask = 0;
- int i;
-
- /* main thread core */
- coremask = clib_bitmap_set (coremask, tm->main_lcore, 1);
-
- for (i = 0; i < vec_len (tm->registrations); i++)
- {
- tr = tm->registrations[i];
- coremask = clib_bitmap_or (coremask, tr->coremask);
- }
-
- vec_insert (conf->eal_init_args, 2, 1);
- conf->eal_init_args[1] = (u8 *) "-c";
- tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0);
- conf->eal_init_args[2] = tmp;
- clib_bitmap_free (coremask);
- }
-
- if (!conf->nchannels_set_manually)
- {
- vec_insert (conf->eal_init_args, 2, 3);
- conf->eal_init_args[3] = (u8 *) "-n";
- tmp = format (0, "%d", conf->nchannels);
- vec_terminate_c_string (tmp);
- conf->eal_init_args[4] = tmp;
- }
-
if (no_pci == 0 && geteuid () == 0)
dpdk_bind_devices_to_uio (conf);
@@ -1622,15 +1298,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
if (devconf->x == 0 && conf->default_devconf.x > 0) \
devconf->x = conf->default_devconf.x ;
- /* *INDENT-OFF* */
pool_foreach (devconf, conf->dev_confs) {
/* default per-device config items */
foreach_dpdk_device_config_item
- /* copy vlan_strip config from default device */
- _ (vlan_strip_offload)
-
/* copy tso config from default device */
_ (tso)
@@ -1640,56 +1312,57 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
/* copy rss_queues config from default device */
_ (rss_queues)
- /* add DPDK EAL whitelist/blacklist entry */
- if (num_whitelisted > 0 && devconf->is_blacklisted == 0 &&
- devconf->dev_addr_type == VNET_DEV_ADDR_PCI)
- {
- tmp = format (0, "-a%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- if (devconf->devargs)
+ /* assume that default is PCI */
+ fmt_func = format_vlib_pci_addr;
+ fmt_addr = &devconf->pci_addr;
+
+ if (devconf->dev_addr_type == VNET_DEV_ADDR_VMBUS)
+ {
+ fmt_func = format_vlib_vmbus_addr;
+ fmt_addr = &devconf->vmbus_addr;
+ }
+
+ /* add DPDK EAL whitelist/blacklist entry */
+ if (num_whitelisted > 0 && devconf->is_blacklisted == 0)
+ {
+ tmp = format (0, "-a%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ if (devconf->devargs)
{
- tmp = format (0, "%U,%s%c", format_vlib_pci_addr,
- &devconf->pci_addr, devconf->devargs, 0);
+ tmp =
+ format (0, "%U,%s%c", fmt_func, fmt_addr, devconf->devargs, 0);
}
else
{
- tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
+ tmp = format (0, "%U%c", fmt_func, fmt_addr, 0);
}
vec_add1 (conf->eal_init_args, tmp);
- }
- else if (num_whitelisted == 0 && devconf->is_blacklisted != 0 &&
- devconf->dev_addr_type == VNET_DEV_ADDR_PCI)
- {
- tmp = format (0, "-b%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
- vec_add1 (conf->eal_init_args, tmp);
- }
+ }
+ else if (num_whitelisted == 0 && devconf->is_blacklisted != 0)
+ {
+ tmp = format (0, "-b%c", 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ tmp = format (0, "%U%c", fmt_func, fmt_addr, 0);
+ vec_add1 (conf->eal_init_args, tmp);
+ }
}
- /* *INDENT-ON* */
#undef _
- /* set master-lcore */
- tmp = format (0, "--main-lcore%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "%u%c", tm->main_lcore, 0);
- vec_add1 (conf->eal_init_args, tmp);
-
-
if (socket_mem)
clib_warning ("socket-mem argument is deprecated");
/* NULL terminate the "argv" vector, in case of stupidity */
vec_add1 (conf->eal_init_args, 0);
- _vec_len (conf->eal_init_args) -= 1;
+ vec_dec_len (conf->eal_init_args, 1);
/* Set up DPDK eal and packet mbuf pool early. */
int log_fds[2] = { 0 };
if (pipe (log_fds) == 0)
{
- if (fcntl (log_fds[1], F_SETFL, O_NONBLOCK) == 0)
+ if (fcntl (log_fds[0], F_SETFL, O_NONBLOCK) == 0 &&
+ fcntl (log_fds[1], F_SETFL, O_NONBLOCK) == 0)
{
FILE *f = fdopen (log_fds[1], "a");
if (f && rte_openlog_stream (f) == 0)
@@ -1720,6 +1393,8 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
dpdk_log_notice ("EAL init args: %s", conf->eal_init_args_str);
ret = rte_eal_init (vec_len (conf->eal_init_args),
(char **) conf->eal_init_args);
+ if (ret < 0)
+ return clib_error_return (0, "rte_eal_init returned %d", ret);
/* enable the AVX-512 vPMDs in DPDK */
if (clib_cpu_supports_avx512_bitalg () &&
@@ -1731,20 +1406,11 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
RTE_VECT_SIMD_256 :
RTE_VECT_SIMD_512);
- /* lazy umount hugepages */
- umount2 ((char *) huge_dir_path, MNT_DETACH);
- rmdir ((char *) huge_dir_path);
- vec_free (huge_dir_path);
-
- if (ret < 0)
- return clib_error_return (0, "rte_eal_init returned %d", ret);
-
/* main thread 1st */
if ((error = dpdk_buffer_pools_create (vm)))
return error;
-done:
- return error;
+ return 0;
}
VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk");
@@ -1757,10 +1423,6 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
u32 hw_flags = 0;
u8 hw_flags_chg = 0;
- /* only update link state for PMD interfaces */
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- return;
-
xd->time_last_link_update = now ? now : xd->time_last_link_update;
clib_memset (&xd->link, 0, sizeof (xd->link));
rte_eth_link_get_nowait (xd->port_id, &xd->link);
@@ -1788,35 +1450,32 @@ dpdk_update_link_state (dpdk_device_t * xd, f64 now)
ed->new_link_state = (u8) xd->link.link_status;
}
- if ((xd->link.link_duplex != prev_link.link_duplex))
+ hw_flags_chg = ((xd->link.link_duplex != prev_link.link_duplex) ||
+ (xd->link.link_status != prev_link.link_status));
+
+ if (xd->link.link_speed != prev_link.link_speed)
+ vnet_hw_interface_set_link_speed (vnm, xd->hw_if_index,
+ (xd->link.link_speed == UINT32_MAX) ?
+ UINT32_MAX :
+ xd->link.link_speed * 1000);
+
+ if (hw_flags_chg)
{
- hw_flags_chg = 1;
+ if (xd->link.link_status)
+ hw_flags |= VNET_HW_INTERFACE_FLAG_LINK_UP;
+
switch (xd->link.link_duplex)
{
- case ETH_LINK_HALF_DUPLEX:
+ case RTE_ETH_LINK_HALF_DUPLEX:
hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX;
break;
- case ETH_LINK_FULL_DUPLEX:
+ case RTE_ETH_LINK_FULL_DUPLEX:
hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX;
break;
default:
break;
}
- }
- if (xd->link.link_speed != prev_link.link_speed)
- vnet_hw_interface_set_link_speed (vnm, xd->hw_if_index,
- xd->link.link_speed * 1000);
-
- if (xd->link.link_status != prev_link.link_status)
- {
- hw_flags_chg = 1;
-
- if (xd->link.link_status)
- hw_flags |= VNET_HW_INTERFACE_FLAG_LINK_UP;
- }
- if (hw_flags_chg)
- {
if (LINK_STATE_ELOGS)
{
ELOG_TYPE_DECLARE (e) =
@@ -1846,6 +1505,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
dpdk_device_t *xd;
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_worker_thread_barrier_sync (vm);
error = dpdk_lib_init (dm);
if (error)
@@ -1862,6 +1522,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
}
}
+ vlib_worker_thread_barrier_release (vm);
tm->worker_thread_release = 1;
f64 now = vlib_time_now (vm);
@@ -1870,16 +1531,17 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
dpdk_update_link_state (xd, now);
}
+ f64 timeout =
+ clib_min (dm->link_state_poll_interval, dm->stat_poll_interval);
+
while (1)
{
- /*
- * check each time through the loop in case intervals are changed
- */
- f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
- dm->link_state_poll_interval : dm->stat_poll_interval;
-
+ f64 min_wait = clib_max (timeout, DPDK_MIN_POLL_INTERVAL);
vlib_process_wait_for_event_or_clock (vm, min_wait);
+ timeout =
+ clib_min (dm->link_state_poll_interval, dm->stat_poll_interval);
+
if (dm->admin_up_down_in_progress)
/* skip the poll if an admin up down is in progress (on any interface) */
continue;
@@ -1893,19 +1555,25 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
dpdk_update_link_state (xd, now);
}
- }
+ now = vlib_time_now (vm);
+ vec_foreach (xd, dm->devices)
+ {
+ timeout = clib_min (timeout, xd->time_last_stats_update +
+ dm->stat_poll_interval - now);
+ timeout = clib_min (timeout, xd->time_last_link_update +
+ dm->link_state_poll_interval - now);
+ }
+ }
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dpdk_process_node,static) = {
.function = dpdk_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "dpdk-process",
.process_log2_n_stack_bytes = 17,
};
-/* *INDENT-ON* */
static clib_error_t *
dpdk_init (vlib_main_t * vm)
@@ -1921,40 +1589,31 @@ dpdk_init (vlib_main_t * vm)
"Data in cache line 0 is bigger than cache line size");
STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0,
"Cache line marker must be 1st element in frame_queue_trace_t");
- STATIC_ASSERT (RTE_CACHE_LINE_SIZE == 1 << CLIB_LOG2_CACHE_LINE_BYTES,
- "DPDK RTE CACHE LINE SIZE does not match with 1<<CLIB_LOG2_CACHE_LINE_BYTES");
dpdk_cli_reference ();
- dm->vlib_main = vm;
- dm->vnet_main = vnet_get_main ();
dm->conf = &dpdk_config_main;
- dm->conf->nchannels = 4;
vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet");
- /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
- dm->buffer_flags_template = (VLIB_BUFFER_TOTAL_LENGTH_VALID |
- VLIB_BUFFER_EXT_HDR_VALID |
- VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
- VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
-
dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL;
dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL;
dm->log_default = vlib_log_register_class ("dpdk", 0);
dm->log_cryptodev = vlib_log_register_class ("dpdk", "cryptodev");
- dm->log_ipsec = vlib_log_register_class ("dpdk", "ipsec");
return error;
}
VLIB_INIT_FUNCTION (dpdk_init);
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+static clib_error_t *
+dpdk_worker_thread_init (vlib_main_t *vm)
+{
+ if (rte_thread_register () < 0)
+ clib_panic ("dpdk: cannot register thread %u - %s", vm->thread_index,
+ rte_strerror (rte_errno));
+ return 0;
+}
+
+VLIB_WORKER_INIT_FUNCTION (dpdk_worker_thread_init);
diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index 25222856912..ca1690b708f 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -23,10 +23,10 @@
#include <dpdk/device/dpdk.h>
#include <vnet/classify/vnet_classify.h>
#include <vnet/mpls/packet.h>
-#include <vnet/handoff.h>
#include <vnet/devices/devices.h>
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/feature/feature.h>
+#include <vnet/tcp/tcp_packet.h>
#include <dpdk/device/dpdk_priv.h>
@@ -36,9 +36,13 @@ static char *dpdk_error_strings[] = {
#undef _
};
-/* make sure all flags we need are stored in lower 8 bits */
-STATIC_ASSERT ((PKT_RX_IP_CKSUM_BAD | PKT_RX_FDIR) <
- 256, "dpdk flags not un lower byte, fix needed");
+/* make sure all flags we need are stored in lower 32 bits */
+STATIC_ASSERT ((u64) (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD |
+ RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_LRO) < (1ULL << 32),
+ "dpdk flags not in lower word, fix needed");
+
+STATIC_ASSERT (RTE_MBUF_F_RX_L4_CKSUM_BAD == (1ULL << 3),
+ "bit number of RTE_MBUF_F_RX_L4_CKSUM_BAD is no longer 3!");
static_always_inline uword
dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b,
@@ -97,13 +101,13 @@ dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
{
vlib_buffer_t *b;
b = vlib_buffer_from_rte_mbuf (mb[0]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
b = vlib_buffer_from_rte_mbuf (mb[1]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
b = vlib_buffer_from_rte_mbuf (mb[2]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
b = vlib_buffer_from_rte_mbuf (mb[3]);
- clib_prefetch_load (b);
+ clib_prefetch_store (b);
}
/** \brief Main DPDK input node
@@ -125,18 +129,18 @@ dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
@em Uses:
- <code>struct rte_mbuf mb->ol_flags</code>
- - PKT_RX_IP_CKSUM_BAD
+ - RTE_MBUF_F_RX_IP_CKSUM_BAD
@em Sets:
- <code>b->error</code> if the packet is to be dropped immediately
- <code>b->current_data, b->current_length</code>
- - adjusted as needed to skip the L2 header in direct-dispatch cases
+ - adjusted as needed to skip the L2 header in direct-dispatch cases
- <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
- - rx interface sw_if_index
+ - rx interface sw_if_index
- <code>vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0</code>
- - required by ipX-lookup
+ - required by ipX-lookup
- <code>b->flags</code>
- - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.
+ - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.
<em>Next Nodes:</em>
- Static arcs to: error-drop, ethernet-input,
@@ -145,31 +149,30 @@ dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[])
<code>xd->per_interface_next_index</code>
*/
-static_always_inline u16
-dpdk_ol_flags_extract (struct rte_mbuf **mb, u16 * flags, int count)
+static_always_inline u32
+dpdk_ol_flags_extract (struct rte_mbuf **mb, u32 *flags, int count)
{
- u16 rv = 0;
+ u32 rv = 0;
int i;
for (i = 0; i < count; i++)
{
/* all flags we are interested in are in lower 8 bits but
that might change */
- flags[i] = (u16) mb[i]->ol_flags;
+ flags[i] = (u32) mb[i]->ol_flags;
rv |= flags[i];
}
return rv;
}
static_always_inline uword
-dpdk_process_rx_burst (vlib_main_t * vm, dpdk_per_thread_data_t * ptd,
- uword n_rx_packets, int maybe_multiseg,
- u16 * or_flagsp)
+dpdk_process_rx_burst (vlib_main_t *vm, dpdk_per_thread_data_t *ptd,
+ uword n_rx_packets, int maybe_multiseg, u32 *or_flagsp)
{
u32 n_left = n_rx_packets;
vlib_buffer_t *b[4];
struct rte_mbuf **mb = ptd->mbufs;
uword n_bytes = 0;
- u16 *flags, or_flags = 0;
+ u32 *flags, or_flags = 0;
vlib_buffer_t bt;
mb = ptd->mbufs;
@@ -254,7 +257,7 @@ dpdk_process_flow_offload (dpdk_device_t * xd, dpdk_per_thread_data_t * ptd,
/* TODO prefetch and quad-loop */
for (n = 0; n < n_rx_packets; n++)
{
- if ((ptd->flags[n] & PKT_RX_FDIR_ID) == 0)
+ if ((ptd->flags[n] & RTE_MBUF_F_RX_FDIR_ID) == 0)
continue;
fle = pool_elt_at_index (xd->flow_lookup_entries,
@@ -277,6 +280,65 @@ dpdk_process_flow_offload (dpdk_device_t * xd, dpdk_per_thread_data_t * ptd,
}
}
+static_always_inline u16
+dpdk_lro_find_l4_hdr_sz (vlib_buffer_t *b)
+{
+ u16 l4_hdr_sz = 0;
+ u16 current_offset = 0;
+ ethernet_header_t *e;
+ tcp_header_t *tcp;
+ u8 *data = vlib_buffer_get_current (b);
+ u16 ethertype;
+ e = (void *) data;
+ current_offset += sizeof (e[0]);
+ ethertype = clib_net_to_host_u16 (e->type);
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (e + 1);
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ current_offset += sizeof (*vlan);
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ vlan++;
+ current_offset += sizeof (*vlan);
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ }
+ }
+ data += current_offset;
+ if (ethertype == ETHERNET_TYPE_IP4)
+ {
+ data += sizeof (ip4_header_t);
+ tcp = (void *) data;
+ l4_hdr_sz = tcp_header_bytes (tcp);
+ }
+ else
+ {
+ /* FIXME: extension headers...*/
+ data += sizeof (ip6_header_t);
+ tcp = (void *) data;
+ l4_hdr_sz = tcp_header_bytes (tcp);
+ }
+ return l4_hdr_sz;
+}
+
+static_always_inline void
+dpdk_process_lro_offload (dpdk_device_t *xd, dpdk_per_thread_data_t *ptd,
+ uword n_rx_packets)
+{
+ uword n;
+ vlib_buffer_t *b0;
+ for (n = 0; n < n_rx_packets; n++)
+ {
+ b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
+ if (ptd->flags[n] & RTE_MBUF_F_RX_LRO)
+ {
+ b0->flags |= VNET_BUFFER_F_GSO;
+ vnet_buffer2 (b0)->gso_size = ptd->mbufs[n]->tso_segsz;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = dpdk_lro_find_l4_hdr_sz (b0);
+ }
+ }
+}
+
static_always_inline u32
dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
vlib_node_runtime_t * node, u32 thread_index, u16 queue_id)
@@ -289,7 +351,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
struct rte_mbuf **mb;
vlib_buffer_t *b0;
u16 *next;
- u16 or_flags;
+ u32 or_flags;
u32 n;
int single_next = 0;
@@ -303,12 +365,13 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
/* get up to DPDK_RX_BURST_SZ buffers from PMD */
while (n_rx_packets < DPDK_RX_BURST_SZ)
{
- n = rte_eth_rx_burst (xd->port_id, queue_id,
- ptd->mbufs + n_rx_packets,
- DPDK_RX_BURST_SZ - n_rx_packets);
+ u32 n_to_rx = clib_min (DPDK_RX_BURST_SZ - n_rx_packets, 32);
+
+ n = rte_eth_rx_burst (xd->port_id, queue_id, ptd->mbufs + n_rx_packets,
+ n_to_rx);
n_rx_packets += n;
- if (n < 32)
+ if (n < n_to_rx)
break;
}
@@ -318,6 +381,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
/* Update buffer template */
vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->sw_if_index;
bt->error = node->errors[DPDK_ERROR_NONE];
+ bt->flags = xd->buffer_flags;
/* as DPDK is allocating empty buffers from mempool provided before interface
start for each queue, it is safe to store this in the template */
bt->buffer_pool_index = rxq->buffer_pool_index;
@@ -332,14 +396,34 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
/* as all packets belong to the same interface feature arc lookup
can be don once and result stored in the buffer template */
if (PREDICT_FALSE (vnet_device_input_have_features (xd->sw_if_index)))
- vnet_feature_start_device_input_x1 (xd->sw_if_index, &next_index, bt);
+ vnet_feature_start_device_input (xd->sw_if_index, &next_index, bt);
if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 1, &or_flags);
else
n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 0, &or_flags);
- if (PREDICT_FALSE (or_flags & PKT_RX_FDIR))
+ if (PREDICT_FALSE ((or_flags & RTE_MBUF_F_RX_LRO)))
+ dpdk_process_lro_offload (xd, ptd, n_rx_packets);
+
+ if (PREDICT_FALSE ((or_flags & RTE_MBUF_F_RX_L4_CKSUM_BAD) &&
+ (xd->buffer_flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT)))
+ {
+ for (n = 0; n < n_rx_packets; n++)
+ {
+ /* Check and reset VNET_BUFFER_F_L4_CHECKSUM_CORRECT flag
+ if RTE_MBUF_F_RX_L4_CKSUM_BAD is set.
+ The magic num 3 is the bit number of RTE_MBUF_F_RX_L4_CKSUM_BAD
+ which is defined in DPDK.
+ Have made a STATIC_ASSERT in this file to ensure this.
+ */
+ b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
+ b0->flags ^= (ptd->flags[n] & RTE_MBUF_F_RX_L4_CKSUM_BAD)
+ << (VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT - 3);
+ }
+ }
+
+ if (PREDICT_FALSE (or_flags & RTE_MBUF_F_RX_FDIR))
{
/* some packets will need to go to different next nodes */
for (n = 0; n < n_rx_packets; n++)
@@ -348,7 +432,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
/* flow offload - process if rx flow offload enabled and at least one
packet is marked */
if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) &&
- (or_flags & PKT_RX_FDIR)))
+ (or_flags & RTE_MBUF_F_RX_FDIR)))
dpdk_process_flow_offload (xd, ptd, n_rx_packets);
/* enqueue buffers to the next node */
@@ -385,7 +469,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
marked as ip4 checksum bad we can notify ethernet input so it
can send pacets to ip4-input-no-checksum node */
if (xd->flags & DPDK_DEVICE_FLAG_RX_IP4_CKSUM &&
- (or_flags & PKT_RX_IP_CKSUM_BAD) == 0)
+ (or_flags & RTE_MBUF_F_RX_IP_CKSUM_BAD) == 0)
f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK;
vlib_frame_no_append (f);
}
@@ -459,7 +543,7 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
dpdk_device_t *xd;
uword n_rx_packets = 0;
vnet_hw_if_rxq_poll_vector_t *pv;
- u32 thread_index = node->thread_index;
+ u32 thread_index = vm->thread_index;
/*
* Poll all devices on this cpu for input/interrupts.
@@ -476,7 +560,6 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return n_rx_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dpdk_input_node) = {
.type = VLIB_NODE_TYPE_INPUT,
.name = "dpdk-input",
@@ -492,7 +575,6 @@ VLIB_REGISTER_NODE (dpdk_input_node) = {
.n_errors = DPDK_N_ERROR,
.error_strings = dpdk_error_strings,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c
index 413a0f0df9d..9781d0ed7f0 100644
--- a/src/plugins/dpdk/main.c
+++ b/src/plugins/dpdk/main.c
@@ -13,13 +13,6 @@
* limitations under the License.
*/
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <linux/vfio.h>
-#include <sys/ioctl.h>
-
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
#include <dpdk/device/dpdk.h>
@@ -79,19 +72,14 @@ rte_delay_us_override_cb (unsigned us)
static clib_error_t * dpdk_main_init (vlib_main_t * vm)
{
- dpdk_main_t * dm = &dpdk_main;
clib_error_t * error = 0;
- dm->vlib_main = vm;
- dm->vnet_main = vnet_get_main ();
-
/* register custom delay function */
rte_delay_us_callback_register (rte_delay_us_override_cb);
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (dpdk_main_init) =
{
.runs_after = VLIB_INITS("dpdk_init"),
@@ -101,4 +89,3 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Data Plane Development Kit (DPDK)",
};
-/* *INDENT-ON* */
diff --git a/src/plugins/dpdk/thread.c b/src/plugins/dpdk/thread.c
deleted file mode 100644
index 3a3fcc6cea6..00000000000
--- a/src/plugins/dpdk/thread.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <rte_config.h>
-
-#include <rte_common.h>
-#include <rte_log.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_tailq.h>
-#include <rte_eal.h>
-#include <rte_per_lcore.h>
-#include <rte_launch.h>
-#include <rte_atomic.h>
-#include <rte_cycles.h>
-#include <rte_prefetch.h>
-#include <rte_lcore.h>
-#include <rte_per_lcore.h>
-#include <rte_branch_prediction.h>
-#include <rte_interrupts.h>
-#include <rte_pci.h>
-#include <rte_random.h>
-#include <rte_debug.h>
-#include <rte_ether.h>
-#include <rte_ethdev.h>
-#include <rte_ring.h>
-#include <rte_mempool.h>
-#include <rte_mbuf.h>
-#include <rte_version.h>
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <dpdk/device/dpdk.h>
-#include <dpdk/device/dpdk_priv.h>
-
-static clib_error_t *
-dpdk_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id)
-{
- int r;
- r = rte_eal_remote_launch (fp, (void *) w, lcore_id);
- if (r)
- return clib_error_return (0, "Failed to launch thread %u", lcore_id);
- return 0;
-}
-
-static clib_error_t *
-dpdk_thread_set_lcore (u32 thread, u16 lcore)
-{
- return 0;
-}
-
-static vlib_thread_callbacks_t callbacks = {
- .vlib_launch_thread_cb = &dpdk_launch_thread,
- .vlib_thread_set_lcore_cb = &dpdk_thread_set_lcore,
-};
-
-static clib_error_t *
-dpdk_thread_init (vlib_main_t * vm)
-{
- vlib_thread_cb_register (vm, &callbacks);
- return 0;
-}
-
-VLIB_INIT_FUNCTION (dpdk_thread_init);
-
-/** @endcond */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/fateshare/CMakeLists.txt b/src/plugins/fateshare/CMakeLists.txt
new file mode 100644
index 00000000000..4916d1ffbaf
--- /dev/null
+++ b/src/plugins/fateshare/CMakeLists.txt
@@ -0,0 +1,25 @@
+
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(fateshare
+ SOURCES
+ fateshare.c
+ fateshare.h
+)
+
+add_vpp_executable(vpp_fateshare_monitor
+ SOURCES vpp_fateshare_monitor.c
+ LINK_LIBRARIES vppinfra
+)
+
diff --git a/src/plugins/fateshare/fateshare.c b/src/plugins/fateshare/fateshare.c
new file mode 100644
index 00000000000..971d32303db
--- /dev/null
+++ b/src/plugins/fateshare/fateshare.c
@@ -0,0 +1,309 @@
+/*
+ * fateshare.c - skeleton vpp engine plug-in
+ *
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vppinfra/unix.h>
+#include <fateshare/fateshare.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <stdbool.h>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#ifdef __linux__
+#include <sys/prctl.h> // prctl(), PR_SET_PDEATHSIG
+#else
+#include <sys/procctl.h>
+#endif /* __linux__ */
+#include <limits.h>
+
+fateshare_main_t fateshare_main;
+
+/* Action function shared between message handler and debug CLI */
+
+static void
+child_handler (int sig)
+{
+ pid_t pid;
+ int status;
+ fateshare_main_t *kmp = &fateshare_main;
+
+ while ((pid = waitpid (-1, &status, WNOHANG)) > 0)
+ {
+ if (pid == kmp->monitor_pid)
+ {
+ clib_warning ("Monitor child %d exited with status %d!", pid,
+ status);
+ kmp->vlib_main->main_loop_exit_now = 1;
+ }
+ else
+ {
+ clib_warning ("child %d exited with status %d!", pid, status);
+ }
+ }
+}
+
+clib_error_t *
+launch_monitor (fateshare_main_t *kmp)
+{
+ clib_error_t *error = 0;
+ pid_t ppid_before_fork = getpid ();
+ pid_t cpid = fork ();
+ if (cpid == -1)
+ {
+ perror (0);
+ error = clib_error_return (0, "can not fork");
+ goto done;
+ }
+ clib_warning ("fateshare about to launch monitor %v.", kmp->monitor_cmd);
+ int logfd =
+ open ((char *) kmp->monitor_logfile, O_APPEND | O_RDWR | O_CREAT, 0777);
+ if (logfd < 0)
+ {
+ error = clib_error_return (0, "can not open log file");
+ goto done;
+ }
+ if (cpid)
+ {
+ /* parent */
+ kmp->monitor_pid = cpid;
+ close (logfd);
+ return 0;
+ }
+ else
+ {
+ dup2 (logfd, 1);
+ dup2 (logfd, 2);
+#ifdef __linux__
+ int r = prctl (PR_SET_PDEATHSIG, SIGTERM);
+ if (r == -1)
+ {
+ perror (0);
+ exit (1);
+ }
+#else
+ int r, s = SIGTERM;
+
+ r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s);
+ if (r == -1)
+ {
+ perror (0);
+ exit (1);
+ }
+#endif /* __linux__ */
+ pid_t current_ppid = getppid ();
+ if (current_ppid != ppid_before_fork)
+ {
+ fprintf (stderr, "parent pid changed while starting (%d => %d)\n",
+ ppid_before_fork, current_ppid);
+ if (current_ppid == 1)
+ {
+ fprintf (stderr, "exiting.\n");
+ exit (1);
+ }
+ }
+
+ int r1 = setpgid (getpid (), 0);
+ if (r1 != 0)
+ {
+ perror ("setpgid error");
+ exit (1);
+ }
+
+ u8 *scmd = format (0, "%v\0", kmp->monitor_cmd);
+ u8 *logfile_base = format (0, "%v\0", kmp->monitor_logfile);
+ int fd = logfd - 1;
+ while (fd > 2)
+ {
+ close (fd);
+ fd--;
+ }
+
+ fd = open ("/dev/null", O_RDONLY);
+ if (fd < 0)
+ {
+ exit (1);
+ }
+ dup2 (fd, 0);
+
+ char *ppid_str = (char *) format (0, "%lld\0", current_ppid);
+
+ char **argv = 0;
+ vec_validate (argv, vec_len (kmp->commands) + 3 - 1);
+ argv[0] = (void *) scmd;
+ argv[1] = ppid_str;
+ argv[2] = (char *) logfile_base;
+ int i;
+ vec_foreach_index (i, kmp->commands)
+ {
+ argv[3 + i] = (char *) kmp->commands[i];
+ }
+
+ int res = execv (argv[0], argv);
+ clib_warning ("ERROR during execve: %d", res);
+ perror ("execve");
+
+ exit (0);
+ }
+done:
+
+ return error;
+}
+
+static clib_error_t *
+fateshare_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ fateshare_main_t *fmp = &fateshare_main;
+ u8 *command = 0;
+ u8 **new_command = 0;
+ clib_error_t *error = 0;
+
+ /* unix config may make vpp fork, we want to run after that. */
+ if ((error = vlib_call_config_function (vm, unix_config)))
+ return error;
+
+ /* Defaults */
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "monitor %s", &fmp->monitor_cmd))
+ {
+ clib_warning ("setting monitor to %v", fmp->monitor_cmd);
+ }
+ else if (unformat (input, "logfile %s", &fmp->monitor_logfile))
+ {
+ clib_warning ("setting logfile to %v", fmp->monitor_logfile);
+ }
+ else if (unformat (input, "command %s", &command))
+ {
+ vec_add2 (fmp->commands, new_command, 1);
+ *new_command = command;
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ vec_add2 (fmp->commands, new_command, 1);
+ *new_command = 0;
+
+ /* Establish handler. */
+ struct sigaction sa;
+ sigemptyset (&sa.sa_mask);
+ sa.sa_flags = 0;
+ sa.sa_handler = child_handler;
+
+ sigaction (SIGCHLD, &sa, NULL);
+
+ if (fmp->monitor_cmd == 0)
+ {
+ char *p;
+ u8 *path;
+
+ /* find executable path */
+ path = os_get_exec_path ();
+
+ if (path == 0)
+ return clib_error_return (
+ 0, "could not get exec path - set monitor manually");
+
+ /* add null termination */
+ vec_add1 (path, 0);
+
+ /* strip filename */
+ if ((p = strrchr ((char *) path, '/')) == 0)
+ {
+ vec_free (path);
+ return clib_error_return (
+ 0, "could not determine vpp directory - set monitor manually");
+ }
+ *p = 0;
+
+ fmp->monitor_cmd = format (0, "%s/vpp_fateshare_monitor\0", path);
+ vec_free (path);
+ }
+ if (fmp->monitor_logfile == 0)
+ {
+ fmp->monitor_logfile =
+ format (0, "/tmp/vpp-fateshare-monitor-log.txt\0");
+ }
+ error = launch_monitor (fmp);
+
+ return error;
+}
+
+clib_error_t *
+fateshare_init (vlib_main_t *vm)
+{
+ fateshare_main_t *kmp = &fateshare_main;
+ clib_error_t *error = 0;
+
+ kmp->vlib_main = vm;
+
+ return error;
+}
+
+static clib_error_t *
+fateshare_send_hup_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ clib_error_t *error = 0;
+ fateshare_main_t *kmp = &fateshare_main;
+
+ if (kmp->monitor_pid)
+ {
+ int rc = kill (kmp->monitor_pid, SIGHUP);
+ if (rc)
+ {
+ error = clib_error_return (
+ 0, "can not send signal to monitor process: %s", strerror (errno));
+ }
+ }
+ else
+ {
+ error = clib_error_return (0, "can not find monitor process");
+ }
+
+ return error;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (fateshare_config, "fateshare");
+
+VLIB_INIT_FUNCTION (fateshare_init);
+
+VLIB_CLI_COMMAND (fateshare_restart_process_command, static) = {
+ .path = "fateshare restart-processes",
+ .short_help = "restart dependent processes",
+ .function = fateshare_send_hup_fn,
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Run child processes which will share fate with VPP, restart "
+ "them if they quit",
+ .default_disabled = 1,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/lawful-intercept/lawful_intercept.h b/src/plugins/fateshare/fateshare.h
index e39fa0d0752..4ad7ac1df16 100644
--- a/src/vnet/lawful-intercept/lawful_intercept.h
+++ b/src/plugins/fateshare/fateshare.h
@@ -1,5 +1,8 @@
+
/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
+ * fateshare.h - skeleton vpp engine plug-in header file
+ *
+ * Copyright (c) 2022 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -12,40 +15,29 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-#ifndef __lawful_intercept_h__
-#define __lawful_intercept_h__
+#ifndef __included_fateshare_h__
+#define __included_fateshare_h__
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+
typedef struct
{
- /* LI collector info */
- ip4_address_t *src_addrs;
- ip4_address_t *collectors;
- u16 *ports;
-
- /* Hit node index */
- u32 hit_node_index;
-
/* convenience */
vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-} li_main_t;
-
-extern li_main_t li_main;
-/* *INDENT-OFF* */
-typedef CLIB_PACKED(struct {
- ip4_header_t ip4;
- udp_header_t udp;
-}) ip4_udp_header_t;
-/* *INDENT-ON* */
+ u8 *monitor_cmd;
+ u8 *monitor_logfile;
+ pid_t monitor_pid;
+ u8 **commands;
+} fateshare_main_t;
-extern vlib_node_registration_t li_hit_node;
+extern fateshare_main_t fateshare_main;
-#endif /* __lawful_intercept_h__ */
+#endif /* __included_fateshare_h__ */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/fateshare/vpp_fateshare_monitor.c b/src/plugins/fateshare/vpp_fateshare_monitor.c
new file mode 100644
index 00000000000..7af451ccffe
--- /dev/null
+++ b/src/plugins/fateshare/vpp_fateshare_monitor.c
@@ -0,0 +1,289 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#ifdef __linux__
+#include <sys/prctl.h> // prctl(), PR_SET_PDEATHSIG
+#else
+#include <signal.h>
+#include <sys/procctl.h>
+#endif /* __linux__ */
+
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+
+typedef struct
+{
+ pid_t pid;
+ char *cmd;
+} child_record_t;
+
+int n_children = 0;
+child_record_t *children = NULL;
+
+static void
+child_handler (int sig)
+{
+ pid_t pid;
+ int status;
+
+ while ((pid = waitpid (-1, &status, WNOHANG)) > 0)
+ {
+ int i;
+ printf ("fateshare: pid %d quit with status %d\n", pid, status);
+ for (i = 0; i < n_children; i++)
+ {
+ if (children[i].pid == pid)
+ {
+ children[i].pid = 0;
+ }
+ }
+ }
+}
+
+static void
+term_handler (int sig)
+{
+ int i;
+
+ printf ("fateshare: terminating!\n");
+ for (i = 0; i < n_children; i++)
+ {
+ kill (-children[i].pid, SIGTERM);
+ }
+ exit (0);
+}
+
+static void
+hup_handler (int sig)
+{
+ int i;
+
+ printf ("fateshare: terminating all the child processes!\n");
+ for (i = 0; i < n_children; i++)
+ {
+ kill (-children[i].pid, SIGTERM);
+ }
+}
+
+pid_t
+launch_command (char *scmd, char *logname_base)
+{
+ pid_t ppid_before_fork = getpid ();
+ pid_t cpid = fork ();
+ if (cpid == -1)
+ {
+ perror ("fork");
+ sleep (1);
+ return 0;
+ }
+ if (cpid)
+ {
+ /* parent */
+ return cpid;
+ }
+
+ /* child */
+#ifdef __linux__
+ int r = prctl (PR_SET_PDEATHSIG, SIGTERM);
+ if (r == -1)
+ {
+ perror ("prctl");
+ sleep (5);
+ exit (1);
+ }
+#else
+ int r, s = SIGTERM;
+
+ r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s);
+ if (r == -1)
+ {
+ perror ("procctl");
+ exit (1);
+ }
+#endif /* __linux__ */
+
+ if (getppid () != ppid_before_fork)
+ {
+ sleep (5);
+ exit (1);
+ }
+
+ int r1 = setpgid (getpid (), 0);
+ if (r1 != 0)
+ {
+ perror ("setpgid error");
+ sleep (5);
+ exit (1);
+ }
+
+ int fd = open ("/dev/null", O_RDONLY);
+ if (fd < 0)
+ {
+ sleep (5);
+ exit (1);
+ }
+ while (fd >= 0)
+ {
+ close (fd);
+ fd--;
+ }
+ fd = open ("/dev/null", O_RDONLY);
+ if (fd < 0)
+ {
+ sleep (5);
+ exit (1);
+ }
+ dup2 (fd, 0);
+
+ char logname_stdout[PATH_MAX];
+ char logname_stderr[PATH_MAX];
+
+ snprintf (logname_stdout, PATH_MAX - 1, "%s-stdout.txt", logname_base);
+ snprintf (logname_stderr, PATH_MAX - 1, "%s-stderr.txt", logname_base);
+
+ printf ("LOG STDOUT %s: %s\n", scmd, logname_stdout);
+ printf ("LOG STDERR %s: %s\n", scmd, logname_stderr);
+
+ fd = open ((char *) logname_stdout, O_APPEND | O_RDWR | O_CREAT, 0777);
+ if (fd < 0)
+ {
+ sleep (5);
+ exit (1);
+ }
+ dup2 (fd, 1);
+ fd = open ((char *) logname_stderr, O_APPEND | O_RDWR | O_CREAT, 0777);
+ if (fd < 0)
+ {
+ sleep (5);
+ exit (1);
+ }
+ dup2 (fd, 2);
+
+ char *argv[] = { (char *) scmd, 0 };
+ int res = execv (argv[0], argv);
+ if (res != 0)
+ {
+ perror ("execve");
+ }
+ sleep (10);
+
+ exit (42);
+}
+
+int
+main (int argc, char **argv)
+{
+ pid_t ppid = getppid ();
+ int i = 0;
+ if (argc < 3)
+ {
+ printf ("usage: %s <parent_pid> <logfile-basename>\n", argv[0]);
+ exit (1);
+ }
+ char *errptr = 0;
+ pid_t parent_pid = strtoll (argv[1], &errptr, 10);
+ char *logname_base = argv[2];
+
+ printf ("DEBUG: pid %d starting for parent pid %d\n", getpid (), ppid);
+ printf ("DEBUG: parent pid: %d\n", parent_pid);
+ printf ("DEBUG: base log name: %s\n", logname_base);
+ if (*errptr)
+ {
+ printf ("%s is not a valid parent pid\n", errptr);
+ exit (2);
+ }
+
+#ifdef __linux__
+ int r = prctl (PR_SET_PDEATHSIG, SIGTERM);
+ if (r == -1)
+ {
+ perror (0);
+ exit (1);
+ }
+#else
+ int r, s = SIGTERM;
+
+ r = procctl (P_PID, 0, PROC_PDEATHSIG_CTL, &s);
+ if (r == -1)
+ {
+ perror ("procctl");
+ exit (1);
+ }
+#endif /* __linux__ */
+
+ /* Establish handler. */
+ struct sigaction sa;
+ sigemptyset (&sa.sa_mask);
+ sa.sa_flags = 0;
+ sa.sa_handler = child_handler;
+
+ sigaction (SIGCHLD, &sa, NULL);
+
+ sigemptyset (&sa.sa_mask);
+ sa.sa_flags = 0;
+ sa.sa_handler = term_handler;
+
+ sigaction (SIGTERM, &sa, NULL);
+
+ sigemptyset (&sa.sa_mask);
+ sa.sa_flags = 0;
+ sa.sa_handler = hup_handler;
+
+ sigaction (SIGHUP, &sa, NULL);
+
+ if (getppid () != parent_pid)
+ {
+ printf ("parent process unexpectedly finished\n");
+ exit (3);
+ }
+
+ argc -= 3; /* skip over argv0, ppid, and log base */
+ argv += 3;
+
+ n_children = argc;
+ printf ("DEBUG: total %d children\n", n_children);
+ children = calloc (n_children, sizeof (children[0]));
+ for (i = 0; i < n_children; i++)
+ {
+ /* argv persists, so we can just use that pointer */
+ children[i].cmd = argv[i];
+ children[i].pid = launch_command (children[i].cmd, logname_base);
+ printf ("DEBUG: child %d (%s): initial launch pid %d\n", i,
+ children[i].cmd, children[i].pid);
+ }
+
+ while (1)
+ {
+ sleep (1);
+ pid_t curr_ppid = getppid ();
+ printf ("pid: %d, current ppid %d, original ppid %d\n", getpid (),
+ curr_ppid, ppid);
+ if (curr_ppid != ppid)
+ {
+ printf ("current ppid %d != original ppid %d - force quit\n",
+ curr_ppid, ppid);
+ fflush (stdout);
+ exit (1);
+ }
+ int restarted = 0;
+ for (i = 0; i < n_children; i++)
+ {
+ if (children[i].pid == 0)
+ {
+ printf ("child %s exited, restarting\n", children[i].cmd);
+ restarted = 1;
+ children[i].pid = launch_command (children[i].cmd, logname_base);
+ }
+ }
+ if (restarted)
+ {
+ sleep (1);
+ }
+
+ fflush (stdout);
+ }
+}
diff --git a/src/plugins/flowprobe/FEATURE.yaml b/src/plugins/flowprobe/FEATURE.yaml
index 66382433d03..9c80b12dc9f 100644
--- a/src/plugins/flowprobe/FEATURE.yaml
+++ b/src/plugins/flowprobe/FEATURE.yaml
@@ -2,12 +2,11 @@
name: IPFIX probe
maintainer: Ole Troan <ot@cisco.com>
features:
- - L2 input feature
- - IPv4 / IPv6 input feature
- - Recording of L2, L3 and L4 information
-description: "IPFIX flow probe. Works in the L2, or IP input feature path."
+ - L2 input and output feature path
+ - IPv4 / IPv6 input and output feature path
+ - Recording of L2, L3, and L4 information
+description: "IPFIX flow probe. Works in the L2 or IP feature path both input and output."
missing:
- - Output path
- Export over IPv6
- Export over TCP/SCTP
state: production
diff --git a/src/plugins/flowprobe/flowprobe.api b/src/plugins/flowprobe/flowprobe.api
index 55dd51d3c30..c2090637cc8 100644
--- a/src/plugins/flowprobe/flowprobe.api
+++ b/src/plugins/flowprobe/flowprobe.api
@@ -5,7 +5,7 @@
used to control the flowprobe plugin
*/
-option version = "1.0.0";
+option version = "2.1.0";
import "vnet/interface_types.api";
@@ -16,6 +16,13 @@ enum flowprobe_which_flags : u8
FLOWPROBE_WHICH_FLAG_IP6 = 0x4,
};
+enum flowprobe_which : u8
+{
+ FLOWPROBE_WHICH_IP4 = 0,
+ FLOWPROBE_WHICH_IP6,
+ FLOWPROBE_WHICH_L2,
+};
+
enum flowprobe_record_flags : u8
{
FLOWPROBE_RECORD_FLAG_L2 = 0x1,
@@ -23,6 +30,13 @@ enum flowprobe_record_flags : u8
FLOWPROBE_RECORD_FLAG_L4 = 0x4,
};
+enum flowprobe_direction : u8
+{
+ FLOWPROBE_DIRECTION_RX = 0,
+ FLOWPROBE_DIRECTION_TX,
+ FLOWPROBE_DIRECTION_BOTH,
+};
+
/** \brief Enable / disable per-packet IPFIX recording on an interface
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -32,6 +46,8 @@ enum flowprobe_record_flags : u8
*/
autoreply define flowprobe_tx_interface_add_del
{
+ option replaced_by="flowprobe_interface_add_del";
+
/* Client identifier, set from api_main.my_client_index */
u32 client_index;
@@ -47,8 +63,59 @@ autoreply define flowprobe_tx_interface_add_del
option vat_help = "<intfc> [disable]";
};
+/** \brief Enable or disable IPFIX flow record generation on an interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add interface if non-zero, else delete
+ @param which - datapath on which to record flows
+ @param direction - direction of recorded flows
+ @param sw_if_index - index of the interface
+*/
+autoreply define flowprobe_interface_add_del
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_flowprobe_which_t which;
+ vl_api_flowprobe_direction_t direction;
+ vl_api_interface_index_t sw_if_index;
+ option vat_help = "(<intfc> | sw_if_index <if-idx>) [(ip4|ip6|l2)] [(rx|tx|both)] [disable]";
+};
+
+/** \brief Dump interfaces for which IPFIX flow record generation is enabled
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index to use as filter (0xffffffff is "all")
+*/
+define flowprobe_interface_dump
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+ option vat_help = "[<if-idx>]";
+};
+
+/** \brief Details about IPFIX flow record generation enabled on interface
+ @param context - sender context which was passed in the request
+ @param which - datapath on which to record flows
+ @param direction - direction of recorded flows
+ @param sw_if_index - index of the interface
+*/
+define flowprobe_interface_details
+{
+ option in_progress;
+ u32 context;
+ vl_api_flowprobe_which_t which;
+ vl_api_flowprobe_direction_t direction;
+ vl_api_interface_index_t sw_if_index;
+};
+
autoreply define flowprobe_params
{
+ option replaced_by="flowprobe_set_params";
+
u32 client_index;
u32 context;
vl_api_flowprobe_record_flags_t record_flags;
@@ -56,3 +123,53 @@ autoreply define flowprobe_params
u32 passive_timer; /* ~0 is off, 0 is default */
option vat_help = "record <[l2] [l3] [l4]> [active <timer> passive <timer>]";
};
+
+/** \brief Set IPFIX flow record generation parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param record_flags - flags indicating what data to record
+ @param active_timer - time in seconds after which active flow records are
+ to be exported (0 is "off", 0xffffffff is "use default value")
+ @param passive_timer - time in seconds after which passive flow records are
+ to be deleted (0 is "off", 0xffffffff is "use default value")
+*/
+autoreply define flowprobe_set_params
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_flowprobe_record_flags_t record_flags;
+ u32 active_timer [default=0xffffffff];
+ u32 passive_timer [default=0xffffffff];
+ option vat_help = "record [l2] [l3] [l4] [active <timer>] [passive <timer>]";
+};
+
+/** \brief Get IPFIX flow record generation parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define flowprobe_get_params
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply to get IPFIX flow record generation parameters
+ @param context - sender context, to match reply w/ request
+ @param retval - error (0 is "no error")
+ @param record_flags - flags indicating what data to record
+ @param active_timer - time in seconds after which active flow records are
+ to be exported (0 is "off")
+ @param passive_timer - time in seconds after which passive flow records are
+ to be deleted (0 is "off")
+*/
+define flowprobe_get_params_reply
+{
+ option in_progress;
+ u32 context;
+ i32 retval;
+ vl_api_flowprobe_record_flags_t record_flags;
+ u32 active_timer;
+ u32 passive_timer;
+};
diff --git a/src/plugins/flowprobe/flowprobe.c b/src/plugins/flowprobe/flowprobe.c
index ffc43bcd440..58a7cfe22f1 100644
--- a/src/plugins/flowprobe/flowprobe.c
+++ b/src/plugins/flowprobe/flowprobe.c
@@ -45,35 +45,54 @@ uword flowprobe_walker_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
#include <vlibapi/api_helper_macros.h>
/* Define the per-interface configurable features */
-/* *INDENT-OFF* */
-VNET_FEATURE_INIT (flow_perpacket_ip4, static) =
-{
+VNET_FEATURE_INIT (flowprobe_input_ip4_unicast, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "flowprobe-input-ip4",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+};
+VNET_FEATURE_INIT (flowprobe_input_ip4_multicast, static) = {
+ .arc_name = "ip4-multicast",
+ .node_name = "flowprobe-input-ip4",
+ .runs_before = VNET_FEATURES ("ip4-mfib-forward-lookup"),
+};
+VNET_FEATURE_INIT (flowprobe_input_ip6_unicast, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "flowprobe-input-ip6",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+VNET_FEATURE_INIT (flowprobe_input_ip6_multicast, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "flowprobe-input-ip6",
+ .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
+};
+VNET_FEATURE_INIT (flowprobe_input_l2, static) = {
+ .arc_name = "device-input",
+ .node_name = "flowprobe-input-l2",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+VNET_FEATURE_INIT (flowprobe_output_ip4, static) = {
.arc_name = "ip4-output",
- .node_name = "flowprobe-ip4",
+ .node_name = "flowprobe-output-ip4",
.runs_before = VNET_FEATURES ("interface-output"),
};
-VNET_FEATURE_INIT (flow_perpacket_ip6, static) =
-{
+VNET_FEATURE_INIT (flowprobe_output_ip6, static) = {
.arc_name = "ip6-output",
- .node_name = "flowprobe-ip6",
+ .node_name = "flowprobe-output-ip6",
.runs_before = VNET_FEATURES ("interface-output"),
};
-VNET_FEATURE_INIT (flow_perpacket_l2, static) = {
+VNET_FEATURE_INIT (flowprobe_output_l2, static) = {
.arc_name = "interface-output",
- .node_name = "flowprobe-l2",
+ .node_name = "flowprobe-output-l2",
.runs_before = VNET_FEATURES ("interface-output-arc-end"),
};
-/* *INDENT-ON* */
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static inline ipfix_field_specifier_t *
flowprobe_template_ip4_fields (ipfix_field_specifier_t * f)
@@ -143,7 +162,7 @@ flowprobe_template_l2_fields (ipfix_field_specifier_t * f)
static inline ipfix_field_specifier_t *
flowprobe_template_common_fields (ipfix_field_specifier_t * f)
{
-#define flowprobe_template_common_field_count() 5
+#define flowprobe_template_common_field_count() 6
/* ingressInterface, TLV type 10, u32 */
f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
ingressInterface, 4);
@@ -154,6 +173,10 @@ flowprobe_template_common_fields (ipfix_field_specifier_t * f)
egressInterface, 4);
f++;
+ /* flowDirection, TLV type 61, u8 */
+ f->e_id_length = ipfix_e_id_length (0 /* enterprise */, flowDirection, 1);
+ f++;
+
/* packetDeltaCount, TLV type 2, u64 */
f->e_id_length = ipfix_e_id_length (0 /* enterprise */ ,
packetDeltaCount, 8);
@@ -202,10 +225,7 @@ flowprobe_template_l4_fields (ipfix_field_specifier_t * f)
* @returns u8 * vector containing the indicated IPFIX template packet
*/
static inline u8 *
-flowprobe_template_rewrite_inline (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_inline (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
flowprobe_variant_t which)
{
@@ -223,8 +243,9 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
flowprobe_main_t *fm = &flowprobe_main;
flowprobe_record_t flags = fr->opaque.as_uword;
bool collect_ip4 = false, collect_ip6 = false;
+ bool collect_l4 = false;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
if (flags & FLOW_RECORD_L3)
{
@@ -235,6 +256,10 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
if (which == FLOW_VARIANT_L2_IP6)
flags |= FLOW_RECORD_L2_IP6;
}
+ if (flags & FLOW_RECORD_L4)
+ {
+ collect_l4 = (which != FLOW_VARIANT_L2);
+ }
field_count += flowprobe_template_common_field_count ();
if (flags & FLOW_RECORD_L2)
@@ -243,7 +268,7 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
field_count += flowprobe_template_ip4_field_count ();
if (collect_ip6)
field_count += flowprobe_template_ip6_field_count ();
- if (flags & FLOW_RECORD_L4)
+ if (collect_l4)
field_count += flowprobe_template_l4_field_count ();
/* allocate rewrite space */
@@ -263,8 +288,8 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
ip->ip_version_and_header_length = 0x45;
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
- ip->dst_address.as_u32 = collector_address->as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
udp->dst_port = clib_host_to_net_u16 (collector_port);
udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
@@ -282,7 +307,7 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
f = flowprobe_template_ip4_fields (f);
if (collect_ip6)
f = flowprobe_template_ip6_fields (f);
- if (flags & FLOW_RECORD_L4)
+ if (collect_l4)
f = flowprobe_template_l4_fields (f);
/* Back to the template packet... */
@@ -309,73 +334,53 @@ flowprobe_template_rewrite_inline (flow_report_main_t * frm,
}
static u8 *
-flowprobe_template_rewrite_ip6 (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_ip6 (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return flowprobe_template_rewrite_inline
- (frm, fr, collector_address, src_address, collector_port,
- FLOW_VARIANT_IP6);
+ return flowprobe_template_rewrite_inline (exp, fr, collector_port,
+ FLOW_VARIANT_IP6);
}
static u8 *
-flowprobe_template_rewrite_ip4 (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_ip4 (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return flowprobe_template_rewrite_inline
- (frm, fr, collector_address, src_address, collector_port,
- FLOW_VARIANT_IP4);
+ return flowprobe_template_rewrite_inline (exp, fr, collector_port,
+ FLOW_VARIANT_IP4);
}
static u8 *
-flowprobe_template_rewrite_l2 (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_l2 (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return flowprobe_template_rewrite_inline
- (frm, fr, collector_address, src_address, collector_port,
- FLOW_VARIANT_L2);
+ return flowprobe_template_rewrite_inline (exp, fr, collector_port,
+ FLOW_VARIANT_L2);
}
static u8 *
-flowprobe_template_rewrite_l2_ip4 (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_l2_ip4 (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return flowprobe_template_rewrite_inline
- (frm, fr, collector_address, src_address, collector_port,
- FLOW_VARIANT_L2_IP4);
+ return flowprobe_template_rewrite_inline (exp, fr, collector_port,
+ FLOW_VARIANT_L2_IP4);
}
static u8 *
-flowprobe_template_rewrite_l2_ip6 (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+flowprobe_template_rewrite_l2_ip6 (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return flowprobe_template_rewrite_inline
- (frm, fr, collector_address, src_address, collector_port,
- FLOW_VARIANT_L2_IP6);
+ return flowprobe_template_rewrite_inline (exp, fr, collector_port,
+ FLOW_VARIANT_L2_IP6);
}
/**
@@ -389,27 +394,27 @@ flowprobe_template_rewrite_l2_ip6 (flow_report_main_t * frm,
* will be sent.
*/
vlib_frame_t *
-flowprobe_data_callback_ip4 (flow_report_main_t * frm,
- flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+flowprobe_data_callback_ip4 (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
flowprobe_flush_callback_ip4 ();
return f;
}
vlib_frame_t *
-flowprobe_data_callback_ip6 (flow_report_main_t * frm,
- flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+flowprobe_data_callback_ip6 (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
flowprobe_flush_callback_ip6 ();
return f;
}
vlib_frame_t *
-flowprobe_data_callback_l2 (flow_report_main_t * frm,
- flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+flowprobe_data_callback_l2 (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
flowprobe_flush_callback_l2 ();
return f;
@@ -422,7 +427,7 @@ flowprobe_template_add_del (u32 domain_id, u16 src_port,
vnet_flow_rewrite_callback_t * rewrite_callback,
bool is_add, u16 * template_id)
{
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
vnet_flow_report_add_del_args_t a = {
.rewrite_callback = rewrite_callback,
.flow_data_callback = flow_data_callback,
@@ -431,7 +436,7 @@ flowprobe_template_add_del (u32 domain_id, u16 src_port,
.src_port = src_port,
.opaque.as_uword = flags,
};
- return vnet_flow_report_add_del (frm, &a, template_id);
+ return vnet_flow_report_add_del (exp, &a, template_id);
}
static void
@@ -501,11 +506,49 @@ flowprobe_create_state_tables (u32 active_timer)
return error;
}
+static clib_error_t *
+flowprobe_clear_state_if_index (u32 sw_if_index)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ clib_error_t *error = 0;
+ u32 worker_i;
+ u32 entry_i;
+
+ if (fm->active_timer > 0)
+ {
+ vec_foreach_index (worker_i, fm->pool_per_worker)
+ {
+ pool_foreach_index (entry_i, fm->pool_per_worker[worker_i])
+ {
+ flowprobe_entry_t *e =
+ pool_elt_at_index (fm->pool_per_worker[worker_i], entry_i);
+ if (e->key.rx_sw_if_index == sw_if_index ||
+ e->key.tx_sw_if_index == sw_if_index)
+ {
+ e->packetcount = 0;
+ e->octetcount = 0;
+ e->prot.tcp.flags = 0;
+ if (fm->passive_timer > 0)
+ {
+ tw_timer_stop_2t_1w_2048sl (
+ fm->timers_per_worker[worker_i],
+ e->passive_timer_handle);
+ }
+ flowprobe_delete_by_index (worker_i, entry_i);
+ }
+ }
+ }
+ }
+
+ return error;
+}
+
static int
validate_feature_on_interface (flowprobe_main_t * fm, u32 sw_if_index,
u8 which)
{
vec_validate_init_empty (fm->flow_per_interface, sw_if_index, ~0);
+ vec_validate_init_empty (fm->direction_per_interface, sw_if_index, ~0);
if (fm->flow_per_interface[sw_if_index] == (u8) ~ 0)
return -1;
@@ -519,13 +562,15 @@ validate_feature_on_interface (flowprobe_main_t * fm, u32 sw_if_index,
* @brief configure / deconfigure the IPFIX flow-per-packet
* @param fm flowprobe_main_t * fm
* @param sw_if_index u32 the desired interface
+ * @param which u8 the desired datapath
+ * @param direction u8 the desired direction
* @param is_add int 1 to enable the feature, 0 to disable it
* @returns 0 if successful, non-zero otherwise
*/
static int
-flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
- u32 sw_if_index, u8 which, int is_add)
+flowprobe_interface_add_del_feature (flowprobe_main_t *fm, u32 sw_if_index,
+ u8 which, u8 direction, int is_add)
{
vlib_main_t *vm = vlib_get_main ();
int rv = 0;
@@ -533,6 +578,7 @@ flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
flowprobe_record_t flags = fm->record;
fm->flow_per_interface[sw_if_index] = (is_add) ? which : (u8) ~ 0;
+ fm->direction_per_interface[sw_if_index] = (is_add) ? direction : (u8) ~0;
fm->template_per_flow[which] += (is_add) ? 1 : -1;
if (is_add && fm->template_per_flow[which] > 1)
template_id = fm->template_reports[flags];
@@ -542,12 +588,17 @@ flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
{
if (which == FLOW_VARIANT_L2)
{
+ if (!is_add)
+ {
+ flowprobe_flush_callback_l2 ();
+ }
if (fm->record & FLOW_RECORD_L2)
{
rv = flowprobe_template_add_del (1, UDP_DST_PORT_ipfix, flags,
flowprobe_data_callback_l2,
flowprobe_template_rewrite_l2,
is_add, &template_id);
+ fm->template_reports[flags] = (is_add) ? template_id : 0;
}
if (fm->record & FLOW_RECORD_L3 || fm->record & FLOW_RECORD_L4)
{
@@ -570,20 +621,30 @@ flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
flags | FLOW_RECORD_L2_IP4;
fm->context[FLOW_VARIANT_L2_IP6].flags =
flags | FLOW_RECORD_L2_IP6;
-
- fm->template_reports[flags] = template_id;
}
}
else if (which == FLOW_VARIANT_IP4)
- rv = flowprobe_template_add_del (1, UDP_DST_PORT_ipfix, flags,
- flowprobe_data_callback_ip4,
- flowprobe_template_rewrite_ip4,
- is_add, &template_id);
+ {
+ if (!is_add)
+ {
+ flowprobe_flush_callback_ip4 ();
+ }
+ rv = flowprobe_template_add_del (
+ 1, UDP_DST_PORT_ipfix, flags, flowprobe_data_callback_ip4,
+ flowprobe_template_rewrite_ip4, is_add, &template_id);
+ fm->template_reports[flags] = (is_add) ? template_id : 0;
+ }
else if (which == FLOW_VARIANT_IP6)
- rv = flowprobe_template_add_del (1, UDP_DST_PORT_ipfix, flags,
- flowprobe_data_callback_ip6,
- flowprobe_template_rewrite_ip6,
- is_add, &template_id);
+ {
+ if (!is_add)
+ {
+ flowprobe_flush_callback_ip6 ();
+ }
+ rv = flowprobe_template_add_del (
+ 1, UDP_DST_PORT_ipfix, flags, flowprobe_data_callback_ip6,
+ flowprobe_template_rewrite_ip6, is_add, &template_id);
+ fm->template_reports[flags] = (is_add) ? template_id : 0;
+ }
}
if (rv && rv != VNET_API_ERROR_VALUE_EXIST)
{
@@ -594,18 +655,41 @@ flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
if (which != (u8) ~ 0)
{
fm->context[which].flags = fm->record;
- fm->template_reports[flags] = (is_add) ? template_id : 0;
}
- if (which == FLOW_VARIANT_IP4)
- vnet_feature_enable_disable ("ip4-output", "flowprobe-ip4",
- sw_if_index, is_add, 0, 0);
- else if (which == FLOW_VARIANT_IP6)
- vnet_feature_enable_disable ("ip6-output", "flowprobe-ip6",
- sw_if_index, is_add, 0, 0);
- else if (which == FLOW_VARIANT_L2)
- vnet_feature_enable_disable ("interface-output", "flowprobe-l2",
- sw_if_index, is_add, 0, 0);
+ if (direction == FLOW_DIRECTION_RX || direction == FLOW_DIRECTION_BOTH)
+ {
+ if (which == FLOW_VARIANT_IP4)
+ {
+ vnet_feature_enable_disable ("ip4-unicast", "flowprobe-input-ip4",
+ sw_if_index, is_add, 0, 0);
+ vnet_feature_enable_disable ("ip4-multicast", "flowprobe-input-ip4",
+ sw_if_index, is_add, 0, 0);
+ }
+ else if (which == FLOW_VARIANT_IP6)
+ {
+ vnet_feature_enable_disable ("ip6-unicast", "flowprobe-input-ip6",
+ sw_if_index, is_add, 0, 0);
+ vnet_feature_enable_disable ("ip6-multicast", "flowprobe-input-ip6",
+ sw_if_index, is_add, 0, 0);
+ }
+ else if (which == FLOW_VARIANT_L2)
+ vnet_feature_enable_disable ("device-input", "flowprobe-input-l2",
+ sw_if_index, is_add, 0, 0);
+ }
+
+ if (direction == FLOW_DIRECTION_TX || direction == FLOW_DIRECTION_BOTH)
+ {
+ if (which == FLOW_VARIANT_IP4)
+ vnet_feature_enable_disable ("ip4-output", "flowprobe-output-ip4",
+ sw_if_index, is_add, 0, 0);
+ else if (which == FLOW_VARIANT_IP6)
+ vnet_feature_enable_disable ("ip6-output", "flowprobe-output-ip6",
+ sw_if_index, is_add, 0, 0);
+ else if (which == FLOW_VARIANT_L2)
+ vnet_feature_enable_disable ("interface-output", "flowprobe-output-l2",
+ sw_if_index, is_add, 0, 0);
+ }
/* Stateful flow collection */
if (is_add && !fm->initialized)
@@ -615,6 +699,11 @@ flowprobe_tx_interface_add_del_feature (flowprobe_main_t * fm,
vlib_process_signal_event (vm, flowprobe_timer_node.index, 1, 0);
}
+ if (!is_add && fm->initialized)
+ {
+ flowprobe_clear_state_if_index (sw_if_index);
+ }
+
return 0;
}
@@ -646,8 +735,8 @@ void vl_api_flowprobe_tx_interface_add_del_t_handler
goto out;
}
- rv = flowprobe_tx_interface_add_del_feature
- (fm, sw_if_index, mp->which, mp->is_add);
+ rv = flowprobe_interface_add_del_feature (fm, sw_if_index, mp->which,
+ FLOW_DIRECTION_TX, mp->is_add);
out:
BAD_SW_IF_INDEX_LABEL;
@@ -655,6 +744,167 @@ out:
REPLY_MACRO (VL_API_FLOWPROBE_TX_INTERFACE_ADD_DEL_REPLY);
}
+void
+vl_api_flowprobe_interface_add_del_t_handler (
+ vl_api_flowprobe_interface_add_del_t *mp)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_flowprobe_interface_add_del_reply_t *rmp;
+ u32 sw_if_index;
+ u8 which;
+ u8 direction;
+ bool is_add;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ sw_if_index = ntohl (mp->sw_if_index);
+ is_add = mp->is_add;
+
+ if (mp->which == FLOWPROBE_WHICH_IP4)
+ which = FLOW_VARIANT_IP4;
+ else if (mp->which == FLOWPROBE_WHICH_IP6)
+ which = FLOW_VARIANT_IP6;
+ else if (mp->which == FLOWPROBE_WHICH_L2)
+ which = FLOW_VARIANT_L2;
+ else
+ {
+ clib_warning ("Invalid value of which");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ if (mp->direction == FLOWPROBE_DIRECTION_RX)
+ direction = FLOW_DIRECTION_RX;
+ else if (mp->direction == FLOWPROBE_DIRECTION_TX)
+ direction = FLOW_DIRECTION_TX;
+ else if (mp->direction == FLOWPROBE_DIRECTION_BOTH)
+ direction = FLOW_DIRECTION_BOTH;
+ else
+ {
+ clib_warning ("Invalid value of direction");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ if (fm->record == 0)
+ {
+ clib_warning ("Please specify flowprobe params record first");
+ rv = VNET_API_ERROR_CANNOT_ENABLE_DISABLE_FEATURE;
+ goto out;
+ }
+
+ rv = validate_feature_on_interface (fm, sw_if_index, which);
+ if (rv == 1)
+ {
+ if (is_add)
+ {
+ clib_warning ("Variant is already enabled for given interface");
+ rv = VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+ goto out;
+ }
+ }
+ else if (rv == 0)
+ {
+ clib_warning ("Interface has different variant enabled");
+ rv = VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+ goto out;
+ }
+ else if (rv == -1)
+ {
+ if (!is_add)
+ {
+ clib_warning ("Interface has no variant enabled");
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto out;
+ }
+ }
+
+ rv = flowprobe_interface_add_del_feature (fm, sw_if_index, which, direction,
+ is_add);
+
+out:
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_FLOWPROBE_INTERFACE_ADD_DEL_REPLY);
+}
+
+static void
+send_flowprobe_interface_details (u32 sw_if_index, u8 which, u8 direction,
+ vl_api_registration_t *reg, u32 context)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_flowprobe_interface_details_t *rmp = 0;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ if (!rmp)
+ return;
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_FLOWPROBE_INTERFACE_DETAILS + REPLY_MSG_ID_BASE);
+ rmp->context = context;
+
+ rmp->sw_if_index = htonl (sw_if_index);
+
+ if (which == FLOW_VARIANT_IP4)
+ rmp->which = FLOWPROBE_WHICH_IP4;
+ else if (which == FLOW_VARIANT_IP6)
+ rmp->which = FLOWPROBE_WHICH_IP6;
+ else if (which == FLOW_VARIANT_L2)
+ rmp->which = FLOWPROBE_WHICH_L2;
+ else
+ ASSERT (0);
+
+ if (direction == FLOW_DIRECTION_RX)
+ rmp->direction = FLOWPROBE_DIRECTION_RX;
+ else if (direction == FLOW_DIRECTION_TX)
+ rmp->direction = FLOWPROBE_DIRECTION_TX;
+ else if (direction == FLOW_DIRECTION_BOTH)
+ rmp->direction = FLOWPROBE_DIRECTION_BOTH;
+ else
+ ASSERT (0);
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_flowprobe_interface_dump_t_handler (
+ vl_api_flowprobe_interface_dump_t *mp)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_registration_t *reg;
+ u32 sw_if_index;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (sw_if_index == ~0)
+ {
+ u8 *which;
+
+ vec_foreach (which, fm->flow_per_interface)
+ {
+ if (*which == (u8) ~0)
+ continue;
+
+ sw_if_index = which - fm->flow_per_interface;
+ send_flowprobe_interface_details (
+ sw_if_index, *which, fm->direction_per_interface[sw_if_index], reg,
+ mp->context);
+ }
+ }
+ else if (vec_len (fm->flow_per_interface) > sw_if_index &&
+ fm->flow_per_interface[sw_if_index] != (u8) ~0)
+ {
+ send_flowprobe_interface_details (
+ sw_if_index, fm->flow_per_interface[sw_if_index],
+ fm->direction_per_interface[sw_if_index], reg, mp->context);
+ }
+}
+
#define vec_neg_search(v,E) \
({ \
word _v(i) = 0; \
@@ -675,7 +925,7 @@ flowprobe_params (flowprobe_main_t * fm, u8 record_l2,
flowprobe_record_t flags = 0;
if (vec_neg_search (fm->flow_per_interface, (u8) ~ 0) != ~0)
- return ~0;
+ return VNET_API_ERROR_UNSUPPORTED;
if (record_l2)
flags |= FLOW_RECORD_L2;
@@ -715,17 +965,89 @@ vl_api_flowprobe_params_t_handler (vl_api_flowprobe_params_t * mp)
REPLY_MACRO (VL_API_FLOWPROBE_PARAMS_REPLY);
}
-/* *INDENT-OFF* */
+void
+vl_api_flowprobe_set_params_t_handler (vl_api_flowprobe_set_params_t *mp)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_flowprobe_set_params_reply_t *rmp;
+ bool record_l2, record_l3, record_l4;
+ u32 active_timer;
+ u32 passive_timer;
+ int rv = 0;
+
+ record_l2 = (mp->record_flags & FLOWPROBE_RECORD_FLAG_L2);
+ record_l3 = (mp->record_flags & FLOWPROBE_RECORD_FLAG_L3);
+ record_l4 = (mp->record_flags & FLOWPROBE_RECORD_FLAG_L4);
+
+ active_timer = clib_net_to_host_u32 (mp->active_timer);
+ passive_timer = clib_net_to_host_u32 (mp->passive_timer);
+
+ if (passive_timer > 0 && active_timer > passive_timer)
+ {
+ clib_warning ("Passive timer must be greater than active timer");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ rv = flowprobe_params (fm, record_l2, record_l3, record_l4, active_timer,
+ passive_timer);
+ if (rv == VNET_API_ERROR_UNSUPPORTED)
+ clib_warning (
+ "Cannot change params when feature is enabled on some interfaces");
+
+out:
+ REPLY_MACRO (VL_API_FLOWPROBE_SET_PARAMS_REPLY);
+}
+
+void
+vl_api_flowprobe_get_params_t_handler (vl_api_flowprobe_get_params_t *mp)
+{
+ flowprobe_main_t *fm = &flowprobe_main;
+ vl_api_flowprobe_get_params_reply_t *rmp;
+ u8 record_flags = 0;
+ int rv = 0;
+
+ if (fm->record & FLOW_RECORD_L2)
+ record_flags |= FLOWPROBE_RECORD_FLAG_L2;
+ if (fm->record & FLOW_RECORD_L3)
+ record_flags |= FLOWPROBE_RECORD_FLAG_L3;
+ if (fm->record & FLOW_RECORD_L4)
+ record_flags |= FLOWPROBE_RECORD_FLAG_L4;
+
+ // clang-format off
+ REPLY_MACRO2 (VL_API_FLOWPROBE_GET_PARAMS_REPLY,
+ ({
+ rmp->record_flags = record_flags;
+ rmp->active_timer = htonl (fm->active_timer);
+ rmp->passive_timer = htonl (fm->passive_timer);
+ }));
+ // clang-format on
+}
+
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Flow per Packet",
};
-/* *INDENT-ON* */
+
+u8 *
+format_flowprobe_direction (u8 *s, va_list *args)
+{
+ u8 *direction = va_arg (*args, u8 *);
+ if (*direction == FLOW_DIRECTION_RX)
+ s = format (s, "rx");
+ else if (*direction == FLOW_DIRECTION_TX)
+ s = format (s, "tx");
+ else if (*direction == FLOW_DIRECTION_BOTH)
+ s = format (s, "rx tx");
+
+ return s;
+}
u8 *
format_flowprobe_entry (u8 * s, va_list * args)
{
flowprobe_entry_t *e = va_arg (*args, flowprobe_entry_t *);
+ s = format (s, " %U", format_flowprobe_direction, &e->key.direction);
s = format (s, " %d/%d", e->key.rx_sw_if_index, e->key.tx_sw_if_index);
s = format (s, " %U %U", format_ethernet_address, &e->key.src_mac,
@@ -789,14 +1111,12 @@ flowprobe_show_table_fn (vlib_main_t * vm,
for (i = 0; i < vec_len (fm->pool_per_worker); i++)
{
- /* *INDENT-OFF* */
pool_foreach (e, fm->pool_per_worker[i])
{
vlib_cli_output (vm, "%U",
format_flowprobe_entry,
e);
}
- /* *INDENT-ON* */
}
return 0;
@@ -822,14 +1142,15 @@ flowprobe_show_stats_fn (vlib_main_t * vm,
}
static clib_error_t *
-flowprobe_tx_interface_add_del_feature_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+flowprobe_interface_add_del_feature_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
flowprobe_main_t *fm = &flowprobe_main;
u32 sw_if_index = ~0;
int is_add = 1;
u8 which = FLOW_VARIANT_IP4;
+ flowprobe_direction_t direction = FLOW_DIRECTION_TX;
int rv;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
@@ -844,6 +1165,12 @@ flowprobe_tx_interface_add_del_feature_command_fn (vlib_main_t * vm,
which = FLOW_VARIANT_IP6;
else if (unformat (input, "l2"))
which = FLOW_VARIANT_L2;
+ else if (unformat (input, "rx"))
+ direction = FLOW_DIRECTION_RX;
+ else if (unformat (input, "tx"))
+ direction = FLOW_DIRECTION_TX;
+ else if (unformat (input, "both"))
+ direction = FLOW_DIRECTION_BOTH;
else
break;
}
@@ -865,9 +1192,16 @@ flowprobe_tx_interface_add_del_feature_command_fn (vlib_main_t * vm,
else if (rv == 0)
return clib_error_return (0,
"Interface has enable different datapath ...");
+ else if (rv == -1)
+ {
+ if (!is_add)
+ {
+ return clib_error_return (0, "Interface has no datapath enabled");
+ }
+ }
- rv =
- flowprobe_tx_interface_add_del_feature (fm, sw_if_index, which, is_add);
+ rv = flowprobe_interface_add_del_feature (fm, sw_if_index, which, direction,
+ is_add);
switch (rv)
{
case 0:
@@ -904,9 +1238,10 @@ flowprobe_show_feature_command_fn (vlib_main_t * vm,
continue;
sw_if_index = which - fm->flow_per_interface;
- vlib_cli_output (vm, " %U %U", format_vnet_sw_if_index_name,
+ vlib_cli_output (vm, " %U %U %U", format_vnet_sw_if_index_name,
vnet_get_main (), sw_if_index, format_flowprobe_feature,
- which);
+ which, format_flowprobe_direction,
+ &fm->direction_per_interface[sw_if_index]);
}
return 0;
}
@@ -983,18 +1318,17 @@ flowprobe_show_params_command_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (flowprobe_enable_disable_command, static) = {
- .path = "flowprobe feature add-del",
- .short_help =
- "flowprobe feature add-del <interface-name> <l2|ip4|ip6> disable",
- .function = flowprobe_tx_interface_add_del_feature_command_fn,
+ .path = "flowprobe feature add-del",
+ .short_help = "flowprobe feature add-del <interface-name> [(l2|ip4|ip6)] "
+ "[(rx|tx|both)] [disable]",
+ .function = flowprobe_interface_add_del_feature_command_fn,
};
VLIB_CLI_COMMAND (flowprobe_params_command, static) = {
- .path = "flowprobe params",
- .short_help =
- "flowprobe params record <[l2] [l3] [l4]> [active <timer> passive <timer>]",
- .function = flowprobe_params_command_fn,
+ .path = "flowprobe params",
+ .short_help = "flowprobe params record [l2] [l3] [l4] [active <timer>] "
+ "[passive <timer>]",
+ .function = flowprobe_params_command_fn,
};
VLIB_CLI_COMMAND (flowprobe_show_feature_command, static) = {
@@ -1019,7 +1353,6 @@ VLIB_CLI_COMMAND (flowprobe_show_stats_command, static) = {
.short_help = "show flowprobe statistics",
.function = flowprobe_show_stats_fn,
};
-/* *INDENT-ON* */
/*
* Main-core process, sending an interrupt to the per worker input
@@ -1073,13 +1406,11 @@ timer_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0; /* or not */
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (flowprobe_timer_node,static) = {
.function = timer_process,
.name = "flowprobe-timer-process",
.type = VLIB_NODE_TYPE_PROCESS,
};
-/* *INDENT-ON* */
#include <flowprobe/flowprobe.api.c>
diff --git a/src/plugins/flowprobe/flowprobe.h b/src/plugins/flowprobe/flowprobe.h
index 2d28c81de33..1fde5f94491 100644
--- a/src/plugins/flowprobe/flowprobe.h
+++ b/src/plugins/flowprobe/flowprobe.h
@@ -42,17 +42,22 @@ typedef enum
FLOW_N_RECORDS = 1 << 5,
} flowprobe_record_t;
-/* *INDENT-OFF* */
typedef enum __attribute__ ((__packed__))
{
- FLOW_VARIANT_IP4,
+ FLOW_VARIANT_IP4 = 0,
FLOW_VARIANT_IP6,
FLOW_VARIANT_L2,
FLOW_VARIANT_L2_IP4,
FLOW_VARIANT_L2_IP6,
FLOW_N_VARIANTS,
} flowprobe_variant_t;
-/* *INDENT-ON* */
+
+typedef enum __attribute__ ((__packed__))
+{
+ FLOW_DIRECTION_RX = 0,
+ FLOW_DIRECTION_TX,
+ FLOW_DIRECTION_BOTH,
+} flowprobe_direction_t;
STATIC_ASSERT (sizeof (flowprobe_variant_t) == 1,
"flowprobe_variant_t is expected to be 1 byte, "
@@ -72,7 +77,6 @@ typedef struct
u16 *next_record_offset_per_worker;
} flowprobe_protocol_context_t;
-/* *INDENT-OFF* */
typedef struct __attribute__ ((aligned (8))) {
u32 rx_sw_if_index;
u32 tx_sw_if_index;
@@ -85,8 +89,8 @@ typedef struct __attribute__ ((aligned (8))) {
u16 src_port;
u16 dst_port;
flowprobe_variant_t which;
+ flowprobe_direction_t direction;
} flowprobe_key_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -134,9 +138,7 @@ typedef struct
u8 ht_log2len; /* Hash table size is 2^log2len */
u32 **hash_per_worker;
flowprobe_entry_t **pool_per_worker;
- /* *INDENT-OFF* */
TWT (tw_timer_wheel) ** timers_per_worker;
- /* *INDENT-ON* */
u32 **expired_passive_per_worker;
flowprobe_record_t record;
@@ -149,6 +151,7 @@ typedef struct
u16 template_per_flow[FLOW_N_VARIANTS];
u8 *flow_per_interface;
+ u8 *direction_per_interface;
/** convenience vlib_main_t pointer */
vlib_main_t *vlib_main;
@@ -159,6 +162,8 @@ typedef struct
extern flowprobe_main_t flowprobe_main;
extern vlib_node_registration_t flowprobe_walker_node;
+void flowprobe_delete_by_index (u32 my_cpu_number, u32 poolindex);
+
void flowprobe_flush_callback_ip4 (void);
void flowprobe_flush_callback_ip6 (void);
void flowprobe_flush_callback_l2 (void);
diff --git a/src/plugins/flowprobe/flowprobe_plugin_doc.md b/src/plugins/flowprobe/flowprobe_plugin_doc.md
deleted file mode 100644
index 4c9b2342a83..00000000000
--- a/src/plugins/flowprobe/flowprobe_plugin_doc.md
+++ /dev/null
@@ -1,13 +0,0 @@
-IPFIX flow record plugin {#flowprobe_plugin_doc}
-========================
-
-## Introduction
-
-This plugin generates ipfix flow records on interfaces which have the feature enabled
-
-## Sample configuration
-
-set ipfix exporter collector 192.168.6.2 src 192.168.6.1 template-interval 20 port 4739 path-mtu 1500
-
-flowprobe params record l3 active 20 passive 120
-flowprobe feature add-del GigabitEthernet2/3/0 l2 \ No newline at end of file
diff --git a/src/plugins/flowprobe/flowprobe_plugin_doc.rst b/src/plugins/flowprobe/flowprobe_plugin_doc.rst
new file mode 100644
index 00000000000..4add41f5611
--- /dev/null
+++ b/src/plugins/flowprobe/flowprobe_plugin_doc.rst
@@ -0,0 +1,18 @@
+IPFIX flow record plugin
+========================
+
+Introduction
+------------
+
+This plugin generates ipfix flow records on interfaces which have the
+feature enabled
+
+Sample configuration
+--------------------
+
+::
+
+ set ipfix exporter collector 192.168.6.2 src 192.168.6.1 template-interval 20 port 4739 path-mtu 1450
+
+ flowprobe params record l3 active 20 passive 120
+ flowprobe feature add-del GigabitEthernet2/3/0 l2
diff --git a/src/plugins/flowprobe/flowprobe_test.c b/src/plugins/flowprobe/flowprobe_test.c
index a694e45ae9b..37b91207e29 100644
--- a/src/plugins/flowprobe/flowprobe_test.c
+++ b/src/plugins/flowprobe/flowprobe_test.c
@@ -93,6 +93,136 @@ api_flowprobe_tx_interface_add_del (vat_main_t * vam)
}
static int
+api_flowprobe_interface_add_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ int enable_disable = 1;
+ u8 which = FLOWPROBE_WHICH_IP4;
+ u8 direction = FLOWPROBE_DIRECTION_TX;
+ u32 sw_if_index = ~0;
+ vl_api_flowprobe_interface_add_del_t *mp;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "disable"))
+ enable_disable = 0;
+ else if (unformat (i, "ip4"))
+ which = FLOWPROBE_WHICH_IP4;
+ else if (unformat (i, "ip6"))
+ which = FLOWPROBE_WHICH_IP6;
+ else if (unformat (i, "l2"))
+ which = FLOWPROBE_WHICH_L2;
+ else if (unformat (i, "rx"))
+ direction = FLOWPROBE_DIRECTION_RX;
+ else if (unformat (i, "tx"))
+ direction = FLOWPROBE_DIRECTION_TX;
+ else if (unformat (i, "both"))
+ direction = FLOWPROBE_DIRECTION_BOTH;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("Missing interface name / explicit sw_if_index number\n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (FLOWPROBE_INTERFACE_ADD_DEL, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = enable_disable;
+ mp->which = which;
+ mp->direction = direction;
+
+ /* Send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_flowprobe_interface_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_flowprobe_interface_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (FLOWPROBE_INTERFACE_DUMP, mp);
+ mp->sw_if_index = htonl (sw_if_index);
+
+ /* Send it... */
+ S (mp);
+
+ /* Use control ping for synchronization */
+ PING (&flowprobe_test_main, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_flowprobe_interface_details_t_handler (
+ vl_api_flowprobe_interface_details_t *mp)
+{
+ vat_main_t *vam = flowprobe_test_main.vat_main;
+ u32 sw_if_index;
+ u8 which;
+ u8 direction;
+ u8 *out = 0;
+ const char *variants[] = {
+ [FLOWPROBE_WHICH_IP4] = "ip4",
+ [FLOWPROBE_WHICH_IP6] = "ip6",
+ [FLOWPROBE_WHICH_L2] = "l2",
+ "Erroneous variant",
+ };
+ const char *directions[] = {
+ [FLOWPROBE_DIRECTION_RX] = "rx",
+ [FLOWPROBE_DIRECTION_TX] = "tx",
+ [FLOWPROBE_DIRECTION_BOTH] = "rx tx",
+ "Erroneous direction",
+ };
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ which = mp->which;
+ if (which > ARRAY_LEN (variants) - 2)
+ which = ARRAY_LEN (variants) - 1;
+
+ direction = mp->direction;
+ if (direction > ARRAY_LEN (directions) - 2)
+ direction = ARRAY_LEN (directions) - 1;
+
+ out = format (0, "sw_if_index: %u, variant: %s, direction: %s\n%c",
+ sw_if_index, variants[which], directions[direction], 0);
+
+ fformat (vam->ofp, (char *) out);
+ vec_free (out);
+}
+
+static int
api_flowprobe_params (vat_main_t * vam)
{
unformat_input_t *i = vam->input;
@@ -145,6 +275,94 @@ api_flowprobe_params (vat_main_t * vam)
return ret;
}
+static int
+api_flowprobe_set_params (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_flowprobe_set_params_t *mp;
+ u32 active_timer = ~0;
+ u32 passive_timer = ~0;
+ u8 record_flags = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "active %d", &active_timer))
+ ;
+ else if (unformat (i, "passive %d", &passive_timer))
+ ;
+ else if (unformat (i, "record"))
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "l2"))
+ record_flags |= FLOWPROBE_RECORD_FLAG_L2;
+ else if (unformat (i, "l3"))
+ record_flags |= FLOWPROBE_RECORD_FLAG_L3;
+ else if (unformat (i, "l4"))
+ record_flags |= FLOWPROBE_RECORD_FLAG_L4;
+ else
+ break;
+ }
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (FLOWPROBE_SET_PARAMS, mp);
+ mp->record_flags = record_flags;
+ mp->active_timer = ntohl (active_timer);
+ mp->passive_timer = ntohl (passive_timer);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+static int
+api_flowprobe_get_params (vat_main_t *vam)
+{
+ vl_api_flowprobe_get_params_t *mp;
+ int ret;
+
+ /* Construct the API message */
+ M (FLOWPROBE_GET_PARAMS, mp);
+
+ /* Send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_flowprobe_get_params_reply_t_handler (
+ vl_api_flowprobe_get_params_reply_t *mp)
+{
+ vat_main_t *vam = flowprobe_test_main.vat_main;
+ u8 *out = 0;
+
+ out =
+ format (0, "active: %u, passive: %u, record:", ntohl (mp->active_timer),
+ ntohl (mp->passive_timer));
+
+ if (mp->record_flags & FLOWPROBE_RECORD_FLAG_L2)
+ out = format (out, " l2");
+ if (mp->record_flags & FLOWPROBE_RECORD_FLAG_L3)
+ out = format (out, " l3");
+ if (mp->record_flags & FLOWPROBE_RECORD_FLAG_L4)
+ out = format (out, " l4");
+
+ out = format (out, "\n%c", 0);
+ fformat (vam->ofp, (char *) out);
+ vec_free (out);
+ vam->result_ready = 1;
+}
+
/*
* List of messages that the api test plugin sends,
* and that the data plane plugin processes
diff --git a/src/plugins/flowprobe/node.c b/src/plugins/flowprobe/node.c
index e7a39a7ed7e..03511689dda 100644
--- a/src/plugins/flowprobe/node.c
+++ b/src/plugins/flowprobe/node.c
@@ -17,6 +17,7 @@
#include <vlib/vlib.h>
#include <vnet/vnet.h>
#include <vppinfra/crc32.h>
+#include <vppinfra/xxhash.h>
#include <vppinfra/error.h>
#include <flowprobe/flowprobe.h>
#include <vnet/ip/ip6_packet.h>
@@ -98,9 +99,15 @@ format_flowprobe_trace (u8 * s, va_list * args)
return s;
}
-vlib_node_registration_t flowprobe_ip4_node;
-vlib_node_registration_t flowprobe_ip6_node;
-vlib_node_registration_t flowprobe_l2_node;
+vlib_node_registration_t flowprobe_input_ip4_node;
+vlib_node_registration_t flowprobe_input_ip6_node;
+vlib_node_registration_t flowprobe_input_l2_node;
+vlib_node_registration_t flowprobe_output_ip4_node;
+vlib_node_registration_t flowprobe_output_ip6_node;
+vlib_node_registration_t flowprobe_output_l2_node;
+vlib_node_registration_t flowprobe_flush_ip4_node;
+vlib_node_registration_t flowprobe_flush_ip6_node;
+vlib_node_registration_t flowprobe_flush_l2_node;
/* No counters at the moment */
#define foreach_flowprobe_error \
@@ -166,6 +173,11 @@ flowprobe_common_add (vlib_buffer_t * to_b, flowprobe_entry_t * e, u16 offset)
clib_memcpy_fast (to_b->data + offset, &tx_if, sizeof (tx_if));
offset += sizeof (tx_if);
+ /* Flow direction
+ 0x00: ingress flow
+ 0x01: egress flow */
+ to_b->data[offset++] = (e->key.direction == FLOW_DIRECTION_TX);
+
/* packet delta count */
u64 packetdelta = clib_host_to_net_u64 (e->packetcount);
clib_memcpy_fast (to_b->data + offset, &packetdelta, sizeof (u64));
@@ -357,25 +369,30 @@ flowprobe_create (u32 my_cpu_number, flowprobe_key_t * k, u32 * poolindex)
}
static inline void
-add_to_flow_record_state (vlib_main_t * vm, vlib_node_runtime_t * node,
- flowprobe_main_t * fm, vlib_buffer_t * b,
+add_to_flow_record_state (vlib_main_t *vm, vlib_node_runtime_t *node,
+ flowprobe_main_t *fm, vlib_buffer_t *b,
timestamp_nsec_t timestamp, u16 length,
- flowprobe_variant_t which, flowprobe_trace_t * t)
+ flowprobe_variant_t which,
+ flowprobe_direction_t direction,
+ flowprobe_trace_t *t)
{
if (fm->disabled)
return;
+ ASSERT (direction == FLOW_DIRECTION_RX || direction == FLOW_DIRECTION_TX);
+
u32 my_cpu_number = vm->thread_index;
u16 octets = 0;
flowprobe_record_t flags = fm->context[which].flags;
bool collect_ip4 = false, collect_ip6 = false;
ASSERT (b);
- ethernet_header_t *eth = vlib_buffer_get_current (b);
+ ethernet_header_t *eth = (direction == FLOW_DIRECTION_TX) ?
+ vlib_buffer_get_current (b) :
+ ethernet_buffer_get_header (b);
u16 ethertype = clib_net_to_host_u16 (eth->type);
- /* *INDENT-OFF* */
+ i16 l3_hdr_offset = (u8 *) eth - b->data + sizeof (ethernet_header_t);
flowprobe_key_t k = {};
- /* *INDENT-ON* */
ip4_header_t *ip4 = 0;
ip6_header_t *ip6 = 0;
udp_header_t *udp = 0;
@@ -392,6 +409,7 @@ add_to_flow_record_state (vlib_main_t * vm, vlib_node_runtime_t * node,
k.tx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
k.which = which;
+ k.direction = direction;
if (flags & FLOW_RECORD_L2)
{
@@ -399,9 +417,22 @@ add_to_flow_record_state (vlib_main_t * vm, vlib_node_runtime_t * node,
clib_memcpy_fast (k.dst_mac, eth->dst_address, 6);
k.ethertype = ethertype;
}
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ /*VLAN TAG*/
+ ethernet_vlan_header_tv_t *ethv =
+ (ethernet_vlan_header_tv_t *) (&(eth->type));
+ /*Q in Q possibility */
+ while (clib_net_to_host_u16 (ethv->type) == ETHERNET_TYPE_VLAN)
+ {
+ ethv++;
+ l3_hdr_offset += sizeof (ethernet_vlan_header_tv_t);
+ }
+ k.ethertype = ethertype = clib_net_to_host_u16 ((ethv)->type);
+ }
if (collect_ip6 && ethertype == ETHERNET_TYPE_IP6)
{
- ip6 = (ip6_header_t *) (eth + 1);
+ ip6 = (ip6_header_t *) (b->data + l3_hdr_offset);
if (flags & FLOW_RECORD_L3)
{
k.src_address.as_u64[0] = ip6->src_address.as_u64[0];
@@ -420,7 +451,7 @@ add_to_flow_record_state (vlib_main_t * vm, vlib_node_runtime_t * node,
}
if (collect_ip4 && ethertype == ETHERNET_TYPE_IP4)
{
- ip4 = (ip4_header_t *) (eth + 1);
+ ip4 = (ip4_header_t *) (b->data + l3_hdr_offset);
if (flags & FLOW_RECORD_L3)
{
k.src_address.ip4.as_u32 = ip4->src_address.as_u32;
@@ -520,6 +551,7 @@ flowprobe_export_send (vlib_main_t * vm, vlib_buffer_t * b0,
{
flowprobe_main_t *fm = &flowprobe_main;
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
vlib_frame_t *f;
ip4_ipfix_template_packet_t *tp;
ipfix_set_header_t *s;
@@ -537,19 +569,19 @@ flowprobe_export_send (vlib_main_t * vm, vlib_buffer_t * b0,
flowprobe_get_headersize ())
return;
- u32 i, index = vec_len (frm->streams);
+ u32 i, index = vec_len (exp->streams);
for (i = 0; i < index; i++)
- if (frm->streams[i].domain_id == 1)
+ if (exp->streams[i].domain_id == 1)
{
index = i;
break;
}
- if (i == vec_len (frm->streams))
+ if (i == vec_len (exp->streams))
{
- vec_validate (frm->streams, index);
- frm->streams[index].domain_id = 1;
+ vec_validate (exp->streams, index);
+ exp->streams[index].domain_id = 1;
}
- stream = &frm->streams[index];
+ stream = &exp->streams[index];
tp = vlib_buffer_get_current (b0);
ip = (ip4_header_t *) & tp->ip4;
@@ -561,16 +593,15 @@ flowprobe_export_send (vlib_main_t * vm, vlib_buffer_t * b0,
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
ip->flags_and_fragment_offset = 0;
- ip->src_address.as_u32 = frm->src_address.as_u32;
- ip->dst_address.as_u32 = frm->ipfix_collector.as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
- udp->dst_port = clib_host_to_net_u16 (frm->collector_port);
+ udp->dst_port = clib_host_to_net_u16 (exp->collector_port);
udp->checksum = 0;
/* FIXUP: message header export_time */
- h->export_time = (u32)
- (((f64) frm->unix_time_0) +
- (vlib_time_now (frm->vlib_main) - frm->vlib_time_0));
+ h->export_time =
+ (u32) (((f64) frm->unix_time_0) + (vlib_time_now (vm) - frm->vlib_time_0));
h->export_time = clib_host_to_net_u32 (h->export_time);
h->domain_id = clib_host_to_net_u32 (stream->domain_id);
@@ -590,7 +621,7 @@ flowprobe_export_send (vlib_main_t * vm, vlib_buffer_t * b0,
ip->checksum = ip4_header_checksum (ip);
udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
- if (frm->udp_checksum)
+ if (exp->udp_checksum)
{
/* RFC 7011 section 10.3.2. */
udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
@@ -616,7 +647,7 @@ flowprobe_export_send (vlib_main_t * vm, vlib_buffer_t * b0,
}
vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
- vlib_node_increment_counter (vm, flowprobe_l2_node.index,
+ vlib_node_increment_counter (vm, flowprobe_output_l2_node.index,
FLOWPROBE_ERROR_EXPORTED_PACKETS, 1);
fm->context[which].frames_per_worker[my_cpu_number] = 0;
@@ -629,7 +660,7 @@ static vlib_buffer_t *
flowprobe_get_buffer (vlib_main_t * vm, flowprobe_variant_t which)
{
flowprobe_main_t *fm = &flowprobe_main;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = pool_elt_at_index (flow_report_main.exporters, 0);
vlib_buffer_t *b0;
u32 bi0;
u32 my_cpu_number = vm->thread_index;
@@ -642,7 +673,7 @@ flowprobe_get_buffer (vlib_main_t * vm, flowprobe_variant_t which)
{
if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
{
- vlib_node_increment_counter (vm, flowprobe_l2_node.index,
+ vlib_node_increment_counter (vm, flowprobe_output_l2_node.index,
FLOWPROBE_ERROR_BUFFER, 1);
return 0;
}
@@ -656,7 +687,7 @@ flowprobe_get_buffer (vlib_main_t * vm, flowprobe_variant_t which)
b0->flags |=
(VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_F_FLOW_REPORT);
vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = exp->fib_index;
fm->context[which].next_record_offset_per_worker[my_cpu_number] =
b0->current_length;
}
@@ -669,9 +700,10 @@ flowprobe_export_entry (vlib_main_t * vm, flowprobe_entry_t * e)
{
u32 my_cpu_number = vm->thread_index;
flowprobe_main_t *fm = &flowprobe_main;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = pool_elt_at_index (flow_report_main.exporters, 0);
vlib_buffer_t *b0;
bool collect_ip4 = false, collect_ip6 = false;
+ bool collect_l4 = false;
flowprobe_variant_t which = e->key.which;
flowprobe_record_t flags = fm->context[which].flags;
u16 offset =
@@ -690,6 +722,10 @@ flowprobe_export_entry (vlib_main_t * vm, flowprobe_entry_t * e)
collect_ip4 = which == FLOW_VARIANT_L2_IP4 || which == FLOW_VARIANT_IP4;
collect_ip6 = which == FLOW_VARIANT_L2_IP6 || which == FLOW_VARIANT_IP6;
}
+ if (flags & FLOW_RECORD_L4)
+ {
+ collect_l4 = (which != FLOW_VARIANT_L2);
+ }
offset += flowprobe_common_add (b0, e, offset);
@@ -699,26 +735,27 @@ flowprobe_export_entry (vlib_main_t * vm, flowprobe_entry_t * e)
offset += flowprobe_l3_ip6_add (b0, e, offset);
if (collect_ip4)
offset += flowprobe_l3_ip4_add (b0, e, offset);
- if (flags & FLOW_RECORD_L4)
+ if (collect_l4)
offset += flowprobe_l4_add (b0, e, offset);
/* Reset per flow-export counters */
e->packetcount = 0;
e->octetcount = 0;
e->last_exported = vlib_time_now (vm);
+ e->prot.tcp.flags = 0;
b0->current_length = offset;
fm->context[which].next_record_offset_per_worker[my_cpu_number] = offset;
/* Time to flush the buffer? */
- if (offset + fm->template_size[flags] > frm->path_mtu)
+ if (offset + fm->template_size[flags] > exp->path_mtu)
flowprobe_export_send (vm, b0, which);
}
uword
-flowprobe_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame,
- flowprobe_variant_t which)
+flowprobe_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, flowprobe_variant_t which,
+ flowprobe_direction_t direction)
{
u32 n_left_from, *from, *to_next;
flowprobe_next_t next_index;
@@ -778,20 +815,22 @@ flowprobe_node_fn (vlib_main_t * vm,
u16 ethertype0 = clib_net_to_host_u16 (eh0->type);
if (PREDICT_TRUE ((b0->flags & VNET_BUFFER_F_FLOW_REPORT) == 0))
- add_to_flow_record_state (vm, node, fm, b0, timestamp, len0,
- flowprobe_get_variant
- (which, fm->context[which].flags,
- ethertype0), 0);
+ add_to_flow_record_state (
+ vm, node, fm, b0, timestamp, len0,
+ flowprobe_get_variant (which, fm->context[which].flags,
+ ethertype0),
+ direction, 0);
len1 = vlib_buffer_length_in_chain (vm, b1);
ethernet_header_t *eh1 = vlib_buffer_get_current (b1);
u16 ethertype1 = clib_net_to_host_u16 (eh1->type);
if (PREDICT_TRUE ((b1->flags & VNET_BUFFER_F_FLOW_REPORT) == 0))
- add_to_flow_record_state (vm, node, fm, b1, timestamp, len1,
- flowprobe_get_variant
- (which, fm->context[which].flags,
- ethertype1), 0);
+ add_to_flow_record_state (
+ vm, node, fm, b1, timestamp, len1,
+ flowprobe_get_variant (which, fm->context[which].flags,
+ ethertype1),
+ direction, 0);
/* verify speculative enqueues, maybe switch current next frame */
vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
@@ -829,10 +868,11 @@ flowprobe_node_fn (vlib_main_t * vm,
&& (b0->flags & VLIB_BUFFER_IS_TRACED)))
t = vlib_add_trace (vm, node, b0, sizeof (*t));
- add_to_flow_record_state (vm, node, fm, b0, timestamp, len0,
- flowprobe_get_variant
- (which, fm->context[which].flags,
- ethertype0), t);
+ add_to_flow_record_state (
+ vm, node, fm, b0, timestamp, len0,
+ flowprobe_get_variant (which, fm->context[which].flags,
+ ethertype0),
+ direction, t);
}
/* verify speculative enqueue, maybe switch current next frame */
@@ -847,24 +887,51 @@ flowprobe_node_fn (vlib_main_t * vm,
}
static uword
-flowprobe_ip4_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
+flowprobe_input_ip4_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP4,
+ FLOW_DIRECTION_RX);
+}
+
+static uword
+flowprobe_input_ip6_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP6,
+ FLOW_DIRECTION_RX);
+}
+
+static uword
+flowprobe_input_l2_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_L2,
+ FLOW_DIRECTION_RX);
+}
+
+static uword
+flowprobe_output_ip4_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
{
- return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP4);
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP4,
+ FLOW_DIRECTION_TX);
}
static uword
-flowprobe_ip6_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
+flowprobe_output_ip6_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
{
- return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP6);
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_IP6,
+ FLOW_DIRECTION_TX);
}
static uword
-flowprobe_l2_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
+flowprobe_output_l2_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
{
- return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_L2);
+ return flowprobe_node_fn (vm, node, frame, FLOW_VARIANT_L2,
+ FLOW_DIRECTION_TX);
}
static inline void
@@ -879,25 +946,63 @@ flush_record (flowprobe_variant_t which)
void
flowprobe_flush_callback_ip4 (void)
{
+ vlib_main_t *worker_vm;
+ u32 i;
+
+ /* Flush for each worker thread */
+ for (i = 1; i < vlib_get_n_threads (); i++)
+ {
+ worker_vm = vlib_get_main_by_index (i);
+ if (worker_vm)
+ vlib_node_set_interrupt_pending (worker_vm,
+ flowprobe_flush_ip4_node.index);
+ }
+
+ /* Flush for the main thread */
flush_record (FLOW_VARIANT_IP4);
}
void
flowprobe_flush_callback_ip6 (void)
{
+ vlib_main_t *worker_vm;
+ u32 i;
+
+ /* Flush for each worker thread */
+ for (i = 1; i < vlib_get_n_threads (); i++)
+ {
+ worker_vm = vlib_get_main_by_index (i);
+ if (worker_vm)
+ vlib_node_set_interrupt_pending (worker_vm,
+ flowprobe_flush_ip6_node.index);
+ }
+
+ /* Flush for the main thread */
flush_record (FLOW_VARIANT_IP6);
}
void
flowprobe_flush_callback_l2 (void)
{
+ vlib_main_t *worker_vm;
+ u32 i;
+
+ /* Flush for each worker thread */
+ for (i = 1; i < vlib_get_n_threads (); i++)
+ {
+ worker_vm = vlib_get_main_by_index (i);
+ if (worker_vm)
+ vlib_node_set_interrupt_pending (worker_vm,
+ flowprobe_flush_l2_node.index);
+ }
+
+ /* Flush for the main thread */
flush_record (FLOW_VARIANT_L2);
flush_record (FLOW_VARIANT_L2_IP4);
flush_record (FLOW_VARIANT_L2_IP6);
}
-
-static void
+void
flowprobe_delete_by_index (u32 my_cpu_number, u32 poolindex)
{
flowprobe_main_t *fm = &flowprobe_main;
@@ -922,14 +1027,15 @@ flowprobe_walker_process (vlib_main_t * vm,
vlib_node_runtime_t * rt, vlib_frame_t * f)
{
flowprobe_main_t *fm = &flowprobe_main;
- flow_report_main_t *frm = &flow_report_main;
flowprobe_entry_t *e;
+ ipfix_exporter_t *exp = pool_elt_at_index (flow_report_main.exporters, 0);
/*
* $$$$ Remove this check from here and track FRM status and disable
* this process if required.
*/
- if (frm->ipfix_collector.as_u32 == 0 || frm->src_address.as_u32 == 0)
+ if (ip_address_is_zero (&exp->ipfix_collector) ||
+ ip_address_is_zero (&exp->src_address))
{
fm->disabled = true;
return 0;
@@ -996,36 +1102,94 @@ flowprobe_walker_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (flowprobe_ip4_node) = {
- .function = flowprobe_ip4_node_fn,
- .name = "flowprobe-ip4",
+static uword
+flowprobe_flush_ip4 (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ flush_record (FLOW_VARIANT_IP4);
+
+ return 0;
+}
+
+static uword
+flowprobe_flush_ip6 (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ flush_record (FLOW_VARIANT_IP6);
+
+ return 0;
+}
+
+static uword
+flowprobe_flush_l2 (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ flush_record (FLOW_VARIANT_L2);
+ flush_record (FLOW_VARIANT_L2_IP4);
+ flush_record (FLOW_VARIANT_L2_IP6);
+
+ return 0;
+}
+
+VLIB_REGISTER_NODE (flowprobe_input_ip4_node) = {
+ .function = flowprobe_input_ip4_node_fn,
+ .name = "flowprobe-input-ip4",
+ .vector_size = sizeof (u32),
+ .format_trace = format_flowprobe_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
+ .error_strings = flowprobe_error_strings,
+ .n_next_nodes = FLOWPROBE_N_NEXT,
+ .next_nodes = FLOWPROBE_NEXT_NODES,
+};
+VLIB_REGISTER_NODE (flowprobe_input_ip6_node) = {
+ .function = flowprobe_input_ip6_node_fn,
+ .name = "flowprobe-input-ip6",
.vector_size = sizeof (u32),
.format_trace = format_flowprobe_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(flowprobe_error_strings),
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
.error_strings = flowprobe_error_strings,
.n_next_nodes = FLOWPROBE_N_NEXT,
.next_nodes = FLOWPROBE_NEXT_NODES,
};
-VLIB_REGISTER_NODE (flowprobe_ip6_node) = {
- .function = flowprobe_ip6_node_fn,
- .name = "flowprobe-ip6",
+VLIB_REGISTER_NODE (flowprobe_input_l2_node) = {
+ .function = flowprobe_input_l2_node_fn,
+ .name = "flowprobe-input-l2",
.vector_size = sizeof (u32),
.format_trace = format_flowprobe_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(flowprobe_error_strings),
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
.error_strings = flowprobe_error_strings,
.n_next_nodes = FLOWPROBE_N_NEXT,
.next_nodes = FLOWPROBE_NEXT_NODES,
};
-VLIB_REGISTER_NODE (flowprobe_l2_node) = {
- .function = flowprobe_l2_node_fn,
- .name = "flowprobe-l2",
+VLIB_REGISTER_NODE (flowprobe_output_ip4_node) = {
+ .function = flowprobe_output_ip4_node_fn,
+ .name = "flowprobe-output-ip4",
.vector_size = sizeof (u32),
.format_trace = format_flowprobe_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(flowprobe_error_strings),
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
+ .error_strings = flowprobe_error_strings,
+ .n_next_nodes = FLOWPROBE_N_NEXT,
+ .next_nodes = FLOWPROBE_NEXT_NODES,
+};
+VLIB_REGISTER_NODE (flowprobe_output_ip6_node) = {
+ .function = flowprobe_output_ip6_node_fn,
+ .name = "flowprobe-output-ip6",
+ .vector_size = sizeof (u32),
+ .format_trace = format_flowprobe_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
+ .error_strings = flowprobe_error_strings,
+ .n_next_nodes = FLOWPROBE_N_NEXT,
+ .next_nodes = FLOWPROBE_NEXT_NODES,
+};
+VLIB_REGISTER_NODE (flowprobe_output_l2_node) = {
+ .function = flowprobe_output_l2_node_fn,
+ .name = "flowprobe-output-l2",
+ .vector_size = sizeof (u32),
+ .format_trace = format_flowprobe_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (flowprobe_error_strings),
.error_strings = flowprobe_error_strings,
.n_next_nodes = FLOWPROBE_N_NEXT,
.next_nodes = FLOWPROBE_NEXT_NODES,
@@ -1036,7 +1200,24 @@ VLIB_REGISTER_NODE (flowprobe_walker_node) = {
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
};
-/* *INDENT-ON* */
+VLIB_REGISTER_NODE (flowprobe_flush_ip4_node) = {
+ .function = flowprobe_flush_ip4,
+ .name = "flowprobe-flush-ip4",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+};
+VLIB_REGISTER_NODE (flowprobe_flush_ip6_node) = {
+ .function = flowprobe_flush_ip6,
+ .name = "flowprobe-flush-ip6",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+};
+VLIB_REGISTER_NODE (flowprobe_flush_l2_node) = {
+ .function = flowprobe_flush_l2,
+ .name = "flowprobe-flush-l2",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/gbp/gbp.api b/src/plugins/gbp/gbp.api
deleted file mode 100644
index 525e70536bd..00000000000
--- a/src/plugins/gbp/gbp.api
+++ /dev/null
@@ -1,470 +0,0 @@
-/* Hey Emacs use -*- mode: C -*- */
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "2.0.0";
-
-import "vnet/ip/ip_types.api";
-import "vnet/ethernet/ethernet_types.api";
-import "vnet/interface_types.api";
-
-enum gbp_bridge_domain_flags
-{
- GBP_BD_API_FLAG_NONE = 0,
- GBP_BD_API_FLAG_DO_NOT_LEARN = 1,
- GBP_BD_API_FLAG_UU_FWD_DROP = 2,
- GBP_BD_API_FLAG_MCAST_DROP = 4,
- GBP_BD_API_FLAG_UCAST_ARP = 8,
-};
-
-typedef gbp_bridge_domain
-{
- u32 bd_id;
- u32 rd_id;
- vl_api_gbp_bridge_domain_flags_t flags;
- vl_api_interface_index_t bvi_sw_if_index;
- vl_api_interface_index_t uu_fwd_sw_if_index;
- vl_api_interface_index_t bm_flood_sw_if_index;
-};
-
- autoreply define gbp_bridge_domain_add
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- vl_api_gbp_bridge_domain_t bd;
-};
- autoreply define gbp_bridge_domain_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- u32 bd_id;
-};
-autoreply define gbp_bridge_domain_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-define gbp_bridge_domain_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_bridge_domain_t bd;
-};
-
-typedef u16 gbp_scope;
-
-typedef gbp_route_domain
-{
- u32 rd_id;
- u32 ip4_table_id;
- u32 ip6_table_id;
- vl_api_interface_index_t ip4_uu_sw_if_index;
- vl_api_interface_index_t ip6_uu_sw_if_index;
- vl_api_gbp_scope_t scope;
-};
-
- autoreply define gbp_route_domain_add
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- vl_api_gbp_route_domain_t rd;
-};
- autoreply define gbp_route_domain_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- u32 rd_id;
-};
-autoreply define gbp_route_domain_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-define gbp_route_domain_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_route_domain_t rd;
-};
-
-/** \brief Endpoint
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-
-enum gbp_endpoint_flags
-{
- GBP_API_ENDPOINT_FLAG_NONE = 0,
- GBP_API_ENDPOINT_FLAG_BOUNCE = 0x1,
- GBP_API_ENDPOINT_FLAG_REMOTE = 0x2,
- GBP_API_ENDPOINT_FLAG_LEARNT = 0x4,
- GBP_API_ENDPOINT_FLAG_EXTERNAL = 0x8,
-};
-
-typedef gbp_endpoint_tun
-{
- vl_api_address_t src;
- vl_api_address_t dst;
-};
-
-typedef gbp_endpoint
-{
- vl_api_interface_index_t sw_if_index;
- u16 sclass;
- vl_api_gbp_endpoint_flags_t flags;
- vl_api_mac_address_t mac;
- vl_api_gbp_endpoint_tun_t tun;
- u8 n_ips;
- vl_api_address_t ips[n_ips];
-};
-
- define gbp_endpoint_add
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- vl_api_gbp_endpoint_t endpoint;
-};
-
-define gbp_endpoint_add_reply
-{
- option status="in_progress";
- u32 context;
- i32 retval;
- u32 handle;
-};
-
- autoreply define gbp_endpoint_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- u32 handle;
-};
-
-define gbp_endpoint_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_endpoint_details
-{
- option status="in_progress";
- u32 context;
- f64 age;
- u32 handle;
- vl_api_gbp_endpoint_t endpoint;
-};
-
-typedef gbp_endpoint_retention
-{
- u32 remote_ep_timeout;
-};
-
-typedef gbp_endpoint_group
-{
- u32 vnid;
- u16 sclass;
- u32 bd_id;
- u32 rd_id;
- vl_api_interface_index_t uplink_sw_if_index;
- vl_api_gbp_endpoint_retention_t retention;
-};
-
- autoreply define gbp_endpoint_group_add
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- vl_api_gbp_endpoint_group_t epg;
-};
- autoreply define gbp_endpoint_group_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- u16 sclass;
-};
-
-define gbp_endpoint_group_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_endpoint_group_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_endpoint_group_t epg;
-};
-
-typedef gbp_recirc
-{
- vl_api_interface_index_t sw_if_index;
- u16 sclass;
- bool is_ext;
-};
-
- autoreply define gbp_recirc_add_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_gbp_recirc_t recirc;
-};
-
-define gbp_recirc_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_recirc_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_recirc_t recirc;
-};
-
-enum gbp_subnet_type
-{
- GBP_API_SUBNET_TRANSPORT,
- GBP_API_SUBNET_STITCHED_INTERNAL,
- GBP_API_SUBNET_STITCHED_EXTERNAL,
- GBP_API_SUBNET_L3_OUT,
- GBP_API_SUBNET_ANON_L3_OUT,
-};
-
-typedef gbp_subnet
-{
- u32 rd_id;
- vl_api_interface_index_t sw_if_index [default= 0xffffffff];
- u16 sclass [default=0xffffffff];
- vl_api_gbp_subnet_type_t type;
- vl_api_prefix_t prefix;
-};
-
- autoreply define gbp_subnet_add_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_gbp_subnet_t subnet;
-};
-
-define gbp_subnet_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_subnet_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_subnet_t subnet;
-};
-
-typedef gbp_next_hop
-{
- vl_api_address_t ip;
- vl_api_mac_address_t mac;
- u32 bd_id;
- u32 rd_id;
-};
-
-enum gbp_hash_mode
-{
- GBP_API_HASH_MODE_SRC_IP,
- GBP_API_HASH_MODE_DST_IP,
- GBP_API_HASH_MODE_SYMMETRIC,
-};
-
-typedef gbp_next_hop_set
-{
- vl_api_gbp_hash_mode_t hash_mode;
- u8 n_nhs;
- vl_api_gbp_next_hop_t nhs[8];
-};
-
-enum gbp_rule_action
-{
- GBP_API_RULE_PERMIT,
- GBP_API_RULE_DENY,
- GBP_API_RULE_REDIRECT,
-};
-
-typedef gbp_rule
-{
- vl_api_gbp_rule_action_t action;
- vl_api_gbp_next_hop_set_t nh_set;
-};
-
-typedef gbp_contract
-{
- vl_api_gbp_scope_t scope;
- u16 sclass;
- u16 dclass;
- u32 acl_index;
- u8 n_ether_types;
- u16 allowed_ethertypes[16];
- u8 n_rules;
- vl_api_gbp_rule_t rules[n_rules];
-};
-
- define gbp_contract_add_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_gbp_contract_t contract;
-};
-define gbp_contract_add_del_reply
-{
- option status="in_progress";
- u32 context;
- i32 retval;
- u32 stats_index;
-};
-
-define gbp_contract_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_contract_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_contract_t contract;
-};
-
-/**
- * @brief Configure a 'base' tunnel from which learned tunnels
- * are permitted to derive
- * A base tunnel consists only of the VNI, any src,dst IP
- * pair is thus allowed.
- */
-enum gbp_vxlan_tunnel_mode
-{
- GBP_VXLAN_TUNNEL_MODE_L2,
- GBP_VXLAN_TUNNEL_MODE_L3,
-};
-
-typedef gbp_vxlan_tunnel
-{
- u32 vni;
- vl_api_gbp_vxlan_tunnel_mode_t mode;
- u32 bd_rd_id;
- vl_api_ip4_address_t src;
-};
-
- define gbp_vxlan_tunnel_add
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- vl_api_gbp_vxlan_tunnel_t tunnel;
-};
-
-define gbp_vxlan_tunnel_add_reply
-{
- option status="in_progress";
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
- autoreply define gbp_vxlan_tunnel_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- u32 vni;
-};
-
-define gbp_vxlan_tunnel_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_vxlan_tunnel_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_vxlan_tunnel_t tunnel;
-};
-
-enum gbp_ext_itf_flags
-{
- GBP_API_EXT_ITF_F_NONE = 0,
- GBP_API_EXT_ITF_F_ANON = 1,
-};
-
-typedef gbp_ext_itf
-{
- vl_api_interface_index_t sw_if_index;
- u32 bd_id;
- u32 rd_id;
- vl_api_gbp_ext_itf_flags_t flags;
-};
-
- autoreply define gbp_ext_itf_add_del
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_gbp_ext_itf_t ext_itf;
-};
-
-define gbp_ext_itf_dump
-{
- option status="in_progress";
- u32 client_index;
- u32 context;
-};
-
-define gbp_ext_itf_details
-{
- option status="in_progress";
- u32 context;
- vl_api_gbp_ext_itf_t ext_itf;
-};
-
-/*
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp.h b/src/plugins/gbp/gbp.h
deleted file mode 100644
index 50039b3bdcf..00000000000
--- a/src/plugins/gbp/gbp.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Group Base Policy (GBP) defines:
- * - endpoints: typically a VM or container that is connected to the
- * virtual switch/router (i.e. to VPP)
- * - endpoint-group: (EPG) a collection of endpoints
- * - policy: rules determining which traffic can pass between EPGs a.k.a
- * a 'contract'
- *
- * Here, policy is implemented via an ACL.
- * EPG classification for transit packets is determined by:
- * - source EPG: from the packet's input interface
- * - destination EPG: from the packet's destination IP address.
- *
- */
-
-#ifndef __GBP_H__
-#define __GBP_H__
-
-#include <plugins/acl/exports.h>
-
-#include <plugins/gbp/gbp_types.h>
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_endpoint_group.h>
-#include <plugins/gbp/gbp_subnet.h>
-#include <plugins/gbp/gbp_recirc.h>
-
-typedef struct
-{
- u32 gbp_acl_user_id;
- acl_plugin_methods_t acl_plugin;
-} gbp_main_t;
-
-extern gbp_main_t gbp_main;
-
-typedef enum gbp_policy_type_t_
-{
- GBP_POLICY_PORT,
- GBP_POLICY_MAC,
- GBP_POLICY_LPM,
- GBP_N_POLICY
-#define GBP_N_POLICY GBP_N_POLICY
-} gbp_policy_type_t;
-
-/**
- * Grouping of global data for the GBP source EPG classification feature
- */
-typedef struct gbp_policy_main_t_
-{
- /**
- * Next nodes for L2 output features
- */
- u32 l2_output_feat_next[GBP_N_POLICY][32];
-} gbp_policy_main_t;
-
-extern gbp_policy_main_t gbp_policy_main;
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_api.c b/src/plugins/gbp/gbp_api.c
deleted file mode 100644
index ab89172b1af..00000000000
--- a/src/plugins/gbp/gbp_api.c
+++ /dev/null
@@ -1,1154 +0,0 @@
-/*
- *------------------------------------------------------------------
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vnet/plugin/plugin.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/ip/ip_types_api.h>
-#include <vnet/ethernet/ethernet_types_api.h>
-#include <vpp/app/version.h>
-
-#include <gbp/gbp.h>
-#include <gbp/gbp_learn.h>
-#include <gbp/gbp_itf.h>
-#include <gbp/gbp_vxlan.h>
-#include <gbp/gbp_bridge_domain.h>
-#include <gbp/gbp_route_domain.h>
-#include <gbp/gbp_ext_itf.h>
-#include <gbp/gbp_contract.h>
-
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-
-/* define message IDs */
-#include <gbp/gbp.api_enum.h>
-#include <gbp/gbp.api_types.h>
-#include <vnet/format_fns.h>
-#include <vlibapi/api_helper_macros.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-
-gbp_main_t gbp_main;
-
-static u16 msg_id_base;
-
-#define GBP_MSG_BASE msg_id_base
-
-static gbp_endpoint_flags_t
-gbp_endpoint_flags_decode (vl_api_gbp_endpoint_flags_t v)
-{
- gbp_endpoint_flags_t f = GBP_ENDPOINT_FLAG_NONE;
-
- v = ntohl (v);
-
- if (v & GBP_API_ENDPOINT_FLAG_BOUNCE)
- f |= GBP_ENDPOINT_FLAG_BOUNCE;
- if (v & GBP_API_ENDPOINT_FLAG_REMOTE)
- f |= GBP_ENDPOINT_FLAG_REMOTE;
- if (v & GBP_API_ENDPOINT_FLAG_LEARNT)
- f |= GBP_ENDPOINT_FLAG_LEARNT;
- if (v & GBP_API_ENDPOINT_FLAG_EXTERNAL)
- f |= GBP_ENDPOINT_FLAG_EXTERNAL;
-
- return (f);
-}
-
-static vl_api_gbp_endpoint_flags_t
-gbp_endpoint_flags_encode (gbp_endpoint_flags_t f)
-{
- vl_api_gbp_endpoint_flags_t v = 0;
-
-
- if (f & GBP_ENDPOINT_FLAG_BOUNCE)
- v |= GBP_API_ENDPOINT_FLAG_BOUNCE;
- if (f & GBP_ENDPOINT_FLAG_REMOTE)
- v |= GBP_API_ENDPOINT_FLAG_REMOTE;
- if (f & GBP_ENDPOINT_FLAG_LEARNT)
- v |= GBP_API_ENDPOINT_FLAG_LEARNT;
- if (f & GBP_ENDPOINT_FLAG_EXTERNAL)
- v |= GBP_API_ENDPOINT_FLAG_EXTERNAL;
-
- v = htonl (v);
-
- return (v);
-}
-
-static void
-vl_api_gbp_endpoint_add_t_handler (vl_api_gbp_endpoint_add_t * mp)
-{
- vl_api_gbp_endpoint_add_reply_t *rmp;
- gbp_endpoint_flags_t gef;
- u32 sw_if_index, handle;
- ip46_address_t *ips;
- mac_address_t mac;
- int rv = 0, ii;
-
- handle = INDEX_INVALID;
-
- VALIDATE_SW_IF_INDEX (&(mp->endpoint));
-
- gef = gbp_endpoint_flags_decode (mp->endpoint.flags), ips = NULL;
- sw_if_index = ntohl (mp->endpoint.sw_if_index);
-
- if (mp->endpoint.n_ips)
- {
- vec_validate (ips, mp->endpoint.n_ips - 1);
-
- vec_foreach_index (ii, ips)
- {
- ip_address_decode (&mp->endpoint.ips[ii], &ips[ii]);
- }
- }
- mac_address_decode (mp->endpoint.mac, &mac);
-
- if (GBP_ENDPOINT_FLAG_REMOTE & gef)
- {
- ip46_address_t tun_src, tun_dst;
-
- ip_address_decode (&mp->endpoint.tun.src, &tun_src);
- ip_address_decode (&mp->endpoint.tun.dst, &tun_dst);
-
- rv = gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
- sw_if_index, ips, &mac,
- INDEX_INVALID, INDEX_INVALID,
- ntohs (mp->endpoint.sclass),
- gef, &tun_src, &tun_dst, &handle);
- }
- else
- {
- rv = gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
- sw_if_index, ips, &mac,
- INDEX_INVALID, INDEX_INVALID,
- ntohs (mp->endpoint.sclass),
- gef, NULL, NULL, &handle);
- }
- vec_free (ips);
- BAD_SW_IF_INDEX_LABEL;
-
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_GBP_ENDPOINT_ADD_REPLY + GBP_MSG_BASE,
- ({
- rmp->handle = htonl (handle);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_gbp_endpoint_del_t_handler (vl_api_gbp_endpoint_del_t * mp)
-{
- vl_api_gbp_endpoint_del_reply_t *rmp;
- int rv = 0;
-
- gbp_endpoint_unlock (GBP_ENDPOINT_SRC_CP, ntohl (mp->handle));
-
- REPLY_MACRO (VL_API_GBP_ENDPOINT_DEL_REPLY + GBP_MSG_BASE);
-}
-
-typedef struct gbp_walk_ctx_t_
-{
- vl_api_registration_t *reg;
- u32 context;
-} gbp_walk_ctx_t;
-
-static walk_rc_t
-gbp_endpoint_send_details (index_t gei, void *args)
-{
- vl_api_gbp_endpoint_details_t *mp;
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_fwd_t *gef;
- gbp_endpoint_t *ge;
- gbp_walk_ctx_t *ctx;
- u8 n_ips, ii;
-
- ctx = args;
- ge = gbp_endpoint_get (gei);
-
- n_ips = vec_len (ge->ge_key.gek_ips);
- mp = vl_msg_api_alloc (sizeof (*mp) + (sizeof (*mp->endpoint.ips) * n_ips));
- if (!mp)
- return 1;
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_ENDPOINT_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- gel = &ge->ge_locs[0];
- gef = &ge->ge_fwd;
-
- if (gbp_endpoint_is_remote (ge))
- {
- mp->endpoint.sw_if_index = ntohl (gel->tun.gel_parent_sw_if_index);
- ip_address_encode (&gel->tun.gel_src, IP46_TYPE_ANY,
- &mp->endpoint.tun.src);
- ip_address_encode (&gel->tun.gel_dst, IP46_TYPE_ANY,
- &mp->endpoint.tun.dst);
- }
- else
- {
- mp->endpoint.sw_if_index =
- ntohl (gbp_itf_get_sw_if_index (gef->gef_itf));
- }
- mp->endpoint.sclass = ntohs (ge->ge_fwd.gef_sclass);
- mp->endpoint.n_ips = n_ips;
- mp->endpoint.flags = gbp_endpoint_flags_encode (gef->gef_flags);
- mp->handle = htonl (gei);
- mp->age =
- clib_host_to_net_f64 (vlib_time_now (vlib_get_main ()) -
- ge->ge_last_time);
- mac_address_encode (&ge->ge_key.gek_mac, mp->endpoint.mac);
-
- vec_foreach_index (ii, ge->ge_key.gek_ips)
- {
- ip_address_encode (&ge->ge_key.gek_ips[ii].fp_addr,
- IP46_TYPE_ANY, &mp->endpoint.ips[ii]);
- }
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (WALK_CONTINUE);
-}
-
-static void
-vl_api_gbp_endpoint_dump_t_handler (vl_api_gbp_endpoint_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_endpoint_walk (gbp_endpoint_send_details, &ctx);
-}
-
-static void
-gbp_retention_decode (const vl_api_gbp_endpoint_retention_t * in,
- gbp_endpoint_retention_t * out)
-{
- out->remote_ep_timeout = ntohl (in->remote_ep_timeout);
-}
-
-static void
- vl_api_gbp_endpoint_group_add_t_handler
- (vl_api_gbp_endpoint_group_add_t * mp)
-{
- vl_api_gbp_endpoint_group_add_reply_t *rmp;
- gbp_endpoint_retention_t retention;
- int rv = 0;
-
- gbp_retention_decode (&mp->epg.retention, &retention);
-
- rv = gbp_endpoint_group_add_and_lock (ntohl (mp->epg.vnid),
- ntohs (mp->epg.sclass),
- ntohl (mp->epg.bd_id),
- ntohl (mp->epg.rd_id),
- ntohl (mp->epg.uplink_sw_if_index),
- &retention);
-
- REPLY_MACRO (VL_API_GBP_ENDPOINT_GROUP_ADD_REPLY + GBP_MSG_BASE);
-}
-
-static void
- vl_api_gbp_endpoint_group_del_t_handler
- (vl_api_gbp_endpoint_group_del_t * mp)
-{
- vl_api_gbp_endpoint_group_del_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_endpoint_group_delete (ntohs (mp->sclass));
-
- REPLY_MACRO (VL_API_GBP_ENDPOINT_GROUP_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static gbp_bridge_domain_flags_t
-gbp_bridge_domain_flags_from_api (vl_api_gbp_bridge_domain_flags_t a)
-{
- gbp_bridge_domain_flags_t g;
-
- g = GBP_BD_FLAG_NONE;
- a = clib_net_to_host_u32 (a);
-
- if (a & GBP_BD_API_FLAG_DO_NOT_LEARN)
- g |= GBP_BD_FLAG_DO_NOT_LEARN;
- if (a & GBP_BD_API_FLAG_UU_FWD_DROP)
- g |= GBP_BD_FLAG_UU_FWD_DROP;
- if (a & GBP_BD_API_FLAG_MCAST_DROP)
- g |= GBP_BD_FLAG_MCAST_DROP;
- if (a & GBP_BD_API_FLAG_UCAST_ARP)
- g |= GBP_BD_FLAG_UCAST_ARP;
-
- return (g);
-}
-
-static void
-vl_api_gbp_bridge_domain_add_t_handler (vl_api_gbp_bridge_domain_add_t * mp)
-{
- vl_api_gbp_bridge_domain_add_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_bridge_domain_add_and_lock (ntohl (mp->bd.bd_id),
- ntohl (mp->bd.rd_id),
- gbp_bridge_domain_flags_from_api
- (mp->bd.flags),
- ntohl (mp->bd.bvi_sw_if_index),
- ntohl (mp->bd.uu_fwd_sw_if_index),
- ntohl (mp->bd.bm_flood_sw_if_index));
-
- REPLY_MACRO (VL_API_GBP_BRIDGE_DOMAIN_ADD_REPLY + GBP_MSG_BASE);
-}
-
-static void
-vl_api_gbp_bridge_domain_del_t_handler (vl_api_gbp_bridge_domain_del_t * mp)
-{
- vl_api_gbp_bridge_domain_del_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_bridge_domain_delete (ntohl (mp->bd_id));
-
- REPLY_MACRO (VL_API_GBP_BRIDGE_DOMAIN_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static void
-vl_api_gbp_route_domain_add_t_handler (vl_api_gbp_route_domain_add_t * mp)
-{
- vl_api_gbp_route_domain_add_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_route_domain_add_and_lock (ntohl (mp->rd.rd_id),
- ntohs (mp->rd.scope),
- ntohl (mp->rd.ip4_table_id),
- ntohl (mp->rd.ip6_table_id),
- ntohl (mp->rd.ip4_uu_sw_if_index),
- ntohl (mp->rd.ip6_uu_sw_if_index));
-
- REPLY_MACRO (VL_API_GBP_ROUTE_DOMAIN_ADD_REPLY + GBP_MSG_BASE);
-}
-
-static void
-vl_api_gbp_route_domain_del_t_handler (vl_api_gbp_route_domain_del_t * mp)
-{
- vl_api_gbp_route_domain_del_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_route_domain_delete (ntohl (mp->rd_id));
-
- REPLY_MACRO (VL_API_GBP_ROUTE_DOMAIN_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static int
-gub_subnet_type_from_api (vl_api_gbp_subnet_type_t a, gbp_subnet_type_t * t)
-{
- a = clib_net_to_host_u32 (a);
-
- switch (a)
- {
- case GBP_API_SUBNET_TRANSPORT:
- *t = GBP_SUBNET_TRANSPORT;
- return (0);
- case GBP_API_SUBNET_L3_OUT:
- *t = GBP_SUBNET_L3_OUT;
- return (0);
- case GBP_API_SUBNET_ANON_L3_OUT:
- *t = GBP_SUBNET_ANON_L3_OUT;
- return (0);
- case GBP_API_SUBNET_STITCHED_INTERNAL:
- *t = GBP_SUBNET_STITCHED_INTERNAL;
- return (0);
- case GBP_API_SUBNET_STITCHED_EXTERNAL:
- *t = GBP_SUBNET_STITCHED_EXTERNAL;
- return (0);
- }
-
- return (-1);
-}
-
-static void
-vl_api_gbp_subnet_add_del_t_handler (vl_api_gbp_subnet_add_del_t * mp)
-{
- vl_api_gbp_subnet_add_del_reply_t *rmp;
- gbp_subnet_type_t type;
- fib_prefix_t pfx;
- int rv = 0;
-
- ip_prefix_decode (&mp->subnet.prefix, &pfx);
-
- rv = gub_subnet_type_from_api (mp->subnet.type, &type);
-
- if (0 != rv)
- goto out;
-
- if (mp->is_add)
- rv = gbp_subnet_add (ntohl (mp->subnet.rd_id),
- &pfx, type,
- ntohl (mp->subnet.sw_if_index),
- ntohs (mp->subnet.sclass));
- else
- rv = gbp_subnet_del (ntohl (mp->subnet.rd_id), &pfx);
-
-out:
- REPLY_MACRO (VL_API_GBP_SUBNET_ADD_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static vl_api_gbp_subnet_type_t
-gub_subnet_type_to_api (gbp_subnet_type_t t)
-{
- vl_api_gbp_subnet_type_t a = 0;
-
- switch (t)
- {
- case GBP_SUBNET_TRANSPORT:
- a = GBP_API_SUBNET_TRANSPORT;
- break;
- case GBP_SUBNET_STITCHED_INTERNAL:
- a = GBP_API_SUBNET_STITCHED_INTERNAL;
- break;
- case GBP_SUBNET_STITCHED_EXTERNAL:
- a = GBP_API_SUBNET_STITCHED_EXTERNAL;
- break;
- case GBP_SUBNET_L3_OUT:
- a = GBP_API_SUBNET_L3_OUT;
- break;
- case GBP_SUBNET_ANON_L3_OUT:
- a = GBP_API_SUBNET_ANON_L3_OUT;
- break;
- }
-
- a = clib_host_to_net_u32 (a);
-
- return (a);
-}
-
-static walk_rc_t
-gbp_subnet_send_details (u32 rd_id,
- const fib_prefix_t * pfx,
- gbp_subnet_type_t type,
- u32 sw_if_index, sclass_t sclass, void *args)
-{
- vl_api_gbp_subnet_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return 1;
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_SUBNET_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->subnet.type = gub_subnet_type_to_api (type);
- mp->subnet.sw_if_index = ntohl (sw_if_index);
- mp->subnet.sclass = ntohs (sclass);
- mp->subnet.rd_id = ntohl (rd_id);
- ip_prefix_encode (pfx, &mp->subnet.prefix);
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (WALK_CONTINUE);
-}
-
-static void
-vl_api_gbp_subnet_dump_t_handler (vl_api_gbp_subnet_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_subnet_walk (gbp_subnet_send_details, &ctx);
-}
-
-static int
-gbp_endpoint_group_send_details (gbp_endpoint_group_t * gg, void *args)
-{
- vl_api_gbp_endpoint_group_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return 1;
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_ENDPOINT_GROUP_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->epg.uplink_sw_if_index = ntohl (gg->gg_uplink_sw_if_index);
- mp->epg.vnid = ntohl (gg->gg_vnid);
- mp->epg.sclass = ntohs (gg->gg_sclass);
- mp->epg.bd_id = ntohl (gbp_endpoint_group_get_bd_id (gg));
- mp->epg.rd_id = ntohl (gbp_route_domain_get_rd_id (gg->gg_rd));
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (1);
-}
-
-static void
-vl_api_gbp_endpoint_group_dump_t_handler (vl_api_gbp_endpoint_group_dump_t *
- mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_endpoint_group_walk (gbp_endpoint_group_send_details, &ctx);
-}
-
-static int
-gbp_bridge_domain_send_details (gbp_bridge_domain_t * gb, void *args)
-{
- vl_api_gbp_bridge_domain_details_t *mp;
- gbp_route_domain_t *gr;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return 1;
-
- memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_BRIDGE_DOMAIN_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- gr = gbp_route_domain_get (gb->gb_rdi);
-
- mp->bd.bd_id = ntohl (gb->gb_bd_id);
- mp->bd.rd_id = ntohl (gr->grd_id);
- mp->bd.bvi_sw_if_index = ntohl (gb->gb_bvi_sw_if_index);
- mp->bd.uu_fwd_sw_if_index = ntohl (gb->gb_uu_fwd_sw_if_index);
- mp->bd.bm_flood_sw_if_index =
- ntohl (gbp_itf_get_sw_if_index (gb->gb_bm_flood_itf));
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (1);
-}
-
-static void
-vl_api_gbp_bridge_domain_dump_t_handler (vl_api_gbp_bridge_domain_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_bridge_domain_walk (gbp_bridge_domain_send_details, &ctx);
-}
-
-static int
-gbp_route_domain_send_details (gbp_route_domain_t * grd, void *args)
-{
- vl_api_gbp_route_domain_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return 1;
-
- memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_ROUTE_DOMAIN_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->rd.rd_id = ntohl (grd->grd_id);
- mp->rd.ip4_uu_sw_if_index =
- ntohl (grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP4]);
- mp->rd.ip6_uu_sw_if_index =
- ntohl (grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP6]);
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (1);
-}
-
-static void
-vl_api_gbp_route_domain_dump_t_handler (vl_api_gbp_route_domain_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_route_domain_walk (gbp_route_domain_send_details, &ctx);
-}
-
-static void
-vl_api_gbp_recirc_add_del_t_handler (vl_api_gbp_recirc_add_del_t * mp)
-{
- vl_api_gbp_recirc_add_del_reply_t *rmp;
- u32 sw_if_index;
- int rv = 0;
-
- sw_if_index = ntohl (mp->recirc.sw_if_index);
- if (!vnet_sw_if_index_is_api_valid (sw_if_index))
- goto bad_sw_if_index;
-
- if (mp->is_add)
- rv = gbp_recirc_add (sw_if_index,
- ntohs (mp->recirc.sclass), mp->recirc.is_ext);
- else
- rv = gbp_recirc_delete (sw_if_index);
-
- BAD_SW_IF_INDEX_LABEL;
-
- REPLY_MACRO (VL_API_GBP_RECIRC_ADD_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static walk_rc_t
-gbp_recirc_send_details (gbp_recirc_t * gr, void *args)
-{
- vl_api_gbp_recirc_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return (WALK_STOP);
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_RECIRC_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->recirc.sclass = ntohs (gr->gr_sclass);
- mp->recirc.sw_if_index = ntohl (gr->gr_sw_if_index);
- mp->recirc.is_ext = gr->gr_is_ext;
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (WALK_CONTINUE);
-}
-
-static void
-vl_api_gbp_recirc_dump_t_handler (vl_api_gbp_recirc_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_recirc_walk (gbp_recirc_send_details, &ctx);
-}
-
-static void
-vl_api_gbp_ext_itf_add_del_t_handler (vl_api_gbp_ext_itf_add_del_t * mp)
-{
- vl_api_gbp_ext_itf_add_del_reply_t *rmp;
- u32 sw_if_index = ~0;
- vl_api_gbp_ext_itf_t *ext_itf;
- int rv = 0;
-
- ext_itf = &mp->ext_itf;
- if (ext_itf)
- sw_if_index = ntohl (ext_itf->sw_if_index);
-
- if (!vnet_sw_if_index_is_api_valid (sw_if_index))
- goto bad_sw_if_index;
-
- if (mp->is_add)
- rv = gbp_ext_itf_add (sw_if_index,
- ntohl (ext_itf->bd_id), ntohl (ext_itf->rd_id),
- ntohl (ext_itf->flags));
- else
- rv = gbp_ext_itf_delete (sw_if_index);
-
- BAD_SW_IF_INDEX_LABEL;
-
- REPLY_MACRO (VL_API_GBP_EXT_ITF_ADD_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static walk_rc_t
-gbp_ext_itf_send_details (gbp_ext_itf_t * gx, void *args)
-{
- vl_api_gbp_ext_itf_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return (WALK_STOP);
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_EXT_ITF_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->ext_itf.flags = ntohl (gx->gx_flags);
- mp->ext_itf.bd_id = ntohl (gbp_bridge_domain_get_bd_id (gx->gx_bd));
- mp->ext_itf.rd_id = ntohl (gbp_route_domain_get_rd_id (gx->gx_rd));
- mp->ext_itf.sw_if_index = ntohl (gbp_itf_get_sw_if_index (gx->gx_itf));
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (WALK_CONTINUE);
-}
-
-static void
-vl_api_gbp_ext_itf_dump_t_handler (vl_api_gbp_ext_itf_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_ext_itf_walk (gbp_ext_itf_send_details, &ctx);
-}
-
-static int
-gbp_contract_rule_action_deocde (vl_api_gbp_rule_action_t in,
- gbp_rule_action_t * out)
-{
- in = clib_net_to_host_u32 (in);
-
- switch (in)
- {
- case GBP_API_RULE_PERMIT:
- *out = GBP_RULE_PERMIT;
- return (0);
- case GBP_API_RULE_DENY:
- *out = GBP_RULE_DENY;
- return (0);
- case GBP_API_RULE_REDIRECT:
- *out = GBP_RULE_REDIRECT;
- return (0);
- }
-
- return (-1);
-}
-
-static int
-gbp_hash_mode_decode (vl_api_gbp_hash_mode_t in, gbp_hash_mode_t * out)
-{
- in = clib_net_to_host_u32 (in);
-
- switch (in)
- {
- case GBP_API_HASH_MODE_SRC_IP:
- *out = GBP_HASH_MODE_SRC_IP;
- return (0);
- case GBP_API_HASH_MODE_DST_IP:
- *out = GBP_HASH_MODE_DST_IP;
- return (0);
- case GBP_API_HASH_MODE_SYMMETRIC:
- *out = GBP_HASH_MODE_SYMMETRIC;
- return (0);
- }
-
- return (-2);
-}
-
-static int
-gbp_next_hop_decode (const vl_api_gbp_next_hop_t * in, index_t * gnhi)
-{
- ip46_address_t ip;
- mac_address_t mac;
- index_t grd, gbd;
-
- gbd = gbp_bridge_domain_find_and_lock (ntohl (in->bd_id));
-
- if (INDEX_INVALID == gbd)
- return (VNET_API_ERROR_BD_NOT_MODIFIABLE);
-
- grd = gbp_route_domain_find_and_lock (ntohl (in->rd_id));
-
- if (INDEX_INVALID == grd)
- return (VNET_API_ERROR_NO_SUCH_FIB);
-
- ip_address_decode (&in->ip, &ip);
- mac_address_decode (in->mac, &mac);
-
- *gnhi = gbp_next_hop_alloc (&ip, grd, &mac, gbd);
-
- return (0);
-}
-
-static int
-gbp_next_hop_set_decode (const vl_api_gbp_next_hop_set_t * in,
- gbp_hash_mode_t * hash_mode, index_t ** out)
-{
-
- index_t *gnhis = NULL;
- int rv;
- u8 ii;
-
- rv = gbp_hash_mode_decode (in->hash_mode, hash_mode);
-
- if (0 != rv)
- return rv;
-
- vec_validate (gnhis, in->n_nhs - 1);
-
- for (ii = 0; ii < in->n_nhs; ii++)
- {
- rv = gbp_next_hop_decode (&in->nhs[ii], &gnhis[ii]);
-
- if (0 != rv)
- {
- vec_free (gnhis);
- break;
- }
- }
-
- *out = gnhis;
- return (rv);
-}
-
-static int
-gbp_contract_rule_decode (const vl_api_gbp_rule_t * in, index_t * gui)
-{
- gbp_hash_mode_t hash_mode;
- gbp_rule_action_t action;
- index_t *nhs = NULL;
- int rv;
-
- rv = gbp_contract_rule_action_deocde (in->action, &action);
-
- if (0 != rv)
- return rv;
-
- if (GBP_RULE_REDIRECT == action)
- {
- rv = gbp_next_hop_set_decode (&in->nh_set, &hash_mode, &nhs);
-
- if (0 != rv)
- return (rv);
- }
- else
- {
- hash_mode = GBP_HASH_MODE_SRC_IP;
- }
-
- *gui = gbp_rule_alloc (action, hash_mode, nhs);
-
- return (rv);
-}
-
-static int
-gbp_contract_rules_decode (u8 n_rules,
- const vl_api_gbp_rule_t * rules, index_t ** out)
-{
- index_t *guis = NULL;
- int rv;
- u8 ii;
-
- if (0 == n_rules)
- {
- *out = NULL;
- return (0);
- }
-
- vec_validate (guis, n_rules - 1);
-
- for (ii = 0; ii < n_rules; ii++)
- {
- rv = gbp_contract_rule_decode (&rules[ii], &guis[ii]);
-
- if (0 != rv)
- {
- index_t *gui;
- vec_foreach (gui, guis) gbp_rule_free (*gui);
- vec_free (guis);
- return (rv);
- }
- }
-
- *out = guis;
- return (rv);
-}
-
-static void
-vl_api_gbp_contract_add_del_t_handler (vl_api_gbp_contract_add_del_t * mp)
-{
- vl_api_gbp_contract_add_del_reply_t *rmp;
- u16 *allowed_ethertypes;
- u32 stats_index = ~0;
- index_t *rules;
- int ii, rv = 0;
- u8 n_et;
-
- if (mp->is_add)
- {
- rv = gbp_contract_rules_decode (mp->contract.n_rules,
- mp->contract.rules, &rules);
- if (0 != rv)
- goto out;
-
- allowed_ethertypes = NULL;
-
- /*
- * allowed ether types
- */
- n_et = mp->contract.n_ether_types;
- vec_validate (allowed_ethertypes, n_et - 1);
-
- for (ii = 0; ii < n_et; ii++)
- {
- /* leave the ether types in network order */
- allowed_ethertypes[ii] = mp->contract.allowed_ethertypes[ii];
- }
-
- rv = gbp_contract_update (ntohs (mp->contract.scope),
- ntohs (mp->contract.sclass),
- ntohs (mp->contract.dclass),
- ntohl (mp->contract.acl_index),
- rules, allowed_ethertypes, &stats_index);
- }
- else
- rv = gbp_contract_delete (ntohs (mp->contract.scope),
- ntohs (mp->contract.sclass),
- ntohs (mp->contract.dclass));
-
-out:
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_GBP_CONTRACT_ADD_DEL_REPLY + GBP_MSG_BASE,
- ({
- rmp->stats_index = htonl (stats_index);
- }));
- /* *INDENT-ON* */
-}
-
-static int
-gbp_contract_send_details (gbp_contract_t * gbpc, void *args)
-{
- vl_api_gbp_contract_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return 1;
-
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_GBP_CONTRACT_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->contract.sclass = ntohs (gbpc->gc_key.gck_src);
- mp->contract.dclass = ntohs (gbpc->gc_key.gck_dst);
- mp->contract.acl_index = ntohl (gbpc->gc_acl_index);
- mp->contract.scope = ntohs (gbpc->gc_key.gck_scope);
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (1);
-}
-
-static void
-vl_api_gbp_contract_dump_t_handler (vl_api_gbp_contract_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_contract_walk (gbp_contract_send_details, &ctx);
-}
-
-static int
-gbp_vxlan_tunnel_mode_2_layer (vl_api_gbp_vxlan_tunnel_mode_t mode,
- gbp_vxlan_tunnel_layer_t * l)
-{
- mode = clib_net_to_host_u32 (mode);
-
- switch (mode)
- {
- case GBP_VXLAN_TUNNEL_MODE_L2:
- *l = GBP_VXLAN_TUN_L2;
- return (0);
- case GBP_VXLAN_TUNNEL_MODE_L3:
- *l = GBP_VXLAN_TUN_L3;
- return (0);
- }
- return (-1);
-}
-
-static void
-vl_api_gbp_vxlan_tunnel_add_t_handler (vl_api_gbp_vxlan_tunnel_add_t * mp)
-{
- vl_api_gbp_vxlan_tunnel_add_reply_t *rmp;
- gbp_vxlan_tunnel_layer_t layer;
- ip4_address_t src;
- u32 sw_if_index;
- int rv = 0;
-
- ip4_address_decode (mp->tunnel.src, &src);
- rv = gbp_vxlan_tunnel_mode_2_layer (mp->tunnel.mode, &layer);
-
- if (0 != rv)
- goto out;
-
- rv = gbp_vxlan_tunnel_add (ntohl (mp->tunnel.vni),
- layer,
- ntohl (mp->tunnel.bd_rd_id), &src, &sw_if_index);
-
-out:
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_GBP_VXLAN_TUNNEL_ADD_REPLY + GBP_MSG_BASE,
- ({
- rmp->sw_if_index = htonl (sw_if_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_gbp_vxlan_tunnel_del_t_handler (vl_api_gbp_vxlan_tunnel_add_t * mp)
-{
- vl_api_gbp_vxlan_tunnel_del_reply_t *rmp;
- int rv = 0;
-
- rv = gbp_vxlan_tunnel_del (ntohl (mp->tunnel.vni));
-
- REPLY_MACRO (VL_API_GBP_VXLAN_TUNNEL_DEL_REPLY + GBP_MSG_BASE);
-}
-
-static vl_api_gbp_vxlan_tunnel_mode_t
-gbp_vxlan_tunnel_layer_2_mode (gbp_vxlan_tunnel_layer_t layer)
-{
- vl_api_gbp_vxlan_tunnel_mode_t mode = GBP_VXLAN_TUNNEL_MODE_L2;
-
- switch (layer)
- {
- case GBP_VXLAN_TUN_L2:
- mode = GBP_VXLAN_TUNNEL_MODE_L2;
- break;
- case GBP_VXLAN_TUN_L3:
- mode = GBP_VXLAN_TUNNEL_MODE_L3;
- break;
- }
- mode = clib_host_to_net_u32 (mode);
-
- return (mode);
-}
-
-static walk_rc_t
-gbp_vxlan_tunnel_send_details (gbp_vxlan_tunnel_t * gt, void *args)
-{
- vl_api_gbp_vxlan_tunnel_details_t *mp;
- gbp_walk_ctx_t *ctx;
-
- ctx = args;
- mp = vl_msg_api_alloc (sizeof (*mp));
- if (!mp)
- return 1;
-
- memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = htons (VL_API_GBP_VXLAN_TUNNEL_DETAILS + GBP_MSG_BASE);
- mp->context = ctx->context;
-
- mp->tunnel.vni = htonl (gt->gt_vni);
- mp->tunnel.mode = gbp_vxlan_tunnel_layer_2_mode (gt->gt_layer);
- mp->tunnel.bd_rd_id = htonl (gt->gt_bd_rd_id);
-
- vl_api_send_msg (ctx->reg, (u8 *) mp);
-
- return (1);
-}
-
-static void
-vl_api_gbp_vxlan_tunnel_dump_t_handler (vl_api_gbp_vxlan_tunnel_dump_t * mp)
-{
- vl_api_registration_t *reg;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- gbp_walk_ctx_t ctx = {
- .reg = reg,
- .context = mp->context,
- };
-
- gbp_vxlan_walk (gbp_vxlan_tunnel_send_details, &ctx);
-}
-
-#include <gbp/gbp.api.c>
-static clib_error_t *
-gbp_init (vlib_main_t * vm)
-{
- gbp_main_t *gbpm = &gbp_main;
-
- gbpm->gbp_acl_user_id = ~0;
-
- /* Ask for a correctly-sized block of API message decode slots */
- msg_id_base = setup_message_id_table ();
-
- return (NULL);
-}
-
-VLIB_API_INIT_FUNCTION (gbp_init);
-
-/* *INDENT-OFF* */
-VLIB_PLUGIN_REGISTER () = {
- .version = VPP_BUILD_VER,
- .description = "Group Based Policy (GBP)",
-};
-/* *INDENT-ON* */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_bridge_domain.c b/src/plugins/gbp/gbp_bridge_domain.c
deleted file mode 100644
index 279169abb1d..00000000000
--- a/src/plugins/gbp/gbp_bridge_domain.c
+++ /dev/null
@@ -1,503 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_learn.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#include <vnet/dpo/dvr_dpo.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/feat_bitmap.h>
-#include <vnet/l2/l2_bvi.h>
-#include <vnet/l2/l2_fib.h>
-
-/**
- * Pool of GBP bridge_domains
- */
-gbp_bridge_domain_t *gbp_bridge_domain_pool;
-
-/**
- * DB of bridge_domains
- */
-gbp_bridge_domain_db_t gbp_bridge_domain_db;
-
-/**
- * Map of BD index to contract scope
- */
-gbp_scope_t *gbp_scope_by_bd_index;
-
-/**
- * logger
- */
-vlib_log_class_t gb_logger;
-
-#define GBP_BD_DBG(...) \
- vlib_log_debug (gb_logger, __VA_ARGS__);
-
-index_t
-gbp_bridge_domain_index (const gbp_bridge_domain_t * gbd)
-{
- return (gbd - gbp_bridge_domain_pool);
-}
-
-static void
-gbp_bridge_domain_lock (index_t i)
-{
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (i);
- gb->gb_locks++;
-}
-
-u32
-gbp_bridge_domain_get_bd_id (index_t gbdi)
-{
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gbdi);
-
- return (gb->gb_bd_id);
-}
-
-static index_t
-gbp_bridge_domain_find (u32 bd_id)
-{
- uword *p;
-
- p = hash_get (gbp_bridge_domain_db.gbd_by_bd_id, bd_id);
-
- if (NULL != p)
- return p[0];
-
- return (INDEX_INVALID);
-}
-
-index_t
-gbp_bridge_domain_find_and_lock (u32 bd_id)
-{
- uword *p;
-
- p = hash_get (gbp_bridge_domain_db.gbd_by_bd_id, bd_id);
-
- if (NULL != p)
- {
- gbp_bridge_domain_lock (p[0]);
- return p[0];
- }
- return (INDEX_INVALID);
-}
-
-static void
-gbp_bridge_domain_db_add (gbp_bridge_domain_t * gb)
-{
- index_t gbi = gb - gbp_bridge_domain_pool;
-
- hash_set (gbp_bridge_domain_db.gbd_by_bd_id, gb->gb_bd_id, gbi);
- vec_validate_init_empty (gbp_bridge_domain_db.gbd_by_bd_index,
- gb->gb_bd_index, INDEX_INVALID);
- gbp_bridge_domain_db.gbd_by_bd_index[gb->gb_bd_index] = gbi;
-}
-
-static void
-gbp_bridge_domain_db_remove (gbp_bridge_domain_t * gb)
-{
- hash_unset (gbp_bridge_domain_db.gbd_by_bd_id, gb->gb_bd_id);
- gbp_bridge_domain_db.gbd_by_bd_index[gb->gb_bd_index] = INDEX_INVALID;
-}
-
-u8 *
-format_gbp_bridge_domain_flags (u8 * s, va_list * args)
-{
- gbp_bridge_domain_flags_t gf = va_arg (*args, gbp_bridge_domain_flags_t);
-
- if (gf)
- {
- if (gf & GBP_BD_FLAG_DO_NOT_LEARN)
- s = format (s, "do-not-learn ");
- if (gf & GBP_BD_FLAG_UU_FWD_DROP)
- s = format (s, "uu-fwd-drop ");
- if (gf & GBP_BD_FLAG_MCAST_DROP)
- s = format (s, "mcast-drop ");
- if (gf & GBP_BD_FLAG_UCAST_ARP)
- s = format (s, "ucast-arp ");
- }
- else
- {
- s = format (s, "none");
- }
- return (s);
-}
-
-static u8 *
-format_gbp_bridge_domain_ptr (u8 * s, va_list * args)
-{
- gbp_bridge_domain_t *gb = va_arg (*args, gbp_bridge_domain_t *);
- vnet_main_t *vnm = vnet_get_main ();
-
- if (NULL != gb)
- s =
- format (s,
- "[%d] bd:[%d,%d], bvi:%U uu-flood:%U bm-flood:%U flags:%U locks:%d",
- gb - gbp_bridge_domain_pool, gb->gb_bd_id, gb->gb_bd_index,
- format_vnet_sw_if_index_name, vnm, gb->gb_bvi_sw_if_index,
- format_vnet_sw_if_index_name, vnm, gb->gb_uu_fwd_sw_if_index,
- format_gbp_itf_hdl, gb->gb_bm_flood_itf,
- format_gbp_bridge_domain_flags, gb->gb_flags, gb->gb_locks);
- else
- s = format (s, "NULL");
-
- return (s);
-}
-
-u8 *
-format_gbp_bridge_domain (u8 * s, va_list * args)
-{
- index_t gbi = va_arg (*args, index_t);
-
- s =
- format (s, "%U", format_gbp_bridge_domain_ptr,
- gbp_bridge_domain_get (gbi));
-
- return (s);
-}
-
-int
-gbp_bridge_domain_add_and_lock (u32 bd_id,
- u32 rd_id,
- gbp_bridge_domain_flags_t flags,
- u32 bvi_sw_if_index,
- u32 uu_fwd_sw_if_index,
- u32 bm_flood_sw_if_index)
-{
- gbp_bridge_domain_t *gb;
- index_t gbi;
-
- gbi = gbp_bridge_domain_find (bd_id);
-
- if (INDEX_INVALID == gbi)
- {
- gbp_route_domain_t *gr;
- u32 bd_index;
-
- bd_index = bd_find_index (&bd_main, bd_id);
-
- if (~0 == bd_index)
- return (VNET_API_ERROR_BD_NOT_MODIFIABLE);
-
- bd_flags_t bd_flags = L2_NONE;
- if (flags & GBP_BD_FLAG_UU_FWD_DROP)
- bd_flags |= L2_UU_FLOOD;
- if (flags & GBP_BD_FLAG_MCAST_DROP)
- bd_flags |= L2_FLOOD;
-
- pool_get (gbp_bridge_domain_pool, gb);
- memset (gb, 0, sizeof (*gb));
-
- gbi = gb - gbp_bridge_domain_pool;
- gb->gb_bd_id = bd_id;
- gb->gb_bd_index = bd_index;
- gb->gb_uu_fwd_sw_if_index = uu_fwd_sw_if_index;
- gb->gb_bvi_sw_if_index = bvi_sw_if_index;
- gbp_itf_hdl_reset (&gb->gb_bm_flood_itf);
- gb->gb_locks = 1;
- gb->gb_flags = flags;
- gb->gb_rdi = gbp_route_domain_find_and_lock (rd_id);
-
- /*
- * set the scope from the BD's RD's scope
- */
- gr = gbp_route_domain_get (gb->gb_rdi);
- vec_validate (gbp_scope_by_bd_index, gb->gb_bd_index);
- gbp_scope_by_bd_index[gb->gb_bd_index] = gr->grd_scope;
-
- /*
- * Set the BVI and uu-flood interfaces into the BD
- */
- gbp_bridge_domain_itf_add (gbi, gb->gb_bvi_sw_if_index,
- L2_BD_PORT_TYPE_BVI);
-
- if ((!(flags & GBP_BD_FLAG_UU_FWD_DROP) ||
- (flags & GBP_BD_FLAG_UCAST_ARP)) &&
- ~0 != gb->gb_uu_fwd_sw_if_index)
- gbp_bridge_domain_itf_add (gbi, gb->gb_uu_fwd_sw_if_index,
- L2_BD_PORT_TYPE_UU_FWD);
-
- if (!(flags & GBP_BD_FLAG_MCAST_DROP) && ~0 != bm_flood_sw_if_index)
- {
- gb->gb_bm_flood_itf =
- gbp_itf_l2_add_and_lock (bm_flood_sw_if_index, gbi);
- gbp_itf_l2_set_input_feature (gb->gb_bm_flood_itf,
- L2INPUT_FEAT_GBP_LEARN);
- }
-
- /*
- * unset any flag(s) set above
- */
- bd_set_flags (vlib_get_main (), bd_index, bd_flags, 0);
-
- if (flags & GBP_BD_FLAG_UCAST_ARP)
- {
- bd_flags = L2_ARP_UFWD;
- bd_set_flags (vlib_get_main (), bd_index, bd_flags, 1);
- }
-
- /*
- * Add the BVI's MAC to the L2FIB
- */
- l2fib_add_entry (vnet_sw_interface_get_hw_address
- (vnet_get_main (), gb->gb_bvi_sw_if_index),
- gb->gb_bd_index, gb->gb_bvi_sw_if_index,
- (L2FIB_ENTRY_RESULT_FLAG_STATIC |
- L2FIB_ENTRY_RESULT_FLAG_BVI));
-
- gbp_bridge_domain_db_add (gb);
- }
- else
- {
- gb = gbp_bridge_domain_get (gbi);
- gb->gb_locks++;
- }
-
- GBP_BD_DBG ("add: %U", format_gbp_bridge_domain_ptr, gb);
-
- return (0);
-}
-
-void
-gbp_bridge_domain_itf_add (index_t gbdi,
- u32 sw_if_index, l2_bd_port_type_t type)
-{
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gbdi);
-
- set_int_l2_mode (vlib_get_main (), vnet_get_main (), MODE_L2_BRIDGE,
- sw_if_index, gb->gb_bd_index, type, 0, 0);
- /*
- * adding an interface to the bridge enables learning on the
- * interface. Disable learning on the interface by default for gbp
- * interfaces
- */
- l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_LEARN, 0);
-}
-
-void
-gbp_bridge_domain_itf_del (index_t gbdi,
- u32 sw_if_index, l2_bd_port_type_t type)
-{
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gbdi);
-
- set_int_l2_mode (vlib_get_main (), vnet_get_main (), MODE_L3, sw_if_index,
- gb->gb_bd_index, type, 0, 0);
-}
-
-void
-gbp_bridge_domain_unlock (index_t gbdi)
-{
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gbdi);
-
- gb->gb_locks--;
-
- if (0 == gb->gb_locks)
- {
- GBP_BD_DBG ("destroy: %U", format_gbp_bridge_domain_ptr, gb);
-
- l2fib_del_entry (vnet_sw_interface_get_hw_address
- (vnet_get_main (), gb->gb_bvi_sw_if_index),
- gb->gb_bd_index, gb->gb_bvi_sw_if_index);
-
- gbp_bridge_domain_itf_del (gbdi, gb->gb_bvi_sw_if_index,
- L2_BD_PORT_TYPE_BVI);
- if (~0 != gb->gb_uu_fwd_sw_if_index)
- gbp_bridge_domain_itf_del (gbdi, gb->gb_uu_fwd_sw_if_index,
- L2_BD_PORT_TYPE_UU_FWD);
- gbp_itf_unlock (&gb->gb_bm_flood_itf);
-
- gbp_bridge_domain_db_remove (gb);
- gbp_route_domain_unlock (gb->gb_rdi);
-
- pool_put (gbp_bridge_domain_pool, gb);
- }
-}
-
-int
-gbp_bridge_domain_delete (u32 bd_id)
-{
- index_t gbi;
-
- GBP_BD_DBG ("del: %d", bd_id);
- gbi = gbp_bridge_domain_find (bd_id);
-
- if (INDEX_INVALID != gbi)
- {
- GBP_BD_DBG ("del: %U", format_gbp_bridge_domain, gbi);
- gbp_bridge_domain_unlock (gbi);
-
- return (0);
- }
-
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-}
-
-void
-gbp_bridge_domain_walk (gbp_bridge_domain_cb_t cb, void *ctx)
-{
- gbp_bridge_domain_t *gbpe;
-
- /* *INDENT-OFF* */
- pool_foreach (gbpe, gbp_bridge_domain_pool)
- {
- if (!cb(gbpe, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static clib_error_t *
-gbp_bridge_domain_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vnet_main_t *vnm = vnet_get_main ();
- gbp_bridge_domain_flags_t flags;
- u32 bm_flood_sw_if_index = ~0;
- u32 uu_fwd_sw_if_index = ~0;
- u32 bd_id = ~0, rd_id = ~0;
- u32 bvi_sw_if_index = ~0;
- u8 add = 1;
-
- flags = GBP_BD_FLAG_NONE;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "bvi %U", unformat_vnet_sw_interface,
- vnm, &bvi_sw_if_index))
- ;
- else if (unformat (input, "uu-fwd %U", unformat_vnet_sw_interface,
- vnm, &uu_fwd_sw_if_index))
- ;
- else if (unformat (input, "bm-flood %U", unformat_vnet_sw_interface,
- vnm, &bm_flood_sw_if_index))
- ;
- else if (unformat (input, "add"))
- add = 1;
- else if (unformat (input, "del"))
- add = 0;
- else if (unformat (input, "flags %d", &flags))
- ;
- else if (unformat (input, "bd %d", &bd_id))
- ;
- else if (unformat (input, "rd %d", &rd_id))
- ;
- else
- break;
- }
-
- if (~0 == bd_id)
- return clib_error_return (0, "BD-ID must be specified");
- if (~0 == rd_id)
- return clib_error_return (0, "RD-ID must be specified");
-
- if (add)
- {
- if (~0 == bvi_sw_if_index)
- return clib_error_return (0, "interface must be specified");
-
- gbp_bridge_domain_add_and_lock (bd_id, rd_id,
- flags,
- bvi_sw_if_index,
- uu_fwd_sw_if_index,
- bm_flood_sw_if_index);
- }
- else
- gbp_bridge_domain_delete (bd_id);
-
- return (NULL);
-}
-
-/*?
- * Configure a GBP bridge-domain
- *
- * @cliexpar
- * @cliexstart{gbp bridge-domain [del] bd <ID> bvi <interface> [uu-fwd <interface>] [bm-flood <interface>] [flags <flags>]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_bridge_domain_cli_node, static) = {
- .path = "gbp bridge-domain",
- .short_help = "gbp bridge-domain [del] bd <ID> bvi <interface> [uu-fwd <interface>] [bm-flood <interface>] [flags <flags>]",
- .function = gbp_bridge_domain_cli,
-};
-
-static int
-gbp_bridge_domain_show_one (gbp_bridge_domain_t *gb, void *ctx)
-{
- vlib_main_t *vm;
-
- vm = ctx;
- vlib_cli_output (vm, " %U", format_gbp_bridge_domain_ptr, gb);
-
- return (1);
-}
-
-static clib_error_t *
-gbp_bridge_domain_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "Bridge-Domains:");
- gbp_bridge_domain_walk (gbp_bridge_domain_show_one, vm);
-
- return (NULL);
-}
-
-
-/*?
- * Show Group Based Policy Bridge_Domains and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp bridge_domain}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_bridge_domain_show_node, static) = {
- .path = "show gbp bridge-domain",
- .short_help = "show gbp bridge-domain\n",
- .function = gbp_bridge_domain_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_bridge_domain_init (vlib_main_t * vm)
-{
- gb_logger = vlib_log_register_class ("gbp", "bd");
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_bridge_domain_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_bridge_domain.h b/src/plugins/gbp/gbp_bridge_domain.h
deleted file mode 100644
index 0449240083c..00000000000
--- a/src/plugins/gbp/gbp_bridge_domain.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_BRIDGE_DOMAIN_H__
-#define __GBP_BRIDGE_DOMAIN_H__
-
-#include <plugins/gbp/gbp_types.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#include <vnet/fib/fib_types.h>
-#include <vnet/l2/l2_bd.h>
-
-/**
- * Bridge Domain Flags
- */
-typedef enum gbp_bridge_domain_flags_t_
-{
- GBP_BD_FLAG_NONE = 0,
- GBP_BD_FLAG_DO_NOT_LEARN = (1 << 0),
- GBP_BD_FLAG_UU_FWD_DROP = (1 << 1),
- GBP_BD_FLAG_MCAST_DROP = (1 << 2),
- GBP_BD_FLAG_UCAST_ARP = (1 << 3),
-} gbp_bridge_domain_flags_t;
-
-/**
- * A bridge Domain Representation.
- * This is a standard bridge-domain plus all the attributes it must
- * have to supprt the GBP model.
- */
-typedef struct gbp_bridge_domain_t_
-{
- /**
- * Bridge-domain ID
- */
- u32 gb_bd_id;
- u32 gb_bd_index;
-
- /**
- * Index of the Route-domain this BD is associated with. This is used as the
- * 'scope' of the packets for contract matching.
- */
- u32 gb_rdi;
-
- /**
- * Flags conttrolling behaviour
- */
- gbp_bridge_domain_flags_t gb_flags;
-
- /**
- * The BD's BVI interface (obligatory)
- */
- u32 gb_bvi_sw_if_index;
-
- /**
- * The BD's MAC spine-proxy interface (optional)
- */
- u32 gb_uu_fwd_sw_if_index;
-
- /**
- * The BD's interface to sned Broadcast and multicast packets
- */
- gbp_itf_hdl_t gb_bm_flood_itf;
-
- /**
- * The index of the BD's VNI interface on which packets from
- * unkown endpoints arrive
- */
- u32 gb_vni;
-
- /**
- * locks/references to the BD so it does not get deleted (from the API)
- * whilst it is still being used
- */
- u32 gb_locks;
-} gbp_bridge_domain_t;
-
-extern void gbp_bridge_domain_itf_add (index_t gbdi,
- u32 sw_if_index,
- l2_bd_port_type_t type);
-extern void gbp_bridge_domain_itf_del (index_t gbdi,
- u32 sw_if_index,
- l2_bd_port_type_t type);
-
-extern int gbp_bridge_domain_add_and_lock (u32 bd_id,
- u32 rd_id,
- gbp_bridge_domain_flags_t flags,
- u32 bvi_sw_if_index,
- u32 uu_fwd_sw_if_index,
- u32 bm_flood_sw_if_index);
-
-extern void gbp_bridge_domain_unlock (index_t gbi);
-extern index_t gbp_bridge_domain_find_and_lock (u32 bd_id);
-extern int gbp_bridge_domain_delete (u32 bd_id);
-extern index_t gbp_bridge_domain_index (const gbp_bridge_domain_t *);
-extern u32 gbp_bridge_domain_get_bd_id (index_t gbdi);
-
-typedef int (*gbp_bridge_domain_cb_t) (gbp_bridge_domain_t * gb, void *ctx);
-extern void gbp_bridge_domain_walk (gbp_bridge_domain_cb_t bgpe, void *ctx);
-
-extern u8 *format_gbp_bridge_domain (u8 * s, va_list * args);
-extern u8 *format_gbp_bridge_domain_flags (u8 * s, va_list * args);
-
-/**
- * DB of bridge_domains
- */
-typedef struct gbp_bridge_domain_db_t
-{
- uword *gbd_by_bd_id;
- index_t *gbd_by_bd_index;
-} gbp_bridge_domain_db_t;
-
-extern gbp_bridge_domain_db_t gbp_bridge_domain_db;
-extern gbp_bridge_domain_t *gbp_bridge_domain_pool;
-
-always_inline gbp_bridge_domain_t *
-gbp_bridge_domain_get (index_t i)
-{
- return (pool_elt_at_index (gbp_bridge_domain_pool, i));
-}
-
-always_inline gbp_bridge_domain_t *
-gbp_bridge_domain_get_by_bd_index (u32 bd_index)
-{
- return (gbp_bridge_domain_get
- (gbp_bridge_domain_db.gbd_by_bd_index[bd_index]));
-}
-
-extern gbp_scope_t *gbp_scope_by_bd_index;
-
-always_inline gbp_scope_t
-gbp_bridge_domain_get_scope (u32 bd_index)
-{
- return (gbp_scope_by_bd_index[bd_index]);
-}
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_classify.c b/src/plugins/gbp/gbp_classify.c
deleted file mode 100644
index 255db252871..00000000000
--- a/src/plugins/gbp/gbp_classify.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * gbp.h : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_classify.h>
-#include <vnet/l2/l2_input.h>
-
-gbp_src_classify_main_t gbp_src_classify_main;
-
-static clib_error_t *
-gbp_src_classify_init (vlib_main_t * vm)
-{
- gbp_src_classify_main_t *em = &gbp_src_classify_main;
-
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "gbp-src-classify");
-
- /* Initialize the feature next-node indexes */
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- em->l2_input_feat_next[GBP_SRC_CLASSIFY_NULL]);
-
- node = vlib_get_node_by_name (vm, (u8 *) "gbp-null-classify");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- em->l2_input_feat_next[GBP_SRC_CLASSIFY_PORT]);
-
- node = vlib_get_node_by_name (vm, (u8 *) "l2-gbp-lpm-classify");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- em->l2_input_feat_next[GBP_SRC_CLASSIFY_LPM]);
-
- node = vlib_get_node_by_name (vm, (u8 *) "l2-gbp-lpm-anon-classify");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- em->l2_input_feat_next
- [GBP_SRC_CLASSIFY_LPM_ANON]);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (gbp_src_classify_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_classify.h b/src/plugins/gbp/gbp_classify.h
deleted file mode 100644
index ca7db94a2c0..00000000000
--- a/src/plugins/gbp/gbp_classify.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * gbp.h : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_CLASSIFY_H__
-#define __GBP_CLASSIFY_H__
-
-#include <plugins/gbp/gbp.h>
-#include <vnet/ethernet/arp_packet.h>
-
-typedef enum gbp_src_classify_type_t_
-{
- GBP_SRC_CLASSIFY_NULL,
- GBP_SRC_CLASSIFY_PORT,
- GBP_SRC_CLASSIFY_LPM,
- GBP_SRC_CLASSIFY_LPM_ANON,
- GBP_SRC_N_CLASSIFY
-#define GBP_SRC_N_CLASSIFY GBP_SRC_N_CLASSIFY
-} gbp_src_classify_type_t;
-
-/**
- * Grouping of global data for the GBP source EPG classification feature
- */
-typedef struct gbp_src_classify_main_t_
-{
- /**
- * Next nodes for L2 output features
- */
- u32 l2_input_feat_next[GBP_SRC_N_CLASSIFY][32];
-} gbp_src_classify_main_t;
-
-extern gbp_src_classify_main_t gbp_src_classify_main;
-
-enum gbp_classify_get_ip_way
-{
- GBP_CLASSIFY_GET_IP_SRC = 0,
- GBP_CLASSIFY_GET_IP_DST = 1
-};
-
-static_always_inline dpo_proto_t
-gbp_classify_get_ip_address (const ethernet_header_t * eh0,
- const ip4_address_t ** ip4,
- const ip6_address_t ** ip6,
- const enum gbp_classify_get_ip_way way)
-{
- u16 etype = clib_net_to_host_u16 (eh0->type);
- const void *l3h0 = eh0 + 1;
-
- if (ETHERNET_TYPE_VLAN == etype)
- {
- const ethernet_vlan_header_t *vh0 =
- (ethernet_vlan_header_t *) (eh0 + 1);
- etype = clib_net_to_host_u16 (vh0->type);
- l3h0 = vh0 + 1;
- }
-
- switch (etype)
- {
- case ETHERNET_TYPE_IP4:
- *ip4 = &(&((const ip4_header_t *) l3h0)->src_address)[way];
- return DPO_PROTO_IP4;
- case ETHERNET_TYPE_IP6:
- *ip6 = &(&((const ip6_header_t *) l3h0)->src_address)[way];
- return DPO_PROTO_IP6;
- case ETHERNET_TYPE_ARP:
- *ip4 = &((ethernet_arp_header_t *) l3h0)->ip4_over_ethernet[way].ip4;
- return DPO_PROTO_IP4;
- }
-
- return DPO_PROTO_NONE;
-}
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_classify_node.c b/src/plugins/gbp/gbp_classify_node.c
deleted file mode 100644
index a2058a21284..00000000000
--- a/src/plugins/gbp/gbp_classify_node.c
+++ /dev/null
@@ -1,628 +0,0 @@
-/*
- * gbp.h : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_classify.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_ext_itf.h>
-#include <vnet/fib/ip4_fib.h>
-#include <vnet/fib/ip6_fib.h>
-#include <vnet/dpo/load_balance.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/feat_bitmap.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-#include <vnet/ethernet/arp_packet.h>
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_classify_trace_t_
-{
- /* per-pkt trace data */
- sclass_t sclass;
-} gbp_classify_trace_t;
-
-/*
- * determine the SRC EPG form the input port
- */
-always_inline uword
-gbp_classify_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- gbp_src_classify_type_t type, dpo_proto_t dproto)
-{
- gbp_src_classify_main_t *gscm = &gbp_src_classify_main;
- u32 n_left_from, *from, *to_next;
- u32 next_index;
-
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 next0, bi0, sw_if_index0;
- const gbp_endpoint_t *ge0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- vnet_buffer2 (b0)->gbp.flags = VXLAN_GBP_GPFLAGS_NONE;
-
- if (GBP_SRC_CLASSIFY_NULL == type)
- {
- sclass0 = SCLASS_INVALID;
- next0 =
- vnet_l2_feature_next (b0, gscm->l2_input_feat_next[type],
- L2INPUT_FEAT_GBP_NULL_CLASSIFY);
- }
- else
- {
- if (DPO_PROTO_ETHERNET == dproto)
- {
- const ethernet_header_t *h0;
-
- h0 = vlib_buffer_get_current (b0);
- next0 =
- vnet_l2_feature_next (b0, gscm->l2_input_feat_next[type],
- L2INPUT_FEAT_GBP_SRC_CLASSIFY);
- ge0 = gbp_endpoint_find_mac (h0->src_address,
- vnet_buffer (b0)->l2.bd_index);
- }
- else if (DPO_PROTO_IP4 == dproto)
- {
- const ip4_header_t *h0;
-
- h0 = vlib_buffer_get_current (b0);
-
- ge0 = gbp_endpoint_find_ip4
- (&h0->src_address,
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
- sw_if_index0));
-
-
- /*
- * Go straight to looukp, do not pass go, do not collect $200
- */
- next0 = 0;
- }
- else if (DPO_PROTO_IP6 == dproto)
- {
- const ip6_header_t *h0;
-
- h0 = vlib_buffer_get_current (b0);
-
- ge0 = gbp_endpoint_find_ip6
- (&h0->src_address,
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6,
- sw_if_index0));
-
-
- /*
- * Go straight to lookup, do not pass go, do not collect $200
- */
- next0 = 0;
- }
- else
- {
- ge0 = NULL;
- next0 = 0;
- ASSERT (0);
- }
-
- if (PREDICT_TRUE (NULL != ge0))
- sclass0 = ge0->ge_fwd.gef_sclass;
- else
- sclass0 = SCLASS_INVALID;
- }
-
- vnet_buffer2 (b0)->gbp.sclass = sclass0;
-
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- gbp_classify_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sclass = sclass0;
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (gbp_src_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_classify_inline (vm, node, frame,
- GBP_SRC_CLASSIFY_PORT, DPO_PROTO_ETHERNET));
-}
-
-VLIB_NODE_FN (gbp_null_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_classify_inline (vm, node, frame,
- GBP_SRC_CLASSIFY_NULL, DPO_PROTO_ETHERNET));
-}
-
-VLIB_NODE_FN (gbp_ip4_src_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_classify_inline (vm, node, frame,
- GBP_SRC_CLASSIFY_PORT, DPO_PROTO_IP4));
-}
-
-VLIB_NODE_FN (gbp_ip6_src_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_classify_inline (vm, node, frame,
- GBP_SRC_CLASSIFY_PORT, DPO_PROTO_IP6));
-}
-
-
-/* packet trace format function */
-static u8 *
-format_gbp_classify_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_classify_trace_t *t = va_arg (*args, gbp_classify_trace_t *);
-
- s = format (s, "sclass:%d", t->sclass);
-
- return s;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_null_classify_node) = {
- .name = "gbp-null-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 0,
-};
-
-VLIB_REGISTER_NODE (gbp_src_classify_node) = {
- .name = "gbp-src-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 0,
-};
-
-VLIB_REGISTER_NODE (gbp_ip4_src_classify_node) = {
- .name = "ip4-gbp-src-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [0] = "ip4-lookup"
- },
-};
-
-VLIB_REGISTER_NODE (gbp_ip6_src_classify_node) = {
- .name = "ip6-gbp-src-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [0] = "ip6-lookup"
- },
-};
-
-VNET_FEATURE_INIT (gbp_ip4_src_classify_feat_node, static) =
-{
- .arc_name = "ip4-unicast",
- .node_name = "ip4-gbp-src-classify",
- .runs_before = VNET_FEATURES ("nat44-out2in"),
-};
-VNET_FEATURE_INIT (gbp_ip6_src_classify_feat_node, static) =
-{
- .arc_name = "ip6-unicast",
- .node_name = "ip6-gbp-src-classify",
- .runs_before = VNET_FEATURES ("nat66-out2in"),
-};
-
-/* *INDENT-ON* */
-
-typedef enum gbp_lpm_classify_next_t_
-{
- GPB_LPM_CLASSIFY_DROP,
-} gbp_lpm_classify_next_t;
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_lpm_classify_trace_t_
-{
- sclass_t sclass;
- index_t lbi;
- ip46_address_t src;
-} gbp_lpm_classify_trace_t;
-
-/* packet trace format function */
-static u8 *
-format_gbp_lpm_classify_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_lpm_classify_trace_t *t = va_arg (*args, gbp_lpm_classify_trace_t *);
-
- s = format (s, "sclass:%d lb:%d src:%U",
- t->sclass, t->lbi, format_ip46_address, &t->src, IP46_TYPE_ANY);
-
- return s;
-}
-
-enum gbp_lpm_type
-{
- GBP_LPM_RECIRC,
- GBP_LPM_EPG,
- GBP_LPM_ANON
-};
-
-/*
- * Determine the SRC EPG from a LPM
- */
-always_inline uword
-gbp_lpm_classify_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- const dpo_proto_t dproto,
- const enum gbp_lpm_type type)
-{
- gbp_src_classify_main_t *gscm = &gbp_src_classify_main;
- u32 n_left_from, *from, *to_next;
- u32 next_index;
-
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0, sw_if_index0, fib_index0, lbi0;
- const gbp_endpoint_t *ge0, *ge_lpm0;
- gbp_lpm_classify_next_t next0;
- const ethernet_header_t *eh0;
- const gbp_policy_dpo_t *gpd0;
- const ip4_address_t *ip4_0;
- const ip6_address_t *ip6_0;
- const gbp_recirc_t *gr0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
- ip4_0 = NULL;
- ip6_0 = NULL;
- next0 = GPB_LPM_CLASSIFY_DROP;
-
- lbi0 = ~0;
- eh0 = NULL;
- b0 = vlib_get_buffer (vm, bi0);
-
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- vnet_buffer2 (b0)->gbp.flags = VXLAN_GBP_GPFLAGS_NONE;
-
- if (DPO_PROTO_IP4 == dproto)
- ip4_0 =
- &((ip4_header_t *) vlib_buffer_get_current (b0))->src_address;
- else if (DPO_PROTO_IP6 == dproto)
- ip6_0 =
- &((ip6_header_t *) vlib_buffer_get_current (b0))->src_address;
- else if (DPO_PROTO_ETHERNET == dproto)
- {
- eh0 = vlib_buffer_get_current (b0);
- gbp_classify_get_ip_address (eh0, &ip4_0, &ip6_0,
- GBP_CLASSIFY_GET_IP_SRC);
- }
-
- if (GBP_LPM_RECIRC == type)
- {
- gr0 = gbp_recirc_get (sw_if_index0);
- fib_index0 = gr0->gr_fib_index[dproto];
- ge0 = NULL;
-
- vnet_feature_next (&next0, b0);
- }
- else
- {
- if (NULL == eh0)
- {
- /* packet should be l2 */
- sclass0 = SCLASS_INVALID;
- goto trace;
- }
-
- if (GBP_LPM_ANON == type)
- {
- /*
- * anonymous LPM classification: only honour LPM as no EP
- * were programmed
- */
- gbp_ext_itf_t *gei = gbp_ext_itf_get (sw_if_index0);
- if (ip4_0)
- fib_index0 = gei->gx_fib_index[DPO_PROTO_IP4];
- else if (ip6_0)
- fib_index0 = gei->gx_fib_index[DPO_PROTO_IP6];
- else
- {
- /* not IP so no LPM classify possible */
- sclass0 = SCLASS_INVALID;
- next0 = GPB_LPM_CLASSIFY_DROP;
- goto trace;
- }
- next0 = vnet_l2_feature_next
- (b0, gscm->l2_input_feat_next[GBP_SRC_CLASSIFY_LPM_ANON],
- L2INPUT_FEAT_GBP_LPM_ANON_CLASSIFY);
- }
- else
- {
- /*
- * not an anonymous LPM classification: check it comes from
- * an EP, and use EP RD info
- */
- ge0 = gbp_endpoint_find_mac (eh0->src_address,
- vnet_buffer (b0)->l2.bd_index);
-
- if (NULL == ge0)
- {
- /* packet must have come from an EP's mac */
- sclass0 = SCLASS_INVALID;
- goto trace;
- }
-
- fib_index0 = ge0->ge_fwd.gef_fib_index;
-
- if (~0 == fib_index0)
- {
- sclass0 = SCLASS_INVALID;
- goto trace;
- }
-
- if (ip4_0)
- {
- ge_lpm0 = gbp_endpoint_find_ip4 (ip4_0, fib_index0);
- }
- else if (ip6_0)
- {
- ge_lpm0 = gbp_endpoint_find_ip6 (ip6_0, fib_index0);
- }
- else
- {
- ge_lpm0 = NULL;
- }
-
- next0 = vnet_l2_feature_next
- (b0, gscm->l2_input_feat_next[GBP_SRC_CLASSIFY_LPM],
- L2INPUT_FEAT_GBP_LPM_CLASSIFY);
-
- /*
- * if we found the EP by IP lookup, it must be from the EP
- * not a network behind it
- */
- if (NULL != ge_lpm0)
- {
- if (PREDICT_FALSE (ge0 != ge_lpm0))
- {
- /* an EP spoofing another EP */
- sclass0 = SCLASS_INVALID;
- next0 = GPB_LPM_CLASSIFY_DROP;
- }
- else
- {
- sclass0 = ge0->ge_fwd.gef_sclass;
- }
- goto trace;
- }
- }
- }
-
- gpd0 = gbp_classify_get_gpd (ip4_0, ip6_0, fib_index0);
- if (0 == gpd0)
- {
- /* could not classify => drop */
- sclass0 = SCLASS_INVALID;
- next0 = GPB_LPM_CLASSIFY_DROP;
- goto trace;
- }
-
- sclass0 = gpd0->gpd_sclass;
-
- /* all packets from an external network should not be learned by the
- * reciever. so set the Do-not-learn bit here */
- vnet_buffer2 (b0)->gbp.flags = VXLAN_GBP_GPFLAGS_D;
-
- trace:
- vnet_buffer2 (b0)->gbp.sclass = sclass0;
-
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- gbp_lpm_classify_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sclass = sclass0;
- t->lbi = lbi0;
- if (ip4_0)
- t->src.ip4 = *ip4_0;
- if (ip6_0)
- t->src.ip6 = *ip6_0;
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (gbp_ip4_lpm_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_lpm_classify_inline
- (vm, node, frame, DPO_PROTO_IP4, GBP_LPM_RECIRC));
-}
-
-VLIB_NODE_FN (gbp_ip6_lpm_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_lpm_classify_inline
- (vm, node, frame, DPO_PROTO_IP6, GBP_LPM_RECIRC));
-}
-
-VLIB_NODE_FN (gbp_l2_lpm_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_lpm_classify_inline
- (vm, node, frame, DPO_PROTO_ETHERNET, GBP_LPM_EPG));
-}
-
-VLIB_NODE_FN (gbp_l2_lpm_anon_classify_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_lpm_classify_inline
- (vm, node, frame, DPO_PROTO_ETHERNET, GBP_LPM_ANON));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_ip4_lpm_classify_node) = {
- .name = "ip4-gbp-lpm-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_lpm_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [GPB_LPM_CLASSIFY_DROP] = "ip4-drop"
- },
-};
-
-VLIB_REGISTER_NODE (gbp_ip6_lpm_classify_node) = {
- .name = "ip6-gbp-lpm-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_lpm_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [GPB_LPM_CLASSIFY_DROP] = "ip6-drop"
- },
-};
-
-VLIB_REGISTER_NODE (gbp_l2_lpm_classify_node) = {
- .name = "l2-gbp-lpm-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_lpm_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [GPB_LPM_CLASSIFY_DROP] = "error-drop"
- },
-};
-
-VLIB_REGISTER_NODE (gbp_l2_lpm_anon_classify_node) = {
- .name = "l2-gbp-lpm-anon-classify",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_lpm_classify_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = 0,
- .n_next_nodes = 1,
- .next_nodes = {
- [GPB_LPM_CLASSIFY_DROP] = "error-drop"
- },
-};
-
-VNET_FEATURE_INIT (gbp_ip4_lpm_classify_feat_node, static) =
-{
- .arc_name = "ip4-unicast",
- .node_name = "ip4-gbp-lpm-classify",
- .runs_before = VNET_FEATURES ("nat44-out2in"),
-};
-VNET_FEATURE_INIT (gbp_ip6_lpm_classify_feat_node, static) =
-{
- .arc_name = "ip6-unicast",
- .node_name = "ip6-gbp-lpm-classify",
- .runs_before = VNET_FEATURES ("nat66-out2in"),
-};
-
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_contract.c b/src/plugins/gbp/gbp_contract.c
deleted file mode 100644
index dd433f28a84..00000000000
--- a/src/plugins/gbp/gbp_contract.c
+++ /dev/null
@@ -1,819 +0,0 @@
-/*
- * gbp.h : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_contract.h>
-
-#include <vnet/dpo/load_balance.h>
-#include <vnet/dpo/drop_dpo.h>
-
-char *gbp_contract_error_strings[] = {
-#define _(sym,string) string,
- foreach_gbp_contract_error
-#undef _
-};
-
-/**
- * Single contract DB instance
- */
-gbp_contract_db_t gbp_contract_db;
-
-gbp_contract_t *gbp_contract_pool;
-
-vlib_log_class_t gc_logger;
-
-fib_node_type_t gbp_next_hop_fib_type;
-
-gbp_rule_t *gbp_rule_pool;
-gbp_next_hop_t *gbp_next_hop_pool;
-
-#define GBP_CONTRACT_DBG(...) \
- vlib_log_notice (gc_logger, __VA_ARGS__);
-
-/* Adjacency packet/byte counters indexed by adjacency index. */
-vlib_combined_counter_main_t gbp_contract_permit_counters = {
- .name = "gbp-contracts-permit",
- .stat_segment_name = "/net/gbp/contract/permit",
-};
-
-vlib_combined_counter_main_t gbp_contract_drop_counters = {
- .name = "gbp-contracts-drop",
- .stat_segment_name = "/net/gbp/contract/drop",
-};
-
-index_t
-gbp_rule_alloc (gbp_rule_action_t action,
- gbp_hash_mode_t hash_mode, index_t * nhs)
-{
- gbp_rule_t *gu;
-
- pool_get_zero (gbp_rule_pool, gu);
-
- gu->gu_hash_mode = hash_mode;
- gu->gu_nhs = nhs;
- gu->gu_action = action;
-
- return (gu - gbp_rule_pool);
-}
-
-void
-gbp_rule_free (index_t gui)
-{
- pool_put_index (gbp_rule_pool, gui);
-}
-
-index_t
-gbp_next_hop_alloc (const ip46_address_t * ip,
- index_t grd, const mac_address_t * mac, index_t gbd)
-{
- fib_protocol_t fproto;
- gbp_next_hop_t *gnh;
-
- pool_get_zero (gbp_next_hop_pool, gnh);
-
- fib_node_init (&gnh->gnh_node, gbp_next_hop_fib_type);
-
- ip46_address_copy (&gnh->gnh_ip, ip);
- mac_address_copy (&gnh->gnh_mac, mac);
-
- gnh->gnh_rd = grd;
- gnh->gnh_bd = gbd;
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto) gnh->gnh_ai[fproto] = INDEX_INVALID;
-
- return (gnh - gbp_next_hop_pool);
-}
-
-static inline gbp_next_hop_t *
-gbp_next_hop_get (index_t gui)
-{
- return (pool_elt_at_index (gbp_next_hop_pool, gui));
-}
-
-static void
-gbp_contract_rules_free (index_t * rules)
-{
- index_t *gui, *gnhi;
-
- vec_foreach (gui, rules)
- {
- gbp_policy_node_t pnode;
- fib_protocol_t fproto;
- gbp_next_hop_t *gnh;
- gbp_rule_t *gu;
-
- gu = gbp_rule_get (*gui);
-
- FOR_EACH_GBP_POLICY_NODE (pnode)
- {
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- dpo_reset (&gu->gu_dpo[pnode][fproto]);
- dpo_reset (&gu->gu_dpo[pnode][fproto]);
- }
- }
-
- vec_foreach (gnhi, gu->gu_nhs)
- {
- fib_protocol_t fproto;
-
- gnh = gbp_next_hop_get (*gnhi);
- gbp_bridge_domain_unlock (gnh->gnh_bd);
- gbp_route_domain_unlock (gnh->gnh_rd);
- gbp_endpoint_child_remove (gnh->gnh_ge, gnh->gnh_sibling);
- gbp_endpoint_unlock (GBP_ENDPOINT_SRC_RR, gnh->gnh_ge);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- adj_unlock (gnh->gnh_ai[fproto]);
- }
- }
-
- gbp_rule_free (*gui);
- }
- vec_free (rules);
-}
-
-static u8 *
-format_gbp_next_hop (u8 * s, va_list * args)
-{
- index_t gnhi = va_arg (*args, index_t);
- gbp_next_hop_t *gnh;
-
- gnh = gbp_next_hop_get (gnhi);
-
- s = format (s, "%U, %U, %U EP:%d",
- format_mac_address_t, &gnh->gnh_mac,
- format_gbp_bridge_domain, gnh->gnh_bd,
- format_ip46_address, &gnh->gnh_ip, IP46_TYPE_ANY, gnh->gnh_ge);
-
- return (s);
-}
-
-u8 *
-format_gbp_rule_action (u8 * s, va_list * args)
-{
- gbp_rule_action_t action = va_arg (*args, gbp_rule_action_t);
-
- switch (action)
- {
-#define _(v,a) case GBP_RULE_##v: return (format (s, "%s", a));
- foreach_gbp_rule_action
-#undef _
- }
-
- return (format (s, "unknown"));
-}
-
-static u8 *
-format_gbp_hash_mode (u8 * s, va_list * args)
-{
- gbp_hash_mode_t hash_mode = va_arg (*args, gbp_hash_mode_t);
-
- switch (hash_mode)
- {
-#define _(v,a) case GBP_HASH_MODE_##v: return (format (s, "%s", a));
- foreach_gbp_hash_mode
-#undef _
- }
-
- return (format (s, "unknown"));
-}
-
-static u8 *
-format_gbp_policy_node (u8 * s, va_list * args)
-{
- gbp_policy_node_t action = va_arg (*args, gbp_policy_node_t);
-
- switch (action)
- {
-#define _(v,a) case GBP_POLICY_NODE_##v: return (format (s, "%s", a));
- foreach_gbp_policy_node
-#undef _
- }
-
- return (format (s, "unknown"));
-}
-
-static u8 *
-format_gbp_rule (u8 * s, va_list * args)
-{
- index_t gui = va_arg (*args, index_t);
- gbp_policy_node_t pnode;
- fib_protocol_t fproto;
- gbp_rule_t *gu;
- index_t *gnhi;
-
- gu = gbp_rule_get (gui);
- s = format (s, "%U", format_gbp_rule_action, gu->gu_action);
-
- switch (gu->gu_action)
- {
- case GBP_RULE_PERMIT:
- case GBP_RULE_DENY:
- return (s);
- case GBP_RULE_REDIRECT:
- s = format (s, ", %U", format_gbp_hash_mode, gu->gu_hash_mode);
- break;
- }
-
- vec_foreach (gnhi, gu->gu_nhs)
- {
- s = format (s, "\n [%U]", format_gbp_next_hop, *gnhi);
- }
-
- FOR_EACH_GBP_POLICY_NODE (pnode)
- {
- s = format (s, "\n policy-%U", format_gbp_policy_node, pnode);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- if (dpo_id_is_valid (&gu->gu_dpo[pnode][fproto]))
- {
- s =
- format (s, "\n %U", format_dpo_id,
- &gu->gu_dpo[pnode][fproto], 8);
- }
- }
- }
-
- return (s);
-}
-
-static void
-gbp_contract_mk_adj (gbp_next_hop_t * gnh, fib_protocol_t fproto)
-{
- ethernet_header_t *eth;
- gbp_endpoint_t *ge;
- index_t old_ai;
- u8 *rewrite;
-
- old_ai = gnh->gnh_ai[fproto];
- rewrite = NULL;
- vec_validate (rewrite, sizeof (*eth) - 1);
- eth = (ethernet_header_t *) rewrite;
-
- GBP_CONTRACT_DBG ("...mk-adj: %U", format_gbp_next_hop,
- gnh - gbp_next_hop_pool);
-
- ge = gbp_endpoint_get (gnh->gnh_ge);
-
- eth->type = clib_host_to_net_u16 ((fproto == FIB_PROTOCOL_IP4 ?
- ETHERNET_TYPE_IP4 : ETHERNET_TYPE_IP6));
- mac_address_to_bytes (gbp_route_domain_get_local_mac (), eth->src_address);
- mac_address_to_bytes (&gnh->gnh_mac, eth->dst_address);
-
- gnh->gnh_ai[fproto] =
- adj_nbr_add_or_lock_w_rewrite (fproto,
- fib_proto_to_link (fproto),
- &gnh->gnh_ip,
- gbp_itf_get_sw_if_index (ge->
- ge_fwd.gef_itf),
- rewrite);
-
- adj_unlock (old_ai);
-}
-
-static flow_hash_config_t
-gbp_contract_mk_lb_hp (gbp_hash_mode_t gu_hash_mode)
-{
- switch (gu_hash_mode)
- {
- case GBP_HASH_MODE_SRC_IP:
- return IP_FLOW_HASH_SRC_ADDR;
- case GBP_HASH_MODE_DST_IP:
- return IP_FLOW_HASH_DST_ADDR;
- case GBP_HASH_MODE_SYMMETRIC:
- return (IP_FLOW_HASH_SRC_ADDR | IP_FLOW_HASH_DST_ADDR |
- IP_FLOW_HASH_PROTO | IP_FLOW_HASH_SYMMETRIC);
- }
-
- return 0;
-}
-
-static void
-gbp_contract_mk_lb (index_t gui, fib_protocol_t fproto)
-{
- load_balance_path_t *paths = NULL;
- gbp_policy_node_t pnode;
- gbp_next_hop_t *gnh;
- dpo_proto_t dproto;
- gbp_rule_t *gu;
- u32 ii;
-
- u32 policy_nodes[] = {
- [GBP_POLICY_NODE_L2] = gbp_policy_port_node.index,
- [GBP_POLICY_NODE_IP4] = ip4_gbp_policy_dpo_node.index,
- [GBP_POLICY_NODE_IP6] = ip6_gbp_policy_dpo_node.index,
- };
-
- GBP_CONTRACT_DBG ("..mk-lb: %U", format_gbp_rule, gui);
-
- gu = gbp_rule_get (gui);
- dproto = fib_proto_to_dpo (fproto);
-
- if (GBP_RULE_REDIRECT != gu->gu_action)
- return;
-
- vec_foreach_index (ii, gu->gu_nhs)
- {
- gnh = gbp_next_hop_get (gu->gu_nhs[ii]);
-
- gbp_contract_mk_adj (gnh, FIB_PROTOCOL_IP4);
- gbp_contract_mk_adj (gnh, FIB_PROTOCOL_IP6);
- }
-
- FOR_EACH_GBP_POLICY_NODE (pnode)
- {
- vec_validate (paths, vec_len (gu->gu_nhs) - 1);
-
- vec_foreach_index (ii, gu->gu_nhs)
- {
- gnh = gbp_next_hop_get (gu->gu_nhs[ii]);
-
- paths[ii].path_index = FIB_NODE_INDEX_INVALID;
- paths[ii].path_weight = 1;
- dpo_set (&paths[ii].path_dpo, DPO_ADJACENCY,
- dproto, gnh->gnh_ai[fproto]);
- }
-
- if (!dpo_id_is_valid (&gu->gu_dpo[pnode][fproto]))
- {
- dpo_id_t dpo = DPO_INVALID;
-
- dpo_set (&dpo, DPO_LOAD_BALANCE, dproto,
- load_balance_create (vec_len (paths),
- dproto,
- gbp_contract_mk_lb_hp
- (gu->gu_hash_mode)));
- dpo_stack_from_node (policy_nodes[pnode], &gu->gu_dpo[pnode][fproto],
- &dpo);
- dpo_reset (&dpo);
- }
-
- load_balance_multipath_update (&gu->gu_dpo[pnode][fproto],
- paths, LOAD_BALANCE_FLAG_NONE);
- vec_free (paths);
- }
-}
-
-static void
-gbp_contract_mk_one_lb (index_t gui)
-{
- gbp_contract_mk_lb (gui, FIB_PROTOCOL_IP4);
- gbp_contract_mk_lb (gui, FIB_PROTOCOL_IP6);
-}
-
-static int
-gbp_contract_next_hop_resolve (index_t gui, index_t gnhi)
-{
- gbp_bridge_domain_t *gbd;
- gbp_next_hop_t *gnh;
- ip46_address_t *ips;
- int rv;
-
- ips = NULL;
- gnh = gbp_next_hop_get (gnhi);
- gbd = gbp_bridge_domain_get (gnh->gnh_bd);
-
- gnh->gnh_gu = gui;
- vec_add1 (ips, gnh->gnh_ip);
-
- /*
- * source the endpoint this contract needs to forward via.
- * give ofrwarding details via the spine proxy. if this EP is known
- * to us, then since we source here with a low priority, the learned
- * info will take precedenc.
- */
- rv = gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_RR,
- gbd->gb_uu_fwd_sw_if_index,
- ips,
- &gnh->gnh_mac,
- gnh->gnh_bd, gnh->gnh_rd, SCLASS_INVALID,
- GBP_ENDPOINT_FLAG_NONE, NULL, NULL,
- &gnh->gnh_ge);
-
- if (0 == rv)
- {
- gnh->gnh_sibling = gbp_endpoint_child_add (gnh->gnh_ge,
- gbp_next_hop_fib_type, gnhi);
- }
-
- GBP_CONTRACT_DBG ("..resolve: %d: %d: %U", gui, gnhi, format_gbp_next_hop,
- gnhi);
-
- vec_free (ips);
- return (rv);
-}
-
-static void
-gbp_contract_rule_resolve (index_t gui)
-{
- gbp_rule_t *gu;
- index_t *gnhi;
-
- gu = gbp_rule_get (gui);
-
- GBP_CONTRACT_DBG ("..resolve: %U", format_gbp_rule, gui);
-
- vec_foreach (gnhi, gu->gu_nhs)
- {
- gbp_contract_next_hop_resolve (gui, *gnhi);
- }
-}
-
-static void
-gbp_contract_resolve (index_t * guis)
-{
- index_t *gui;
-
- vec_foreach (gui, guis)
- {
- gbp_contract_rule_resolve (*gui);
- }
-}
-
-static void
-gbp_contract_mk_lbs (index_t * guis)
-{
- index_t *gui;
-
- vec_foreach (gui, guis)
- {
- gbp_contract_mk_one_lb (*gui);
- }
-}
-
-int
-gbp_contract_update (gbp_scope_t scope,
- sclass_t sclass,
- sclass_t dclass,
- u32 acl_index,
- index_t * rules,
- u16 * allowed_ethertypes, u32 * stats_index)
-{
- gbp_main_t *gm = &gbp_main;
- u32 *acl_vec = NULL;
- gbp_contract_t *gc;
- index_t gci;
- uword *p;
-
- gbp_contract_key_t key = {
- .gck_scope = scope,
- .gck_src = sclass,
- .gck_dst = dclass,
- };
-
- if (~0 == gm->gbp_acl_user_id)
- {
- acl_plugin_exports_init (&gm->acl_plugin);
- gm->gbp_acl_user_id =
- gm->acl_plugin.register_user_module ("GBP ACL", "src-epg", "dst-epg");
- }
-
- p = hash_get (gbp_contract_db.gc_hash, key.as_u64);
- if (p != NULL)
- {
- gci = p[0];
- gc = gbp_contract_get (gci);
- gbp_contract_rules_free (gc->gc_rules);
- gbp_main.acl_plugin.put_lookup_context_index (gc->gc_lc_index);
- gc->gc_rules = NULL;
- vec_free (gc->gc_allowed_ethertypes);
- }
- else
- {
- pool_get_zero (gbp_contract_pool, gc);
- gc->gc_key = key;
- gci = gc - gbp_contract_pool;
- hash_set (gbp_contract_db.gc_hash, key.as_u64, gci);
-
- vlib_validate_combined_counter (&gbp_contract_drop_counters, gci);
- vlib_zero_combined_counter (&gbp_contract_drop_counters, gci);
- vlib_validate_combined_counter (&gbp_contract_permit_counters, gci);
- vlib_zero_combined_counter (&gbp_contract_permit_counters, gci);
- }
-
- GBP_CONTRACT_DBG ("update: %U", format_gbp_contract, gci);
-
- gc->gc_rules = rules;
- gc->gc_allowed_ethertypes = allowed_ethertypes;
- gbp_contract_resolve (gc->gc_rules);
- gbp_contract_mk_lbs (gc->gc_rules);
-
- gc->gc_acl_index = acl_index;
- gc->gc_lc_index =
- gm->acl_plugin.get_lookup_context_index (gm->gbp_acl_user_id,
- sclass, dclass);
-
- vec_add1 (acl_vec, gc->gc_acl_index);
- gm->acl_plugin.set_acl_vec_for_context (gc->gc_lc_index, acl_vec);
- vec_free (acl_vec);
-
- *stats_index = gci;
-
- return (0);
-}
-
-int
-gbp_contract_delete (gbp_scope_t scope, sclass_t sclass, sclass_t dclass)
-{
- gbp_contract_key_t key = {
- .gck_scope = scope,
- .gck_src = sclass,
- .gck_dst = dclass,
- };
- gbp_contract_t *gc;
- uword *p;
-
- p = hash_get (gbp_contract_db.gc_hash, key.as_u64);
- if (p != NULL)
- {
- gc = gbp_contract_get (p[0]);
-
- gbp_contract_rules_free (gc->gc_rules);
- gbp_main.acl_plugin.put_lookup_context_index (gc->gc_lc_index);
- vec_free (gc->gc_allowed_ethertypes);
-
- hash_unset (gbp_contract_db.gc_hash, key.as_u64);
- pool_put (gbp_contract_pool, gc);
-
- return (0);
- }
-
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-}
-
-void
-gbp_contract_walk (gbp_contract_cb_t cb, void *ctx)
-{
- gbp_contract_t *gc;
-
- /* *INDENT-OFF* */
- pool_foreach (gc, gbp_contract_pool)
- {
- if (!cb(gc, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static clib_error_t *
-gbp_contract_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- sclass_t sclass = SCLASS_INVALID, dclass = SCLASS_INVALID;
- u32 acl_index = ~0, stats_index, scope;
- u8 add = 1;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "add"))
- add = 1;
- else if (unformat (input, "del"))
- add = 0;
- else if (unformat (input, "scope %d", &scope))
- ;
- else if (unformat (input, "sclass %d", &sclass))
- ;
- else if (unformat (input, "dclass %d", &dclass))
- ;
- else if (unformat (input, "acl-index %d", &acl_index))
- ;
- else
- break;
- }
-
- if (SCLASS_INVALID == sclass)
- return clib_error_return (0, "Source EPG-ID must be specified");
- if (SCLASS_INVALID == dclass)
- return clib_error_return (0, "Destination EPG-ID must be specified");
-
- if (add)
- {
- gbp_contract_update (scope, sclass, dclass, acl_index,
- NULL, NULL, &stats_index);
- }
- else
- {
- gbp_contract_delete (scope, sclass, dclass);
- }
-
- return (NULL);
-}
-
-/*?
- * Configure a GBP Contract
- *
- * @cliexpar
- * @cliexstart{set gbp contract [del] src-epg <ID> dst-epg <ID> acl-index <ACL>}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_contract_cli_node, static) =
-{
- .path = "gbp contract",
- .short_help =
- "gbp contract [del] src-epg <ID> dst-epg <ID> acl-index <ACL>",
- .function = gbp_contract_cli,
-};
-/* *INDENT-ON* */
-
-static u8 *
-format_gbp_contract_key (u8 * s, va_list * args)
-{
- gbp_contract_key_t *gck = va_arg (*args, gbp_contract_key_t *);
-
- s = format (s, "{%d,%d,%d}", gck->gck_scope, gck->gck_src, gck->gck_dst);
-
- return (s);
-}
-
-u8 *
-format_gbp_contract (u8 * s, va_list * args)
-{
- index_t gci = va_arg (*args, index_t);
- vlib_counter_t counts;
- gbp_contract_t *gc;
- index_t *gui;
- u16 *et;
-
- gc = gbp_contract_get (gci);
-
- s = format (s, "[%d] %U: acl-index:%d",
- gci, format_gbp_contract_key, &gc->gc_key, gc->gc_acl_index);
-
- s = format (s, "\n rules:");
- vec_foreach (gui, gc->gc_rules)
- {
- s = format (s, "\n %d: %U", *gui, format_gbp_rule, *gui);
- }
-
- s = format (s, "\n allowed-ethertypes:");
- s = format (s, "\n [");
- vec_foreach (et, gc->gc_allowed_ethertypes)
- {
- int host_et = clib_net_to_host_u16 (*et);
- if (0 != host_et)
- s = format (s, "0x%x, ", host_et);
- }
- s = format (s, "]");
-
- s = format (s, "\n stats:");
- vlib_get_combined_counter (&gbp_contract_drop_counters, gci, &counts);
- s = format (s, "\n drop:[%Ld:%Ld]", counts.packets, counts.bytes);
- vlib_get_combined_counter (&gbp_contract_permit_counters, gci, &counts);
- s = format (s, "\n permit:[%Ld:%Ld]", counts.packets, counts.bytes);
-
- s = format (s, "]");
-
- return (s);
-}
-
-static clib_error_t *
-gbp_contract_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- gbp_contract_t *gc;
- u32 src, dst;
- index_t gci;
-
- src = dst = SCLASS_INVALID;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "src %d", &src))
- ;
- else if (unformat (input, "dst %d", &dst))
- ;
- else
- break;
- }
-
- vlib_cli_output (vm, "Contracts:");
-
- /* *INDENT-OFF* */
- pool_foreach (gc, gbp_contract_pool)
- {
- gci = gc - gbp_contract_pool;
-
- if (SCLASS_INVALID != src && SCLASS_INVALID != dst)
- {
- if (gc->gc_key.gck_src == src &&
- gc->gc_key.gck_dst == dst)
- vlib_cli_output (vm, " %U", format_gbp_contract, gci);
- }
- else if (SCLASS_INVALID != src)
- {
- if (gc->gc_key.gck_src == src)
- vlib_cli_output (vm, " %U", format_gbp_contract, gci);
- }
- else if (SCLASS_INVALID != dst)
- {
- if (gc->gc_key.gck_dst == dst)
- vlib_cli_output (vm, " %U", format_gbp_contract, gci);
- }
- else
- vlib_cli_output (vm, " %U", format_gbp_contract, gci);
- }
- /* *INDENT-ON* */
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy Contracts
- *
- * @cliexpar
- * @cliexstart{show gbp contract}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_contract_show_node, static) = {
- .path = "show gbp contract",
- .short_help = "show gbp contract [src <SRC>] [dst <DST>]\n",
- .function = gbp_contract_show,
-};
-/* *INDENT-ON* */
-
-static fib_node_t *
-gbp_next_hop_get_node (fib_node_index_t index)
-{
- gbp_next_hop_t *gnh;
-
- gnh = gbp_next_hop_get (index);
-
- return (&gnh->gnh_node);
-}
-
-static void
-gbp_next_hop_last_lock_gone (fib_node_t * node)
-{
- ASSERT (0);
-}
-
-static gbp_next_hop_t *
-gbp_next_hop_from_fib_node (fib_node_t * node)
-{
- ASSERT (gbp_next_hop_fib_type == node->fn_type);
- return ((gbp_next_hop_t *) node);
-}
-
-static fib_node_back_walk_rc_t
-gbp_next_hop_back_walk_notify (fib_node_t * node,
- fib_node_back_walk_ctx_t * ctx)
-{
- gbp_next_hop_t *gnh;
-
- gnh = gbp_next_hop_from_fib_node (node);
-
- gbp_contract_mk_one_lb (gnh->gnh_gu);
-
- return (FIB_NODE_BACK_WALK_CONTINUE);
-}
-
-/*
- * The FIB path's graph node virtual function table
- */
-static const fib_node_vft_t gbp_next_hop_vft = {
- .fnv_get = gbp_next_hop_get_node,
- .fnv_last_lock = gbp_next_hop_last_lock_gone,
- .fnv_back_walk = gbp_next_hop_back_walk_notify,
- // .fnv_mem_show = fib_path_memory_show,
-};
-
-static clib_error_t *
-gbp_contract_init (vlib_main_t * vm)
-{
- gc_logger = vlib_log_register_class ("gbp", "con");
- gbp_next_hop_fib_type = fib_node_register_new_type (&gbp_next_hop_vft);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_contract_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_contract.h b/src/plugins/gbp/gbp_contract.h
deleted file mode 100644
index 1e74db60116..00000000000
--- a/src/plugins/gbp/gbp_contract.h
+++ /dev/null
@@ -1,362 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_CONTRACT_H__
-#define __GBP_CONTRACT_H__
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_types.h>
-
-#define foreach_gbp_contract_error \
- _(ALLOW_NO_SCLASS, "allow-no-sclass") \
- _(ALLOW_INTRA, "allow-intra-sclass") \
- _(ALLOW_A_BIT, "allow-a-bit-set") \
- _(ALLOW_SCLASS_1, "allow-sclass-1") \
- _(ALLOW_CONTRACT, "allow-contract") \
- _(DROP_CONTRACT, "drop-contract") \
- _(DROP_ETHER_TYPE, "drop-ether-type") \
- _(DROP_NO_CONTRACT, "drop-no-contract") \
- _(DROP_NO_DCLASS, "drop-no-dclass") \
- _(DROP_NO_RULE, "drop-no-rule")
-
-typedef enum
-{
-#define _(sym,str) GBP_CONTRACT_ERROR_##sym,
- foreach_gbp_contract_error
-#undef _
- GBP_CONTRACT_N_ERROR,
-#define GBP_CONTRACT_N_ERROR GBP_CONTRACT_N_ERROR
-} gbp_contract_error_t;
-
-extern char *gbp_contract_error_strings[GBP_CONTRACT_N_ERROR];
-
-/**
- * The key for an Contract
- */
-typedef struct gbp_contract_key_t_
-{
- union
- {
- struct
- {
- gbp_scope_t gck_scope;
- /**
- * source and destination EPGs for which the ACL applies
- */
- sclass_t gck_src;
- sclass_t gck_dst;
- };
- u64 as_u64;
- };
-} gbp_contract_key_t;
-
-typedef struct gbp_next_hop_t_
-{
- fib_node_t gnh_node;
- ip46_address_t gnh_ip;
- mac_address_t gnh_mac;
- index_t gnh_gu;
- index_t gnh_bd;
- index_t gnh_rd;
- u32 gnh_ge;
- u32 gnh_sibling;
- index_t gnh_ai[FIB_PROTOCOL_IP_MAX];
-} gbp_next_hop_t;
-
-#define foreach_gbp_hash_mode \
- _(SRC_IP, "src-ip") \
- _(DST_IP, "dst-ip") \
- _(SYMMETRIC, "symmetric")
-
-typedef enum gbp_hash_mode_t_
-{
-#define _(v,s) GBP_HASH_MODE_##v,
- foreach_gbp_hash_mode
-#undef _
-} gbp_hash_mode_t;
-
-#define foreach_gbp_rule_action \
- _(PERMIT, "permit") \
- _(DENY, "deny") \
- _(REDIRECT, "redirect")
-
-typedef enum gbp_rule_action_t_
-{
-#define _(v,s) GBP_RULE_##v,
- foreach_gbp_rule_action
-#undef _
-} gbp_rule_action_t;
-
-#define foreach_gbp_policy_node \
- _(L2, "L2") \
- _(IP4, "ip4") \
- _(IP6, "ip6")
-
-typedef enum gbp_policy_node_t_
-{
-#define _(v,s) GBP_POLICY_NODE_##v,
- foreach_gbp_policy_node
-#undef _
-} gbp_policy_node_t;
-#define GBP_POLICY_N_NODES (GBP_POLICY_NODE_IP6+1)
-
-#define FOR_EACH_GBP_POLICY_NODE(pnode) \
- for (pnode = GBP_POLICY_NODE_L2; pnode < GBP_POLICY_N_NODES; pnode++)
-
-typedef struct gbp_rule_t_
-{
- gbp_rule_action_t gu_action;
- gbp_hash_mode_t gu_hash_mode;
- index_t *gu_nhs;
-
- /**
- * DPO of the load-balance object used to redirect
- */
- dpo_id_t gu_dpo[GBP_POLICY_N_NODES][FIB_PROTOCOL_IP_MAX];
-} gbp_rule_t;
-
-/**
- * A Group Based Policy Contract.
- * Determines the ACL that applies to traffic pass between two endpoint groups
- */
-typedef struct gbp_contract_t_
-{
- /**
- * source and destination EPGs
- */
- gbp_contract_key_t gc_key;
-
- u32 gc_acl_index;
- u32 gc_lc_index;
-
- /**
- * The ACL to apply for packets from the source to the destination EPG
- */
- index_t *gc_rules;
-
- /**
- * An ethertype whitelist
- */
- u16 *gc_allowed_ethertypes;
-} gbp_contract_t;
-
-/**
- * EPG src,dst pair to ACL mapping table, aka contract DB
- */
-typedef struct gbp_contract_db_t_
-{
- /**
- * We can form a u64 key from the pair, so use a simple hash table
- */
- uword *gc_hash;
-} gbp_contract_db_t;
-
-extern int gbp_contract_update (gbp_scope_t scope,
- sclass_t sclass,
- sclass_t dclass,
- u32 acl_index,
- index_t * rules,
- u16 * allowed_ethertypes, u32 * stats_index);
-extern int gbp_contract_delete (gbp_scope_t scope, sclass_t sclass,
- sclass_t dclass);
-
-extern index_t gbp_rule_alloc (gbp_rule_action_t action,
- gbp_hash_mode_t hash_mode, index_t * nhs);
-extern void gbp_rule_free (index_t gui);
-extern index_t gbp_next_hop_alloc (const ip46_address_t * ip,
- index_t grd,
- const mac_address_t * mac, index_t gbd);
-
-typedef int (*gbp_contract_cb_t) (gbp_contract_t * gbpe, void *ctx);
-extern void gbp_contract_walk (gbp_contract_cb_t bgpe, void *ctx);
-
-extern u8 *format_gbp_rule_action (u8 * s, va_list * args);
-extern u8 *format_gbp_contract (u8 * s, va_list * args);
-
-/**
- * DP functions and databases
- */
-extern gbp_contract_db_t gbp_contract_db;
-
-always_inline index_t
-gbp_contract_find (gbp_contract_key_t * key)
-{
- uword *p;
-
- p = hash_get (gbp_contract_db.gc_hash, key->as_u64);
-
- if (NULL != p)
- return (p[0]);
-
- return (INDEX_INVALID);
-}
-
-extern gbp_contract_t *gbp_contract_pool;
-
-always_inline gbp_contract_t *
-gbp_contract_get (index_t gci)
-{
- return (pool_elt_at_index (gbp_contract_pool, gci));
-}
-
-extern gbp_rule_t *gbp_rule_pool;
-
-always_inline gbp_rule_t *
-gbp_rule_get (index_t gui)
-{
- return (pool_elt_at_index (gbp_rule_pool, gui));
-}
-
-extern vlib_combined_counter_main_t gbp_contract_permit_counters;
-extern vlib_combined_counter_main_t gbp_contract_drop_counters;
-
-typedef enum
-{
- GBP_CONTRACT_APPLY_L2,
- GBP_CONTRACT_APPLY_IP4,
- GBP_CONTRACT_APPLY_IP6,
-} gbp_contract_apply_type_t;
-
-static_always_inline gbp_rule_action_t
-gbp_contract_apply (vlib_main_t * vm, gbp_main_t * gm,
- gbp_contract_key_t * key, vlib_buffer_t * b,
- gbp_rule_t ** rule, u32 * intra, u32 * sclass1,
- u32 * acl_match, u32 * rule_match,
- gbp_contract_error_t * err,
- gbp_contract_apply_type_t type)
-{
- fa_5tuple_opaque_t fa_5tuple;
- const gbp_contract_t *contract;
- index_t contract_index;
- u32 acl_pos, trace_bitmap;
- u16 etype;
- u8 ip6, action;
-
- *rule = 0;
- trace_bitmap = 0;
-
- if (key->gck_src == key->gck_dst)
- {
- /* intra-epg allowed */
- (*intra)++;
- *err = GBP_CONTRACT_ERROR_ALLOW_INTRA;
- return GBP_RULE_PERMIT;
- }
-
- if (1 == key->gck_src || 1 == key->gck_dst)
- {
- /* sclass 1 allowed */
- (*sclass1)++;
- *err = GBP_CONTRACT_ERROR_ALLOW_SCLASS_1;
- return GBP_RULE_PERMIT;
- }
-
- /* look for contract */
- contract_index = gbp_contract_find (key);
- if (INDEX_INVALID == contract_index)
- {
- *err = GBP_CONTRACT_ERROR_DROP_NO_CONTRACT;
- return GBP_RULE_DENY;
- }
-
- contract = gbp_contract_get (contract_index);
-
- *err = GBP_CONTRACT_ERROR_DROP_CONTRACT;
-
- switch (type)
- {
- case GBP_CONTRACT_APPLY_IP4:
- ip6 = 0;
- break;
- case GBP_CONTRACT_APPLY_IP6:
- ip6 = 1;
- break;
- case GBP_CONTRACT_APPLY_L2:
- {
- /* check ethertype */
- etype =
- ((u16 *) (vlib_buffer_get_current (b) +
- vnet_buffer (b)->l2.l2_len))[-1];
-
- if (~0 == vec_search (contract->gc_allowed_ethertypes, etype))
- {
- *err = GBP_CONTRACT_ERROR_DROP_ETHER_TYPE;
- goto contract_deny;
- }
-
- switch (clib_net_to_host_u16 (etype))
- {
- case ETHERNET_TYPE_IP4:
- ip6 = 0;
- break;
- case ETHERNET_TYPE_IP6:
- ip6 = 1;
- break;
- default:
- goto contract_deny;
- }
- }
- break;
- }
-
- /* check ACL */
- action = 0;
- acl_plugin_fill_5tuple_inline (gm->acl_plugin.p_acl_main,
- contract->gc_lc_index, b, ip6,
- GBP_CONTRACT_APPLY_L2 != type /* input */ ,
- GBP_CONTRACT_APPLY_L2 == type /* l2_path */ ,
- &fa_5tuple);
- acl_plugin_match_5tuple_inline (gm->acl_plugin.p_acl_main,
- contract->gc_lc_index, &fa_5tuple, ip6,
- &action, &acl_pos, acl_match, rule_match,
- &trace_bitmap);
- if (action <= 0)
- goto contract_deny;
-
- if (PREDICT_FALSE (*rule_match >= vec_len (contract->gc_rules)))
- {
- *err = GBP_CONTRACT_ERROR_DROP_NO_RULE;
- goto contract_deny;
- }
-
- *rule = gbp_rule_get (contract->gc_rules[*rule_match]);
- switch ((*rule)->gu_action)
- {
- case GBP_RULE_PERMIT:
- case GBP_RULE_REDIRECT:
- *err = GBP_CONTRACT_ERROR_ALLOW_CONTRACT;
- vlib_increment_combined_counter (&gbp_contract_permit_counters,
- vm->thread_index, contract_index, 1,
- vlib_buffer_length_in_chain (vm, b));
- return (*rule)->gu_action;
- case GBP_RULE_DENY:
- break;
- }
-
-contract_deny:
- vlib_increment_combined_counter (&gbp_contract_drop_counters,
- vm->thread_index, contract_index, 1,
- vlib_buffer_length_in_chain (vm, b));
- return GBP_RULE_DENY;
-}
-
-#endif /* __GBP_CONTRACT_H__ */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_endpoint.c b/src/plugins/gbp/gbp_endpoint.c
deleted file mode 100644
index b0cf64ced2d..00000000000
--- a/src/plugins/gbp/gbp_endpoint.c
+++ /dev/null
@@ -1,1597 +0,0 @@
-/*
- * gbp.h : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_endpoint_group.h>
-#include <plugins/gbp/gbp_itf.h>
-#include <plugins/gbp/gbp_scanner.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_vxlan.h>
-
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/l2_output.h>
-#include <vnet/l2/feat_bitmap.h>
-#include <vnet/l2/l2_fib.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/ip-neighbor/ip_neighbor.h>
-#include <vnet/ip-neighbor/ip4_neighbor.h>
-#include <vnet/ip-neighbor/ip6_neighbor.h>
-#include <vnet/fib/fib_walk.h>
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-
-static const char *gbp_endpoint_attr_names[] = GBP_ENDPOINT_ATTR_NAMES;
-
-/**
- * EP DBs
- */
-gbp_ep_db_t gbp_ep_db;
-
-static fib_source_t gbp_fib_source_hi;
-static fib_source_t gbp_fib_source_low;
-static fib_node_type_t gbp_endpoint_fib_type;
-static vlib_log_class_t gbp_ep_logger;
-
-#define GBP_ENDPOINT_DBG(...) \
- vlib_log_debug (gbp_ep_logger, __VA_ARGS__);
-
-#define GBP_ENDPOINT_INFO(...) \
- vlib_log_notice (gbp_ep_logger, __VA_ARGS__);
-
-/**
- * Pool of GBP endpoints
- */
-gbp_endpoint_t *gbp_endpoint_pool;
-
-/**
- * A count of the number of dynamic entries
- */
-static u32 gbp_n_learnt_endpoints;
-
-#define FOR_EACH_GBP_ENDPOINT_ATTR(_item) \
- for (_item = GBP_ENDPOINT_ATTR_FIRST; \
- _item < GBP_ENDPOINT_ATTR_LAST; \
- _item++)
-
-u8 *
-format_gbp_endpoint_flags (u8 * s, va_list * args)
-{
- gbp_endpoint_attr_t attr;
- gbp_endpoint_flags_t flags = va_arg (*args, gbp_endpoint_flags_t);
-
- FOR_EACH_GBP_ENDPOINT_ATTR (attr)
- {
- if ((1 << attr) & flags)
- {
- s = format (s, "%s,", gbp_endpoint_attr_names[attr]);
- }
- }
-
- return (s);
-}
-
-int
-gbp_endpoint_is_remote (const gbp_endpoint_t * ge)
-{
- return (! !(ge->ge_fwd.gef_flags & GBP_ENDPOINT_FLAG_REMOTE));
-}
-
-int
-gbp_endpoint_is_local (const gbp_endpoint_t * ge)
-{
- return (!(ge->ge_fwd.gef_flags & GBP_ENDPOINT_FLAG_REMOTE));
-}
-
-int
-gbp_endpoint_is_external (const gbp_endpoint_t * ge)
-{
- return (! !(ge->ge_fwd.gef_flags & GBP_ENDPOINT_FLAG_EXTERNAL));
-}
-
-int
-gbp_endpoint_is_learnt (const gbp_endpoint_t * ge)
-{
- if (0 == vec_len (ge->ge_locs))
- return 0;
-
- /* DP is the highest source so if present it will be first */
- return (ge->ge_locs[0].gel_src == GBP_ENDPOINT_SRC_DP);
-}
-
-static void
-gbp_endpoint_extract_key_mac_itf (const clib_bihash_kv_16_8_t * key,
- mac_address_t * mac, u32 * sw_if_index)
-{
- mac_address_from_u64 (mac, key->key[0]);
- *sw_if_index = key->key[1];
-}
-
-static void
-gbp_endpoint_extract_key_ip_itf (const clib_bihash_kv_24_8_t * key,
- ip46_address_t * ip, u32 * sw_if_index)
-{
- ip->as_u64[0] = key->key[0];
- ip->as_u64[1] = key->key[1];
- *sw_if_index = key->key[2];
-}
-
-gbp_endpoint_t *
-gbp_endpoint_find_ip (const ip46_address_t * ip, u32 fib_index)
-{
- clib_bihash_kv_24_8_t key, value;
- int rv;
-
- gbp_endpoint_mk_key_ip (ip, fib_index, &key);
-
- rv = clib_bihash_search_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, &value);
-
- if (0 != rv)
- return NULL;
-
- return (gbp_endpoint_get (value.value));
-}
-
-static void
-gbp_endpoint_add_itf (u32 sw_if_index, index_t gei)
-{
- vec_validate_init_empty (gbp_ep_db.ged_by_sw_if_index, sw_if_index, ~0);
-
- gbp_ep_db.ged_by_sw_if_index[sw_if_index] = gei;
-}
-
-static bool
-gbp_endpoint_add_mac (const mac_address_t * mac, u32 bd_index, index_t gei)
-{
- clib_bihash_kv_16_8_t key;
- int rv;
-
- gbp_endpoint_mk_key_mac (mac->bytes, bd_index, &key);
- key.value = gei;
-
- rv = clib_bihash_add_del_16_8 (&gbp_ep_db.ged_by_mac_bd, &key, 1);
-
-
- return (0 == rv);
-}
-
-static bool
-gbp_endpoint_add_ip (const ip46_address_t * ip, u32 fib_index, index_t gei)
-{
- clib_bihash_kv_24_8_t key;
- int rv;
-
- gbp_endpoint_mk_key_ip (ip, fib_index, &key);
- key.value = gei;
-
- rv = clib_bihash_add_del_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, 1);
-
- return (0 == rv);
-}
-
-static void
-gbp_endpoint_del_mac (const mac_address_t * mac, u32 bd_index)
-{
- clib_bihash_kv_16_8_t key;
-
- gbp_endpoint_mk_key_mac (mac->bytes, bd_index, &key);
-
- clib_bihash_add_del_16_8 (&gbp_ep_db.ged_by_mac_bd, &key, 0);
-}
-
-static void
-gbp_endpoint_del_ip (const ip46_address_t * ip, u32 fib_index)
-{
- clib_bihash_kv_24_8_t key;
-
- gbp_endpoint_mk_key_ip (ip, fib_index, &key);
-
- clib_bihash_add_del_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, 0);
-}
-
-static index_t
-gbp_endpoint_index (const gbp_endpoint_t * ge)
-{
- return (ge - gbp_endpoint_pool);
-}
-
-static int
-gbp_endpoint_ip_is_equal (const fib_prefix_t * fp, const ip46_address_t * ip)
-{
- return (ip46_address_is_equal (ip, &fp->fp_addr));
-}
-
-static void
-gbp_endpoint_ips_update (gbp_endpoint_t * ge,
- const ip46_address_t * ips,
- const gbp_route_domain_t * grd)
-{
- const ip46_address_t *ip;
- index_t gei, grdi;
-
- gei = gbp_endpoint_index (ge);
- grdi = gbp_route_domain_index (grd);
-
- ASSERT ((ge->ge_key.gek_grd == INDEX_INVALID) ||
- (ge->ge_key.gek_grd == grdi));
-
- vec_foreach (ip, ips)
- {
- if (~0 == vec_search_with_function (ge->ge_key.gek_ips, ip,
- gbp_endpoint_ip_is_equal))
- {
- fib_prefix_t *pfx;
-
- vec_add2 (ge->ge_key.gek_ips, pfx, 1);
- fib_prefix_from_ip46_addr (ip, pfx);
-
- gbp_endpoint_add_ip (&pfx->fp_addr,
- grd->grd_fib_index[pfx->fp_proto], gei);
- }
- ge->ge_key.gek_grd = grdi;
- }
-}
-
-static gbp_endpoint_t *
-gbp_endpoint_alloc (const ip46_address_t * ips,
- const gbp_route_domain_t * grd,
- const mac_address_t * mac,
- const gbp_bridge_domain_t * gbd)
-{
- gbp_endpoint_t *ge;
- index_t gei;
-
- pool_get_zero (gbp_endpoint_pool, ge);
-
- fib_node_init (&ge->ge_node, gbp_endpoint_fib_type);
- gei = gbp_endpoint_index (ge);
- ge->ge_key.gek_gbd =
- ge->ge_key.gek_grd = ge->ge_fwd.gef_fib_index = INDEX_INVALID;
- gbp_itf_hdl_reset (&ge->ge_fwd.gef_itf);
- ge->ge_last_time = vlib_time_now (vlib_get_main ());
- ge->ge_key.gek_gbd = gbp_bridge_domain_index (gbd);
-
- if (NULL != mac)
- {
- mac_address_copy (&ge->ge_key.gek_mac, mac);
- gbp_endpoint_add_mac (mac, gbd->gb_bd_index, gei);
- }
- gbp_endpoint_ips_update (ge, ips, grd);
-
- return (ge);
-}
-
-static int
-gbp_endpoint_loc_is_equal (gbp_endpoint_loc_t * a, gbp_endpoint_loc_t * b)
-{
- return (a->gel_src == b->gel_src);
-}
-
-static int
-gbp_endpoint_loc_cmp_for_sort (gbp_endpoint_loc_t * a, gbp_endpoint_loc_t * b)
-{
- return (a->gel_src - b->gel_src);
-}
-
-static gbp_endpoint_loc_t *
-gbp_endpoint_loc_find (gbp_endpoint_t * ge, gbp_endpoint_src_t src)
-{
- gbp_endpoint_loc_t gel = {
- .gel_src = src,
- };
- u32 pos;
-
- pos = vec_search_with_function (ge->ge_locs, &gel,
- gbp_endpoint_loc_is_equal);
-
- if (~0 != pos)
- return (&ge->ge_locs[pos]);
-
- return NULL;
-}
-
-static int
-gbp_endpoint_loc_unlock (gbp_endpoint_t * ge, gbp_endpoint_loc_t * gel)
-{
- u32 pos;
-
- gel->gel_locks--;
-
- if (0 == gel->gel_locks)
- {
- pos = gel - ge->ge_locs;
-
- vec_del1 (ge->ge_locs, pos);
- if (vec_len (ge->ge_locs) > 1)
- vec_sort_with_function (ge->ge_locs, gbp_endpoint_loc_cmp_for_sort);
-
- /* This could be the last lock, so don't access the EP from
- * this point on */
- fib_node_unlock (&ge->ge_node);
-
- return (1);
- }
- return (0);
-}
-
-static void
-gbp_endpoint_loc_destroy (gbp_endpoint_loc_t * gel)
-{
- gbp_endpoint_group_unlock (gel->gel_epg);
- gbp_itf_unlock (&gel->gel_itf);
-}
-
-static gbp_endpoint_loc_t *
-gbp_endpoint_loc_find_or_add (gbp_endpoint_t * ge, gbp_endpoint_src_t src)
-{
- gbp_endpoint_loc_t gel = {
- .gel_src = src,
- .gel_epg = INDEX_INVALID,
- .gel_itf = GBP_ITF_HDL_INVALID,
- .gel_locks = 0,
- };
- u32 pos;
-
- pos = vec_search_with_function (ge->ge_locs, &gel,
- gbp_endpoint_loc_is_equal);
-
- if (~0 == pos)
- {
- vec_add1 (ge->ge_locs, gel);
-
- if (vec_len (ge->ge_locs) > 1)
- {
- vec_sort_with_function (ge->ge_locs, gbp_endpoint_loc_cmp_for_sort);
-
- pos = vec_search_with_function (ge->ge_locs, &gel,
- gbp_endpoint_loc_is_equal);
- }
- else
- pos = 0;
-
- /*
- * it's the sources and children that lock the endpoints
- */
- fib_node_lock (&ge->ge_node);
- }
-
- return (&ge->ge_locs[pos]);
-}
-
-/**
- * Find an EP inthe DBs and check that if we find it in the L2 DB
- * it has the same IPs as this update
- */
-static int
-gbp_endpoint_find_for_update (const ip46_address_t * ips,
- const gbp_route_domain_t * grd,
- const mac_address_t * mac,
- const gbp_bridge_domain_t * gbd,
- gbp_endpoint_t ** ge)
-{
- gbp_endpoint_t *l2_ge, *l3_ge, *tmp;
-
- l2_ge = l3_ge = NULL;
-
- if (NULL != mac && !mac_address_is_zero (mac))
- {
- ASSERT (gbd);
- l2_ge = gbp_endpoint_find_mac (mac->bytes, gbd->gb_bd_index);
- }
- if (NULL != ips && !ip46_address_is_zero (ips))
- {
- const ip46_address_t *ip;
- fib_protocol_t fproto;
-
- ASSERT (grd);
- vec_foreach (ip, ips)
- {
- fproto = fib_proto_from_ip46 (ip46_address_get_type (ip));
-
- tmp = gbp_endpoint_find_ip (ip, grd->grd_fib_index[fproto]);
-
- if (NULL == tmp)
- /* not found */
- continue;
- else if (NULL == l3_ge)
- /* first match against an IP address */
- l3_ge = tmp;
- else if (tmp == l3_ge)
- /* another match against IP address that is the same endpoint */
- continue;
- else
- {
- /*
- * a match agains a different endpoint.
- * this means the KEY of the EP is changing which is not allowed
- */
- return (-1);
- }
- }
- }
-
- if (NULL == l2_ge && NULL == l3_ge)
- /* not found */
- *ge = NULL;
- else if (NULL == l2_ge)
- /* found at L3 */
- *ge = l3_ge;
- else if (NULL == l3_ge)
- /* found at L2 */
- *ge = l2_ge;
- else
- {
- /* found both L3 and L2 - they must be the same else the KEY
- * is changing
- */
- if (l2_ge == l3_ge)
- *ge = l2_ge;
- else
- return (-1);
- }
-
- return (0);
-}
-
-static gbp_endpoint_src_t
-gbp_endpoint_get_best_src (const gbp_endpoint_t * ge)
-{
- if (0 == vec_len (ge->ge_locs))
- return (GBP_ENDPOINT_SRC_MAX);
-
- return (ge->ge_locs[0].gel_src);
-}
-
-static void
-gbp_endpoint_n_learned (int n)
-{
- gbp_n_learnt_endpoints += n;
-
- if (n > 0 && 1 == gbp_n_learnt_endpoints)
- {
- vlib_process_signal_event (vlib_get_main (),
- gbp_scanner_node.index,
- GBP_ENDPOINT_SCAN_START, 0);
- }
- if (n < 0 && 0 == gbp_n_learnt_endpoints)
- {
- vlib_process_signal_event (vlib_get_main (),
- gbp_scanner_node.index,
- GBP_ENDPOINT_SCAN_STOP, 0);
- }
-}
-
-static void
-gbp_endpoint_loc_update (const gbp_endpoint_t * ge,
- gbp_endpoint_loc_t * gel,
- const gbp_bridge_domain_t * gb,
- u32 sw_if_index,
- index_t ggi,
- gbp_endpoint_flags_t flags,
- const ip46_address_t * tun_src,
- const ip46_address_t * tun_dst)
-{
- int was_learnt, is_learnt;
-
- gel->gel_locks++;
- was_learnt = ! !(gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE);
- gel->gel_flags = flags;
- is_learnt = ! !(gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE);
-
- gbp_endpoint_n_learned (is_learnt - was_learnt);
-
- /*
- * update the EPG
- */
- gbp_endpoint_group_lock (ggi);
- gbp_endpoint_group_unlock (gel->gel_epg);
- gel->gel_epg = ggi;
-
- if (gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE)
- {
- if (NULL != tun_src)
- ip46_address_copy (&gel->tun.gel_src, tun_src);
- if (NULL != tun_dst)
- ip46_address_copy (&gel->tun.gel_dst, tun_dst);
-
- if (ip46_address_is_multicast (&gel->tun.gel_src))
- {
- /*
- * we learnt the EP from the multicast tunnel.
- * Create a unicast TEP from the packet's source
- * and the fixed address of the BD's parent tunnel
- */
- const gbp_vxlan_tunnel_t *gt;
-
- gt = gbp_vxlan_tunnel_get (gb->gb_vni);
-
- if (NULL != gt)
- {
- ip46_address_copy (&gel->tun.gel_src, &gt->gt_src);
- sw_if_index = gt->gt_sw_if_index;
- }
- }
-
- /*
- * the input interface may be the parent GBP-vxlan interface,
- * create a child vlxan-gbp tunnel and use that as the endpoint's
- * interface.
- */
- gbp_itf_hdl_t old = gel->gel_itf;
-
- switch (gbp_vxlan_tunnel_get_type (sw_if_index))
- {
- case GBP_VXLAN_TEMPLATE_TUNNEL:
- gel->tun.gel_parent_sw_if_index = sw_if_index;
- gel->gel_itf = gbp_vxlan_tunnel_clone_and_lock (sw_if_index,
- &gel->tun.gel_src,
- &gel->tun.gel_dst);
- break;
- case VXLAN_GBP_TUNNEL:
- gel->tun.gel_parent_sw_if_index =
- vxlan_gbp_tunnel_get_parent (sw_if_index);
- gel->gel_itf = vxlan_gbp_tunnel_lock_itf (sw_if_index);
- break;
- }
-
- gbp_itf_unlock (&old);
- }
- else
- {
- gel->gel_itf = gbp_itf_l2_add_and_lock (sw_if_index,
- ge->ge_key.gek_gbd);
- }
-}
-
-static void
-gbb_endpoint_fwd_reset (gbp_endpoint_t * ge)
-{
- const gbp_route_domain_t *grd;
- const gbp_bridge_domain_t *gbd;
- gbp_endpoint_fwd_t *gef;
- const fib_prefix_t *pfx;
- index_t *ai;
-
- gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
- gef = &ge->ge_fwd;
-
- vec_foreach (pfx, ge->ge_key.gek_ips)
- {
- u32 fib_index;
-
- grd = gbp_route_domain_get (ge->ge_key.gek_grd);
- fib_index = grd->grd_fib_index[pfx->fp_proto];
-
- bd_add_del_ip_mac (gbd->gb_bd_index, fib_proto_to_ip46 (pfx->fp_proto),
- &pfx->fp_addr, &ge->ge_key.gek_mac, 0);
-
- /*
- * remove a host route
- */
- if (gbp_endpoint_is_remote (ge))
- {
- fib_table_entry_special_remove (fib_index, pfx, gbp_fib_source_hi);
- }
-
- fib_table_entry_delete (fib_index, pfx, gbp_fib_source_low);
- }
- vec_foreach (ai, gef->gef_adjs)
- {
- adj_unlock (*ai);
- }
-
- if (gbp_itf_hdl_is_valid (gef->gef_itf))
- {
- l2fib_del_entry (ge->ge_key.gek_mac.bytes,
- gbd->gb_bd_index,
- gbp_itf_get_sw_if_index (gef->gef_itf));
- }
-
- gbp_itf_unlock (&gef->gef_itf);
- vec_free (gef->gef_adjs);
-}
-
-static void
-gbb_endpoint_fwd_recalc (gbp_endpoint_t * ge)
-{
- const gbp_bridge_domain_t *gbd;
- const gbp_endpoint_group_t *gg;
- const gbp_route_domain_t *grd;
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_fwd_t *gef;
- const fib_prefix_t *pfx;
- index_t gei;
-
- /*
- * locations are sort in source priority order
- */
- gei = gbp_endpoint_index (ge);
- gel = &ge->ge_locs[0];
- gef = &ge->ge_fwd;
- gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
-
- gef->gef_flags = gel->gel_flags;
-
- if (INDEX_INVALID != gel->gel_epg)
- {
- gg = gbp_endpoint_group_get (gel->gel_epg);
- gef->gef_sclass = gg->gg_sclass;
- }
- else
- {
- gg = NULL;
- }
-
- gef->gef_itf = gbp_itf_clone_and_lock (gel->gel_itf);
-
- if (!mac_address_is_zero (&ge->ge_key.gek_mac))
- {
- gbp_itf_l2_set_input_feature (gef->gef_itf, L2INPUT_FEAT_GBP_FWD);
-
- if (gbp_endpoint_is_remote (ge) || gbp_endpoint_is_external (ge))
- {
- /*
- * bridged packets to external endpoints should be classifed
- * based on the EP's/BD's EPG
- */
- gbp_itf_l2_set_output_feature (gef->gef_itf,
- L2OUTPUT_FEAT_GBP_POLICY_MAC);
- }
- else
- {
- gbp_endpoint_add_itf (gbp_itf_get_sw_if_index (gef->gef_itf), gei);
- gbp_itf_l2_set_output_feature (gef->gef_itf,
- L2OUTPUT_FEAT_GBP_POLICY_PORT);
- }
- l2fib_add_entry (ge->ge_key.gek_mac.bytes,
- gbd->gb_bd_index,
- gbp_itf_get_sw_if_index (gef->gef_itf),
- L2FIB_ENTRY_RESULT_FLAG_STATIC);
- }
-
- vec_foreach (pfx, ge->ge_key.gek_ips)
- {
- ethernet_header_t *eth;
- u32 ip_sw_if_index;
- u32 fib_index;
- u8 *rewrite;
- index_t ai;
-
- rewrite = NULL;
- grd = gbp_route_domain_get (ge->ge_key.gek_grd);
- fib_index = grd->grd_fib_index[pfx->fp_proto];
- gef->gef_fib_index = fib_index;
-
- bd_add_del_ip_mac (gbd->gb_bd_index, fib_proto_to_ip46 (pfx->fp_proto),
- &pfx->fp_addr, &ge->ge_key.gek_mac, 1);
-
- /*
- * add a host route via the EPG's BVI we need this because the
- * adj fib does not install, due to cover refinement check, since
- * the BVI's prefix is /32
- */
- vec_validate (rewrite, sizeof (*eth) - 1);
- eth = (ethernet_header_t *) rewrite;
-
- eth->type = clib_host_to_net_u16 ((pfx->fp_proto == FIB_PROTOCOL_IP4 ?
- ETHERNET_TYPE_IP4 :
- ETHERNET_TYPE_IP6));
-
- if (gbp_endpoint_is_remote (ge))
- {
- /*
- * for dynamic EPs we must add the IP adjacency via the learned
- * tunnel since the BD will not contain the EP's MAC since it was
- * L3 learned. The dst MAC address used is the 'BD's MAC'.
- */
- ip_sw_if_index = gbp_itf_get_sw_if_index (gef->gef_itf);
-
- mac_address_to_bytes (gbp_route_domain_get_local_mac (),
- eth->src_address);
- mac_address_to_bytes (gbp_route_domain_get_remote_mac (),
- eth->dst_address);
- }
- else
- {
- /*
- * for the static EPs we add the IP adjacency via the BVI
- * knowing that the BD has the MAC address to route to and
- * that policy will be applied on egress to the EP's port
- */
- ip_sw_if_index = gbd->gb_bvi_sw_if_index;
-
- clib_memcpy (eth->src_address,
- vnet_sw_interface_get_hw_address (vnet_get_main (),
- ip_sw_if_index),
- sizeof (eth->src_address));
- mac_address_to_bytes (&ge->ge_key.gek_mac, eth->dst_address);
- }
-
- fib_table_entry_path_add (fib_index, pfx,
- gbp_fib_source_low,
- FIB_ENTRY_FLAG_NONE,
- fib_proto_to_dpo (pfx->fp_proto),
- &pfx->fp_addr, ip_sw_if_index,
- ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
-
- ai = adj_nbr_add_or_lock_w_rewrite (pfx->fp_proto,
- fib_proto_to_link (pfx->fp_proto),
- &pfx->fp_addr,
- ip_sw_if_index, rewrite);
- vec_add1 (gef->gef_adjs, ai);
-
- /*
- * if the endpoint is external then routed packet to it must be
- * classifed to the BD's EPG. but this will happen anyway with
- * the GBP_MAC classification.
- */
-
- if (NULL != gg)
- {
- if (gbp_endpoint_is_remote (ge))
- {
- dpo_id_t policy_dpo = DPO_INVALID;
-
- /*
- * interpose a policy DPO from the endpoint so that policy
- * is applied
- */
- gbp_policy_dpo_add_or_lock (fib_proto_to_dpo (pfx->fp_proto),
- grd->grd_scope,
- gg->gg_sclass, ~0, &policy_dpo);
-
- fib_table_entry_special_dpo_add (fib_index, pfx,
- gbp_fib_source_hi,
- FIB_ENTRY_FLAG_INTERPOSE,
- &policy_dpo);
- dpo_reset (&policy_dpo);
- }
-
- /*
- * send a gratuitous ARP on the EPG's uplink. this is done so
- * that if this EP has moved from some other place in the
- * 'fabric', upstream devices are informed
- */
- if (gbp_endpoint_is_local (ge) && ~0 != gg->gg_uplink_sw_if_index)
- {
- gbp_endpoint_add_itf (gbp_itf_get_sw_if_index (gef->gef_itf),
- gei);
- if (FIB_PROTOCOL_IP4 == pfx->fp_proto)
- ip4_neighbor_advertise (vlib_get_main (),
- vnet_get_main (),
- gg->gg_uplink_sw_if_index,
- &pfx->fp_addr.ip4);
- else
- ip6_neighbor_advertise (vlib_get_main (),
- vnet_get_main (),
- gg->gg_uplink_sw_if_index,
- &pfx->fp_addr.ip6);
- }
- }
- }
-
- if (gbp_endpoint_is_external (ge))
- {
- gbp_itf_l2_set_input_feature (gef->gef_itf,
- L2INPUT_FEAT_GBP_LPM_CLASSIFY);
- }
- else if (gbp_endpoint_is_local (ge))
- {
- /*
- * non-remote endpoints (i.e. those not arriving on iVXLAN
- * tunnels) need to be classifed based on the the input interface.
- * We enable the GBP-FWD feature only if the group has an uplink
- * interface (on which the GBP-FWD feature would send UU traffic).
- * External endpoints get classified based on an LPM match
- */
- l2input_feat_masks_t feats = L2INPUT_FEAT_GBP_SRC_CLASSIFY;
-
- if (NULL != gg && ~0 != gg->gg_uplink_sw_if_index)
- feats |= L2INPUT_FEAT_GBP_FWD;
- gbp_itf_l2_set_input_feature (gef->gef_itf, feats);
- }
-
- /*
- * update children with the new forwarding info
- */
- fib_node_back_walk_ctx_t bw_ctx = {
- .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
- .fnbw_flags = FIB_NODE_BW_FLAG_FORCE_SYNC,
- };
-
- fib_walk_sync (gbp_endpoint_fib_type, gei, &bw_ctx);
-}
-
-int
-gbp_endpoint_update_and_lock (gbp_endpoint_src_t src,
- u32 sw_if_index,
- const ip46_address_t * ips,
- const mac_address_t * mac,
- index_t gbdi, index_t grdi,
- sclass_t sclass,
- gbp_endpoint_flags_t flags,
- const ip46_address_t * tun_src,
- const ip46_address_t * tun_dst, u32 * handle)
-{
- gbp_bridge_domain_t *gbd;
- gbp_endpoint_group_t *gg;
- gbp_endpoint_src_t best;
- gbp_route_domain_t *grd;
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_t *ge;
- index_t ggi, gei;
- int rv;
-
- if (~0 == sw_if_index)
- return (VNET_API_ERROR_INVALID_SW_IF_INDEX);
-
- ge = NULL;
- gg = NULL;
-
- /*
- * we need to determine the bridge-domain, either from the EPG or
- * the BD passed
- */
- if (SCLASS_INVALID != sclass)
- {
- ggi = gbp_endpoint_group_find (sclass);
-
- if (INDEX_INVALID == ggi)
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-
- gg = gbp_endpoint_group_get (ggi);
- gbdi = gg->gg_gbd;
- grdi = gg->gg_rd;
- }
- else
- {
- if (INDEX_INVALID == gbdi)
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
- if (INDEX_INVALID == grdi)
- return (VNET_API_ERROR_NO_SUCH_FIB);
- ggi = INDEX_INVALID;
- }
-
- gbd = gbp_bridge_domain_get (gbdi);
- grd = gbp_route_domain_get (grdi);
- rv = gbp_endpoint_find_for_update (ips, grd, mac, gbd, &ge);
-
- if (0 != rv)
- return (rv);
-
- if (NULL == ge)
- {
- ge = gbp_endpoint_alloc (ips, grd, mac, gbd);
- }
- else
- {
- gbp_endpoint_ips_update (ge, ips, grd);
- }
-
- best = gbp_endpoint_get_best_src (ge);
- gei = gbp_endpoint_index (ge);
- gel = gbp_endpoint_loc_find_or_add (ge, src);
-
- gbp_endpoint_loc_update (ge, gel, gbd, sw_if_index, ggi, flags,
- tun_src, tun_dst);
-
- if (src <= best)
- {
- /*
- * either the best source has been updated or we have a new best source
- */
- gbb_endpoint_fwd_reset (ge);
- gbb_endpoint_fwd_recalc (ge);
- }
- else
- {
- /*
- * an update to a lower priority source, so we need do nothing
- */
- }
-
- if (handle)
- *handle = gei;
-
- GBP_ENDPOINT_INFO ("update: %U", format_gbp_endpoint, gei);
-
- return (0);
-}
-
-void
-gbp_endpoint_unlock (gbp_endpoint_src_t src, index_t gei)
-{
- gbp_endpoint_loc_t *gel, gel_copy;
- gbp_endpoint_src_t best;
- gbp_endpoint_t *ge;
- int removed;
-
- if (pool_is_free_index (gbp_endpoint_pool, gei))
- return;
-
- GBP_ENDPOINT_INFO ("delete: %U", format_gbp_endpoint, gei);
-
- ge = gbp_endpoint_get (gei);
-
- gel = gbp_endpoint_loc_find (ge, src);
-
- if (NULL == gel)
- return;
-
- /*
- * lock the EP so we can control when it is deleted
- */
- fib_node_lock (&ge->ge_node);
- best = gbp_endpoint_get_best_src (ge);
-
- /*
- * copy the location info since we'll lose it when it's removed from
- * the vector
- */
- clib_memcpy (&gel_copy, gel, sizeof (gel_copy));
-
- /*
- * remove the source we no longer need
- */
- removed = gbp_endpoint_loc_unlock (ge, gel);
-
- if (src == best)
- {
- /*
- * we have removed the old best source => recalculate fwding
- */
- if (0 == vec_len (ge->ge_locs))
- {
- /*
- * if there are no more sources left, then we need only release
- * the fwding resources held and then this EP is gawn.
- */
- gbb_endpoint_fwd_reset (ge);
- }
- else
- {
- /*
- * else there are more sources. release the old and get new
- * fwding objects
- */
- gbb_endpoint_fwd_reset (ge);
- gbb_endpoint_fwd_recalc (ge);
- }
- }
- /*
- * else
- * we removed a lower priority source so we need to do nothing
- */
-
- /*
- * clear up any resources held by the source
- */
- if (removed)
- gbp_endpoint_loc_destroy (&gel_copy);
-
- /*
- * remove the lock taken above
- */
- fib_node_unlock (&ge->ge_node);
- /*
- * We may have removed the last source and so this EP is now TOAST
- * DO NOTHING BELOW HERE
- */
-}
-
-u32
-gbp_endpoint_child_add (index_t gei,
- fib_node_type_t type, fib_node_index_t index)
-{
- return (fib_node_child_add (gbp_endpoint_fib_type, gei, type, index));
-}
-
-void
-gbp_endpoint_child_remove (index_t gei, u32 sibling)
-{
- return (fib_node_child_remove (gbp_endpoint_fib_type, gei, sibling));
-}
-
-typedef struct gbp_endpoint_flush_ctx_t_
-{
- u32 sw_if_index;
- gbp_endpoint_src_t src;
- index_t *geis;
-} gbp_endpoint_flush_ctx_t;
-
-static walk_rc_t
-gbp_endpoint_flush_cb (index_t gei, void *args)
-{
- gbp_endpoint_flush_ctx_t *ctx = args;
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_t *ge;
-
- ge = gbp_endpoint_get (gei);
- gel = gbp_endpoint_loc_find (ge, ctx->src);
-
- if ((NULL != gel) && ctx->sw_if_index == gel->tun.gel_parent_sw_if_index)
- {
- vec_add1 (ctx->geis, gei);
- }
-
- return (WALK_CONTINUE);
-}
-
-/**
- * remove all learnt endpoints using the interface
- */
-void
-gbp_endpoint_flush (gbp_endpoint_src_t src, u32 sw_if_index)
-{
- gbp_endpoint_flush_ctx_t ctx = {
- .sw_if_index = sw_if_index,
- .src = src,
- };
- index_t *gei;
-
- GBP_ENDPOINT_INFO ("flush: %U %U",
- format_gbp_endpoint_src, src,
- format_vnet_sw_if_index_name, vnet_get_main (),
- sw_if_index);
- gbp_endpoint_walk (gbp_endpoint_flush_cb, &ctx);
-
- vec_foreach (gei, ctx.geis)
- {
- gbp_endpoint_unlock (src, *gei);
- }
-
- vec_free (ctx.geis);
-}
-
-void
-gbp_endpoint_walk (gbp_endpoint_cb_t cb, void *ctx)
-{
- u32 index;
-
- /* *INDENT-OFF* */
- pool_foreach_index (index, gbp_endpoint_pool)
- {
- if (!cb(index, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static clib_error_t *
-gbp_endpoint_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- ip46_address_t ip = ip46_address_initializer, *ips = NULL;
- mac_address_t mac = ZERO_MAC_ADDRESS;
- vnet_main_t *vnm = vnet_get_main ();
- u32 sclass = SCLASS_INVALID;
- u32 handle = INDEX_INVALID;
- u32 sw_if_index = ~0;
- u32 flags = GBP_ENDPOINT_FLAG_NONE;
- u8 add = 1;
- int rv;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- ip46_address_reset (&ip);
-
- if (unformat (input, "%U", unformat_vnet_sw_interface,
- vnm, &sw_if_index))
- ;
- else if (unformat (input, "add"))
- add = 1;
- else if (unformat (input, "del"))
- add = 0;
- else if (unformat (input, "sclass %d", &sclass))
- ;
- else if (unformat (input, "handle %d", &handle))
- ;
- else if (unformat (input, "ip %U", unformat_ip4_address, &ip.ip4))
- vec_add1 (ips, ip);
- else if (unformat (input, "ip %U", unformat_ip6_address, &ip.ip6))
- vec_add1 (ips, ip);
- else if (unformat (input, "mac %U", unformat_mac_address, &mac))
- ;
- else if (unformat (input, "flags 0x%x", &flags))
- ;
- else
- break;
- }
-
- if (add)
- {
- if (~0 == sw_if_index)
- return clib_error_return (0, "interface must be specified");
- if (SCLASS_INVALID == sclass)
- return clib_error_return (0, "SCLASS must be specified");
-
- rv =
- gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
- sw_if_index, ips, &mac,
- INDEX_INVALID, INDEX_INVALID,
- sclass, flags, NULL, NULL, &handle);
-
- if (rv)
- return clib_error_return (0, "GBP Endpoint update returned %d", rv);
- else
- vlib_cli_output (vm, "handle %d\n", handle);
- }
- else
- {
- if (INDEX_INVALID == handle)
- return clib_error_return (0, "handle must be specified");
-
- gbp_endpoint_unlock (GBP_ENDPOINT_SRC_CP, handle);
- }
-
- vec_free (ips);
-
- return (NULL);
-}
-
-/*?
- * Configure a GBP Endpoint
- *
- * @cliexpar
- * @cliexstart{gbp endpoint del <handle> | [add] <interface> sclass <SCLASS> ip <IP> mac <MAC> [flags <flags>]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_endpoint_cli_node, static) = {
- .path = "gbp endpoint",
- .short_help = "gbp endpoint del <handle> | [add] <interface> sclass <SCLASS> ip <IP> mac <MAC> [flags <flags>]",
- .function = gbp_endpoint_cli,
-};
-/* *INDENT-ON* */
-
-u8 *
-format_gbp_endpoint_src (u8 * s, va_list * args)
-{
- gbp_endpoint_src_t action = va_arg (*args, gbp_endpoint_src_t);
-
- switch (action)
- {
-#define _(v,a) case GBP_ENDPOINT_SRC_##v: return (format (s, "%s", a));
- foreach_gbp_endpoint_src
-#undef _
- }
-
- return (format (s, "unknown"));
-}
-
-static u8 *
-format_gbp_endpoint_fwd (u8 * s, va_list * args)
-{
- gbp_endpoint_fwd_t *gef = va_arg (*args, gbp_endpoint_fwd_t *);
-
- s = format (s, "fwd:");
- s = format (s, "\n itf:[%U]", format_gbp_itf_hdl, gef->gef_itf);
- if (GBP_ENDPOINT_FLAG_NONE != gef->gef_flags)
- {
- s = format (s, " flags:%U", format_gbp_endpoint_flags, gef->gef_flags);
- }
-
- return (s);
-}
-
-static u8 *
-format_gbp_endpoint_key (u8 * s, va_list * args)
-{
- gbp_endpoint_key_t *gek = va_arg (*args, gbp_endpoint_key_t *);
- const fib_prefix_t *pfx;
-
- s = format (s, "ips:[");
-
- vec_foreach (pfx, gek->gek_ips)
- {
- s = format (s, "%U, ", format_fib_prefix, pfx);
- }
- s = format (s, "]");
-
- s = format (s, " mac:%U", format_mac_address_t, &gek->gek_mac);
-
- return (s);
-}
-
-static u8 *
-format_gbp_endpoint_loc (u8 * s, va_list * args)
-{
- gbp_endpoint_loc_t *gel = va_arg (*args, gbp_endpoint_loc_t *);
-
- s = format (s, "%U", format_gbp_endpoint_src, gel->gel_src);
- s = format (s, "\n EPG:%d [%U]", gel->gel_epg,
- format_gbp_itf_hdl, gel->gel_itf);
-
- if (GBP_ENDPOINT_FLAG_NONE != gel->gel_flags)
- {
- s = format (s, " flags:%U", format_gbp_endpoint_flags, gel->gel_flags);
- }
- if (GBP_ENDPOINT_FLAG_REMOTE & gel->gel_flags)
- {
- s = format (s, " tun:[");
- s = format (s, "parent:%U", format_vnet_sw_if_index_name,
- vnet_get_main (), gel->tun.gel_parent_sw_if_index);
- s = format (s, " {%U,%U}]",
- format_ip46_address, &gel->tun.gel_src, IP46_TYPE_ANY,
- format_ip46_address, &gel->tun.gel_dst, IP46_TYPE_ANY);
- }
-
- return (s);
-}
-
-u8 *
-format_gbp_endpoint (u8 * s, va_list * args)
-{
- index_t gei = va_arg (*args, index_t);
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_t *ge;
-
- ge = gbp_endpoint_get (gei);
-
- s = format (s, "[@%d] %U", gei, format_gbp_endpoint_key, &ge->ge_key);
- s = format (s, " last-time:[%f]", ge->ge_last_time);
-
- vec_foreach (gel, ge->ge_locs)
- {
- s = format (s, "\n %U", format_gbp_endpoint_loc, gel);
- }
- s = format (s, "\n %U", format_gbp_endpoint_fwd, &ge->ge_fwd);
-
- return s;
-}
-
-static walk_rc_t
-gbp_endpoint_show_one (index_t gei, void *ctx)
-{
- vlib_main_t *vm;
-
- vm = ctx;
- vlib_cli_output (vm, " %U", format_gbp_endpoint, gei);
-
- return (WALK_CONTINUE);
-}
-
-static int
-gbp_endpoint_walk_ip_itf (clib_bihash_kv_24_8_t * kvp, void *arg)
-{
- ip46_address_t ip;
- vlib_main_t *vm;
- u32 sw_if_index;
-
- vm = arg;
-
- gbp_endpoint_extract_key_ip_itf (kvp, &ip, &sw_if_index);
-
- vlib_cli_output (vm, " {%U, %U} -> %d",
- format_ip46_address, &ip, IP46_TYPE_ANY,
- format_vnet_sw_if_index_name, vnet_get_main (),
- sw_if_index, kvp->value);
- return (BIHASH_WALK_CONTINUE);
-}
-
-static int
-gbp_endpoint_walk_mac_itf (clib_bihash_kv_16_8_t * kvp, void *arg)
-{
- mac_address_t mac;
- vlib_main_t *vm;
- u32 sw_if_index;
-
- vm = arg;
-
- gbp_endpoint_extract_key_mac_itf (kvp, &mac, &sw_if_index);
-
- vlib_cli_output (vm, " {%U, %U} -> %d",
- format_mac_address_t, &mac,
- format_vnet_sw_if_index_name, vnet_get_main (),
- sw_if_index, kvp->value);
- return (BIHASH_WALK_CONTINUE);
-}
-
-static clib_error_t *
-gbp_endpoint_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- u32 show_dbs, handle;
-
- handle = INDEX_INVALID;
- show_dbs = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "%d", &handle))
- ;
- else if (unformat (input, "db"))
- show_dbs = 1;
- else
- break;
- }
-
- if (INDEX_INVALID != handle)
- {
- vlib_cli_output (vm, "%U", format_gbp_endpoint, handle);
- }
- else if (show_dbs)
- {
- vlib_cli_output (vm, "\nDatabases:");
- clib_bihash_foreach_key_value_pair_24_8 (&gbp_ep_db.ged_by_ip_rd,
- gbp_endpoint_walk_ip_itf, vm);
- clib_bihash_foreach_key_value_pair_16_8
- (&gbp_ep_db.ged_by_mac_bd, gbp_endpoint_walk_mac_itf, vm);
- }
- else
- {
- vlib_cli_output (vm, "Endpoints:");
- gbp_endpoint_walk (gbp_endpoint_show_one, vm);
- }
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy Endpoints and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp endpoint}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_endpoint_show_node, static) = {
- .path = "show gbp endpoint",
- .short_help = "show gbp endpoint\n",
- .function = gbp_endpoint_show,
-};
-/* *INDENT-ON* */
-
-static void
-gbp_endpoint_check (index_t gei, f64 start_time)
-{
- gbp_endpoint_group_t *gg;
- gbp_endpoint_loc_t *gel;
- gbp_endpoint_t *ge;
-
- ge = gbp_endpoint_get (gei);
- gel = gbp_endpoint_loc_find (ge, GBP_ENDPOINT_SRC_DP);
-
- if (NULL != gel)
- {
- gg = gbp_endpoint_group_get (gel->gel_epg);
-
- if ((start_time - ge->ge_last_time) >
- gg->gg_retention.remote_ep_timeout)
- {
- gbp_endpoint_unlock (GBP_ENDPOINT_SRC_DP, gei);
- }
- }
-}
-
-static void
-gbp_endpoint_scan_l2 (vlib_main_t * vm)
-{
- clib_bihash_16_8_t *gte_table = &gbp_ep_db.ged_by_mac_bd;
- f64 last_start, start_time, delta_t;
- int i, j, k;
-
- if (!gte_table->instantiated)
- return;
-
- delta_t = 0;
- last_start = start_time = vlib_time_now (vm);
-
- for (i = 0; i < gte_table->nbuckets; i++)
- {
- clib_bihash_bucket_16_8_t *b;
- clib_bihash_value_16_8_t *v;
-
- /* allow no more than 20us without a pause */
- delta_t = vlib_time_now (vm) - last_start;
- if (delta_t > 20e-6)
- {
- /* suspend for 100 us */
- vlib_process_suspend (vm, 100e-6);
- last_start = vlib_time_now (vm);
- }
-
- b = clib_bihash_get_bucket_16_8 (gte_table, i);
- if (clib_bihash_bucket_is_empty_16_8 (b))
- continue;
- v = clib_bihash_get_value_16_8 (gte_table, b->offset);
-
- for (j = 0; j < (1 << b->log2_pages); j++)
- {
- for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
- {
- if (clib_bihash_is_free_16_8 (&v->kvp[k]))
- continue;
-
- gbp_endpoint_check (v->kvp[k].value, start_time);
-
- /*
- * Note: we may have just freed the bucket's backing
- * storage, so check right here...
- */
- if (clib_bihash_bucket_is_empty_16_8 (b))
- goto doublebreak;
- }
- v++;
- }
- doublebreak:
- ;
- }
-}
-
-static void
-gbp_endpoint_scan_l3 (vlib_main_t * vm)
-{
- clib_bihash_24_8_t *gte_table = &gbp_ep_db.ged_by_ip_rd;
- f64 last_start, start_time, delta_t;
- int i, j, k;
-
- if (!gte_table->instantiated)
- return;
-
- delta_t = 0;
- last_start = start_time = vlib_time_now (vm);
-
- for (i = 0; i < gte_table->nbuckets; i++)
- {
- clib_bihash_bucket_24_8_t *b;
- clib_bihash_value_24_8_t *v;
-
- /* allow no more than 20us without a pause */
- delta_t = vlib_time_now (vm) - last_start;
- if (delta_t > 20e-6)
- {
- /* suspend for 100 us */
- vlib_process_suspend (vm, 100e-6);
- last_start = vlib_time_now (vm);
- }
-
- b = clib_bihash_get_bucket_24_8 (gte_table, i);
- if (clib_bihash_bucket_is_empty_24_8 (b))
- continue;
- v = clib_bihash_get_value_24_8 (gte_table, b->offset);
-
- for (j = 0; j < (1 << b->log2_pages); j++)
- {
- for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
- {
- if (clib_bihash_is_free_24_8 (&v->kvp[k]))
- continue;
-
- gbp_endpoint_check (v->kvp[k].value, start_time);
-
- /*
- * Note: we may have just freed the bucket's backing
- * storage, so check right here...
- */
- if (clib_bihash_bucket_is_empty_24_8 (b))
- goto doublebreak;
- }
- v++;
- }
- doublebreak:
- ;
- }
-}
-
-void
-gbp_endpoint_scan (vlib_main_t * vm)
-{
- gbp_endpoint_scan_l2 (vm);
- gbp_endpoint_scan_l3 (vm);
-}
-
-static fib_node_t *
-gbp_endpoint_get_node (fib_node_index_t index)
-{
- gbp_endpoint_t *ge;
-
- ge = gbp_endpoint_get (index);
-
- return (&ge->ge_node);
-}
-
-static gbp_endpoint_t *
-gbp_endpoint_from_fib_node (fib_node_t * node)
-{
- ASSERT (gbp_endpoint_fib_type == node->fn_type);
- return ((gbp_endpoint_t *) node);
-}
-
-static void
-gbp_endpoint_last_lock_gone (fib_node_t * node)
-{
- const gbp_bridge_domain_t *gbd;
- const gbp_route_domain_t *grd;
- const fib_prefix_t *pfx;
- gbp_endpoint_t *ge;
-
- ge = gbp_endpoint_from_fib_node (node);
-
- ASSERT (0 == vec_len (ge->ge_locs));
-
- gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
-
- /*
- * we have removed the last source. this EP is toast
- */
- if (INDEX_INVALID != ge->ge_key.gek_gbd)
- {
- gbp_endpoint_del_mac (&ge->ge_key.gek_mac, gbd->gb_bd_index);
- }
- vec_foreach (pfx, ge->ge_key.gek_ips)
- {
- grd = gbp_route_domain_get (ge->ge_key.gek_grd);
- gbp_endpoint_del_ip (&pfx->fp_addr, grd->grd_fib_index[pfx->fp_proto]);
- }
- pool_put (gbp_endpoint_pool, ge);
-}
-
-static fib_node_back_walk_rc_t
-gbp_endpoint_back_walk_notify (fib_node_t * node,
- fib_node_back_walk_ctx_t * ctx)
-{
- ASSERT (0);
-
- return (FIB_NODE_BACK_WALK_CONTINUE);
-}
-
-/*
- * The FIB path's graph node virtual function table
- */
-static const fib_node_vft_t gbp_endpoint_vft = {
- .fnv_get = gbp_endpoint_get_node,
- .fnv_last_lock = gbp_endpoint_last_lock_gone,
- .fnv_back_walk = gbp_endpoint_back_walk_notify,
- // .fnv_mem_show = fib_path_memory_show,
-};
-
-static clib_error_t *
-gbp_endpoint_init (vlib_main_t * vm)
-{
-#define GBP_EP_HASH_NUM_BUCKETS (2 * 1024)
-#define GBP_EP_HASH_MEMORY_SIZE (1 << 20)
-
- clib_bihash_init_24_8 (&gbp_ep_db.ged_by_ip_rd,
- "GBP Endpoints - IP/RD",
- GBP_EP_HASH_NUM_BUCKETS, GBP_EP_HASH_MEMORY_SIZE);
-
- clib_bihash_init_16_8 (&gbp_ep_db.ged_by_mac_bd,
- "GBP Endpoints - MAC/BD",
- GBP_EP_HASH_NUM_BUCKETS, GBP_EP_HASH_MEMORY_SIZE);
-
- gbp_ep_logger = vlib_log_register_class ("gbp", "ep");
- gbp_endpoint_fib_type = fib_node_register_new_type (&gbp_endpoint_vft);
- gbp_fib_source_hi = fib_source_allocate ("gbp-endpoint-hi",
- FIB_SOURCE_PRIORITY_HI,
- FIB_SOURCE_BH_SIMPLE);
- gbp_fib_source_low = fib_source_allocate ("gbp-endpoint-low",
- FIB_SOURCE_PRIORITY_LOW,
- FIB_SOURCE_BH_SIMPLE);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_endpoint_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_endpoint.h b/src/plugins/gbp/gbp_endpoint.h
deleted file mode 100644
index 3155e7be4e0..00000000000
--- a/src/plugins/gbp/gbp_endpoint.h
+++ /dev/null
@@ -1,376 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_ENDPOINT_H__
-#define __GBP_ENDPOINT_H__
-
-#include <plugins/gbp/gbp_types.h>
-#include <plugins/gbp/gbp_itf.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/mac_address.h>
-
-#include <vppinfra/bihash_16_8.h>
-#include <vppinfra/bihash_template.h>
-#include <vppinfra/bihash_24_8.h>
-#include <vppinfra/bihash_template.h>
-
-/**
- * Flags for each endpoint
- */
-typedef enum gbp_endpoint_attr_t_
-{
- GBP_ENDPOINT_ATTR_FIRST = 0,
- GBP_ENDPOINT_ATTR_BOUNCE = GBP_ENDPOINT_ATTR_FIRST,
- GBP_ENDPOINT_ATTR_REMOTE,
- GBP_ENDPOINT_ATTR_LEARNT,
- GBP_ENDPOINT_ATTR_EXTERNAL,
- GBP_ENDPOINT_ATTR_LAST,
-} gbp_endpoint_attr_t;
-
-typedef enum gbp_endpoint_flags_t_
-{
- GBP_ENDPOINT_FLAG_NONE = 0,
- GBP_ENDPOINT_FLAG_BOUNCE = (1 << GBP_ENDPOINT_ATTR_BOUNCE),
- GBP_ENDPOINT_FLAG_REMOTE = (1 << GBP_ENDPOINT_ATTR_REMOTE),
- GBP_ENDPOINT_FLAG_LEARNT = (1 << GBP_ENDPOINT_ATTR_LEARNT),
- GBP_ENDPOINT_FLAG_EXTERNAL = (1 << GBP_ENDPOINT_ATTR_EXTERNAL),
-} gbp_endpoint_flags_t;
-
-#define GBP_ENDPOINT_ATTR_NAMES { \
- [GBP_ENDPOINT_ATTR_BOUNCE] = "bounce", \
- [GBP_ENDPOINT_ATTR_REMOTE] = "remote", \
- [GBP_ENDPOINT_ATTR_LEARNT] = "learnt", \
- [GBP_ENDPOINT_ATTR_EXTERNAL] = "external", \
-}
-
-extern u8 *format_gbp_endpoint_flags (u8 * s, va_list * args);
-
-/**
- * Sources of Endpoints in priority order. The best (lowest value) source
- * provides the forwarding information.
- * Data-plane takes preference because the CP data is not always complete,
- * it may not have the sclass.
- */
-#define foreach_gbp_endpoint_src \
- _(DP, "data-plane") \
- _(CP, "control-plane") \
- _(RR, "recursive-resolution")
-
-typedef enum gbp_endpoint_src_t_
-{
-#define _(v,s) GBP_ENDPOINT_SRC_##v,
- foreach_gbp_endpoint_src
-#undef _
-} gbp_endpoint_src_t;
-
-#define GBP_ENDPOINT_SRC_MAX (GBP_ENDPOINT_SRC_RR+1)
-
-extern u8 *format_gbp_endpoint_src (u8 * s, va_list * args);
-
-/**
- * This is the identity of an endpoint, as such it is information
- * about an endpoint that is idempotent.
- * The ID is used to add the EP into the various data-bases for retrieval.
- */
-typedef struct gbp_endpoint_key_t_
-{
- /**
- * A vector of ip addresses that belong to the endpoint.
- * Together with the route EPG's RD this forms the EP's L3 key
- */
- fib_prefix_t *gek_ips;
-
- /**
- * MAC address of the endpoint.
- * Together with the route EPG's BD this forms the EP's L2 key
- */
- mac_address_t gek_mac;
-
- /**
- * Index of the Bridge-Domain
- */
- index_t gek_gbd;
-
- /**
- * Index of the Route-Domain
- */
- index_t gek_grd;
-} gbp_endpoint_key_t;
-
-/**
- * Information about the location of the endpoint provided by a source
- * of endpoints
- */
-typedef struct gbp_endpoint_loc_t_
-{
- /**
- * The source providing this location information
- */
- gbp_endpoint_src_t gel_src;
-
- /**
- * The interface on which the EP is connected
- */
- gbp_itf_hdl_t gel_itf;
-
- /**
- * Endpoint flags
- */
- gbp_endpoint_flags_t gel_flags;
-
- /**
- * Endpoint Group.
- */
- index_t gel_epg;
-
- /**
- * number of times this source has locked this
- */
- u32 gel_locks;
-
- /**
- * Tunnel info for remote endpoints
- */
- struct
- {
- u32 gel_parent_sw_if_index;
- ip46_address_t gel_src;
- ip46_address_t gel_dst;
- } tun;
-} gbp_endpoint_loc_t;
-
-/**
- * And endpoints current forwarding state
- */
-typedef struct gbp_endpoint_fwd_t_
-{
- /**
- * The interface on which the EP is connected
- */
- gbp_itf_hdl_t gef_itf;
-
- /**
- * The L3 adj, if created
- */
- index_t *gef_adjs;
-
- /**
- * Endpoint Group's sclass. cached for fast DP access.
- */
- sclass_t gef_sclass;
-
- /**
- * FIB index the EP is in
- */
- u32 gef_fib_index;
-
- gbp_endpoint_flags_t gef_flags;
-} gbp_endpoint_fwd_t;
-
-/**
- * A Group Based Policy Endpoint.
- * This is typically a VM or container. If the endpoint is local (i.e. on
- * the same compute node as VPP) then there is one interface per-endpoint.
- * If the EP is remote,e.g. reachable over a [vxlan] tunnel, then there
- * will be multiple EPs reachable over the tunnel and they can be distinguished
- * via either their MAC or IP Address[es].
- */
-typedef struct gbp_endpoint_t_
-{
- /**
- * A FIB node that allows the tracking of children.
- */
- fib_node_t ge_node;
-
- /**
- * The key/ID of this EP
- */
- gbp_endpoint_key_t ge_key;
-
- /**
- * Location information provided by the various sources.
- * These are sorted based on source priority.
- */
- gbp_endpoint_loc_t *ge_locs;
-
- gbp_endpoint_fwd_t ge_fwd;
-
- /**
- * The last time a packet from seen from this end point
- */
- f64 ge_last_time;
-} gbp_endpoint_t;
-
-extern u8 *format_gbp_endpoint (u8 * s, va_list * args);
-
-/**
- * GBP Endpoint Databases
- */
-typedef struct gbp_ep_by_ip_itf_db_t_
-{
- index_t *ged_by_sw_if_index;
- clib_bihash_24_8_t ged_by_ip_rd;
- clib_bihash_16_8_t ged_by_mac_bd;
-} gbp_ep_db_t;
-
-extern int gbp_endpoint_update_and_lock (gbp_endpoint_src_t src,
- u32 sw_if_index,
- const ip46_address_t * ip,
- const mac_address_t * mac,
- index_t gbd, index_t grd,
- sclass_t sclass,
- gbp_endpoint_flags_t flags,
- const ip46_address_t * tun_src,
- const ip46_address_t * tun_dst,
- u32 * handle);
-extern void gbp_endpoint_unlock (gbp_endpoint_src_t src, index_t gbpei);
-extern u32 gbp_endpoint_child_add (index_t gei,
- fib_node_type_t type,
- fib_node_index_t index);
-extern void gbp_endpoint_child_remove (index_t gei, u32 sibling);
-
-typedef walk_rc_t (*gbp_endpoint_cb_t) (index_t gbpei, void *ctx);
-extern void gbp_endpoint_walk (gbp_endpoint_cb_t cb, void *ctx);
-extern void gbp_endpoint_scan (vlib_main_t * vm);
-extern int gbp_endpoint_is_remote (const gbp_endpoint_t * ge);
-extern int gbp_endpoint_is_local (const gbp_endpoint_t * ge);
-extern int gbp_endpoint_is_external (const gbp_endpoint_t * ge);
-extern int gbp_endpoint_is_learnt (const gbp_endpoint_t * ge);
-
-
-extern void gbp_endpoint_flush (gbp_endpoint_src_t src, u32 sw_if_index);
-
-/**
- * DP functions and databases
- */
-extern gbp_ep_db_t gbp_ep_db;
-extern gbp_endpoint_t *gbp_endpoint_pool;
-
-/**
- * Get the endpoint from a port/interface
- */
-always_inline gbp_endpoint_t *
-gbp_endpoint_get (index_t gbpei)
-{
- return (pool_elt_at_index (gbp_endpoint_pool, gbpei));
-}
-
-static_always_inline void
-gbp_endpoint_mk_key_mac (const u8 * mac,
- u32 bd_index, clib_bihash_kv_16_8_t * key)
-{
- key->key[0] = ethernet_mac_address_u64 (mac);
- key->key[1] = bd_index;
-}
-
-static_always_inline gbp_endpoint_t *
-gbp_endpoint_find_mac (const u8 * mac, u32 bd_index)
-{
- clib_bihash_kv_16_8_t key, value;
- int rv;
-
- gbp_endpoint_mk_key_mac (mac, bd_index, &key);
-
- rv = clib_bihash_search_16_8 (&gbp_ep_db.ged_by_mac_bd, &key, &value);
-
- if (0 != rv)
- return NULL;
-
- return (gbp_endpoint_get (value.value));
-}
-
-static_always_inline void
-gbp_endpoint_mk_key_ip (const ip46_address_t * ip,
- u32 fib_index, clib_bihash_kv_24_8_t * key)
-{
- key->key[0] = ip->as_u64[0];
- key->key[1] = ip->as_u64[1];
- key->key[2] = fib_index;
-}
-
-static_always_inline void
-gbp_endpoint_mk_key_ip4 (const ip4_address_t * ip,
- u32 fib_index, clib_bihash_kv_24_8_t * key)
-{
- const ip46_address_t a = {
- .ip4 = *ip,
- };
- gbp_endpoint_mk_key_ip (&a, fib_index, key);
-}
-
-static_always_inline gbp_endpoint_t *
-gbp_endpoint_find_ip4 (const ip4_address_t * ip, u32 fib_index)
-{
- clib_bihash_kv_24_8_t key, value;
- int rv;
-
- gbp_endpoint_mk_key_ip4 (ip, fib_index, &key);
-
- rv = clib_bihash_search_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, &value);
-
- if (0 != rv)
- return NULL;
-
- return (gbp_endpoint_get (value.value));
-}
-
-static_always_inline void
-gbp_endpoint_mk_key_ip6 (const ip6_address_t * ip,
- u32 fib_index, clib_bihash_kv_24_8_t * key)
-{
- key->key[0] = ip->as_u64[0];
- key->key[1] = ip->as_u64[1];
- key->key[2] = fib_index;
-}
-
-static_always_inline gbp_endpoint_t *
-gbp_endpoint_find_ip6 (const ip6_address_t * ip, u32 fib_index)
-{
- clib_bihash_kv_24_8_t key, value;
- int rv;
-
- gbp_endpoint_mk_key_ip6 (ip, fib_index, &key);
-
- rv = clib_bihash_search_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, &value);
-
- if (0 != rv)
- return NULL;
-
- return (gbp_endpoint_get (value.value));
-}
-
-static_always_inline gbp_endpoint_t *
-gbp_endpoint_find_itf (u32 sw_if_index)
-{
- index_t gei;
-
- gei = gbp_ep_db.ged_by_sw_if_index[sw_if_index];
-
- if (INDEX_INVALID != gei)
- return (gbp_endpoint_get (gei));
-
- return (NULL);
-}
-
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_endpoint_group.c b/src/plugins/gbp/gbp_endpoint_group.c
deleted file mode 100644
index b9044378e3b..00000000000
--- a/src/plugins/gbp/gbp_endpoint_group.c
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
- * gbp.h : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_endpoint_group.h>
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#include <vnet/dpo/dvr_dpo.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/l2/l2_input.h>
-
-/**
- * Pool of GBP endpoint_groups
- */
-gbp_endpoint_group_t *gbp_endpoint_group_pool;
-
-/**
- * DB of endpoint_groups
- */
-gbp_endpoint_group_db_t gbp_endpoint_group_db;
-
-/**
- * Map sclass to EPG
- */
-uword *gbp_epg_sclass_db;
-
-vlib_log_class_t gg_logger;
-
-#define GBP_EPG_DBG(...) \
- vlib_log_debug (gg_logger, __VA_ARGS__);
-
-gbp_endpoint_group_t *
-gbp_endpoint_group_get (index_t i)
-{
- return (pool_elt_at_index (gbp_endpoint_group_pool, i));
-}
-
-void
-gbp_endpoint_group_lock (index_t ggi)
-{
- gbp_endpoint_group_t *gg;
-
- if (INDEX_INVALID == ggi)
- return;
-
- gg = gbp_endpoint_group_get (ggi);
- gg->gg_locks++;
-}
-
-index_t
-gbp_endpoint_group_find (sclass_t sclass)
-{
- uword *p;
-
- p = hash_get (gbp_endpoint_group_db.gg_hash_sclass, sclass);
-
- if (NULL != p)
- return p[0];
-
- return (INDEX_INVALID);
-}
-
-int
-gbp_endpoint_group_add_and_lock (vnid_t vnid,
- u16 sclass,
- u32 bd_id,
- u32 rd_id,
- u32 uplink_sw_if_index,
- const gbp_endpoint_retention_t * retention)
-{
- gbp_endpoint_group_t *gg;
- index_t ggi;
-
- ggi = gbp_endpoint_group_find (sclass);
-
- if (INDEX_INVALID == ggi)
- {
- fib_protocol_t fproto;
- index_t gbi, grdi;
-
- gbi = gbp_bridge_domain_find_and_lock (bd_id);
-
- if (~0 == gbi)
- return (VNET_API_ERROR_BD_NOT_MODIFIABLE);
-
- grdi = gbp_route_domain_find_and_lock (rd_id);
-
- if (~0 == grdi)
- {
- gbp_bridge_domain_unlock (gbi);
- return (VNET_API_ERROR_NO_SUCH_FIB);
- }
-
- pool_get_zero (gbp_endpoint_group_pool, gg);
-
- gg->gg_vnid = vnid;
- gg->gg_rd = grdi;
- gg->gg_gbd = gbi;
-
- gg->gg_uplink_sw_if_index = uplink_sw_if_index;
- gbp_itf_hdl_reset (&gg->gg_uplink_itf);
- gg->gg_locks = 1;
- gg->gg_sclass = sclass;
- gg->gg_retention = *retention;
-
- if (SCLASS_INVALID != gg->gg_sclass)
- hash_set (gbp_epg_sclass_db, gg->gg_sclass, gg->gg_vnid);
-
- /*
- * an egress DVR dpo for internal subnets to use when sending
- * on the uplink interface
- */
- if (~0 != gg->gg_uplink_sw_if_index)
- {
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- dvr_dpo_add_or_lock (uplink_sw_if_index,
- fib_proto_to_dpo (fproto),
- &gg->gg_dpo[fproto]);
- }
-
- /*
- * Add the uplink to the BD
- * packets direct from the uplink have had policy applied
- */
- gg->gg_uplink_itf =
- gbp_itf_l2_add_and_lock (gg->gg_uplink_sw_if_index, gbi);
-
- gbp_itf_l2_set_input_feature (gg->gg_uplink_itf,
- L2INPUT_FEAT_GBP_NULL_CLASSIFY);
- }
-
- hash_set (gbp_endpoint_group_db.gg_hash_sclass,
- gg->gg_sclass, gg - gbp_endpoint_group_pool);
- }
- else
- {
- gg = gbp_endpoint_group_get (ggi);
- gg->gg_locks++;
- }
-
- GBP_EPG_DBG ("add: %U", format_gbp_endpoint_group, gg);
-
- return (0);
-}
-
-void
-gbp_endpoint_group_unlock (index_t ggi)
-{
- gbp_endpoint_group_t *gg;
-
- if (INDEX_INVALID == ggi)
- return;
-
- gg = gbp_endpoint_group_get (ggi);
-
- gg->gg_locks--;
-
- if (0 == gg->gg_locks)
- {
- fib_protocol_t fproto;
-
- gg = pool_elt_at_index (gbp_endpoint_group_pool, ggi);
-
- gbp_itf_unlock (&gg->gg_uplink_itf);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- dpo_reset (&gg->gg_dpo[fproto]);
- }
- gbp_bridge_domain_unlock (gg->gg_gbd);
- gbp_route_domain_unlock (gg->gg_rd);
-
- if (SCLASS_INVALID != gg->gg_sclass)
- hash_unset (gbp_epg_sclass_db, gg->gg_sclass);
- hash_unset (gbp_endpoint_group_db.gg_hash_sclass, gg->gg_sclass);
-
- pool_put (gbp_endpoint_group_pool, gg);
- }
-}
-
-int
-gbp_endpoint_group_delete (sclass_t sclass)
-{
- index_t ggi;
-
- ggi = gbp_endpoint_group_find (sclass);
-
- if (INDEX_INVALID != ggi)
- {
- GBP_EPG_DBG ("del: %U", format_gbp_endpoint_group,
- gbp_endpoint_group_get (ggi));
- gbp_endpoint_group_unlock (ggi);
-
- return (0);
- }
-
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-}
-
-u32
-gbp_endpoint_group_get_bd_id (const gbp_endpoint_group_t * gg)
-{
- const gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gg->gg_gbd);
-
- return (gb->gb_bd_id);
-}
-
-index_t
-gbp_endpoint_group_get_fib_index (const gbp_endpoint_group_t * gg,
- fib_protocol_t fproto)
-{
- const gbp_route_domain_t *grd;
-
- grd = gbp_route_domain_get (gg->gg_rd);
-
- return (grd->grd_fib_index[fproto]);
-}
-
-void
-gbp_endpoint_group_walk (gbp_endpoint_group_cb_t cb, void *ctx)
-{
- gbp_endpoint_group_t *gbpe;
-
- /* *INDENT-OFF* */
- pool_foreach (gbpe, gbp_endpoint_group_pool)
- {
- if (!cb(gbpe, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static clib_error_t *
-gbp_endpoint_group_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- gbp_endpoint_retention_t retention = { 0 };
- vnid_t vnid = VNID_INVALID, sclass;
- vnet_main_t *vnm = vnet_get_main ();
- u32 uplink_sw_if_index = ~0;
- u32 bd_id = ~0;
- u32 rd_id = ~0;
- u8 add = 1;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "%U", unformat_vnet_sw_interface,
- vnm, &uplink_sw_if_index))
- ;
- else if (unformat (input, "add"))
- add = 1;
- else if (unformat (input, "del"))
- add = 0;
- else if (unformat (input, "epg %d", &vnid))
- ;
- else if (unformat (input, "sclass %d", &sclass))
- ;
- else if (unformat (input, "bd %d", &bd_id))
- ;
- else if (unformat (input, "rd %d", &rd_id))
- ;
- else
- break;
- }
-
- if (VNID_INVALID == vnid)
- return clib_error_return (0, "EPG-ID must be specified");
-
- if (add)
- {
- if (~0 == bd_id)
- return clib_error_return (0, "Bridge-domain must be specified");
- if (~0 == rd_id)
- return clib_error_return (0, "route-domain must be specified");
-
- gbp_endpoint_group_add_and_lock (vnid, sclass, bd_id, rd_id,
- uplink_sw_if_index, &retention);
- }
- else
- gbp_endpoint_group_delete (vnid);
-
- return (NULL);
-}
-
-/*?
- * Configure a GBP Endpoint Group
- *
- * @cliexpar
- * @cliexstart{gbp endpoint-group [del] epg <ID> bd <ID> rd <ID> [sclass <ID>] [<interface>]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_endpoint_group_cli_node, static) = {
- .path = "gbp endpoint-group",
- .short_help = "gbp endpoint-group [del] epg <ID> bd <ID> rd <ID> [sclass <ID>] [<interface>]",
- .function = gbp_endpoint_group_cli,
-};
-
-static u8 *
-format_gbp_endpoint_retention (u8 * s, va_list * args)
-{
- gbp_endpoint_retention_t *rt = va_arg (*args, gbp_endpoint_retention_t*);
-
- s = format (s, "[remote-EP-timeout:%d]", rt->remote_ep_timeout);
-
- return (s);
-}
-
-u8 *
-format_gbp_endpoint_group (u8 * s, va_list * args)
-{
- gbp_endpoint_group_t *gg = va_arg (*args, gbp_endpoint_group_t*);
-
- if (NULL != gg)
- s = format (s, "[%d] %d, sclass:%d bd:%d rd:%d uplink:%U retention:%U locks:%d",
- gg - gbp_endpoint_group_pool,
- gg->gg_vnid,
- gg->gg_sclass,
- gg->gg_gbd,
- gg->gg_rd,
- format_gbp_itf_hdl, gg->gg_uplink_itf,
- format_gbp_endpoint_retention, &gg->gg_retention,
- gg->gg_locks);
- else
- s = format (s, "NULL");
-
- return (s);
-}
-
-static int
-gbp_endpoint_group_show_one (gbp_endpoint_group_t *gg, void *ctx)
-{
- vlib_main_t *vm;
-
- vm = ctx;
- vlib_cli_output (vm, " %U",format_gbp_endpoint_group, gg);
-
- return (1);
-}
-
-static clib_error_t *
-gbp_endpoint_group_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "Endpoint-Groups:");
- gbp_endpoint_group_walk (gbp_endpoint_group_show_one, vm);
-
- return (NULL);
-}
-
-
-/*?
- * Show Group Based Policy Endpoint_Groups and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp endpoint_group}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_endpoint_group_show_node, static) = {
- .path = "show gbp endpoint-group",
- .short_help = "show gbp endpoint-group\n",
- .function = gbp_endpoint_group_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_endpoint_group_init (vlib_main_t * vm)
-{
- gg_logger = vlib_log_register_class ("gbp", "epg");
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_endpoint_group_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_endpoint_group.h b/src/plugins/gbp/gbp_endpoint_group.h
deleted file mode 100644
index c5fdff8463d..00000000000
--- a/src/plugins/gbp/gbp_endpoint_group.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_ENDPOINT_GROUP_H__
-#define __GBP_ENDPOINT_GROUP_H__
-
-#include <plugins/gbp/gbp_types.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#include <vnet/fib/fib_types.h>
-
-/**
- * Endpoint Retnetion Policy
- */
-typedef struct gbp_endpoint_retention_t_
-{
- /** Aging timeout for remote endpoints */
- u32 remote_ep_timeout;
-} gbp_endpoint_retention_t;
-
-/**
- * An Endpoint Group representation
- */
-typedef struct gpb_endpoint_group_t_
-{
- /**
- * ID
- */
- vnid_t gg_vnid;
-
- /**
- * Sclass. Could be unset => ~0
- */
- u16 gg_sclass;
-
- /**
- * Bridge-domain ID the EPG is in
- */
- index_t gg_gbd;
-
- /**
- * route-domain/IP-table ID the EPG is in
- */
- index_t gg_rd;
-
- /**
- * Is the EPG an external/NAT
- */
- u8 gg_is_ext;
-
- /**
- * the uplink interface dedicated to the EPG
- */
- u32 gg_uplink_sw_if_index;
- gbp_itf_hdl_t gg_uplink_itf;
-
- /**
- * The DPO used in the L3 path for forwarding internal subnets
- */
- dpo_id_t gg_dpo[FIB_PROTOCOL_IP_MAX];
-
- /**
- * Locks/references to this EPG
- */
- u32 gg_locks;
-
- /**
- * EP retention policy
- */
- gbp_endpoint_retention_t gg_retention;
-} gbp_endpoint_group_t;
-
-/**
- * EPG DB, key'd on EGP-ID
- */
-typedef struct gbp_endpoint_group_db_t_
-{
- uword *gg_hash_sclass;
-} gbp_endpoint_group_db_t;
-
-extern int gbp_endpoint_group_add_and_lock (vnid_t vnid,
- u16 sclass,
- u32 bd_id,
- u32 rd_id,
- u32 uplink_sw_if_index,
- const gbp_endpoint_retention_t *
- retention);
-extern index_t gbp_endpoint_group_find (sclass_t sclass);
-extern int gbp_endpoint_group_delete (sclass_t sclass);
-extern void gbp_endpoint_group_unlock (index_t index);
-extern void gbp_endpoint_group_lock (index_t index);
-extern u32 gbp_endpoint_group_get_bd_id (const gbp_endpoint_group_t *);
-
-extern gbp_endpoint_group_t *gbp_endpoint_group_get (index_t i);
-extern index_t gbp_endpoint_group_get_fib_index (const gbp_endpoint_group_t *
- gg, fib_protocol_t fproto);
-
-typedef int (*gbp_endpoint_group_cb_t) (gbp_endpoint_group_t * gbpe,
- void *ctx);
-extern void gbp_endpoint_group_walk (gbp_endpoint_group_cb_t bgpe, void *ctx);
-
-
-extern u8 *format_gbp_endpoint_group (u8 * s, va_list * args);
-
-/**
- * DP functions and databases
- */
-extern gbp_endpoint_group_db_t gbp_endpoint_group_db;
-extern gbp_endpoint_group_t *gbp_endpoint_group_pool;
-extern uword *gbp_epg_sclass_db;
-
-always_inline u32
-gbp_epg_itf_lookup_sclass (sclass_t sclass)
-{
- uword *p;
-
- p = hash_get (gbp_endpoint_group_db.gg_hash_sclass, sclass);
-
- if (NULL != p)
- {
- gbp_endpoint_group_t *gg;
-
- gg = pool_elt_at_index (gbp_endpoint_group_pool, p[0]);
- return (gg->gg_uplink_sw_if_index);
- }
- return (~0);
-}
-
-always_inline const dpo_id_t *
-gbp_epg_dpo_lookup (sclass_t sclass, fib_protocol_t fproto)
-{
- uword *p;
-
- p = hash_get (gbp_endpoint_group_db.gg_hash_sclass, sclass);
-
- if (NULL != p)
- {
- gbp_endpoint_group_t *gg;
-
- gg = pool_elt_at_index (gbp_endpoint_group_pool, p[0]);
- return (&gg->gg_dpo[fproto]);
- }
- return (NULL);
-}
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_ext_itf.c b/src/plugins/gbp/gbp_ext_itf.c
deleted file mode 100644
index c5506661c2d..00000000000
--- a/src/plugins/gbp/gbp_ext_itf.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_ext_itf.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_itf.h>
-
-/**
- * Pool of GBP ext_itfs
- */
-gbp_ext_itf_t *gbp_ext_itf_pool;
-
-/**
- * external interface configs keyed by sw_if_index
- */
-index_t *gbp_ext_itf_db;
-
-#define GBP_EXT_ITF_ID 0x00000080
-
-/**
- * logger
- */
-vlib_log_class_t gx_logger;
-
-#define GBP_EXT_ITF_DBG(...) \
- vlib_log_debug (gx_logger, __VA_ARGS__);
-
-u8 *
-format_gbp_ext_itf (u8 * s, va_list * args)
-{
- gbp_ext_itf_t *gx = va_arg (*args, gbp_ext_itf_t *);
-
- return (format (s, "%U%s in %U",
- format_gbp_itf_hdl, gx->gx_itf,
- (gx->gx_flags & GBP_EXT_ITF_F_ANON) ? " [anon]" : "",
- format_gbp_bridge_domain, gx->gx_bd));
-}
-
-int
-gbp_ext_itf_add (u32 sw_if_index, u32 bd_id, u32 rd_id, u32 flags)
-{
- gbp_ext_itf_t *gx;
- index_t gxi;
-
- vec_validate_init_empty (gbp_ext_itf_db, sw_if_index, INDEX_INVALID);
-
- gxi = gbp_ext_itf_db[sw_if_index];
-
- if (INDEX_INVALID == gxi)
- {
- gbp_route_domain_t *gr;
- fib_protocol_t fproto;
- index_t gbi, gri;
-
- gbi = gbp_bridge_domain_find_and_lock (bd_id);
-
- if (INDEX_INVALID == gbi)
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-
- gri = gbp_route_domain_find_and_lock (rd_id);
-
- if (INDEX_INVALID == gri)
- {
- gbp_bridge_domain_unlock (gbi);
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
- }
-
- pool_get_zero (gbp_ext_itf_pool, gx);
- gxi = gx - gbp_ext_itf_pool;
-
- gr = gbp_route_domain_get (gri);
-
- gx->gx_bd = gbi;
- gx->gx_rd = gri;
- gbp_itf_hdl_reset (&gx->gx_itf);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- gx->gx_fib_index[fproto] =
- gr->grd_fib_index[fib_proto_to_dpo (fproto)];
- }
-
- if (flags & GBP_EXT_ITF_F_ANON)
- {
- /* add interface to the BD */
- gx->gx_itf = gbp_itf_l2_add_and_lock (sw_if_index, gbi);
-
- /* setup GBP L2 features on this interface */
- gbp_itf_l2_set_input_feature (gx->gx_itf,
- L2INPUT_FEAT_GBP_LPM_ANON_CLASSIFY |
- L2INPUT_FEAT_LEARN);
- gbp_itf_l2_set_output_feature (gx->gx_itf,
- L2OUTPUT_FEAT_GBP_POLICY_LPM);
- }
-
- gx->gx_flags = flags;
-
- gbp_ext_itf_db[sw_if_index] = gxi;
-
- GBP_EXT_ITF_DBG ("add: %U", format_gbp_ext_itf, gx);
-
- return (0);
- }
-
- return (VNET_API_ERROR_ENTRY_ALREADY_EXISTS);
-}
-
-int
-gbp_ext_itf_delete (u32 sw_if_index)
-{
- gbp_ext_itf_t *gx;
- index_t gxi;
-
- if (vec_len (gbp_ext_itf_db) <= sw_if_index)
- return (VNET_API_ERROR_INVALID_SW_IF_INDEX);
-
- gxi = gbp_ext_itf_db[sw_if_index];
-
- if (INDEX_INVALID != gxi)
- {
- gx = pool_elt_at_index (gbp_ext_itf_pool, gxi);
-
- GBP_EXT_ITF_DBG ("del: %U", format_gbp_ext_itf, gx);
-
- gbp_itf_unlock (&gx->gx_itf);
- gbp_route_domain_unlock (gx->gx_rd);
- gbp_bridge_domain_unlock (gx->gx_bd);
-
- gbp_ext_itf_db[sw_if_index] = INDEX_INVALID;
- pool_put (gbp_ext_itf_pool, gx);
-
- return (0);
- }
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-}
-
-static clib_error_t *
-gbp_ext_itf_add_del_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index = ~0, bd_id = ~0, rd_id = ~0, flags = 0;
- int add = 1;
- int rv;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "del"))
- add = 0;
- else
- if (unformat
- (line_input, "%U", unformat_vnet_sw_interface, vnet_get_main (),
- &sw_if_index))
- ;
- else if (unformat (line_input, "bd %d", &bd_id))
- ;
- else if (unformat (line_input, "rd %d", &rd_id))
- ;
- else if (unformat (line_input, "anon-l3-out"))
- flags |= GBP_EXT_ITF_F_ANON;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- }
- unformat_free (line_input);
-
- if (~0 == sw_if_index)
- return clib_error_return (0, "interface must be specified");
-
- if (add)
- {
- if (~0 == bd_id)
- return clib_error_return (0, "BD-ID must be specified");
- if (~0 == rd_id)
- return clib_error_return (0, "RD-ID must be specified");
- rv = gbp_ext_itf_add (sw_if_index, bd_id, rd_id, flags);
- }
- else
- rv = gbp_ext_itf_delete (sw_if_index);
-
- switch (rv)
- {
- case 0:
- return 0;
- case VNET_API_ERROR_ENTRY_ALREADY_EXISTS:
- return clib_error_return (0, "interface already exists");
- case VNET_API_ERROR_NO_SUCH_ENTRY: /* fallthrough */
- case VNET_API_ERROR_INVALID_SW_IF_INDEX:
- return clib_error_return (0, "unknown interface");
- default:
- return clib_error_return (0, "error %d", rv);
- }
-
- /* never reached */
- return 0;
-}
-
-/*?
- * Add Group Based Interface as anonymous L3out interface
- *
- * @cliexpar
- * @cliexstart{gbp interface [del] anon-l3out <interface> bd <ID>}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_itf_anon_l3out_add_del_node, static) = {
- .path = "gbp ext-itf",
- .short_help = "gbp ext-itf [del] <interface> bd <ID> rd <ID> [anon-l3-out]\n",
- .function = gbp_ext_itf_add_del_cli,
-};
-/* *INDENT-ON* */
-
-void
-gbp_ext_itf_walk (gbp_ext_itf_cb_t cb, void *ctx)
-{
- gbp_ext_itf_t *ge;
-
- /* *INDENT-OFF* */
- pool_foreach (ge, gbp_ext_itf_pool)
- {
- if (!cb(ge, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static walk_rc_t
-gbp_ext_itf_show_one (gbp_ext_itf_t * gx, void *ctx)
-{
- vlib_cli_output (ctx, " %U", format_gbp_ext_itf, gx);
-
- return (WALK_CONTINUE);
-}
-
-static clib_error_t *
-gbp_ext_itf_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "External-Interfaces:");
- gbp_ext_itf_walk (gbp_ext_itf_show_one, vm);
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy external interface and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp ext-itf}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_ext_itf_show_node, static) = {
- .path = "show gbp ext-itf",
- .short_help = "show gbp ext-itf\n",
- .function = gbp_ext_itf_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_ext_itf_init (vlib_main_t * vm)
-{
- gx_logger = vlib_log_register_class ("gbp", "ext-itf");
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_ext_itf_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_ext_itf.h b/src/plugins/gbp/gbp_ext_itf.h
deleted file mode 100644
index 03b1992ca45..00000000000
--- a/src/plugins/gbp/gbp_ext_itf.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_EXT_ITF_H__
-#define __GBP_EXT_ITF_H__
-
-#include <gbp/gbp.h>
-
-enum
-{
- GBP_EXT_ITF_F_NONE = 0,
- GBP_EXT_ITF_F_ANON = 1 << 0,
-};
-
-/**
- * An external interface maps directly to an oflex L3ExternalInterface.
- * The special characteristics of an external interface is the way the source
- * EPG is determined for input packets which, like a recirc interface, is via
- * a LPM.
- */
-typedef struct gpb_ext_itf_t_
-{
- /**
- * The interface
- */
- gbp_itf_hdl_t gx_itf;
-
- /**
- * The BD this external interface is a member of
- */
- index_t gx_bd;
-
- /**
- * The RD this external interface is a member of
- */
- index_t gx_rd;
-
- /**
- * cached FIB indices from the RD
- */
- u32 gx_fib_index[DPO_PROTO_NUM];
-
- /**
- * The associated flags
- */
- u32 gx_flags;
-
-} gbp_ext_itf_t;
-
-
-extern int gbp_ext_itf_add (u32 sw_if_index, u32 bd_id, u32 rd_id, u32 flags);
-extern int gbp_ext_itf_delete (u32 sw_if_index);
-
-extern u8 *format_gbp_ext_itf (u8 * s, va_list * args);
-
-typedef walk_rc_t (*gbp_ext_itf_cb_t) (gbp_ext_itf_t * gbpe, void *ctx);
-extern void gbp_ext_itf_walk (gbp_ext_itf_cb_t bgpe, void *ctx);
-
-
-/**
- * Exposed types for the data-plane
- */
-extern gbp_ext_itf_t *gbp_ext_itf_pool;
-extern index_t *gbp_ext_itf_db;
-
-always_inline gbp_ext_itf_t *
-gbp_ext_itf_get (u32 sw_if_index)
-{
- return (pool_elt_at_index (gbp_ext_itf_pool, gbp_ext_itf_db[sw_if_index]));
-}
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_fwd.c b/src/plugins/gbp/gbp_fwd.c
deleted file mode 100644
index 4ecc4779b92..00000000000
--- a/src/plugins/gbp/gbp_fwd.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <plugins/gbp/gbp.h>
-#include <vnet/l2/l2_input.h>
-#include <plugins/gbp/gbp_learn.h>
-
-/**
- * Grouping of global data for the GBP source EPG classification feature
- */
-typedef struct gbp_fwd_main_t_
-{
- /**
- * Next nodes for L2 output features
- */
- u32 l2_input_feat_next[32];
-} gbp_fwd_main_t;
-
-gbp_fwd_main_t gbp_fwd_main;
-
-static clib_error_t *
-gbp_fwd_init (vlib_main_t * vm)
-{
- gbp_fwd_main_t *gpm = &gbp_fwd_main;
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "gbp-fwd");
-
- /* Initialize the feature next-node indices */
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- gpm->l2_input_feat_next);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (gbp_fwd_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_fwd_dpo.c b/src/plugins/gbp/gbp_fwd_dpo.c
deleted file mode 100644
index b1023f5e78f..00000000000
--- a/src/plugins/gbp/gbp_fwd_dpo.c
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_fwd_dpo.h>
-
-#include <vnet/ethernet/ethernet.h>
-
-
-#ifndef CLIB_MARCH_VARIANT
-/**
- * The 'DB' of GBP FWD DPOs.
- * There is one per-proto
- */
-static index_t gbp_fwd_dpo_db[DPO_PROTO_NUM] = { INDEX_INVALID };
-
-/**
- * DPO type registered for these GBP FWD
- */
-static dpo_type_t gbp_fwd_dpo_type;
-
-/**
- * @brief pool of all interface DPOs
- */
-gbp_fwd_dpo_t *gbp_fwd_dpo_pool;
-
-static gbp_fwd_dpo_t *
-gbp_fwd_dpo_alloc (void)
-{
- gbp_fwd_dpo_t *gfd;
-
- pool_get (gbp_fwd_dpo_pool, gfd);
-
- return (gfd);
-}
-
-static inline gbp_fwd_dpo_t *
-gbp_fwd_dpo_get_from_dpo (const dpo_id_t * dpo)
-{
- ASSERT (gbp_fwd_dpo_type == dpo->dpoi_type);
-
- return (gbp_fwd_dpo_get (dpo->dpoi_index));
-}
-
-static inline index_t
-gbp_fwd_dpo_get_index (gbp_fwd_dpo_t * gfd)
-{
- return (gfd - gbp_fwd_dpo_pool);
-}
-
-static void
-gbp_fwd_dpo_lock (dpo_id_t * dpo)
-{
- gbp_fwd_dpo_t *gfd;
-
- gfd = gbp_fwd_dpo_get_from_dpo (dpo);
- gfd->gfd_locks++;
-}
-
-static void
-gbp_fwd_dpo_unlock (dpo_id_t * dpo)
-{
- gbp_fwd_dpo_t *gfd;
-
- gfd = gbp_fwd_dpo_get_from_dpo (dpo);
- gfd->gfd_locks--;
-
- if (0 == gfd->gfd_locks)
- {
- gbp_fwd_dpo_db[gfd->gfd_proto] = INDEX_INVALID;
- pool_put (gbp_fwd_dpo_pool, gfd);
- }
-}
-
-void
-gbp_fwd_dpo_add_or_lock (dpo_proto_t dproto, dpo_id_t * dpo)
-{
- gbp_fwd_dpo_t *gfd;
-
- if (INDEX_INVALID == gbp_fwd_dpo_db[dproto])
- {
- gfd = gbp_fwd_dpo_alloc ();
-
- gfd->gfd_proto = dproto;
-
- gbp_fwd_dpo_db[dproto] = gbp_fwd_dpo_get_index (gfd);
- }
- else
- {
- gfd = gbp_fwd_dpo_get (gbp_fwd_dpo_db[dproto]);
- }
-
- dpo_set (dpo, gbp_fwd_dpo_type, dproto, gbp_fwd_dpo_get_index (gfd));
-}
-
-u8 *
-format_gbp_fwd_dpo (u8 * s, va_list * ap)
-{
- index_t index = va_arg (*ap, index_t);
- CLIB_UNUSED (u32 indent) = va_arg (*ap, u32);
- gbp_fwd_dpo_t *gfd = gbp_fwd_dpo_get (index);
-
- return (format (s, "gbp-fwd-dpo: %U", format_dpo_proto, gfd->gfd_proto));
-}
-
-const static dpo_vft_t gbp_fwd_dpo_vft = {
- .dv_lock = gbp_fwd_dpo_lock,
- .dv_unlock = gbp_fwd_dpo_unlock,
- .dv_format = format_gbp_fwd_dpo,
-};
-
-/**
- * @brief The per-protocol VLIB graph nodes that are assigned to a glean
- * object.
- *
- * this means that these graph nodes are ones from which a glean is the
- * parent object in the DPO-graph.
- */
-const static char *const gbp_fwd_dpo_ip4_nodes[] = {
- "ip4-gbp-fwd-dpo",
- NULL,
-};
-
-const static char *const gbp_fwd_dpo_ip6_nodes[] = {
- "ip6-gbp-fwd-dpo",
- NULL,
-};
-
-const static char *const *const gbp_fwd_dpo_nodes[DPO_PROTO_NUM] = {
- [DPO_PROTO_IP4] = gbp_fwd_dpo_ip4_nodes,
- [DPO_PROTO_IP6] = gbp_fwd_dpo_ip6_nodes,
-};
-
-dpo_type_t
-gbp_fwd_dpo_get_type (void)
-{
- return (gbp_fwd_dpo_type);
-}
-
-static clib_error_t *
-gbp_fwd_dpo_module_init (vlib_main_t * vm)
-{
- dpo_proto_t dproto;
-
- FOR_EACH_DPO_PROTO (dproto)
- {
- gbp_fwd_dpo_db[dproto] = INDEX_INVALID;
- }
-
- gbp_fwd_dpo_type = dpo_register_new_type (&gbp_fwd_dpo_vft,
- gbp_fwd_dpo_nodes);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_fwd_dpo_module_init);
-#endif /* CLIB_MARCH_VARIANT */
-
-typedef struct gbp_fwd_dpo_trace_t_
-{
- u32 sclass;
- u32 dpo_index;
-} gbp_fwd_dpo_trace_t;
-
-typedef enum
-{
- GBP_FWD_DROP,
- GBP_FWD_FWD,
- GBP_FWD_N_NEXT,
-} gbp_fwd_next_t;
-
-always_inline uword
-gbp_fwd_dpo_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, fib_protocol_t fproto)
-{
- u32 n_left_from, next_index, *from, *to_next;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- const dpo_id_t *next_dpo0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
- u32 bi0, next0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- sclass0 = vnet_buffer2 (b0)->gbp.sclass;
- next_dpo0 = gbp_epg_dpo_lookup (sclass0, fproto);
-
- if (PREDICT_TRUE (NULL != next_dpo0))
- {
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = next_dpo0->dpoi_index;
- next0 = GBP_FWD_FWD;
- }
- else
- {
- next0 = GBP_FWD_DROP;
- }
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- gbp_fwd_dpo_trace_t *tr;
-
- tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->sclass = sclass0;
- tr->dpo_index = (NULL != next_dpo0 ?
- next_dpo0->dpoi_index : ~0);
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
- return from_frame->n_vectors;
-}
-
-static u8 *
-format_gbp_fwd_dpo_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_fwd_dpo_trace_t *t = va_arg (*args, gbp_fwd_dpo_trace_t *);
-
- s = format (s, " sclass:%d dpo:%d", t->sclass, t->dpo_index);
-
- return s;
-}
-
-VLIB_NODE_FN (ip4_gbp_fwd_dpo_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return (gbp_fwd_dpo_inline (vm, node, from_frame, FIB_PROTOCOL_IP4));
-}
-
-VLIB_NODE_FN (ip6_gbp_fwd_dpo_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return (gbp_fwd_dpo_inline (vm, node, from_frame, FIB_PROTOCOL_IP6));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip4_gbp_fwd_dpo_node) = {
- .name = "ip4-gbp-fwd-dpo",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_fwd_dpo_trace,
- .n_next_nodes = GBP_FWD_N_NEXT,
- .next_nodes =
- {
- [GBP_FWD_DROP] = "ip4-drop",
- [GBP_FWD_FWD] = "ip4-dvr-dpo",
- }
-};
-VLIB_REGISTER_NODE (ip6_gbp_fwd_dpo_node) = {
- .name = "ip6-gbp-fwd-dpo",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_fwd_dpo_trace,
- .n_next_nodes = GBP_FWD_N_NEXT,
- .next_nodes =
- {
- [GBP_FWD_DROP] = "ip6-drop",
- [GBP_FWD_FWD] = "ip6-dvr-dpo",
- }
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_fwd_dpo.h b/src/plugins/gbp/gbp_fwd_dpo.h
deleted file mode 100644
index 6092d6241b5..00000000000
--- a/src/plugins/gbp/gbp_fwd_dpo.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_FWD_DPO_H__
-#define __GBP_FWD_DPO_H__
-
-#include <vnet/dpo/dpo.h>
-
-/**
- * @brief
- * The GBP FWD DPO. Used in the L3 path to select the correct EPG uplink
- * based on the source EPG.
- */
-typedef struct gbp_fwd_dpo_t_
-{
- /**
- * The protocol of packets using this DPO
- */
- dpo_proto_t gfd_proto;
-
- /**
- * number of locks.
- */
- u16 gfd_locks;
-} gbp_fwd_dpo_t;
-
-extern void gbp_fwd_dpo_add_or_lock (dpo_proto_t dproto, dpo_id_t * dpo);
-
-extern dpo_type_t gbp_fwd_dpo_get_type (void);
-
-/**
- * @brief pool of all interface DPOs
- */
-extern gbp_fwd_dpo_t *gbp_fwd_dpo_pool;
-
-static inline gbp_fwd_dpo_t *
-gbp_fwd_dpo_get (index_t index)
-{
- return (pool_elt_at_index (gbp_fwd_dpo_pool, index));
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
-
-#endif
diff --git a/src/plugins/gbp/gbp_fwd_node.c b/src/plugins/gbp/gbp_fwd_node.c
deleted file mode 100644
index 6ea56fd8074..00000000000
--- a/src/plugins/gbp/gbp_fwd_node.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <vnet/l2/l2_input.h>
-
-#define foreach_gbp_fwd \
- _(DROP, "drop") \
- _(OUTPUT, "output")
-
-typedef enum
-{
-#define _(sym,str) GBP_FWD_ERROR_##sym,
- foreach_gbp_fwd
-#undef _
- GBP_FWD_N_ERROR,
-} gbp_fwd_error_t;
-
-static char *gbp_fwd_error_strings[] = {
-#define _(sym,string) string,
- foreach_gbp_fwd
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) GBP_FWD_NEXT_##sym,
- foreach_gbp_fwd
-#undef _
- GBP_FWD_N_NEXT,
-} gbp_fwd_next_t;
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_fwd_trace_t_
-{
- /* per-pkt trace data */
- sclass_t sclass;
- u32 sw_if_index;
-} gbp_fwd_trace_t;
-
-VLIB_NODE_FN (gbp_fwd_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- u32 n_left_from, *from, *to_next;
- u32 next_index;
-
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0, sw_if_index0;
- gbp_fwd_next_t next0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
-
- next0 = GBP_FWD_NEXT_DROP;
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- /*
- * lookup the uplink based on src EPG
- */
- sclass0 = vnet_buffer2 (b0)->gbp.sclass;
-
- sw_if_index0 = gbp_epg_itf_lookup_sclass (sclass0);
-
- if (~0 != sw_if_index0)
- {
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
-
- next0 = GBP_FWD_NEXT_OUTPUT;
- }
- /*
- * else
- * don't know the uplink interface for this EPG => drop
- */
-
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- gbp_fwd_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sclass = sclass0;
- t->sw_if_index = sw_if_index0;
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-/* packet trace format function */
-static u8 *
-format_gbp_fwd_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_fwd_trace_t *t = va_arg (*args, gbp_fwd_trace_t *);
-
- s = format (s, "sclass:%d", t->sclass);
-
- return s;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_fwd_node) = {
- .name = "gbp-fwd",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_fwd_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(gbp_fwd_error_strings),
- .error_strings = gbp_fwd_error_strings,
-
- .n_next_nodes = GBP_FWD_N_NEXT,
-
- .next_nodes = {
- [GBP_FWD_NEXT_DROP] = "error-drop",
- [GBP_FWD_NEXT_OUTPUT] = "l2-output",
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_itf.c b/src/plugins/gbp/gbp_itf.c
deleted file mode 100644
index 0c8f6a45a87..00000000000
--- a/src/plugins/gbp/gbp_itf.c
+++ /dev/null
@@ -1,575 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_itf.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-
-#include <vnet/ip/ip.h>
-
-#define foreach_gbp_itf_mode \
- _(L2, "l2") \
- _(L3, "L3")
-
-typedef enum gbp_ift_mode_t_
-{
-#define _(s,v) GBP_ITF_MODE_##s,
- foreach_gbp_itf_mode
-#undef _
-} gbp_itf_mode_t;
-
-/**
- * Attributes and configurations attached to interfaces by GBP
- */
-typedef struct gbp_itf_t_
-{
- /**
- * Number of references to this interface
- */
- u32 gi_locks;
-
- /**
- * The interface this wrapper is managing
- */
- u32 gi_sw_if_index;
-
- /**
- * The mode of the interface
- */
- gbp_itf_mode_t gi_mode;
-
- /**
- * Users of this interface - this is encoded in the user's handle
- */
- u32 *gi_users;
-
- /**
- * L2/L3 Features configured by each user
- */
- u32 *gi_input_fbs;
- u32 gi_input_fb;
- u32 *gi_output_fbs;
- u32 gi_output_fb;
-
- /**
- * function to call when the interface is deleted.
- */
- gbp_itf_free_fn_t gi_free_fn;
-
- union
- {
- /**
- * GBP BD or RD index
- */
- u32 gi_gbi;
- index_t gi_gri;
- };
-} gbp_itf_t;
-
-static gbp_itf_t *gbp_itf_pool;
-static uword *gbp_itf_db;
-
-static const char *gbp_itf_feat_bit_pos_to_arc[] = {
-#define _(s,v,a) [GBP_ITF_L3_FEAT_POS_##s] = a,
- foreach_gdb_l3_feature
-#undef _
-};
-
-static const char *gbp_itf_feat_bit_pos_to_feat[] = {
-#define _(s,v,a) [GBP_ITF_L3_FEAT_POS_##s] = v,
- foreach_gdb_l3_feature
-#undef _
-};
-
-u8 *
-format_gbp_itf_l3_feat (u8 * s, va_list * args)
-{
- gbp_itf_l3_feat_t flags = va_arg (*args, gbp_itf_l3_feat_t);
-
-#define _(a, b, c) \
- if (flags & GBP_ITF_L3_FEAT_##a) \
- s = format (s, "%s ", b);
- foreach_gdb_l3_feature
-#undef _
- return (s);
-}
-
-void
-gbp_itf_hdl_reset (gbp_itf_hdl_t * gh)
-{
- *gh = GBP_ITF_HDL_INVALID;
-}
-
-bool
-gbp_itf_hdl_is_valid (gbp_itf_hdl_t gh)
-{
- return (gh.gh_which != GBP_ITF_HDL_INVALID.gh_which);
-}
-
-static gbp_itf_t *
-gbp_itf_get (index_t gii)
-{
- if (pool_is_free_index (gbp_itf_pool, gii))
- return (NULL);
-
- return (pool_elt_at_index (gbp_itf_pool, gii));
-}
-
-static gbp_itf_t *
-gbp_itf_find (u32 sw_if_index)
-{
- uword *p;
-
- p = hash_get (gbp_itf_db, sw_if_index);
-
- if (NULL != p)
- return (gbp_itf_get (p[0]));
-
- return (NULL);
-}
-
-static gbp_itf_t *
-gbp_itf_find_hdl (gbp_itf_hdl_t gh)
-{
- return (gbp_itf_find (gh.gh_which));
-}
-
-u32
-gbp_itf_get_sw_if_index (gbp_itf_hdl_t hdl)
-{
- return (hdl.gh_which);
-}
-
-static gbp_itf_hdl_t
-gbp_itf_mk_hdl (gbp_itf_t * gi)
-{
- gbp_itf_hdl_t gh;
- u32 *useri;
-
- pool_get (gi->gi_users, useri);
- *useri = 0;
-
- gh.gh_who = useri - gi->gi_users;
- gh.gh_which = gi->gi_sw_if_index;
-
- return (gh);
-}
-
-static gbp_itf_hdl_t
-gbp_itf_l2_add_and_lock_i (u32 sw_if_index, index_t gbi, gbp_itf_free_fn_t ff)
-{
- gbp_itf_t *gi;
-
- gi = gbp_itf_find (sw_if_index);
-
- if (NULL == gi)
- {
- pool_get_zero (gbp_itf_pool, gi);
-
- gi->gi_sw_if_index = sw_if_index;
- gi->gi_gbi = gbi;
- gi->gi_mode = GBP_ITF_MODE_L2;
- gi->gi_free_fn = ff;
-
- gbp_bridge_domain_itf_add (gi->gi_gbi, gi->gi_sw_if_index,
- L2_BD_PORT_TYPE_NORMAL);
-
- hash_set (gbp_itf_db, gi->gi_sw_if_index, gi - gbp_itf_pool);
- }
-
- gi->gi_locks++;
-
- return (gbp_itf_mk_hdl (gi));
-}
-
-gbp_itf_hdl_t
-gbp_itf_l2_add_and_lock (u32 sw_if_index, index_t gbi)
-{
- return (gbp_itf_l2_add_and_lock_i (sw_if_index, gbi, NULL));
-}
-
-gbp_itf_hdl_t
-gbp_itf_l2_add_and_lock_w_free (u32 sw_if_index,
- index_t gbi, gbp_itf_free_fn_t ff)
-{
- return (gbp_itf_l2_add_and_lock_i (sw_if_index, gbi, ff));
-}
-
-gbp_itf_hdl_t
-gbp_itf_l3_add_and_lock_i (u32 sw_if_index, index_t gri, gbp_itf_free_fn_t ff)
-{
- gbp_itf_t *gi;
-
- gi = gbp_itf_find (sw_if_index);
-
- if (NULL == gi)
- {
- const gbp_route_domain_t *grd;
- fib_protocol_t fproto;
-
- pool_get_zero (gbp_itf_pool, gi);
-
- gi->gi_sw_if_index = sw_if_index;
- gi->gi_mode = GBP_ITF_MODE_L3;
- gi->gi_gri = gri;
- gi->gi_free_fn = ff;
-
- grd = gbp_route_domain_get (gi->gi_gri);
-
- ip4_sw_interface_enable_disable (gi->gi_sw_if_index, 1);
- ip6_sw_interface_enable_disable (gi->gi_sw_if_index, 1);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- ip_table_bind (fproto, gi->gi_sw_if_index,
- grd->grd_table_id[fproto], 1);
-
- hash_set (gbp_itf_db, gi->gi_sw_if_index, gi - gbp_itf_pool);
- }
-
- gi->gi_locks++;
-
- return (gbp_itf_mk_hdl (gi));
-}
-
-gbp_itf_hdl_t
-gbp_itf_l3_add_and_lock (u32 sw_if_index, index_t gri)
-{
- return (gbp_itf_l3_add_and_lock_i (sw_if_index, gri, NULL));
-}
-
-gbp_itf_hdl_t
-gbp_itf_l3_add_and_lock_w_free (u32 sw_if_index,
- index_t gri, gbp_itf_free_fn_t ff)
-{
- return (gbp_itf_l3_add_and_lock_i (sw_if_index, gri, ff));
-}
-
-void
-gbp_itf_lock (gbp_itf_hdl_t gh)
-{
- gbp_itf_t *gi;
-
- if (!gbp_itf_hdl_is_valid (gh))
- return;
-
- gi = gbp_itf_find_hdl (gh);
-
- gi->gi_locks++;
-}
-
-gbp_itf_hdl_t
-gbp_itf_clone_and_lock (gbp_itf_hdl_t gh)
-{
- gbp_itf_t *gi;
-
- if (!gbp_itf_hdl_is_valid (gh))
- return (GBP_ITF_HDL_INVALID);
-
- gi = gbp_itf_find_hdl (gh);
-
- gi->gi_locks++;
-
- return (gbp_itf_mk_hdl (gi));
-}
-
-void
-gbp_itf_unlock (gbp_itf_hdl_t * gh)
-{
- gbp_itf_t *gi;
-
- if (!gbp_itf_hdl_is_valid (*gh))
- return;
-
- gi = gbp_itf_find_hdl (*gh);
- ASSERT (gi->gi_locks > 0);
- gi->gi_locks--;
-
- if (0 == gi->gi_locks)
- {
- if (GBP_ITF_MODE_L2 == gi->gi_mode)
- {
- gbp_itf_l2_set_input_feature (*gh, L2INPUT_FEAT_NONE);
- gbp_itf_l2_set_output_feature (*gh, L2OUTPUT_FEAT_NONE);
- gbp_bridge_domain_itf_del (gi->gi_gbi,
- gi->gi_sw_if_index,
- L2_BD_PORT_TYPE_NORMAL);
- }
- else
- {
- fib_protocol_t fproto;
-
- gbp_itf_l3_set_input_feature (*gh, GBP_ITF_L3_FEAT_NONE);
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- ip_table_bind (fproto, gi->gi_sw_if_index, 0, 0);
-
- ip4_sw_interface_enable_disable (gi->gi_sw_if_index, 0);
- ip6_sw_interface_enable_disable (gi->gi_sw_if_index, 0);
- }
-
- hash_unset (gbp_itf_db, gi->gi_sw_if_index);
-
- if (gi->gi_free_fn)
- gi->gi_free_fn (gi->gi_sw_if_index);
-
- pool_free (gi->gi_users);
- vec_free (gi->gi_input_fbs);
- vec_free (gi->gi_output_fbs);
-
- memset (gi, 0, sizeof (*gi));
- }
-
- gbp_itf_hdl_reset (gh);
-}
-
-void
-gbp_itf_l3_set_input_feature (gbp_itf_hdl_t gh, gbp_itf_l3_feat_t feats)
-{
- u32 diff_fb, new_fb, *fb, feat;
- gbp_itf_t *gi;
-
- gi = gbp_itf_find_hdl (gh);
-
- if (NULL == gi || GBP_ITF_MODE_L3 != gi->gi_mode)
- return;
-
- vec_validate (gi->gi_input_fbs, gh.gh_who);
- gi->gi_input_fbs[gh.gh_who] = feats;
-
- new_fb = 0;
- vec_foreach (fb, gi->gi_input_fbs)
- {
- new_fb |= *fb;
- }
-
- /* add new features */
- diff_fb = (gi->gi_input_fb ^ new_fb) & new_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- vnet_feature_enable_disable (gbp_itf_feat_bit_pos_to_arc[feat],
- gbp_itf_feat_bit_pos_to_feat[feat],
- gi->gi_sw_if_index, 1, 0, 0);
- }));
- /* *INDENT-ON* */
-
- /* remove unneeded features */
- diff_fb = (gi->gi_input_fb ^ new_fb) & gi->gi_input_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- vnet_feature_enable_disable (gbp_itf_feat_bit_pos_to_arc[feat],
- gbp_itf_feat_bit_pos_to_feat[feat],
- gi->gi_sw_if_index, 0, 0, 0);
- }));
- /* *INDENT-ON* */
-
- gi->gi_input_fb = new_fb;
-}
-
-void
-gbp_itf_l2_set_input_feature (gbp_itf_hdl_t gh, l2input_feat_masks_t feats)
-{
- u32 diff_fb, new_fb, *fb, feat;
- gbp_itf_t *gi;
-
- gi = gbp_itf_find_hdl (gh);
-
- if (NULL == gi || GBP_ITF_MODE_L2 != gi->gi_mode)
- {
- ASSERT (0);
- return;
- }
-
- vec_validate (gi->gi_input_fbs, gh.gh_who);
- gi->gi_input_fbs[gh.gh_who] = feats;
-
- new_fb = 0;
- vec_foreach (fb, gi->gi_input_fbs)
- {
- new_fb |= *fb;
- }
-
- /* add new features */
- diff_fb = (gi->gi_input_fb ^ new_fb) & new_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- l2input_intf_bitmap_enable (gi->gi_sw_if_index, (1 << feat), 1);
- }));
- /* *INDENT-ON* */
-
- /* remove unneeded features */
- diff_fb = (gi->gi_input_fb ^ new_fb) & gi->gi_input_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- l2input_intf_bitmap_enable (gi->gi_sw_if_index, (1 << feat), 0);
- }));
- /* *INDENT-ON* */
-
- gi->gi_input_fb = new_fb;
-}
-
-void
-gbp_itf_l2_set_output_feature (gbp_itf_hdl_t gh, l2output_feat_masks_t feats)
-{
- u32 diff_fb, new_fb, *fb, feat;
- gbp_itf_t *gi;
-
- gi = gbp_itf_find_hdl (gh);
-
- if (NULL == gi || GBP_ITF_MODE_L2 != gi->gi_mode)
- {
- ASSERT (0);
- return;
- }
-
- vec_validate (gi->gi_output_fbs, gh.gh_who);
- gi->gi_output_fbs[gh.gh_who] = feats;
-
- new_fb = 0;
- vec_foreach (fb, gi->gi_output_fbs)
- {
- new_fb |= *fb;
- }
-
- /* add new features */
- diff_fb = (gi->gi_output_fb ^ new_fb) & new_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- l2output_intf_bitmap_enable (gi->gi_sw_if_index, (1 << feat), 1);
- }));
- /* *INDENT-ON* */
-
- /* remove unneeded features */
- diff_fb = (gi->gi_output_fb ^ new_fb) & gi->gi_output_fb;
-
- /* *INDENT-OFF* */
- foreach_set_bit (feat, diff_fb,
- ({
- l2output_intf_bitmap_enable (gi->gi_sw_if_index, (1 << feat), 0);
- }));
- /* *INDENT-ON* */
-
- gi->gi_output_fb = new_fb;
-}
-
-static u8 *
-format_gbp_itf_mode (u8 * s, va_list * args)
-{
- gbp_itf_mode_t mode = va_arg (*args, gbp_itf_mode_t);
-
- switch (mode)
- {
-#define _(a,v) \
- case GBP_ITF_MODE_##a: \
- return format(s, "%s", v);
- foreach_gbp_itf_mode
-#undef _
- }
- return (s);
-}
-
-static u8 *
-format_gbp_itf (u8 * s, va_list * args)
-{
- index_t gii = va_arg (*args, index_t);
- gbp_itf_t *gi;
-
- if (INDEX_INVALID == gii)
- return (format (s, "unset"));
-
- gi = gbp_itf_get (gii);
-
- s = format (s, "%U locks:%d mode:%U ",
- format_vnet_sw_if_index_name, vnet_get_main (),
- gi->gi_sw_if_index, gi->gi_locks,
- format_gbp_itf_mode, gi->gi_mode);
-
- if (GBP_ITF_MODE_L2 == gi->gi_mode)
- s = format (s, "gbp-bd:%d input-feats:[%U] output-feats:[%U]",
- gi->gi_gbi,
- format_l2_input_features, gi->gi_input_fb, 0,
- format_l2_output_features, gi->gi_output_fb, 0);
- else
- s = format (s, "gbp-rd:%d input-feats:[%U] output-feats:[%U]",
- gi->gi_gbi,
- format_gbp_itf_l3_feat, gi->gi_input_fb,
- format_gbp_itf_l3_feat, gi->gi_output_fb);
-
- return (s);
-}
-
-u8 *
-format_gbp_itf_hdl (u8 * s, va_list * args)
-{
- gbp_itf_hdl_t gh = va_arg (*args, gbp_itf_hdl_t);
- gbp_itf_t *gi;
-
- gi = gbp_itf_find_hdl (gh);
-
- if (NULL == gi)
- return format (s, "INVALID");
-
- return (format (s, "%U", format_gbp_itf, gi - gbp_itf_pool));
-}
-
-static clib_error_t *
-gbp_itf_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- u32 gii;
-
- vlib_cli_output (vm, "Interfaces:");
-
- /* *INDENT-OFF* */
- pool_foreach_index (gii, gbp_itf_pool)
- {
- vlib_cli_output (vm, " [%d] %U", gii, format_gbp_itf, gii);
- }
- /* *INDENT-ON* */
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Interfaces
- *
- * @cliexpar
- * @cliexstart{show gbp contract}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_contract_show_node, static) = {
- .path = "show gbp interface",
- .short_help = "show gbp interface\n",
- .function = gbp_itf_show,
-};
-/* *INDENT-ON* */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_itf.h b/src/plugins/gbp/gbp_itf.h
deleted file mode 100644
index 23a09b2a9ff..00000000000
--- a/src/plugins/gbp/gbp_itf.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_INTERFACE_H__
-#define __GBP_INTERFACE_H__
-
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/l2_output.h>
-#include <vnet/dpo/dpo.h>
-
-
-#define foreach_gdb_l3_feature \
- _(LEARN_IP4, "gbp-learn-ip4", "ip4-unicast") \
- _(LEARN_IP6, "gbp-learn-ip6", "ip6-unicast")
-
-typedef enum gbp_itf_l3_feat_pos_t_
-{
-#define _(s,v,a) GBP_ITF_L3_FEAT_POS_##s,
- foreach_gdb_l3_feature
-#undef _
-} gbp_itf_l3_feat_pos_t;
-
-typedef enum gbp_itf_l3_feat_t_
-{
- GBP_ITF_L3_FEAT_NONE,
-#define _(s,v,a) GBP_ITF_L3_FEAT_##s = (1 << GBP_ITF_L3_FEAT_POS_##s),
- foreach_gdb_l3_feature
-#undef _
-} gbp_itf_l3_feat_t;
-
-#define GBP_ITF_L3_FEAT_LEARN (GBP_ITF_L3_FEAT_LEARN_IP4|GBP_ITF_L3_FEAT_LEARN_IP6)
-
-typedef struct gbp_itf_hdl_t_
-{
- union
- {
- struct
- {
- u32 gh_who;
- u32 gh_which;
- };
- };
-} gbp_itf_hdl_t;
-
-#define GBP_ITF_HDL_INIT {.gh_which = ~0}
-const static gbp_itf_hdl_t GBP_ITF_HDL_INVALID = GBP_ITF_HDL_INIT;
-
-extern void gbp_itf_hdl_reset (gbp_itf_hdl_t * gh);
-extern bool gbp_itf_hdl_is_valid (gbp_itf_hdl_t gh);
-
-typedef void (*gbp_itf_free_fn_t) (u32 sw_if_index);
-
-extern gbp_itf_hdl_t gbp_itf_l2_add_and_lock (u32 sw_if_index, u32 bd_index);
-extern gbp_itf_hdl_t gbp_itf_l3_add_and_lock (u32 sw_if_index, index_t gri);
-extern gbp_itf_hdl_t gbp_itf_l2_add_and_lock_w_free (u32 sw_if_index,
- u32 bd_index,
- gbp_itf_free_fn_t ff);
-extern gbp_itf_hdl_t gbp_itf_l3_add_and_lock_w_free (u32 sw_if_index,
- index_t gri,
- gbp_itf_free_fn_t ff);
-
-extern void gbp_itf_unlock (gbp_itf_hdl_t * hdl);
-extern void gbp_itf_lock (gbp_itf_hdl_t hdl);
-extern gbp_itf_hdl_t gbp_itf_clone_and_lock (gbp_itf_hdl_t hdl);
-extern u32 gbp_itf_get_sw_if_index (gbp_itf_hdl_t hdl);
-
-extern void gbp_itf_l2_set_input_feature (gbp_itf_hdl_t hdl,
- l2input_feat_masks_t feats);
-extern void gbp_itf_l2_set_output_feature (gbp_itf_hdl_t hdl,
- l2output_feat_masks_t feats);
-
-extern void gbp_itf_l3_set_input_feature (gbp_itf_hdl_t hdl,
- gbp_itf_l3_feat_t feats);
-
-extern u8 *format_gbp_itf_hdl (u8 * s, va_list * args);
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_learn.c b/src/plugins/gbp/gbp_learn.c
deleted file mode 100644
index af3a6fb52ac..00000000000
--- a/src/plugins/gbp/gbp_learn.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_learn.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-
-#include <vnet/l2/l2_input.h>
-
-gbp_learn_main_t gbp_learn_main;
-
-void
-gbp_learn_enable (u32 sw_if_index)
-{
- vnet_feature_enable_disable ("ip4-unicast",
- "gbp-learn-ip4", sw_if_index, 1, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "gbp-learn-ip6", sw_if_index, 1, 0, 0);
-}
-
-void
-gbp_learn_disable (u32 sw_if_index)
-{
- vnet_feature_enable_disable ("ip4-unicast",
- "gbp-learn-ip4", sw_if_index, 0, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "gbp-learn-ip6", sw_if_index, 0, 0, 0);
-}
-
-static clib_error_t *
-gbp_learn_init (vlib_main_t * vm)
-{
- gbp_learn_main_t *glm = &gbp_learn_main;
- vlib_thread_main_t *tm = &vlib_thread_main;
-
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "gbp-learn-l2");
-
- /* Initialize the feature next-node indices */
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- glm->gl_l2_input_feat_next);
-
- throttle_init (&glm->gl_l2_throttle,
- tm->n_vlib_mains, GBP_ENDPOINT_HASH_LEARN_RATE);
-
- throttle_init (&glm->gl_l3_throttle,
- tm->n_vlib_mains, GBP_ENDPOINT_HASH_LEARN_RATE);
-
- glm->gl_logger = vlib_log_register_class ("gbp", "learn");
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (gbp_learn_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_learn.h b/src/plugins/gbp/gbp_learn.h
deleted file mode 100644
index b4f3ae0a23d..00000000000
--- a/src/plugins/gbp/gbp_learn.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_LEARN_H__
-#define __GBP_LEARN_H__
-
-#include <plugins/gbp/gbp.h>
-
-#include <vnet/util/throttle.h>
-
-/**
- * The maximum learning rate per-hashed EP
- */
-#define GBP_ENDPOINT_HASH_LEARN_RATE (1e-2)
-
-/**
- * Grouping of global data for the GBP source EPG classification feature
- */
-typedef struct gbp_learn_main_t_
-{
- /**
- * Next nodes for L2 output features
- */
- u32 gl_l2_input_feat_next[32];
-
- /**
- * logger - VLIB log class
- */
- vlib_log_class_t gl_logger;
-
- /**
- * throttles for the DP leanring
- */
- throttle_t gl_l2_throttle;
- throttle_t gl_l3_throttle;
-} gbp_learn_main_t;
-
-extern gbp_learn_main_t gbp_learn_main;
-
-extern void gbp_learn_enable (u32 sw_if_index);
-extern void gbp_learn_disable (u32 sw_if_index);
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_learn_node.c b/src/plugins/gbp/gbp_learn_node.c
deleted file mode 100644
index a6c54971956..00000000000
--- a/src/plugins/gbp/gbp_learn_node.c
+++ /dev/null
@@ -1,718 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_learn.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/util/throttle.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-#include <vnet/ethernet/arp_packet.h>
-
-#define GBP_LEARN_DBG(...) \
- vlib_log_debug (gbp_learn_main.gl_logger, __VA_ARGS__);
-
-#define foreach_gbp_learn \
- _(DROP, "drop")
-
-typedef enum
-{
-#define _(sym,str) GBP_LEARN_ERROR_##sym,
- foreach_gbp_learn
-#undef _
- GBP_LEARN_N_ERROR,
-} gbp_learn_error_t;
-
-static char *gbp_learn_error_strings[] = {
-#define _(sym,string) string,
- foreach_gbp_learn
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) GBP_LEARN_NEXT_##sym,
- foreach_gbp_learn
-#undef _
- GBP_LEARN_N_NEXT,
-} gbp_learn_next_t;
-
-typedef struct gbp_learn_l2_t_
-{
- ip46_address_t ip;
- mac_address_t mac;
- u32 sw_if_index;
- u32 bd_index;
- sclass_t sclass;
- ip46_address_t outer_src;
- ip46_address_t outer_dst;
-} gbp_learn_l2_t;
-
-
-static void
-gbp_learn_l2_cp (const gbp_learn_l2_t * gl2)
-{
- ip46_address_t *ips = NULL;
-
- GBP_LEARN_DBG ("L2 EP: %U %U, %d",
- format_mac_address_t, &gl2->mac,
- format_ip46_address, &gl2->ip, IP46_TYPE_ANY, gl2->sclass);
-
- if (!ip46_address_is_zero (&gl2->ip))
- vec_add1 (ips, gl2->ip);
-
- /*
- * flip the source and dst, since that's how it was received, this API
- * takes how it's sent
- */
- gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_DP,
- gl2->sw_if_index, ips,
- &gl2->mac, INDEX_INVALID,
- INDEX_INVALID, gl2->sclass,
- (GBP_ENDPOINT_FLAG_LEARNT |
- GBP_ENDPOINT_FLAG_REMOTE),
- &gl2->outer_dst, &gl2->outer_src, NULL);
- vec_free (ips);
-}
-
-static void
-gbp_learn_l2_ip4_dp (const u8 * mac, const ip4_address_t * ip,
- u32 bd_index, u32 sw_if_index, sclass_t sclass,
- const ip4_address_t * outer_src,
- const ip4_address_t * outer_dst)
-{
- gbp_learn_l2_t gl2 = {
- .sw_if_index = sw_if_index,
- .bd_index = bd_index,
- .sclass = sclass,
- .ip.ip4 = *ip,
- .outer_src.ip4 = *outer_src,
- .outer_dst.ip4 = *outer_dst,
- };
- mac_address_from_bytes (&gl2.mac, mac);
-
- vl_api_rpc_call_main_thread (gbp_learn_l2_cp, (u8 *) & gl2, sizeof (gl2));
-}
-
-static void
-gbp_learn_l2_ip6_dp (const u8 * mac, const ip6_address_t * ip,
- u32 bd_index, u32 sw_if_index, sclass_t sclass,
- const ip4_address_t * outer_src,
- const ip4_address_t * outer_dst)
-{
- gbp_learn_l2_t gl2 = {
- .sw_if_index = sw_if_index,
- .bd_index = bd_index,
- .sclass = sclass,
- .ip.ip6 = *ip,
- .outer_src.ip4 = *outer_src,
- .outer_dst.ip4 = *outer_dst,
- };
- mac_address_from_bytes (&gl2.mac, mac);
-
- vl_api_rpc_call_main_thread (gbp_learn_l2_cp, (u8 *) & gl2, sizeof (gl2));
-}
-
-static void
-gbp_learn_l2_dp (const u8 * mac, u32 bd_index, u32 sw_if_index,
- sclass_t sclass,
- const ip4_address_t * outer_src,
- const ip4_address_t * outer_dst)
-{
- gbp_learn_l2_t gl2 = {
- .sw_if_index = sw_if_index,
- .bd_index = bd_index,
- .sclass = sclass,
- .outer_src.ip4 = *outer_src,
- .outer_dst.ip4 = *outer_dst,
- };
- mac_address_from_bytes (&gl2.mac, mac);
-
- vl_api_rpc_call_main_thread (gbp_learn_l2_cp, (u8 *) & gl2, sizeof (gl2));
-}
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_learn_l2_trace_t_
-{
- /* per-pkt trace data */
- mac_address_t mac;
- u32 sw_if_index;
- u32 new;
- u32 throttled;
- u32 sclass;
- u32 d_bit;
- gbp_bridge_domain_flags_t gb_flags;
-} gbp_learn_l2_trace_t;
-
-always_inline void
-gbp_learn_get_outer (const ethernet_header_t * eh0,
- ip4_address_t * outer_src, ip4_address_t * outer_dst)
-{
- ip4_header_t *ip0;
- u8 *buff;
-
- /* rewind back to the ivxlan header */
- buff = (u8 *) eh0;
- buff -= (sizeof (vxlan_gbp_header_t) +
- sizeof (udp_header_t) + sizeof (ip4_header_t));
-
- ip0 = (ip4_header_t *) buff;
-
- *outer_src = ip0->src_address;
- *outer_dst = ip0->dst_address;
-}
-
-always_inline int
-gbp_endpoint_update_required (const gbp_endpoint_t * ge0,
- u32 rx_sw_if_index, sclass_t sclass)
-{
- /* Conditions for [re]learning this EP */
-
- /* 1. it doesn't have a dataplane source */
- if (!gbp_endpoint_is_learnt (ge0))
- return (!0);
-
- /* 2. has the input interface changed */
- if (gbp_itf_get_sw_if_index (ge0->ge_fwd.gef_itf) != rx_sw_if_index)
- return (!0);
-
- /* 3. has the sclass changed */
- if (sclass != ge0->ge_fwd.gef_sclass)
- return (!0);
-
- /* otherwise it's unchanged */
- return (0);
-}
-
-VLIB_NODE_FN (gbp_learn_l2_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- u32 n_left_from, *from, *to_next, next_index, thread_index, seed;
- gbp_learn_main_t *glm;
- f64 time_now;
-
- glm = &gbp_learn_main;
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
- time_now = vlib_time_now (vm);
- thread_index = vm->thread_index;
-
- seed = throttle_seed (&glm->gl_l2_throttle, thread_index, time_now);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- ip4_address_t outer_src, outer_dst;
- const ethernet_header_t *eh0;
- u32 bi0, sw_if_index0, t0;
- gbp_bridge_domain_t *gb0;
- gbp_learn_next_t next0;
- gbp_endpoint_t *ge0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
-
- next0 = GBP_LEARN_NEXT_DROP;
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- eh0 = vlib_buffer_get_current (b0);
- sclass0 = vnet_buffer2 (b0)->gbp.sclass;
-
- next0 = vnet_l2_feature_next (b0, glm->gl_l2_input_feat_next,
- L2INPUT_FEAT_GBP_LEARN);
-
- ge0 = gbp_endpoint_find_mac (eh0->src_address,
- vnet_buffer (b0)->l2.bd_index);
- gb0 =
- gbp_bridge_domain_get_by_bd_index (vnet_buffer (b0)->l2.bd_index);
-
- if ((vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_D) ||
- (gb0->gb_flags & GBP_BD_FLAG_DO_NOT_LEARN))
- {
- t0 = 1;
- goto trace;
- }
-
- /*
- * check for new EP or a moved EP
- */
- if (NULL == ge0 ||
- gbp_endpoint_update_required (ge0, sw_if_index0, sclass0))
- {
- /*
- * use the last 4 bytes of the mac address as the hash for the EP
- */
- t0 = throttle_check (&glm->gl_l2_throttle, thread_index,
- *((u32 *) (eh0->src_address + 2)), seed);
- if (!t0)
- {
- gbp_learn_get_outer (eh0, &outer_src, &outer_dst);
-
- if (outer_src.as_u32 == 0 || outer_dst.as_u32 == 0)
- {
- t0 = 2;
- goto trace;
- }
-
- switch (clib_net_to_host_u16 (eh0->type))
- {
- case ETHERNET_TYPE_IP4:
- {
- const ip4_header_t *ip0;
-
- ip0 = (ip4_header_t *) (eh0 + 1);
-
- gbp_learn_l2_ip4_dp (eh0->src_address,
- &ip0->src_address,
- vnet_buffer (b0)->l2.bd_index,
- sw_if_index0, sclass0,
- &outer_src, &outer_dst);
-
- break;
- }
- case ETHERNET_TYPE_IP6:
- {
- const ip6_header_t *ip0;
-
- ip0 = (ip6_header_t *) (eh0 + 1);
-
- gbp_learn_l2_ip6_dp (eh0->src_address,
- &ip0->src_address,
- vnet_buffer (b0)->l2.bd_index,
- sw_if_index0, sclass0,
- &outer_src, &outer_dst);
-
- break;
- }
- case ETHERNET_TYPE_ARP:
- {
- const ethernet_arp_header_t *arp0;
-
- arp0 = (ethernet_arp_header_t *) (eh0 + 1);
-
- gbp_learn_l2_ip4_dp (eh0->src_address,
- &arp0->ip4_over_ethernet[0].ip4,
- vnet_buffer (b0)->l2.bd_index,
- sw_if_index0, sclass0,
- &outer_src, &outer_dst);
- break;
- }
- default:
- gbp_learn_l2_dp (eh0->src_address,
- vnet_buffer (b0)->l2.bd_index,
- sw_if_index0, sclass0,
- &outer_src, &outer_dst);
- break;
- }
- }
- }
- else
- {
- /*
- * this update could happen simultaneoulsy from multiple workers
- * but that's ok we are not interested in being very accurate.
- */
- t0 = 0;
- ge0->ge_last_time = time_now;
- }
- trace:
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- gbp_learn_l2_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- clib_memcpy_fast (t->mac.bytes, eh0->src_address, 6);
- t->new = (NULL == ge0);
- t->throttled = t0;
- t->sw_if_index = sw_if_index0;
- t->sclass = sclass0;
- t->gb_flags = gb0->gb_flags;
- t->d_bit = ! !(vnet_buffer2 (b0)->gbp.flags &
- VXLAN_GBP_GPFLAGS_D);
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-/* packet trace format function */
-static u8 *
-format_gbp_learn_l2_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_learn_l2_trace_t *t = va_arg (*args, gbp_learn_l2_trace_t *);
-
- s = format (s, "new:%d throttled:%d d-bit:%d mac:%U itf:%d sclass:%d"
- " gb-flags:%U",
- t->new, t->throttled, t->d_bit,
- format_mac_address_t, &t->mac, t->sw_if_index, t->sclass,
- format_gbp_bridge_domain_flags, t->gb_flags);
-
- return s;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_learn_l2_node) = {
- .name = "gbp-learn-l2",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_learn_l2_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(gbp_learn_error_strings),
- .error_strings = gbp_learn_error_strings,
-
- .n_next_nodes = GBP_LEARN_N_NEXT,
-
- .next_nodes = {
- [GBP_LEARN_NEXT_DROP] = "error-drop",
- },
-};
-/* *INDENT-ON* */
-
-typedef struct gbp_learn_l3_t_
-{
- ip46_address_t ip;
- u32 fib_index;
- u32 sw_if_index;
- sclass_t sclass;
- ip46_address_t outer_src;
- ip46_address_t outer_dst;
-} gbp_learn_l3_t;
-
-static void
-gbp_learn_l3_cp (const gbp_learn_l3_t * gl3)
-{
- ip46_address_t *ips = NULL;
-
- GBP_LEARN_DBG ("L3 EP: %U, %d", format_ip46_address, &gl3->ip,
- IP46_TYPE_ANY, gl3->sclass);
-
- vec_add1 (ips, gl3->ip);
-
- gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_DP,
- gl3->sw_if_index, ips, NULL,
- INDEX_INVALID, INDEX_INVALID, gl3->sclass,
- (GBP_ENDPOINT_FLAG_REMOTE |
- GBP_ENDPOINT_FLAG_LEARNT),
- &gl3->outer_dst, &gl3->outer_src, NULL);
- vec_free (ips);
-}
-
-static void
-gbp_learn_ip4_dp (const ip4_address_t * ip,
- u32 fib_index, u32 sw_if_index, sclass_t sclass,
- const ip4_address_t * outer_src,
- const ip4_address_t * outer_dst)
-{
- /* *INDENT-OFF* */
- gbp_learn_l3_t gl3 = {
- .ip = {
- .ip4 = *ip,
- },
- .sw_if_index = sw_if_index,
- .fib_index = fib_index,
- .sclass = sclass,
- .outer_src.ip4 = *outer_src,
- .outer_dst.ip4 = *outer_dst,
- };
- /* *INDENT-ON* */
-
- vl_api_rpc_call_main_thread (gbp_learn_l3_cp, (u8 *) & gl3, sizeof (gl3));
-}
-
-static void
-gbp_learn_ip6_dp (const ip6_address_t * ip,
- u32 fib_index, u32 sw_if_index, sclass_t sclass,
- const ip4_address_t * outer_src,
- const ip4_address_t * outer_dst)
-{
- /* *INDENT-OFF* */
- gbp_learn_l3_t gl3 = {
- .ip = {
- .ip6 = *ip,
- },
- .sw_if_index = sw_if_index,
- .fib_index = fib_index,
- .sclass = sclass,
- .outer_src.ip4 = *outer_src,
- .outer_dst.ip4 = *outer_dst,
- };
- /* *INDENT-ON* */
-
- vl_api_rpc_call_main_thread (gbp_learn_l3_cp, (u8 *) & gl3, sizeof (gl3));
-}
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_learn_l3_trace_t_
-{
- /* per-pkt trace data */
- ip46_address_t ip;
- u32 sw_if_index;
- u32 new;
- u32 throttled;
- u32 sclass;
-} gbp_learn_l3_trace_t;
-
-static uword
-gbp_learn_l3 (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame,
- fib_protocol_t fproto)
-{
- u32 n_left_from, *from, *to_next, next_index, thread_index, seed;
- gbp_learn_main_t *glm;
- f64 time_now;
-
- glm = &gbp_learn_main;
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
- time_now = vlib_time_now (vm);
- thread_index = vm->thread_index;
-
- seed = throttle_seed (&glm->gl_l3_throttle, thread_index, time_now);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- CLIB_UNUSED (const ip4_header_t *) ip4_0;
- CLIB_UNUSED (const ip6_header_t *) ip6_0;
- u32 bi0, sw_if_index0, t0, fib_index0;
- ip4_address_t outer_src, outer_dst;
- ethernet_header_t *eth0;
- gbp_learn_next_t next0;
- gbp_endpoint_t *ge0;
- vlib_buffer_t *b0;
- sclass_t sclass0;
-
- next0 = GBP_LEARN_NEXT_DROP;
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- sclass0 = vnet_buffer2 (b0)->gbp.sclass;
- ip6_0 = NULL;
- ip4_0 = NULL;
-
- vnet_feature_next (&next0, b0);
-
- if (vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_D)
- {
- t0 = 1;
- ge0 = NULL;
- goto trace;
- }
-
- fib_index0 = fib_table_get_index_for_sw_if_index (fproto,
- sw_if_index0);
-
- if (FIB_PROTOCOL_IP6 == fproto)
- {
- ip6_0 = vlib_buffer_get_current (b0);
- eth0 = (ethernet_header_t *) (((u8 *) ip6_0) - sizeof (*eth0));
-
- gbp_learn_get_outer (eth0, &outer_src, &outer_dst);
-
- ge0 = gbp_endpoint_find_ip6 (&ip6_0->src_address, fib_index0);
-
- if ((NULL == ge0) ||
- gbp_endpoint_update_required (ge0, sw_if_index0, sclass0))
- {
- t0 = throttle_check (&glm->gl_l3_throttle,
- thread_index,
- ip6_address_hash_to_u32
- (&ip6_0->src_address), seed);
-
- if (!t0)
- {
- gbp_learn_ip6_dp (&ip6_0->src_address,
- fib_index0, sw_if_index0, sclass0,
- &outer_src, &outer_dst);
- }
- }
- else
- {
- /*
- * this update could happen simultaneoulsy from multiple
- * workers but that's ok we are not interested in being
- * very accurate.
- */
- t0 = 0;
- ge0->ge_last_time = time_now;
- }
- }
- else
- {
- ip4_0 = vlib_buffer_get_current (b0);
- eth0 = (ethernet_header_t *) (((u8 *) ip4_0) - sizeof (*eth0));
-
- gbp_learn_get_outer (eth0, &outer_src, &outer_dst);
- ge0 = gbp_endpoint_find_ip4 (&ip4_0->src_address, fib_index0);
-
- if ((NULL == ge0) ||
- gbp_endpoint_update_required (ge0, sw_if_index0, sclass0))
- {
- t0 = throttle_check (&glm->gl_l3_throttle, thread_index,
- ip4_0->src_address.as_u32, seed);
-
- if (!t0)
- {
- gbp_learn_ip4_dp (&ip4_0->src_address,
- fib_index0, sw_if_index0, sclass0,
- &outer_src, &outer_dst);
- }
- }
- else
- {
- /*
- * this update could happen simultaneoulsy from multiple
- * workers but that's ok we are not interested in being
- * very accurate.
- */
- t0 = 0;
- ge0->ge_last_time = time_now;
- }
- }
- trace:
- if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- gbp_learn_l3_trace_t *t;
-
- t = vlib_add_trace (vm, node, b0, sizeof (*t));
- if (FIB_PROTOCOL_IP6 == fproto && ip6_0)
- ip46_address_set_ip6 (&t->ip, &ip6_0->src_address);
- if (FIB_PROTOCOL_IP4 == fproto && ip4_0)
- ip46_address_set_ip4 (&t->ip, &ip4_0->src_address);
- t->new = (NULL == ge0);
- t->throttled = t0;
- t->sw_if_index = sw_if_index0;
- t->sclass = sclass0;
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-/* packet trace format function */
-static u8 *
-format_gbp_learn_l3_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_learn_l3_trace_t *t = va_arg (*args, gbp_learn_l3_trace_t *);
-
- s = format (s, "new:%d throttled:%d ip:%U itf:%d sclass:%d",
- t->new, t->throttled,
- format_ip46_address, &t->ip, IP46_TYPE_ANY, t->sw_if_index,
- t->sclass);
-
- return s;
-}
-
-VLIB_NODE_FN (gbp_learn_ip4_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_learn_l3 (vm, node, frame, FIB_PROTOCOL_IP4));
-}
-
-VLIB_NODE_FN (gbp_learn_ip6_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_learn_l3 (vm, node, frame, FIB_PROTOCOL_IP6));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_learn_ip4_node) = {
- .name = "gbp-learn-ip4",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_learn_l3_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-};
-
-VNET_FEATURE_INIT (gbp_learn_ip4, static) =
-{
- .arc_name = "ip4-unicast",
- .node_name = "gbp-learn-ip4",
-};
-
-VLIB_REGISTER_NODE (gbp_learn_ip6_node) = {
- .name = "gbp-learn-ip6",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_learn_l3_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-};
-
-VNET_FEATURE_INIT (gbp_learn_ip6, static) =
-{
- .arc_name = "ip6-unicast",
- .node_name = "gbp-learn-ip6",
-};
-
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_policy.c b/src/plugins/gbp/gbp_policy.c
deleted file mode 100644
index 127c6d3f059..00000000000
--- a/src/plugins/gbp/gbp_policy.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_policy.h>
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-
-gbp_policy_main_t gbp_policy_main;
-
-/* packet trace format function */
-u8 *
-format_gbp_policy_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_policy_trace_t *t = va_arg (*args, gbp_policy_trace_t *);
-
- s =
- format (s,
- "scope:%d sclass:%d, dclass:%d, action:%U flags:%U acl: %d rule: %d",
- t->scope, t->sclass, t->dclass, format_gbp_rule_action, t->action,
- format_vxlan_gbp_header_gpflags, t->flags, t->acl_match,
- t->rule_match);
-
- return s;
-}
-
-static clib_error_t *
-gbp_policy_init (vlib_main_t * vm)
-{
- gbp_policy_main_t *gpm = &gbp_policy_main;
- clib_error_t *error = 0;
-
- /* Initialize the feature next-node indexes */
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "gbp-policy-port");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2OUTPUT_N_FEAT,
- l2output_get_feat_names (),
- gpm->l2_output_feat_next[GBP_POLICY_PORT]);
-
- node = vlib_get_node_by_name (vm, (u8 *) "gbp-policy-mac");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2OUTPUT_N_FEAT,
- l2output_get_feat_names (),
- gpm->l2_output_feat_next[GBP_POLICY_MAC]);
-
- node = vlib_get_node_by_name (vm, (u8 *) "gbp-policy-lpm");
- feat_bitmap_init_next_nodes (vm,
- node->index,
- L2OUTPUT_N_FEAT,
- l2output_get_feat_names (),
- gpm->l2_output_feat_next[GBP_POLICY_LPM]);
-
- return error;
-}
-
-VLIB_INIT_FUNCTION (gbp_policy_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_policy.h b/src/plugins/gbp/gbp_policy.h
deleted file mode 100644
index 6f87f2ec7c4..00000000000
--- a/src/plugins/gbp/gbp_policy.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_POLICY_H__
-#define __GBP_POLICY_H__
-
-#include <plugins/gbp/gbp_contract.h>
-
-/**
- * per-packet trace data
- */
-typedef struct gbp_policy_trace_t_
-{
- /* per-pkt trace data */
- gbp_scope_t scope;
- sclass_t sclass;
- sclass_t dclass;
- gbp_rule_action_t action;
- u32 flags;
- u32 acl_match;
- u32 rule_match;
-} gbp_policy_trace_t;
-
-/* packet trace format function */
-u8 * format_gbp_policy_trace (u8 * s, va_list * args);
-
-static_always_inline void
-gbp_policy_trace(vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t *b, const gbp_contract_key_t *key, gbp_rule_action_t action, u32 acl_match, u32 rule_match)
-{
- gbp_policy_trace_t *t;
-
- if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_IS_TRACED)))
- return;
-
- t = vlib_add_trace (vm, node, b, sizeof (*t));
- t->sclass = key->gck_src;
- t->dclass = key->gck_dst;
- t->scope = key->gck_scope;
- t->action = action;
- t->flags = vnet_buffer2 (b)->gbp.flags;
- t->acl_match = acl_match;
- t->rule_match = rule_match;
-}
-
-#endif /* __GBP_POLICY_H__ */
diff --git a/src/plugins/gbp/gbp_policy_dpo.c b/src/plugins/gbp/gbp_policy_dpo.c
deleted file mode 100644
index 9f26b9c67ab..00000000000
--- a/src/plugins/gbp/gbp_policy_dpo.c
+++ /dev/null
@@ -1,420 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/dpo/dvr_dpo.h>
-#include <vnet/dpo/drop_dpo.h>
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_policy.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_recirc.h>
-#include <plugins/gbp/gbp_contract.h>
-
-#ifndef CLIB_MARCH_VARIANT
-/**
- * DPO pool
- */
-gbp_policy_dpo_t *gbp_policy_dpo_pool;
-
-/**
- * DPO type registered for these GBP FWD
- */
-dpo_type_t gbp_policy_dpo_type;
-
-static gbp_policy_dpo_t *
-gbp_policy_dpo_alloc (void)
-{
- gbp_policy_dpo_t *gpd;
-
- pool_get_aligned_zero (gbp_policy_dpo_pool, gpd, CLIB_CACHE_LINE_BYTES);
-
- return (gpd);
-}
-
-static inline gbp_policy_dpo_t *
-gbp_policy_dpo_get_from_dpo (const dpo_id_t * dpo)
-{
- ASSERT (gbp_policy_dpo_type == dpo->dpoi_type);
-
- return (gbp_policy_dpo_get (dpo->dpoi_index));
-}
-
-static inline index_t
-gbp_policy_dpo_get_index (gbp_policy_dpo_t * gpd)
-{
- return (gpd - gbp_policy_dpo_pool);
-}
-
-static void
-gbp_policy_dpo_lock (dpo_id_t * dpo)
-{
- gbp_policy_dpo_t *gpd;
-
- gpd = gbp_policy_dpo_get_from_dpo (dpo);
- gpd->gpd_locks++;
-}
-
-static void
-gbp_policy_dpo_unlock (dpo_id_t * dpo)
-{
- gbp_policy_dpo_t *gpd;
-
- gpd = gbp_policy_dpo_get_from_dpo (dpo);
- gpd->gpd_locks--;
-
- if (0 == gpd->gpd_locks)
- {
- dpo_reset (&gpd->gpd_dpo);
- pool_put (gbp_policy_dpo_pool, gpd);
- }
-}
-
-static u32
-gbp_policy_dpo_get_urpf (const dpo_id_t * dpo)
-{
- gbp_policy_dpo_t *gpd;
-
- gpd = gbp_policy_dpo_get_from_dpo (dpo);
-
- return (gpd->gpd_sw_if_index);
-}
-
-void
-gbp_policy_dpo_add_or_lock (dpo_proto_t dproto,
- gbp_scope_t scope,
- sclass_t sclass, u32 sw_if_index, dpo_id_t * dpo)
-{
- gbp_policy_dpo_t *gpd;
- dpo_id_t parent = DPO_INVALID;
-
- gpd = gbp_policy_dpo_alloc ();
-
- gpd->gpd_proto = dproto;
- gpd->gpd_sw_if_index = sw_if_index;
- gpd->gpd_sclass = sclass;
- gpd->gpd_scope = scope;
-
- if (~0 != sw_if_index)
- {
- /*
- * stack on the DVR DPO for the output interface
- */
- dvr_dpo_add_or_lock (sw_if_index, dproto, &parent);
- }
- else
- {
- dpo_copy (&parent, drop_dpo_get (dproto));
- }
-
- dpo_stack (gbp_policy_dpo_type, dproto, &gpd->gpd_dpo, &parent);
- dpo_set (dpo, gbp_policy_dpo_type, dproto, gbp_policy_dpo_get_index (gpd));
-}
-
-u8 *
-format_gbp_policy_dpo (u8 * s, va_list * ap)
-{
- index_t index = va_arg (*ap, index_t);
- u32 indent = va_arg (*ap, u32);
- gbp_policy_dpo_t *gpd = gbp_policy_dpo_get (index);
- vnet_main_t *vnm = vnet_get_main ();
-
- s = format (s, "gbp-policy-dpo: %U, scope:%d sclass:%d out:%U",
- format_dpo_proto, gpd->gpd_proto,
- gpd->gpd_scope, (int) gpd->gpd_sclass,
- format_vnet_sw_if_index_name, vnm, gpd->gpd_sw_if_index);
- s = format (s, "\n%U", format_white_space, indent + 2);
- s = format (s, "%U", format_dpo_id, &gpd->gpd_dpo, indent + 4);
-
- return (s);
-}
-
-/**
- * Interpose a policy DPO
- */
-static void
-gbp_policy_dpo_interpose (const dpo_id_t * original,
- const dpo_id_t * parent, dpo_id_t * clone)
-{
- gbp_policy_dpo_t *gpd, *gpd_clone;
-
- gpd_clone = gbp_policy_dpo_alloc ();
- gpd = gbp_policy_dpo_get (original->dpoi_index);
-
- gpd_clone->gpd_proto = gpd->gpd_proto;
- gpd_clone->gpd_scope = gpd->gpd_scope;
- gpd_clone->gpd_sclass = gpd->gpd_sclass;
- gpd_clone->gpd_sw_if_index = gpd->gpd_sw_if_index;
-
- /*
- * if no interface is provided, grab one from the parent
- * on which we stack
- */
- if (~0 == gpd_clone->gpd_sw_if_index)
- gpd_clone->gpd_sw_if_index = dpo_get_urpf (parent);
-
- dpo_stack (gbp_policy_dpo_type,
- gpd_clone->gpd_proto, &gpd_clone->gpd_dpo, parent);
-
- dpo_set (clone,
- gbp_policy_dpo_type,
- gpd_clone->gpd_proto, gbp_policy_dpo_get_index (gpd_clone));
-}
-
-const static dpo_vft_t gbp_policy_dpo_vft = {
- .dv_lock = gbp_policy_dpo_lock,
- .dv_unlock = gbp_policy_dpo_unlock,
- .dv_format = format_gbp_policy_dpo,
- .dv_get_urpf = gbp_policy_dpo_get_urpf,
- .dv_mk_interpose = gbp_policy_dpo_interpose,
-};
-
-/**
- * @brief The per-protocol VLIB graph nodes that are assigned to a glean
- * object.
- *
- * this means that these graph nodes are ones from which a glean is the
- * parent object in the DPO-graph.
- */
-const static char *const gbp_policy_dpo_ip4_nodes[] = {
- "ip4-gbp-policy-dpo",
- NULL,
-};
-
-const static char *const gbp_policy_dpo_ip6_nodes[] = {
- "ip6-gbp-policy-dpo",
- NULL,
-};
-
-const static char *const *const gbp_policy_dpo_nodes[DPO_PROTO_NUM] = {
- [DPO_PROTO_IP4] = gbp_policy_dpo_ip4_nodes,
- [DPO_PROTO_IP6] = gbp_policy_dpo_ip6_nodes,
-};
-
-dpo_type_t
-gbp_policy_dpo_get_type (void)
-{
- return (gbp_policy_dpo_type);
-}
-
-static clib_error_t *
-gbp_policy_dpo_module_init (vlib_main_t * vm)
-{
- gbp_policy_dpo_type = dpo_register_new_type (&gbp_policy_dpo_vft,
- gbp_policy_dpo_nodes);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_policy_dpo_module_init);
-#endif /* CLIB_MARCH_VARIANT */
-
-typedef enum
-{
- GBP_POLICY_DROP,
- GBP_POLICY_N_NEXT,
-} gbp_policy_next_t;
-
-always_inline u32
-gbp_rule_l3_redirect (const gbp_rule_t * gu, vlib_buffer_t * b0, int is_ip6)
-{
- gbp_policy_node_t pnode;
- const dpo_id_t *dpo;
- dpo_proto_t dproto;
-
- pnode = (is_ip6 ? GBP_POLICY_NODE_IP6 : GBP_POLICY_NODE_IP4);
- dproto = (is_ip6 ? DPO_PROTO_IP6 : DPO_PROTO_IP4);
- dpo = &gu->gu_dpo[pnode][dproto];
-
- /* The flow hash is still valid as this is a IP packet being switched */
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo->dpoi_index;
-
- return (dpo->dpoi_next_node);
-}
-
-always_inline uword
-gbp_policy_dpo_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip6)
-{
- gbp_main_t *gm = &gbp_main;
- u32 n_left_from, next_index, *from, *to_next;
- u32 n_allow_intra, n_allow_a_bit, n_allow_sclass_1;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
- n_allow_intra = n_allow_a_bit = n_allow_sclass_1 = 0;
-
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- gbp_rule_action_t action0 = GBP_RULE_DENY;
- u32 acl_match = ~0, rule_match = ~0;
- const gbp_policy_dpo_t *gpd0;
- gbp_contract_error_t err0;
- gbp_contract_key_t key0;
- vlib_buffer_t *b0;
- gbp_rule_t *rule0;
- u32 bi0, next0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
- next0 = GBP_POLICY_DROP;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- gpd0 = gbp_policy_dpo_get (vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = gpd0->gpd_dpo.dpoi_index;
-
- /*
- * Reflection check; in and out on an ivxlan tunnel
- */
- if ((~0 != vxlan_gbp_tunnel_by_sw_if_index (gpd0->gpd_sw_if_index))
- && (vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_R))
- {
- goto trace;
- }
-
- if (vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_A)
- {
- next0 = gpd0->gpd_dpo.dpoi_next_node;
- key0.as_u64 = ~0;
- n_allow_a_bit++;
- goto trace;
- }
-
- /* zero out the key to ensure the pad space is clear */
- key0.as_u64 = 0;
- key0.gck_src = vnet_buffer2 (b0)->gbp.sclass;
-
- if (SCLASS_INVALID == key0.gck_src)
- {
- /*
- * the src EPG is not set when the packet arrives on an EPG
- * uplink interface and we do not need to apply policy
- */
- next0 = gpd0->gpd_dpo.dpoi_next_node;
- goto trace;
- }
-
- key0.gck_scope = gpd0->gpd_scope;
- key0.gck_dst = gpd0->gpd_sclass;
-
- action0 =
- gbp_contract_apply (vm, gm, &key0, b0, &rule0, &n_allow_intra,
- &n_allow_sclass_1, &acl_match, &rule_match,
- &err0,
- is_ip6 ? GBP_CONTRACT_APPLY_IP6 :
- GBP_CONTRACT_APPLY_IP4);
- switch (action0)
- {
- case GBP_RULE_PERMIT:
- next0 = gpd0->gpd_dpo.dpoi_next_node;
- vnet_buffer2 (b0)->gbp.flags |= VXLAN_GBP_GPFLAGS_A;
- break;
- case GBP_RULE_REDIRECT:
- next0 = gbp_rule_l3_redirect (rule0, b0, is_ip6);
- vnet_buffer2 (b0)->gbp.flags |= VXLAN_GBP_GPFLAGS_A;
- break;
- case GBP_RULE_DENY:
- next0 = GBP_POLICY_DROP;
- b0->error = node->errors[err0];
- break;
- }
-
- trace:
- gbp_policy_trace (vm, node, b0, &key0, action0, acl_match,
- rule_match);
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_INTRA, n_allow_intra);
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_A_BIT, n_allow_a_bit);
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_SCLASS_1,
- n_allow_sclass_1);
- return from_frame->n_vectors;
-}
-
-VLIB_NODE_FN (ip4_gbp_policy_dpo_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return (gbp_policy_dpo_inline (vm, node, from_frame, 0));
-}
-
-VLIB_NODE_FN (ip6_gbp_policy_dpo_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return (gbp_policy_dpo_inline (vm, node, from_frame, 1));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip4_gbp_policy_dpo_node) = {
- .name = "ip4-gbp-policy-dpo",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_policy_trace,
-
- .n_errors = ARRAY_LEN(gbp_contract_error_strings),
- .error_strings = gbp_contract_error_strings,
-
- .n_next_nodes = GBP_POLICY_N_NEXT,
- .next_nodes =
- {
- [GBP_POLICY_DROP] = "ip4-drop",
- }
-};
-VLIB_REGISTER_NODE (ip6_gbp_policy_dpo_node) = {
- .name = "ip6-gbp-policy-dpo",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_policy_trace,
-
- .n_errors = ARRAY_LEN(gbp_contract_error_strings),
- .error_strings = gbp_contract_error_strings,
-
- .n_next_nodes = GBP_POLICY_N_NEXT,
- .next_nodes =
- {
- [GBP_POLICY_DROP] = "ip6-drop",
- }
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_policy_dpo.h b/src/plugins/gbp/gbp_policy_dpo.h
deleted file mode 100644
index 77ca5d93bd0..00000000000
--- a/src/plugins/gbp/gbp_policy_dpo.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_POLICY_DPO_H__
-#define __GBP_POLICY_DPO_H__
-
-#include <vnet/dpo/dpo.h>
-#include <vnet/dpo/load_balance.h>
-#include <vnet/fib/ip4_fib.h>
-#include <vnet/fib/ip6_fib.h>
-
-/**
- * @brief
- * The GBP FWD DPO. Used in the L3 path to select the correct EPG uplink
- * based on the source EPG.
- */
-typedef struct gbp_policy_dpo_t_
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /**
- * The protocol of packets using this DPO
- */
- dpo_proto_t gpd_proto;
-
- /**
- * SClass
- */
- sclass_t gpd_sclass;
-
- /**
- * sclass scope
- */
- gbp_scope_t gpd_scope;
-
- /**
- * output sw_if_index
- */
- u32 gpd_sw_if_index;
-
- /**
- * number of locks.
- */
- u16 gpd_locks;
-
- /**
- * Stacked DPO on DVR/ADJ of output interface
- */
- dpo_id_t gpd_dpo;
-} gbp_policy_dpo_t;
-
-extern void gbp_policy_dpo_add_or_lock (dpo_proto_t dproto,
- gbp_scope_t scope,
- sclass_t sclass,
- u32 sw_if_index, dpo_id_t * dpo);
-
-extern dpo_type_t gbp_policy_dpo_get_type (void);
-
-extern vlib_node_registration_t ip4_gbp_policy_dpo_node;
-extern vlib_node_registration_t ip6_gbp_policy_dpo_node;
-extern vlib_node_registration_t gbp_policy_port_node;
-
-/**
- * Types exposed for the Data-plane
- */
-extern dpo_type_t gbp_policy_dpo_type;
-extern gbp_policy_dpo_t *gbp_policy_dpo_pool;
-
-always_inline gbp_policy_dpo_t *
-gbp_policy_dpo_get (index_t index)
-{
- return (pool_elt_at_index (gbp_policy_dpo_pool, index));
-}
-
-static_always_inline const gbp_policy_dpo_t *
-gbp_classify_get_gpd (const ip4_address_t * ip4, const ip6_address_t * ip6,
- const u32 fib_index)
-{
- const gbp_policy_dpo_t *gpd;
- const dpo_id_t *dpo;
- const load_balance_t *lb;
- u32 lbi;
-
- if (ip4)
- lbi = ip4_fib_forwarding_lookup (fib_index, ip4);
- else if (ip6)
- lbi = ip6_fib_table_fwding_lookup (fib_index, ip6);
- else
- return 0;
-
- lb = load_balance_get (lbi);
- dpo = load_balance_get_bucket_i (lb, 0);
-
- if (dpo->dpoi_type != gbp_policy_dpo_type)
- return 0;
-
- gpd = gbp_policy_dpo_get (dpo->dpoi_index);
- return gpd;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
-
-#endif
diff --git a/src/plugins/gbp/gbp_policy_node.c b/src/plugins/gbp/gbp_policy_node.c
deleted file mode 100644
index 8c6ef5c2b94..00000000000
--- a/src/plugins/gbp/gbp_policy_node.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_classify.h>
-#include <plugins/gbp/gbp_policy.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_ext_itf.h>
-#include <plugins/gbp/gbp_contract.h>
-
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-
-typedef enum
-{
- GBP_POLICY_NEXT_DROP,
- GBP_POLICY_N_NEXT,
-} gbp_policy_next_t;
-
-always_inline dpo_proto_t
-ethertype_to_dpo_proto (u16 etype)
-{
- etype = clib_net_to_host_u16 (etype);
-
- switch (etype)
- {
- case ETHERNET_TYPE_IP4:
- return (DPO_PROTO_IP4);
- case ETHERNET_TYPE_IP6:
- return (DPO_PROTO_IP6);
- }
-
- return (DPO_PROTO_NONE);
-}
-
-always_inline u32
-gbp_rule_l2_redirect (const gbp_rule_t * gu, vlib_buffer_t * b0)
-{
- const ethernet_header_t *eth0;
- const dpo_id_t *dpo;
- dpo_proto_t dproto;
-
- eth0 = vlib_buffer_get_current (b0);
- /* pop the ethernet header to prepare for L3 rewrite */
- vlib_buffer_advance (b0, vnet_buffer (b0)->l2.l2_len);
-
- dproto = ethertype_to_dpo_proto (eth0->type);
- dpo = &gu->gu_dpo[GBP_POLICY_NODE_L2][dproto];
-
- /* save the LB index for the next node and reset the IP flow hash
- * so it's recalculated */
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo->dpoi_index;
- vnet_buffer (b0)->ip.flow_hash = 0;
-
- return (dpo->dpoi_next_node);
-}
-
-static_always_inline gbp_policy_next_t
-gbp_policy_l2_feature_next (gbp_policy_main_t * gpm, vlib_buffer_t * b,
- const gbp_policy_type_t type)
-{
- u32 feat_bit;
-
- switch (type)
- {
- case GBP_POLICY_PORT:
- feat_bit = L2OUTPUT_FEAT_GBP_POLICY_PORT;
- break;
- case GBP_POLICY_MAC:
- feat_bit = L2OUTPUT_FEAT_GBP_POLICY_MAC;
- break;
- case GBP_POLICY_LPM:
- feat_bit = L2OUTPUT_FEAT_GBP_POLICY_LPM;
- break;
- default:
- return GBP_POLICY_NEXT_DROP;
- }
-
- return vnet_l2_feature_next (b, gpm->l2_output_feat_next[type], feat_bit);
-}
-
-static uword
-gbp_policy_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, const gbp_policy_type_t type)
-{
- gbp_main_t *gm = &gbp_main;
- gbp_policy_main_t *gpm = &gbp_policy_main;
- u32 n_left_from, *from, *to_next;
- u32 next_index;
- u32 n_allow_intra, n_allow_a_bit, n_allow_sclass_1;
-
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
- n_allow_intra = n_allow_a_bit = n_allow_sclass_1 = 0;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- gbp_rule_action_t action0 = GBP_RULE_DENY;
- const ethernet_header_t *h0;
- const gbp_endpoint_t *ge0;
- gbp_contract_error_t err0;
- u32 acl_match = ~0, rule_match = ~0;
- gbp_policy_next_t next0;
- gbp_contract_key_t key0;
- u32 bi0, sw_if_index0;
- vlib_buffer_t *b0;
- gbp_rule_t *rule0;
-
- next0 = GBP_POLICY_NEXT_DROP;
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- h0 = vlib_buffer_get_current (b0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
-
- /*
- * Reflection check; in and out on an ivxlan tunnel
- */
- if ((~0 != vxlan_gbp_tunnel_by_sw_if_index (sw_if_index0)) &&
- (vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_R))
- {
- goto trace;
- }
-
- /*
- * If the A-bit is set then policy has already been applied
- * and we skip enforcement here.
- */
- if (vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_A)
- {
- next0 = gbp_policy_l2_feature_next (gpm, b0, type);
- n_allow_a_bit++;
- key0.as_u64 = ~0;
- goto trace;
- }
-
- /*
- * determine the src and dst EPG
- */
-
- /* zero out the key to ensure the pad space is clear */
- key0.as_u64 = 0;
- key0.gck_src = vnet_buffer2 (b0)->gbp.sclass;
- key0.gck_dst = SCLASS_INVALID;
-
- if (GBP_POLICY_LPM == type)
- {
- const ip4_address_t *ip4 = 0;
- const ip6_address_t *ip6 = 0;
- const dpo_proto_t proto =
- gbp_classify_get_ip_address (h0, &ip4, &ip6,
- GBP_CLASSIFY_GET_IP_DST);
- if (PREDICT_TRUE (DPO_PROTO_NONE != proto))
- {
- const gbp_ext_itf_t *ext_itf =
- gbp_ext_itf_get (sw_if_index0);
- const gbp_policy_dpo_t *gpd =
- gbp_classify_get_gpd (ip4, ip6,
- ext_itf->gx_fib_index[proto]);
- if (gpd)
- key0.gck_dst = gpd->gpd_sclass;
- }
- }
- else
- {
- if (GBP_POLICY_PORT == type)
- ge0 = gbp_endpoint_find_itf (sw_if_index0);
- else
- ge0 = gbp_endpoint_find_mac (h0->dst_address,
- vnet_buffer (b0)->l2.bd_index);
- if (NULL != ge0)
- key0.gck_dst = ge0->ge_fwd.gef_sclass;
- }
-
- if (SCLASS_INVALID == key0.gck_dst)
- {
- /* If you cannot determine the destination EP then drop */
- b0->error = node->errors[GBP_CONTRACT_ERROR_DROP_NO_DCLASS];
- goto trace;
- }
-
- key0.gck_src = vnet_buffer2 (b0)->gbp.sclass;
- if (SCLASS_INVALID == key0.gck_src)
- {
- /*
- * the src EPG is not set when the packet arrives on an EPG
- * uplink interface and we do not need to apply policy
- */
- next0 = gbp_policy_l2_feature_next (gpm, b0, type);
- goto trace;
- }
-
- key0.gck_scope =
- gbp_bridge_domain_get_scope (vnet_buffer (b0)->l2.bd_index);
-
- action0 =
- gbp_contract_apply (vm, gm, &key0, b0, &rule0, &n_allow_intra,
- &n_allow_sclass_1, &acl_match, &rule_match,
- &err0, GBP_CONTRACT_APPLY_L2);
- switch (action0)
- {
- case GBP_RULE_PERMIT:
- next0 = gbp_policy_l2_feature_next (gpm, b0, type);
- vnet_buffer2 (b0)->gbp.flags |= VXLAN_GBP_GPFLAGS_A;
- break;
- case GBP_RULE_REDIRECT:
- next0 = gbp_rule_l2_redirect (rule0, b0);
- vnet_buffer2 (b0)->gbp.flags |= VXLAN_GBP_GPFLAGS_A;
- break;
- case GBP_RULE_DENY:
- next0 = GBP_POLICY_NEXT_DROP;
- b0->error = node->errors[err0];
- break;
- }
-
- trace:
- gbp_policy_trace (vm, node, b0, &key0, action0, acl_match,
- rule_match);
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_INTRA, n_allow_intra);
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_A_BIT, n_allow_a_bit);
- vlib_node_increment_counter (vm, node->node_index,
- GBP_CONTRACT_ERROR_ALLOW_SCLASS_1,
- n_allow_sclass_1);
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (gbp_policy_port_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_policy_inline (vm, node, frame, GBP_POLICY_PORT));
-}
-
-VLIB_NODE_FN (gbp_policy_mac_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_policy_inline (vm, node, frame, GBP_POLICY_MAC));
-}
-
-VLIB_NODE_FN (gbp_policy_lpm_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return (gbp_policy_inline (vm, node, frame, GBP_POLICY_LPM));
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_policy_port_node) = {
- .name = "gbp-policy-port",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_policy_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(gbp_contract_error_strings),
- .error_strings = gbp_contract_error_strings,
-
- .n_next_nodes = GBP_POLICY_N_NEXT,
- .next_nodes = {
- [GBP_POLICY_NEXT_DROP] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (gbp_policy_mac_node) = {
- .name = "gbp-policy-mac",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_policy_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(gbp_contract_error_strings),
- .error_strings = gbp_contract_error_strings,
-
- .n_next_nodes = GBP_POLICY_N_NEXT,
- .next_nodes = {
- [GBP_POLICY_NEXT_DROP] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (gbp_policy_lpm_node) = {
- .name = "gbp-policy-lpm",
- .vector_size = sizeof (u32),
- .format_trace = format_gbp_policy_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(gbp_contract_error_strings),
- .error_strings = gbp_contract_error_strings,
-
- .n_next_nodes = GBP_POLICY_N_NEXT,
- .next_nodes = {
- [GBP_POLICY_NEXT_DROP] = "error-drop",
- },
-};
-
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_recirc.c b/src/plugins/gbp/gbp_recirc.c
deleted file mode 100644
index 8d56f11b4e3..00000000000
--- a/src/plugins/gbp/gbp_recirc.c
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_recirc.h>
-#include <plugins/gbp/gbp_endpoint_group.h>
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#include <vnet/dpo/dvr_dpo.h>
-#include <vnet/fib/fib_table.h>
-
-#include <vlib/unix/plugin.h>
-
-/**
- * Pool of GBP recircs
- */
-gbp_recirc_t *gbp_recirc_pool;
-
-/**
- * Recirc configs keyed by sw_if_index
- */
-index_t *gbp_recirc_db;
-
-/**
- * logger
- */
-vlib_log_class_t gr_logger;
-
-/**
- * L2 Emulation enable/disable symbols
- */
-static void (*l2e_enable) (u32 sw_if_index);
-static void (*l2e_disable) (u32 sw_if_index);
-
-#define GBP_RECIRC_DBG(...) \
- vlib_log_debug (gr_logger, __VA_ARGS__);
-
-u8 *
-format_gbp_recirc (u8 * s, va_list * args)
-{
- gbp_recirc_t *gr = va_arg (*args, gbp_recirc_t *);
- vnet_main_t *vnm = vnet_get_main ();
-
- return format (s, " %U, sclass:%d, ext:%d",
- format_vnet_sw_if_index_name, vnm,
- gr->gr_sw_if_index, gr->gr_sclass, gr->gr_is_ext);
-}
-
-int
-gbp_recirc_add (u32 sw_if_index, sclass_t sclass, u8 is_ext)
-{
- gbp_recirc_t *gr;
- index_t gri;
-
- vec_validate_init_empty (gbp_recirc_db, sw_if_index, INDEX_INVALID);
-
- gri = gbp_recirc_db[sw_if_index];
-
- if (INDEX_INVALID == gri)
- {
- gbp_endpoint_group_t *gg;
- fib_protocol_t fproto;
- index_t ggi;
-
- ggi = gbp_endpoint_group_find (sclass);
-
- if (INDEX_INVALID == ggi)
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-
- gbp_endpoint_group_lock (ggi);
- pool_get_zero (gbp_recirc_pool, gr);
- gri = gr - gbp_recirc_pool;
-
- gr->gr_sclass = sclass;
- gr->gr_is_ext = is_ext;
- gr->gr_sw_if_index = sw_if_index;
-
- /*
- * IP enable the recirc interface
- */
- ip4_sw_interface_enable_disable (gr->gr_sw_if_index, 1);
- ip6_sw_interface_enable_disable (gr->gr_sw_if_index, 1);
-
- /*
- * cache the FIB indicies of the EPG
- */
- gr->gr_epgi = ggi;
-
- gg = gbp_endpoint_group_get (gr->gr_epgi);
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- gr->gr_fib_index[fib_proto_to_dpo (fproto)] =
- gbp_endpoint_group_get_fib_index (gg, fproto);
- }
-
- /*
- * bind to the bridge-domain of the EPG
- */
- gr->gr_itf = gbp_itf_l2_add_and_lock (gr->gr_sw_if_index, gg->gg_gbd);
-
- /*
- * set the interface into L2 emulation mode
- */
- l2e_enable (gr->gr_sw_if_index);
-
- /*
- * Packets on the recirculation interface are subject to src-EPG
- * classification. Recirc interfaces are L2-emulation mode.
- * for internal EPGs this is via an LPM on all external subnets.
- * for external EPGs this is via a port mapping.
- */
- if (gr->gr_is_ext)
- {
- mac_address_t mac;
- /*
- * recirc is for post-NAT translation packets going into
- * the external EPG, these are classified to the NAT EPG
- * based on its port
- */
- mac_address_from_bytes (&mac,
- vnet_sw_interface_get_hw_address
- (vnet_get_main (), gr->gr_sw_if_index));
- gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
- gr->gr_sw_if_index,
- NULL, &mac, INDEX_INVALID,
- INDEX_INVALID, gr->gr_sclass,
- GBP_ENDPOINT_FLAG_NONE,
- NULL, NULL, &gr->gr_ep);
- vnet_feature_enable_disable ("ip4-unicast",
- "ip4-gbp-src-classify",
- gr->gr_sw_if_index, 1, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "ip6-gbp-src-classify",
- gr->gr_sw_if_index, 1, 0, 0);
- }
- else
- {
- /*
- * recirc is for pre-NAT translation packets coming from
- * the external EPG, these are classified based on a LPM
- * in the EPG's route-domain
- */
- vnet_feature_enable_disable ("ip4-unicast",
- "ip4-gbp-lpm-classify",
- gr->gr_sw_if_index, 1, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "ip6-gbp-lpm-classify",
- gr->gr_sw_if_index, 1, 0, 0);
- }
-
- gbp_recirc_db[sw_if_index] = gri;
- }
- else
- {
- gr = gbp_recirc_get (gri);
- }
-
- GBP_RECIRC_DBG ("add: %U", format_gbp_recirc, gr);
- return (0);
-}
-
-int
-gbp_recirc_delete (u32 sw_if_index)
-{
- gbp_recirc_t *gr;
- index_t gri;
-
- if (vec_len (gbp_recirc_db) <= sw_if_index)
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
- gri = gbp_recirc_db[sw_if_index];
-
- if (INDEX_INVALID != gri)
- {
- gr = pool_elt_at_index (gbp_recirc_pool, gri);
-
- GBP_RECIRC_DBG ("del: %U", format_gbp_recirc, gr);
-
- if (gr->gr_is_ext)
- {
- gbp_endpoint_unlock (GBP_ENDPOINT_SRC_CP, gr->gr_ep);
- vnet_feature_enable_disable ("ip4-unicast",
- "ip4-gbp-src-classify",
- gr->gr_sw_if_index, 0, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "ip6-gbp-src-classify",
- gr->gr_sw_if_index, 0, 0, 0);
- }
- else
- {
- vnet_feature_enable_disable ("ip4-unicast",
- "ip4-gbp-lpm-classify",
- gr->gr_sw_if_index, 0, 0, 0);
- vnet_feature_enable_disable ("ip6-unicast",
- "ip6-gbp-lpm-classify",
- gr->gr_sw_if_index, 0, 0, 0);
- }
-
- ip4_sw_interface_enable_disable (gr->gr_sw_if_index, 0);
- ip6_sw_interface_enable_disable (gr->gr_sw_if_index, 0);
- l2e_disable (gr->gr_sw_if_index);
-
- gbp_itf_unlock (&gr->gr_itf);
-
- gbp_endpoint_group_unlock (gr->gr_epgi);
- gbp_recirc_db[sw_if_index] = INDEX_INVALID;
- pool_put (gbp_recirc_pool, gr);
- return (0);
- }
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-}
-
-void
-gbp_recirc_walk (gbp_recirc_cb_t cb, void *ctx)
-{
- gbp_recirc_t *ge;
-
- /* *INDENT-OFF* */
- pool_foreach (ge, gbp_recirc_pool)
- {
- if (!cb(ge, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static walk_rc_t
-gbp_recirc_show_one (gbp_recirc_t * gr, void *ctx)
-{
- vlib_cli_output (ctx, " %U", format_gbp_recirc, gr);
-
- return (WALK_CONTINUE);
-}
-
-static clib_error_t *
-gbp_recirc_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "Recirculation-Interfaces:");
- gbp_recirc_walk (gbp_recirc_show_one, vm);
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy Recircs and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp recirc}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_recirc_show_node, static) = {
- .path = "show gbp recirc",
- .short_help = "show gbp recirc\n",
- .function = gbp_recirc_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_recirc_init (vlib_main_t * vm)
-{
- gr_logger = vlib_log_register_class ("gbp", "recirc");
-
- l2e_enable =
- vlib_get_plugin_symbol ("l2e_plugin.so", "l2_emulation_enable");
- l2e_disable =
- vlib_get_plugin_symbol ("l2e_plugin.so", "l2_emulation_disable");
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_recirc_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_recirc.h b/src/plugins/gbp/gbp_recirc.h
deleted file mode 100644
index 2f3354b794e..00000000000
--- a/src/plugins/gbp/gbp_recirc.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_RECIRC_H__
-#define __GBP_RECIRC_H__
-
-#include <plugins/gbp/gbp_types.h>
-#include <plugins/gbp/gbp_itf.h>
-#include <vnet/fib/fib_types.h>
-
-/**
- * A GBP recirculation interface representation
- * Thes interfaces join Bridge domains that are internal to those that are
- * NAT external, so the packets can be NAT translated and then undergo the
- * whole policy process again.
- */
-typedef struct gpb_recirc_t_
-{
- /**
- * EPG ID that packets will classify to when they arrive on this recirc
- */
- sclass_t gr_sclass;
-
- /**
- * The index of the EPG
- */
- index_t gr_epgi;
-
- /**
- * FIB indices the EPG is mapped to
- */
- u32 gr_fib_index[DPO_PROTO_NUM];
-
- /**
- * Is the interface for packets post-NAT translation (i.e. ext)
- * or pre-NAT translation (i.e. internal)
- */
- u8 gr_is_ext;
-
- /**
- */
- u32 gr_sw_if_index;
- gbp_itf_hdl_t gr_itf;
-
- /**
- * The endpoint created to represent the reric interface
- */
- index_t gr_ep;
-} gbp_recirc_t;
-
-extern int gbp_recirc_add (u32 sw_if_index, sclass_t sclass, u8 is_ext);
-extern int gbp_recirc_delete (u32 sw_if_index);
-
-typedef walk_rc_t (*gbp_recirc_cb_t) (gbp_recirc_t * gbpe, void *ctx);
-extern void gbp_recirc_walk (gbp_recirc_cb_t bgpe, void *ctx);
-
-/**
- * Data plane functions
- */
-extern gbp_recirc_t *gbp_recirc_pool;
-extern index_t *gbp_recirc_db;
-
-always_inline gbp_recirc_t *
-gbp_recirc_get (u32 sw_if_index)
-{
- return (pool_elt_at_index (gbp_recirc_pool, gbp_recirc_db[sw_if_index]));
-}
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_route_domain.c b/src/plugins/gbp/gbp_route_domain.c
deleted file mode 100644
index 6cc595d0fa9..00000000000
--- a/src/plugins/gbp/gbp_route_domain.c
+++ /dev/null
@@ -1,447 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_route_domain.h>
-#include <plugins/gbp/gbp_endpoint.h>
-
-#include <vnet/dpo/dvr_dpo.h>
-#include <vnet/fib/fib_table.h>
-
-/**
- * A fixed MAC address to use as the source MAC for packets L3 switched
- * onto the routed uu-fwd interfaces.
- * Magic values - origin lost to the mists of time...
- */
-/* *INDENT-OFF* */
-const static mac_address_t GBP_ROUTED_SRC_MAC = {
- .bytes = {
- 0x0, 0x22, 0xBD, 0xF8, 0x19, 0xFF,
- }
-};
-
-const static mac_address_t GBP_ROUTED_DST_MAC = {
- .bytes = {
- 00, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
- }
-};
-/* *INDENT-ON* */
-
-/**
- * Pool of GBP route_domains
- */
-gbp_route_domain_t *gbp_route_domain_pool;
-
-/**
- * DB of route_domains
- */
-typedef struct gbp_route_domain_db_t
-{
- uword *gbd_by_rd_id;
-} gbp_route_domain_db_t;
-
-static gbp_route_domain_db_t gbp_route_domain_db;
-static fib_source_t gbp_fib_source;
-
-/**
- * logger
- */
-vlib_log_class_t grd_logger;
-
-#define GBP_BD_DBG(...) \
- vlib_log_debug (grd_logger, __VA_ARGS__);
-
-index_t
-gbp_route_domain_index (const gbp_route_domain_t * grd)
-{
- return (grd - gbp_route_domain_pool);
-}
-
-gbp_route_domain_t *
-gbp_route_domain_get (index_t i)
-{
- return (pool_elt_at_index (gbp_route_domain_pool, i));
-}
-
-static void
-gbp_route_domain_lock (index_t i)
-{
- gbp_route_domain_t *grd;
-
- grd = gbp_route_domain_get (i);
- grd->grd_locks++;
-}
-
-index_t
-gbp_route_domain_find (u32 rd_id)
-{
- uword *p;
-
- p = hash_get (gbp_route_domain_db.gbd_by_rd_id, rd_id);
-
- if (NULL != p)
- return p[0];
-
- return (INDEX_INVALID);
-}
-
-index_t
-gbp_route_domain_find_and_lock (u32 rd_id)
-{
- index_t grdi;
-
- grdi = gbp_route_domain_find (rd_id);
-
- if (INDEX_INVALID != grdi)
- {
- gbp_route_domain_lock (grdi);
- }
- return (grdi);
-}
-
-static void
-gbp_route_domain_db_add (gbp_route_domain_t * grd)
-{
- index_t grdi = grd - gbp_route_domain_pool;
-
- hash_set (gbp_route_domain_db.gbd_by_rd_id, grd->grd_id, grdi);
-}
-
-static void
-gbp_route_domain_db_remove (gbp_route_domain_t * grd)
-{
- hash_unset (gbp_route_domain_db.gbd_by_rd_id, grd->grd_id);
-}
-
-int
-gbp_route_domain_add_and_lock (u32 rd_id,
- gbp_scope_t scope,
- u32 ip4_table_id,
- u32 ip6_table_id,
- u32 ip4_uu_sw_if_index, u32 ip6_uu_sw_if_index)
-{
- gbp_route_domain_t *grd;
- index_t grdi;
-
- grdi = gbp_route_domain_find (rd_id);
-
- if (INDEX_INVALID == grdi)
- {
- fib_protocol_t fproto;
-
- pool_get_zero (gbp_route_domain_pool, grd);
-
- grd->grd_id = rd_id;
- grd->grd_scope = scope;
- grd->grd_table_id[FIB_PROTOCOL_IP4] = ip4_table_id;
- grd->grd_table_id[FIB_PROTOCOL_IP6] = ip6_table_id;
- grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP4] = ip4_uu_sw_if_index;
- grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP6] = ip6_uu_sw_if_index;
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- grd->grd_fib_index[fproto] =
- fib_table_find_or_create_and_lock (fproto,
- grd->grd_table_id[fproto],
- gbp_fib_source);
-
- if (~0 != grd->grd_uu_sw_if_index[fproto])
- {
- ethernet_header_t *eth;
- u8 *rewrite;
-
- rewrite = NULL;
- vec_validate (rewrite, sizeof (*eth) - 1);
- eth = (ethernet_header_t *) rewrite;
-
- eth->type = clib_host_to_net_u16 ((fproto == FIB_PROTOCOL_IP4 ?
- ETHERNET_TYPE_IP4 :
- ETHERNET_TYPE_IP6));
-
- mac_address_to_bytes (gbp_route_domain_get_local_mac (),
- eth->src_address);
- mac_address_to_bytes (gbp_route_domain_get_remote_mac (),
- eth->dst_address);
-
- /*
- * create an adjacency out of the uu-fwd interfaces that will
- * be used when adding subnet routes.
- */
- grd->grd_adj[fproto] =
- adj_nbr_add_or_lock_w_rewrite (fproto,
- fib_proto_to_link (fproto),
- &ADJ_BCAST_ADDR,
- grd->grd_uu_sw_if_index[fproto],
- rewrite);
- }
- else
- {
- grd->grd_adj[fproto] = INDEX_INVALID;
- }
- }
-
- gbp_route_domain_db_add (grd);
- }
- else
- {
- grd = gbp_route_domain_get (grdi);
- }
-
- grd->grd_locks++;
- GBP_BD_DBG ("add: %U", format_gbp_route_domain, grd);
-
- return (0);
-}
-
-void
-gbp_route_domain_unlock (index_t index)
-{
- gbp_route_domain_t *grd;
-
- grd = gbp_route_domain_get (index);
-
- grd->grd_locks--;
-
- if (0 == grd->grd_locks)
- {
- fib_protocol_t fproto;
-
- GBP_BD_DBG ("destroy: %U", format_gbp_route_domain, grd);
-
- FOR_EACH_FIB_IP_PROTOCOL (fproto)
- {
- fib_table_unlock (grd->grd_fib_index[fproto], fproto, gbp_fib_source);
- if (INDEX_INVALID != grd->grd_adj[fproto])
- adj_unlock (grd->grd_adj[fproto]);
- }
-
- gbp_route_domain_db_remove (grd);
-
- pool_put (gbp_route_domain_pool, grd);
- }
-}
-
-u32
-gbp_route_domain_get_rd_id (index_t grdi)
-{
- gbp_route_domain_t *grd;
-
- grd = gbp_route_domain_get (grdi);
-
- return (grd->grd_id);
-}
-
-gbp_scope_t
-gbp_route_domain_get_scope (index_t grdi)
-{
- gbp_route_domain_t *grd;
-
- grd = gbp_route_domain_get (grdi);
-
- return (grd->grd_scope);
-}
-
-int
-gbp_route_domain_delete (u32 rd_id)
-{
- index_t grdi;
-
- GBP_BD_DBG ("del: %d", rd_id);
- grdi = gbp_route_domain_find (rd_id);
-
- if (INDEX_INVALID != grdi)
- {
- GBP_BD_DBG ("del: %U", format_gbp_route_domain,
- gbp_route_domain_get (grdi));
- gbp_route_domain_unlock (grdi);
-
- return (0);
- }
-
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-}
-
-const mac_address_t *
-gbp_route_domain_get_local_mac (void)
-{
- return (&GBP_ROUTED_SRC_MAC);
-}
-
-const mac_address_t *
-gbp_route_domain_get_remote_mac (void)
-{
- return (&GBP_ROUTED_DST_MAC);
-}
-
-void
-gbp_route_domain_walk (gbp_route_domain_cb_t cb, void *ctx)
-{
- gbp_route_domain_t *gbpe;
-
- /* *INDENT-OFF* */
- pool_foreach (gbpe, gbp_route_domain_pool)
- {
- if (!cb(gbpe, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static clib_error_t *
-gbp_route_domain_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vnet_main_t *vnm = vnet_get_main ();
- u32 ip4_uu_sw_if_index = ~0;
- u32 ip6_uu_sw_if_index = ~0;
- u32 ip4_table_id = ~0;
- u32 ip6_table_id = ~0;
- u32 scope = ~0;
- u32 rd_id = ~0;
- u8 add = 1;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "ip4-uu %U", unformat_vnet_sw_interface,
- vnm, &ip4_uu_sw_if_index))
- ;
- else if (unformat (input, "ip6-uu %U", unformat_vnet_sw_interface,
- vnm, &ip6_uu_sw_if_index))
- ;
- else if (unformat (input, "ip4-table-id %d", &ip4_table_id))
- ;
- else if (unformat (input, "ip6-table-id %d", &ip6_table_id))
- ;
- else if (unformat (input, "add"))
- add = 1;
- else if (unformat (input, "del"))
- add = 0;
- else if (unformat (input, "rd %d", &rd_id))
- ;
- else if (unformat (input, "scope %d", &scope))
- ;
- else
- break;
- }
-
- if (~0 == rd_id)
- return clib_error_return (0, "RD-ID must be specified");
-
- if (add)
- {
- if (~0 == ip4_table_id)
- return clib_error_return (0, "IP4 table-ID must be specified");
- if (~0 == ip6_table_id)
- return clib_error_return (0, "IP6 table-ID must be specified");
-
- gbp_route_domain_add_and_lock (rd_id, scope,
- ip4_table_id,
- ip6_table_id,
- ip4_uu_sw_if_index, ip6_uu_sw_if_index);
- }
- else
- gbp_route_domain_delete (rd_id);
-
- return (NULL);
-}
-
-/*?
- * Configure a GBP route-domain
- *
- * @cliexpar
- * @cliexstart{gbp route-domain [del] rd <ID> ip4-table-id <ID> ip6-table-id <ID> [ip4-uu <interface>] [ip6-uu <interface>]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_route_domain_cli_node, static) = {
- .path = "gbp route-domain",
- .short_help = "gbp route-domain [del] rd <ID> ip4-table-id <ID> ip6-table-id <ID> [ip4-uu <interface>] [ip6-uu <interface>]",
- .function = gbp_route_domain_cli,
-};
-
-u8 *
-format_gbp_route_domain (u8 * s, va_list * args)
-{
- gbp_route_domain_t *grd = va_arg (*args, gbp_route_domain_t*);
- vnet_main_t *vnm = vnet_get_main ();
-
- if (NULL != grd)
- s = format (s, "[%d] rd:%d ip4-uu:%U ip6-uu:%U locks:%d",
- grd - gbp_route_domain_pool,
- grd->grd_id,
- format_vnet_sw_if_index_name, vnm, grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP4],
- format_vnet_sw_if_index_name, vnm, grd->grd_uu_sw_if_index[FIB_PROTOCOL_IP6],
- grd->grd_locks);
- else
- s = format (s, "NULL");
-
- return (s);
-}
-
-static int
-gbp_route_domain_show_one (gbp_route_domain_t *gb, void *ctx)
-{
- vlib_main_t *vm;
-
- vm = ctx;
- vlib_cli_output (vm, " %U",format_gbp_route_domain, gb);
-
- return (1);
-}
-
-static clib_error_t *
-gbp_route_domain_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "Route-Domains:");
- gbp_route_domain_walk (gbp_route_domain_show_one, vm);
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy Route_Domains and derived information
- *
- * @cliexpar
- * @cliexstart{show gbp route_domain}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_route_domain_show_node, static) = {
- .path = "show gbp route-domain",
- .short_help = "show gbp route-domain\n",
- .function = gbp_route_domain_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_route_domain_init (vlib_main_t * vm)
-{
- grd_logger = vlib_log_register_class ("gbp", "rd");
- gbp_fib_source = fib_source_allocate ("gbp-rd",
- FIB_SOURCE_PRIORITY_HI,
- FIB_SOURCE_BH_DROP);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_route_domain_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_route_domain.h b/src/plugins/gbp/gbp_route_domain.h
deleted file mode 100644
index 897c1bdd7ac..00000000000
--- a/src/plugins/gbp/gbp_route_domain.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_ROUTE_DOMAIN_H__
-#define __GBP_ROUTE_DOMAIN_H__
-
-#include <plugins/gbp/gbp_types.h>
-
-#include <vnet/fib/fib_types.h>
-#include <vnet/ethernet/mac_address.h>
-
-/**
- * A route Domain Representation.
- * This is a standard route-domain plus all the attributes it must
- * have to supprt the GBP model.
- */
-typedef struct gpb_route_domain_t_
-{
- /**
- * Route-domain ID
- */
- u32 grd_id;
- gbp_scope_t grd_scope;
- u32 grd_fib_index[FIB_PROTOCOL_IP_MAX];
- u32 grd_table_id[FIB_PROTOCOL_IP_MAX];
-
- /**
- * The interfaces on which to send packets to unnknown EPs
- */
- u32 grd_uu_sw_if_index[FIB_PROTOCOL_IP_MAX];
-
- /**
- * adjacencies on the UU interfaces.
- */
- u32 grd_adj[FIB_PROTOCOL_IP_MAX];
-
- u32 grd_locks;
-} gbp_route_domain_t;
-
-extern int gbp_route_domain_add_and_lock (u32 rd_id,
- gbp_scope_t scope,
- u32 ip4_table_id,
- u32 ip6_table_id,
- u32 ip4_uu_sw_if_index,
- u32 ip6_uu_sw_if_index);
-extern void gbp_route_domain_unlock (index_t grdi);
-extern index_t gbp_route_domain_find_and_lock (u32 rd_id);
-extern index_t gbp_route_domain_find (u32 rd_id);
-extern index_t gbp_route_domain_index (const gbp_route_domain_t *);
-
-extern int gbp_route_domain_delete (u32 rd_id);
-extern gbp_route_domain_t *gbp_route_domain_get (index_t i);
-extern u32 gbp_route_domain_get_rd_id (index_t i);
-extern gbp_scope_t gbp_route_domain_get_scope (index_t i);
-
-typedef int (*gbp_route_domain_cb_t) (gbp_route_domain_t * gb, void *ctx);
-extern void gbp_route_domain_walk (gbp_route_domain_cb_t bgpe, void *ctx);
-
-extern const mac_address_t *gbp_route_domain_get_local_mac (void);
-extern const mac_address_t *gbp_route_domain_get_remote_mac (void);
-
-extern u8 *format_gbp_route_domain (u8 * s, va_list * args);
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_scanner.c b/src/plugins/gbp/gbp_scanner.c
deleted file mode 100644
index 9ae962b7449..00000000000
--- a/src/plugins/gbp/gbp_scanner.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * gbp.h : Group Based Policy
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_scanner.h>
-#include <plugins/gbp/gbp_endpoint.h>
-#include <plugins/gbp/gbp_vxlan.h>
-
-/**
- * Scanner logger
- */
-vlib_log_class_t gs_logger;
-
-/**
- * Scanner state
- */
-static bool gs_enabled;
-
-#define GBP_SCANNER_DBG(...) \
- vlib_log_debug (gs_logger, __VA_ARGS__);
-
-static uword
-gbp_scanner (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
-{
- uword event_type, *event_data = 0;
- bool do_scan = 0;
-
- while (1)
- {
- do_scan = 0;
-
- if (gs_enabled)
- {
- /* scan every 'inactive threshold' seconds */
- vlib_process_wait_for_event_or_clock (vm, 2);
- }
- else
- vlib_process_wait_for_event (vm);
-
- event_type = vlib_process_get_events (vm, &event_data);
- vec_reset_length (event_data);
-
- switch (event_type)
- {
- case ~0:
- /* timer expired */
- do_scan = 1;
- break;
-
- case GBP_ENDPOINT_SCAN_START:
- gs_enabled = 1;
- break;
-
- case GBP_ENDPOINT_SCAN_STOP:
- gs_enabled = 0;
- break;
-
- case GBP_ENDPOINT_SCAN_SET_TIME:
- break;
-
- default:
- ASSERT (0);
- }
-
- if (do_scan)
- {
- GBP_SCANNER_DBG ("start");
- gbp_endpoint_scan (vm);
- GBP_SCANNER_DBG ("stop");
- }
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_scanner_node) = {
- .function = gbp_scanner,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "gbp-scanner",
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_scanner_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vlib_cli_output (vm, "GBP-scanner: enabled:%d interval:2", gs_enabled);
-
- return (NULL);
-}
-
-/*?
- * Show GBP scanner
- *
- * @cliexpar
- * @cliexstart{show gbp scanner}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_scanner_cli_node, static) = {
- .path = "show gbp scanner",
- .short_help = "show gbp scanner",
- .function = gbp_scanner_cli,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_scanner_init (vlib_main_t * vm)
-{
- gs_logger = vlib_log_register_class ("gbp", "scan");
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_scanner_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_subnet.c b/src/plugins/gbp/gbp_subnet.c
deleted file mode 100644
index 8d3b571657c..00000000000
--- a/src/plugins/gbp/gbp_subnet.c
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp.h>
-#include <plugins/gbp/gbp_fwd_dpo.h>
-#include <plugins/gbp/gbp_policy_dpo.h>
-#include <plugins/gbp/gbp_route_domain.h>
-
-#include <vnet/fib/fib_table.h>
-#include <vnet/dpo/load_balance.h>
-
-/**
- * a key for the DB
- */
-typedef struct gbp_subnet_key_t_
-{
- fib_prefix_t gsk_pfx;
- u32 gsk_fib_index;
-} gbp_subnet_key_t;
-
-/**
- * Subnet
- */
-typedef struct gbp_subnet_t_
-{
- gbp_subnet_key_t *gs_key;
- gbp_subnet_type_t gs_type;
- index_t gs_rd;
-
- union
- {
- struct
- {
- sclass_t gs_sclass;
- u32 gs_sw_if_index;
- } gs_stitched_external;
- struct
- {
- sclass_t gs_sclass;
- } gs_l3_out;
- };
-
- fib_node_index_t gs_fei;
-} gbp_subnet_t;
-
-/**
- * A DB of the subnets; key={pfx,fib-index}
- */
-uword *gbp_subnet_db;
-
-/**
- * pool of subnets
- */
-gbp_subnet_t *gbp_subnet_pool;
-
-static fib_source_t gbp_fib_source;
-
-static index_t
-gbp_subnet_db_find (u32 fib_index, const fib_prefix_t * pfx)
-{
- gbp_subnet_key_t key = {
- .gsk_pfx = *pfx,
- .gsk_fib_index = fib_index,
- };
- uword *p;
-
- p = hash_get_mem (gbp_subnet_db, &key);
-
- if (NULL != p)
- return p[0];
-
- return (INDEX_INVALID);
-}
-
-static void
-gbp_subnet_db_add (u32 fib_index, const fib_prefix_t * pfx, gbp_subnet_t * gs)
-{
- gbp_subnet_key_t *key;
-
- key = clib_mem_alloc (sizeof (*key));
-
- clib_memcpy (&(key->gsk_pfx), pfx, sizeof (*pfx));
- key->gsk_fib_index = fib_index;
-
- hash_set_mem (gbp_subnet_db, key, (gs - gbp_subnet_pool));
-
- gs->gs_key = key;
-}
-
-static void
-gbp_subnet_db_del (gbp_subnet_t * gs)
-{
- hash_unset_mem (gbp_subnet_db, gs->gs_key);
-
- clib_mem_free (gs->gs_key);
- gs->gs_key = NULL;
-}
-
-
-static int
-gbp_subnet_transport_add (gbp_subnet_t * gs)
-{
- dpo_id_t gfd = DPO_INVALID;
- gbp_route_domain_t *grd;
- fib_protocol_t fproto;
-
- fproto = gs->gs_key->gsk_pfx.fp_proto;
- grd = gbp_route_domain_get (gs->gs_rd);
-
- if (~0 == grd->grd_uu_sw_if_index[fproto])
- return (VNET_API_ERROR_INVALID_SW_IF_INDEX);
-
- gs->gs_fei = fib_table_entry_update_one_path (gs->gs_key->gsk_fib_index,
- &gs->gs_key->gsk_pfx,
- gbp_fib_source,
- FIB_ENTRY_FLAG_NONE,
- fib_proto_to_dpo (fproto),
- &ADJ_BCAST_ADDR,
- grd->grd_uu_sw_if_index
- [fproto], ~0, 1, NULL,
- FIB_ROUTE_PATH_FLAG_NONE);
-
- dpo_reset (&gfd);
-
- return (0);
-}
-
-static int
-gbp_subnet_internal_add (gbp_subnet_t * gs)
-{
- dpo_id_t gfd = DPO_INVALID;
-
- gbp_fwd_dpo_add_or_lock (fib_proto_to_dpo (gs->gs_key->gsk_pfx.fp_proto),
- &gfd);
-
- gs->gs_fei = fib_table_entry_special_dpo_update (gs->gs_key->gsk_fib_index,
- &gs->gs_key->gsk_pfx,
- gbp_fib_source,
- FIB_ENTRY_FLAG_EXCLUSIVE,
- &gfd);
-
- dpo_reset (&gfd);
-
- return (0);
-}
-
-static int
-gbp_subnet_external_add (gbp_subnet_t * gs, u32 sw_if_index, sclass_t sclass)
-{
- dpo_id_t gpd = DPO_INVALID;
-
- gs->gs_stitched_external.gs_sclass = sclass;
- gs->gs_stitched_external.gs_sw_if_index = sw_if_index;
-
- gbp_policy_dpo_add_or_lock (fib_proto_to_dpo (gs->gs_key->gsk_pfx.fp_proto),
- gbp_route_domain_get_scope (gs->gs_rd),
- gs->gs_stitched_external.gs_sclass,
- gs->gs_stitched_external.gs_sw_if_index, &gpd);
-
- gs->gs_fei = fib_table_entry_special_dpo_update (gs->gs_key->gsk_fib_index,
- &gs->gs_key->gsk_pfx,
- gbp_fib_source,
- (FIB_ENTRY_FLAG_EXCLUSIVE |
- FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT),
- &gpd);
-
- dpo_reset (&gpd);
-
- return (0);
-}
-
-static int
-gbp_subnet_l3_out_add (gbp_subnet_t * gs, sclass_t sclass, int is_anon)
-{
- fib_entry_flag_t flags;
- dpo_id_t gpd = DPO_INVALID;
-
- gs->gs_l3_out.gs_sclass = sclass;
-
- gbp_policy_dpo_add_or_lock (fib_proto_to_dpo (gs->gs_key->gsk_pfx.fp_proto),
- gbp_route_domain_get_scope (gs->gs_rd),
- gs->gs_l3_out.gs_sclass, ~0, &gpd);
-
- flags = FIB_ENTRY_FLAG_INTERPOSE;
- if (is_anon)
- flags |= FIB_ENTRY_FLAG_COVERED_INHERIT;
-
- gs->gs_fei = fib_table_entry_special_dpo_add (gs->gs_key->gsk_fib_index,
- &gs->gs_key->gsk_pfx,
- FIB_SOURCE_SPECIAL,
- flags, &gpd);
-
- dpo_reset (&gpd);
-
- return (0);
-}
-
-static void
-gbp_subnet_del_i (index_t gsi)
-{
- gbp_subnet_t *gs;
-
- gs = pool_elt_at_index (gbp_subnet_pool, gsi);
-
- fib_table_entry_delete_index (gs->gs_fei,
- (GBP_SUBNET_L3_OUT == gs->gs_type
- || GBP_SUBNET_ANON_L3_OUT ==
- gs->gs_type) ? FIB_SOURCE_SPECIAL :
- gbp_fib_source);
-
- gbp_subnet_db_del (gs);
- gbp_route_domain_unlock (gs->gs_rd);
-
- pool_put (gbp_subnet_pool, gs);
-}
-
-int
-gbp_subnet_del (u32 rd_id, const fib_prefix_t * pfx)
-{
- gbp_route_domain_t *grd;
- index_t gsi, grdi;
- u32 fib_index;
-
- grdi = gbp_route_domain_find (rd_id);
-
- if (~0 == grdi)
- return (VNET_API_ERROR_NO_SUCH_FIB);
-
- grd = gbp_route_domain_get (grdi);
- fib_index = grd->grd_fib_index[pfx->fp_proto];
-
- gsi = gbp_subnet_db_find (fib_index, pfx);
-
- if (INDEX_INVALID == gsi)
- return (VNET_API_ERROR_NO_SUCH_ENTRY);
-
- gbp_subnet_del_i (gsi);
-
- return (0);
-}
-
-int
-gbp_subnet_add (u32 rd_id,
- const fib_prefix_t * pfx,
- gbp_subnet_type_t type, u32 sw_if_index, sclass_t sclass)
-{
- gbp_route_domain_t *grd;
- index_t grdi, gsi;
- gbp_subnet_t *gs;
- u32 fib_index;
- int rv;
-
- switch (type)
- {
- case GBP_SUBNET_TRANSPORT:
- case GBP_SUBNET_STITCHED_INTERNAL:
- case GBP_SUBNET_STITCHED_EXTERNAL:
- case GBP_SUBNET_L3_OUT:
- case GBP_SUBNET_ANON_L3_OUT:
- break;
- default:
- return (VNET_API_ERROR_INCORRECT_ADJACENCY_TYPE);
- }
-
- grdi = gbp_route_domain_find_and_lock (rd_id);
-
- if (~0 == grdi)
- return (VNET_API_ERROR_NO_SUCH_FIB);
-
- grd = gbp_route_domain_get (grdi);
- fib_index = grd->grd_fib_index[pfx->fp_proto];
-
- gsi = gbp_subnet_db_find (fib_index, pfx);
-
- /*
- * this is an update if the subnet already exists, so remove the old
- */
- if (INDEX_INVALID != gsi)
- gbp_subnet_del_i (gsi);
-
- rv = -2;
-
- pool_get (gbp_subnet_pool, gs);
-
- gs->gs_type = type;
- gs->gs_rd = grdi;
- gbp_subnet_db_add (fib_index, pfx, gs);
-
- switch (type)
- {
- case GBP_SUBNET_STITCHED_INTERNAL:
- rv = gbp_subnet_internal_add (gs);
- break;
- case GBP_SUBNET_STITCHED_EXTERNAL:
- rv = gbp_subnet_external_add (gs, sw_if_index, sclass);
- break;
- case GBP_SUBNET_TRANSPORT:
- rv = gbp_subnet_transport_add (gs);
- break;
- case GBP_SUBNET_L3_OUT:
- rv = gbp_subnet_l3_out_add (gs, sclass, 0 /* is_anon */ );
- break;
- case GBP_SUBNET_ANON_L3_OUT:
- rv = gbp_subnet_l3_out_add (gs, sclass, 1 /* is_anon */ );
- break;
- }
-
- return (rv);
-}
-
-static clib_error_t *
-gbp_subnet_add_del_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vnet_main_t *vnm = vnet_get_main ();
- fib_prefix_t pfx = {.fp_addr = ip46_address_initializer };
- int length;
- u32 rd_id = ~0;
- u32 sw_if_index = ~0;
- gbp_subnet_type_t type = ~0;
- u32 sclass = ~0;
- int is_add = 1;
- int rv;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "del"))
- is_add = 0;
- else if (unformat (line_input, "rd %d", &rd_id))
- ;
- else
- if (unformat
- (line_input, "prefix %U/%d", unformat_ip4_address,
- &pfx.fp_addr.ip4, &length))
- pfx.fp_proto = FIB_PROTOCOL_IP4;
- else
- if (unformat
- (line_input, "prefix %U/%d", unformat_ip6_address,
- &pfx.fp_addr.ip6, &length))
- pfx.fp_proto = FIB_PROTOCOL_IP6;
- else if (unformat (line_input, "type transport"))
- type = GBP_SUBNET_TRANSPORT;
- else if (unformat (line_input, "type stitched-internal"))
- type = GBP_SUBNET_STITCHED_INTERNAL;
- else if (unformat (line_input, "type stitched-external"))
- type = GBP_SUBNET_STITCHED_EXTERNAL;
- else if (unformat (line_input, "type anon-l3-out"))
- type = GBP_SUBNET_ANON_L3_OUT;
- else if (unformat (line_input, "type l3-out"))
- type = GBP_SUBNET_L3_OUT;
- else
- if (unformat_user
- (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
- ;
- else if (unformat (line_input, "sclass %u", &sclass))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- }
- unformat_free (line_input);
-
- pfx.fp_len = length;
-
- if (is_add)
- rv = gbp_subnet_add (rd_id, &pfx, type, sw_if_index, sclass);
- else
- rv = gbp_subnet_del (rd_id, &pfx);
-
- switch (rv)
- {
- case 0:
- return 0;
- case VNET_API_ERROR_NO_SUCH_FIB:
- return clib_error_return (0, "no such FIB");
- }
-
- return clib_error_return (0, "unknown error %d", rv);
-}
-
-/*?
- * Add Group Based Policy Subnets
- *
- * @cliexpar
- * @cliexstart{gbp subnet [del] rd <ID> prefix <prefix> type <type> [<interface>] [sclass <sclass>]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_subnet_add_del, static) = {
- .path = "gbp subnet",
- .short_help = "gbp subnet [del] rd <ID> prefix <prefix> type <type> [<interface>] [sclass <sclass>]\n",
- .function = gbp_subnet_add_del_cli,
-};
-/* *INDENT-ON* */
-
-
-
-void
-gbp_subnet_walk (gbp_subnet_cb_t cb, void *ctx)
-{
- gbp_route_domain_t *grd;
- gbp_subnet_t *gs;
- u32 sw_if_index;
- sclass_t sclass;
-
- sclass = SCLASS_INVALID;
- sw_if_index = ~0;
-
- /* *INDENT-OFF* */
- pool_foreach (gs, gbp_subnet_pool)
- {
- grd = gbp_route_domain_get(gs->gs_rd);
-
- switch (gs->gs_type)
- {
- case GBP_SUBNET_STITCHED_INTERNAL:
- case GBP_SUBNET_TRANSPORT:
- /* use defaults above */
- break;
- case GBP_SUBNET_STITCHED_EXTERNAL:
- sw_if_index = gs->gs_stitched_external.gs_sw_if_index;
- sclass = gs->gs_stitched_external.gs_sclass;
- break;
- case GBP_SUBNET_L3_OUT:
- case GBP_SUBNET_ANON_L3_OUT:
- sclass = gs->gs_l3_out.gs_sclass;
- break;
- }
-
- if (WALK_STOP == cb (grd->grd_id, &gs->gs_key->gsk_pfx,
- gs->gs_type, sw_if_index, sclass, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-typedef enum gsb_subnet_show_flags_t_
-{
- GBP_SUBNET_SHOW_BRIEF,
- GBP_SUBNET_SHOW_DETAILS,
-} gsb_subnet_show_flags_t;
-
-static u8 *
-format_gbp_subnet_type (u8 * s, va_list * args)
-{
- gbp_subnet_type_t type = va_arg (*args, gbp_subnet_type_t);
-
- switch (type)
- {
- case GBP_SUBNET_STITCHED_INTERNAL:
- return (format (s, "stitched-internal"));
- case GBP_SUBNET_STITCHED_EXTERNAL:
- return (format (s, "stitched-external"));
- case GBP_SUBNET_TRANSPORT:
- return (format (s, "transport"));
- case GBP_SUBNET_L3_OUT:
- return (format (s, "l3-out"));
- case GBP_SUBNET_ANON_L3_OUT:
- return (format (s, "anon-l3-out"));
- }
-
- return (format (s, "unknown"));
-}
-
-u8 *
-format_gbp_subnet (u8 * s, va_list * args)
-{
- index_t gsi = va_arg (*args, index_t);
- gsb_subnet_show_flags_t flags = va_arg (*args, gsb_subnet_show_flags_t);
- gbp_subnet_t *gs;
- u32 table_id;
-
- gs = pool_elt_at_index (gbp_subnet_pool, gsi);
-
- table_id = fib_table_get_table_id (gs->gs_key->gsk_fib_index,
- gs->gs_key->gsk_pfx.fp_proto);
-
- s = format (s, "[%d] tbl:%d %U %U", gsi, table_id,
- format_fib_prefix, &gs->gs_key->gsk_pfx,
- format_gbp_subnet_type, gs->gs_type);
-
- switch (gs->gs_type)
- {
- case GBP_SUBNET_STITCHED_INTERNAL:
- case GBP_SUBNET_TRANSPORT:
- break;
- case GBP_SUBNET_STITCHED_EXTERNAL:
- s = format (s, " {sclass:%d %U}", gs->gs_stitched_external.gs_sclass,
- format_vnet_sw_if_index_name,
- vnet_get_main (), gs->gs_stitched_external.gs_sw_if_index);
- break;
- case GBP_SUBNET_L3_OUT:
- case GBP_SUBNET_ANON_L3_OUT:
- s = format (s, " {sclass:%d}", gs->gs_l3_out.gs_sclass);
- break;
- }
-
- switch (flags)
- {
- case GBP_SUBNET_SHOW_DETAILS:
- {
- s = format (s, "\n %U", format_fib_entry, gs->gs_fei,
- FIB_ENTRY_FORMAT_DETAIL);
- }
- case GBP_SUBNET_SHOW_BRIEF:
- break;
- }
- return (s);
-}
-
-static clib_error_t *
-gbp_subnet_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- u32 gsi;
-
- gsi = INDEX_INVALID;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "%d", &gsi))
- ;
- else
- break;
- }
-
- if (INDEX_INVALID != gsi)
- {
- vlib_cli_output (vm, "%U", format_gbp_subnet, gsi,
- GBP_SUBNET_SHOW_DETAILS);
- }
- else
- {
- /* *INDENT-OFF* */
- pool_foreach_index (gsi, gbp_subnet_pool)
- {
- vlib_cli_output (vm, "%U", format_gbp_subnet, gsi,
- GBP_SUBNET_SHOW_BRIEF);
- }
- /* *INDENT-ON* */
- }
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy Subnets
- *
- * @cliexpar
- * @cliexstart{show gbp subnet}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_subnet_show_node, static) = {
- .path = "show gbp subnet",
- .short_help = "show gbp subnet\n",
- .function = gbp_subnet_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_subnet_init (vlib_main_t * vm)
-{
- gbp_subnet_db = hash_create_mem (0,
- sizeof (gbp_subnet_key_t), sizeof (u32));
- gbp_fib_source = fib_source_allocate ("gbp-subnet",
- FIB_SOURCE_PRIORITY_HI,
- FIB_SOURCE_BH_SIMPLE);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (gbp_subnet_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_subnet.h b/src/plugins/gbp/gbp_subnet.h
deleted file mode 100644
index 6fbef01ceba..00000000000
--- a/src/plugins/gbp/gbp_subnet.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_SUBNET_H__
-#define __GBP_SUBNET_H__
-
-#include <plugins/gbp/gbp_types.h>
-
-typedef enum gbp_subnet_type_t_
-{
- GBP_SUBNET_TRANSPORT,
- GBP_SUBNET_STITCHED_INTERNAL,
- GBP_SUBNET_STITCHED_EXTERNAL,
- GBP_SUBNET_L3_OUT,
- GBP_SUBNET_ANON_L3_OUT,
-} gbp_subnet_type_t;
-
-extern int gbp_subnet_add (u32 rd_id,
- const fib_prefix_t * pfx,
- gbp_subnet_type_t type,
- u32 sw_if_index, sclass_t sclass);
-
-extern int gbp_subnet_del (u32 rd_id, const fib_prefix_t * pfx);
-
-typedef walk_rc_t (*gbp_subnet_cb_t) (u32 rd_id,
- const fib_prefix_t * pfx,
- gbp_subnet_type_t type,
- u32 sw_if_index,
- sclass_t sclass, void *ctx);
-
-extern void gbp_subnet_walk (gbp_subnet_cb_t cb, void *ctx);
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_vxlan.c b/src/plugins/gbp/gbp_vxlan.c
deleted file mode 100644
index 77e4d7ac11b..00000000000
--- a/src/plugins/gbp/gbp_vxlan.c
+++ /dev/null
@@ -1,654 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/gbp/gbp_vxlan.h>
-#include <plugins/gbp/gbp_learn.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-#include <vlibmemory/api.h>
-#include <vnet/fib/fib_table.h>
-#include <vlib/punt.h>
-
-/**
- * A reference to a VXLAN-GBP tunnel created as a child/dependent tunnel
- * of the template GBP-VXLAN tunnel
- */
-typedef struct vxlan_tunnel_ref_t_
-{
- gbp_itf_hdl_t vxr_itf;
- u32 vxr_sw_if_index;
- index_t vxr_parent;
- gbp_vxlan_tunnel_layer_t vxr_layer;
-} vxlan_tunnel_ref_t;
-
-/**
- * DB of added tunnels
- */
-uword *gv_db;
-
-/**
- * Logger
- */
-static vlib_log_class_t gt_logger;
-
-/**
- * Pool of template tunnels
- */
-static gbp_vxlan_tunnel_t *gbp_vxlan_tunnel_pool;
-
-/**
- * Pool of child tunnels
- */
-static vxlan_tunnel_ref_t *vxlan_tunnel_ref_pool;
-
-/**
- * DB of template interfaces by SW interface index
- */
-static index_t *gbp_vxlan_tunnel_db;
-
-/**
- * DB of child interfaces by SW interface index
- */
-static index_t *vxlan_tunnel_ref_db;
-
-/**
- * handle registered with the ;unt infra
- */
-static vlib_punt_hdl_t punt_hdl;
-
-static char *gbp_vxlan_tunnel_layer_strings[] = {
-#define _(n,s) [GBP_VXLAN_TUN_##n] = s,
- foreach_gbp_vxlan_tunnel_layer
-#undef _
-};
-
-#define GBP_VXLAN_TUN_DBG(...) \
- vlib_log_debug (gt_logger, __VA_ARGS__);
-
-
-gbp_vxlan_tunnel_t *
-gbp_vxlan_tunnel_get (index_t gti)
-{
- return (pool_elt_at_index (gbp_vxlan_tunnel_pool, gti));
-}
-
-static vxlan_tunnel_ref_t *
-vxlan_tunnel_ref_get (index_t vxri)
-{
- return (pool_elt_at_index (vxlan_tunnel_ref_pool, vxri));
-}
-
-static u8 *
-format_vxlan_tunnel_ref (u8 * s, va_list * args)
-{
- index_t vxri = va_arg (*args, u32);
- vxlan_tunnel_ref_t *vxr;
-
- vxr = vxlan_tunnel_ref_get (vxri);
-
- s = format (s, "[%U]", format_gbp_itf_hdl, vxr->vxr_itf);
-
- return (s);
-}
-
-static void
-gdb_vxlan_dep_del (u32 sw_if_index)
-{
- vxlan_tunnel_ref_t *vxr;
- gbp_vxlan_tunnel_t *gt;
- index_t vxri;
- u32 pos;
-
- vxr = vxlan_tunnel_ref_get (vxlan_tunnel_ref_db[sw_if_index]);
- vxri = vxr - vxlan_tunnel_ref_pool;
- gt = gbp_vxlan_tunnel_get (vxr->vxr_parent);
-
- GBP_VXLAN_TUN_DBG ("del-dep:%U", format_vxlan_tunnel_ref, vxri);
-
- vxlan_tunnel_ref_db[vxr->vxr_sw_if_index] = INDEX_INVALID;
- pos = vec_search (gt->gt_tuns, vxri);
-
- ASSERT (~0 != pos);
- vec_del1 (gt->gt_tuns, pos);
-
- vnet_vxlan_gbp_tunnel_del (vxr->vxr_sw_if_index);
-
- pool_put (vxlan_tunnel_ref_pool, vxr);
-}
-
-static gbp_itf_hdl_t
-gdb_vxlan_dep_add (gbp_vxlan_tunnel_t * gt,
- const ip46_address_t * src, const ip46_address_t * dst)
-{
- vnet_vxlan_gbp_tunnel_add_del_args_t args = {
- .is_add = 1,
- .is_ip6 = !ip46_address_is_ip4 (src),
- .vni = gt->gt_vni,
- .src = *src,
- .dst = *dst,
- .instance = ~0,
- .mode = (GBP_VXLAN_TUN_L2 == gt->gt_layer ?
- VXLAN_GBP_TUNNEL_MODE_L2 : VXLAN_GBP_TUNNEL_MODE_L3),
- };
- vxlan_tunnel_ref_t *vxr;
- u32 sw_if_index;
- index_t vxri;
- int rv;
-
- sw_if_index = ~0;
- rv = vnet_vxlan_gbp_tunnel_add_del (&args, &sw_if_index);
-
- if (VNET_API_ERROR_TUNNEL_EXIST == rv)
- {
- vxri = vxlan_tunnel_ref_db[sw_if_index];
-
- vxr = vxlan_tunnel_ref_get (vxri);
- gbp_itf_lock (vxr->vxr_itf);
- }
- else if (0 == rv)
- {
- ASSERT (~0 != sw_if_index);
- GBP_VXLAN_TUN_DBG ("add-dep:%U %U %U %d", format_vnet_sw_if_index_name,
- vnet_get_main (), sw_if_index,
- format_ip46_address, src, IP46_TYPE_ANY,
- format_ip46_address, dst, IP46_TYPE_ANY, gt->gt_vni);
-
- pool_get_zero (vxlan_tunnel_ref_pool, vxr);
-
- vxri = (vxr - vxlan_tunnel_ref_pool);
- vxr->vxr_parent = gt - gbp_vxlan_tunnel_pool;
- vxr->vxr_sw_if_index = sw_if_index;
- vxr->vxr_layer = gt->gt_layer;
-
- /*
- * store the child both on the parent's list and the global DB
- */
- vec_add1 (gt->gt_tuns, vxri);
-
- vec_validate_init_empty (vxlan_tunnel_ref_db,
- vxr->vxr_sw_if_index, INDEX_INVALID);
- vxlan_tunnel_ref_db[vxr->vxr_sw_if_index] = vxri;
-
- if (GBP_VXLAN_TUN_L2 == vxr->vxr_layer)
- {
- l2output_feat_masks_t ofeat;
- l2input_feat_masks_t ifeat;
- gbp_bridge_domain_t *gbd;
-
- gbd = gbp_bridge_domain_get (gt->gt_gbd);
- vxr->vxr_itf = gbp_itf_l2_add_and_lock_w_free
- (vxr->vxr_sw_if_index, gt->gt_gbd, gdb_vxlan_dep_del);
-
- ofeat = L2OUTPUT_FEAT_GBP_POLICY_MAC;
- ifeat = L2INPUT_FEAT_NONE;
-
- if (!(gbd->gb_flags & GBP_BD_FLAG_DO_NOT_LEARN))
- ifeat |= L2INPUT_FEAT_GBP_LEARN;
-
- gbp_itf_l2_set_output_feature (vxr->vxr_itf, ofeat);
- gbp_itf_l2_set_input_feature (vxr->vxr_itf, ifeat);
- }
- else
- {
- vxr->vxr_itf = gbp_itf_l3_add_and_lock_w_free
- (vxr->vxr_sw_if_index, gt->gt_grd, gdb_vxlan_dep_del);
-
- gbp_itf_l3_set_input_feature (vxr->vxr_itf, GBP_ITF_L3_FEAT_LEARN);
- }
- }
- else
- {
- return (GBP_ITF_HDL_INVALID);
- }
-
- return (vxr->vxr_itf);
-}
-
-u32
-vxlan_gbp_tunnel_get_parent (u32 sw_if_index)
-{
- ASSERT ((sw_if_index < vec_len (vxlan_tunnel_ref_db)) &&
- (INDEX_INVALID != vxlan_tunnel_ref_db[sw_if_index]));
-
- gbp_vxlan_tunnel_t *gt;
- vxlan_tunnel_ref_t *vxr;
-
- vxr = vxlan_tunnel_ref_get (vxlan_tunnel_ref_db[sw_if_index]);
- gt = gbp_vxlan_tunnel_get (vxr->vxr_parent);
-
- return (gt->gt_sw_if_index);
-}
-
-gbp_itf_hdl_t
-vxlan_gbp_tunnel_lock_itf (u32 sw_if_index)
-{
- ASSERT ((sw_if_index < vec_len (vxlan_tunnel_ref_db)) &&
- (INDEX_INVALID != vxlan_tunnel_ref_db[sw_if_index]));
-
- vxlan_tunnel_ref_t *vxr;
-
- vxr = vxlan_tunnel_ref_get (vxlan_tunnel_ref_db[sw_if_index]);
-
- gbp_itf_lock (vxr->vxr_itf);
-
- return (vxr->vxr_itf);
-}
-
-
-gbp_vxlan_tunnel_type_t
-gbp_vxlan_tunnel_get_type (u32 sw_if_index)
-{
- if (sw_if_index < vec_len (vxlan_tunnel_ref_db) &&
- INDEX_INVALID != vxlan_tunnel_ref_db[sw_if_index])
- {
- return (VXLAN_GBP_TUNNEL);
- }
- else if (sw_if_index < vec_len (gbp_vxlan_tunnel_db) &&
- INDEX_INVALID != gbp_vxlan_tunnel_db[sw_if_index])
- {
- return (GBP_VXLAN_TEMPLATE_TUNNEL);
- }
-
- ASSERT (0);
- return (GBP_VXLAN_TEMPLATE_TUNNEL);
-}
-
-gbp_itf_hdl_t
-gbp_vxlan_tunnel_clone_and_lock (u32 sw_if_index,
- const ip46_address_t * src,
- const ip46_address_t * dst)
-{
- gbp_vxlan_tunnel_t *gt;
- index_t gti;
-
- gti = gbp_vxlan_tunnel_db[sw_if_index];
-
- if (INDEX_INVALID == gti)
- return (GBP_ITF_HDL_INVALID);
-
- gt = pool_elt_at_index (gbp_vxlan_tunnel_pool, gti);
-
- return (gdb_vxlan_dep_add (gt, src, dst));
-}
-
-void
-vxlan_gbp_tunnel_unlock (u32 sw_if_index)
-{
- /* vxlan_tunnel_ref_t *vxr; */
- /* index_t vxri; */
-
- /* vxri = vxlan_tunnel_ref_db[sw_if_index]; */
-
- /* ASSERT (vxri != INDEX_INVALID); */
-
- /* vxr = vxlan_tunnel_ref_get (vxri); */
-
- /* gdb_vxlan_dep_del (vxri); */
-}
-
-void
-gbp_vxlan_walk (gbp_vxlan_cb_t cb, void *ctx)
-{
- gbp_vxlan_tunnel_t *gt;
-
- /* *INDENT-OFF* */
- pool_foreach (gt, gbp_vxlan_tunnel_pool)
- {
- if (WALK_CONTINUE != cb(gt, ctx))
- break;
- }
- /* *INDENT-ON* */
-}
-
-static walk_rc_t
-gbp_vxlan_tunnel_show_one (gbp_vxlan_tunnel_t * gt, void *ctx)
-{
- vlib_cli_output (ctx, "%U", format_gbp_vxlan_tunnel,
- gt - gbp_vxlan_tunnel_pool);
-
- return (WALK_CONTINUE);
-}
-
-static u8 *
-format_gbp_vxlan_tunnel_name (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
-
- return format (s, "gbp-vxlan-%d", dev_instance);
-}
-
-u8 *
-format_gbp_vxlan_tunnel_layer (u8 * s, va_list * args)
-{
- gbp_vxlan_tunnel_layer_t gl = va_arg (*args, gbp_vxlan_tunnel_layer_t);
- s = format (s, "%s", gbp_vxlan_tunnel_layer_strings[gl]);
-
- return (s);
-}
-
-u8 *
-format_gbp_vxlan_tunnel (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- CLIB_UNUSED (int verbose) = va_arg (*args, int);
- gbp_vxlan_tunnel_t *gt = gbp_vxlan_tunnel_get (dev_instance);
- index_t *vxri;
-
- s = format (s, " [%d] gbp-vxlan-tunnel: hw:%d sw:%d vni:%d %U",
- dev_instance, gt->gt_hw_if_index,
- gt->gt_sw_if_index, gt->gt_vni,
- format_gbp_vxlan_tunnel_layer, gt->gt_layer);
- if (GBP_VXLAN_TUN_L2 == gt->gt_layer)
- s = format (s, " BD:%d gbd-index:%d", gt->gt_bd_rd_id, gt->gt_gbd);
- else
- s = format (s, " RD:%d grd-index:%d", gt->gt_bd_rd_id, gt->gt_grd);
-
- s = format (s, " dependents:");
- vec_foreach (vxri, gt->gt_tuns)
- {
- s = format (s, "\n %U, ", format_vxlan_tunnel_ref, *vxri);
- }
-
- return s;
-}
-
-typedef struct gbp_vxlan_tx_trace_t_
-{
- u32 vni;
-} gbp_vxlan_tx_trace_t;
-
-u8 *
-format_gbp_vxlan_tx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_vxlan_tx_trace_t *t = va_arg (*args, gbp_vxlan_tx_trace_t *);
-
- s = format (s, "GBP-VXLAN: vni:%d", t->vni);
-
- return (s);
-}
-
-clib_error_t *
-gbp_vxlan_interface_admin_up_down (vnet_main_t * vnm,
- u32 hw_if_index, u32 flags)
-{
- vnet_hw_interface_t *hi;
- u32 ti;
-
- hi = vnet_get_hw_interface (vnm, hw_if_index);
-
- if (NULL == gbp_vxlan_tunnel_db ||
- hi->sw_if_index >= vec_len (gbp_vxlan_tunnel_db))
- return (NULL);
-
- ti = gbp_vxlan_tunnel_db[hi->sw_if_index];
-
- if (~0 == ti)
- /* not one of ours */
- return (NULL);
-
- if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
- vnet_hw_interface_set_flags (vnm, hw_if_index,
- VNET_HW_INTERFACE_FLAG_LINK_UP);
- else
- vnet_hw_interface_set_flags (vnm, hw_if_index, 0);
-
- return (NULL);
-}
-
-static uword
-gbp_vxlan_interface_tx (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
- clib_warning ("you shouldn't be here, leaking buffers...");
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (gbp_vxlan_device_class) = {
- .name = "GBP VXLAN tunnel-template",
- .format_device_name = format_gbp_vxlan_tunnel_name,
- .format_device = format_gbp_vxlan_tunnel,
- .format_tx_trace = format_gbp_vxlan_tx_trace,
- .admin_up_down_function = gbp_vxlan_interface_admin_up_down,
- .tx_function = gbp_vxlan_interface_tx,
-};
-
-VNET_HW_INTERFACE_CLASS (gbp_vxlan_hw_interface_class) = {
- .name = "GBP-VXLAN",
- .flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
-};
-/* *INDENT-ON* */
-
-int
-gbp_vxlan_tunnel_add (u32 vni, gbp_vxlan_tunnel_layer_t layer,
- u32 bd_rd_id,
- const ip4_address_t * src, u32 * sw_if_indexp)
-{
- gbp_vxlan_tunnel_t *gt;
- index_t gti;
- uword *p;
- int rv;
-
- rv = 0;
- p = hash_get (gv_db, vni);
-
- GBP_VXLAN_TUN_DBG ("add: %d %d %d", vni, layer, bd_rd_id);
-
- if (NULL == p)
- {
- vnet_sw_interface_t *si;
- vnet_hw_interface_t *hi;
- index_t gbi, grdi;
- vnet_main_t *vnm;
-
- gbi = grdi = INDEX_INVALID;
-
- if (layer == GBP_VXLAN_TUN_L2)
- {
- gbi = gbp_bridge_domain_find_and_lock (bd_rd_id);
-
- if (INDEX_INVALID == gbi)
- {
- return (VNET_API_ERROR_BD_NOT_MODIFIABLE);
- }
- }
- else
- {
- grdi = gbp_route_domain_find_and_lock (bd_rd_id);
-
- if (INDEX_INVALID == grdi)
- {
- return (VNET_API_ERROR_NO_SUCH_FIB);
- }
- }
-
- vnm = vnet_get_main ();
- pool_get (gbp_vxlan_tunnel_pool, gt);
- gti = gt - gbp_vxlan_tunnel_pool;
-
- gt->gt_vni = vni;
- gt->gt_layer = layer;
- gt->gt_bd_rd_id = bd_rd_id;
- gt->gt_src.ip4.as_u32 = src->as_u32;
- gt->gt_hw_if_index = vnet_register_interface (vnm,
- gbp_vxlan_device_class.index,
- gti,
- gbp_vxlan_hw_interface_class.index,
- gti);
-
- hi = vnet_get_hw_interface (vnm, gt->gt_hw_if_index);
-
- gt->gt_sw_if_index = hi->sw_if_index;
-
- /* don't flood packets in a BD to these interfaces */
- si = vnet_get_sw_interface (vnm, gt->gt_sw_if_index);
- si->flood_class = VNET_FLOOD_CLASS_NO_FLOOD;
-
- if (layer == GBP_VXLAN_TUN_L2)
- {
- gbp_bridge_domain_t *gb;
-
- gb = gbp_bridge_domain_get (gbi);
-
- gt->gt_gbd = gbi;
- gb->gb_vni = gti;
- /* set it up as a GBP interface */
- gt->gt_itf = gbp_itf_l2_add_and_lock (gt->gt_sw_if_index,
- gt->gt_gbd);
- gbp_itf_l2_set_input_feature (gt->gt_itf, L2INPUT_FEAT_GBP_LEARN);
- }
- else
- {
- gt->gt_grd = grdi;
- gt->gt_itf = gbp_itf_l3_add_and_lock (gt->gt_sw_if_index,
- gt->gt_grd);
- gbp_itf_l3_set_input_feature (gt->gt_itf, GBP_ITF_L3_FEAT_LEARN);
- }
-
- /*
- * save the tunnel by VNI and by sw_if_index
- */
- hash_set (gv_db, vni, gti);
-
- vec_validate_init_empty (gbp_vxlan_tunnel_db,
- gt->gt_sw_if_index, INDEX_INVALID);
- gbp_vxlan_tunnel_db[gt->gt_sw_if_index] = gti;
-
- if (sw_if_indexp)
- *sw_if_indexp = gt->gt_sw_if_index;
-
- vxlan_gbp_register_udp_ports ();
- }
- else
- {
- gti = p[0];
- rv = VNET_API_ERROR_IF_ALREADY_EXISTS;
- }
-
- GBP_VXLAN_TUN_DBG ("add: %U", format_gbp_vxlan_tunnel, gti);
-
- return (rv);
-}
-
-int
-gbp_vxlan_tunnel_del (u32 vni)
-{
- gbp_vxlan_tunnel_t *gt;
- uword *p;
-
- p = hash_get (gv_db, vni);
-
- if (NULL != p)
- {
- vnet_main_t *vnm;
-
- vnm = vnet_get_main ();
- gt = gbp_vxlan_tunnel_get (p[0]);
-
- vxlan_gbp_unregister_udp_ports ();
-
- GBP_VXLAN_TUN_DBG ("del: %U", format_gbp_vxlan_tunnel,
- gt - gbp_vxlan_tunnel_pool);
-
- gbp_endpoint_flush (GBP_ENDPOINT_SRC_DP, gt->gt_sw_if_index);
- ASSERT (0 == vec_len (gt->gt_tuns));
- vec_free (gt->gt_tuns);
-
- gbp_itf_unlock (&gt->gt_itf);
-
- if (GBP_VXLAN_TUN_L2 == gt->gt_layer)
- {
- gbp_bridge_domain_unlock (gt->gt_gbd);
- }
- else
- {
- gbp_route_domain_unlock (gt->gt_grd);
- }
-
- vnet_sw_interface_set_flags (vnm, gt->gt_sw_if_index, 0);
- vnet_delete_hw_interface (vnm, gt->gt_hw_if_index);
-
- hash_unset (gv_db, vni);
- gbp_vxlan_tunnel_db[gt->gt_sw_if_index] = INDEX_INVALID;
-
- pool_put (gbp_vxlan_tunnel_pool, gt);
- }
- else
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- return (0);
-}
-
-static clib_error_t *
-gbp_vxlan_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
-
- vlib_cli_output (vm, "GBP-VXLAN Interfaces:");
-
- gbp_vxlan_walk (gbp_vxlan_tunnel_show_one, vm);
-
- return (NULL);
-}
-
-/*?
- * Show Group Based Policy VXLAN tunnels
- *
- * @cliexpar
- * @cliexstart{show gbp vxlan}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (gbp_vxlan_show_node, static) = {
- .path = "show gbp vxlan",
- .short_help = "show gbp vxlan\n",
- .function = gbp_vxlan_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-gbp_vxlan_init (vlib_main_t * vm)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- gt_logger = vlib_log_register_class ("gbp", "tun");
-
- punt_hdl = vlib_punt_client_register ("gbp-vxlan");
-
- vlib_punt_register (punt_hdl,
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP4],
- "gbp-vxlan4");
-
- return (0);
-}
-
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION (gbp_vxlan_init) =
-{
- .runs_after = VLIB_INITS("punt_init", "vxlan_gbp_init"),
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_vxlan.h b/src/plugins/gbp/gbp_vxlan.h
deleted file mode 100644
index 706fe2a0e85..00000000000
--- a/src/plugins/gbp/gbp_vxlan.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef __GBP_VXLAN_H__
-#define __GBP_VXLAN_H__
-
-#include <vnet/fib/fib_types.h>
-#include <plugins/gbp/gbp_itf.h>
-
-#define foreach_gbp_vxlan_tunnel_layer \
- _ (L2, "l2") \
- _ (L3, "l3")
-
-typedef enum gbp_vxlan_tunnel_layer_t_
-{
-#define _(s,n) GBP_VXLAN_TUN_##s,
- foreach_gbp_vxlan_tunnel_layer
-#undef _
-} gbp_vxlan_tunnel_layer_t;
-
-/**
- * GBP VXLAN (template) tunnel.
- * A template tunnel has only a VNI, it does not have src,dst address.
- * As such it cannot be used to send traffic. It is used in the RX path
- * to RX vxlan-gbp packets that do not match an existing tunnel;
- */
-typedef struct gbp_vxlan_tunnel_t_
-{
- u32 gt_hw_if_index;
- u32 gt_sw_if_index;
- u32 gt_vni;
-
- /**
- * The BD or RD value (depending on the layer) that the tunnel is bound to
- */
- u32 gt_bd_rd_id;
- gbp_vxlan_tunnel_layer_t gt_layer;
-
- union
- {
- struct
- {
- /**
- * Reference to the GPB-BD
- */
- index_t gt_gbd;
- };
- struct
- {
- /**
- * References to the GBP-RD
- */
- index_t gt_grd;
- };
- };
-
- /**
- * gbp-itf config for this interface
- */
- gbp_itf_hdl_t gt_itf;
-
- /**
- * list of child vxlan-gbp tunnels built from this template
- */
- index_t *gt_tuns;
-
- /**
- * The source address to use for child tunnels
- */
- ip46_address_t gt_src;
-} gbp_vxlan_tunnel_t;
-
-/**
- * The different types of interfaces that endpoints are learned on
- */
-typedef enum gbp_vxlan_tunnel_type_t_
-{
- /**
- * This is the object type defined above.
- * A template representation of a vxlan-gbp tunnel. from this tunnel
- * type, real vxlan-gbp tunnels are created (by cloning the VNI)
- */
- GBP_VXLAN_TEMPLATE_TUNNEL,
-
- /**
- * A real VXLAN-GBP tunnel (from vnet/vxlan-gbp/...)
- */
- VXLAN_GBP_TUNNEL,
-} gbp_vxlan_tunnel_type_t;
-
-extern int gbp_vxlan_tunnel_add (u32 vni, gbp_vxlan_tunnel_layer_t layer,
- u32 bd_rd_id,
- const ip4_address_t * src,
- u32 * sw_if_indexp);
-extern int gbp_vxlan_tunnel_del (u32 vni);
-
-extern gbp_vxlan_tunnel_type_t gbp_vxlan_tunnel_get_type (u32 sw_if_index);
-
-extern gbp_itf_hdl_t gbp_vxlan_tunnel_clone_and_lock (u32 parent_tunnel,
- const ip46_address_t *
- src,
- const ip46_address_t *
- dst);
-
-extern u32 vxlan_gbp_tunnel_get_parent (u32 sw_if_index);
-extern gbp_itf_hdl_t vxlan_gbp_tunnel_lock_itf (u32 sw_if_index);
-
-typedef walk_rc_t (*gbp_vxlan_cb_t) (gbp_vxlan_tunnel_t * gt, void *ctx);
-extern void gbp_vxlan_walk (gbp_vxlan_cb_t cb, void *ctx);
-
-extern u8 *format_gbp_vxlan_tunnel (u8 * s, va_list * args);
-extern u8 *format_gbp_vxlan_tunnel_layer (u8 * s, va_list * args);
-
-extern gbp_vxlan_tunnel_t *gbp_vxlan_tunnel_get (index_t gti);
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/gbp/gbp_vxlan_node.c b/src/plugins/gbp/gbp_vxlan_node.c
deleted file mode 100644
index 413a9f47e1b..00000000000
--- a/src/plugins/gbp/gbp_vxlan_node.c
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <plugins/gbp/gbp_vxlan.h>
-#include <plugins/gbp/gbp_itf.h>
-#include <plugins/gbp/gbp_learn.h>
-#include <plugins/gbp/gbp_bridge_domain.h>
-#include <plugins/gbp/gbp_route_domain.h>
-
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-#include <vlibmemory/api.h>
-#include <vnet/fib/fib_table.h>
-
-extern uword *gv_db;
-
-typedef struct gbp_vxlan_trace_t_
-{
- u8 dropped;
- u32 vni;
- u32 sw_if_index;
- u16 sclass;
- u8 flags;
-} gbp_vxlan_trace_t;
-
-#define foreach_gbp_vxlan_input_next \
- _(DROP, "error-drop") \
- _(L2_INPUT, "l2-input") \
- _(IP4_INPUT, "ip4-input") \
- _(IP6_INPUT, "ip6-input")
-
-typedef enum
-{
-#define _(s,n) GBP_VXLAN_INPUT_NEXT_##s,
- foreach_gbp_vxlan_input_next
-#undef _
- GBP_VXLAN_INPUT_N_NEXT,
-} gbp_vxlan_input_next_t;
-
-
-#define foreach_gbp_vxlan_error \
- _(DECAPPED, "decapped") \
- _(LEARNED, "learned")
-
-typedef enum
-{
-#define _(s,n) GBP_VXLAN_ERROR_##s,
- foreach_gbp_vxlan_error
-#undef _
- GBP_VXLAN_N_ERROR,
-} gbp_vxlan_input_error_t;
-
-static char *gbp_vxlan_error_strings[] = {
-#define _(n,s) s,
- foreach_gbp_vxlan_error
-#undef _
-};
-
-static uword
-gbp_vxlan_decap (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip4)
-{
- u32 n_left_to_next, n_left_from, next_index, *to_next, *from;
-
- next_index = 0;
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- while (n_left_from > 0)
- {
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vxlan_gbp_header_t *vxlan_gbp0;
- gbp_vxlan_input_next_t next0;
- gbp_vxlan_tunnel_t *gt0;
- vlib_buffer_t *b0;
- u32 bi0, vni0;
- uword *p;
-
- bi0 = to_next[0] = from[0];
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
- next0 = GBP_VXLAN_INPUT_NEXT_DROP;
-
- b0 = vlib_get_buffer (vm, bi0);
- vxlan_gbp0 =
- vlib_buffer_get_current (b0) - sizeof (vxlan_gbp_header_t);
-
- vni0 = vxlan_gbp_get_vni (vxlan_gbp0);
- p = hash_get (gv_db, vni0);
-
- if (PREDICT_FALSE (NULL == p))
- {
- gt0 = NULL;
- next0 = GBP_VXLAN_INPUT_NEXT_DROP;
- }
- else
- {
- gt0 = gbp_vxlan_tunnel_get (p[0]);
-
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = gt0->gt_sw_if_index;
-
- if (GBP_VXLAN_TUN_L2 == gt0->gt_layer)
- /*
- * An L2 layer tunnel goes into the BD
- */
- next0 = GBP_VXLAN_INPUT_NEXT_L2_INPUT;
- else
- {
- /*
- * An L3 layer tunnel needs to strip the L2 header
- * an inject into the RD
- */
- ethernet_header_t *e0;
- u16 type0;
-
- e0 = vlib_buffer_get_current (b0);
- type0 = clib_net_to_host_u16 (e0->type);
- switch (type0)
- {
- case ETHERNET_TYPE_IP4:
- next0 = GBP_VXLAN_INPUT_NEXT_IP4_INPUT;
- break;
- case ETHERNET_TYPE_IP6:
- next0 = GBP_VXLAN_INPUT_NEXT_IP6_INPUT;
- break;
- default:
- goto trace;
- }
- vlib_buffer_advance (b0, sizeof (*e0));
- }
- }
-
- trace:
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- gbp_vxlan_trace_t *tr;
-
- tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->dropped = (next0 == GBP_VXLAN_INPUT_NEXT_DROP);
- tr->vni = vni0;
- tr->sw_if_index = (gt0 ? gt0->gt_sw_if_index : ~0);
- tr->flags = vxlan_gbp_get_gpflags (vxlan_gbp0);
- tr->sclass = vxlan_gbp_get_sclass (vxlan_gbp0);
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return from_frame->n_vectors;
-}
-
-VLIB_NODE_FN (gbp_vxlan4_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return gbp_vxlan_decap (vm, node, from_frame, 1);
-}
-
-static u8 *
-format_gbp_vxlan_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- gbp_vxlan_trace_t *t = va_arg (*args, gbp_vxlan_trace_t *);
-
- s = format (s, "vni:%d dropped:%d rx:%d sclass:%d flags:%U",
- t->vni, t->dropped, t->sw_if_index,
- t->sclass, format_vxlan_gbp_header_gpflags, t->flags);
-
- return (s);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (gbp_vxlan4_input_node) =
-{
- .name = "gbp-vxlan4",
- .vector_size = sizeof (u32),
- .n_errors = GBP_VXLAN_N_ERROR,
- .error_strings = gbp_vxlan_error_strings,
- .n_next_nodes = GBP_VXLAN_INPUT_N_NEXT,
- .format_trace = format_gbp_vxlan_rx_trace,
- .next_nodes = {
-#define _(s,n) [GBP_VXLAN_INPUT_NEXT_##s] = n,
- foreach_gbp_vxlan_input_next
-#undef _
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/geneve/decap.c b/src/plugins/geneve/decap.c
index bd189913f71..c64121e2829 100644
--- a/src/plugins/geneve/decap.c
+++ b/src/plugins/geneve/decap.c
@@ -812,7 +812,6 @@ static char *geneve_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (geneve4_input_node) = {
.name = "geneve4-input",
/* Takes a vector of packets. */
@@ -847,7 +846,6 @@ VLIB_REGISTER_NODE (geneve6_input_node) = {
.format_trace = format_geneve_rx_trace,
// $$$$ .unformat_buffer = unformat_geneve_header,
};
-/* *INDENT-ON* */
typedef enum
{
@@ -1252,7 +1250,6 @@ VLIB_NODE_FN (ip4_geneve_bypass_node) (vlib_main_t * vm,
return ip_geneve_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_geneve_bypass_node) =
{
.name = "ip4-geneve-bypass",
@@ -1265,7 +1262,6 @@ VLIB_REGISTER_NODE (ip4_geneve_bypass_node) =
.format_buffer = format_ip4_header,
.format_trace = format_ip4_forward_next_trace,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_geneve_bypass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1274,7 +1270,6 @@ VLIB_NODE_FN (ip6_geneve_bypass_node) (vlib_main_t * vm,
return ip_geneve_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_geneve_bypass_node) =
{
.name = "ip6-geneve-bypass",
@@ -1288,7 +1283,6 @@ VLIB_REGISTER_NODE (ip6_geneve_bypass_node) =
.format_buffer = format_ip6_header,
.format_trace = format_ip6_forward_next_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/geneve/encap.c b/src/plugins/geneve/encap.c
index 6a84d0c5490..609da2218cf 100644
--- a/src/plugins/geneve/encap.c
+++ b/src/plugins/geneve/encap.c
@@ -544,7 +544,6 @@ VLIB_NODE_FN (geneve6_encap_node) (vlib_main_t * vm,
return geneve_encap_inline (vm, node, from_frame, /* is_ip4 */ 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (geneve4_encap_node) = {
.name = "geneve4-encap",
.vector_size = sizeof (u32),
@@ -570,7 +569,6 @@ VLIB_REGISTER_NODE (geneve6_encap_node) = {
[GENEVE_ENCAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/geneve/geneve.c b/src/plugins/geneve/geneve.c
index 62502ef3fde..37b83d01761 100644
--- a/src/plugins/geneve/geneve.c
+++ b/src/plugins/geneve/geneve.c
@@ -114,7 +114,6 @@ geneve_mac_change (vnet_hw_interface_t * hi,
return (NULL);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (geneve_device_class, static) = {
.name = "GENEVE",
.format_device_name = format_geneve_name,
@@ -122,7 +121,6 @@ VNET_DEVICE_CLASS (geneve_device_class, static) = {
.admin_up_down_function = geneve_interface_admin_up_down,
.mac_addr_change_function = geneve_mac_change,
};
-/* *INDENT-ON* */
static u8 *
format_geneve_header_with_length (u8 * s, va_list * args)
@@ -132,13 +130,11 @@ format_geneve_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (geneve_hw_class) = {
.name = "GENEVE",
.format_header = format_geneve_header_with_length,
.build_rewrite = default_build_rewrite,
};
-/* *INDENT-ON* */
static void
geneve_tunnel_restack_dpo (geneve_tunnel_t * t)
@@ -425,18 +421,15 @@ int vnet_geneve_add_del_tunnel
vnet_hw_interface_t *hi;
if (a->l3_mode)
{
+ vnet_eth_interface_registration_t eir = {};
u32 t_idx = t - vxm->tunnels;
u8 address[6] =
{ 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx };
- clib_error_t *error =
- ethernet_register_interface (vnm, geneve_device_class.index,
- t_idx,
- address, &hw_if_index, 0);
- if (error)
- {
- clib_error_report (error);
- return VNET_API_ERROR_INVALID_REGISTRATION;
- }
+
+ eir.dev_class_index = geneve_device_class.index;
+ eir.dev_instance = t_idx;
+ eir.address = address;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
}
else
{
@@ -473,7 +466,8 @@ int vnet_geneve_add_del_tunnel
fib_prefix_t tun_remote_pfx;
vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
- fib_prefix_from_ip46_addr (&t->remote, &tun_remote_pfx);
+ fib_protocol_t fp = fib_ip_proto (is_ip6);
+ fib_prefix_from_ip46_addr (fp, &t->remote, &tun_remote_pfx);
if (!ip46_address_is_multicast (&t->remote))
{
/* Unicast tunnel -
@@ -497,8 +491,6 @@ int vnet_geneve_add_del_tunnel
* with different VNIs, create the output fib adjecency only if
* it does not already exist
*/
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
if (vtep_addr_ref (&vxm->vtep_table,
t->encap_fib_index, &t->remote) == 1)
{
@@ -524,15 +516,16 @@ int vnet_geneve_add_del_tunnel
* - the forwarding interface is for-us
* - the accepting interface is that from the API
*/
- mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx, MFIB_SOURCE_GENEVE, &path);
+ mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
+ MFIB_SOURCE_GENEVE,
+ MFIB_ENTRY_FLAG_NONE, &path);
path.frp_sw_if_index = a->mcast_sw_if_index;
path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei = mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx,
- MFIB_SOURCE_GENEVE, &path);
+ mfei = mfib_table_entry_path_update (
+ t->encap_fib_index, &mpfx, MFIB_SOURCE_GENEVE,
+ MFIB_ENTRY_FLAG_NONE, &path);
/*
* Create the mcast adjacency to send traffic to the group
@@ -881,7 +874,6 @@ done:
* Example of how to delete a GENEVE Tunnel:
* @cliexcmd{create geneve tunnel local 10.0.3.1 remote 10.0.3.3 vni 13 del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_geneve_tunnel_command, static) = {
.path = "create geneve tunnel",
.short_help =
@@ -890,7 +882,6 @@ VLIB_CLI_COMMAND (create_geneve_tunnel_command, static) = {
" [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [l3-mode] [del]",
.function = geneve_add_del_tunnel_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_geneve_tunnel_command_fn (vlib_main_t * vm,
@@ -920,13 +911,11 @@ show_geneve_tunnel_command_fn (vlib_main_t * vm,
* [0] local 10.0.3.1 remote 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_geneve_tunnel_command, static) = {
.path = "show geneve tunnel",
.short_help = "show geneve tunnel",
.function = show_geneve_tunnel_command_fn,
};
-/* *INDENT-ON* */
void
@@ -995,7 +984,7 @@ set_ip4_geneve_bypass (vlib_main_t * vm,
/*?
* This command adds the 'ip4-geneve-bypass' graph node for a given interface.
* By adding the IPv4 geneve-bypass graph node to an interface, the node checks
- * for and validate input geneve packet and bypass ip4-lookup, ip4-local,
+ * for and validate input geneve packet and bypass ip4-lookup, ip4-local,
* ip4-udp-lookup nodes to speedup geneve packet forwarding. This node will
* cause extra overhead to for non-geneve packets which is kept at a minimum.
*
@@ -1014,13 +1003,13 @@ set_ip4_geneve_bypass (vlib_main_t * vm,
*
* Example of graph node after ip4-geneve-bypass is enabled:
* @cliexstart{show vlib graph ip4-geneve-bypass}
- * Name Next Previous
- * ip4-geneve-bypass error-drop [0] ip4-input
- * geneve4-input [1] ip4-input-no-checksum
- * ip4-lookup [2]
+ * Name Next Previous
+ * ip4-geneve-bypass error-drop [0] ip4-input
+ * geneve4-input [1] ip4-input-no-checksum
+ * ip4-lookup [2]
* @cliexend
*
- * Example of how to display the feature enabed on an interface:
+ * Example of how to display the feature enabled on an interface:
* @cliexstart{show ip interface features GigabitEthernet2/0/0}
* IP feature paths configured on GigabitEthernet2/0/0...
* ...
@@ -1034,13 +1023,11 @@ set_ip4_geneve_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip geneve-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_geneve_bypass_command, static) = {
.path = "set interface ip geneve-bypass",
.function = set_ip4_geneve_bypass,
.short_help = "set interface ip geneve-bypass <interface> [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_ip6_geneve_bypass (vlib_main_t * vm,
@@ -1052,7 +1039,7 @@ set_ip6_geneve_bypass (vlib_main_t * vm,
/*?
* This command adds the 'ip6-geneve-bypass' graph node for a given interface.
* By adding the IPv6 geneve-bypass graph node to an interface, the node checks
- * for and validate input geneve packet and bypass ip6-lookup, ip6-local,
+ * for and validate input geneve packet and bypass ip6-lookup, ip6-local,
* ip6-udp-lookup nodes to speedup geneve packet forwarding. This node will
* cause extra overhead to for non-geneve packets which is kept at a minimum.
*
@@ -1071,13 +1058,13 @@ set_ip6_geneve_bypass (vlib_main_t * vm,
*
* Example of graph node after ip6-geneve-bypass is enabled:
* @cliexstart{show vlib graph ip6-geneve-bypass}
- * Name Next Previous
- * ip6-geneve-bypass error-drop [0] ip6-input
- * geneve6-input [1] ip4-input-no-checksum
- * ip6-lookup [2]
+ * Name Next Previous
+ * ip6-geneve-bypass error-drop [0] ip6-input
+ * geneve6-input [1] ip4-input-no-checksum
+ * ip6-lookup [2]
* @cliexend
*
- * Example of how to display the feature enabed on an interface:
+ * Example of how to display the feature enabled on an interface:
* @cliexstart{show ip interface features GigabitEthernet2/0/0}
* IP feature paths configured on GigabitEthernet2/0/0...
* ...
@@ -1091,13 +1078,11 @@ set_ip6_geneve_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip6 geneve-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_geneve_bypass_command, static) = {
.path = "set interface ip6 geneve-bypass",
.function = set_ip6_geneve_bypass,
.short_help = "set interface ip6 geneve-bypass <interface> [del]",
};
-/* *INDENT-ON* */
clib_error_t *
geneve_init (vlib_main_t * vm)
diff --git a/src/plugins/geneve/geneve.h b/src/plugins/geneve/geneve.h
index 0cc14214b9b..deb51abe126 100644
--- a/src/plugins/geneve/geneve.h
+++ b/src/plugins/geneve/geneve.h
@@ -187,9 +187,7 @@ typedef struct
u16 msg_id_base;
/* cache for last 8 geneve tunnel */
-#ifdef CLIB_HAVE_VEC512
vtep4_cache_t vtep4_u512;
-#endif
} geneve_main_t;
diff --git a/src/plugins/geneve/geneve_api.c b/src/plugins/geneve/geneve_api.c
index d35a1bf5dbc..120fab93561 100644
--- a/src/plugins/geneve/geneve_api.c
+++ b/src/plugins/geneve/geneve_api.c
@@ -97,12 +97,10 @@ static void vl_api_geneve_add_del_tunnel_t_handler
rv = vnet_geneve_add_del_tunnel (&a, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_GENEVE_ADD_DEL_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void vl_api_geneve_add_del_tunnel2_t_handler
@@ -149,12 +147,10 @@ static void vl_api_geneve_add_del_tunnel2_t_handler
rv = vnet_geneve_add_del_tunnel (&a, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_GENEVE_ADD_DEL_TUNNEL2_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void send_geneve_tunnel_details
@@ -201,12 +197,10 @@ static void vl_api_geneve_tunnel_dump_t_handler
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (t, vxm->tunnels)
{
send_geneve_tunnel_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -238,8 +232,8 @@ geneve_api_hookup (vlib_main_t * vm)
*/
geneve_base_msg_id = setup_message_id_table ();
- am->api_trace_cfg[VL_API_GENEVE_ADD_DEL_TUNNEL + REPLY_MSG_ID_BASE].size +=
- 16 * sizeof (u32);
+ vl_api_increase_msg_trace_size (
+ am, VL_API_GENEVE_ADD_DEL_TUNNEL + REPLY_MSG_ID_BASE, 16 * sizeof (u32));
return 0;
}
@@ -249,12 +243,10 @@ VLIB_API_INIT_FUNCTION (geneve_api_hookup);
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "GENEVE Tunnels",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/geneve/geneve_test.c b/src/plugins/geneve/geneve_test.c
index ad6d3296ef6..e777e9b998e 100644
--- a/src/plugins/geneve/geneve_test.c
+++ b/src/plugins/geneve/geneve_test.c
@@ -26,7 +26,7 @@
#include <vnet/format_fns.h>
#include <geneve/geneve.api_enum.h>
#include <geneve/geneve.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -41,13 +41,11 @@ geneve_test_main_t geneve_test_main;
#define __plugin_msg_base geneve_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static void vl_api_geneve_add_del_tunnel_reply_t_handler
(vl_api_geneve_add_del_tunnel_reply_t * mp)
diff --git a/src/plugins/l2e/CMakeLists.txt b/src/plugins/gre/CMakeLists.txt
index 2bfb05a43e6..60fe540b968 100644
--- a/src/plugins/l2e/CMakeLists.txt
+++ b/src/plugins/gre/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2018 Cisco and/or its affiliates.
+# Copyright (c) 2023 Cisco and/or its affiliates
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at:
@@ -11,18 +11,25 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-add_vpp_plugin(l2e
+add_vpp_plugin(gre
SOURCES
- l2e_node.c
- l2e_api.c
- l2e.c
+ gre.c
+ node.c
+ gre_api.c
+ interface.c
+ pg.c
+ plugin.c
MULTIARCH_SOURCES
- l2e_node.c
+ node.c
+ gre.c
+
+ INSTALL_HEADERS
+ gre.h
+ error.def
API_FILES
- l2e.api
+ gre.api
- INSTALL_HEADERS
- l2e.h
)
+
diff --git a/src/vnet/gre/FEATURE.yaml b/src/plugins/gre/FEATURE.yaml
index 4b35b870dc3..4b35b870dc3 100644
--- a/src/vnet/gre/FEATURE.yaml
+++ b/src/plugins/gre/FEATURE.yaml
diff --git a/src/vnet/gre/error.def b/src/plugins/gre/error.def
index 161ecc1d874..161ecc1d874 100644
--- a/src/vnet/gre/error.def
+++ b/src/plugins/gre/error.def
diff --git a/src/vnet/gre/gre.api b/src/plugins/gre/gre.api
index 9c69ba4007d..9c69ba4007d 100644
--- a/src/vnet/gre/gre.api
+++ b/src/plugins/gre/gre.api
diff --git a/src/vnet/gre/gre.c b/src/plugins/gre/gre.c
index fcdf9c0d6bc..ce11ee9ecb2 100644
--- a/src/vnet/gre/gre.c
+++ b/src/plugins/gre/gre.c
@@ -16,9 +16,11 @@
*/
#include <vnet/vnet.h>
-#include <vnet/gre/gre.h>
+#include <gre/gre.h>
#include <vnet/adj/adj_midchain.h>
#include <vnet/tunnel/tunnel_dp.h>
+#include <vpp/app/version.h>
+#include <vnet/plugin/plugin.h>
extern gre_main_t gre_main;
@@ -44,7 +46,6 @@ typedef struct
} ip6_and_gre_union_t;
#endif /* CLIB_MARCH_VARIANT */
-
/* Packet trace structure */
typedef struct
{
@@ -59,25 +60,24 @@ typedef struct
ip46_address_t dst;
} gre_tx_trace_t;
-extern u8 *format_gre_tx_trace (u8 * s, va_list * args);
+extern u8 *format_gre_tx_trace (u8 *s, va_list *args);
#ifndef CLIB_MARCH_VARIANT
u8 *
-format_gre_tx_trace (u8 * s, va_list * args)
+format_gre_tx_trace (u8 *s, va_list *args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
gre_tx_trace_t *t = va_arg (*args, gre_tx_trace_t *);
- s = format (s, "GRE: tunnel %d len %d src %U dst %U",
- t->tunnel_id, t->length,
- format_ip46_address, &t->src, IP46_TYPE_ANY,
+ s = format (s, "GRE: tunnel %d len %d src %U dst %U", t->tunnel_id,
+ t->length, format_ip46_address, &t->src, IP46_TYPE_ANY,
format_ip46_address, &t->dst, IP46_TYPE_ANY);
return s;
}
u8 *
-format_gre_protocol (u8 * s, va_list * args)
+format_gre_protocol (u8 *s, va_list *args)
{
gre_protocol_t p = va_arg (*args, u32);
gre_main_t *gm = &gre_main;
@@ -92,7 +92,7 @@ format_gre_protocol (u8 * s, va_list * args)
}
u8 *
-format_gre_header_with_length (u8 * s, va_list * args)
+format_gre_header_with_length (u8 *s, va_list *args)
{
gre_main_t *gm = &gre_main;
gre_header_t *h = va_arg (*args, gre_header_t *);
@@ -113,17 +113,16 @@ format_gre_header_with_length (u8 * s, va_list * args)
gre_protocol_info_t *pi = gre_get_protocol_info (gm, p);
vlib_node_t *node = vlib_get_node (gm->vlib_main, pi->node_index);
if (node->format_buffer)
- s = format (s, "\n%U%U",
- format_white_space, indent,
- node->format_buffer, (void *) (h + 1),
- max_header_bytes - header_bytes);
+ s =
+ format (s, "\n%U%U", format_white_space, indent, node->format_buffer,
+ (void *) (h + 1), max_header_bytes - header_bytes);
}
return s;
}
u8 *
-format_gre_header (u8 * s, va_list * args)
+format_gre_header (u8 *s, va_list *args)
{
gre_header_t *h = va_arg (*args, gre_header_t *);
return format (s, "%U", format_gre_header_with_length, h, 0);
@@ -131,8 +130,7 @@ format_gre_header (u8 * s, va_list * args)
/* Returns gre protocol as an int in host byte order. */
uword
-unformat_gre_protocol_host_byte_order (unformat_input_t * input,
- va_list * args)
+unformat_gre_protocol_host_byte_order (unformat_input_t *input, va_list *args)
{
u16 *result = va_arg (*args, u16 *);
gre_main_t *gm = &gre_main;
@@ -151,18 +149,17 @@ unformat_gre_protocol_host_byte_order (unformat_input_t * input,
}
uword
-unformat_gre_protocol_net_byte_order (unformat_input_t * input,
- va_list * args)
+unformat_gre_protocol_net_byte_order (unformat_input_t *input, va_list *args)
{
u16 *result = va_arg (*args, u16 *);
if (!unformat_user (input, unformat_gre_protocol_host_byte_order, result))
return 0;
- *result = clib_host_to_net_u16 ((u16) * result);
+ *result = clib_host_to_net_u16 ((u16) *result);
return 1;
}
uword
-unformat_gre_header (unformat_input_t * input, va_list * args)
+unformat_gre_header (unformat_input_t *input, va_list *args)
{
u8 **result = va_arg (*args, u8 **);
gre_header_t _h, *h = &_h;
@@ -209,9 +206,8 @@ gre_proto_from_vnet_link (vnet_link_t link)
}
static u8 *
-gre_build_rewrite (vnet_main_t * vnm,
- u32 sw_if_index,
- vnet_link_t link_type, const void *dst_address)
+gre_build_rewrite (vnet_main_t *vnm, u32 sw_if_index, vnet_link_t link_type,
+ const void *dst_address)
{
gre_main_t *gm = &gre_main;
const ip46_address_t *dst;
@@ -276,8 +272,8 @@ gre_build_rewrite (vnet_main_t * vnm,
}
static void
-gre44_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
+gre44_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
{
tunnel_encap_decap_flags_t flags;
ip4_and_gre_header_t *ip0;
@@ -294,8 +290,8 @@ gre44_fixup (vlib_main_t * vm,
}
static void
-gre64_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
+gre64_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
{
tunnel_encap_decap_flags_t flags;
ip4_and_gre_header_t *ip0;
@@ -312,8 +308,8 @@ gre64_fixup (vlib_main_t * vm,
}
static void
-grex4_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
+grex4_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
{
ip4_header_t *ip0;
@@ -326,8 +322,8 @@ grex4_fixup (vlib_main_t * vm,
}
static void
-gre46_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
+gre46_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
{
tunnel_encap_decap_flags_t flags;
ip6_and_gre_header_t *ip0;
@@ -337,15 +333,14 @@ gre46_fixup (vlib_main_t * vm,
/* Fixup the payload length field in the GRE tunnel encap that was applied
* at the midchain node */
- ip0->ip6.payload_length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
- sizeof (ip0->ip6));
+ ip0->ip6.payload_length = clib_host_to_net_u16 (
+ vlib_buffer_length_in_chain (vm, b0) - sizeof (ip0->ip6));
tunnel_encap_fixup_4o6 (flags, b0, (ip4_header_t *) (ip0 + 1), &ip0->ip6);
}
static void
-gre66_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
+gre66_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
{
tunnel_encap_decap_flags_t flags;
ip6_and_gre_header_t *ip0;
@@ -355,15 +350,14 @@ gre66_fixup (vlib_main_t * vm,
/* Fixup the payload length field in the GRE tunnel encap that was applied
* at the midchain node */
- ip0->ip6.payload_length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
- sizeof (ip0->ip6));
+ ip0->ip6.payload_length = clib_host_to_net_u16 (
+ vlib_buffer_length_in_chain (vm, b0) - sizeof (ip0->ip6));
tunnel_encap_fixup_6o6 (flags, (ip6_header_t *) (ip0 + 1), &ip0->ip6);
}
static void
-grex6_fixup (vlib_main_t * vm,
- const ip_adjacency_t * adj, vlib_buffer_t * b0, const void *data)
+grex6_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b0,
+ const void *data)
{
ip6_and_gre_header_t *ip0;
@@ -371,9 +365,8 @@ grex6_fixup (vlib_main_t * vm,
/* Fixup the payload length field in the GRE tunnel encap that was applied
* at the midchain node */
- ip0->ip6.payload_length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
- sizeof (ip0->ip6));
+ ip0->ip6.payload_length = clib_host_to_net_u16 (
+ vlib_buffer_length_in_chain (vm, b0) - sizeof (ip0->ip6));
}
/**
@@ -401,7 +394,7 @@ gre_get_fixup (fib_protocol_t fproto, vnet_link_t lt)
}
void
-gre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
+gre_update_adj (vnet_main_t *vnm, u32 sw_if_index, adj_index_t ai)
{
gre_main_t *gm = &gre_main;
gre_tunnel_t *t;
@@ -420,15 +413,11 @@ gre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
if (!(t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH))
af |= ADJ_FLAG_MIDCHAIN_IP_STACK;
- if (VNET_LINK_ETHERNET == adj_get_link_type (ai))
- af |= ADJ_FLAG_MIDCHAIN_NO_COUNT;
-
- adj_nbr_midchain_update_rewrite
- (ai, gre_get_fixup (t->tunnel_dst.fp_proto,
- adj_get_link_type (ai)),
- uword_to_pointer (t->flags, void *), af,
- gre_build_rewrite (vnm, sw_if_index, adj_get_link_type (ai),
- &t->tunnel_dst.fp_addr));
+ adj_nbr_midchain_update_rewrite (
+ ai, gre_get_fixup (t->tunnel_dst.fp_proto, adj_get_link_type (ai)),
+ uword_to_pointer (t->flags, void *), af,
+ gre_build_rewrite (vnm, sw_if_index, adj_get_link_type (ai),
+ &t->tunnel_dst.fp_addr));
gre_tunnel_stack (ai);
}
@@ -449,15 +438,12 @@ mgre_mk_complete_walk (adj_index_t ai, void *data)
if (!(ctx->t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH))
af |= ADJ_FLAG_MIDCHAIN_IP_STACK;
- adj_nbr_midchain_update_rewrite
- (ai, gre_get_fixup (ctx->t->tunnel_dst.fp_proto,
- adj_get_link_type (ai)),
- uword_to_pointer (ctx->t->flags, void *),
- af,
- gre_build_rewrite (vnet_get_main (),
- ctx->t->sw_if_index,
- adj_get_link_type (ai),
- &teib_entry_get_nh (ctx->ne)->fp_addr));
+ adj_nbr_midchain_update_rewrite (
+ ai, gre_get_fixup (ctx->t->tunnel_dst.fp_proto, adj_get_link_type (ai)),
+ uword_to_pointer (ctx->t->flags, void *), af,
+ gre_build_rewrite (vnet_get_main (), ctx->t->sw_if_index,
+ adj_get_link_type (ai),
+ &teib_entry_get_nh (ctx->ne)->fp_addr));
teib_entry_adj_stack (ctx->ne, ai);
@@ -469,9 +455,9 @@ mgre_mk_incomplete_walk (adj_index_t ai, void *data)
{
gre_tunnel_t *t = data;
- adj_nbr_midchain_update_rewrite (ai, gre_get_fixup (t->tunnel_dst.fp_proto,
- adj_get_link_type (ai)),
- NULL, ADJ_FLAG_NONE, NULL);
+ adj_nbr_midchain_update_rewrite (
+ ai, gre_get_fixup (t->tunnel_dst.fp_proto, adj_get_link_type (ai)), NULL,
+ ADJ_FLAG_NONE, NULL);
adj_midchain_delegate_unstack (ai);
@@ -479,7 +465,7 @@ mgre_mk_incomplete_walk (adj_index_t ai, void *data)
}
void
-mgre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
+mgre_update_adj (vnet_main_t *vnm, u32 sw_if_index, adj_index_t ai)
{
gre_main_t *gm = &gre_main;
ip_adjacency_t *adj;
@@ -491,8 +477,8 @@ mgre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
ti = gm->tunnel_index_by_sw_if_index[sw_if_index];
t = pool_elt_at_index (gm->tunnels, ti);
- ne = teib_entry_find_46 (sw_if_index,
- adj->ia_nh_proto, &adj->sub_type.nbr.next_hop);
+ ne = teib_entry_find_46 (sw_if_index, adj->ia_nh_proto,
+ &adj->sub_type.nbr.next_hop);
if (NULL == ne)
{
@@ -503,13 +489,9 @@ mgre_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
return;
}
- mgre_walk_ctx_t ctx = {
- .t = t,
- .ne = ne
- };
- adj_nbr_walk_nh (sw_if_index,
- adj->ia_nh_proto,
- &adj->sub_type.nbr.next_hop, mgre_mk_complete_walk, &ctx);
+ mgre_walk_ctx_t ctx = { .t = t, .ne = ne };
+ adj_nbr_walk_nh (sw_if_index, adj->ia_nh_proto, &adj->sub_type.nbr.next_hop,
+ mgre_mk_complete_walk, &ctx);
}
#endif /* CLIB_MARCH_VARIANT */
@@ -524,9 +506,8 @@ typedef enum
* L3 traffic uses the adj-midchains.
*/
static_always_inline u32
-gre_encap_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, gre_tunnel_type_t type)
+gre_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, gre_tunnel_type_t type)
{
gre_main_t *gm = &gre_main;
u32 *from, n_left_from;
@@ -542,8 +523,8 @@ gre_encap_inline (vlib_main_t * vm,
while (n_left_from >= 2)
{
- if (PREDICT_FALSE
- (sw_if_index[0] != vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
+ if (PREDICT_FALSE (sw_if_index[0] !=
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
{
const vnet_hw_interface_t *hi;
sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
@@ -551,8 +532,8 @@ gre_encap_inline (vlib_main_t * vm,
gt[0] = &gm->tunnels[hi->dev_instance];
adj_index[0] = gt[0]->l2_adj_index;
}
- if (PREDICT_FALSE
- (sw_if_index[1] != vnet_buffer (b[1])->sw_if_index[VLIB_TX]))
+ if (PREDICT_FALSE (sw_if_index[1] !=
+ vnet_buffer (b[1])->sw_if_index[VLIB_TX]))
{
const vnet_hw_interface_t *hi;
sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
@@ -595,8 +576,7 @@ gre_encap_inline (vlib_main_t * vm,
if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
{
- gre_tx_trace_t *tr = vlib_add_trace (vm, node,
- b[0], sizeof (*tr));
+ gre_tx_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr));
tr->tunnel_id = gt[0] - gm->tunnels;
tr->src = gt[0]->tunnel_src;
tr->dst = gt[0]->tunnel_dst.fp_addr;
@@ -604,8 +584,7 @@ gre_encap_inline (vlib_main_t * vm,
}
if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
{
- gre_tx_trace_t *tr = vlib_add_trace (vm, node,
- b[1], sizeof (*tr));
+ gre_tx_trace_t *tr = vlib_add_trace (vm, node, b[1], sizeof (*tr));
tr->tunnel_id = gt[1] - gm->tunnels;
tr->src = gt[1]->tunnel_src;
tr->dst = gt[1]->tunnel_dst.fp_addr;
@@ -619,8 +598,8 @@ gre_encap_inline (vlib_main_t * vm,
while (n_left_from >= 1)
{
- if (PREDICT_FALSE
- (sw_if_index[0] != vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
+ if (PREDICT_FALSE (sw_if_index[0] !=
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX]))
{
const vnet_hw_interface_t *hi;
sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
@@ -649,8 +628,7 @@ gre_encap_inline (vlib_main_t * vm,
if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
{
- gre_tx_trace_t *tr = vlib_add_trace (vm, node,
- b[0], sizeof (*tr));
+ gre_tx_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr));
tr->tunnel_id = gt[0] - gm->tunnels;
tr->src = gt[0]->tunnel_src;
tr->dst = gt[0]->tunnel_dst.fp_addr;
@@ -661,37 +639,33 @@ gre_encap_inline (vlib_main_t * vm,
n_left_from -= 1;
}
- vlib_buffer_enqueue_to_single_next (vm, node, from,
- GRE_ENCAP_NEXT_L2_MIDCHAIN,
- frame->n_vectors);
+ vlib_buffer_enqueue_to_single_next (
+ vm, node, from, GRE_ENCAP_NEXT_L2_MIDCHAIN, frame->n_vectors);
- vlib_node_increment_counter (vm, node->node_index,
- GRE_ERROR_PKTS_ENCAP, frame->n_vectors);
+ vlib_node_increment_counter (vm, node->node_index, GRE_ERROR_PKTS_ENCAP,
+ frame->n_vectors);
return frame->n_vectors;
}
static char *gre_error_strings[] = {
-#define gre_error(n,s) s,
+#define gre_error(n, s) s,
#include "error.def"
#undef gre_error
};
-VLIB_NODE_FN (gre_teb_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (gre_teb_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return (gre_encap_inline (vm, node, frame, GRE_TUNNEL_TYPE_TEB));
}
-VLIB_NODE_FN (gre_erspan_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (gre_erspan_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return (gre_encap_inline (vm, node, frame, GRE_TUNNEL_TYPE_ERSPAN));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (gre_teb_encap_node) =
{
.name = "gre-teb-encap",
@@ -718,11 +692,10 @@ VLIB_REGISTER_NODE (gre_erspan_encap_node) =
[GRE_ENCAP_NEXT_L2_MIDCHAIN] = "adj-l2-midchain",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
static u8 *
-format_gre_tunnel_name (u8 * s, va_list * args)
+format_gre_tunnel_name (u8 *s, va_list *args)
{
u32 dev_instance = va_arg (*args, u32);
gre_main_t *gm = &gre_main;
@@ -736,7 +709,7 @@ format_gre_tunnel_name (u8 * s, va_list * args)
}
static u8 *
-format_gre_device (u8 * s, va_list * args)
+format_gre_device (u8 *s, va_list *args)
{
u32 dev_instance = va_arg (*args, u32);
CLIB_UNUSED (int verbose) = va_arg (*args, int);
@@ -746,8 +719,8 @@ format_gre_device (u8 * s, va_list * args)
}
static int
-gre_tunnel_desc (u32 sw_if_index,
- ip46_address_t * src, ip46_address_t * dst, u8 * is_l2)
+gre_tunnel_desc (u32 sw_if_index, ip46_address_t *src, ip46_address_t *dst,
+ u8 *is_l2)
{
gre_main_t *gm = &gre_main;
gre_tunnel_t *t;
@@ -768,7 +741,6 @@ gre_tunnel_desc (u32 sw_if_index,
return (0);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (gre_device_class) = {
.name = "GRE tunnel device",
.format_device_name = format_gre_tunnel_name,
@@ -779,7 +751,8 @@ VNET_DEVICE_CLASS (gre_device_class) = {
#ifdef SOON
.clear counter = 0;
#endif
-};
+}
+;
VNET_HW_INTERFACE_CLASS (gre_hw_interface_class) = {
.name = "GRE",
@@ -798,11 +771,10 @@ VNET_HW_INTERFACE_CLASS (mgre_hw_interface_class) = {
.update_adjacency = mgre_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
static void
-add_protocol (gre_main_t * gm, gre_protocol_t protocol, char *protocol_name)
+add_protocol (gre_main_t *gm, gre_protocol_t protocol, char *protocol_name)
{
gre_protocol_info_t *pi;
u32 i;
@@ -819,7 +791,7 @@ add_protocol (gre_main_t * gm, gre_protocol_t protocol, char *protocol_name)
}
static clib_error_t *
-gre_init (vlib_main_t * vm)
+gre_init (vlib_main_t *vm)
{
gre_main_t *gm = &gre_main;
clib_error_t *error;
@@ -853,7 +825,7 @@ gre_init (vlib_main_t * vm)
gm->seq_num_by_key =
hash_create_mem (0, sizeof (gre_sn_key_t), sizeof (uword));
-#define _(n,s) add_protocol (gm, GRE_PROTOCOL_##s, #s);
+#define _(n, s) add_protocol (gm, GRE_PROTOCOL_##s, #s);
foreach_gre_protocol
#undef _
return vlib_call_init_function (vm, gre_input_init);
diff --git a/src/vnet/gre/gre.h b/src/plugins/gre/gre.h
index ea085bf0fa1..ce57454f9b7 100644
--- a/src/vnet/gre/gre.h
+++ b/src/plugins/gre/gre.h
@@ -32,7 +32,7 @@ extern vnet_hw_interface_class_t mgre_hw_interface_class;
typedef enum
{
#define gre_error(n,s) GRE_ERROR_##n,
-#include <vnet/gre/error.def>
+#include <gre/error.def>
#undef gre_error
GRE_N_ERROR,
} gre_error_t;
@@ -293,22 +293,18 @@ typedef struct
/**
* @brief IPv4 and GRE header.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4;
gre_header_t gre;
}) ip4_and_gre_header_t;
-/* *INDENT-ON* */
/**
* @brief IPv6 and GRE header.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_header_t ip6;
gre_header_t gre;
}) ip6_and_gre_header_t;
-/* *INDENT-ON* */
always_inline gre_protocol_info_t *
gre_get_protocol_info (gre_main_t * em, gre_protocol_t protocol)
diff --git a/src/vnet/gre/gre_api.c b/src/plugins/gre/gre_api.c
index e83635a7820..5149f92fb80 100644
--- a/src/vnet/gre/gre_api.c
+++ b/src/plugins/gre/gre_api.c
@@ -23,26 +23,26 @@
#include <vnet/interface.h>
#include <vnet/api_errno.h>
-#include <vnet/gre/gre.h>
+#include <gre/gre.h>
#include <vnet/fib/fib_table.h>
#include <vnet/tunnel/tunnel_types_api.h>
#include <vnet/ip/ip_types_api.h>
-#include <vnet/gre/gre.api_enum.h>
-#include <vnet/gre/gre.api_types.h>
+#include <gre/gre.api_enum.h>
+#include <gre/gre.api_types.h>
#define REPLY_MSG_ID_BASE gre_main.msg_id_base
#include <vlibapi/api_helper_macros.h>
static int
-gre_tunnel_type_decode (vl_api_gre_tunnel_type_t in, gre_tunnel_type_t * out)
+gre_tunnel_type_decode (vl_api_gre_tunnel_type_t in, gre_tunnel_type_t *out)
{
switch (in)
{
-#define _(n, v) \
- case GRE_API_TUNNEL_TYPE_##n: \
- *out = GRE_TUNNEL_TYPE_##n; \
- return (0);
+#define _(n, v) \
+ case GRE_API_TUNNEL_TYPE_##n: \
+ *out = GRE_TUNNEL_TYPE_##n; \
+ return (0);
foreach_gre_tunnel_type
#undef _
}
@@ -57,10 +57,10 @@ gre_tunnel_type_encode (gre_tunnel_type_t in)
switch (in)
{
-#define _(n, v) \
- case GRE_TUNNEL_TYPE_##n: \
- out = GRE_API_TUNNEL_TYPE_##n; \
- break;
+#define _(n, v) \
+ case GRE_TUNNEL_TYPE_##n: \
+ out = GRE_API_TUNNEL_TYPE_##n; \
+ break;
foreach_gre_tunnel_type
#undef _
}
@@ -68,10 +68,10 @@ gre_tunnel_type_encode (gre_tunnel_type_t in)
return (out);
}
-static void vl_api_gre_tunnel_add_del_t_handler
- (vl_api_gre_tunnel_add_del_t * mp)
+static void
+vl_api_gre_tunnel_add_del_t_handler (vl_api_gre_tunnel_add_del_t *mp)
{
- vnet_gre_tunnel_add_del_args_t _a = { }, *a = &_a;
+ vnet_gre_tunnel_add_del_args_t _a = {}, *a = &_a;
vl_api_gre_tunnel_add_del_reply_t *rmp;
tunnel_encap_decap_flags_t flags;
u32 sw_if_index = ~0;
@@ -118,41 +118,35 @@ static void vl_api_gre_tunnel_add_del_t_handler
rv = vnet_gre_tunnel_add_del (a, &sw_if_index);
out:
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_GRE_TUNNEL_ADD_DEL_REPLY,
- ({
- rmp->sw_if_index = ntohl (sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_GRE_TUNNEL_ADD_DEL_REPLY,
+ ({ rmp->sw_if_index = ntohl (sw_if_index); }));
}
-static void send_gre_tunnel_details
- (gre_tunnel_t * t, vl_api_gre_tunnel_dump_t * mp)
+static void
+send_gre_tunnel_details (gre_tunnel_t *t, vl_api_gre_tunnel_dump_t *mp)
{
vl_api_gre_tunnel_details_t *rmp;
- int rv = 0;
- /* *INDENT-OFF* */
- REPLY_MACRO_DETAILS2(VL_API_GRE_TUNNEL_DETAILS,
- ({
- ip_address_encode (&t->tunnel_src, IP46_TYPE_ANY, &rmp->tunnel.src);
- ip_address_encode (&t->tunnel_dst.fp_addr, IP46_TYPE_ANY, &rmp->tunnel.dst);
-
- rmp->tunnel.outer_table_id =
- htonl (fib_table_get_table_id
- (t->outer_fib_index, t->tunnel_dst.fp_proto));
-
- rmp->tunnel.type = gre_tunnel_type_encode (t->type);
- rmp->tunnel.mode = tunnel_mode_encode (t->mode);
- rmp->tunnel.instance = htonl (t->user_instance);
- rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
- rmp->tunnel.session_id = htons (t->session_id);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO_DETAILS2 (
+ VL_API_GRE_TUNNEL_DETAILS, ({
+ ip_address_encode (&t->tunnel_src, IP46_TYPE_ANY, &rmp->tunnel.src);
+ ip_address_encode (&t->tunnel_dst.fp_addr, IP46_TYPE_ANY,
+ &rmp->tunnel.dst);
+
+ rmp->tunnel.outer_table_id = htonl (
+ fib_table_get_table_id (t->outer_fib_index, t->tunnel_dst.fp_proto));
+
+ rmp->tunnel.type = gre_tunnel_type_encode (t->type);
+ rmp->tunnel.mode = tunnel_mode_encode (t->mode);
+ rmp->tunnel.flags = tunnel_encap_decap_flags_encode (t->flags);
+ rmp->tunnel.instance = htonl (t->user_instance);
+ rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
+ rmp->tunnel.session_id = htons (t->session_id);
+ }));
}
static void
-vl_api_gre_tunnel_dump_t_handler (vl_api_gre_tunnel_dump_t * mp)
+vl_api_gre_tunnel_dump_t_handler (vl_api_gre_tunnel_dump_t *mp)
{
vl_api_registration_t *reg;
gre_main_t *gm = &gre_main;
@@ -167,12 +161,10 @@ vl_api_gre_tunnel_dump_t_handler (vl_api_gre_tunnel_dump_t * mp)
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (t, gm->tunnels)
- {
- send_gre_tunnel_details(t, mp);
- }
- /* *INDENT-ON* */
+ {
+ send_gre_tunnel_details (t, mp);
+ }
}
else
@@ -196,10 +188,10 @@ vl_api_gre_tunnel_dump_t_handler (vl_api_gre_tunnel_dump_t * mp)
*/
/* API definitions */
#include <vnet/format_fns.h>
-#include <vnet/gre/gre.api.c>
+#include <gre/gre.api.c>
static clib_error_t *
-gre_api_hookup (vlib_main_t * vm)
+gre_api_hookup (vlib_main_t *vm)
{
/*
* Set up the (msg_name, crc, message-id) table
diff --git a/src/vnet/gre/interface.c b/src/plugins/gre/interface.c
index f2c679cb9b5..bd9a6078502 100644
--- a/src/vnet/gre/interface.c
+++ b/src/plugins/gre/interface.c
@@ -16,7 +16,7 @@
*/
#include <vnet/vnet.h>
-#include <vnet/gre/gre.h>
+#include <gre/gre.h>
#include <vnet/ip/format.h>
#include <vnet/fib/fib_table.h>
#include <vnet/adj/adj_midchain.h>
@@ -26,15 +26,16 @@
#include <vnet/teib/teib.h>
u8 *
-format_gre_tunnel_type (u8 * s, va_list * args)
+format_gre_tunnel_type (u8 *s, va_list *args)
{
gre_tunnel_type_t type = va_arg (*args, int);
switch (type)
{
-#define _(n, v) case GRE_TUNNEL_TYPE_##n: \
- s = format (s, "%s", v); \
- break;
+#define _(n, v) \
+ case GRE_TUNNEL_TYPE_##n: \
+ s = format (s, "%s", v); \
+ break;
foreach_gre_tunnel_type
#undef _
}
@@ -43,15 +44,15 @@ format_gre_tunnel_type (u8 * s, va_list * args)
}
static u8 *
-format_gre_tunnel (u8 * s, va_list * args)
+format_gre_tunnel (u8 *s, va_list *args)
{
gre_tunnel_t *t = va_arg (*args, gre_tunnel_t *);
s = format (s, "[%d] instance %d src %U dst %U fib-idx %d sw-if-idx %d ",
- t->dev_instance, t->user_instance,
- format_ip46_address, &t->tunnel_src, IP46_TYPE_ANY,
- format_ip46_address, &t->tunnel_dst.fp_addr, IP46_TYPE_ANY,
- t->outer_fib_index, t->sw_if_index);
+ t->dev_instance, t->user_instance, format_ip46_address,
+ &t->tunnel_src, IP46_TYPE_ANY, format_ip46_address,
+ &t->tunnel_dst.fp_addr, IP46_TYPE_ANY, t->outer_fib_index,
+ t->sw_if_index);
s = format (s, "payload %U ", format_gre_tunnel_type, t->type);
s = format (s, "%U ", format_tunnel_mode, t->mode);
@@ -66,22 +67,22 @@ format_gre_tunnel (u8 * s, va_list * args)
}
static gre_tunnel_t *
-gre_tunnel_db_find (const vnet_gre_tunnel_add_del_args_t * a,
- u32 outer_fib_index, gre_tunnel_key_t * key)
+gre_tunnel_db_find (const vnet_gre_tunnel_add_del_args_t *a,
+ u32 outer_fib_index, gre_tunnel_key_t *key)
{
gre_main_t *gm = &gre_main;
uword *p;
if (!a->is_ipv6)
{
- gre_mk_key4 (a->src.ip4, a->dst.ip4, outer_fib_index,
- a->type, a->mode, a->session_id, &key->gtk_v4);
+ gre_mk_key4 (a->src.ip4, a->dst.ip4, outer_fib_index, a->type, a->mode,
+ a->session_id, &key->gtk_v4);
p = hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
}
else
{
- gre_mk_key6 (&a->src.ip6, &a->dst.ip6, outer_fib_index,
- a->type, a->mode, a->session_id, &key->gtk_v6);
+ gre_mk_key6 (&a->src.ip6, &a->dst.ip6, outer_fib_index, a->type, a->mode,
+ a->session_id, &key->gtk_v6);
p = hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6);
}
@@ -92,7 +93,7 @@ gre_tunnel_db_find (const vnet_gre_tunnel_add_del_args_t * a,
}
static void
-gre_tunnel_db_add (gre_tunnel_t * t, gre_tunnel_key_t * key)
+gre_tunnel_db_add (gre_tunnel_t *t, gre_tunnel_key_t *key)
{
gre_main_t *gm = &gre_main;
@@ -107,7 +108,7 @@ gre_tunnel_db_add (gre_tunnel_t * t, gre_tunnel_key_t * key)
}
static void
-gre_tunnel_db_remove (gre_tunnel_t * t, gre_tunnel_key_t * key)
+gre_tunnel_db_remove (gre_tunnel_t *t, gre_tunnel_key_t *key)
{
gre_main_t *gm = &gre_main;
@@ -213,7 +214,7 @@ mgre_adj_walk_cb (adj_index_t ai, void *ctx)
}
static void
-gre_tunnel_restack (gre_tunnel_t * gt)
+gre_tunnel_restack (gre_tunnel_t *gt)
{
fib_protocol_t proto;
@@ -235,8 +236,8 @@ gre_tunnel_restack (gre_tunnel_t * gt)
}
static void
-gre_teib_mk_key (const gre_tunnel_t * t,
- const teib_entry_t * ne, gre_tunnel_key_t * key)
+gre_teib_mk_key (const gre_tunnel_t *t, const teib_entry_t *ne,
+ gre_tunnel_key_t *key)
{
const fib_prefix_t *nh;
@@ -244,22 +245,20 @@ gre_teib_mk_key (const gre_tunnel_t * t,
/* construct the key using mode P2P so it can be found in the DP */
if (FIB_PROTOCOL_IP4 == nh->fp_proto)
- gre_mk_key4 (t->tunnel_src.ip4,
- nh->fp_addr.ip4,
- teib_entry_get_fib_index (ne),
- t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v4);
+ gre_mk_key4 (t->tunnel_src.ip4, nh->fp_addr.ip4,
+ teib_entry_get_fib_index (ne), t->type, TUNNEL_MODE_P2P, 0,
+ &key->gtk_v4);
else
- gre_mk_key6 (&t->tunnel_src.ip6,
- &nh->fp_addr.ip6,
- teib_entry_get_fib_index (ne),
- t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v6);
+ gre_mk_key6 (&t->tunnel_src.ip6, &nh->fp_addr.ip6,
+ teib_entry_get_fib_index (ne), t->type, TUNNEL_MODE_P2P, 0,
+ &key->gtk_v6);
}
/**
* An TEIB entry has been added
*/
static void
-gre_teib_entry_added (const teib_entry_t * ne)
+gre_teib_entry_added (const teib_entry_t *ne)
{
gre_main_t *gm = &gre_main;
const ip_address_t *nh;
@@ -290,20 +289,16 @@ gre_teib_entry_added (const teib_entry_t * ne)
/* update the rewrites for each of the adjacencies for this peer (overlay)
* using the next-hop (underlay) */
- mgre_walk_ctx_t ctx = {
- .t = t,
- .ne = ne
- };
+ mgre_walk_ctx_t ctx = { .t = t, .ne = ne };
nh = teib_entry_get_peer (ne);
- adj_nbr_walk_nh (teib_entry_get_sw_if_index (ne),
- (AF_IP4 == ip_addr_version (nh) ?
- FIB_PROTOCOL_IP4 :
- FIB_PROTOCOL_IP6),
- &ip_addr_46 (nh), mgre_mk_complete_walk, &ctx);
+ adj_nbr_walk_nh (
+ teib_entry_get_sw_if_index (ne),
+ (AF_IP4 == ip_addr_version (nh) ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6),
+ &ip_addr_46 (nh), mgre_mk_complete_walk, &ctx);
}
static void
-gre_teib_entry_deleted (const teib_entry_t * ne)
+gre_teib_entry_deleted (const teib_entry_t *ne)
{
gre_main_t *gm = &gre_main;
const ip_address_t *nh;
@@ -330,11 +325,10 @@ gre_teib_entry_deleted (const teib_entry_t * ne)
nh = teib_entry_get_peer (ne);
/* make all the adjacencies incomplete */
- adj_nbr_walk_nh (teib_entry_get_sw_if_index (ne),
- (AF_IP4 == ip_addr_version (nh) ?
- FIB_PROTOCOL_IP4 :
- FIB_PROTOCOL_IP6),
- &ip_addr_46 (nh), mgre_mk_incomplete_walk, t);
+ adj_nbr_walk_nh (
+ teib_entry_get_sw_if_index (ne),
+ (AF_IP4 == ip_addr_version (nh) ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6),
+ &ip_addr_46 (nh), mgre_mk_incomplete_walk, t);
}
static walk_rc_t
@@ -353,7 +347,7 @@ static walk_rc_t
gre_tunnel_add_teib_walk (index_t nei, void *ctx)
{
gre_tunnel_t *t = ctx;
- gre_tunnel_key_t key;
+ gre_tunnel_key_t key = {};
gre_teib_mk_key (t, teib_entry_get (nei), &key);
gre_tunnel_db_add (t, &key);
@@ -362,17 +356,14 @@ gre_tunnel_add_teib_walk (index_t nei, void *ctx)
}
static int
-vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a,
- u32 outer_fib_index, u32 * sw_if_indexp)
+vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t *a, u32 outer_fib_index,
+ u32 *sw_if_indexp)
{
gre_main_t *gm = &gre_main;
vnet_main_t *vnm = gm->vnet_main;
- ip4_main_t *im4 = &ip4_main;
- ip6_main_t *im6 = &ip6_main;
gre_tunnel_t *t;
vnet_hw_interface_t *hi;
u32 hw_if_index, sw_if_index;
- clib_error_t *error;
u8 is_ipv6 = a->is_ipv6;
gre_tunnel_key_t key;
@@ -384,8 +375,8 @@ vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a,
clib_memset (t, 0, sizeof (*t));
/* Reconcile the real dev_instance and a possible requested instance */
- u32 t_idx = t - gm->tunnels; /* tunnel index (or instance) */
- u32 u_idx = a->instance; /* user specified instance */
+ u32 t_idx = t - gm->tunnels; /* tunnel index (or instance) */
+ u32 u_idx = a->instance; /* user specified instance */
if (u_idx == ~0)
u_idx = t_idx;
if (hash_get (gm->instance_used, u_idx))
@@ -395,8 +386,8 @@ vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a,
}
hash_set (gm->instance_used, u_idx, 1);
- t->dev_instance = t_idx; /* actual */
- t->user_instance = u_idx; /* name */
+ t->dev_instance = t_idx; /* actual */
+ t->user_instance = u_idx; /* name */
t->type = a->type;
t->mode = a->mode;
@@ -417,17 +408,17 @@ vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a,
}
else
{
+ vnet_eth_interface_registration_t eir = {};
+
/* Default MAC address (d00b:eed0:0000 + sw_if_index) */
- u8 address[6] =
- { 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx };
- error =
- ethernet_register_interface (vnm, gre_device_class.index, t_idx,
- address, &hw_if_index, 0);
- if (error)
- {
- clib_error_report (error);
- return VNET_API_ERROR_INVALID_REGISTRATION;
- }
+ u8 address[6] = {
+ 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx
+ };
+
+ eir.dev_class_index = gre_device_class.index;
+ eir.dev_instance = t_idx;
+ eir.address = address;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
}
/* Set GRE tunnel interface output node (not used for L3 payload) */
@@ -451,15 +442,13 @@ vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a,
if (!is_ipv6)
{
- vec_validate (im4->fib_index_by_sw_if_index, sw_if_index);
- hi->min_packet_bytes =
- 64 + sizeof (gre_header_t) + sizeof (ip4_header_t);
+ hi->frame_overhead = sizeof (gre_header_t) + sizeof (ip4_header_t);
+ hi->min_frame_size = hi->frame_overhead + 64;
}
else
{
- vec_validate (im6->fib_index_by_sw_if_index, sw_if_index);
- hi->min_packet_bytes =
- 64 + sizeof (gre_header_t) + sizeof (ip6_header_t);
+ hi->frame_overhead = sizeof (gre_header_t) + sizeof (ip6_header_t);
+ hi->min_frame_size = hi->frame_overhead + 64;
}
/* Standard default gre MTU. */
@@ -506,11 +495,17 @@ vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a,
if (t->type != GRE_TUNNEL_TYPE_L3)
{
- t->l2_adj_index = adj_nbr_add_or_lock
- (t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index);
+ t->l2_adj_index = adj_nbr_add_or_lock (
+ t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index);
+ vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index,
+ (u8 *) "tunnel-output-no-count");
gre_update_adj (vnm, t->sw_if_index, t->l2_adj_index);
}
-
+ else
+ {
+ vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index,
+ (u8 *) "tunnel-output");
+ }
if (sw_if_indexp)
*sw_if_indexp = sw_if_index;
@@ -522,8 +517,8 @@ vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a,
}
static int
-vnet_gre_tunnel_delete (vnet_gre_tunnel_add_del_args_t * a,
- u32 outer_fib_index, u32 * sw_if_indexp)
+vnet_gre_tunnel_delete (vnet_gre_tunnel_add_del_args_t *a, u32 outer_fib_index,
+ u32 *sw_if_indexp)
{
gre_main_t *gm = &gre_main;
vnet_main_t *vnm = gm->vnet_main;
@@ -539,7 +534,7 @@ vnet_gre_tunnel_delete (vnet_gre_tunnel_add_del_args_t * a,
teib_walk_itf (t->sw_if_index, gre_tunnel_delete_teib_walk, t);
sw_if_index = t->sw_if_index;
- vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
+ vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */);
/* make sure tunnel is removed from l2 bd or xconnect */
set_int_l2_mode (gm->vlib_main, vnm, MODE_L3, sw_if_index, 0,
@@ -566,6 +561,7 @@ vnet_gre_tunnel_delete (vnet_gre_tunnel_add_del_args_t * a,
clib_mem_free (t->gre_sn);
}
+ vnet_reset_interface_l3_output_node (gm->vlib_main, sw_if_index);
hash_unset (gm->instance_used, t->user_instance);
gre_tunnel_db_remove (t, &key);
pool_put (gm->tunnels, t);
@@ -577,14 +573,12 @@ vnet_gre_tunnel_delete (vnet_gre_tunnel_add_del_args_t * a,
}
int
-vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t * a,
- u32 * sw_if_indexp)
+vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t *a, u32 *sw_if_indexp)
{
u32 outer_fib_index;
- outer_fib_index = fib_table_find ((a->is_ipv6 ?
- FIB_PROTOCOL_IP6 :
- FIB_PROTOCOL_IP4), a->outer_table_id);
+ outer_fib_index = fib_table_find (
+ (a->is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4), a->outer_table_id);
if (~0 == outer_fib_index)
return VNET_API_ERROR_NO_SUCH_FIB;
@@ -602,7 +596,7 @@ vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t * a,
}
clib_error_t *
-gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+gre_interface_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
{
gre_main_t *gm = &gre_main;
vnet_hw_interface_t *hi;
@@ -627,7 +621,7 @@ gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
vnet_hw_interface_set_flags (vnm, hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
else
- vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */ );
+ vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
gre_tunnel_restack (t);
@@ -635,14 +629,13 @@ gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
}
static clib_error_t *
-create_gre_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+create_gre_tunnel_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
vnet_gre_tunnel_add_del_args_t _a, *a = &_a;
- ip46_address_t src = ip46_address_initializer, dst =
- ip46_address_initializer;
+ ip46_address_t src = ip46_address_initializer,
+ dst = ip46_address_initializer;
u32 instance = ~0;
u32 outer_table_id = 0;
gre_tunnel_type_t t_type = GRE_TUNNEL_TYPE_L3;
@@ -676,10 +669,8 @@ create_gre_tunnel_command_fn (vlib_main_t * vm,
t_type = GRE_TUNNEL_TYPE_TEB;
else if (unformat (line_input, "erspan %d", &session_id))
t_type = GRE_TUNNEL_TYPE_ERSPAN;
- else
- if (unformat
- (line_input, "flags %U", unformat_tunnel_encap_decap_flags,
- &flags))
+ else if (unformat (line_input, "flags %U",
+ unformat_tunnel_encap_decap_flags, &flags))
;
else
{
@@ -709,8 +700,7 @@ create_gre_tunnel_command_fn (vlib_main_t * vm,
if (ip46_address_is_ip4 (&src) != ip46_address_is_ip4 (&dst))
{
- error =
- clib_error_return (0, "src and dst address must be the same AF");
+ error = clib_error_return (0, "src and dst address must be the same AF");
goto done;
}
@@ -745,15 +735,14 @@ create_gre_tunnel_command_fn (vlib_main_t * vm,
error = clib_error_return (0, "GRE tunnel doesn't exist");
goto done;
case VNET_API_ERROR_INVALID_SESSION_ID:
- error = clib_error_return (0, "session ID %d out of range\n",
- session_id);
+ error =
+ clib_error_return (0, "session ID %d out of range\n", session_id);
goto done;
case VNET_API_ERROR_INSTANCE_IN_USE:
error = clib_error_return (0, "Instance is in use");
goto done;
default:
- error =
- clib_error_return (0, "vnet_gre_tunnel_add_del returned %d", rv);
+ error = clib_error_return (0, "vnet_gre_tunnel_add_del returned %d", rv);
goto done;
}
@@ -763,20 +752,17 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = {
.path = "create gre tunnel",
.short_help = "create gre tunnel src <addr> dst <addr> [instance <n>] "
- "[outer-fib-id <fib>] [teb | erspan <session-id>] [del] "
- "[multipoint]",
+ "[outer-fib-id <fib>] [teb | erspan <session-id>] [del] "
+ "[multipoint]",
.function = create_gre_tunnel_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
-show_gre_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+show_gre_tunnel_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
gre_main_t *gm = &gre_main;
gre_tunnel_t *t;
@@ -795,12 +781,10 @@ show_gre_tunnel_command_fn (vlib_main_t * vm,
if (~0 == ti)
{
- /* *INDENT-OFF* */
pool_foreach (t, gm->tunnels)
- {
- vlib_cli_output (vm, "%U", format_gre_tunnel, t);
- }
- /* *INDENT-ON* */
+ {
+ vlib_cli_output (vm, "%U", format_gre_tunnel, t);
+ }
}
else
{
@@ -812,12 +796,10 @@ show_gre_tunnel_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = {
- .path = "show gre tunnel",
- .function = show_gre_tunnel_command_fn,
+ .path = "show gre tunnel",
+ .function = show_gre_tunnel_command_fn,
};
-/* *INDENT-ON* */
const static teib_vft_t gre_teib_vft = {
.nv_added = gre_teib_entry_added,
@@ -826,7 +808,7 @@ const static teib_vft_t gre_teib_vft = {
/* force inclusion from application's main.c */
clib_error_t *
-gre_interface_init (vlib_main_t * vm)
+gre_interface_init (vlib_main_t *vm)
{
teib_register (&gre_teib_vft);
diff --git a/src/vnet/gre/node.c b/src/plugins/gre/node.c
index 92523069f05..5235888cc6f 100644
--- a/src/vnet/gre/node.c
+++ b/src/plugins/gre/node.c
@@ -17,21 +17,21 @@
#include <vlib/vlib.h>
#include <vnet/pg/pg.h>
-#include <vnet/gre/gre.h>
+#include <gre/gre.h>
#include <vnet/mpls/mpls.h>
#include <vppinfra/sparse_vec.h>
-#define foreach_gre_input_next \
-_(PUNT, "error-punt") \
-_(DROP, "error-drop") \
-_(ETHERNET_INPUT, "ethernet-input") \
-_(IP4_INPUT, "ip4-input") \
-_(IP6_INPUT, "ip6-input") \
-_(MPLS_INPUT, "mpls-input")
+#define foreach_gre_input_next \
+ _ (PUNT, "error-punt") \
+ _ (DROP, "error-drop") \
+ _ (ETHERNET_INPUT, "ethernet-input") \
+ _ (IP4_INPUT, "ip4-input") \
+ _ (IP6_INPUT, "ip6-input") \
+ _ (MPLS_INPUT, "mpls-input")
typedef enum
{
-#define _(s,n) GRE_INPUT_NEXT_##s,
+#define _(s, n) GRE_INPUT_NEXT_##s,
foreach_gre_input_next
#undef _
GRE_INPUT_N_NEXT,
@@ -45,20 +45,19 @@ typedef struct
ip46_address_t dst;
} gre_rx_trace_t;
-extern u8 *format_gre_rx_trace (u8 * s, va_list * args);
+extern u8 *format_gre_rx_trace (u8 *s, va_list *args);
#ifndef CLIB_MARCH_VARIANT
u8 *
-format_gre_rx_trace (u8 * s, va_list * args)
+format_gre_rx_trace (u8 *s, va_list *args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
gre_rx_trace_t *t = va_arg (*args, gre_rx_trace_t *);
- s = format (s, "GRE: tunnel %d len %d src %U dst %U",
- t->tunnel_id, clib_net_to_host_u16 (t->length),
- format_ip46_address, &t->src, IP46_TYPE_ANY,
- format_ip46_address, &t->dst, IP46_TYPE_ANY);
+ s = format (s, "GRE: tunnel %d len %d src %U dst %U", t->tunnel_id,
+ clib_net_to_host_u16 (t->length), format_ip46_address, &t->src,
+ IP46_TYPE_ANY, format_ip46_address, &t->dst, IP46_TYPE_ANY);
return s;
}
#endif /* CLIB_MARCH_VARIANT */
@@ -71,12 +70,11 @@ typedef struct
} gre_input_runtime_t;
always_inline void
-gre_trace (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b,
- u32 tun_sw_if_index, const ip6_header_t * ip6,
- const ip4_header_t * ip4, int is_ipv6)
+gre_trace (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b,
+ u32 tun_sw_if_index, const ip6_header_t *ip6,
+ const ip4_header_t *ip4, int is_ipv6)
{
- gre_rx_trace_t *tr = vlib_add_trace (vm, node,
- b, sizeof (*tr));
+ gre_rx_trace_t *tr = vlib_add_trace (vm, node, b, sizeof (*tr));
tr->tunnel_id = tun_sw_if_index;
if (is_ipv6)
{
@@ -97,14 +95,14 @@ gre_trace (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_buffer_t * b,
}
always_inline void
-gre_tunnel_get (const gre_main_t * gm, vlib_node_runtime_t * node,
- vlib_buffer_t * b, u16 * next, const gre_tunnel_key_t * key,
- gre_tunnel_key_t * cached_key, u32 * tun_sw_if_index,
- u32 * cached_tun_sw_if_index, int is_ipv6)
+gre_tunnel_get (const gre_main_t *gm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b, u16 *next, const gre_tunnel_key_t *key,
+ gre_tunnel_key_t *cached_key, u32 *tun_sw_if_index,
+ u32 *cached_tun_sw_if_index, int is_ipv6)
{
const uword *p;
- p = is_ipv6 ? hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6)
- : hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
+ p = is_ipv6 ? hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6) :
+ hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
if (PREDICT_FALSE (!p))
{
*next = GRE_INPUT_NEXT_DROP;
@@ -124,8 +122,7 @@ gre_tunnel_get (const gre_main_t * gm, vlib_node_runtime_t * node,
}
always_inline uword
-gre_input (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame,
+gre_input (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
const int is_ipv6)
{
gre_main_t *gm = &gre_main;
@@ -217,23 +214,23 @@ gre_input (vlib_main_t * vm,
type[0] = ni[0].tunnel_type;
type[1] = ni[1].tunnel_type;
- b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX
- ? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
- : node->errors[GRE_ERROR_NONE];
- b[1]->error = nidx[1] == SPARSE_VEC_INVALID_INDEX
- ? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
- : node->errors[GRE_ERROR_NONE];
+ b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX ?
+ node->errors[GRE_ERROR_UNKNOWN_PROTOCOL] :
+ node->errors[GRE_ERROR_NONE];
+ b[1]->error = nidx[1] == SPARSE_VEC_INVALID_INDEX ?
+ node->errors[GRE_ERROR_UNKNOWN_PROTOCOL] :
+ node->errors[GRE_ERROR_NONE];
version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version);
version[1] = clib_net_to_host_u16 (gre[1]->flags_and_version);
version[0] &= GRE_VERSION_MASK;
version[1] &= GRE_VERSION_MASK;
- b[0]->error = version[0]
- ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
+ b[0]->error =
+ version[0] ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0];
- b[1]->error = version[1]
- ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[1]->error;
+ b[1]->error =
+ version[1] ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[1]->error;
next[1] = version[1] ? GRE_INPUT_NEXT_DROP : next[1];
len[0] = vlib_buffer_length_in_chain (vm, b[0]);
@@ -242,27 +239,23 @@ gre_input (vlib_main_t * vm,
/* always search for P2P types in the DP */
if (is_ipv6)
{
- gre_mk_key6 (&ip6[0]->dst_address,
- &ip6[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v6);
- gre_mk_key6 (&ip6[1]->dst_address,
- &ip6[1]->src_address,
- vnet_buffer (b[1])->ip.fib_index,
- type[1], TUNNEL_MODE_P2P, 0, &key[1].gtk_v6);
+ gre_mk_key6 (&ip6[0]->dst_address, &ip6[0]->src_address,
+ vnet_buffer (b[0])->ip.fib_index, type[0],
+ TUNNEL_MODE_P2P, 0, &key[0].gtk_v6);
+ gre_mk_key6 (&ip6[1]->dst_address, &ip6[1]->src_address,
+ vnet_buffer (b[1])->ip.fib_index, type[1],
+ TUNNEL_MODE_P2P, 0, &key[1].gtk_v6);
matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6);
matched[1] = gre_match_key6 (&cached_key.gtk_v6, &key[1].gtk_v6);
}
else
{
- gre_mk_key4 (ip4[0]->dst_address,
- ip4[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v4);
- gre_mk_key4 (ip4[1]->dst_address,
- ip4[1]->src_address,
- vnet_buffer (b[1])->ip.fib_index,
- type[1], TUNNEL_MODE_P2P, 0, &key[1].gtk_v4);
+ gre_mk_key4 (ip4[0]->dst_address, ip4[0]->src_address,
+ vnet_buffer (b[0])->ip.fib_index, type[0],
+ TUNNEL_MODE_P2P, 0, &key[0].gtk_v4);
+ gre_mk_key4 (ip4[1]->dst_address, ip4[1]->src_address,
+ vnet_buffer (b[1])->ip.fib_index, type[1],
+ TUNNEL_MODE_P2P, 0, &key[1].gtk_v4);
matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4);
matched[1] = gre_match_key4 (&cached_key.gtk_v4, &key[1].gtk_v4);
}
@@ -271,36 +264,33 @@ gre_input (vlib_main_t * vm,
tun_sw_if_index[1] = cached_tun_sw_if_index;
if (PREDICT_FALSE (!matched[0]))
gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key,
- &tun_sw_if_index[0], &cached_tun_sw_if_index,
- is_ipv6);
+ &tun_sw_if_index[0], &cached_tun_sw_if_index, is_ipv6);
if (PREDICT_FALSE (!matched[1]))
gre_tunnel_get (gm, node, b[1], &next[1], &key[1], &cached_key,
- &tun_sw_if_index[1], &cached_tun_sw_if_index,
- is_ipv6);
+ &tun_sw_if_index[1], &cached_tun_sw_if_index, is_ipv6);
if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP))
{
- vlib_increment_combined_counter (&gm->vnet_main->
- interface_main.combined_sw_if_counters
- [VNET_INTERFACE_COUNTER_RX],
- vm->thread_index,
- tun_sw_if_index[0],
- 1 /* packets */ ,
- len[0] /* bytes */ );
+ vlib_increment_combined_counter (
+ &gm->vnet_main->interface_main
+ .combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX],
+ vm->thread_index, tun_sw_if_index[0], 1 /* packets */,
+ len[0] /* bytes */);
vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0];
}
if (PREDICT_TRUE (next[1] > GRE_INPUT_NEXT_DROP))
{
- vlib_increment_combined_counter (&gm->vnet_main->
- interface_main.combined_sw_if_counters
- [VNET_INTERFACE_COUNTER_RX],
- vm->thread_index,
- tun_sw_if_index[1],
- 1 /* packets */ ,
- len[1] /* bytes */ );
+ vlib_increment_combined_counter (
+ &gm->vnet_main->interface_main
+ .combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX],
+ vm->thread_index, tun_sw_if_index[1], 1 /* packets */,
+ len[1] /* bytes */);
vnet_buffer (b[1])->sw_if_index[VLIB_RX] = tun_sw_if_index[1];
}
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~0;
+ vnet_buffer (b[1])->sw_if_index[VLIB_TX] = (u32) ~0;
+
if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0],
is_ipv6);
@@ -363,54 +353,51 @@ gre_input (vlib_main_t * vm,
next[0] = ni[0].next_index;
type[0] = ni[0].tunnel_type;
- b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX
- ? node->errors[GRE_ERROR_UNKNOWN_PROTOCOL]
- : node->errors[GRE_ERROR_NONE];
+ b[0]->error = nidx[0] == SPARSE_VEC_INVALID_INDEX ?
+ node->errors[GRE_ERROR_UNKNOWN_PROTOCOL] :
+ node->errors[GRE_ERROR_NONE];
version[0] = clib_net_to_host_u16 (gre[0]->flags_and_version);
version[0] &= GRE_VERSION_MASK;
- b[0]->error = version[0]
- ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
+ b[0]->error =
+ version[0] ? node->errors[GRE_ERROR_UNSUPPORTED_VERSION] : b[0]->error;
next[0] = version[0] ? GRE_INPUT_NEXT_DROP : next[0];
len[0] = vlib_buffer_length_in_chain (vm, b[0]);
if (is_ipv6)
{
- gre_mk_key6 (&ip6[0]->dst_address,
- &ip6[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v6);
+ gre_mk_key6 (&ip6[0]->dst_address, &ip6[0]->src_address,
+ vnet_buffer (b[0])->ip.fib_index, type[0],
+ TUNNEL_MODE_P2P, 0, &key[0].gtk_v6);
matched[0] = gre_match_key6 (&cached_key.gtk_v6, &key[0].gtk_v6);
}
else
{
- gre_mk_key4 (ip4[0]->dst_address,
- ip4[0]->src_address,
- vnet_buffer (b[0])->ip.fib_index,
- type[0], TUNNEL_MODE_P2P, 0, &key[0].gtk_v4);
+ gre_mk_key4 (ip4[0]->dst_address, ip4[0]->src_address,
+ vnet_buffer (b[0])->ip.fib_index, type[0],
+ TUNNEL_MODE_P2P, 0, &key[0].gtk_v4);
matched[0] = gre_match_key4 (&cached_key.gtk_v4, &key[0].gtk_v4);
}
tun_sw_if_index[0] = cached_tun_sw_if_index;
if (PREDICT_FALSE (!matched[0]))
gre_tunnel_get (gm, node, b[0], &next[0], &key[0], &cached_key,
- &tun_sw_if_index[0], &cached_tun_sw_if_index,
- is_ipv6);
+ &tun_sw_if_index[0], &cached_tun_sw_if_index, is_ipv6);
if (PREDICT_TRUE (next[0] > GRE_INPUT_NEXT_DROP))
{
- vlib_increment_combined_counter (&gm->vnet_main->
- interface_main.combined_sw_if_counters
- [VNET_INTERFACE_COUNTER_RX],
- vm->thread_index,
- tun_sw_if_index[0],
- 1 /* packets */ ,
- len[0] /* bytes */ );
+ vlib_increment_combined_counter (
+ &gm->vnet_main->interface_main
+ .combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX],
+ vm->thread_index, tun_sw_if_index[0], 1 /* packets */,
+ len[0] /* bytes */);
vnet_buffer (b[0])->sw_if_index[VLIB_RX] = tun_sw_if_index[0];
}
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~0;
+
if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
gre_trace (vm, node, b[0], tun_sw_if_index[0], ip6[0], ip4[0],
is_ipv6);
@@ -422,35 +409,31 @@ gre_input (vlib_main_t * vm,
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
- vlib_node_increment_counter (vm,
- is_ipv6 ? gre6_input_node.index :
- gre4_input_node.index, GRE_ERROR_PKTS_DECAP,
- n_left_from);
+ vlib_node_increment_counter (
+ vm, is_ipv6 ? gre6_input_node.index : gre4_input_node.index,
+ GRE_ERROR_PKTS_DECAP, n_left_from);
return frame->n_vectors;
}
-VLIB_NODE_FN (gre4_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (gre4_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
return gre_input (vm, node, from_frame, /* is_ip6 */ 0);
}
-VLIB_NODE_FN (gre6_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (gre6_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
return gre_input (vm, node, from_frame, /* is_ip6 */ 1);
}
static char *gre_error_strings[] = {
-#define gre_error(n,s) s,
+#define gre_error(n, s) s,
#include "error.def"
#undef gre_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (gre4_input_node) = {
.name = "gre4-input",
/* Takes a vector of packets. */
@@ -461,7 +444,7 @@ VLIB_REGISTER_NODE (gre4_input_node) = {
.n_next_nodes = GRE_INPUT_N_NEXT,
.next_nodes = {
-#define _(s,n) [GRE_INPUT_NEXT_##s] = n,
+#define _(s, n) [GRE_INPUT_NEXT_##s] = n,
foreach_gre_input_next
#undef _
},
@@ -483,7 +466,7 @@ VLIB_REGISTER_NODE (gre6_input_node) = {
.n_next_nodes = GRE_INPUT_N_NEXT,
.next_nodes = {
-#define _(s,n) [GRE_INPUT_NEXT_##s] = n,
+#define _(s, n) [GRE_INPUT_NEXT_##s] = n,
foreach_gre_input_next
#undef _
},
@@ -492,13 +475,11 @@ VLIB_REGISTER_NODE (gre6_input_node) = {
.format_trace = format_gre_rx_trace,
.unformat_buffer = unformat_gre_header,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
-gre_register_input_protocol (vlib_main_t * vm,
- gre_protocol_t protocol, u32 node_index,
- gre_tunnel_type_t tunnel_type)
+gre_register_input_protocol (vlib_main_t *vm, gre_protocol_t protocol,
+ u32 node_index, gre_tunnel_type_t tunnel_type)
{
gre_main_t *em = &gre_main;
gre_protocol_info_t *pi;
@@ -526,7 +507,7 @@ gre_register_input_protocol (vlib_main_t * vm,
}
static void
-gre_setup_node (vlib_main_t * vm, u32 node_index)
+gre_setup_node (vlib_main_t *vm, u32 node_index)
{
vlib_node_t *n = vlib_get_node (vm, node_index);
pg_node_t *pn = pg_get_node (node_index);
@@ -537,7 +518,7 @@ gre_setup_node (vlib_main_t * vm, u32 node_index)
}
static clib_error_t *
-gre_input_init (vlib_main_t * vm)
+gre_input_init (vlib_main_t *vm)
{
gre_main_t *gm = &gre_main;
vlib_node_t *ethernet_input, *ip4_input, *ip6_input, *mpls_unicast_input;
@@ -552,9 +533,9 @@ gre_input_init (vlib_main_t * vm)
gre_setup_node (vm, gre4_input_node.index);
gre_setup_node (vm, gre6_input_node.index);
- gm->next_by_protocol = sparse_vec_new
- ( /* elt bytes */ sizeof (gm->next_by_protocol[0]),
- /* bits in index */ BITS (((gre_header_t *) 0)->protocol));
+ gm->next_by_protocol =
+ sparse_vec_new (/* elt bytes */ sizeof (gm->next_by_protocol[0]),
+ /* bits in index */ BITS (((gre_header_t *) 0)->protocol));
/* These could be moved to the supported protocol input node defn's */
ethernet_input = vlib_get_node_by_name (vm, (u8 *) "ethernet-input");
@@ -566,14 +547,14 @@ gre_input_init (vlib_main_t * vm)
mpls_unicast_input = vlib_get_node_by_name (vm, (u8 *) "mpls-input");
ASSERT (mpls_unicast_input);
- gre_register_input_protocol (vm, GRE_PROTOCOL_teb,
- ethernet_input->index, GRE_TUNNEL_TYPE_TEB);
+ gre_register_input_protocol (vm, GRE_PROTOCOL_teb, ethernet_input->index,
+ GRE_TUNNEL_TYPE_TEB);
- gre_register_input_protocol (vm, GRE_PROTOCOL_ip4,
- ip4_input->index, GRE_TUNNEL_TYPE_L3);
+ gre_register_input_protocol (vm, GRE_PROTOCOL_ip4, ip4_input->index,
+ GRE_TUNNEL_TYPE_L3);
- gre_register_input_protocol (vm, GRE_PROTOCOL_ip6,
- ip6_input->index, GRE_TUNNEL_TYPE_L3);
+ gre_register_input_protocol (vm, GRE_PROTOCOL_ip6, ip6_input->index,
+ GRE_TUNNEL_TYPE_L3);
gre_register_input_protocol (vm, GRE_PROTOCOL_mpls_unicast,
mpls_unicast_input->index, GRE_TUNNEL_TYPE_L3);
diff --git a/src/vnet/gre/pg.c b/src/plugins/gre/pg.c
index 38a3a07ebad..91c9e487899 100644
--- a/src/vnet/gre/pg.c
+++ b/src/plugins/gre/pg.c
@@ -17,7 +17,7 @@
#include <vlib/vlib.h>
#include <vnet/pg/pg.h>
-#include <vnet/gre/gre.h>
+#include <gre/gre.h>
typedef struct
{
@@ -26,14 +26,14 @@ typedef struct
} pg_gre_header_t;
static inline void
-pg_gre_header_init (pg_gre_header_t * e)
+pg_gre_header_init (pg_gre_header_t *e)
{
pg_edit_init (&e->flags_and_version, gre_header_t, flags_and_version);
pg_edit_init (&e->protocol, gre_header_t, protocol);
}
uword
-unformat_pg_gre_header (unformat_input_t * input, va_list * args)
+unformat_pg_gre_header (unformat_input_t *input, va_list *args)
{
pg_stream_t *s = va_arg (*args, pg_stream_t *);
pg_gre_header_t *h;
@@ -46,8 +46,7 @@ unformat_pg_gre_header (unformat_input_t * input, va_list * args)
pg_edit_set_fixed (&h->flags_and_version, 0);
error = 1;
- if (!unformat (input, "%U",
- unformat_pg_edit,
+ if (!unformat (input, "%U", unformat_pg_edit,
unformat_gre_protocol_net_byte_order, &h->protocol))
goto done;
@@ -64,8 +63,8 @@ unformat_pg_gre_header (unformat_input_t * input, va_list * args)
pg_node = pg_get_node (pi->node_index);
}
- if (pg_node && pg_node->unformat_edit
- && unformat_user (input, pg_node->unformat_edit, s))
+ if (pg_node && pg_node->unformat_edit &&
+ unformat_user (input, pg_node->unformat_edit, s))
;
}
@@ -76,7 +75,6 @@ done:
return error == 0;
}
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/gbp/gbp_scanner.h b/src/plugins/gre/plugin.c
index 1133167d927..b92ec0b6dcd 100644
--- a/src/plugins/gbp/gbp_scanner.h
+++ b/src/plugins/gre/plugin.c
@@ -1,5 +1,7 @@
/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
+ * plugin.c: gre
+ *
+ * Copyright (c) 2023 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -13,18 +15,12 @@
* limitations under the License.
*/
-#ifndef __GBP_SCANNER_H__
-#define __GBP_SCANNER_H__
-
#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
-typedef enum gbp_scan_event_t_
-{
- GBP_ENDPOINT_SCAN_START,
- GBP_ENDPOINT_SCAN_STOP,
- GBP_ENDPOINT_SCAN_SET_TIME,
-} gbp_scan_event_t;
-
-extern vlib_node_registration_t gbp_scanner_node;
-
-#endif
+// register a plugin
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Generic Routing Encapsulation (GRE) plugin",
+};
diff --git a/src/plugins/gtpu/gtpu.api b/src/plugins/gtpu/gtpu.api
index ec4933af197..7c5c137a840 100644
--- a/src/plugins/gtpu/gtpu.api
+++ b/src/plugins/gtpu/gtpu.api
@@ -13,10 +13,34 @@
* limitations under the License.
*/
-option version = "2.0.1";
+option version = "2.1.0";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
+enum gtpu_forwarding_type
+{
+ GTPU_API_FORWARDING_NONE = 0,
+ GTPU_API_FORWARDING_BAD_HEADER = 1,
+ GTPU_API_FORWARDING_UNKNOWN_TEID = 2,
+ GTPU_API_FORWARDING_UNKNOWN_TYPE = 4,
+};
+
+enum gtpu_decap_next_type
+{
+ GTPU_API_DECAP_NEXT_DROP = 0,
+ GTPU_API_DECAP_NEXT_L2 = 1,
+ GTPU_API_DECAP_NEXT_IP4 = 2,
+ GTPU_API_DECAP_NEXT_IP6 = 3,
+};
+
+typedef sw_if_counters
+{
+ u64 packets_rx;
+ u64 packets_tx;
+ u64 bytes_rx;
+ u64 bytes_tx;
+};
+
/** \brief Create or delete a GTPU tunnel
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -56,6 +80,53 @@ define gtpu_add_del_tunnel_reply
vl_api_interface_index_t sw_if_index;
};
+/** \brief Create or delete a GTPU tunnel
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param src_address - GTPU tunnel's source address.
+ @param dst_address - GTPU tunnel's destination address.
+ @param mcast_sw_if_index - version, O-bit and C-bit (see nsh_packet.h)
+ @param encap_vrf_id - fib identifier used for outgoing encapsulated packets
+ @param decap_next_index - the index of the next node if success
+ @param teid - Local (rx) Tunnel Endpoint Identifier
+ @param tteid - Remote (tx) Tunnel Endpoint Identifier
+ @param pdu_extension - add PDU session container extension to each packet
+ @param qfi - the QFI to set in the PDU session container, 6 bits only
+*/
+define gtpu_add_del_tunnel_v2
+{
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_address_t src_address;
+ vl_api_address_t dst_address;
+ vl_api_interface_index_t mcast_sw_if_index;
+ u32 encap_vrf_id;
+ vl_api_gtpu_decap_next_type_t decap_next_index;
+ u32 teid;
+ u32 tteid;
+ bool pdu_extension;
+ u8 qfi;
+ option vat_help = "src <ip-addr> {dst <ip-addr> | group <mcast-ip-addr> {<intfc> | mcast_sw_if_index <nn>}} teid <nn> [tteid <nn>] [encap-vrf-id <nn>] [decap-next <l2|nn>] [qfi <nn>] [del]";
+ option in_progress;
+};
+
+/** \brief reply for set or delete an GTPU tunnel
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index of the interface
+ @param counters - Number of packets/bytes that is sent/received via this tunnel. Inaccurate (with in flight packets), sum for the entire set of per-thread counters. Zero for new tunnels.
+*/
+define gtpu_add_del_tunnel_v2_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_sw_if_counters_t counters;
+ option in_progress;
+};
+
/** \brief Update GTPU tunnel TX TEID
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -112,6 +183,56 @@ define gtpu_tunnel_details
u32 tteid;
};
+
+/** \brief Dump GTPU tunnel
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the interface
+*/
+define gtpu_tunnel_v2_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ option vat_help = "[<intfc> | sw_if_index <nn>]";
+ option in_progress;
+};
+
+/** \brief dump details of an GTPU tunnel
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - software index of the interface
+ @param src_address - GTPU tunnel's source address.
+ @param dst_address - GTPU tunnel's destination address.
+ @param mcast_sw_if_index - version, O-bit and C-bit (see nsh_packet.h)
+ @param encap_vrf_id - fib identifier used for outgoing encapsulated packets
+ @param decap_next_index - the index of the next node if success
+ @param teid - Local (rx) Tunnel Endpoint Identifier
+ @param tteid - Remote (tx) Tunnel Endpoint Identifier
+ @param pdu_extension - add PDU session container extension to each packet
+ @param qfi - the QFI to set in the PDU session container, 6 bits only
+ @param is_forwarding - tunnel used for forwarding packets
+ @param forwarding_type - the type of packets forwarded
+ @param counters - Number of packets/bytes that is sent/received via this tunnel. Inaccurate (with in flight packets), sum for the entire set of per-thread counters.
+*/
+define gtpu_tunnel_v2_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_address_t src_address;
+ vl_api_address_t dst_address;
+ vl_api_interface_index_t mcast_sw_if_index;
+ u32 encap_vrf_id;
+ vl_api_gtpu_decap_next_type_t decap_next_index;
+ u32 teid;
+ u32 tteid;
+ bool pdu_extension;
+ u8 qfi;
+ bool is_forwarding;
+ vl_api_gtpu_forwarding_type_t forwarding_type;
+ vl_api_sw_if_counters_t counters;
+ option in_progress;
+};
+
/** \brief Interface set gtpu-bypass request
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -146,6 +267,79 @@ autoreply define gtpu_offload_rx
option vat_help = "hw <intfc> rx <tunnel-name> [del]";
};
+/** \brief Set gtpu-forward request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add address if non-zero, else delete
+ @param forwarding_type - forward filter (unknown teid, unknown message type or unknown header)
+ @param dst_address - forward destination address.
+ @param encap_vrf_id - fib identifier used for outgoing packets
+ @param decap_next_index - the index of the next node if success
+*/
+define gtpu_add_del_forward
+{
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_address_t dst_address;
+ vl_api_gtpu_forwarding_type_t forwarding_type;
+ u32 encap_vrf_id;
+ vl_api_gtpu_decap_next_type_t decap_next_index;
+ option vat_help = "dst <ip-addr> {bad-header|unknown-teid|unknown-type} [decap-next <l2|nn>] [del]";
+ option in_progress;
+};
+
+/** \brief reply for set or delete GTPU forwarding
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index of the interface
+*/
+define gtpu_add_del_forward_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+ option in_progress;
+};
+
+/** \brief Get list of metrics, use for bulk transfer.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index_start - software index of the first interface to return data on.
+ @param capacity - max number of interfaces returned.
+*/
+define gtpu_get_transfer_counts
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index_start;
+ u32 capacity;
+ //option vat_help = "start_index <sw_if_index> count <nn>";
+ option in_progress;
+};
+
+/** \brief reply for set or delete GTPU forwarding
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param count - number of tunnel counters returned, sequential starting at sw_if_index_start.
+ @param tunnels - Number of packets/bytes that is sent/received via this tunnel. Inaccurate (with in flight packets), sum for the entire set of per-thread counters.
+*/
+typedef tunnel_metrics
+{
+ vl_api_interface_index_t sw_if_index;
+ u32 reserved;
+ vl_api_sw_if_counters_t counters;
+};
+
+define gtpu_get_transfer_counts_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ vl_api_tunnel_metrics_t tunnels[count];
+ option in_progress;
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/plugins/gtpu/gtpu.c b/src/plugins/gtpu/gtpu.c
index 531e45a1d5a..a2013c91c3f 100644
--- a/src/plugins/gtpu/gtpu.c
+++ b/src/plugins/gtpu/gtpu.c
@@ -35,7 +35,6 @@
gtpu_main_t gtpu_main;
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_gtpu_bypass, static) = {
.arc_name = "ip4-unicast",
.node_name = "ip4-gtpu-bypass",
@@ -47,7 +46,6 @@ VNET_FEATURE_INIT (ip6_gtpu_bypass, static) = {
.node_name = "ip6-gtpu-bypass",
.runs_before = VNET_FEATURES ("ip6-lookup"),
};
-/* *INDENT-on* */
u8 * format_gtpu_encap_trace (u8 * s, va_list * args)
{
@@ -56,8 +54,13 @@ u8 * format_gtpu_encap_trace (u8 * s, va_list * args)
gtpu_encap_trace_t * t
= va_arg (*args, gtpu_encap_trace_t *);
- s = format (s, "GTPU encap to gtpu_tunnel%d tteid %d",
- t->tunnel_index, t->tteid);
+ s = format (s, "GTPU encap to gtpu_tunnel%d tteid %u ", t->tunnel_index,
+ t->tteid);
+
+ if (t->pdu_extension)
+ s = format (s, "pdu-extension qfi %d ", t->qfi);
+ else
+ s = format (s, "no-pdu-extension ");
return s;
}
@@ -95,16 +98,37 @@ format_gtpu_tunnel (u8 * s, va_list * args)
is_ipv6 ? im6->fibs[t->encap_fib_index].ft_table_id :
im4->fibs[t->encap_fib_index].ft_table_id;
- s = format (s, "[%d] src %U dst %U teid %d tteid %d "
+ s = format (s,
+ "[%d] src %U dst %U teid %u tteid %u "
"encap-vrf-id %d sw-if-idx %d ",
- t - ngm->tunnels,
- format_ip46_address, &t->src, IP46_TYPE_ANY,
- format_ip46_address, &t->dst, IP46_TYPE_ANY,
- t->teid, t->tteid, encap_vrf_id, t->sw_if_index);
+ t - ngm->tunnels, format_ip46_address, &t->src, IP46_TYPE_ANY,
+ format_ip46_address, &t->dst, IP46_TYPE_ANY, t->teid, t->tteid,
+ encap_vrf_id, t->sw_if_index);
s = format (s, "encap-dpo-idx %d ", t->next_dpo.dpoi_index);
s = format (s, "decap-next-%U ", format_decap_next, t->decap_next_index);
+ if (t->is_forwarding)
+ {
+ switch (t->forwarding_type)
+ {
+ case GTPU_FORWARD_BAD_HEADER:
+ s = format (s, "forwarding bad-header ");
+ break;
+ case GTPU_FORWARD_UNKNOWN_TEID:
+ s = format (s, "forwarding unknown-teid ");
+ break;
+ case GTPU_FORWARD_UNKNOWN_TYPE:
+ s = format (s, "forwarding unknown-type ");
+ break;
+ }
+ return s;
+ }
+ if (t->pdu_extension != 0)
+ s = format (s, "pdu-enabled qfi %d ", t->qfi);
+ else
+ s = format (s, "pdu-disabled ");
+
if (PREDICT_FALSE (ip46_address_is_multicast (&t->dst)))
s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index);
@@ -128,14 +152,12 @@ gtpu_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
return /* no error */ 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (gtpu_device_class,static) = {
.name = "GTPU",
.format_device_name = format_gtpu_name,
.format_tx_trace = format_gtpu_encap_trace,
.admin_up_down_function = gtpu_interface_admin_up_down,
};
-/* *INDENT-ON* */
static u8 *
format_gtpu_header_with_length (u8 * s, va_list * args)
@@ -145,7 +167,6 @@ format_gtpu_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (gtpu_hw_class) =
{
.name = "GTPU",
@@ -153,7 +174,6 @@ VNET_HW_INTERFACE_CLASS (gtpu_hw_class) =
.build_rewrite = default_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
static void
gtpu_tunnel_restack_dpo (gtpu_tunnel_t * t)
@@ -224,15 +244,18 @@ const static fib_node_vft_t gtpu_vft = {
.fnv_back_walk = gtpu_tunnel_back_walk,
};
-
-#define foreach_copy_field \
-_(teid) \
-_(tteid) \
-_(mcast_sw_if_index) \
-_(encap_fib_index) \
-_(decap_next_index) \
-_(src) \
-_(dst)
+#define foreach_copy_field \
+ _ (teid) \
+ _ (tteid) \
+ _ (mcast_sw_if_index) \
+ _ (encap_fib_index) \
+ _ (decap_next_index) \
+ _ (src) \
+ _ (dst) \
+ _ (pdu_extension) \
+ _ (qfi) \
+ _ (is_forwarding) \
+ _ (forwarding_type)
static void
ip_udp_gtpu_rewrite (gtpu_tunnel_t * t, bool is_ip6)
@@ -251,12 +274,15 @@ ip_udp_gtpu_rewrite (gtpu_tunnel_t * t, bool is_ip6)
udp_header_t *udp;
gtpu_header_t *gtpu;
+ gtpu_ext_with_pdu_session_header_t *gtpu_ext_pdu;
+ i64 length_adjustment = 0;
/* Fixed portion of the (outer) ip header */
if (!is_ip6)
{
ip4_header_t *ip = &r.h4->ip4;
udp = &r.h4->udp;
gtpu = &r.h4->gtpu;
+ gtpu_ext_pdu = &r.h4->gtpu_ext;
ip->ip_version_and_header_length = 0x45;
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
@@ -272,6 +298,7 @@ ip_udp_gtpu_rewrite (gtpu_tunnel_t * t, bool is_ip6)
ip6_header_t *ip = &r.h6->ip6;
udp = &r.h6->udp;
gtpu = &r.h6->gtpu;
+ gtpu_ext_pdu = &r.h6->gtpu_ext;
ip->ip_version_traffic_class_and_flow_label =
clib_host_to_net_u32 (6 << 28);
ip->hop_limit = 255;
@@ -290,9 +317,27 @@ ip_udp_gtpu_rewrite (gtpu_tunnel_t * t, bool is_ip6)
gtpu->type = GTPU_TYPE_GTPU;
gtpu->teid = clib_host_to_net_u32 (t->tteid);
+ if (t->pdu_extension)
+ {
+ gtpu->ver_flags = GTPU_V1_VER | GTPU_PT_GTP | GTPU_E_BIT;
+ gtpu->next_ext_type = GTPU_EXT_HDR_PDU_SESSION_CONTAINER;
+ gtpu_ext_pdu->len = 1;
+ gtpu_ext_pdu->pdu.oct0 = GTPU_PDU_DL_SESSION_TYPE;
+ gtpu_ext_pdu->pdu.oct1 = t->qfi;
+ gtpu_ext_pdu->next_header = 0;
+ }
+ else
+ {
+ // Remove the size of the PDU session header and the optional fields
+ length_adjustment = -sizeof (gtpu_ext_with_pdu_session_header_t) - 4;
+ }
+
t->rewrite = r.rw;
- /* Now only support 8-byte gtpu header. TBD */
- _vec_len (t->rewrite) = sizeof (ip4_gtpu_header_t) - 4;
+ /* Now only support 8-byte gtpu header or 12+4-byte header. TBD */
+ if (!is_ip6)
+ vec_set_len (t->rewrite, sizeof (ip4_gtpu_header_t) + length_adjustment);
+ else
+ vec_set_len (t->rewrite, sizeof (ip6_gtpu_header_t) + length_adjustment);
return;
}
@@ -349,6 +394,139 @@ mcast_shared_remove (ip46_address_t * dst)
hash_unset_mem_free (&gtpu_main.mcast_shared, dst);
}
+int
+vnet_gtpu_add_del_forwarding (vnet_gtpu_add_mod_del_tunnel_args_t *a,
+ u32 *sw_if_indexp)
+{
+ gtpu_main_t *gtm = &gtpu_main;
+ bool is_add;
+ u32 current_index_value, current_index_value_ipv6;
+ u32 address_tabel_ipv4;
+ ip6_address_t address_tabel_ipv6;
+ u32 sw_if_index = ~0;
+ bool is_ip6 = !ip46_address_is_ip4 (&a->dst);
+ int rv;
+ /* Check for errors */
+ if (!a->is_forwarding)
+ {
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ switch (a->opn)
+ {
+ case GTPU_ADD_TUNNEL:
+ is_add = 1;
+ break;
+ case GTPU_DEL_TUNNEL:
+ is_add = 0;
+ break;
+ default:
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ /* Check if the operation is valid, and get the current state if it is.
+ * Handling multiple flags at once is not supported yet. */
+ switch (a->forwarding_type)
+ {
+ case GTPU_FORWARD_BAD_HEADER:
+ current_index_value = gtm->bad_header_forward_tunnel_index_ipv4;
+ current_index_value_ipv6 = gtm->bad_header_forward_tunnel_index_ipv6;
+ address_tabel_ipv4 = GTPU_FORWARD_BAD_HEADER_ADDRESS_IPV4;
+ /* ipv6 is TBD */
+ ip6_address_t address_tabel_ipv6_ = GTPU_FORWARD_BAD_HEADER_ADDRESS_IPV6;
+ address_tabel_ipv6 = address_tabel_ipv6_;
+ break;
+ case GTPU_FORWARD_UNKNOWN_TEID:
+ current_index_value = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ current_index_value_ipv6 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ address_tabel_ipv4 = GTPU_FORWARD_UNKNOWN_TEID_ADDRESS_IPV4;
+ ip6_address_t address_tabel_ipv6__ =
+ GTPU_FORWARD_UNKNOWN_TEID_ADDRESS_IPV6;
+ address_tabel_ipv6 = address_tabel_ipv6__;
+ break;
+ case GTPU_FORWARD_UNKNOWN_TYPE:
+ current_index_value = gtm->unknown_type_forward_tunnel_index_ipv4;
+ current_index_value_ipv6 = gtm->unknown_type_forward_tunnel_index_ipv6;
+ address_tabel_ipv4 = GTPU_FORWARD_UNKNOWN_TYPE_ADDRESS_IPV4;
+ ip6_address_t address_tabel_ipv6___ =
+ GTPU_FORWARD_UNKNOWN_TYPE_ADDRESS_IPV6;
+ address_tabel_ipv6 = address_tabel_ipv6___;
+ break;
+ default:
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ if (is_ip6)
+ current_index_value = current_index_value_ipv6;
+
+ /* Check if the existing forwarding rule state conflicts with this operation
+ */
+ if ((is_add) && (current_index_value != ~0))
+ {
+ return VNET_API_ERROR_TUNNEL_EXIST;
+ }
+ if (!is_add)
+ {
+ if (current_index_value == ~0)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ /* Clear the tunnel index before deleting the tunnel itself */
+ switch (a->forwarding_type)
+ {
+ case GTPU_FORWARD_BAD_HEADER:
+ gtm->bad_header_forward_tunnel_index_ipv4 = ~0;
+ break;
+ case GTPU_FORWARD_UNKNOWN_TEID:
+ gtm->unknown_teid_forward_tunnel_index_ipv4 = ~0;
+ break;
+ case GTPU_FORWARD_UNKNOWN_TYPE:
+ gtm->unknown_type_forward_tunnel_index_ipv4 = ~0;
+ break;
+ }
+ }
+
+ /* src is the tunnel lookup key, so it is fixed.
+ * dst is used for the new target */
+ a->src = a->dst;
+ if (is_ip6)
+ a->dst.ip6 = address_tabel_ipv6;
+ else
+ a->dst.ip4.as_u32 = address_tabel_ipv4;
+ rv = vnet_gtpu_add_mod_del_tunnel (a, &sw_if_index);
+
+ // Forward only if not nil
+ if (sw_if_indexp)
+ *sw_if_indexp = sw_if_index;
+
+ if (rv != 0)
+ return rv;
+
+ /* Update the forwarding tunnel index */
+ u32 tunnel_index = is_add ? vnet_gtpu_get_tunnel_index (sw_if_index) : ~0;
+ switch (a->forwarding_type)
+ {
+ case GTPU_FORWARD_BAD_HEADER:
+ if (is_ip6)
+ gtm->bad_header_forward_tunnel_index_ipv6 = tunnel_index;
+ else
+ gtm->bad_header_forward_tunnel_index_ipv4 = tunnel_index;
+
+ break;
+ case GTPU_FORWARD_UNKNOWN_TEID:
+ if (is_ip6)
+ gtm->unknown_teid_forward_tunnel_index_ipv6 = tunnel_index;
+ else
+ gtm->unknown_teid_forward_tunnel_index_ipv4 = tunnel_index;
+ break;
+ case GTPU_FORWARD_UNKNOWN_TYPE:
+ if (is_ip6)
+ gtm->unknown_type_forward_tunnel_index_ipv6 = tunnel_index;
+ else
+ gtm->unknown_type_forward_tunnel_index_ipv4 = tunnel_index;
+ break;
+ }
+ return 0;
+}
+
int vnet_gtpu_add_mod_del_tunnel
(vnet_gtpu_add_mod_del_tunnel_args_t * a, u32 * sw_if_indexp)
{
@@ -419,7 +597,7 @@ int vnet_gtpu_add_mod_del_tunnel
vnet_interface_main_t *im = &vnm->interface_main;
hw_if_index = gtm->free_gtpu_tunnel_hw_if_indices
[vec_len (gtm->free_gtpu_tunnel_hw_if_indices) - 1];
- _vec_len (gtm->free_gtpu_tunnel_hw_if_indices) -= 1;
+ vec_dec_len (gtm->free_gtpu_tunnel_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, hw_if_index);
hi->dev_instance = t - gtm->tunnels;
@@ -473,7 +651,8 @@ int vnet_gtpu_add_mod_del_tunnel
fib_prefix_t tun_dst_pfx;
vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
- fib_prefix_from_ip46_addr (&t->dst, &tun_dst_pfx);
+ fib_protocol_t fp = fib_ip_proto (is_ip6);
+ fib_prefix_from_ip46_addr (fp, &t->dst, &tun_dst_pfx);
if (!ip46_address_is_multicast (&t->dst))
{
/* Unicast tunnel -
@@ -497,8 +676,6 @@ int vnet_gtpu_add_mod_del_tunnel
* with different VNIs, create the output adjacency only if
* it does not already exist
*/
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
if (vtep_addr_ref (&gtm->vtep_table,
t->encap_fib_index, &t->dst) == 1)
{
@@ -524,15 +701,16 @@ int vnet_gtpu_add_mod_del_tunnel
* - the forwarding interface is for-us
* - the accepting interface is that from the API
*/
- mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx, MFIB_SOURCE_GTPU, &path);
+ mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
+ MFIB_SOURCE_GTPU,
+ MFIB_ENTRY_FLAG_NONE, &path);
path.frp_sw_if_index = a->mcast_sw_if_index;
path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei = mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx,
- MFIB_SOURCE_GTPU, &path);
+ mfei = mfib_table_entry_path_update (
+ t->encap_fib_index, &mpfx, MFIB_SOURCE_GTPU,
+ MFIB_ENTRY_FLAG_NONE, &path);
/*
* Create the mcast adjacency to send traffic to the group
@@ -577,6 +755,7 @@ int vnet_gtpu_add_mod_del_tunnel
if (a->tteid == 0)
return VNET_API_ERROR_INVALID_VALUE;
t->tteid = a->tteid;
+ vec_free (t->rewrite);
ip_udp_gtpu_rewrite (t, is_ip6);
return 0;
}
@@ -634,6 +813,22 @@ int vnet_gtpu_add_mod_del_tunnel
return 0;
}
+int
+get_combined_counters (u32 sw_if_index, vlib_counter_t *result_rx,
+ vlib_counter_t *result_tx)
+{
+ gtpu_main_t *gtm = &gtpu_main;
+ vnet_main_t *vnm = gtm->vnet_main;
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vlib_get_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_RX,
+ sw_if_index, result_rx);
+ vlib_get_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_TX,
+ sw_if_index, result_tx);
+ return 0;
+}
+
static uword
get_decap_next_for_node (u32 node_index, u32 ipv4_set)
{
@@ -689,6 +884,11 @@ gtpu_add_del_tunnel_command_fn (vlib_main_t * vm,
u32 decap_next_index = GTPU_INPUT_NEXT_L2_INPUT;
u32 teid = 0, tteid = 0;
u32 tmp;
+ /* PDU is disabled by default */
+ u8 pdu_extension = 0;
+ u32 qfi = ~0;
+ u8 is_forwarding = 0;
+ u8 forwarding_type = 0;
int rv;
vnet_gtpu_add_mod_del_tunnel_args_t _a, *a = &_a;
u32 tunnel_sw_if_index;
@@ -767,6 +967,8 @@ gtpu_add_del_tunnel_command_fn (vlib_main_t * vm,
;
else if (unformat (line_input, "upd-tteid %d", &tteid))
opn = GTPU_UPD_TTEID;
+ else if (unformat (line_input, "qfi %d", &qfi))
+ pdu_extension = 1;
else
{
error = clib_error_return (0, "parse error: '%U'",
@@ -828,7 +1030,11 @@ gtpu_add_del_tunnel_command_fn (vlib_main_t * vm,
error = clib_error_return (0, "next node not found");
goto done;
}
-
+ if (pdu_extension == 1 && qfi > 31)
+ {
+ error = clib_error_return (0, "qfi max value is 31");
+ goto done;
+ }
clib_memset (a, 0, sizeof (*a));
a->opn = opn;
@@ -879,29 +1085,30 @@ done:
* to span multiple servers. This is done by building an L2 overlay on
* top of an L3 network underlay using GTPU tunnels.
*
- * GTPU can also be used to transport IP packetes as its PDU type to
+ * GTPU can also be used to transport IP packets as its PDU type to
* allow IP forwarding over underlay network, e.g. between RAN and UPF
- * for mobility deplyments.
+ * for mobility deployments.
*
* @cliexpar
* Example of how to create a GTPU Tunnel:
- * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 teid 13 tteid 55 encap-vrf-id 7}
+ * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 teid 13 tteid 55
+ * encap-vrf-id 7}
* Example of how to delete a GTPU Tunnel:
- * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 teid 13 encap-vrf-id 7 del}
+ * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 teid 13 encap-vrf-id
+ * 7 del}
* Example of how to update tx TEID of a GTPU Tunnel:
- * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 encap-vrf-id 7 upd-tteid 55}
+ * @cliexcmd{create gtpu tunnel src 10.0.3.1 dst 10.0.3.3 encap-vrf-id 7
+ * upd-tteid 55}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_gtpu_tunnel_command, static) = {
.path = "create gtpu tunnel",
.short_help =
- "create gtpu tunnel src <local-tep-addr>"
- " {dst <remote-tep-addr>|group <mcast-addr> <intf-name>}"
- " teid <nn> [tteid <nn>] [encap-vrf-id <nn>]"
- " [decap-next [l2|ip4|ip6|node <name>]] [del | upd-tteid <nn>]",
+ "create gtpu tunnel src <local-tep-addr>"
+ " {dst <remote-tep-addr>|group <mcast-addr> <intf-name>}"
+ " teid <nn> [tteid <nn>] [encap-vrf-id <nn>]"
+ " [decap-next [l2|ip4|ip6|node <name>]] [qfi <nn>] [del | upd-tteid <nn>]",
.function = gtpu_add_del_tunnel_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_gtpu_tunnel_command_fn (vlib_main_t * vm,
@@ -928,16 +1135,15 @@ show_gtpu_tunnel_command_fn (vlib_main_t * vm,
* @cliexpar
* Example of how to display the GTPU Tunnel entries:
* @cliexstart{show gtpu tunnel}
- * [0] src 10.0.3.1 dst 10.0.3.3 teid 13 tx-teid 55 encap_fib_index 0 sw_if_index 5 decap_next l2
+ * [0] src 10.0.3.1 dst 10.0.3.3 teid 13 tx-teid 55 encap_fib_index 0
+ sw_if_index 5 decap_next l2 pdu-disabled
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_gtpu_tunnel_command, static) = {
.path = "show gtpu tunnel",
.short_help = "show gtpu tunnel",
.function = show_gtpu_tunnel_command_fn,
};
-/* *INDENT-ON* */
void
vnet_int_gtpu_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
@@ -1004,7 +1210,7 @@ set_ip4_gtpu_bypass (vlib_main_t * vm,
/*?
* This command adds the 'ip4-gtpu-bypass' graph node for a given interface.
* By adding the IPv4 gtpu-bypass graph node to an interface, the node checks
- * for and validate input gtpu packet and bypass ip4-lookup, ip4-local,
+ * for and validate input gtpu packet and bypass ip4-lookup, ip4-local,
* ip4-udp-lookup nodes to speedup gtpu packet forwarding. This node will
* cause extra overhead to for non-gtpu packets which is kept at a minimum.
*
@@ -1043,13 +1249,11 @@ set_ip4_gtpu_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip gtpu-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_gtpu_bypass_command, static) = {
.path = "set interface ip gtpu-bypass",
.function = set_ip4_gtpu_bypass,
.short_help = "set interface ip gtpu-bypass <interface> [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_ip6_gtpu_bypass (vlib_main_t * vm,
@@ -1061,7 +1265,7 @@ set_ip6_gtpu_bypass (vlib_main_t * vm,
/*?
* This command adds the 'ip6-gtpu-bypass' graph node for a given interface.
* By adding the IPv6 gtpu-bypass graph node to an interface, the node checks
- * for and validate input gtpu packet and bypass ip6-lookup, ip6-local,
+ * for and validate input gtpu packet and bypass ip6-lookup, ip6-local,
* ip6-udp-lookup nodes to speedup gtpu packet forwarding. This node will
* cause extra overhead to for non-gtpu packets which is kept at a minimum.
*
@@ -1100,13 +1304,11 @@ set_ip6_gtpu_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip6 gtpu-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_gtpu_bypass_command, static) = {
.path = "set interface ip6 gtpu-bypass",
.function = set_ip6_gtpu_bypass,
.short_help = "set interface ip6 gtpu-bypass <interface> [del]",
};
-/* *INDENT-ON* */
int
vnet_gtpu_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add)
@@ -1229,14 +1431,145 @@ gtpu_offload_command_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (gtpu_offload_command, static) = {
.path = "set flow-offload gtpu",
.short_help =
"set flow-offload gtpu hw <inerface-name> rx <tunnel-name> [del]",
.function = gtpu_offload_command_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+gtpu_forward_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ u32 tunnel_sw_if_index;
+ clib_error_t *error = NULL;
+
+ u32 decap_next_index = GTPU_INPUT_NEXT_L2_INPUT;
+
+ int is_add = 1;
+ u8 dst_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ ip46_address_t src, dst;
+ u32 encap_fib_index = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 teid = 0, tteid = 0;
+ u32 tmp;
+ /* PDU is disabled by default */
+ u8 pdu_extension = 0;
+ u32 qfi = ~0;
+ u8 is_forwarding = 1;
+ u8 forwarding_type = 0;
+ int rv;
+ vnet_gtpu_add_mod_del_tunnel_args_t _a, *a = &_a;
+
+ /* Cant "universally zero init" (={0}) due to GCC bug 53119 */
+ clib_memset (&src, 0, sizeof src);
+ clib_memset (&dst, 0, sizeof dst);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "dst %U", unformat_ip4_address, &dst.ip4))
+ {
+ dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "dst %U", unformat_ip6_address, &dst.ip6))
+ {
+ dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "decap-next %U", unformat_decap_next,
+ &decap_next_index, ipv4_set))
+ ;
+ else if (unformat (line_input, "encap-vrf-id %d", &tmp))
+ {
+ encap_fib_index = fib_table_find (fib_ip_proto (ipv6_set), tmp);
+ if (encap_fib_index == ~0)
+ {
+ error =
+ clib_error_return (0, "nonexistent encap-vrf-id %d", tmp);
+ goto done;
+ }
+ }
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "bad-header"))
+ forwarding_type |= GTPU_FORWARD_BAD_HEADER;
+ else if (unformat (line_input, "unknown-teid"))
+ forwarding_type |= GTPU_FORWARD_UNKNOWN_TEID;
+ else if (unformat (line_input, "unknown-type"))
+ forwarding_type |= GTPU_FORWARD_UNKNOWN_TYPE;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (!dst_set)
+ {
+ error = clib_error_return (0, "dst must be set to a valid IP address");
+ goto done;
+ }
+
+ a->opn = is_add ? GTPU_ADD_TUNNEL : GTPU_DEL_TUNNEL;
+#define _(x) a->x = x;
+ foreach_copy_field;
+#undef _
+
+ rv = vnet_gtpu_add_del_forwarding (a, &tunnel_sw_if_index);
+
+ switch (rv)
+ {
+ case 0:
+ if (is_add)
+ vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
+ vnet_get_main (), tunnel_sw_if_index);
+ break;
+
+ case VNET_API_ERROR_TUNNEL_EXIST:
+ error = clib_error_return (0, "tunnel already exists...");
+ goto done;
+
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "tunnel does not exist...");
+ goto done;
+
+ case VNET_API_ERROR_INVALID_ARGUMENT:
+ error =
+ clib_error_return (0, "one and only one of unknown-teid, unknown-type "
+ "or bad-header must be specified");
+ goto done;
+
+ default:
+ error =
+ clib_error_return (0, "vnet_gtpu_add_del_tunnel returned %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (gtpu_forward_command, static) = {
+ .path = "create gtpu forward",
+ .short_help =
+ "create gtpu forward dst <local-tep-addr> "
+ "{unknown-teid|unknown-type|bad-header} "
+ "[decap-next [l2|ip4|ip6|node <name>]] [encap-vrf-id <nn>] [del]",
+ .function = gtpu_forward_command_fn,
+};
clib_error_t *
gtpu_init (vlib_main_t * vm)
@@ -1258,19 +1591,25 @@ gtpu_init (vlib_main_t * vm)
sizeof (ip46_address_t),
sizeof (mcast_shared_t));
- gtm->fib_node_type = fib_node_register_new_type (&gtpu_vft);
+ gtm->fib_node_type = fib_node_register_new_type ("gtpu", &gtpu_vft);
+
+ /* Clear forward tunnels */
+ gtm->bad_header_forward_tunnel_index_ipv4 = ~0;
+ gtm->unknown_teid_forward_tunnel_index_ipv4 = ~0;
+ gtm->unknown_type_forward_tunnel_index_ipv4 = ~0;
+ gtm->bad_header_forward_tunnel_index_ipv6 = ~0;
+ gtm->unknown_teid_forward_tunnel_index_ipv6 = ~0;
+ gtm->unknown_type_forward_tunnel_index_ipv6 = ~0;
return 0;
}
VLIB_INIT_FUNCTION (gtpu_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "GPRS Tunnelling Protocol, User Data (GTPv1-U)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/gtpu/gtpu.h b/src/plugins/gtpu/gtpu.h
index 59e340148fb..881fbca936a 100644
--- a/src/plugins/gtpu/gtpu.h
+++ b/src/plugins/gtpu/gtpu.h
@@ -53,21 +53,56 @@
* 12 Next Extension Header Type3) 4)
**/
-typedef struct
-{
+typedef CLIB_PACKED (struct {
u8 ver_flags;
u8 type;
u16 length; /* length in octets of the data following the fixed part of the header */
u32 teid;
+ /* The following fields exists if and only if one or more of E, S or PN
+ * are 1. */
u16 sequence;
u8 pdu_number;
u8 next_ext_type;
-} gtpu_header_t;
+}) gtpu_header_t;
-#define GTPU_V1_HDR_LEN 8
+typedef CLIB_PACKED (struct {
+ u8 type;
+ u8 len;
+ u16 pad;
+}) gtpu_ext_header_t;
+
+/**
+ * DL PDU SESSION INFORMATION (PDU Type 0):
+ * (3GPP TS 38.415)
+ * Bits
+ * Octets 8 7 6 5 4 3 2 1
+ * 1 type qmp snp spare
+ * 2 ppp rqi qos_fi
+ *
+ * UL PDU SESSION INFORMATION (PDU Type 1):
+ * Bits
+ * Octets 8 7 6 5 4 3 2 1
+ * 1 type qmp DL d. UL d. snp
+ * 2 n3/n9 delay new IE qos_fi
+ **/
+typedef CLIB_PACKED (struct {
+ u8 oct0;
+ u8 oct1;
+ // Extensions are supported
+}) pdu_session_container_t;
+
+STATIC_ASSERT_SIZEOF (pdu_session_container_t, 2);
+typedef CLIB_PACKED (struct {
+ u8 len;
+ pdu_session_container_t pdu;
+ u8 next_header;
+}) gtpu_ext_with_pdu_session_header_t;
+
+#define GTPU_V1_HDR_LEN 8
#define GTPU_VER_MASK (7<<5)
#define GTPU_PT_BIT (1<<4)
+#define GTPU_RES_BIT (1 << 3)
#define GTPU_E_BIT (1<<2)
#define GTPU_S_BIT (1<<1)
#define GTPU_PN_BIT (1<<0)
@@ -78,25 +113,51 @@ typedef struct
#define GTPU_PT_GTP (1<<4)
#define GTPU_TYPE_GTPU 255
-/* *INDENT-OFF* */
+#define GTPU_EXT_HDR_PDU_SESSION_CONTAINER 133
+#define GTPU_NO_MORE_EXT_HDR 0
+#define GTPU_PDU_DL_SESSION_TYPE 0
+#define GTPU_PDU_UL_SESSION_TYPE (1 << 4)
+
+#define GTPU_FORWARD_BAD_HEADER (1 << 0)
+#define GTPU_FORWARD_UNKNOWN_TEID (1 << 1)
+#define GTPU_FORWARD_UNKNOWN_TYPE (1 << 2)
+
+/* the ipv4 addresses used for the forwarding tunnels. 127.0.0.127 - .129. */
+#define GTPU_FORWARD_BAD_HEADER_ADDRESS_IPV4 0x7f00007fu
+#define GTPU_FORWARD_UNKNOWN_TEID_ADDRESS_IPV4 0x8000007fu
+#define GTPU_FORWARD_UNKNOWN_TYPE_ADDRESS_IPV4 0x8100007fu
+
+/* the ipv6 addresses used for the forwarding tunnels.
+ * 2001:db8:ffff:ffff:ffff:ffff:ffff:fffd -
+ * 2001:db8:ffff:ffff:ffff:ffff:ffff:ffff*/
+#define GTPU_FORWARD_BAD_HEADER_ADDRESS_IPV6 \
+ { \
+ .as_u64[0] = 0xffffffffb80d0120ull, .as_u64[1] = 0xfdffffffffffffffull \
+ }
+#define GTPU_FORWARD_UNKNOWN_TEID_ADDRESS_IPV6 \
+ { \
+ .as_u64[0] = 0xffffffffb80d0120ull, .as_u64[1] = 0xfeffffffffffffffull \
+ }
+#define GTPU_FORWARD_UNKNOWN_TYPE_ADDRESS_IPV6 \
+ { \
+ .as_u64[0] = 0xffffffffb80d0120ull, .as_u64[1] = 0xffffffffffffffffull \
+ }
typedef CLIB_PACKED(struct
{
ip4_header_t ip4; /* 20 bytes */
udp_header_t udp; /* 8 bytes */
gtpu_header_t gtpu; /* 12 bytes */
+ gtpu_ext_with_pdu_session_header_t gtpu_ext; /* 4 bytes */
}) ip4_gtpu_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct
{
ip6_header_t ip6; /* 40 bytes */
udp_header_t udp; /* 8 bytes */
- gtpu_header_t gtpu; /* 8 bytes */
+ gtpu_header_t gtpu; /* 12 bytes */
+ gtpu_ext_with_pdu_session_header_t gtpu_ext; /* 4 bytes */
}) ip6_gtpu_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
/*
@@ -111,9 +172,7 @@ typedef CLIB_PACKED
u64 as_u64;
};
}) gtpu4_tunnel_key_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
/*
@@ -123,7 +182,6 @@ typedef CLIB_PACKED
ip6_address_t src;
u32 teid;
}) gtpu6_tunnel_key_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -157,6 +215,14 @@ typedef struct
u32 sw_if_index;
u32 hw_if_index;
+ /* PDU session container extension enable/disable */
+ u8 pdu_extension;
+ u8 qfi;
+
+ /* The tunnel is used for forwarding */
+ u8 is_forwarding;
+ u8 forwarding_type;
+
/**
* Linkage into the FIB object graph
*/
@@ -232,14 +298,25 @@ typedef struct
/* API message ID base */
u16 msg_id_base;
+ /* Handle GTP packets of unknown type like echo and error indication,
+ * unknown teid or bad version/header.
+ * All packets will be forwarded to a new IP address,
+ * so that they can be processes outside vpp.
+ * If not set then packets are dropped.
+ * One of more indexes can be unused (~0). */
+ u32 bad_header_forward_tunnel_index_ipv4;
+ u32 unknown_teid_forward_tunnel_index_ipv4;
+ u32 unknown_type_forward_tunnel_index_ipv4;
+ u32 bad_header_forward_tunnel_index_ipv6;
+ u32 unknown_teid_forward_tunnel_index_ipv6;
+ u32 unknown_type_forward_tunnel_index_ipv6;
+
/* convenience */
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
u32 flow_id_start;
/* cache for last 8 gtpu tunnel */
-#ifdef CLIB_HAVE_VEC512
vtep4_cache_t vtep4_u512;
-#endif
} gtpu_main_t;
@@ -265,8 +342,15 @@ typedef struct
u32 decap_next_index;
u32 teid; /* local or rx teid */
u32 tteid; /* remote or tx teid */
+ u8 pdu_extension;
+ u8 qfi;
+ u8 is_forwarding;
+ u8 forwarding_type;
} vnet_gtpu_add_mod_del_tunnel_args_t;
+int vnet_gtpu_add_del_forwarding (vnet_gtpu_add_mod_del_tunnel_args_t *a,
+ u32 *sw_if_indexp);
+
int vnet_gtpu_add_mod_del_tunnel
(vnet_gtpu_add_mod_del_tunnel_args_t * a, u32 * sw_if_indexp);
@@ -274,11 +358,15 @@ typedef struct
{
u32 tunnel_index;
u32 tteid;
+ u8 pdu_extension;
+ u8 qfi;
} gtpu_encap_trace_t;
void vnet_int_gtpu_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable);
u32 vnet_gtpu_get_tunnel_index (u32 sw_if_index);
int vnet_gtpu_add_del_rx_flow (u32 hw_if_index, u32 t_imdex, int is_add);
+int get_combined_counters (u32 sw_if_index, vlib_counter_t *result_rx,
+ vlib_counter_t *result_tx);
#endif /* included_vnet_gtpu_h */
diff --git a/src/plugins/gtpu/gtpu_api.c b/src/plugins/gtpu/gtpu_api.c
index 77432bae4fa..4efd9ac3bba 100644
--- a/src/plugins/gtpu/gtpu_api.c
+++ b/src/plugins/gtpu/gtpu_api.c
@@ -124,6 +124,10 @@ static void vl_api_gtpu_add_del_tunnel_t_handler
.decap_next_index = ntohl (mp->decap_next_index),
.teid = ntohl (mp->teid),
.tteid = ntohl (mp->tteid),
+ .pdu_extension = 0,
+ .qfi = 0,
+ .is_forwarding = 0,
+ .forwarding_type = 0,
};
ip_address_decode (&mp->dst_address, &a.dst);
ip_address_decode (&mp->src_address, &a.src);
@@ -154,12 +158,70 @@ static void vl_api_gtpu_add_del_tunnel_t_handler
rv = vnet_gtpu_add_mod_del_tunnel (&a, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_GTPU_ADD_DEL_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
+}
+
+static void
+vl_api_gtpu_add_del_tunnel_v2_t_handler (vl_api_gtpu_add_del_tunnel_v2_t *mp)
+{
+ vl_api_gtpu_add_del_tunnel_v2_reply_t *rmp;
+ int rv = 0;
+ vlib_counter_t result_rx;
+ vlib_counter_t result_tx;
+ gtpu_main_t *gtm = &gtpu_main;
+
+ vnet_gtpu_add_mod_del_tunnel_args_t a = {
+ .opn = mp->is_add ? GTPU_ADD_TUNNEL : GTPU_DEL_TUNNEL,
+ .mcast_sw_if_index = ntohl (mp->mcast_sw_if_index),
+ .decap_next_index = ntohl (mp->decap_next_index),
+ .teid = ntohl (mp->teid),
+ .tteid = ntohl (mp->tteid),
+ .pdu_extension = mp->pdu_extension ? 1 : 0,
+ .qfi = mp->qfi,
+ .is_forwarding = 0,
+ .forwarding_type = 0,
+ };
+ ip_address_decode (&mp->dst_address, &a.dst);
+ ip_address_decode (&mp->src_address, &a.src);
+
+ u8 is_ipv6 = !ip46_address_is_ip4 (&a.dst);
+ a.encap_fib_index =
+ fib_table_find (fib_ip_proto (is_ipv6), ntohl (mp->encap_vrf_id));
+ if (a.encap_fib_index == ~0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+
+ /* Check src & dst are different */
+ if (ip46_address_cmp (&a.dst, &a.src) == 0)
+ {
+ rv = VNET_API_ERROR_SAME_SRC_DST;
+ goto out;
+ }
+ if (ip46_address_is_multicast (&a.dst) &&
+ !vnet_sw_if_index_is_api_valid (a.mcast_sw_if_index))
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto out;
+ }
+
+ u32 sw_if_index = ~0;
+ rv = vnet_gtpu_add_mod_del_tunnel (&a, &sw_if_index);
+ get_combined_counters (sw_if_index, &result_rx, &result_tx);
+
+out:
+ REPLY_MACRO2 (
+ VL_API_GTPU_ADD_DEL_TUNNEL_V2_REPLY, ({
+ rmp->sw_if_index = ntohl (sw_if_index);
+ rmp->counters.packets_rx = clib_net_to_host_u64 (result_rx.packets);
+ rmp->counters.packets_tx = clib_net_to_host_u64 (result_tx.packets);
+ rmp->counters.bytes_rx = clib_net_to_host_u64 (result_rx.bytes);
+ rmp->counters.bytes_tx = clib_net_to_host_u64 (result_tx.bytes);
+ }));
}
static void vl_api_gtpu_tunnel_update_tteid_t_handler
@@ -238,12 +300,10 @@ vl_api_gtpu_tunnel_dump_t_handler (vl_api_gtpu_tunnel_dump_t * mp)
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (t, gtm->tunnels)
{
send_gtpu_tunnel_details(t, reg, mp->context);
- }
- /* *INDENT-ON* */
+ }
}
else
{
@@ -257,6 +317,184 @@ vl_api_gtpu_tunnel_dump_t_handler (vl_api_gtpu_tunnel_dump_t * mp)
}
}
+static void
+send_gtpu_tunnel_details_v2 (gtpu_tunnel_t *t, vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_gtpu_tunnel_v2_details_t *rmp;
+ vlib_counter_t result_rx;
+ vlib_counter_t result_tx;
+ gtpu_main_t *gtm = &gtpu_main;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ u8 is_ipv6 = !ip46_address_is_ip4 (&t->dst);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_GTPU_TUNNEL_V2_DETAILS + gtm->msg_id_base);
+
+ ip_address_encode (&t->src, is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &rmp->src_address);
+ ip_address_encode (&t->dst, is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &rmp->dst_address);
+
+ rmp->encap_vrf_id = is_ipv6 ?
+ htonl (im6->fibs[t->encap_fib_index].ft_table_id) :
+ htonl (im4->fibs[t->encap_fib_index].ft_table_id);
+ rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
+ rmp->teid = htonl (t->teid);
+ rmp->tteid = htonl (t->tteid);
+ rmp->decap_next_index = htonl (t->decap_next_index);
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->context = context;
+ rmp->pdu_extension = t->pdu_extension;
+ rmp->qfi = t->qfi;
+ rmp->is_forwarding = t->is_forwarding;
+ rmp->forwarding_type = htonl (t->forwarding_type);
+
+ get_combined_counters (t->sw_if_index, &result_rx, &result_tx);
+ rmp->counters.packets_rx = clib_net_to_host_u64 (result_rx.packets);
+ rmp->counters.packets_tx = clib_net_to_host_u64 (result_tx.packets);
+ rmp->counters.bytes_rx = clib_net_to_host_u64 (result_rx.bytes);
+ rmp->counters.bytes_tx = clib_net_to_host_u64 (result_tx.bytes);
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_gtpu_tunnel_v2_dump_t_handler (vl_api_gtpu_tunnel_v2_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ gtpu_main_t *gtm = &gtpu_main;
+ gtpu_tunnel_t *t;
+ u32 sw_if_index;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ pool_foreach (t, gtm->tunnels)
+ {
+ send_gtpu_tunnel_details_v2 (t, reg, mp->context);
+ }
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (gtm->tunnel_index_by_sw_if_index)) ||
+ (~0 == gtm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &gtm->tunnels[gtm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_gtpu_tunnel_details_v2 (t, reg, mp->context);
+ }
+}
+
+static void
+vl_api_gtpu_add_del_forward_t_handler (vl_api_gtpu_add_del_forward_t *mp)
+{
+ vl_api_gtpu_add_del_forward_reply_t *rmp;
+ int rv = 0;
+ gtpu_main_t *gtm = &gtpu_main;
+
+ vnet_gtpu_add_mod_del_tunnel_args_t a = {
+ .opn = mp->is_add ? GTPU_ADD_TUNNEL : GTPU_DEL_TUNNEL,
+ .mcast_sw_if_index = 0,
+ .decap_next_index = ntohl (mp->decap_next_index),
+ .teid = 0,
+ .tteid = 0,
+ .pdu_extension = 0,
+ .qfi = 0,
+ .is_forwarding = 1,
+ .forwarding_type = ntohl (mp->forwarding_type),
+ };
+ ip_address_decode (&mp->dst_address, &a.dst);
+ /* Will be overwritten later */
+ ip_address_decode (&mp->dst_address, &a.src);
+
+ u8 is_ipv6 = !ip46_address_is_ip4 (&a.dst);
+ a.encap_fib_index =
+ fib_table_find (fib_ip_proto (is_ipv6), ntohl (mp->encap_vrf_id));
+
+ if (a.encap_fib_index == ~0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+
+ if (ip46_address_is_multicast (&a.dst) &&
+ !vnet_sw_if_index_is_api_valid (a.mcast_sw_if_index))
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto out;
+ }
+
+ u32 sw_if_index = ~0;
+ rv = vnet_gtpu_add_del_forwarding (&a, &sw_if_index);
+
+out:
+ REPLY_MACRO2 (VL_API_GTPU_ADD_DEL_FORWARD_REPLY,
+ ({ rmp->sw_if_index = ntohl (sw_if_index); }));
+}
+
+static void
+vl_api_gtpu_get_transfer_counts_t_handler (
+ vl_api_gtpu_get_transfer_counts_t *mp)
+{
+ vl_api_gtpu_get_transfer_counts_reply_t *rmp;
+ int rv = 0;
+ vlib_counter_t result_rx;
+ vlib_counter_t result_tx;
+ gtpu_main_t *gtm = &gtpu_main;
+ u32 count = 0;
+ u32 sw_if_index;
+ u32 capacity = ntohl (mp->capacity);
+ u32 sw_if_index_start = ntohl (mp->sw_if_index_start);
+ int extra_size = sizeof (rmp->tunnels[0]) * capacity;
+
+ if (sw_if_index_start >= vec_len (gtm->tunnel_index_by_sw_if_index))
+ {
+ capacity = 0;
+ extra_size = 0;
+ }
+ sw_if_index = sw_if_index_start;
+
+ REPLY_MACRO4 (
+ VL_API_GTPU_GET_TRANSFER_COUNTS_REPLY, extra_size, ({
+ for (; count < capacity; sw_if_index++)
+ {
+ if (sw_if_index >= vec_len (gtm->tunnel_index_by_sw_if_index))
+ {
+ // No more tunnels
+ break;
+ }
+ if (~0 == gtm->tunnel_index_by_sw_if_index[sw_if_index])
+ {
+ // Skip inactive/deleted tunnel
+ continue;
+ }
+ rmp->tunnels[count].sw_if_index = htonl (sw_if_index);
+ rmp->tunnels[count].reserved = 0;
+
+ get_combined_counters (sw_if_index, &result_rx, &result_tx);
+ rmp->tunnels[count].counters.packets_rx =
+ clib_net_to_host_u64 (result_rx.packets);
+ rmp->tunnels[count].counters.packets_tx =
+ clib_net_to_host_u64 (result_tx.packets);
+ rmp->tunnels[count].counters.bytes_rx =
+ clib_net_to_host_u64 (result_rx.bytes);
+ rmp->tunnels[count].counters.bytes_tx =
+ clib_net_to_host_u64 (result_tx.bytes);
+ count++;
+ }
+ rmp->count = htonl (count);
+ }));
+}
+
#include <gtpu/gtpu.api.c>
static clib_error_t *
gtpu_api_hookup (vlib_main_t * vm)
diff --git a/src/plugins/gtpu/gtpu_decap.c b/src/plugins/gtpu/gtpu_decap.c
index 40243dbcc53..093d85ef13c 100644
--- a/src/plugins/gtpu/gtpu_decap.c
+++ b/src/plugins/gtpu/gtpu_decap.c
@@ -26,6 +26,8 @@ typedef struct {
u32 tunnel_index;
u32 error;
u32 teid;
+ gtpu_header_t header;
+ u8 forwarding_type;
} gtpu_rx_trace_t;
static u8 * format_gtpu_rx_trace (u8 * s, va_list * args)
@@ -36,14 +38,29 @@ static u8 * format_gtpu_rx_trace (u8 * s, va_list * args)
if (t->tunnel_index != ~0)
{
- s = format (s, "GTPU decap from gtpu_tunnel%d teid %d next %d error %d",
- t->tunnel_index, t->teid, t->next_index, t->error);
+ s = format (s, "GTPU decap from gtpu_tunnel%d ", t->tunnel_index);
+ switch (t->forwarding_type)
+ {
+ case GTPU_FORWARD_BAD_HEADER:
+ s = format (s, "forwarding bad-header ");
+ break;
+ case GTPU_FORWARD_UNKNOWN_TEID:
+ s = format (s, "forwarding unknown-teid ");
+ break;
+ case GTPU_FORWARD_UNKNOWN_TYPE:
+ s = format (s, "forwarding unknown-type ");
+ break;
+ }
+ s = format (s, "teid %u, ", t->teid);
}
else
{
- s = format (s, "GTPU decap error - tunnel for teid %d does not exist",
+ s = format (s, "GTPU decap error - tunnel for teid %u does not exist, ",
t->teid);
}
+ s = format (s, "next %d error %d, ", t->next_index, t->error);
+ s = format (s, "flags: 0x%x, type: %d, length: %d", t->header.ver_flags,
+ t->header.type, t->header.length);
return s;
}
@@ -53,6 +70,7 @@ validate_gtpu_fib (vlib_buffer_t *b, gtpu_tunnel_t *t, u32 is_ip4)
return t->encap_fib_index == vlib_buffer_get_ip_fib_index (b, is_ip4);
}
+// Gets run with every input
always_inline uword
gtpu_input (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -75,28 +93,41 @@ gtpu_input (vlib_main_t * vm,
else
clib_memset (&last_key6, 0xff, sizeof (last_key6));
+ // Where is the framevector coming from
from = vlib_frame_vector_args (from_frame);
+ // number of packets left in frame
n_left_from = from_frame->n_vectors;
+ // whats the next node it needs to go to
next_index = node->cached_next_index;
+ // stats from the next interface
stats_sw_if_index = node->runtime_data[0];
+ // number of packets processed
stats_n_packets = stats_n_bytes = 0;
+ // run until no more packets left in vectorframe
while (n_left_from > 0)
{
u32 n_left_to_next;
+ // get vectorframe to process
vlib_get_next_frame (vm, node, next_index,
to_next, n_left_to_next);
+ // while there are still more than 4 packets left in frame and more than
+ // two packets in current frame
while (n_left_from >= 4 && n_left_to_next >= 2)
{
- u32 bi0, bi1;
+ // buffer index for loading packet data
+ u32 bi0, bi1;
+ // vlib packet buffer
vlib_buffer_t * b0, * b1;
+ // next operation to do with the packet
u32 next0, next1;
- ip4_header_t * ip4_0, * ip4_1;
- ip6_header_t * ip6_0, * ip6_1;
- gtpu_header_t * gtpu0, * gtpu1;
- u32 gtpu_hdr_len0, gtpu_hdr_len1;
+ // IP4 header type
+ ip4_header_t *ip4_0, *ip4_1;
+ ip6_header_t *ip6_0, *ip6_1;
+ gtpu_header_t *gtpu0, *gtpu1;
+ i32 gtpu_hdr_len0, gtpu_hdr_len1;
uword * p0, * p1;
u32 tunnel_index0, tunnel_index1;
gtpu_tunnel_t * t0, * t1, * mt0 = NULL, * mt1 = NULL;
@@ -106,11 +137,19 @@ gtpu_input (vlib_main_t * vm,
u32 sw_if_index0, sw_if_index1, len0, len1;
u8 has_space0, has_space1;
u8 ver0, ver1;
+ udp_header_t *udp0, *udp1;
+ ip_csum_t sum0, sum1;
+ u32 old0, old1;
+ gtpu_ext_header_t ext = { .type = 0, .len = 0, .pad = 0 };
+ gtpu_ext_header_t *ext0, *ext1;
+ bool is_fast_track0, is_fast_track1;
+ ext0 = ext1 = &ext;
/* Prefetch next iteration. */
{
vlib_buffer_t * p2, * p3;
+ // prefetch 3 and 4
p2 = vlib_get_buffer (vm, from[2]);
p3 = vlib_get_buffer (vm, from[3]);
@@ -121,57 +160,172 @@ gtpu_input (vlib_main_t * vm,
CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
}
+ // getting buffer index from vectorframe
bi0 = from[0];
bi1 = from[1];
+ // pre inserting the packets for the next node
to_next[0] = bi0;
to_next[1] = bi1;
+ // forward in vectorframe
from += 2;
+ // forward next node
to_next += 2;
+ // decimate message counter for next node
n_left_to_next -= 2;
+ // decimate message counter for current progessing node
n_left_from -= 2;
+ // load packets into buffer
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
/* udp leaves current_data pointing at the gtpu header */
- gtpu0 = vlib_buffer_get_current (b0);
- gtpu1 = vlib_buffer_get_current (b1);
- if (is_ip4)
- {
- ip4_0 = (void *)((u8*)gtpu0 - sizeof(udp_header_t) - sizeof(ip4_header_t));
- ip4_1 = (void *)((u8*)gtpu1 - sizeof(udp_header_t) - sizeof(ip4_header_t));
- }
- else
- {
- ip6_0 = (void *)((u8*)gtpu0 - sizeof(udp_header_t) - sizeof(ip6_header_t));
- ip6_1 = (void *)((u8*)gtpu1 - sizeof(udp_header_t) - sizeof(ip6_header_t));
- }
+ // get pointers to the beginnings of the gtpu frame
+ gtpu0 = vlib_buffer_get_current (b0);
+ gtpu1 = vlib_buffer_get_current (b1);
+ if (is_ip4)
+ {
+ ip4_0 = (void *) ((u8 *) gtpu0 - sizeof (udp_header_t) -
+ sizeof (ip4_header_t));
+ ip4_1 = (void *) ((u8 *) gtpu1 - sizeof (udp_header_t) -
+ sizeof (ip4_header_t));
+ }
+ else
+ {
+ ip6_0 = (void *) ((u8 *) gtpu0 - sizeof (udp_header_t) -
+ sizeof (ip6_header_t));
+ ip6_1 = (void *) ((u8 *) gtpu1 - sizeof (udp_header_t) -
+ sizeof (ip6_header_t));
+ }
+ udp0 = (void *) ((u8 *) gtpu0 - sizeof (udp_header_t));
+ udp1 = (void *) ((u8 *) gtpu1 - sizeof (udp_header_t));
- tunnel_index0 = ~0;
- error0 = 0;
+ tunnel_index0 = ~0;
+ error0 = 0;
- tunnel_index1 = ~0;
- error1 = 0;
+ tunnel_index1 = ~0;
+ error1 = 0;
- /* speculatively load gtp header version field */
- ver0 = gtpu0->ver_flags;
- ver1 = gtpu1->ver_flags;
+ /* speculatively load gtp header version field */
+ ver0 = gtpu0->ver_flags;
+ ver1 = gtpu1->ver_flags;
/*
* Manipulate gtpu header
* TBD: Manipulate Sequence Number and N-PDU Number
* TBD: Manipulate Next Extension Header
*/
- gtpu_hdr_len0 = sizeof(gtpu_header_t) - (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4);
- gtpu_hdr_len1 = sizeof(gtpu_header_t) - (((ver1 & GTPU_E_S_PN_BIT) == 0) * 4);
-
- has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
- has_space1 = vlib_buffer_has_space (b1, gtpu_hdr_len1);
- if (PREDICT_FALSE (((ver0 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space0)))
+ /* Perform all test assuming the packet has the needed space.
+ * Check if version 1, not PT, not reserved.
+ * Check message type 255.
+ */
+ is_fast_track0 =
+ ((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track0 = is_fast_track0 & (gtpu0->type == 255);
+
+ is_fast_track1 =
+ ((ver1 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track1 = is_fast_track1 & (gtpu1->type == 255);
+
+ /* Make the header overlap the end of the gtpu_header_t, so
+ * that it starts with the same Next extension header as the
+ * gtpu_header_t.
+ * This means that the gtpu_ext_header_t (ext) has the type
+ * from the previous header and the length from the current one.
+ * Works both for the first gtpu_header_t and all following
+ * gtpu_ext_header_t extensions.
+ * Copy the ext data if the E bit is set, else use the 0 value.
+ */
+ ext0 = (ver0 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu0->next_ext_type :
+ &ext;
+ ext1 = (ver1 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu1->next_ext_type :
+ &ext;
+
+ /* One or more of the E, S and PN flags are set, so all 3 fields
+ * must be present:
+ * The gtpu_header_t contains the Sequence number, N-PDU number and
+ * Next extension header type.
+ * If E is not set subtract 4 bytes from the header.
+ * Then add the length of the extension. 0 * 4 if E is not set,
+ * else it's the ext->len from the gtp extension. Length is multiple
+ * of 4 always.
+ * Note: This length is only valid if the header itself is valid,
+ * so it must be verified before use.
+ */
+ gtpu_hdr_len0 = sizeof (gtpu_header_t) -
+ (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext0->len * 4;
+ gtpu_hdr_len1 = sizeof (gtpu_header_t) -
+ (((ver1 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext1->len * 4;
+
+ /* Get the next extension, unconditionally.
+ * If E was not set in the gtp header ext->len is zero.
+ * If E was set ext0 will now point to the packet buffer.
+ * If the gtp packet is illegal this might point outside the buffer.
+ * TBD check the updated for ext0->type != 0, and continue removing
+ * extensions. Only for clarity, will be optimized away.
+ */
+ ext0 += ext0->len * 4 / sizeof (*ext0);
+ ext1 += ext1->len * 4 / sizeof (*ext1);
+
+ /* Check the space, if this is true then ext0 points to a valid
+ * location in the buffer as well.
+ */
+ has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
+ has_space1 = vlib_buffer_has_space (b1, gtpu_hdr_len1);
+
+ /* Diverge the packet paths for 0 and 1 */
+ if (PREDICT_FALSE ((!is_fast_track0) | (!has_space0)))
{
- error0 = has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ /* Not fast path. ext0 and gtpu_hdr_len0 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space0)))
+ {
+ /* The header or size is wrong */
+ error0 =
+ has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is an unsupported/bad packet.
+ * Check if it is to be forwarded.
+ */
+ if (is_ip4)
+ tunnel_index0 = gtm->bad_header_forward_tunnel_index_ipv4;
+ else
+ tunnel_index0 = gtm->bad_header_forward_tunnel_index_ipv6;
+
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
+
+ goto trace0;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type.
+ */
+ error0 = GTPU_ERROR_UNSUPPORTED_TYPE;
next0 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is an error/nonstandard packet
+ * Check if it is to be forwarded. */
+ if (is_ip4)
+ tunnel_index0 = gtm->unknown_type_forward_tunnel_index_ipv4;
+ else
+ tunnel_index0 = gtm->unknown_type_forward_tunnel_index_ipv6;
+
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
+
+ /* The packet is ipv6/not forwarded */
goto trace0;
}
@@ -180,22 +334,31 @@ gtpu_input (vlib_main_t * vm,
key4_0.src = ip4_0->src_address.as_u32;
key4_0.teid = gtpu0->teid;
- /* Make sure GTPU tunnel exist according to packet SIP and teid
- * SIP identify a GTPU path, and teid identify a tunnel in a given GTPU path */
- if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
- {
- p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
- if (PREDICT_FALSE (p0 == NULL))
- {
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace0;
- }
- last_key4.as_u64 = key4_0.as_u64;
- tunnel_index0 = last_tunnel_index = p0[0];
- }
- else
- tunnel_index0 = last_tunnel_index;
+ /* Make sure GTPU tunnel exist according to packet SourceIP and
+ * teid SourceIP identify a GTPU path, and teid identify a tunnel
+ * in a given GTPU path */
+ if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
+ {
+ p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ /* This is a standard packet, but no tunnel was found.
+ * Check if it is to be forwarded. */
+ tunnel_index0 =
+ gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
+ goto trace0;
+ }
+ last_key4.as_u64 = key4_0.as_u64;
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else // when the address of the packet is the same as the packet
+ // before ... saving lookup in table
+ tunnel_index0 = last_tunnel_index;
+ // tunnel index in vpp
t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
/* Validate GTPU tunnel encap-fib index against packet */
@@ -203,10 +366,13 @@ gtpu_input (vlib_main_t * vm,
{
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
goto trace0;
}
- /* Validate GTPU tunnel SIP against packet DIP */
+ /* Validate GTPU tunnel SourceIP against packet DestinationIP */
if (PREDICT_TRUE (ip4_0->dst_address.as_u32 == t0->src.ip4.as_u32))
goto next0; /* valid packet */
if (PREDICT_FALSE (ip4_address_is_multicast (&ip4_0->dst_address)))
@@ -223,6 +389,9 @@ gtpu_input (vlib_main_t * vm,
}
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
goto trace0;
} else /* !is_ip4 */ {
@@ -239,13 +408,19 @@ gtpu_input (vlib_main_t * vm,
{
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
- goto trace0;
- }
- clib_memcpy_fast (&last_key6, &key6_0, sizeof(key6_0));
- tunnel_index0 = last_tunnel_index = p0[0];
- }
- else
- tunnel_index0 = last_tunnel_index;
+ /* This is a standard packet, but no tunnel was found.
+ * Check if it is to be forwarded. */
+ tunnel_index0 =
+ gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
+ goto trace0;
+ }
+ clib_memcpy_fast (&last_key6, &key6_0, sizeof (key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
/* Validate GTPU tunnel encap-fib index against packet */
@@ -253,6 +428,9 @@ gtpu_input (vlib_main_t * vm,
{
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
goto trace0;
}
@@ -274,28 +452,85 @@ gtpu_input (vlib_main_t * vm,
}
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward0;
goto trace0;
}
+ forward0:
+ /* Get the tunnel */
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_ERROR_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+
+ /* Clear the error, next0 will be overwritten by the tunnel */
+ error0 = 0;
+
+ if (is_ip4)
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len0 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip4_header_t));
+ /* Backup the IP4 checksum and address */
+ sum0 = ip4_0->checksum;
+ old0 = ip4_0->dst_address.as_u32;
+
+ /* Update IP address of the packet using the src from the tunnel
+ */
+ ip4_0->dst_address.as_u32 = t0->src.ip4.as_u32;
+
+ /* Fix the IP4 checksum */
+ sum0 = ip_csum_update (sum0, old0, ip4_0->dst_address.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip4_0->checksum = ip_csum_fold (sum0);
+ }
+ else
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len0 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip6_header_t));
+ /* IPv6 UDP checksum is mandatory */
+ int bogus = 0;
+ udp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6_0, &bogus);
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ }
next0:
- /* Pop gtpu header */
+ /* Pop/Remove gtpu header from buffered package or push existing
+ * IP+UDP header back to the buffer*/
vlib_buffer_advance (b0, gtpu_hdr_len0);
- next0 = t0->decap_next_index;
- sw_if_index0 = t0->sw_if_index;
- len0 = vlib_buffer_length_in_chain (vm, b0);
+ // where does it need to go in the graph next
+ next0 = t0->decap_next_index;
+ // interface index the package is on
+ sw_if_index0 = t0->sw_if_index;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- if (PREDICT_TRUE(next0 == GTPU_INPUT_NEXT_L2_INPUT))
- vnet_update_l2_len (b0);
+ // Next three lines are for forwarding the payload to L2
+ // subinterfaces
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ if (PREDICT_TRUE (next0 == GTPU_INPUT_NEXT_L2_INPUT))
+ vnet_update_l2_len (b0);
- /* Set packet input sw_if_index to unicast GTPU tunnel for learning */
- vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ /* Set packet input sw_if_index to unicast GTPU tunnel for learning
+ */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ // in case its a multicast packet set different interface index
sw_if_index0 = (mt0) ? mt0->sw_if_index : sw_if_index0;
- pkts_decapsulated ++;
- stats_n_packets += 1;
- stats_n_bytes += len0;
+ // Update stats
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
/* Batch stats increment on the same gtpu tunnel so counter
is not incremented per packet */
@@ -324,12 +559,61 @@ gtpu_input (vlib_main_t * vm,
tr->error = error0;
tr->tunnel_index = tunnel_index0;
tr->teid = has_space0 ? clib_net_to_host_u32(gtpu0->teid) : ~0;
- }
- if (PREDICT_FALSE (((ver1 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space1)))
+ if (vlib_buffer_has_space (b0, 4))
+ {
+ tr->header.ver_flags = gtpu0->ver_flags;
+ tr->header.type = gtpu0->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu0->length);
+ }
+ }
+
+ /* End of processing for packet 0, start for packet 1 */
+ if (PREDICT_FALSE ((!is_fast_track1) | (!has_space1)))
{
- error1 = has_space1 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ /* Not fast path. ext1 and gtpu_hdr_len1 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver1 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space1)))
+ {
+ /* The header or size is wrong */
+ error1 =
+ has_space1 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is an unsupported/bad packet.
+ * Check if it is to be forwarded.
+ */
+ if (is_ip4)
+ tunnel_index1 = gtm->bad_header_forward_tunnel_index_ipv4;
+ else
+ tunnel_index1 = gtm->bad_header_forward_tunnel_index_ipv6;
+
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
+
+ goto trace1;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type.
+ */
+ error1 = GTPU_ERROR_UNSUPPORTED_TYPE;
next1 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is an error/nonstandard packet
+ * Check if it is to be forwarded. */
+ if (is_ip4)
+ tunnel_index1 = gtm->unknown_type_forward_tunnel_index_ipv4;
+ else
+ tunnel_index1 = gtm->unknown_type_forward_tunnel_index_ipv6;
+
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
+
+ /* The packet is ipv6/not forwarded */
goto trace1;
}
@@ -347,20 +631,27 @@ gtpu_input (vlib_main_t * vm,
{
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
- goto trace1;
- }
- last_key4.as_u64 = key4_1.as_u64;
- tunnel_index1 = last_tunnel_index = p1[0];
- }
- else
- tunnel_index1 = last_tunnel_index;
- t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
+ tunnel_index1 =
+ gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
+ goto trace1;
+ }
+ last_key4.as_u64 = key4_1.as_u64;
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
/* Validate GTPU tunnel encap-fib index against packet */
if (PREDICT_FALSE (validate_gtpu_fib (b1, t1, is_ip4) == 0))
{
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index1 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
goto trace1;
}
@@ -381,6 +672,9 @@ gtpu_input (vlib_main_t * vm,
}
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index1 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
goto trace1;
} else /* !is_ip4 */ {
@@ -398,21 +692,28 @@ gtpu_input (vlib_main_t * vm,
{
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
- goto trace1;
- }
+ tunnel_index1 =
+ gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
+ goto trace1;
+ }
- clib_memcpy_fast (&last_key6, &key6_1, sizeof(key6_1));
- tunnel_index1 = last_tunnel_index = p1[0];
- }
- else
- tunnel_index1 = last_tunnel_index;
- t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
+ clib_memcpy_fast (&last_key6, &key6_1, sizeof (key6_1));
+ tunnel_index1 = last_tunnel_index = p1[0];
+ }
+ else
+ tunnel_index1 = last_tunnel_index;
+ t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
/* Validate GTPU tunnel encap-fib index against packet */
if (PREDICT_FALSE (validate_gtpu_fib (b1, t1, is_ip4) == 0))
{
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index1 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
goto trace1;
}
@@ -434,11 +735,63 @@ gtpu_input (vlib_main_t * vm,
}
error1 = GTPU_ERROR_NO_SUCH_TUNNEL;
next1 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index1 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index1 != ~0))
+ goto forward1;
goto trace1;
}
+ forward1:
+
+ /* Get the tunnel */
+ t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b1, t1, is_ip4) == 0))
+ {
+ error1 = GTPU_ERROR_NO_ERROR_TUNNEL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+
+ /* Clear the error, next0 will be overwritten by the tunnel */
+ error1 = 0;
+
+ if (is_ip4)
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len1 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip4_header_t));
+
+ /* Backup the IP4 checksum and address */
+ sum1 = ip4_1->checksum;
+ old1 = ip4_1->dst_address.as_u32;
+
+ /* Update IP address of the packet using the src from the tunnel
+ */
+ ip4_1->dst_address.as_u32 = t1->src.ip4.as_u32;
+
+ /* Fix the IP4 checksum */
+ sum1 = ip_csum_update (sum1, old1, ip4_1->dst_address.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip4_1->checksum = ip_csum_fold (sum1);
+ }
+ else
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len1 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip6_header_t));
+
+ /* IPv6 UDP checksum is mandatory */
+ int bogus = 0;
+ udp1->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b1, ip6_1, &bogus);
+ if (udp1->checksum == 0)
+ udp1->checksum = 0xffff;
+ }
next1:
- /* Pop gtpu header */
+ /* Pop gtpu header / push IP+UDP header */
vlib_buffer_advance (b1, gtpu_hdr_len1);
next1 = t1->decap_next_index;
@@ -484,13 +837,21 @@ gtpu_input (vlib_main_t * vm,
tr->error = error1;
tr->tunnel_index = tunnel_index1;
tr->teid = has_space1 ? clib_net_to_host_u32(gtpu1->teid) : ~0;
- }
+ if (vlib_buffer_has_space (b1, 4))
+ {
+ tr->header.ver_flags = gtpu1->ver_flags;
+ tr->header.type = gtpu1->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu1->length);
+ }
+ }
vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
to_next, n_left_to_next,
bi0, bi1, next0, next1);
}
+ /* In case there are less than 4 packets left in frame and packets in
+ current frame aka single processing */
while (n_left_from > 0 && n_left_to_next > 0)
{
u32 bi0;
@@ -499,7 +860,7 @@ gtpu_input (vlib_main_t * vm,
ip4_header_t * ip4_0;
ip6_header_t * ip6_0;
gtpu_header_t * gtpu0;
- u32 gtpu_hdr_len0;
+ i32 gtpu_hdr_len0;
uword * p0;
u32 tunnel_index0;
gtpu_tunnel_t * t0, * mt0 = NULL;
@@ -509,6 +870,13 @@ gtpu_input (vlib_main_t * vm,
u32 sw_if_index0, len0;
u8 has_space0;
u8 ver0;
+ udp_header_t *udp0;
+ ip_csum_t sum0;
+ u32 old0;
+ gtpu_ext_header_t ext = { .type = 0, .len = 0, .pad = 0 };
+ gtpu_ext_header_t *ext0;
+ bool is_fast_track0;
+ ext0 = &ext;
bi0 = from[0];
to_next[0] = bi0;
@@ -526,112 +894,197 @@ gtpu_input (vlib_main_t * vm,
} else {
ip6_0 = (void *)((u8*)gtpu0 - sizeof(udp_header_t) - sizeof(ip6_header_t));
}
+ udp0 = (void *) ((u8 *) gtpu0 - sizeof (udp_header_t));
- tunnel_index0 = ~0;
- error0 = 0;
-
- /* speculatively load gtp header version field */
- ver0 = gtpu0->ver_flags;
+ tunnel_index0 = ~0;
+ error0 = 0;
+ /* speculatively load gtp header version field */
+ ver0 = gtpu0->ver_flags;
/*
* Manipulate gtpu header
* TBD: Manipulate Sequence Number and N-PDU Number
* TBD: Manipulate Next Extension Header
*/
- gtpu_hdr_len0 = sizeof(gtpu_header_t) - (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4);
- has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
+ is_fast_track0 =
+ ((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track0 = is_fast_track0 & (gtpu0->type == 255);
- if (PREDICT_FALSE (((ver0 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space0)))
- {
- error0 = has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
+ ext0 = (ver0 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu0->next_ext_type :
+ &ext;
- if (is_ip4) {
- key4_0.src = ip4_0->src_address.as_u32;
- key4_0.teid = gtpu0->teid;
+ gtpu_hdr_len0 = sizeof (gtpu_header_t) -
+ (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext0->len * 4;
- /* Make sure GTPU tunnel exist according to packet SIP and teid
- * SIP identify a GTPU path, and teid identify a tunnel in a given GTPU path */
- if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
- {
- p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
- if (PREDICT_FALSE (p0 == NULL))
- {
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
- last_key4.as_u64 = key4_0.as_u64;
- tunnel_index0 = last_tunnel_index = p0[0];
- }
- else
- tunnel_index0 = last_tunnel_index;
- t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+ ext0 += ext0->len * 4 / sizeof (*ext0);
- /* Validate GTPU tunnel encap-fib index against packet */
- if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
- {
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
+ has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
- /* Validate GTPU tunnel SIP against packet DIP */
- if (PREDICT_TRUE (ip4_0->dst_address.as_u32 == t0->src.ip4.as_u32))
- goto next00; /* valid packet */
- if (PREDICT_FALSE (ip4_address_is_multicast (&ip4_0->dst_address)))
- {
- key4_0.src = ip4_0->dst_address.as_u32;
- key4_0.teid = gtpu0->teid;
- /* Make sure mcast GTPU tunnel exist by packet DIP and teid */
- p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
- if (PREDICT_TRUE (p0 != NULL))
- {
- mt0 = pool_elt_at_index (gtm->tunnels, p0[0]);
- goto next00; /* valid packet */
- }
- }
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
+ if (PREDICT_FALSE ((!is_fast_track0) | (!has_space0)))
+ {
+ /* Not fast path. ext0 and gtpu_hdr_len0 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space0)))
+ {
+ /* The header or size is wrong */
+ error0 =
+ has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is an unsupported/bad packet.
+ * Check if it is to be forwarded.
+ */
+ if (is_ip4)
+ tunnel_index0 = gtm->bad_header_forward_tunnel_index_ipv4;
+ else
+ tunnel_index0 = gtm->bad_header_forward_tunnel_index_ipv6;
- } else /* !is_ip4 */ {
- key6_0.src.as_u64[0] = ip6_0->src_address.as_u64[0];
- key6_0.src.as_u64[1] = ip6_0->src_address.as_u64[1];
- key6_0.teid = gtpu0->teid;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
- /* Make sure GTPU tunnel exist according to packet SIP and teid
- * SIP identify a GTPU path, and teid identify a tunnel in a given GTPU path */
- if (PREDICT_FALSE (memcmp(&key6_0, &last_key6, sizeof(last_key6)) != 0))
- {
- p0 = hash_get_mem (gtm->gtpu6_tunnel_by_key, &key6_0);
- if (PREDICT_FALSE (p0 == NULL))
- {
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
- clib_memcpy_fast (&last_key6, &key6_0, sizeof(key6_0));
- tunnel_index0 = last_tunnel_index = p0[0];
- }
- else
- tunnel_index0 = last_tunnel_index;
- t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+ goto trace00;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type
+ */
+ error0 = GTPU_ERROR_UNSUPPORTED_TYPE;
+ next0 = GTPU_INPUT_NEXT_DROP;
- /* Validate GTPU tunnel encap-fib index against packet */
- if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
- {
- error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
+ /* This is an error/nonstandard packet
+ * Check if it is to be forwarded. */
+ if (is_ip4)
+ tunnel_index0 = gtm->unknown_type_forward_tunnel_index_ipv4;
+ else
+ tunnel_index0 = gtm->unknown_type_forward_tunnel_index_ipv6;
- /* Validate GTPU tunnel SIP against packet DIP */
- if (PREDICT_TRUE (ip6_address_is_equal (&ip6_0->dst_address,
- &t0->src.ip6)))
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+
+ /* The packet is ipv6/not forwarded */
+ goto trace00;
+ }
+
+ if (is_ip4)
+ {
+ key4_0.src = ip4_0->src_address.as_u32;
+ key4_0.teid = gtpu0->teid;
+
+ /* Make sure GTPU tunnel exist according to packet SIP and teid
+ * SIP identify a GTPU path, and teid identify a tunnel in a
+ * given GTPU path */
+ if (PREDICT_FALSE (key4_0.as_u64 != last_key4.as_u64))
+ {
+ // Cache miss, so try normal lookup now.
+ p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+
+ /* This is a standard packet, but no tunnel was found.
+ * Check if it is to be forwarded. */
+ tunnel_index0 =
+ gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+ goto trace00;
+ }
+ // Update the key/tunnel cache for normal packets
+ last_key4.as_u64 = key4_0.as_u64;
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+ goto trace00;
+ }
+
+ /* Validate GTPU tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (ip4_0->dst_address.as_u32 ==
+ t0->src.ip4.as_u32))
+ goto next00; /* valid packet */
+ if (PREDICT_FALSE (
+ ip4_address_is_multicast (&ip4_0->dst_address)))
+ {
+ key4_0.src = ip4_0->dst_address.as_u32;
+ key4_0.teid = gtpu0->teid;
+ /* Make sure mcast GTPU tunnel exist by packet DIP and teid
+ */
+ p0 = hash_get (gtm->gtpu4_tunnel_by_key, key4_0.as_u64);
+ if (PREDICT_TRUE (p0 != NULL))
+ {
+ mt0 = pool_elt_at_index (gtm->tunnels, p0[0]);
+ goto next00; /* valid packet */
+ }
+ }
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv4;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+ goto trace00;
+ }
+ else /* !is_ip4 */
+ {
+ key6_0.src.as_u64[0] = ip6_0->src_address.as_u64[0];
+ key6_0.src.as_u64[1] = ip6_0->src_address.as_u64[1];
+ key6_0.teid = gtpu0->teid;
+
+ /* Make sure GTPU tunnel exist according to packet SIP and teid
+ * SIP identify a GTPU path, and teid identify a tunnel in a
+ * given GTPU path */
+ if (PREDICT_FALSE (
+ memcmp (&key6_0, &last_key6, sizeof (last_key6)) != 0))
+ {
+ p0 = hash_get_mem (gtm->gtpu6_tunnel_by_key, &key6_0);
+ if (PREDICT_FALSE (p0 == NULL))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 =
+ gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+ goto trace00;
+ }
+ clib_memcpy_fast (&last_key6, &key6_0, sizeof (key6_0));
+ tunnel_index0 = last_tunnel_index = p0[0];
+ }
+ else
+ tunnel_index0 = last_tunnel_index;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
+ goto trace00;
+ }
+
+ /* Validate GTPU tunnel SIP against packet DIP */
+ if (PREDICT_TRUE (
+ ip6_address_is_equal (&ip6_0->dst_address, &t0->src.ip6)))
goto next00; /* valid packet */
if (PREDICT_FALSE (ip6_address_is_multicast (&ip6_0->dst_address)))
{
@@ -647,11 +1100,63 @@ gtpu_input (vlib_main_t * vm,
}
error0 = GTPU_ERROR_NO_SUCH_TUNNEL;
next0 = GTPU_INPUT_NEXT_DROP;
+ tunnel_index0 = gtm->unknown_teid_forward_tunnel_index_ipv6;
+ if (PREDICT_FALSE (tunnel_index0 != ~0))
+ goto forward00;
goto trace00;
- }
+ }
+
+ /* This can only be reached via goto */
+ forward00:
+ // Get the tunnel
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+
+ /* Validate GTPU tunnel encap-fib index against packet */
+ if (PREDICT_FALSE (validate_gtpu_fib (b0, t0, is_ip4) == 0))
+ {
+ error0 = GTPU_ERROR_NO_ERROR_TUNNEL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+
+ /* Clear the error, next0 will be overwritten by the tunnel */
+ error0 = 0;
+
+ if (is_ip4)
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len0 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip4_header_t));
+ /* Backup the IP4 checksum and address */
+ sum0 = ip4_0->checksum;
+ old0 = ip4_0->dst_address.as_u32;
+
+ /* Update IP address of the packet using the src from the tunnel
+ */
+ ip4_0->dst_address.as_u32 = t0->src.ip4.as_u32;
+
+ /* Fix the IP4 checksum */
+ sum0 = ip_csum_update (sum0, old0, ip4_0->dst_address.as_u32,
+ ip4_header_t,
+ dst_address /* changed member */);
+ ip4_0->checksum = ip_csum_fold (sum0);
+ }
+ else
+ {
+ /* Forward packet instead. Push the IP+UDP header */
+ gtpu_hdr_len0 =
+ -(i32) (sizeof (udp_header_t) + sizeof (ip6_header_t));
+
+ /* IPv6 UDP checksum is mandatory */
+ int bogus = 0;
+ udp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6_0, &bogus);
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ }
next00:
- /* Pop gtpu header */
+ /* Pop gtpu header / push IP+UDP header */
vlib_buffer_advance (b0, gtpu_hdr_len0);
next0 = t0->decap_next_index;
@@ -697,7 +1202,13 @@ gtpu_input (vlib_main_t * vm,
tr->error = error0;
tr->tunnel_index = tunnel_index0;
tr->teid = has_space0 ? clib_net_to_host_u32(gtpu0->teid) : ~0;
- }
+ if (vlib_buffer_has_space (b0, 4))
+ {
+ tr->header.ver_flags = gtpu0->ver_flags;
+ tr->header.type = gtpu0->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu0->length);
+ }
+ }
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
bi0, next0);
@@ -790,6 +1301,8 @@ typedef enum {
IP_GTPU_BYPASS_N_NEXT,
} ip_vxan_bypass_next_t;
+/* this function determines if a udp packet is actually gtpu and needs
+ forwarding to gtpu_input */
always_inline uword
ip_gtpu_bypass_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1356,128 +1869,183 @@ gtpu_flow_input (vlib_main_t * vm,
u32 sw_if_index0, sw_if_index1, len0, len1;
u8 has_space0 = 0, has_space1 = 0;
u8 ver0, ver1;
+ gtpu_ext_header_t ext = { .type = 0, .len = 0, .pad = 0 };
+ gtpu_ext_header_t *ext0, *ext1;
+ bool is_fast_track0, is_fast_track1;
+ ext0 = ext1 = &ext;
- /* Prefetch next iteration. */
- {
- vlib_buffer_t * p2, * p3;
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t *p2, *p3;
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
- CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
- }
+ CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ }
- bi0 = from[0];
- bi1 = from[1];
- to_next[0] = bi0;
- to_next[1] = bi1;
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
+ bi0 = from[0];
+ bi1 = from[1];
+ to_next[0] = bi0;
+ to_next[1] = bi1;
+ from += 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+ n_left_from -= 2;
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
- /* udp leaves current_data pointing at the gtpu header */
- gtpu0 = vlib_buffer_get_current (b0);
- gtpu1 = vlib_buffer_get_current (b1);
+ /* udp leaves current_data pointing at the gtpu header */
+ gtpu0 = vlib_buffer_get_current (b0);
+ gtpu1 = vlib_buffer_get_current (b1);
- len0 = vlib_buffer_length_in_chain (vm, b0);
- len1 = vlib_buffer_length_in_chain (vm, b1);
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+ len1 = vlib_buffer_length_in_chain (vm, b1);
- tunnel_index0 = ~0;
- error0 = 0;
-
- tunnel_index1 = ~0;
- error1 = 0;
-
- ip_err0 = gtpu_check_ip (b0, len0);
- udp_err0 = gtpu_check_ip_udp_len (b0);
- ip_err1 = gtpu_check_ip (b1, len1);
- udp_err1 = gtpu_check_ip_udp_len (b1);
-
- if (PREDICT_FALSE (gtpu_local_need_csum_check (b0)))
- csum_err0 = !gtpu_validate_udp_csum (vm, b0);
- else
- csum_err0 = !gtpu_local_csum_is_valid (b0);
- if (PREDICT_FALSE (gtpu_local_need_csum_check (b1)))
- csum_err1 = !gtpu_validate_udp_csum (vm, b1);
- else
- csum_err1 = !gtpu_local_csum_is_valid (b1);
-
- if (ip_err0 || udp_err0 || csum_err0)
- {
- next0 = GTPU_INPUT_NEXT_DROP;
- error0 = gtpu_err_code (ip_err0, udp_err0, csum_err0);
- goto trace0;
- }
-
- /* speculatively load gtp header version field */
- ver0 = gtpu0->ver_flags;
-
- /*
- * Manipulate gtpu header
- * TBD: Manipulate Sequence Number and N-PDU Number
- * TBD: Manipulate Next Extension Header
- */
- gtpu_hdr_len0 = sizeof(gtpu_header_t) - (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4);
-
- has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
- if (PREDICT_FALSE (((ver0 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space0)))
- {
- error0 = has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace0;
- }
-
- /* Manipulate packet 0 */
- ASSERT (b0->flow_id != 0);
- tunnel_index0 = b0->flow_id - gtm->flow_id_start;
- t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
- b0->flow_id = 0;
-
- /* Pop gtpu header */
- vlib_buffer_advance (b0, gtpu_hdr_len0);
-
- /* assign the next node */
- if (PREDICT_FALSE (t0->decap_next_index != GTPU_INPUT_NEXT_IP4_INPUT) &&
- (t0->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
- {
- error0 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace0;
- }
- next0 = t0->decap_next_index;
+ tunnel_index0 = ~0;
+ error0 = 0;
- sw_if_index0 = t0->sw_if_index;
+ tunnel_index1 = ~0;
+ error1 = 0;
- /* Set packet input sw_if_index to unicast GTPU tunnel for learning */
- vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ ip_err0 = gtpu_check_ip (b0, len0);
+ udp_err0 = gtpu_check_ip_udp_len (b0);
+ ip_err1 = gtpu_check_ip (b1, len1);
+ udp_err1 = gtpu_check_ip_udp_len (b1);
- pkts_decapsulated ++;
- stats_n_packets += 1;
- stats_n_bytes += len0;
+ if (PREDICT_FALSE (gtpu_local_need_csum_check (b0)))
+ csum_err0 = !gtpu_validate_udp_csum (vm, b0);
+ else
+ csum_err0 = !gtpu_local_csum_is_valid (b0);
+ if (PREDICT_FALSE (gtpu_local_need_csum_check (b1)))
+ csum_err1 = !gtpu_validate_udp_csum (vm, b1);
+ else
+ csum_err1 = !gtpu_local_csum_is_valid (b1);
- /* Batch stats increment on the same gtpu tunnel so counter
- is not incremented per packet */
- if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
- {
- stats_n_packets -= 1;
- stats_n_bytes -= len0;
- if (stats_n_packets)
- vlib_increment_combined_counter
- (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
- thread_index, stats_sw_if_index,
- stats_n_packets, stats_n_bytes);
- stats_n_packets = 1;
- stats_n_bytes = len0;
- stats_sw_if_index = sw_if_index0;
- }
+ /* speculatively load gtp header version field */
+ ver0 = gtpu0->ver_flags;
+ ver1 = gtpu1->ver_flags;
+
+ /*
+ * Manipulate gtpu header
+ * TBD: Manipulate Sequence Number and N-PDU Number
+ * TBD: Manipulate Next Extension Header
+ */
+ is_fast_track0 =
+ ((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track0 = is_fast_track0 & (gtpu0->type == 255);
+
+ is_fast_track1 =
+ ((ver1 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track1 = is_fast_track1 & (gtpu1->type == 255);
+
+ ext0 = (ver0 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu0->next_ext_type :
+ &ext;
+ ext1 = (ver1 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu1->next_ext_type :
+ &ext;
+
+ gtpu_hdr_len0 = sizeof (gtpu_header_t) -
+ (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext0->len * 4;
+ gtpu_hdr_len1 = sizeof (gtpu_header_t) -
+ (((ver1 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext1->len * 4;
+
+ /* Only for clarity, will be optimized away */
+ ext0 += ext0->len * 4 / sizeof (*ext0);
+ ext1 += ext1->len * 4 / sizeof (*ext1);
+
+ has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
+ has_space1 = vlib_buffer_has_space (b1, gtpu_hdr_len1);
+
+ if (ip_err0 || udp_err0 || csum_err0)
+ {
+ next0 = GTPU_INPUT_NEXT_DROP;
+ error0 = gtpu_err_code (ip_err0, udp_err0, csum_err0);
+ goto trace0;
+ }
+
+ /* Diverge the packet paths for 0 and 1 */
+ if (PREDICT_FALSE ((!is_fast_track0) | (!has_space0)))
+ {
+ /* Not fast path. ext0 and gtpu_hdr_len0 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space0)))
+ {
+ /* The header or size is wrong */
+ error0 =
+ has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type.
+ */
+ error0 = GTPU_ERROR_UNSUPPORTED_TYPE;
+ next0 = GTPU_INPUT_NEXT_DROP;
+
+ /* The packet is not forwarded */
+ goto trace0;
+ }
+
+ /* Manipulate packet 0 */
+ ASSERT (b0->flow_id != 0);
+ tunnel_index0 = b0->flow_id - gtm->flow_id_start;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+ b0->flow_id = 0;
+
+ /* Pop gtpu header */
+ vlib_buffer_advance (b0, gtpu_hdr_len0);
+
+ /* assign the next node */
+ if (PREDICT_FALSE (t0->decap_next_index !=
+ GTPU_INPUT_NEXT_IP4_INPUT) &&
+ (t0->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
+ {
+ error0 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ next0 = t0->decap_next_index;
+
+ sw_if_index0 = t0->sw_if_index;
+
+ /* Set packet input sw_if_index to unicast GTPU tunnel for learning
+ */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same gtpu tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets,
+ stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
trace0:
b0->error = error0 ? node->errors[error0] : 0;
@@ -1490,81 +2058,103 @@ trace0:
tr->error = error0;
tr->tunnel_index = tunnel_index0;
tr->teid = has_space0 ? clib_net_to_host_u32(gtpu0->teid) : ~0;
- }
+ if (vlib_buffer_has_space (b0, 4))
+ {
+ tr->header.ver_flags = gtpu0->ver_flags;
+ tr->header.type = gtpu0->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu0->length);
+ }
+ }
- if (ip_err1 || udp_err1 || csum_err1)
- {
- next1 = GTPU_INPUT_NEXT_DROP;
- error1 = gtpu_err_code (ip_err1, udp_err1, csum_err1);
- goto trace1;
- }
+ if (ip_err1 || udp_err1 || csum_err1)
+ {
+ next1 = GTPU_INPUT_NEXT_DROP;
+ error1 = gtpu_err_code (ip_err1, udp_err1, csum_err1);
+ goto trace1;
+ }
- /* speculatively load gtp header version field */
- ver1 = gtpu1->ver_flags;
+ /*
+ * Manipulate gtpu header
+ * TBD: Manipulate Sequence Number and N-PDU Number
+ * TBD: Manipulate Next Extension Header
+ */
+ if (PREDICT_FALSE ((!is_fast_track1) | (!has_space1)))
+ {
+ /* Not fast path. ext1 and gtpu_hdr_len1 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver1 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space1)))
+ {
+ /* The header or size is wrong */
+ error1 =
+ has_space1 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type.
+ */
+ error1 = GTPU_ERROR_UNSUPPORTED_TYPE;
+ next1 = GTPU_INPUT_NEXT_DROP;
- /*
- * Manipulate gtpu header
- * TBD: Manipulate Sequence Number and N-PDU Number
- * TBD: Manipulate Next Extension Header
- */
- gtpu_hdr_len1 = sizeof(gtpu_header_t) - (((ver1 & GTPU_E_S_PN_BIT) == 0) * 4);
- has_space1 = vlib_buffer_has_space (b1, gtpu_hdr_len1);
- if (PREDICT_FALSE (((ver1 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space1)))
- {
- error1 = has_space1 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
- next1 = GTPU_INPUT_NEXT_DROP;
- goto trace1;
- }
+ /* The packet is not forwarded */
+ goto trace1;
+ }
- /* Manipulate packet 1 */
- ASSERT (b1->flow_id != 0);
- tunnel_index1 = b1->flow_id - gtm->flow_id_start;
- t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
- b1->flow_id = 0;
-
- /* Pop gtpu header */
- vlib_buffer_advance (b1, gtpu_hdr_len1);
-
- /* assign the next node */
- if (PREDICT_FALSE (t1->decap_next_index != GTPU_INPUT_NEXT_IP4_INPUT) &&
- (t1->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
- {
- error1 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
- next1 = GTPU_INPUT_NEXT_DROP;
- goto trace1;
- }
- next1 = t1->decap_next_index;
+ /* Manipulate packet 1 */
+ ASSERT (b1->flow_id != 0);
+ tunnel_index1 = b1->flow_id - gtm->flow_id_start;
+ t1 = pool_elt_at_index (gtm->tunnels, tunnel_index1);
+ b1->flow_id = 0;
- sw_if_index1 = t1->sw_if_index;
+ /* Pop gtpu header */
+ vlib_buffer_advance (b1, gtpu_hdr_len1);
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- /* This won't happen in current implementation as only
- ipv4/udp/gtpu/IPV4 type packets can be matched */
- if (PREDICT_FALSE(next1 == GTPU_INPUT_NEXT_L2_INPUT))
- vnet_update_l2_len (b1);
+ /* assign the next node */
+ if (PREDICT_FALSE (t1->decap_next_index !=
+ GTPU_INPUT_NEXT_IP4_INPUT) &&
+ (t1->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
+ {
+ error1 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
+ next1 = GTPU_INPUT_NEXT_DROP;
+ goto trace1;
+ }
+ next1 = t1->decap_next_index;
- /* Set packet input sw_if_index to unicast GTPU tunnel for learning */
- vnet_buffer(b1)->sw_if_index[VLIB_RX] = sw_if_index1;
+ sw_if_index1 = t1->sw_if_index;
- pkts_decapsulated ++;
- stats_n_packets += 1;
- stats_n_bytes += len1;
+ /* Required to make the l2 tag push / pop code work on l2 subifs */
+ /* This won't happen in current implementation as only
+ ipv4/udp/gtpu/IPV4 type packets can be matched */
+ if (PREDICT_FALSE (next1 == GTPU_INPUT_NEXT_L2_INPUT))
+ vnet_update_l2_len (b1);
+
+ /* Set packet input sw_if_index to unicast GTPU tunnel for learning
+ */
+ vnet_buffer (b1)->sw_if_index[VLIB_RX] = sw_if_index1;
- /* Batch stats increment on the same gtpu tunnel so counter
- is not incremented per packet */
- if (PREDICT_FALSE (sw_if_index1 != stats_sw_if_index))
- {
- stats_n_packets -= 1;
- stats_n_bytes -= len1;
- if (stats_n_packets)
- vlib_increment_combined_counter
- (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
- thread_index, stats_sw_if_index,
- stats_n_packets, stats_n_bytes);
- stats_n_packets = 1;
- stats_n_bytes = len1;
- stats_sw_if_index = sw_if_index1;
- }
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len1;
+
+ /* Batch stats increment on the same gtpu tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index1 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len1;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets,
+ stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len1;
+ stats_sw_if_index = sw_if_index1;
+ }
trace1:
b1->error = error1 ? node->errors[error1] : 0;
@@ -1577,12 +2167,18 @@ trace1:
tr->error = error1;
tr->tunnel_index = tunnel_index1;
tr->teid = has_space1 ? clib_net_to_host_u32(gtpu1->teid) : ~0;
- }
+ if (vlib_buffer_has_space (b1, 4))
+ {
+ tr->header.ver_flags = gtpu1->ver_flags;
+ tr->header.type = gtpu1->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu1->length);
+ }
+ }
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+}
while (n_left_from > 0 && n_left_to_next > 0)
{
@@ -1597,97 +2193,135 @@ trace1:
u32 sw_if_index0, len0;
u8 has_space0 = 0;
u8 ver0;
+ gtpu_ext_header_t ext = { .type = 0, .len = 0, .pad = 0 };
+ gtpu_ext_header_t *ext0;
+ bool is_fast_track0;
+ ext0 = &ext;
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- len0 = vlib_buffer_length_in_chain (vm, b0);
-
- tunnel_index0 = ~0;
- error0 = 0;
-
- ip_err0 = gtpu_check_ip (b0, len0);
- udp_err0 = gtpu_check_ip_udp_len (b0);
- if (PREDICT_FALSE (gtpu_local_need_csum_check (b0)))
- csum_err0 = !gtpu_validate_udp_csum (vm, b0);
- else
- csum_err0 = !gtpu_local_csum_is_valid (b0);
-
- if (ip_err0 || udp_err0 || csum_err0)
- {
- next0 = GTPU_INPUT_NEXT_DROP;
- error0 = gtpu_err_code (ip_err0, udp_err0, csum_err0);
- goto trace00;
- }
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
- /* udp leaves current_data pointing at the gtpu header */
- gtpu0 = vlib_buffer_get_current (b0);
+ b0 = vlib_get_buffer (vm, bi0);
+ len0 = vlib_buffer_length_in_chain (vm, b0);
- /* speculatively load gtp header version field */
- ver0 = gtpu0->ver_flags;
+ tunnel_index0 = ~0;
+ error0 = 0;
- /*
- * Manipulate gtpu header
- * TBD: Manipulate Sequence Number and N-PDU Number
- * TBD: Manipulate Next Extension Header
- */
- gtpu_hdr_len0 = sizeof(gtpu_header_t) - (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4);
+ ip_err0 = gtpu_check_ip (b0, len0);
+ udp_err0 = gtpu_check_ip_udp_len (b0);
+ if (PREDICT_FALSE (gtpu_local_need_csum_check (b0)))
+ csum_err0 = !gtpu_validate_udp_csum (vm, b0);
+ else
+ csum_err0 = !gtpu_local_csum_is_valid (b0);
- has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
- if (PREDICT_FALSE (((ver0 & GTPU_VER_MASK) != GTPU_V1_VER) | (!has_space0)))
- {
- error0 = has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
-
- ASSERT (b0->flow_id != 0);
- tunnel_index0 = b0->flow_id - gtm->flow_id_start;
- t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
- b0->flow_id = 0;
-
- /* Pop gtpu header */
- vlib_buffer_advance (b0, gtpu_hdr_len0);
-
- /* assign the next node */
- if (PREDICT_FALSE (t0->decap_next_index != GTPU_INPUT_NEXT_IP4_INPUT) &&
- (t0->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
- {
- error0 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
- next0 = GTPU_INPUT_NEXT_DROP;
- goto trace00;
- }
- next0 = t0->decap_next_index;
+ /* udp leaves current_data pointing at the gtpu header */
+ gtpu0 = vlib_buffer_get_current (b0);
- sw_if_index0 = t0->sw_if_index;
+ /* speculatively load gtp header version field */
+ ver0 = gtpu0->ver_flags;
- /* Set packet input sw_if_index to unicast GTPU tunnel for learning */
- vnet_buffer(b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+ /*
+ * Manipulate gtpu header
+ * TBD: Manipulate Sequence Number and N-PDU Number
+ * TBD: Manipulate Next Extension Header
+ */
+ is_fast_track0 =
+ ((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT));
+ is_fast_track0 = is_fast_track0 & (gtpu0->type == 255);
+
+ ext0 = (ver0 & GTPU_E_BIT) ?
+ (gtpu_ext_header_t *) &gtpu0->next_ext_type :
+ &ext;
+
+ gtpu_hdr_len0 = sizeof (gtpu_header_t) -
+ (((ver0 & GTPU_E_S_PN_BIT) == 0) * 4) +
+ ext0->len * 4;
+ ext0 += ext0->len * 4 / sizeof (*ext0);
+
+ has_space0 = vlib_buffer_has_space (b0, gtpu_hdr_len0);
+
+ if (ip_err0 || udp_err0 || csum_err0)
+ {
+ next0 = GTPU_INPUT_NEXT_DROP;
+ error0 = gtpu_err_code (ip_err0, udp_err0, csum_err0);
+ goto trace00;
+ }
- pkts_decapsulated ++;
- stats_n_packets += 1;
- stats_n_bytes += len0;
+ if (PREDICT_FALSE ((!is_fast_track0) | (!has_space0)))
+ {
+ /* Not fast path. ext0 and gtpu_hdr_len0 might be wrong */
+
+ /* GCC will hopefully fix the duplicate compute */
+ if (PREDICT_FALSE (
+ !((ver0 & (GTPU_VER_MASK | GTPU_PT_BIT | GTPU_RES_BIT)) ==
+ (GTPU_V1_VER | GTPU_PT_BIT)) |
+ (!has_space0)))
+ {
+ /* The header or size is wrong */
+ error0 =
+ has_space0 ? GTPU_ERROR_BAD_VER : GTPU_ERROR_TOO_SMALL;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+ /* Correct version and has the space. It can only be unknown
+ * message type.
+ */
+ error0 = GTPU_ERROR_UNSUPPORTED_TYPE;
+ next0 = GTPU_INPUT_NEXT_DROP;
- /* Batch stats increment on the same gtpu tunnel so counter
- is not incremented per packet */
- if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
- {
- stats_n_packets -= 1;
- stats_n_bytes -= len0;
- if (stats_n_packets)
- vlib_increment_combined_counter
- (im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
- thread_index, stats_sw_if_index,
- stats_n_packets, stats_n_bytes);
- stats_n_packets = 1;
- stats_n_bytes = len0;
- stats_sw_if_index = sw_if_index0;
- }
+ /* The packet is not forwarded */
+ goto trace00;
+ }
+
+ ASSERT (b0->flow_id != 0);
+ tunnel_index0 = b0->flow_id - gtm->flow_id_start;
+ t0 = pool_elt_at_index (gtm->tunnels, tunnel_index0);
+ b0->flow_id = 0;
+
+ /* Pop gtpu header */
+ vlib_buffer_advance (b0, gtpu_hdr_len0);
+
+ /* assign the next node */
+ if (PREDICT_FALSE (t0->decap_next_index !=
+ GTPU_INPUT_NEXT_IP4_INPUT) &&
+ (t0->decap_next_index != GTPU_INPUT_NEXT_IP6_INPUT))
+ {
+ error0 = GTPU_FLOW_ERROR_PAYLOAD_ERROR;
+ next0 = GTPU_INPUT_NEXT_DROP;
+ goto trace00;
+ }
+ next0 = t0->decap_next_index;
+
+ sw_if_index0 = t0->sw_if_index;
+
+ /* Set packet input sw_if_index to unicast GTPU tunnel for learning
+ */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index0;
+
+ pkts_decapsulated++;
+ stats_n_packets += 1;
+ stats_n_bytes += len0;
+
+ /* Batch stats increment on the same gtpu tunnel so counter
+ is not incremented per packet */
+ if (PREDICT_FALSE (sw_if_index0 != stats_sw_if_index))
+ {
+ stats_n_packets -= 1;
+ stats_n_bytes -= len0;
+ if (stats_n_packets)
+ vlib_increment_combined_counter (
+ im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, stats_sw_if_index, stats_n_packets,
+ stats_n_bytes);
+ stats_n_packets = 1;
+ stats_n_bytes = len0;
+ stats_sw_if_index = sw_if_index0;
+ }
trace00:
b0->error = error0 ? node->errors[error0] : 0;
@@ -1699,11 +2333,16 @@ trace1:
tr->error = error0;
tr->tunnel_index = tunnel_index0;
tr->teid = has_space0 ? clib_net_to_host_u32(gtpu0->teid) : ~0;
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
+ if (vlib_buffer_has_space (b0, 4))
+ {
+ tr->header.ver_flags = gtpu0->ver_flags;
+ tr->header.type = gtpu0->type;
+ tr->header.length = clib_net_to_host_u16 (gtpu0->length);
+ }
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
@@ -1733,7 +2372,6 @@ VLIB_NODE_FN (gtpu4_flow_input_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
#ifndef CLIB_MULTIARCH_VARIANT
VLIB_REGISTER_NODE (gtpu4_flow_input_node) = {
.name = "gtpu4-flow-input",
@@ -1754,6 +2392,5 @@ VLIB_REGISTER_NODE (gtpu4_flow_input_node) = {
},
};
#endif
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
diff --git a/src/plugins/gtpu/gtpu_encap.c b/src/plugins/gtpu/gtpu_encap.c
index 4b7d98786f4..2c3c46a4be2 100644
--- a/src/plugins/gtpu/gtpu_encap.c
+++ b/src/plugins/gtpu/gtpu_encap.c
@@ -199,7 +199,8 @@ gtpu_encap_inline (vlib_main_t * vm,
copy_dst3 = (u64 *) ip4_3;
copy_src3 = (u64 *) t3->rewrite;
- /* Copy first 32 octets 8-bytes at a time */
+ /* Copy first 32 octets 8-bytes at a time (minimum size)
+ * TODO: check if clib_memcpy_fast is better */
#define _(offs) copy_dst0[offs] = copy_src0[offs];
foreach_fixed_header4_offset;
#undef _
@@ -212,19 +213,83 @@ gtpu_encap_inline (vlib_main_t * vm,
#define _(offs) copy_dst3[offs] = copy_src3[offs];
foreach_fixed_header4_offset;
#undef _
- /* Last 4 octets. Hopefully gcc will be our friend */
- copy_dst_last0 = (u32 *)(&copy_dst0[4]);
- copy_src_last0 = (u32 *)(&copy_src0[4]);
- copy_dst_last0[0] = copy_src_last0[0];
- copy_dst_last1 = (u32 *)(&copy_dst1[4]);
- copy_src_last1 = (u32 *)(&copy_src1[4]);
- copy_dst_last1[0] = copy_src_last1[0];
- copy_dst_last2 = (u32 *)(&copy_dst2[4]);
- copy_src_last2 = (u32 *)(&copy_src2[4]);
- copy_dst_last2[0] = copy_src_last2[0];
- copy_dst_last3 = (u32 *)(&copy_dst3[4]);
- copy_src_last3 = (u32 *)(&copy_src3[4]);
- copy_dst_last3[0] = copy_src_last3[0];
+
+ /* Copy last octets */
+ if (_vec_len (t0->rewrite) == 36)
+ {
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *) (&copy_dst0[4]);
+ copy_src_last0 = (u32 *) (&copy_src0[4]);
+ copy_dst_last0[0] = copy_src_last0[0];
+ }
+ else
+ {
+ /* Near last 8 octets. */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ _ (4);
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *) (&copy_dst0[5]);
+ copy_src_last0 = (u32 *) (&copy_src0[5]);
+ copy_dst_last0[0] = copy_src_last0[0];
+ }
+
+ if (_vec_len (t1->rewrite) == 36)
+ {
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last1 = (u32 *) (&copy_dst1[4]);
+ copy_src_last1 = (u32 *) (&copy_src1[4]);
+ copy_dst_last1[0] = copy_src_last1[0];
+ }
+ else
+ {
+ /* Near last 8 octets. */
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+ _ (4);
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last1 = (u32 *) (&copy_dst1[5]);
+ copy_src_last1 = (u32 *) (&copy_src1[5]);
+ copy_dst_last1[0] = copy_src_last1[0];
+ }
+
+ if (_vec_len (t2->rewrite) == 36)
+ {
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last2 = (u32 *) (&copy_dst2[4]);
+ copy_src_last2 = (u32 *) (&copy_src2[4]);
+ copy_dst_last2[0] = copy_src_last2[0];
+ }
+ else
+ {
+ /* Near last 8 octets. */
+#define _(offs) copy_dst2[offs] = copy_src2[offs];
+ _ (4);
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last2 = (u32 *) (&copy_dst2[5]);
+ copy_src_last2 = (u32 *) (&copy_src2[5]);
+ copy_dst_last2[0] = copy_src_last2[0];
+ }
+
+ if (_vec_len (t3->rewrite) == 36)
+ {
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last3 = (u32 *) (&copy_dst3[4]);
+ copy_src_last3 = (u32 *) (&copy_src3[4]);
+ copy_dst_last3[0] = copy_src_last3[0];
+ }
+ else
+ {
+ /* Near last 8 octets. */
+#define _(offs) copy_dst3[offs] = copy_src3[offs];
+ _ (4);
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last3 = (u32 *) (&copy_dst3[5]);
+ copy_src_last3 = (u32 *) (&copy_src3[5]);
+ copy_dst_last3[0] = copy_src_last3[0];
+ }
/* Fix the IP4 checksum and length */
sum0 = ip4_0->checksum;
@@ -318,7 +383,7 @@ gtpu_encap_inline (vlib_main_t * vm,
copy_src2 = (u64 *) t2->rewrite;
copy_dst3 = (u64 *) ip6_3;
copy_src3 = (u64 *) t3->rewrite;
- /* Copy first 56 (ip6) octets 8-bytes at a time */
+ /* Copy first 56 (ip6) octets 8-bytes at a time (minimum size) */
#define _(offs) copy_dst0[offs] = copy_src0[offs];
foreach_fixed_header6_offset;
#undef _
@@ -331,6 +396,40 @@ gtpu_encap_inline (vlib_main_t * vm,
#define _(offs) copy_dst3[offs] = copy_src3[offs];
foreach_fixed_header6_offset;
#undef _
+
+ /* Copy last octets */
+ if (_vec_len (t0->rewrite) == 64)
+ {
+ /* Last 8 octets. */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ _ (7);
+#undef _
+ }
+
+ if (_vec_len (t1->rewrite) == 64)
+ {
+ /* Last 8 octets. */
+#define _(offs) copy_dst1[offs] = copy_src1[offs];
+ _ (7);
+#undef _
+ }
+
+ if (_vec_len (t2->rewrite) == 64)
+ {
+ /* Last 8 octets. */
+#define _(offs) copy_dst2[offs] = copy_src2[offs];
+ _ (7);
+#undef _
+ }
+
+ if (_vec_len (t3->rewrite) == 64)
+ {
+ /* Last 8 octets. */
+#define _(offs) copy_dst3[offs] = copy_src3[offs];
+ _ (7);
+#undef _
+ }
+
/* Fix IP6 payload length */
new_l0 =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
@@ -466,15 +565,19 @@ gtpu_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->tunnel_index = t0 - gtm->tunnels;
tr->tteid = t0->tteid;
- }
+ tr->pdu_extension = t0->pdu_extension;
+ tr->qfi = t0->qfi;
+ }
- if (PREDICT_FALSE(b1->flags & VLIB_BUFFER_IS_TRACED))
- {
- gtpu_encap_trace_t *tr =
- vlib_add_trace (vm, node, b1, sizeof (*tr));
- tr->tunnel_index = t1 - gtm->tunnels;
- tr->tteid = t1->tteid;
- }
+ if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gtpu_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b1, sizeof (*tr));
+ tr->tunnel_index = t1 - gtm->tunnels;
+ tr->tteid = t1->tteid;
+ tr->pdu_extension = t1->pdu_extension;
+ tr->qfi = t1->qfi;
+ }
if (PREDICT_FALSE(b2->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -482,15 +585,19 @@ gtpu_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b2, sizeof (*tr));
tr->tunnel_index = t2 - gtm->tunnels;
tr->tteid = t2->tteid;
- }
+ tr->pdu_extension = t2->pdu_extension;
+ tr->qfi = t2->qfi;
+ }
- if (PREDICT_FALSE(b3->flags & VLIB_BUFFER_IS_TRACED))
- {
- gtpu_encap_trace_t *tr =
- vlib_add_trace (vm, node, b3, sizeof (*tr));
- tr->tunnel_index = t3 - gtm->tunnels;
- tr->tteid = t3->tteid;
- }
+ if (PREDICT_FALSE (b3->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ gtpu_encap_trace_t *tr =
+ vlib_add_trace (vm, node, b3, sizeof (*tr));
+ tr->tunnel_index = t3 - gtm->tunnels;
+ tr->tteid = t3->tteid;
+ tr->pdu_extension = t3->pdu_extension;
+ tr->qfi = t3->qfi;
+ }
vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
to_next, n_left_to_next,
@@ -532,8 +639,9 @@ gtpu_encap_inline (vlib_main_t * vm,
next0 = t0->next_dpo.dpoi_next_node;
vnet_buffer(b0)->ip.adj_index[VLIB_TX] = t0->next_dpo.dpoi_index;
- /* Apply the rewrite string. $$$$ vnet_rewrite? */
- vlib_buffer_advance (b0, -(word)_vec_len(t0->rewrite));
+ /* Apply the rewrite string. $$$$ vnet_rewrite.
+ * The correct total size is set in ip_udp_gtpu_rewrite() */
+ vlib_buffer_advance (b0, -(word) _vec_len (t0->rewrite));
if (is_ip4)
{
@@ -546,10 +654,26 @@ gtpu_encap_inline (vlib_main_t * vm,
#define _(offs) copy_dst0[offs] = copy_src0[offs];
foreach_fixed_header4_offset;
#undef _
- /* Last 4 octets. Hopefully gcc will be our friend */
- copy_dst_last0 = (u32 *)(&copy_dst0[4]);
- copy_src_last0 = (u32 *)(&copy_src0[4]);
- copy_dst_last0[0] = copy_src_last0[0];
+
+ /* Copy last octets */
+ if (_vec_len (t0->rewrite) == 36)
+ {
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *) (&copy_dst0[4]);
+ copy_src_last0 = (u32 *) (&copy_src0[4]);
+ copy_dst_last0[0] = copy_src_last0[0];
+ }
+ else
+ {
+ /* Near last 8 octets. */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ _ (4);
+#undef _
+ /* Last 4 octets. Hopefully gcc will be our friend */
+ copy_dst_last0 = (u32 *) (&copy_dst0[5]);
+ copy_src_last0 = (u32 *) (&copy_src0[5]);
+ copy_dst_last0[0] = copy_src_last0[0];
+ }
/* Fix the IP4 checksum and length */
sum0 = ip4_0->checksum;
@@ -587,6 +711,16 @@ gtpu_encap_inline (vlib_main_t * vm,
#define _(offs) copy_dst0[offs] = copy_src0[offs];
foreach_fixed_header6_offset;
#undef _
+
+ /* Copy last octets */
+ if (_vec_len (t0->rewrite) == 64)
+ {
+ /* Last 8 octets. */
+#define _(offs) copy_dst0[offs] = copy_src0[offs];
+ _ (7);
+#undef _
+ }
+
/* Fix IP6 payload length */
new_l0 =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
@@ -600,9 +734,9 @@ gtpu_encap_inline (vlib_main_t * vm,
/* Fix GTPU length */
gtpu0 = (gtpu_header_t *)(udp0+1);
- new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain(vm, b0)
- - sizeof (*ip4_0) - sizeof(*udp0)
- - GTPU_V1_HDR_LEN);
+ new_l0 = clib_host_to_net_u16 (
+ vlib_buffer_length_in_chain (vm, b0) - sizeof (*ip6_0) -
+ sizeof (*udp0) - GTPU_V1_HDR_LEN);
gtpu0->length = new_l0;
/* IPv6 UDP checksum is mandatory */
@@ -644,7 +778,9 @@ gtpu_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->tunnel_index = t0 - gtm->tunnels;
tr->tteid = t0->tteid;
- }
+ tr->pdu_extension = t0->pdu_extension;
+ tr->qfi = t0->qfi;
+ }
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next,
bi0, next0);
diff --git a/src/plugins/gtpu/gtpu_error.def b/src/plugins/gtpu/gtpu_error.def
index 4351529ef25..6b521c8658a 100644
--- a/src/plugins/gtpu/gtpu_error.def
+++ b/src/plugins/gtpu/gtpu_error.def
@@ -17,3 +17,5 @@ gtpu_error (NO_SUCH_TUNNEL, "no such tunnel packets")
gtpu_error (BAD_VER, "packets with bad version in gtpu header")
gtpu_error (BAD_FLAGS, "packets with bad flags field in gtpu header")
gtpu_error (TOO_SMALL, "packet too small to fit a gtpu header")
+gtpu_error (UNSUPPORTED_TYPE, "packets with message type < 255 in gtpu header")
+gtpu_error (NO_ERROR_TUNNEL, "did not find an forward tunnel")
diff --git a/src/plugins/gtpu/gtpu_test.c b/src/plugins/gtpu/gtpu_test.c
index 373e7888341..fadcb82cb88 100644
--- a/src/plugins/gtpu/gtpu_test.c
+++ b/src/plugins/gtpu/gtpu_test.c
@@ -91,24 +91,6 @@ static void vl_api_gtpu_add_del_tunnel_reply_t_handler
}
static uword
-api_unformat_sw_if_index (unformat_input_t * input, va_list * args)
-{
- vat_main_t *vam = va_arg (*args, vat_main_t *);
- u32 *result = va_arg (*args, u32 *);
- u8 *if_name;
- uword *p;
-
- if (!unformat (input, "%s", &if_name))
- return 0;
-
- p = hash_get_mem (vam->sw_if_index_by_interface_name, if_name);
- if (p == 0)
- return 0;
- *result = p[0];
- return 1;
-}
-
-static uword
api_unformat_hw_if_index (unformat_input_t * input, va_list * args)
{
return 0;
@@ -316,9 +298,9 @@ api_gtpu_add_del_tunnel (vat_main_t * vam)
unformat_gtpu_decap_next, &decap_next_index))
;
else if (unformat (line_input, "teid %d", &teid))
- ;
+ ;
else if (unformat (line_input, "tteid %d", &tteid))
- ;
+ ;
else
{
errmsg ("parse error '%U'", format_unformat_error, line_input);
@@ -378,6 +360,175 @@ api_gtpu_add_del_tunnel (vat_main_t * vam)
return ret;
}
+static void
+vl_api_gtpu_add_del_tunnel_v2_reply_t_handler (
+ vl_api_gtpu_add_del_tunnel_v2_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
+static int
+api_gtpu_add_del_tunnel_v2 (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_gtpu_add_del_tunnel_v2_t *mp;
+ ip46_address_t src, dst;
+ u8 is_add = 1;
+ u8 ipv4_set = 0, ipv6_set = 0;
+ u8 src_set = 0;
+ u8 dst_set = 0;
+ u8 grp_set = 0;
+ u32 mcast_sw_if_index = ~0;
+ u32 encap_vrf_id = 0;
+ u32 decap_next_index = ~0;
+ u32 teid = 0, tteid = 0;
+ u8 pdu_extension = 0;
+ u32 qfi = 0;
+ int ret;
+
+ /* Can't "universally zero init" (={0}) due to GCC bug 53119 */
+ clib_memset (&src, 0, sizeof src);
+ clib_memset (&dst, 0, sizeof dst);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "src %U", unformat_ip4_address, &src.ip4))
+ {
+ ipv4_set = 1;
+ src_set = 1;
+ }
+ else if (unformat (line_input, "dst %U", unformat_ip4_address, &dst.ip4))
+ {
+ ipv4_set = 1;
+ dst_set = 1;
+ }
+ else if (unformat (line_input, "src %U", unformat_ip6_address, &src.ip6))
+ {
+ ipv6_set = 1;
+ src_set = 1;
+ }
+ else if (unformat (line_input, "dst %U", unformat_ip6_address, &dst.ip6))
+ {
+ ipv6_set = 1;
+ dst_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U", unformat_ip4_address,
+ &dst.ip4, api_unformat_sw_if_index, vam,
+ &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U", unformat_ip4_address,
+ &dst.ip4))
+ {
+ grp_set = dst_set = 1;
+ ipv4_set = 1;
+ }
+ else if (unformat (line_input, "group %U %U", unformat_ip6_address,
+ &dst.ip6, api_unformat_sw_if_index, vam,
+ &mcast_sw_if_index))
+ {
+ grp_set = dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "group %U", unformat_ip6_address,
+ &dst.ip6))
+ {
+ grp_set = dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "mcast_sw_if_index %u",
+ &mcast_sw_if_index))
+ ;
+ else if (unformat (line_input, "encap-vrf-id %d", &encap_vrf_id))
+ ;
+ else if (unformat (line_input, "decap-next %U", unformat_gtpu_decap_next,
+ &decap_next_index))
+ ;
+ else if (unformat (line_input, "teid %d", &teid)) /* Change to %u ? */
+ ;
+ else if (unformat (line_input, "tteid %d", &tteid)) /* Change to %u ? */
+ ;
+ else if (unformat (line_input, "qfi %u", &qfi))
+ pdu_extension = 1;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (is_add && src_set == 0)
+ {
+ errmsg ("tunnel src address not specified");
+ return -99;
+ }
+ if (dst_set == 0)
+ {
+ errmsg ("tunnel dst address not specified");
+ return -99;
+ }
+
+ if (grp_set && !ip46_address_is_multicast (&dst))
+ {
+ errmsg ("tunnel group address not multicast");
+ return -99;
+ }
+ if (grp_set && mcast_sw_if_index == ~0)
+ {
+ errmsg ("tunnel nonexistent multicast device");
+ return -99;
+ }
+ if (grp_set == 0 && ip46_address_is_multicast (&dst))
+ {
+ errmsg ("tunnel dst address must be unicast");
+ return -99;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("both IPv4 and IPv6 addresses specified");
+ return -99;
+ }
+ if (qfi > 31)
+ {
+ errmsg ("qfi max value is 31");
+ return -99;
+ }
+
+ M (GTPU_ADD_DEL_TUNNEL_V2, mp);
+
+ ip_address_encode (&src, ipv6_set ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &mp->src_address);
+ ip_address_encode (&dst, ipv6_set ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &mp->dst_address);
+ mp->encap_vrf_id = ntohl (encap_vrf_id);
+ mp->decap_next_index = ntohl (decap_next_index);
+ mp->mcast_sw_if_index = ntohl (mcast_sw_if_index);
+ mp->teid = ntohl (teid);
+ mp->tteid = ntohl (tteid);
+ mp->is_add = is_add;
+ mp->pdu_extension = pdu_extension;
+ mp->qfi = ntohl (qfi);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
static int
api_gtpu_tunnel_update_tteid (vat_main_t * vam)
{
@@ -454,6 +605,40 @@ static void vl_api_gtpu_tunnel_details_t_handler
ntohl (mp->mcast_sw_if_index));
}
+static void
+vl_api_gtpu_tunnel_v2_details_t_handler (vl_api_gtpu_tunnel_v2_details_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ ip46_address_t src;
+ ip46_address_t dst;
+ ip_address_decode (&mp->dst_address, &dst);
+ ip_address_decode (&mp->src_address, &src);
+ print (vam->ofp, "%11d%24U%24U%14d%18d%13d%13d%19d%15d%5d%15d%17d",
+ ntohl (mp->sw_if_index), format_ip46_address, &src, IP46_TYPE_ANY,
+ format_ip46_address, &dst, IP46_TYPE_ANY, ntohl (mp->encap_vrf_id),
+ ntohl (mp->decap_next_index), ntohl (mp->teid), ntohl (mp->tteid),
+ ntohl (mp->mcast_sw_if_index), mp->pdu_extension, mp->qfi,
+ mp->is_forwarding, ntohl (mp->forwarding_type));
+}
+
+static void
+vl_api_gtpu_add_del_forward_reply_t_handler (
+ vl_api_gtpu_add_del_forward_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->sw_if_index = ntohl (mp->sw_if_index);
+ vam->result_ready = 1;
+ }
+}
+
static int
api_gtpu_tunnel_dump (vat_main_t * vam)
{
@@ -498,4 +683,163 @@ api_gtpu_tunnel_dump (vat_main_t * vam)
return 0;
}
+static int
+api_gtpu_tunnel_v2_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_gtpu_tunnel_dump_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ sw_if_index = ~0;
+ }
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%11s%24s%24s%14s%18s%13s%13s%19s%12s%5s%15s%17s",
+ "sw_if_index", "src_address", "dst_address", "encap_vrf_id",
+ "decap_next_index", "teid", "tteid", "mcast_sw_if_index",
+ "pdu_extension", "qfi", "is_forwarding", "forwarding_type");
+ }
+
+ /* Get list of gtpu-tunnel interfaces */
+ M (GTPU_TUNNEL_DUMP, mp);
+
+ mp->sw_if_index = htonl (sw_if_index);
+
+ S (mp);
+
+ /* No status response for this API call.
+ * Wait 1 sec for any dump output before return to vat# */
+ sleep (1);
+
+ return 0;
+}
+
+static int
+api_gtpu_add_del_forward (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_gtpu_add_del_forward_t *mp;
+ int ret;
+ u32 decap_next_index = GTPU_INPUT_NEXT_L2_INPUT;
+ int is_add = 1;
+ ip46_address_t dst;
+ u8 dst_set = 0;
+ u8 type = 0;
+ u8 ipv6_set = 0;
+ u32 encap_vrf_id;
+
+ /* Cant "universally zero init" (={0}) due to GCC bug 53119 */
+ clib_memset (&dst, 0, sizeof dst);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "dst %U", unformat_ip4_address, &dst.ip4))
+ dst_set = 1;
+ else if (unformat (line_input, "dst %U", unformat_ip6_address, &dst.ip6))
+ {
+ dst_set = 1;
+ ipv6_set = 1;
+ }
+ else if (unformat (line_input, "decap-next %U", unformat_gtpu_decap_next,
+ &decap_next_index))
+ ;
+ else if (unformat (line_input, "encap-vrf-id %d", &encap_vrf_id))
+ ;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "bad-header"))
+ type |= GTPU_FORWARD_BAD_HEADER;
+ else if (unformat (line_input, "unknown-teid"))
+ type |= GTPU_FORWARD_UNKNOWN_TEID;
+ else if (unformat (line_input, "unknown-type"))
+ type |= GTPU_FORWARD_UNKNOWN_TYPE;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ if (!dst_set)
+ {
+ errmsg ("dst must be set to a valid IP address");
+ return -99;
+ }
+
+ M (GTPU_ADD_DEL_FORWARD, mp);
+
+ mp->is_add = is_add;
+ ip_address_encode (&dst, ipv6_set ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &mp->dst_address);
+ mp->forwarding_type = type;
+ mp->encap_vrf_id = ntohl (encap_vrf_id);
+ mp->decap_next_index = ntohl (decap_next_index);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_gtpu_get_transfer_counts (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_gtpu_get_transfer_counts_t *mp;
+ u32 start_index;
+ u32 capacity;
+ int ret;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "start_index %u", &start_index))
+ ;
+ else if (unformat (line_input, "capacity %u", &capacity))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, line_input);
+ return -99;
+ }
+ }
+
+ M (GTPU_GET_TRANSFER_COUNTS, mp);
+ mp->sw_if_index_start = start_index;
+ mp->capacity = capacity;
+
+ S (mp); // TODO: Handle the prints somehow. But how is it done??
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_gtpu_get_transfer_counts_reply_t_handler (
+ vl_api_gtpu_get_transfer_counts_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ // TODO: Add reply here?
+ vam->result_ready = 1;
+ }
+}
+
#include <gtpu/gtpu.api_test.c>
diff --git a/src/plugins/hs_apps/CMakeLists.txt b/src/plugins/hs_apps/CMakeLists.txt
index 1f474828b15..179c9c7a4c4 100644
--- a/src/plugins/hs_apps/CMakeLists.txt
+++ b/src/plugins/hs_apps/CMakeLists.txt
@@ -19,7 +19,9 @@ add_vpp_plugin(hs_apps
echo_client.c
echo_server.c
hs_apps.c
- http_server.c
+ http_cli.c
+ http_client_cli.c
+ http_tps.c
proxy.c
)
@@ -33,6 +35,7 @@ if(VPP_BUILD_HS_SAPI_APPS)
sapi/vpp_echo.c
sapi/vpp_echo_common.c
sapi/vpp_echo_bapi.c
+ sapi/vpp_echo_sapi.c
sapi/vpp_echo_proto_quic.c
sapi/vpp_echo_proto_tcp.c
sapi/vpp_echo_proto_udp.c
@@ -52,7 +55,7 @@ if(VPP_BUILD_VCL_TESTS)
)
add_vpp_executable(${test}
SOURCES "vcl/${test}.c"
- LINK_LIBRARIES vppcom pthread
+ LINK_LIBRARIES vppcom pthread ${EPOLL_LIB}
NO_INSTALL
)
endforeach()
@@ -65,7 +68,7 @@ if(VPP_BUILD_VCL_TESTS)
SOURCES
"vcl/${test}.c"
vcl/vcl_test_protos.c
- LINK_LIBRARIES vppcom pthread
+ LINK_LIBRARIES vppcom pthread ${EPOLL_LIB}
NO_INSTALL
)
endforeach()
diff --git a/src/plugins/hs_apps/echo_client.c b/src/plugins/hs_apps/echo_client.c
index d641a9ec14e..d1443e75e80 100644
--- a/src/plugins/hs_apps/echo_client.c
+++ b/src/plugins/hs_apps/echo_client.c
@@ -15,38 +15,69 @@
* limitations under the License.
*/
-#include <vnet/vnet.h>
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
#include <hs_apps/echo_client.h>
-echo_client_main_t echo_client_main;
+static ec_main_t ec_main;
-#define ECHO_CLIENT_DBG (0)
-#define DBG(_fmt, _args...) \
- if (ECHO_CLIENT_DBG) \
- clib_warning (_fmt, ##_args)
+#define ec_err(_fmt, _args...) clib_warning (_fmt, ##_args);
+
+#define ec_dbg(_fmt, _args...) \
+ do \
+ { \
+ if (ec_main.cfg.verbose) \
+ ec_err (_fmt, ##_args); \
+ } \
+ while (0)
+
+#define ec_cli(_fmt, _args...) vlib_cli_output (vm, _fmt, ##_args)
static void
-signal_evt_to_cli_i (int *code)
+signal_evt_to_cli_i (void *codep)
{
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
+ int code;
+
ASSERT (vlib_get_thread_index () == 0);
- vlib_process_signal_event (ecm->vlib_main, ecm->cli_node_index, *code, 0);
+ code = pointer_to_uword (codep);
+ vlib_process_signal_event (ecm->vlib_main, ecm->cli_node_index, code, 0);
}
static void
signal_evt_to_cli (int code)
{
if (vlib_get_thread_index () != 0)
- vl_api_rpc_call_main_thread (signal_evt_to_cli_i, (u8 *) & code,
- sizeof (code));
+ session_send_rpc_evt_to_thread_force (
+ 0, signal_evt_to_cli_i, uword_to_pointer ((uword) code, void *));
else
- signal_evt_to_cli_i (&code);
+ signal_evt_to_cli_i (uword_to_pointer ((uword) code, void *));
+}
+
+static inline ec_worker_t *
+ec_worker_get (u32 thread_index)
+{
+ return vec_elt_at_index (ec_main.wrk, thread_index);
+}
+
+static inline ec_session_t *
+ec_session_alloc (ec_worker_t *wrk)
+{
+ ec_session_t *ecs;
+
+ pool_get_zero (wrk->sessions, ecs);
+ ecs->session_index = ecs - wrk->sessions;
+ ecs->thread_index = wrk->thread_index;
+
+ return ecs;
+}
+
+static inline ec_session_t *
+ec_session_get (ec_worker_t *wrk, u32 ec_index)
+{
+ return pool_elt_at_index (wrk->sessions, ec_index);
}
static void
-send_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
+send_data_chunk (ec_main_t *ecm, ec_session_t *es)
{
u8 *test_data = ecm->connect_test_data;
int test_buf_len, test_buf_offset, rv;
@@ -54,27 +85,28 @@ send_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
test_buf_len = vec_len (test_data);
ASSERT (test_buf_len > 0);
- test_buf_offset = s->bytes_sent % test_buf_len;
- bytes_this_chunk = clib_min (test_buf_len - test_buf_offset,
- s->bytes_to_send);
+ test_buf_offset = es->bytes_sent % test_buf_len;
+ bytes_this_chunk =
+ clib_min (test_buf_len - test_buf_offset, es->bytes_to_send);
- if (!ecm->is_dgram)
+ if (!es->is_dgram)
{
if (ecm->no_copy)
{
- svm_fifo_t *f = s->data.tx_fifo;
+ svm_fifo_t *f = es->tx_fifo;
rv = clib_min (svm_fifo_max_enqueue_prod (f), bytes_this_chunk);
svm_fifo_enqueue_nocopy (f, rv);
session_send_io_evt_to_thread_custom (
- &f->shr->master_session_index, s->thread_index, SESSION_IO_EVT_TX);
+ &es->vpp_session_index, es->thread_index, SESSION_IO_EVT_TX);
}
else
- rv = app_send_stream (&s->data, test_data + test_buf_offset,
- bytes_this_chunk, 0);
+ rv =
+ app_send_stream ((app_session_t *) es, test_data + test_buf_offset,
+ bytes_this_chunk, 0);
}
else
{
- svm_fifo_t *f = s->data.tx_fifo;
+ svm_fifo_t *f = es->tx_fifo;
u32 max_enqueue = svm_fifo_max_enqueue_prod (f);
if (max_enqueue < sizeof (session_dgram_hdr_t))
@@ -85,7 +117,7 @@ send_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
if (ecm->no_copy)
{
session_dgram_hdr_t hdr;
- app_session_transport_t *at = &s->data.transport;
+ app_session_transport_t *at = &es->transport;
rv = clib_min (max_enqueue, bytes_this_chunk);
@@ -101,13 +133,15 @@ send_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
svm_fifo_enqueue (f, sizeof (hdr), (u8 *) & hdr);
svm_fifo_enqueue_nocopy (f, rv);
session_send_io_evt_to_thread_custom (
- &f->shr->master_session_index, s->thread_index, SESSION_IO_EVT_TX);
+ &es->vpp_session_index, es->thread_index, SESSION_IO_EVT_TX);
}
else
{
bytes_this_chunk = clib_min (bytes_this_chunk, max_enqueue);
- rv = app_send_dgram (&s->data, test_data + test_buf_offset,
- bytes_this_chunk, 0);
+ bytes_this_chunk = clib_min (bytes_this_chunk, 1460);
+ rv =
+ app_send_dgram ((app_session_t *) es, test_data + test_buf_offset,
+ bytes_this_chunk, 0);
}
}
@@ -115,45 +149,39 @@ send_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
if (rv > 0)
{
/* Account for it... */
- s->bytes_to_send -= rv;
- s->bytes_sent += rv;
+ es->bytes_to_send -= rv;
+ es->bytes_sent += rv;
- if (ECHO_CLIENT_DBG)
+ if (ecm->cfg.verbose)
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "tx-enq: xfer %d bytes, sent %u remain %u",
.format_args = "i4i4i4",
};
- /* *INDENT-ON* */
struct
{
u32 data[3];
} *ed;
ed = ELOG_DATA (&vlib_global_main.elog_main, e);
ed->data[0] = rv;
- ed->data[1] = s->bytes_sent;
- ed->data[2] = s->bytes_to_send;
+ ed->data[1] = es->bytes_sent;
+ ed->data[2] = es->bytes_to_send;
}
}
}
static void
-receive_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
+receive_data_chunk (ec_worker_t *wrk, ec_session_t *es)
{
- svm_fifo_t *rx_fifo = s->data.rx_fifo;
- u32 thread_index = vlib_get_thread_index ();
+ ec_main_t *ecm = &ec_main;
+ svm_fifo_t *rx_fifo = es->rx_fifo;
int n_read, i;
- if (ecm->test_bytes)
+ if (ecm->cfg.test_bytes)
{
- if (!ecm->is_dgram)
- n_read = app_recv_stream (&s->data, ecm->rx_buf[thread_index],
- vec_len (ecm->rx_buf[thread_index]));
- else
- n_read = app_recv_dgram (&s->data, ecm->rx_buf[thread_index],
- vec_len (ecm->rx_buf[thread_index]));
+ n_read =
+ app_recv ((app_session_t *) es, wrk->rx_buf, vec_len (wrk->rx_buf));
}
else
{
@@ -163,15 +191,13 @@ receive_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
if (n_read > 0)
{
- if (ECHO_CLIENT_DBG)
+ if (ecm->cfg.verbose)
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "rx-deq: %d bytes",
.format_args = "i4",
};
- /* *INDENT-ON* */
struct
{
u32 data[1];
@@ -180,102 +206,104 @@ receive_data_chunk (echo_client_main_t * ecm, eclient_session_t * s)
ed->data[0] = n_read;
}
- if (ecm->test_bytes)
+ if (ecm->cfg.test_bytes)
{
for (i = 0; i < n_read; i++)
{
- if (ecm->rx_buf[thread_index][i]
- != ((s->bytes_received + i) & 0xff))
+ if (wrk->rx_buf[i] != ((es->bytes_received + i) & 0xff))
{
- clib_warning ("read %d error at byte %lld, 0x%x not 0x%x",
- n_read, s->bytes_received + i,
- ecm->rx_buf[thread_index][i],
- ((s->bytes_received + i) & 0xff));
+ ec_err ("read %d error at byte %lld, 0x%x not 0x%x", n_read,
+ es->bytes_received + i, wrk->rx_buf[i],
+ ((es->bytes_received + i) & 0xff));
ecm->test_failed = 1;
}
}
}
- ASSERT (n_read <= s->bytes_to_receive);
- s->bytes_to_receive -= n_read;
- s->bytes_received += n_read;
+ ASSERT (n_read <= es->bytes_to_receive);
+ es->bytes_to_receive -= n_read;
+ es->bytes_received += n_read;
}
}
static uword
-echo_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+ec_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- echo_client_main_t *ecm = &echo_client_main;
- int my_thread_index = vlib_get_thread_index ();
- eclient_session_t *sp;
- int i;
- int delete_session;
- u32 *connection_indices;
- u32 *connections_this_batch;
- u32 nconnections_this_batch;
-
- connection_indices = ecm->connection_index_by_thread[my_thread_index];
- connections_this_batch =
- ecm->connections_this_batch_by_thread[my_thread_index];
-
- if ((ecm->run_test != ECHO_CLIENTS_RUNNING) ||
- ((vec_len (connection_indices) == 0)
- && vec_len (connections_this_batch) == 0))
+ u32 *conn_indices, *conns_this_batch, nconns_this_batch;
+ int thread_index = vm->thread_index, i, delete_session;
+ ec_main_t *ecm = &ec_main;
+ ec_worker_t *wrk;
+ ec_session_t *es;
+ session_t *s;
+
+ if (ecm->run_test != EC_RUNNING)
+ return 0;
+
+ wrk = ec_worker_get (thread_index);
+ conn_indices = wrk->conn_indices;
+ conns_this_batch = wrk->conns_this_batch;
+
+ if (((vec_len (conn_indices) == 0) && vec_len (conns_this_batch) == 0))
return 0;
/* Grab another pile of connections */
- if (PREDICT_FALSE (vec_len (connections_this_batch) == 0))
+ if (PREDICT_FALSE (vec_len (conns_this_batch) == 0))
{
- nconnections_this_batch =
- clib_min (ecm->connections_per_batch, vec_len (connection_indices));
-
- ASSERT (nconnections_this_batch > 0);
- vec_validate (connections_this_batch, nconnections_this_batch - 1);
- clib_memcpy_fast (connections_this_batch,
- connection_indices + vec_len (connection_indices)
- - nconnections_this_batch,
- nconnections_this_batch * sizeof (u32));
- _vec_len (connection_indices) -= nconnections_this_batch;
+ nconns_this_batch =
+ clib_min (ecm->connections_per_batch, vec_len (conn_indices));
+
+ ASSERT (nconns_this_batch > 0);
+ vec_validate (conns_this_batch, nconns_this_batch - 1);
+ clib_memcpy_fast (conns_this_batch,
+ conn_indices + vec_len (conn_indices) -
+ nconns_this_batch,
+ nconns_this_batch * sizeof (u32));
+ vec_dec_len (conn_indices, nconns_this_batch);
}
- if (PREDICT_FALSE (ecm->prev_conns != ecm->connections_per_batch
- && ecm->prev_conns == vec_len (connections_this_batch)))
+ /*
+ * Track progress
+ */
+ if (PREDICT_FALSE (ecm->prev_conns != ecm->connections_per_batch &&
+ ecm->prev_conns == vec_len (conns_this_batch)))
{
ecm->repeats++;
- ecm->prev_conns = vec_len (connections_this_batch);
+ ecm->prev_conns = vec_len (conns_this_batch);
if (ecm->repeats == 500000)
{
- clib_warning ("stuck clients");
+ ec_err ("stuck clients");
}
}
else
{
- ecm->prev_conns = vec_len (connections_this_batch);
+ ecm->prev_conns = vec_len (conns_this_batch);
ecm->repeats = 0;
}
- for (i = 0; i < vec_len (connections_this_batch); i++)
+ /*
+ * Handle connections in this batch
+ */
+ for (i = 0; i < vec_len (conns_this_batch); i++)
{
- delete_session = 1;
+ es = ec_session_get (wrk, conns_this_batch[i]);
- sp = pool_elt_at_index (ecm->sessions, connections_this_batch[i]);
+ delete_session = 1;
- if (sp->bytes_to_send > 0)
+ if (es->bytes_to_send > 0)
{
- send_data_chunk (ecm, sp);
+ send_data_chunk (ecm, es);
delete_session = 0;
}
- if (sp->bytes_to_receive > 0)
+
+ if (es->bytes_to_receive > 0)
{
delete_session = 0;
}
+
if (PREDICT_FALSE (delete_session == 1))
{
- session_t *s;
-
- clib_atomic_fetch_add (&ecm->tx_total, sp->bytes_sent);
- clib_atomic_fetch_add (&ecm->rx_total, sp->bytes_received);
- s = session_get_from_handle_if_valid (sp->vpp_session_handle);
+ clib_atomic_fetch_add (&ecm->tx_total, es->bytes_sent);
+ clib_atomic_fetch_add (&ecm->rx_total, es->bytes_received);
+ s = session_get_from_handle_if_valid (es->vpp_session_handle);
if (s)
{
@@ -284,205 +312,327 @@ echo_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
a->app_index = ecm->app_index;
vnet_disconnect_session (a);
- vec_delete (connections_this_batch, 1, i);
+ vec_delete (conns_this_batch, 1, i);
i--;
clib_atomic_fetch_add (&ecm->ready_connections, -1);
}
else
{
- clib_warning ("session AWOL?");
- vec_delete (connections_this_batch, 1, i);
+ ec_err ("session AWOL?");
+ vec_delete (conns_this_batch, 1, i);
}
/* Kick the debug CLI process */
if (ecm->ready_connections == 0)
{
- signal_evt_to_cli (2);
+ signal_evt_to_cli (EC_CLI_TEST_DONE);
}
}
}
- ecm->connection_index_by_thread[my_thread_index] = connection_indices;
- ecm->connections_this_batch_by_thread[my_thread_index] =
- connections_this_batch;
+ wrk->conn_indices = conn_indices;
+ wrk->conns_this_batch = conns_this_batch;
return 0;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (echo_clients_node) =
-{
- .function = echo_client_node_fn,
+VLIB_REGISTER_NODE (echo_clients_node) = {
+ .function = ec_node_fn,
.name = "echo-clients",
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
+
+static void
+ec_reset_runtime_config (ec_main_t *ecm)
+{
+ hs_test_cfg_init (&ecm->cfg);
+ ecm->n_clients = 1;
+ ecm->quic_streams = 1;
+ ecm->bytes_to_send = 8192;
+ ecm->echo_bytes = 0;
+ ecm->fifo_size = 64 << 10;
+ ecm->connections_per_batch = 1000;
+ ecm->private_segment_count = 0;
+ ecm->private_segment_size = 256 << 20;
+ ecm->test_failed = 0;
+ ecm->tls_engine = CRYPTO_ENGINE_OPENSSL;
+ ecm->no_copy = 0;
+ ecm->run_test = EC_STARTING;
+ ecm->ready_connections = 0;
+ ecm->connect_conn_index = 0;
+ ecm->rx_total = 0;
+ ecm->tx_total = 0;
+ ecm->barrier_acq_needed = 0;
+ ecm->prealloc_sessions = 0;
+ ecm->prealloc_fifos = 0;
+ ecm->appns_id = 0;
+ ecm->appns_secret = 0;
+ ecm->attach_flags = 0;
+ ecm->syn_timeout = 20.0;
+ ecm->test_timeout = 20.0;
+ vec_free (ecm->connect_uri);
+}
static int
-echo_clients_init (vlib_main_t * vm)
+ec_init (vlib_main_t *vm)
{
- echo_client_main_t *ecm = &echo_client_main;
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ ec_main_t *ecm = &ec_main;
+ ec_worker_t *wrk;
u32 num_threads;
int i;
- num_threads = 1 /* main thread */ + vtm->n_threads;
+ ec_reset_runtime_config (ecm);
+
+ /* Store cli process node index for signaling */
+ ecm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
+ ecm->vlib_main = vm;
+
+ if (vlib_num_workers ())
+ {
+ /* The request came over the binary api and the inband cli handler
+ * is not mp_safe. Drop the barrier to make sure the workers are not
+ * blocked.
+ */
+ if (vlib_thread_is_main_w_barrier ())
+ {
+ ecm->barrier_acq_needed = 1;
+ vlib_worker_thread_barrier_release (vm);
+ }
+ /*
+ * There's a good chance that both the client and the server echo
+ * apps will be enabled so make sure the session queue node polls on
+ * the main thread as connections will probably be established on it.
+ */
+ vlib_node_set_state (vm, session_queue_node.index,
+ VLIB_NODE_STATE_POLLING);
+ }
+
+ /* App init done only once */
+ if (ecm->app_is_init)
+ return 0;
+
/* Init test data. Big buffer */
vec_validate (ecm->connect_test_data, 4 * 1024 * 1024 - 1);
for (i = 0; i < vec_len (ecm->connect_test_data); i++)
ecm->connect_test_data[i] = i & 0xff;
- vec_validate (ecm->rx_buf, num_threads - 1);
- for (i = 0; i < num_threads; i++)
- vec_validate (ecm->rx_buf[i], vec_len (ecm->connect_test_data) - 1);
+ num_threads = 1 /* main thread */ + vlib_num_workers ();
+ vec_validate (ecm->wrk, num_threads - 1);
+ vec_foreach (wrk, ecm->wrk)
+ {
+ vec_validate (wrk->rx_buf, vec_len (ecm->connect_test_data) - 1);
+ wrk->thread_index = wrk - ecm->wrk;
+ wrk->vpp_event_queue =
+ session_main_get_vpp_event_queue (wrk->thread_index);
+ }
- ecm->is_init = 1;
+ ecm->app_is_init = 1;
- vec_validate (ecm->connection_index_by_thread, vtm->n_vlib_mains);
- vec_validate (ecm->connections_this_batch_by_thread, vtm->n_vlib_mains);
- vec_validate (ecm->quic_session_index_by_thread, vtm->n_vlib_mains);
- vec_validate (ecm->vpp_event_queue, vtm->n_vlib_mains);
+ vlib_worker_thread_barrier_sync (vm);
+ vnet_session_enable_disable (vm, 1 /* turn on session and transports */);
+
+ /* Turn on the builtin client input nodes */
+ foreach_vlib_main ()
+ vlib_node_set_state (this_vlib_main, echo_clients_node.index,
+ VLIB_NODE_STATE_POLLING);
+
+ vlib_worker_thread_barrier_release (vm);
return 0;
}
+static void
+ec_prealloc_sessions (ec_main_t *ecm)
+{
+ u32 sessions_per_wrk, n_wrks;
+ ec_worker_t *wrk;
+
+ n_wrks = vlib_num_workers () ? vlib_num_workers () : 1;
+
+ sessions_per_wrk = ecm->n_clients / n_wrks;
+ vec_foreach (wrk, ecm->wrk)
+ pool_init_fixed (wrk->sessions, 1.1 * sessions_per_wrk);
+}
+
+static void
+ec_worker_cleanup (ec_worker_t *wrk)
+{
+ pool_free (wrk->sessions);
+ vec_free (wrk->conn_indices);
+ vec_free (wrk->conns_this_batch);
+}
+
+static void
+ec_cleanup (ec_main_t *ecm)
+{
+ ec_worker_t *wrk;
+
+ vec_foreach (wrk, ecm->wrk)
+ ec_worker_cleanup (wrk);
+
+ vec_free (ecm->connect_uri);
+ vec_free (ecm->appns_id);
+
+ if (ecm->barrier_acq_needed)
+ vlib_worker_thread_barrier_sync (ecm->vlib_main);
+}
+
static int
-quic_echo_clients_qsession_connected_callback (u32 app_index, u32 api_context,
- session_t * s,
- session_error_t err)
+quic_ec_qsession_connected_callback (u32 app_index, u32 api_context,
+ session_t *s, session_error_t err)
{
- echo_client_main_t *ecm = &echo_client_main;
- vnet_connect_args_t *a = 0;
- int rv;
- u8 thread_index = vlib_get_thread_index ();
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
+ ec_main_t *ecm = &ec_main;
+ vnet_connect_args_t _a, *a = &_a;
u32 stream_n;
- session_handle_t handle;
+ int rv;
- DBG ("QUIC Connection handle %d", session_handle (s));
+ ec_dbg ("QUIC Connection handle %d", session_handle (s));
- vec_validate (a, 1);
a->uri = (char *) ecm->connect_uri;
if (parse_uri (a->uri, &sep))
return -1;
- sep.parent_handle = handle = session_handle (s);
+ sep.parent_handle = session_handle (s);
for (stream_n = 0; stream_n < ecm->quic_streams; stream_n++)
{
clib_memset (a, 0, sizeof (*a));
a->app_index = ecm->app_index;
- a->api_context = -1 - api_context;
+ a->api_context = -2 - api_context;
clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
- DBG ("QUIC opening stream %d", stream_n);
+ ec_dbg ("QUIC opening stream %d", stream_n);
if ((rv = vnet_connect (a)))
{
clib_error ("Stream session %d opening failed: %d", stream_n, rv);
return -1;
}
- DBG ("QUIC stream %d connected", stream_n);
+ ec_dbg ("QUIC stream %d connected", stream_n);
}
- /*
- * 's' is no longer valid, its underlying pool could have been moved in
- * vnet_connect()
- */
- vec_add1 (ecm->quic_session_index_by_thread[thread_index], handle);
- vec_free (a);
return 0;
}
static int
-quic_echo_clients_session_connected_callback (u32 app_index, u32 api_context,
- session_t * s,
- session_error_t err)
+ec_ctrl_send (hs_test_cmd_t cmd)
+{
+ ec_main_t *ecm = &ec_main;
+ session_t *s;
+ int rv;
+
+ ecm->cfg.cmd = cmd;
+ if (ecm->ctrl_session_handle == SESSION_INVALID_HANDLE)
+ {
+ ec_dbg ("ctrl session went away");
+ return -1;
+ }
+
+ s = session_get_from_handle_if_valid (ecm->ctrl_session_handle);
+ if (!s)
+ {
+ ec_err ("ctrl session not found");
+ return -1;
+ }
+
+ ec_dbg ("sending test paramters to the server..");
+ if (ecm->cfg.verbose)
+ hs_test_cfg_dump (&ecm->cfg, 1);
+
+ rv = svm_fifo_enqueue (s->tx_fifo, sizeof (ecm->cfg), (u8 *) &ecm->cfg);
+ ASSERT (rv == sizeof (ecm->cfg));
+ session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX);
+ return 0;
+}
+
+static int
+ec_ctrl_session_connected_callback (session_t *s)
+{
+ ec_main_t *ecm = &ec_main;
+
+ s->opaque = HS_CTRL_HANDLE;
+ ecm->ctrl_session_handle = session_handle (s);
+
+ /* send test parameters to the server */
+ ec_ctrl_send (HS_TEST_CMD_SYNC);
+ return 0;
+}
+
+static int
+quic_ec_session_connected_callback (u32 app_index, u32 api_context,
+ session_t *s, session_error_t err)
{
- echo_client_main_t *ecm = &echo_client_main;
- eclient_session_t *session;
- u32 session_index;
- u8 thread_index;
+ ec_main_t *ecm = &ec_main;
+ ec_session_t *es;
+ ec_worker_t *wrk;
+ u32 thread_index;
- if (PREDICT_FALSE (ecm->run_test != ECHO_CLIENTS_STARTING))
+ if (PREDICT_FALSE (api_context == HS_CTRL_HANDLE))
+ return ec_ctrl_session_connected_callback (s);
+
+ if (PREDICT_FALSE (ecm->run_test != EC_STARTING))
return -1;
if (err)
{
- clib_warning ("connection %d failed!", api_context);
- ecm->run_test = ECHO_CLIENTS_EXITING;
- signal_evt_to_cli (-1);
+ ec_err ("connection %d failed!", api_context);
+ ecm->run_test = EC_EXITING;
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
return 0;
}
if (s->listener_handle == SESSION_INVALID_HANDLE)
- return quic_echo_clients_qsession_connected_callback (app_index,
- api_context, s,
- err);
- DBG ("STREAM Connection callback %d", api_context);
+ return quic_ec_qsession_connected_callback (app_index, api_context, s,
+ err);
+ ec_dbg ("STREAM Connection callback %d", api_context);
thread_index = s->thread_index;
ASSERT (thread_index == vlib_get_thread_index ()
|| session_transport_service_type (s) == TRANSPORT_SERVICE_CL);
- if (!ecm->vpp_event_queue[thread_index])
- ecm->vpp_event_queue[thread_index] =
- session_main_get_vpp_event_queue (thread_index);
+ wrk = ec_worker_get (thread_index);
/*
* Setup session
*/
- clib_spinlock_lock_if_init (&ecm->sessions_lock);
- pool_get (ecm->sessions, session);
- clib_spinlock_unlock_if_init (&ecm->sessions_lock);
-
- clib_memset (session, 0, sizeof (*session));
- session_index = session - ecm->sessions;
- session->bytes_to_send = ecm->bytes_to_send;
- session->bytes_to_receive = ecm->no_return ? 0ULL : ecm->bytes_to_send;
- session->data.rx_fifo = s->rx_fifo;
- session->data.rx_fifo->shr->client_session_index = session_index;
- session->data.tx_fifo = s->tx_fifo;
- session->data.tx_fifo->shr->client_session_index = session_index;
- session->data.vpp_evt_q = ecm->vpp_event_queue[thread_index];
- session->vpp_session_handle = session_handle (s);
-
- if (ecm->is_dgram)
- {
- transport_connection_t *tc;
- tc = session_get_transport (s);
- clib_memcpy_fast (&session->data.transport, tc,
- sizeof (session->data.transport));
- session->data.is_dgram = 1;
- }
+ es = ec_session_alloc (wrk);
+ hs_test_app_session_init (es, s);
- vec_add1 (ecm->connection_index_by_thread[thread_index], session_index);
+ es->bytes_to_send = ecm->bytes_to_send;
+ es->bytes_to_receive = ecm->echo_bytes ? ecm->bytes_to_send : 0ULL;
+ es->vpp_session_handle = session_handle (s);
+ es->vpp_session_index = s->session_index;
+ s->opaque = es->session_index;
+
+ vec_add1 (wrk->conn_indices, es->session_index);
clib_atomic_fetch_add (&ecm->ready_connections, 1);
if (ecm->ready_connections == ecm->expected_connections)
{
- ecm->run_test = ECHO_CLIENTS_RUNNING;
+ ecm->run_test = EC_RUNNING;
/* Signal the CLI process that the action is starting... */
- signal_evt_to_cli (1);
+ signal_evt_to_cli (EC_CLI_CONNECTS_DONE);
}
return 0;
}
static int
-echo_clients_session_connected_callback (u32 app_index, u32 api_context,
- session_t * s, session_error_t err)
+ec_session_connected_callback (u32 app_index, u32 api_context, session_t *s,
+ session_error_t err)
{
- echo_client_main_t *ecm = &echo_client_main;
- eclient_session_t *session;
- u32 session_index;
- u8 thread_index;
+ ec_main_t *ecm = &ec_main;
+ ec_session_t *es;
+ u32 thread_index;
+ ec_worker_t *wrk;
- if (PREDICT_FALSE (ecm->run_test != ECHO_CLIENTS_STARTING))
+ if (PREDICT_FALSE (ecm->run_test != EC_STARTING))
return -1;
if (err)
{
- clib_warning ("connection %d failed!", api_context);
- ecm->run_test = ECHO_CLIENTS_EXITING;
- signal_evt_to_cli (-1);
+ ec_err ("connection %d failed! %U", api_context, format_session_error,
+ err);
+ ecm->run_test = EC_EXITING;
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
return 0;
}
@@ -490,57 +640,43 @@ echo_clients_session_connected_callback (u32 app_index, u32 api_context,
ASSERT (thread_index == vlib_get_thread_index ()
|| session_transport_service_type (s) == TRANSPORT_SERVICE_CL);
- if (!ecm->vpp_event_queue[thread_index])
- ecm->vpp_event_queue[thread_index] =
- session_main_get_vpp_event_queue (thread_index);
+ if (PREDICT_FALSE (api_context == HS_CTRL_HANDLE))
+ return ec_ctrl_session_connected_callback (s);
+
+ wrk = ec_worker_get (thread_index);
/*
* Setup session
*/
- clib_spinlock_lock_if_init (&ecm->sessions_lock);
- pool_get (ecm->sessions, session);
- clib_spinlock_unlock_if_init (&ecm->sessions_lock);
-
- clib_memset (session, 0, sizeof (*session));
- session_index = session - ecm->sessions;
- session->bytes_to_send = ecm->bytes_to_send;
- session->bytes_to_receive = ecm->no_return ? 0ULL : ecm->bytes_to_send;
- session->data.rx_fifo = s->rx_fifo;
- session->data.rx_fifo->shr->client_session_index = session_index;
- session->data.tx_fifo = s->tx_fifo;
- session->data.tx_fifo->shr->client_session_index = session_index;
- session->data.vpp_evt_q = ecm->vpp_event_queue[thread_index];
- session->vpp_session_handle = session_handle (s);
-
- if (ecm->is_dgram)
- {
- transport_connection_t *tc;
- tc = session_get_transport (s);
- clib_memcpy_fast (&session->data.transport, tc,
- sizeof (session->data.transport));
- session->data.is_dgram = 1;
- }
+ es = ec_session_alloc (wrk);
+ hs_test_app_session_init (es, s);
+
+ es->bytes_to_send = ecm->bytes_to_send;
+ es->bytes_to_receive = ecm->echo_bytes ? ecm->bytes_to_send : 0ULL;
+ es->vpp_session_handle = session_handle (s);
+ es->vpp_session_index = s->session_index;
+ s->opaque = es->session_index;
- vec_add1 (ecm->connection_index_by_thread[thread_index], session_index);
+ vec_add1 (wrk->conn_indices, es->session_index);
clib_atomic_fetch_add (&ecm->ready_connections, 1);
if (ecm->ready_connections == ecm->expected_connections)
{
- ecm->run_test = ECHO_CLIENTS_RUNNING;
+ ecm->run_test = EC_RUNNING;
/* Signal the CLI process that the action is starting... */
- signal_evt_to_cli (1);
+ signal_evt_to_cli (EC_CLI_CONNECTS_DONE);
}
return 0;
}
static void
-echo_clients_session_reset_callback (session_t * s)
+ec_session_reset_callback (session_t *s)
{
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
if (s->session_state == SESSION_STATE_READY)
- clib_warning ("Reset active connection %U", format_session, s, 2);
+ ec_err ("Reset active connection %U", format_session, s, 2);
a->handle = session_handle (s);
a->app_index = ecm->app_index;
@@ -549,16 +685,23 @@ echo_clients_session_reset_callback (session_t * s)
}
static int
-echo_clients_session_create_callback (session_t * s)
+ec_session_accept_callback (session_t *s)
{
return 0;
}
static void
-echo_clients_session_disconnect_callback (session_t * s)
+ec_session_disconnect_callback (session_t *s)
{
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ if (session_handle (s) == ecm->ctrl_session_handle)
+ {
+ ec_dbg ("ctrl session disconnect");
+ ecm->ctrl_session_handle = SESSION_INVALID_HANDLE;
+ }
+
a->handle = session_handle (s);
a->app_index = ecm->app_index;
vnet_disconnect_session (a);
@@ -566,9 +709,9 @@ echo_clients_session_disconnect_callback (session_t * s)
}
void
-echo_clients_session_disconnect (session_t * s)
+ec_session_disconnect (session_t *s)
{
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
a->handle = session_handle (s);
a->app_index = ecm->app_index;
@@ -576,54 +719,124 @@ echo_clients_session_disconnect (session_t * s)
}
static int
-echo_clients_rx_callback (session_t * s)
+ec_ctrl_session_rx_callback (session_t *s)
{
- echo_client_main_t *ecm = &echo_client_main;
- eclient_session_t *sp;
+ ec_main_t *ecm = &ec_main;
+ int rx_bytes;
+ hs_test_cfg_t cfg = { 0 };
- if (PREDICT_FALSE (ecm->run_test != ECHO_CLIENTS_RUNNING))
+ rx_bytes = svm_fifo_dequeue (s->rx_fifo, sizeof (cfg), (u8 *) &cfg);
+ if (rx_bytes != sizeof (cfg))
{
- echo_clients_session_disconnect (s);
+ ec_err ("invalid cfg length %d (expected %d)", rx_bytes, sizeof (cfg));
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
return -1;
}
- sp =
- pool_elt_at_index (ecm->sessions, s->rx_fifo->shr->client_session_index);
- receive_data_chunk (ecm, sp);
+ ec_dbg ("control message received:");
+ if (ecm->cfg.verbose)
+ hs_test_cfg_dump (&cfg, 1);
- if (svm_fifo_max_dequeue_cons (s->rx_fifo))
+ switch (cfg.cmd)
{
- if (svm_fifo_set_event (s->rx_fifo))
- session_send_io_evt_to_thread (s->rx_fifo, SESSION_IO_EVT_BUILTIN_RX);
+ case HS_TEST_CMD_SYNC:
+ switch (ecm->run_test)
+ {
+ case EC_STARTING:
+ if (!hs_test_cfg_verify (&cfg, &ecm->cfg))
+ {
+ ec_err ("invalid config received from server!");
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
+ return -1;
+ }
+ signal_evt_to_cli (EC_CLI_CFG_SYNC);
+ break;
+
+ case EC_RUNNING:
+ ec_dbg ("test running..");
+ break;
+
+ case EC_EXITING:
+ /* post test sync */
+ signal_evt_to_cli (EC_CLI_CFG_SYNC);
+ break;
+
+ default:
+ ec_err ("unexpected test state! %d", ecm->run_test);
+ break;
+ }
+ break;
+ case HS_TEST_CMD_START:
+ signal_evt_to_cli (EC_CLI_START);
+ break;
+ case HS_TEST_CMD_STOP:
+ signal_evt_to_cli (EC_CLI_STOP);
+ break;
+ default:
+ ec_err ("unexpected cmd! %d", cfg.cmd);
+ break;
}
+
return 0;
}
-int
-echo_client_add_segment_callback (u32 client_index, u64 segment_handle)
+static int
+ec_session_rx_callback (session_t *s)
{
- /* New heaps may be added */
+ ec_main_t *ecm = &ec_main;
+ ec_worker_t *wrk;
+ ec_session_t *es;
+
+ if (PREDICT_FALSE (s->opaque == HS_CTRL_HANDLE))
+ return ec_ctrl_session_rx_callback (s);
+
+ if (PREDICT_FALSE (ecm->run_test != EC_RUNNING))
+ {
+ ec_session_disconnect (s);
+ return -1;
+ }
+
+ wrk = ec_worker_get (s->thread_index);
+ es = ec_session_get (wrk, s->opaque);
+
+ receive_data_chunk (wrk, es);
+
+ if (svm_fifo_max_dequeue_cons (s->rx_fifo))
+ session_enqueue_notify (s);
+
return 0;
}
-/* *INDENT-OFF* */
-static session_cb_vft_t echo_clients = {
- .session_reset_callback = echo_clients_session_reset_callback,
- .session_connected_callback = echo_clients_session_connected_callback,
- .session_accept_callback = echo_clients_session_create_callback,
- .session_disconnect_callback = echo_clients_session_disconnect_callback,
- .builtin_app_rx_callback = echo_clients_rx_callback,
- .add_segment_callback = echo_client_add_segment_callback
+static int
+ec_add_segment_callback (u32 app_index, u64 segment_handle)
+{
+ /* New segments may be added */
+ return 0;
+}
+
+static int
+ec_del_segment_callback (u32 app_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static session_cb_vft_t ec_cb_vft = {
+ .session_reset_callback = ec_session_reset_callback,
+ .session_connected_callback = ec_session_connected_callback,
+ .session_accept_callback = ec_session_accept_callback,
+ .session_disconnect_callback = ec_session_disconnect_callback,
+ .builtin_app_rx_callback = ec_session_rx_callback,
+ .add_segment_callback = ec_add_segment_callback,
+ .del_segment_callback = ec_del_segment_callback,
};
-/* *INDENT-ON* */
static clib_error_t *
-echo_clients_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
+ec_attach ()
{
vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
- u32 prealloc_fifos, segment_size = 256 << 20;
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
vnet_app_attach_args_t _a, *a = &_a;
+ u32 prealloc_fifos;
u64 options[18];
int rv;
@@ -633,18 +846,14 @@ echo_clients_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
a->api_client_index = ~0;
a->name = format (0, "echo_client");
if (ecm->transport_proto == TRANSPORT_PROTO_QUIC)
- echo_clients.session_connected_callback =
- quic_echo_clients_session_connected_callback;
- a->session_cb_vft = &echo_clients;
+ ec_cb_vft.session_connected_callback = quic_ec_session_connected_callback;
+ a->session_cb_vft = &ec_cb_vft;
prealloc_fifos = ecm->prealloc_fifos ? ecm->expected_connections : 1;
- if (ecm->private_segment_size)
- segment_size = ecm->private_segment_size;
-
options[APP_OPTIONS_ACCEPT_COOKIE] = 0x12345678;
- options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
- options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+ options[APP_OPTIONS_SEGMENT_SIZE] = ecm->private_segment_size;
+ options[APP_OPTIONS_ADD_SEGMENT_SIZE] = ecm->private_segment_size;
options[APP_OPTIONS_RX_FIFO_SIZE] = ecm->fifo_size;
options[APP_OPTIONS_TX_FIFO_SIZE] = ecm->fifo_size;
options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = ecm->private_segment_count;
@@ -652,13 +861,13 @@ echo_clients_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
options[APP_OPTIONS_TLS_ENGINE] = ecm->tls_engine;
options[APP_OPTIONS_PCT_FIRST_ALLOC] = 100;
- if (appns_id)
+ options[APP_OPTIONS_FLAGS] |= ecm->attach_flags;
+ if (ecm->appns_id)
{
- options[APP_OPTIONS_FLAGS] |= appns_flags;
- options[APP_OPTIONS_NAMESPACE_SECRET] = appns_secret;
+ options[APP_OPTIONS_NAMESPACE_SECRET] = ecm->appns_secret;
+ a->namespace_id = ecm->appns_id;
}
a->options = options;
- a->namespace_id = appns_id;
if ((rv = vnet_application_attach (a)))
return clib_error_return (0, "attach returned %d", rv);
@@ -674,16 +883,21 @@ echo_clients_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
vnet_app_add_cert_key_pair (ck_pair);
ecm->ckpair_index = ck_pair->index;
+ ecm->test_client_attached = 1;
+
return 0;
}
static int
-echo_clients_detach ()
+ec_detach ()
{
- echo_client_main_t *ecm = &echo_client_main;
+ ec_main_t *ecm = &ec_main;
vnet_app_detach_args_t _da, *da = &_da;
int rv;
+ if (!ecm->test_client_attached)
+ return 0;
+
da->app_index = ecm->app_index;
da->api_client_index = ~0;
rv = vnet_application_detach (da);
@@ -694,412 +908,450 @@ echo_clients_detach ()
return rv;
}
-static void *
-echo_client_thread_fn (void *arg)
-{
- return 0;
-}
-
-/** Start a transmit thread */
-int
-echo_clients_start_tx_pthread (echo_client_main_t * ecm)
-{
- if (ecm->client_thread_handle == 0)
- {
- int rv = pthread_create (&ecm->client_thread_handle,
- NULL /*attr */ ,
- echo_client_thread_fn, 0);
- if (rv)
- {
- ecm->client_thread_handle = 0;
- return -1;
- }
- }
- return 0;
-}
-
static int
-echo_client_transport_needs_crypto (transport_proto_t proto)
+ec_transport_needs_crypto (transport_proto_t proto)
{
return proto == TRANSPORT_PROTO_TLS || proto == TRANSPORT_PROTO_DTLS ||
proto == TRANSPORT_PROTO_QUIC;
}
-clib_error_t *
-echo_clients_connect (vlib_main_t * vm, u32 n_clients)
+static int
+ec_connect_rpc (void *args)
{
- session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
- echo_client_main_t *ecm = &echo_client_main;
- vnet_connect_args_t _a, *a = &_a;
- int i, rv;
-
- clib_memset (a, 0, sizeof (*a));
+ ec_main_t *ecm = &ec_main;
+ vnet_connect_args_t _a = {}, *a = &_a;
+ int rv, needs_crypto;
+ u32 n_clients, ci;
+
+ n_clients = ecm->n_clients;
+ needs_crypto = ec_transport_needs_crypto (ecm->transport_proto);
+ clib_memcpy (&a->sep_ext, &ecm->connect_sep, sizeof (ecm->connect_sep));
+ a->sep_ext.transport_flags |= TRANSPORT_CFG_F_CONNECTED;
+ a->app_index = ecm->app_index;
- if (parse_uri ((char *) ecm->connect_uri, &sep))
- return clib_error_return (0, "invalid uri");
+ ci = ecm->connect_conn_index;
- for (i = 0; i < n_clients; i++)
+ while (ci < n_clients)
{
- clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
- a->api_context = i;
- a->app_index = ecm->app_index;
- if (echo_client_transport_needs_crypto (a->sep_ext.transport_proto))
+ /* Crude pacing for call setups */
+ if (ci - ecm->ready_connections > 128)
+ {
+ ecm->connect_conn_index = ci;
+ break;
+ }
+
+ a->api_context = ci;
+ if (needs_crypto)
{
session_endpoint_alloc_ext_cfg (&a->sep_ext,
TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
a->sep_ext.ext_cfg->crypto.ckpair_index = ecm->ckpair_index;
}
- vlib_worker_thread_barrier_sync (vm);
rv = vnet_connect (a);
- if (a->sep_ext.ext_cfg)
+
+ if (needs_crypto)
clib_mem_free (a->sep_ext.ext_cfg);
+
if (rv)
{
- vlib_worker_thread_barrier_release (vm);
- return clib_error_return (0, "connect returned: %d", rv);
+ ec_err ("connect returned: %U", format_session_error, rv);
+ ecm->run_test = EC_EXITING;
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
+ break;
}
- vlib_worker_thread_barrier_release (vm);
- /* Crude pacing for call setups */
- if ((i % 16) == 0)
- vlib_process_suspend (vm, 100e-6);
- ASSERT (i + 1 >= ecm->ready_connections);
- while (i + 1 - ecm->ready_connections > 128)
- vlib_process_suspend (vm, 1e-3);
+ ci += 1;
}
+
+ if (ci < ecm->expected_connections && ecm->run_test != EC_EXITING)
+ ec_program_connects ();
+
return 0;
}
-#define ec_cli_output(_fmt, _args...) \
- if (!ecm->no_output) \
- vlib_cli_output(vm, _fmt, ##_args)
+void
+ec_program_connects (void)
+{
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (), ec_connect_rpc,
+ 0);
+}
static clib_error_t *
-echo_clients_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
+ec_ctrl_connect_rpc ()
{
- echo_client_main_t *ecm = &echo_client_main;
- vlib_thread_main_t *thread_main = vlib_get_thread_main ();
- u64 tmp, total_bytes, appns_flags = 0, appns_secret = 0;
- session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
- f64 test_timeout = 20.0, syn_timeout = 20.0, delta;
- char *default_uri = "tcp://6.0.1.1/1234";
- u8 *appns_id = 0, barrier_acq_needed = 0;
- int preallocate_sessions = 0, i, rv;
+ session_error_t rv;
+ ec_main_t *ecm = &ec_main;
+ vnet_connect_args_t _a = {}, *a = &_a;
+
+ a->api_context = HS_CTRL_HANDLE;
+ ecm->cfg.cmd = HS_TEST_CMD_SYNC;
+ clib_memcpy (&a->sep_ext, &ecm->connect_sep, sizeof (ecm->connect_sep));
+ a->sep_ext.transport_proto = TRANSPORT_PROTO_TCP;
+ a->app_index = ecm->app_index;
+
+ rv = vnet_connect (a);
+ if (rv)
+ {
+ ec_err ("ctrl connect returned: %U", format_session_error, rv);
+ ecm->run_test = EC_EXITING;
+ signal_evt_to_cli (EC_CLI_CONNECTS_FAILED);
+ }
+ return 0;
+}
+
+static void
+ec_ctrl_connect (void)
+{
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ ec_ctrl_connect_rpc, 0);
+}
+
+static void
+ec_ctrl_session_disconnect ()
+{
+ ec_main_t *ecm = &ec_main;
+ vnet_disconnect_args_t _a, *a = &_a;
+ session_error_t err;
+
+ a->handle = ecm->ctrl_session_handle;
+ a->app_index = ecm->app_index;
+ err = vnet_disconnect_session (a);
+ if (err)
+ ec_err ("vnet_disconnect_session: %U", format_session_error, err);
+}
+
+static int
+ec_ctrl_test_sync ()
+{
+ ec_main_t *ecm = &ec_main;
+ ecm->cfg.test = HS_TEST_TYPE_ECHO;
+ return ec_ctrl_send (HS_TEST_CMD_SYNC);
+}
+
+static int
+ec_ctrl_test_start ()
+{
+ return ec_ctrl_send (HS_TEST_CMD_START);
+}
+
+static int
+ec_ctrl_test_stop ()
+{
+ return ec_ctrl_send (HS_TEST_CMD_STOP);
+}
+
+#define ec_wait_for_signal(_sig) \
+ vlib_process_wait_for_event_or_clock (vm, ecm->syn_timeout); \
+ event_type = vlib_process_get_events (vm, &event_data); \
+ switch (event_type) \
+ { \
+ case ~0: \
+ ec_cli ("Timeout while waiting for " #_sig); \
+ error = \
+ clib_error_return (0, "failed: timeout while waiting for " #_sig); \
+ goto cleanup; \
+ case _sig: \
+ break; \
+ default: \
+ ec_cli ("unexpected event while waiting for " #_sig ": %d", \
+ event_type); \
+ error = \
+ clib_error_return (0, "failed: unexpected event: %d", event_type); \
+ goto cleanup; \
+ }
+
+static clib_error_t *
+ec_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ char *default_uri = "tcp://6.0.1.1/1234", *transfer_type;
+ ec_main_t *ecm = &ec_main;
uword *event_data = 0, event_type;
- f64 time_before_connects;
- u32 n_clients = 1;
- char *transfer_type;
clib_error_t *error = 0;
+ int rv, had_config = 1;
+ u64 tmp, total_bytes;
+ f64 delta;
- ecm->quic_streams = 1;
- ecm->bytes_to_send = 8192;
- ecm->no_return = 0;
- ecm->fifo_size = 64 << 10;
- ecm->connections_per_batch = 1000;
- ecm->private_segment_count = 0;
- ecm->private_segment_size = 0;
- ecm->no_output = 0;
- ecm->test_bytes = 0;
- ecm->test_failed = 0;
- ecm->vlib_main = vm;
- ecm->tls_engine = CRYPTO_ENGINE_OPENSSL;
- ecm->no_copy = 0;
- ecm->run_test = ECHO_CLIENTS_STARTING;
+ if (ecm->test_client_attached)
+ return clib_error_return (0, "failed: already running!");
- if (vlib_num_workers ())
+ if (ec_init (vm))
{
- /* The request came over the binary api and the inband cli handler
- * is not mp_safe. Drop the barrier to make sure the workers are not
- * blocked.
- */
- if (vlib_thread_is_main_w_barrier ())
- {
- barrier_acq_needed = 1;
- vlib_worker_thread_barrier_release (vm);
- }
- /*
- * There's a good chance that both the client and the server echo
- * apps will be enabled so make sure the session queue node polls on
- * the main thread as connections will probably be established on it.
- */
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_POLLING);
+ error = clib_error_return (0, "failed init");
+ goto cleanup;
}
- if (thread_main->n_vlib_mains > 1)
- clib_spinlock_init (&ecm->sessions_lock);
- vec_free (ecm->connect_uri);
+ if (!unformat_user (input, unformat_line_input, line_input))
+ {
+ had_config = 0;
+ goto parse_config;
+ }
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "uri %s", &ecm->connect_uri))
+ if (unformat (line_input, "uri %s", &ecm->connect_uri))
;
- else if (unformat (input, "nclients %d", &n_clients))
+ else if (unformat (line_input, "nclients %d", &ecm->n_clients))
;
- else if (unformat (input, "quic-streams %d", &ecm->quic_streams))
+ else if (unformat (line_input, "quic-streams %d", &ecm->quic_streams))
;
- else if (unformat (input, "mbytes %lld", &tmp))
+ else if (unformat (line_input, "mbytes %lld", &tmp))
ecm->bytes_to_send = tmp << 20;
- else if (unformat (input, "gbytes %lld", &tmp))
+ else if (unformat (line_input, "gbytes %lld", &tmp))
ecm->bytes_to_send = tmp << 30;
- else if (unformat (input, "bytes %lld", &ecm->bytes_to_send))
+ else if (unformat (line_input, "bytes %U", unformat_memory_size,
+ &ecm->bytes_to_send))
+ ;
+ else if (unformat (line_input, "test-timeout %f", &ecm->test_timeout))
;
- else if (unformat (input, "test-timeout %f", &test_timeout))
+ else if (unformat (line_input, "syn-timeout %f", &ecm->syn_timeout))
;
- else if (unformat (input, "syn-timeout %f", &syn_timeout))
+ else if (unformat (line_input, "echo-bytes"))
+ ecm->echo_bytes = 1;
+ else if (unformat (line_input, "fifo-size %U", unformat_memory_size,
+ &ecm->fifo_size))
;
- else if (unformat (input, "no-return"))
- ecm->no_return = 1;
- else if (unformat (input, "fifo-size %d", &ecm->fifo_size))
- ecm->fifo_size <<= 10;
- else if (unformat (input, "private-segment-count %d",
+ else if (unformat (line_input, "private-segment-count %d",
&ecm->private_segment_count))
;
- else if (unformat (input, "private-segment-size %U",
- unformat_memory_size, &tmp))
- {
- if (tmp >= 0x100000000ULL)
- {
- error = clib_error_return (
- 0, "private segment size %lld (%llu) too large", tmp, tmp);
- goto cleanup;
- }
- ecm->private_segment_size = tmp;
- }
- else if (unformat (input, "preallocate-fifos"))
+ else if (unformat (line_input, "private-segment-size %U",
+ unformat_memory_size, &ecm->private_segment_size))
+ ;
+ else if (unformat (line_input, "preallocate-fifos"))
ecm->prealloc_fifos = 1;
- else if (unformat (input, "preallocate-sessions"))
- preallocate_sessions = 1;
- else
- if (unformat (input, "client-batch %d", &ecm->connections_per_batch))
+ else if (unformat (line_input, "preallocate-sessions"))
+ ecm->prealloc_sessions = 1;
+ else if (unformat (line_input, "client-batch %d",
+ &ecm->connections_per_batch))
;
- else if (unformat (input, "appns %_%v%_", &appns_id))
+ else if (unformat (line_input, "appns %_%v%_", &ecm->appns_id))
;
- else if (unformat (input, "all-scope"))
- appns_flags |= (APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE
- | APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE);
- else if (unformat (input, "local-scope"))
- appns_flags = APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE;
- else if (unformat (input, "global-scope"))
- appns_flags = APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
- else if (unformat (input, "secret %lu", &appns_secret))
+ else if (unformat (line_input, "all-scope"))
+ ecm->attach_flags |= (APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE |
+ APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE);
+ else if (unformat (line_input, "local-scope"))
+ ecm->attach_flags = APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE;
+ else if (unformat (line_input, "global-scope"))
+ ecm->attach_flags = APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
+ else if (unformat (line_input, "secret %lu", &ecm->appns_secret))
;
- else if (unformat (input, "no-output"))
- ecm->no_output = 1;
- else if (unformat (input, "test-bytes"))
- ecm->test_bytes = 1;
- else if (unformat (input, "tls-engine %d", &ecm->tls_engine))
+ else if (unformat (line_input, "verbose"))
+ ecm->cfg.verbose = 1;
+ else if (unformat (line_input, "test-bytes"))
+ ecm->cfg.test_bytes = 1;
+ else if (unformat (line_input, "tls-engine %d", &ecm->tls_engine))
;
else
{
error = clib_error_return (0, "failed: unknown input `%U'",
- format_unformat_error, input);
+ format_unformat_error, line_input);
goto cleanup;
}
}
- /* Store cli process node index for signalling */
- ecm->cli_node_index =
- vlib_get_current_process (vm)->node_runtime.node_index;
-
- if (ecm->is_init == 0)
- {
- if (echo_clients_init (vm))
- {
- error = clib_error_return (0, "failed init");
- goto cleanup;
- }
- }
+parse_config:
-
- ecm->ready_connections = 0;
- ecm->expected_connections = n_clients * ecm->quic_streams;
- ecm->rx_total = 0;
- ecm->tx_total = 0;
+ ecm->cfg.num_test_sessions = ecm->expected_connections =
+ ecm->n_clients * ecm->quic_streams;
if (!ecm->connect_uri)
{
- clib_warning ("No uri provided. Using default: %s", default_uri);
+ ec_cli ("No uri provided. Using default: %s", default_uri);
ecm->connect_uri = format (0, "%s%c", default_uri, 0);
}
- if ((rv = parse_uri ((char *) ecm->connect_uri, &sep)))
+ if ((rv = parse_uri ((char *) ecm->connect_uri, &ecm->connect_sep)))
{
error = clib_error_return (0, "Uri parse error: %d", rv);
goto cleanup;
}
- ecm->transport_proto = sep.transport_proto;
- ecm->is_dgram = (sep.transport_proto == TRANSPORT_PROTO_UDP);
+ ecm->transport_proto = ecm->connect_sep.transport_proto;
-#if ECHO_CLIENT_PTHREAD
- echo_clients_start_tx_pthread ();
-#endif
+ if (ecm->prealloc_sessions)
+ ec_prealloc_sessions (ecm);
- vlib_worker_thread_barrier_sync (vm);
- vnet_session_enable_disable (vm, 1 /* turn on session and transports */ );
- vlib_worker_thread_barrier_release (vm);
-
- if (ecm->test_client_attached == 0)
+ if ((error = ec_attach ()))
{
- if ((error = echo_clients_attach (appns_id, appns_flags, appns_secret)))
- {
- vec_free (appns_id);
- clib_error_report (error);
- goto cleanup;
- }
- vec_free (appns_id);
+ clib_error_report (error);
+ goto cleanup;
}
- ecm->test_client_attached = 1;
- /* Turn on the builtin client input nodes */
- for (i = 0; i < thread_main->n_vlib_mains; i++)
- vlib_node_set_state (vlib_get_main_by_index (i), echo_clients_node.index,
- VLIB_NODE_STATE_POLLING);
+ if (ecm->echo_bytes)
+ ecm->cfg.test = HS_TEST_TYPE_BI;
+ else
+ ecm->cfg.test = HS_TEST_TYPE_UNI;
- if (preallocate_sessions)
- pool_init_fixed (ecm->sessions, 1.1 * n_clients);
+ ec_ctrl_connect ();
+ ec_wait_for_signal (EC_CLI_CFG_SYNC);
- /* Fire off connect requests */
- time_before_connects = vlib_time_now (vm);
- if ((error = echo_clients_connect (vm, n_clients)))
+ if (ec_ctrl_test_start () < 0)
{
+ ec_cli ("failed to send start command");
goto cleanup;
}
+ ec_wait_for_signal (EC_CLI_START);
- /* Park until the sessions come up, or ten seconds elapse... */
- vlib_process_wait_for_event_or_clock (vm, syn_timeout);
+ /*
+ * Start. Fire off connect requests
+ */
+
+ /* update data port */
+ ecm->connect_sep.port = hs_make_data_port (ecm->connect_sep.port);
+
+ ecm->syn_start_time = vlib_time_now (vm);
+ ec_program_connects ();
+
+ /*
+ * Park until the sessions come up, or syn_timeout seconds pass
+ */
+
+ vlib_process_wait_for_event_or_clock (vm, ecm->syn_timeout);
event_type = vlib_process_get_events (vm, &event_data);
switch (event_type)
{
case ~0:
- ec_cli_output ("Timeout with only %d sessions active...",
- ecm->ready_connections);
+ ec_cli ("Timeout with only %d sessions active...",
+ ecm->ready_connections);
error = clib_error_return (0, "failed: syn timeout with %d sessions",
ecm->ready_connections);
- goto cleanup;
+ goto stop_test;
- case 1:
- delta = vlib_time_now (vm) - time_before_connects;
+ case EC_CLI_CONNECTS_DONE:
+ delta = vlib_time_now (vm) - ecm->syn_start_time;
if (delta != 0.0)
- ec_cli_output ("%d three-way handshakes in %.2f seconds %.2f/s",
- n_clients, delta, ((f64) n_clients) / delta);
-
- ecm->test_start_time = vlib_time_now (ecm->vlib_main);
- ec_cli_output ("Test started at %.6f", ecm->test_start_time);
+ ec_cli ("%d three-way handshakes in %.2f seconds %.2f/s",
+ ecm->n_clients, delta, ((f64) ecm->n_clients) / delta);
break;
+ case EC_CLI_CONNECTS_FAILED:
+ error = clib_error_return (0, "failed: connect returned");
+ goto stop_test;
+
default:
- ec_cli_output ("unexpected event(1): %d", event_type);
- error = clib_error_return (0, "failed: unexpected event(1): %d",
- event_type);
- goto cleanup;
+ ec_cli ("unexpected event(2): %d", event_type);
+ error =
+ clib_error_return (0, "failed: unexpected event(2): %d", event_type);
+ goto stop_test;
}
- /* Now wait for the sessions to finish... */
- vlib_process_wait_for_event_or_clock (vm, test_timeout);
+ /*
+ * Wait for the sessions to finish or test_timeout seconds pass
+ */
+ ecm->test_start_time = vlib_time_now (ecm->vlib_main);
+ ec_cli ("Test started at %.6f", ecm->test_start_time);
+ vlib_process_wait_for_event_or_clock (vm, ecm->test_timeout);
event_type = vlib_process_get_events (vm, &event_data);
switch (event_type)
{
case ~0:
- ec_cli_output ("Timeout with %d sessions still active...",
- ecm->ready_connections);
+ ec_cli ("Timeout at %.6f with %d sessions still active...",
+ vlib_time_now (ecm->vlib_main), ecm->ready_connections);
error = clib_error_return (0, "failed: timeout with %d sessions",
ecm->ready_connections);
- goto cleanup;
+ goto stop_test;
- case 2:
+ case EC_CLI_TEST_DONE:
ecm->test_end_time = vlib_time_now (vm);
- ec_cli_output ("Test finished at %.6f", ecm->test_end_time);
+ ec_cli ("Test finished at %.6f", ecm->test_end_time);
break;
default:
- ec_cli_output ("unexpected event(2): %d", event_type);
- error = clib_error_return (0, "failed: unexpected event(2): %d",
- event_type);
- goto cleanup;
+ ec_cli ("unexpected event(3): %d", event_type);
+ error =
+ clib_error_return (0, "failed: unexpected event(3): %d", event_type);
+ goto stop_test;
}
+ /*
+ * Done. Compute stats
+ */
delta = ecm->test_end_time - ecm->test_start_time;
- if (delta != 0.0)
+ if (delta == 0.0)
{
- total_bytes = (ecm->no_return ? ecm->tx_total : ecm->rx_total);
- transfer_type = ecm->no_return ? "half-duplex" : "full-duplex";
- ec_cli_output ("%lld bytes (%lld mbytes, %lld gbytes) in %.2f seconds",
- total_bytes, total_bytes / (1ULL << 20),
- total_bytes / (1ULL << 30), delta);
- ec_cli_output ("%.2f bytes/second %s", ((f64) total_bytes) / (delta),
- transfer_type);
- ec_cli_output ("%.4f gbit/second %s",
- (((f64) total_bytes * 8.0) / delta / 1e9),
- transfer_type);
- }
- else
- {
- ec_cli_output ("zero delta-t?");
+ ec_cli ("zero delta-t?");
error = clib_error_return (0, "failed: zero delta-t");
- goto cleanup;
+ goto stop_test;
}
- if (ecm->test_bytes && ecm->test_failed)
+ total_bytes = (ecm->echo_bytes ? ecm->rx_total : ecm->tx_total);
+ transfer_type = ecm->echo_bytes ? "full-duplex" : "half-duplex";
+ ec_cli ("%lld bytes (%lld mbytes, %lld gbytes) in %.2f seconds", total_bytes,
+ total_bytes / (1ULL << 20), total_bytes / (1ULL << 30), delta);
+ ec_cli ("%.2f bytes/second %s", ((f64) total_bytes) / (delta),
+ transfer_type);
+ ec_cli ("%.4f gbit/second %s", (((f64) total_bytes * 8.0) / delta / 1e9),
+ transfer_type);
+
+ if (ecm->cfg.test_bytes && ecm->test_failed)
error = clib_error_return (0, "failed: test bytes");
-cleanup:
- ecm->run_test = ECHO_CLIENTS_EXITING;
- vlib_process_wait_for_event_or_clock (vm, 10e-3);
- for (i = 0; i < vec_len (ecm->connection_index_by_thread); i++)
+stop_test:
+ ecm->run_test = EC_EXITING;
+
+ /* send stop test command to the server */
+ if (ec_ctrl_test_stop () < 0)
{
- vec_reset_length (ecm->connection_index_by_thread[i]);
- vec_reset_length (ecm->connections_this_batch_by_thread[i]);
- vec_reset_length (ecm->quic_session_index_by_thread[i]);
+ ec_cli ("failed to send stop command");
+ goto cleanup;
}
+ ec_wait_for_signal (EC_CLI_STOP);
- pool_free (ecm->sessions);
+ /* post test sync */
+ if (ec_ctrl_test_sync () < 0)
+ {
+ ec_cli ("failed to send post sync command");
+ goto cleanup;
+ }
+ ec_wait_for_signal (EC_CLI_CFG_SYNC);
+
+ /* disconnect control session */
+ ec_ctrl_session_disconnect ();
+
+cleanup:
+
+ ecm->run_test = EC_EXITING;
+ vlib_process_wait_for_event_or_clock (vm, 10e-3);
/* Detach the application, so we can use different fifo sizes next time */
- if (ecm->test_client_attached)
+ if (ec_detach ())
{
- if (echo_clients_detach ())
- {
- error = clib_error_return (0, "failed: app detach");
- ec_cli_output ("WARNING: app detach failed...");
- }
+ error = clib_error_return (0, "failed: app detach");
+ ec_cli ("WARNING: app detach failed...");
}
- if (error)
- ec_cli_output ("test failed");
- vec_free (ecm->connect_uri);
- clib_spinlock_free (&ecm->sessions_lock);
- if (barrier_acq_needed)
- vlib_worker_thread_barrier_sync (vm);
+ ec_cleanup (ecm);
+ if (had_config)
+ unformat_free (line_input);
+
+ if (error)
+ ec_cli ("test failed");
return error;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (echo_clients_command, static) =
-{
+VLIB_CLI_COMMAND (ec_command, static) = {
.path = "test echo clients",
- .short_help = "test echo clients [nclients %d][[m|g]bytes <bytes>]"
- "[test-timeout <time>][syn-timeout <time>][no-return][fifo-size <size>]"
- "[private-segment-count <count>][private-segment-size <bytes>[m|g]]"
- "[preallocate-fifos][preallocate-sessions][client-batch <batch-size>]"
- "[uri <tcp://ip/port>][test-bytes][no-output]",
- .function = echo_clients_command_fn,
+ .short_help =
+ "test echo clients [nclients %d][[m|g]bytes <bytes>]"
+ "[test-timeout <time>][syn-timeout <time>][echo-bytes][fifo-size <size>]"
+ "[private-segment-count <count>][private-segment-size <bytes>[m|g]]"
+ "[preallocate-fifos][preallocate-sessions][client-batch <batch-size>]"
+ "[uri <tcp://ip/port>][test-bytes][verbose]",
+ .function = ec_command_fn,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
-echo_clients_main_init (vlib_main_t * vm)
+ec_main_init (vlib_main_t *vm)
{
- echo_client_main_t *ecm = &echo_client_main;
- ecm->is_init = 0;
+ ec_main_t *ecm = &ec_main;
+ ecm->app_is_init = 0;
return 0;
}
-VLIB_INIT_FUNCTION (echo_clients_main_init);
+VLIB_INIT_FUNCTION (ec_main_init);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/hs_apps/echo_client.h b/src/plugins/hs_apps/echo_client.h
index c4983ca78d8..5868c3652ce 100644
--- a/src/plugins/hs_apps/echo_client.h
+++ b/src/plugins/hs_apps/echo_client.h
@@ -18,105 +18,121 @@
#ifndef __included_echo_client_h__
#define __included_echo_client_h__
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-
-#include <vppinfra/hash.h>
-#include <vppinfra/error.h>
+#include <hs_apps/hs_test.h>
#include <vnet/session/session.h>
#include <vnet/session/application_interface.h>
-typedef struct
+typedef struct ec_session_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- app_session_t data;
+#define _(type, name) type name;
+ foreach_app_session_field
+#undef _
+ u32 vpp_session_index;
+ u32 thread_index;
u64 bytes_to_send;
u64 bytes_sent;
u64 bytes_to_receive;
u64 bytes_received;
u64 vpp_session_handle;
- u8 thread_index;
-} eclient_session_t;
+} ec_session_t;
+
+typedef struct ec_worker_
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ ec_session_t *sessions; /**< session pool */
+ u8 *rx_buf; /**< prealloced rx buffer */
+ u32 *conn_indices; /**< sessions handled by worker */
+ u32 *conns_this_batch; /**< sessions handled in batch */
+ svm_msg_q_t *vpp_event_queue; /**< session layer worker mq */
+ u32 thread_index; /**< thread index for worker */
+} ec_worker_t;
typedef struct
{
+ ec_worker_t *wrk; /**< Per-thread state */
+ u8 *connect_test_data; /**< Pre-computed test data */
+
+ volatile u32 ready_connections;
+ volatile u64 rx_total;
+ volatile u64 tx_total;
+ volatile int run_test; /**< Signal start of test */
+
+ f64 syn_start_time;
+ f64 test_start_time;
+ f64 test_end_time;
+ u32 prev_conns;
+ u32 repeats;
+
+ u32 connect_conn_index; /**< Connects attempted progress */
+
/*
* Application setup parameters
*/
- svm_queue_t *vl_input_queue; /**< vpe input queue */
- svm_msg_q_t **vpp_event_queue;
u32 cli_node_index; /**< cli process node index */
- u32 my_client_index; /**< loopback API client handle */
u32 app_index; /**< app index after attach */
+ session_handle_t ctrl_session_handle; /**< control session handle */
/*
* Configuration params
*/
+ hs_test_cfg_t cfg;
+ u32 n_clients; /**< Number of clients */
u8 *connect_uri; /**< URI for slave's connect */
+ session_endpoint_cfg_t connect_sep; /**< Sever session endpoint */
u64 bytes_to_send; /**< Bytes to send */
u32 configured_segment_size;
u32 fifo_size;
u32 expected_connections; /**< Number of clients/connections */
u32 connections_per_batch; /**< Connections to rx/tx at once */
u32 private_segment_count; /**< Number of private fifo segs */
- u32 private_segment_size; /**< size of private fifo segs */
+ u64 private_segment_size; /**< size of private fifo segs */
u32 tls_engine; /**< TLS engine mbedtls/openssl */
- u8 is_dgram;
u32 no_copy; /**< Don't memcpy data to tx fifo */
u32 quic_streams; /**< QUIC streams per connection */
u32 ckpair_index; /**< Cert key pair for tls/quic */
+ u64 attach_flags; /**< App attach flags */
+ u8 *appns_id; /**< App namespaces id */
+ u64 appns_secret; /**< App namespace secret */
+ f64 syn_timeout; /**< Test syn timeout (s) */
+ f64 test_timeout; /**< Test timeout (s) */
/*
- * Test state variables
- */
- eclient_session_t *sessions; /**< Session pool, shared */
- clib_spinlock_t sessions_lock;
- u8 **rx_buf; /**< intermediate rx buffers */
- u8 *connect_test_data; /**< Pre-computed test data */
- u32 **quic_session_index_by_thread;
- u32 **connection_index_by_thread;
- u32 **connections_this_batch_by_thread; /**< active connection batch */
- pthread_t client_thread_handle;
-
- volatile u32 ready_connections;
- volatile u32 finished_connections;
- volatile u64 rx_total;
- volatile u64 tx_total;
- volatile int run_test; /**< Signal start of test */
-
- f64 test_start_time;
- f64 test_end_time;
- u32 prev_conns;
- u32 repeats;
- /*
* Flags
*/
- u8 is_init;
+ u8 app_is_init;
u8 test_client_attached;
- u8 no_return;
+ u8 echo_bytes;
u8 test_return_packets;
- int i_am_master;
int drop_packets; /**< drop all packets */
u8 prealloc_fifos; /**< Request fifo preallocation */
- u8 no_output;
- u8 test_bytes;
+ u8 prealloc_sessions;
u8 test_failed;
u8 transport_proto;
+ u8 barrier_acq_needed;
vlib_main_t *vlib_main;
-} echo_client_main_t;
+} ec_main_t;
+
+typedef enum ec_state_
+{
+ EC_STARTING,
+ EC_RUNNING,
+ EC_EXITING
+} ec_state_t;
-enum
+typedef enum ec_cli_signal_
{
- ECHO_CLIENTS_STARTING,
- ECHO_CLIENTS_RUNNING,
- ECHO_CLIENTS_EXITING
-} echo_clients_test_state_e;
-extern echo_client_main_t echo_client_main;
+ EC_CLI_CONNECTS_DONE = 1,
+ EC_CLI_CONNECTS_FAILED,
+ EC_CLI_CFG_SYNC,
+ EC_CLI_START,
+ EC_CLI_STOP,
+ EC_CLI_TEST_DONE
+} ec_cli_signal_t;
-vlib_node_registration_t echo_clients_node;
+void ec_program_connects (void);
#endif /* __included_echo_client_h__ */
diff --git a/src/plugins/hs_apps/echo_server.c b/src/plugins/hs_apps/echo_server.c
index b75a3667e83..0243252434a 100644
--- a/src/plugins/hs_apps/echo_server.c
+++ b/src/plugins/hs_apps/echo_server.c
@@ -13,79 +13,143 @@
* limitations under the License.
*/
+#include <hs_apps/hs_test.h>
#include <vnet/vnet.h>
#include <vlibmemory/api.h>
#include <vnet/session/application.h>
#include <vnet/session/application_interface.h>
#include <vnet/session/session.h>
-#define ECHO_SERVER_DBG (0)
-#define DBG(_fmt, _args...) \
- if (ECHO_SERVER_DBG) \
- clib_warning (_fmt, ##_args)
+static void es_set_echo_rx_callbacks (u8 no_echo);
typedef struct
{
- /*
- * Server app parameters
- */
- svm_msg_q_t **vpp_queue;
- svm_queue_t *vl_input_queue; /**< Sever's event queue */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+#define _(type, name) type name;
+ foreach_app_session_field
+#undef _
+ u64 vpp_session_handle;
+ u32 vpp_session_index;
+ u32 rx_retries;
+ u8 byte_index;
+} es_session_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ es_session_t *sessions;
+ u8 *rx_buf; /**< Per-thread RX buffer */
+ svm_msg_q_t *vpp_event_queue;
+ u32 thread_index;
+} es_worker_t;
+typedef struct
+{
u32 app_index; /**< Server app index */
- u32 my_client_index; /**< API client handle */
- u32 node_index; /**< process node index for event scheduling */
/*
* Config params
*/
- u8 no_echo; /**< Don't echo traffic */
+ hs_test_cfg_t cfg;
u32 fifo_size; /**< Fifo size */
u32 rcv_buffer_size; /**< Rcv buffer size */
u32 prealloc_fifos; /**< Preallocate fifos */
u32 private_segment_count; /**< Number of private segments */
- u32 private_segment_size; /**< Size of private segments */
+ u64 private_segment_size; /**< Size of private segments */
char *server_uri; /**< Server URI */
u32 tls_engine; /**< TLS engine: mbedtls/openssl */
u32 ckpair_index; /**< Cert and key for tls/quic */
- u8 is_dgram; /**< set if transport is dgram */
/*
* Test state
*/
- u8 **rx_buf; /**< Per-thread RX buffer */
- u64 byte_index;
- u32 **rx_retries;
+ es_worker_t *wrk;
+ int (*rx_callback) (session_t *session);
u8 transport_proto;
u64 listener_handle; /**< Session handle of the root listener */
+ u64 ctrl_listener_handle;
vlib_main_t *vlib_main;
} echo_server_main_t;
echo_server_main_t echo_server_main;
+#define es_err(_fmt, _args...) clib_warning (_fmt, ##_args);
+
+#define es_dbg(_fmt, _args...) \
+ do \
+ { \
+ if (PREDICT_FALSE (echo_server_main.cfg.verbose)) \
+ es_err (_fmt, ##_args); \
+ } \
+ while (0)
+
+#define es_cli(_fmt, _args...) vlib_cli_output (vm, _fmt, ##_args)
+
+static inline es_worker_t *
+es_worker_get (u32 thread_index)
+{
+ return vec_elt_at_index (echo_server_main.wrk, thread_index);
+}
+
+static inline es_session_t *
+es_session_alloc (es_worker_t *wrk)
+{
+ es_session_t *es;
+
+ pool_get_zero (wrk->sessions, es);
+ es->session_index = es - wrk->sessions;
+ return es;
+}
+
+static inline es_session_t *
+es_session_get (es_worker_t *wrk, u32 es_index)
+{
+ return pool_elt_at_index (wrk->sessions, es_index);
+}
+
int
quic_echo_server_qsession_accept_callback (session_t * s)
{
- DBG ("QSession %u accept w/opaque %d", s->session_index, s->opaque);
+ es_dbg ("QSession %u accept w/opaque %d", s->session_index, s->opaque);
return 0;
}
+static int
+echo_server_ctrl_session_accept_callback (session_t *s)
+{
+ s->session_state = SESSION_STATE_READY;
+ return 0;
+}
+
+static void
+es_session_alloc_and_init (session_t *s)
+{
+ es_session_t *es;
+ es_worker_t *wrk = es_worker_get (s->thread_index);
+
+ es = es_session_alloc (wrk);
+ hs_test_app_session_init (es, s);
+ es->vpp_session_index = s->session_index;
+ es->vpp_session_handle = session_handle (s);
+ s->opaque = es->session_index;
+}
+
int
quic_echo_server_session_accept_callback (session_t * s)
{
echo_server_main_t *esm = &echo_server_main;
+
+ if (PREDICT_FALSE (esm->ctrl_listener_handle == s->listener_handle))
+ return echo_server_ctrl_session_accept_callback (s);
+
if (s->listener_handle == esm->listener_handle)
return quic_echo_server_qsession_accept_callback (s);
- DBG ("SSESSION %u accept w/opaque %d", s->session_index, s->opaque);
- esm->vpp_queue[s->thread_index] =
- session_main_get_vpp_event_queue (s->thread_index);
+ es_dbg ("SSESSION %u accept w/opaque %d", s->session_index, s->opaque);
+
s->session_state = SESSION_STATE_READY;
- esm->byte_index = 0;
- ASSERT (vec_len (esm->rx_retries) > s->thread_index);
- vec_validate (esm->rx_retries[s->thread_index], s->session_index);
- esm->rx_retries[s->thread_index][s->session_index] = 0;
+ es_session_alloc_and_init (s);
return 0;
}
@@ -93,13 +157,12 @@ int
echo_server_session_accept_callback (session_t * s)
{
echo_server_main_t *esm = &echo_server_main;
- esm->vpp_queue[s->thread_index] =
- session_main_get_vpp_event_queue (s->thread_index);
+
+ if (PREDICT_FALSE (esm->ctrl_listener_handle == s->listener_handle))
+ return echo_server_ctrl_session_accept_callback (s);
+
s->session_state = SESSION_STATE_READY;
- esm->byte_index = 0;
- ASSERT (vec_len (esm->rx_retries) > s->thread_index);
- vec_validate (esm->rx_retries[s->thread_index], s->session_index);
- esm->rx_retries[s->thread_index][s->session_index] = 0;
+ es_session_alloc_and_init (s);
return 0;
}
@@ -119,7 +182,7 @@ echo_server_session_reset_callback (session_t * s)
{
echo_server_main_t *esm = &echo_server_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- clib_warning ("Reset session %U", format_session, s, 2);
+ es_dbg ("Reset session %U", format_session, s, 2);
a->handle = session_handle (s);
a->app_index = esm->app_index;
vnet_disconnect_session (a);
@@ -129,7 +192,7 @@ int
echo_server_session_connected_callback (u32 app_index, u32 api_context,
session_t * s, session_error_t err)
{
- clib_warning ("called...");
+ es_err ("called...");
return -1;
}
@@ -143,26 +206,135 @@ echo_server_add_segment_callback (u32 client_index, u64 segment_handle)
int
echo_server_redirect_connect_callback (u32 client_index, void *mp)
{
- clib_warning ("called...");
+ es_err ("called...");
return -1;
}
-void
-test_bytes (echo_server_main_t * esm, int actual_transfer)
+static void
+es_foreach_thread (void *fp)
{
- int i;
- u32 my_thread_id = vlib_get_thread_index ();
+ echo_server_main_t *esm = &echo_server_main;
+ uword thread_index;
+ for (thread_index = 0; thread_index < vec_len (esm->wrk); thread_index++)
+ {
+ session_send_rpc_evt_to_thread (thread_index, fp,
+ uword_to_pointer (thread_index, void *));
+ }
+}
- for (i = 0; i < actual_transfer; i++)
+static int
+es_wrk_prealloc_sessions (void *args)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ u32 sessions_per_wrk, n_wrks, thread_index;
+
+ thread_index = pointer_to_uword (args);
+ es_worker_t *wrk = es_worker_get (thread_index);
+ n_wrks = vlib_num_workers () ? vlib_num_workers () : 1;
+ sessions_per_wrk = esm->cfg.num_test_sessions / n_wrks;
+ pool_alloc (wrk->sessions, 1.1 * sessions_per_wrk);
+ return 0;
+}
+
+static int
+echo_server_setup_test (hs_test_cfg_t *c)
+{
+ if (c->test == HS_TEST_TYPE_UNI)
+ es_set_echo_rx_callbacks (1 /* no echo */);
+ else
+ es_set_echo_rx_callbacks (0 /* no echo */);
+
+ es_foreach_thread (es_wrk_prealloc_sessions);
+ return 0;
+}
+
+static void
+echo_server_ctrl_reply (session_t *s)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ int rv;
+
+ rv = svm_fifo_enqueue (s->tx_fifo, sizeof (esm->cfg), (u8 *) &esm->cfg);
+ ASSERT (rv == sizeof (esm->cfg));
+ session_send_io_evt_to_thread_custom (&s->session_index, s->thread_index,
+ SESSION_IO_EVT_TX);
+}
+
+static int
+es_test_cmd_sync (echo_server_main_t *esm, session_t *s)
+{
+ int rv;
+
+ rv = echo_server_setup_test (&esm->cfg);
+ if (rv)
+ es_err ("setup test error!");
+
+ echo_server_ctrl_reply (s);
+ return 0;
+}
+
+static int
+es_wrk_cleanup_sessions (void *args)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ vnet_disconnect_args_t _a = {}, *a = &_a;
+ u32 thread_index = pointer_to_uword (args);
+ es_session_t *es;
+ es_worker_t *wrk;
+
+ wrk = es_worker_get (thread_index);
+ a->app_index = esm->app_index;
+
+ pool_foreach (es, wrk->sessions)
+ {
+ a->handle = es->vpp_session_handle;
+ vnet_disconnect_session (a);
+ }
+ pool_free (wrk->sessions);
+
+ return 0;
+}
+
+static int
+echo_server_rx_ctrl_callback (session_t *s)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ int rv;
+
+ rv = svm_fifo_dequeue (s->rx_fifo, sizeof (esm->cfg), (u8 *) &esm->cfg);
+ ASSERT (rv == sizeof (esm->cfg));
+
+ es_dbg ("control message received:");
+ if (esm->cfg.verbose)
+ hs_test_cfg_dump (&esm->cfg, 0);
+
+ switch (esm->cfg.cmd)
{
- if (esm->rx_buf[my_thread_id][i] != ((esm->byte_index + i) & 0xff))
+ case HS_TEST_CMD_SYNC:
+ switch (esm->cfg.test)
{
- clib_warning ("at %lld expected %d got %d", esm->byte_index + i,
- (esm->byte_index + i) & 0xff,
- esm->rx_buf[my_thread_id][i]);
+ case HS_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_NONE:
+ es_foreach_thread (es_wrk_cleanup_sessions);
+ echo_server_ctrl_reply (s);
+ break;
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
+ return es_test_cmd_sync (esm, s);
+ break;
+ default:
+ es_err ("unknown command type! %d", esm->cfg.cmd);
}
+ break;
+ case HS_TEST_CMD_START:
+ case HS_TEST_CMD_STOP:
+ echo_server_ctrl_reply (s);
+ break;
+ default:
+ es_err ("unknown command! %d", esm->cfg.cmd);
+ break;
}
- esm->byte_index += actual_transfer;
+ return 0;
}
/*
@@ -171,11 +343,30 @@ test_bytes (echo_server_main_t * esm, int actual_transfer)
int
echo_server_builtin_server_rx_callback_no_echo (session_t * s)
{
+ echo_server_main_t *esm = &echo_server_main;
+ if (PREDICT_FALSE (esm->ctrl_listener_handle == s->listener_handle))
+ return echo_server_rx_ctrl_callback (s);
+
svm_fifo_t *rx_fifo = s->rx_fifo;
svm_fifo_dequeue_drop (rx_fifo, svm_fifo_max_dequeue_cons (rx_fifo));
return 0;
}
+static void
+es_test_bytes (es_worker_t *wrk, es_session_t *es, int actual_transfer)
+{
+ int i;
+ for (i = 0; i < actual_transfer; i++)
+ {
+ if (wrk->rx_buf[i] != ((es->byte_index + i) & 0xff))
+ {
+ es_err ("at %lld expected %d got %d", es->byte_index + i,
+ (es->byte_index + i) & 0xff, wrk->rx_buf[i]);
+ }
+ }
+ es->byte_index += actual_transfer;
+}
+
int
echo_server_rx_callback (session_t * s)
{
@@ -184,7 +375,8 @@ echo_server_rx_callback (session_t * s)
svm_fifo_t *tx_fifo, *rx_fifo;
echo_server_main_t *esm = &echo_server_main;
u32 thread_index = vlib_get_thread_index ();
- app_session_transport_t at;
+ es_worker_t *wrk;
+ es_session_t *es;
ASSERT (s->thread_index == thread_index);
@@ -194,24 +386,25 @@ echo_server_rx_callback (session_t * s)
ASSERT (rx_fifo->master_thread_index == thread_index);
ASSERT (tx_fifo->master_thread_index == thread_index);
+ if (PREDICT_FALSE (esm->ctrl_listener_handle == s->listener_handle))
+ return echo_server_rx_ctrl_callback (s);
+
+ wrk = es_worker_get (thread_index);
max_enqueue = svm_fifo_max_enqueue_prod (tx_fifo);
- if (!esm->is_dgram)
- {
- max_dequeue = svm_fifo_max_dequeue_cons (rx_fifo);
- }
- else
+ es = es_session_get (wrk, s->opaque);
+
+ if (es->is_dgram)
{
session_dgram_pre_hdr_t ph;
svm_fifo_peek (rx_fifo, 0, sizeof (ph), (u8 *) & ph);
max_dequeue = ph.data_length - ph.data_offset;
- if (!esm->vpp_queue[s->thread_index])
- {
- svm_msg_q_t *mq;
- mq = session_main_get_vpp_event_queue (s->thread_index);
- esm->vpp_queue[s->thread_index] = mq;
- }
+ ASSERT (wrk->vpp_event_queue);
max_enqueue -= sizeof (session_dgram_hdr_t);
}
+ else
+ {
+ max_dequeue = svm_fifo_max_dequeue_cons (rx_fifo);
+ }
if (PREDICT_FALSE (max_dequeue == 0))
return 0;
@@ -228,65 +421,40 @@ echo_server_rx_callback (session_t * s)
/* Program self-tap to retry */
if (svm_fifo_set_event (rx_fifo))
{
+ /* TODO should be session_enqueue_notify(s) but quic tests seem
+ * to fail if that's the case */
if (session_send_io_evt_to_thread (rx_fifo,
SESSION_IO_EVT_BUILTIN_RX))
- clib_warning ("failed to enqueue self-tap");
+ es_err ("failed to enqueue self-tap");
- vec_validate (esm->rx_retries[s->thread_index], s->session_index);
- if (esm->rx_retries[thread_index][s->session_index] == 500000)
+ if (es->rx_retries == 500000)
{
- clib_warning ("session stuck: %U", format_session, s, 2);
+ es_err ("session stuck: %U", format_session, s, 2);
}
- if (esm->rx_retries[thread_index][s->session_index] < 500001)
- esm->rx_retries[thread_index][s->session_index]++;
+ if (es->rx_retries < 500001)
+ es->rx_retries++;
}
return 0;
}
- vec_validate (esm->rx_buf[thread_index], max_transfer);
- if (!esm->is_dgram)
- {
- actual_transfer = app_recv_stream_raw (rx_fifo,
- esm->rx_buf[thread_index],
- max_transfer,
- 0 /* don't clear event */ ,
- 0 /* peek */ );
- }
- else
+ vec_validate (wrk->rx_buf, max_transfer);
+ actual_transfer = app_recv ((app_session_t *) es, wrk->rx_buf, max_transfer);
+ ASSERT (actual_transfer == max_transfer);
+
+ if (esm->cfg.test_bytes)
{
- actual_transfer = app_recv_dgram_raw (rx_fifo,
- esm->rx_buf[thread_index],
- max_transfer, &at,
- 0 /* don't clear event */ ,
- 0 /* peek */ );
+ es_test_bytes (wrk, es, actual_transfer);
}
- ASSERT (actual_transfer == max_transfer);
- /* test_bytes (esm, actual_transfer); */
/*
* Echo back
*/
- if (!esm->is_dgram)
- {
- n_written = app_send_stream_raw (tx_fifo,
- esm->vpp_queue[thread_index],
- esm->rx_buf[thread_index],
- actual_transfer, SESSION_IO_EVT_TX,
- 1 /* do_evt */ , 0);
- }
- else
- {
- n_written = app_send_dgram_raw (tx_fifo, &at,
- esm->vpp_queue[s->thread_index],
- esm->rx_buf[thread_index],
- actual_transfer, SESSION_IO_EVT_TX,
- 1 /* do_evt */ , 0);
- }
+ n_written = app_send ((app_session_t *) es, wrk->rx_buf, actual_transfer, 0);
if (n_written != max_transfer)
- clib_warning ("short trout! written %u read %u", n_written, max_transfer);
+ es_err ("short trout! written %u read %u", n_written, max_transfer);
if (PREDICT_FALSE (svm_fifo_max_dequeue_cons (rx_fifo)))
goto rx_event;
@@ -294,15 +462,32 @@ echo_server_rx_callback (session_t * s)
return 0;
}
+int
+echo_server_rx_callback_common (session_t *s)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ return esm->rx_callback (s);
+}
+
static session_cb_vft_t echo_server_session_cb_vft = {
.session_accept_callback = echo_server_session_accept_callback,
.session_disconnect_callback = echo_server_session_disconnect_callback,
.session_connected_callback = echo_server_session_connected_callback,
.add_segment_callback = echo_server_add_segment_callback,
- .builtin_app_rx_callback = echo_server_rx_callback,
+ .builtin_app_rx_callback = echo_server_rx_callback_common,
.session_reset_callback = echo_server_session_reset_callback
};
+static void
+es_set_echo_rx_callbacks (u8 no_echo)
+{
+ echo_server_main_t *esm = &echo_server_main;
+ if (no_echo)
+ esm->rx_callback = echo_server_builtin_server_rx_callback_no_echo;
+ else
+ esm->rx_callback = echo_server_rx_callback;
+}
+
static int
echo_server_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
{
@@ -310,30 +495,22 @@ echo_server_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
echo_server_main_t *esm = &echo_server_main;
vnet_app_attach_args_t _a, *a = &_a;
u64 options[APP_OPTIONS_N_OPTIONS];
- u32 segment_size = 512 << 20;
clib_memset (a, 0, sizeof (*a));
clib_memset (options, 0, sizeof (options));
- if (esm->no_echo)
- echo_server_session_cb_vft.builtin_app_rx_callback =
- echo_server_builtin_server_rx_callback_no_echo;
- else
- echo_server_session_cb_vft.builtin_app_rx_callback =
- echo_server_rx_callback;
+ esm->rx_callback = echo_server_rx_callback;
+
if (esm->transport_proto == TRANSPORT_PROTO_QUIC)
echo_server_session_cb_vft.session_accept_callback =
quic_echo_server_session_accept_callback;
- if (esm->private_segment_size)
- segment_size = esm->private_segment_size;
-
a->api_client_index = ~0;
a->name = format (0, "echo_server");
a->session_cb_vft = &echo_server_session_cb_vft;
a->options = options;
- a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
- a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = esm->private_segment_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = esm->private_segment_size;
a->options[APP_OPTIONS_RX_FIFO_SIZE] = esm->fifo_size;
a->options[APP_OPTIONS_TX_FIFO_SIZE] = esm->fifo_size;
a->options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = esm->private_segment_count;
@@ -352,7 +529,7 @@ echo_server_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
if (vnet_application_attach (a))
{
- clib_warning ("failed to attach server");
+ es_err ("failed to attach server");
return -1;
}
esm->app_index = a->app_index;
@@ -392,19 +569,35 @@ echo_client_transport_needs_crypto (transport_proto_t proto)
}
static int
+echo_server_listen_ctrl ()
+{
+ echo_server_main_t *esm = &echo_server_main;
+ vnet_listen_args_t _args = {}, *args = &_args;
+ session_error_t rv;
+
+ if ((rv = parse_uri (esm->server_uri, &args->sep_ext)))
+ return -1;
+ args->sep_ext.transport_proto = TRANSPORT_PROTO_TCP;
+ args->app_index = esm->app_index;
+
+ rv = vnet_listen (args);
+ esm->ctrl_listener_handle = args->handle;
+ return rv;
+}
+
+static int
echo_server_listen ()
{
i32 rv;
echo_server_main_t *esm = &echo_server_main;
- vnet_listen_args_t _args = { 0 }, *args = &_args;
-
- args->sep_ext.app_wrk_index = 0;
+ vnet_listen_args_t _args = {}, *args = &_args;
if ((rv = parse_uri (esm->server_uri, &args->sep_ext)))
{
return -1;
}
args->app_index = esm->app_index;
+ args->sep_ext.port = hs_make_data_port (args->sep_ext.port);
if (echo_client_transport_needs_crypto (args->sep_ext.transport_proto))
{
session_endpoint_alloc_ext_cfg (&args->sep_ext,
@@ -430,30 +623,36 @@ echo_server_create (vlib_main_t * vm, u8 * appns_id, u64 appns_flags,
{
echo_server_main_t *esm = &echo_server_main;
vlib_thread_main_t *vtm = vlib_get_thread_main ();
- u32 num_threads;
- int i;
+ es_worker_t *wrk;
- num_threads = 1 /* main thread */ + vtm->n_threads;
- vec_validate (echo_server_main.vpp_queue, num_threads - 1);
- vec_validate (esm->rx_buf, num_threads - 1);
- vec_validate (esm->rx_retries, num_threads - 1);
- for (i = 0; i < vec_len (esm->rx_retries); i++)
- vec_validate (esm->rx_retries[i],
- pool_elts (session_main.wrk[i].sessions));
esm->rcv_buffer_size = clib_max (esm->rcv_buffer_size, esm->fifo_size);
- for (i = 0; i < num_threads; i++)
- vec_validate (esm->rx_buf[i], esm->rcv_buffer_size);
+ vec_validate (esm->wrk, vtm->n_threads);
+
+ vec_foreach (wrk, esm->wrk)
+ {
+ wrk->thread_index = wrk - esm->wrk;
+ vec_validate (wrk->rx_buf, esm->rcv_buffer_size);
+ wrk->vpp_event_queue =
+ session_main_get_vpp_event_queue (wrk->thread_index);
+ }
if (echo_server_attach (appns_id, appns_flags, appns_secret))
{
- clib_warning ("failed to attach server");
+ es_err ("failed to attach server");
+ return -1;
+ }
+ if (echo_server_listen_ctrl ())
+ {
+ es_err ("failed to start listening on ctrl session");
+ if (echo_server_detach ())
+ es_err ("failed to detach");
return -1;
}
if (echo_server_listen ())
{
- clib_warning ("failed to start listening");
+ es_err ("failed to start listening");
if (echo_server_detach ())
- clib_warning ("failed to detach");
+ es_err ("failed to detach");
return -1;
}
return 0;
@@ -466,27 +665,16 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
echo_server_main_t *esm = &echo_server_main;
u8 server_uri_set = 0, *appns_id = 0;
- u64 tmp, appns_flags = 0, appns_secret = 0;
+ u64 appns_flags = 0, appns_secret = 0;
char *default_uri = "tcp://0.0.0.0/1234";
- int rv, is_stop = 0, barrier_acq_needed = 0;
+ int rv, is_stop = 0;
clib_error_t *error = 0;
- /* The request came over the binary api and the inband cli handler
- * is not mp_safe. Drop the barrier to make sure the workers are not
- * blocked.
- */
- if (vlib_num_workers () && vlib_thread_is_main_w_barrier ())
- {
- barrier_acq_needed = 1;
- vlib_worker_thread_barrier_release (vm);
- }
-
- esm->no_echo = 0;
esm->fifo_size = 64 << 10;
esm->rcv_buffer_size = 128 << 10;
esm->prealloc_fifos = 0;
esm->private_segment_count = 0;
- esm->private_segment_size = 0;
+ esm->private_segment_size = 512 << 20;
esm->tls_engine = CRYPTO_ENGINE_OPENSSL;
vec_free (esm->server_uri);
@@ -494,10 +682,9 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
if (unformat (input, "uri %s", &esm->server_uri))
server_uri_set = 1;
- else if (unformat (input, "no-echo"))
- esm->no_echo = 1;
- else if (unformat (input, "fifo-size %d", &esm->fifo_size))
- esm->fifo_size <<= 10;
+ else if (unformat (input, "fifo-size %U", unformat_memory_size,
+ &esm->fifo_size))
+ ;
else if (unformat (input, "rcv-buf-size %d", &esm->rcv_buffer_size))
;
else if (unformat (input, "prealloc-fifos %d", &esm->prealloc_fifos))
@@ -506,16 +693,8 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
&esm->private_segment_count))
;
else if (unformat (input, "private-segment-size %U",
- unformat_memory_size, &tmp))
- {
- if (tmp >= 0x100000000ULL)
- {
- error = clib_error_return (
- 0, "private segment size %lld (%llu) too large", tmp, tmp);
- goto cleanup;
- }
- esm->private_segment_size = tmp;
- }
+ unformat_memory_size, &esm->private_segment_size))
+ ;
else if (unformat (input, "appns %_%v%_", &appns_id))
;
else if (unformat (input, "all-scope"))
@@ -543,14 +722,14 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
if (esm->app_index == (u32) ~ 0)
{
- clib_warning ("server not running");
+ es_cli ("server not running");
error = clib_error_return (0, "failed: server not running");
goto cleanup;
}
rv = echo_server_detach ();
if (rv)
{
- clib_warning ("failed: detach");
+ es_cli ("failed: detach");
error = clib_error_return (0, "failed: server detach %d", rv);
goto cleanup;
}
@@ -561,7 +740,7 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (!server_uri_set)
{
- clib_warning ("No uri provided! Using default: %s", default_uri);
+ es_cli ("No uri provided! Using default: %s", default_uri);
esm->server_uri = (char *) format (0, "%s%c", default_uri, 0);
}
@@ -571,7 +750,6 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
goto cleanup;
}
esm->transport_proto = sep.transport_proto;
- esm->is_dgram = (sep.transport_proto == TRANSPORT_PROTO_UDP);
rv = echo_server_create (vm, appns_id, appns_flags, appns_secret);
if (rv)
@@ -584,29 +762,22 @@ echo_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
cleanup:
vec_free (appns_id);
- if (barrier_acq_needed)
- vlib_worker_thread_barrier_sync (vm);
-
return error;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (echo_server_create_command, static) =
-{
+VLIB_CLI_COMMAND (echo_server_create_command, static) = {
.path = "test echo server",
- .short_help = "test echo server proto <proto> [no echo][fifo-size <mbytes>]"
- "[rcv-buf-size <bytes>][prealloc-fifos <count>]"
- "[private-segment-count <count>][private-segment-size <bytes[m|g]>]"
- "[uri <tcp://ip/port>]",
+ .short_help =
+ "test echo server proto <proto> [fifo-size <mbytes>]"
+ "[rcv-buf-size <bytes>][prealloc-fifos <count>]"
+ "[private-segment-count <count>][private-segment-size <bytes[m|g]>]"
+ "[uri <tcp://ip/port>]",
.function = echo_server_create_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
echo_server_main_init (vlib_main_t * vm)
{
- echo_server_main_t *esm = &echo_server_main;
- esm->my_client_index = ~0;
return 0;
}
diff --git a/src/plugins/hs_apps/hs_apps.c b/src/plugins/hs_apps/hs_apps.c
index 5067919cc28..8e991954c7e 100644
--- a/src/plugins/hs_apps/hs_apps.c
+++ b/src/plugins/hs_apps/hs_apps.c
@@ -17,13 +17,11 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Host Stack Applications",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/hs_apps/hs_test.h b/src/plugins/hs_apps/hs_test.h
new file mode 100644
index 00000000000..167c7957229
--- /dev/null
+++ b/src/plugins/hs_apps/hs_test.h
@@ -0,0 +1,212 @@
+/*
+ * hs_test.h
+ *
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_hs_test_t__
+#define __included_hs_test_t__
+
+#include <vnet/session/application_interface.h>
+#include <vnet/session/session.h>
+
+#define HS_TEST_CFG_CTRL_MAGIC 0xfeedface
+#define HS_TEST_CFG_TXBUF_SIZE_DEF 8192
+#define HS_TEST_CFG_RXBUF_SIZE_DEF (64 * HS_TEST_CFG_TXBUF_SIZE_DEF)
+#define HS_TEST_CFG_NUM_WRITES_DEF 1000000
+
+#define VCL_TEST_TOKEN_HELP "#H"
+#define VCL_TEST_TOKEN_EXIT "#X"
+#define VCL_TEST_TOKEN_VERBOSE "#V"
+#define VCL_TEST_TOKEN_TXBUF_SIZE "#T:"
+#define VCL_TEST_TOKEN_NUM_TEST_SESS "#I:"
+#define VCL_TEST_TOKEN_NUM_WRITES "#N:"
+#define VCL_TEST_TOKEN_RXBUF_SIZE "#R:"
+#define VCL_TEST_TOKEN_SHOW_CFG "#C"
+#define HS_TEST_TOKEN_RUN_UNI "#U"
+#define HS_TEST_TOKEN_RUN_BI "#B"
+
+#define HS_TEST_SEPARATOR_STRING " -----------------------------\n"
+
+#define HS_CTRL_HANDLE (~0)
+
+typedef enum
+{
+ HS_TEST_CMD_SYNC,
+ HS_TEST_CMD_START,
+ HS_TEST_CMD_STOP,
+} hs_test_cmd_t;
+
+typedef enum
+{
+ HS_TEST_TYPE_NONE,
+ HS_TEST_TYPE_ECHO,
+ HS_TEST_TYPE_UNI,
+ HS_TEST_TYPE_BI,
+ HS_TEST_TYPE_EXIT,
+ HS_TEST_TYPE_EXIT_CLIENT,
+} hs_test_t;
+
+typedef struct __attribute__ ((packed))
+{
+ uint32_t magic;
+ uint32_t seq_num;
+ uint32_t test;
+ uint32_t cmd;
+ uint32_t ctrl_handle;
+ uint32_t num_test_sessions;
+ uint32_t num_test_sessions_perq;
+ uint32_t num_test_qsessions;
+ uint32_t verbose;
+ uint32_t address_ip6;
+ uint32_t transport_udp;
+ uint64_t rxbuf_size;
+ uint64_t txbuf_size;
+ uint64_t num_writes;
+ uint64_t total_bytes;
+ uint32_t test_bytes;
+} hs_test_cfg_t;
+
+static inline char *
+hs_test_type_str (hs_test_t t)
+{
+ switch (t)
+ {
+ case HS_TEST_TYPE_NONE:
+ return "NONE";
+
+ case HS_TEST_TYPE_ECHO:
+ return "ECHO";
+
+ case HS_TEST_TYPE_UNI:
+ return "UNI";
+
+ case HS_TEST_TYPE_BI:
+ return "BI";
+
+ case HS_TEST_TYPE_EXIT:
+ return "EXIT";
+
+ default:
+ return "Unknown";
+ }
+}
+
+static inline int
+hs_test_cfg_verify (hs_test_cfg_t *cfg, hs_test_cfg_t *valid_cfg)
+{
+ /* Note: txbuf & rxbuf on server are the same buffer,
+ * so txbuf_size is not included in this check.
+ */
+ return ((cfg->magic == valid_cfg->magic) && (cfg->test == valid_cfg->test) &&
+ (cfg->verbose == valid_cfg->verbose) &&
+ (cfg->rxbuf_size == valid_cfg->rxbuf_size) &&
+ (cfg->num_writes == valid_cfg->num_writes) &&
+ (cfg->total_bytes == valid_cfg->total_bytes));
+}
+
+static inline void
+hs_test_cfg_init (hs_test_cfg_t *cfg)
+{
+ cfg->magic = HS_TEST_CFG_CTRL_MAGIC;
+ cfg->test = HS_TEST_TYPE_UNI;
+ cfg->ctrl_handle = ~0;
+ cfg->num_test_sessions = 1;
+ cfg->num_test_sessions_perq = 1;
+ cfg->verbose = 0;
+ cfg->rxbuf_size = HS_TEST_CFG_RXBUF_SIZE_DEF;
+ cfg->num_writes = HS_TEST_CFG_NUM_WRITES_DEF;
+ cfg->txbuf_size = HS_TEST_CFG_TXBUF_SIZE_DEF;
+ cfg->total_bytes = cfg->num_writes * cfg->txbuf_size;
+ cfg->test_bytes = 0;
+}
+
+static inline char *
+hs_test_cmd_to_str (int cmd)
+{
+ switch (cmd)
+ {
+ case HS_TEST_CMD_SYNC:
+ return "SYNC";
+ case HS_TEST_CMD_START:
+ return "START";
+ case HS_TEST_CMD_STOP:
+ return "STOP";
+ }
+ return "";
+}
+
+static inline void
+hs_test_cfg_dump (hs_test_cfg_t *cfg, uint8_t is_client)
+{
+ char *spc = " ";
+
+ printf (" test config (%p):\n" HS_TEST_SEPARATOR_STRING
+ " command: %s\n"
+ " magic: 0x%08x\n"
+ " seq_num: 0x%08x\n"
+ " test bytes: %s\n"
+ "%-5s test: %s (%d)\n"
+ " ctrl handle: %d (0x%x)\n"
+ "%-5s num test sockets: %u (0x%08x)\n"
+ "%-5s verbose: %s (%d)\n"
+ "%-5s rxbuf size: %lu (0x%08lx)\n"
+ "%-5s txbuf size: %lu (0x%08lx)\n"
+ "%-5s num writes: %lu (0x%08lx)\n"
+ " client tx bytes: %lu (0x%08lx)\n" HS_TEST_SEPARATOR_STRING,
+ (void *) cfg, hs_test_cmd_to_str (cfg->cmd), cfg->magic,
+ cfg->seq_num, cfg->test_bytes ? "yes" : "no",
+ is_client && (cfg->test == HS_TEST_TYPE_UNI) ?
+ "'" HS_TEST_TOKEN_RUN_UNI "'" :
+ is_client && (cfg->test == HS_TEST_TYPE_BI) ?
+ "'" HS_TEST_TOKEN_RUN_BI "'" :
+ spc,
+ hs_test_type_str (cfg->test), cfg->test, cfg->ctrl_handle,
+ cfg->ctrl_handle,
+ is_client ? "'" VCL_TEST_TOKEN_NUM_TEST_SESS "'" : spc,
+ cfg->num_test_sessions, cfg->num_test_sessions,
+ is_client ? "'" VCL_TEST_TOKEN_VERBOSE "'" : spc,
+ cfg->verbose ? "on" : "off", cfg->verbose,
+ is_client ? "'" VCL_TEST_TOKEN_RXBUF_SIZE "'" : spc, cfg->rxbuf_size,
+ cfg->rxbuf_size, is_client ? "'" VCL_TEST_TOKEN_TXBUF_SIZE "'" : spc,
+ cfg->txbuf_size, cfg->txbuf_size,
+ is_client ? "'" VCL_TEST_TOKEN_NUM_WRITES "'" : spc, cfg->num_writes,
+ cfg->num_writes, cfg->total_bytes, cfg->total_bytes);
+}
+
+static inline u16
+hs_make_data_port (u16 p)
+{
+ p = clib_net_to_host_u16 (p);
+ return clib_host_to_net_u16 (p + 1);
+}
+
+static inline void
+hs_test_app_session_init_ (app_session_t *as, session_t *s)
+{
+ as->rx_fifo = s->rx_fifo;
+ as->tx_fifo = s->tx_fifo;
+ as->vpp_evt_q = session_main_get_vpp_event_queue (s->thread_index);
+ if (session_get_transport_proto (s) == TRANSPORT_PROTO_UDP)
+ {
+ transport_connection_t *tc;
+ tc = session_get_transport (s);
+ clib_memcpy_fast (&as->transport, tc, sizeof (as->transport));
+ as->is_dgram = 1;
+ }
+}
+
+#define hs_test_app_session_init(_as, _s) \
+ hs_test_app_session_init_ ((app_session_t *) (_as), (_s))
+
+#endif /* __included_hs_test_t__ */
diff --git a/src/plugins/hs_apps/http_cli.c b/src/plugins/hs_apps/http_cli.c
new file mode 100644
index 00000000000..5d4d49c0fba
--- /dev/null
+++ b/src/plugins/hs_apps/http_cli.c
@@ -0,0 +1,676 @@
+/*
+* Copyright (c) 2017-2019 Cisco and/or its affiliates.
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at:
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+#include <vnet/session/session.h>
+#include <http/http.h>
+
+typedef struct
+{
+ u32 hs_index;
+ u32 thread_index;
+ u64 node_index;
+ u8 *buf;
+} hcs_cli_args_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 session_index;
+ u32 thread_index;
+ u8 *tx_buf;
+ u32 tx_offset;
+ u32 vpp_session_index;
+} hcs_session_t;
+
+typedef struct
+{
+ hcs_session_t **sessions;
+ u32 *free_http_cli_process_node_indices;
+ u32 app_index;
+
+ /* Cert key pair for tls */
+ u32 ckpair_index;
+
+ u32 prealloc_fifos;
+ u32 private_segment_size;
+ u32 fifo_size;
+ u8 *uri;
+ vlib_main_t *vlib_main;
+} hcs_main_t;
+
+static hcs_main_t hcs_main;
+
+static hcs_session_t *
+hcs_session_alloc (u32 thread_index)
+{
+ hcs_main_t *hcm = &hcs_main;
+ hcs_session_t *hs;
+ pool_get (hcm->sessions[thread_index], hs);
+ memset (hs, 0, sizeof (*hs));
+ hs->session_index = hs - hcm->sessions[thread_index];
+ hs->thread_index = thread_index;
+ return hs;
+}
+
+static hcs_session_t *
+hcs_session_get (u32 thread_index, u32 hs_index)
+{
+ hcs_main_t *hcm = &hcs_main;
+ if (pool_is_free_index (hcm->sessions[thread_index], hs_index))
+ return 0;
+ return pool_elt_at_index (hcm->sessions[thread_index], hs_index);
+}
+
+static void
+hcs_session_free (hcs_session_t *hs)
+{
+ hcs_main_t *hcm = &hcs_main;
+ u32 thread = hs->thread_index;
+ if (CLIB_DEBUG)
+ memset (hs, 0xfa, sizeof (*hs));
+ pool_put (hcm->sessions[thread], hs);
+}
+
+static void
+hcs_cli_process_free (hcs_cli_args_t *args)
+{
+ vlib_main_t *vm = vlib_get_first_main ();
+ hcs_main_t *hcm = &hcs_main;
+ hcs_cli_args_t **save_args;
+ vlib_node_runtime_t *rt;
+ vlib_node_t *n;
+ u32 node_index;
+
+ node_index = args->node_index;
+ ASSERT (node_index != 0);
+
+ n = vlib_get_node (vm, node_index);
+ rt = vlib_node_get_runtime (vm, n->index);
+ save_args = vlib_node_get_runtime_data (vm, n->index);
+
+ /* Reset process session pointer */
+ clib_mem_free (*save_args);
+ *save_args = 0;
+
+ /* Turn off the process node */
+ vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+
+ /* add node index to the freelist */
+ vec_add1 (hcm->free_http_cli_process_node_indices, node_index);
+}
+
+/* Header, including incantation to suppress favicon.ico requests */
+static const char *html_header_template =
+ "<html><head><title>%v</title></head>"
+ "<link rel=\"icon\" href=\"data:,\">"
+ "<body><pre>";
+
+static const char *html_footer =
+ "</pre></body></html>\r\n";
+
+static void
+hcs_cli_output (uword arg, u8 *buffer, uword buffer_bytes)
+{
+ u8 **output_vecp = (u8 **) arg;
+ u8 *output_vec;
+ u32 offset;
+
+ output_vec = *output_vecp;
+
+ offset = vec_len (output_vec);
+ vec_validate (output_vec, offset + buffer_bytes - 1);
+ clib_memcpy_fast (output_vec + offset, buffer, buffer_bytes);
+
+ *output_vecp = output_vec;
+}
+
+static void
+start_send_data (hcs_session_t *hs, http_status_code_t status)
+{
+ http_msg_t msg;
+ session_t *ts;
+ int rv;
+
+ msg.type = HTTP_MSG_REPLY;
+ msg.code = status;
+ msg.content_type = HTTP_CONTENT_TEXT_HTML;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = vec_len (hs->tx_buf);
+
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (!msg.data.len)
+ goto done;
+
+ rv = svm_fifo_enqueue (ts->tx_fifo, vec_len (hs->tx_buf), hs->tx_buf);
+
+ if (rv != vec_len (hs->tx_buf))
+ {
+ hs->tx_offset = rv;
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ }
+ else
+ {
+ vec_free (hs->tx_buf);
+ }
+
+done:
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+}
+
+static void
+send_data_to_http (void *rpc_args)
+{
+ hcs_cli_args_t *args = (hcs_cli_args_t *) rpc_args;
+ hcs_session_t *hs;
+
+ hs = hcs_session_get (args->thread_index, args->hs_index);
+ if (!hs)
+ {
+ vec_free (args->buf);
+ goto cleanup;
+ }
+
+ hs->tx_buf = args->buf;
+ start_send_data (hs, HTTP_STATUS_OK);
+
+cleanup:
+
+ clib_mem_free (rpc_args);
+}
+
+static uword
+hcs_cli_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ u8 *request = 0, *reply = 0, *html = 0;
+ hcs_cli_args_t *args, *rpc_args;
+ hcs_main_t *hcm = &hcs_main;
+ hcs_cli_args_t **save_args;
+ unformat_input_t input;
+ int i;
+
+ save_args = vlib_node_get_runtime_data (hcm->vlib_main, rt->node_index);
+ args = *save_args;
+
+ request = args->buf;
+
+ /* Replace slashes with spaces, stop at the end of the path */
+ i = 0;
+ while (i < vec_len (request))
+ {
+ if (request[i] == '/')
+ request[i] = ' ';
+ else if (request[i] == ' ')
+ {
+ /* vlib_cli_input is vector-based, no need for a NULL */
+ vec_set_len (request, i);
+ break;
+ }
+ i++;
+ }
+
+ /* Generate the html header */
+ html = format (0, html_header_template, request /* title */ );
+
+ /* Run the command */
+ unformat_init_vector (&input, vec_dup (request));
+ vlib_cli_input (vm, &input, hcs_cli_output, (uword) &reply);
+ unformat_free (&input);
+ request = 0;
+
+ /* Generate the html page */
+ html = format (html, "%v", reply);
+ html = format (html, html_footer);
+
+ /* Send it */
+ rpc_args = clib_mem_alloc (sizeof (*args));
+ clib_memcpy_fast (rpc_args, args, sizeof (*args));
+ rpc_args->buf = html;
+
+ session_send_rpc_evt_to_thread_force (args->thread_index, send_data_to_http,
+ rpc_args);
+
+ vec_free (reply);
+ vec_free (args->buf);
+ hcs_cli_process_free (args);
+
+ return (0);
+}
+
+static void
+alloc_cli_process (hcs_cli_args_t *args)
+{
+ hcs_main_t *hcm = &hcs_main;
+ vlib_main_t *vm = hcm->vlib_main;
+ hcs_cli_args_t **save_args;
+ vlib_node_t *n;
+ uword l;
+
+ l = vec_len (hcm->free_http_cli_process_node_indices);
+ if (l > 0)
+ {
+ n = vlib_get_node (vm, hcm->free_http_cli_process_node_indices[l - 1]);
+ vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
+ vec_set_len (hcm->free_http_cli_process_node_indices, l - 1);
+ }
+ else
+ {
+ static vlib_node_registration_t r = {
+ .function = hcs_cli_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 16,
+ .runtime_data_bytes = sizeof (void *),
+ };
+
+ vlib_register_node (vm, &r, "http-cli-%d", l);
+
+ n = vlib_get_node (vm, r.index);
+ }
+
+ /* Save the node index in the args. It won't be zero. */
+ args->node_index = n->index;
+
+ /* Save the args (pointer) in the node runtime */
+ save_args = vlib_node_get_runtime_data (vm, n->index);
+ *save_args = clib_mem_alloc (sizeof (*args));
+ clib_memcpy_fast (*save_args, args, sizeof (*args));
+
+ vlib_start_process (vm, n->runtime_index);
+}
+
+static void
+alloc_cli_process_callback (void *cb_args)
+{
+ alloc_cli_process ((hcs_cli_args_t *) cb_args);
+}
+
+static int
+hcs_ts_rx_callback (session_t *ts)
+{
+ hcs_cli_args_t args = {};
+ hcs_session_t *hs;
+ http_msg_t msg;
+ int rv;
+
+ hs = hcs_session_get (ts->thread_index, ts->opaque);
+
+ /* Read the http message header */
+ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET)
+ {
+ hs->tx_buf = 0;
+ start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
+ return 0;
+ }
+
+ /* send the command to a new/recycled vlib process */
+ vec_validate (args.buf, msg.data.len - 1);
+ rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, args.buf);
+ ASSERT (rv == msg.data.len);
+ vec_set_len (args.buf, rv);
+
+ args.hs_index = hs->session_index;
+ args.thread_index = ts->thread_index;
+
+ /* Send RPC request to main thread */
+ if (vlib_get_thread_index () != 0)
+ vlib_rpc_call_main_thread (alloc_cli_process_callback, (u8 *) &args,
+ sizeof (args));
+ else
+ alloc_cli_process (&args);
+ return 0;
+}
+
+static int
+hcs_ts_tx_callback (session_t *ts)
+{
+ hcs_session_t *hs;
+ u32 to_send;
+ int rv;
+
+ hs = hcs_session_get (ts->thread_index, ts->opaque);
+ if (!hs || !hs->tx_buf)
+ return 0;
+
+ to_send = vec_len (hs->tx_buf) - hs->tx_offset;
+ rv = svm_fifo_enqueue (ts->tx_fifo, to_send, hs->tx_buf + hs->tx_offset);
+
+ if (rv <= 0)
+ {
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ return 0;
+ }
+
+ if (rv < to_send)
+ {
+ hs->tx_offset += rv;
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ }
+ else
+ {
+ vec_free (hs->tx_buf);
+ }
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+
+ return 0;
+}
+
+static int
+hcs_ts_accept_callback (session_t *ts)
+{
+ hcs_session_t *hs;
+
+ hs = hcs_session_alloc (ts->thread_index);
+ hs->vpp_session_index = ts->session_index;
+
+ ts->opaque = hs->session_index;
+ ts->session_state = SESSION_STATE_READY;
+
+ return 0;
+}
+
+static int
+hcs_ts_connected_callback (u32 app_index, u32 api_context, session_t *s,
+ session_error_t err)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+static void
+hcs_ts_disconnect_callback (session_t *s)
+{
+ hcs_main_t *hcm = &hcs_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static void
+hcs_ts_reset_callback (session_t *s)
+{
+ hcs_main_t *hcm = &hcs_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static void
+hcs_ts_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf)
+{
+ hcs_session_t *hs;
+
+ if (ntf == SESSION_CLEANUP_TRANSPORT)
+ return;
+
+ hs = hcs_session_get (s->thread_index, s->opaque);
+ if (!hs)
+ return;
+
+ vec_free (hs->tx_buf);
+ hcs_session_free (hs);
+}
+
+static int
+hcs_add_segment_callback (u32 client_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static int
+hcs_del_segment_callback (u32 client_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static session_cb_vft_t hcs_session_cb_vft = {
+ .session_accept_callback = hcs_ts_accept_callback,
+ .session_disconnect_callback = hcs_ts_disconnect_callback,
+ .session_connected_callback = hcs_ts_connected_callback,
+ .add_segment_callback = hcs_add_segment_callback,
+ .del_segment_callback = hcs_del_segment_callback,
+ .builtin_app_rx_callback = hcs_ts_rx_callback,
+ .builtin_app_tx_callback = hcs_ts_tx_callback,
+ .session_reset_callback = hcs_ts_reset_callback,
+ .session_cleanup_callback = hcs_ts_cleanup_callback,
+};
+
+static int
+hcs_attach ()
+{
+ vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
+ hcs_main_t *hcm = &hcs_main;
+ u64 options[APP_OPTIONS_N_OPTIONS];
+ vnet_app_attach_args_t _a, *a = &_a;
+ u32 segment_size = 128 << 20;
+
+ clib_memset (a, 0, sizeof (*a));
+ clib_memset (options, 0, sizeof (options));
+
+ if (hcm->private_segment_size)
+ segment_size = hcm->private_segment_size;
+
+ a->api_client_index = ~0;
+ a->name = format (0, "http_cli_server");
+ a->session_cb_vft = &hcs_session_cb_vft;
+ a->options = options;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_RX_FIFO_SIZE] =
+ hcm->fifo_size ? hcm->fifo_size : 8 << 10;
+ a->options[APP_OPTIONS_TX_FIFO_SIZE] =
+ hcm->fifo_size ? hcm->fifo_size : 32 << 10;
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+ a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos;
+
+ if (vnet_application_attach (a))
+ {
+ vec_free (a->name);
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+ vec_free (a->name);
+ hcm->app_index = a->app_index;
+
+ clib_memset (ck_pair, 0, sizeof (*ck_pair));
+ ck_pair->cert = (u8 *) test_srv_crt_rsa;
+ ck_pair->key = (u8 *) test_srv_key_rsa;
+ ck_pair->cert_len = test_srv_crt_rsa_len;
+ ck_pair->key_len = test_srv_key_rsa_len;
+ vnet_app_add_cert_key_pair (ck_pair);
+ hcm->ckpair_index = ck_pair->index;
+
+ return 0;
+}
+
+static int
+hcs_transport_needs_crypto (transport_proto_t proto)
+{
+ return proto == TRANSPORT_PROTO_TLS || proto == TRANSPORT_PROTO_DTLS ||
+ proto == TRANSPORT_PROTO_QUIC;
+}
+
+static int
+hcs_listen ()
+{
+ session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
+ hcs_main_t *hcm = &hcs_main;
+ vnet_listen_args_t _a, *a = &_a;
+ char *uri = "tcp://0.0.0.0/80";
+ u8 need_crypto;
+ int rv;
+
+ clib_memset (a, 0, sizeof (*a));
+ a->app_index = hcm->app_index;
+
+ if (hcm->uri)
+ uri = (char *) hcm->uri;
+
+ if (parse_uri (uri, &sep))
+ return -1;
+
+ need_crypto = hcs_transport_needs_crypto (sep.transport_proto);
+
+ sep.transport_proto = TRANSPORT_PROTO_HTTP;
+ clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
+
+ if (need_crypto)
+ {
+ session_endpoint_alloc_ext_cfg (&a->sep_ext,
+ TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+ a->sep_ext.ext_cfg->crypto.ckpair_index = hcm->ckpair_index;
+ }
+
+ rv = vnet_listen (a);
+
+ if (need_crypto)
+ clib_mem_free (a->sep_ext.ext_cfg);
+
+ return rv;
+}
+
+static void
+hcs_detach ()
+{
+ vnet_app_detach_args_t _a, *a = &_a;
+ hcs_main_t *hcm = &hcs_main;
+ a->app_index = hcm->app_index;
+ a->api_client_index = APP_INVALID_INDEX;
+ hcm->app_index = ~0;
+ vnet_application_detach (a);
+}
+
+static int
+hcs_create (vlib_main_t *vm)
+{
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ hcs_main_t *hcm = &hcs_main;
+ u32 num_threads;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (hcm->sessions, num_threads - 1);
+
+ if (hcs_attach ())
+ {
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+ if (hcs_listen ())
+ {
+ hcs_detach ();
+ clib_warning ("failed to start listening");
+ return -1;
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+hcs_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ hcs_main_t *hcm = &hcs_main;
+ u64 seg_size;
+ int rv;
+
+ hcm->prealloc_fifos = 0;
+ hcm->private_segment_size = 0;
+ hcm->fifo_size = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ goto start_server;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "prealloc-fifos %d", &hcm->prealloc_fifos))
+ ;
+ else if (unformat (line_input, "private-segment-size %U",
+ unformat_memory_size, &seg_size))
+ hcm->private_segment_size = seg_size;
+ else if (unformat (line_input, "fifo-size %d", &hcm->fifo_size))
+ hcm->fifo_size <<= 10;
+ else if (unformat (line_input, "uri %s", &hcm->uri))
+ ;
+ else
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ }
+
+ unformat_free (line_input);
+
+start_server:
+
+ if (hcm->app_index != (u32) ~0)
+ return clib_error_return (0, "test http server is already running");
+
+ vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
+
+ rv = hcs_create (vm);
+ switch (rv)
+ {
+ case 0:
+ break;
+ default:
+ return clib_error_return (0, "server_create returned %d", rv);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (hcs_create_command, static) = {
+ .path = "http cli server",
+ .short_help = "http cli server [uri <uri>] [fifo-size <nbytes>] "
+ "[private-segment-size <nMG>] [prealloc-fifos <n>]",
+ .function = hcs_create_command_fn,
+};
+
+static clib_error_t *
+hcs_main_init (vlib_main_t *vm)
+{
+ hcs_main_t *hcs = &hcs_main;
+
+ hcs->app_index = ~0;
+ hcs->vlib_main = vm;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (hcs_main_init);
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vpp/api/vpe_msg_enum.h b/src/plugins/hs_apps/http_cli.h
index 4fcc1c8cd8e..f774552d60f 100644
--- a/src/vpp/api/vpe_msg_enum.h
+++ b/src/plugins/hs_apps/http_cli.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Copyright (c) 2022 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -12,21 +12,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#ifndef included_vpe_msg_enum_h
-#define included_vpe_msg_enum_h
+const char *html_header_template = "<html><head><title>%v</title></head>"
+ "<link rel=\"icon\" href=\"data:,\">"
+ "<body><pre>";
-#include <vppinfra/byte_order.h>
-
-#define vl_msg_id(n,h) n,
-typedef enum
-{
- VL_ILLEGAL_MESSAGE_ID = 0,
-#include <vpp/api/vpe_all_api_h.h>
- VL_MSG_FIRST_AVAILABLE,
-} vl_msg_id_t;
-#undef vl_msg_id
-
-#endif /* included_vpe_msg_enum_h */
+const char *html_footer = "</pre></body></html>\r\n";
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/hs_apps/http_client_cli.c b/src/plugins/hs_apps/http_client_cli.c
new file mode 100644
index 00000000000..085a2b69bf7
--- /dev/null
+++ b/src/plugins/hs_apps/http_client_cli.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+#include <vnet/session/session.h>
+#include <http/http.h>
+#include <hs_apps/http_cli.h>
+
+#define HCC_DEBUG 0
+
+#if HCC_DEBUG
+#define HCC_DBG(_fmt, _args...) clib_warning (_fmt, ##_args)
+#else
+#define HCC_DBG(_fmt, _args...)
+#endif
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 session_index;
+ u32 thread_index;
+ u32 rx_offset;
+ u32 vpp_session_index;
+ u32 to_recv;
+ u8 is_closed;
+} hcc_session_t;
+
+typedef struct
+{
+ hcc_session_t *sessions;
+ u8 *rx_buf;
+ u32 thread_index;
+} hcc_worker_t;
+
+typedef struct
+{
+ hcc_worker_t *wrk;
+ u32 app_index;
+
+ u32 prealloc_fifos;
+ u32 private_segment_size;
+ u32 fifo_size;
+ u8 *uri;
+ u8 *http_query;
+ session_endpoint_cfg_t connect_sep;
+
+ u8 test_client_attached;
+ vlib_main_t *vlib_main;
+ u32 cli_node_index;
+ u8 *http_response;
+ u8 *appns_id;
+ u64 appns_secret;
+} hcc_main_t;
+
+typedef enum
+{
+ HCC_REPLY_RECEIVED = 100,
+} hcc_cli_signal_t;
+
+static hcc_main_t hcc_main;
+
+static hcc_worker_t *
+hcc_worker_get (u32 thread_index)
+{
+ return vec_elt_at_index (hcc_main.wrk, thread_index);
+}
+
+static hcc_session_t *
+hcc_session_alloc (hcc_worker_t *wrk)
+{
+ hcc_session_t *hs;
+ pool_get_zero (wrk->sessions, hs);
+ hs->session_index = hs - wrk->sessions;
+ hs->thread_index = wrk->thread_index;
+ return hs;
+}
+
+static hcc_session_t *
+hcc_session_get (u32 hs_index, u32 thread_index)
+{
+ hcc_worker_t *wrk = hcc_worker_get (thread_index);
+ return pool_elt_at_index (wrk->sessions, hs_index);
+}
+
+static void
+hcc_session_free (u32 thread_index, hcc_session_t *hs)
+{
+ hcc_worker_t *wrk = hcc_worker_get (thread_index);
+ pool_put (wrk->sessions, hs);
+}
+
+static int
+hcc_ts_accept_callback (session_t *ts)
+{
+ clib_warning ("bug");
+ return -1;
+}
+
+static void
+hcc_ts_disconnect_callback (session_t *s)
+{
+ hcc_main_t *hcm = &hcc_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static int
+hcc_ts_connected_callback (u32 app_index, u32 hc_index, session_t *as,
+ session_error_t err)
+{
+ hcc_main_t *hcm = &hcc_main;
+ hcc_session_t *hs, *new_hs;
+ hcc_worker_t *wrk;
+ http_msg_t msg;
+ int rv;
+
+ HCC_DBG ("hc_index: %d", hc_index);
+
+ if (err)
+ {
+ clib_warning ("connected error: hc_index(%d): %U", hc_index,
+ format_session_error, err);
+ return -1;
+ }
+
+ /* TODO delete half open session once the support is added in http layer */
+ hs = hcc_session_get (hc_index, 0);
+ wrk = hcc_worker_get (as->thread_index);
+ new_hs = hcc_session_alloc (wrk);
+ clib_memcpy_fast (new_hs, hs, sizeof (*hs));
+
+ hs->vpp_session_index = as->session_index;
+
+ msg.type = HTTP_MSG_REQUEST;
+ msg.method_type = HTTP_REQ_GET;
+ msg.content_type = HTTP_CONTENT_TEXT_HTML;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = vec_len (hcm->http_query);
+
+ svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) },
+ { hcm->http_query, vec_len (hcm->http_query) } };
+
+ rv = svm_fifo_enqueue_segments (as->tx_fifo, segs, 2, 0 /* allow partial */);
+ if (rv < 0 || rv != sizeof (msg) + vec_len (hcm->http_query))
+ {
+ clib_warning ("failed app enqueue");
+ return -1;
+ }
+
+ if (svm_fifo_set_event (as->tx_fifo))
+ session_send_io_evt_to_thread (as->tx_fifo, SESSION_IO_EVT_TX);
+
+ return 0;
+}
+
+static void
+hcc_ts_reset_callback (session_t *s)
+{
+ hcc_main_t *hcm = &hcc_main;
+ hcc_session_t *hs;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ hs = hcc_session_get (s->opaque, s->thread_index);
+ hs->is_closed = 1;
+
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static int
+hcc_ts_tx_callback (session_t *ts)
+{
+ clib_warning ("bug");
+ return -1;
+}
+
+static void
+hcc_session_disconnect (session_t *s)
+{
+ hcc_main_t *hcm = &hcc_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+ a->handle = session_handle (s);
+ a->app_index = hcm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static int
+hcc_ts_rx_callback (session_t *ts)
+{
+ hcc_main_t *hcm = &hcc_main;
+ hcc_session_t *hs;
+ http_msg_t msg;
+ int rv;
+
+ hs = hcc_session_get (ts->opaque, ts->thread_index);
+
+ if (hs->is_closed)
+ {
+ clib_warning ("session is closed");
+ return 0;
+ }
+
+ if (hs->to_recv == 0)
+ {
+ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.type != HTTP_MSG_REPLY || msg.code != HTTP_STATUS_OK)
+ {
+ clib_warning ("unexpected msg type %d", msg.type);
+ return 0;
+ }
+ vec_validate (hcm->http_response, msg.data.len - 1);
+ vec_reset_length (hcm->http_response);
+ hs->to_recv = msg.data.len;
+ }
+
+ u32 max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
+
+ u32 n_deq = clib_min (hs->to_recv, max_deq);
+ u32 curr = vec_len (hcm->http_response);
+ rv = svm_fifo_dequeue (ts->rx_fifo, n_deq, hcm->http_response + curr);
+ if (rv < 0)
+ {
+ clib_warning ("app dequeue(n=%d) failed; rv = %d", n_deq, rv);
+ return -1;
+ }
+
+ if (rv != n_deq)
+ return -1;
+
+ vec_set_len (hcm->http_response, curr + n_deq);
+ ASSERT (hs->to_recv >= rv);
+ hs->to_recv -= rv;
+ HCC_DBG ("app rcvd %d, remains %d", rv, hs->to_recv);
+
+ if (hs->to_recv == 0)
+ {
+ hcc_session_disconnect (ts);
+ vlib_process_signal_event_mt (hcm->vlib_main, hcm->cli_node_index,
+ HCC_REPLY_RECEIVED, 0);
+ }
+
+ return 0;
+}
+
+static void
+hcc_ts_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf)
+{
+ hcc_session_t *hs;
+
+ hs = hcc_session_get (s->thread_index, s->opaque);
+ if (!hs)
+ return;
+
+ hcc_session_free (s->thread_index, hs);
+}
+
+static session_cb_vft_t hcc_session_cb_vft = {
+ .session_accept_callback = hcc_ts_accept_callback,
+ .session_disconnect_callback = hcc_ts_disconnect_callback,
+ .session_connected_callback = hcc_ts_connected_callback,
+ .builtin_app_rx_callback = hcc_ts_rx_callback,
+ .builtin_app_tx_callback = hcc_ts_tx_callback,
+ .session_reset_callback = hcc_ts_reset_callback,
+ .session_cleanup_callback = hcc_ts_cleanup_callback,
+};
+
+static clib_error_t *
+hcc_attach ()
+{
+ hcc_main_t *hcm = &hcc_main;
+ vnet_app_attach_args_t _a, *a = &_a;
+ u64 options[18];
+ u32 segment_size = 128 << 20;
+ int rv;
+
+ if (hcm->private_segment_size)
+ segment_size = hcm->private_segment_size;
+
+ clib_memset (a, 0, sizeof (*a));
+ clib_memset (options, 0, sizeof (options));
+
+ a->api_client_index = ~0;
+ a->name = format (0, "http_cli_client");
+ a->session_cb_vft = &hcc_session_cb_vft;
+ a->options = options;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_RX_FIFO_SIZE] =
+ hcm->fifo_size ? hcm->fifo_size : 8 << 10;
+ a->options[APP_OPTIONS_TX_FIFO_SIZE] =
+ hcm->fifo_size ? hcm->fifo_size : 32 << 10;
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+ a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hcm->prealloc_fifos;
+ if (hcm->appns_id)
+ {
+ a->namespace_id = hcm->appns_id;
+ a->options[APP_OPTIONS_NAMESPACE_SECRET] = hcm->appns_secret;
+ }
+
+ if ((rv = vnet_application_attach (a)))
+ return clib_error_return (0, "attach returned %d", rv);
+
+ hcm->app_index = a->app_index;
+ vec_free (a->name);
+ hcm->test_client_attached = 1;
+ return 0;
+}
+
+static int
+hcc_connect_rpc (void *rpc_args)
+{
+ vnet_connect_args_t *a = rpc_args;
+ int rv;
+
+ rv = vnet_connect (a);
+ if (rv)
+ clib_warning (0, "connect returned: %U", format_session_error, rv);
+
+ vec_free (a);
+ return rv;
+}
+
+static void
+hcc_program_connect (vnet_connect_args_t *a)
+{
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ hcc_connect_rpc, a);
+}
+
+static clib_error_t *
+hcc_connect ()
+{
+ vnet_connect_args_t *a = 0;
+ hcc_main_t *hcm = &hcc_main;
+ hcc_worker_t *wrk;
+ hcc_session_t *hs;
+
+ vec_validate (a, 0);
+ clib_memset (a, 0, sizeof (a[0]));
+
+ clib_memcpy (&a->sep_ext, &hcm->connect_sep, sizeof (hcm->connect_sep));
+ a->app_index = hcm->app_index;
+
+ /* allocate http session on main thread */
+ wrk = hcc_worker_get (0);
+ hs = hcc_session_alloc (wrk);
+ a->api_context = hs->session_index;
+
+ hcc_program_connect (a);
+ return 0;
+}
+
+static clib_error_t *
+hcc_run (vlib_main_t *vm, int print_output)
+{
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ hcc_main_t *hcm = &hcc_main;
+ uword event_type, *event_data = 0;
+ u32 num_threads;
+ clib_error_t *err = 0;
+ hcc_worker_t *wrk;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (hcm->wrk, num_threads);
+ vec_foreach (wrk, hcm->wrk)
+ {
+ wrk->thread_index = wrk - hcm->wrk;
+ }
+
+ if ((err = hcc_attach ()))
+ {
+ return clib_error_return (0, "http client attach: %U", format_clib_error,
+ err);
+ }
+
+ if ((err = hcc_connect ()))
+ {
+ return clib_error_return (0, "http client connect: %U",
+ format_clib_error, err);
+ }
+
+ vlib_process_wait_for_event_or_clock (vm, 10);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0:
+ err = clib_error_return (0, "timeout");
+ goto cleanup;
+
+ case HCC_REPLY_RECEIVED:
+ if (print_output)
+ vlib_cli_output (vm, "%v", hcm->http_response);
+ vec_free (hcm->http_response);
+ break;
+ default:
+ err = clib_error_return (0, "unexpected event %d", event_type);
+ break;
+ }
+
+cleanup:
+ vec_free (event_data);
+ return err;
+}
+
+static int
+hcc_detach ()
+{
+ hcc_main_t *hcm = &hcc_main;
+ vnet_app_detach_args_t _da, *da = &_da;
+ int rv;
+
+ if (!hcm->test_client_attached)
+ return 0;
+
+ da->app_index = hcm->app_index;
+ da->api_client_index = ~0;
+ rv = vnet_application_detach (da);
+ hcm->test_client_attached = 0;
+ hcm->app_index = ~0;
+
+ return rv;
+}
+
+static clib_error_t *
+hcc_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ hcc_main_t *hcm = &hcc_main;
+ u64 seg_size;
+ u8 *appns_id = 0;
+ clib_error_t *err = 0;
+ int rv, print_output = 1;
+
+ hcm->prealloc_fifos = 0;
+ hcm->private_segment_size = 0;
+ hcm->fifo_size = 0;
+
+ if (hcm->test_client_attached)
+ return clib_error_return (0, "failed: already running!");
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, "expected URI");
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "prealloc-fifos %d", &hcm->prealloc_fifos))
+ ;
+ else if (unformat (line_input, "private-segment-size %U",
+ unformat_memory_size, &seg_size))
+ hcm->private_segment_size = seg_size;
+ else if (unformat (line_input, "fifo-size %d", &hcm->fifo_size))
+ hcm->fifo_size <<= 10;
+ else if (unformat (line_input, "uri %s", &hcm->uri))
+ ;
+ else if (unformat (line_input, "no-output"))
+ print_output = 0;
+ else if (unformat (line_input, "appns %_%v%_", &appns_id))
+ ;
+ else if (unformat (line_input, "secret %lu", &hcm->appns_secret))
+ ;
+ else if (unformat (line_input, "query %s", &hcm->http_query))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ vec_free (hcm->appns_id);
+ hcm->appns_id = appns_id;
+ hcm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
+
+ if (!hcm->uri)
+ {
+ err = clib_error_return (0, "URI not defined");
+ goto done;
+ }
+
+ if ((rv = parse_uri ((char *) hcm->uri, &hcm->connect_sep)))
+ {
+ err = clib_error_return (0, "Uri parse error: %d", rv);
+ goto done;
+ }
+
+ vlib_worker_thread_barrier_sync (vm);
+ vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */);
+ vlib_worker_thread_barrier_release (vm);
+
+ err = hcc_run (vm, print_output);
+
+ if (hcc_detach ())
+ {
+ /* don't override last error */
+ if (!err)
+ err = clib_error_return (0, "failed: app detach");
+ clib_warning ("WARNING: app detach failed...");
+ }
+
+done:
+ vec_free (hcm->uri);
+ vec_free (hcm->http_query);
+ unformat_free (line_input);
+ return err;
+}
+
+VLIB_CLI_COMMAND (hcc_command, static) = {
+ .path = "http cli client",
+ .short_help = "[appns <app-ns> secret <appns-secret>] uri http://<ip-addr> "
+ "query <query-string> [no-output]",
+ .function = hcc_command_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+hcc_main_init (vlib_main_t *vm)
+{
+ hcc_main_t *hcm = &hcc_main;
+
+ hcm->app_index = ~0;
+ hcm->vlib_main = vm;
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (hcc_main_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/hs_apps/http_server.c b/src/plugins/hs_apps/http_server.c
deleted file mode 100644
index a46e0a4ae13..00000000000
--- a/src/plugins/hs_apps/http_server.c
+++ /dev/null
@@ -1,1004 +0,0 @@
-/*
-* Copyright (c) 2017-2019 Cisco and/or its affiliates.
-* Licensed under the Apache License, Version 2.0 (the "License");
-* you may not use this file except in compliance with the License.
-* You may obtain a copy of the License at:
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-#include <vnet/vnet.h>
-#include <vnet/session/application.h>
-#include <vnet/session/application_interface.h>
-#include <vnet/session/session.h>
-#include <vppinfra/tw_timer_2t_1w_2048sl.h>
-
-typedef enum
-{
- EVENT_WAKEUP = 1,
-} http_process_event_t;
-
-typedef struct
-{
- u32 hs_index;
- u32 thread_index;
- u64 node_index;
-} http_server_args;
-
-typedef enum
-{
- HTTP_STATE_CLOSED,
- HTTP_STATE_ESTABLISHED,
- HTTP_STATE_OK_SENT,
-} http_session_state_t;
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-#define _(type, name) type name;
- foreach_app_session_field
-#undef _
- u32 thread_index;
- u8 *rx_buf;
- u32 vpp_session_index;
- u64 vpp_session_handle;
- u32 timer_handle;
-} http_session_t;
-
-typedef struct
-{
- http_session_t **sessions;
- clib_rwlock_t sessions_lock;
- u32 **session_to_http_session;
-
- svm_msg_q_t **vpp_queue;
-
- uword *handler_by_get_request;
-
- u32 *free_http_cli_process_node_indices;
-
- /* Sever's event queue */
- svm_queue_t *vl_input_queue;
-
- /* API client handle */
- u32 my_client_index;
-
- u32 app_index;
-
- /* process node index for evnt scheduling */
- u32 node_index;
-
- /* Cert key pair for tls */
- u32 ckpair_index;
-
- tw_timer_wheel_2t_1w_2048sl_t tw;
- clib_spinlock_t tw_lock;
-
- u32 prealloc_fifos;
- u32 private_segment_size;
- u32 fifo_size;
- u8 *uri;
- u32 is_static;
- vlib_main_t *vlib_main;
-} http_server_main_t;
-
-http_server_main_t http_server_main;
-
-static void
-http_server_sessions_reader_lock (void)
-{
- clib_rwlock_reader_lock (&http_server_main.sessions_lock);
-}
-
-static void
-http_server_sessions_reader_unlock (void)
-{
- clib_rwlock_reader_unlock (&http_server_main.sessions_lock);
-}
-
-static void
-http_server_sessions_writer_lock (void)
-{
- clib_rwlock_writer_lock (&http_server_main.sessions_lock);
-}
-
-static void
-http_server_sessions_writer_unlock (void)
-{
- clib_rwlock_writer_unlock (&http_server_main.sessions_lock);
-}
-
-static http_session_t *
-http_server_session_alloc (u32 thread_index)
-{
- http_server_main_t *hsm = &http_server_main;
- http_session_t *hs;
- pool_get (hsm->sessions[thread_index], hs);
- memset (hs, 0, sizeof (*hs));
- hs->session_index = hs - hsm->sessions[thread_index];
- hs->thread_index = thread_index;
- hs->timer_handle = ~0;
- return hs;
-}
-
-static http_session_t *
-http_server_session_get (u32 thread_index, u32 hs_index)
-{
- http_server_main_t *hsm = &http_server_main;
- if (pool_is_free_index (hsm->sessions[thread_index], hs_index))
- return 0;
- return pool_elt_at_index (hsm->sessions[thread_index], hs_index);
-}
-
-static void
-http_server_session_free (http_session_t * hs)
-{
- http_server_main_t *hsm = &http_server_main;
- u32 thread = hs->thread_index;
- if (CLIB_DEBUG)
- memset (hs, 0xfa, sizeof (*hs));
- pool_put (hsm->sessions[thread], hs);
-}
-
-static void
-http_server_session_lookup_add (u32 thread_index, u32 s_index, u32 hs_index)
-{
- http_server_main_t *hsm = &http_server_main;
- vec_validate (hsm->session_to_http_session[thread_index], s_index);
- hsm->session_to_http_session[thread_index][s_index] = hs_index;
-}
-
-static void
-http_server_session_lookup_del (u32 thread_index, u32 s_index)
-{
- http_server_main_t *hsm = &http_server_main;
- hsm->session_to_http_session[thread_index][s_index] = ~0;
-}
-
-static http_session_t *
-http_server_session_lookup (u32 thread_index, u32 s_index)
-{
- http_server_main_t *hsm = &http_server_main;
- u32 hs_index;
-
- if (s_index < vec_len (hsm->session_to_http_session[thread_index]))
- {
- hs_index = hsm->session_to_http_session[thread_index][s_index];
- return http_server_session_get (thread_index, hs_index);
- }
- return 0;
-}
-
-
-static void
-http_server_session_timer_start (http_session_t * hs)
-{
- u32 hs_handle;
- hs_handle = hs->thread_index << 24 | hs->session_index;
- clib_spinlock_lock (&http_server_main.tw_lock);
- hs->timer_handle = tw_timer_start_2t_1w_2048sl (&http_server_main.tw,
- hs_handle, 0, 60);
- clib_spinlock_unlock (&http_server_main.tw_lock);
-}
-
-static void
-http_server_session_timer_stop (http_session_t * hs)
-{
- if (hs->timer_handle == ~0)
- return;
- clib_spinlock_lock (&http_server_main.tw_lock);
- tw_timer_stop_2t_1w_2048sl (&http_server_main.tw, hs->timer_handle);
- clib_spinlock_unlock (&http_server_main.tw_lock);
-}
-
-static void
-http_server_session_disconnect (http_session_t * hs)
-{
- vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- a->handle = hs->vpp_session_handle;
- a->app_index = http_server_main.app_index;
- vnet_disconnect_session (a);
-}
-
-static void
-http_process_free (http_server_args * args)
-{
- vlib_node_runtime_t *rt;
- vlib_main_t *vm = vlib_get_first_main ();
- http_server_main_t *hsm = &http_server_main;
- vlib_node_t *n;
- u32 node_index;
- http_server_args **save_args;
-
- node_index = args->node_index;
- ASSERT (node_index != 0);
-
- n = vlib_get_node (vm, node_index);
- rt = vlib_node_get_runtime (vm, n->index);
- save_args = vlib_node_get_runtime_data (vm, n->index);
-
- /* Reset process session pointer */
- clib_mem_free (*save_args);
- *save_args = 0;
-
- /* Turn off the process node */
- vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
-
- /* add node index to the freelist */
- vec_add1 (hsm->free_http_cli_process_node_indices, node_index);
-}
-
-/* *INDENT-OFF* */
-static const char *http_ok =
- "HTTP/1.1 200 OK\r\n";
-
-static const char *http_response =
- "Content-Type: text/html\r\n"
- "Expires: Mon, 11 Jan 1970 10:10:10 GMT\r\n"
- "Connection: close \r\n"
- "Pragma: no-cache\r\n"
- "Content-Length: %d\r\n\r\n%v";
-
-static const char *http_error_template =
- "HTTP/1.1 %s\r\n"
- "Content-Type: text/html\r\n"
- "Expires: Mon, 11 Jan 1970 10:10:10 GMT\r\n"
- "Connection: close\r\n"
- "Pragma: no-cache\r\n"
- "Content-Length: 0\r\n\r\n";
-
-/* Header, including incantation to suppress favicon.ico requests */
-static const char *html_header_template =
- "<html><head><title>%v</title></head>"
- "<link rel=\"icon\" href=\"data:,\">"
- "<body><pre>";
-
-static const char *html_footer =
- "</pre></body></html>\r\n";
-
-static const char *html_header_static =
- "<html><head><title>static reply</title></head>"
- "<link rel=\"icon\" href=\"data:,\">"
- "<body><pre>hello</pre></body></html>\r\n";
-/* *INDENT-ON* */
-
-static u8 *static_http;
-static u8 *static_ok;
-
-static void
-http_cli_output (uword arg, u8 * buffer, uword buffer_bytes)
-{
- u8 **output_vecp = (u8 **) arg;
- u8 *output_vec;
- u32 offset;
-
- output_vec = *output_vecp;
-
- offset = vec_len (output_vec);
- vec_validate (output_vec, offset + buffer_bytes - 1);
- clib_memcpy_fast (output_vec + offset, buffer, buffer_bytes);
-
- *output_vecp = output_vec;
-}
-
-void
-send_data (http_session_t * hs, u8 * data)
-{
- http_server_main_t *hsm = &http_server_main;
- vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- vlib_main_t *vm = vlib_get_main ();
- f64 last_sent_timer = vlib_time_now (vm);
- u32 offset, bytes_to_send;
- f64 delay = 10e-3;
-
- bytes_to_send = vec_len (data);
- offset = 0;
-
- while (bytes_to_send > 0)
- {
- int actual_transfer;
-
- actual_transfer = svm_fifo_enqueue
- (hs->tx_fifo, bytes_to_send, data + offset);
-
- /* Made any progress? */
- if (actual_transfer <= 0)
- {
- http_server_sessions_reader_unlock ();
- vlib_process_suspend (vm, delay);
- http_server_sessions_reader_lock ();
-
- /* 10s deadman timer */
- if (vlib_time_now (vm) > last_sent_timer + 10.0)
- {
- a->handle = hs->vpp_session_handle;
- a->app_index = hsm->app_index;
- vnet_disconnect_session (a);
- break;
- }
- /* Exponential backoff, within reason */
- if (delay < 1.0)
- delay = delay * 2.0;
- }
- else
- {
- last_sent_timer = vlib_time_now (vm);
- offset += actual_transfer;
- bytes_to_send -= actual_transfer;
-
- if (svm_fifo_set_event (hs->tx_fifo))
- session_send_io_evt_to_thread (hs->tx_fifo,
- SESSION_IO_EVT_TX_FLUSH);
- delay = 10e-3;
- }
- }
-}
-
-static void
-send_error (http_session_t * hs, char *str)
-{
- u8 *data;
-
- data = format (0, http_error_template, str);
- send_data (hs, data);
- vec_free (data);
-}
-
-static uword
-http_cli_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
- vlib_frame_t * f)
-{
- u8 *request = 0, *reply = 0, *http = 0, *html = 0;
- http_server_main_t *hsm = &http_server_main;
- http_server_args **save_args;
- http_server_args *args;
- unformat_input_t input;
- http_session_t *hs;
- int i;
-
- save_args = vlib_node_get_runtime_data (hsm->vlib_main, rt->node_index);
- args = *save_args;
-
- http_server_sessions_reader_lock ();
-
- hs = http_server_session_get (args->thread_index, args->hs_index);
- ASSERT (hs);
-
- request = hs->rx_buf;
- if (vec_len (request) < 7)
- {
- send_error (hs, "400 Bad Request");
- goto out;
- }
-
- for (i = 0; i < vec_len (request) - 4; i++)
- {
- if (request[i] == 'G' &&
- request[i + 1] == 'E' &&
- request[i + 2] == 'T' && request[i + 3] == ' ')
- goto found;
- }
-bad_request:
- send_error (hs, "400 Bad Request");
- goto out;
-
-found:
- /* Lose "GET " */
- vec_delete (request, i + 5, 0);
-
- /* Replace slashes with spaces, stop at the end of the path */
- i = 0;
- while (1)
- {
- if (request[i] == '/')
- request[i] = ' ';
- else if (request[i] == ' ')
- {
- /* vlib_cli_input is vector-based, no need for a NULL */
- _vec_len (request) = i;
- break;
- }
- i++;
- /* Should never happen */
- if (i == vec_len (request))
- goto bad_request;
- }
-
- /* Generate the html header */
- html = format (0, html_header_template, request /* title */ );
-
- /* Run the command */
- unformat_init_vector (&input, vec_dup (request));
- vlib_cli_input (vm, &input, http_cli_output, (uword) & reply);
- unformat_free (&input);
- request = 0;
-
- /* Generate the html page */
- html = format (html, "%v", reply);
- html = format (html, html_footer);
- /* And the http reply */
- http = format (0, http_ok);
- http = format (http, http_response, vec_len (html), html);
-
- /* Send it */
- send_data (hs, http);
-
-out:
- /* Cleanup */
- http_server_sessions_reader_unlock ();
- vec_free (reply);
- vec_free (html);
- vec_free (http);
-
- http_process_free (args);
- return (0);
-}
-
-static void
-alloc_http_process (http_server_args * args)
-{
- char *name;
- vlib_node_t *n;
- http_server_main_t *hsm = &http_server_main;
- vlib_main_t *vm = hsm->vlib_main;
- uword l = vec_len (hsm->free_http_cli_process_node_indices);
- http_server_args **save_args;
-
- if (vec_len (hsm->free_http_cli_process_node_indices) > 0)
- {
- n = vlib_get_node (vm, hsm->free_http_cli_process_node_indices[l - 1]);
- vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
- _vec_len (hsm->free_http_cli_process_node_indices) = l - 1;
- }
- else
- {
- static vlib_node_registration_t r = {
- .function = http_cli_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .process_log2_n_stack_bytes = 16,
- .runtime_data_bytes = sizeof (void *),
- };
-
- name = (char *) format (0, "http-cli-%d", l);
- r.name = name;
- vlib_register_node (vm, &r);
- vec_free (name);
-
- n = vlib_get_node (vm, r.index);
- }
-
- /* Save the node index in the args. It won't be zero. */
- args->node_index = n->index;
-
- /* Save the args (pointer) in the node runtime */
- save_args = vlib_node_get_runtime_data (vm, n->index);
- *save_args = clib_mem_alloc (sizeof (*args));
- clib_memcpy_fast (*save_args, args, sizeof (*args));
-
- vlib_start_process (vm, n->runtime_index);
-}
-
-static void
-alloc_http_process_callback (void *cb_args)
-{
- alloc_http_process ((http_server_args *) cb_args);
-}
-
-static int
-session_rx_request (http_session_t * hs)
-{
- u32 max_dequeue, cursize;
- int n_read;
-
- cursize = vec_len (hs->rx_buf);
- max_dequeue = svm_fifo_max_dequeue_cons (hs->rx_fifo);
- if (PREDICT_FALSE (max_dequeue == 0))
- return -1;
-
- vec_validate (hs->rx_buf, cursize + max_dequeue - 1);
- n_read = app_recv_stream_raw (hs->rx_fifo, hs->rx_buf + cursize,
- max_dequeue, 0, 0 /* peek */ );
- ASSERT (n_read == max_dequeue);
- if (svm_fifo_is_empty_cons (hs->rx_fifo))
- svm_fifo_unset_event (hs->rx_fifo);
-
- _vec_len (hs->rx_buf) = cursize + n_read;
- return 0;
-}
-
-static int
-http_server_rx_callback (session_t * s)
-{
- http_server_args args;
- http_session_t *hs;
- int rv;
-
- http_server_sessions_reader_lock ();
-
- hs = http_server_session_lookup (s->thread_index, s->session_index);
- if (!hs || hs->session_state != HTTP_STATE_ESTABLISHED)
- return -1;
-
- rv = session_rx_request (hs);
- if (rv)
- return rv;
-
- /* send the command to a new/recycled vlib process */
- args.hs_index = hs->session_index;
- args.thread_index = hs->thread_index;
-
- http_server_sessions_reader_unlock ();
-
- /* Send RPC request to main thread */
- if (vlib_get_thread_index () != 0)
- vlib_rpc_call_main_thread (alloc_http_process_callback, (u8 *) & args,
- sizeof (args));
- else
- alloc_http_process (&args);
- return 0;
-}
-
-static int
-http_server_rx_callback_static (session_t * s)
-{
- http_session_t *hs;
- u32 request_len;
- u8 *request = 0;
- int i, rv;
-
- hs = http_server_session_lookup (s->thread_index, s->session_index);
- if (!hs || hs->session_state == HTTP_STATE_CLOSED)
- return 0;
-
- /* ok 200 was sent */
- if (hs->session_state == HTTP_STATE_OK_SENT)
- goto send_data;
-
- rv = session_rx_request (hs);
- if (rv)
- goto wait_for_data;
-
- request = hs->rx_buf;
- request_len = vec_len (request);
- if (vec_len (request) < 7)
- {
- send_error (hs, "400 Bad Request");
- goto close_session;
- }
-
- for (i = 0; i < request_len - 4; i++)
- {
- if (request[i] == 'G' &&
- request[i + 1] == 'E' &&
- request[i + 2] == 'T' && request[i + 3] == ' ')
- goto find_end;
- }
- send_error (hs, "400 Bad Request");
- goto close_session;
-
-find_end:
-
- /* check for the end sequence: /r/n/r/n */
- if (request[request_len - 1] != 0xa || request[request_len - 3] != 0xa
- || request[request_len - 2] != 0xd || request[request_len - 4] != 0xd)
- goto wait_for_data;
-
- /* send 200 OK first */
- send_data (hs, static_ok);
- hs->session_state = HTTP_STATE_OK_SENT;
- goto postpone;
-
-send_data:
- send_data (hs, static_http);
-
-close_session:
- http_server_session_disconnect (hs);
- return 0;
-
-postpone:
- (void) svm_fifo_set_event (hs->rx_fifo);
- session_send_io_evt_to_thread (hs->rx_fifo, SESSION_IO_EVT_BUILTIN_RX);
- return 0;
-
-wait_for_data:
- return 0;
-}
-
-static int
-http_server_session_accept_callback (session_t * s)
-{
- http_server_main_t *hsm = &http_server_main;
- http_session_t *hs;
-
- hsm->vpp_queue[s->thread_index] =
- session_main_get_vpp_event_queue (s->thread_index);
-
- if (!hsm->is_static)
- http_server_sessions_writer_lock ();
-
- hs = http_server_session_alloc (s->thread_index);
- http_server_session_lookup_add (s->thread_index, s->session_index,
- hs->session_index);
- hs->rx_fifo = s->rx_fifo;
- hs->tx_fifo = s->tx_fifo;
- hs->vpp_session_index = s->session_index;
- hs->vpp_session_handle = session_handle (s);
- hs->session_state = HTTP_STATE_ESTABLISHED;
- http_server_session_timer_start (hs);
-
- if (!hsm->is_static)
- http_server_sessions_writer_unlock ();
-
- s->session_state = SESSION_STATE_READY;
- return 0;
-}
-
-static void
-http_server_session_disconnect_callback (session_t * s)
-{
- http_server_main_t *hsm = &http_server_main;
- vnet_disconnect_args_t _a = { 0 }, *a = &_a;
-
- a->handle = session_handle (s);
- a->app_index = hsm->app_index;
- vnet_disconnect_session (a);
-}
-
-static void
-http_server_session_reset_callback (session_t * s)
-{
- http_server_main_t *hsm = &http_server_main;
- vnet_disconnect_args_t _a = { 0 }, *a = &_a;
-
- a->handle = session_handle (s);
- a->app_index = hsm->app_index;
- vnet_disconnect_session (a);
-}
-
-static int
-http_server_session_connected_callback (u32 app_index, u32 api_context,
- session_t * s, session_error_t err)
-{
- clib_warning ("called...");
- return -1;
-}
-
-static int
-http_server_add_segment_callback (u32 client_index, u64 segment_handle)
-{
- return 0;
-}
-
-static void
-http_server_cleanup_callback (session_t * s, session_cleanup_ntf_t ntf)
-{
- http_server_main_t *hsm = &http_server_main;
- http_session_t *hs;
-
- if (ntf == SESSION_CLEANUP_TRANSPORT)
- return;
-
- if (!hsm->is_static)
- http_server_sessions_writer_lock ();
-
- hs = http_server_session_lookup (s->thread_index, s->session_index);
- if (!hs)
- goto done;
-
- http_server_session_lookup_del (hs->thread_index, hs->vpp_session_index);
- vec_free (hs->rx_buf);
- http_server_session_timer_stop (hs);
- http_server_session_free (hs);
-
-done:
-
- if (!hsm->is_static)
- http_server_sessions_writer_unlock ();
-}
-
-static session_cb_vft_t http_server_session_cb_vft = {
- .session_accept_callback = http_server_session_accept_callback,
- .session_disconnect_callback = http_server_session_disconnect_callback,
- .session_connected_callback = http_server_session_connected_callback,
- .add_segment_callback = http_server_add_segment_callback,
- .builtin_app_rx_callback = http_server_rx_callback,
- .session_reset_callback = http_server_session_reset_callback,
- .session_cleanup_callback = http_server_cleanup_callback,
-};
-
-static int
-http_server_attach ()
-{
- vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
- http_server_main_t *hsm = &http_server_main;
- u64 options[APP_OPTIONS_N_OPTIONS];
- vnet_app_attach_args_t _a, *a = &_a;
- u32 segment_size = 128 << 20;
-
- clib_memset (a, 0, sizeof (*a));
- clib_memset (options, 0, sizeof (options));
-
- if (hsm->private_segment_size)
- segment_size = hsm->private_segment_size;
-
- a->api_client_index = ~0;
- a->name = format (0, "test_http_server");
- a->session_cb_vft = &http_server_session_cb_vft;
- a->options = options;
- a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
- a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
- a->options[APP_OPTIONS_RX_FIFO_SIZE] =
- hsm->fifo_size ? hsm->fifo_size : 8 << 10;
- a->options[APP_OPTIONS_TX_FIFO_SIZE] =
- hsm->fifo_size ? hsm->fifo_size : 32 << 10;
- a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
- a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = hsm->prealloc_fifos;
-
- if (vnet_application_attach (a))
- {
- vec_free (a->name);
- clib_warning ("failed to attach server");
- return -1;
- }
- vec_free (a->name);
- hsm->app_index = a->app_index;
-
- clib_memset (ck_pair, 0, sizeof (*ck_pair));
- ck_pair->cert = (u8 *) test_srv_crt_rsa;
- ck_pair->key = (u8 *) test_srv_key_rsa;
- ck_pair->cert_len = test_srv_crt_rsa_len;
- ck_pair->key_len = test_srv_key_rsa_len;
- vnet_app_add_cert_key_pair (ck_pair);
- hsm->ckpair_index = ck_pair->index;
-
- return 0;
-}
-
-static int
-http_transport_needs_crypto (transport_proto_t proto)
-{
- return proto == TRANSPORT_PROTO_TLS || proto == TRANSPORT_PROTO_DTLS ||
- proto == TRANSPORT_PROTO_QUIC;
-}
-
-static int
-http_server_listen ()
-{
- session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
- http_server_main_t *hsm = &http_server_main;
- vnet_listen_args_t _a, *a = &_a;
- char *uri = "tcp://0.0.0.0/80";
- int rv;
-
- clib_memset (a, 0, sizeof (*a));
- a->app_index = hsm->app_index;
-
- if (hsm->uri)
- uri = (char *) hsm->uri;
-
- if (parse_uri (uri, &sep))
- return -1;
-
- clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
- if (http_transport_needs_crypto (a->sep_ext.transport_proto))
- {
- session_endpoint_alloc_ext_cfg (&a->sep_ext,
- TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
- a->sep_ext.ext_cfg->crypto.ckpair_index = hsm->ckpair_index;
- }
-
- rv = vnet_listen (a);
- if (a->sep_ext.ext_cfg)
- clib_mem_free (a->sep_ext.ext_cfg);
- return rv;
-}
-
-static void
-http_server_session_close_cb (void *hs_handlep)
-{
- http_session_t *hs;
- uword hs_handle;
- hs_handle = pointer_to_uword (hs_handlep);
- hs = http_server_session_get (hs_handle >> 24, hs_handle & 0x00FFFFFF);
- if (!hs)
- return;
- hs->timer_handle = ~0;
- http_server_session_disconnect (hs);
-}
-
-static void
-http_expired_timers_dispatch (u32 * expired_timers)
-{
- u32 hs_handle;
- int i;
-
- for (i = 0; i < vec_len (expired_timers); i++)
- {
- /* Get session handle. The first bit is the timer id */
- hs_handle = expired_timers[i] & 0x7FFFFFFF;
- session_send_rpc_evt_to_thread (hs_handle >> 24,
- http_server_session_close_cb,
- uword_to_pointer (hs_handle, void *));
- }
-}
-
-static uword
-http_server_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
- vlib_frame_t * f)
-{
- http_server_main_t *hsm = &http_server_main;
- f64 now, timeout = 1.0;
- uword *event_data = 0;
- uword __clib_unused event_type;
-
- while (1)
- {
- vlib_process_wait_for_event_or_clock (vm, timeout);
- now = vlib_time_now (vm);
- event_type = vlib_process_get_events (vm, (uword **) & event_data);
-
- /* expire timers */
- clib_spinlock_lock (&http_server_main.tw_lock);
- tw_timer_expire_timers_2t_1w_2048sl (&hsm->tw, now);
- clib_spinlock_unlock (&http_server_main.tw_lock);
-
- vec_reset_length (event_data);
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (http_server_process_node) =
-{
- .function = http_server_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "http-server-process",
- .state = VLIB_NODE_STATE_DISABLED,
-};
-/* *INDENT-ON* */
-
-static int
-http_server_create (vlib_main_t * vm)
-{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
- http_server_main_t *hsm = &http_server_main;
- u32 num_threads;
- vlib_node_t *n;
-
- num_threads = 1 /* main thread */ + vtm->n_threads;
- vec_validate (hsm->vpp_queue, num_threads - 1);
- vec_validate (hsm->sessions, num_threads - 1);
- vec_validate (hsm->session_to_http_session, num_threads - 1);
-
- clib_rwlock_init (&hsm->sessions_lock);
- clib_spinlock_init (&hsm->tw_lock);
-
- if (http_server_attach ())
- {
- clib_warning ("failed to attach server");
- return -1;
- }
- if (http_server_listen ())
- {
- clib_warning ("failed to start listening");
- return -1;
- }
-
- /* Init timer wheel and process */
- tw_timer_wheel_init_2t_1w_2048sl (&hsm->tw, http_expired_timers_dispatch,
- 1 /* timer interval */ , ~0);
- vlib_node_set_state (vm, http_server_process_node.index,
- VLIB_NODE_STATE_POLLING);
- n = vlib_get_node (vm, http_server_process_node.index);
- vlib_start_process (vm, n->runtime_index);
-
- return 0;
-}
-
-static clib_error_t *
-http_server_create_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- http_server_main_t *hsm = &http_server_main;
- unformat_input_t _line_input, *line_input = &_line_input;
- u64 seg_size;
- u8 *html;
- int rv;
-
- hsm->prealloc_fifos = 0;
- hsm->private_segment_size = 0;
- hsm->fifo_size = 0;
- hsm->is_static = 0;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- goto start_server;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "static"))
- hsm->is_static = 1;
- else
- if (unformat (line_input, "prealloc-fifos %d", &hsm->prealloc_fifos))
- ;
- else if (unformat (line_input, "private-segment-size %U",
- unformat_memory_size, &seg_size))
- {
- if (seg_size >= 0x100000000ULL)
- {
- vlib_cli_output (vm, "private segment size %llu, too large",
- seg_size);
- return 0;
- }
- hsm->private_segment_size = seg_size;
- }
- else if (unformat (line_input, "fifo-size %d", &hsm->fifo_size))
- hsm->fifo_size <<= 10;
- else if (unformat (line_input, "uri %s", &hsm->uri))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- }
- unformat_free (line_input);
-
-start_server:
-
- if (hsm->my_client_index != (u32) ~ 0)
- return clib_error_return (0, "test http server is already running");
-
- vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
-
- if (hsm->is_static)
- {
- http_server_session_cb_vft.builtin_app_rx_callback =
- http_server_rx_callback_static;
- html = format (0, html_header_static);
- static_http = format (0, http_response, vec_len (html), html);
- static_ok = format (0, http_ok);
- }
- rv = http_server_create (vm);
- switch (rv)
- {
- case 0:
- break;
- default:
- return clib_error_return (0, "server_create returned %d", rv);
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (http_server_create_command, static) =
-{
- .path = "test http server",
- .short_help = "test http server",
- .function = http_server_create_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-http_server_main_init (vlib_main_t * vm)
-{
- http_server_main_t *hsm = &http_server_main;
-
- hsm->my_client_index = ~0;
- hsm->vlib_main = vm;
- return 0;
-}
-
-VLIB_INIT_FUNCTION (http_server_main_init);
-
-/*
-* fd.io coding-style-patch-verification: ON
-*
-* Local Variables:
-* eval: (c-set-style "gnu")
-* End:
-*/
diff --git a/src/plugins/hs_apps/http_tps.c b/src/plugins/hs_apps/http_tps.c
new file mode 100644
index 00000000000..920f7ea731f
--- /dev/null
+++ b/src/plugins/hs_apps/http_tps.c
@@ -0,0 +1,839 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+#include <vnet/session/session.h>
+#include <http/http.h>
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 session_index;
+ u32 thread_index;
+ u64 data_len;
+ u64 data_offset;
+ u32 vpp_session_index;
+ union
+ {
+ /** threshold after which connection is closed */
+ f64 close_threshold;
+ /** rate at which accepted sessions are marked for random close */
+ u32 close_rate;
+ };
+ u8 *uri;
+} hts_session_t;
+
+typedef struct hts_listen_cfg_
+{
+ u8 *uri;
+ u32 vrf;
+ f64 rnd_close;
+ u8 is_del;
+} hts_listen_cfg_t;
+
+typedef struct hs_main_
+{
+ hts_session_t **sessions;
+ u32 app_index;
+
+ u32 ckpair_index;
+ u8 *test_data;
+
+ /** Hash table of listener uris to handles */
+ uword *uri_to_handle;
+
+ /*
+ * Configs
+ */
+ u8 *uri;
+ u32 fifo_size;
+ u64 segment_size;
+ u8 debug_level;
+ u8 no_zc;
+ u8 *default_uri;
+ u32 seed;
+} hts_main_t;
+
+static hts_main_t hts_main;
+
+static hts_session_t *
+hts_session_alloc (u32 thread_index)
+{
+ hts_main_t *htm = &hts_main;
+ hts_session_t *hs;
+
+ pool_get_zero (htm->sessions[thread_index], hs);
+ hs->session_index = hs - htm->sessions[thread_index];
+ hs->thread_index = thread_index;
+
+ return hs;
+}
+
+static hts_session_t *
+hts_session_get (u32 thread_index, u32 hts_index)
+{
+ hts_main_t *htm = &hts_main;
+
+ if (pool_is_free_index (htm->sessions[thread_index], hts_index))
+ return 0;
+
+ return pool_elt_at_index (htm->sessions[thread_index], hts_index);
+}
+
+static void
+hts_session_free (hts_session_t *hs)
+{
+ hts_main_t *htm = &hts_main;
+ u32 thread = hs->thread_index;
+
+ if (htm->debug_level > 0)
+ clib_warning ("Freeing session %u", hs->session_index);
+
+ if (CLIB_DEBUG)
+ clib_memset (hs, 0xfa, sizeof (*hs));
+
+ pool_put (htm->sessions[thread], hs);
+}
+
+static void
+hts_disconnect_transport (hts_session_t *hs)
+{
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+ hts_main_t *htm = &hts_main;
+ session_t *ts;
+
+ if (htm->debug_level > 0)
+ clib_warning ("Actively closing session %u", hs->session_index);
+
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
+ a->handle = session_handle (ts);
+ a->app_index = htm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static void
+hts_session_tx_zc (hts_session_t *hs, session_t *ts)
+{
+ u32 to_send, space;
+ u64 max_send;
+ int rv;
+
+ rv = svm_fifo_fill_chunk_list (ts->tx_fifo);
+ if (rv < 0)
+ {
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ return;
+ }
+
+ max_send = hs->data_len - hs->data_offset;
+ space = svm_fifo_max_enqueue (ts->tx_fifo);
+ ASSERT (space != 0);
+ to_send = clib_min (space, max_send);
+
+ svm_fifo_enqueue_nocopy (ts->tx_fifo, to_send);
+
+ hs->data_offset += to_send;
+
+ if (to_send < max_send)
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+}
+
+static void
+hts_session_tx_no_zc (hts_session_t *hs, session_t *ts)
+{
+ u32 n_segs, buf_offset, buf_left;
+ u64 max_send = 32 << 10, left;
+ hts_main_t *htm = &hts_main;
+ svm_fifo_seg_t seg[2];
+ int sent;
+
+ left = hs->data_len - hs->data_offset;
+ max_send = clib_min (left, max_send);
+ buf_offset = hs->data_offset % vec_len (htm->test_data);
+ buf_left = vec_len (htm->test_data) - buf_offset;
+
+ if (buf_left < max_send)
+ {
+ seg[0].data = htm->test_data + buf_offset;
+ seg[0].len = buf_left;
+ seg[1].data = htm->test_data;
+ seg[1].len = max_send - buf_left;
+ n_segs = 2;
+ }
+ else
+ {
+ seg[0].data = htm->test_data + buf_offset;
+ seg[0].len = max_send;
+ n_segs = 1;
+ }
+
+ sent = svm_fifo_enqueue_segments (ts->tx_fifo, seg, n_segs,
+ 1 /* allow partial */);
+
+ if (sent <= 0)
+ {
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ return;
+ }
+
+ hs->data_offset += sent;
+
+ if (sent < left)
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+}
+
+static inline void
+hts_session_tx (hts_session_t *hs, session_t *ts)
+{
+ hts_main_t *htm = &hts_main;
+
+ if (!htm->no_zc)
+ hts_session_tx_zc (hs, ts);
+ else
+ hts_session_tx_no_zc (hs, ts);
+
+ if (hs->close_threshold > 0)
+ {
+ if ((f64) hs->data_offset / hs->data_len > hs->close_threshold)
+ hts_disconnect_transport (hs);
+ }
+}
+
+static void
+hts_start_send_data (hts_session_t *hs, http_status_code_t status)
+{
+ http_msg_t msg;
+ session_t *ts;
+ int rv;
+
+ msg.type = HTTP_MSG_REPLY;
+ msg.code = status;
+ msg.content_type = HTTP_CONTENT_APP_OCTET_STREAM;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = hs->data_len;
+
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (!msg.data.len)
+ {
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+ return;
+ }
+
+ hts_session_tx (hs, ts);
+}
+
+static int
+try_test_file (hts_session_t *hs, u8 *request)
+{
+ char *test_str = "test_file";
+ hts_main_t *htm = &hts_main;
+ unformat_input_t input;
+ uword file_size;
+ int rc = 0;
+
+ if (memcmp (request, test_str, clib_strnlen (test_str, 9)))
+ return -1;
+
+ unformat_init_vector (&input, vec_dup (request));
+ if (!unformat (&input, "test_file_%U", unformat_memory_size, &file_size))
+ {
+ rc = -1;
+ goto done;
+ }
+
+ if (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ rc = -1;
+ goto done;
+ }
+
+ if (htm->debug_level)
+ clib_warning ("Requested file size %U", format_memory_size, file_size);
+
+ hs->data_len = file_size;
+ hs->data_offset = 0;
+
+ if (hs->close_threshold > 0)
+ {
+ /* Disconnect if the header is already enough to fill the quota */
+ if ((f64) 30 / hs->data_len > hs->close_threshold)
+ {
+ hts_disconnect_transport (hs);
+ goto done;
+ }
+ }
+
+ hts_start_send_data (hs, HTTP_STATUS_OK);
+
+done:
+ unformat_free (&input);
+
+ return rc;
+}
+
+static int
+hts_ts_rx_callback (session_t *ts)
+{
+ hts_session_t *hs;
+ u8 *request = 0;
+ http_msg_t msg;
+ int rv;
+
+ hs = hts_session_get (ts->thread_index, ts->opaque);
+
+ /* Read the http message header */
+ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.type != HTTP_MSG_REQUEST || msg.method_type != HTTP_REQ_GET)
+ {
+ hts_start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
+ goto done;
+ }
+
+ if (!msg.data.len)
+ {
+ hts_start_send_data (hs, HTTP_STATUS_BAD_REQUEST);
+ goto done;
+ }
+
+ vec_validate (request, msg.data.len - 1);
+ rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, request);
+
+ if (try_test_file (hs, request))
+ hts_start_send_data (hs, HTTP_STATUS_NOT_FOUND);
+
+done:
+
+ return 0;
+}
+
+static int
+hs_ts_tx_callback (session_t *ts)
+{
+ hts_session_t *hs;
+
+ hs = hts_session_get (ts->thread_index, ts->opaque);
+ if (!hs)
+ return 0;
+
+ hts_session_tx (hs, ts);
+
+ return 0;
+}
+
+static int
+hts_ts_accept_callback (session_t *ts)
+{
+ hts_main_t *htm = &hts_main;
+ hts_session_t *hs, *lhs;
+ session_t *ls;
+
+ hs = hts_session_alloc (ts->thread_index);
+ hs->vpp_session_index = ts->session_index;
+
+ ts->opaque = hs->session_index;
+ ts->session_state = SESSION_STATE_READY;
+
+ /* Check if listener configured for random closes */
+ ls = listen_session_get_from_handle (ts->listener_handle);
+ lhs = hts_session_get (0, ls->opaque);
+
+ if (lhs->close_rate)
+ {
+ /* overload listener's data_offset as session counter */
+ u32 cnt = __atomic_add_fetch (&lhs->data_offset, 1, __ATOMIC_RELEASE);
+ if ((cnt % lhs->close_rate) == 0)
+ hs->close_threshold = random_f64 (&htm->seed);
+ }
+
+ if (htm->debug_level > 0)
+ clib_warning ("Accepted session %u close threshold %.2f", ts->opaque,
+ hs->close_threshold);
+
+ return 0;
+}
+
+static int
+hts_ts_connected_callback (u32 app_index, u32 api_context, session_t *s,
+ session_error_t err)
+{
+ clib_warning ("called...");
+ return -1;
+}
+
+static void
+hts_ts_disconnect_callback (session_t *ts)
+{
+ hts_main_t *htm = &hts_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ if (htm->debug_level > 0)
+ clib_warning ("Transport closing session %u", ts->opaque);
+
+ a->handle = session_handle (ts);
+ a->app_index = htm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static void
+hts_ts_reset_callback (session_t *ts)
+{
+ hts_main_t *htm = &hts_main;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+
+ if (htm->debug_level > 0)
+ clib_warning ("Transport reset session %u", ts->opaque);
+
+ a->handle = session_handle (ts);
+ a->app_index = htm->app_index;
+ vnet_disconnect_session (a);
+}
+
+static void
+hts_ts_cleanup_callback (session_t *s, session_cleanup_ntf_t ntf)
+{
+ hts_session_t *hs;
+
+ if (ntf == SESSION_CLEANUP_TRANSPORT)
+ return;
+
+ hs = hts_session_get (s->thread_index, s->opaque);
+ if (!hs)
+ return;
+
+ hts_session_free (hs);
+}
+
+static int
+hts_add_segment_callback (u32 client_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static int
+hts_del_segment_callback (u32 client_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static session_cb_vft_t hs_session_cb_vft = {
+ .session_accept_callback = hts_ts_accept_callback,
+ .session_disconnect_callback = hts_ts_disconnect_callback,
+ .session_connected_callback = hts_ts_connected_callback,
+ .add_segment_callback = hts_add_segment_callback,
+ .del_segment_callback = hts_del_segment_callback,
+ .builtin_app_rx_callback = hts_ts_rx_callback,
+ .builtin_app_tx_callback = hs_ts_tx_callback,
+ .session_reset_callback = hts_ts_reset_callback,
+ .session_cleanup_callback = hts_ts_cleanup_callback,
+};
+
+static int
+hts_attach (hts_main_t *hm)
+{
+ vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
+ u64 options[APP_OPTIONS_N_OPTIONS];
+ vnet_app_attach_args_t _a, *a = &_a;
+
+ clib_memset (a, 0, sizeof (*a));
+ clib_memset (options, 0, sizeof (options));
+
+ a->api_client_index = ~0;
+ a->name = format (0, "http_tps");
+ a->session_cb_vft = &hs_session_cb_vft;
+ a->options = options;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = hm->segment_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = hm->segment_size;
+ a->options[APP_OPTIONS_RX_FIFO_SIZE] = hm->fifo_size;
+ a->options[APP_OPTIONS_TX_FIFO_SIZE] = hm->fifo_size;
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+
+ if (vnet_application_attach (a))
+ {
+ vec_free (a->name);
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+ vec_free (a->name);
+ hm->app_index = a->app_index;
+
+ clib_memset (ck_pair, 0, sizeof (*ck_pair));
+ ck_pair->cert = (u8 *) test_srv_crt_rsa;
+ ck_pair->key = (u8 *) test_srv_key_rsa;
+ ck_pair->cert_len = test_srv_crt_rsa_len;
+ ck_pair->key_len = test_srv_key_rsa_len;
+ vnet_app_add_cert_key_pair (ck_pair);
+ hm->ckpair_index = ck_pair->index;
+
+ return 0;
+}
+
+static int
+hts_transport_needs_crypto (transport_proto_t proto)
+{
+ return proto == TRANSPORT_PROTO_TLS || proto == TRANSPORT_PROTO_DTLS ||
+ proto == TRANSPORT_PROTO_QUIC;
+}
+
+static int
+hts_start_listen (hts_main_t *htm, session_endpoint_cfg_t *sep, u8 *uri,
+ f64 rnd_close)
+{
+ vnet_listen_args_t _a, *a = &_a;
+ u8 need_crypto;
+ hts_session_t *hls;
+ session_t *ls;
+ u32 thread_index = 0;
+ int rv;
+
+ clib_memset (a, 0, sizeof (*a));
+ a->app_index = htm->app_index;
+
+ need_crypto = hts_transport_needs_crypto (sep->transport_proto);
+
+ sep->transport_proto = TRANSPORT_PROTO_HTTP;
+ clib_memcpy (&a->sep_ext, sep, sizeof (*sep));
+
+ if (need_crypto)
+ {
+ session_endpoint_alloc_ext_cfg (&a->sep_ext,
+ TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
+ a->sep_ext.ext_cfg->crypto.ckpair_index = htm->ckpair_index;
+ }
+
+ rv = vnet_listen (a);
+
+ if (need_crypto)
+ clib_mem_free (a->sep_ext.ext_cfg);
+
+ if (rv)
+ return rv;
+
+ hls = hts_session_alloc (thread_index);
+ hls->uri = vec_dup (uri);
+ hls->close_rate = (f64) 1 / rnd_close;
+ ls = listen_session_get_from_handle (a->handle);
+ hls->vpp_session_index = ls->session_index;
+ hash_set_mem (htm->uri_to_handle, hls->uri, hls->session_index);
+
+ /* opaque holds index of hls, which is used in `hts_ts_accept_callback`
+ * to get back the pointer to hls */
+ ls->opaque = hls - htm->sessions[thread_index];
+
+ return 0;
+}
+
+static int
+hts_stop_listen (hts_main_t *htm, u32 hls_index)
+{
+ hts_session_t *hls;
+ session_t *ls;
+
+ hls = hts_session_get (0, hls_index);
+ ls = listen_session_get (hls->vpp_session_index);
+
+ vnet_unlisten_args_t ua = {
+ .handle = listen_session_get_handle (ls),
+ .app_index = htm->app_index,
+ .wrk_map_index = 0 /* default wrk */
+ };
+
+ hash_unset_mem (htm->uri_to_handle, hls->uri);
+
+ if (vnet_unlisten (&ua))
+ return -1;
+
+ vec_free (hls->uri);
+ hts_session_free (hls);
+
+ return 0;
+}
+
+static clib_error_t *
+hts_listen (hts_main_t *htm, hts_listen_cfg_t *lcfg)
+{
+ session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
+ clib_error_t *error = 0;
+ u8 *uri, *uri_key;
+ uword *p;
+ int rv;
+
+ uri = lcfg->uri ? lcfg->uri : htm->default_uri;
+ uri_key = format (0, "vrf%u-%s", lcfg->vrf, uri);
+ p = hash_get_mem (htm->uri_to_handle, uri_key);
+
+ if (lcfg->is_del)
+ {
+ if (!p)
+ error = clib_error_return (0, "not listening on %v", uri);
+ else if (hts_stop_listen (htm, p[0]))
+ error = clib_error_return (0, "failed to unlisten");
+ goto done;
+ }
+
+ if (p)
+ {
+ error = clib_error_return (0, "already listening %v", uri);
+ goto done;
+ }
+
+ if (parse_uri ((char *) uri, &sep))
+ {
+ error = clib_error_return (0, "failed to parse uri %v", uri);
+ goto done;
+ }
+
+ if (lcfg->vrf)
+ {
+ fib_protocol_t fp;
+ u32 fib_index;
+
+ fp = sep.is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
+ fib_index = fib_table_find (fp, lcfg->vrf);
+ if (fib_index == ~0)
+ {
+ error = clib_error_return (0, "no such vrf %u", lcfg->vrf);
+ goto done;
+ }
+ sep.fib_index = fib_index;
+ }
+
+ if ((rv = hts_start_listen (htm, &sep, uri_key, lcfg->rnd_close)))
+ {
+ error = clib_error_return (0, "failed to listen on %v: %U", uri,
+ format_session_error, rv);
+ }
+
+done:
+
+ vec_free (uri_key);
+ return error;
+}
+
+static int
+hts_create (vlib_main_t *vm)
+{
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ hts_main_t *htm = &hts_main;
+ u32 num_threads;
+
+ num_threads = 1 /* main thread */ + vtm->n_threads;
+ vec_validate (htm->sessions, num_threads - 1);
+
+ if (htm->no_zc)
+ vec_validate (htm->test_data, (64 << 10) - 1);
+
+ if (hts_attach (htm))
+ {
+ clib_warning ("failed to attach server");
+ return -1;
+ }
+
+ htm->default_uri = format (0, "tcp://0.0.0.0/80%c", 0);
+ htm->uri_to_handle = hash_create_vec (0, sizeof (u8), sizeof (uword));
+
+ return 0;
+}
+
+static clib_error_t *
+hts_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ hts_main_t *htm = &hts_main;
+ hts_listen_cfg_t lcfg = {};
+ clib_error_t *error = 0;
+ u64 mem_size;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ goto start_server;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "private-segment-size %U",
+ unformat_memory_size, &mem_size))
+ htm->segment_size = mem_size;
+ else if (unformat (line_input, "fifo-size %U", unformat_memory_size,
+ &mem_size))
+ htm->fifo_size = mem_size;
+ else if (unformat (line_input, "no-zc"))
+ htm->no_zc = 1;
+ else if (unformat (line_input, "debug"))
+ htm->debug_level = 1;
+ else if (unformat (line_input, "vrf %u", &lcfg.vrf))
+ ;
+ else if (unformat (line_input, "uri %s", &lcfg.uri))
+ ;
+ else if (unformat (line_input, "rnd-close %f", &lcfg.rnd_close))
+ {
+ if (lcfg.rnd_close > 1.0)
+ {
+ error = clib_error_return (0, "invalid rnd close value %f",
+ lcfg.rnd_close);
+ break;
+ }
+ }
+ else if (unformat (line_input, "del"))
+ lcfg.is_del = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (error)
+ goto done;
+
+start_server:
+
+ if (htm->app_index == (u32) ~0)
+ {
+ vnet_session_enable_disable (vm, 1 /* is_enable */);
+
+ if (hts_create (vm))
+ {
+ error = clib_error_return (0, "http tps create failed");
+ goto done;
+ }
+ }
+
+ error = hts_listen (htm, &lcfg);
+
+done:
+
+ vec_free (lcfg.uri);
+ return error;
+}
+
+VLIB_CLI_COMMAND (http_tps_command, static) = {
+ .path = "http tps",
+ .short_help = "http tps [uri <uri>] [fifo-size <nbytes>] "
+ "[segment-size <nMG>] [prealloc-fifos <n>] [debug] [no-zc] "
+ "[del]",
+ .function = hts_create_command_fn,
+};
+
+static clib_error_t *
+hts_show_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ hts_main_t *htm = &hts_main;
+ clib_error_t *error = 0;
+ u8 do_listeners = 0;
+ hts_session_t **sessions;
+ u32 n_listeners = 0, n_sessions = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ goto no_input;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "listeners"))
+ do_listeners = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+
+ if (error)
+ return error;
+
+no_input:
+
+ if (htm->app_index == ~0)
+ {
+ vlib_cli_output (vm, "http tps not enabled");
+ goto done;
+ }
+
+ if (do_listeners)
+ {
+ uword handle;
+ u8 *s = 0, *uri;
+
+ /* clang-format off */
+ hash_foreach (uri, handle, htm->uri_to_handle, ({
+ s = format (s, "%-30v%lx\n", uri, handle);
+ }));
+ /* clang-format on */
+
+ if (s)
+ {
+ vlib_cli_output (vm, "%-29s%s", "URI", "Index");
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ }
+ goto done;
+ }
+
+ n_listeners = hash_elts (htm->uri_to_handle);
+ vec_foreach (sessions, htm->sessions)
+ n_sessions += pool_elts (*sessions);
+
+ vlib_cli_output (vm, " app index: %u\n listeners: %u\n sesions: %u",
+ htm->app_index, n_listeners, n_sessions - n_listeners);
+
+done:
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_http_tps_command, static) = {
+ .path = "show http tps",
+ .short_help = "http tps [listeners]",
+ .function = hts_show_command_fn,
+};
+
+static clib_error_t *
+hs_main_init (vlib_main_t *vm)
+{
+ hts_main_t *htm = &hts_main;
+
+ htm->app_index = ~0;
+ htm->segment_size = 128 << 20;
+ htm->fifo_size = 64 << 10;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (hs_main_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/hs_apps/proxy.c b/src/plugins/hs_apps/proxy.c
index eb0d70277da..e8fedf921a5 100644
--- a/src/plugins/hs_apps/proxy.c
+++ b/src/plugins/hs_apps/proxy.c
@@ -66,27 +66,69 @@ proxy_call_main_thread (vnet_connect_args_t * a)
}
static proxy_session_t *
-proxy_get_active_open (proxy_main_t * pm, session_handle_t handle)
+proxy_session_alloc (void)
{
- proxy_session_t *ps = 0;
- uword *p;
+ proxy_main_t *pm = &proxy_main;
+ proxy_session_t *ps;
+
+ pool_get_zero (pm->sessions, ps);
+ ps->ps_index = ps - pm->sessions;
- p = hash_get (pm->proxy_session_by_active_open_handle, handle);
- if (p)
- ps = pool_elt_at_index (pm->sessions, p[0]);
return ps;
}
-static proxy_session_t *
-proxy_get_passive_open (proxy_main_t * pm, session_handle_t handle)
+static inline proxy_session_t *
+proxy_session_get (u32 ps_index)
+{
+ proxy_main_t *pm = &proxy_main;
+
+ return pool_elt_at_index (pm->sessions, ps_index);
+}
+
+static inline proxy_session_t *
+proxy_session_get_if_valid (u32 ps_index)
+{
+ proxy_main_t *pm = &proxy_main;
+
+ if (pool_is_free_index (pm->sessions, ps_index))
+ return 0;
+ return pool_elt_at_index (pm->sessions, ps_index);
+}
+
+static void
+proxy_session_free (proxy_session_t *ps)
{
+ proxy_main_t *pm = &proxy_main;
+
+ if (CLIB_DEBUG > 0)
+ clib_memset (ps, 0xFE, sizeof (*ps));
+ pool_put (pm->sessions, ps);
+}
+
+static int
+proxy_session_postponed_free_rpc (void *arg)
+{
+ uword ps_index = pointer_to_uword (arg);
+ proxy_main_t *pm = &proxy_main;
proxy_session_t *ps = 0;
- uword *p;
- p = hash_get (pm->proxy_session_by_server_handle, handle);
- if (p)
- ps = pool_elt_at_index (pm->sessions, p[0]);
- return ps;
+ clib_spinlock_lock_if_init (&pm->sessions_lock);
+
+ ps = proxy_session_get (ps_index);
+ segment_manager_dealloc_fifos (ps->server_rx_fifo, ps->server_tx_fifo);
+ proxy_session_free (ps);
+
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+
+ return 0;
+}
+
+static void
+proxy_session_postponed_free (proxy_session_t *ps)
+{
+ session_send_rpc_evt_to_thread (ps->po_thread_index,
+ proxy_session_postponed_free_rpc,
+ uword_to_pointer (ps->ps_index, void *));
}
static void
@@ -95,17 +137,13 @@ proxy_try_close_session (session_t * s, int is_active_open)
proxy_main_t *pm = &proxy_main;
proxy_session_t *ps = 0;
vnet_disconnect_args_t _a, *a = &_a;
- session_handle_t handle;
-
- handle = session_handle (s);
clib_spinlock_lock_if_init (&pm->sessions_lock);
+ ps = proxy_session_get (s->opaque);
+
if (is_active_open)
{
- ps = proxy_get_active_open (pm, handle);
- ASSERT (ps != 0);
-
a->handle = ps->vpp_active_open_handle;
a->app_index = pm->active_open_app_index;
vnet_disconnect_session (a);
@@ -122,9 +160,6 @@ proxy_try_close_session (session_t * s, int is_active_open)
}
else
{
- ps = proxy_get_passive_open (pm, handle);
- ASSERT (ps != 0);
-
a->handle = ps->vpp_server_handle;
a->app_index = pm->server_app_index;
vnet_disconnect_session (a);
@@ -146,43 +181,42 @@ proxy_try_close_session (session_t * s, int is_active_open)
}
static void
-proxy_session_free (proxy_session_t * ps)
-{
- proxy_main_t *pm = &proxy_main;
- if (CLIB_DEBUG > 0)
- clib_memset (ps, 0xFE, sizeof (*ps));
- pool_put (pm->sessions, ps);
-}
-
-static void
proxy_try_delete_session (session_t * s, u8 is_active_open)
{
proxy_main_t *pm = &proxy_main;
proxy_session_t *ps = 0;
- session_handle_t handle;
-
- handle = session_handle (s);
clib_spinlock_lock_if_init (&pm->sessions_lock);
+ ps = proxy_session_get (s->opaque);
+
if (is_active_open)
{
- ps = proxy_get_active_open (pm, handle);
- ASSERT (ps != 0);
-
ps->vpp_active_open_handle = SESSION_INVALID_HANDLE;
- hash_unset (pm->proxy_session_by_active_open_handle, handle);
+ /* Revert master thread index change on connect notification */
+ ps->server_rx_fifo->master_thread_index = ps->po_thread_index;
+
+ /* Passive open already cleaned up */
if (ps->vpp_server_handle == SESSION_INVALID_HANDLE)
- proxy_session_free (ps);
+ {
+ ASSERT (s->rx_fifo->refcnt == 1);
+
+ /* The two sides of the proxy on different threads */
+ if (ps->po_thread_index != s->thread_index)
+ {
+ /* This is not the right thread to delete the fifos */
+ s->rx_fifo = 0;
+ s->tx_fifo = 0;
+ proxy_session_postponed_free (ps);
+ }
+ else
+ proxy_session_free (ps);
+ }
}
else
{
- ps = proxy_get_passive_open (pm, handle);
- ASSERT (ps != 0);
-
ps->vpp_server_handle = SESSION_INVALID_HANDLE;
- hash_unset (pm->proxy_session_by_server_handle, handle);
if (ps->vpp_active_open_handle == SESSION_INVALID_HANDLE)
{
@@ -245,12 +279,12 @@ proxy_accept_callback (session_t * s)
clib_spinlock_lock_if_init (&pm->sessions_lock);
- pool_get_zero (pm->sessions, ps);
+ ps = proxy_session_alloc ();
ps->vpp_server_handle = session_handle (s);
ps->vpp_active_open_handle = SESSION_INVALID_HANDLE;
+ ps->po_thread_index = s->thread_index;
- hash_set (pm->proxy_session_by_server_handle, ps->vpp_server_handle,
- ps - pm->sessions);
+ s->opaque = ps->ps_index;
clib_spinlock_unlock_if_init (&pm->sessions_lock);
@@ -303,8 +337,7 @@ proxy_rx_callback (session_t * s)
clib_spinlock_lock_if_init (&pm->sessions_lock);
- ps = proxy_get_passive_open (pm, session_handle (s));
- ASSERT (ps != 0);
+ ps = proxy_session_get (s->opaque);
if (PREDICT_TRUE (ps->vpp_active_open_handle != SESSION_INVALID_HANDLE))
{
@@ -332,7 +365,7 @@ proxy_rx_callback (session_t * s)
{
vnet_connect_args_t _a, *a = &_a;
svm_fifo_t *tx_fifo, *rx_fifo;
- u32 max_dequeue, proxy_index;
+ u32 max_dequeue, ps_index;
int actual_transfer __attribute__ ((unused));
rx_fifo = s->rx_fifo;
@@ -344,7 +377,10 @@ proxy_rx_callback (session_t * s)
max_dequeue = svm_fifo_max_dequeue_cons (s->rx_fifo);
if (PREDICT_FALSE (max_dequeue == 0))
- return 0;
+ {
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+ return 0;
+ }
max_dequeue = clib_min (pm->rcv_buffer_size, max_dequeue);
actual_transfer = svm_fifo_peek (rx_fifo, 0 /* relative_offset */ ,
@@ -357,12 +393,12 @@ proxy_rx_callback (session_t * s)
ps->server_rx_fifo = rx_fifo;
ps->server_tx_fifo = tx_fifo;
ps->active_open_establishing = 1;
- proxy_index = ps - pm->sessions;
+ ps_index = ps->ps_index;
clib_spinlock_unlock_if_init (&pm->sessions_lock);
clib_memcpy (&a->sep_ext, &pm->client_sep, sizeof (pm->client_sep));
- a->api_context = proxy_index;
+ a->api_context = ps_index;
a->app_index = pm->active_open_app_index;
if (proxy_transport_needs_crypto (a->sep.transport_proto))
@@ -407,11 +443,10 @@ proxy_tx_callback (session_t * proxy_s)
clib_spinlock_lock_if_init (&pm->sessions_lock);
- ps = proxy_get_passive_open (pm, session_handle (proxy_s));
- ASSERT (ps != 0);
+ ps = proxy_session_get (proxy_s->opaque);
if (ps->vpp_active_open_handle == SESSION_INVALID_HANDLE)
- return 0;
+ goto unlock;
/* Force ack on active open side to update rcv wnd. Make sure it's done on
* the right thread */
@@ -419,6 +454,7 @@ proxy_tx_callback (session_t * proxy_s)
session_send_rpc_evt_to_thread (ps->server_rx_fifo->master_thread_index,
proxy_force_ack, arg);
+unlock:
clib_spinlock_unlock_if_init (&pm->sessions_lock);
return 0;
@@ -442,10 +478,47 @@ static session_cb_vft_t proxy_session_cb_vft = {
.builtin_app_tx_callback = proxy_tx_callback,
.session_reset_callback = proxy_reset_callback,
.session_cleanup_callback = proxy_cleanup_callback,
- .fifo_tuning_callback = common_fifo_tuning_callback
+ .fifo_tuning_callback = common_fifo_tuning_callback,
};
static int
+active_open_alloc_session_fifos (session_t *s)
+{
+ proxy_main_t *pm = &proxy_main;
+ svm_fifo_t *rxf, *txf;
+ proxy_session_t *ps;
+
+ clib_spinlock_lock_if_init (&pm->sessions_lock);
+
+ ps = proxy_session_get (s->opaque);
+
+ txf = ps->server_rx_fifo;
+ rxf = ps->server_tx_fifo;
+
+ /*
+ * Reset the active-open tx-fifo master indices so the active-open session
+ * will receive data, etc.
+ */
+ txf->shr->master_session_index = s->session_index;
+ txf->master_thread_index = s->thread_index;
+
+ /*
+ * Account for the active-open session's use of the fifos
+ * so they won't disappear until the last session which uses
+ * them disappears
+ */
+ rxf->refcnt++;
+ txf->refcnt++;
+
+ clib_spinlock_unlock_if_init (&pm->sessions_lock);
+
+ s->rx_fifo = rxf;
+ s->tx_fifo = txf;
+
+ return 0;
+}
+
+static int
active_open_connected_callback (u32 app_index, u32 opaque,
session_t * s, session_error_t err)
{
@@ -458,7 +531,7 @@ active_open_connected_callback (u32 app_index, u32 opaque,
*/
clib_spinlock_lock_if_init (&pm->sessions_lock);
- ps = pool_elt_at_index (pm->sessions, opaque);
+ ps = proxy_session_get (opaque);
/* Connection failed */
if (err)
@@ -480,33 +553,12 @@ active_open_connected_callback (u32 app_index, u32 opaque,
if (ps->po_disconnected)
{
/* Setup everything for the cleanup notification */
- hash_set (pm->proxy_session_by_active_open_handle,
- ps->vpp_active_open_handle, opaque);
ps->ao_disconnected = 1;
clib_spinlock_unlock_if_init (&pm->sessions_lock);
return -1;
}
- s->tx_fifo = ps->server_rx_fifo;
- s->rx_fifo = ps->server_tx_fifo;
-
- /*
- * Reset the active-open tx-fifo master indices so the active-open session
- * will receive data, etc.
- */
- s->tx_fifo->shr->master_session_index = s->session_index;
- s->tx_fifo->master_thread_index = s->thread_index;
-
- /*
- * Account for the active-open session's use of the fifos
- * so they won't disappear until the last session which uses
- * them disappears
- */
- s->tx_fifo->refcnt++;
- s->rx_fifo->refcnt++;
-
- hash_set (pm->proxy_session_by_active_open_handle,
- ps->vpp_active_open_handle, opaque);
+ s->opaque = opaque;
clib_spinlock_unlock_if_init (&pm->sessions_lock);
@@ -568,11 +620,9 @@ active_open_tx_callback (session_t * ao_s)
{
proxy_main_t *pm = &proxy_main;
transport_connection_t *tc;
- session_handle_t handle;
proxy_session_t *ps;
session_t *proxy_s;
u32 min_free;
- uword *p;
min_free = clib_min (svm_fifo_size (ao_s->tx_fifo) >> 3, 128 << 10);
if (svm_fifo_max_enqueue (ao_s->tx_fifo) < min_free)
@@ -583,17 +633,12 @@ active_open_tx_callback (session_t * ao_s)
clib_spinlock_lock_if_init (&pm->sessions_lock);
- handle = session_handle (ao_s);
- p = hash_get (pm->proxy_session_by_active_open_handle, handle);
- if (!p)
- return 0;
-
- if (pool_is_free_index (pm->sessions, p[0]))
- return 0;
+ ps = proxy_session_get_if_valid (ao_s->opaque);
+ if (!ps)
+ goto unlock;
- ps = pool_elt_at_index (pm->sessions, p[0]);
if (ps->vpp_server_handle == ~0)
- return 0;
+ goto unlock;
proxy_s = session_get_from_handle (ps->vpp_server_handle);
@@ -601,6 +646,7 @@ active_open_tx_callback (session_t * ao_s)
tc = session_get_transport (proxy_s);
tcp_send_ack ((tcp_connection_t *) tc);
+unlock:
clib_spinlock_unlock_if_init (&pm->sessions_lock);
return 0;
@@ -615,7 +661,6 @@ active_open_cleanup_callback (session_t * s, session_cleanup_ntf_t ntf)
proxy_try_delete_session (s, 1 /* is_active_open */ );
}
-/* *INDENT-OFF* */
static session_cb_vft_t active_open_clients = {
.session_reset_callback = active_open_reset_callback,
.session_connected_callback = active_open_connected_callback,
@@ -624,9 +669,9 @@ static session_cb_vft_t active_open_clients = {
.session_cleanup_callback = active_open_cleanup_callback,
.builtin_app_rx_callback = active_open_rx_callback,
.builtin_app_tx_callback = active_open_tx_callback,
- .fifo_tuning_callback = common_fifo_tuning_callback
+ .fifo_tuning_callback = common_fifo_tuning_callback,
+ .proxy_alloc_session_fifos = active_open_alloc_session_fifos,
};
-/* *INDENT-ON* */
static int
proxy_server_attach ()
@@ -634,19 +679,16 @@ proxy_server_attach ()
proxy_main_t *pm = &proxy_main;
u64 options[APP_OPTIONS_N_OPTIONS];
vnet_app_attach_args_t _a, *a = &_a;
- u32 segment_size = 512 << 20;
clib_memset (a, 0, sizeof (*a));
clib_memset (options, 0, sizeof (options));
- if (pm->private_segment_size)
- segment_size = pm->private_segment_size;
a->name = format (0, "proxy-server");
a->api_client_index = pm->server_client_index;
a->session_cb_vft = &proxy_session_cb_vft;
a->options = options;
- a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
- a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = pm->segment_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = pm->segment_size;
a->options[APP_OPTIONS_RX_FIFO_SIZE] = pm->fifo_size;
a->options[APP_OPTIONS_TX_FIFO_SIZE] = pm->fifo_size;
a->options[APP_OPTIONS_MAX_FIFO_SIZE] = pm->max_fifo_size;
@@ -753,14 +795,12 @@ proxy_server_add_ckpair (void)
static int
proxy_server_create (vlib_main_t * vm)
{
- proxy_main_t *pm = &proxy_main;
vlib_thread_main_t *vtm = vlib_get_thread_main ();
+ proxy_main_t *pm = &proxy_main;
u32 num_threads;
int i;
num_threads = 1 /* main thread */ + vtm->n_threads;
- vec_validate (proxy_main.server_event_queue, num_threads - 1);
- vec_validate (proxy_main.active_open_event_queue, num_threads - 1);
vec_validate (pm->rx_buf, num_threads - 1);
for (i = 0; i < num_threads; i++)
@@ -784,15 +824,6 @@ proxy_server_create (vlib_main_t * vm)
return -1;
}
- for (i = 0; i < num_threads; i++)
- {
- pm->active_open_event_queue[i] = session_main_get_vpp_event_queue (i);
-
- ASSERT (pm->active_open_event_queue[i]);
-
- pm->server_event_queue[i] = session_main_get_vpp_event_queue (i);
- }
-
return 0;
}
@@ -816,7 +847,7 @@ proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
pm->rcv_buffer_size = 1024;
pm->prealloc_fifos = 0;
pm->private_segment_count = 0;
- pm->private_segment_size = 0;
+ pm->segment_size = 512 << 20;
if (vlib_num_workers ())
clib_spinlock_init (&pm->sessions_lock);
@@ -846,13 +877,7 @@ proxy_server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (line_input, "private-segment-size %U",
unformat_memory_size, &tmp64))
{
- if (tmp64 >= 0x100000000ULL)
- {
- error = clib_error_return (
- 0, "private segment size %lld (%llu) too large", tmp64, tmp64);
- goto done;
- }
- pm->private_segment_size = tmp64;
+ pm->segment_size = tmp64;
}
else if (unformat (line_input, "server-uri %s", &server_uri))
vec_add1 (server_uri, 0);
@@ -908,7 +933,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (proxy_create_command, static) =
{
.path = "test proxy server",
@@ -919,7 +943,6 @@ VLIB_CLI_COMMAND (proxy_create_command, static) =
"[private-segment-size <mem>][private-segment-count <nn>]",
.function = proxy_server_create_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
proxy_main_init (vlib_main_t * vm)
@@ -927,8 +950,6 @@ proxy_main_init (vlib_main_t * vm)
proxy_main_t *pm = &proxy_main;
pm->server_client_index = ~0;
pm->active_open_client_index = ~0;
- pm->proxy_session_by_active_open_handle = hash_create (0, sizeof (uword));
- pm->proxy_session_by_server_handle = hash_create (0, sizeof (uword));
return 0;
}
diff --git a/src/plugins/hs_apps/proxy.h b/src/plugins/hs_apps/proxy.h
index aef23e1e556..26f4de2f729 100644
--- a/src/plugins/hs_apps/proxy.h
+++ b/src/plugins/hs_apps/proxy.h
@@ -36,54 +36,41 @@ typedef struct
volatile int active_open_establishing;
volatile int po_disconnected;
volatile int ao_disconnected;
+
+ u32 ps_index;
+ u32 po_thread_index;
} proxy_session_t;
typedef struct
{
- svm_queue_t *vl_input_queue; /**< vpe input queue */
- /** per-thread vectors */
- svm_msg_q_t **server_event_queue;
- svm_msg_q_t **active_open_event_queue;
+ proxy_session_t *sessions; /**< session pool, shared */
+ clib_spinlock_t sessions_lock; /**< lock for session pool */
u8 **rx_buf; /**< intermediate rx buffers */
- u32 cli_node_index; /**< cli process node index */
u32 server_client_index; /**< server API client handle */
u32 server_app_index; /**< server app index */
u32 active_open_client_index; /**< active open API client handle */
u32 active_open_app_index; /**< active open index after attach */
-
- uword *proxy_session_by_server_handle;
- uword *proxy_session_by_active_open_handle;
+ u32 ckpair_index; /**< certkey pair index for tls */
/*
* Configuration params
*/
- u8 *connect_uri; /**< URI for slave's connect */
- u32 configured_segment_size;
u32 fifo_size; /**< initial fifo size */
u32 max_fifo_size; /**< max fifo size */
u8 high_watermark; /**< high watermark (%) */
u8 low_watermark; /**< low watermark (%) */
u32 private_segment_count; /**< Number of private fifo segs */
- u32 private_segment_size; /**< size of private fifo segs */
+ u64 segment_size; /**< size of fifo segs */
+ u8 prealloc_fifos; /**< Request fifo preallocation */
int rcv_buffer_size;
session_endpoint_cfg_t server_sep;
session_endpoint_cfg_t client_sep;
- u32 ckpair_index;
- /*
- * Test state variables
- */
- proxy_session_t *sessions; /**< Session pool, shared */
- clib_spinlock_t sessions_lock;
- u32 **connection_index_by_thread;
- pthread_t client_thread_handle;
-
/*
* Flags
*/
u8 is_init;
- u8 prealloc_fifos; /**< Request fifo preallocation */
} proxy_main_t;
extern proxy_main_t proxy_main;
diff --git a/src/plugins/hs_apps/sapi/vpp_echo.c b/src/plugins/hs_apps/sapi/vpp_echo.c
index 80d274db5b0..08fd4e175e9 100644
--- a/src/plugins/hs_apps/sapi/vpp_echo.c
+++ b/src/plugins/hs_apps/sapi/vpp_echo.c
@@ -84,16 +84,19 @@ stop_signal (int signum)
em->time_to_stop = 1;
}
-int
-connect_to_vpp (char *name)
+static int
+connect_to_vpp (echo_main_t *em)
{
- echo_main_t *em = &echo_main;
api_main_t *am = vlibapi_get_main ();
+ if (em->use_app_socket_api)
+ return echo_api_connect_app_socket (em);
+
if (em->use_sock_api)
{
- if (vl_socket_client_connect ((char *) em->socket_name, name,
- 0 /* default rx, tx buffer */ ))
+ if (vl_socket_client_connect ((char *) em->socket_name,
+ (char *) em->app_name,
+ 0 /* default rx, tx buffer */))
{
ECHO_FAIL (ECHO_FAIL_SOCKET_CONNECT, "socket connect failed");
return -1;
@@ -107,7 +110,8 @@ connect_to_vpp (char *name)
}
else
{
- if (vl_client_connect_to_vlib ("/vpe-api", name, 32) < 0)
+ if (vl_client_connect_to_vlib ("/vpe-api", (char *) em->app_name, 32) <
+ 0)
{
ECHO_FAIL (ECHO_FAIL_SHMEM_CONNECT, "shmem connect failed");
return -1;
@@ -286,13 +290,11 @@ echo_free_sessions (echo_main_t * em)
echo_session_t *s;
u32 *session_indexes = 0, *session_index;
- /* *INDENT-OFF* */
pool_foreach (s, em->sessions)
{
if (s->session_state == ECHO_SESSION_STATE_CLOSED)
vec_add1 (session_indexes, s->session_index);
}
- /* *INDENT-ON* */
vec_foreach (session_index, session_indexes)
{
/* Free session */
@@ -725,9 +727,18 @@ session_reset_handler (session_reset_msg_t * mp)
app_send_ctrl_evt_to_vpp (s->vpp_evt_q, app_evt);
}
+static int
+echo_recv_fd (echo_main_t *em, int *fds, int n_fds)
+{
+ if (em->use_app_socket_api)
+ return echo_sapi_recv_fd (em, fds, n_fds);
+ return echo_bapi_recv_fd (em, fds, n_fds);
+}
+
static void
add_segment_handler (session_app_add_segment_msg_t * mp)
{
+ echo_main_t *em = &echo_main;
fifo_segment_main_t *sm = &echo_main.segment_main;
fifo_segment_create_args_t _a, *a = &_a;
int *fds = 0, i;
@@ -737,10 +748,10 @@ add_segment_handler (session_app_add_segment_msg_t * mp)
if (mp->fd_flags & SESSION_FD_F_MEMFD_SEGMENT)
{
vec_validate (fds, 1);
- if (vl_socket_client_recv_fd_msg (fds, 1, 5))
+ if (echo_recv_fd (em, fds, 1))
{
- ECHO_FAIL (ECHO_FAIL_VL_API_RECV_FD_MSG,
- "vl_socket_client_recv_fd_msg failed");
+ ECHO_LOG (0, "echo_recv_fd failed");
+ em->time_to_stop = 1;
goto failed;
}
@@ -1112,6 +1123,8 @@ echo_process_opts (int argc, char **argv)
em->test_return_packets = RETURN_PACKETS_LOG_WRONG;
else if (unformat (a, "socket-name %s", &em->socket_name))
;
+ else if (unformat (a, "use-app-socket-api"))
+ em->use_app_socket_api = 1;
else if (unformat (a, "use-svm-api"))
em->use_sock_api = 0;
else if (unformat (a, "fifo-size %U", unformat_memory_size, &tmp))
@@ -1228,6 +1241,15 @@ echo_process_opts (int argc, char **argv)
}
}
+static int
+echo_needs_crypto (echo_main_t *em)
+{
+ u8 tr = em->uri_elts.transport_proto;
+ if (tr == TRANSPORT_PROTO_QUIC || tr == TRANSPORT_PROTO_TLS)
+ return 1;
+ return 0;
+}
+
void
echo_process_uri (echo_main_t * em)
{
@@ -1260,13 +1282,91 @@ vpp_echo_init ()
clib_memset (em, 0, sizeof (*em));
}
+static int
+echo_detach (echo_main_t *em)
+{
+ if (em->use_app_socket_api)
+ return echo_sapi_detach (em);
+
+ echo_send_detach (em);
+ if (wait_for_state_change (em, STATE_DETACHED, TIMEOUT))
+ {
+ ECHO_FAIL (ECHO_FAIL_DETACH, "Couldn't detach from vpp");
+ return -1;
+ }
+ return 0;
+}
+
+static void
+echo_add_cert_key (echo_main_t *em)
+{
+ if (em->use_app_socket_api)
+ echo_sapi_add_cert_key (em);
+ else
+ {
+ echo_send_add_cert_key (em);
+ if (wait_for_state_change (em, STATE_ATTACHED, TIMEOUT))
+ {
+ ECHO_FAIL (ECHO_FAIL_APP_ATTACH,
+ "Couldn't add crypto context to vpp\n");
+ exit (1);
+ }
+ }
+}
+
+static int
+echo_del_cert_key (echo_main_t *em)
+{
+ if (em->use_app_socket_api)
+ return echo_sapi_del_cert_key (em);
+
+ echo_send_del_cert_key (em);
+ if (wait_for_state_change (em, STATE_CLEANED_CERT_KEY, TIMEOUT))
+ {
+ ECHO_FAIL (ECHO_FAIL_DEL_CERT_KEY, "Couldn't cleanup cert and key");
+ return -1;
+ }
+ return 0;
+}
+
+static void
+echo_disconnect (echo_main_t *em)
+{
+ if (em->use_app_socket_api)
+ return;
+
+ if (em->use_sock_api)
+ vl_socket_client_disconnect ();
+ else
+ vl_client_disconnect_from_vlib ();
+}
+
+static int
+echo_attach (echo_main_t *em)
+{
+ if (em->use_app_socket_api)
+ return echo_sapi_attach (em);
+ else
+ {
+ echo_api_hookup (em);
+ echo_send_attach (em);
+ if (wait_for_state_change (em, STATE_ATTACHED_NO_CERT, TIMEOUT))
+ {
+ ECHO_FAIL (ECHO_FAIL_ATTACH_TO_VPP,
+ "Couldn't attach to vpp, did you run <session enable> ?");
+ return -1;
+ }
+ }
+ return 0;
+}
+
int
main (int argc, char **argv)
{
echo_main_t *em = &echo_main;
fifo_segment_main_t *sm = &em->segment_main;
- char *app_name;
u64 i;
+ int *rv;
svm_msg_q_cfg_t _cfg, *cfg = &_cfg;
u32 rpc_queue_size = 256 << 10;
@@ -1329,11 +1429,9 @@ main (int argc, char **argv)
for (i = 0; i < em->tx_buf_size; i++)
em->connect_test_data[i] = i & 0xff;
- /* *INDENT-OFF* */
svm_msg_q_ring_cfg_t rc[1] = {
{rpc_queue_size, sizeof (echo_rpc_msg_t), 0},
};
- /* *INDENT-ON* */
cfg->consumer_pid = getpid ();
cfg->n_rings = 1;
cfg->q_nitems = rpc_queue_size;
@@ -1344,8 +1442,10 @@ main (int argc, char **argv)
signal (SIGQUIT, stop_signal);
signal (SIGTERM, stop_signal);
- app_name = em->i_am_master ? "echo_server" : "echo_client";
- if (connect_to_vpp (app_name))
+ em->app_name =
+ format (0, "%s%c", em->i_am_master ? "echo_server" : "echo_client", 0);
+
+ if (connect_to_vpp (em))
{
svm_region_exit ();
ECHO_FAIL (ECHO_FAIL_CONNECT_TO_VPP, "Couldn't connect to vpp");
@@ -1355,34 +1455,22 @@ main (int argc, char **argv)
echo_session_prealloc (em);
echo_notify_event (em, ECHO_EVT_START);
- echo_api_hookup (em);
+ if (echo_attach (em))
+ goto exit_on_error;
- echo_send_attach (em);
- if (wait_for_state_change (em, STATE_ATTACHED_NO_CERT, TIMEOUT))
+ if (echo_needs_crypto (em))
{
- ECHO_FAIL (ECHO_FAIL_ATTACH_TO_VPP,
- "Couldn't attach to vpp, did you run <session enable> ?");
- goto exit_on_error;
+ ECHO_LOG (2, "Adding crypto context %U", echo_format_crypto_engine,
+ em->crypto_engine);
+ echo_add_cert_key (em);
}
-
- if (em->uri_elts.transport_proto != TRANSPORT_PROTO_QUIC
- && em->uri_elts.transport_proto != TRANSPORT_PROTO_TLS)
- em->state = STATE_ATTACHED;
else
{
- ECHO_LOG (2, "Adding crypto context %U", echo_format_crypto_engine,
- em->crypto_engine);
- echo_send_add_cert_key (em);
- if (wait_for_state_change (em, STATE_ATTACHED, TIMEOUT))
- {
- ECHO_FAIL (ECHO_FAIL_APP_ATTACH,
- "Couldn't add crypto context to vpp\n");
- exit (1);
- }
+ em->state = STATE_ATTACHED;
}
- if (pthread_create (&em->mq_thread_handle,
- NULL /*attr */ , echo_mq_thread_fn, 0))
+ if (pthread_create (&em->mq_thread_handle, NULL /*attr */, echo_mq_thread_fn,
+ 0))
{
ECHO_FAIL (ECHO_FAIL_PTHREAD_CREATE, "pthread create errored");
goto exit_on_error;
@@ -1402,30 +1490,22 @@ main (int argc, char **argv)
clients_run (em);
echo_notify_event (em, ECHO_EVT_EXIT);
echo_free_sessions (em);
- echo_send_del_cert_key (em);
- if (wait_for_state_change (em, STATE_CLEANED_CERT_KEY, TIMEOUT))
+ if (echo_needs_crypto (em))
{
- ECHO_FAIL (ECHO_FAIL_DEL_CERT_KEY, "Couldn't cleanup cert and key");
- goto exit_on_error;
+ if (echo_del_cert_key (em))
+ goto exit_on_error;
}
- echo_send_detach (em);
- if (wait_for_state_change (em, STATE_DETACHED, TIMEOUT))
- {
- ECHO_FAIL (ECHO_FAIL_DETACH, "Couldn't detach from vpp");
- goto exit_on_error;
- }
- int *rv;
+ if (echo_detach (em))
+ goto exit_on_error;
+
pthread_join (em->mq_thread_handle, (void **) &rv);
if (rv)
{
ECHO_FAIL (ECHO_FAIL_MQ_PTHREAD, "mq pthread errored %d", rv);
goto exit_on_error;
}
- if (em->use_sock_api)
- vl_socket_client_disconnect ();
- else
- vl_client_disconnect_from_vlib ();
+ echo_disconnect (em);
echo_assert_test_suceeded (em);
exit_on_error:
ECHO_LOG (1, "Test complete !\n");
diff --git a/src/plugins/hs_apps/sapi/vpp_echo_bapi.c b/src/plugins/hs_apps/sapi/vpp_echo_bapi.c
index 38fb522351c..868cc3a0591 100644
--- a/src/plugins/hs_apps/sapi/vpp_echo_bapi.c
+++ b/src/plugins/hs_apps/sapi/vpp_echo_bapi.c
@@ -103,6 +103,19 @@ echo_send_del_cert_key (echo_main_t * em)
vl_msg_api_send_shmem (em->vl_input_queue, (u8 *) & bmp);
}
+int
+echo_bapi_recv_fd (echo_main_t *em, int *fds, int n_fds)
+{
+ clib_error_t *err;
+ err = vl_socket_client_recv_fd_msg (fds, n_fds, 5);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+ return 0;
+}
+
static u8
echo_transport_needs_crypto (transport_proto_t proto)
{
@@ -265,11 +278,11 @@ echo_segment_lookup (u64 segment_handle)
clib_spinlock_lock (&em->segment_handles_lock);
segment_idxp = hash_get (em->shared_segment_handles, segment_handle);
clib_spinlock_unlock (&em->segment_handles_lock);
- if (!segment_idxp)
- return ~0;
+ if (segment_idxp)
+ return ((u32) *segment_idxp);
ECHO_LOG (2, "Segment not mapped (0x%lx)", segment_handle);
- return ((u32) *segment_idxp);
+ return ~0;
}
void
@@ -543,11 +556,14 @@ _(APPLICATION_DETACH_REPLY, application_detach_reply) \
_(APP_ADD_CERT_KEY_PAIR_REPLY, app_add_cert_key_pair_reply) \
_(APP_DEL_CERT_KEY_PAIR_REPLY, app_del_cert_key_pair_reply)
-#define vl_print(handle, ...) fformat (handle, __VA_ARGS__)
#define vl_endianfun
#include <vnet/session/session.api.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vnet/session/session.api.h>
+#undef vl_calcsizefun
+
#define vl_printfun
#include <vnet/session/session.api.h>
#undef vl_printfun
@@ -569,10 +585,18 @@ echo_api_hookup (echo_main_t * em)
return;
#define _(N, n) \
- vl_msg_api_set_handlers (REPLY_MSG_ID_BASE + VL_API_##N, #n, \
- vl_api_##n##_t_handler, vl_noop_handler, \
- vl_api_##n##_t_endian, vl_api_##n##_t_print, \
- sizeof (vl_api_##n##_t), 1);
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = REPLY_MSG_ID_BASE + VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = 1, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ });
foreach_quic_echo_msg;
#undef _
}
diff --git a/src/plugins/hs_apps/sapi/vpp_echo_common.h b/src/plugins/hs_apps/sapi/vpp_echo_common.h
index 80c539ccb0f..9b2a2c677b5 100644
--- a/src/plugins/hs_apps/sapi/vpp_echo_common.h
+++ b/src/plugins/hs_apps/sapi/vpp_echo_common.h
@@ -26,6 +26,9 @@
#define LOG_EVERY_N_IDLE_CYCLES (1e8)
#define ECHO_MQ_SEG_HANDLE ((u64) ~0 - 1)
+#define ECHO_INVALID_SEGMENT_INDEX ((u32) ~0)
+#define ECHO_INVALID_SEGMENT_HANDLE ((u64) ~0)
+
#define foreach_echo_fail_code \
_(ECHO_FAIL_NONE, "ECHO_FAIL_NONE") \
_(ECHO_FAIL_USAGE, "ECHO_FAIL_USAGE") \
@@ -269,6 +272,7 @@ typedef struct
svm_queue_t *vl_input_queue; /* vpe input queue */
u32 my_client_index; /* API client handle */
u8 *uri; /* The URI we're playing with */
+ u8 *app_name;
u32 n_uris; /* Cycle through adjacent ips */
ip46_address_t lcl_ip; /* Local ip for client */
u8 lcl_ip_set;
@@ -277,6 +281,8 @@ typedef struct
svm_msg_q_t *ctrl_mq; /* Our control queue (towards vpp) */
clib_time_t clib_time; /* For deadman timers */
u8 *socket_name;
+ u8 use_app_socket_api;
+ clib_socket_t app_api_sock;
int i_am_master;
u32 *listen_session_indexes; /* vec of vpp listener sessions */
volatile u32 listen_session_cnt;
@@ -449,6 +455,15 @@ void echo_send_disconnect_session (echo_main_t * em, void *args);
void echo_api_hookup (echo_main_t * em);
void echo_send_add_cert_key (echo_main_t * em);
void echo_send_del_cert_key (echo_main_t * em);
+int echo_bapi_recv_fd (echo_main_t *em, int *fds, int n_fds);
+
+/* Session socket API */
+int echo_sapi_attach (echo_main_t *em);
+int echo_sapi_add_cert_key (echo_main_t *em);
+int echo_sapi_del_cert_key (echo_main_t *em);
+int echo_api_connect_app_socket (echo_main_t *em);
+int echo_sapi_detach (echo_main_t *em);
+int echo_sapi_recv_fd (echo_main_t *em, int *fds, int n_fds);
#endif /* __included_vpp_echo_common_h__ */
diff --git a/src/plugins/hs_apps/sapi/vpp_echo_proto_quic.c b/src/plugins/hs_apps/sapi/vpp_echo_proto_quic.c
index c67b35fd8e6..1b0dbf33e29 100644
--- a/src/plugins/hs_apps/sapi/vpp_echo_proto_quic.c
+++ b/src/plugins/hs_apps/sapi/vpp_echo_proto_quic.c
@@ -239,7 +239,6 @@ quic_echo_initiate_qsession_close_no_stream (echo_main_t * em)
/* Close Quic session without streams */
echo_session_t *s;
- /* *INDENT-OFF* */
pool_foreach (s, em->sessions)
{
if (s->session_type == ECHO_SESSION_TYPE_QUIC)
@@ -261,7 +260,6 @@ quic_echo_initiate_qsession_close_no_stream (echo_main_t * em)
ECHO_LOG (2,"%U: PASSIVE close", echo_format_session, s);
}
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/plugins/hs_apps/sapi/vpp_echo_sapi.c b/src/plugins/hs_apps/sapi/vpp_echo_sapi.c
new file mode 100644
index 00000000000..a21fbea6183
--- /dev/null
+++ b/src/plugins/hs_apps/sapi/vpp_echo_sapi.c
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <hs_apps/sapi/vpp_echo_common.h>
+
+int
+echo_api_connect_app_socket (echo_main_t *em)
+{
+ clib_socket_t *cs = &em->app_api_sock;
+ clib_error_t *err;
+ int rv = 0;
+
+ cs->config = (char *) em->socket_name;
+ cs->flags =
+ CLIB_SOCKET_F_IS_CLIENT | CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_BLOCKING;
+
+ if ((err = clib_socket_init (cs)))
+ {
+ clib_error_report (err);
+ rv = -1;
+ }
+
+ return rv;
+}
+
+static inline u64
+echo_vpp_worker_segment_handle (u32 wrk_index)
+{
+ return (ECHO_INVALID_SEGMENT_HANDLE - wrk_index - 1);
+}
+
+static int
+echo_segment_discover_mqs (uword segment_handle, int *fds, u32 n_fds)
+{
+ echo_main_t *em = &echo_main;
+ fifo_segment_t *fs;
+ u32 fs_index;
+
+ fs_index = echo_segment_lookup (segment_handle);
+ if (fs_index == ECHO_INVALID_SEGMENT_INDEX)
+ {
+ ECHO_LOG (0, "ERROR: mq segment %lx for is not attached!",
+ segment_handle);
+ return -1;
+ }
+
+ clib_spinlock_lock (&em->segment_handles_lock);
+
+ fs = fifo_segment_get_segment (&em->segment_main, fs_index);
+ fifo_segment_msg_qs_discover (fs, fds, n_fds);
+
+ clib_spinlock_unlock (&em->segment_handles_lock);
+
+ return 0;
+}
+
+static int
+echo_api_attach_reply_handler (app_sapi_attach_reply_msg_t *mp, int *fds)
+{
+ echo_main_t *em = &echo_main;
+ int i, rv, n_fds_used = 0;
+ u64 segment_handle;
+ u8 *segment_name;
+
+ if (mp->retval)
+ {
+ ECHO_LOG (0, "attach failed: %U", format_session_error, mp->retval);
+ goto failed;
+ }
+
+ em->my_client_index = mp->api_client_handle;
+ segment_handle = mp->segment_handle;
+ if (segment_handle == ECHO_INVALID_SEGMENT_HANDLE)
+ {
+ ECHO_LOG (0, "invalid segment handle");
+ goto failed;
+ }
+
+ if (!mp->n_fds)
+ goto failed;
+
+ if (mp->fd_flags & SESSION_FD_F_VPP_MQ_SEGMENT)
+ if (echo_segment_attach (echo_vpp_worker_segment_handle (0), "vpp-mq-seg",
+ SSVM_SEGMENT_MEMFD, fds[n_fds_used++]))
+ goto failed;
+
+ if (mp->fd_flags & SESSION_FD_F_MEMFD_SEGMENT)
+ {
+ segment_name = format (0, "memfd-%ld%c", segment_handle, 0);
+ rv = echo_segment_attach (segment_handle, (char *) segment_name,
+ SSVM_SEGMENT_MEMFD, fds[n_fds_used++]);
+ vec_free (segment_name);
+ if (rv != 0)
+ goto failed;
+ }
+
+ echo_segment_attach_mq (segment_handle, mp->app_mq, 0, &em->app_mq);
+
+ if (mp->fd_flags & SESSION_FD_F_MQ_EVENTFD)
+ {
+ ECHO_LOG (0, "SESSION_FD_F_MQ_EVENTFD unsupported!");
+ goto failed;
+ }
+
+ echo_segment_discover_mqs (echo_vpp_worker_segment_handle (0),
+ fds + n_fds_used, mp->n_fds - n_fds_used);
+ echo_segment_attach_mq (echo_vpp_worker_segment_handle (0), mp->vpp_ctrl_mq,
+ mp->vpp_ctrl_mq_thread, &em->ctrl_mq);
+
+ em->state = STATE_ATTACHED_NO_CERT;
+ return 0;
+
+failed:
+
+ for (i = clib_max (n_fds_used - 1, 0); i < mp->n_fds; i++)
+ close (fds[i]);
+
+ return -1;
+}
+
+static int
+echo_api_send_attach (clib_socket_t *cs)
+{
+ echo_main_t *em = &echo_main;
+ app_sapi_msg_t msg = { 0 };
+ app_sapi_attach_msg_t *mp = &msg.attach;
+ clib_error_t *err;
+
+ clib_memcpy (&mp->name, em->app_name, vec_len (em->app_name));
+ mp->options[APP_OPTIONS_FLAGS] =
+ APP_OPTIONS_FLAGS_ACCEPT_REDIRECT | APP_OPTIONS_FLAGS_ADD_SEGMENT;
+ mp->options[APP_OPTIONS_SEGMENT_SIZE] = 256 << 20;
+ mp->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = 128 << 20;
+ mp->options[APP_OPTIONS_RX_FIFO_SIZE] = em->fifo_size;
+ mp->options[APP_OPTIONS_TX_FIFO_SIZE] = em->fifo_size;
+ mp->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = em->prealloc_fifo_pairs;
+ mp->options[APP_OPTIONS_EVT_QUEUE_SIZE] = em->evt_q_size;
+
+ msg.type = APP_SAPI_MSG_TYPE_ATTACH;
+ err = clib_socket_sendmsg (cs, &msg, sizeof (msg), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+
+ return 0;
+}
+
+int
+echo_sapi_attach (echo_main_t *em)
+{
+ app_sapi_msg_t _rmp, *rmp = &_rmp;
+ clib_error_t *err;
+ clib_socket_t *cs;
+ int fds[32];
+
+ cs = &em->app_api_sock;
+ if (echo_api_send_attach (cs))
+ return -1;
+
+ /*
+ * Wait for attach reply
+ */
+ err = clib_socket_recvmsg (cs, rmp, sizeof (*rmp), fds, ARRAY_LEN (fds));
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+
+ if (rmp->type != APP_SAPI_MSG_TYPE_ATTACH_REPLY)
+ return -1;
+
+ return echo_api_attach_reply_handler (&rmp->attach_reply, fds);
+}
+
+int
+echo_sapi_add_cert_key (echo_main_t *em)
+{
+ u32 cert_len = test_srv_crt_rsa_len;
+ u32 key_len = test_srv_key_rsa_len;
+ u32 certkey_len = cert_len + key_len;
+ app_sapi_msg_t _msg = { 0 }, *msg = &_msg;
+ app_sapi_cert_key_add_del_msg_t *mp;
+ app_sapi_msg_t _rmp, *rmp = &_rmp;
+ clib_error_t *err;
+ clib_socket_t *cs;
+ u8 *certkey = 0;
+ int rv = -1;
+
+ msg->type = APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY;
+ mp = &msg->cert_key_add_del;
+ mp->context = ntohl (0xfeedface);
+ mp->cert_len = cert_len;
+ mp->certkey_len = certkey_len;
+ mp->is_add = 1;
+
+ vec_validate (certkey, certkey_len - 1);
+ clib_memcpy_fast (certkey, test_srv_crt_rsa, cert_len);
+ clib_memcpy_fast (certkey + cert_len, test_srv_key_rsa, key_len);
+
+ cs = &em->app_api_sock;
+ err = clib_socket_sendmsg (cs, msg, sizeof (*msg), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ goto done;
+ }
+
+ err = clib_socket_sendmsg (cs, certkey, certkey_len, 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ goto done;
+ }
+
+ /*
+ * Wait for reply and process it
+ */
+ err = clib_socket_recvmsg (cs, rmp, sizeof (*rmp), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ goto done;
+ }
+
+ if (rmp->type != APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY)
+ {
+ ECHO_LOG (0, "unexpected reply type %u", rmp->type);
+ goto done;
+ }
+
+ if (!rmp->cert_key_add_del_reply.retval)
+ rv = rmp->cert_key_add_del_reply.index;
+
+ em->state = STATE_ATTACHED;
+ em->ckpair_index = rv;
+
+done:
+
+ return rv;
+}
+
+int
+echo_sapi_recv_fd (echo_main_t *em, int *fds, int n_fds)
+{
+ app_sapi_msg_t _msg, *msg = &_msg;
+ clib_error_t *err =
+ clib_socket_recvmsg (&em->app_api_sock, msg, sizeof (*msg), fds, n_fds);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+ return 0;
+}
+
+int
+echo_sapi_detach (echo_main_t *em)
+{
+ clib_socket_t *cs = &em->app_api_sock;
+ clib_socket_close (cs);
+ em->state = STATE_DETACHED;
+ return 0;
+}
+
+int
+echo_sapi_del_cert_key (echo_main_t *em)
+{
+ app_sapi_msg_t _msg = { 0 }, *msg = &_msg;
+ app_sapi_cert_key_add_del_msg_t *mp;
+ app_sapi_msg_t _rmp, *rmp = &_rmp;
+ clib_error_t *err;
+ clib_socket_t *cs;
+
+ msg->type = APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY;
+ mp = &msg->cert_key_add_del;
+ mp->index = em->ckpair_index;
+
+ cs = &em->app_api_sock;
+ err = clib_socket_sendmsg (cs, msg, sizeof (*msg), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+
+ /*
+ * Wait for reply and process it
+ */
+ err = clib_socket_recvmsg (cs, rmp, sizeof (*rmp), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+
+ if (rmp->type != APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY)
+ {
+ ECHO_LOG (0, "unexpected reply type %u", rmp->type);
+ return -1;
+ }
+
+ if (rmp->cert_key_add_del_reply.retval)
+ return -1;
+
+ em->state = STATE_CLEANED_CERT_KEY;
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/hs_apps/vcl/sock_test_client.c b/src/plugins/hs_apps/vcl/sock_test_client.c
index 35252da21bc..c8815692184 100644
--- a/src/plugins/hs_apps/vcl/sock_test_client.c
+++ b/src/plugins/hs_apps/vcl/sock_test_client.c
@@ -46,17 +46,17 @@ sock_test_cfg_sync (vcl_test_session_t * socket)
{
sock_client_main_t *scm = &sock_client_main;
vcl_test_session_t *ctrl = &scm->ctrl_socket;
- vcl_test_cfg_t *rl_cfg = (vcl_test_cfg_t *) socket->rxbuf;
+ hs_test_cfg_t *rl_cfg = (hs_test_cfg_t *) socket->rxbuf;
int rx_bytes, tx_bytes;
if (socket->cfg.verbose)
- vcl_test_cfg_dump (&socket->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&socket->cfg, 1 /* is_client */);
ctrl->cfg.seq_num = ++scm->cfg_seq_num;
if (socket->cfg.verbose)
{
stinf ("(fd %d): Sending config sent to server.\n", socket->fd);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
tx_bytes = sock_test_write (socket->fd, (uint8_t *) & ctrl->cfg,
sizeof (ctrl->cfg), NULL, ctrl->cfg.verbose);
@@ -64,21 +64,21 @@ sock_test_cfg_sync (vcl_test_session_t * socket)
stabrt ("(fd %d): write test cfg failed (%d)!", socket->fd, tx_bytes);
rx_bytes = sock_test_read (socket->fd, (uint8_t *) socket->rxbuf,
- sizeof (vcl_test_cfg_t), NULL);
+ sizeof (hs_test_cfg_t), NULL);
if (rx_bytes < 0)
return rx_bytes;
- if (rl_cfg->magic != VCL_TEST_CFG_CTRL_MAGIC)
+ if (rl_cfg->magic != HS_TEST_CFG_CTRL_MAGIC)
stabrt ("(fd %d): Bad server reply cfg -- aborting!\n", socket->fd);
- if ((rx_bytes != sizeof (vcl_test_cfg_t))
- || !vcl_test_cfg_verify (rl_cfg, &ctrl->cfg))
+ if ((rx_bytes != sizeof (hs_test_cfg_t)) ||
+ !hs_test_cfg_verify (rl_cfg, &ctrl->cfg))
stabrt ("(fd %d): Invalid config received from server!\n", socket->fd);
if (socket->cfg.verbose)
{
stinf ("(fd %d): Got config back from server.", socket->fd);
- vcl_test_cfg_dump (rl_cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (rl_cfg, 1 /* is_client */);
}
ctrl->cfg.ctrl_handle = ((ctrl->cfg.ctrl_handle == ~0) ?
rl_cfg->ctrl_handle : ctrl->cfg.ctrl_handle);
@@ -263,27 +263,25 @@ echo_test_client (void)
vcl_test_stats_dump ("CLIENT RESULTS", &ctrl->stats,
1 /* show_rx */ , 1 /* show tx */ ,
ctrl->cfg.verbose);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
if (ctrl->cfg.verbose > 1)
{
- stinf (" ctrl socket info\n"
- VCL_TEST_SEPARATOR_STRING
+ stinf (" ctrl socket info\n" HS_TEST_SEPARATOR_STRING
" fd: %d (0x%08x)\n"
" rxbuf: %p\n"
" rxbuf size: %u (0x%08x)\n"
" txbuf: %p\n"
- " txbuf size: %u (0x%08x)\n"
- VCL_TEST_SEPARATOR_STRING,
- ctrl->fd, (uint32_t) ctrl->fd,
- ctrl->rxbuf, ctrl->rxbuf_size, ctrl->rxbuf_size,
- ctrl->txbuf, ctrl->txbuf_size, ctrl->txbuf_size);
+ " txbuf size: %u (0x%08x)\n" HS_TEST_SEPARATOR_STRING,
+ ctrl->fd, (uint32_t) ctrl->fd, ctrl->rxbuf, ctrl->rxbuf_size,
+ ctrl->rxbuf_size, ctrl->txbuf, ctrl->txbuf_size,
+ ctrl->txbuf_size);
}
}
}
static void
-stream_test_client (vcl_test_t test)
+stream_test_client (hs_test_t test)
{
sock_client_main_t *scm = &sock_client_main;
vcl_test_session_t *ctrl = &scm->ctrl_socket;
@@ -292,7 +290,7 @@ stream_test_client (vcl_test_t test)
uint32_t i, n;
fd_set wr_fdset, rd_fdset;
fd_set _wfdset, *wfdset = &_wfdset;
- fd_set _rfdset, *rfdset = (test == VCL_TEST_TYPE_BI) ? &_rfdset : 0;
+ fd_set _rfdset, *rfdset = (test == HS_TEST_TYPE_BI) ? &_rfdset : 0;
ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
ctrl->cfg.ctrl_handle = ~0;
@@ -300,7 +298,7 @@ stream_test_client (vcl_test_t test)
stinf ("\n" SOCK_TEST_BANNER_STRING
"CLIENT (fd %d): %s-directional Stream Test!\n\n"
"CLIENT (fd %d): Sending config to server on ctrl socket...\n",
- ctrl->fd, test == VCL_TEST_TYPE_BI ? "Bi" : "Uni", ctrl->fd);
+ ctrl->fd, test == HS_TEST_TYPE_BI ? "Bi" : "Uni", ctrl->fd);
if (sock_test_cfg_sync (ctrl))
stabrt ("test cfg sync failed -- aborting!");
@@ -352,8 +350,7 @@ stream_test_client (vcl_test_t test)
(tsock->stats.stop.tv_nsec == 0)))
continue;
- if ((test == VCL_TEST_TYPE_BI) &&
- FD_ISSET (tsock->fd, rfdset) &&
+ if ((test == HS_TEST_TYPE_BI) && FD_ISSET (tsock->fd, rfdset) &&
(tsock->stats.rx_bytes < ctrl->cfg.total_bytes))
{
(void) sock_test_read (tsock->fd,
@@ -372,9 +369,9 @@ stream_test_client (vcl_test_t test)
tsock->fd);
}
- if (((test == VCL_TEST_TYPE_UNI) &&
+ if (((test == HS_TEST_TYPE_UNI) &&
(tsock->stats.tx_bytes >= ctrl->cfg.total_bytes)) ||
- ((test == VCL_TEST_TYPE_BI) &&
+ ((test == HS_TEST_TYPE_BI) &&
(tsock->stats.rx_bytes >= ctrl->cfg.total_bytes)))
{
clock_gettime (CLOCK_REALTIME, &tsock->stats.stop);
@@ -399,40 +396,39 @@ stream_test_client (vcl_test_t test)
snprintf (buf, sizeof (buf), "CLIENT (fd %d) RESULTS", tsock->fd);
vcl_test_stats_dump (buf, &tsock->stats,
- test == VCL_TEST_TYPE_BI /* show_rx */ ,
- 1 /* show tx */ , ctrl->cfg.verbose);
+ test == HS_TEST_TYPE_BI /* show_rx */,
+ 1 /* show tx */, ctrl->cfg.verbose);
}
vcl_test_stats_accumulate (&ctrl->stats, &tsock->stats);
}
vcl_test_stats_dump ("CLIENT RESULTS", &ctrl->stats,
- test == VCL_TEST_TYPE_BI /* show_rx */ ,
- 1 /* show tx */ , ctrl->cfg.verbose);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ test == HS_TEST_TYPE_BI /* show_rx */, 1 /* show tx */,
+ ctrl->cfg.verbose);
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
if (ctrl->cfg.verbose)
{
- stinf (" ctrl socket info\n"
- VCL_TEST_SEPARATOR_STRING
+ stinf (" ctrl socket info\n" HS_TEST_SEPARATOR_STRING
" fd: %d (0x%08x)\n"
" rxbuf: %p\n"
" rxbuf size: %u (0x%08x)\n"
" txbuf: %p\n"
- " txbuf size: %u (0x%08x)\n"
- VCL_TEST_SEPARATOR_STRING,
- ctrl->fd, (uint32_t) ctrl->fd,
- ctrl->rxbuf, ctrl->rxbuf_size, ctrl->rxbuf_size,
- ctrl->txbuf, ctrl->txbuf_size, ctrl->txbuf_size);
+ " txbuf size: %u (0x%08x)\n" HS_TEST_SEPARATOR_STRING,
+ ctrl->fd, (uint32_t) ctrl->fd, ctrl->rxbuf, ctrl->rxbuf_size,
+ ctrl->rxbuf_size, ctrl->txbuf, ctrl->txbuf_size,
+ ctrl->txbuf_size);
}
- ctrl->cfg.test = VCL_TEST_TYPE_ECHO;
+ ctrl->cfg.test = HS_TEST_TYPE_ECHO;
if (sock_test_cfg_sync (ctrl))
stabrt ("post-test cfg sync failed!");
- stinf ("(fd %d): %s-directional Stream Test Complete!\n"
- SOCK_TEST_BANNER_STRING "\n", ctrl->fd,
- test == VCL_TEST_TYPE_BI ? "Bi" : "Uni");
+ stinf (
+ "(fd %d): %s-directional Stream Test Complete!\n" SOCK_TEST_BANNER_STRING
+ "\n",
+ ctrl->fd, test == HS_TEST_TYPE_BI ? "Bi" : "Uni");
}
static void
@@ -448,24 +444,24 @@ exit_client (void)
for (i = 0; i < ctrl->cfg.num_test_sessions; i++)
{
tsock = &scm->test_socket[i];
- tsock->cfg.test = VCL_TEST_TYPE_EXIT;
+ tsock->cfg.test = HS_TEST_TYPE_EXIT;
/* coverity[COPY_PASTE_ERROR] */
if (ctrl->cfg.verbose)
{
stinf ("\(fd %d): Sending exit cfg to server...\n", tsock->fd);
- vcl_test_cfg_dump (&tsock->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&tsock->cfg, 1 /* is_client */);
}
(void) sock_test_write (tsock->fd, (uint8_t *) & tsock->cfg,
sizeof (tsock->cfg), &tsock->stats,
ctrl->cfg.verbose);
}
- ctrl->cfg.test = VCL_TEST_TYPE_EXIT;
+ ctrl->cfg.test = HS_TEST_TYPE_EXIT;
if (ctrl->cfg.verbose)
{
stinf ("\n(fd %d): Sending exit cfg to server...\n", ctrl->fd);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
(void) sock_test_write (ctrl->fd, (uint8_t *) & ctrl->cfg,
sizeof (ctrl->cfg), &ctrl->stats,
@@ -557,7 +553,7 @@ cfg_txbuf_size_set (void)
ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
vcl_test_buf_alloc (&ctrl->cfg, 0 /* is_rxbuf */ ,
(uint8_t **) & ctrl->txbuf, &ctrl->txbuf_size);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
stabrt ("Invalid txbuf size (%lu) < minimum buf size (%u)!",
@@ -576,7 +572,7 @@ cfg_num_writes_set (void)
{
ctrl->cfg.num_writes = num_writes;
ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
stabrt ("Invalid num writes: %u", num_writes);
@@ -596,7 +592,7 @@ cfg_num_test_sockets_set (void)
ctrl->cfg.num_test_sessions = num_test_sockets;
sock_test_connect_test_sockets (num_test_sockets);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
stabrt ("Invalid num test sockets: %u, (%d max)\n", num_test_sockets,
@@ -616,7 +612,7 @@ cfg_rxbuf_size_set (void)
ctrl->cfg.rxbuf_size = rxbuf_size;
vcl_test_buf_alloc (&ctrl->cfg, 1 /* is_rxbuf */ ,
(uint8_t **) & ctrl->rxbuf, &ctrl->rxbuf_size);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
stabrt ("Invalid rxbuf size (%lu) < minimum buf size (%u)!",
@@ -630,19 +626,19 @@ cfg_verbose_toggle (void)
vcl_test_session_t *ctrl = &scm->ctrl_socket;
ctrl->cfg.verbose = ctrl->cfg.verbose ? 0 : 1;
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
-static vcl_test_t
+static hs_test_t
parse_input ()
{
sock_client_main_t *scm = &sock_client_main;
vcl_test_session_t *ctrl = &scm->ctrl_socket;
- vcl_test_t rv = VCL_TEST_TYPE_NONE;
+ hs_test_t rv = HS_TEST_TYPE_NONE;
if (!strncmp (VCL_TEST_TOKEN_EXIT, ctrl->txbuf,
strlen (VCL_TEST_TOKEN_EXIT)))
- rv = VCL_TEST_TYPE_EXIT;
+ rv = HS_TEST_TYPE_EXIT;
else if (!strncmp (VCL_TEST_TOKEN_HELP, ctrl->txbuf,
strlen (VCL_TEST_TOKEN_HELP)))
@@ -672,16 +668,16 @@ parse_input ()
strlen (VCL_TEST_TOKEN_RXBUF_SIZE)))
cfg_rxbuf_size_set ();
- else if (!strncmp (VCL_TEST_TOKEN_RUN_UNI, ctrl->txbuf,
- strlen (VCL_TEST_TOKEN_RUN_UNI)))
- rv = ctrl->cfg.test = VCL_TEST_TYPE_UNI;
+ else if (!strncmp (HS_TEST_TOKEN_RUN_UNI, ctrl->txbuf,
+ strlen (HS_TEST_TOKEN_RUN_UNI)))
+ rv = ctrl->cfg.test = HS_TEST_TYPE_UNI;
- else if (!strncmp (VCL_TEST_TOKEN_RUN_BI, ctrl->txbuf,
- strlen (VCL_TEST_TOKEN_RUN_BI)))
- rv = ctrl->cfg.test = VCL_TEST_TYPE_BI;
+ else if (!strncmp (HS_TEST_TOKEN_RUN_BI, ctrl->txbuf,
+ strlen (HS_TEST_TOKEN_RUN_BI)))
+ rv = ctrl->cfg.test = HS_TEST_TYPE_BI;
else
- rv = VCL_TEST_TYPE_ECHO;
+ rv = HS_TEST_TYPE_ECHO;
return rv;
}
@@ -713,9 +709,9 @@ main (int argc, char **argv)
sock_client_main_t *scm = &sock_client_main;
vcl_test_session_t *ctrl = &scm->ctrl_socket;
int c, rv;
- vcl_test_t post_test = VCL_TEST_TYPE_NONE;
+ hs_test_t post_test = HS_TEST_TYPE_NONE;
- vcl_test_cfg_init (&ctrl->cfg);
+ hs_test_cfg_init (&ctrl->cfg);
vcl_test_session_buf_alloc (ctrl);
opterr = 0;
@@ -749,7 +745,7 @@ main (int argc, char **argv)
break;
case 'X':
- post_test = VCL_TEST_TYPE_EXIT;
+ post_test = HS_TEST_TYPE_EXIT;
break;
case 'E':
@@ -760,7 +756,7 @@ main (int argc, char **argv)
print_usage_and_exit ();
}
strncpy (ctrl->txbuf, optarg, ctrl->txbuf_size);
- ctrl->cfg.test = VCL_TEST_TYPE_ECHO;
+ ctrl->cfg.test = HS_TEST_TYPE_ECHO;
break;
case 'I':
@@ -836,11 +832,11 @@ main (int argc, char **argv)
break;
case 'U':
- ctrl->cfg.test = VCL_TEST_TYPE_UNI;
+ ctrl->cfg.test = HS_TEST_TYPE_UNI;
break;
case 'B':
- ctrl->cfg.test = VCL_TEST_TYPE_BI;
+ ctrl->cfg.test = HS_TEST_TYPE_BI;
break;
case 'V':
@@ -928,54 +924,54 @@ main (int argc, char **argv)
sock_test_connect_test_sockets (ctrl->cfg.num_test_sessions);
- while (ctrl->cfg.test != VCL_TEST_TYPE_EXIT)
+ while (ctrl->cfg.test != HS_TEST_TYPE_EXIT)
{
if (scm->dump_cfg)
{
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
scm->dump_cfg = 0;
}
switch (ctrl->cfg.test)
{
- case VCL_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_ECHO:
echo_test_client ();
break;
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
stream_test_client (ctrl->cfg.test);
break;
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
continue;
- case VCL_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_NONE:
default:
break;
}
switch (post_test)
{
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
switch (ctrl->cfg.test)
{
- case VCL_TEST_TYPE_EXIT:
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
- case VCL_TEST_TYPE_ECHO:
- ctrl->cfg.test = VCL_TEST_TYPE_EXIT;
+ case HS_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
+ case HS_TEST_TYPE_ECHO:
+ ctrl->cfg.test = HS_TEST_TYPE_EXIT;
continue;
- case VCL_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_NONE:
default:
break;
}
break;
- case VCL_TEST_TYPE_NONE:
- case VCL_TEST_TYPE_ECHO:
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
+ case HS_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
default:
break;
}
diff --git a/src/plugins/hs_apps/vcl/sock_test_server.c b/src/plugins/hs_apps/vcl/sock_test_server.c
index d516c1722db..2356a4eadca 100644
--- a/src/plugins/hs_apps/vcl/sock_test_server.c
+++ b/src/plugins/hs_apps/vcl/sock_test_server.c
@@ -37,7 +37,7 @@ typedef struct
int fd;
uint8_t *buf;
uint32_t buf_size;
- vcl_test_cfg_t cfg;
+ hs_test_cfg_t cfg;
vcl_test_stats_t stats;
} sock_server_conn_t;
@@ -87,7 +87,7 @@ conn_pool_expand (size_t expand_size)
{
sock_server_conn_t *conn = &conn_pool[i];
memset (conn, 0, sizeof (*conn));
- vcl_test_cfg_init (&conn->cfg);
+ hs_test_cfg_init (&conn->cfg);
vcl_test_buf_alloc (&conn->cfg, 1 /* is_rxbuf */ , &conn->buf,
&conn->buf_size);
conn->cfg.txbuf_size = conn->cfg.rxbuf_size;
@@ -123,7 +123,7 @@ conn_pool_free (sock_server_conn_t * conn)
}
static inline void
-sync_config_and_reply (sock_server_conn_t * conn, vcl_test_cfg_t * rx_cfg)
+sync_config_and_reply (sock_server_conn_t *conn, hs_test_cfg_t *rx_cfg)
{
conn->cfg = *rx_cfg;
vcl_test_buf_alloc (&conn->cfg, 1 /* is_rxbuf */ ,
@@ -133,19 +133,18 @@ sync_config_and_reply (sock_server_conn_t * conn, vcl_test_cfg_t * rx_cfg)
if (conn->cfg.verbose)
{
stinf ("(fd %d): Replying to cfg message!\n", conn->fd);
- vcl_test_cfg_dump (&conn->cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (&conn->cfg, 0 /* is_client */);
}
(void) sock_test_write (conn->fd, (uint8_t *) & conn->cfg,
sizeof (conn->cfg), NULL, conn->cfg.verbose);
}
static void
-stream_test_server_start_stop (sock_server_conn_t * conn,
- vcl_test_cfg_t * rx_cfg)
+stream_test_server_start_stop (sock_server_conn_t *conn, hs_test_cfg_t *rx_cfg)
{
sock_server_main_t *ssm = &sock_server_main;
int client_fd = conn->fd;
- vcl_test_t test = rx_cfg->test;
+ hs_test_t test = rx_cfg->test;
if (rx_cfg->ctrl_handle == conn->fd)
{
@@ -166,39 +165,37 @@ stream_test_server_start_stop (sock_server_conn_t * conn,
snprintf (buf, sizeof (buf), "SERVER (fd %d) RESULTS",
tc->fd);
- vcl_test_stats_dump (buf, &tc->stats, 1 /* show_rx */ ,
- test == VCL_TEST_TYPE_BI
- /* show tx */ ,
+ vcl_test_stats_dump (buf, &tc->stats, 1 /* show_rx */,
+ test == HS_TEST_TYPE_BI
+ /* show tx */,
conn->cfg.verbose);
}
}
}
- vcl_test_stats_dump ("SERVER RESULTS", &conn->stats, 1 /* show_rx */ ,
- (test == VCL_TEST_TYPE_BI) /* show_tx */ ,
+ vcl_test_stats_dump ("SERVER RESULTS", &conn->stats, 1 /* show_rx */,
+ (test == HS_TEST_TYPE_BI) /* show_tx */,
conn->cfg.verbose);
- vcl_test_cfg_dump (&conn->cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (&conn->cfg, 0 /* is_client */);
if (conn->cfg.verbose)
{
- stinf (" sock server main\n"
- VCL_TEST_SEPARATOR_STRING
+ stinf (" sock server main\n" HS_TEST_SEPARATOR_STRING
" buf: %p\n"
- " buf size: %u (0x%08x)\n"
- VCL_TEST_SEPARATOR_STRING,
+ " buf size: %u (0x%08x)\n" HS_TEST_SEPARATOR_STRING,
conn->buf, conn->buf_size, conn->buf_size);
}
sync_config_and_reply (conn, rx_cfg);
- stinf ("SERVER (fd %d): %s-directional Stream Test Complete!\n"
- SOCK_TEST_BANNER_STRING "\n", conn->fd,
- test == VCL_TEST_TYPE_BI ? "Bi" : "Uni");
+ stinf ("SERVER (fd %d): %s-directional Stream Test "
+ "Complete!\n" SOCK_TEST_BANNER_STRING "\n",
+ conn->fd, test == HS_TEST_TYPE_BI ? "Bi" : "Uni");
}
else
{
stinf (SOCK_TEST_BANNER_STRING
"SERVER (fd %d): %s-directional Stream Test!\n"
" Sending client the test cfg to start streaming data...\n",
- client_fd, test == VCL_TEST_TYPE_BI ? "Bi" : "Uni");
+ client_fd, test == HS_TEST_TYPE_BI ? "Bi" : "Uni");
rx_cfg->ctrl_handle = (rx_cfg->ctrl_handle == ~0) ? conn->fd :
rx_cfg->ctrl_handle;
@@ -216,9 +213,9 @@ static inline void
stream_test_server (sock_server_conn_t * conn, int rx_bytes)
{
int client_fd = conn->fd;
- vcl_test_t test = conn->cfg.test;
+ hs_test_t test = conn->cfg.test;
- if (test == VCL_TEST_TYPE_BI)
+ if (test == HS_TEST_TYPE_BI)
(void) sock_test_write (client_fd, conn->buf, rx_bytes, &conn->stats,
conn->cfg.verbose);
@@ -373,15 +370,14 @@ sts_server_echo (sock_server_conn_t * conn, int rx_bytes)
}
static int
-sts_handle_cfg (vcl_test_cfg_t * rx_cfg, sock_server_conn_t * conn,
- int rx_bytes)
+sts_handle_cfg (hs_test_cfg_t *rx_cfg, sock_server_conn_t *conn, int rx_bytes)
{
sock_server_main_t *ssm = &sock_server_main;
if (rx_cfg->verbose)
{
stinf ("(fd %d): Received a cfg message!\n", conn->fd);
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
}
if (rx_bytes != sizeof (*rx_cfg))
@@ -393,7 +389,7 @@ sts_handle_cfg (vcl_test_cfg_t * rx_cfg, sock_server_conn_t * conn,
if (conn->cfg.verbose)
{
stinf ("(fd %d): Replying to cfg message!\n", conn->fd);
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
}
sock_test_write (conn->fd, (uint8_t *) & conn->cfg, sizeof (conn->cfg),
NULL, conn->cfg.verbose);
@@ -402,23 +398,23 @@ sts_handle_cfg (vcl_test_cfg_t * rx_cfg, sock_server_conn_t * conn,
switch (rx_cfg->test)
{
- case VCL_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_NONE:
sync_config_and_reply (conn, rx_cfg);
break;
- case VCL_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_ECHO:
if (socket_server_echo_af_unix_init (ssm))
goto done;
sync_config_and_reply (conn, rx_cfg);
break;
- case VCL_TEST_TYPE_BI:
- case VCL_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
+ case HS_TEST_TYPE_UNI:
stream_test_server_start_stop (conn, rx_cfg);
break;
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
stinf ("Have a great day connection %d!", conn->fd);
close (conn->fd);
conn_pool_free (conn);
@@ -428,7 +424,7 @@ sts_handle_cfg (vcl_test_cfg_t * rx_cfg, sock_server_conn_t * conn,
default:
stinf ("ERROR: Unknown test type!\n");
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
break;
}
@@ -439,7 +435,7 @@ done:
static int
sts_conn_expect_config (sock_server_conn_t * conn)
{
- if (conn->cfg.test == VCL_TEST_TYPE_ECHO)
+ if (conn->cfg.test == HS_TEST_TYPE_ECHO)
return 1;
return (conn->stats.rx_bytes < 128
@@ -452,7 +448,7 @@ main (int argc, char **argv)
int client_fd, rv, main_rv = 0, rx_bytes, c, v, i;
sock_server_main_t *ssm = &sock_server_main;
sock_server_conn_t *conn;
- vcl_test_cfg_t *rx_cfg;
+ hs_test_cfg_t *rx_cfg;
struct sockaddr_storage servaddr;
uint16_t port = VCL_TEST_SERVER_PORT;
uint32_t servaddr_size;
@@ -605,8 +601,8 @@ main (int argc, char **argv)
if (sts_conn_expect_config (conn))
{
- rx_cfg = (vcl_test_cfg_t *) conn->buf;
- if (rx_cfg->magic == VCL_TEST_CFG_CTRL_MAGIC)
+ rx_cfg = (hs_test_cfg_t *) conn->buf;
+ if (rx_cfg->magic == HS_TEST_CFG_CTRL_MAGIC)
{
sts_handle_cfg (rx_cfg, conn, rx_bytes);
if (!ssm->nfds)
@@ -619,8 +615,8 @@ main (int argc, char **argv)
}
}
- if ((conn->cfg.test == VCL_TEST_TYPE_UNI)
- || (conn->cfg.test == VCL_TEST_TYPE_BI))
+ if ((conn->cfg.test == HS_TEST_TYPE_UNI) ||
+ (conn->cfg.test == HS_TEST_TYPE_BI))
{
stream_test_server (conn, rx_bytes);
if (ioctl (conn->fd, FIONREAD))
diff --git a/src/plugins/hs_apps/vcl/vcl_test.h b/src/plugins/hs_apps/vcl/vcl_test.h
index 4f67e03f72b..0ce27ef84e2 100644
--- a/src/plugins/hs_apps/vcl/vcl_test.h
+++ b/src/plugins/hs_apps/vcl/vcl_test.h
@@ -16,6 +16,7 @@
#ifndef __vcl_test_h__
#define __vcl_test_h__
+#include <hs_apps/hs_test.h>
#include <netdb.h>
#include <errno.h>
#include <stdlib.h>
@@ -46,67 +47,18 @@
#define vt_atomic_add(_ptr, _val) \
__atomic_fetch_add (_ptr, _val, __ATOMIC_RELEASE)
-#define VCL_TEST_TOKEN_HELP "#H"
-#define VCL_TEST_TOKEN_EXIT "#X"
-#define VCL_TEST_TOKEN_VERBOSE "#V"
-#define VCL_TEST_TOKEN_TXBUF_SIZE "#T:"
-#define VCL_TEST_TOKEN_NUM_TEST_SESS "#I:"
-#define VCL_TEST_TOKEN_NUM_WRITES "#N:"
-#define VCL_TEST_TOKEN_RXBUF_SIZE "#R:"
-#define VCL_TEST_TOKEN_SHOW_CFG "#C"
-#define VCL_TEST_TOKEN_RUN_UNI "#U"
-#define VCL_TEST_TOKEN_RUN_BI "#B"
-
#define VCL_TEST_SERVER_PORT 22000
#define VCL_TEST_LOCALHOST_IPADDR "127.0.0.1"
-#define VCL_TEST_CFG_CTRL_MAGIC 0xfeedface
-#define VCL_TEST_CFG_NUM_WRITES_DEF 1000000
-#define VCL_TEST_CFG_TXBUF_SIZE_DEF 8192
-#define VCL_TEST_CFG_RXBUF_SIZE_DEF (64*VCL_TEST_CFG_TXBUF_SIZE_DEF)
#define VCL_TEST_CFG_BUF_SIZE_MIN 128
-#define VCL_TEST_CFG_MAX_TEST_SESS 512
+#define VCL_TEST_CFG_MAX_TEST_SESS ((uint32_t) 1e6)
+#define VCL_TEST_CFG_MAX_SELECT_SESS 512
+#define VCL_TEST_CFG_INIT_TEST_SESS 512
#define VCL_TEST_CFG_MAX_EPOLL_EVENTS 16
#define VCL_TEST_CTRL_LISTENER (~0 - 1)
#define VCL_TEST_DATA_LISTENER (~0)
#define VCL_TEST_DELAY_DISCONNECT 1
-#define VCL_TEST_SEPARATOR_STRING \
- " -----------------------------\n"
-typedef enum
-{
- VCL_TEST_TYPE_NONE,
- VCL_TEST_TYPE_ECHO,
- VCL_TEST_TYPE_UNI,
- VCL_TEST_TYPE_BI,
- VCL_TEST_TYPE_EXIT,
-} vcl_test_t;
-
-typedef enum
-{
- VCL_TEST_CMD_SYNC,
- VCL_TEST_CMD_START,
- VCL_TEST_CMD_STOP,
-} vcl_test_cmd_t;
-
-typedef struct __attribute__ ((packed))
-{
- uint32_t magic;
- uint32_t seq_num;
- uint32_t test;
- uint32_t cmd;
- uint32_t ctrl_handle;
- uint32_t num_test_sessions;
- uint32_t num_test_sessions_perq;
- uint32_t num_test_qsessions;
- uint32_t verbose;
- uint32_t address_ip6;
- uint32_t transport_udp;
- uint64_t rxbuf_size;
- uint64_t txbuf_size;
- uint64_t num_writes;
- uint64_t total_bytes;
-} vcl_test_cfg_t;
typedef struct
{
@@ -124,9 +76,10 @@ typedef struct
typedef struct vcl_test_session
{
- uint8_t is_alloc;
- uint8_t is_open;
uint8_t is_done;
+ uint8_t is_alloc : 1;
+ uint8_t is_open : 1;
+ uint8_t noblk_connect : 1;
int fd;
int (*read) (struct vcl_test_session *ts, void *buf, uint32_t buflen);
int (*write) (struct vcl_test_session *ts, void *buf, uint32_t buflen);
@@ -134,10 +87,11 @@ typedef struct vcl_test_session
uint32_t rxbuf_size;
char *txbuf;
char *rxbuf;
- vcl_test_cfg_t cfg;
+ hs_test_cfg_t cfg;
vcl_test_stats_t stats;
vcl_test_stats_t old_stats;
int session_index;
+ struct vcl_test_session *next;
vppcom_endpt_t endpt;
uint8_t ip[16];
vppcom_data_segment_t ds[2];
@@ -154,7 +108,7 @@ vcl_test_worker_index (void)
typedef struct
{
- int (*init) (vcl_test_cfg_t *cfg);
+ int (*init) (hs_test_cfg_t *cfg);
int (*open) (vcl_test_session_t *ts, vppcom_endpt_t *endpt);
int (*listen) (vcl_test_session_t *ts, vppcom_endpt_t *endpt);
int (*accept) (int listen_fd, vcl_test_session_t *ts);
@@ -172,7 +126,7 @@ typedef struct
{
const vcl_test_proto_vft_t *protos[VPPCOM_PROTO_SRTP + 1];
uint32_t ckpair_index;
- vcl_test_cfg_t cfg;
+ hs_test_cfg_t cfg;
vcl_test_wrk_t *wrk;
} vcl_test_main_t;
@@ -198,37 +152,8 @@ vcl_test_stats_accumulate (vcl_test_stats_t * accum, vcl_test_stats_t * incr)
}
static inline void
-vcl_test_cfg_init (vcl_test_cfg_t * cfg)
-{
- cfg->magic = VCL_TEST_CFG_CTRL_MAGIC;
- cfg->test = VCL_TEST_TYPE_NONE;
- cfg->ctrl_handle = ~0;
- cfg->num_test_sessions = 1;
- cfg->num_test_sessions_perq = 1;
- cfg->verbose = 0;
- cfg->rxbuf_size = VCL_TEST_CFG_RXBUF_SIZE_DEF;
- cfg->num_writes = VCL_TEST_CFG_NUM_WRITES_DEF;
- cfg->txbuf_size = VCL_TEST_CFG_TXBUF_SIZE_DEF;
- cfg->total_bytes = cfg->num_writes * cfg->txbuf_size;
-}
-
-static inline int
-vcl_test_cfg_verify (vcl_test_cfg_t * cfg, vcl_test_cfg_t * valid_cfg)
-{
- /* Note: txbuf & rxbuf on server are the same buffer,
- * so txbuf_size is not included in this check.
- */
- return ((cfg->magic == valid_cfg->magic)
- && (cfg->test == valid_cfg->test)
- && (cfg->verbose == valid_cfg->verbose)
- && (cfg->rxbuf_size == valid_cfg->rxbuf_size)
- && (cfg->num_writes == valid_cfg->num_writes)
- && (cfg->total_bytes == valid_cfg->total_bytes));
-}
-
-static inline void
-vcl_test_buf_alloc (vcl_test_cfg_t * cfg, uint8_t is_rxbuf, uint8_t ** buf,
- uint32_t * bufsize)
+vcl_test_buf_alloc (hs_test_cfg_t *cfg, uint8_t is_rxbuf, uint8_t **buf,
+ uint32_t *bufsize)
{
uint32_t alloc_size = is_rxbuf ? cfg->rxbuf_size : cfg->txbuf_size;
uint8_t *lb = realloc (*buf, (size_t) alloc_size);
@@ -269,69 +194,6 @@ vcl_test_session_buf_free (vcl_test_session_t *ts)
ts->txbuf = 0;
}
-static inline char *
-vcl_test_type_str (vcl_test_t t)
-{
- switch (t)
- {
- case VCL_TEST_TYPE_NONE:
- return "NONE";
-
- case VCL_TEST_TYPE_ECHO:
- return "ECHO";
-
- case VCL_TEST_TYPE_UNI:
- return "UNI";
-
- case VCL_TEST_TYPE_BI:
- return "BI";
-
- case VCL_TEST_TYPE_EXIT:
- return "EXIT";
-
- default:
- return "Unknown";
- }
-}
-
-static inline void
-vcl_test_cfg_dump (vcl_test_cfg_t * cfg, uint8_t is_client)
-{
- char *spc = " ";
-
- printf (" test config (%p):\n"
- VCL_TEST_SEPARATOR_STRING
- " magic: 0x%08x\n"
- " seq_num: 0x%08x\n"
- "%-5s test: %s (%d)\n"
- " ctrl handle: %d (0x%x)\n"
- "%-5s num test sockets: %u (0x%08x)\n"
- "%-5s verbose: %s (%d)\n"
- "%-5s rxbuf size: %lu (0x%08lx)\n"
- "%-5s txbuf size: %lu (0x%08lx)\n"
- "%-5s num writes: %lu (0x%08lx)\n"
- " client tx bytes: %lu (0x%08lx)\n"
- VCL_TEST_SEPARATOR_STRING,
- (void *) cfg, cfg->magic, cfg->seq_num,
- is_client && (cfg->test == VCL_TEST_TYPE_UNI) ?
- "'" VCL_TEST_TOKEN_RUN_UNI "'" :
- is_client && (cfg->test == VCL_TEST_TYPE_BI) ?
- "'" VCL_TEST_TOKEN_RUN_BI "'" : spc,
- vcl_test_type_str (cfg->test), cfg->test,
- cfg->ctrl_handle, cfg->ctrl_handle,
- is_client ? "'" VCL_TEST_TOKEN_NUM_TEST_SESS "'" : spc,
- cfg->num_test_sessions, cfg->num_test_sessions,
- is_client ? "'" VCL_TEST_TOKEN_VERBOSE "'" : spc,
- cfg->verbose ? "on" : "off", cfg->verbose,
- is_client ? "'" VCL_TEST_TOKEN_RXBUF_SIZE "'" : spc,
- cfg->rxbuf_size, cfg->rxbuf_size,
- is_client ? "'" VCL_TEST_TOKEN_TXBUF_SIZE "'" : spc,
- cfg->txbuf_size, cfg->txbuf_size,
- is_client ? "'" VCL_TEST_TOKEN_NUM_WRITES "'" : spc,
- cfg->num_writes, cfg->num_writes,
- cfg->total_bytes, cfg->total_bytes);
-}
-
static inline void
vcl_test_stats_dump (char *header, vcl_test_stats_t * stats,
uint8_t show_rx, uint8_t show_tx, uint8_t verbose)
@@ -361,31 +223,27 @@ vcl_test_stats_dump (char *header, vcl_test_stats_t * stats,
if (show_tx)
{
- printf (VCL_TEST_SEPARATOR_STRING
- " tx stats (0x%p):\n"
- VCL_TEST_SEPARATOR_STRING
+ printf (HS_TEST_SEPARATOR_STRING
+ " tx stats (0x%p):\n" HS_TEST_SEPARATOR_STRING
" writes: %lu (0x%08lx)\n"
" tx bytes: %lu (0x%08lx)\n"
" tx eagain: %u (0x%08x)\n"
" tx incomplete: %u (0x%08x)\n",
(void *) stats, stats->tx_xacts, stats->tx_xacts,
- stats->tx_bytes, stats->tx_bytes,
- stats->tx_eagain, stats->tx_eagain,
- stats->tx_incomp, stats->tx_incomp);
+ stats->tx_bytes, stats->tx_bytes, stats->tx_eagain,
+ stats->tx_eagain, stats->tx_incomp, stats->tx_incomp);
}
if (show_rx)
{
- printf (VCL_TEST_SEPARATOR_STRING
- " rx stats (0x%p):\n"
- VCL_TEST_SEPARATOR_STRING
+ printf (HS_TEST_SEPARATOR_STRING
+ " rx stats (0x%p):\n" HS_TEST_SEPARATOR_STRING
" reads: %lu (0x%08lx)\n"
" rx bytes: %lu (0x%08lx)\n"
" rx eagain: %u (0x%08x)\n"
" rx incomplete: %u (0x%08x)\n",
(void *) stats, stats->rx_xacts, stats->rx_xacts,
- stats->rx_bytes, stats->rx_bytes,
- stats->rx_eagain, stats->rx_eagain,
- stats->rx_incomp, stats->rx_incomp);
+ stats->rx_bytes, stats->rx_bytes, stats->rx_eagain,
+ stats->rx_eagain, stats->rx_incomp, stats->rx_incomp);
}
if (verbose)
printf (" start.tv_sec: %ld\n"
@@ -395,7 +253,7 @@ vcl_test_stats_dump (char *header, vcl_test_stats_t * stats,
stats->start.tv_sec, stats->start.tv_nsec,
stats->stop.tv_sec, stats->stop.tv_nsec);
- printf (VCL_TEST_SEPARATOR_STRING);
+ printf (HS_TEST_SEPARATOR_STRING);
}
static inline double
@@ -567,25 +425,18 @@ dump_help (void)
{
#define INDENT "\n "
- printf ("CLIENT: Test configuration commands:"
- INDENT VCL_TEST_TOKEN_HELP
- "\t\t\tDisplay help."
- INDENT VCL_TEST_TOKEN_EXIT
- "\t\t\tExit test client & server."
- INDENT VCL_TEST_TOKEN_SHOW_CFG
- "\t\t\tShow the current test cfg."
- INDENT VCL_TEST_TOKEN_RUN_UNI
- "\t\t\tRun the Uni-directional test."
- INDENT VCL_TEST_TOKEN_RUN_BI
- "\t\t\tRun the Bi-directional test."
- INDENT VCL_TEST_TOKEN_VERBOSE
- "\t\t\tToggle verbose setting."
- INDENT VCL_TEST_TOKEN_RXBUF_SIZE
- "<rxbuf size>\tRx buffer size (bytes)."
- INDENT VCL_TEST_TOKEN_TXBUF_SIZE
- "<txbuf size>\tTx buffer size (bytes)."
- INDENT VCL_TEST_TOKEN_NUM_WRITES
- "<# of writes>\tNumber of txbuf writes to server." "\n");
+ printf (
+ "CLIENT: Test configuration commands:" INDENT VCL_TEST_TOKEN_HELP
+ "\t\t\tDisplay help." INDENT VCL_TEST_TOKEN_EXIT
+ "\t\t\tExit test client & server." INDENT VCL_TEST_TOKEN_SHOW_CFG
+ "\t\t\tShow the current test cfg." INDENT HS_TEST_TOKEN_RUN_UNI
+ "\t\t\tRun the Uni-directional test." INDENT HS_TEST_TOKEN_RUN_BI
+ "\t\t\tRun the Bi-directional test." INDENT VCL_TEST_TOKEN_VERBOSE
+ "\t\t\tToggle verbose setting." INDENT VCL_TEST_TOKEN_RXBUF_SIZE
+ "<rxbuf size>\tRx buffer size (bytes)." INDENT VCL_TEST_TOKEN_TXBUF_SIZE
+ "<txbuf size>\tTx buffer size (bytes)." INDENT VCL_TEST_TOKEN_NUM_WRITES
+ "<# of writes>\tNumber of txbuf writes to server."
+ "\n");
}
#endif /* __vcl_test_h__ */
diff --git a/src/plugins/hs_apps/vcl/vcl_test_client.c b/src/plugins/hs_apps/vcl/vcl_test_client.c
index 4a9fb46e5b8..a4a10b562ff 100644
--- a/src/plugins/hs_apps/vcl/vcl_test_client.c
+++ b/src/plugins/hs_apps/vcl/vcl_test_client.c
@@ -26,18 +26,34 @@
#include <pthread.h>
#include <signal.h>
-typedef struct
+typedef struct vtc_worker_ vcl_test_client_worker_t;
+typedef int (vtc_worker_run_fn) (vcl_test_client_worker_t *wrk);
+
+struct vtc_worker_
{
vcl_test_session_t *sessions;
vcl_test_session_t *qsessions;
uint32_t n_sessions;
uint32_t wrk_index;
- fd_set wr_fdset;
- fd_set rd_fdset;
- int max_fd_index;
+ union
+ {
+ struct
+ {
+ fd_set wr_fdset;
+ fd_set rd_fdset;
+ int max_fd_index;
+ };
+ struct
+ {
+ uint32_t epoll_sh;
+ struct epoll_event ep_evts[VCL_TEST_CFG_MAX_EPOLL_EVENTS];
+ vcl_test_session_t *next_to_send;
+ };
+ };
pthread_t thread_handle;
- vcl_test_cfg_t cfg;
-} vcl_test_client_worker_t;
+ vtc_worker_run_fn *wrk_run_fn;
+ hs_test_cfg_t cfg;
+};
typedef struct
{
@@ -46,13 +62,17 @@ typedef struct
vppcom_endpt_t server_endpt;
uint32_t cfg_seq_num;
uint8_t dump_cfg;
- vcl_test_t post_test;
+ hs_test_t post_test;
uint8_t proto;
uint8_t incremental_stats;
uint32_t n_workers;
volatile int active_workers;
volatile int test_running;
- struct sockaddr_storage server_addr;
+ union
+ {
+ struct in_addr v4;
+ struct in6_addr v6;
+ } server_addr;
} vcl_test_client_main_t;
vcl_test_client_main_t vcl_client_main;
@@ -65,14 +85,14 @@ vcl_test_main_t vcl_test_main;
static int
vtc_cfg_sync (vcl_test_session_t * ts)
{
- vcl_test_cfg_t *rx_cfg = (vcl_test_cfg_t *) ts->rxbuf;
+ hs_test_cfg_t *rx_cfg = (hs_test_cfg_t *) ts->rxbuf;
int rx_bytes, tx_bytes;
vt_atomic_add (&ts->cfg.seq_num, 1);
if (ts->cfg.verbose)
{
vtinf ("(fd %d): Sending config to server.", ts->fd);
- vcl_test_cfg_dump (&ts->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ts->cfg, 1 /* is_client */);
}
tx_bytes = ts->write (ts, &ts->cfg, sizeof (ts->cfg));
if (tx_bytes < 0)
@@ -81,50 +101,48 @@ vtc_cfg_sync (vcl_test_session_t * ts)
return tx_bytes;
}
- rx_bytes = ts->read (ts, ts->rxbuf, sizeof (vcl_test_cfg_t));
+ rx_bytes = ts->read (ts, ts->rxbuf, sizeof (hs_test_cfg_t));
if (rx_bytes < 0)
return rx_bytes;
- if (rx_cfg->magic != VCL_TEST_CFG_CTRL_MAGIC)
+ if (rx_cfg->magic != HS_TEST_CFG_CTRL_MAGIC)
{
vtwrn ("(fd %d): Bad server reply cfg -- aborting!", ts->fd);
return -1;
}
- if ((rx_bytes != sizeof (vcl_test_cfg_t))
- || !vcl_test_cfg_verify (rx_cfg, &ts->cfg))
+ if ((rx_bytes != sizeof (hs_test_cfg_t)) ||
+ !hs_test_cfg_verify (rx_cfg, &ts->cfg))
{
vtwrn ("(fd %d): Invalid config received from server!", ts->fd);
- if (rx_bytes != sizeof (vcl_test_cfg_t))
+ if (rx_bytes != sizeof (hs_test_cfg_t))
{
vtinf ("\tRx bytes %d != cfg size %lu", rx_bytes,
- sizeof (vcl_test_cfg_t));
+ sizeof (hs_test_cfg_t));
}
else
{
- vcl_test_cfg_dump (rx_cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 1 /* is_client */);
vtinf ("(fd %d): Valid config sent to server.", ts->fd);
- vcl_test_cfg_dump (&ts->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ts->cfg, 1 /* is_client */);
}
return -1;
}
if (ts->cfg.verbose)
{
vtinf ("(fd %d): Got config back from server.", ts->fd);
- vcl_test_cfg_dump (rx_cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 1 /* is_client */);
}
return 0;
}
static int
-vtc_connect_test_sessions (vcl_test_client_worker_t * wrk)
+vtc_worker_alloc_sessions (vcl_test_client_worker_t *wrk)
{
- vcl_test_client_main_t *vcm = &vcl_client_main;
- vcl_test_main_t *vt = &vcl_test_main;
- const vcl_test_proto_vft_t *tp;
vcl_test_session_t *ts;
uint32_t n_test_sessions;
- int i, rv;
+ struct timespec now;
+ int i, j;
n_test_sessions = wrk->cfg.num_test_sessions;
if (n_test_sessions < 1)
@@ -148,62 +166,33 @@ vtc_connect_test_sessions (vcl_test_client_worker_t * wrk)
return errno;
}
- tp = vt->protos[vcm->proto];
+ clock_gettime (CLOCK_REALTIME, &now);
for (i = 0; i < n_test_sessions; i++)
{
ts = &wrk->sessions[i];
memset (ts, 0, sizeof (*ts));
ts->session_index = i;
+ ts->old_stats.stop = now;
ts->cfg = wrk->cfg;
vcl_test_session_buf_alloc (ts);
- rv = tp->open (&wrk->sessions[i], &vcm->server_endpt);
- if (rv < 0)
- return rv;
- }
- wrk->n_sessions = n_test_sessions;
-
-done:
- vtinf ("All test sessions (%d) connected!", n_test_sessions);
- return 0;
-}
-
-static int
-vtc_worker_test_setup (vcl_test_client_worker_t * wrk)
-{
- vcl_test_cfg_t *cfg = &wrk->cfg;
- vcl_test_session_t *ts;
- struct timespec now;
- uint32_t sidx;
- int i, j;
-
- FD_ZERO (&wrk->wr_fdset);
- FD_ZERO (&wrk->rd_fdset);
-
- clock_gettime (CLOCK_REALTIME, &now);
-
- for (i = 0; i < cfg->num_test_sessions; i++)
- {
- ts = &wrk->sessions[i];
- ts->old_stats.stop = now;
- switch (cfg->test)
+ switch (ts->cfg.test)
{
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
for (j = 0; j < ts->txbuf_size; j++)
ts->txbuf[j] = j & 0xff;
break;
default:
break;
}
-
- FD_SET (vppcom_session_index (ts->fd), &wrk->wr_fdset);
- FD_SET (vppcom_session_index (ts->fd), &wrk->rd_fdset);
- sidx = vppcom_session_index (ts->fd);
- wrk->max_fd_index = vtc_max (sidx, wrk->max_fd_index);
}
- wrk->max_fd_index += 1;
+ wrk->n_sessions = n_test_sessions;
+
+done:
+
+ vtinf ("All test sessions (%d) initialized!", n_test_sessions);
return 0;
}
@@ -227,16 +216,13 @@ vtc_worker_init (vcl_test_client_worker_t * wrk)
}
vt_atomic_add (&vcm->active_workers, 1);
}
- rv = vtc_connect_test_sessions (wrk);
+ rv = vtc_worker_alloc_sessions (wrk);
if (rv)
{
- vterr ("vtc_connect_test_sessions ()", rv);
+ vterr ("vtc_worker_alloc_sessions ()", rv);
return rv;
}
- if (vtc_worker_test_setup (wrk))
- return -1;
-
return 0;
}
@@ -253,8 +239,7 @@ vtc_accumulate_stats (vcl_test_client_worker_t * wrk,
while (__sync_lock_test_and_set (&stats_lock, 1))
;
- if (ctrl->cfg.test == VCL_TEST_TYPE_BI
- || ctrl->cfg.test == VCL_TEST_TYPE_ECHO)
+ if (ctrl->cfg.test == HS_TEST_TYPE_BI || ctrl->cfg.test == HS_TEST_TYPE_ECHO)
show_rx = 1;
for (i = 0; i < wrk->cfg.num_test_sessions; i++)
@@ -308,32 +293,90 @@ vtc_inc_stats_check (vcl_test_session_t *ts)
}
}
-static void *
-vtc_worker_loop (void *arg)
+static void
+vtc_worker_start_transfer (vcl_test_client_worker_t *wrk)
+{
+ vtinf ("Worker %u starting transfer ...", wrk->wrk_index);
+
+ if (wrk->wrk_index == 0)
+ {
+ vcl_test_client_main_t *vcm = &vcl_client_main;
+ vcl_test_session_t *ctrl = &vcm->ctrl_session;
+
+ clock_gettime (CLOCK_REALTIME, &ctrl->stats.start);
+ }
+}
+
+static int
+vtc_session_check_is_done (vcl_test_session_t *ts, uint8_t check_rx)
+{
+ if ((!check_rx && ts->stats.tx_bytes >= ts->cfg.total_bytes) ||
+ (check_rx && ts->stats.rx_bytes >= ts->cfg.total_bytes))
+ {
+ clock_gettime (CLOCK_REALTIME, &ts->stats.stop);
+ ts->is_done = 1;
+ return 1;
+ }
+ return 0;
+}
+
+static int
+vtc_worker_connect_sessions_select (vcl_test_client_worker_t *wrk)
+{
+ vcl_test_client_main_t *vcm = &vcl_client_main;
+ vcl_test_main_t *vt = &vcl_test_main;
+ const vcl_test_proto_vft_t *tp;
+ vcl_test_session_t *ts;
+ uint32_t sidx;
+ int i, rv;
+
+ tp = vt->protos[vcm->proto];
+
+ FD_ZERO (&wrk->wr_fdset);
+ FD_ZERO (&wrk->rd_fdset);
+
+ for (i = 0; i < wrk->cfg.num_test_sessions; i++)
+ {
+ ts = &wrk->sessions[i];
+
+ rv = tp->open (&wrk->sessions[i], &vcm->server_endpt);
+ if (rv < 0)
+ return rv;
+
+ FD_SET (vppcom_session_index (ts->fd), &wrk->wr_fdset);
+ FD_SET (vppcom_session_index (ts->fd), &wrk->rd_fdset);
+ sidx = vppcom_session_index (ts->fd);
+ wrk->max_fd_index = vtc_max (sidx, wrk->max_fd_index);
+ }
+ wrk->max_fd_index += 1;
+
+ vtinf ("All test sessions (%d) connected!", wrk->cfg.num_test_sessions);
+
+ return 0;
+}
+
+static int
+vtc_worker_run_select (vcl_test_client_worker_t *wrk)
{
vcl_test_client_main_t *vcm = &vcl_client_main;
- vcl_test_session_t *ctrl = &vcm->ctrl_session;
- vcl_test_client_worker_t *wrk = arg;
- uint32_t n_active_sessions;
fd_set _wfdset, *wfdset = &_wfdset;
fd_set _rfdset, *rfdset = &_rfdset;
+ uint32_t n_active_sessions;
vcl_test_session_t *ts;
int i, rv, check_rx = 0;
- rv = vtc_worker_init (wrk);
+ rv = vtc_worker_connect_sessions_select (wrk);
if (rv)
{
- vterr ("vtc_worker_init()", rv);
- return 0;
+ vterr ("vtc_worker_connect_sessions()", rv);
+ return rv;
}
- vtinf ("Starting test ...");
+ check_rx = wrk->cfg.test != HS_TEST_TYPE_UNI;
+ n_active_sessions = wrk->cfg.num_test_sessions;
- if (wrk->wrk_index == 0)
- clock_gettime (CLOCK_REALTIME, &ctrl->stats.start);
+ vtc_worker_start_transfer (wrk);
- check_rx = wrk->cfg.test != VCL_TEST_TYPE_UNI;
- n_active_sessions = wrk->cfg.num_test_sessions;
while (n_active_sessions && vcm->test_running)
{
_wfdset = wrk->wr_fdset;
@@ -344,7 +387,7 @@ vtc_worker_loop (void *arg)
if (rv < 0)
{
vterr ("vppcom_select()", rv);
- goto exit;
+ break;
}
else if (rv == 0)
continue;
@@ -355,29 +398,29 @@ vtc_worker_loop (void *arg)
if (ts->is_done)
continue;
- if (FD_ISSET (vppcom_session_index (ts->fd), rfdset)
- && ts->stats.rx_bytes < ts->cfg.total_bytes)
+ if (FD_ISSET (vppcom_session_index (ts->fd), rfdset) &&
+ ts->stats.rx_bytes < ts->cfg.total_bytes)
{
rv = ts->read (ts, ts->rxbuf, ts->rxbuf_size);
if (rv < 0)
- goto exit;
+ break;
}
- if (FD_ISSET (vppcom_session_index (ts->fd), wfdset)
- && ts->stats.tx_bytes < ts->cfg.total_bytes)
+ if (FD_ISSET (vppcom_session_index (ts->fd), wfdset) &&
+ ts->stats.tx_bytes < ts->cfg.total_bytes)
{
rv = ts->write (ts, ts->txbuf, ts->cfg.txbuf_size);
if (rv < 0)
{
vtwrn ("vppcom_test_write (%d) failed -- aborting test",
ts->fd);
- goto exit;
+ break;
}
if (vcm->incremental_stats)
vtc_inc_stats_check (ts);
}
- if ((!check_rx && ts->stats.tx_bytes >= ts->cfg.total_bytes)
- || (check_rx && ts->stats.rx_bytes >= ts->cfg.total_bytes))
+ if ((!check_rx && ts->stats.tx_bytes >= ts->cfg.total_bytes) ||
+ (check_rx && ts->stats.rx_bytes >= ts->cfg.total_bytes))
{
clock_gettime (CLOCK_REALTIME, &ts->stats.stop);
ts->is_done = 1;
@@ -385,59 +428,343 @@ vtc_worker_loop (void *arg)
}
}
}
-exit:
+
+ return 0;
+}
+
+static void
+vtc_worker_epoll_send_add (vcl_test_client_worker_t *wrk,
+ vcl_test_session_t *ts)
+{
+ if (!wrk->next_to_send)
+ {
+ wrk->next_to_send = ts;
+ }
+ else
+ {
+ ts->next = wrk->next_to_send;
+ wrk->next_to_send = ts->next;
+ }
+}
+
+static void
+vtc_worker_epoll_send_del (vcl_test_client_worker_t *wrk,
+ vcl_test_session_t *ts, vcl_test_session_t *prev)
+{
+ if (!prev)
+ {
+ wrk->next_to_send = ts->next;
+ }
+ else
+ {
+ prev->next = ts->next;
+ }
+}
+
+static int
+vtc_worker_connect_sessions_epoll (vcl_test_client_worker_t *wrk)
+{
+ vcl_test_client_main_t *vcm = &vcl_client_main;
+ vcl_test_main_t *vt = &vcl_test_main;
+ const vcl_test_proto_vft_t *tp;
+ struct timespec start, end;
+ uint32_t n_connected = 0;
+ vcl_test_session_t *ts;
+ struct epoll_event ev;
+ int i, ci = 0, rv, n_ev;
+ double diff;
+
+ tp = vt->protos[vcm->proto];
+ wrk->epoll_sh = vppcom_epoll_create ();
+
+ ev.events = EPOLLET | EPOLLOUT;
+
+ clock_gettime (CLOCK_REALTIME, &start);
+
+ while (n_connected < wrk->cfg.num_test_sessions)
+ {
+ /*
+ * Try to connect more sessions if under pending threshold
+ */
+ while ((ci - n_connected) < 16 && ci < wrk->cfg.num_test_sessions)
+ {
+ ts = &wrk->sessions[ci];
+ ts->noblk_connect = 1;
+ rv = tp->open (&wrk->sessions[ci], &vcm->server_endpt);
+ if (rv < 0)
+ {
+ vtwrn ("open: %d", rv);
+ return rv;
+ }
+
+ ev.data.u64 = ci;
+ rv = vppcom_epoll_ctl (wrk->epoll_sh, EPOLL_CTL_ADD, ts->fd, &ev);
+ if (rv < 0)
+ {
+ vtwrn ("vppcom_epoll_ctl: %d", rv);
+ return rv;
+ }
+ ci += 1;
+ }
+
+ /*
+ * Handle connected events
+ */
+ n_ev =
+ vppcom_epoll_wait (wrk->epoll_sh, wrk->ep_evts,
+ VCL_TEST_CFG_MAX_EPOLL_EVENTS, 0 /* timeout */);
+ if (n_ev < 0)
+ {
+ vterr ("vppcom_epoll_wait() returned", n_ev);
+ return -1;
+ }
+ else if (n_ev == 0)
+ {
+ continue;
+ }
+
+ for (i = 0; i < n_ev; i++)
+ {
+ ts = &wrk->sessions[wrk->ep_evts[i].data.u32];
+ if (!(wrk->ep_evts[i].events & EPOLLOUT))
+ {
+ vtwrn ("connect failed");
+ return -1;
+ }
+ if (ts->is_open)
+ {
+ vtwrn ("connection already open?");
+ return -1;
+ }
+ ts->is_open = 1;
+ n_connected += 1;
+ }
+ }
+
+ clock_gettime (CLOCK_REALTIME, &end);
+
+ diff = vcl_test_time_diff (&start, &end);
+ vtinf ("Connected (%u) connected in %.2f seconds (%u CPS)!",
+ wrk->cfg.num_test_sessions, diff,
+ (uint32_t) ((double) wrk->cfg.num_test_sessions / diff));
+
+ ev.events = EPOLLET | EPOLLIN | EPOLLOUT;
+
+ for (i = 0; i < wrk->cfg.num_test_sessions; i++)
+ {
+ ts = &wrk->sessions[i];
+
+ /* No data to be sent */
+ if (ts->cfg.total_bytes == 0)
+ {
+ n_connected -= 1;
+ clock_gettime (CLOCK_REALTIME, &ts->stats.stop);
+ ts->is_done = 1;
+ continue;
+ }
+
+ ev.data.u64 = i;
+ rv = vppcom_epoll_ctl (wrk->epoll_sh, EPOLL_CTL_MOD, ts->fd, &ev);
+ if (rv < 0)
+ {
+ vtwrn ("vppcom_epoll_ctl: %d", rv);
+ return rv;
+ }
+ vtc_worker_epoll_send_add (wrk, ts);
+ }
+
+ return n_connected;
+}
+
+static int
+vtc_worker_run_epoll (vcl_test_client_worker_t *wrk)
+{
+ vcl_test_client_main_t *vcm = &vcl_client_main;
+ uint32_t n_active_sessions, max_writes = 16, n_writes = 0;
+ vcl_test_session_t *ts, *prev = 0;
+ int i, rv, check_rx = 0, n_ev;
+
+ rv = vtc_worker_connect_sessions_epoll (wrk);
+ if (rv < 0)
+ {
+ vterr ("vtc_worker_connect_sessions()", rv);
+ return rv;
+ }
+
+ n_active_sessions = rv;
+ check_rx = wrk->cfg.test != HS_TEST_TYPE_UNI;
+
+ vtc_worker_start_transfer (wrk);
+ ts = wrk->next_to_send;
+
+ while (n_active_sessions && vcm->test_running)
+ {
+ /*
+ * Try to write
+ */
+ if (!ts)
+ {
+ ts = wrk->next_to_send;
+ if (!ts)
+ goto get_epoll_evts;
+ }
+
+ rv = ts->write (ts, ts->txbuf, ts->cfg.txbuf_size);
+ if (rv > 0)
+ {
+ if (vcm->incremental_stats)
+ vtc_inc_stats_check (ts);
+ if (vtc_session_check_is_done (ts, check_rx))
+ n_active_sessions -= 1;
+ }
+ else if (rv == 0)
+ {
+ vtc_worker_epoll_send_del (wrk, ts, prev);
+ }
+ else
+ {
+ vtwrn ("vppcom_test_write (%d) failed -- aborting test", ts->fd);
+ return -1;
+ }
+ prev = ts;
+ ts = ts->next;
+ n_writes += 1;
+
+ if (rv > 0 && n_writes < max_writes)
+ continue;
+
+ get_epoll_evts:
+
+ /*
+ * Grab new events
+ */
+
+ n_ev =
+ vppcom_epoll_wait (wrk->epoll_sh, wrk->ep_evts,
+ VCL_TEST_CFG_MAX_EPOLL_EVENTS, 0 /* timeout */);
+ if (n_ev < 0)
+ {
+ vterr ("vppcom_epoll_wait()", n_ev);
+ break;
+ }
+ else if (n_ev == 0)
+ {
+ continue;
+ }
+
+ for (i = 0; i < n_ev; i++)
+ {
+ ts = &wrk->sessions[wrk->ep_evts[i].data.u32];
+
+ if (ts->is_done)
+ continue;
+
+ if (wrk->ep_evts[i].events & (EPOLLERR | EPOLLHUP | EPOLLRDHUP))
+ {
+ vtinf ("%u finished before reading all data?", ts->fd);
+ break;
+ }
+ if ((wrk->ep_evts[i].events & EPOLLIN) &&
+ ts->stats.rx_bytes < ts->cfg.total_bytes)
+ {
+ rv = ts->read (ts, ts->rxbuf, ts->rxbuf_size);
+ if (rv < 0)
+ break;
+ if (vtc_session_check_is_done (ts, check_rx))
+ n_active_sessions -= 1;
+ }
+ if ((wrk->ep_evts[i].events & EPOLLOUT) &&
+ ts->stats.tx_bytes < ts->cfg.total_bytes)
+ {
+ vtc_worker_epoll_send_add (wrk, ts);
+ }
+ }
+
+ n_writes = 0;
+ }
+
+ return 0;
+}
+
+static inline int
+vtc_worker_run (vcl_test_client_worker_t *wrk)
+{
+ int rv;
+
+ vtinf ("Worker %u starting test ...", wrk->wrk_index);
+
+ rv = wrk->wrk_run_fn (wrk);
+
vtinf ("Worker %d done ...", wrk->wrk_index);
+
+ return rv;
+}
+
+static void *
+vtc_worker_loop (void *arg)
+{
+ vcl_test_client_main_t *vcm = &vcl_client_main;
+ vcl_test_session_t *ctrl = &vcm->ctrl_session;
+ vcl_test_client_worker_t *wrk = arg;
+
+ if (vtc_worker_init (wrk))
+ goto done;
+
+ if (vtc_worker_run (wrk))
+ goto done;
+
vtc_accumulate_stats (wrk, ctrl);
sleep (VCL_TEST_DELAY_DISCONNECT);
vtc_worker_sessions_exit (wrk);
+
+done:
+
if (wrk->wrk_index)
vt_atomic_add (&vcm->active_workers, -1);
+
return 0;
}
static void
vtc_print_stats (vcl_test_session_t * ctrl)
{
- int is_echo = ctrl->cfg.test == VCL_TEST_TYPE_ECHO;
+ int is_echo = ctrl->cfg.test == HS_TEST_TYPE_ECHO;
int show_rx = 0;
char buf[64];
- if (ctrl->cfg.test == VCL_TEST_TYPE_BI
- || ctrl->cfg.test == VCL_TEST_TYPE_ECHO)
+ if (ctrl->cfg.test == HS_TEST_TYPE_BI || ctrl->cfg.test == HS_TEST_TYPE_ECHO)
show_rx = 1;
vcl_test_stats_dump ("CLIENT RESULTS", &ctrl->stats,
show_rx, 1 /* show tx */ ,
ctrl->cfg.verbose);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
if (ctrl->cfg.verbose)
{
- vtinf (" ctrl session info\n"
- VCL_TEST_SEPARATOR_STRING
+ vtinf (" ctrl session info\n" HS_TEST_SEPARATOR_STRING
" fd: %d (0x%08x)\n"
" rxbuf: %p\n"
" rxbuf size: %u (0x%08x)\n"
" txbuf: %p\n"
- " txbuf size: %u (0x%08x)\n"
- VCL_TEST_SEPARATOR_STRING,
- ctrl->fd, (uint32_t) ctrl->fd,
- ctrl->rxbuf, ctrl->rxbuf_size, ctrl->rxbuf_size,
- ctrl->txbuf, ctrl->txbuf_size, ctrl->txbuf_size);
+ " txbuf size: %u (0x%08x)\n" HS_TEST_SEPARATOR_STRING,
+ ctrl->fd, (uint32_t) ctrl->fd, ctrl->rxbuf, ctrl->rxbuf_size,
+ ctrl->rxbuf_size, ctrl->txbuf, ctrl->txbuf_size,
+ ctrl->txbuf_size);
}
if (is_echo)
snprintf (buf, sizeof (buf), "Echo");
else
snprintf (buf, sizeof (buf), "%s-directional Stream",
- ctrl->cfg.test == VCL_TEST_TYPE_BI ? "Bi" : "Uni");
+ ctrl->cfg.test == HS_TEST_TYPE_BI ? "Bi" : "Uni");
}
static void
vtc_echo_client (vcl_test_client_main_t * vcm)
{
vcl_test_session_t *ctrl = &vcm->ctrl_session;
- vcl_test_cfg_t *cfg = &ctrl->cfg;
+ hs_test_cfg_t *cfg = &ctrl->cfg;
int rv;
cfg->total_bytes = strlen (ctrl->txbuf) + 1;
@@ -457,12 +784,12 @@ static void
vtc_stream_client (vcl_test_client_main_t * vcm)
{
vcl_test_session_t *ctrl = &vcm->ctrl_session;
- vcl_test_cfg_t *cfg = &ctrl->cfg;
+ hs_test_cfg_t *cfg = &ctrl->cfg;
vcl_test_client_worker_t *wrk;
uint32_t i, n_conn, n_conn_per_wrk;
vtinf ("%s-directional Stream Test Starting!",
- ctrl->cfg.test == VCL_TEST_TYPE_BI ? "Bi" : "Uni");
+ ctrl->cfg.test == HS_TEST_TYPE_BI ? "Bi" : "Uni");
memset (&ctrl->stats, 0, sizeof (vcl_test_stats_t));
cfg->total_bytes = cfg->num_writes * cfg->txbuf_size;
@@ -480,7 +807,7 @@ vtc_stream_client (vcl_test_client_main_t * vcm)
}
vcm->test_running = 1;
- ctrl->cfg.cmd = VCL_TEST_CMD_START;
+ ctrl->cfg.cmd = HS_TEST_CMD_START;
if (vtc_cfg_sync (ctrl))
{
vtwrn ("test cfg sync failed -- aborting!");
@@ -490,8 +817,12 @@ vtc_stream_client (vcl_test_client_main_t * vcm)
for (i = 1; i < vcm->n_workers; i++)
{
wrk = &vcm->workers[i];
- pthread_create (&wrk->thread_handle, NULL, vtc_worker_loop,
- (void *) wrk);
+ if (pthread_create (&wrk->thread_handle, NULL, vtc_worker_loop,
+ (void *) wrk))
+ {
+ vtwrn ("pthread_create failed -- aborting!");
+ return;
+ }
}
vtc_worker_loop (&vcm->workers[0]);
@@ -499,7 +830,7 @@ vtc_stream_client (vcl_test_client_main_t * vcm)
;
vtinf ("Sending config on ctrl session (fd %d) for stats...", ctrl->fd);
- ctrl->cfg.cmd = VCL_TEST_CMD_STOP;
+ ctrl->cfg.cmd = HS_TEST_CMD_STOP;
if (vtc_cfg_sync (ctrl))
{
vtwrn ("test cfg sync failed -- aborting!");
@@ -508,8 +839,8 @@ vtc_stream_client (vcl_test_client_main_t * vcm)
vtc_print_stats (ctrl);
- ctrl->cfg.cmd = VCL_TEST_CMD_SYNC;
- ctrl->cfg.test = VCL_TEST_TYPE_ECHO;
+ ctrl->cfg.cmd = HS_TEST_CMD_SYNC;
+ ctrl->cfg.test = HS_TEST_TYPE_ECHO;
ctrl->cfg.total_bytes = 0;
if (vtc_cfg_sync (ctrl))
vtwrn ("post-test cfg sync failed!");
@@ -529,7 +860,7 @@ cfg_txbuf_size_set (void)
ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
vcl_test_buf_alloc (&ctrl->cfg, 0 /* is_rxbuf */ ,
(uint8_t **) & ctrl->txbuf, &ctrl->txbuf_size);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
vtwrn ("Invalid txbuf size (%lu) < minimum buf size (%u)!",
@@ -548,7 +879,7 @@ cfg_num_writes_set (void)
{
ctrl->cfg.num_writes = num_writes;
ctrl->cfg.total_bytes = ctrl->cfg.num_writes * ctrl->cfg.txbuf_size;
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
{
@@ -568,7 +899,7 @@ cfg_num_test_sessions_set (void)
(num_test_sessions <= VCL_TEST_CFG_MAX_TEST_SESS))
{
ctrl->cfg.num_test_sessions = num_test_sessions;
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
{
@@ -590,7 +921,7 @@ cfg_rxbuf_size_set (void)
ctrl->cfg.rxbuf_size = rxbuf_size;
vcl_test_buf_alloc (&ctrl->cfg, 1 /* is_rxbuf */ ,
(uint8_t **) & ctrl->rxbuf, &ctrl->rxbuf_size);
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
else
vtwrn ("Invalid rxbuf size (%lu) < minimum buf size (%u)!",
@@ -604,20 +935,19 @@ cfg_verbose_toggle (void)
vcl_test_session_t *ctrl = &vcm->ctrl_session;
ctrl->cfg.verbose = ctrl->cfg.verbose ? 0 : 1;
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
-
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
}
-static vcl_test_t
+static hs_test_t
parse_input ()
{
vcl_test_client_main_t *vcm = &vcl_client_main;
vcl_test_session_t *ctrl = &vcm->ctrl_session;
- vcl_test_t rv = VCL_TEST_TYPE_NONE;
+ hs_test_t rv = HS_TEST_TYPE_NONE;
if (!strncmp (VCL_TEST_TOKEN_EXIT, ctrl->txbuf,
strlen (VCL_TEST_TOKEN_EXIT)))
- rv = VCL_TEST_TYPE_EXIT;
+ rv = HS_TEST_TYPE_EXIT;
else if (!strncmp (VCL_TEST_TOKEN_HELP, ctrl->txbuf,
strlen (VCL_TEST_TOKEN_HELP)))
@@ -647,16 +977,16 @@ parse_input ()
strlen (VCL_TEST_TOKEN_RXBUF_SIZE)))
cfg_rxbuf_size_set ();
- else if (!strncmp (VCL_TEST_TOKEN_RUN_UNI, ctrl->txbuf,
- strlen (VCL_TEST_TOKEN_RUN_UNI)))
- rv = ctrl->cfg.test = VCL_TEST_TYPE_UNI;
+ else if (!strncmp (HS_TEST_TOKEN_RUN_UNI, ctrl->txbuf,
+ strlen (HS_TEST_TOKEN_RUN_UNI)))
+ rv = ctrl->cfg.test = HS_TEST_TYPE_UNI;
- else if (!strncmp (VCL_TEST_TOKEN_RUN_BI, ctrl->txbuf,
- strlen (VCL_TEST_TOKEN_RUN_BI)))
- rv = ctrl->cfg.test = VCL_TEST_TYPE_BI;
+ else if (!strncmp (HS_TEST_TOKEN_RUN_BI, ctrl->txbuf,
+ strlen (HS_TEST_TOKEN_RUN_BI)))
+ rv = ctrl->cfg.test = HS_TEST_TYPE_BI;
else
- rv = VCL_TEST_TYPE_ECHO;
+ rv = HS_TEST_TYPE_ECHO;
return rv;
}
@@ -682,6 +1012,7 @@ print_usage_and_exit (void)
" -T <txbuf-size> Test Cfg: tx buffer size.\n"
" -U Run Uni-directional test.\n"
" -B Run Bi-directional test.\n"
+ " -b <bytes> Total number of bytes transferred\n"
" -V Verbose mode.\n"
" -I <N> Use N sessions.\n"
" -s <N> Use N sessions.\n"
@@ -697,7 +1028,7 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
int c, v;
opterr = 0;
- while ((c = getopt (argc, argv, "chnp:w:XE:I:N:R:T:UBV6DLs:q:S")) != -1)
+ while ((c = getopt (argc, argv, "chnp:w:xXE:I:N:R:T:b:UBV6DLs:q:S")) != -1)
switch (c)
{
case 'c':
@@ -752,7 +1083,11 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
break;
case 'X':
- vcm->post_test = VCL_TEST_TYPE_EXIT;
+ vcm->post_test = HS_TEST_TYPE_EXIT;
+ break;
+
+ case 'x':
+ vcm->post_test = HS_TEST_TYPE_NONE;
break;
case 'E':
@@ -763,7 +1098,7 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
print_usage_and_exit ();
}
strncpy (ctrl->txbuf, optarg, ctrl->txbuf_size);
- ctrl->cfg.test = VCL_TEST_TYPE_ECHO;
+ ctrl->cfg.test = HS_TEST_TYPE_ECHO;
break;
case 'N':
@@ -822,13 +1157,28 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
print_usage_and_exit ();
}
break;
+ case 'b':
+ if (sscanf (optarg, "0x%lu", &ctrl->cfg.total_bytes) != 1)
+ if (sscanf (optarg, "%ld", &ctrl->cfg.total_bytes) != 1)
+ {
+ vtwrn ("Invalid value for option -%c!", c);
+ print_usage_and_exit ();
+ }
+ if (ctrl->cfg.total_bytes % ctrl->cfg.txbuf_size)
+ {
+ vtwrn ("total bytes must be mutliple of txbuf size(0x%lu)!",
+ ctrl->cfg.txbuf_size);
+ print_usage_and_exit ();
+ }
+ ctrl->cfg.num_writes = ctrl->cfg.total_bytes / ctrl->cfg.txbuf_size;
+ break;
case 'U':
- ctrl->cfg.test = VCL_TEST_TYPE_UNI;
+ ctrl->cfg.test = HS_TEST_TYPE_UNI;
break;
case 'B':
- ctrl->cfg.test = VCL_TEST_TYPE_BI;
+ ctrl->cfg.test = HS_TEST_TYPE_BI;
break;
case 'V':
@@ -882,9 +1232,9 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
print_usage_and_exit ();
}
- if (argc < (optind + 2))
+ if (argc > (optind + 2))
{
- vtwrn ("Insufficient number of arguments!");
+ vtwrn ("Invalid number of arguments!");
print_usage_and_exit ();
}
@@ -895,26 +1245,25 @@ vtc_process_opts (vcl_test_client_main_t * vcm, int argc, char **argv)
memset (&vcm->server_addr, 0, sizeof (vcm->server_addr));
if (ctrl->cfg.address_ip6)
{
- struct sockaddr_in6 *sddr6 = (struct sockaddr_in6 *) &vcm->server_addr;
- sddr6->sin6_family = AF_INET6;
- inet_pton (AF_INET6, argv[optind++], &(sddr6->sin6_addr));
- sddr6->sin6_port = htons (atoi (argv[optind]));
+ struct in6_addr *in6 = &vcm->server_addr.v6;
+ inet_pton (AF_INET6, argv[optind++], in6);
vcm->server_endpt.is_ip4 = 0;
- vcm->server_endpt.ip = (uint8_t *) & sddr6->sin6_addr;
- vcm->server_endpt.port = (uint16_t) sddr6->sin6_port;
+ vcm->server_endpt.ip = (uint8_t *) in6;
}
else
{
- struct sockaddr_in *saddr4 = (struct sockaddr_in *) &vcm->server_addr;
- saddr4->sin_family = AF_INET;
- inet_pton (AF_INET, argv[optind++], &(saddr4->sin_addr));
- saddr4->sin_port = htons (atoi (argv[optind]));
+ struct in_addr *in4 = &vcm->server_addr.v4;
+ inet_pton (AF_INET, argv[optind++], in4);
vcm->server_endpt.is_ip4 = 1;
- vcm->server_endpt.ip = (uint8_t *) & saddr4->sin_addr;
- vcm->server_endpt.port = (uint16_t) saddr4->sin_port;
+ vcm->server_endpt.ip = (uint8_t *) in4;
}
+
+ if (argc == optind + 1)
+ vcm->server_endpt.port = htons (atoi (argv[optind]));
+ else
+ vcm->server_endpt.port = htons (VCL_TEST_SERVER_PORT);
}
static void
@@ -944,10 +1293,14 @@ vtc_ctrl_session_exit (void)
vcl_test_session_t *ctrl = &vcm->ctrl_session;
int verbose = ctrl->cfg.verbose;
- ctrl->cfg.test = VCL_TEST_TYPE_EXIT;
+ /* Only clients exits, server can accept new connections */
+ if (vcm->post_test == HS_TEST_TYPE_EXIT_CLIENT)
+ return;
+
+ ctrl->cfg.test = HS_TEST_TYPE_EXIT;
vtinf ("(fd %d): Sending exit cfg to server...", ctrl->fd);
if (verbose)
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
(void) vcl_test_write (ctrl, (uint8_t *) &ctrl->cfg, sizeof (ctrl->cfg));
sleep (1);
}
@@ -976,7 +1329,7 @@ vtc_ctrl_session_init (vcl_test_client_main_t *vcm, vcl_test_session_t *ctrl)
ctrl->read = vcl_test_read;
ctrl->write = vcl_test_write;
- ctrl->cfg.cmd = VCL_TEST_CMD_SYNC;
+ ctrl->cfg.cmd = HS_TEST_CMD_SYNC;
rv = vtc_cfg_sync (ctrl);
if (rv)
{
@@ -984,7 +1337,7 @@ vtc_ctrl_session_init (vcl_test_client_main_t *vcm, vcl_test_session_t *ctrl)
return rv;
}
- ctrl->cfg.ctrl_handle = ((vcl_test_cfg_t *) ctrl->rxbuf)->ctrl_handle;
+ ctrl->cfg.ctrl_handle = ((hs_test_cfg_t *) ctrl->rxbuf)->ctrl_handle;
memset (&ctrl->stats, 0, sizeof (ctrl->stats));
return 0;
@@ -1015,6 +1368,24 @@ vt_incercept_sigs (void)
}
}
+static void
+vtc_alloc_workers (vcl_test_client_main_t *vcm)
+{
+ vcl_test_main_t *vt = &vcl_test_main;
+ vtc_worker_run_fn *run_fn;
+
+ vcm->workers = calloc (vcm->n_workers, sizeof (vcl_test_client_worker_t));
+ vt->wrk = calloc (vcm->n_workers, sizeof (vcl_test_wrk_t));
+
+ if (vcm->ctrl_session.cfg.num_test_sessions > VCL_TEST_CFG_MAX_SELECT_SESS)
+ run_fn = vtc_worker_run_epoll;
+ else
+ run_fn = vtc_worker_run_select;
+
+ for (int i = 0; i < vcm->n_workers; i++)
+ vcm->workers[i].wrk_run_fn = run_fn;
+}
+
int
main (int argc, char **argv)
{
@@ -1024,13 +1395,14 @@ main (int argc, char **argv)
int rv;
vcm->n_workers = 1;
- vcl_test_cfg_init (&ctrl->cfg);
+ vcm->post_test = HS_TEST_TYPE_EXIT_CLIENT;
+
+ hs_test_cfg_init (&ctrl->cfg);
+ vt_incercept_sigs ();
vcl_test_session_buf_alloc (ctrl);
vtc_process_opts (vcm, argc, argv);
- vt_incercept_sigs ();
- vcm->workers = calloc (vcm->n_workers, sizeof (vcl_test_client_worker_t));
- vt->wrk = calloc (vcm->n_workers, sizeof (vcl_test_wrk_t));
+ vtc_alloc_workers (vcm);
rv = vppcom_app_create ("vcl_test_client");
if (rv < 0)
@@ -1038,62 +1410,67 @@ main (int argc, char **argv)
/* Protos like tls/dtls/quic need init */
if (vt->protos[vcm->proto]->init)
- vt->protos[vcm->proto]->init (&ctrl->cfg);
+ {
+ rv = vt->protos[vcm->proto]->init (&ctrl->cfg);
+ if (rv)
+ vtfail ("client init failed", rv);
+ }
if ((rv = vtc_ctrl_session_init (vcm, ctrl)))
vtfail ("vppcom_session_create() ctrl session", rv);
/* Update ctrl port to data port */
- vcm->server_endpt.port += 1;
+ vcm->server_endpt.port = hs_make_data_port (vcm->server_endpt.port);
- while (ctrl->cfg.test != VCL_TEST_TYPE_EXIT)
+ while (ctrl->cfg.test != HS_TEST_TYPE_EXIT)
{
if (vcm->dump_cfg)
{
- vcl_test_cfg_dump (&ctrl->cfg, 1 /* is_client */ );
+ hs_test_cfg_dump (&ctrl->cfg, 1 /* is_client */);
vcm->dump_cfg = 0;
}
switch (ctrl->cfg.test)
{
- case VCL_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_ECHO:
vtc_echo_client (vcm);
break;
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
vtc_stream_client (vcm);
break;
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
continue;
- case VCL_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_NONE:
default:
break;
}
switch (vcm->post_test)
{
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT_CLIENT:
switch (ctrl->cfg.test)
{
- case VCL_TEST_TYPE_EXIT:
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
- case VCL_TEST_TYPE_ECHO:
- ctrl->cfg.test = VCL_TEST_TYPE_EXIT;
+ case HS_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
+ case HS_TEST_TYPE_ECHO:
+ ctrl->cfg.test = HS_TEST_TYPE_EXIT;
continue;
- case VCL_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_NONE:
default:
break;
}
break;
- case VCL_TEST_TYPE_NONE:
- case VCL_TEST_TYPE_ECHO:
- case VCL_TEST_TYPE_UNI:
- case VCL_TEST_TYPE_BI:
+ case HS_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
default:
break;
}
diff --git a/src/plugins/hs_apps/vcl/vcl_test_protos.c b/src/plugins/hs_apps/vcl/vcl_test_protos.c
index 60ee09265c9..cd1ac2b24f4 100644
--- a/src/plugins/hs_apps/vcl/vcl_test_protos.c
+++ b/src/plugins/hs_apps/vcl/vcl_test_protos.c
@@ -21,16 +21,15 @@ vt_tcp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
uint32_t flags, flen;
int rv;
- ts->fd = vppcom_session_create (VPPCOM_PROTO_TCP, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_TCP, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
return ts->fd;
}
- /* Connect is blocking */
rv = vppcom_session_connect (ts->fd, endpt);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_connect()", rv);
return rv;
@@ -38,10 +37,14 @@ vt_tcp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
ts->read = vcl_test_read;
ts->write = vcl_test_write;
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
- vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+ }
return 0;
}
@@ -108,16 +111,15 @@ vt_udp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
uint32_t flags, flen;
int rv;
- ts->fd = vppcom_session_create (VPPCOM_PROTO_UDP, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_UDP, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
return ts->fd;
}
- /* Connect is blocking */
rv = vppcom_session_connect (ts->fd, endpt);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_connect()", rv);
return rv;
@@ -125,10 +127,14 @@ vt_udp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
ts->read = vcl_test_read;
ts->write = vcl_test_write;
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
- vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+ }
return 0;
}
@@ -270,7 +276,7 @@ vt_add_cert_key_pair ()
}
static int
-vt_tls_init (vcl_test_cfg_t *cfg)
+vt_tls_init (hs_test_cfg_t *cfg)
{
return vt_add_cert_key_pair ();
}
@@ -282,7 +288,7 @@ vt_tls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
uint32_t flags, flen, ckp_len;
int rv;
- ts->fd = vppcom_session_create (VPPCOM_PROTO_TLS, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_TLS, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
@@ -293,9 +299,8 @@ vt_tls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_CKPAIR, &vt->ckpair_index,
&ckp_len);
- /* Connect is blocking */
rv = vppcom_session_connect (ts->fd, endpt);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_connect()", rv);
return rv;
@@ -303,10 +308,14 @@ vt_tls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
ts->read = vcl_test_read;
ts->write = vcl_test_write;
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
- vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+ }
return 0;
}
@@ -375,7 +384,7 @@ static const vcl_test_proto_vft_t vcl_test_tls = {
VCL_TEST_REGISTER_PROTO (VPPCOM_PROTO_TLS, vcl_test_tls);
static int
-vt_dtls_init (vcl_test_cfg_t *cfg)
+vt_dtls_init (hs_test_cfg_t *cfg)
{
return vt_add_cert_key_pair ();
}
@@ -387,7 +396,7 @@ vt_dtls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
uint32_t flags, flen, ckp_len;
int rv;
- ts->fd = vppcom_session_create (VPPCOM_PROTO_DTLS, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_DTLS, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
@@ -398,9 +407,8 @@ vt_dtls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_CKPAIR, &vt->ckpair_index,
&ckp_len);
- /* Connect is blocking */
rv = vppcom_session_connect (ts->fd, endpt);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_connect()", rv);
return rv;
@@ -408,10 +416,14 @@ vt_dtls_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
ts->read = vcl_test_read;
ts->write = vcl_test_write;
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
- vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+ }
return 0;
}
@@ -480,7 +492,7 @@ static const vcl_test_proto_vft_t vcl_test_dtls = {
VCL_TEST_REGISTER_PROTO (VPPCOM_PROTO_DTLS, vcl_test_dtls);
static int
-vt_quic_init (vcl_test_cfg_t *cfg)
+vt_quic_init (hs_test_cfg_t *cfg)
{
vcl_test_main_t *vt = &vcl_test_main;
@@ -568,7 +580,7 @@ vt_quic_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
/* Make sure qsessions are initialized */
vt_quic_maybe_init_wrk (vt, wrk, endpt);
- ts->fd = vppcom_session_create (VPPCOM_PROTO_QUIC, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_QUIC, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
@@ -579,21 +591,23 @@ vt_quic_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
tq = &wrk->qsessions[ts->session_index / vt->cfg.num_test_sessions_perq];
rv = vppcom_session_stream_connect (ts->fd, tq->fd);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_stream_connect()", rv);
return rv;
}
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
-
ts->read = vcl_test_read;
ts->write = vcl_test_write;
- vtinf ("Test (quic stream) session %d (fd %d) connected.", ts->session_index,
- ts->fd);
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test (quic stream) session %d (fd %d) connected.",
+ ts->session_index, ts->fd);
+ }
return 0;
}
@@ -864,7 +878,7 @@ vt_srtp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
uint32_t flags, flen;
int rv;
- ts->fd = vppcom_session_create (VPPCOM_PROTO_SRTP, 0 /* is_nonblocking */);
+ ts->fd = vppcom_session_create (VPPCOM_PROTO_SRTP, ts->noblk_connect);
if (ts->fd < 0)
{
vterr ("vppcom_session_create()", ts->fd);
@@ -873,9 +887,8 @@ vt_srtp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
vt_session_add_srtp_policy (ts, 1 /* is connect */);
- /* Connect is blocking */
rv = vppcom_session_connect (ts->fd, endpt);
- if (rv < 0)
+ if (rv < 0 && rv != VPPCOM_EINPROGRESS)
{
vterr ("vppcom_session_connect()", rv);
return rv;
@@ -883,10 +896,14 @@ vt_srtp_connect (vcl_test_session_t *ts, vppcom_endpt_t *endpt)
ts->read = vt_srtp_read;
ts->write = vt_srtp_write;
- flags = O_NONBLOCK;
- flen = sizeof (flags);
- vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
- vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+
+ if (!ts->noblk_connect)
+ {
+ flags = O_NONBLOCK;
+ flen = sizeof (flags);
+ vppcom_session_attr (ts->fd, VPPCOM_ATTR_SET_FLAGS, &flags, &flen);
+ vtinf ("Test session %d (fd %d) connected.", ts->session_index, ts->fd);
+ }
vt_srtp_session_init (ts, 1 /* is connect */);
diff --git a/src/plugins/hs_apps/vcl/vcl_test_server.c b/src/plugins/hs_apps/vcl/vcl_test_server.c
index 93c244484c8..d17a2089ba7 100644
--- a/src/plugins/hs_apps/vcl/vcl_test_server.c
+++ b/src/plugins/hs_apps/vcl/vcl_test_server.c
@@ -28,6 +28,17 @@
#include <vppinfra/mem.h>
#include <pthread.h>
+/*
+ * XXX: Unfortunately libepoll-shim requires some hacks to work, one of these
+ * defines 'close' as a macro. This collides with vcl test callback 'close'.
+ * Undef the 'close' macro on FreeBSD if it exists.
+ */
+#ifdef __FreeBSD__
+#ifdef close
+#undef close
+#endif
+#endif /* __FreeBSD__ */
+
typedef struct
{
uint16_t port;
@@ -106,7 +117,7 @@ again:
conn->endpt.ip = wrk->conn_pool[i].ip;
conn->is_alloc = 1;
conn->session_index = i;
- vcl_test_cfg_init (&conn->cfg);
+ hs_test_cfg_init (&conn->cfg);
return (&wrk->conn_pool[i]);
}
}
@@ -130,7 +141,7 @@ conn_pool_free (vcl_test_session_t *ts)
}
static inline void
-sync_config_and_reply (vcl_test_session_t *conn, vcl_test_cfg_t *rx_cfg)
+sync_config_and_reply (vcl_test_session_t *conn, hs_test_cfg_t *rx_cfg)
{
conn->cfg = *rx_cfg;
vcl_test_buf_alloc (&conn->cfg, 1 /* is_rxbuf */, (uint8_t **) &conn->rxbuf,
@@ -140,7 +151,7 @@ sync_config_and_reply (vcl_test_session_t *conn, vcl_test_cfg_t *rx_cfg)
if (conn->cfg.verbose)
{
vtinf ("(fd %d): Replying to cfg message!\n", conn->fd);
- vcl_test_cfg_dump (&conn->cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (&conn->cfg, 0 /* is_client */);
}
(void) vcl_test_write (conn, &conn->cfg, sizeof (conn->cfg));
}
@@ -185,14 +196,14 @@ vts_wrk_cleanup_all (vcl_test_server_worker_t *wrk)
static void
vts_test_cmd (vcl_test_server_worker_t *wrk, vcl_test_session_t *conn,
- vcl_test_cfg_t *rx_cfg)
+ hs_test_cfg_t *rx_cfg)
{
- u8 is_bi = rx_cfg->test == VCL_TEST_TYPE_BI;
+ u8 is_bi = rx_cfg->test == HS_TEST_TYPE_BI;
vcl_test_session_t *tc;
char buf[64];
int i;
- if (rx_cfg->cmd == VCL_TEST_CMD_STOP)
+ if (rx_cfg->cmd == HS_TEST_CMD_STOP)
{
struct timespec stop;
clock_gettime (CLOCK_REALTIME, &stop);
@@ -232,25 +243,25 @@ vts_test_cmd (vcl_test_server_worker_t *wrk, vcl_test_session_t *conn,
vcl_test_stats_dump ("SERVER RESULTS", &conn->stats, 1 /* show_rx */ ,
is_bi /* show_tx */ , conn->cfg.verbose);
- vcl_test_cfg_dump (&conn->cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (&conn->cfg, 0 /* is_client */);
if (conn->cfg.verbose)
{
- vtinf (" vcl server main\n" VCL_TEST_SEPARATOR_STRING
+ vtinf (" vcl server main\n" HS_TEST_SEPARATOR_STRING
" buf: %p\n"
- " buf size: %u (0x%08x)\n" VCL_TEST_SEPARATOR_STRING,
+ " buf size: %u (0x%08x)\n" HS_TEST_SEPARATOR_STRING,
conn->rxbuf, conn->rxbuf_size, conn->rxbuf_size);
}
sync_config_and_reply (conn, rx_cfg);
memset (&conn->stats, 0, sizeof (conn->stats));
}
- else if (rx_cfg->cmd == VCL_TEST_CMD_SYNC)
+ else if (rx_cfg->cmd == HS_TEST_CMD_SYNC)
{
rx_cfg->ctrl_handle = conn->fd;
vtinf ("Set control fd %d for test!", conn->fd);
sync_config_and_reply (conn, rx_cfg);
}
- else if (rx_cfg->cmd == VCL_TEST_CMD_START)
+ else if (rx_cfg->cmd == HS_TEST_CMD_START)
{
vtinf ("Starting %s-directional Stream Test (fd %d)!",
is_bi ? "Bi" : "Uni", conn->fd);
@@ -268,7 +279,7 @@ vts_server_process_rx (vcl_test_session_t *conn, int rx_bytes)
{
vcl_test_server_main_t *vsm = &vcl_server_main;
- if (conn->cfg.test == VCL_TEST_TYPE_BI)
+ if (conn->cfg.test == HS_TEST_TYPE_BI)
{
if (vsm->use_ds)
{
@@ -373,8 +384,9 @@ vts_accept_client (vcl_test_server_worker_t *wrk, int listen_fd)
if (tp->accept (listen_fd, conn))
return 0;
- vtinf ("Got a connection -- fd = %d (0x%08x) on listener fd = %d (0x%08x)",
- conn->fd, conn->fd, listen_fd, listen_fd);
+ if (conn->cfg.num_test_sessions < VCL_TEST_CFG_MAX_SELECT_SESS)
+ vtinf ("Got a connection -- fd = %d (0x%08x) on listener fd = %d (0x%08x)",
+ conn->fd, conn->fd, listen_fd, listen_fd);
ev.events = EPOLLET | EPOLLIN;
ev.data.u64 = conn - wrk->conn_pool;
@@ -502,31 +514,33 @@ vcl_test_server_process_opts (vcl_test_server_main_t * vsm, int argc,
print_usage_and_exit ();
}
- if (argc < (optind + 1))
+ if (argc > (optind + 1))
{
- fprintf (stderr, "SERVER: ERROR: Insufficient number of arguments!\n");
+ fprintf (stderr, "Incorrect number of arguments!\n");
print_usage_and_exit ();
}
-
- if (sscanf (argv[optind], "%d", &v) == 1)
- vsm->server_cfg.port = (uint16_t) v;
- else
+ else if (argc > 1 && argc == (optind + 1))
{
- fprintf (stderr, "SERVER: ERROR: Invalid port (%s)!\n", argv[optind]);
- print_usage_and_exit ();
+ if (sscanf (argv[optind], "%d", &v) == 1)
+ vsm->server_cfg.port = (uint16_t) v;
+ else
+ {
+ fprintf (stderr, "Invalid port (%s)!\n", argv[optind]);
+ print_usage_and_exit ();
+ }
}
vcl_test_init_endpoint_addr (vsm);
}
int
-vts_handle_ctrl_cfg (vcl_test_server_worker_t *wrk, vcl_test_cfg_t *rx_cfg,
+vts_handle_ctrl_cfg (vcl_test_server_worker_t *wrk, hs_test_cfg_t *rx_cfg,
vcl_test_session_t *conn, int rx_bytes)
{
if (rx_cfg->verbose)
{
vtinf ("(fd %d): Received a cfg msg!", conn->fd);
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
}
if (rx_bytes != sizeof (*rx_cfg))
@@ -538,7 +552,7 @@ vts_handle_ctrl_cfg (vcl_test_server_worker_t *wrk, vcl_test_cfg_t *rx_cfg,
if (conn->cfg.verbose)
{
vtinf ("(fd %d): Replying to cfg msg", conn->fd);
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
}
conn->write (conn, &conn->cfg, sizeof (conn->cfg));
return -1;
@@ -546,27 +560,28 @@ vts_handle_ctrl_cfg (vcl_test_server_worker_t *wrk, vcl_test_cfg_t *rx_cfg,
switch (rx_cfg->test)
{
- case VCL_TEST_TYPE_NONE:
- case VCL_TEST_TYPE_ECHO:
+ case HS_TEST_TYPE_NONE:
+ case HS_TEST_TYPE_ECHO:
sync_config_and_reply (conn, rx_cfg);
break;
- case VCL_TEST_TYPE_BI:
- case VCL_TEST_TYPE_UNI:
+ case HS_TEST_TYPE_BI:
+ case HS_TEST_TYPE_UNI:
vts_test_cmd (wrk, conn, rx_cfg);
break;
- case VCL_TEST_TYPE_EXIT:
+ case HS_TEST_TYPE_EXIT:
vtinf ("Ctrl session fd %d closing!", conn->fd);
vts_session_cleanup (conn);
wrk->nfds--;
if (wrk->nfds)
vts_wrk_cleanup_all (wrk);
+ vcl_server_main.ctrl = 0;
break;
default:
vtwrn ("Unknown test type %d", rx_cfg->test);
- vcl_test_cfg_dump (rx_cfg, 0 /* is_client */ );
+ hs_test_cfg_dump (rx_cfg, 0 /* is_client */);
break;
}
@@ -586,7 +601,7 @@ vts_worker_init (vcl_test_server_worker_t * wrk)
vtinf ("Initializing worker ...");
- conn_pool_expand (wrk, VCL_TEST_CFG_MAX_TEST_SESS + 1);
+ conn_pool_expand (wrk, VCL_TEST_CFG_INIT_TEST_SESS + 1);
if (wrk->wrk_index)
if (vppcom_worker_register ())
vtfail ("vppcom_worker_register()", 1);
@@ -648,7 +663,7 @@ vts_worker_loop (void *arg)
vcl_test_server_worker_t *wrk = arg;
vcl_test_session_t *conn;
int i, rx_bytes, num_ev;
- vcl_test_cfg_t *rx_cfg;
+ hs_test_cfg_t *rx_cfg;
if (wrk->wrk_index)
vts_worker_init (wrk);
@@ -675,13 +690,13 @@ vts_worker_loop (void *arg)
*/
if (ep_evts[i].events & (EPOLLHUP | EPOLLRDHUP))
{
- vts_session_cleanup (conn);
- wrk->nfds--;
- if (!wrk->nfds)
+ if (conn == vsm->ctrl)
{
- vtinf ("All client connections closed\n");
- goto done;
+ vtinf ("ctrl session went away");
+ vsm->ctrl = 0;
}
+ vts_session_cleanup (conn);
+ wrk->nfds--;
continue;
}
@@ -699,6 +714,10 @@ vts_worker_loop (void *arg)
vsm->ctrl = vts_accept_ctrl (wrk, vsm->ctrl_listen_fd);
continue;
}
+
+ /* at this point ctrl session must be valid */
+ ASSERT (vsm->ctrl);
+
if (ep_evts[i].data.u32 == VCL_TEST_DATA_LISTENER)
{
conn = vts_accept_client (wrk, wrk->listener.fd);
@@ -718,8 +737,8 @@ vts_worker_loop (void *arg)
if (!wrk->wrk_index && conn->fd == vsm->ctrl->fd)
{
rx_bytes = conn->read (conn, conn->rxbuf, conn->rxbuf_size);
- rx_cfg = (vcl_test_cfg_t *) conn->rxbuf;
- if (rx_cfg->magic == VCL_TEST_CFG_CTRL_MAGIC)
+ rx_cfg = (hs_test_cfg_t *) conn->rxbuf;
+ if (rx_cfg->magic == HS_TEST_CFG_CTRL_MAGIC)
{
vts_handle_ctrl_cfg (wrk, rx_cfg, conn, rx_bytes);
if (!wrk->nfds)
@@ -847,13 +866,15 @@ main (int argc, char **argv)
vts_ctrl_session_init (&vsm->workers[0]);
/* Update ctrl port to data port */
- vsm->server_cfg.endpt.port += 1;
+ vsm->server_cfg.endpt.port = hs_make_data_port (vsm->server_cfg.endpt.port);
vts_worker_init (&vsm->workers[0]);
for (i = 1; i < vsm->server_cfg.workers; i++)
{
vsm->workers[i].wrk_index = i;
rv = pthread_create (&vsm->workers[i].thread_handle, NULL,
vts_worker_loop, (void *) &vsm->workers[i]);
+ if (rv)
+ vtfail ("pthread_create()", rv);
}
vts_worker_loop (&vsm->workers[0]);
diff --git a/src/plugins/hsi/CMakeLists.txt b/src/plugins/hsi/CMakeLists.txt
new file mode 100644
index 00000000000..629f5e3762c
--- /dev/null
+++ b/src/plugins/hsi/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Copyright (c) 2021 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(hsi
+ SOURCES
+ hsi.c
+)
diff --git a/src/plugins/hsi/FEATURE.yaml b/src/plugins/hsi/FEATURE.yaml
new file mode 100644
index 00000000000..d6bf15fc25b
--- /dev/null
+++ b/src/plugins/hsi/FEATURE.yaml
@@ -0,0 +1,8 @@
+---
+name: HSI (Host Stack Intercept)
+maintainer: Florin Coras <fcoras@cisco.com>
+features:
+ - Host stack intercept feature
+description: "Feature that enables selective punting of flows to the host stack"
+state: experimental
+properties: [MULTITHREAD]
diff --git a/src/plugins/hsi/hsi.c b/src/plugins/hsi/hsi.c
new file mode 100644
index 00000000000..0fea0a3f288
--- /dev/null
+++ b/src/plugins/hsi/hsi.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+#include <hsi/hsi.h>
+#include <vnet/tcp/tcp_types.h>
+
+char *hsi_error_strings[] = {
+#define hsi_error(n, s) s,
+#include <hsi/hsi_error.def>
+#undef hsi_error
+};
+
+typedef enum hsi_input_next_
+{
+ HSI_INPUT_NEXT_UDP_INPUT,
+ HSI_INPUT_NEXT_TCP_INPUT,
+ HSI_INPUT_NEXT_TCP_INPUT_NOLOOKUP,
+ HSI_INPUT_N_NEXT
+} hsi_input_next_t;
+
+#define foreach_hsi4_input_next \
+ _ (UDP_INPUT, "udp4-input") \
+ _ (TCP_INPUT, "tcp4-input") \
+ _ (TCP_INPUT_NOLOOKUP, "tcp4-input-nolookup")
+
+#define foreach_hsi6_input_next \
+ _ (UDP_INPUT, "udp6-input") \
+ _ (TCP_INPUT, "tcp6-input") \
+ _ (TCP_INPUT_NOLOOKUP, "tcp6-input-nolookup")
+
+typedef struct
+{
+ u32 next_node;
+} hsi_trace_t;
+
+static u8 *
+format_hsi_trace (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ vlib_node_t *node = va_arg (*args, vlib_node_t *);
+ hsi_trace_t *t = va_arg (*args, hsi_trace_t *);
+ vlib_node_t *nn;
+
+ nn = vlib_get_next_node (vm, node->index, t->next_node);
+ s = format (s, "session %sfound, next node: %v",
+ t->next_node < HSI_INPUT_N_NEXT ? "" : "not ", nn->name);
+ return s;
+}
+
+always_inline u8
+hsi_udp_lookup (vlib_buffer_t *b, void *ip_hdr, u8 is_ip4)
+{
+ udp_header_t *hdr;
+ session_t *s;
+
+ if (is_ip4)
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) ip_hdr;
+ hdr = ip4_next_header (ip4);
+ s = session_lookup_safe4 (
+ vnet_buffer (b)->ip.fib_index, &ip4->dst_address, &ip4->src_address,
+ hdr->dst_port, hdr->src_port, TRANSPORT_PROTO_UDP);
+ }
+ else
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) ip_hdr;
+ hdr = ip6_next_header (ip6);
+ s = session_lookup_safe6 (
+ vnet_buffer (b)->ip.fib_index, &ip6->dst_address, &ip6->src_address,
+ hdr->dst_port, hdr->src_port, TRANSPORT_PROTO_UDP);
+ }
+
+ return s ? 1 : 0;
+}
+
+always_inline transport_connection_t *
+hsi_tcp_lookup (vlib_buffer_t *b, void *ip_hdr, tcp_header_t **rhdr, u8 is_ip4)
+{
+ transport_connection_t *tc;
+ tcp_header_t *hdr;
+ u8 result = 0;
+
+ if (is_ip4)
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) ip_hdr;
+ *rhdr = hdr = ip4_next_header (ip4);
+ tc = session_lookup_connection_wt4 (
+ vnet_buffer (b)->ip.fib_index, &ip4->dst_address, &ip4->src_address,
+ hdr->dst_port, hdr->src_port, TRANSPORT_PROTO_TCP,
+ vlib_get_thread_index (), &result);
+ }
+ else
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) ip_hdr;
+ *rhdr = hdr = ip6_next_header (ip6);
+ tc = session_lookup_connection_wt6 (
+ vnet_buffer (b)->ip.fib_index, &ip6->dst_address, &ip6->src_address,
+ hdr->dst_port, hdr->src_port, TRANSPORT_PROTO_TCP,
+ vlib_get_thread_index (), &result);
+ }
+
+ return result == 0 ? tc : 0;
+}
+
+always_inline void
+hsi_lookup_and_update (vlib_buffer_t *b, u32 *next, u8 is_ip4, u8 is_input)
+{
+ u8 proto, state, have_udp;
+ tcp_header_t *tcp_hdr = 0;
+ tcp_connection_t *tc;
+ u32 rw_len = 0;
+ void *ip_hdr;
+
+ if (is_input)
+ {
+ ip_hdr = vlib_buffer_get_current (b);
+ if (is_ip4)
+ ip_lookup_set_buffer_fib_index (ip4_main.fib_index_by_sw_if_index, b);
+ else
+ ip_lookup_set_buffer_fib_index (ip6_main.fib_index_by_sw_if_index, b);
+ }
+ else
+ {
+ rw_len = vnet_buffer (b)->ip.save_rewrite_length;
+ ip_hdr = vlib_buffer_get_current (b) + rw_len;
+ }
+
+ if (is_ip4)
+ proto = ((ip4_header_t *) ip_hdr)->protocol;
+ else
+ proto = ((ip6_header_t *) ip_hdr)->protocol;
+
+ switch (proto)
+ {
+ case IP_PROTOCOL_TCP:
+ tc = (tcp_connection_t *) hsi_tcp_lookup (b, ip_hdr, &tcp_hdr, is_ip4);
+ if (tc)
+ {
+ state = tc->state;
+ if (state == TCP_STATE_LISTEN)
+ {
+ /* Avoid processing non syn packets that match listener */
+ if (!tcp_syn (tcp_hdr))
+ {
+ vnet_feature_next (next, b);
+ break;
+ }
+ *next = HSI_INPUT_NEXT_TCP_INPUT;
+ }
+ else if (state == TCP_STATE_SYN_SENT)
+ {
+ *next = HSI_INPUT_NEXT_TCP_INPUT;
+ }
+ else
+ {
+ /* Lookup already done, use result */
+ *next = HSI_INPUT_NEXT_TCP_INPUT_NOLOOKUP;
+ vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
+ }
+ vlib_buffer_advance (b, rw_len);
+ }
+ else
+ {
+ vnet_feature_next (next, b);
+ }
+ break;
+ case IP_PROTOCOL_UDP:
+ have_udp = hsi_udp_lookup (b, ip_hdr, is_ip4);
+ if (have_udp)
+ {
+ *next = HSI_INPUT_NEXT_UDP_INPUT;
+ /* Emulate udp-local and consume headers up to udp payload */
+ rw_len += is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
+ rw_len += sizeof (udp_header_t);
+ vlib_buffer_advance (b, rw_len);
+ }
+ else
+ {
+ vnet_feature_next (next, b);
+ }
+ break;
+ default:
+ vnet_feature_next (next, b);
+ break;
+ }
+}
+
+static void
+hsi_input_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t **bufs, u16 *nexts, u32 n_bufs, u8 is_ip4)
+{
+ vlib_buffer_t *b;
+ hsi_trace_t *t;
+ int i;
+
+ for (i = 0; i < n_bufs; i++)
+ {
+ b = bufs[i];
+ if (!(b->flags & VLIB_BUFFER_IS_TRACED))
+ continue;
+ t = vlib_add_trace (vm, node, b, sizeof (*t));
+ t->next_node = nexts[i];
+ }
+}
+
+always_inline uword
+hsi46_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u8 is_ip4, u8 is_input)
+{
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u16 nexts[VLIB_FRAME_SIZE], *next;
+ u32 n_left_from, *from;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+ b = bufs;
+ next = nexts;
+
+ while (n_left_from >= 4)
+ {
+ u32 next0, next1;
+
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+
+ vlib_prefetch_buffer_header (b[3], LOAD);
+ CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+
+ hsi_lookup_and_update (b[0], &next0, is_ip4, is_input);
+ hsi_lookup_and_update (b[1], &next1, is_ip4, is_input);
+
+ next[0] = next0;
+ next[1] = next1;
+
+ b += 2;
+ next += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from)
+ {
+ u32 next0;
+
+ hsi_lookup_and_update (b[0], &next0, is_ip4, is_input);
+
+ next[0] = next0;
+
+ b += 1;
+ next += 1;
+ n_left_from -= 1;
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ hsi_input_trace_frame (vm, node, bufs, nexts, frame->n_vectors, is_ip4);
+
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (hsi4_in_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return hsi46_input_inline (vm, node, frame, 1 /* is_ip4 */,
+ 1 /* is_input */);
+}
+
+VLIB_REGISTER_NODE (hsi4_in_node) = {
+ .name = "hsi4-in",
+ .vector_size = sizeof (u32),
+ .format_trace = format_hsi_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = HSI_N_ERROR,
+ .error_strings = hsi_error_strings,
+ .n_next_nodes = HSI_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [HSI_INPUT_NEXT_##s] = n,
+ foreach_hsi4_input_next
+#undef _
+ },
+};
+
+VNET_FEATURE_INIT (hsi4_in_feature, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "hsi4-in",
+ .runs_before = VNET_FEATURES ("ip4-lookup"),
+ .runs_after = VNET_FEATURES ("ip4-full-reassembly-feature"),
+};
+
+VLIB_NODE_FN (hsi4_out_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return hsi46_input_inline (vm, node, frame, 1 /* is_ip4 */,
+ 0 /* is_input */);
+}
+
+VLIB_REGISTER_NODE (hsi4_out_node) = {
+ .name = "hsi4-out",
+ .vector_size = sizeof (u32),
+ .format_trace = format_hsi_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = HSI_N_ERROR,
+ .error_strings = hsi_error_strings,
+ .n_next_nodes = HSI_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [HSI_INPUT_NEXT_##s] = n,
+ foreach_hsi4_input_next
+#undef _
+ },
+};
+
+VNET_FEATURE_INIT (hsi4_out_feature, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "hsi4-out",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+VLIB_NODE_FN (hsi6_in_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return hsi46_input_inline (vm, node, frame, 0 /* is_ip4 */,
+ 1 /* is_input */);
+}
+
+VLIB_REGISTER_NODE (hsi6_in_node) = {
+ .name = "hsi6-in",
+ .vector_size = sizeof (u32),
+ .format_trace = format_hsi_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = HSI_N_ERROR,
+ .error_strings = hsi_error_strings,
+ .n_next_nodes = HSI_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [HSI_INPUT_NEXT_##s] = n,
+ foreach_hsi6_input_next
+#undef _
+ },
+};
+
+VNET_FEATURE_INIT (hsi6_in_feature, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "hsi6-in",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+ .runs_after = VNET_FEATURES ("ip6-full-reassembly-feature"),
+};
+
+VLIB_NODE_FN (hsi6_out_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return hsi46_input_inline (vm, node, frame, 0 /* is_ip4 */,
+ 0 /* is_input */);
+}
+
+VLIB_REGISTER_NODE (hsi6_out_node) = {
+ .name = "hsi6-out",
+ .vector_size = sizeof (u32),
+ .format_trace = format_hsi_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = HSI_N_ERROR,
+ .error_strings = hsi_error_strings,
+ .n_next_nodes = HSI_INPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [HSI_INPUT_NEXT_##s] = n,
+ foreach_hsi6_input_next
+#undef _
+ },
+};
+
+VNET_FEATURE_INIT (hsi6_out_feature, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "hsi6-out",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Host Stack Intercept (HSI)",
+ .default_disabled = 0,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/hsi/hsi.h b/src/plugins/hsi/hsi.h
new file mode 100644
index 00000000000..1eee1565ef1
--- /dev/null
+++ b/src/plugins/hsi/hsi.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_HSI_HSI_H_
+#define SRC_PLUGINS_HSI_HSI_H_
+
+#include <vnet/session/session.h>
+
+typedef enum _hsi_error
+{
+#define hsi_error(n, s) HSI_ERROR_##n,
+#include <hsi/hsi_error.def>
+#undef hsi_error
+ HSI_N_ERROR,
+} hsi_error_t;
+
+#endif /* SRC_PLUGINS_HSI_HSI_H_ */
diff --git a/src/vnet/vxlan-gbp/dir.dox b/src/plugins/hsi/hsi_error.def
index 6e63c90b17b..4e9d7f19238 100644
--- a/src/vnet/vxlan-gbp/dir.dox
+++ b/src/plugins/hsi/hsi_error.def
@@ -1,6 +1,5 @@
/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- *
+ * Copyright (c) 2021 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -14,11 +13,4 @@
* limitations under the License.
*/
-/**
-@dir
-@brief VXLAN-GBP Code.
-
-This directory contains source code to support VXLAN-GBP.
-
-*/
-/*? %%clicmd:group_label VXLAN-GBP CLI %% ?*/
+hsi_error (NONE, "no error") \ No newline at end of file
diff --git a/src/plugins/http/CMakeLists.txt b/src/plugins/http/CMakeLists.txt
new file mode 100644
index 00000000000..d9cd84a3955
--- /dev/null
+++ b/src/plugins/http/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(http
+ SOURCES
+ http.c
+ http_buffer.c
+ http_timer.c
+)
diff --git a/src/plugins/http/http.c b/src/plugins/http/http.c
new file mode 100644
index 00000000000..37a6de71bc7
--- /dev/null
+++ b/src/plugins/http/http.c
@@ -0,0 +1,1513 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <http/http.h>
+#include <vnet/session/session.h>
+#include <http/http_timer.h>
+
+static http_main_t http_main;
+
+#define HTTP_FIFO_THRESH (16 << 10)
+#define CONTENT_LEN_STR "Content-Length: "
+
+/* HTTP state machine result */
+typedef enum http_sm_result_t_
+{
+ HTTP_SM_STOP = 0,
+ HTTP_SM_CONTINUE = 1,
+ HTTP_SM_ERROR = -1,
+} http_sm_result_t;
+
+const char *http_status_code_str[] = {
+#define _(c, s, str) str,
+ foreach_http_status_code
+#undef _
+};
+
+const char *http_content_type_str[] = {
+#define _(s, ext, str) str,
+ foreach_http_content_type
+#undef _
+};
+
+const http_buffer_type_t msg_to_buf_type[] = {
+ [HTTP_MSG_DATA_INLINE] = HTTP_BUFFER_FIFO,
+ [HTTP_MSG_DATA_PTR] = HTTP_BUFFER_PTR,
+};
+
+u8 *
+format_http_state (u8 *s, va_list *va)
+{
+ http_state_t state = va_arg (*va, http_state_t);
+
+ switch (state)
+ {
+ case HTTP_STATE_IDLE:
+ return format (s, "idle");
+ case HTTP_STATE_WAIT_APP_METHOD:
+ return format (s, "wait app method");
+ case HTTP_STATE_WAIT_SERVER_REPLY:
+ return format (s, "wait server reply");
+ case HTTP_STATE_CLIENT_IO_MORE_DATA:
+ return format (s, "client io more data");
+ case HTTP_STATE_WAIT_CLIENT_METHOD:
+ return format (s, "wait client method");
+ case HTTP_STATE_WAIT_APP_REPLY:
+ return format (s, "wait app reply");
+ case HTTP_STATE_APP_IO_MORE_DATA:
+ return format (s, "app io more data");
+ default:
+ break;
+ }
+ return format (s, "unknown");
+}
+
+#define http_state_change(_hc, _state) \
+ do \
+ { \
+ HTTP_DBG (1, "changing http state %U -> %U", format_http_state, \
+ (_hc)->http_state, format_http_state, _state); \
+ (_hc)->http_state = _state; \
+ } \
+ while (0)
+
+static inline http_worker_t *
+http_worker_get (u32 thread_index)
+{
+ return &http_main.wrk[thread_index];
+}
+
+static inline u32
+http_conn_alloc_w_thread (u32 thread_index)
+{
+ http_worker_t *wrk = http_worker_get (thread_index);
+ http_conn_t *hc;
+
+ pool_get_aligned_safe (wrk->conn_pool, hc, CLIB_CACHE_LINE_BYTES);
+ clib_memset (hc, 0, sizeof (*hc));
+ hc->c_thread_index = thread_index;
+ hc->h_hc_index = hc - wrk->conn_pool;
+ hc->h_pa_session_handle = SESSION_INVALID_HANDLE;
+ hc->h_tc_session_handle = SESSION_INVALID_HANDLE;
+ return hc->h_hc_index;
+}
+
+static inline http_conn_t *
+http_conn_get_w_thread (u32 hc_index, u32 thread_index)
+{
+ http_worker_t *wrk = http_worker_get (thread_index);
+ return pool_elt_at_index (wrk->conn_pool, hc_index);
+}
+
+void
+http_conn_free (http_conn_t *hc)
+{
+ http_worker_t *wrk = http_worker_get (hc->c_thread_index);
+ pool_put (wrk->conn_pool, hc);
+}
+
+static u32
+http_listener_alloc (void)
+{
+ http_main_t *hm = &http_main;
+ http_conn_t *lhc;
+
+ pool_get_zero (hm->listener_pool, lhc);
+ lhc->c_c_index = lhc - hm->listener_pool;
+ return lhc->c_c_index;
+}
+
+http_conn_t *
+http_listener_get (u32 lhc_index)
+{
+ return pool_elt_at_index (http_main.listener_pool, lhc_index);
+}
+
+void
+http_listener_free (http_conn_t *lhc)
+{
+ http_main_t *hm = &http_main;
+
+ vec_free (lhc->app_name);
+ if (CLIB_DEBUG)
+ memset (lhc, 0xfc, sizeof (*lhc));
+ pool_put (hm->listener_pool, lhc);
+}
+
+void
+http_disconnect_transport (http_conn_t *hc)
+{
+ vnet_disconnect_args_t a = {
+ .handle = hc->h_tc_session_handle,
+ .app_index = http_main.app_index,
+ };
+
+ hc->state = HTTP_CONN_STATE_CLOSED;
+
+ if (vnet_disconnect_session (&a))
+ clib_warning ("disconnect returned");
+}
+
+static void
+http_conn_timeout_cb (void *hc_handlep)
+{
+ http_conn_t *hc;
+ uword hs_handle;
+
+ hs_handle = pointer_to_uword (hc_handlep);
+ hc = http_conn_get_w_thread (hs_handle & 0x00FFFFFF, hs_handle >> 24);
+
+ HTTP_DBG (1, "terminate thread %d index %d hs %llx", hs_handle >> 24,
+ hs_handle & 0x00FFFFFF, hc);
+ if (!hc)
+ return;
+
+ hc->timer_handle = ~0;
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+}
+
+int
+http_ts_accept_callback (session_t *ts)
+{
+ session_t *ts_listener, *as, *asl;
+ app_worker_t *app_wrk;
+ http_conn_t *lhc, *hc;
+ u32 hc_index, thresh;
+ int rv;
+
+ ts_listener = listen_session_get_from_handle (ts->listener_handle);
+ lhc = http_listener_get (ts_listener->opaque);
+
+ hc_index = http_conn_alloc_w_thread (ts->thread_index);
+ hc = http_conn_get_w_thread (hc_index, ts->thread_index);
+ clib_memcpy_fast (hc, lhc, sizeof (*lhc));
+ hc->c_thread_index = ts->thread_index;
+ hc->h_hc_index = hc_index;
+
+ hc->h_tc_session_handle = session_handle (ts);
+ hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
+
+ hc->state = HTTP_CONN_STATE_ESTABLISHED;
+ http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+
+ ts->session_state = SESSION_STATE_READY;
+ ts->opaque = hc_index;
+
+ /*
+ * Alloc session and initialize
+ */
+ as = session_alloc (hc->c_thread_index);
+ hc->c_s_index = as->session_index;
+
+ as->app_wrk_index = hc->h_pa_wrk_index;
+ as->connection_index = hc->c_c_index;
+ as->session_state = SESSION_STATE_ACCEPTING;
+
+ asl = listen_session_get_from_handle (lhc->h_pa_session_handle);
+ as->session_type = asl->session_type;
+ as->listener_handle = lhc->h_pa_session_handle;
+
+ /*
+ * Init session fifos and notify app
+ */
+ if ((rv = app_worker_init_accepted (as)))
+ {
+ HTTP_DBG (1, "failed to allocate fifos");
+ session_free (as);
+ return rv;
+ }
+
+ hc->h_pa_session_handle = session_handle (as);
+ hc->h_pa_wrk_index = as->app_wrk_index;
+ app_wrk = app_worker_get (as->app_wrk_index);
+
+ HTTP_DBG (1, "Accepted on listener %u new connection [%u]%x",
+ ts_listener->opaque, vlib_get_thread_index (), hc_index);
+
+ if ((rv = app_worker_accept_notify (app_wrk, as)))
+ {
+ HTTP_DBG (0, "app accept returned");
+ session_free (as);
+ return rv;
+ }
+
+ /* Avoid enqueuing small chunks of data on transport tx notifications. If
+ * the fifo is small (under 16K) we set the threshold to it's size, meaning
+ * a notification will be given when the fifo empties.
+ */
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+ thresh = clib_min (svm_fifo_size (ts->tx_fifo), HTTP_FIFO_THRESH);
+ svm_fifo_set_deq_thresh (ts->tx_fifo, thresh);
+
+ http_conn_timer_start (hc);
+
+ return 0;
+}
+
+static int
+http_ts_connected_callback (u32 http_app_index, u32 ho_hc_index, session_t *ts,
+ session_error_t err)
+{
+ u32 new_hc_index;
+ session_t *as;
+ http_conn_t *hc, *ho_hc;
+ app_worker_t *app_wrk;
+ int rv;
+
+ if (err)
+ {
+ clib_warning ("ERROR: %d", err);
+ return 0;
+ }
+
+ new_hc_index = http_conn_alloc_w_thread (ts->thread_index);
+ hc = http_conn_get_w_thread (new_hc_index, ts->thread_index);
+ ho_hc = http_conn_get_w_thread (ho_hc_index, 0);
+
+ ASSERT (ho_hc->state == HTTP_CONN_STATE_CONNECTING);
+
+ clib_memcpy_fast (hc, ho_hc, sizeof (*hc));
+
+ hc->c_thread_index = ts->thread_index;
+ hc->h_tc_session_handle = session_handle (ts);
+ hc->c_c_index = new_hc_index;
+ hc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
+ hc->state = HTTP_CONN_STATE_ESTABLISHED;
+ http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD);
+
+ ts->session_state = SESSION_STATE_READY;
+ ts->opaque = new_hc_index;
+
+ /* allocate app session and initialize */
+
+ as = session_alloc (hc->c_thread_index);
+ hc->c_s_index = as->session_index;
+ as->connection_index = hc->c_c_index;
+ as->app_wrk_index = hc->h_pa_wrk_index;
+ as->session_state = SESSION_STATE_READY;
+ as->opaque = hc->h_pa_app_api_ctx;
+ as->session_type = session_type_from_proto_and_ip (
+ TRANSPORT_PROTO_HTTP, session_type_is_ip4 (ts->session_type));
+
+ HTTP_DBG (1, "half-open hc index %d, hc index %d", ho_hc_index,
+ new_hc_index);
+
+ app_wrk = app_worker_get (hc->h_pa_wrk_index);
+ if (!app_wrk)
+ {
+ clib_warning ("no app worker");
+ return -1;
+ }
+
+ if ((rv = app_worker_init_connected (app_wrk, as)))
+ {
+ HTTP_DBG (1, "failed to allocate fifos");
+ session_free (as);
+ return rv;
+ }
+ app_worker_connect_notify (app_wrk, as, err, hc->h_pa_app_api_ctx);
+ hc->h_pa_session_handle = session_handle (as);
+ http_conn_timer_start (hc);
+
+ return 0;
+}
+
+static void
+http_ts_disconnect_callback (session_t *ts)
+{
+ http_conn_t *hc;
+
+ hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+
+ if (hc->state < HTTP_CONN_STATE_TRANSPORT_CLOSED)
+ hc->state = HTTP_CONN_STATE_TRANSPORT_CLOSED;
+
+ /* Nothing more to rx, propagate to app */
+ if (!svm_fifo_max_dequeue_cons (ts->rx_fifo))
+ session_transport_closing_notify (&hc->connection);
+}
+
+static void
+http_ts_reset_callback (session_t *ts)
+{
+ http_conn_t *hc;
+
+ hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+
+ hc->state = HTTP_CONN_STATE_CLOSED;
+ http_buffer_free (&hc->tx_buf);
+ http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+ session_transport_reset_notify (&hc->connection);
+
+ http_disconnect_transport (hc);
+}
+
+/**
+ * http error boilerplate
+ */
+static const char *http_error_template = "HTTP/1.1 %s\r\n"
+ "Date: %U GMT\r\n"
+ "Content-Type: text/html\r\n"
+ "Connection: close\r\n"
+ "Pragma: no-cache\r\n"
+ "Content-Length: 0\r\n\r\n";
+
+static const char *http_redirect_template = "HTTP/1.1 %s\r\n";
+
+/**
+ * http response boilerplate
+ */
+static const char *http_response_template = "HTTP/1.1 %s\r\n"
+ "Date: %U GMT\r\n"
+ "Expires: %U GMT\r\n"
+ "Server: %s\r\n"
+ "Content-Type: %s\r\n"
+ "Content-Length: %lu\r\n\r\n";
+
+static const char *http_request_template = "GET %s HTTP/1.1\r\n"
+ "User-Agent: VPP HTTP client\r\n"
+ "Accept: */*\r\n";
+
+static u32
+http_send_data (http_conn_t *hc, u8 *data, u32 length, u32 offset)
+{
+ const u32 max_burst = 64 << 10;
+ session_t *ts;
+ u32 to_send;
+ int sent;
+
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+
+ to_send = clib_min (length - offset, max_burst);
+ sent = svm_fifo_enqueue (ts->tx_fifo, to_send, data + offset);
+
+ if (sent <= 0)
+ return offset;
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+
+ return (offset + sent);
+}
+
+static void
+http_send_error (http_conn_t *hc, http_status_code_t ec)
+{
+ http_main_t *hm = &http_main;
+ u8 *data;
+ f64 now;
+
+ if (ec >= HTTP_N_STATUS)
+ ec = HTTP_STATUS_INTERNAL_ERROR;
+
+ now = clib_timebase_now (&hm->timebase);
+ data = format (0, http_error_template, http_status_code_str[ec],
+ format_clib_timebase_time, now);
+ http_send_data (hc, data, vec_len (data), 0);
+ vec_free (data);
+}
+
+static int
+http_read_message (http_conn_t *hc)
+{
+ u32 max_deq, cursize;
+ session_t *ts;
+ int n_read;
+
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+
+ cursize = vec_len (hc->rx_buf);
+ max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
+ if (PREDICT_FALSE (max_deq == 0))
+ return -1;
+
+ vec_validate (hc->rx_buf, cursize + max_deq - 1);
+ n_read = svm_fifo_dequeue (ts->rx_fifo, max_deq, hc->rx_buf + cursize);
+ ASSERT (n_read == max_deq);
+
+ if (svm_fifo_is_empty (ts->rx_fifo))
+ svm_fifo_unset_event (ts->rx_fifo);
+
+ vec_set_len (hc->rx_buf, cursize + n_read);
+ return 0;
+}
+
+static int
+v_find_index (u8 *vec, u32 offset, char *str)
+{
+ int start_index = offset;
+ u32 slen = (u32) strnlen_s_inline (str, 16);
+ u32 vlen = vec_len (vec);
+
+ ASSERT (slen > 0);
+
+ if (vlen <= slen)
+ return -1;
+
+ for (; start_index < (vlen - slen); start_index++)
+ {
+ if (!memcmp (vec + start_index, str, slen))
+ return start_index;
+ }
+
+ return -1;
+}
+
+static int
+http_parse_header (http_conn_t *hc, int *content_length)
+{
+ unformat_input_t input;
+ int i, len;
+ u8 *line;
+
+ i = v_find_index (hc->rx_buf, hc->rx_buf_offset, CONTENT_LEN_STR);
+ if (i < 0)
+ {
+ clib_warning ("cannot find '%s' in the header!", CONTENT_LEN_STR);
+ return -1;
+ }
+
+ hc->rx_buf_offset = i;
+
+ i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "\n");
+ if (i < 0)
+ {
+ clib_warning ("end of line missing; incomplete data");
+ return -1;
+ }
+
+ len = i - hc->rx_buf_offset;
+ line = vec_new (u8, len);
+ clib_memcpy (line, hc->rx_buf + hc->rx_buf_offset, len);
+
+ unformat_init_vector (&input, line);
+ if (!unformat (&input, CONTENT_LEN_STR "%d", content_length))
+ {
+ clib_warning ("failed to unformat content length!");
+ return -1;
+ }
+ unformat_free (&input);
+
+ /* skip rest of the header */
+ hc->rx_buf_offset += len;
+ i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "<html>");
+ if (i < 0)
+ {
+ clib_warning ("<html> tag not found");
+ return -1;
+ }
+ hc->rx_buf_offset = i;
+
+ return 0;
+}
+
+static http_sm_result_t
+http_state_wait_server_reply (http_conn_t *hc, transport_send_params_t *sp)
+{
+ int i, rv, content_length;
+ http_msg_t msg = {};
+ app_worker_t *app_wrk;
+ session_t *as;
+ http_status_code_t ec;
+
+ rv = http_read_message (hc);
+
+ /* Nothing yet, wait for data or timer expire */
+ if (rv)
+ return HTTP_SM_STOP;
+
+ if (vec_len (hc->rx_buf) < 8)
+ {
+ ec = HTTP_STATUS_BAD_REQUEST;
+ goto error;
+ }
+
+ if ((i = v_find_index (hc->rx_buf, 0, "200 OK")) >= 0)
+ {
+ msg.type = HTTP_MSG_REPLY;
+ msg.content_type = HTTP_CONTENT_TEXT_HTML;
+ msg.code = HTTP_STATUS_OK;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = 0;
+
+ rv = http_parse_header (hc, &content_length);
+ if (rv)
+ {
+ clib_warning ("failed to parse http reply");
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+ return -1;
+ }
+ msg.data.len = content_length;
+ u32 dlen = vec_len (hc->rx_buf) - hc->rx_buf_offset;
+ as = session_get_from_handle (hc->h_pa_session_handle);
+ svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) },
+ { &hc->rx_buf[hc->rx_buf_offset], dlen } };
+
+ rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2,
+ 0 /* allow partial */);
+ if (rv < 0)
+ {
+ clib_warning ("error enqueue");
+ return HTTP_SM_ERROR;
+ }
+
+ hc->rx_buf_offset += dlen;
+ hc->to_recv = content_length - dlen;
+
+ if (hc->rx_buf_offset == vec_len (hc->rx_buf))
+ {
+ vec_reset_length (hc->rx_buf);
+ hc->rx_buf_offset = 0;
+ }
+
+ if (hc->to_recv == 0)
+ {
+ hc->rx_buf_offset = 0;
+ vec_reset_length (hc->rx_buf);
+ http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD);
+ }
+ else
+ {
+ http_state_change (hc, HTTP_STATE_CLIENT_IO_MORE_DATA);
+ }
+
+ app_wrk = app_worker_get_if_valid (as->app_wrk_index);
+ if (app_wrk)
+ app_worker_rx_notify (app_wrk, as);
+ return HTTP_SM_STOP;
+ }
+ else
+ {
+ HTTP_DBG (0, "Unknown http method %v", hc->rx_buf);
+ ec = HTTP_STATUS_METHOD_NOT_ALLOWED;
+ goto error;
+ }
+
+error:
+
+ http_send_error (hc, ec);
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+
+ return HTTP_SM_ERROR;
+}
+
+static http_sm_result_t
+http_state_wait_client_method (http_conn_t *hc, transport_send_params_t *sp)
+{
+ http_status_code_t ec;
+ app_worker_t *app_wrk;
+ http_msg_t msg;
+ session_t *as;
+ int i, rv;
+ u32 len;
+ u8 *buf;
+
+ rv = http_read_message (hc);
+
+ /* Nothing yet, wait for data or timer expire */
+ if (rv)
+ return HTTP_SM_STOP;
+
+ if (vec_len (hc->rx_buf) < 8)
+ {
+ ec = HTTP_STATUS_BAD_REQUEST;
+ goto error;
+ }
+
+ if ((i = v_find_index (hc->rx_buf, 0, "GET ")) >= 0)
+ {
+ hc->method = HTTP_REQ_GET;
+ hc->rx_buf_offset = i + 5;
+
+ i = v_find_index (hc->rx_buf, hc->rx_buf_offset, "HTTP");
+ if (i < 0)
+ {
+ ec = HTTP_STATUS_BAD_REQUEST;
+ goto error;
+ }
+
+ HTTP_DBG (0, "GET method %v", hc->rx_buf);
+ len = i - hc->rx_buf_offset - 1;
+ }
+ else if ((i = v_find_index (hc->rx_buf, 0, "POST ")) >= 0)
+ {
+ hc->method = HTTP_REQ_POST;
+ hc->rx_buf_offset = i + 6;
+ len = vec_len (hc->rx_buf) - hc->rx_buf_offset - 1;
+ HTTP_DBG (0, "POST method %v", hc->rx_buf);
+ }
+ else
+ {
+ HTTP_DBG (0, "Unknown http method %v", hc->rx_buf);
+ ec = HTTP_STATUS_METHOD_NOT_ALLOWED;
+ goto error;
+ }
+
+ buf = &hc->rx_buf[hc->rx_buf_offset];
+
+ msg.type = HTTP_MSG_REQUEST;
+ msg.method_type = hc->method;
+ msg.content_type = HTTP_CONTENT_TEXT_HTML;
+ msg.data.type = HTTP_MSG_DATA_INLINE;
+ msg.data.len = len;
+
+ svm_fifo_seg_t segs[2] = { { (u8 *) &msg, sizeof (msg) }, { buf, len } };
+
+ as = session_get_from_handle (hc->h_pa_session_handle);
+ rv = svm_fifo_enqueue_segments (as->rx_fifo, segs, 2, 0 /* allow partial */);
+ if (rv < 0 || rv != sizeof (msg) + len)
+ {
+ clib_warning ("failed app enqueue");
+ /* This should not happen as we only handle 1 request per session,
+ * and fifo is allocated, but going forward we should consider
+ * rescheduling */
+ return HTTP_SM_ERROR;
+ }
+
+ vec_free (hc->rx_buf);
+ http_state_change (hc, HTTP_STATE_WAIT_APP_REPLY);
+
+ app_wrk = app_worker_get_if_valid (as->app_wrk_index);
+ if (app_wrk)
+ app_worker_rx_notify (app_wrk, as);
+
+ return HTTP_SM_STOP;
+
+error:
+
+ http_send_error (hc, ec);
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+
+ return HTTP_SM_ERROR;
+}
+
+static http_sm_result_t
+http_state_wait_app_reply (http_conn_t *hc, transport_send_params_t *sp)
+{
+ http_main_t *hm = &http_main;
+ u8 *header;
+ u32 offset;
+ f64 now;
+ session_t *as;
+ http_status_code_t sc;
+ http_msg_t msg;
+ int rv;
+
+ as = session_get_from_handle (hc->h_pa_session_handle);
+
+ rv = svm_fifo_dequeue (as->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.data.type > HTTP_MSG_DATA_PTR)
+ {
+ clib_warning ("no data");
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ goto error;
+ }
+
+ if (msg.type != HTTP_MSG_REPLY)
+ {
+ clib_warning ("unexpected message type %d", msg.type);
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ goto error;
+ }
+
+ http_buffer_init (&hc->tx_buf, msg_to_buf_type[msg.data.type], as->tx_fifo,
+ msg.data.len);
+
+ /*
+ * Add headers. For now:
+ * - current time
+ * - expiration time
+ * - server name
+ * - content type
+ * - data length
+ */
+ now = clib_timebase_now (&hm->timebase);
+
+ switch (msg.code)
+ {
+ case HTTP_STATUS_OK:
+ header =
+ format (0, http_response_template, http_status_code_str[msg.code],
+ /* Date */
+ format_clib_timebase_time, now,
+ /* Expires */
+ format_clib_timebase_time, now + 600.0,
+ /* Server */
+ hc->app_name,
+ /* Content type */
+ http_content_type_str[msg.content_type],
+ /* Length */
+ msg.data.len);
+ break;
+ case HTTP_STATUS_MOVED:
+ header =
+ format (0, http_redirect_template, http_status_code_str[msg.code]);
+ /* Location: http(s)://new-place already queued up as data */
+ break;
+ default:
+ return HTTP_SM_ERROR;
+ }
+
+ offset = http_send_data (hc, header, vec_len (header), 0);
+ if (offset != vec_len (header))
+ {
+ clib_warning ("couldn't send response header!");
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ vec_free (header);
+ goto error;
+ }
+ vec_free (header);
+
+ /* Start sending the actual data */
+ http_state_change (hc, HTTP_STATE_APP_IO_MORE_DATA);
+
+ ASSERT (sp->max_burst_size >= offset);
+ sp->max_burst_size -= offset;
+ return HTTP_SM_CONTINUE;
+
+error:
+ clib_warning ("unexpected msg type from app %u", msg.type);
+ http_send_error (hc, sc);
+ http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+ return HTTP_SM_STOP;
+}
+
+static http_sm_result_t
+http_state_wait_app_method (http_conn_t *hc, transport_send_params_t *sp)
+{
+ http_msg_t msg;
+ session_t *as;
+ u8 *buf = 0, *request;
+ u32 offset;
+ int rv;
+
+ as = session_get_from_handle (hc->h_pa_session_handle);
+
+ rv = svm_fifo_dequeue (as->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.data.type > HTTP_MSG_DATA_PTR)
+ {
+ clib_warning ("no data");
+ goto error;
+ }
+
+ if (msg.type != HTTP_MSG_REQUEST)
+ {
+ clib_warning ("unexpected message type %d", msg.type);
+ goto error;
+ }
+
+ vec_validate (buf, msg.data.len - 1);
+ rv = svm_fifo_dequeue (as->tx_fifo, msg.data.len, buf);
+ ASSERT (rv == msg.data.len);
+
+ request = format (0, http_request_template, buf);
+ offset = http_send_data (hc, request, vec_len (request), 0);
+ if (offset != vec_len (request))
+ {
+ clib_warning ("sending request failed!");
+ goto error;
+ }
+
+ http_state_change (hc, HTTP_STATE_WAIT_SERVER_REPLY);
+
+ vec_free (buf);
+ vec_free (request);
+
+ return HTTP_SM_STOP;
+
+error:
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+ return HTTP_SM_ERROR;
+}
+
+static http_sm_result_t
+http_state_client_io_more_data (http_conn_t *hc, transport_send_params_t *sp)
+{
+ session_t *as, *ts;
+ app_worker_t *app_wrk;
+ svm_fifo_seg_t _seg, *seg = &_seg;
+ u32 max_len, max_deq, max_enq, n_segs = 1;
+ int rv, len;
+
+ as = session_get_from_handle (hc->h_pa_session_handle);
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+
+ max_deq = svm_fifo_max_dequeue (ts->rx_fifo);
+ if (max_deq == 0)
+ {
+ HTTP_DBG (1, "no data to deq");
+ return HTTP_SM_STOP;
+ }
+
+ max_enq = svm_fifo_max_enqueue (as->rx_fifo);
+ if (max_enq == 0)
+ {
+ HTTP_DBG (1, "app's rx fifo full");
+ svm_fifo_add_want_deq_ntf (as->rx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ return HTTP_SM_STOP;
+ }
+
+ max_len = clib_min (max_enq, max_deq);
+ len = svm_fifo_segments (ts->rx_fifo, 0, seg, &n_segs, max_len);
+ if (len < 0)
+ {
+ HTTP_DBG (1, "svm_fifo_segments() len %d", len);
+ return HTTP_SM_STOP;
+ }
+
+ rv = svm_fifo_enqueue_segments (as->rx_fifo, seg, 1, 0 /* allow partial */);
+ if (rv < 0)
+ {
+ clib_warning ("data enqueue failed, rv: %d", rv);
+ return HTTP_SM_ERROR;
+ }
+
+ svm_fifo_dequeue_drop (ts->rx_fifo, rv);
+ if (rv > hc->to_recv)
+ {
+ clib_warning ("http protocol error: received more data than expected");
+ session_transport_closing_notify (&hc->connection);
+ http_disconnect_transport (hc);
+ http_state_change (hc, HTTP_STATE_WAIT_APP_METHOD);
+ return HTTP_SM_ERROR;
+ }
+ hc->to_recv -= rv;
+ HTTP_DBG (1, "drained %d from ts; remains %d", rv, hc->to_recv);
+
+ app_wrk = app_worker_get_if_valid (as->app_wrk_index);
+ if (app_wrk)
+ app_worker_rx_notify (app_wrk, as);
+
+ if (svm_fifo_max_dequeue_cons (ts->rx_fifo))
+ session_enqueue_notify (ts);
+
+ return HTTP_SM_STOP;
+}
+
+static http_sm_result_t
+http_state_app_io_more_data (http_conn_t *hc, transport_send_params_t *sp)
+{
+ u32 max_send = 64 << 10, n_segs;
+ http_buffer_t *hb = &hc->tx_buf;
+ svm_fifo_seg_t *seg;
+ session_t *ts;
+ int sent = 0;
+
+ max_send = clib_min (max_send, sp->max_burst_size);
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+ if ((seg = http_buffer_get_segs (hb, max_send, &n_segs)))
+ sent = svm_fifo_enqueue_segments (ts->tx_fifo, seg, n_segs,
+ 1 /* allow partial */);
+
+ if (sent > 0)
+ {
+ /* Ask scheduler to notify app of deq event if needed */
+ sp->bytes_dequeued += http_buffer_drain (hb, sent);
+ sp->max_burst_size -= sent;
+ }
+
+ /* Not finished sending all data */
+ if (!http_buffer_is_drained (hb))
+ {
+ if (sent && svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
+
+ if (svm_fifo_max_enqueue (ts->tx_fifo) < HTTP_FIFO_THRESH)
+ {
+ /* Deschedule http session and wait for deq notification if
+ * underlying ts tx fifo almost full */
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ transport_connection_deschedule (&hc->connection);
+ sp->flags |= TRANSPORT_SND_F_DESCHED;
+ }
+ }
+ else
+ {
+ if (sent && svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX_FLUSH);
+
+ /* Finished transaction, back to HTTP_STATE_WAIT_METHOD */
+ http_state_change (hc, HTTP_STATE_WAIT_CLIENT_METHOD);
+ http_buffer_free (&hc->tx_buf);
+ }
+
+ return HTTP_SM_STOP;
+}
+
+typedef http_sm_result_t (*http_sm_handler) (http_conn_t *,
+ transport_send_params_t *sp);
+
+static http_sm_handler state_funcs[HTTP_N_STATES] = {
+ 0, /* idle state */
+ http_state_wait_app_method,
+ http_state_wait_client_method,
+ http_state_wait_server_reply,
+ http_state_wait_app_reply,
+ http_state_client_io_more_data,
+ http_state_app_io_more_data,
+};
+
+static void
+http_req_run_state_machine (http_conn_t *hc, transport_send_params_t *sp)
+{
+ http_sm_result_t res;
+
+ do
+ {
+ res = state_funcs[hc->http_state](hc, sp);
+ if (res == HTTP_SM_ERROR)
+ {
+ HTTP_DBG (1, "error in state machine %d", res);
+ return;
+ }
+ }
+ while (res == HTTP_SM_CONTINUE);
+
+ /* Reset the session expiration timer */
+ http_conn_timer_update (hc);
+}
+
+static int
+http_ts_rx_callback (session_t *ts)
+{
+ http_conn_t *hc;
+
+ hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+ if (!hc)
+ {
+ clib_warning ("http connection not found (ts %d)", ts->opaque);
+ return -1;
+ }
+
+ if (hc->state == HTTP_CONN_STATE_CLOSED)
+ {
+ svm_fifo_dequeue_drop_all (ts->tx_fifo);
+ return 0;
+ }
+
+ http_req_run_state_machine (hc, 0);
+
+ if (hc->state == HTTP_CONN_STATE_TRANSPORT_CLOSED)
+ {
+ if (!svm_fifo_max_dequeue_cons (ts->rx_fifo))
+ session_transport_closing_notify (&hc->connection);
+ }
+ return 0;
+}
+
+int
+http_ts_builtin_tx_callback (session_t *ts)
+{
+ http_conn_t *hc;
+
+ hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+ transport_connection_reschedule (&hc->connection);
+
+ return 0;
+}
+
+static void
+http_ts_cleanup_callback (session_t *ts, session_cleanup_ntf_t ntf)
+{
+ http_conn_t *hc;
+
+ if (ntf == SESSION_CLEANUP_TRANSPORT)
+ return;
+
+ hc = http_conn_get_w_thread (ts->opaque, ts->thread_index);
+ if (!hc)
+ {
+ clib_warning ("no http connection for %u", ts->session_index);
+ return;
+ }
+
+ vec_free (hc->rx_buf);
+
+ http_buffer_free (&hc->tx_buf);
+ http_conn_timer_stop (hc);
+
+ session_transport_delete_notify (&hc->connection);
+ http_conn_free (hc);
+}
+
+int
+http_add_segment_callback (u32 client_index, u64 segment_handle)
+{
+ /* No-op for builtin */
+ return 0;
+}
+
+int
+http_del_segment_callback (u32 client_index, u64 segment_handle)
+{
+ return 0;
+}
+
+static session_cb_vft_t http_app_cb_vft = {
+ .session_accept_callback = http_ts_accept_callback,
+ .session_disconnect_callback = http_ts_disconnect_callback,
+ .session_connected_callback = http_ts_connected_callback,
+ .session_reset_callback = http_ts_reset_callback,
+ .session_cleanup_callback = http_ts_cleanup_callback,
+ .add_segment_callback = http_add_segment_callback,
+ .del_segment_callback = http_del_segment_callback,
+ .builtin_app_rx_callback = http_ts_rx_callback,
+ .builtin_app_tx_callback = http_ts_builtin_tx_callback,
+};
+
+static clib_error_t *
+http_transport_enable (vlib_main_t *vm, u8 is_en)
+{
+ vnet_app_detach_args_t _da, *da = &_da;
+ vnet_app_attach_args_t _a, *a = &_a;
+ u64 options[APP_OPTIONS_N_OPTIONS];
+ http_main_t *hm = &http_main;
+
+ if (!is_en)
+ {
+ da->app_index = hm->app_index;
+ da->api_client_index = APP_INVALID_INDEX;
+ vnet_application_detach (da);
+ return 0;
+ }
+
+ vec_validate (hm->wrk, vlib_num_workers ());
+
+ clib_memset (a, 0, sizeof (*a));
+ clib_memset (options, 0, sizeof (options));
+
+ a->session_cb_vft = &http_app_cb_vft;
+ a->api_client_index = APP_INVALID_INDEX;
+ a->options = options;
+ a->name = format (0, "http");
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = hm->first_seg_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = hm->add_seg_size;
+ a->options[APP_OPTIONS_RX_FIFO_SIZE] = hm->fifo_size;
+ a->options[APP_OPTIONS_TX_FIFO_SIZE] = hm->fifo_size;
+ a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+ a->options[APP_OPTIONS_FLAGS] |= APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
+ a->options[APP_OPTIONS_FLAGS] |= APP_OPTIONS_FLAGS_IS_TRANSPORT_APP;
+
+ if (vnet_application_attach (a))
+ return clib_error_return (0, "failed to attach http app");
+
+ hm->app_index = a->app_index;
+ vec_free (a->name);
+
+ clib_timebase_init (&hm->timebase, 0 /* GMT */, CLIB_TIMEBASE_DAYLIGHT_NONE,
+ &vm->clib_time /* share the system clock */);
+
+ http_timers_init (vm, http_conn_timeout_cb);
+
+ return 0;
+}
+
+static int
+http_transport_connect (transport_endpoint_cfg_t *tep)
+{
+ vnet_connect_args_t _cargs, *cargs = &_cargs;
+ http_main_t *hm = &http_main;
+ session_endpoint_cfg_t *sep = (session_endpoint_cfg_t *) tep;
+ application_t *app;
+ http_conn_t *hc;
+ int error;
+ u32 hc_index;
+ app_worker_t *app_wrk = app_worker_get (sep->app_wrk_index);
+
+ clib_memset (cargs, 0, sizeof (*cargs));
+ clib_memcpy (&cargs->sep_ext, sep, sizeof (session_endpoint_cfg_t));
+ cargs->sep.transport_proto = TRANSPORT_PROTO_TCP;
+ cargs->app_index = hm->app_index;
+ app = application_get (app_wrk->app_index);
+ cargs->sep_ext.ns_index = app->ns_index;
+
+ hc_index = http_conn_alloc_w_thread (0 /* ts->thread_index */);
+ hc = http_conn_get_w_thread (hc_index, 0);
+ hc->h_pa_wrk_index = sep->app_wrk_index;
+ hc->h_pa_app_api_ctx = sep->opaque;
+ hc->state = HTTP_CONN_STATE_CONNECTING;
+ cargs->api_context = hc_index;
+
+ HTTP_DBG (1, "hc ho_index %x", hc_index);
+
+ if ((error = vnet_connect (cargs)))
+ return error;
+
+ return 0;
+}
+
+static u32
+http_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
+{
+ vnet_listen_args_t _args = {}, *args = &_args;
+ session_t *ts_listener, *app_listener;
+ http_main_t *hm = &http_main;
+ session_endpoint_cfg_t *sep;
+ app_worker_t *app_wrk;
+ transport_proto_t tp;
+ app_listener_t *al;
+ application_t *app;
+ http_conn_t *lhc;
+ u32 lhc_index;
+
+ sep = (session_endpoint_cfg_t *) tep;
+
+ app_wrk = app_worker_get (sep->app_wrk_index);
+ app = application_get (app_wrk->app_index);
+
+ args->app_index = hm->app_index;
+ args->sep_ext = *sep;
+ args->sep_ext.ns_index = app->ns_index;
+ tp = sep->ext_cfg ? TRANSPORT_PROTO_TLS : TRANSPORT_PROTO_TCP;
+ args->sep_ext.transport_proto = tp;
+
+ if (vnet_listen (args))
+ return SESSION_INVALID_INDEX;
+
+ lhc_index = http_listener_alloc ();
+ lhc = http_listener_get (lhc_index);
+
+ /* Grab transport connection listener and link to http listener */
+ lhc->h_tc_session_handle = args->handle;
+ al = app_listener_get_w_handle (lhc->h_tc_session_handle);
+ ts_listener = app_listener_get_session (al);
+ ts_listener->opaque = lhc_index;
+
+ /* Grab application listener and link to http listener */
+ app_listener = listen_session_get (app_listener_index);
+ lhc->h_pa_wrk_index = sep->app_wrk_index;
+ lhc->h_pa_session_handle = listen_session_get_handle (app_listener);
+ lhc->c_s_index = app_listener_index;
+ lhc->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
+
+ if (vec_len (app->name))
+ lhc->app_name = vec_dup (app->name);
+ else
+ lhc->app_name = format (0, "VPP server app");
+
+ return lhc_index;
+}
+
+static u32
+http_stop_listen (u32 listener_index)
+{
+ http_conn_t *lhc;
+ int rv;
+
+ lhc = http_listener_get (listener_index);
+
+ vnet_unlisten_args_t a = {
+ .handle = lhc->h_tc_session_handle,
+ .app_index = http_main.app_index,
+ .wrk_map_index = 0 /* default wrk */
+ };
+
+ if ((rv = vnet_unlisten (&a)))
+ clib_warning ("unlisten returned %d", rv);
+
+ http_listener_free (lhc);
+
+ return 0;
+}
+
+static void
+http_transport_close (u32 hc_index, u32 thread_index)
+{
+ session_t *as;
+ http_conn_t *hc;
+
+ HTTP_DBG (1, "App disconnecting %x", hc_index);
+
+ hc = http_conn_get_w_thread (hc_index, thread_index);
+ if (hc->state == HTTP_CONN_STATE_CONNECTING)
+ {
+ hc->state = HTTP_CONN_STATE_APP_CLOSED;
+ http_disconnect_transport (hc);
+ return;
+ }
+
+ as = session_get_from_handle (hc->h_pa_session_handle);
+
+ /* Nothing more to send, confirm close */
+ if (!svm_fifo_max_dequeue_cons (as->tx_fifo))
+ {
+ session_transport_closed_notify (&hc->connection);
+ http_disconnect_transport (hc);
+ }
+ else
+ {
+ /* Wait for all data to be written to ts */
+ hc->state = HTTP_CONN_STATE_APP_CLOSED;
+ }
+}
+
+static transport_connection_t *
+http_transport_get_connection (u32 hc_index, u32 thread_index)
+{
+ http_conn_t *hc = http_conn_get_w_thread (hc_index, thread_index);
+ return &hc->connection;
+}
+
+static transport_connection_t *
+http_transport_get_listener (u32 listener_index)
+{
+ http_conn_t *lhc = http_listener_get (listener_index);
+ return &lhc->connection;
+}
+
+static int
+http_app_tx_callback (void *session, transport_send_params_t *sp)
+{
+ session_t *as = (session_t *) session;
+ u32 max_burst_sz, sent;
+ http_conn_t *hc;
+
+ HTTP_DBG (1, "app session conn index %x", as->connection_index);
+
+ hc = http_conn_get_w_thread (as->connection_index, as->thread_index);
+ if (!http_state_is_tx_valid (hc))
+ {
+ if (hc->state != HTTP_CONN_STATE_CLOSED)
+ clib_warning ("app data req state '%U' session state %u",
+ format_http_state, hc->http_state, hc->state);
+ svm_fifo_dequeue_drop_all (as->tx_fifo);
+ return 0;
+ }
+
+ max_burst_sz = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS;
+ sp->max_burst_size = max_burst_sz;
+
+ http_req_run_state_machine (hc, sp);
+
+ if (hc->state == HTTP_CONN_STATE_APP_CLOSED)
+ {
+ if (!svm_fifo_max_dequeue_cons (as->tx_fifo))
+ http_disconnect_transport (hc);
+ }
+
+ sent = max_burst_sz - sp->max_burst_size;
+
+ return sent > 0 ? clib_max (sent / TRANSPORT_PACER_MIN_MSS, 1) : 0;
+}
+
+static void
+http_transport_get_endpoint (u32 hc_index, u32 thread_index,
+ transport_endpoint_t *tep, u8 is_lcl)
+{
+ http_conn_t *hc = http_conn_get_w_thread (hc_index, thread_index);
+ session_t *ts;
+
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+ session_get_endpoint (ts, tep, is_lcl);
+}
+
+static u8 *
+format_http_connection (u8 *s, va_list *args)
+{
+ http_conn_t *hc = va_arg (*args, http_conn_t *);
+ session_t *ts;
+
+ ts = session_get_from_handle (hc->h_tc_session_handle);
+ s = format (s, "[%d:%d][H] app_wrk %u ts %d:%d", hc->c_thread_index,
+ hc->c_s_index, hc->h_pa_wrk_index, ts->thread_index,
+ ts->session_index);
+
+ return s;
+}
+
+static u8 *
+format_http_listener (u8 *s, va_list *args)
+{
+ http_conn_t *lhc = va_arg (*args, http_conn_t *);
+ app_listener_t *al;
+ session_t *lts;
+
+ al = app_listener_get_w_handle (lhc->h_tc_session_handle);
+ lts = app_listener_get_session (al);
+ s = format (s, "[%d:%d][H] app_wrk %u ts %d:%d", lhc->c_thread_index,
+ lhc->c_s_index, lhc->h_pa_wrk_index, lts->thread_index,
+ lts->session_index);
+
+ return s;
+}
+
+static u8 *
+format_http_conn_state (u8 *s, va_list *args)
+{
+ http_conn_t *hc = va_arg (*args, http_conn_t *);
+
+ switch (hc->state)
+ {
+ case HTTP_CONN_STATE_LISTEN:
+ s = format (s, "LISTEN");
+ break;
+ case HTTP_CONN_STATE_CONNECTING:
+ s = format (s, "CONNECTING");
+ break;
+ case HTTP_CONN_STATE_ESTABLISHED:
+ s = format (s, "ESTABLISHED");
+ break;
+ case HTTP_CONN_STATE_TRANSPORT_CLOSED:
+ s = format (s, "TRANSPORT_CLOSED");
+ break;
+ case HTTP_CONN_STATE_APP_CLOSED:
+ s = format (s, "APP_CLOSED");
+ break;
+ case HTTP_CONN_STATE_CLOSED:
+ s = format (s, "CLOSED");
+ break;
+ }
+
+ return s;
+}
+
+static u8 *
+format_http_transport_connection (u8 *s, va_list *args)
+{
+ u32 tc_index = va_arg (*args, u32);
+ u32 thread_index = va_arg (*args, u32);
+ u32 verbose = va_arg (*args, u32);
+ http_conn_t *hc;
+
+ hc = http_conn_get_w_thread (tc_index, thread_index);
+
+ s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_http_connection, hc);
+ if (verbose)
+ {
+ s =
+ format (s, "%-" SESSION_CLI_STATE_LEN "U", format_http_conn_state, hc);
+ if (verbose > 1)
+ s = format (s, "\n");
+ }
+
+ return s;
+}
+
+static u8 *
+format_http_transport_listener (u8 *s, va_list *args)
+{
+ u32 tc_index = va_arg (*args, u32);
+ u32 __clib_unused thread_index = va_arg (*args, u32);
+ u32 __clib_unused verbose = va_arg (*args, u32);
+ http_conn_t *lhc = http_listener_get (tc_index);
+
+ s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_http_listener, lhc);
+ if (verbose)
+ s =
+ format (s, "%-" SESSION_CLI_STATE_LEN "U", format_http_conn_state, lhc);
+ return s;
+}
+
+static const transport_proto_vft_t http_proto = {
+ .enable = http_transport_enable,
+ .connect = http_transport_connect,
+ .start_listen = http_start_listen,
+ .stop_listen = http_stop_listen,
+ .close = http_transport_close,
+ .custom_tx = http_app_tx_callback,
+ .get_connection = http_transport_get_connection,
+ .get_listener = http_transport_get_listener,
+ .get_transport_endpoint = http_transport_get_endpoint,
+ .format_connection = format_http_transport_connection,
+ .format_listener = format_http_transport_listener,
+ .transport_options = {
+ .name = "http",
+ .short_name = "H",
+ .tx_type = TRANSPORT_TX_INTERNAL,
+ .service_type = TRANSPORT_SERVICE_APP,
+ },
+};
+
+static clib_error_t *
+http_transport_init (vlib_main_t *vm)
+{
+ http_main_t *hm = &http_main;
+
+ transport_register_protocol (TRANSPORT_PROTO_HTTP, &http_proto,
+ FIB_PROTOCOL_IP4, ~0);
+ transport_register_protocol (TRANSPORT_PROTO_HTTP, &http_proto,
+ FIB_PROTOCOL_IP6, ~0);
+
+ /* Default values, configurable via startup conf */
+ hm->add_seg_size = 256 << 20;
+ hm->first_seg_size = 32 << 20;
+ hm->fifo_size = 512 << 10;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (http_transport_init);
+
+static clib_error_t *
+http_config_fn (vlib_main_t *vm, unformat_input_t *input)
+{
+ http_main_t *hm = &http_main;
+ uword mem_sz;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "first-segment-size %U", unformat_memory_size,
+ &mem_sz))
+ {
+ hm->first_seg_size = clib_max (mem_sz, 1 << 20);
+ if (hm->first_seg_size != mem_sz)
+ clib_warning ("first seg size too small %u", mem_sz);
+ }
+ else if (unformat (input, "add-segment-size %U", unformat_memory_size,
+ &mem_sz))
+ {
+ hm->add_seg_size = clib_max (mem_sz, 1 << 20);
+ if (hm->add_seg_size != mem_sz)
+ clib_warning ("add seg size too small %u", mem_sz);
+ }
+ else if (unformat (input, "fifo-size %U", unformat_memory_size, &mem_sz))
+ {
+ hm->fifo_size = clib_clamp (mem_sz, 4 << 10, 2 << 30);
+ if (hm->fifo_size != mem_sz)
+ clib_warning ("invalid fifo size %lu", mem_sz);
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (http_config_fn, "http");
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Hypertext Transfer Protocol (HTTP)",
+ .default_disabled = 0,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http/http.h b/src/plugins/http/http.h
new file mode 100644
index 00000000000..c9912dd6db8
--- /dev/null
+++ b/src/plugins/http/http.h
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP_H_
+#define SRC_PLUGINS_HTTP_HTTP_H_
+
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+#include <vppinfra/time_range.h>
+
+#include <vnet/session/application_interface.h>
+#include <vnet/session/application.h>
+#include <http/http_buffer.h>
+
+#define HTTP_DEBUG 0
+
+#if HTTP_DEBUG
+#define HTTP_DBG(_lvl, _fmt, _args...) \
+ if (_lvl <= HTTP_DEBUG) \
+ clib_warning (_fmt, ##_args)
+#else
+#define HTTP_DBG(_lvl, _fmt, _args...)
+#endif
+
+typedef struct http_conn_id_
+{
+ union
+ {
+ session_handle_t app_session_handle;
+ u32 parent_app_api_ctx;
+ };
+ session_handle_t tc_session_handle;
+ u32 parent_app_wrk_index;
+} http_conn_id_t;
+
+STATIC_ASSERT (sizeof (http_conn_id_t) <= TRANSPORT_CONN_ID_LEN,
+ "ctx id must be less than TRANSPORT_CONN_ID_LEN");
+
+typedef enum http_conn_state_
+{
+ HTTP_CONN_STATE_LISTEN,
+ HTTP_CONN_STATE_CONNECTING,
+ HTTP_CONN_STATE_ESTABLISHED,
+ HTTP_CONN_STATE_TRANSPORT_CLOSED,
+ HTTP_CONN_STATE_APP_CLOSED,
+ HTTP_CONN_STATE_CLOSED
+} http_conn_state_t;
+
+typedef enum http_state_
+{
+ HTTP_STATE_IDLE = 0,
+ HTTP_STATE_WAIT_APP_METHOD,
+ HTTP_STATE_WAIT_CLIENT_METHOD,
+ HTTP_STATE_WAIT_SERVER_REPLY,
+ HTTP_STATE_WAIT_APP_REPLY,
+ HTTP_STATE_CLIENT_IO_MORE_DATA,
+ HTTP_STATE_APP_IO_MORE_DATA,
+ HTTP_N_STATES,
+} http_state_t;
+
+typedef enum http_req_method_
+{
+ HTTP_REQ_GET = 0,
+ HTTP_REQ_POST,
+} http_req_method_t;
+
+typedef enum http_msg_type_
+{
+ HTTP_MSG_REQUEST,
+ HTTP_MSG_REPLY
+} http_msg_type_t;
+
+#define foreach_http_content_type \
+ _ (APP_7Z, ".7z", "application / x - 7z - compressed") \
+ _ (APP_DOC, ".doc", "application / msword") \
+ _ (APP_DOCX, ".docx", \
+ "application / vnd.openxmlformats - " \
+ "officedocument.wordprocessingml.document") \
+ _ (APP_EPUB, ".epub", "application / epub + zip") \
+ _ (APP_FONT, ".eot", "application / vnd.ms - fontobject") \
+ _ (APP_JAR, ".jar", "application / java - archive") \
+ _ (APP_JSON, ".json", "application / json") \
+ _ (APP_JSON_LD, ".jsonld", "application / ld + json") \
+ _ (APP_MPKG, ".mpkg", "application / vnd.apple.installer + xml") \
+ _ (APP_ODP, ".odp", "application / vnd.oasis.opendocument.presentation") \
+ _ (APP_ODS, ".ods", "application / vnd.oasis.opendocument.spreadsheet") \
+ _ (APP_ODT, ".odt", "application / vnd.oasis.opendocument.text") \
+ _ (APP_OGX, ".ogx", "application / ogg") \
+ _ (APP_PDF, ".pdf", "application / pdf") \
+ _ (APP_PHP, ".php", "application / x - httpd - php") \
+ _ (APP_PPT, ".ppt", "application / vnd.ms - powerpoint") \
+ _ (APP_PPTX, ".pptx", "application / vnd.ms - powerpoint") \
+ _ (APP_RAR, ".rar", "application / vnd.rar") \
+ _ (APP_RTF, ".rtf", "application / rtf") \
+ _ (APP_SH, ".sh", "application / x - sh") \
+ _ (APP_TAR, ".tar", "application / x - tar") \
+ _ (APP_VSD, ".vsd", "application / vnd.visio") \
+ _ (APP_XHTML, ".xhtml", "application / xhtml + xml") \
+ _ (APP_XLS, ".xls", "application / vnd.ms - excel") \
+ _ (APP_XML, ".xml", "application / xml") \
+ _ (APP_XSLX, ".xlsx", \
+ "application / vnd.openxmlformats - officedocument.spreadsheetml.sheet") \
+ _ (APP_XUL, ".xul", "application / vnd.mozilla.xul + xml") \
+ _ (APP_ZIP, ".zip", "application / zip") \
+ _ (AUDIO_AAC, ".aac", "audio / aac") \
+ _ (AUDIO_CD, ".cda", "application / x - cdf") \
+ _ (AUDIO_WAV, ".wav", "audio / wav") \
+ _ (AUDIO_WEBA, ".weba", "audio / webm") \
+ _ (AUDO_MIDI, ".midi", "audio / midi") \
+ _ (AUDO_MID, ".mid", "audo / midi") \
+ _ (AUDO_MP3, ".mp3", "audio / mpeg") \
+ _ (AUDO_OGA, ".oga", "audio / ogg") \
+ _ (AUDO_OPUS, ".opus", "audio / opus") \
+ _ (APP_OCTET_STREAM, ".bin", "application / octet - stream") \
+ _ (BZIP2, ".bz2", "application / x - bzip2") \
+ _ (BZIP, ".bz", "application / x - bzip") \
+ _ (FONT_OTF, ".otf", "font / otf") \
+ _ (FONT_TTF, ".ttf", "font / ttf") \
+ _ (FONT_WOFF2, ".woff2", "font / woff2") \
+ _ (FONT_WOFF, ".woff", "font / woff") \
+ _ (GZIP, ".gz", "application / gzip") \
+ _ (IMAGE_AVIF, ".avif", "image / avif") \
+ _ (IMAGE_BMP, ".bmp", "image / bmp") \
+ _ (IMAGE_GIF, ".gif", "image / gif") \
+ _ (IMAGE_ICON, ".ico", "image / vnd.microsoft.icon") \
+ _ (IMAGE_JPEG, ".jpeg", "image / jpeg") \
+ _ (IMAGE_JPG, ".jpg", "image / jpeg") \
+ _ (IMAGE_PNG, ".png", "image / png") \
+ _ (IMAGE_SVG, ".svg", "image / svg + xml") \
+ _ (IMAGE_TIFF, ".tiff", "image / tiff") \
+ _ (IMAGE_TIF, ".tif", "image / tiff") \
+ _ (IMAGE_WEBP, ".webp", "image / webp") \
+ _ (SCRIPT_CSH, ".csh", "application / x - csh") \
+ _ (TEXT_ABIWORD, ".abw", "application / x - abiword") \
+ _ (TEXT_ARCHIVE, ".arc", "application / x - freearc") \
+ _ (TEXT_AZW, ".azw", "application / vnd.amazon.ebook") \
+ _ (TEXT_CALENDAR, ".ics", "text / calendar") \
+ _ (TEXT_CSS, ".css", "text / css") \
+ _ (TEXT_CSV, ".csv", "text / csv") \
+ _ (TEXT_HTM, ".htm", "text / html") \
+ _ (TEXT_HTML, ".html", "text / html") \
+ _ (TEXT_JS, ".js", "text / javascript") \
+ _ (TEXT_MJS, ".mjs", "text / javascript") \
+ _ (TEXT_PLAIN, ".txt", "text / plain") \
+ _ (VIDEO_3GP2, ".3g2", "video / 3gpp2") \
+ _ (VIDEO_3GP, ".3gp", "video / 3gpp") \
+ _ (VIDEO_AVI, ".avi", "video / x - msvideo") \
+ _ (VIDEO_MP4, ".mp4", "video / mp4") \
+ _ (VIDEO_MPEG, ".mpeg", "video / mpeg") \
+ _ (VIDEO_OGG, ".ogv", "video / ogg") \
+ _ (VIDEO_TS, ".ts", "video / mp2t") \
+ _ (VIDEO_WEBM, ".webm", "video / webm")
+
+typedef enum http_content_type_
+{
+#define _(s, ext, str) HTTP_CONTENT_##s,
+ foreach_http_content_type
+#undef _
+} http_content_type_t;
+
+#define foreach_http_status_code \
+ _ (200, OK, "200 OK") \
+ _ (301, MOVED, "301 Moved Permanently") \
+ _ (400, BAD_REQUEST, "400 Bad Request") \
+ _ (404, NOT_FOUND, "404 Not Found") \
+ _ (405, METHOD_NOT_ALLOWED, "405 Method Not Allowed") \
+ _ (500, INTERNAL_ERROR, "500 Internal Server Error")
+
+typedef enum http_status_code_
+{
+#define _(c, s, str) HTTP_STATUS_##s,
+ foreach_http_status_code
+#undef _
+ HTTP_N_STATUS
+} http_status_code_t;
+
+typedef enum http_msg_data_type_
+{
+ HTTP_MSG_DATA_INLINE,
+ HTTP_MSG_DATA_PTR
+} http_msg_data_type_t;
+
+typedef struct http_msg_data_
+{
+ http_msg_data_type_t type;
+ u64 len;
+ u8 data[0];
+} http_msg_data_t;
+
+typedef struct http_msg_
+{
+ http_msg_type_t type;
+ union
+ {
+ http_req_method_t method_type;
+ http_status_code_t code;
+ };
+ http_content_type_t content_type;
+ http_msg_data_t data;
+} http_msg_t;
+
+typedef struct http_tc_
+{
+ union
+ {
+ transport_connection_t connection;
+ http_conn_id_t c_http_conn_id;
+ };
+#define h_tc_session_handle c_http_conn_id.tc_session_handle
+#define h_pa_wrk_index c_http_conn_id.parent_app_wrk_index
+#define h_pa_session_handle c_http_conn_id.app_session_handle
+#define h_pa_app_api_ctx c_http_conn_id.parent_app_api_ctx
+#define h_hc_index connection.c_index
+
+ http_conn_state_t state;
+ u32 timer_handle;
+ u8 *app_name;
+
+ /*
+ * Current request
+ */
+ http_state_t http_state;
+ http_req_method_t method;
+ u8 *rx_buf;
+ u32 rx_buf_offset;
+ http_buffer_t tx_buf;
+ u32 to_recv;
+ u32 bytes_dequeued;
+} http_conn_t;
+
+typedef struct http_worker_
+{
+ http_conn_t *conn_pool;
+} http_worker_t;
+
+typedef struct http_main_
+{
+ http_worker_t *wrk;
+ http_conn_t *listener_pool;
+ u32 app_index;
+
+ clib_timebase_t timebase;
+
+ /*
+ * Runtime config
+ */
+ u8 debug_level;
+
+ /*
+ * Config
+ */
+ u64 first_seg_size;
+ u64 add_seg_size;
+ u32 fifo_size;
+} http_main_t;
+
+static inline int
+http_state_is_tx_valid (http_conn_t *hc)
+{
+ http_state_t state = hc->http_state;
+ return (state == HTTP_STATE_APP_IO_MORE_DATA ||
+ state == HTTP_STATE_CLIENT_IO_MORE_DATA ||
+ state == HTTP_STATE_WAIT_APP_REPLY ||
+ state == HTTP_STATE_WAIT_APP_METHOD);
+}
+
+#endif /* SRC_PLUGINS_HTTP_HTTP_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http/http_buffer.c b/src/plugins/http/http_buffer.c
new file mode 100644
index 00000000000..f3dc308dbf8
--- /dev/null
+++ b/src/plugins/http/http_buffer.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <http/http_buffer.h>
+#include <http/http.h>
+
+static http_buffer_vft_t buf_vfts[HTTP_BUFFER_PTR + 1];
+
+#define HTTP_BUFFER_REGISTER_VFT(type, vft) \
+ static void __attribute__ ((constructor)) http_buf_init_##type (void) \
+ { \
+ buf_vfts[type] = vft; \
+ }
+
+typedef struct http_buffer_fifo_
+{
+ svm_fifo_t *src;
+ svm_fifo_seg_t *segs;
+ u64 len;
+ u64 offset;
+} http_buffer_fifo_t;
+
+STATIC_ASSERT (sizeof (http_buffer_fifo_t) <= HTTP_BUFFER_DATA_SZ, "buf data");
+
+static void
+buf_fifo_init (http_buffer_t *hb, void *data, u64 len)
+{
+ svm_fifo_t *f = (svm_fifo_t *) data;
+ http_buffer_fifo_t *bf;
+
+ bf = (http_buffer_fifo_t *) &hb->data;
+
+ bf->len = len;
+ bf->offset = 0;
+ bf->src = f;
+ bf->segs = 0;
+}
+
+static void
+buf_fifo_free (http_buffer_t *hb)
+{
+ http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data;
+
+ bf->src = 0;
+ vec_free (bf->segs);
+}
+
+static svm_fifo_seg_t *
+buf_fifo_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs)
+{
+ http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data;
+
+ u32 _n_segs = 5;
+ int len;
+
+ max_len = clib_min (bf->len - bf->offset, (u64) max_len);
+
+ vec_validate (bf->segs, _n_segs);
+
+ len = svm_fifo_segments (bf->src, 0, bf->segs, &_n_segs, max_len);
+ if (len < 0)
+ return 0;
+
+ *n_segs = _n_segs;
+
+ HTTP_DBG (1, "available to send %u n_segs %u", len, *n_segs);
+
+ return bf->segs;
+}
+
+static u32
+buf_fifo_drain (http_buffer_t *hb, u32 len)
+{
+ http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data;
+
+ bf->offset += len;
+ svm_fifo_dequeue_drop (bf->src, len);
+ HTTP_DBG (1, "drained %u len %u offset %u", len, bf->len, bf->offset);
+
+ return len;
+}
+
+static u8
+buf_fifo_is_drained (http_buffer_t *hb)
+{
+ http_buffer_fifo_t *bf = (http_buffer_fifo_t *) &hb->data;
+
+ ASSERT (bf->offset <= bf->len);
+ return (bf->offset == bf->len);
+}
+
+const static http_buffer_vft_t buf_fifo_vft = {
+ .init = buf_fifo_init,
+ .free = buf_fifo_free,
+ .get_segs = buf_fifo_get_segs,
+ .drain = buf_fifo_drain,
+ .is_drained = buf_fifo_is_drained,
+};
+
+HTTP_BUFFER_REGISTER_VFT (HTTP_BUFFER_FIFO, buf_fifo_vft);
+
+typedef struct http_buffer_ptr_
+{
+ svm_fifo_seg_t *segs;
+ svm_fifo_t *f;
+} http_buffer_ptr_t;
+
+STATIC_ASSERT (sizeof (http_buffer_ptr_t) <= HTTP_BUFFER_DATA_SZ, "buf data");
+
+static void
+buf_ptr_init (http_buffer_t *hb, void *data, u64 len)
+{
+ svm_fifo_t *f = (svm_fifo_t *) data;
+ http_buffer_ptr_t *bf;
+ uword ptr;
+ int rv;
+
+ bf = (http_buffer_ptr_t *) &hb->data;
+
+ /* Peek the pointer, do not drain the fifo until done with transfer */
+ rv = svm_fifo_peek (f, 0, sizeof (ptr), (u8 *) &ptr);
+ ASSERT (rv == sizeof (ptr));
+
+ bf->f = f;
+ bf->segs = 0;
+ vec_validate (bf->segs, 1);
+
+ bf->segs[0].data = uword_to_pointer (ptr, u8 *);
+ bf->segs[0].len = len;
+
+ bf->segs[1] = bf->segs[0];
+}
+
+static void
+buf_ptr_free (http_buffer_t *hb)
+{
+ http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data;
+
+ bf->f = 0;
+ vec_free (bf->segs);
+}
+
+static svm_fifo_seg_t *
+buf_ptr_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs)
+{
+ http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data;
+
+ *n_segs = 1;
+ bf->segs[1].len = clib_min (bf->segs[0].len, max_len);
+
+ return &bf->segs[1];
+}
+
+static u32
+buf_ptr_drain (http_buffer_t *hb, u32 len)
+{
+ http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data;
+
+ ASSERT (bf->segs[0].len >= len);
+
+ bf->segs[1].data += len;
+ bf->segs[0].len -= len;
+
+ HTTP_DBG (1, "drained %u left %u", len, bf->segs[1].len);
+
+ if (!bf->segs[0].len)
+ {
+ svm_fifo_dequeue_drop (bf->f, sizeof (uword));
+ return sizeof (uword);
+ }
+
+ return 0;
+}
+
+static u8
+buf_ptr_is_drained (http_buffer_t *hb)
+{
+ http_buffer_ptr_t *bf = (http_buffer_ptr_t *) &hb->data;
+
+ return (bf->segs[0].len == 0);
+}
+
+const static http_buffer_vft_t buf_ptr_vft = {
+ .init = buf_ptr_init,
+ .free = buf_ptr_free,
+ .get_segs = buf_ptr_get_segs,
+ .drain = buf_ptr_drain,
+ .is_drained = buf_ptr_is_drained,
+};
+
+HTTP_BUFFER_REGISTER_VFT (HTTP_BUFFER_PTR, buf_ptr_vft);
+
+void
+http_buffer_init (http_buffer_t *hb, http_buffer_type_t type, svm_fifo_t *f,
+ u64 data_len)
+{
+ hb->vft = &buf_vfts[type];
+ hb->vft->init (hb, f, data_len);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http/http_buffer.h b/src/plugins/http/http_buffer.h
new file mode 100644
index 00000000000..1140be42d6e
--- /dev/null
+++ b/src/plugins/http/http_buffer.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP_BUFFER_H_
+#define SRC_PLUGINS_HTTP_HTTP_BUFFER_H_
+
+#include <svm/svm_fifo.h>
+
+#define HTTP_BUFFER_DATA_SZ 32
+
+typedef enum http_buffer_type_
+{
+ HTTP_BUFFER_FIFO,
+ HTTP_BUFFER_PTR,
+} http_buffer_type_t;
+
+typedef struct http_buffer_vft_ http_buffer_vft_t;
+
+typedef struct http_buffer_
+{
+ http_buffer_vft_t *vft;
+ u8 data[HTTP_BUFFER_DATA_SZ];
+} http_buffer_t;
+
+struct http_buffer_vft_
+{
+ void (*init) (http_buffer_t *, void *data, u64 len);
+ void (*free) (http_buffer_t *);
+ svm_fifo_seg_t *(*get_segs) (http_buffer_t *, u32 max_len, u32 *n_segs);
+ u32 (*drain) (http_buffer_t *, u32 len);
+ u8 (*is_drained) (http_buffer_t *);
+};
+
+void http_buffer_init (http_buffer_t *hb, http_buffer_type_t type,
+ svm_fifo_t *f, u64 data_len);
+
+static inline void
+http_buffer_free (http_buffer_t *hb)
+{
+ if (hb->vft)
+ hb->vft->free (hb);
+}
+
+static inline svm_fifo_seg_t *
+http_buffer_get_segs (http_buffer_t *hb, u32 max_len, u32 *n_segs)
+{
+ return hb->vft->get_segs (hb, max_len, n_segs);
+}
+
+static inline u32
+http_buffer_drain (http_buffer_t *hb, u32 len)
+{
+ return hb->vft->drain (hb, len);
+}
+
+static inline u8
+http_buffer_is_drained (http_buffer_t *hb)
+{
+ return hb->vft->is_drained (hb);
+}
+
+#endif /* SRC_PLUGINS_HTTP_HTTP_BUFFER_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http/http_timer.c b/src/plugins/http/http_timer.c
new file mode 100644
index 00000000000..42fe69076fe
--- /dev/null
+++ b/src/plugins/http/http_timer.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <http/http_timer.h>
+#include <vnet/session/session.h>
+
+http_tw_ctx_t http_tw_ctx;
+
+static void
+http_timer_process_expired_cb (u32 *expired_timers)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+ u32 hs_handle;
+ int i;
+
+ for (i = 0; i < vec_len (expired_timers); i++)
+ {
+ /* Get session handle. The first bit is the timer id */
+ hs_handle = expired_timers[i] & 0x7FFFFFFF;
+ session_send_rpc_evt_to_thread (hs_handle >> 24, twc->cb_fn,
+ uword_to_pointer (hs_handle, void *));
+ }
+}
+
+static uword
+http_timer_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+ f64 now, timeout = 1.0;
+ uword *event_data = 0;
+ uword __clib_unused event_type;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ now = vlib_time_now (vm);
+ event_type = vlib_process_get_events (vm, (uword **) &event_data);
+
+ /* expire timers */
+ clib_spinlock_lock (&twc->tw_lock);
+ tw_timer_expire_timers_2t_1w_2048sl (&twc->tw, now);
+ clib_spinlock_unlock (&twc->tw_lock);
+
+ vec_reset_length (event_data);
+ }
+ return 0;
+}
+
+VLIB_REGISTER_NODE (http_timer_process_node) = {
+ .function = http_timer_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "http-timer-process",
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+void
+http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *cb_fn)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+ vlib_node_t *n;
+
+ tw_timer_wheel_init_2t_1w_2048sl (&twc->tw, http_timer_process_expired_cb,
+ 1.0 /* timer interval */, ~0);
+ clib_spinlock_init (&twc->tw_lock);
+ twc->cb_fn = cb_fn;
+
+ vlib_node_set_state (vm, http_timer_process_node.index,
+ VLIB_NODE_STATE_POLLING);
+ n = vlib_get_node (vm, http_timer_process_node.index);
+ vlib_start_process (vm, n->runtime_index);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http/http_timer.h b/src/plugins/http/http_timer.h
new file mode 100644
index 00000000000..eec5a4595fe
--- /dev/null
+++ b/src/plugins/http/http_timer.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_HTTP_TIMER_H_
+#define SRC_PLUGINS_HTTP_HTTP_TIMER_H_
+
+#include <http/http.h>
+#include <vppinfra/tw_timer_2t_1w_2048sl.h>
+
+#define HTTP_CONN_TIMEOUT 60
+
+typedef void (http_conn_timeout_fn) (void *);
+
+typedef struct http_tw_ctx_
+{
+ tw_timer_wheel_2t_1w_2048sl_t tw;
+ clib_spinlock_t tw_lock;
+ http_conn_timeout_fn *cb_fn;
+} http_tw_ctx_t;
+
+extern http_tw_ctx_t http_tw_ctx;
+
+void http_timers_init (vlib_main_t *vm, http_conn_timeout_fn *cb_fn);
+
+static inline void
+http_conn_timer_start (http_conn_t *hc)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+ u32 hs_handle;
+ u64 timeout;
+
+ timeout = HTTP_CONN_TIMEOUT;
+ hs_handle = hc->c_thread_index << 24 | hc->c_c_index;
+
+ clib_spinlock_lock (&twc->tw_lock);
+ hc->timer_handle =
+ tw_timer_start_2t_1w_2048sl (&twc->tw, hs_handle, 0, timeout);
+ clib_spinlock_unlock (&twc->tw_lock);
+}
+
+static inline void
+http_conn_timer_stop (http_conn_t *hc)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+
+ if (hc->timer_handle == ~0)
+ return;
+
+ clib_spinlock_lock (&twc->tw_lock);
+ tw_timer_stop_2t_1w_2048sl (&twc->tw, hc->timer_handle);
+ hc->timer_handle = ~0;
+ clib_spinlock_unlock (&twc->tw_lock);
+}
+
+static inline void
+http_conn_timer_update (http_conn_t *hc)
+{
+ http_tw_ctx_t *twc = &http_tw_ctx;
+ u64 timeout;
+
+ if (hc->timer_handle == ~0)
+ return;
+
+ timeout = HTTP_CONN_TIMEOUT;
+
+ clib_spinlock_lock (&twc->tw_lock);
+ tw_timer_update_2t_1w_2048sl (&twc->tw, hc->timer_handle, timeout);
+ clib_spinlock_unlock (&twc->tw_lock);
+}
+
+#endif /* SRC_PLUGINS_HTTP_HTTP_TIMER_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http_static/CMakeLists.txt b/src/plugins/http_static/CMakeLists.txt
index f9ccb15beae..5e51704dc96 100644
--- a/src/plugins/http_static/CMakeLists.txt
+++ b/src/plugins/http_static/CMakeLists.txt
@@ -14,9 +14,11 @@
add_vpp_plugin(http_static
SOURCES
+ http_cache.c
+ http_cache.h
http_static.c
static_server.c
- http_static.h
+ builtinurl/json_urls.c
API_FILES
http_static.api
diff --git a/src/plugins/http_static/FEATURE.yaml b/src/plugins/http_static/FEATURE.yaml
index d40855f2de2..ff4e147c495 100644
--- a/src/plugins/http_static/FEATURE.yaml
+++ b/src/plugins/http_static/FEATURE.yaml
@@ -1,10 +1,18 @@
---
-name: Static http https server
-maintainer: Dave Barach <dave@barachs.net>
+name: Static HTTP(S) Server
+maintainer:
+ - Dave Barach <dave@barachs.net>
+ - Florin Coras <fcoras@cisco.com>
features:
- - An extensible static http/https server with caching
-description: "A simple caching static http / https server
- A built-in vpp host stack application.
- Supports HTTP GET and HTTP POST methods."
+ - HTTP GET/POST handling
+ - LRU file caching
+ - pluggable URL handlers
+ - builtin json URL handles:
+ - version.json - vpp version info
+ - interface_list.json - list of interfaces
+ - interface_stats - single interface via HTTP POST
+ - interface_stats - all intfcs via HTTP GET."
+description: "Static HTTP(S) server implemented as a
+ built-in vpp host stack application. "
state: production
properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/http_static/builtinurl/json_urls.c b/src/plugins/http_static/builtinurl/json_urls.c
new file mode 100644
index 00000000000..808893aac79
--- /dev/null
+++ b/src/plugins/http_static/builtinurl/json_urls.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <http_static/http_static.h>
+#include <vpp/app/version.h>
+
+hss_url_handler_rc_t
+handle_get_version (hss_url_handler_args_t *args)
+{
+ u8 *s = 0;
+
+ s = format (s, "{\"vpp_details\": {");
+ s = format (s, " \"version\": \"%s\",", VPP_BUILD_VER);
+ s = format (s, " \"build_date\": \"%s\"}}\r\n", VPP_BUILD_DATE);
+
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ return HSS_URL_HANDLER_OK;
+}
+
+void
+trim_path_from_request (u8 *s, char *path)
+{
+ u8 *cp;
+ int trim_length = strlen (path) + 1 /* remove '?' */;
+
+ /* Get rid of the path and question-mark */
+ vec_delete (s, trim_length, 0);
+
+ /* Tail trim irrelevant browser info */
+ cp = s;
+ while ((cp - s) < vec_len (s))
+ {
+ if (*cp == ' ')
+ {
+ /*
+ * Makes request a vector which happens to look
+ * like a c-string.
+ */
+ *cp = 0;
+ vec_set_len (s, cp - s);
+ break;
+ }
+ cp++;
+ }
+}
+
+hss_url_handler_rc_t
+handle_get_interface_stats (hss_url_handler_args_t *args)
+{
+ u8 *s = 0, *stats = 0;
+ uword *p;
+ u32 *sw_if_indices = 0;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+ char *q = "\"";
+ int i;
+ int need_comma = 0;
+ u8 *format_vnet_sw_interface_cntrs (u8 * s, vnet_interface_main_t * im,
+ vnet_sw_interface_t * si, int json);
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+
+ /* Get stats for a single interface via http POST */
+ if (args->reqtype == HTTP_REQ_POST)
+ {
+ trim_path_from_request (args->request, "interface_stats.json");
+
+ /* Find the sw_if_index */
+ p = hash_get (im->hw_interface_by_name, args->request);
+ if (!p)
+ {
+ s = format (s, "{\"interface_stats\": {[\n");
+ s = format (s, " \"name\": \"%s\",", args->request);
+ s = format (s, " \"error\": \"%s\"", "UnknownInterface");
+ s = format (s, "]}\n");
+ goto out;
+ }
+
+ vec_add1 (sw_if_indices, p[0]);
+ }
+ else /* default, HTTP_BUILTIN_METHOD_GET */
+ {
+ pool_foreach (hi, im->hw_interfaces)
+ {
+ vec_add1 (sw_if_indices, hi->sw_if_index);
+ }
+ }
+
+ s = format (s, "{%sinterface_stats%s: [\n", q, q);
+
+ for (i = 0; i < vec_len (sw_if_indices); i++)
+ {
+ si = vnet_get_sw_interface (vnm, sw_if_indices[i]);
+ if (need_comma)
+ s = format (s, ",\n");
+
+ need_comma = 1;
+
+ s = format (s, "{%sname%s: %s%U%s, ", q, q, q,
+ format_vnet_sw_if_index_name, vnm, sw_if_indices[i], q);
+
+ stats = format_vnet_sw_interface_cntrs (stats, &vnm->interface_main, si,
+ 1 /* want json */);
+ if (vec_len (stats))
+ s = format (s, "%v}", stats);
+ else
+ s = format (s, "%snone%s: %strue%s}", q, q, q, q);
+ vec_reset_length (stats);
+ }
+
+ s = format (s, "]}\n");
+
+out:
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ vec_free (sw_if_indices);
+ vec_free (stats);
+ return HSS_URL_HANDLER_OK;
+}
+
+hss_url_handler_rc_t
+handle_get_interface_list (hss_url_handler_args_t *args)
+{
+ u8 *s = 0;
+ int i;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ vnet_hw_interface_t *hi;
+ u32 *hw_if_indices = 0;
+ int need_comma = 0;
+
+ /* Construct vector of active hw_if_indexes ... */
+ pool_foreach (hi, im->hw_interfaces)
+ {
+ /* No point in mentioning "local0"... */
+ if (hi - im->hw_interfaces)
+ vec_add1 (hw_if_indices, hi - im->hw_interfaces);
+ }
+
+ /* Build answer */
+ s = format (s, "{\"interface_list\": [\n");
+ for (i = 0; i < vec_len (hw_if_indices); i++)
+ {
+ if (need_comma)
+ s = format (s, ",\n");
+ hi = pool_elt_at_index (im->hw_interfaces, hw_if_indices[i]);
+ s = format (s, "\"%v\"", hi->name);
+ need_comma = 1;
+ }
+ s = format (s, "]}\n");
+ vec_free (hw_if_indices);
+
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ return HSS_URL_HANDLER_OK;
+}
+
+void
+hss_builtinurl_json_handlers_init (void)
+{
+ hss_register_url_handler (handle_get_version, "version.json", HTTP_REQ_GET);
+ hss_register_url_handler (handle_get_interface_list, "interface_list.json",
+ HTTP_REQ_GET);
+ hss_register_url_handler (handle_get_interface_stats, "interface_stats.json",
+ HTTP_REQ_GET);
+ hss_register_url_handler (handle_get_interface_stats, "interface_stats.json",
+ HTTP_REQ_POST);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http_static/http_cache.c b/src/plugins/http_static/http_cache.c
new file mode 100644
index 00000000000..8b9751b7f78
--- /dev/null
+++ b/src/plugins/http_static/http_cache.c
@@ -0,0 +1,450 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <http_static/http_cache.h>
+#include <vppinfra/bihash_template.c>
+#include <vppinfra/unix.h>
+#include <vlib/vlib.h>
+
+static void
+hss_cache_lock (hss_cache_t *hc)
+{
+ clib_spinlock_lock (&hc->cache_lock);
+}
+
+static void
+hss_cache_unlock (hss_cache_t *hc)
+{
+ clib_spinlock_unlock (&hc->cache_lock);
+}
+
+/** \brief Sanity-check the forward and reverse LRU lists
+ */
+static inline void
+lru_validate (hss_cache_t *hc)
+{
+#if CLIB_DEBUG > 0
+ f64 last_timestamp;
+ u32 index;
+ int i;
+ hss_cache_entry_t *ce;
+
+ last_timestamp = 1e70;
+ for (i = 1, index = hc->first_index; index != ~0;)
+ {
+ ce = pool_elt_at_index (hc->cache_pool, index);
+ /* Timestamps should be smaller (older) as we walk the fwd list */
+ if (ce->last_used > last_timestamp)
+ {
+ clib_warning ("%d[%d]: last used %.6f, last_timestamp %.6f", index,
+ i, ce->last_used, last_timestamp);
+ }
+ index = ce->next_index;
+ last_timestamp = ce->last_used;
+ i++;
+ }
+
+ last_timestamp = 0.0;
+ for (i = 1, index = hc->last_index; index != ~0;)
+ {
+ ce = pool_elt_at_index (hc->cache_pool, index);
+ /* Timestamps should be larger (newer) as we walk the rev list */
+ if (ce->last_used < last_timestamp)
+ {
+ clib_warning ("%d[%d]: last used %.6f, last_timestamp %.6f", index,
+ i, ce->last_used, last_timestamp);
+ }
+ index = ce->prev_index;
+ last_timestamp = ce->last_used;
+ i++;
+ }
+#endif
+}
+
+/** \brief Remove a data cache entry from the LRU lists
+ */
+static inline void
+lru_remove (hss_cache_t *hc, hss_cache_entry_t *ce)
+{
+ hss_cache_entry_t *next_ep, *prev_ep;
+ u32 ce_index;
+
+ lru_validate (hc);
+
+ ce_index = ce - hc->cache_pool;
+
+ /* Deal with list heads */
+ if (ce_index == hc->first_index)
+ hc->first_index = ce->next_index;
+ if (ce_index == hc->last_index)
+ hc->last_index = ce->prev_index;
+
+ /* Fix next->prev */
+ if (ce->next_index != ~0)
+ {
+ next_ep = pool_elt_at_index (hc->cache_pool, ce->next_index);
+ next_ep->prev_index = ce->prev_index;
+ }
+ /* Fix prev->next */
+ if (ce->prev_index != ~0)
+ {
+ prev_ep = pool_elt_at_index (hc->cache_pool, ce->prev_index);
+ prev_ep->next_index = ce->next_index;
+ }
+ lru_validate (hc);
+}
+
+/** \brief Add an entry to the LRU lists, tag w/ supplied timestamp
+ */
+static inline void
+lru_add (hss_cache_t *hc, hss_cache_entry_t *ce, f64 now)
+{
+ hss_cache_entry_t *next_ce;
+ u32 ce_index;
+
+ lru_validate (hc);
+
+ ce_index = ce - hc->cache_pool;
+
+ /*
+ * Re-add at the head of the forward LRU list,
+ * tail of the reverse LRU list
+ */
+ if (hc->first_index != ~0)
+ {
+ next_ce = pool_elt_at_index (hc->cache_pool, hc->first_index);
+ next_ce->prev_index = ce_index;
+ }
+
+ ce->prev_index = ~0;
+
+ /* ep now the new head of the LRU forward list */
+ ce->next_index = hc->first_index;
+ hc->first_index = ce_index;
+
+ /* single session case: also the tail of the reverse LRU list */
+ if (hc->last_index == ~0)
+ hc->last_index = ce_index;
+ ce->last_used = now;
+
+ lru_validate (hc);
+}
+
+/** \brief Remove and re-add a cache entry from/to the LRU lists
+ */
+static inline void
+lru_update (hss_cache_t *hc, hss_cache_entry_t *ep, f64 now)
+{
+ lru_remove (hc, ep);
+ lru_add (hc, ep, now);
+}
+
+static void
+hss_cache_attach_entry (hss_cache_t *hc, u32 ce_index, u8 **data,
+ u64 *data_len)
+{
+ hss_cache_entry_t *ce;
+
+ /* Expect ce_index to be validated outside */
+ ce = pool_elt_at_index (hc->cache_pool, ce_index);
+ ce->inuse++;
+ *data = ce->data;
+ *data_len = vec_len (ce->data);
+
+ /* Update the cache entry, mark it in-use */
+ lru_update (hc, ce, vlib_time_now (vlib_get_main ()));
+
+ if (hc->debug_level > 1)
+ clib_warning ("index %d refcnt now %d", ce_index, ce->inuse);
+}
+
+/** \brief Detach cache entry from session
+ */
+void
+hss_cache_detach_entry (hss_cache_t *hc, u32 ce_index)
+{
+ hss_cache_entry_t *ce;
+
+ hss_cache_lock (hc);
+
+ ce = pool_elt_at_index (hc->cache_pool, ce_index);
+ ce->inuse--;
+
+ if (hc->debug_level > 1)
+ clib_warning ("index %d refcnt now %d", ce_index, ce->inuse);
+
+ hss_cache_unlock (hc);
+}
+
+static u32
+hss_cache_lookup (hss_cache_t *hc, u8 *path)
+{
+ BVT (clib_bihash_kv) kv;
+ int rv;
+
+ kv.key = (u64) path;
+ kv.value = ~0;
+
+ /* Value updated only if lookup succeeds */
+ rv = BV (clib_bihash_search) (&hc->name_to_data, &kv, &kv);
+ ASSERT (!rv || kv.value == ~0);
+
+ if (hc->debug_level > 1)
+ clib_warning ("lookup '%s' %s", kv.key, kv.value == ~0 ? "fail" : "found");
+
+ return kv.value;
+}
+
+u32
+hss_cache_lookup_and_attach (hss_cache_t *hc, u8 *path, u8 **data,
+ u64 *data_len)
+{
+ u32 ce_index;
+
+ /* Make sure nobody removes the entry while we look it up */
+ hss_cache_lock (hc);
+
+ ce_index = hss_cache_lookup (hc, path);
+ if (ce_index != ~0)
+ hss_cache_attach_entry (hc, ce_index, data, data_len);
+
+ hss_cache_unlock (hc);
+
+ return ce_index;
+}
+
+static void
+hss_cache_do_evictions (hss_cache_t *hc)
+{
+ BVT (clib_bihash_kv) kv;
+ hss_cache_entry_t *ce;
+ u32 free_index;
+
+ free_index = hc->last_index;
+
+ while (free_index != ~0)
+ {
+ /* pick the LRU */
+ ce = pool_elt_at_index (hc->cache_pool, free_index);
+ /* Which could be in use... */
+ if (ce->inuse)
+ {
+ if (hc->debug_level > 1)
+ clib_warning ("index %d in use refcnt %d", free_index, ce->inuse);
+ }
+ free_index = ce->prev_index;
+ kv.key = (u64) (ce->filename);
+ kv.value = ~0ULL;
+ if (BV (clib_bihash_add_del) (&hc->name_to_data, &kv, 0 /* is_add */) <
+ 0)
+ {
+ clib_warning ("LRU delete '%s' FAILED!", ce->filename);
+ }
+ else if (hc->debug_level > 1)
+ clib_warning ("LRU delete '%s' ok", ce->filename);
+
+ lru_remove (hc, ce);
+ hc->cache_size -= vec_len (ce->data);
+ hc->cache_evictions++;
+ vec_free (ce->filename);
+ vec_free (ce->data);
+
+ if (hc->debug_level > 1)
+ clib_warning ("pool put index %d", ce - hc->cache_pool);
+
+ pool_put (hc->cache_pool, ce);
+ if (hc->cache_size < hc->cache_limit)
+ break;
+ }
+}
+
+u32
+hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data, u64 *data_len)
+{
+ BVT (clib_bihash_kv) kv;
+ hss_cache_entry_t *ce;
+ clib_error_t *error;
+ u8 *file_data;
+ u32 ce_index;
+
+ hss_cache_lock (hc);
+
+ /* Need to recycle one (or more cache) entries? */
+ if (hc->cache_size > hc->cache_limit)
+ hss_cache_do_evictions (hc);
+
+ /* Read the file */
+ error = clib_file_contents ((char *) path, &file_data);
+ if (error)
+ {
+ clib_warning ("Error reading '%s'", path);
+ clib_error_report (error);
+ return ~0;
+ }
+
+ /* Create a cache entry for it */
+ pool_get_zero (hc->cache_pool, ce);
+ ce->filename = vec_dup (path);
+ ce->data = file_data;
+
+ /* Attach cache entry without additional lock */
+ ce->inuse++;
+ *data = file_data;
+ *data_len = vec_len (file_data);
+ lru_add (hc, ce, vlib_time_now (vlib_get_main ()));
+
+ hc->cache_size += vec_len (ce->data);
+ ce_index = ce - hc->cache_pool;
+
+ if (hc->debug_level > 1)
+ clib_warning ("index %d refcnt now %d", ce_index, ce->inuse);
+
+ /* Add to the lookup table */
+
+ kv.key = (u64) vec_dup (path);
+ kv.value = ce_index;
+
+ if (hc->debug_level > 1)
+ clib_warning ("add '%s' value %lld", kv.key, kv.value);
+
+ if (BV (clib_bihash_add_del) (&hc->name_to_data, &kv, 1 /* is_add */) < 0)
+ {
+ clib_warning ("BUG: add failed!");
+ }
+
+ hss_cache_unlock (hc);
+
+ return ce_index;
+}
+
+u32
+hss_cache_clear (hss_cache_t *hc)
+{
+ u32 free_index, busy_items = 0;
+ hss_cache_entry_t *ce;
+ BVT (clib_bihash_kv) kv;
+
+ hss_cache_lock (hc);
+
+ /* Walk the LRU list to find active entries */
+ free_index = hc->last_index;
+ while (free_index != ~0)
+ {
+ ce = pool_elt_at_index (hc->cache_pool, free_index);
+ free_index = ce->prev_index;
+ /* Which could be in use... */
+ if (ce->inuse)
+ {
+ busy_items++;
+ free_index = ce->next_index;
+ continue;
+ }
+ kv.key = (u64) (ce->filename);
+ kv.value = ~0ULL;
+ if (BV (clib_bihash_add_del) (&hc->name_to_data, &kv, 0 /* is_add */) <
+ 0)
+ {
+ clib_warning ("BUG: cache clear delete '%s' FAILED!", ce->filename);
+ }
+
+ lru_remove (hc, ce);
+ hc->cache_size -= vec_len (ce->data);
+ hc->cache_evictions++;
+ vec_free (ce->filename);
+ vec_free (ce->data);
+ if (hc->debug_level > 1)
+ clib_warning ("pool put index %d", ce - hc->cache_pool);
+ pool_put (hc->cache_pool, ce);
+ free_index = hc->last_index;
+ }
+
+ hss_cache_unlock (hc);
+
+ return busy_items;
+}
+
+void
+hss_cache_init (hss_cache_t *hc, uword cache_size, u8 debug_level)
+{
+ clib_spinlock_init (&hc->cache_lock);
+
+ /* Init path-to-cache hash table */
+ BV (clib_bihash_init) (&hc->name_to_data, "http cache", 128, 32 << 20);
+
+ hc->cache_limit = cache_size;
+ hc->debug_level = debug_level;
+ hc->first_index = hc->last_index = ~0;
+}
+
+/** \brief format a file cache entry
+ */
+static u8 *
+format_hss_cache_entry (u8 *s, va_list *args)
+{
+ hss_cache_entry_t *ep = va_arg (*args, hss_cache_entry_t *);
+ f64 now = va_arg (*args, f64);
+
+ /* Header */
+ if (ep == 0)
+ {
+ s = format (s, "%40s%12s%20s", "File", "Size", "Age");
+ return s;
+ }
+ s = format (s, "%40s%12lld%20.2f", ep->filename, vec_len (ep->data),
+ now - ep->last_used);
+ return s;
+}
+
+u8 *
+format_hss_cache (u8 *s, va_list *args)
+{
+ hss_cache_t *hc = va_arg (*args, hss_cache_t *);
+ u32 verbose = va_arg (*args, u32);
+ hss_cache_entry_t *ce;
+ vlib_main_t *vm;
+ u32 index;
+ f64 now;
+
+ if (verbose == 0)
+ {
+ s = format (s, "cache size %lld bytes, limit %lld bytes, evictions %lld",
+ hc->cache_size, hc->cache_limit, hc->cache_evictions);
+ return 0;
+ }
+
+ vm = vlib_get_main ();
+ now = vlib_time_now (vm);
+
+ s = format (s, "%U", format_hss_cache_entry, 0 /* header */, now);
+
+ for (index = hc->first_index; index != ~0;)
+ {
+ ce = pool_elt_at_index (hc->cache_pool, index);
+ index = ce->next_index;
+ s = format (s, "%U", format_hss_cache_entry, ce, now);
+ }
+
+ s = format (s, "%40s%12lld", "Total Size", hc->cache_size);
+
+ return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http_static/http_cache.h b/src/plugins/http_static/http_cache.h
new file mode 100644
index 00000000000..a89ed5e7e94
--- /dev/null
+++ b/src/plugins/http_static/http_cache.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_HTTP_STATIC_HTTP_CACHE_H_
+#define SRC_PLUGINS_HTTP_STATIC_HTTP_CACHE_H_
+
+#include <vppinfra/bihash_vec8_8.h>
+
+typedef struct hss_cache_entry_
+{
+ /** Name of the file */
+ u8 *filename;
+ /** Contents of the file, as a u8 * vector */
+ u8 *data;
+ /** Last time the cache entry was used */
+ f64 last_used;
+ /** Cache LRU links */
+ u32 next_index;
+ u32 prev_index;
+ /** Reference count, so we don't recycle while referenced */
+ int inuse;
+} hss_cache_entry_t;
+
+typedef struct hss_cache_
+{
+ /** Unified file data cache pool */
+ hss_cache_entry_t *cache_pool;
+ /** Hash table which maps file name to file data */
+ BVT (clib_bihash) name_to_data;
+
+ /** Session pool lock */
+ clib_spinlock_t cache_lock;
+
+ /** Current cache size */
+ u64 cache_size;
+ /** Max cache size in bytes */
+ u64 cache_limit;
+ /** Number of cache evictions */
+ u64 cache_evictions;
+
+ /** Cache LRU listheads */
+ u32 first_index;
+ u32 last_index;
+
+ u8 debug_level;
+} hss_cache_t;
+
+u32 hss_cache_lookup_and_attach (hss_cache_t *hc, u8 *path, u8 **data,
+ u64 *data_len);
+u32 hss_cache_add_and_attach (hss_cache_t *hc, u8 *path, u8 **data,
+ u64 *data_len);
+void hss_cache_detach_entry (hss_cache_t *hc, u32 ce_index);
+u32 hss_cache_clear (hss_cache_t *hc);
+void hss_cache_init (hss_cache_t *hc, uword cache_size, u8 debug_level);
+
+u8 *format_hss_cache (u8 *s, va_list *args);
+
+#endif /* SRC_PLUGINS_HTTP_STATIC_HTTP_CACHE_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/http_static/http_static.c b/src/plugins/http_static/http_static.c
index 48ae593718a..8f8fe37b7c1 100644
--- a/src/plugins/http_static/http_static.c
+++ b/src/plugins/http_static/http_static.c
@@ -1,7 +1,5 @@
/*
- * http_static.c - skeleton vpp engine plug-in
- *
- * Copyright (c) <current-year> <your-organization>
+ * Copyright (c) 2017-2022 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -29,57 +27,116 @@
#include <vpp/api/types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define REPLY_MSG_ID_BASE hmp->msg_id_base
+#define REPLY_MSG_ID_BASE hsm->msg_id_base
#include <vlibapi/api_helper_macros.h>
-http_static_main_t http_static_main;
+__clib_export void
+hss_register_url_handler (hss_url_handler_fn fp, const char *url,
+ http_req_method_t request_type)
+{
+ hss_main_t *hsm = &hss_main;
+ uword *p, *url_table;
+
+ url_table = (request_type == HTTP_REQ_GET) ? hsm->get_url_handlers :
+ hsm->post_url_handlers;
+
+ p = hash_get_mem (url_table, url);
+
+ if (p)
+ {
+ clib_warning ("WARNING: attempt to replace handler for %s '%s' ignored",
+ (request_type == HTTP_REQ_GET) ? "GET" : "POST", url);
+ return;
+ }
+
+ hash_set_mem (url_table, url, (uword) fp);
+
+ /*
+ * Need to update the hash table pointer in http_static_server_main
+ * in case we just expanded it...
+ */
+ if (request_type == HTTP_REQ_GET)
+ hsm->get_url_handlers = url_table;
+ else
+ hsm->post_url_handlers = url_table;
+}
+
+/** \brief API helper function for vl_api_http_static_enable_t messages
+ */
+static int
+hss_enable_api (u32 fifo_size, u32 cache_limit, u32 prealloc_fifos,
+ u32 private_segment_size, u8 *www_root, u8 *uri)
+{
+ hss_main_t *hsm = &hss_main;
+ int rv;
+
+ hsm->fifo_size = fifo_size;
+ hsm->cache_size = cache_limit;
+ hsm->prealloc_fifos = prealloc_fifos;
+ hsm->private_segment_size = private_segment_size;
+ hsm->www_root = format (0, "%s%c", www_root, 0);
+ hsm->uri = format (0, "%s%c", uri, 0);
+
+ if (vec_len (hsm->www_root) < 2)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ if (hsm->app_index != ~0)
+ return VNET_API_ERROR_APP_ALREADY_ATTACHED;
+
+ vnet_session_enable_disable (hsm->vlib_main, 1 /* turn on TCP, etc. */);
+
+ rv = hss_create (hsm->vlib_main);
+ switch (rv)
+ {
+ case 0:
+ break;
+ default:
+ vec_free (hsm->www_root);
+ vec_free (hsm->uri);
+ return VNET_API_ERROR_INIT_FAILED;
+ }
+ return 0;
+}
/* API message handler */
static void vl_api_http_static_enable_t_handler
(vl_api_http_static_enable_t * mp)
{
vl_api_http_static_enable_reply_t *rmp;
- http_static_main_t *hmp = &http_static_main;
+ hss_main_t *hsm = &hss_main;
int rv;
mp->uri[ARRAY_LEN (mp->uri) - 1] = 0;
mp->www_root[ARRAY_LEN (mp->www_root) - 1] = 0;
- rv = http_static_server_enable_api
- (ntohl (mp->fifo_size),
- ntohl (mp->cache_size_limit),
- ntohl (mp->prealloc_fifos),
- ntohl (mp->private_segment_size), mp->www_root, mp->uri);
+ rv =
+ hss_enable_api (ntohl (mp->fifo_size), ntohl (mp->cache_size_limit),
+ ntohl (mp->prealloc_fifos),
+ ntohl (mp->private_segment_size), mp->www_root, mp->uri);
REPLY_MACRO (VL_API_HTTP_STATIC_ENABLE_REPLY);
}
#include <http_static/http_static.api.c>
static clib_error_t *
-http_static_init (vlib_main_t * vm)
+hss_api_init (vlib_main_t *vm)
{
- http_static_main_t *hmp = &http_static_main;
-
- hmp->vlib_main = vm;
- hmp->vnet_main = vnet_get_main ();
+ hss_main_t *hsm = &hss_main;
/* Ask for a correctly-sized block of API message decode slots */
- hmp->msg_id_base = setup_message_id_table ();
+ hsm->msg_id_base = setup_message_id_table ();
return 0;
}
-VLIB_INIT_FUNCTION (http_static_init);
+VLIB_INIT_FUNCTION (hss_api_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "HTTP Static Server"
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/http_static/http_static.h b/src/plugins/http_static/http_static.h
index 8ee0f92cd44..2850d356b74 100644
--- a/src/plugins/http_static/http_static.h
+++ b/src/plugins/http_static/http_static.h
@@ -1,8 +1,5 @@
-
/*
- * http_static.h - skeleton vpp engine plug-in header file
- *
- * Copyright (c) <current-year> <your-organization>
+ * Copyright (c) 2017-2022 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -18,199 +15,160 @@
#ifndef __included_http_static_h__
#define __included_http_static_h__
-#include <vnet/vnet.h>
-#include <vnet/session/application.h>
#include <vnet/session/application_interface.h>
#include <vnet/session/session.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
+#include <http/http.h>
#include <vppinfra/hash.h>
#include <vppinfra/error.h>
-#include <vppinfra/time_range.h>
-#include <vppinfra/tw_timer_2t_1w_2048sl.h>
-#include <vppinfra/bihash_vec8_8.h>
+#include <http_static/http_cache.h>
/** @file http_static.h
* Static http server definitions
*/
-typedef struct
-{
- /* API message ID base */
- u16 msg_id_base;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-} http_static_main_t;
-
-extern http_static_main_t http_static_main;
-
-/** \brief Session States
- */
-
-typedef enum
-{
- /** Session is closed */
- HTTP_STATE_CLOSED,
- /** Session is established */
- HTTP_STATE_ESTABLISHED,
- /** Session has sent an OK response */
- HTTP_STATE_OK_SENT,
- /** Session has sent an HTML response */
- HTTP_STATE_SEND_MORE_DATA,
- /** Number of states */
- HTTP_STATE_N_STATES,
-} http_session_state_t;
-
-typedef enum
-{
- CALLED_FROM_RX,
- CALLED_FROM_TX,
- CALLED_FROM_TIMER,
-} http_state_machine_called_from_t;
-
-typedef enum
-{
- HTTP_BUILTIN_METHOD_GET = 0,
- HTTP_BUILTIN_METHOD_POST,
-} http_builtin_method_type_t;
-
-
/** \brief Application session
*/
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- /** Base class instance variables */
-#define _(type, name) type name;
- foreach_app_session_field
-#undef _
+ u32 session_index;
/** rx thread index */
u32 thread_index;
- /** rx buffer */
- u8 *rx_buf;
/** vpp session index, handle */
u32 vpp_session_index;
- u64 vpp_session_handle;
- /** Timeout timer handle */
- u32 timer_handle;
+ session_handle_t vpp_session_handle;
/** Fully-resolved file path */
u8 *path;
- /** File data, a vector */
+ /** Data to send */
u8 *data;
+ /** Data length */
+ u64 data_len;
/** Current data send offset */
u32 data_offset;
/** Need to free data in detach_cache_entry */
int free_data;
-
/** File cache pool index */
u32 cache_pool_index;
- /** state machine called from... */
- http_state_machine_called_from_t called_from;
-} http_session_t;
+ /** Content type, e.g. text, text/javascript, etc. */
+ http_content_type_t content_type;
+} hss_session_t;
-/** \brief In-memory file data cache entry
- */
-typedef struct
+typedef struct hss_session_handle_
{
- /** Name of the file */
- u8 *filename;
- /** Contents of the file, as a u8 * vector */
- u8 *data;
- /** Last time the cache entry was used */
- f64 last_used;
- /** Cache LRU links */
- u32 next_index;
- u32 prev_index;
- /** Reference count, so we don't recycle while referenced */
- int inuse;
-} file_data_cache_t;
+ union
+ {
+ struct
+ {
+ u32 session_index;
+ u32 thread_index;
+ };
+ u64 as_u64;
+ };
+} hss_session_handle_t;
+
+STATIC_ASSERT_SIZEOF (hss_session_handle_t, sizeof (u64));
+
+
+typedef struct hss_url_handler_args_
+{
+ hss_session_handle_t sh;
+
+ union
+ {
+ /* Request args */
+ struct
+ {
+ u8 *request;
+ http_req_method_t reqtype;
+ };
+
+ /* Reply args */
+ struct
+ {
+ u8 *data;
+ uword data_len;
+ u8 free_vec_data;
+ http_status_code_t sc;
+ };
+ };
+} hss_url_handler_args_t;
+
+typedef enum hss_url_handler_rc_
+{
+ HSS_URL_HANDLER_OK,
+ HSS_URL_HANDLER_ERROR,
+ HSS_URL_HANDLER_ASYNC,
+} hss_url_handler_rc_t;
+
+typedef hss_url_handler_rc_t (*hss_url_handler_fn) (hss_url_handler_args_t *);
+typedef void (*hss_register_url_fn) (hss_url_handler_fn, char *, int);
+typedef void (*hss_session_send_fn) (hss_url_handler_args_t *args);
/** \brief Main data structure
*/
-
typedef struct
{
/** Per thread vector of session pools */
- http_session_t **sessions;
- /** Session pool reader writer lock */
- clib_rwlock_t sessions_lock;
- /** vpp session to http session index map */
- u32 **session_to_http_session;
-
- /** Enable debug messages */
- int debug_level;
-
- /** vpp message/event queue */
- svm_msg_q_t **vpp_queue;
-
- /** Unified file data cache pool */
- file_data_cache_t *cache_pool;
- /** Hash table which maps file name to file data */
- BVT (clib_bihash) name_to_data;
+ hss_session_t **sessions;
/** Hash tables for built-in GET and POST handlers */
uword *get_url_handlers;
uword *post_url_handlers;
- /** Current cache size */
- u64 cache_size;
- /** Max cache size in bytes */
- u64 cache_limit;
- /** Number of cache evictions */
- u64 cache_evictions;
-
- /** Cache LRU listheads */
- u32 first_index;
- u32 last_index;
+ hss_cache_t cache;
/** root path to be served */
u8 *www_root;
- /** Server's event queue */
- svm_queue_t *vl_input_queue;
-
- /** API client handle */
- u32 my_client_index;
-
/** Application index */
u32 app_index;
- /** Process node index for event scheduling */
- u32 node_index;
-
/** Cert and key pair for tls */
u32 ckpair_index;
- /** Session cleanup timer wheel */
- tw_timer_wheel_2t_1w_2048sl_t tw;
- clib_spinlock_t tw_lock;
+ /* API message ID base */
+ u16 msg_id_base;
+
+ vlib_main_t *vlib_main;
- /** Time base, so we can generate browser cache control http spew */
- clib_timebase_t timebase;
+ /*
+ * Config
+ */
+ /** Enable debug messages */
+ int debug_level;
/** Number of preallocated fifos, usually 0 */
u32 prealloc_fifos;
/** Private segment size, usually 0 */
- u32 private_segment_size;
+ u64 private_segment_size;
/** Size of the allocated rx, tx fifos, roughly 8K or so */
u32 fifo_size;
/** The bind URI, defaults to tcp://0.0.0.0/80 */
u8 *uri;
- vlib_main_t *vlib_main;
-} http_static_server_main_t;
+ /** Threshold for switching to ptr data in http msgs */
+ u64 use_ptr_thresh;
+ /** Enable the use of builtinurls */
+ u8 enable_url_handlers;
+ /** Max cache size before LRU occurs */
+ u64 cache_size;
+
+ /** hash table of file extensions to mime types string indices */
+ uword *mime_type_indices_by_file_extensions;
+} hss_main_t;
-extern http_static_server_main_t http_static_server_main;
+extern hss_main_t hss_main;
-int http_static_server_enable_api (u32 fifo_size, u32 cache_limit,
- u32 prealloc_fifos,
- u32 private_segment_size,
- u8 * www_root, u8 * uri);
+int hss_create (vlib_main_t *vm);
-void http_static_server_register_builtin_handler
- (void *fp, char *url, int type);
+/**
+ * Register a GET or POST URL handler
+ */
+void hss_register_url_handler (hss_url_handler_fn fp, const char *url,
+ http_req_method_t type);
+void hss_session_send_data (hss_url_handler_args_t *args);
+void hss_builtinurl_json_handlers_init (void);
+hss_session_t *hss_session_get (u32 thread_index, u32 hs_index);
#endif /* __included_http_static_h__ */
diff --git a/src/plugins/http_static/static_server.c b/src/plugins/http_static/static_server.c
index c715dfa6fb8..040cdca9d7a 100644
--- a/src/plugins/http_static/static_server.c
+++ b/src/plugins/http_static/static_server.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Cisco and/or its affiliates.
+ * Copyright (c) 2017-2022 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -13,158 +13,48 @@
* limitations under the License.
*/
-#include <vnet/vnet.h>
-#include <vnet/session/application.h>
-#include <vnet/session/application_interface.h>
-#include <vnet/session/session.h>
-#include <vppinfra/unix.h>
+#include <http_static/http_static.h>
+
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
-#include <http_static/http_static.h>
-
-#include <vppinfra/bihash_template.c>
/** @file static_server.c
- * Static http server, sufficient to
- * serve .html / .css / .js content.
+ * Static http server, sufficient to serve .html / .css / .js content.
*/
/*? %%clicmd:group_label Static HTTP Server %% ?*/
-http_static_server_main_t http_static_server_main;
-
-/** \brief Format the called-from enum
- */
-
-static u8 *
-format_state_machine_called_from (u8 * s, va_list * args)
-{
- http_state_machine_called_from_t cf =
- va_arg (*args, http_state_machine_called_from_t);
- char *which = "bogus!";
-
- switch (cf)
- {
- case CALLED_FROM_RX:
- which = "from rx";
- break;
- case CALLED_FROM_TX:
- which = "from tx";
- break;
- case CALLED_FROM_TIMER:
- which = "from timer";
- break;
-
- default:
- break;
- }
-
- s = format (s, "%s", which);
- return s;
-}
-
-
-/** \brief Acquire reader lock on the sessions pools
- */
-static void
-http_static_server_sessions_reader_lock (void)
-{
- clib_rwlock_reader_lock (&http_static_server_main.sessions_lock);
-}
-
-/** \brief Drop reader lock on the sessions pools
- */
-static void
-http_static_server_sessions_reader_unlock (void)
-{
- clib_rwlock_reader_unlock (&http_static_server_main.sessions_lock);
-}
-
-/** \brief Acquire writer lock on the sessions pools
- */
-static void
-http_static_server_sessions_writer_lock (void)
-{
- clib_rwlock_writer_lock (&http_static_server_main.sessions_lock);
-}
-
-/** \brief Drop writer lock on the sessions pools
- */
-static void
-http_static_server_sessions_writer_unlock (void)
-{
- clib_rwlock_writer_unlock (&http_static_server_main.sessions_lock);
-}
-
-/** \brief Start a session cleanup timer
- */
-static void
-http_static_server_session_timer_start (http_session_t * hs)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- u32 hs_handle;
+#define HSS_FIFO_THRESH (16 << 10)
- /* The session layer may fire a callback at a later date... */
- if (!pool_is_free (hsm->sessions[hs->thread_index], hs))
- {
- hs_handle = hs->thread_index << 24 | hs->session_index;
- clib_spinlock_lock (&http_static_server_main.tw_lock);
- hs->timer_handle = tw_timer_start_2t_1w_2048sl
- (&http_static_server_main.tw, hs_handle, 0, 60);
- clib_spinlock_unlock (&http_static_server_main.tw_lock);
- }
-}
+hss_main_t hss_main;
-/** \brief stop a session cleanup timer
- */
-static void
-http_static_server_session_timer_stop (http_session_t * hs)
+static hss_session_t *
+hss_session_alloc (u32 thread_index)
{
- if (hs->timer_handle == ~0)
- return;
- clib_spinlock_lock (&http_static_server_main.tw_lock);
- tw_timer_stop_2t_1w_2048sl (&http_static_server_main.tw, hs->timer_handle);
- clib_spinlock_unlock (&http_static_server_main.tw_lock);
-}
+ hss_main_t *hsm = &hss_main;
+ hss_session_t *hs;
-/** \brief Allocate an http session
- */
-static http_session_t *
-http_static_server_session_alloc (u32 thread_index)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- http_session_t *hs;
- pool_get_aligned_zero_numa (hsm->sessions[thread_index], hs,
- 0 /* not aligned */ ,
- 1 /* zero */ ,
- os_get_numa_index ());
+ pool_get_zero (hsm->sessions[thread_index], hs);
hs->session_index = hs - hsm->sessions[thread_index];
hs->thread_index = thread_index;
- hs->timer_handle = ~0;
hs->cache_pool_index = ~0;
return hs;
}
-/** \brief Get an http session by index
- */
-static http_session_t *
-http_static_server_session_get (u32 thread_index, u32 hs_index)
+__clib_export hss_session_t *
+hss_session_get (u32 thread_index, u32 hs_index)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
if (pool_is_free_index (hsm->sessions[thread_index], hs_index))
return 0;
return pool_elt_at_index (hsm->sessions[thread_index], hs_index);
}
-/** \brief Free an http session
- */
static void
-http_static_server_session_free (http_session_t * hs)
+hss_session_free (hss_session_t *hs)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
- /* Make sure the timer is stopped... */
- http_static_server_session_timer_stop (hs);
pool_put (hsm->sessions[hs->thread_index], hs);
if (CLIB_DEBUG)
@@ -173,974 +63,571 @@ http_static_server_session_free (http_session_t * hs)
save_thread_index = hs->thread_index;
/* Poison the entry, preserve timer state and thread index */
memset (hs, 0xfa, sizeof (*hs));
- hs->timer_handle = ~0;
hs->thread_index = save_thread_index;
}
}
-/** \brief add a session to the vpp < -- > http session index map
+/** \brief Disconnect a session
*/
static void
-http_static_server_session_lookup_add (u32 thread_index, u32 s_index,
- u32 hs_index)
+hss_session_disconnect_transport (hss_session_t *hs)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- vec_validate (hsm->session_to_http_session[thread_index], s_index);
- hsm->session_to_http_session[thread_index][s_index] = hs_index;
+ vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+ a->handle = hs->vpp_session_handle;
+ a->app_index = hss_main.app_index;
+ vnet_disconnect_session (a);
}
-/** \brief Remove a session from the vpp < -- > http session index map
- */
static void
-http_static_server_session_lookup_del (u32 thread_index, u32 s_index)
+start_send_data (hss_session_t *hs, http_status_code_t status)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- hsm->session_to_http_session[thread_index][s_index] = ~0;
-}
+ http_msg_t msg;
+ session_t *ts;
+ int rv;
-/** \brief lookup a session in the vpp < -- > http session index map
- */
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
-static http_session_t *
-http_static_server_session_lookup (u32 thread_index, u32 s_index)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- u32 hs_index;
+ msg.type = HTTP_MSG_REPLY;
+ msg.code = status;
+ msg.content_type = hs->content_type;
+ msg.data.len = hs->data_len;
- if (s_index < vec_len (hsm->session_to_http_session[thread_index]))
+ if (hs->data_len > hss_main.use_ptr_thresh)
{
- hs_index = hsm->session_to_http_session[thread_index][s_index];
- return http_static_server_session_get (thread_index, hs_index);
- }
- return 0;
-}
-
-/** \brief Detach cache entry from session
- */
+ msg.data.type = HTTP_MSG_DATA_PTR;
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
-static void
-http_static_server_detach_cache_entry (http_session_t * hs)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- file_data_cache_t *ep;
+ uword data = pointer_to_uword (hs->data);
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (data), (u8 *) &data);
+ ASSERT (rv == sizeof (sizeof (data)));
- /*
- * Decrement cache pool entry reference count
- * Note that if e.g. a file lookup fails, the cache pool index
- * won't be set
- */
- if (hs->cache_pool_index != ~0)
- {
- ep = pool_elt_at_index (hsm->cache_pool, hs->cache_pool_index);
- ep->inuse--;
- if (hsm->debug_level > 1)
- clib_warning ("index %d refcnt now %d", hs->cache_pool_index,
- ep->inuse);
+ goto done;
}
- hs->cache_pool_index = ~0;
- if (hs->free_data)
- vec_free (hs->data);
- hs->data = 0;
- hs->data_offset = 0;
- hs->free_data = 0;
- vec_free (hs->path);
-}
-/** \brief Disconnect a session
- */
-static void
-http_static_server_session_disconnect (http_session_t * hs)
-{
- vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- a->handle = hs->vpp_session_handle;
- a->app_index = http_static_server_main.app_index;
- vnet_disconnect_session (a);
-}
+ msg.data.type = HTTP_MSG_DATA_INLINE;
-/* *INDENT-OFF* */
-/** \brief http error boilerplate
- */
-static const char *http_error_template =
- "HTTP/1.1 %s\r\n"
- "Date: %U GMT\r\n"
- "Content-Type: text/html\r\n"
- "Connection: close\r\n"
- "Pragma: no-cache\r\n"
- "Content-Length: 0\r\n\r\n";
-
-/** \brief http response boilerplate
- */
-static const char *http_response_template =
- "Date: %U GMT\r\n"
- "Expires: %U GMT\r\n"
- "Server: VPP Static\r\n"
- "Content-Type: %s\r\n"
- "Content-Length: %d\r\n\r\n";
-
-/* *INDENT-ON* */
-
-/** \brief send http data
- @param hs - http session
- @param data - the data vector to transmit
- @param length - length of data
- @param offset - transmit offset for this operation
- @return offset for next transmit operation, may be unchanged w/ full fifo
-*/
+ rv = svm_fifo_enqueue (ts->tx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
-static u32
-static_send_data (http_session_t * hs, u8 * data, u32 length, u32 offset)
-{
- u32 bytes_to_send;
- http_static_server_main_t *hsm = &http_static_server_main;
+ if (!msg.data.len)
+ goto done;
- bytes_to_send = length - offset;
+ rv = svm_fifo_enqueue (ts->tx_fifo, hs->data_len, hs->data);
- while (bytes_to_send > 0)
+ if (rv != hs->data_len)
{
- int actual_transfer;
-
- actual_transfer = svm_fifo_enqueue
- (hs->tx_fifo, bytes_to_send, data + offset);
-
- /* Made any progress? */
- if (actual_transfer <= 0)
- {
- if (hsm->debug_level > 0 && bytes_to_send > 0)
- clib_warning ("WARNING: still %d bytes to send", bytes_to_send);
- return offset;
- }
- else
- {
- offset += actual_transfer;
- bytes_to_send -= actual_transfer;
-
- if (hsm->debug_level && bytes_to_send > 0)
- clib_warning ("WARNING: still %d bytes to send", bytes_to_send);
-
- if (svm_fifo_set_event (hs->tx_fifo))
- session_send_io_evt_to_thread (hs->tx_fifo,
- SESSION_IO_EVT_TX_FLUSH);
- return offset;
- }
+ hs->data_offset = rv;
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
}
- /* NOTREACHED */
- return ~0;
-}
-/** \brief Send an http error string
- @param hs - the http session
- @param str - the error string, e.g. "404 Not Found"
-*/
-static void
-send_error (http_session_t * hs, char *str)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- u8 *data;
- f64 now;
-
- now = clib_timebase_now (&hsm->timebase);
- data = format (0, http_error_template, str, format_clib_timebase_time, now);
- static_send_data (hs, data, vec_len (data), 0);
- vec_free (data);
+done:
+
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
}
-/** \brief Retrieve data from the application layer
- */
-static int
-session_rx_request (http_session_t * hs)
+__clib_export void
+hss_session_send_data (hss_url_handler_args_t *args)
{
- u32 max_dequeue, cursize;
- int n_read;
+ hss_session_t *hs;
- cursize = vec_len (hs->rx_buf);
- max_dequeue = svm_fifo_max_dequeue (hs->rx_fifo);
- if (PREDICT_FALSE (max_dequeue == 0))
- return -1;
+ hs = hss_session_get (args->sh.thread_index, args->sh.session_index);
+ if (!hs)
+ return;
- vec_validate (hs->rx_buf, cursize + max_dequeue - 1);
- n_read = app_recv_stream_raw (hs->rx_fifo, hs->rx_buf + cursize,
- max_dequeue, 0, 0 /* peek */ );
- ASSERT (n_read == max_dequeue);
- if (svm_fifo_is_empty (hs->rx_fifo))
- svm_fifo_unset_event (hs->rx_fifo);
+ if (hs->data && hs->free_data)
+ vec_free (hs->data);
- _vec_len (hs->rx_buf) = cursize + n_read;
- return 0;
+ hs->data = args->data;
+ hs->data_len = args->data_len;
+ hs->free_data = args->free_vec_data;
+ start_send_data (hs, args->sc);
}
-/** \brief Sanity-check the forward and reverse LRU lists
+/*
+ * path_has_known_suffix()
+ * Returns 1 if the request ends with a known suffix, like .htm or .ico
+ * Used to avoid looking for "/favicon.ico/index.html" or similar.
*/
-static inline void
-lru_validate (http_static_server_main_t * hsm)
+
+static int
+path_has_known_suffix (u8 *request)
{
-#if CLIB_DEBUG > 0
- f64 last_timestamp;
- u32 index;
- int i;
- file_data_cache_t *ep;
-
- last_timestamp = 1e70;
- for (i = 1, index = hsm->first_index; index != ~0;)
- {
- ep = pool_elt_at_index (hsm->cache_pool, index);
- index = ep->next_index;
- /* Timestamps should be smaller (older) as we walk the fwd list */
- if (ep->last_used > last_timestamp)
- {
- clib_warning ("%d[%d]: last used %.6f, last_timestamp %.6f",
- ep - hsm->cache_pool, i,
- ep->last_used, last_timestamp);
- }
- last_timestamp = ep->last_used;
- i++;
- }
+ u8 *ext;
+ uword *p;
- last_timestamp = 0.0;
- for (i = 1, index = hsm->last_index; index != ~0;)
+ if (vec_len (request) == 0)
{
- ep = pool_elt_at_index (hsm->cache_pool, index);
- index = ep->prev_index;
- /* Timestamps should be larger (newer) as we walk the rev list */
- if (ep->last_used < last_timestamp)
- {
- clib_warning ("%d[%d]: last used %.6f, last_timestamp %.6f",
- ep - hsm->cache_pool, i,
- ep->last_used, last_timestamp);
- }
- last_timestamp = ep->last_used;
- i++;
+ return 0;
}
-#endif
-}
-/** \brief Remove a data cache entry from the LRU lists
- */
-static inline void
-lru_remove (http_static_server_main_t * hsm, file_data_cache_t * ep)
-{
- file_data_cache_t *next_ep, *prev_ep;
- u32 ep_index;
+ ext = request + vec_len (request) - 1;
- lru_validate (hsm);
+ while (ext > request && ext[0] != '.')
+ ext--;
- ep_index = ep - hsm->cache_pool;
+ if (ext == request)
+ return 0;
- /* Deal with list heads */
- if (ep_index == hsm->first_index)
- hsm->first_index = ep->next_index;
- if (ep_index == hsm->last_index)
- hsm->last_index = ep->prev_index;
+ p = hash_get_mem (hss_main.mime_type_indices_by_file_extensions, ext);
+ if (p)
+ return 1;
- /* Fix next->prev */
- if (ep->next_index != ~0)
- {
- next_ep = pool_elt_at_index (hsm->cache_pool, ep->next_index);
- next_ep->prev_index = ep->prev_index;
- }
- /* Fix prev->next */
- if (ep->prev_index != ~0)
- {
- prev_ep = pool_elt_at_index (hsm->cache_pool, ep->prev_index);
- prev_ep->next_index = ep->next_index;
- }
- lru_validate (hsm);
+ return 0;
}
-/** \brief Add an entry to the LRU lists, tag w/ supplied timestamp
+/*
+ * content_type_from_request
+ * Returns the index of the request's suffix in the
+ * http-layer http_content_type_str[] array.
*/
-static inline void
-lru_add (http_static_server_main_t * hsm, file_data_cache_t * ep, f64 now)
+static http_content_type_t
+content_type_from_request (u8 *request)
{
- file_data_cache_t *next_ep;
- u32 ep_index;
+ u8 *ext;
+ uword *p;
+ /* default to text/html */
+ http_content_type_t rv = HTTP_CONTENT_TEXT_HTML;
- lru_validate (hsm);
+ ASSERT (vec_len (request) > 0);
- ep_index = ep - hsm->cache_pool;
+ ext = request + vec_len (request) - 1;
- /*
- * Re-add at the head of the forward LRU list,
- * tail of the reverse LRU list
- */
- if (hsm->first_index != ~0)
- {
- next_ep = pool_elt_at_index (hsm->cache_pool, hsm->first_index);
- next_ep->prev_index = ep_index;
- }
+ while (ext > request && ext[0] != '.')
+ ext--;
- ep->prev_index = ~0;
+ if (ext == request)
+ return rv;
- /* ep now the new head of the LRU forward list */
- ep->next_index = hsm->first_index;
- hsm->first_index = ep_index;
+ p = hash_get_mem (hss_main.mime_type_indices_by_file_extensions, ext);
- /* single session case: also the tail of the reverse LRU list */
- if (hsm->last_index == ~0)
- hsm->last_index = ep_index;
- ep->last_used = now;
+ if (p == 0)
+ return rv;
- lru_validate (hsm);
+ rv = p[0];
+ return rv;
}
-/** \brief Remove and re-add a cache entry from/to the LRU lists
- */
-
-static inline void
-lru_update (http_static_server_main_t * hsm, file_data_cache_t * ep, f64 now)
+static int
+try_url_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
+ u8 *request)
{
- lru_remove (hsm, ep);
- lru_add (hsm, ep, now);
-}
+ http_status_code_t sc = HTTP_STATUS_OK;
+ hss_url_handler_args_t args = {};
+ uword *p, *url_table;
+ http_content_type_t type;
+ int rv;
-/** \brief Session-layer (main) data rx callback.
- Parse the http request, and reply to it.
- Future extensions might include POST processing, active content, etc.
-*/
+ if (!hsm->enable_url_handlers || !request)
+ return -1;
-/* svm_fifo_add_want_deq_ntf (tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL)
-get shoulder-tap when transport dequeues something, set in
-xmit routine. */
+ /* zero-length? try "index.html" */
+ if (vec_len (request) == 0)
+ {
+ request = format (request, "index.html");
+ }
-/** \brief closed state - should never really get here
- */
-static int
-state_closed (session_t * s, http_session_t * hs,
- http_state_machine_called_from_t cf)
-{
- clib_warning ("WARNING: http session %d, called from %U",
- hs->session_index, format_state_machine_called_from, cf);
- return -1;
-}
+ type = content_type_from_request (request);
-static void
-close_session (http_session_t * hs)
-{
- http_static_server_session_disconnect (hs);
-}
+ /* Look for built-in GET / POST handlers */
+ url_table =
+ (rt == HTTP_REQ_GET) ? hsm->get_url_handlers : hsm->post_url_handlers;
-/** \brief Register a builtin GET or POST handler
- */
-__clib_export void http_static_server_register_builtin_handler
- (void *fp, char *url, int request_type)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- uword *p, *builtin_table;
+ p = hash_get_mem (url_table, request);
+ if (!p)
+ return -1;
- builtin_table = (request_type == HTTP_BUILTIN_METHOD_GET)
- ? hsm->get_url_handlers : hsm->post_url_handlers;
+ hs->path = 0;
+ hs->data_offset = 0;
+ hs->cache_pool_index = ~0;
- p = hash_get_mem (builtin_table, url);
+ if (hsm->debug_level > 0)
+ clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST", request);
- if (p)
+ args.reqtype = rt;
+ args.request = request;
+ args.sh.thread_index = hs->thread_index;
+ args.sh.session_index = hs->session_index;
+
+ rv = ((hss_url_handler_fn) p[0]) (&args);
+
+ /* Wait for data from handler */
+ if (rv == HSS_URL_HANDLER_ASYNC)
+ return 0;
+
+ if (rv == HSS_URL_HANDLER_ERROR)
{
- clib_warning ("WARNING: attempt to replace handler for %s '%s' ignored",
- (request_type == HTTP_BUILTIN_METHOD_GET) ?
- "GET" : "POST", url);
- return;
+ clib_warning ("builtin handler %llx hit on %s '%s' but failed!", p[0],
+ (rt == HTTP_REQ_GET) ? "GET" : "POST", request);
+ sc = HTTP_STATUS_NOT_FOUND;
}
- hash_set_mem (builtin_table, url, (uword) fp);
+ hs->data = args.data;
+ hs->data_len = args.data_len;
+ hs->free_data = args.free_vec_data;
+ hs->content_type = type;
- /*
- * Need to update the hash table pointer in http_static_server_main
- * in case we just expanded it...
- */
- if (request_type == HTTP_BUILTIN_METHOD_GET)
- hsm->get_url_handlers = builtin_table;
- else
- hsm->post_url_handlers = builtin_table;
+ start_send_data (hs, sc);
+
+ if (!hs->data)
+ hss_session_disconnect_transport (hs);
+
+ return 0;
}
-static int
-v_find_index (u8 * vec, char *str)
+static u8
+file_path_is_valid (u8 *path)
{
- int start_index;
- u32 slen = (u32) strnlen_s_inline (str, 8);
- u32 vlen = vec_len (vec);
+ struct stat _sb, *sb = &_sb;
- ASSERT (slen > 0);
+ if (stat ((char *) path, sb) < 0 /* can't stat the file */
+ || (sb->st_mode & S_IFMT) != S_IFREG /* not a regular file */)
+ return 0;
- if (vlen <= slen)
- return -1;
+ return 1;
+}
- for (start_index = 0; start_index < (vlen - slen); start_index++)
- {
- if (!memcmp (vec, str, slen))
- return start_index;
- }
+static u32
+try_index_file (hss_main_t *hsm, hss_session_t *hs, u8 *path)
+{
+ u8 *port_str = 0, *redirect;
+ transport_endpoint_t endpt;
+ transport_proto_t proto;
+ int print_port = 0;
+ u16 local_port;
+ session_t *ts;
+ u32 plen;
+
+ /* Remove the trailing space */
+ vec_dec_len (path, 1);
+ plen = vec_len (path);
+
+ /* Append "index.html" */
+ if (path[plen - 1] != '/')
+ path = format (path, "/index.html%c", 0);
+ else
+ path = format (path, "index.html%c", 0);
- return -1;
-}
+ if (hsm->debug_level > 0)
+ clib_warning ("trying to find index: %s", path);
-/** \brief established state - waiting for GET, POST, etc.
- */
-static int
-state_established (session_t * s, http_session_t * hs,
- http_state_machine_called_from_t cf)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- u8 *request = 0;
- u8 *path;
- int i, rv;
- struct stat _sb, *sb = &_sb;
- clib_error_t *error;
- u8 request_type = HTTP_BUILTIN_METHOD_GET;
- u8 save_byte = 0;
- uword *p, *builtin_table;
+ if (!file_path_is_valid (path))
+ return HTTP_STATUS_NOT_FOUND;
- /* Read data from the sessison layer */
- rv = session_rx_request (hs);
+ /*
+ * We found an index.html file, build a redirect
+ */
+ vec_delete (path, vec_len (hsm->www_root) - 1, 0);
- /* No data? Odd, but stay in this state and await further instructions */
- if (rv)
- return 0;
+ ts = session_get (hs->vpp_session_index, hs->thread_index);
+ session_get_endpoint (ts, &endpt, 1 /* is_local */);
- /* Process the client request */
- request = hs->rx_buf;
- if (vec_len (request) < 8)
- {
- send_error (hs, "400 Bad Request");
- close_session (hs);
- return -1;
- }
+ local_port = clib_net_to_host_u16 (endpt.port);
+ proto = session_type_transport_proto (ts->session_type);
- if ((i = v_find_index (request, "GET ")) >= 0)
- goto find_end;
- else if ((i = v_find_index (request, "POST ")) >= 0)
+ if ((proto == TRANSPORT_PROTO_TCP && local_port != 80) ||
+ (proto == TRANSPORT_PROTO_TLS && local_port != 443))
{
- request_type = HTTP_BUILTIN_METHOD_POST;
- goto find_end;
+ print_port = 1;
+ port_str = format (0, ":%u", (u32) local_port);
}
- if (hsm->debug_level > 1)
- clib_warning ("Unknown http method");
+ redirect =
+ format (0,
+ "Location: http%s://%U%s%s\r\n\r\n",
+ proto == TRANSPORT_PROTO_TLS ? "s" : "", format_ip46_address,
+ &endpt.ip, endpt.is_ip4, print_port ? port_str : (u8 *) "", path);
- send_error (hs, "405 Method Not Allowed");
- close_session (hs);
- return -1;
+ if (hsm->debug_level > 0)
+ clib_warning ("redirect: %s", redirect);
-find_end:
+ vec_free (port_str);
- /* Lose "GET " or "POST " */
- vec_delete (request, i + 5 + request_type, 0);
+ hs->data = redirect;
+ hs->data_len = vec_len (redirect);
+ hs->free_data = 1;
- /* Temporarily drop in a NULL byte for lookup purposes */
- for (i = 0; i < vec_len (request); i++)
- {
- if (request[i] == ' ' || request[i] == '?')
- {
- save_byte = request[i];
- request[i] = 0;
- break;
- }
- }
+ return HTTP_STATUS_MOVED;
+}
+
+static int
+try_file_handler (hss_main_t *hsm, hss_session_t *hs, http_req_method_t rt,
+ u8 *request)
+{
+ http_status_code_t sc = HTTP_STATUS_OK;
+ u8 *path;
+ u32 ce_index;
+ http_content_type_t type;
+
+ /* Feature not enabled */
+ if (!hsm->www_root)
+ return -1;
+
+ type = content_type_from_request (request);
/*
- * Now we can construct the file to open
+ * Construct the file to open
* Browsers are capable of sporadically including a leading '/'
*/
- if (request[0] == '/')
+ if (!request)
+ path = format (0, "%s%c", hsm->www_root, 0);
+ else if (request[0] == '/')
path = format (0, "%s%s%c", hsm->www_root, request, 0);
else
path = format (0, "%s/%s%c", hsm->www_root, request, 0);
if (hsm->debug_level > 0)
- clib_warning ("%s '%s'", (request_type) == HTTP_BUILTIN_METHOD_GET ?
- "GET" : "POST", path);
+ clib_warning ("%s '%s'", (rt == HTTP_REQ_GET) ? "GET" : "POST", path);
- /* Look for built-in GET / POST handlers */
- builtin_table = (request_type == HTTP_BUILTIN_METHOD_GET) ?
- hsm->get_url_handlers : hsm->post_url_handlers;
-
- p = hash_get_mem (builtin_table, request);
-
- if (save_byte != 0)
- request[i] = save_byte;
+ if (hs->data && hs->free_data)
+ vec_free (hs->data);
- if (p)
- {
- int rv;
- int (*fp) (http_builtin_method_type_t, u8 *, http_session_t *);
- fp = (void *) p[0];
- hs->path = path;
- rv = (*fp) (request_type, request, hs);
- if (rv)
- {
- clib_warning ("builtin handler %llx hit on %s '%s' but failed!",
- p[0], (request_type == HTTP_BUILTIN_METHOD_GET) ?
- "GET" : "POST", request);
- send_error (hs, "404 Not Found");
- close_session (hs);
- return -1;
- }
- vec_reset_length (hs->rx_buf);
- goto send_ok;
- }
- vec_reset_length (hs->rx_buf);
- /* poison request, it's not valid anymore */
- request = 0;
- /* The static server itself doesn't do POSTs */
- if (request_type == HTTP_BUILTIN_METHOD_POST)
- {
- send_error (hs, "404 Not Found");
- close_session (hs);
- return -1;
- }
+ hs->data_offset = 0;
- /* Try to find the file. 2x special cases to find index.html */
- if (stat ((char *) path, sb) < 0 /* cant even stat the file */
- || sb->st_size < 20 /* file too small */
- || (sb->st_mode & S_IFMT) != S_IFREG /* not a regular file */ )
+ ce_index =
+ hss_cache_lookup_and_attach (&hsm->cache, path, &hs->data, &hs->data_len);
+ if (ce_index == ~0)
{
- u32 save_length = vec_len (path) - 1;
- /* Try appending "index.html"... */
- _vec_len (path) -= 1;
- path = format (path, "index.html%c", 0);
- if (stat ((char *) path, sb) < 0 /* cant even stat the file */
- || sb->st_size < 20 /* file too small */
- || (sb->st_mode & S_IFMT) != S_IFREG /* not a regular file */ )
+ if (!file_path_is_valid (path))
{
- _vec_len (path) = save_length;
- path = format (path, "/index.html%c", 0);
-
- /* Send a redirect, otherwise the browser will confuse itself */
- if (stat ((char *) path, sb) < 0 /* cant even stat the file */
- || sb->st_size < 20 /* file too small */
- || (sb->st_mode & S_IFMT) != S_IFREG /* not a regular file */ )
- {
- vec_free (path);
- send_error (hs, "404 Not Found");
- close_session (hs);
- return -1;
- }
- else
+ /*
+ * Generate error 404 right now if we can't find a path with
+ * a known file extension. It's silly to look for
+ * "favicon.ico/index.html" if you can't find
+ * "favicon.ico"; realistic example which used to happen.
+ */
+ if (path_has_known_suffix (path))
{
- transport_endpoint_t endpoint;
- transport_proto_t proto;
- u16 local_port;
- int print_port = 0;
- u8 *port_str = 0;
-
- /*
- * To make this bit work correctly, we need to know our local
- * IP address, etc. and send it in the redirect...
- */
- u8 *redirect;
-
- vec_delete (path, vec_len (hsm->www_root) - 1, 0);
-
- session_get_endpoint (s, &endpoint, 1 /* is_local */ );
-
- local_port = clib_net_to_host_u16 (endpoint.port);
-
- proto = session_type_transport_proto (s->session_type);
-
- if ((proto == TRANSPORT_PROTO_TCP && local_port != 80)
- || (proto == TRANSPORT_PROTO_TLS && local_port != 443))
- {
- print_port = 1;
- port_str = format (0, ":%u", (u32) local_port);
- }
-
- redirect = format (0, "HTTP/1.1 301 Moved Permanently\r\n"
- "Location: http%s://%U%s%s\r\n\r\n",
- proto == TRANSPORT_PROTO_TLS ? "s" : "",
- format_ip46_address, &endpoint.ip,
- endpoint.is_ip4,
- print_port ? port_str : (u8 *) "", path);
- if (hsm->debug_level > 0)
- clib_warning ("redirect: %s", redirect);
-
- vec_free (port_str);
-
- static_send_data (hs, redirect, vec_len (redirect), 0);
- hs->session_state = HTTP_STATE_CLOSED;
- hs->path = 0;
- vec_free (redirect);
- vec_free (path);
- close_session (hs);
- return -1;
+ sc = HTTP_STATUS_NOT_FOUND;
+ goto done;
}
+ sc = try_index_file (hsm, hs, path);
+ goto done;
+ }
+ ce_index =
+ hss_cache_add_and_attach (&hsm->cache, path, &hs->data, &hs->data_len);
+ if (ce_index == ~0)
+ {
+ sc = HTTP_STATUS_INTERNAL_ERROR;
+ goto done;
}
}
- /* find or read the file if we haven't done so yet. */
- if (hs->data == 0)
- {
- BVT (clib_bihash_kv) kv;
- file_data_cache_t *dp;
+ hs->path = path;
+ hs->cache_pool_index = ce_index;
- hs->path = path;
+done:
- /* First, try the cache */
- kv.key = (u64) hs->path;
- if (BV (clib_bihash_search) (&hsm->name_to_data, &kv, &kv) == 0)
- {
- if (hsm->debug_level > 1)
- clib_warning ("lookup '%s' returned %lld", kv.key, kv.value);
-
- /* found the data.. */
- dp = pool_elt_at_index (hsm->cache_pool, kv.value);
- hs->data = dp->data;
- /* Update the cache entry, mark it in-use */
- lru_update (hsm, dp, vlib_time_now (vlib_get_main ()));
- hs->cache_pool_index = dp - hsm->cache_pool;
- dp->inuse++;
- if (hsm->debug_level > 1)
- clib_warning ("index %d refcnt now %d", hs->cache_pool_index,
- dp->inuse);
- }
- else
- {
- if (hsm->debug_level > 1)
- clib_warning ("lookup '%s' failed", kv.key, kv.value);
- /* Need to recycle one (or more cache) entries? */
- if (hsm->cache_size > hsm->cache_limit)
- {
- int free_index = hsm->last_index;
-
- while (free_index != ~0)
- {
- /* pick the LRU */
- dp = pool_elt_at_index (hsm->cache_pool, free_index);
- free_index = dp->prev_index;
- /* Which could be in use... */
- if (dp->inuse)
- {
- if (hsm->debug_level > 1)
- clib_warning ("index %d in use refcnt %d",
- dp - hsm->cache_pool, dp->inuse);
-
- }
- kv.key = (u64) (dp->filename);
- kv.value = ~0ULL;
- if (BV (clib_bihash_add_del) (&hsm->name_to_data, &kv,
- 0 /* is_add */ ) < 0)
- {
- clib_warning ("LRU delete '%s' FAILED!", dp->filename);
- }
- else if (hsm->debug_level > 1)
- clib_warning ("LRU delete '%s' ok", dp->filename);
-
- lru_remove (hsm, dp);
- hsm->cache_size -= vec_len (dp->data);
- hsm->cache_evictions++;
- vec_free (dp->filename);
- vec_free (dp->data);
- if (hsm->debug_level > 1)
- clib_warning ("pool put index %d", dp - hsm->cache_pool);
- pool_put (hsm->cache_pool, dp);
- if (hsm->cache_size < hsm->cache_limit)
- break;
- }
- }
+ hs->content_type = type;
+ start_send_data (hs, sc);
+ if (!hs->data)
+ hss_session_disconnect_transport (hs);
- /* Read the file */
- error = clib_file_contents ((char *) (hs->path), &hs->data);
- if (error)
- {
- clib_warning ("Error reading '%s'", hs->path);
- clib_error_report (error);
- vec_free (hs->path);
- close_session (hs);
- return -1;
- }
- /* Create a cache entry for it */
- pool_get (hsm->cache_pool, dp);
- memset (dp, 0, sizeof (*dp));
- dp->filename = vec_dup (hs->path);
- dp->data = hs->data;
- hs->cache_pool_index = dp - hsm->cache_pool;
- dp->inuse++;
- if (hsm->debug_level > 1)
- clib_warning ("index %d refcnt now %d", hs->cache_pool_index,
- dp->inuse);
- lru_add (hsm, dp, vlib_time_now (vlib_get_main ()));
- kv.key = (u64) vec_dup (hs->path);
- kv.value = dp - hsm->cache_pool;
- /* Add to the lookup table */
- if (hsm->debug_level > 1)
- clib_warning ("add '%s' value %lld", kv.key, kv.value);
-
- if (BV (clib_bihash_add_del) (&hsm->name_to_data, &kv,
- 1 /* is_add */ ) < 0)
- {
- clib_warning ("BUG: add failed!");
- }
- hsm->cache_size += vec_len (dp->data);
- }
- hs->data_offset = 0;
- }
- /* send 200 OK first */
-send_ok:
- static_send_data (hs, (u8 *) "HTTP/1.1 200 OK\r\n", 17, 0);
- hs->session_state = HTTP_STATE_OK_SENT;
- return 1;
+ return 0;
}
static int
-state_send_more_data (session_t * s, http_session_t * hs,
- http_state_machine_called_from_t cf)
+handle_request (hss_session_t *hs, http_req_method_t rt, u8 *request)
{
+ hss_main_t *hsm = &hss_main;
- /* Start sending data */
- hs->data_offset = static_send_data (hs, hs->data, vec_len (hs->data),
- hs->data_offset);
+ if (!try_url_handler (hsm, hs, rt, request))
+ return 0;
- /* Did we finish? */
- if (hs->data_offset < vec_len (hs->data))
- {
- /* No: ask for a shoulder-tap when the tx fifo has space */
- svm_fifo_add_want_deq_ntf (hs->tx_fifo,
- SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
- hs->session_state = HTTP_STATE_SEND_MORE_DATA;
- return 0;
- }
- /* Finished with this transaction, back to HTTP_STATE_ESTABLISHED */
+ if (!try_file_handler (hsm, hs, rt, request))
+ return 0;
+
+ /* Handler did not find anything return 404 */
+ start_send_data (hs, HTTP_STATUS_NOT_FOUND);
+ hss_session_disconnect_transport (hs);
- /* Let go of the file cache entry */
- http_static_server_detach_cache_entry (hs);
- hs->session_state = HTTP_STATE_ESTABLISHED;
return 0;
}
static int
-state_sent_ok (session_t * s, http_session_t * hs,
- http_state_machine_called_from_t cf)
+hss_ts_rx_callback (session_t *ts)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- char *suffix;
- char *http_type;
- u8 *http_response;
- f64 now;
- u32 offset;
-
- /* What kind of dog food are we serving? */
- suffix = (char *) (hs->path + vec_len (hs->path) - 1);
- while ((u8 *) suffix >= hs->path && *suffix != '.')
- suffix--;
- suffix++;
- http_type = "text/html";
- if (!clib_strcmp (suffix, "css"))
- http_type = "text/css";
- else if (!clib_strcmp (suffix, "js"))
- http_type = "text/javascript";
- else if (!clib_strcmp (suffix, "json"))
- http_type = "application/json";
-
- if (hs->data == 0)
+ hss_session_t *hs;
+ u8 *request = 0;
+ http_msg_t msg;
+ int rv;
+
+ hs = hss_session_get (ts->thread_index, ts->opaque);
+
+ /* Read the http message header */
+ rv = svm_fifo_dequeue (ts->rx_fifo, sizeof (msg), (u8 *) &msg);
+ ASSERT (rv == sizeof (msg));
+
+ if (msg.type != HTTP_MSG_REQUEST ||
+ (msg.method_type != HTTP_REQ_GET && msg.method_type != HTTP_REQ_POST))
{
- clib_warning ("BUG: hs->data not set for session %d",
- hs->session_index);
- close_session (hs);
+ hs->data = 0;
+ start_send_data (hs, HTTP_STATUS_METHOD_NOT_ALLOWED);
return 0;
}
- /*
- * Send an http response, which needs the current time,
- * the expiration time, and the data length
- */
- now = clib_timebase_now (&hsm->timebase);
- http_response = format (0, http_response_template,
- /* Date */
- format_clib_timebase_time, now,
- /* Expires */
- format_clib_timebase_time, now + 600.0,
- http_type, vec_len (hs->data));
- offset = static_send_data (hs, http_response, vec_len (http_response), 0);
- if (offset != vec_len (http_response))
+ /* Read request */
+ if (msg.data.len)
{
- clib_warning ("BUG: couldn't send response header!");
- close_session (hs);
- return 0;
+ vec_validate (request, msg.data.len - 1);
+ rv = svm_fifo_dequeue (ts->rx_fifo, msg.data.len, request);
+ ASSERT (rv == msg.data.len);
+ /* request must be a proper C-string in addition to a vector */
+ vec_add1 (request, 0);
}
- vec_free (http_response);
- /* Send data from the beginning... */
- hs->data_offset = 0;
- hs->session_state = HTTP_STATE_SEND_MORE_DATA;
- return 1;
-}
+ /* Find and send data */
+ handle_request (hs, msg.method_type, request);
-static void *state_funcs[HTTP_STATE_N_STATES] = {
- state_closed,
- /* Waiting for GET, POST, etc. */
- state_established,
- /* Sent OK */
- state_sent_ok,
- /* Send more data */
- state_send_more_data,
-};
+ vec_free (request);
-static inline int
-http_static_server_rx_tx_callback (session_t * s,
- http_state_machine_called_from_t cf)
+ return 0;
+}
+
+static int
+hss_ts_tx_callback (session_t *ts)
{
- http_session_t *hs;
- int (*fp) (session_t *, http_session_t *, http_state_machine_called_from_t);
+ hss_session_t *hs;
+ u32 to_send;
int rv;
- /* Acquire a reader lock on the session table */
- http_static_server_sessions_reader_lock ();
- hs = http_static_server_session_lookup (s->thread_index, s->session_index);
+ hs = hss_session_get (ts->thread_index, ts->opaque);
+ if (!hs || !hs->data)
+ return 0;
- if (!hs)
+ to_send = hs->data_len - hs->data_offset;
+ rv = svm_fifo_enqueue (ts->tx_fifo, to_send, hs->data + hs->data_offset);
+
+ if (rv <= 0)
{
- clib_warning ("No http session for thread %d session_index %d",
- s->thread_index, s->session_index);
- http_static_server_sessions_reader_unlock ();
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
return 0;
}
- /* Execute state machine for this session */
- do
+ if (rv < to_send)
{
- fp = state_funcs[hs->session_state];
- rv = (*fp) (s, hs, cf);
- if (rv < 0)
- goto session_closed;
+ hs->data_offset += rv;
+ svm_fifo_add_want_deq_ntf (ts->tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
}
- while (rv);
- /* Reset the session expiration timer */
- http_static_server_session_timer_stop (hs);
- http_static_server_session_timer_start (hs);
+ if (svm_fifo_set_event (ts->tx_fifo))
+ session_send_io_evt_to_thread (ts->tx_fifo, SESSION_IO_EVT_TX);
-session_closed:
- http_static_server_sessions_reader_unlock ();
return 0;
}
-static int
-http_static_server_rx_callback (session_t * s)
-{
- return http_static_server_rx_tx_callback (s, CALLED_FROM_RX);
-}
-
-static int
-http_static_server_tx_callback (session_t * s)
-{
- return http_static_server_rx_tx_callback (s, CALLED_FROM_TX);
-}
-
-
/** \brief Session accept callback
*/
-
static int
-http_static_server_session_accept_callback (session_t * s)
+hss_ts_accept_callback (session_t *ts)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- http_session_t *hs;
-
- hsm->vpp_queue[s->thread_index] =
- session_main_get_vpp_event_queue (s->thread_index);
+ hss_session_t *hs;
+ u32 thresh;
- http_static_server_sessions_writer_lock ();
+ hs = hss_session_alloc (ts->thread_index);
- hs = http_static_server_session_alloc (s->thread_index);
- http_static_server_session_lookup_add (s->thread_index, s->session_index,
- hs->session_index);
- hs->rx_fifo = s->rx_fifo;
- hs->tx_fifo = s->tx_fifo;
- hs->vpp_session_index = s->session_index;
- hs->vpp_session_handle = session_handle (s);
- hs->session_state = HTTP_STATE_ESTABLISHED;
- http_static_server_session_timer_start (hs);
+ hs->vpp_session_index = ts->session_index;
+ hs->vpp_session_handle = session_handle (ts);
- http_static_server_sessions_writer_unlock ();
+ /* The application sets a threshold for it's fifo to get notified when
+ * additional data can be enqueued. We want to keep the TX fifo reasonably
+ * full, however avoid entering a state where the
+ * fifo is full all the time and small chunks of data are being enqueued
+ * each time. If the fifo is small (under 16K) we set
+ * the threshold to it's size, meaning a notification will be given when the
+ * fifo empties.
+ */
+ thresh = clib_min (svm_fifo_size (ts->tx_fifo), HSS_FIFO_THRESH);
+ svm_fifo_set_deq_thresh (ts->tx_fifo, thresh);
- s->session_state = SESSION_STATE_READY;
+ ts->opaque = hs->session_index;
+ ts->session_state = SESSION_STATE_READY;
return 0;
}
-/** \brief Session disconnect callback
- */
-
static void
-http_static_server_session_disconnect_callback (session_t * s)
+hss_ts_disconnect_callback (session_t *ts)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- a->handle = session_handle (s);
+ a->handle = session_handle (ts);
a->app_index = hsm->app_index;
vnet_disconnect_session (a);
}
-/** \brief Session reset callback
- */
-
static void
-http_static_server_session_reset_callback (session_t * s)
+hss_ts_reset_callback (session_t *ts)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
- a->handle = session_handle (s);
+ a->handle = session_handle (ts);
a->app_index = hsm->app_index;
vnet_disconnect_session (a);
}
static int
-http_static_server_session_connected_callback (u32 app_index, u32 api_context,
- session_t * s,
- session_error_t err)
+hss_ts_connected_callback (u32 app_index, u32 api_context, session_t *ts,
+ session_error_t err)
{
clib_warning ("called...");
return -1;
}
static int
-http_static_server_add_segment_callback (u32 client_index, u64 segment_handle)
+hss_add_segment_callback (u32 client_index, u64 segment_handle)
{
return 0;
}
static void
-http_static_session_cleanup (session_t * s, session_cleanup_ntf_t ntf)
+hss_ts_cleanup (session_t *s, session_cleanup_ntf_t ntf)
{
- http_session_t *hs;
+ hss_main_t *hsm = &hss_main;
+ hss_session_t *hs;
if (ntf == SESSION_CLEANUP_TRANSPORT)
return;
- http_static_server_sessions_writer_lock ();
-
- hs = http_static_server_session_lookup (s->thread_index, s->session_index);
+ hs = hss_session_get (s->thread_index, s->opaque);
if (!hs)
- goto done;
+ return;
- http_static_server_detach_cache_entry (hs);
- http_static_server_session_lookup_del (hs->thread_index,
- hs->vpp_session_index);
- vec_free (hs->rx_buf);
- http_static_server_session_free (hs);
+ if (hs->cache_pool_index != ~0)
+ {
+ hss_cache_detach_entry (&hsm->cache, hs->cache_pool_index);
+ hs->cache_pool_index = ~0;
+ }
-done:
- http_static_server_sessions_writer_unlock ();
+ if (hs->free_data)
+ vec_free (hs->data);
+ hs->data = 0;
+ hs->data_offset = 0;
+ hs->free_data = 0;
+ vec_free (hs->path);
+
+ hss_session_free (hs);
}
-/** \brief Session-layer virtual function table
- */
-static session_cb_vft_t http_static_server_session_cb_vft = {
- .session_accept_callback = http_static_server_session_accept_callback,
- .session_disconnect_callback =
- http_static_server_session_disconnect_callback,
- .session_connected_callback = http_static_server_session_connected_callback,
- .add_segment_callback = http_static_server_add_segment_callback,
- .builtin_app_rx_callback = http_static_server_rx_callback,
- .builtin_app_tx_callback = http_static_server_tx_callback,
- .session_reset_callback = http_static_server_session_reset_callback,
- .session_cleanup_callback = http_static_session_cleanup,
+static session_cb_vft_t hss_cb_vft = {
+ .session_accept_callback = hss_ts_accept_callback,
+ .session_disconnect_callback = hss_ts_disconnect_callback,
+ .session_connected_callback = hss_ts_connected_callback,
+ .add_segment_callback = hss_add_segment_callback,
+ .builtin_app_rx_callback = hss_ts_rx_callback,
+ .builtin_app_tx_callback = hss_ts_tx_callback,
+ .session_reset_callback = hss_ts_reset_callback,
+ .session_cleanup_callback = hss_ts_cleanup,
};
static int
-http_static_server_attach ()
+hss_attach ()
{
vnet_app_add_cert_key_pair_args_t _ck_pair, *ck_pair = &_ck_pair;
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
u64 options[APP_OPTIONS_N_OPTIONS];
vnet_app_attach_args_t _a, *a = &_a;
u32 segment_size = 128 << 20;
@@ -1152,8 +639,8 @@ http_static_server_attach ()
segment_size = hsm->private_segment_size;
a->api_client_index = ~0;
- a->name = format (0, "test_http_static_server");
- a->session_cb_vft = &http_static_server_session_cb_vft;
+ a->name = format (0, "http_static_server");
+ a->session_cb_vft = &hss_cb_vft;
a->options = options;
a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = segment_size;
@@ -1186,19 +673,20 @@ http_static_server_attach ()
}
static int
-http_static_transport_needs_crypto (transport_proto_t proto)
+hss_transport_needs_crypto (transport_proto_t proto)
{
return proto == TRANSPORT_PROTO_TLS || proto == TRANSPORT_PROTO_DTLS ||
proto == TRANSPORT_PROTO_QUIC;
}
static int
-http_static_server_listen ()
+hss_listen (void)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
vnet_listen_args_t _a, *a = &_a;
char *uri = "tcp://0.0.0.0/80";
+ u8 need_crypto;
int rv;
clib_memset (a, 0, sizeof (*a));
@@ -1210,8 +698,12 @@ http_static_server_listen ()
if (parse_uri (uri, &sep))
return -1;
+ need_crypto = hss_transport_needs_crypto (sep.transport_proto);
+
+ sep.transport_proto = TRANSPORT_PROTO_HTTP;
clib_memcpy (&a->sep_ext, &sep, sizeof (sep));
- if (http_static_transport_needs_crypto (a->sep_ext.transport_proto))
+
+ if (need_crypto)
{
session_endpoint_alloc_ext_cfg (&a->sep_ext,
TRANSPORT_ENDPT_EXT_CFG_CRYPTO);
@@ -1219,257 +711,142 @@ http_static_server_listen ()
}
rv = vnet_listen (a);
- if (a->sep_ext.ext_cfg)
+
+ if (need_crypto)
clib_mem_free (a->sep_ext.ext_cfg);
- return rv;
-}
-static void
-http_static_server_session_close_cb (void *hs_handlep)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- http_session_t *hs;
- uword hs_handle;
- hs_handle = pointer_to_uword (hs_handlep);
- hs =
- http_static_server_session_get (hs_handle >> 24, hs_handle & 0x00FFFFFF);
-
- if (hsm->debug_level > 1)
- clib_warning ("terminate thread %d index %d hs %llx",
- hs_handle >> 24, hs_handle & 0x00FFFFFF, hs);
- if (!hs)
- return;
- hs->timer_handle = ~0;
- http_static_server_session_disconnect (hs);
+ return rv;
}
-/** \brief Expired session timer-wheel callback
- */
static void
-http_expired_timers_dispatch (u32 * expired_timers)
+hss_url_handlers_init (hss_main_t *hsm)
{
- u32 hs_handle;
- int i;
-
- for (i = 0; i < vec_len (expired_timers); i++)
+ if (!hsm->get_url_handlers)
{
- /* Get session handle. The first bit is the timer id */
- hs_handle = expired_timers[i] & 0x7FFFFFFF;
- session_send_rpc_evt_to_thread (hs_handle >> 24,
- http_static_server_session_close_cb,
- uword_to_pointer (hs_handle, void *));
+ hsm->get_url_handlers = hash_create_string (0, sizeof (uword));
+ hsm->post_url_handlers = hash_create_string (0, sizeof (uword));
}
-}
-
-/** \brief Timer-wheel expiration process
- */
-static uword
-http_static_server_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
- vlib_frame_t * f)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- f64 now, timeout = 1.0;
- uword *event_data = 0;
- uword __clib_unused event_type;
-
- while (1)
- {
- vlib_process_wait_for_event_or_clock (vm, timeout);
- now = vlib_time_now (vm);
- event_type = vlib_process_get_events (vm, (uword **) & event_data);
-
- /* expire timers */
- clib_spinlock_lock (&http_static_server_main.tw_lock);
- tw_timer_expire_timers_2t_1w_2048sl (&hsm->tw, now);
- clib_spinlock_unlock (&http_static_server_main.tw_lock);
- vec_reset_length (event_data);
- }
- return 0;
+ hss_builtinurl_json_handlers_init ();
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (http_static_server_process_node) =
-{
- .function = http_static_server_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "static-http-server-process",
- .state = VLIB_NODE_STATE_DISABLED,
-};
-/* *INDENT-ON* */
-
-static int
-http_static_server_create (vlib_main_t * vm)
+int
+hss_create (vlib_main_t *vm)
{
vlib_thread_main_t *vtm = vlib_get_thread_main ();
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
u32 num_threads;
- vlib_node_t *n;
num_threads = 1 /* main thread */ + vtm->n_threads;
- vec_validate (hsm->vpp_queue, num_threads - 1);
vec_validate (hsm->sessions, num_threads - 1);
- vec_validate (hsm->session_to_http_session, num_threads - 1);
- clib_rwlock_init (&hsm->sessions_lock);
- clib_spinlock_init (&hsm->tw_lock);
-
- if (http_static_server_attach ())
+ if (hss_attach ())
{
clib_warning ("failed to attach server");
return -1;
}
- if (http_static_server_listen ())
+ if (hss_listen ())
{
clib_warning ("failed to start listening");
return -1;
}
- /* Init path-to-cache hash table */
- BV (clib_bihash_init) (&hsm->name_to_data, "http cache", 128, 32 << 20);
-
- hsm->get_url_handlers = hash_create_string (0, sizeof (uword));
- hsm->post_url_handlers = hash_create_string (0, sizeof (uword));
+ if (hsm->www_root)
+ hss_cache_init (&hsm->cache, hsm->cache_size, hsm->debug_level);
- /* Init timer wheel and process */
- tw_timer_wheel_init_2t_1w_2048sl (&hsm->tw, http_expired_timers_dispatch,
- 1.0 /* timer interval */ , ~0);
- vlib_node_set_state (vm, http_static_server_process_node.index,
- VLIB_NODE_STATE_POLLING);
- n = vlib_get_node (vm, http_static_server_process_node.index);
- vlib_start_process (vm, n->runtime_index);
+ if (hsm->enable_url_handlers)
+ hss_url_handlers_init (hsm);
return 0;
}
-/** \brief API helper function for vl_api_http_static_enable_t messages
- */
-int
-http_static_server_enable_api (u32 fifo_size, u32 cache_limit,
- u32 prealloc_fifos,
- u32 private_segment_size,
- u8 * www_root, u8 * uri)
-{
- http_static_server_main_t *hsm = &http_static_server_main;
- int rv;
-
- hsm->fifo_size = fifo_size;
- hsm->cache_limit = cache_limit;
- hsm->prealloc_fifos = prealloc_fifos;
- hsm->private_segment_size = private_segment_size;
- hsm->www_root = format (0, "%s%c", www_root, 0);
- hsm->uri = format (0, "%s%c", uri, 0);
-
- if (vec_len (hsm->www_root) < 2)
- return VNET_API_ERROR_INVALID_VALUE;
-
- if (hsm->my_client_index != ~0)
- return VNET_API_ERROR_APP_ALREADY_ATTACHED;
-
- vnet_session_enable_disable (hsm->vlib_main, 1 /* turn on TCP, etc. */ );
-
- rv = http_static_server_create (hsm->vlib_main);
- switch (rv)
- {
- case 0:
- break;
- default:
- vec_free (hsm->www_root);
- vec_free (hsm->uri);
- return VNET_API_ERROR_INIT_FAILED;
- }
- return 0;
-}
-
static clib_error_t *
-http_static_server_create_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+hss_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
- http_static_server_main_t *hsm = &http_static_server_main;
unformat_input_t _line_input, *line_input = &_line_input;
+ hss_main_t *hsm = &hss_main;
+ clib_error_t *error = 0;
u64 seg_size;
- u8 *www_root = 0;
int rv;
+ if (hsm->app_index != (u32) ~0)
+ return clib_error_return (0, "http server already running...");
+
hsm->prealloc_fifos = 0;
hsm->private_segment_size = 0;
hsm->fifo_size = 0;
- /* 10mb cache limit, before LRU occurs */
- hsm->cache_limit = 10 << 20;
+ hsm->cache_size = 10 << 20;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
- goto no_wwwroot;
+ goto no_input;
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "www-root %s", &www_root))
+ if (unformat (line_input, "www-root %s", &hsm->www_root))
;
else
if (unformat (line_input, "prealloc-fifos %d", &hsm->prealloc_fifos))
;
else if (unformat (line_input, "private-segment-size %U",
unformat_memory_size, &seg_size))
- {
- if (seg_size >= 0x100000000ULL)
- {
- vlib_cli_output (vm, "private segment size %llu, too large",
- seg_size);
- return 0;
- }
- hsm->private_segment_size = seg_size;
- }
+ hsm->private_segment_size = seg_size;
else if (unformat (line_input, "fifo-size %d", &hsm->fifo_size))
hsm->fifo_size <<= 10;
else if (unformat (line_input, "cache-size %U", unformat_memory_size,
- &hsm->cache_limit))
- {
- if (hsm->cache_limit < (128 << 10))
- {
- return clib_error_return (0,
- "cache-size must be at least 128kb");
- }
- }
-
+ &hsm->cache_size))
+ ;
else if (unformat (line_input, "uri %s", &hsm->uri))
;
else if (unformat (line_input, "debug %d", &hsm->debug_level))
;
else if (unformat (line_input, "debug"))
hsm->debug_level = 1;
+ else if (unformat (line_input, "ptr-thresh %U", unformat_memory_size,
+ &hsm->use_ptr_thresh))
+ ;
+ else if (unformat (line_input, "url-handlers"))
+ hsm->enable_url_handlers = 1;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
}
+
unformat_free (line_input);
- if (www_root == 0)
+no_input:
+
+ if (error)
+ goto done;
+
+ if (hsm->www_root == 0 && !hsm->enable_url_handlers)
{
- no_wwwroot:
- return clib_error_return (0, "Must specify www-root <path>");
+ error = clib_error_return (0, "Must set www-root or url-handlers");
+ goto done;
}
- if (hsm->my_client_index != (u32) ~ 0)
+ if (hsm->cache_size < (128 << 10))
{
- vec_free (www_root);
- return clib_error_return (0, "http server already running...");
+ error = clib_error_return (0, "cache-size must be at least 128kb");
+ vec_free (hsm->www_root);
+ goto done;
}
- hsm->www_root = www_root;
-
vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ );
- rv = http_static_server_create (vm);
- switch (rv)
+ if ((rv = hss_create (vm)))
{
- case 0:
- break;
- default:
+ error = clib_error_return (0, "server_create returned %d", rv);
vec_free (hsm->www_root);
- return clib_error_return (0, "server_create returned %d", rv);
}
- return 0;
+
+done:
+
+ return error;
}
/*?
@@ -1484,92 +861,33 @@ http_static_server_create_command_fn (vlib_main_t * vm,
* @cliexcmd{http static server www-root <path> [prealloc-fios <nn>]
* [private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]}
?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (http_static_server_create_command, static) =
-{
+VLIB_CLI_COMMAND (hss_create_command, static) = {
.path = "http static server",
- .short_help = "http static server www-root <path> [prealloc-fifos <nn>]\n"
- "[private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]\n"
- "[debug [nn]]\n",
- .function = http_static_server_create_command_fn,
+ .short_help =
+ "http static server www-root <path> [prealloc-fifos <nn>]\n"
+ "[private-segment-size <nnMG>] [fifo-size <nbytes>] [uri <uri>]\n"
+ "[ptr-thresh <nn>] [url-handlers] [debug [nn]]\n",
+ .function = hss_create_command_fn,
};
-/* *INDENT-ON* */
-
-/** \brief format a file cache entry
- */
-u8 *
-format_hsm_cache_entry (u8 * s, va_list * args)
-{
- file_data_cache_t *ep = va_arg (*args, file_data_cache_t *);
- f64 now = va_arg (*args, f64);
-
- /* Header */
- if (ep == 0)
- {
- s = format (s, "%40s%12s%20s", "File", "Size", "Age");
- return s;
- }
- s = format (s, "%40s%12lld%20.2f", ep->filename, vec_len (ep->data),
- now - ep->last_used);
- return s;
-}
-u8 *
-format_http_session_state (u8 * s, va_list * args)
-{
- http_session_state_t state = va_arg (*args, http_session_state_t);
- char *state_string = "bogus!";
-
- switch (state)
- {
- case HTTP_STATE_CLOSED:
- state_string = "closed";
- break;
- case HTTP_STATE_ESTABLISHED:
- state_string = "established";
- break;
- case HTTP_STATE_OK_SENT:
- state_string = "ok sent";
- break;
- case HTTP_STATE_SEND_MORE_DATA:
- state_string = "send more data";
- break;
- default:
- break;
- }
-
- return format (s, "%s", state_string);
-}
-
-u8 *
-format_http_session (u8 * s, va_list * args)
+static u8 *
+format_hss_session (u8 *s, va_list *args)
{
- http_session_t *hs = va_arg (*args, http_session_t *);
- int verbose = va_arg (*args, int);
+ hss_session_t *hs = va_arg (*args, hss_session_t *);
+ int __clib_unused verbose = va_arg (*args, int);
- s = format (s, "[%d]: state %U", hs->session_index,
- format_http_session_state, hs->session_state);
- if (verbose > 0)
- {
- s = format (s, "\n path %s, data length %u, data_offset %u",
- hs->path ? hs->path : (u8 *) "[none]",
- vec_len (hs->data), hs->data_offset);
- }
+ s = format (s, "\n path %s, data length %u, data_offset %u",
+ hs->path ? hs->path : (u8 *) "[none]", hs->data_len,
+ hs->data_offset);
return s;
}
static clib_error_t *
-http_show_static_server_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+hss_show_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- file_data_cache_t *ep, **entries = 0;
- int verbose = 0;
- int show_cache = 0;
- int show_sessions = 0;
- u32 index;
- f64 now;
+ int verbose = 0, show_cache = 0, show_sessions = 0;
+ hss_main_t *hsm = &hss_main;
if (hsm->www_root == 0)
return clib_error_return (0, "Static server disabled");
@@ -1592,61 +910,29 @@ http_show_static_server_command_fn (vlib_main_t * vm,
return clib_error_return (0, "specify one or more of cache, sessions");
if (show_cache)
- {
- if (verbose == 0)
- {
- vlib_cli_output
- (vm, "www_root %s, cache size %lld bytes, limit %lld bytes, "
- "evictions %lld",
- hsm->www_root, hsm->cache_size, hsm->cache_limit,
- hsm->cache_evictions);
- return 0;
- }
-
- now = vlib_time_now (vm);
-
- vlib_cli_output (vm, "%U", format_hsm_cache_entry, 0 /* header */ ,
- now);
-
- for (index = hsm->first_index; index != ~0;)
- {
- ep = pool_elt_at_index (hsm->cache_pool, index);
- index = ep->next_index;
- vlib_cli_output (vm, "%U", format_hsm_cache_entry, ep, now);
- }
-
- vlib_cli_output (vm, "%40s%12lld", "Total Size", hsm->cache_size);
-
- vec_free (entries);
- }
+ vlib_cli_output (vm, "%U", format_hss_cache, &hsm->cache, verbose);
if (show_sessions)
{
u32 *session_indices = 0;
- http_session_t *hs;
+ hss_session_t *hs;
int i, j;
- http_static_server_sessions_reader_lock ();
for (i = 0; i < vec_len (hsm->sessions); i++)
{
- /* *INDENT-OFF* */
pool_foreach (hs, hsm->sessions[i])
- {
vec_add1 (session_indices, hs - hsm->sessions[i]);
- }
- /* *INDENT-ON* */
for (j = 0; j < vec_len (session_indices); j++)
{
- vlib_cli_output (vm, "%U", format_http_session,
- pool_elt_at_index
- (hsm->sessions[i], session_indices[j]),
- verbose);
+ vlib_cli_output (
+ vm, "%U", format_hss_session,
+ pool_elt_at_index (hsm->sessions[i], session_indices[j]),
+ verbose);
}
vec_reset_length (session_indices);
}
- http_static_server_sessions_reader_unlock ();
vec_free (session_indices);
}
return 0;
@@ -1662,63 +948,24 @@ http_show_static_server_command_fn (vlib_main_t * vm,
* @cliend
* @cliexcmd{show http static server sessions cache [verbose [nn]]}
?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (http_show_static_server_command, static) =
-{
+VLIB_CLI_COMMAND (hss_show_command, static) = {
.path = "show http static server",
.short_help = "show http static server sessions cache [verbose [<nn>]]",
- .function = http_show_static_server_command_fn,
+ .function = hss_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
-http_clear_static_cache_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+hss_clear_cache_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
- http_static_server_main_t *hsm = &http_static_server_main;
- file_data_cache_t *dp;
- u32 free_index;
+ hss_main_t *hsm = &hss_main;
u32 busy_items = 0;
- BVT (clib_bihash_kv) kv;
if (hsm->www_root == 0)
return clib_error_return (0, "Static server disabled");
- http_static_server_sessions_reader_lock ();
-
- /* Walk the LRU list to find active entries */
- free_index = hsm->last_index;
- while (free_index != ~0)
- {
- dp = pool_elt_at_index (hsm->cache_pool, free_index);
- free_index = dp->prev_index;
- /* Which could be in use... */
- if (dp->inuse)
- {
- busy_items++;
- free_index = dp->next_index;
- continue;
- }
- kv.key = (u64) (dp->filename);
- kv.value = ~0ULL;
- if (BV (clib_bihash_add_del) (&hsm->name_to_data, &kv,
- 0 /* is_add */ ) < 0)
- {
- clib_warning ("BUG: cache clear delete '%s' FAILED!", dp->filename);
- }
+ busy_items = hss_cache_clear (&hsm->cache);
- lru_remove (hsm, dp);
- hsm->cache_size -= vec_len (dp->data);
- hsm->cache_evictions++;
- vec_free (dp->filename);
- vec_free (dp->data);
- if (hsm->debug_level > 1)
- clib_warning ("pool put index %d", dp - hsm->cache_pool);
- pool_put (hsm->cache_pool, dp);
- free_index = hsm->last_index;
- }
- http_static_server_sessions_reader_unlock ();
if (busy_items > 0)
vlib_cli_output (vm, "Note: %d busy items still in cache...", busy_items);
else
@@ -1737,32 +984,34 @@ http_clear_static_cache_command_fn (vlib_main_t * vm,
* @cliend
* @cliexcmd{clear http static cache}
?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (clear_http_static_cache_command, static) =
-{
+VLIB_CLI_COMMAND (clear_hss_cache_command, static) = {
.path = "clear http static cache",
.short_help = "clear http static cache",
- .function = http_clear_static_cache_command_fn,
+ .function = hss_clear_cache_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
-http_static_server_main_init (vlib_main_t * vm)
+hss_main_init (vlib_main_t *vm)
{
- http_static_server_main_t *hsm = &http_static_server_main;
+ hss_main_t *hsm = &hss_main;
- hsm->my_client_index = ~0;
+ hsm->app_index = ~0;
hsm->vlib_main = vm;
- hsm->first_index = hsm->last_index = ~0;
- clib_timebase_init (&hsm->timebase, 0 /* GMT */ ,
- CLIB_TIMEBASE_DAYLIGHT_NONE,
- &vm->clib_time /* share the system clock */ );
+ /* Set up file extension to mime type index map */
+ hsm->mime_type_indices_by_file_extensions =
+ hash_create_string (0, sizeof (uword));
+
+#define _(def, ext, str) \
+ hash_set_mem (hsm->mime_type_indices_by_file_extensions, ext, \
+ HTTP_CONTENT_##def);
+ foreach_http_content_type;
+#undef _
return 0;
}
-VLIB_INIT_FUNCTION (http_static_server_main_init);
+VLIB_INIT_FUNCTION (hss_main_init);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/idpf/CMakeLists.txt b/src/plugins/idpf/CMakeLists.txt
new file mode 100644
index 00000000000..1c7e5ec619c
--- /dev/null
+++ b/src/plugins/idpf/CMakeLists.txt
@@ -0,0 +1,28 @@
+# Copyright (c) 2023 Intel and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(idpf
+ SOURCES
+ cli.c
+ device.c
+ format.c
+ plugin.c
+ idpf_controlq.c
+ idpf_api.c
+
+ API_FILES
+ idpf.api
+
+ API_TEST_SOURCES
+ idpf_test.c
+)
diff --git a/src/plugins/idpf/README.rst b/src/plugins/idpf/README.rst
new file mode 100644
index 00000000000..7d4a6b93f3a
--- /dev/null
+++ b/src/plugins/idpf/README.rst
@@ -0,0 +1,59 @@
+Intel IDPF device driver
+========================
+
+Overview
+--------
+
+This plugins provides native device support for Intel Infrastructure
+Data Path Function (IDPF). The current IDPF is a driver specification
+for future Intel Physical Function devices. IDPF defines communication
+channel between Data Plane (DP) and Control Plane (CP).
+
+Prerequisites
+-------------
+
+- Driver requires MSI-X interrupt support, which is not supported by
+ uio_pci_generic driver, so vfio-pci needs to be used. On systems
+ without IOMMU vfio driver can still be used with recent kernels which
+ support no-iommu mode.
+
+Known issues
+------------
+
+- This driver is still in experimental phase, and the corresponding device
+is not released yet.
+
+- Current version only supports device initialization. Basic I/O function
+will be supported in the next release.
+
+Usage
+-----
+
+Interface Creation
+~~~~~~~~~~~~~~~~~~
+
+Interfaces can be dynamically created by using following CLI:
+
+::
+
+ create interface idpf 0000:4b:00.0 vport-num 1 rx-single 1 tx-single 1
+ set int state idpf-0/4b/0/0 up
+
+vport-num: number of vport to be created. Each vport is related to one netdev.
+rx-single: configure Rx queue mode, split queue mode by default.
+tx-single: configure Tx queue mode, split queue mode by default.
+
+Interface Deletion
+~~~~~~~~~~~~~~~~~~
+
+Interface can be deleted with following CLI:
+
+::
+
+ delete interface idpf <interface name>
+
+Interface Statistics
+~~~~~~~~~~~~~~~~~~~~
+
+Interface statistics can be displayed with
+``sh hardware-interface <if-name>`` command.
diff --git a/src/plugins/idpf/cli.c b/src/plugins/idpf/cli.c
new file mode 100644
index 00000000000..592c2612c97
--- /dev/null
+++ b/src/plugins/idpf/cli.c
@@ -0,0 +1,135 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <idpf/idpf.h>
+
+static clib_error_t *
+idpf_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ idpf_create_if_args_t args;
+ u32 tmp;
+
+ clib_memset (&args, 0, sizeof (idpf_create_if_args_t));
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vlib_pci_addr, &args.addr))
+ ;
+ else if (unformat (line_input, "rx-single %u", &tmp))
+ args.rxq_single = 1;
+ else if (unformat (line_input, "tx-single %u", &tmp))
+ args.txq_single = 1;
+ else if (unformat (line_input, "rxq-num %u", &tmp))
+ args.rxq_num = tmp;
+ else if (unformat (line_input, "txq-num %u", &tmp))
+ args.txq_num = tmp;
+ else if (unformat (line_input, "rxq-size %u", &tmp))
+ args.rxq_size = tmp;
+ else if (unformat (line_input, "txq-size %u", &tmp))
+ args.txq_size = tmp;
+ else if (unformat (line_input, "vport-num %u", &tmp))
+ args.req_vport_nb = tmp;
+ else if (unformat (line_input, "name %s", &args.name))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ idpf_create_if (vm, &args);
+
+ vec_free (args.name);
+
+ return args.error;
+}
+
+VLIB_CLI_COMMAND (idpf_create_command, static) = {
+ .path = "create interface idpf",
+ .short_help = "create interface idpf <pci-address> "
+ "[vport <size>] [rx-single <size>] [tx-single <size>]",
+ .function = idpf_create_command_fn,
+};
+
+static clib_error_t *
+idpf_delete_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 sw_if_index = ~0;
+ vnet_hw_interface_t *hw;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ unformat_free (line_input);
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0,
+ "please specify interface name or sw_if_index");
+
+ hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
+ if (hw == NULL || idpf_device_class.index != hw->dev_class_index)
+ return clib_error_return (0, "not an IDPF interface");
+
+ vlib_process_signal_event (vm, idpf_process_node.index,
+ IDPF_PROCESS_EVENT_DELETE_IF, hw->dev_instance);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (idpf_delete_command, static) = {
+ .path = "delete interface idpf",
+ .short_help = "delete interface idpf "
+ "{<interface> | sw_if_index <sw_idx>}",
+ .function = idpf_delete_command_fn,
+ .is_mp_safe = 1,
+};
+
+clib_error_t *
+idpf_cli_init (vlib_main_t *vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (idpf_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/device.c b/src/plugins/idpf/device.c
new file mode 100644
index 00000000000..44b8116d996
--- /dev/null
+++ b/src/plugins/idpf/device.c
@@ -0,0 +1,2265 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <idpf/idpf.h>
+#include <vpp/app/version.h>
+#include <vnet/plugin/plugin.h>
+
+#define IDPF_RXQ_SZ 512
+#define IDPF_TXQ_SZ 512
+
+#define PCI_VENDOR_ID_INTEL 0x8086
+#define PCI_DEVICE_ID_INTEL_IDPF_PF 0x1452
+#define PCI_DEVICE_ID_INTEL_IDPF_VF 0x1889
+
+VLIB_REGISTER_LOG_CLASS (idpf_log) = {
+ .class_name = "idpf",
+};
+
+VLIB_REGISTER_LOG_CLASS (idpf_stats_log) = {
+ .class_name = "idpf",
+ .subclass_name = "stats",
+};
+
+idpf_main_t idpf_main;
+void idpf_delete_if (vlib_main_t *vm, idpf_device_t *id, int with_barrier);
+
+static pci_device_id_t idpf_pci_device_ids[] = {
+ { .vendor_id = PCI_VENDOR_ID_INTEL,
+ .device_id = PCI_DEVICE_ID_INTEL_IDPF_PF },
+ { .vendor_id = PCI_VENDOR_ID_INTEL,
+ .device_id = PCI_DEVICE_ID_INTEL_IDPF_VF },
+ { 0 },
+};
+
+static int
+idpf_vc_clean (vlib_main_t *vm, idpf_device_t *id)
+{
+ idpf_ctlq_msg_t *q_msg[IDPF_CTLQ_LEN];
+ uint16_t num_q_msg = IDPF_CTLQ_LEN;
+ idpf_dma_mem_t *dma_mem;
+ uint32_t i;
+ int err;
+
+ for (i = 0; i < 10; i++)
+ {
+ err = idpf_ctlq_clean_sq (id->asq, &num_q_msg, q_msg);
+ vlib_process_suspend (vm, 0.02);
+ if (num_q_msg > 0)
+ break;
+ }
+ if (err != 0)
+ return err;
+
+ /* Empty queue is not an error */
+ for (i = 0; i < num_q_msg; i++)
+ {
+ dma_mem = q_msg[i]->ctx.indirect.payload;
+ if (dma_mem != NULL)
+ idpf_free_dma_mem (id, dma_mem);
+ clib_mem_free (q_msg[i]);
+ }
+
+ return 0;
+}
+
+static idpf_vc_result_t
+idpf_read_msg_from_cp (idpf_device_t *id, u16 buf_len, u8 *buf)
+{
+ idpf_ctlq_msg_t ctlq_msg;
+ idpf_dma_mem_t *dma_mem = NULL;
+ idpf_vc_result_t result = IDPF_MSG_NON;
+ u32 opcode;
+ u16 pending = 1;
+ int ret;
+
+ ret = idpf_ctlq_recv (id->arq, &pending, &ctlq_msg);
+ if (ret != 0)
+ {
+ idpf_log_debug (id, "Can't read msg from AQ");
+ if (ret != -ENOMSG)
+ result = IDPF_MSG_ERR;
+ return result;
+ }
+
+ clib_memcpy_fast (buf, ctlq_msg.ctx.indirect.payload->va, buf_len);
+
+ opcode = ctlq_msg.cookie.mbx.chnl_opcode;
+ id->cmd_retval = ctlq_msg.cookie.mbx.chnl_retval;
+
+ idpf_log_debug (id, "CQ from CP carries opcode %u, retval %d", opcode,
+ id->cmd_retval);
+
+ if (opcode == VIRTCHNL2_OP_EVENT)
+ {
+ virtchnl2_event_t *ve =
+ (virtchnl2_event_t *) ctlq_msg.ctx.indirect.payload->va;
+
+ result = IDPF_MSG_SYS;
+ switch (ve->event)
+ {
+ case VIRTCHNL2_EVENT_LINK_CHANGE:
+ break;
+ default:
+ idpf_log_err (id, "%s: Unknown event %d from CP", __func__,
+ ve->event);
+ break;
+ }
+ }
+ else
+ {
+ /* async reply msg on command issued by pf previously */
+ result = IDPF_MSG_CMD;
+ if (opcode != id->pend_cmd)
+ {
+ idpf_log_warn (id, "command mismatch, expect %u, get %u",
+ id->pend_cmd, opcode);
+ result = IDPF_MSG_ERR;
+ }
+ }
+
+ if (ctlq_msg.data_len != 0)
+ dma_mem = ctlq_msg.ctx.indirect.payload;
+ else
+ pending = 0;
+
+ ret = idpf_ctlq_post_rx_buffs (id, id->arq, &pending, &dma_mem);
+ if (ret != 0 && dma_mem != NULL)
+ idpf_free_dma_mem (id, dma_mem);
+
+ return result;
+}
+
+clib_error_t *
+idpf_send_vc_msg (vlib_main_t *vm, idpf_device_t *id, virtchnl2_op_t op,
+ u8 *in, u16 in_len)
+{
+ idpf_ctlq_msg_t *ctlq_msg;
+ idpf_dma_mem_t *dma_mem;
+ int error = 0;
+
+ error = idpf_vc_clean (vm, id);
+ if (error)
+ goto err;
+
+ ctlq_msg = clib_mem_alloc (sizeof (idpf_ctlq_msg_t));
+ if (ctlq_msg == NULL)
+ goto err;
+ clib_memset (ctlq_msg, 0, sizeof (idpf_ctlq_msg_t));
+
+ dma_mem = clib_mem_alloc (sizeof (idpf_dma_mem_t));
+ if (dma_mem == NULL)
+ goto dma_mem_error;
+ clib_memset (dma_mem, 0, sizeof (idpf_dma_mem_t));
+
+ dma_mem->va = idpf_alloc_dma_mem (vm, id, dma_mem, IDPF_DFLT_MBX_BUF_SIZE);
+ if (dma_mem->va == NULL)
+ {
+ clib_mem_free (dma_mem);
+ goto err;
+ }
+
+ clib_memcpy (dma_mem->va, in, in_len);
+
+ ctlq_msg->opcode = idpf_mbq_opc_send_msg_to_pf;
+ ctlq_msg->func_id = 0;
+ ctlq_msg->data_len = in_len;
+ ctlq_msg->cookie.mbx.chnl_opcode = op;
+ ctlq_msg->cookie.mbx.chnl_retval = VIRTCHNL2_STATUS_SUCCESS;
+ ctlq_msg->ctx.indirect.payload = dma_mem;
+
+ error = idpf_ctlq_send (id, id->asq, 1, ctlq_msg);
+ if (error)
+ goto send_error;
+
+ return 0;
+
+send_error:
+ idpf_free_dma_mem (id, dma_mem);
+dma_mem_error:
+ clib_mem_free (ctlq_msg);
+err:
+ return clib_error_return (0, "idpf send vc msg to PF failed");
+}
+
+clib_error_t *
+idpf_read_one_msg (vlib_main_t *vm, idpf_device_t *id, u32 ops, u8 *buf,
+ u16 buf_len)
+{
+ int i = 0, ret;
+ f64 suspend_time = IDPF_SEND_TO_PF_SUSPEND_TIME;
+
+ do
+ {
+ ret = idpf_read_msg_from_cp (id, buf_len, buf);
+ if (ret == IDPF_MSG_CMD)
+ break;
+ vlib_process_suspend (vm, suspend_time);
+ }
+ while (i++ < IDPF_SEND_TO_PF_MAX_TRY_TIMES);
+ if (i >= IDPF_SEND_TO_PF_MAX_TRY_TIMES ||
+ id->cmd_retval != VIRTCHNL2_STATUS_SUCCESS)
+ return clib_error_return (0, "idpf read one msg failed");
+
+ return 0;
+}
+
+clib_error_t *
+idpf_execute_vc_cmd (vlib_main_t *vm, idpf_device_t *id, idpf_cmd_info_t *args)
+{
+ clib_error_t *error = 0;
+ f64 suspend_time = IDPF_SEND_TO_PF_SUSPEND_TIME;
+ int i = 0;
+
+ if (id->pend_cmd == VIRTCHNL2_OP_UNKNOWN)
+ id->pend_cmd = args->ops;
+ else
+ return clib_error_return (0, "There is incomplete cmd %d", id->pend_cmd);
+
+ if ((error = idpf_send_vc_msg (vm, id, args->ops, args->in_args,
+ args->in_args_size)))
+ return error;
+
+ switch (args->ops)
+ {
+ case VIRTCHNL2_OP_VERSION:
+ case VIRTCHNL2_OP_GET_CAPS:
+ case VIRTCHNL2_OP_CREATE_VPORT:
+ case VIRTCHNL2_OP_DESTROY_VPORT:
+ case VIRTCHNL2_OP_SET_RSS_KEY:
+ case VIRTCHNL2_OP_SET_RSS_LUT:
+ case VIRTCHNL2_OP_SET_RSS_HASH:
+ case VIRTCHNL2_OP_CONFIG_RX_QUEUES:
+ case VIRTCHNL2_OP_CONFIG_TX_QUEUES:
+ case VIRTCHNL2_OP_ENABLE_QUEUES:
+ case VIRTCHNL2_OP_DISABLE_QUEUES:
+ case VIRTCHNL2_OP_ENABLE_VPORT:
+ case VIRTCHNL2_OP_DISABLE_VPORT:
+ case VIRTCHNL2_OP_MAP_QUEUE_VECTOR:
+ case VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR:
+ case VIRTCHNL2_OP_ALLOC_VECTORS:
+ case VIRTCHNL2_OP_DEALLOC_VECTORS:
+ case VIRTCHNL2_OP_GET_STATS:
+ /* for init virtchnl ops, need to poll the response */
+ error = idpf_read_one_msg (vm, id, args->ops, args->out_buffer,
+ args->out_size);
+ if (error)
+ return clib_error_return (0, "idpf read vc message from PF failed");
+ clear_cmd (id);
+ break;
+ case VIRTCHNL2_OP_GET_PTYPE_INFO:
+ break;
+ default:
+ do
+ {
+ if (id->pend_cmd == VIRTCHNL2_OP_UNKNOWN)
+ break;
+ vlib_process_suspend (vm, suspend_time);
+ /* If don't read msg or read sys event, continue */
+ }
+ while (i++ < IDPF_SEND_TO_PF_MAX_TRY_TIMES);
+ /* If there's no response is received, clear command */
+ if (i >= IDPF_SEND_TO_PF_MAX_TRY_TIMES ||
+ id->cmd_retval != VIRTCHNL2_STATUS_SUCCESS)
+ return clib_error_return (
+ 0, "No response or return failure (%d) for cmd %d", id->cmd_retval,
+ args->ops);
+ break;
+ }
+
+ return error;
+}
+
+static inline uword
+idpf_dma_addr (vlib_main_t *vm, idpf_device_t *id, void *p)
+{
+ return (id->flags & IDPF_DEVICE_F_VA_DMA) ? pointer_to_uword (p) :
+ vlib_physmem_get_pa (vm, p);
+}
+
+clib_error_t *
+idpf_vc_config_irq_map_unmap (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_t *vport, bool map)
+{
+ virtchnl2_queue_vector_maps_t *map_info;
+ virtchnl2_queue_vector_t *vecmap;
+ u16 nb_rxq = vport->id->n_rx_queues;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+ int len, i;
+
+ len = sizeof (virtchnl2_queue_vector_maps_t) +
+ (nb_rxq - 1) * sizeof (virtchnl2_queue_vector_t);
+
+ map_info = clib_mem_alloc_aligned (len, CLIB_CACHE_LINE_BYTES);
+ clib_memset (map_info, 0, len);
+
+ map_info->vport_id = vport->vport_id;
+ map_info->num_qv_maps = nb_rxq;
+ for (i = 0; i < nb_rxq; i++)
+ {
+ vecmap = &map_info->qv_maps[i];
+ vecmap->queue_id = vport->qv_map[i].queue_id;
+ vecmap->vector_id = vport->qv_map[i].vector_id;
+ vecmap->itr_idx = VIRTCHNL2_ITR_IDX_0;
+ vecmap->queue_type = VIRTCHNL2_QUEUE_TYPE_RX;
+ }
+
+ args.ops =
+ map ? VIRTCHNL2_OP_MAP_QUEUE_VECTOR : VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR;
+ args.in_args = (u8 *) map_info;
+ args.in_args_size = len;
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command of VIRTCHNL2_OP_%s_QUEUE_VECTOR",
+ map ? "MAP" : "UNMAP");
+
+ clib_mem_free (map_info);
+ return error;
+}
+
+clib_error_t *
+idpf_config_rx_queues_irqs (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_t *vport)
+{
+ virtchnl2_queue_vector_t *qv_map;
+ clib_error_t *error = 0;
+ u32 dynctl_reg_start;
+ u32 itrn_reg_start;
+ u32 dynctl_val, itrn_val;
+ int i;
+
+ qv_map = clib_mem_alloc_aligned (id->n_rx_queues *
+ sizeof (virtchnl2_queue_vector_t),
+ CLIB_CACHE_LINE_BYTES);
+ clib_memset (qv_map, 0, id->n_rx_queues * sizeof (virtchnl2_queue_vector_t));
+
+ dynctl_reg_start = vport->recv_vectors->vchunks.vchunks->dynctl_reg_start;
+ itrn_reg_start = vport->recv_vectors->vchunks.vchunks->itrn_reg_start;
+ dynctl_val = idpf_reg_read (id, dynctl_reg_start);
+ idpf_log_debug (id, "Value of dynctl_reg_start is 0x%x", dynctl_val);
+ itrn_val = idpf_reg_read (id, itrn_reg_start);
+ idpf_log_debug (id, "Value of itrn_reg_start is 0x%x", itrn_val);
+
+ if (itrn_val != 0)
+ idpf_reg_write (id, dynctl_reg_start,
+ VIRTCHNL2_ITR_IDX_0 << PF_GLINT_DYN_CTL_ITR_INDX_S |
+ PF_GLINT_DYN_CTL_WB_ON_ITR_M |
+ itrn_val << PF_GLINT_DYN_CTL_INTERVAL_S);
+ else
+ idpf_reg_write (id, dynctl_reg_start,
+ VIRTCHNL2_ITR_IDX_0 << PF_GLINT_DYN_CTL_ITR_INDX_S |
+ PF_GLINT_DYN_CTL_WB_ON_ITR_M |
+ IDPF_DFLT_INTERVAL << PF_GLINT_DYN_CTL_INTERVAL_S);
+
+ for (i = 0; i < id->n_rx_queues; i++)
+ {
+ /* map all queues to the same vector */
+ qv_map[i].queue_id = vport->chunks_info.rx_start_qid + i;
+ qv_map[i].vector_id =
+ vport->recv_vectors->vchunks.vchunks->start_vector_id;
+ }
+ vport->qv_map = qv_map;
+
+ if ((error = idpf_vc_config_irq_map_unmap (vm, id, vport, true)))
+ {
+ idpf_log_err (id, "config interrupt mapping failed");
+ goto config_irq_map_err;
+ }
+
+ return error;
+
+config_irq_map_err:
+ clib_mem_free (vport->qv_map);
+ vport->qv_map = NULL;
+
+ return error;
+}
+
+clib_error_t *
+idpf_rx_split_bufq_setup (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_t *vport, idpf_rxq_t *bufq, u16 qid,
+ u16 rxq_size)
+{
+ clib_error_t *err;
+ u32 n_alloc, i;
+
+ bufq->size = rxq_size;
+ bufq->next = 0;
+ bufq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, bufq->size * sizeof (virtchnl2_rx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+
+ bufq->buffer_pool_index =
+ vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+ if ((err = vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) bufq->descs)))
+ return err;
+
+ clib_memset ((void *) bufq->descs, 0,
+ bufq->size * sizeof (virtchnl2_rx_desc_t));
+ vec_validate_aligned (bufq->bufs, bufq->size, CLIB_CACHE_LINE_BYTES);
+ bufq->qrx_tail = id->bar0 + (vport->chunks_info.rx_buf_qtail_start +
+ qid * vport->chunks_info.rx_buf_qtail_spacing);
+
+ n_alloc = vlib_buffer_alloc_from_pool (vm, bufq->bufs, bufq->size - 8,
+ bufq->buffer_pool_index);
+ if (n_alloc == 0)
+ return clib_error_return (0, "buffer allocation error");
+
+ bufq->n_enqueued = n_alloc;
+ virtchnl2_rx_desc_t *d = bufq->descs;
+ for (i = 0; i < n_alloc; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, bufq->bufs[i]);
+ if (id->flags & IDPF_DEVICE_F_VA_DMA)
+ d->qword[0] = vlib_buffer_get_va (b);
+ else
+ d->qword[0] = vlib_buffer_get_pa (vm, b);
+ d++;
+ }
+
+ return 0;
+}
+
+clib_error_t *
+idpf_split_rxq_init (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 rxq_size)
+{
+ clib_error_t *err;
+ idpf_rxq_t *rxq;
+ u32 n_alloc, i;
+
+ vec_validate_aligned (vport->rxqs, qid, CLIB_CACHE_LINE_BYTES);
+ rxq = vec_elt_at_index (vport->rxqs, qid);
+ rxq->size = rxq_size;
+ rxq->next = 0;
+ rxq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, rxq->size * sizeof (virtchnl2_rx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+
+ rxq->buffer_pool_index =
+ vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+ if (rxq->descs == 0)
+ return vlib_physmem_last_error (vm);
+
+ if ((err = vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) rxq->descs)))
+ return err;
+
+ clib_memset ((void *) rxq->descs, 0,
+ rxq->size * sizeof (virtchnl2_rx_desc_t));
+ vec_validate_aligned (rxq->bufs, rxq->size, CLIB_CACHE_LINE_BYTES);
+ rxq->qrx_tail = id->bar0 + (vport->chunks_info.rx_qtail_start +
+ qid * vport->chunks_info.rx_qtail_spacing);
+
+ n_alloc = vlib_buffer_alloc_from_pool (vm, rxq->bufs, rxq->size - 8,
+ rxq->buffer_pool_index);
+
+ if (n_alloc == 0)
+ return clib_error_return (0, "buffer allocation error");
+
+ rxq->n_enqueued = n_alloc;
+ virtchnl2_rx_desc_t *d = rxq->descs;
+ for (i = 0; i < n_alloc; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, rxq->bufs[i]);
+ if (id->flags & IDPF_DEVICE_F_VA_DMA)
+ d->qword[0] = vlib_buffer_get_va (b);
+ else
+ d->qword[0] = vlib_buffer_get_pa (vm, b);
+ d++;
+ }
+
+ err =
+ idpf_rx_split_bufq_setup (vm, id, vport, rxq->bufq1, 2 * qid, rxq_size);
+ if (err)
+ return err;
+ err =
+ idpf_rx_split_bufq_setup (vm, id, vport, rxq->bufq2, 2 * qid, rxq_size);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+clib_error_t *
+idpf_single_rxq_init (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 rxq_size)
+{
+ clib_error_t *err;
+ idpf_rxq_t *rxq;
+ u32 n_alloc, i;
+
+ vec_validate_aligned (vport->rxqs, qid, CLIB_CACHE_LINE_BYTES);
+ rxq = vec_elt_at_index (vport->rxqs, qid);
+ rxq->queue_index = vport->chunks_info.rx_start_qid + qid;
+ rxq->size = rxq_size;
+ rxq->next = 0;
+ rxq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, rxq->size * sizeof (virtchnl2_rx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+
+ rxq->buffer_pool_index =
+ vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+ if (rxq->descs == 0)
+ return vlib_physmem_last_error (vm);
+
+ err = vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) rxq->descs);
+ if (err)
+ return err;
+
+ clib_memset ((void *) rxq->descs, 0,
+ rxq->size * sizeof (virtchnl2_rx_desc_t));
+ vec_validate_aligned (rxq->bufs, rxq->size, CLIB_CACHE_LINE_BYTES);
+ rxq->qrx_tail = id->bar0 + (vport->chunks_info.rx_qtail_start +
+ qid * vport->chunks_info.rx_qtail_spacing);
+
+ n_alloc = vlib_buffer_alloc_from_pool (vm, rxq->bufs, rxq->size - 8,
+ rxq->buffer_pool_index);
+
+ if (n_alloc == 0)
+ return clib_error_return (0, "buffer allocation error");
+
+ rxq->n_enqueued = n_alloc;
+ virtchnl2_rx_desc_t *d = rxq->descs;
+ for (i = 0; i < n_alloc; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, rxq->bufs[i]);
+ if (id->flags & IDPF_DEVICE_F_VA_DMA)
+ d->qword[0] = vlib_buffer_get_va (b);
+ else
+ d->qword[0] = vlib_buffer_get_pa (vm, b);
+ d++;
+ }
+
+ return 0;
+}
+
+clib_error_t *
+idpf_rx_queue_setup (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 rxq_size)
+{
+ if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ return idpf_single_rxq_init (vm, id, vport, qid, rxq_size);
+ else
+ return idpf_split_rxq_init (vm, id, vport, qid, rxq_size);
+}
+
+clib_error_t *
+idpf_tx_split_complq_setup (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_t *vport, idpf_txq_t *complq, u16 qid,
+ u16 txq_size)
+{
+ clib_error_t *err;
+ u16 n;
+ u8 bpi = vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+ complq->size = txq_size;
+ complq->next = 0;
+ clib_spinlock_init (&complq->lock);
+
+ n = (complq->size / 510) + 1;
+ vec_validate_aligned (complq->ph_bufs, n, CLIB_CACHE_LINE_BYTES);
+
+ if (!vlib_buffer_alloc_from_pool (vm, complq->ph_bufs, n, bpi))
+ return clib_error_return (0, "buffer allocation error");
+
+ complq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, complq->size * sizeof (idpf_tx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+ if (complq->descs == 0)
+ return vlib_physmem_last_error (vm);
+
+ if ((err =
+ vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) complq->descs)))
+ return err;
+
+ vec_validate_aligned (complq->bufs, complq->size, CLIB_CACHE_LINE_BYTES);
+ complq->qtx_tail =
+ id->bar0 + (vport->chunks_info.tx_compl_qtail_start +
+ qid * vport->chunks_info.tx_compl_qtail_spacing);
+
+ /* initialize ring of pending RS slots */
+ clib_ring_new_aligned (complq->rs_slots, 32, CLIB_CACHE_LINE_BYTES);
+
+ vec_validate_aligned (complq->tmp_descs, complq->size,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (complq->tmp_bufs, complq->size, CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+clib_error_t *
+idpf_split_txq_init (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 txq_size)
+{
+ clib_error_t *err;
+ idpf_txq_t *txq;
+ u16 n, complq_qid;
+ u8 bpi = vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+ vec_validate_aligned (vport->txqs, qid, CLIB_CACHE_LINE_BYTES);
+ txq = vec_elt_at_index (vport->txqs, qid);
+ txq->size = txq_size;
+ txq->next = 0;
+ clib_spinlock_init (&txq->lock);
+
+ n = (txq->size / 510) + 1;
+ vec_validate_aligned (txq->ph_bufs, n, CLIB_CACHE_LINE_BYTES);
+
+ if (!vlib_buffer_alloc_from_pool (vm, txq->ph_bufs, n, bpi))
+ return clib_error_return (0, "buffer allocation error");
+
+ txq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, txq->size * sizeof (idpf_tx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+ if (txq->descs == 0)
+ return vlib_physmem_last_error (vm);
+
+ err = vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) txq->descs);
+ if (err)
+ return err;
+
+ vec_validate_aligned (txq->bufs, txq->size, CLIB_CACHE_LINE_BYTES);
+ txq->qtx_tail = id->bar0 + (vport->chunks_info.tx_qtail_start +
+ qid * vport->chunks_info.tx_qtail_spacing);
+
+ /* initialize ring of pending RS slots */
+ clib_ring_new_aligned (txq->rs_slots, 32, CLIB_CACHE_LINE_BYTES);
+
+ vec_validate_aligned (txq->tmp_descs, txq->size, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (txq->tmp_bufs, txq->size, CLIB_CACHE_LINE_BYTES);
+
+ complq_qid = vport->chunks_info.tx_compl_start_qid + qid;
+ err = idpf_tx_split_complq_setup (vm, id, vport, txq->complq, complq_qid,
+ 2 * txq_size);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+clib_error_t *
+idpf_single_txq_init (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 txq_size)
+{
+ clib_error_t *err;
+ idpf_txq_t *txq;
+ u16 n;
+ u8 bpi = vlib_buffer_pool_get_default_for_numa (vm, id->numa_node);
+
+ vec_validate_aligned (vport->txqs, qid, CLIB_CACHE_LINE_BYTES);
+ txq = vec_elt_at_index (vport->txqs, qid);
+ txq->queue_index = vport->chunks_info.tx_start_qid + qid;
+ txq->size = txq_size;
+ txq->next = 0;
+ clib_spinlock_init (&txq->lock);
+
+ n = (txq->size / 510) + 1;
+ vec_validate_aligned (txq->ph_bufs, n, CLIB_CACHE_LINE_BYTES);
+
+ if (!vlib_buffer_alloc_from_pool (vm, txq->ph_bufs, n, bpi))
+ return clib_error_return (0, "buffer allocation error");
+
+ txq->descs = vlib_physmem_alloc_aligned_on_numa (
+ vm, txq->size * sizeof (idpf_tx_desc_t), 2 * CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+ if (txq->descs == 0)
+ return vlib_physmem_last_error (vm);
+
+ err = vlib_pci_map_dma (vm, id->pci_dev_handle, (void *) txq->descs);
+ if (err)
+ return err;
+
+ vec_validate_aligned (txq->bufs, txq->size, CLIB_CACHE_LINE_BYTES);
+ txq->qtx_tail = id->bar0 + (vport->chunks_info.tx_qtail_start +
+ qid * vport->chunks_info.tx_qtail_spacing);
+
+ /* initialize ring of pending RS slots */
+ clib_ring_new_aligned (txq->rs_slots, 32, CLIB_CACHE_LINE_BYTES);
+
+ vec_validate_aligned (txq->tmp_descs, txq->size, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (txq->tmp_bufs, txq->size, CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+clib_error_t *
+idpf_tx_queue_setup (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, u16 txq_size)
+{
+ if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ return idpf_single_txq_init (vm, id, vport, qid, txq_size);
+ else
+ return idpf_split_txq_init (vm, id, vport, qid, txq_size);
+}
+
+clib_error_t *
+idpf_vc_config_txq (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid)
+{
+ idpf_txq_t *txq;
+ virtchnl2_config_tx_queues_t *vc_txqs = NULL;
+ virtchnl2_txq_info_t *txq_info;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+ u16 num_qs;
+ int size;
+
+ vec_validate_aligned (vport->txqs, qid, CLIB_CACHE_LINE_BYTES);
+ txq = vec_elt_at_index (vport->txqs, qid);
+
+ if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ num_qs = IDPF_TXQ_PER_GRP;
+ else
+ num_qs = IDPF_TXQ_PER_GRP + IDPF_TX_COMPLQ_PER_GRP;
+
+ size = sizeof (*vc_txqs) + (num_qs - 1) * sizeof (virtchnl2_txq_info_t);
+ vc_txqs = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES);
+ clib_memset (vc_txqs, 0, size);
+
+ vc_txqs->vport_id = vport->vport_id;
+ vc_txqs->num_qinfo = num_qs;
+
+ if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ {
+ txq_info = &vc_txqs->qinfo[0];
+ txq_info->dma_ring_addr = idpf_dma_addr (vm, id, (void *) txq->descs);
+ txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX;
+ txq_info->queue_id = txq->queue_index;
+ txq_info->model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
+ txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE;
+ txq_info->ring_len = txq->size;
+ }
+ else
+ {
+ /* txq info */
+ txq_info = &vc_txqs->qinfo[0];
+ txq_info->dma_ring_addr = idpf_dma_addr (vm, id, (void *) txq->descs);
+ txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX;
+ txq_info->queue_id = txq->queue_index;
+ txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+ txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
+ txq_info->ring_len = txq->size;
+ txq_info->tx_compl_queue_id = txq->complq->queue_index;
+ txq_info->relative_queue_id = txq_info->queue_id;
+
+ /* tx completion queue info */
+ idpf_txq_t *complq = txq->complq;
+ txq_info = &vc_txqs->qinfo[1];
+ txq_info->dma_ring_addr = idpf_dma_addr (vm, id, (void *) complq->descs);
+ txq_info->type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
+ txq_info->queue_id = complq->queue_index;
+ txq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+ txq_info->sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW;
+ txq_info->ring_len = complq->size;
+ }
+
+ clib_memset (&args, 0, sizeof (args));
+ args.ops = VIRTCHNL2_OP_CONFIG_TX_QUEUES;
+ args.in_args = (u8 *) vc_txqs;
+ args.in_args_size = size;
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ clib_mem_free (vc_txqs);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command VIRTCHNL2_OP_CONFIG_TX_QUEUES");
+
+ return error;
+}
+
+clib_error_t *
+idpf_vc_config_rxq (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid)
+{
+ idpf_rxq_t *rxq;
+ virtchnl2_config_rx_queues_t *vc_rxqs = NULL;
+ virtchnl2_rxq_info_t *rxq_info;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+ u16 num_qs;
+ int size, i;
+
+ vec_validate_aligned (vport->rxqs, qid, CLIB_CACHE_LINE_BYTES);
+ rxq = vec_elt_at_index (vport->rxqs, qid);
+
+ if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ num_qs = IDPF_RXQ_PER_GRP;
+ else
+ num_qs = IDPF_RXQ_PER_GRP + IDPF_RX_BUFQ_PER_GRP;
+
+ size = sizeof (*vc_rxqs) + (num_qs - 1) * sizeof (virtchnl2_rxq_info_t);
+ vc_rxqs = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES);
+ clib_memset (vc_rxqs, 0, size);
+
+ vc_rxqs->vport_id = vport->vport_id;
+ vc_rxqs->num_qinfo = num_qs;
+
+ if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SINGLE)
+ {
+ rxq_info = &vc_rxqs->qinfo[0];
+ rxq_info->dma_ring_addr = idpf_dma_addr (vm, id, (void *) rxq->descs);
+ rxq_info->type = VIRTCHNL2_QUEUE_TYPE_RX;
+ rxq_info->queue_id = rxq->queue_index;
+ rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
+ rxq_info->data_buffer_size = vlib_buffer_get_default_data_size (vm);
+ rxq_info->max_pkt_size = ETHERNET_MAX_PACKET_BYTES;
+
+ rxq_info->desc_ids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M;
+ rxq_info->qflags |= VIRTCHNL2_RX_DESC_SIZE_32BYTE;
+
+ rxq_info->ring_len = rxq->size;
+ }
+ else
+ {
+ /* Rx queue */
+ rxq_info = &vc_rxqs->qinfo[0];
+ rxq_info->dma_ring_addr = idpf_dma_addr (vm, id, (void *) rxq->descs);
+ rxq_info->type = VIRTCHNL2_QUEUE_TYPE_RX;
+ rxq_info->queue_id = rxq->queue_index;
+ rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
+ rxq_info->data_buffer_size = vlib_buffer_get_default_data_size (vm);
+ rxq_info->max_pkt_size = ETHERNET_MAX_PACKET_BYTES;
+
+ rxq_info->desc_ids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
+ rxq_info->qflags |= VIRTCHNL2_RX_DESC_SIZE_32BYTE;
+
+ rxq_info->ring_len = rxq->size;
+ rxq_info->rx_bufq1_id = rxq->bufq1->queue_index;
+ rxq_info->rx_bufq2_id = rxq->bufq2->queue_index;
+ rxq_info->rx_buffer_low_watermark = 64;
+
+ /* Buffer queue */
+ for (i = 1; i <= IDPF_RX_BUFQ_PER_GRP; i++)
+ {
+ idpf_rxq_t *bufq = (i == 1 ? rxq->bufq1 : rxq->bufq2);
+ rxq_info = &vc_rxqs->qinfo[i];
+ rxq_info->dma_ring_addr =
+ idpf_dma_addr (vm, id, (void *) bufq->descs);
+ rxq_info->type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
+ rxq_info->queue_id = bufq->queue_index;
+ rxq_info->model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+ rxq_info->data_buffer_size = vlib_buffer_get_default_data_size (vm);
+ rxq_info->desc_ids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M;
+ rxq_info->ring_len = bufq->size;
+
+ rxq_info->buffer_notif_stride = IDPF_RX_BUF_STRIDE;
+ rxq_info->rx_buffer_low_watermark = 64;
+ }
+ }
+
+ clib_memset (&args, 0, sizeof (args));
+ args.ops = VIRTCHNL2_OP_CONFIG_RX_QUEUES;
+ args.in_args = (u8 *) vc_rxqs;
+ args.in_args_size = size;
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ clib_mem_free (vc_rxqs);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command VIRTCHNL2_OP_CONFIG_RX_QUEUES");
+
+ return error;
+}
+
+clib_error_t *
+idpf_alloc_vectors (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ uint16_t num_vectors)
+{
+ virtchnl2_alloc_vectors_t *alloc_vec;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+ int len;
+
+ len = sizeof (virtchnl2_alloc_vectors_t) +
+ (num_vectors - 1) * sizeof (virtchnl2_vector_chunk_t);
+ alloc_vec = clib_mem_alloc_aligned (len, CLIB_CACHE_LINE_BYTES);
+ clib_memset (alloc_vec, 0, len);
+
+ alloc_vec->num_vectors = num_vectors;
+
+ args.ops = VIRTCHNL2_OP_ALLOC_VECTORS;
+ args.in_args = (u8 *) alloc_vec;
+ args.in_args_size = sizeof (virtchnl2_alloc_vectors_t);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command VIRTCHNL2_OP_ALLOC_VECTORS");
+
+ if (vport->recv_vectors == NULL)
+ {
+ vport->recv_vectors =
+ clib_mem_alloc_aligned (len, CLIB_CACHE_LINE_BYTES);
+ clib_memset (vport->recv_vectors, 0, len);
+ }
+
+ clib_memcpy (vport->recv_vectors, args.out_buffer, len);
+ clib_mem_free (alloc_vec);
+ return error;
+}
+
+clib_error_t *
+idpf_vc_ena_dis_one_queue (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_t *vport, u16 qid, u32 type, bool on)
+{
+ virtchnl2_del_ena_dis_queues_t *queue_select;
+ virtchnl2_queue_chunk_t *queue_chunk;
+ idpf_cmd_info_t args;
+ clib_error_t *error = 0;
+ int len;
+
+ len = sizeof (virtchnl2_del_ena_dis_queues_t);
+ queue_select = clib_mem_alloc_aligned (len, CLIB_CACHE_LINE_BYTES);
+ clib_memset (queue_select, 0, len);
+
+ queue_chunk = queue_select->chunks.chunks;
+ queue_select->chunks.num_chunks = 1;
+ queue_select->vport_id = vport->vport_id;
+
+ queue_chunk->type = type;
+ queue_chunk->start_queue_id = qid;
+ queue_chunk->num_queues = 1;
+
+ args.ops = on ? VIRTCHNL2_OP_ENABLE_QUEUES : VIRTCHNL2_OP_DISABLE_QUEUES;
+ args.in_args = (u8 *) queue_select;
+ args.in_args_size = len;
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command of VIRTCHNL2_OP_%s_QUEUES",
+ on ? "ENABLE" : "DISABLE");
+
+ clib_mem_free (queue_select);
+ return error;
+}
+
+clib_error_t *
+idpf_op_enable_queues (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ u16 qid, bool rx, bool on)
+{
+ clib_error_t *error;
+ u16 queue_index;
+ u32 type;
+
+ /* switch txq/rxq */
+ type = rx ? VIRTCHNL2_QUEUE_TYPE_RX : VIRTCHNL2_QUEUE_TYPE_TX;
+
+ if (type == VIRTCHNL2_QUEUE_TYPE_RX)
+ {
+ queue_index = vport->chunks_info.rx_start_qid + qid;
+ error = idpf_vc_ena_dis_one_queue (vm, id, vport, queue_index, type, on);
+ }
+ else
+ {
+ queue_index = vport->chunks_info.tx_start_qid + qid;
+ error = idpf_vc_ena_dis_one_queue (vm, id, vport, queue_index, type, on);
+ }
+ if (error != 0)
+ return error;
+
+ /* switch tx completion queue */
+ if (!rx && vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT)
+ {
+ type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
+ queue_index = vport->chunks_info.tx_compl_start_qid + qid;
+ error = idpf_vc_ena_dis_one_queue (vm, id, vport, queue_index, type, on);
+ if (error != 0)
+ return error;
+ }
+
+ /* switch rx buffer queue */
+ if (rx && vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT)
+ {
+ type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
+ queue_index = vport->chunks_info.rx_buf_start_qid + 2 * qid;
+ error = idpf_vc_ena_dis_one_queue (vm, id, vport, queue_index, type, on);
+ if (error != 0)
+ return error;
+ queue_index++;
+ error = idpf_vc_ena_dis_one_queue (vm, id, vport, queue_index, type, on);
+ if (error != 0)
+ return error;
+ }
+
+ return error;
+}
+
+clib_error_t *
+idpf_queue_init (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ idpf_create_if_args_t *args)
+{
+ clib_error_t *error = 0;
+ int i;
+
+ for (i = 0; i < id->n_rx_queues; i++)
+ {
+ if ((error = idpf_rx_queue_setup (vm, id, vport, i, args->rxq_size)))
+ return error;
+ if ((error = idpf_vc_config_rxq (vm, id, vport, i)))
+ return error;
+ if ((error = idpf_op_enable_queues (vm, id, vport, i, true, true)))
+ return error;
+ }
+
+ for (i = 0; i < id->n_tx_queues; i++)
+ {
+ if ((error = idpf_tx_queue_setup (vm, id, vport, i, args->txq_size)))
+ return error;
+ if ((error = idpf_vc_config_txq (vm, id, vport, i)))
+ return error;
+ if ((error = idpf_op_enable_queues (vm, id, vport, i, false, true)))
+ return error;
+ }
+
+ if ((error = idpf_alloc_vectors (vm, id, vport, IDPF_DFLT_Q_VEC_NUM)))
+ return error;
+
+ if ((error = idpf_config_rx_queues_irqs (vm, id, vport)))
+ return error;
+
+ return error;
+}
+
+clib_error_t *
+idpf_op_version (vlib_main_t *vm, idpf_device_t *id)
+{
+ clib_error_t *error = 0;
+ idpf_cmd_info_t args;
+ virtchnl2_version_info_t myver = {
+ .major = VIRTCHNL2_VERSION_MAJOR_2,
+ .minor = VIRTCHNL2_VERSION_MINOR_0,
+ };
+ virtchnl2_version_info_t ver = { 0 };
+
+ idpf_log_debug (id, "version: major %u minor %u", myver.major, myver.minor);
+
+ args.ops = VIRTCHNL2_OP_VERSION;
+ args.in_args = (u8 *) &myver;
+ args.in_args_size = sizeof (myver);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (0,
+ "Failed to execute command VIRTCHNL_OP_VERSION");
+
+ clib_memcpy (&ver, args.out_buffer, sizeof (ver));
+
+ if (ver.major != VIRTCHNL2_VERSION_MAJOR_2 ||
+ ver.minor != VIRTCHNL2_VERSION_MINOR_0)
+ return clib_error_return (0,
+ "incompatible virtchnl version "
+ "(remote %d.%d)",
+ ver.major, ver.minor);
+
+ return 0;
+}
+
+clib_error_t *
+idpf_op_get_caps (vlib_main_t *vm, idpf_device_t *id,
+ virtchnl2_get_capabilities_t *caps)
+{
+ virtchnl2_get_capabilities_t caps_msg = { 0 };
+ idpf_cmd_info_t args;
+ clib_error_t *error = 0;
+
+ caps_msg.csum_caps =
+ VIRTCHNL2_CAP_TX_CSUM_L3_IPV4 | VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_TCP |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_UDP | VIRTCHNL2_CAP_TX_CSUM_L4_IPV4_SCTP |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_TCP | VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_UDP |
+ VIRTCHNL2_CAP_TX_CSUM_L4_IPV6_SCTP | VIRTCHNL2_CAP_TX_CSUM_GENERIC |
+ VIRTCHNL2_CAP_RX_CSUM_L3_IPV4 | VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_TCP |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_UDP | VIRTCHNL2_CAP_RX_CSUM_L4_IPV4_SCTP |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_TCP | VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_UDP |
+ VIRTCHNL2_CAP_RX_CSUM_L4_IPV6_SCTP | VIRTCHNL2_CAP_RX_CSUM_GENERIC;
+
+ caps_msg.other_caps = VIRTCHNL2_CAP_WB_ON_ITR;
+
+ args.ops = VIRTCHNL2_OP_GET_CAPS;
+ args.in_args = (u8 *) &caps_msg;
+ args.in_args_size = sizeof (caps_msg);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command VIRTCHNL2_OP_GET_CAPS");
+
+ clib_memcpy (caps, args.out_buffer, sizeof (*caps));
+ return error;
+}
+
+#define CTLQ_NUM 2
+clib_error_t *
+idpf_mbx_init (vlib_main_t *vm, idpf_device_t *id)
+{
+ idpf_ctlq_create_info_t ctlq_info[CTLQ_NUM] = {
+ {
+ .type = IDPF_CTLQ_TYPE_MAILBOX_TX,
+ .id = IDPF_CTLQ_ID,
+ .len = IDPF_CTLQ_LEN,
+ .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
+ .reg = {
+ .head = PF_FW_ATQH,
+ .tail = PF_FW_ATQT,
+ .len = PF_FW_ATQLEN,
+ .bah = PF_FW_ATQBAH,
+ .bal = PF_FW_ATQBAL,
+ .len_mask = PF_FW_ATQLEN_ATQLEN_M,
+ .len_ena_mask = PF_FW_ATQLEN_ATQENABLE_M,
+ .head_mask = PF_FW_ATQH_ATQH_M,
+ }
+ },
+ {
+ .type = IDPF_CTLQ_TYPE_MAILBOX_RX,
+ .id = IDPF_CTLQ_ID,
+ .len = IDPF_CTLQ_LEN,
+ .buf_size = IDPF_DFLT_MBX_BUF_SIZE,
+ .reg = {
+ .head = PF_FW_ARQH,
+ .tail = PF_FW_ARQT,
+ .len = PF_FW_ARQLEN,
+ .bah = PF_FW_ARQBAH,
+ .bal = PF_FW_ARQBAL,
+ .len_mask = PF_FW_ARQLEN_ARQLEN_M,
+ .len_ena_mask = PF_FW_ARQLEN_ARQENABLE_M,
+ .head_mask = PF_FW_ARQH_ARQH_M,
+ }
+ }
+ };
+ struct idpf_ctlq_info *ctlq;
+
+ if (idpf_ctlq_init (vm, id, CTLQ_NUM, ctlq_info))
+ return clib_error_return (0, "ctlq init failed");
+
+ LIST_FOR_EACH_ENTRY_SAFE (ctlq, NULL, &id->cq_list_head,
+ struct idpf_ctlq_info, cq_list)
+ {
+ if (ctlq->q_id == IDPF_CTLQ_ID &&
+ ctlq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_TX)
+ id->asq = ctlq;
+ if (ctlq->q_id == IDPF_CTLQ_ID &&
+ ctlq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_RX)
+ id->arq = ctlq;
+ }
+
+ if (!id->asq || !id->arq)
+ {
+ idpf_ctlq_deinit (id);
+ return clib_error_return (0, "ctlq deinit");
+ }
+
+ return 0;
+}
+
+clib_error_t *
+idpf_vc_query_ptype_info (vlib_main_t *vm, idpf_device_t *id)
+{
+ virtchnl2_get_ptype_info_t ptype_info;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+
+ ptype_info.start_ptype_id = 0;
+ ptype_info.num_ptypes = IDPF_MAX_PKT_TYPE;
+ args.ops = VIRTCHNL2_OP_GET_PTYPE_INFO;
+ args.in_args = (u8 *) &ptype_info;
+ args.in_args_size = sizeof (virtchnl2_get_ptype_info_t);
+ args.out_buffer = NULL;
+ args.out_size = 0;
+
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command VIRTCHNL2_OP_GET_PTYPE_INFO");
+
+ return error;
+}
+
+clib_error_t *
+idpf_get_pkt_type (vlib_main_t *vm, idpf_device_t *id)
+{
+ virtchnl2_get_ptype_info_t *ptype_info;
+ u16 ptype_recvd = 0, ptype_offset, i, j;
+ clib_error_t *error;
+
+ error = idpf_vc_query_ptype_info (vm, id);
+ if (error != 0)
+ return clib_error_return (0, "Fail to query packet type information");
+
+ ptype_info =
+ clib_mem_alloc_aligned (IDPF_DFLT_MBX_BUF_SIZE, CLIB_CACHE_LINE_BYTES);
+
+ while (ptype_recvd < IDPF_MAX_PKT_TYPE)
+ {
+ error = idpf_read_one_msg (vm, id, VIRTCHNL2_OP_GET_PTYPE_INFO,
+ (u8 *) ptype_info, IDPF_DFLT_MBX_BUF_SIZE);
+ if (error != 0)
+ {
+ error = clib_error_return (0, "Fail to get packet type information");
+ goto free_ptype_info;
+ }
+
+ ptype_recvd += ptype_info->num_ptypes;
+ ptype_offset =
+ sizeof (virtchnl2_get_ptype_info_t) - sizeof (virtchnl2_ptype_t);
+
+ for (i = 0; i < ptype_info->num_ptypes; i++)
+ {
+ bool is_inner = false, is_ip = false;
+ virtchnl2_ptype_t *ptype;
+ u32 proto_hdr = 0;
+
+ ptype = (virtchnl2_ptype_t *) ((u8 *) ptype_info + ptype_offset);
+ ptype_offset += IDPF_GET_PTYPE_SIZE (ptype);
+ if (ptype_offset > IDPF_DFLT_MBX_BUF_SIZE)
+ {
+ error =
+ clib_error_return (0, "Ptype offset exceeds mbx buffer size");
+ goto free_ptype_info;
+ }
+
+ if (ptype->ptype_id_10 == 0xFFFF)
+ goto free_ptype_info;
+
+ for (j = 0; j < ptype->proto_id_count; j++)
+ {
+ switch (ptype->proto_id[j])
+ {
+ case VIRTCHNL2_PROTO_HDR_GRE:
+ case VIRTCHNL2_PROTO_HDR_VXLAN:
+ proto_hdr &= ~IDPF_PTYPE_L4_MASK;
+ proto_hdr |= IDPF_PTYPE_TUNNEL_GRENAT;
+ is_inner = true;
+ break;
+ case VIRTCHNL2_PROTO_HDR_MAC:
+ if (is_inner)
+ {
+ proto_hdr &= ~IDPF_PTYPE_INNER_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_INNER_L2_ETHER;
+ }
+ else
+ {
+ proto_hdr &= ~IDPF_PTYPE_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_L2_ETHER;
+ }
+ break;
+ case VIRTCHNL2_PROTO_HDR_VLAN:
+ if (is_inner)
+ {
+ proto_hdr &= ~IDPF_PTYPE_INNER_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_INNER_L2_ETHER_VLAN;
+ }
+ break;
+ case VIRTCHNL2_PROTO_HDR_PTP:
+ proto_hdr &= ~IDPF_PTYPE_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_L2_ETHER_TIMESYNC;
+ break;
+ case VIRTCHNL2_PROTO_HDR_LLDP:
+ proto_hdr &= ~IDPF_PTYPE_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_L2_ETHER_LLDP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_ARP:
+ proto_hdr &= ~IDPF_PTYPE_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_L2_ETHER_ARP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_PPPOE:
+ proto_hdr &= ~IDPF_PTYPE_L2_MASK;
+ proto_hdr |= IDPF_PTYPE_L2_ETHER_PPPOE;
+ break;
+ case VIRTCHNL2_PROTO_HDR_IPV4:
+ if (!is_ip)
+ {
+ proto_hdr |= IDPF_PTYPE_L3_IPV4_EXT_UNKNOWN;
+ is_ip = true;
+ }
+ else
+ {
+ proto_hdr |= IDPF_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ IDPF_PTYPE_TUNNEL_IP;
+ is_inner = true;
+ }
+ break;
+ case VIRTCHNL2_PROTO_HDR_IPV6:
+ if (!is_ip)
+ {
+ proto_hdr |= IDPF_PTYPE_L3_IPV6_EXT_UNKNOWN;
+ is_ip = true;
+ }
+ else
+ {
+ proto_hdr |= IDPF_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ IDPF_PTYPE_TUNNEL_IP;
+ is_inner = true;
+ }
+ break;
+ case VIRTCHNL2_PROTO_HDR_IPV4_FRAG:
+ case VIRTCHNL2_PROTO_HDR_IPV6_FRAG:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_FRAG;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_FRAG;
+ break;
+ case VIRTCHNL2_PROTO_HDR_UDP:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_UDP;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_UDP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_TCP:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_TCP;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_TCP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_SCTP:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_SCTP;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_SCTP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_ICMP:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_ICMP;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_ICMP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_ICMPV6:
+ if (is_inner)
+ proto_hdr |= IDPF_PTYPE_INNER_L4_ICMP;
+ else
+ proto_hdr |= IDPF_PTYPE_L4_ICMP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_L2TPV2:
+ case VIRTCHNL2_PROTO_HDR_L2TPV2_CONTROL:
+ case VIRTCHNL2_PROTO_HDR_L2TPV3:
+ is_inner = true;
+ proto_hdr |= IDPF_PTYPE_TUNNEL_L2TP;
+ break;
+ case VIRTCHNL2_PROTO_HDR_NVGRE:
+ is_inner = true;
+ proto_hdr |= IDPF_PTYPE_TUNNEL_NVGRE;
+ break;
+ case VIRTCHNL2_PROTO_HDR_GTPC_TEID:
+ is_inner = true;
+ proto_hdr |= IDPF_PTYPE_TUNNEL_GTPC;
+ break;
+ case VIRTCHNL2_PROTO_HDR_GTPU:
+ case VIRTCHNL2_PROTO_HDR_GTPU_UL:
+ case VIRTCHNL2_PROTO_HDR_GTPU_DL:
+ is_inner = true;
+ proto_hdr |= IDPF_PTYPE_TUNNEL_GTPU;
+ break;
+ case VIRTCHNL2_PROTO_HDR_PAY:
+ case VIRTCHNL2_PROTO_HDR_IPV6_EH:
+ case VIRTCHNL2_PROTO_HDR_PRE_MAC:
+ case VIRTCHNL2_PROTO_HDR_POST_MAC:
+ case VIRTCHNL2_PROTO_HDR_ETHERTYPE:
+ case VIRTCHNL2_PROTO_HDR_SVLAN:
+ case VIRTCHNL2_PROTO_HDR_CVLAN:
+ case VIRTCHNL2_PROTO_HDR_MPLS:
+ case VIRTCHNL2_PROTO_HDR_MMPLS:
+ case VIRTCHNL2_PROTO_HDR_CTRL:
+ case VIRTCHNL2_PROTO_HDR_ECP:
+ case VIRTCHNL2_PROTO_HDR_EAPOL:
+ case VIRTCHNL2_PROTO_HDR_PPPOD:
+ case VIRTCHNL2_PROTO_HDR_IGMP:
+ case VIRTCHNL2_PROTO_HDR_AH:
+ case VIRTCHNL2_PROTO_HDR_ESP:
+ case VIRTCHNL2_PROTO_HDR_IKE:
+ case VIRTCHNL2_PROTO_HDR_NATT_KEEP:
+ case VIRTCHNL2_PROTO_HDR_GTP:
+ case VIRTCHNL2_PROTO_HDR_GTP_EH:
+ case VIRTCHNL2_PROTO_HDR_GTPCV2:
+ case VIRTCHNL2_PROTO_HDR_ECPRI:
+ case VIRTCHNL2_PROTO_HDR_VRRP:
+ case VIRTCHNL2_PROTO_HDR_OSPF:
+ case VIRTCHNL2_PROTO_HDR_TUN:
+ case VIRTCHNL2_PROTO_HDR_VXLAN_GPE:
+ case VIRTCHNL2_PROTO_HDR_GENEVE:
+ case VIRTCHNL2_PROTO_HDR_NSH:
+ case VIRTCHNL2_PROTO_HDR_QUIC:
+ case VIRTCHNL2_PROTO_HDR_PFCP:
+ case VIRTCHNL2_PROTO_HDR_PFCP_NODE:
+ case VIRTCHNL2_PROTO_HDR_PFCP_SESSION:
+ case VIRTCHNL2_PROTO_HDR_RTP:
+ case VIRTCHNL2_PROTO_HDR_NO_PROTO:
+ default:
+ continue;
+ }
+ id->ptype_tbl[ptype->ptype_id_10] = proto_hdr;
+ }
+ }
+ }
+
+free_ptype_info:
+ clib_mem_free (ptype_info);
+ clear_cmd (id);
+ return error;
+}
+
+static void
+idpf_reset_pf (idpf_device_t *id)
+{
+ u32 reg;
+
+ reg = idpf_reg_read (id, PFGEN_CTRL);
+ idpf_reg_write (id, PFGEN_CTRL, (reg | PFGEN_CTRL_PFSWR));
+}
+
+#define IDPF_RESET_WAIT_CNT 100
+clib_error_t *
+idpf_check_pf_reset_done (vlib_main_t *vm, idpf_device_t *id)
+{
+ u32 reg;
+ int i;
+
+ for (i = 0; i < IDPF_RESET_WAIT_CNT; i++)
+ {
+ reg = idpf_reg_read (id, PFGEN_RSTAT);
+ if (reg != 0xFFFFFFFF && (reg & PFGEN_RSTAT_PFR_STATE_M))
+ return 0;
+ vlib_process_suspend (vm, 1.0);
+ }
+
+ return clib_error_return (0, "pf reset time out");
+}
+
+void
+idpf_init_vport_req_info (idpf_device_t *id,
+ virtchnl2_create_vport_t *vport_info)
+{
+ vport_info->vport_type = VIRTCHNL2_VPORT_TYPE_DEFAULT;
+ if (id->txq_model == 1)
+ {
+ vport_info->txq_model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+ vport_info->num_tx_q = IDPF_DEFAULT_TXQ_NUM;
+ vport_info->num_tx_complq =
+ IDPF_DEFAULT_TXQ_NUM * IDPF_TX_COMPLQ_PER_GRP;
+ }
+ else
+ {
+ vport_info->txq_model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
+ vport_info->num_tx_q = IDPF_DEFAULT_TXQ_NUM;
+ vport_info->num_tx_complq = 0;
+ }
+ if (id->rxq_model == 1)
+ {
+ vport_info->rxq_model = VIRTCHNL2_QUEUE_MODEL_SPLIT;
+ vport_info->num_rx_q = IDPF_DEFAULT_RXQ_NUM;
+ vport_info->num_rx_bufq = IDPF_DEFAULT_RXQ_NUM * IDPF_RX_BUFQ_PER_GRP;
+ }
+ else
+ {
+ vport_info->rxq_model = VIRTCHNL2_QUEUE_MODEL_SINGLE;
+ vport_info->num_rx_q = IDPF_DEFAULT_RXQ_NUM;
+ vport_info->num_rx_bufq = 0;
+ }
+
+ return;
+}
+
+clib_error_t *
+idpf_vc_create_vport (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ virtchnl2_create_vport_t *vport_req_info)
+{
+ virtchnl2_create_vport_t vport_msg = { 0 };
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+
+ vport_msg.vport_type = vport_req_info->vport_type;
+ vport_msg.txq_model = vport_req_info->txq_model;
+ vport_msg.rxq_model = vport_req_info->rxq_model;
+ vport_msg.num_tx_q = vport_req_info->num_tx_q;
+ vport_msg.num_tx_complq = vport_req_info->num_tx_complq;
+ vport_msg.num_rx_q = vport_req_info->num_rx_q;
+ vport_msg.num_rx_bufq = vport_req_info->num_rx_bufq;
+
+ clib_memset (&args, 0, sizeof (args));
+ args.ops = VIRTCHNL2_OP_CREATE_VPORT;
+ args.in_args = (u8 *) &vport_msg;
+ args.in_args_size = sizeof (vport_msg);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command of VIRTCHNL2_OP_CREATE_VPORT");
+
+ clib_memcpy (vport->vport_info, args.out_buffer, IDPF_DFLT_MBX_BUF_SIZE);
+ return error;
+}
+
+clib_error_t *
+idpf_vc_destroy_vport (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport)
+{
+ virtchnl2_vport_t vc_vport;
+ idpf_cmd_info_t args;
+ clib_error_t *error = 0;
+
+ vc_vport.vport_id = vport->vport_id;
+
+ clib_memset (&args, 0, sizeof (args));
+ args.ops = VIRTCHNL2_OP_DESTROY_VPORT;
+ args.in_args = (u8 *) &vc_vport;
+ args.in_args_size = sizeof (vc_vport);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command of VIRTCHNL2_OP_DESTROY_VPORT");
+
+ return error;
+}
+
+clib_error_t *
+idpf_init_vport (idpf_device_t *id, idpf_vport_t *vport)
+{
+ virtchnl2_create_vport_t *vport_info = vport->vport_info;
+ int i, type;
+
+ vport->vport_id = vport_info->vport_id;
+ vport->txq_model = vport_info->txq_model;
+ vport->rxq_model = vport_info->rxq_model;
+ vport->num_tx_q = vport_info->num_tx_q;
+ vport->num_tx_complq = vport_info->num_tx_complq;
+ vport->num_rx_q = vport_info->num_rx_q;
+ vport->num_rx_bufq = vport_info->num_rx_bufq;
+ vport->max_mtu = vport_info->max_mtu;
+ clib_memcpy (vport->default_mac_addr, vport_info->default_mac_addr,
+ IDPF_ETH_ALEN);
+
+ for (i = 0; i < vport_info->chunks.num_chunks; i++)
+ {
+ type = vport_info->chunks.chunks[i].type;
+ switch (type)
+ {
+ case VIRTCHNL2_QUEUE_TYPE_TX:
+ vport->chunks_info.tx_start_qid =
+ vport_info->chunks.chunks[i].start_queue_id;
+ vport->chunks_info.tx_qtail_start =
+ vport_info->chunks.chunks[i].qtail_reg_start;
+ vport->chunks_info.tx_qtail_spacing =
+ vport_info->chunks.chunks[i].qtail_reg_spacing;
+ break;
+ case VIRTCHNL2_QUEUE_TYPE_RX:
+ vport->chunks_info.rx_start_qid =
+ vport_info->chunks.chunks[i].start_queue_id;
+ vport->chunks_info.rx_qtail_start =
+ vport_info->chunks.chunks[i].qtail_reg_start;
+ vport->chunks_info.rx_qtail_spacing =
+ vport_info->chunks.chunks[i].qtail_reg_spacing;
+ break;
+ case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION:
+ vport->chunks_info.tx_compl_start_qid =
+ vport_info->chunks.chunks[i].start_queue_id;
+ vport->chunks_info.tx_compl_qtail_start =
+ vport_info->chunks.chunks[i].qtail_reg_start;
+ vport->chunks_info.tx_compl_qtail_spacing =
+ vport_info->chunks.chunks[i].qtail_reg_spacing;
+ break;
+ case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER:
+ vport->chunks_info.rx_buf_start_qid =
+ vport_info->chunks.chunks[i].start_queue_id;
+ vport->chunks_info.rx_buf_qtail_start =
+ vport_info->chunks.chunks[i].qtail_reg_start;
+ vport->chunks_info.rx_buf_qtail_spacing =
+ vport_info->chunks.chunks[i].qtail_reg_spacing;
+ break;
+ default:
+ return clib_error_return (0, "Unsupported queue type");
+ }
+ }
+
+ return 0;
+}
+
+clib_error_t *
+idpf_ena_dis_vport (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport,
+ bool enable)
+{
+ virtchnl2_vport_t vc_vport;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+
+ vc_vport.vport_id = vport->vport_id;
+ args.ops = enable ? VIRTCHNL2_OP_ENABLE_VPORT : VIRTCHNL2_OP_DISABLE_VPORT;
+ args.in_args = (u8 *) &vc_vport;
+ args.in_args_size = sizeof (vc_vport);
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ {
+ return clib_error_return (
+ 0, "Failed to execute command of VIRTCHNL2_OP_%s_VPORT",
+ enable ? "ENABLE" : "DISABLE");
+ }
+
+ return error;
+}
+
+clib_error_t *
+idpf_dealloc_vectors (vlib_main_t *vm, idpf_device_t *id, idpf_vport_t *vport)
+{
+ virtchnl2_alloc_vectors_t *alloc_vec;
+ virtchnl2_vector_chunks_t *vcs;
+ idpf_cmd_info_t args;
+ clib_error_t *error;
+ int len;
+
+ alloc_vec = vport->recv_vectors;
+ vcs = &alloc_vec->vchunks;
+
+ len = sizeof (virtchnl2_vector_chunks_t) +
+ (vcs->num_vchunks - 1) * sizeof (virtchnl2_vector_chunk_t);
+
+ args.ops = VIRTCHNL2_OP_DEALLOC_VECTORS;
+ args.in_args = (u8 *) vcs;
+ args.in_args_size = len;
+ args.out_buffer = id->mbx_resp;
+ args.out_size = IDPF_DFLT_MBX_BUF_SIZE;
+ error = idpf_execute_vc_cmd (vm, id, &args);
+ if (error != 0)
+ return clib_error_return (
+ 0, "Failed to execute command VIRTCHNL2_OP_DEALLOC_VECTORS");
+
+ return error;
+}
+
+clib_error_t *
+idpf_dev_vport_init (vlib_main_t *vm, idpf_device_t *id,
+ idpf_vport_param_t *param)
+{
+ idpf_vport_t *vport;
+ virtchnl2_create_vport_t vport_req_info = { 0 };
+ clib_error_t *error = 0;
+
+ vport = clib_mem_alloc (sizeof (idpf_vport_t));
+ clib_memset (vport, 0, sizeof (idpf_vport_t));
+
+ vport->vport_info = clib_mem_alloc (IDPF_DFLT_MBX_BUF_SIZE);
+ clib_memset (vport->vport_info, 0, IDPF_DFLT_MBX_BUF_SIZE);
+
+ id->vports[param->idx] = vport;
+ vport->id = id;
+ vport->idx = param->idx;
+
+ idpf_init_vport_req_info (id, &vport_req_info);
+
+ error = idpf_vc_create_vport (vm, id, vport, &vport_req_info);
+ if (error != 0)
+ {
+ idpf_log_err (id, "Failed to create vport.");
+ goto err_create_vport;
+ }
+
+ error = idpf_init_vport (id, vport);
+ if (error != 0)
+ {
+ idpf_log_err (id, "Failed to init vports.");
+ goto err_init_vport;
+ }
+
+ id->vports[param->idx] = vport;
+
+ clib_memcpy (id->hwaddr, vport->default_mac_addr, IDPF_ETH_ALEN);
+
+ return error;
+
+err_init_vport:
+ id->vports[param->idx] = NULL; /* reset */
+ idpf_vc_destroy_vport (vm, id, vport);
+err_create_vport:
+ clib_mem_free (vport->vport_info);
+ clib_mem_free (vport);
+ return error;
+}
+
+/* dev configure */
+clib_error_t *
+idpf_device_init (vlib_main_t *vm, idpf_main_t *im, idpf_device_t *id,
+ idpf_create_if_args_t *args)
+{
+ idpf_vport_t *vport;
+ idpf_vport_param_t vport_param = { 0 };
+ virtchnl2_get_capabilities_t caps = { 0 };
+ clib_error_t *error;
+ u16 rxq_num, txq_num;
+ int i;
+
+ idpf_reset_pf (id);
+ error = idpf_check_pf_reset_done (vm, id);
+ if (error)
+ return error;
+
+ /*
+ * Init mailbox configuration
+ */
+ if ((error = idpf_mbx_init (vm, id)))
+ return error;
+
+ /*
+ * Check API version
+ */
+ error = idpf_op_version (vm, id);
+ if (error)
+ return error;
+
+ /*
+ * Get pkt type table
+ */
+ error = idpf_get_pkt_type (vm, id);
+ if (error)
+ return error;
+
+ /* Get idpf capability */
+ error = idpf_op_get_caps (vm, id, &caps);
+ if (error)
+ return error;
+
+ rxq_num = args->rxq_num ? args->rxq_num : 1;
+ txq_num = args->txq_num ? args->txq_num : vlib_get_n_threads ();
+
+ /* Sync capabilities */
+ id->n_rx_queues = rxq_num;
+ id->n_tx_queues = txq_num;
+ id->csum_caps = caps.csum_caps;
+ id->seg_caps = caps.seg_caps;
+ id->hsplit_caps = caps.hsplit_caps;
+ id->rsc_caps = caps.rsc_caps;
+ id->rss_caps = caps.rss_caps;
+ id->other_caps = caps.other_caps;
+ id->max_rx_q = caps.max_rx_q;
+ id->max_tx_q = caps.max_tx_q;
+ id->max_rx_bufq = caps.max_rx_bufq;
+ id->max_tx_complq = caps.max_tx_complq;
+ id->max_sriov_vfs = caps.max_sriov_vfs;
+ id->max_vports = caps.max_vports;
+ id->default_num_vports = caps.default_num_vports;
+
+ id->vports = clib_mem_alloc (id->max_vports * sizeof (*id->vports));
+ id->max_rxq_per_msg =
+ (IDPF_DFLT_MBX_BUF_SIZE - sizeof (virtchnl2_config_rx_queues_t)) /
+ sizeof (virtchnl2_rxq_info_t);
+ id->max_txq_per_msg =
+ (IDPF_DFLT_MBX_BUF_SIZE - sizeof (virtchnl2_config_tx_queues_t)) /
+ sizeof (virtchnl2_txq_info_t);
+
+ id->cur_vport_idx = 0;
+ id->cur_vports = 0;
+ id->cur_vport_nb = 0;
+
+ if (!args->rxq_single)
+ id->rxq_model = 1;
+ if (!args->txq_single)
+ id->txq_model = 1;
+
+ /* Init and enable vports */
+ if (args->req_vport_nb == 1)
+ {
+ vport_param.id = id;
+ vport_param.idx = 0;
+ error = idpf_dev_vport_init (vm, id, &vport_param);
+ if (error)
+ return error;
+ vport = id->vports[vport_param.idx];
+ error = idpf_ena_dis_vport (vm, id, vport, true);
+ if (error)
+ return error;
+ id->cur_vports |= 1ULL << vport_param.idx;
+ id->cur_vport_nb++;
+ id->cur_vport_idx++;
+ error = idpf_queue_init (vm, id, vport, args);
+ if (error)
+ return error;
+ }
+ else
+ {
+ for (i = 0; i < args->req_vport_nb; i++)
+ {
+ vport_param.id = id;
+ vport_param.idx = i;
+ if ((error = idpf_dev_vport_init (vm, id, &vport_param)))
+ return error;
+ vport = id->vports[vport_param.idx];
+ error = idpf_ena_dis_vport (vm, id, vport, true);
+ if (error)
+ return error;
+ id->cur_vports |= 1ULL << vport_param.idx;
+ id->cur_vport_nb++;
+ id->cur_vport_idx++;
+ error = idpf_queue_init (vm, id, vport, args);
+ if (error)
+ return error;
+ }
+ }
+
+ id->flags |= IDPF_DEVICE_F_INITIALIZED;
+ return error;
+}
+
+static u32
+idpf_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hw, u32 flags)
+{
+ idpf_device_t *id = idpf_get_device (hw->dev_instance);
+
+ switch (flags)
+ {
+ case ETHERNET_INTERFACE_FLAG_DEFAULT_L3:
+ id->flags &= ~IDPF_DEVICE_F_PROMISC;
+ break;
+ case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
+ id->flags |= IDPF_DEVICE_F_PROMISC;
+ break;
+ default:
+ return ~0;
+ }
+
+ return 0;
+}
+
+void
+idpf_delete_if (vlib_main_t *vm, idpf_device_t *id, int with_barrier)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ idpf_main_t *im = &idpf_main;
+ idpf_vport_t *vport;
+ int i;
+ u32 dev_instance;
+
+ id->flags &= ~IDPF_DEVICE_F_ADMIN_UP;
+
+ if (id->hw_if_index)
+ {
+ if (with_barrier)
+ vlib_worker_thread_barrier_sync (vm);
+ vnet_hw_interface_set_flags (vnm, id->hw_if_index, 0);
+ ethernet_delete_interface (vnm, id->hw_if_index);
+ if (with_barrier)
+ vlib_worker_thread_barrier_release (vm);
+ }
+
+ for (i = 0; i < id->cur_vport_nb; i++)
+ {
+ vport = id->vports[i];
+ if (vport->recv_vectors != NULL)
+ idpf_dealloc_vectors (vm, id, vport);
+ }
+
+ vlib_pci_device_close (vm, id->pci_dev_handle);
+
+ vlib_physmem_free (vm, id->asq);
+ vlib_physmem_free (vm, id->arq);
+
+ for (i = 0; i < id->cur_vport_nb; i++)
+ {
+ vport = id->vports[i];
+ vec_foreach_index (i, vport->rxqs)
+ {
+ idpf_rxq_t *rxq = vec_elt_at_index (vport->rxqs, i);
+ vlib_physmem_free (vm, (void *) rxq->descs);
+ if (rxq->n_enqueued)
+ vlib_buffer_free_from_ring (vm, rxq->bufs, rxq->next, rxq->size,
+ rxq->n_enqueued);
+ vec_free (rxq->bufs);
+ }
+
+ vec_free (vport->rxqs);
+
+ vec_foreach_index (i, vport->txqs)
+ {
+ idpf_txq_t *txq = vec_elt_at_index (vport->txqs, i);
+ vlib_physmem_free (vm, (void *) txq->descs);
+ if (txq->n_enqueued)
+ {
+ u16 first = (txq->next - txq->n_enqueued) & (txq->size - 1);
+ vlib_buffer_free_from_ring (vm, txq->bufs, first, txq->size,
+ txq->n_enqueued);
+ }
+ vec_free (txq->ph_bufs);
+ vec_free (txq->bufs);
+ clib_ring_free (txq->rs_slots);
+ vec_free (txq->tmp_bufs);
+ vec_free (txq->tmp_descs);
+ clib_spinlock_free (&txq->lock);
+ }
+ vec_free (vport->txqs);
+ }
+
+ vec_free (id->name);
+
+ clib_error_free (id->error);
+ dev_instance = id->dev_instance;
+ clib_mem_free (id->mbx_resp);
+ clib_memset (id, 0, sizeof (*id));
+ pool_put_index (im->devices, dev_instance);
+ clib_mem_free (id);
+}
+
+static u8
+idpf_validate_queue_size (idpf_create_if_args_t *args)
+{
+ clib_error_t *error = 0;
+
+ args->rxq_size = (args->rxq_size == 0) ? IDPF_RXQ_SZ : args->rxq_size;
+ args->txq_size = (args->txq_size == 0) ? IDPF_TXQ_SZ : args->txq_size;
+
+ if ((args->rxq_size > IDPF_QUEUE_SZ_MAX) ||
+ (args->txq_size > IDPF_QUEUE_SZ_MAX))
+ {
+ args->rv = VNET_API_ERROR_INVALID_VALUE;
+ args->error = clib_error_return (
+ error, "queue size must not be greater than %u", IDPF_QUEUE_SZ_MAX);
+ return 1;
+ }
+ if ((args->rxq_size < IDPF_QUEUE_SZ_MIN) ||
+ (args->txq_size < IDPF_QUEUE_SZ_MIN))
+ {
+ args->rv = VNET_API_ERROR_INVALID_VALUE;
+ args->error = clib_error_return (
+ error, "queue size must not be smaller than %u", IDPF_QUEUE_SZ_MIN);
+ return 1;
+ }
+ if ((args->rxq_size & (args->rxq_size - 1)) ||
+ (args->txq_size & (args->txq_size - 1)))
+ {
+ args->rv = VNET_API_ERROR_INVALID_VALUE;
+ args->error =
+ clib_error_return (error, "queue size must be a power of two");
+ return 1;
+ }
+ return 0;
+}
+
+void
+idpf_process_one_device (vlib_main_t *vm, idpf_device_t *id, int is_irq)
+{
+ /* placeholder */
+ return;
+}
+
+static uword
+idpf_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ idpf_main_t *im = &idpf_main;
+ uword *event_data = 0, event_type;
+ int enabled = 0, irq;
+ f64 last_run_duration = 0;
+ f64 last_periodic_time = 0;
+ idpf_device_t **dev_pointers = 0;
+ u32 i;
+
+ while (1)
+ {
+ if (enabled)
+ vlib_process_wait_for_event_or_clock (vm, 5.0 - last_run_duration);
+ else
+ vlib_process_wait_for_event (vm);
+
+ event_type = vlib_process_get_events (vm, &event_data);
+ irq = 0;
+
+ switch (event_type)
+ {
+ case ~0:
+ last_periodic_time = vlib_time_now (vm);
+ break;
+ case IDPF_PROCESS_EVENT_START:
+ enabled = 1;
+ break;
+ case IDPF_PROCESS_EVENT_DELETE_IF:
+ for (int i = 0; i < vec_len (event_data); i++)
+ {
+ idpf_device_t *id = idpf_get_device (event_data[i]);
+ idpf_delete_if (vm, id, /* with_barrier */ 1);
+ }
+ if (pool_elts (im->devices) < 1)
+ enabled = 0;
+ break;
+ case IDPF_PROCESS_EVENT_AQ_INT:
+ irq = 1;
+ break;
+
+ default:
+ ASSERT (0);
+ }
+
+ vec_reset_length (event_data);
+
+ if (enabled == 0)
+ continue;
+
+ /* create local list of device pointers as device pool may grow
+ * during suspend */
+ vec_reset_length (dev_pointers);
+
+ pool_foreach_index (i, im->devices)
+ {
+ vec_add1 (dev_pointers, idpf_get_device (i));
+ }
+
+ vec_foreach_index (i, dev_pointers)
+ {
+ idpf_process_one_device (vm, dev_pointers[i], irq);
+ };
+
+ last_run_duration = vlib_time_now (vm) - last_periodic_time;
+ }
+ return 0;
+}
+
+VLIB_REGISTER_NODE (idpf_process_node) = {
+ .function = idpf_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "idpf-process",
+};
+
+void
+idpf_create_if (vlib_main_t *vm, idpf_create_if_args_t *args)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_eth_interface_registration_t eir = {};
+ idpf_main_t *im = &idpf_main;
+ idpf_device_t *id, **idp;
+ vlib_pci_dev_handle_t h;
+ clib_error_t *error = 0;
+ int i, j, v;
+
+ /* check input args */
+ if (idpf_validate_queue_size (args) != 0)
+ return;
+
+ pool_foreach (idp, im->devices)
+ {
+ if ((*idp)->pci_addr.as_u32 == args->addr.as_u32)
+ {
+ args->rv = VNET_API_ERROR_ADDRESS_IN_USE;
+ args->error =
+ clib_error_return (error, "%U: %s", format_vlib_pci_addr,
+ &args->addr, "pci address in use");
+ return;
+ }
+ }
+
+ pool_get (im->devices, idp);
+ idp[0] = id =
+ clib_mem_alloc_aligned (sizeof (idpf_device_t), CLIB_CACHE_LINE_BYTES);
+ clib_memset (id, 0, sizeof (idpf_device_t));
+ id->mbx_resp = clib_mem_alloc (IDPF_DFLT_MBX_BUF_SIZE);
+ id->dev_instance = idp - im->devices;
+ id->per_interface_next_index = ~0;
+ id->name = vec_dup (args->name);
+
+ if ((error =
+ vlib_pci_device_open (vm, &args->addr, idpf_pci_device_ids, &h)))
+ {
+ pool_put (im->devices, idp);
+ clib_mem_free (id);
+ args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+ args->error = clib_error_return (error, "pci-addr %U",
+ format_vlib_pci_addr, &args->addr);
+ return;
+ }
+ id->pci_dev_handle = h;
+ id->pci_addr = args->addr;
+ id->numa_node = vlib_pci_get_numa_node (vm, h);
+
+ vlib_pci_set_private_data (vm, h, id->dev_instance);
+
+ if ((error = vlib_pci_bus_master_enable (vm, h)))
+ goto error;
+
+ if ((error = vlib_pci_map_region (vm, h, 0, &id->bar0)))
+ goto error;
+
+ if (vlib_pci_supports_virtual_addr_dma (vm, h))
+ id->flags |= IDPF_DEVICE_F_VA_DMA;
+
+ if ((error = idpf_device_init (vm, im, id, args)))
+ goto error;
+
+ /* create interface */
+ eir.dev_class_index = idpf_device_class.index;
+ eir.dev_instance = id->dev_instance;
+ eir.address = id->hwaddr;
+ eir.cb.flag_change = idpf_flag_change;
+ id->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+
+ ethernet_set_flags (vnm, id->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+
+ vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, id->hw_if_index);
+ args->sw_if_index = id->sw_if_index = sw->sw_if_index;
+
+ vnet_hw_if_set_caps (vnm, id->hw_if_index,
+ VNET_HW_IF_CAP_INT_MODE | VNET_HW_IF_CAP_MAC_FILTER |
+ VNET_HW_IF_CAP_TX_CKSUM | VNET_HW_IF_CAP_TCP_GSO);
+
+ for (v = 0; v < id->cur_vport_nb; v++)
+ {
+ for (j = 0; j < id->n_rx_queues; j++)
+ {
+ u32 qi;
+ i = v * id->n_rx_queues + j;
+ qi = vnet_hw_if_register_rx_queue (vnm, id->hw_if_index, i,
+ VNET_HW_IF_RXQ_THREAD_ANY);
+ id->vports[v]->rxqs[j].queue_index = qi;
+ }
+ for (j = 0; j < id->n_tx_queues; j++)
+ {
+ u32 qi;
+ i = v * id->n_tx_queues + j;
+ qi = vnet_hw_if_register_tx_queue (vnm, id->hw_if_index, i);
+ id->vports[v]->txqs[j].queue_index = qi;
+ }
+ }
+
+ for (v = 0; v < id->cur_vport_nb; v++)
+ for (i = 0; i < vlib_get_n_threads (); i++)
+ {
+ u32 qi = id->vports[v]->txqs[i % id->n_tx_queues].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, i);
+ }
+
+ vnet_hw_if_update_runtime_data (vnm, id->hw_if_index);
+
+ if (pool_elts (im->devices) == 1)
+ vlib_process_signal_event (vm, idpf_process_node.index,
+ IDPF_PROCESS_EVENT_START, 0);
+
+ return;
+
+error:
+ idpf_delete_if (vm, id, /* with_barrier */ 0);
+ args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+ args->error = clib_error_return (error, "pci-addr %U", format_vlib_pci_addr,
+ &args->addr);
+ idpf_log_err (id, "error: %U", format_clib_error, args->error);
+}
+
+void *
+idpf_alloc_dma_mem (vlib_main_t *vm, idpf_device_t *id, idpf_dma_mem_t *mem,
+ u64 size)
+{
+ void *mz = NULL;
+ vlib_pci_dev_handle_t h = id->pci_dev_handle;
+
+ if (!mem)
+ return NULL;
+
+ /* Fixme */
+ mz = vlib_physmem_alloc_aligned_on_numa (vm, size, CLIB_CACHE_LINE_BYTES,
+ id->numa_node);
+ if (!mz)
+ return NULL;
+ if (vlib_pci_map_dma (vm, h, mz))
+ return NULL;
+
+ mem->size = size;
+ if (id->flags & IDPF_DEVICE_F_VA_DMA)
+ {
+ mem->va = mz;
+ clib_memset (mem->va, 0, size);
+ }
+ else
+ {
+ mem->va = NULL;
+ }
+ mem->pa = idpf_dma_addr (vm, id, mz);
+
+ return mem->va;
+}
+
+void
+idpf_free_dma_mem (idpf_device_t *id, idpf_dma_mem_t *mem)
+{
+ mem->size = 0;
+ mem->va = NULL;
+ mem->pa = 0;
+
+ clib_mem_free (mem);
+}
+
+static clib_error_t *
+idpf_interface_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ idpf_device_t *id = idpf_get_device (hi->dev_instance);
+ uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ if (id->flags & IDPF_DEVICE_F_ERROR)
+ return clib_error_return (0, "device is in error state");
+
+ if (is_up)
+ {
+ vnet_hw_interface_set_flags (vnm, id->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ id->flags |= IDPF_DEVICE_F_ADMIN_UP;
+ }
+ else
+ {
+ vnet_hw_interface_set_flags (vnm, id->hw_if_index, 0);
+ id->flags &= ~IDPF_DEVICE_F_ADMIN_UP;
+ }
+ return 0;
+}
+
+VNET_DEVICE_CLASS (idpf_device_class, ) = {
+ .name = "Infrastructure Data Path Function (IDPF) interface",
+ .format_device_name = format_idpf_device_name,
+ .admin_up_down_function = idpf_interface_admin_up_down,
+};
+
+clib_error_t *
+idpf_init (vlib_main_t *vm)
+{
+ idpf_main_t *im = &idpf_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ vec_validate_aligned (im->per_thread_data, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (idpf_init) = {
+ .runs_after = VLIB_INITS ("pci_bus_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/format.c b/src/plugins/idpf/format.c
new file mode 100644
index 00000000000..86a4b884286
--- /dev/null
+++ b/src/plugins/idpf/format.c
@@ -0,0 +1,77 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <idpf/idpf.h>
+
+u8 *
+format_idpf_device_name (u8 *s, va_list *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 i = va_arg (*args, u32);
+ idpf_device_t *id = idpf_get_device (i);
+ vlib_pci_addr_t *addr = vlib_pci_get_addr (vm, id->pci_dev_handle);
+
+ if (id->name)
+ return format (s, "%s", id->name);
+
+ s = format (s, "idpf-%x/%x/%x/%x", addr->domain, addr->bus, addr->slot,
+ addr->function);
+ return s;
+}
+
+u8 *
+format_idpf_device_flags (u8 *s, va_list *args)
+{
+ idpf_device_t *id = va_arg (*args, idpf_device_t *);
+ u8 *t = 0;
+
+#define _(a, b, c) \
+ if (id->flags & (1 << a)) \
+ t = format (t, "%s%s", t ? " " : "", c);
+ foreach_idpf_device_flags
+#undef _
+ s = format (s, "%v", t);
+ vec_free (t);
+ return s;
+}
+
+u8 *
+format_idpf_checksum_cap_flags (u8 *s, va_list *args)
+{
+ u32 flags = va_arg (*args, u32);
+ int not_first = 0;
+
+ char *strs[32] = {
+#define _(a, b, c) [a] = c,
+ foreach_idpf_checksum_cap_flag
+#undef _
+ };
+
+ for (int i = 0; i < 32; i++)
+ {
+ if ((flags & (1 << i)) == 0)
+ continue;
+ if (not_first)
+ s = format (s, " ");
+ if (strs[i])
+ s = format (s, "%s", strs[i]);
+ else
+ s = format (s, "unknown(%u)", i);
+ not_first = 1;
+ }
+ return s;
+}
diff --git a/src/plugins/idpf/idpf.api b/src/plugins/idpf/idpf.api
new file mode 100644
index 00000000000..5d02957ac38
--- /dev/null
+++ b/src/plugins/idpf/idpf.api
@@ -0,0 +1,80 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+option version = "1.0.0";
+import "vnet/interface_types.api";
+
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param pci_addr - pci address as unsigned 32bit integer:
+ 0-15 domain, 16-23 bus, 24-28 slot, 29-31 function
+ ddddddddddddddddbbbbbbbbsssssfff
+ @param rxq_num - number of receive queues
+ @param rxq_size - receive queue size
+ @param txq_size - transmit queue size
+*/
+
+define idpf_create
+{
+ u32 client_index;
+ u32 context;
+
+ u32 pci_addr;
+ u16 rxq_single;
+ u16 txq_single;
+ u16 rxq_num;
+ u16 txq_num;
+ u16 rxq_size;
+ u16 txq_size;
+ u16 req_vport_nb;
+ option vat_help = "<pci-address> [vport-num <size>] [rx-single <size>] [tx-single <size>] [rxq-num <size>] [txq-num <size>] [rxq-size <size>] [txq-size <size>]";
+};
+
+/** \brief
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param sw_if_index - software index for the new idpf interface
+*/
+
+define idpf_create_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index
+*/
+
+autoreply define idpf_delete
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_interface_index_t sw_if_index;
+ option vat_help = "<sw_if_index>";
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/idpf.h b/src/plugins/idpf/idpf.h
new file mode 100644
index 00000000000..0bac575d4b4
--- /dev/null
+++ b/src/plugins/idpf/idpf.h
@@ -0,0 +1,929 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _IDPF_H_
+#define _IDPF_H_
+
+#include <vlib/vlib.h>
+#include <vppinfra/ring.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
+
+#include <vppinfra/types.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/lock.h>
+
+#include <vlib/log.h>
+#include <vlib/pci/pci.h>
+
+#include <vnet/interface.h>
+
+#include <vnet/devices/devices.h>
+#include <vnet/flow/flow.h>
+
+#include <idpf/virtchnl2.h>
+#include <sys/queue.h>
+
+#define BIT(a) (1UL << (a))
+
+/*
+ * LAN PF register
+ */
+#define MAKEMASK(m, s) ((m) << (s))
+
+/* Receive queues */
+#define PF_QRX_BASE 0x00000000
+#define PF_QRX_TAIL(_QRX) (PF_QRX_BASE + (((_QRX) *0x1000)))
+#define PF_QRX_BUFFQ_BASE 0x03000000
+#define PF_QRX_BUFFQ_TAIL(_QRX) (PF_QRX_BUFFQ_BASE + (((_QRX) *0x1000)))
+
+/* Transmit queues */
+#define PF_QTX_BASE 0x05000000
+#define PF_QTX_COMM_DBELL(_DBQM) (PF_QTX_BASE + ((_DBQM) *0x1000))
+
+/* Control(PF Mailbox) Queue */
+#define PF_FW_BASE 0x08400000
+
+#define PF_FW_ARQBAL (PF_FW_BASE)
+#define PF_FW_ARQBAH (PF_FW_BASE + 0x4)
+#define PF_FW_ARQLEN (PF_FW_BASE + 0x8)
+#define PF_FW_ARQLEN_ARQLEN_S 0
+#define PF_FW_ARQLEN_ARQLEN_M MAKEMASK (0x1FFF, PF_FW_ARQLEN_ARQLEN_S)
+#define PF_FW_ARQLEN_ARQVFE_S 28
+#define PF_FW_ARQLEN_ARQVFE_M BIT (PF_FW_ARQLEN_ARQVFE_S)
+#define PF_FW_ARQLEN_ARQOVFL_S 29
+#define PF_FW_ARQLEN_ARQOVFL_M BIT (PF_FW_ARQLEN_ARQOVFL_S)
+#define PF_FW_ARQLEN_ARQCRIT_S 30
+#define PF_FW_ARQLEN_ARQCRIT_M BIT (PF_FW_ARQLEN_ARQCRIT_S)
+#define PF_FW_ARQLEN_ARQENABLE_S 31
+#define PF_FW_ARQLEN_ARQENABLE_M BIT (PF_FW_ARQLEN_ARQENABLE_S)
+#define PF_FW_ARQH (PF_FW_BASE + 0xC)
+#define PF_FW_ARQH_ARQH_S 0
+#define PF_FW_ARQH_ARQH_M MAKEMASK (0x1FFF, PF_FW_ARQH_ARQH_S)
+#define PF_FW_ARQT (PF_FW_BASE + 0x10)
+
+#define PF_FW_ATQBAL (PF_FW_BASE + 0x14)
+#define PF_FW_ATQBAH (PF_FW_BASE + 0x18)
+#define PF_FW_ATQLEN (PF_FW_BASE + 0x1C)
+#define PF_FW_ATQLEN_ATQLEN_S 0
+#define PF_FW_ATQLEN_ATQLEN_M MAKEMASK (0x3FF, PF_FW_ATQLEN_ATQLEN_S)
+#define PF_FW_ATQLEN_ATQVFE_S 28
+#define PF_FW_ATQLEN_ATQVFE_M BIT (PF_FW_ATQLEN_ATQVFE_S)
+#define PF_FW_ATQLEN_ATQOVFL_S 29
+#define PF_FW_ATQLEN_ATQOVFL_M BIT (PF_FW_ATQLEN_ATQOVFL_S)
+#define PF_FW_ATQLEN_ATQCRIT_S 30
+#define PF_FW_ATQLEN_ATQCRIT_M BIT (PF_FW_ATQLEN_ATQCRIT_S)
+#define PF_FW_ATQLEN_ATQENABLE_S 31
+#define PF_FW_ATQLEN_ATQENABLE_M BIT (PF_FW_ATQLEN_ATQENABLE_S)
+#define PF_FW_ATQH (PF_FW_BASE + 0x20)
+#define PF_FW_ATQH_ATQH_S 0
+#define PF_FW_ATQH_ATQH_M MAKEMASK (0x3FF, PF_FW_ATQH_ATQH_S)
+#define PF_FW_ATQT (PF_FW_BASE + 0x24)
+
+/* Interrupts */
+#define PF_GLINT_BASE 0x08900000
+#define PF_GLINT_DYN_CTL_ITR_INDX_S 3
+#define PF_GLINT_DYN_CTL_ITR_INDX_M MAKEMASK (0x3, PF_GLINT_DYN_CTL_ITR_INDX_S)
+#define PF_GLINT_DYN_CTL_INTERVAL_S 5
+#define PF_GLINT_DYN_CTL_INTERVAL_M BIT (PF_GLINT_DYN_CTL_INTERVAL_S)
+#define PF_GLINT_DYN_CTL_WB_ON_ITR_S 30
+#define PF_GLINT_DYN_CTL_WB_ON_ITR_M BIT (PF_GLINT_DYN_CTL_WB_ON_ITR_S)
+
+/* Generic registers */
+#define PFGEN_RSTAT 0x08407008 /* PFR Status */
+#define PFGEN_RSTAT_PFR_STATE_S 0
+#define PFGEN_RSTAT_PFR_STATE_M MAKEMASK (0x3, PFGEN_RSTAT_PFR_STATE_S)
+#define PFGEN_CTRL 0x0840700C
+#define PFGEN_CTRL_PFSWR BIT (0)
+
+#define IDPF_CTLQ_ID -1
+#define IDPF_CTLQ_LEN 64
+#define IDPF_DFLT_MBX_BUF_SIZE 4096
+
+#define IDPF_MAX_NUM_QUEUES 256
+#define IDPF_MIN_BUF_SIZE 1024
+#define IDPF_MAX_FRAME_SIZE 9728
+#define IDPF_MAX_PKT_TYPE 1024
+#define IDPF_QUEUE_SZ_MAX 4096
+#define IDPF_QUEUE_SZ_MIN 64
+
+#define IDPF_RESET_SUSPEND_TIME 20e-3
+#define IDPF_RESET_MAX_WAIT_TIME 1
+
+#define IDPF_SEND_TO_PF_SUSPEND_TIME 10e-3
+#define IDPF_SEND_TO_PF_MAX_WAIT_TIME 1
+#define IDPF_SEND_TO_PF_MAX_TRY_TIMES 200
+
+#define IDPF_RX_MAX_DESC_IN_CHAIN 5
+
+#define IDPF_MAX_VPORT_NUM 8
+#define IDPF_DFLT_Q_VEC_NUM 1
+#define IDPF_DFLT_INTERVAL 16
+
+#define IDPF_DEFAULT_RXQ_NUM 16
+#define IDPF_DEFAULT_TXQ_NUM 16
+
+#define IDPF_ETH_ALEN 6
+
+#define IDPF_INVALID_VPORT_IDX 0xffff
+#define IDPF_TXQ_PER_GRP 1
+#define IDPF_TX_COMPLQ_PER_GRP 1
+#define IDPF_RXQ_PER_GRP 1
+#define IDPF_RX_BUFQ_PER_GRP 2
+#define IDPF_RX_BUF_STRIDE 64
+
+/* Maximum buffer lengths for all control queue types */
+#define IDPF_CTLQ_MAX_RING_SIZE 1024
+#define IDPF_CTLQ_MAX_BUF_LEN 4096
+
+#define IDPF_HI_DWORD(x) ((u32) ((((x) >> 16) >> 16) & 0xFFFFFFFF))
+#define IDPF_LO_DWORD(x) ((u32) ((x) &0xFFFFFFFF))
+#define IDPF_HI_WORD(x) ((u16) (((x) >> 16) & 0xFFFF))
+#define IDPF_LO_WORD(x) ((u16) ((x) &0xFFFF))
+
+#define IDPF_CTLQ_DESC(R, i) (&(((idpf_ctlq_desc_t *) ((R)->desc_ring.va))[i]))
+
+#define IDPF_CTLQ_DESC_UNUSED(R) \
+ (u16) ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->ring_size) + \
+ (R)->next_to_clean - (R)->next_to_use - 1)
+
+#define IDPF_GET_PTYPE_SIZE(p) \
+ (sizeof (virtchnl2_ptype_t) + \
+ (((p)->proto_id_count ? ((p)->proto_id_count - 1) : 0) * \
+ sizeof ((p)->proto_id[0])))
+
+/* log configuration */
+extern vlib_log_class_registration_t idpf_log;
+extern vlib_log_class_registration_t idpf_stats_log;
+
+#define idpf_log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, idpf_log.class, "%U: " f, \
+ format_vlib_pci_addr, &dev->pci_addr, ##__VA_ARGS__)
+
+#define idpf_log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, idpf_log.class, "%U: " f, \
+ format_vlib_pci_addr, &dev->pci_addr, ##__VA_ARGS__)
+
+#define idpf_log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, idpf_log.class, "%U: " f, \
+ format_vlib_pci_addr, &dev->pci_addr, ##__VA_ARGS__)
+
+#define idpf_stats_log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, idpf_stats_log.class, "%U: " f, \
+ format_vlib_pci_addr, &dev->pci_addr, ##__VA_ARGS__)
+
+/* List handler */
+#ifndef LIST_HEAD_TYPE
+#define LIST_HEAD_TYPE(list_name, type) LIST_HEAD (list_name, type)
+#endif
+
+#ifndef LIST_ENTRY_TYPE
+#define LIST_ENTRY_TYPE(type) LIST_ENTRY (type)
+#endif
+
+#ifndef LIST_FOR_EACH_ENTRY_SAFE
+#define LIST_FOR_EACH_ENTRY_SAFE(pos, temp, head, entry_type, list) \
+ LIST_FOREACH (pos, head, list)
+#endif
+
+#ifndef LIST_FOR_EACH_ENTRY
+#define LIST_FOR_EACH_ENTRY(pos, head, entry_type, list) \
+ LIST_FOREACH (pos, head, list)
+#endif
+
+#define foreach_idpf_device_flags \
+ _ (0, INITIALIZED, "initialized") \
+ _ (1, ERROR, "error") \
+ _ (2, ADMIN_UP, "admin-up") \
+ _ (3, VA_DMA, "vaddr-dma") \
+ _ (4, LINK_UP, "link-up") \
+ _ (6, ELOG, "elog") \
+ _ (7, PROMISC, "promisc") \
+ _ (8, RX_INT, "rx-interrupts") \
+ _ (9, RX_FLOW_OFFLOAD, "rx-flow-offload")
+
+enum
+{
+#define _(a, b, c) IDPF_DEVICE_F_##b = (1 << a),
+ foreach_idpf_device_flags
+#undef _
+};
+
+#define IDPF_PTYPE_UNKNOWN 0x00000000
+#define IDPF_PTYPE_L2_ETHER 0x00000001
+#define IDPF_PTYPE_L2_ETHER_TIMESYNC 0x00000002
+#define IDPF_PTYPE_L2_ETHER_ARP 0x00000003
+#define IDPF_PTYPE_L2_ETHER_LLDP 0x00000004
+#define IDPF_PTYPE_L2_ETHER_NSH 0x00000005
+#define IDPF_PTYPE_L2_ETHER_VLAN 0x00000006
+#define IDPF_PTYPE_L2_ETHER_QINQ 0x00000007
+#define IDPF_PTYPE_L2_ETHER_PPPOE 0x00000008
+#define IDPF_PTYPE_L2_ETHER_FCOE 0x00000009
+#define IDPF_PTYPE_L2_ETHER_MPLS 0x0000000a
+#define IDPF_PTYPE_L2_MASK 0x0000000f
+#define IDPF_PTYPE_L3_IPV4 0x00000010
+#define IDPF_PTYPE_L3_IPV4_EXT 0x00000030
+#define IDPF_PTYPE_L3_IPV6 0x00000040
+#define IDPF_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090
+#define IDPF_PTYPE_L3_IPV6_EXT 0x000000c0
+#define IDPF_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0
+#define IDPF_PTYPE_L3_MASK 0x000000f0
+#define IDPF_PTYPE_L4_TCP 0x00000100
+#define IDPF_PTYPE_L4_UDP 0x00000200
+#define IDPF_PTYPE_L4_FRAG 0x00000300
+#define IDPF_PTYPE_L4_SCTP 0x00000400
+#define IDPF_PTYPE_L4_ICMP 0x00000500
+#define IDPF_PTYPE_L4_NONFRAG 0x00000600
+#define IDPF_PTYPE_L4_IGMP 0x00000700
+#define IDPF_PTYPE_L4_MASK 0x00000f00
+#define IDPF_PTYPE_TUNNEL_IP 0x00001000
+#define IDPF_PTYPE_TUNNEL_GRE 0x00002000
+#define IDPF_PTYPE_TUNNEL_VXLAN 0x00003000
+#define IDPF_PTYPE_TUNNEL_NVGRE 0x00004000
+#define IDPF_PTYPE_TUNNEL_GENEVE 0x00005000
+#define IDPF_PTYPE_TUNNEL_GRENAT 0x00006000
+#define IDPF_PTYPE_TUNNEL_GTPC 0x00007000
+#define IDPF_PTYPE_TUNNEL_GTPU 0x00008000
+#define IDPF_PTYPE_TUNNEL_ESP 0x00009000
+#define IDPF_PTYPE_TUNNEL_L2TP 0x0000a000
+#define IDPF_PTYPE_TUNNEL_VXLAN_GPE 0x0000b000
+#define IDPF_PTYPE_TUNNEL_MPLS_IN_GRE 0x0000c000
+#define IDPF_PTYPE_TUNNEL_MPLS_IN_UDP 0x0000d000
+#define IDPF_PTYPE_TUNNEL_MASK 0x0000f000
+#define IDPF_PTYPE_INNER_L2_ETHER 0x00010000
+#define IDPF_PTYPE_INNER_L2_ETHER_VLAN 0x00020000
+#define IDPF_PTYPE_INNER_L2_ETHER_QINQ 0x00030000
+#define IDPF_PTYPE_INNER_L2_MASK 0x000f0000
+#define IDPF_PTYPE_INNER_L3_IPV4 0x00100000
+#define IDPF_PTYPE_INNER_L3_IPV4_EXT 0x00200000
+#define IDPF_PTYPE_INNER_L3_IPV6 0x00300000
+#define IDPF_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000
+#define IDPF_PTYPE_INNER_L3_IPV6_EXT 0x00500000
+#define IDPF_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000
+#define IDPF_PTYPE_INNER_L3_MASK 0x00f00000
+#define IDPF_PTYPE_INNER_L4_TCP 0x01000000
+#define IDPF_PTYPE_INNER_L4_UDP 0x02000000
+#define IDPF_PTYPE_INNER_L4_FRAG 0x03000000
+#define IDPF_PTYPE_INNER_L4_SCTP 0x04000000
+#define IDPF_PTYPE_INNER_L4_ICMP 0x05000000
+#define IDPF_PTYPE_INNER_L4_NONFRAG 0x06000000
+#define IDPF_PTYPE_INNER_L4_MASK 0x0f000000
+#define IDPF_PTYPE_ALL_MASK 0x0fffffff
+
+/* Flags sub-structure
+ * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 |
+ * |DD |CMP|ERR| * RSV * |FTYPE | *RSV* |RD |VFC|BUF| HOST_ID |
+ */
+/* command flags and offsets */
+#define IDPF_CTLQ_FLAG_DD_S 0
+#define IDPF_CTLQ_FLAG_CMP_S 1
+#define IDPF_CTLQ_FLAG_ERR_S 2
+#define IDPF_CTLQ_FLAG_FTYPE_S 6
+#define IDPF_CTLQ_FLAG_RD_S 10
+#define IDPF_CTLQ_FLAG_VFC_S 11
+#define IDPF_CTLQ_FLAG_BUF_S 12
+#define IDPF_CTLQ_FLAG_HOST_ID_S 13
+
+#define IDPF_CTLQ_FLAG_DD BIT (IDPF_CTLQ_FLAG_DD_S) /* 0x1 */
+#define IDPF_CTLQ_FLAG_CMP BIT (IDPF_CTLQ_FLAG_CMP_S) /* 0x2 */
+#define IDPF_CTLQ_FLAG_ERR BIT (IDPF_CTLQ_FLAG_ERR_S) /* 0x4 */
+#define IDPF_CTLQ_FLAG_FTYPE_VM \
+ BIT (IDPF_CTLQ_FLAG_FTYPE_S) /* 0x40 */
+#define IDPF_CTLQ_FLAG_FTYPE_PF BIT (IDPF_CTLQ_FLAG_FTYPE_S + 1) /* 0x80 */
+#define IDPF_CTLQ_FLAG_RD BIT (IDPF_CTLQ_FLAG_RD_S) /* 0x400 */
+#define IDPF_CTLQ_FLAG_VFC BIT (IDPF_CTLQ_FLAG_VFC_S) /* 0x800 */
+#define IDPF_CTLQ_FLAG_BUF BIT (IDPF_CTLQ_FLAG_BUF_S) /* 0x1000 */
+
+/* Host ID is a special field that has 3b and not a 1b flag */
+#define IDPF_CTLQ_FLAG_HOST_ID_M MAKE_MASK (0x7000UL, IDPF_CTLQ_FLAG_HOST_ID_S)
+
+#define IDPF_FLEX_TXD_QW1_DTYPE_S 0
+#define IDPF_FLEX_TXD_QW1_DTYPE_M MAKEMASK (0x1FUL, IDPF_FLEX_TXD_QW1_DTYPE_S)
+#define IDPF_FLEX_TXD_QW1_CMD_S 5
+#define IDPF_FLEX_TXD_QW1_CMD_M MAKEMASK (0x7FFUL, IDPF_FLEX_TXD_QW1_CMD_S)
+
+typedef struct idpf_vport idpf_vport_t;
+
+typedef volatile struct
+{
+ u64 buf_addr; /* Packet buffer address */
+ struct
+ {
+ u64 cmd_dtype;
+ union
+ {
+ /* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_DATA_(0x03) */
+ u8 raw[4];
+
+ /* DTYPE = IDPF_TX_DESC_DTYPE_FLEX_TSYN_L2TAG1 (0x06) */
+ struct
+ {
+ u16 l2tag1;
+ u8 flex;
+ u8 tsync;
+ } tsync;
+
+ /* DTYPE=IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2 (0x07) */
+ struct
+ {
+ u16 l2tag1;
+ u16 l2tag2;
+ } l2tags;
+ } flex;
+ u16 buf_size;
+ } qw1;
+} idpf_flex_tx_desc_t;
+
+typedef struct
+{
+ union
+ {
+ u64 qword[2];
+ };
+} idpf_tx_desc_t;
+
+STATIC_ASSERT_SIZEOF (idpf_tx_desc_t, 16);
+
+typedef struct idpf_rxq
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile u32 *qrx_tail;
+ u16 next;
+ u16 size;
+ virtchnl2_rx_desc_t *descs;
+ u32 *bufs;
+ u16 n_enqueued;
+ u8 int_mode;
+ u8 buffer_pool_index;
+ u32 queue_index;
+
+ struct idpf_rxq *bufq1;
+ struct idpf_rxq *bufq2;
+} idpf_rxq_t;
+
+typedef struct idpf_txq
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile u32 *qtx_tail;
+ u16 next;
+ u16 size;
+ u32 *ph_bufs;
+ clib_spinlock_t lock;
+ idpf_tx_desc_t *descs;
+ u32 *bufs;
+ u16 n_enqueued;
+ u16 *rs_slots;
+
+ idpf_tx_desc_t *tmp_descs;
+ u32 *tmp_bufs;
+ u32 queue_index;
+
+ struct idpf_txq *complq;
+} idpf_txq_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 flags;
+ u32 per_interface_next_index;
+ u32 cmd_retval;
+ u8 *mbx_resp;
+ virtchnl2_op_t pend_cmd;
+
+ u32 dev_instance;
+ u32 sw_if_index;
+ u32 hw_if_index;
+ vlib_pci_dev_handle_t pci_dev_handle;
+ u32 numa_node;
+ void *bar0;
+ u8 *name;
+
+ /* queues */
+ u16 n_tx_queues;
+ u16 n_rx_queues;
+ u32 txq_model;
+ u32 rxq_model;
+
+ u16 vsi_id;
+ u8 hwaddr[6];
+ u16 max_mtu;
+ vlib_pci_addr_t pci_addr;
+
+ /* error */
+ clib_error_t *error;
+
+ /* hw info */
+ u8 *hw_addr;
+ u64 hw_addr_len;
+
+ /* control queue - send and receive */
+ struct idpf_ctlq_info *asq;
+ struct idpf_ctlq_info *arq;
+
+ /* pci info */
+ u16 device_id;
+ u16 vendor_id;
+ u16 subsystem_device_id;
+ u16 subsystem_vendor_id;
+
+ /* max config queue number per vc message */
+ u32 max_rxq_per_msg;
+ u32 max_txq_per_msg;
+
+ /* vport info */
+ idpf_vport_t **vports;
+ u16 max_vport_nb;
+ u16 req_vports[IDPF_MAX_VPORT_NUM];
+ u16 req_vport_nb;
+ u16 cur_vports;
+ u16 cur_vport_nb;
+ u16 cur_vport_idx;
+
+ u32 ptype_tbl[IDPF_MAX_PKT_TYPE];
+
+ /* device capability */
+ u32 csum_caps;
+ u32 seg_caps;
+ u32 hsplit_caps;
+ u32 rsc_caps;
+ u64 rss_caps;
+ u64 other_caps;
+
+ u16 max_rx_q;
+ u16 max_tx_q;
+ u16 max_rx_bufq;
+ u16 max_tx_complq;
+ u16 max_sriov_vfs;
+ u16 max_vports;
+ u16 default_num_vports;
+
+ u32 device_type;
+
+ LIST_HEAD_TYPE (list_head, idpf_ctlq_info) cq_list_head;
+} idpf_device_t;
+
+/* memory allocation tracking */
+typedef struct
+{
+ void *va;
+ u64 pa;
+ u32 size;
+} idpf_dma_mem_t;
+
+/* Message type read in virtual channel from PF */
+typedef enum
+{
+ IDPF_MSG_ERR = -1, /* Meet error when accessing admin queue */
+ IDPF_MSG_NON, /* Read nothing from admin queue */
+ IDPF_MSG_SYS, /* Read system msg from admin queue */
+ IDPF_MSG_CMD, /* Read async command result */
+} idpf_vc_result_t;
+
+typedef struct
+{
+ u32 tx_start_qid;
+ u32 rx_start_qid;
+ u32 tx_compl_start_qid;
+ u32 rx_buf_start_qid;
+
+ u64 tx_qtail_start;
+ u32 tx_qtail_spacing;
+ u64 rx_qtail_start;
+ u32 rx_qtail_spacing;
+ u64 tx_compl_qtail_start;
+ u32 tx_compl_qtail_spacing;
+ u64 rx_buf_qtail_start;
+ u32 rx_buf_qtail_spacing;
+} idpf_chunks_info_t;
+
+typedef struct
+{
+ u32 ops;
+ u8 *in_args; /* buffer for sending */
+ u32 in_args_size; /* buffer size for sending */
+ u8 *out_buffer; /* buffer for response */
+ u32 out_size; /* buffer size for response */
+} idpf_cmd_info_t;
+
+typedef struct
+{
+ idpf_device_t *id;
+ u16 idx;
+} idpf_vport_param_t;
+
+struct idpf_vport
+{
+ idpf_device_t *id;
+ virtchnl2_create_vport_t *vport_info;
+ u16 idx;
+ u16 vport_id;
+ u32 txq_model;
+ u32 rxq_model;
+ u32 num_tx_q;
+ idpf_txq_t *txqs;
+ u16 num_tx_complq;
+ u16 num_rx_q;
+ idpf_rxq_t *rxqs;
+ u16 num_rx_bufq;
+
+ u16 max_mtu;
+ u8 default_mac_addr[VIRTCHNL2_ETH_LENGTH_OF_ADDRESS];
+
+ u16 max_pkt_len; /* Maximum packet length */
+
+ /* MSIX info*/
+ virtchnl2_queue_vector_t *qv_map; /* queue vector mapping */
+ u16 max_vectors;
+ virtchnl2_alloc_vectors_t *recv_vectors;
+
+ /* Chunk info */
+ idpf_chunks_info_t chunks_info;
+
+ virtchnl2_vport_stats_t eth_stats_offset;
+};
+
+#define IDPF_RX_VECTOR_SZ VLIB_FRAME_SIZE
+
+typedef enum
+{
+ IDPF_PROCESS_REQ_ADD_DEL_ETH_ADDR = 1,
+ IDPF_PROCESS_REQ_CONFIG_PROMISC_MDDE = 2,
+ IDPF_PROCESS_REQ_PROGRAM_FLOW = 3,
+} idpf_process_req_type_t;
+
+typedef struct
+{
+ idpf_process_req_type_t type;
+ u32 dev_instance;
+ u32 calling_process_index;
+ u8 eth_addr[6];
+ int is_add, is_enable;
+
+ /* below parameters are used for 'program flow' event */
+ u8 *rule;
+ u32 rule_len;
+ u8 *program_status;
+ u32 status_len;
+
+ clib_error_t *error;
+} idpf_process_req_t;
+
+typedef struct
+{
+ u64 qw1s[IDPF_RX_MAX_DESC_IN_CHAIN - 1];
+ u32 buffers[IDPF_RX_MAX_DESC_IN_CHAIN - 1];
+} idpf_rx_tail_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vlib_buffer_t *bufs[IDPF_RX_VECTOR_SZ];
+ u16 next[IDPF_RX_VECTOR_SZ];
+ u64 qw1s[IDPF_RX_VECTOR_SZ];
+ u32 flow_ids[IDPF_RX_VECTOR_SZ];
+ idpf_rx_tail_t tails[IDPF_RX_VECTOR_SZ];
+ vlib_buffer_t buffer_template;
+} idpf_per_thread_data_t;
+
+typedef struct
+{
+ u16 msg_id_base;
+
+ idpf_device_t **devices;
+ idpf_per_thread_data_t *per_thread_data;
+} idpf_main_t;
+
+extern idpf_main_t idpf_main;
+
+typedef struct
+{
+ vlib_pci_addr_t addr;
+ u8 *name;
+ u16 rxq_single;
+ u16 txq_single;
+ u16 rxq_num;
+ u16 txq_num;
+ u16 req_vport_nb;
+ u16 rxq_size;
+ u16 txq_size;
+ int rv;
+ u32 sw_if_index;
+ clib_error_t *error;
+} idpf_create_if_args_t;
+
+void idpf_create_if (vlib_main_t *vm, idpf_create_if_args_t *args);
+
+extern vlib_node_registration_t idpf_process_node;
+extern vnet_device_class_t idpf_device_class;
+
+/* format.c */
+format_function_t format_idpf_device_name;
+format_function_t format_idpf_device_flags;
+
+static inline void
+clear_cmd (idpf_device_t *id)
+{
+ /* Return value may be checked in anither thread, need to ensure the
+ * coherence. */
+ CLIB_MEMORY_BARRIER ();
+ id->pend_cmd = VIRTCHNL2_OP_UNKNOWN;
+ id->cmd_retval = VIRTCHNL2_STATUS_SUCCESS;
+}
+
+static_always_inline idpf_device_t *
+idpf_get_device (u32 dev_instance)
+{
+ return pool_elt_at_index (idpf_main.devices, dev_instance)[0];
+}
+
+static inline void
+idpf_reg_write (idpf_device_t *id, u32 addr, u32 val)
+{
+ *(volatile u32 *) ((u8 *) id->bar0 + addr) = val;
+}
+
+static inline u32
+idpf_reg_read (idpf_device_t *id, u32 addr)
+{
+ u32 val = *(volatile u32 *) (id->bar0 + addr);
+ return val;
+}
+
+static inline void
+idpf_reg_flush (idpf_device_t *id)
+{
+ idpf_reg_read (id, PFGEN_RSTAT);
+ asm volatile("" ::: "memory");
+}
+
+typedef struct
+{
+ u16 qid;
+ u16 next_index;
+ u32 hw_if_index;
+ u32 flow_id;
+ u64 qw1s[IDPF_RX_MAX_DESC_IN_CHAIN];
+} idpf_input_trace_t;
+
+/* Error Codes */
+/* Linux kernel driver can't directly use these. Instead, they are mapped to
+ * linux compatible error codes which get translated in the build script.
+ */
+#define IDPF_SUCCESS 0
+#define IDPF_ERR_PARAM -53 /* -EBADR */
+#define IDPF_ERR_NOT_IMPL -95 /* -EOPNOTSUPP */
+#define IDPF_ERR_NOT_READY -16 /* -EBUSY */
+#define IDPF_ERR_BAD_PTR -14 /* -EFAULT */
+#define IDPF_ERR_INVAL_SIZE -90 /* -EMSGSIZE */
+#define IDPF_ERR_DEVICE_NOT_SUPPORTED -19 /* -ENODEV */
+#define IDPF_ERR_FW_API_VER -13 /* -EACCESS */
+#define IDPF_ERR_NO_MEMORY -12 /* -ENOMEM */
+#define IDPF_ERR_CFG -22 /* -EINVAL */
+#define IDPF_ERR_OUT_OF_RANGE -34 /* -ERANGE */
+#define IDPF_ERR_ALREADY_EXISTS -17 /* -EEXIST */
+#define IDPF_ERR_DOES_NOT_EXIST -6 /* -ENXIO */
+#define IDPF_ERR_IN_USE -114 /* -EALREADY */
+#define IDPF_ERR_MAX_LIMIT -109 /* -ETOOMANYREFS */
+#define IDPF_ERR_RESET_ONGOING -104 /* -ECONNRESET */
+
+/* CRQ/CSQ specific error codes */
+#define IDPF_ERR_CTLQ_ERROR -74 /* -EBADMSG */
+#define IDPF_ERR_CTLQ_TIMEOUT -110 /* -ETIMEDOUT */
+#define IDPF_ERR_CTLQ_FULL -28 /* -ENOSPC */
+#define IDPF_ERR_CTLQ_NO_WORK -42 /* -ENOMSG */
+#define IDPF_ERR_CTLQ_EMPTY -105 /* -ENOBUFS */
+
+/* Used for queue init, response and events */
+typedef enum
+{
+ IDPF_CTLQ_TYPE_MAILBOX_TX = 0,
+ IDPF_CTLQ_TYPE_MAILBOX_RX = 1,
+ IDPF_CTLQ_TYPE_CONFIG_TX = 2,
+ IDPF_CTLQ_TYPE_CONFIG_RX = 3,
+ IDPF_CTLQ_TYPE_EVENT_RX = 4,
+ IDPF_CTLQ_TYPE_RDMA_TX = 5,
+ IDPF_CTLQ_TYPE_RDMA_RX = 6,
+ IDPF_CTLQ_TYPE_RDMA_COMPL = 7
+} idpf_ctlq_type_t;
+
+typedef enum
+{
+ IDPF_PROCESS_EVENT_START = 1,
+ IDPF_PROCESS_EVENT_DELETE_IF = 2,
+ IDPF_PROCESS_EVENT_AQ_INT = 3,
+ IDPF_PROCESS_EVENT_REQ = 4,
+} idpf_process_event_t;
+
+/*
+ * Generic Control Queue Structures
+ */
+typedef struct
+{
+ /* used for queue tracking */
+ u32 head;
+ u32 tail;
+ /* Below applies only to default mb (if present) */
+ u32 len;
+ u32 bah;
+ u32 bal;
+ u32 len_mask;
+ u32 len_ena_mask;
+ u32 head_mask;
+} idpf_ctlq_reg_t;
+
+/* Generic queue msg structure */
+typedef struct
+{
+ u8 vmvf_type; /* represents the source of the message on recv */
+#define IDPF_VMVF_TYPE_VF 0
+#define IDPF_VMVF_TYPE_VM 1
+#define IDPF_VMVF_TYPE_PF 2
+ u8 host_id;
+ /* 3b field used only when sending a message to peer - to be used in
+ * combination with target func_id to route the message
+ */
+#define IDPF_HOST_ID_MASK 0x7
+
+ u16 opcode;
+ u16 data_len; /* data_len = 0 when no payload is attached */
+ union
+ {
+ u16 func_id; /* when sending a message */
+ u16 status; /* when receiving a message */
+ };
+ union
+ {
+ struct
+ {
+ u32 chnl_retval;
+ u32 chnl_opcode;
+ } mbx;
+ u64 cookie;
+ } cookie;
+ union
+ {
+#define IDPF_DIRECT_CTX_SIZE 16
+#define IDPF_INDIRECT_CTX_SIZE 8
+ /* 16 bytes of context can be provided or 8 bytes of context
+ * plus the address of a DMA buffer
+ */
+ u8 direct[IDPF_DIRECT_CTX_SIZE];
+ struct
+ {
+ u8 context[IDPF_INDIRECT_CTX_SIZE];
+ idpf_dma_mem_t *payload;
+ } indirect;
+ } ctx;
+} idpf_ctlq_msg_t;
+
+/* Generic queue info structures */
+/* MB, CONFIG and EVENT q do not have extended info */
+typedef struct
+{
+ idpf_ctlq_type_t type;
+ int id; /* absolute queue offset passed as input
+ * -1 for default mailbox if present
+ */
+ u16 len; /* Queue length passed as input */
+ u16 buf_size; /* buffer size passed as input */
+ u64 base_address; /* output, HPA of the Queue start */
+ idpf_ctlq_reg_t reg; /* registers accessed by ctlqs */
+
+ int ext_info_size;
+ void *ext_info; /* Specific to q type */
+} idpf_ctlq_create_info_t;
+
+/* Control Queue information */
+typedef struct idpf_ctlq_info
+{
+ LIST_ENTRY_TYPE (idpf_ctlq_info) cq_list;
+
+ idpf_ctlq_type_t cq_type;
+ int q_id;
+ clib_spinlock_t cq_lock; /* queue lock */
+
+ /* used for interrupt processing */
+ u16 next_to_use;
+ u16 next_to_clean;
+ u16 next_to_post;
+
+ idpf_dma_mem_t desc_ring; /* descriptor ring memory */
+
+ union
+ {
+ idpf_dma_mem_t **rx_buff;
+ idpf_ctlq_msg_t **tx_msg;
+ } bi;
+
+ u16 buf_size; /* queue buffer size */
+ u16 ring_size; /* Number of descriptors */
+ idpf_ctlq_reg_t reg; /* registers accessed by ctlqs */
+} idpf_ctlq_info_t;
+
+/* PF/VF mailbox commands */
+enum idpf_mbx_opc
+{
+ /* idpf_mbq_opc_send_msg_to_pf:
+ * usage: used by PF or VF to send a message to its CPF
+ * target: RX queue and function ID of parent PF taken from HW
+ */
+ idpf_mbq_opc_send_msg_to_pf = 0x0801,
+
+ /* idpf_mbq_opc_send_msg_to_vf:
+ * usage: used by PF to send message to a VF
+ * target: VF control queue ID must be specified in descriptor
+ */
+ idpf_mbq_opc_send_msg_to_vf = 0x0802,
+
+ /* idpf_mbq_opc_send_msg_to_peer_pf:
+ * usage: used by any function to send message to any peer PF
+ * target: RX queue and host of parent PF taken from HW
+ */
+ idpf_mbq_opc_send_msg_to_peer_pf = 0x0803,
+
+ /* idpf_mbq_opc_send_msg_to_peer_drv:
+ * usage: used by any function to send message to any peer driver
+ * target: RX queue and target host must be specific in descriptor
+ */
+ idpf_mbq_opc_send_msg_to_peer_drv = 0x0804,
+};
+
+typedef struct
+{
+ u16 flags;
+ u16 opcode;
+ u16 datalen; /* 0 for direct commands */
+ union
+ {
+ u16 ret_val;
+ u16 pfid_vfid;
+ };
+ u32 cookie_high;
+ u32 cookie_low;
+ union
+ {
+ struct
+ {
+ u32 param0;
+ u32 param1;
+ u32 param2;
+ u32 param3;
+ } direct;
+ struct
+ {
+ u32 param0;
+ u32 param1;
+ u32 addr_high;
+ u32 addr_low;
+ } indirect;
+ u8 raw[16];
+ } params;
+} idpf_ctlq_desc_t;
+
+int idpf_ctlq_init (vlib_main_t *vm, idpf_device_t *id, u8 num_q,
+ idpf_ctlq_create_info_t *q_info);
+int idpf_ctlq_add (vlib_main_t *vm, idpf_device_t *id,
+ idpf_ctlq_create_info_t *qinfo, struct idpf_ctlq_info **cq);
+void idpf_ctlq_remove (idpf_device_t *id, struct idpf_ctlq_info *cq);
+int idpf_ctlq_send (idpf_device_t *id, struct idpf_ctlq_info *cq,
+ u16 num_q_msg, idpf_ctlq_msg_t q_msg[]);
+int idpf_ctlq_recv (struct idpf_ctlq_info *cq, u16 *num_q_msg,
+ idpf_ctlq_msg_t *q_msg);
+int idpf_ctlq_clean_sq (struct idpf_ctlq_info *cq, u16 *clean_count,
+ idpf_ctlq_msg_t *msg_status[]);
+int idpf_ctlq_post_rx_buffs (idpf_device_t *id, struct idpf_ctlq_info *cq,
+ u16 *buff_count, idpf_dma_mem_t **buffs);
+void idpf_ctlq_deinit (idpf_device_t *id);
+int idpf_ctlq_alloc_ring_res (vlib_main_t *vm, idpf_device_t *id,
+ struct idpf_ctlq_info *cq);
+void idpf_ctlq_dealloc_ring_res (idpf_device_t *id, struct idpf_ctlq_info *cq);
+void *idpf_alloc_dma_mem (vlib_main_t *vm, idpf_device_t *id,
+ idpf_dma_mem_t *mem, u64 size);
+void idpf_free_dma_mem (idpf_device_t *id, idpf_dma_mem_t *mem);
+
+#endif /* IDPF_H */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/idpf_api.c b/src/plugins/idpf/idpf_api.c
new file mode 100644
index 00000000000..8ca78e62dc0
--- /dev/null
+++ b/src/plugins/idpf/idpf_api.c
@@ -0,0 +1,111 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <idpf/idpf.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <idpf/idpf.api_enum.h>
+#include <idpf/idpf.api_types.h>
+
+#define REPLY_MSG_ID_BASE (im->msg_id_base)
+#include <vlibapi/api_helper_macros.h>
+
+static void
+vl_api_idpf_create_t_handler (vl_api_idpf_create_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ idpf_main_t *im = &idpf_main;
+ vl_api_idpf_create_reply_t *rmp;
+ idpf_create_if_args_t args;
+ int rv;
+
+ clib_memset (&args, 0, sizeof (idpf_create_if_args_t));
+
+ args.addr.as_u32 = ntohl (mp->pci_addr);
+ args.rxq_single = ntohs (mp->rxq_single);
+ args.txq_single = ntohs (mp->txq_single);
+ args.rxq_num = ntohs (mp->rxq_num);
+ args.txq_num = ntohs (mp->txq_num);
+ args.rxq_size = ntohs (mp->rxq_size);
+ args.txq_size = ntohs (mp->txq_size);
+ args.req_vport_nb = ntohs (mp->req_vport_nb);
+
+ idpf_create_if (vm, &args);
+ rv = args.rv;
+
+ REPLY_MACRO2 (VL_API_IDPF_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
+}
+
+static void
+vl_api_idpf_delete_t_handler (vl_api_idpf_delete_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ idpf_main_t *im = &idpf_main;
+ vl_api_idpf_delete_reply_t *rmp;
+ vnet_hw_interface_t *hw;
+ int rv = 0;
+
+ hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm,
+ htonl (mp->sw_if_index));
+ if (hw == NULL || idpf_device_class.index != hw->dev_class_index)
+ {
+ rv = VNET_API_ERROR_INVALID_INTERFACE;
+ goto reply;
+ }
+
+ vlib_process_signal_event (vm, idpf_process_node.index,
+ IDPF_PROCESS_EVENT_DELETE_IF, hw->dev_instance);
+
+reply:
+ REPLY_MACRO (VL_API_IDPF_DELETE_REPLY);
+}
+
+/* set tup the API message handling tables */
+#include <idpf/idpf.api.c>
+static clib_error_t *
+idpf_plugin_api_hookup (vlib_main_t *vm)
+{
+ idpf_main_t *ivm = &idpf_main;
+ api_main_t *am = vlibapi_get_main ();
+
+ /* ask for a correctly-sized block of API message decode slots */
+ ivm->msg_id_base = setup_message_id_table ();
+
+ vl_api_set_msg_thread_safe (am, ivm->msg_id_base + VL_API_IDPF_DELETE, 1);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (idpf_plugin_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/idpf_controlq.c b/src/plugins/idpf/idpf_controlq.c
new file mode 100644
index 00000000000..4887bf71c86
--- /dev/null
+++ b/src/plugins/idpf/idpf_controlq.c
@@ -0,0 +1,890 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <idpf/idpf.h>
+
+/**
+ * idpf_ctlq_alloc_desc_ring - Allocate Control Queue (CQ) rings
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ */
+static int
+idpf_ctlq_alloc_desc_ring (vlib_main_t *vm, idpf_device_t *id,
+ struct idpf_ctlq_info *cq)
+{
+ size_t size = cq->ring_size * sizeof (idpf_ctlq_desc_t);
+
+ /* Fixme: alloc dma va */
+ cq->desc_ring.va = idpf_alloc_dma_mem (vm, id, &cq->desc_ring, size);
+ if (!cq->desc_ring.va)
+ return IDPF_ERR_NO_MEMORY;
+
+ return IDPF_SUCCESS;
+}
+
+/**
+ * idpf_ctlq_alloc_bufs - Allocate Control Queue (CQ) buffers
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Allocate the buffer head for all control queues, and if it's a receive
+ * queue, allocate DMA buffers
+ */
+static int
+idpf_ctlq_alloc_bufs (vlib_main_t *vm, idpf_device_t *id,
+ struct idpf_ctlq_info *cq)
+{
+ int i = 0;
+ u16 len;
+
+ /* Do not allocate DMA buffers for transmit queues */
+ if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_TX)
+ return IDPF_SUCCESS;
+
+ /* We'll be allocating the buffer info memory first, then we can
+ * allocate the mapped buffers for the event processing
+ */
+ len = cq->ring_size * sizeof (idpf_dma_mem_t *);
+ cq->bi.rx_buff = (idpf_dma_mem_t **) clib_mem_alloc (len);
+ if (!cq->bi.rx_buff)
+ return IDPF_ERR_NO_MEMORY;
+ clib_memset (cq->bi.rx_buff, 0, len);
+
+ /* allocate the mapped buffers (except for the last one) */
+ for (i = 0; i < cq->ring_size - 1; i++)
+ {
+ idpf_dma_mem_t *bi;
+ int num = 1; /* number of idpf_dma_mem to be allocated */
+
+ cq->bi.rx_buff[i] =
+ (idpf_dma_mem_t *) clib_mem_alloc (num * sizeof (idpf_dma_mem_t));
+ if (!cq->bi.rx_buff[i])
+ goto unwind_alloc_cq_bufs;
+
+ bi = cq->bi.rx_buff[i];
+
+ bi->va = idpf_alloc_dma_mem (vm, id, bi, cq->buf_size);
+ if (!bi->va)
+ {
+ /* unwind will not free the failed entry */
+ clib_mem_free (cq->bi.rx_buff[i]);
+ goto unwind_alloc_cq_bufs;
+ }
+ }
+
+ return IDPF_SUCCESS;
+
+unwind_alloc_cq_bufs:
+ /* don't try to free the one that failed... */
+ i--;
+ for (; i >= 0; i--)
+ {
+ idpf_free_dma_mem (id, cq->bi.rx_buff[i]);
+ clib_mem_free (cq->bi.rx_buff[i]);
+ }
+ clib_mem_free (cq->bi.rx_buff);
+
+ return IDPF_ERR_NO_MEMORY;
+}
+
+/**
+ * idpf_ctlq_free_desc_ring - Free Control Queue (CQ) rings
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * This assumes the posted send buffers have already been cleaned
+ * and de-allocated
+ */
+static void
+idpf_ctlq_free_desc_ring (idpf_device_t *id, struct idpf_ctlq_info *cq)
+{
+ idpf_free_dma_mem (id, &cq->desc_ring);
+}
+
+/**
+ * idpf_ctlq_free_bufs - Free CQ buffer info elements
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Free the DMA buffers for RX queues, and DMA buffer header for both RX and TX
+ * queues. The upper layers are expected to manage freeing of TX DMA buffers
+ */
+static void
+idpf_ctlq_free_bufs (idpf_device_t *id, struct idpf_ctlq_info *cq)
+{
+ void *bi;
+
+ if (cq->cq_type == IDPF_CTLQ_TYPE_MAILBOX_RX)
+ {
+ int i;
+
+ /* free DMA buffers for rx queues*/
+ for (i = 0; i < cq->ring_size; i++)
+ {
+ if (cq->bi.rx_buff[i])
+ {
+ idpf_free_dma_mem (id, cq->bi.rx_buff[i]);
+ /* Attention */
+ clib_mem_free (cq->bi.rx_buff[i]);
+ }
+ }
+
+ bi = (void *) cq->bi.rx_buff;
+ }
+ else
+ {
+ bi = (void *) cq->bi.tx_msg;
+ }
+
+ /* free the buffer header */
+ clib_mem_free (bi);
+}
+
+/**
+ * idpf_ctlq_dealloc_ring_res - Free memory allocated for control queue
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * Free the memory used by the ring, buffers and other related structures
+ */
+void
+idpf_ctlq_dealloc_ring_res (idpf_device_t *id, struct idpf_ctlq_info *cq)
+{
+ /* free ring buffers and the ring itself */
+ idpf_ctlq_free_bufs (id, cq);
+ idpf_ctlq_free_desc_ring (id, cq);
+}
+
+/**
+ * idpf_ctlq_alloc_ring_res - allocate memory for descriptor ring and bufs
+ * @hw: pointer to hw struct
+ * @cq: pointer to control queue struct
+ *
+ * Do *NOT* hold the lock when calling this as the memory allocation routines
+ * called are not going to be atomic context safe
+ */
+int
+idpf_ctlq_alloc_ring_res (vlib_main_t *vm, idpf_device_t *id,
+ struct idpf_ctlq_info *cq)
+{
+ int ret_code;
+
+ /* verify input for valid configuration */
+ if (!cq->ring_size || !cq->buf_size)
+ return IDPF_ERR_CFG;
+
+ /* allocate the ring memory */
+ ret_code = idpf_ctlq_alloc_desc_ring (vm, id, cq);
+ if (ret_code)
+ return ret_code;
+
+ /* allocate buffers in the rings */
+ ret_code = idpf_ctlq_alloc_bufs (vm, id, cq);
+ if (ret_code)
+ goto idpf_init_cq_free_ring;
+
+ /* success! */
+ return IDPF_SUCCESS;
+
+idpf_init_cq_free_ring:
+ idpf_free_dma_mem (id, &cq->desc_ring);
+ return ret_code;
+}
+
+/**
+ * idpf_ctlq_setup_regs - initialize control queue registers
+ * @cq: pointer to the specific control queue
+ * @q_create_info: structs containing info for each queue to be initialized
+ */
+static void
+idpf_ctlq_setup_regs (struct idpf_ctlq_info *cq,
+ idpf_ctlq_create_info_t *q_create_info)
+{
+ /* set head and tail registers in our local struct */
+ cq->reg.head = q_create_info->reg.head;
+ cq->reg.tail = q_create_info->reg.tail;
+ cq->reg.len = q_create_info->reg.len;
+ cq->reg.bah = q_create_info->reg.bah;
+ cq->reg.bal = q_create_info->reg.bal;
+ cq->reg.len_mask = q_create_info->reg.len_mask;
+ cq->reg.len_ena_mask = q_create_info->reg.len_ena_mask;
+ cq->reg.head_mask = q_create_info->reg.head_mask;
+}
+
+/**
+ * idpf_ctlq_init_regs - Initialize control queue registers
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ * @is_rxq: true if receive control queue, false otherwise
+ *
+ * Initialize registers. The caller is expected to have already initialized the
+ * descriptor ring memory and buffer memory
+ */
+static void
+idpf_ctlq_init_regs (vlib_main_t *vm, idpf_device_t *id,
+ struct idpf_ctlq_info *cq, bool is_rxq)
+{
+ /* Update tail to post pre-allocated buffers for rx queues */
+ if (is_rxq)
+ idpf_reg_write (id, cq->reg.tail, (u32) (cq->ring_size - 1));
+
+ /* For non-Mailbox control queues only TAIL need to be set */
+ if (cq->q_id != -1)
+ return;
+
+ /* Clear Head for both send or receive */
+ idpf_reg_write (id, cq->reg.head, 0);
+
+ /* set starting point */
+ idpf_reg_write (id, cq->reg.bal, IDPF_LO_DWORD (cq->desc_ring.pa));
+ idpf_reg_write (id, cq->reg.bah, IDPF_HI_DWORD (cq->desc_ring.pa));
+ idpf_reg_write (id, cq->reg.len, (cq->ring_size | cq->reg.len_ena_mask));
+}
+
+/**
+ * idpf_ctlq_init_rxq_bufs - populate receive queue descriptors with buf
+ * @cq: pointer to the specific Control queue
+ *
+ * Record the address of the receive queue DMA buffers in the descriptors.
+ * The buffers must have been previously allocated.
+ */
+static void
+idpf_ctlq_init_rxq_bufs (struct idpf_ctlq_info *cq)
+{
+ int i = 0;
+
+ for (i = 0; i < cq->ring_size; i++)
+ {
+ idpf_ctlq_desc_t *desc = IDPF_CTLQ_DESC (cq, i);
+ idpf_dma_mem_t *bi = cq->bi.rx_buff[i];
+
+ /* No buffer to post to descriptor, continue */
+ if (!bi)
+ continue;
+
+ desc->flags = IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD;
+ desc->opcode = 0;
+ desc->datalen = (u16) bi->size;
+ desc->ret_val = 0;
+ desc->cookie_high = 0;
+ desc->cookie_low = 0;
+ desc->params.indirect.addr_high = IDPF_HI_DWORD (bi->pa);
+ desc->params.indirect.addr_low = IDPF_LO_DWORD (bi->pa);
+ desc->params.indirect.param0 = 0;
+ desc->params.indirect.param1 = 0;
+ }
+}
+
+/**
+ * idpf_ctlq_shutdown - shutdown the CQ
+ * @hw: pointer to hw struct
+ * @cq: pointer to the specific Control queue
+ *
+ * The main shutdown routine for any controq queue
+ */
+static void
+idpf_ctlq_shutdown (idpf_device_t *id, struct idpf_ctlq_info *cq)
+{
+ clib_spinlock_init (&cq->cq_lock);
+
+ if (!cq->ring_size)
+ goto shutdown_sq_out;
+
+ /* free ring buffers and the ring itself */
+ idpf_ctlq_dealloc_ring_res (id, cq);
+
+ /* Set ring_size to 0 to indicate uninitialized queue */
+ cq->ring_size = 0;
+
+shutdown_sq_out:
+ clib_spinlock_unlock (&cq->cq_lock);
+ clib_spinlock_free (&cq->cq_lock);
+}
+
+/**
+ * idpf_ctlq_add - add one control queue
+ * @hw: pointer to hardware struct
+ * @qinfo: info for queue to be created
+ * @cq_out: (output) double pointer to control queue to be created
+ *
+ * Allocate and initialize a control queue and add it to the control queue
+ * list. The cq parameter will be allocated/initialized and passed back to the
+ * caller if no errors occur.
+ *
+ * Note: idpf_ctlq_init must be called prior to any calls to idpf_ctlq_add
+ */
+int
+idpf_ctlq_add (vlib_main_t *vm, idpf_device_t *id,
+ idpf_ctlq_create_info_t *qinfo, struct idpf_ctlq_info **cq_out)
+{
+ bool is_rxq = false;
+ int status = IDPF_SUCCESS;
+
+ if (!qinfo->len || !qinfo->buf_size ||
+ qinfo->len > IDPF_CTLQ_MAX_RING_SIZE ||
+ qinfo->buf_size > IDPF_CTLQ_MAX_BUF_LEN)
+ return IDPF_ERR_CFG;
+
+ /* Fixme: memory allocation */
+ *cq_out = vlib_physmem_alloc_aligned_on_numa (
+ vm, sizeof (struct idpf_ctlq_info), CLIB_CACHE_LINE_BYTES, id->numa_node);
+ if (!(*cq_out))
+ return IDPF_ERR_NO_MEMORY;
+
+ if ((vlib_pci_map_dma (vm, id->pci_dev_handle, *cq_out)))
+ {
+ status = IDPF_ERR_NO_MEMORY;
+ goto init_free_q;
+ }
+
+ (*cq_out)->cq_type = qinfo->type;
+ (*cq_out)->q_id = qinfo->id;
+ (*cq_out)->buf_size = qinfo->buf_size;
+ (*cq_out)->ring_size = qinfo->len;
+
+ (*cq_out)->next_to_use = 0;
+ (*cq_out)->next_to_clean = 0;
+ (*cq_out)->next_to_post = (*cq_out)->ring_size - 1;
+
+ switch (qinfo->type)
+ {
+ case IDPF_CTLQ_TYPE_MAILBOX_RX:
+ is_rxq = true;
+ case IDPF_CTLQ_TYPE_MAILBOX_TX:
+ status = idpf_ctlq_alloc_ring_res (vm, id, *cq_out);
+ break;
+ default:
+ status = IDPF_ERR_PARAM;
+ break;
+ }
+
+ if (status)
+ goto init_free_q;
+
+ if (is_rxq)
+ {
+ idpf_ctlq_init_rxq_bufs (*cq_out);
+ }
+ else
+ {
+ /* Allocate the array of msg pointers for TX queues */
+ (*cq_out)->bi.tx_msg = (idpf_ctlq_msg_t **) clib_mem_alloc (
+ qinfo->len * sizeof (idpf_ctlq_msg_t *));
+ if (!(*cq_out)->bi.tx_msg)
+ {
+ status = IDPF_ERR_NO_MEMORY;
+ goto init_dealloc_q_mem;
+ }
+ }
+
+ idpf_ctlq_setup_regs (*cq_out, qinfo);
+
+ idpf_ctlq_init_regs (vm, id, *cq_out, is_rxq);
+
+ /* Fixeme: lock issue */
+ clib_spinlock_init (&(*cq_out)->cq_lock);
+
+ LIST_INSERT_HEAD (&id->cq_list_head, (*cq_out), cq_list);
+
+ return status;
+
+init_dealloc_q_mem:
+ /* free ring buffers and the ring itself */
+ idpf_ctlq_dealloc_ring_res (id, *cq_out);
+init_free_q:
+ clib_mem_free (*cq_out);
+
+ return status;
+}
+
+/**
+ * idpf_ctlq_remove - deallocate and remove specified control queue
+ * @hw: pointer to hardware struct
+ * @cq: pointer to control queue to be removed
+ */
+void
+idpf_ctlq_remove (idpf_device_t *id, struct idpf_ctlq_info *cq)
+{
+ LIST_REMOVE (cq, cq_list);
+ idpf_ctlq_shutdown (id, cq);
+ clib_mem_free (cq);
+}
+
+/**
+ * idpf_ctlq_init - main initialization routine for all control queues
+ * @hw: pointer to hardware struct
+ * @num_q: number of queues to initialize
+ * @q_info: array of structs containing info for each queue to be initialized
+ *
+ * This initializes any number and any type of control queues. This is an all
+ * or nothing routine; if one fails, all previously allocated queues will be
+ * destroyed. This must be called prior to using the individual add/remove
+ * APIs.
+ */
+int
+idpf_ctlq_init (vlib_main_t *vm, idpf_device_t *id, u8 num_q,
+ idpf_ctlq_create_info_t *q_info)
+{
+ struct idpf_ctlq_info *cq = NULL;
+ int ret_code = IDPF_SUCCESS;
+ int i = 0;
+
+ LIST_INIT (&id->cq_list_head);
+
+ for (i = 0; i < num_q; i++)
+ {
+ idpf_ctlq_create_info_t *qinfo = q_info + i;
+
+ ret_code = idpf_ctlq_add (vm, id, qinfo, &cq);
+ if (ret_code)
+ goto init_destroy_qs;
+ }
+
+ return ret_code;
+
+init_destroy_qs:
+ LIST_FOR_EACH_ENTRY_SAFE (cq, NULL, &id->cq_list_head, struct idpf_ctlq_info,
+ cq_list)
+ {
+ idpf_ctlq_remove (id, cq);
+ }
+
+ return ret_code;
+}
+
+/**
+ * idpf_ctlq_deinit - destroy all control queues
+ * @hw: pointer to hw struct
+ */
+void
+idpf_ctlq_deinit (idpf_device_t *id)
+{
+ struct idpf_ctlq_info *cq = NULL;
+
+ LIST_FOR_EACH_ENTRY_SAFE (cq, NULL, &id->cq_list_head, struct idpf_ctlq_info,
+ cq_list)
+ {
+ idpf_ctlq_remove (id, cq);
+ }
+
+ return;
+}
+
+/**
+ * idpf_ctlq_send - send command to Control Queue (CTQ)
+ * @id: pointer to device struct
+ * @cq: handle to control queue struct to send on
+ * @num_q_msg: number of messages to send on control queue
+ * @q_msg: pointer to array of queue messages to be sent
+ *
+ * The caller is expected to allocate DMAable buffers and pass them to the
+ * send routine via the q_msg struct / control queue specific data struct.
+ * The control queue will hold a reference to each send message until
+ * the completion for that message has been cleaned.
+ */
+int
+idpf_ctlq_send (idpf_device_t *id, struct idpf_ctlq_info *cq, u16 num_q_msg,
+ idpf_ctlq_msg_t q_msg[])
+{
+ idpf_ctlq_desc_t *desc;
+ int num_desc_avail = 0;
+ int status = IDPF_SUCCESS;
+ int i = 0;
+
+ if (!cq || !cq->ring_size)
+ return -ENOBUFS;
+
+ clib_spinlock_lock (&cq->cq_lock);
+
+ /* Ensure there are enough descriptors to send all messages */
+ num_desc_avail = IDPF_CTLQ_DESC_UNUSED (cq);
+ if (num_desc_avail == 0 || num_desc_avail < num_q_msg)
+ {
+ status = -ENOSPC;
+ goto sq_send_command_out;
+ }
+
+ for (i = 0; i < num_q_msg; i++)
+ {
+ idpf_ctlq_msg_t *msg = &q_msg[i];
+ u64 msg_cookie;
+
+ desc = IDPF_CTLQ_DESC (cq, cq->next_to_use);
+
+ /* Pay attention to CPU_TO_LE16 */
+ desc->opcode = msg->opcode;
+ desc->pfid_vfid = msg->func_id;
+
+ msg_cookie = msg->cookie.cookie;
+ desc->cookie_high = IDPF_HI_DWORD (msg_cookie);
+ desc->cookie_low = IDPF_LO_DWORD (msg_cookie);
+
+ desc->flags = (msg->host_id & IDPF_HOST_ID_MASK)
+ << IDPF_CTLQ_FLAG_HOST_ID_S;
+ if (msg->data_len)
+ {
+ idpf_dma_mem_t *buff = msg->ctx.indirect.payload;
+
+ desc->datalen |= msg->data_len;
+ desc->flags |= IDPF_CTLQ_FLAG_BUF;
+ desc->flags |= IDPF_CTLQ_FLAG_RD;
+
+ /* Update the address values in the desc with the pa
+ * value for respective buffer
+ */
+ desc->params.indirect.addr_high = IDPF_HI_DWORD (buff->pa);
+ desc->params.indirect.addr_low = IDPF_LO_DWORD (buff->pa);
+
+ clib_memcpy (&desc->params, msg->ctx.indirect.context,
+ IDPF_INDIRECT_CTX_SIZE);
+ }
+ else
+ {
+ clib_memcpy (&desc->params, msg->ctx.direct, IDPF_DIRECT_CTX_SIZE);
+ }
+
+ /* Store buffer info */
+ cq->bi.tx_msg[cq->next_to_use] = msg;
+
+ (cq->next_to_use)++;
+ if (cq->next_to_use == cq->ring_size)
+ cq->next_to_use = 0;
+ }
+
+ /* Force memory write to complete before letting hardware
+ * know that there are new descriptors to fetch.
+ */
+ CLIB_MEMORY_BARRIER ();
+
+ idpf_reg_write (id, cq->reg.tail, cq->next_to_use);
+
+sq_send_command_out:
+ clib_spinlock_unlock (&cq->cq_lock);
+
+ return status;
+}
+
+/**
+ * idpf_ctlq_clean_sq - reclaim send descriptors on HW write back for the
+ * requested queue
+ * @cq: pointer to the specific Control queue
+ * @clean_count: (input|output) number of descriptors to clean as input, and
+ * number of descriptors actually cleaned as output
+ * @msg_status: (output) pointer to msg pointer array to be populated; needs
+ * to be allocated by caller
+ *
+ * Returns an array of message pointers associated with the cleaned
+ * descriptors. The pointers are to the original ctlq_msgs sent on the cleaned
+ * descriptors. The status will be returned for each; any messages that failed
+ * to send will have a non-zero status. The caller is expected to free original
+ * ctlq_msgs and free or reuse the DMA buffers.
+ */
+int
+idpf_ctlq_clean_sq (struct idpf_ctlq_info *cq, u16 *clean_count,
+ idpf_ctlq_msg_t *msg_status[])
+{
+ idpf_ctlq_desc_t *desc;
+ u16 i = 0, num_to_clean;
+ u16 ntc, desc_err;
+ int ret = IDPF_SUCCESS;
+
+ if (!cq || !cq->ring_size)
+ return IDPF_ERR_CTLQ_EMPTY;
+
+ if (*clean_count == 0)
+ return IDPF_SUCCESS;
+ if (*clean_count > cq->ring_size)
+ return IDPF_ERR_PARAM;
+
+ /* Fixme rte func */
+ clib_spinlock_lock (&cq->cq_lock);
+
+ ntc = cq->next_to_clean;
+
+ num_to_clean = *clean_count;
+
+ for (i = 0; i < num_to_clean; i++)
+ {
+ /* Fetch next descriptor and check if marked as done */
+ desc = IDPF_CTLQ_DESC (cq, ntc);
+ if (!(desc->flags & IDPF_CTLQ_FLAG_DD))
+ break;
+
+ desc_err = desc->ret_val;
+ if (desc_err)
+ {
+ /* strip off FW internal code */
+ desc_err &= 0xff;
+ }
+
+ msg_status[i] = cq->bi.tx_msg[ntc];
+ msg_status[i]->status = desc_err;
+
+ cq->bi.tx_msg[ntc] = NULL;
+
+ /* Zero out any stale data */
+ clib_memset (desc, 0, sizeof (*desc));
+
+ ntc++;
+ if (ntc == cq->ring_size)
+ ntc = 0;
+ }
+
+ cq->next_to_clean = ntc;
+
+ clib_spinlock_unlock (&cq->cq_lock);
+
+ /* Return number of descriptors actually cleaned */
+ *clean_count = i;
+
+ return ret;
+}
+
+/**
+ * idpf_ctlq_post_rx_buffs - post buffers to descriptor ring
+ * @hw: pointer to hw struct
+ * @cq: pointer to control queue handle
+ * @buff_count: (input|output) input is number of buffers caller is trying to
+ * return; output is number of buffers that were not posted
+ * @buffs: array of pointers to dma mem structs to be given to hardware
+ *
+ * Caller uses this function to return DMA buffers to the descriptor ring after
+ * consuming them; buff_count will be the number of buffers.
+ *
+ * Note: this function needs to be called after a receive call even
+ * if there are no DMA buffers to be returned, i.e. buff_count = 0,
+ * buffs = NULL to support direct commands
+ */
+int
+idpf_ctlq_post_rx_buffs (idpf_device_t *id, struct idpf_ctlq_info *cq,
+ u16 *buff_count, idpf_dma_mem_t **buffs)
+{
+ idpf_ctlq_desc_t *desc;
+ u16 ntp = cq->next_to_post;
+ bool buffs_avail = false;
+ u16 tbp = ntp + 1;
+ int status = IDPF_SUCCESS;
+ int i = 0;
+
+ if (*buff_count > cq->ring_size)
+ return IDPF_ERR_PARAM;
+
+ if (*buff_count > 0)
+ buffs_avail = true;
+
+ clib_spinlock_lock (&cq->cq_lock);
+
+ if (tbp >= cq->ring_size)
+ tbp = 0;
+
+ if (tbp == cq->next_to_clean)
+ /* Nothing to do */
+ goto post_buffs_out;
+
+ /* Post buffers for as many as provided or up until the last one used */
+ while (ntp != cq->next_to_clean)
+ {
+ desc = IDPF_CTLQ_DESC (cq, ntp);
+
+ if (cq->bi.rx_buff[ntp])
+ goto fill_desc;
+ if (!buffs_avail)
+ {
+ /* If the caller hasn't given us any buffers or
+ * there are none left, search the ring itself
+ * for an available buffer to move to this
+ * entry starting at the next entry in the ring
+ */
+ tbp = ntp + 1;
+
+ /* Wrap ring if necessary */
+ if (tbp >= cq->ring_size)
+ tbp = 0;
+
+ while (tbp != cq->next_to_clean)
+ {
+ if (cq->bi.rx_buff[tbp])
+ {
+ cq->bi.rx_buff[ntp] = cq->bi.rx_buff[tbp];
+ cq->bi.rx_buff[tbp] = NULL;
+
+ /* Found a buffer, no need to
+ * search anymore
+ */
+ break;
+ }
+
+ /* Wrap ring if necessary */
+ tbp++;
+ if (tbp >= cq->ring_size)
+ tbp = 0;
+ }
+
+ if (tbp == cq->next_to_clean)
+ goto post_buffs_out;
+ }
+ else
+ {
+ /* Give back pointer to DMA buffer */
+ cq->bi.rx_buff[ntp] = buffs[i];
+ i++;
+
+ if (i >= *buff_count)
+ buffs_avail = false;
+ }
+
+ fill_desc:
+ desc->flags = IDPF_CTLQ_FLAG_BUF | IDPF_CTLQ_FLAG_RD;
+
+ /* Post buffers to descriptor */
+ desc->datalen = cq->bi.rx_buff[ntp]->size;
+ desc->params.indirect.addr_high =
+ IDPF_HI_DWORD (cq->bi.rx_buff[ntp]->pa);
+ desc->params.indirect.addr_low = IDPF_LO_DWORD (cq->bi.rx_buff[ntp]->pa);
+
+ ntp++;
+ if (ntp == cq->ring_size)
+ ntp = 0;
+ }
+
+post_buffs_out:
+ /* Only update tail if buffers were actually posted */
+ if (cq->next_to_post != ntp)
+ {
+ if (ntp)
+ /* Update next_to_post to ntp - 1 since current ntp
+ * will not have a buffer
+ */
+ cq->next_to_post = ntp - 1;
+ else
+ /* Wrap to end of end ring since current ntp is 0 */
+ cq->next_to_post = cq->ring_size - 1;
+
+ idpf_reg_write (id, cq->reg.tail, cq->next_to_post);
+ }
+
+ clib_spinlock_unlock (&cq->cq_lock);
+
+ /* return the number of buffers that were not posted */
+ *buff_count = *buff_count - i;
+
+ return status;
+}
+
+/**
+ * idpf_ctlq_recv - receive control queue message call back
+ * @cq: pointer to control queue handle to receive on
+ * @num_q_msg: (input|output) input number of messages that should be received;
+ * output number of messages actually received
+ * @q_msg: (output) array of received control queue messages on this q;
+ * needs to be pre-allocated by caller for as many messages as requested
+ *
+ * Called by interrupt handler or polling mechanism. Caller is expected
+ * to free buffers
+ */
+int
+idpf_ctlq_recv (struct idpf_ctlq_info *cq, u16 *num_q_msg,
+ idpf_ctlq_msg_t *q_msg)
+{
+ u16 num_to_clean, ntc, ret_val, flags;
+ idpf_ctlq_desc_t *desc;
+ int ret_code = 0;
+ u16 i = 0;
+
+ if (!cq || !cq->ring_size)
+ return -ENOBUFS;
+
+ if (*num_q_msg == 0)
+ return 0;
+ else if (*num_q_msg > cq->ring_size)
+ return -EINVAL;
+
+ /* Fixme: take the lock before we start messing with the ring */
+ clib_spinlock_lock (&cq->cq_lock);
+
+ ntc = cq->next_to_clean;
+
+ num_to_clean = *num_q_msg;
+
+ for (i = 0; i < num_to_clean; i++)
+ {
+ u64 msg_cookie;
+
+ /* Fetch next descriptor and check if marked as done */
+ desc = IDPF_CTLQ_DESC (cq, ntc);
+ flags = desc->flags;
+
+ if (!(flags & IDPF_CTLQ_FLAG_DD))
+ break;
+
+ ret_val = desc->ret_val;
+
+ q_msg[i].vmvf_type =
+ (flags & (IDPF_CTLQ_FLAG_FTYPE_VM | IDPF_CTLQ_FLAG_FTYPE_PF)) >>
+ IDPF_CTLQ_FLAG_FTYPE_S;
+
+ if (flags & IDPF_CTLQ_FLAG_ERR)
+ ret_code = IDPF_ERR_CTLQ_ERROR;
+
+ msg_cookie = (u64) desc->cookie_high << 32;
+ msg_cookie |= (u64) desc->cookie_low;
+ clib_memcpy_fast (&q_msg[i].cookie, &msg_cookie, sizeof (u64));
+
+ q_msg[i].opcode = desc->opcode;
+ q_msg[i].data_len = desc->datalen;
+ q_msg[i].status = ret_val;
+
+ if (desc->datalen)
+ {
+ clib_memcpy_fast (q_msg[i].ctx.indirect.context,
+ &desc->params.indirect, IDPF_INDIRECT_CTX_SIZE);
+
+ /* Assign pointer to dma buffer to ctlq_msg array
+ * to be given to upper layer
+ */
+ q_msg[i].ctx.indirect.payload = cq->bi.rx_buff[ntc];
+
+ /* Zero out pointer to DMA buffer info;
+ * will be repopulated by post buffers API
+ */
+ cq->bi.rx_buff[ntc] = NULL;
+ }
+ else
+ {
+ clib_memcpy_fast (q_msg[i].ctx.direct, desc->params.raw,
+ IDPF_DIRECT_CTX_SIZE);
+ }
+
+ /* Zero out stale data in descriptor */
+ clib_memset (desc, 0, sizeof (idpf_ctlq_desc_t));
+
+ ntc++;
+ if (ntc == cq->ring_size)
+ ntc = 0;
+ };
+
+ cq->next_to_clean = ntc;
+
+ /* Fixme */
+ clib_spinlock_unlock (&cq->cq_lock);
+
+ *num_q_msg = i;
+ if (*num_q_msg == 0)
+ ret_code = -ENOMSG;
+
+ return ret_code;
+}
diff --git a/src/plugins/idpf/idpf_test.c b/src/plugins/idpf/idpf_test.c
new file mode 100644
index 00000000000..85b12966681
--- /dev/null
+++ b/src/plugins/idpf/idpf_test.c
@@ -0,0 +1,169 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#include <vppinfra/error.h>
+#include <idpf/idpf.h>
+
+#define __plugin_msg_base idpf_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* declare message IDs */
+#include <idpf/idpf.api_enum.h>
+#include <idpf/idpf.api_types.h>
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} idpf_test_main_t;
+
+idpf_test_main_t idpf_test_main;
+
+/* idpf create API */
+static int
+api_idpf_create (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_idpf_create_t *mp;
+ idpf_create_if_args_t args;
+ uint32_t tmp;
+ int ret;
+ u32 x[4];
+
+ clib_memset (&args, 0, sizeof (idpf_create_if_args_t));
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%x:%x:%x.%x", &x[0], &x[1], &x[2], &x[3]))
+ {
+ args.addr.domain = x[0];
+ args.addr.bus = x[1];
+ args.addr.slot = x[2];
+ args.addr.function = x[3];
+ }
+ else if (unformat (i, "rx-single %u", &tmp))
+ args.rxq_single = 1;
+ else if (unformat (i, "tx-single %u", &tmp))
+ args.txq_single = 1;
+ else if (unformat (i, "rxq-size %u", &tmp))
+ args.rxq_size = tmp;
+ else if (unformat (i, "txq-size %u", &tmp))
+ args.txq_size = tmp;
+ else if (unformat (i, "rxq-num %u", &tmp))
+ args.rxq_num = tmp;
+ else if (unformat (i, "txq-num %u", &tmp))
+ args.txq_num = tmp;
+ else if (unformat (i, "vport-num %u", &tmp))
+ args.req_vport_nb = tmp;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IDPF_CREATE, mp);
+
+ mp->pci_addr = clib_host_to_net_u32 (args.addr.as_u32);
+ mp->rxq_single = clib_host_to_net_u16 (args.rxq_single);
+ mp->txq_single = clib_host_to_net_u16 (args.txq_single);
+ mp->rxq_num = clib_host_to_net_u16 (args.rxq_num);
+ mp->txq_num = clib_host_to_net_u16 (args.txq_num);
+ mp->rxq_size = clib_host_to_net_u16 (args.rxq_size);
+ mp->txq_size = clib_host_to_net_u16 (args.txq_size);
+ mp->req_vport_nb = clib_host_to_net_u16 (args.req_vport_nb);
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
+/* idpf-create reply handler */
+static void
+vl_api_idpf_create_reply_t_handler (vl_api_idpf_create_reply_t *mp)
+{
+ vat_main_t *vam = idpf_test_main.vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created idpf with sw_if_index %d\n",
+ ntohl (mp->sw_if_index));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vam->regenerate_interface_table = 1;
+}
+
+/* idpf delete API */
+static int
+api_idpf_delete (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_idpf_delete_t *mp;
+ u32 sw_if_index = 0;
+ u8 index_defined = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %u", &sw_if_index))
+ index_defined = 1;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!index_defined)
+ {
+ errmsg ("missing sw_if_index\n");
+ return -99;
+ }
+
+ M (IDPF_DELETE, mp);
+
+ mp->sw_if_index = clib_host_to_net_u32 (sw_if_index);
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
+#include <idpf/idpf.api_test.c>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/plugin.c b/src/plugins/idpf/plugin.c
new file mode 100644
index 00000000000..745ba43f606
--- /dev/null
+++ b/src/plugins/idpf/plugin.c
@@ -0,0 +1,35 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description =
+ "Intel Infrastructure Data Path Function (IDPF) Device Driver",
+ .default_disabled = 1,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/idpf/virtchnl2.h b/src/plugins/idpf/virtchnl2.h
new file mode 100644
index 00000000000..8db68483f22
--- /dev/null
+++ b/src/plugins/idpf/virtchnl2.h
@@ -0,0 +1,855 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _IDPF_VIRTCHNL_H_
+#define _IDPF_VIRTCHNL_H_
+
+#include <idpf/virtchnl2_lan_desc.h>
+
+#define foreach_virtchnl2_status \
+ _ (0, SUCCESS) \
+ _ (-5, ERR_PARAM) \
+ _ (-38, ERR_OPCODE_MISMATCH)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_STATUS_##n = v,
+ foreach_virtchnl2_status
+#undef _
+} virtchnl2_status_t;
+
+#define foreach_virtchnl2_op \
+ _ (0, UNKNOWN) \
+ _ (1, VERSION) \
+ _ (500, GET_CAPS) \
+ _ (501, CREATE_VPORT) \
+ _ (502, DESTROY_VPORT) \
+ _ (503, ENABLE_VPORT) \
+ _ (504, DISABLE_VPORT) \
+ _ (505, CONFIG_TX_QUEUES) \
+ _ (506, CONFIG_RX_QUEUES) \
+ _ (507, ENABLE_QUEUES) \
+ _ (508, DISABLE_QUEUES) \
+ _ (509, ADD_QUEUES) \
+ _ (510, DEL_QUEUES) \
+ _ (511, MAP_QUEUE_VECTOR) \
+ _ (512, UNMAP_QUEUE_VECTOR) \
+ _ (513, GET_RSS_KEY) \
+ _ (514, SET_RSS_KEY) \
+ _ (515, GET_RSS_LUT) \
+ _ (516, SET_RSS_LUT) \
+ _ (517, GET_RSS_HASH) \
+ _ (518, SET_RSS_HASH) \
+ _ (519, SET_SRIOV_VFS) \
+ _ (520, ALLOC_VECTORS) \
+ _ (521, DEALLOC_VECTORS) \
+ _ (522, EVENT) \
+ _ (523, GET_STATS) \
+ _ (524, RESET_VF) \
+ _ (526, GET_PTYPE_INFO) \
+ _ (532, CREATE_ADI) \
+ _ (533, DESTROY_ADI) \
+ _ (534, LOOPBACK) \
+ _ (535, ADD_MAC_ADDR) \
+ _ (536, DEL_MAC_ADDR) \
+ _ (537, CONFIG_PROMISCUOUS_MODE)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_OP_##n = v,
+ foreach_virtchnl2_op
+#undef _
+} virtchnl2_op_t;
+
+/* VIRTCHNL2_VPORT_TYPE
+ * Type of virtual port
+ */
+#define foreach_virtchnl2_vport_type \
+ _ (0, DEFAULT) \
+ _ (1, SRIOV) \
+ _ (2, SIOV) \
+ _ (3, SUBDEV) \
+ _ (4, MNG)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_VPORT_TYPE_##n = v,
+ foreach_virtchnl2_vport_type
+#undef _
+} virtchnl2_vport_type_t;
+
+/* VIRTCHNL2_QUEUE_MODEL
+ * Type of queue model
+ */
+#define VIRTCHNL2_QUEUE_MODEL_SINGLE 0
+#define VIRTCHNL2_QUEUE_MODEL_SPLIT 1
+
+#define foreach_idpf_checksum_cap_flag \
+ _ (0, TX_CSUM_L3_IPV4, "tx-csum-l3-ipv4") \
+ _ (1, TX_CSUM_L4_IPV4_TCP, "tx-csum-l4-ipv4-tcp") \
+ _ (2, TX_CSUM_L4_IPV4_UDP, "tx-csum-l4-ipv4-udp") \
+ _ (3, TX_CSUM_L4_IPV4_SCTP, "tx-csum-l4-ipv4-sctp") \
+ _ (4, TX_CSUM_L4_IPV6_TCP, "tx-csum-l4-ipv6-tcp") \
+ _ (5, TX_CSUM_L4_IPV6_UDP, "tx-csum-l4-ipv6-udp") \
+ _ (6, TX_CSUM_L4_IPV6_SCTP, "tx-csum-l4-ipv6-sctp") \
+ _ (7, TX_CSUM_GENERIC, "tx-csum-generic") \
+ _ (8, RX_CSUM_L3_IPV4, "rx-csum-l3-ipv4") \
+ _ (9, RX_CSUM_L4_IPV4_TCP, "rx-csum-l4-ipv4-tcp") \
+ _ (10, RX_CSUM_L4_IPV4_UDP, "rx-csum-l4-ipv4-udp") \
+ _ (11, RX_CSUM_L4_IPV4_SCTP, "rx-csum-l4-ipv4-sctp") \
+ _ (12, RX_CSUM_L4_IPV6_TCP, "rx-csum-l4-ipv6-tcp") \
+ _ (13, RX_CSUM_L4_IPV6_UDP, "rx-csum-l4-ipv6-udp") \
+ _ (14, RX_CSUM_L4_IPV6_SCTP, "rx-csum-l4-ipv6-sctp") \
+ _ (15, RX_CSUM_GENERIC, "rx-csum-generic") \
+ _ (16, TX_CSUM_L3_SINGLE_TUNNEL, "tx-csum-l3-single-tunnel") \
+ _ (17, TX_CSUM_L3_DOUBLE_TUNNEL, "tx-csum-l3-double-tunnel") \
+ _ (18, RX_CSUM_L3_SINGLE_TUNNEL, "rx-csum-l3-single-tunnel") \
+ _ (19, RX_CSUM_L3_DOUBLE_TUNNEL, "rx-csum-l3-double-tunnel") \
+ _ (20, TX_CSUM_L4_SINGLE_TUNNEL, "tx-csum-l4-single-tunnel") \
+ _ (21, TX_CSUM_L4_DOUBLE_TUNNEL, "tx-csum-l4-double-tunnel") \
+ _ (22, RX_CSUM_L4_SINGLE_TUNNEL, "rx-csum-l4-single-tunnel") \
+ _ (23, RX_CSUM_L4_DOUBLE_TUNNEL, "rx-csum-l4-double-tunnel")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_##b = (1 << a),
+ foreach_idpf_checksum_cap_flag
+#undef _
+} idpf_checksum_cap_flag_t;
+
+#define foreach_idpf_seg_cap_flag \
+ _ (0, IPV4_TCP, "ipv4-tcp") \
+ _ (1, IPV4_UDP, "ipv4-udp") \
+ _ (2, IPV4_SCTP, "ipv4-sctp") \
+ _ (3, IPV6_TCP, "ipv6-tcp") \
+ _ (4, IPV6_UDP, "ipv6-udp") \
+ _ (5, IPV6_SCTP, "ipv6-sctp") \
+ _ (6, GENERIC, "generic") \
+ _ (7, TX_SINGLE_TUNNEL, "tx-single-tunnel") \
+ _ (8, TX_DOUBLE_TUNNEL, "tx-double-tunnel")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_SEG_##b = (1 << a),
+ foreach_idpf_seg_cap_flag
+#undef _
+} idpf_seg_cap_flag_t;
+
+#define foreach_idpf_rss_cap_flag \
+ _ (0, IPV4_TCP, "ipv4-tcp") \
+ _ (1, IPV4_UDP, "ipv4-udp") \
+ _ (2, IPV4_SCTP, "ipv4-sctp") \
+ _ (3, IPV4_OTHER, "ipv4-other") \
+ _ (4, IPV6_TCP, "ipv6-tcp") \
+ _ (5, IPV6_UDP, "ipv6-udp") \
+ _ (6, IPV6_SCTP, "ipv6-sctp") \
+ _ (7, IPV6_OTHER, "ipv6-other") \
+ _ (8, IPV4_AH, "ipv4-ah") \
+ _ (9, IPV4_ESP, "ipv4-esp") \
+ _ (10, IPV4_AH_ESP, "ipv4-ah-esp") \
+ _ (11, IPV6_AH, "ipv6-ah") \
+ _ (12, IPV6_ESP, "ipv6-esp") \
+ _ (13, IPV6_AH_ESP, "ipv6-ah-esp")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_RSS_##b = (1 << a),
+ foreach_idpf_rss_cap_flag
+#undef _
+} idpf_rss_cap_flag_t;
+
+#define foreach_idpf_hsplit_cap_flag \
+ _ (0, AT_L2, "at-l2") \
+ _ (1, AT_L3, "at-l3") \
+ _ (2, AT_L4V4, "at-l4v4") \
+ _ (3, AT_L4V6, "at-l4v6")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_RX_HSPLIT_##b = (1 << a),
+ foreach_idpf_hsplit_cap_flag
+#undef _
+} idpf_hsplit_cap_flag_t;
+
+#define foreach_idpf_rsc_cap_flag \
+ _ (0, IPV4_TCP, "ipv4-tcp") \
+ _ (1, IPV4_SCTP, "ipv4-sctp") \
+ _ (2, IPV6_TCP, "ipv6-tcp") \
+ _ (3, IPV6_SCTP, "ipv6-sctp")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_RSC_##b = (1 << a),
+ foreach_idpf_rsc_cap_flag
+#undef _
+} idpf_rsc_cap_flag_t;
+
+#define foreach_idpf_other_cap_flag \
+ _ (0, RDMA, "rdma") \
+ _ (1, SRIOV, "sriov") \
+ _ (2, MACFILTER, "macfilter") \
+ _ (3, FLOW_DIRECTOR, "flow-director") \
+ _ (4, SPLITQ_QSCHED, "spliteq-qsched") \
+ _ (5, CRC, "crc") \
+ _ (6, ADQ, "adq") \
+ _ (7, WB_ON_ITR, "wb-on-itr") \
+ _ (8, PROMISC, "promisc") \
+ _ (9, LINK_SPEED, "link-speed") \
+ _ (10, INLINE_IPSEC, "inline-ipsec") \
+ _ (11, LARGE_NUM_QUEUES, "large-num-queues") \
+ _ (12, VLAN, "vlan") \
+ _ (13, PTP, "ptp") \
+ _ (15, ADV_RSS, "adv-rss") \
+ _ (16, FDIR, "fdir") \
+ _ (17, RX_FLEX_DESC, "rx-flex-desc") \
+ _ (18, PTYPE, "ptype") \
+ _ (19, LOOPBACK, "loopback") \
+ _ (20, OEM, "oem")
+
+typedef enum
+{
+#define _(a, b, c) VIRTCHNL2_CAP_##b = (1 << a),
+ foreach_idpf_other_cap_flag
+#undef _
+} idpf_other_cap_flag_t;
+
+#define VIRTCHNL2_TXQ_SCHED_MODE_QUEUE 0
+#define VIRTCHNL2_TXQ_SCHED_MODE_FLOW 1
+
+#define VIRTCHNL2_TXQ_ENABLE_MISS_COMPL BIT (0)
+
+#define VIRTCHNL2_RDMA_CPF 0
+#define VIRTCHNL2_NVME_CPF 1
+#define VIRTCHNL2_ATE_CPF 2
+#define VIRTCHNL2_LCE_CPF 3
+
+#define VIRTCHNL2_RXQ_RSC BIT (0)
+#define VIRTCHNL2_RXQ_HDR_SPLIT BIT (1)
+#define VIRTCHNL2_RXQ_IMMEDIATE_WRITE_BACK BIT (2)
+#define VIRTCHNL2_RX_DESC_SIZE_16BYTE BIT (3)
+#define VIRTCHNL2_RX_DESC_SIZE_32BYTE BIT (4)
+
+#define foreach_virtchnl2_rss_alg \
+ _ (0, TOEPLITZ_ASYMMETRIC) \
+ _ (1, R_ASYMMETRIC) \
+ _ (2, TOEPLITZ_SYMMETRIC) \
+ _ (3, XOR_SYMMETRIC)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RSS_ALG_##n = v,
+ foreach_virtchnl2_rss_alg
+#undef _
+} virtchnl2_rss_alg_t;
+
+#define foreach_virtchnl2_event \
+ _ (0, UNKNOWN) \
+ _ (1, LINK_CHANGE) \
+ _ (2, START_RESET_ADI) \
+ _ (3, FINISH_RESET_ADI)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_EVENT_##n = v,
+ foreach_virtchnl2_event
+#undef _
+} virtchnl2_event_name_t;
+
+#define foreach_idpf_queue_type \
+ _ (0, TX) \
+ _ (1, RX) \
+ _ (2, TX_COMPLETION) \
+ _ (3, RX_BUFFER) \
+ _ (4, CONFIG_TX) \
+ _ (5, CONFIG_RX) \
+ _ (6, P2P_TX) \
+ _ (7, P2P_RX) \
+ _ (8, P2P_TX_COMPLETION) \
+ _ (9, P2P_RX_BUFFER) \
+ _ (10, MBX_TX) \
+ _ (11, MBX_RX)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_QUEUE_TYPE_##n = v,
+ foreach_idpf_queue_type
+#undef _
+} idpf_queue_type_t;
+
+#define foreach_virtchnl2_itr_idx \
+ _ (0, 0) \
+ _ (1, 1) \
+ _ (2, 2) \
+ _ (3, NO_ITR)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_ITR_IDX_##n = v,
+ foreach_virtchnl2_itr_idx
+#undef _
+} virtchnl2_itr_idx_t;
+
+#define VIRTCHNL2_MAC_ADDR_PRIMARY 1
+#define VIRTCHNL2_MAC_ADDR_EXTRA 2
+
+#define VIRTCHNL2_UNICAST_PROMISC BIT (0)
+#define VIRTCHNL2_MULTICAST_PROMISC BIT (1)
+
+#define foreach_virtchnl2_proto_hdr \
+ _ (0, ANY) \
+ _ (1, PRE_MAC) \
+ _ (2, MAC) \
+ _ (3, POST_MAC) \
+ _ (4, ETHERTYPE) \
+ _ (5, VLAN) \
+ _ (6, SVLAN) \
+ _ (7, CVLAN) \
+ _ (8, MPLS) \
+ _ (9, UMPLS) \
+ _ (10, MMPLS) \
+ _ (11, PTP) \
+ _ (12, CTRL) \
+ _ (13, LLDP) \
+ _ (14, ARP) \
+ _ (15, ECP) \
+ _ (16, EAPOL) \
+ _ (17, PPPOD) \
+ _ (18, PPPOE) \
+ _ (19, IPV4) \
+ _ (20, IPV4_FRAG) \
+ _ (21, IPV6) \
+ _ (22, IPV6_FRAG) \
+ _ (23, IPV6_EH) \
+ _ (24, UDP) \
+ _ (25, TCP) \
+ _ (26, SCTP) \
+ _ (27, ICMP) \
+ _ (28, ICMPV6) \
+ _ (29, IGMP) \
+ _ (30, AH) \
+ _ (31, ESP) \
+ _ (32, IKE) \
+ _ (33, NATT_KEEP) \
+ _ (34, PAY) \
+ _ (35, L2TPV2) \
+ _ (36, L2TPV2_CONTROL) \
+ _ (37, L2TPV3) \
+ _ (38, GTP) \
+ _ (39, GTP_EH) \
+ _ (40, GTPCV2) \
+ _ (41, GTPC_TEID) \
+ _ (42, GTPU) \
+ _ (43, GTPU_UL) \
+ _ (44, GTPU_DL) \
+ _ (45, ECPRI) \
+ _ (46, VRRP) \
+ _ (47, OSPF) \
+ _ (48, TUN) \
+ _ (49, GRE) \
+ _ (50, NVGRE) \
+ _ (51, VXLAN) \
+ _ (52, VXLAN_GPE) \
+ _ (53, GENEVE) \
+ _ (54, NSH) \
+ _ (55, QUIC) \
+ _ (56, PFCP) \
+ _ (57, PFCP_NODE) \
+ _ (58, PFCP_SESSION) \
+ _ (59, RTP) \
+ _ (60, ROCE) \
+ _ (61, ROCEV1) \
+ _ (62, ROCEV2) \
+ _ (65535, NO_PROTO)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_PROTO_HDR_##n = v,
+ foreach_virtchnl2_proto_hdr
+#undef _
+} virtchnl2_proto_hdr_t;
+
+#define VIRTCHNL2_VERSION_MAJOR_2 2
+#define VIRTCHNL2_VERSION_MINOR_0 0
+
+typedef struct
+{
+ u32 major;
+ u32 minor;
+} virtchnl2_version_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_version_info_t, 8);
+
+typedef struct
+{
+ u32 csum_caps;
+ u32 seg_caps;
+ u32 hsplit_caps;
+ u32 rsc_caps;
+ u64 rss_caps;
+ u64 other_caps;
+
+ u32 mailbox_dyn_ctl;
+ u16 mailbox_vector_id;
+ u16 num_allocated_vectors;
+
+ u16 max_rx_q;
+ u16 max_tx_q;
+ u16 max_rx_bufq;
+ u16 max_tx_complq;
+
+ u16 max_sriov_vfs;
+
+ u16 max_vports;
+ u16 default_num_vports;
+
+ u16 max_tx_hdr_size;
+
+ u8 max_sg_bufs_per_tx_pkt;
+
+ u8 itr_idx_map;
+
+ u16 pad1;
+
+ u16 oem_cp_ver_major;
+ u16 oem_cp_ver_minor;
+ u32 device_type;
+
+ u8 reserved[12];
+} virtchnl2_get_capabilities_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_get_capabilities_t, 80);
+
+typedef struct
+{
+ /* see VIRTCHNL2_QUEUE_TYPE definitions */
+ u32 type;
+ u32 start_queue_id;
+ u32 num_queues;
+ u32 pad;
+
+ /* Queue tail register offset and spacing provided by CP */
+ u64 qtail_reg_start;
+ u32 qtail_reg_spacing;
+
+ u8 reserved[4];
+} virtchnl2_queue_reg_chunk_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_reg_chunk_t, 32);
+
+/* structure to specify several chunks of contiguous queues */
+typedef struct
+{
+ u16 num_chunks;
+ u8 reserved[6];
+ virtchnl2_queue_reg_chunk_t chunks[1];
+} virtchnl2_queue_reg_chunks_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_reg_chunks_t, 40);
+
+#define VIRTCHNL2_ETH_LENGTH_OF_ADDRESS 6
+
+typedef struct
+{
+ u16 vport_type;
+ u16 txq_model;
+ u16 rxq_model;
+ u16 num_tx_q;
+ u16 num_tx_complq;
+ u16 num_rx_q;
+ u16 num_rx_bufq;
+ u16 default_rx_q;
+ u16 vport_index;
+
+ u16 max_mtu;
+ u32 vport_id;
+ u8 default_mac_addr[VIRTCHNL2_ETH_LENGTH_OF_ADDRESS];
+ u16 pad;
+ u64 rx_desc_ids;
+ u64 tx_desc_ids;
+
+#define MAX_Q_REGIONS 16
+ u32 max_qs_per_qregion[MAX_Q_REGIONS];
+ u32 qregion_total_qs;
+ u16 qregion_type;
+ u16 pad2;
+
+ u32 rss_algorithm;
+ u16 rss_key_size;
+ u16 rss_lut_size;
+
+ u32 rx_split_pos;
+
+ u8 reserved[20];
+ virtchnl2_queue_reg_chunks_t chunks;
+} virtchnl2_create_vport_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_create_vport_t, 192);
+
+typedef struct
+{
+ u32 vport_id;
+ u8 reserved[4];
+} virtchnl2_vport_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_vport_t, 8);
+
+typedef struct
+{
+ u64 dma_ring_addr;
+ u32 type;
+ u32 queue_id;
+ u16 relative_queue_id;
+ u16 model;
+ u16 sched_mode;
+ u16 qflags;
+ u16 ring_len;
+
+ u16 tx_compl_queue_id;
+ u16 peer_type;
+ u16 peer_rx_queue_id;
+
+ u16 qregion_id;
+ u8 pad[2];
+
+ u32 egress_pasid;
+ u32 egress_hdr_pasid;
+ u32 egress_buf_pasid;
+
+ u8 reserved[8];
+} virtchnl2_txq_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_txq_info_t, 56);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 num_qinfo;
+
+ u8 reserved[10];
+ virtchnl2_txq_info_t qinfo[1];
+} virtchnl2_config_tx_queues_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_config_tx_queues_t, 72);
+
+/* Receive queue config info */
+typedef struct
+{
+ u64 desc_ids;
+ u64 dma_ring_addr;
+
+ u32 type;
+ u32 queue_id;
+
+ u16 model;
+
+ u16 hdr_buffer_size;
+ u32 data_buffer_size;
+ u32 max_pkt_size;
+
+ u16 ring_len;
+ u8 buffer_notif_stride;
+ u8 pad[1];
+
+ u64 dma_head_wb_addr;
+
+ u16 qflags;
+
+ u16 rx_buffer_low_watermark;
+
+ u16 rx_bufq1_id;
+ u16 rx_bufq2_id;
+ u8 bufq2_ena;
+ u8 pad2;
+
+ u16 qregion_id;
+
+ u32 ingress_pasid;
+ u32 ingress_hdr_pasid;
+ u32 ingress_buf_pasid;
+
+ u8 reserved[16];
+} virtchnl2_rxq_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_rxq_info_t, 88);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 num_qinfo;
+
+ u8 reserved[18];
+ virtchnl2_rxq_info_t qinfo[1];
+} virtchnl2_config_rx_queues_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_config_rx_queues_t, 112);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 num_tx_q;
+ u16 num_tx_complq;
+ u16 num_rx_q;
+ u16 num_rx_bufq;
+ u8 reserved[4];
+ virtchnl2_queue_reg_chunks_t chunks;
+} virtchnl2_add_queues_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_add_queues_t, 56);
+
+typedef struct
+{
+ u16 start_vector_id;
+ u16 start_evv_id;
+ u16 num_vectors;
+ u16 pad1;
+
+ u32 dynctl_reg_start;
+ u32 dynctl_reg_spacing;
+
+ u32 itrn_reg_start;
+ u32 itrn_reg_spacing;
+ u8 reserved[8];
+} virtchnl2_vector_chunk_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_vector_chunk_t, 32);
+
+typedef struct
+{
+ u16 num_vchunks;
+ u8 reserved[14];
+ virtchnl2_vector_chunk_t vchunks[1];
+} virtchnl2_vector_chunks_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_vector_chunks_t, 48);
+
+typedef struct
+{
+ u16 num_vectors;
+ u8 reserved[14];
+ virtchnl2_vector_chunks_t vchunks;
+} virtchnl2_alloc_vectors_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_alloc_vectors_t, 64);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 lut_entries_start;
+ u16 lut_entries;
+ u8 reserved[4];
+ u32 lut[1]; /* RSS lookup table */
+} virtchnl2_rss_lut_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_rss_lut_t, 16);
+
+typedef struct
+{
+ /* Packet Type Groups bitmap */
+ u64 ptype_groups;
+ u32 vport_id;
+ u8 reserved[4];
+} virtchnl2_rss_hash_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_rss_hash_t, 16);
+
+typedef struct
+{
+ u16 num_vfs;
+ u16 pad;
+} virtchnl2_sriov_vfs_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_sriov_vfs_info_t, 4);
+
+typedef struct
+{
+ u32 pasid;
+ u16 mbx_id;
+ u16 mbx_vec_id;
+ u16 adi_id;
+ u8 reserved[64];
+ u8 pad[6];
+ virtchnl2_queue_reg_chunks_t chunks;
+ virtchnl2_vector_chunks_t vchunks;
+} virtchnl2_create_adi_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_create_adi_t, 168);
+
+typedef struct
+{
+ u16 adi_id;
+ u8 reserved[2];
+} virtchnl2_destroy_adi_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_destroy_adi_t, 4);
+
+typedef struct
+{
+ u16 ptype_id_10;
+ u8 ptype_id_8;
+ u8 proto_id_count;
+ u16 pad;
+ u16 proto_id[1];
+} virtchnl2_ptype_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_ptype_t, 8);
+
+typedef struct
+{
+ u16 start_ptype_id;
+ u16 num_ptypes;
+ u32 pad;
+ virtchnl2_ptype_t ptype[1];
+} virtchnl2_get_ptype_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_get_ptype_info_t, 16);
+
+typedef struct
+{
+ u32 vport_id;
+ u8 pad[4];
+
+ u64 rx_bytes;
+ u64 rx_unicast;
+ u64 rx_multicast;
+ u64 rx_broadcast;
+ u64 rx_discards;
+ u64 rx_errors;
+ u64 rx_unknown_protocol;
+ u64 tx_bytes;
+ u64 tx_unicast;
+ u64 tx_multicast;
+ u64 tx_broadcast;
+ u64 tx_discards;
+ u64 tx_errors;
+ u64 rx_invalid_frame_length;
+ u64 rx_overflow_drop;
+} virtchnl2_vport_stats_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_vport_stats_t, 128);
+
+typedef struct
+{
+ u32 event;
+ u32 link_speed;
+ u32 vport_id;
+ u8 link_status;
+ u8 pad[1];
+ u16 adi_id;
+} virtchnl2_event_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_event_t, 16);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 key_len;
+ u8 pad;
+ u8 key[1];
+} virtchnl2_rss_key_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_rss_key_t, 8);
+
+typedef struct
+{
+ u32 type;
+ u32 start_queue_id;
+ u32 num_queues;
+ u8 reserved[4];
+} virtchnl2_queue_chunk_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_chunk_t, 16);
+
+typedef struct
+{
+ u16 num_chunks;
+ u8 reserved[6];
+ virtchnl2_queue_chunk_t chunks[1];
+} virtchnl2_queue_chunks_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_chunks_t, 24);
+
+typedef struct
+{
+ u32 vport_id;
+ u8 reserved[4];
+ virtchnl2_queue_chunks_t chunks;
+} virtchnl2_del_ena_dis_queues_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_del_ena_dis_queues_t, 32);
+
+typedef struct
+{
+ u32 queue_id;
+ u16 vector_id;
+ u8 pad[2];
+
+ u32 itr_idx;
+
+ u32 queue_type;
+ u8 reserved[8];
+} virtchnl2_queue_vector_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_vector_t, 24);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 num_qv_maps;
+ u8 pad[10];
+ virtchnl2_queue_vector_t qv_maps[1];
+} virtchnl2_queue_vector_maps_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_queue_vector_maps_t, 40);
+
+typedef struct
+{
+ u32 vport_id;
+ u8 enable;
+ u8 pad[3];
+} virtchnl2_loopback_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_loopback_t, 8);
+
+typedef struct
+{
+ u8 addr[VIRTCHNL2_ETH_LENGTH_OF_ADDRESS];
+ u8 type;
+ u8 pad;
+} virtchnl2_mac_addr_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_mac_addr_t, 8);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 num_mac_addr;
+ u8 pad[2];
+ virtchnl2_mac_addr_t mac_addr_list[1];
+} virtchnl2_mac_addr_list_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_mac_addr_list_t, 16);
+
+typedef struct
+{
+ u32 vport_id;
+ u16 flags;
+ u8 pad[2];
+} virtchnl2_promisc_info_t;
+
+STATIC_ASSERT_SIZEOF (virtchnl2_promisc_info_t, 8);
+
+#endif /* _IDPF_VIRTCHNL_H_ */
diff --git a/src/plugins/idpf/virtchnl2_lan_desc.h b/src/plugins/idpf/virtchnl2_lan_desc.h
new file mode 100644
index 00000000000..31eff81fd81
--- /dev/null
+++ b/src/plugins/idpf/virtchnl2_lan_desc.h
@@ -0,0 +1,610 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef _IDPF_VIRTCHNL_LAN_DESC_H_
+#define _IDPF_VIRTCHNL_LAN_DESC_H_
+
+/* VIRTCHNL2_TX_DESC_IDS
+ * Transmit descriptor ID flags
+ */
+#define foreach_idpf_txdid \
+ _ (0, DATA) \
+ _ (1, CTX) \
+ _ (2, REINJECT_CTX) \
+ _ (3, FLEX_DATA) \
+ _ (4, FLEX_CTX) \
+ _ (5, FLEX_TSO_CTX) \
+ _ (6, FLEX_TSYN_L2TAG1) \
+ _ (7, FLEX_L2TAG1_L2TAG2) \
+ _ (8, FLEX_TSO_L2TAG2_PARSTAG_CTX) \
+ _ (9, FLEX_HOSTSPLIT_SA_TSO_CTX) \
+ _ (10, FLEX_HOSTSPLIT_SA_CTX) \
+ _ (11, FLEX_L2TAG2_CTX) \
+ _ (12, FLEX_FLOW_SCHED) \
+ _ (13, FLEX_HOSTSPLIT_TSO_CTX) \
+ _ (14, FLEX_HOSTSPLIT_CTX) \
+ _ (15, DESC_DONE)
+
+typedef enum
+{
+#define _(a, b) VIRTCHNL2_TXDID_##b = (1 << a),
+ foreach_idpf_txdid
+#undef _
+} idpf_txdid_t;
+
+/* VIRTCHNL2_RX_DESC_IDS
+ * Receive descriptor IDs (range from 0 to 63)
+ */
+#define foreach_virtchnl2_rxdid \
+ _ (0, 0_16B_BASE) \
+ _ (1, 1_32B_BASE) \
+ _ (2, 2_FLEX_SPLITQ) \
+ _ (2, 2_FLEX_SQ_NIC) \
+ _ (3, 3_FLEX_SQ_SW) \
+ _ (4, 4_FLEX_SQ_NIC_VEB) \
+ _ (5, 5_FLEX_SQ_NIC_ACL) \
+ _ (6, 6_FLEX_SQ_NIC_2) \
+ _ (7, 7_HW_RSVD) \
+ _ (16, 16_COMMS_GENERIC) \
+ _ (17, 17_COMMS_AUX_VLAN) \
+ _ (18, 18_COMMS_AUX_IPV4) \
+ _ (19, 19_COMMS_AUX_IPV6) \
+ _ (20, 20_COMMS_AUX_FLOW) \
+ _ (21, 21_COMMS_AUX_TCP)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RXDID_##n = v,
+ foreach_virtchnl2_rxdid
+#undef _
+} virtchnl2_rxdid_t;
+
+/* VIRTCHNL2_RX_DESC_ID_BITMASKS
+ * Receive descriptor ID bitmasks
+ */
+#define VIRTCHNL2_RXDID_0_16B_BASE_M BIT (VIRTCHNL2_RXDID_0_16B_BASE)
+#define VIRTCHNL2_RXDID_1_32B_BASE_M BIT (VIRTCHNL2_RXDID_1_32B_BASE)
+#define VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M BIT (VIRTCHNL2_RXDID_2_FLEX_SPLITQ)
+#define VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M BIT (VIRTCHNL2_RXDID_2_FLEX_SQ_NIC)
+#define VIRTCHNL2_RXDID_3_FLEX_SQ_SW_M BIT (VIRTCHNL2_RXDID_3_FLEX_SQ_SW)
+#define VIRTCHNL2_RXDID_4_FLEX_SQ_NIC_VEB_M \
+ BIT (VIRTCHNL2_RXDID_4_FLEX_SQ_NIC_VEB)
+#define VIRTCHNL2_RXDID_5_FLEX_SQ_NIC_ACL_M \
+ BIT (VIRTCHNL2_RXDID_5_FLEX_SQ_NIC_ACL)
+#define VIRTCHNL2_RXDID_6_FLEX_SQ_NIC_2_M BIT (VIRTCHNL2_RXDID_6_FLEX_SQ_NIC_2)
+#define VIRTCHNL2_RXDID_7_HW_RSVD_M BIT (VIRTCHNL2_RXDID_7_HW_RSVD)
+/* 9 through 15 are reserved */
+#define VIRTCHNL2_RXDID_16_COMMS_GENERIC_M \
+ BIT (VIRTCHNL2_RXDID_16_COMMS_GENERIC)
+#define VIRTCHNL2_RXDID_17_COMMS_AUX_VLAN_M \
+ BIT (VIRTCHNL2_RXDID_17_COMMS_AUX_VLAN)
+#define VIRTCHNL2_RXDID_18_COMMS_AUX_IPV4_M \
+ BIT (VIRTCHNL2_RXDID_18_COMMS_AUX_IPV4)
+#define VIRTCHNL2_RXDID_19_COMMS_AUX_IPV6_M \
+ BIT (VIRTCHNL2_RXDID_19_COMMS_AUX_IPV6)
+#define VIRTCHNL2_RXDID_20_COMMS_AUX_FLOW_M \
+ BIT (VIRTCHNL2_RXDID_20_COMMS_AUX_FLOW)
+#define VIRTCHNL2_RXDID_21_COMMS_AUX_TCP_M \
+ BIT (VIRTCHNL2_RXDID_21_COMMS_AUX_TCP)
+/* 22 through 63 are reserved */
+
+/* Rx */
+/* For splitq virtchnl2_rx_flex_desc_adv desc members */
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M \
+ MAKEMASK (0xFUL, VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M \
+ MAKEMASK (0x3FFUL, VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_UMBCAST_S 10
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_UMBCAST_M \
+ MAKEMASK (0x3UL, VIRTCHNL2_RX_FLEX_DESC_ADV_UMBCAST_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF0_S 12
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF0_M \
+ MAKEMASK (0xFUL, VIRTCHNL2_RX_FLEX_DESC_ADV_FF0_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M \
+ MAKEMASK (0x3FFFUL, VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S 14
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M \
+ BIT_ULL (VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S 15
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M \
+ BIT_ULL (VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M \
+ MAKEMASK (0x3FFUL, VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S 10
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M \
+ BIT_ULL (VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S 11
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_M \
+ BIT_ULL (VIRTCHNL2_RX_FLEX_DESC_ADV_SPH_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S 12
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_M \
+ BIT_ULL (VIRTCHNL2_RX_FLEX_DESC_ADV_MISS_S)
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_S 13
+#define VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_M \
+ MAKEMASK (0x7UL, VIRTCHNL2_RX_FLEX_DESC_ADV_FF1_M)
+
+#define foreach_virtchnl2_rx_flex_desc_adv_status0_qw1 \
+ _ (0, DD_S) \
+ _ (1, EOF_S) \
+ _ (2, HBO_S) \
+ _ (3, L3L4P_S) \
+ _ (4, XSUM_IPE_S) \
+ _ (5, XSUM_L4E_S) \
+ _ (6, XSUM_EIPE_S) \
+ _ (7, XSUM_EUDPE_S)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_##n = v,
+ foreach_virtchnl2_rx_flex_desc_adv_status0_qw1
+#undef _
+} virtchnl2_rx_flex_desc_adv_status0_qw1_t;
+
+#define foreach_virtchnl2_rx_flex_desc_adv_status0_qw0 \
+ _ (0, LPBK_S) \
+ _ (1, IPV6EXADD_S) \
+ _ (2, RXE_S) \
+ _ (3, CRCP_S) \
+ _ (4, RSS_VALID_S) \
+ _ (5, L2TAG1P_S) \
+ _ (6, XTRMD0_VALID_S) \
+ _ (7, XTRMD1_VALID_S) \
+ _ (8, LAST)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_##n = v,
+ foreach_virtchnl2_rx_flex_desc_adv_status0_qw0
+#undef _
+} virtchnl2_rx_flex_desc_adv_status0_qw0_t;
+
+#define foreach_virtchnl2_rx_flex_desc_adv_status1 \
+ _ (0, RSVD_S) \
+ _ (2, ATRAEFAIL_S) \
+ _ (3, L2TAG2P_S) \
+ _ (4, XTRMD2_VALID_S) \
+ _ (5, XTRMD3_VALID_S) \
+ _ (6, XTRMD4_VALID_S) \
+ _ (7, XTRMD5_VALID_S) \
+ _ (8, LAST)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_##n = v,
+ foreach_virtchnl2_rx_flex_desc_adv_status1
+#undef _
+} virtchnl2_rx_flex_desc_adv_status1_t;
+
+#define VIRTCHNL2_RX_FLEX_DESC_PTYPE_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_PTYPE_M \
+ MAKEMASK (0x3FFUL, VIRTCHNL2_RX_FLEX_DESC_PTYPE_S) /* 10 bits */
+
+#define VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_S 0
+#define VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_M \
+ MAKEMASK (0x3FFFUL, VIRTCHNL2_RX_FLEX_DESC_PKT_LEN_S) /* 14 bits */
+
+#define foreach_virtchnl2_rx_flex_desc_status0 \
+ _ (0, DD_S) \
+ _ (1, EOF_S) \
+ _ (2, HBO_S) \
+ _ (3, L3L4P_S) \
+ _ (4, XSUM_IPE_S) \
+ _ (5, XSUM_L4E_S) \
+ _ (6, XSUM_EIPE_S) \
+ _ (7, XSUM_EUDPE_S) \
+ _ (8, LPBK_S) \
+ _ (9, IPV6EXADD_S) \
+ _ (10, RXE_S) \
+ _ (11, CRCP_S) \
+ _ (12, RSS_VALID_S) \
+ _ (13, L2TAG1P_S) \
+ _ (14, XTRMD0_VALID_S) \
+ _ (15, XTRMD1_VALID_S) \
+ _ (16, LAST)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_FLEX_DESC_STATUS0_##n = v,
+ foreach_virtchnl2_rx_flex_desc_status0
+#undef _
+} virtchnl2_rx_flex_desc_status0_t;
+
+#define foreach_virtchnl2_rx_flex_desc_status1 \
+ _ (0, CPM_S) \
+ _ (4, NAT_S) \
+ _ (5, CRYPTO_S) \
+ _ (11, L2TAG2P_S) \
+ _ (12, XTRMD2_VALID_S) \
+ _ (13, XTRMD3_VALID_S) \
+ _ (14, XTRMD4_VALID_S) \
+ _ (15, XTRMD5_VALID_S) \
+ _ (16, LAST)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_FLEX_DESC_STATUS1_##n = v,
+ foreach_virtchnl2_rx_flex_desc_status1
+#undef _
+} virtchnl2_rx_flex_desc_status1_t;
+
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_SPH_S 63
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_SPH_M \
+ BIT_ULL (VIRTCHNL2_RX_BASE_DESC_QW1_LEN_SPH_S)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_HBUF_S 52
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_HBUF_M \
+ MAKEMASK (0x7FFULL, VIRTCHNL2_RX_BASE_DESC_QW1_LEN_HBUF_S)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_S 38
+#define VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_M \
+ MAKEMASK (0x3FFFULL, VIRTCHNL2_RX_BASE_DESC_QW1_LEN_PBUF_S)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_S 30
+#define VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_M \
+ MAKEMASK (0xFFULL, VIRTCHNL2_RX_BASE_DESC_QW1_PTYPE_S)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_S 19
+#define VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M \
+ MAKEMASK (0xFFUL, VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_S)
+#define VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_S 0
+#define VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_M \
+ MAKEMASK (0x7FFFFUL, VIRTCHNL2_RX_BASE_DESC_QW1_STATUS_S)
+
+#define foreach_virtchnl2_rx_base_desc_status \
+ _ (0, DD_S) \
+ _ (1, EOF_S) \
+ _ (2, L2TAG1P_S) \
+ _ (3, L3L4P_S) \
+ _ (4, CRCP_S) \
+ _ (5, RSVD_S) \
+ _ (8, EXT_UDP_0_S) \
+ _ (9, UMBCAST_S) \
+ _ (11, FLM_S) \
+ _ (12, FLTSTAT_S) \
+ _ (14, LPBK_S) \
+ _ (15, IPV6EXADD_S) \
+ _ (16, RSVD1_S) \
+ _ (18, INT_UDP_0_S) \
+ _ (19, LAST)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_BASE_DESC_STATUS_##n = v,
+ foreach_virtchnl2_rx_base_desc_status
+#undef _
+} virtchnl2_rx_base_desc_status_t;
+
+#define VIRTCHNL2_RX_BASE_DESC_EXT_STATUS_L2TAG2P_S 0
+
+#define foreach_virtchnl2_rx_base_desc_error \
+ _ (0, RXE_S) \
+ _ (1, ATRAEFAIL_S) \
+ _ (2, HBO_S) \
+ _ (3, L3L4E_S) \
+ _ (3, IPE_S) \
+ _ (4, L4E_S) \
+ _ (5, EIPE_S) \
+ _ (6, OVERSIZE_S) \
+ _ (7, PPRS_S)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_BASE_DESC_ERROR_##n = v,
+ foreach_virtchnl2_rx_base_desc_error
+#undef _
+} virtchnl2_rx_base_desc_error_t;
+
+#define foreach_virtchnl2_rx_base_desc_fltstat \
+ _ (0, NO_DATA) \
+ _ (1, FD_ID) \
+ _ (2, RSV) \
+ _ (3, RSS_HASH)
+
+typedef enum
+{
+#define _(v, n) VIRTCHNL2_RX_BASE_DESC_FLTSTAT_##n = v,
+ foreach_virtchnl2_rx_base_desc_fltstat
+#undef _
+} virtchnl2_rx_base_desc_fltstat_t;
+
+/* Receive Descriptors */
+/* splitq buf
+ | 16| 0|
+ ----------------------------------------------------------------
+ | RSV | Buffer ID |
+ ----------------------------------------------------------------
+ | Rx packet buffer adresss |
+ ----------------------------------------------------------------
+ | Rx header buffer adresss |
+ ----------------------------------------------------------------
+ | RSV |
+ ----------------------------------------------------------------
+ | 0|
+ */
+typedef struct
+{
+ struct
+ {
+ u16 buf_id;
+ u16 rsvd0;
+ u32 rsvd1;
+ } qword0;
+ u64 pkt_addr;
+ u64 hdr_addr;
+ u64 rsvd2;
+} virtchnl2_splitq_rx_buf_desc_t;
+
+typedef struct
+{
+ u64 pkt_addr;
+ u64 hdr_addr;
+ u64 rsvd1;
+ u64 rsvd2;
+} virtchnl2_singleq_rx_buf_desc_t;
+
+union virtchnl2_rx_buf_desc
+{
+ virtchnl2_singleq_rx_buf_desc_t read;
+ virtchnl2_splitq_rx_buf_desc_t split_rd;
+};
+
+typedef struct
+{
+ struct
+ {
+ struct
+ {
+ u16 mirroring_status;
+ u16 l2tag1;
+ } lo_dword;
+ union
+ {
+ u32 rss;
+ u32 fd_id;
+ } hi_dword;
+ } qword0;
+ struct
+ {
+ u64 status_error_ptype_len;
+ } qword1;
+ struct
+ {
+ u16 ext_status;
+ u16 rsvd;
+ u16 l2tag2_1;
+ u16 l2tag2_2;
+ } qword2;
+ struct
+ {
+ u32 reserved;
+ u32 fd_id;
+ } qword3;
+} virtchnl2_singleq_base_rx_desc_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ u16 ptype_flex_flags0;
+ u16 pkt_len;
+ u16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ u16 status_error0;
+ u16 l2tag1;
+ u16 flex_meta0;
+ u16 flex_meta1;
+
+ /* Qword 2 */
+ u16 status_error1;
+ u8 flex_flags2;
+ u8 time_stamp_low;
+ u16 l2tag2_1st;
+ u16 l2tag2_2nd;
+
+ /* Qword 3 */
+ u16 flex_meta2;
+ u16 flex_meta3;
+ union
+ {
+ struct
+ {
+ u16 flex_meta4;
+ u16 flex_meta5;
+ } flex;
+ u32 ts_high;
+ } flex_ts;
+} virtchnl2_rx_flex_desc_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ u16 ptype_flex_flags0;
+ u16 pkt_len;
+ u16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ u16 status_error0;
+ u16 l2tag1;
+ u32 rss_hash;
+
+ /* Qword 2 */
+ u16 status_error1;
+ u8 flexi_flags2;
+ u8 ts_low;
+ u16 l2tag2_1st;
+ u16 l2tag2_2nd;
+
+ /* Qword 3 */
+ u32 flow_id;
+ union
+ {
+ struct
+ {
+ u16 rsvd;
+ u16 flow_id_ipv6;
+ } flex;
+ u32 ts_high;
+ } flex_ts;
+} virtchnl2_rx_flex_desc_nic_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ u16 ptype_flex_flags0;
+ u16 pkt_len;
+ u16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ u16 status_error0;
+ u16 l2tag1;
+ u16 src_vsi;
+ u16 flex_md1_rsvd;
+
+ /* Qword 2 */
+ u16 status_error1;
+ u8 flex_flags2;
+ u8 ts_low;
+ u16 l2tag2_1st;
+ u16 l2tag2_2nd;
+
+ /* Qword 3 */
+ u32 rsvd;
+ u32 ts_high;
+} virtchnl2_rx_flex_desc_sw_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid;
+ u8 mir_id_umb_cast;
+ u16 ptype_flex_flags0;
+ u16 pkt_len;
+ u16 hdr_len_sph_flex_flags1;
+
+ /* Qword 1 */
+ u16 status_error0;
+ u16 l2tag1;
+ u32 rss_hash;
+
+ /* Qword 2 */
+ u16 status_error1;
+ u8 flexi_flags2;
+ u8 ts_low;
+ u16 l2tag2_1st;
+ u16 l2tag2_2nd;
+
+ /* Qword 3 */
+ u16 flow_id;
+ u16 src_vsi;
+ union
+ {
+ struct
+ {
+ u16 rsvd;
+ u16 flow_id_ipv6;
+ } flex;
+ u32 ts_high;
+ } flex_ts;
+} virtchnl2_rx_flex_desc_nic_2_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid_ucast;
+ u8 status_err0_qw0;
+ u16 ptype_err_fflags0;
+ u16 pktlen_gen_bufq_id;
+ u16 hdrlen_flags;
+
+ /* Qword 1 */
+ u8 status_err0_qw1;
+ u8 status_err1;
+ u8 fflags1;
+ u8 ts_low;
+ u16 fmd0;
+ u16 fmd1;
+ /* Qword 2 */
+ u16 fmd2;
+ u8 fflags2;
+ u8 hash3;
+ u16 fmd3;
+ u16 fmd4;
+ /* Qword 3 */
+ u16 fmd5;
+ u16 fmd6;
+ u16 fmd7_0;
+ u16 fmd7_1;
+} virtchnl2_rx_flex_desc_adv_t;
+
+typedef struct
+{
+ /* Qword 0 */
+ u8 rxdid_ucast;
+ u8 status_err0_qw0;
+ u16 ptype_err_fflags0;
+ u16 pktlen_gen_bufq_id;
+ u16 hdrlen_flags;
+
+ /* Qword 1 */
+ u8 status_err0_qw1;
+ u8 status_err1;
+ u8 fflags1;
+ u8 ts_low;
+ u16 buf_id;
+ union
+ {
+ u16 raw_cs;
+ u16 l2tag1;
+ u16 rscseglen;
+ } misc;
+ /* Qword 2 */
+ u16 hash1;
+ union
+ {
+ u8 fflags2;
+ u8 mirrorid;
+ u8 hash2;
+ } ff2_mirrid_hash2;
+ u8 hash3;
+ u16 l2tag2;
+ u16 fmd4;
+ /* Qword 3 */
+ u16 l2tag1;
+ u16 fmd6;
+ u32 ts_high;
+} virtchnl2_rx_flex_desc_adv_nic_3_t;
+
+typedef union
+{
+ virtchnl2_singleq_rx_buf_desc_t read;
+ virtchnl2_singleq_base_rx_desc_t base_wb;
+ virtchnl2_rx_flex_desc_t flex_wb;
+ virtchnl2_rx_flex_desc_nic_t flex_nic_wb;
+ virtchnl2_rx_flex_desc_sw_t flex_sw_wb;
+ virtchnl2_rx_flex_desc_nic_2_t flex_nic_2_wb;
+ virtchnl2_rx_flex_desc_adv_t flex_adv_wb;
+ virtchnl2_rx_flex_desc_adv_nic_3_t flex_adv_nic_3_wb;
+ u64 qword[4];
+} virtchnl2_rx_desc_t;
+
+#endif /* _IDPF_VIRTCHNL_LAN_DESC_H_ */
diff --git a/src/plugins/igmp/igmp.c b/src/plugins/igmp/igmp.c
index 1c686e39cf2..30f167d483a 100644
--- a/src/plugins/igmp/igmp.c
+++ b/src/plugins/igmp/igmp.c
@@ -35,7 +35,6 @@
igmp_main_t igmp_main;
-/* *INDENT-OFF* */
/* General Query address */
const static mfib_prefix_t mpfx_general_query = {
.fp_proto = FIB_PROTOCOL_IP4,
@@ -57,7 +56,6 @@ const static mfib_prefix_t mpfx_report = {
},
},
};
-/* *INDENT-ON* */
/**
* @brief igmp send query (igmp_timer_function_t)
@@ -345,7 +343,6 @@ igmp_enable_disable (u32 sw_if_index, u8 enable, igmp_mode_t mode)
IGMP_DBG ("%s: %U", (enable ? "Enabled" : "Disabled"),
format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index);
- /* *INDENT-OFF* */
fib_route_path_t via_itf_path =
{
.frp_proto = fib_proto_to_dpo (FIB_PROTOCOL_IP4),
@@ -365,7 +362,6 @@ igmp_enable_disable (u32 sw_if_index, u8 enable, igmp_mode_t mode)
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
};
- /* *INDENT-ON* */
/* find configuration, if it doesn't exist, create new */
config = igmp_config_lookup (sw_if_index);
mfib_index = mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
@@ -408,18 +404,19 @@ igmp_enable_disable (u32 sw_if_index, u8 enable, igmp_mode_t mode)
{
/* first config in this FIB */
mfib_table_lock (mfib_index, FIB_PROTOCOL_IP4, MFIB_SOURCE_IGMP);
- mfib_table_entry_path_update (mfib_index,
- &mpfx_general_query,
- MFIB_SOURCE_IGMP, &for_us_path);
- mfib_table_entry_path_update (mfib_index,
- &mpfx_report,
- MFIB_SOURCE_IGMP, &for_us_path);
+ mfib_table_entry_path_update (mfib_index, &mpfx_general_query,
+ MFIB_SOURCE_IGMP,
+ MFIB_ENTRY_FLAG_NONE, &for_us_path);
+ mfib_table_entry_path_update (mfib_index, &mpfx_report,
+ MFIB_SOURCE_IGMP,
+ MFIB_ENTRY_FLAG_NONE, &for_us_path);
}
- mfib_table_entry_path_update (mfib_index,
- &mpfx_general_query,
- MFIB_SOURCE_IGMP, &via_itf_path);
+ mfib_table_entry_path_update (mfib_index, &mpfx_general_query,
+ MFIB_SOURCE_IGMP, MFIB_ENTRY_FLAG_NONE,
+ &via_itf_path);
mfib_table_entry_path_update (mfib_index, &mpfx_report,
- MFIB_SOURCE_IGMP, &via_itf_path);
+ MFIB_SOURCE_IGMP, MFIB_ENTRY_FLAG_NONE,
+ &via_itf_path);
}
}
else if (config && !enable)
@@ -487,7 +484,6 @@ igmp_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (igmp_init) =
{
.runs_after = VLIB_INITS("ip4_lookup_init"),
@@ -497,7 +493,6 @@ VLIB_PLUGIN_REGISTER () =
.version = VPP_BUILD_VER,
.description = "Internet Group Management Protocol (IGMP)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/igmp/igmp.h b/src/plugins/igmp/igmp.h
index 9f9b611a649..4131d6d3b09 100644
--- a/src/plugins/igmp/igmp.h
+++ b/src/plugins/igmp/igmp.h
@@ -20,6 +20,7 @@
#include <vlib/vlib.h>
#include <vnet/ip/ip.h>
+#define REPLY_MSG_ID_BASE (igmp_main.msg_id_base)
#include <vlibapi/api_helper_macros.h>
#include <vnet/ip/igmp_packet.h>
#include <vnet/adj/adj_mcast.h>
diff --git a/src/plugins/igmp/igmp_api.c b/src/plugins/igmp/igmp_api.c
index 72c1b0394a8..3f743d8fee1 100644
--- a/src/plugins/igmp/igmp_api.c
+++ b/src/plugins/igmp/igmp_api.c
@@ -71,7 +71,7 @@ vl_api_igmp_listen_t_handler (vl_api_igmp_listen_t * mp)
BAD_SW_IF_INDEX_LABEL;
done:;
- REPLY_MACRO (IGMP_MSG_ID (VL_API_IGMP_LISTEN_REPLY));
+ REPLY_MACRO (VL_API_IGMP_LISTEN_REPLY);
}
static void
@@ -88,7 +88,7 @@ vl_api_igmp_enable_disable_t_handler (vl_api_igmp_enable_disable_t * mp)
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (IGMP_MSG_ID (VL_API_IGMP_ENABLE_DISABLE_REPLY));
+ REPLY_MACRO (VL_API_IGMP_ENABLE_DISABLE_REPLY);
}
static void
@@ -106,7 +106,7 @@ vl_api_igmp_proxy_device_add_del_t_handler (vl_api_igmp_proxy_device_add_del_t
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (IGMP_MSG_ID (VL_API_IGMP_PROXY_DEVICE_ADD_DEL_REPLY));
+ REPLY_MACRO (VL_API_IGMP_PROXY_DEVICE_ADD_DEL_REPLY);
}
static void
@@ -124,8 +124,7 @@ static void
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (IGMP_MSG_ID
- (VL_API_IGMP_PROXY_DEVICE_ADD_DEL_INTERFACE_REPLY));
+ REPLY_MACRO (VL_API_IGMP_PROXY_DEVICE_ADD_DEL_INTERFACE_REPLY);
}
static void
@@ -155,7 +154,6 @@ igmp_config_dump (igmp_main_t * im,
igmp_group_t *group;
igmp_src_t *src;
- /* *INDENT-OFF* */
FOR_EACH_GROUP (group, config,
({
FOR_EACH_SRC (src, group, IGMP_FILTER_MODE_INCLUDE,
@@ -163,7 +161,6 @@ igmp_config_dump (igmp_main_t * im,
send_igmp_details (rp, im, config, group, src, context);
}));
}));
- /* *INDENT-ON* */
}
static void
@@ -181,12 +178,10 @@ vl_api_igmp_dump_t_handler (vl_api_igmp_dump_t * mp)
sw_if_index = ntohl (mp->sw_if_index);
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (config, im->configs)
{
igmp_config_dump(im, rp, mp->context, config);
}
- /* *INDENT-ON* */
}
else
{
@@ -209,7 +204,7 @@ vl_api_igmp_clear_interface_t_handler (vl_api_igmp_clear_interface_t * mp)
if (config)
igmp_clear_config (config);
- REPLY_MACRO (IGMP_MSG_ID (VL_API_IGMP_CLEAR_INTERFACE_REPLY));
+ REPLY_MACRO (VL_API_IGMP_CLEAR_INTERFACE_REPLY);
}
static vl_api_group_prefix_type_t
@@ -250,7 +245,7 @@ vl_api_igmp_group_prefix_set_t_handler (vl_api_igmp_group_prefix_set_t * mp)
ip_prefix_decode (&mp->gp.prefix, &pfx);
igmp_group_prefix_set (&pfx, igmp_group_type_api_to_int (mp->gp.type));
- REPLY_MACRO (IGMP_MSG_ID (VL_API_IGMP_GROUP_PREFIX_SET_REPLY));
+ REPLY_MACRO (VL_API_IGMP_GROUP_PREFIX_SET_REPLY);
}
typedef struct igmp_ssm_range_walk_ctx_t_
@@ -343,7 +338,7 @@ vl_api_want_igmp_events_t_handler (vl_api_want_igmp_events_t * mp)
rv = VNET_API_ERROR_INVALID_REGISTRATION;
done:
- REPLY_MACRO (VL_API_WANT_IGMP_EVENTS_REPLY + im->msg_id_base);
+ REPLY_MACRO (VL_API_WANT_IGMP_EVENTS_REPLY);
}
static clib_error_t *
@@ -402,14 +397,12 @@ igmp_event (igmp_filter_mode_t filter,
vnet_get_main (), sw_if_index, format_igmp_filter_mode, filter);
- /* *INDENT-OFF* */
pool_foreach (api_client, im->api_clients)
{
rp = vl_api_client_index_to_registration (api_client->client_index);
if (rp)
send_igmp_event (rp, filter, sw_if_index, saddr, gaddr);
}
- /* *INDENT-ON* */
}
/* Set up the API message handling tables */
diff --git a/src/plugins/igmp/igmp_cli.c b/src/plugins/igmp/igmp_cli.c
index f84cdaf50f7..713f4c31bfb 100644
--- a/src/plugins/igmp/igmp_cli.c
+++ b/src/plugins/igmp/igmp_cli.c
@@ -69,13 +69,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_clear_interface_command, static) = {
.path = "clear igmp",
.short_help = "clear igmp int <interface>",
.function = igmp_clear_interface_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_listen_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -150,14 +148,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_listen_command, static) = {
.path = "igmp listen",
.short_help = "igmp listen [<enable|disable>] "
"int <interface> saddr <ip4-address> gaddr <ip4-address>",
.function = igmp_listen_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_enable_cli (vlib_main_t * vm,
@@ -211,13 +207,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_enable_command, static) = {
.path = "igmp",
.short_help = "igmp <enable|disable> <host|router> <interface>",
.function = igmp_enable_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_proxy_device_add_del_command_fn (vlib_main_t * vm,
@@ -275,13 +269,11 @@ done:
unformat_free (line_input);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_proxy_device_add_del_command, static) = {
.path = "igmp proxy-dev",
.short_help = "igmp proxy-dev <add|del> vrf-id <table-id> <interface>",
.function = igmp_proxy_device_add_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_proxy_device_add_del_interface_command_fn (vlib_main_t * vm,
@@ -339,13 +331,11 @@ done:
unformat_free (line_input);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_proxy_device_add_del_interface_command, static) = {
.path = "igmp proxy-dev itf",
.short_help = "igmp proxy-dev itf <add|del> vrf-id <table-id> <interface>",
.function = igmp_proxy_device_add_del_interface_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -355,23 +345,19 @@ igmp_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
igmp_main_t *im = &igmp_main;
igmp_config_t *config;
- /* *INDENT-OFF* */
pool_foreach (config, im->configs)
{
vlib_cli_output (vm, "%U", format_igmp_config, config);
}
- /* *INDENT-ON* */
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_show_command, static) = {
.path = "show igmp config",
.short_help = "show igmp config",
.function = igmp_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_show_timers_command_fn (vlib_main_t * vm,
@@ -384,13 +370,11 @@ igmp_show_timers_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_show_timers_command, static) = {
.path = "show igmp timers",
.short_help = "show igmp timers",
.function = igmp_show_timers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_igmp_command_fn (vlib_main_t * vm,
@@ -414,13 +398,11 @@ test_igmp_command_fn (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_igmp_command, static) = {
.path = "test igmp timers",
.short_help = "Change the default values for IGMP timers - only sensible during unit tests",
.function = test_igmp_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
diff --git a/src/plugins/igmp/igmp_config.c b/src/plugins/igmp/igmp_config.c
index 7637adba5bf..288d9c87222 100644
--- a/src/plugins/igmp/igmp_config.c
+++ b/src/plugins/igmp/igmp_config.c
@@ -28,12 +28,10 @@ igmp_clear_config (igmp_config_t * config)
format_vnet_sw_if_index_name,
vnet_get_main (), config->sw_if_index);
- /* *INDENT-OFF* */
FOR_EACH_GROUP (group, config,
({
igmp_group_clear (&group);
}));
- /* *INDENT-ON* */
for (ii = 0; ii < IGMP_CONFIG_N_TIMERS; ii++)
{
@@ -125,12 +123,10 @@ format_igmp_config (u8 * s, va_list * args)
format_igmp_timer_id, config->timers[ii]);
}
- /* *INDENT-OFF* */
FOR_EACH_GROUP (group, config,
({
s = format (s, "\n%U", format_igmp_group, group, 4);
}));
- /* *INDENT-ON* */
return (s);
}
diff --git a/src/plugins/igmp/igmp_group.c b/src/plugins/igmp/igmp_group.c
index eec4c9b8f81..be3d997cbeb 100644
--- a/src/plugins/igmp/igmp_group.c
+++ b/src/plugins/igmp/igmp_group.c
@@ -23,12 +23,10 @@ igmp_group_free_all_srcs (igmp_group_t * group)
{
igmp_src_t *src;
- /* *INDENT-OFF* */
FOR_EACH_SRC (src, group, IGMP_FILTER_MODE_INCLUDE,
({
igmp_src_free(src);
}));
- /* *INDENT-ON* */
hash_free (group->igmp_src_by_key[IGMP_FILTER_MODE_INCLUDE]);
hash_free (group->igmp_src_by_key[IGMP_FILTER_MODE_EXCLUDE]);
@@ -152,7 +150,6 @@ igmp_group_present_minus_new (igmp_group_t * group,
pmn = NULL;
- /* *INDENT-OFF* */
if (0 == vec_len(saddrs))
{
FOR_EACH_SRC(src, group, mode,
@@ -178,7 +175,6 @@ igmp_group_present_minus_new (igmp_group_t * group,
vec_add1(pmn, *src->key);
}));
}
- /* *INDENT-ON* */
return (pmn);
}
@@ -198,7 +194,6 @@ igmp_group_new_minus_present (igmp_group_t * group,
npm = NULL;
- /* *INDENT-OFF* */
vec_foreach(s1, saddrs)
{
found = 0;
@@ -214,7 +209,6 @@ igmp_group_new_minus_present (igmp_group_t * group,
if (!found)
vec_add1(npm, *s1);
}
- /* *INDENT-ON* */
return (npm);
}
@@ -230,7 +224,6 @@ igmp_group_new_intersect_present (igmp_group_t * group,
intersect = NULL;
- /* *INDENT-OFF* */
FOR_EACH_SRC(src, group, mode,
({
vec_foreach(s1, saddrs)
@@ -242,7 +235,6 @@ igmp_group_new_intersect_present (igmp_group_t * group,
}
}
}));
- /* *INDENT-ON* */
return (intersect);
}
@@ -311,12 +303,10 @@ format_igmp_group (u8 * s, va_list * args)
format_igmp_group_timer_type, ii,
format_igmp_timer_id, group->timers[ii]);
- /* *INDENT-OFF* */
FOR_EACH_SRC (src, group, IGMP_FILTER_MODE_INCLUDE,
({
s = format (s, "\n%U", format_igmp_src, src, indent+4);
}));
- /* *INDENT-ON* */
return (s);
}
diff --git a/src/plugins/igmp/igmp_input.c b/src/plugins/igmp/igmp_input.c
index 1858a1b4d66..012c22399de 100644
--- a/src/plugins/igmp/igmp_input.c
+++ b/src/plugins/igmp/igmp_input.c
@@ -219,7 +219,6 @@ igmp_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (igmp_input_node) =
{
.function = igmp_input,
@@ -239,7 +238,6 @@ VLIB_REGISTER_NODE (igmp_input_node) =
[IGMP_INPUT_NEXT_PARSE_REPORT] = "igmp-parse-report",
}
};
-/* *INDENT-ON* */
static uword
igmp_parse_query (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -325,7 +323,6 @@ igmp_parse_query (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (igmp_parse_query_node) =
{
.function = igmp_parse_query,
@@ -343,7 +340,6 @@ VLIB_REGISTER_NODE (igmp_parse_query_node) =
[IGMP_PARSE_QUERY_NEXT_DROP] = "error-drop",
}
};
-/* *INDENT-ON* */
static uword
igmp_parse_report (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -436,7 +432,6 @@ igmp_parse_report (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (igmp_parse_report_node) =
{
.function = igmp_parse_report,
@@ -454,7 +449,6 @@ VLIB_REGISTER_NODE (igmp_parse_report_node) =
[IGMP_PARSE_REPORT_NEXT_DROP] = "error-drop",
}
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_input_init (vlib_main_t * vm)
@@ -466,12 +460,10 @@ igmp_input_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (igmp_input_init) =
{
.runs_after = VLIB_INITS("igmp_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/igmp/igmp_pkt.c b/src/plugins/igmp/igmp_pkt.c
index c2ce5c71255..7fadeb638a3 100644
--- a/src/plugins/igmp/igmp_pkt.c
+++ b/src/plugins/igmp/igmp_pkt.c
@@ -329,7 +329,6 @@ igmp_pkt_report_v3_add_report (igmp_pkt_build_report_t * br,
if (NULL == igmp_group)
return;
- /* *INDENT-OFF* */
vec_foreach(s, srcs)
{
igmp_group = igmp_pkt_report_v3_append_src(br, igmp_group,
@@ -337,7 +336,6 @@ igmp_pkt_report_v3_add_report (igmp_pkt_build_report_t * br,
if (NULL == igmp_group)
return;
};
- /* *INDENT-ON* */
igmp_group->n_src_addresses = clib_host_to_net_u16 (br->n_srcs);
@@ -378,7 +376,6 @@ igmp_pkt_report_v3_add_group (igmp_pkt_build_report_t * br,
igmp_group = igmp_pkt_report_v3_append_group (br, group->key, type);
- /* *INDENT-OFF* */
FOR_EACH_SRC (src, group, IGMP_FILTER_MODE_INCLUDE,
({
igmp_group = igmp_pkt_report_v3_append_src(br, igmp_group,
@@ -387,7 +384,6 @@ igmp_pkt_report_v3_add_group (igmp_pkt_build_report_t * br,
if (NULL == igmp_group)
return;
}));
- /* *INDENT-ON* */
igmp_group->n_src_addresses = clib_host_to_net_u16 (br->n_srcs);
IGMP_DBG (" ..add-group: %U srcs:%d",
diff --git a/src/plugins/igmp/igmp_proxy.c b/src/plugins/igmp/igmp_proxy.c
index 2167740fc8a..bf5e3aafad8 100644
--- a/src/plugins/igmp/igmp_proxy.c
+++ b/src/plugins/igmp/igmp_proxy.c
@@ -34,7 +34,6 @@ igmp_proxy_device_mfib_path_add_del (igmp_group_t * group, u8 add)
mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
config->sw_if_index);
- /* *INDENT-OFF* */
mfib_prefix_t mpfx_group_addr = {
.fp_proto = FIB_PROTOCOL_IP4,
.fp_len = 32,
@@ -51,11 +50,11 @@ igmp_proxy_device_mfib_path_add_del (igmp_group_t * group, u8 add)
.frp_weight = 1,
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
};
- /* *INDENT-ON* */
if (add)
mfib_table_entry_path_update (mfib_index, &mpfx_group_addr,
- MFIB_SOURCE_IGMP, &via_itf_path);
+ MFIB_SOURCE_IGMP, MFIB_ENTRY_FLAG_NONE,
+ &via_itf_path);
else
mfib_table_entry_path_remove (mfib_index, &mpfx_group_addr,
MFIB_SOURCE_IGMP, &via_itf_path);
@@ -345,12 +344,10 @@ igmp_proxy_device_merge_group (igmp_proxy_device_t * proxy_device,
igmp_proxy_device_mfib_path_add_del (group, 0);
}
- /* *INDENT-OFF* */
FOR_EACH_SRC (src, group, group->router_filter_mode,
({
igmp_proxy_device_merge_src (&proxy_group, src, srcaddrs, block);
}));
- /* *INDENT-ON* */
return proxy_group;
}
@@ -369,7 +366,6 @@ igmp_proxy_device_merge_config (igmp_config_t * config, u8 block)
igmp_pkt_build_report_init (&br, proxy_device->upstream_if);
- /* *INDENT-OFF* */
FOR_EACH_GROUP(group, config,
({
proxy_group = igmp_proxy_device_merge_group (proxy_device, group, &srcaddrs, block);
@@ -382,7 +378,6 @@ igmp_proxy_device_merge_config (igmp_config_t * config, u8 block)
}
vec_free (srcaddrs);
}));
- /* *INDENT-ON* */
igmp_pkt_report_v3_send (&br);
diff --git a/src/plugins/igmp/igmp_query.c b/src/plugins/igmp/igmp_query.c
index c75b01a295b..c5bf8fca992 100644
--- a/src/plugins/igmp/igmp_query.c
+++ b/src/plugins/igmp/igmp_query.c
@@ -155,14 +155,12 @@ igmp_send_general_report_v3 (u32 obj, void *data)
igmp_pkt_build_report_init (&br, config->sw_if_index);
- /* *INDENT-OFF* */
FOR_EACH_GROUP (group, config,
({
igmp_pkt_report_v3_add_group
(&br, group,
igmp_filter_mode_to_report_type(group->router_filter_mode));
}));
- /* *INDENT-ON* */
igmp_pkt_report_v3_send (&br);
}
diff --git a/src/plugins/igmp/igmp_ssm_range.c b/src/plugins/igmp/igmp_ssm_range.c
index c74d312b508..a71741cd5f8 100644
--- a/src/plugins/igmp/igmp_ssm_range.c
+++ b/src/plugins/igmp/igmp_ssm_range.c
@@ -127,13 +127,11 @@ igmp_ssm_range_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (igmp_show_timers_command, static) = {
.path = "show igmp ssm-ranges",
.short_help = "show igmp ssm-ranges",
.function = igmp_ssm_range_show,
};
-/* *INDENT-ON* */
static clib_error_t *
igmp_ssm_range_init (vlib_main_t * vm)
@@ -145,12 +143,10 @@ igmp_ssm_range_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (igmp_ssm_range_init) =
{
.runs_after = VLIB_INITS("igmp_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/igmp/igmp_timer.c b/src/plugins/igmp/igmp_timer.c
index 2d38dd07a99..8abef8e554e 100644
--- a/src/plugins/igmp/igmp_timer.c
+++ b/src/plugins/igmp/igmp_timer.c
@@ -181,7 +181,6 @@ igmp_timer_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (igmp_timer_process_node) =
{
.function = igmp_timer_process,
@@ -189,7 +188,6 @@ VLIB_REGISTER_NODE (igmp_timer_process_node) =
.name = "igmp-timer-process",
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
igmp_timer_id_t
igmp_timer_schedule (f64 when, u32 obj, igmp_timer_function_t fn, void *data)
diff --git a/src/plugins/ikev2/CMakeLists.txt b/src/plugins/ikev2/CMakeLists.txt
index 6f2e5a68153..568271ed7d9 100644
--- a/src/plugins/ikev2/CMakeLists.txt
+++ b/src/plugins/ikev2/CMakeLists.txt
@@ -11,8 +11,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-add_definitions (-DWITH_LIBSSL=1)
+if(NOT OPENSSL_FOUND)
+ message(WARNING "openssl headers not found - ikev2 plugin disabled")
+ return()
+endif()
+
include_directories(${OPENSSL_INCLUDE_DIR})
+add_compile_definitions(OPENSSL_SUPPRESS_DEPRECATED)
add_vpp_plugin(ikev2
SOURCES
@@ -35,5 +40,5 @@ add_vpp_plugin(ikev2
ikev2_priv.h
LINK_LIBRARIES
- ${OPENSSL_LIBRARIES}
+ ${OPENSSL_CRYPTO_LIBRARIES}
)
diff --git a/src/plugins/ikev2/ikev2.api b/src/plugins/ikev2/ikev2.api
index ff9ed72e888..de276e7f3ea 100644
--- a/src/plugins/ikev2/ikev2.api
+++ b/src/plugins/ikev2/ikev2.api
@@ -72,7 +72,26 @@ define ikev2_sa_dump
{
u32 client_index;
u32 context;
+};
+/** \brief Dump all SAs
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ikev2_sa_v2_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Dump all SAs
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define ikev2_sa_v3_dump
+{
+ u32 client_index;
+ u32 context;
option status = "in_progress";
};
@@ -87,6 +106,32 @@ define ikev2_sa_details
i32 retval;
vl_api_ikev2_sa_t sa;
+};
+
+/** \brief Details about IKE SA
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param sa - SA data
+*/
+define ikev2_sa_v2_details
+{
+ u32 context;
+ i32 retval;
+
+ vl_api_ikev2_sa_v2_t sa;
+};
+
+/** \brief Details about IKE SA
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param sa - SA data
+*/
+define ikev2_sa_v3_details
+{
+ u32 context;
+ i32 retval;
+
+ vl_api_ikev2_sa_v3_t sa;
option status = "in_progress";
};
@@ -102,7 +147,6 @@ define ikev2_child_sa_dump
u32 sa_index;
option vat_help = "sa_index <index>";
- option status = "in_progress";
};
/** \brief Child SA details
@@ -116,6 +160,34 @@ define ikev2_child_sa_details
i32 retval;
vl_api_ikev2_child_sa_t child_sa;
+};
+
+/** \brief Dump child SA of specific SA
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sa_index - index of specific sa
+*/
+define ikev2_child_sa_v2_dump
+{
+ u32 client_index;
+ u32 context;
+
+ u32 sa_index;
+ option vat_help = "sa_index <index>";
+ option status = "in_progress";
+};
+
+/** \brief Child SA details
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param child_sa - child SA data
+*/
+define ikev2_child_sa_v2_details
+{
+ u32 context;
+ i32 retval;
+
+ vl_api_ikev2_child_sa_v2_t child_sa;
option status = "in_progress";
};
diff --git a/src/plugins/ikev2/ikev2.c b/src/plugins/ikev2/ikev2.c
index fa653760b1d..9bea2c96d12 100644
--- a/src/plugins/ikev2/ikev2.c
+++ b/src/plugins/ikev2/ikev2.c
@@ -110,14 +110,14 @@ typedef enum
typedef u32 ikev2_non_esp_marker;
-static_always_inline u16
-ikev2_get_port (ikev2_sa_t * sa)
+static u16
+ikev2_get_port (ikev2_sa_t *sa)
{
return ikev2_natt_active (sa) ? IKEV2_PORT_NATT : IKEV2_PORT;
}
-static_always_inline int
-ikev2_insert_non_esp_marker (ike_header_t * ike, int len)
+static int
+ikev2_insert_non_esp_marker (ike_header_t *ike, int len)
{
memmove ((u8 *) ike + sizeof (ikev2_non_esp_marker), ike, len);
clib_memset (ike, 0, sizeof (ikev2_non_esp_marker));
@@ -211,6 +211,8 @@ ikev2_select_proposal (ikev2_sa_proposal_t * proposals,
rv->proposal_num = proposal->proposal_num;
rv->protocol_id = proposal->protocol_id;
RAND_bytes ((u8 *) & rv->spi, sizeof (rv->spi));
+ if (rv->protocol_id != IKEV2_PROTOCOL_IKE)
+ rv->spi &= 0xffffffff;
goto done;
}
else
@@ -405,8 +407,8 @@ ikev2_generate_sa_init_data (ikev2_sa_t * sa)
RAND_bytes ((u8 *) & sa->rspi, 8);
/* generate nonce */
- sa->r_nonce = vec_new (u8, IKEV2_NONCE_SIZE);
- RAND_bytes ((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE);
+ sa->r_nonce = vec_new (u8, vec_len (sa->i_nonce));
+ RAND_bytes ((u8 *) sa->r_nonce, vec_len (sa->i_nonce));
}
/* generate dh keys */
@@ -480,11 +482,10 @@ ikev2_complete_sa_data (ikev2_sa_t * sa, ikev2_sa_t * sai)
}
static void
-ikev2_calc_keys (ikev2_sa_t * sa)
+ikev2_calc_keys_internal (ikev2_sa_t *sa, u8 *skeyseed)
{
u8 *tmp;
/* calculate SKEYSEED = prf(Ni | Nr, g^ir) */
- u8 *skeyseed = 0;
u8 *s = 0;
u16 integ_key_len = 0, salt_len = 0;
ikev2_sa_transform_t *tr_encr, *tr_prf, *tr_integ;
@@ -502,7 +503,6 @@ ikev2_calc_keys (ikev2_sa_t * sa)
vec_append (s, sa->i_nonce);
vec_append (s, sa->r_nonce);
- skeyseed = ikev2_calc_prf (tr_prf, s, sa->dh_shared_key);
/* Calculate S = Ni | Nr | SPIi | SPIr */
u64 *spi;
@@ -520,7 +520,6 @@ ikev2_calc_keys (ikev2_sa_t * sa)
salt_len * 2;
keymat = ikev2_calc_prfplus (tr_prf, skeyseed, s, len);
- vec_free (skeyseed);
vec_free (s);
int pos = 0;
@@ -568,7 +567,42 @@ ikev2_calc_keys (ikev2_sa_t * sa)
}
static void
-ikev2_calc_child_keys (ikev2_sa_t * sa, ikev2_child_sa_t * child)
+ikev2_calc_keys_rekey (ikev2_sa_t *sa_new, ikev2_sa_t *sa_old)
+{
+ u8 *s = 0, *skeyseed = 0;
+ ikev2_sa_transform_t *tr_prf =
+ ikev2_sa_get_td_for_type (sa_old->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+
+ vec_append (s, sa_new->dh_shared_key);
+ vec_append (s, sa_new->i_nonce);
+ vec_append (s, sa_new->r_nonce);
+ skeyseed = ikev2_calc_prf (tr_prf, sa_old->sk_d, s);
+
+ ikev2_calc_keys_internal (sa_new, skeyseed);
+
+ vec_free (skeyseed);
+ vec_free (s);
+}
+
+static void
+ikev2_calc_keys (ikev2_sa_t *sa)
+{
+ u8 *s = 0, *skeyseed = 0;
+ ikev2_sa_transform_t *tr_prf =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+
+ vec_append (s, sa->i_nonce);
+ vec_append (s, sa->r_nonce);
+ skeyseed = ikev2_calc_prf (tr_prf, s, sa->dh_shared_key);
+
+ ikev2_calc_keys_internal (sa, skeyseed);
+
+ vec_free (skeyseed);
+ vec_free (s);
+}
+
+static void
+ikev2_calc_child_keys (ikev2_sa_t *sa, ikev2_child_sa_t *child, u8 kex)
{
u8 *s = 0;
u16 integ_key_len = 0;
@@ -587,6 +621,8 @@ ikev2_calc_child_keys (ikev2_sa_t * sa, ikev2_child_sa_t * child)
else
salt_len = sizeof (u32);
+ if (kex)
+ vec_append (s, sa->dh_shared_key);
vec_append (s, sa->i_nonce);
vec_append (s, sa->r_nonce);
/* calculate PRFplus */
@@ -638,8 +674,8 @@ ikev2_calc_child_keys (ikev2_sa_t * sa, ikev2_child_sa_t * child)
vec_free (keymat);
}
-static_always_inline u8 *
-ikev2_compute_nat_sha1 (u64 ispi, u64 rspi, ip_address_t * ia, u16 port)
+static u8 *
+ikev2_compute_nat_sha1 (u64 ispi, u64 rspi, ip_address_t *ia, u16 port)
{
const u32 max_buf_size =
sizeof (ispi) + sizeof (rspi) + sizeof (ip6_address_t) + sizeof (u16);
@@ -662,7 +698,10 @@ ikev2_parse_ke_payload (const void *p, u32 rlen, ikev2_sa_t * sa,
u16 plen = clib_net_to_host_u16 (ke->length);
ASSERT (plen >= sizeof (*ke) && plen <= rlen);
if (sizeof (*ke) > rlen)
- return 0;
+ {
+ ikev2_elog_error ("KE: packet too small");
+ return 0;
+ }
sa->dh_group = clib_net_to_host_u16 (ke->dh_group);
vec_reset_length (ke_data[0]);
@@ -671,13 +710,20 @@ ikev2_parse_ke_payload (const void *p, u32 rlen, ikev2_sa_t * sa,
}
static int
-ikev2_parse_nonce_payload (const void *p, u32 rlen, u8 * nonce)
+ikev2_parse_nonce_payload (const void *p, u32 rlen, const u8 **nonce)
{
const ike_payload_header_t *ikep = p;
u16 plen = clib_net_to_host_u16 (ikep->length);
ASSERT (plen >= sizeof (*ikep) && plen <= rlen);
- clib_memcpy_fast (nonce, ikep->payload, plen - sizeof (*ikep));
- return 1;
+ int len = plen - sizeof (*ikep);
+ ASSERT (len >= 16 && len <= 256);
+ if (PREDICT_FALSE (len < 16 || len > 256))
+ {
+ ikev2_elog_error ("NONCE: bad size");
+ return 0;
+ }
+ *nonce = ikep->payload;
+ return len;
}
static int
@@ -685,10 +731,16 @@ ikev2_check_payload_length (const ike_payload_header_t * ikep, int rlen,
u16 * plen)
{
if (sizeof (*ikep) > rlen)
- return 0;
+ {
+ ikev2_elog_error ("payload: packet too small");
+ return 0;
+ }
*plen = clib_net_to_host_u16 (ikep->length);
if (*plen < sizeof (*ikep) || *plen > rlen)
- return 0;
+ {
+ ikev2_elog_error ("payload: bad size");
+ return 0;
+ }
return 1;
}
@@ -696,7 +748,6 @@ static int
ikev2_process_sa_init_req (vlib_main_t *vm, ikev2_sa_t *sa, ike_header_t *ike,
udp_header_t *udp, u32 len, u32 sw_if_index)
{
- u8 nonce[IKEV2_NONCE_SIZE];
int p = 0;
u8 payload = ike->nextpayload;
ike_payload_header_t *ikep;
@@ -716,7 +767,10 @@ ikev2_process_sa_init_req (vlib_main_t *vm, ikev2_sa_t *sa, ike_header_t *ike,
vec_add (sa->last_sa_init_req_packet_data, ike, len);
if (len < sizeof (*ike))
- return 0;
+ {
+ ikev2_elog_error ("IKE_INIT request too small");
+ return 0;
+ }
len -= sizeof (*ike);
while (p < len && payload != IKEV2_PAYLOAD_NONE)
@@ -739,9 +793,13 @@ ikev2_process_sa_init_req (vlib_main_t *vm, ikev2_sa_t *sa, ike_header_t *ike,
}
else if (payload == IKEV2_PAYLOAD_NONCE)
{
+ const u8 *nonce;
+ int nonce_len;
vec_reset_length (sa->i_nonce);
- if (ikev2_parse_nonce_payload (ikep, current_length, nonce))
- vec_add (sa->i_nonce, nonce, plen - sizeof (*ikep));
+ if ((nonce_len = ikev2_parse_nonce_payload (ikep, current_length,
+ &nonce)) <= 0)
+ return 0;
+ vec_add (sa->i_nonce, nonce, nonce_len);
}
else if (payload == IKEV2_PAYLOAD_NOTIFY)
{
@@ -805,7 +863,6 @@ ikev2_process_sa_init_resp (vlib_main_t * vm,
ikev2_sa_t * sa, ike_header_t * ike,
udp_header_t * udp, u32 len)
{
- u8 nonce[IKEV2_NONCE_SIZE];
int p = 0;
u8 payload = ike->nextpayload;
ike_payload_header_t *ikep;
@@ -824,7 +881,10 @@ ikev2_process_sa_init_resp (vlib_main_t * vm,
vec_add (sa->last_sa_init_res_packet_data, ike, len);
if (sizeof (*ike) > len)
- return;
+ {
+ ikev2_elog_error ("IKE_INIT response too small");
+ return;
+ }
len -= sizeof (*ike);
while (p < len && payload != IKEV2_PAYLOAD_NONE)
@@ -853,9 +913,13 @@ ikev2_process_sa_init_resp (vlib_main_t * vm,
}
else if (payload == IKEV2_PAYLOAD_NONCE)
{
+ const u8 *nonce;
+ int nonce_len;
vec_reset_length (sa->r_nonce);
- if (ikev2_parse_nonce_payload (ikep, current_length, nonce))
- vec_add (sa->r_nonce, nonce, plen - sizeof (*ikep));
+ if ((nonce_len = ikev2_parse_nonce_payload (ikep, current_length,
+ &nonce)) <= 0)
+ return;
+ vec_add (sa->r_nonce, nonce, nonce_len);
}
else if (payload == IKEV2_PAYLOAD_NOTIFY)
{
@@ -1021,8 +1085,8 @@ ikev2_decrypt_sk_payload (ikev2_sa_t * sa, ike_header_t * ike,
return plaintext;
}
-static_always_inline int
-ikev2_is_id_equal (ikev2_id_t * i1, ikev2_id_t * i2)
+static int
+ikev2_is_id_equal (const ikev2_id_t *i1, const ikev2_id_t *i2)
{
if (i1->type != i2->type)
return 0;
@@ -1046,7 +1110,6 @@ ikev2_initial_contact_cleanup_internal (ikev2_main_per_thread_data_t * ptd,
ikev2_child_sa_t *c;
/* find old IKE SAs with the same authenticated identity */
- /* *INDENT-OFF* */
pool_foreach (tmp, ptd->sas) {
if (!ikev2_is_id_equal (&tmp->i_id, &sa->i_id)
|| !ikev2_is_id_equal(&tmp->r_id, &sa->r_id))
@@ -1055,7 +1118,6 @@ ikev2_initial_contact_cleanup_internal (ikev2_main_per_thread_data_t * ptd,
if (sa->rspi != tmp->rspi)
vec_add1(delete, tmp - ptd->sas);
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (delete); i++)
{
@@ -1332,6 +1394,159 @@ ikev2_process_informational_req (vlib_main_t * vm,
}
static int
+ikev2_process_create_child_sa_rekey (ikev2_sa_t *sa, ikev2_sa_t *sar,
+ ikev2_rekey_t *rekey,
+ ikev2_sa_proposal_t *proposal,
+ ikev2_ts_t *tsi, ikev2_ts_t *tsr,
+ const u8 *nonce, int nonce_len)
+{
+ ikev2_sa_transform_t *tr;
+
+ rekey->i_proposal = proposal;
+ rekey->r_proposal = ikev2_select_proposal (proposal, IKEV2_PROTOCOL_ESP);
+
+ if (sar->dh_group)
+ {
+ tr =
+ ikev2_sa_get_td_for_type (rekey->r_proposal, IKEV2_TRANSFORM_TYPE_DH);
+
+ if (!tr || tr->dh_type != sar->dh_group)
+ {
+ rekey->notify_type = IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD;
+ ikev2_sa_free_proposal_vector (&rekey->r_proposal);
+ return 0;
+ }
+
+ vec_free (sa->dh_shared_key);
+ vec_free (sa->dh_private_key);
+ vec_free (sa->i_dh_data);
+ vec_free (sa->r_dh_data);
+
+ sa->dh_group = sar->dh_group;
+ sa->i_dh_data = sar->i_dh_data;
+ sar->i_dh_data = 0;
+
+ ikev2_generate_dh (sa, tr);
+ rekey->kex = 1;
+ }
+
+ vec_reset_length (sa->i_nonce);
+ vec_add (sa->i_nonce, nonce, nonce_len);
+
+ vec_validate (sa->r_nonce, nonce_len - 1);
+ RAND_bytes ((u8 *) sa->r_nonce, nonce_len);
+
+ rekey->tsi = tsi;
+ rekey->tsr = tsr;
+
+ return 1;
+}
+
+static void
+ikev2_complete_sa_rekey (ikev2_sa_t *sa_new, ikev2_sa_t *sa_old,
+ ikev2_sa_rekey_t *sa_rekey)
+{
+ sa_new->del = 0;
+ sa_new->rekey = 0;
+ sa_new->new_child = 0;
+ sa_new->sa_rekey = 0;
+ sa_new->last_sa_init_req_packet_data = 0;
+ sa_new->last_sa_init_res_packet_data = 0;
+ sa_new->last_msg_id = ~0;
+ sa_new->last_res_packet_data = 0;
+ sa_new->last_init_msg_id = 0;
+ clib_memset (&sa_new->stats, 0, sizeof (sa_new->stats));
+
+ sa_new->ispi = sa_rekey->ispi;
+ sa_new->rspi = sa_rekey->rspi;
+ sa_new->i_nonce = sa_rekey->i_nonce;
+ sa_new->r_nonce = sa_rekey->r_nonce;
+ sa_new->dh_group = sa_rekey->dh_group;
+ sa_new->dh_shared_key = sa_rekey->dh_shared_key;
+ sa_new->dh_private_key = sa_rekey->dh_private_key;
+ sa_new->i_dh_data = sa_rekey->i_dh_data;
+ sa_new->r_dh_data = sa_rekey->r_dh_data;
+ sa_new->i_proposals = sa_rekey->i_proposals;
+ sa_new->r_proposals = sa_rekey->r_proposals;
+
+ sa_new->sk_d = 0;
+ sa_new->sk_ai = 0;
+ sa_new->sk_ar = 0;
+ sa_new->sk_ei = 0;
+ sa_new->sk_er = 0;
+ sa_new->sk_pi = 0;
+ sa_new->sk_pr = 0;
+ ikev2_calc_keys_rekey (sa_new, sa_old);
+
+ sa_new->i_auth.data = vec_dup (sa_old->i_auth.data);
+ sa_new->i_auth.key = sa_old->i_auth.key;
+ if (sa_new->i_auth.key)
+ EVP_PKEY_up_ref (sa_new->i_auth.key);
+
+ sa_new->r_auth.data = vec_dup (sa_old->r_auth.data);
+ sa_new->r_auth.key = sa_old->r_auth.key;
+ if (sa_new->r_auth.key)
+ EVP_PKEY_up_ref (sa_new->r_auth.key);
+
+ sa_new->i_id.data = vec_dup (sa_old->i_id.data);
+ sa_new->r_id.data = vec_dup (sa_old->r_id.data);
+
+ sa_old->is_tun_itf_set = 0;
+ sa_old->tun_itf = ~0;
+ sa_old->old_id_expiration = 0;
+ sa_old->current_remote_id_mask = 0;
+ sa_old->old_remote_id = 0;
+ sa_old->old_remote_id_present = 0;
+ sa_old->childs = 0;
+ sa_old->sw_if_index = ~0;
+}
+
+static void
+ikev2_process_sa_rekey (ikev2_sa_t *sa_new, ikev2_sa_t *sa_old,
+ ikev2_sa_rekey_t *sa_rekey)
+{
+ ikev2_sa_transform_t *tr;
+
+ if (ikev2_generate_sa_init_data (sa_new) != IKEV2_GENERATE_SA_INIT_OK)
+ {
+ sa_rekey->notify_type = IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD;
+ return;
+ }
+
+ sa_new->r_proposals =
+ ikev2_select_proposal (sa_new->i_proposals, IKEV2_PROTOCOL_IKE);
+
+ tr = ikev2_sa_get_td_for_type (sa_new->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
+ if (!tr || tr->dh_type != sa_new->dh_group)
+ {
+ sa_rekey->notify_type = IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD;
+ return;
+ }
+
+ sa_rekey->notify_type = 0;
+ sa_rekey->ispi = sa_new->i_proposals[0].spi;
+ sa_rekey->rspi = sa_new->r_proposals[0].spi;
+ sa_rekey->i_nonce = sa_new->i_nonce;
+ sa_rekey->r_nonce = sa_new->r_nonce;
+ sa_rekey->dh_group = sa_new->dh_group;
+ sa_rekey->dh_shared_key = sa_new->dh_shared_key;
+ sa_rekey->dh_private_key = sa_new->dh_private_key;
+ sa_rekey->i_dh_data = sa_new->i_dh_data;
+ sa_rekey->r_dh_data = sa_new->r_dh_data;
+ sa_rekey->i_proposals = sa_new->i_proposals;
+ sa_rekey->r_proposals = sa_new->r_proposals;
+
+ sa_new->i_nonce = 0;
+ sa_new->r_nonce = 0;
+ sa_new->dh_shared_key = 0;
+ sa_new->dh_private_key = 0;
+ sa_new->i_dh_data = 0;
+ sa_new->r_dh_data = 0;
+ sa_new->i_proposals = 0;
+ sa_new->r_proposals = 0;
+}
+
+static int
ikev2_process_create_child_sa_req (vlib_main_t * vm,
ikev2_sa_t * sa, ike_header_t * ike,
u32 len)
@@ -1339,8 +1554,6 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
int p = 0;
u8 payload = ike->nextpayload;
u8 *plaintext = 0;
- u8 rekeying = 0;
- u8 nonce[IKEV2_NONCE_SIZE];
ikev2_rekey_t *rekey;
ike_payload_header_t *ikep;
ikev2_notify_t *n = 0;
@@ -1350,6 +1563,11 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
ikev2_child_sa_t *child_sa;
u32 dlen = 0, src;
u16 plen;
+ const u8 *nonce = 0;
+ int nonce_len = 0;
+ ikev2_sa_t sar;
+
+ clib_memset (&sar, 0, sizeof (sar));
if (sa->is_initiator)
src = ip_addr_v4 (&sa->raddr).as_u32;
@@ -1379,13 +1597,23 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
{
proposal = ikev2_parse_sa_payload (ikep, current_length);
}
+ else if (payload == IKEV2_PAYLOAD_KE)
+ {
+ if (!ikev2_parse_ke_payload (ikep, current_length, &sar,
+ &sar.i_dh_data))
+ goto cleanup_and_exit;
+ }
else if (payload == IKEV2_PAYLOAD_NOTIFY)
{
- n = ikev2_parse_notify_payload (ikep, current_length);
- if (n->msg_type == IKEV2_NOTIFY_MSG_REKEY_SA)
+ ikev2_notify_t *n0;
+ n0 = ikev2_parse_notify_payload (ikep, current_length);
+ if (n0->msg_type == IKEV2_NOTIFY_MSG_REKEY_SA)
{
- rekeying = 1;
+ vec_free (n);
+ n = n0;
}
+ else
+ vec_free (n0);
}
else if (payload == IKEV2_PAYLOAD_DELETE)
{
@@ -1397,7 +1625,9 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
}
else if (payload == IKEV2_PAYLOAD_NONCE)
{
- ikev2_parse_nonce_payload (ikep, current_length, nonce);
+ nonce_len = ikev2_parse_nonce_payload (ikep, current_length, &nonce);
+ if (nonce_len <= 0)
+ goto cleanup_and_exit;
}
else if (payload == IKEV2_PAYLOAD_TSI)
{
@@ -1421,7 +1651,9 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
p += plen;
}
- if (!proposal || proposal->protocol_id != IKEV2_PROTOCOL_ESP)
+ if (!proposal || !nonce ||
+ (proposal->protocol_id != IKEV2_PROTOCOL_ESP &&
+ proposal->protocol_id != IKEV2_PROTOCOL_IKE))
goto cleanup_and_exit;
if (sa->is_initiator)
@@ -1429,6 +1661,7 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
rekey = sa->rekey;
if (vec_len (rekey) == 0)
goto cleanup_and_exit;
+ rekey->notify_type = 0;
rekey->protocol_id = proposal->protocol_id;
rekey->i_proposal =
ikev2_select_proposal (proposal, IKEV2_PROTOCOL_ESP);
@@ -1438,7 +1671,7 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
rekey->tsr = tsr;
/* update Nr */
vec_reset_length (sa->r_nonce);
- vec_add (sa->r_nonce, nonce, IKEV2_NONCE_SIZE);
+ vec_add (sa->r_nonce, nonce, nonce_len);
child_sa = ikev2_sa_get_child (sa, rekey->ispi, IKEV2_PROTOCOL_ESP, 1);
if (child_sa)
{
@@ -1447,7 +1680,7 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
}
else
{
- if (rekeying)
+ if (n)
{
child_sa = ikev2_sa_get_child (sa, n->spi, n->protocol_id, 1);
if (!child_sa)
@@ -1457,36 +1690,52 @@ ikev2_process_create_child_sa_req (vlib_main_t * vm,
goto cleanup_and_exit;
}
vec_add2 (sa->rekey, rekey, 1);
+ rekey->notify_type = 0;
+ rekey->kex = 0;
rekey->protocol_id = n->protocol_id;
rekey->spi = n->spi;
- rekey->i_proposal = proposal;
- rekey->r_proposal =
- ikev2_select_proposal (proposal, IKEV2_PROTOCOL_ESP);
- /* update Ni */
- vec_reset_length (sa->i_nonce);
- vec_add (sa->i_nonce, nonce, IKEV2_NONCE_SIZE);
- /* generate new Nr */
- vec_validate (sa->r_nonce, IKEV2_NONCE_SIZE - 1);
- RAND_bytes ((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE);
+ if (sa->old_remote_id_present)
+ {
+ rekey->notify_type = IKEV2_NOTIFY_MSG_TEMPORARY_FAILURE;
+ vec_free (proposal);
+ vec_free (tsr);
+ vec_free (tsi);
+ }
+ else if (!ikev2_process_create_child_sa_rekey (
+ sa, &sar, rekey, proposal, tsi, tsr, nonce, nonce_len))
+ {
+ vec_free (proposal);
+ vec_free (tsr);
+ vec_free (tsi);
+ }
+ }
+ else if (proposal[0].protocol_id == IKEV2_PROTOCOL_IKE)
+ {
+ ikev2_sa_rekey_t *sa_rekey;
+ if (tsi || tsr)
+ goto cleanup_and_exit;
+ sar.i_proposals = proposal;
+ vec_add (sar.i_nonce, nonce, nonce_len);
+ vec_add2 (sa->sa_rekey, sa_rekey, 1);
+ ikev2_process_sa_rekey (&sar, sa, sa_rekey);
}
else
{
/* create new child SA */
vec_add2 (sa->new_child, rekey, 1);
- rekey->i_proposal = proposal;
- rekey->r_proposal =
- ikev2_select_proposal (proposal, IKEV2_PROTOCOL_ESP);
- /* update Ni */
- vec_reset_length (sa->i_nonce);
- vec_add (sa->i_nonce, nonce, IKEV2_NONCE_SIZE);
- /* generate new Nr */
- vec_validate (sa->r_nonce, IKEV2_NONCE_SIZE - 1);
- RAND_bytes ((u8 *) sa->r_nonce, IKEV2_NONCE_SIZE);
+ rekey->notify_type = 0;
+ rekey->kex = 0;
+ if (!ikev2_process_create_child_sa_rekey (
+ sa, &sar, rekey, proposal, tsi, tsr, nonce, nonce_len))
+ {
+ vec_free (proposal);
+ vec_free (tsr);
+ vec_free (tsi);
+ }
}
- rekey->tsi = tsi;
- rekey->tsr = tsr;
}
vec_free (n);
+ ikev2_sa_free_all_vec (&sar);
return 1;
cleanup_and_exit:
@@ -1494,6 +1743,7 @@ cleanup_and_exit:
vec_free (proposal);
vec_free (tsr);
vec_free (tsi);
+ ikev2_sa_free_all_vec (&sar);
return 0;
}
@@ -1541,6 +1791,25 @@ ikev2_sa_generate_authmsg (ikev2_sa_t * sa, int is_responder)
}
static int
+ikev2_match_profile (const ikev2_profile_t *p, const ikev2_id_t *id_loc,
+ const ikev2_id_t *id_rem, int is_initiator)
+{
+ /* on the initiator, IDi is always present and must match
+ * however on the responder, IDr (which is our local id) is optional */
+ if ((is_initiator || id_loc->type != 0) &&
+ !ikev2_is_id_equal (&p->loc_id, id_loc))
+ return 0;
+
+ /* on the initiator, we might not have configured a specific remote id
+ * however on the responder, the remote id should always be configured */
+ if ((!is_initiator || p->rem_id.type != 0) &&
+ !ikev2_is_id_equal (&p->rem_id, id_rem))
+ return 0;
+
+ return 1;
+}
+
+static int
ikev2_ts_cmp (ikev2_ts_t * ts1, ikev2_ts_t * ts2)
{
if (ts1->ts_type == ts2->ts_type && ts1->protocol_id == ts2->protocol_id &&
@@ -1560,7 +1829,6 @@ ikev2_sa_match_ts (ikev2_sa_t * sa)
ikev2_ts_t *ts, *p_tsi, *p_tsr, *tsi = 0, *tsr = 0;
ikev2_id_t *id_rem, *id_loc;
- /* *INDENT-OFF* */
pool_foreach (p, km->profiles) {
if (sa->is_initiator)
@@ -1578,9 +1846,7 @@ ikev2_sa_match_ts (ikev2_sa_t * sa)
id_loc = &sa->r_id;
}
- /* check id */
- if (!ikev2_is_id_equal (&p->rem_id, id_rem)
- || !ikev2_is_id_equal (&p->loc_id, id_loc))
+ if (!ikev2_match_profile (p, id_loc, id_rem, sa->is_initiator))
continue;
sa->profile_index = p - km->profiles;
@@ -1605,7 +1871,6 @@ ikev2_sa_match_ts (ikev2_sa_t * sa)
break;
}
- /* *INDENT-ON* */
if (tsi && tsr)
{
@@ -1623,7 +1888,7 @@ ikev2_sa_match_ts (ikev2_sa_t * sa)
}
static ikev2_profile_t *
-ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
+ikev2_select_profile (vlib_main_t *vm, ikev2_main_t *km, ikev2_sa_t *sa,
ikev2_sa_transform_t *tr_prf, u8 *key_pad)
{
ikev2_profile_t *ret = 0, *p;
@@ -1648,9 +1913,7 @@ ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
pool_foreach (p, km->profiles)
{
- /* check id */
- if (!ikev2_is_id_equal (&p->rem_id, id_rem) ||
- !ikev2_is_id_equal (&p->loc_id, id_loc))
+ if (!ikev2_match_profile (p, id_loc, id_rem, sa->is_initiator))
continue;
if (sa_auth->method == IKEV2_AUTH_METHOD_SHARED_KEY_MIC)
@@ -1665,6 +1928,7 @@ ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
if (!clib_memcmp (auth, sa_auth->data, vec_len (sa_auth->data)))
{
ikev2_set_state (sa, IKEV2_STATE_AUTHENTICATED);
+ sa->auth_timestamp = vlib_time_now (vm);
vec_free (auth);
ret = p;
break;
@@ -1683,6 +1947,7 @@ ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
if (ikev2_verify_sign (p->auth.key, sa_auth->data, authmsg) == 1)
{
ikev2_set_state (sa, IKEV2_STATE_AUTHENTICATED);
+ sa->auth_timestamp = vlib_time_now (vm);
ret = p;
break;
}
@@ -1698,7 +1963,7 @@ ikev2_select_profile (ikev2_main_t *km, ikev2_sa_t *sa,
}
static void
-ikev2_sa_auth (ikev2_sa_t *sa)
+ikev2_sa_auth (ikev2_sa_t *sa, vlib_main_t *vm)
{
ikev2_main_t *km = &ikev2_main;
ikev2_profile_t *sel_p = 0;
@@ -1719,7 +1984,7 @@ ikev2_sa_auth (ikev2_sa_t *sa)
}
key_pad = format (0, "%s", IKEV2_KEY_PAD);
- sel_p = ikev2_select_profile (km, sa, tr_prf, key_pad);
+ sel_p = ikev2_select_profile (vm, km, sa, tr_prf, key_pad);
if (sel_p)
{
@@ -1864,8 +2129,8 @@ ikev2_add_tunnel_from_main (ikev2_add_ipsec_tunnel_args_t * a)
.t_mode = TUNNEL_MODE_P2P,
.t_table_id = 0,
.t_hop_limit = 255,
- .t_src = a->local_ip,
- .t_dst = a->remote_ip,
+ .t_src = a->remote_ip,
+ .t_dst = a->local_ip,
};
tunnel_t tun_out = {
.t_flags = TUNNEL_FLAG_NONE,
@@ -1874,8 +2139,8 @@ ikev2_add_tunnel_from_main (ikev2_add_ipsec_tunnel_args_t * a)
.t_mode = TUNNEL_MODE_P2P,
.t_table_id = 0,
.t_hop_limit = 255,
- .t_src = a->remote_ip,
- .t_dst = a->local_ip,
+ .t_src = a->local_ip,
+ .t_dst = a->remote_ip,
};
if (~0 == a->sw_if_index)
@@ -1924,7 +2189,7 @@ ikev2_add_tunnel_from_main (ikev2_add_ipsec_tunnel_args_t * a)
rv = ipsec_sa_add_and_lock (a->local_sa_id, a->local_spi, IPSEC_PROTOCOL_ESP,
a->encr_type, &a->loc_ckey, a->integ_type,
&a->loc_ikey, a->flags, a->salt_local,
- a->src_port, a->dst_port, &tun_out, NULL);
+ a->src_port, a->dst_port, 0, &tun_out, NULL);
if (rv)
goto err0;
@@ -1932,7 +2197,7 @@ ikev2_add_tunnel_from_main (ikev2_add_ipsec_tunnel_args_t * a)
a->remote_sa_id, a->remote_spi, IPSEC_PROTOCOL_ESP, a->encr_type,
&a->rem_ckey, a->integ_type, &a->rem_ikey,
(a->flags | IPSEC_SA_FLAG_IS_INBOUND), a->salt_remote,
- a->ipsec_over_udp_port, a->ipsec_over_udp_port, &tun_in, NULL);
+ a->ipsec_over_udp_port, a->ipsec_over_udp_port, 0, &tun_in, NULL);
if (rv)
goto err1;
@@ -1951,10 +2216,9 @@ err0:
}
static int
-ikev2_create_tunnel_interface (vlib_main_t * vm,
- ikev2_sa_t * sa,
- ikev2_child_sa_t * child, u32 sa_index,
- u32 child_index, u8 is_rekey)
+ikev2_create_tunnel_interface (vlib_main_t *vm, ikev2_sa_t *sa,
+ ikev2_child_sa_t *child, u32 sa_index,
+ u32 child_index, u8 is_rekey, u8 kex)
{
u32 thread_index = vlib_get_thread_index ();
ikev2_main_t *km = &ikev2_main;
@@ -1968,6 +2232,8 @@ ikev2_create_tunnel_interface (vlib_main_t * vm,
clib_memset (&a, 0, sizeof (a));
+ child->timestamp = vlib_time_now (vm);
+
if (!child->r_proposals)
{
ikev2_set_state (sa, IKEV2_STATE_NO_PROPOSAL_CHOSEN);
@@ -2097,7 +2363,7 @@ ikev2_create_tunnel_interface (vlib_main_t * vm,
}
a.integ_type = integ_type;
- ikev2_calc_child_keys (sa, child);
+ ikev2_calc_child_keys (sa, child, kex);
if (sa->is_initiator)
{
@@ -2202,7 +2468,7 @@ typedef struct
u32 sw_if_index;
} ikev2_del_ipsec_tunnel_args_t;
-static_always_inline u32
+static u32
ikev2_flip_alternate_sa_bit (u32 id)
{
u32 mask = 0x800;
@@ -2220,14 +2486,12 @@ ikev2_del_tunnel_from_main (ikev2_del_ipsec_tunnel_args_t * a)
if (~0 == a->sw_if_index)
{
- /* *INDENT-OFF* */
ipip_tunnel_key_t key = {
.src = a->local_ip,
.dst = a->remote_ip,
.transport = IPIP_TRANSPORT_IP4,
.fib_index = 0,
};
- /* *INDENT-ON* */
ipip = ipip_tunnel_db_find (&key);
@@ -2284,6 +2548,47 @@ ikev2_delete_tunnel_interface (vnet_main_t * vnm, ikev2_sa_t * sa,
return 0;
}
+static void
+ikev2_add_invalid_ke_payload (ikev2_sa_t *sa, ikev2_payload_chain_t *chain)
+{
+ u8 *data = vec_new (u8, 2);
+ ikev2_sa_transform_t *tr_dh =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
+ ASSERT (tr_dh && tr_dh->dh_type);
+ data[0] = (tr_dh->dh_type >> 8) & 0xff;
+ data[1] = (tr_dh->dh_type) & 0xff;
+ ikev2_payload_add_notify (chain, IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD, data);
+ vec_free (data);
+}
+
+static void
+ikev2_add_notify_payload (ikev2_sa_t *sa, ikev2_payload_chain_t *chain,
+ u16 notify_type)
+{
+ if (notify_type == IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD)
+ ikev2_add_invalid_ke_payload (sa, chain);
+ else
+ ikev2_payload_add_notify (chain, notify_type, 0);
+}
+
+static void
+ikev2_add_create_child_resp (ikev2_sa_t *sa, ikev2_rekey_t *rekey,
+ ikev2_payload_chain_t *chain)
+{
+ if (rekey->notify_type)
+ {
+ ikev2_add_notify_payload (sa, chain, rekey->notify_type);
+ return;
+ }
+
+ ikev2_payload_add_sa (chain, rekey->r_proposal, 0);
+ ikev2_payload_add_nonce (chain, sa->r_nonce);
+ if (rekey->kex)
+ ikev2_payload_add_ke (chain, sa->dh_group, sa->r_dh_data);
+ ikev2_payload_add_ts (chain, rekey->tsi, IKEV2_PAYLOAD_TSI);
+ ikev2_payload_add_ts (chain, rekey->tsr, IKEV2_PAYLOAD_TSR);
+}
+
static u32
ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
void *user, udp_header_t *udp, ikev2_stats_t *stats)
@@ -2314,20 +2619,7 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
}
else if (sa->dh_group == IKEV2_TRANSFORM_DH_TYPE_NONE)
{
- u8 *data = vec_new (u8, 2);
- ikev2_sa_transform_t *tr_dh;
- tr_dh =
- ikev2_sa_get_td_for_type (sa->r_proposals,
- IKEV2_TRANSFORM_TYPE_DH);
- ASSERT (tr_dh && tr_dh->dh_type);
-
- data[0] = (tr_dh->dh_type >> 8) & 0xff;
- data[1] = (tr_dh->dh_type) & 0xff;
-
- ikev2_payload_add_notify (chain,
- IKEV2_NOTIFY_MSG_INVALID_KE_PAYLOAD,
- data);
- vec_free (data);
+ ikev2_add_invalid_ke_payload (sa, chain);
ikev2_set_state (sa, IKEV2_STATE_NOTIFY_AND_DELETE);
}
else if (sa->state == IKEV2_STATE_NOTIFY_AND_DELETE)
@@ -2345,7 +2637,7 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
ASSERT (udp);
ike->rspi = clib_host_to_net_u64 (sa->rspi);
- ikev2_payload_add_sa (chain, sa->r_proposals);
+ ikev2_payload_add_sa (chain, sa->r_proposals, 0);
ikev2_payload_add_ke (chain, sa->dh_group, sa->r_dh_data);
ikev2_payload_add_nonce (chain, sa->r_nonce);
@@ -2372,9 +2664,8 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
if (sa->state == IKEV2_STATE_AUTHENTICATED)
{
ikev2_payload_add_id (chain, &sa->r_id, IKEV2_PAYLOAD_IDR);
- ikev2_payload_add_id (chain, &sa->i_id, IKEV2_PAYLOAD_IDI);
ikev2_payload_add_auth (chain, &sa->r_auth);
- ikev2_payload_add_sa (chain, sa->childs[0].r_proposals);
+ ikev2_payload_add_sa (chain, sa->childs[0].r_proposals, 0);
ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI);
ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR);
}
@@ -2414,9 +2705,12 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
else if (sa->state == IKEV2_STATE_SA_INIT)
{
ikev2_payload_add_id (chain, &sa->i_id, IKEV2_PAYLOAD_IDI);
- ikev2_payload_add_id (chain, &sa->r_id, IKEV2_PAYLOAD_IDR);
+ /* IDr is optional when sending INIT from the initiator */
+ ASSERT (sa->r_id.type != 0 || sa->is_initiator);
+ if (sa->r_id.type != 0)
+ ikev2_payload_add_id (chain, &sa->r_id, IKEV2_PAYLOAD_IDR);
ikev2_payload_add_auth (chain, &sa->i_auth);
- ikev2_payload_add_sa (chain, sa->childs[0].i_proposals);
+ ikev2_payload_add_sa (chain, sa->childs[0].i_proposals, 0);
ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI);
ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR);
ikev2_payload_add_notify (chain, IKEV2_NOTIFY_MSG_INITIAL_CONTACT,
@@ -2493,7 +2787,7 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
notify.spi = sa->childs[0].i_proposals->spi;
*(u32 *) data = clib_host_to_net_u32 (notify.spi);
- ikev2_payload_add_sa (chain, proposals);
+ ikev2_payload_add_sa (chain, proposals, 0);
ikev2_payload_add_nonce (chain, sa->i_nonce);
ikev2_payload_add_ts (chain, sa->childs[0].tsi, IKEV2_PAYLOAD_TSI);
ikev2_payload_add_ts (chain, sa->childs[0].tsr, IKEV2_PAYLOAD_TSR);
@@ -2504,22 +2798,27 @@ ikev2_generate_message (vlib_buffer_t *b, ikev2_sa_t *sa, ike_header_t *ike,
}
else if (vec_len (sa->rekey) > 0)
{
- ikev2_payload_add_sa (chain, sa->rekey[0].r_proposal);
- ikev2_payload_add_nonce (chain, sa->r_nonce);
- ikev2_payload_add_ts (chain, sa->rekey[0].tsi, IKEV2_PAYLOAD_TSI);
- ikev2_payload_add_ts (chain, sa->rekey[0].tsr, IKEV2_PAYLOAD_TSR);
+ ikev2_add_create_child_resp (sa, &sa->rekey[0], chain);
vec_del1 (sa->rekey, 0);
}
else if (vec_len (sa->new_child) > 0)
{
- ikev2_payload_add_sa (chain, sa->new_child[0].r_proposal);
- ikev2_payload_add_nonce (chain, sa->r_nonce);
- ikev2_payload_add_ts (chain, sa->new_child[0].tsi,
- IKEV2_PAYLOAD_TSI);
- ikev2_payload_add_ts (chain, sa->new_child[0].tsr,
- IKEV2_PAYLOAD_TSR);
+ ikev2_add_create_child_resp (sa, &sa->new_child[0], chain);
vec_del1 (sa->new_child, 0);
}
+ else if (vec_len (sa->sa_rekey) > 0)
+ {
+ if (sa->sa_rekey[0].notify_type)
+ ikev2_add_notify_payload (sa, chain, sa->sa_rekey[0].notify_type);
+ else
+ {
+ ikev2_payload_add_sa (chain, sa->sa_rekey[0].r_proposals, 1);
+ ikev2_payload_add_nonce (chain, sa->sa_rekey[0].r_nonce);
+ ikev2_payload_add_ke (chain, sa->sa_rekey[0].dh_group,
+ sa->sa_rekey[0].r_dh_data);
+ }
+ vec_del1 (sa->sa_rekey, 0);
+ }
else if (sa->unsupported_cp)
{
u8 *data = vec_new (u8, 1);
@@ -2712,13 +3011,11 @@ ikev2_retransmit_sa_init (ike_header_t * ike, ip_address_t iaddr,
u32 res;
ikev2_main_per_thread_data_t *ptd = ikev2_get_per_thread_data ();
- /* *INDENT-OFF* */
pool_foreach (sa, ptd->sas) {
res = ikev2_retransmit_sa_init_one (sa, ike, iaddr, raddr, rlen);
if (res)
return res;
}
- /* *INDENT-ON* */
/* req is not retransmit */
return 0;
@@ -2796,8 +3093,8 @@ ikev2_del_sa_init (u64 ispi)
sizeof (ispi));
}
-static_always_inline void
-ikev2_rewrite_v6_addrs (ikev2_sa_t * sa, ip6_header_t * ih)
+static void
+ikev2_rewrite_v6_addrs (ikev2_sa_t *sa, ip6_header_t *ih)
{
if (sa->is_initiator)
{
@@ -2811,8 +3108,8 @@ ikev2_rewrite_v6_addrs (ikev2_sa_t * sa, ip6_header_t * ih)
}
}
-static_always_inline void
-ikev2_rewrite_v4_addrs (ikev2_sa_t * sa, ip4_header_t * ih)
+static void
+ikev2_rewrite_v4_addrs (ikev2_sa_t *sa, ip4_header_t *ih)
{
if (sa->is_initiator)
{
@@ -2826,7 +3123,7 @@ ikev2_rewrite_v4_addrs (ikev2_sa_t * sa, ip4_header_t * ih)
}
}
-static_always_inline void
+static void
ikev2_set_ip_address (ikev2_sa_t *sa, const void *iaddr, const void *raddr,
const ip_address_family_t af)
{
@@ -2881,7 +3178,7 @@ ikev2_update_stats (vlib_main_t *vm, u32 node_index, ikev2_stats_t *s)
s->n_sa_auth_req);
}
-static_always_inline uword
+static uword
ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, u8 is_ip4, u8 natt)
{
@@ -3131,18 +3428,19 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
sa0->dst_port = clib_net_to_host_u16 (udp0->src_port);
res = ikev2_process_auth_req (vm, sa0, ike0, rlen);
if (res)
- ikev2_sa_auth (sa0);
+ ikev2_sa_auth (sa0, vm);
else
vlib_node_increment_counter (vm, node->node_index,
IKEV2_ERROR_MALFORMED_PACKET, 1);
if (sa0->state == IKEV2_STATE_AUTHENTICATED)
{
ikev2_initial_contact_cleanup (ptd, sa0);
+ p = hash_get (ptd->sa_by_rspi,
+ clib_net_to_host_u64 (ike0->rspi));
ikev2_sa_match_ts (sa0);
if (sa0->state != IKEV2_STATE_TS_UNACCEPTABLE)
- ikev2_create_tunnel_interface (vm, sa0,
- &sa0->childs[0],
- p[0], 0, 0);
+ ikev2_create_tunnel_interface (vm, sa0, &sa0->childs[0],
+ p[0], 0, 0, 0);
}
if (sa0->is_initiator)
@@ -3267,11 +3565,12 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
goto dispatch0;
}
- if (sa0->rekey)
+ if (vec_len (sa0->rekey) > 0)
{
- if (sa0->rekey[0].protocol_id != IKEV2_PROTOCOL_IKE)
+ if (!sa0->rekey[0].notify_type &&
+ sa0->rekey[0].protocol_id != IKEV2_PROTOCOL_IKE)
{
- if (sa0->childs)
+ if (vec_len (sa0->childs) > 0)
ikev2_sa_free_all_child_sa (&sa0->childs);
ikev2_child_sa_t *child;
vec_add2 (sa0->childs, child, 1);
@@ -3281,7 +3580,8 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
child->tsi = sa0->rekey[0].tsi;
child->tsr = sa0->rekey[0].tsr;
ikev2_create_tunnel_interface (vm, sa0, child, p[0],
- child - sa0->childs, 1);
+ child - sa0->childs, 1,
+ sa0->rekey[0].kex);
}
if (ike_hdr_is_response (ike0))
{
@@ -3300,7 +3600,7 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
1);
}
}
- else if (sa0->new_child)
+ else if (vec_len (sa0->new_child) > 0)
{
ikev2_child_sa_t *c;
vec_add2 (sa0->childs, c, 1);
@@ -3310,7 +3610,8 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
c->tsi = sa0->new_child[0].tsi;
c->tsr = sa0->new_child[0].tsr;
ikev2_create_tunnel_interface (vm, sa0, c, p[0],
- c - sa0->childs, 0);
+ c - sa0->childs, 0,
+ sa0->new_child[0].kex);
if (ike_hdr_is_request (ike0))
{
ike0->flags = IKEV2_HDR_FLAG_RESPONSE;
@@ -3321,6 +3622,38 @@ ikev2_node_internal (vlib_main_t *vm, vlib_node_runtime_t *node,
vm, node->node_index, IKEV2_ERROR_NO_BUFF_SPACE, 1);
}
}
+ else if (vec_len (sa0->sa_rekey) > 0)
+ {
+ if (!sa0->sa_rekey[0].notify_type)
+ {
+ ikev2_sa_t *sar, *tmp = 0;
+ pool_get (ptd->sas, tmp);
+ sa0 = pool_elt_at_index (ptd->sas, p[0]);
+ /* swap old/new SAs to keep index and inherit IPsec SA */
+ clib_memcpy_fast (tmp, sa0, sizeof (*tmp));
+ sar = sa0;
+ sa0 = tmp;
+ hash_set (ptd->sa_by_rspi, sa0->rspi, sa0 - ptd->sas);
+ p = hash_get (ptd->sa_by_rspi, sa0->rspi);
+ ikev2_complete_sa_rekey (sar, sa0, &sa0->sa_rekey[0]);
+ hash_set (ptd->sa_by_rspi, sar->rspi, sar - ptd->sas);
+ }
+ if (ike_hdr_is_response (ike0))
+ {
+ vec_free (sa0->sa_rekey);
+ }
+ else
+ {
+ stats->n_rekey_req++;
+ sa0->stats.n_rekey_req++;
+ ike0->flags = IKEV2_HDR_FLAG_RESPONSE;
+ slen =
+ ikev2_generate_message (b0, sa0, ike0, 0, udp0, stats);
+ if (~0 == slen)
+ vlib_node_increment_counter (
+ vm, node->node_index, IKEV2_ERROR_NO_BUFF_SPACE, 1);
+ }
+ }
}
}
else
@@ -3429,7 +3762,6 @@ ikev2_ip6 (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return ikev2_node_internal (vm, node, frame, 0 /* is_ip4 */, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ikev2_node_ip4,static) = {
.function = ikev2_ip4,
.name = "ikev2-ip4",
@@ -3480,7 +3812,6 @@ VLIB_REGISTER_NODE (ikev2_node_ip6,static) = {
[IKEV2_NEXT_IP6_ERROR_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
// set ikev2 proposals when vpp is used as initiator
static clib_error_t *
@@ -3721,21 +4052,23 @@ ikev2_set_local_key (vlib_main_t * vm, u8 * file)
return 0;
}
-static_always_inline vnet_api_error_t
-ikev2_register_udp_port (ikev2_profile_t * p, u16 port)
+static vnet_api_error_t
+ikev2_register_udp_port (ikev2_profile_t *p, u16 port)
{
- ipsec_register_udp_port (port);
+ ipsec_register_udp_port (port, 0 /* is_ip4 */);
+ ipsec_register_udp_port (port, 1 /* is_ip4 */);
p->ipsec_over_udp_port = port;
return 0;
}
-static_always_inline void
-ikev2_unregister_udp_port (ikev2_profile_t * p)
+static void
+ikev2_unregister_udp_port (ikev2_profile_t *p)
{
if (p->ipsec_over_udp_port == IPSEC_UDP_PORT_NONE)
return;
- ipsec_unregister_udp_port (p->ipsec_over_udp_port);
+ ipsec_unregister_udp_port (p->ipsec_over_udp_port, 0 /* is_ip4 */);
+ ipsec_unregister_udp_port (p->ipsec_over_udp_port, 1 /* is_ip4 */);
p->ipsec_over_udp_port = IPSEC_UDP_PORT_NONE;
}
@@ -3820,12 +4153,10 @@ ikev2_cleanup_profile_sessions (ikev2_main_t * km, ikev2_profile_t * p)
u32 *sai;
u32 *del_sai = 0;
- /* *INDENT-OFF* */
pool_foreach (sa, km->sais) {
if (pi == sa->profile_index)
vec_add1 (del_sai, sa - km->sais);
}
- /* *INDENT-ON* */
vec_foreach (sai, del_sai)
{
@@ -3838,12 +4169,10 @@ ikev2_cleanup_profile_sessions (ikev2_main_t * km, ikev2_profile_t * p)
vec_foreach (tkm, km->per_thread_data)
{
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
if (sa->profile_index != ~0 && pi == sa->profile_index)
vec_add1 (del_sai, sa - tkm->sas);
}
- /* *INDENT-ON* */
vec_foreach (sai, del_sai)
{
@@ -3878,12 +4207,51 @@ ikev2_profile_free (ikev2_profile_t * p)
vec_free (p->rem_id.data);
}
+static void
+ikev2_bind (vlib_main_t *vm, ikev2_main_t *km)
+{
+ if (0 == km->bind_refcount)
+ {
+ udp_register_dst_port (vm, IKEV2_PORT, ikev2_node_ip4.index, 1);
+ udp_register_dst_port (vm, IKEV2_PORT, ikev2_node_ip6.index, 0);
+ udp_register_dst_port (vm, IKEV2_PORT_NATT, ikev2_node_ip4.index, 1);
+ udp_register_dst_port (vm, IKEV2_PORT_NATT, ikev2_node_ip6.index, 0);
+
+ vlib_punt_register (km->punt_hdl,
+ ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
+ "ikev2-ip4-natt");
+ }
+
+ km->bind_refcount++;
+}
+
+static void
+ikev2_unbind (vlib_main_t *vm, ikev2_main_t *km)
+{
+ km->bind_refcount--;
+ if (0 == km->bind_refcount)
+ {
+ vlib_punt_unregister (km->punt_hdl,
+ ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
+ "ikev2-ip4-natt");
+
+ udp_unregister_dst_port (vm, IKEV2_PORT_NATT, 0);
+ udp_unregister_dst_port (vm, IKEV2_PORT_NATT, 1);
+ udp_unregister_dst_port (vm, IKEV2_PORT, 0);
+ udp_unregister_dst_port (vm, IKEV2_PORT, 1);
+ }
+}
+
+static void ikev2_lazy_init (ikev2_main_t *km);
+
clib_error_t *
ikev2_add_del_profile (vlib_main_t * vm, u8 * name, int is_add)
{
ikev2_main_t *km = &ikev2_main;
ikev2_profile_t *p;
+ ikev2_lazy_init (km);
+
if (is_add)
{
if (ikev2_profile_index_by_name (name))
@@ -3897,6 +4265,8 @@ ikev2_add_del_profile (vlib_main_t * vm, u8 * name, int is_add)
p->tun_itf = ~0;
uword index = p - km->profiles;
mhash_set_mem (&km->profile_index_by_name, name, &index, 0);
+
+ ikev2_bind (vm, km);
}
else
{
@@ -3904,6 +4274,8 @@ ikev2_add_del_profile (vlib_main_t * vm, u8 * name, int is_add)
if (!p)
return clib_error_return (0, "policy %v does not exists", name);
+ ikev2_unbind (vm, km);
+
ikev2_unregister_udp_port (p);
ikev2_cleanup_profile_sessions (km, p);
@@ -3995,8 +4367,8 @@ ikev2_set_profile_id (vlib_main_t * vm, u8 * name, u8 id_type, u8 * data,
return 0;
}
-static_always_inline void
-ikev2_set_ts_type (ikev2_ts_t * ts, const ip_address_t * addr)
+static void
+ikev2_set_ts_type (ikev2_ts_t *ts, const ip_address_t *addr)
{
if (ip_addr_version (addr) == AF_IP4)
ts->ts_type = TS_IPV4_ADDR_RANGE;
@@ -4004,9 +4376,9 @@ ikev2_set_ts_type (ikev2_ts_t * ts, const ip_address_t * addr)
ts->ts_type = TS_IPV6_ADDR_RANGE;
}
-static_always_inline void
-ikev2_set_ts_addrs (ikev2_ts_t * ts, const ip_address_t * start,
- const ip_address_t * end)
+static void
+ikev2_set_ts_addrs (ikev2_ts_t *ts, const ip_address_t *start,
+ const ip_address_t *end)
{
ip_address_copy (&ts->start_addr, start);
ip_address_copy (&ts->end_addr, end);
@@ -4103,15 +4475,15 @@ ikev2_set_profile_ike_transforms (vlib_main_t * vm, u8 * name,
u32 crypto_key_size)
{
ikev2_profile_t *p;
- clib_error_t *r;
p = ikev2_profile_index_by_name (name);
-
if (!p)
- {
- r = clib_error_return (0, "unknown profile %v", name);
- return r;
- }
+ return clib_error_return (0, "unknown profile %v", name);
+
+ if ((IKEV2_TRANSFORM_INTEG_TYPE_NONE != integ_alg) +
+ (IKEV2_TRANSFORM_ENCR_TYPE_AES_GCM_16 == crypto_alg) !=
+ 1)
+ return clib_error_return (0, "invalid cipher + integrity algorithm");
p->ike_ts.crypto_alg = crypto_alg;
p->ike_ts.integ_alg = integ_alg;
@@ -4264,13 +4636,20 @@ ikev2_resolve_responder_hostname (vlib_main_t *vm, ikev2_responder_t *r)
dns_cache_entry_t *ep = 0;
dns_pending_request_t _t0, *t0 = &_t0;
dns_resolve_name_t _rn, *rn = &_rn;
+ u8 *name;
int rv;
- if (!km->dns_resolve_name)
+ if (!km->dns_resolve_name_ptr)
return clib_error_return (0, "cannot load symbols from dns plugin");
t0->request_type = DNS_API_PENDING_NAME_TO_IP;
- rv = km->dns_resolve_name (r->hostname, &ep, t0, rn);
+ /* VPP main curse: IKEv2 uses only non-NULL terminated vectors internally
+ * whereas DNS resolver expects a NULL-terminated C-string */
+ name = vec_dup (r->hostname);
+ vec_terminate_c_string (name);
+ rv = ((__typeof__ (dns_resolve_name) *) km->dns_resolve_name_ptr) (name, &ep,
+ t0, rn);
+ vec_free (name);
if (rv < 0)
return clib_error_return (0, "dns lookup failure");
@@ -4339,7 +4718,7 @@ ikev2_initiate_sa_init (vlib_main_t * vm, u8 * name)
proposals[0].protocol_id = IKEV2_PROTOCOL_IKE;
/* Add and then cleanup proposal data */
- ikev2_payload_add_sa (chain, proposals);
+ ikev2_payload_add_sa (chain, proposals, 0);
ikev2_sa_free_proposal_vector (&proposals);
sa.is_initiator = 1;
@@ -4373,6 +4752,7 @@ ikev2_initiate_sa_init (vlib_main_t * vm, u8 * name)
sa.childs[0].i_proposals[0].protocol_id = IKEV2_PROTOCOL_ESP;
RAND_bytes ((u8 *) & sa.childs[0].i_proposals[0].spi,
sizeof (sa.childs[0].i_proposals[0].spi));
+ sa.childs[0].i_proposals[0].spi &= 0xffffffff;
/* Add NAT detection notification messages (mandatory) */
u8 *nat_detection_sha1 = ikev2_compute_nat_sha1 (
@@ -4524,7 +4904,6 @@ ikev2_initiate_delete_child_sa (vlib_main_t * vm, u32 ispi)
ikev2_sa_t *sa;
if (fchild)
break;
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
fchild = ikev2_sa_get_child(sa, ispi, IKEV2_PROTOCOL_ESP, 1);
if (fchild)
@@ -4533,7 +4912,6 @@ ikev2_initiate_delete_child_sa (vlib_main_t * vm, u32 ispi)
break;
}
}
- /* *INDENT-ON* */
}
if (!fchild || !fsa)
@@ -4564,7 +4942,6 @@ ikev2_initiate_delete_ike_sa (vlib_main_t * vm, u64 ispi)
ikev2_sa_t *sa;
if (fsa)
break;
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
if (sa->ispi == ispi)
{
@@ -4573,7 +4950,6 @@ ikev2_initiate_delete_ike_sa (vlib_main_t * vm, u64 ispi)
break;
}
}
- /* *INDENT-ON* */
}
if (!fsa)
@@ -4615,10 +4991,12 @@ ikev2_rekey_child_sa_internal (vlib_main_t * vm, ikev2_sa_t * sa,
ikev2_rekey_t *rekey;
vec_reset_length (sa->rekey);
vec_add2 (sa->rekey, rekey, 1);
+ rekey->kex = 0;
ikev2_sa_proposal_t *proposals = vec_dup (csa->i_proposals);
/*need new ispi */
RAND_bytes ((u8 *) & proposals[0].spi, sizeof (proposals[0].spi));
+ proposals[0].spi &= 0xffffffff;
rekey->spi = proposals[0].spi;
rekey->ispi = csa->i_proposals->spi;
len = ikev2_generate_message (b0, sa, ike0, proposals, 0, 0);
@@ -4647,7 +5025,6 @@ ikev2_initiate_rekey_child_sa (vlib_main_t * vm, u32 ispi)
ikev2_sa_t *sa;
if (fchild)
break;
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
fchild = ikev2_sa_get_child(sa, ispi, IKEV2_PROTOCOL_ESP, 1);
if (fchild)
@@ -4656,7 +5033,6 @@ ikev2_initiate_rekey_child_sa (vlib_main_t * vm, u32 ispi)
break;
}
}
- /* *INDENT-ON* */
}
if (!fchild || !fsa)
@@ -4689,12 +5065,10 @@ ikev2_sa_del (ikev2_profile_t * p, u32 sw_if_index)
vec_foreach (tkm, km->per_thread_data)
{
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
if (ikev2_sa_sw_if_match (sa, sw_if_index))
vec_add1 (sa_vec, sa);
}
- /* *INDENT-ON* */
vec_foreach (sap, sa_vec)
{
@@ -4704,12 +5078,10 @@ ikev2_sa_del (ikev2_profile_t * p, u32 sw_if_index)
}
vec_free (sa_vec);
- /* *INDENT-OFF* */
pool_foreach (sa, km->sais) {
if (ikev2_sa_sw_if_match (sa, sw_if_index))
vec_add1 (ispi_vec, sa->ispi);
}
- /* *INDENT-ON* */
vec_foreach (ispi, ispi_vec)
{
@@ -4728,12 +5100,10 @@ ikev2_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
if (is_add)
return 0;
- /* *INDENT-OFF* */
pool_foreach (p, km->profiles) {
if (p->responder.sw_if_index == sw_if_index)
ikev2_sa_del (p, sw_if_index);
}
- /* *INDENT-ON* */
return 0;
}
@@ -4744,67 +5114,24 @@ clib_error_t *
ikev2_init (vlib_main_t * vm)
{
ikev2_main_t *km = &ikev2_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- int thread_id;
clib_memset (km, 0, sizeof (ikev2_main_t));
+
+ km->log_level = IKEV2_LOG_ERROR;
+ km->log_class = vlib_log_register_class ("ikev2", 0);
+
km->vnet_main = vnet_get_main ();
km->vlib_main = vm;
km->liveness_period = IKEV2_LIVENESS_PERIOD_CHECK;
km->liveness_max_retries = IKEV2_LIVENESS_RETRIES;
- ikev2_crypto_init (km);
-
- mhash_init_vec_string (&km->profile_index_by_name, sizeof (uword));
-
- vec_validate_aligned (km->per_thread_data, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
- for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++)
- {
- ikev2_main_per_thread_data_t *ptd =
- vec_elt_at_index (km->per_thread_data, thread_id);
- ptd->sa_by_rspi = hash_create (0, sizeof (uword));
-
-#if OPENSSL_VERSION_NUMBER >= 0x10100000L
- ptd->evp_ctx = EVP_CIPHER_CTX_new ();
- ptd->hmac_ctx = HMAC_CTX_new ();
-#else
- EVP_CIPHER_CTX_init (&ptd->_evp_ctx);
- ptd->evp_ctx = &ptd->_evp_ctx;
- HMAC_CTX_init (&(ptd->_hmac_ctx));
- ptd->hmac_ctx = &ptd->_hmac_ctx;
-#endif
- }
-
- km->sa_by_ispi = hash_create (0, sizeof (uword));
- km->sw_if_indices = hash_create (0, 0);
-
- udp_register_dst_port (vm, IKEV2_PORT, ikev2_node_ip4.index, 1);
- udp_register_dst_port (vm, IKEV2_PORT, ikev2_node_ip6.index, 0);
- udp_register_dst_port (vm, IKEV2_PORT_NATT, ikev2_node_ip4.index, 1);
- udp_register_dst_port (vm, IKEV2_PORT_NATT, ikev2_node_ip6.index, 0);
-
- vlib_punt_hdl_t punt_hdl = vlib_punt_client_register ("ikev2-ip4-natt");
- vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
- "ikev2-ip4-natt");
- ikev2_cli_reference ();
-
- km->dns_resolve_name =
- vlib_get_plugin_symbol ("dns_plugin.so", "dns_resolve_name");
- if (!km->dns_resolve_name)
- ikev2_log_error ("cannot load symbols from dns plugin");
-
- km->log_level = IKEV2_LOG_ERROR;
- km->log_class = vlib_log_register_class ("ikev2", 0);
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ikev2_init) = {
- .runs_after = VLIB_INITS ("ipsec_init", "ipsec_punt_init", "dns_init"),
+ .runs_after = VLIB_INITS ("ipsec_init", "ipsec_punt_init"),
};
-/* *INDENT-ON* */
static u8
ikev2_mngr_process_child_sa (ikev2_sa_t * sa, ikev2_child_sa_t * csa,
@@ -4875,14 +5202,12 @@ ikev2_mngr_process_child_sa (ikev2_sa_t * sa, ikev2_child_sa_t * csa,
ip_addr_bytes (&sa->iaddr));
}
- /* *INDENT-OFF* */
ipip_tunnel_key_t key = {
.src = local_ip,
.dst = remote_ip,
.transport = IPIP_TRANSPORT_IP4,
.fib_index = 0,
};
- /* *INDENT-ON* */
ipip = ipip_tunnel_db_find (&key);
@@ -4963,7 +5288,6 @@ ikev2_mngr_process_ipsec_sa (ipsec_sa_t * ipsec_sa)
ikev2_sa_t *sa;
if (fchild)
break;
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
fchild = ikev2_sa_get_child(sa, ipsec_sa->spi, IKEV2_PROTOCOL_ESP, 1);
if (fchild)
@@ -4972,7 +5296,6 @@ ikev2_mngr_process_ipsec_sa (ipsec_sa_t * ipsec_sa)
break;
}
}
- /* *INDENT-ON* */
}
vlib_get_combined_counter (&ipsec_sa_counters,
ipsec_sa->stat_index, &counts);
@@ -5070,7 +5393,6 @@ ikev2_process_pending_sa_init (vlib_main_t *vm, ikev2_main_t *km)
u64 ispi;
ikev2_sa_t *sa;
- /* *INDENT-OFF* */
hash_foreach (ispi, sai, km->sa_by_ispi,
({
sa = pool_elt_at_index (km->sais, sai);
@@ -5079,7 +5401,6 @@ ikev2_process_pending_sa_init (vlib_main_t *vm, ikev2_main_t *km)
ikev2_process_pending_sa_init_one (vm, km, sa);
}));
- /* *INDENT-ON* */
}
static void
@@ -5137,8 +5458,8 @@ ikev2_disable_dpd (void)
km->dpd_disabled = 1;
}
-static_always_inline int
-ikev2_mngr_process_responder_sas (ikev2_sa_t * sa)
+static int
+ikev2_mngr_process_responder_sas (ikev2_sa_t *sa)
{
ikev2_main_t *km = &ikev2_main;
vlib_main_t *vm = km->vlib_main;
@@ -5169,6 +5490,9 @@ ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
ikev2_child_sa_t *c;
u32 *sai;
+ /* lazy init will wake it up */
+ vlib_process_wait_for_event (vm);
+
while (1)
{
vlib_process_wait_for_event_or_clock (vm, 2);
@@ -5181,34 +5505,38 @@ ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
ikev2_sa_t *sa;
u32 *to_be_deleted = 0;
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
ikev2_child_sa_t *c;
u8 del_old_ids = 0;
- if (sa->state != IKEV2_STATE_AUTHENTICATED)
- continue;
+ if (sa->state == IKEV2_STATE_SA_INIT)
+ {
+ if (vec_len (sa->childs) > 0)
+ vec_add1 (to_be_deleted, sa - tkm->sas);
+ }
+ else if (sa->state != IKEV2_STATE_AUTHENTICATED)
+ continue;
- if (sa->old_remote_id_present && 0 > sa->old_id_expiration)
- {
- sa->old_remote_id_present = 0;
- del_old_ids = 1;
- }
- else
- sa->old_id_expiration -= 1;
+ if (sa->old_remote_id_present && 0 > sa->old_id_expiration)
+ {
+ sa->old_remote_id_present = 0;
+ del_old_ids = 1;
+ }
+ else
+ sa->old_id_expiration -= 1;
- vec_foreach (c, sa->childs)
- ikev2_mngr_process_child_sa(sa, c, del_old_ids);
+ vec_foreach (c, sa->childs)
+ ikev2_mngr_process_child_sa (sa, c, del_old_ids);
- if (!km->dpd_disabled && ikev2_mngr_process_responder_sas (sa))
- vec_add1 (to_be_deleted, sa - tkm->sas);
- }
- /* *INDENT-ON* */
+ if (!km->dpd_disabled && ikev2_mngr_process_responder_sas (sa))
+ vec_add1 (to_be_deleted, sa - tkm->sas);
+ }
vec_foreach (sai, to_be_deleted)
{
sa = pool_elt_at_index (tkm->sas, sai[0]);
- u8 reinitiate = (sa->is_initiator && sa->profile_index != ~0);
+ const u32 profile_index = sa->profile_index;
+ const int reinitiate = (sa->is_initiator && profile_index != ~0);
vec_foreach (c, sa->childs)
{
ikev2_delete_tunnel_interface (km->vnet_main, sa, c);
@@ -5220,7 +5548,7 @@ ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
if (reinitiate)
{
- p = pool_elt_at_index (km->profiles, sa->profile_index);
+ p = pool_elt_at_index (km->profiles, profile_index);
if (p)
{
clib_error_t *e = ikev2_initiate_sa_init (vm, p->name);
@@ -5237,19 +5565,16 @@ ikev2_mngr_process_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
/* process ipsec sas */
ipsec_sa_t *sa;
- /* *INDENT-OFF* */
pool_foreach (sa, ipsec_sa_pool)
{
ikev2_mngr_process_ipsec_sa (sa);
}
- /* *INDENT-ON* */
ikev2_process_pending_sa_init (vm, km);
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ikev2_mngr_process_node, static) = {
.function = ikev2_mngr_process_fn,
.type = VLIB_NODE_TYPE_PROCESS,
@@ -5257,11 +5582,60 @@ VLIB_REGISTER_NODE (ikev2_mngr_process_node, static) = {
"ikev2-manager-process",
};
+static void
+ikev2_lazy_init (ikev2_main_t *km)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ int thread_id;
+
+ if (km->lazy_init_done)
+ return;
+
+ ikev2_crypto_init (km);
+
+ mhash_init_vec_string (&km->profile_index_by_name, sizeof (uword));
+
+ vec_validate_aligned (km->per_thread_data, tm->n_vlib_mains - 1,
+ CLIB_CACHE_LINE_BYTES);
+ for (thread_id = 0; thread_id < tm->n_vlib_mains; thread_id++)
+ {
+ ikev2_main_per_thread_data_t *ptd =
+ vec_elt_at_index (km->per_thread_data, thread_id);
+
+ ptd->sa_by_rspi = hash_create (0, sizeof (uword));
+
+#if OPENSSL_VERSION_NUMBER >= 0x10100000L
+ ptd->evp_ctx = EVP_CIPHER_CTX_new ();
+ ptd->hmac_ctx = HMAC_CTX_new ();
+#else
+ EVP_CIPHER_CTX_init (&ptd->_evp_ctx);
+ ptd->evp_ctx = &ptd->_evp_ctx;
+ HMAC_CTX_init (&(ptd->_hmac_ctx));
+ ptd->hmac_ctx = &ptd->_hmac_ctx;
+#endif
+ }
+
+ km->sa_by_ispi = hash_create (0, sizeof (uword));
+ km->sw_if_indices = hash_create (0, 0);
+
+ km->punt_hdl = vlib_punt_client_register ("ikev2");
+
+ km->dns_resolve_name_ptr =
+ vlib_get_plugin_symbol ("dns_plugin.so", "dns_resolve_name");
+ if (!km->dns_resolve_name_ptr)
+ ikev2_log_error ("cannot load symbols from dns plugin");
+
+ /* wake up ikev2 process */
+ vlib_process_signal_event (vlib_get_first_main (),
+ ikev2_mngr_process_node.index, 0, 0);
+
+ km->lazy_init_done = 1;
+}
+
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Internet Key Exchange (IKEv2) Protocol",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ikev2/ikev2.h b/src/plugins/ikev2/ikev2.h
index 308ffe52ba4..9ed0ecc494c 100644
--- a/src/plugins/ikev2/ikev2.h
+++ b/src/plugins/ikev2/ikev2.h
@@ -32,7 +32,6 @@
typedef u8 v8;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u64 ispi;
u64 rspi;
@@ -42,14 +41,12 @@ typedef CLIB_PACKED (struct {
u8 flags;
u32 msgid; u32 length; u8 payload[0];
}) ike_header_t;
-/* *INDENT-ON* */
#define ike_hdr_is_response(_h) ((_h)->flags & IKEV2_HDR_FLAG_RESPONSE)
#define ike_hdr_is_request(_h) (!ike_hdr_is_response(_h))
#define ike_hdr_is_initiator(_h) ((_h)->flags & IKEV2_HDR_FLAG_INITIATOR)
#define ike_hdr_is_responder(_h) (!(ike_hdr_is_initiator(_h)))
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -58,17 +55,13 @@ typedef CLIB_PACKED (struct {
u8 reserved[2];
u8 payload[0];
}) ike_ke_payload_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
u16 length; u8 payload[0];
}) ike_payload_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -77,9 +70,7 @@ typedef CLIB_PACKED (struct {
u8 reserved[3];
u8 payload[0];
}) ike_auth_payload_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -87,7 +78,6 @@ typedef CLIB_PACKED (struct {
u8 id_type;
u8 reserved[3]; u8 payload[0];
}) ike_id_payload_header_t;
-/* *INDENT-ON* */
#define IKE_VERSION_2 0x20
@@ -451,7 +441,6 @@ uword unformat_ikev2_transform_dh_type (unformat_input_t * input,
va_list * args);
uword unformat_ikev2_transform_esn_type (unformat_input_t * input,
va_list * args);
-void ikev2_cli_reference (void);
clib_error_t *ikev2_set_liveness_params (u32 period, u32 max_retries);
diff --git a/src/plugins/ikev2/ikev2_api.c b/src/plugins/ikev2/ikev2_api.c
index d104e54579a..a3e71668126 100644
--- a/src/plugins/ikev2/ikev2_api.c
+++ b/src/plugins/ikev2/ikev2_api.c
@@ -188,12 +188,10 @@ vl_api_ikev2_profile_dump_t_handler (vl_api_ikev2_profile_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (profile, im->profiles)
{
send_profile (profile, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -207,6 +205,32 @@ ikev2_copy_stats (vl_api_ikev2_sa_stats_t *dst, const ikev2_stats_t *src)
dst->n_sa_auth_req = src->n_sa_auth_req;
}
+static vl_api_ikev2_state_t
+ikev2_state_encode (ikev2_state_t state)
+{
+ switch (state)
+ {
+ case IKEV2_STATE_UNKNOWN:
+ return UNKNOWN;
+ case IKEV2_STATE_SA_INIT:
+ return SA_INIT;
+ case IKEV2_STATE_DELETED:
+ return DELETED;
+ case IKEV2_STATE_AUTH_FAILED:
+ return AUTH_FAILED;
+ case IKEV2_STATE_AUTHENTICATED:
+ return AUTHENTICATED;
+ case IKEV2_STATE_NOTIFY_AND_DELETE:
+ return NOTIFY_AND_DELETE;
+ case IKEV2_STATE_TS_UNACCEPTABLE:
+ return TS_UNACCEPTABLE;
+ case IKEV2_STATE_NO_PROPOSAL_CHOSEN:
+ return NO_PROPOSAL_CHOSEN;
+ }
+
+ return UNKNOWN;
+}
+
static void
send_sa (ikev2_sa_t * sa, vl_api_ikev2_sa_dump_t * mp, u32 api_sa_index)
{
@@ -214,7 +238,6 @@ send_sa (ikev2_sa_t * sa, vl_api_ikev2_sa_dump_t * mp, u32 api_sa_index)
int rv = 0;
ikev2_sa_transform_t *tr;
- /* *INDENT-OFF* */
REPLY_MACRO2_ZERO (VL_API_IKEV2_SA_DETAILS,
{
vl_api_ikev2_sa_t *rsa = &rmp->sa;
@@ -270,7 +293,6 @@ send_sa (ikev2_sa_t * sa, vl_api_ikev2_sa_dump_t * mp, u32 api_sa_index)
vl_api_ikev2_sa_t_endian(rsa);
});
- /* *INDENT-ON* */
}
static void
@@ -282,17 +304,199 @@ vl_api_ikev2_sa_dump_t_handler (vl_api_ikev2_sa_dump_t * mp)
vec_foreach (tkm, km->per_thread_data)
{
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas)
{
u32 api_sa_index = ikev2_encode_sa_index (sa - tkm->sas,
tkm - km->per_thread_data);
send_sa (sa, mp, api_sa_index);
}
- /* *INDENT-ON* */
}
}
+static void
+send_sa_v2 (ikev2_sa_t *sa, vl_api_ikev2_sa_v2_dump_t *mp, u32 api_sa_index)
+{
+ ikev2_main_t *km = &ikev2_main;
+ vl_api_ikev2_sa_v2_details_t *rmp = 0;
+ int rv = 0;
+ ikev2_sa_transform_t *tr;
+ ikev2_profile_t *p;
+ p = pool_elt_at_index (km->profiles, sa->profile_index);
+
+ REPLY_MACRO2_ZERO (VL_API_IKEV2_SA_V2_DETAILS, {
+ vl_api_ikev2_sa_v2_t *rsa = &rmp->sa;
+ vl_api_ikev2_keys_t *k = &rsa->keys;
+
+ int size_data = sizeof (rsa->profile_name) - 1;
+ if (vec_len (p->name) < size_data)
+ size_data = vec_len (p->name);
+ clib_memcpy (rsa->profile_name, p->name, size_data);
+
+ rsa->state = ikev2_state_encode (sa->state);
+
+ rsa->sa_index = api_sa_index;
+ ip_address_encode2 (&sa->iaddr, &rsa->iaddr);
+ ip_address_encode2 (&sa->raddr, &rsa->raddr);
+ rsa->ispi = sa->ispi;
+ rsa->rspi = sa->rspi;
+ cp_id (&rsa->i_id, &sa->i_id);
+ cp_id (&rsa->r_id, &sa->r_id);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ if (tr)
+ cp_sa_transform (&rsa->encryption, tr);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+ if (tr)
+ cp_sa_transform (&rsa->prf, tr);
+
+ tr =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+ if (tr)
+ cp_sa_transform (&rsa->integrity, tr);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
+ if (tr)
+ cp_sa_transform (&rsa->dh, tr);
+
+ k->sk_d_len = vec_len (sa->sk_d);
+ clib_memcpy (&k->sk_d, sa->sk_d, k->sk_d_len);
+
+ k->sk_ai_len = vec_len (sa->sk_ai);
+ clib_memcpy (&k->sk_ai, sa->sk_ai, k->sk_ai_len);
+
+ k->sk_ar_len = vec_len (sa->sk_ar);
+ clib_memcpy (&k->sk_ar, sa->sk_ar, k->sk_ar_len);
+
+ k->sk_ei_len = vec_len (sa->sk_ei);
+ clib_memcpy (&k->sk_ei, sa->sk_ei, k->sk_ei_len);
+
+ k->sk_er_len = vec_len (sa->sk_er);
+ clib_memcpy (&k->sk_er, sa->sk_er, k->sk_er_len);
+
+ k->sk_pi_len = vec_len (sa->sk_pi);
+ clib_memcpy (&k->sk_pi, sa->sk_pi, k->sk_pi_len);
+
+ k->sk_pr_len = vec_len (sa->sk_pr);
+ clib_memcpy (&k->sk_pr, sa->sk_pr, k->sk_pr_len);
+
+ ikev2_copy_stats (&rsa->stats, &sa->stats);
+
+ vl_api_ikev2_sa_v2_t_endian (rsa);
+ });
+}
+
+static void
+vl_api_ikev2_sa_v2_dump_t_handler (vl_api_ikev2_sa_v2_dump_t *mp)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *sa;
+
+ vec_foreach (tkm, km->per_thread_data)
+ {
+ pool_foreach (sa, tkm->sas)
+ {
+ u32 api_sa_index =
+ ikev2_encode_sa_index (sa - tkm->sas, tkm - km->per_thread_data);
+ send_sa_v2 (sa, mp, api_sa_index);
+ }
+ }
+}
+
+static void
+send_sa_v3 (ikev2_sa_t *sa, vl_api_ikev2_sa_v3_dump_t *mp, u32 api_sa_index)
+{
+ ikev2_main_t *km = &ikev2_main;
+ vl_api_ikev2_sa_v3_details_t *rmp = 0;
+ int rv = 0;
+ ikev2_sa_transform_t *tr;
+ ikev2_profile_t *p;
+ p = pool_elt_at_index (km->profiles, sa->profile_index);
+ vlib_main_t *vm = vlib_get_main ();
+
+ REPLY_MACRO2_ZERO (VL_API_IKEV2_SA_V3_DETAILS, {
+ vl_api_ikev2_sa_v3_t *rsa = &rmp->sa;
+ vl_api_ikev2_keys_t *k = &rsa->keys;
+
+ int size_data = sizeof (rsa->profile_name) - 1;
+ if (vec_len (p->name) < size_data)
+ size_data = vec_len (p->name);
+ clib_memcpy (rsa->profile_name, p->name, size_data);
+
+ rsa->state = ikev2_state_encode (sa->state);
+
+ rsa->uptime = vlib_time_now (vm) - sa->auth_timestamp;
+
+ rsa->sa_index = api_sa_index;
+ ip_address_encode2 (&sa->iaddr, &rsa->iaddr);
+ ip_address_encode2 (&sa->raddr, &rsa->raddr);
+ rsa->ispi = sa->ispi;
+ rsa->rspi = sa->rspi;
+ cp_id (&rsa->i_id, &sa->i_id);
+ cp_id (&rsa->r_id, &sa->r_id);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ if (tr)
+ cp_sa_transform (&rsa->encryption, tr);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_PRF);
+ if (tr)
+ cp_sa_transform (&rsa->prf, tr);
+
+ tr =
+ ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_INTEG);
+ if (tr)
+ cp_sa_transform (&rsa->integrity, tr);
+
+ tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
+ if (tr)
+ cp_sa_transform (&rsa->dh, tr);
+
+ k->sk_d_len = vec_len (sa->sk_d);
+ clib_memcpy (&k->sk_d, sa->sk_d, k->sk_d_len);
+
+ k->sk_ai_len = vec_len (sa->sk_ai);
+ clib_memcpy (&k->sk_ai, sa->sk_ai, k->sk_ai_len);
+
+ k->sk_ar_len = vec_len (sa->sk_ar);
+ clib_memcpy (&k->sk_ar, sa->sk_ar, k->sk_ar_len);
+
+ k->sk_ei_len = vec_len (sa->sk_ei);
+ clib_memcpy (&k->sk_ei, sa->sk_ei, k->sk_ei_len);
+
+ k->sk_er_len = vec_len (sa->sk_er);
+ clib_memcpy (&k->sk_er, sa->sk_er, k->sk_er_len);
+
+ k->sk_pi_len = vec_len (sa->sk_pi);
+ clib_memcpy (&k->sk_pi, sa->sk_pi, k->sk_pi_len);
+
+ k->sk_pr_len = vec_len (sa->sk_pr);
+ clib_memcpy (&k->sk_pr, sa->sk_pr, k->sk_pr_len);
+
+ ikev2_copy_stats (&rsa->stats, &sa->stats);
+
+ vl_api_ikev2_sa_v3_t_endian (rsa);
+ });
+}
+
+static void
+vl_api_ikev2_sa_v3_dump_t_handler (vl_api_ikev2_sa_v3_dump_t *mp)
+{
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *sa;
+
+ vec_foreach (tkm, km->per_thread_data)
+ {
+ pool_foreach (sa, tkm->sas)
+ {
+ u32 api_sa_index =
+ ikev2_encode_sa_index (sa - tkm->sas, tkm - km->per_thread_data);
+ send_sa_v3 (sa, mp, api_sa_index);
+ }
+ }
+}
static void
send_child_sa (ikev2_child_sa_t * child,
@@ -303,7 +507,6 @@ send_child_sa (ikev2_child_sa_t * child,
int rv = 0;
ikev2_sa_transform_t *tr;
- /* *INDENT-OFF* */
REPLY_MACRO2_ZERO (VL_API_IKEV2_CHILD_SA_DETAILS,
{
vl_api_ikev2_keys_t *k = &rmp->child_sa.keys;
@@ -348,7 +551,6 @@ send_child_sa (ikev2_child_sa_t * child,
vl_api_ikev2_child_sa_t_endian (&rmp->child_sa);
});
- /* *INDENT-ON* */
}
static void
@@ -380,6 +582,85 @@ vl_api_ikev2_child_sa_dump_t_handler (vl_api_ikev2_child_sa_dump_t * mp)
}
static void
+send_child_sa_v2 (ikev2_child_sa_t *child, vl_api_ikev2_child_sa_v2_dump_t *mp,
+ u32 child_sa_index, u32 sa_index)
+{
+ vl_api_ikev2_child_sa_v2_details_t *rmp = 0;
+ int rv = 0;
+ ikev2_sa_transform_t *tr;
+ vlib_main_t *vm = vlib_get_main ();
+
+ REPLY_MACRO2_ZERO (VL_API_IKEV2_CHILD_SA_V2_DETAILS, {
+ vl_api_ikev2_keys_t *k = &rmp->child_sa.keys;
+ rmp->child_sa.child_sa_index = child_sa_index;
+ rmp->child_sa.uptime = vlib_time_now (vm) - child->timestamp;
+ rmp->child_sa.sa_index = sa_index;
+ rmp->child_sa.i_spi = child->i_proposals ? child->i_proposals[0].spi : 0;
+ rmp->child_sa.r_spi = child->r_proposals ? child->r_proposals[0].spi : 0;
+
+ tr =
+ ikev2_sa_get_td_for_type (child->r_proposals, IKEV2_TRANSFORM_TYPE_ENCR);
+ if (tr)
+ cp_sa_transform (&rmp->child_sa.encryption, tr);
+
+ tr = ikev2_sa_get_td_for_type (child->r_proposals,
+ IKEV2_TRANSFORM_TYPE_INTEG);
+ if (tr)
+ cp_sa_transform (&rmp->child_sa.integrity, tr);
+
+ tr =
+ ikev2_sa_get_td_for_type (child->r_proposals, IKEV2_TRANSFORM_TYPE_ESN);
+ if (tr)
+ cp_sa_transform (&rmp->child_sa.esn, tr);
+
+ k->sk_ei_len = vec_len (child->sk_ei);
+ clib_memcpy (&k->sk_ei, child->sk_ei, k->sk_ei_len);
+
+ k->sk_er_len = vec_len (child->sk_er);
+ clib_memcpy (&k->sk_er, child->sk_er, k->sk_er_len);
+
+ if (vec_len (child->sk_ai))
+ {
+ k->sk_ai_len = vec_len (child->sk_ai);
+ clib_memcpy (&k->sk_ai, child->sk_ai, k->sk_ai_len);
+
+ k->sk_ar_len = vec_len (child->sk_ar);
+ clib_memcpy (&k->sk_ar, child->sk_ar, k->sk_ar_len);
+ }
+
+ vl_api_ikev2_child_sa_v2_t_endian (&rmp->child_sa);
+ });
+}
+
+static void
+vl_api_ikev2_child_sa_v2_dump_t_handler (vl_api_ikev2_child_sa_v2_dump_t *mp)
+{
+ ikev2_main_t *im = &ikev2_main;
+ ikev2_main_per_thread_data_t *tkm;
+ ikev2_sa_t *sa;
+ ikev2_child_sa_t *child;
+ u32 sai = ~0, ti = ~0;
+
+ ikev2_decode_sa_index (clib_net_to_host_u32 (mp->sa_index), &sai, &ti);
+
+ if (vec_len (im->per_thread_data) <= ti)
+ return;
+
+ tkm = vec_elt_at_index (im->per_thread_data, ti);
+
+ if (pool_len (tkm->sas) <= sai || pool_is_free_index (tkm->sas, sai))
+ return;
+
+ sa = pool_elt_at_index (tkm->sas, sai);
+
+ vec_foreach (child, sa->childs)
+ {
+ u32 child_sa_index = child - sa->childs;
+ send_child_sa_v2 (child, mp, child_sa_index, sai);
+ }
+}
+
+static void
vl_api_ikev2_traffic_selector_dump_t_handler
(vl_api_ikev2_traffic_selector_dump_t * mp)
{
@@ -414,7 +695,6 @@ static void
vl_api_ikev2_traffic_selector_details_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2_ZERO (VL_API_IKEV2_TRAFFIC_SELECTOR_DETAILS,
{
rmp->ts.sa_index = api_sa_index;
@@ -422,7 +702,6 @@ static void
cp_ts (&rmp->ts, ts, mp->is_initiator);
vl_api_ikev2_ts_t_endian (&rmp->ts);
});
- /* *INDENT-ON* */
}
}
@@ -451,13 +730,11 @@ vl_api_ikev2_nonce_get_t_handler (vl_api_ikev2_nonce_get_t * mp)
int data_len = vec_len (nonce);
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO3_ZERO (VL_API_IKEV2_NONCE_GET_REPLY, data_len,
{
rmp->data_len = clib_host_to_net_u32 (data_len);
clib_memcpy (rmp->nonce, nonce, data_len);
});
- /* *INDENT-ON* */
}
static void
@@ -490,8 +767,6 @@ static void
{
vl_api_ikev2_profile_set_liveness_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
clib_error_t *error;
error = ikev2_set_liveness_params (clib_net_to_host_u32 (mp->period),
clib_net_to_host_u32 (mp->max_retries));
@@ -501,10 +776,6 @@ static void
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_LIVENESS_REPLY);
}
@@ -513,8 +784,6 @@ vl_api_ikev2_profile_add_del_t_handler (vl_api_ikev2_profile_add_del_t * mp)
{
vl_api_ikev2_profile_add_del_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
u8 *tmp = format (0, "%s", mp->name);
@@ -526,10 +795,6 @@ vl_api_ikev2_profile_add_del_t_handler (vl_api_ikev2_profile_add_del_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_ADD_DEL_REPLY);
}
@@ -539,8 +804,6 @@ static void
{
vl_api_ikev2_profile_set_auth_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
int data_len = ntohl (mp->data_len);
@@ -562,10 +825,6 @@ static void
}
else
rv = VNET_API_ERROR_INVALID_VALUE;
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_AUTH_REPLY);
}
@@ -574,8 +833,6 @@ vl_api_ikev2_profile_set_id_t_handler (vl_api_ikev2_profile_set_id_t * mp)
{
vl_api_ikev2_profile_set_id_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
u8 *tmp = format (0, "%s", mp->name);
@@ -596,9 +853,6 @@ vl_api_ikev2_profile_set_id_t_handler (vl_api_ikev2_profile_set_id_t * mp)
}
else
rv = VNET_API_ERROR_INVALID_VALUE;
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_ID_REPLY);
}
@@ -609,8 +863,6 @@ static void
{
vl_api_ikev2_profile_set_udp_encap_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
u8 *tmp = format (0, "%s", mp->name);
@@ -622,10 +874,6 @@ static void
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_UDP_ENCAP_REPLY);
}
@@ -634,8 +882,6 @@ vl_api_ikev2_profile_set_ts_t_handler (vl_api_ikev2_profile_set_ts_t * mp)
{
vl_api_ikev2_profile_set_ts_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
u8 *tmp = format (0, "%s", mp->name);
@@ -654,10 +900,6 @@ vl_api_ikev2_profile_set_ts_t_handler (vl_api_ikev2_profile_set_ts_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_TS_REPLY);
}
@@ -666,8 +908,6 @@ vl_api_ikev2_set_local_key_t_handler (vl_api_ikev2_set_local_key_t * mp)
{
vl_api_ikev2_set_local_key_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -678,10 +918,6 @@ vl_api_ikev2_set_local_key_t_handler (vl_api_ikev2_set_local_key_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_LOCAL_KEY_REPLY);
}
@@ -691,8 +927,6 @@ vl_api_ikev2_set_responder_hostname_t_handler (
{
vl_api_ikev2_set_responder_hostname_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -710,10 +944,6 @@ vl_api_ikev2_set_responder_hostname_t_handler (
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_RESPONDER_HOSTNAME_REPLY);
}
@@ -722,8 +952,6 @@ vl_api_ikev2_set_responder_t_handler (vl_api_ikev2_set_responder_t * mp)
{
vl_api_ikev2_set_responder_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -740,10 +968,6 @@ vl_api_ikev2_set_responder_t_handler (vl_api_ikev2_set_responder_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_RESPONDER_REPLY);
}
@@ -753,8 +977,6 @@ vl_api_ikev2_set_ike_transforms_t_handler (vl_api_ikev2_set_ike_transforms_t *
{
vl_api_ikev2_set_ike_transforms_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -772,10 +994,6 @@ vl_api_ikev2_set_ike_transforms_t_handler (vl_api_ikev2_set_ike_transforms_t *
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_IKE_TRANSFORMS_REPLY);
}
@@ -785,8 +1003,6 @@ vl_api_ikev2_set_esp_transforms_t_handler (vl_api_ikev2_set_esp_transforms_t *
{
vl_api_ikev2_set_esp_transforms_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -803,10 +1019,6 @@ vl_api_ikev2_set_esp_transforms_t_handler (vl_api_ikev2_set_esp_transforms_t *
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_ESP_TRANSFORMS_REPLY);
}
@@ -815,8 +1027,6 @@ vl_api_ikev2_set_sa_lifetime_t_handler (vl_api_ikev2_set_sa_lifetime_t * mp)
{
vl_api_ikev2_set_sa_lifetime_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -836,10 +1046,6 @@ vl_api_ikev2_set_sa_lifetime_t_handler (vl_api_ikev2_set_sa_lifetime_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_SET_SA_LIFETIME_REPLY);
}
@@ -849,8 +1055,6 @@ static void
{
vl_api_ikev2_profile_set_ipsec_udp_port_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
u8 *tmp = format (0, "%s", mp->name);
@@ -860,10 +1064,6 @@ static void
clib_net_to_host_u16 (mp->port),
mp->is_set);
vec_free (tmp);
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_SET_IPSEC_UDP_PORT_REPLY);
}
@@ -876,7 +1076,6 @@ static void
VALIDATE_SW_IF_INDEX (mp);
-#if WITH_LIBSSL > 0
u8 *tmp = format (0, "%s", mp->name);
clib_error_t *error;
@@ -890,10 +1089,6 @@ static void
rv = VNET_API_ERROR_UNSPECIFIED;
}
vec_free (tmp);
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_IKEV2_SET_TUNNEL_INTERFACE_REPLY);
}
@@ -903,8 +1098,6 @@ vl_api_ikev2_initiate_sa_init_t_handler (vl_api_ikev2_initiate_sa_init_t * mp)
{
vl_api_ikev2_initiate_sa_init_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -918,10 +1111,6 @@ vl_api_ikev2_initiate_sa_init_t_handler (vl_api_ikev2_initiate_sa_init_t * mp)
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_INITIATE_SA_INIT_REPLY);
}
@@ -931,8 +1120,6 @@ vl_api_ikev2_initiate_del_ike_sa_t_handler (vl_api_ikev2_initiate_del_ike_sa_t
{
vl_api_ikev2_initiate_del_ike_sa_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -943,10 +1130,6 @@ vl_api_ikev2_initiate_del_ike_sa_t_handler (vl_api_ikev2_initiate_del_ike_sa_t
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_INITIATE_DEL_IKE_SA_REPLY);
}
@@ -956,8 +1139,6 @@ static void
{
vl_api_ikev2_initiate_del_child_sa_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -968,10 +1149,6 @@ static void
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_INITIATE_DEL_CHILD_SA_REPLY);
}
@@ -981,8 +1158,6 @@ static void
{
vl_api_ikev2_profile_disable_natt_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
clib_error_t *error;
u8 *tmp = format (0, "%s", mp->name);
@@ -994,10 +1169,6 @@ static void
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_PROFILE_DISABLE_NATT_REPLY);
}
@@ -1007,8 +1178,6 @@ static void
{
vl_api_ikev2_initiate_rekey_child_sa_reply_t *rmp;
int rv = 0;
-
-#if WITH_LIBSSL > 0
vlib_main_t *vm = vlib_get_main ();
clib_error_t *error;
@@ -1019,10 +1188,6 @@ static void
clib_error_free (error);
rv = VNET_API_ERROR_UNSPECIFIED;
}
-#else
- rv = VNET_API_ERROR_UNIMPLEMENTED;
-#endif
-
REPLY_MACRO (VL_API_IKEV2_INITIATE_REKEY_CHILD_SA_REPLY);
}
diff --git a/src/plugins/ikev2/ikev2_cli.c b/src/plugins/ikev2/ikev2_cli.c
index 3523ce079b6..975774c48d5 100644
--- a/src/plugins/ikev2/ikev2_cli.c
+++ b/src/plugins/ikev2/ikev2_cli.c
@@ -74,12 +74,16 @@ format_ikev2_child_sa (u8 * s, va_list * va)
ikev2_ts_t *ts;
ikev2_sa_transform_t *tr;
u8 *c = 0;
+ vlib_main_t *vm = vlib_get_main ();
u32 indent = format_get_indent (s);
indent += 1;
s = format (s, "child sa %u:", index);
+ s = format (s, "\n uptime: %f (s)\n ",
+ vlib_time_now (vm) - child->timestamp);
+
tr = ikev2_sa_get_td_for_type (child->r_proposals,
IKEV2_TRANSFORM_TYPE_ENCR);
c = format (c, "%U ", format_ikev2_sa_transform, tr);
@@ -121,6 +125,12 @@ format_ikev2_child_sa (u8 * s, va_list * va)
return s;
}
+static char *stateNames[] = {
+#define _(v, f, s) s,
+ foreach_ikev2_state
+#undef _
+};
+
static u8 *
format_ikev2_sa (u8 * s, va_list * va)
{
@@ -129,6 +139,12 @@ format_ikev2_sa (u8 * s, va_list * va)
ikev2_sa_transform_t *tr;
ikev2_child_sa_t *child;
u32 indent = 1;
+ vlib_main_t *vm = vlib_get_main ();
+
+ ikev2_main_t *km = &ikev2_main;
+ ikev2_profile_t *p;
+
+ p = pool_elt_at_index (km->profiles, sa->profile_index);
s = format (s, "iip %U ispi %lx rip %U rspi %lx",
format_ip_address, &sa->iaddr, sa->ispi,
@@ -150,6 +166,16 @@ format_ikev2_sa (u8 * s, va_list * va)
tr = ikev2_sa_get_td_for_type (sa->r_proposals, IKEV2_TRANSFORM_TYPE_DH);
s = format (s, "%U", format_ikev2_sa_transform, tr);
+ s = format (s, "\n profile: %v", p->name);
+
+ if (sa->state <= IKEV2_STATE_NO_PROPOSAL_CHOSEN)
+ {
+ s = format (s, "\n state: %s", stateNames[sa->state]);
+ }
+
+ s =
+ format (s, "\n uptime: %f (s)\n", vlib_time_now (vm) - sa->auth_timestamp);
+
s = format (s, "\n%U", format_white_space, indent);
s = format (s, "nonce i:%U\n%Ur:%U\n",
@@ -232,7 +258,6 @@ show_ikev2_sa_command_fn (vlib_main_t * vm,
vec_foreach (tkm, km->per_thread_data)
{
- /* *INDENT-OFF* */
pool_foreach (sa, tkm->sas) {
if (show_one)
{
@@ -245,7 +270,6 @@ show_ikev2_sa_command_fn (vlib_main_t * vm,
else
s = format (s, "%U\n", format_ikev2_sa, sa, details);
}
- /* *INDENT-ON* */
}
vlib_cli_output (vm, "%v", s);
@@ -253,13 +277,11 @@ show_ikev2_sa_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ikev2_sa_command, static) = {
.path = "show ikev2 sa",
.short_help = "show ikev2 sa [rspi <rspi>] [details]",
.function = show_ikev2_sa_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ikev2_disable_dpd_command_fn (vlib_main_t * vm,
@@ -270,13 +292,11 @@ ikev2_disable_dpd_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ikev2_cli_disable_dpd_command, static) = {
.path = "ikev2 dpd disable",
.short_help = "ikev2 dpd disable",
.function = ikev2_disable_dpd_command_fn,
};
-/* *INDENT-ON* */
static uword
unformat_ikev2_token (unformat_input_t * input, va_list * va)
@@ -553,7 +573,6 @@ done:
return r;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ikev2_profile_add_del_command, static) = {
.path = "ikev2 profile",
.short_help =
@@ -574,7 +593,6 @@ VLIB_CLI_COMMAND (ikev2_profile_add_del_command, static) = {
"ikev2 profile set <id> disable natt\n",
.function = ikev2_profile_add_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ikev2_profile_command_fn (vlib_main_t * vm,
@@ -584,7 +602,6 @@ show_ikev2_profile_command_fn (vlib_main_t * vm,
ikev2_main_t *km = &ikev2_main;
ikev2_profile_t *p;
- /* *INDENT-OFF* */
pool_foreach (p, km->profiles) {
vlib_cli_output(vm, "profile %v", p->name);
@@ -651,18 +668,15 @@ show_ikev2_profile_command_fn (vlib_main_t * vm,
vlib_cli_output(vm, " lifetime %d jitter %d handover %d maxdata %d",
p->lifetime, p->lifetime_jitter, p->handover, p->lifetime_maxdata);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ikev2_profile_command, static) = {
.path = "show ikev2 profile",
.short_help = "show ikev2 profile",
.function = show_ikev2_profile_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_ikev2_liveness_period_fn (vlib_main_t * vm,
@@ -695,13 +709,11 @@ done:
return r;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ikev2_liveness_command, static) = {
.path = "ikev2 set liveness",
.short_help = "ikev2 set liveness <period> <max-retires>",
.function = set_ikev2_liveness_period_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_ikev2_local_key_command_fn (vlib_main_t * vm,
@@ -735,14 +747,12 @@ done:
return r;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ikev2_local_key_command, static) = {
.path = "set ikev2 local key",
.short_help =
"set ikev2 local key <file>",
.function = set_ikev2_local_key_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -793,7 +803,6 @@ done:
return r;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ikev2_initiate_command, static) = {
.path = "ikev2 initiate",
.short_help =
@@ -803,12 +812,6 @@ VLIB_CLI_COMMAND (ikev2_initiate_command, static) = {
"ikev2 initiate rekey-child-sa <child sa ispi>\n",
.function = ikev2_initiate_command_fn,
};
-/* *INDENT-ON* */
-
-void
-ikev2_cli_reference (void)
-{
-}
static clib_error_t *
ikev2_set_log_level_command_fn (vlib_main_t * vm,
@@ -838,13 +841,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ikev2_set_log_level_command, static) = {
.path = "ikev2 set logging level",
.function = ikev2_set_log_level_command_fn,
.short_help = "ikev2 set logging level <0-5>",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ikev2/ikev2_crypto.c b/src/plugins/ikev2/ikev2_crypto.c
index a9ab1bc8067..3d4ad0a28ed 100644
--- a/src/plugins/ikev2/ikev2_crypto.c
+++ b/src/plugins/ikev2/ikev2_crypto.c
@@ -488,7 +488,7 @@ BN_bn2binpad (const BIGNUM * a, unsigned char *to, int tolen)
{
vec_insert (to, pad, 0);
clib_memset (to, 0, pad);
- _vec_len (to) -= pad;
+ vec_dec_len (to, pad);
}
return tolen;
}
@@ -553,7 +553,7 @@ ikev2_generate_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t)
{
vec_insert (sa->dh_shared_key, pad, 0);
clib_memset (sa->dh_shared_key, 0, pad);
- _vec_len (sa->dh_shared_key) -= pad;
+ vec_dec_len (sa->dh_shared_key, pad);
}
BN_clear_free (ex);
}
@@ -679,7 +679,7 @@ ikev2_complete_dh (ikev2_sa_t * sa, ikev2_sa_transform_t * t)
{
vec_insert (sa->dh_shared_key, pad, 0);
clib_memset (sa->dh_shared_key, 0, pad);
- _vec_len (sa->dh_shared_key) -= pad;
+ vec_dec_len (sa->dh_shared_key, pad);
}
BN_clear_free (ex);
DH_free (dh);
diff --git a/src/plugins/ikev2/ikev2_payload.c b/src/plugins/ikev2/ikev2_payload.c
index 294864d8c43..5801a1b3e87 100644
--- a/src/plugins/ikev2/ikev2_payload.c
+++ b/src/plugins/ikev2/ikev2_payload.c
@@ -24,7 +24,6 @@
#include <plugins/ikev2/ikev2.h>
#include <plugins/ikev2/ikev2_priv.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -34,9 +33,7 @@ typedef CLIB_PACKED (struct {
u16 msg_type;
u8 payload[0];
}) ike_notify_payload_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_address_t start_addr;
ip4_address_t end_addr;
@@ -55,9 +52,7 @@ typedef CLIB_PACKED (struct {
u16 end_port;
u8 addr_pair[0];
}) ikev2_ts_payload_entry_t;
-/* *INDENT-OFF* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -66,9 +61,7 @@ typedef CLIB_PACKED (struct {
u8 reserved[3];
ikev2_ts_payload_entry_t ts[0];
}) ike_ts_payload_header_t;
-/* *INDENT-OFF* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 last_or_more;
u8 reserved;
@@ -78,9 +71,7 @@ typedef CLIB_PACKED (struct {
u8 spi_size;
u8 num_transforms; u32 spi[0];
}) ike_sa_proposal_data_t;
-/* *INDENT-OFF* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 last_or_more;
u8 reserved;
@@ -90,9 +81,7 @@ typedef CLIB_PACKED (struct {
u16 transform_id;
u8 attributes[0];
}) ike_sa_transform_data_t;
-/* *INDENT-OFF* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 nextpayload;
u8 flags;
@@ -102,7 +91,6 @@ typedef CLIB_PACKED (struct {
u16 num_of_spi;
u32 spi[0];
}) ike_delete_payload_header_t;
-/* *INDENT-OFF* */
static ike_payload_header_t *
ikev2_payload_add_hdr (ikev2_payload_chain_t * c, u8 payload_type, int len)
@@ -167,8 +155,8 @@ ikev2_payload_add_notify_2 (ikev2_payload_chain_t * c, u16 msg_type,
}
void
-ikev2_payload_add_sa (ikev2_payload_chain_t * c,
- ikev2_sa_proposal_t * proposals)
+ikev2_payload_add_sa (ikev2_payload_chain_t *c, ikev2_sa_proposal_t *proposals,
+ u8 force_spi)
{
ike_payload_header_t *ph;
ike_sa_proposal_data_t *prop;
@@ -184,7 +172,13 @@ ikev2_payload_add_sa (ikev2_payload_chain_t * c,
vec_foreach (p, proposals)
{
- int spi_size = (p->protocol_id == IKEV2_PROTOCOL_ESP) ? 4 : 0;
+ int spi_size = 0;
+
+ if (p->protocol_id == IKEV2_PROTOCOL_ESP)
+ spi_size = 4;
+ else if (force_spi && p->protocol_id == IKEV2_PROTOCOL_IKE)
+ spi_size = 8;
+
pr_data = vec_new (u8, sizeof (ike_sa_proposal_data_t) + spi_size);
prop = (ike_sa_proposal_data_t *) pr_data;
prop->last_or_more = proposals - p + 1 < vec_len (proposals) ? 2 : 0;
@@ -193,8 +187,13 @@ ikev2_payload_add_sa (ikev2_payload_chain_t * c,
prop->spi_size = spi_size;
prop->num_transforms = vec_len (p->transforms);
- if (spi_size)
+ if (spi_size == 4)
prop->spi[0] = clib_host_to_net_u32 (p->spi);
+ else if (spi_size == 8)
+ {
+ u64 s = clib_host_to_net_u64 (p->spi);
+ clib_memcpy_fast (prop->spi, &s, sizeof (s));
+ }
vec_foreach (t, p->transforms)
{
@@ -384,8 +383,9 @@ ikev2_parse_sa_payload (ike_payload_header_t * ikep, u32 rlen)
sap = (ike_sa_proposal_data_t *) & ikep->payload[proposal_ptr];
int i, transform_ptr;
- /* IKE proposal should not have SPI */
- if (sap->protocol_id == IKEV2_PROTOCOL_IKE && sap->spi_size != 0)
+ /* IKE proposal should have 8 bytes or no SPI */
+ if (sap->protocol_id == IKEV2_PROTOCOL_IKE && sap->spi_size != 0 &&
+ sap->spi_size != 8)
goto data_corrupted;
/* IKE proposal should not have SPI */
@@ -404,6 +404,12 @@ ikev2_parse_sa_payload (ike_payload_header_t * ikep, u32 rlen)
{
proposal->spi = clib_net_to_host_u32 (sap->spi[0]);
}
+ else if (sap->spi_size == 8)
+ {
+ u64 s;
+ clib_memcpy_fast (&s, &sap->spi[0], sizeof (s));
+ proposal->spi = clib_net_to_host_u64 (s);
+ }
for (i = 0; i < sap->num_transforms; i++)
{
diff --git a/src/plugins/ikev2/ikev2_priv.h b/src/plugins/ikev2/ikev2_priv.h
index 4c56b980f1c..0639809e9b1 100644
--- a/src/plugins/ikev2/ikev2_priv.h
+++ b/src/plugins/ikev2/ikev2_priv.h
@@ -184,16 +184,21 @@ do { \
#define ikev2_log_debug(...) \
vlib_log(VLIB_LOG_LEVEL_DEBUG, ikev2_main.log_class, __VA_ARGS__)
+#define foreach_ikev2_state \
+ _ (0, UNKNOWN, "UNKNOWN") \
+ _ (1, SA_INIT, "SA_INIT") \
+ _ (2, DELETED, "DELETED") \
+ _ (3, AUTH_FAILED, "AUTH_FAILED") \
+ _ (4, AUTHENTICATED, "AUTHENTICATED") \
+ _ (5, NOTIFY_AND_DELETE, "NOTIFY_AND_DELETE") \
+ _ (6, TS_UNACCEPTABLE, "TS_UNACCEPTABLE") \
+ _ (7, NO_PROPOSAL_CHOSEN, "NO_PROPOSAL_CHOSEN")
+
typedef enum
{
- IKEV2_STATE_UNKNOWN,
- IKEV2_STATE_SA_INIT,
- IKEV2_STATE_DELETED,
- IKEV2_STATE_AUTH_FAILED,
- IKEV2_STATE_AUTHENTICATED,
- IKEV2_STATE_NOTIFY_AND_DELETE,
- IKEV2_STATE_TS_UNACCEPTABLE,
- IKEV2_STATE_NO_PROPOSAL_CHOSEN,
+#define _(v, f, s) IKEV2_STATE_##f = v,
+ foreach_ikev2_state
+#undef _
} ikev2_state_t;
typedef struct
@@ -238,7 +243,7 @@ typedef struct
{
u8 proposal_num;
ikev2_protocol_id_t protocol_id:8;
- u32 spi;
+ u64 spi;
ikev2_sa_transform_t *transforms;
} ikev2_sa_proposal_t;
@@ -302,6 +307,8 @@ typedef struct
f64 time_to_expiration;
u8 is_expired;
i8 rekey_retries;
+
+ f64 timestamp;
} ikev2_child_sa_t;
typedef struct
@@ -312,6 +319,8 @@ typedef struct
typedef struct
{
+ u16 notify_type;
+ u8 kex;
u8 protocol_id;
u32 spi;
u32 ispi;
@@ -323,6 +332,22 @@ typedef struct
typedef struct
{
+ u16 notify_type;
+ u16 dh_group;
+ u64 ispi;
+ u64 rspi;
+ u8 *i_nonce;
+ u8 *r_nonce;
+ u8 *dh_shared_key;
+ u8 *dh_private_key;
+ u8 *i_dh_data;
+ u8 *r_dh_data;
+ ikev2_sa_proposal_t *i_proposals;
+ ikev2_sa_proposal_t *r_proposals;
+} ikev2_sa_rekey_t;
+
+typedef struct
+{
u16 msg_type;
u8 protocol_id;
u32 spi;
@@ -425,6 +450,9 @@ typedef struct
ikev2_rekey_t *new_child;
+ /* pending sa rekeyings */
+ ikev2_sa_rekey_t *sa_rekey;
+
/* packet data */
u8 *last_sa_init_req_packet_data;
u8 *last_sa_init_res_packet_data;
@@ -462,6 +490,8 @@ typedef struct
u8 keys_generated;
ikev2_stats_t stats;
+
+ f64 auth_timestamp;
} ikev2_sa_t;
@@ -530,7 +560,17 @@ typedef struct
u8 dpd_disabled;
/* pointer to name resolver function in dns plugin */
- int (*dns_resolve_name) ();
+ void *dns_resolve_name_ptr;
+
+ /* flag indicating whether lazy init is done or not */
+ int lazy_init_done;
+
+ /* refcount for IKEv2 udp ports and IPsec NATT punt registration */
+ int bind_refcount;
+
+ /* punt handle for IPsec NATT IPSEC_PUNT_IP4_SPI_UDP_0 reason */
+ vlib_punt_hdl_t punt_hdl;
+
} ikev2_main_t;
extern ikev2_main_t ikev2_main;
@@ -584,8 +624,8 @@ void ikev2_payload_add_notify (ikev2_payload_chain_t * c, u16 msg_type,
u8 * data);
void ikev2_payload_add_notify_2 (ikev2_payload_chain_t * c, u16 msg_type,
u8 * data, ikev2_notify_t * notify);
-void ikev2_payload_add_sa (ikev2_payload_chain_t * c,
- ikev2_sa_proposal_t * proposals);
+void ikev2_payload_add_sa (ikev2_payload_chain_t *c,
+ ikev2_sa_proposal_t *proposals, u8 force_spi);
void ikev2_payload_add_ke (ikev2_payload_chain_t * c, u16 dh_group,
u8 * dh_data);
void ikev2_payload_add_nonce (ikev2_payload_chain_t * c, u8 * nonce);
diff --git a/src/plugins/ikev2/ikev2_test.c b/src/plugins/ikev2/ikev2_test.c
index b63778ed103..5682d7058f6 100644
--- a/src/plugins/ikev2/ikev2_test.c
+++ b/src/plugins/ikev2/ikev2_test.c
@@ -32,7 +32,7 @@
#include <vnet/format_fns.h>
#include <ikev2/ikev2.api_enum.h>
#include <ikev2/ikev2.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
#define vl_endianfun /* define message structures */
#include <plugins/ikev2/ikev2.api.h>
@@ -396,8 +396,78 @@ vl_api_ikev2_sa_details_t_handler (vl_api_ikev2_sa_details_t * mp)
ip_address_decode2 (&sa->iaddr, &iaddr);
ip_address_decode2 (&sa->raddr, &raddr);
- fformat (vam->ofp, "profile index %d sa index: %d\n",
- mp->sa.profile_index, mp->sa.sa_index);
+ fformat (vam->ofp, "profile index %u sa index: %d\n", mp->sa.profile_index,
+ mp->sa.sa_index);
+ fformat (vam->ofp, " iip %U ispi %lx rip %U rspi %lx\n", format_ip_address,
+ &iaddr, sa->ispi, format_ip_address, &raddr, sa->rspi);
+ fformat (vam->ofp, " %U ", format_ikev2_sa_transform, &sa->encryption);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &sa->prf);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &sa->integrity);
+ fformat (vam->ofp, "%U \n", format_ikev2_sa_transform, &sa->dh);
+
+ fformat (vam->ofp, " SK_d %U\n", format_hex_bytes, k->sk_d, k->sk_d_len);
+
+ fformat (vam->ofp, " SK_a i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ai, k->sk_ai_len, format_hex_bytes, k->sk_ar, k->sk_ar_len);
+
+ fformat (vam->ofp, " SK_e i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ei, k->sk_ei_len, format_hex_bytes, k->sk_er, k->sk_er_len);
+
+ fformat (vam->ofp, " SK_p i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_pi, k->sk_pi_len, format_hex_bytes, k->sk_pr, k->sk_pr_len);
+
+ fformat (vam->ofp, " identifier (i) %U\n", format_ikev2_id_type_and_data,
+ &sa->i_id);
+ fformat (vam->ofp, " identifier (r) %U\n", format_ikev2_id_type_and_data,
+ &sa->r_id);
+
+ vam->result_ready = 1;
+}
+
+static int
+api_ikev2_sa_v2_dump (vat_main_t *vam)
+{
+ ikev2_test_main_t *im = &ikev2_test_main;
+ vl_api_ikev2_sa_v2_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ /* Construct the API message */
+ M (IKEV2_SA_V2_DUMP, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ if (!im->ping_id)
+ im->ping_id = vl_msg_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC));
+ mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping));
+ mp_ping->_vl_msg_id = htons (im->ping_id);
+ mp_ping->client_index = vam->my_client_index;
+ vam->result_ready = 0;
+
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ikev2_sa_v2_details_t_handler (vl_api_ikev2_sa_v2_details_t *mp)
+{
+ vat_main_t *vam = ikev2_test_main.vat_main;
+ vl_api_ikev2_sa_v2_t *sa = &mp->sa;
+ ip_address_t iaddr;
+ ip_address_t raddr;
+ vl_api_ikev2_keys_t *k = &sa->keys;
+ vl_api_ikev2_sa_v2_t_endian (sa);
+
+ ip_address_decode2 (&sa->iaddr, &iaddr);
+ ip_address_decode2 (&sa->raddr, &raddr);
+
+ fformat (vam->ofp, "profile name %s sa index: %d\n", mp->sa.profile_name,
+ mp->sa.sa_index);
fformat (vam->ofp, " iip %U ispi %lx rip %U rspi %lx\n", format_ip_address,
&iaddr, sa->ispi, format_ip_address, &raddr, sa->rspi);
fformat (vam->ofp, " %U ", format_ikev2_sa_transform, &sa->encryption);
@@ -427,6 +497,76 @@ vl_api_ikev2_sa_details_t_handler (vl_api_ikev2_sa_details_t * mp)
}
static int
+api_ikev2_sa_v3_dump (vat_main_t *vam)
+{
+ ikev2_test_main_t *im = &ikev2_test_main;
+ vl_api_ikev2_sa_v3_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ /* Construct the API message */
+ M (IKEV2_SA_V3_DUMP, mp);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ if (!im->ping_id)
+ im->ping_id = vl_msg_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC));
+ mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping));
+ mp_ping->_vl_msg_id = htons (im->ping_id);
+ mp_ping->client_index = vam->my_client_index;
+ vam->result_ready = 0;
+
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ikev2_sa_v3_details_t_handler (vl_api_ikev2_sa_v3_details_t *mp)
+{
+ vat_main_t *vam = ikev2_test_main.vat_main;
+ vl_api_ikev2_sa_v3_t *sa = &mp->sa;
+ ip_address_t iaddr;
+ ip_address_t raddr;
+ vl_api_ikev2_keys_t *k = &sa->keys;
+ vl_api_ikev2_sa_v3_t_endian (sa);
+
+ ip_address_decode2 (&sa->iaddr, &iaddr);
+ ip_address_decode2 (&sa->raddr, &raddr);
+
+ fformat (vam->ofp, "profile name %s sa index: %d\n", mp->sa.profile_name,
+ mp->sa.sa_index);
+ fformat (vam->ofp, " iip %U ispi %lx rip %U rspi %lx\n", format_ip_address,
+ &iaddr, sa->ispi, format_ip_address, &raddr, sa->rspi);
+ fformat (vam->ofp, " %U ", format_ikev2_sa_transform, &sa->encryption);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &sa->prf);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &sa->integrity);
+ fformat (vam->ofp, "%U \n", format_ikev2_sa_transform, &sa->dh);
+
+ fformat (vam->ofp, " SK_d %U\n", format_hex_bytes, k->sk_d, k->sk_d_len);
+
+ fformat (vam->ofp, " SK_a i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ai, k->sk_ai_len, format_hex_bytes, k->sk_ar, k->sk_ar_len);
+
+ fformat (vam->ofp, " SK_e i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ei, k->sk_ei_len, format_hex_bytes, k->sk_er, k->sk_er_len);
+
+ fformat (vam->ofp, " SK_p i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_pi, k->sk_pi_len, format_hex_bytes, k->sk_pr, k->sk_pr_len);
+
+ fformat (vam->ofp, " identifier (i) %U\n", format_ikev2_id_type_and_data,
+ &sa->i_id);
+ fformat (vam->ofp, " identifier (r) %U\n", format_ikev2_id_type_and_data,
+ &sa->r_id);
+
+ vam->result_ready = 1;
+}
+
+static int
api_ikev2_child_sa_dump (vat_main_t * vam)
{
unformat_input_t *i = vam->input;
@@ -488,6 +628,83 @@ vl_api_ikev2_child_sa_details_t_handler (vl_api_ikev2_child_sa_details_t * mp)
fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &child_sa->integrity);
fformat (vam->ofp, "%U \n", format_ikev2_sa_transform, &child_sa->esn);
+ fformat (vam->ofp, " spi(i) %lx spi(r) %lx\n", child_sa->i_spi,
+ child_sa->r_spi);
+
+ fformat (vam->ofp, " SK_e i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ei, k->sk_ei_len, format_hex_bytes, k->sk_er, k->sk_er_len);
+ if (k->sk_ai_len)
+ {
+ fformat (vam->ofp, " SK_a i:%U\n r:%U\n", format_hex_bytes,
+ k->sk_ai, k->sk_ai_len, format_hex_bytes, k->sk_ar,
+ k->sk_ar_len);
+ }
+ vam->result_ready = 1;
+}
+
+static int
+api_ikev2_child_sa_v2_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ ikev2_test_main_t *im = &ikev2_test_main;
+ vl_api_ikev2_child_sa_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+ u32 sa_index = ~0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sa_index %d", &sa_index))
+ ;
+ else
+ {
+ errmsg ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sa_index == ~0)
+ return -99;
+
+ /* Construct the API message */
+ M (IKEV2_CHILD_SA_DUMP, mp);
+
+ mp->sa_index = clib_net_to_host_u32 (sa_index);
+
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ if (!im->ping_id)
+ im->ping_id = vl_msg_api_get_msg_index ((u8 *) (VL_API_CONTROL_PING_CRC));
+ mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping));
+ mp_ping->_vl_msg_id = htons (im->ping_id);
+ mp_ping->client_index = vam->my_client_index;
+ vam->result_ready = 0;
+
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ikev2_child_sa_v2_details_t_handler (
+ vl_api_ikev2_child_sa_details_t *mp)
+{
+ vat_main_t *vam = ikev2_test_main.vat_main;
+ vl_api_ikev2_child_sa_t *child_sa = &mp->child_sa;
+ vl_api_ikev2_keys_t *k = &child_sa->keys;
+ vl_api_ikev2_child_sa_t_endian (child_sa);
+
+ fformat (vam->ofp, " child sa %u:\n", child_sa->child_sa_index);
+
+ fformat (vam->ofp, " %U ", format_ikev2_sa_transform,
+ &child_sa->encryption);
+ fformat (vam->ofp, "%U ", format_ikev2_sa_transform, &child_sa->integrity);
+ fformat (vam->ofp, "%U \n", format_ikev2_sa_transform, &child_sa->esn);
+
fformat (vam->ofp, " spi(i) %lx spi(r) %lx\n",
child_sa->i_spi, child_sa->r_spi);
diff --git a/src/plugins/ikev2/ikev2_types.api b/src/plugins/ikev2/ikev2_types.api
index b279026c2b9..2492611703d 100644
--- a/src/plugins/ikev2/ikev2_types.api
+++ b/src/plugins/ikev2/ikev2_types.api
@@ -128,6 +128,19 @@ typedef ikev2_child_sa
vl_api_ikev2_sa_transform_t esn;
};
+typedef ikev2_child_sa_v2
+{
+ u32 sa_index;
+ u32 child_sa_index;
+ u32 i_spi;
+ u32 r_spi;
+ vl_api_ikev2_keys_t keys;
+ vl_api_ikev2_sa_transform_t encryption;
+ vl_api_ikev2_sa_transform_t integrity;
+ vl_api_ikev2_sa_transform_t esn;
+ f64 uptime;
+};
+
typedef ikev2_sa_stats
{
u16 n_keepalives;
@@ -138,6 +151,18 @@ typedef ikev2_sa_stats
u16 n_init_sa_retransmit;
};
+enum ikev2_state
+{
+ UNKNOWN,
+ SA_INIT,
+ DELETED,
+ AUTH_FAILED,
+ AUTHENTICATED,
+ NOTIFY_AND_DELETE,
+ TS_UNACCEPTABLE,
+ NO_PROPOSAL_CHOSEN,
+};
+
typedef ikev2_sa
{
u32 sa_index;
@@ -161,3 +186,54 @@ typedef ikev2_sa
vl_api_ikev2_sa_stats_t stats;
};
+
+typedef ikev2_sa_v2
+{
+ u32 sa_index;
+ string profile_name[64];
+ vl_api_ikev2_state_t state;
+
+ u64 ispi;
+ u64 rspi;
+ vl_api_address_t iaddr;
+ vl_api_address_t raddr;
+
+ vl_api_ikev2_keys_t keys;
+
+ /* ID */
+ vl_api_ikev2_id_t i_id;
+ vl_api_ikev2_id_t r_id;
+
+ vl_api_ikev2_sa_transform_t encryption;
+ vl_api_ikev2_sa_transform_t integrity;
+ vl_api_ikev2_sa_transform_t prf;
+ vl_api_ikev2_sa_transform_t dh;
+
+ vl_api_ikev2_sa_stats_t stats;
+};
+
+typedef ikev2_sa_v3
+{
+ u32 sa_index;
+ string profile_name[64];
+ vl_api_ikev2_state_t state;
+
+ u64 ispi;
+ u64 rspi;
+ vl_api_address_t iaddr;
+ vl_api_address_t raddr;
+
+ vl_api_ikev2_keys_t keys;
+
+ /* ID */
+ vl_api_ikev2_id_t i_id;
+ vl_api_ikev2_id_t r_id;
+
+ vl_api_ikev2_sa_transform_t encryption;
+ vl_api_ikev2_sa_transform_t integrity;
+ vl_api_ikev2_sa_transform_t prf;
+ vl_api_ikev2_sa_transform_t dh;
+
+ vl_api_ikev2_sa_stats_t stats;
+ f64 uptime;
+}; \ No newline at end of file
diff --git a/src/plugins/ila/ila.c b/src/plugins/ila/ila.c
index 366abc9831e..02acd84880c 100644
--- a/src/plugins/ila/ila.c
+++ b/src/plugins/ila/ila.c
@@ -365,7 +365,7 @@ ila_ila2sir (vlib_main_t * vm,
{
ila_ila2sir_trace_t *tr =
vlib_add_trace (vm, node, p0, sizeof (*tr));
- tr->ila_index = ie0 ? (ie0 - ilm->entries) : ~0;
+ tr->ila_index = ie0 - ilm->entries;
tr->initial_dst = ip60->dst_address;
tr->adj_index = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
}
@@ -385,7 +385,6 @@ ila_ila2sir (vlib_main_t * vm,
return frame->n_vectors;
}
-/** *INDENT-OFF* */
VLIB_REGISTER_NODE (ila_ila2sir_node, static) =
{
.function = ila_ila2sir,
@@ -400,7 +399,6 @@ VLIB_REGISTER_NODE (ila_ila2sir_node, static) =
[ILA_ILA2SIR_NEXT_DROP] = "error-drop"
},
};
-/** *INDENT-ON* */
typedef enum
{
@@ -585,7 +583,6 @@ ila_sir2ila (vlib_main_t * vm,
return frame->n_vectors;
}
-/** *INDENT-OFF* */
VLIB_REGISTER_NODE (ila_sir2ila_node, static) =
{
.function = ila_sir2ila,.name = "sir-to-ila",
@@ -599,16 +596,13 @@ VLIB_REGISTER_NODE (ila_sir2ila_node, static) =
[ILA_SIR2ILA_NEXT_DROP] = "error-drop"
},
};
-/** *INDENT-ON* */
-/** *INDENT-OFF* */
VNET_FEATURE_INIT (ila_sir2ila, static) =
{
.arc_name = "ip6-unicast",
.node_name = "sir-to-ila",
.runs_before = VNET_FEATURES ("ip6-lookup"),
};
-/** *INDENT-ON* */
static void
ila_entry_stack (ila_entry_t *ie)
@@ -826,12 +820,10 @@ ila_interface (u32 sw_if_index, u8 disable)
return 0;
}
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Identifier Locator Addressing (ILA) for IPv6",
};
-/* *INDENT-ON* */
u8 *format_ila_dpo (u8 * s, va_list * va)
{
@@ -939,7 +931,7 @@ ila_init (vlib_main_t * vm)
ilm->lookup_table_nbuckets, ilm->lookup_table_size);
ila_dpo_type = dpo_register_new_type(&ila_vft, ila_nodes);
- ila_fib_node_type = fib_node_register_new_type(&ila_fib_node_vft);
+ ila_fib_node_type = fib_node_register_new_type ("ila", &ila_fib_node_vft);
ila_fib_src = fib_source_allocate("ila",
FIB_SOURCE_PRIORITY_HI,
FIB_SOURCE_BH_SIMPLE);
diff --git a/src/plugins/ioam/analyse/ioam_summary_export.c b/src/plugins/ioam/analyse/ioam_summary_export.c
index 032272f5ec7..6856bcc2200 100644
--- a/src/plugins/ioam/analyse/ioam_summary_export.c
+++ b/src/plugins/ioam/analyse/ioam_summary_export.c
@@ -20,11 +20,9 @@
#include <ioam/analyse/ip6/ip6_ioam_analyse.h>
u8 *
-ioam_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address, u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ioam_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port, ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index)
{
ip4_header_t *ip;
udp_header_t *udp;
@@ -39,7 +37,7 @@ ioam_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
u32 field_index = 0;
flow_report_stream_t *stream;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
/* Determine field count */
#define _(field,mask,item,length) \
@@ -74,8 +72,8 @@ ioam_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
ip->ip_version_and_header_length = 0x45;
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
- ip->dst_address.as_u32 = collector_address->as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (collector_port);
udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_ipfix);
udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
@@ -264,8 +262,9 @@ ioam_analyse_add_ipfix_record (flow_report_t * fr,
}
vlib_frame_t *
-ioam_send_flows (flow_report_main_t * frm, flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+ioam_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
vlib_buffer_t *b0 = NULL;
u32 next_offset = 0;
@@ -276,17 +275,16 @@ ioam_send_flows (flow_report_main_t * frm, flow_report_t * fr,
ipfix_set_header_t *s = NULL;
ip4_header_t *ip;
udp_header_t *udp;
- u32 records_this_buffer;
u16 new_l0, old_l0;
ip_csum_t sum0;
- vlib_main_t *vm = frm->vlib_main;
+ vlib_main_t *vm = vlib_get_main ();
ip6_address_t temp;
ioam_analyser_data_t *record = NULL;
flow_report_stream_t *stream;
ioam_analyser_data_t *aggregated_data;
u16 data_len;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
clib_memset (&temp, 0, sizeof (ip6_address_t));
@@ -330,16 +328,14 @@ ioam_send_flows (flow_report_main_t * frm, flow_report_t * fr,
h->sequence_number = stream->sequence_number++;
h->sequence_number = clib_host_to_net_u32 (h->sequence_number);
next_offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
- records_this_buffer = 0;
}
next_offset = ioam_analyse_add_ipfix_record (fr, record,
b0, next_offset,
&temp, &temp, 0, 0);
- records_this_buffer++;
/* Flush data if packet len is about to reach path mtu */
- if (next_offset > (frm->path_mtu - 250))
+ if (next_offset > (exp->path_mtu - 250))
flush = 1;
}
@@ -366,7 +362,7 @@ ioam_send_flows (flow_report_main_t * frm, flow_report_t * fr,
udp->length =
clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
- if (frm->udp_checksum)
+ if (exp->udp_checksum)
{
/* RFC 7011 section 10.3.2. */
udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
@@ -399,7 +395,7 @@ ioam_flow_create (u8 del)
vnet_flow_report_add_del_args_t args;
int rv;
u32 domain_id = 0;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
u16 template_id;
clib_memset (&args, 0, sizeof (args));
@@ -408,7 +404,7 @@ ioam_flow_create (u8 del)
del ? (args.is_add = 0) : (args.is_add = 1);
args.domain_id = domain_id;
- rv = vnet_flow_report_add_del (frm, &args, &template_id);
+ rv = vnet_flow_report_add_del (exp, &args, &template_id);
switch (rv)
{
@@ -430,12 +426,10 @@ ioam_flow_report_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ioam_flow_report_init) =
{
.runs_after = VLIB_INITS("flow_report_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/analyse/ioam_summary_export.h b/src/plugins/ioam/analyse/ioam_summary_export.h
index 99890ad43d9..7d1c5d00829 100644
--- a/src/plugins/ioam/analyse/ioam_summary_export.h
+++ b/src/plugins/ioam/analyse/ioam_summary_export.h
@@ -65,11 +65,9 @@ typedef struct
clib_error_t *ioam_flow_create (u8 del);
-u8 *ioam_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address, u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index);
+u8 *ioam_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port, ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index);
u16 ioam_analyse_add_ipfix_record (flow_report_t * fr,
ioam_analyser_data_t * record,
diff --git a/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c b/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c
index 99ba3295d44..9db0485da61 100644
--- a/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c
+++ b/src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.c
@@ -94,13 +94,11 @@ set_ioam_analyse_command_fn (vlib_main_t * vm, unformat_input_t * input,
return (ioam_analyse_enable_disable (vm, is_add, is_export, remote_listen));
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ioam_analyse_command, static) = {
.path = "set ioam analyse",
.short_help = "set ioam analyse [export-ipfix-collector] [disable] [listen-ipfix]",
.function = set_ioam_analyse_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ioam_analyse_cmd_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -130,13 +128,11 @@ show_ioam_analyse_cmd_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_ioam_ipfix_cmd, static) = {
.path = "show ioam analyse ",
.short_help = "show ioam analyser information",
.function = show_ioam_analyse_cmd_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ioam_analyse_init (vlib_main_t * vm)
diff --git a/src/plugins/ioam/analyse/ip6/node.c b/src/plugins/ioam/analyse/ip6/node.c
index ef35d0a9134..67895aa6486 100644
--- a/src/plugins/ioam/analyse/ip6/node.c
+++ b/src/plugins/ioam/analyse/ip6/node.c
@@ -466,7 +466,6 @@ ip6_ioam_analyse_unregister_handlers ()
ip6_ioam_analyse_unregister_hbh_handler (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE);
}
-/* *INDENT-OFF* */
/*
* Node for IP6 analyse - packets
@@ -507,7 +506,6 @@ VLIB_REGISTER_NODE (analyse_node_remote) =
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/encap/ip6_ioam_e2e.c b/src/plugins/ioam/encap/ip6_ioam_e2e.c
index a3dd048c659..0a811da6105 100644
--- a/src/plugins/ioam/encap/ip6_ioam_e2e.c
+++ b/src/plugins/ioam/encap/ip6_ioam_e2e.c
@@ -205,9 +205,7 @@ ioam_e2e_init (vlib_main_t * vm)
* Init function for the E2E lib.
* ip6_hop_by_hop_ioam_e2e_init gets called during init.
*/
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ioam_e2e_init) =
{
.runs_after = VLIB_INITS("ip6_hop_by_hop_ioam_init"),
};
-/* *INDENT-ON* */
diff --git a/src/plugins/ioam/encap/ip6_ioam_e2e.h b/src/plugins/ioam/encap/ip6_ioam_e2e.h
index fb83403da8f..f958e17669b 100644
--- a/src/plugins/ioam/encap/ip6_ioam_e2e.h
+++ b/src/plugins/ioam/encap/ip6_ioam_e2e.h
@@ -19,12 +19,10 @@
#include <ioam/lib-e2e/e2e_util.h>
#include "ip6_ioam_seqno.h"
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
ip6_hop_by_hop_option_t hdr;
ioam_e2e_packet_t e2e_hdr;
}) ioam_e2e_option_t;
-/* *INDENT-ON* */
typedef struct ioam_e2e_data_t_ {
u32 flow_ctx;
diff --git a/src/plugins/ioam/encap/ip6_ioam_pot.c b/src/plugins/ioam/encap/ip6_ioam_pot.c
index 99c21b571a3..54d748455d2 100644
--- a/src/plugins/ioam/encap/ip6_ioam_pot.c
+++ b/src/plugins/ioam/encap/ip6_ioam_pot.c
@@ -255,9 +255,7 @@ ip6_hop_by_hop_ioam_pot_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_pot_init) =
{
.runs_after = VLIB_INITS("ip6_hop_by_hop_ioam_init"),
};
-/* *INDENT-OFF* */
diff --git a/src/plugins/ioam/encap/ip6_ioam_pot.h b/src/plugins/ioam/encap/ip6_ioam_pot.h
index 01ce4ac590f..ef6f4c7344c 100644
--- a/src/plugins/ioam/encap/ip6_ioam_pot.h
+++ b/src/plugins/ioam/encap/ip6_ioam_pot.h
@@ -18,7 +18,6 @@
#include <vnet/ip/ip6_hop_by_hop_packet.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_hop_by_hop_option_t hdr;
u8 pot_type;
@@ -27,7 +26,6 @@ typedef CLIB_PACKED (struct {
u64 random;
u64 cumulative;
}) ioam_pot_option_t;
-/* *INDENT-ON* */
#endif /* PLUGINS_IOAM_PLUGIN_IOAM_ENCAP_IP6_IOAM_POT_H_ */
diff --git a/src/plugins/ioam/encap/ip6_ioam_trace.c b/src/plugins/ioam/encap/ip6_ioam_trace.c
index ea496610433..b244af56a6b 100644
--- a/src/plugins/ioam/encap/ip6_ioam_trace.c
+++ b/src/plugins/ioam/encap/ip6_ioam_trace.c
@@ -399,20 +399,16 @@ ip6_show_ioam_trace_cmd_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_ioam_trace_cmd, static) = {
.path = "show ioam trace",
.short_help = "iOAM trace statistics",
.function = ip6_show_ioam_trace_cmd_fn,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Inbound Operations, Administration, and Maintenance (OAM)",
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_hop_by_hop_ioam_trace_init (vlib_main_t * vm)
@@ -443,13 +439,11 @@ ip6_hop_by_hop_ioam_trace_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_trace_init) =
{
.runs_after = VLIB_INITS ("ip_main_init", "ip6_lookup_init",
"ip6_hop_by_hop_ioam_init"),
};
-/* *INDENT-ON* */
int
ip6_trace_profile_cleanup (void)
diff --git a/src/plugins/ioam/encap/ip6_ioam_trace.h b/src/plugins/ioam/encap/ip6_ioam_trace.h
index 4eda6110d24..25693dfc6cd 100644
--- a/src/plugins/ioam/encap/ip6_ioam_trace.h
+++ b/src/plugins/ioam/encap/ip6_ioam_trace.h
@@ -21,12 +21,10 @@
#include <vnet/ip/ip6_hop_by_hop_packet.h>
#include <ioam/lib-trace/trace_util.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
ip6_hop_by_hop_option_t hdr;
ioam_trace_hdr_t trace_hdr;
}) ioam_trace_option_t;
-/* *INDENT-ON* */
always_inline void
ip6_hbh_ioam_trace_set_bit (ioam_trace_option_t * trace, u8 trace_bit)
diff --git a/src/plugins/ioam/export-common/ioam_export.h b/src/plugins/ioam/export-common/ioam_export.h
index f242ad7a788..1b764f66b15 100644
--- a/src/plugins/ioam/export-common/ioam_export.h
+++ b/src/plugins/ioam/export-common/ioam_export.h
@@ -287,7 +287,7 @@ ioam_export_header_create (ioam_export_main_t * em,
(DEFAULT_EXPORT_RECORDS *
DEFAULT_EXPORT_SIZE));
ip->checksum = ip4_header_checksum (ip);
- _vec_len (rewrite) = sizeof (ip4_ipfix_data_packet_t);
+ vec_set_len (rewrite, sizeof (ip4_ipfix_data_packet_t));
em->record_header = rewrite;
return (1);
}
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
index f802a049365..1606f72224f 100644
--- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.c
@@ -158,14 +158,12 @@ set_vxlan_gpe_ioam_export_ipfix_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_vxlan_gpe_ioam_ipfix_command, static) =
{
.path = "set vxlan-gpe-ioam export ipfix",
.short_help = "set vxlan-gpe-ioam export ipfix collector <ip4-address> src <ip4-address>",
.function = set_vxlan_gpe_ioam_export_ipfix_command_fn,
};
-/* *INDENT-ON* */
#include <ioam/export-vxlan-gpe/vxlan_gpe_ioam_export.api.c>
static clib_error_t *
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
index 7d66bd45849..5de10ba47f3 100644
--- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_ioam_export_thread.c
@@ -33,14 +33,12 @@ vxlan_gpe_ioam_export_process (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_ioam_export_process_node, static) =
{
.function = vxlan_gpe_ioam_export_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "vxlan-gpe-ioam-export-process",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
index f8d90332d5c..839fd80b443 100644
--- a/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
+++ b/src/plugins/ioam/export-vxlan-gpe/vxlan_gpe_node.c
@@ -144,7 +144,6 @@ vxlan_gpe_export_node_fn (vlib_main_t * vm,
/*
* Node for VXLAN-GPE export
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_export_node) =
{
.function = vxlan_gpe_export_node_fn,
@@ -159,7 +158,6 @@ VLIB_REGISTER_NODE (vxlan_export_node) =
.next_nodes =
{[EXPORT_NEXT_VXLAN_GPE_INPUT] = "vxlan-gpe-pop-ioam-v4"},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/export/ioam_export.c b/src/plugins/ioam/export/ioam_export.c
index 21695af3368..f38281182c8 100644
--- a/src/plugins/ioam/export/ioam_export.c
+++ b/src/plugins/ioam/export/ioam_export.c
@@ -137,13 +137,11 @@ set_ioam_export_ipfix_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ipfix_command, static) =
{
.path = "set ioam export ipfix",.short_help =
"set ioam export ipfix collector <ip4-address> src <ip4-address>",.
function = set_ioam_export_ipfix_command_fn,};
-/* *INDENT-ON* */
#include <ioam/export/ioam_export.api.c>
static clib_error_t *
diff --git a/src/plugins/ioam/ioam_plugin_doc.md b/src/plugins/ioam/ioam_plugin_doc.md
deleted file mode 100644
index 343abcf73d8..00000000000
--- a/src/plugins/ioam/ioam_plugin_doc.md
+++ /dev/null
@@ -1,464 +0,0 @@
-## VPP Inband OAM (iOAM) {#ioam_plugin_doc}
-
-In-band OAM (iOAM) is an implementation study to record operational
-information in the packet while the packet traverses a path between
-two points in the network.
-
-Overview of iOAM can be found in [iOAM-Devnet] page.
-The following IETF drafts detail the motivation and mechanism for
-recording operational information:
- - [iOAM-ietf-requirements] - Describes motivation and usecases for iOAM
- - [iOAM-ietf-data] - Describes data records that can be collected using iOAM
- - [iOAM-ietf-transport] - Lists out the transport protocols
- and mechanism to carry iOAM data records
- - [iOAM-ietf-proof-of-transit] - Describes the idea of Proof of Transit (POT)
- and mechanisms to operationalize the idea
-
-## Terminology
-In-band OAM is expected to be deployed in a specific domain rather
-than on the overall Internet. The part of the network which employs in-band OAM
-is referred to as **"in-band OAM-domain"**.
-
-In-band OAM data is added to a packet on entering the in-band OAM-domain
-and is removed from the packet when exiting the domain.
-Within the in-band OAM-domain, network nodes that the packet traverses
-may update the in-band OAM data records.
-
-- The node which adds in-band OAM data to the packet is called the
-**"in-band OAM encapsulating node"**.
-
-- The node which removes the in-band OAM data is referred to as the
-**"in-band OAM decapsulating node"**.
-
-- Nodes within the domain which are aware of in-band OAM data and read
-and/or write or process the in-band OAM data are called
-**"in-band OAM transit nodes"**.
-
-## Features supported in the current release
-VPP can function as in-band OAM encapsulating, transit and decapsulating node.
-In this version of VPP in-band OAM data is transported as options in an
-IPv6 hop-by-hop extension header. Hence in-band OAM can be enabled
-for IPv6 traffic.
-
-The following iOAM features are supported:
-
-- **In-band OAM Tracing** : In-band OAM supports multiple data records to be
-recorded in the packet as the packet traverses the network.
-These data records offer insights into the operational behavior of the network.
-The following information can be collected in the tracing
-data from the nodes a packet traverses:
- - Node ID
- - Ingress interface ID
- - Egress interface ID
- - Timestamp
- - Pre-configured application data
-
-- **In-band OAM Proof of Transit (POT)**: Proof of transit iOAM data is
-added to every packet for verifying that a packet traverses a specific
-set of nodes.
-In-band OAM data is updated at every node that is enabled with iOAM
-proof of transit and is used to verify whether a packet traversed
-all the specified nodes. When the verifier receives each packet,
-it can validate whether the packet traversed the specified nodes.
-
-
-## Configuration
-Configuring iOAM involves:
-- Selecting the packets for which iOAM data must be inserted, updated or removed
- - Selection of packets for iOAM data insertion on iOAM encapsulating node.
- Selection of packets is done by 5-tuple based classification
- - Selection of packets for updating iOAM data is implicitly done on the
- presence of iOAM options in the packet
- - Selection of packets for removing the iOAM data is done on 5-tuple
- based classification
-- The kind of data to be collected
- - Tracing data
- - Proof of transit
-- Additional details for processing iOAM data to be collected
- - For trace data - trace type, number of nodes to be recorded in the trace,
- time stamp precision, etc.
- - For POT data - configuration of POT profile required to process the POT data
-
-The CLI for configuring iOAM is explained here followed by detailed steps
-and examples to deploy iOAM on VPP as an encapsulating, transit or
-decapsulating iOAM node in the subsequent sub-sections.
-
-VPP iOAM configuration for enabling trace and POT is as follows:
-
- set ioam rewrite trace-type <0x1f|0x7|0x9|0x11|0x19>
- trace-elts <number of trace elements> trace-tsp <0|1|2|3>
- node-id <node ID in hex> app-data <application data in hex> [pot]
-
-A description of each of the options of the CLI follows:
-- trace-type : An entry in the "Node data List" array of the trace option
-can have different formats, following the needs of the a deployment.
-For example: Some deployments might only be interested
-in recording the node identifiers, whereas others might be interested
-in recording node identifier and timestamp.
-The following types are currently supported:
- - 0x1f : Node data to include hop limit (8 bits), node ID (24 bits),
- ingress and egress interface IDs (16 bits each), timestamp (32 bits),
- application data (32 bits)
- - 0x7 : Node data to include hop limit (8 bits), node ID (24 bits),
- ingress and egress interface IDs (16 bits each)
- - 0x9 : Node data to include hop limit (8 bits), node ID (24 bits),
- timestamp (32 bits)
- - 0x11: Node data to include hop limit (8 bits), node ID (24 bits),
- application data (32 bits)
- - 0x19: Node data to include hop limit (8 bits), node ID (24 bits),
- timestamp (32 bits), application data (32 bits)
-- trace-elts : Defines the length of the node data array in the trace option.
-- trace-tsp : Defines the timestamp precision to use with the enumerated value
- for precision as follows:
- - 0 : 32bits timestamp in seconds
- - 1 : 32bits timestamp in milliseconds
- - 2 : 32bits timestamp in microseconds
- - 3 : 32bits timestamp in nanoseconds
-- node-id : Unique identifier for the node, included in the node ID
- field of the node data in trace option.
-- app-data : The value configured here is included as is in
-application data field of node data in trace option.
-- pot : Enables POT option to be included in the iOAM options.
-
-### Trace configuration
-
-#### On in-band OAM encapsulating node
- - **Configure classifier and apply ACL** to select packets for
- iOAM data insertion
- - Example to enable iOAM data insertion for all the packets
- towards IPv6 address db06::06:
-
- vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
-
- vpp# classify session acl-hit-next node ip6-add-hop-by-hop
- table-index 0 match l3 ip6 dst db06::06
-
- vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
-
- - **Enable tracing** : Specify node ID, maximum number of nodes for which
- trace data should be recorded, type of data to be included for recording,
- optionally application data to be included
- - Example to enable tracing with a maximum of 4 nodes recorded
- and the data to be recorded to include - hop limit, node id,
- ingress and egress interface IDs, timestamp (millisecond precision),
- application data (0x1234):
-
-
- vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
- node-id 0x1 app-data 0x1234
-
-
-
-#### On in-band OAM transit node
-- The transit node requires trace type, timestamp precision, node ID and
-optionally application data to be configured,
-to update its node data in the trace option.
-
-Example:
-
- vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
- node-id 0x2 app-data 0x1234
-
-#### On the In-band OAM decapsulating node
-- The decapsulating node similar to encapsulating node requires
-**classification** of the packets to remove iOAM data from.
- - Example to decapsulate iOAM data for packets towards
- db06::06, configure classifier and enable it as an ACL as follows:
-
-
- vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
-
- vpp# classify session acl-hit-next node ip6-lookup table-index 0
- match l3 ip6 dst db06::06 opaque-index 100
-
- vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
-
-
-- Decapsulating node requires trace type, timestamp precision,
-node ID and optionally application data to be configured,
-to update its node data in the trace option before it is decapsulated.
-
-Example:
-
- vpp# set ioam rewrite trace-type 0x1f trace-elts 4
- trace-tsp 1 node-id 0x3 app-data 0x1234
-
-
-### Proof of Transit configuration
-
-For details on proof-of-transit,
-see the IETF draft [iOAM-ietf-proof-of-transit].
-To enable Proof of Transit all the nodes that participate
-and hence are verified for transit need a proof of transit profile.
-A script to generate a proof of transit profile as per the mechanism
-described in [iOAM-ietf-proof-of-transit] will be available at [iOAM-Devnet].
-
-The Proof of transit mechanism implemented here is based on
-Shamir's Secret Sharing algorithm.
-The overall algorithm uses two polynomials
-POLY-1 and POLY-2. The degree of polynomials depends on number of nodes
-to be verified for transit.
-POLY-1 is secret and constant. Each node gets a point on POLY-1
-at setup-time and keeps it secret.
-POLY-2 is public, random and per packet.
-Each node is assigned a point on POLY-1 and POLY-2 with the same x index.
-Each node derives its point on POLY-2 each time a packet arrives at it.
-A node then contributes its points on POLY-1 and POLY-2 to construct
-POLY-3 (POLY-3 = POLY-1 + POLY-2) using lagrange extrapolation and
-forwards it towards the verifier by updating POT data in the packet.
-The verifier constructs POLY-3 from the accumulated value from all the nodes
-and its own points on POLY-1 and POLY-2 and verifies whether
-POLY-3 = POLY-1 + POLY-2. Only the verifier knows POLY-1.
-The solution leverages finite field arithmetic in a field of size "prime number"
-for reasons explained in description of Shamir's secret sharing algorithm.
-
-Here is an explanation of POT profile list and profile configuration CLI to
-realize the above mechanism.
-It is best to use the script provided at [iOAM-Devnet] to generate
-this configuration.
-- **Create POT profile** : set pot profile name <string> id [0-1]
-[validator-key 0xu64] prime-number 0xu64 secret_share 0xu64
-lpc 0xu64 polynomial2 0xu64 bits-in-random [0-64]
- - name : Profile list name.
- - id : Profile id, it can be 0 or 1.
- A maximum of two profiles can be configured per profile list.
- - validator-key : Secret key configured only on the
- verifier/decapsulating node used to compare and verify proof of transit.
- - prime-number : Prime number for finite field arithmetic as required by the
- proof of transit mechanism.
- - secret_share : Unique point for each node on the secret polynomial POLY-1.
- - lpc : Lagrange Polynomial Constant(LPC) calculated per node based on
- its point (x value used for evaluating the points on the polynomial)
- on the polynomial used in lagrange extrapolation
- for reconstructing polynomial (POLY-3).
- - polynomial2 : Is the pre-evaluated value of the point on
- 2nd polynomial(POLY-2). This is unique for each node.
- It is pre-evaluated for all the coefficients of POLY-2 except
- for the constant part of the polynomial that changes per packet
- and is received as part of the POT data in the packet.
- - bits-in-random : To control the size of the random number to be
- generated. This number has to match the other numbers generated and used
- in the profile as per the algorithm.
-
-- **Set a configured profile as active/in-use** :
-set pot profile-active name <string> ID [0-1]
- - name : Name of the profile list to be used for computing
- POT data per packet.
- - ID : Identifier of the profile within the list to be used.
-
-#### On In-band OAM encapsulating node
- - Configure the classifier and apply ACL to select packets for iOAM data insertion.
- - Example to enable iOAM data insertion for all the packet towards
- IPv6 address db06::06 -
-
-
- vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
-
- vpp# classify session acl-hit-next node
- ip6-add-hop-by-hop table-index 0 match l3 ip6 dst db06::06
-
- vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
-
-
- - Configure the proof of transit profile list with profiles.
-Each profile list referred to by a name can contain 2 profiles,
-only one is in use for updating proof of transit data at any time.
- - Example profile list example with a profile generated from the
- script to verify transit through 3 nodes is:
-
-
- vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685
- secret_share 0x6c22eff0f45ec56d lpc 0x7fff0000fa884682
- polynomial2 0xffb543d4a9c bits-in-random 63
-
- - Enable one of the profiles from the configured profile list as active
- so that is will be used for calculating proof of transit
-
-Example enable profile ID 0 from profile list example configured above:
-
-
- vpp# set pot profile-active name example ID 0
-
-
- - Enable POT option to be inserted
-
-
- vpp# set ioam rewrite pot
-
-
-#### On in-band OAM transit node
- - Configure the proof of transit profile list with profiles for transit node.
-Example:
-
-
- vpp# set pot profile name example id 0 prime-number 0x7fff0000fa884685
- secret_share 0x564cdbdec4eb625d lpc 0x1
- polynomial2 0x23f3a227186a bits-in-random 63
-
-#### On in-band OAM decapsulating node / verifier
-- The decapsulating node, similar to the encapsulating node requires
-classification of the packets to remove iOAM data from.
- - Example to decapsulate iOAM data for packets towards db06::06
- configure classifier and enable it as an ACL as follows:
-
-
- vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
-
- vpp# classify session acl-hit-next node ip6-lookup table-index 0
- match l3 ip6 dst db06::06 opaque-index 100
-
- vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
-
-- To update and verify the proof of transit, POT profile list should be configured.
- - Example POT profile list configured as follows:
-
- vpp# set pot profile name example id 0 validate-key 0x7fff0000fa88465d
- prime-number 0x7fff0000fa884685 secret_share 0x7a08fbfc5b93116d lpc 0x3
- polynomial2 0x3ff738597ce bits-in-random 63
-
-## Operational data
-
-Following CLIs are available to check iOAM operation:
-- To check iOAM configuration that are effective use "show ioam summary"
-
-Example:
-
- vpp# show ioam summary
- REWRITE FLOW CONFIGS - Not configured
- HOP BY HOP OPTIONS - TRACE CONFIG -
- Trace Type : 0x1f (31)
- Trace timestamp precision : 1 (Milliseconds)
- Num of trace nodes : 4
- Node-id : 0x2 (2)
- App Data : 0x1234 (4660)
- POT OPTION - 1 (Enabled)
- Try 'show ioam pot and show pot profile' for more information
-
-- To find statistics about packets for which iOAM options were
-added (encapsulating node) and removed (decapsulating node) execute
-*show errors*
-
-Example on encapsulating node:
-
-
- vpp# show error
- Count Node Reason
- 1208804706 ip6-inacl input ACL hits
- 1208804706 ip6-add-hop-by-hop Pkts w/ added ip6 hop-by-hop options
-
-Example on decapsulating node:
-
- vpp# show error
- Count Node Reason
- 69508569 ip6-inacl input ACL hits
- 69508569 ip6-pop-hop-by-hop Pkts w/ removed ip6 hop-by-hop options
-
-- To check the POT profiles use "show pot profile"
-
-Example:
-
- vpp# show pot profile
- Profile list in use : example
- POT Profile at index: 0
- ID : 0
- Validator : False (0)
- Secret share : 0x564cdbdec4eb625d (6218586935324795485)
- Prime number : 0x7fff0000fa884685 (9223090566081300101)
- 2nd polynomial(eval) : 0x23f3a227186a (39529304496234)
- LPC : 0x1 (1)
- Bit mask : 0x7fffffffffffffff (9223372036854775807)
- Profile index in use: 0
- Pkts passed : 0x36 (54)
-
-- To get statistics of POT for packets use "show ioam pot"
-
-Example at encapsulating or transit node:
-
- vpp# show ioam pot
- Pkts with ip6 hop-by-hop POT options - 54
- Pkts with ip6 hop-by-hop POT options but no profile set - 0
- Pkts with POT in Policy - 0
- Pkts with POT out of Policy - 0
-
-
-Example at decapsulating/verification node:
-
-
- vpp# show ioam pot
- Pkts with ip6 hop-by-hop POT options - 54
- Pkts with ip6 hop-by-hop POT options but no profile set - 0
- Pkts with POT in Policy - 54
- Pkts with POT out of Policy - 0
-
-- Tracing - enable trace of IPv6 packets to view the data inserted and
-collected.
-
-Example when the nodes are receiving data over a DPDK interface:
-Enable tracing using "trace add dpdk-input 20" and
-execute "show trace" to view the iOAM data collected:
-
-
- vpp# trace add dpdk-input 20
-
- vpp# show trace
-
- ------------------- Start of thread 0 vpp_main -------------------
-
- Packet 1
-
- 00:00:19:294697: dpdk-input
- GigabitEthernetb/0/0 rx queue 0
- buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
- PKT MBUF: port 0, nb_segs 1, pkt_len 214
- buf_len 2176, data_len 214, ol_flags 0x0, data_off 128, phys_addr 0xe9a35a00
- packet_type 0x0
- IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
- tos 0x00, flow label 0x0, hop limit 63, payload length 160
- 00:00:19:294737: ethernet-input
- IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
- 00:00:19:294753: ip6-input
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
- tos 0x00, flow label 0x0, hop limit 63, payload length 160
- 00:00:19:294757: ip6-lookup
- fib 0 adj-idx 15 : indirect via db05::2 flow hash: 0x00000000
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
- tos 0x00, flow label 0x0, hop limit 63, payload length 160
- 00:00:19:294802: ip6-hop-by-hop
- IP6_HOP_BY_HOP: next index 5 len 96 traced 96 Trace Type 0x1f , 1 elts left
- [0] ttl 0x0 node ID 0x0 ingress 0x0 egress 0x0 ts 0x0
- app 0x0
- [1] ttl 0x3e node ID 0x3 ingress 0x1 egress 0x2 ts 0xb68c2213
- app 0x1234
- [2] ttl 0x3f node ID 0x2 ingress 0x1 egress 0x2 ts 0xb68c2204
- app 0x1234
- [3] ttl 0x40 node ID 0x1 ingress 0x5 egress 0x6 ts 0xb68c2200
- app 0x1234
- POT opt present
- random = 0x577a916946071950, Cumulative = 0x10b46e78a35a392d, Index = 0x0
- 00:00:19:294810: ip6-rewrite
- tx_sw_if_index 1 adj-idx 14 : GigabitEthernetb/0/0
- IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72 flow hash: 0x00000000
- IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
- tos 0x00, flow label 0x0, hop limit 62, payload length 160
- 00:00:19:294814: GigabitEthernetb/0/0-output
- GigabitEthernetb/0/0
- IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
- tos 0x00, flow label 0x0, hop limit 62, payload length 160
- 00:00:19:294820: GigabitEthernetb/0/0-tx
- GigabitEthernetb/0/0 tx queue 0
- buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
- IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
-
- IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
-
- tos 0x00, flow label 0x0, hop limit 62, payload length 160
-
-
-[iOAM-Devnet]: <https://github.com/ciscodevnet/iOAM>
-[iOAM-ietf-requirements]:<https://tools.ietf.org/html/draft-brockners-inband-oam-requirements-01>
-[iOAM-ietf-transport]:<https://tools.ietf.org/html/draft-brockners-inband-oam-transport-01>
-[iOAM-ietf-data]:<https://tools.ietf.org/html/draft-brockners-inband-oam-data-01>
-[iOAM-ietf-proof-of-transit]:<https://tools.ietf.org/html/draft-brockners-proof-of-transit-01>
diff --git a/src/plugins/ioam/ioam_plugin_doc.rst b/src/plugins/ioam/ioam_plugin_doc.rst
new file mode 100644
index 00000000000..0f84d5f7a36
--- /dev/null
+++ b/src/plugins/ioam/ioam_plugin_doc.rst
@@ -0,0 +1,490 @@
+Inband OAM (iOAM)
+=================
+
+In-band OAM (iOAM) is an implementation study to record operational
+information in the packet while the packet traverses a path between two
+points in the network.
+
+Overview of iOAM can be found in
+`iOAM-Devnet <https://github.com/ciscodevnet/iOAM>`__ page. The
+following IETF drafts detail the motivation and mechanism for recording
+operational information: -
+`iOAM-ietf-requirements <https://tools.ietf.org/html/draft-brockners-inband-oam-requirements-01>`__
+- Describes motivation and usecases for iOAM -
+`iOAM-ietf-data <https://tools.ietf.org/html/draft-brockners-inband-oam-data-01>`__
+- Describes data records that can be collected using iOAM -
+`iOAM-ietf-transport <https://tools.ietf.org/html/draft-brockners-inband-oam-transport-01>`__
+- Lists out the transport protocols and mechanism to carry iOAM data
+records -
+`iOAM-ietf-proof-of-transit <https://tools.ietf.org/html/draft-brockners-proof-of-transit-01>`__
+- Describes the idea of Proof of Transit (POT) and mechanisms to
+operationalize the idea
+
+Terminology
+-----------
+
+In-band OAM is expected to be deployed in a specific domain rather than
+on the overall Internet. The part of the network which employs in-band
+OAM is referred to as **“in-band OAM-domain”**.
+
+In-band OAM data is added to a packet on entering the in-band OAM-domain
+and is removed from the packet when exiting the domain. Within the
+in-band OAM-domain, network nodes that the packet traverses may update
+the in-band OAM data records.
+
+- The node which adds in-band OAM data to the packet is called the
+ **“in-band OAM encapsulating node”**.
+
+- The node which removes the in-band OAM data is referred to as the
+ **“in-band OAM decapsulating node”**.
+
+- Nodes within the domain which are aware of in-band OAM data and read
+ and/or write or process the in-band OAM data are called **“in-band
+ OAM transit nodes”**.
+
+Features supported in the current release
+-----------------------------------------
+
+VPP can function as in-band OAM encapsulating, transit and decapsulating
+node. In this version of VPP in-band OAM data is transported as options
+in an IPv6 hop-by-hop extension header. Hence in-band OAM can be enabled
+for IPv6 traffic.
+
+The following iOAM features are supported:
+
+- **In-band OAM Tracing** : In-band OAM supports multiple data records
+ to be recorded in the packet as the packet traverses the network.
+ These data records offer insights into the operational behavior of
+ the network. The following information can be collected in the
+ tracing data from the nodes a packet traverses:
+
+ - Node ID
+ - Ingress interface ID
+ - Egress interface ID
+ - Timestamp
+ - Pre-configured application data
+
+- **In-band OAM Proof of Transit (POT)**: Proof of transit iOAM data is
+ added to every packet for verifying that a packet traverses a
+ specific set of nodes. In-band OAM data is updated at every node that
+ is enabled with iOAM proof of transit and is used to verify whether a
+ packet traversed all the specified nodes. When the verifier receives
+ each packet, it can validate whether the packet traversed the
+ specified nodes.
+
+Configuration
+-------------
+
+Configuring iOAM involves: - Selecting the packets for which iOAM data
+must be inserted, updated or removed - Selection of packets for iOAM
+data insertion on iOAM encapsulating node. Selection of packets is done
+by 5-tuple based classification - Selection of packets for updating iOAM
+data is implicitly done on the presence of iOAM options in the packet -
+Selection of packets for removing the iOAM data is done on 5-tuple based
+classification - The kind of data to be collected - Tracing data - Proof
+of transit - Additional details for processing iOAM data to be collected
+- For trace data - trace type, number of nodes to be recorded in the
+trace, time stamp precision, etc. - For POT data - configuration of POT
+profile required to process the POT data
+
+The CLI for configuring iOAM is explained here followed by detailed
+steps and examples to deploy iOAM on VPP as an encapsulating, transit or
+decapsulating iOAM node in the subsequent sub-sections.
+
+VPP iOAM configuration for enabling trace and POT is as follows:
+
+::
+
+ set ioam rewrite trace-type <0x1f|0x7|0x9|0x11|0x19>
+ trace-elts <number of trace elements> trace-tsp <0|1|2|3>
+ node-id <node ID in hex> app-data <application data in hex> [pot]
+
+A description of each of the options of the CLI follows: - trace-type :
+An entry in the “Node data List” array of the trace option can have
+different formats, following the needs of the a deployment. For example:
+Some deployments might only be interested in recording the node
+identifiers, whereas others might be interested in recording node
+identifier and timestamp. The following types are currently supported: -
+0x1f : Node data to include hop limit (8 bits), node ID (24 bits),
+ingress and egress interface IDs (16 bits each), timestamp (32 bits),
+application data (32 bits) - 0x7 : Node data to include hop limit (8
+bits), node ID (24 bits), ingress and egress interface IDs (16 bits
+each) - 0x9 : Node data to include hop limit (8 bits), node ID (24
+bits), timestamp (32 bits) - 0x11: Node data to include hop limit (8
+bits), node ID (24 bits), application data (32 bits) - 0x19: Node data
+to include hop limit (8 bits), node ID (24 bits), timestamp (32 bits),
+application data (32 bits) - trace-elts : Defines the length of the node
+data array in the trace option. - trace-tsp : Defines the timestamp
+precision to use with the enumerated value for precision as follows: - 0
+: 32bits timestamp in seconds - 1 : 32bits timestamp in milliseconds - 2
+: 32bits timestamp in microseconds - 3 : 32bits timestamp in nanoseconds
+- node-id : Unique identifier for the node, included in the node ID
+field of the node data in trace option. - app-data : The value
+configured here is included as is in application data field of node data
+in trace option. - pot : Enables POT option to be included in the iOAM
+options.
+
+Trace configuration
+~~~~~~~~~~~~~~~~~~~
+
+On in-band OAM encapsulating node
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- **Configure classifier and apply ACL** to select packets for iOAM
+ data insertion
+
+ - Example to enable iOAM data insertion for all the packets towards
+ IPv6 address db06::06:
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-add-hop-by-hop
+ table-index 0 match l3 ip6 dst db06::06
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+- **Enable tracing** : Specify node ID, maximum number of nodes for
+ which trace data should be recorded, type of data to be included for
+ recording, optionally application data to be included
+
+ - Example to enable tracing with a maximum of 4 nodes recorded and
+ the data to be recorded to include - hop limit, node id, ingress
+ and egress interface IDs, timestamp (millisecond precision),
+ application data (0x1234):
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
+ node-id 0x1 app-data 0x1234
+
+On in-band OAM transit node
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- The transit node requires trace type, timestamp precision, node ID
+ and optionally application data to be configured, to update its node
+ data in the trace option.
+
+Example:
+
+::
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4 trace-tsp 1
+ node-id 0x2 app-data 0x1234
+
+On the In-band OAM decapsulating node
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- The decapsulating node similar to encapsulating node requires
+ **classification** of the packets to remove iOAM data from.
+
+ - Example to decapsulate iOAM data for packets towards db06::06,
+ configure classifier and enable it as an ACL as follows:
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-lookup table-index 0
+ match l3 ip6 dst db06::06 opaque-index 100
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+- Decapsulating node requires trace type, timestamp precision, node ID
+ and optionally application data to be configured, to update its node
+ data in the trace option before it is decapsulated.
+
+Example:
+
+::
+
+ vpp# set ioam rewrite trace-type 0x1f trace-elts 4
+ trace-tsp 1 node-id 0x3 app-data 0x1234
+
+Proof of Transit configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For details on proof-of-transit, see the IETF draft
+`iOAM-ietf-proof-of-transit <https://tools.ietf.org/html/draft-brockners-proof-of-transit-01>`__.
+To enable Proof of Transit all the nodes that participate and hence are
+verified for transit need a proof of transit profile. A script to
+generate a proof of transit profile as per the mechanism described in
+`iOAM-ietf-proof-of-transit <https://tools.ietf.org/html/draft-brockners-proof-of-transit-01>`__
+will be available at
+`iOAM-Devnet <https://github.com/ciscodevnet/iOAM>`__.
+
+The Proof of transit mechanism implemented here is based on Shamir’s
+Secret Sharing algorithm. The overall algorithm uses two polynomials
+POLY-1 and POLY-2. The degree of polynomials depends on number of nodes
+to be verified for transit. POLY-1 is secret and constant. Each node
+gets a point on POLY-1 at setup-time and keeps it secret. POLY-2 is
+public, random and per packet. Each node is assigned a point on POLY-1
+and POLY-2 with the same x index. Each node derives its point on POLY-2
+each time a packet arrives at it. A node then contributes its points on
+POLY-1 and POLY-2 to construct POLY-3 (POLY-3 = POLY-1 + POLY-2) using
+lagrange extrapolation and forwards it towards the verifier by updating
+POT data in the packet. The verifier constructs POLY-3 from the
+accumulated value from all the nodes and its own points on POLY-1 and
+POLY-2 and verifies whether POLY-3 = POLY-1 + POLY-2. Only the verifier
+knows POLY-1. The solution leverages finite field arithmetic in a field
+of size “prime number” for reasons explained in description of Shamir’s
+secret sharing algorithm.
+
+| Here is an explanation of POT profile list and profile configuration
+ CLI to realize the above mechanism. It is best to use the script
+ provided at `iOAM-Devnet <https://github.com/ciscodevnet/iOAM>`__ to
+ generate this configuration. - **Create POT profile** : set pot
+ profile name id [0-1]
+| [validator-key 0xu64] prime-number 0xu64 secret_share 0xu64
+| lpc 0xu64 polynomial2 0xu64 bits-in-random [0-64]
+| - name : Profile list name. - id : Profile id, it can be 0 or 1. A
+ maximum of two profiles can be configured per profile list. -
+ validator-key : Secret key configured only on the
+ verifier/decapsulating node used to compare and verify proof of
+ transit. - prime-number : Prime number for finite field arithmetic as
+ required by the proof of transit mechanism. - secret_share : Unique
+ point for each node on the secret polynomial POLY-1. - lpc : Lagrange
+ Polynomial Constant(LPC) calculated per node based on its point (x
+ value used for evaluating the points on the polynomial) on the
+ polynomial used in lagrange extrapolation for reconstructing
+ polynomial (POLY-3). - polynomial2 : Is the pre-evaluated value of the
+ point on 2nd polynomial(POLY-2). This is unique for each node. It is
+ pre-evaluated for all the coefficients of POLY-2 except for the
+ constant part of the polynomial that changes per packet and is
+ received as part of the POT data in the packet. - bits-in-random : To
+ control the size of the random number to be generated. This number has
+ to match the other numbers generated and used in the profile as per
+ the algorithm.
+
+- **Set a configured profile as active/in-use** :
+ set pot profile-active name ID [0-1]
+
+ - name : Name of the profile list to be used for computing POT data
+ per packet.
+ - ID : Identifier of the profile within the list to be used.
+
+.. _on-in-band-oam-encapsulating-node-1:
+
+On In-band OAM encapsulating node
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Configure the classifier and apply ACL to select packets for iOAM
+ data insertion.
+
+ - Example to enable iOAM data insertion for all the packet towards
+ IPv6 address db06::06 -
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-add-hop-by-hop
+ table-index 0 match l3 ip6 dst db06::06
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+- Configure the proof of transit profile list with profiles. Each
+ profile list referred to by a name can contain 2 profiles, only one
+ is in use for updating proof of transit data at any time.
+
+ - Example profile list example with a profile generated from the
+ script to verify transit through 3 nodes is:
+
+ vpp# set pot profile name example id 0 prime-number
+ 0x7fff0000fa884685 secret_share 0x6c22eff0f45ec56d lpc
+ 0x7fff0000fa884682 polynomial2 0xffb543d4a9c bits-in-random 63
+
+- Enable one of the profiles from the configured profile list as active
+ so that is will be used for calculating proof of transit
+
+Example enable profile ID 0 from profile list example configured above:
+
+::
+
+ vpp# set pot profile-active name example ID 0
+
+- Enable POT option to be inserted
+
+ vpp# set ioam rewrite pot
+
+.. _on-in-band-oam-transit-node-1:
+
+On in-band OAM transit node
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- Configure the proof of transit profile list with profiles for transit
+ node. Example:
+
+ vpp# set pot profile name example id 0 prime-number
+ 0x7fff0000fa884685 secret_share 0x564cdbdec4eb625d lpc 0x1
+ polynomial2 0x23f3a227186a bits-in-random 63
+
+On in-band OAM decapsulating node / verifier
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- The decapsulating node, similar to the encapsulating node requires
+ classification of the packets to remove iOAM data from.
+
+ - Example to decapsulate iOAM data for packets towards db06::06
+ configure classifier and enable it as an ACL as follows:
+
+ vpp# classify table miss-next node ip6-lookup mask l3 ip6 dst
+
+ vpp# classify session acl-hit-next node ip6-lookup table-index 0
+ match l3 ip6 dst db06::06 opaque-index 100
+
+ vpp# set int input acl intfc GigabitEthernet0/0/0 ip6-table 0
+
+- To update and verify the proof of transit, POT profile list should be
+ configured.
+
+ - Example POT profile list configured as follows:
+
+ vpp# set pot profile name example id 0 validate-key
+ 0x7fff0000fa88465d prime-number 0x7fff0000fa884685 secret_share
+ 0x7a08fbfc5b93116d lpc 0x3 polynomial2 0x3ff738597ce bits-in-random
+ 63
+
+Operational data
+----------------
+
+Following CLIs are available to check iOAM operation: - To check iOAM
+configuration that are effective use “show ioam summary”
+
+Example:
+
+::
+
+ vpp# show ioam summary
+ REWRITE FLOW CONFIGS - Not configured
+ HOP BY HOP OPTIONS - TRACE CONFIG -
+ Trace Type : 0x1f (31)
+ Trace timestamp precision : 1 (Milliseconds)
+ Num of trace nodes : 4
+ Node-id : 0x2 (2)
+ App Data : 0x1234 (4660)
+ POT OPTION - 1 (Enabled)
+ Try 'show ioam pot and show pot profile' for more information
+
+- To find statistics about packets for which iOAM options were added
+ (encapsulating node) and removed (decapsulating node) execute *show
+ errors*
+
+Example on encapsulating node:
+
+::
+
+ vpp# show error
+ Count Node Reason
+ 1208804706 ip6-inacl input ACL hits
+ 1208804706 ip6-add-hop-by-hop Pkts w/ added ip6 hop-by-hop options
+
+Example on decapsulating node:
+
+::
+
+ vpp# show error
+ Count Node Reason
+ 69508569 ip6-inacl input ACL hits
+ 69508569 ip6-pop-hop-by-hop Pkts w/ removed ip6 hop-by-hop options
+
+- To check the POT profiles use “show pot profile”
+
+Example:
+
+::
+
+ vpp# show pot profile
+ Profile list in use : example
+ POT Profile at index: 0
+ ID : 0
+ Validator : False (0)
+ Secret share : 0x564cdbdec4eb625d (6218586935324795485)
+ Prime number : 0x7fff0000fa884685 (9223090566081300101)
+ 2nd polynomial(eval) : 0x23f3a227186a (39529304496234)
+ LPC : 0x1 (1)
+ Bit mask : 0x7fffffffffffffff (9223372036854775807)
+ Profile index in use: 0
+ Pkts passed : 0x36 (54)
+
+- To get statistics of POT for packets use “show ioam pot”
+
+Example at encapsulating or transit node:
+
+::
+
+ vpp# show ioam pot
+ Pkts with ip6 hop-by-hop POT options - 54
+ Pkts with ip6 hop-by-hop POT options but no profile set - 0
+ Pkts with POT in Policy - 0
+ Pkts with POT out of Policy - 0
+
+Example at decapsulating/verification node:
+
+::
+
+ vpp# show ioam pot
+ Pkts with ip6 hop-by-hop POT options - 54
+ Pkts with ip6 hop-by-hop POT options but no profile set - 0
+ Pkts with POT in Policy - 54
+ Pkts with POT out of Policy - 0
+
+- Tracing - enable trace of IPv6 packets to view the data inserted and
+ collected.
+
+Example when the nodes are receiving data over a DPDK interface: Enable
+tracing using “trace add dpdk-input 20” and execute “show trace” to view
+the iOAM data collected:
+
+::
+
+ vpp# trace add dpdk-input 20
+
+ vpp# show trace
+
+ ------------------- Start of thread 0 vpp_main -------------------
+
+ Packet 1
+
+ 00:00:19:294697: dpdk-input
+ GigabitEthernetb/0/0 rx queue 0
+ buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
+ PKT MBUF: port 0, nb_segs 1, pkt_len 214
+ buf_len 2176, data_len 214, ol_flags 0x0, data_off 128, phys_addr 0xe9a35a00
+ packet_type 0x0
+ IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294737: ethernet-input
+ IP6: 00:50:56:9c:df:72 -> 00:50:56:9c:be:55
+ 00:00:19:294753: ip6-input
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294757: ip6-lookup
+ fib 0 adj-idx 15 : indirect via db05::2 flow hash: 0x00000000
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 63, payload length 160
+ 00:00:19:294802: ip6-hop-by-hop
+ IP6_HOP_BY_HOP: next index 5 len 96 traced 96 Trace Type 0x1f , 1 elts left
+ [0] ttl 0x0 node ID 0x0 ingress 0x0 egress 0x0 ts 0x0
+ app 0x0
+ [1] ttl 0x3e node ID 0x3 ingress 0x1 egress 0x2 ts 0xb68c2213
+ app 0x1234
+ [2] ttl 0x3f node ID 0x2 ingress 0x1 egress 0x2 ts 0xb68c2204
+ app 0x1234
+ [3] ttl 0x40 node ID 0x1 ingress 0x5 egress 0x6 ts 0xb68c2200
+ app 0x1234
+ POT opt present
+ random = 0x577a916946071950, Cumulative = 0x10b46e78a35a392d, Index = 0x0
+ 00:00:19:294810: ip6-rewrite
+ tx_sw_if_index 1 adj-idx 14 : GigabitEthernetb/0/0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72 flow hash: 0x00000000
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
+ 00:00:19:294814: GigabitEthernetb/0/0-output
+ GigabitEthernetb/0/0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
+ 00:00:19:294820: GigabitEthernetb/0/0-tx
+ GigabitEthernetb/0/0 tx queue 0
+ buffer 0x10e6b: current data 0, length 214, free-list 0, totlen-nifb 0, trace 0x0
+ IP6: 00:50:56:9c:be:55 -> 00:50:56:9c:df:72
+
+ IP6_HOP_BY_HOP_OPTIONS: db05::2 -> db06::6
+
+ tos 0x00, flow label 0x0, hop limit 62, payload length 160
diff --git a/src/plugins/ioam/ip6/ioam_cache.h b/src/plugins/ioam/ip6/ioam_cache.h
index b85172e8ac1..8c1b6291707 100644
--- a/src/plugins/ioam/ip6/ioam_cache.h
+++ b/src/plugins/ioam/ip6/ioam_cache.h
@@ -608,17 +608,20 @@ ioam_cache_ts_table_destroy (vlib_main_t * vm)
int i;
/* free pool and hash table */
- for (i = 0; i < no_of_threads; i++)
+ if (cm->ioam_ts_pool)
{
- pool_foreach (entry, cm->ioam_ts_pool[i])
- {
- ioam_cache_ts_entry_free (i, entry, cm->error_node_index);
- }
- pool_free (cm->ioam_ts_pool[i]);
- cm->ioam_ts_pool = 0;
- tw_timer_wheel_free_16t_2w_512sl (&cm->timer_wheels[i]);
+ for (i = 0; i < no_of_threads; i++)
+ {
+ pool_foreach (entry, cm->ioam_ts_pool[i])
+ {
+ ioam_cache_ts_entry_free (i, entry, cm->error_node_index);
+ }
+ pool_free (cm->ioam_ts_pool[i]);
+ cm->ioam_ts_pool[i] = 0;
+ tw_timer_wheel_free_16t_2w_512sl (&cm->timer_wheels[i]);
+ }
+ vec_free (cm->ioam_ts_pool);
}
- vec_free (cm->ioam_ts_pool);
return (0);
}
diff --git a/src/plugins/ioam/ip6/ioam_cache_node.c b/src/plugins/ioam/ip6/ioam_cache_node.c
index 6a5465b86aa..9859ee6fbf0 100644
--- a/src/plugins/ioam/ip6/ioam_cache_node.c
+++ b/src/plugins/ioam/ip6/ioam_cache_node.c
@@ -179,7 +179,6 @@ ip6_ioam_cache_node_fn (vlib_main_t * vm,
/*
* Node for IP6 iOAM header cache
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ioam_cache_node) =
{
.function = ip6_ioam_cache_node_fn,
@@ -196,7 +195,6 @@ VLIB_REGISTER_NODE (ioam_cache_node) =
[IOAM_CACHE_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop"
},
};
-/* *INDENT-ON* */
typedef struct
{
@@ -386,7 +384,6 @@ VLIB_NODE_FN (ip6_add_from_cache_hbh_node) (vlib_main_t * vm,
processed);
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_add_from_cache_hbh_node) =
{
.name = "ip6-add-from-cache-hop-by-hop",
@@ -404,7 +401,6 @@ VLIB_REGISTER_NODE (ip6_add_from_cache_hbh_node) =
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
index d2c7f20a778..61476ebd85c 100644
--- a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
+++ b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
@@ -235,7 +235,6 @@ ip6_ioam_cache_ts_node_fn (vlib_main_t * vm,
/*
* Node for IP6 iOAM header cache
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ioam_cache_ts_node) =
{
.function = ip6_ioam_cache_ts_node_fn,
@@ -253,7 +252,6 @@ VLIB_REGISTER_NODE (ioam_cache_ts_node) =
[IOAM_CACHE_TS_ERROR_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
typedef struct
{
@@ -623,7 +621,6 @@ VLIB_NODE_FN (ip6_reset_ts_hbh_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_reset_ts_hbh_node) =
{
.name = "ip6-add-syn-hop-by-hop",
@@ -642,7 +639,6 @@ VLIB_REGISTER_NODE (ip6_reset_ts_hbh_node) =
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vlib_node_registration_t ioam_cache_ts_timer_tick_node;
@@ -738,7 +734,6 @@ ioam_cache_ts_timer_tick_node_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ioam_cache_ts_timer_tick_node) = {
.function = ioam_cache_ts_timer_tick_node_fn,
.name = "ioam-cache-ts-timer-tick",
@@ -757,7 +752,6 @@ VLIB_REGISTER_NODE (ioam_cache_ts_timer_tick_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/ipfixcollector/node.c b/src/plugins/ioam/ipfixcollector/node.c
index 098029d6ad6..73d7b57fab5 100644
--- a/src/plugins/ioam/ipfixcollector/node.c
+++ b/src/plugins/ioam/ipfixcollector/node.c
@@ -271,7 +271,6 @@ ipfix_collector_node_fn (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipfix_collector_node) = {
.function = ipfix_collector_node_fn,
.name = "ipfix-collector",
@@ -289,7 +288,6 @@ VLIB_REGISTER_NODE (ipfix_collector_node) = {
[IPFIX_COLLECTOR_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/lib-e2e/e2e_util.h b/src/plugins/ioam/lib-e2e/e2e_util.h
index f8a4ebd4797..a72b4030b2c 100644
--- a/src/plugins/ioam/lib-e2e/e2e_util.h
+++ b/src/plugins/ioam/lib-e2e/e2e_util.h
@@ -18,13 +18,11 @@
#include <ioam/lib-e2e/ioam_seqno_lib.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
u8 e2e_type;
u8 reserved;
u32 e2e_data;
}) ioam_e2e_packet_t;
-/* *INDENT-ON* */
#endif /* PLUGINS_IOAM_PLUGIN_IOAM_LIB_E2E_E2E_UTIL_H_ */
diff --git a/src/plugins/ioam/lib-pot/math64.h b/src/plugins/ioam/lib-pot/math64.h
index 4c608a37de4..2084c25fa58 100644
--- a/src/plugins/ioam/lib-pot/math64.h
+++ b/src/plugins/ioam/lib-pot/math64.h
@@ -51,23 +51,23 @@ static inline void mul64by64(u64 a, u64 b, u64 * hi, u64 * lo)
static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
{
- u64 q1, q2, q;
- u64 p1, p0;
- double dq;
+ u64 q1, q2;
+ u64 p1, p0;
+ double dq;
- /* calculate quotient first pass 53 bits */
- dq = (TWO64 * (double)x + (double)y) * di;
+ /* calculate quotient first pass 53 bits */
+ dq = (TWO64 * (double) x + (double) y) * di;
- if (dq >= TWO64)
- q1 = 0xfffffffffffff800L;
- else
- q1 = dq;
+ if (dq >= TWO64)
+ q1 = 0xfffffffffffff800L;
+ else
+ q1 = dq;
- /* q1 * m to compare the product to the dividend. */
- mul64by64(q1, m, &p1, &p0);
+ /* q1 * m to compare the product to the dividend. */
+ mul64by64 (q1, m, &p1, &p0);
- /* Adjust quotient. is it > actual result: */
- if (x < p1 || (x == p1 && y < p0))
+ /* Adjust quotient. is it > actual result: */
+ if (x < p1 || (x == p1 && y < p0))
{
/* q1 > quotient. calculate abs remainder */
x = p1 - (x + (p0 < y));
@@ -77,7 +77,6 @@ static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
q2 = (u64) ((TWO64 * (double)x + (double)y) * di);
mul64by64(q2, m, &p1, &p0);
- q = q1 - q2;
if (x < p1 || (x == p1 && y <= p0))
{
y = p0 - y;
@@ -86,7 +85,6 @@ static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
{
y = p0 - y;
y += m;
- q--;
}
}
else
@@ -97,12 +95,10 @@ static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
q2 = (u64) ((TWO64 * (double)x + (double)y) * di);
mul64by64(q2, m, &p1, &p0);
- q = q1 + q2;
if (x < p1 || (x == p1 && y < p0))
{
y = y - p0;
y += m;
- q--;
}
else
{
@@ -110,7 +106,6 @@ static inline u64 mod128by64(u64 x, u64 y, u64 m, double di)
if (y >= m)
{
y -= m;
- q++;
}
}
}
diff --git a/src/plugins/ioam/lib-trace/trace_util.c b/src/plugins/ioam/lib-trace/trace_util.c
index d935543cf23..31fbb2b7446 100644
--- a/src/plugins/ioam/lib-trace/trace_util.c
+++ b/src/plugins/ioam/lib-trace/trace_util.c
@@ -98,14 +98,12 @@ clear_trace_profiles (void)
clear_trace_profile_command_fn (0, 0, 0);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(clear_trace_profile_command) =
{
.path = "clear ioam-trace profile",
.short_help = "clear ioam-trace profile [<index>|all]",
.function = clear_trace_profile_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_trace_profile_command_fn (vlib_main_t * vm,
@@ -137,7 +135,6 @@ set_trace_profile_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_trace_profile_command, static) =
{
.path = "set ioam-trace profile",
@@ -146,7 +143,6 @@ VLIB_CLI_COMMAND (set_trace_profile_command, static) =
node-id <node id in hex> app-data <app_data in hex>",
.function = set_trace_profile_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_trace_profile_command_fn (vlib_main_t * vm,
@@ -189,14 +185,12 @@ show_trace_profile_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_trace_profile_command, static) =
{
.path = "show ioam-trace profile",
.short_help = "show ioam-trace profile",
.function = show_trace_profile_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ioam/lib-trace/trace_util.h b/src/plugins/ioam/lib-trace/trace_util.h
index 61f18d9173a..869ea717cf8 100644
--- a/src/plugins/ioam/lib-trace/trace_util.h
+++ b/src/plugins/ioam/lib-trace/trace_util.h
@@ -75,14 +75,12 @@ int trace_profile_create (trace_profile * profile, u8 trace_type, u8 num_elts,
void clear_trace_profiles (void);
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
u8 ioam_trace_type;
u8 data_list_elts_left;
u32 elts[0]; /* Variable type. So keep it generic */
}) ioam_trace_hdr_t;
-/* *INDENT-ON* */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
index 87e57d3605e..801faa98066 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_decap.c
@@ -193,7 +193,6 @@ vxlan_gpe_decap_ioam_v4 (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_decap_ioam_v4_node) = {
.function = vxlan_gpe_decap_ioam_v4,
.name = "vxlan-gpe-decap-ioam-v4",
@@ -211,7 +210,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_decap_ioam_v4_node) = {
[VXLAN_GPE_DECAP_IOAM_V4_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
index 1d15654464d..de375df4f7c 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_encap.c
@@ -164,7 +164,6 @@ vxlan_gpe_encap_ioam_v4 (vlib_main_t * vm,
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_encap_ioam_v4_node) = {
.function = vxlan_gpe_encap_ioam_v4,
.name = "vxlan-gpe-encap-ioam-v4",
@@ -182,7 +181,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_encap_ioam_v4_node) = {
[VXLAN_GPE_ENCAP_IOAM_V4_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
index 84900eb7e01..2fa0aa29450 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_pop.c
@@ -321,7 +321,6 @@ vxlan_gpe_pop_ioam_v4 (vlib_main_t * vm,
return vxlan_gpe_pop_ioam (vm, node, from_frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_pop_ioam_v4_node) = {
.function = vxlan_gpe_pop_ioam_v4,
.name = "vxlan-gpe-pop-ioam-v4",
@@ -340,7 +339,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_pop_ioam_v4_node) = {
#undef _
},
};
-/* *INDENT-ON* */
diff --git a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
index 215f14b74bc..e3c82725e26 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/ioam_transit.c
@@ -51,14 +51,12 @@ typedef enum
} vxlan_gpe_transit_ioam_next_t;
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (vxlan_gpe_transit_ioam, static) =
{
.arc_name = "ip4-output",
.node_name = "vxlan-gpe-transit-ioam",
.runs_before = VNET_FEATURES ("interface-output"),
};
-/* *INDENT-ON* */
static uword
vxlan_gpe_transit_ioam (vlib_main_t * vm,
@@ -156,7 +154,6 @@ vxlan_gpe_transit_ioam (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_transit_ioam_node) = {
.function = vxlan_gpe_transit_ioam,
.name = "vxlan-gpe-transit-ioam",
@@ -175,7 +172,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_transit_ioam_node) = {
},
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
index 9f6d181f0b1..d61832d975a 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_api.c
@@ -27,6 +27,7 @@
#include <vlibmemory/api.h>
#include <vnet/format_fns.h>
#include <vnet/ip/ip_types_api.h>
+#include <vnet/udp/udp_local.h>
/* define message IDs */
#include <ioam/lib-vxlan-gpe/ioam_vxlan_gpe.api_enum.h>
@@ -92,7 +93,7 @@ static void vl_api_vxlan_gpe_ioam_vni_enable_t_handler
clib_memcpy (&key4.remote, &mp->remote.un.ip4, sizeof (key4.remote));
vni = clib_net_to_host_u32 (mp->vni);
key4.vni = clib_host_to_net_u32 (vni << 8);
- key4.pad = 0;
+ key4.port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
}
@@ -141,7 +142,7 @@ static void vl_api_vxlan_gpe_ioam_vni_disable_t_handler
clib_memcpy (&key4.remote, &mp->remote, sizeof (key4.remote));
vni = clib_net_to_host_u32 (mp->vni);
key4.vni = clib_host_to_net_u32 (vni << 8);
- key4.pad = 0;
+ key4.port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
}
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
index 108b0c0765b..327afc3fb61 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam.c
@@ -19,6 +19,7 @@
#include <vnet/dpo/load_balance.h>
#include <vnet/fib/ip4_fib.h>
#include <vnet/fib/fib_entry.h>
+#include <vnet/udp/udp_local.h>
vxlan_gpe_ioam_main_t vxlan_gpe_ioam_main;
@@ -478,7 +479,7 @@ vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t *
key4.local = local.ip4.as_u32;
key4.remote = remote.ip4.as_u32;
key4.vni = clib_host_to_net_u32 (vni << 8);
- key4.pad = 0;
+ key4.port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
p = hash_get_mem (gm->vxlan4_gpe_tunnel_by_key, &key4);
}
else
@@ -488,6 +489,7 @@ vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t *
key6.remote.as_u64[0] = remote.ip6.as_u64[0];
key6.remote.as_u64[1] = remote.ip6.as_u64[1];
key6.vni = clib_host_to_net_u32 (vni << 8);
+ key6.port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN6_GPE);
p = hash_get_mem (gm->vxlan6_gpe_tunnel_by_key, &key6);
}
@@ -508,13 +510,11 @@ vxlan_gpe_set_ioam_rewrite_command_fn (vlib_main_t *
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_rewrite_cmd, static) = {
.path = "set vxlan-gpe-ioam",
.short_help = "set vxlan-gpe-ioam vxlan <src-ip> <dst_ip> <vnid> [disable]",
.function = vxlan_gpe_set_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
@@ -590,13 +590,11 @@ vxlan_gpe_set_ioam_flags_command_fn (vlib_main_t * vm,
return rv;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_flags_cmd, static) =
{
.path = "set vxlan-gpe-ioam rewrite",
.short_help = "set vxlan-gpe-ioam [trace] [pot] [ppc <encap|decap>]",
.function = vxlan_gpe_set_ioam_flags_command_fn,};
-/* *INDENT-ON* */
int vxlan_gpe_ioam_disable_for_dest
@@ -681,13 +679,11 @@ static clib_error_t *vxlan_gpe_set_ioam_transit_rewrite_command_fn
return rv;
}
- /* *INDENT-OFF* */
VLIB_CLI_COMMAND (vxlan_gpe_set_ioam_transit_rewrite_cmd, static) = {
.path = "set vxlan-gpe-ioam-transit",
.short_help = "set vxlan-gpe-ioam-transit dst-ip <dst_ip> [outer-fib-index <outer_fib_index>] [disable]",
.function = vxlan_gpe_set_ioam_transit_rewrite_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *clear_vxlan_gpe_ioam_rewrite_command_fn
(vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -695,14 +691,12 @@ clib_error_t *clear_vxlan_gpe_ioam_rewrite_command_fn
return (vxlan_gpe_ioam_disable (0, 0, 0));
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vxlan_gpe_clear_ioam_flags_cmd, static) =
{
.path = "clear vxlan-gpe-ioam rewrite",
.short_help = "clear vxlan-gpe-ioam rewrite",
.function = clear_vxlan_gpe_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
/**
@@ -749,7 +743,8 @@ void
vxlan_gpe_ioam_interface_init (void)
{
vxlan_gpe_ioam_main_t *hm = &vxlan_gpe_ioam_main;
- hm->fib_entry_type = fib_node_register_new_type (&vxlan_gpe_ioam_vft);
+ hm->fib_entry_type =
+ fib_node_register_new_type ("vxlan-gpe", &vxlan_gpe_ioam_vft);
return;
}
diff --git a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
index 1a37059396c..9c783c747d0 100644
--- a/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
+++ b/src/plugins/ioam/lib-vxlan-gpe/vxlan_gpe_ioam_trace.c
@@ -39,14 +39,12 @@ typedef union
} time_u64_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
vxlan_gpe_ioam_option_t hdr;
u8 ioam_trace_type;
u8 data_list_elts_left;
u32 elts[0]; /* Variable type. So keep it generic */
}) vxlan_gpe_ioam_trace_option_t;
-/* *INDENT-ON* */
#define foreach_vxlan_gpe_ioam_trace_stats \
@@ -422,13 +420,11 @@ vxlan_gpe_show_ioam_trace_cmd_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vxlan_gpe_show_ioam_trace_cmd, static) = {
.path = "show ioam vxlan-gpe trace",
.short_help = "iOAM trace statistics",
.function = vxlan_gpe_show_ioam_trace_cmd_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -459,13 +455,11 @@ vxlan_gpe_ioam_trace_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (vxlan_gpe_ioam_trace_init) =
{
.runs_after = VLIB_INITS("ip_main_init", "ip6_lookup_init",
"vxlan_gpe_init"),
};
-/* *INDENT-ON* */
int
diff --git a/src/plugins/ioam/udp-ping/udp_ping_export.c b/src/plugins/ioam/udp-ping/udp_ping_export.c
index 3e835989a6f..78d62233a7d 100644
--- a/src/plugins/ioam/udp-ping/udp_ping_export.c
+++ b/src/plugins/ioam/udp-ping/udp_ping_export.c
@@ -23,20 +23,18 @@
#define UDP_PING_EXPORT_RECORD_SIZE 400
static u8 *
-udp_ping_template_rewrite (flow_report_main_t * frm, flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address, u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+udp_ping_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port, ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index)
{
- return ioam_template_rewrite (frm, fr, collector_address,
- src_address, collector_port, elts, n_elts,
+ return ioam_template_rewrite (exp, fr, collector_port, elts, n_elts,
stream_index);
}
static vlib_frame_t *
-udp_ping_send_flows (flow_report_main_t * frm, flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+udp_ping_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
vlib_buffer_t *b0 = NULL;
u32 next_offset = 0;
@@ -47,17 +45,16 @@ udp_ping_send_flows (flow_report_main_t * frm, flow_report_t * fr,
ipfix_set_header_t *s = NULL;
ip4_header_t *ip;
udp_header_t *udp;
- u32 records_this_buffer;
u16 new_l0, old_l0;
ip_csum_t sum0;
- vlib_main_t *vm = frm->vlib_main;
+ vlib_main_t *vm = vlib_get_main ();
flow_report_stream_t *stream;
udp_ping_flow_data *stats;
ip46_udp_ping_flow *ip46_flow;
u16 src_port, dst_port;
u16 data_len;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
data_len = vec_len (udp_ping_main.ip46_flow);
for (i = 0; i < data_len; i++)
@@ -101,7 +98,6 @@ udp_ping_send_flows (flow_report_main_t * frm, flow_report_t * fr,
h->sequence_number =
clib_host_to_net_u32 (h->sequence_number);
next_offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
- records_this_buffer = 0;
}
next_offset = ioam_analyse_add_ipfix_record (fr,
@@ -117,10 +113,8 @@ udp_ping_send_flows (flow_report_main_t * frm, flow_report_t * fr,
//memcpy (b0->data + next_offset, &pak_sent, sizeof(u32));
//next_offset += sizeof(u32);
- records_this_buffer++;
-
/* Flush data if packet len is about to reach path mtu */
- if (next_offset > (frm->path_mtu - UDP_PING_EXPORT_RECORD_SIZE))
+ if (next_offset > (exp->path_mtu - UDP_PING_EXPORT_RECORD_SIZE))
{
b0->current_length = next_offset;
b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -230,7 +224,7 @@ udp_ping_flow_create (u8 del)
vnet_flow_report_add_del_args_t args;
int rv;
u32 domain_id = 0;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
u16 template_id;
clib_memset (&args, 0, sizeof (args));
@@ -240,7 +234,7 @@ udp_ping_flow_create (u8 del)
args.domain_id = domain_id;
args.src_port = UDP_DST_PORT_ipfix;
- rv = vnet_flow_report_add_del (frm, &args, &template_id);
+ rv = vnet_flow_report_add_del (exp, &args, &template_id);
switch (rv)
{
@@ -281,13 +275,11 @@ set_udp_ping_export_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_udp_ping_export_command, static) = {
.path = "set udp-ping export-ipfix",
.short_help = "set udp-ping export-ipfix [disable]",
.function = set_udp_ping_export_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
udp_ping_flow_report_init (vlib_main_t * vm)
@@ -295,12 +287,10 @@ udp_ping_flow_report_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (udp_ping_flow_report_init) =
{
.runs_after = VLIB_INITS ("flow_report_init"),
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/ioam/udp-ping/udp_ping_node.c b/src/plugins/ioam/udp-ping/udp_ping_node.c
index 6bfa8f67ef5..fbc3b13971c 100644
--- a/src/plugins/ioam/udp-ping/udp_ping_node.c
+++ b/src/plugins/ioam/udp-ping/udp_ping_node.c
@@ -93,14 +93,12 @@ format_udp_ping_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp_ping_node, static) =
{
.function = udp_ping_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "udp-ping-process",
};
-/* *INDENT-ON* */
void
udp_ping_calculate_timer_interval (void)
@@ -301,7 +299,6 @@ set_udp_ping_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_udp_ping_command, static) =
{
.path = "set udp-ping",
@@ -312,7 +309,6 @@ VLIB_CLI_COMMAND (set_udp_ping_command, static) =
[disable]",
.function = set_udp_ping_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_udp_ping_summary_cmd_fn (vlib_main_t * vm,
@@ -374,14 +370,12 @@ show_udp_ping_summary_cmd_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_udp_ping_cmd, static) =
{
.path = "show udp-ping summary",
.short_help = "Summary of udp-ping",
.function = show_udp_ping_summary_cmd_fn,
};
-/* *INDENT-ON* */
/**
* @brief UDP-Ping Process node.
@@ -790,7 +784,6 @@ udp_ping_local_node_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
/*
* Node for udp-ping-local
*/
@@ -814,7 +807,6 @@ VLIB_REGISTER_NODE (udp_ping_local, static) =
[UDP_PING_NEXT_IP6_DROP] = "ip6-drop",
},
};
-/* *INDENT-ON* */
static clib_error_t *
udp_ping_init (vlib_main_t * vm)
@@ -828,12 +820,10 @@ udp_ping_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (udp_ping_init) =
{
.runs_after = VLIB_INITS("ip_main_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ip_session_redirect/CMakeLists.txt b/src/plugins/ip_session_redirect/CMakeLists.txt
new file mode 100644
index 00000000000..09b93d72759
--- /dev/null
+++ b/src/plugins/ip_session_redirect/CMakeLists.txt
@@ -0,0 +1,27 @@
+# Copyright (c) 2021-2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(ip_session_redirect
+ SOURCES
+ api.c
+ redirect.c
+
+ API_FILES
+ ip_session_redirect.api
+
+ API_TEST_SOURCES
+ test_api.c
+
+ INSTALL_HEADERS
+ ip_session_redirect.h
+)
diff --git a/src/plugins/ip_session_redirect/FEATURE.yaml b/src/plugins/ip_session_redirect/FEATURE.yaml
new file mode 100644
index 00000000000..d5cca4673d6
--- /dev/null
+++ b/src/plugins/ip_session_redirect/FEATURE.yaml
@@ -0,0 +1,9 @@
+---
+name: IP session redirect
+maintainer: Benoît Ganne <bganne@cisco.com>
+features:
+ - use the classifier ACL infrastructure to redirect sessions via arbitrary
+ fib paths
+description: "IP session redirect plugin"
+state: experimental
+properties: [CLI, STATS, MULTITHREAD, API]
diff --git a/src/plugins/ip_session_redirect/api.c b/src/plugins/ip_session_redirect/api.c
new file mode 100644
index 00000000000..1d17d55b5b4
--- /dev/null
+++ b/src/plugins/ip_session_redirect/api.c
@@ -0,0 +1,124 @@
+/* Copyright (c) 2021-2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_api.h>
+#include <vnet/ip/ip_format_fns.h>
+#include <vlibmemory/api.h>
+#include <vlibapi/api.h>
+
+#define REPLY_MSG_ID_BASE vl_api_ip_sesion_redirect_msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+#include "ip_session_redirect.api_enum.h"
+#include "ip_session_redirect.api_types.h"
+
+#include "ip_session_redirect.h"
+
+static u16 vl_api_ip_sesion_redirect_msg_id_base;
+
+static int
+vl_api_ip_session_redirect_add (u32 table_index, u32 opaque_index,
+ vl_api_fib_path_nh_proto_t proto, int is_punt,
+ u8 *match, int match_len,
+ vl_api_fib_path_t *paths, int n_paths)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ fib_route_path_t *paths_ = 0;
+ dpo_proto_t proto_;
+ u8 *match_ = 0;
+ int rv = 0;
+
+ if (n_paths <= 0)
+ {
+ rv = VNET_API_ERROR_NO_PATHS_IN_ROUTE;
+ goto err0;
+ }
+
+ for (int i = 0; i < n_paths; i++)
+ {
+ fib_route_path_t path;
+ if ((rv = fib_api_path_decode (&paths[i], &path)))
+ goto err1;
+ vec_add1 (paths_, path);
+ }
+
+ if (~0 == proto)
+ proto_ = paths_[0].frp_proto;
+ else
+ fib_api_path_nh_proto_to_dpo (ntohl (proto), &proto_);
+
+ vec_add (match_, match, match_len);
+ rv = ip_session_redirect_add (vm, ntohl (table_index), ntohl (opaque_index),
+ proto_, is_punt, match_, paths_);
+ vec_free (match_);
+
+err1:
+ vec_free (paths_);
+err0:
+ return rv;
+}
+
+static void
+vl_api_ip_session_redirect_add_t_handler (vl_api_ip_session_redirect_add_t *mp)
+{
+ vl_api_ip_session_redirect_add_reply_t *rmp;
+ int rv = vl_api_ip_session_redirect_add (
+ mp->table_index, mp->opaque_index, ~0 /* proto */, mp->is_punt, mp->match,
+ mp->match_len, mp->paths, mp->n_paths);
+ REPLY_MACRO (VL_API_IP_SESSION_REDIRECT_ADD_REPLY)
+}
+
+static void
+vl_api_ip_session_redirect_add_v2_t_handler (
+ vl_api_ip_session_redirect_add_v2_t *mp)
+{
+ vl_api_ip_session_redirect_add_v2_reply_t *rmp;
+ int rv = vl_api_ip_session_redirect_add (
+ mp->table_index, mp->opaque_index, mp->proto, mp->is_punt, mp->match,
+ mp->match_len, mp->paths, mp->n_paths);
+ REPLY_MACRO (VL_API_IP_SESSION_REDIRECT_ADD_V2_REPLY)
+}
+
+static void
+vl_api_ip_session_redirect_del_t_handler (vl_api_ip_session_redirect_del_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_ip_session_redirect_del_reply_t *rmp;
+ u8 *match = 0;
+ int rv;
+
+ vec_add (match, mp->match, mp->match_len);
+ rv = ip_session_redirect_del (vm, ntohl (mp->table_index), match);
+ vec_free (match);
+
+ REPLY_MACRO (VL_API_IP_SESSION_REDIRECT_DEL_REPLY);
+}
+
+#include "ip_session_redirect.api.c"
+static clib_error_t *
+ip_session_redirect_plugin_api_hookup (vlib_main_t *vm)
+{
+ vl_api_ip_sesion_redirect_msg_id_base = setup_message_id_table ();
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (ip_session_redirect_plugin_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ip_session_redirect/ip_session_redirect.api b/src/plugins/ip_session_redirect/ip_session_redirect.api
new file mode 100644
index 00000000000..2bf2373dbd2
--- /dev/null
+++ b/src/plugins/ip_session_redirect/ip_session_redirect.api
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2021-2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "0.3.0";
+import "vnet/interface_types.api";
+import "vnet/fib/fib_types.api";
+
+/** \brief Add or update a session redirection
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table_index - classifier table index
+ @param opaque_index - classifier session opaque index
+ @param match_len - classifier session match length in bytes (max is 80-bytes)
+ @param match - classifier session match
+ @param is_punt - true = punted traffic, false = forwarded traffic
+ @param n_paths - number of paths
+ @param paths - the paths of the redirect
+*/
+
+autoreply define ip_session_redirect_add
+{
+ option deprecated;
+ u32 client_index;
+ u32 context;
+
+ u32 table_index;
+ u8 match_len;
+ u8 match[80];
+ u32 opaque_index [default=0xffffffff];
+ bool is_punt;
+ u8 n_paths;
+ vl_api_fib_path_t paths[n_paths];
+
+ option vat_help = "table <index> match <match> via <path>";
+ option status="in_progress";
+};
+
+/** \brief Add or update a session redirection - version 2
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table_index - classifier table index
+ @param opaque_index - classifier session opaque index
+ @param proto - protocol of forwarded packets (default autodetect from path nh)
+ @param is_punt - true = punted traffic, false = forwarded traffic
+ @param match_len - classifier session match length in bytes (max is 80-bytes)
+ @param match - classifier session match
+ @param n_paths - number of paths
+ @param paths - the paths of the redirect
+*/
+
+autoreply define ip_session_redirect_add_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 table_index;
+ u32 opaque_index [default=0xffffffff];
+ vl_api_fib_path_nh_proto_t proto [default=0xffffffff];
+ bool is_punt;
+ u8 match_len;
+ u8 match[80];
+ u8 n_paths;
+ vl_api_fib_path_t paths[n_paths];
+
+ option vat_help = "table <index> match <match> via <path>";
+ option status="in_progress";
+};
+
+/** \brief Delete a session redirection
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table_index - classifier table index
+ @param match_len - classifier session match length in bytes (max is 80-bytes)
+ @param match - classifier session match
+*/
+
+autoreply define ip_session_redirect_del
+{
+ u32 client_index;
+ u32 context;
+
+ u32 table_index;
+ u8 match_len;
+ u8 match[match_len];
+
+ option vat_help = "session-index <index> table <index> match <match>";
+ option status="in_progress";
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ip_session_redirect/ip_session_redirect.h b/src/plugins/ip_session_redirect/ip_session_redirect.h
new file mode 100644
index 00000000000..45f64eebba1
--- /dev/null
+++ b/src/plugins/ip_session_redirect/ip_session_redirect.h
@@ -0,0 +1,33 @@
+/* Copyright (c) 2021-2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#ifndef IP_SESSION_REDIRECT_H_
+#define IP_SESSION_REDIRECT_H_
+
+#include <vnet/fib/fib_node.h>
+
+int ip_session_redirect_add (vlib_main_t *vm, u32 table_index,
+ u32 opaque_index, dpo_proto_t proto, int is_punt,
+ const u8 *match, const fib_route_path_t *rpaths);
+int ip_session_redirect_del (vlib_main_t *vm, u32 table_index,
+ const u8 *match);
+
+#endif /* IP_SESSION_REDIRECT_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ip_session_redirect/ip_session_redirect_doc.rst b/src/plugins/ip_session_redirect/ip_session_redirect_doc.rst
new file mode 100644
index 00000000000..aad87166f8f
--- /dev/null
+++ b/src/plugins/ip_session_redirect/ip_session_redirect_doc.rst
@@ -0,0 +1,42 @@
+IP session redirect
+===================
+
+This plugin allows to steer packet via different paths based on the
+classifier.
+It leverages the VPP classifier ACL infrastructure (classifier, in_out_acl
+etc), extending its capabilities to redirect traffic without having to
+resort on additional VRFs.
+It also allows to steer punted packets using the same mechanism.
+
+Maturity level
+--------------
+
+Under development: it should work, but has not been thoroughly tested.
+
+Features
+--------
+
+- steer regular or/and punt traffic using the classifier
+- API
+
+Quickstart
+----------
+
+1. configure punting
+
+::
+
+ ~# vppctl set punt ipv4 udp all
+
+2. create the classifier table and uses it for punt ACL
+
+::
+
+ ~# vppctl classify table miss-next drop mask l3 ip4 src l4 udp src_port buckets 100000
+ ~# vppctl set interface input acl intfc local0 ip4-punt-table 0
+
+3. add session to steer punted packets
+
+::
+
+ ~# vppctl ip session redirect table 0 match l3 ip4 src 10.10.10.10 l4 src_port 1234 via 10.10.0.10 pg1
diff --git a/src/plugins/ip_session_redirect/punt_redirect.vpp b/src/plugins/ip_session_redirect/punt_redirect.vpp
new file mode 100644
index 00000000000..e3594cd71d9
--- /dev/null
+++ b/src/plugins/ip_session_redirect/punt_redirect.vpp
@@ -0,0 +1,48 @@
+create packet-generator interface pg0
+set int ip addr pg0 10.10.10.1/24
+
+create packet-generator interface pg1
+set int ip addr pg1 10.10.0.1/24
+set ip neighbor pg1 10.10.0.10 4.5.6
+
+set punt ipv4 udp all
+
+classify table miss-next drop mask l3 ip4 src l4 udp src_port buckets 100000
+set interface input acl intfc local0 ip4-punt-table 0
+ip session redirect punt table 0 match l3 ip4 src 10.10.10.10 l4 src_port 1234 via 10.10.0.10 pg1
+
+set int st pg0 up
+set int st pg1 up
+
+comment { punt because of no udp listener for 53667, redirected }
+packet-generator new { \
+ name ok \
+ limit 1 \
+ node ethernet-input \
+ source pg0 \
+ size 100-100 \
+ data { \
+ IP4: 5.6.7 -> 2.3.4 \
+ UDP: 10.10.10.10 -> 10.10.10.1 \
+ UDP: 1234 -> 53667 \
+ incrementing 1 \
+ } \
+}
+
+comment { punt because of no udp listener for 53668, dropped }
+packet-generator new { \
+ name nok \
+ limit 1 \
+ node ethernet-input \
+ source pg0 \
+ size 100-100 \
+ data { \
+ IP4: 5.6.7 -> 2.3.4 \
+ UDP: 10.10.10.10 -> 10.10.10.1 \
+ UDP: 1235 -> 53668 \
+ incrementing 1 \
+ } \
+}
+
+trace add pg-input 10
+pa en
diff --git a/src/plugins/ip_session_redirect/redirect.c b/src/plugins/ip_session_redirect/redirect.c
new file mode 100644
index 00000000000..ea18182e309
--- /dev/null
+++ b/src/plugins/ip_session_redirect/redirect.c
@@ -0,0 +1,463 @@
+/* Copyright (c) 2021-2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_path_list.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vnet/classify/in_out_acl.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include "ip_session_redirect.h"
+
+typedef struct
+{
+ u8 *match_and_table_index;
+ dpo_id_t dpo; /* forwarding dpo */
+ fib_node_t node; /* linkage into the FIB graph */
+ fib_node_index_t pl;
+ u32 sibling;
+ u32 parent_node_index;
+ u32 opaque_index;
+ u32 table_index;
+ fib_forward_chain_type_t payload_type;
+ u8 is_punt : 1;
+ u8 is_ip6 : 1;
+} ip_session_redirect_t;
+
+typedef struct
+{
+ ip_session_redirect_t *pool;
+ u32 *session_by_match_and_table_index;
+ fib_node_type_t fib_node_type;
+} ip_session_redirect_main_t;
+
+static ip_session_redirect_main_t ip_session_redirect_main;
+
+static int
+ip_session_redirect_stack (ip_session_redirect_t *ipr)
+{
+ dpo_id_t dpo = DPO_INVALID;
+
+ fib_path_list_contribute_forwarding (ipr->pl, ipr->payload_type,
+ fib_path_list_is_popular (ipr->pl) ?
+ FIB_PATH_LIST_FWD_FLAG_NONE :
+ FIB_PATH_LIST_FWD_FLAG_COLLAPSE,
+ &dpo);
+ dpo_stack_from_node (ipr->parent_node_index, &ipr->dpo, &dpo);
+ dpo_reset (&dpo);
+
+ /* update session with new next_index */
+ return vnet_classify_add_del_session (
+ &vnet_classify_main, ipr->table_index, ipr->match_and_table_index,
+ ipr->dpo.dpoi_next_node /* hit_next_index */, ipr->opaque_index,
+ 0 /* advance */, CLASSIFY_ACTION_SET_METADATA,
+ ipr->dpo.dpoi_index /* metadata */, 1 /* is_add */);
+}
+
+static ip_session_redirect_t *
+ip_session_redirect_find (ip_session_redirect_main_t *im, u32 table_index,
+ const u8 *match)
+{
+ /* we are adding the table index at the end of the match string so we
+ * can disambiguiate identical matches in different tables in
+ * im->session_by_match_and_table_index */
+ u8 *match_and_table_index = vec_dup (match);
+ vec_add (match_and_table_index, (void *) &table_index, 4);
+ uword *p =
+ hash_get_mem (im->session_by_match_and_table_index, match_and_table_index);
+ vec_free (match_and_table_index);
+ if (!p)
+ return 0;
+ return pool_elt_at_index (im->pool, p[0]);
+}
+
+__clib_export int
+ip_session_redirect_add (vlib_main_t *vm, u32 table_index, u32 opaque_index,
+ dpo_proto_t proto, int is_punt, const u8 *match,
+ const fib_route_path_t *rpaths)
+{
+ ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ fib_forward_chain_type_t payload_type;
+ ip_session_redirect_t *ipr;
+ const char *pname;
+
+ payload_type = fib_forw_chain_type_from_dpo_proto (proto);
+ switch (payload_type)
+ {
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
+ pname = is_punt ? "ip4-punt-acl" : "ip4-inacl";
+ break;
+ case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
+ pname = is_punt ? "ip6-punt-acl" : "ip6-inacl";
+ break;
+ default:
+ return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
+ }
+
+ ipr = ip_session_redirect_find (im, table_index, match);
+ if (ipr)
+ {
+ /* update to an existing session */
+ fib_path_list_child_remove (ipr->pl, ipr->sibling);
+ dpo_reset (&ipr->dpo);
+ }
+ else
+ {
+ /* allocate a new entry */
+ pool_get (im->pool, ipr);
+ fib_node_init (&ipr->node, im->fib_node_type);
+ ipr->match_and_table_index = vec_dup ((u8 *) match);
+ /* we are adding the table index at the end of the match string so we
+ * can disambiguiate identical matches in different tables in
+ * im->session_by_match_and_table_index */
+ vec_add (ipr->match_and_table_index, (void *) &table_index, 4);
+ ipr->table_index = table_index;
+ hash_set_mem (im->session_by_match_and_table_index,
+ ipr->match_and_table_index, ipr - im->pool);
+ }
+
+ ipr->payload_type = payload_type;
+ ipr->pl = fib_path_list_create (
+ FIB_PATH_LIST_FLAG_SHARED | FIB_PATH_LIST_FLAG_NO_URPF, rpaths);
+ ipr->sibling =
+ fib_path_list_child_add (ipr->pl, im->fib_node_type, ipr - im->pool);
+ ipr->parent_node_index = vlib_get_node_by_name (vm, (u8 *) pname)->index;
+ ipr->opaque_index = opaque_index;
+ ipr->is_punt = is_punt;
+ ipr->is_ip6 = payload_type == FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
+
+ return ip_session_redirect_stack (ipr);
+}
+
+__clib_export int
+ip_session_redirect_del (vlib_main_t *vm, u32 table_index, const u8 *match)
+{
+ ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ ip_session_redirect_t *ipr;
+ int rv;
+
+ ipr = ip_session_redirect_find (im, table_index, match);
+ if (!ipr)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ rv = vnet_classify_add_del_session (
+ cm, ipr->table_index, ipr->match_and_table_index, 0 /* hit_next_index */,
+ 0 /* opaque_index */, 0 /* advance */, 0 /* action */, 0 /* metadata */,
+ 0 /* is_add */);
+ if (rv)
+ return rv;
+
+ hash_unset_mem (im->session_by_match_and_table_index,
+ ipr->match_and_table_index);
+ vec_free (ipr->match_and_table_index);
+ fib_path_list_child_remove (ipr->pl, ipr->sibling);
+ dpo_reset (&ipr->dpo);
+ pool_put (im->pool, ipr);
+ return 0;
+}
+
+static int
+ip_session_redirect_show_yield (vlib_main_t *vm, f64 *start)
+{
+ /* yields for 2 clock ticks every 1 tick to avoid blocking the main thread
+ * when dumping huge data structures */
+ f64 now = vlib_time_now (vm);
+ if (now - *start > 11e-6)
+ {
+ vlib_process_suspend (vm, 21e-6);
+ *start = vlib_time_now (vm);
+ return 1;
+ }
+
+ return 0;
+}
+
+static u8 *
+format_ip_session_redirect (u8 *s, va_list *args)
+{
+ const ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ const ip_session_redirect_t *ipr =
+ va_arg (*args, const ip_session_redirect_t *);
+ index_t ipri = ipr - im->pool;
+ const char *type = ipr->is_punt ? "[punt]" : "[acl]";
+ const char *ip = ipr->is_ip6 ? "[ip6]" : "[ip4]";
+ s =
+ format (s, "[%u] %s %s table %d key %U opaque_index 0x%x\n", ipri, type,
+ ip, ipr->table_index, format_hex_bytes, ipr->match_and_table_index,
+ vec_len (ipr->match_and_table_index) - 4, ipr->opaque_index);
+ s = format (s, " via:\n");
+ s = format (s, " %U", format_fib_path_list, ipr->pl, 2);
+ s = format (s, " forwarding\n");
+ s = format (s, " %U", format_dpo_id, &ipr->dpo, 0);
+ return s;
+}
+
+static clib_error_t *
+ip_session_redirect_show_cmd (vlib_main_t *vm, unformat_input_t *main_input,
+ vlib_cli_command_t *cmd)
+{
+ ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ ip_session_redirect_t *ipr;
+ clib_error_t *error = 0;
+ u32 table_index = ~0;
+ int is_punt = -1;
+ int is_ip6 = -1;
+ u8 *match = 0;
+ int max = 50;
+ u8 *s = 0;
+
+ if (unformat_is_eof (main_input))
+ unformat_init (line_input, 0,
+ 0); /* support straight "sh ip session redirect" */
+ else if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "all"))
+ ;
+ else if (unformat (line_input, "punt"))
+ is_punt = 1;
+ else if (unformat (line_input, "acl"))
+ is_punt = 0;
+ else if (unformat (line_input, "ip4"))
+ is_ip6 = 0;
+ else if (unformat (line_input, "ip6"))
+ is_ip6 = 1;
+ else if (unformat (line_input, "table %u", &table_index))
+ ;
+ else if (unformat (line_input, "match %U", unformat_classify_match, cm,
+ &match, table_index))
+ ;
+ else if (unformat (line_input, "max %d", &max))
+ ;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto out;
+ }
+ }
+
+ if (match)
+ {
+ ipr = ip_session_redirect_find (im, table_index, match);
+ if (!ipr)
+ vlib_cli_output (vm, "none");
+ else
+ vlib_cli_output (vm, "%U", format_ip_session_redirect, ipr);
+ }
+ else
+ {
+ f64 start = vlib_time_now (vm);
+ ip_session_redirect_t *iprs = im->pool;
+ int n = 0;
+ pool_foreach (ipr, iprs)
+ {
+ if (n >= max)
+ {
+ n = -1; /* signal overflow */
+ break;
+ }
+ if ((~0 == table_index || ipr->table_index == table_index) &&
+ (-1 == is_punt || ipr->is_punt == is_punt) &&
+ (-1 == is_ip6 || ipr->is_ip6 == is_ip6))
+ {
+ s = format (s, "%U\n", format_ip_session_redirect, ipr);
+ n++;
+ }
+ if (ip_session_redirect_show_yield (vm, &start))
+ {
+ /* we must reload the pool as it might have moved */
+ u32 ii = ipr - iprs;
+ iprs = im->pool;
+ ipr = iprs + ii;
+ }
+ }
+ vec_add1 (s, 0);
+ vlib_cli_output (vm, (char *) s);
+ vec_free (s);
+ if (-1 == n)
+ {
+ vlib_cli_output (
+ vm,
+ "\nPlease note: only the first %d entries displayed. "
+ "To display more, specify max.",
+ max);
+ }
+ }
+
+out:
+ vec_free (match);
+ unformat_free (line_input);
+ return error;
+}
+
+VLIB_CLI_COMMAND (ip_session_redirect_show_command, static) = {
+ .path = "show ip session redirect",
+ .function = ip_session_redirect_show_cmd,
+ .short_help = "show ip session redirect [all|[table <table-index>] "
+ "[punt|acl] [ip4|ip6] [match]]",
+};
+
+static clib_error_t *
+ip_session_redirect_cmd (vlib_main_t *vm, unformat_input_t *main_input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ dpo_proto_t proto = DPO_PROTO_IP4;
+ fib_route_path_t *rpaths = 0, rpath;
+ clib_error_t *error = 0;
+ u32 opaque_index = ~0;
+ u32 table_index = ~0;
+ int is_punt = 0;
+ int is_add = 1;
+ u8 *match = 0;
+ int rv;
+
+ if (!unformat_user (main_input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del"))
+ is_add = 0;
+ else if (unformat (line_input, "add"))
+ is_add = 1;
+ else if (unformat (line_input, "punt"))
+ is_punt = 1;
+ else if (unformat (line_input, "table %u", &table_index))
+ ;
+ else if (unformat (line_input, "opaque-index %u", &opaque_index))
+ ;
+ else if (unformat (line_input, "match %U", unformat_classify_match, cm,
+ &match, table_index))
+ ;
+ else if (unformat (line_input, "via %U", unformat_fib_route_path, &rpath,
+ &proto))
+ vec_add1 (rpaths, rpath);
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto out;
+ }
+ }
+
+ if (~0 == table_index || 0 == match)
+ {
+ error = clib_error_create ("missing table index or match");
+ goto out;
+ }
+
+ if (is_add)
+ {
+ if (0 == rpaths)
+ {
+ error = clib_error_create ("missing path");
+ goto out;
+ }
+ rv = ip_session_redirect_add (vm, table_index, opaque_index, proto,
+ is_punt, match, rpaths);
+ }
+ else
+ {
+ rv = ip_session_redirect_del (vm, table_index, match);
+ }
+
+ if (rv)
+ error = clib_error_create ("failed with error %d", rv);
+
+out:
+ vec_free (rpaths);
+ vec_free (match);
+ unformat_free (line_input);
+ return error;
+}
+
+VLIB_CLI_COMMAND (ip_session_redirect_command, static) = {
+ .path = "ip session redirect",
+ .function = ip_session_redirect_cmd,
+ .short_help = "ip session redirect [add] [punt] table <index> match <match> "
+ "via <path> | del table <index> match <match>"
+};
+
+static fib_node_t *
+ip_session_redirect_get_node (fib_node_index_t index)
+{
+ ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ ip_session_redirect_t *ipr = pool_elt_at_index (im->pool, index);
+ return &ipr->node;
+}
+
+static ip_session_redirect_t *
+ip_session_redirect_get_from_node (fib_node_t *node)
+{
+ return (
+ ip_session_redirect_t *) (((char *) node) -
+ STRUCT_OFFSET_OF (ip_session_redirect_t, node));
+}
+
+static void
+ip_session_redirect_last_lock_gone (fib_node_t *node)
+{
+ /* the lifetime of the entry is managed by the table. */
+ ASSERT (0);
+}
+
+/* A back walk has reached this entry */
+static fib_node_back_walk_rc_t
+ip_session_redirect_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ int rv;
+ ip_session_redirect_t *ipr = ip_session_redirect_get_from_node (node);
+ rv = ip_session_redirect_stack (ipr);
+ ASSERT (0 == rv);
+ if (rv)
+ clib_warning ("ip_session_redirect_stack() error %d", rv);
+ return FIB_NODE_BACK_WALK_CONTINUE;
+}
+
+static const fib_node_vft_t ip_session_redirect_vft = {
+ .fnv_get = ip_session_redirect_get_node,
+ .fnv_last_lock = ip_session_redirect_last_lock_gone,
+ .fnv_back_walk = ip_session_redirect_back_walk_notify,
+};
+
+static clib_error_t *
+ip_session_redirect_init (vlib_main_t *vm)
+{
+ ip_session_redirect_main_t *im = &ip_session_redirect_main;
+ im->session_by_match_and_table_index =
+ hash_create_vec (0, sizeof (u8), sizeof (u32));
+ im->fib_node_type = fib_node_register_new_type ("ip-session-redirect",
+ &ip_session_redirect_vft);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (ip_session_redirect_init);
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "IP session redirect",
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ip_session_redirect/test_api.c b/src/plugins/ip_session_redirect/test_api.c
new file mode 100644
index 00000000000..e4026a673ff
--- /dev/null
+++ b/src/plugins/ip_session_redirect/test_api.c
@@ -0,0 +1,195 @@
+/* Copyright (c) 2021-2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+#include <vlib/vlib.h>
+#include <vnet/fib/fib_api.h>
+#include <vnet/ip/ip_format_fns.h>
+#include <vnet/classify/vnet_classify.h>
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#define __plugin_msg_base ip_session_redirect_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+/* declare message IDs */
+#include "ip_session_redirect.api_enum.h"
+#include "ip_session_redirect.api_types.h"
+#include "ip_session_redirect.h"
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} ip_session_redirect_test_main_t;
+
+ip_session_redirect_test_main_t ip_session_redirect_test_main;
+
+static int
+api_ip_session_redirect_add_parse (vat_main_t *vam, u32 *table_index,
+ u32 *opaque_index, dpo_proto_t *proto,
+ int *is_punt, u8 **match,
+ fib_route_path_t **paths)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ fib_route_path_t path;
+
+ *table_index = ~0;
+ *opaque_index = ~0;
+ *proto = DPO_PROTO_IP4;
+ *is_punt = 0;
+ *match = 0;
+ *paths = 0;
+
+ while (unformat_check_input (vam->input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (vam->input, "punt"))
+ *is_punt = 1;
+ else if (unformat (vam->input, "table %u", table_index))
+ ;
+ else if (unformat (vam->input, "opaque-index %u", opaque_index))
+ ;
+ else if (unformat (vam->input, "match %U", unformat_classify_match, cm,
+ match, *table_index))
+ ;
+ else if (unformat (vam->input, "via %U", unformat_fib_route_path, &path,
+ proto))
+ vec_add1 (*paths, path);
+ else
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error,
+ vam->input);
+ return -99;
+ }
+ }
+
+ return 0;
+}
+
+static int
+api_ip_session_redirect_add (vat_main_t *vam)
+{
+ vl_api_ip_session_redirect_add_t *mp;
+ fib_route_path_t *paths;
+ dpo_proto_t proto;
+ u32 opaque_index;
+ u32 table_index;
+ int is_punt;
+ int ret, i;
+ u8 *match;
+
+ ret = api_ip_session_redirect_add_parse (vam, &table_index, &opaque_index,
+ &proto, &is_punt, &match, &paths);
+ if (ret)
+ goto err;
+
+ M2 (IP_SESSION_REDIRECT_ADD, mp, vec_len (paths) * sizeof (mp->paths[0]));
+
+ mp->table_index = htonl (table_index);
+ mp->opaque_index = htonl (opaque_index);
+ mp->is_punt = is_punt;
+ memcpy_s (mp->match, sizeof (mp->match), match, vec_len (match));
+ mp->n_paths = vec_len (paths);
+ vec_foreach_index (i, paths)
+ fib_api_path_encode (&paths[i], &mp->paths[i]);
+
+ S (mp);
+ W (ret);
+
+err:
+ vec_free (match);
+ vec_free (paths);
+ return ret;
+}
+
+static int
+api_ip_session_redirect_add_v2 (vat_main_t *vam)
+{
+ vl_api_ip_session_redirect_add_v2_t *mp;
+ fib_route_path_t *paths;
+ dpo_proto_t proto;
+ u32 opaque_index;
+ u32 table_index;
+ int is_punt;
+ int ret, i;
+ u8 *match;
+
+ ret = api_ip_session_redirect_add_parse (vam, &table_index, &opaque_index,
+ &proto, &is_punt, &match, &paths);
+ if (ret)
+ goto err;
+
+ M2 (IP_SESSION_REDIRECT_ADD_V2, mp, vec_len (paths) * sizeof (mp->paths[0]));
+
+ mp->table_index = htonl (table_index);
+ mp->opaque_index = htonl (opaque_index);
+ mp->proto = fib_api_path_dpo_proto_to_nh (proto);
+ mp->is_punt = is_punt;
+ memcpy_s (mp->match, sizeof (mp->match), match, vec_len (match));
+ mp->n_paths = vec_len (paths);
+ vec_foreach_index (i, paths)
+ fib_api_path_encode (&paths[i], &mp->paths[i]);
+
+ S (mp);
+ W (ret);
+
+err:
+ vec_free (match);
+ vec_free (paths);
+ return ret;
+}
+
+static int
+api_ip_session_redirect_del (vat_main_t *vam)
+{
+ vnet_classify_main_t *cm = &vnet_classify_main;
+ vl_api_ip_session_redirect_del_t *mp;
+ u32 table_index = ~0;
+ u8 *match = 0;
+ int ret;
+
+ while (unformat_check_input (vam->input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (vam->input, "table %u", &table_index))
+ ;
+ else if (unformat (vam->input, "match %U", unformat_classify_match, cm,
+ &match, table_index))
+ ;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error,
+ vam->input);
+ return -99;
+ }
+ }
+
+ M2 (IP_SESSION_REDIRECT_DEL, mp, vec_len (match));
+
+ mp->table_index = htonl (table_index);
+ mp->match_len = htonl (vec_len (match));
+ clib_memcpy (mp->match, match, vec_len (match));
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
+#include "ip_session_redirect.api_test.c"
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/l2e/l2e.c b/src/plugins/l2e/l2e.c
deleted file mode 100644
index 4c6eac50446..00000000000
--- a/src/plugins/l2e/l2e.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * l2e.c : Extract L3 packets from the L2 input and feed
- * them into the L3 path.
- *
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <plugins/l2e/l2e.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/feat_bitmap.h>
-#include <vnet/ip/ip.h>
-
-l2_emulation_main_t l2_emulation_main;
-
-/**
- * A zero'd out struct we can use in the vec_validate
- */
-static const l2_emulation_t ezero = { };
-
-__clib_export void
-l2_emulation_enable (u32 sw_if_index)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- vec_validate_init_empty (em->l2_emulations, sw_if_index, ezero);
-
- l2_emulation_t *l23e = &em->l2_emulations[sw_if_index];
-
- l23e->enabled = 1;
-
- /*
- * L3 enable the interface - using IP unnumbered from the control
- * plane may not be possible since there may be no BVI interface
- * to which to unnumber
- */
- ip4_sw_interface_enable_disable (sw_if_index, 1);
- ip6_sw_interface_enable_disable (sw_if_index, 1);
-
- l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_L2_EMULATION, 1);
-}
-
-
-__clib_export void
-l2_emulation_disable (u32 sw_if_index)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- if (vec_len (em->l2_emulations) >= sw_if_index)
- {
- l2_emulation_t *l23e = &em->l2_emulations[sw_if_index];
- clib_memset (l23e, 0, sizeof (*l23e));
-
- l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_L2_EMULATION, 0);
- ip4_sw_interface_enable_disable (sw_if_index, 0);
- ip6_sw_interface_enable_disable (sw_if_index, 0);
- }
-}
-
-static clib_error_t *
-l2_emulation_interface_add_del (vnet_main_t * vnm,
- u32 sw_if_index, u32 is_add)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- if (is_add)
- {
- vec_validate_init_empty (em->l2_emulations, sw_if_index, ezero);
- }
-
- return (NULL);
-}
-
-VNET_SW_INTERFACE_ADD_DEL_FUNCTION (l2_emulation_interface_add_del);
-
-static clib_error_t *
-l2_emulation_cli (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- vnet_main_t *vnm = vnet_get_main ();
- u32 sw_if_index = ~0;
- u8 enable = 1;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "%U", unformat_vnet_sw_interface,
- vnm, &sw_if_index))
- ;
- else if (unformat (input, "enable"))
- enable = 1;
- else if (unformat (input, "disable"))
- enable = 0;
- else
- break;
- }
-
- if (~0 == sw_if_index)
- return clib_error_return (0, "interface must be specified");
-
- if (enable)
- l2_emulation_enable (sw_if_index);
- else
- l2_emulation_disable (sw_if_index);
-
- return (NULL);
-}
-
-/*?
- * Configure l2 emulation.
- * When the interface is in L2 mode, configure the extraction of L3
- * packets out of the L2 path and into the L3 path.
- *
- * @cliexpar
- * @cliexstart{set interface l2 input l2-emulation <interface-name> [disable]}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (l2_emulation_cli_node, static) = {
- .path = "set interface l2 l2-emulation",
- .short_help =
- "set interface l2 l2-emulation <interface-name> [disable|enable]\n",
- .function = l2_emulation_cli,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-l2_emulation_show (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- vnet_main_t *vnm = vnet_get_main ();
- l2_emulation_t *l23e;
- u32 sw_if_index;
-
- vec_foreach_index (sw_if_index, em->l2_emulations)
- {
- l23e = &em->l2_emulations[sw_if_index];
- if (l23e->enabled)
- {
- vlib_cli_output (vm, "%U\n",
- format_vnet_sw_if_index_name, vnm, sw_if_index);
- }
- }
- return (NULL);
-}
-
-/*?
- * Show l2 emulation.
- * When the interface is in L2 mode, configure the extraction of L3
- * packets out of the L2 path and into the L3 path.
- *
- * @cliexpar
- * @cliexstart{show interface l2 l2-emulation}
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (l2_emulation_show_node, static) = {
- .path = "show interface l2 l2-emulation",
- .short_help = "show interface l2 l2-emulation\n",
- .function = l2_emulation_show,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-l2_emulation_init (vlib_main_t * vm)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- vlib_node_t *node;
-
- node = vlib_get_node_by_name (vm, (u8 *) "l2-emulation");
- em->l2_emulation_node_index = node->index;
-
- /* Initialize the feature next-node indexes */
- feat_bitmap_init_next_nodes (vm,
- em->l2_emulation_node_index,
- L2INPUT_N_FEAT,
- l2input_get_feat_names (),
- em->l2_input_feat_next);
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (l2_emulation_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/l2e/l2e.h b/src/plugins/l2e/l2e.h
deleted file mode 100644
index e548d333f9d..00000000000
--- a/src/plugins/l2e/l2e.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2013 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_vnet_l2_emulation_h
-#define included_vnet_l2_emulation_h
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-
-/**
- * Per-interface L2 configuration
- */
-typedef struct l2_emulation_t_
-{
- /**
- * Enabled or Disabled.
- * this is required since one L3 protocl can be enabled, but others not
- */
- u8 enabled;
-} l2_emulation_t;
-
-/**
- * per-packet trace data
- */
-typedef struct l2_emulation_trace_t_
-{
- /* per-pkt trace data */
- u8 extracted;
-} l2_emulation_trace_t;
-
-/**
- * Grouping of global data for the L2 emulation feature
- */
-typedef struct l2_emulation_main_t_
-{
- u16 msg_id_base;
-
- u32 l2_emulation_node_index;
-
- /**
- * Per-interface vector of emulation configs
- */
- l2_emulation_t *l2_emulations;
-
- /**
- * Next nodes for L2 output features
- */
- u32 l2_input_feat_next[32];
-} l2_emulation_main_t;
-
-/**
- * L2 Emulation is a feautre that is applied to L2 ports to 'extract'
- * IP packets from the L2 path and inject them into the L3 path (i.e.
- * into the appropriate ip[4|6]_input node).
- * L3 routes in the table_id for that interface should then be configured
- * as DVR routes, therefore the forwarded packet has the L2 header
- * preserved and togehter the L3 routed system behaves like an L2 bridge.
- */
-extern void l2_emulation_enable (u32 sw_if_index);
-extern void l2_emulation_disable (u32 sw_if_index);
-
-extern l2_emulation_main_t l2_emulation_main;
-
-#endif
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/l2e/l2e_api.c b/src/plugins/l2e/l2e_api.c
deleted file mode 100644
index fe2fb7ee06e..00000000000
--- a/src/plugins/l2e/l2e_api.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- *------------------------------------------------------------------
- * l2e_api.c - layer 2 emulation api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vnet/plugin/plugin.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vpp/app/version.h>
-
-#include <l2e/l2e.h>
-
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-
-/* define message IDs */
-#include <l2e/l2e.api_enum.h>
-#include <l2e/l2e.api_types.h>
-
-#include <vlibapi/api_helper_macros.h>
-
-#define L2E_MSG_BASE l2em->msg_id_base
-
-static void
-vl_api_l2_emulation_t_handler (vl_api_l2_emulation_t * mp)
-{
- l2_emulation_main_t *l2em = &l2_emulation_main;
- vl_api_l2_emulation_reply_t *rmp;
- int rv = 0;
-
- VALIDATE_SW_IF_INDEX (mp);
-
- u32 sw_if_index = ntohl (mp->sw_if_index);
-
- if (mp->enable)
- l2_emulation_enable (sw_if_index);
- else
- l2_emulation_disable (sw_if_index);
-
- BAD_SW_IF_INDEX_LABEL;
-
- REPLY_MACRO (VL_API_L2_EMULATION_REPLY + L2E_MSG_BASE);
-}
-
-#include <l2e/l2e.api.c>
-static clib_error_t *
-l2e_init (vlib_main_t * vm)
-{
- l2_emulation_main_t *l2em = &l2_emulation_main;
-
- /* Ask for a correctly-sized block of API message decode slots */
- l2em->msg_id_base = setup_message_id_table ();
-
- return (NULL);
-}
-
-VLIB_API_INIT_FUNCTION (l2e_init);
-
-/* *INDENT-OFF* */
-VLIB_PLUGIN_REGISTER () = {
- .version = VPP_BUILD_VER,
- .description = "Layer 2 (L2) Emulation",
-};
-/* *INDENT-ON* */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/l2e/l2e_node.c b/src/plugins/l2e/l2e_node.c
deleted file mode 100644
index 71c9b4bc6af..00000000000
--- a/src/plugins/l2e/l2e_node.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * l2e_node.c : l2 emulation node
- *
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <plugins/l2e/l2e.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/feat_bitmap.h>
-
-#define foreach_l2_emulation \
- _(IP4, "Extract IPv4") \
- _(IP6, "Extract IPv6")
-
-typedef enum
-{
-#define _(sym,str) L2_EMULATION_ERROR_##sym,
- foreach_l2_emulation
-#undef _
- L2_EMULATION_N_ERROR,
-} l2_emulation_error_t;
-
-static char *l2_emulation_error_strings[] = {
-#define _(sym,string) string,
- foreach_l2_emulation
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) L2_EMULATION_NEXT_##sym,
- foreach_l2_emulation
-#undef _
- L2_EMULATION_N_NEXT,
-} l2_emulation_next_t;
-
-/* packet trace format function */
-static u8 *
-format_l2_emulation_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- l2_emulation_trace_t *t = va_arg (*args, l2_emulation_trace_t *);
-
- s = format (s, "l2-emulation: %s", (t->extracted ? "yes" : "no"));
-
- return s;
-}
-
-VLIB_NODE_FN (l2_emulation_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- l2_emulation_main_t *em = &l2_emulation_main;
- u32 n_left_from, *from, *to_next;
- l2_emulation_next_t next_index;
- u32 ip4_hits = 0;
- u32 ip6_hits = 0;
-
- next_index = 0;
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- vlib_buffer_t *b0, *b1;
- u32 sw_if_index0, sw_if_index1;
- u16 ether_type0, ether_type1;
- u32 next0 = ~0, next1 = ~0;
- u8 l2_len0, l2_len1;
- u32 bi0, bi1;
- u8 *h0, *h1;
-
- bi0 = to_next[0] = from[0];
- bi1 = to_next[1] = from[1];
-
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
- l2_len0 = vnet_buffer (b0)->l2.l2_len;
- l2_len1 = vnet_buffer (b1)->l2.l2_len;
-
- h0 = vlib_buffer_get_current (b0);
- h1 = vlib_buffer_get_current (b1);
-
- ether_type0 = clib_net_to_host_u16 (*(u16 *) (h0 + l2_len0 - 2));
- ether_type1 = clib_net_to_host_u16 (*(u16 *) (h1 + l2_len1 - 2));
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
-
- /*
- * only extract unicast
- */
- if (PREDICT_TRUE (!(h0[0] & 0x1)))
- {
- switch (ether_type0)
- {
- case ETHERNET_TYPE_IP4:
- ASSERT (em->l2_emulations[sw_if_index0].enabled);
- ++ip4_hits;
- next0 = L2_EMULATION_NEXT_IP4;
- vlib_buffer_advance (b0, l2_len0);
- break;
- case ETHERNET_TYPE_IP6:
- ASSERT (em->l2_emulations[sw_if_index0].enabled);
- ++ip6_hits;
- next0 = L2_EMULATION_NEXT_IP6;
- vlib_buffer_advance (b0, l2_len0);
- default:
- break;
- }
- }
- if (PREDICT_TRUE (!(h1[0] & 0x1)))
- {
- switch (ether_type1)
- {
- case ETHERNET_TYPE_IP4:
- ASSERT (em->l2_emulations[sw_if_index1].enabled);
- ++ip4_hits;
- next1 = L2_EMULATION_NEXT_IP4;
- vlib_buffer_advance (b1, l2_len1);
- break;
- case ETHERNET_TYPE_IP6:
- ASSERT (em->l2_emulations[sw_if_index1].enabled);
- ++ip6_hits;
- next1 = L2_EMULATION_NEXT_IP6;
- vlib_buffer_advance (b1, l2_len1);
- default:
- break;
- }
- }
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- l2_emulation_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->extracted = (next0 != ~0);
- }
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b1->flags & VLIB_BUFFER_IS_TRACED)))
- {
- l2_emulation_trace_t *t =
- vlib_add_trace (vm, node, b1, sizeof (*t));
- t->extracted = (next1 != ~0);
- }
-
- /* Determine the next node and remove ourself from bitmap */
- if (PREDICT_TRUE (next0 == ~0))
- next0 = vnet_l2_feature_next (b0, em->l2_input_feat_next,
- L2INPUT_FEAT_L2_EMULATION);
-
- /* Determine the next node and remove ourself from bitmap */
- if (PREDICT_TRUE (next1 == ~0))
- next1 = vnet_l2_feature_next (b1, em->l2_input_feat_next,
- L2INPUT_FEAT_L2_EMULATION);
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *b0;
- u32 sw_if_index0;
- u16 ether_type0;
- u32 next0 = ~0;
- u8 l2_len0;
- u32 bi0;
- u8 *h0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- l2_len0 = vnet_buffer (b0)->l2.l2_len;
-
- h0 = vlib_buffer_get_current (b0);
- ether_type0 = clib_net_to_host_u16 (*(u16 *) (h0 + l2_len0 - 2));
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- /*
- * only extract unicast
- */
- if (PREDICT_TRUE (!(h0[0] & 0x1)))
- {
- switch (ether_type0)
- {
- case ETHERNET_TYPE_IP4:
- ASSERT (em->l2_emulations[sw_if_index0].enabled);
- ++ip4_hits;
- next0 = L2_EMULATION_NEXT_IP4;
- vlib_buffer_advance (b0, l2_len0);
- break;
- case ETHERNET_TYPE_IP6:
- ASSERT (em->l2_emulations[sw_if_index0].enabled);
- ++ip6_hits;
- next0 = L2_EMULATION_NEXT_IP6;
- vlib_buffer_advance (b0, l2_len0);
- default:
- break;
- }
- }
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- l2_emulation_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->extracted = (next0 != ~0);
- }
-
- /* Determine the next node and remove ourself from bitmap */
- if (PREDICT_TRUE (next0 == ~0))
- next0 = vnet_l2_feature_next (b0, em->l2_input_feat_next,
- L2INPUT_FEAT_L2_EMULATION);
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_node_increment_counter (vm, node->node_index,
- L2_EMULATION_ERROR_IP4, ip4_hits);
- vlib_node_increment_counter (vm, node->node_index,
- L2_EMULATION_ERROR_IP6, ip6_hits);
-
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (l2_emulation_node) = {
- .name = "l2-emulation",
- .vector_size = sizeof (u32),
- .format_trace = format_l2_emulation_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(l2_emulation_error_strings),
- .error_strings = l2_emulation_error_strings,
-
- .n_next_nodes = L2_EMULATION_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [L2_EMULATION_NEXT_IP4] = "ip4-input",
- [L2_EMULATION_NEXT_IP6] = "ip6-input",
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/l2tp/decap.c b/src/plugins/l2tp/decap.c
index 8c41bdd2357..e6ad8b0926c 100644
--- a/src/plugins/l2tp/decap.c
+++ b/src/plugins/l2tp/decap.c
@@ -249,7 +249,6 @@ VLIB_NODE_FN (l2t_decap_node) (vlib_main_t * vm,
* while l2tp-decap-local drops it.
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2t_decap_node) = {
.name = "l2tp-decap",
.vector_size = sizeof (u32),
@@ -267,11 +266,9 @@ VLIB_REGISTER_NODE (l2t_decap_node) = {
[L2T_DECAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
extern vlib_node_function_t l2t_decap_node_fn;
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2t_decap_local_node) = {
.function = l2t_decap_node_fn,
.name = "l2tp-decap-local",
@@ -290,7 +287,6 @@ VLIB_REGISTER_NODE (l2t_decap_local_node) = {
[L2T_DECAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/l2tp/encap.c b/src/plugins/l2tp/encap.c
index fbb5fc6ea46..3115b96f088 100644
--- a/src/plugins/l2tp/encap.c
+++ b/src/plugins/l2tp/encap.c
@@ -192,7 +192,6 @@ VLIB_NODE_FN (l2t_encap_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2t_encap_node) = {
.name = "l2tp-encap",
.vector_size = sizeof (u32),
@@ -211,7 +210,6 @@ VLIB_REGISTER_NODE (l2t_encap_node) = {
[L2T_ENCAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
diff --git a/src/plugins/l2tp/l2tp.c b/src/plugins/l2tp/l2tp.c
index 08fa6d1e60b..907468b5900 100644
--- a/src/plugins/l2tp/l2tp.c
+++ b/src/plugins/l2tp/l2tp.c
@@ -53,12 +53,10 @@ format_l2t_session (u8 * s, va_list * args)
vlib_counter_t v;
s = format (s, "[%d] %U (our) %U (client) %U (sw_if_index %d)\n",
- session - lm->sessions,
- format_ip6_address, &session->our_address,
- format_ip6_address, &session->client_address,
- format_vnet_sw_interface_name, lm->vnet_main,
- vnet_get_sw_interface (lm->vnet_main, session->sw_if_index),
- session->sw_if_index);
+ session - lm->sessions, format_ip6_address,
+ &session->our_address, format_ip6_address,
+ &session->client_address, format_vnet_sw_if_index_name,
+ lm->vnet_main, session->sw_if_index, session->sw_if_index);
s = format (s, " local cookies %016llx %016llx remote cookie %016llx\n",
clib_net_to_host_u64 (session->local_cookie[0]),
@@ -129,24 +127,20 @@ show_l2tp_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "L2tp session lookup on %s", keystr);
- /* *INDENT-OFF* */
pool_foreach (session, lm->sessions)
{
vlib_cli_output (vm, "%U", format_l2t_session, session);
}
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_detail_command, static) = {
.path = "show l2tpv3",
.short_help = "show l2tpv3 [verbose]",
.function = show_l2tp_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_counters_command_fn (vlib_main_t * vm,
@@ -159,7 +153,6 @@ test_counters_command_fn (vlib_main_t * vm,
u32 nincr = 0;
u32 thread_index = vm->thread_index;
- /* *INDENT-OFF* */
pool_foreach (session, lm->sessions)
{
session_index = session - lm->sessions;
@@ -177,19 +170,16 @@ test_counters_command_fn (vlib_main_t * vm,
nincr++;
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "Incremented %d active counters\n", nincr);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_counters_command, static) = {
.path = "test lt2p counters",
.short_help = "increment all active counters",
.function = test_counters_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_counters_command_fn (vlib_main_t * vm,
@@ -201,7 +191,6 @@ clear_counters_command_fn (vlib_main_t * vm,
u32 counter_index;
u32 nincr = 0;
- /* *INDENT-OFF* */
pool_foreach (session, lm->sessions)
{
session_index = session - lm->sessions;
@@ -212,19 +201,16 @@ clear_counters_command_fn (vlib_main_t * vm,
vlib_zero_combined_counter (&lm->counter_main, counter_index+1);
nincr++;
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "Cleared %d active counters\n", nincr);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_counters_command, static) = {
.path = "clear l2tp counters",
.short_help = "clear all active counters",
.function = clear_counters_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_l2tpv3_name (u8 * s, va_list * args)
@@ -254,13 +240,11 @@ l2tpv3_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
return 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (l2tpv3_device_class,static) = {
.name = "L2TPv3",
.format_device_name = format_l2tpv3_name,
.name_renumber = l2tpv3_name_renumber,
};
-/* *INDENT-ON* */
static u8 *
format_l2tp_header_with_length (u8 * s, va_list * args)
@@ -270,14 +254,12 @@ format_l2tp_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (l2tpv3_hw_class) = {
.name = "L2TPV3",
.format_header = format_l2tp_header_with_length,
.build_rewrite = default_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
int
create_l2tpv3_ipv6_tunnel (l2t_main_t * lm,
@@ -377,7 +359,7 @@ create_l2tpv3_ipv6_tunnel (l2t_main_t * lm,
{
hw_if_index = lm->free_l2tpv3_tunnel_hw_if_indices
[vec_len (lm->free_l2tpv3_tunnel_hw_if_indices) - 1];
- _vec_len (lm->free_l2tpv3_tunnel_hw_if_indices) -= 1;
+ vec_dec_len (lm->free_l2tpv3_tunnel_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, hw_if_index);
hi->dev_instance = s - lm->sessions;
@@ -517,7 +499,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_l2tpv3_tunnel_command, static) =
{
.path = "create l2tpv3 tunnel",
@@ -525,7 +506,6 @@ VLIB_CLI_COMMAND (create_l2tpv3_tunnel_command, static) =
"create l2tpv3 tunnel client <ip6> our <ip6> local-cookie <hex> remote-cookie <hex> local-session <dec> remote-session <dec>",
.function = create_l2tpv3_tunnel_command_fn,
};
-/* *INDENT-ON* */
int
l2tpv3_set_tunnel_cookies (l2t_main_t * lm,
@@ -600,7 +580,6 @@ set_l2tp_tunnel_cookie_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_l2tp_tunnel_cookie_command, static) =
{
.path = "set l2tpv3 tunnel cookie",
@@ -608,7 +587,6 @@ VLIB_CLI_COMMAND (set_l2tp_tunnel_cookie_command, static) =
"set l2tpv3 tunnel cookie <intfc> local <hex> remote <hex>",
.function = set_l2tp_tunnel_cookie_command_fn,
};
-/* *INDENT-ON* */
int
l2tpv3_interface_enable_disable (vnet_main_t * vnm,
@@ -665,14 +643,12 @@ set_ip6_l2tpv3 (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_l2tpv3, static) =
{
.path = "set interface ip6 l2tpv3",
.function = set_ip6_l2tpv3,
.short_help = "set interface ip6 l2tpv3 <intfc> [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
l2tp_config (vlib_main_t * vm, unformat_input_t * input)
diff --git a/src/plugins/l2tp/l2tp_api.c b/src/plugins/l2tp/l2tp_api.c
index ba9d2681b35..9c5ad700e0c 100644
--- a/src/plugins/l2tp/l2tp_api.c
+++ b/src/plugins/l2tp/l2tp_api.c
@@ -89,12 +89,10 @@ vl_api_sw_if_l2tpv3_tunnel_dump_t_handler (vl_api_sw_if_l2tpv3_tunnel_dump_t *
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (session, lm->sessions)
{
send_sw_if_l2tpv3_tunnel_details (am, reg, session, lm, mp->context);
}
- /* *INDENT-ON* */
}
static void vl_api_l2tpv3_create_tunnel_t_handler
@@ -146,12 +144,10 @@ static void vl_api_l2tpv3_create_tunnel_t_handler
encap_fib_index, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_L2TPV3_CREATE_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void vl_api_l2tpv3_set_tunnel_cookies_t_handler
@@ -234,12 +230,10 @@ VLIB_API_INIT_FUNCTION (l2tp_api_hookup);
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Layer 2 Tunneling Protocol v3 (L2TP)",
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/l2tp/l2tp_test.c b/src/plugins/l2tp/l2tp_test.c
index 87abf5d0a2a..33691313cbe 100644
--- a/src/plugins/l2tp/l2tp_test.c
+++ b/src/plugins/l2tp/l2tp_test.c
@@ -26,7 +26,7 @@
/* define message IDs */
#include <l2tp/l2tp.api_enum.h>
#include <l2tp/l2tp.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -41,13 +41,11 @@ l2tp_test_main_t l2tp_test_main;
#define __plugin_msg_base l2tp_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static void vl_api_l2tpv3_create_tunnel_reply_t_handler
(vl_api_l2tpv3_create_tunnel_reply_t * mp)
diff --git a/src/plugins/l2tp/packet.h b/src/plugins/l2tp/packet.h
index 66dfea2194c..d7d78f85e53 100644
--- a/src/plugins/l2tp/packet.h
+++ b/src/plugins/l2tp/packet.h
@@ -24,14 +24,12 @@
* tunnels. It is not present in IOS XR l2tpv3 tunnels.
* The Linux implementation is almost certainly wrong.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
u32 session_id;
u64 cookie; u32
l2_specific_sublayer; /* set to 0 (if present) */
}) l2tpv3_header_t;
-/* *INDENT-ON* */
#endif /* __included_l2tp_packet_h__ */
diff --git a/src/plugins/l3xc/FEATURE.yaml b/src/plugins/l3xc/FEATURE.yaml
index 5086fee3c57..0ee8c0c2168 100644
--- a/src/plugins/l3xc/FEATURE.yaml
+++ b/src/plugins/l3xc/FEATURE.yaml
@@ -4,7 +4,7 @@ maintainer: Neale Ranns <nranns@cisco.com>
features:
- cross connect all ingress traffic on an L3 interface to an output FIB path.
- the path can describe any output (with the exception of MPLS labels)
- - The same functions can be acheived by using a dedicated VRF for the table
+ - The same functions can be achieved by using a dedicated VRF for the table
and adding a default route with the same path. However, the L3XC is more
efficient in memory and CPU
diff --git a/src/plugins/l3xc/l3xc.c b/src/plugins/l3xc/l3xc.c
index ac0e4cf0d2c..427c38e9ab5 100644
--- a/src/plugins/l3xc/l3xc.c
+++ b/src/plugins/l3xc/l3xc.c
@@ -67,11 +67,11 @@ l3xc_stack (l3xc_t * l3xc)
*/
dpo_id_t via_dpo = DPO_INVALID;
- fib_path_list_contribute_forwarding (l3xc->l3xc_pl,
- (FIB_PROTOCOL_IP4 == l3xc->l3xc_proto ?
- FIB_FORW_CHAIN_TYPE_UNICAST_IP4 :
- FIB_FORW_CHAIN_TYPE_UNICAST_IP6),
- FIB_PATH_LIST_FWD_FLAG_NONE, &via_dpo);
+ fib_path_list_contribute_forwarding (
+ l3xc->l3xc_pl,
+ (FIB_PROTOCOL_IP4 == l3xc->l3xc_proto ? FIB_FORW_CHAIN_TYPE_UNICAST_IP4 :
+ FIB_FORW_CHAIN_TYPE_UNICAST_IP6),
+ FIB_PATH_LIST_FWD_FLAG_COLLAPSE, &via_dpo);
dpo_stack_from_node ((FIB_PROTOCOL_IP4 == l3xc->l3xc_proto ?
l3xc_ip4_node.index :
@@ -264,7 +264,6 @@ out:
return (NULL);
}
-/* *INDENT-OFF* */
/**
* Create an L3XC policy.
*/
@@ -274,7 +273,6 @@ VLIB_CLI_COMMAND (l3xc_cmd_node, static) = {
.short_help = "l3xc [add|del] <INTERFACE> via ...",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static u8 *
format_l3xc (u8 * s, va_list * args)
@@ -305,13 +303,11 @@ l3xc_walk (l3xc_walk_cb_t cb, void *ctx)
{
u32 l3xci;
- /* *INDENT-OFF* */
pool_foreach_index (l3xci, l3xc_pool)
{
if (!cb(l3xci, ctx))
break;
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -320,24 +316,20 @@ l3xc_show_cmd (vlib_main_t * vm,
{
l3xc_t *l3xc;
- /* *INDENT-OFF* */
pool_foreach (l3xc, l3xc_pool)
{
vlib_cli_output(vm, "%U", format_l3xc, l3xc);
}
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l3xc_show_cmd_node, static) = {
.path = "show l3xc",
.function = l3xc_show_cmd,
.short_help = "show l3xc",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static fib_node_t *
l3xc_get_node (fib_node_index_t index)
@@ -381,7 +373,7 @@ static const fib_node_vft_t l3xc_vft = {
static clib_error_t *
l3xc_init (vlib_main_t * vm)
{
- l3xc_fib_node_type = fib_node_register_new_type (&l3xc_vft);
+ l3xc_fib_node_type = fib_node_register_new_type ("l3xc", &l3xc_vft);
return (NULL);
}
diff --git a/src/plugins/l3xc/l3xc_api.c b/src/plugins/l3xc/l3xc_api.c
index 847acaac331..f09100b3546 100644
--- a/src/plugins/l3xc/l3xc_api.c
+++ b/src/plugins/l3xc/l3xc_api.c
@@ -37,6 +37,7 @@
*/
static u32 l3xc_base_msg_id;
+#define REPLY_MSG_ID_BASE (l3xc_base_msg_id)
#include <vlibapi/api_helper_macros.h>
static void
@@ -96,12 +97,7 @@ done:
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_L3XC_UPDATE_REPLY + l3xc_base_msg_id,
- ({
- rmp->stats_index = 0;
- }))
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_L3XC_UPDATE_REPLY, ({ rmp->stats_index = 0; }))
}
static void
@@ -116,7 +112,7 @@ vl_api_l3xc_del_t_handler (vl_api_l3xc_del_t * mp)
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (VL_API_L3XC_DEL_REPLY + l3xc_base_msg_id);
+ REPLY_MACRO (VL_API_L3XC_DEL_REPLY);
}
typedef struct l3xc_dump_walk_ctx_t_
@@ -213,12 +209,10 @@ l3xc_api_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (l3xc_api_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "L3 Cross-Connect (L3XC)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/l3xc/l3xc_node.c b/src/plugins/l3xc/l3xc_node.c
index 62db8c328b0..0f79bebeff9 100644
--- a/src/plugins/l3xc/l3xc_node.c
+++ b/src/plugins/l3xc/l3xc_node.c
@@ -199,7 +199,6 @@ static char *l3xc_error_strings[] = {
#undef l3xc_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l3xc_ip4_node) =
{
.function = l3xc_input_ip4,
@@ -245,7 +244,6 @@ VNET_FEATURE_INIT (l3xc_ip6_feat, static) =
.node_name = "l3xc-input-ip6",
.runs_after = VNET_FEATURES ("acl-plugin-in-ip6-fa"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lacp/cli.c b/src/plugins/lacp/cli.c
index fee9a5a2269..7cf97e09dc4 100644
--- a/src/plugins/lacp/cli.c
+++ b/src/plugins/lacp/cli.c
@@ -28,7 +28,6 @@ lacp_dump_ifs (lacp_interface_details_t ** out_lacpifs)
lacp_interface_details_t *r_lacpifs = NULL;
lacp_interface_details_t *lacpif = NULL;
- /* *INDENT-OFF* */
pool_foreach (mif, bm->neighbors) {
if (mif->lacp_enabled == 0)
continue;
@@ -61,7 +60,6 @@ lacp_dump_ifs (lacp_interface_details_t ** out_lacpifs)
lacpif->ptx_state = mif->ptx_state;
lacpif->mux_state = mif->mux_state;
}
- /* *INDENT-ON* */
*out_lacpifs = r_lacpifs;
@@ -309,14 +307,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_lacp_command, static) = {
.path = "show lacp",
.short_help = "show lacp [<interface>] [details]",
.function = show_lacp_fn,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
debug_lacp_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -384,13 +380,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (debug_lacp_command, static) = {
.path = "debug lacp",
.short_help = "debug lacp <interface> <on | off>",
.function = debug_lacp_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
lacp_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/lacp/input.c b/src/plugins/lacp/input.c
index ebca2ad9185..5ccd1037fdb 100644
--- a/src/plugins/lacp/input.c
+++ b/src/plugins/lacp/input.c
@@ -16,7 +16,7 @@
#define _GNU_SOURCE
#include <vnet/bonding/node.h>
#include <lacp/node.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/stats/stats.h>
static int
lacp_packet_scan (vlib_main_t * vm, member_if_t * mif)
@@ -155,7 +155,7 @@ lacp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
{
mif->last_marker_pdu_recd_time = vlib_time_now (vm);
if (mif->last_marker_pkt)
- _vec_len (mif->last_marker_pkt) = 0;
+ vec_set_len (mif->last_marker_pkt, 0);
vec_validate (mif->last_marker_pkt,
vlib_buffer_length_in_chain (vm, b0) - 1);
nbytes = vlib_buffer_contents (vm, bi0, mif->last_marker_pkt);
@@ -176,7 +176,7 @@ lacp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
* and reuse it.
*/
if (mif->last_rx_pkt)
- _vec_len (mif->last_rx_pkt) = 0;
+ vec_set_len (mif->last_rx_pkt, 0);
/*
* Make sure the per-neighbor rx buffer is big enough to hold
@@ -213,19 +213,19 @@ lacp_input (vlib_main_t * vm, vlib_buffer_t * b0, u32 bi0)
/* Actually scan the packet */
e = lacp_packet_scan (vm, mif);
bif = bond_get_bond_if_by_dev_instance (mif->bif_dev_instance);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].actor_state,
- mif->actor.state);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].partner_state,
- mif->partner.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].actor_state,
+ mif->actor.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].partner_state,
+ mif->partner.state);
mif->last_packet_signature_valid = 1;
mif->last_packet_signature = last_packet_signature;
}
mif->pdu_received++;
if (mif->last_rx_pkt)
- _vec_len (mif->last_rx_pkt) = 0;
+ vec_set_len (mif->last_rx_pkt, 0);
return e;
}
@@ -239,12 +239,10 @@ lacp_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (lacp_init) =
{
.runs_after = VLIB_INITS("lacp_periodic_init"),
};
-/* *INDENT-ON* */
/*
* packet trace format function, very similar to
diff --git a/src/plugins/lacp/lacp.c b/src/plugins/lacp/lacp.c
index 44a32aa23a9..ba66f7b245d 100644
--- a/src/plugins/lacp/lacp.c
+++ b/src/plugins/lacp/lacp.c
@@ -21,7 +21,7 @@
#include <vppinfra/hash.h>
#include <vnet/bonding/node.h>
#include <lacp/node.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/stats/stats.h>
lacp_main_t lacp_main;
@@ -142,7 +142,6 @@ lacp_periodic (vlib_main_t * vm)
bond_if_t *bif;
u8 actor_state, partner_state;
- /* *INDENT-OFF* */
pool_foreach (mif, bm->neighbors)
{
if (mif->port_enabled == 0)
@@ -173,19 +172,18 @@ lacp_periodic (vlib_main_t * vm)
if (actor_state != mif->actor.state)
{
bif = bond_get_bond_if_by_dev_instance (mif->bif_dev_instance);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].actor_state,
- mif->actor.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].actor_state,
+ mif->actor.state);
}
if (partner_state != mif->partner.state)
{
bif = bond_get_bond_if_by_dev_instance (mif->bif_dev_instance);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].partner_state,
- mif->partner.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].partner_state,
+ mif->partner.state);
}
}
- /* *INDENT-ON* */
}
static void
@@ -216,12 +214,10 @@ lacp_interface_enable_disable (vlib_main_t * vm, bond_if_t * bif,
ASSERT (lm->lacp_int >= 1);
if (lm->lacp_int == 0)
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "lacp-int-en-dis: BUG lacp_int == 0",
};
- /* *INDENT-ON* */
ELOG_DATA (&vlib_global_main.elog_main, e);
}
else
@@ -380,12 +376,12 @@ lacp_init_state_machines (vlib_main_t * vm, member_if_t * mif)
lacp_init_mux_machine (vm, mif);
lacp_init_ptx_machine (vm, mif);
lacp_init_rx_machine (vm, mif);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].actor_state,
- mif->actor.state);
- stat_segment_set_state_counter (bm->stats[bif->sw_if_index]
- [mif->sw_if_index].partner_state,
- mif->partner.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].actor_state,
+ mif->actor.state);
+ vlib_stats_set_gauge (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].partner_state,
+ mif->partner.state);
}
VLIB_INIT_FUNCTION (lacp_periodic_init);
@@ -453,12 +449,10 @@ lacp_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lacp_hw_interface_up_down);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Link Aggregation Control Protocol (LACP)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lacp/lacp_api.c b/src/plugins/lacp/lacp_api.c
index fce2492be8c..cdf05aa370c 100644
--- a/src/plugins/lacp/lacp_api.c
+++ b/src/plugins/lacp/lacp_api.c
@@ -31,14 +31,11 @@
#include <lacp/lacp.api_enum.h>
#include <lacp/lacp.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-
-/* Macro to finish up custom dump fns */
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
#define REPLY_MSG_ID_BASE lm->msg_id_base
#include <vlibapi/api_helper_macros.h>
@@ -125,7 +122,8 @@ lacp_plugin_api_hookup (vlib_main_t * vm)
lm->msg_id_base = setup_message_id_table ();
/* Mark these APIs as mp safe */
- am->is_mp_safe[lm->msg_id_base + VL_API_SW_INTERFACE_LACP_DUMP] = 1;
+ vl_api_set_msg_thread_safe (
+ am, lm->msg_id_base + VL_API_SW_INTERFACE_LACP_DUMP, 1);
return 0;
}
diff --git a/src/plugins/lacp/lacp_doc.md b/src/plugins/lacp/lacp_doc.md
deleted file mode 100644
index 7df82b5689a..00000000000
--- a/src/plugins/lacp/lacp_doc.md
+++ /dev/null
@@ -1,104 +0,0 @@
-# VPP Link Aggregation Control Protocol (LACP) implementation {#lacp_plugin_doc}
-
-This document is to describe the usage of VPP LACP implementation.
-
-## LACP
-
-The Link Aggregation Control Protocol (LACP) is an 802.3ad standard which
-provides a protocol for exchanging information between Partner Systems on a
-link to allow their protocol instances to reach agreement on the Link Aggregation
-Group to which the link belongs and enable transmission and reception for the
-higher layer. Multiple links may be bundled to the same Aggregation Group to form
-a high bandwidth transmission medium and create a fault-tolerant link.
-
-
-### Configuration
-
-1. Create the bond interface
-create bond mode lacp [hw-addr <mac-address>] [load-balance { l2 | l23 | l34 } [numa-only]]
-
-2. Enslave the physical interface to the bond
-bond add <bond-interface-name> <slave-interface> [passive] [long-timeout]"
-
-3. Delete the bond interface
-delete bond {<interface> | sw_if_index <sw_idx>}
-
-4. Detach the slave interface from the bond
-bond del <slave-interface>
-
-### Configuration example
-
-```
-create bond mode lacp
-set interface state BondEthernet0 up
-bond add BondEthernet0 TenGigabitEthernet7/0/0
-bond add BondEthernet0 TenGigabitEthernet7/0/1
-bond add BondEthernet0 TenGigabitEthernet5/0/0
-bond add BondEthernet0 TenGigabitEthernet5/0/1
-```
-
-```
-bond del TenGigabitEthernet5/0/1
-```
-
-```
-delete bond BondEthernet0
-```
-
-### Operational data
-
-```
-show lacp [<interface>] [details]
-```
-
-Example:
-
-```
-DBGvpp# show lacp
- actor state partner state
-interface name sw_if_index bond interface exp/def/dis/col/syn/agg/tim/act exp/def/dis/col/syn/agg/tim/act
-GigabitEthernet2/0/1 1 BondEthernet0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
- LAG ID: [(ffff,e4-c7-22-f3-26-71,0000,00ff,0001), (ffff,fc-99-47-4a-0c-8b,0009,00ff,0001)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet4/0/0 2 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
- LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0001), (8000,00-2a-6a-e5-50-c1,0140,8000,011d)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet4/0/1 3 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
- LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0002), (8000,00-2a-6a-e5-50-c1,0140,8000,011e)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet8/0/1 7 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
- LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0003), (8000,00-2a-6a-e5-50-01,007a,8000,0114)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet8/0/0 6 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
- LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0004), (8000,00-2a-6a-e5-50-01,007a,8000,0115)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet6/0/1 5 BondEthernet2 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
- LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0001), (ffff,90-e2-ba-29-f5-31,000f,00ff,0002)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-TenGigabitEthernet6/0/0 4 BondEthernet2 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
- LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0002), (ffff,90-e2-ba-29-f5-31,000f,00ff,0001)]
- RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
-DBGvpp#
-```
-
-```
-show bond [details]
-````
-
-Example:
-
-```
-DBGvpp# show bond
-sh bond
-interface name sw_if_index mode load balance active slaves slaves
-BondEthernet0 10 lacp l2 1 1
-BondEthernet1 11 lacp l34 4 4
-BondEthernet2 12 lacp l23 2 2
-DBGvpp#
-```
-
-### Debugging
-
-```
-debug lacp [<interface>] <on | off>
-```
diff --git a/src/plugins/lacp/lacp_doc.rst b/src/plugins/lacp/lacp_doc.rst
new file mode 100644
index 00000000000..04b51ba22f8
--- /dev/null
+++ b/src/plugins/lacp/lacp_doc.rst
@@ -0,0 +1,109 @@
+LACP Protocol
+=============
+
+This document is to describe the usage of VPP Link Aggregation Control
+Protocol (LACP) implementation.
+
+LACP
+----
+
+The Link Aggregation Control Protocol (LACP) is an 802.3ad standard
+which provides a protocol for exchanging information between Partner
+Systems on a link to allow their protocol instances to reach agreement
+on the Link Aggregation Group to which the link belongs and enable
+transmission and reception for the higher layer. Multiple links may be
+bundled to the same Aggregation Group to form a high bandwidth
+transmission medium and create a fault-tolerant link.
+
+Configuration
+~~~~~~~~~~~~~
+
+1. Create the bond interface create bond mode lacp [hw-addr ]
+ [load-balance { l2 \| l23 \| l34 } [numa-only]]
+
+2. Enslave the physical interface to the bond bond add [passive]
+ [long-timeout]”
+
+3. Delete the bond interface delete bond { \| sw_if_index }
+
+4. Detach the slave interface from the bond bond del
+
+Configuration example
+~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ create bond mode lacp
+ set interface state BondEthernet0 up
+ bond add BondEthernet0 TenGigabitEthernet7/0/0
+ bond add BondEthernet0 TenGigabitEthernet7/0/1
+ bond add BondEthernet0 TenGigabitEthernet5/0/0
+ bond add BondEthernet0 TenGigabitEthernet5/0/1
+
+::
+
+ bond del TenGigabitEthernet5/0/1
+
+::
+
+ delete bond BondEthernet0
+
+Operational data
+~~~~~~~~~~~~~~~~
+
+::
+
+ show lacp [<interface>] [details]
+
+Example:
+
+::
+
+ DBGvpp# show lacp
+ actor state partner state
+ interface name sw_if_index bond interface exp/def/dis/col/syn/agg/tim/act exp/def/dis/col/syn/agg/tim/act
+ GigabitEthernet2/0/1 1 BondEthernet0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
+ LAG ID: [(ffff,e4-c7-22-f3-26-71,0000,00ff,0001), (ffff,fc-99-47-4a-0c-8b,0009,00ff,0001)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet4/0/0 2 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
+ LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0001), (8000,00-2a-6a-e5-50-c1,0140,8000,011d)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet4/0/1 3 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
+ LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0002), (8000,00-2a-6a-e5-50-c1,0140,8000,011e)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet8/0/1 7 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
+ LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0003), (8000,00-2a-6a-e5-50-01,007a,8000,0114)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet8/0/0 6 BondEthernet1 0 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1
+ LAG ID: [(ffff,90-e2-ba-76-cf-2d,0001,00ff,0004), (8000,00-2a-6a-e5-50-01,007a,8000,0115)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet6/0/1 5 BondEthernet2 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
+ LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0001), (ffff,90-e2-ba-29-f5-31,000f,00ff,0002)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ TenGigabitEthernet6/0/0 4 BondEthernet2 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1
+ LAG ID: [(ffff,90-e2-ba-36-31-21,0002,00ff,0002), (ffff,90-e2-ba-29-f5-31,000f,00ff,0001)]
+ RX-state: CURRENT, TX-state: TRANSMIT, MUX-state: COLLECTING_DISTRIBUTING, PTX-state: PERIODIC_TX
+ DBGvpp#
+
+::
+
+ show bond [details]
+
+Example:
+
+::
+
+ DBGvpp# show bond
+ sh bond
+ interface name sw_if_index mode load balance active slaves slaves
+ BondEthernet0 10 lacp l2 1 1
+ BondEthernet1 11 lacp l34 4 4
+ BondEthernet2 12 lacp l23 2 2
+ DBGvpp#
+
+Debugging
+~~~~~~~~~
+
+::
+
+ debug lacp [<interface>] <on | off>
diff --git a/src/plugins/lacp/lacp_test.c b/src/plugins/lacp/lacp_test.c
index 3a5e6351693..e5078520fd3 100644
--- a/src/plugins/lacp/lacp_test.c
+++ b/src/plugins/lacp/lacp_test.c
@@ -31,7 +31,7 @@
#include <vnet/format_fns.h>
#include <lacp/lacp.api_enum.h>
#include <lacp/lacp.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
diff --git a/src/plugins/lacp/mux_machine.c b/src/plugins/lacp/mux_machine.c
index 974dbd9dac9..ee43894cd53 100644
--- a/src/plugins/lacp/mux_machine.c
+++ b/src/plugins/lacp/mux_machine.c
@@ -208,13 +208,11 @@ lacp_mux_debug_func (member_if_t * mif, int event, int state,
lacp_fsm_state_t * transition)
{
vlib_worker_thread_t *w = vlib_worker_threads + os_get_thread_index ();
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "%s",
.format_args = "T4",
};
- /* *INDENT-ON* */
struct
{
u32 event;
diff --git a/src/plugins/lacp/node.c b/src/plugins/lacp/node.c
index 4f78880c741..4426dafab5b 100644
--- a/src/plugins/lacp/node.c
+++ b/src/plugins/lacp/node.c
@@ -33,7 +33,7 @@ lacp_state_struct lacp_state_array[] = {
The interior node is neither pipelined nor dual-looped, because
it would be very unusual to see more than one LACP packet in
- a given input frame. So, it's a very simple / straighforward
+ a given input frame. So, it's a very simple / straightforward
example.
*/
@@ -112,7 +112,6 @@ lacp_node_fn (vlib_main_t * vm,
/*
* lacp input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lacp_input_node, static) = {
.function = lacp_node_fn,
.name = "lacp-input",
@@ -129,19 +128,16 @@ VLIB_REGISTER_NODE (lacp_input_node, static) = {
[LACP_INPUT_NEXT_NORMAL] = "error-drop",
},
};
-/* *INDENT-ON* */
static void
lacp_elog_start_event (void)
{
lacp_main_t *lm = &lacp_main;
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "Starting LACP process, interface count = %d",
.format_args = "i4",
};
- /* *INDENT-ON* */
struct
{
u32 count;
@@ -155,13 +151,11 @@ static void
lacp_elog_stop_event (void)
{
lacp_main_t *lm = &lacp_main;
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "Stopping LACP process, interface count = %d",
.format_args = "i4",
};
- /* *INDENT-ON* */
struct
{
u32 count;
diff --git a/src/plugins/lacp/ptx_machine.c b/src/plugins/lacp/ptx_machine.c
index bb9d033c13a..92a99c920e9 100644
--- a/src/plugins/lacp/ptx_machine.c
+++ b/src/plugins/lacp/ptx_machine.c
@@ -195,13 +195,11 @@ lacp_ptx_debug_func (member_if_t * mif, int event, int state,
lacp_fsm_state_t * transition)
{
vlib_worker_thread_t *w = vlib_worker_threads + os_get_thread_index ();
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "%s",
.format_args = "T4",
};
- /* *INDENT-ON* */
struct
{
u32 event;
diff --git a/src/plugins/lacp/rx_machine.c b/src/plugins/lacp/rx_machine.c
index 2fadbe636cf..9c161b02078 100644
--- a/src/plugins/lacp/rx_machine.c
+++ b/src/plugins/lacp/rx_machine.c
@@ -343,7 +343,6 @@ lacp_port_is_moved (vlib_main_t * vm, member_if_t * mif)
member_if_t *mif2;
lacp_pdu_t *lacpdu = (lacp_pdu_t *) mif->last_rx_pkt;
- /* *INDENT-OFF* */
pool_foreach (mif2, bm->neighbors) {
{
if ((mif != mif2) && (mif2->rx_state == LACP_RX_STATE_PORT_DISABLED) &&
@@ -353,7 +352,6 @@ lacp_port_is_moved (vlib_main_t * vm, member_if_t * mif)
return 1;
}
}
- /* *INDENT-ON* */
return 0;
}
@@ -400,13 +398,11 @@ lacp_rx_debug_func (member_if_t * mif, int event, int state,
lacp_fsm_state_t * transition)
{
vlib_worker_thread_t *w = vlib_worker_threads + os_get_thread_index ();
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "%s",
.format_args = "T4",
};
- /* *INDENT-ON* */
struct
{
u32 event;
diff --git a/src/plugins/lacp/tx_machine.c b/src/plugins/lacp/tx_machine.c
index 1eb3bc1c4b7..c36f44c07ce 100644
--- a/src/plugins/lacp/tx_machine.c
+++ b/src/plugins/lacp/tx_machine.c
@@ -84,13 +84,11 @@ lacp_tx_debug_func (member_if_t * mif, int event, int state,
lacp_fsm_state_t * transition)
{
vlib_worker_thread_t *w = vlib_worker_threads + os_get_thread_index ();
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "%s",
.format_args = "T4",
};
- /* *INDENT-ON* */
struct
{
u32 event;
diff --git a/src/plugins/lb/api.c b/src/plugins/lb/api.c
index e44f815cb9c..ea2e482135b 100644
--- a/src/plugins/lb/api.c
+++ b/src/plugins/lb/api.c
@@ -30,17 +30,15 @@
#include <lb/lb.api_enum.h>
#include <lb/lb.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define REPLY_MSG_ID_BASE lbm->msg_id_base
#include <vlibapi/api_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static void
vl_api_lb_conf_t_handler
@@ -72,7 +70,7 @@ vl_api_lb_add_del_vip_t_handler
lb_main_t *lbm = &lb_main;
vl_api_lb_conf_reply_t * rmp;
int rv = 0;
- lb_vip_add_args_t args;
+ lb_vip_add_args_t args = {};
/* if port == 0, it means all-port VIP */
if (mp->port == 0)
@@ -130,6 +128,80 @@ vl_api_lb_add_del_vip_t_handler
}
static void
+vl_api_lb_add_del_vip_v2_t_handler (vl_api_lb_add_del_vip_v2_t *mp)
+{
+ lb_main_t *lbm = &lb_main;
+ vl_api_lb_conf_reply_t *rmp;
+ int rv = 0;
+ lb_vip_add_args_t args = {};
+
+ /* if port == 0, it means all-port VIP */
+ if (mp->port == 0)
+ {
+ mp->protocol = ~0;
+ }
+
+ ip_address_decode (&mp->pfx.address, &(args.prefix));
+
+ if (mp->is_del)
+ {
+ u32 vip_index;
+ if (!(rv = lb_vip_find_index (&(args.prefix), mp->pfx.len, mp->protocol,
+ ntohs (mp->port), &vip_index)))
+ rv = lb_vip_del (vip_index);
+ }
+ else
+ {
+ u32 vip_index;
+ lb_vip_type_t type = 0;
+
+ if (ip46_prefix_is_ip4 (&(args.prefix), mp->pfx.len))
+ {
+ if (mp->encap == LB_API_ENCAP_TYPE_GRE4)
+ type = LB_VIP_TYPE_IP4_GRE4;
+ else if (mp->encap == LB_API_ENCAP_TYPE_GRE6)
+ type = LB_VIP_TYPE_IP4_GRE6;
+ else if (mp->encap == LB_API_ENCAP_TYPE_L3DSR)
+ type = LB_VIP_TYPE_IP4_L3DSR;
+ else if (mp->encap == LB_API_ENCAP_TYPE_NAT4)
+ type = LB_VIP_TYPE_IP4_NAT4;
+ }
+ else
+ {
+ if (mp->encap == LB_API_ENCAP_TYPE_GRE4)
+ type = LB_VIP_TYPE_IP6_GRE4;
+ else if (mp->encap == LB_API_ENCAP_TYPE_GRE6)
+ type = LB_VIP_TYPE_IP6_GRE6;
+ else if (mp->encap == LB_API_ENCAP_TYPE_NAT6)
+ type = LB_VIP_TYPE_IP6_NAT6;
+ }
+
+ args.plen = mp->pfx.len;
+ args.protocol = mp->protocol;
+ args.port = ntohs (mp->port);
+ args.type = type;
+ args.new_length = ntohl (mp->new_flows_table_length);
+
+ if (mp->src_ip_sticky)
+ args.src_ip_sticky = 1;
+
+ if (mp->encap == LB_API_ENCAP_TYPE_L3DSR)
+ {
+ args.encap_args.dscp = (u8) (mp->dscp & 0x3F);
+ }
+ else if ((mp->encap == LB_API_ENCAP_TYPE_NAT4) ||
+ (mp->encap == LB_API_ENCAP_TYPE_NAT6))
+ {
+ args.encap_args.srv_type = mp->type;
+ args.encap_args.target_port = ntohs (mp->target_port);
+ }
+
+ rv = lb_vip_add (args, &vip_index);
+ }
+ REPLY_MACRO (VL_API_LB_ADD_DEL_VIP_V2_REPLY);
+}
+
+static void
vl_api_lb_add_del_as_t_handler
(vl_api_lb_add_del_as_t * mp)
{
@@ -211,7 +283,6 @@ static void send_lb_as_details
lb_main_t *lbm = &lb_main;
int msg_size = 0;
u32 *as_index;
- u32 asindex = 0;
/* construct as list under this vip */
lb_as_t *as;
@@ -235,7 +306,6 @@ static void send_lb_as_details
rmp->in_use_since = htonl(as->last_used);
vl_api_send_msg (reg, (u8 *) rmp);
- asindex++;
}
}
@@ -260,7 +330,6 @@ vl_api_lb_as_dump_t_handler
dump_all = (prefix.ip6.as_u64[0] == 0) && (prefix.ip6.as_u64[1] == 0);
- /* *INDENT-OFF* */
pool_foreach (vip, lbm->vips)
{
if ( dump_all
@@ -272,7 +341,6 @@ vl_api_lb_as_dump_t_handler
send_lb_as_details(reg, mp->context, vip);
}
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/plugins/lb/cli.c b/src/plugins/lb/cli.c
index 7b5dc5c8549..afa73ef616c 100644
--- a/src/plugins/lb/cli.c
+++ b/src/plugins/lb/cli.c
@@ -32,6 +32,7 @@ lb_vip_command_fn (vlib_main_t * vm,
clib_error_t *error = 0;
args.new_length = 1024;
+ args.src_ip_sticky = 0;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -49,6 +50,8 @@ lb_vip_command_fn (vlib_main_t * vm,
;
else if (unformat(line_input, "del"))
del = 1;
+ else if (unformat (line_input, "src_ip_sticky"))
+ args.src_ip_sticky = 1;
else if (unformat(line_input, "protocol tcp"))
{
args.protocol = (u8)IP_PROTOCOL_TCP;
@@ -177,6 +180,7 @@ done:
return error;
}
+/* clang-format off */
VLIB_CLI_COMMAND (lb_vip_command, static) =
{
.path = "lb vip",
@@ -185,9 +189,10 @@ VLIB_CLI_COMMAND (lb_vip_command, static) =
"[encap (gre6|gre4|l3dsr|nat4|nat6)] "
"[dscp <n>] "
"[type (nodeport|clusterip) target_port <n>] "
- "[new_len <n>] [del]",
+ "[new_len <n>] [src_ip_sticky] [del]",
.function = lb_vip_command_fn,
};
+/* clang-format on */
static clib_error_t *
lb_as_command_fn (vlib_main_t * vm,
@@ -442,24 +447,22 @@ lb_set_interface_nat_command_fn (vlib_main_t * vm,
{
if (lb_nat4_interface_add_del (*sw_if_index, is_del))
{
- error = clib_error_return(
- 0, "%s %U failed", is_del ? "del" : "add",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, *sw_if_index));
- goto done;
- }
- }
+ error = clib_error_return (
+ 0, "%s %U failed", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnm, *sw_if_index);
+ goto done;
+ }
+ }
else
{
if (lb_nat6_interface_add_del (*sw_if_index, is_del))
{
- error = clib_error_return(
- 0, "%s %U failed", is_del ? "del" : "add",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, *sw_if_index));
- goto done;
- }
- }
+ error = clib_error_return (
+ 0, "%s %U failed", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnm, *sw_if_index);
+ goto done;
+ }
+ }
}
done:
diff --git a/src/plugins/lb/lb.api b/src/plugins/lb/lb.api
index 4bf30e76b59..96f047ddbc2 100644
--- a/src/plugins/lb/lb.api
+++ b/src/plugins/lb/lb.api
@@ -1,4 +1,4 @@
-option version = "1.0.0";
+option version = "1.1.0";
import "plugins/lb/lb_types.api";
import "vnet/interface_types.api";
@@ -54,6 +54,39 @@ autoreply define lb_add_del_vip {
option vat_help = "<prefix> [protocol (tcp|udp) port <n>] [encap (gre6|gre4|l3dsr|nat4|nat6)] [dscp <n>] [type (nodeport|clusterip) target_port <n>] [new_len <n>] [del]";
};
+/** \brief Add a virtual address (or prefix)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param pfx - ip prefix and length
+ @param protocol - tcp or udp.
+ @param port - destination port. (0) means 'all-port VIP'
+ @param encap - Encap is ip4 GRE(0) or ip6 GRE(1) or L3DSR(2) or NAT4(3) or NAT6(4).
+ @param dscp - DSCP bit corresponding to VIP(applicable in L3DSR mode only).
+ @param type - service type(applicable in NAT4/NAT6 mode only).
+ @param target_port - Pod's port corresponding to specific service(applicable in NAT4/NAT6 mode only).
+ @param node_port - Node's port(applicable in NAT4/NAT6 mode only).
+ @param new_flows_table_length - Size of the new connections flow table used
+ for this VIP (must be power of 2).
+ @param src_ip_sticky - source ip based sticky session.
+ @param is_del - The VIP should be removed.
+*/
+autoreply define lb_add_del_vip_v2 {
+ u32 client_index;
+ u32 context;
+ vl_api_address_with_prefix_t pfx;
+ u8 protocol [default=255];
+ u16 port;
+ vl_api_lb_encap_type_t encap;
+ u8 dscp;
+ vl_api_lb_srv_type_t type ; /* LB_API_SRV_TYPE_CLUSTERIP */
+ u16 target_port;
+ u16 node_port;
+ u32 new_flows_table_length [default=1024];
+ bool src_ip_sticky;
+ bool is_del;
+ option vat_help = "<prefix> [protocol (tcp|udp) port <n>] [encap (gre6|gre4|l3dsr|nat4|nat6)] [dscp <n>] [type (nodeport|clusterip) target_port <n>] [new_len <n>] [src_ip_sticky] [del]";
+};
+
/** \brief Add an application server for a given VIP
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/plugins/lb/lb.c b/src/plugins/lb/lb.c
index 6fc7f0f92b2..7ae1884ff31 100644
--- a/src/plugins/lb/lb.c
+++ b/src/plugins/lb/lb.c
@@ -198,15 +198,18 @@ u8 *format_lb_vip_detailed (u8 * s, va_list * args)
lb_vip_t *vip = va_arg (*args, lb_vip_t *);
u32 indent = format_get_indent (s);
- s = format(s, "%U %U [%lu] %U%s\n"
+ /* clang-format off */
+ s = format(s, "%U %U [%lu] %U%s%s\n"
"%U new_size:%u\n",
format_white_space, indent,
format_lb_vip_type, vip->type,
vip - lbm->vips,
format_ip46_prefix, &vip->prefix, (u32) vip->plen, IP46_TYPE_ANY,
+ lb_vip_is_src_ip_sticky (vip) ? " src_ip_sticky" : "",
(vip->flags & LB_VIP_FLAGS_USED)?"":" removed",
format_white_space, indent,
vip->new_flow_table_mask + 1);
+ /* clang-format on */
if (vip->port != 0)
{
@@ -370,9 +373,9 @@ void lb_garbage_collection()
}
vec_foreach(i, to_be_removed_vips) {
- vip = &lbm->vips[*i];
- pool_put(lbm->vips, vip);
- pool_free(vip->as_indexes);
+ vip = &lbm->vips[*i];
+ pool_free (vip->as_indexes);
+ pool_put (lbm->vips, vip);
}
vec_free(to_be_removed_vips);
@@ -411,7 +414,7 @@ out:
}
//First, let's sort the ASs
- vec_alloc(sort_arr, pool_elts(vip->as_indexes));
+ vec_validate (sort_arr, pool_elts (vip->as_indexes) - 1);
i = 0;
pool_foreach (as_index, vip->as_indexes) {
@@ -422,7 +425,7 @@ out:
sort_arr[i].as_index = as - lbm->ass;
i++;
}
- _vec_len(sort_arr) = i;
+ vec_set_len (sort_arr, i);
vec_sort_with_function(sort_arr, lb_pseudorand_compare);
@@ -1147,6 +1150,10 @@ int lb_vip_add(lb_vip_add_args_t args, u32 *vip_index)
}
vip->flags = LB_VIP_FLAGS_USED;
+ if (args.src_ip_sticky)
+ {
+ vip->flags |= LB_VIP_FLAGS_SRC_IP_STICKY;
+ }
vip->as_indexes = 0;
//Validate counters
@@ -1249,12 +1256,10 @@ int lb_vip_del(u32 vip_index)
return rv;
}
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Load Balancer (LB)",
};
-/* *INDENT-ON* */
u8 *format_lb_dpo (u8 * s, va_list * va)
{
@@ -1412,7 +1417,7 @@ lb_init (vlib_main_t * vm)
lb_dpo_nat4_port_nodes);
lbm->dpo_nat6_port_type = dpo_register_new_type(&lb_vft,
lb_dpo_nat6_port_nodes);
- lbm->fib_node_type = fib_node_register_new_type(&lb_fib_node_vft);
+ lbm->fib_node_type = fib_node_register_new_type ("lb", &lb_fib_node_vft);
//Init AS reference counters
vlib_refcount_init(&lbm->as_refcount);
diff --git a/src/plugins/lb/lb.h b/src/plugins/lb/lb.h
index ebbb1f6f8f0..46da40970c9 100644
--- a/src/plugins/lb/lb.h
+++ b/src/plugins/lb/lb.h
@@ -22,7 +22,7 @@
* The load-balancer receives traffic destined to VIP (Virtual IP)
* addresses from one or multiple(ECMP) routers.
* The load-balancer tunnels the traffic toward many application servers
- * ensuring session stickyness (i.e. that a single sessions is tunneled
+ * ensuring session stickiness (i.e. that a single sessions is tunneled
* towards a single application server).
*
*/
@@ -324,6 +324,7 @@ typedef struct {
*/
u8 flags;
#define LB_VIP_FLAGS_USED 0x1
+#define LB_VIP_FLAGS_SRC_IP_STICKY 0x2
/**
* Pool of AS indexes used for this VIP.
@@ -346,11 +347,14 @@ typedef struct {
|| (vip)->type == LB_VIP_TYPE_IP4_L3DSR \
|| (vip)->type == LB_VIP_TYPE_IP4_NAT4 )
+#define lb_vip_is_src_ip_sticky(vip) \
+ (((vip)->flags & LB_VIP_FLAGS_SRC_IP_STICKY) != 0)
+
+/* clang-format off */
#define lb_vip_is_gre4(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE4 \
|| (vip)->type == LB_VIP_TYPE_IP4_GRE4) \
&& ((vip)->port == 0))
-
#define lb_vip_is_gre6(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE6 \
|| (vip)->type == LB_VIP_TYPE_IP4_GRE6) \
&& ((vip)->port == 0))
@@ -362,27 +366,28 @@ typedef struct {
#define lb_vip_is_gre6_port(vip) (((vip)->type == LB_VIP_TYPE_IP6_GRE6 \
|| (vip)->type == LB_VIP_TYPE_IP4_GRE6) \
&& ((vip)->port != 0))
+/* clang-format on */
always_inline bool
lb_vip_is_l3dsr(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port ==0);
+ return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port == 0);
}
always_inline bool
lb_vip_is_l3dsr_port(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port !=0);
+ return (vip->type == LB_VIP_TYPE_IP4_L3DSR && vip->port != 0);
}
always_inline bool
lb_vip_is_nat4_port(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP4_NAT4 && vip->port !=0);
+ return (vip->type == LB_VIP_TYPE_IP4_NAT4 && vip->port != 0);
}
always_inline bool
lb_vip_is_nat6_port(const lb_vip_t *vip)
{
- return (vip->type == LB_VIP_TYPE_IP6_NAT6 && vip->port !=0);
+ return (vip->type == LB_VIP_TYPE_IP6_NAT6 && vip->port != 0);
}
format_function_t format_lb_vip;
@@ -575,6 +580,7 @@ typedef struct {
u8 plen;
u8 protocol;
u16 port;
+ u8 src_ip_sticky;
lb_vip_type_t type;
u32 new_length;
lb_vip_encap_args_t encap_args;
diff --git a/src/plugins/lb/lb_plugin_doc.md b/src/plugins/lb/lb_plugin_doc.md
deleted file mode 100644
index 5f6538974e9..00000000000
--- a/src/plugins/lb/lb_plugin_doc.md
+++ /dev/null
@@ -1,192 +0,0 @@
-# Load Balancer plugin for VPP {#lb_plugin_doc}
-
-## Version
-
-The load balancer plugin is currently in *beta* version.
-Both CLIs and APIs are subject to *heavy* changes,
-which also means feedback is really welcome regarding features, apis, etc...
-
-## Overview
-
-This plugin provides load balancing for VPP in a way that is largely inspired
-from Google's MagLev: http://research.google.com/pubs/pub44824.html
-
-The load balancer is configured with a set of Virtual IPs (VIP, which can be
-prefixes), and for each VIP, with a set of Application Server addresses (ASs).
-
-There are four encap types to steer traffic to different ASs:
-1). IPv4+GRE ad IPv6+GRE encap types:
-Traffic received for a given VIP (or VIP prefix) is tunneled using GRE towards
-the different ASs in a way that (tries to) ensure that a given session will
-always be tunneled to the same AS.
-
-2). IPv4+L3DSR encap types:
-L3DSR is used to overcome Layer 2 limitations of Direct Server Return Load Balancing.
-It maps VIP to DSCP bits, and reuse TOS bits to transfer DSCP bits
-to server, and then server will get VIP from DSCP-to-VIP mapping.
-
-Both VIPs or ASs can be IPv4 or IPv6, but for a given VIP, all ASs must be using
-the same encap. type (i.e. IPv4+GRE or IPv6+GRE or IPv4+L3DSR).
-Meaning that for a given VIP, all AS addresses must be of the same family.
-
-3). IPv4/IPv6 + NAT4/NAT6 encap types:
-This type provides kube-proxy data plane on user space,
-which is used to replace linux kernel's kube-proxy based on iptables.
-
-Currently, load balancer plugin supports three service types:
-a) Cluster IP plus Port: support any protocols, including TCP, UDP.
-b) Node IP plus Node Port: currently only support UDP.
-c) External Load Balancer.
-
-For Cluster IP plus Port case:
-kube-proxy is configured with a set of Virtual IPs (VIP, which can be
-prefixes), and for each VIP, with a set of AS addresses (ASs).
-
-For a specific session received for a given VIP (or VIP prefix),
-first packet selects a AS according to internal load balancing algorithm,
-then does DNAT operation and sent to chosen AS.
-At the same time, will create a session entry to store AS chosen result.
-Following packets for that session will look up session table first,
-which ensures that a given session will always be routed to the same AS.
-
-For returned packet from AS, it will do SNAT operation and sent out.
-
-Please refer to below for details:
-https://schd.ws/hosted_files/ossna2017/1e/VPP_K8S_GTPU_OSSNA.pdf
-
-
-## Performance
-
-The load balancer has been tested up to 1 millions flows and still forwards more
-than 3Mpps per core in such circumstances.
-Although 3Mpps seems already good, it is likely that performance will be improved
-in next versions.
-
-## Configuration
-
-### Global LB parameters
-
-The load balancer needs to be configured with some parameters:
-
- lb conf [ip4-src-address <addr>] [ip6-src-address <addr>]
- [buckets <n>] [timeout <s>]
-
-ip4-src-address: the source address used to send encap. packets using IPv4 for GRE4 mode.
- or Node IP4 address for NAT4 mode.
-
-ip6-src-address: the source address used to send encap. packets using IPv6 for GRE6 mode.
- or Node IP6 address for NAT6 mode.
-
-buckets: the *per-thread* established-connections-table number of buckets.
-
-timeout: the number of seconds a connection will remain in the
- established-connections-table while no packet for this flow
- is received.
-
-### Configure the VIPs
-
- lb vip <prefix> [encap (gre6|gre4|l3dsr|nat4|nat6)] \
- [dscp <n>] [port <n> target_port <n> node_port <n>] [new_len <n>] [del]
-
-new_len is the size of the new-connection-table. It should be 1 or 2 orders of
-magnitude bigger than the number of ASs for the VIP in order to ensure a good
-load balancing.
-Encap l3dsr and dscp is used to map VIP to dscp bit and rewrite DSCP bit in packets.
-So the selected server could get VIP from DSCP bit in this packet and perform DSR.
-Encap nat4/nat6 and port/target_port/node_port is used to do kube-proxy data plane.
-
-Examples:
-
- lb vip 2002::/16 encap gre6 new_len 1024
- lb vip 2003::/16 encap gre4 new_len 2048
- lb vip 80.0.0.0/8 encap gre6 new_len 16
- lb vip 90.0.0.0/8 encap gre4 new_len 1024
- lb vip 100.0.0.0/8 encap l3dsr dscp 2 new_len 32
- lb vip 90.1.2.1/32 encap nat4 port 3306 target_port 3307 node_port 30964 new_len 1024
- lb vip 2004::/16 encap nat6 port 6306 target_port 6307 node_port 30966 new_len 1024
-
-### Configure the ASs (for each VIP)
-
- lb as <vip-prefix> [<address> [<address> [...]]] [del]
-
-You can add (or delete) as many ASs at a time (for a single VIP).
-Note that the AS address family must correspond to the VIP encap. IP family.
-
-Examples:
-
- lb as 2002::/16 2001::2 2001::3 2001::4
- lb as 2003::/16 10.0.0.1 10.0.0.2
- lb as 80.0.0.0/8 2001::2
- lb as 90.0.0.0/8 10.0.0.1
-
-### Configure SNAT
-
- lb set interface nat4 in <intfc> [del]
-
-Set SNAT feature in a specific interface.
-(applicable in NAT4 mode only)
-
- lb set interface nat6 in <intfc> [del]
-
-Set SNAT feature in a specific interface.
-(applicable in NAT6 mode only)
-
-## Monitoring
-
-The plugin provides quite a bunch of counters and information.
-These are still subject to quite significant changes.
-
- show lb
- show lb vip
- show lb vip verbose
-
- show node counters
-
-
-## Design notes
-
-### Multi-Threading
-
-MagLev is a distributed system which pseudo-randomly generates a
-new-connections-table based on AS names such that each server configured with
-the same set of ASs ends up with the same table. Connection stickyness is then
-ensured with an established-connections-table. Using ECMP, it is assumed (but
-not relied on) that servers will mostly receive traffic for different flows.
-
-This implementation pushes the parallelism a little bit further by using
-one established-connections table per thread. This is equivalent to assuming
-that RSS will make a job similar to ECMP, and is pretty useful as threads don't
-need to get a lock in order to write in the table.
-
-### Hash Table
-
-A load balancer requires an efficient read and write hash table. The hash table
-used by ip6-forward is very read-efficient, but not so much for writing. In
-addition, it is not a big deal if writing into the hash table fails (again,
-MagLev uses a flow table but does not heaviliy relies on it).
-
-The plugin therefore uses a very specific (and stupid) hash table.
- - Fixed (and power of 2) number of buckets (configured at runtime)
- - Fixed (and power of 2) elements per buckets (configured at compilation time)
-
-### Reference counting
-
-When an AS is removed, there is two possible ways to react.
- - Keep using the AS for established connections
- - Change AS for established connections (likely to cause error for TCP)
-
-In the first case, although an AS is removed from the configuration, its
-associated state needs to stay around as long as it is used by at least one
-thread.
-
-In order to avoid locks, a specific reference counter is used. The design is quite
-similar to clib counters but:
- - It is possible to decrease the value
- - Summing will not zero the per-thread counters
- - Only the thread can reallocate its own counters vector (to avoid concurrency issues)
-
-This reference counter is lock free, but reading a count of 0 does not mean
-the value can be freed unless it is ensured by *other* means that no other thread
-is concurrently referencing the object. In the case of this plugin, it is assumed
-that no concurrent event will take place after a few seconds.
-
diff --git a/src/plugins/lb/lb_plugin_doc.rst b/src/plugins/lb/lb_plugin_doc.rst
new file mode 100644
index 00000000000..603453e7848
--- /dev/null
+++ b/src/plugins/lb/lb_plugin_doc.rst
@@ -0,0 +1,223 @@
+Load Balancer plugin
+====================
+
+Version
+-------
+
+The load balancer plugin is currently in *beta* version. Both CLIs and
+APIs are subject to *heavy* changes, which also means feedback is really
+welcome regarding features, apis, etc…
+
+Overview
+--------
+
+This plugin provides load balancing for VPP in a way that is largely
+inspired from Google’s MagLev:
+http://research.google.com/pubs/pub44824.html
+
+The load balancer is configured with a set of Virtual IPs (VIP, which
+can be prefixes), and for each VIP, with a set of Application Server
+addresses (ASs).
+
+There are four encap types to steer traffic to different ASs: 1).
+IPv4+GRE ad IPv6+GRE encap types: Traffic received for a given VIP (or
+VIP prefix) is tunneled using GRE towards the different ASs in a way
+that (tries to) ensure that a given session will always be tunneled to
+the same AS.
+
+2). IPv4+L3DSR encap types: L3DSR is used to overcome Layer 2
+limitations of Direct Server Return Load Balancing. It maps VIP to DSCP
+bits, and reuse TOS bits to transfer DSCP bits to server, and then
+server will get VIP from DSCP-to-VIP mapping.
+
+Both VIPs or ASs can be IPv4 or IPv6, but for a given VIP, all ASs must
+be using the same encap. type (i.e. IPv4+GRE or IPv6+GRE or IPv4+L3DSR).
+Meaning that for a given VIP, all AS addresses must be of the same
+family.
+
+3). IPv4/IPv6 + NAT4/NAT6 encap types: This type provides kube-proxy
+data plane on user space, which is used to replace linux kernel’s
+kube-proxy based on iptables.
+
+Currently, load balancer plugin supports three service types: a) Cluster
+IP plus Port: support any protocols, including TCP, UDP. b) Node IP plus
+Node Port: currently only support UDP. c) External Load Balancer.
+
+For Cluster IP plus Port case: kube-proxy is configured with a set of
+Virtual IPs (VIP, which can be prefixes), and for each VIP, with a set
+of AS addresses (ASs).
+
+For a specific session received for a given VIP (or VIP prefix), first
+packet selects a AS according to internal load balancing algorithm, then
+does DNAT operation and sent to chosen AS. At the same time, will create
+a session entry to store AS chosen result. Following packets for that
+session will look up session table first, which ensures that a given
+session will always be routed to the same AS.
+
+For returned packet from AS, it will do SNAT operation and sent out.
+
+Please refer to below for details:
+https://schd.ws/hosted_files/ossna2017/1e/VPP_K8S_GTPU_OSSNA.pdf
+
+Performance
+-----------
+
+The load balancer has been tested up to 1 millions flows and still
+forwards more than 3Mpps per core in such circumstances. Although 3Mpps
+seems already good, it is likely that performance will be improved in
+next versions.
+
+Configuration
+-------------
+
+Global LB parameters
+~~~~~~~~~~~~~~~~~~~~
+
+The load balancer needs to be configured with some parameters:
+
+::
+
+ lb conf [ip4-src-address <addr>] [ip6-src-address <addr>]
+ [buckets <n>] [timeout <s>]
+
+ip4-src-address: the source address used to send encap. packets using
+IPv4 for GRE4 mode. or Node IP4 address for NAT4 mode.
+
+ip6-src-address: the source address used to send encap. packets using
+IPv6 for GRE6 mode. or Node IP6 address for NAT6 mode.
+
+buckets: the *per-thread* established-connections-table number of
+buckets.
+
+timeout: the number of seconds a connection will remain in the
+established-connections-table while no packet for this flow is received.
+
+Configure the VIPs
+~~~~~~~~~~~~~~~~~~
+
+::
+
+ lb vip <prefix> [encap (gre6|gre4|l3dsr|nat4|nat6)] \
+ [dscp <n>] [port <n> target_port <n> node_port <n>] [new_len <n>] [del]
+
+new_len is the size of the new-connection-table. It should be 1 or 2
+orders of magnitude bigger than the number of ASs for the VIP in order
+to ensure a good load balancing. Encap l3dsr and dscp is used to map VIP
+to dscp bit and rewrite DSCP bit in packets. So the selected server
+could get VIP from DSCP bit in this packet and perform DSR. Encap
+nat4/nat6 and port/target_port/node_port is used to do kube-proxy data
+plane.
+
+Examples:
+
+::
+
+ lb vip 2002::/16 encap gre6 new_len 1024
+ lb vip 2003::/16 encap gre4 new_len 2048
+ lb vip 80.0.0.0/8 encap gre6 new_len 16
+ lb vip 90.0.0.0/8 encap gre4 new_len 1024
+ lb vip 100.0.0.0/8 encap l3dsr dscp 2 new_len 32
+ lb vip 90.1.2.1/32 encap nat4 port 3306 target_port 3307 node_port 30964 new_len 1024
+ lb vip 2004::/16 encap nat6 port 6306 target_port 6307 node_port 30966 new_len 1024
+
+Configure the ASs (for each VIP)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ lb as <vip-prefix> [<address> [<address> [...]]] [del]
+
+You can add (or delete) as many ASs at a time (for a single VIP). Note
+that the AS address family must correspond to the VIP encap. IP family.
+
+Examples:
+
+::
+
+ lb as 2002::/16 2001::2 2001::3 2001::4
+ lb as 2003::/16 10.0.0.1 10.0.0.2
+ lb as 80.0.0.0/8 2001::2
+ lb as 90.0.0.0/8 10.0.0.1
+
+Configure SNAT
+~~~~~~~~~~~~~~
+
+::
+
+ lb set interface nat4 in <intfc> [del]
+
+Set SNAT feature in a specific interface. (applicable in NAT4 mode only)
+
+::
+
+ lb set interface nat6 in <intfc> [del]
+
+Set SNAT feature in a specific interface. (applicable in NAT6 mode only)
+
+Monitoring
+----------
+
+The plugin provides quite a bunch of counters and information. These are
+still subject to quite significant changes.
+
+::
+
+ show lb
+ show lb vip
+ show lb vip verbose
+
+ show node counters
+
+Design notes
+------------
+
+Multi-Threading
+~~~~~~~~~~~~~~~
+
+MagLev is a distributed system which pseudo-randomly generates a
+new-connections-table based on AS names such that each server configured
+with the same set of ASs ends up with the same table. Connection
+stickiness is then ensured with an established-connections-table. Using
+ECMP, it is assumed (but not relied on) that servers will mostly receive
+traffic for different flows.
+
+This implementation pushes the parallelism a little bit further by using
+one established-connections table per thread. This is equivalent to
+assuming that RSS will make a job similar to ECMP, and is pretty useful
+as threads don’t need to get a lock in order to write in the table.
+
+Hash Table
+~~~~~~~~~~
+
+A load balancer requires an efficient read and write hash table. The
+hash table used by ip6-forward is very read-efficient, but not so much
+for writing. In addition, it is not a big deal if writing into the hash
+table fails (again, MagLev uses a flow table but does not heavily
+relies on it).
+
+The plugin therefore uses a very specific (and stupid) hash table. -
+Fixed (and power of 2) number of buckets (configured at runtime) - Fixed
+(and power of 2) elements per buckets (configured at compilation time)
+
+Reference counting
+~~~~~~~~~~~~~~~~~~
+
+When an AS is removed, there is two possible ways to react. - Keep using
+the AS for established connections - Change AS for established
+connections (likely to cause error for TCP)
+
+In the first case, although an AS is removed from the configuration, its
+associated state needs to stay around as long as it is used by at least
+one thread.
+
+In order to avoid locks, a specific reference counter is used. The
+design is quite similar to clib counters but: - It is possible to
+decrease the value - Summing will not zero the per-thread counters -
+Only the thread can reallocate its own counters vector (to avoid
+concurrency issues)
+
+This reference counter is lock free, but reading a count of 0 does not
+mean the value can be freed unless it is ensured by *other* means that
+no other thread is concurrently referencing the object. In the case of
+this plugin, it is assumed that no concurrent event will take place
+after a few seconds.
diff --git a/src/plugins/lb/lb_test.c b/src/plugins/lb/lb_test.c
index 80fc38e2746..f64bdd220b5 100644
--- a/src/plugins/lb/lb_test.c
+++ b/src/plugins/lb/lb_test.c
@@ -207,6 +207,105 @@ static int api_lb_add_del_vip (vat_main_t * vam)
return ret;
}
+static int
+api_lb_add_del_vip_v2 (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_lb_add_del_vip_v2_t *mp;
+ int ret;
+ ip46_address_t ip_prefix;
+ u8 prefix_length = 0;
+ u8 protocol = 0;
+ u32 port = 0;
+ u32 encap = 0;
+ u32 dscp = ~0;
+ u32 srv_type = LB_SRV_TYPE_CLUSTERIP;
+ u32 target_port = 0;
+ u32 new_length = 1024;
+ u8 src_ip_sticky = 0;
+ int is_del = 0;
+
+ if (!unformat (line_input, "%U", unformat_ip46_prefix, &ip_prefix,
+ &prefix_length, IP46_TYPE_ANY, &prefix_length))
+ {
+ errmsg ("lb_add_del_vip: invalid vip prefix\n");
+ return -99;
+ }
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "new_len %d", &new_length))
+ ;
+ else if (unformat (line_input, "del"))
+ is_del = 1;
+ else if (unformat (line_input, "src_ip_sticky"))
+ src_ip_sticky = 1;
+ else if (unformat (line_input, "protocol tcp"))
+ {
+ protocol = IP_PROTOCOL_TCP;
+ }
+ else if (unformat (line_input, "protocol udp"))
+ {
+ protocol = IP_PROTOCOL_UDP;
+ }
+ else if (unformat (line_input, "port %d", &port))
+ ;
+ else if (unformat (line_input, "encap gre4"))
+ encap = LB_ENCAP_TYPE_GRE4;
+ else if (unformat (line_input, "encap gre6"))
+ encap = LB_ENCAP_TYPE_GRE6;
+ else if (unformat (line_input, "encap l3dsr"))
+ encap = LB_ENCAP_TYPE_L3DSR;
+ else if (unformat (line_input, "encap nat4"))
+ encap = LB_ENCAP_TYPE_NAT4;
+ else if (unformat (line_input, "encap nat6"))
+ encap = LB_ENCAP_TYPE_NAT6;
+ else if (unformat (line_input, "dscp %d", &dscp))
+ ;
+ else if (unformat (line_input, "type clusterip"))
+ srv_type = LB_SRV_TYPE_CLUSTERIP;
+ else if (unformat (line_input, "type nodeport"))
+ srv_type = LB_SRV_TYPE_NODEPORT;
+ else if (unformat (line_input, "target_port %d", &target_port))
+ ;
+ else
+ {
+ errmsg ("invalid arguments\n");
+ return -99;
+ }
+ }
+
+ if ((encap != LB_ENCAP_TYPE_L3DSR) && (dscp != ~0))
+ {
+ errmsg ("lb_vip_add error: should not configure dscp for none L3DSR.");
+ return -99;
+ }
+
+ if ((encap == LB_ENCAP_TYPE_L3DSR) && (dscp >= 64))
+ {
+ errmsg ("lb_vip_add error: dscp for L3DSR should be less than 64.");
+ return -99;
+ }
+
+ M (LB_ADD_DEL_VIP, mp);
+ ip_address_encode (&ip_prefix, IP46_TYPE_ANY, &mp->pfx.address);
+ mp->pfx.len = prefix_length;
+ mp->protocol = (u8) protocol;
+ mp->port = htons ((u16) port);
+ mp->encap = (u8) encap;
+ mp->dscp = (u8) dscp;
+ mp->type = (u8) srv_type;
+ mp->target_port = htons ((u16) target_port);
+ mp->node_port = htons ((u16) target_port);
+ mp->new_flows_table_length = htonl (new_length);
+ mp->is_del = is_del;
+ mp->src_ip_sticky = src_ip_sticky;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
static int api_lb_add_del_as (vat_main_t * vam)
{
diff --git a/src/plugins/lb/lb_types.api b/src/plugins/lb/lb_types.api
index 3378a5fec4f..a6e1980b6be 100644
--- a/src/plugins/lb/lb_types.api
+++ b/src/plugins/lb/lb_types.api
@@ -28,9 +28,9 @@ enum lb_encap_type
LB_API_ENCAP_TYPE_GRE4 = 0,
LB_API_ENCAP_TYPE_GRE6 = 1,
LB_API_ENCAP_TYPE_L3DSR = 2,
- LB_API_ENCAP_TYPE_NAT4 = 3 ,
- LB_API_ENCAP_TYPE_NAT6 =4,
- LB_API_ENCAP_N_TYPES =5,
+ LB_API_ENCAP_TYPE_NAT4 = 3,
+ LB_API_ENCAP_TYPE_NAT6 = 4,
+ LB_API_ENCAP_N_TYPES = 5,
};
/* Lookup types */
@@ -38,8 +38,8 @@ enum lb_lkp_type_t
{
LB_API_LKP_SAME_IP_PORT = 0,
LB_API_LKP_DIFF_IP_PORT = 1,
- LB_API_LKP_ALL_PORT_IP =2,
- LB_API_LKP_N_TYPES =3,
+ LB_API_LKP_ALL_PORT_IP = 2,
+ LB_API_LKP_N_TYPES = 3,
};
enum lb_vip_type
diff --git a/src/plugins/lb/lbhash.h b/src/plugins/lb/lbhash.h
index f822d79ded8..8253e9d52f0 100644
--- a/src/plugins/lb/lbhash.h
+++ b/src/plugins/lb/lbhash.h
@@ -88,8 +88,7 @@ lb_hash_t *lb_hash_alloc(u32 buckets, u32 timeout)
sizeof(lb_hash_bucket_t) * (buckets + 1);
u8 *mem = 0;
lb_hash_t *h;
- vec_alloc_aligned(mem, size, CLIB_CACHE_LINE_BYTES);
- clib_memset(mem, 0, size);
+ vec_validate_aligned (mem, size - 1, CLIB_CACHE_LINE_BYTES);
h = (lb_hash_t *)mem;
h->buckets_mask = (buckets - 1);
h->timeout = timeout;
diff --git a/src/plugins/lb/node.c b/src/plugins/lb/node.c
index b5e9da71376..a37fe11a9b4 100644
--- a/src/plugins/lb/node.c
+++ b/src/plugins/lb/node.c
@@ -174,26 +174,22 @@ lb_node_get_other_ports6 (ip6_header_t *ip60)
}
static_always_inline void
-lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
- u32 *hash, u32 *vip_idx, u8 per_port_vip)
+lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4, u32 *hash,
+ u32 *vip_idx, u8 per_port_vip)
{
vip_port_key_t key;
clib_bihash_kv_8_8_t kv, value;
+ ip4_header_t *ip40;
+ ip6_header_t *ip60;
+ lb_vip_t *vip0;
+ u64 ports;
/* For vip case, retrieve vip index for ip lookup */
*vip_idx = vnet_buffer (p)->ip.adj_index[VLIB_TX];
- if (per_port_vip)
- {
- /* For per-port-vip case, ip lookup stores placeholder index */
- key.vip_prefix_index = *vip_idx;
- }
-
+ /* Extract the L4 port number from the packet */
if (is_input_v4)
{
- ip4_header_t *ip40;
- u64 ports;
-
ip40 = vlib_buffer_get_current (p);
if (PREDICT_TRUE(
ip40->protocol == IP_PROTOCOL_TCP
@@ -202,20 +198,10 @@ lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
| ((u64) ((udp_header_t *) (ip40 + 1))->dst_port);
else
ports = lb_node_get_other_ports4 (ip40);
-
- *hash = lb_hash_hash (*((u64 *) &ip40->address_pair), ports, 0, 0, 0);
-
- if (per_port_vip)
- {
- key.protocol = ip40->protocol;
- key.port = (u16)(ports & 0xFFFF);
- }
}
else
{
- ip6_header_t *ip60;
ip60 = vlib_buffer_get_current (p);
- u64 ports;
if (PREDICT_TRUE(
ip60->protocol == IP_PROTOCOL_TCP
@@ -224,33 +210,68 @@ lb_node_get_hash (lb_main_t *lbm, vlib_buffer_t *p, u8 is_input_v4,
| ((u64) ((udp_header_t *) (ip60 + 1))->dst_port);
else
ports = lb_node_get_other_ports6 (ip60);
-
- *hash = lb_hash_hash (ip60->src_address.as_u64[0],
- ip60->src_address.as_u64[1],
- ip60->dst_address.as_u64[0],
- ip60->dst_address.as_u64[1], ports);
-
- if (per_port_vip)
- {
- key.protocol = ip60->protocol;
- key.port = (u16)(ports & 0xFFFF);
- }
}
- /* For per-port-vip case, retrieve vip index for vip_port_filter table */
if (per_port_vip)
{
+ /* For per-port-vip case, ip lookup stores placeholder index */
+ key.vip_prefix_index = *vip_idx;
+ key.port = (u16) (ports & 0xFFFF);
+ key.rsv = 0;
+ if (is_input_v4)
+ {
+ key.protocol = ip40->protocol;
+ }
+ else
+ {
+ key.protocol = ip60->protocol;
+ }
+
+ /* For per-port-vip case, retrieve vip index for vip_port_filter table */
kv.key = key.as_u64;
- if (clib_bihash_search_8_8(&lbm->vip_index_per_port, &kv, &value) < 0)
- {
- /* return default vip */
- *vip_idx = 0;
- return;
- }
- *vip_idx = value.value;
+ if (clib_bihash_search_8_8 (&lbm->vip_index_per_port, &kv, &value) < 0)
+ {
+ /* Set default vip */
+ *vip_idx = 0;
+ }
+ else
+ {
+ *vip_idx = value.value;
+ }
+ }
+
+ vip0 = pool_elt_at_index (lbm->vips, *vip_idx);
+
+ if (is_input_v4)
+ {
+ if (lb_vip_is_src_ip_sticky (vip0))
+ {
+ *hash = lb_hash_hash (*((u64 *) &ip40->address_pair), 0, 0, 0, 0);
+ }
+ else
+ {
+ *hash =
+ lb_hash_hash (*((u64 *) &ip40->address_pair), ports, 0, 0, 0);
+ }
+ }
+ else
+ {
+ if (lb_vip_is_src_ip_sticky (vip0))
+ {
+ *hash = lb_hash_hash (
+ ip60->src_address.as_u64[0], ip60->src_address.as_u64[1],
+ ip60->dst_address.as_u64[0], ip60->dst_address.as_u64[1], 0);
+ }
+ else
+ {
+ *hash = lb_hash_hash (
+ ip60->src_address.as_u64[0], ip60->src_address.as_u64[1],
+ ip60->dst_address.as_u64[0], ip60->dst_address.as_u64[1], ports);
+ }
}
}
+/* clang-format off */
static_always_inline uword
lb_node_fn (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -565,6 +586,7 @@ lb_node_fn (vlib_main_t * vm,
return frame->n_vectors;
}
+/* clang-format on */
u8 *
format_nodeport_lb_trace (u8 * s, va_list * args)
diff --git a/src/plugins/linux-cp/CMakeLists.txt b/src/plugins/linux-cp/CMakeLists.txt
index 080f73ecce4..c891689b4b4 100644
--- a/src/plugins/linux-cp/CMakeLists.txt
+++ b/src/plugins/linux-cp/CMakeLists.txt
@@ -12,12 +12,18 @@
# limitations under the License.
vpp_find_path(LIBNL3_INCLUDE_DIR NAMES libnl3/netlink/route/link/vlan.h)
+vpp_find_path(LIBMNL_INCLUDE_DIR NAMES libmnl/libmnl.h)
if (NOT LIBNL3_INCLUDE_DIR)
message(WARNING "-- libnl3 headers not found - linux-cp plugin disabled")
return()
endif()
+if (NOT LIBMNL_INCLUDE_DIR)
+ message(WARNING "-- libmnl headers not found - linux-cp plugin disabled")
+ return()
+endif()
+
vpp_plugin_find_library(linux-cp LIBNL3_LIB libnl-3.so)
vpp_plugin_find_library(linux-cp LIBNL3_ROUTE_LIB libnl-route-3.so.200)
@@ -27,6 +33,8 @@ include_directories(${LIBMNL_INCLUDE_DIR})
add_vpp_library(lcp
SOURCES
lcp_interface.c
+ lcp_interface_sync.c
+ lcp_mpls_sync.c
lcp_adj.c
lcp.c
@@ -59,3 +67,12 @@ add_vpp_plugin(linux_cp_unittest
LINK_LIBRARIES
lcp
)
+
+add_vpp_plugin(linux_nl
+ SOURCES
+ lcp_router.c
+ lcp_nl.c
+
+ LINK_LIBRARIES
+ lcp
+)
diff --git a/src/plugins/linux-cp/FEATURE.yaml b/src/plugins/linux-cp/FEATURE.yaml
index 088b0606f58..425858591f2 100644
--- a/src/plugins/linux-cp/FEATURE.yaml
+++ b/src/plugins/linux-cp/FEATURE.yaml
@@ -3,10 +3,10 @@ name: Linux Control Plane (integration)
maintainer: Neale Ranns <neale@grahpiant.com>
description: |-
- This plugin provides the beginnings of an integration with the
- Linux network stack.
- The plugin provides the capability to 'mirror' VPP interfaces in
- the Linux kernel. This means that for any interface in VPP the user
+ These plugins provide an integration with the Linux network stack.
+
+ The "linux_cp" plugin provides the capability to 'mirror' VPP interfaces
+ in the Linux kernel. This means that for any interface in VPP the user
can create a corresponding TAP or TUN device in the Linux kernel
and have VPP plumb them together.
The plumbing mechanics is different in each direction.
@@ -17,8 +17,10 @@ description: |-
In the TX direction, packets received by VPP an the mirror Tap/Tun
are cross-connected to the VPP interfaces. For IP packets, IP output
features are applied.
- This is the beginnings of integration, because there needs to be
- an external agent that will configure (and synchronize) the IP
+ If MPLS is enabled on a VPP interface, state is synced to Linux and
+ in TX direction a special feature is enabled to pass MPLS packets through
+ untouched.
+ The "linux_nl" plugin listens to netlink messages and synchronizes the IP
configuration of the paired interfaces.
state: experimental
diff --git a/src/plugins/linux-cp/lcp.api b/src/plugins/linux-cp/lcp.api
index 319dd3e6483..e7eaa5a3669 100644
--- a/src/plugins/linux-cp/lcp.api
+++ b/src/plugins/linux-cp/lcp.api
@@ -21,19 +21,20 @@ option version = "1.0.0";
import "vnet/interface_types.api";
-/** \brief Set the default Linux Control Plane namespace
+/** \brief Set the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param namespace - the new default namespace; namespace[0] == 0 iff none
+ @param netns - the new default netns; netns[0] == 0 if none
*/
autoreply define lcp_default_ns_set
{
u32 client_index;
u32 context;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
-/** \brief get the default Linux Control Plane namespace
+/** \brief get the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
@@ -43,15 +44,16 @@ define lcp_default_ns_get
u32 context;
};
-/** \brief get the default Linux Control Plane namespace
+/** \brief get the default Linux Control Plane netns
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param namespace - the default namespace; namespace[0] == 0 iff none
+ @param netns - the default netns; netns[0] == 0 if none
*/
define lcp_default_ns_get_reply
{
u32 context;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
enum lcp_itf_host_type : u8
@@ -67,32 +69,59 @@ enum lcp_itf_host_type : u8
@param sw_if_index - index of VPP PHY SW interface
@param host_if_name - host tap interface name
@param host_if_type - the type of host interface to create (tun, tap)
- @param namespace - optional tap namespace; namespace[0] == 0 iff none
+ @param netns - optional tap netns; netns[0] == 0 if none
*/
autoreply autoendian define lcp_itf_pair_add_del
{
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_interface_index_t sw_if_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
};
autoendian define lcp_itf_pair_add_del_v2
{
+ option in_progress;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_interface_index_t sw_if_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
};
define lcp_itf_pair_add_del_v2_reply
{
+ option in_progress;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t host_sw_if_index;
+};
+autoendian define lcp_itf_pair_add_del_v3
+{
+ option in_progress;
+
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_interface_index_t sw_if_index;
+ string host_if_name[16]; /* IFNAMSIZ */
+ vl_api_lcp_itf_host_type_t host_if_type;
+ string netns[32]; /* LCP_NS_LEN */
+};
+define lcp_itf_pair_add_del_v3_reply
+{
+ option in_progress;
+
u32 context;
i32 retval;
+ u32 vif_index;
vl_api_interface_index_t host_sw_if_index;
};
@@ -101,13 +130,26 @@ define lcp_itf_pair_add_del_v2_reply
@param context - sender context, to match reply w/ request
@param sw_if_index - interface to use as filter (~0 == "all")
*/
-define lcp_itf_pair_get
+autoendian define lcp_itf_pair_get
{
u32 client_index;
u32 context;
u32 cursor;
};
-define lcp_itf_pair_get_reply
+autoendian define lcp_itf_pair_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+autoendian define lcp_itf_pair_get_v2
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+ vl_api_interface_index_t sw_if_index;
+};
+autoendian define lcp_itf_pair_get_v2_reply
{
u32 context;
i32 retval;
@@ -121,7 +163,7 @@ define lcp_itf_pair_get_reply
@param vif_index - tap linux index
@param host_if_name - host interface name
@param host_if_type - host interface type (tun, tap)
- @param namespace - host interface namespace
+ @param netns - host interface netns
*/
autoendian define lcp_itf_pair_details
{
@@ -131,7 +173,8 @@ autoendian define lcp_itf_pair_details
u32 vif_index;
string host_if_name[16]; /* IFNAMSIZ */
vl_api_lcp_itf_host_type_t host_if_type;
- string namespace[32]; /* LCP_NS_LEN */
+ string netns[32]; /* LCP_NS_LEN */
+ option in_progress;
};
service {
@@ -139,6 +182,11 @@ service {
stream lcp_itf_pair_details;
};
+service {
+ rpc lcp_itf_pair_get_v2 returns lcp_itf_pair_get_v2_reply
+ stream lcp_itf_pair_details;
+};
+
/** \brief Replace end/begin
*/
autoreply define lcp_itf_pair_replace_begin
diff --git a/src/plugins/linux-cp/lcp.c b/src/plugins/linux-cp/lcp.c
index f4c491c9cb3..34e8550a13f 100644
--- a/src/plugins/linux-cp/lcp.c
+++ b/src/plugins/linux-cp/lcp.c
@@ -20,6 +20,7 @@
#include <net/if.h>
#include <plugins/linux-cp/lcp.h>
+#include <plugins/linux-cp/lcp_interface.h>
lcp_main_t lcp_main;
@@ -28,8 +29,9 @@ lcp_get_default_ns (void)
{
lcp_main_t *lcpm = &lcp_main;
- if (lcpm->default_namespace[0] == 0)
- return 0;
+ if (!lcpm->default_namespace || lcpm->default_namespace[0] == 0)
+ return NULL;
+
return lcpm->default_namespace;
}
@@ -59,16 +61,15 @@ lcp_set_default_ns (u8 *ns)
if (!p || *p == 0)
{
- clib_memset (lcpm->default_namespace, 0,
- sizeof (lcpm->default_namespace));
+ lcpm->default_namespace = NULL;
if (lcpm->default_ns_fd > 0)
close (lcpm->default_ns_fd);
lcpm->default_ns_fd = 0;
return 0;
}
- clib_strncpy ((char *) lcpm->default_namespace, p, LCP_NS_LEN - 1);
-
+ vec_validate_init_c_string (lcpm->default_namespace, p,
+ clib_strnlen (p, LCP_NS_LEN));
s = format (0, "/var/run/netns/%s%c", (char *) lcpm->default_namespace, 0);
lcpm->default_ns_fd = open ((char *) s, O_RDONLY);
vec_free (s);
@@ -76,6 +77,112 @@ lcp_set_default_ns (u8 *ns)
return 0;
}
+void
+lcp_set_sync (u8 is_auto)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->lcp_sync = (is_auto != 0);
+
+ // If we set to 'on', do a one-off sync of LCP interfaces
+ if (is_auto)
+ lcp_itf_pair_sync_state_all ();
+}
+
+int
+lcp_sync (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->lcp_sync;
+}
+
+void
+lcp_set_auto_subint (u8 is_auto)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->lcp_auto_subint = (is_auto != 0);
+}
+
+int
+lcp_auto_subint (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->lcp_auto_subint;
+}
+
+void
+lcp_set_del_static_on_link_down (u8 is_del)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->del_static_on_link_down = (is_del != 0);
+}
+
+u8
+lcp_get_del_static_on_link_down (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->del_static_on_link_down;
+}
+
+void
+lcp_set_del_dynamic_on_link_down (u8 is_del)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->del_dynamic_on_link_down = (is_del != 0);
+}
+
+u8
+lcp_get_del_dynamic_on_link_down (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->del_dynamic_on_link_down;
+}
+
+void
+lcp_set_netlink_processing_active (u8 is_processing)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ lcpm->netlink_processing_active = (is_processing != 0);
+}
+
+u8
+lcp_get_netlink_processing_active (void)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ return lcpm->netlink_processing_active;
+}
+
+void
+lcp_set_default_num_queues (u16 num_queues, u8 is_tx)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ if (is_tx)
+ lcpm->num_tx_queues = num_queues;
+ else
+ lcpm->num_rx_queues = num_queues;
+}
+
+u16
+lcp_get_default_num_queues (u8 is_tx)
+{
+ lcp_main_t *lcpm = &lcp_main;
+
+ if (is_tx)
+ return lcpm->num_tx_queues;
+
+ return lcpm->num_rx_queues ?: vlib_num_workers ();
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/linux-cp/lcp.h b/src/plugins/linux-cp/lcp.h
index 7fdad3798bc..e89b149f67d 100644
--- a/src/plugins/linux-cp/lcp.h
+++ b/src/plugins/linux-cp/lcp.h
@@ -22,11 +22,17 @@
typedef struct lcp_main_s
{
u16 msg_id_base; /* API message ID base */
- u8 default_namespace[LCP_NS_LEN]; /* default namespace if set */
+ u8 *default_namespace; /* default namespace if set */
int default_ns_fd;
- u8 auto_intf;
- /* Set when Unit testing */
- u8 test_mode;
+ u8 lcp_auto_subint; /* Automatically create/delete LCP sub-interfaces */
+ u8 lcp_sync; /* Automatically sync VPP changes to LCP */
+ u8 del_static_on_link_down; /* Delete static routes when link goes down */
+ u8 del_dynamic_on_link_down; /* Delete dynamic routes when link goes down */
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u8 test_mode; /* Set when Unit testing */
+ u8 netlink_processing_active; /* Set while a batch of Netlink messages are
+ being processed */
} lcp_main_t;
extern lcp_main_t lcp_main;
@@ -38,6 +44,31 @@ int lcp_set_default_ns (u8 *ns);
u8 *lcp_get_default_ns (void); /* Returns NULL or shared string */
int lcp_get_default_ns_fd (void);
+/**
+ * Get/Set whether to delete static routes when the link goes down.
+ */
+void lcp_set_del_static_on_link_down (u8 is_del);
+u8 lcp_get_del_static_on_link_down (void);
+
+/**
+ * Get/Set whether to delete dynamic routes when the link goes down.
+ */
+void lcp_set_del_dynamic_on_link_down (u8 is_del);
+u8 lcp_get_del_dynamic_on_link_down (void);
+
+/**
+ * Get/Set when we're processing a batch of netlink messages.
+ * This is used to avoid looping messages between lcp-sync and netlink.
+ */
+void lcp_set_netlink_processing_active (u8 is_processing);
+u8 lcp_get_netlink_processing_active (void);
+
+/**
+ * Get/Set the default queue number for LCP host taps.
+ */
+void lcp_set_default_num_queues (u16 num_queues, u8 is_tx);
+u16 lcp_get_default_num_queues (u8 is_tx);
+
#endif
/*
diff --git a/src/plugins/linux-cp/lcp.rst b/src/plugins/linux-cp/lcp.rst
index 6d81901cf7b..6f82a29bfbb 100644
--- a/src/plugins/linux-cp/lcp.rst
+++ b/src/plugins/linux-cp/lcp.rst
@@ -8,9 +8,9 @@ Linux Control Plane Integration
Overview
________
-This plugin allows VPP to integrate with the Linux. The
+This plugin allows VPP to integrate with the Linux kernel. The
general model is that Linux is the network stack, i.e. it has the
-control plane protocols, like ARP, IPv6 ND/MLD, Ping, etc, and VPP
+control plane protocols, like ARP, IPv6 ND/MLD, ping, etc, and VPP
provides a SW based ASIC for forwarding.
Interfaces
@@ -20,16 +20,17 @@ VPP owns the interfaces in the system; physical (.e.g PCI), quasi
physical (e.g. vhost), or virtual (e.g. tunnel). However,
for the Linux networking stack to function it needs a representation
of these interfaces; it needs a mirror image in the kernel. For this
-mirror we use a Tap interface, if the VPP interface is multi-point, a
-Tun if it's point-to-point. A physical and its mirror form an
+mirror we use a TAP interface, if the VPP interface is multi-point, a
+TUN if it's point-to-point. A physical and its mirror form an
interface 'pair'.
-The host interface has two identities; the sw_if_index of the Tap and
-the virtual interface index in the kernel. It may be in a Linux namespace.
+The host interface has two identities; the sw_if_index of the TAP and
+the virtual interface index in the kernel. It may be in a Linux network
+namespace.
The creation of the interface pairs is required from the control
plane. It can be statically configured in the VPP startup
-configuration file. The intent here was to make the pair creation
+configuration file. The intent here is to make the pair creation
explicit, rather than have VPP guess which of the interfaces it owns
require a mirror.
@@ -41,27 +42,23 @@ interfaces. Any configuration that is made on these Linux interfaces,
also needs to be applied on the corresponding physical interface in
VPP.
-This is functionality is not provided in this plugin, but it can be
-achieved in various ways, for example by listening to the netlink
-messages and applying the config. As a result all e.g. routes
-programmed in Linux, will also be present in VPP's FIB.
+This is functionality is provided by the "linux_nl" plugin.
-Linux will own the [ARP/ND] nieghbor tables (which will be copied via
+Linux will own the [ARP/ND] neighbor tables (which will be copied via
netlink to VPP also). This means that Linux will send packets with the
peer's MAC address in the rewrite to VPP. The receiving TAP interface
must therefore be in promiscuous mode.
-
Forwarding
__________
The basic principle is to x-connect traffic from a Linux host interface
-(received on the Tap/Tun) to its paired the physical, and vice-versa.
+(received on the tap/tun) to its paired the physical, and vice-versa.
Host to Physical
^^^^^^^^^^^^^^^^
-All packets sent by the host, and received by VPP on a Tap/Tun should
+All packets sent by the host, and received by VPP on a tap/tun should
be sent to its paired physical interface. However, they should be sent
with the same consequences as if they had originated from VPP,
i.e. they should be subject to all output features on the physical
@@ -73,17 +70,18 @@ adjacency that VPP would have used to send this packet; this adjacency
is stored in the buffer's meta data so that it is available to all
output features. Then the packet is sent through the physical
interface's IP output feature arc.
+
All ARP packets are x-connected from the tap to the physical.
Physical to Host
^^^^^^^^^^^^^^^^
All ARP packets received on the physical are sent to the paired
-Tap. This allows the Linux network stack to build the nieghbour table.
+tap. This allows the Linux network stack to build the neighbor table.
IP packets that are punted are sent to the host. They are sent on the
tap that is paired with the physical on which they were originally
-received. The packet is sent on the Tap/Tun 'exactly' as it was
+received. The packet is sent on the tap/tun 'exactly' as it was
received (i.e. with the L2 rewrite) but post any translations that
input features may have made.
@@ -92,5 +90,4 @@ Recommendations
^^^^^^^^^^^^^^^
When using this plugin disable the ARP, ND, IGMP plugins; this is the
-task for Linux.
-Disable ping plugin, since Linux will now respond.
+task for Linux. Disable ping plugin, since Linux will now respond.
diff --git a/src/plugins/linux-cp/lcp_adj.c b/src/plugins/linux-cp/lcp_adj.c
index bfbc2fec913..b10c70616b5 100644
--- a/src/plugins/linux-cp/lcp_adj.c
+++ b/src/plugins/linux-cp/lcp_adj.c
@@ -185,8 +185,8 @@ lcp_adj_show_cmd (vlib_main_t *vm, unformat_input_t *input,
if (unformat (input, "verbose"))
verbose = 1;
- vlib_cli_output (vm, "Linux-CP Adjs:\n%U", BV (format_bihash), &lcp_adj_tbl,
- verbose);
+ vlib_cli_output (vm, "linux-cp adjacencies:\n%U", BV (format_bihash),
+ &lcp_adj_tbl, verbose);
return 0;
}
@@ -210,7 +210,7 @@ lcp_adj_init (vlib_main_t *vm)
{
adj_type = adj_delegate_register_new_type (&lcp_adj_vft);
- BV (clib_bihash_init) (&lcp_adj_tbl, "linux-cp ADJ table", 1024, 1 << 24);
+ BV (clib_bihash_init) (&lcp_adj_tbl, "linux-cp adjacencies", 1024, 1 << 24);
BV (clib_bihash_set_kvp_format_fn) (&lcp_adj_tbl, format_lcp_adj_kvp);
return (NULL);
diff --git a/src/plugins/linux-cp/lcp_api.c b/src/plugins/linux-cp/lcp_api.c
index c9aa01566c6..74421230e9d 100644
--- a/src/plugins/linux-cp/lcp_api.c
+++ b/src/plugins/linux-cp/lcp_api.c
@@ -41,27 +41,11 @@ api_encode_host_type (lip_host_type_t type)
return LCP_API_ITF_HOST_TAP;
}
-void
-lcp_set_auto_intf (u8 is_auto)
-{
- lcp_main_t *lcpm = &lcp_main;
-
- lcpm->auto_intf = (is_auto != 0);
-}
-
-int
-lcp_auto_intf (void)
-{
- lcp_main_t *lcpm = &lcp_main;
-
- return lcpm->auto_intf;
-}
-
static int
vl_api_lcp_itf_pair_add (u32 phy_sw_if_index, lip_host_type_t lip_host_type,
u8 *mp_host_if_name, size_t sizeof_host_if_name,
u8 *mp_namespace, size_t sizeof_mp_namespace,
- u32 *host_sw_if_index_p)
+ u32 *host_sw_if_index_p, u32 *vif_index_p)
{
u8 *host_if_name, *netns;
int host_len, netns_len, rv;
@@ -80,6 +64,13 @@ vl_api_lcp_itf_pair_add (u32 phy_sw_if_index, lip_host_type_t lip_host_type,
rv = lcp_itf_pair_create (phy_sw_if_index, host_if_name, lip_host_type,
netns, host_sw_if_index_p);
+ if (!rv && (vif_index_p != NULL))
+ {
+ lcp_itf_pair_t *pair =
+ lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index));
+ *vif_index_p = pair->lip_vif_index;
+ }
+
vec_free (host_if_name);
vec_free (netns);
@@ -94,20 +85,15 @@ vl_api_lcp_itf_pair_add_del_t_handler (vl_api_lcp_itf_pair_add_del_t *mp)
lip_host_type_t lip_host_type;
int rv;
- if (!vnet_sw_if_index_is_api_valid (mp->sw_if_index))
- {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
- }
+ VALIDATE_SW_IF_INDEX_END (mp);
phy_sw_if_index = mp->sw_if_index;
lip_host_type = api_decode_host_type (mp->host_if_type);
if (mp->is_add)
{
- rv =
- vl_api_lcp_itf_pair_add (phy_sw_if_index, lip_host_type,
- mp->host_if_name, sizeof (mp->host_if_name),
- mp->namespace, sizeof (mp->namespace), NULL);
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns), NULL, NULL);
}
else
{
@@ -115,7 +101,7 @@ vl_api_lcp_itf_pair_add_del_t_handler (vl_api_lcp_itf_pair_add_del_t *mp)
}
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (VL_API_LCP_ITF_PAIR_ADD_DEL_REPLY);
+ REPLY_MACRO_END (VL_API_LCP_ITF_PAIR_ADD_DEL_REPLY);
}
static void
@@ -126,20 +112,45 @@ vl_api_lcp_itf_pair_add_del_v2_t_handler (vl_api_lcp_itf_pair_add_del_v2_t *mp)
lip_host_type_t lip_host_type;
int rv;
- if (!vnet_sw_if_index_is_api_valid (mp->sw_if_index))
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ phy_sw_if_index = mp->sw_if_index;
+ lip_host_type = api_decode_host_type (mp->host_if_type);
+ if (mp->is_add)
{
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns),
+ &host_sw_if_index, NULL);
}
+ else
+ {
+ rv = lcp_itf_pair_delete (phy_sw_if_index);
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_ADD_DEL_V2_REPLY,
+ { rmp->host_sw_if_index = host_sw_if_index; });
+}
+
+static void
+vl_api_lcp_itf_pair_add_del_v3_t_handler (vl_api_lcp_itf_pair_add_del_v3_t *mp)
+{
+ u32 phy_sw_if_index, host_sw_if_index = ~0, vif_index = ~0;
+ vl_api_lcp_itf_pair_add_del_v3_reply_t *rmp;
+ lip_host_type_t lip_host_type;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX_END (mp);
phy_sw_if_index = mp->sw_if_index;
lip_host_type = api_decode_host_type (mp->host_if_type);
if (mp->is_add)
{
- rv = vl_api_lcp_itf_pair_add (phy_sw_if_index, lip_host_type,
- mp->host_if_name,
- sizeof (mp->host_if_name), mp->namespace,
- sizeof (mp->namespace), &host_sw_if_index);
+ rv = vl_api_lcp_itf_pair_add (
+ phy_sw_if_index, lip_host_type, mp->host_if_name,
+ sizeof (mp->host_if_name), mp->netns, sizeof (mp->netns),
+ &host_sw_if_index, &vif_index);
}
else
{
@@ -147,8 +158,10 @@ vl_api_lcp_itf_pair_add_del_v2_t_handler (vl_api_lcp_itf_pair_add_del_v2_t *mp)
}
BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO2 (VL_API_LCP_ITF_PAIR_ADD_DEL_V2_REPLY,
- { rmp->host_sw_if_index = ntohl (host_sw_if_index); });
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_ADD_DEL_V3_REPLY, ({
+ rmp->host_sw_if_index = host_sw_if_index;
+ rmp->vif_index = vif_index;
+ }));
}
static void
@@ -158,7 +171,7 @@ send_lcp_itf_pair_details (index_t lipi, vl_api_registration_t *rp,
vl_api_lcp_itf_pair_details_t *rmp;
lcp_itf_pair_t *lcp_pair = lcp_itf_pair_get (lipi);
- REPLY_MACRO_DETAILS4 (
+ REPLY_MACRO_DETAILS4_END (
VL_API_LCP_ITF_PAIR_DETAILS, rp, context, ({
rmp->phy_sw_if_index = lcp_pair->lip_phy_sw_if_index;
rmp->host_sw_if_index = lcp_pair->lip_host_sw_if_index;
@@ -167,9 +180,11 @@ send_lcp_itf_pair_details (index_t lipi, vl_api_registration_t *rp,
memcpy_s (rmp->host_if_name, sizeof (rmp->host_if_name),
lcp_pair->lip_host_name, vec_len (lcp_pair->lip_host_name));
+ rmp->host_if_name[vec_len (lcp_pair->lip_host_name)] = 0;
- clib_strncpy ((char *) rmp->namespace, (char *) lcp_pair->lip_namespace,
- vec_len (lcp_pair->lip_namespace));
+ memcpy_s (rmp->netns, sizeof (rmp->netns), lcp_pair->lip_namespace,
+ vec_len (lcp_pair->lip_namespace));
+ rmp->netns[vec_len (lcp_pair->lip_namespace)] = 0;
}));
}
@@ -179,19 +194,51 @@ vl_api_lcp_itf_pair_get_t_handler (vl_api_lcp_itf_pair_get_t *mp)
vl_api_lcp_itf_pair_get_reply_t *rmp;
i32 rv = 0;
- REPLY_AND_DETAILS_MACRO (
+ REPLY_AND_DETAILS_MACRO_END (
VL_API_LCP_ITF_PAIR_GET_REPLY, lcp_itf_pair_pool,
({ send_lcp_itf_pair_details (cursor, rp, mp->context); }));
}
static void
+vl_api_lcp_itf_pair_get_v2_t_handler (vl_api_lcp_itf_pair_get_v2_t *mp)
+{
+ vl_api_lcp_itf_pair_get_v2_reply_t *rmp;
+ i32 rv = 0;
+
+ if (mp->sw_if_index == ~0)
+ {
+ REPLY_AND_DETAILS_MACRO_END (
+ VL_API_LCP_ITF_PAIR_GET_REPLY, lcp_itf_pair_pool,
+ ({ send_lcp_itf_pair_details (cursor, rp, mp->context); }));
+ }
+ else
+ {
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ u32 pair_index = lcp_itf_pair_find_by_phy (mp->sw_if_index);
+ if (pair_index == INDEX_INVALID)
+ {
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto bad_sw_if_index;
+ }
+ send_lcp_itf_pair_details (
+ pair_index, vl_api_client_index_to_registration (mp->client_index),
+ mp->context);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO2_END (VL_API_LCP_ITF_PAIR_GET_V2_REPLY,
+ ({ rmp->cursor = ~0; }));
+ }
+}
+
+static void
vl_api_lcp_default_ns_set_t_handler (vl_api_lcp_default_ns_set_t *mp)
{
vl_api_lcp_default_ns_set_reply_t *rmp;
int rv;
- mp->namespace[LCP_NS_LEN - 1] = 0;
- rv = lcp_set_default_ns (mp->namespace);
+ mp->netns[LCP_NS_LEN - 1] = 0;
+ rv = lcp_set_default_ns (mp->netns);
REPLY_MACRO (VL_API_LCP_DEFAULT_NS_SET_REPLY);
}
@@ -199,25 +246,14 @@ vl_api_lcp_default_ns_set_t_handler (vl_api_lcp_default_ns_set_t *mp)
static void
vl_api_lcp_default_ns_get_t_handler (vl_api_lcp_default_ns_get_t *mp)
{
- lcp_main_t *lcpm = &lcp_main;
vl_api_lcp_default_ns_get_reply_t *rmp;
- vl_api_registration_t *reg;
- char *ns;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id = (VL_API_LCP_DEFAULT_NS_GET_REPLY + lcpm->msg_id_base);
- rmp->context = mp->context;
-
- ns = (char *) lcp_get_default_ns ();
- if (ns)
- clib_strncpy ((char *) rmp->namespace, ns, LCP_NS_LEN - 1);
- vl_api_send_msg (reg, (u8 *) rmp);
+ REPLY_MACRO_DETAILS2 (VL_API_LCP_DEFAULT_NS_GET_REPLY, ({
+ char *ns = (char *) lcp_get_default_ns ();
+ if (ns)
+ clib_strncpy ((char *) rmp->netns, ns,
+ LCP_NS_LEN - 1);
+ }));
}
static void
@@ -250,7 +286,7 @@ vl_api_lcp_itf_pair_replace_end_t_handler (
#include <linux-cp/lcp.api.c>
static clib_error_t *
-lcp_plugin_api_hookup (vlib_main_t *vm)
+lcp_api_init (vlib_main_t *vm)
{
/* Ask for a correctly-sized block of API message decode slots */
lcp_msg_id_base = setup_message_id_table ();
@@ -258,7 +294,7 @@ lcp_plugin_api_hookup (vlib_main_t *vm)
return (NULL);
}
-VLIB_INIT_FUNCTION (lcp_plugin_api_hookup);
+VLIB_INIT_FUNCTION (lcp_api_init);
#include <vpp/app/version.h>
VLIB_PLUGIN_REGISTER () = {
diff --git a/src/plugins/linux-cp/lcp_cli.c b/src/plugins/linux-cp/lcp_cli.c
index cb874b1c023..0dcf600b301 100644
--- a/src/plugins/linux-cp/lcp_cli.c
+++ b/src/plugins/linux-cp/lcp_cli.c
@@ -34,81 +34,178 @@ lcp_itf_pair_create_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
vnet_main_t *vnm = vnet_get_main ();
- u32 sw_if_index;
- u8 *host_if_name;
- lip_host_type_t host_if_type;
- u8 *ns;
- int r;
+ u32 sw_if_index = ~0;
+ u8 *host_if_name = NULL;
+ lip_host_type_t host_if_type = LCP_ITF_HOST_TAP;
+ u8 *ns = NULL;
+ clib_error_t *error = NULL;
+
+ if (unformat_user (input, unformat_line_input, line_input))
+ {
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "host-if %s", &host_if_name))
+ ;
+ else if (unformat (line_input, "netns %s", &ns))
+ ;
+ else if (unformat (line_input, "tun"))
+ host_if_type = LCP_ITF_HOST_TUN;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+ unformat_free (line_input);
+ }
+
+ if (error)
+ ;
+ else if (sw_if_index == ~0)
+ error = clib_error_return (0, "interface name or sw_if_index required");
+ else if (!host_if_name)
+ error = clib_error_return (0, "host interface name required");
+ else if (vec_len (ns) >= LCP_NS_LEN)
+ error = clib_error_return (
+ 0, "Namespace name should be fewer than %d characters", LCP_NS_LEN);
+ else
+ {
+ int r;
+
+ r = lcp_itf_pair_create (sw_if_index, host_if_name, host_if_type, ns,
+ NULL);
+ if (r)
+ error = clib_error_return (0, "linux-cp pair creation failed (%d)", r);
+ }
+
+ vec_free (host_if_name);
+ vec_free (ns);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = {
+ .path = "lcp create",
+ .short_help = "lcp create <sw_if_index>|<if-name> host-if <host-if-name> "
+ "netns <namespace> [tun]",
+ .function = lcp_itf_pair_create_command_fn,
+};
+
+static clib_error_t *
+lcp_sync_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
- sw_if_index = ~0;
- host_if_name = ns = NULL;
- host_if_type = LCP_ITF_HOST_TAP;
-
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "%d", &sw_if_index))
- ;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- ;
- else if (unformat (line_input, "host-if %s", &host_if_name))
- ;
- else if (unformat (line_input, "netns %s", &ns))
- ;
- else if (unformat (line_input, "tun"))
- host_if_type = LCP_ITF_HOST_TUN;
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_sync (1);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_sync (0);
else
- {
- unformat_free (line_input);
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
}
unformat_free (line_input);
+ return 0;
+}
- if (!host_if_name)
- {
- vec_free (ns);
- return clib_error_return (0, "host interface name required");
- }
+VLIB_CLI_COMMAND (lcp_sync_command, static) = {
+ .path = "lcp lcp-sync",
+ .short_help = "lcp lcp-sync [on|enable|off|disable]",
+ .function = lcp_sync_command_fn,
+};
- if (sw_if_index == ~0)
- {
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (0, "interface name or sw_if_index required");
- }
+static clib_error_t *
+lcp_auto_subint_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
- if (vec_len (ns) >= LCP_NS_LEN)
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- vec_free (host_if_name);
- vec_free (ns);
- return clib_error_return (
- 0, "Namespace name should be fewer than %d characters", LCP_NS_LEN);
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_auto_subint (1);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_auto_subint (0);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
}
- r = lcp_itf_pair_create (sw_if_index, host_if_name, host_if_type, ns, NULL);
+ unformat_free (line_input);
+ return 0;
+}
- vec_free (host_if_name);
- vec_free (ns);
+VLIB_CLI_COMMAND (lcp_auto_subint_command, static) = {
+ .path = "lcp lcp-auto-subint",
+ .short_help = "lcp lcp-auto-subint [on|enable|off|disable]",
+ .function = lcp_auto_subint_command_fn,
+};
- if (r)
- return clib_error_return (0, "linux-cp pair creation failed (%d)", r);
+static clib_error_t *
+lcp_param_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "del-static-on-link-down"))
+ {
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_del_static_on_link_down (1 /* is_del */);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_del_static_on_link_down (0 /* is_del */);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ else if (unformat (line_input, "del-dynamic-on-link-down"))
+ {
+ if (unformat (line_input, "on") || unformat (line_input, "enable"))
+ lcp_set_del_dynamic_on_link_down (1 /* is_del */);
+ else if (unformat (line_input, "off") ||
+ unformat (line_input, "disable"))
+ lcp_set_del_dynamic_on_link_down (0 /* is_del */);
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+
+ unformat_free (line_input);
return 0;
}
-VLIB_CLI_COMMAND (lcp_itf_pair_create_command, static) = {
- .path = "lcp create",
- .short_help = "lcp create <sw_if_index>|<if-name> host-if <host-if-name> "
- "netns <namespace> [tun]",
- .function = lcp_itf_pair_create_command_fn,
+VLIB_CLI_COMMAND (lcp_param_command, static) = {
+ .path = "lcp param",
+ .short_help = "lcp param [del-static-on-link-down (on|enable|off|disable)] "
+ "[del-dynamic-on-link-down (on|enable|off|disable)]",
+ .function = lcp_param_command_fn,
};
static clib_error_t *
@@ -118,6 +215,7 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
unformat_input_t _line_input, *line_input = &_line_input;
u8 *ns;
int r;
+ clib_error_t *error = NULL;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -130,10 +228,15 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "clear netns"))
;
+ else
+ {
+ vec_free (ns);
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
}
- unformat_free (line_input);
-
vlib_cli_output (vm, "lcp set default netns '%s'\n", (char *) ns);
r = lcp_set_default_ns (ns);
@@ -141,7 +244,10 @@ lcp_default_netns_command_fn (vlib_main_t *vm, unformat_input_t *input,
if (r)
return clib_error_return (0, "linux-cp set default netns failed (%d)", r);
- return 0;
+done:
+ unformat_free (line_input);
+
+ return error;
}
VLIB_CLI_COMMAND (lcp_default_netns_command, static) = {
@@ -156,36 +262,42 @@ lcp_itf_pair_delete_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
vnet_main_t *vnm = vnet_get_main ();
unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index;
- int r;
+ u32 sw_if_index = ~0;
+ clib_error_t *error = NULL;
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- sw_if_index = ~0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ if (unformat_user (input, unformat_line_input, line_input))
{
- if (unformat (line_input, "%d", &sw_if_index))
- ;
- else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
- &sw_if_index))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+ unformat_free (line_input);
}
- unformat_free (line_input);
-
- if (sw_if_index == ~0)
- return clib_error_return (0, "interface name or sw_if_index required");
+ if (error)
+ ;
+ else if (sw_if_index == ~0)
+ error = clib_error_return (0, "interface name or sw_if_index required");
+ else
+ {
+ int r;
- r = lcp_itf_pair_delete (sw_if_index);
+ r = lcp_itf_pair_delete (sw_if_index);
+ if (r)
+ error = clib_error_return (0, "linux-cp pair deletion failed (%d)", r);
+ }
- if (r)
- return clib_error_return (0, "linux-cp pair deletion failed (%d)", r);
- return 0;
+ return error;
}
VLIB_CLI_COMMAND (lcp_itf_pair_delete_command, static) = {
diff --git a/src/plugins/linux-cp/lcp_interface.c b/src/plugins/linux-cp/lcp_interface.c
index da409619746..e1f4a6a1d69 100644
--- a/src/plugins/linux-cp/lcp_interface.c
+++ b/src/plugins/linux-cp/lcp_interface.c
@@ -21,6 +21,7 @@
#include <linux-cp/lcp_interface.h>
#include <netlink/route/link/vlan.h>
+#include <linux/if_ether.h>
#include <vnet/plugin/plugin.h>
#include <vnet/plugin/plugin.h>
@@ -38,12 +39,12 @@
#include <vlibapi/api_helper_macros.h>
#include <vnet/ipsec/ipsec_punt.h>
-static vlib_log_class_t lcp_itf_pair_logger;
+vlib_log_class_t lcp_itf_pair_logger;
/**
* Pool of LIP objects
*/
-lcp_itf_pair_t *lcp_itf_pair_pool;
+lcp_itf_pair_t *lcp_itf_pair_pool = NULL;
u32
lcp_itf_num_pairs (void)
@@ -72,12 +73,6 @@ lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft)
vec_add1 (lcp_itf_vfts, *lcp_itf_vft);
}
-#define LCP_ITF_PAIR_DBG(...) \
- vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
-
-#define LCP_ITF_PAIR_INFO(...) \
- vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
-
u8 *
format_lcp_itf_pair (u8 *s, va_list *args)
{
@@ -136,6 +131,13 @@ lcp_itf_pair_show (u32 phy_sw_if_index)
ns = lcp_get_default_ns ();
vlib_cli_output (vm, "lcp default netns '%s'\n",
ns ? (char *) ns : "<unset>");
+ vlib_cli_output (vm, "lcp lcp-auto-subint %s\n",
+ lcp_auto_subint () ? "on" : "off");
+ vlib_cli_output (vm, "lcp lcp-sync %s\n", lcp_sync () ? "on" : "off");
+ vlib_cli_output (vm, "lcp del-static-on-link-down %s\n",
+ lcp_get_del_static_on_link_down () ? "on" : "off");
+ vlib_cli_output (vm, "lcp del-dynamic-on-link-down %s\n",
+ lcp_get_del_dynamic_on_link_down () ? "on" : "off");
if (phy_sw_if_index == ~0)
{
@@ -152,6 +154,11 @@ lcp_itf_pair_show (u32 phy_sw_if_index)
lcp_itf_pair_t *
lcp_itf_pair_get (u32 index)
{
+ if (!lcp_itf_pair_pool)
+ return NULL;
+ if (index == INDEX_INVALID)
+ return NULL;
+
return pool_elt_at_index (lcp_itf_pair_pool, index);
}
@@ -168,18 +175,6 @@ lcp_itf_pair_find_by_vif (u32 vif_index)
return INDEX_INVALID;
}
-int
-lcp_itf_pair_add_sub (u32 vif, u8 *host_if_name, u32 sub_sw_if_index,
- u32 phy_sw_if_index, u8 *ns)
-{
- lcp_itf_pair_t *lip;
-
- lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (phy_sw_if_index));
-
- return lcp_itf_pair_add (lip->lip_host_sw_if_index, sub_sw_if_index,
- host_if_name, vif, lip->lip_host_type, ns);
-}
-
const char *lcp_itf_l3_feat_names[N_LCP_ITF_HOST][N_AF] = {
[LCP_ITF_HOST_TAP] = {
[AF_IP4] = "linux-cp-xc-ip4",
@@ -235,17 +230,23 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
index_t lipi;
lcp_itf_pair_t *lip;
+ if (host_sw_if_index == ~0)
+ {
+ LCP_ITF_PAIR_ERR ("pair_add: Cannot add LIP - invalid host");
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+
lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
- LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%v",
+ if (lipi != INDEX_INVALID)
+ return VNET_API_ERROR_VALUE_EXIST;
+
+ LCP_ITF_PAIR_INFO ("add: host:%U phy:%U, host_if:%v vif:%d ns:%s",
format_vnet_sw_if_index_name, vnet_get_main (),
host_sw_if_index, format_vnet_sw_if_index_name,
vnet_get_main (), phy_sw_if_index, host_name, host_index,
ns);
- if (lipi != INDEX_INVALID)
- return VNET_API_ERROR_VALUE_EXIST;
-
/*
* Create a new pair.
*/
@@ -266,9 +267,6 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
lip->lip_vif_index = host_index;
lip->lip_namespace = vec_dup (ns);
- if (lip->lip_host_sw_if_index == ~0)
- return 0;
-
/*
* First use of this host interface.
* Enable the x-connect feature on the host to send
@@ -314,10 +312,13 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
}
else
{
- vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 1, NULL,
- 0);
- vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 1, NULL,
- 0);
+ if (hash_elts (lip_db_by_vif) == 1)
+ {
+ vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 1,
+ NULL, 0);
+ vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 1,
+ NULL, 0);
+ }
}
/* invoke registered callbacks for pair addition */
@@ -336,7 +337,7 @@ lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index, u8 *host_name,
}
static clib_error_t *
-lcp_netlink_add_link_vlan (int parent, u32 vlan, const char *name)
+lcp_netlink_add_link_vlan (int parent, u32 vlan, u16 proto, const char *name)
{
struct rtnl_link *link;
struct nl_sock *sk;
@@ -344,17 +345,25 @@ lcp_netlink_add_link_vlan (int parent, u32 vlan, const char *name)
sk = nl_socket_alloc ();
if ((err = nl_connect (sk, NETLINK_ROUTE)) < 0)
- return clib_error_return (NULL, "Unable to connect socket: %d", err);
+ {
+ LCP_ITF_PAIR_ERR ("netlink_add_link_vlan: connect error: %s",
+ nl_geterror (err));
+ return clib_error_return (NULL, "Unable to connect socket: %d", err);
+ }
link = rtnl_link_vlan_alloc ();
rtnl_link_set_link (link, parent);
rtnl_link_set_name (link, name);
-
rtnl_link_vlan_set_id (link, vlan);
+ rtnl_link_vlan_set_protocol (link, htons (proto));
if ((err = rtnl_link_add (sk, link, NLM_F_CREATE)) < 0)
- return clib_error_return (NULL, "Unable to add link %s: %d", name, err);
+ {
+ LCP_ITF_PAIR_ERR ("netlink_add_link_vlan: link add error: %s",
+ nl_geterror (err));
+ return clib_error_return (NULL, "Unable to add link %s: %d", name, err);
+ }
rtnl_link_put (link);
nl_close (sk);
@@ -400,10 +409,11 @@ lcp_itf_pair_del (u32 phy_sw_if_index)
lip = lcp_itf_pair_get (lipi);
- LCP_ITF_PAIR_INFO ("pair delete: {%U, %U, %s}", format_vnet_sw_if_index_name,
- vnet_get_main (), lip->lip_phy_sw_if_index,
- format_vnet_sw_if_index_name, vnet_get_main (),
- lip->lip_host_sw_if_index, lip->lip_host_name);
+ LCP_ITF_PAIR_NOTICE (
+ "pair_del: host:%U phy:%U host_if:%v vif:%d ns:%v",
+ format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_host_sw_if_index,
+ format_vnet_sw_if_index_name, vnet_get_main (), lip->lip_phy_sw_if_index,
+ lip->lip_host_name, lip->lip_vif_index, lip->lip_namespace);
/* invoke registered callbacks for pair deletion */
vec_foreach (vft, lcp_itf_vfts)
@@ -432,12 +442,14 @@ lcp_itf_pair_del (u32 phy_sw_if_index)
}
else
{
- vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 0, NULL,
- 0);
- vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 0, NULL,
- 0);
+ if (hash_elts (lip_db_by_vif) == 1)
+ {
+ vnet_feature_enable_disable ("ip4-punt", "linux-cp-punt-l3", 0, 0,
+ NULL, 0);
+ vnet_feature_enable_disable ("ip6-punt", "linux-cp-punt-l3", 0, 0,
+ NULL, 0);
+ }
}
-
lip_db_by_phy[phy_sw_if_index] = INDEX_INVALID;
lip_db_by_host[lip->lip_host_sw_if_index] = INDEX_INVALID;
hash_unset (lip_db_by_vif, lip->lip_vif_index);
@@ -454,24 +466,45 @@ lcp_itf_pair_delete_by_index (index_t lipi)
{
u32 host_sw_if_index;
lcp_itf_pair_t *lip;
- u8 *host_name;
+ u8 *host_name, *ns;
lip = lcp_itf_pair_get (lipi);
host_name = vec_dup (lip->lip_host_name);
host_sw_if_index = lip->lip_host_sw_if_index;
+ ns = vec_dup (lip->lip_namespace);
lcp_itf_pair_del (lip->lip_phy_sw_if_index);
if (vnet_sw_interface_is_sub (vnet_get_main (), host_sw_if_index))
{
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ if (ns)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open ((u8 *) ns);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
lcp_netlink_del_link ((const char *) host_name);
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+
vnet_delete_sub_interface (host_sw_if_index);
}
else
tap_delete_if (vlib_get_main (), host_sw_if_index);
vec_free (host_name);
+ vec_free (ns);
}
int
@@ -489,6 +522,23 @@ lcp_itf_pair_delete (u32 phy_sw_if_index)
return 0;
}
+/**
+ * lcp_itf_interface_add_del
+ *
+ * Registered to receive interface Add and delete notifications
+ */
+static clib_error_t *
+lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ if (!is_add)
+ /* remove any interface pair we have for this interface */
+ lcp_itf_pair_delete (sw_if_index);
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del);
+
void
lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx)
{
@@ -501,58 +551,17 @@ lcp_itf_pair_walk (lcp_itf_pair_walk_cb_t cb, void *ctx)
};
}
-typedef struct lcp_itf_pair_names_t_
-{
- u8 *lipn_host_name;
- u8 *lipn_phy_name;
- u8 *lipn_namespace;
- u32 lipn_phy_sw_if_index;
-} lcp_itf_pair_names_t;
-
-static lcp_itf_pair_names_t *lipn_names;
-
static clib_error_t *
lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
{
- u8 *host, *phy;
- u8 *ns;
u8 *default_ns;
+ u32 tmp;
- host = phy = ns = default_ns = NULL;
+ default_ns = NULL;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- vec_reset_length (host);
-
- if (unformat (input, "pair %s %s %s", &phy, &host, &ns))
- {
- lcp_itf_pair_names_t *lipn;
-
- if (vec_len (ns) > LCP_NS_LEN)
- {
- return clib_error_return (0,
- "linux-cp IF namespace must"
- " be less than %d characters",
- LCP_NS_LEN);
- }
-
- vec_add2 (lipn_names, lipn, 1);
-
- lipn->lipn_host_name = vec_dup (host);
- lipn->lipn_phy_name = vec_dup (phy);
- lipn->lipn_namespace = vec_dup (ns);
- }
- else if (unformat (input, "pair %v %v", &phy, &host))
- {
- lcp_itf_pair_names_t *lipn;
-
- vec_add2 (lipn_names, lipn, 1);
-
- lipn->lipn_host_name = vec_dup (host);
- lipn->lipn_phy_name = vec_dup (phy);
- lipn->lipn_namespace = 0;
- }
- else if (unformat (input, "default netns %v", &default_ns))
+ if (unformat (input, "default netns %v", &default_ns))
{
vec_add1 (default_ns, 0);
if (lcp_set_default_ns (default_ns) < 0)
@@ -563,14 +572,22 @@ lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
LCP_NS_LEN);
}
}
- else if (unformat (input, "interface-auto-create"))
- lcp_set_auto_intf (1 /* is_auto */);
+ else if (unformat (input, "lcp-auto-subint"))
+ lcp_set_auto_subint (1 /* is_auto */);
+ else if (unformat (input, "lcp-sync"))
+ lcp_set_sync (1 /* is_auto */);
+ else if (unformat (input, "del-static-on-link-down"))
+ lcp_set_del_static_on_link_down (1 /* is_del */);
+ else if (unformat (input, "del-dynamic-on-link-down"))
+ lcp_set_del_dynamic_on_link_down (1 /* is_del */);
+ else if (unformat (input, "num-rx-queues %d", &tmp))
+ lcp_set_default_num_queues (tmp, 0 /* is_tx */);
+ else if (unformat (input, "num-tx-queues %d", &tmp))
+ lcp_set_default_num_queues (tmp, 1 /* is_tx */);
else
return clib_error_return (0, "interfaces not found");
}
- vec_free (host);
- vec_free (phy);
vec_free (default_ns);
return NULL;
@@ -615,22 +632,81 @@ lcp_validate_if_name (u8 *name)
return 1;
}
-static void
-lcp_itf_set_vif_link_state (u32 vif_index, u8 up, u8 *ns)
+void
+lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state)
{
int curr_ns_fd, vif_ns_fd;
+ if (!lip)
+ return;
+
curr_ns_fd = vif_ns_fd = -1;
- if (ns)
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ /* Set the same link state on the netlink interface
+ */
+ vnet_netlink_set_link_state (lip->lip_vif_index, state);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+
+ return;
+}
+
+void
+lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ ip_lookup_main_t *lm4 = &im4->lookup_main;
+ ip_lookup_main_t *lm6 = &im6->lookup_main;
+ ip_interface_address_t *ia = 0;
+ int vif_ns_fd = -1;
+ int curr_ns_fd = -1;
+
+ if (!lip)
+ return;
+
+ if (lip->lip_namespace)
{
curr_ns_fd = clib_netns_open (NULL /* self */);
- vif_ns_fd = clib_netns_open (ns);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
if (vif_ns_fd != -1)
clib_setns (vif_ns_fd);
}
- vnet_netlink_set_link_state (vif_index, up);
+ /* Sync any IP4 addressing info into LCP */
+ foreach_ip_interface_address (
+ lm4, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
+ ip4_address_t *r4 = ip_interface_address_get_address (lm4, ia);
+ LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip4 %U/%d",
+ format_lcp_itf_pair, lip, format_ip4_address, r4,
+ ia->address_length);
+ vnet_netlink_add_ip4_addr (lip->lip_vif_index, r4, ia->address_length);
+ }));
+
+ /* Sync any IP6 addressing info into LCP */
+ foreach_ip_interface_address (
+ lm6, ia, lip->lip_phy_sw_if_index, 1 /* honor unnumbered */, ({
+ ip6_address_t *r6 = ip_interface_address_get_address (lm6, ia);
+ LCP_ITF_PAIR_NOTICE ("set_interface_addr: %U add ip6 %U/%d",
+ format_lcp_itf_pair, lip, format_ip6_address, r6,
+ ia->address_length);
+ vnet_netlink_add_ip6_addr (lip->lip_vif_index, r6, ia->address_length);
+ }));
if (vif_ns_fd != -1)
close (vif_ns_fd);
@@ -642,6 +718,64 @@ lcp_itf_set_vif_link_state (u32 vif_index, u8 up, u8 *ns)
}
}
+typedef struct
+{
+ u32 vlan;
+ bool dot1ad;
+
+ u32 matched_sw_if_index;
+} lcp_itf_match_t;
+
+static walk_rc_t
+lcp_itf_pair_find_walk (vnet_main_t *vnm, u32 sw_if_index, void *arg)
+{
+ lcp_itf_match_t *match = arg;
+ const vnet_sw_interface_t *sw;
+
+ sw = vnet_get_sw_interface (vnm, sw_if_index);
+ if (sw && (sw->sub.eth.inner_vlan_id == 0) &&
+ (sw->sub.eth.outer_vlan_id == match->vlan) &&
+ (sw->sub.eth.flags.dot1ad == match->dot1ad))
+ {
+ LCP_ITF_PAIR_DBG ("find_walk: found match outer %d dot1ad %d "
+ "inner-dot1q %d: interface %U",
+ sw->sub.eth.outer_vlan_id, sw->sub.eth.flags.dot1ad,
+ sw->sub.eth.inner_vlan_id,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw->sw_if_index);
+ match->matched_sw_if_index = sw->sw_if_index;
+ return WALK_STOP;
+ }
+
+ return WALK_CONTINUE;
+}
+
+/* Return the index of the sub-int on the phy that has the given vlan and
+ * proto,
+ */
+static index_t
+lcp_itf_pair_find_by_outer_vlan (u32 sup_if_index, u16 vlan, bool dot1ad)
+{
+ lcp_itf_match_t match;
+ const vnet_hw_interface_t *hw;
+
+ match.vlan = vlan;
+ match.dot1ad = dot1ad;
+ match.matched_sw_if_index = INDEX_INVALID;
+ hw = vnet_get_sup_hw_interface (vnet_get_main (), sup_if_index);
+
+ vnet_hw_interface_walk_sw (vnet_get_main (), hw->hw_if_index,
+ lcp_itf_pair_find_walk, &match);
+
+ if (match.matched_sw_if_index >= vec_len (lip_db_by_phy))
+ return INDEX_INVALID;
+
+ return lip_db_by_phy[match.matched_sw_if_index];
+}
+
+static clib_error_t *lcp_itf_pair_link_up_down (vnet_main_t *vnm,
+ u32 hw_if_index, u32 flags);
+
int
lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
lip_host_type_t host_if_type, u8 *ns,
@@ -649,24 +783,53 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
{
vlib_main_t *vm;
vnet_main_t *vnm;
- u32 vif_index = 0, host_sw_if_index;
+ u32 vif_index = 0, host_sw_if_index = ~0;
const vnet_sw_interface_t *sw;
const vnet_hw_interface_t *hw;
+ const lcp_itf_pair_t *lip;
+ index_t lipi;
+
+ lipi = lcp_itf_pair_find_by_phy (phy_sw_if_index);
+
+ if (lipi != INDEX_INVALID)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: already created");
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
if (!vnet_sw_if_index_is_api_valid (phy_sw_if_index))
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: invalid phy index %u", phy_sw_if_index);
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
if (!lcp_validate_if_name (host_if_name))
- return VNET_API_ERROR_INVALID_ARGUMENT;
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: invalid host-if-name '%s'",
+ host_if_name);
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
vnm = vnet_get_main ();
sw = vnet_get_sw_interface (vnm, phy_sw_if_index);
hw = vnet_get_sup_hw_interface (vnm, phy_sw_if_index);
+ if (!sw || !hw)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: invalid interface");
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+
+ if (hw->hw_class_index != ethernet_hw_interface_class.index &&
+ host_if_type == LCP_ITF_HOST_TAP)
+ {
+ LCP_ITF_PAIR_ERR (
+ "pair_create: don't create TAP for non-eth interface; use tun");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
/*
* Use interface-specific netns if supplied.
- * Otherwise, use default netns if defined.
- * Otherwise ignore a netns and use the OS default.
+ * Otherwise, use netns if defined, otherwise use the OS default.
*/
if (ns == 0 || ns[0] == 0)
ns = lcp_get_default_ns ();
@@ -674,16 +837,50 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
/* sub interfaces do not need a tap created */
if (vnet_sw_interface_is_sub (vnm, phy_sw_if_index))
{
- const lcp_itf_pair_t *lip;
+ index_t parent_if_index;
int orig_ns_fd, ns_fd;
clib_error_t *err;
- u16 vlan;
+ u16 outer_vlan, inner_vlan;
+ u16 outer_proto, inner_proto;
+ u16 vlan, proto;
+ u32 parent_vif_index;
- /*
- * Find the parent tap by finding the pair from the parent phy
- */
- lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw->sup_sw_if_index));
- vlan = sw->sub.eth.outer_vlan_id;
+ err = vnet_sw_interface_supports_addressing (vnm, phy_sw_if_index);
+ if (err)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: can't create LCP for a "
+ "sub-interface without exact-match set");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ outer_vlan = sw->sub.eth.outer_vlan_id;
+ inner_vlan = sw->sub.eth.inner_vlan_id;
+ outer_proto = inner_proto = ETH_P_8021Q;
+ if (1 == sw->sub.eth.flags.dot1ad)
+ outer_proto = ETH_P_8021AD;
+
+ LCP_ITF_PAIR_INFO ("pair_create: subif: dot1%s outer %d inner %d on %U",
+ sw->sub.eth.flags.dot1ad ? "ad" : "q", outer_vlan,
+ inner_vlan, format_vnet_sw_if_index_name, vnm,
+ hw->sw_if_index);
+
+ parent_if_index = lcp_itf_pair_find_by_phy (sw->sup_sw_if_index);
+ if (INDEX_INVALID == parent_if_index)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: can't find LCP for %U",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw->sup_sw_if_index);
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+ lip = lcp_itf_pair_get (parent_if_index);
+ if (!lip)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: can't create LCP for a "
+ "sub-interface without an LCP on the parent");
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+ LCP_ITF_PAIR_DBG ("pair_create: parent %U", format_lcp_itf_pair, lip);
+ parent_vif_index = lip->lip_vif_index;
/*
* see if the requested host interface has already been created
@@ -708,11 +905,56 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
/*
* no existing host interface, create it now
*/
- err = lcp_netlink_add_link_vlan (lip->lip_vif_index, vlan,
- (const char *) host_if_name);
- if (!err && -1 != ns_fd)
- err = vnet_netlink_set_link_netns (vif_index, ns_fd, NULL);
+ /*
+ * Find the parent tap:
+ * - if this is an outer VLAN, use the pair from the parent phy
+ * - if this is an inner VLAN, find the pair from the outer sub-int,
+ * which must exist.
+ */
+ if (inner_vlan)
+ {
+ index_t linux_parent_if_index;
+ const lcp_itf_pair_t *llip;
+
+ vlan = inner_vlan;
+ proto = inner_proto;
+ linux_parent_if_index = lcp_itf_pair_find_by_outer_vlan (
+ hw->sw_if_index, sw->sub.eth.outer_vlan_id,
+ sw->sub.eth.flags.dot1ad);
+ if (INDEX_INVALID == linux_parent_if_index ||
+ !(llip = lcp_itf_pair_get (linux_parent_if_index)))
+ {
+ LCP_ITF_PAIR_ERR (
+ "pair_create: can't find LCP for outer vlan %d "
+ "proto %s on %U",
+ outer_vlan,
+ outer_proto == ETH_P_8021AD ? "dot1ad" : "dot1q",
+ format_vnet_sw_if_index_name, vnm, hw->sw_if_index);
+ err = clib_error_return (0, "parent pair not found");
+ goto socket_close;
+ }
+
+ LCP_ITF_PAIR_DBG ("pair_create: linux parent %U",
+ format_lcp_itf_pair, llip);
+ parent_vif_index = llip->lip_vif_index;
+ }
+ else
+ {
+ vlan = outer_vlan;
+ proto = outer_proto;
+ }
+
+ err = lcp_netlink_add_link_vlan (parent_vif_index, vlan, proto,
+ (const char *) host_if_name);
+ if (err != 0)
+ {
+ LCP_ITF_PAIR_ERR ("pair_create: cannot create link "
+ "outer(proto:0x%04x,vlan:%u).inner(proto:0x%"
+ "04x,vlan:%u) name:'%s'",
+ outer_proto, outer_vlan, inner_proto,
+ inner_vlan, host_if_name);
+ }
if (!err)
vif_index = if_nametoindex ((char *) host_if_name);
@@ -721,13 +963,20 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
/*
* create a sub-interface on the tap
*/
- if (!err && vnet_create_sub_interface (lip->lip_host_sw_if_index,
- sw->sub.id, sw->sub.eth.raw_flags,
- sw->sub.eth.inner_vlan_id, vlan,
- &host_sw_if_index))
- LCP_ITF_PAIR_INFO ("failed create vlan: %d on %U", vlan,
- format_vnet_sw_if_index_name, vnet_get_main (),
- lip->lip_host_sw_if_index);
+ if (!err &&
+ vnet_create_sub_interface (lip->lip_host_sw_if_index, sw->sub.id,
+ sw->sub.eth.raw_flags, inner_vlan,
+ outer_vlan, &host_sw_if_index))
+ {
+ LCP_ITF_PAIR_ERR (
+ "pair_create: failed to create tap subint: %d.%d on %U",
+ outer_vlan, inner_vlan, format_vnet_sw_if_index_name, vnm,
+ lip->lip_host_sw_if_index);
+ err = clib_error_return (
+ 0, "failed to create tap subint: %d.%d. on %U", outer_vlan,
+ inner_vlan, format_vnet_sw_if_index_name, vnm,
+ lip->lip_host_sw_if_index);
+ }
socket_close:
if (orig_ns_fd != -1)
@@ -744,15 +993,21 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
else
{
tap_create_if_args_t args = {
- .num_rx_queues = clib_max (1, vlib_num_workers ()),
+ .num_rx_queues =
+ clib_max (1, lcp_get_default_num_queues (0 /* is_tx */)),
+ .num_tx_queues =
+ clib_max (1, lcp_get_default_num_queues (1 /* is_tx */)),
.id = hw->hw_if_index,
.sw_if_index = ~0,
.rx_ring_sz = 256,
.tx_ring_sz = 256,
.host_if_name = host_if_name,
.host_namespace = 0,
+ .rv = 0,
+ .error = NULL,
};
ethernet_interface_t *ei;
+ u32 host_sw_mtu_size;
if (host_if_type == LCP_ITF_HOST_TUN)
args.tap_flags |= TAP_FLAG_TUN;
@@ -762,38 +1017,45 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
mac_address_copy (&args.host_mac_addr, &ei->address.mac);
}
- if (sw->mtu[VNET_MTU_L3])
+ /*
+ * The TAP interface does copy forward the host MTU based on the VPP
+ * interface's L3 MTU, but it should also ensure that the VPP tap
+ * interface has an MTU that is greater-or-equal to those. Considering
+ * users can set the interfaces at runtime (set interface mtu packet ...)
+ * ensure that the tap MTU is large enough, taking the VPP interface L3
+ * if it's set, and otherwise a sensible default.
+ */
+ host_sw_mtu_size = sw->mtu[VNET_MTU_L3];
+ if (host_sw_mtu_size)
{
args.host_mtu_set = 1;
- args.host_mtu_size = sw->mtu[VNET_MTU_L3];
+ args.host_mtu_size = host_sw_mtu_size;
}
+ else
+ host_sw_mtu_size = ETHERNET_MAX_PACKET_BYTES;
if (ns && ns[0] != 0)
args.host_namespace = ns;
vm = vlib_get_main ();
tap_create_if (vm, &args);
-
if (args.rv < 0)
{
+ LCP_ITF_PAIR_ERR ("pair_create: could not create tap, retval:%d",
+ args.rv);
+ clib_error_free (args.error);
return args.rv;
}
+ vnet_sw_interface_set_mtu (vnm, args.sw_if_index, host_sw_mtu_size);
+
/*
* get the hw and ethernet of the tap
*/
hw = vnet_get_sup_hw_interface (vnm, args.sw_if_index);
-
- /*
- * Set the interface down on the host side.
- * This controls whether the host can RX/TX.
- */
virtio_main_t *mm = &virtio_main;
virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
- lcp_itf_set_vif_link_state (vif->ifindex, 0 /* down */,
- args.host_namespace);
-
/*
* Leave the TAP permanently up on the VPP side.
* This TAP will be shared by many sub-interface.
@@ -819,14 +1081,35 @@ lcp_itf_pair_create (u32 phy_sw_if_index, u8 *host_if_name,
return -1;
}
- vnet_sw_interface_admin_up (vnm, host_sw_if_index);
- lcp_itf_pair_add (host_sw_if_index, phy_sw_if_index, host_if_name, vif_index,
- host_if_type, ns);
-
LCP_ITF_PAIR_INFO ("pair create: {%U, %U, %s}", format_vnet_sw_if_index_name,
vnet_get_main (), phy_sw_if_index,
format_vnet_sw_if_index_name, vnet_get_main (),
host_sw_if_index, host_if_name);
+ lcp_itf_pair_add (host_sw_if_index, phy_sw_if_index, host_if_name, vif_index,
+ host_if_type, ns);
+
+ /*
+ * Copy the link state from VPP into the host side.
+ * The TAP is shared by many interfaces, always keep it up.
+ * This controls whether the host can RX/TX.
+ */
+ sw = vnet_get_sw_interface (vnm, phy_sw_if_index);
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_vif (vif_index));
+ LCP_ITF_PAIR_INFO ("pair create: %U sw-flags %u hw-flags %u",
+ format_lcp_itf_pair, lip, sw->flags, hw->flags);
+ vnet_sw_interface_admin_up (vnm, host_sw_if_index);
+ lcp_itf_set_link_state (lip, sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ /*
+ * Reflect current link state and link speed of the hardware interface on the
+ * TAP interface.
+ */
+ if (host_if_type == LCP_ITF_HOST_TAP &&
+ !vnet_sw_interface_is_sub (vnm, phy_sw_if_index))
+ {
+ hw = vnet_get_sup_hw_interface (vnm, phy_sw_if_index);
+ lcp_itf_pair_link_up_down (vnm, hw->hw_if_index, hw->flags);
+ }
if (host_sw_if_indexp)
*host_sw_if_indexp = host_sw_if_index;
@@ -890,70 +1173,6 @@ lcp_itf_pair_replace_end (void)
return (0);
}
-static uword
-lcp_itf_pair_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
- vlib_frame_t *f)
-{
- uword *event_data = 0;
- uword *lipn_index;
-
- while (1)
- {
- vlib_process_wait_for_event (vm);
-
- vlib_process_get_events (vm, &event_data);
-
- vec_foreach (lipn_index, event_data)
- {
- lcp_itf_pair_names_t *lipn;
-
- lipn = &lipn_names[*lipn_index];
- lcp_itf_pair_create (lipn->lipn_phy_sw_if_index,
- lipn->lipn_host_name, LCP_ITF_HOST_TAP,
- lipn->lipn_namespace, NULL);
- }
-
- vec_reset_length (event_data);
- }
-
- return 0;
-}
-
-VLIB_REGISTER_NODE (lcp_itf_pair_process_node, static) = {
- .function = lcp_itf_pair_process,
- .name = "linux-cp-itf-process",
- .type = VLIB_NODE_TYPE_PROCESS,
-};
-
-static clib_error_t *
-lcp_itf_phy_add (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
-{
- lcp_itf_pair_names_t *lipn;
- vlib_main_t *vm = vlib_get_main ();
- vnet_hw_interface_t *hw;
-
- if (!is_create || vnet_sw_interface_is_sub (vnm, sw_if_index))
- return NULL;
-
- hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
-
- vec_foreach (lipn, lipn_names)
- {
- if (!vec_cmp (hw->name, lipn->lipn_phy_name))
- {
- lipn->lipn_phy_sw_if_index = sw_if_index;
-
- vlib_process_signal_event (vm, lcp_itf_pair_process_node.index, 0,
- lipn - lipn_names);
- break;
- }
- }
-
- return NULL;
-}
-
-VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_phy_add);
-
static clib_error_t *
lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
{
@@ -980,7 +1199,8 @@ lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
tap_set_carrier (si->hw_if_index,
(flags & VNET_HW_INTERFACE_FLAG_LINK_UP));
- if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
+ if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP &&
+ hi->link_speed != UINT32_MAX)
{
tap_set_speed (si->hw_if_index, hi->link_speed / 1000);
}
@@ -992,13 +1212,15 @@ lcp_itf_pair_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_itf_pair_link_up_down);
static clib_error_t *
-lcp_itf_pair_init (vlib_main_t *vm)
+lcp_interface_init (vlib_main_t *vm)
{
vlib_punt_hdl_t punt_hdl = vlib_punt_client_register ("linux-cp");
/* punt IKE */
vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP4_SPI_UDP_0],
"linux-cp-punt");
+ vlib_punt_register (punt_hdl, ipsec_punt_reason[IPSEC_PUNT_IP6_SPI_UDP_0],
+ "linux-cp-punt");
/* punt all unknown ports */
udp_punt_unknown (vm, 0, 1);
@@ -1011,7 +1233,7 @@ lcp_itf_pair_init (vlib_main_t *vm)
return NULL;
}
-VLIB_INIT_FUNCTION (lcp_itf_pair_init) = {
+VLIB_INIT_FUNCTION (lcp_interface_init) = {
.runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"),
};
diff --git a/src/plugins/linux-cp/lcp_interface.h b/src/plugins/linux-cp/lcp_interface.h
index bed30248845..cfcd3925a15 100644
--- a/src/plugins/linux-cp/lcp_interface.h
+++ b/src/plugins/linux-cp/lcp_interface.h
@@ -21,6 +21,22 @@
#include <plugins/linux-cp/lcp.h>
+extern vlib_log_class_t lcp_itf_pair_logger;
+
+#define LCP_ITF_PAIR_DBG(...) \
+ vlib_log_debug (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_INFO(...) \
+ vlib_log_info (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_NOTICE(...) \
+ vlib_log_notice (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_WARN(...) \
+ vlib_log_warn (lcp_itf_pair_logger, __VA_ARGS__);
+
+#define LCP_ITF_PAIR_ERR(...) vlib_log_err (lcp_itf_pair_logger, __VA_ARGS__);
+
#define foreach_lcp_itf_pair_flag _ (STALE, 0, "stale")
typedef enum lip_flag_t_
@@ -88,8 +104,6 @@ extern index_t lcp_itf_pair_find_by_vif (u32 vif_index);
extern int lcp_itf_pair_add (u32 host_sw_if_index, u32 phy_sw_if_index,
u8 *host_name, u32 host_index,
lip_host_type_t host_type, u8 *ns);
-extern int lcp_itf_pair_add_sub (u32 vif, u8 *host_name, u32 sub_sw_if_index,
- u32 phy_sw_if_index, u8 *ns);
extern int lcp_itf_pair_del (u32 phy_sw_if_index);
/**
@@ -144,12 +158,6 @@ lcp_itf_pair_find_by_host (u32 host_sw_if_index)
return (lip_db_by_host[host_sw_if_index]);
}
-/**
- * manage interface auto creation
- */
-void lcp_set_auto_intf (u8 is_auto);
-int lcp_auto_intf (void);
-
typedef void (*lcp_itf_pair_add_cb_t) (lcp_itf_pair_t *);
typedef void (*lcp_itf_pair_del_cb_t) (lcp_itf_pair_t *);
@@ -160,6 +168,36 @@ typedef struct lcp_itf_pair_vft
} lcp_itf_pair_vft_t;
void lcp_itf_pair_register_vft (lcp_itf_pair_vft_t *lcp_itf_vft);
+
+/**
+ * sub-interface auto creation/deletion for LCP
+ */
+void lcp_set_auto_subint (u8 is_auto);
+int lcp_auto_subint (void);
+
+/**
+ * sync state changes from VPP into LCP
+ */
+void lcp_set_sync (u8 is_auto);
+int lcp_sync (void);
+
+/* Set TAP and Linux host link state */
+void lcp_itf_set_link_state (const lcp_itf_pair_t *lip, u8 state);
+
+/* Set any VPP L3 addresses on Linux host device */
+void lcp_itf_set_interface_addr (const lcp_itf_pair_t *lip);
+
+/* Sync all state from VPP to a specific Linux device, all sub-interfaces
+ * of a hardware interface, or all interfaces in the system.
+ *
+ * Note: in some circumstances, this syncer will (have to) make changes to
+ * the VPP interface, for example if its MTU is greater than its parent.
+ * See the function for rationale.
+ */
+void lcp_itf_pair_sync_state (lcp_itf_pair_t *lip);
+void lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi);
+void lcp_itf_pair_sync_state_all ();
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/linux-cp/lcp_interface_sync.c b/src/plugins/linux-cp/lcp_interface_sync.c
new file mode 100644
index 00000000000..ca7638e1799
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_interface_sync.c
@@ -0,0 +1,445 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright 2021 Cisco and/or its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/devices/netlink.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/linux/netns.h>
+#include <plugins/linux-cp/lcp_interface.h>
+
+/* helper function to copy forward all sw interface link state flags
+ * MTU, and IP addresses into their counterpart LIP interface.
+ *
+ * This is called upon MTU changes and state changes.
+ */
+void
+lcp_itf_pair_sync_state (lcp_itf_pair_t *lip)
+{
+ vnet_sw_interface_t *sw;
+ vnet_sw_interface_t *sup_sw;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ u32 mtu;
+ u32 netlink_mtu;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return;
+
+ sw =
+ vnet_get_sw_interface_or_null (vnet_get_main (), lip->lip_phy_sw_if_index);
+ if (!sw)
+ return;
+ sup_sw =
+ vnet_get_sw_interface_or_null (vnet_get_main (), sw->sup_sw_if_index);
+ if (!sup_sw)
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ LCP_ITF_PAIR_INFO ("sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u",
+ format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+ sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+
+ /* Linux will not allow children to be admin-up if their parent is
+ * admin-down. If child is up but parent is not, force it down.
+ */
+ int state = sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP;
+
+ if (state && !(sup_sw->flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
+ {
+ LCP_ITF_PAIR_WARN (
+ "sync_state: %U flags %u sup-flags %u mtu %u sup-mtu %u: "
+ "forcing state to sup-flags to satisfy netlink",
+ format_lcp_itf_pair, lip, sw->flags, sup_sw->flags,
+ sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+ state = 0;
+ }
+ lcp_itf_set_link_state (lip, state);
+
+ /* Linux will clamp MTU of children when the parent is lower. VPP is fine
+ * with differing MTUs. VPP assumes that if a subint has MTU of 0, that it
+ * inherits from its parent. Linux likes to be more explicit, so we
+ * reconcile any differences.
+ */
+ mtu = sw->mtu[VNET_MTU_L3];
+ if (mtu == 0)
+ mtu = sup_sw->mtu[VNET_MTU_L3];
+
+ if (sup_sw->mtu[VNET_MTU_L3] < sw->mtu[VNET_MTU_L3])
+ {
+ LCP_ITF_PAIR_WARN ("sync_state: %U flags %u mtu %u sup-mtu %u: "
+ "clamping to sup-mtu to satisfy netlink",
+ format_lcp_itf_pair, lip, sw->flags,
+ sw->mtu[VNET_MTU_L3], sup_sw->mtu[VNET_MTU_L3]);
+ mtu = sup_sw->mtu[VNET_MTU_L3];
+ }
+
+ /* Set MTU on all of {sw, tap, netlink}. Only send a netlink message if we
+ * really do want to change the MTU.
+ */
+ vnet_sw_interface_set_mtu (vnet_get_main (), lip->lip_phy_sw_if_index, mtu);
+ vnet_sw_interface_set_mtu (vnet_get_main (), lip->lip_host_sw_if_index, mtu);
+ if (NULL == vnet_netlink_get_link_mtu (lip->lip_vif_index, &netlink_mtu))
+ {
+ if (netlink_mtu != mtu)
+ vnet_netlink_set_link_mtu (lip->lip_vif_index, mtu);
+ }
+
+ /* Linux will remove IPv6 addresses on children when the parent state
+ * goes down, so we ensure all IPv4/IPv6 addresses are synced.
+ */
+ lcp_itf_set_interface_addr (lip);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+
+ return;
+}
+
+static walk_rc_t
+lcp_itf_pair_walk_sync_state_all_cb (index_t lipi, void *ctx)
+{
+ lcp_itf_pair_t *lip;
+ lip = lcp_itf_pair_get (lipi);
+ if (!lip)
+ return WALK_CONTINUE;
+
+ lcp_itf_pair_sync_state (lip);
+ return WALK_CONTINUE;
+}
+
+static walk_rc_t
+lcp_itf_pair_walk_sync_state_hw_cb (vnet_main_t *vnm, u32 sw_if_index,
+ void *arg)
+{
+ lcp_itf_pair_t *lip;
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ {
+ return WALK_CONTINUE;
+ }
+
+ lcp_itf_pair_sync_state (lip);
+ return WALK_CONTINUE;
+}
+
+void
+lcp_itf_pair_sync_state_all ()
+{
+ lcp_itf_pair_walk (lcp_itf_pair_walk_sync_state_all_cb, 0);
+}
+
+void
+lcp_itf_pair_sync_state_hw (vnet_hw_interface_t *hi)
+{
+ if (!hi)
+ return;
+ LCP_ITF_PAIR_DBG ("sync_state_hw: hi %U", format_vnet_sw_if_index_name,
+ vnet_get_main (), hi->hw_if_index);
+
+ vnet_hw_interface_walk_sw (vnet_get_main (), hi->hw_if_index,
+ lcp_itf_pair_walk_sync_state_hw_cb, NULL);
+}
+
+static clib_error_t *
+lcp_itf_admin_state_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
+{
+ lcp_itf_pair_t *lip;
+ vnet_hw_interface_t *hi;
+ vnet_sw_interface_t *si;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return 0;
+
+ LCP_ITF_PAIR_DBG ("admin_state_change: sw %U %u",
+ format_vnet_sw_if_index_name, vnm, sw_if_index, flags);
+
+ // Sync interface state changes into host
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ return NULL;
+ LCP_ITF_PAIR_INFO ("admin_state_change: %U flags %u", format_lcp_itf_pair,
+ lip, flags);
+
+ if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+ {
+ lcp_itf_pair_sync_state (lip);
+ return NULL;
+ }
+
+ // When Linux changes link on a parent interface, all of its children also
+ // change. If a parent interface changes MTU, all of its children are clamped
+ // at that MTU by Linux. Neither holds true in VPP, so we are forced to undo
+ // change by walking the sub-interfaces of a phy and syncing their state back
+ // into Linux.
+ si = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (!si)
+ return NULL;
+
+ hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index);
+ if (!hi)
+ return NULL;
+ LCP_ITF_PAIR_DBG ("admin_state_change: si %U hi %U, syncing children",
+ format_vnet_sw_if_index_name, vnm, si->sw_if_index,
+ format_vnet_sw_if_index_name, vnm, hi->sw_if_index);
+
+ lcp_itf_pair_sync_state_hw (hi);
+
+ return NULL;
+}
+
+VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (lcp_itf_admin_state_change);
+
+static clib_error_t *
+lcp_itf_mtu_change (vnet_main_t *vnm, u32 sw_if_index, u32 flags)
+{
+ vnet_sw_interface_t *si;
+ vnet_hw_interface_t *hi;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return NULL;
+
+ LCP_ITF_PAIR_DBG ("mtu_change: sw %U %u", format_vnet_sw_if_index_name, vnm,
+ sw_if_index, flags);
+
+ if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+ {
+ lcp_itf_pair_t *lip;
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (lip)
+ lcp_itf_pair_sync_state (lip);
+ return NULL;
+ }
+
+ // When Linux changes link on a parent interface, all of its children also
+ // change. If a parent interface changes MTU, all of its children are clamped
+ // at that MTU by Linux. Neither holds true in VPP, so we are forced to undo
+ // change by walking the sub-interfaces of a phy and syncing their state back
+ // into Linux.
+ si = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (!si)
+ return NULL;
+
+ hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index);
+ if (!hi)
+ return NULL;
+ LCP_ITF_PAIR_DBG ("mtu_change: si %U hi %U, syncing children",
+ format_vnet_sw_if_index_name, vnm, si->sw_if_index,
+ format_vnet_sw_if_index_name, vnm, hi->sw_if_index);
+
+ lcp_itf_pair_sync_state_hw (hi);
+
+ return NULL;
+}
+
+VNET_SW_INTERFACE_MTU_CHANGE_FUNCTION (lcp_itf_mtu_change);
+
+static void
+lcp_itf_ip4_add_del_interface_addr (ip4_main_t *im, uword opaque,
+ u32 sw_if_index, ip4_address_t *address,
+ u32 address_length, u32 if_address_index,
+ u32 is_del)
+{
+ const lcp_itf_pair_t *lip;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return;
+
+ LCP_ITF_PAIR_DBG ("ip4_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index, format_ip4_address, address, address_length);
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ LCP_ITF_PAIR_DBG ("ip4_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add",
+ format_lcp_itf_pair, lip, format_ip4_address, address,
+ address_length);
+
+ if (is_del)
+ vnet_netlink_del_ip4_addr (lip->lip_vif_index, address, address_length);
+ else
+ vnet_netlink_add_ip4_addr (lip->lip_vif_index, address, address_length);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+ return;
+}
+
+static void
+lcp_itf_ip6_add_del_interface_addr (ip6_main_t *im, uword opaque,
+ u32 sw_if_index, ip6_address_t *address,
+ u32 address_length, u32 if_address_index,
+ u32 is_del)
+{
+ const lcp_itf_pair_t *lip;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+
+ if (!lcp_sync () || lcp_get_netlink_processing_active ())
+ return;
+
+ LCP_ITF_PAIR_DBG ("ip6_addr_%s: si:%U %U/%u", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index, format_ip6_address, address, address_length);
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+ LCP_ITF_PAIR_DBG ("ip6_addr_%s: %U ip4 %U/%u", is_del ? "del" : "add",
+ format_lcp_itf_pair, lip, format_ip6_address, address,
+ address_length);
+ if (is_del)
+ vnet_netlink_del_ip6_addr (lip->lip_vif_index, address, address_length);
+ else
+ vnet_netlink_add_ip6_addr (lip->lip_vif_index, address, address_length);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+}
+
+static clib_error_t *
+lcp_itf_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_create)
+{
+ const vnet_sw_interface_t *sw;
+ uword is_sub;
+
+ if (!lcp_auto_subint ())
+ return NULL;
+
+ sw = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (!sw)
+ return NULL;
+
+ is_sub = vnet_sw_interface_is_sub (vnm, sw_if_index);
+ if (!is_sub)
+ return NULL;
+
+ LCP_ITF_PAIR_DBG ("interface_%s: sw %U parent %U", is_create ? "add" : "del",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw->sw_if_index, format_vnet_sw_if_index_name,
+ vnet_get_main (), sw->sup_sw_if_index);
+
+ if (is_create)
+ {
+ const lcp_itf_pair_t *sup_lip;
+ u8 *name = 0;
+
+ // If the parent has a LIP auto-create a LIP for this interface
+ sup_lip =
+ lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw->sup_sw_if_index));
+ if (!sup_lip)
+ return NULL;
+
+ name = format (name, "%s.%d%c", sup_lip->lip_host_name, sw->sub.id, 0);
+
+ LCP_ITF_PAIR_INFO (
+ "interface_%s: %U has parent %U, auto-creating LCP with host-if %s",
+ is_create ? "add" : "del", format_vnet_sw_if_index_name,
+ vnet_get_main (), sw->sw_if_index, format_lcp_itf_pair, sup_lip, name);
+
+ lcp_itf_pair_create (sw->sw_if_index, name, LCP_ITF_HOST_TAP,
+ sup_lip->lip_namespace, NULL);
+
+ vec_free (name);
+ }
+ else
+ {
+ lcp_itf_pair_delete (sw_if_index);
+ }
+
+ return NULL;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (lcp_itf_interface_add_del);
+
+static clib_error_t *
+lcp_itf_sync_init (vlib_main_t *vm)
+{
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+
+ ip4_add_del_interface_address_callback_t cb4;
+ ip6_add_del_interface_address_callback_t cb6;
+
+ cb4.function = lcp_itf_ip4_add_del_interface_addr;
+ cb4.function_opaque = 0;
+ vec_add1 (im4->add_del_interface_address_callbacks, cb4);
+
+ cb6.function = lcp_itf_ip6_add_del_interface_addr;
+ cb6.function_opaque = 0;
+ vec_add1 (im6->add_del_interface_address_callbacks, cb6);
+
+ return NULL;
+}
+
+VLIB_INIT_FUNCTION (lcp_itf_sync_init) = {
+ .runs_after = VLIB_INITS ("vnet_interface_init", "tcp_init", "udp_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_mpls_sync.c b/src/plugins/linux-cp/lcp_mpls_sync.c
new file mode 100644
index 00000000000..c08fcb4d1d9
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_mpls_sync.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+
+#include <linux-cp/lcp_interface.h>
+
+#include <vnet/plugin/plugin.h>
+#include <vnet/mpls/mpls.h>
+#include <vppinfra/linux/netns.h>
+
+#include <fcntl.h>
+
+vlib_log_class_t lcp_mpls_sync_logger;
+
+#define LCP_MPLS_SYNC_DBG(...) \
+ vlib_log_debug (lcp_mpls_sync_logger, __VA_ARGS__);
+
+void
+lcp_mpls_sync_pair_add_cb (lcp_itf_pair_t *lip)
+{
+ u8 phy_is_enabled = mpls_sw_interface_is_enabled (lip->lip_phy_sw_if_index);
+ LCP_MPLS_SYNC_DBG ("pair_add_cb: mpls enabled %u, parent %U", phy_is_enabled,
+ format_lcp_itf_pair, lip);
+ if (phy_is_enabled)
+ mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index,
+ 1);
+}
+
+void
+lcp_mpls_sync_state_cb (struct mpls_main_t *mm, uword opaque, u32 sw_if_index,
+ u32 is_enable)
+{
+ lcp_itf_pair_t *lip;
+ index_t lipi;
+ int curr_ns_fd = -1;
+ int vif_ns_fd = -1;
+ int ctl_fd = -1;
+ u8 *ctl_path = NULL;
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: called for sw_if_index %u", sw_if_index);
+
+ // If device is LCP PHY, sync state to host tap.
+ lipi = lcp_itf_pair_find_by_phy (sw_if_index);
+ if (INDEX_INVALID != lipi)
+ {
+ lip = lcp_itf_pair_get (lipi);
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls enabled %u parent %U", is_enable,
+ format_lcp_itf_pair, lip);
+ mpls_sw_interface_enable_disable (&mpls_main, lip->lip_host_sw_if_index,
+ is_enable);
+ return;
+ }
+
+ // If device is LCP host, toggle MPLS XC feature.
+ lipi = lcp_itf_pair_find_by_host (sw_if_index);
+ if (INDEX_INVALID == lipi)
+ return;
+ lip = lcp_itf_pair_get (lipi);
+
+ vnet_feature_enable_disable ("mpls-input", "linux-cp-xc-mpls", sw_if_index,
+ is_enable, NULL, 0);
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: mpls xc state %u parent %U", is_enable,
+ format_lcp_itf_pair, lip);
+
+ // If syncing is enabled, sync Linux state as well.
+ // This can happen regardless of lcp_get_netlink_processing_active(),
+ // provided it does not generate Netlink messages.
+ if (!lcp_sync ())
+ return;
+
+ if (lip->lip_namespace)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ vif_ns_fd = clib_netns_open (lip->lip_namespace);
+ if (vif_ns_fd != -1)
+ clib_setns (vif_ns_fd);
+ }
+
+ ctl_path = format (NULL, "/proc/sys/net/mpls/conf/%s/input%c",
+ lip->lip_host_name, NULL);
+ if (NULL == ctl_path)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to format sysctl");
+ goto SYNC_CLEANUP;
+ }
+
+ ctl_fd = open ((char *) ctl_path, O_WRONLY);
+ if (ctl_fd < 0)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to open %s for writing",
+ ctl_path);
+ goto SYNC_CLEANUP;
+ }
+
+ if (fdformat (ctl_fd, "%u", is_enable) < 1)
+ {
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: failed to write to %s", ctl_path);
+ goto SYNC_CLEANUP;
+ }
+
+ LCP_MPLS_SYNC_DBG ("sync_state_cb: set mpls input for %s",
+ lip->lip_host_name);
+
+SYNC_CLEANUP:
+ if (ctl_fd > -1)
+ close (ctl_fd);
+
+ if (NULL != ctl_path)
+ vec_free (ctl_path);
+
+ if (vif_ns_fd != -1)
+ close (vif_ns_fd);
+
+ if (curr_ns_fd != -1)
+ {
+ clib_setns (curr_ns_fd);
+ close (curr_ns_fd);
+ }
+}
+
+static clib_error_t *
+lcp_mpls_sync_init (vlib_main_t *vm)
+{
+ lcp_itf_pair_vft_t mpls_sync_itf_pair_vft = {
+ .pair_add_fn = lcp_mpls_sync_pair_add_cb,
+ };
+ lcp_itf_pair_register_vft (&mpls_sync_itf_pair_vft);
+
+ mpls_interface_state_change_add_callback (lcp_mpls_sync_state_cb, 0);
+
+ lcp_mpls_sync_logger = vlib_log_register_class ("linux-cp", "mpls-sync");
+
+ return NULL;
+}
+
+VLIB_INIT_FUNCTION (lcp_mpls_sync_init) = {
+ .runs_after = VLIB_INITS ("lcp_interface_init", "mpls_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_nl.c b/src/plugins/linux-cp/lcp_nl.c
new file mode 100644
index 00000000000..85b6447007a
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_nl.c
@@ -0,0 +1,1043 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <fcntl.h>
+
+#include <linux-cp/lcp_nl.h>
+
+#include <netlink/route/rule.h>
+#include <netlink/msg.h>
+#include <netlink/netlink.h>
+#include <netlink/socket.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/addr.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/error.h>
+#include <vppinfra/linux/netns.h>
+
+#include <vnet/fib/fib_table.h>
+
+#include <libmnl/libmnl.h>
+
+#include <plugins/linux-cp/lcp_interface.h>
+
+typedef enum nl_status_t_
+{
+ NL_STATUS_NOTIF_PROC,
+ NL_STATUS_SYNC,
+} nl_status_t;
+
+typedef enum nl_sock_type_t_
+{
+ NL_SOCK_TYPE_LINK,
+ NL_SOCK_TYPE_ADDR,
+ NL_SOCK_TYPE_NEIGH,
+ NL_SOCK_TYPE_ROUTE,
+} nl_sock_type_t;
+
+#define NL_SOCK_TYPES_N (NL_SOCK_TYPE_ROUTE + 1)
+
+/* Socket type, message type, type name, function subname */
+#define foreach_sock_type \
+ _ (NL_SOCK_TYPE_LINK, RTM_GETLINK, "link", link) \
+ _ (NL_SOCK_TYPE_ADDR, RTM_GETADDR, "address", link_addr) \
+ _ (NL_SOCK_TYPE_NEIGH, RTM_GETNEIGH, "neighbor", neigh) \
+ _ (NL_SOCK_TYPE_ROUTE, RTM_GETROUTE, "route", route)
+
+typedef enum nl_event_type_t_
+{
+ NL_EVENT_READ,
+ NL_EVENT_ERR,
+} nl_event_type_t;
+
+typedef struct nl_main
+{
+
+ nl_status_t nl_status;
+
+ struct nl_sock *sk_route;
+ struct nl_sock *sk_route_sync[NL_SOCK_TYPES_N];
+ vlib_log_class_t nl_logger;
+ nl_vft_t *nl_vfts;
+ struct nl_cache *nl_caches[LCP_NL_N_OBJS];
+ nl_msg_info_t *nl_msg_queue;
+ uword clib_file_index;
+
+ u32 rx_buf_size;
+ u32 tx_buf_size;
+ u32 batch_size;
+ u32 batch_delay_ms;
+
+ u32 sync_batch_limit;
+ u32 sync_batch_delay_ms;
+ u32 sync_attempt_delay_ms;
+
+} nl_main_t;
+
+#define NL_RX_BUF_SIZE_DEF (1 << 27) /* 128 MB */
+#define NL_TX_BUF_SIZE_DEF (1 << 18) /* 256 kB */
+#define NL_BATCH_SIZE_DEF (1 << 11) /* 2048 */
+#define NL_BATCH_DELAY_MS_DEF 50 /* 50 ms, max 20 batch/s */
+
+#define NL_SYNC_BATCH_LIMIT_DEF (1 << 10) /* 1024 */
+#define NL_SYNC_BATCH_DELAY_MS_DEF 20 /* 20ms, max 50 batch/s */
+#define NL_SYNC_ATTEMPT_DELAY_MS_DEF 2000 /* 2s */
+
+static nl_main_t nl_main = {
+ .rx_buf_size = NL_RX_BUF_SIZE_DEF,
+ .tx_buf_size = NL_TX_BUF_SIZE_DEF,
+ .batch_size = NL_BATCH_SIZE_DEF,
+ .batch_delay_ms = NL_BATCH_DELAY_MS_DEF,
+ .sync_batch_limit = NL_SYNC_BATCH_LIMIT_DEF,
+ .sync_batch_delay_ms = NL_SYNC_BATCH_DELAY_MS_DEF,
+ .sync_attempt_delay_ms = NL_SYNC_ATTEMPT_DELAY_MS_DEF,
+};
+
+/* #define foreach_nl_nft_proto \ */
+/* _(IP4, "ip", AF_INT) \ */
+/* _(IP6, "ip6", NFPROTO_IPV6) */
+
+/* typedef enum nl_nft_proto_t_ */
+/* { */
+/* #define _(a,b,c) NL_NFT_PROTO_##a = c, */
+/* foreach_nl_nft_proto */
+/* #undef _ */
+/* } nl_nft_proto_t; */
+
+#define FOREACH_VFT(__func, __arg) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (__arg); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+#define FOREACH_VFT_NO_ARG(__func) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+#define FOREACH_VFT_CTX(__func, __arg, __ctx) \
+ { \
+ nl_main_t *nm = &nl_main; \
+ nl_vft_t *__nv; \
+ vec_foreach (__nv, nm->nl_vfts) \
+ { \
+ if (!__nv->__func.cb) \
+ continue; \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_sync (vlib_get_main ()); \
+ \
+ __nv->__func.cb (__arg, __ctx); \
+ \
+ if (!__nv->__func.is_mp_safe) \
+ vlib_worker_thread_barrier_release (vlib_get_main ()); \
+ } \
+ }
+
+void
+nl_register_vft (const nl_vft_t *nv)
+{
+ nl_main_t *nm = &nl_main;
+
+ vec_add1 (nm->nl_vfts, *nv);
+}
+
+#define NL_DBG(...) vlib_log_debug (nl_main.nl_logger, __VA_ARGS__);
+#define NL_INFO(...) vlib_log_notice (nl_main.nl_logger, __VA_ARGS__);
+#define NL_ERROR(...) vlib_log_err (nl_main.nl_logger, __VA_ARGS__);
+
+static void lcp_nl_open_socket (void);
+static void lcp_nl_close_socket (void);
+static void lcp_nl_open_sync_socket (nl_sock_type_t sock_type);
+static void lcp_nl_close_sync_socket (nl_sock_type_t sock_type);
+
+static void
+nl_route_del (struct rtnl_route *rr, void *arg)
+{
+ FOREACH_VFT (nvl_rt_route_del, rr);
+}
+
+static void
+nl_route_add (struct rtnl_route *rr, void *arg)
+{
+ int is_replace = 0;
+
+ if (arg)
+ {
+ nl_msg_info_t *msg_info = (nl_msg_info_t *) arg;
+ struct nlmsghdr *nlh = nlmsg_hdr (msg_info->msg);
+
+ is_replace = (nlh->nlmsg_flags & NLM_F_REPLACE);
+ }
+
+ FOREACH_VFT_CTX (nvl_rt_route_add, rr, is_replace);
+}
+
+static void
+nl_route_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_route_sync_begin);
+}
+
+static void
+nl_route_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_route_sync_end);
+}
+
+static void
+nl_neigh_del (struct rtnl_neigh *rn, void *arg)
+{
+ FOREACH_VFT (nvl_rt_neigh_del, rn);
+}
+
+static void
+nl_neigh_add (struct rtnl_neigh *rn, void *arg)
+{
+ FOREACH_VFT (nvl_rt_neigh_add, rn);
+}
+
+static void
+nl_neigh_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_neigh_sync_begin);
+}
+
+static void
+nl_neigh_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_neigh_sync_end);
+}
+
+static void
+nl_link_addr_del (struct rtnl_addr *rla, void *arg)
+{
+ FOREACH_VFT (nvl_rt_addr_del, rla);
+}
+
+static void
+nl_link_addr_add (struct rtnl_addr *rla, void *arg)
+{
+ FOREACH_VFT (nvl_rt_addr_add, rla);
+}
+
+static void
+nl_link_addr_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_addr_sync_begin);
+}
+
+static void
+nl_link_addr_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_addr_sync_end);
+}
+
+static void
+nl_link_del (struct rtnl_link *rl, void *arg)
+{
+ FOREACH_VFT_CTX (nvl_rt_link_del, rl, arg);
+}
+
+static void
+nl_link_add (struct rtnl_link *rl, void *arg)
+{
+ FOREACH_VFT_CTX (nvl_rt_link_add, rl, arg);
+}
+
+static void
+nl_link_sync_begin (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_link_sync_begin);
+}
+
+static void
+nl_link_sync_end (void)
+{
+ FOREACH_VFT_NO_ARG (nvl_rt_link_sync_end);
+}
+
+static void
+nl_route_dispatch (struct nl_object *obj, void *arg)
+{
+ /* nothing can be done without interface mappings */
+ if (!lcp_itf_num_pairs ())
+ return;
+
+ switch (nl_object_get_msgtype (obj))
+ {
+ case RTM_NEWROUTE:
+ nl_route_add ((struct rtnl_route *) obj, arg);
+ break;
+ case RTM_DELROUTE:
+ nl_route_del ((struct rtnl_route *) obj, arg);
+ break;
+ case RTM_NEWNEIGH:
+ nl_neigh_add ((struct rtnl_neigh *) obj, arg);
+ break;
+ case RTM_DELNEIGH:
+ nl_neigh_del ((struct rtnl_neigh *) obj, arg);
+ break;
+ case RTM_NEWADDR:
+ nl_link_addr_add ((struct rtnl_addr *) obj, arg);
+ break;
+ case RTM_DELADDR:
+ nl_link_addr_del ((struct rtnl_addr *) obj, arg);
+ break;
+ case RTM_NEWLINK:
+ nl_link_add ((struct rtnl_link *) obj, arg);
+ break;
+ case RTM_DELLINK:
+ nl_link_del ((struct rtnl_link *) obj, arg);
+ break;
+ default:
+ NL_INFO ("unhandled: %s", nl_object_get_type (obj));
+ break;
+ }
+}
+
+static int
+nl_route_process_msgs (void)
+{
+ nl_main_t *nm = &nl_main;
+ nl_msg_info_t *msg_info;
+ int err, n_msgs = 0;
+
+ lcp_set_netlink_processing_active (1);
+
+ /* process a batch of messages. break if we hit our limit */
+ vec_foreach (msg_info, nm->nl_msg_queue)
+ {
+ if ((err = nl_msg_parse (msg_info->msg, nl_route_dispatch, msg_info)) <
+ 0)
+ NL_ERROR ("Unable to parse object: %s", nl_geterror (err));
+ nlmsg_free (msg_info->msg);
+ if (++n_msgs >= nm->batch_size)
+ break;
+ }
+
+ /* remove the messages we processed from the head of the queue */
+ if (n_msgs)
+ vec_delete (nm->nl_msg_queue, n_msgs, 0);
+
+ NL_DBG ("Processed %u messages", n_msgs);
+
+ lcp_set_netlink_processing_active (0);
+
+ return n_msgs;
+}
+
+static int
+lcp_nl_route_discard_msgs (void)
+{
+ nl_main_t *nm = &nl_main;
+ nl_msg_info_t *msg_info;
+ int n_msgs;
+
+ n_msgs = vec_len (nm->nl_msg_queue);
+ if (n_msgs == 0)
+ return 0;
+
+ vec_foreach (msg_info, nm->nl_msg_queue)
+ {
+ nlmsg_free (msg_info->msg);
+ }
+
+ vec_reset_length (nm->nl_msg_queue);
+
+ NL_INFO ("Discarded %u messages", n_msgs);
+
+ return n_msgs;
+}
+
+static int
+lcp_nl_route_send_dump_req (nl_sock_type_t sock_type, int msg_type)
+{
+ nl_main_t *nm = &nl_main;
+ struct nl_sock *sk_route = nm->sk_route_sync[sock_type];
+ int err;
+ struct rtgenmsg rt_hdr = {
+ .rtgen_family = AF_UNSPEC,
+ };
+
+ err =
+ nl_send_simple (sk_route, msg_type, NLM_F_DUMP, &rt_hdr, sizeof (rt_hdr));
+
+ if (err < 0)
+ {
+ NL_ERROR ("Unable to send a dump request: %s", nl_geterror (err));
+ }
+ else
+ NL_INFO ("Dump request sent via socket %d of type %d",
+ nl_socket_get_fd (sk_route), sock_type);
+
+ return err;
+}
+
+static int
+lcp_nl_route_dump_cb (struct nl_msg *msg, void *arg)
+{
+ int err;
+
+ if ((err = nl_msg_parse (msg, nl_route_dispatch, NULL)) < 0)
+ NL_ERROR ("Unable to parse object: %s", nl_geterror (err));
+
+ return NL_OK;
+}
+
+static int
+lcp_nl_recv_dump_replies (nl_sock_type_t sock_type, int msg_limit,
+ int *is_done_rcvd)
+{
+ nl_main_t *nm = &nl_main;
+ struct nl_sock *sk_route = nm->sk_route_sync[sock_type];
+ struct sockaddr_nl nla;
+ uint8_t *buf = NULL;
+ int n_bytes;
+ struct nlmsghdr *hdr;
+ struct nl_msg *msg = NULL;
+ int err = 0;
+ int done = 0;
+ int n_msgs = 0;
+
+ lcp_set_netlink_processing_active (1);
+
+continue_reading:
+ n_bytes = nl_recv (sk_route, &nla, &buf, /* creds */ NULL);
+ if (n_bytes <= 0)
+ {
+ lcp_set_netlink_processing_active (0);
+ return n_bytes;
+ }
+
+ hdr = (struct nlmsghdr *) buf;
+ while (nlmsg_ok (hdr, n_bytes))
+ {
+ nlmsg_free (msg);
+ msg = nlmsg_convert (hdr);
+ if (!msg)
+ {
+ err = -NLE_NOMEM;
+ goto out;
+ }
+
+ n_msgs++;
+
+ nlmsg_set_proto (msg, NETLINK_ROUTE);
+ nlmsg_set_src (msg, &nla);
+
+ /* Message that terminates a multipart message. Finish parsing and signal
+ * the caller that all dump replies have been received
+ */
+ if (hdr->nlmsg_type == NLMSG_DONE)
+ {
+ done = 1;
+ goto out;
+ }
+ /* Message to be ignored. Continue parsing */
+ else if (hdr->nlmsg_type == NLMSG_NOOP)
+ ;
+ /* Message that indicates data was lost. Finish parsing and return an
+ * error
+ */
+ else if (hdr->nlmsg_type == NLMSG_OVERRUN)
+ {
+ err = -NLE_MSG_OVERFLOW;
+ goto out;
+ }
+ /* Message that indicates an error. Finish parsing, extract the error
+ * code, and return it */
+ else if (hdr->nlmsg_type == NLMSG_ERROR)
+ {
+ struct nlmsgerr *e = nlmsg_data (hdr);
+
+ if (hdr->nlmsg_len < nlmsg_size (sizeof (*e)))
+ {
+ err = -NLE_MSG_TRUNC;
+ goto out;
+ }
+ else if (e->error)
+ {
+ err = -nl_syserr2nlerr (e->error);
+ goto out;
+ }
+ /* Message is an acknowledgement (err_code = 0). Continue parsing */
+ else
+ ;
+ }
+ /* Message that contains the requested data. Pass it for processing and
+ * continue parsing
+ */
+ else
+ {
+ lcp_nl_route_dump_cb (msg, NULL);
+ }
+
+ hdr = nlmsg_next (hdr, &n_bytes);
+ }
+
+ nlmsg_free (msg);
+ free (buf);
+ msg = NULL;
+ buf = NULL;
+
+ if (!done && n_msgs < msg_limit)
+ goto continue_reading;
+
+out:
+ lcp_set_netlink_processing_active (0);
+
+ nlmsg_free (msg);
+ free (buf);
+
+ if (err)
+ return err;
+
+ *is_done_rcvd = done;
+
+ return n_msgs;
+}
+
+#define DAY_F64 (1.0 * (24 * 60 * 60))
+
+static uword
+nl_route_process (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ nl_main_t *nm = &nl_main;
+ uword event_type;
+ uword *event_data = 0;
+ f64 wait_time = DAY_F64;
+ int n_msgs;
+ int is_done;
+
+ while (1)
+ {
+ if (nm->nl_status == NL_STATUS_NOTIF_PROC)
+ {
+ /* If we process a batch of messages and stop because we reached the
+ * batch size limit, we want to wake up after the batch delay and
+ * process more. Otherwise we just want to wait for a read event.
+ */
+ vlib_process_wait_for_event_or_clock (vm, wait_time);
+ event_type = vlib_process_get_events (vm, &event_data);
+ vec_reset_length (event_data);
+
+ switch (event_type)
+ {
+ /* Process batch of queued messages on timeout or read event
+ * signal
+ */
+ case ~0:
+ case NL_EVENT_READ:
+ nl_route_process_msgs ();
+ wait_time = (vec_len (nm->nl_msg_queue) != 0) ?
+ nm->batch_delay_ms * 1e-3 :
+ DAY_F64;
+ break;
+
+ /* Initiate synchronization if there was an error polling or
+ * reading the notification socket
+ */
+ case NL_EVENT_ERR:
+ nm->nl_status = NL_STATUS_SYNC;
+ break;
+
+ default:
+ NL_ERROR ("Unknown event type: %u", (u32) event_type);
+ }
+ }
+ else if (nm->nl_status == NL_STATUS_SYNC)
+ {
+ /* Stop processing notifications - close the notification socket and
+ * discard all messages that are currently in the queue
+ */
+ lcp_nl_close_socket ();
+ lcp_nl_route_discard_msgs ();
+
+ /* Wait some time before next synchronization attempt. Allows to
+ * reduce the number of failed attempts that stall the main thread by
+ * waiting out the notification storm
+ */
+ NL_INFO ("Wait before next synchronization attempt for %ums",
+ nm->sync_attempt_delay_ms);
+ vlib_process_suspend (vm, nm->sync_attempt_delay_ms * 1e-3);
+
+ /* Open netlink synchronization socket, one for every data type of
+ * interest: link, address, neighbor, and route. That is needed to
+ * be able to send dump requests for every data type simultaneously.
+ * If send a dump request while the previous one is in progress,
+ * the request will fail and EBUSY returned
+ */
+#define _(stype, mtype, tname, fn) lcp_nl_open_sync_socket (stype);
+ foreach_sock_type
+#undef _
+
+ /* Start reading notifications and enqueueing them for further
+ * processing. The notifications will serve as a difference between
+ * the snapshot made after the dump request and the actual state at
+ * the moment. Once all the dump replies are processed, the
+ * notifications will be processed
+ */
+ lcp_nl_open_socket ();
+
+ /* Request the current entry set from the kernel for every data type
+ * of interest. Thus requesting a snapshot of the current routing
+ * state that the kernel will make and then reply with
+ */
+#define _(stype, mtype, tname, fn) lcp_nl_route_send_dump_req (stype, mtype);
+ foreach_sock_type
+#undef _
+
+ /* Process all the dump replies */
+#define _(stype, mtype, tname, fn) \
+ nl_##fn##_sync_begin (); \
+ is_done = 0; \
+ do \
+ { \
+ n_msgs = \
+ lcp_nl_recv_dump_replies (stype, nm->sync_batch_limit, &is_done); \
+ if (n_msgs < 0) \
+ { \
+ NL_ERROR ("Error receiving dump replies of type " tname \
+ ": %s (%d)", \
+ nl_geterror (n_msgs), n_msgs); \
+ break; \
+ } \
+ else if (n_msgs == 0) \
+ { \
+ NL_ERROR ("EOF while receiving dump replies of type " tname); \
+ break; \
+ } \
+ else \
+ NL_INFO ("Processed %u dump replies of type " tname, n_msgs); \
+ \
+ /* Suspend the processing loop and wait until event signal is \
+ * received or timeout expires. During synchronization, only \
+ * error event is expected because read event is suppressed. \
+ * Allows not to stall the main thread and detect errors on the \
+ * notification socket that will make synchronization \
+ * incomplete \
+ */ \
+ vlib_process_wait_for_event_or_clock (vm, \
+ nm->sync_batch_delay_ms * 1e-3); \
+ event_type = vlib_process_get_events (vm, &event_data); \
+ vec_reset_length (event_data); \
+ \
+ /* If error event received, stop synchronization and repeat an \
+ * attempt later \
+ */ \
+ if (event_type == NL_EVENT_ERR) \
+ goto sync_later; \
+ } \
+ while (!is_done); \
+ nl_##fn##_sync_end ();
+
+ foreach_sock_type
+#undef _
+
+ /* Start processing notifications */
+ nm->nl_status = NL_STATUS_NOTIF_PROC;
+
+ /* Trigger messages processing if there are notifications received
+ * during synchronization
+ */
+ wait_time = (vec_len (nm->nl_msg_queue) != 0) ? 1e-3 : DAY_F64;
+
+ sync_later:
+ /* Close netlink synchronization sockets */
+#define _(stype, mtype, tname, fn) lcp_nl_close_sync_socket (stype);
+ foreach_sock_type
+#undef _
+ }
+ else
+ NL_ERROR ("Unknown status: %d", nm->nl_status);
+ }
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (nl_route_process_node, static) = {
+ .function = nl_route_process,
+ .name = "linux-cp-netlink-process",
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 17,
+};
+
+static int
+nl_route_cb (struct nl_msg *msg, void *arg)
+{
+ nl_main_t *nm = &nl_main;
+ nl_msg_info_t *msg_info = 0;
+
+ /* delay processing - increment ref count and queue for later */
+ vec_add2 (nm->nl_msg_queue, msg_info, 1);
+
+ /* store a timestamp for the message */
+ msg_info->ts = vlib_time_now (vlib_get_main ());
+ msg_info->msg = msg;
+ nlmsg_get (msg);
+
+ return 0;
+}
+
+int
+lcp_nl_drain_messages (void)
+{
+ int err;
+ nl_main_t *nm = &nl_main;
+
+ /* Read until there's an error */
+ while ((err = nl_recvmsgs_default (nm->sk_route)) > -1)
+ ;
+
+ /* If there was an error other then EAGAIN, signal process node */
+ if (err != -NLE_AGAIN)
+ vlib_process_signal_event (vlib_get_main (), nl_route_process_node.index,
+ NL_EVENT_ERR, 0);
+ else
+ {
+ /* If netlink notification processing is active, signal process node
+ * there were notifications read
+ */
+ if (nm->nl_status == NL_STATUS_NOTIF_PROC)
+ vlib_process_signal_event (
+ vlib_get_main (), nl_route_process_node.index, NL_EVENT_READ, 0);
+ }
+
+ return err;
+}
+
+void
+lcp_nl_pair_add_cb (lcp_itf_pair_t *pair)
+{
+ lcp_nl_drain_messages ();
+}
+
+static clib_error_t *
+nl_route_read_cb (clib_file_t *f)
+{
+ int err;
+ err = lcp_nl_drain_messages ();
+ if (err < 0 && err != -NLE_AGAIN)
+ NL_ERROR ("Error reading netlink socket (fd %d): %s (%d)",
+ f->file_descriptor, nl_geterror (err), err);
+
+ return 0;
+}
+
+static clib_error_t *
+nl_route_error_cb (clib_file_t *f)
+{
+ NL_ERROR ("Error polling netlink socket (fd %d)", f->file_descriptor);
+
+ /* notify process node */
+ vlib_process_signal_event (vlib_get_main (), nl_route_process_node.index,
+ NL_EVENT_ERR, 0);
+
+ return clib_error_return (0, "Error polling netlink socket %d",
+ f->file_descriptor);
+}
+
+struct nl_cache *
+lcp_nl_get_cache (lcp_nl_obj_t t)
+{
+ nl_main_t *nm = &nl_main;
+
+ return nm->nl_caches[t];
+}
+
+/* Set the RX buffer size to be used on the netlink socket */
+void
+lcp_nl_set_buffer_size (u32 buf_size)
+{
+ nl_main_t *nm = &nl_main;
+
+ nm->rx_buf_size = buf_size;
+
+ if (nm->sk_route)
+ nl_socket_set_buffer_size (nm->sk_route, nm->rx_buf_size, nm->tx_buf_size);
+}
+
+/* Set the batch size - maximum netlink messages to process at one time */
+void
+lcp_nl_set_batch_size (u32 batch_size)
+{
+ nl_main_t *nm = &nl_main;
+
+ nm->batch_size = batch_size;
+}
+
+/* Set the batch delay - how long to wait in ms between processing batches */
+void
+lcp_nl_set_batch_delay (u32 batch_delay_ms)
+{
+ nl_main_t *nm = &nl_main;
+
+ nm->batch_delay_ms = batch_delay_ms;
+}
+
+static clib_error_t *
+lcp_itf_pair_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ u32 buf_size, batch_size, batch_delay_ms;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "nl-rx-buffer-size %u", &buf_size))
+ lcp_nl_set_buffer_size (buf_size);
+ else if (unformat (input, "nl-batch-size %u", &batch_size))
+ lcp_nl_set_batch_size (batch_size);
+ else if (unformat (input, "nl-batch-delay-ms %u", &batch_delay_ms))
+ lcp_nl_set_batch_delay (batch_delay_ms);
+ else
+ return clib_error_return (0, "invalid netlink option: %U",
+ format_unformat_error, input);
+ }
+
+ return NULL;
+}
+
+VLIB_CONFIG_FUNCTION (lcp_itf_pair_config, "linux-nl");
+
+static void
+lcp_nl_close_socket (void)
+{
+ nl_main_t *nm = &nl_main;
+
+ /* delete existing fd from epoll fd set */
+ if (nm->clib_file_index != ~0)
+ {
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *f = clib_file_get (fm, nm->clib_file_index);
+
+ if (f)
+ {
+ NL_INFO ("Stopping poll of fd %u", f->file_descriptor);
+ fm->file_update (f, UNIX_FILE_UPDATE_DELETE);
+ }
+ else
+ /* stored index was not a valid file, reset stored index to ~0 */
+ nm->clib_file_index = ~0;
+ }
+
+ /* If we already created a socket, close/free it */
+ if (nm->sk_route)
+ {
+ NL_INFO ("Closing netlink socket %d", nl_socket_get_fd (nm->sk_route));
+ nl_socket_free (nm->sk_route);
+ nm->sk_route = NULL;
+ }
+}
+
+static void
+lcp_nl_open_socket (void)
+{
+ nl_main_t *nm = &nl_main;
+ int dest_ns_fd, curr_ns_fd;
+
+ /* Allocate a new socket for both routes and acls
+ * Notifications do not use sequence numbers, disable sequence number
+ * checking.
+ * Define a callback function, which will be called for each notification
+ * received
+ */
+ nm->sk_route = nl_socket_alloc ();
+ nl_socket_disable_seq_check (nm->sk_route);
+
+ dest_ns_fd = lcp_get_default_ns_fd ();
+ if (dest_ns_fd)
+ {
+ curr_ns_fd = open ("/proc/self/ns/net", O_RDONLY);
+ setns (dest_ns_fd, CLONE_NEWNET);
+ }
+
+ nl_connect (nm->sk_route, NETLINK_ROUTE);
+
+ if (dest_ns_fd && curr_ns_fd >= 0)
+ {
+ setns (curr_ns_fd, CLONE_NEWNET);
+ close (curr_ns_fd);
+ }
+
+ /* Subscribe to all the 'routing' notifications on the route socket */
+ nl_socket_add_memberships (nm->sk_route, RTNLGRP_LINK, RTNLGRP_IPV6_IFADDR,
+ RTNLGRP_IPV4_IFADDR, RTNLGRP_IPV4_ROUTE,
+ RTNLGRP_IPV6_ROUTE, RTNLGRP_NEIGH, RTNLGRP_NOTIFY,
+#ifdef RTNLGRP_MPLS_ROUTE /* not defined on CentOS/RHEL 7 */
+ RTNLGRP_MPLS_ROUTE,
+#endif
+ RTNLGRP_IPV4_RULE, RTNLGRP_IPV6_RULE, 0);
+
+ /* Set socket in nonblocking mode and increase buffer sizes */
+ nl_socket_set_nonblocking (nm->sk_route);
+ nl_socket_set_buffer_size (nm->sk_route, nm->rx_buf_size, nm->tx_buf_size);
+
+ if (nm->clib_file_index == ~0)
+ {
+ clib_file_t rt_file = {
+ .read_function = nl_route_read_cb,
+ .error_function = nl_route_error_cb,
+ .file_descriptor = nl_socket_get_fd (nm->sk_route),
+ .description = format (0, "linux-cp netlink route socket"),
+ };
+
+ nm->clib_file_index = clib_file_add (&file_main, &rt_file);
+ NL_INFO ("Added file %u", nm->clib_file_index);
+ }
+ else
+ /* clib file already created and socket was closed due to error */
+ {
+ clib_file_main_t *fm = &file_main;
+ clib_file_t *f = clib_file_get (fm, nm->clib_file_index);
+
+ f->file_descriptor = nl_socket_get_fd (nm->sk_route);
+ fm->file_update (f, UNIX_FILE_UPDATE_ADD);
+ NL_INFO ("Starting poll of %d", f->file_descriptor);
+ }
+
+ nl_socket_modify_cb (nm->sk_route, NL_CB_VALID, NL_CB_CUSTOM, nl_route_cb,
+ NULL);
+ NL_INFO ("Opened netlink socket %d", nl_socket_get_fd (nm->sk_route));
+}
+
+static void
+lcp_nl_open_sync_socket (nl_sock_type_t sock_type)
+{
+ nl_main_t *nm = &nl_main;
+ int dest_ns_fd, curr_ns_fd;
+ struct nl_sock *sk_route;
+
+ /* Allocate a new blocking socket for routes that will be used for dump
+ * requests. Buffer sizes are left default because replies to dump requests
+ * are flow-controlled and the kernel will not overflow the socket by sending
+ * these
+ */
+
+ nm->sk_route_sync[sock_type] = sk_route = nl_socket_alloc ();
+
+ dest_ns_fd = lcp_get_default_ns_fd ();
+ if (dest_ns_fd > 0)
+ {
+ curr_ns_fd = clib_netns_open (NULL /* self */);
+ if (clib_setns (dest_ns_fd) == -1)
+ NL_ERROR ("Cannot set destination ns");
+ }
+
+ nl_connect (sk_route, NETLINK_ROUTE);
+
+ if (dest_ns_fd > 0)
+ {
+ if (curr_ns_fd == -1)
+ {
+ NL_ERROR ("No previous ns to set");
+ }
+ else
+ {
+ if (clib_setns (curr_ns_fd) == -1)
+ NL_ERROR ("Cannot set previous ns");
+ close (curr_ns_fd);
+ }
+ }
+
+ NL_INFO ("Opened netlink synchronization socket %d of type %d",
+ nl_socket_get_fd (sk_route), sock_type);
+}
+
+static void
+lcp_nl_close_sync_socket (nl_sock_type_t sock_type)
+{
+ nl_main_t *nm = &nl_main;
+ struct nl_sock *sk_route = nm->sk_route_sync[sock_type];
+
+ if (sk_route)
+ {
+ NL_INFO ("Closing netlink synchronization socket %d of type %d",
+ nl_socket_get_fd (sk_route), sock_type);
+ nl_socket_free (sk_route);
+ nm->sk_route_sync[sock_type] = NULL;
+ }
+}
+
+#include <vnet/plugin/plugin.h>
+clib_error_t *
+lcp_nl_init (vlib_main_t *vm)
+{
+ nl_main_t *nm = &nl_main;
+ lcp_itf_pair_vft_t nl_itf_pair_vft = {
+ .pair_add_fn = lcp_nl_pair_add_cb,
+ };
+
+ nm->nl_status = NL_STATUS_NOTIF_PROC;
+ nm->clib_file_index = ~0;
+ nm->nl_logger = vlib_log_register_class ("nl", "nl");
+
+ lcp_nl_open_socket ();
+ lcp_itf_pair_register_vft (&nl_itf_pair_vft);
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lcp_nl_init) = {
+ .runs_after = VLIB_INITS ("lcp_interface_init", "tuntap_init",
+ "ip_neighbor_init"),
+};
+
+#include <vpp/app/version.h>
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "linux Control Plane - Netlink listener",
+ .default_disabled = 1,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_nl.h b/src/plugins/linux-cp/lcp_nl.h
new file mode 100644
index 00000000000..41757e9b983
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_nl.h
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/addr.h>
+
+typedef void (*nl_rt_link_cb_t) (struct rtnl_link *rl, void *ctx);
+typedef void (*nl_rt_link_sync_cb_t) (void);
+typedef void (*nl_rt_addr_cb_t) (struct rtnl_addr *ra);
+typedef void (*nl_rt_addr_sync_cb_t) (void);
+typedef void (*nl_rt_neigh_cb_t) (struct rtnl_neigh *rr);
+typedef void (*nl_rt_neigh_sync_cb_t) (void);
+typedef void (*nl_rt_route_add_cb_t) (struct rtnl_route *rn, int is_replace);
+typedef void (*nl_rt_route_del_cb_t) (struct rtnl_route *rn);
+typedef void (*nl_rt_route_sync_cb_t) (void);
+
+#define NL_RT_COMMON uword is_mp_safe
+
+typedef struct nl_rt_link_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_link_cb_t cb;
+} nl_rt_link_t;
+
+typedef struct nl_rt_link_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_link_sync_cb_t cb;
+} nl_rt_link_sync_t;
+
+typedef struct nl_rt_addr_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_addr_cb_t cb;
+} nl_rt_addr_t;
+
+typedef struct nl_rt_addr_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_addr_sync_cb_t cb;
+} nl_rt_addr_sync_t;
+
+typedef struct nl_rt_neigh_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_neigh_cb_t cb;
+} nl_rt_neigh_t;
+
+typedef struct nl_rt_neigh_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_neigh_sync_cb_t cb;
+} nl_rt_neigh_sync_t;
+
+typedef struct nl_rt_route_add_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_add_cb_t cb;
+} nl_rt_route_add_t;
+
+typedef struct nl_rt_route_del_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_del_cb_t cb;
+} nl_rt_route_del_t;
+
+typedef struct nl_rt_route_sync_t_
+{
+ NL_RT_COMMON;
+
+ nl_rt_route_sync_cb_t cb;
+} nl_rt_route_sync_t;
+
+#undef NL_RT_COMMON
+
+typedef struct nl_vft_t_
+{
+ nl_rt_link_t nvl_rt_link_add;
+ nl_rt_link_t nvl_rt_link_del;
+ nl_rt_link_sync_t nvl_rt_link_sync_begin;
+ nl_rt_link_sync_t nvl_rt_link_sync_end;
+ nl_rt_addr_t nvl_rt_addr_add;
+ nl_rt_addr_t nvl_rt_addr_del;
+ nl_rt_addr_sync_t nvl_rt_addr_sync_begin;
+ nl_rt_addr_sync_t nvl_rt_addr_sync_end;
+ nl_rt_neigh_t nvl_rt_neigh_add;
+ nl_rt_neigh_t nvl_rt_neigh_del;
+ nl_rt_neigh_sync_t nvl_rt_neigh_sync_begin;
+ nl_rt_neigh_sync_t nvl_rt_neigh_sync_end;
+ nl_rt_route_add_t nvl_rt_route_add;
+ nl_rt_route_del_t nvl_rt_route_del;
+ nl_rt_route_sync_t nvl_rt_route_sync_begin;
+ nl_rt_route_sync_t nvl_rt_route_sync_end;
+} nl_vft_t;
+
+extern void nl_register_vft (const nl_vft_t *nv);
+
+typedef enum lcp_nl_obj_t_
+{
+ LCP_NL_LINK,
+ LCP_NL_ADDR,
+ LCP_NL_NEIGH,
+ LCP_NL_ROUTE,
+} lcp_nl_obj_t;
+
+/* struct type to hold context on the netlink message being processed.
+ *
+ * At creation of a pair, a tap/tun is created and configured to match its
+ * corresponding hardware interface (MAC address, link state, MTU). Netlink
+ * messages are sent announcing the creation and subsequent configuration.
+ * We do not need to (and should not) act on those messages since applying
+ * those same configurations again is unnecessary and can be disruptive. So
+ * a timestamp for a message is stored and can be compared against the time
+ * the interface came under linux-cp management in order to figure out
+ * whether we should apply any configuration.
+ */
+typedef struct nl_msg_info
+{
+ struct nl_msg *msg;
+ f64 ts;
+} nl_msg_info_t;
+
+#define LCP_NL_N_OBJS (LCP_NL_ROUTE + 1)
+
+extern struct nl_cache *lcp_nl_get_cache (lcp_nl_obj_t t);
+extern int lcp_nl_drain_messages (void);
+extern void lcp_nl_set_buffer_size (u32 buf_size);
+extern void lcp_nl_set_batch_size (u32 batch_size);
+extern void lcp_nl_set_batch_delay (u32 batch_delay_ms);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/linux-cp/lcp_node.c b/src/plugins/linux-cp/lcp_node.c
index b2ffc5fb63f..241cc5e4bff 100644
--- a/src/plugins/linux-cp/lcp_node.c
+++ b/src/plugins/linux-cp/lcp_node.c
@@ -31,6 +31,7 @@
#include <vnet/ip/ip4.h>
#include <vnet/ip/ip6.h>
#include <vnet/l2/l2_input.h>
+#include <vnet/mpls/mpls.h>
#define foreach_lip_punt \
_ (IO, "punt to host") \
@@ -438,14 +439,112 @@ VNET_FEATURE_INIT (lcp_xc_ip6_mcast_node, static) = {
typedef enum
{
+ LCP_XC_MPLS_NEXT_DROP,
+ LCP_XC_MPLS_NEXT_IO,
+ LCP_XC_MPLS_N_NEXT,
+} lcp_xc_mpls_next_t;
+
+static_always_inline uword
+lcp_xc_mpls_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u32 n_left_from, *from, *to_next, n_left_to_next;
+ lcp_xc_next_t next_index;
+
+ next_index = 0;
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ const ethernet_header_t *eth;
+ const lcp_itf_pair_t *lip;
+ u32 next0, bi0, lipi, ai;
+ vlib_buffer_t *b0;
+ // const ip_adjacency_t *adj;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ lipi =
+ lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+ lip = lcp_itf_pair_get (lipi);
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index;
+ vlib_buffer_advance (b0, -lip->lip_rewrite_len);
+ eth = vlib_buffer_get_current (b0);
+
+ ai = ADJ_INDEX_INVALID;
+ next0 = LCP_XC_MPLS_NEXT_DROP;
+ if (!ethernet_address_cast (eth->dst_address))
+ ai = lcp_adj_lkup ((u8 *) eth, lip->lip_rewrite_len,
+ vnet_buffer (b0)->sw_if_index[VLIB_TX]);
+ if (ai != ADJ_INDEX_INVALID)
+ {
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
+ next0 = LCP_XC_MPLS_NEXT_IO;
+ }
+
+ if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ lcp_xc_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->phy_sw_if_index = lip->lip_phy_sw_if_index;
+ t->adj_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+ }
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (lcp_xc_mpls)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return (lcp_xc_mpls_inline (vm, node, frame));
+}
+
+VLIB_REGISTER_NODE (
+ lcp_xc_mpls) = { .name = "linux-cp-xc-mpls",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lcp_xc_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_next_nodes = LCP_XC_MPLS_N_NEXT,
+ .next_nodes = {
+ [LCP_XC_MPLS_NEXT_DROP] = "error-drop",
+ [LCP_XC_MPLS_NEXT_IO] = "interface-output",
+ } };
+
+VNET_FEATURE_INIT (lcp_xc_mpls_node, static) = {
+ .arc_name = "mpls-input",
+ .node_name = "linux-cp-xc-mpls",
+};
+
+typedef enum
+{
LCP_XC_L3_NEXT_XC,
+ LCP_XC_L3_NEXT_LOOKUP,
LCP_XC_L3_N_NEXT,
} lcp_xc_l3_next_t;
/**
* X-connect all packets from the HOST to the PHY on L3 interfaces
*
- * There's only one adjacency that can be used on thises links.
+ * There's only one adjacency that can be used on these links.
*/
static_always_inline u32
lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
@@ -453,6 +552,7 @@ lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
u32 n_left_from, *from, *to_next, n_left_to_next;
lcp_xc_next_t next_index;
+ vnet_main_t *vnm = vnet_get_main ();
next_index = 0;
n_left_from = frame->n_vectors;
@@ -488,10 +588,24 @@ lcp_xc_l3_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
lcp_itf_pair_find_by_host (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
lip = lcp_itf_pair_get (lipi);
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = lip->lip_phy_sw_if_index;
- next0 = LCP_XC_L3_NEXT_XC;
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
- lip->lip_phy_adjs.adj_index[af];
+ /* P2P tunnels can use generic adjacency */
+ if (PREDICT_TRUE (
+ vnet_sw_interface_is_p2p (vnm, lip->lip_phy_sw_if_index)))
+ {
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ lip->lip_phy_sw_if_index;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ lip->lip_phy_adjs.adj_index[af];
+ next0 = LCP_XC_L3_NEXT_XC;
+ }
+ /* P2MP tunnels require a fib lookup to find the right adjacency */
+ else
+ {
+ /* lookup should use FIB table associated with phy interface */
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] =
+ lip->lip_phy_sw_if_index;
+ next0 = LCP_XC_L3_NEXT_LOOKUP;
+ }
if (PREDICT_FALSE ((b0->flags & VLIB_BUFFER_IS_TRACED)))
{
@@ -534,6 +648,7 @@ VLIB_REGISTER_NODE (lcp_xc_l3_ip4_node) = {
.n_next_nodes = LCP_XC_L3_N_NEXT,
.next_nodes = {
[LCP_XC_L3_NEXT_XC] = "ip4-midchain",
+ [LCP_XC_L3_NEXT_LOOKUP] = "ip4-lookup",
},
};
@@ -556,6 +671,7 @@ VLIB_REGISTER_NODE (lcp_xc_l3_ip6_node) = {
.n_next_nodes = LCP_XC_L3_N_NEXT,
.next_nodes = {
[LCP_XC_L3_NEXT_XC] = "ip6-midchain",
+ [LCP_XC_L3_NEXT_LOOKUP] = "ip6-lookup",
},
};
@@ -671,10 +787,14 @@ VLIB_NODE_FN (lcp_arp_phy_node)
c0 = vlib_buffer_copy (vm, b0);
vlib_buffer_advance (b0, len0);
- /* Send to the host */
- vnet_buffer (c0)->sw_if_index[VLIB_TX] =
- lip0->lip_host_sw_if_index;
- reply_copies[n_copies++] = vlib_get_buffer_index (vm, c0);
+ if (c0)
+ {
+ /* Send to the host */
+ vnet_buffer (c0)->sw_if_index[VLIB_TX] =
+ lip0->lip_host_sw_if_index;
+ reply_copies[n_copies++] =
+ vlib_get_buffer_index (vm, c0);
+ }
}
}
if (arp1->opcode == clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_reply))
@@ -699,10 +819,14 @@ VLIB_NODE_FN (lcp_arp_phy_node)
c1 = vlib_buffer_copy (vm, b1);
vlib_buffer_advance (b1, len1);
- /* Send to the host */
- vnet_buffer (c1)->sw_if_index[VLIB_TX] =
- lip1->lip_host_sw_if_index;
- reply_copies[n_copies++] = vlib_get_buffer_index (vm, c1);
+ if (c1)
+ {
+ /* Send to the host */
+ vnet_buffer (c1)->sw_if_index[VLIB_TX] =
+ lip1->lip_host_sw_if_index;
+ reply_copies[n_copies++] =
+ vlib_get_buffer_index (vm, c1);
+ }
}
}
@@ -771,10 +895,14 @@ VLIB_NODE_FN (lcp_arp_phy_node)
c0 = vlib_buffer_copy (vm, b0);
vlib_buffer_advance (b0, len0);
- /* Send to the host */
- vnet_buffer (c0)->sw_if_index[VLIB_TX] =
- lip0->lip_host_sw_if_index;
- reply_copies[n_copies++] = vlib_get_buffer_index (vm, c0);
+ if (c0)
+ {
+ /* Send to the host */
+ vnet_buffer (c0)->sw_if_index[VLIB_TX] =
+ lip0->lip_host_sw_if_index;
+ reply_copies[n_copies++] =
+ vlib_get_buffer_index (vm, c0);
+ }
}
}
diff --git a/src/plugins/linux-cp/lcp_router.c b/src/plugins/linux-cp/lcp_router.c
new file mode 100644
index 00000000000..0efd53e64ef
--- /dev/null
+++ b/src/plugins/linux-cp/lcp_router.c
@@ -0,0 +1,1578 @@
+/*
+ * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <sys/socket.h>
+#include <linux/if.h>
+#include <linux/mpls.h>
+
+//#include <vlib/vlib.h>
+#include <vlib/unix/plugin.h>
+#include <linux-cp/lcp_nl.h>
+#include <linux-cp/lcp_interface.h>
+
+#include <netlink/msg.h>
+#include <netlink/netlink.h>
+#include <netlink/socket.h>
+#include <netlink/route/link.h>
+#include <netlink/route/route.h>
+#include <netlink/route/neighbour.h>
+#include <netlink/route/nexthop.h>
+#include <netlink/route/addr.h>
+#include <netlink/route/link/vlan.h>
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/mfib/mfib_table.h>
+#include <vnet/ip/ip6_ll_table.h>
+#include <vnet/ip-neighbor/ip_neighbor.h>
+#include <vnet/ip/ip6_link.h>
+
+typedef struct lcp_router_table_t_
+{
+ uint32_t nlt_id;
+ fib_protocol_t nlt_proto;
+ u32 nlt_fib_index;
+ u32 nlt_mfib_index;
+ u32 nlt_refs;
+} lcp_router_table_t;
+
+static uword *lcp_router_table_db[FIB_PROTOCOL_MAX];
+static lcp_router_table_t *lcp_router_table_pool;
+static vlib_log_class_t lcp_router_logger;
+
+const static fib_prefix_t pfx_all1s = {
+ .fp_addr = {
+ .ip4 = {
+ .as_u32 = 0xffffffff,
+ }
+ },
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+};
+
+static fib_source_t lcp_rt_fib_src;
+static fib_source_t lcp_rt_fib_src_dynamic;
+
+#define LCP_ROUTER_DBG(...) vlib_log_debug (lcp_router_logger, __VA_ARGS__);
+
+#define LCP_ROUTER_INFO(...) vlib_log_notice (lcp_router_logger, __VA_ARGS__);
+
+#define LCP_ROUTER_ERROR(...) vlib_log_err (lcp_router_logger, __VA_ARGS__);
+
+static const mfib_prefix_t ip4_specials[] = {
+ /* ALL prefixes are in network order */
+ {
+ /* (*,224.0.0.0)/24 - all local subnet */
+ .fp_grp_addr = {
+ .ip4.data_u32 = 0x000000e0,
+ },
+ .fp_len = 24,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ },
+};
+
+static const mfib_prefix_t ip6_specials[] = {
+ /* ALL prefixes are in network order */
+ {
+ /* (*,ff00::)/8 - all local subnet */
+ .fp_grp_addr = {
+ .ip6.as_u64[0] = 0x00000000000000ff,
+ },
+ .fp_len = 8,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ },
+};
+
+/* VIF to PHY DB of managed interfaces */
+static uword *lcp_routing_itf_db;
+
+static u32
+lcp_router_intf_h2p (u32 host)
+{
+ lcp_itf_pair_t *lip;
+ index_t lipi;
+ uword *p;
+
+ /*
+ * first check the linux side created interface (i.e. vlans, tunnels etc)
+ */
+ p = hash_get (lcp_routing_itf_db, host);
+
+ if (p)
+ return p[0];
+
+ /*
+ * then check the paired phys
+ */
+ lipi = lcp_itf_pair_find_by_vif (host);
+
+ if (INDEX_INVALID == lipi)
+ return (~0);
+
+ lip = lcp_itf_pair_get (lipi);
+
+ return lip->lip_phy_sw_if_index;
+}
+
+/*
+ * Check timestamps on netlink message and interface pair to decide whether
+ * the message should be applied. See the declaration of nl_msg_info_t for
+ * an explanation on why this is necessary.
+ * If timestamps are good (message ts is newer than intf pair ts), return 0.
+ * Else, return -1.
+ */
+static int
+lcp_router_lip_ts_check (nl_msg_info_t *msg_info, lcp_itf_pair_t *lip)
+{
+ if (!msg_info)
+ return 0;
+
+ if (msg_info->ts > lip->lip_create_ts)
+ return 0;
+
+ LCP_ROUTER_INFO ("Early message received for %U",
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ return -1;
+}
+
+static void
+lcp_router_link_del (struct rtnl_link *rl, void *ctx)
+{
+ index_t lipi;
+
+ if (!lcp_auto_subint ())
+ return;
+
+ lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl));
+
+ if (INDEX_INVALID != lipi)
+ {
+ lcp_itf_pair_t *lip;
+
+ lip = lcp_itf_pair_get (lipi);
+
+ if (lcp_router_lip_ts_check ((nl_msg_info_t *) ctx, lip))
+ return;
+
+ LCP_ROUTER_INFO ("delete link: %s - %U", rtnl_link_get_type (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ lcp_itf_pair_delete (lip->lip_phy_sw_if_index);
+
+ if (rtnl_link_is_vlan (rl))
+ {
+ LCP_ROUTER_INFO ("delete vlan: %s -> %U", rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ vnet_delete_sub_interface (lip->lip_phy_sw_if_index);
+ vnet_delete_sub_interface (lip->lip_host_sw_if_index);
+ }
+ }
+ else
+ LCP_ROUTER_INFO ("ignore link del: %s - %s", rtnl_link_get_type (rl),
+ rtnl_link_get_name (rl));
+}
+
+static void
+lcp_router_ip4_mroutes_add_del (u32 sw_if_index, u8 is_add)
+{
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
+ };
+ u32 mfib_index;
+ int ii;
+
+ mfib_index =
+ mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+
+ for (ii = 0; ii < ARRAY_LEN (ip4_specials); ii++)
+ {
+ if (is_add)
+ {
+ mfib_table_entry_path_update (mfib_index, &ip4_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ else
+ {
+ mfib_table_entry_path_remove (mfib_index, &ip4_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW, &path);
+ }
+ }
+}
+
+static void
+lcp_router_ip6_mroutes_add_del (u32 sw_if_index, u8 is_add)
+{
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP6,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = sw_if_index,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
+ };
+ u32 mfib_index;
+ int ii;
+
+ mfib_index =
+ mfib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP6, sw_if_index);
+
+ for (ii = 0; ii < ARRAY_LEN (ip6_specials); ii++)
+ {
+ if (is_add)
+ {
+ mfib_table_entry_path_update (mfib_index, &ip6_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ else
+ {
+ mfib_table_entry_path_remove (mfib_index, &ip6_specials[ii],
+ MFIB_SOURCE_PLUGIN_LOW, &path);
+ }
+ }
+}
+
+static void
+lcp_router_link_mtu (struct rtnl_link *rl, u32 sw_if_index)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 mtu;
+ vnet_sw_interface_t *sw;
+ vnet_hw_interface_t *hw;
+
+ mtu = rtnl_link_get_mtu (rl);
+ if (!mtu)
+ return;
+
+ sw = vnet_get_sw_interface (vnm, sw_if_index);
+ hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+
+ /* If HW interface, try to change hw link */
+ if ((sw->sw_if_index == sw->sup_sw_if_index) &&
+ (hw->hw_class_index == ethernet_hw_interface_class.index))
+ vnet_hw_interface_set_mtu (vnm, hw->hw_if_index, mtu);
+ else
+ vnet_sw_interface_set_mtu (vnm, sw->sw_if_index, mtu);
+}
+
+static walk_rc_t
+lcp_router_link_addr_adj_upd_cb (vnet_main_t *vnm, u32 sw_if_index, void *arg)
+{
+ lcp_itf_pair_t *lip;
+
+ lip = lcp_itf_pair_get (lcp_itf_pair_find_by_phy (sw_if_index));
+ if (!lip)
+ {
+ return WALK_CONTINUE;
+ }
+
+ vnet_update_adjacency_for_sw_interface (vnm, lip->lip_phy_sw_if_index,
+ lip->lip_phy_adjs.adj_index[AF_IP4]);
+ vnet_update_adjacency_for_sw_interface (vnm, lip->lip_phy_sw_if_index,
+ lip->lip_phy_adjs.adj_index[AF_IP6]);
+
+ return WALK_CONTINUE;
+}
+
+static void
+lcp_router_link_addr (struct rtnl_link *rl, lcp_itf_pair_t *lip)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ struct nl_addr *mac_addr;
+ vnet_sw_interface_t *sw;
+ vnet_hw_interface_t *hw;
+ void *mac_addr_bytes;
+
+ mac_addr = rtnl_link_get_addr (rl);
+ if (!mac_addr || (nl_addr_get_family (mac_addr) != AF_LLC))
+ return;
+
+ sw = vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index);
+
+ /* can only change address on hw interface */
+ if (sw->sw_if_index != sw->sup_sw_if_index)
+ return;
+
+ hw = vnet_get_sup_hw_interface (vnm, lip->lip_phy_sw_if_index);
+ if (!vec_len (hw->hw_address))
+ return;
+
+ mac_addr_bytes = nl_addr_get_binary_addr (mac_addr);
+ if (clib_memcmp (mac_addr_bytes, hw->hw_address, nl_addr_get_len (mac_addr)))
+ vnet_hw_interface_change_mac_address (vnm, hw->hw_if_index,
+ mac_addr_bytes);
+
+ /* mcast adjacencies need to be updated */
+ vnet_hw_interface_walk_sw (vnm, hw->hw_if_index,
+ lcp_router_link_addr_adj_upd_cb, NULL);
+}
+
+static void lcp_router_table_flush (lcp_router_table_t *nlt,
+ u32 *sw_if_index_to_bool,
+ fib_source_t source);
+
+static void
+lcp_router_link_add (struct rtnl_link *rl, void *ctx)
+{
+ index_t lipi;
+ int up;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_ifindex (rl));
+ up = IFF_UP & rtnl_link_get_flags (rl);
+
+ if (INDEX_INVALID != lipi)
+ {
+ lcp_itf_pair_t *lip;
+ u32 sw_if_flags;
+ u32 sw_if_up;
+
+ lip = lcp_itf_pair_get (lipi);
+ if (!vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index))
+ return;
+
+ if (lcp_router_lip_ts_check ((nl_msg_info_t *) ctx, lip))
+ return;
+
+ sw_if_flags =
+ vnet_sw_interface_get_flags (vnm, lip->lip_phy_sw_if_index);
+ sw_if_up = (sw_if_flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+
+ if (!sw_if_up && up)
+ {
+ vnet_sw_interface_admin_up (vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ }
+ else if (sw_if_up && !up)
+ {
+ vnet_sw_interface_admin_down (vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+
+ /* When an interface is brought down administratively, the kernel
+ * removes routes which resolve through that interface. For IPv4
+ * routes, the kernel will not send any explicit RTM_DELROUTE
+ * messages about removing them. In order to synchronize with the
+ * kernel, affected IPv4 routes need to be manually removed from the
+ * FIB. The behavior is different for IPv6 routes. Explicit
+ * RTM_DELROUTE messages are sent about IPv6 routes being removed.
+ */
+ u32 fib_index;
+ lcp_router_table_t *nlt;
+
+ fib_index = fib_table_get_index_for_sw_if_index (
+ FIB_PROTOCOL_IP4, lip->lip_phy_sw_if_index);
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ if (fib_index == nlt->nlt_fib_index &&
+ FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+ {
+ u32 *sw_if_index_to_bool = NULL;
+
+ vec_validate_init_empty (sw_if_index_to_bool,
+ lip->lip_phy_sw_if_index, false);
+ sw_if_index_to_bool[lip->lip_phy_sw_if_index] = true;
+
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src);
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src_dynamic);
+
+ vec_free (sw_if_index_to_bool);
+ break;
+ }
+ }
+ }
+
+ LCP_ROUTER_DBG ("link: %s (%d) -> %U/%U %s", rtnl_link_get_name (rl),
+ rtnl_link_get_ifindex (rl), format_vnet_sw_if_index_name,
+ vnm, lip->lip_phy_sw_if_index,
+ format_vnet_sw_if_index_name, vnm,
+ lip->lip_host_sw_if_index, (up ? "up" : "down"));
+
+ lcp_router_link_mtu (rl, lip->lip_phy_sw_if_index);
+ lcp_router_link_addr (rl, lip);
+ }
+ else if (lcp_auto_subint () && rtnl_link_is_vlan (rl))
+ {
+ /* Find the pair based on the parent VIF */
+ lipi = lcp_itf_pair_find_by_vif (rtnl_link_get_link (rl));
+
+ if (INDEX_INVALID != lipi)
+ {
+ u32 sub_phy_sw_if_index, sub_host_sw_if_index;
+ const lcp_itf_pair_t *lip;
+ int vlan;
+ u8 *ns = 0; /* FIXME */
+
+ lip = lcp_itf_pair_get (lipi);
+
+ vlan = rtnl_link_vlan_get_id (rl);
+
+ /* create the vlan interface on the parent phy */
+ if (vnet_create_sub_interface (lip->lip_phy_sw_if_index, vlan, 18, 0,
+ vlan, &sub_phy_sw_if_index))
+ {
+ LCP_ROUTER_INFO ("failed create phy vlan: %s on %U",
+ rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index);
+ return;
+ }
+
+ /* pool could grow during the previous operation */
+ lip = lcp_itf_pair_get (lipi);
+
+ /* create the vlan interface on the parent host */
+ if (vnet_create_sub_interface (lip->lip_host_sw_if_index, vlan, 18,
+ 0, vlan, &sub_host_sw_if_index))
+ {
+ LCP_ROUTER_INFO ("failed create vlan: %s on %U",
+ rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_host_sw_if_index);
+ return;
+ }
+
+ char *if_name;
+ u8 *if_namev = 0;
+
+ LCP_ROUTER_INFO (
+ "create vlan: %s -> (%U, %U) : (%U, %U)", rtnl_link_get_name (rl),
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_phy_sw_if_index, format_vnet_sw_if_index_name,
+ vnet_get_main (), sub_phy_sw_if_index,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ lip->lip_host_sw_if_index, format_vnet_sw_if_index_name,
+ vnet_get_main (), sub_host_sw_if_index);
+
+ if ((if_name = rtnl_link_get_name (rl)) != NULL)
+ vec_validate_init_c_string (if_namev, if_name,
+ strnlen (if_name, IFNAMSIZ));
+ lcp_itf_pair_add (sub_host_sw_if_index, sub_phy_sw_if_index,
+ if_namev, rtnl_link_get_ifindex (rl),
+ lip->lip_host_type, ns);
+ if (up)
+ vnet_sw_interface_admin_up (vnet_get_main (), sub_phy_sw_if_index);
+ vnet_sw_interface_admin_up (vnet_get_main (), sub_host_sw_if_index);
+
+ vec_free (if_namev);
+ }
+ else
+ {
+ LCP_ROUTER_INFO ("ignore parent-link add: %s - %s",
+ rtnl_link_get_type (rl), rtnl_link_get_name (rl));
+ }
+ }
+ else
+ LCP_ROUTER_INFO ("ignore link add: %s - %s", rtnl_link_get_type (rl),
+ rtnl_link_get_name (rl));
+}
+
+static void
+lcp_router_link_sync_begin (void)
+{
+ LCP_ROUTER_INFO ("Begin synchronization of interface configurations");
+}
+
+static void
+lcp_router_link_sync_end (void)
+{
+ LCP_ROUTER_INFO ("End synchronization of interface configurations");
+}
+
+static clib_error_t *
+lcp_router_link_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+ vnet_hw_interface_t *hi;
+ index_t lipi;
+
+ hi = vnet_get_hw_interface_or_null (vnm, hw_if_index);
+ if (!hi)
+ return 0;
+
+ lipi = lcp_itf_pair_find_by_phy (hi->sw_if_index);
+ if (lipi == INDEX_INVALID)
+ return 0;
+
+ /* When the link goes down on an interface, the kernel processes routes which
+ * resolve through that interface depending on how they were created:
+ * - Legacy Route API: the kernel retains the routes and marks them as
+ * "linkdown";
+ * - Nexthop API: the kernel removes the next-hop objects and the routes
+ * which reference them.
+ *
+ * For IPv4 routes created with Nexthop API, the kernel will not send any
+ * explicit RTM_DELROUTE messages about removing them. In order to
+ * synchronize with the kernel, affected routes need to be manually removed
+ * from the FIB.
+ *
+ * The behavior is different for IPv6 routes created with Nexthop API. The
+ * kernel will send explicit RTM_DELROUTE messages about IPv6 routes being
+ * removed.
+ */
+ if (!(flags & VNET_HW_INTERFACE_FLAG_LINK_UP) &&
+ (lcp_get_del_static_on_link_down () ||
+ lcp_get_del_dynamic_on_link_down ()))
+ {
+ u32 fib_index;
+ u32 **fib_index_to_sw_if_index_to_bool = NULL;
+ u32 id, sw_if_index;
+ lcp_router_table_t *nlt;
+
+ fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ hi->sw_if_index);
+
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool, fib_index,
+ NULL);
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool[fib_index],
+ hi->sw_if_index, false);
+ fib_index_to_sw_if_index_to_bool[fib_index][hi->sw_if_index] = true;
+
+ /* clang-format off */
+ hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
+ ({
+ fib_index = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ sw_if_index);
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool, fib_index,
+ NULL);
+ vec_validate_init_empty (fib_index_to_sw_if_index_to_bool[fib_index],
+ sw_if_index, false);
+ fib_index_to_sw_if_index_to_bool[fib_index][sw_if_index] = true;
+ }));
+ /* clang-format on */
+
+ vec_foreach_index (fib_index, fib_index_to_sw_if_index_to_bool)
+ {
+ u32 *sw_if_index_to_bool;
+
+ sw_if_index_to_bool = fib_index_to_sw_if_index_to_bool[fib_index];
+ if (NULL == sw_if_index_to_bool)
+ continue;
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ if (fib_index == nlt->nlt_fib_index &&
+ FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+ {
+ if (lcp_get_del_static_on_link_down ())
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src);
+ if (lcp_get_del_dynamic_on_link_down ())
+ lcp_router_table_flush (nlt, sw_if_index_to_bool,
+ lcp_rt_fib_src_dynamic);
+ break;
+ }
+ }
+
+ vec_free (sw_if_index_to_bool);
+ }
+
+ vec_free (fib_index_to_sw_if_index_to_bool);
+ }
+
+ return 0;
+}
+
+VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (lcp_router_link_up_down);
+
+static fib_protocol_t
+lcp_router_proto_k2f (uint32_t k)
+{
+ switch (k)
+ {
+ case AF_INET6:
+ return FIB_PROTOCOL_IP6;
+ case AF_INET:
+ return FIB_PROTOCOL_IP4;
+ case AF_MPLS:
+ return FIB_PROTOCOL_MPLS;
+ default:
+ ASSERT (0);
+ return FIB_PROTOCOL_NONE;
+ }
+}
+
+static void
+lcp_router_mk_addr (const struct nl_addr *rna, ip_address_t *ia)
+{
+ fib_protocol_t fproto;
+
+ ip_address_reset (ia);
+ fproto = lcp_router_proto_k2f (nl_addr_get_family (rna));
+ ASSERT (FIB_PROTOCOL_MPLS != fproto);
+
+ ip_address_set (ia, nl_addr_get_binary_addr (rna),
+ FIB_PROTOCOL_IP4 == fproto ? AF_IP4 : AF_IP6);
+}
+
+static fib_protocol_t
+lcp_router_mk_addr46 (const struct nl_addr *rna, ip46_address_t *ia)
+{
+ fib_protocol_t fproto;
+
+ fproto = lcp_router_proto_k2f (nl_addr_get_family (rna));
+ ASSERT (FIB_PROTOCOL_MPLS != fproto);
+
+ ip46_address_reset (ia);
+ if (FIB_PROTOCOL_IP4 == fproto)
+ memcpy (&ia->ip4, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
+ else
+ memcpy (&ia->ip6, nl_addr_get_binary_addr (rna), nl_addr_get_len (rna));
+
+ return (fproto);
+}
+
+static void
+lcp_router_link_addr_add_del (struct rtnl_addr *rla, int is_del)
+{
+ u32 sw_if_index;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_addr_get_ifindex (rla));
+
+ if (~0 != sw_if_index)
+ {
+ ip_address_t nh;
+
+ lcp_router_mk_addr (rtnl_addr_get_local (rla), &nh);
+
+ if (AF_IP4 == ip_addr_version (&nh))
+ {
+ ip4_add_del_interface_address (
+ vlib_get_main (), sw_if_index, &ip_addr_v4 (&nh),
+ rtnl_addr_get_prefixlen (rla), is_del);
+ lcp_router_ip4_mroutes_add_del (sw_if_index, !is_del);
+ }
+ else if (AF_IP6 == ip_addr_version (&nh))
+ {
+ if (ip6_address_is_link_local_unicast (&ip_addr_v6 (&nh)))
+ if (is_del)
+ ip6_link_disable (sw_if_index);
+ else
+ {
+ ip6_link_enable (sw_if_index, NULL);
+ ip6_link_set_local_address (sw_if_index, &ip_addr_v6 (&nh));
+ }
+ else
+ ip6_add_del_interface_address (
+ vlib_get_main (), sw_if_index, &ip_addr_v6 (&nh),
+ rtnl_addr_get_prefixlen (rla), is_del);
+ lcp_router_ip6_mroutes_add_del (sw_if_index, !is_del);
+ }
+
+ LCP_ROUTER_DBG ("link-addr: %U %U/%d", format_vnet_sw_if_index_name,
+ vnet_get_main (), sw_if_index, format_ip_address, &nh,
+ rtnl_addr_get_prefixlen (rla));
+ }
+}
+
+static void
+lcp_router_link_addr_del (struct rtnl_addr *la)
+{
+ lcp_router_link_addr_add_del (la, 1);
+}
+
+static void
+lcp_router_link_addr_add (struct rtnl_addr *la)
+{
+ lcp_router_link_addr_add_del (la, 0);
+}
+
+static walk_rc_t
+lcp_router_address_mark (index_t index, void *ctx)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ lcp_itf_pair_t *lip = lcp_itf_pair_get (index);
+ if (!lip)
+ return WALK_CONTINUE;
+
+ ip_interface_address_mark_one_interface (
+ vnm, vnet_get_sw_interface (vnm, lip->lip_phy_sw_if_index), 0);
+
+ return WALK_CONTINUE;
+}
+
+static void
+lcp_router_link_addr_sync_begin (void)
+{
+ lcp_itf_pair_walk (lcp_router_address_mark, 0);
+
+ LCP_ROUTER_INFO ("Begin synchronization of interface addresses");
+}
+
+static void
+lcp_router_link_addr_sync_end (void)
+{
+ ip_interface_address_sweep ();
+
+ LCP_ROUTER_INFO ("End synchronization of interface addresses");
+}
+
+static void
+lcp_router_mk_mac_addr (const struct nl_addr *rna, mac_address_t *mac)
+{
+ mac_address_from_bytes (mac, nl_addr_get_binary_addr (rna));
+}
+
+static void
+lcp_router_neigh_del (struct rtnl_neigh *rn)
+{
+ u32 sw_if_index;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_neigh_get_ifindex (rn));
+
+ if (~0 != sw_if_index)
+ {
+ ip_address_t nh;
+ int rv;
+ struct nl_addr *rna;
+
+ if ((rna = rtnl_neigh_get_dst (rn)) == NULL)
+ return;
+ lcp_router_mk_addr (rna, &nh);
+
+ if (ip46_address_is_multicast (&ip_addr_46 (&nh)))
+ {
+ LCP_ROUTER_DBG ("ignore neighbor del: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ return;
+ }
+
+ rv = ip_neighbor_del (&nh, sw_if_index);
+
+ if (rv)
+ {
+ LCP_ROUTER_ERROR (
+ "Failed to delete neighbor: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index);
+ }
+ else
+ {
+ LCP_ROUTER_DBG ("neighbor del: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ }
+ }
+ else
+ LCP_ROUTER_INFO ("ignore neighbour del on: %d",
+ rtnl_neigh_get_ifindex (rn));
+}
+
+#ifndef NUD_VALID
+#define NUD_VALID \
+ (NUD_PERMANENT | NUD_NOARP | NUD_REACHABLE | NUD_PROBE | NUD_STALE | \
+ NUD_DELAY)
+#endif
+
+static void
+lcp_router_neigh_add (struct rtnl_neigh *rn)
+{
+ u32 sw_if_index;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_neigh_get_ifindex (rn));
+
+ if (~0 != sw_if_index)
+ {
+ struct nl_addr *ll;
+ ip_address_t nh;
+ int state;
+ struct nl_addr *rna;
+
+ if ((rna = rtnl_neigh_get_dst (rn)) == NULL)
+ return;
+ lcp_router_mk_addr (rna, &nh);
+
+ if (ip46_address_is_multicast (&ip_addr_46 (&nh)))
+ {
+ LCP_ROUTER_DBG ("ignore neighbor add: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ return;
+ }
+
+ ll = rtnl_neigh_get_lladdr (rn);
+ state = rtnl_neigh_get_state (rn);
+
+ if (ll && (state & NUD_VALID))
+ {
+ mac_address_t mac;
+ ip_neighbor_flags_t flags;
+ int rv;
+
+ lcp_router_mk_mac_addr (ll, &mac);
+
+ if (state & (NUD_NOARP | NUD_PERMANENT))
+ flags = IP_NEIGHBOR_FLAG_STATIC;
+ else
+ flags = IP_NEIGHBOR_FLAG_DYNAMIC;
+
+ rv = ip_neighbor_add (&nh, &mac, sw_if_index, flags, NULL);
+
+ if (rv)
+ {
+ LCP_ROUTER_ERROR (
+ "Failed to create neighbor: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index);
+ }
+ else
+ {
+ LCP_ROUTER_DBG ("neighbor add: %U %U", format_ip_address, &nh,
+ format_vnet_sw_if_index_name, vnet_get_main (),
+ sw_if_index);
+ }
+ }
+ else
+ /* It's a delete */
+ lcp_router_neigh_del (rn);
+ }
+ else
+ LCP_ROUTER_INFO ("ignore neighbour add on: %d",
+ rtnl_neigh_get_ifindex (rn));
+}
+
+static walk_rc_t
+lcp_router_neighbor_mark (index_t index, void *ctx)
+{
+ lcp_itf_pair_t *lip = lcp_itf_pair_get (index);
+ if (!lip)
+ return WALK_CONTINUE;
+
+ ip_neighbor_walk (AF_IP4, lip->lip_phy_sw_if_index, ip_neighbor_mark_one, 0);
+ ip_neighbor_walk (AF_IP6, lip->lip_phy_sw_if_index, ip_neighbor_mark_one, 0);
+
+ return WALK_CONTINUE;
+}
+
+static void
+lcp_router_neigh_sync_begin (void)
+{
+ lcp_itf_pair_walk (lcp_router_neighbor_mark, 0);
+
+ LCP_ROUTER_INFO ("Begin synchronization of neighbors");
+}
+
+static void
+lcp_router_neigh_sync_end (void)
+{
+ ip_neighbor_sweep (AF_IP4);
+ ip_neighbor_sweep (AF_IP6);
+
+ LCP_ROUTER_INFO ("End synchronization of neighbors");
+}
+
+static lcp_router_table_t *
+lcp_router_table_find (uint32_t id, fib_protocol_t fproto)
+{
+ uword *p;
+
+ p = hash_get (lcp_router_table_db[fproto], id);
+
+ if (p)
+ return pool_elt_at_index (lcp_router_table_pool, p[0]);
+
+ return (NULL);
+}
+
+static uint32_t
+lcp_router_table_k2f (uint32_t k)
+{
+ // the kernel's table ID 255 is the default table
+ if (k == 255 || k == 254)
+ return 0;
+ return k;
+}
+
+static lcp_router_table_t *
+lcp_router_table_add_or_lock (uint32_t id, fib_protocol_t fproto)
+{
+ lcp_router_table_t *nlt;
+
+ id = lcp_router_table_k2f (id);
+ nlt = lcp_router_table_find (id, fproto);
+
+ if (NULL == nlt)
+ {
+ pool_get_zero (lcp_router_table_pool, nlt);
+
+ nlt->nlt_id = id;
+ nlt->nlt_proto = fproto;
+
+ nlt->nlt_fib_index = fib_table_find_or_create_and_lock (
+ nlt->nlt_proto, nlt->nlt_id, lcp_rt_fib_src);
+ nlt->nlt_mfib_index = mfib_table_find_or_create_and_lock (
+ nlt->nlt_proto, nlt->nlt_id, MFIB_SOURCE_PLUGIN_LOW);
+
+ hash_set (lcp_router_table_db[fproto], nlt->nlt_id,
+ nlt - lcp_router_table_pool);
+
+ if (FIB_PROTOCOL_IP4 == fproto)
+ {
+ /* Set the all 1s address in this table to punt */
+ fib_table_entry_special_add (nlt->nlt_fib_index, &pfx_all1s,
+ lcp_rt_fib_src, FIB_ENTRY_FLAG_LOCAL);
+
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ int ii;
+
+ for (ii = 0; ii < ARRAY_LEN (ip4_specials); ii++)
+ {
+ mfib_table_entry_path_update (
+ nlt->nlt_mfib_index, &ip4_specials[ii], MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ }
+ else if (FIB_PROTOCOL_IP6 == fproto)
+ {
+ const fib_route_path_t path = {
+ .frp_proto = DPO_PROTO_IP6,
+ .frp_addr = zero_addr,
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ };
+ int ii;
+
+ for (ii = 0; ii < ARRAY_LEN (ip6_specials); ii++)
+ {
+ mfib_table_entry_path_update (
+ nlt->nlt_mfib_index, &ip6_specials[ii], MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, &path);
+ }
+ }
+ }
+
+ nlt->nlt_refs++;
+
+ return (nlt);
+}
+
+static void
+lcp_router_table_unlock (lcp_router_table_t *nlt)
+{
+ nlt->nlt_refs--;
+
+ if (0 == nlt->nlt_refs)
+ {
+ if (FIB_PROTOCOL_IP4 == nlt->nlt_proto)
+ {
+ /* Set the all 1s address in this table to punt */
+ fib_table_entry_special_remove (nlt->nlt_fib_index, &pfx_all1s,
+ lcp_rt_fib_src);
+ }
+
+ fib_table_unlock (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
+
+ hash_unset (lcp_router_table_db[nlt->nlt_proto], nlt->nlt_id);
+ pool_put (lcp_router_table_pool, nlt);
+ }
+}
+
+static void
+lcp_router_route_mk_prefix (struct rtnl_route *r, fib_prefix_t *p)
+{
+ const struct nl_addr *addr = rtnl_route_get_dst (r);
+ u32 *baddr = nl_addr_get_binary_addr (addr);
+ u32 blen = nl_addr_get_len (addr);
+ ip46_address_t *paddr = &p->fp_addr;
+ u32 entry;
+
+ ip46_address_reset (paddr);
+ p->fp_proto = lcp_router_proto_k2f (nl_addr_get_family (addr));
+
+ switch (p->fp_proto)
+ {
+ case FIB_PROTOCOL_MPLS:
+ entry = ntohl (*baddr);
+ p->fp_label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+ p->fp_len = 21;
+ p->fp_eos = MPLS_NON_EOS;
+ return;
+ case FIB_PROTOCOL_IP4:
+ memcpy (&paddr->ip4, baddr, blen);
+ break;
+ case FIB_PROTOCOL_IP6:
+ memcpy (&paddr->ip6, baddr, blen);
+ break;
+ }
+
+ p->fp_len = nl_addr_get_prefixlen (addr);
+}
+
+static void
+lcp_router_route_mk_mprefix (struct rtnl_route *r, mfib_prefix_t *p)
+{
+ const struct nl_addr *addr;
+
+ addr = rtnl_route_get_dst (r);
+
+ p->fp_len = nl_addr_get_prefixlen (addr);
+ p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_grp_addr);
+
+ addr = rtnl_route_get_src (r);
+ if (addr)
+ p->fp_proto = lcp_router_mk_addr46 (addr, &p->fp_src_addr);
+}
+
+static int
+lcp_router_mpls_nladdr_to_path (fib_route_path_t *path, struct nl_addr *addr)
+{
+ if (!addr)
+ return 0;
+
+ struct mpls_label *stack = nl_addr_get_binary_addr (addr);
+ u32 entry, label;
+ u8 exp, ttl;
+ int label_count = 0;
+
+ while (1)
+ {
+ entry = ntohl (stack[label_count++].entry);
+ label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+ exp = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+ ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+
+ fib_mpls_label_t fml = {
+ .fml_value = label,
+ .fml_exp = exp,
+ .fml_ttl = ttl,
+ };
+ vec_add1 (path->frp_label_stack, fml);
+
+ if (entry & MPLS_LS_S_MASK)
+ break;
+ }
+ return label_count;
+}
+
+typedef struct lcp_router_route_path_parse_t_
+{
+ fib_route_path_t *paths;
+ fib_protocol_t route_proto;
+ bool is_mcast;
+ fib_route_path_flags_t type_flags;
+ u8 preference;
+} lcp_router_route_path_parse_t;
+
+static void
+lcp_router_route_path_parse (struct rtnl_nexthop *rnh, void *arg)
+{
+ lcp_router_route_path_parse_t *ctx = arg;
+ fib_route_path_t *path;
+ u32 sw_if_index;
+ int label_count = 0;
+
+ sw_if_index = lcp_router_intf_h2p (rtnl_route_nh_get_ifindex (rnh));
+
+ if (~0 != sw_if_index)
+ {
+ fib_protocol_t fproto;
+ struct nl_addr *addr;
+
+ vec_add2 (ctx->paths, path, 1);
+
+ path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
+ path->frp_sw_if_index = sw_if_index;
+ path->frp_preference = ctx->preference;
+
+ /*
+ * FIB Path Weight of 0 is meaningless and replaced with 1 further along.
+ * See fib_path_create. fib_path_cmp_w_route_path would fail to match
+ * such a fib_route_path_t with any fib_path_t, because a fib_path_t's
+ * fp_weight can never be 0.
+ */
+ path->frp_weight = clib_max (1, rtnl_route_nh_get_weight (rnh));
+
+ addr = rtnl_route_nh_get_gateway (rnh);
+ if (!addr)
+ addr = rtnl_route_nh_get_via (rnh);
+
+ if (addr)
+ fproto = lcp_router_mk_addr46 (addr, &path->frp_addr);
+ else
+ fproto = ctx->route_proto;
+
+ path->frp_proto = fib_proto_to_dpo (fproto);
+
+ if (ctx->route_proto == FIB_PROTOCOL_MPLS)
+ {
+ addr = rtnl_route_nh_get_newdst (rnh);
+ label_count = lcp_router_mpls_nladdr_to_path (path, addr);
+ if (label_count)
+ {
+ LCP_ROUTER_DBG (" is label swap to %u",
+ path->frp_label_stack[0].fml_value);
+ }
+ else
+ {
+ fib_mpls_label_t fml = {
+ .fml_value = MPLS_LABEL_POP,
+ };
+ vec_add1 (path->frp_label_stack, fml);
+ LCP_ROUTER_DBG (" is label pop");
+ }
+ }
+
+#ifdef NL_CAPABILITY_VERSION_3_6_0
+ addr = rtnl_route_nh_get_encap_mpls_dst (rnh);
+ label_count = lcp_router_mpls_nladdr_to_path (path, addr);
+ if (label_count)
+ LCP_ROUTER_DBG (" has encap mpls, %d labels", label_count);
+#endif
+
+ if (ctx->is_mcast)
+ path->frp_mitf_flags = MFIB_ITF_FLAG_FORWARD;
+
+ LCP_ROUTER_DBG (" path:[%U]", format_fib_route_path, path);
+ }
+}
+
+/*
+ * blackhole, unreachable, prohibit will not have a next hop in an
+ * RTM_NEWROUTE. Add a path for them.
+ */
+static void
+lcp_router_route_path_add_special (struct rtnl_route *rr,
+ lcp_router_route_path_parse_t *ctx)
+{
+ fib_route_path_t *path;
+
+ if (rtnl_route_get_type (rr) < RTN_BLACKHOLE)
+ return;
+
+ /* if it already has a path, it does not need us to add one */
+ if (vec_len (ctx->paths) > 0)
+ return;
+
+ vec_add2 (ctx->paths, path, 1);
+
+ path->frp_flags = FIB_ROUTE_PATH_FLAG_NONE | ctx->type_flags;
+ path->frp_sw_if_index = ~0;
+ path->frp_proto = fib_proto_to_dpo (ctx->route_proto);
+ path->frp_preference = ctx->preference;
+
+ LCP_ROUTER_DBG (" path:[%U]", format_fib_route_path, path);
+}
+
+/*
+ * Map of supported route types. Some types are omitted:
+ * RTN_LOCAL - interface address addition creates these automatically
+ * RTN_BROADCAST - same as RTN_LOCAL
+ * RTN_UNSPEC, RTN_ANYCAST, RTN_THROW, RTN_NAT, RTN_XRESOLVE -
+ * There's not a VPP equivalent for these currently.
+ */
+static const u8 lcp_router_route_type_valid[__RTN_MAX] = {
+ [RTN_UNICAST] = 1, [RTN_MULTICAST] = 1, [RTN_BLACKHOLE] = 1,
+ [RTN_UNREACHABLE] = 1, [RTN_PROHIBIT] = 1,
+};
+
+/* Map of fib entry flags by route type */
+static const fib_entry_flag_t lcp_router_route_type_feflags[__RTN_MAX] = {
+ [RTN_LOCAL] = FIB_ENTRY_FLAG_LOCAL | FIB_ENTRY_FLAG_CONNECTED,
+ [RTN_BROADCAST] = FIB_ENTRY_FLAG_DROP | FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT,
+ [RTN_BLACKHOLE] = FIB_ENTRY_FLAG_DROP,
+};
+
+/* Map of fib route path flags by route type */
+static const fib_route_path_flags_t
+ lcp_router_route_type_frpflags[__RTN_MAX] = {
+ [RTN_UNREACHABLE] = FIB_ROUTE_PATH_ICMP_UNREACH,
+ [RTN_PROHIBIT] = FIB_ROUTE_PATH_ICMP_PROHIBIT,
+ [RTN_BLACKHOLE] = FIB_ROUTE_PATH_DROP,
+ };
+
+static inline fib_source_t
+lcp_router_proto_fib_source (u8 rt_proto)
+{
+ return (rt_proto <= RTPROT_STATIC) ? lcp_rt_fib_src : lcp_rt_fib_src_dynamic;
+}
+
+static fib_entry_flag_t
+lcp_router_route_mk_entry_flags (uint8_t rtype, int table_id, uint8_t rproto)
+{
+ fib_entry_flag_t fef = FIB_ENTRY_FLAG_NONE;
+
+ fef |= lcp_router_route_type_feflags[rtype];
+ if ((rproto == RTPROT_KERNEL) || PREDICT_FALSE (255 == table_id))
+ /* kernel proto is interface prefixes, 255 is linux's 'local' table */
+ fef |= FIB_ENTRY_FLAG_ATTACHED | FIB_ENTRY_FLAG_CONNECTED;
+
+ return (fef);
+}
+
+static void
+lcp_router_route_del (struct rtnl_route *rr)
+{
+ fib_entry_flag_t entry_flags;
+ uint32_t table_id;
+ fib_prefix_t pfx;
+ lcp_router_table_t *nlt;
+ uint8_t rtype, rproto;
+
+ rtype = rtnl_route_get_type (rr);
+ table_id = rtnl_route_get_table (rr);
+ rproto = rtnl_route_get_protocol (rr);
+
+ /* skip unsupported route types and local table */
+ if (!lcp_router_route_type_valid[rtype] || (table_id == 255))
+ return;
+
+ lcp_router_route_mk_prefix (rr, &pfx);
+ entry_flags = lcp_router_route_mk_entry_flags (rtype, table_id, rproto);
+ nlt = lcp_router_table_find (lcp_router_table_k2f (table_id), pfx.fp_proto);
+
+ LCP_ROUTER_DBG ("route del: %d:%U %U", rtnl_route_get_table (rr),
+ format_fib_prefix, &pfx, format_fib_entry_flags,
+ entry_flags);
+
+ if (NULL == nlt)
+ return;
+
+ lcp_router_route_path_parse_t np = {
+ .route_proto = pfx.fp_proto,
+ .type_flags = lcp_router_route_type_frpflags[rtype],
+ };
+
+ rtnl_route_foreach_nexthop (rr, lcp_router_route_path_parse, &np);
+ lcp_router_route_path_add_special (rr, &np);
+
+ if (0 != vec_len (np.paths))
+ {
+ fib_source_t fib_src;
+
+ fib_src = lcp_router_proto_fib_source (rproto);
+
+ switch (pfx.fp_proto)
+ {
+ case FIB_PROTOCOL_IP6:
+ fib_table_entry_delete (nlt->nlt_fib_index, &pfx, fib_src);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
+ np.paths);
+ /* delete the EOS route in addition to NEOS - fallthrough */
+ pfx.fp_eos = MPLS_EOS;
+ default:
+ fib_table_entry_path_remove2 (nlt->nlt_fib_index, &pfx, fib_src,
+ np.paths);
+ }
+ }
+
+ vec_free (np.paths);
+
+ lcp_router_table_unlock (nlt);
+}
+
+static fib_route_path_t *
+lcp_router_fib_route_path_dup (fib_route_path_t *old)
+{
+ int idx;
+ fib_route_path_t *p;
+
+ fib_route_path_t *new = vec_dup (old);
+ if (!new)
+ return NULL;
+
+ for (idx = 0; idx < vec_len (new); idx++)
+ {
+ p = &new[idx];
+ if (p->frp_label_stack)
+ p->frp_label_stack = vec_dup (p->frp_label_stack);
+ }
+
+ return new;
+}
+
+static void
+lcp_router_route_add (struct rtnl_route *rr, int is_replace)
+{
+ fib_entry_flag_t entry_flags;
+ uint32_t table_id;
+ fib_prefix_t pfx;
+ lcp_router_table_t *nlt;
+ uint8_t rtype, rproto;
+
+ rtype = rtnl_route_get_type (rr);
+ table_id = rtnl_route_get_table (rr);
+ rproto = rtnl_route_get_protocol (rr);
+
+ /* skip unsupported route types and local table */
+ if (!lcp_router_route_type_valid[rtype] || (table_id == 255))
+ return;
+
+ lcp_router_route_mk_prefix (rr, &pfx);
+ entry_flags = lcp_router_route_mk_entry_flags (rtype, table_id, rproto);
+
+ nlt = lcp_router_table_add_or_lock (table_id, pfx.fp_proto);
+ /* Skip any kernel routes and IPv6 LL or multicast routes */
+ if (rproto == RTPROT_KERNEL ||
+ (FIB_PROTOCOL_IP6 == pfx.fp_proto &&
+ (ip6_address_is_multicast (&pfx.fp_addr.ip6) ||
+ ip6_address_is_link_local_unicast (&pfx.fp_addr.ip6))))
+ {
+ LCP_ROUTER_DBG ("route skip: %d:%U %U", rtnl_route_get_table (rr),
+ format_fib_prefix, &pfx, format_fib_entry_flags,
+ entry_flags);
+ return;
+ }
+ LCP_ROUTER_DBG ("route %s: %d:%U %U", is_replace ? "replace" : "add",
+ rtnl_route_get_table (rr), format_fib_prefix, &pfx,
+ format_fib_entry_flags, entry_flags);
+
+ lcp_router_route_path_parse_t np = {
+ .route_proto = pfx.fp_proto,
+ .is_mcast = (rtype == RTN_MULTICAST),
+ .type_flags = lcp_router_route_type_frpflags[rtype],
+ .preference = (u8) rtnl_route_get_priority (rr),
+ };
+
+ rtnl_route_foreach_nexthop (rr, lcp_router_route_path_parse, &np);
+ lcp_router_route_path_add_special (rr, &np);
+
+ if (0 != vec_len (np.paths))
+ {
+ if (rtype == RTN_MULTICAST)
+ {
+ /* it's not clear to me how linux expresses the RPF paramters
+ * so we'll allow from all interfaces and hope for the best */
+ mfib_prefix_t mpfx = {};
+
+ lcp_router_route_mk_mprefix (rr, &mpfx);
+
+ mfib_table_entry_update (nlt->nlt_mfib_index, &mpfx,
+ MFIB_SOURCE_PLUGIN_LOW, MFIB_RPF_ID_NONE,
+ MFIB_ENTRY_FLAG_ACCEPT_ALL_ITF);
+
+ mfib_table_entry_paths_update (nlt->nlt_mfib_index, &mpfx,
+ MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_ENTRY_FLAG_NONE, np.paths);
+ }
+ else
+ {
+ fib_source_t fib_src;
+ const fib_route_path_t *rpath;
+
+ vec_foreach (rpath, np.paths)
+ {
+ if (fib_route_path_is_attached (rpath))
+ {
+ entry_flags |= FIB_ENTRY_FLAG_ATTACHED;
+ break;
+ }
+ }
+
+ fib_src = lcp_router_proto_fib_source (rproto);
+
+ if (pfx.fp_proto == FIB_PROTOCOL_MPLS)
+ {
+ /* in order to avoid double-frees, we duplicate the paths. */
+ fib_route_path_t *pathdup =
+ lcp_router_fib_route_path_dup (np.paths);
+ if (is_replace)
+ fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, pathdup);
+ else
+ fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, pathdup);
+ vec_free (pathdup);
+
+ /* install EOS route in addition to NEOS */
+ pfx.fp_eos = MPLS_EOS;
+ pfx.fp_payload_proto = np.paths[0].frp_proto;
+ }
+
+ if (is_replace)
+ fib_table_entry_update (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, np.paths);
+ else
+ fib_table_entry_path_add2 (nlt->nlt_fib_index, &pfx, fib_src,
+ entry_flags, np.paths);
+ }
+ }
+ else
+ {
+ LCP_ROUTER_DBG ("no paths for route: %d:%U %U",
+ rtnl_route_get_table (rr), format_fib_prefix, &pfx,
+ format_fib_entry_flags, entry_flags);
+ }
+ vec_free (np.paths);
+}
+
+static void
+lcp_router_route_sync_begin (void)
+{
+ lcp_router_table_t *nlt;
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ fib_table_mark (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
+ fib_table_mark (nlt->nlt_fib_index, nlt->nlt_proto,
+ lcp_rt_fib_src_dynamic);
+
+ LCP_ROUTER_INFO ("Begin synchronization of %U routes in table %u",
+ format_fib_protocol, nlt->nlt_proto,
+ nlt->nlt_fib_index);
+ }
+}
+
+static void
+lcp_router_route_sync_end (void)
+{
+ lcp_router_table_t *nlt;
+
+ pool_foreach (nlt, lcp_router_table_pool)
+ {
+ fib_table_sweep (nlt->nlt_fib_index, nlt->nlt_proto, lcp_rt_fib_src);
+ fib_table_sweep (nlt->nlt_fib_index, nlt->nlt_proto,
+ lcp_rt_fib_src_dynamic);
+
+ LCP_ROUTER_INFO ("End synchronization of %U routes in table %u",
+ format_fib_protocol, nlt->nlt_proto,
+ nlt->nlt_fib_index);
+ }
+}
+
+typedef struct lcp_router_table_flush_ctx_t_
+{
+ fib_node_index_t *lrtf_entries;
+ u32 *lrtf_sw_if_index_to_bool;
+ fib_source_t lrtf_source;
+} lcp_router_table_flush_ctx_t;
+
+static fib_table_walk_rc_t
+lcp_router_table_flush_cb (fib_node_index_t fib_entry_index, void *arg)
+{
+ lcp_router_table_flush_ctx_t *ctx = arg;
+ u32 sw_if_index;
+
+ sw_if_index = fib_entry_get_resolving_interface_for_source (
+ fib_entry_index, ctx->lrtf_source);
+
+ if (sw_if_index < vec_len (ctx->lrtf_sw_if_index_to_bool) &&
+ ctx->lrtf_sw_if_index_to_bool[sw_if_index])
+ {
+ vec_add1 (ctx->lrtf_entries, fib_entry_index);
+ }
+ return (FIB_TABLE_WALK_CONTINUE);
+}
+
+static void
+lcp_router_table_flush (lcp_router_table_t *nlt, u32 *sw_if_index_to_bool,
+ fib_source_t source)
+{
+ fib_node_index_t *fib_entry_index;
+ lcp_router_table_flush_ctx_t ctx = {
+ .lrtf_entries = NULL,
+ .lrtf_sw_if_index_to_bool = sw_if_index_to_bool,
+ .lrtf_source = source,
+ };
+
+ LCP_ROUTER_DBG (
+ "Flush table: proto %U, fib-index %u, max sw_if_index %u, source %U",
+ format_fib_protocol, nlt->nlt_proto, nlt->nlt_fib_index,
+ vec_len (sw_if_index_to_bool) - 1, format_fib_source, source);
+
+ fib_table_walk (nlt->nlt_fib_index, nlt->nlt_proto,
+ lcp_router_table_flush_cb, &ctx);
+
+ LCP_ROUTER_DBG ("Flush table: entries number to delete %u",
+ vec_len (ctx.lrtf_entries));
+
+ vec_foreach (fib_entry_index, ctx.lrtf_entries)
+ {
+ fib_table_entry_delete_index (*fib_entry_index, source);
+ lcp_router_table_unlock (nlt);
+ }
+
+ vec_free (ctx.lrtf_entries);
+}
+
+const nl_vft_t lcp_router_vft = {
+ .nvl_rt_link_add = { .is_mp_safe = 0, .cb = lcp_router_link_add },
+ .nvl_rt_link_del = { .is_mp_safe = 0, .cb = lcp_router_link_del },
+ .nvl_rt_link_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_link_sync_begin },
+ .nvl_rt_link_sync_end = { .is_mp_safe = 0, .cb = lcp_router_link_sync_end },
+ .nvl_rt_addr_add = { .is_mp_safe = 0, .cb = lcp_router_link_addr_add },
+ .nvl_rt_addr_del = { .is_mp_safe = 0, .cb = lcp_router_link_addr_del },
+ .nvl_rt_addr_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_link_addr_sync_begin },
+ .nvl_rt_addr_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_link_addr_sync_end },
+ .nvl_rt_neigh_add = { .is_mp_safe = 0, .cb = lcp_router_neigh_add },
+ .nvl_rt_neigh_del = { .is_mp_safe = 0, .cb = lcp_router_neigh_del },
+ .nvl_rt_neigh_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_neigh_sync_begin },
+ .nvl_rt_neigh_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_neigh_sync_end },
+ .nvl_rt_route_add = { .is_mp_safe = 1, .cb = lcp_router_route_add },
+ .nvl_rt_route_del = { .is_mp_safe = 1, .cb = lcp_router_route_del },
+ .nvl_rt_route_sync_begin = { .is_mp_safe = 0,
+ .cb = lcp_router_route_sync_begin },
+ .nvl_rt_route_sync_end = { .is_mp_safe = 0,
+ .cb = lcp_router_route_sync_end },
+};
+
+static clib_error_t *
+lcp_router_init (vlib_main_t *vm)
+{
+ lcp_router_logger = vlib_log_register_class ("linux-cp", "router");
+
+ nl_register_vft (&lcp_router_vft);
+
+ /*
+ * allocate 2 route sources. The low priority source will be for
+ * dynamic routes. If a dynamic route daemon (FRR) tries to remove its
+ * route, it will use the low priority source to ensure it will not
+ * remove static routes which were added with the higher priority source.
+ */
+ lcp_rt_fib_src =
+ fib_source_allocate ("lcp-rt", FIB_SOURCE_PRIORITY_HI, FIB_SOURCE_BH_API);
+
+ lcp_rt_fib_src_dynamic = fib_source_allocate (
+ "lcp-rt-dynamic", FIB_SOURCE_PRIORITY_HI + 1, FIB_SOURCE_BH_API);
+
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (lcp_router_init) = {
+ .runs_before = VLIB_INITS ("lcp_nl_init"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/lisp/CMakeLists.txt b/src/plugins/lisp/CMakeLists.txt
index bf0d60aab0f..743a17e69c8 100644
--- a/src/plugins/lisp/CMakeLists.txt
+++ b/src/plugins/lisp/CMakeLists.txt
@@ -11,6 +11,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+if(NOT OPENSSL_FOUND)
+ message(WARNING "OpenSSL not found - lisp plugin disabled")
+ return()
+endif()
+
##############################################################################
# LISP control plane: lisp-cp
##############################################################################
diff --git a/src/plugins/lisp/lisp-cp/control.c b/src/plugins/lisp/lisp-cp/control.c
index 7293e03eea0..692048ebd21 100644
--- a/src/plugins/lisp/lisp-cp/control.c
+++ b/src/plugins/lisp/lisp-cp/control.c
@@ -581,7 +581,6 @@ vnet_lisp_adjacencies_get_by_vni (u32 vni)
fwd_entry_t *fwd;
lisp_adjacency_t *adjs = 0, adj;
- /* *INDENT-OFF* */
pool_foreach (fwd, lcm->fwd_entry_pool)
{
if (gid_address_vni (&fwd->reid) != vni)
@@ -591,7 +590,6 @@ vnet_lisp_adjacencies_get_by_vni (u32 vni)
gid_address_copy (&adj.leid, &fwd->leid);
vec_add1 (adjs, adj);
}
- /* *INDENT-ON* */
return adjs;
}
@@ -804,7 +802,6 @@ vnet_lisp_map_cache_add_del (vnet_lisp_add_del_mapping_args_t * a,
/* Remove remote (if present) from the vectors of lcl-to-rmts
* TODO: Address this in a more efficient way.
*/
- /* *INDENT-OFF* */
pool_foreach (rmts, lcm->lcl_to_rmt_adjacencies)
{
vec_foreach_index (rmts_itr, rmts[0])
@@ -817,7 +814,6 @@ vnet_lisp_map_cache_add_del (vnet_lisp_add_del_mapping_args_t * a,
}
}
}
- /* *INDENT-ON* */
}
/* remove mapping from dictionary */
@@ -1389,12 +1385,10 @@ vnet_lisp_clear_all_remote_adjacencies (void)
vnet_lisp_add_del_mapping_args_t _dm_args, *dm_args = &_dm_args;
vnet_lisp_add_del_locator_set_args_t _ls, *ls = &_ls;
- /* *INDENT-OFF* */
pool_foreach_index (mi, lcm->mapping_pool)
{
vec_add1 (map_indices, mi);
}
- /* *INDENT-ON* */
vec_foreach (map_indexp, map_indices)
{
@@ -2167,7 +2161,6 @@ lisp_cp_enable_l2_l3_ifaces (lisp_cp_main_t * lcm, u8 with_default_route)
{
u32 vni, dp_table;
- /* *INDENT-OFF* */
hash_foreach(vni, dp_table, lcm->table_id_by_vni, ({
dp_add_del_iface(lcm, vni, /* is_l2 */ 0, /* is_add */1,
with_default_route);
@@ -2176,7 +2169,6 @@ lisp_cp_enable_l2_l3_ifaces (lisp_cp_main_t * lcm, u8 with_default_route)
dp_add_del_iface(lcm, vni, /* is_l2 */ 1, 1,
with_default_route);
}));
- /* *INDENT-ON* */
}
static void
@@ -2188,12 +2180,10 @@ lisp_cp_disable_l2_l3_ifaces (lisp_cp_main_t * lcm)
hash_free (lcm->fwd_entry_by_mapping_index);
pool_free (lcm->fwd_entry_pool);
/* Clear state tracking rmt-lcl fwd entries */
- /* *INDENT-OFF* */
pool_foreach (rmts, lcm->lcl_to_rmt_adjacencies)
{
vec_free(rmts[0]);
}
- /* *INDENT-ON* */
hash_free (lcm->lcl_to_rmt_adjs_by_lcl_idx);
pool_free (lcm->lcl_to_rmt_adjacencies);
}
@@ -2465,7 +2455,6 @@ build_itr_rloc_list (lisp_cp_main_t * lcm, locator_set_t * loc_set)
/* Add ipv4 locators first TODO sort them */
- /* *INDENT-OFF* */
foreach_ip_interface_address (&lcm->im4->lookup_main, ia,
loc->sw_if_index, 1 /* unnumbered */,
({
@@ -2486,7 +2475,6 @@ build_itr_rloc_list (lisp_cp_main_t * lcm, locator_set_t * loc_set)
ip_prefix_normalize (ippref);
vec_add1 (rlocs, gid[0]);
}));
- /* *INDENT-ON* */
}
return rlocs;
@@ -2638,7 +2626,6 @@ add_locators (lisp_cp_main_t * lcm, mapping_t * m, u32 locator_set_index,
new = loc[0];
if (loc->local)
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (&lcm->im4->lookup_main, ia,
loc->sw_if_index, 1 /* unnumbered */,
({
@@ -2655,7 +2642,6 @@ add_locators (lisp_cp_main_t * lcm, mapping_t * m, u32 locator_set_index,
ia);
ip_address_set (new_ip, addr, AF_IP6);
}));
- /* *INDENT-ON* */
if (probed_loc && ip_address_cmp (probed_loc, new_ip) == 0)
new.probed = 1;
@@ -2669,7 +2655,6 @@ build_map_register_record_list (lisp_cp_main_t * lcm)
{
mapping_t *recs = 0, rec, *m;
- /* *INDENT-OFF* */
pool_foreach (m, lcm->mapping_pool)
{
/* for now build only local mappings */
@@ -2680,7 +2665,6 @@ build_map_register_record_list (lisp_cp_main_t * lcm)
add_locators (lcm, &rec, m->locator_set_index, NULL);
vec_add1 (recs, rec);
}
- /* *INDENT-ON* */
return recs;
}
@@ -2834,7 +2818,6 @@ lisp_cp_output (vlib_main_t * vm, vlib_node_runtime_t * node,
}
/* placeholder node used only for statistics */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_output_node) = {
.function = lisp_cp_output,
.name = "lisp-cp-output",
@@ -2851,7 +2834,6 @@ VLIB_REGISTER_NODE (lisp_cp_output_node) = {
[LISP_CP_INPUT_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
static int
send_rloc_probe (lisp_cp_main_t * lcm, gid_address_t * deid,
@@ -2899,7 +2881,6 @@ send_rloc_probes (lisp_cp_main_t * lcm)
locator_pair_t *lp;
u32 si, rloc_probes_sent = 0;
- /* *INDENT-OFF* */
pool_foreach (e, lcm->fwd_entry_pool)
{
if (vec_len (e->locator_pairs) == 0)
@@ -2929,7 +2910,6 @@ send_rloc_probes (lisp_cp_main_t * lcm)
rloc_probes_sent++;
}
}
- /* *INDENT-ON* */
vlib_node_increment_counter (vlib_get_main (), lisp_cp_output_node.index,
LISP_CP_OUTPUT_ERROR_RLOC_PROBES_SENT,
@@ -3038,7 +3018,6 @@ _send_encapsulated_map_request (lisp_cp_main_t * lcm,
/* if there is already a pending request remember it */
- /* *INDENT-OFF* */
pool_foreach (pmr, lcm->pending_map_requests_pool)
{
if (!gid_address_cmp (&pmr->src, seid)
@@ -3048,7 +3027,6 @@ _send_encapsulated_map_request (lisp_cp_main_t * lcm,
break;
}
}
- /* *INDENT-ON* */
if (!is_resend && duplicate_pmr)
{
@@ -3597,7 +3575,6 @@ lisp_cp_lookup_nsh (vlib_main_t * vm,
return (lisp_cp_lookup_inline (vm, node, from_frame, LISP_AFI_LCAF));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_lookup_ip4_node) = {
.function = lisp_cp_lookup_ip4,
.name = "lisp-cp-lookup-ip4",
@@ -3615,9 +3592,7 @@ VLIB_REGISTER_NODE (lisp_cp_lookup_ip4_node) = {
[LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_lookup_ip6_node) = {
.function = lisp_cp_lookup_ip6,
.name = "lisp-cp-lookup-ip6",
@@ -3635,9 +3610,7 @@ VLIB_REGISTER_NODE (lisp_cp_lookup_ip6_node) = {
[LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_lookup_l2_node) = {
.function = lisp_cp_lookup_l2,
.name = "lisp-cp-lookup-l2",
@@ -3655,9 +3628,7 @@ VLIB_REGISTER_NODE (lisp_cp_lookup_l2_node) = {
[LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_lookup_nsh_node) = {
.function = lisp_cp_lookup_nsh,
.name = "lisp-cp-lookup-nsh",
@@ -3675,7 +3646,6 @@ VLIB_REGISTER_NODE (lisp_cp_lookup_nsh_node) = {
[LISP_CP_LOOKUP_NEXT_ARP_NDP_REPLY_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
/* lisp_cp_input statistics */
#define foreach_lisp_cp_input_error \
@@ -3890,11 +3860,9 @@ process_map_reply (map_records_arg_t * a)
/* remove pending map request entry */
- /* *INDENT-OFF* */
clib_fifo_foreach (noncep, pmr->nonces, ({
hash_unset(lcm->pending_map_requests_by_nonce, noncep[0]);
}));
- /* *INDENT-ON* */
clib_fifo_free (pmr->nonces);
pool_put (lcm->pending_map_requests_pool, pmr);
@@ -4063,12 +4031,10 @@ map_record_args_get ()
map_records_arg_t *rec;
/* Cleanup first */
- /* *INDENT-OFF* */
pool_foreach (rec, lcm->map_records_args_pool[vlib_get_thread_index()]) {
if (rec->is_free)
map_records_arg_free (rec);
}
- /* *INDENT-ON* */
pool_get (lcm->map_records_args_pool[vlib_get_thread_index ()], rec);
return rec;
@@ -4276,6 +4242,11 @@ process_map_request (vlib_main_t * vm, vlib_node_runtime_t * node,
rloc_probe_recv++;
clib_memset (&m, 0, sizeof (m));
u32 mi = gid_dictionary_lookup (&lcm->mapping_index_by_gid, &dst);
+ if (GID_LOOKUP_MISS == mi)
+ {
+ clib_warning ("Cannot find mapping index by gid!");
+ continue;
+ }
// TODO: select best locator; for now use the first one
dst_loc = &gid_address_ip (&itr_rlocs[0]);
@@ -4443,7 +4414,6 @@ lisp_cp_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_cp_input_node) = {
.function = lisp_cp_input,
.name = "lisp-cp-input",
@@ -4460,7 +4430,6 @@ VLIB_REGISTER_NODE (lisp_cp_input_node) = {
[LISP_CP_INPUT_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
clib_error_t *
lisp_cp_init (vlib_main_t * vm)
@@ -4548,13 +4517,11 @@ vnet_lisp_get_stats (void)
lisp_stats_key_t *key;
u32 index;
- /* *INDENT-OFF* */
hash_foreach_mem (key, index, lgm->lisp_stats_index_by_key,
{
if (lisp_stats_api_fill (lcm, lgm, &stat, key, index))
vec_add1 (stats, stat);
});
- /* *INDENT-ON* */
return stats;
}
@@ -4650,7 +4617,6 @@ remove_dead_pending_map_requests (lisp_cp_main_t * lcm)
pending_map_request_t *pmr;
u32 *to_be_removed = 0, *pmr_index;
- /* *INDENT-OFF* */
pool_foreach (pmr, lcm->pending_map_requests_pool)
{
if (pmr->to_be_removed)
@@ -4662,7 +4628,6 @@ remove_dead_pending_map_requests (lisp_cp_main_t * lcm)
vec_add1 (to_be_removed, pmr - lcm->pending_map_requests_pool);
}
}
- /* *INDENT-ON* */
vec_foreach (pmr_index, to_be_removed)
pool_put_index (lcm->pending_map_requests_pool, pmr_index[0]);
@@ -4748,7 +4713,6 @@ update_map_register (lisp_cp_main_t * lcm, f64 dt)
if (!lcm->is_enabled || !lcm->map_registering)
return;
- /* *INDENT-OFF* */
pool_foreach (pmr, lcm->pending_map_registers_pool)
{
if (!update_pending_map_register (pmr, dt, &del_all))
@@ -4758,7 +4722,6 @@ update_map_register (lisp_cp_main_t * lcm, f64 dt)
vec_add1 (to_be_removed, pmr - lcm->pending_map_registers_pool);
}
}
- /* *INDENT-ON* */
if (del_all)
{
@@ -4808,13 +4771,11 @@ send_map_resolver_service (vlib_main_t * vm,
/* currently no signals are expected - just wait for clock */
(void) vlib_process_get_events (vm, 0);
- /* *INDENT-OFF* */
pool_foreach (pmr, lcm->pending_map_requests_pool)
{
if (!pmr->to_be_removed)
update_pending_request (pmr, period);
}
- /* *INDENT-ON* */
remove_dead_pending_map_requests (lcm);
@@ -4830,7 +4791,7 @@ send_map_resolver_service (vlib_main_t * vm,
{
process_expired_mapping (lcm, mi[0]);
}
- _vec_len (expired) = 0;
+ vec_set_len (expired, 0);
}
}
diff --git a/src/plugins/lisp/lisp-cp/control.h b/src/plugins/lisp/lisp-cp/control.h
index 524f5028465..e65ceafd431 100644
--- a/src/plugins/lisp/lisp-cp/control.h
+++ b/src/plugins/lisp/lisp-cp/control.h
@@ -299,7 +299,7 @@ extern vlib_node_registration_t lisp_cp_input_node;
extern vlib_node_registration_t lisp_cp_lookup_ip4_node;
extern vlib_node_registration_t lisp_cp_lookup_ip6_node;
-clib_error_t *lisp_cp_init ();
+clib_error_t *lisp_cp_init (vlib_main_t *);
always_inline lisp_cp_main_t *
vnet_lisp_cp_get_main ()
diff --git a/src/plugins/lisp/lisp-cp/gid_dictionary.c b/src/plugins/lisp/lisp-cp/gid_dictionary.c
index a0cc9c09e3c..995678ceadc 100644
--- a/src/plugins/lisp/lisp-cp/gid_dictionary.c
+++ b/src/plugins/lisp/lisp-cp/gid_dictionary.c
@@ -488,13 +488,11 @@ ip4_compute_prefix_lengths_in_search_order (gid_ip4_table_t * db)
vec_reset_length (db->ip4_prefix_lengths_in_search_order);
/* Note: bitmap reversed so this is in fact a longest prefix match */
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, db->ip4_non_empty_dst_address_length_bitmap)
{
int dst_address_length = 32 - i;
vec_add1 (db->ip4_prefix_lengths_in_search_order, dst_address_length);
}
- /* *INDENT-ON* */
}
@@ -671,13 +669,11 @@ ip6_compute_prefix_lengths_in_search_order (gid_ip6_table_t * db)
vec_reset_length (db->ip6_prefix_lengths_in_search_order);
/* Note: bitmap reversed so this is in fact a longest prefix match */
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, db->ip6_non_empty_dst_address_length_bitmap)
{
int dst_address_length = 128 - i;
vec_add1 (db->ip6_prefix_lengths_in_search_order, dst_address_length);
}
- /* *INDENT-ON* */
}
static u32
diff --git a/src/plugins/lisp/lisp-cp/lisp_api.c b/src/plugins/lisp/lisp-cp/lisp_api.c
index d8f889a24fa..37267635d85 100644
--- a/src/plugins/lisp/lisp-cp/lisp_api.c
+++ b/src/plugins/lisp/lisp-cp/lisp_api.c
@@ -109,12 +109,10 @@ vl_api_lisp_add_del_locator_set_t_handler (vl_api_lisp_add_del_locator_set_t *
vec_free (locator_name);
vec_free (a->locators);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_LISP_ADD_DEL_LOCATOR_SET_REPLY,
({
rmp->ls_index = clib_host_to_net_u32 (ls_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -287,12 +285,10 @@ static void
int rv = 0;
vl_api_show_lisp_map_request_mode_reply_t *rmp;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_LISP_MAP_REQUEST_MODE_REPLY,
({
rmp->is_src_dst = vnet_lisp_get_map_request_mode ();
}));
- /* *INDENT-ON* */
}
static void
@@ -362,13 +358,11 @@ vl_api_show_lisp_use_petr_t_handler (vl_api_show_lisp_use_petr_t * mp)
}
}
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_LISP_USE_PETR_REPLY,
{
rmp->is_petr_enable = status;
ip_address_encode2 (&gid_address_ip (&addr), &rmp->ip_address);
});
- /* *INDENT-ON* */
}
static void
@@ -589,7 +583,6 @@ vl_api_lisp_locator_set_dump_t_handler (vl_api_lisp_locator_set_dump_t * mp)
return;
filter = mp->filter;
- /* *INDENT-OFF* */
pool_foreach (lsit, lcm->locator_set_pool)
{
if (filter && !((1 == filter && lsit->local) ||
@@ -600,7 +593,6 @@ vl_api_lisp_locator_set_dump_t_handler (vl_api_lisp_locator_set_dump_t * mp)
send_lisp_locator_set_details (lcm, lsit, reg, mp->context,
lsit - lcm->locator_set_pool);
}
- /* *INDENT-ON* */
}
static void
@@ -703,13 +695,11 @@ vl_api_lisp_eid_table_dump_t_handler (vl_api_lisp_eid_table_dump_t * mp)
}
else
{
- /* *INDENT-OFF* */
pool_foreach (mapit, lcm->mapping_pool)
{
send_lisp_eid_table_details(mapit, reg, mp->context,
mp->filter);
}
- /* *INDENT-ON* */
}
}
@@ -820,12 +810,10 @@ vl_api_lisp_eid_table_map_dump_t_handler (vl_api_lisp_eid_table_map_dump_t *
vni_table = lcm->table_id_by_vni;
}
- /* *INDENT-OFF* */
hash_foreach_pair (p, vni_table,
({
send_eid_table_map_pair (p, reg, mp->context);
}));
- /* *INDENT-ON* */
}
static void
@@ -868,12 +856,10 @@ static void
vl_api_show_lisp_rloc_probe_state_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_LISP_RLOC_PROBE_STATE_REPLY,
{
rmp->is_enabled = vnet_lisp_rloc_probe_state_get ();
});
- /* *INDENT-ON* */
}
static void
@@ -883,12 +869,10 @@ static void
vl_api_show_lisp_map_register_state_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_LISP_MAP_REGISTER_STATE_REPLY,
{
rmp->is_enabled = vnet_lisp_map_register_state_get ();
});
- /* *INDENT-ON* */
}
static void
@@ -903,13 +887,11 @@ vl_api_lisp_adjacencies_get_t_handler (vl_api_lisp_adjacencies_get_t * mp)
adjs = vnet_lisp_adjacencies_get_by_vni (vni);
size = vec_len (adjs) * sizeof (vl_api_lisp_adjacency_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_LISP_ADJACENCIES_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (vec_len (adjs));
lisp_adjacency_copy (rmp->adjacencies, adjs);
});
- /* *INDENT-ON* */
vec_free (adjs);
}
@@ -927,7 +909,6 @@ vl_api_lisp_eid_table_vni_dump_t_handler (vl_api_lisp_eid_table_vni_dump_t *
if (!reg)
return;
- /* *INDENT-OFF* */
hash_foreach_pair (p, lcm->table_id_by_vni,
({
hash_set (vnis, p->key, 0);
@@ -942,7 +923,6 @@ vl_api_lisp_eid_table_vni_dump_t_handler (vl_api_lisp_eid_table_vni_dump_t *
({
send_eid_table_vni (p->key, reg, mp->context);
}));
- /* *INDENT-ON* */
hash_free (vnis);
}
@@ -953,13 +933,11 @@ vl_api_show_lisp_status_t_handler (vl_api_show_lisp_status_t * mp)
vl_api_show_lisp_status_reply_t *rmp = NULL;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_LISP_STATUS_REPLY,
({
rmp->is_gpe_enabled = vnet_lisp_gpe_enable_disable_status ();
rmp->is_lisp_enabled = vnet_lisp_enable_disable_status ();
}));
- /* *INDENT-ON* */
}
static void
@@ -983,13 +961,11 @@ static void
tmp_str = format (0, "%s", loc_set->name);
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_LISP_GET_MAP_REQUEST_ITR_RLOCS_REPLY,
({
strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
ARRAY_LEN(rmp->locator_set_name) - 1);
}));
- /* *INDENT-ON* */
vec_free (tmp_str);
}
@@ -1027,14 +1003,12 @@ vl_api_show_lisp_pitr_t_handler (vl_api_show_lisp_pitr_t * mp)
}
vec_add1 (tmp_str, 0);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_LISP_PITR_REPLY,
({
rmp->is_enabled = lcm->flags & LISP_FLAG_PITR_MODE;
strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
ARRAY_LEN(rmp->locator_set_name) - 1);
}));
- /* *INDENT-ON* */
}
/*
diff --git a/src/plugins/lisp/lisp-cp/lisp_cli.c b/src/plugins/lisp/lisp-cp/lisp_cli.c
index 569d695b033..6c15898216d 100644
--- a/src/plugins/lisp/lisp-cp/lisp_cli.c
+++ b/src/plugins/lisp/lisp-cp/lisp_cli.c
@@ -64,13 +64,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_adjacencies_command) = {
.path = "show lisp adjacencies",
.short_help = "show lisp adjacencies",
.function = lisp_show_adjacencies_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_map_server_command_fn (vlib_main_t * vm,
@@ -120,13 +118,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_map_server_command) = {
.path = "lisp map-server",
.short_help = "lisp map-server add|del <ip>",
.function = lisp_add_del_map_server_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -227,14 +223,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_local_eid_command) = {
.path = "lisp eid-table",
.short_help = "lisp eid-table add/del [vni <vni>] eid <eid> "
"locator-set <locator-set> [key <secret-key> key-id sha1|sha256 ]",
.function = lisp_add_del_local_eid_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_eid_table_map_command_fn (vlib_main_t * vm,
@@ -274,13 +268,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_eid_table_map_command) = {
.path = "lisp eid-table map",
.short_help = "lisp eid-table map [del] vni <vni> vrf <vrf> | bd <bdi>",
.function = lisp_eid_table_map_command_fn,
};
-/* *INDENT-ON* */
/**
* Handler for add/del remote mapping CLI.
@@ -418,7 +410,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_remote_mapping_command) = {
.path = "lisp remote-mapping",
.short_help = "lisp remote-mapping add|del [del-all] vni <vni> "
@@ -427,7 +418,6 @@ VLIB_CLI_COMMAND (lisp_add_del_remote_mapping_command) = {
"w <weight> [rloc <dst-locator> ... ]",
.function = lisp_add_del_remote_mapping_command_fn,
};
-/* *INDENT-ON* */
/**
* Handler for add/del adjacency CLI.
@@ -525,14 +515,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_adjacency_command) = {
.path = "lisp adjacency",
.short_help = "lisp adjacency add|del vni <vni> reid <remote-eid> "
"leid <local-eid>",
.function = lisp_add_del_adjacency_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -575,13 +563,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_map_request_mode_command) = {
.path = "lisp map-request mode",
.short_help = "lisp map-request mode dst-only|src-dst",
.function = lisp_map_request_mode_command_fn,
};
-/* *INDENT-ON* */
static u8 *
@@ -609,13 +595,11 @@ lisp_show_map_request_mode_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_map_request_mode_command) = {
.path = "show lisp map-request mode",
.short_help = "show lisp map-request mode",
.function = lisp_show_map_request_mode_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_map_resolvers_command_fn (vlib_main_t * vm,
@@ -632,13 +616,11 @@ lisp_show_map_resolvers_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_map_resolvers_command) = {
.path = "show lisp map-resolvers",
.short_help = "show lisp map-resolvers",
.function = lisp_show_map_resolvers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -691,13 +673,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_pitr_set_locator_set_command) = {
.path = "lisp pitr",
.short_help = "lisp pitr [disable] ls <locator-set-name>",
.function = lisp_pitr_set_locator_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_pitr_command_fn (vlib_main_t * vm,
@@ -744,13 +724,11 @@ lisp_show_pitr_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_pitr_command) = {
.path = "show lisp pitr",
.short_help = "Show pitr",
.function = lisp_show_pitr_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_eid_entry (u8 * s, va_list * args)
@@ -840,7 +818,6 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm,
if (print_all)
{
- /* *INDENT-OFF* */
pool_foreach (mapit, lcm->mapping_pool)
{
if (mapit->pitr_set)
@@ -856,7 +833,6 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_eid_entry, lcm->vnet_main,
lcm, mapit, ls);
}
- /* *INDENT-ON* */
}
else
{
@@ -884,13 +860,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_show_eid_table_command) = {
.path = "show lisp eid-table",
.short_help = "show lisp eid-table [local|remote|eid <eid>]",
.function = lisp_show_eid_table_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -906,13 +880,11 @@ lisp_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_enable_command) = {
.path = "lisp enable",
.short_help = "lisp enable",
.function = lisp_enable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_disable_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -927,13 +899,11 @@ lisp_disable_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_disable_command) = {
.path = "lisp disable",
.short_help = "lisp disable",
.function = lisp_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_enable_disable_command_fn (vlib_main_t * vm,
@@ -980,13 +950,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_map_register_enable_disable_command) = {
.path = "lisp map-register",
.short_help = "lisp map-register [enable|disable]",
.function = lisp_map_register_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_rloc_probe_enable_disable_command_fn (vlib_main_t * vm,
@@ -1033,13 +1001,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_rloc_probe_enable_disable_command) = {
.path = "lisp rloc-probe",
.short_help = "lisp rloc-probe [enable|disable]",
.function = lisp_rloc_probe_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_lisp_status (u8 * s, va_list * args)
@@ -1060,13 +1026,11 @@ lisp_show_status_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_status_command) = {
.path = "show lisp status",
.short_help = "show lisp status",
.function = lisp_show_status_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_eid_table_map_command_fn (vlib_main_t * vm,
@@ -1112,12 +1076,10 @@ lisp_show_eid_table_map_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%=10s%=10s", "VNI", is_l2 ? "BD" : "VRF");
- /* *INDENT-OFF* */
hash_foreach_pair (p, vni_table,
({
vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
}));
- /* *INDENT-ON* */
done:
unformat_free (line_input);
@@ -1125,13 +1087,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_eid_table_map_command) = {
.path = "show lisp eid-table map",
.short_help = "show lisp eid-table map l2|l3",
.function = lisp_show_eid_table_map_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1201,14 +1161,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_add_del_locator_set_command) = {
.path = "lisp locator-set",
.short_help = "lisp locator-set add/del <name> [iface <iface-name> "
"p <priority> w <weight>]",
.function = lisp_add_del_locator_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_locator_in_set_command_fn (vlib_main_t * vm,
@@ -1277,14 +1235,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_add_del_locator_in_set_command) = {
.path = "lisp locator",
.short_help = "lisp locator add/del locator-set <name> iface <iface-name> "
"p <priority> w <weight>",
.function = lisp_add_del_locator_in_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
@@ -1299,7 +1255,6 @@ lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%s%=16s%=16s%=16s", "Locator-set", "Locator",
"Priority", "Weight");
- /* *INDENT-OFF* */
pool_foreach (lsit, lcm->locator_set_pool)
{
u8 * msg = 0;
@@ -1331,17 +1286,14 @@ lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%v", msg);
vec_free (msg);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_show_locator_sets_command) = {
.path = "show lisp locator-set",
.short_help = "Shows locator-sets",
.function = lisp_cp_show_locator_sets_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1396,13 +1348,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_map_resolver_command) = {
.path = "lisp map-resolver",
.short_help = "lisp map-resolver add/del <ip_address>",
.function = lisp_add_del_map_resolver_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1451,13 +1401,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_del_map_request_command) = {
.path = "lisp map-request itr-rlocs",
.short_help = "lisp map-request itr-rlocs add/del <locator_set_name>",
.function = lisp_add_del_mreq_itr_rlocs_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
@@ -1481,13 +1429,11 @@ lisp_show_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_map_request_command) = {
.path = "show lisp map-request itr-rlocs",
.short_help = "Shows map-request itr-rlocs",
.function = lisp_show_mreq_itr_rlocs_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_use_petr_set_locator_set_command_fn (vlib_main_t * vm,
@@ -1534,7 +1480,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_use_petr_set_locator_set_command) = {
.path = "lisp use-petr",
.short_help = "lisp use-petr [disable] <petr-ip>",
@@ -1586,13 +1531,11 @@ lisp_show_petr_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_petr_command) = {
.path = "show lisp petr",
.short_help = "Show petr",
.function = lisp_show_petr_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lisp/lisp-cp/lisp_cp_test.c b/src/plugins/lisp/lisp-cp/lisp_cp_test.c
index c0284d301a7..a6c3ca92ce0 100644
--- a/src/plugins/lisp/lisp-cp/lisp_cp_test.c
+++ b/src/plugins/lisp/lisp-cp/lisp_cp_test.c
@@ -26,7 +26,7 @@
/* define message IDs */
#include <lisp/lisp-cp/lisp.api_enum.h>
#include <lisp/lisp-cp/lisp.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -41,13 +41,11 @@ lisp_test_main_t lisp_test_main;
#define __plugin_msg_base lisp_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
typedef struct
{
@@ -118,7 +116,6 @@ format_lisp_eid_vat (u8 * s, va_list * args)
-/* *INDENT-OFF* */
/** Used for parsing LISP eids */
typedef struct lisp_eid_vat_t_
{
@@ -132,7 +129,6 @@ typedef struct lisp_eid_vat_t_
/**< type of eid */
u8 type;
} __clib_packed lisp_eid_vat_t;
-/* *INDENT-ON* */
static uword
unformat_lisp_eid_vat (unformat_input_t * input, va_list * args)
diff --git a/src/plugins/lisp/lisp-cp/lisp_msg_serdes.c b/src/plugins/lisp/lisp-cp/lisp_msg_serdes.c
index 14d90982d4f..509462d8e23 100644
--- a/src/plugins/lisp/lisp-cp/lisp_msg_serdes.c
+++ b/src/plugins/lisp/lisp-cp/lisp_msg_serdes.c
@@ -264,9 +264,14 @@ lisp_msg_parse_addr (vlib_buffer_t * b, gid_address_t * eid)
u32 len;
clib_memset (eid, 0, sizeof (*eid));
len = gid_address_parse (vlib_buffer_get_current (b), eid);
- if (len != ~0)
- vlib_buffer_pull (b, len);
- return len;
+ if ((len != ~0) && vlib_buffer_pull (b, len))
+ {
+ return len;
+ }
+ else
+ {
+ return ~0;
+ }
}
u32
@@ -280,7 +285,10 @@ lisp_msg_parse_eid_rec (vlib_buffer_t * b, gid_address_t * eid)
return len;
gid_address_ippref_len (eid) = EID_REC_MLEN (h);
- vlib_buffer_pull (b, len + sizeof (eid_record_hdr_t));
+ if (!vlib_buffer_pull (b, len + sizeof (eid_record_hdr_t)))
+ {
+ return ~0;
+ }
return len + sizeof (eid_record_hdr_t);
}
diff --git a/src/plugins/lisp/lisp-cp/lisp_types.h b/src/plugins/lisp/lisp-cp/lisp_types.h
index 3f7d0302640..e92f8f80c70 100644
--- a/src/plugins/lisp/lisp-cp/lisp_types.h
+++ b/src/plugins/lisp/lisp-cp/lisp_types.h
@@ -198,7 +198,8 @@ u8 gid_address_len (gid_address_t * a);
void *gid_address_cast (gid_address_t * gid, gid_address_type_t type);
void gid_address_copy (gid_address_t * dst, gid_address_t * src);
u32 gid_address_parse (u8 * offset, gid_address_t * a);
-void gid_address_ip_set (gid_address_t * dst, void *src, u8 version);
+void gid_address_ip_set (gid_address_t *dst, void *src,
+ ip_address_family_t version);
#define gid_address_type(_a) (_a)->type
#define gid_address_ippref(_a) (_a)->ippref
@@ -238,7 +239,6 @@ void gid_address_ip_set (gid_address_t * dst, void *src, u8 version);
_(nsh) \
_(sd)
-/* *INDENT-OFF* */
#define _(_n) \
u16 _n ## _size_to_write (void * pref); \
u16 _n ## _write (u8 * p, void * pref); \
@@ -248,12 +248,11 @@ void _n ## _copy (void * dst , void * src);
foreach_gid_address_type_fcns
#undef _
-/* *INDENT-ON* */
always_inline u64
mac_to_u64 (u8 * m)
{
- return (*((u64 *) m) & 0xffffffffffff);
+ return (*(u32 *) m) | ((u64) (*(u16 *) (m + 4)) << 32);
}
typedef struct
diff --git a/src/plugins/lisp/lisp-cp/one_api.c b/src/plugins/lisp/lisp-cp/one_api.c
index 3969dd245ab..b5b523433f2 100644
--- a/src/plugins/lisp/lisp-cp/one_api.c
+++ b/src/plugins/lisp/lisp-cp/one_api.c
@@ -44,7 +44,6 @@ static u32 one_base_msg_id;
#define REPLY_DETAILS(t, body) \
do { \
vl_api_registration_t * reg; \
- rv = vl_msg_api_pd_handler (mp, rv); \
reg = vl_api_client_index_to_registration (mp->client_index); \
if (!reg) \
return; \
@@ -161,12 +160,10 @@ static void
int rv = 0;
u32 ttl = vnet_lisp_map_register_get_ttl ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_MAP_REGISTER_TTL_REPLY,
({
rmp->ttl = clib_host_to_net_u32 (ttl);
}));
- /* *INDENT-ON* */
}
static void
@@ -213,12 +210,10 @@ vl_api_one_add_del_locator_set_t_handler (vl_api_one_add_del_locator_set_t *
vec_free (locator_name);
vec_free (a->locators);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ONE_ADD_DEL_LOCATOR_SET_REPLY,
({
rmp->ls_index = clib_host_to_net_u32 (ls_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -467,12 +462,10 @@ static void
int rv = 0;
vl_api_show_one_map_request_mode_reply_t *rmp;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_ONE_MAP_REQUEST_MODE_REPLY,
({
rmp->mode = vnet_lisp_get_map_request_mode ();
}));
- /* *INDENT-ON* */
}
static void
@@ -560,7 +553,6 @@ vl_api_show_one_use_petr_t_handler (vl_api_show_one_use_petr_t * mp)
}
}
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_USE_PETR_REPLY,
{
rmp->status = status;
@@ -568,7 +560,6 @@ vl_api_show_one_use_petr_t_handler (vl_api_show_one_use_petr_t * mp)
ip_address_encode2 (ip, &rmp->ip_address);
});
- /* *INDENT-ON* */
}
static void
@@ -790,7 +781,6 @@ vl_api_one_locator_set_dump_t_handler (vl_api_one_locator_set_dump_t * mp)
return;
filter = mp->filter;
- /* *INDENT-OFF* */
pool_foreach (lsit, lcm->locator_set_pool)
{
if (filter && !((1 == filter && lsit->local) ||
@@ -801,7 +791,6 @@ vl_api_one_locator_set_dump_t_handler (vl_api_one_locator_set_dump_t * mp)
send_one_locator_set_details (lcm, lsit, reg, mp->context,
lsit - lcm->locator_set_pool);
}
- /* *INDENT-ON* */
}
static void
@@ -920,13 +909,11 @@ vl_api_one_eid_table_dump_t_handler (vl_api_one_eid_table_dump_t * mp)
}
else
{
- /* *INDENT-OFF* */
pool_foreach (mapit, lcm->mapping_pool)
{
send_one_eid_table_details(mapit, reg, mp->context,
mp->filter);
}
- /* *INDENT-ON* */
}
}
@@ -1035,12 +1022,10 @@ vl_api_one_eid_table_map_dump_t_handler (vl_api_one_eid_table_map_dump_t * mp)
vni_table = lcm->table_id_by_vni;
}
- /* *INDENT-OFF* */
hash_foreach_pair (p, vni_table,
({
send_eid_table_map_pair (p, reg, mp->context);
}));
- /* *INDENT-ON* */
}
static void
@@ -1113,12 +1098,10 @@ static void
vl_api_show_one_rloc_probe_state_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_RLOC_PROBE_STATE_REPLY,
{
rmp->is_enable = vnet_lisp_rloc_probe_state_get ();
});
- /* *INDENT-ON* */
}
static void
@@ -1128,12 +1111,10 @@ static void
vl_api_show_one_map_register_state_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_MAP_REGISTER_STATE_REPLY,
{
rmp->is_enable = vnet_lisp_map_register_state_get ();
});
- /* *INDENT-ON* */
}
static void
@@ -1148,13 +1129,11 @@ vl_api_one_adjacencies_get_t_handler (vl_api_one_adjacencies_get_t * mp)
adjs = vnet_lisp_adjacencies_get_by_vni (vni);
size = vec_len (adjs) * sizeof (vl_api_one_adjacency_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_ONE_ADJACENCIES_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (vec_len (adjs));
one_adjacency_copy (rmp->adjacencies, adjs);
});
- /* *INDENT-ON* */
vec_free (adjs);
}
@@ -1171,7 +1150,6 @@ vl_api_one_eid_table_vni_dump_t_handler (vl_api_one_eid_table_vni_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
hash_foreach_pair (p, lcm->table_id_by_vni,
({
hash_set (vnis, p->key, 0);
@@ -1186,7 +1164,6 @@ vl_api_one_eid_table_vni_dump_t_handler (vl_api_one_eid_table_vni_dump_t * mp)
({
send_eid_table_vni (p->key, reg, mp->context);
}));
- /* *INDENT-ON* */
hash_free (vnis);
}
@@ -1197,13 +1174,11 @@ vl_api_show_one_status_t_handler (vl_api_show_one_status_t * mp)
vl_api_show_one_status_reply_t *rmp = NULL;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_ONE_STATUS_REPLY,
({
rmp->gpe_status = vnet_lisp_gpe_enable_disable_status ();
rmp->feature_status = vnet_lisp_enable_disable_status ();
}));
- /* *INDENT-ON* */
}
static void
@@ -1227,13 +1202,11 @@ static void
tmp_str = format (0, "%s", loc_set->name);
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_ONE_GET_MAP_REQUEST_ITR_RLOCS_REPLY,
({
strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
ARRAY_LEN(rmp->locator_set_name) - 1);
}));
- /* *INDENT-ON* */
vec_free (tmp_str);
}
@@ -1270,14 +1243,12 @@ vl_api_show_one_nsh_mapping_t_handler (vl_api_show_one_nsh_mapping_t * mp)
}
vec_add1 (tmp_str, 0);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_ONE_NSH_MAPPING_REPLY,
({
rmp->is_set = is_set;
strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
ARRAY_LEN(rmp->locator_set_name) - 1);
}));
- /* *INDENT-ON* */
}
static void
@@ -1313,14 +1284,12 @@ vl_api_show_one_pitr_t_handler (vl_api_show_one_pitr_t * mp)
}
vec_add1 (tmp_str, 0);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_ONE_PITR_REPLY,
({
rmp->status = lcm->flags & LISP_FLAG_PITR_MODE;
strncpy((char *) rmp->locator_set_name, (char *) tmp_str,
ARRAY_LEN(rmp->locator_set_name) - 1);
}));
- /* *INDENT-ON* */
}
static void
@@ -1330,12 +1299,10 @@ static void
vl_api_show_one_stats_enable_disable_reply_t *rmp = NULL;
vnet_api_error_t rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_STATS_ENABLE_DISABLE_REPLY,
({
rmp->is_enable = vnet_lisp_stats_enable_disable_state ();
}));
- /* *INDENT-ON* */
}
static void
@@ -1363,12 +1330,10 @@ vl_api_one_stats_dump_t_handler (vl_api_one_stats_dump_t * mp)
{
vl_api_one_stats_details_t *rmp;
lisp_api_stats_t *stats, *stat;
- u8 rv = 0;
stats = vnet_lisp_get_stats ();
vec_foreach (stat, stats)
{
- /* *INDENT-OFF* */
REPLY_DETAILS (VL_API_ONE_STATS_DETAILS,
({
fid_to_api_eid (&stat->deid, &rmp->deid);
@@ -1381,7 +1346,6 @@ vl_api_one_stats_dump_t_handler (vl_api_one_stats_dump_t * mp)
rmp->pkt_count = clib_host_to_net_u32 (stat->counters.packets);
rmp->bytes = clib_host_to_net_u32 (stat->counters.bytes);
}));
- /* *INDENT-ON* */
}
}
@@ -1441,7 +1405,6 @@ vl_api_one_ndp_bd_get_t_handler (vl_api_one_ndp_bd_get_t * mp)
u32 *bds = vnet_lisp_ndp_bds_get ();
u32 size = hash_elts (bds) * sizeof (u32);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_ONE_NDP_BD_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (hash_elts (bds));
@@ -1450,7 +1413,6 @@ vl_api_one_ndp_bd_get_t_handler (vl_api_one_ndp_bd_get_t * mp)
rmp->bridge_domains[i++] = clib_host_to_net_u32 (p->key);
}));
});
- /* *INDENT-ON* */
hash_free (bds);
}
@@ -1466,7 +1428,6 @@ vl_api_one_l2_arp_bd_get_t_handler (vl_api_one_l2_arp_bd_get_t * mp)
u32 *bds = vnet_lisp_l2_arp_bds_get ();
u32 size = hash_elts (bds) * sizeof (u32);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_ONE_L2_ARP_BD_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (hash_elts (bds));
@@ -1475,7 +1436,6 @@ vl_api_one_l2_arp_bd_get_t_handler (vl_api_one_l2_arp_bd_get_t * mp)
rmp->bridge_domains[i++] = clib_host_to_net_u32 (p->key);
}));
});
- /* *INDENT-ON* */
hash_free (bds);
}
@@ -1493,7 +1453,6 @@ vl_api_one_l2_arp_entries_get_t_handler (vl_api_one_l2_arp_entries_get_t * mp)
entries = vnet_lisp_l2_arp_entries_get_by_bd (bd);
u32 size = vec_len (entries) * sizeof (vl_api_one_l2_arp_entry_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_ONE_L2_ARP_ENTRIES_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (vec_len (entries));
@@ -1504,7 +1463,6 @@ vl_api_one_l2_arp_entries_get_t_handler (vl_api_one_l2_arp_entries_get_t * mp)
i++;
}
});
- /* *INDENT-ON* */
vec_free (entries);
}
@@ -1530,12 +1488,10 @@ static void
u32 value = vnet_lisp_map_register_fallback_threshold_get ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SHOW_ONE_MAP_REGISTER_FALLBACK_THRESHOLD_REPLY,
({
rmp->value = clib_host_to_net_u32 (value);
}));
- /* *INDENT-ON* */
}
static void
@@ -1558,12 +1514,10 @@ static void
int rv = 0;
u8 proto = (u8) vnet_lisp_get_transport_protocol ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ONE_GET_TRANSPORT_PROTOCOL_REPLY,
({
rmp->protocol = proto;
}));
- /* *INDENT-ON* */
}
static void
@@ -1579,7 +1533,6 @@ vl_api_one_ndp_entries_get_t_handler (vl_api_one_ndp_entries_get_t * mp)
entries = vnet_lisp_ndp_entries_get_by_bd (bd);
u32 size = vec_len (entries) * sizeof (vl_api_one_ndp_entry_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_ONE_NDP_ENTRIES_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (vec_len (entries));
@@ -1590,7 +1543,6 @@ vl_api_one_ndp_entries_get_t_handler (vl_api_one_ndp_entries_get_t * mp)
i++;
}
});
- /* *INDENT-ON* */
vec_free (entries);
}
@@ -1611,12 +1563,10 @@ vl_api_one_show_xtr_mode_t_handler (vl_api_one_show_xtr_mode_t * mp)
vl_api_one_show_xtr_mode_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ONE_SHOW_XTR_MODE_REPLY,
{
rmp->is_enable = vnet_lisp_get_xtr_mode ();
});
- /* *INDENT-ON* */
}
static void
@@ -1635,12 +1585,10 @@ vl_api_one_show_pitr_mode_t_handler (vl_api_one_show_pitr_mode_t * mp)
vl_api_one_show_pitr_mode_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ONE_SHOW_PITR_MODE_REPLY,
{
rmp->is_enable = vnet_lisp_get_pitr_mode ();
});
- /* *INDENT-ON* */
}
static void
@@ -1659,12 +1607,10 @@ vl_api_one_show_petr_mode_t_handler (vl_api_one_show_petr_mode_t * mp)
vl_api_one_show_petr_mode_reply_t *rmp = 0;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_ONE_SHOW_PETR_MODE_REPLY,
{
rmp->is_enable = vnet_lisp_get_petr_mode ();
});
- /* *INDENT-ON* */
}
/*
@@ -1689,12 +1635,10 @@ VLIB_API_INIT_FUNCTION (one_api_hookup);
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Locator ID Separation Protocol (LISP)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lisp/lisp-cp/one_cli.c b/src/plugins/lisp/lisp-cp/one_cli.c
index b85fea16ea7..8658d5fafd7 100644
--- a/src/plugins/lisp/lisp-cp/one_cli.c
+++ b/src/plugins/lisp/lisp-cp/one_cli.c
@@ -62,13 +62,11 @@ lisp_show_adjacencies_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_adjacencies_command) = {
.path = "show one adjacencies",
.short_help = "show one adjacencies",
.function = lisp_show_adjacencies_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_map_server_command_fn (vlib_main_t * vm,
@@ -116,13 +114,11 @@ lisp_add_del_map_server_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_map_server_command) = {
.path = "one map-server",
.short_help = "one map-server add|del <ip>",
.function = lisp_add_del_map_server_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -224,14 +220,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_local_eid_command) = {
.path = "one eid-table",
.short_help = "one eid-table add/del [vni <vni>] eid <eid> "
"locator-set <locator-set> [key <secret-key> key-id sha1|sha256 ]",
.function = lisp_add_del_local_eid_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_eid_table_map_command_fn (vlib_main_t * vm,
@@ -271,13 +265,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_eid_table_map_command) = {
.path = "one eid-table map",
.short_help = "one eid-table map [del] vni <vni> vrf <vrf> | bd <bdi>",
.function = lisp_eid_table_map_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_ndp_entry_command_fn (vlib_main_t * vm,
@@ -335,13 +327,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_ndp_entry_command) = {
.path = "one ndp",
.short_help = "one ndp [del] bd <bd> mac <mac> ip <ipv6>",
.function = lisp_add_del_ndp_entry_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_l2_arp_entry_command_fn (vlib_main_t * vm,
@@ -399,13 +389,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_l2_arp_entry_command) = {
.path = "one l2 arp",
.short_help = "one l2 arp [del] bd <bd> mac <mac> ip <ipv4>",
.function = lisp_add_del_l2_arp_entry_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_l2_arp_entries_command_fn (vlib_main_t * vm,
@@ -416,7 +404,6 @@ lisp_show_l2_arp_entries_command_fn (vlib_main_t * vm,
lisp_api_l2_arp_entry_t *entries, *e;
hash_pair_t *p;
- /* *INDENT-OFF* */
hash_foreach_pair (p, ht,
({
entries = vnet_lisp_l2_arp_entries_get_by_bd (p->key);
@@ -429,19 +416,16 @@ lisp_show_l2_arp_entries_command_fn (vlib_main_t * vm,
}
vec_free (entries);
}));
- /* *INDENT-ON* */
hash_free (ht);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_l2_arp_entries_command) = {
.path = "show one l2 arp entries",
.short_help = "Show ONE L2 ARP entries",
.function = lisp_show_l2_arp_entries_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_ndp_entries_command_fn (vlib_main_t * vm,
@@ -452,7 +436,6 @@ lisp_show_ndp_entries_command_fn (vlib_main_t * vm,
lisp_api_ndp_entry_t *entries, *e;
hash_pair_t *p;
- /* *INDENT-OFF* */
hash_foreach_pair (p, ht,
({
entries = vnet_lisp_ndp_entries_get_by_bd (p->key);
@@ -465,19 +448,16 @@ lisp_show_ndp_entries_command_fn (vlib_main_t * vm,
}
vec_free (entries);
}));
- /* *INDENT-ON* */
hash_free (ht);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_ndp_entries_command) = {
.path = "show one ndp entries",
.short_help = "Show ONE NDP entries",
.function = lisp_show_ndp_entries_command_fn,
};
-/* *INDENT-ON* */
/**
* Handler for add/del remote mapping CLI.
@@ -613,7 +593,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_remote_mapping_command) = {
.path = "one remote-mapping",
.short_help =
@@ -623,7 +602,6 @@ VLIB_CLI_COMMAND (one_add_del_remote_mapping_command) = {
"[rloc <dst-locator> ... ]",
.function = lisp_add_del_remote_mapping_command_fn,
};
-/* *INDENT-ON* */
/**
* Handler for add/del adjacency CLI.
@@ -721,14 +699,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_adjacency_command) = {
.path = "one adjacency",
.short_help = "one adjacency add|del vni <vni> reid <remote-eid> "
"leid <local-eid>",
.function = lisp_add_del_adjacency_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -770,13 +746,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_request_mode_command) = {
.path = "one map-request mode",
.short_help = "one map-request mode dst-only|src-dst",
.function = lisp_map_request_mode_command_fn,
};
-/* *INDENT-ON* */
static u8 *
@@ -804,13 +778,11 @@ lisp_show_map_request_mode_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_map_request_mode_command) = {
.path = "show one map-request mode",
.short_help = "show one map-request mode",
.function = lisp_show_map_request_mode_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_map_resolvers_command_fn (vlib_main_t * vm,
@@ -827,13 +799,11 @@ lisp_show_map_resolvers_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_map_resolvers_command) = {
.path = "show one map-resolvers",
.short_help = "show one map-resolvers",
.function = lisp_show_map_resolvers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_nsh_set_locator_set_command_fn (vlib_main_t * vm,
@@ -884,13 +854,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_nsh_set_locator_set_command) = {
.path = "one nsh-mapping",
.short_help = "one nsh-mapping [del] ls <locator-set-name>",
.function = lisp_nsh_set_locator_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_fallback_threshold_show_command_fn (vlib_main_t * vm,
@@ -904,14 +872,12 @@ lisp_map_register_fallback_threshold_show_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_register_fallback_threshold_show_command) = {
.path = "show one map-register fallback-threshold",
.short_help = "show one map-register fallback-threshold",
.function = lisp_map_register_fallback_threshold_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_fallback_threshold_command_fn (vlib_main_t * vm,
@@ -949,13 +915,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_register_fallback_threshold_command) = {
.path = "one map-register fallback-threshold",
.short_help = "one map-register fallback-threshold <count>",
.function = lisp_map_register_fallback_threshold_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_pitr_set_locator_set_command_fn (vlib_main_t * vm,
@@ -1006,13 +970,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_pitr_set_locator_set_command) = {
.path = "one pitr",
.short_help = "one pitr [disable] ls <locator-set-name>",
.function = lisp_pitr_set_locator_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_pitr_command_fn (vlib_main_t * vm,
@@ -1059,13 +1021,11 @@ lisp_show_pitr_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_pitr_command) = {
.path = "show one pitr",
.short_help = "Show pitr",
.function = lisp_show_pitr_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_eid_entry (u8 * s, va_list * args)
@@ -1156,7 +1116,6 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm,
if (print_all)
{
- /* *INDENT-OFF* */
pool_foreach (mapit, lcm->mapping_pool)
{
if (mapit->pitr_set || mapit->nsh_set)
@@ -1172,7 +1131,6 @@ lisp_show_eid_table_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_eid_entry, lcm->vnet_main,
lcm, mapit, ls);
}
- /* *INDENT-ON* */
}
else
{
@@ -1200,13 +1158,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_show_eid_table_command) = {
.path = "show one eid-table",
.short_help = "show one eid-table [local|remote|eid <eid>]",
.function = lisp_show_eid_table_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_enable_disable_pitr_mode_command_fn (vlib_main_t * vm,
@@ -1253,13 +1209,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_enable_disable_pitr_mode_command) = {
.path = "one pitr mode",
.short_help = "one pitr mode [enable|disable]",
.function = lisp_enable_disable_pitr_mode_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1307,13 +1261,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_enable_disable_petr_mode_command) = {
.path = "one petr mode",
.short_help = "one petr mode [enable|disable]",
.function = lisp_enable_disable_petr_mode_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_enable_disable_xtr_mode_command_fn (vlib_main_t * vm,
@@ -1360,13 +1312,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_enable_disable_xtr_mode_command) = {
.path = "one xtr mode",
.short_help = "one xtr mode [enable|disable]",
.function = lisp_enable_disable_xtr_mode_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
one_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -1381,13 +1331,11 @@ one_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_enable_command) = {
.path = "one enable",
.short_help = "one enable",
.function = one_enable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
one_disable_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -1402,13 +1350,11 @@ one_disable_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_disable_command) = {
.path = "one disable",
.short_help = "one disable",
.function = one_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_set_ttl_command_fn (vlib_main_t * vm,
@@ -1449,13 +1395,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_register_set_ttl_command) = {
.path = "one map-register ttl",
.short_help = "one map-register ttl",
.function = lisp_map_register_set_ttl_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_show_ttl_command_fn (vlib_main_t * vm,
@@ -1468,14 +1412,12 @@ lisp_map_register_show_ttl_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_register_show_ttl_command) = {
.path = "show one map-register ttl",
.short_help = "show one map-register ttl",
.function = lisp_map_register_show_ttl_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_map_register_enable_disable_command_fn (vlib_main_t * vm,
@@ -1522,13 +1464,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_map_register_enable_disable_command) = {
.path = "one map-register",
.short_help = "one map-register [enable|disable]",
.function = lisp_map_register_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_rloc_probe_enable_disable_command_fn (vlib_main_t * vm,
@@ -1575,13 +1515,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_rloc_probe_enable_disable_command) = {
.path = "one rloc-probe",
.short_help = "one rloc-probe [enable|disable]",
.function = lisp_rloc_probe_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_lisp_status (u8 * s, va_list * args)
@@ -1602,13 +1540,11 @@ lisp_show_status_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_status_command) = {
.path = "show one status",
.short_help = "show one status",
.function = lisp_show_status_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_eid_table_map_command_fn (vlib_main_t * vm,
@@ -1654,12 +1590,10 @@ lisp_show_eid_table_map_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%=10s%=10s", "VNI", is_l2 ? "BD" : "VRF");
- /* *INDENT-OFF* */
hash_foreach_pair (p, vni_table,
({
vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
}));
- /* *INDENT-ON* */
done:
unformat_free (line_input);
@@ -1667,13 +1601,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_eid_table_map_command) = {
.path = "show one eid-table map",
.short_help = "show one eid-table map l2|l3",
.function = lisp_show_eid_table_map_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1742,14 +1674,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_add_del_locator_set_command) = {
.path = "one locator-set",
.short_help = "one locator-set add/del <name> [iface <iface-name> "
"p <priority> w <weight>]",
.function = lisp_add_del_locator_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_del_locator_in_set_command_fn (vlib_main_t * vm,
@@ -1818,14 +1748,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_add_del_locator_in_set_command) = {
.path = "one locator",
.short_help = "one locator add/del locator-set <name> iface <iface-name> "
"p <priority> w <weight>",
.function = lisp_add_del_locator_in_set_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
@@ -1840,7 +1768,6 @@ lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%s%=16s%=16s%=16s", "Locator-set", "Locator",
"Priority", "Weight");
- /* *INDENT-OFF* */
pool_foreach (lsit, lcm->locator_set_pool)
{
u8 * msg = 0;
@@ -1872,17 +1799,14 @@ lisp_cp_show_locator_sets_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%v", msg);
vec_free (msg);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_show_locator_sets_command) = {
.path = "show one locator-set",
.short_help = "Shows locator-sets",
.function = lisp_cp_show_locator_sets_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1936,13 +1860,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_map_resolver_command) = {
.path = "one map-resolver",
.short_help = "one map-resolver add/del <ip_address>",
.function = lisp_add_del_map_resolver_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1991,13 +1913,11 @@ done:
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_add_del_map_request_command) = {
.path = "one map-request itr-rlocs",
.short_help = "one map-request itr-rlocs add/del <locator_set_name>",
.function = lisp_add_del_mreq_itr_rlocs_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
@@ -2021,13 +1941,11 @@ lisp_show_mreq_itr_rlocs_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_map_request_command) = {
.path = "show one map-request itr-rlocs",
.short_help = "Shows map-request itr-rlocs",
.function = lisp_show_mreq_itr_rlocs_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_use_petr_set_locator_set_command_fn (vlib_main_t * vm,
@@ -2073,7 +1991,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_use_petr_set_locator_set_command) = {
.path = "one use-petr",
.short_help = "one use-petr [disable] <petr-ip>",
@@ -2125,13 +2042,11 @@ lisp_show_petr_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_petr_command) = {
.path = "show one petr",
.short_help = "Show petr",
.function = lisp_show_petr_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_map_servers_command_fn (vlib_main_t * vm,
@@ -2148,13 +2063,11 @@ lisp_show_map_servers_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_map_servers_command) = {
.path = "show one map-servers",
.short_help = "show one map servers",
.function = lisp_show_map_servers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_map_register_state_command_fn (vlib_main_t * vm,
@@ -2170,13 +2083,11 @@ lisp_show_map_register_state_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_map_register_state_command) = {
.path = "show one map-register state",
.short_help = "show one map-register state",
.function = lisp_show_map_register_state_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_rloc_probe_state_command_fn (vlib_main_t * vm,
@@ -2192,13 +2103,11 @@ lisp_show_rloc_probe_state_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_rloc_probe_state_command) = {
.path = "show one rloc state",
.short_help = "show one RLOC state",
.function = lisp_show_rloc_probe_state_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_stats_command_fn (vlib_main_t * vm,
@@ -2210,13 +2119,11 @@ lisp_show_stats_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_stats_command) = {
.path = "show one statistics status",
.short_help = "show ONE statistics enable/disable status",
.function = lisp_show_stats_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_stats_details_command_fn (vlib_main_t * vm,
@@ -2244,13 +2151,11 @@ lisp_show_stats_details_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_show_stats_details_command) = {
.path = "show one statistics details",
.short_help = "show ONE statistics",
.function = lisp_show_stats_details_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_stats_enable_disable_command_fn (vlib_main_t * vm,
@@ -2282,13 +2187,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_stats_enable_disable_command) = {
.path = "one statistics",
.short_help = "enable/disable ONE statistics collecting",
.function = lisp_stats_enable_disable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_stats_flush_command_fn (vlib_main_t * vm,
@@ -2299,13 +2202,11 @@ lisp_stats_flush_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_stats_flush_command) = {
.path = "one statistics flush",
.short_help = "Flush ONE statistics",
.function = lisp_stats_flush_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_show_one_modes_command_fn (vlib_main_t * vm,
@@ -2323,13 +2224,11 @@ lisp_show_one_modes_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (one_cp_show_one_modes_modes_command) = {
.path = "show one modes",
.short_help = "show one modes",
.function = lisp_show_one_modes_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lisp/lisp-cp/one_test.c b/src/plugins/lisp/lisp-cp/one_test.c
index 6966122b2b5..475b52de15e 100644
--- a/src/plugins/lisp/lisp-cp/one_test.c
+++ b/src/plugins/lisp/lisp-cp/one_test.c
@@ -26,7 +26,7 @@
/* define message IDs */
#include <lisp/lisp-cp/one.api_enum.h>
#include <lisp/lisp-cp/one.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -41,13 +41,11 @@ one_test_main_t one_test_main;
#define __plugin_msg_base one_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
#define LISP_PING(_lm, mp_ping) \
if (!(_lm)->ping_id) \
@@ -673,7 +671,6 @@ vl_api_show_one_pitr_reply_t_handler (vl_api_show_one_pitr_reply_t * mp)
vam->result_ready = 1;
}
-/* *INDENT-OFF* */
/** Used for parsing LISP eids */
typedef CLIB_PACKED(struct{
union {
@@ -684,7 +681,6 @@ typedef CLIB_PACKED(struct{
u32 len; /**< prefix length if IP */
u8 type; /**< type of eid */
}) lisp_eid_vat_t;
-/* *INDENT-ON* */
static uword
unformat_lisp_eid_vat (unformat_input_t * input, va_list * args)
diff --git a/src/plugins/lisp/lisp-cp/packets.c b/src/plugins/lisp/lisp-cp/packets.c
index 3f4292b4841..6c36a550ab4 100644
--- a/src/plugins/lisp/lisp-cp/packets.c
+++ b/src/plugins/lisp/lisp-cp/packets.c
@@ -217,7 +217,6 @@ pkt_push_ecm_hdr (vlib_buffer_t * b)
return h;
}
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lisp/lisp-cp/packets.h b/src/plugins/lisp/lisp-cp/packets.h
index 68cd949e75d..c10fdb28c79 100644
--- a/src/plugins/lisp/lisp-cp/packets.h
+++ b/src/plugins/lisp/lisp-cp/packets.h
@@ -27,7 +27,6 @@ void *pkt_push_udp_and_ip (vlib_main_t * vm, vlib_buffer_t * b, u16 sp,
void *pkt_push_ecm_hdr (vlib_buffer_t * b);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lisp/lisp-gpe/decap.c b/src/plugins/lisp/lisp-gpe/decap.c
index 59fd5646ee7..18e32675a32 100644
--- a/src/plugins/lisp/lisp-gpe/decap.c
+++ b/src/plugins/lisp/lisp-gpe/decap.c
@@ -456,7 +456,6 @@ static char *lisp_gpe_ip4_input_error_strings[] = {
#undef lisp_gpe_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_gpe_ip4_input_node) = {
.function = lisp_gpe_ip4_input,
.name = "lisp-gpe-ip4-input",
@@ -476,9 +475,7 @@ VLIB_REGISTER_NODE (lisp_gpe_ip4_input_node) = {
.format_trace = format_lisp_gpe_rx_trace,
// $$$$ .unformat_buffer = unformat_lisp_gpe_header,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_gpe_ip6_input_node) = {
.function = lisp_gpe_ip6_input,
.name = "lisp-gpe-ip6-input",
@@ -498,7 +495,6 @@ VLIB_REGISTER_NODE (lisp_gpe_ip6_input_node) = {
.format_trace = format_lisp_gpe_rx_trace,
// $$$$ .unformat_buffer = unformat_lisp_gpe_header,
};
-/* *INDENT-ON* */
/**
* Adds arc from lisp-gpe-input to nsh-input if nsh-input is available
@@ -556,7 +552,6 @@ static char *lisp_gpe_nsh_placeholder_error_strings[] = {
"lisp gpe placeholder nsh decap",
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (lisp_gpe_nsh_placeholder_input_node) = {
.function = lisp_gpe_nsh_placeholder_input,
.name = "lisp-gpe-nsh-placeholder-input",
@@ -571,7 +566,6 @@ VLIB_REGISTER_NODE (lisp_gpe_nsh_placeholder_input_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_add_placeholder_nsh_node_command_fn (vlib_main_t * vm,
@@ -586,12 +580,10 @@ lisp_add_placeholder_nsh_node_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_add_placeholder_nsh_node_command, static) = {
.path = "test one nsh add-placeholder-decap-node",
.function = lisp_add_placeholder_nsh_node_command_fn,
};
-/* *INDENT-ON* */
VLIB_INIT_FUNCTION (gpe_decap_init);
diff --git a/src/plugins/lisp/lisp-gpe/interface.c b/src/plugins/lisp/lisp-gpe/interface.c
index 1d2abaf3f90..ed2b08f9aaf 100644
--- a/src/plugins/lisp/lisp-gpe/interface.c
+++ b/src/plugins/lisp/lisp-gpe/interface.c
@@ -88,12 +88,10 @@ format_lisp_gpe_tx_trace (u8 * s, va_list * args)
*
* @return number of vectors in frame.
*/
-static uword
-lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (lisp_tunnel_output)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
u32 n_left_from, next_index, *from, *to_next;
- lisp_gpe_main_t *lgm = &lisp_gpe_main;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -112,7 +110,6 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
const ip_adjacency_t *adj0;
const dpo_id_t *dpo0;
vlib_buffer_t *b0;
- u8 is_v4_0;
bi0 = from[0];
to_next[0] = bi0;
@@ -122,11 +119,7 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
-
- /* Fixup the checksum and len fields in the LISP tunnel encap
- * that was applied at the midchain node */
- is_v4_0 = is_v4_packet (vlib_buffer_get_current (b0));
- ip_udp_fixup_one (lgm->vlib_main, b0, is_v4_0);
+ b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
/* Follow the DPO on which the midchain is stacked */
adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
@@ -151,6 +144,13 @@ lisp_gpe_interface_tx (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
+VLIB_REGISTER_NODE (lisp_tunnel_output) = {
+ .name = "lisp-tunnel-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_lisp_gpe_tx_trace,
+ .sibling_of = "tunnel-output",
+};
+
static u8 *
format_lisp_gpe_name (u8 * s, va_list * args)
{
@@ -158,14 +158,10 @@ format_lisp_gpe_name (u8 * s, va_list * args)
return format (s, "lisp_gpe%d", dev_instance);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (lisp_gpe_device_class) = {
.name = "LISP_GPE",
.format_device_name = format_lisp_gpe_name,
- .format_tx_trace = format_lisp_gpe_tx_trace,
- .tx_function = lisp_gpe_interface_tx,
};
-/* *INDENT-ON* */
u8 *
format_lisp_gpe_header_with_length (u8 * s, va_list * args)
@@ -190,14 +186,12 @@ format_lisp_gpe_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (lisp_gpe_hw_class) = {
.name = "LISP_GPE",
.format_header = format_lisp_gpe_header_with_length,
.build_rewrite = lisp_gpe_build_rewrite,
.update_adjacency = lisp_gpe_update_adjacency,
};
-/* *INDENT-ON* */
typedef struct
@@ -302,14 +296,12 @@ format_l2_lisp_gpe_name (u8 * s, va_list * args)
return format (s, "l2_lisp_gpe%d", dev_instance);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (l2_lisp_gpe_device_class,static) = {
.name = "L2_LISP_GPE",
.format_device_name = format_l2_lisp_gpe_name,
.format_tx_trace = format_l2_lisp_gpe_tx_trace,
.tx_function = l2_lisp_gpe_interface_tx,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -406,14 +398,12 @@ format_nsh_lisp_gpe_name (u8 * s, va_list * args)
return format (s, "nsh_lisp_gpe%d", dev_instance);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (nsh_lisp_gpe_device_class,static) = {
.name = "NSH_LISP_GPE",
.format_device_name = format_nsh_lisp_gpe_name,
.format_tx_trace = format_nsh_lisp_gpe_tx_trace,
.tx_function = nsh_lisp_gpe_interface_tx,
};
-/* *INDENT-ON* */
static vnet_hw_interface_t *
lisp_gpe_create_iface (lisp_gpe_main_t * lgm, u32 vni, u32 dp_table,
@@ -431,7 +421,7 @@ lisp_gpe_create_iface (lisp_gpe_main_t * lgm, u32 vni, u32 dp_table,
if (flen > 0)
{
hw_if_index = lgm->free_tunnel_hw_if_indices[flen - 1];
- _vec_len (lgm->free_tunnel_hw_if_indices) -= 1;
+ vec_dec_len (lgm->free_tunnel_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, hw_if_index);
@@ -507,13 +497,11 @@ lisp_gpe_iface_set_table (u32 sw_if_index, u32 table_id)
fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, table_id,
FIB_SOURCE_LISP);
- vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
ip4_sw_interface_enable_disable (sw_if_index, 1);
fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id,
FIB_SOURCE_LISP);
- vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
ip6_sw_interface_enable_disable (sw_if_index, 1);
}
@@ -928,13 +916,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (add_del_lisp_gpe_iface_command, static) = {
.path = "gpe iface",
.short_help = "gpe iface add/del vni <vni> vrf <vrf>",
.function = lisp_gpe_add_del_iface_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe.c b/src/plugins/lisp/lisp-gpe/lisp_gpe.c
index 1ae3131323c..7474d0fb6a5 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe.c
@@ -169,7 +169,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_gpe_add_del_fwd_entry_command, static) = {
.path = "gpe entry",
.short_help = "gpe entry add/del vni <vni> vrf/bd <id> [leid <leid>]"
@@ -177,7 +176,6 @@ VLIB_CLI_COMMAND (lisp_gpe_add_del_fwd_entry_command, static) = {
"[negative action <action>]",
.function = lisp_gpe_add_del_fwd_entry_command_fn,
};
-/* *INDENT-ON* */
/** Check if LISP-GPE is enabled. */
u8
@@ -271,13 +269,11 @@ gpe_set_encap_mode_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (gpe_set_encap_mode_command, static) = {
.path = "gpe encap",
.short_help = "gpe encap [lisp|vxlan]",
.function = gpe_set_encap_mode_command_fn,
};
-/* *INDENT-ON* */
/** Format GPE encap mode. */
u8 *
@@ -307,13 +303,11 @@ gpe_show_encap_mode_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (gpe_show_encap_mode_command, static) = {
.path = "show gpe encap",
.short_help = "show GPE encapulation mode",
.function = gpe_show_encap_mode_command_fn,
};
-/* *INDENT-ON* */
/** CLI command to enable/disable LISP-GPE. */
static clib_error_t *
@@ -352,13 +346,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_disable_lisp_gpe_command, static) = {
.path = "gpe",
.short_help = "gpe [enable|disable]",
.function = lisp_gpe_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/** CLI command to show LISP-GPE interfaces. */
static clib_error_t *
@@ -371,31 +363,25 @@ lisp_show_iface_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%=10s%=12s", "vrf", "hw_if_index");
- /* *INDENT-OFF* */
hash_foreach_pair (p, lgm->l3_ifaces.hw_if_index_by_dp_table, ({
vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
}));
- /* *INDENT-ON* */
if (0 != lgm->l2_ifaces.hw_if_index_by_dp_table)
{
vlib_cli_output (vm, "%=10s%=12s", "bd_id", "hw_if_index");
- /* *INDENT-OFF* */
hash_foreach_pair (p, lgm->l2_ifaces.hw_if_index_by_dp_table, ({
vlib_cli_output (vm, "%=10d%=10d", p->key, p->value[0]);
}));
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_show_iface_command) = {
.path = "show gpe interface",
.short_help = "show gpe interface",
.function = lisp_show_iface_command_fn,
};
-/* *INDENT-ON* */
/** CLI command to show GPE fwd native route path. */
static clib_error_t *
@@ -428,13 +414,11 @@ gpe_show_native_fwd_rpath_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (gpe_show_native_fwd_rpath_command) = {
.path = "show gpe native-forward",
.short_help = "show gpe native-forward",
.function = gpe_show_native_fwd_rpath_command_fn,
};
-/* *INDENT-ON* */
void
gpe_update_native_fwd_path (u8 ip_version)
@@ -578,14 +562,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (gpe_native_forward_command) = {
.path = "gpe native-forward",
.short_help = "gpe native-forward [del] via <nh-ip-addr> [iface] "
"[table <table>]",
.function = gpe_native_forward_command_fn,
};
-/* *INDENT-ON* */
/** Format LISP-GPE status. */
u8 *
@@ -709,13 +691,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_test_nsh_command, static) = {
.path = "test one nsh",
.short_help = "test gpe nsh pcap <path-to-pcap-file>",
.function = lisp_test_nsh_command_fn,
};
-/* *INDENT-ON* */
VLIB_INIT_FUNCTION (lisp_gpe_init);
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe.h b/src/plugins/lisp/lisp-gpe/lisp_gpe.h
index 10dc4fe7aa7..d7e877124a1 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe.h
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe.h
@@ -36,22 +36,18 @@
#include <vppinfra/bihash_template.h>
/** IP4-UDP-LISP encap header */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4; /* 20 bytes */
udp_header_t udp; /* 8 bytes */
lisp_gpe_header_t lisp; /* 8 bytes */
}) ip4_udp_lisp_gpe_header_t;
-/* *INDENT-ON* */
/** IP6-UDP-LISP encap header */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_header_t ip6; /* 40 bytes */
udp_header_t udp; /* 8 bytes */
lisp_gpe_header_t lisp; /* 8 bytes */
}) ip6_udp_lisp_gpe_header_t;
-/* *INDENT-ON* */
#define foreach_lisp_gpe_ip_input_next \
_(DROP, "error-drop") \
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_adjacency.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_adjacency.c
index 8d20412a1f2..562b3b5eafb 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_adjacency.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_adjacency.c
@@ -285,7 +285,8 @@ lisp_gpe_fixup (vlib_main_t * vm,
/* Fixup the checksum and len fields in the LISP tunnel encap
* that was applied at the midchain node */
- ip_udp_fixup_one (vm, b, is_v4_packet (vlib_buffer_get_current (b)));
+ ip_udp_fixup_one (vm, b, is_v4_packet (vlib_buffer_get_current (b)),
+ UDP_ENCAP_FIXUP_NONE);
}
/**
@@ -317,8 +318,6 @@ lisp_gpe_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
lgt = lisp_gpe_tunnel_get (ladj->tunnel_index);
linkt = adj_get_link_type (ai);
af = ADJ_FLAG_MIDCHAIN_IP_STACK;
- if (VNET_LINK_ETHERNET == linkt)
- af |= ADJ_FLAG_MIDCHAIN_NO_COUNT;
adj_nbr_midchain_update_rewrite
(ai, lisp_gpe_fixup, NULL, af,
@@ -558,7 +557,6 @@ lisp_gpe_adjacency_show (vlib_main_t * vm,
}
else
{
- /* *INDENT-OFF* */
pool_foreach (ladj, lisp_adj_pool)
{
vlib_cli_output (vm, "[%d] %U\n",
@@ -566,19 +564,16 @@ lisp_gpe_adjacency_show (vlib_main_t * vm,
format_lisp_gpe_adjacency, ladj,
LISP_GPE_ADJ_FORMAT_FLAG_NONE);
}
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) =
{
.path = "show gpe adjacency",
.function = lisp_gpe_adjacency_show,
};
-/* *INDENT-ON* */
#define LISP_ADJ_NBR_DEFAULT_HASH_NUM_BUCKETS (256)
#define LISP_ADJ_NBR_DEFAULT_HASH_MEMORY_SIZE (1<<20)
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_api.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_api.c
index df90ef54403..f77a6f4059f 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_api.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_api.c
@@ -212,7 +212,6 @@ vl_api_gpe_fwd_entry_vnis_get_t_handler (vl_api_gpe_fwd_entry_vnis_get_t * mp)
u32 *vnis = vnet_lisp_gpe_get_fwd_entry_vnis ();
u32 size = hash_elts (vnis) * sizeof (u32);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_GPE_FWD_ENTRY_VNIS_GET_REPLY, size,
{
rmp->count = clib_host_to_net_u32 (hash_elts (vnis));
@@ -221,7 +220,6 @@ vl_api_gpe_fwd_entry_vnis_get_t_handler (vl_api_gpe_fwd_entry_vnis_get_t * mp)
rmp->vnis[i++] = clib_host_to_net_u32 (p->key);
}));
});
- /* *INDENT-ON* */
hash_free (vnis);
}
@@ -239,14 +237,12 @@ vl_api_gpe_fwd_entries_get_t_handler (vl_api_gpe_fwd_entries_get_t * mp)
e = vnet_lisp_gpe_fwd_entries_get_by_vni (mp->vni);
size = vec_len (e) * sizeof (vl_api_gpe_fwd_entry_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_GPE_FWD_ENTRIES_GET_REPLY, size,
{
rmp->count = vec_len (e);
gpe_fwd_entries_copy (rmp->entries, e);
gpe_fwd_entries_get_reply_t_host_to_net (rmp);
});
- /* *INDENT-ON* */
vec_free (e);
}
@@ -294,12 +290,10 @@ vl_api_gpe_add_del_fwd_entry_t_handler (vl_api_gpe_add_del_fwd_entry_t * mp)
rv = vnet_lisp_gpe_add_del_fwd_entry (a, 0);
vec_free (pairs);
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_GPE_ADD_DEL_FWD_ENTRY_REPLY,
{
rmp->fwd_entry_index = clib_host_to_net_u32 (a->fwd_entry_index);
});
- /* *INDENT-ON* */
}
static void
@@ -365,12 +359,10 @@ vl_api_gpe_get_encap_mode_t_handler (vl_api_gpe_get_encap_mode_t * mp)
vl_api_gpe_get_encap_mode_reply_t *rmp;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_GPE_GET_ENCAP_MODE_REPLY,
({
rmp->encap_mode = vnet_gpe_get_encap_mode ();
}));
- /* *INDENT-ON* */
}
static void
@@ -464,7 +456,6 @@ vl_api_gpe_native_fwd_rpaths_get_t_handler (vl_api_gpe_native_fwd_rpaths_get_t
size = vec_len (lgm->native_fwd_rpath[rpath_index])
* sizeof (vl_api_gpe_native_fwd_rpath_t);
- /* *INDENT-OFF* */
REPLY_MACRO4 (VL_API_GPE_NATIVE_FWD_RPATHS_GET_REPLY, size,
{
rmp->count = vec_len (lgm->native_fwd_rpath[rpath_index]);
@@ -472,7 +463,6 @@ vl_api_gpe_native_fwd_rpaths_get_t_handler (vl_api_gpe_native_fwd_rpaths_get_t
lgm->native_fwd_rpath[rpath_index]);
gpe_native_fwd_rpaths_get_reply_t_host_to_net (rmp);
});
- /* *INDENT-ON* */
}
/*
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_fwd_entry.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_fwd_entry.c
index 5f196fb22e3..d0d86d58391 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_fwd_entry.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_fwd_entry.c
@@ -1356,7 +1356,6 @@ vnet_lisp_gpe_fwd_entry_flush (void)
lisp_gpe_main_t *lgm = &lisp_gpe_main;
lisp_gpe_fwd_entry_t *lfe;
- /* *INDENT-OFF* */
pool_foreach (lfe, lgm->lisp_fwd_entry_pool)
{
switch (fid_addr_type(&lfe->key->rmt))
@@ -1372,7 +1371,6 @@ vnet_lisp_gpe_fwd_entry_flush (void)
break;
}
}
- /* *INDENT-ON* */
}
static u8 *
@@ -1476,7 +1474,6 @@ lisp_gpe_fwd_entry_show (vlib_main_t * vm,
return (NULL);
}
- /* *INDENT-OFF* */
pool_foreach (lfe, lgm->lisp_fwd_entry_pool)
{
if ((vni == ~0) ||
@@ -1484,18 +1481,15 @@ lisp_gpe_fwd_entry_show (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_lisp_gpe_fwd_entry, lfe,
LISP_GPE_FWD_ENTRY_FORMAT_NONE);
}
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_gpe_fwd_entry_show_command, static) = {
.path = "show gpe entry",
.short_help = "show gpe entry vni <vni> vrf <vrf> [leid <leid>] reid <reid>",
.function = lisp_gpe_fwd_entry_show,
};
-/* *INDENT-ON* */
clib_error_t *
lisp_gpe_fwd_entry_init (vlib_main_t * vm)
@@ -1521,12 +1515,10 @@ vnet_lisp_gpe_get_fwd_entry_vnis (void)
lisp_gpe_fwd_entry_t *lfe;
u32 *vnis = 0;
- /* *INDENT-OFF* */
pool_foreach (lfe, lgm->lisp_fwd_entry_pool)
{
hash_set (vnis, lfe->key->vni, 0);
}
- /* *INDENT-ON* */
return vnis;
}
@@ -1538,7 +1530,6 @@ vnet_lisp_gpe_fwd_entries_get_by_vni (u32 vni)
lisp_gpe_fwd_entry_t *lfe;
lisp_api_gpe_fwd_entry_t *entries = 0, e;
- /* *INDENT-OFF* */
pool_foreach (lfe, lgm->lisp_fwd_entry_pool)
{
if (lfe->key->vni == vni)
@@ -1554,7 +1545,6 @@ vnet_lisp_gpe_fwd_entries_get_by_vni (u32 vni)
vec_add1 (entries, e);
}
}
- /* *INDENT-ON* */
return entries;
}
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_sub_interface.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_sub_interface.c
index 9c48c0064ca..4ba46c8240f 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_sub_interface.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_sub_interface.c
@@ -93,14 +93,12 @@ lisp_gpe_sub_interface_set_table (u32 sw_if_index, u32 table_id)
FIB_SOURCE_LISP);
ASSERT (FIB_NODE_INDEX_INVALID != fib_index);
- vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP6, table_id,
FIB_SOURCE_LISP);
ASSERT (FIB_NODE_INDEX_INVALID != fib_index);
- vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
}
@@ -170,6 +168,8 @@ lisp_gpe_sub_interface_find_or_create_and_lock (const ip_address_t * lrloc,
vnet_sw_interface_set_flags (vnet_get_main (),
l3s->sw_if_index,
VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ vnet_set_interface_l3_output_node (vlib_get_main (), l3s->sw_if_index,
+ (u8 *) "lisp-tunnel-output");
lisp_gpe_sub_interface_db_insert (l3s);
}
@@ -202,6 +202,7 @@ lisp_gpe_sub_interface_unlock (index_t l3si)
lisp_gpe_tenant_l3_iface_unlock (l3s->key->vni);
vnet_sw_interface_set_flags (vnet_get_main (), l3s->sw_if_index, 0);
+ vnet_reset_interface_l3_output_node (vlib_get_main (), l3s->sw_if_index);
vnet_delete_sub_interface (l3s->sw_if_index);
lisp_gpe_sub_interface_db_remove (l3s);
@@ -223,9 +224,7 @@ format_lisp_gpe_sub_interface (u8 * s, va_list * ap)
lisp_gpe_sub_interface_t *l3s = va_arg (*ap, lisp_gpe_sub_interface_t *);
vnet_main_t *vnm = vnet_get_main ();
- s = format (s, "%-16U",
- format_vnet_sw_interface_name,
- vnm, vnet_get_sw_interface (vnm, l3s->sw_if_index));
+ s = format (s, "%-16U", format_vnet_sw_if_index_name, vnm, l3s->sw_if_index);
s = format (s, "%=8d", l3s->key->vni);
s = format (s, "%=15d", l3s->sw_if_index);
s = format (s, "%U", format_ip_address, &l3s->key->local_rloc);
@@ -244,23 +243,19 @@ lisp_gpe_sub_interface_show (vlib_main_t * vm,
vlib_cli_output (vm, "%-16s%=8s%=15s%s", "Name", "VNI", "sw_if_index",
"local RLOC");
- /* *INDENT-OFF* */
pool_foreach (l3s, lisp_gpe_sub_interface_pool)
{
vlib_cli_output (vm, "%U", format_lisp_gpe_sub_interface, l3s);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_gpe_sub_interface_command) = {
.path = "show gpe sub-interface",
.short_help = "show gpe sub-interface",
.function = lisp_gpe_sub_interface_show,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_gpe_sub_interface_module_init (vlib_main_t * vm)
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_tenant.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_tenant.c
index b6173b273c0..450c611c5f7 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_tenant.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_tenant.c
@@ -262,13 +262,11 @@ lisp_gpe_tenant_flush (void)
{
lisp_gpe_tenant_t *lt;
- /* *INDENT-OFF* */
pool_foreach (lt, lisp_gpe_tenant_pool)
{
lisp_gpe_tenant_l2_iface_unlock(lt->lt_vni);
lisp_gpe_tenant_l3_iface_unlock(lt->lt_vni);
}
- /* *INDENT-ON* */
}
/**
@@ -305,23 +303,19 @@ lisp_gpe_tenant_show (vlib_main_t * vm,
{
lisp_gpe_tenant_t *lt;
- /* *INDENT-OFF* */
pool_foreach (lt, lisp_gpe_tenant_pool)
{
vlib_cli_output (vm, "%U", format_lisp_gpe_tenant, lt);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_gpe_tenant_command) = {
.path = "show gpe tenant",
.short_help = "show gpe tenant",
.function = lisp_gpe_tenant_show,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_test.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_test.c
index 54f7713162a..10167b14975 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_test.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_test.c
@@ -26,7 +26,7 @@
/* define message IDs */
#include <lisp/lisp-gpe/lisp_gpe.api_enum.h>
#include <lisp/lisp-gpe/lisp_gpe.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -41,13 +41,11 @@ lisp_gpe_test_main_t lisp_gpe_test_main;
#define __plugin_msg_base lisp_gpe_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
#define LISP_PING(_lm, mp_ping) \
if (!(_lm)->ping_id) \
@@ -258,7 +256,6 @@ end:
}
-/* *INDENT-OFF* */
/** Used for parsing LISP eids */
typedef CLIB_PACKED(struct{
union {
@@ -269,7 +266,6 @@ typedef CLIB_PACKED(struct{
u32 len; /**< prefix length if IP */
u8 type; /**< type of eid */
}) lisp_eid_vat_t;
-/* *INDENT-ON* */
static uword
unformat_lisp_eid_vat (unformat_input_t * input, va_list * args)
diff --git a/src/plugins/lisp/lisp-gpe/lisp_gpe_tunnel.c b/src/plugins/lisp/lisp-gpe/lisp_gpe_tunnel.c
index 14ee095d2de..8dca55c4315 100644
--- a/src/plugins/lisp/lisp-gpe/lisp_gpe_tunnel.c
+++ b/src/plugins/lisp/lisp-gpe/lisp_gpe_tunnel.c
@@ -253,24 +253,20 @@ show_lisp_gpe_tunnel_command_fn (vlib_main_t * vm,
}
else
{
- /* *INDENT-OFF* */
pool_foreach (lgt, lisp_gpe_tunnel_pool)
{
vlib_cli_output (vm, "%U", format_lisp_gpe_tunnel, lgt);
}
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_lisp_gpe_tunnel_command, static) =
{
.path = "show gpe tunnel",
.function = show_lisp_gpe_tunnel_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
lisp_gpe_tunnel_module_init (vlib_main_t * vm)
diff --git a/src/plugins/lisp/test/lisp_cp_test.c b/src/plugins/lisp/test/lisp_cp_test.c
index 228ff32d010..d1908566f8e 100644
--- a/src/plugins/lisp/test/lisp_cp_test.c
+++ b/src/plugins/lisp/test/lisp_cp_test.c
@@ -99,7 +99,6 @@ test_lisp_msg_push_ecm ()
/* clear ip checksum */
clib_memset ((u8 *) ih + 10, 0, 2);
- /* *INDENT-OFF* */
u8 expected_ip4_hdr[] = {
0x45, /* version; IHL */
0x00, /* services */
@@ -112,7 +111,6 @@ test_lisp_msg_push_ecm ()
0xd4, 0xc3, 0xb2, 0xa1, /* src IP */
0x63, 0x72, 0x81, 0x90, /* dst IP */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (ih, expected_ip4_hdr, sizeof (expected_ip4_hdr)));
@@ -120,14 +118,12 @@ test_lisp_msg_push_ecm ()
/* clear udp checksum */
clib_memset ((u8 *) uh + 6, 0, 2);
- /* *INDENT-OFF* */
u8 expected_udp_hdr[] = {
0x00, 0x15, /* src port */
0x00, 0x14, /* dst port */
0x03, 0x8c, /* length */
0x00, 0x00, /* checksum */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (uh, expected_udp_hdr, sizeof (expected_udp_hdr)));
@@ -149,7 +145,6 @@ test_lisp_msg_parse_mapping_record ()
b = clib_mem_alloc (buff_len);
clib_memset ((u8 *) b, 0, buff_len);
- /* *INDENT-OFF* */
u8 map_reply_records[] = {
/* 1. record */
0x01, 0x02, 0x03, 0x04, /* record TTL */
@@ -167,7 +162,6 @@ test_lisp_msg_parse_mapping_record ()
0x00, 0x01, /* Loc-AFI */
0xaa, 0xbb, 0xcc, 0xdd, /* Loator */
};
- /* *INDENT-ON* */
b->current_length = buff_len;
clib_memcpy (b->data, map_reply_records, sizeof (map_reply_records));
@@ -322,7 +316,6 @@ test_lisp_msg_put_mreq_with_lcaf ()
/* clear Nonce to simplify comparison */
clib_memset ((u8 *) h + 4, 0, 8);
- /* *INDENT-OFF* */
u8 expected_data[] =
{
0x10, 0x40, 0x00, 0x01, /* type; flags; IRC; REC count */
@@ -349,7 +342,6 @@ test_lisp_msg_put_mreq_with_lcaf ()
0x00, 0x01, /* EID-prefix-AFI */
0xf0, 0xde, 0xbc, 0x9a, /* EID-prefix */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_data, (u8 *) h, sizeof (expected_data)));
done:
@@ -377,7 +369,6 @@ test_lisp_msg_put_mreq ()
print_map_request (h);
- /* *INDENT-OFF* */
u8 expected_data[50] = {
0x10, 0x40, 0x01, 0x01, /* type; flags; IRC; REC count */
0x00, 0x00, 0x00, 0x00,
@@ -400,7 +391,6 @@ test_lisp_msg_put_mreq ()
0x00, 0x01, /* EID-prefix-AFI */
0xf0, 0xde, 0xbc, 0x9a, /* EID-prefix */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_data, (u8 *) h, sizeof (expected_data)));
@@ -415,7 +405,6 @@ build_test_map_records ()
{
mapping_t *records = 0;
- /* *INDENT-OFF* */
mapping_t r = {
.ttl = MAP_REGISTER_DEFAULT_TTL,
.eid = {
@@ -439,7 +428,6 @@ build_test_map_records ()
}
}
};
- /* *INDENT-ON* */
vec_add1 (r.locators, loc);
vec_add1 (records, r);
@@ -482,7 +470,6 @@ test_lisp_map_register ()
/* clear authentication data */
clib_memset ((u8 *) b->data + 16, 0, 20);
- /* *INDENT-OFF* */
u8 expected_data[] = {
0x30, 0x00, 0x01, 0x01, /* type; rsvd; want notify; REC count */
0x00, 0x00, 0x00, 0x00,
@@ -509,7 +496,6 @@ test_lisp_map_register ()
0x00, 0x04, 0x00, 0x01, /* flags, AFI = ipv4 */
0x66, 0x77, 0x88, 0x99, /* ipv4 locator address */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_data, b->data, sizeof (expected_data)));
done:
@@ -537,20 +523,17 @@ test_lisp_parse_map_reply ()
{
clib_error_t *error = 0;
- /* *INDENT-OFF* */
u8 map_reply_data[] =
{
0x00, 0x00, 0x00, 0x01, /* type; rsvd; mapping count */
0x00, 0x00, 0x00, 0x00,
};
- /* *INDENT-ON* */
vlib_buffer_t *b = create_buffer (map_reply_data, sizeof (map_reply_data));
map_records_arg_t *mrecs = parse_map_reply (b);
_assert (0 == mrecs);
clib_mem_free (b);
- /* *INDENT-OFF* */
u8 map_reply_data2[] =
{
0x00, 0x00, 0x00, 0x01, /* type; rsvd */
@@ -561,7 +544,6 @@ test_lisp_parse_map_reply ()
0x01, 0x02, 0x03, 0x04, /* record TTL */
0x01, /* locator count */
};
- /* *INDENT-ON* */
b = create_buffer (map_reply_data2, sizeof (map_reply_data2));
mrecs = parse_map_reply (b);
@@ -585,7 +567,6 @@ test_lisp_parse_lcaf ()
b = clib_mem_alloc (buff_len);
clib_memset ((u8 *) b, 0, buff_len);
- /* *INDENT-OFF* */
u8 map_reply_records[] =
{
/* 1. record */
@@ -644,7 +625,6 @@ test_lisp_parse_lcaf ()
0x00, 0x01, /* Loc-AFI */
0xaa, 0xbb, 0xcc, 0xdd, /* Loator */
};
- /* *INDENT-ON* */
b->current_length = buff_len;
memcpy (b->data, map_reply_records, sizeof (map_reply_records));
@@ -785,13 +765,11 @@ test_gid_parse_ip_pref ()
gid_address_t _gid_addr, *gid_addr = &_gid_addr;
gid_address_t _gid_addr_copy, *copy = &_gid_addr_copy;
- /* *INDENT-OFF* */
u8 data[] =
{
0x00, 0x01, /* AFI = IPv4 */
0x10, 0xbb, 0xcc, 0xdd, /* ipv4 address */
};
- /* *INDENT-ON* */
u32 len = gid_address_parse (data, gid_addr);
_assert (6 == len);
@@ -808,14 +786,12 @@ test_gid_parse_mac ()
gid_address_t _gid, *gid = &_gid;
gid_address_t _gid_copy, *gid_copy = &_gid_copy;
- /* *INDENT-OFF* */
u8 data[] =
{
0x40, 0x05, /* AFI = MAC address */
0x10, 0xbb, 0xcc, 0xdd, /* MAC */
0x77, 0x99,
};
- /* *INDENT-ON* */
u32 len = gid_address_parse (data, gid);
_assert (8 == len);
@@ -843,7 +819,6 @@ test_gid_write_nsh (void)
u16 len = gid_address_put (b, &g);
- /* *INDENT-OFF* */
u8 expected[] =
{
0x40, 0x03, 0x00, 0x00, /* AFI = LCAF*/
@@ -852,7 +827,6 @@ test_gid_write_nsh (void)
/* Service Path ID, Service index */
0x11, 0x22, 0x33, 0x42, /* SPI, SI */
};
- /* *INDENT-ON* */
_assert (sizeof (expected) == len);
_assert (0 == memcmp (expected, b, len));
@@ -871,7 +845,6 @@ test_gid_parse_nsh ()
clib_memset (gid_addr, 0, sizeof (gid_addr[0]));
clib_memset (copy, 0, sizeof (copy[0]));
- /* *INDENT-OFF* */
u8 data[] =
{
0x40, 0x03, 0x00, 0x00, /* AFI = LCAF*/
@@ -880,7 +853,6 @@ test_gid_parse_nsh ()
/* Service Path ID, Service index */
0x55, 0x99, 0x42, 0x09, /* SPI, SI */
};
- /* *INDENT-ON* */
u32 len = gid_address_parse (data, gid_addr);
_assert (sizeof (data) == len);
@@ -907,7 +879,6 @@ test_gid_parse_lcaf ()
clib_memset (gid_addr, 0, sizeof (gid_addr[0]));
clib_memset (gid_addr_copy, 0, sizeof (gid_addr_copy[0]));
- /* *INDENT-OFF* */
u8 data[] =
{
0x40, 0x03, /* AFI = LCAF*/
@@ -922,7 +893,6 @@ test_gid_parse_lcaf ()
0x00, 0x01, /* AFI = ipv4 */
0x10, 0xbb, 0xcc, 0xdd, /* ipv4 address */
};
- /* *INDENT-ON* */
u32 len = gid_address_parse (data, gid_addr);
_assert (18 == len);
@@ -951,7 +921,6 @@ test_gid_parse_lcaf_complex ()
clib_memset (gid_addr, 0, sizeof (gid_addr[0]));
clib_memset (gid_addr_copy, 0, sizeof (gid_addr_copy[0]));
- /* *INDENT-OFF* */
u8 data[] = {
0x40, 0x03, /* AFI = LCAF */
@@ -988,7 +957,6 @@ test_gid_parse_lcaf_complex ()
0x10, 0xbb, 0xcc, 0xdd,
0x10, 0xbb, 0xcc, 0xdd, /* ipv6 address */
};
- /* *INDENT-ON* */
u32 len = gid_address_parse (data, gid_addr);
_assert (54 == len);
@@ -1056,7 +1024,6 @@ test_write_mac_in_lcaf (void)
u16 len = gid_address_put (b, &g);
- /* *INDENT-OFF* */
u8 expected[] =
{
0x40, 0x03, /* AFI = LCAF */
@@ -1071,7 +1038,6 @@ test_write_mac_in_lcaf (void)
0x01, 0x02, 0x03, 0x04,
0x05, 0x06 /* MAC */
};
- /* *INDENT-ON* */
_assert (sizeof (expected) == len);
_assert (0 == memcmp (expected, b, len));
@@ -1096,14 +1062,12 @@ test_mac_address_write (void)
u16 len = gid_address_put (b, &g);
_assert (8 == len);
- /* *INDENT-OFF* */
u8 expected[] =
{
0x40, 0x05, /* AFI = MAC */
0x01, 0x02, 0x03, 0x04,
0x05, 0x06 /* MAC */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected, b, len));
done:
@@ -1118,7 +1082,6 @@ test_src_dst_with_vni_serdes (void)
u8 *b = clib_mem_alloc (500);
clib_memset (b, 0, 500);
- /* *INDENT-OFF* */
fid_address_t src =
{
.type = FID_ADDR_IP_PREF,
@@ -1161,7 +1124,6 @@ test_src_dst_with_vni_serdes (void)
.vni_mask = 0x9
};
- /* *INDENT-ON* */
u16 size_to_put = gid_address_size_to_put (&g);
_assert (36 == size_to_put);
@@ -1170,7 +1132,6 @@ test_src_dst_with_vni_serdes (void)
u16 write_len = gid_address_put (b, &g);
_assert (size_to_put == write_len);
- /* *INDENT-OFF* */
u8 expected_data[] =
{
0x40, 0x03, 0x00, 0x00, /* AFI = LCAF, reserved1, flags */
@@ -1187,7 +1148,6 @@ test_src_dst_with_vni_serdes (void)
0x00, 0x01, /* AFI = ip4 */
0x09, 0x08, 0x00, 0x00, /* destination */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_data, b, sizeof (expected_data)));
@@ -1205,7 +1165,6 @@ test_src_dst_deser_bad_afi (void)
{
clib_error_t *error = 0;
- /* *INDENT-OFF* */
u8 expected_data[] =
{
0x40, 0x03, 0x00, 0x00, /* AFI = LCAF, reserved1, flags */
@@ -1220,7 +1179,6 @@ test_src_dst_deser_bad_afi (void)
0x10, 0x21, 0x32, 0x43,
0x54, 0x65, /* destination */
};
- /* *INDENT-ON* */
gid_address_t p;
_assert (~0 == gid_address_parse (expected_data, &p));
@@ -1265,7 +1223,6 @@ test_src_dst_serdes (void)
u16 write_len = gid_address_put (b, &g);
_assert (size_to_put == write_len);
- /* *INDENT-OFF* */
u8 expected_data[] =
{
0x40, 0x03, 0x00, 0x00, /* AFI = LCAF, reserved1, flags */
@@ -1280,7 +1237,6 @@ test_src_dst_serdes (void)
0x10, 0x21, 0x32, 0x43,
0x54, 0x65, /* destination */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_data, b, sizeof (expected_data)));
@@ -1320,7 +1276,6 @@ test_gid_address_write (void)
u16 write_len = gid_address_put (b, &g);
_assert (18 == write_len);
- /* *INDENT-OFF* */
u8 expected_gid_data[] =
{
0x40, 0x03, /* AFI = LCAF */
@@ -1334,7 +1289,6 @@ test_gid_address_write (void)
0x00, 0x01, /* AFI = IPv4 */
0xdd, 0xcc, 0xbb, 0xaa, /* ipv4 addr */
};
- /* *INDENT-ON* */
_assert (0 == memcmp (expected_gid_data, b, sizeof (expected_gid_data)));
done:
@@ -1413,25 +1367,21 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (lisp_cp_command, static) =
{
.path = "test lisp cp",
.short_help = "lisp cp internal unit tests",
.function = lisp_cp_test,
};
-/* *INDENT-ON* */
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Test Locator ID Separation Protocol (LISP)",
.default_disabled = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lldp/lldp.api b/src/plugins/lldp/lldp.api
index 6be060b0dd1..c5edee7cf39 100644
--- a/src/plugins/lldp/lldp.api
+++ b/src/plugins/lldp/lldp.api
@@ -56,3 +56,79 @@ autoreply define sw_interface_set_lldp
bool enable [default=true];
string port_desc[];
};
+
+/** \brief Dump lldp neighbors
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+autoendian define lldp_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+};
+
+autoendian define lldp_dump_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+enum port_id_subtype
+{
+ PORT_ID_SUBTYPE_RESERVED = 0x00,
+ PORT_ID_SUBTYPE_INTF_ALIAS = 0x01,
+ PORT_ID_SUBTYPE_PORT_COMP = 0x02,
+ PORT_ID_SUBTYPE_MAC_ADDR = 0x03,
+ PORT_ID_SUBTYPE_NET_ADDR = 0x04,
+ PORT_ID_SUBTYPE_INTF_NAME = 0x05,
+ PORT_ID_SUBTYPE_AGENT_CIRCUIT_ID = 0x06,
+ PORT_ID_SUBTYPE_LOCAL = 0x07,
+};
+
+enum chassis_id_subtype
+{
+ CHASSIS_ID_SUBTYPE_RESERVED = 0x00,
+ CHASSIS_ID_SUBTYPE_CHASSIS_COMP = 0x01,
+ CHASSIS_ID_SUBTYPE_INTF_ALIAS = 0x02,
+ CHASSIS_ID_SUBTYPE_PORT_COMP = 0x03,
+ CHASSIS_ID_SUBTYPE_MAC_ADDR = 0x04,
+ CHASSIS_ID_SUBTYPE_NET_ADDR = 0x05,
+ CHASSIS_ID_SUBTYPE_INTF_NAME = 0x06,
+ CHASSIS_ID_SUBTYPE_LOCAL = 0x07,
+};
+
+/** \brief Details about neighbor
+ @param context - returned sender context, to match reply w/ request
+ @param sw_if_index - interface where neighbor was discovered
+ @param last_heard - last heard time
+ @param last_sent - last sent time
+ @param chassis_id - chassis id value
+ @param chassis_id_len - length for chassis id
+ @param port_id - port id value
+ @param port_id_len - length for port id
+ @param ttl - time to length for the neighbour
+ @param port_id_subtype - subtype for port_id
+ @param chassis_id_sybtype - sybtype for chassis_id
+*/
+autoendian define lldp_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ f64 last_heard;
+ f64 last_sent;
+ u8 chassis_id[64];
+ u8 chassis_id_len;
+ u8 port_id[64];
+ u8 port_id_len;
+ u16 ttl;
+ vl_api_port_id_subtype_t port_id_subtype;
+ vl_api_chassis_id_subtype_t chassis_id_subtype;
+ option status="in_progress";
+};
+
+service {
+ rpc lldp_dump returns lldp_dump_reply
+ stream lldp_details;
+};
diff --git a/src/plugins/lldp/lldp_api.c b/src/plugins/lldp/lldp_api.c
index 69eab6949c4..bb5d1cbb5e8 100644
--- a/src/plugins/lldp/lldp_api.c
+++ b/src/plugins/lldp/lldp_api.c
@@ -23,6 +23,7 @@
#include <vnet/interface.h>
#include <vnet/api_errno.h>
#include <lldp/lldp.h>
+#include <lldp/lldp_node.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
@@ -42,7 +43,7 @@ static u32 lldp_base_msg_id;
#include <vlibapi/api_helper_macros.h>
static void
-vl_api_lldp_config_t_handler (vl_api_lldp_config_t * mp)
+vl_api_lldp_config_t_handler (vl_api_lldp_config_t *mp)
{
vl_api_lldp_config_reply_t *rmp;
int rv = 0;
@@ -50,8 +51,8 @@ vl_api_lldp_config_t_handler (vl_api_lldp_config_t * mp)
sys_name = vl_api_from_api_to_new_vec (mp, &mp->system_name);
- if (lldp_cfg_set (&sys_name, ntohl (mp->tx_hold), ntohl (mp->tx_interval))
- != lldp_ok)
+ if (lldp_cfg_set (&sys_name, ntohl (mp->tx_hold), ntohl (mp->tx_interval)) !=
+ lldp_ok)
{
vec_free (sys_name);
rv = VNET_API_ERROR_INVALID_VALUE;
@@ -61,7 +62,7 @@ vl_api_lldp_config_t_handler (vl_api_lldp_config_t * mp)
}
static void
-vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
+vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t *mp)
{
vl_api_sw_interface_set_lldp_reply_t *rmp;
int rv = 0;
@@ -81,7 +82,7 @@ vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
if (ip4.as_u32 != 0)
{
vec_validate (mgmt_ip4, sizeof (ip4_address_t) - 1);
- clib_memcpy (mgmt_ip4, &ip4, vec_len (mgmt_ip4));
+ clib_memcpy (mgmt_ip4, &ip4, sizeof (ip4));
}
ip6_address_decode (mp->mgmt_ip6, &ip6);
@@ -89,7 +90,7 @@ vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
if (!ip6_address_is_zero (&ip6))
{
vec_validate (mgmt_ip6, sizeof (ip6_address_t) - 1);
- clib_memcpy (mgmt_ip6, &ip6, vec_len (mgmt_ip6));
+ clib_memcpy (mgmt_ip6, &ip6, sizeof (ip6));
}
if (memcmp (mp->mgmt_oid, no_data, strlen ((char *) mp->mgmt_oid)) != 0)
@@ -100,7 +101,7 @@ vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
VALIDATE_SW_IF_INDEX (mp);
- if (lldp_cfg_intf_set (ntohl (mp->sw_if_index), (u8 **) & port_desc,
+ if (lldp_cfg_intf_set (ntohl (mp->sw_if_index), (u8 **) &port_desc,
&mgmt_ip4, &mgmt_ip6, &mgmt_oid,
mp->enable) != lldp_ok)
{
@@ -116,6 +117,41 @@ vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
REPLY_MACRO (VL_API_SW_INTERFACE_SET_LLDP_REPLY);
}
+static void
+send_lldp (u32 index, vl_api_registration_t *rp, u32 context)
+{
+ vl_api_lldp_details_t *rmp = 0;
+ vnet_main_t *vnm = &vnet_main;
+ lldp_main_t *lm = &lldp_main;
+ const lldp_intf_t *n = vec_elt_at_index (lm->intfs, index);
+ const vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, n->hw_if_index);
+
+ REPLY_MACRO_DETAILS4_END (
+ VL_API_LLDP_DETAILS, rp, context, ({
+ rmp->sw_if_index = hw->sw_if_index;
+ rmp->last_heard = n->last_heard;
+ rmp->last_sent = n->last_sent;
+ rmp->ttl = n->ttl;
+ rmp->port_id_subtype = (vl_api_port_id_subtype_t) n->port_id_subtype;
+ rmp->chassis_id_subtype =
+ (vl_api_chassis_id_subtype_t) n->chassis_id_subtype;
+ rmp->chassis_id_len = vec_len (n->chassis_id);
+ clib_memcpy (&rmp->chassis_id, n->chassis_id, rmp->chassis_id_len);
+ rmp->port_id_len = vec_len (n->port_id);
+ clib_memcpy (&rmp->port_id, n->port_id, rmp->port_id_len);
+ }));
+}
+
+static void
+vl_api_lldp_dump_t_handler (vl_api_lldp_dump_t *mp)
+{
+ int rv = 0;
+ lldp_main_t *lm = &lldp_main;
+ vl_api_lldp_dump_reply_t *rmp;
+
+ REPLY_AND_DETAILS_MACRO_END (VL_API_LLDP_DUMP_REPLY, lm->intfs,
+ ({ send_lldp (cursor, rp, mp->context); }));
+}
/*
* * lldp_api_hookup
@@ -127,7 +163,7 @@ vl_api_sw_interface_set_lldp_t_handler (vl_api_sw_interface_set_lldp_t * mp)
#include <lldp/lldp.api.c>
static clib_error_t *
-lldp_api_hookup (vlib_main_t * vm)
+lldp_api_hookup (vlib_main_t *vm)
{
/*
* Set up the (msg_name, crc, message-id) table
@@ -142,13 +178,10 @@ VLIB_API_INIT_FUNCTION (lldp_api_hookup);
#include <vlib/unix/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
- .version = VPP_BUILD_VER,
- .description = "Link Layer Discovery Protocol (LLDP)",
+ .version = VPP_BUILD_VER,
+ .description = "Link Layer Discovery Protocol (LLDP)",
};
-/* *INDENT-ON* */
-
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/lldp/lldp_cli.c b/src/plugins/lldp/lldp_cli.c
index e77d699393c..1ed3efa4251 100644
--- a/src/plugins/lldp/lldp_cli.c
+++ b/src/plugins/lldp/lldp_cli.c
@@ -103,7 +103,7 @@ lldp_cfg_intf_set (u32 hw_if_index, u8 ** port_desc, u8 ** mgmt_ip4,
}
/* Add MAC address to an interface's filter */
- if (hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER)
+ if (hi->caps & VNET_HW_IF_CAP_MAC_FILTER)
{
error =
vnet_hw_interface_add_del_mac_address (lm->vnet_main,
@@ -130,7 +130,7 @@ lldp_cfg_intf_set (u32 hw_if_index, u8 ** port_desc, u8 ** mgmt_ip4,
lldp_intf_t *n = lldp_get_intf (lm, hi->sw_if_index);
lldp_delete_intf (lm, n);
/* Remove MAC address from the interface's filter */
- if ((n) && (hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER))
+ if ((n) && (hi->caps & VNET_HW_IF_CAP_MAC_FILTER))
{
error =
vnet_hw_interface_add_del_mac_address (lm->vnet_main,
@@ -175,13 +175,13 @@ lldp_intf_cmd (vlib_main_t * vm, unformat_input_t * input,
if (unformat (input, "mgmt-ip4 %U", unformat_ip4_address, &ip4_addr))
{
vec_validate (mgmt_ip4, sizeof (ip4_address_t) - 1);
- clib_memcpy (mgmt_ip4, &ip4_addr, vec_len (mgmt_ip4));
+ clib_memcpy (mgmt_ip4, &ip4_addr, sizeof (ip4_addr));
}
else
if (unformat (input, "mgmt-ip6 %U", unformat_ip6_address, &ip6_addr))
{
vec_validate (mgmt_ip6, sizeof (ip6_address_t) - 1);
- clib_memcpy (mgmt_ip6, &ip6_addr, vec_len (mgmt_ip6));
+ clib_memcpy (mgmt_ip6, &ip6_addr, sizeof (ip6_addr));
}
else if (unformat (input, "mgmt-oid %s", &mgmt_oid))
;
@@ -298,7 +298,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(set_interface_lldp_cmd, static) = {
.path = "set interface lldp",
.short_help = "set interface lldp <interface> | sw_if_index <idx>"
@@ -313,7 +312,6 @@ VLIB_CLI_COMMAND(set_lldp_cmd, static) = {
"[tx-interval <value>]",
.function = lldp_cfg_cmd,
};
-/* *INDENT-ON* */
static const char *
lldp_chassis_id_subtype_str (lldp_chassis_id_subtype_t t)
@@ -580,7 +578,6 @@ format_lldp_intfs_detail (u8 * s, vlib_main_t * vm, const lldp_main_t * lm)
s = format (s, "\nLLDP-enabled interface table:\n");
f64 now = vlib_time_now (vm);
- /* *INDENT-OFF* */
pool_foreach (
n, lm->intfs) {
hw = vnet_get_hw_interface(vnm, n->hw_if_index);
@@ -640,7 +637,6 @@ format_lldp_intfs_detail (u8 * s, vlib_main_t * vm, const lldp_main_t * lm)
now, format_time_ago, n->last_heard, now);
}
}
- /* *INDENT-ON* */
return s;
}
@@ -663,7 +659,6 @@ format_lldp_intfs (u8 * s, va_list * va)
"Peer chassis ID", "Remote port ID", "Last heard", "Last sent",
"Status");
- /* *INDENT-OFF* */
pool_foreach (
n, lm->intfs) {
const vnet_hw_interface_t *hw =
@@ -689,7 +684,6 @@ format_lldp_intfs (u8 * s, va_list * va)
format_time_ago, n->last_sent, now, "inactive");
}
}
- /* *INDENT-ON* */
return s;
}
@@ -710,13 +704,11 @@ show_lldp (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(show_lldp_command, static) = {
.path = "show lldp",
.short_help = "show lldp [detail]",
.function = show_lldp,
};
-/* *INDENT-ON* */
/*
* packet trace format function, very similar to
diff --git a/src/plugins/lldp/lldp_doc.md b/src/plugins/lldp/lldp_doc.md
deleted file mode 100644
index 717de898c4e..00000000000
--- a/src/plugins/lldp/lldp_doc.md
+++ /dev/null
@@ -1,86 +0,0 @@
-# VPP Link Layer Discovery Protocol (LLDP) implementation {#lldp_doc}
-
-This is a memo intended to contain documentation of the VPP LLDP implementation
-Everything that is not directly obvious should come here.
-
-
-## LLDP
-LLDP is a link layer protocol to advertise the capabilities and current status of the system.
-
-There are 2 nodes handling LLDP
-
-1.) input-node which processes incoming packets and updates the local database
-2.) process-node which is responsible for sending out LLDP packets from VPP side
-
-
-### Configuration
-
-LLDP has a global configuration and a per-interface enable setting.
-
-Global configuration is modified using the "set lldp" command
-
-set lldp [system-name <string>] [tx-hold <value>] [tx-interval <value>]
-
-system-name: the name of the VPP system sent to peers in the system-name TLV
-tx-hold: multiplier for tx-interval when setting time-to-live (TTL) value in the LLDP packets (TTL = tx-hold * tx-interval + 1, if TTL > 65535, then TTL = 65535)
-tx-interval: time interval between sending out LLDP packets
-
-Per interface setting is done using the "set interface lldp" command
-
-set interface lldp <interface> | if_index <idx> [port-desc <string>] [disable]
-
-interface: the name of the interface for which to enable/disable LLDP
-if_index: sw interface index can be used if interface name is not used.
-port-desc: port description
-disable: LLDP feature can be enabled or disabled per interface.
-
-### Configuration example
-
-Configure system-name as "VPP" and transmit interval to 10 seconds:
-
-set lldp system-name VPP tx-interval 10
-
-Enable LLDP on interface TenGigabitEthernet5/0/1 with port description
-
-set interface lldp TenGigabitEthernet5/0/1 port-desc vtf:eth0
-
-
-### Operational data
-
-The list of LLDP-enabled interfaces which are up can be shown using "show lldp" command
-
-Example:
-DBGvpp# show lldp
-Local interface Peer chassis ID Remote port ID Last heard Last sent Status
-GigabitEthernet2/0/1 never 27.0s ago inactive
-TenGigabitEthernet5/0/1 8c:60:4f:dd:ca:52 Eth1/3/3 20.1s ago 18.3s ago active
-
-All LLDP configuration data with all LLDP-enabled interfaces can be shown using "show lldp detail" command
-
-Example:
-DBGvpp# show lldp detail
-LLDP configuration:
-Configured system name: vpp
-Configured tx-hold: 4
-Configured tx-interval: 30
-
-LLDP-enabled interface table:
-
-Interface name: GigabitEthernet2/0/1
-Interface/peer state: inactive(timeout)
-Last known peer chassis ID:
-Last known peer port ID:
-Last packet sent: 12.4s ago
-Last packet received: never
-
-Interface name: GigabitEthernet2/0/2
-Interface/peer state: interface down
-Last packet sent: never
-
-Interface name: TenGigabitEthernet5/0/1
-Interface/peer state: active
-Peer chassis ID: 8c:60:4f:dd:ca:52(MAC address)
-Remote port ID: Eth1/3/3(Locally assigned)
-Last packet sent: 3.6s ago
-Last packet received: 5.5s ago
-
diff --git a/src/plugins/lldp/lldp_doc.rst b/src/plugins/lldp/lldp_doc.rst
new file mode 100644
index 00000000000..a6737985aab
--- /dev/null
+++ b/src/plugins/lldp/lldp_doc.rst
@@ -0,0 +1,84 @@
+LLDP Protocol
+=============
+
+This is a memo intended to contain documentation of the VPP LLDP (Link
+Layer Discovery Protocol) implementation Everything that is not directly
+obvious should come here.
+
+LLDP
+----
+
+LLDP is a link layer protocol to advertise the capabilities and current
+status of the system.
+
+There are 2 nodes handling LLDP
+
+1.) input-node which processes incoming packets and updates the local
+database 2.) process-node which is responsible for sending out LLDP
+packets from VPP side
+
+Configuration
+~~~~~~~~~~~~~
+
+LLDP has a global configuration and a per-interface enable setting.
+
+Global configuration is modified using the “set lldp” command
+
+set lldp [system-name ] [tx-hold ] [tx-interval ]
+
+system-name: the name of the VPP system sent to peers in the system-name
+TLV tx-hold: multiplier for tx-interval when setting time-to-live (TTL)
+value in the LLDP packets (TTL = tx-hold \* tx-interval + 1, if TTL >
+65535, then TTL = 65535) tx-interval: time interval between sending out
+LLDP packets
+
+Per interface setting is done using the “set interface lldp” command
+
+set interface lldp \| if_index [port-desc ] [disable]
+
+interface: the name of the interface for which to enable/disable LLDP
+if_index: sw interface index can be used if interface name is not used.
+port-desc: port description disable: LLDP feature can be enabled or
+disabled per interface.
+
+Configuration example
+~~~~~~~~~~~~~~~~~~~~~
+
+Configure system-name as “VPP” and transmit interval to 10 seconds:
+
+set lldp system-name VPP tx-interval 10
+
+Enable LLDP on interface TenGigabitEthernet5/0/1 with port description
+
+set interface lldp TenGigabitEthernet5/0/1 port-desc vtf:eth0
+
+Operational data
+~~~~~~~~~~~~~~~~
+
+The list of LLDP-enabled interfaces which are up can be shown using
+“show lldp” command
+
+Example: DBGvpp# show lldp Local interface Peer chassis ID Remote port
+ID Last heard Last sent Status GigabitEthernet2/0/1 never 27.0s ago
+inactive TenGigabitEthernet5/0/1 8c:60:4f:dd:ca:52 Eth1/3/3 20.1s ago
+18.3s ago active
+
+All LLDP configuration data with all LLDP-enabled interfaces can be
+shown using “show lldp detail” command
+
+Example: DBGvpp# show lldp detail LLDP configuration: Configured system
+name: vpp Configured tx-hold: 4 Configured tx-interval: 30
+
+LLDP-enabled interface table:
+
+Interface name: GigabitEthernet2/0/1 Interface/peer state:
+inactive(timeout) Last known peer chassis ID: Last known peer port ID:
+Last packet sent: 12.4s ago Last packet received: never
+
+Interface name: GigabitEthernet2/0/2 Interface/peer state: interface
+down Last packet sent: never
+
+Interface name: TenGigabitEthernet5/0/1 Interface/peer state: active
+Peer chassis ID: 8c:60:4f:dd:ca:52(MAC address) Remote port ID:
+Eth1/3/3(Locally assigned) Last packet sent: 3.6s ago Last packet
+received: 5.5s ago
diff --git a/src/plugins/lldp/lldp_input.c b/src/plugins/lldp/lldp_input.c
index 327ef10f9de..b8aa846e385 100644
--- a/src/plugins/lldp/lldp_input.c
+++ b/src/plugins/lldp/lldp_input.c
@@ -48,13 +48,13 @@ lldp_rpc_update_peer_cb (const lldp_intf_update_t * a)
if (n->chassis_id)
{
- _vec_len (n->chassis_id) = 0;
+ vec_set_len (n->chassis_id, 0);
}
vec_add (n->chassis_id, chassis_id, a->chassis_id_len);
n->chassis_id_subtype = a->chassis_id_subtype;
if (n->port_id)
{
- _vec_len (n->port_id) = 0;
+ vec_set_len (n->port_id, 0);
}
vec_add (n->port_id, portid, a->portid_len);
n->port_id_subtype = a->portid_subtype;
diff --git a/src/plugins/lldp/lldp_node.c b/src/plugins/lldp/lldp_node.c
index dbb54af91f0..dbbb5d46402 100644
--- a/src/plugins/lldp/lldp_node.c
+++ b/src/plugins/lldp/lldp_node.c
@@ -102,7 +102,6 @@ lldp_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
/*
* lldp input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(lldp_input_node, static) = {
.function = lldp_node_fn,
.name = "lldp-input",
@@ -120,7 +119,6 @@ VLIB_REGISTER_NODE(lldp_input_node, static) = {
[LLDP_INPUT_NEXT_NORMAL] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* lldp process node function
@@ -220,7 +218,7 @@ lldp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
#endif
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
@@ -230,13 +228,11 @@ lldp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
/*
* lldp process node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(lldp_process_node, static) = {
.function = lldp_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "lldp-process",
};
-/* *INDENT-ON* */
void
lldp_schedule_intf (lldp_main_t * lm, lldp_intf_t * n)
diff --git a/src/plugins/lldp/lldp_protocol.h b/src/plugins/lldp/lldp_protocol.h
index e641b26e20d..c4219162dfe 100644
--- a/src/plugins/lldp/lldp_protocol.h
+++ b/src/plugins/lldp/lldp_protocol.h
@@ -56,12 +56,10 @@ struct lldp_tlv_head
u8 byte2; /* contains the lower bits of length */
};
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
struct lldp_tlv_head head;
u8 v[0];
}) lldp_tlv_t;
-/* *INDENT-ON* */
lldp_tlv_code_t lldp_tlv_get_code (const lldp_tlv_t * tlv);
void lldp_tlv_set_code (lldp_tlv_t * tlv, lldp_tlv_code_t code);
@@ -89,13 +87,11 @@ typedef enum
#undef F
} lldp_chassis_id_subtype_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
struct lldp_tlv_head head;
u8 subtype;
u8 id[0];
}) lldp_chassis_id_tlv_t;
-/* *INDENT-ON* */
#define foreach_port_id_subtype(F) \
F (0, reserved, "Reserved") \
@@ -118,7 +114,6 @@ typedef enum
#undef F
} lldp_port_id_subtype_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
struct lldp_tlv_head head;
u8 subtype;
@@ -129,7 +124,6 @@ typedef CLIB_PACKED (struct {
struct lldp_tlv_head head;
u16 ttl;
}) lldp_ttl_tlv_t;
-/* *INDENT-ON* */
#endif /* __included_lldp_protocol_h__ */
diff --git a/src/plugins/lldp/lldp_test.c b/src/plugins/lldp/lldp_test.c
index 661487c7835..ba5ecb20260 100644
--- a/src/plugins/lldp/lldp_test.c
+++ b/src/plugins/lldp/lldp_test.c
@@ -38,13 +38,11 @@ lldp_test_main_t lldp_test_main;
#define __plugin_msg_base lldp_test_main.msg_id_base
#include <vlibapi/vat_helper_macros.h>
-/* Macro to finish up custom dump fns */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static int
api_lldp_config (vat_main_t * vam)
@@ -144,4 +142,16 @@ api_sw_interface_set_lldp (vat_main_t * vam)
return ret;
}
+static int
+api_lldp_dump (vat_main_t *vam)
+{
+ return 0;
+}
+
+static void
+vl_api_lldp_dump_reply_t_handler (vl_api_lldp_dump_reply_t *mp)
+{
+ // not yet implemented
+}
+
#include <lldp/lldp.api_test.c>
diff --git a/src/plugins/mactime/CMakeLists.txt b/src/plugins/mactime/CMakeLists.txt
index debf033f71e..7111f415f25 100644
--- a/src/plugins/mactime/CMakeLists.txt
+++ b/src/plugins/mactime/CMakeLists.txt
@@ -38,6 +38,5 @@ if(VPP_BUILD_MACTIME_TOP)
svm
vppinfra
Threads::Threads
- rt m dl crypto
)
endif()
diff --git a/src/plugins/mactime/builtins.c b/src/plugins/mactime/builtins.c
index 321502454a2..c487d0375bf 100644
--- a/src/plugins/mactime/builtins.c
+++ b/src/plugins/mactime/builtins.c
@@ -1,5 +1,4 @@
#include <vnet/vnet.h>
-#include <builtinurl/builtinurl.h>
#include <http_static/http_static.h>
#include <mactime/mactime.h>
#include <vlib/unix/plugin.h>
@@ -15,9 +14,8 @@ mactime_ip_neighbor_copy (index_t ipni, void *ctx)
return (WALK_CONTINUE);
}
-static int
-handle_get_mactime (http_builtin_method_type_t reqtype,
- u8 * request, http_session_t * hs)
+static hss_url_handler_rc_t
+handle_get_mactime (hss_url_handler_args_t *args)
{
mactime_main_t *mm = &mactime_main;
mactime_device_t *dp;
@@ -147,21 +145,20 @@ handle_get_mactime (http_builtin_method_type_t reqtype,
vec_free (macstring);
vec_free (pool_indices);
- hs->data = s;
- hs->data_offset = 0;
- hs->cache_pool_index = ~0;
- hs->free_data = 1;
- return 0;
+ args->data = s;
+ args->data_len = vec_len (s);
+ args->free_vec_data = 1;
+ return HSS_URL_HANDLER_OK;
}
void
mactime_url_init (vlib_main_t * vm)
{
- void (*fp) (void *, char *, int);
+ hss_register_url_fn fp;
/* Look up the builtin URL registration handler */
fp = vlib_get_plugin_symbol ("http_static_plugin.so",
- "http_static_server_register_builtin_handler");
+ "hss_register_url_handler");
if (fp == 0)
{
@@ -169,7 +166,7 @@ mactime_url_init (vlib_main_t * vm)
return;
}
- (*fp) (handle_get_mactime, "mactime.json", HTTP_BUILTIN_METHOD_GET);
+ (*fp) (handle_get_mactime, "mactime.json", HTTP_REQ_GET);
}
/*
diff --git a/src/plugins/mactime/mactime.c b/src/plugins/mactime/mactime.c
index b8b1884119d..933e44ea5c1 100644
--- a/src/plugins/mactime/mactime.c
+++ b/src/plugins/mactime/mactime.c
@@ -28,7 +28,6 @@
#include <mactime/mactime.api_enum.h>
#include <mactime/mactime.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define REPLY_MSG_ID_BASE mm->msg_id_base
#include <vlibapi/api_helper_macros.h>
@@ -141,7 +140,6 @@ mactime_enable_disable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mactime_enable_disable_command, static) =
{
.path = "mactime enable-disable",
@@ -149,7 +147,6 @@ VLIB_CLI_COMMAND (mactime_enable_disable_command, static) =
"mactime enable-disable <interface-name> [disable]",
.function = mactime_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/** Enable / disable time-base src mac filtration on an interface
@@ -194,7 +191,6 @@ vl_api_mactime_dump_t_handler (vl_api_mactime_dump_t * mp)
goto send_reply;
}
- /* *INDENT-OFF* */
pool_foreach (dev, mm->devices)
{
message_size = sizeof(*ep) + vec_len(dev->device_name) +
@@ -231,15 +227,12 @@ vl_api_mactime_dump_t_handler (vl_api_mactime_dump_t * mp)
ep->device_name [ARRAY_LEN(ep->device_name) -1] = 0;
vl_api_send_msg (rp, (u8 *)ep);
}
- /* *INDENT-OFF* */
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_MACTIME_DUMP_REPLY,
({
rmp->table_epoch = clib_host_to_net_u32 (mm->device_table_epoch);
}));
- /* *INDENT-ON* */
}
/** Create a lookup table entry for the indicated mac address
@@ -429,12 +422,10 @@ mactime_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (mactime_init) =
{
.runs_after = VLIB_INITS("ip_neighbor_init"),
};
-/* *INDENT-ON* */
static clib_error_t *
mactime_config (vlib_main_t * vm, unformat_input_t * input)
@@ -462,30 +453,24 @@ mactime_config (vlib_main_t * vm, unformat_input_t * input)
VLIB_CONFIG_FUNCTION (mactime_config, "mactime");
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (mactime, static) =
{
.arc_name = "device-input",
.node_name = "mactime",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (mactime_tx, static) = {
.arc_name = "interface-output",
.node_name = "mactime-tx",
.runs_before = VNET_FEATURES ("interface-output-arc-end"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Time-based MAC Source Address Filter",
};
-/* *INDENT-ON* */
u8 *
format_bytes_with_width (u8 * s, va_list * va)
@@ -576,12 +561,10 @@ show_mactime_command_fn (vlib_main_t * vm,
if (verbose)
vlib_cli_output (vm, "Time now: %U", format_clib_timebase_time, now);
- /* *INDENT-OFF* */
pool_foreach (dp, mm->devices)
{
vec_add1 (pool_indices, dp - mm->devices);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%-15s %18s %14s %10s %11s %13s",
"Device Name", "Addresses", "Status",
@@ -637,7 +620,8 @@ show_mactime_command_fn (vlib_main_t * vm,
print:
vec_reset_length (macstring);
- macstring = format (0, "%U", format_mac_address, dp->mac_address);
+ macstring =
+ format (macstring, "%U", format_mac_address, dp->mac_address);
switch (current_status)
{
case 0:
@@ -692,14 +676,12 @@ show_mactime_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_mactime_command, static) =
{
.path = "show mactime",
.short_help = "show mactime [verbose]",
.function = show_mactime_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_mactime_command_fn (vlib_main_t * vm,
@@ -718,14 +700,12 @@ clear_mactime_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_mactime_command, static) =
{
.path = "clear mactime",
.short_help = "clear mactime counters",
.function = clear_mactime_command_fn,
};
-/* *INDENT-ON* */
diff --git a/src/plugins/mactime/mactime_test.c b/src/plugins/mactime/mactime_test.c
index 67655ccefad..5f31dca7012 100644
--- a/src/plugins/mactime/mactime_test.c
+++ b/src/plugins/mactime/mactime_test.c
@@ -281,12 +281,10 @@ api_mactime_dump (vat_main_t * vam)
W (ret);
fformat (vam->ofp, "%U", format_device, 0 /* header */ , 0 /* verbose */ );
- /* *INDENT-OFF* */
pool_foreach (dev, tm->devices)
{
fformat (vam->ofp, "%U", format_device, dev, verbose);
}
- /* *INDENT-ON* */
return ret;
}
diff --git a/src/plugins/mactime/mactime_top.c b/src/plugins/mactime/mactime_top.c
index 72d1964f32f..1517ec43e17 100644
--- a/src/plugins/mactime/mactime_top.c
+++ b/src/plugins/mactime/mactime_top.c
@@ -106,7 +106,6 @@ vl_api_mactime_details_t_handler (vl_api_mactime_details_t * mp)
}
}
-#define vl_print(handle, ...) fformat(handle, __VA_ARGS__)
#define vl_endianfun /* define message structures */
#include <mactime/mactime.api.h>
#undef vl_endianfun
@@ -143,14 +142,11 @@ connect_to_vpp (char *name)
if (mm->msg_id_base == (u16) ~ 0)
return -1;
-#define _(N,n) \
- vl_msg_api_set_handlers((VL_API_##N + mm->msg_id_base), \
- #n, \
- vl_api_##n##_t_handler, \
- vl_noop_handler, \
- vl_api_##n##_t_endian, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 1);
+#define _(N, n) \
+ vl_msg_api_set_handlers ((VL_API_##N + mm->msg_id_base), #n, \
+ vl_api_##n##_t_handler, vl_api_##n##_t_endian, \
+ vl_api_##n##_t_format, sizeof (vl_api_##n##_t), 1, \
+ vl_api_##n##_t_tojson, vl_api_##n##_t_fromjson);
foreach_mactime_api_msg;
#undef _
@@ -189,18 +185,16 @@ scrape_stats_segment (mt_main_t * mm)
mactime_device_t *dev;
stat_segment_access_t sa;
stat_client_main_t *sm = mm->stat_client_main;
- stat_segment_directory_entry_t *ep;
+ vlib_stats_entry_t *ep;
int need_update2 = 0;
static u32 *pool_indices;
int i, j;
vec_reset_length (pool_indices);
- /* *INDENT-OFF* */
pool_foreach (dev, mm->devices)
{
vec_add1 (pool_indices, dev->pool_index);
}
- /* *INDENT-ON* */
/* Nothing to do... */
if (vec_len (pool_indices) == 0)
@@ -443,13 +437,11 @@ print_device_table (mt_main_t * mm)
{
mactime_device_t *dev;
- fformat (stdout, "%U", format_device, 0 /* header */ , 0 /* verbose */ );
- /* *INDENT-OFF* */
+ fformat (stdout, "%U", format_device, NULL /* header */, 0 /* verbose */);
pool_foreach (dev, mm->devices)
{
fformat (stdout, "%U", format_device, dev, 0 /* verbose */);
}
- /* *INDENT-ON* */
}
int
diff --git a/src/plugins/mactime/node.c b/src/plugins/mactime/node.c
index 465cee380b1..fad487e666e 100644
--- a/src/plugins/mactime/node.c
+++ b/src/plugins/mactime/node.c
@@ -349,7 +349,6 @@ mactime_node_fn (vlib_main_t * vm,
return mactime_node_inline (vm, node, frame, 0 /* is_tx */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (mactime_node) =
{
.function = mactime_node_fn,
@@ -370,7 +369,6 @@ VLIB_REGISTER_NODE (mactime_node) =
[MACTIME_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
static uword
mactime_tx_node_fn (vlib_main_t * vm,
@@ -379,7 +377,6 @@ mactime_tx_node_fn (vlib_main_t * vm,
return mactime_node_inline (vm, node, frame, 1 /* is_tx */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (mactime_tx_node) =
{
.function = mactime_tx_node_fn,
@@ -400,7 +397,6 @@ VLIB_REGISTER_NODE (mactime_tx_node) =
[MACTIME_NEXT_ETHERNET_INPUT] = "ethernet-input", /* notused */
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/map/examples/gen-rules.py b/src/plugins/map/examples/gen-rules.py
index 7964aa9a359..3d98f65b95d 100755
--- a/src/plugins/map/examples/gen-rules.py
+++ b/src/plugins/map/examples/gen-rules.py
@@ -20,38 +20,64 @@ import sys
# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6
# map add rule index <0> psid <psid> ip6-dst <ip6-dst>
-def_ip4_pfx = '192.0.2.0/24'
-def_ip6_pfx = '2001:db8::/32'
-def_ip6_src = '2001:db8::1'
+def_ip4_pfx = "192.0.2.0/24"
+def_ip6_pfx = "2001:db8::/32"
+def_ip6_src = "2001:db8::1"
def_psid_offset = 6
def_psid_len = 6
def_ea_bits_len = 0
-parser = argparse.ArgumentParser(description='MAP VPP configuration generator')
-parser.add_argument('-t', action="store", dest="mapmode")
-parser.add_argument('-f', action="store", dest="format", default="vpp")
-parser.add_argument('--ip4-prefix', action="store", dest="ip4_pfx", default=def_ip4_pfx)
-parser.add_argument('--ip6-prefix', action="store", dest="ip6_pfx", default=def_ip6_pfx)
-parser.add_argument('--ip6-src', action="store", dest="ip6_src", default=def_ip6_src)
-parser.add_argument('--psid-len', action="store", dest="psid_len", default=def_psid_len)
-parser.add_argument('--psid-offset', action="store", dest="psid_offset", default=def_psid_offset)
-parser.add_argument('--ea-bits-len', action="store", dest="ea_bits_len", default=def_ea_bits_len)
+parser = argparse.ArgumentParser(description="MAP VPP configuration generator")
+parser.add_argument("-t", action="store", dest="mapmode")
+parser.add_argument("-f", action="store", dest="format", default="vpp")
+parser.add_argument("--ip4-prefix", action="store", dest="ip4_pfx", default=def_ip4_pfx)
+parser.add_argument("--ip6-prefix", action="store", dest="ip6_pfx", default=def_ip6_pfx)
+parser.add_argument("--ip6-src", action="store", dest="ip6_src", default=def_ip6_src)
+parser.add_argument("--psid-len", action="store", dest="psid_len", default=def_psid_len)
+parser.add_argument(
+ "--psid-offset", action="store", dest="psid_offset", default=def_psid_offset
+)
+parser.add_argument(
+ "--ea-bits-len", action="store", dest="ea_bits_len", default=def_ea_bits_len
+)
args = parser.parse_args()
+
#
# Print domain
#
def domain_print(i, ip4_pfx, ip6_pfx, ip6_src, eabits_len, psid_offset, psid_len):
- if format == 'vpp':
- print("map add domain ip4-pfx " + ip4_pfx + " ip6-pfx", ip6_pfx, "ip6-src " + ip6_src +
- " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len)
- if format == 'confd':
- print("vpp softwire softwire-instances softwire-instance", i, "br-ipv6 " + ip6_src +
- " ipv6-prefix " + ip6_pfx + " ipv4-prefix " + ip4_pfx +
- " ea-bits-len", eabits_len, "psid-offset", psid_offset, "psid-len", psid_len)
- if format == 'xml':
+ if format == "vpp":
+ print(
+ "map add domain ip4-pfx " + ip4_pfx + " ip6-pfx",
+ ip6_pfx,
+ "ip6-src " + ip6_src + " ea-bits-len",
+ eabits_len,
+ "psid-offset",
+ psid_offset,
+ "psid-len",
+ psid_len,
+ )
+ if format == "confd":
+ print(
+ "vpp softwire softwire-instances softwire-instance",
+ i,
+ "br-ipv6 "
+ + ip6_src
+ + " ipv6-prefix "
+ + ip6_pfx
+ + " ipv4-prefix "
+ + ip4_pfx
+ + " ea-bits-len",
+ eabits_len,
+ "psid-offset",
+ psid_offset,
+ "psid-len",
+ psid_len,
+ )
+ if format == "xml":
print("<softwire-instance>")
- print("<id>", i, "</id>");
+ print("<id>", i, "</id>")
print(" <br-ipv6>" + ip6_src + "</br-ipv6>")
print(" <ipv6-prefix>" + ip6_pfx + "</ipv6-prefix>")
print(" <ipv4-prefix>" + ip4_pfx + "</ipv4-prefix>")
@@ -59,32 +85,54 @@ def domain_print(i, ip4_pfx, ip6_pfx, ip6_src, eabits_len, psid_offset, psid_len
print(" <psid-len>", psid_len, "</psid-len>")
print(" <psid-offset>", psid_offset, "</psid-offset>")
+
def domain_print_end():
- if format == 'xml':
+ if format == "xml":
print("</softwire-instance>")
+
def rule_print(i, psid, dst):
- if format == 'vpp':
+ if format == "vpp":
print("map add rule index", i, "psid", psid, "ip6-dst", dst)
- if format == 'confd':
+ if format == "confd":
print("binding", psid, "ipv6-addr", dst)
- if format == 'xml':
+ if format == "xml":
print(" <binding>")
print(" <psid>", psid, "</psid>")
print(" <ipv6-addr>", dst, "</ipv6-addr>")
print(" </binding>")
+
#
# Algorithmic mapping Shared IPv4 address
#
-def algo(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
- domain_print(0, ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len)
+def algo(
+ ip4_pfx_str,
+ ip6_pfx_str,
+ ip6_src_str,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ ip6_src_ecmp=False,
+):
+ domain_print(
+ 0, ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len
+ )
domain_print_end()
+
#
# 1:1 Full IPv4 address
#
-def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+def lw46(
+ ip4_pfx_str,
+ ip6_pfx_str,
+ ip6_src_str,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ ip6_src_ecmp=False,
+):
ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
ip6_src = ipaddress.ip_address(ip6_src_str)
ip6_dst = ipaddress.ip_network(ip6_pfx_str)
@@ -92,15 +140,26 @@ def lw46(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_l
mod = ip4_pfx.num_addresses / 1024
for i in range(ip4_pfx.num_addresses):
- domain_print(i, str(ip4_pfx[i]) + "/32", str(ip6_dst[i]) + "/128", str(ip6_src), 0, 0, 0)
+ domain_print(
+ i, str(ip4_pfx[i]) + "/32", str(ip6_dst[i]) + "/128", str(ip6_src), 0, 0, 0
+ )
domain_print_end()
if ip6_src_ecmp and not i % mod:
ip6_src = ip6_src + 1
+
#
# 1:1 Shared IPv4 address, shared BR (16) VPP CLI
#
-def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+def lw46_shared(
+ ip4_pfx_str,
+ ip6_pfx_str,
+ ip6_src_str,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ ip6_src_ecmp=False,
+):
ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
ip6_src = ipaddress.ip_address(ip6_src_str)
ip6_dst = ipaddress.ip_network(ip6_pfx_str)
@@ -109,7 +168,7 @@ def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset,
for i in range(ip4_pfx.num_addresses):
domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len)
for psid in range(0x1 << int(psid_len)):
- rule_print(i, psid, str(ip6_dst[(i * (0x1<<int(psid_len))) + psid]))
+ rule_print(i, psid, str(ip6_dst[(i * (0x1 << int(psid_len))) + psid]))
domain_print_end()
if ip6_src_ecmp and not i % mod:
ip6_src = ip6_src + 1
@@ -118,7 +177,15 @@ def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset,
#
# 1:1 Shared IPv4 address, shared BR
#
-def lw46_shared_b(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+def lw46_shared_b(
+ ip4_pfx_str,
+ ip6_pfx_str,
+ ip6_src_str,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ ip6_src_ecmp=False,
+):
ip4_pfx = ipaddress.ip_network(ip4_pfx_str)
ip6_src = ipaddress.ip_address(ip6_src_str)
ip6_dst = list(ipaddress.ip_network(ip6_pfx_str).subnets(new_prefix=56))
@@ -127,15 +194,16 @@ def lw46_shared_b(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offse
for i in range(ip4_pfx.num_addresses):
domain_print(i, str(ip4_pfx[i]) + "/32", "::/0", str(ip6_src), 0, 0, psid_len)
for psid in range(0x1 << psid_len):
- enduserprefix = list(ip6_dst.pop(0).subnets(new_prefix=64))[255-1]
- rule_print(i, psid, enduserprefix[(i * (0x1<<psid_len)) + psid])
+ enduserprefix = list(ip6_dst.pop(0).subnets(new_prefix=64))[255 - 1]
+ rule_print(i, psid, enduserprefix[(i * (0x1 << psid_len)) + psid])
domain_print_end()
if ip6_src_ecmp and not i % mod:
ip6_src = ip6_src + 1
def xml_header_print():
- print('''
+ print(
+ """
<?xml version="1.0" encoding="UTF-8"?>
<hello xmlns="urn:ietf:params:xml:ns:netconf:base:1.0">
<capabilities>
@@ -156,10 +224,13 @@ def xml_header_print():
<softwire>
<softwire-instances>
- ''')
+ """
+ )
+
def xml_footer_print():
- print('''
+ print(
+ """
</softwire-instances>
</softwire>
</vpp>
@@ -175,12 +246,20 @@ def xml_footer_print():
</rpc>
]]>]]>
- ''')
+ """
+ )
format = args.format
-if format == 'xml':
+if format == "xml":
xml_header_print()
-globals()[args.mapmode](args.ip4_pfx, args.ip6_pfx, args.ip6_src, args.ea_bits_len, args.psid_offset, args.psid_len)
-if format == 'xml':
+globals()[args.mapmode](
+ args.ip4_pfx,
+ args.ip6_pfx,
+ args.ip6_src,
+ args.ea_bits_len,
+ args.psid_offset,
+ args.psid_len,
+)
+if format == "xml":
xml_footer_print()
diff --git a/src/plugins/map/examples/test_map.py b/src/plugins/map/examples/test_map.py
index 7a48964b3f2..f141ba3338c 100755
--- a/src/plugins/map/examples/test_map.py
+++ b/src/plugins/map/examples/test_map.py
@@ -1,128 +1,164 @@
#!/usr/bin/env python3
-import time,argparse,sys,cmd, unittest
+import time, argparse, sys, cmd, unittest
from ipaddress import *
-parser = argparse.ArgumentParser(description='VPP MAP test')
-parser.add_argument('-i', nargs='*', action="store", dest="inputdir")
+parser = argparse.ArgumentParser(description="VPP MAP test")
+parser.add_argument("-i", nargs="*", action="store", dest="inputdir")
args = parser.parse_args()
for dir in args.inputdir:
sys.path.append(dir)
from vpp_papi import *
+
#
# 1:1 Shared IPv4 address, shared BR (16) VPP CLI
#
-def lw46_shared(ip4_pfx_str, ip6_pfx_str, ip6_src_str, ea_bits_len, psid_offset, psid_len, ip6_src_ecmp = False):
+def lw46_shared(
+ ip4_pfx_str,
+ ip6_pfx_str,
+ ip6_src_str,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ ip6_src_ecmp=False,
+):
ip4_pfx = ip_network(ip4_pfx_str)
ip6_src = ip_address(ip6_src_str)
ip6_dst = ip_network(ip6_pfx_str)
- ip6_nul = IPv6Address(u'0::0')
+ ip6_nul = IPv6Address("0::0")
mod = ip4_pfx.num_addresses / 1024
for i in range(ip4_pfx.num_addresses):
a = time.clock()
- t = map_add_domain(0, ip6_nul.packed, ip4_pfx[i].packed, ip6_src.packed, 0, 32, 128, ea_bits_len, psid_offset, psid_len, 0, 0)
- #print "Return from map_add_domain", t
+ t = map_add_domain(
+ 0,
+ ip6_nul.packed,
+ ip4_pfx[i].packed,
+ ip6_src.packed,
+ 0,
+ 32,
+ 128,
+ ea_bits_len,
+ psid_offset,
+ psid_len,
+ 0,
+ 0,
+ )
+ # print "Return from map_add_domain", t
if t == None:
- print "map_add_domain failed"
+ print("map_add_domain failed")
continue
if t.retval != 0:
- print "map_add_domain failed", t
+ print(f"map_add_domain failed, {t}")
continue
for psid in range(0x1 << int(psid_len)):
- r = map_add_del_rule(0, t.index, 1, (ip6_dst[(i * (0x1<<int(psid_len))) + psid]).packed, psid)
- #print "Return from map_add_del_rule", r
+ r = map_add_del_rule(
+ 0,
+ t.index,
+ 1,
+ (ip6_dst[(i * (0x1 << int(psid_len))) + psid]).packed,
+ psid,
+ )
+ # print "Return from map_add_del_rule", r
if ip6_src_ecmp and not i % mod:
ip6_src = ip6_src + 1
- print "Running time:", time.clock() - a
+ print(f"Running time: {time.clock() - a}")
+
class TestMAP(unittest.TestCase):
- '''
+ """
def test_delete_all(self):
t = map_domain_dump(0)
self.assertNotEqual(t, None)
- print "Number of domains configured: ", len(t)
+ print(f"Number of domains configured: {len(t)}")
for d in t:
ts = map_del_domain(0, d.domainindex)
self.assertNotEqual(ts, None)
t = map_domain_dump(0)
self.assertNotEqual(t, None)
- print "Number of domains configured: ", len(t)
- self.assertEqual(len(t), 0)
+ print(f"Number of domains configured: {len(t)}")
+ self.assertEqual(len(t), 0)/
- '''
+ """
def test_a_million_rules(self):
- ip4_pfx = u'192.0.2.0/24'
- ip6_pfx = u'2001:db8::/32'
- ip6_src = u'2001:db8::1'
+ ip4_pfx = "192.0.2.0/24"
+ ip6_pfx = "2001:db8::/32"
+ ip6_src = "2001:db8::1"
psid_offset = 6
psid_len = 6
ea_bits_len = 0
lw46_shared(ip4_pfx, ip6_pfx, ip6_src, ea_bits_len, psid_offset, psid_len)
+
#
# RX thread, that should sit on blocking vpe_api_read()
-#
+#
#
#
#
import threading
-class RXThread (threading.Thread):
+
+
+class RXThread(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
def run(self):
- print "Starting "
+ print("Starting ")
i = 0
while True:
msg = vpe_api_read()
if msg:
- #print msg
- id = unpack('>H', msg[0:2])
- size = unpack('>H', msg[2:4])
- print "Received", id, "of size", size
+ # print msg
+ id = unpack(">H", msg[0:2])
+ size = unpack(">H", msg[2:4])
+ print(f"Received {id} of size {size}")
i += 1
- #del msg
+ # del msg
continue
- #time.sleep(0.001)
+ # time.sleep(0.001)
return
+
# Create RX thread
rxthread = RXThread()
rxthread.setDaemon(True)
-
-print "Connect", connect_to_vpe("client124")
+
+print(f"Connect {connect_to_vpe('client124')}")
import timeit
+
rxthread.start()
-print "After thread started"
+print("After thread started")
-#pneum_kill_thread()
-print "After thread killed"
+# pneum_kill_thread()
+print("After thread killed")
-#t = show_version(0)
-#print "Result from show version", t
+# t = show_version(0)
+# print "Result from show version", t
-print timeit.timeit('t = show_version(0)', number=1000, setup="from __main__ import show_version")
+print(
+ f"{timeit.timeit('t = show_version(0)', number=1000, setup='from __main__ import show_version')}"
+)
time.sleep(10)
-#print timeit.timeit('control_ping(0)', number=10, setup="from __main__ import control_ping")
+# print timeit.timeit('control_ping(0)', number=10, setup="from __main__ import control_ping")
disconnect_from_vpe()
sys.exit()
-print t.program, t.version,t.builddate,t.builddirectory
+print(f"{t.program} {t.version}{t.builddate}{t.builddirectory}")
-'''
+"""
t = map_domain_dump(0)
if not t:
@@ -131,11 +167,9 @@ if not t:
for d in t:
print("IP6 prefix:",str(IPv6Address(d.ip6prefix)))
print( "IP4 prefix:",str(IPv4Address(d.ip4prefix)))
-'''
+"""
suite = unittest.TestLoader().loadTestsFromTestCase(TestMAP)
unittest.TextTestRunner(verbosity=2).run(suite)
disconnect_from_vpe()
-
-
diff --git a/src/plugins/map/gen-rules.py b/src/plugins/map/gen-rules.py
index e43b8e155be..e804763d0cf 100755
--- a/src/plugins/map/gen-rules.py
+++ b/src/plugins/map/gen-rules.py
@@ -21,87 +21,143 @@ import sys
# map add domain ip4-pfx <pfx> ip6-pfx ::/0 ip6-src <ip6-src> ea-bits-len 0 psid-offset 6 psid-len 6
# map add rule index <0> psid <psid> ip6-dst <ip6-dst>
-parser = argparse.ArgumentParser(description='MAP VPP configuration generator')
-parser.add_argument('-t', action="store", dest="mapmode")
+parser = argparse.ArgumentParser(description="MAP VPP configuration generator")
+parser.add_argument("-t", action="store", dest="mapmode")
args = parser.parse_args()
+
#
# 1:1 Shared IPv4 address, shared BR
#
def shared11br():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
- ip6_dst = ipaddress.ip_network('bbbb::/32')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/16")
+ ip6_dst = ipaddress.ip_network("bbbb::/32")
psid_len = 6
for i in range(ip4_pfx.num_addresses):
- print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1",
- "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ print(
+ "map add domain ip4-pfx "
+ + str(ip4_pfx[i])
+ + "/32 ip6-pfx ::/0 ip6-shared-src cccc:bbbb::1",
+ "ea-bits-len 0 psid-offset 6 psid-len",
+ psid_len,
+ )
for psid in range(0x1 << psid_len):
- print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+ print(
+ "map add rule index",
+ i,
+ "psid",
+ psid,
+ "ip6-dst",
+ ip6_dst[(i * (0x1 << psid_len)) + psid],
+ )
#
# 1:1 Shared IPv4 address
#
def shared11():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
- ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
- ip6_dst = ipaddress.ip_network('bbbb::/32')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/16")
+ ip6_src = ipaddress.ip_network("cccc:bbbb::/64")
+ ip6_dst = ipaddress.ip_network("bbbb::/32")
psid_len = 6
for i in range(ip4_pfx.num_addresses):
- print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i],
- "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ print(
+ "map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src",
+ ip6_src[i],
+ "ea-bits-len 0 psid-offset 6 psid-len",
+ psid_len,
+ )
for psid in range(0x1 << psid_len):
- print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+ print(
+ "map add rule index",
+ i,
+ "psid",
+ psid,
+ "ip6-dst",
+ ip6_dst[(i * (0x1 << psid_len)) + psid],
+ )
+
#
# 1:1 Shared IPv4 address small
#
def smallshared11():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/24')
- ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
- ip6_dst = ipaddress.ip_network('bbbb::/32')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/24")
+ ip6_src = ipaddress.ip_network("cccc:bbbb::/64")
+ ip6_dst = ipaddress.ip_network("bbbb::/32")
psid_len = 6
for i in range(ip4_pfx.num_addresses):
- print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src", ip6_src[i],
- "ea-bits-len 0 psid-offset 6 psid-len", psid_len)
+ print(
+ "map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx ::/0 ip6-src",
+ ip6_src[i],
+ "ea-bits-len 0 psid-offset 6 psid-len",
+ psid_len,
+ )
for psid in range(0x1 << psid_len):
- print("map add rule index", i, "psid", psid, "ip6-dst", ip6_dst[(i * (0x1<<psid_len)) + psid])
+ print(
+ "map add rule index",
+ i,
+ "psid",
+ psid,
+ "ip6-dst",
+ ip6_dst[(i * (0x1 << psid_len)) + psid],
+ )
+
#
# 1:1 Full IPv4 address
#
def full11():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
- ip6_src = ipaddress.ip_network('cccc:bbbb::/64')
- ip6_dst = ipaddress.ip_network('bbbb::/32')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/16")
+ ip6_src = ipaddress.ip_network("cccc:bbbb::/64")
+ ip6_dst = ipaddress.ip_network("bbbb::/32")
psid_len = 0
for i in range(ip4_pfx.num_addresses):
- print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-src", ip6_src[i],
- "ea-bits-len 0 psid-offset 0 psid-len 0")
+ print(
+ "map add domain ip4-pfx "
+ + str(ip4_pfx[i])
+ + "/32 ip6-pfx "
+ + str(ip6_dst[i])
+ + "/128 ip6-src",
+ ip6_src[i],
+ "ea-bits-len 0 psid-offset 0 psid-len 0",
+ )
+
+
def full11br():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
- ip6_dst = ipaddress.ip_network('bbbb::/32')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/16")
+ ip6_dst = ipaddress.ip_network("bbbb::/32")
psid_len = 0
for i in range(ip4_pfx.num_addresses):
- print("map add domain ip4-pfx " + str(ip4_pfx[i]) + "/32 ip6-pfx " + str(ip6_dst[i]) + "/128 ip6-shared-src cccc:bbbb::1",
- "ea-bits-len 0 psid-offset 0 psid-len 0")
+ print(
+ "map add domain ip4-pfx "
+ + str(ip4_pfx[i])
+ + "/32 ip6-pfx "
+ + str(ip6_dst[i])
+ + "/128 ip6-shared-src cccc:bbbb::1",
+ "ea-bits-len 0 psid-offset 0 psid-len 0",
+ )
+
#
# Algorithmic mapping Shared IPv4 address
#
def algo():
- print("map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8")
- print("map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0")
+ print(
+ "map add domain ip4-pfx 20.0.0.0/24 ip6-pfx bbbb::/32 ip6-src cccc:bbbb::1 ea-bits-len 16 psid-offset 6 psid-len 8"
+ )
+ print(
+ "map add domain ip4-pfx 20.0.1.0/24 ip6-pfx bbbb:1::/32 ip6-src cccc:bbbb::2 ea-bits-len 8 psid-offset 0 psid-len 0"
+ )
+
#
# IP4 forwarding
#
def ip4():
- ip4_pfx = ipaddress.ip_network('20.0.0.0/16')
+ ip4_pfx = ipaddress.ip_network("20.0.0.0/16")
for i in range(ip4_pfx.num_addresses):
- print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2")
+ print("ip route add " + str(ip4_pfx[i]) + "/32 via 172.16.0.2")
globals()[args.mapmode]()
-
-
diff --git a/src/plugins/map/ip4_map.c b/src/plugins/map/ip4_map.c
index 1ab5cc2dc4f..652808e6d37 100644
--- a/src/plugins/map/ip4_map.c
+++ b/src/plugins/map/ip4_map.c
@@ -155,6 +155,7 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
map_main_t *mm = &map_main;
vlib_combined_counter_main_t *cm = mm->domain_counters;
u32 thread_index = vm->thread_index;
+ u32 *buffer0 = 0;
while (n_left_from > 0)
{
@@ -170,7 +171,6 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
ip6_header_t *ip6h0;
u32 next0 = IP4_MAP_NEXT_IP6_LOOKUP;
u32 map_domain_index0 = ~0;
- u32 *buffer0 = 0;
bool free_original_buffer0 = false;
u32 *frag_from0, frag_left0;
@@ -322,10 +322,10 @@ ip4_map (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
+ vec_free (buffer0);
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_map_feature, static) =
{
.arc_name = "ip4-unicast",
@@ -354,7 +354,6 @@ VLIB_REGISTER_NODE(ip4_map_node) = {
[IP4_MAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/map/ip4_map_t.c b/src/plugins/map/ip4_map_t.c
index 8ae76f331f6..fe29af458a2 100644
--- a/src/plugins/map/ip4_map_t.c
+++ b/src/plugins/map/ip4_map_t.c
@@ -56,7 +56,6 @@ typedef enum
//This is used to pass information within the buffer data.
//Buffer structure being too small to contain big structures like this.
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_address_t daddr;
ip6_address_t saddr;
@@ -64,7 +63,6 @@ typedef CLIB_PACKED (struct {
//sizeof(ip6) + sizeof(ip_frag) - sizeof(ip4)
u8 unused[28];
}) ip4_mapt_pseudo_header_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -684,7 +682,6 @@ ip4_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_map_t_feature, static) = {
.arc_name = "ip4-unicast",
.node_name = "ip4-map-t",
@@ -710,9 +707,7 @@ VLIB_REGISTER_NODE(ip4_map_t_fragmented_node) = {
[IP4_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = {
.function = ip4_map_t_icmp,
.name = "ip4-map-t-icmp",
@@ -731,9 +726,7 @@ VLIB_REGISTER_NODE(ip4_map_t_icmp_node) = {
[IP4_MAPT_ICMP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = {
.function = ip4_map_t_tcp_udp,
.name = "ip4-map-t-tcp-udp",
@@ -752,9 +745,7 @@ VLIB_REGISTER_NODE(ip4_map_t_tcp_udp_node) = {
[IP4_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip4_map_t_node) = {
.function = ip4_map_t,
.name = "ip4-map-t",
@@ -774,7 +765,6 @@ VLIB_REGISTER_NODE(ip4_map_t_node) = {
[IP4_MAPT_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/map/ip6_map.c b/src/plugins/map/ip6_map.c
index 1193dda0a80..3d9b21dfcd9 100644
--- a/src/plugins/map/ip6_map.c
+++ b/src/plugins/map/ip6_map.c
@@ -803,7 +803,6 @@ ip6_map_icmp_relay (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip6_map_feature, static) =
{
.arc_name = "ip6-unicast",
@@ -836,9 +835,7 @@ VLIB_REGISTER_NODE(ip6_map_node) = {
[IP6_MAP_NEXT_ICMP] = "ip6-icmp-error",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = {
.function = ip6_map_post_ip4_reass,
.name = "ip6-map-post-ip4-reass",
@@ -854,9 +851,7 @@ VLIB_REGISTER_NODE(ip6_map_post_ip4_reass_node) = {
[IP6_MAP_POST_IP4_REASS_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
.function = ip6_map_icmp_relay,
.name = "ip6-map-icmp-relay",
@@ -871,7 +866,6 @@ VLIB_REGISTER_NODE(ip6_map_icmp_relay_node, static) = {
[IP6_ICMP_RELAY_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
clib_error_t *
ip6_map_init (vlib_main_t * vm)
diff --git a/src/plugins/map/ip6_map_t.c b/src/plugins/map/ip6_map_t.c
index 861c049b0f4..51853d619e6 100644
--- a/src/plugins/map/ip6_map_t.c
+++ b/src/plugins/map/ip6_map_t.c
@@ -529,7 +529,10 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
ip60 = vlib_buffer_get_current (p0);
d0 =
- ip6_map_get_domain (&ip60->dst_address,
+ /* Originally using the IPv6 dest for rule lookup, now source
+ * [dgeist] ip6_map_get_domain (&ip60->dst_address,
+ */
+ ip6_map_get_domain (&ip60->src_address,
&vnet_buffer (p0)->map_t.map_domain_index,
&error0);
if (!d0)
@@ -687,7 +690,6 @@ ip6_map_t (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = {
.function = ip6_map_t_fragmented,
.name = "ip6-map-t-fragmented",
@@ -707,9 +709,7 @@ VLIB_REGISTER_NODE(ip6_map_t_fragmented_node) = {
[IP6_MAPT_FRAGMENTED_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = {
.function = ip6_map_t_icmp,
.name = "ip6-map-t-icmp",
@@ -729,9 +729,7 @@ VLIB_REGISTER_NODE(ip6_map_t_icmp_node) = {
[IP6_MAPT_ICMP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = {
.function = ip6_map_t_tcp_udp,
.name = "ip6-map-t-tcp-udp",
@@ -751,9 +749,7 @@ VLIB_REGISTER_NODE(ip6_map_t_tcp_udp_node) = {
[IP6_MAPT_TCP_UDP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip6_map_t_feature, static) = {
.arc_name = "ip6-unicast",
.node_name = "ip6-map-t",
@@ -781,7 +777,6 @@ VLIB_REGISTER_NODE(ip6_map_t_node) = {
[IP6_MAPT_NEXT_ICMP] = "ip6-icmp-error",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/map/lpm.c b/src/plugins/map/lpm.c
index c0e5bad1417..a2fc3337167 100644
--- a/src/plugins/map/lpm.c
+++ b/src/plugins/map/lpm.c
@@ -28,7 +28,13 @@ masked_address32 (uint32_t addr, uint8_t len)
static uint64_t
masked_address64 (uint64_t addr, uint8_t len)
{
- return len == 64 ? addr : addr & ~(~0ull >> len);
+ /* This was originally causing non-64-bit masks to not match due to LSB vs
+ * MSB masking (0s at the head of the value) Probably needs some corner case
+ * checking in case my masking logic was off [dgeist]
+ *
+ * return len == 64 ? addr : addr & ~(~0ull >> len);
+ */
+ return len == 64 ? addr : addr & ((1ull << (len)) - 1);
}
static void
@@ -126,13 +132,25 @@ lpm_128_add (lpm_t *lpm, void *addr_v, u8 pfxlen, u32 value)
BVT(clib_bihash_kv) kv;
ip6_address_t *addr = addr_v;
- kv.key[0] = masked_address64(addr->as_u64[0], pfxlen > 64 ? 64 : pfxlen);
+ /* This is a quick hack. It works for pfxlen < 64 but needs validation for
+ * other [dgeist]
+ *
+ * kv.key[0] = masked_address64(addr->as_u64[0], pfxlen > 64 ? 64 : pfxlen);
+ */
+ kv.key[0] = masked_address64 (addr->as_u64[0], pfxlen > 64 ? 64 : 64);
kv.key[1] = masked_address64(addr->as_u64[1], pfxlen > 64 ? pfxlen - 64 : 0);
kv.key[2] = pfxlen;
kv.value = value;
BV(clib_bihash_add_del)(&lpm->bihash, &kv, 1);
lpm->prefix_length_refcount[pfxlen]++;
- lpm->prefix_lengths_bitmap = clib_bitmap_set (lpm->prefix_lengths_bitmap, 128 - pfxlen, 1);
+ /* Populating the lengths bitmap table with prefix of 48 instead of 80
+ * (128 - 48) [dgeist]
+ *
+ * lpm->prefix_lengths_bitmap = clib_bitmap_set (
+ * lpm->prefix_lengths_bitmap, 128 - pfxlen, 1);
+ */
+ lpm->prefix_lengths_bitmap = clib_bitmap_set (
+ lpm->prefix_lengths_bitmap, pfxlen > 64 ? 128 - pfxlen : pfxlen, 1);
}
static void
@@ -148,8 +166,8 @@ lpm_128_delete (lpm_t *lpm, void *addr_v, u8 pfxlen)
/* refcount accounting */
ASSERT (lpm->prefix_length_refcount[pfxlen] > 0);
if (--lpm->prefix_length_refcount[pfxlen] == 0) {
- lpm->prefix_lengths_bitmap = clib_bitmap_set (lpm->prefix_lengths_bitmap,
- 128 - pfxlen, 0);
+ lpm->prefix_lengths_bitmap =
+ clib_bitmap_set (lpm->prefix_lengths_bitmap, 128 - pfxlen, 0);
}
}
diff --git a/src/plugins/map/map.c b/src/plugins/map/map.c
index 938793c8947..3cffadd39e8 100644
--- a/src/plugins/map/map.c
+++ b/src/plugins/map/map.c
@@ -176,6 +176,10 @@ map_create_domain (ip4_address_t * ip4_prefix,
mm->ip6_src_prefix_tbl->add (mm->ip6_src_prefix_tbl, &d->ip6_src,
d->ip6_src_len, *map_domain_index);
+ /* Let's build a table with the MAP rule ip6 prefixes as well [dgeist] */
+ mm->ip6_prefix_tbl->add (mm->ip6_prefix_tbl, &d->ip6_prefix,
+ d->ip6_prefix_len, *map_domain_index);
+
/* Validate packet/byte counters */
map_domain_counter_lock (mm);
int i;
@@ -218,6 +222,9 @@ map_delete_domain (u32 map_domain_index)
d->ip4_prefix_len);
mm->ip6_src_prefix_tbl->delete (mm->ip6_src_prefix_tbl, &d->ip6_src,
d->ip6_src_len);
+ /* Addition to remove the new table [dgeist] */
+ mm->ip6_prefix_tbl->delete (mm->ip6_prefix_tbl, &d->ip6_prefix,
+ d->ip6_prefix_len);
/* Release user-assigned MAP domain name. */
map_free_extras (map_domain_index);
@@ -979,10 +986,8 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input,
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
{
- /* *INDENT-OFF* */
pool_foreach (d, mm->domains)
{vlib_cli_output(vm, "%U", format_map_domain, d, counters);}
- /* *INDENT-ON* */
return 0;
}
@@ -1008,10 +1013,8 @@ show_map_domain_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (map_domain_index == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (d, mm->domains)
{vlib_cli_output(vm, "%U", format_map_domain, d, counters);}
- /* *INDENT-ON* */
}
else
{
@@ -1062,7 +1065,6 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
- /* *INDENT-OFF* */
pool_foreach (d, mm->domains) {
if (d->rules) {
rulecount+= 0x1 << d->psid_length;
@@ -1071,7 +1073,6 @@ show_map_stats_command_fn (vlib_main_t * vm, unformat_input_t * input,
domains += sizeof(*d);
domaincount++;
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "MAP domains structure: %d\n", sizeof (map_domain_t));
vlib_cli_output (vm, "MAP domains: %d (%d bytes)\n", domaincount, domains);
@@ -1255,7 +1256,6 @@ done:
}
-/* *INDENT-OFF* */
/*?
* Set or copy the IP TOS/Traffic Class field
@@ -1312,7 +1312,7 @@ VLIB_CLI_COMMAND(map_pre_resolve_command, static) = {
/*?
* Enable or disable the MAP-E inbound security check
- * Specifiy if the inbound security check should be done on fragments
+ * Specify if the inbound security check should be done on fragments
*
* @cliexpar
* @cliexstart{map params security-check}
@@ -1333,9 +1333,8 @@ VLIB_CLI_COMMAND(map_security_check_command, static) = {
.function = map_security_check_command_fn,
};
-
/*?
- * Specifiy the IPv4 source address used for relayed ICMP error messages
+ * Specify the IPv4 source address used for relayed ICMP error messages
*
* @cliexpar
* @cliexstart{map params icmp source-address}
@@ -1470,7 +1469,6 @@ VLIB_PLUGIN_REGISTER() = {
.description = "Mapping of Address and Port (MAP)",
};
-/* *INDENT-ON* */
/*
* map_init
diff --git a/src/plugins/map/map.h b/src/plugins/map/map.h
index d874aa47b3a..86b8ec22df4 100644
--- a/src/plugins/map/map.h
+++ b/src/plugins/map/map.h
@@ -335,7 +335,11 @@ ip6_map_get_domain (ip6_address_t * addr, u32 * map_domain_index, u8 * error)
{
map_main_t *mm = &map_main;
u32 mdi =
- mm->ip6_src_prefix_tbl->lookup (mm->ip6_src_prefix_tbl, addr, 128);
+ /* This is the old src (ip6 destination) hash lookup [dgeist]
+ *
+ * mm->ip6_src_prefix_tbl->lookup (mm->ip6_src_prefix_tbl, addr, 128);
+ */
+ mm->ip6_prefix_tbl->lookup (mm->ip6_prefix_tbl, addr, 128);
if (mdi == ~0)
{
*error = MAP_ERROR_NO_DOMAIN;
diff --git a/src/plugins/map/map_api.c b/src/plugins/map/map_api.c
index f81216dddbd..1dbff4ca0d1 100644
--- a/src/plugins/map/map_api.c
+++ b/src/plugins/map/map_api.c
@@ -50,13 +50,11 @@ vl_api_map_add_domain_t_handler (vl_api_map_add_domain_t * mp)
mp->ip6_src.len, mp->ea_bits_len, mp->psid_offset,
mp->psid_length, &index, mp->mtu, flags, mp->tag);
- /* *INDENT-OFF* */
REPLY_MACRO2_END(VL_API_MAP_ADD_DOMAIN_REPLY,
({
rmp->index = index;
}));
- /* *INDENT-ON* */
}
static void
@@ -98,7 +96,6 @@ send_domain_details (u32 map_domain_index, vl_api_registration_t * rp,
vec_elt_at_index (mm->domain_extras, map_domain_index);
int tag_len = clib_min (ARRAY_LEN (rmp->tag), vec_len (de->tag) + 1);
- /* *INDENT-OFF* */
REPLY_MACRO_DETAILS4(VL_API_MAP_DOMAIN_DETAILS, rp, context,
({
rmp->domain_index = htonl (map_domain_index);
@@ -119,7 +116,6 @@ send_domain_details (u32 map_domain_index, vl_api_registration_t * rp,
memcpy (rmp->tag, de->tag, tag_len - 1);
rmp->tag[tag_len - 1] = '\0';
}));
- /* *INDENT-ON* */
}
static void
@@ -136,12 +132,10 @@ vl_api_map_domain_dump_t_handler (vl_api_map_domain_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach_index (i, mm->domains)
{
send_domain_details(i, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -152,12 +146,10 @@ vl_api_map_domains_get_t_handler (vl_api_map_domains_get_t * mp)
i32 rv = 0;
- /* *INDENT-OFF* */
REPLY_AND_DETAILS_MACRO (VL_API_MAP_DOMAINS_GET_REPLY, mm->domains,
({
send_domain_details (cursor, rp, mp->context);
}));
- /* *INDENT-ON* */
}
static void
diff --git a/src/plugins/map/map_doc.md b/src/plugins/map/map_doc.md
deleted file mode 100644
index f3e2a56706d..00000000000
--- a/src/plugins/map/map_doc.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# VPP MAP and Lw4o6 implementation {#map_doc}
-
-This is a memo intended to contain documentation of the VPP MAP and Lw4o6 implementations.
-Everything that is not directly obvious should come here.
-
-
-
-## MAP-E Virtual Reassembly
-
-The MAP-E implementation supports handling of IPv4 fragments as well as IPv4-in-IPv6 inner and outer fragments. This is called virtual reassembly because the fragments are not actually reassembled. Instead, some meta-data are kept about the first fragment and reused for subsequent fragments.
-
-Fragment caching and handling is not always necessary. It is performed when:
-* An IPv4 fragment is received and the destination IPv4 address is shared.
-* An IPv6 packet is received with an inner IPv4 fragment, the IPv4 source address is shared, and 'security-check fragments' is on.
-* An IPv6 fragment is received.
-
-There are 3 dedicated nodes:
-* ip4-map-reass
-* ip6-map-ip4-reass
-* ip6-map-ip6-reass
-
-ip4-map sends all fragments to ip4-map-reass.
-ip6-map sends all inner-fragments to ip6-map-ip4-reass.
-ip6-map sends all outer-fragments to ip6-map-ip6-reass.
-
-IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp. IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6). Therefore, each packet reassembly makes use of exactly one reassembly structure. When such a structure is allocated, it is timestamped with the current time. Finally, those structures are capable of storing a limited number of buffer indexes.
-
-An IPv4 (resp. IPv6) reassembly structure can cache up to MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp. MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached until the first fragment is received.
-
-#### Virtual Reassembly configuration
-
-IPv4 and IPv6 virtual reassembly support the following configuration:
- map params reassembly [ip4 | ip6] [lifetime <lifetime-ms>] [pool-size <pool-size>] [buffers <buffers>] [ht-ratio <ht-ratio>]
-
-lifetime:
- The time in milliseconds a reassembly structure is considered valid. The longer, the more reliable is reassembly, but the more likely it is to exhaust the pool of reassembly structures. IPv4 standard suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60 seconds. Those values are not realistic for high-throughput cases.
-
-buffers:
- The upper limit of buffers that are allowed to be cached. It can be used to protect against fragmentation attacks which would aim to exhaust the global buffers pool.
-
-pool-size:
- The number of reassembly structures that can be allocated. As each structure can store a small fixed number of fragments, it also sets an upper-bound of 'pool-size * MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY' buffers that can be cached in total.
-
-ht-ratio:
- The amount of buckets in the hash-table is pool-size * ht-ratio.
-
-
-Any time pool-size and ht-ratio is modified, the hash-table is destroyed and created again, which means all current state is lost.
-
-
-##### Additional considerations
-
-Reassembly at high rate is expensive in terms of buffers. There is a trade-off between the lifetime and number of allocated buffers. Reducing the lifetime helps, but at the cost of loosing state for fragments that are wide appart.
-
-Let:
-R be the packet rate at which fragments are received.
-F be the number of fragments per packet.
-
-Assuming the first fragment is always received last. We should have:
-buffers > lifetime * R / F * (F - 1)
-pool-size > lifetime * R/F
-
-This is a worst case. Receiving the first fragment earlier helps reducing the number of required buffers. Also, an optimization is implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES) which counts the number of transmitted bytes and remembers the total number of bytes which should be transmitted based on the last fragment, and therefore helps reducing 'pool-size'.
-
-But the formula shows that it is challenging to forward a significant amount of fragmented packets at high rates. For instance, with a lifetime of 1 second, 5Mpps packet rate would require buffering up to 2.5 millions fragments.
-
-If you want to do that, be prepared to configure a lot of fragments.
-
-
diff --git a/src/plugins/map/map_doc.rst b/src/plugins/map/map_doc.rst
new file mode 100644
index 00000000000..663e815d545
--- /dev/null
+++ b/src/plugins/map/map_doc.rst
@@ -0,0 +1,99 @@
+MAP and Lw4o6
+=============
+
+This is a memo intended to contain documentation of the VPP MAP and
+Lw4o6 implementations. Everything that is not directly obvious should
+come here.
+
+MAP-E Virtual Reassembly
+------------------------
+
+The MAP-E implementation supports handling of IPv4 fragments as well as
+IPv4-in-IPv6 inner and outer fragments. This is called virtual
+reassembly because the fragments are not actually reassembled. Instead,
+some meta-data are kept about the first fragment and reused for
+subsequent fragments.
+
+Fragment caching and handling is not always necessary. It is performed
+when: \* An IPv4 fragment is received and the destination IPv4 address
+is shared. \* An IPv6 packet is received with an inner IPv4 fragment,
+the IPv4 source address is shared, and ‘security-check fragments’ is on.
+\* An IPv6 fragment is received.
+
+There are 3 dedicated nodes: \* ip4-map-reass \* ip6-map-ip4-reass \*
+ip6-map-ip6-reass
+
+ip4-map sends all fragments to ip4-map-reass. ip6-map sends all
+inner-fragments to ip6-map-ip4-reass. ip6-map sends all outer-fragments
+to ip6-map-ip6-reass.
+
+IPv4 (resp. IPv6) virtual reassembly makes use of a hash table in order
+to store IPv4 (resp. IPv6) reassembly structures. The hash-key is based
+on the IPv4-src:IPv4-dst:Frag-ID:Protocol tuple (resp.
+IPv6-src:IPv6-dst:Frag-ID tuple, as the protocol is IPv4-in-IPv6).
+Therefore, each packet reassembly makes use of exactly one reassembly
+structure. When such a structure is allocated, it is timestamped with
+the current time. Finally, those structures are capable of storing a
+limited number of buffer indexes.
+
+An IPv4 (resp. IPv6) reassembly structure can cache up to
+MAP_IP4_REASS_MAX_FRAGMENTS_PER_REASSEMBLY (resp.
+MAP_IP6_REASS_MAX_FRAGMENTS_PER_REASSEMBLY) buffers. Buffers are cached
+until the first fragment is received.
+
+Virtual Reassembly configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+IPv4 and IPv6 virtual reassembly support the following configuration:
+map params reassembly [ip4 \| ip6] [lifetime ] [pool-size ] [buffers ]
+[ht-ratio ]
+
+lifetime: The time in milliseconds a reassembly structure is considered
+valid. The longer, the more reliable is reassembly, but the more likely
+it is to exhaust the pool of reassembly structures. IPv4 standard
+suggests a lifetime of 15 seconds. IPv6 specifies a lifetime of 60
+seconds. Those values are not realistic for high-throughput cases.
+
+buffers: The upper limit of buffers that are allowed to be cached. It
+can be used to protect against fragmentation attacks which would aim to
+exhaust the global buffers pool.
+
+pool-size: The number of reassembly structures that can be allocated. As
+each structure can store a small fixed number of fragments, it also sets
+an upper-bound of ‘pool-size \*
+MAP_IPX_REASS_MAX_FRAGMENTS_PER_REASSEMBLY’ buffers that can be cached
+in total.
+
+ht-ratio: The amount of buckets in the hash-table is pool-size \*
+ht-ratio.
+
+Any time pool-size and ht-ratio is modified, the hash-table is destroyed
+and created again, which means all current state is lost.
+
+Additional considerations
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Reassembly at high rate is expensive in terms of buffers. There is a
+trade-off between the lifetime and number of allocated buffers. Reducing
+the lifetime helps, but at the cost of loosing state for fragments that
+are wide apart.
+
+Let: R be the packet rate at which fragments are received. F be the
+number of fragments per packet.
+
+Assuming the first fragment is always received last. We should have:
+buffers > lifetime \* R / F \* (F - 1) pool-size > lifetime \* R/F
+
+This is a worst case. Receiving the first fragment earlier helps
+reducing the number of required buffers. Also, an optimization is
+implemented (MAP_IP6_REASS_COUNT_BYTES and MAP_IP4_REASS_COUNT_BYTES)
+which counts the number of transmitted bytes and remembers the total
+number of bytes which should be transmitted based on the last fragment,
+and therefore helps reducing ‘pool-size’.
+
+But the formula shows that it is challenging to forward a significant
+amount of fragmented packets at high rates. For instance, with a
+lifetime of 1 second, 5Mpps packet rate would require buffering up to
+2.5 millions fragments.
+
+If you want to do that, be prepared to configure a lot of fragments.
diff --git a/src/plugins/marvell/README.md b/src/plugins/marvell/README.md
deleted file mode 100644
index 3f3c27e3618..00000000000
--- a/src/plugins/marvell/README.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# Marvell device plugin for VPP {#marvell_plugin_doc}
-
-##Overview
-This plugins provides native device support for Marvell PP2 network device, by use of Marvell Usermode SDK ([MUSDK][1]).
-Code is developed and tested on [MACCHIATObin][2] board.
-
-##Prerequisites
-Plugins depends on installed MUSDK and Marvell provided linux [kernel][3] with MUSDK provided kernel patches (see `patches/linux` in musdk repo and relevant documentation.
-Kernel version used: **4.14.22 armada-18.09.3**
-MUSDK version used: **armada-18.09.3**
-Following kernel modules from MUSDK must be loaded for plugin to work:
-* `musdk_cma.ko`
-* `mv_pp_uio.ko`
-
-##Musdk 18.09.3 compilation steps
-
-```
-./bootstrap
-./configure --prefix=/opt/vpp/external/aarch64/ CFLAGS="-Wno-error=unused-result -g -fPIC" --enable-shared=no
-sed -i -e 's/marvell,mv-pp-uio/generic-uio/' modules/pp2/mv_pp_uio.c
-sed -i -e 's/O_CREAT/O_CREAT, S_IRUSR | S_IWUSR/' src/lib/file_utils.c
-make
-sudo make install
-```
-
-## Usage
-### Interface Cration
-Interfaces are dynamically created with following CLI:
-```
-create interface marvell pp2 name eth0
-set interface state mv-ppio-0/0 up
-```
-
-Where `eth0` is linux interface name and `mv-ppio-X/Y` is VPP interface name where X is PP2 device ID and Y is PPIO ID
-Interface needs to be assigned to MUSDK in FDT configuration and linux interface state must be up.
-
-### Interface Deletion
-Interface can be deleted with following CLI:
-```
-delete interface marvell pp2 <interface name>
-```
-
-
-### Interface Statistics
-Interface statistics can be displayed with `sh hardware-interface mv-ppio0/0`
-command.
-
-### Interaction with DPDK plugin
-This plugin doesn't have any dependency on DPDK or DPDK plugin but it can
-work with DPDK plugin enabled or disabled. It is observed that performace is
-better around 30% when DPDK plugin is disabled, as DPDK plugin registers
-own buffer manager, which needs to deal with additional metadata in each packet.
-
-DPKD plugin can be disabled by adding following config to the startup.conf.
-
-```
-plugins {
- dpdk_plugin.so { disable }
-}
-```
-
-
-[1]: https://github.com/MarvellEmbeddedProcessors/musdk-marvell
-[2]: http://macchiatobin.net
-[3]: https://github.com/MarvellEmbeddedProcessors/linux-marvell
diff --git a/src/plugins/marvell/README.rst b/src/plugins/marvell/README.rst
new file mode 100644
index 00000000000..19cf1c49d0e
--- /dev/null
+++ b/src/plugins/marvell/README.rst
@@ -0,0 +1,85 @@
+Marvell device plugin
+=====================
+
+Overview
+--------
+
+This plugins provides native device support for Marvell PP2 network
+device, by use of Marvell Usermode SDK
+(`MUSDK <https://github.com/MarvellEmbeddedProcessors/musdk-marvell>`__).
+Code is developed and tested on
+`MACCHIATObin <http://macchiatobin.net>`__ board.
+
+Prerequisites
+-------------
+
+Plugins depends on installed MUSDK and Marvell provided linux
+`kernel <https://github.com/MarvellEmbeddedProcessors/linux-marvell>`__
+with MUSDK provided kernel patches (see ``patches/linux`` in musdk repo
+and relevant documentation. Kernel version used: **4.14.22
+armada-18.09.3** MUSDK version used: **armada-18.09.3** Following kernel
+modules from MUSDK must be loaded for plugin to work: \*
+``musdk_cma.ko`` \* ``mv_pp_uio.ko``
+
+Musdk 18.09.3 compilation steps
+-------------------------------
+
+::
+
+ ./bootstrap
+ ./configure --prefix=/opt/vpp/external/aarch64/ CFLAGS="-Wno-error=unused-result -g -fPIC" --enable-shared=no
+ sed -i -e 's/marvell,mv-pp-uio/generic-uio/' modules/pp2/mv_pp_uio.c
+ sed -i -e 's/O_CREAT/O_CREAT, S_IRUSR | S_IWUSR/' src/lib/file_utils.c
+ make
+ sudo make install
+
+Usage
+-----
+
+Interface Creation
+~~~~~~~~~~~~~~~~~~
+
+Interfaces are dynamically created with following CLI:
+
+::
+
+ create interface marvell pp2 name eth0
+ set interface state mv-ppio-0/0 up
+
+Where ``eth0`` is linux interface name and ``mv-ppio-X/Y`` is VPP
+interface name where X is PP2 device ID and Y is PPIO ID Interface needs
+to be assigned to MUSDK in FDT configuration and linux interface state
+must be up.
+
+Interface Deletion
+~~~~~~~~~~~~~~~~~~
+
+Interface can be deleted with following CLI:
+
+::
+
+ delete interface marvell pp2 <interface name>
+
+Interface Statistics
+~~~~~~~~~~~~~~~~~~~~
+
+Interface statistics can be displayed with
+``sh hardware-interface mv-ppio0/0`` command.
+
+Interaction with DPDK plugin
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This plugin doesn’t have any dependency on DPDK or DPDK plugin but it
+can work with DPDK plugin enabled or disabled. It is observed that
+performance is better around 30% when DPDK plugin is disabled, as DPDK
+plugin registers own buffer manager, which needs to deal with additional
+metadata in each packet.
+
+DPKD plugin can be disabled by adding following config to the
+startup.conf.
+
+::
+
+ plugins {
+ dpdk_plugin.so { disable }
+ }
diff --git a/src/plugins/marvell/plugin.c b/src/plugins/marvell/plugin.c
index fe673092a5e..ed90776ba95 100644
--- a/src/plugins/marvell/plugin.c
+++ b/src/plugins/marvell/plugin.c
@@ -19,12 +19,10 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Marvell PP2 Device Driver",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/marvell/pp2/cli.c b/src/plugins/marvell/pp2/cli.c
index 28ef35b2b24..f4ecb1873c9 100644
--- a/src/plugins/marvell/pp2/cli.c
+++ b/src/plugins/marvell/pp2/cli.c
@@ -59,13 +59,11 @@ mrvl_pp2_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mrvl_pp2_create_command, static) = {
.path = "create interface marvell pp2",
.short_help = "create interface marvell pp2 [name <ifname>] [rx-queue-size slots] [tx-queue-size slots]",
.function = mrvl_pp2_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
mrvl_pp2_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -110,14 +108,12 @@ mrvl_pp2_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mrvl_pp2_delete_command, static) = {
.path = "delete interface marvell pp2",
.short_help = "delete interface marvell pp2 "
"{<interface> | sw_if_index <sw_idx>}",
.function = mrvl_pp2_delete_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
mrvl_pp2_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/marvell/pp2/format.c b/src/plugins/marvell/pp2/format.c
index 838f5169b05..877010ea561 100644
--- a/src/plugins/marvell/pp2/format.c
+++ b/src/plugins/marvell/pp2/format.c
@@ -22,7 +22,6 @@
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
-#include <vppinfra/linux/syscall.h>
#include <vnet/plugin/plugin.h>
#include <marvell/pp2/pp2.h>
diff --git a/src/plugins/marvell/pp2/input.c b/src/plugins/marvell/pp2/input.c
index 44f01355e39..2545f91becb 100644
--- a/src/plugins/marvell/pp2/input.c
+++ b/src/plugins/marvell/pp2/input.c
@@ -218,8 +218,8 @@ mrvl_pp2_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
n_rx_bytes += mrvl_pp2_next_from_desc (node, d, b0, &next0);
n_rx_bytes += mrvl_pp2_next_from_desc (node, d + 1, b1, &next1);
- vnet_feature_start_device_input_x2 (ppif->sw_if_index, &next0,
- &next1, b0, b1);
+ vnet_feature_start_device_input (ppif->sw_if_index, &next0, b0);
+ vnet_feature_start_device_input (ppif->sw_if_index, &next1, b1);
}
else
{
@@ -262,8 +262,7 @@ mrvl_pp2_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (PREDICT_TRUE (ppif->per_interface_next_index == ~0))
{
n_rx_bytes += mrvl_pp2_next_from_desc (node, d, b0, &next0);
- vnet_feature_start_device_input_x1 (ppif->sw_if_index, &next0,
- b0);
+ vnet_feature_start_device_input (ppif->sw_if_index, &next0, b0);
}
else
{
@@ -370,7 +369,6 @@ mrvl_pp2_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (mrvl_pp2_input_node) = {
.function = mrvl_pp2_input_fn,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -383,7 +381,6 @@ VLIB_REGISTER_NODE (mrvl_pp2_input_node) = {
.error_strings = mrvl_pp2_input_error_strings,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/marvell/pp2/pp2.c b/src/plugins/marvell/pp2/pp2.c
index dfe5c157018..030ab9b4496 100644
--- a/src/plugins/marvell/pp2/pp2.c
+++ b/src/plugins/marvell/pp2/pp2.c
@@ -22,7 +22,6 @@
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
-#include <vppinfra/linux/syscall.h>
#include <vnet/plugin/plugin.h>
#include <marvell/pp2/pp2.h>
#include <vnet/interface/rx_queue_funcs.h>
@@ -130,7 +129,6 @@ mrvl_pp2_delete_if (mrvl_pp2_if_t * ppif)
pp2_ppio_deinit (ppif->ppio);
}
- /* *INDENT-OFF* */
/* free buffers hanging in the tx ring */
vec_foreach (outq, ppif->outqs)
{
@@ -163,7 +161,6 @@ mrvl_pp2_delete_if (mrvl_pp2_if_t * ppif)
pp2_bpool_deinit (inq->bpool);
}
vec_free (ppif->inqs);
- /* *INDENT-ON* */
pool_put (ppm->interfaces, ppif);
@@ -178,6 +175,7 @@ mrvl_pp2_create_if (mrvl_pp2_create_if_args_t * args)
vlib_main_t *vm = vlib_get_main ();
vnet_main_t *vnm = vnet_get_main ();
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vnet_eth_interface_registration_t eir = {};
mrvl_pp2_main_t *ppm = &mrvl_pp2_main;
struct pp2_bpool_params bpool_params = { 0 };
struct pp2_ppio_params ppio_params = { 0 };
@@ -282,16 +280,11 @@ mrvl_pp2_create_if (mrvl_pp2_create_if_args_t * args)
goto error;
}
- args->error = ethernet_register_interface (vnm, mrvl_pp2_device_class.index,
- ppif->dev_instance,
- mac_addr,
- &ppif->hw_if_index,
- mrvl_pp2_eth_flag_change);
- if (args->error)
- {
- args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
- goto error;
- }
+ eir.dev_class_index = mrvl_pp2_device_class.index;
+ eir.dev_instance = ppif->dev_instance;
+ eir.address = mac_addr;
+ eir.cb.flag_change = mrvl_pp2_eth_flag_change;
+ ppif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
sw = vnet_get_hw_sw_interface (vnm, ppif->hw_if_index);
ppif->sw_if_index = sw->sw_if_index;
@@ -380,7 +373,6 @@ static char *mrvl_pp2_tx_func_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (mrvl_pp2_device_class,) =
{
.name = "Marvell PPv2 interface",
@@ -393,7 +385,6 @@ VNET_DEVICE_CLASS (mrvl_pp2_device_class,) =
.clear_counters = mrvl_pp2_clear_interface_counters,
.rx_redirect_to_node = mrvl_pp2_set_interface_next_node,
};
-/* *INDENT-ON* */
static clib_error_t *
mrvl_pp2_init (vlib_main_t * vm)
diff --git a/src/plugins/marvell/pp2/pp2_api.c b/src/plugins/marvell/pp2/pp2_api.c
index a3672c442d4..c1f3a9e1d1d 100644
--- a/src/plugins/marvell/pp2/pp2_api.c
+++ b/src/plugins/marvell/pp2/pp2_api.c
@@ -28,6 +28,7 @@
#include <marvell/pp2/pp2.api_enum.h>
#include <marvell/pp2/pp2.api_types.h>
+#define REPLY_MSG_ID_BASE (pp2->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static void
@@ -48,12 +49,8 @@ vl_api_mrvl_pp2_create_t_handler (vl_api_mrvl_pp2_create_t * mp)
{
clib_error_free (args.error);
}
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_MRVL_PP2_CREATE_REPLY + pp2->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_MRVL_PP2_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -78,7 +75,7 @@ vl_api_mrvl_pp2_delete_t_handler (vl_api_mrvl_pp2_delete_t * mp)
mrvl_pp2_delete_if (dif);
reply:
- REPLY_MACRO (VL_API_MRVL_PP2_DELETE_REPLY + pp2->msg_id_base);
+ REPLY_MACRO (VL_API_MRVL_PP2_DELETE_REPLY);
}
#include <marvell/pp2/pp2.api.c>
diff --git a/src/plugins/mdata/mdata.c b/src/plugins/mdata/mdata.c
index 69622960c39..c45b1e38910 100644
--- a/src/plugins/mdata/mdata.c
+++ b/src/plugins/mdata/mdata.c
@@ -203,25 +203,23 @@ mdata_enable_disable_command_fn (vlib_main_t * vm,
/*?
* This command enables or disables buffer metadata change tracking
*
- *@cliexpar
+ * @cliexpar
* To enable buffer metadata change tracking:
- *@cliexstart{buffer metadata tracking on}
+ * @cliexstart{buffer metadata tracking on}
* Tracking enabled
- *@cliexend
+ * @cliexend
*
- *@cliexstart{buffer metadata tracking off}
+ * @cliexstart{buffer metadata tracking off}
* Tracking disabled
- *@cliexend
+ * @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mdata_enable_disable_command, static) =
{
.path = "buffer metadata tracking",
.short_help = "buffer metadata tracking [on][off]",
.function = mdata_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_mdata_enable_disable_t_handler
@@ -256,13 +254,11 @@ mdata_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (mdata_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Buffer metadata change tracker."
};
-/* *INDENT-ON* */
#define foreach_primary_metadata_field \
@@ -350,15 +346,12 @@ _(tcp.data_len) \
_(tcp.flags) \
_(snat.flags)
-#define foreach_opaque2_metadata_field \
-_(qos.bits) \
-_(qos.source) \
-_(loop_counter) \
-_(gbp.flags) \
-_(gbp.sclass) \
-_(gso_size) \
-_(gso_l4_hdr_sz) \
-_(pg_replay_timestamp)
+#define foreach_opaque2_metadata_field \
+ _ (qos.bits) \
+ _ (qos.source) \
+ _ (loop_counter) \
+ _ (gso_size) \
+ _ (gso_l4_hdr_sz)
static u8 *
format_buffer_metadata_changes (u8 * s, va_list * args)
@@ -472,23 +465,21 @@ show_metadata_command_fn (vlib_main_t * vm,
/*?
* This command displays buffer metadata change information
- *@cliexpar
+ * @cliexpar
* How to display buffer metadata change information
- *@cliexstart{show buffer metadata}
+ * @cliexstart{show buffer metadata}
* ethernet-input: current_data current_length flags error
* vnet_buffer_t: l2_hdr_offset l3_hdr_offset
* vnet_buffer2_t: no changes
- *@cliexend
+ * @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_metadata_command, static) =
{
.path = "show buffer metadata",
.short_help = "show buffer metadata",
.function = show_metadata_command_fn,
};
-/* *INDENT-OFF* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/mdata/mdata_doc.md b/src/plugins/mdata/mdata_doc.md
deleted file mode 100644
index cbbfb012183..00000000000
--- a/src/plugins/mdata/mdata_doc.md
+++ /dev/null
@@ -1,24 +0,0 @@
-# Buffer metadata change tracker {#mdata_doc}
-
-## Introduction
-
-The mdata plugin uses the vlib main loop "before" performance counter
-hook to snapshoot buffer metadata before calling the node dispatch
-function. Similarly, the plugin uses the main loop "after" hook to
-compare a vectors' worth of buffer metadata after the fact.
-
-The comparison function is a simple octet-by-octet A != B check. We
-accumulate changed octets per-node across the entire run, using a
-single spinlock-protected accumulator.
-
-The "show buffer metadata" command produces a report of all fields
-whose values are changed by nodes visited during a given run.
-
-Since many fields in the vnet_buffer_opaque_t are union members,
-it may appear that a certain node changes numerous fields. The entire
-point of the exercise is to warn developers that if a packet visits
-node N, data placed into opaque union field F *will* be affected.
-
-One should never assume much about buffer metadata immutability across
-arbitrary subgraphs. This tool generates accurate reports, to the
-extent that one exercises the required subgraph trajectories.
diff --git a/src/plugins/mdata/mdata_doc.rst b/src/plugins/mdata/mdata_doc.rst
new file mode 100644
index 00000000000..95746bd3d0e
--- /dev/null
+++ b/src/plugins/mdata/mdata_doc.rst
@@ -0,0 +1,26 @@
+Buffer metadata change tracker
+==============================
+
+Introduction
+------------
+
+The mdata plugin uses the vlib main loop “before” performance counter
+hook to snapshoot buffer metadata before calling the node dispatch
+function. Similarly, the plugin uses the main loop “after” hook to
+compare a vectors’ worth of buffer metadata after the fact.
+
+The comparison function is a simple octet-by-octet A != B check. We
+accumulate changed octets per-node across the entire run, using a single
+spinlock-protected accumulator.
+
+The “show buffer metadata” command produces a report of all fields whose
+values are changed by nodes visited during a given run.
+
+Since many fields in the vnet_buffer_opaque_t are union members, it may
+appear that a certain node changes numerous fields. The entire point of
+the exercise is to warn developers that if a packet visits node N, data
+placed into opaque union field F *will* be affected.
+
+One should never assume much about buffer metadata immutability across
+arbitrary subgraphs. This tool generates accurate reports, to the extent
+that one exercises the required subgraph trajectories.
diff --git a/src/plugins/memif/CMakeLists.txt b/src/plugins/memif/CMakeLists.txt
index b86d30adb97..4bbf6ba39db 100644
--- a/src/plugins/memif/CMakeLists.txt
+++ b/src/plugins/memif/CMakeLists.txt
@@ -33,3 +33,5 @@ add_vpp_plugin(memif
INSTALL_HEADERS
memif.h
)
+
+add_compile_definitions(MEMIF_CACHELINE_SIZE=${VPP_CACHE_LINE_SIZE})
diff --git a/src/plugins/memif/cli.c b/src/plugins/memif/cli.c
index b313e9737b3..c2ed63747fa 100644
--- a/src/plugins/memif/cli.c
+++ b/src/plugins/memif/cli.c
@@ -33,7 +33,7 @@ memif_socket_filename_create_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- int r;
+ clib_error_t *err;
u32 socket_id;
u8 *socket_filename;
@@ -53,6 +53,7 @@ memif_socket_filename_create_command_fn (vlib_main_t * vm,
else
{
vec_free (socket_filename);
+ unformat_free (line_input);
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
@@ -72,37 +73,18 @@ memif_socket_filename_create_command_fn (vlib_main_t * vm,
return clib_error_return (0, "Invalid socket filename");
}
- r = memif_socket_filename_add_del (1, socket_id, socket_filename);
+ err = memif_socket_filename_add_del (1, socket_id, (char *) socket_filename);
vec_free (socket_filename);
- if (r < 0)
- {
- switch (r)
- {
- case VNET_API_ERROR_INVALID_ARGUMENT:
- return clib_error_return (0, "Invalid argument");
- case VNET_API_ERROR_SYSCALL_ERROR_1:
- return clib_error_return (0, "Syscall error 1");
- case VNET_API_ERROR_ENTRY_ALREADY_EXISTS:
- return clib_error_return (0, "Already exists");
- case VNET_API_ERROR_UNEXPECTED_INTF_STATE:
- return clib_error_return (0, "Interface still in use");
- default:
- return clib_error_return (0, "Unknown error");
- }
- }
-
- return 0;
+ return err;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (memif_socket_filename_create_command, static) = {
.path = "create memif socket",
.short_help = "create memif socket [id <id>] [filename <path>]",
.function = memif_socket_filename_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
memif_socket_filename_delete_command_fn (vlib_main_t * vm,
@@ -110,7 +92,6 @@ memif_socket_filename_delete_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- int r;
u32 socket_id;
/* Get a line of input. */
@@ -125,6 +106,7 @@ memif_socket_filename_delete_command_fn (vlib_main_t * vm,
;
else
{
+ unformat_free (line_input);
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
}
@@ -137,42 +119,21 @@ memif_socket_filename_delete_command_fn (vlib_main_t * vm,
return clib_error_return (0, "Invalid socket id");
}
- r = memif_socket_filename_add_del (0, socket_id, 0);
-
- if (r < 0)
- {
- switch (r)
- {
- case VNET_API_ERROR_INVALID_ARGUMENT:
- return clib_error_return (0, "Invalid argument");
- case VNET_API_ERROR_SYSCALL_ERROR_1:
- return clib_error_return (0, "Syscall error 1");
- case VNET_API_ERROR_ENTRY_ALREADY_EXISTS:
- return clib_error_return (0, "Already exists");
- case VNET_API_ERROR_UNEXPECTED_INTF_STATE:
- return clib_error_return (0, "Interface still in use");
- default:
- return clib_error_return (0, "Unknown error");
- }
- }
-
- return 0;
+ return memif_socket_filename_add_del (0, socket_id, 0);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (memif_socket_filename_delete_command, static) = {
.path = "delete memif socket",
.short_help = "delete memif socket [id <id>]",
.function = memif_socket_filename_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- int r;
+ clib_error_t *err;
u32 ring_size = MEMIF_DEFAULT_RING_SIZE;
memif_create_if_args_t args = { 0 };
args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE;
@@ -207,14 +168,19 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
args.is_master = 0;
else if (unformat (line_input, "no-zero-copy"))
args.is_zero_copy = 0;
+ else if (unformat (line_input, "use-dma"))
+ args.use_dma = 1;
else if (unformat (line_input, "mode ip"))
args.mode = MEMIF_INTERFACE_MODE_IP;
else if (unformat (line_input, "hw-addr %U",
unformat_ethernet_address, args.hw_addr))
args.hw_addr_set = 1;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
}
unformat_free (line_input);
@@ -234,27 +200,13 @@ memif_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
args.rx_queues = rx_queues;
args.tx_queues = tx_queues;
- r = memif_create_if (vm, &args);
+ err = memif_create_if (vm, &args);
vec_free (args.secret);
- if (r <= VNET_API_ERROR_SYSCALL_ERROR_1
- && r >= VNET_API_ERROR_SYSCALL_ERROR_10)
- return clib_error_return (0, "%s (errno %d)", strerror (errno), errno);
-
- if (r == VNET_API_ERROR_INVALID_ARGUMENT)
- return clib_error_return (0, "Invalid argument");
-
- if (r == VNET_API_ERROR_INVALID_INTERFACE)
- return clib_error_return (0, "Invalid interface name");
-
- if (r == VNET_API_ERROR_SUBIF_ALREADY_EXISTS)
- return clib_error_return (0, "Interface with same id already exists");
-
- return 0;
+ return err;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (memif_create_command, static) = {
.path = "create interface memif",
.short_help = "create interface memif [id <id>] [socket-id <socket-id>] "
@@ -264,7 +216,6 @@ VLIB_CLI_COMMAND (memif_create_command, static) = {
"[mode ip] [secret <string>]",
.function = memif_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -289,8 +240,11 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
vnm, &sw_if_index))
;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
}
unformat_free (line_input);
@@ -308,13 +262,11 @@ memif_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (memif_delete_command, static) = {
.path = "delete interface memif",
.short_help = "delete interface memif {<interface> | sw_if_index <sw_idx>}",
.function = memif_delete_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_memif_if_flags (u8 * s, va_list * args)
@@ -378,23 +330,22 @@ format_memif_descriptor (u8 * s, va_list * args)
if (ring)
{
s = format (s, "%Udescriptor table:\n", format_white_space, indent);
- s =
- format (s,
- "%Uid flags len address offset user address\n",
- format_white_space, indent);
- s =
- format (s,
- "%U===== ===== ======== ================== ====== ==================\n",
- format_white_space, indent);
+ s = format (s,
+ "%Uid flags region len address offset "
+ " user address\n",
+ format_white_space, indent);
+ s = format (s,
+ "%U===== ===== ====== ======== ================== "
+ "========== ==================\n",
+ format_white_space, indent);
for (slot = 0; slot < ring_size; slot++)
{
- s = format (s, "%U%-5d %-5d %-7d 0x%016lx %-6d 0x%016lx\n",
- format_white_space, indent, slot,
- ring->desc[slot].flags,
- ring->desc[slot].length,
+ s = format (s, "%U%-5d %-5d %-6d %-7d 0x%016lx %-10d 0x%016lx\n",
+ format_white_space, indent, slot, ring->desc[slot].flags,
+ ring->desc[slot].region, ring->desc[slot].length,
mif->regions[ring->desc[slot].region].shm,
- ring->desc[slot].offset, memif_get_buffer (mif, ring,
- slot));
+ ring->desc[slot].offset,
+ memif_get_buffer (mif, ring, slot));
}
s = format (s, "\n");
}
@@ -437,7 +388,6 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "sockets\n");
vlib_cli_output (vm, " %-3s %-11s %s\n", "id", "listener", "filename");
- /* *INDENT-OFF* */
hash_foreach (sock_id, msf_idx, mm->socket_file_index_by_sock_id,
({
memif_socket_file_t *msf;
@@ -453,17 +403,14 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output(vm, " %-3u %-11v %s\n", sock_id, s, filename);
vec_reset_length (s);
}));
- /* *INDENT-ON* */
vec_free (s);
vlib_cli_output (vm, "\n");
if (vec_len (hw_if_indices) == 0)
{
- /* *INDENT-OFF* */
pool_foreach (mif, mm->interfaces)
vec_add1 (hw_if_indices, mif->hw_if_index);
- /* *INDENT-ON* */
}
for (hw_if_index = 0; hw_if_index < vec_len (hw_if_indices); hw_if_index++)
@@ -498,7 +445,6 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, " remote-disc-reason \"%s\"",
mif->remote_disc_string);
- /* *INDENT-OFF* */
vec_foreach_index (i, mif->regions)
{
mr = vec_elt_at_index (mif->regions, i);
@@ -519,20 +465,17 @@ memif_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (show_descr)
vlib_cli_output (vm, " %U", format_memif_descriptor, mif, mq);
}
- /* *INDENT-ON* */
}
done:
vec_free (hw_if_indices);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (memif_show_command, static) = {
.path = "show memif",
.short_help = "show memif [<interface>] [descriptors]",
.function = memif_show_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
memif_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/memif/device.c b/src/plugins/memif/device.c
index fc66420a6ad..017a001168b 100644
--- a/src/plugins/memif/device.c
+++ b/src/plugins/memif/device.c
@@ -194,8 +194,8 @@ retry:
else
{
/* we need to rollback vectors before bailing out */
- _vec_len (ptd->buffers) = saved_ptd_buffers_len;
- _vec_len (ptd->copy_ops) = saved_ptd_copy_ops_len;
+ vec_set_len (ptd->buffers, saved_ptd_buffers_len);
+ vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len);
vlib_error_count (vm, node->node_index,
MEMIF_TX_ERROR_ROLLBACK, 1);
slot = saved_slot;
@@ -369,6 +369,270 @@ no_free_slots:
return n_left;
}
+CLIB_MARCH_FN (memif_tx_dma_completion_cb, void, vlib_main_t *vm,
+ vlib_dma_batch_t *b)
+{
+ memif_main_t *mm = &memif_main;
+ memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16);
+ memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, b->cookie & 0xffff);
+ memif_dma_info_t *dma_info = mq->dma_info + mq->dma_info_head;
+ memif_per_thread_data_t *ptd = &dma_info->data;
+
+ vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));
+
+ dma_info->finished = 1;
+ vec_reset_length (ptd->buffers);
+ vec_reset_length (ptd->copy_ops);
+
+ __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE);
+
+ mq->dma_info_head++;
+ if (mq->dma_info_head == mq->dma_info_size)
+ mq->dma_info_head = 0;
+ mq->dma_info_full = 0;
+}
+
+#ifndef CLIB_MARCH_VARIANT
+void
+memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b)
+{
+ return CLIB_MARCH_FN_SELECT (memif_tx_dma_completion_cb) (vm, b);
+}
+#endif
+
+static_always_inline uword
+memif_interface_tx_dma_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 *buffers, memif_if_t *mif,
+ memif_ring_type_t type, memif_queue_t *mq,
+ u32 n_left)
+{
+ memif_ring_t *ring;
+ u32 n_copy_op;
+ u16 ring_size, mask, slot, free_slots;
+ int n_retries = 5, fallback = 0;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+ memif_copy_op_t *co;
+ memif_region_index_t last_region = ~0;
+ void *last_region_shm = 0;
+ u16 head, tail;
+ memif_dma_info_t *dma_info;
+ memif_per_thread_data_t *ptd;
+ memif_main_t *mm = &memif_main;
+ u16 mif_id = mif - mm->interfaces;
+
+ ring = mq->ring;
+ ring_size = 1 << mq->log2_ring_size;
+ mask = ring_size - 1;
+
+ dma_info = mq->dma_info + mq->dma_info_tail;
+ ptd = &dma_info->data;
+
+ /* do software fallback if dma info ring is full */
+ u16 dma_mask = mq->dma_info_size - 1;
+ if ((((mq->dma_info_tail + 1) & dma_mask) == mq->dma_info_head) ||
+ ((mq->dma_info_head == dma_mask) && (mq->dma_info_tail == 0)))
+ {
+ if (!mq->dma_info_full)
+ mq->dma_info_full = 1;
+ else
+ fallback = 1;
+ }
+
+ vlib_dma_batch_t *b = NULL;
+ if (PREDICT_TRUE (!fallback))
+ b = vlib_dma_batch_new (vm, mif->dma_tx_config);
+ if (!b)
+ return n_left;
+
+retry:
+
+ slot = tail = mq->dma_tail;
+ head = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE);
+ mq->last_tail += tail - mq->last_tail;
+ free_slots = head - mq->dma_tail;
+
+ while (n_left && free_slots)
+ {
+ memif_desc_t *d0;
+ void *mb0;
+ i32 src_off;
+ u32 bi0, dst_off, src_left, dst_left, bytes_to_copy;
+ u32 saved_ptd_copy_ops_len = _vec_len (ptd->copy_ops);
+ u32 saved_ptd_buffers_len = _vec_len (ptd->buffers);
+ u16 saved_slot = slot;
+
+ clib_prefetch_load (&ring->desc[(slot + 8) & mask]);
+
+ d0 = &ring->desc[slot & mask];
+ if (PREDICT_FALSE (last_region != d0->region))
+ {
+ last_region_shm = mif->regions[d0->region].shm;
+ last_region = d0->region;
+ }
+ mb0 = last_region_shm + d0->offset;
+
+ dst_off = 0;
+
+ /* slave is the producer, so it should be able to reset buffer length */
+ dst_left = d0->length;
+
+ if (PREDICT_TRUE (n_left >= 4))
+ vlib_prefetch_buffer_header (vlib_get_buffer (vm, buffers[3]), LOAD);
+ bi0 = buffers[0];
+
+ next_in_chain:
+
+ b0 = vlib_get_buffer (vm, bi0);
+ src_off = b0->current_data;
+ src_left = b0->current_length;
+
+ while (src_left)
+ {
+ if (PREDICT_FALSE (dst_left == 0))
+ {
+ if (free_slots)
+ {
+ d0->length = dst_off;
+ d0->flags = MEMIF_DESC_FLAG_NEXT;
+ d0 = &ring->desc[slot & mask];
+ dst_off = 0;
+ dst_left = (type == MEMIF_RING_S2M) ? mif->run.buffer_size :
+ d0->length;
+
+ if (PREDICT_FALSE (last_region != d0->region))
+ {
+ last_region_shm = mif->regions[d0->region].shm;
+ last_region = d0->region;
+ }
+ mb0 = last_region_shm + d0->offset;
+ }
+ else
+ {
+ /* we need to rollback vectors before bailing out */
+ vec_set_len (ptd->buffers, saved_ptd_buffers_len);
+ vec_set_len (ptd->copy_ops, saved_ptd_copy_ops_len);
+ vlib_error_count (vm, node->node_index,
+ MEMIF_TX_ERROR_ROLLBACK, 1);
+ slot = saved_slot;
+ goto no_free_slots;
+ }
+ }
+ bytes_to_copy = clib_min (src_left, dst_left);
+ memif_add_copy_op (ptd, mb0 + dst_off, bytes_to_copy, src_off,
+ vec_len (ptd->buffers));
+ src_off += bytes_to_copy;
+ dst_off += bytes_to_copy;
+ src_left -= bytes_to_copy;
+ dst_left -= bytes_to_copy;
+ }
+
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ slot++;
+ free_slots--;
+ bi0 = b0->next_buffer;
+ goto next_in_chain;
+ }
+
+ vec_add1_aligned (ptd->buffers, buffers[0], CLIB_CACHE_LINE_BYTES);
+ d0->length = dst_off;
+ d0->flags = 0;
+
+ free_slots -= 1;
+ slot += 1;
+
+ buffers++;
+ n_left--;
+ }
+no_free_slots:
+
+ /* copy data */
+ n_copy_op = vec_len (ptd->copy_ops);
+ co = ptd->copy_ops;
+ while (n_copy_op >= 8)
+ {
+ clib_prefetch_load (co[4].data);
+ clib_prefetch_load (co[5].data);
+ clib_prefetch_load (co[6].data);
+ clib_prefetch_load (co[7].data);
+
+ b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+ b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
+ b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
+ b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);
+
+ if (PREDICT_TRUE (!fallback))
+ {
+ vlib_dma_batch_add (vm, b, co[0].data,
+ b0->data + co[0].buffer_offset, co[0].data_len);
+ vlib_dma_batch_add (vm, b, co[1].data,
+ b1->data + co[1].buffer_offset, co[1].data_len);
+ vlib_dma_batch_add (vm, b, co[2].data,
+ b2->data + co[2].buffer_offset, co[2].data_len);
+ vlib_dma_batch_add (vm, b, co[3].data,
+ b3->data + co[3].buffer_offset, co[3].data_len);
+ }
+ else
+ {
+ clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+ co[0].data_len);
+ clib_memcpy_fast (co[1].data, b1->data + co[1].buffer_offset,
+ co[1].data_len);
+ clib_memcpy_fast (co[2].data, b2->data + co[2].buffer_offset,
+ co[2].data_len);
+ clib_memcpy_fast (co[3].data, b3->data + co[3].buffer_offset,
+ co[3].data_len);
+ }
+
+ co += 4;
+ n_copy_op -= 4;
+ }
+ while (n_copy_op)
+ {
+ b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
+ if (PREDICT_TRUE (!fallback))
+ vlib_dma_batch_add (vm, b, co[0].data, b0->data + co[0].buffer_offset,
+ co[0].data_len);
+ else
+ clib_memcpy_fast (co[0].data, b0->data + co[0].buffer_offset,
+ co[0].data_len);
+ co += 1;
+ n_copy_op -= 1;
+ }
+
+ /* save dma info before retry */
+ dma_info->dma_tail = slot;
+ mq->dma_tail = slot;
+ vec_reset_length (ptd->copy_ops);
+
+ if (n_left && n_retries--)
+ goto retry;
+
+ if (PREDICT_TRUE (!fallback))
+ {
+ vlib_dma_batch_set_cookie (vm, b,
+ ((u64) mif_id << 16) | (mq - mif->tx_queues));
+ vlib_dma_batch_submit (vm, b);
+ dma_info->finished = 0;
+
+ if (b->n_enq)
+ {
+ mq->dma_info_tail++;
+ if (mq->dma_info_tail == mq->dma_info_size)
+ mq->dma_info_tail = 0;
+ }
+ }
+ else if (fallback && dma_info->finished)
+ {
+ /* if dma has been completed, update ring immediately */
+ vlib_buffer_free (vm, ptd->buffers, vec_len (ptd->buffers));
+ vec_reset_length (ptd->buffers);
+ __atomic_store_n (&mq->ring->tail, slot, __ATOMIC_RELEASE);
+ }
+
+ return n_left;
+}
+
VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -376,22 +640,19 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
memif_main_t *nm = &memif_main;
vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
memif_if_t *mif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
memif_queue_t *mq;
+ u32 qid = tf->queue_id;
u32 *from, thread_index = vm->thread_index;
memif_per_thread_data_t *ptd = vec_elt_at_index (memif_main.per_thread_data,
thread_index);
- u8 tx_queues = vec_len (mif->tx_queues);
uword n_left;
- if (tx_queues < vlib_get_n_threads ())
- {
- ASSERT (tx_queues > 0);
- mq = vec_elt_at_index (mif->tx_queues, thread_index % tx_queues);
- }
- else
- mq = vec_elt_at_index (mif->tx_queues, thread_index);
+ ASSERT (vec_len (mif->tx_queues) > qid);
+ mq = vec_elt_at_index (mif->tx_queues, qid);
- clib_spinlock_lock_if_init (&mif->lockp);
+ if (tf->shared_queue)
+ clib_spinlock_lock (&mq->lockp);
from = vlib_frame_vector_args (frame);
n_left = frame->n_vectors;
@@ -402,10 +663,17 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_S2M,
mq, ptd, n_left);
else
- n_left = memif_interface_tx_inline (vm, node, from, mif, MEMIF_RING_M2S,
- mq, ptd, n_left);
+ {
+ if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
+ n_left = memif_interface_tx_dma_inline (vm, node, from, mif,
+ MEMIF_RING_M2S, mq, n_left);
+ else
+ n_left = memif_interface_tx_inline (vm, node, from, mif,
+ MEMIF_RING_M2S, mq, ptd, n_left);
+ }
- clib_spinlock_unlock_if_init (&mif->lockp);
+ if (tf->shared_queue)
+ clib_spinlock_unlock (&mq->lockp);
if (n_left)
vlib_error_count (vm, node->node_index, MEMIF_TX_ERROR_NO_FREE_SLOTS,
@@ -418,7 +686,12 @@ VNET_DEVICE_CLASS_TX_FN (memif_device_class) (vlib_main_t * vm,
mq->int_count++;
}
- if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0)
+ if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) && (mif->dma_tx_config >= 0))
+ {
+ if (n_left)
+ vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
+ }
+ else if ((mif->flags & MEMIF_IF_FLAG_ZERO_COPY) == 0)
vlib_buffer_free (vm, from, frame->n_vectors);
else if (n_left)
vlib_buffer_free (vm, from + frame->n_vectors - n_left, n_left);
@@ -468,16 +741,6 @@ memif_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
return 0;
}
-static clib_error_t *
-memif_subif_add_del_function (vnet_main_t * vnm,
- u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- /* Nothing for now */
- return 0;
-}
-
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (memif_device_class) = {
.name = "memif",
.format_device_name = format_memif_device_name,
@@ -488,11 +751,9 @@ VNET_DEVICE_CLASS (memif_device_class) = {
.rx_redirect_to_node = memif_set_interface_next_node,
.clear_counters = memif_clear_hw_interface_counters,
.admin_up_down_function = memif_interface_admin_up_down,
- .subif_add_del_function = memif_subif_add_del_function,
.rx_mode_change_function = memif_interface_rx_mode_change,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/memif/memif.api b/src/plugins/memif/memif.api
index 9e32db5b470..5973ad60054 100644
--- a/src/plugins/memif/memif.api
+++ b/src/plugins/memif/memif.api
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-option version = "3.0.0";
+option version = "3.1.0";
import "vnet/interface_types.api";
import "vnet/ethernet/ethernet_types.api";
@@ -43,6 +43,8 @@ enum memif_mode
*/
autoreply define memif_socket_filename_add_del
{
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add; /* 0 = remove, 1 = add association */
@@ -51,6 +53,40 @@ autoreply define memif_socket_filename_add_del
option vat_help = "[add|del] id <id> filename <file>";
};
+/** \brief Create or remove named socket file for memif interfaces
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - 0 = remove, 1 = add association
+ @param socket_id - non-0 32-bit integer used to identify a socket file
+ ~0 means autogenerate
+ @param socket_filename - filename of the socket to be used for connection
+ establishment; id 0 always maps to default "/var/vpp/memif.sock";
+ no socket filename needed when is_add == 0.
+ socket_filename starting with '@' will create an abstract socket
+ in the given namespace
+*/
+define memif_socket_filename_add_del_v2
+{
+ u32 client_index;
+ u32 context;
+ bool is_add; /* 0 = remove, 1 = add association */
+ u32 socket_id [default=0xffffffff]; /* unique non-0 id for given socket file name */
+ string socket_filename[]; /* NUL terminated filename */
+ option vat_help = "[add|del] id <id> filename <file>";
+};
+
+/** \brief Create memory interface socket file response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param socket_id - non-0 32-bit integer used to identify a socket file
+*/
+define memif_socket_filename_add_del_v2_reply
+{
+ u32 context;
+ i32 retval;
+ u32 socket_id;
+};
+
/** \brief Create memory interface
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -70,6 +106,8 @@ autoreply define memif_socket_filename_add_del
*/
define memif_create
{
+ option deprecated;
+
u32 client_index;
u32 context;
@@ -94,6 +132,58 @@ define memif_create
*/
define memif_create_reply
{
+ option deprecated;
+
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Create memory interface
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param role - role of the interface in the connection (master/slave)
+ @param mode - interface mode
+ @param rx_queues - number of rx queues (only valid for slave)
+ @param tx_queues - number of tx queues (only valid for slave)
+ @param id - 32bit integer used to authenticate and match opposite sides
+ of the connection
+ @param socket_id - socket filename id to be used for connection
+ establishment
+ @param ring_size - the number of entries of RX/TX rings
+ @param buffer_size - size of the buffer allocated for each ring entry
+ @param no_zero_copy - if true, disable zero copy
+ @param use_dma - if true, use dma accelerate memory copy
+ @param hw_addr - interface MAC address
+ @param secret - optional, default is "", max length 24
+*/
+define memif_create_v2
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_memif_role_t role; /* 0 = master, 1 = slave */
+ vl_api_memif_mode_t mode; /* 0 = ethernet, 1 = ip, 2 = punt/inject */
+ u8 rx_queues; /* optional, default is 1 */
+ u8 tx_queues; /* optional, default is 1 */
+ u32 id; /* optional, default is 0 */
+ u32 socket_id; /* optional, default is 0, "/var/vpp/memif.sock" */
+ u32 ring_size; /* optional, default is 1024 entries, must be power of 2 */
+ u16 buffer_size; /* optional, default is 2048 bytes */
+ bool no_zero_copy; /* disable zero copy */
+ bool use_dma; /* use dma acceleration */
+ vl_api_mac_address_t hw_addr; /* optional, randomly generated if zero */
+ string secret[24]; /* optional, default is "", max length 24 */
+ option vat_help = "[id <id>] [socket-id <id>] [ring_size <size>] [buffer_size <size>] [hw_addr <mac_address>] [secret <string>] [mode ip] <master|slave>";
+};
+
+/** \brief Create memory interface response
+ @param context - sender context, to match reply w/ request
+ @param retval - return value for request
+ @param sw_if_index - software index of the newly created interface
+*/
+define memif_create_v2_reply
+{
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
diff --git a/src/plugins/memif/memif.c b/src/plugins/memif/memif.c
index 9bbbe7f9d89..7e3dd44db2c 100644
--- a/src/plugins/memif/memif.c
+++ b/src/plugins/memif/memif.c
@@ -26,7 +26,6 @@
#include <sys/un.h>
#include <sys/uio.h>
#include <sys/mman.h>
-#include <sys/prctl.h>
#include <sys/eventfd.h>
#include <inttypes.h>
#include <limits.h>
@@ -36,6 +35,7 @@
#include <vnet/plugin/plugin.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
#include <vpp/app/version.h>
#include <memif/memif.h>
#include <memif/private.h>
@@ -49,6 +49,14 @@ memif_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
return 0;
}
+static clib_error_t *
+memif_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ u32 flags)
+{
+ /* nothing for now */
+ return 0;
+}
+
static void
memif_queue_intfd_close (memif_queue_t * mq)
{
@@ -91,6 +99,8 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err)
memif_region_t *mr;
memif_queue_t *mq;
int i;
+ vlib_main_t *vm = vlib_get_main ();
+ int with_barrier = 0;
if (mif == 0)
return;
@@ -132,7 +142,12 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err)
clib_mem_free (mif->sock);
}
- /* *INDENT-OFF* */
+ if (vlib_worker_thread_barrier_held () == 0)
+ {
+ with_barrier = 1;
+ vlib_worker_thread_barrier_sync (vm);
+ }
+
vec_foreach_index (i, mif->rx_queues)
{
mq = vec_elt_at_index (mif->rx_queues, i);
@@ -146,9 +161,7 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err)
}
}
vnet_hw_if_unregister_all_rx_queues (vnm, mif->hw_if_index);
- vnet_hw_if_update_runtime_data (vnm, mif->hw_if_index);
- /* *INDENT-OFF* */
vec_foreach_index (i, mif->tx_queues)
{
mq = vec_elt_at_index (mif->tx_queues, i);
@@ -158,9 +171,12 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err)
{
memif_disconnect_free_zc_queue_buffer(mq, 0);
}
+ clib_spinlock_free (&mq->lockp);
}
mq->ring = 0;
}
+ vnet_hw_if_unregister_all_tx_queues (vnm, mif->hw_if_index);
+ vnet_hw_if_update_runtime_data (vnm, mif->hw_if_index);
/* free tx and rx queues */
vec_foreach (mq, mif->rx_queues)
@@ -182,11 +198,13 @@ memif_disconnect (memif_if_t * mif, clib_error_t * err)
if (mr->fd > -1)
close (mr->fd);
}
- /* *INDENT-ON* */
vec_free (mif->regions);
vec_free (mif->remote_name);
vec_free (mif->remote_if_name);
clib_fifo_free (mif->msg_queue);
+
+ if (with_barrier)
+ vlib_worker_thread_barrier_release (vm);
}
static clib_error_t *
@@ -228,19 +246,22 @@ memif_int_fd_read_ready (clib_file_t * uf)
clib_error_t *
memif_connect (memif_if_t * mif)
{
+ memif_main_t *mm = &memif_main;
vlib_main_t *vm = vlib_get_main ();
vnet_main_t *vnm = vnet_get_main ();
clib_file_t template = { 0 };
memif_region_t *mr;
- int i;
+ int i, j;
+ u32 n_txqs = 0, n_threads = vlib_get_n_threads ();
clib_error_t *err = NULL;
+ u8 max_log2_ring_sz = 0;
+ int with_barrier = 0;
memif_log_debug (mif, "connect %u", mif->dev_instance);
vec_free (mif->local_disc_string);
vec_free (mif->remote_disc_string);
- /* *INDENT-OFF* */
vec_foreach (mr, mif->regions)
{
if (mr->shm)
@@ -259,15 +280,21 @@ memif_connect (memif_if_t * mif)
goto error;
}
}
- /* *INDENT-ON* */
template.read_function = memif_int_fd_read_ready;
template.write_function = memif_int_fd_write_ready;
- /* *INDENT-OFF* */
+ with_barrier = 1;
+ if (vlib_worker_thread_barrier_held ())
+ with_barrier = 0;
+
+ if (with_barrier)
+ vlib_worker_thread_barrier_sync (vm);
+
vec_foreach_index (i, mif->tx_queues)
{
memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i);
+ max_log2_ring_sz = clib_max (max_log2_ring_sz, mq->log2_ring_size);
mq->ring = mif->regions[mq->region].shm + mq->offset;
if (mq->ring->cookie != MEMIF_COOKIE)
@@ -275,6 +302,50 @@ memif_connect (memif_if_t * mif)
err = clib_error_return (0, "wrong cookie on tx ring %u", i);
goto error;
}
+ mq->queue_index =
+ vnet_hw_if_register_tx_queue (vnm, mif->hw_if_index, i);
+ clib_spinlock_init (&mq->lockp);
+
+ if (mif->flags & MEMIF_IF_FLAG_USE_DMA)
+ {
+ memif_dma_info_t *dma_info;
+ mq->dma_head = 0;
+ mq->dma_tail = 0;
+ mq->dma_info_head = 0;
+ mq->dma_info_tail = 0;
+ mq->dma_info_size = MEMIF_DMA_INFO_SIZE;
+ vec_validate_aligned (mq->dma_info, MEMIF_DMA_INFO_SIZE,
+ CLIB_CACHE_LINE_BYTES);
+
+ vec_foreach (dma_info, mq->dma_info)
+ {
+ vec_validate_aligned (dma_info->data.desc_data,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.desc_len,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.desc_status,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.copy_ops, 0,
+ CLIB_CACHE_LINE_BYTES);
+ vec_reset_length (dma_info->data.copy_ops);
+ vec_validate_aligned (dma_info->data.buffers, 0,
+ CLIB_CACHE_LINE_BYTES);
+ vec_reset_length (dma_info->data.buffers);
+ }
+ }
+ }
+
+ if (vec_len (mif->tx_queues) > 0)
+ {
+ n_txqs = vec_len (mif->tx_queues);
+ for (j = 0; j < n_threads; j++)
+ {
+ u32 qi = mif->tx_queues[j % n_txqs].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, j);
+ }
}
vec_foreach_index (i, mif->rx_queues)
@@ -284,6 +355,8 @@ memif_connect (memif_if_t * mif)
u32 qi;
int rv;
+ max_log2_ring_sz = clib_max (max_log2_ring_sz, mq->log2_ring_size);
+
mq->ring = mif->regions[mq->region].shm + mq->offset;
if (mq->ring->cookie != MEMIF_COOKIE)
{
@@ -293,6 +366,37 @@ memif_connect (memif_if_t * mif)
qi = vnet_hw_if_register_rx_queue (vnm, mif->hw_if_index, i,
VNET_HW_IF_RXQ_THREAD_ANY);
mq->queue_index = qi;
+
+ if (mif->flags & MEMIF_IF_FLAG_USE_DMA)
+ {
+ memif_dma_info_t *dma_info;
+ mq->dma_head = 0;
+ mq->dma_tail = 0;
+ mq->dma_info_head = 0;
+ mq->dma_info_tail = 0;
+ mq->dma_info_size = MEMIF_DMA_INFO_SIZE;
+ vec_validate_aligned (mq->dma_info, MEMIF_DMA_INFO_SIZE,
+ CLIB_CACHE_LINE_BYTES);
+ vec_foreach (dma_info, mq->dma_info)
+ {
+ vec_validate_aligned (dma_info->data.desc_data,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.desc_len,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.desc_status,
+ pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (dma_info->data.copy_ops, 0,
+ CLIB_CACHE_LINE_BYTES);
+ vec_reset_length (dma_info->data.copy_ops);
+ vec_validate_aligned (dma_info->data.buffers, 0,
+ CLIB_CACHE_LINE_BYTES);
+ vec_reset_length (dma_info->data.buffers);
+ }
+ }
+
if (mq->int_fd > -1)
{
template.file_descriptor = mq->int_fd;
@@ -324,7 +428,23 @@ memif_connect (memif_if_t * mif)
vnet_hw_if_rx_queue_set_int_pending (vnm, qi);
}
}
- /* *INDENT-ON* */
+
+ if (1 << max_log2_ring_sz > vec_len (mm->per_thread_data[0].desc_data))
+ {
+ memif_per_thread_data_t *ptd;
+
+ vec_foreach (ptd, mm->per_thread_data)
+ {
+ vec_validate_aligned (ptd->desc_data, pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ptd->desc_len, pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (ptd->desc_status, pow2_mask (max_log2_ring_sz),
+ CLIB_CACHE_LINE_BYTES);
+ }
+ }
+ if (with_barrier)
+ vlib_worker_thread_barrier_release (vm);
mif->flags &= ~MEMIF_IF_FLAG_CONNECTING;
mif->flags |= MEMIF_IF_FLAG_CONNECTED;
@@ -334,6 +454,8 @@ memif_connect (memif_if_t * mif)
return 0;
error:
+ if (with_barrier)
+ vlib_worker_thread_barrier_release (vm);
memif_log_err (mif, "%U", format_clib_error, err);
return err;
}
@@ -405,7 +527,6 @@ memif_init_regions_and_queues (memif_if_t * mif)
if (mif->flags & MEMIF_IF_FLAG_ZERO_COPY)
{
vlib_buffer_pool_t *bp;
- /* *INDENT-OFF* */
vec_foreach (bp, vm->buffer_main->buffer_pools)
{
vlib_physmem_map_t *pm;
@@ -416,7 +537,6 @@ memif_init_regions_and_queues (memif_if_t * mif)
r->shm = pm->base;
r->is_external = 1;
}
- /* *INDENT-ON* */
}
for (i = 0; i < mif->run.num_s2m_rings; i++)
@@ -461,7 +581,6 @@ memif_init_regions_and_queues (memif_if_t * mif)
vec_validate_aligned (mif->tx_queues, mif->run.num_s2m_rings - 1,
CLIB_CACHE_LINE_BYTES);
- /* *INDENT-OFF* */
vec_foreach_index (i, mif->tx_queues)
{
memif_queue_t *mq = vec_elt_at_index (mif->tx_queues, i);
@@ -470,6 +589,7 @@ memif_init_regions_and_queues (memif_if_t * mif)
err = clib_error_return_unix (0, "eventfd[tx queue %u]", i);
goto error;
}
+
mq->int_clib_file_index = ~0;
mq->ring = memif_get_ring (mif, MEMIF_RING_S2M, i);
mq->log2_ring_size = mif->cfg.log2_ring_size;
@@ -481,13 +601,11 @@ memif_init_regions_and_queues (memif_if_t * mif)
vec_validate_aligned (mq->buffers, 1 << mq->log2_ring_size,
CLIB_CACHE_LINE_BYTES);
}
- /* *INDENT-ON* */
ASSERT (mif->rx_queues == 0);
vec_validate_aligned (mif->rx_queues, mif->run.num_m2s_rings - 1,
CLIB_CACHE_LINE_BYTES);
- /* *INDENT-OFF* */
vec_foreach_index (i, mif->rx_queues)
{
memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, i);
@@ -507,7 +625,6 @@ memif_init_regions_and_queues (memif_if_t * mif)
vec_validate_aligned (mq->buffers, 1 << mq->log2_ring_size,
CLIB_CACHE_LINE_BYTES);
}
- /* *INDENT-ON* */
return 0;
@@ -558,7 +675,6 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
}
last_run_duration = start_time = vlib_time_now (vm);
- /* *INDENT-OFF* */
pool_foreach (mif, mm->interfaces)
{
memif_socket_file_t * msf = vec_elt_at_index (mm->socket_files, mif->socket_file_index);
@@ -583,8 +699,8 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
{
clib_memset (sock, 0, sizeof(clib_socket_t));
sock->config = (char *) msf->filename;
- sock->flags = CLIB_SOCKET_F_IS_CLIENT | CLIB_SOCKET_F_SEQPACKET |
- CLIB_SOCKET_F_BLOCKING;
+ sock->is_seqpacket = 1;
+ sock->is_blocking = 1;
if ((err = clib_socket_init (sock)))
{
@@ -611,162 +727,160 @@ memif_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
}
}
}
- /* *INDENT-ON* */
last_run_duration = vlib_time_now (vm) - last_run_duration;
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (memif_process_node,static) = {
.function = memif_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "memif-process",
};
-/* *INDENT-ON* */
-static int
-memif_add_socket_file (u32 sock_id, u8 * socket_filename)
+/*
+ * Returns an unused socket id, and ~0 if it can't find one.
+ */
+u32
+memif_get_unused_socket_id ()
{
memif_main_t *mm = &memif_main;
uword *p;
- memif_socket_file_t *msf;
+ int i, j;
- p = hash_get (mm->socket_file_index_by_sock_id, sock_id);
- if (p)
+ static u32 seed = 0;
+ /* limit to 1M tries */
+ for (j = 0; j < 1 << 10; j++)
{
- msf = pool_elt_at_index (mm->socket_files, *p);
- if (strcmp ((char *) msf->filename, (char *) socket_filename) == 0)
+ seed = random_u32 (&seed);
+ for (i = 0; i < 1 << 10; i++)
{
- /* Silently accept identical "add". */
- return 0;
+ /* look around randomly generated id */
+ seed += (2 * (i % 2) - 1) * i;
+ if (seed == (u32) ~0)
+ continue;
+ p = hash_get (mm->socket_file_index_by_sock_id, seed);
+ if (!p)
+ return seed;
}
-
- /* But don't allow a direct add of a different filename. */
- return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
}
- pool_get (mm->socket_files, msf);
- clib_memset (msf, 0, sizeof (memif_socket_file_t));
-
- msf->filename = socket_filename;
- msf->socket_id = sock_id;
-
- hash_set (mm->socket_file_index_by_sock_id, sock_id,
- msf - mm->socket_files);
-
- return 0;
+ return ~0;
}
-static int
-memif_delete_socket_file (u32 sock_id)
+clib_error_t *
+memif_socket_filename_add_del (u8 is_add, u32 sock_id, char *sock_filename)
{
memif_main_t *mm = &memif_main;
uword *p;
memif_socket_file_t *msf;
+ clib_error_t *err = 0;
+ char *dir = 0, *tmp;
+ u32 idx = 0;
+ u8 *name = 0;
- p = hash_get (mm->socket_file_index_by_sock_id, sock_id);
- if (!p)
- {
- /* Don't delete non-existent entries. */
- return VNET_API_ERROR_INVALID_ARGUMENT;
- }
+ /* allow adding socket id 0 */
+ if (sock_id == 0 && is_add == 0)
+ return vnet_error (VNET_ERR_INVALID_ARGUMENT, "cannot delete socket id 0");
- msf = pool_elt_at_index (mm->socket_files, *p);
- if (msf->ref_cnt > 0)
+ if (sock_id == ~0)
+ return vnet_error (VNET_ERR_INVALID_ARGUMENT,
+ "socked id is not specified");
+
+ if (is_add == 0)
{
- return VNET_API_ERROR_UNEXPECTED_INTF_STATE;
- }
+ p = hash_get (mm->socket_file_index_by_sock_id, sock_id);
+ if (!p)
+ /* Don't delete non-existent entries. */
+ return vnet_error (VNET_ERR_INVALID_ARGUMENT,
+ "socket file with id %u does not exist", sock_id);
- vec_free (msf->filename);
- pool_put (mm->socket_files, msf);
+ msf = pool_elt_at_index (mm->socket_files, *p);
+ if (msf->ref_cnt > 0)
+ return vnet_error (VNET_ERR_UNEXPECTED_INTF_STATE,
+ "socket file '%s' is in use", msf->filename);
- hash_unset (mm->socket_file_index_by_sock_id, sock_id);
+ vec_free (msf->filename);
+ pool_put (mm->socket_files, msf);
- return 0;
-}
-
-int
-memif_socket_filename_add_del (u8 is_add, u32 sock_id, u8 * sock_filename)
-{
- char *dir = 0, *tmp;
- u32 idx = 0;
+ hash_unset (mm->socket_file_index_by_sock_id, sock_id);
- /* allow adding socket id 0 */
- if ((sock_id == 0 && is_add == 0) || sock_id == ~0)
- {
- return VNET_API_ERROR_INVALID_ARGUMENT;
+ return 0;
}
- if (is_add == 0)
+ if (sock_filename == 0 || sock_filename[0] == 0)
+ return vnet_error (VNET_ERR_INVALID_ARGUMENT,
+ "socket filename not specified");
+
+ if (clib_socket_prefix_is_valid (sock_filename))
{
- return memif_delete_socket_file (sock_id);
+ name = format (0, "%s%c", sock_filename, 0);
}
-
- if (sock_filename == 0 || sock_filename[0] == 0)
+ else if (sock_filename[0] == '/')
{
- return VNET_API_ERROR_INVALID_ARGUMENT;
+ name = format (0, "%s%c", sock_filename, 0);
}
-
- if (sock_filename[0] != '/')
+ else
{
- clib_error_t *error;
-
/* copy runtime dir path */
vec_add (dir, vlib_unix_get_runtime_dir (),
strlen (vlib_unix_get_runtime_dir ()));
vec_add1 (dir, '/');
/* if sock_filename contains dirs, add them to path */
- tmp = strrchr ((char *) sock_filename, '/');
+ tmp = strrchr (sock_filename, '/');
if (tmp)
{
- idx = tmp - (char *) sock_filename;
+ idx = tmp - sock_filename;
vec_add (dir, sock_filename, idx);
}
vec_add1 (dir, '\0');
/* create socket dir */
- error = vlib_unix_recursive_mkdir (dir);
- if (error)
+ if ((err = vlib_unix_recursive_mkdir (dir)))
{
- clib_error_free (error);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
+ clib_error_free (err);
+ err = vnet_error (VNET_ERR_SYSCALL_ERROR_1,
+ "unable to create socket dir");
+ goto done;
}
- sock_filename = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (),
- sock_filename, 0);
+ name =
+ format (0, "%s/%s%c", vlib_unix_get_runtime_dir (), sock_filename, 0);
}
- else
- {
- sock_filename = vec_dup (sock_filename);
- /* check if directory exists */
- tmp = strrchr ((char *) sock_filename, '/');
- if (tmp)
+ p = hash_get (mm->socket_file_index_by_sock_id, sock_id);
+ if (p)
+ {
+ msf = pool_elt_at_index (mm->socket_files, *p);
+ if (strcmp ((char *) msf->filename, (char *) name) == 0)
{
- idx = tmp - (char *) sock_filename;
- vec_add (dir, sock_filename, idx);
- vec_add1 (dir, '\0');
+ /* Silently accept identical "add". */
+ goto done;
}
- /* check dir existance and access rights for effective user/group IDs */
- if ((dir == NULL)
- ||
- (faccessat ( /* ignored */ -1, dir, F_OK | R_OK | W_OK, AT_EACCESS)
- < 0))
- {
- vec_free (dir);
- return VNET_API_ERROR_INVALID_ARGUMENT;
- }
+ /* But don't allow a direct add of a different filename. */
+ err = vnet_error (VNET_ERR_ENTRY_ALREADY_EXISTS, "entry already exists");
+ goto done;
}
- vec_free (dir);
- return memif_add_socket_file (sock_id, sock_filename);
+ pool_get (mm->socket_files, msf);
+ clib_memset (msf, 0, sizeof (memif_socket_file_t));
+
+ msf->filename = name;
+ msf->socket_id = sock_id;
+ name = 0;
+
+ hash_set (mm->socket_file_index_by_sock_id, sock_id, msf - mm->socket_files);
+
+done:
+ vec_free (name);
+ vec_free (dir);
+ return err;
}
-int
-memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
+clib_error_t *
+memif_delete_if (vlib_main_t *vm, memif_if_t *mif)
{
vnet_main_t *vnm = vnet_get_main ();
memif_main_t *mm = &memif_main;
@@ -797,7 +911,6 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
}
/* free interface data structures */
- clib_spinlock_free (&mif->lockp);
mhash_unset (&msf->dev_instance_by_id, &mif->id, 0);
/* remove socket file */
@@ -806,10 +919,8 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
if (msf->is_listener)
{
int i;
- /* *INDENT-OFF* */
vec_foreach_index (i, msf->pending_clients)
memif_socket_close (msf->pending_clients + i);
- /* *INDENT-ON* */
memif_socket_close (&msf->sock);
vec_free (msf->pending_clients);
}
@@ -827,6 +938,7 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
}
}
+ vec_free (mif->local_disc_string);
clib_memset (mif, 0, sizeof (*mif));
pool_put (mm->interfaces, mif);
@@ -837,33 +949,39 @@ memif_delete_if (vlib_main_t * vm, memif_if_t * mif)
return 0;
}
-/* *INDENT-OFF* */
-VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) =
-{
+VNET_HW_INTERFACE_CLASS (memif_ip_hw_if_class, static) = {
.name = "memif-ip",
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+ .tx_hash_fn_type = VNET_HASH_FN_TYPE_IP,
};
-/* *INDENT-ON* */
-int
-memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
+static void
+memif_prepare_dma_args (vlib_dma_config_t *args)
+{
+ args->max_batches = 256;
+ args->max_transfer_size = VLIB_BUFFER_DEFAULT_DATA_SIZE;
+ args->barrier_before_last = 1;
+ args->sw_fallback = 1;
+ args->callback_fn = NULL;
+}
+
+clib_error_t *
+memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args)
{
memif_main_t *mm = &memif_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_main_t *vnm = vnet_get_main ();
+ vnet_eth_interface_registration_t eir = {};
memif_if_t *mif = 0;
vnet_sw_interface_t *sw;
- clib_error_t *error = 0;
- int ret = 0;
uword *p;
- vnet_hw_interface_t *hw;
memif_socket_file_t *msf = 0;
- int rv = 0;
+ clib_error_t *err = 0;
p = hash_get (mm->socket_file_index_by_sock_id, args->socket_id);
if (p == 0)
{
- rv = VNET_API_ERROR_INVALID_ARGUMENT;
+ err = vnet_error (VNET_ERR_INVALID_ARGUMENT, "unknown socket id");
goto done;
}
@@ -874,14 +992,17 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
{
if ((!msf->is_listener != !args->is_master))
{
- rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+ err =
+ vnet_error (VNET_ERR_SUBIF_ALREADY_EXISTS,
+ "socket file cannot be used by both master and slave");
goto done;
}
p = mhash_get (&msf->dev_instance_by_id, &args->id);
if (p)
{
- rv = VNET_API_ERROR_SUBIF_ALREADY_EXISTS;
+ err = vnet_error (VNET_ERR_SUBIF_ALREADY_EXISTS,
+ "interface already exists");
goto done;
}
}
@@ -889,25 +1010,6 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
/* Create new socket file */
if (msf->ref_cnt == 0)
{
- struct stat file_stat;
-
- /* If we are creating listener make sure file doesn't exist or if it
- * exists thn delete it if it is old socket file */
- if (args->is_master && (stat ((char *) msf->filename, &file_stat) == 0))
- {
- if (S_ISSOCK (file_stat.st_mode))
- {
- unlink ((char *) msf->filename);
- }
- else
- {
- error = clib_error_return (0, "File exists for %s",
- msf->filename);
- rv = VNET_API_ERROR_VALUE_EXIST;
- goto done;
- }
- }
-
mhash_init (&msf->dev_instance_by_id, sizeof (uword),
sizeof (memif_interface_id_t));
msf->dev_instance_by_fd = hash_create (0, sizeof (uword));
@@ -933,8 +1035,6 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
bt->total_length_not_including_first_buffer = 0;
vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- /* initially prealloc copy_ops so we can use
- _vec_len instead of vec_elen */
vec_validate_aligned (ptd->copy_ops, 0, CLIB_CACHE_LINE_BYTES);
vec_reset_length (ptd->copy_ops);
vec_validate_aligned (ptd->buffers, 0, CLIB_CACHE_LINE_BYTES);
@@ -952,8 +1052,19 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
if (args->secret)
mif->secret = vec_dup (args->secret);
- if (tm->n_vlib_mains > 1)
- clib_spinlock_init (&mif->lockp);
+ /* register dma config if enabled */
+ if (args->use_dma)
+ {
+ vlib_dma_config_t dma_args;
+ bzero (&dma_args, sizeof (dma_args));
+ memif_prepare_dma_args (&dma_args);
+
+ dma_args.max_transfers = 1 << args->log2_ring_size;
+ dma_args.callback_fn = memif_dma_completion_cb;
+ mif->dma_input_config = vlib_dma_config_add (vm, &dma_args);
+ dma_args.callback_fn = memif_tx_dma_completion_cb;
+ mif->dma_tx_config = vlib_dma_config_add (vm, &dma_args);
+ }
if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
{
@@ -969,10 +1080,13 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
args->hw_addr[0] = 2;
args->hw_addr[1] = 0xfe;
}
- error = ethernet_register_interface (vnm, memif_device_class.index,
- mif->dev_instance, args->hw_addr,
- &mif->hw_if_index,
- memif_eth_flag_change);
+
+ eir.dev_class_index = memif_device_class.index;
+ eir.dev_instance = mif->dev_instance;
+ eir.address = args->hw_addr;
+ eir.cb.flag_change = memif_eth_flag_change;
+ eir.cb.set_max_frame_size = memif_eth_set_max_frame_size;
+ mif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
}
else if (mif->mode == MEMIF_INTERFACE_MODE_IP)
{
@@ -983,11 +1097,9 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
mif->dev_instance);
}
else
- error = clib_error_return (0, "unsupported interface mode");
-
- if (error)
{
- ret = VNET_API_ERROR_SYSCALL_ERROR_2;
+ err =
+ vnet_error (VNET_ERR_SYSCALL_ERROR_2, "unsupported interface mode");
goto error;
}
@@ -1006,7 +1118,6 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
/* If this is new one, start listening */
if (msf->is_listener && msf->ref_cnt == 0)
{
- struct stat file_stat;
clib_socket_t *s = clib_mem_alloc (sizeof (clib_socket_t));
ASSERT (msf->sock == 0);
@@ -1014,19 +1125,15 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
clib_memset (s, 0, sizeof (clib_socket_t));
s->config = (char *) msf->filename;
- s->flags = CLIB_SOCKET_F_IS_SERVER |
- CLIB_SOCKET_F_ALLOW_GROUP_WRITE |
- CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_PASSCRED;
+ s->local_only = 1;
+ s->is_server = 1;
+ s->allow_group_write = 1;
+ s->is_seqpacket = 1;
+ s->passcred = 1;
- if ((error = clib_socket_init (s)))
+ if ((err = clib_socket_init (s)))
{
- ret = VNET_API_ERROR_SYSCALL_ERROR_4;
- goto error;
- }
-
- if (stat ((char *) msf->filename, &file_stat) == -1)
- {
- ret = VNET_API_ERROR_SYSCALL_ERROR_8;
+ err->code = VNET_ERR_SYSCALL_ERROR_4;
goto error;
}
@@ -1047,8 +1154,10 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
mif->flags |= MEMIF_IF_FLAG_ZERO_COPY;
}
- hw = vnet_get_hw_interface (vnm, mif->hw_if_index);
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ if (args->use_dma)
+ mif->flags |= MEMIF_IF_FLAG_USE_DMA;
+
+ vnet_hw_if_set_caps (vnm, mif->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
vnet_hw_if_set_input_node (vnm, mif->hw_if_index, memif_input_node.index);
mhash_set (&msf->dev_instance_by_id, &mif->id, mif->dev_instance, 0);
@@ -1061,15 +1170,12 @@ memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args)
error:
memif_delete_if (vm, mif);
- if (error)
- {
- memif_log_err (mif, "%U", format_clib_error, error);
- clib_error_free (error);
- }
- return ret;
+ if (err)
+ memif_log_err (mif, "%U", format_clib_error, err);
+ return err;
done:
- return rv;
+ return err;
}
clib_error_t *
@@ -1081,7 +1187,14 @@ memif_interface_admin_up_down (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
static clib_error_t *error = 0;
if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
- mif->flags |= MEMIF_IF_FLAG_ADMIN_UP;
+ {
+ if (mif->flags & MEMIF_IF_FLAG_CONNECTED)
+ {
+ vnet_hw_interface_set_flags (vnm, mif->hw_if_index,
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ }
+ mif->flags |= MEMIF_IF_FLAG_ADMIN_UP;
+ }
else
mif->flags &= ~MEMIF_IF_FLAG_ADMIN_UP;
@@ -1108,19 +1221,15 @@ memif_init (vlib_main_t * vm)
* for socket-id 0 to MEMIF_DEFAULT_SOCKET_FILENAME in the
* default run-time directory.
*/
- memif_socket_filename_add_del (1, 0, (u8 *) MEMIF_DEFAULT_SOCKET_FILENAME);
-
- return 0;
+ return memif_socket_filename_add_del (1, 0, MEMIF_DEFAULT_SOCKET_FILENAME);
}
VLIB_INIT_FUNCTION (memif_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Packet Memory Interface (memif) -- Experimental",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/memif/memif_api.c b/src/plugins/memif/memif_api.c
index a50e7ce8882..16a52ffc0dc 100644
--- a/src/plugins/memif/memif_api.c
+++ b/src/plugins/memif/memif_api.c
@@ -48,8 +48,6 @@ void
memif_main_t *mm = &memif_main;
u8 is_add;
u32 socket_id;
- u32 len;
- u8 *socket_filename;
vl_api_memif_socket_filename_add_del_reply_t *rmp;
int rv;
@@ -65,24 +63,52 @@ void
}
/* socket filename */
- socket_filename = 0;
mp->socket_filename[ARRAY_LEN (mp->socket_filename) - 1] = 0;
- len = strlen ((char *) mp->socket_filename);
- if (mp->is_add)
+
+ rv = vnet_get_api_error_and_free (memif_socket_filename_add_del (
+ is_add, socket_id, (char *) mp->socket_filename));
+
+reply:
+ REPLY_MACRO (VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_REPLY);
+}
+
+/**
+ * @brief Message handler for memif_socket_filename_add_del API.
+ * @param mp the vl_api_memif_socket_filename_add_del_t API message
+ */
+void
+vl_api_memif_socket_filename_add_del_v2_t_handler (
+ vl_api_memif_socket_filename_add_del_v2_t *mp)
+{
+ vl_api_memif_socket_filename_add_del_v2_reply_t *rmp;
+ memif_main_t *mm = &memif_main;
+ char *socket_filename = 0;
+ u32 socket_id;
+ int rv;
+
+ /* socket_id */
+ socket_id = clib_net_to_host_u32 (mp->socket_id);
+ if (socket_id == 0)
{
- vec_validate (socket_filename, len);
- memcpy (socket_filename, mp->socket_filename, len);
+ rv = VNET_API_ERROR_INVALID_ARGUMENT;
+ goto reply;
}
- rv = memif_socket_filename_add_del (is_add, socket_id, socket_filename);
+ /* socket filename */
+ socket_filename = vl_api_from_api_to_new_c_string (&mp->socket_filename);
+ if (mp->is_add && socket_id == (u32) ~0)
+ socket_id = memif_get_unused_socket_id ();
+
+ rv = vnet_get_api_error_and_free (
+ memif_socket_filename_add_del (mp->is_add, socket_id, socket_filename));
vec_free (socket_filename);
reply:
- REPLY_MACRO (VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_REPLY);
+ REPLY_MACRO2 (VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_V2_REPLY,
+ ({ rmp->socket_id = htonl (socket_id); }));
}
-
/**
* @brief Message handler for memif_create API.
* @param mp vl_api_memif_create_t * mp the api message
@@ -164,17 +190,107 @@ vl_api_memif_create_t_handler (vl_api_memif_create_t * mp)
args.hw_addr_set = 1;
}
- rv = memif_create_if (vm, &args);
+ rv = vnet_get_api_error_and_free (memif_create_if (vm, &args));
vec_free (args.secret);
reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_MEMIF_CREATE_REPLY,
({
rmp->sw_if_index = htonl (args.sw_if_index);
}));
- /* *INDENT-ON* */
+}
+
+/**
+ * @brief Message handler for memif_create_v2 API.
+ * @param mp vl_api_memif_create_v2_t * mp the api message
+ */
+void
+vl_api_memif_create_v2_t_handler (vl_api_memif_create_v2_t *mp)
+{
+ memif_main_t *mm = &memif_main;
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_memif_create_reply_t *rmp;
+ memif_create_if_args_t args = { 0 };
+ u32 ring_size = MEMIF_DEFAULT_RING_SIZE;
+ static const u8 empty_hw_addr[6];
+ int rv = 0;
+ mac_address_t mac;
+
+ /* id */
+ args.id = clib_net_to_host_u32 (mp->id);
+
+ /* socket-id */
+ args.socket_id = clib_net_to_host_u32 (mp->socket_id);
+
+ /* secret */
+ mp->secret[ARRAY_LEN (mp->secret) - 1] = 0;
+ if (strlen ((char *) mp->secret) > 0)
+ {
+ vec_validate (args.secret, strlen ((char *) mp->secret));
+ strncpy ((char *) args.secret, (char *) mp->secret,
+ vec_len (args.secret));
+ }
+
+ /* role */
+ args.is_master = (ntohl (mp->role) == MEMIF_ROLE_API_MASTER);
+
+ /* mode */
+ args.mode = ntohl (mp->mode);
+
+ args.is_zero_copy = mp->no_zero_copy ? 0 : 1;
+
+ args.use_dma = mp->use_dma;
+
+ /* rx/tx queues */
+ if (args.is_master == 0)
+ {
+ args.rx_queues = MEMIF_DEFAULT_RX_QUEUES;
+ args.tx_queues = MEMIF_DEFAULT_TX_QUEUES;
+ if (mp->rx_queues)
+ {
+ args.rx_queues = mp->rx_queues;
+ }
+ if (mp->tx_queues)
+ {
+ args.tx_queues = mp->tx_queues;
+ }
+ }
+
+ /* ring size */
+ if (mp->ring_size)
+ {
+ ring_size = ntohl (mp->ring_size);
+ }
+ if (!is_pow2 (ring_size))
+ {
+ rv = VNET_API_ERROR_INVALID_ARGUMENT;
+ goto reply;
+ }
+ args.log2_ring_size = min_log2 (ring_size);
+
+ /* buffer size */
+ args.buffer_size = MEMIF_DEFAULT_BUFFER_SIZE;
+ if (mp->buffer_size)
+ {
+ args.buffer_size = ntohs (mp->buffer_size);
+ }
+
+ /* MAC address */
+ mac_address_decode (mp->hw_addr, &mac);
+ if (memcmp (&mac, empty_hw_addr, 6) != 0)
+ {
+ memcpy (args.hw_addr, &mac, 6);
+ args.hw_addr_set = 1;
+ }
+
+ rv = vnet_api_error (memif_create_if (vm, &args));
+
+ vec_free (args.secret);
+
+reply:
+ REPLY_MACRO2 (VL_API_MEMIF_CREATE_V2_REPLY,
+ ({ rmp->sw_if_index = htonl (args.sw_if_index); }));
}
/**
@@ -201,7 +317,7 @@ vl_api_memif_delete_t_handler (vl_api_memif_delete_t * mp)
else
{
mif = pool_elt_at_index (mm->interfaces, hi->dev_instance);
- rv = memif_delete_if (vm, mif);
+ rv = vnet_get_api_error_and_free (memif_delete_if (vm, mif));
}
REPLY_MACRO (VL_API_MEMIF_DELETE_REPLY);
@@ -279,7 +395,6 @@ vl_api_memif_dump_t_handler (vl_api_memif_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (mif, mm->interfaces)
{
swif = vnet_get_sw_interface (vnm, mif->sw_if_index);
@@ -289,9 +404,8 @@ vl_api_memif_dump_t_handler (vl_api_memif_dump_t * mp)
vnm, swif, 0);
send_memif_details (reg, mif, swif, if_name, mp->context);
- _vec_len (if_name) = 0;
+ vec_set_len (if_name, 0);
}
- /* *INDENT-ON* */
vec_free (if_name);
}
@@ -335,7 +449,6 @@ void
if (!reg)
return;
- /* *INDENT-OFF* */
hash_foreach (sock_id, msf_idx, mm->socket_file_index_by_sock_id,
({
memif_socket_file_t *msf;
@@ -345,7 +458,6 @@ void
filename = msf->filename;
send_memif_socket_filename_details(reg, sock_id, filename, mp->context);
}));
- /* *INDENT-ON* */
}
/* Set up the API message handling tables */
diff --git a/src/plugins/memif/memif_test.c b/src/plugins/memif/memif_test.c
index 1ec6703d135..d3290ac0340 100644
--- a/src/plugins/memif/memif_test.c
+++ b/src/plugins/memif/memif_test.c
@@ -33,8 +33,7 @@
#include <vnet/format_fns.h>
#include <memif/memif.api_enum.h>
#include <memif/memif.api_types.h>
-#include <vpp/api/vpe.api_types.h>
-//#include <vnet/ethernet/ethernet_types.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -122,6 +121,86 @@ api_memif_socket_filename_add_del (vat_main_t * vam)
return ret;
}
+/* memif_socket_filename_add_del API */
+static int
+api_memif_socket_filename_add_del_v2 (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_memif_socket_filename_add_del_v2_t *mp;
+ u8 is_add;
+ u32 socket_id;
+ u8 *socket_filename;
+ int ret;
+
+ is_add = 1;
+ socket_id = ~0;
+ socket_filename = 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "id %u", &socket_id))
+ ;
+ else if (unformat (i, "filename %s", &socket_filename))
+ ;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else
+ {
+ vec_free (socket_filename);
+ clib_warning ("unknown input `%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (socket_id == 0 || socket_id == ~0)
+ {
+ vec_free (socket_filename);
+ errmsg ("Invalid socket id");
+ return -99;
+ }
+
+ if (is_add && (!socket_filename || *socket_filename == 0))
+ {
+ vec_free (socket_filename);
+ errmsg ("Invalid socket filename");
+ return -99;
+ }
+
+ M2 (MEMIF_SOCKET_FILENAME_ADD_DEL_V2, mp, strlen ((char *) socket_filename));
+
+ mp->is_add = is_add;
+ mp->socket_id = htonl (socket_id);
+ char *p = (char *) &mp->socket_filename;
+ p += vl_api_vec_to_api_string (socket_filename, (vl_api_string_t *) p);
+
+ vec_free (socket_filename);
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
+/* memif socket-create reply handler */
+static void
+vl_api_memif_socket_filename_add_del_v2_reply_t_handler (
+ vl_api_memif_socket_filename_add_del_v2_reply_t *mp)
+{
+ vat_main_t *vam = memif_test_main.vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created memif socket with socket_id %d\n",
+ ntohl (mp->socket_id));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
/* memif_socket_filename_add_del reply handler */
#define VL_API_MEMIF_SOCKET_FILENAME_ADD_DEL_REPLY_T_HANDLER
static void vl_api_memif_socket_filename_add_del_reply_t_handler
@@ -246,6 +325,120 @@ static void vl_api_memif_create_reply_t_handler
vam->regenerate_interface_table = 1;
}
+/* memif-create_v2 API */
+static int
+api_memif_create_v2 (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_memif_create_v2_t *mp;
+ u32 id = 0;
+ u32 socket_id = 0;
+ u8 *secret = 0;
+ u8 role = 1;
+ u32 ring_size = 0;
+ u8 use_dma = 0;
+ u32 buffer_size = 0;
+ u8 hw_addr[6] = { 0 };
+ u32 rx_queues = MEMIF_DEFAULT_RX_QUEUES;
+ u32 tx_queues = MEMIF_DEFAULT_TX_QUEUES;
+ int ret;
+ u8 mode = MEMIF_INTERFACE_MODE_ETHERNET;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "id %u", &id))
+ ;
+ else if (unformat (i, "socket-id %u", &socket_id))
+ ;
+ else if (unformat (i, "secret %s", &secret))
+ ;
+ else if (unformat (i, "ring_size %u", &ring_size))
+ ;
+ else if (unformat (i, "buffer_size %u", &buffer_size))
+ ;
+ else if (unformat (i, "master"))
+ role = 0;
+ else if (unformat (i, "use_dma %u", &use_dma))
+ ;
+ else if (unformat (i, "slave %U", unformat_memif_queues, &rx_queues,
+ &tx_queues))
+ role = 1;
+ else if (unformat (i, "mode ip"))
+ mode = MEMIF_INTERFACE_MODE_IP;
+ else if (unformat (i, "hw_addr %U", unformat_ethernet_address, hw_addr))
+ ;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (socket_id == ~0)
+ {
+ errmsg ("invalid socket-id\n");
+ return -99;
+ }
+
+ if (!is_pow2 (ring_size))
+ {
+ errmsg ("ring size must be power of 2\n");
+ return -99;
+ }
+
+ if (rx_queues > 255 || rx_queues < 1)
+ {
+ errmsg ("rx queue must be between 1 - 255\n");
+ return -99;
+ }
+
+ if (tx_queues > 255 || tx_queues < 1)
+ {
+ errmsg ("tx queue must be between 1 - 255\n");
+ return -99;
+ }
+
+ M2 (MEMIF_CREATE, mp, strlen ((char *) secret));
+
+ mp->mode = mode;
+ mp->id = clib_host_to_net_u32 (id);
+ mp->role = role;
+ mp->use_dma = use_dma;
+ mp->ring_size = clib_host_to_net_u32 (ring_size);
+ mp->buffer_size = clib_host_to_net_u16 (buffer_size & 0xffff);
+ mp->socket_id = clib_host_to_net_u32 (socket_id);
+
+ char *p = (char *) &mp->secret;
+ p += vl_api_vec_to_api_string (secret, (vl_api_string_t *) p);
+ vec_free (secret);
+
+ memcpy (mp->hw_addr, hw_addr, 6);
+ mp->rx_queues = rx_queues;
+ mp->tx_queues = tx_queues;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+/* memif-create_v2 reply handler */
+static void
+vl_api_memif_create_v2_reply_t_handler (vl_api_memif_create_reply_t *mp)
+{
+ vat_main_t *vam = memif_test_main.vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created memif with sw_if_index %d\n",
+ ntohl (mp->sw_if_index));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vam->regenerate_interface_table = 1;
+}
+
/* memif-delete API */
static int
api_memif_delete (vat_main_t * vam)
diff --git a/src/plugins/memif/node.c b/src/plugins/memif/node.c
index 8bb3758c5fd..70933f4aa9d 100644
--- a/src/plugins/memif/node.c
+++ b/src/plugins/memif/node.c
@@ -30,6 +30,8 @@
#include <memif/memif.h>
#include <memif/private.h>
+#define MEMIF_IP_OFFSET 14
+
#define foreach_memif_input_error \
_ (BUFFER_ALLOC_FAIL, buffer_alloc, ERROR, "buffer allocation failed") \
_ (BAD_DESC, bad_desc, ERROR, "bad descriptor") \
@@ -140,96 +142,141 @@ memif_add_to_chain (vlib_main_t * vm, vlib_buffer_t * b, u32 * buffers,
}
}
-static_always_inline uword
-memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- memif_if_t *mif, memif_ring_type_t type, u16 qid,
- memif_interface_mode_t mode)
+static_always_inline u16
+memif_parse_desc (memif_per_thread_data_t *ptd, memif_if_t *mif,
+ memif_queue_t *mq, u16 next, u16 n_avail)
{
- vnet_main_t *vnm = vnet_get_main ();
- memif_main_t *mm = &memif_main;
- memif_ring_t *ring;
- memif_queue_t *mq;
- u16 buffer_size = vlib_buffer_get_default_data_size (vm);
- uword n_trace;
- u16 nexts[MEMIF_RX_VECTOR_SZ], *next = nexts;
- u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi;
- u32 n_rx_packets = 0, n_rx_bytes = 0;
- u32 n_left, n_left_to_next;
- u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- vlib_buffer_t *b0, *b1, *b2, *b3;
- u32 thread_index = vm->thread_index;
- memif_per_thread_data_t *ptd = vec_elt_at_index (mm->per_thread_data,
- thread_index);
- vlib_buffer_t bt;
- u16 cur_slot, last_slot, ring_size, n_slots, mask;
- i16 start_offset;
- u16 n_buffers = 0, n_alloc;
- memif_copy_op_t *co;
- memif_packet_op_t *po;
- memif_region_index_t last_region = ~0;
- void *last_region_shm = 0;
- void *last_region_max = 0;
+ memif_ring_t *ring = mq->ring;
+ memif_desc_t *descs = ring->desc;
+ void **desc_data = ptd->desc_data;
+ u16 *desc_len = ptd->desc_len;
+ memif_desc_status_t *desc_status = ptd->desc_status;
+ u16 n_desc = 0, n_pkts = 0;
+ u32 i = 0;
+ u16 mask = pow2_mask (mq->log2_ring_size);
+ memif_desc_t *d = 0;
+ u32 slot = next;
+
+ while (i < n_avail)
+ {
+ u8 flags;
+ d = descs + (slot++ & mask);
+ desc_data[i] = (void *) ((u64) d->region << 32 | d->offset);
+ desc_len[i] = d->length;
+ desc_status[i].as_u8 = flags = d->flags;
+ i++;
+ if (PREDICT_FALSE ((flags & MEMIF_DESC_FLAG_NEXT)) == 0)
+ {
+ n_desc = i;
+ if (++n_pkts == MEMIF_RX_VECTOR_SZ)
+ goto frame_full;
+ }
+ }
+frame_full:
- mq = vec_elt_at_index (mif->rx_queues, qid);
- ring = mq->ring;
- ring_size = 1 << mq->log2_ring_size;
- mask = ring_size - 1;
+ /* done */
+ ptd->n_packets = n_pkts;
+ return n_desc;
+}
- /* assume that somebody will want to add ethernet header on the packet
- so start with IP header at offset 14 */
- start_offset = (mode == MEMIF_INTERFACE_MODE_IP) ? 14 : 0;
+static_always_inline void
+memif_desc_status_set_err (memif_desc_status_t *p,
+ memif_desc_status_err_code_t e)
+{
+ memif_desc_status_t s = { .err = 1, .err_code = e };
+ p->as_u8 |= s.as_u8;
+}
- /* for S2M rings, we are consumers of packet buffers, and for M2S rings we
- are producers of empty buffers */
- cur_slot = (type == MEMIF_RING_S2M) ? mq->last_head : mq->last_tail;
+static_always_inline void
+memif_validate_desc_data (memif_per_thread_data_t *ptd, memif_if_t *mif,
+ u16 n_desc, int is_ethernet)
+{
+ void **desc_data = ptd->desc_data;
+ u16 *desc_len = ptd->desc_len;
+ memif_desc_status_t *desc_status = ptd->desc_status;
+ u16 n_regions = vec_len (mif->regions);
+ u32 n_rx_bytes = 0;
+ u16 max_len = 0;
+ u8 xor_status = 0;
+
+ for (u32 i = 0; i < n_desc; i++)
+ {
+ u16 region = ((u64) desc_data[i]) >> 32;
+ u32 offset = (u64) desc_data[i];
+ u16 len = desc_len[i];
+ memif_region_t *r = mif->regions + region;
+
+ if (region >= n_regions)
+ memif_desc_status_set_err (desc_status + i,
+ MEMIF_DESC_STATUS_ERR_BAD_REGION);
+ else if (offset + len > r->region_size)
+ memif_desc_status_set_err (desc_status + i,
+ MEMIF_DESC_STATUS_ERR_REGION_OVERRUN);
+ else if (is_ethernet && len > ETHERNET_MAX_PACKET_BYTES)
+ memif_desc_status_set_err (desc_status + i,
+ MEMIF_DESC_STATUS_ERR_DATA_TOO_BIG);
+ else if (len == 0)
+ memif_desc_status_set_err (desc_status + i,
+ MEMIF_DESC_STATUS_ERR_ZERO_LENGTH);
+ else
+ {
+ desc_data[i] = r->shm + offset;
+ if (len > max_len)
+ max_len = len;
+ n_rx_bytes += len;
+ }
+ xor_status |= desc_status[i].as_u8;
+ }
- if (type == MEMIF_RING_S2M)
- last_slot = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE);
- else
- last_slot = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE);
+ ptd->max_desc_len = max_len;
+ ptd->xor_status = xor_status;
+ ptd->n_rx_bytes = n_rx_bytes;
+}
- if (cur_slot == last_slot)
- goto refill;
- n_slots = last_slot - cur_slot;
+static_always_inline u32
+memif_process_desc (vlib_main_t *vm, vlib_node_runtime_t *node,
+ memif_per_thread_data_t *ptd, memif_if_t *mif)
+{
+ u16 buffer_size = vlib_buffer_get_default_data_size (vm);
+ int is_ip = mif->mode == MEMIF_INTERFACE_MODE_IP;
+ i16 start_offset = (is_ip) ? MEMIF_IP_OFFSET : 0;
+ memif_packet_op_t *po = ptd->packet_ops;
+ void **desc_data = ptd->desc_data;
+ u16 *desc_len = ptd->desc_len;
+ memif_desc_status_t *desc_status = ptd->desc_status;
+ u32 n_buffers = 0;
+ u32 n_left = ptd->n_packets;
+ u32 packet_len;
+ int i = -1;
+ int bad_packets = 0;
/* construct copy and packet vector out of ring slots */
- while (n_slots && n_rx_packets < MEMIF_RX_VECTOR_SZ)
+ while (n_left)
{
u32 dst_off, src_off, n_bytes_left;
- u16 s0;
- memif_desc_t *d0;
void *mb0;
- po = ptd->packet_ops + n_rx_packets;
- n_rx_packets++;
po->first_buffer_vec_index = n_buffers++;
- po->packet_len = 0;
+
+ packet_len = 0;
src_off = 0;
dst_off = start_offset;
next_slot:
- clib_prefetch_load (&ring->desc[(cur_slot + 8) & mask]);
- s0 = cur_slot & mask;
- d0 = &ring->desc[s0];
- n_bytes_left = d0->length;
+ i++; /* next descriptor */
+ n_bytes_left = desc_len[i];
- /* slave resets buffer length,
- * so it can produce full size buffer for master
- */
- if (type == MEMIF_RING_M2S)
- d0->length = mif->run.buffer_size;
+ packet_len += n_bytes_left;
+ mb0 = desc_data[i];
- po->packet_len += n_bytes_left;
- if (PREDICT_FALSE (last_region != d0->region))
+ if (PREDICT_FALSE (desc_status[i].err))
{
- last_region_shm = mif->regions[d0->region].shm;
- last_region = d0->region;
- last_region_max =
- last_region_shm + mif->regions[last_region].region_size;
+ vlib_error_count (vm, node->node_index, MEMIF_INPUT_ERROR_BAD_DESC,
+ 1);
+ bad_packets++;
+ ASSERT (n_buffers > 0);
+ n_buffers--;
+ goto next_packet;
}
- mb0 = last_region_shm + d0->offset;
-
- if (PREDICT_FALSE (mb0 + n_bytes_left > last_region_max))
- vlib_error_count (vm, node->node_index, MEMIF_INPUT_ERROR_BAD_DESC, 1);
else
do
{
@@ -249,115 +296,98 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
while (PREDICT_FALSE (n_bytes_left));
- cur_slot++;
- n_slots--;
- if ((d0->flags & MEMIF_DESC_FLAG_NEXT) && n_slots)
+ if (desc_status[i].next)
{
src_off = 0;
goto next_slot;
}
- }
- /* allocate free buffers */
- vec_validate_aligned (ptd->buffers, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
- n_alloc = vlib_buffer_alloc_from_pool (vm, ptd->buffers, n_buffers,
- mq->buffer_pool_index);
- if (PREDICT_FALSE (n_alloc != n_buffers))
- {
- if (n_alloc)
- vlib_buffer_free (vm, ptd->buffers, n_alloc);
- vlib_error_count (vm, node->node_index,
- MEMIF_INPUT_ERROR_BUFFER_ALLOC_FAIL, 1);
- goto refill;
+ /* update packet op */
+ po->packet_len = packet_len;
+ po++;
+
+ next_packet:
+ /* next packet */
+ n_left--;
}
+ ASSERT (ptd->n_packets >= bad_packets);
+ ptd->n_packets -= bad_packets;
+ return n_buffers;
+}
+static_always_inline void
+memif_fill_buffer_mdata_simple (vlib_node_runtime_t *node,
+ memif_per_thread_data_t *ptd,
+ vlib_buffer_t **b, u16 *next, int is_ip)
+{
+ vlib_buffer_t bt;
+ u16 *dl = ptd->desc_len;
+ /* process buffer metadata */
+
+ u32 n_left = ptd->n_packets;
+
+ /* copy template into local variable - will save per packet load */
+ vlib_buffer_copy_template (&bt, &ptd->buffer_template);
- /* copy data */
- n_left = vec_len (ptd->copy_ops);
- co = ptd->copy_ops;
while (n_left >= 8)
{
- clib_prefetch_load (co[4].data);
- clib_prefetch_load (co[5].data);
- clib_prefetch_load (co[6].data);
- clib_prefetch_load (co[7].data);
-
- b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
- b1 = vlib_get_buffer (vm, ptd->buffers[co[1].buffer_vec_index]);
- b2 = vlib_get_buffer (vm, ptd->buffers[co[2].buffer_vec_index]);
- b3 = vlib_get_buffer (vm, ptd->buffers[co[3].buffer_vec_index]);
-
- clib_memcpy_fast (b0->data + co[0].buffer_offset, co[0].data,
- co[0].data_len);
- clib_memcpy_fast (b1->data + co[1].buffer_offset, co[1].data,
- co[1].data_len);
- clib_memcpy_fast (b2->data + co[2].buffer_offset, co[2].data,
- co[2].data_len);
- clib_memcpy_fast (b3->data + co[3].buffer_offset, co[3].data,
- co[3].data_len);
-
- co += 4;
- n_left -= 4;
- }
- while (n_left)
- {
- b0 = vlib_get_buffer (vm, ptd->buffers[co[0].buffer_vec_index]);
- clib_memcpy_fast (b0->data + co[0].buffer_offset, co[0].data,
- co[0].data_len);
- co += 1;
- n_left -= 1;
- }
+ vlib_prefetch_buffer_header (b[4], STORE);
+ vlib_prefetch_buffer_header (b[5], STORE);
+ vlib_prefetch_buffer_header (b[6], STORE);
+ vlib_prefetch_buffer_header (b[7], STORE);
+
+ vlib_buffer_copy_template (b[0], &bt);
+ vlib_buffer_copy_template (b[1], &bt);
+ vlib_buffer_copy_template (b[2], &bt);
+ vlib_buffer_copy_template (b[3], &bt);
+
+ b[0]->current_length = dl[0];
+ b[1]->current_length = dl[1];
+ b[2]->current_length = dl[2];
+ b[3]->current_length = dl[3];
+
+ if (is_ip)
+ {
+ next[0] = memif_next_from_ip_hdr (node, b[0]);
+ next[1] = memif_next_from_ip_hdr (node, b[1]);
+ next[2] = memif_next_from_ip_hdr (node, b[2]);
+ next[3] = memif_next_from_ip_hdr (node, b[3]);
+ }
- /* release slots from the ring */
- if (type == MEMIF_RING_S2M)
- {
- __atomic_store_n (&ring->tail, cur_slot, __ATOMIC_RELEASE);
- mq->last_head = cur_slot;
- }
- else
- {
- mq->last_tail = cur_slot;
+ /* next */
+ n_left -= 4;
+ b += 4;
+ dl += 4;
+ next += 4;
}
- /* prepare buffer template and next indices */
- vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] =
- mif->sw_if_index;
- vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0;
- ptd->buffer_template.current_data = start_offset;
- ptd->buffer_template.current_config_index = 0;
- ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index;
- ptd->buffer_template.ref_count = 1;
-
- if (mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ while (n_left)
{
- next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- if (mif->per_interface_next_index != ~0)
- next_index = mif->per_interface_next_index;
- else
- vnet_feature_start_device_input_x1 (mif->sw_if_index, &next_index,
- &ptd->buffer_template);
-
- vlib_get_new_next_frame (vm, node, next_index, to_next_bufs,
- n_left_to_next);
- if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
- {
- vlib_next_frame_t *nf;
- vlib_frame_t *f;
- ethernet_input_frame_t *ef;
- nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
- f = vlib_get_frame (vm, nf->frame);
- f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+ /* enqueue buffer */
+ vlib_buffer_copy_template (b[0], &bt);
+ b[0]->current_length = dl[0];
+ if (is_ip)
+ next[0] = memif_next_from_ip_hdr (node, b[0]);
- ef = vlib_frame_scalar_args (f);
- ef->sw_if_index = mif->sw_if_index;
- ef->hw_if_index = mif->hw_if_index;
- vlib_frame_no_append (f);
- }
+ /* next */
+ n_left -= 1;
+ b += 1;
+ dl += 1;
+ next += 1;
}
+}
+static_always_inline void
+memif_fill_buffer_mdata (vlib_main_t *vm, vlib_node_runtime_t *node,
+ memif_per_thread_data_t *ptd, memif_if_t *mif,
+ u32 *bi, u16 *next, int is_ip)
+{
+ u16 buffer_size = vlib_buffer_get_default_data_size (vm);
+ vlib_buffer_t *b0, *b1, *b2, *b3, bt;
+ memif_packet_op_t *po;
/* process buffer metadata */
- u32 n_from = n_rx_packets;
+
+ u32 n_from = ptd->n_packets;
po = ptd->packet_ops;
- bi = to_next_bufs;
/* copy template into local variable - will save per packet load */
vlib_buffer_copy_template (&bt, &ptd->buffer_template);
@@ -397,20 +427,16 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_copy_template (b3, &bt);
b0->current_length = po[0].packet_len;
- n_rx_bytes += b0->current_length;
b1->current_length = po[1].packet_len;
- n_rx_bytes += b1->current_length;
b2->current_length = po[2].packet_len;
- n_rx_bytes += b2->current_length;
b3->current_length = po[3].packet_len;
- n_rx_bytes += b3->current_length;
memif_add_to_chain (vm, b0, ptd->buffers + fbvi[0] + 1, buffer_size);
memif_add_to_chain (vm, b1, ptd->buffers + fbvi[1] + 1, buffer_size);
memif_add_to_chain (vm, b2, ptd->buffers + fbvi[2] + 1, buffer_size);
memif_add_to_chain (vm, b3, ptd->buffers + fbvi[3] + 1, buffer_size);
- if (mode == MEMIF_INTERFACE_MODE_IP)
+ if (is_ip)
{
next[0] = memif_next_from_ip_hdr (node, b0);
next[1] = memif_next_from_ip_hdr (node, b1);
@@ -426,21 +452,18 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
while (n_from)
{
- u32 fbvi[4];
+ u32 fbvi[1];
/* enqueue buffer */
fbvi[0] = po[0].first_buffer_vec_index;
bi[0] = ptd->buffers[fbvi[0]];
b0 = vlib_get_buffer (vm, bi[0]);
vlib_buffer_copy_template (b0, &bt);
b0->current_length = po->packet_len;
- n_rx_bytes += b0->current_length;
memif_add_to_chain (vm, b0, ptd->buffers + fbvi[0] + 1, buffer_size);
- if (mode == MEMIF_INTERFACE_MODE_IP)
- {
- next[0] = memif_next_from_ip_hdr (node, b0);
- }
+ if (is_ip)
+ next[0] = memif_next_from_ip_hdr (node, b0);
/* next */
n_from -= 1;
@@ -448,11 +471,216 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
bi += 1;
next += 1;
}
+}
+
+static_always_inline void
+memif_advance_ring (memif_ring_type_t type, memif_queue_t *mq,
+ memif_ring_t *ring, u16 cur_slot)
+{
+ if (type == MEMIF_RING_S2M)
+ {
+ __atomic_store_n (&ring->tail, cur_slot, __ATOMIC_RELEASE);
+ mq->last_head = cur_slot;
+ }
+ else
+ {
+ mq->last_tail = cur_slot;
+ }
+}
+
+static_always_inline uword
+memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ memif_if_t *mif, memif_ring_type_t type, u16 qid,
+ memif_interface_mode_t mode)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ memif_main_t *mm = &memif_main;
+ memif_ring_t *ring;
+ memif_queue_t *mq;
+ u16 buffer_size = vlib_buffer_get_default_data_size (vm);
+ uword n_trace;
+ u16 nexts[MEMIF_RX_VECTOR_SZ], *next = nexts;
+ u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi;
+ u32 n_left_to_next;
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ vlib_buffer_t *buffer_ptrs[MEMIF_RX_VECTOR_SZ];
+ u32 thread_index = vm->thread_index;
+ memif_per_thread_data_t *ptd =
+ vec_elt_at_index (mm->per_thread_data, thread_index);
+ u16 cur_slot, ring_size, n_slots, mask;
+ u16 n_buffers, n_alloc, n_desc;
+ i16 start_offset;
+ memif_copy_op_t *co;
+ int is_slave = (mif->flags & MEMIF_IF_FLAG_IS_SLAVE) != 0;
+ int is_simple = 1;
+ int i;
+
+ mq = vec_elt_at_index (mif->rx_queues, qid);
+ ring = mq->ring;
+ ring_size = 1 << mq->log2_ring_size;
+ mask = ring_size - 1;
+
+ start_offset = (mode == MEMIF_INTERFACE_MODE_IP) ? MEMIF_IP_OFFSET : 0;
+
+ if (is_slave)
+ {
+ cur_slot = mq->last_tail;
+ n_slots = __atomic_load_n (&ring->tail, __ATOMIC_ACQUIRE) - cur_slot;
+ }
+ else
+ {
+ cur_slot = mq->last_head;
+ n_slots = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE) - cur_slot;
+ }
+
+ if (n_slots == 0)
+ {
+ ptd->n_packets = 0;
+ goto refill;
+ }
+
+ n_desc = memif_parse_desc (ptd, mif, mq, cur_slot, n_slots);
+
+ if (n_desc != ptd->n_packets)
+ is_simple = 0;
+
+ cur_slot += n_desc;
+
+ if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ memif_validate_desc_data (ptd, mif, n_desc, /* is_ethernet */ 1);
+ else
+ memif_validate_desc_data (ptd, mif, n_desc, /* is_ethernet */ 0);
+
+ if (ptd->max_desc_len > buffer_size - start_offset)
+ is_simple = 0;
+
+ if (ptd->xor_status != 0)
+ is_simple = 0;
+
+ if (is_simple)
+ n_buffers = ptd->n_packets;
+ else
+ n_buffers = memif_process_desc (vm, node, ptd, mif);
+
+ if (PREDICT_FALSE (n_buffers == 0))
+ {
+ /* All descriptors are bad. Release slots in the ring and bail */
+ memif_advance_ring (type, mq, ring, cur_slot);
+ goto refill;
+ }
+
+ /* allocate free buffers */
+ vec_validate_aligned (ptd->buffers, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ n_alloc = vlib_buffer_alloc_from_pool (vm, ptd->buffers, n_buffers,
+ mq->buffer_pool_index);
+ if (PREDICT_FALSE (n_alloc != n_buffers))
+ {
+ if (n_alloc)
+ vlib_buffer_free (vm, ptd->buffers, n_alloc);
+ vlib_error_count (vm, node->node_index,
+ MEMIF_INPUT_ERROR_BUFFER_ALLOC_FAIL, 1);
+ goto refill;
+ }
+
+ /* copy data */
+ if (is_simple)
+ {
+ int n_pkts = ptd->n_packets;
+ void **desc_data = ptd->desc_data;
+ u16 *desc_len = ptd->desc_len;
+
+ vlib_get_buffers (vm, ptd->buffers, buffer_ptrs, n_buffers);
+
+ for (i = 0; i + 8 < n_pkts; i++)
+ {
+ clib_prefetch_load (desc_data[i + 8]);
+ clib_prefetch_store (buffer_ptrs[i + 8]->data);
+ clib_memcpy_fast (buffer_ptrs[i]->data + start_offset, desc_data[i],
+ desc_len[i]);
+ }
+ for (; i < n_pkts; i++)
+ clib_memcpy_fast (buffer_ptrs[i]->data + start_offset, desc_data[i],
+ desc_len[i]);
+ }
+ else
+ {
+ vlib_buffer_t *b;
+ u32 n_pkts = vec_len (ptd->copy_ops);
+ co = ptd->copy_ops;
+
+ for (i = 0; i + 8 < n_pkts; i++)
+ {
+ clib_prefetch_load (co[i + 8].data);
+ b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+ clib_memcpy_fast (b->data + co[i].buffer_offset, co[i].data,
+ co[i].data_len);
+ }
+ for (; i < n_pkts; i++)
+ {
+ b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+ clib_memcpy_fast (b->data + co[i].buffer_offset, co[i].data,
+ co[i].data_len);
+ }
+ }
+
+ /* release slots from the ring */
+ memif_advance_ring (type, mq, ring, cur_slot);
+
+ /* prepare buffer template and next indices */
+ vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] = mif->sw_if_index;
+ vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0;
+ ptd->buffer_template.current_data = start_offset;
+ ptd->buffer_template.current_config_index = 0;
+ ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index;
+ ptd->buffer_template.ref_count = 1;
+
+ if (mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ {
+ next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ if (mif->per_interface_next_index != ~0)
+ next_index = mif->per_interface_next_index;
+ else
+ vnet_feature_start_device_input (mif->sw_if_index, &next_index,
+ &ptd->buffer_template);
+
+ vlib_get_new_next_frame (vm, node, next_index, to_next_bufs,
+ n_left_to_next);
+ if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
+ {
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+ nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = mif->sw_if_index;
+ ef->hw_if_index = mif->hw_if_index;
+ vlib_frame_no_append (f);
+ }
+ }
+
+ if (is_simple)
+ {
+ vlib_buffer_copy_indices (to_next_bufs, ptd->buffers, ptd->n_packets);
+ if (mode == MEMIF_INTERFACE_MODE_IP)
+ memif_fill_buffer_mdata_simple (node, ptd, buffer_ptrs, nexts, 1);
+ else
+ memif_fill_buffer_mdata_simple (node, ptd, buffer_ptrs, nexts, 0);
+ }
+ else
+ {
+ if (mode == MEMIF_INTERFACE_MODE_IP)
+ memif_fill_buffer_mdata (vm, node, ptd, mif, to_next_bufs, nexts, 1);
+ else
+ memif_fill_buffer_mdata (vm, node, ptd, mif, to_next_bufs, nexts, 0);
+ }
/* packet trace if enabled */
if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node))))
{
- u32 n_left = n_rx_packets;
+ u32 n_left = ptd->n_packets;
bi = to_next_bufs;
next = nexts;
u32 ni = next_index;
@@ -483,16 +711,16 @@ memif_device_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (mode == MEMIF_INTERFACE_MODE_ETHERNET)
{
- n_left_to_next -= n_rx_packets;
+ n_left_to_next -= ptd->n_packets;
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
else
- vlib_buffer_enqueue_to_next (vm, node, to_next_bufs, nexts, n_rx_packets);
+ vlib_buffer_enqueue_to_next (vm, node, to_next_bufs, nexts,
+ ptd->n_packets);
- vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX, thread_index,
- mif->sw_if_index, n_rx_packets,
- n_rx_bytes);
+ vlib_increment_combined_counter (
+ vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, mif->sw_if_index, ptd->n_packets, ptd->n_rx_bytes);
/* refill ring with empty buffers */
refill:
@@ -514,7 +742,7 @@ refill:
__atomic_store_n (&ring->head, head, __ATOMIC_RELEASE);
}
- return n_rx_packets;
+ return ptd->n_packets;
}
static_always_inline uword
@@ -675,14 +903,14 @@ memif_device_input_zc_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
next0 = next1 = next2 = next3 = next_index;
/* redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next0, b0);
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next1, b1);
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next2, b2);
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next3, b3);
+ vnet_feature_start_device_input (mif->sw_if_index, &next0,
+ b0);
+ vnet_feature_start_device_input (mif->sw_if_index, &next1,
+ b1);
+ vnet_feature_start_device_input (mif->sw_if_index, &next2,
+ b2);
+ vnet_feature_start_device_input (mif->sw_if_index, &next3,
+ b3);
}
}
@@ -730,8 +958,8 @@ memif_device_input_zc_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
next0 = next_index;
/* redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (mif->sw_if_index,
- &next0, b0);
+ vnet_feature_start_device_input (mif->sw_if_index, &next0,
+ b0);
}
}
@@ -774,7 +1002,7 @@ refill:
n_alloc = vlib_buffer_alloc_to_ring_from_pool (
vm, mq->buffers, slot, ring_size, n_slots, mq->buffer_pool_index);
dt->region = mq->buffer_pool_index + 1;
- offset = (u64) mif->regions[dt->region].shm + start_offset;
+ offset = (u64) mif->regions[dt->region].shm - start_offset;
if (PREDICT_FALSE (n_alloc != n_slots))
vlib_error_count (vm, node->node_index,
@@ -826,6 +1054,244 @@ done:
return n_rx_packets;
}
+CLIB_MARCH_FN (memif_dma_completion_cb, void, vlib_main_t *vm,
+ vlib_dma_batch_t *b)
+{
+ memif_main_t *mm = &memif_main;
+ memif_if_t *mif = vec_elt_at_index (mm->interfaces, b->cookie >> 16);
+ u32 thread_index = vm->thread_index;
+ u32 n_left_to_next = 0;
+ u16 nexts[MEMIF_RX_VECTOR_SZ], *next;
+ u32 _to_next_bufs[MEMIF_RX_VECTOR_SZ], *to_next_bufs = _to_next_bufs, *bi;
+ uword n_trace;
+ memif_dma_info_t *dma_info;
+ u16 qid = b->cookie & 0xffff;
+ memif_queue_t *mq = vec_elt_at_index (mif->rx_queues, qid);
+ dma_info = mq->dma_info + mq->dma_info_head;
+ memif_per_thread_data_t *ptd = &dma_info->data;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+
+ __atomic_store_n (&mq->ring->tail, dma_info->dma_tail, __ATOMIC_RELEASE);
+
+ /* prepare buffer template and next indices */
+ i16 start_offset =
+ (dma_info->mode == MEMIF_INTERFACE_MODE_IP) ? MEMIF_IP_OFFSET : 0;
+ vnet_buffer (&ptd->buffer_template)->sw_if_index[VLIB_RX] = mif->sw_if_index;
+ vnet_buffer (&ptd->buffer_template)->feature_arc_index = 0;
+ ptd->buffer_template.current_data = start_offset;
+ ptd->buffer_template.current_config_index = 0;
+ ptd->buffer_template.buffer_pool_index = mq->buffer_pool_index;
+ ptd->buffer_template.ref_count = 1;
+
+ if (dma_info->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ {
+ next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
+ if (mif->per_interface_next_index != ~0)
+ next_index = mif->per_interface_next_index;
+ else
+ vnet_feature_start_device_input (mif->sw_if_index, &next_index,
+ &ptd->buffer_template);
+
+ vlib_get_new_next_frame (vm, dma_info->node, next_index, to_next_bufs,
+ n_left_to_next);
+ if (PREDICT_TRUE (next_index == VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT))
+ {
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+ nf =
+ vlib_node_runtime_get_next_frame (vm, dma_info->node, next_index);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = mif->sw_if_index;
+ ef->hw_if_index = mif->hw_if_index;
+ vlib_frame_no_append (f);
+ }
+ }
+
+ vec_reset_length (ptd->buffers);
+
+ if (dma_info->mode == MEMIF_INTERFACE_MODE_IP)
+ memif_fill_buffer_mdata (vm, dma_info->node, ptd, mif, to_next_bufs, nexts,
+ 1);
+ else
+ memif_fill_buffer_mdata (vm, dma_info->node, ptd, mif, to_next_bufs, nexts,
+ 0);
+
+ /* packet trace if enabled */
+ if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, dma_info->node))))
+ {
+ u32 n_left = ptd->n_packets;
+ bi = to_next_bufs;
+ next = nexts;
+ u32 ni = next_index;
+ while (n_trace && n_left)
+ {
+ vlib_buffer_t *b;
+ memif_input_trace_t *tr;
+ if (dma_info->mode != MEMIF_INTERFACE_MODE_ETHERNET)
+ ni = next[0];
+ b = vlib_get_buffer (vm, bi[0]);
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, dma_info->node, ni, b,
+ /* follow_chain */ 0)))
+ {
+ tr = vlib_add_trace (vm, dma_info->node, b, sizeof (*tr));
+ tr->next_index = ni;
+ tr->hw_if_index = mif->hw_if_index;
+ tr->ring = qid;
+ n_trace--;
+ }
+
+ /* next */
+ n_left--;
+ bi++;
+ next++;
+ }
+ vlib_set_trace_count (vm, dma_info->node, n_trace);
+ }
+
+ if (dma_info->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ {
+ n_left_to_next -= ptd->n_packets;
+ vlib_put_next_frame (vm, dma_info->node, next_index, n_left_to_next);
+ }
+ else
+ vlib_buffer_enqueue_to_next (vm, dma_info->node, to_next_bufs, nexts,
+ ptd->n_packets);
+
+ vlib_increment_combined_counter (
+ vnm->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX,
+ thread_index, mif->sw_if_index, ptd->n_packets, ptd->n_rx_bytes);
+
+ mq->dma_info_head++;
+ if (mq->dma_info_head == mq->dma_info_size)
+ mq->dma_info_head = 0;
+
+ return;
+}
+
+#ifndef CLIB_MARCH_VARIANT
+void
+memif_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b)
+{
+ return CLIB_MARCH_FN_SELECT (memif_dma_completion_cb) (vm, b);
+}
+#endif
+
+static_always_inline uword
+memif_device_input_inline_dma (vlib_main_t *vm, vlib_node_runtime_t *node,
+ memif_if_t *mif, memif_ring_type_t type,
+ u16 qid, memif_interface_mode_t mode)
+{
+ memif_main_t *mm = &memif_main;
+ memif_ring_t *ring;
+ memif_queue_t *mq;
+ memif_per_thread_data_t *ptd;
+ u16 cur_slot, n_slots;
+ u16 n_buffers, n_alloc, n_desc;
+ memif_copy_op_t *co;
+ memif_dma_info_t *dma_info;
+
+ u16 mif_id = mif - mm->interfaces;
+ u32 i;
+
+ mq = vec_elt_at_index (mif->rx_queues, qid);
+ ring = mq->ring;
+
+ cur_slot = mq->last_head;
+ n_slots = __atomic_load_n (&ring->head, __ATOMIC_ACQUIRE) - cur_slot;
+
+ if (n_slots == 0)
+ return 0;
+
+ if ((mq->dma_info_tail + 1 == mq->dma_info_head) ||
+ ((mq->dma_info_head == mq->dma_info_size - 1) &&
+ (mq->dma_info_tail == 0)))
+ return 0;
+
+ vlib_dma_batch_t *db;
+ db = vlib_dma_batch_new (vm, mif->dma_input_config);
+ if (!db)
+ return 0;
+
+ dma_info = mq->dma_info + mq->dma_info_tail;
+ dma_info->node = node;
+ dma_info->mode = mode;
+ ptd = &dma_info->data;
+ vec_validate_aligned (dma_info->data.desc_len,
+ pow2_mask (mq->log2_ring_size), CLIB_CACHE_LINE_BYTES);
+
+ n_desc = memif_parse_desc (&dma_info->data, mif, mq, cur_slot, n_slots);
+ cur_slot += n_desc;
+
+ if (mif->mode == MEMIF_INTERFACE_MODE_ETHERNET)
+ memif_validate_desc_data (&dma_info->data, mif, n_desc,
+ /* is_ethernet */ 1);
+ else
+ memif_validate_desc_data (&dma_info->data, mif, n_desc,
+ /* is_ethernet */ 0);
+
+ n_buffers = memif_process_desc (vm, node, ptd, mif);
+
+ if (PREDICT_FALSE (n_buffers == 0))
+ {
+ /* All descriptors are bad. Release slots in the ring and bail */
+ memif_advance_ring (type, mq, ring, cur_slot);
+ goto done;
+ }
+
+ /* allocate free buffers */
+ vec_validate_aligned (dma_info->data.buffers, n_buffers - 1,
+ CLIB_CACHE_LINE_BYTES);
+ n_alloc = vlib_buffer_alloc_from_pool (vm, dma_info->data.buffers, n_buffers,
+ mq->buffer_pool_index);
+ if (PREDICT_FALSE (n_alloc != n_buffers))
+ {
+ if (n_alloc)
+ vlib_buffer_free (vm, dma_info->data.buffers, n_alloc);
+ vlib_error_count (vm, node->node_index,
+ MEMIF_INPUT_ERROR_BUFFER_ALLOC_FAIL, 1);
+ goto done;
+ }
+
+ dma_info->data.n_rx_bytes = ptd->n_rx_bytes;
+ dma_info->data.n_packets = ptd->n_packets;
+ /* copy data */
+ vlib_buffer_t *b;
+ u32 n_pkts = clib_min (MEMIF_RX_VECTOR_SZ, vec_len (ptd->copy_ops));
+ co = ptd->copy_ops;
+
+ for (i = 0; i < n_pkts; i++)
+ {
+ b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+ vlib_dma_batch_add (vm, db, b->data + co[i].buffer_offset, co[i].data,
+ co[i].data_len);
+ }
+
+ for (i = n_pkts; i < vec_len (ptd->copy_ops); i++)
+ {
+ b = vlib_get_buffer (vm, ptd->buffers[co[i].buffer_vec_index]);
+ vlib_dma_batch_add (vm, db, b->data + co[i].buffer_offset, co[i].data,
+ co[i].data_len);
+ }
+
+ dma_info->dma_tail = cur_slot;
+ mq->last_head = dma_info->dma_tail;
+ mq->dma_info_tail++;
+ if (mq->dma_info_tail == mq->dma_info_size)
+ mq->dma_info_tail = 0;
+
+done:
+ vlib_dma_batch_set_cookie (vm, db, ((u64) mif_id << 16) | qid);
+ vlib_dma_batch_submit (vm, db);
+ vec_reset_length (ptd->copy_ops);
+
+ return ptd->n_packets;
+}
VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -867,12 +1333,25 @@ VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm,
}
else
{
- if (mif->mode == MEMIF_INTERFACE_MODE_IP)
- n_rx += memif_device_input_inline (
- vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+ if ((mif->flags & MEMIF_IF_FLAG_USE_DMA) &&
+ (mif->dma_input_config >= 0))
+ {
+ if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+ n_rx += memif_device_input_inline_dma (
+ vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+ else
+ n_rx += memif_device_input_inline_dma (
+ vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+ }
else
- n_rx += memif_device_input_inline (
- vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+ {
+ if (mif->mode == MEMIF_INTERFACE_MODE_IP)
+ n_rx += memif_device_input_inline (
+ vm, node, mif, MEMIF_RING_S2M, qid, mode_ip);
+ else
+ n_rx += memif_device_input_inline (
+ vm, node, mif, MEMIF_RING_S2M, qid, mode_eth);
+ }
}
}
}
@@ -880,7 +1359,6 @@ VLIB_NODE_FN (memif_input_node) (vlib_main_t * vm,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (memif_input_node) = {
.name = "memif-input",
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -892,7 +1370,6 @@ VLIB_REGISTER_NODE (memif_input_node) = {
.error_counters = memif_input_error_counters,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/memif/private.h b/src/plugins/memif/private.h
index 838651abc27..f6335410ba8 100644
--- a/src/plugins/memif/private.h
+++ b/src/plugins/memif/private.h
@@ -16,6 +16,7 @@
*/
#include <vppinfra/lock.h>
+#include <vlib/dma/dma.h>
#include <vlib/log.h>
#define MEMIF_DEFAULT_SOCKET_FILENAME "memif.sock"
@@ -24,7 +25,7 @@
#define MEMIF_DEFAULT_TX_QUEUES 1
#define MEMIF_DEFAULT_BUFFER_SIZE 2048
-#define MEMIF_MAX_M2S_RING (vlib_get_n_threads ())
+#define MEMIF_MAX_M2S_RING 256
#define MEMIF_MAX_S2M_RING 256
#define MEMIF_MAX_REGION 256
#define MEMIF_MAX_LOG2_RING_SIZE 14
@@ -120,9 +121,15 @@ typedef struct
int fd;
} memif_msg_fifo_elt_t;
+#define MEMIF_RX_VECTOR_SZ VLIB_FRAME_SIZE
+#define MEMIF_DMA_INFO_SIZE VLIB_FRAME_SIZE
+
+struct memif_dma_info;
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ clib_spinlock_t lockp;
/* ring data */
memif_ring_t *ring;
memif_log2_ring_size_t log2_ring_size;
@@ -134,6 +141,15 @@ typedef struct
u32 *buffers;
u8 buffer_pool_index;
+ /* dma data */
+ u16 dma_head;
+ u16 dma_tail;
+ struct memif_dma_info *dma_info;
+ u16 dma_info_head;
+ u16 dma_info_tail;
+ u16 dma_info_size;
+ u8 dma_info_full;
+
/* interrupts */
int int_fd;
uword int_clib_file_index;
@@ -144,14 +160,15 @@ typedef struct
u32 queue_index;
} memif_queue_t;
-#define foreach_memif_if_flag \
- _(0, ADMIN_UP, "admin-up") \
- _(1, IS_SLAVE, "slave") \
- _(2, CONNECTING, "connecting") \
- _(3, CONNECTED, "connected") \
- _(4, DELETING, "deleting") \
- _(5, ZERO_COPY, "zero-copy") \
- _(6, ERROR, "error")
+#define foreach_memif_if_flag \
+ _ (0, ADMIN_UP, "admin-up") \
+ _ (1, IS_SLAVE, "slave") \
+ _ (2, CONNECTING, "connecting") \
+ _ (3, CONNECTED, "connected") \
+ _ (4, DELETING, "deleting") \
+ _ (5, ZERO_COPY, "zero-copy") \
+ _ (6, ERROR, "error") \
+ _ (7, USE_DMA, "use_dma")
typedef enum
{
@@ -163,7 +180,6 @@ typedef enum
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- clib_spinlock_t lockp;
u32 flags;
memif_interface_id_t id;
u32 hw_if_index;
@@ -207,11 +223,15 @@ typedef struct
/* disconnect strings */
u8 *local_disc_string;
u8 *remote_disc_string;
+
+ /* dma config index */
+ int dma_input_config;
+ int dma_tx_config;
} memif_if_t;
typedef struct
{
- u32 packet_len;
+ u16 packet_len;
u16 first_buffer_vec_index;
} memif_packet_op_t;
@@ -224,21 +244,61 @@ typedef struct
u16 buffer_vec_index;
} memif_copy_op_t;
-#define MEMIF_RX_VECTOR_SZ VLIB_FRAME_SIZE
+typedef enum
+{
+ MEMIF_DESC_STATUS_OK = 0,
+ MEMIF_DESC_STATUS_ERR_BAD_REGION,
+ MEMIF_DESC_STATUS_ERR_REGION_OVERRUN,
+ MEMIF_DESC_STATUS_ERR_DATA_TOO_BIG,
+ MEMIF_DESC_STATUS_ERR_ZERO_LENGTH
+} __clib_packed memif_desc_status_err_code_t;
+
+typedef union
+{
+ struct
+ {
+ u8 next : 1;
+ u8 err : 1;
+ u8 reserved : 2;
+ memif_desc_status_err_code_t err_code : 4;
+ };
+ u8 as_u8;
+} memif_desc_status_t;
+
+STATIC_ASSERT_SIZEOF (memif_desc_status_t, 1);
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
+ u16 n_packets;
+ u16 max_desc_len;
+ u32 n_rx_bytes;
+ u8 xor_status;
/* copy vector */
- memif_packet_op_t packet_ops[MEMIF_RX_VECTOR_SZ];
memif_copy_op_t *copy_ops;
u32 *buffers;
+ memif_packet_op_t packet_ops[MEMIF_RX_VECTOR_SZ];
+
+ /* temp storage for compressed descriptors */
+ void **desc_data;
+ u16 *desc_len;
+ memif_desc_status_t *desc_status;
/* buffer template */
vlib_buffer_t buffer_template;
} memif_per_thread_data_t;
+typedef struct memif_dma_info
+{
+ /* per thread data */
+ memif_interface_mode_t mode;
+ vlib_node_runtime_t *node;
+ u32 dma_head;
+ u32 dma_tail;
+ u8 finished;
+ memif_per_thread_data_t data;
+} memif_dma_info_t;
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -278,6 +338,7 @@ typedef struct
u8 *secret;
u8 is_master;
u8 is_zero_copy;
+ u8 use_dma;
memif_interface_mode_t mode:8;
memif_log2_ring_size_t log2_ring_size;
u16 buffer_size;
@@ -290,10 +351,11 @@ typedef struct
u32 sw_if_index;
} memif_create_if_args_t;
-int memif_socket_filename_add_del (u8 is_add, u32 sock_id,
- u8 * sock_filename);
-int memif_create_if (vlib_main_t * vm, memif_create_if_args_t * args);
-int memif_delete_if (vlib_main_t * vm, memif_if_t * mif);
+u32 memif_get_unused_socket_id ();
+clib_error_t *memif_socket_filename_add_del (u8 is_add, u32 sock_id,
+ char *sock_filename);
+clib_error_t *memif_create_if (vlib_main_t *vm, memif_create_if_args_t *args);
+clib_error_t *memif_delete_if (vlib_main_t *vm, memif_if_t *mif);
clib_error_t *memif_plugin_api_hookup (vlib_main_t * vm);
clib_error_t *memif_interface_admin_up_down (vnet_main_t *vnm, u32 hw_if_index,
u32 flags);
@@ -322,7 +384,8 @@ clib_error_t *memif_slave_conn_fd_error (clib_file_t * uf);
clib_error_t *memif_msg_send_disconnect (memif_if_t * mif,
clib_error_t * err);
u8 *format_memif_device_name (u8 * s, va_list * args);
-
+void memif_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b);
+void memif_tx_dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/memif/socket.c b/src/plugins/memif/socket.c
index 5a381a7c7cd..001f26f13ef 100644
--- a/src/plugins/memif/socket.c
+++ b/src/plugins/memif/socket.c
@@ -25,7 +25,6 @@
#include <sys/un.h>
#include <sys/uio.h>
#include <sys/mman.h>
-#include <sys/prctl.h>
#include <sys/eventfd.h>
#include <inttypes.h>
#include <limits.h>
@@ -446,14 +445,12 @@ memif_msg_receive (memif_if_t ** mifp, clib_socket_t * sock, clib_file_t * uf)
if ((err = memif_init_regions_and_queues (mif)))
goto error;
memif_msg_enq_init (mif);
- /* *INDENT-OFF* */
vec_foreach_index (i, mif->regions)
memif_msg_enq_add_region (mif, i);
vec_foreach_index (i, mif->tx_queues)
memif_msg_enq_add_ring (mif, i, MEMIF_RING_S2M);
vec_foreach_index (i, mif->rx_queues)
memif_msg_enq_add_ring (mif, i, MEMIF_RING_M2S);
- /* *INDENT-ON* */
memif_msg_enq_connect (mif);
break;
@@ -648,7 +645,8 @@ memif_master_conn_fd_error (clib_file_t * uf)
memif_log_warn (0, "Error on unknown file descriptor %d",
uf->file_descriptor);
- memif_file_del (uf);
+ if (uf->file_descriptor != ~0)
+ memif_file_del (uf);
return 0;
}
diff --git a/src/plugins/mss_clamp/mss_clamp_node.c b/src/plugins/mss_clamp/mss_clamp_node.c
index 4a40b2329e1..de00a5a8094 100644
--- a/src/plugins/mss_clamp/mss_clamp_node.c
+++ b/src/plugins/mss_clamp/mss_clamp_node.c
@@ -24,6 +24,7 @@
#include <vnet/feature/feature.h>
#include <vnet/ip/ip4.h>
#include <vnet/ip/ip6.h>
+#include <vnet/tcp/tcp_packet.h>
extern vlib_node_registration_t mssc_ip4_in_node, mssc_ip4_out_node;
extern vlib_node_registration_t mssc_ip6_in_node, mssc_ip6_out_node;
@@ -181,17 +182,15 @@ mssc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
{
ip6_header_t *ip0 = (ip6_header_t *) h0;
ip6_header_t *ip1 = (ip6_header_t *) h1;
-
- if (IP_PROTOCOL_TCP == ip0->protocol)
- {
- clamped0 = mssc_mss_fixup (b[0], ip6_next_header (ip0),
- cm->max_mss6[sw_if_index0]);
- }
- if (IP_PROTOCOL_TCP == ip1->protocol)
- {
- clamped1 = mssc_mss_fixup (b[1], ip6_next_header (ip1),
- cm->max_mss6[sw_if_index1]);
- }
+ tcp_header_t *tcp0 =
+ ip6_ext_header_find (vm, b[0], ip0, IP_PROTOCOL_TCP, NULL);
+ tcp_header_t *tcp1 =
+ ip6_ext_header_find (vm, b[1], ip1, IP_PROTOCOL_TCP, NULL);
+
+ if (tcp0)
+ clamped0 = mssc_mss_fixup (b[0], tcp0, cm->max_mss6[sw_if_index0]);
+ if (tcp1)
+ clamped1 = mssc_mss_fixup (b[1], tcp1, cm->max_mss6[sw_if_index1]);
}
pkts_clamped += clamped0 + clamped1;
@@ -254,12 +253,11 @@ mssc_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
else if (FIB_PROTOCOL_IP6 == fproto)
{
ip6_header_t *ip0 = (ip6_header_t *) h0;
+ tcp_header_t *tcp0 =
+ ip6_ext_header_find (vm, b[0], ip0, IP_PROTOCOL_TCP, NULL);
- if (IP_PROTOCOL_TCP == ip0->protocol)
- {
- clamped0 = mssc_mss_fixup (b[0], ip6_next_header (ip0),
- cm->max_mss6[sw_if_index0]);
- }
+ if (tcp0)
+ clamped0 = mssc_mss_fixup (b[0], tcp0, cm->max_mss6[sw_if_index0]);
}
pkts_clamped += clamped0;
diff --git a/src/plugins/nat/CMakeLists.txt b/src/plugins/nat/CMakeLists.txt
index 2545da6da18..c53e0e39c7c 100644
--- a/src/plugins/nat/CMakeLists.txt
+++ b/src/plugins/nat/CMakeLists.txt
@@ -62,12 +62,10 @@ add_vpp_plugin(nat44_ei
nat44-ei/nat44_ei_in2out.c
nat44-ei/nat44_ei_out2in.c
nat44-ei/nat44_ei_handoff.c
- nat44-ei/nat44_ei_hairpinning.c
MULTIARCH_SOURCES
nat44-ei/nat44_ei_in2out.c
nat44-ei/nat44_ei_out2in.c
- nat44-ei/nat44_ei_hairpinning.c
API_FILES
nat44-ei/nat44_ei.api
@@ -199,7 +197,7 @@ add_custom_target(test_pnat-run
DEPENDS test_pnat
)
-if("${CMAKE_VERSION}" VERSION_GREATER_EQUAL "3.13" AND "${CMAKE_C_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang")
+if(VPP_BUILD_TESTS_WITH_COVERAGE)
set(TARGET_NAME test_pnat)
set(COV_SOURCES ${CMAKE_SOURCE_DIR}/plugins/nat/pnat/pnat.c ${CMAKE_SOURCE_DIR}/plugins/nat/pnat/pnat_node.h ${CMAKE_SOURCE_DIR}/plugins/nat/pnat/pnat_node.c)
diff --git a/src/plugins/nat/FEATURE.yaml b/src/plugins/nat/FEATURE.yaml
index bbb8586390e..e2efdf5618d 100644
--- a/src/plugins/nat/FEATURE.yaml
+++ b/src/plugins/nat/FEATURE.yaml
@@ -2,7 +2,7 @@
name: Network Address Translation
maintainer:
- Ole Troan <ot@cisco.com>
- - Filip Varga <fivarga@cisco.com>
+ - Filip Varga <filipvarga89@gmail.com>
features:
- NAT44-EI - IPv4 Endpoint Independent NAT
- 1:1 NAT
diff --git a/src/plugins/nat/det44/det44.api b/src/plugins/nat/det44/det44.api
index 7b6aef70883..ddb9c497ea0 100644
--- a/src/plugins/nat/det44/det44.api
+++ b/src/plugins/nat/det44/det44.api
@@ -39,7 +39,6 @@ autoreply define det44_plugin_enable_disable {
u32 inside_vrf;
u32 outside_vrf;
bool enable;
- option status="in_progress";
};
/** \brief Enable/disable DET44 feature on the interface
@@ -55,7 +54,6 @@ autoreply define det44_interface_add_del_feature {
bool is_add;
bool is_inside;
vl_api_interface_index_t sw_if_index;
- option status="in_progress";
};
/** \brief Dump interfaces with DET44 feature
@@ -65,7 +63,6 @@ autoreply define det44_interface_add_del_feature {
define det44_interface_dump {
u32 client_index;
u32 context;
- option status="in_progress";
};
/** \brief DET44 interface details response
@@ -78,7 +75,6 @@ define det44_interface_details {
bool is_inside;
bool is_outside;
vl_api_interface_index_t sw_if_index;
- option status="in_progress";
};
/** \brief Add/delete DET44 mapping
diff --git a/src/plugins/nat/det44/det44.c b/src/plugins/nat/det44/det44.c
index 1dbbfdfdebe..f251bc9c608 100644
--- a/src/plugins/nat/det44/det44.c
+++ b/src/plugins/nat/det44/det44.c
@@ -29,7 +29,6 @@
det44_main_t det44_main;
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_det44_in2out, static) = {
.arc_name = "ip4-unicast",
.node_name = "det44-in2out",
@@ -47,7 +46,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Deterministic NAT (CGN)",
};
-/* *INDENT-ON* */
void
det44_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
@@ -150,14 +148,12 @@ snat_det_add_map (ip4_address_t * in_addr, u8 in_plen,
}
/* Add/del external address range to FIB */
- /* *INDENT-OFF* */
pool_foreach (i, dm->interfaces) {
if (det44_interface_is_inside(i))
continue;
det44_add_del_addr_to_fib(out_addr, out_plen, i->sw_if_index, is_add);
goto out;
}
- /* *INDENT-ON* */
out:
return 0;
}
@@ -203,7 +199,6 @@ det44_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
// rather make a structure and when enable call is used
// then register nodes
- /* *INDENT-OFF* */
pool_foreach (tmp, dm->interfaces) {
if (tmp->sw_if_index == sw_if_index)
{
@@ -211,7 +206,6 @@ det44_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
goto out;
}
}
- /* *INDENT-ON* */
out:
feature_name = is_inside ? "det44-in2out" : "det44-out2in";
@@ -270,7 +264,6 @@ out:
// add/del outside interface fib to registry
u8 found = 0;
det44_fib_t *outside_fib;
- /* *INDENT-OFF* */
vec_foreach (outside_fib, dm->outside_fibs)
{
if (outside_fib->fib_index == fib_index)
@@ -292,7 +285,6 @@ out:
break;
}
}
- /* *INDENT-ON* */
if (!is_del && !found)
{
vec_add2 (dm->outside_fibs, outside_fib, 1);
@@ -301,12 +293,10 @@ out:
}
// add/del outside address to FIB
snat_det_map_t *mp;
- /* *INDENT-OFF* */
pool_foreach (mp, dm->det_maps) {
det44_add_del_addr_to_fib(&mp->out_addr,
mp->out_plen, sw_if_index, !is_del);
}
- /* *INDENT-ON* */
}
return 0;
}
@@ -324,19 +314,29 @@ det44_expire_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
snat_det_session_t *ses;
snat_det_map_t *mp;
- vlib_process_wait_for_event_or_clock (vm, 10.0);
- vlib_process_get_events (vm, NULL);
- u32 now = (u32) vlib_time_now (vm);
- /* *INDENT-OFF* */
- pool_foreach (mp, dm->det_maps) {
- vec_foreach(ses, mp->sessions)
- {
- /* Delete if session expired */
- if (ses->in_port && (ses->expire < now))
- snat_det_ses_close (mp, ses);
- }
- }
- /* *INDENT-ON* */
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, 10.0);
+ vlib_process_get_events (vm, NULL);
+ u32 now = (u32) vlib_time_now (vm);
+
+ if (!plugin_enabled ())
+ {
+ continue;
+ }
+
+ pool_foreach (mp, dm->det_maps)
+ {
+ vec_foreach (ses, mp->sessions)
+ {
+ // close expired sessions
+ if (ses->in_port && (ses->expire < now))
+ {
+ snat_det_ses_close (mp, ses);
+ }
+ }
+ }
+ }
return 0;
}
@@ -374,10 +374,11 @@ det44_plugin_enable (det44_config_t c)
c.inside_vrf_id,
dm->fib_src_hi);
- det44_create_expire_walk_process ();
dm->mss_clamping = 0;
dm->config = c;
dm->enabled = 1;
+
+ det44_create_expire_walk_process ();
return 0;
}
@@ -395,6 +396,8 @@ det44_plugin_disable ()
return 1;
}
+ dm->enabled = 0;
+
// DET44 cleanup (order dependent)
// 1) remove interfaces (det44_interface_add_del) removes map ranges from fib
// 2) free sessions
@@ -428,15 +431,12 @@ det44_plugin_disable ()
}
vec_free (interfaces);
- /* *INDENT-OFF* */
pool_foreach (mp, dm->det_maps)
{
vec_free (mp->sessions);
}
- /* *INDENT-ON* */
det44_reset_timeouts ();
- dm->enabled = 0;
pool_free (dm->interfaces);
pool_free (dm->det_maps);
@@ -467,7 +467,6 @@ det44_update_outside_fib (ip4_main_t * im,
if (!vec_len (dm->outside_fibs))
return;
- /* *INDENT-OFF* */
pool_foreach (i, dm->interfaces)
{
if (i->sw_if_index == sw_if_index)
@@ -477,7 +476,6 @@ det44_update_outside_fib (ip4_main_t * im,
match = 1;
}
}
- /* *INDENT-ON* */
if (!match)
return;
diff --git a/src/plugins/nat/det44/det44.h b/src/plugins/nat/det44/det44.h
index 02b0fa7e81d..e576bfb65e8 100644
--- a/src/plugins/nat/det44/det44.h
+++ b/src/plugins/nat/det44/det44.h
@@ -40,6 +40,7 @@
#include <nat/lib/lib.h>
#include <nat/lib/inlines.h>
#include <nat/lib/ipfix_logging.h>
+#include <nat/lib/nat_proto.h>
/* Session state */
#define foreach_det44_session_state \
@@ -228,7 +229,7 @@ plugin_enabled ()
extern vlib_node_registration_t det44_in2out_node;
extern vlib_node_registration_t det44_out2in_node;
-int det44_plugin_enable ();
+int det44_plugin_enable (det44_config_t);
int det44_plugin_disable ();
int det44_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del);
@@ -278,13 +279,11 @@ snat_det_map_by_user (ip4_address_t * user_addr)
{
det44_main_t *dm = &det44_main;
snat_det_map_t *mp;
- /* *INDENT-OFF* */
pool_foreach (mp, dm->det_maps)
{
if (is_addr_in_net(user_addr, &mp->in_addr, mp->in_plen))
return mp;
}
- /* *INDENT-ON* */
return 0;
}
@@ -293,13 +292,11 @@ snat_det_map_by_out (ip4_address_t * out_addr)
{
det44_main_t *dm = &det44_main;
snat_det_map_t *mp;
- /* *INDENT-OFF* */
pool_foreach (mp, dm->det_maps)
{
if (is_addr_in_net(out_addr, &mp->out_addr, mp->out_plen))
return mp;
}
- /* *INDENT-ON* */
return 0;
}
diff --git a/src/plugins/nat/det44/det44_api.c b/src/plugins/nat/det44/det44_api.c
index 1486180aa99..c7e17dfd147 100644
--- a/src/plugins/nat/det44/det44_api.c
+++ b/src/plugins/nat/det44/det44_api.c
@@ -67,14 +67,12 @@ vl_api_det44_forward_t_handler (vl_api_det44_forward_t * mp)
hi_port = lo_port + m->ports_per_host - 1;
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DET44_FORWARD_REPLY,
({
rmp->out_port_lo = ntohs (lo_port);
rmp->out_port_hi = ntohs (hi_port);
clib_memcpy (rmp->out_addr, &out_addr, 4);
}))
- /* *INDENT-ON* */
}
static void
@@ -98,12 +96,10 @@ vl_api_det44_reverse_t_handler (vl_api_det44_reverse_t * mp)
snat_det_reverse (m, &out_addr, htons (mp->out_port), &in_addr);
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DET44_REVERSE_REPLY,
({
clib_memcpy (rmp->in_addr, &in_addr, 4);
}))
- /* *INDENT-ON* */
}
static void
@@ -139,10 +135,8 @@ vl_api_det44_map_dump_t_handler (vl_api_det44_map_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
vec_foreach(m, dm->det_maps)
sent_det44_map_details(m, reg, mp->context);
- /* *INDENT-ON* */
}
static void
@@ -328,12 +322,10 @@ vl_api_det44_interface_dump_t_handler (vl_api_det44_interface_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (i, dm->interfaces)
{
det44_send_interface_details(i, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -359,7 +351,6 @@ vl_api_det44_get_timeouts_t_handler (vl_api_det44_get_timeouts_t * mp)
nat_timeouts_t timeouts;
int rv = 0;
timeouts = det44_get_timeouts ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DET44_GET_TIMEOUTS_REPLY,
({
rmp->udp = htonl (timeouts.udp);
@@ -367,7 +358,6 @@ vl_api_det44_get_timeouts_t_handler (vl_api_det44_get_timeouts_t * mp)
rmp->tcp_transitory = htonl (timeouts.tcp.transitory);
rmp->icmp = htonl (timeouts.icmp);
}))
- /* *INDENT-ON* */
}
/*
@@ -412,14 +402,12 @@ vl_api_nat_det_forward_t_handler (vl_api_nat_det_forward_t * mp)
hi_port = lo_port + m->ports_per_host - 1;
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_NAT_DET_FORWARD_REPLY,
({
rmp->out_port_lo = ntohs (lo_port);
rmp->out_port_hi = ntohs (hi_port);
clib_memcpy (rmp->out_addr, &out_addr, 4);
}))
- /* *INDENT-ON* */
}
static void
@@ -443,12 +431,10 @@ vl_api_nat_det_reverse_t_handler (vl_api_nat_det_reverse_t * mp)
snat_det_reverse (m, &out_addr, htons (mp->out_port), &in_addr);
send_reply:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_NAT_DET_REVERSE_REPLY,
({
clib_memcpy (rmp->in_addr, &in_addr, 4);
}))
- /* *INDENT-ON* */
}
static void
@@ -484,10 +470,8 @@ vl_api_nat_det_map_dump_t_handler (vl_api_nat_det_map_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
vec_foreach(m, dm->det_maps)
sent_nat_det_map_details(m, reg, mp->context);
- /* *INDENT-ON* */
}
static void
diff --git a/src/plugins/nat/det44/det44_cli.c b/src/plugins/nat/det44/det44_cli.c
index 5bd81d306f4..5d0ad04363e 100644
--- a/src/plugins/nat/det44/det44_cli.c
+++ b/src/plugins/nat/det44/det44_cli.c
@@ -512,7 +512,7 @@ VLIB_CLI_COMMAND (det44_map_command, static) = {
/*?
* @cliexpar
- * @cliexpstart{show det44 mappings}
+ * @cliexstart{show det44 mappings}
* Show DET44 mappings
* vpp# show det44 mappings
* DET44 mappings:
diff --git a/src/plugins/nat/det44/det44_in2out.c b/src/plugins/nat/det44/det44_in2out.c
index 5fe4a9a0658..3f5e05a064c 100644
--- a/src/plugins/nat/det44/det44_in2out.c
+++ b/src/plugins/nat/det44/det44_in2out.c
@@ -1011,7 +1011,6 @@ VLIB_NODE_FN (det44_in2out_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (det44_in2out_node) = {
.name = "det44-in2out",
.vector_size = sizeof (u32),
@@ -1028,7 +1027,6 @@ VLIB_REGISTER_NODE (det44_in2out_node) = {
[DET44_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/det44/det44_inlines.h b/src/plugins/nat/det44/det44_inlines.h
index aeb55b385d3..e5e70bbaebc 100644
--- a/src/plugins/nat/det44/det44_inlines.h
+++ b/src/plugins/nat/det44/det44_inlines.h
@@ -91,7 +91,6 @@ det44_translate (vlib_node_runtime_t * node, u32 sw_if_index0,
if (sw_if_index == ~0)
{
// TODO: go over use cases
- /* *INDENT-OFF* */
vec_foreach (outside_fib, dm->outside_fibs)
{
fei = fib_table_lookup (outside_fib->fib_index, &pfx);
@@ -102,18 +101,15 @@ det44_translate (vlib_node_runtime_t * node, u32 sw_if_index0,
break;
}
}
- /* *INDENT-ON* */
}
if (sw_if_index != ~0)
{
det44_interface_t *i;
- /* *INDENT-OFF* */
pool_foreach (i, dm->interfaces) {
/* NAT packet aimed at outside interface */
if ((det44_interface_is_outside (i)) && (sw_if_index == i->sw_if_index))
return 0;
}
- /* *INDENT-ON* */
}
}
return 1;
diff --git a/src/plugins/nat/det44/det44_out2in.c b/src/plugins/nat/det44/det44_out2in.c
index 111bc61c476..ab6acd4f8e9 100644
--- a/src/plugins/nat/det44/det44_out2in.c
+++ b/src/plugins/nat/det44/det44_out2in.c
@@ -173,6 +173,9 @@ icmp_match_out2in_det (vlib_node_runtime_t * node,
}
det44_log_info ("unknown dst address: %U",
format_ip4_address, &ip0->dst_address);
+ b0->error = node->errors[DET44_OUT2IN_ERROR_NO_TRANSLATION];
+ next0 = DET44_OUT2IN_NEXT_DROP;
+
goto out;
}
@@ -815,7 +818,6 @@ VLIB_NODE_FN (det44_out2in_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (det44_out2in_node) = {
.name = "det44-out2in",
.vector_size = sizeof (u32),
@@ -832,7 +834,6 @@ VLIB_REGISTER_NODE (det44_out2in_node) = {
[DET44_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite.c b/src/plugins/nat/dslite/dslite.c
index 4fe4422df13..a2654b5a44b 100644
--- a/src/plugins/nat/dslite/dslite.c
+++ b/src/plugins/nat/dslite/dslite.c
@@ -101,7 +101,6 @@ dslite_init_datastructures (void)
u32 b4_buckets = 128;
u32 b4_memory_size = 64 << 20;
- /* *INDENT-OFF* */
vec_foreach (td, dm->per_thread_data)
{
clib_bihash_init_24_8 (&td->in2out, "dslite in2out", translation_buckets,
@@ -112,7 +111,6 @@ dslite_init_datastructures (void)
clib_bihash_init_16_8 (&td->b4_hash, "dslite b4s", b4_buckets, b4_memory_size);
}
- /* *INDENT-ON* */
dm->is_enabled = 1;
}
@@ -281,13 +279,11 @@ format_dslite_ce_trace (u8 * s, va_list * args)
VLIB_INIT_FUNCTION (dslite_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Dual-Stack Lite",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite.h b/src/plugins/nat/dslite/dslite.h
index 3c798bf54fe..f05670c9bf5 100644
--- a/src/plugins/nat/dslite/dslite.h
+++ b/src/plugins/nat/dslite/dslite.h
@@ -61,7 +61,6 @@ typedef struct
};
} dslite_session_key_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
nat_session_key_t out2in;
@@ -72,7 +71,6 @@ typedef CLIB_PACKED (struct
u64 total_bytes;
u32 total_pkts;
}) dslite_session_t;
-/* *INDENT-ON* */
typedef struct
{
diff --git a/src/plugins/nat/dslite/dslite_api.c b/src/plugins/nat/dslite/dslite_api.c
index 420e8212ad9..4bb53c37660 100644
--- a/src/plugins/nat/dslite/dslite_api.c
+++ b/src/plugins/nat/dslite/dslite_api.c
@@ -53,13 +53,11 @@ vl_api_dslite_get_aftr_addr_t_handler (vl_api_dslite_get_aftr_addr_t * mp)
dslite_main_t *dm = &dslite_main;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DSLITE_GET_AFTR_ADDR_REPLY,
({
memcpy (rmp->ip4_addr, &dm->aftr_ip4_addr.as_u8, 4);
memcpy (rmp->ip6_addr, &dm->aftr_ip6_addr.as_u8, 16);
}))
- /* *INDENT-ON* */
}
static void
@@ -88,13 +86,11 @@ vl_api_dslite_get_b4_addr_t_handler (vl_api_dslite_get_b4_addr_t * mp)
dslite_main_t *dm = &dslite_main;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_DSLITE_GET_B4_ADDR_REPLY,
({
memcpy (rmp->ip4_addr, &dm->b4_ip4_addr.as_u8, 4);
memcpy (rmp->ip6_addr, &dm->b4_ip6_addr.as_u8, 16);
}))
- /* *INDENT-ON* */
}
static void
@@ -154,12 +150,10 @@ vl_api_dslite_address_dump_t_handler (vl_api_dslite_address_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
vec_foreach (a, dm->pool.pool_addr)
{
send_dslite_address_details (a, reg, mp->context);
}
- /* *INDENT-ON* */
}
/* API definitions */
diff --git a/src/plugins/nat/dslite/dslite_ce_decap.c b/src/plugins/nat/dslite/dslite_ce_decap.c
index f36a87f8bc1..b5bdafc0e26 100644
--- a/src/plugins/nat/dslite/dslite_ce_decap.c
+++ b/src/plugins/nat/dslite/dslite_ce_decap.c
@@ -114,7 +114,6 @@ VLIB_NODE_FN (dslite_ce_decap_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dslite_ce_decap_node) = {
.name = "dslite-ce-decap",
.vector_size = sizeof (u32),
@@ -130,7 +129,6 @@ VLIB_REGISTER_NODE (dslite_ce_decap_node) = {
[DSLITE_IN2OUT_NEXT_IP6_ICMP] = "ip6-icmp-input",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite_ce_encap.c b/src/plugins/nat/dslite/dslite_ce_encap.c
index d8d0e400919..19596efd32d 100644
--- a/src/plugins/nat/dslite/dslite_ce_encap.c
+++ b/src/plugins/nat/dslite/dslite_ce_encap.c
@@ -107,7 +107,6 @@ VLIB_NODE_FN (dslite_ce_encap_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dslite_ce_encap_node) = {
.name = "dslite-ce-encap",
.vector_size = sizeof (u32),
@@ -122,7 +121,6 @@ VLIB_REGISTER_NODE (dslite_ce_encap_node) = {
[DSLITE_CE_ENCAP_NEXT_IP6_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite_cli.c b/src/plugins/nat/dslite/dslite_cli.c
index 193cb3fe248..8ed9deb2a2d 100644
--- a/src/plugins/nat/dslite/dslite_cli.c
+++ b/src/plugins/nat/dslite/dslite_cli.c
@@ -95,12 +95,10 @@ dslite_show_pool_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "DS-Lite pool:");
- /* *INDENT-OFF* */
vec_foreach (a, dm->pool.pool_addr)
{
vlib_cli_output (vm, "%U", format_ip4_address, &a->addr);
}
- /* *INDENT-ON* */
return 0;
}
@@ -267,7 +265,6 @@ dslite_show_sessions_command_fn (vlib_main_t * vm,
dslite_per_thread_data_t *td;
dslite_b4_t *b4;
- /* *INDENT-OFF* */
vec_foreach (td, dm->per_thread_data)
{
pool_foreach (b4, td->b4s)
@@ -275,12 +272,10 @@ dslite_show_sessions_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_dslite_b4, td, b4);
}
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
/*?
* @cliexpar
@@ -394,7 +389,6 @@ VLIB_CLI_COMMAND (dslite_show_sessions, static) = {
.function = dslite_show_sessions_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite_in2out.c b/src/plugins/nat/dslite/dslite_in2out.c
index 409c59c218c..522c3cf4123 100644
--- a/src/plugins/nat/dslite/dslite_in2out.c
+++ b/src/plugins/nat/dslite/dslite_in2out.c
@@ -460,7 +460,6 @@ VLIB_NODE_FN (dslite_in2out_node) (vlib_main_t * vm,
return dslite_in2out_node_fn_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dslite_in2out_node) = {
.name = "dslite-in2out",
.vector_size = sizeof (u32),
@@ -477,7 +476,6 @@ VLIB_REGISTER_NODE (dslite_in2out_node) = {
[DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (dslite_in2out_slowpath_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -486,7 +484,6 @@ VLIB_NODE_FN (dslite_in2out_slowpath_node) (vlib_main_t * vm,
return dslite_in2out_node_fn_inline (vm, node, frame, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
.name = "dslite-in2out-slowpath",
.vector_size = sizeof (u32),
@@ -503,7 +500,6 @@ VLIB_REGISTER_NODE (dslite_in2out_slowpath_node) = {
[DSLITE_IN2OUT_NEXT_SLOWPATH] = "dslite-in2out-slowpath",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/dslite/dslite_out2in.c b/src/plugins/nat/dslite/dslite_out2in.c
index c2fa767bd7d..531bbb468bb 100644
--- a/src/plugins/nat/dslite/dslite_out2in.c
+++ b/src/plugins/nat/dslite/dslite_out2in.c
@@ -266,7 +266,6 @@ VLIB_NODE_FN (dslite_out2in_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (dslite_out2in_node) = {
.name = "dslite-out2in",
.vector_size = sizeof (u32),
@@ -282,7 +281,6 @@ VLIB_REGISTER_NODE (dslite_out2in_node) = {
[DSLITE_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/extras/nat_100ks.py b/src/plugins/nat/extras/nat_100ks.py
index c85a4591cd3..4e8dc2486d6 100644
--- a/src/plugins/nat/extras/nat_100ks.py
+++ b/src/plugins/nat/extras/nat_100ks.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(dst="2.2.0.1") / UDP(dport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="10.0.0.3", ip_max="10.0.3.234", port_min=1025, port_max=1124, limit_flows = 100000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="10.0.0.3",
+ ip_max="10.0.3.234",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=100000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.src")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.sport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_10Ms.py b/src/plugins/nat/extras/nat_10Ms.py
index 6ce62a0b5e7..96a18ec018a 100644
--- a/src/plugins/nat/extras/nat_10Ms.py
+++ b/src/plugins/nat/extras/nat_10Ms.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(dst="2.2.0.1") / UDP(dport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="10.0.0.3", ip_max="10.1.134.162", port_min=1025, port_max=1124, limit_flows=10000000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="10.0.0.3",
+ ip_max="10.1.134.162",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=10000000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.src")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.sport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_10ks.py b/src/plugins/nat/extras/nat_10ks.py
index 33c7196eb9e..c210d5e81d8 100644
--- a/src/plugins/nat/extras/nat_10ks.py
+++ b/src/plugins/nat/extras/nat_10ks.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(dst="2.2.0.1") / UDP(dport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="10.0.0.3", ip_max="10.0.0.102", port_min=1025, port_max=1124, limit_flows = 10000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="10.0.0.3",
+ ip_max="10.0.0.102",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=10000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.src")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.sport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_1Ms.py b/src/plugins/nat/extras/nat_1Ms.py
index 73a91a70985..7271cf73781 100644
--- a/src/plugins/nat/extras/nat_1Ms.py
+++ b/src/plugins/nat/extras/nat_1Ms.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(dst="2.2.0.1") / UDP(dport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="10.0.0.3", ip_max="10.0.39.18", port_min=1025, port_max=1124, limit_flows = 1000000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="10.0.0.3",
+ ip_max="10.0.39.18",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=1000000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.src")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.sport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_out2in_100ks.py b/src/plugins/nat/extras/nat_out2in_100ks.py
index 55ab5d42ee1..911f2cefda4 100644
--- a/src/plugins/nat/extras/nat_out2in_100ks.py
+++ b/src/plugins/nat/extras/nat_out2in_100ks.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(src="2.2.0.1")/UDP(sport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(src="2.2.0.1") / UDP(sport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="173.16.1.3", ip_max="173.16.4.234", port_min=1025, port_max=1124, limit_flows = 100000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="173.16.1.3",
+ ip_max="173.16.4.234",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=100000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.dst")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.dport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_out2in_10Ms.py b/src/plugins/nat/extras/nat_out2in_10Ms.py
index 48d3d199080..b3493641ea0 100644
--- a/src/plugins/nat/extras/nat_out2in_10Ms.py
+++ b/src/plugins/nat/extras/nat_out2in_10Ms.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(src="2.2.0.1")/UDP(sport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(src="2.2.0.1") / UDP(sport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="173.16.1.3", ip_max="173.17.135.162", port_min=1025, port_max=1124, limit_flows = 10000000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="173.16.1.3",
+ ip_max="173.17.135.162",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=10000000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.dst")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.dport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_out2in_10ks.py b/src/plugins/nat/extras/nat_out2in_10ks.py
index e961504fcf9..abd82ce320d 100644
--- a/src/plugins/nat/extras/nat_out2in_10ks.py
+++ b/src/plugins/nat/extras/nat_out2in_10ks.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(src="2.2.0.1")/UDP(sport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(src="2.2.0.1") / UDP(sport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="173.16.1.3", ip_max="173.16.1.102", port_min=1025, port_max=1124, limit_flows = 100000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="173.16.1.3",
+ ip_max="173.16.1.102",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=100000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.dst")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.dport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_out2in_1Ms.py b/src/plugins/nat/extras/nat_out2in_1Ms.py
index d2cb0810263..c08ef191e6c 100644
--- a/src/plugins/nat/extras/nat_out2in_1Ms.py
+++ b/src/plugins/nat/extras/nat_out2in_1Ms.py
@@ -1,35 +1,39 @@
from trex_stl_lib.api import *
-class STLS1:
- def create_stream (self):
- base_pkt = Ether()/IP(src="2.2.0.1")/UDP(sport=12)
+class STLS1:
+ def create_stream(self):
+ base_pkt = Ether() / IP(src="2.2.0.1") / UDP(sport=12)
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
vm = STLVM()
- vm.tuple_var(name="tuple", ip_min="173.16.1.3", ip_max="173.16.40.18", port_min=1025, port_max=1124, limit_flows = 1000000)
+ vm.tuple_var(
+ name="tuple",
+ ip_min="173.16.1.3",
+ ip_max="173.16.40.18",
+ port_min=1025,
+ port_max=1124,
+ limit_flows=1000000,
+ )
vm.write(fv_name="tuple.ip", pkt_offset="IP.dst")
vm.fix_chksum()
vm.write(fv_name="tuple.port", pkt_offset="UDP.dport")
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
return [self.create_stream()]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_ses_open.py b/src/plugins/nat/extras/nat_ses_open.py
index d614d4e7356..a267a6b67fb 100644
--- a/src/plugins/nat/extras/nat_ses_open.py
+++ b/src/plugins/nat/extras/nat_ses_open.py
@@ -1,44 +1,72 @@
from trex_stl_lib.api import *
-class STLS1:
- def __init__ (self):
- self.ip_range = {'local': {'start': "10.0.0.3", 'end': "10.1.255.255"},
- 'external': {'start': "172.16.1.3", 'end': "172.16.1.3"},
- 'remote': {'start': "2.2.0.1", 'end': "2.2.0.1"}}
- self.port_range = {'local': {'start': 1025, 'end': 65535},
- 'remote': {'start': 12, 'end': 12}}
+class STLS1:
+ def __init__(self):
+ self.ip_range = {
+ "local": {"start": "10.0.0.3", "end": "10.1.255.255"},
+ "external": {"start": "172.16.1.3", "end": "172.16.1.3"},
+ "remote": {"start": "2.2.0.1", "end": "2.2.0.1"},
+ }
+ self.port_range = {
+ "local": {"start": 1025, "end": 65535},
+ "remote": {"start": 12, "end": 12},
+ }
- def create_stream (self, vm):
- base_pkt = Ether()/IP()/UDP()
+ def create_stream(self, vm):
+ base_pkt = Ether() / IP() / UDP()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
pad = Padding()
- pad.load = '\x00' * pad_len
- base_pkt = base_pkt/pad
-
+ pad.load = "\x00" * pad_len
+ base_pkt = base_pkt / pad
+
pkt = STLPktBuilder(pkt=base_pkt, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
- def get_streams (self, direction = 0, **kwargs):
+ def get_streams(self, direction=0, **kwargs):
if direction == 0:
- ip_src = self.ip_range['remote']
- ip_dst = self.ip_range['external']
- src_port = self.port_range['remote']
- dst_port = self.port_range['local']
+ ip_src = self.ip_range["remote"]
+ ip_dst = self.ip_range["external"]
+ src_port = self.port_range["remote"]
+ dst_port = self.port_range["local"]
else:
- ip_src = self.ip_range['local']
- ip_dst = self.ip_range['remote']
- src_port = self.port_range['local']
- dst_port = self.port_range['remote']
+ ip_src = self.ip_range["local"]
+ ip_dst = self.ip_range["remote"]
+ src_port = self.port_range["local"]
+ dst_port = self.port_range["remote"]
vm = STLVM()
- vm.var(name="ip_src", min_value=ip_src['start'], max_value=ip_src['end'], size=4, op="random")
- vm.var(name="ip_dst", min_value=ip_dst['start'], max_value=ip_dst['end'], size=4, op="random")
- vm.var(name="src_port", min_value=src_port['start'], max_value=src_port['end'], size=2, op="random")
- vm.var(name="dst_port", min_value=dst_port['start'], max_value=dst_port['end'], size=2, op="random")
+ vm.var(
+ name="ip_src",
+ min_value=ip_src["start"],
+ max_value=ip_src["end"],
+ size=4,
+ op="random",
+ )
+ vm.var(
+ name="ip_dst",
+ min_value=ip_dst["start"],
+ max_value=ip_dst["end"],
+ size=4,
+ op="random",
+ )
+ vm.var(
+ name="src_port",
+ min_value=src_port["start"],
+ max_value=src_port["end"],
+ size=2,
+ op="random",
+ )
+ vm.var(
+ name="dst_port",
+ min_value=dst_port["start"],
+ max_value=dst_port["end"],
+ size=2,
+ op="random",
+ )
vm.write(fv_name="ip_src", pkt_offset="IP.src")
vm.write(fv_name="ip_dst", pkt_offset="IP.dst")
@@ -47,12 +75,9 @@ class STLS1:
vm.fix_chksum()
- return [ self.create_stream(vm) ]
+ return [self.create_stream(vm)]
# dynamic load - used for trex console or simulator
def register():
return STLS1()
-
-
-
diff --git a/src/plugins/nat/extras/nat_static_gen_cfg.py b/src/plugins/nat/extras/nat_static_gen_cfg.py
index 9e59bbfc0c2..009cf099582 100755
--- a/src/plugins/nat/extras/nat_static_gen_cfg.py
+++ b/src/plugins/nat/extras/nat_static_gen_cfg.py
@@ -2,24 +2,24 @@
import ipaddress
import argparse
-parser = argparse.ArgumentParser(description='Generate NAT plugin config.')
-parser.add_argument('static_map_num', metavar='N', type=int, nargs=1,
- help='number of static mappings')
+parser = argparse.ArgumentParser(description="Generate NAT plugin config.")
+parser.add_argument(
+ "static_map_num", metavar="N", type=int, nargs=1, help="number of static mappings"
+)
args = parser.parse_args()
-file_name = 'nat_static_%s' % (args.static_map_num[0])
-outfile = open(file_name, 'w')
+file_name = "nat_static_%s" % (args.static_map_num[0])
+outfile = open(file_name, "w")
-outfile.write('set int ip address TenGigabitEthernet4/0/0 172.16.2.1/24\n')
-outfile.write('set int ip address TenGigabitEthernet4/0/1 173.16.1.1/24\n')
-outfile.write('set int state TenGigabitEthernet4/0/0 up\n')
-outfile.write('set int state TenGigabitEthernet4/0/1 up\n')
-outfile.write('ip route add 2.2.0.0/16 via 173.16.1.2 TenGigabitEthernet4/0/1\n')
-outfile.write('ip route add 10.0.0.0/24 via 172.16.2.2 TenGigabitEthernet4/0/0\n')
-outfile.write('set int nat44 in TenGigabitEthernet4/0/0 out TenGigabitEthernet4/0/1\n')
-
-for i in range (0, args.static_map_num[0]):
- local = str(ipaddress.IPv4Address(u'10.0.0.3') + i)
- external = str(ipaddress.IPv4Address(u'173.16.1.3') + i)
- outfile.write('nat44 add static mapping local %s external %s\n' % (local, external))
+outfile.write("set int ip address TenGigabitEthernet4/0/0 172.16.2.1/24\n")
+outfile.write("set int ip address TenGigabitEthernet4/0/1 173.16.1.1/24\n")
+outfile.write("set int state TenGigabitEthernet4/0/0 up\n")
+outfile.write("set int state TenGigabitEthernet4/0/1 up\n")
+outfile.write("ip route add 2.2.0.0/16 via 173.16.1.2 TenGigabitEthernet4/0/1\n")
+outfile.write("ip route add 10.0.0.0/24 via 172.16.2.2 TenGigabitEthernet4/0/0\n")
+outfile.write("set int nat44 in TenGigabitEthernet4/0/0 out TenGigabitEthernet4/0/1\n")
+for i in range(0, args.static_map_num[0]):
+ local = str(ipaddress.IPv4Address("10.0.0.3") + i)
+ external = str(ipaddress.IPv4Address("173.16.1.3") + i)
+ outfile.write("nat44 add static mapping local %s external %s\n" % (local, external))
diff --git a/src/plugins/nat/extras/nat_test_fast_path.py b/src/plugins/nat/extras/nat_test_fast_path.py
index e869d40872a..fb880fb9e96 100644
--- a/src/plugins/nat/extras/nat_test_fast_path.py
+++ b/src/plugins/nat/extras/nat_test_fast_path.py
@@ -2,7 +2,6 @@ from trex_stl_lib.api import *
class STLS1:
-
def create_stream(self):
# base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
@@ -24,37 +23,46 @@ class STLS1:
# return STLStream(packet=pkt, mode=STLTXCont())
- vm = STLScVmRaw([STLVmTupleGen(ip_min="10.0.0.1", ip_max="10.255.255.254",
- port_min=1025, port_max=65535,
- # name="stuple", limit_flows=10000),
- name="stuple", limit_flows=100),
- STLVmTupleGen(ip_min="2.0.0.1", ip_max="2.255.255.254",
- port_min=1025, port_max=65535,
- # name="dtuple", limit_flows=100000000),
- name="dtuple", limit_flows=100),
-
- # write ip to packet IP.src
- STLVmWrFlowVar(fv_name="stuple.ip",
- pkt_offset="IP.src"),
- STLVmWrFlowVar(fv_name="dtuple.ip",
- pkt_offset="IP.dst"),
- # fix checksum
- STLVmFixIpv4(offset="IP"),
- # write udp.port
- STLVmWrFlowVar(fv_name="stuple.port",
- pkt_offset="UDP.sport"),
- STLVmWrFlowVar(fv_name="dtuple.port",
- pkt_offset="UDP.dport"),
- ]
- )
-
- base_pkt = Ether()/IP(src="16.0.0.1", dst="2.0.0.1")/UDP(dport=12, sport=1025)
+ vm = STLScVmRaw(
+ [
+ STLVmTupleGen(
+ ip_min="10.0.0.1",
+ ip_max="10.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ # name="stuple", limit_flows=10000),
+ name="stuple",
+ limit_flows=100,
+ ),
+ STLVmTupleGen(
+ ip_min="2.0.0.1",
+ ip_max="2.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ # name="dtuple", limit_flows=100000000),
+ name="dtuple",
+ limit_flows=100,
+ ),
+ # write ip to packet IP.src
+ STLVmWrFlowVar(fv_name="stuple.ip", pkt_offset="IP.src"),
+ STLVmWrFlowVar(fv_name="dtuple.ip", pkt_offset="IP.dst"),
+ # fix checksum
+ STLVmFixIpv4(offset="IP"),
+ # write udp.port
+ STLVmWrFlowVar(fv_name="stuple.port", pkt_offset="UDP.sport"),
+ STLVmWrFlowVar(fv_name="dtuple.port", pkt_offset="UDP.dport"),
+ ]
+ )
+
+ base_pkt = (
+ Ether() / IP(src="16.0.0.1", dst="2.0.0.1") / UDP(dport=12, sport=1025)
+ )
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
diff --git a/src/plugins/nat/extras/nat_test_slow_path.py b/src/plugins/nat/extras/nat_test_slow_path.py
index a6351b98adf..3145a2c6a59 100644
--- a/src/plugins/nat/extras/nat_test_slow_path.py
+++ b/src/plugins/nat/extras/nat_test_slow_path.py
@@ -2,7 +2,6 @@ from trex_stl_lib.api import *
class STLS1:
-
def create_stream(self):
# base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
@@ -24,35 +23,44 @@ class STLS1:
# return STLStream(packet=pkt, mode=STLTXCont())
- vm = STLScVmRaw([STLVmTupleGen(ip_min="10.0.0.1", ip_max="10.255.255.254",
- port_min=1025, port_max=65535,
- name="stuple", limit_flows=10000),
- STLVmTupleGen(ip_min="2.0.0.1", ip_max="2.255.255.254",
- port_min=1025, port_max=65535,
- name="dtuple", limit_flows=100000000),
-
- # write ip to packet IP.src
- STLVmWrFlowVar(fv_name="stuple.ip",
- pkt_offset="IP.src"),
- STLVmWrFlowVar(fv_name="dtuple.ip",
- pkt_offset="IP.dst"),
- # fix checksum
- STLVmFixIpv4(offset="IP"),
- # write udp.port
- STLVmWrFlowVar(fv_name="stuple.port",
- pkt_offset="UDP.sport"),
- STLVmWrFlowVar(fv_name="dtuple.port",
- pkt_offset="UDP.dport"),
- ]
- )
-
- base_pkt = Ether()/IP(src="16.0.0.1", dst="2.0.0.1")/UDP(dport=12, sport=1025)
+ vm = STLScVmRaw(
+ [
+ STLVmTupleGen(
+ ip_min="10.0.0.1",
+ ip_max="10.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ name="stuple",
+ limit_flows=10000,
+ ),
+ STLVmTupleGen(
+ ip_min="2.0.0.1",
+ ip_max="2.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ name="dtuple",
+ limit_flows=100000000,
+ ),
+ # write ip to packet IP.src
+ STLVmWrFlowVar(fv_name="stuple.ip", pkt_offset="IP.src"),
+ STLVmWrFlowVar(fv_name="dtuple.ip", pkt_offset="IP.dst"),
+ # fix checksum
+ STLVmFixIpv4(offset="IP"),
+ # write udp.port
+ STLVmWrFlowVar(fv_name="stuple.port", pkt_offset="UDP.sport"),
+ STLVmWrFlowVar(fv_name="dtuple.port", pkt_offset="UDP.dport"),
+ ]
+ )
+
+ base_pkt = (
+ Ether() / IP(src="16.0.0.1", dst="2.0.0.1") / UDP(dport=12, sport=1025)
+ )
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
return STLStream(packet=pkt, mode=STLTXCont())
diff --git a/src/plugins/nat/extras/nat_test_slow_path_with_latency.py b/src/plugins/nat/extras/nat_test_slow_path_with_latency.py
index 6c7663434c5..0c08e7a5e80 100644
--- a/src/plugins/nat/extras/nat_test_slow_path_with_latency.py
+++ b/src/plugins/nat/extras/nat_test_slow_path_with_latency.py
@@ -2,7 +2,6 @@ from trex_stl_lib.api import *
class STLS1:
-
def create_stream(self, port_id):
# base_pkt = Ether()/IP(dst="2.2.0.1")/UDP(dport=12)
@@ -24,49 +23,61 @@ class STLS1:
# return STLStream(packet=pkt, mode=STLTXCont())
- vm = STLScVmRaw([STLVmTupleGen(ip_min="10.0.0.1", ip_max="10.255.255.254",
- port_min=1025, port_max=65535,
- name="stuple", limit_flows=10000),
- STLVmTupleGen(ip_min="2.0.0.1", ip_max="2.255.255.254",
- port_min=1025, port_max=65535,
- name="dtuple", limit_flows=100000000),
-
- # write ip to packet IP.src
- STLVmWrFlowVar(fv_name="stuple.ip",
- pkt_offset="IP.src"),
- STLVmWrFlowVar(fv_name="dtuple.ip",
- pkt_offset="IP.dst"),
- # fix checksum
- STLVmFixIpv4(offset="IP"),
- # write udp.port
- STLVmWrFlowVar(fv_name="stuple.port",
- pkt_offset="UDP.sport"),
- STLVmWrFlowVar(fv_name="dtuple.port",
- pkt_offset="UDP.dport"),
- ]
- )
-
- base_pkt = Ether()/IP(src="16.0.0.1", dst="2.0.0.1")/UDP(dport=12, sport=1025)
+ vm = STLScVmRaw(
+ [
+ STLVmTupleGen(
+ ip_min="10.0.0.1",
+ ip_max="10.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ name="stuple",
+ limit_flows=10000,
+ ),
+ STLVmTupleGen(
+ ip_min="2.0.0.1",
+ ip_max="2.255.255.254",
+ port_min=1025,
+ port_max=65535,
+ name="dtuple",
+ limit_flows=100000000,
+ ),
+ # write ip to packet IP.src
+ STLVmWrFlowVar(fv_name="stuple.ip", pkt_offset="IP.src"),
+ STLVmWrFlowVar(fv_name="dtuple.ip", pkt_offset="IP.dst"),
+ # fix checksum
+ STLVmFixIpv4(offset="IP"),
+ # write udp.port
+ STLVmWrFlowVar(fv_name="stuple.port", pkt_offset="UDP.sport"),
+ STLVmWrFlowVar(fv_name="dtuple.port", pkt_offset="UDP.dport"),
+ ]
+ )
+
+ base_pkt = (
+ Ether() / IP(src="16.0.0.1", dst="2.0.0.1") / UDP(dport=12, sport=1025)
+ )
pad = Padding()
if len(base_pkt) < 64:
pad_len = 64 - len(base_pkt)
- pad.load = '\x00' * pad_len
+ pad.load = "\x00" * pad_len
- pad = max(0, 64 - len(base_pkt)) * 'x'
- pad_latency = max(0, (64-4) - len(base_pkt)) * 'x'
+ pad = max(0, 64 - len(base_pkt)) * "x"
+ pad_latency = max(0, (64 - 4) - len(base_pkt)) * "x"
- pkt = STLPktBuilder(pkt=base_pkt/pad, vm=vm)
+ pkt = STLPktBuilder(pkt=base_pkt / pad, vm=vm)
- return [STLStream(packet=pkt, mode=STLTXCont()),
- # latency stream
- STLStream(packet = STLPktBuilder(pkt = base_pkt/pad_latency),
- mode = STLTXCont(pps=1000),
- flow_stats = STLFlowLatencyStats(pg_id = 12+port_id))
- ]
+ return [
+ STLStream(packet=pkt, mode=STLTXCont()),
+ # latency stream
+ STLStream(
+ packet=STLPktBuilder(pkt=base_pkt / pad_latency),
+ mode=STLTXCont(pps=1000),
+ flow_stats=STLFlowLatencyStats(pg_id=12 + port_id),
+ ),
+ ]
def get_streams(self, direction=0, **kwargs):
# return [self.create_stream()]
- return self.create_stream(kwargs['port_id'])
+ return self.create_stream(kwargs["port_id"])
# dynamic load - used for trex console or simulator
diff --git a/src/plugins/nat/lib/alloc.h b/src/plugins/nat/lib/alloc.h
index a9a2c15fedc..882809e829c 100644
--- a/src/plugins/nat/lib/alloc.h
+++ b/src/plugins/nat/lib/alloc.h
@@ -21,6 +21,7 @@
#define included_nat_lib_alloc_h__
#include <vnet/ip/ip.h>
+#include <nat/lib/nat_proto.h>
typedef struct nat_ip4_pool_addr_s nat_ip4_pool_addr_t;
typedef struct nat_ip4_addr_port_s nat_ip4_addr_port_t;
@@ -41,14 +42,12 @@ struct nat_ip4_pool_addr_s
{
ip4_address_t addr;
u32 fib_index;
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
u16 busy_##n##_ports; \
u16 * busy_##n##_ports_per_thread; \
uword * busy_##n##_port_bitmap;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
};
struct nat_ip4_addr_port_s
diff --git a/src/plugins/nat/lib/inlines.h b/src/plugins/nat/lib/inlines.h
index fe1f7dd27bc..24e3ba83a5b 100644
--- a/src/plugins/nat/lib/inlines.h
+++ b/src/plugins/nat/lib/inlines.h
@@ -20,51 +20,18 @@
#include <vnet/ip/icmp46_packet.h>
-always_inline nat_protocol_t
-ip_proto_to_nat_proto (u8 ip_proto)
-{
- static const nat_protocol_t lookup_table[256] = {
- [IP_PROTOCOL_TCP] = NAT_PROTOCOL_TCP,
- [IP_PROTOCOL_UDP] = NAT_PROTOCOL_UDP,
- [IP_PROTOCOL_ICMP] = NAT_PROTOCOL_ICMP,
- [IP_PROTOCOL_ICMP6] = NAT_PROTOCOL_ICMP,
- };
-
- return lookup_table[ip_proto];
-}
-
-static_always_inline u8
-nat_proto_to_ip_proto (nat_protocol_t nat_proto)
-{
- ASSERT (nat_proto <= NAT_PROTOCOL_ICMP);
-
- static const u8 lookup_table[256] = {
- [NAT_PROTOCOL_OTHER] = ~0,
- [NAT_PROTOCOL_TCP] = IP_PROTOCOL_TCP,
- [NAT_PROTOCOL_UDP] = IP_PROTOCOL_UDP,
- [NAT_PROTOCOL_ICMP] = IP_PROTOCOL_ICMP,
- };
-
- ASSERT (NAT_PROTOCOL_OTHER == nat_proto || NAT_PROTOCOL_TCP == nat_proto
- || NAT_PROTOCOL_UDP == nat_proto || NAT_PROTOCOL_ICMP == nat_proto);
-
- return lookup_table[nat_proto];
-}
-
-static_always_inline u8
+static_always_inline u64
icmp_type_is_error_message (u8 icmp_type)
{
- switch (icmp_type)
- {
- case ICMP4_destination_unreachable:
- case ICMP4_time_exceeded:
- case ICMP4_parameter_problem:
- case ICMP4_source_quench:
- case ICMP4_redirect:
- case ICMP4_alternate_host_address:
- return 1;
- }
- return 0;
+ int bmp = 0;
+ bmp |= 1 << ICMP4_destination_unreachable;
+ bmp |= 1 << ICMP4_time_exceeded;
+ bmp |= 1 << ICMP4_parameter_problem;
+ bmp |= 1 << ICMP4_source_quench;
+ bmp |= 1 << ICMP4_redirect;
+ bmp |= 1 << ICMP4_alternate_host_address;
+
+ return (1ULL << icmp_type) & bmp;
}
#endif /* included_nat_inlines_h__ */
diff --git a/src/plugins/nat/lib/ipfix_logging.c b/src/plugins/nat/lib/ipfix_logging.c
index 6e5e4b6c750..593fa09f7e2 100644
--- a/src/plugins/nat/lib/ipfix_logging.c
+++ b/src/plugins/nat/lib/ipfix_logging.c
@@ -51,7 +51,7 @@ typedef struct
u8 nat_event;
u32 src_ip;
u32 nat_src_ip;
- nat_protocol_t nat_proto;
+ ip_protocol_t proto;
u16 src_port;
u16 nat_src_port;
u32 vrf_id;
@@ -143,12 +143,9 @@ do { \
* @returns template packet
*/
static inline u8 *
-nat_template_rewrite (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- nat_event_t event, quota_exceed_event_t quota_event)
+nat_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port, nat_event_t event,
+ quota_exceed_event_t quota_event)
{
nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
ip4_header_t *ip;
@@ -164,7 +161,7 @@ nat_template_rewrite (flow_report_main_t * frm,
flow_report_stream_t *stream;
u32 stream_index;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
stream_index = clib_atomic_fetch_or(&silm->stream_index, 0);
clib_atomic_cmp_and_swap (&silm->stream_index,
@@ -241,8 +238,8 @@ nat_template_rewrite (flow_report_main_t * frm,
ip->ip_version_and_header_length = 0x45;
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
- ip->dst_address.as_u32 = collector_address->as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
udp->dst_port = clib_host_to_net_u16 (collector_port);
udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
@@ -390,97 +387,72 @@ nat_template_rewrite (flow_report_main_t * frm,
}
u8 *
-nat_template_rewrite_addr_exhausted (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+nat_template_rewrite_addr_exhausted (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port,
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, NAT_ADDRESSES_EXHAUTED, 0);
+ return nat_template_rewrite (exp, fr, collector_port, NAT_ADDRESSES_EXHAUTED,
+ 0);
}
u8 *
-nat_template_rewrite_nat44_session (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+nat_template_rewrite_nat44_session (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port,
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, NAT44_SESSION_CREATE, 0);
+ return nat_template_rewrite (exp, fr, collector_port, NAT44_SESSION_CREATE,
+ 0);
}
u8 *
-nat_template_rewrite_max_entries_per_usr (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+nat_template_rewrite_max_entries_per_usr (
+ ipfix_exporter_t *exp, flow_report_t *fr, ip4_address_t *collector_address,
+ ip4_address_t *src_address, u16 collector_port, ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, QUOTA_EXCEEDED,
- MAX_ENTRIES_PER_USER);
+ return nat_template_rewrite (exp, fr, collector_port, QUOTA_EXCEEDED,
+ MAX_ENTRIES_PER_USER);
}
u8 *
-nat_template_rewrite_max_sessions (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+nat_template_rewrite_max_sessions (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, QUOTA_EXCEEDED,
- MAX_SESSION_ENTRIES);
+ return nat_template_rewrite (exp, fr, collector_port, QUOTA_EXCEEDED,
+ MAX_SESSION_ENTRIES);
}
u8 *
-nat_template_rewrite_max_bibs (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+nat_template_rewrite_max_bibs (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, QUOTA_EXCEEDED,
- MAX_BIB_ENTRIES);
+ return nat_template_rewrite (exp, fr, collector_port, QUOTA_EXCEEDED,
+ MAX_BIB_ENTRIES);
}
u8 *
-nat_template_rewrite_nat64_bib (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+nat_template_rewrite_nat64_bib (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port,
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, NAT64_BIB_CREATE, 0);
+ return nat_template_rewrite (exp, fr, collector_port, NAT64_BIB_CREATE, 0);
}
u8 *
-nat_template_rewrite_nat64_session (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
- u16 collector_port,
- ipfix_report_element_t *elts,
- u32 n_elts, u32 *stream_index)
+nat_template_rewrite_nat64_session (ipfix_exporter_t *exp, flow_report_t *fr,
+ u16 collector_port,
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
- return nat_template_rewrite (frm, fr, collector_address, src_address,
- collector_port, NAT64_SESSION_CREATE, 0);
+ return nat_template_rewrite (exp, fr, collector_port, NAT64_SESSION_CREATE,
+ 0);
}
static inline void
@@ -497,16 +469,17 @@ nat_ipfix_header_create (flow_report_main_t * frm,
ip4_header_t *ip;
udp_header_t *udp;
vlib_main_t *vm = vlib_get_main ();
-
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
+
stream_index = clib_atomic_fetch_or(&silm->stream_index, 0);
- stream = &frm->streams[stream_index];
+ stream = &exp->streams[stream_index];
b0->current_data = 0;
b0->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h) +
sizeof (*s);
b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_F_FLOW_REPORT);
vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = exp->fib_index;
tp = vlib_buffer_get_current (b0);
ip = (ip4_header_t *) & tp->ip4;
udp = (udp_header_t *) (ip + 1);
@@ -517,10 +490,10 @@ nat_ipfix_header_create (flow_report_main_t * frm,
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
ip->flags_and_fragment_offset = 0;
- ip->src_address.as_u32 = frm->src_address.as_u32;
- ip->dst_address.as_u32 = frm->ipfix_collector.as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
- udp->dst_port = clib_host_to_net_u16 (frm->collector_port);
+ udp->dst_port = clib_host_to_net_u16 (exp->collector_port);
udp->checksum = 0;
h->export_time = clib_host_to_net_u32 ((u32)
@@ -545,6 +518,7 @@ nat_ipfix_send (flow_report_main_t *frm, vlib_frame_t *f, vlib_buffer_t *b0,
ip4_header_t *ip;
udp_header_t *udp;
vlib_main_t *vm = vlib_get_main ();
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
tp = vlib_buffer_get_current (b0);
ip = (ip4_header_t *) & tp->ip4;
@@ -563,7 +537,7 @@ nat_ipfix_send (flow_report_main_t *frm, vlib_frame_t *f, vlib_buffer_t *b0,
ip->checksum = ip4_header_checksum (ip);
udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
- if (frm->udp_checksum)
+ if (exp->udp_checksum)
{
udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
if (udp->checksum == 0)
@@ -577,9 +551,8 @@ nat_ipfix_send (flow_report_main_t *frm, vlib_frame_t *f, vlib_buffer_t *b0,
static void
nat_ipfix_logging_nat44_ses (u32 thread_index, u8 nat_event, u32 src_ip,
- u32 nat_src_ip, nat_protocol_t nat_proto,
- u16 src_port, u16 nat_src_port, u32 fib_index,
- int do_flush)
+ u32 nat_src_ip, ip_protocol_t proto, u16 src_port,
+ u16 nat_src_port, u32 fib_index, int do_flush)
{
nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
nat_ipfix_per_thread_data_t *sitd = &silm->per_thread_data[thread_index];
@@ -590,11 +563,9 @@ nat_ipfix_logging_nat44_ses (u32 thread_index, u8 nat_event, u32 src_ip,
u32 offset;
vlib_main_t *vm = vlib_get_main ();
u64 now;
- u8 proto;
u16 template_id;
u32 vrf_id;
-
- proto = nat_proto_to_ip_proto (nat_proto);
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -667,8 +638,8 @@ nat_ipfix_logging_nat44_ses (u32 thread_index, u8 nat_event, u32 src_ip,
b0->current_length += NAT44_SESSION_CREATE_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + NAT44_SESSION_CREATE_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush ||
+ (offset + NAT44_SESSION_CREATE_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->nat44_session_template_id,
@@ -695,6 +666,7 @@ nat_ipfix_logging_addr_exhausted (u32 thread_index, u32 pool_id, int do_flush)
u64 now;
u8 nat_event = NAT_ADDRESSES_EXHAUTED;
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -750,8 +722,8 @@ nat_ipfix_logging_addr_exhausted (u32 thread_index, u32 pool_id, int do_flush)
b0->current_length += NAT_ADDRESSES_EXHAUTED_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + NAT_ADDRESSES_EXHAUTED_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush ||
+ (offset + NAT_ADDRESSES_EXHAUTED_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->addr_exhausted_template_id,
@@ -780,6 +752,7 @@ nat_ipfix_logging_max_entries_per_usr (u32 thread_index,
u8 nat_event = QUOTA_EXCEEDED;
u32 quota_event = clib_host_to_net_u32 (MAX_ENTRIES_PER_USER);
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -842,8 +815,8 @@ nat_ipfix_logging_max_entries_per_usr (u32 thread_index,
b0->current_length += MAX_ENTRIES_PER_USER_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + MAX_ENTRIES_PER_USER_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush ||
+ (offset + MAX_ENTRIES_PER_USER_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->max_entries_per_user_template_id,
@@ -871,6 +844,7 @@ nat_ipfix_logging_max_ses (u32 thread_index, u32 limit, int do_flush)
u8 nat_event = QUOTA_EXCEEDED;
u32 quota_event = clib_host_to_net_u32 (MAX_SESSION_ENTRIES);
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -930,8 +904,7 @@ nat_ipfix_logging_max_ses (u32 thread_index, u32 limit, int do_flush)
b0->current_length += MAX_SESSIONS_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + MAX_SESSIONS_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush || (offset + MAX_SESSIONS_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->max_sessions_template_id,
@@ -959,6 +932,7 @@ nat_ipfix_logging_max_bib (u32 thread_index, u32 limit, int do_flush)
u8 nat_event = QUOTA_EXCEEDED;
u32 quota_event = clib_host_to_net_u32 (MAX_BIB_ENTRIES);
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -1018,8 +992,7 @@ nat_ipfix_logging_max_bib (u32 thread_index, u32 limit, int do_flush)
b0->current_length += MAX_BIBS_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + MAX_BIBS_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush || (offset + MAX_BIBS_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->max_bibs_template_id,
@@ -1048,6 +1021,7 @@ nat_ipfix_logging_nat64_bibe (u32 thread_index, u8 nat_event,
vlib_main_t *vm = vlib_get_main ();
u64 now;
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -1119,8 +1093,7 @@ nat_ipfix_logging_nat64_bibe (u32 thread_index, u8 nat_event,
b0->current_length += NAT64_BIB_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + NAT64_BIB_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush || (offset + NAT64_BIB_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->nat64_bib_template_id,
@@ -1151,6 +1124,7 @@ nat_ipfix_logging_nat64_ses (u32 thread_index, u8 nat_event,
vlib_main_t *vm = vlib_get_main ();
u64 now;
u16 template_id;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
now = (u64) ((vlib_time_now (vm) - silm->vlib_time_0) * 1e3);
now += silm->milisecond_time_0;
@@ -1234,8 +1208,7 @@ nat_ipfix_logging_nat64_ses (u32 thread_index, u8 nat_event,
b0->current_length += NAT64_SES_LEN;
}
- if (PREDICT_FALSE
- (do_flush || (offset + NAT64_SES_LEN) > frm->path_mtu))
+ if (PREDICT_FALSE (do_flush || (offset + NAT64_SES_LEN) > exp->path_mtu))
{
template_id = clib_atomic_fetch_or (
&silm->nat64_ses_template_id,
@@ -1307,54 +1280,34 @@ nat_ipfix_flush_from_main (void)
/**
* @brief Generate NAT44 session create event
- *
- * @param thread_index thread index
- * @param src_ip source IPv4 address
- * @param nat_src_ip transaltes source IPv4 address
- * @param nat_proto NAT transport protocol
- * @param src_port source port
- * @param nat_src_port translated source port
- * @param vrf_id VRF ID
*/
void
-nat_ipfix_logging_nat44_ses_create (u32 thread_index,
- u32 src_ip,
- u32 nat_src_ip,
- nat_protocol_t nat_proto,
- u16 src_port,
- u16 nat_src_port, u32 fib_index)
+nat_ipfix_logging_nat44_ses_create (u32 thread_index, u32 src_ip,
+ u32 nat_src_ip, ip_protocol_t proto,
+ u16 src_port, u16 nat_src_port,
+ u32 fib_index)
{
skip_if_disabled ();
nat_ipfix_logging_nat44_ses (thread_index, NAT44_SESSION_CREATE, src_ip,
- nat_src_ip, nat_proto, src_port, nat_src_port,
- fib_index, 0);
+ nat_src_ip, proto, src_port, nat_src_port,
+ fib_index, 0);
}
/**
* @brief Generate NAT44 session delete event
- *
- * @param thread_index thread index
- * @param src_ip source IPv4 address
- * @param nat_src_ip transaltes source IPv4 address
- * @param nat_proto NAT transport protocol
- * @param src_port source port
- * @param nat_src_port translated source port
- * @param vrf_id VRF ID
*/
void
-nat_ipfix_logging_nat44_ses_delete (u32 thread_index,
- u32 src_ip,
- u32 nat_src_ip,
- nat_protocol_t nat_proto,
- u16 src_port,
- u16 nat_src_port, u32 fib_index)
+nat_ipfix_logging_nat44_ses_delete (u32 thread_index, u32 src_ip,
+ u32 nat_src_ip, ip_protocol_t proto,
+ u16 src_port, u16 nat_src_port,
+ u32 fib_index)
{
skip_if_disabled ();
nat_ipfix_logging_nat44_ses (thread_index, NAT44_SESSION_DELETE, src_ip,
- nat_src_ip, nat_proto, src_port, nat_src_port,
- fib_index, 0);
+ nat_src_ip, proto, src_port, nat_src_port,
+ fib_index, 0);
}
/**
@@ -1366,9 +1319,23 @@ nat_ipfix_logging_nat44_ses_delete (u32 thread_index,
void
nat_ipfix_logging_addresses_exhausted (u32 thread_index, u32 pool_id)
{
- //TODO: This event SHOULD be rate limited
+ nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
+ static f64 *last_sent = 0;
+
skip_if_disabled ();
+ /* TODO: make rate configurable, use 1pps so far */
+ clib_spinlock_lock_if_init (&silm->addr_exhausted_lock);
+ f64 now = vlib_time_now (vlib_get_main ());
+ vec_validate (last_sent, pool_id);
+ if (now < last_sent[pool_id] + 1.0)
+ {
+ clib_spinlock_unlock_if_init (&silm->addr_exhausted_lock);
+ return;
+ }
+ last_sent[pool_id] = now;
+ clib_spinlock_unlock_if_init (&silm->addr_exhausted_lock);
+
nat_ipfix_logging_addr_exhausted (thread_index, pool_id, 0);
}
@@ -1409,9 +1376,22 @@ deterministic_nat_data_callback
void
nat_ipfix_logging_max_sessions (u32 thread_index, u32 limit)
{
- //TODO: This event SHOULD be rate limited
+ nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
+ static f64 last_sent = 0;
+
skip_if_disabled ();
+ /* TODO: make rate configurable, use 1pps so far */
+ clib_spinlock_lock_if_init (&silm->max_sessions_lock);
+ f64 now = vlib_time_now (vlib_get_main ());
+ if (now < last_sent + 1.0)
+ {
+ clib_spinlock_unlock_if_init (&silm->max_sessions_lock);
+ return;
+ }
+ last_sent = now;
+ clib_spinlock_unlock_if_init (&silm->max_sessions_lock);
+
nat_ipfix_logging_max_ses (thread_index, limit, 0);
}
@@ -1424,9 +1404,22 @@ nat_ipfix_logging_max_sessions (u32 thread_index, u32 limit)
void
nat_ipfix_logging_max_bibs (u32 thread_index, u32 limit)
{
- //TODO: This event SHOULD be rate limited
+ nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
+ static f64 last_sent = 0;
+
skip_if_disabled ();
+ /* TODO: make rate configurable, use 1pps so far */
+ clib_spinlock_lock_if_init (&silm->max_bibs_lock);
+ f64 now = vlib_time_now (vlib_get_main ());
+ if (now < last_sent + 1.0)
+ {
+ clib_spinlock_unlock_if_init (&silm->max_bibs_lock);
+ return;
+ }
+ last_sent = now;
+ clib_spinlock_unlock_if_init (&silm->max_bibs_lock);
+
nat_ipfix_logging_max_bib (thread_index, limit, 0);
}
@@ -1497,12 +1490,13 @@ nat_ipfix_logging_nat64_session (u32 thread_index,
}
vlib_frame_t *
-data_callback (flow_report_main_t * frm, flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+data_callback (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
- if (PREDICT_FALSE (++silm->call_counter >= vec_len (frm->reports)))
+ if (PREDICT_FALSE (++silm->call_counter >= vec_len (exp->reports)))
{
nat_ipfix_flush_from_main();
silm->call_counter = 0;
@@ -1524,7 +1518,7 @@ int
nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
{
nat_ipfix_logging_main_t *silm = &nat_ipfix_logging_main;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
vnet_flow_report_add_del_args_t a;
int rv;
u8 e = enable ? 1 : 0;
@@ -1539,7 +1533,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
a.flow_data_callback = data_callback;
a.rewrite_callback = nat_template_rewrite_nat44_session;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1547,7 +1541,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
}
a.rewrite_callback = nat_template_rewrite_addr_exhausted;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1555,7 +1549,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
}
a.rewrite_callback = nat_template_rewrite_max_sessions;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1563,7 +1557,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
}
a.rewrite_callback = nat_template_rewrite_max_bibs;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1571,7 +1565,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
}
a.rewrite_callback = nat_template_rewrite_nat64_bib;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1579,7 +1573,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
}
a.rewrite_callback = nat_template_rewrite_nat64_session;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1589,7 +1583,7 @@ nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port)
// if endpoint dependent per user max entries is also required
/*
a.rewrite_callback = nat_template_rewrite_max_entries_per_usr;
- rv = vnet_flow_report_add_del (frm, &a, NULL);
+ rv = vnet_flow_report_add_del (exp, &a, NULL);
if (rv)
{
//nat_elog_warn_X1 ("vnet_flow_report_add_del returned %d", "i4", rv);
@@ -1620,6 +1614,11 @@ nat_ipfix_logging_init (vlib_main_t * vm)
silm->milisecond_time_0 = unix_time_now_nsec () * 1e-6;
vec_validate (silm->per_thread_data, tm->n_vlib_mains - 1);
+
+ /* Set up rate-limit */
+ clib_spinlock_init (&silm->addr_exhausted_lock);
+ clib_spinlock_init (&silm->max_sessions_lock);
+ clib_spinlock_init (&silm->max_bibs_lock);
}
static uword
@@ -1631,11 +1630,9 @@ ipfix_flush_process (vlib_main_t *vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_ipfix_flush_node) = {
.function = ipfix_flush_process,
.name = "nat-ipfix-flush",
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
};
-/* *INDENT-ON* */
diff --git a/src/plugins/nat/lib/ipfix_logging.h b/src/plugins/nat/lib/ipfix_logging.h
index 0b2357a2604..dc7927a160c 100644
--- a/src/plugins/nat/lib/ipfix_logging.h
+++ b/src/plugins/nat/lib/ipfix_logging.h
@@ -108,6 +108,10 @@ typedef struct {
/** nat data callbacks call counter */
u16 call_counter;
+ /** rate-limit locks */
+ clib_spinlock_t addr_exhausted_lock;
+ clib_spinlock_t max_sessions_lock;
+ clib_spinlock_t max_bibs_lock;
} nat_ipfix_logging_main_t;
extern nat_ipfix_logging_main_t nat_ipfix_logging_main;
@@ -117,15 +121,13 @@ int nat_ipfix_logging_enabled ();
void nat_ipfix_logging_init (vlib_main_t * vm);
int nat_ipfix_logging_enable_disable (int enable, u32 domain_id, u16 src_port);
void nat_ipfix_logging_nat44_ses_create (u32 thread_index, u32 src_ip,
- u32 nat_src_ip,
- nat_protocol_t nat_proto,
- u16 src_port, u16 nat_src_port,
- u32 fib_index);
+ u32 nat_src_ip, ip_protocol_t proto,
+ u16 src_port, u16 nat_src_port,
+ u32 fib_index);
void nat_ipfix_logging_nat44_ses_delete (u32 thread_index, u32 src_ip,
- u32 nat_src_ip,
- nat_protocol_t nat_proto,
- u16 src_port, u16 nat_src_port,
- u32 fib_index);
+ u32 nat_src_ip, ip_protocol_t proto,
+ u16 src_port, u16 nat_src_port,
+ u32 fib_index);
void nat_ipfix_logging_addresses_exhausted(u32 thread_index, u32 pool_id);
void nat_ipfix_logging_max_entries_per_user(u32 thread_index,
u32 limit, u32 src_ip);
diff --git a/src/plugins/nat/lib/lib.c b/src/plugins/nat/lib/lib.c
index d2def2cc480..30bafac73c0 100644
--- a/src/plugins/nat/lib/lib.c
+++ b/src/plugins/nat/lib/lib.c
@@ -14,6 +14,7 @@
*/
#include <nat/lib/lib.h>
+#include <nat/lib/nat_proto.h>
uword
unformat_nat_protocol (unformat_input_t *input, va_list *args)
diff --git a/src/plugins/nat/lib/lib.h b/src/plugins/nat/lib/lib.h
index b0b5229b337..dc2c43beaaf 100644
--- a/src/plugins/nat/lib/lib.h
+++ b/src/plugins/nat/lib/lib.h
@@ -21,6 +21,17 @@
#include <vlibapi/api.h>
+typedef struct
+{
+ u16 identifier;
+ u16 sequence;
+} nat_icmp_echo_header_t;
+
+typedef struct
+{
+ u16 src_port, dst_port;
+} nat_tcp_udp_header_t;
+
/* NAT API Configuration flags */
#define foreach_nat_config_flag \
_(0x01, IS_TWICE_NAT) \
@@ -54,19 +65,6 @@ typedef enum
#undef _
} nat_error_t;
-#define foreach_nat_protocol \
- _ (OTHER, 0, other, "other") \
- _ (UDP, 1, udp, "udp") \
- _ (TCP, 2, tcp, "tcp") \
- _ (ICMP, 3, icmp, "icmp")
-
-typedef enum
-{
-#define _(N, i, n, s) NAT_PROTOCOL_##N = i,
- foreach_nat_protocol
-#undef _
-} nat_protocol_t;
-
/* default protocol timeouts */
#define NAT_UDP_TIMEOUT 300
#define NAT_TCP_TRANSITORY_TIMEOUT 240
@@ -96,29 +94,6 @@ nat_reset_timeouts (nat_timeouts_t * timeouts)
}
static_always_inline u32
-nat_session_get_timeout (nat_timeouts_t *timeouts, nat_protocol_t proto,
- u8 state)
-{
- switch (proto)
- {
- case NAT_PROTOCOL_ICMP:
- return timeouts->icmp;
- case NAT_PROTOCOL_UDP:
- return timeouts->udp;
- case NAT_PROTOCOL_TCP:
- {
- if (state)
- return timeouts->tcp.transitory;
- else
- return timeouts->tcp.established;
- }
- default:
- return timeouts->udp;
- }
- return 0;
-}
-
-static_always_inline u32
nat_calc_bihash_buckets (u32 n_elts)
{
n_elts = n_elts / 2.5;
@@ -138,10 +113,6 @@ nat_calc_bihash_buckets (u32 n_elts)
return lower_pow2;
}
-u8 *format_nat_protocol (u8 *s, va_list *args);
-
-uword unformat_nat_protocol (unformat_input_t *input, va_list *args);
-
#endif /* included_nat_lib_h__ */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/lib/log.h b/src/plugins/nat/lib/log.h
index 26bd93f2589..a82028ed8bf 100644
--- a/src/plugins/nat/lib/log.h
+++ b/src/plugins/nat/lib/log.h
@@ -21,20 +21,7 @@
#include <vppinfra/elog.h>
-#define foreach_nat_log_level \
- _ (0x00, LOG_NONE) \
- _ (0x01, LOG_ERROR) \
- _ (0x02, LOG_WARNING) \
- _ (0x03, LOG_NOTICE) \
- _ (0x04, LOG_INFO) \
- _ (0x05, LOG_DEBUG)
-
-typedef enum nat_log_level_t_
-{
-#define _(n, f) NAT_##f = n,
- foreach_nat_log_level
-#undef _
-} nat_log_level_t;
+#include <nat/lib/nat_types.api_types.h>
#define nat_elog(_pm, _level, _str) \
do \
diff --git a/src/plugins/nat/lib/nat_proto.h b/src/plugins/nat/lib/nat_proto.h
new file mode 100644
index 00000000000..4b57b994e22
--- /dev/null
+++ b/src/plugins/nat/lib/nat_proto.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_nat_proto_h__
+#define included_nat_proto_h__
+
+#include <vnet/ip/ip.h>
+
+#define foreach_nat_protocol \
+ _ (OTHER, 0, other, "other") \
+ _ (UDP, 1, udp, "udp") \
+ _ (TCP, 2, tcp, "tcp") \
+ _ (ICMP, 3, icmp, "icmp")
+
+typedef enum
+{
+#define _(N, i, n, s) NAT_PROTOCOL_##N = i,
+ foreach_nat_protocol
+#undef _
+ NAT_N_PROTOCOLS
+} nat_protocol_t;
+
+always_inline nat_protocol_t
+ip_proto_to_nat_proto (ip_protocol_t ip_proto)
+{
+ static const nat_protocol_t lookup_table[256] = {
+ [IP_PROTOCOL_TCP] = NAT_PROTOCOL_TCP,
+ [IP_PROTOCOL_UDP] = NAT_PROTOCOL_UDP,
+ [IP_PROTOCOL_ICMP] = NAT_PROTOCOL_ICMP,
+ [IP_PROTOCOL_ICMP6] = NAT_PROTOCOL_ICMP,
+ };
+
+ return lookup_table[ip_proto];
+}
+
+static_always_inline ip_protocol_t
+nat_proto_to_ip_proto (nat_protocol_t nat_proto)
+{
+ ASSERT (nat_proto <= NAT_PROTOCOL_ICMP);
+
+ static const u8 lookup_table[256] = {
+ [NAT_PROTOCOL_OTHER] = ~0,
+ [NAT_PROTOCOL_TCP] = IP_PROTOCOL_TCP,
+ [NAT_PROTOCOL_UDP] = IP_PROTOCOL_UDP,
+ [NAT_PROTOCOL_ICMP] = IP_PROTOCOL_ICMP,
+ };
+
+ ASSERT (NAT_PROTOCOL_OTHER == nat_proto || NAT_PROTOCOL_TCP == nat_proto ||
+ NAT_PROTOCOL_UDP == nat_proto || NAT_PROTOCOL_ICMP == nat_proto);
+
+ return lookup_table[nat_proto];
+}
+
+u8 *format_nat_protocol (u8 *s, va_list *args);
+
+uword unformat_nat_protocol (unformat_input_t *input, va_list *args);
+
+#endif /* included_nat_proto_h__ */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/lib/nat_syslog.c b/src/plugins/nat/lib/nat_syslog.c
index 2c395bf7fd8..98777ebf280 100644
--- a/src/plugins/nat/lib/nat_syslog.c
+++ b/src/plugins/nat/lib/nat_syslog.c
@@ -23,38 +23,7 @@
#include <nat/lib/nat_syslog.h>
#include <nat/lib/inlines.h>
-#define NAT_FACILITY SYSLOG_FACILITY_LOCAL0
-
-#define NAT_APPNAME "NAT"
-
-#define SADD_SDEL_SEVERITY SYSLOG_SEVERITY_INFORMATIONAL
-#define APMADD_APMDEL_SEVERITY SYSLOG_SEVERITY_INFORMATIONAL
-
-#define SADD_MSGID "SADD"
-#define SDEL_MSGID "SDEL"
-#define APMADD_MSGID "APMADD"
-#define APMDEL_MSGID "APMDEL"
-
-#define NSESS_SDID "nsess"
-#define NAPMAP_SDID "napmap"
-
-#define SSUBIX_SDPARAM_NAME "SSUBIX"
-#define SVLAN_SDPARAM_NAME "SVLAN"
-#define IATYP_SDPARAM_NAME "IATYP"
-#define ISADDR_SDPARAM_NAME "ISADDR"
-#define ISPORT_SDPARAM_NAME "ISPORT"
-#define IDADDR_SDPARAM_NAME "IDADDR"
-#define IDPORT_SDPARAM_NAME "IDPORT"
-#define XATYP_SDPARAM_NAME "XATYP"
-#define XSADDR_SDPARAM_NAME "XSADDR"
-#define XSPORT_SDPARAM_NAME "XSPORT"
-#define XDADDR_SDPARAM_NAME "XDADDR"
-#define XDPORT_SDPARAM_NAME "XDPORT"
-#define PROTO_SDPARAM_NAME "PROTO"
-#define SV6ENC_SDPARAM_NAME "SV6ENC"
-
-#define IATYP_IPV4 "IPv4"
-#define IATYP_IPV6 "IPv6"
+#include <nat/lib/nat_syslog_constants.h>
static inline void
nat_syslog_nat44_apmap (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
@@ -142,82 +111,6 @@ nat_syslog_dslite_apmdel (u32 ssubix, ip6_address_t * sv6enc,
}
static inline void
-nat_syslog_nat44_sess (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
- u16 isport, ip4_address_t * xsaddr, u16 xsport,
- ip4_address_t * idaddr, u16 idport,
- ip4_address_t * xdaddr, u16 xdport,
- nat_protocol_t proto, u8 is_add, u8 is_twicenat)
-{
- syslog_msg_t syslog_msg;
- fib_table_t *fib;
-
- if (!syslog_is_enabled ())
- return;
-
- if (syslog_severity_filter_block (SADD_SDEL_SEVERITY))
- return;
-
- fib = fib_table_get (sfibix, FIB_PROTOCOL_IP4);
-
- syslog_msg_init (&syslog_msg, NAT_FACILITY, SADD_SDEL_SEVERITY, NAT_APPNAME,
- is_add ? SADD_MSGID : SDEL_MSGID);
-
- syslog_msg_sd_init (&syslog_msg, NSESS_SDID);
- syslog_msg_add_sd_param (&syslog_msg, SSUBIX_SDPARAM_NAME, "%d", ssubix);
- syslog_msg_add_sd_param (&syslog_msg, SVLAN_SDPARAM_NAME, "%d",
- fib->ft_table_id);
- syslog_msg_add_sd_param (&syslog_msg, IATYP_SDPARAM_NAME, IATYP_IPV4);
- syslog_msg_add_sd_param (&syslog_msg, ISADDR_SDPARAM_NAME, "%U",
- format_ip4_address, isaddr);
- syslog_msg_add_sd_param (&syslog_msg, ISPORT_SDPARAM_NAME, "%d",
- clib_net_to_host_u16 (isport));
- syslog_msg_add_sd_param (&syslog_msg, XATYP_SDPARAM_NAME, IATYP_IPV4);
- syslog_msg_add_sd_param (&syslog_msg, XSADDR_SDPARAM_NAME, "%U",
- format_ip4_address, xsaddr);
- syslog_msg_add_sd_param (&syslog_msg, XSPORT_SDPARAM_NAME, "%d",
- clib_net_to_host_u16 (xsport));
- syslog_msg_add_sd_param (&syslog_msg, PROTO_SDPARAM_NAME, "%d",
- nat_proto_to_ip_proto (proto));
- syslog_msg_add_sd_param (&syslog_msg, XDADDR_SDPARAM_NAME, "%U",
- format_ip4_address, xdaddr);
- syslog_msg_add_sd_param (&syslog_msg, XDPORT_SDPARAM_NAME, "%d",
- clib_net_to_host_u16 (xdport));
- if (is_twicenat)
- {
- syslog_msg_add_sd_param (&syslog_msg, IDADDR_SDPARAM_NAME, "%U",
- format_ip4_address, idaddr);
- syslog_msg_add_sd_param (&syslog_msg, IDPORT_SDPARAM_NAME, "%d",
- clib_net_to_host_u16 (idport));
- }
-
- syslog_msg_send (&syslog_msg);
-}
-
-void
-nat_syslog_nat44_sadd (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
- u16 isport, ip4_address_t * idaddr, u16 idport,
- ip4_address_t * xsaddr, u16 xsport,
- ip4_address_t * xdaddr, u16 xdport,
- nat_protocol_t proto, u8 is_twicenat)
-{
- nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
- idaddr, idport, xdaddr, xdport, proto, 1,
- is_twicenat);
-}
-
-void
-nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
- u16 isport, ip4_address_t * idaddr, u16 idport,
- ip4_address_t * xsaddr, u16 xsport,
- ip4_address_t * xdaddr, u16 xdport,
- nat_protocol_t proto, u8 is_twicenat)
-{
- nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
- idaddr, idport, xdaddr, xdport, proto, 0,
- is_twicenat);
-}
-
-static inline void
nat_syslog_nat64_sess (u32 sfibix, ip6_address_t * isaddr, u16 isport,
ip4_address_t * xsaddr, u16 xsport,
ip4_address_t * xdaddr, u16 xdport,
diff --git a/src/plugins/nat/lib/nat_syslog.h b/src/plugins/nat/lib/nat_syslog.h
index 9721664cf54..f929bf310b4 100644
--- a/src/plugins/nat/lib/nat_syslog.h
+++ b/src/plugins/nat/lib/nat_syslog.h
@@ -20,6 +20,7 @@
#define __included_nat_syslog_h__
#include <nat/lib/lib.h>
+#include <nat/lib/nat_proto.h>
void nat_syslog_nat44_apmadd (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
u16 isport, ip4_address_t * xsaddr, u16 xsport,
@@ -41,18 +42,6 @@ nat_syslog_dslite_apmdel (u32 ssubix, ip6_address_t * sv6enc,
ip4_address_t * xsaddr, u16 xsport,
nat_protocol_t proto);
-void nat_syslog_nat44_sadd (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
- u16 isport, ip4_address_t * idaddr, u16 idport,
- ip4_address_t * xsaddr, u16 xsport,
- ip4_address_t * xdaddr, u16 xdport,
- nat_protocol_t proto, u8 is_twicenat);
-
-void nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t * isaddr,
- u16 isport, ip4_address_t * idaddr, u16 idport,
- ip4_address_t * xsaddr, u16 xsport,
- ip4_address_t * xdaddr, u16 xdport,
- nat_protocol_t proto, u8 is_twicenat);
-
void nat_syslog_nat64_sadd (u32 sfibix, ip6_address_t * isaddr, u16 isport,
ip4_address_t * xsaddr, u16 xsport,
ip4_address_t * xdaddr, u16 xdport,
diff --git a/src/plugins/nat/lib/nat_syslog_constants.h b/src/plugins/nat/lib/nat_syslog_constants.h
new file mode 100644
index 00000000000..eeea7d2654e
--- /dev/null
+++ b/src/plugins/nat/lib/nat_syslog_constants.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @file
+ * @brief NAT syslog logging constants
+ */
+#ifndef __included_nat_syslog_constants_h__
+#define __included_nat_syslog_constants_h__
+
+#define NAT_FACILITY SYSLOG_FACILITY_LOCAL0
+
+#define NAT_APPNAME "NAT"
+
+#define SADD_SDEL_SEVERITY SYSLOG_SEVERITY_INFORMATIONAL
+#define APMADD_APMDEL_SEVERITY SYSLOG_SEVERITY_INFORMATIONAL
+
+#define SADD_MSGID "SADD"
+#define SDEL_MSGID "SDEL"
+#define APMADD_MSGID "APMADD"
+#define APMDEL_MSGID "APMDEL"
+
+#define NSESS_SDID "nsess"
+#define NAPMAP_SDID "napmap"
+
+#define SSUBIX_SDPARAM_NAME "SSUBIX"
+#define SVLAN_SDPARAM_NAME "SVLAN"
+#define IATYP_SDPARAM_NAME "IATYP"
+#define ISADDR_SDPARAM_NAME "ISADDR"
+#define ISPORT_SDPARAM_NAME "ISPORT"
+#define IDADDR_SDPARAM_NAME "IDADDR"
+#define IDPORT_SDPARAM_NAME "IDPORT"
+#define XATYP_SDPARAM_NAME "XATYP"
+#define XSADDR_SDPARAM_NAME "XSADDR"
+#define XSPORT_SDPARAM_NAME "XSPORT"
+#define XDADDR_SDPARAM_NAME "XDADDR"
+#define XDPORT_SDPARAM_NAME "XDPORT"
+#define PROTO_SDPARAM_NAME "PROTO"
+#define SV6ENC_SDPARAM_NAME "SV6ENC"
+
+#define IATYP_IPV4 "IPv4"
+#define IATYP_IPV6 "IPv6"
+
+#endif /* __included_nat_syslog_constants_h__ */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/nat/nat44-ed/nat44_ed.api b/src/plugins/nat/nat44-ed/nat44_ed.api
index c65b7a81166..322260f7f96 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed.api
+++ b/src/plugins/nat/nat44-ed/nat44_ed.api
@@ -13,13 +13,13 @@
* limitations under the License.
*/
-option version = "5.3.0";
+option version = "5.5.0";
import "vnet/ip/ip_types.api";
import "vnet/interface_types.api";
import "plugins/nat/lib/nat_types.api";
/**
- * @file nat44.api
+ * @file nat44_ed.api
* @brief VPP control-plane API messages.
*
* This file defines VPP control-plane API messages which are generally
@@ -35,41 +35,6 @@ enum nat44_config_flags : u8
NAT44_IS_OUT2IN_DPO = 0x08,
};
-/** \brief Enable/disable NAT44 plugin
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param inside_vrf - inside vrf id
- @param outside_vrf - outside vrf id
- @param users - maximum number of users per thread
- (NAT44_IS_ENDPOINT_INDEPENDENT)
- @param user_memory - overwrite hash allocation parameter
- (NAT44_IS_ENDPOINT_INDEPENDENT)
- @param sessions - maximum number of sessions per thread
- @param session_memory - overwrite hash allocation parameter
- @param user_sessions - maximum number of sessions per user
- (NAT44_IS_ENDPOINT_INDEPENDENT)
- @param enable - true if enable, false if disable
- @param flags - flag NAT44_IS_ENDPOINT_INDEPENDENT,
- NAT44_IS_ENDPOINT_DEPENDENT,
- NAT44_IS_STATIC_MAPPING_ONLY,
- NAT44_IS_CONNECTION_TRACKING,
- NAT44_IS_OUT2IN_DPO
-*/
-autoreply define nat44_plugin_enable_disable {
- option deprecated;
- u32 client_index;
- u32 context;
- u32 inside_vrf;
- u32 outside_vrf;
- u32 users;
- u32 user_memory;
- u32 sessions;
- u32 session_memory;
- u32 user_sessions;
- bool enable;
- vl_api_nat44_config_flags_t flags;
-};
-
/** \brief Enable/disable NAT44ED plugin
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -82,7 +47,6 @@ autoreply define nat44_plugin_enable_disable {
NAT44_IS_CONNECTION_TRACKING
*/
autoreply define nat44_ed_plugin_enable_disable {
- option in_progress;
u32 client_index;
u32 context;
u32 inside_vrf;
@@ -93,146 +57,65 @@ autoreply define nat44_ed_plugin_enable_disable {
vl_api_nat44_config_flags_t flags;
};
-/** \brief Control ping from client to api server request
+/** \brief Enable/disable forwarding for NAT44
+ Forward packets which don't match existing translation
+ or static mapping instead of dropping them.
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param enable - true for enable, false for disable
*/
-define nat_control_ping
-{
+autoreply define nat44_forwarding_enable_disable {
option deprecated;
u32 client_index;
u32 context;
+ bool enable;
};
-/** \brief Control ping from the client to the server response
+/** \brief Enable/disable NAT IPFIX logging
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param vpe_pid - the pid of the vpe, returned by the server
+ @param domain_id - observation domain ID
+ @param src_port - source port number
+ @param enable - true if enable, false if disable
*/
-define nat_control_ping_reply
-{
+autoreply define nat_ipfix_enable_disable {
option deprecated;
- u32 context;
- i32 retval;
u32 client_index;
- u32 vpe_pid;
+ u32 context;
+ u32 domain_id;
+ u16 src_port;
+ bool enable;
};
-/** \brief Show NAT plugin startup config
+/** \brief Set values of timeouts for NAT sessions (seconds)
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param udp - UDP timeout (default 300sec)
+ @param tcp_established - TCP established timeout (default 7440sec)
+ @param tcp_transitory - TCP transitory timeout (default 240sec)
+ @param icmp - ICMP timeout (default 60sec)
*/
-define nat_show_config
-{
+autoreply define nat_set_timeouts {
option deprecated;
u32 client_index;
u32 context;
+ u32 udp;
+ u32 tcp_established;
+ u32 tcp_transitory;
+ u32 icmp;
};
-/** \brief DEPRECATED: Show NAT plugin startup config reply
- @param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param static_mapping_only - if true dynamic translations disabled
- @param static_mapping_connection_tracking - if true create session data
- @param deterministic - if true deterministic mapping
- @param endpoint_dependent - if true endpoint-dependent mode
- @param out2in_dpo - if true out2in dpo mode
- @param dslite_ce - if true DS-Lite is CE/B4 element, if false AFTR elemet
- @param translation_buckets - number of translation hash buckets
- @param translation_memory_size - translation hash memory size
- @param user_buckets - number of user hash buckets
- @param user_memory_size - user hash memory size
- @param max_translations_per_user - maximum number of translations per user
- @param outside_vrf_id - outside VRF id
- @param inside_vrf_id - default inside VRF id
- @param nat64_bib_buckets - number of NAT64 BIB hash buckets
- @param nat64_bib_memory_size - memory size of NAT64 BIB hash
- @param nat64_st_buckets - number of NAT64 session table hash buckets
- @param nat64_st_memory_size - memory size of NAT64 session table hash
-*/
-define nat_show_config_reply
-{
- option deprecated;
- u32 context;
- i32 retval;
- bool static_mapping_only;
- bool static_mapping_connection_tracking;
- bool deterministic;
- bool endpoint_dependent;
- bool out2in_dpo;
- bool dslite_ce;
- u32 translation_buckets;
- u32 translation_memory_size;
- u32 user_buckets;
- u64 user_memory_size;
- u32 max_translations_per_user;
- u32 outside_vrf_id;
- u32 inside_vrf_id;
- u32 nat64_bib_buckets;
- u64 nat64_bib_memory_size;
- u32 nat64_st_buckets;
- u64 nat64_st_memory_size;
-};
-
-/** \brief Show NAT plugin startup config
+/** \brief NAT44 set session limit
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param session_limit - session limit
+ @param vrf_id - vrf id
*/
-define nat_show_config_2
-{
- option deprecated;
+autoreply define nat44_set_session_limit {
u32 client_index;
u32 context;
-};
-
-/** \brief Show NAT plugin startup config reply
- @param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param static_mapping_only - if true dynamic translations disabled
- @param static_mapping_connection_tracking - if true create session data
- @param deterministic - if true deterministic mapping
- @param endpoint_dependent - if true endpoint-dependent mode
- @param out2in_dpo - if true out2in dpo mode
- @param dslite_ce - if true DS-Lite is CE/B4 element, if false AFTR elemet
- @param translation_buckets - number of translation hash buckets
- @param translation_memory_size - translation hash memory size
- @param user_buckets - number of user hash buckets
- @param user_memory_size - user hash memory size
- @param max_translations_per_user - maximum number of translations per user
- @param outside_vrf_id - outside VRF id
- @param inside_vrf_id - default inside VRF id
- @param nat64_bib_buckets - number of NAT64 BIB hash buckets
- @param nat64_bib_memory_size - memory size of NAT64 BIB hash
- @param nat64_st_buckets - number of NAT64 session table hash buckets
- @param nat64_st_memory_size - memory size of NAT64 session table hash
- @param max_translations_per_thread - max translations per worker thread
- @param max_users_per_thread - max users per worker thread
-*/
-define nat_show_config_2_reply
-{
- option deprecated;
- u32 context;
- i32 retval;
- bool static_mapping_only;
- bool static_mapping_connection_tracking;
- bool deterministic;
- bool endpoint_dependent;
- bool out2in_dpo;
- bool dslite_ce;
- u32 translation_buckets;
- u64 translation_memory_size;
- u32 user_buckets;
- u64 user_memory_size;
- u32 max_translations_per_user;
- u32 outside_vrf_id;
- u32 inside_vrf_id;
- u32 nat64_bib_buckets;
- u64 nat64_bib_memory_size;
- u32 nat64_st_buckets;
- u64 nat64_st_memory_size;
- u32 max_translations_per_thread;
- u32 max_users_per_thread;
+ u32 session_limit;
+ u32 vrf_id;
};
/** \brief Show NAT44 plugin running config
@@ -241,7 +124,6 @@ define nat_show_config_2_reply
*/
define nat44_show_running_config
{
- option in_progress;
u32 client_index;
u32 context;
};
@@ -267,7 +149,6 @@ define nat44_show_running_config
*/
define nat44_show_running_config_reply
{
- option in_progress;
u32 context;
i32 retval;
u32 inside_vrf;
@@ -284,41 +165,6 @@ define nat44_show_running_config_reply
vl_api_nat44_config_flags_t flags;
};
-/** \brief Run nat44 garbage collection
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-autoreply define nat44_session_cleanup {
- option deprecated;
- u32 client_index;
- u32 context;
-};
-
-/** \brief NAT44 set session limit
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param session_limit - session limit
- @param vrf_id - vrf id
-*/
-autoreply define nat44_set_session_limit {
- u32 client_index;
- u32 context;
- u32 session_limit;
- u32 vrf_id;
-};
-
-/** \brief Set NAT logging level
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param log_level - logging level
-*/
-autoreply define nat_set_log_level {
- option deprecated;
- u32 client_index;
- u32 context;
- vl_api_nat_log_level_t log_level;
-};
-
/** \brief Set NAT workers
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -352,121 +198,82 @@ define nat_worker_details {
string name[64];
};
-/** \brief Enable/disable NAT IPFIX logging
+/** \brief Add/delete inter VRF NAT44-ED routing table
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param domain_id - observation domain ID
- @param src_port - source port number
- @param enable - true if enable, false if disable
+ @param table_vrf_id - id of (rx) VRF used for resolving
+ destination (tx) VRF during dynamic
+ session creation
+ @param is_add - if true add else del
*/
-autoreply define nat_ipfix_enable_disable {
- option deprecated;
+autoreply define nat44_ed_add_del_vrf_table {
u32 client_index;
u32 context;
- u32 domain_id;
- u16 src_port;
- bool enable;
+ u32 table_vrf_id;
+ bool is_add;
};
-/** \brief Set values of timeouts for NAT sessions (seconds)
+/** \brief Add/del inter VRF NAT44-ED route record
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param udp - UDP timeout (default 300sec)
- @param tcp_established - TCP established timeout (default 7440sec)
- @param tcp_transitory - TCP transitory timeout (default 240sec)
- @param icmp - ICMP timeout (default 60sec)
+ @param table_vrf_id - id of the VRF NAT routing table
+ @param vrf_id - id of resolving destination (tx) VRF table
+ @param is_add - if true add else del
*/
-autoreply define nat_set_timeouts {
- option deprecated;
+autoreply define nat44_ed_add_del_vrf_route {
u32 client_index;
u32 context;
- u32 udp;
- u32 tcp_established;
- u32 tcp_transitory;
- u32 icmp;
+ u32 table_vrf_id;
+ u32 vrf_id;
+ bool is_add;
};
-/** \brief Get values of timeouts for NAT sessions (seconds)
+/** \brief Dump NAT44-ED inter VRF NAT routing tables
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
-define nat_get_timeouts {
- option deprecated;
+define nat44_ed_vrf_tables_dump {
u32 client_index;
u32 context;
-};
-
-/** \brief Get values of timeouts for NAT sessions reply
- @param context - sender context, to match reply w/ request
- @param retval - return code
- @param udp - UDP timeout
- @param tcp_established - TCP established timeout
- @param tcp_transitory - TCP transitory timeout
- @param icmp - ICMP timeout
-*/
-define nat_get_timeouts_reply {
option deprecated;
- u32 context;
- i32 retval;
- u32 udp;
- u32 tcp_established;
- u32 tcp_transitory;
- u32 icmp;
};
-/** \brief Set address and port assignment algorithm
- @param client_index - opaque cookie to identify the sender
+/** \brief NAT44-ED inter VRF NAT routing table details response
@param context - sender context, to match reply w/ request
- @param alg - address and port assignment algorithm:
- 0 - default, 1 - MAP-E, 2 - port range
- (see nat_addr_and_port_alloc_alg_t in nat.h)
- @param psid_offset - number of offset bits (valid only for MAP-E alg)
- @param psid_length - length of PSID (valid only for MAP-E alg)
- @param psid - Port Set Identifier (PSID) value (valid only for MAP-E alg)
- @param start_port - beginning of the port range
- @param end_port - end of the port range
+ @param table_vrf_id - id of the VRF NAT routing table
+ @param n_vrf_ids - number of vrf_ids
+ @param vrf_ids - ids of resolving destination (tx) VRFs
*/
-autoreply define nat_set_addr_and_port_alloc_alg {
- u32 client_index;
+define nat44_ed_vrf_tables_details {
u32 context;
- u8 alg;
- u8 psid_offset;
- u8 psid_length;
- u16 psid;
- u16 start_port;
- u16 end_port;
+ u32 table_vrf_id;
+ u32 n_vrf_ids;
+ u32 vrf_ids[n_vrf_ids];
+ option deprecated;
};
-/** \brief Get address and port assignment algorithm
+/** \brief Dump NAT44-ED inter VRF NAT routing tables
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
-define nat_get_addr_and_port_alloc_alg {
+define nat44_ed_vrf_tables_v2_dump {
u32 client_index;
u32 context;
+ option status="in_progress";
};
-/** \brief Get address and port assignment algorithm reply
+/** \brief NAT44-ED inter VRF NAT routing table details response
@param context - sender context, to match reply w/ request
- @param retval - return code
- @param alg - address and port assignment algorithm:
- 0 - default, 1 - MAP-E, 2 - port range
- (see nat_addr_and_port_alloc_alg_t in nat.h)
- @param psid_offset - number of offset bits (valid only for MAP-E alg)
- @param psid_length - length of PSID (valid only for MAP-E alg)
- @param psid - Port Set Identifier (PSID) value (valid only for MAP-E alg)
- @param start_port - beginning of the port range
- @param end_port - end of the port range
+ @param table_vrf_id - id of the VRF NAT routing table
+ @param n_vrf_ids - number of vrf_ids
+ @param vrf_ids - ids of resolving destination (tx) VRFs
*/
-define nat_get_addr_and_port_alloc_alg_reply {
+define nat44_ed_vrf_tables_v2_details {
u32 context;
- i32 retval;
- u8 alg;
- u8 psid_offset;
- u8 psid_length;
- u16 psid;
- u16 start_port;
- u16 end_port;
+ u32 table_vrf_id;
+ u32 n_vrf_ids;
+ u32 vrf_ids[n_vrf_ids];
+ option status="in_progress";
};
/** \brief Set TCP MSS rewriting configuration
@@ -504,138 +311,73 @@ define nat_get_mss_clamping_reply {
bool enable;
};
-/** \brief Set HA listener (local settings)
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param ip_address - local IP4 address
- @param port - local UDP port number
- @param path_mtu - path MTU between local and failover
-*/
-autoreply define nat_ha_set_listener {
- u32 client_index;
- u32 context;
- vl_api_ip4_address_t ip_address;
- u16 port;
- u32 path_mtu;
-};
-
-/** \brief Set HA failover (remote settings)
+/** \brief Set NAT handoff frame queue options
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param ip_address - failover IP4 address
- @param port - failvoer UDP port number
- @param session_refresh_interval - number of seconds after which to send
- session counters refresh
+ @param frame_queue_nelts - number of worker handoff frame queue elements
*/
-autoreply define nat_ha_set_failover {
+autoreply define nat44_ed_set_fq_options {
u32 client_index;
u32 context;
- vl_api_ip4_address_t ip_address;
- u16 port;
- u32 session_refresh_interval;
+ u32 frame_queue_nelts;
};
-/** \brief Get HA listener/local configuration
+/** \brief Show NAT handoff frame queue options
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
-define nat_ha_get_listener {
+define nat44_ed_show_fq_options
+{
u32 client_index;
u32 context;
};
-/** \brief Get HA listener/local configuration reply
+/** \brief Show NAT handoff frame queue options reply
@param context - sender context, to match reply w/ request
- @param retval - return code
- @param ip_address - local IP4 address
- @param port - local UDP port number
- @param path_mtu - Path MTU between local and failover
+ @param retval - return code for the request
+ @param frame_queue_nelts - number of worker handoff frame queue elements
*/
-define nat_ha_get_listener_reply {
+define nat44_ed_show_fq_options_reply
+{
u32 context;
i32 retval;
- vl_api_ip4_address_t ip_address;
- u16 port;
- u32 path_mtu;
+ u32 frame_queue_nelts;
};
-/** \brief Get HA failover/remote settings
+/** \brief Add/delete NAT44 pool address from specific interfce
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param is_add - true if add, false if delete
+ @param sw_if_index - software index of the interface
+ @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts
*/
-define nat_ha_get_failover {
+autoreply define nat44_add_del_interface_addr {
u32 client_index;
u32 context;
+ bool is_add;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_nat_config_flags_t flags;
};
-/** \brief Get HA failover/remote settings reply
- @param context - sender context, to match reply w/ request
- @param retval - return code
- @param ip_address - failover IP4 address
- @param port - failvoer UDP port number
- @param session_refresh_interval - number of seconds after which to send
- session counters refresh
-*/
-define nat_ha_get_failover_reply {
- u32 context;
- i32 retval;
- vl_api_ip4_address_t ip_address;
- u16 port;
- u32 session_refresh_interval;
-};
-
-/** \brief Flush the current HA data (for testing)
+/** \brief Dump NAT44 pool addresses interfaces
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
-autoreply define nat_ha_flush {
+define nat44_interface_addr_dump {
u32 client_index;
u32 context;
};
-/** \brief Resync HA (resend existing sessions to new failover)
- @param client_index - opaque cookie to identify the sender
+/** \brief NAT44 pool addresses interfaces details response
@param context - sender context, to match reply w/ request
- @param want_resync_event - resync completed event sent to the sender via
- nat_ha_resync_completed_event API message if
- non-zero
- @param pid - sender's pid
-*/
-autoreply define nat_ha_resync
-{
- u32 client_index;
- u32 context;
- u8 want_resync_event;
- u32 pid;
-};
-
-/** \brief Tell client about a HA resync completion event
- @param client_index - opaque cookie to identify the sender
- @param pid - client pid registered to receive notification
- @param missed_count - number of missed (not ACKed) messages
-*/
-define nat_ha_resync_completed_event
-{
- u32 client_index;
- u32 pid;
- u32 missed_count;
-};
-
-service {
- rpc nat_ha_resync returns nat_ha_resync_reply events nat_ha_resync_completed_event;
-};
+ @param sw_if_index - software index of the interface
+ @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts
-/** \brief Del NAT44 user
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param ip_address - IPv4 address
- @param fib_index - FIB index
*/
-autoreply define nat44_del_user {
- u32 client_index;
+define nat44_interface_addr_details {
u32 context;
- vl_api_ip4_address_t ip_address;
- u32 fib_index;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_nat_config_flags_t flags;
};
/** \brief Add/del NAT44 address range
@@ -719,41 +461,42 @@ define nat44_interface_details {
vl_api_interface_index_t sw_if_index;
};
-/** \brief Enable/disbale NAT44 as an interface output feature (postrouting
+/** \brief add/del NAT output interface (postrouting
in2out translation)
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param is_add - true if add, false if delete
- @param flags - flag NAT_IS_INSIDE if interface is inside else
- interface is outside
@param sw_if_index - software index of the interface
*/
-autoreply define nat44_interface_add_del_output_feature {
+autoendian autoreply define nat44_ed_add_del_output_interface {
u32 client_index;
u32 context;
bool is_add;
- vl_api_nat_config_flags_t flags;
vl_api_interface_index_t sw_if_index;
};
-/** \brief Dump interfaces with NAT44 output feature
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-define nat44_interface_output_feature_dump {
+service {
+ rpc nat44_ed_output_interface_get returns nat44_ed_output_interface_get_reply
+ stream nat44_ed_output_interface_details;
+};
+
+define nat44_ed_output_interface_get
+{
u32 client_index;
u32 context;
+ u32 cursor;
};
-/** \brief NAT44 interface with output feature details response
- @param context - sender context, to match reply w/ request
- @param flags - flag NAT_IS_INSIDE if interface is inside else
- interface is outside
- @param sw_if_index - software index of the interface
-*/
-define nat44_interface_output_feature_details {
+define nat44_ed_output_interface_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+define nat44_ed_output_interface_details
+{
u32 context;
- vl_api_nat_config_flags_t flags;
vl_api_interface_index_t sw_if_index;
};
@@ -779,6 +522,8 @@ define nat44_interface_output_feature_details {
@param tag - opaque string tag
*/
autoreply define nat44_add_del_static_mapping {
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
@@ -817,7 +562,6 @@ autoreply define nat44_add_del_static_mapping {
@param tag - opaque string tag
*/
autoreply define nat44_add_del_static_mapping_v2 {
- option in_progress;
u32 client_index;
u32 context;
bool is_add;
@@ -929,117 +673,6 @@ define nat44_identity_mapping_details {
string tag[64];
};
-/** \brief Add/delete NAT44 pool address from specific interfce
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - true if add, false if delete
- @param sw_if_index - software index of the interface
- @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts
-*/
-autoreply define nat44_add_del_interface_addr {
- u32 client_index;
- u32 context;
- bool is_add;
- vl_api_interface_index_t sw_if_index;
- vl_api_nat_config_flags_t flags;
-};
-
-/** \brief Dump NAT44 pool addresses interfaces
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-define nat44_interface_addr_dump {
- u32 client_index;
- u32 context;
-};
-
-/** \brief NAT44 pool addresses interfaces details response
- @param context - sender context, to match reply w/ request
- @param sw_if_index - software index of the interface
- @param flags - flag NAT_TWICE_NAT if NAT address range for external hosts
-
-*/
-define nat44_interface_addr_details {
- u32 context;
- vl_api_interface_index_t sw_if_index;
- vl_api_nat_config_flags_t flags;
-};
-
-/** \brief Dump NAT44 users
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-define nat44_user_dump {
- u32 client_index;
- u32 context;
-};
-
-/** \brief NAT44 users response
- @param context - sender context, to match reply w/ request
- @vrf_id - VRF ID
- @param ip_address - IPv4 address
- @param nsessions - number of dynamic sessions
- @param nstaticsessions - number of static sessions
-*/
-define nat44_user_details {
- u32 context;
- u32 vrf_id;
- vl_api_ip4_address_t ip_address;
- u32 nsessions;
- u32 nstaticsessions;
-};
-
-/** \brief NAT44 user's sessions
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param ip_address - IPv4 address of the user to dump
- @param vrf_id - VRF_ID
-*/
-define nat44_user_session_dump {
- u32 client_index;
- u32 context;
- vl_api_ip4_address_t ip_address;
- u32 vrf_id;
-};
-
-/** \brief NAT44 user's sessions response
- @param context - sender context, to match reply w/ request
- @param outside_ip_address - outside IPv4 address
- @param outside_port - outside port
- @param inside_ip_address - inside IPv4 address
- @param inside_port - inside port
- @param protocol - protocol
- @param flags - flag NAT_IS_STATIC if session is static,
- flag NAT_IS_TWICE_NAT if session is twice-nat,
- flag NAT_IS_EXT_HOST_VALID if external host address
- and port are valid
- @param last_heard - last heard timer
- @param total_bytes - count of bytes sent through session
- @param total_pkts - count of pakets sent through session
- @param ext_host_address - external host IPv4 address
- @param ext_host_port - external host port
- @param ext_host_nat_address - post-NAT external host IPv4 address (valid
- only if twice-nat session)
- @param ext_host_nat_port - post-NAT external host port (valid only if
- twice-nat session)
-*/
-define nat44_user_session_details {
- u32 context;
- vl_api_ip4_address_t outside_ip_address;
- u16 outside_port;
- vl_api_ip4_address_t inside_ip_address;
- u16 inside_port;
- u16 protocol;
- vl_api_nat_config_flags_t flags;
- u64 last_heard;
- u64 total_bytes;
- u32 total_pkts;
- vl_api_ip4_address_t ext_host_address;
- u16 ext_host_port;
- vl_api_ip4_address_t ext_host_nat_address;
- u16 ext_host_nat_port;
-};
-
/** \brief NAT44 load-balancing address and port pair
@param addr - IPv4 address of the internal node
@param port - L4 port number of the internal node
@@ -1167,72 +800,195 @@ autoreply define nat44_del_session {
u16 ext_host_port;
};
-/** \brief Enable/disable forwarding for NAT44
- Forward packets which don't match existing translation
- or static mapping instead of dropping them.
+/** \brief Dump NAT44 users
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param enable - true for enable, false for disable
*/
-autoreply define nat44_forwarding_enable_disable {
- option deprecated;
+define nat44_user_dump {
u32 client_index;
u32 context;
- bool enable;
};
-/** \brief Check if forwarding is enabled or disabled
+/** \brief NAT44 users response
+ @param context - sender context, to match reply w/ request
+ @vrf_id - VRF ID
+ @param ip_address - IPv4 address
+ @param nsessions - number of dynamic sessions
+ @param nstaticsessions - number of static sessions
+*/
+define nat44_user_details {
+ u32 context;
+ u32 vrf_id;
+ vl_api_ip4_address_t ip_address;
+ u32 nsessions;
+ u32 nstaticsessions;
+};
+
+/** \brief NAT44 user's sessions
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param ip_address - IPv4 address of the user to dump
+ @param vrf_id - VRF_ID
*/
-define nat44_forwarding_is_enabled {
+define nat44_user_session_dump {
option deprecated;
+
u32 client_index;
u32 context;
+ vl_api_ip4_address_t ip_address;
+ u32 vrf_id;
};
-/** \brief Response to check if forwarding is enabled or disabled
+/** \brief NAT44 user's sessions response
@param context - sender context, to match reply w/ request
- @param enabled - true if enabled, false if disabled
+ @param outside_ip_address - outside IPv4 address
+ @param outside_port - outside port
+ @param inside_ip_address - inside IPv4 address
+ @param inside_port - inside port
+ @param protocol - protocol
+ @param flags - flag NAT_IS_STATIC if session is static,
+ flag NAT_IS_TWICE_NAT if session is twice-nat,
+ flag NAT_IS_EXT_HOST_VALID if external host address
+ and port are valid
+ @param last_heard - last heard timer
+ @param total_bytes - count of bytes sent through session
+ @param total_pkts - count of pakets sent through session
+ @param ext_host_address - external host IPv4 address
+ @param ext_host_port - external host port
+ @param ext_host_nat_address - post-NAT external host IPv4 address (valid
+ only if twice-nat session)
+ @param ext_host_nat_port - post-NAT external host port (valid only if
+ twice-nat session)
*/
-define nat44_forwarding_is_enabled_reply {
+define nat44_user_session_details {
option deprecated;
+
u32 context;
- bool enabled;
+ vl_api_ip4_address_t outside_ip_address;
+ u16 outside_port;
+ vl_api_ip4_address_t inside_ip_address;
+ u16 inside_port;
+ u16 protocol;
+ vl_api_nat_config_flags_t flags;
+ u64 last_heard;
+ u64 total_bytes;
+ u32 total_pkts;
+ vl_api_ip4_address_t ext_host_address;
+ u16 ext_host_port;
+ vl_api_ip4_address_t ext_host_nat_address;
+ u16 ext_host_nat_port;
};
-/** \brief Set NAT handoff frame queue options
+/** \brief NAT44 user's sessions
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param frame_queue_nelts - number of worker handoff frame queue elements
+ @param ip_address - IPv4 address of the user to dump
+ @param vrf_id - VRF_ID
*/
-autoreply define nat44_ed_set_fq_options {
- option in_progress;
+define nat44_user_session_v2_dump {
+ option deprecated;
+
u32 client_index;
u32 context;
- u32 frame_queue_nelts;
+ vl_api_ip4_address_t ip_address;
+ u32 vrf_id;
};
-/** \brief Show NAT handoff frame queue options
- @param client_index - opaque cookie to identify the sender
+/** \brief NAT44 user's sessions response
@param context - sender context, to match reply w/ request
+ @param outside_ip_address - outside IPv4 address
+ @param outside_port - outside port
+ @param inside_ip_address - inside IPv4 address
+ @param inside_port - inside port
+ @param protocol - protocol
+ @param flags - flag NAT_IS_STATIC if session is static,
+ flag NAT_IS_TWICE_NAT if session is twice-nat,
+ flag NAT_IS_EXT_HOST_VALID if external host address
+ and port are valid
+ @param last_heard - last heard timer
+ @param total_bytes - count of bytes sent through session
+ @param total_pkts - count of pakets sent through session
+ @param ext_host_address - external host IPv4 address
+ @param ext_host_port - external host port
+ @param ext_host_nat_address - post-NAT external host IPv4 address (valid
+ only if twice-nat session)
+ @param ext_host_nat_port - post-NAT external host port (valid only if
+ twice-nat session)
+ @param is_timed_out - true, if session is timed out, and false, if session
+ is active
*/
-define nat44_ed_show_fq_options
-{
- option in_progress;
- u32 client_index;
+define nat44_user_session_v2_details {
+ option deprecated;
+
u32 context;
+ vl_api_ip4_address_t outside_ip_address;
+ u16 outside_port;
+ vl_api_ip4_address_t inside_ip_address;
+ u16 inside_port;
+ u16 protocol;
+ vl_api_nat_config_flags_t flags;
+ u64 last_heard;
+ u64 total_bytes;
+ u32 total_pkts;
+ vl_api_ip4_address_t ext_host_address;
+ u16 ext_host_port;
+ vl_api_ip4_address_t ext_host_nat_address;
+ u16 ext_host_nat_port;
+ bool is_timed_out;
};
-/** \brief Show NAT handoff frame queue options reply
+/** \brief NAT44 user's sessions response
@param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param frame_queue_nelts - number of worker handoff frame queue elements
+ @param outside_ip_address - outside IPv4 address
+ @param outside_port - outside port
+ @param inside_ip_address - inside IPv4 address
+ @param inside_port - inside port
+ @param protocol - protocol
+ @param flags - flag NAT_IS_STATIC if session is static,
+ flag NAT_IS_TWICE_NAT if session is twice-nat,
+ flag NAT_IS_EXT_HOST_VALID if external host address
+ and port are valid
+ @param last_heard - last heard timer since VPP start
+ @param time_since_last_heard - difference between current vpp time and last_heard value
+ @param total_bytes - count of bytes sent through session
+ @param total_pkts - count of pakets sent through session
+ @param ext_host_address - external host IPv4 address
+ @param ext_host_port - external host port
+ @param ext_host_nat_address - post-NAT external host IPv4 address (valid
+ only if twice-nat session)
+ @param ext_host_nat_port - post-NAT external host port (valid only if
+ twice-nat session)
+ @param is_timed_out - true, if session is timed out, and false, if session
+ is active
*/
-define nat44_ed_show_fq_options_reply
-{
- option in_progress;
+define nat44_user_session_v3_details {
u32 context;
- i32 retval;
- u32 frame_queue_nelts;
+ vl_api_ip4_address_t outside_ip_address;
+ u16 outside_port;
+ vl_api_ip4_address_t inside_ip_address;
+ u16 inside_port;
+ u16 protocol;
+ vl_api_nat_config_flags_t flags;
+ u64 last_heard;
+ u64 time_since_last_heard;
+ u64 total_bytes;
+ u32 total_pkts;
+ vl_api_ip4_address_t ext_host_address;
+ u16 ext_host_port;
+ vl_api_ip4_address_t ext_host_nat_address;
+ u16 ext_host_nat_port;
+ bool is_timed_out;
+};
+
+/** \brief NAT44 user's sessions
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip_address - IPv4 address of the user to dump
+ @param vrf_id - VRF_ID
+*/
+define nat44_user_session_v3_dump {
+ u32 client_index;
+ u32 context;
+ vl_api_ip4_address_t ip_address;
+ u32 vrf_id;
};
diff --git a/src/plugins/nat/nat44-ed/nat44_ed.c b/src/plugins/nat/nat44-ed/nat44_ed.c
index 4e13907a9d8..08e577747c3 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed.c
@@ -1,6 +1,4 @@
/*
- * snat.c - simple nat plugin
- *
* Copyright (c) 2016 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -28,15 +26,17 @@
#include <vppinfra/bihash_16_8.h>
#include <nat/lib/log.h>
-#include <nat/lib/nat_syslog.h>
#include <nat/lib/nat_inlines.h>
#include <nat/lib/ipfix_logging.h>
+#include <vnet/syslog/syslog.h>
+#include <nat/lib/nat_syslog_constants.h>
+#include <nat/lib/nat_syslog.h>
#include <nat/nat44-ed/nat44_ed.h>
#include <nat/nat44-ed/nat44_ed_affinity.h>
#include <nat/nat44-ed/nat44_ed_inlines.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/stats/stats.h>
snat_main_t snat_main;
@@ -59,7 +59,7 @@ static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
if (PREDICT_FALSE (sm->enabled)) \
{ \
nat_log_err ("plugin enabled"); \
- return 1; \
+ return VNET_API_ERROR_FEATURE_ALREADY_ENABLED; \
} \
} \
while (0)
@@ -71,12 +71,11 @@ static_always_inline void nat_validate_interface_counters (snat_main_t *sm,
if (PREDICT_FALSE (!sm->enabled)) \
{ \
nat_log_err ("plugin disabled"); \
- return 1; \
+ return VNET_API_ERROR_FEATURE_ALREADY_DISABLED; \
} \
} \
while (0)
-/* Hook up input features */
VNET_FEATURE_INIT (nat_pre_in2out, static) = {
.arc_name = "ip4-unicast",
.node_name = "nat-pre-in2out",
@@ -90,6 +89,18 @@ VNET_FEATURE_INIT (nat_pre_out2in, static) = {
"ip4-dhcp-client-detect",
"ip4-sv-reassembly-feature"),
};
+VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-ed-classify",
+ .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
+ "ip4-sv-reassembly-feature"),
+};
+VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "nat44-handoff-classify",
+ .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
+ "ip4-sv-reassembly-feature"),
+};
VNET_FEATURE_INIT (snat_in2out_worker_handoff, static) = {
.arc_name = "ip4-unicast",
.node_name = "nat44-in2out-worker-handoff",
@@ -101,17 +112,6 @@ VNET_FEATURE_INIT (snat_out2in_worker_handoff, static) = {
.runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
"ip4-dhcp-client-detect"),
};
-VNET_FEATURE_INIT (ip4_snat_in2out, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-in2out",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_snat_out2in, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-out2in",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
- "ip4-dhcp-client-detect"),
-};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out, static) = {
.arc_name = "ip4-unicast",
.node_name = "nat44-ed-in2out",
@@ -123,32 +123,9 @@ VNET_FEATURE_INIT (ip4_nat44_ed_out2in, static) = {
.runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
"ip4-dhcp-client-detect"),
};
-VNET_FEATURE_INIT (ip4_nat44_ed_classify, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-ed-classify",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_nat_handoff_classify, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-handoff-classify",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_snat_in2out_fast, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-in2out-fast",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_snat_out2in_fast, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-out2in-fast",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature",
- "ip4-dhcp-client-detect"),
-};
-
-/* Hook up output features */
-VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = {
+VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
.arc_name = "ip4-output",
- .node_name = "nat44-in2out-output",
+ .node_name = "nat-pre-in2out-output",
.runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
.runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
@@ -158,12 +135,6 @@ VNET_FEATURE_INIT (ip4_snat_in2out_output_worker_handoff, static) = {
.runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
.runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
};
-VNET_FEATURE_INIT (nat_pre_in2out_output, static) = {
- .arc_name = "ip4-output",
- .node_name = "nat-pre-in2out-output",
- .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"),
- .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"),
-};
VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = {
.arc_name = "ip4-output",
.node_name = "nat44-ed-in2out-output",
@@ -176,156 +147,375 @@ VLIB_PLUGIN_REGISTER () = {
.description = "Network Address Translation (NAT)",
};
-static void nat44_ed_db_init (u32 translations, u32 translation_buckets);
-
+static void nat44_ed_db_init ();
static void nat44_ed_db_free ();
+static void nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm);
+
+static int nat44_ed_add_static_mapping_internal (
+ ip4_address_t l_addr, ip4_address_t e_addr, u16 l_port, u16 e_port,
+ ip_protocol_t proto, u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag);
+static int nat44_ed_del_static_mapping_internal (ip4_address_t l_addr,
+ ip4_address_t e_addr,
+ u16 l_port, u16 e_port,
+ ip_protocol_t proto,
+ u32 vrf_id, u32 flags);
u32 nat_calc_bihash_buckets (u32 n_elts);
-u8 *
-format_session_kvp (u8 * s, va_list * args)
+static_always_inline int
+nat44_ed_sm_i2o_add (snat_main_t *sm, snat_static_mapping_t *m,
+ ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
{
- clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
+ ASSERT (!pool_is_free (sm->static_mappings, m));
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_i2o_kv (&kv, addr.as_u32, port, fib_index, proto,
+ m - sm->static_mappings);
+ return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 1 /*is_add*/);
+}
- s = format (s, "%U thread-index %llu session-index %llu", format_snat_key,
- v->key, nat_value_get_thread_index (v),
- nat_value_get_session_index (v));
+static_always_inline int
+nat44_ed_sm_i2o_del (snat_main_t *sm, ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto)
+{
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_i2o_k (&kv, addr.as_u32, port, fib_index, proto);
+ return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 0 /*is_add*/);
+}
- return s;
+static_always_inline int
+nat44_ed_sm_o2i_add (snat_main_t *sm, snat_static_mapping_t *m,
+ ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
+{
+ ASSERT (!pool_is_free (sm->static_mappings, m));
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_o2i_kv (&kv, addr.as_u32, port, fib_index, proto,
+ m - sm->static_mappings);
+ return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 1 /*is_add*/);
}
-u8 *
-format_static_mapping_kvp (u8 * s, va_list * args)
+static_always_inline int
+nat44_ed_sm_o2i_del (snat_main_t *sm, ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto)
{
- clib_bihash_kv_8_8_t *v = va_arg (*args, clib_bihash_kv_8_8_t *);
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_o2i_k (&kv, addr.as_u32, port, fib_index, proto);
+ return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, 0 /*is_add*/);
+}
+
+void
+nat44_ed_free_session_data (snat_main_t *sm, snat_session_t *s,
+ u32 thread_index, u8 is_ha)
+{
+ per_vrf_sessions_unregister_session (s, thread_index);
+
+ if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
+ nat_elog_warn (sm, "flow hash del failed");
- s = format (s, "%U static-mapping-index %llu",
- format_snat_key, v->key, v->value);
+ if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
+ nat_elog_warn (sm, "flow hash del failed");
- return s;
+ if (na44_ed_is_fwd_bypass_session (s))
+ {
+ return;
+ }
+
+ if (nat44_ed_is_affinity_session (s))
+ nat_affinity_unlock (s->ext_host_addr, s->out2in.addr, s->proto,
+ s->out2in.port);
+
+ if (!is_ha)
+ nat_syslog_nat44_sdel (0, s->in2out.fib_index, &s->in2out.addr,
+ s->in2out.port, &s->ext_host_nat_addr,
+ s->ext_host_nat_port, &s->out2in.addr,
+ s->out2in.port, &s->ext_host_addr, s->ext_host_port,
+ s->proto, nat44_ed_is_twice_nat_session (s));
+
+ if (!is_ha)
+ {
+ /* log NAT event */
+ nat_ipfix_logging_nat44_ses_delete (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
+ }
}
-u8 *
-format_ed_session_kvp (u8 * s, va_list * args)
+static ip_interface_address_t *
+nat44_ed_get_ip_interface_address (u32 sw_if_index, ip4_address_t addr)
{
- clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
+ snat_main_t *sm = &snat_main;
+
+ ip_lookup_main_t *lm = &sm->ip4_main->lookup_main;
+ ip_interface_address_t *ia;
+ ip4_address_t *ip4a;
+
+ foreach_ip_interface_address (
+ lm, ia, sw_if_index, 1, ({
+ ip4a = ip_interface_address_get_address (lm, ia);
+ nat_log_debug ("sw_if_idx: %u addr: %U ? %U", sw_if_index,
+ format_ip4_address, ip4a, format_ip4_address, &addr);
+ if (ip4a->as_u32 == addr.as_u32)
+ {
+ return ia;
+ }
+ }));
+ return NULL;
+}
- u8 proto;
- u16 r_port, l_port;
- ip4_address_t l_addr, r_addr;
+static int
+nat44_ed_resolve_nat_addr_len (snat_address_t *ap,
+ snat_interface_t *interfaces)
+{
+ ip_interface_address_t *ia;
+ snat_interface_t *i;
u32 fib_index;
- split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
- s = format (s,
- "local %U:%d remote %U:%d proto %U fib %d thread-index %u "
- "session-index %u",
- format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
- format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
- format_ip_protocol, proto, fib_index,
- ed_value_get_thread_index (v), ed_value_get_session_index (v));
+ pool_foreach (i, interfaces)
+ {
+ if (!nat44_ed_is_interface_outside (i))
+ {
+ continue;
+ }
- return s;
+ fib_index = ip4_fib_table_get_index_for_sw_if_index (i->sw_if_index);
+ if (fib_index != ap->fib_index)
+ {
+ continue;
+ }
+
+ if ((ia = nat44_ed_get_ip_interface_address (i->sw_if_index, ap->addr)))
+ {
+ ap->addr_len = ia->address_length;
+ ap->sw_if_index = i->sw_if_index;
+ ap->net.as_u32 = ap->addr.as_u32 & ip4_main.fib_masks[ap->addr_len];
+
+ nat_log_debug ("pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
+ format_ip4_address, &ap->addr, ap->sw_if_index,
+ format_ip4_address, &ap->net, ap->addr_len);
+ return 0;
+ }
+ }
+ return 1;
}
-void
-nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index,
- u8 is_ha)
+static void
+nat44_ed_update_outside_if_addresses (snat_address_t *ap)
{
- per_vrf_sessions_unregister_session (s, thread_index);
-
- if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0))
- nat_elog_warn (sm, "flow hash del failed");
+ snat_main_t *sm = &snat_main;
- if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
- nat_elog_warn (sm, "flow hash del failed");
+ if (!nat44_ed_resolve_nat_addr_len (ap, sm->interfaces))
+ {
+ return;
+ }
- if (is_fwd_bypass_session (s))
+ if (!nat44_ed_resolve_nat_addr_len (ap, sm->output_feature_interfaces))
{
return;
}
+}
+
+static void
+nat44_ed_bind_if_addr_to_nat_addr (u32 sw_if_index)
+{
+ snat_main_t *sm = &snat_main;
+ ip_interface_address_t *ia;
+ snat_address_t *ap;
+
+ u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+
+ vec_foreach (ap, sm->addresses)
+ {
+ if (fib_index != ap->fib_index)
+ {
+ continue;
+ }
+
+ if ((ia = nat44_ed_get_ip_interface_address (sw_if_index, ap->addr)))
+ {
+ ap->addr_len = ia->address_length;
+ ap->sw_if_index = sw_if_index;
+ ap->net.as_u32 = ap->addr.as_u32 & ip4_main.fib_masks[ap->addr_len];
+
+ nat_log_debug ("pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
+ format_ip4_address, &ap->addr, ap->sw_if_index,
+ format_ip4_address, &ap->net, ap->addr_len);
+ return;
+ }
+ }
+}
- if (is_affinity_sessions (s))
- nat_affinity_unlock (s->ext_host_addr, s->out2in.addr,
- s->nat_proto, s->out2in.port);
+static_always_inline snat_fib_entry_reg_t *
+nat44_ed_get_fib_entry_reg (ip4_address_t addr, u32 sw_if_index, int *out_idx)
+{
+ snat_main_t *sm = &snat_main;
+ snat_fib_entry_reg_t *fe;
+ int i;
- if (!is_ha)
- nat_syslog_nat44_sdel (
- 0, s->in2out.fib_index, &s->in2out.addr, s->in2out.port,
- &s->ext_host_nat_addr, s->ext_host_nat_port, &s->out2in.addr,
- s->out2in.port, &s->ext_host_addr, s->ext_host_port, s->nat_proto,
- is_twice_nat_session (s));
+ for (i = 0; i < vec_len (sm->fib_entry_reg); i++)
+ {
+ fe = sm->fib_entry_reg + i;
+ if ((addr.as_u32 == fe->addr.as_u32) && (sw_if_index == fe->sw_if_index))
+ {
+ if (out_idx)
+ {
+ *out_idx = i;
+ }
+ return fe;
+ }
+ }
+ return NULL;
+}
- if (snat_is_unk_proto_session (s))
- return;
+static void
+nat44_ed_add_fib_entry_reg (ip4_address_t addr, u32 sw_if_index)
+{
+ // Add the external NAT address to the FIB as receive entries. This ensures
+ // that VPP will reply to ARP for this address and we don't need to enable
+ // proxy ARP on the outside interface.
+ snat_main_t *sm = &snat_main;
+ snat_fib_entry_reg_t *fe;
- if (!is_ha)
+ if (!(fe = nat44_ed_get_fib_entry_reg (addr, sw_if_index, 0)))
{
- /* log NAT event */
- nat_ipfix_logging_nat44_ses_delete (thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port,
- s->in2out.fib_index);
+ fib_prefix_t prefix = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = addr.as_u32,
+ },
+ };
+ u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+ fib_table_entry_update_one_path (fib_index, &prefix, sm->fib_src_low,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL |
+ FIB_ENTRY_FLAG_EXCLUSIVE),
+ DPO_PROTO_IP4, NULL, sw_if_index, ~0, 1,
+ NULL, FIB_ROUTE_PATH_FLAG_NONE);
+
+ vec_add2 (sm->fib_entry_reg, fe, 1);
+ clib_memset (fe, 0, sizeof (*fe));
+ fe->addr.as_u32 = addr.as_u32;
+ fe->sw_if_index = sw_if_index;
+ }
+ fe->count++;
+}
+
+static void
+nat44_ed_del_fib_entry_reg (ip4_address_t addr, u32 sw_if_index)
+{
+ snat_main_t *sm = &snat_main;
+ snat_fib_entry_reg_t *fe;
+ int i;
+
+ if ((fe = nat44_ed_get_fib_entry_reg (addr, sw_if_index, &i)))
+ {
+ fe->count--;
+ if (0 == fe->count)
+ {
+ fib_prefix_t prefix = {
+ .fp_len = 32,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4.as_u32 = addr.as_u32,
+ },
+ };
+ u32 fib_index =
+ ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+ fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
+ vec_del1 (sm->fib_entry_reg, i);
+ }
}
+}
- /* Twice NAT address and port for external host */
- if (is_twice_nat_session (s))
+static void
+nat44_ed_add_del_interface_fib_reg_entries (ip4_address_t addr, u8 is_add)
+{
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i;
+
+ pool_foreach (i, sm->interfaces)
+ {
+ if (nat44_ed_is_interface_outside (i))
+ {
+ if (is_add)
+ {
+ nat44_ed_add_fib_entry_reg (addr, i->sw_if_index);
+ }
+ else
+ {
+ nat44_ed_del_fib_entry_reg (addr, i->sw_if_index);
+ }
+ }
+ }
+ pool_foreach (i, sm->output_feature_interfaces)
{
- snat_free_outside_address_and_port (sm->twice_nat_addresses,
- thread_index,
- &s->ext_host_nat_addr,
- s->ext_host_nat_port, s->nat_proto);
+ if (nat44_ed_is_interface_outside (i))
+ {
+ if (is_add)
+ {
+ nat44_ed_add_fib_entry_reg (addr, i->sw_if_index);
+ }
+ else
+ {
+ nat44_ed_del_fib_entry_reg (addr, i->sw_if_index);
+ }
+ }
}
+}
- if (snat_is_session_static (s))
- return;
+static_always_inline void
+nat44_ed_add_del_nat_addr_fib_reg_entries (u32 sw_if_index, u8 is_add)
+{
+ snat_main_t *sm = &snat_main;
+ snat_address_t *ap;
- snat_free_outside_address_and_port (sm->addresses, thread_index,
- &s->out2in.addr, s->out2in.port,
- s->nat_proto);
+ vec_foreach (ap, sm->addresses)
+ {
+ if (is_add)
+ {
+ nat44_ed_add_fib_entry_reg (ap->addr, sw_if_index);
+ }
+ else
+ {
+ nat44_ed_del_fib_entry_reg (ap->addr, sw_if_index);
+ }
+ }
}
-void
-snat_add_del_addr_to_fib (ip4_address_t * addr, u8 p_len, u32 sw_if_index,
- int is_add)
+static_always_inline void
+nat44_ed_add_del_sm_fib_reg_entries (u32 sw_if_index, u8 is_add)
{
snat_main_t *sm = &snat_main;
- fib_prefix_t prefix = {
- .fp_len = p_len,
- .fp_proto = FIB_PROTOCOL_IP4,
- .fp_addr = {
- .ip4.as_u32 = addr->as_u32,
- },
- };
- u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+ snat_static_mapping_t *m;
- if (is_add)
- fib_table_entry_update_one_path (fib_index,
- &prefix,
- sm->fib_src_low,
- (FIB_ENTRY_FLAG_CONNECTED |
- FIB_ENTRY_FLAG_LOCAL |
- FIB_ENTRY_FLAG_EXCLUSIVE),
- DPO_PROTO_IP4,
- NULL,
- sw_if_index,
- ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
- else
- fib_table_entry_delete (fib_index, &prefix, sm->fib_src_low);
+ pool_foreach (m, sm->static_mappings)
+ {
+ if (is_add)
+ {
+ nat44_ed_add_fib_entry_reg (m->external_addr, sw_if_index);
+ }
+ else
+ {
+ nat44_ed_del_fib_entry_reg (m->external_addr, sw_if_index);
+ }
+ }
}
int
-snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
- u8 twice_nat)
+nat44_ed_add_address (ip4_address_t *addr, u32 vrf_id, u8 twice_nat)
{
- snat_address_t *ap;
- snat_interface_t *i;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
+ snat_main_t *sm = &snat_main;
+ snat_address_t *ap, *addresses;
+
+ addresses = twice_nat ? sm->twice_nat_addresses : sm->addresses;
- /* Check if address already exists */
- vec_foreach (ap, twice_nat ? sm->twice_nat_addresses : sm->addresses)
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ // check if address already exists
+ vec_foreach (ap, addresses)
{
if (ap->addr.as_u32 == addr->as_u32)
{
@@ -335,62 +525,231 @@ snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
}
if (twice_nat)
- vec_add2 (sm->twice_nat_addresses, ap, 1);
+ {
+ vec_add2 (sm->twice_nat_addresses, ap, 1);
+ }
else
- vec_add2 (sm->addresses, ap, 1);
+ {
+ vec_add2 (sm->addresses, ap, 1);
+ }
+ ap->addr_len = ~0;
+ ap->fib_index = ~0;
ap->addr = *addr;
+
if (vrf_id != ~0)
- ap->fib_index =
- fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
- sm->fib_src_low);
+ {
+ ap->fib_index = fib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, vrf_id, sm->fib_src_low);
+ }
+
+ if (!twice_nat)
+ {
+ // if we don't have enabled interface we don't add address
+ // to fib
+ nat44_ed_add_del_interface_fib_reg_entries (*addr, 1);
+ nat44_ed_update_outside_if_addresses (ap);
+ }
+ return 0;
+}
+
+int
+nat44_ed_del_address (ip4_address_t addr, u8 twice_nat)
+{
+ snat_main_t *sm = &snat_main;
+ snat_address_t *a = 0, *addresses;
+ snat_session_t *ses;
+ u32 *ses_to_be_removed = 0, *ses_index;
+ snat_main_per_thread_data_t *tsm;
+ int j;
+
+ addresses = twice_nat ? sm->twice_nat_addresses : sm->addresses;
+
+ for (j = 0; j < vec_len (addresses); j++)
+ {
+ if (addresses[j].addr.as_u32 == addr.as_u32)
+ {
+ a = addresses + j;
+ break;
+ }
+ }
+ if (!a)
+ {
+ nat_log_err ("no such address");
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ // delete dynamic sessions only
+ vec_foreach (tsm, sm->per_thread_data)
+ {
+ pool_foreach (ses, tsm->sessions)
+ {
+ if (ses->flags & SNAT_SESSION_FLAG_STATIC_MAPPING)
+ {
+ continue;
+ }
+ if (ses->out2in.addr.as_u32 == addr.as_u32)
+ {
+ nat44_ed_free_session_data (sm, ses, tsm - sm->per_thread_data,
+ 0);
+ vec_add1 (ses_to_be_removed, ses - tsm->sessions);
+ }
+ }
+ vec_foreach (ses_index, ses_to_be_removed)
+ {
+ ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
+ nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
+ }
+ vec_free (ses_to_be_removed);
+ }
+
+ if (!twice_nat)
+ {
+ nat44_ed_add_del_interface_fib_reg_entries (addr, 0);
+ }
+
+ if (a->fib_index != ~0)
+ {
+ fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+ }
+
+ if (!twice_nat)
+ {
+ vec_del1 (sm->addresses, j);
+ }
else
- ap->fib_index = ~0;
+ {
+ vec_del1 (sm->twice_nat_addresses, j);
+ }
- #define _(N, i, n, s) \
- clib_memset(ap->busy_##n##_port_refcounts, 0, sizeof(ap->busy_##n##_port_refcounts));\
- ap->busy_##n##_ports = 0; \
- ap->busy_##n##_ports_per_thread = 0;\
- vec_validate_init_empty (ap->busy_##n##_ports_per_thread, tm->n_vlib_mains - 1, 0);
- foreach_nat_protocol
- #undef _
+ return 0;
+}
- if (twice_nat)
- return 0;
+vrf_table_t *
+nat44_ed_get_vrf_table (u32 table_vrf_id)
+{
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
- /* Add external address to FIB */
- pool_foreach (i, sm->interfaces)
- {
- if (nat_interface_is_inside (i))
- continue;
+ pool_foreach (t, sm->vrf_tables)
+ {
+ if (table_vrf_id == t->table_vrf_id)
+ {
+ return t;
+ }
+ }
+ return NULL;
+}
- snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
- break;
- }
- pool_foreach (i, sm->output_feature_interfaces)
- {
- if (nat_interface_is_inside (i))
- continue;
+vrf_route_t *
+nat44_ed_get_vrf_route (vrf_table_t *t, u32 vrf_id)
+{
+ vrf_route_t *r;
- snat_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
- break;
- }
+ pool_foreach (r, t->routes)
+ {
+ if (vrf_id == r->vrf_id)
+ {
+ return r;
+ }
+ }
+ return NULL;
+}
+
+int
+nat44_ed_add_del_vrf_table (u32 table_vrf_id, bool is_add)
+{
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
+ vrf_route_t *r;
+
+ t = nat44_ed_get_vrf_table (table_vrf_id);
+ if (t)
+ {
+ if (is_add)
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+ pool_foreach (r, t->routes)
+ {
+ fib_table_unlock (r->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+ }
+ fib_table_unlock (t->table_fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+
+ pool_free (t->routes);
+ pool_put (sm->vrf_tables, t);
+ }
+ else
+ {
+ if (!is_add)
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+ pool_get (sm->vrf_tables, t);
+ clib_memset (t, 0, sizeof (*t));
+ t->table_vrf_id = table_vrf_id;
+ t->table_fib_index = fib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, table_vrf_id, sm->fib_src_low);
+ }
return 0;
}
-static int
-is_snat_address_used_in_static_mapping (snat_main_t * sm, ip4_address_t addr)
+void
+nat44_ed_del_vrf_tables ()
{
- snat_static_mapping_t *m;
- pool_foreach (m, sm->static_mappings)
- {
- if (is_sm_addr_only (m->flags) || is_sm_out2in_only (m->flags) ||
- is_sm_identity_nat (m->flags))
- continue;
- if (m->external_addr.as_u32 == addr.as_u32)
- return 1;
- }
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
+ vrf_route_t *r;
+
+ pool_foreach (t, sm->vrf_tables)
+ {
+ pool_foreach (r, t->routes)
+ {
+ fib_table_unlock (r->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+ }
+ fib_table_unlock (t->table_fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+ pool_free (t->routes);
+ }
+ pool_free (sm->vrf_tables);
+}
+
+int
+nat44_ed_add_del_vrf_route (u32 table_vrf_id, u32 vrf_id, bool is_add)
+{
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
+ vrf_route_t *r;
+
+ t = nat44_ed_get_vrf_table (table_vrf_id);
+ if (!t)
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ r = nat44_ed_get_vrf_route (t, vrf_id);
+ if (r)
+ {
+ if (is_add)
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+ fib_table_unlock (r->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
+ pool_put (t->routes, r);
+ }
+ else
+ {
+ if (!is_add)
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+ pool_get (t->routes, r);
+ clib_memset (r, 0, sizeof (*r));
+ r->vrf_id = vrf_id;
+ r->fib_index = fib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, vrf_id, sm->fib_src_low);
+ }
+
return 0;
}
@@ -401,9 +760,9 @@ get_thread_idx_by_port (u16 e_port)
u32 thread_idx = sm->num_workers;
if (sm->num_workers > 1)
{
- thread_idx =
- sm->first_worker_index +
- sm->workers[(e_port - 1024) / sm->port_per_thread];
+ thread_idx = sm->first_worker_index +
+ sm->workers[(e_port - ED_USER_PORT_OFFSET) /
+ sm->port_per_thread % _vec_len (sm->workers)];
}
return thread_idx;
}
@@ -427,18 +786,17 @@ nat_ed_static_mapping_del_sessions (snat_main_t * sm,
}
if (!addr_only)
{
- if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
- s->out2in.port != e_port ||
- s->in2out.port != l_port ||
- s->nat_proto != protocol)
- continue;
+ if ((s->out2in.addr.as_u32 != e_addr.as_u32) ||
+ s->out2in.port != e_port || s->in2out.port != l_port ||
+ s->proto != protocol)
+ continue;
}
- if (is_lb_session (s))
+ if (nat44_ed_is_lb_session (s))
continue;
- if (!snat_is_session_static (s))
+ if (!nat44_ed_is_session_static (s))
continue;
- nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
+ nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
vec_add1 (indexes_to_free, s - tsm->sessions);
if (!addr_only)
break;
@@ -452,118 +810,50 @@ nat_ed_static_mapping_del_sessions (snat_main_t * sm,
vec_free (indexes_to_free);
}
-int
-nat44_ed_reserve_port (ip4_address_t addr, u16 port, nat_protocol_t proto)
+static_always_inline snat_static_mapping_t *
+nat44_ed_sm_lookup (snat_main_t *sm, clib_bihash_kv_16_8_t *kv)
{
- u32 ti = get_thread_idx_by_port (port);
- snat_main_t *sm = &snat_main;
- snat_address_t *a = 0;
- int i;
-
- for (i = 0; i < vec_len (sm->addresses); i++)
+ clib_bihash_kv_16_8_t v;
+ int rc = clib_bihash_search_16_8 (&sm->flow_hash, kv, &v);
+ if (!rc)
{
- a = sm->addresses + i;
-
- if (a->addr.as_u32 != addr.as_u32)
- continue;
-
- switch (proto)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_port_refcounts[port]) \
- goto done; \
- ++a->busy_##n##_port_refcounts[port]; \
- if (port > 1024) \
- { \
- a->busy_##n##_ports++; \
- a->busy_##n##_ports_per_thread[ti]++; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- goto done;
- }
-
- return 0;
+ ASSERT (0 == ed_value_get_thread_index (&v));
+ return pool_elt_at_index (sm->static_mappings,
+ ed_value_get_session_index (&v));
}
-
-done:
- return 1;
+ return NULL;
}
-int
-nat44_ed_free_port (ip4_address_t addr, u16 port, nat_protocol_t proto)
+snat_static_mapping_t *
+nat44_ed_sm_o2i_lookup (snat_main_t *sm, ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto)
{
- u32 ti = get_thread_idx_by_port (port);
- snat_main_t *sm = &snat_main;
- snat_address_t *a = 0;
- int i;
-
- for (i = 0; i < vec_len (sm->addresses); i++)
- {
- a = sm->addresses + i;
-
- if (a->addr.as_u32 != addr.as_u32)
- continue;
-
- switch (proto)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- --a->busy_##n##_port_refcounts[port]; \
- if (port > 1024) \
- { \
- a->busy_##n##_ports--; \
- a->busy_##n##_ports_per_thread[ti]--; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- goto done;
- }
-
- return 0;
- }
-
-done:
- return 1;
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_o2i_k (&kv, addr.as_u32, port, fib_index, proto);
+ return nat44_ed_sm_lookup (sm, &kv);
}
-void
-nat44_ed_add_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
- nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
- u32 flags, ip4_address_t pool_addr, u8 *tag)
+snat_static_mapping_t *
+nat44_ed_sm_i2o_lookup (snat_main_t *sm, ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto)
{
- snat_static_map_resolve_t *rp;
- snat_main_t *sm = &snat_main;
-
- vec_add2 (sm->to_resolve, rp, 1);
- rp->l_addr.as_u32 = l_addr.as_u32;
- rp->l_port = l_port;
- rp->e_port = e_port;
- rp->sw_if_index = sw_if_index;
- rp->vrf_id = vrf_id;
- rp->proto = proto;
- rp->flags = flags;
- rp->pool_addr = pool_addr;
- rp->tag = vec_dup (tag);
+ clib_bihash_kv_16_8_t kv;
+ nat44_ed_sm_init_i2o_k (&kv, addr.as_u32, port, fib_index, proto);
+ return nat44_ed_sm_lookup (sm, &kv);
}
-int
+static snat_static_mapping_resolve_t *
nat44_ed_get_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
- nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
- u32 flags, int *out)
+ ip_protocol_t proto, u32 vrf_id, u32 sw_if_index,
+ u32 flags, int *out_idx)
{
- snat_static_map_resolve_t *rp;
+ snat_static_mapping_resolve_t *rp;
snat_main_t *sm = &snat_main;
int i;
- for (i = 0; i < vec_len (sm->to_resolve); i++)
+ for (i = 0; i < vec_len (sm->sm_to_resolve); i++)
{
- rp = sm->to_resolve + i;
+ rp = sm->sm_to_resolve + i;
if (rp->sw_if_index == sw_if_index && rp->vrf_id == vrf_id)
{
@@ -592,27 +882,27 @@ nat44_ed_get_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
{
continue;
}
- if (out)
+ if (out_idx)
{
- *out = i;
+ *out_idx = i;
}
- return 0;
+ return rp;
}
}
- return 1;
+ return NULL;
}
-int
+static int
nat44_ed_del_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
- nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
+ ip_protocol_t proto, u32 vrf_id, u32 sw_if_index,
u32 flags)
{
snat_main_t *sm = &snat_main;
int i;
- if (!nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
- sw_if_index, flags, &i))
+ if (nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags, &i))
{
- vec_del1 (sm->to_resolve, i);
+ vec_del1 (sm->sm_to_resolve, i);
return 0;
}
return 1;
@@ -639,45 +929,98 @@ nat44_ed_validate_sm_input (u32 flags)
int
nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
- u16 l_port, u16 e_port, nat_protocol_t proto,
+ u16 l_port, u16 e_port, ip_protocol_t proto,
u32 vrf_id, u32 sw_if_index, u32 flags,
ip4_address_t pool_addr, u8 *tag)
{
+ snat_static_mapping_resolve_t *rp;
snat_main_t *sm = &snat_main;
- clib_bihash_kv_8_8_t kv, value;
- snat_interface_t *interface;
- nat44_lb_addr_port_t *local;
- snat_static_mapping_t *m;
- u32 fib_index = ~0;
int rv;
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
rv = nat44_ed_validate_sm_input (flags);
if (rv != 0)
{
return rv;
}
- if (is_sm_addr_only (flags))
+ // interface bound mapping
+ if (is_sm_switch_address (flags))
{
- e_port = l_port = proto = 0;
+ if (nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags, 0))
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+ vec_add2 (sm->sm_to_resolve, rp, 1);
+ rp->l_addr.as_u32 = l_addr.as_u32;
+ rp->l_port = l_port;
+ rp->e_port = e_port;
+ rp->sw_if_index = sw_if_index;
+ rp->vrf_id = vrf_id;
+ rp->proto = proto;
+ rp->flags = flags;
+ rp->pool_addr = pool_addr;
+ rp->tag = vec_dup (tag);
+ rp->is_resolved = 0;
+
+ ip4_address_t *first_int_addr =
+ ip4_interface_first_address (sm->ip4_main, sw_if_index, 0);
+ if (!first_int_addr)
+ {
+ return 0;
+ }
+
+ e_addr.as_u32 = first_int_addr->as_u32;
+ rp->is_resolved = 1;
}
- if (is_sm_switch_address (flags))
+ rv = nat44_ed_add_static_mapping_internal (l_addr, e_addr, l_port, e_port,
+ proto, vrf_id, sw_if_index, flags,
+ pool_addr, tag);
+ if ((0 != rv) && is_sm_switch_address (flags))
{
- // this mapping is interface bound
- ip4_address_t *first_int_addr;
+ nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags);
+ }
+
+ return rv;
+}
+
+int
+nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, ip_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags)
+{
+ snat_main_t *sm = &snat_main;
+ int rv;
+
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ rv = nat44_ed_validate_sm_input (flags);
+ if (rv != 0)
+ {
+ return rv;
+ }
- // check if this record isn't registered for resolve
- if (!nat44_ed_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
- sw_if_index, flags, 0))
+ // interface bound mapping
+ if (is_sm_switch_address (flags))
+ {
+ if (nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags))
{
- return VNET_API_ERROR_VALUE_EXIST;
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
}
- // register record for resolve
- nat44_ed_add_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
- sw_if_index, flags, pool_addr, tag);
- first_int_addr =
+ ip4_address_t *first_int_addr =
ip4_interface_first_address (sm->ip4_main, sw_if_index, 0);
if (!first_int_addr)
{
@@ -688,25 +1031,44 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
e_addr.as_u32 = first_int_addr->as_u32;
}
+ return nat44_ed_del_static_mapping_internal (l_addr, e_addr, l_port, e_port,
+ proto, vrf_id, flags);
+}
+
+static int
+nat44_ed_add_static_mapping_internal (ip4_address_t l_addr,
+ ip4_address_t e_addr, u16 l_port,
+ u16 e_port, ip_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag)
+{
+ snat_main_t *sm = &snat_main;
+ nat44_lb_addr_port_t *local;
+ snat_static_mapping_t *m;
+ u32 fib_index = ~0;
+
+ if (is_sm_addr_only (flags))
+ {
+ e_port = l_port = proto = 0;
+ }
+
if (is_sm_identity_nat (flags))
{
l_port = e_port;
l_addr.as_u32 = e_addr.as_u32;
}
- // fib index 0
- init_nat_k (&kv, e_addr, e_port, 0, proto);
-
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value.value);
+ // case:
+ // adding local identity nat record for different vrf table
+
if (!is_sm_identity_nat (m->flags))
{
return VNET_API_ERROR_VALUE_EXIST;
}
- // case:
- // adding local identity nat record for different vrf table
pool_foreach (local, m->locals)
{
if (local->vrf_id == vrf_id)
@@ -721,9 +1083,8 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
local->fib_index = fib_table_find_or_create_and_lock (
FIB_PROTOCOL_IP4, vrf_id, sm->fib_src_low);
- init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
- m->proto, 0, m - sm->static_mappings);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
+ nat44_ed_sm_i2o_add (sm, m, m->local_addr, m->local_port,
+ local->fib_index, m->proto);
return 0;
}
@@ -745,28 +1106,12 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
// identity nat supports multiple records in local mapping
if (!(is_sm_out2in_only (flags) || is_sm_identity_nat (flags)))
{
- init_nat_k (&kv, l_addr, l_port, fib_index, proto);
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
+ if (nat44_ed_sm_i2o_lookup (sm, l_addr, l_port, fib_index, proto))
{
return VNET_API_ERROR_VALUE_EXIST;
}
}
- if (!(is_sm_out2in_only (flags) || is_sm_addr_only (flags) ||
- sm->static_mapping_only))
- {
- if (nat44_ed_reserve_port (e_addr, e_port, proto))
- {
- // remove resolve record
- if (is_sm_switch_address (flags) && !is_sm_identity_nat (flags))
- {
- nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto,
- vrf_id, sw_if_index, flags);
- }
- return VNET_API_ERROR_NO_SUCH_ENTRY;
- }
- }
-
pool_get (sm->static_mappings, m);
clib_memset (m, 0, sizeof (*m));
@@ -774,13 +1119,9 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
m->local_addr = l_addr;
m->external_addr = e_addr;
+ m->pool_addr = pool_addr;
m->tag = vec_dup (tag);
- if (is_sm_exact_address (flags) && is_sm_twice_nat (flags))
- {
- m->pool_addr = pool_addr;
- }
-
if (!is_sm_addr_only (flags))
{
m->local_port = l_port;
@@ -803,14 +1144,11 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
if (!is_sm_out2in_only (flags))
{
- init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto, 0,
- m - sm->static_mappings);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
+ nat44_ed_sm_i2o_add (sm, m, m->local_addr, m->local_port, fib_index,
+ m->proto);
}
- init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
- m - sm->static_mappings);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1);
+ nat44_ed_sm_o2i_add (sm, m, m->external_addr, m->external_port, 0, m->proto);
if (sm->num_workers > 1)
{
@@ -824,79 +1162,29 @@ nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
vec_add1 (m->workers, worker_index);
}
- if (is_sm_identity_nat (flags) || !is_sm_addr_only (flags))
- return 0;
-
- pool_foreach (interface, sm->interfaces)
- {
- if (nat_interface_is_inside (interface))
- continue;
-
- snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, 1);
- break;
- }
-
- pool_foreach (interface, sm->output_feature_interfaces)
- {
- if (nat_interface_is_inside (interface))
- continue;
-
- snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, 1);
- break;
- }
+ nat44_ed_add_del_interface_fib_reg_entries (e_addr, 1);
return 0;
}
-int
-nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
- u16 l_port, u16 e_port, nat_protocol_t proto,
- u32 vrf_id, u32 sw_if_index, u32 flags)
+static int
+nat44_ed_del_static_mapping_internal (ip4_address_t l_addr,
+ ip4_address_t e_addr, u16 l_port,
+ u16 e_port, ip_protocol_t proto,
+ u32 vrf_id, u32 flags)
{
snat_main_per_thread_data_t *tsm;
snat_main_t *sm = &snat_main;
- clib_bihash_kv_8_8_t kv, value;
- snat_interface_t *interface;
nat44_lb_addr_port_t *local;
snat_static_mapping_t *m;
u32 fib_index = ~0;
- int rv;
-
- rv = nat44_ed_validate_sm_input (flags);
- if (rv != 0)
- {
- return rv;
- }
if (is_sm_addr_only (flags))
{
e_port = l_port = proto = 0;
}
- if (is_sm_switch_address (flags))
- {
- // this mapping is interface bound
- ip4_address_t *first_int_addr;
-
- // delete record registered for resolve
- if (nat44_ed_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
- sw_if_index, flags))
- {
- return VNET_API_ERROR_NO_SUCH_ENTRY;
- }
-
- first_int_addr =
- ip4_interface_first_address (sm->ip4_main, sw_if_index, 0);
- if (!first_int_addr)
- {
- // dhcp resolution required
- return 0;
- }
-
- e_addr.as_u32 = first_int_addr->as_u32;
- }
-
if (is_sm_identity_nat (flags))
{
l_port = e_port;
@@ -904,24 +1192,17 @@ nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
}
// fib index 0
- init_nat_k (&kv, e_addr, e_port, 0, proto);
-
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
+ if (!m)
{
- if (is_sm_switch_address (flags))
- {
- return 0;
- }
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
- m = pool_elt_at_index (sm->static_mappings, value.value);
-
if (is_sm_identity_nat (flags))
{
- u8 failure = 1;
+ u8 found = 0;
- if (!is_sm_switch_address (flags))
+ if (vrf_id == ~0)
{
vrf_id = sm->inside_vrf_id;
}
@@ -933,11 +1214,11 @@ nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
local = pool_elt_at_index (m->locals, local - m->locals);
fib_index = local->fib_index;
pool_put (m->locals, local);
- failure = 0;
+ found = 1;
}
}
- if (failure)
+ if (!found)
{
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
@@ -947,66 +1228,38 @@ nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
fib_index = m->fib_index;
}
- if (!(is_sm_out2in_only (flags) || is_sm_addr_only (flags) ||
- sm->static_mapping_only))
+ if (!is_sm_out2in_only (flags))
{
- if (nat44_ed_free_port (e_addr, e_port, proto))
- {
- return VNET_API_ERROR_INVALID_VALUE;
- }
+ nat44_ed_sm_i2o_del (sm, l_addr, l_port, fib_index, proto);
}
- if (!is_sm_out2in_only (flags))
+ // delete sessions for static mapping
+ if (sm->num_workers > 1)
{
- init_nat_k (&kv, l_addr, l_port, fib_index, proto);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0);
+ tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
}
-
- if (!sm->static_mapping_only || sm->static_mapping_connection_tracking)
+ else
{
- // delete sessions for static mapping
- if (sm->num_workers > 1)
- tsm = vec_elt_at_index (sm->per_thread_data, m->workers[0]);
- else
- tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
-
- nat_ed_static_mapping_del_sessions (
- sm, tsm, m->local_addr, m->local_port, m->proto, fib_index,
- is_sm_addr_only (flags), e_addr, e_port);
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
}
- fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
-
- if (pool_elts (m->locals))
- return 0;
-
- // fib_index 0
- init_nat_k (&kv, e_addr, e_port, 0, proto);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0);
-
- vec_free (m->tag);
- vec_free (m->workers);
- pool_put (sm->static_mappings, m);
+ nat_ed_static_mapping_del_sessions (sm, tsm, m->local_addr, m->local_port,
+ m->proto, fib_index,
+ is_sm_addr_only (flags), e_addr, e_port);
- if (is_sm_identity_nat (flags) || !is_sm_addr_only (flags))
- return 0;
+ fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
- pool_foreach (interface, sm->interfaces)
+ if (!pool_elts (m->locals))
{
- if (nat_interface_is_inside (interface))
- continue;
+ // this is last record remove all required stuff
+ // fib_index 0
+ nat44_ed_sm_o2i_del (sm, e_addr, e_port, 0, proto);
- snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, 0);
- break;
- }
-
- pool_foreach (interface, sm->output_feature_interfaces)
- {
- if (nat_interface_is_inside (interface))
- continue;
+ vec_free (m->tag);
+ vec_free (m->workers);
+ pool_put (sm->static_mappings, m);
- snat_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index, 0);
- break;
+ nat44_ed_add_del_interface_fib_reg_entries (e_addr, 0);
}
return 0;
@@ -1014,66 +1267,59 @@ nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
int
nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
- nat_protocol_t proto,
+ ip_protocol_t proto,
nat44_lb_addr_port_t *locals, u32 flags,
u8 *tag, u32 affinity)
{
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
- clib_bihash_kv_8_8_t kv, value;
snat_address_t *a = 0;
nat44_lb_addr_port_t *local;
uword *bitmap = 0;
+ int rc = 0;
int i;
- init_nat_k (&kv, e_addr, e_port, 0, proto);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
- m = 0;
- else
- m = pool_elt_at_index (sm->static_mappings, value.value);
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
if (m)
- return VNET_API_ERROR_VALUE_EXIST;
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
if (vec_len (locals) < 2)
- return VNET_API_ERROR_INVALID_VALUE;
+ {
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
- /* Find external address in allocated addresses and reserve port for
- address and port pair mapping when dynamic translations enabled */
- if (!(sm->static_mapping_only || is_sm_out2in_only (flags)))
+ if (!is_sm_out2in_only (flags))
{
+ /* Find external address in allocated addresses and reserve port for
+ address and port pair mapping when dynamic translations enabled */
for (i = 0; i < vec_len (sm->addresses); i++)
{
if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
{
- a = sm->addresses + i;
/* External port must be unused */
- switch (proto)
+ a = sm->addresses + i;
+ if (nat44_ed_sm_o2i_lookup (sm, a->addr, e_port, 0, proto))
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_port_refcounts[e_port]) \
- return VNET_API_ERROR_INVALID_VALUE; \
- ++a->busy_##n##_port_refcounts[e_port]; \
- if (e_port > 1024) \
- { \
- a->busy_##n##_ports++; \
- a->busy_##n##_ports_per_thread[get_thread_idx_by_port (e_port)]++; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return VNET_API_ERROR_INVALID_VALUE_2;
+ return VNET_API_ERROR_VALUE_EXIST;
}
break;
}
}
- /* External address must be allocated */
+ // external address must be allocated
if (!a)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
}
pool_get (sm->static_mappings, m);
@@ -1093,11 +1339,10 @@ nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
else
m->affinity_per_service_list_head_index = ~0;
- init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
- m - sm->static_mappings);
- if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 1))
+ if (nat44_ed_sm_o2i_add (sm, m, m->external_addr, m->external_port, 0,
+ m->proto))
{
- nat_elog_err (sm, "static_mapping_by_external key add failed");
+ nat_log_err ("sm o2i key add failed");
return VNET_API_ERROR_UNSPECIFIED;
}
@@ -1107,10 +1352,17 @@ nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
FIB_PROTOCOL_IP4, locals[i].vrf_id, sm->fib_src_low);
if (!is_sm_out2in_only (flags))
{
- init_nat_kv (&kv, locals[i].addr, locals[i].port,
- locals[i].fib_index, m->proto, 0,
- m - sm->static_mappings);
- clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1);
+ if (nat44_ed_sm_i2o_add (sm, m, locals[i].addr, locals[i].port, 0,
+ proto))
+ {
+ nat_log_err ("sm i2o key add failed");
+ rc = VNET_API_ERROR_UNSPECIFIED;
+ // here we continue with add operation so that it can be safely
+ // reversed in delete path - otherwise we'd have to track what
+ // we've done and deal with partial cleanups and since bihash
+ // adds are (extremely improbable) the only points of failure,
+ // it's easier to just do it this way
+ }
}
locals[i].prefix = (i == 0) ?
locals[i].probability :
@@ -1137,68 +1389,36 @@ nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
}
}
- return 0;
+ return rc;
}
int
nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
- nat_protocol_t proto, u32 flags)
+ ip_protocol_t proto, u32 flags)
{
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
- clib_bihash_kv_8_8_t kv, value;
- snat_address_t *a = 0;
nat44_lb_addr_port_t *local;
snat_main_per_thread_data_t *tsm;
snat_session_t *s;
- int i;
- init_nat_k (&kv, e_addr, e_port, 0, proto);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
- m = 0;
- else
- m = pool_elt_at_index (sm->static_mappings, value.value);
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+ m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
if (!m)
return VNET_API_ERROR_NO_SUCH_ENTRY;
if (!is_sm_lb (m->flags))
return VNET_API_ERROR_INVALID_VALUE;
- /* Free external address port */
- if (!(sm->static_mapping_only || is_sm_out2in_only (flags)))
- {
- for (i = 0; i < vec_len (sm->addresses); i++)
- {
- if (sm->addresses[i].addr.as_u32 == e_addr.as_u32)
- {
- a = sm->addresses + i;
- switch (proto)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- --a->busy_##n##_port_refcounts[e_port]; \
- if (e_port > 1024) \
- { \
- a->busy_##n##_ports--; \
- a->busy_##n##_ports_per_thread[get_thread_idx_by_port (e_port)]--; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return VNET_API_ERROR_INVALID_VALUE_2;
- }
- break;
- }
- }
- }
-
- init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
- if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_external, &kv, 0))
+ if (nat44_ed_sm_o2i_del (sm, m->external_addr, m->external_port, 0,
+ m->proto))
{
- nat_elog_err (sm, "static_mapping_by_external key del failed");
+ nat_log_err ("sm o2i key del failed");
return VNET_API_ERROR_UNSPECIFIED;
}
@@ -1207,12 +1427,11 @@ nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
fib_table_unlock (local->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
if (!is_sm_out2in_only (flags))
{
- init_nat_k (&kv, local->addr, local->port, local->fib_index,
- m->proto);
- if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
+ if (nat44_ed_sm_i2o_del (sm, local->addr, local->port,
+ local->fib_index, m->proto))
{
- nat_elog_err (sm, "static_mapping_by_local key del failed");
- return VNET_API_ERROR_UNSPECIFIED;
+ nat_log_err ("sm i2o key del failed");
+ // For the same reasons as above
}
}
@@ -1231,14 +1450,14 @@ nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
/* Delete sessions */
pool_foreach (s, tsm->sessions)
{
- if (!(is_lb_session (s)))
+ if (!(nat44_ed_is_lb_session (s)))
continue;
if ((s->in2out.addr.as_u32 != local->addr.as_u32) ||
s->in2out.port != local->port)
continue;
- nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
+ nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
}
}
@@ -1259,12 +1478,11 @@ nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
int
nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
ip4_address_t l_addr, u16 l_port,
- nat_protocol_t proto, u32 vrf_id,
+ ip_protocol_t proto, u32 vrf_id,
u8 probability, u8 is_add)
{
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m = 0;
- clib_bihash_kv_8_8_t kv, value;
nat44_lb_addr_port_t *local, *prev_local, *match_local = 0;
snat_main_per_thread_data_t *tsm;
snat_session_t *s;
@@ -1272,15 +1490,22 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
uword *bitmap = 0;
int i;
- init_nat_k (&kv, e_addr, e_port, 0, proto);
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
- m = pool_elt_at_index (sm->static_mappings, value.value);
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ m = nat44_ed_sm_o2i_lookup (sm, e_addr, e_port, 0, proto);
if (!m)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
if (!is_sm_lb (m->flags))
- return VNET_API_ERROR_INVALID_VALUE;
+ {
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
pool_foreach (local, m->locals)
{
@@ -1295,7 +1520,9 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
if (is_add)
{
if (match_local)
- return VNET_API_ERROR_VALUE_EXIST;
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
pool_get (m->locals, local);
clib_memset (local, 0, sizeof (*local));
@@ -1309,10 +1536,13 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
if (!is_sm_out2in_only (m->flags))
{
- init_nat_kv (&kv, l_addr, l_port, local->fib_index, proto, 0,
- m - sm->static_mappings);
- if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 1))
- nat_elog_err (sm, "static_mapping_by_local key add failed");
+ if (nat44_ed_sm_i2o_add (sm, m, l_addr, l_port, local->fib_index,
+ proto))
+ {
+ nat_log_err ("sm i2o key add failed");
+ pool_put (m->locals, local);
+ return VNET_API_ERROR_UNSPECIFIED;
+ }
}
}
else
@@ -1328,9 +1558,9 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
if (!is_sm_out2in_only (m->flags))
{
- init_nat_k (&kv, l_addr, l_port, match_local->fib_index, proto);
- if (clib_bihash_add_del_8_8 (&sm->static_mapping_by_local, &kv, 0))
- nat_elog_err (sm, "static_mapping_by_local key del failed");
+ if (nat44_ed_sm_i2o_del (sm, l_addr, l_port, match_local->fib_index,
+ proto))
+ nat_log_err ("sm i2o key del failed");
}
if (sm->num_workers > 1)
@@ -1347,15 +1577,15 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
/* Delete sessions */
pool_foreach (s, tsm->sessions) {
- if (!(is_lb_session (s)))
- continue;
+ if (!(nat44_ed_is_lb_session (s)))
+ continue;
- if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
- s->in2out.port != match_local->port)
- continue;
+ if ((s->in2out.addr.as_u32 != match_local->addr.as_u32) ||
+ s->in2out.port != match_local->port)
+ continue;
- nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
- nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
+ nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
+ nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
}
pool_put (m->locals, match_local);
@@ -1396,116 +1626,6 @@ nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
return 0;
}
-int
-snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
- u8 twice_nat)
-{
- snat_address_t *a = 0;
- snat_session_t *ses;
- u32 *ses_to_be_removed = 0, *ses_index;
- snat_main_per_thread_data_t *tsm;
- snat_static_mapping_t *m;
- snat_interface_t *interface;
- int i;
- snat_address_t *addresses =
- twice_nat ? sm->twice_nat_addresses : sm->addresses;
-
- /* Find SNAT address */
- for (i = 0; i < vec_len (addresses); i++)
- {
- if (addresses[i].addr.as_u32 == addr.as_u32)
- {
- a = addresses + i;
- break;
- }
- }
- if (!a)
- {
- nat_log_err ("no such address");
- return VNET_API_ERROR_NO_SUCH_ENTRY;
- }
-
- if (delete_sm)
- {
- pool_foreach (m, sm->static_mappings)
- {
- if (m->external_addr.as_u32 == addr.as_u32)
- {
- nat44_ed_del_static_mapping (m->local_addr, m->external_addr,
- m->local_port, m->external_port,
- m->proto, m->vrf_id, ~0, m->flags);
- }
- }
- }
- else
- {
- /* Check if address is used in some static mapping */
- if (is_snat_address_used_in_static_mapping (sm, addr))
- {
- nat_log_err ("address used in static mapping");
- return VNET_API_ERROR_UNSPECIFIED;
- }
- }
-
- if (a->fib_index != ~0)
- fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, sm->fib_src_low);
-
- /* Delete sessions using address */
- if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
- {
- vec_foreach (tsm, sm->per_thread_data)
- {
- pool_foreach (ses, tsm->sessions) {
- if (ses->out2in.addr.as_u32 == addr.as_u32)
- {
- nat_free_session_data (sm, ses, tsm - sm->per_thread_data, 0);
- vec_add1 (ses_to_be_removed, ses - tsm->sessions);
- }
- }
-
- vec_foreach (ses_index, ses_to_be_removed)
- {
- ses = pool_elt_at_index (tsm->sessions, ses_index[0]);
- nat_ed_session_delete (sm, ses, tsm - sm->per_thread_data, 1);
- }
-
- vec_free (ses_to_be_removed);
- }
- }
-
-#define _(N, i, n, s) \
- vec_free (a->busy_##n##_ports_per_thread);
- foreach_nat_protocol
-#undef _
-
- if (twice_nat)
- {
- vec_del1 (sm->twice_nat_addresses, i);
- return 0;
- }
- else vec_del1 (sm->addresses, i);
-
- /* Delete external address from FIB */
- pool_foreach (interface, sm->interfaces)
- {
- if (nat_interface_is_inside (interface))
- continue;
-
- snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
- break;
- }
- pool_foreach (interface, sm->output_feature_interfaces)
- {
- if (nat_interface_is_inside (interface))
- continue;
-
- snat_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
- break;
- }
-
- return 0;
-}
-
void
expire_per_vrf_sessions (u32 fib_index)
{
@@ -1515,19 +1635,19 @@ expire_per_vrf_sessions (u32 fib_index)
vec_foreach (tsm, sm->per_thread_data)
{
- vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
- {
- if ((per_vrf_sessions->rx_fib_index == fib_index) ||
- (per_vrf_sessions->tx_fib_index == fib_index))
- {
- per_vrf_sessions->expired = 1;
- }
- }
+ pool_foreach (per_vrf_sessions, tsm->per_vrf_sessions_pool)
+ {
+ if ((per_vrf_sessions->rx_fib_index == fib_index) ||
+ (per_vrf_sessions->tx_fib_index == fib_index))
+ {
+ per_vrf_sessions->expired = 1;
+ }
+ }
}
}
void
-update_per_vrf_sessions_vec (u32 fib_index, int is_del)
+update_per_vrf_sessions_pool (u32 fib_index, int is_del)
{
snat_main_t *sm = &snat_main;
nat_fib_t *fib;
@@ -1563,10 +1683,10 @@ update_per_vrf_sessions_vec (u32 fib_index, int is_del)
}
}
-static_always_inline nat_outside_fib_t *
-nat44_ed_get_outside_fib (nat_outside_fib_t *outside_fibs, u32 fib_index)
+static_always_inline nat_fib_t *
+nat44_ed_get_outside_fib (nat_fib_t *outside_fibs, u32 fib_index)
{
- nat_outside_fib_t *f;
+ nat_fib_t *f;
vec_foreach (f, outside_fibs)
{
if (f->fib_index == fib_index)
@@ -1597,10 +1717,8 @@ nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
const char *del_feature_name, *feature_name;
snat_main_t *sm = &snat_main;
- nat_outside_fib_t *outside_fib;
- snat_static_mapping_t *m;
+ nat_fib_t *outside_fib;
snat_interface_t *i;
- snat_address_t *ap;
u32 fib_index;
int rv;
@@ -1619,8 +1737,8 @@ nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
i = nat44_ed_get_interface (sm->interfaces, sw_if_index);
if (i)
{
- if ((nat_interface_is_inside (i) && is_inside) ||
- (nat_interface_is_outside (i) && !is_inside))
+ if ((nat44_ed_is_interface_inside (i) && is_inside) ||
+ (nat44_ed_is_interface_outside (i) && !is_inside))
{
return 0;
}
@@ -1672,7 +1790,7 @@ nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
fib_index =
fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
- update_per_vrf_sessions_vec (fib_index, 0 /*is_del*/);
+ update_per_vrf_sessions_pool (fib_index, 0 /*is_del*/);
if (!is_inside)
{
@@ -1681,28 +1799,19 @@ nat44_ed_add_interface (u32 sw_if_index, u8 is_inside)
outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
if (outside_fib)
{
- outside_fib->refcount++;
+ outside_fib->ref_count++;
}
else
{
vec_add2 (sm->outside_fibs, outside_fib, 1);
outside_fib->fib_index = fib_index;
- outside_fib->refcount = 1;
+ outside_fib->ref_count = 1;
}
- vec_foreach (ap, sm->addresses)
- {
- snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 1);
- }
- pool_foreach (m, sm->static_mappings)
- {
- if (!(is_sm_addr_only (m->flags)) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- {
- continue;
- }
- snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 1);
- }
+ nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 1);
+ nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 1);
+
+ nat44_ed_bind_if_addr_to_nat_addr (sw_if_index);
}
else
{
@@ -1718,10 +1827,8 @@ nat44_ed_del_interface (u32 sw_if_index, u8 is_inside)
const char *del_feature_name, *feature_name;
snat_main_t *sm = &snat_main;
- nat_outside_fib_t *outside_fib;
- snat_static_mapping_t *m;
+ nat_fib_t *outside_fib;
snat_interface_t *i;
- snat_address_t *ap;
u32 fib_index;
int rv;
@@ -1738,7 +1845,7 @@ nat44_ed_del_interface (u32 sw_if_index, u8 is_inside)
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
- if (nat_interface_is_inside (i) && nat_interface_is_outside (i))
+ if (nat44_ed_is_interface_inside (i) && nat44_ed_is_interface_outside (i))
{
if (sm->num_workers > 1)
{
@@ -1798,34 +1905,22 @@ nat44_ed_del_interface (u32 sw_if_index, u8 is_inside)
fib_index =
fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
- update_per_vrf_sessions_vec (fib_index, 1 /*is_del*/);
+ update_per_vrf_sessions_pool (fib_index, 1 /*is_del*/);
if (!is_inside)
{
outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
if (outside_fib)
{
- outside_fib->refcount--;
- if (!outside_fib->refcount)
+ outside_fib->ref_count--;
+ if (!outside_fib->ref_count)
{
vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
}
}
- vec_foreach (ap, sm->addresses)
- {
- snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 0);
- }
-
- pool_foreach (m, sm->static_mappings)
- {
- if (!(is_sm_addr_only (m->flags)) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- {
- continue;
- }
- snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 0);
- }
+ nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 0);
+ nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 0);
}
return 0;
@@ -1836,10 +1931,8 @@ nat44_ed_add_output_interface (u32 sw_if_index)
{
snat_main_t *sm = &snat_main;
- nat_outside_fib_t *outside_fib;
- snat_static_mapping_t *m;
+ nat_fib_t *outside_fib;
snat_interface_t *i;
- snat_address_t *ap;
u32 fib_index;
int rv;
@@ -1911,34 +2004,24 @@ nat44_ed_add_output_interface (u32 sw_if_index)
fib_index =
fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
- update_per_vrf_sessions_vec (fib_index, 0 /*is_del*/);
+ update_per_vrf_sessions_pool (fib_index, 0 /*is_del*/);
outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
if (outside_fib)
{
- outside_fib->refcount++;
+ outside_fib->ref_count++;
}
else
{
vec_add2 (sm->outside_fibs, outside_fib, 1);
outside_fib->fib_index = fib_index;
- outside_fib->refcount = 1;
+ outside_fib->ref_count = 1;
}
- vec_foreach (ap, sm->addresses)
- {
- snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 1);
- }
+ nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 1);
+ nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 1);
- pool_foreach (m, sm->static_mappings)
- {
- if (!((is_sm_addr_only (m->flags))) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- {
- continue;
- }
- snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 1);
- }
+ nat44_ed_bind_if_addr_to_nat_addr (sw_if_index);
return 0;
}
@@ -1948,10 +2031,8 @@ nat44_ed_del_output_interface (u32 sw_if_index)
{
snat_main_t *sm = &snat_main;
- nat_outside_fib_t *outside_fib;
- snat_static_mapping_t *m;
+ nat_fib_t *outside_fib;
snat_interface_t *i;
- snat_address_t *ap;
u32 fib_index;
int rv;
@@ -2013,32 +2094,20 @@ nat44_ed_del_output_interface (u32 sw_if_index)
fib_index =
fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
- update_per_vrf_sessions_vec (fib_index, 1 /*is_del*/);
+ update_per_vrf_sessions_pool (fib_index, 1 /*is_del*/);
outside_fib = nat44_ed_get_outside_fib (sm->outside_fibs, fib_index);
if (outside_fib)
{
- outside_fib->refcount--;
- if (!outside_fib->refcount)
+ outside_fib->ref_count--;
+ if (!outside_fib->ref_count)
{
vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
}
}
- vec_foreach (ap, sm->addresses)
- {
- snat_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, 0);
- }
-
- pool_foreach (m, sm->static_mappings)
- {
- if (!((is_sm_addr_only (m->flags))) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- {
- continue;
- }
- snat_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index, 0);
- }
+ nat44_ed_add_del_nat_addr_fib_reg_entries (sw_if_index, 0);
+ nat44_ed_add_del_sm_fib_reg_entries (sw_if_index, 0);
return 0;
}
@@ -2064,7 +2133,7 @@ snat_set_workers (uword * bitmap)
j++;
}
- sm->port_per_thread = (0xffff - 1024) / _vec_len (sm->workers);
+ sm->port_per_thread = (65536 - ED_USER_PORT_OFFSET) / _vec_len (sm->workers);
return 0;
}
@@ -2074,17 +2143,28 @@ nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts)
{
fail_if_enabled ();
snat_main_t *sm = &snat_main;
+
+ if ((sm->fq_in2out_index != ~0) || (sm->fq_out2in_index != ~0) ||
+ (sm->fq_in2out_output_index != ~0))
+ {
+ // frame queu nelts can be set only before first
+ // call to nat44_plugin_enable after that it
+ // doesn't make sense
+ nat_log_err ("Frame queue was already initialized. "
+ "Change is not possible");
+ return 1;
+ }
+
sm->frame_queue_nelts = frame_queue_nelts;
return 0;
}
static void
-snat_update_outside_fib (ip4_main_t * im, uword opaque,
- u32 sw_if_index, u32 new_fib_index,
- u32 old_fib_index)
+nat44_ed_update_outside_fib_cb (ip4_main_t *im, uword opaque, u32 sw_if_index,
+ u32 new_fib_index, u32 old_fib_index)
{
snat_main_t *sm = &snat_main;
- nat_outside_fib_t *outside_fib;
+ nat_fib_t *outside_fib;
snat_interface_t *i;
u8 is_add = 1;
u8 match = 0;
@@ -2096,20 +2176,20 @@ snat_update_outside_fib (ip4_main_t * im, uword opaque,
}
pool_foreach (i, sm->interfaces)
- {
+ {
if (i->sw_if_index == sw_if_index)
{
- if (!(nat_interface_is_outside (i)))
+ if (!(nat44_ed_is_interface_outside (i)))
return;
match = 1;
}
}
pool_foreach (i, sm->output_feature_interfaces)
- {
+ {
if (i->sw_if_index == sw_if_index)
{
- if (!(nat_interface_is_outside (i)))
+ if (!(nat44_ed_is_interface_outside (i)))
return;
match = 1;
}
@@ -2119,54 +2199,45 @@ snat_update_outside_fib (ip4_main_t * im, uword opaque,
return;
vec_foreach (outside_fib, sm->outside_fibs)
- {
- if (outside_fib->fib_index == old_fib_index)
- {
- outside_fib->refcount--;
- if (!outside_fib->refcount)
- vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
- break;
- }
- }
+ {
+ if (outside_fib->fib_index == old_fib_index)
+ {
+ outside_fib->ref_count--;
+ if (!outside_fib->ref_count)
+ vec_del1 (sm->outside_fibs, outside_fib - sm->outside_fibs);
+ break;
+ }
+ }
vec_foreach (outside_fib, sm->outside_fibs)
- {
- if (outside_fib->fib_index == new_fib_index)
- {
- outside_fib->refcount++;
- is_add = 0;
- break;
- }
- }
+ {
+ if (outside_fib->fib_index == new_fib_index)
+ {
+ outside_fib->ref_count++;
+ is_add = 0;
+ break;
+ }
+ }
if (is_add)
{
vec_add2 (sm->outside_fibs, outside_fib, 1);
- outside_fib->refcount = 1;
+ outside_fib->ref_count = 1;
outside_fib->fib_index = new_fib_index;
}
}
-static void
-snat_update_outside_fib (ip4_main_t * im, uword opaque,
- u32 sw_if_index, u32 new_fib_index,
- u32 old_fib_index);
+static void nat44_ed_update_outside_fib_cb (ip4_main_t *im, uword opaque,
+ u32 sw_if_index, u32 new_fib_index,
+ u32 old_fib_index);
-static void
-snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
- uword opaque,
- u32 sw_if_index,
- ip4_address_t * address,
- u32 address_length,
- u32 if_address_index, u32 is_delete);
+static void nat44_ed_add_del_interface_address_cb (
+ ip4_main_t *im, uword opaque, u32 sw_if_index, ip4_address_t *address,
+ u32 address_length, u32 if_address_index, u32 is_delete);
-static void
-nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
- uword opaque,
- u32 sw_if_index,
- ip4_address_t * address,
- u32 address_length,
- u32 if_address_index, u32 is_delete);
+static void nat44_ed_add_del_static_mapping_cb (
+ ip4_main_t *im, uword opaque, u32 sw_if_index, ip4_address_t *address,
+ u32 address_length, u32 if_address_index, u32 is_delete);
void
test_key_calc_split ()
@@ -2188,8 +2259,8 @@ test_key_calc_split ()
u32 thread_index = 3000000001;
u32 session_index = 3000000221;
clib_bihash_kv_16_8_t kv;
- init_ed_kv (&kv, l_addr, l_port, r_addr, r_port, fib_index, proto,
- thread_index, session_index);
+ init_ed_kv (&kv, l_addr.as_u32, l_port, r_addr.as_u32, r_port, fib_index,
+ proto, thread_index, session_index);
ip4_address_t l_addr2;
ip4_address_t r_addr2;
clib_memset (&l_addr2, 0, sizeof (l_addr2));
@@ -2208,16 +2279,6 @@ test_key_calc_split ()
ASSERT (fib_index == fib_index2);
ASSERT (thread_index == ed_value_get_thread_index (&kv));
ASSERT (session_index == ed_value_get_session_index (&kv));
-
- fib_index = 7001;
- proto = 5;
- nat_protocol_t proto3 = ~0;
- u64 key = calc_nat_key (l_addr, l_port, fib_index, proto);
- split_nat_key (key, &l_addr2, &l_port2, &fib_index2, &proto3);
- ASSERT (l_addr.as_u32 == l_addr2.as_u32);
- ASSERT (l_port == l_port2);
- ASSERT (proto == proto3);
- ASSERT (fib_index == fib_index2);
}
static clib_error_t *
@@ -2237,21 +2298,6 @@ nat_ip_table_add_del (vnet_main_t * vnm, u32 table_id, u32 is_add)
VNET_IP_TABLE_ADD_DEL_FUNCTION (nat_ip_table_add_del);
-void
-nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm)
-{
- vlib_node_t *node;
-
- node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
- sm->out2in_node_index = node->index;
-
- node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
- sm->in2out_node_index = node->index;
-
- node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
- sm->in2out_output_node_index = node->index;
-}
-
#define nat_validate_simple_counter(c, i) \
do \
{ \
@@ -2295,12 +2341,8 @@ nat_init (vlib_main_t * vm)
clib_memset (sm, 0, sizeof (*sm));
- // required
- sm->vnet_main = vnet_get_main ();
// convenience
sm->ip4_main = &ip4_main;
- sm->api_main = vlibapi_get_main ();
- sm->ip4_lookup_main = &ip4_main.lookup_main;
// frame queue indices used for handoff
sm->fq_out2in_index = ~0;
@@ -2309,15 +2351,13 @@ nat_init (vlib_main_t * vm)
sm->log_level = NAT_LOG_ERROR;
- nat44_set_node_indexes (sm, vm);
-
sm->log_class = vlib_log_register_class ("nat", 0);
nat_ipfix_logging_init (vm);
nat_init_simple_counter (sm->total_sessions, "total-sessions",
"/nat44-ed/total-sessions");
- sm->max_cfg_sessions_gauge = stat_segment_new_entry (
- (u8 *) "/nat44-ed/max-cfg-sessions", STAT_DIR_TYPE_SCALAR_INDEX);
+ sm->max_cfg_sessions_gauge =
+ vlib_stats_add_gauge ("/nat44-ed/max-cfg-sessions");
#define _(x) \
nat_init_simple_counter (sm->counters.fastpath.in2out.x, #x, \
@@ -2344,7 +2384,7 @@ nat_init (vlib_main_t * vm)
}
}
num_threads = tm->n_vlib_mains - 1;
- sm->port_per_thread = 0xffff - 1024;
+ sm->port_per_thread = 65536 - ED_USER_PORT_OFFSET;
vec_validate (sm->per_thread_data, num_threads);
/* Use all available workers by default */
@@ -2361,13 +2401,13 @@ nat_init (vlib_main_t * vm)
}
/* callbacks to call when interface address changes. */
- cbi.function = snat_ip4_add_del_interface_address_cb;
+ cbi.function = nat44_ed_add_del_interface_address_cb;
vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
- cbi.function = nat_ip4_add_del_addr_only_sm_cb;
+ cbi.function = nat44_ed_add_del_static_mapping_cb;
vec_add1 (sm->ip4_main->add_del_interface_address_callbacks, cbi);
/* callbacks to call when interface to table biding changes */
- cbt.function = snat_update_outside_fib;
+ cbt.function = nat44_ed_update_outside_fib_cb;
vec_add1 (sm->ip4_main->table_bind_callbacks, cbt);
sm->fib_src_low =
@@ -2392,26 +2432,15 @@ nat44_plugin_enable (nat44_config_t c)
fail_if_enabled ();
- if (c.static_mapping_only && !c.connection_tracking)
- {
- nat_log_err ("unsupported combination of configuration");
- return 1;
- }
-
- sm->static_mapping_only = c.static_mapping_only;
- sm->static_mapping_connection_tracking = c.connection_tracking;
-
sm->forwarding_enabled = 0;
sm->mss_clamping = 0;
- sm->pat = (!c.static_mapping_only ||
- (c.static_mapping_only && c.connection_tracking));
if (!c.sessions)
c.sessions = 63 * 1024;
sm->max_translations_per_thread = c.sessions;
- stat_segment_set_state_counter (sm->max_cfg_sessions_gauge,
- sm->max_translations_per_thread);
+ vlib_stats_set_gauge (sm->max_cfg_sessions_gauge,
+ sm->max_translations_per_thread);
sm->translation_buckets = nat_calc_bihash_buckets (c.sessions);
vec_add1 (sm->max_translations_per_fib, sm->max_translations_per_thread);
@@ -2425,7 +2454,7 @@ nat44_plugin_enable (nat44_config_t c)
sm->outside_fib_index = fib_table_find_or_create_and_lock (
FIB_PROTOCOL_IP4, c.outside_vrf, sm->fib_src_hi);
- nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
+ nat44_ed_db_init ();
nat_affinity_enable ();
@@ -2440,20 +2469,26 @@ nat44_plugin_enable (nat44_config_t c)
if (sm->num_workers > 1)
{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_t *node;
+
if (sm->fq_in2out_index == ~0)
{
- sm->fq_in2out_index = vlib_frame_queue_main_init (
- sm->in2out_node_index, sm->frame_queue_nelts);
+ node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out");
+ sm->fq_in2out_index =
+ vlib_frame_queue_main_init (node->index, sm->frame_queue_nelts);
}
if (sm->fq_out2in_index == ~0)
{
- sm->fq_out2in_index = vlib_frame_queue_main_init (
- sm->out2in_node_index, sm->frame_queue_nelts);
+ node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-out2in");
+ sm->fq_out2in_index =
+ vlib_frame_queue_main_init (node->index, sm->frame_queue_nelts);
}
if (sm->fq_in2out_output_index == ~0)
{
- sm->fq_in2out_output_index = vlib_frame_queue_main_init (
- sm->in2out_output_node_index, sm->frame_queue_nelts);
+ node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-in2out-output");
+ sm->fq_in2out_output_index =
+ vlib_frame_queue_main_init (node->index, sm->frame_queue_nelts);
}
}
@@ -2463,86 +2498,213 @@ nat44_plugin_enable (nat44_config_t c)
return 0;
}
-void
-nat44_addresses_free (snat_address_t ** addresses)
+int
+nat44_ed_del_addresses ()
{
- snat_address_t *ap;
- vec_foreach (ap, *addresses)
+ snat_main_t *sm = &snat_main;
+ snat_address_t *a, *vec;
+ int error = 0;
+
+ vec = vec_dup (sm->addresses);
+ vec_foreach (a, vec)
{
- #define _(N, i, n, s) \
- vec_free (ap->busy_##n##_ports_per_thread);
- foreach_nat_protocol
- #undef _
+ error = nat44_ed_del_address (a->addr, 0);
+ if (error)
+ {
+ nat_log_err ("error occurred while removing adderess");
+ }
}
- vec_free (*addresses);
- *addresses = 0;
+ vec_free (vec);
+ vec_free (sm->addresses);
+ sm->addresses = 0;
+
+ vec = vec_dup (sm->twice_nat_addresses);
+ vec_foreach (a, vec)
+ {
+ error = nat44_ed_del_address (a->addr, 1);
+ if (error)
+ {
+ nat_log_err ("error occurred while removing adderess");
+ }
+ }
+ vec_free (vec);
+ vec_free (sm->twice_nat_addresses);
+ sm->twice_nat_addresses = 0;
+
+ vec_free (sm->addr_to_resolve);
+ sm->addr_to_resolve = 0;
+
+ return error;
}
int
-nat44_plugin_disable ()
+nat44_ed_del_interfaces ()
{
snat_main_t *sm = &snat_main;
snat_interface_t *i, *pool;
int error = 0;
- fail_if_disabled ();
-
pool = pool_dup (sm->interfaces);
pool_foreach (i, pool)
{
- if (nat_interface_is_inside (i))
+ if (nat44_ed_is_interface_inside (i))
{
error = nat44_ed_del_interface (i->sw_if_index, 1);
}
- if (nat_interface_is_outside (i))
+ if (nat44_ed_is_interface_outside (i))
{
error = nat44_ed_del_interface (i->sw_if_index, 0);
}
+
if (error)
- {
- nat_log_err ("error occurred while removing interface %u",
- i->sw_if_index);
- }
+ {
+ nat_log_err ("error occurred while removing interface");
+ }
}
- pool_free (sm->interfaces);
pool_free (pool);
+ pool_free (sm->interfaces);
sm->interfaces = 0;
+ return error;
+}
+
+int
+nat44_ed_del_output_interfaces ()
+{
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i, *pool;
+ int error = 0;
pool = pool_dup (sm->output_feature_interfaces);
pool_foreach (i, pool)
{
error = nat44_ed_del_output_interface (i->sw_if_index);
if (error)
- {
- nat_log_err ("error occurred while removing interface %u",
- i->sw_if_index);
- }
+ {
+ nat_log_err ("error occurred while removing output interface");
+ }
}
- pool_free (sm->output_feature_interfaces);
pool_free (pool);
+ pool_free (sm->output_feature_interfaces);
sm->output_feature_interfaces = 0;
+ return error;
+}
+
+static clib_error_t *
+nat44_ed_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ snat_main_t *sm = &snat_main;
+ snat_interface_t *i;
+ int error = 0;
+
+ if (is_add)
+ return 0;
+
+ if (!sm->enabled)
+ return 0;
+
+ i = nat44_ed_get_interface (sm->interfaces, sw_if_index);
+ if (i)
+ {
+ bool is_inside = nat44_ed_is_interface_inside (i);
+ bool is_outside = nat44_ed_is_interface_outside (i);
+
+ if (is_inside)
+ {
+ error |= nat44_ed_del_interface (sw_if_index, 1);
+ }
+ if (is_outside)
+ {
+ error |= nat44_ed_del_interface (sw_if_index, 0);
+ }
+
+ if (error)
+ {
+ nat_log_err ("error occurred while removing interface");
+ }
+ }
+
+ i = nat44_ed_get_interface (sm->output_feature_interfaces, sw_if_index);
+ if (i)
+ {
+ error = nat44_ed_del_output_interface (sw_if_index);
+ if (error)
+ {
+ nat_log_err ("error occurred while removing output interface");
+ }
+ }
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (nat44_ed_sw_interface_add_del);
+
+int
+nat44_ed_del_static_mappings ()
+{
+ snat_main_t *sm = &snat_main;
+ snat_static_mapping_t *m, *pool;
+ int error = 0;
+
+ pool = pool_dup (sm->static_mappings);
+ pool_foreach (m, pool)
+ {
+ error = nat44_ed_del_static_mapping_internal (
+ m->local_addr, m->external_addr, m->local_port, m->external_port,
+ m->proto, m->vrf_id, m->flags);
+ if (error)
+ {
+ nat_log_err ("error occurred while removing mapping");
+ }
+ }
+ pool_free (pool);
+ pool_free (sm->static_mappings);
+ sm->static_mappings = 0;
+
+ vec_free (sm->sm_to_resolve);
+ sm->sm_to_resolve = 0;
+
+ return error;
+}
+
+int
+nat44_plugin_disable ()
+{
+ snat_main_t *sm = &snat_main;
+ int rc, error = 0;
+
+ fail_if_disabled ();
+
+ rc = nat44_ed_del_static_mappings ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ed_del_addresses ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ed_del_interfaces ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ed_del_output_interfaces ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ nat44_ed_del_vrf_tables ();
vec_free (sm->max_translations_per_fib);
+ sm->max_translations_per_fib = 0;
nat44_ed_db_free ();
- nat44_addresses_free (&sm->addresses);
- nat44_addresses_free (&sm->twice_nat_addresses);
-
- vec_free (sm->to_resolve);
- vec_free (sm->auto_add_sw_if_indices);
- vec_free (sm->auto_add_sw_if_indices_twice_nat);
+ clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
- sm->to_resolve = 0;
- sm->auto_add_sw_if_indices = 0;
- sm->auto_add_sw_if_indices_twice_nat = 0;
+ nat_affinity_disable ();
sm->forwarding_enabled = 0;
-
sm->enabled = 0;
- clib_memset (&sm->rconfig, 0, sizeof (sm->rconfig));
- return 0;
+ return error;
}
void
@@ -2556,14 +2718,16 @@ nat44_ed_forwarding_enable_disable (u8 is_enable)
sm->forwarding_enabled = is_enable != 0;
- if (is_enable)
- return;
+ if (!sm->enabled || is_enable)
+ {
+ return;
+ }
vec_foreach (tsm, sm->per_thread_data)
{
pool_foreach (s, tsm->sessions)
{
- if (is_fwd_bypass_session (s))
+ if (na44_ed_is_fwd_bypass_session (s))
{
vec_add1 (ses_to_be_removed, s - tsm->sessions);
}
@@ -2571,7 +2735,7 @@ nat44_ed_forwarding_enable_disable (u8 is_enable)
vec_foreach (ses_index, ses_to_be_removed)
{
s = pool_elt_at_index (tsm->sessions, ses_index[0]);
- nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
+ nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
}
@@ -2579,125 +2743,91 @@ nat44_ed_forwarding_enable_disable (u8 is_enable)
}
}
-void
-snat_free_outside_address_and_port (snat_address_t *addresses,
- u32 thread_index, ip4_address_t *addr,
- u16 port, nat_protocol_t protocol)
+static_always_inline snat_static_mapping_t *
+nat44_ed_sm_match (snat_main_t *sm, ip4_address_t match_addr, u16 match_port,
+ u32 match_fib_index, ip_protocol_t match_protocol,
+ int by_external)
{
- snat_main_t *sm = &snat_main;
- snat_address_t *a;
- u32 address_index;
- u16 port_host_byte_order = clib_net_to_host_u16 (port);
-
- for (address_index = 0; address_index < vec_len (addresses);
- address_index++)
+ snat_static_mapping_t *m;
+ if (!by_external)
{
- if (addresses[address_index].addr.as_u32 == addr->as_u32)
- break;
- }
-
- ASSERT (address_index < vec_len (addresses));
-
- a = addresses + address_index;
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port, match_fib_index,
+ match_protocol);
+ if (m)
+ return m;
- switch (protocol)
- {
-#define _(N, i, n, s) \
- case NAT_PROTOCOL_##N: \
- ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
- --a->busy_##n##_port_refcounts[port_host_byte_order]; \
- a->busy_##n##_ports--; \
- a->busy_##n##_ports_per_thread[thread_index]--; \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return;
- }
-}
+ // try address only mapping
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, match_fib_index, 0);
+ if (m)
+ return m;
-int
-nat_set_outside_address_and_port (snat_address_t *addresses, u32 thread_index,
- ip4_address_t addr, u16 port,
- nat_protocol_t protocol)
-{
- snat_main_t *sm = &snat_main;
- snat_address_t *a = 0;
- u32 address_index;
- u16 port_host_byte_order = clib_net_to_host_u16 (port);
-
- for (address_index = 0; address_index < vec_len (addresses);
- address_index++)
- {
- if (addresses[address_index].addr.as_u32 != addr.as_u32)
- continue;
-
- a = addresses + address_index;
- switch (protocol)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
- return VNET_API_ERROR_INSTANCE_IN_USE; \
- ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- return 0;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return 1;
+ // default static mapping fib index (based on configuration)
+ if (sm->inside_fib_index != match_fib_index)
+ {
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port,
+ sm->inside_fib_index, match_protocol);
+ if (m)
+ return m;
+
+ // try address only mapping
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, sm->inside_fib_index,
+ 0);
+ if (m)
+ return m;
+ }
+ // TODO: this specific use case may be deprecated (needs testing)
+ if (sm->outside_fib_index != match_fib_index)
+ {
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, match_port,
+ sm->outside_fib_index, match_protocol);
+ if (m)
+ return m;
+
+ // try address only mapping
+ m = nat44_ed_sm_i2o_lookup (sm, match_addr, 0, sm->outside_fib_index,
+ 0);
+ if (m)
+ return m;
}
}
+ else
+ {
+ m =
+ nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
+ if (m)
+ return m;
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ // try address only mapping
+ m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
+ if (m)
+ return m;
+ }
+ return 0;
}
int
-snat_static_mapping_match (vlib_main_t *vm, snat_main_t *sm,
- ip4_address_t match_addr, u16 match_port,
- u32 match_fib_index, nat_protocol_t match_protocol,
+snat_static_mapping_match (vlib_main_t *vm, ip4_address_t match_addr,
+ u16 match_port, u32 match_fib_index,
+ ip_protocol_t match_protocol,
ip4_address_t *mapping_addr, u16 *mapping_port,
- u32 *mapping_fib_index, u8 by_external,
+ u32 *mapping_fib_index, int by_external,
u8 *is_addr_only, twice_nat_type_t *twice_nat,
lb_nat_type_t *lb, ip4_address_t *ext_host_addr,
u8 *is_identity_nat, snat_static_mapping_t **out)
{
- clib_bihash_kv_8_8_t kv, value;
- clib_bihash_8_8_t *mapping_hash;
+ snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
u32 rand, lo = 0, hi, mid, *tmp = 0, i;
nat44_lb_addr_port_t *local;
u8 backend_index;
- if (!by_external)
- {
- mapping_hash = &sm->static_mapping_by_local;
- init_nat_k (&kv, match_addr, match_port, match_fib_index,
- match_protocol);
- if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
- {
- /* Try address only mapping */
- init_nat_k (&kv, match_addr, 0, match_fib_index, 0);
- if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
- return 1;
- }
- }
- else
+ m = nat44_ed_sm_match (sm, match_addr, match_port, match_fib_index,
+ match_protocol, by_external);
+ if (!m)
{
- mapping_hash = &sm->static_mapping_by_external;
- init_nat_k (&kv, match_addr, match_port, 0, match_protocol);
- if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
- {
- /* Try address only mapping */
- init_nat_k (&kv, match_addr, 0, 0, 0);
- if (clib_bihash_search_8_8 (mapping_hash, &kv, &value))
- return 1;
- }
+ return 1;
}
- m = pool_elt_at_index (sm->static_mappings, value.value);
-
if (by_external)
{
if (is_sm_lb (m->flags))
@@ -2829,7 +2959,7 @@ nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
if (PREDICT_FALSE (is_output))
{
fib_index = sm->outside_fib_index;
- nat_outside_fib_t *outside_fib;
+ nat_fib_t *outside_fib;
fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
fib_prefix_t pfx = {
.fp_proto = FIB_PROTOCOL_IP4,
@@ -2864,9 +2994,33 @@ nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
}
}
- init_ed_k (&kv16, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
- ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
- fib_index, ip->protocol);
+ if (PREDICT_FALSE (ip->protocol == IP_PROTOCOL_ICMP))
+ {
+ ip4_address_t lookup_saddr, lookup_daddr;
+ u16 lookup_sport, lookup_dport;
+ u8 lookup_protocol;
+
+ if (!nat_get_icmp_session_lookup_values (
+ b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr,
+ &lookup_dport, &lookup_protocol))
+ {
+ init_ed_k (&kv16, lookup_saddr.as_u32, lookup_sport,
+ lookup_daddr.as_u32, lookup_dport, rx_fib_index,
+ lookup_protocol);
+ if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
+ {
+ next_worker_index = ed_value_get_thread_index (&value16);
+ vnet_buffer2 (b)->nat.cached_session_index =
+ ed_value_get_session_index (&value16);
+ goto out;
+ }
+ }
+ }
+
+ init_ed_k (&kv16, ip->src_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_dst_port, fib_index,
+ ip->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
{
@@ -2877,9 +3031,10 @@ nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
}
// dst NAT
- init_ed_k (&kv16, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
- ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
- rx_fib_index, ip->protocol);
+ init_ed_k (&kv16, ip->dst_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_src_port, rx_fib_index,
+ ip->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))
{
next_worker_index = ed_value_get_thread_index (&value16);
@@ -2890,7 +3045,9 @@ nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
}
hash = ip->src_address.as_u32 + (ip->src_address.as_u32 >> 8) +
- (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24);
+ (ip->src_address.as_u32 >> 16) + (ip->src_address.as_u32 >> 24) +
+ rx_fib_index + (rx_fib_index >> 8) + (rx_fib_index >> 16) +
+ (rx_fib_index >> 24);
if (PREDICT_TRUE (is_pow2 (_vec_len (sm->workers))))
next_worker_index += sm->workers[hash & (_vec_len (sm->workers) - 1)];
@@ -2921,17 +3078,16 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
u32 rx_fib_index, u8 is_output)
{
snat_main_t *sm = &snat_main;
- clib_bihash_kv_8_8_t kv, value;
clib_bihash_kv_16_8_t kv16, value16;
- u32 proto, next_worker_index = 0;
+ u8 proto, next_worker_index = 0;
u16 port;
snat_static_mapping_t *m;
u32 hash;
- proto = ip_proto_to_nat_proto (ip->protocol);
+ proto = ip->protocol;
- if (PREDICT_FALSE (proto == NAT_PROTOCOL_ICMP))
+ if (PREDICT_FALSE (IP_PROTOCOL_ICMP == proto))
{
ip4_address_t lookup_saddr, lookup_daddr;
u16 lookup_sport, lookup_dport;
@@ -2940,8 +3096,9 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport,
&lookup_protocol))
{
- init_ed_k (&kv16, lookup_saddr, lookup_sport, lookup_daddr,
- lookup_dport, rx_fib_index, lookup_protocol);
+ init_ed_k (&kv16, lookup_saddr.as_u32, lookup_sport,
+ lookup_daddr.as_u32, lookup_dport, rx_fib_index,
+ lookup_protocol);
if (PREDICT_TRUE (
!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
{
@@ -2955,9 +3112,10 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
}
}
- init_ed_k (&kv16, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
- ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
- rx_fib_index, ip->protocol);
+ init_ed_k (&kv16, ip->src_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_dst_port, rx_fib_index,
+ ip->protocol);
if (PREDICT_TRUE (
!clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16)))
@@ -2975,18 +3133,18 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
/* first try static mappings without port */
if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
{
- init_nat_k (&kv, ip->dst_address, 0, 0, 0);
- if (!clib_bihash_search_8_8
- (&sm->static_mapping_by_external, &kv, &value))
+ m = nat44_ed_sm_o2i_lookup (sm, ip->dst_address, 0, 0, proto);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value.value);
- next_worker_index = m->workers[0];
- goto done;
+ {
+ next_worker_index = m->workers[0];
+ goto done;
+ }
}
}
/* unknown protocol */
- if (PREDICT_FALSE (proto == NAT_PROTOCOL_OTHER))
+ if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto)))
{
/* use current thread */
next_worker_index = vlib_get_thread_index ();
@@ -2999,7 +3157,7 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
{
udp_header_t *udp = ip4_next_header (ip);
icmp46_header_t *icmp = (icmp46_header_t *) udp;
- icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
+ nat_icmp_echo_header_t *echo = (nat_icmp_echo_header_t *) (icmp + 1);
if (!icmp_type_is_error_message
(vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))
port = vnet_buffer (b)->ip.reass.l4_src_port;
@@ -3007,18 +3165,19 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
{
/* if error message, then it's not fragmented and we can access it */
ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
- proto = ip_proto_to_nat_proto (inner_ip->protocol);
+ proto = inner_ip->protocol;
void *l4_header = ip4_next_header (inner_ip);
switch (proto)
{
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
icmp = (icmp46_header_t *) l4_header;
- echo = (icmp_echo_header_t *) (icmp + 1);
+ echo = (nat_icmp_echo_header_t *) (icmp + 1);
port = echo->identifier;
break;
- case NAT_PROTOCOL_UDP:
- case NAT_PROTOCOL_TCP:
- port = ((tcp_udp_header_t *) l4_header)->src_port;
+ case IP_PROTOCOL_UDP:
+ /* breakthrough */
+ case IP_PROTOCOL_TCP:
+ port = ((nat_tcp_udp_header_t *) l4_header)->src_port;
break;
default:
next_worker_index = vlib_get_thread_index ();
@@ -3030,11 +3189,9 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
/* try static mappings with port */
if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
{
- init_nat_k (&kv, ip->dst_address, port, 0, proto);
- if (!clib_bihash_search_8_8
- (&sm->static_mapping_by_external, &kv, &value))
+ m = nat44_ed_sm_o2i_lookup (sm, ip->dst_address, port, 0, proto);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value.value);
if (!is_sm_lb (m->flags))
{
next_worker_index = m->workers[0];
@@ -3054,9 +3211,7 @@ nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
}
/* worker by outside port */
- next_worker_index = sm->first_worker_index;
- next_worker_index +=
- sm->workers[(clib_net_to_host_u16 (port) - 1024) / sm->port_per_thread];
+ next_worker_index = get_thread_idx_by_port (clib_net_to_host_u16 (port));
done:
nat_elog_debug_handoff (sm, "HANDOFF OUT2IN", next_worker_index,
@@ -3085,16 +3240,12 @@ nat44_set_session_limit (u32 session_limit, u32 vrf_id)
{
snat_main_t *sm = &snat_main;
u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
- u32 len = vec_len (sm->max_translations_per_fib);
-
- if (len <= fib_index)
- {
- vec_validate (sm->max_translations_per_fib, fib_index + 1);
- for (; len < vec_len (sm->max_translations_per_fib); len++)
- sm->max_translations_per_fib[len] = sm->max_translations_per_thread;
- }
+ if (~0 == fib_index)
+ return -1;
+ vec_validate_init_empty (sm->max_translations_per_fib, fib_index,
+ sm->max_translations_per_thread);
sm->max_translations_per_fib[fib_index] = session_limit;
return 0;
}
@@ -3108,8 +3259,8 @@ nat44_update_session_limit (u32 session_limit, u32 vrf_id)
return 1;
sm->max_translations_per_thread = nat44_get_max_session_limit ();
- stat_segment_set_state_counter (sm->max_cfg_sessions_gauge,
- sm->max_translations_per_thread);
+ vlib_stats_set_gauge (sm->max_cfg_sessions_gauge,
+ sm->max_translations_per_thread);
sm->translation_buckets =
nat_calc_bihash_buckets (sm->max_translations_per_thread);
@@ -3119,11 +3270,11 @@ nat44_update_session_limit (u32 session_limit, u32 vrf_id)
}
static void
-nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
- u32 translation_buckets)
+nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations)
{
dlist_elt_t *head;
+ pool_alloc (tsm->per_vrf_sessions_pool, translations);
pool_alloc (tsm->sessions, translations);
pool_alloc (tsm->lru_pool, translations);
@@ -3149,7 +3300,7 @@ nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations,
}
static void
-reinit_ed_flow_hash ()
+nat44_ed_flow_hash_init ()
{
snat_main_t *sm = &snat_main;
// we expect 2 flows per session, so multiply translation_buckets by 2
@@ -3160,34 +3311,16 @@ reinit_ed_flow_hash ()
}
static void
-nat44_ed_db_init (u32 translations, u32 translation_buckets)
+nat44_ed_db_init ()
{
snat_main_t *sm = &snat_main;
snat_main_per_thread_data_t *tsm;
- u32 static_mapping_buckets = 1024;
- u32 static_mapping_memory_size = 64 << 20;
-
- reinit_ed_flow_hash ();
-
- clib_bihash_init_8_8 (&sm->static_mapping_by_local,
- "static_mapping_by_local", static_mapping_buckets,
- static_mapping_memory_size);
- clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_local,
- format_static_mapping_kvp);
- clib_bihash_init_8_8 (&sm->static_mapping_by_external,
- "static_mapping_by_external", static_mapping_buckets,
- static_mapping_memory_size);
- clib_bihash_set_kvp_format_fn_8_8 (&sm->static_mapping_by_external,
- format_static_mapping_kvp);
+ nat44_ed_flow_hash_init ();
- if (sm->pat)
+ vec_foreach (tsm, sm->per_thread_data)
{
- vec_foreach (tsm, sm->per_thread_data)
- {
- nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
- sm->translation_buckets);
- }
+ nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread);
}
}
@@ -3196,283 +3329,299 @@ nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm)
{
pool_free (tsm->lru_pool);
pool_free (tsm->sessions);
- vec_free (tsm->per_vrf_sessions_vec);
+ pool_free (tsm->per_vrf_sessions_pool);
}
static void
-nat44_ed_db_free ()
+nat44_ed_flow_hash_free ()
{
snat_main_t *sm = &snat_main;
- snat_main_per_thread_data_t *tsm;
- pool_free (sm->static_mappings);
clib_bihash_free_16_8 (&sm->flow_hash);
- clib_bihash_free_8_8 (&sm->static_mapping_by_local);
- clib_bihash_free_8_8 (&sm->static_mapping_by_external);
+}
+
+static void
+nat44_ed_db_free ()
+{
+ snat_main_t *sm = &snat_main;
+ snat_main_per_thread_data_t *tsm;
- if (sm->pat)
+ vec_foreach (tsm, sm->per_thread_data)
{
- vec_foreach (tsm, sm->per_thread_data)
- {
- nat44_ed_worker_db_free (tsm);
- }
+ nat44_ed_worker_db_free (tsm);
}
+
+ nat44_ed_flow_hash_free ();
}
void
nat44_ed_sessions_clear ()
{
snat_main_t *sm = &snat_main;
- snat_main_per_thread_data_t *tsm;
-
- reinit_ed_flow_hash ();
- if (sm->pat)
- {
- vec_foreach (tsm, sm->per_thread_data)
- {
-
- nat44_ed_worker_db_free (tsm);
- nat44_ed_worker_db_init (tsm, sm->max_translations_per_thread,
- sm->translation_buckets);
- }
- }
+ nat44_ed_db_free ();
+ nat44_ed_db_init ();
vlib_zero_simple_counter (&sm->total_sessions, 0);
}
static void
-nat_ip4_add_del_addr_only_sm_cb (ip4_main_t * im,
- uword opaque,
- u32 sw_if_index,
- ip4_address_t * address,
- u32 address_length,
- u32 if_address_index, u32 is_delete)
+nat44_ed_add_del_static_mapping_cb (ip4_main_t *im, uword opaque,
+ u32 sw_if_index, ip4_address_t *address,
+ u32 address_length, u32 if_address_index,
+ u32 is_delete)
{
+ snat_static_mapping_resolve_t *rp;
snat_main_t *sm = &snat_main;
- snat_static_map_resolve_t *rp;
- snat_static_mapping_t *m;
- clib_bihash_kv_8_8_t kv, value;
- ip4_address_t l_addr;
- int i, rv;
+ int rv = 0;
if (!sm->enabled)
- return;
-
- for (i = 0; i < vec_len (sm->to_resolve); i++)
{
- rp = sm->to_resolve + i;
- if (rp->addr_only == 0)
- continue;
- if (rp->sw_if_index == sw_if_index)
- goto match;
+ return;
}
- return;
-
-match:
- init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
- sm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
- m = 0;
- else
- m = pool_elt_at_index (sm->static_mappings, value.value);
-
- if (!is_delete)
+ vec_foreach (rp, sm->sm_to_resolve)
{
- /* Don't trip over lease renewal, static config */
- if (m)
- return;
- }
- else
- {
- if (!m)
- return;
+ if (sw_if_index == rp->sw_if_index)
+ {
+ if (is_delete)
+ {
+ if (rp->is_resolved)
+ {
+ rv = nat44_ed_del_static_mapping_internal (
+ rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
+ rp->vrf_id, rp->flags);
+ if (rv)
+ {
+ nat_log_err ("ed del static mapping failed");
+ }
+ else
+ {
+ rp->is_resolved = 0;
+ }
+ }
+ }
+ else
+ {
+ if (!rp->is_resolved)
+ {
+ rv = nat44_ed_add_static_mapping_internal (
+ rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
+ rp->vrf_id, ~0, rp->flags, rp->pool_addr, rp->tag);
+ if (rv)
+ {
+ nat_log_err ("ed add static mapping failed");
+ }
+ else
+ {
+ rp->is_resolved = 1;
+ }
+ }
+ }
+ }
}
+}
- /* Indetity mapping? */
- if (rp->l_addr.as_u32 == 0)
- l_addr.as_u32 = address[0].as_u32;
- else
- l_addr.as_u32 = rp->l_addr.as_u32;
+static int
+nat44_ed_get_addr_resolve_record (u32 sw_if_index, u8 twice_nat, int *out)
+{
+ snat_main_t *sm = &snat_main;
+ snat_address_resolve_t *rp;
+ int i;
- if (is_delete)
+ for (i = 0; i < vec_len (sm->addr_to_resolve); i++)
{
- rv = nat44_ed_del_static_mapping (l_addr, address[0], rp->l_port,
- rp->e_port, rp->proto, rp->vrf_id, ~0,
- rp->flags);
- }
- else
- {
- rv = nat44_ed_add_static_mapping (l_addr, address[0], rp->l_port,
- rp->e_port, rp->proto, rp->vrf_id, ~0,
- rp->flags, rp->pool_addr, rp->tag);
+ rp = sm->addr_to_resolve + i;
+
+ if ((rp->sw_if_index == sw_if_index) && (rp->is_twice_nat == twice_nat))
+ {
+ if (out)
+ {
+ *out = i;
+ }
+ return 0;
+ }
}
- if (rv)
+ return 1;
+}
+static int
+nat44_ed_del_addr_resolve_record (u32 sw_if_index, u8 twice_nat)
+{
+ snat_main_t *sm = &snat_main;
+ int i;
+ if (!nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
{
- nat_elog_notice_X1 (sm, "add_static_mapping returned %d", "i4", rv);
+ vec_del1 (sm->addr_to_resolve, i);
+ return 0;
}
+ return 1;
}
static void
-snat_ip4_add_del_interface_address_cb (ip4_main_t * im,
- uword opaque,
- u32 sw_if_index,
- ip4_address_t * address,
+nat44_ed_add_del_interface_address_cb (ip4_main_t *im, uword opaque,
+ u32 sw_if_index, ip4_address_t *address,
u32 address_length,
u32 if_address_index, u32 is_delete)
{
snat_main_t *sm = &snat_main;
- snat_static_map_resolve_t *rp;
- ip4_address_t l_addr;
- int i, j;
- int rv;
+ snat_address_resolve_t *arp;
+ snat_address_t *ap;
u8 twice_nat = 0;
- snat_address_t *addresses = sm->addresses;
+ int i, rv;
if (!sm->enabled)
- return;
-
- for (i = 0; i < vec_len (sm->auto_add_sw_if_indices); i++)
{
- if (sw_if_index == sm->auto_add_sw_if_indices[i])
- goto match;
+ return;
}
- for (i = 0; i < vec_len (sm->auto_add_sw_if_indices_twice_nat); i++)
+ if (nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
{
twice_nat = 1;
- addresses = sm->twice_nat_addresses;
- if (sw_if_index == sm->auto_add_sw_if_indices_twice_nat[i])
- goto match;
+ if (nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, &i))
+ {
+ u32 fib_index =
+ ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
+ vec_foreach (ap, sm->addresses)
+ {
+ if ((fib_index == ap->fib_index) &&
+ (address->as_u32 == ap->addr.as_u32))
+ {
+ if (!is_delete)
+ {
+ ap->addr_len = address_length;
+ ap->sw_if_index = sw_if_index;
+ ap->net.as_u32 =
+ ap->addr.as_u32 & ip4_main.fib_masks[ap->addr_len];
+
+ nat_log_debug (
+ "pool addr %U binds to -> sw_if_idx: %u net: %U/%u",
+ format_ip4_address, &ap->addr, ap->sw_if_index,
+ format_ip4_address, &ap->net, ap->addr_len);
+ }
+ else
+ {
+ ap->addr_len = ~0;
+ }
+ break;
+ }
+ }
+ return;
+ }
}
- return;
+ arp = sm->addr_to_resolve + i;
-match:
if (!is_delete)
{
- /* Don't trip over lease renewal, static config */
- for (j = 0; j < vec_len (addresses); j++)
- if (addresses[j].addr.as_u32 == address->as_u32)
+ if (arp->is_resolved)
+ {
return;
+ }
- (void) snat_add_address (sm, address, ~0, twice_nat);
- /* Scan static map resolution vector */
- for (j = 0; j < vec_len (sm->to_resolve); j++)
+ rv = nat44_ed_add_address (address, ~0, arp->is_twice_nat);
+ if (0 == rv)
{
- rp = sm->to_resolve + j;
- if (rp->addr_only)
- continue;
- /* On this interface? */
- if (rp->sw_if_index == sw_if_index)
- {
-
- // TODO: remove if not needed (handled by function)
- /* Indetity mapping? */
- if (rp->l_addr.as_u32 == 0)
- l_addr.as_u32 = address[0].as_u32;
- else
- l_addr.as_u32 = rp->l_addr.as_u32;
-
- /* Add the static mapping */
- rv = nat44_ed_add_static_mapping (
- l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
- rp->vrf_id, ~0, rp->flags, rp->pool_addr, rp->tag);
- if (rv)
- {
- nat_elog_notice_X1 (sm, "add_static_mapping returned %d",
- "i4", rv);
- }
- }
+ arp->is_resolved = 1;
}
- return;
}
else
{
- (void) snat_del_address (sm, address[0], 1, twice_nat);
- return;
+ if (!arp->is_resolved)
+ {
+ return;
+ }
+
+ rv = nat44_ed_del_address (address[0], arp->is_twice_nat);
+ if (0 == rv)
+ {
+ arp->is_resolved = 0;
+ }
}
}
int
-snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
- u8 twice_nat)
+nat44_ed_add_interface_address (u32 sw_if_index, u8 twice_nat)
{
+ snat_main_t *sm = &snat_main;
ip4_main_t *ip4_main = sm->ip4_main;
ip4_address_t *first_int_addr;
- snat_static_map_resolve_t *rp;
- u32 *indices_to_delete = 0;
- int i, j;
- u32 *auto_add_sw_if_indices =
- twice_nat ? sm->
- auto_add_sw_if_indices_twice_nat : sm->auto_add_sw_if_indices;
+ snat_address_resolve_t *ap;
+ int rv;
- first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0 /* just want the address */
- );
+ if (!sm->enabled)
+ {
+ nat_log_err ("nat44 is disabled");
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
- for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
+ if (!nat44_ed_get_addr_resolve_record (sw_if_index, twice_nat, 0))
{
- if (auto_add_sw_if_indices[i] == sw_if_index)
- {
- if (is_del)
- {
- /* if have address remove it */
- if (first_int_addr)
- (void) snat_del_address (sm, first_int_addr[0], 1, twice_nat);
- else
- {
- for (j = 0; j < vec_len (sm->to_resolve); j++)
- {
- rp = sm->to_resolve + j;
- if (rp->sw_if_index == sw_if_index)
- vec_add1 (indices_to_delete, j);
- }
- if (vec_len (indices_to_delete))
- {
- for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
- vec_del1 (sm->to_resolve, j);
- vec_free (indices_to_delete);
- }
- }
- if (twice_nat)
- vec_del1 (sm->auto_add_sw_if_indices_twice_nat, i);
- else
- vec_del1 (sm->auto_add_sw_if_indices, i);
- }
- else
- return VNET_API_ERROR_VALUE_EXIST;
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
- return 0;
+ vec_add2 (sm->addr_to_resolve, ap, 1);
+ ap->sw_if_index = sw_if_index;
+ ap->is_twice_nat = twice_nat;
+ ap->is_resolved = 0;
+
+ first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
+ if (first_int_addr)
+ {
+ rv = nat44_ed_add_address (first_int_addr, ~0, twice_nat);
+ if (0 != rv)
+ {
+ nat44_ed_del_addr_resolve_record (sw_if_index, twice_nat);
+ return rv;
}
+ ap->is_resolved = 1;
}
- if (is_del)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ return 0;
+}
- /* add to the auto-address list */
- if (twice_nat)
- vec_add1 (sm->auto_add_sw_if_indices_twice_nat, sw_if_index);
- else
- vec_add1 (sm->auto_add_sw_if_indices, sw_if_index);
+int
+nat44_ed_del_interface_address (u32 sw_if_index, u8 twice_nat)
+{
+ snat_main_t *sm = &snat_main;
+ ip4_main_t *ip4_main = sm->ip4_main;
+ ip4_address_t *first_int_addr;
- /* If the address is already bound - or static - add it now */
+ if (!sm->enabled)
+ {
+ nat_log_err ("nat44 is disabled");
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ if (nat44_ed_del_addr_resolve_record (sw_if_index, twice_nat))
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
if (first_int_addr)
- (void) snat_add_address (sm, first_int_addr, ~0, twice_nat);
+ {
+ return nat44_ed_del_address (first_int_addr[0], twice_nat);
+ }
return 0;
}
int
-nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
- ip4_address_t * eh_addr, u16 eh_port, u8 proto,
+nat44_ed_del_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
+ ip4_address_t *eh_addr, u16 eh_port, u8 proto,
u32 vrf_id, int is_in)
{
ip4_header_t ip;
clib_bihash_kv_16_8_t kv, value;
- u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+ u32 fib_index;
snat_session_t *s;
snat_main_per_thread_data_t *tsm;
+ if (!sm->enabled)
+ {
+ return VNET_API_ERROR_UNSUPPORTED;
+ }
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
ip.dst_address.as_u32 = ip.src_address.as_u32 = addr->as_u32;
if (sm->num_workers > 1)
tsm = vec_elt_at_index (
@@ -3481,7 +3630,8 @@ nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
else
tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
- init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto);
+ init_ed_k (&kv, addr->as_u32, port, eh_addr->as_u32, eh_port, fib_index,
+ proto);
if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
{
return VNET_API_ERROR_NO_SUCH_ENTRY;
@@ -3490,7 +3640,7 @@ nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value)))
return VNET_API_ERROR_UNSPECIFIED;
s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
- nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
+ nat44_ed_free_session_data (sm, s, tsm - sm->per_thread_data, 0);
nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1);
return 0;
}
@@ -3596,13 +3746,13 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
static_always_inline void
nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
- nat_6t_flow_t *f, nat_protocol_t proto,
+ nat_6t_flow_t *f, ip_protocol_t proto,
int is_icmp_inner_ip4, int skip_saddr_rewrite)
{
udp_header_t *udp = ip4_next_header (ip);
tcp_header_t *tcp = (tcp_header_t *) udp;
- if ((NAT_PROTOCOL_TCP == proto || NAT_PROTOCOL_UDP == proto) &&
+ if ((IP_PROTOCOL_TCP == proto || IP_PROTOCOL_UDP == proto) &&
!vnet_buffer (b)->ip.reass.is_non_first_fragment)
{
if (!is_icmp_inner_ip4)
@@ -3620,7 +3770,7 @@ nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
udp->dst_port = f->rewrite.sport;
}
- if (NAT_PROTOCOL_TCP == proto)
+ if (IP_PROTOCOL_TCP == proto)
{
ip_csum_t tcp_sum = tcp->checksum;
tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta);
@@ -3628,7 +3778,7 @@ nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
mss_clamping (sm->mss_clamping, tcp, &tcp_sum);
tcp->checksum = ip_csum_fold (tcp_sum);
}
- else if (proto == NAT_PROTOCOL_UDP && udp->checksum)
+ else if (IP_PROTOCOL_UDP == proto && udp->checksum)
{
ip_csum_t udp_sum = udp->checksum;
udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta);
@@ -3685,7 +3835,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED;
icmp46_header_t *icmp = ip4_next_header (ip);
- icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1);
+ nat_icmp_echo_header_t *echo = (nat_icmp_echo_header_t *) (icmp + 1);
if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment))
{
@@ -3694,15 +3844,6 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
return NAT_ED_TRNSL_ERR_PACKET_TRUNCATED;
}
- ssize_t icmp_offset = (u8 *) icmp - (u8 *) vlib_buffer_get_current (b);
- ip_csum_t sum =
- ip_incremental_checksum (0, icmp, b->current_length - icmp_offset);
- sum = (u16) ~ip_csum_fold (sum);
- if (sum != 0)
- {
- return NAT_ED_TRNSL_ERR_INVALID_CSUM;
- }
-
if (!icmp_type_is_error_message (icmp->type))
{
if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) &&
@@ -3710,7 +3851,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
{
ip_csum_t sum = icmp->checksum;
sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id,
- icmp_echo_header_t,
+ nat_icmp_echo_header_t,
identifier /* changed member */);
echo->identifier = f->rewrite.icmp_id;
icmp->checksum = ip_csum_fold (sum);
@@ -3718,6 +3859,15 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
}
else
{
+ ip_csum_t sum = ip_incremental_checksum (
+ 0, icmp,
+ clib_net_to_host_u16 (ip->length) - ip4_header_bytes (ip));
+ sum = (u16) ~ip_csum_fold (sum);
+ if (sum != 0)
+ {
+ return NAT_ED_TRNSL_ERR_INVALID_CSUM;
+ }
+
// errors are not fragmented
ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1);
@@ -3726,8 +3876,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
return NAT_ED_TRNSL_ERR_INNER_IP_CORRUPT;
}
- nat_protocol_t inner_proto =
- ip_proto_to_nat_proto (inner_ip->protocol);
+ ip_protocol_t inner_proto = inner_ip->protocol;
ip_csum_t old_icmp_sum = icmp->checksum;
ip_csum_t old_inner_ip_sum = inner_ip->checksum;
@@ -3739,7 +3888,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
switch (inner_proto)
{
- case NAT_PROTOCOL_UDP:
+ case IP_PROTOCOL_UDP:
udp = (udp_header_t *) (inner_ip + 1);
if (!it_fits (vm, b, udp, sizeof (*udp)))
{
@@ -3760,7 +3909,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
new_icmp_sum = ip_csum_fold (new_icmp_sum);
icmp->checksum = new_icmp_sum;
break;
- case NAT_PROTOCOL_TCP:
+ case IP_PROTOCOL_TCP:
tcp = (tcp_header_t *) (inner_ip + 1);
if (!it_fits (vm, b, tcp, sizeof (*tcp)))
{
@@ -3781,7 +3930,10 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
new_icmp_sum = ip_csum_fold (new_icmp_sum);
icmp->checksum = new_icmp_sum;
break;
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
+ nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto,
+ 1 /* is_icmp_inner_ip4 */,
+ 0 /* skip_saddr_rewrite */);
if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
{
icmp46_header_t *inner_icmp = ip4_next_header (inner_ip);
@@ -3789,19 +3941,21 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
{
return NAT_ED_TRNSL_ERR_PACKET_TRUNCATED;
}
- icmp_echo_header_t *inner_echo =
- (icmp_echo_header_t *) (inner_icmp + 1);
+ nat_icmp_echo_header_t *inner_echo =
+ (nat_icmp_echo_header_t *) (inner_icmp + 1);
if (f->rewrite.icmp_id != inner_echo->identifier)
{
ip_csum_t sum = icmp->checksum;
- sum = ip_csum_update (
- sum, inner_echo->identifier, f->rewrite.icmp_id,
- icmp_echo_header_t, identifier /* changed member */);
+ sum = ip_csum_update (sum, inner_echo->identifier,
+ f->rewrite.icmp_id,
+ nat_icmp_echo_header_t,
+ identifier /* changed member */);
icmp->checksum = ip_csum_fold (sum);
ip_csum_t inner_sum = inner_icmp->checksum;
inner_sum = ip_csum_update (
sum, inner_echo->identifier, f->rewrite.icmp_id,
- icmp_echo_header_t, identifier /* changed member */);
+ nat_icmp_echo_header_t,
+ identifier /* changed member */);
inner_icmp->checksum = ip_csum_fold (inner_sum);
inner_echo->identifier = f->rewrite.icmp_id;
}
@@ -3820,7 +3974,7 @@ nat_6t_flow_icmp_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
static_always_inline nat_translation_error_e
nat_6t_flow_buf_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
ip4_header_t *ip, nat_6t_flow_t *f,
- nat_protocol_t proto, int is_output_feature,
+ ip_protocol_t proto, int is_output_feature,
int is_i2o)
{
if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
@@ -3828,7 +3982,7 @@ nat_6t_flow_buf_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index;
}
- if (NAT_PROTOCOL_ICMP == proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
if (ip->src_address.as_u32 != f->rewrite.saddr.as_u32)
{
@@ -3856,7 +4010,7 @@ nat_6t_flow_buf_translate (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
nat_translation_error_e
nat_6t_flow_buf_translate_i2o (vlib_main_t *vm, snat_main_t *sm,
vlib_buffer_t *b, ip4_header_t *ip,
- nat_6t_flow_t *f, nat_protocol_t proto,
+ nat_6t_flow_t *f, ip_protocol_t proto,
int is_output_feature)
{
return nat_6t_flow_buf_translate (vm, sm, b, ip, f, proto, is_output_feature,
@@ -3866,116 +4020,130 @@ nat_6t_flow_buf_translate_i2o (vlib_main_t *vm, snat_main_t *sm,
nat_translation_error_e
nat_6t_flow_buf_translate_o2i (vlib_main_t *vm, snat_main_t *sm,
vlib_buffer_t *b, ip4_header_t *ip,
- nat_6t_flow_t *f, nat_protocol_t proto,
+ nat_6t_flow_t *f, ip_protocol_t proto,
int is_output_feature)
{
return nat_6t_flow_buf_translate (vm, sm, b, ip, f, proto, is_output_feature,
0 /* is_i2o */);
}
-u8 *
-format_nat_6t (u8 *s, va_list *args)
+static_always_inline void
+nat_syslog_nat44_sess (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
+ u16 isport, ip4_address_t *xsaddr, u16 xsport,
+ ip4_address_t *idaddr, u16 idport,
+ ip4_address_t *xdaddr, u16 xdport, u8 proto, u8 is_add,
+ u8 is_twicenat)
{
- nat_6t_t *t = va_arg (*args, nat_6t_t *);
+ syslog_msg_t syslog_msg;
+ fib_table_t *fib;
- s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u",
- format_ip4_address, t->saddr.as_u8,
- clib_net_to_host_u16 (t->sport), format_ip4_address,
- t->daddr.as_u8, clib_net_to_host_u16 (t->dport),
- format_ip_protocol, t->proto, t->fib_index);
- return s;
+ if (!syslog_is_enabled ())
+ return;
+
+ if (syslog_severity_filter_block (SADD_SDEL_SEVERITY))
+ return;
+
+ fib = fib_table_get (sfibix, FIB_PROTOCOL_IP4);
+
+ syslog_msg_init (&syslog_msg, NAT_FACILITY, SADD_SDEL_SEVERITY, NAT_APPNAME,
+ is_add ? SADD_MSGID : SDEL_MSGID);
+
+ syslog_msg_sd_init (&syslog_msg, NSESS_SDID);
+ syslog_msg_add_sd_param (&syslog_msg, SSUBIX_SDPARAM_NAME, "%d", ssubix);
+ syslog_msg_add_sd_param (&syslog_msg, SVLAN_SDPARAM_NAME, "%d",
+ fib->ft_table_id);
+ syslog_msg_add_sd_param (&syslog_msg, IATYP_SDPARAM_NAME, IATYP_IPV4);
+ syslog_msg_add_sd_param (&syslog_msg, ISADDR_SDPARAM_NAME, "%U",
+ format_ip4_address, isaddr);
+ syslog_msg_add_sd_param (&syslog_msg, ISPORT_SDPARAM_NAME, "%d",
+ clib_net_to_host_u16 (isport));
+ syslog_msg_add_sd_param (&syslog_msg, XATYP_SDPARAM_NAME, IATYP_IPV4);
+ syslog_msg_add_sd_param (&syslog_msg, XSADDR_SDPARAM_NAME, "%U",
+ format_ip4_address, xsaddr);
+ syslog_msg_add_sd_param (&syslog_msg, XSPORT_SDPARAM_NAME, "%d",
+ clib_net_to_host_u16 (xsport));
+ syslog_msg_add_sd_param (&syslog_msg, PROTO_SDPARAM_NAME, "%d", proto);
+ syslog_msg_add_sd_param (&syslog_msg, XDADDR_SDPARAM_NAME, "%U",
+ format_ip4_address, xdaddr);
+ syslog_msg_add_sd_param (&syslog_msg, XDPORT_SDPARAM_NAME, "%d",
+ clib_net_to_host_u16 (xdport));
+ if (is_twicenat)
+ {
+ syslog_msg_add_sd_param (&syslog_msg, IDADDR_SDPARAM_NAME, "%U",
+ format_ip4_address, idaddr);
+ syslog_msg_add_sd_param (&syslog_msg, IDPORT_SDPARAM_NAME, "%d",
+ clib_net_to_host_u16 (idport));
+ }
+
+ syslog_msg_send (&syslog_msg);
}
-u8 *
-format_nat_ed_translation_error (u8 *s, va_list *args)
+void
+nat_syslog_nat44_sadd (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
+ u16 isport, ip4_address_t *idaddr, u16 idport,
+ ip4_address_t *xsaddr, u16 xsport,
+ ip4_address_t *xdaddr, u16 xdport, u8 proto,
+ u8 is_twicenat)
{
- nat_translation_error_e e = va_arg (*args, nat_translation_error_e);
-
- switch (e)
- {
- case NAT_ED_TRNSL_ERR_SUCCESS:
- s = format (s, "success");
- break;
- case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED:
- s = format (s, "translation-failed");
- break;
- case NAT_ED_TRNSL_ERR_FLOW_MISMATCH:
- s = format (s, "flow-mismatch");
- break;
- case NAT_ED_TRNSL_ERR_PACKET_TRUNCATED:
- s = format (s, "packet-truncated");
- break;
- case NAT_ED_TRNSL_ERR_INNER_IP_CORRUPT:
- s = format (s, "inner-ip-corrupted");
- break;
- case NAT_ED_TRNSL_ERR_INVALID_CSUM:
- s = format (s, "invalid-checksum");
- break;
- }
- return s;
+ nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
+ idaddr, idport, xdaddr, xdport, proto, 1,
+ is_twicenat);
}
-u8 *
-format_nat_6t_flow (u8 *s, va_list *args)
+void
+nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
+ u16 isport, ip4_address_t *idaddr, u16 idport,
+ ip4_address_t *xsaddr, u16 xsport,
+ ip4_address_t *xdaddr, u16 xdport, u8 proto,
+ u8 is_twicenat)
+{
+ nat_syslog_nat44_sess (ssubix, sfibix, isaddr, isport, xsaddr, xsport,
+ idaddr, idport, xdaddr, xdport, proto, 0,
+ is_twicenat);
+}
+__clib_export void
+nat44_original_dst_lookup (ip4_address_t *i2o_src, u16 i2o_src_port,
+ ip4_address_t *i2o_dst, u16 i2o_dst_port,
+ ip_protocol_t proto, u32 *original_dst,
+ u16 *original_dst_port)
{
- nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *);
+ snat_main_per_thread_data_t *tsm;
+ snat_main_t *sm = &snat_main;
+ u32 fib_index = 0;
+ snat_session_t *s;
+ ip4_header_t ip;
- s = format (s, "match: %U ", format_nat_6t, &f->match);
- int r = 0;
- if (f->ops & NAT_FLOW_OP_SADDR_REWRITE)
- {
- s = format (s, "rewrite: saddr %U ", format_ip4_address,
- f->rewrite.saddr.as_u8);
- r = 1;
- }
- if (f->ops & NAT_FLOW_OP_SPORT_REWRITE)
- {
- if (!r)
- {
- s = format (s, "rewrite: ");
- r = 1;
- }
- s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport));
- }
- if (f->ops & NAT_FLOW_OP_DADDR_REWRITE)
+ ip.src_address.as_u32 = i2o_src->as_u32;
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, 0);
+
+ if (sm->num_workers > 1)
{
- if (!r)
- {
- s = format (s, "rewrite: ");
- r = 1;
- }
- s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8);
+ tsm = vec_elt_at_index (
+ sm->per_thread_data,
+ nat44_ed_get_in2out_worker_index (0, &ip, fib_index, 0));
}
- if (f->ops & NAT_FLOW_OP_DPORT_REWRITE)
+ else
{
- if (!r)
- {
- s = format (s, "rewrite: ");
- r = 1;
- }
- s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport));
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
}
- if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
+
+ /* query */
+ clib_bihash_kv_16_8_t kv = { 0 }, value;
+ init_ed_k (&kv, i2o_src->as_u32, i2o_src_port, i2o_dst->as_u32, i2o_dst_port,
+ fib_index, proto);
+ if (tsm->sessions == NULL ||
+ clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
{
- if (!r)
- {
- s = format (s, "rewrite: ");
- r = 1;
- }
- s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id));
+ return;
}
- if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
+ s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value));
+ if (s)
{
- if (!r)
- {
- s = format (s, "rewrite: ");
- r = 1;
- }
- s = format (s, "txfib %u ", f->rewrite.fib_index);
+ *original_dst = s->i2o.rewrite.saddr.as_u32;
+ *original_dst_port = s->i2o.rewrite.sport;
}
- return s;
+ return;
}
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/nat/nat44-ed/nat44_ed.h b/src/plugins/nat/nat44-ed/nat44_ed.h
index b74b46f81b7..706511475cf 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed.h
+++ b/src/plugins/nat/nat44-ed/nat44_ed.h
@@ -12,10 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-/**
- * @file nat.c
- * NAT plugin global declarations
- */
+
#ifndef __included_nat44_ed_h__
#define __included_nat44_ed_h__
@@ -39,6 +36,16 @@
/* default number of worker handoff frame queue elements */
#define NAT_FQ_NELTS_DEFAULT 64
+/* number of attempts to get a port for ED overloading algorithm, if rolling
+ * a dice this many times doesn't produce a free port, it's treated
+ * as if there were no free ports available to conserve resources */
+#define ED_PORT_ALLOC_ATTEMPTS (10)
+
+/* system ports range is 0-1023, first user port is 1024 per
+ * https://www.rfc-editor.org/rfc/rfc6335#section-6
+ */
+#define ED_USER_PORT_OFFSET 1024
+
/* NAT buffer flags */
#define SNAT_FLAG_HAIRPINNING (1 << 0)
@@ -58,16 +65,9 @@ typedef enum nat44_config_flags_t_
typedef struct
{
- /* nat44 plugin features */
- u8 static_mapping_only;
- u8 connection_tracking;
-
u32 inside_vrf;
u32 outside_vrf;
-
- /* maximum number of sessions */
u32 sessions;
-
} nat44_config_t;
typedef enum
@@ -91,46 +91,12 @@ typedef struct
u32 arc_next_index;
} nat_pre_trace_t;
-/* External address and port allocation modes */
-#define foreach_nat_addr_and_port_alloc_alg \
- _(0, DEFAULT, "default") \
- _(1, MAPE, "map-e") \
- _(2, RANGE, "port-range")
-
-typedef enum
-{
-#define _(v, N, s) NAT_ADDR_AND_PORT_ALLOC_ALG_##N = v,
- foreach_nat_addr_and_port_alloc_alg
-#undef _
-} nat_addr_and_port_alloc_alg_t;
-
-/* Session state */
-#define foreach_snat_session_state \
- _(0, UNKNOWN, "unknown") \
- _(1, UDP_ACTIVE, "udp-active") \
- _(2, TCP_SYN_SENT, "tcp-syn-sent") \
- _(3, TCP_ESTABLISHED, "tcp-established") \
- _(4, TCP_FIN_WAIT, "tcp-fin-wait") \
- _(5, TCP_CLOSE_WAIT, "tcp-close-wait") \
- _(6, TCP_CLOSING, "tcp-closing") \
- _(7, TCP_LAST_ACK, "tcp-last-ack") \
- _(8, TCP_CLOSED, "tcp-closed") \
- _(9, ICMP_ACTIVE, "icmp-active")
-
-typedef enum
-{
-#define _(v, N, s) SNAT_SESSION_##N = v,
- foreach_snat_session_state
-#undef _
-} snat_session_state_t;
-
#define foreach_nat_in2out_ed_error \
_ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \
_ (OUT_OF_PORTS, "out of ports") \
_ (BAD_ICMP_TYPE, "unsupported ICMP type") \
_ (MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \
_ (NON_SYN, "non-SYN packet try to create session") \
- _ (TCP_CLOSED, "drops due to TCP in transitory timeout") \
_ (TRNSL_FAILED, "couldn't translate packet")
typedef enum
@@ -160,19 +126,42 @@ typedef enum
NAT_OUT2IN_ED_N_ERROR,
} nat_out2in_ed_error_t;
+typedef enum
+{
+ NAT44_ED_TCP_FLAG_FIN = 0,
+ NAT44_ED_TCP_FLAG_SYN,
+ NAT44_ED_TCP_FLAG_RST,
+ NAT44_ED_TCP_FLAG_ACK,
+ NAT44_ED_TCP_N_FLAG,
+} nat44_ed_tcp_flag_e;
+
+typedef enum
+{
+ NAT44_ED_DIR_I2O = 0,
+ NAT44_ED_DIR_O2I,
+ NAT44_ED_N_DIR,
+} nat44_ed_dir_e;
/* Endpoint dependent TCP session state */
-#define NAT44_SES_I2O_FIN 1
-#define NAT44_SES_O2I_FIN 2
-#define NAT44_SES_I2O_FIN_ACK 4
-#define NAT44_SES_O2I_FIN_ACK 8
-#define NAT44_SES_I2O_SYN 16
-#define NAT44_SES_O2I_SYN 32
-#define NAT44_SES_RST 64
+typedef enum
+{
+ NAT44_ED_TCP_STATE_CLOSED = 0,
+ NAT44_ED_TCP_STATE_ESTABLISHED,
+ NAT44_ED_TCP_STATE_CLOSING,
+ NAT44_ED_TCP_N_STATE,
+} nat44_ed_tcp_state_e;
+
+format_function_t format_ed_session_kvp;
+format_function_t format_snat_session;
+format_function_t format_snat_static_mapping;
+format_function_t format_snat_static_map_to_resolve;
+format_function_t format_nat_ed_translation_error;
+format_function_t format_nat_6t_flow;
+format_function_t format_nat_6t;
+format_function_t format_nat44_ed_tcp_state;
/* Session flags */
#define SNAT_SESSION_FLAG_STATIC_MAPPING (1 << 0)
-#define SNAT_SESSION_FLAG_UNKNOWN_PROTO (1 << 1)
#define SNAT_SESSION_FLAG_LOAD_BALANCING (1 << 2)
#define SNAT_SESSION_FLAG_TWICE_NAT (1 << 3)
#define SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT (1 << 4)
@@ -308,7 +297,7 @@ typedef CLIB_PACKED(struct
u16 port;
} in2out;
- nat_protocol_t nat_proto;
+ ip_protocol_t proto;
nat_6t_flow_t i2o;
nat_6t_flow_t o2i;
@@ -341,10 +330,8 @@ typedef CLIB_PACKED(struct
u16 ext_host_nat_port;
/* TCP session state */
- u8 state;
- u32 i2o_fin_seq;
- u32 o2i_fin_seq;
- u64 tcp_closed_timestamp;
+ u8 tcp_flags[NAT44_ED_N_DIR];
+ nat44_ed_tcp_state_e tcp_state;
/* per vrf sessions index */
u32 per_vrf_sessions_index;
@@ -355,29 +342,14 @@ typedef CLIB_PACKED(struct
typedef struct
{
ip4_address_t addr;
+ ip4_address_t net;
+ u32 sw_if_index;
u32 fib_index;
-#define _(N, i, n, s) \
- u32 busy_##n##_ports; \
- u32 * busy_##n##_ports_per_thread; \
- u32 busy_##n##_port_refcounts[65535];
- foreach_nat_protocol
-#undef _
+ u32 addr_len;
} snat_address_t;
typedef struct
{
- u32 fib_index;
- u32 ref_count;
-} nat_fib_t;
-
-typedef struct
-{
- u32 fib_index;
- u32 refcount;
-} nat_outside_fib_t;
-
-typedef struct
-{
/* backend IP address */
ip4_address_t addr;
/* backend port number */
@@ -412,7 +384,7 @@ typedef enum
typedef struct
{
- /* prefered pool address */
+ /* preferred pool address */
ip4_address_t pool_addr;
/* local IP address */
ip4_address_t local_addr;
@@ -426,7 +398,7 @@ typedef struct
u32 vrf_id;
u32 fib_index;
/* protocol */
- nat_protocol_t proto;
+ ip_protocol_t proto;
/* 0 = disabled, otherwise client IP affinity sticky time in seconds */
u32 affinity;
/* worker threads used by backends/local host */
@@ -449,21 +421,31 @@ typedef struct
typedef struct
{
+ u8 is_resolved;
ip4_address_t l_addr;
ip4_address_t pool_addr;
u16 l_port;
u16 e_port;
u32 sw_if_index;
u32 vrf_id;
- nat_protocol_t proto;
+ ip_protocol_t proto;
u32 flags;
- int addr_only;
- int twice_nat;
- int out2in_only;
- int identity_nat;
- int exact;
u8 *tag;
-} snat_static_map_resolve_t;
+} snat_static_mapping_resolve_t;
+
+typedef struct
+{
+ u8 is_resolved;
+ u8 is_twice_nat;
+ u32 sw_if_index;
+} snat_address_resolve_t;
+
+typedef struct
+{
+ u32 count;
+ u32 sw_if_index;
+ ip4_address_t addr;
+} snat_fib_entry_reg_t;
typedef struct
{
@@ -487,7 +469,7 @@ typedef struct
/* real thread index */
u32 thread_index;
- per_vrf_sessions_t *per_vrf_sessions_vec;
+ per_vrf_sessions_t *per_vrf_sessions_pool;
} snat_main_per_thread_data_t;
@@ -498,17 +480,24 @@ u32 nat44_ed_get_in2out_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
u32 nat44_ed_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip,
u32 rx_fib_index, u8 is_output);
-/* Return worker thread index for given packet */
-/* NAT address and port allocation function */
-typedef int (nat_alloc_out_addr_and_port_function_t) (snat_address_t *
- addresses,
- u32 fib_index,
- u32 thread_index,
- nat_protocol_t proto,
- ip4_address_t * addr,
- u16 * port,
- u16 port_per_thread,
- u32 snat_thread_index);
+typedef struct nat_fib_s
+{
+ u32 fib_index;
+ u32 ref_count;
+} nat_fib_t;
+
+typedef struct vrf_route_s
+{
+ u32 vrf_id;
+ u32 fib_index;
+} vrf_route_t;
+
+typedef struct vrf_table_s
+{
+ u32 table_vrf_id;
+ u32 table_fib_index;
+ vrf_route_t *routes;
+} vrf_table_t;
typedef struct snat_main_s
{
@@ -521,12 +510,6 @@ typedef struct snat_main_s
/* Per thread data */
snat_main_per_thread_data_t *per_thread_data;
- /* Find a static mapping by local */
- clib_bihash_8_8_t static_mapping_by_local;
-
- /* Find a static mapping by external */
- clib_bihash_8_8_t static_mapping_by_external;
-
/* Static mapping pool */
snat_static_mapping_t *static_mappings;
@@ -537,39 +520,40 @@ typedef struct snat_main_s
/* Endpoint dependent lookup table */
clib_bihash_16_8_t flow_hash;
+ // vector of fibs
+ nat_fib_t *fibs;
+
+ u32 inside_vrf_id;
+ u32 inside_fib_index;
+
+ u32 outside_vrf_id;
+ u32 outside_fib_index;
+
+ // vector of outside fibs
+ nat_fib_t *outside_fibs;
+
+ // VRF routing table for dynamic sessions
+ vrf_table_t *vrf_tables;
+
/* Interface pool */
snat_interface_t *interfaces;
snat_interface_t *output_feature_interfaces;
+ // broken api backward compatibility
+ snat_interface_t *output_feature_dummy_interfaces;
/* Vector of outside addresses */
snat_address_t *addresses;
- /* Address and port allocation function */
- nat_alloc_out_addr_and_port_function_t *alloc_addr_and_port;
- /* Address and port allocation type */
- nat_addr_and_port_alloc_alg_t addr_and_port_alloc_alg;
- /* Port set parameters (MAP-E) */
- u8 psid_offset;
- u8 psid_length;
- u16 psid;
- /* Port range parameters */
- u16 start_port;
- u16 end_port;
-
- /* vector of fibs */
- nat_fib_t *fibs;
-
- /* vector of outside fibs */
- nat_outside_fib_t *outside_fibs;
-
/* Vector of twice NAT addresses for external hosts */
snat_address_t *twice_nat_addresses;
- /* sw_if_indices whose intfc addresses should be auto-added */
- u32 *auto_add_sw_if_indices;
- u32 *auto_add_sw_if_indices_twice_nat;
+ /* first interface address should be auto-added */
+ snat_address_resolve_t *addr_to_resolve;
+
+ /* vector of fib entries */
+ snat_fib_entry_reg_t *fib_entry_reg;
/* vector of interface address static mappings to resolve. */
- snat_static_map_resolve_t *to_resolve;
+ snat_static_mapping_resolve_t *sm_to_resolve;
/* Randomize port allocation order */
u32 random_seed;
@@ -579,20 +563,11 @@ typedef struct snat_main_s
u32 fq_in2out_output_index;
u32 fq_out2in_index;
- u32 out2in_node_index;
- u32 in2out_node_index;
- u32 in2out_output_node_index;
-
nat44_config_t rconfig;
- //nat44_config_t cconfig;
/* If forwarding is enabled */
u8 forwarding_enabled;
- /* static mapping config */
- u8 static_mapping_only;
- u8 static_mapping_connection_tracking;
-
/* Is translation memory size calculated or user defined */
u8 translation_memory_size_set;
@@ -600,11 +575,6 @@ typedef struct snat_main_s
u32 max_translations_per_thread;
u32 *max_translations_per_fib;
- u32 outside_vrf_id;
- u32 outside_fib_index;
- u32 inside_vrf_id;
- u32 inside_fib_index;
-
nat_timeouts_t timeouts;
/* TCP MSS clamping */
@@ -657,24 +627,27 @@ typedef struct snat_main_s
u8 log_level;
/* convenience */
- api_main_t *api_main;
ip4_main_t *ip4_main;
- ip_lookup_main_t *ip4_lookup_main;
fib_source_t fib_src_hi;
fib_source_t fib_src_low;
- /* pat - dynamic mapping enabled or conneciton tracking */
- u8 pat;
-
/* number of worker handoff frame queue elements */
u32 frame_queue_nelts;
/* nat44 plugin enabled */
u8 enabled;
- vnet_main_t *vnet_main;
-
+ /* TCP session state machine table:
+ * first dimension is possible states
+ * second dimension is direction (in2out/out2in)
+ * third dimension is TCP flag (SYN, RST, FIN)
+ *
+ * value is next state to change to
+ */
+ nat44_ed_tcp_state_e tcp_state_change_table[NAT44_ED_TCP_N_STATE]
+ [NAT44_ED_N_DIR]
+ [NAT44_ED_TCP_N_FLAG];
} snat_main_t;
typedef struct
@@ -689,106 +662,109 @@ typedef struct
uword *cached_presence_by_ip4_address;
} snat_runtime_t;
+/*
+ * Why is this here? Because we don't need to touch this layer to
+ * simply reply to an icmp. We need to change id to a unique
+ * value to NAT an echo request/reply.
+ */
+
extern snat_main_t snat_main;
-// nat pre ed next_node feature classification
extern vlib_node_registration_t nat_default_node;
extern vlib_node_registration_t nat_pre_in2out_node;
extern vlib_node_registration_t nat_pre_out2in_node;
-extern vlib_node_registration_t snat_in2out_node;
-extern vlib_node_registration_t snat_in2out_output_node;
-extern vlib_node_registration_t snat_out2in_node;
-extern vlib_node_registration_t snat_in2out_worker_handoff_node;
-extern vlib_node_registration_t snat_in2out_output_worker_handoff_node;
-extern vlib_node_registration_t snat_out2in_worker_handoff_node;
extern vlib_node_registration_t nat44_ed_in2out_node;
extern vlib_node_registration_t nat44_ed_in2out_output_node;
extern vlib_node_registration_t nat44_ed_out2in_node;
-extern fib_source_t nat_fib_src_hi;
-extern fib_source_t nat_fib_src_low;
-
-/* format functions */
-format_function_t format_snat_static_mapping;
-format_function_t format_snat_static_map_to_resolve;
-format_function_t format_snat_session;
-format_function_t format_snat_key;
-format_function_t format_static_mapping_key;
-format_function_t format_nat_protocol;
-format_function_t format_nat_addr_and_port_alloc_alg;
-/* unformat functions */
-unformat_function_t unformat_nat_protocol;
+extern vlib_node_registration_t snat_in2out_worker_handoff_node;
+extern vlib_node_registration_t snat_in2out_output_worker_handoff_node;
+extern vlib_node_registration_t snat_out2in_worker_handoff_node;
/** \brief Check if SNAT session is created from static mapping.
@param s SNAT session
- @return 1 if SNAT session is created from static mapping otherwise 0
-*/
-#define snat_is_session_static(s) (s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING)
-
-/** \brief Check if SNAT session for unknown protocol.
- @param s SNAT session
- @return 1 if SNAT session for unknown protocol otherwise 0
+ @return true if SNAT session is created from static mapping otherwise 0
*/
-#define snat_is_unk_proto_session(s) (s->flags & SNAT_SESSION_FLAG_UNKNOWN_PROTO)
+always_inline bool
+nat44_ed_is_session_static (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING;
+}
/** \brief Check if NAT session is twice NAT.
@param s NAT session
- @return 1 if NAT session is twice NAT
+ @return true if NAT session is twice NAT
*/
-#define is_twice_nat_session(s) (s->flags & SNAT_SESSION_FLAG_TWICE_NAT)
+always_inline bool
+nat44_ed_is_twice_nat_session (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_TWICE_NAT;
+}
/** \brief Check if NAT session is load-balancing.
@param s NAT session
- @return 1 if NAT session is load-balancing
+ @return true if NAT session is load-balancing
*/
-#define is_lb_session(s) (s->flags & SNAT_SESSION_FLAG_LOAD_BALANCING)
+always_inline bool
+nat44_ed_is_lb_session (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_LOAD_BALANCING;
+}
/** \brief Check if NAT session is forwarding bypass.
@param s NAT session
- @return 1 if NAT session is load-balancing
-*/
-#define is_fwd_bypass_session(s) (s->flags & SNAT_SESSION_FLAG_FWD_BYPASS)
-
-/** \brief Check if NAT session is endpoint dependent.
- @param s NAT session
- @return 1 if NAT session is endpoint dependent
+ @return true if NAT session is load-balancing
*/
-#define is_ed_session(s) (s->flags & SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT)
+always_inline bool
+na44_ed_is_fwd_bypass_session (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_FWD_BYPASS;
+}
/** \brief Check if NAT session has affinity record.
@param s NAT session
- @return 1 if NAT session has affinity record
+ @return true if NAT session has affinity record
*/
-#define is_affinity_sessions(s) (s->flags & SNAT_SESSION_FLAG_AFFINITY)
+always_inline bool
+nat44_ed_is_affinity_session (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_AFFINITY;
+}
/** \brief Check if exact pool address should be used.
@param s SNAT session
- @return 1 if exact pool address or 0
+ @return true if exact pool address
*/
-#define is_exact_address_session(s) (s->flags & SNAT_SESSION_FLAG_EXACT_ADDRESS)
+always_inline bool
+nat44_ed_is_exact_address_session (snat_session_t *s)
+{
+ return s->flags & SNAT_SESSION_FLAG_EXACT_ADDRESS;
+}
/** \brief Check if NAT interface is inside.
@param i NAT interface
- @return 1 if inside interface
+ @return true if inside interface
*/
-#define nat_interface_is_inside(i) i->flags & NAT_INTERFACE_FLAG_IS_INSIDE
+always_inline bool
+nat44_ed_is_interface_inside (snat_interface_t *i)
+{
+ return i->flags & NAT_INTERFACE_FLAG_IS_INSIDE;
+}
/** \brief Check if NAT interface is outside.
@param i NAT interface
- @return 1 if outside interface
-*/
-#define nat_interface_is_outside(i) i->flags & NAT_INTERFACE_FLAG_IS_OUTSIDE
-
-/** \brief Check if NAT44 endpoint-dependent TCP session is closed.
- @param s NAT session
- @return 1 if session is closed
+ @return true if outside interface
*/
-#define nat44_is_ses_closed(s) s->state == 0xf
+always_inline bool
+nat44_ed_is_interface_outside (snat_interface_t *i)
+{
+ return i->flags & NAT_INTERFACE_FLAG_IS_OUTSIDE;
+}
/** \brief Check if client initiating TCP connection (received SYN from client)
@param t TCP header
- @return 1 if client initiating TCP connection
+ @return true if client initiating TCP connection
*/
always_inline bool
tcp_flags_is_init (u8 f)
@@ -844,111 +820,59 @@ is_sm_switch_address (u32 f)
return (f & NAT_SM_FLAG_SWITCH_ADDRESS);
}
-/* logging */
#define nat_log_err(...) \
vlib_log(VLIB_LOG_LEVEL_ERR, snat_main.log_class, __VA_ARGS__)
#define nat_log_warn(...) \
vlib_log(VLIB_LOG_LEVEL_WARNING, snat_main.log_class, __VA_ARGS__)
-#define nat_log_notice(...) \
- vlib_log(VLIB_LOG_LEVEL_NOTICE, snat_main.log_class, __VA_ARGS__)
#define nat_log_info(...) \
vlib_log(VLIB_LOG_LEVEL_INFO, snat_main.log_class, __VA_ARGS__)
#define nat_log_debug(...)\
vlib_log(VLIB_LOG_LEVEL_DEBUG, snat_main.log_class, __VA_ARGS__)
+clib_error_t *nat44_api_hookup (vlib_main_t *vm);
+
+int snat_set_workers (uword *bitmap);
+
+int nat44_plugin_enable (nat44_config_t c);
+int nat44_plugin_disable ();
+
+int nat44_ed_add_interface (u32 sw_if_index, u8 is_inside);
+int nat44_ed_del_interface (u32 sw_if_index, u8 is_inside);
+int nat44_ed_add_output_interface (u32 sw_if_index);
+int nat44_ed_del_output_interface (u32 sw_if_index);
+
+int nat44_ed_add_address (ip4_address_t *addr, u32 vrf_id, u8 twice_nat);
+int nat44_ed_del_address (ip4_address_t addr, u8 twice_nat);
+int nat44_ed_add_interface_address (u32 sw_if_index, u8 twice_nat);
+int nat44_ed_del_interface_address (u32 sw_if_index, u8 twice_nat);
+
+int nat44_ed_add_del_vrf_table (u32 table_vrf_id, bool is_add);
+int nat44_ed_add_del_vrf_route (u32 table_vrf_id, u32 vrf_id, bool is_add);
+void nat44_ed_del_vrf_tables ();
+
int nat44_ed_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
- u16 l_port, u16 e_port, nat_protocol_t proto,
+ u16 l_port, u16 e_port, ip_protocol_t proto,
u32 vrf_id, u32 sw_if_index, u32 flags,
ip4_address_t pool_addr, u8 *tag);
int nat44_ed_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
- u16 l_port, u16 e_port, nat_protocol_t proto,
+ u16 l_port, u16 e_port, ip_protocol_t proto,
u32 vrf_id, u32 sw_if_index, u32 flags);
int nat44_ed_add_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
- nat_protocol_t proto,
+ ip_protocol_t proto,
nat44_lb_addr_port_t *locals, u32 flags,
u8 *tag, u32 affinity);
int nat44_ed_del_lb_static_mapping (ip4_address_t e_addr, u16 e_port,
- nat_protocol_t proto, u32 flags);
+ ip_protocol_t proto, u32 flags);
int nat44_ed_add_del_lb_static_mapping_local (ip4_address_t e_addr, u16 e_port,
ip4_address_t l_addr, u16 l_port,
- nat_protocol_t proto, u32 vrf_id,
+ ip_protocol_t proto, u32 vrf_id,
u8 probability, u8 is_add);
/**
- * @brief Enable NAT44 plugin
- *
- * @param c nat44_config_t
- *
- * @return 0 on success, non-zero value otherwise
- */
-int nat44_plugin_enable (nat44_config_t c);
-
-/**
- * @brief Disable NAT44 plugin
- *
- * @return 0 on success, non-zero value otherwise
- */
-int nat44_plugin_disable ();
-
-/**
- * @brief Add external address to NAT44 pool
- *
- * @param sm snat global configuration data
- * @param addr IPv4 address
- * @param vrf_id VRF id of tenant, ~0 means independent of VRF
- * @param twice_nat 1 if twice NAT address
- *
- * @return 0 on success, non-zero value otherwise
- */
-int snat_add_address (snat_main_t * sm, ip4_address_t * addr, u32 vrf_id,
- u8 twice_nat);
-
-/**
- * @brief Delete external address from NAT44 pool
- *
- * @param sm snat global configuration data
- * @param addr IPv4 address
- * @param delete_sm 1 if delete static mapping using address
- * @param twice_nat 1 if twice NAT address
- *
- * @return 0 on success, non-zero value otherwise
- */
-int snat_del_address (snat_main_t * sm, ip4_address_t addr, u8 delete_sm,
- u8 twice_nat);
-
-clib_error_t *nat44_api_hookup (vlib_main_t * vm);
-
-/**
- * @brief Set NAT plugin workers
- *
- * @param bitmap NAT workers bitmap
- *
- * @return 0 on success, non-zero value otherwise
- */
-int snat_set_workers (uword * bitmap);
-
-int nat44_ed_add_interface (u32 sw_if_index, u8 is_inside);
-int nat44_ed_del_interface (u32 sw_if_index, u8 is_inside);
-int nat44_ed_add_output_interface (u32 sw_if_index);
-int nat44_ed_del_output_interface (u32 sw_if_index);
-
-/**
- * @brief Add/delete NAT44 pool address from specific interface
- *
- * @param sw_if_index software index of the interface
- * @param is_del 1 = delete, 0 = add
- * @param twice_nat 1 = twice NAT address for external hosts
- *
- * @return 0 on success, non-zero value otherwise
- */
-int snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
- u8 twice_nat);
-
-/**
* @brief Delete NAT44 endpoint-dependent session
*
* @param sm snat global configuration data
@@ -960,20 +884,12 @@ int snat_add_interface_address (snat_main_t * sm, u32 sw_if_index, int is_del,
*
* @return 0 on success, non-zero value otherwise
*/
-int nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port,
- ip4_address_t * eh_addr, u16 eh_port, u8 proto,
+int nat44_ed_del_session (snat_main_t *sm, ip4_address_t *addr, u16 port,
+ ip4_address_t *eh_addr, u16 eh_port, u8 proto,
u32 vrf_id, int is_in);
-/**
- * @brief Free NAT44 session data (lookup keys, external address port)
- *
- * @param sm snat global configuration data
- * @param s NAT session
- * @param thread_index thread index
- * @param is_ha is HA event
- */
-void nat_free_session_data (snat_main_t * sm, snat_session_t * s,
- u32 thread_index, u8 is_ha);
+void nat44_ed_free_session_data (snat_main_t *sm, snat_session_t *s,
+ u32 thread_index, u8 is_ha);
/**
* @brief Set NAT44 session limit (session limit, vrf id)
@@ -993,89 +909,20 @@ int nat44_set_session_limit (u32 session_limit, u32 vrf_id);
*/
int nat44_update_session_limit (u32 session_limit, u32 vrf_id);
-/**
- * @brief Free outside address and port pair
- *
- * @param addresses vector of outside addresses
- * @param thread_index thread index
- * @param key address, port and protocol
- */
-void
-snat_free_outside_address_and_port (snat_address_t * addresses,
- u32 thread_index,
- ip4_address_t * addr,
- u16 port, nat_protocol_t protocol);
-
void expire_per_vrf_sessions (u32 fib_index);
-/**
- * @brief Match NAT44 static mapping.
- *
- * @param key address and port to match
- * @param addr external/local address of the matched mapping
- * @param port port of the matched mapping
- * @param fib_index fib index of the matched mapping
- * @param by_external if 0 match by local address otherwise match by external
- * address
- * @param is_addr_only 1 if matched mapping is address only
- * @param twice_nat matched mapping is twice NAT type
- * @param lb 1 if matched mapping is load-balanced
- * @param ext_host_addr external host address
- * @param is_identity_nat 1 if indentity mapping
- * @param out if !=0 set to pointer of the mapping structure
- *
- * @returns 0 if match found otherwise 1.
- */
-int snat_static_mapping_match (
- vlib_main_t *vm, snat_main_t *sm, ip4_address_t match_addr, u16 match_port,
- u32 match_fib_index, nat_protocol_t match_protocol,
- ip4_address_t *mapping_addr, u16 *mapping_port, u32 *mapping_fib_index,
- u8 by_external, u8 *is_addr_only, twice_nat_type_t *twice_nat,
- lb_nat_type_t *lb, ip4_address_t *ext_host_addr, u8 *is_identity_nat,
- snat_static_mapping_t **out);
-
-/**
- * @brief Add/del NAT address to FIB.
- *
- * Add the external NAT address to the FIB as receive entries. This ensures
- * that VPP will reply to ARP for this address and we don't need to enable
- * proxy ARP on the outside interface.
- *
- * @param addr IPv4 address
- * @param plen address prefix length
- * @param sw_if_index software index of the outside interface
- * @param is_add 0 = delete, 1 = add.
- */
-void snat_add_del_addr_to_fib (ip4_address_t * addr,
- u8 p_len, u32 sw_if_index, int is_add);
-
-int nat_set_outside_address_and_port (snat_address_t *addresses,
- u32 thread_index, ip4_address_t addr,
- u16 port, nat_protocol_t protocol);
-
-/*
- * Why is this here? Because we don't need to touch this layer to
- * simply reply to an icmp. We need to change id to a unique
- * value to NAT an echo request/reply.
- */
-
-typedef struct
-{
- u16 identifier;
- u16 sequence;
-} icmp_echo_header_t;
-
-typedef struct
-{
- u16 src_port, dst_port;
-} tcp_udp_header_t;
+int snat_static_mapping_match (vlib_main_t *vm, ip4_address_t match_addr,
+ u16 match_port, u32 match_fib_index,
+ ip_protocol_t match_protocol,
+ ip4_address_t *mapping_addr, u16 *mapping_port,
+ u32 *mapping_fib_index, int by_external,
+ u8 *is_addr_only, twice_nat_type_t *twice_nat,
+ lb_nat_type_t *lb, ip4_address_t *ext_host_addr,
+ u8 *is_identity_nat,
+ snat_static_mapping_t **out);
u32 get_thread_idx_by_port (u16 e_port);
-u8 *format_static_mapping_kvp (u8 *s, va_list *args);
-
-u8 *format_session_kvp (u8 *s, va_list *args);
-
u32 nat_calc_bihash_buckets (u32 n_elts);
void nat44_addresses_free (snat_address_t **addresses);
@@ -1084,6 +931,28 @@ void nat44_ed_sessions_clear ();
int nat44_ed_set_frame_queue_nelts (u32 frame_queue_nelts);
+void nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f);
+
+snat_static_mapping_t *nat44_ed_sm_i2o_lookup (snat_main_t *sm,
+ ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto);
+
+snat_static_mapping_t *nat44_ed_sm_o2i_lookup (snat_main_t *sm,
+ ip4_address_t addr, u16 port,
+ u32 fib_index, u8 proto);
+
+void nat_syslog_nat44_sadd (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
+ u16 isport, ip4_address_t *idaddr, u16 idport,
+ ip4_address_t *xsaddr, u16 xsport,
+ ip4_address_t *xdaddr, u16 xdport, u8 proto,
+ u8 is_twicenat);
+
+void nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
+ u16 isport, ip4_address_t *idaddr, u16 idport,
+ ip4_address_t *xsaddr, u16 xsport,
+ ip4_address_t *xdaddr, u16 xdport, u8 proto,
+ u8 is_twicenat);
+
typedef enum
{
NAT_ED_TRNSL_ERR_SUCCESS = 0,
@@ -1096,17 +965,11 @@ typedef enum
nat_translation_error_e nat_6t_flow_buf_translate_i2o (
vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
- nat_6t_flow_t *f, nat_protocol_t proto, int is_output_feature);
+ nat_6t_flow_t *f, ip_protocol_t proto, int is_output_feature);
nat_translation_error_e nat_6t_flow_buf_translate_o2i (
vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
- nat_6t_flow_t *f, nat_protocol_t proto, int is_output_feature);
-
-void nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f);
-
-format_function_t format_nat_ed_translation_error;
-format_function_t format_nat_6t_flow;
-format_function_t format_ed_session_kvp;
+ nat_6t_flow_t *f, ip_protocol_t proto, int is_output_feature);
#endif /* __included_nat44_ed_h__ */
/*
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_affinity.c b/src/plugins/nat/nat44-ed/nat44_ed_affinity.c
index 89f11c64ef3..178671c6b7e 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_affinity.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_affinity.c
@@ -37,10 +37,9 @@ format_affinity_kvp (u8 * s, va_list * args)
k.as_u64[1] = v->key[1];
s = format (s, "client %U backend %U:%d proto %U index %llu",
- format_ip4_address, &k.client_addr,
- format_ip4_address, &k.service_addr,
- clib_net_to_host_u16 (k.service_port),
- format_nat_protocol, k.proto);
+ format_ip4_address, &k.client_addr, format_ip4_address,
+ &k.service_addr, clib_net_to_host_u16 (k.service_port),
+ format_ip_protocol, k.proto);
return s;
}
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_api.c b/src/plugins/nat/nat44-ed/nat44_ed_api.c
index 74d48b2d821..1f01410afce 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_api.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_api.c
@@ -31,6 +31,8 @@
#include <nat/nat44-ed/nat44_ed.api_enum.h>
#include <nat/nat44-ed/nat44_ed.api_types.h>
+#include <nat/nat44-ed/nat44_ed_inlines.h>
+
#define REPLY_MSG_ID_BASE sm->msg_id_base
#include <vlibapi/api_helper_macros.h>
@@ -47,15 +49,19 @@ vl_api_nat44_ed_plugin_enable_disable_t_handler (
if (mp->enable)
{
- c.static_mapping_only = mp->flags & NAT44_API_IS_STATIC_MAPPING_ONLY;
- c.connection_tracking = mp->flags & NAT44_API_IS_CONNECTION_TRACKING;
-
- c.inside_vrf = ntohl (mp->inside_vrf);
- c.outside_vrf = ntohl (mp->outside_vrf);
-
- c.sessions = ntohl (mp->sessions);
+ if ((mp->flags & NAT44_API_IS_STATIC_MAPPING_ONLY) ||
+ (mp->flags & NAT44_API_IS_CONNECTION_TRACKING))
+ {
+ rv = VNET_API_ERROR_UNSUPPORTED;
+ }
+ else
+ {
+ c.sessions = ntohl (mp->sessions);
+ c.inside_vrf = ntohl (mp->inside_vrf);
+ c.outside_vrf = ntohl (mp->outside_vrf);
- rv = nat44_plugin_enable (c);
+ rv = nat44_plugin_enable (c);
+ }
}
else
{
@@ -171,21 +177,6 @@ vl_api_nat44_set_session_limit_t_handler (vl_api_nat44_set_session_limit_t *
}
static void
-vl_api_nat_set_log_level_t_handler (vl_api_nat_set_log_level_t * mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_set_log_level_reply_t *rmp;
- int rv = 0;
-
- if (sm->log_level > NAT_LOG_DEBUG)
- rv = VNET_API_ERROR_UNSUPPORTED;
- else
- sm->log_level = mp->log_level;
-
- REPLY_MACRO (VL_API_NAT_SET_WORKERS_REPLY);
-}
-
-static void
vl_api_nat_ipfix_enable_disable_t_handler (vl_api_nat_ipfix_enable_disable_t *
mp)
{
@@ -217,22 +208,6 @@ vl_api_nat_set_timeouts_t_handler (vl_api_nat_set_timeouts_t * mp)
}
static void
-vl_api_nat_get_timeouts_t_handler (vl_api_nat_get_timeouts_t * mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_get_timeouts_reply_t *rmp;
- int rv = 0;
-
- REPLY_MACRO2 (VL_API_NAT_GET_TIMEOUTS_REPLY,
- ({
- rmp->udp = htonl (sm->timeouts.udp);
- rmp->tcp_established = htonl (sm->timeouts.tcp.established);
- rmp->tcp_transitory = htonl (sm->timeouts.tcp.transitory);
- rmp->icmp = htonl (sm->timeouts.icmp);
- }))
-}
-
-static void
vl_api_nat_set_mss_clamping_t_handler (vl_api_nat_set_mss_clamping_t * mp)
{
snat_main_t *sm = &snat_main;
@@ -275,12 +250,6 @@ static void
int rv = 0;
u32 *tmp;
- if (sm->static_mapping_only)
- {
- rv = VNET_API_ERROR_FEATURE_DISABLED;
- goto send_reply;
- }
-
is_add = mp->is_add;
twice_nat = mp->flags & NAT_API_IS_TWICE_NAT;
@@ -303,9 +272,13 @@ static void
for (i = 0; i < count; i++)
{
if (is_add)
- rv = snat_add_address (sm, &this_addr, vrf_id, twice_nat);
+ {
+ rv = nat44_ed_add_address (&this_addr, vrf_id, twice_nat);
+ }
else
- rv = snat_del_address (sm, this_addr, 0, twice_nat);
+ {
+ rv = nat44_ed_del_address (this_addr, twice_nat);
+ }
if (rv)
goto send_reply;
@@ -400,9 +373,9 @@ send_nat44_interface_details (snat_interface_t * i,
rmp->_vl_msg_id = ntohs (VL_API_NAT44_INTERFACE_DETAILS + sm->msg_id_base);
rmp->sw_if_index = ntohl (i->sw_if_index);
- if (nat_interface_is_inside (i))
+ if (nat44_ed_is_interface_inside (i))
rmp->flags |= NAT_API_IS_INSIDE;
- if (nat_interface_is_outside (i))
+ if (nat44_ed_is_interface_outside (i))
rmp->flags |= NAT_API_IS_OUTSIDE;
rmp->context = context;
@@ -422,74 +395,76 @@ vl_api_nat44_interface_dump_t_handler (vl_api_nat44_interface_dump_t * mp)
return;
pool_foreach (i, sm->interfaces)
- {
- send_nat44_interface_details(i, reg, mp->context);
- }
+ {
+ send_nat44_interface_details (i, reg, mp->context);
+ }
}
static void
- vl_api_nat44_interface_add_del_output_feature_t_handler
- (vl_api_nat44_interface_add_del_output_feature_t * mp)
+vl_api_nat44_ed_add_del_output_interface_t_handler (
+ vl_api_nat44_ed_add_del_output_interface_t *mp)
{
- vl_api_nat44_interface_add_del_output_feature_reply_t *rmp;
+ vl_api_nat44_ed_add_del_output_interface_reply_t *rmp;
snat_main_t *sm = &snat_main;
- u32 sw_if_index;
int rv = 0;
- VALIDATE_SW_IF_INDEX (mp);
-
- sw_if_index = ntohl (mp->sw_if_index);
+ VALIDATE_SW_IF_INDEX_END (mp);
if (mp->is_add)
{
- rv = nat44_ed_add_output_interface (sw_if_index);
+ rv = nat44_ed_add_output_interface (mp->sw_if_index);
}
else
{
- rv = nat44_ed_del_output_interface (sw_if_index);
+ rv = nat44_ed_del_output_interface (mp->sw_if_index);
}
- BAD_SW_IF_INDEX_LABEL;
- REPLY_MACRO (VL_API_NAT44_INTERFACE_ADD_DEL_OUTPUT_FEATURE_REPLY);
+bad_sw_if_index:
+ REPLY_MACRO_END (VL_API_NAT44_ED_ADD_DEL_OUTPUT_INTERFACE_REPLY);
}
+#define vl_endianfun
+#include <nat/nat44-ed/nat44_ed.api.h>
+#undef vl_endianfun
static void
-send_nat44_interface_output_feature_details (snat_interface_t * i,
- vl_api_registration_t * reg,
- u32 context)
+send_nat44_ed_output_interface_details (u32 index, vl_api_registration_t *rp,
+ u32 context)
{
- vl_api_nat44_interface_output_feature_details_t *rmp;
snat_main_t *sm = &snat_main;
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id =
- ntohs (VL_API_NAT44_INTERFACE_OUTPUT_FEATURE_DETAILS + sm->msg_id_base);
- rmp->sw_if_index = ntohl (i->sw_if_index);
- rmp->context = context;
-
- if (nat_interface_is_inside (i))
- rmp->flags |= NAT_API_IS_INSIDE;
-
- vl_api_send_msg (reg, (u8 *) rmp);
+ vl_api_nat44_ed_output_interface_details_t *rmp;
+ snat_interface_t *i =
+ pool_elt_at_index (sm->output_feature_interfaces, index);
+
+ /* Make sure every field is initiated (or don't skip the clib_memset()) */
+ REPLY_MACRO_DETAILS4 (
+ VL_API_NAT44_ED_OUTPUT_INTERFACE_DETAILS, rp, context, ({
+ rmp->sw_if_index = i->sw_if_index;
+
+ /* Endian hack until apigen registers _details
+ * endian functions */
+ vl_api_nat44_ed_output_interface_details_t_endian (rmp);
+ rmp->_vl_msg_id = htons (rmp->_vl_msg_id);
+ rmp->context = htonl (rmp->context);
+ }));
}
static void
- vl_api_nat44_interface_output_feature_dump_t_handler
- (vl_api_nat44_interface_output_feature_dump_t * mp)
+vl_api_nat44_ed_output_interface_get_t_handler (
+ vl_api_nat44_ed_output_interface_get_t *mp)
{
- vl_api_registration_t *reg;
+ vl_api_nat44_ed_output_interface_get_reply_t *rmp;
snat_main_t *sm = &snat_main;
- snat_interface_t *i;
+ i32 rv = 0;
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
+ if (pool_elts (sm->output_feature_interfaces) == 0)
+ {
+ REPLY_MACRO (VL_API_NAT44_ED_OUTPUT_INTERFACE_GET_REPLY);
+ return;
+ }
- pool_foreach (i, sm->output_feature_interfaces)
- {
- send_nat44_interface_output_feature_details (i, reg, mp->context);
- }
+ REPLY_AND_DETAILS_MACRO (
+ VL_API_NAT44_ED_OUTPUT_INTERFACE_GET_REPLY, sm->output_feature_interfaces,
+ ({ send_nat44_ed_output_interface_details (cursor, rp, mp->context); }));
}
static void
@@ -504,7 +479,7 @@ static void
ip4_address_t l_addr, e_addr, pool_addr = { 0 };
u32 sw_if_index, flags = 0, vrf_id;
u16 l_port = 0, e_port = 0;
- nat_protocol_t proto = 0;
+ ip_protocol_t proto = 0;
u8 *tag = 0;
memcpy (&l_addr.as_u8, mp->local_ip_address, 4);
@@ -517,7 +492,7 @@ static void
{
l_port = mp->local_port;
e_port = mp->external_port;
- proto = ip_proto_to_nat_proto (mp->protocol);
+ proto = mp->protocol;
}
if (mp->flags & NAT_API_IS_TWICE_NAT)
@@ -578,7 +553,7 @@ static void
ip4_address_t l_addr, e_addr, pool_addr;
u32 sw_if_index, flags = 0, vrf_id;
u16 l_port = 0, e_port = 0;
- nat_protocol_t proto;
+ ip_protocol_t proto;
u8 *tag = 0;
memcpy (&l_addr.as_u8, mp->local_ip_address, 4);
@@ -615,7 +590,7 @@ static void
}
sw_if_index = clib_net_to_host_u32 (mp->external_sw_if_index);
- if (sw_if_index)
+ if (sw_if_index != ~0)
{
flags |= NAT_SM_FLAG_SWITCH_ADDRESS;
}
@@ -624,7 +599,7 @@ static void
memcpy (&e_addr.as_u8, mp->external_ip_address, 4);
}
- proto = ip_proto_to_nat_proto (mp->protocol);
+ proto = mp->protocol;
vrf_id = clib_net_to_host_u32 (mp->vrf_id);
if (mp->is_add)
@@ -688,7 +663,7 @@ send_nat44_static_mapping_details (snat_static_mapping_t * m,
}
else
{
- rmp->protocol = nat_proto_to_ip_proto (m->proto);
+ rmp->protocol = m->proto;
rmp->external_port = m->external_port;
rmp->local_port = m->local_port;
}
@@ -700,9 +675,8 @@ send_nat44_static_mapping_details (snat_static_mapping_t * m,
}
static void
-send_nat44_static_map_resolve_details (snat_static_map_resolve_t * m,
- vl_api_registration_t * reg,
- u32 context)
+send_nat44_static_map_resolve_details (snat_static_mapping_resolve_t *m,
+ vl_api_registration_t *reg, u32 context)
{
vl_api_nat44_static_mapping_details_t *rmp;
snat_main_t *sm = &snat_main;
@@ -716,19 +690,22 @@ send_nat44_static_map_resolve_details (snat_static_map_resolve_t * m,
rmp->vrf_id = htonl (m->vrf_id);
rmp->context = context;
- if (m->twice_nat)
- rmp->flags |= NAT_API_IS_TWICE_NAT;
+ if (is_sm_twice_nat (m->flags))
+ {
+ rmp->flags |= NAT_API_IS_TWICE_NAT;
+ }
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
{
rmp->flags |= NAT_API_IS_ADDR_ONLY;
}
else
{
- rmp->protocol = nat_proto_to_ip_proto (m->proto);
+ rmp->protocol = m->proto;
rmp->external_port = m->e_port;
rmp->local_port = m->l_port;
}
+
if (m->tag)
strncpy ((char *) rmp->tag, (char *) m->tag, vec_len (m->tag));
@@ -742,7 +719,7 @@ vl_api_nat44_static_mapping_dump_t_handler (vl_api_nat44_static_mapping_dump_t
vl_api_registration_t *reg;
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
- snat_static_map_resolve_t *rp;
+ snat_static_mapping_resolve_t *rp;
int j;
reg = vl_api_client_index_to_registration (mp->client_index);
@@ -755,10 +732,10 @@ vl_api_nat44_static_mapping_dump_t_handler (vl_api_nat44_static_mapping_dump_t
send_nat44_static_mapping_details (m, reg, mp->context);
}
- for (j = 0; j < vec_len (sm->to_resolve); j++)
+ for (j = 0; j < vec_len (sm->sm_to_resolve); j++)
{
- rp = sm->to_resolve + j;
- if (!rp->identity_nat)
+ rp = sm->sm_to_resolve + j;
+ if (!is_sm_identity_nat (rp->flags))
send_nat44_static_map_resolve_details (rp, reg, mp->context);
}
}
@@ -774,7 +751,7 @@ static void
ip4_address_t addr, pool_addr = { 0 };
u32 sw_if_index, flags, vrf_id;
- nat_protocol_t proto = 0;
+ ip_protocol_t proto = 0;
u16 port = 0;
u8 *tag = 0;
@@ -787,7 +764,7 @@ static void
else
{
port = mp->port;
- proto = ip_proto_to_nat_proto (mp->protocol);
+ proto = mp->protocol;
}
sw_if_index = clib_net_to_host_u32 (mp->sw_if_index);
@@ -840,7 +817,7 @@ send_nat44_identity_mapping_details (snat_static_mapping_t * m, int index,
rmp->port = m->local_port;
rmp->sw_if_index = ~0;
rmp->vrf_id = htonl (local->vrf_id);
- rmp->protocol = nat_proto_to_ip_proto (m->proto);
+ rmp->protocol = m->proto;
rmp->context = context;
if (m->tag)
strncpy ((char *) rmp->tag, (char *) m->tag, vec_len (m->tag));
@@ -849,8 +826,8 @@ send_nat44_identity_mapping_details (snat_static_mapping_t * m, int index,
}
static void
-send_nat44_identity_map_resolve_details (snat_static_map_resolve_t * m,
- vl_api_registration_t * reg,
+send_nat44_identity_map_resolve_details (snat_static_mapping_resolve_t *m,
+ vl_api_registration_t *reg,
u32 context)
{
vl_api_nat44_identity_mapping_details_t *rmp;
@@ -861,13 +838,13 @@ send_nat44_identity_map_resolve_details (snat_static_map_resolve_t * m,
rmp->_vl_msg_id =
ntohs (VL_API_NAT44_IDENTITY_MAPPING_DETAILS + sm->msg_id_base);
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
rmp->flags = (vl_api_nat_config_flags_t) NAT_API_IS_ADDR_ONLY;
rmp->port = m->l_port;
rmp->sw_if_index = htonl (m->sw_if_index);
rmp->vrf_id = htonl (m->vrf_id);
- rmp->protocol = nat_proto_to_ip_proto (m->proto);
+ rmp->protocol = m->proto;
rmp->context = context;
if (m->tag)
strncpy ((char *) rmp->tag, (char *) m->tag, vec_len (m->tag));
@@ -882,7 +859,7 @@ static void
vl_api_registration_t *reg;
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
- snat_static_map_resolve_t *rp;
+ snat_static_mapping_resolve_t *rp;
int j;
reg = vl_api_client_index_to_registration (mp->client_index);
@@ -890,20 +867,20 @@ static void
return;
pool_foreach (m, sm->static_mappings)
- {
- if (is_sm_identity_nat (m->flags) && !is_sm_lb (m->flags))
- {
- pool_foreach_index (j, m->locals)
- {
- send_nat44_identity_mapping_details (m, j, reg, mp->context);
- }
- }
- }
+ {
+ if (is_sm_identity_nat (m->flags) && !is_sm_lb (m->flags))
+ {
+ pool_foreach_index (j, m->locals)
+ {
+ send_nat44_identity_mapping_details (m, j, reg, mp->context);
+ }
+ }
+ }
- for (j = 0; j < vec_len (sm->to_resolve); j++)
+ for (j = 0; j < vec_len (sm->sm_to_resolve); j++)
{
- rp = sm->to_resolve + j;
- if (rp->identity_nat)
+ rp = sm->sm_to_resolve + j;
+ if (is_sm_identity_nat (rp->flags))
send_nat44_identity_map_resolve_details (rp, reg, mp->context);
}
}
@@ -915,25 +892,24 @@ static void
snat_main_t *sm = &snat_main;
vl_api_nat44_add_del_interface_addr_reply_t *rmp;
u32 sw_if_index = ntohl (mp->sw_if_index);
+ u8 twice_nat;
int rv = 0;
- u8 is_del;
-
- if (sm->static_mapping_only)
- {
- rv = VNET_API_ERROR_FEATURE_DISABLED;
- goto send_reply;
- }
-
- is_del = !mp->is_add;
VALIDATE_SW_IF_INDEX (mp);
- rv = snat_add_interface_address (sm, sw_if_index, is_del,
- mp->flags & NAT_API_IS_TWICE_NAT);
+ twice_nat = mp->flags & NAT_API_IS_TWICE_NAT;
+
+ if (mp->is_add)
+ {
+ rv = nat44_ed_add_interface_address (sw_if_index, twice_nat);
+ }
+ else
+ {
+ rv = nat44_ed_del_interface_address (sw_if_index, twice_nat);
+ }
BAD_SW_IF_INDEX_LABEL;
-send_reply:
REPLY_MACRO (VL_API_NAT44_ADD_DEL_INTERFACE_ADDR_REPLY);
}
@@ -962,21 +938,18 @@ static void
vl_api_nat44_interface_addr_dump_t_handler (vl_api_nat44_interface_addr_dump_t
* mp)
{
- vl_api_registration_t *reg;
snat_main_t *sm = &snat_main;
- u32 *i;
+ vl_api_registration_t *reg;
+ snat_address_resolve_t *ap;
reg = vl_api_client_index_to_registration (mp->client_index);
if (!reg)
return;
- vec_foreach (i, sm->auto_add_sw_if_indices)
- {
- send_nat44_interface_addr_details (*i, reg, mp->context, 0);
- }
- vec_foreach (i, sm->auto_add_sw_if_indices_twice_nat)
+ vec_foreach (ap, sm->addr_to_resolve)
{
- send_nat44_interface_addr_details (*i, reg, mp->context, 1);
+ send_nat44_interface_addr_details (ap->sw_if_index, reg, mp->context,
+ ap->is_twice_nat);
}
}
@@ -1010,7 +983,7 @@ vl_api_nat44_add_del_lb_static_mapping_t_handler (
vl_api_nat44_add_del_lb_static_mapping_reply_t *rmp;
nat44_lb_addr_port_t *locals = 0;
ip4_address_t e_addr;
- nat_protocol_t proto;
+ ip_protocol_t proto;
u32 flags = 0;
u8 *tag = 0;
int rv = 0;
@@ -1018,7 +991,7 @@ vl_api_nat44_add_del_lb_static_mapping_t_handler (
locals = unformat_nat44_lb_addr_port (mp->locals,
clib_net_to_host_u32 (mp->local_num));
clib_memcpy (&e_addr, mp->external_addr, 4);
- proto = ip_proto_to_nat_proto (mp->protocol);
+ proto = mp->protocol;
if (mp->flags & NAT_API_IS_TWICE_NAT)
{
@@ -1063,11 +1036,11 @@ vl_api_nat44_lb_static_mapping_add_del_local_t_handler (
vl_api_nat44_lb_static_mapping_add_del_local_reply_t *rmp;
int rv = 0;
ip4_address_t e_addr, l_addr;
- nat_protocol_t proto;
+ ip_protocol_t proto;
clib_memcpy (&e_addr, mp->external_addr, 4);
clib_memcpy (&l_addr, mp->local.addr, 4);
- proto = ip_proto_to_nat_proto (mp->protocol);
+ proto = mp->protocol;
rv = nat44_ed_add_del_lb_static_mapping_local (
e_addr, mp->external_port, l_addr, mp->local.port, proto,
@@ -1089,13 +1062,14 @@ send_nat44_lb_static_mapping_details (snat_static_mapping_t *m,
rmp = vl_msg_api_alloc (
sizeof (*rmp) + (pool_elts (m->locals) * sizeof (nat44_lb_addr_port_t)));
+
clib_memset (rmp, 0, sizeof (*rmp));
rmp->_vl_msg_id =
ntohs (VL_API_NAT44_LB_STATIC_MAPPING_DETAILS + sm->msg_id_base);
clib_memcpy (rmp->external_addr, &(m->external_addr), 4);
rmp->external_port = m->external_port;
- rmp->protocol = nat_proto_to_ip_proto (m->proto);
+ rmp->protocol = m->proto;
rmp->context = context;
if (is_sm_self_twice_nat (m->flags))
@@ -1169,7 +1143,7 @@ vl_api_nat44_del_session_t_handler (vl_api_nat44_del_session_t *mp)
is_in = mp->flags & NAT_API_IS_INSIDE;
- rv = nat44_del_ed_session (sm, &addr, port, &eh_addr, eh_port, mp->protocol,
+ rv = nat44_ed_del_session (sm, &addr, port, &eh_addr, eh_port, mp->protocol,
vrf_id, is_in);
REPLY_MACRO (VL_API_NAT44_DEL_SESSION_REPLY);
@@ -1187,253 +1161,160 @@ vl_api_nat44_forwarding_enable_disable_t_handler (
}
static void
-vl_api_nat44_forwarding_is_enabled_t_handler (
- vl_api_nat44_forwarding_is_enabled_t *mp)
+vl_api_nat44_show_running_config_t_handler (
+ vl_api_nat44_show_running_config_t *mp)
{
- vl_api_registration_t *reg;
+ vl_api_nat44_show_running_config_reply_t *rmp;
snat_main_t *sm = &snat_main;
- vl_api_nat44_forwarding_is_enabled_reply_t *rmp;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
+ nat44_config_t *rc = &sm->rconfig;
+ int rv = 0;
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id =
- ntohs (VL_API_NAT44_FORWARDING_IS_ENABLED_REPLY + sm->msg_id_base);
- rmp->context = mp->context;
+ REPLY_MACRO2_ZERO (
+ VL_API_NAT44_SHOW_RUNNING_CONFIG_REPLY, ({
+ rmp->inside_vrf = htonl (rc->inside_vrf);
+ rmp->outside_vrf = htonl (rc->outside_vrf);
- rmp->enabled = sm->forwarding_enabled;
+ rmp->sessions = htonl (rc->sessions);
+ rmp->translation_buckets = htonl (sm->translation_buckets);
- vl_api_send_msg (reg, (u8 *) rmp);
-}
+ // OBSOLETE
+ rmp->users = 0;
+ rmp->user_buckets = 0;
+ rmp->user_sessions = 0;
-/* Obsolete calls hold back because of deprecation
- * should not be used */
+ rmp->timeouts.udp = htonl (sm->timeouts.udp);
+ rmp->timeouts.tcp_established = htonl (sm->timeouts.tcp.established);
+ rmp->timeouts.tcp_transitory = htonl (sm->timeouts.tcp.transitory);
+ rmp->timeouts.icmp = htonl (sm->timeouts.icmp);
-static void
-vl_api_nat_set_addr_and_port_alloc_alg_t_handler (
- vl_api_nat_set_addr_and_port_alloc_alg_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_set_addr_and_port_alloc_alg_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_SET_ADDR_AND_PORT_ALLOC_ALG_REPLY);
+ rmp->forwarding_enabled = sm->forwarding_enabled == 1;
+ // consider how to split functionality between subplugins
+ rmp->ipfix_logging_enabled = nat_ipfix_logging_enabled ();
+ rmp->flags |= NAT44_IS_ENDPOINT_DEPENDENT;
+ }));
}
static void
-vl_api_nat_get_addr_and_port_alloc_alg_t_handler (
- vl_api_nat_get_addr_and_port_alloc_alg_t *mp)
+vl_api_nat44_ed_add_del_vrf_table_t_handler (
+ vl_api_nat44_ed_add_del_vrf_table_t *mp)
{
snat_main_t *sm = &snat_main;
- vl_api_nat_get_addr_and_port_alloc_alg_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_GET_ADDR_AND_PORT_ALLOC_ALG_REPLY);
+ vl_api_nat44_ed_add_del_vrf_table_reply_t *rmp;
+ int rv = nat44_ed_add_del_vrf_table (clib_net_to_host_u32 (mp->table_vrf_id),
+ mp->is_add);
+ REPLY_MACRO (VL_API_NAT44_ED_ADD_DEL_VRF_TABLE_REPLY);
}
static void
-vl_api_nat_ha_set_listener_t_handler (vl_api_nat_ha_set_listener_t *mp)
+vl_api_nat44_ed_add_del_vrf_route_t_handler (
+ vl_api_nat44_ed_add_del_vrf_route_t *mp)
{
snat_main_t *sm = &snat_main;
- vl_api_nat_ha_set_listener_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_SET_LISTENER_REPLY);
+ vl_api_nat44_ed_add_del_vrf_route_reply_t *rmp;
+ int rv =
+ nat44_ed_add_del_vrf_route (clib_net_to_host_u32 (mp->table_vrf_id),
+ clib_net_to_host_u32 (mp->vrf_id), mp->is_add);
+ REPLY_MACRO (VL_API_NAT44_ED_ADD_DEL_VRF_ROUTE_REPLY);
}
static void
-vl_api_nat_ha_get_listener_t_handler (vl_api_nat_ha_get_listener_t *mp)
+nat44_ed_vrf_tables_send_details (vl_api_registration_t *rp, u32 context,
+ vrf_table_t *t)
{
snat_main_t *sm = &snat_main;
- vl_api_nat_ha_get_listener_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_GET_LISTENER_REPLY);
-}
+ vl_api_nat44_ed_vrf_tables_details_t *mp;
-static void
-vl_api_nat_ha_set_failover_t_handler (vl_api_nat_ha_set_failover_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_ha_set_failover_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_SET_FAILOVER_REPLY);
-}
+ u32 *vrf_ids = 0;
+ vrf_route_t *r;
-static void
-vl_api_nat_ha_get_failover_t_handler (vl_api_nat_ha_get_failover_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_ha_get_failover_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_GET_FAILOVER_REPLY);
-}
+ mp = vl_msg_api_alloc_zero (sizeof (*mp) +
+ sizeof (mp->vrf_ids[0]) * vec_len (t->routes));
+ mp->_vl_msg_id =
+ ntohs (VL_API_NAT44_ED_VRF_TABLES_DETAILS + sm->msg_id_base);
+ mp->context = context;
+ mp->n_vrf_ids = clib_host_to_net_u32 (vec_len (t->routes));
-static void
-vl_api_nat_ha_flush_t_handler (vl_api_nat_ha_flush_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_ha_flush_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_FLUSH_REPLY);
-}
+ pool_foreach (r, t->routes)
+ {
+ vec_add1 (vrf_ids, r->vrf_id);
+ }
-static void
-vl_api_nat_ha_resync_t_handler (vl_api_nat_ha_resync_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat_ha_resync_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT_HA_RESYNC_REPLY);
-}
+ // copy the records
+ clib_memcpy (mp->vrf_ids, vrf_ids,
+ sizeof (mp->vrf_ids[0]) * vec_len (t->routes));
-static void
-vl_api_nat44_del_user_t_handler (vl_api_nat44_del_user_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat44_del_user_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT44_DEL_USER_REPLY);
-}
+ vec_free (vrf_ids);
-static void
-vl_api_nat44_session_cleanup_t_handler (vl_api_nat44_session_cleanup_t *mp)
-{
- snat_main_t *sm = &snat_main;
- vl_api_nat44_session_cleanup_reply_t *rmp;
- int rv = VNET_API_ERROR_UNSUPPORTED;
- REPLY_MACRO (VL_API_NAT44_SESSION_CLEANUP_REPLY);
+ // send the message
+ vl_api_send_msg (rp, (u8 *) mp);
}
static void
-vl_api_nat44_plugin_enable_disable_t_handler (
- vl_api_nat44_plugin_enable_disable_t *mp)
+nat44_ed_vrf_tables_send_details_v2 (vl_api_registration_t *rp, u32 context,
+ vrf_table_t *t)
{
snat_main_t *sm = &snat_main;
- nat44_config_t c = { 0 };
- vl_api_nat44_plugin_enable_disable_reply_t *rmp;
- int rv = 0;
+ vl_api_nat44_ed_vrf_tables_v2_details_t *mp;
- if (mp->enable)
- {
- if (mp->users || mp->user_sessions)
- {
- rv = VNET_API_ERROR_UNSUPPORTED;
- }
- else
- {
- c.static_mapping_only = mp->flags & NAT44_API_IS_STATIC_MAPPING_ONLY;
- c.connection_tracking = mp->flags & NAT44_API_IS_CONNECTION_TRACKING;
+ u32 *vrf_ids = 0;
+ vrf_route_t *r;
- c.inside_vrf = ntohl (mp->inside_vrf);
- c.outside_vrf = ntohl (mp->outside_vrf);
-
- c.sessions = ntohl (mp->sessions);
-
- rv = nat44_plugin_enable (c);
- }
- }
- else
+ mp = vl_msg_api_alloc_zero (sizeof (*mp) +
+ sizeof (mp->vrf_ids[0]) * vec_len (t->routes));
+ mp->_vl_msg_id = clib_net_to_host_u16 (VL_API_NAT44_ED_VRF_TABLES_DETAILS +
+ sm->msg_id_base);
+ mp->context = context;
+ mp->n_vrf_ids = clib_net_to_host_u32 (vec_len (t->routes));
+ mp->table_vrf_id = clib_net_to_host_u32 (t->table_vrf_id);
+ pool_foreach (r, t->routes)
{
- rv = nat44_plugin_disable ();
+ vec_add1 (vrf_ids, clib_net_to_host_u32 (r->vrf_id));
}
- REPLY_MACRO (VL_API_NAT44_PLUGIN_ENABLE_DISABLE_REPLY);
-}
+ // copy the records
+ clib_memcpy (mp->vrf_ids, vrf_ids,
+ sizeof (mp->vrf_ids[0]) * vec_len (t->routes));
-static void
-vl_api_nat_control_ping_t_handler (vl_api_nat_control_ping_t *mp)
-{
- vl_api_nat_control_ping_reply_t *rmp;
- snat_main_t *sm = &snat_main;
- int rv = 0;
+ vec_free (vrf_ids);
- REPLY_MACRO2 (VL_API_NAT_CONTROL_PING_REPLY,
- ({ rmp->vpe_pid = ntohl (getpid ()); }));
+ // send the message
+ vl_api_send_msg (rp, (u8 *) mp);
}
static void
-vl_api_nat_show_config_t_handler (vl_api_nat_show_config_t *mp)
+vl_api_nat44_ed_vrf_tables_dump_t_handler (
+ vl_api_nat44_ed_vrf_tables_dump_t *mp)
{
- vl_api_nat_show_config_reply_t *rmp;
snat_main_t *sm = &snat_main;
- int rv = 0;
+ vl_api_registration_t *rp;
+ vrf_table_t *t;
- REPLY_MACRO2_ZERO (VL_API_NAT_SHOW_CONFIG_REPLY, ({
- rmp->translation_buckets =
- htonl (sm->translation_buckets);
- rmp->user_buckets = 0;
- rmp->max_translations_per_user = 0;
- rmp->outside_vrf_id = htonl (sm->outside_vrf_id);
- rmp->inside_vrf_id = htonl (sm->inside_vrf_id);
- rmp->static_mapping_only = sm->static_mapping_only;
- rmp->static_mapping_connection_tracking =
- sm->static_mapping_connection_tracking;
- rmp->endpoint_dependent = 1;
- rmp->out2in_dpo = 0;
- }));
-}
-
-static void
-vl_api_nat_show_config_2_t_handler (vl_api_nat_show_config_2_t *mp)
-{
- vl_api_nat_show_config_2_reply_t *rmp;
- snat_main_t *sm = &snat_main;
- int rv = 0;
+ rp = vl_api_client_index_to_registration (mp->client_index);
+ if (rp == 0)
+ return;
- REPLY_MACRO2_ZERO (
- VL_API_NAT_SHOW_CONFIG_2_REPLY, ({
- rmp->translation_buckets = htonl (sm->translation_buckets);
- rmp->user_buckets = 0;
- rmp->max_translations_per_user = 0;
- rmp->outside_vrf_id = htonl (sm->outside_vrf_id);
- rmp->inside_vrf_id = htonl (sm->inside_vrf_id);
- rmp->static_mapping_only = sm->static_mapping_only;
- rmp->static_mapping_connection_tracking =
- sm->static_mapping_connection_tracking;
- rmp->endpoint_dependent = 1;
- rmp->out2in_dpo = 0;
- rmp->max_translations_per_thread =
- clib_net_to_host_u32 (sm->max_translations_per_thread);
- rmp->max_users_per_thread = 0;
- }));
+ pool_foreach (t, sm->vrf_tables)
+ {
+ nat44_ed_vrf_tables_send_details (rp, mp->context, t);
+ }
}
static void
-vl_api_nat44_show_running_config_t_handler (
- vl_api_nat44_show_running_config_t *mp)
+vl_api_nat44_ed_vrf_tables_v2_dump_t_handler (
+ vl_api_nat44_ed_vrf_tables_v2_dump_t *mp)
{
- vl_api_nat44_show_running_config_reply_t *rmp;
snat_main_t *sm = &snat_main;
- nat44_config_t *rc = &sm->rconfig;
- int rv = 0;
-
- REPLY_MACRO2_ZERO (
- VL_API_NAT44_SHOW_RUNNING_CONFIG_REPLY, ({
- rmp->inside_vrf = htonl (rc->inside_vrf);
- rmp->outside_vrf = htonl (rc->outside_vrf);
+ vl_api_registration_t *rp;
+ vrf_table_t *t;
- rmp->sessions = htonl (rc->sessions);
- rmp->translation_buckets = htonl (sm->translation_buckets);
-
- // OBSOLETE
- rmp->users = 0;
- rmp->user_buckets = 0;
- rmp->user_sessions = 0;
-
- rmp->timeouts.udp = htonl (sm->timeouts.udp);
- rmp->timeouts.tcp_established = htonl (sm->timeouts.tcp.established);
- rmp->timeouts.tcp_transitory = htonl (sm->timeouts.tcp.transitory);
- rmp->timeouts.icmp = htonl (sm->timeouts.icmp);
+ rp = vl_api_client_index_to_registration (mp->client_index);
+ if (rp == 0)
+ return;
- rmp->forwarding_enabled = sm->forwarding_enabled == 1;
- // consider how to split functionality between subplugins
- rmp->ipfix_logging_enabled = nat_ipfix_logging_enabled ();
- rmp->flags |= NAT44_IS_ENDPOINT_DEPENDENT;
- if (rc->static_mapping_only)
- rmp->flags |= NAT44_IS_STATIC_MAPPING_ONLY;
- if (rc->connection_tracking)
- rmp->flags |= NAT44_IS_CONNECTION_TRACKING;
- }));
+ pool_foreach (t, sm->vrf_tables)
+ {
+ nat44_ed_vrf_tables_send_details_v2 (rp, mp->context, t);
+ }
}
/* user (internal host) key */
@@ -1515,7 +1396,7 @@ nat_ed_user_create_helper (user_create_helper_t *uch, snat_session_t *s)
{
u = pool_elt_at_index (uch->users, value.value);
}
- if (snat_is_session_static (s))
+ if (nat44_ed_is_session_static (s))
{
++u->nstaticsessions;
}
@@ -1598,40 +1479,27 @@ send_nat44_user_session_details (snat_session_t * s,
clib_memcpy (rmp->outside_ip_address, (&s->out2in.addr), 4);
clib_memcpy (rmp->inside_ip_address, (&s->in2out.addr), 4);
- if (snat_is_session_static (s))
+ if (nat44_ed_is_session_static (s))
rmp->flags |= NAT_API_IS_STATIC;
- if (is_twice_nat_session (s))
+ if (nat44_ed_is_twice_nat_session (s))
rmp->flags |= NAT_API_IS_TWICE_NAT;
- if (is_ed_session (s) || is_fwd_bypass_session (s))
- rmp->flags |= NAT_API_IS_EXT_HOST_VALID;
+ rmp->flags |= NAT_API_IS_EXT_HOST_VALID;
rmp->last_heard = clib_host_to_net_u64 ((u64) s->last_heard);
rmp->total_bytes = clib_host_to_net_u64 (s->total_bytes);
rmp->total_pkts = ntohl (s->total_pkts);
rmp->context = context;
- if (snat_is_unk_proto_session (s))
- {
- rmp->outside_port = 0;
- rmp->inside_port = 0;
- rmp->protocol = ntohs (s->in2out.port);
- }
- else
+ rmp->outside_port = s->out2in.port;
+ rmp->inside_port = s->in2out.port;
+ rmp->protocol = clib_host_to_net_u16 (s->proto);
+ clib_memcpy (rmp->ext_host_address, &s->ext_host_addr, 4);
+ rmp->ext_host_port = s->ext_host_port;
+ if (nat44_ed_is_twice_nat_session (s))
{
- rmp->outside_port = s->out2in.port;
- rmp->inside_port = s->in2out.port;
- rmp->protocol = ntohs (nat_proto_to_ip_proto (s->nat_proto));
- }
- if (is_ed_session (s) || is_fwd_bypass_session (s))
- {
- clib_memcpy (rmp->ext_host_address, &s->ext_host_addr, 4);
- rmp->ext_host_port = s->ext_host_port;
- if (is_twice_nat_session (s))
- {
- clib_memcpy (rmp->ext_host_nat_address, &s->ext_host_nat_addr, 4);
- rmp->ext_host_nat_port = s->ext_host_nat_port;
- }
+ clib_memcpy (rmp->ext_host_nat_address, &s->ext_host_nat_addr, 4);
+ rmp->ext_host_nat_port = s->ext_host_nat_port;
}
vl_api_send_msg (reg, (u8 *) rmp);
@@ -1670,6 +1538,167 @@ vl_api_nat44_user_session_dump_t_handler (vl_api_nat44_user_session_dump_t *
}
}
+static void
+send_nat44_user_session_v2_details (snat_session_t *s,
+ vl_api_registration_t *reg, u32 context)
+{
+ vl_api_nat44_user_session_v2_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ u64 now = vlib_time_now (vnm->vlib_main);
+ u64 sess_timeout_time = 0;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_USER_SESSION_V2_DETAILS + sm->msg_id_base);
+ clib_memcpy (rmp->outside_ip_address, (&s->out2in.addr), 4);
+ clib_memcpy (rmp->inside_ip_address, (&s->in2out.addr), 4);
+
+ if (nat44_ed_is_session_static (s))
+ rmp->flags |= NAT_API_IS_STATIC;
+
+ if (nat44_ed_is_twice_nat_session (s))
+ rmp->flags |= NAT_API_IS_TWICE_NAT;
+
+ rmp->flags |= NAT_API_IS_EXT_HOST_VALID;
+
+ rmp->last_heard = clib_host_to_net_u64 ((u64) s->last_heard);
+ rmp->total_bytes = clib_host_to_net_u64 (s->total_bytes);
+ rmp->total_pkts = ntohl (s->total_pkts);
+ rmp->context = context;
+ rmp->outside_port = s->out2in.port;
+ rmp->inside_port = s->in2out.port;
+ rmp->protocol = clib_host_to_net_u16 (s->proto);
+ clib_memcpy (rmp->ext_host_address, &s->ext_host_addr, 4);
+ rmp->ext_host_port = s->ext_host_port;
+ if (nat44_ed_is_twice_nat_session (s))
+ {
+ clib_memcpy (rmp->ext_host_nat_address, &s->ext_host_nat_addr, 4);
+ rmp->ext_host_nat_port = s->ext_host_nat_port;
+ }
+
+ sess_timeout_time = s->last_heard + nat44_session_get_timeout (sm, s);
+ rmp->is_timed_out = (now >= sess_timeout_time);
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+send_nat44_user_session_v3_details (snat_session_t *s,
+ vl_api_registration_t *reg, u32 context)
+{
+ vl_api_nat44_user_session_v3_details_t *rmp;
+ snat_main_t *sm = &snat_main;
+ u64 now = vlib_time_now (vlib_get_main ());
+ u64 sess_timeout_time = 0;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_USER_SESSION_V3_DETAILS + sm->msg_id_base);
+ clib_memcpy (rmp->outside_ip_address, (&s->out2in.addr), 4);
+ clib_memcpy (rmp->inside_ip_address, (&s->in2out.addr), 4);
+
+ if (nat44_ed_is_session_static (s))
+ rmp->flags |= NAT_API_IS_STATIC;
+
+ if (nat44_ed_is_twice_nat_session (s))
+ rmp->flags |= NAT_API_IS_TWICE_NAT;
+
+ rmp->flags |= NAT_API_IS_EXT_HOST_VALID;
+
+ rmp->last_heard = clib_host_to_net_u64 ((u64) s->last_heard);
+ rmp->time_since_last_heard =
+ clib_host_to_net_u64 ((u64) (now - s->last_heard));
+ rmp->total_bytes = clib_host_to_net_u64 (s->total_bytes);
+ rmp->total_pkts = ntohl (s->total_pkts);
+ rmp->context = context;
+ rmp->outside_port = s->out2in.port;
+ rmp->inside_port = s->in2out.port;
+ rmp->protocol = clib_host_to_net_u16 (s->proto);
+ clib_memcpy (rmp->ext_host_address, &s->ext_host_addr, 4);
+ rmp->ext_host_port = s->ext_host_port;
+ if (nat44_ed_is_twice_nat_session (s))
+ {
+ clib_memcpy (rmp->ext_host_nat_address, &s->ext_host_nat_addr, 4);
+ rmp->ext_host_nat_port = s->ext_host_nat_port;
+ }
+
+ sess_timeout_time = s->last_heard + nat44_session_get_timeout (sm, s);
+ rmp->is_timed_out = (now >= sess_timeout_time);
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_nat44_user_session_v2_dump_t_handler (
+ vl_api_nat44_user_session_v2_dump_t *mp)
+{
+ snat_main_per_thread_data_t *tsm;
+ snat_main_t *sm = &snat_main;
+ vl_api_registration_t *reg;
+ snat_user_key_t ukey;
+ snat_session_t *s;
+ ip4_header_t ip;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ clib_memcpy (&ukey.addr, mp->ip_address, 4);
+ ip.src_address.as_u32 = ukey.addr.as_u32;
+ ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id));
+ if (sm->num_workers > 1)
+ tsm = vec_elt_at_index (
+ sm->per_thread_data,
+ nat44_ed_get_in2out_worker_index (0, &ip, ukey.fib_index, 0));
+ else
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+
+ pool_foreach (s, tsm->sessions)
+ {
+ if (s->in2out.addr.as_u32 == ukey.addr.as_u32)
+ {
+ send_nat44_user_session_v2_details (s, reg, mp->context);
+ }
+ }
+}
+
+static void
+vl_api_nat44_user_session_v3_dump_t_handler (
+ vl_api_nat44_user_session_v3_dump_t *mp)
+{
+ snat_main_per_thread_data_t *tsm;
+ snat_main_t *sm = &snat_main;
+ vl_api_registration_t *reg;
+ snat_user_key_t ukey;
+ snat_session_t *s;
+ ip4_header_t ip;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ clib_memcpy (&ukey.addr, mp->ip_address, 4);
+ ip.src_address.as_u32 = ukey.addr.as_u32;
+ ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id));
+ if (sm->num_workers > 1)
+ tsm = vec_elt_at_index (
+ sm->per_thread_data,
+ nat44_ed_get_in2out_worker_index (0, &ip, ukey.fib_index, 0));
+ else
+ tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers);
+
+ pool_foreach (s, tsm->sessions)
+ {
+ if (s->in2out.addr.as_u32 == ukey.addr.as_u32)
+ {
+ send_nat44_user_session_v3_details (s, reg, mp->context);
+ }
+ }
+}
+
/* API definitions */
#include <vnet/format_fns.h>
#include <nat/nat44-ed/nat44_ed.api.c>
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_classify.c b/src/plugins/nat/nat44-ed/nat44_ed_classify.c
index 5a9f4e42657..229cf3669e6 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_classify.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_classify.c
@@ -98,7 +98,6 @@ nat44_handoff_classify_node_fn_inline (vlib_main_t * vm,
u32 next0 = NAT_NEXT_IN2OUT_CLASSIFY;
ip4_header_t *ip0;
snat_address_t *ap;
- clib_bihash_kv_8_8_t kv0, value0;
/* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
@@ -122,23 +121,19 @@ nat44_handoff_classify_node_fn_inline (vlib_main_t * vm,
if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
{
- init_nat_k (&kv0, ip0->dst_address, 0, 0, 0);
/* try to classify the fragment based on IP header alone */
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external,
- &kv0, &value0))
+ m = nat44_ed_sm_o2i_lookup (sm, ip0->dst_address, 0, 0, 0);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value0.value);
if (m->local_addr.as_u32 != m->external_addr.as_u32)
next0 = NAT_NEXT_OUT2IN_CLASSIFY;
goto enqueue0;
}
- init_nat_k (&kv0, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port, 0,
- ip_proto_to_nat_proto (ip0->protocol));
- if (!clib_bihash_search_8_8
- (&sm->static_mapping_by_external, &kv0, &value0))
+ m = nat44_ed_sm_o2i_lookup (
+ sm, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port,
+ 0, ip0->protocol);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value0.value);
if (m->local_addr.as_u32 != m->external_addr.as_u32)
next0 = NAT_NEXT_OUT2IN_CLASSIFY;
}
@@ -202,7 +197,6 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm,
u32 sw_if_index0, rx_fib_index0;
ip4_header_t *ip0;
snat_address_t *ap;
- clib_bihash_kv_8_8_t kv0, value0;
clib_bihash_kv_16_8_t ed_kv0, ed_value0;
/* speculatively enqueue b0 to the current next frame */
@@ -227,11 +221,11 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm,
rx_fib_index0 =
fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
sw_if_index0);
- init_ed_k (&ed_kv0, ip0->src_address,
+ init_ed_k (&ed_kv0, ip0->src_address.as_u32,
vnet_buffer (b0)->ip.reass.l4_src_port,
- ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port,
- rx_fib_index0, ip0->protocol);
+ ip0->dst_address.as_u32,
+ vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
+ ip0->protocol);
/* process whole packet */
if (!clib_bihash_search_16_8 (&sm->flow_hash, &ed_kv0,
&ed_value0))
@@ -272,23 +266,19 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm,
if (PREDICT_FALSE (pool_elts (sm->static_mappings)))
{
- init_nat_k (&kv0, ip0->dst_address, 0, 0, 0);
/* try to classify the fragment based on IP header alone */
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_external,
- &kv0, &value0))
+ m = nat44_ed_sm_o2i_lookup (sm, ip0->dst_address, 0, 0, 0);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value0.value);
if (m->local_addr.as_u32 != m->external_addr.as_u32)
next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH;
goto enqueue0;
}
- init_nat_k (&kv0, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port, 0,
- ip_proto_to_nat_proto (ip0->protocol));
- if (!clib_bihash_search_8_8
- (&sm->static_mapping_by_external, &kv0, &value0))
+ m = nat44_ed_sm_o2i_lookup (
+ sm, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port,
+ 0, ip0->protocol);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value0.value);
if (m->local_addr.as_u32 != m->external_addr.as_u32)
next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH;
}
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_cli.c b/src/plugins/nat/nat44-ed/nat44_ed_cli.c
index acf9069af2b..14313d05a35 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_cli.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_cli.c
@@ -38,23 +38,15 @@ nat44_ed_enable_disable_command_fn (vlib_main_t *vm, unformat_input_t *input,
clib_error_t *error = 0;
nat44_config_t c = { 0 };
- u8 enable_set = 0, enable = 0, mode_set = 0;
+ u8 enable_set = 0, enable = 0;
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (!mode_set && unformat (line_input, "static-mapping-only"))
- {
- mode_set = 1;
- c.static_mapping_only = 1;
- if (unformat (line_input, "connection-tracking"))
- {
- c.connection_tracking = 1;
- }
- }
- else if (unformat (line_input, "inside-vrf %u", &c.inside_vrf));
+ if (unformat (line_input, "inside-vrf %u", &c.inside_vrf))
+ ;
else if (unformat (line_input, "outside-vrf %u", &c.outside_vrf));
else if (unformat (line_input, "sessions %u", &c.sessions));
else if (!enable_set)
@@ -116,7 +108,6 @@ set_workers_command_fn (vlib_main_t * vm,
int rv = 0;
clib_error_t *error = 0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -162,8 +153,8 @@ done:
}
static clib_error_t *
-nat_show_workers_commnad_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
+nat_show_workers_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
snat_main_t *sm = &snat_main;
u32 *worker;
@@ -189,10 +180,9 @@ snat_set_log_level_command_fn (vlib_main_t * vm,
{
unformat_input_t _line_input, *line_input = &_line_input;
snat_main_t *sm = &snat_main;
- u8 log_level = NAT_LOG_NONE;
+ u32 log_level = NAT_LOG_NONE;
clib_error_t *error = 0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -283,12 +273,7 @@ nat44_show_hash_command_fn (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (input, "verbose"))
verbose = 2;
- vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->static_mapping_by_local,
- verbose);
- vlib_cli_output (vm, "%U",
- format_bihash_8_8, &sm->static_mapping_by_external,
- verbose);
- vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose);
+ vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose);
vec_foreach_index (i, sm->per_thread_data)
{
vlib_cli_output (vm, "-------- thread %d %s --------\n",
@@ -296,8 +281,7 @@ nat44_show_hash_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose);
}
- vlib_cli_output (vm, "%U", format_bihash_16_8, &nam->affinity_hash,
- verbose);
+ vlib_cli_output (vm, "%U", format_bihash_16_8, &nam->affinity_hash, verbose);
vlib_cli_output (vm, "-------- hash table parameters --------\n");
vlib_cli_output (vm, "translation buckets: %u", sm->translation_buckets);
@@ -313,7 +297,6 @@ nat_set_mss_clamping_command_fn (vlib_main_t * vm, unformat_input_t * input,
clib_error_t *error = 0;
u32 mss;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -356,7 +339,6 @@ add_address_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- snat_main_t *sm = &snat_main;
ip4_address_t start_addr, end_addr, this_addr;
u32 start_host_order, end_host_order;
u32 vrf_id = ~0;
@@ -366,7 +348,6 @@ add_address_command_fn (vlib_main_t * vm,
clib_error_t *error = 0;
u8 twice_nat = 0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -392,12 +373,6 @@ add_address_command_fn (vlib_main_t * vm,
}
}
- if (sm->static_mapping_only)
- {
- error = clib_error_return (0, "static mapping only mode");
- goto done;
- }
-
start_host_order = clib_host_to_net_u32 (start_addr.as_u32);
end_host_order = clib_host_to_net_u32 (end_addr.as_u32);
@@ -419,9 +394,13 @@ add_address_command_fn (vlib_main_t * vm,
for (i = 0; i < count; i++)
{
if (is_add)
- rv = snat_add_address (sm, &this_addr, vrf_id, twice_nat);
+ {
+ rv = nat44_ed_add_address (&this_addr, vrf_id, twice_nat);
+ }
else
- rv = snat_del_address (sm, this_addr, 0, twice_nat);
+ {
+ rv = nat44_ed_del_address (this_addr, twice_nat);
+ }
switch (rv)
{
@@ -495,15 +474,12 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input,
u64 now = vlib_time_now (vm);
u64 sess_timeout_time = 0;
- u32 udp_sessions = 0;
- u32 tcp_sessions = 0;
- u32 icmp_sessions = 0;
-
- u32 timed_out = 0;
- u32 transitory = 0;
- u32 transitory_wait_closed = 0;
- u32 transitory_closed = 0;
- u32 established = 0;
+ struct
+ {
+ u32 total;
+ u32 timed_out;
+ } udp = { 0 }, tcp = { 0 }, tcp_established = { 0 }, tcp_transitory = { 0 },
+ icmp = { 0 }, other = { 0 };
u32 fib;
@@ -517,45 +493,48 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
pool_foreach (s, tsm->sessions)
{
- sess_timeout_time = s->last_heard +
- (f64) nat44_session_get_timeout (sm, s);
- if (now >= sess_timeout_time)
- timed_out++;
-
- switch (s->nat_proto)
- {
- case NAT_PROTOCOL_ICMP:
- icmp_sessions++;
- break;
- case NAT_PROTOCOL_TCP:
- tcp_sessions++;
- if (s->state)
- {
- if (s->tcp_closed_timestamp)
- {
- if (now >= s->tcp_closed_timestamp)
- {
- ++transitory_closed;
- }
- else
- {
- ++transitory_wait_closed;
- }
- }
- transitory++;
- }
- else
- established++;
- break;
- case NAT_PROTOCOL_UDP:
- default:
- udp_sessions++;
- break;
- }
- }
- nat44_show_lru_summary (vm, tsm, now, sess_timeout_time);
- count += pool_elts (tsm->sessions);
- }
+ sess_timeout_time =
+ s->last_heard + (f64) nat44_session_get_timeout (sm, s);
+
+ switch (s->proto)
+ {
+ case IP_PROTOCOL_ICMP:
+ ++icmp.total;
+ if (now >= sess_timeout_time)
+ ++icmp.timed_out;
+ break;
+ case IP_PROTOCOL_TCP:
+ ++tcp.total;
+ if (now >= sess_timeout_time)
+ ++tcp.timed_out;
+ if (nat44_ed_tcp_is_established (s->tcp_state))
+ {
+ ++tcp_established.total;
+ if (now >= sess_timeout_time)
+ ++tcp_established.timed_out;
+ }
+ else
+ {
+ ++tcp_transitory.total;
+ if (now >= sess_timeout_time)
+ ++tcp_transitory.timed_out;
+ }
+ break;
+ case IP_PROTOCOL_UDP:
+ ++udp.total;
+ if (now >= sess_timeout_time)
+ ++udp.timed_out;
+ break;
+ default:
+ ++other.total;
+ if (now >= sess_timeout_time)
+ ++other.timed_out;
+ break;
+ }
+ }
+ nat44_show_lru_summary (vm, tsm, now, sess_timeout_time);
+ count += pool_elts (tsm->sessions);
+ }
}
else
{
@@ -564,55 +543,66 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
sess_timeout_time = s->last_heard +
(f64) nat44_session_get_timeout (sm, s);
- if (now >= sess_timeout_time)
- timed_out++;
-
- switch (s->nat_proto)
- {
- case NAT_PROTOCOL_ICMP:
- icmp_sessions++;
- break;
- case NAT_PROTOCOL_TCP:
- tcp_sessions++;
- if (s->state)
- {
- if (s->tcp_closed_timestamp)
- {
- if (now >= s->tcp_closed_timestamp)
- {
- ++transitory_closed;
- }
- else
- {
- ++transitory_wait_closed;
- }
- }
- transitory++;
- }
- else
- established++;
- break;
- case NAT_PROTOCOL_UDP:
- default:
- udp_sessions++;
- break;
- }
+
+ switch (s->proto)
+ {
+ case IP_PROTOCOL_ICMP:
+ ++icmp.total;
+ if (now >= sess_timeout_time)
+ ++icmp.timed_out;
+ break;
+ case IP_PROTOCOL_TCP:
+ ++tcp.total;
+ if (now >= sess_timeout_time)
+ ++tcp.timed_out;
+ if (nat44_ed_tcp_is_established (s->tcp_state))
+ {
+ ++tcp_established.total;
+ if (now >= sess_timeout_time)
+ ++tcp_established.timed_out;
+ }
+ else
+ {
+ ++tcp_transitory.total;
+ if (now >= sess_timeout_time)
+ ++tcp_transitory.timed_out;
+ }
+ break;
+ case IP_PROTOCOL_UDP:
+ ++udp.total;
+ if (now >= sess_timeout_time)
+ ++udp.timed_out;
+ break;
+ default:
+ ++other.total;
+ if (now >= sess_timeout_time)
+ ++other.timed_out;
+ break;
+ }
}
nat44_show_lru_summary (vm, tsm, now, sess_timeout_time);
count = pool_elts (tsm->sessions);
}
- vlib_cli_output (vm, "total timed out sessions: %u", timed_out);
- vlib_cli_output (vm, "total sessions: %u", count);
- vlib_cli_output (vm, "total tcp sessions: %u", tcp_sessions);
- vlib_cli_output (vm, "total tcp established sessions: %u", established);
- vlib_cli_output (vm, "total tcp transitory sessions: %u", transitory);
- vlib_cli_output (vm, "total tcp transitory (WAIT-CLOSED) sessions: %u",
- transitory_wait_closed);
- vlib_cli_output (vm, "total tcp transitory (CLOSED) sessions: %u",
- transitory_closed);
- vlib_cli_output (vm, "total udp sessions: %u", udp_sessions);
- vlib_cli_output (vm, "total icmp sessions: %u", icmp_sessions);
+ u32 timed_out =
+ tcp.timed_out + icmp.timed_out + udp.timed_out + other.timed_out;
+ vlib_cli_output (vm, "total sessions: %u (timed out: %u)", count, timed_out);
+ vlib_cli_output (vm, "tcp sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", tcp.total,
+ tcp.timed_out);
+ vlib_cli_output (vm, " established: %u (timed out: %u)",
+ tcp_established.total, tcp_established.timed_out);
+ vlib_cli_output (vm, " transitory: %u (timed out: %u)",
+ tcp_transitory.total, tcp_transitory.timed_out);
+ vlib_cli_output (vm, "udp sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", udp.total,
+ udp.timed_out);
+ vlib_cli_output (vm, "icmp sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", icmp.total,
+ icmp.timed_out);
+ vlib_cli_output (vm, "other sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", other.total,
+ other.timed_out);
return 0;
}
@@ -628,14 +618,14 @@ nat44_show_addresses_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
vlib_cli_output (vm, "%U", format_ip4_address, &ap->addr);
if (ap->fib_index != ~0)
- vlib_cli_output (vm, " tenant VRF: %u",
- fib_table_get(ap->fib_index, FIB_PROTOCOL_IP4)->ft_table_id);
+ vlib_cli_output (
+ vm, " tenant VRF: %u",
+ fib_table_get (ap->fib_index, FIB_PROTOCOL_IP4)->ft_table_id);
else
vlib_cli_output (vm, " tenant VRF independent");
- #define _(N, i, n, s) \
- vlib_cli_output (vm, " %d busy %s ports", ap->busy_##n##_ports, s);
- foreach_nat_protocol
- #undef _
+
+ if (ap->addr_len != ~0)
+ vlib_cli_output (vm, " synced with interface address");
}
vlib_cli_output (vm, "NAT44 twice-nat pool addresses:");
vec_foreach (ap, sm->twice_nat_addresses)
@@ -646,10 +636,9 @@ nat44_show_addresses_command_fn (vlib_main_t * vm, unformat_input_t * input,
fib_table_get(ap->fib_index, FIB_PROTOCOL_IP4)->ft_table_id);
else
vlib_cli_output (vm, " tenant VRF independent");
- #define _(N, i, n, s) \
- vlib_cli_output (vm, " %d busy %s ports", ap->busy_##n##_ports, s);
- foreach_nat_protocol
- #undef _
+
+ if (ap->addr_len != ~0)
+ vlib_cli_output (vm, " synced with interface address");
}
return 0;
}
@@ -669,7 +658,6 @@ snat_feature_command_fn (vlib_main_t * vm,
sw_if_index = ~0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -804,21 +792,22 @@ nat44_show_interfaces_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "NAT44 interfaces:");
pool_foreach (i, sm->interfaces)
{
- vlib_cli_output (vm, " %U %s", format_vnet_sw_if_index_name, vnm,
- i->sw_if_index,
- (nat_interface_is_inside(i) &&
- nat_interface_is_outside(i)) ? "in out" :
- (nat_interface_is_inside(i) ? "in" : "out"));
+ vlib_cli_output (vm, " %U %s", format_vnet_sw_if_index_name, vnm,
+ i->sw_if_index,
+ (nat44_ed_is_interface_inside (i) &&
+ nat44_ed_is_interface_outside (i)) ?
+ "in out" :
+ (nat44_ed_is_interface_inside (i) ? "in" : "out"));
}
pool_foreach (i, sm->output_feature_interfaces)
{
- vlib_cli_output (vm, " %U output-feature %s",
- format_vnet_sw_if_index_name, vnm,
- i->sw_if_index,
- (nat_interface_is_inside(i) &&
- nat_interface_is_outside(i)) ? "in out" :
- (nat_interface_is_inside(i) ? "in" : "out"));
+ vlib_cli_output (vm, " %U output-feature %s",
+ format_vnet_sw_if_index_name, vnm, i->sw_if_index,
+ (nat44_ed_is_interface_inside (i) &&
+ nat44_ed_is_interface_outside (i)) ?
+ "in out" :
+ (nat44_ed_is_interface_inside (i) ? "in" : "out"));
}
return 0;
@@ -832,14 +821,13 @@ add_static_mapping_command_fn (vlib_main_t * vm,
unformat_input_t _line_input, *line_input = &_line_input;
vnet_main_t *vnm = vnet_get_main ();
clib_error_t *error = 0;
- int rv;
-
- nat_protocol_t proto = NAT_PROTOCOL_OTHER;
ip4_address_t l_addr, e_addr, pool_addr;
u32 l_port = 0, e_port = 0, vrf_id = ~0;
u8 l_port_set = 0, e_port_set = 0;
- u32 sw_if_index, flags = 0;
- int is_add = 1;
+ int is_add = 1, rv;
+ u32 flags = 0;
+ u32 sw_if_index = ~0;
+ ip_protocol_t proto = 0;
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -881,7 +869,7 @@ add_static_mapping_command_fn (vlib_main_t * vm,
}
else if (unformat (line_input, "vrf %u", &vrf_id))
;
- else if (unformat (line_input, "%U", unformat_nat_protocol, &proto))
+ else if (unformat (line_input, "%U", unformat_ip_protocol, &proto))
;
else if (unformat (line_input, "self-twice-nat"))
{
@@ -923,8 +911,6 @@ add_static_mapping_command_fn (vlib_main_t * vm,
e_port = clib_host_to_net_u16 (e_port);
}
- // TODO: specific pool_addr for both pool & twice nat pool ?
-
if (is_add)
{
rv =
@@ -937,25 +923,17 @@ add_static_mapping_command_fn (vlib_main_t * vm,
vrf_id, sw_if_index, flags);
}
- // TODO: fix returns
-
switch (rv)
{
- case VNET_API_ERROR_INVALID_VALUE:
- error = clib_error_return (0, "External port already in use.");
- goto done;
+ case VNET_API_ERROR_UNSUPPORTED:
+ error = clib_error_return (0, "Plugin disabled.");
+ break;
case VNET_API_ERROR_NO_SUCH_ENTRY:
- if (is_add)
- error = clib_error_return (0, "External address must be allocated.");
- else
- error = clib_error_return (0, "Mapping not exist.");
- goto done;
- case VNET_API_ERROR_NO_SUCH_FIB:
- error = clib_error_return (0, "No such VRF id.");
- goto done;
+ error = clib_error_return (0, "Mapping not exist.");
+ break;
case VNET_API_ERROR_VALUE_EXIST:
error = clib_error_return (0, "Mapping already exist.");
- goto done;
+ break;
default:
break;
}
@@ -966,7 +944,6 @@ done:
return error;
}
-// TODO: either delete this bullshit or update it
static clib_error_t *
add_identity_mapping_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -978,12 +955,11 @@ add_identity_mapping_command_fn (vlib_main_t * vm,
int rv, is_add = 1, port_set = 0;
u32 sw_if_index, port, flags, vrf_id = ~0;
- nat_protocol_t proto;
+ ip_protocol_t proto = 0;
ip4_address_t addr;
flags = NAT_SM_FLAG_IDENTITY_NAT;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -998,7 +974,7 @@ add_identity_mapping_command_fn (vlib_main_t * vm,
}
else if (unformat (line_input, "vrf %u", &vrf_id))
;
- else if (unformat (line_input, "%U %u", unformat_nat_protocol, &proto,
+ else if (unformat (line_input, "%U %u", unformat_ip_protocol, &proto,
&port))
{
port_set = 1;
@@ -1036,25 +1012,17 @@ add_identity_mapping_command_fn (vlib_main_t * vm,
sw_if_index, flags);
}
- // TODO: fix returns
-
switch (rv)
{
- case VNET_API_ERROR_INVALID_VALUE:
- error = clib_error_return (0, "External port already in use.");
- goto done;
+ case VNET_API_ERROR_UNSUPPORTED:
+ error = clib_error_return (0, "Plugin disabled.");
+ break;
case VNET_API_ERROR_NO_SUCH_ENTRY:
- if (is_add)
- error = clib_error_return (0, "External address must be allocated.");
- else
- error = clib_error_return (0, "Mapping not exist.");
- goto done;
- case VNET_API_ERROR_NO_SUCH_FIB:
- error = clib_error_return (0, "No such VRF id.");
- goto done;
+ error = clib_error_return (0, "Mapping not exist.");
+ break;
case VNET_API_ERROR_VALUE_EXIST:
error = clib_error_return (0, "Mapping already exist.");
- goto done;
+ break;
default:
break;
}
@@ -1075,12 +1043,11 @@ add_lb_static_mapping_command_fn (vlib_main_t * vm,
ip4_address_t l_addr, e_addr;
u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0, affinity = 0;
u8 proto_set = 0;
- nat_protocol_t proto;
+ ip_protocol_t proto;
nat44_lb_addr_port_t *locals = 0, local;
int rv, is_add = 1;
u32 flags = 0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -1091,6 +1058,7 @@ add_lb_static_mapping_command_fn (vlib_main_t * vm,
{
clib_memset (&local, 0, sizeof (local));
local.addr = l_addr;
+ l_port = clib_host_to_net_u16 (l_port);
local.port = (u16) l_port;
local.probability = (u8) probability;
vec_add1 (locals, local);
@@ -1101,6 +1069,7 @@ add_lb_static_mapping_command_fn (vlib_main_t * vm,
{
clib_memset (&local, 0, sizeof (local));
local.addr = l_addr;
+ l_port = clib_host_to_net_u16 (l_port);
local.port = (u16) l_port;
local.probability = (u8) probability;
local.vrf_id = vrf_id;
@@ -1108,8 +1077,10 @@ add_lb_static_mapping_command_fn (vlib_main_t * vm,
}
else if (unformat (line_input, "external %U:%u", unformat_ip4_address,
&e_addr, &e_port))
- ;
- else if (unformat (line_input, "protocol %U", unformat_nat_protocol,
+ {
+ e_port = clib_host_to_net_u16 (e_port);
+ }
+ else if (unformat (line_input, "protocol %U", unformat_ip_protocol,
&proto))
{
proto_set = 1;
@@ -1197,10 +1168,9 @@ add_lb_backend_command_fn (vlib_main_t * vm,
u32 l_port = 0, e_port = 0, vrf_id = 0, probability = 0;
int is_add = 1;
int rv;
- nat_protocol_t proto;
+ ip_protocol_t proto;
u8 proto_set = 0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
@@ -1216,7 +1186,7 @@ add_lb_backend_command_fn (vlib_main_t * vm,
else if (unformat (line_input, "external %U:%u", unformat_ip4_address,
&e_addr, &e_port))
;
- else if (unformat (line_input, "protocol %U", unformat_nat_protocol,
+ else if (unformat (line_input, "protocol %U", unformat_ip_protocol,
&proto))
proto_set = 1;
else if (unformat (line_input, "del"))
@@ -1276,14 +1246,14 @@ nat44_show_static_mappings_command_fn (vlib_main_t * vm,
{
snat_main_t *sm = &snat_main;
snat_static_mapping_t *m;
- snat_static_map_resolve_t *rp;
+ snat_static_mapping_resolve_t *rp;
vlib_cli_output (vm, "NAT44 static mappings:");
pool_foreach (m, sm->static_mappings)
{
vlib_cli_output (vm, " %U", format_snat_static_mapping, m);
}
- vec_foreach (rp, sm->to_resolve)
+ vec_foreach (rp, sm->sm_to_resolve)
vlib_cli_output (vm, " %U", format_snat_static_map_to_resolve, rp);
return 0;
@@ -1294,27 +1264,31 @@ snat_add_interface_address_command_fn (vlib_main_t * vm,
unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- snat_main_t *sm = &snat_main;
unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index;
- int rv;
- int is_del = 0;
+ vnet_main_t *vnm = vnet_get_main ();
clib_error_t *error = 0;
+ int rv, is_del = 0;
u8 twice_nat = 0;
+ u32 sw_if_index;
+
+ sw_if_index = ~0;
- /* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "%U", unformat_vnet_sw_interface,
- sm->vnet_main, &sw_if_index))
+ if (unformat (line_input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
;
else if (unformat (line_input, "twice-nat"))
- twice_nat = 1;
+ {
+ twice_nat = 1;
+ }
else if (unformat (line_input, "del"))
- is_del = 1;
+ {
+ is_del = 1;
+ }
else
{
error = clib_error_return (0, "unknown input '%U'",
@@ -1323,19 +1297,86 @@ snat_add_interface_address_command_fn (vlib_main_t * vm,
}
}
- rv = snat_add_interface_address (sm, sw_if_index, is_del, twice_nat);
+ if (is_del)
+ {
+ rv = nat44_ed_del_interface_address (sw_if_index, twice_nat);
+ }
+ else
+ {
+ rv = nat44_ed_add_interface_address (sw_if_index, twice_nat);
+ }
- switch (rv)
+ if (0 != rv)
{
- case 0:
- break;
+ error =
+ clib_error_return (0, "%s %U address failed", is_del ? "del" : "add",
+ format_vnet_sw_if_index_name, vnm, sw_if_index);
+ goto done;
+ }
- default:
- error = clib_error_return (0, "snat_add_interface_address returned %d",
- rv);
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+nat44_ed_add_del_vrf_table_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ bool is_add = true, not_set = true;
+ u32 vrf_id = ~0;
+ int rv;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%u", &vrf_id))
+ ;
+ else if (not_set)
+ {
+ if (unformat (line_input, "add"))
+ {
+ is_add = true;
+ }
+ else if (unformat (line_input, "del"))
+ {
+ is_add = false;
+ }
+ not_set = false;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (not_set)
+ {
+ error = clib_error_return (0, "missing required parameter");
+ goto done;
+ }
+
+ if (~0 == vrf_id)
+ {
+ error = clib_error_return (0, "missing vrf id");
goto done;
}
+ rv = nat44_ed_add_del_vrf_table (vrf_id, is_add);
+ if (rv)
+ {
+ error = clib_error_return (0, "%s vrf table returned %d",
+ is_add ? "add" : "del", rv);
+ }
+
done:
unformat_free (line_input);
@@ -1343,27 +1384,107 @@ done:
}
static clib_error_t *
+nat44_ed_add_del_vrf_route_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ bool is_add = true, not_set = true;
+ u32 vrf_id = ~0, table_vrf_id = ~0;
+ int rv;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return clib_error_return (0, NAT44_ED_EXPECTED_ARGUMENT);
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "table %u", &table_vrf_id))
+ ;
+ else if (unformat (line_input, "%u", &vrf_id))
+ ;
+ else if (not_set)
+ {
+ if (unformat (line_input, "add"))
+ {
+ is_add = true;
+ }
+ else if (unformat (line_input, "del"))
+ {
+ is_add = false;
+ }
+ not_set = false;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (not_set)
+ {
+ error = clib_error_return (0, "missing required parameter");
+ goto done;
+ }
+
+ if ((~0 == vrf_id) || (~0 == table_vrf_id))
+ {
+ error = clib_error_return (0, "missing vrf id");
+ goto done;
+ }
+
+ rv = nat44_ed_add_del_vrf_route (table_vrf_id, vrf_id, is_add);
+ if (rv)
+ {
+ error = clib_error_return (0, "%s vrf table returned %d",
+ is_add ? "add" : "del", rv);
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+static clib_error_t *
+nat44_ed_show_vrf_tables_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
+ vrf_route_t *r;
+ int i = 0;
+
+ pool_foreach (t, sm->vrf_tables)
+ {
+ vlib_cli_output (vm, "table %u:", t->table_vrf_id);
+ pool_foreach (r, t->routes)
+ {
+ vlib_cli_output (vm, "[%u] vrf-id %u", i, r->vrf_id);
+ i++;
+ }
+ }
+
+ return 0;
+}
+
+static clib_error_t *
nat44_show_interface_address_command_fn (vlib_main_t * vm,
unformat_input_t * input,
vlib_cli_command_t * cmd)
{
snat_main_t *sm = &snat_main;
vnet_main_t *vnm = vnet_get_main ();
- u32 *sw_if_index;
+ snat_address_resolve_t *ap;
vlib_cli_output (vm, "NAT44 pool address interfaces:");
- vec_foreach (sw_if_index, sm->auto_add_sw_if_indices)
- {
- vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, vnm,
- *sw_if_index);
- }
- vlib_cli_output (vm, "NAT44 twice-nat pool address interfaces:");
- vec_foreach (sw_if_index, sm->auto_add_sw_if_indices_twice_nat)
+ vec_foreach (ap, sm->addr_to_resolve)
{
- vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, vnm,
- *sw_if_index);
+ vlib_cli_output (vm, " %U%s", format_vnet_sw_if_index_name, vnm,
+ ap->sw_if_index, ap->is_twice_nat ? " twice-nat" : "");
}
-
return 0;
}
@@ -1375,22 +1496,61 @@ nat44_show_sessions_command_fn (vlib_main_t * vm, unformat_input_t * input,
clib_error_t *error = 0;
snat_main_per_thread_data_t *tsm;
snat_main_t *sm = &snat_main;
-
- int i = 0;
+ ip4_address_t i2o_sa, i2o_da, o2i_sa, o2i_da;
+ u8 filter_i2o_sa = 0, filter_i2o_da = 0;
+ u8 filter_o2i_sa = 0, filter_o2i_da = 0;
+ u16 i2o_sp, i2o_dp, o2i_sp, o2i_dp;
+ u8 filter_i2o_sp = 0, filter_i2o_dp = 0;
+ u8 filter_o2i_sp = 0, filter_o2i_dp = 0;
+ ip_protocol_t proto;
+ u8 filter_proto = 0;
+ u8 had_input = 1, filtering = 0;
+ int i = 0, showed_sessions;
if (!unformat_user (input, unformat_line_input, line_input))
- goto print;
+ {
+ had_input = 0;
+ goto print;
+ }
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- error = clib_error_return (0, "unknown input '%U'",
- format_unformat_error, line_input);
- break;
+ if (unformat (line_input, "filter i2o saddr %U", unformat_ip4_address,
+ &i2o_sa))
+ filter_i2o_sa = filtering = 1;
+ else if (unformat (line_input, "filter i2o daddr %U",
+ unformat_ip4_address, &i2o_da))
+ filter_i2o_da = filtering = 1;
+ else if (unformat (line_input, "filter o2i saddr %U",
+ unformat_ip4_address, &o2i_sa))
+ filter_o2i_sa = filtering = 1;
+ else if (unformat (line_input, "filter o2i daddr %U",
+ unformat_ip4_address, &o2i_da))
+ filter_o2i_da = filtering = 1;
+ else if (unformat (line_input, "filter i2o sport %u", &i2o_sp))
+ filter_i2o_sp = filtering = 1;
+ else if (unformat (line_input, "filter i2o dport %u", &i2o_dp))
+ filter_i2o_dp = filtering = 1;
+ else if (unformat (line_input, "filter o2i sport %u", &o2i_sp))
+ filter_o2i_sp = filtering = 1;
+ else if (unformat (line_input, "filter o2i dport %u", &o2i_dp))
+ filter_o2i_dp = filtering = 1;
+ else if (unformat (line_input, "filter i2o proto %U",
+ unformat_ip_protocol, &proto))
+ filter_proto = filtering = 1;
+ else if (unformat (line_input, "filter o2i proto %U",
+ unformat_ip_protocol, &proto))
+ filter_proto = filtering = 1;
+ else
+ {
+ error = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
}
- unformat_free (line_input);
print:
- vlib_cli_output (vm, "NAT44 ED sessions:");
+ vlib_cli_output (vm, "NAT44 ED sessions:");
vec_foreach_index (i, sm->per_thread_data)
{
@@ -1400,12 +1560,53 @@ print:
i, vlib_worker_threads[i].name,
pool_elts (tsm->sessions));
- snat_session_t *s;
- pool_foreach (s, tsm->sessions)
- {
- vlib_cli_output (vm, " %U\n", format_snat_session, tsm, s);
- }
+ showed_sessions = 0;
+ snat_session_t *s;
+ pool_foreach (s, tsm->sessions)
+ {
+ if (filtering)
+ {
+ if (filter_i2o_sa && i2o_sa.as_u32 != s->i2o.match.saddr.as_u32)
+ continue;
+ if (filter_i2o_da && i2o_da.as_u32 != s->i2o.match.daddr.as_u32)
+ continue;
+ if (filter_o2i_sa && o2i_sa.as_u32 != s->o2i.match.saddr.as_u32)
+ continue;
+ if (filter_o2i_da && o2i_da.as_u32 != s->o2i.match.daddr.as_u32)
+ continue;
+ if (filter_i2o_sp &&
+ i2o_sp != clib_net_to_host_u16 (s->i2o.match.sport))
+ continue;
+ if (filter_i2o_dp &&
+ i2o_dp != clib_net_to_host_u16 (s->i2o.match.dport))
+ continue;
+ if (filter_o2i_sp &&
+ o2i_sp != clib_net_to_host_u16 (s->o2i.match.sport))
+ continue;
+ if (filter_o2i_dp &&
+ o2i_dp != clib_net_to_host_u16 (s->o2i.match.dport))
+ continue;
+ if (filter_proto && proto != s->proto)
+ continue;
+ showed_sessions++;
+ }
+ vlib_cli_output (vm, " %U\n", format_snat_session, sm, tsm, s,
+ vlib_time_now (vm));
+ }
+ if (filtering)
+ {
+ vlib_cli_output (vm,
+ "Showed: %d, Filtered: %d of total %d "
+ "sessions of thread %d\n\n",
+ showed_sessions,
+ pool_elts (tsm->sessions) - showed_sessions,
+ pool_elts (tsm->sessions), i);
+ }
}
+
+done:
+ if (had_input)
+ unformat_free (line_input);
return error;
}
@@ -1457,7 +1658,7 @@ nat44_del_session_command_fn (vlib_main_t * vm,
u32 port = 0, eh_port = 0, vrf_id = sm->outside_vrf_id;
clib_error_t *error = 0;
ip4_address_t addr, eh_addr;
- nat_protocol_t proto;
+ ip_protocol_t proto;
int is_in = 0;
int rv;
@@ -1466,9 +1667,8 @@ nat44_del_session_command_fn (vlib_main_t * vm,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat
- (line_input, "%U:%u %U", unformat_ip4_address, &addr, &port,
- unformat_nat_protocol, &proto))
+ if (unformat (line_input, "%U:%u %U", unformat_ip4_address, &addr, &port,
+ unformat_ip_protocol, &proto))
;
else if (unformat (line_input, "in"))
{
@@ -1493,10 +1693,9 @@ nat44_del_session_command_fn (vlib_main_t * vm,
}
}
- rv =
- nat44_del_ed_session (sm, &addr, clib_host_to_net_u16 (port), &eh_addr,
- clib_host_to_net_u16 (eh_port),
- nat_proto_to_ip_proto (proto), vrf_id, is_in);
+ rv = nat44_ed_del_session (sm, &addr, clib_host_to_net_u16 (port), &eh_addr,
+ clib_host_to_net_u16 (eh_port), proto, vrf_id,
+ is_in);
switch (rv)
{
@@ -1648,21 +1847,19 @@ done:
* @cliexstart{nat44}
* Enable nat44 plugin
* To enable nat44-ed, use:
- * vpp# nat44 enable
+ * vpp# nat44 plugin enable
* To disable nat44-ed, use:
- * vpp# nat44 disable
- * To enable nat44-ed static mapping with connection tracking, use:
- * vpp# nat44-ed enable static-mapping connection-tracking
+ * vpp# nat44 plugin disable
* To set inside-vrf outside-vrf, use:
- * vpp# nat44 enable inside-vrf <id> outside-vrf <id>
+ * vpp# nat44 plugin enable inside-vrf <id> outside-vrf <id>
* @cliexend
?*/
VLIB_CLI_COMMAND (nat44_ed_enable_disable_command, static) = {
- .path = "nat44",
- .short_help = "nat44 <enable [sessions <max-number>] [static-mapping-only "
- "connection-tracking] [inside-vrf <vrf-id>] "
- "[outside-vrf <vrf-id>]>|disable",
+ .path = "nat44 plugin",
.function = nat44_ed_enable_disable_command_fn,
+ .short_help =
+ "nat44 plugin <enable [sessions <max-number>] [inside-vrf <vrf-id>] "
+ "[outside-vrf <vrf-id>]>|disable",
};
/*?
@@ -1691,7 +1888,7 @@ VLIB_CLI_COMMAND (set_workers_command, static) = {
VLIB_CLI_COMMAND (nat_show_workers_command, static) = {
.path = "show nat workers",
.short_help = "show nat workers",
- .function = nat_show_workers_commnad_fn,
+ .function = nat_show_workers_command_fn,
};
/*?
@@ -2019,9 +2216,48 @@ VLIB_CLI_COMMAND (nat44_show_static_mappings_command, static) = {
* @cliexend
?*/
VLIB_CLI_COMMAND (snat_add_interface_address_command, static) = {
- .path = "nat44 add interface address",
- .short_help = "nat44 add interface address <interface> [twice-nat] [del]",
- .function = snat_add_interface_address_command_fn,
+ .path = "nat44 add interface address",
+ .function = snat_add_interface_address_command_fn,
+ .short_help = "nat44 add interface address <interface> [twice-nat] [del]",
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{nat44 vrf table}
+ * Add empty inter VRF routing table
+ * vpp# nat44 vrf table add 10
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat44_ed_add_del_vrf_table_command, static) = {
+ .path = "nat44 vrf table",
+ .short_help = "nat44 vrf table [add|del] <vrf-id>",
+ .function = nat44_ed_add_del_vrf_table_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{nat44 vrf route}
+ * Add inter VRF route record to VRF routing table
+ * vpp# nat44 vrf route add table 10 20
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat44_ed_add_del_vrf_route_command, static) = {
+ .path = "nat44 vrf route",
+ .short_help = "nat44 vrf route [add|del] table <vrf-id> <vrf-id>",
+ .function = nat44_ed_add_del_vrf_route_command_fn,
+};
+
+/*?
+ * @cliexpar
+ * @cliexstart{show nat44 vrf tables}
+ * Show inter VRF route tables
+ * vpp# show nat44 vrf tables
+ * @cliexend
+?*/
+VLIB_CLI_COMMAND (nat44_ed_show_vrf_tables_command, static) = {
+ .path = "show nat44 vrf tables",
+ .short_help = "show nat44 vrf tables",
+ .function = nat44_ed_show_vrf_tables_command_fn,
};
/*?
@@ -2049,7 +2285,9 @@ VLIB_CLI_COMMAND (nat44_show_interface_address_command, static) = {
?*/
VLIB_CLI_COMMAND (nat44_show_sessions_command, static) = {
.path = "show nat44 sessions",
- .short_help = "show nat44 sessions",
+ .short_help = "show nat44 sessions [filter {i2o | o2i} {saddr <ip4-addr> "
+ "| sport <n> | daddr <ip4-addr> | dport <n> | proto <proto>} "
+ "[filter .. [..]]]",
.function = nat44_show_sessions_command_fn,
};
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_doc.rst b/src/plugins/nat/nat44-ed/nat44_ed_doc.rst
new file mode 100644
index 00000000000..a6c461d4260
--- /dev/null
+++ b/src/plugins/nat/nat44-ed/nat44_ed_doc.rst
@@ -0,0 +1,729 @@
+.. _NAT44_Endpoint_Dependent:
+
+.. toctree::
+
+NAT44-ED: NAT44 Endpoint Dependent
+==================================
+
+Introduction
+------------
+
+NAT44-ED is the IPv4 endpoint dependent network address translation
+plugin. The component implements an address and port-dependent mapping
+and address and port-dependent filtering NAT as described in
+`RFC4787 <https://tools.ietf.org/html/rfc4787>`__.
+
+The outside address and port (X1’:x1’) is reused for internal hosts
+(X:x) for different values of Y:y. A flow is matched by {source address,
+destination address, protocol, transport source port, transport
+destination port, fib index}. As long as all these are unique the
+mapping is valid. While a single outside address in theory allows for
+2^16 source ports \* 2^32 destination IP addresses \* 2^16 destination
+ports = 2^64 sessions, this number is much smaller in practice. Few
+destination ports are generally used (80, 443) and a fraction of the IP
+address space is available. The limitation is 2^16 bindings per outside
+IP address to a single destination address and port (Y:y).
+
+The implementation is split, a control-plane / slow-path and a
+data-plane / fast-path. Essentially acting as a flow router. The
+data-plane does a 6-tuple flow lookup (SA, DA, P, SP, DP, FIB) and on a
+match runs the per-flow packet handling instructions on the packet. On a
+flow lookup miss, the packet is punted to the slow-path, where depending
+on policy new sessions are created.
+
+The support set of packet handling instructions is ever-increasing.
+Currently, the implementation supports rewrite of SA, DA, SP, DP and TCP
+MSS. The fast-path also does connection tracking and expiry of older
+sessions.
+
+NAT44-ED uses 6
+tuple\ ``(src address, src port, dst address, dst port, protocol and fib)``\ for
+matching communication.
+
+Structure
+~~~~~~~~~
+
+1) Dynamic NAT
+
+- also called PAT (Port Address Translation)
+- supports port overloading
+
+2) Static NAT
+
+- types of Static NAT:
+
+ a) identity mapping
+
+ - exceptions to translations
+
+ b) static mapping
+
+ - supported features:
+
+ 1. address only mapping
+
+ - one to one translation without ports
+
+ 2. twice-nat
+
+ - double-nat, translation of source and destination
+
+ 3. self-twice-nat
+
+ - double nat, translation of source and destination, where
+ external host address is the same as local host address
+
+ 4. out2in-only mapping
+
+ - session is created only from outside interface (out2in feature)
+
+ c) load balanced static mapping
+
+ - translates one frontend (``addr``:``port``) to multiple backends
+ (``addr``:``port``)
+
+3) Interfaces
+
+a) inside interface (in2out feature) - local to external network
+ translation - feature is before ip4-lookup
+b) outside interface (out2in feature) - external to local network
+ translation - feature is before ip4-lookup
+c) inside & outside interface (classify feature) - local or external
+ network translation - correct type of translation is determined per
+ communication - feature is before ip4-lookup
+d) output interface (output feature) - used for post routing translation
+ - feature is after ip4-lookup
+
+4) Addresses
+
+a) interface address - automatically managed external address - first
+ address of VPP interface
+b) pool address - range of external addresses
+
+5) Logging and Accounting
+
+a) ipfix logging
+b) syslog
+
+6) Miscellaneous Features
+
+a) inter-vrf translation control 1. basic
+
+ - nat44 plugin enable inside-vrf / outside-vrf
+ - inside/outside interface vrf’s
+
+ 2. advanced
+
+ - vrf table routing feature
+
+b) udp/tcp/icmp timeouts - configurable timeouts for these protocols
+c) session limiting 1. basic (plugin enable [sessions ] 2. advanced
+ (per vrf table / global limiting)
+d) mss-clamping - MSS (maximum segment size) is by default determined by
+ egress interface MTU (maximum transmission unit) size - used to lower
+ MSS value in VPN tunnel scenarios where additional headers can
+ enlarge the packet beyond MTU causing drops
+e) hairpinning - hosts on the same lan segment communicating via
+ external address
+f) forwarding - if enabled translation only occurs if active session or
+ static configuration exist, rest of the traffic is passed without
+ being translated
+
+Session Table
+-------------
+
+Session table exists per thread and contains pool of sessions that can
+be either expired or not expired. NAT44-ED plugin doesn’t use scavenging
+for clearing expired sessions. Rather then using scavenging plugin uses
+LRU doubly-linked list. LRU contains ordered list of sessions indices.
+Head of the list contains last updated session. Each session holds
+record of the LRU head (tcp transitory, tcp established, udp, icmp or
+unknown lru head). Because of this plugin can reach maximum number of
+sessions without requirement to clear old sessions. During session
+creation if a maximum number of sessions was reached LRU head is
+checked. Expired head record gets deleted and a new session gets
+created. For better performance LRU head records exist. Each time a new
+packet is received session index gets moved to the tail of LRU list.
+
+Terminology
+-----------
+
+IN2OUT (inside to outside translation) OUT2IN (outside to inside
+translation)
+
+NAT (network address translation) PAT (port address translation) MSS
+(maximum segment size) MTU (maximum transmission unit) VRF (virtual
+routing and forwarding)
+
+HAIRPINNING
+
+Dynamic NAT (Minimal Required Configuration)
+--------------------------------------------
+
+::
+
+ +-------------+
+ | 10.0.0.0/24 |
+ +-------------+
+ |
+ +----------------------+
+ | GigabitEthernet0/8/0 |
+ +----------------------+
+ +----------------------+
+ | GigabitEthernet0/a/0 |
+ +----------------------+
+ |
+ +-------------+
+ | 10.0.1.0/24 |
+ +-------------+
+
+1) enable nat plugin
+
+..
+
+ nat44 plugin enable sessions 10000
+
+2) configure NAT interfaces, two options:
+
+a) add inside NAT interface on local VPP interface, add outside NAT
+ interface on external VPP interface
+
+..
+
+ set interface nat44 in GigabitEthernet0/8/0 out GigabitEthernet0/a/0
+
+b) add output NAT interface on external VPP interface
+
+..
+
+ set interface nat44 in GigabitEthernet0/a/0 output-feature
+
+3) configure NAT address
+
+a) add external address range
+
+..
+
+ nat44 add address 10.0.1.1
+
+b) add external VPP interface address
+
+..
+
+ nat44 add interface address GigabitEthernet0/a/0
+
+Static NAT
+----------
+
+Identity Mapping
+~~~~~~~~~~~~~~~~
+
+ nat44 add identity mapping ``ip4-addr``\ \|external ``interface``
+ [``protocol`` ``port``] [vrf ``table-id``] [del]
+
+Static Mapping
+~~~~~~~~~~~~~~
+
+ nat44 add static mapping tcp|udp|icmp local ``addr``
+ [``port|icmp-echo-id``] external ``addr`` [``port|icmp-echo-id``]
+ [vrf ``table-id``] [twice-nat|self-twice-nat] [out2in-only] [exact
+ ``pool-addr``] [del]
+
+Load Balanced Static Mapping
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ nat44 add load-balancing back-end protocol tcp|udp external
+ ``addr``:``port`` local ``addr``:``port`` [vrf ``table-id``]
+ probability ``n`` [del]
+
+..
+
+ nat44 add load-balancing static mapping protocol tcp|udp external
+ ``addr``:``port`` local ``addr``:``port`` [vrf ``table-id``]
+ probability ``n`` [twice-nat|self-twice-nat] [out2in-only] [affinity
+ ``timeout-seconds``] [del]
+
+Interfaces
+----------
+
+Inside Interface
+~~~~~~~~~~~~~~~~
+
+::
+
+ NAT INSIDE IF
+ +----------------------+
+ | GigabitEthernet0/8/0 |
+ +----------------------+
+
+..
+
+ set interface nat44 in GigabitEthernet0/8/0 [del]
+
+NAT inside interface is used for translating local to external
+communication. Translates Dynamic and Static NAT traffic. If no matching
+session is found a new session is created for both Dynamic NAT and
+Static NAT. Dynamic NAT sessions can get created only on inside
+interface.
+
+Outside Interface
+~~~~~~~~~~~~~~~~~
+
+::
+
+ NAT OUTSIDE IF
+ +----------------------+
+ | GigabitEthernet0/a/0 |
+ +----------------------+
+
+..
+
+ set interface nat44 out GigabitEthernet0/a/0 [del]
+
+NAT outside interface is used for translating external to local
+communication. Translates Dynamic and Static NAT traffic. New session
+gets created only if no matching session is found and matching Static
+NAT configuration exists.
+
+Inside & Outside Interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ NAT IN AND OUT IF
+ +----------------------+
+ | GigabitEthernet0/8/0 |
+ +----------------------+
+ NAT IN AND OUT IF
+ +----------------------+
+ | GigabitEthernet0/a/0 |
+ +----------------------+
+
+..
+
+ set interface nat44 in GigabitEthernet0/8/0 out GigabitEthernet0/8/0
+ [del]
+
+ set interface nat44 in GigabitEthernet0/a/0 out GigabitEthernet0/a/0
+ [del]
+
+If one VPP interface is configured both as inside and outside NAT
+interface then classification feature is used. By default NAT inside
+interface uses in2out feature and NAT outside uses out2in feature.
+Classification feature determines if the communication should be passed
+to in2out feature or to out2in feature. Traffic will get passed to
+out2in feature if destination address is one of NAT addresses or a
+static mapping in out2in direction flow matches this communication. By
+default all traffic is passed to in2out feature.
+
+Output Interface
+~~~~~~~~~~~~~~~~
+
+::
+
+ +-------------+ +-------------+
+ | 10.0.2.0/24 | | 10.0.3.0/24 |
+ +-------------+ +-------------+
+ | |
+ +----------------------+ +----------------------+
+ | GigabitEthernet0/7/0 | | GigabitEthernet0/8/0 |
+ +----------------------+ +----------------------+
+ NAT OUTPUT IF
+ +----------------------+
+ | GigabitEthernet0/a/0 |
+ +----------------------+
+ +----------+
+ | 10.0.1.1 |
+ +----------+
+ |
+ +-------------+
+ | 10.0.1.0/24 |
+ +-------------+
+
+..
+
+ set interface nat44 in GigabitEthernet0/a/0 output-feature [del]
+
+NAT output interface acts as both inside and outside interfaces. Inside
+rules apply for all egress communication on VPP interface and outside
+rules apply for all ingress communication. Compared to inside/outside
+NAT configuration method non of the local interfaces require to be
+configured as inside NAT interfaces. Translation only occurs after
+routing decision has been made and just before leaving NAT output
+interface. In above example all traffic destined for 10.0.1.0/24 from
+10.0.2.0/24 or 10.0.3.0/24 will get translated. NAT output interface
+acts as post-routing feature.
+
+Addresses
+---------
+
+Interface Address
+~~~~~~~~~~~~~~~~~
+
+ nat44 add interface address ``interface`` `twice-nat <#twice-nat>`__
+ [del]
+
+NAT interface address is a standard external pool address that gets auto
+added upon resolving first VPP interface address. Supports both standard
+address and twice-nat address. Twice-nat address is used in conjunction
+with static mapping twice-nat and self-twice-nat feature.
+
+Pool Address
+~~~~~~~~~~~~
+
+ nat44 add address ``ip4-range-start`` [- ``ip4-range-end``]
+ [tenant-vrf ``vrf-id``] `twice-nat <#twice-nat>`__ [del]
+
+Statically configured address or range of addresses that supports both
+standard and twice-nat address. Specifying vrf-id lets user assign
+address/addresses to specific NAT inside interfaces that belong to the
+same vrf table.
+
+Logging
+-------
+
+ nat set logging level ``level``
+
+Configuration of logging level is used only for internal VPP logging.
+
+ nat ipfix logging [domain ``domain-id``] [src-port ``port``]
+ [disable]
+
+Both syslog and ipfix support connection tracking capabilities. Session
+creation, session deletion, maximum sessions exceeded among other things
+are logged by syslog and ipfix.
+
+Miscellaneous
+-------------
+
+VRFs
+~~~~
+
+::
+
+ VRF 0 VRF 1
+ +-------------+ +-------------+
+ | 10.0.2.0/24 | | 10.0.3.0/24 |
+ +-------------+ +-------------+
+ | |
+ NAT INSIDE IF NAT INSIDE IF
+ +----------------------+ +----------------------+
+ | GigabitEthernet0/7/0 | | GigabitEthernet0/8/0 |
+ +----------------------+ +----------------------+
+ NAT OUTSIDE IF NAT OUTSIDE IF
+ +----------------------+ +----------------------+
+ | GigabitEthernet0/a/0 | | GigabitEthernet0/b/0 |
+ +----------------------+ +----------------------+
+ VRF 2 VRF 3
+ | |
+ +--------------------------+
+ |
+ +------------+------------+------------+
+ | | | |
+ +----------+ +----------+ +----------+ +----------+
+ | 10.0.0.1 | | 10.0.0.2 | | 10.0.1.1 | | 10.0.1.2 |
+ +----------+ +----------+ +----------+ +----------+
+ VRF 0 POOL VRF 1 POOL VRF 0 POOL VRF 1 POOL
+
+..
+
+ nat44 add address ``ip4-addr`` [tenant-vrf ``vrf-id``] [del]
+
+ nat44 plugin enable inside-vrf ``vrf-id`` outside-vrf ``vrf-id``
+ [disable]",
+
+Default behavior
+^^^^^^^^^^^^^^^^
+
+By design NAT supports passing communication between VRFs. Passing
+communication between multiple different VRFs is also supported (GE0/7/0
+-> GE0/b/0, GE0/8/0 -> GE0/a/0).
+
+NAT pool address tenant-vrf configuration parameter is used to constrain
+pool address to specific inside VRF. Example communication (in the above
+diagram): 1) from GE0/7/0 -> GE0/b/0 would choose 10.0.1.1 pool address
+2) from GE0/8/0 -> GE0/b/0 would choose 10.0.1.2 pool address
+
+Plugin enable parameters inside-vrf and outside-vrf are used as follows:
+
+Both ``inside-vrf`` and ``outside-vrf`` configuration parameters are
+used in conjunction with Static NAT, inside-vrf is only used for Static
+NAT.
+
+inside VRF: - used only in conjunction with static mappings - default
+inside VRF parameter is used in in2out feature to lookup static mapping
+if mapping can’t be found by inside interface VRF - used as default when
+adding static mappings as in2out vrf
+
+outside VRF: - used in conjunction with static mappings - secondary
+option for looking up static mappings in in2out feature based on outside
+VRF - used as default destination vrf in in2out feature during session
+creation if non of outside interfaces can resolve destination IP address
+
+Session creation default behavior (in2out only): - ingress interface fib
+is used as inside fib - Outside fib is chosen based on ability to
+resolve destination address in one of the outside interface networks. if
+there is no such network that is able to resolve destination a default
+outside fib (outside vrf index) is used.
+
+Default behavior enables use of multiple outside and inside fibs with
+some limitations. The limitation in the default behavior is that if each
+interface belonging to different fib contains default gateway every time
+first interface network fib gets used as outside fib index during
+session creation.
+
+VRF tables
+^^^^^^^^^^
+
+ nat44 vrf table [add|del] ``vrf-id``
+
+..
+
+ nat44 vrf route [add|del] table ``vrf-id`` ``vrf-id``
+
+VRF tables change the default behavior of working with inter-vrf
+communication. Adding empty VRF table disables passing communication
+between VRFs. Adding additional routes to the table makes destination
+VRF decision making algorithm do lookups into these tables. During
+session creation destination VRF in in2out feature is resolved by
+traversing VRF routes in the matching VRF table. If VRF route resolves
+destination IPv4 address then this VRF gets used. If non VRF route can
+resolve destination IPv4 address If VRF route can’t be found source VRF
+will be used. Priority of VRF routes is based on order of configuration.
+
+Timeouts
+~~~~~~~~
+
+ set nat timeout [udp ``sec`` \| tcp-established ``sec``
+ tcp-transitory ``sec`` \| icmp ``sec`` \| reset]
+
+Session Limiting
+~~~~~~~~~~~~~~~~
+
+ nat44 plugin enable sessions ``max-number``
+
+Maximum number of sessions value is used on per-thread (per-worker)
+basis.
+
+ set nat44 session limit ``limit`` [vrf ``table-id``]
+
+Per-vrf session limiting makes it possible to split maximum number of
+sessions between different VRFs.
+
+MSS Clamping
+~~~~~~~~~~~~
+
+ nat mss-clamping ``mss-value``\ \|disable
+
+Forwarding
+~~~~~~~~~~
+
+ nat44 forwarding enable|disable
+
+Additional Configuration Commands
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ | set nat frame-queue-nelts ``number``
+ | set nat workers ``workers-list``
+ | nat44 del session in|out ``addr``:``port`` tcp|udp|icmp [vrf
+ ``id``] [external-host ``addr``:``port``]
+
+Show commands
+^^^^^^^^^^^^^
+
+::
+
+ show nat workers
+ show nat timeouts
+ show nat44 summary
+ show nat44 sessions
+ show nat44 addresses
+ show nat mss-clamping
+ show nat44 interfaces
+ show nat44 vrf tables
+ show nat44 hash tables
+ nat44 show static mappings
+ show nat44 interface address
+
+Configuration Examples
+----------------------
+
+TWICE-NAT
+~~~~~~~~~
+
+Twice NAT lets you translate both the source and destination address in
+a single rule. Currently, twice NAT44 is supported only for local
+network service session initiated from outside network. Twice NAT static
+mappings can only get initiated (create sessions) from outside network.
+
+Topology
+^^^^^^^^
+
+::
+
+ +--------------------------+
+ | 10.0.0.2/24 (local host) |
+ +--------------------------+
+ |
+ +---------------------------------+
+ | 10.0.0.1/24 (eth0) (nat inside) |
+ | 20.0.0.1/24 (eth1) (nat outside)|
+ +---------------------------------+
+ |
+ +---------------------------+
+ | 20.0.0.2/24 (remote host) |
+ +---------------------------+
+
+In this example traffic will be initiated from remote host. Remote host
+will be accessing local host via twice-nat mapping.
+
+Translation will occur as follows:
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+outside to inside translation:
+
+
+ | src address: 20.0.0.2 -> 192.168.160.101
+ | dst address: 20.0.0.1 -> 10.0.0.2
+
+inside to outside translation:
+
+
+ | src address: 10.0.0.2 -> 20.0.0.1
+ | dst address: 192.168.160.101 -> 20.0.0.2
+
+Configuration
+^^^^^^^^^^^^^
+
+Enable nat44-ed plugin:
+
+
+::
+
+ nat44 plugin enable sessions 1000
+
+Configure inside interface:
+
+
+::
+
+ set int state eth0 up
+ set int ip address eth0 10.0.0.1/24
+ set int nat44 in eth0
+
+Configure outside interface:
+
+
+::
+
+ set int state eth1 up
+ set int ip address eth1 20.0.0.1/24
+ set int nat44 out eth1
+
+Configure nat address pools:
+
+
+::
+
+ nat44 add address 20.0.0.1
+ nat44 add address 192.168.160.101 twice-nat
+
+- alternatively we could use ``nat44 add interface address eth1``
+- both pools are required
+- pool ``20.0.0.1`` is used for out2in incoming traffic
+- special twice-nat pool ``192.168.160.101`` is used for secondary
+ translation
+
+Finally, add twice-nat mapping:
+
+
+ nat44 add static mapping tcp local 10.0.0.2 5201 external 20.0.0.1
+ 5201 twice-nat
+
+SELF TWICE-NAT
+~~~~~~~~~~~~~~
+
+Self twice NAT works similar to twice NAT with few exceptions. Self
+twice NAT is a feature that lets client and service running on the same
+host to communicate via NAT device. This means that external address is
+the same address as local address. Self twice NAT static mappings can
+only get initiated (create sessions) from outside network.
+
+.. _topology-self-twice-nat:
+
+Topology
+^^^^^^^^
+
+::
+
+ +--------------------------+
+ | 10.0.0.2/24 (local host) |
+ +--------------------------+
+ |
+ +-------------------------------------------+
+ | 10.0.0.1/24 (eth0) (nat inside & outside) |
+ +-------------------------------------------+
+
+In this example traffic will be initiated from local host. Local host
+will be accessing itself via self-twice-nat mapping.
+
+.. _translation-will-occur-as-follows-1:
+
+Translation will occur as follows:
+''''''''''''''''''''''''''''''''''
+
+.. _outside-to-inside-translation-1:
+
+outside to inside translation:
+
+
+ | src address: 10.0.0.2 -> 192.168.160.101
+ | dst address: 10.0.0.1 -> 10.0.0.2
+
+.. _inside-to-outside-translation-1:
+
+inside to outside translation:
+
+
+ | src address: 10.0.0.2 -> 10.0.0.1
+ | dst address: 192.168.160.101 -> 10.0.0.2
+
+.. _configuration-1:
+
+Configuration
+^^^^^^^^^^^^^
+
+.. _enable-nat44-ed-plugin-1:
+
+Enable nat44-ed plugin:
+
+
+::
+
+ nat44 plugin enable sessions 1000
+
+Configure NAT interface:
+
+
+::
+
+ set int state eth0 up
+ set int ip address eth0 10.0.0.1/24
+ set int nat44 in eth0
+ set int nat44 out eth0
+
+.. _configure-nat-address-pools-1:
+
+Configure nat address pools:
+
+
+::
+
+ nat44 add address 10.0.0.1
+ nat44 add address 192.168.160.101 twice-nat
+
+Finally, add self-twice-nat mapping:
+
+
+ nat44 add static mapping tcp local 10.0.0.2 5201 external 10.0.0.1
+ 5201 self-twice-nat
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_format.c b/src/plugins/nat/nat44-ed/nat44_ed_format.c
index 597bc2b4d0b..ee3e925e529 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_format.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_format.c
@@ -12,111 +12,42 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-/**
- * @file
- * @brief NAT formatting
- */
#include <nat/nat44-ed/nat44_ed.h>
#include <nat/nat44-ed/nat44_ed_inlines.h>
-uword
-unformat_nat_protocol (unformat_input_t * input, va_list * args)
-{
- u32 *r = va_arg (*args, u32 *);
-
- if (0);
-#define _(N, i, n, s) else if (unformat (input, s)) *r = NAT_PROTOCOL_##N;
- foreach_nat_protocol
-#undef _
- else
- return 0;
- return 1;
-}
-
-u8 *
-format_nat_protocol (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- u8 *t = 0;
-
- switch (i)
- {
-#define _(N, j, n, str) case NAT_PROTOCOL_##N: t = (u8 *) str; break;
- foreach_nat_protocol
-#undef _
- default:
- s = format (s, "unknown");
- return s;
- }
- s = format (s, "%s", t);
- return s;
-}
-
-u8 *
-format_nat_addr_and_port_alloc_alg (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- u8 *t = 0;
-
- switch (i)
- {
-#define _(v, N, s) case NAT_ADDR_AND_PORT_ALLOC_ALG_##N: t = (u8 *) s; break;
- foreach_nat_addr_and_port_alloc_alg
-#undef _
- default:
- s = format (s, "unknown");
- return s;
- }
- s = format (s, "%s", t);
- return s;
-}
-
u8 *
-format_snat_key (u8 * s, va_list * args)
+format_ed_session_kvp (u8 *s, va_list *args)
{
- u64 key = va_arg (*args, u64);
+ clib_bihash_kv_16_8_t *v = va_arg (*args, clib_bihash_kv_16_8_t *);
- ip4_address_t addr;
- u16 port;
- nat_protocol_t protocol;
+ u8 proto;
+ u16 r_port, l_port;
+ ip4_address_t l_addr, r_addr;
u32 fib_index;
- split_nat_key (key, &addr, &port, &fib_index, &protocol);
+ split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port);
+ s = format (s,
+ "local %U:%d remote %U:%d proto %U fib %d thread-index %u "
+ "session-index %u",
+ format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port),
+ format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port),
+ format_ip_protocol, proto, fib_index,
+ ed_value_get_thread_index (v), ed_value_get_session_index (v));
- s = format (s, "%U proto %U port %d fib %d",
- format_ip4_address, &addr,
- format_nat_protocol, protocol,
- clib_net_to_host_u16 (port), fib_index);
- return s;
-}
-
-u8 *
-format_snat_session_state (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- u8 *t = 0;
-
- switch (i)
- {
-#define _(v, N, str) case SNAT_SESSION_##N: t = (u8 *) str; break;
- foreach_snat_session_state
-#undef _
- default:
- t = format (t, "unknown");
- }
- s = format (s, "%s", t);
return s;
}
u8 *
format_snat_session (u8 * s, va_list * args)
{
+ snat_main_t *sm = va_arg (*args, snat_main_t *);
snat_main_per_thread_data_t *tsm =
va_arg (*args, snat_main_per_thread_data_t *);
snat_session_t *sess = va_arg (*args, snat_session_t *);
+ f64 now = va_arg (*args, f64);
- if (snat_is_unk_proto_session (sess))
+ if (nat44_ed_is_unk_proto (sess->proto))
{
s = format (s, " i2o %U proto %u fib %u\n",
format_ip4_address, &sess->in2out.addr,
@@ -127,26 +58,23 @@ format_snat_session (u8 * s, va_list * args)
}
else
{
- s = format (s, " i2o %U proto %U port %d fib %d\n",
- format_ip4_address, &sess->in2out.addr,
- format_nat_protocol, sess->nat_proto,
+ s = format (s, " i2o %U proto %U port %d fib %d\n", format_ip4_address,
+ &sess->in2out.addr, format_ip_protocol, sess->proto,
clib_net_to_host_u16 (sess->in2out.port),
sess->in2out.fib_index);
s = format (s, " o2i %U proto %U port %d fib %d\n",
- format_ip4_address, &sess->out2in.addr, format_nat_protocol,
- sess->nat_proto, clib_net_to_host_u16 (sess->out2in.port),
+ format_ip4_address, &sess->out2in.addr, format_ip_protocol,
+ sess->proto, clib_net_to_host_u16 (sess->out2in.port),
sess->out2in.fib_index);
}
- if (is_ed_session (sess) || is_fwd_bypass_session (sess))
+ if (nat44_ed_is_twice_nat_session (sess))
{
- if (is_twice_nat_session (sess))
- {
- s = format (s, " external host o2i %U:%d i2o %U:%d\n",
- format_ip4_address, &sess->ext_host_addr,
- clib_net_to_host_u16 (sess->ext_host_port),
- format_ip4_address, &sess->ext_host_nat_addr,
- clib_net_to_host_u16 (sess->ext_host_nat_port));
- }
+ s = format (s, " external host o2i %U:%d i2o %U:%d\n",
+ format_ip4_address, &sess->ext_host_addr,
+ clib_net_to_host_u16 (sess->ext_host_port),
+ format_ip4_address, &sess->ext_host_nat_addr,
+ clib_net_to_host_u16 (sess->ext_host_nat_port));
+ }
else
{
if (sess->ext_host_addr.as_u32)
@@ -156,20 +84,21 @@ format_snat_session (u8 * s, va_list * args)
}
s = format (s, " i2o flow: %U\n", format_nat_6t_flow, &sess->i2o);
s = format (s, " o2i flow: %U\n", format_nat_6t_flow, &sess->o2i);
- }
s = format (s, " index %llu\n", sess - tsm->sessions);
s = format (s, " last heard %.2f\n", sess->last_heard);
- s = format (s, " total pkts %d, total bytes %lld\n",
- sess->total_pkts, sess->total_bytes);
- if (snat_is_session_static (sess))
+ s = format (s, " timeout in %.2f\n",
+ nat44_session_get_timeout (sm, sess) - (now - sess->last_heard));
+ s = format (s, " total pkts %d, total bytes %lld\n", sess->total_pkts,
+ sess->total_bytes);
+ if (nat44_ed_is_session_static (sess))
s = format (s, " static translation\n");
else
s = format (s, " dynamic translation\n");
- if (is_fwd_bypass_session (sess))
+ if (na44_ed_is_fwd_bypass_session (sess))
s = format (s, " forwarding-bypass\n");
- if (is_lb_session (sess))
+ if (nat44_ed_is_lb_session (sess))
s = format (s, " load-balancing\n");
- if (is_twice_nat_session (sess))
+ if (nat44_ed_is_twice_nat_session (sess))
s = format (s, " twice-nat\n");
return s;
}
@@ -186,9 +115,8 @@ format_snat_static_mapping (u8 * s, va_list * args)
s = format (s, "identity mapping %U",
format_ip4_address, &m->local_addr);
else
- s = format (s, "identity mapping %U %U:%d",
- format_nat_protocol, m->proto,
- format_ip4_address, &m->local_addr,
+ s = format (s, "identity mapping %U %U:%d", format_ip_protocol,
+ m->proto, format_ip4_address, &m->local_addr,
clib_net_to_host_u16 (m->local_port));
pool_foreach (local, m->locals)
@@ -212,8 +140,8 @@ format_snat_static_mapping (u8 * s, va_list * args)
if (is_sm_lb (m->flags))
{
s =
- format (s, "%U external %U:%d %s %s", format_nat_protocol,
- m->proto, format_ip4_address, &m->external_addr,
+ format (s, "%U external %U:%d %s %s", format_ip_protocol, m->proto,
+ format_ip4_address, &m->external_addr,
clib_net_to_host_u16 (m->external_port),
is_sm_twice_nat (m->flags) ?
"twice-nat" :
@@ -230,7 +158,7 @@ format_snat_static_mapping (u8 * s, va_list * args)
}
else
s = format (s, "%U local %U:%d external %U:%d vrf %d %s %s",
- format_nat_protocol, m->proto, format_ip4_address,
+ format_ip_protocol, m->proto, format_ip4_address,
&m->local_addr, clib_net_to_host_u16 (m->local_port),
format_ip4_address, &m->external_addr,
clib_net_to_host_u16 (m->external_port), m->vrf_id,
@@ -245,21 +173,146 @@ format_snat_static_mapping (u8 * s, va_list * args)
u8 *
format_snat_static_map_to_resolve (u8 * s, va_list * args)
{
- snat_static_map_resolve_t *m = va_arg (*args, snat_static_map_resolve_t *);
+ snat_static_mapping_resolve_t *m =
+ va_arg (*args, snat_static_mapping_resolve_t *);
vnet_main_t *vnm = vnet_get_main ();
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
s = format (s, "local %U external %U vrf %d",
format_ip4_address, &m->l_addr,
format_vnet_sw_if_index_name, vnm, m->sw_if_index, m->vrf_id);
else
- s = format (s, "%U local %U:%d external %U:%d vrf %d",
- format_nat_protocol, m->proto,
- format_ip4_address, &m->l_addr,
- clib_net_to_host_u16 (m->l_port),
- format_vnet_sw_if_index_name, vnm, m->sw_if_index,
- clib_net_to_host_u16 (m->e_port), m->vrf_id);
+ s = format (s, "%U local %U:%d external %U:%d vrf %d", format_ip_protocol,
+ m->proto, format_ip4_address, &m->l_addr,
+ clib_net_to_host_u16 (m->l_port), format_vnet_sw_if_index_name,
+ vnm, m->sw_if_index, clib_net_to_host_u16 (m->e_port),
+ m->vrf_id);
+
+ return s;
+}
+
+u8 *
+format_nat_ed_translation_error (u8 *s, va_list *args)
+{
+ nat_translation_error_e e = va_arg (*args, nat_translation_error_e);
+
+ switch (e)
+ {
+ case NAT_ED_TRNSL_ERR_SUCCESS:
+ s = format (s, "success");
+ break;
+ case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED:
+ s = format (s, "translation-failed");
+ break;
+ case NAT_ED_TRNSL_ERR_FLOW_MISMATCH:
+ s = format (s, "flow-mismatch");
+ break;
+ case NAT_ED_TRNSL_ERR_PACKET_TRUNCATED:
+ s = format (s, "packet-truncated");
+ break;
+ case NAT_ED_TRNSL_ERR_INNER_IP_CORRUPT:
+ s = format (s, "inner-ip-corrupted");
+ break;
+ case NAT_ED_TRNSL_ERR_INVALID_CSUM:
+ s = format (s, "invalid-checksum");
+ break;
+ }
+ return s;
+}
+
+u8 *
+format_nat_6t_flow (u8 *s, va_list *args)
+{
+ nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *);
+
+ s = format (s, "match: %U ", format_nat_6t, &f->match);
+ int r = 0;
+ if (f->ops & NAT_FLOW_OP_SADDR_REWRITE)
+ {
+ s = format (s, "rewrite: saddr %U ", format_ip4_address,
+ f->rewrite.saddr.as_u8);
+ r = 1;
+ }
+ if (f->ops & NAT_FLOW_OP_SPORT_REWRITE)
+ {
+ if (!r)
+ {
+ s = format (s, "rewrite: ");
+ r = 1;
+ }
+ s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport));
+ }
+ if (f->ops & NAT_FLOW_OP_DADDR_REWRITE)
+ {
+ if (!r)
+ {
+ s = format (s, "rewrite: ");
+ r = 1;
+ }
+ s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8);
+ }
+ if (f->ops & NAT_FLOW_OP_DPORT_REWRITE)
+ {
+ if (!r)
+ {
+ s = format (s, "rewrite: ");
+ r = 1;
+ }
+ s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport));
+ }
+ if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE)
+ {
+ if (!r)
+ {
+ s = format (s, "rewrite: ");
+ r = 1;
+ }
+ s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id));
+ }
+ if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE)
+ {
+ if (!r)
+ {
+ s = format (s, "rewrite: ");
+ r = 1;
+ }
+ s = format (s, "txfib %u ", f->rewrite.fib_index);
+ }
+ return s;
+}
+
+u8 *
+format_nat_6t (u8 *s, va_list *args)
+{
+ nat_6t_t *t = va_arg (*args, nat_6t_t *);
+ s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u",
+ format_ip4_address, t->saddr.as_u8,
+ clib_net_to_host_u16 (t->sport), format_ip4_address,
+ t->daddr.as_u8, clib_net_to_host_u16 (t->dport),
+ format_ip_protocol, t->proto, t->fib_index);
+ return s;
+}
+
+u8 *
+format_nat44_ed_tcp_state (u8 *s, va_list *args)
+{
+ nat44_ed_tcp_state_e e = va_arg (*args, nat44_ed_tcp_state_e);
+ switch (e)
+ {
+ case NAT44_ED_TCP_STATE_CLOSED:
+ s = format (s, "closed");
+ break;
+ case NAT44_ED_TCP_STATE_ESTABLISHED:
+ s = format (s, "established");
+ break;
+ case NAT44_ED_TCP_STATE_CLOSING:
+ s = format (s, "closing");
+ break;
+ case NAT44_ED_TCP_N_STATE:
+ s = format (s, "BUG! unexpected N_STATE! BUG!");
+ break;
+ }
return s;
}
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_handoff.c b/src/plugins/nat/nat44-ed/nat44_ed_handoff.c
index c5ceff4e454..5cb4effb6c4 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_handoff.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_handoff.c
@@ -19,7 +19,6 @@
#include <vlib/vlib.h>
#include <vnet/vnet.h>
-#include <vnet/handoff.h>
#include <vnet/fib/ip4_fib.h>
#include <vppinfra/error.h>
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
index 0065d7703b1..9b4dac3b356 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
@@ -25,18 +25,12 @@
#include <vnet/udp/udp_local.h>
#include <vppinfra/error.h>
-#include <nat/lib/nat_syslog.h>
#include <nat/lib/nat_inlines.h>
#include <nat/lib/ipfix_logging.h>
#include <nat/nat44-ed/nat44_ed.h>
#include <nat/nat44-ed/nat44_ed_inlines.h>
-/* number of attempts to get a port for ED overloading algorithm, if rolling
- * a dice this many times doesn't produce a free port, it's treated
- * as if there were no free ports available to conserve resources */
-#define ED_PORT_ALLOC_ATTEMPTS (10)
-
static char *nat_in2out_ed_error_strings[] = {
#define _(sym,string) string,
foreach_nat_in2out_ed_error
@@ -55,6 +49,7 @@ typedef struct
u8 is_slow_path;
u8 translation_via_i2of;
u8 lookup_skipped;
+ u8 tcp_state;
} nat_in2out_ed_trace_t;
static u8 *
@@ -84,7 +79,7 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args)
{
if (t->lookup_skipped)
{
- s = format (s, "\n lookup skipped - cached session index used");
+ s = format (s, "\n lookup skipped - cached session index used");
}
else
{
@@ -92,93 +87,33 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args)
&t->search_key);
}
}
-
- return s;
-}
-
-/**
- * @brief Check if packet should be translated
- *
- * Packets aimed at outside interface and external address with active session
- * should be translated.
- *
- * @param sm NAT main
- * @param rt NAT runtime data
- * @param sw_if_index0 index of the inside interface
- * @param ip0 IPv4 header
- * @param proto0 NAT protocol
- * @param rx_fib_index0 RX FIB index
- *
- * @returns 0 if packet should be translated otherwise 1
- */
-static inline int
-snat_not_translate_fast (snat_main_t *sm, vlib_node_runtime_t *node,
- u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
- u32 rx_fib_index0)
-{
- fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
- nat_outside_fib_t *outside_fib;
- fib_prefix_t pfx = {
- .fp_proto = FIB_PROTOCOL_IP4,
- .fp_len = 32,
- .fp_addr = {
- .ip4.as_u32 = ip0->dst_address.as_u32,
- }
- ,
- };
-
- /* Don't NAT packet aimed at the intfc address */
- if (PREDICT_FALSE (
- is_interface_addr (sm, node, sw_if_index0, ip0->dst_address.as_u32)))
- return 1;
-
- fei = fib_table_lookup (rx_fib_index0, &pfx);
- if (FIB_NODE_INDEX_INVALID != fei)
+ if (IP_PROTOCOL_TCP == t->i2of.match.proto)
{
- u32 sw_if_index = fib_entry_get_resolving_interface (fei);
- if (sw_if_index == ~0)
- {
- vec_foreach (outside_fib, sm->outside_fibs)
- {
- fei = fib_table_lookup (outside_fib->fib_index, &pfx);
- if (FIB_NODE_INDEX_INVALID != fei)
- {
- sw_if_index = fib_entry_get_resolving_interface (fei);
- if (sw_if_index != ~0)
- break;
- }
- }
- }
- if (sw_if_index == ~0)
- return 1;
-
- snat_interface_t *i;
- pool_foreach (i, sm->interfaces)
- {
- /* NAT packet aimed at outside interface */
- if ((nat_interface_is_outside (i)) &&
- (sw_if_index == i->sw_if_index))
- return 0;
- }
+ s = format (s, "\n TCP state: %U", format_nat44_ed_tcp_state,
+ t->tcp_state);
}
- return 1;
+ return s;
}
static int
nat_ed_alloc_addr_and_port_with_snat_address (
- snat_main_t *sm, u32 nat_proto, u32 thread_index, snat_address_t *a,
+ snat_main_t *sm, u8 proto, u32 thread_index, snat_address_t *a,
u16 port_per_thread, u32 snat_thread_index, snat_session_t *s,
ip4_address_t *outside_addr, u16 *outside_port)
{
- const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024;
+ const u16 port_thread_offset =
+ (port_per_thread * snat_thread_index) + ED_USER_PORT_OFFSET;
+
+ /* Backup original match in case of failure */
+ const nat_6t_t match = s->o2i.match;
s->o2i.match.daddr = a->addr;
/* first try port suggested by caller */
u16 port = clib_net_to_host_u16 (*outside_port);
u16 port_offset = port - port_thread_offset;
- if (port <= port_thread_offset ||
- port > port_thread_offset + port_per_thread)
+ if (port < port_thread_offset ||
+ port >= port_thread_offset + port_per_thread)
{
/* need to pick a different port, suggested port doesn't fit in
* this thread's port range */
@@ -188,27 +123,13 @@ nat_ed_alloc_addr_and_port_with_snat_address (
u16 attempts = ED_PORT_ALLOC_ATTEMPTS;
do
{
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
s->o2i.match.sport = clib_host_to_net_u16 (port);
}
s->o2i.match.dport = clib_host_to_net_u16 (port);
if (0 == nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
{
-#define _(N, i, n, s) \
- case NAT_PROTOCOL_##N: \
- ++a->busy_##n##_port_refcounts[port]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- break;
- switch (nat_proto)
- {
- foreach_nat_protocol;
- default:
- nat_elog_info (sm, "unknown protocol");
- return 1;
- }
-#undef _
*outside_addr = a->addr;
*outside_port = clib_host_to_net_u16 (port);
return 0;
@@ -218,58 +139,141 @@ nat_ed_alloc_addr_and_port_with_snat_address (
--attempts;
}
while (attempts > 0);
+
+ /* Revert match */
+ s->o2i.match = match;
return 1;
}
static int
-nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
+nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index,
+ u32 tx_sw_if_index, u32 nat_proto,
u32 thread_index, ip4_address_t s_addr,
- u16 port_per_thread, u32 snat_thread_index,
+ ip4_address_t d_addr, u32 snat_thread_index,
snat_session_t *s, ip4_address_t *outside_addr,
u16 *outside_port)
{
- int i;
- snat_address_t *a, *ga = 0;
-
if (vec_len (sm->addresses) > 0)
{
- int s_addr_offset = s_addr.as_u32 % vec_len (sm->addresses);
-
- for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
+ u32 s_addr_offset = (s_addr.as_u32 + (s_addr.as_u32 >> 8) +
+ (s_addr.as_u32 >> 16) + (s_addr.as_u32 >> 24)) %
+ vec_len (sm->addresses);
+ snat_address_t *a, *ja = 0, *ra = 0, *ba = 0;
+ int i;
+
+ // output feature
+ if (tx_sw_if_index != ~0)
{
- a = sm->addresses + i;
- if (a->fib_index == rx_fib_index)
+ for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
{
- return nat_ed_alloc_addr_and_port_with_snat_address (
- sm, nat_proto, thread_index, a, port_per_thread,
- snat_thread_index, s, outside_addr, outside_port);
+ a = sm->addresses + i;
+ if (a->fib_index == rx_fib_index)
+ {
+ if (a->sw_if_index == tx_sw_if_index)
+ {
+ if ((a->addr_len != ~0) &&
+ (a->net.as_u32 ==
+ (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+
+ {
+ return nat_ed_alloc_addr_and_port_with_snat_address (
+ sm, nat_proto, thread_index, a,
+ sm->port_per_thread, snat_thread_index, s,
+ outside_addr, outside_port);
+ }
+ ra = a;
+ }
+ ja = a;
+ }
+ else if (a->fib_index == ~0)
+ {
+ ba = a;
+ }
}
- else if (a->fib_index == ~0)
+ for (i = 0; i < s_addr_offset; ++i)
{
- ga = a;
+ a = sm->addresses + i;
+ if (a->fib_index == rx_fib_index)
+ {
+ if (a->sw_if_index == tx_sw_if_index)
+ {
+ if ((a->addr_len != ~0) &&
+ (a->net.as_u32 ==
+ (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+
+ {
+ return nat_ed_alloc_addr_and_port_with_snat_address (
+ sm, nat_proto, thread_index, a,
+ sm->port_per_thread, snat_thread_index, s,
+ outside_addr, outside_port);
+ }
+ ra = a;
+ }
+ ja = a;
+ }
+ else if (a->fib_index == ~0)
+ {
+ ba = a;
+ }
}
- }
-
- for (i = 0; i < s_addr_offset; ++i)
- {
- a = sm->addresses + i;
- if (a->fib_index == rx_fib_index)
+ if (ra)
{
return nat_ed_alloc_addr_and_port_with_snat_address (
- sm, nat_proto, thread_index, a, port_per_thread,
+ sm, nat_proto, thread_index, ra, sm->port_per_thread,
snat_thread_index, s, outside_addr, outside_port);
}
- else if (a->fib_index == ~0)
+ }
+ else
+ {
+ // first try nat pool addresses to sw interface addreses mappings
+ for (i = s_addr_offset; i < vec_len (sm->addresses); ++i)
{
- ga = a;
+ a = sm->addresses + i;
+ if (a->fib_index == rx_fib_index)
+ {
+ if ((a->addr_len != ~0) &&
+ (a->net.as_u32 ==
+ (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+ {
+ return nat_ed_alloc_addr_and_port_with_snat_address (
+ sm, nat_proto, thread_index, a, sm->port_per_thread,
+ snat_thread_index, s, outside_addr, outside_port);
+ }
+ ja = a;
+ }
+ else if (a->fib_index == ~0)
+ {
+ ba = a;
+ }
+ }
+ for (i = 0; i < s_addr_offset; ++i)
+ {
+ a = sm->addresses + i;
+ if (a->fib_index == rx_fib_index)
+ {
+ if ((a->addr_len != ~0) &&
+ (a->net.as_u32 ==
+ (d_addr.as_u32 & ip4_main.fib_masks[a->addr_len])))
+ {
+ return nat_ed_alloc_addr_and_port_with_snat_address (
+ sm, nat_proto, thread_index, a, sm->port_per_thread,
+ snat_thread_index, s, outside_addr, outside_port);
+ }
+ ja = a;
+ }
+ else if (a->fib_index == ~0)
+ {
+ ba = a;
+ }
}
}
- if (ga)
+ if (ja || ba)
{
+ a = ja ? ja : ba;
return nat_ed_alloc_addr_and_port_with_snat_address (
- sm, nat_proto, thread_index, a, port_per_thread, snat_thread_index,
- s, outside_addr, outside_port);
+ sm, nat_proto, thread_index, a, sm->port_per_thread,
+ snat_thread_index, s, outside_addr, outside_port);
}
}
/* Totally out of translations to use... */
@@ -277,74 +281,167 @@ nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto,
return 1;
}
+static_always_inline int
+nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
+ u16 match_port, ip_protocol_t match_protocol,
+ ip4_address_t *daddr, u16 *dport)
+{
+ snat_static_mapping_t *m =
+ nat44_ed_sm_o2i_lookup (sm, match_addr, match_port, 0, match_protocol);
+ if (!m)
+ {
+ /* Try address only mapping */
+ m = nat44_ed_sm_o2i_lookup (sm, match_addr, 0, 0, 0);
+ if (!m)
+ return 0;
+ }
+ *daddr = m->local_addr;
+ if (dport)
+ {
+ /* Address only mapping doesn't change port */
+ *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
+ }
+ return 1;
+}
+
+static_always_inline vrf_table_t *
+get_vrf_table_by_fib (u32 fib_index)
+{
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t;
+
+ pool_foreach (t, sm->vrf_tables)
+ {
+ if (fib_index == t->table_fib_index)
+ {
+ return t;
+ }
+ }
+
+ return 0;
+}
+
static_always_inline u32
-nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr)
+get_tx_fib_index (u32 rx_fib_index, ip4_address_t addr)
{
fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
- nat_outside_fib_t *outside_fib;
fib_prefix_t pfx = {
.fp_proto = FIB_PROTOCOL_IP4,
.fp_len = 32,
.fp_addr = {.ip4.as_u32 = addr.as_u32,}
,
};
- // TODO: multiple vrfs none can resolve addr
- vec_foreach (outside_fib, sm->outside_fibs)
+
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
+ // default to rx fib
+ u32 tx_fib_index = rx_fib_index;
+
+ if (0 != t)
{
- fei = fib_table_lookup (outside_fib->fib_index, &pfx);
- if (FIB_NODE_INDEX_INVALID != fei)
- {
- if (fib_entry_get_resolving_interface (fei) != ~0)
- {
- return outside_fib->fib_index;
- }
- }
+ // managed routes to other fibs
+ vrf_route_t *r;
+ pool_foreach (r, t->routes)
+ {
+ fei = fib_table_lookup (r->fib_index, &pfx);
+ if ((FIB_NODE_INDEX_INVALID != fei) &&
+ (~0 != fib_entry_get_resolving_interface (fei)))
+ {
+ tx_fib_index = r->fib_index;
+ break;
+ }
+ }
}
- return ~0;
+ else
+ {
+ // default to configured fib
+ tx_fib_index = sm->outside_fib_index;
+
+ // default routes to other fibs
+ nat_fib_t *f;
+ vec_foreach (f, sm->outside_fibs)
+ {
+ fei = fib_table_lookup (f->fib_index, &pfx);
+ if ((FIB_NODE_INDEX_INVALID != fei) &&
+ (~0 != fib_entry_get_resolving_interface (fei)))
+ {
+ tx_fib_index = f->fib_index;
+ break;
+ }
+ }
+ }
+
+ return tx_fib_index;
}
static_always_inline int
-nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr,
- u16 match_port, nat_protocol_t match_protocol,
- u32 match_fib_index, ip4_address_t *daddr,
- u16 *dport)
+is_destination_resolvable (u32 rx_fib_index, ip4_address_t addr)
{
- clib_bihash_kv_8_8_t kv, value;
- init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ fib_node_index_t fei = FIB_NODE_INDEX_INVALID;
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = 32,
+ .fp_addr = {.ip4.as_u32 = addr.as_u32,}
+ ,
+ };
+
+ snat_main_t *sm = &snat_main;
+ vrf_table_t *t = get_vrf_table_by_fib (rx_fib_index);
+ u32 ii;
+
+ if (0 != t)
{
- /* Try address only mapping */
- init_nat_k (&kv, match_addr, 0, 0, 0);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv,
- &value))
- return 0;
+ // managed routes to other fibs
+ vrf_route_t *r;
+ pool_foreach (r, t->routes)
+ {
+ fei = fib_table_lookup (r->fib_index, &pfx);
+ if ((FIB_NODE_INDEX_INVALID != fei) &&
+ (~0 != (ii = fib_entry_get_resolving_interface (fei))))
+ {
+ return 1;
+ }
+ }
}
-
- snat_static_mapping_t *m =
- pool_elt_at_index (sm->static_mappings, value.value);
- *daddr = m->local_addr;
- if (dport)
+ else
{
- /* Address only mapping doesn't change port */
- *dport = is_sm_addr_only (m->flags) ? match_port : m->local_port;
+ // default routes to other fibs
+ nat_fib_t *f;
+ vec_foreach (f, sm->outside_fibs)
+ {
+ fei = fib_table_lookup (f->fib_index, &pfx);
+ if ((FIB_NODE_INDEX_INVALID != fei) &&
+ (~0 != (ii = fib_entry_get_resolving_interface (fei))))
+ {
+ snat_interface_t *i;
+ pool_foreach (i, sm->interfaces)
+ {
+ if ((nat44_ed_is_interface_outside (i)) &&
+ (ii == i->sw_if_index))
+ {
+ return 1;
+ }
+ }
+ }
+ }
}
- return 1;
+
+ return 0;
}
static u32
slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
ip4_address_t l_addr, ip4_address_t r_addr, u16 l_port,
- u16 r_port, u8 proto, u32 rx_fib_index,
+ u16 r_port, u8 proto, u32 rx_fib_index, u32 tx_sw_if_index,
snat_session_t **sessionp, vlib_node_runtime_t *node, u32 next,
u32 thread_index, f64 now)
{
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
ip4_address_t outside_addr;
u16 outside_port;
- u32 outside_fib_index;
+ u32 tx_fib_index;
u8 is_identity_nat = 0;
- u32 nat_proto = ip_proto_to_nat_proto (proto);
snat_session_t *s = NULL;
lb_nat_type_t lb = 0;
ip4_address_t daddr = r_addr;
@@ -363,33 +460,14 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
}
}
- outside_fib_index = sm->outside_fib_index;
-
- switch (vec_len (sm->outside_fibs))
- {
- case 0:
- outside_fib_index = sm->outside_fib_index;
- break;
- case 1:
- outside_fib_index = sm->outside_fibs[0].fib_index;
- break;
- default:
- outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr);
- break;
- }
-
ip4_address_t sm_addr;
u16 sm_port;
u32 sm_fib_index;
- /* First try to match static mapping by local address and port */
- int is_sm;
- if (snat_static_mapping_match (vm, sm, l_addr, l_port, rx_fib_index,
- nat_proto, &sm_addr, &sm_port, &sm_fib_index,
- 0, 0, 0, &lb, 0, &is_identity_nat, 0))
- {
- is_sm = 0;
- }
- else
+ int is_sm = 0;
+ // First try to match static mapping by local address and port
+ if (!snat_static_mapping_match (vm, l_addr, l_port, rx_fib_index, proto,
+ &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0,
+ &lb, 0, &is_identity_nat, 0))
{
if (PREDICT_FALSE (is_identity_nat))
{
@@ -399,7 +477,7 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
is_sm = 1;
}
- if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP))
+ if (PREDICT_TRUE (proto == IP_PROTOCOL_TCP))
{
if (PREDICT_FALSE (!tcp_flags_is_init (
vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)))
@@ -412,28 +490,31 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
s = nat_ed_session_alloc (sm, thread_index, now, proto);
ASSERT (s);
+ tx_fib_index = get_tx_fib_index (rx_fib_index, r_addr);
+
if (!is_sm)
{
s->in2out.addr = l_addr;
s->in2out.port = l_port;
- s->nat_proto = nat_proto;
+ s->proto = proto;
s->in2out.fib_index = rx_fib_index;
- s->out2in.fib_index = outside_fib_index;
+ s->out2in.fib_index = tx_fib_index;
// suggest using local port to allocation function
outside_port = l_port;
- // hairpinning?
- int is_hairpinning = nat44_ed_external_sm_lookup (
- sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
- s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
+ if (PREDICT_FALSE (nat44_ed_external_sm_lookup (sm, r_addr, r_port,
+ proto, &daddr, &dport)))
+ {
+ s->flags |= SNAT_SESSION_FLAG_HAIRPINNING;
+ }
// destination addr/port updated with real values in
// nat_ed_alloc_addr_and_port
nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0,
s->out2in.fib_index, proto);
nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port);
}
@@ -442,11 +523,11 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
nat_6t_flow_dport_rewrite_set (&s->o2i, l_port);
}
nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
+ nat_6t_flow_saddr_rewrite_set (&s->o2i, r_addr.as_u32);
if (nat_ed_alloc_addr_and_port (
- sm, rx_fib_index, nat_proto, thread_index, l_addr,
- sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr,
- &outside_port))
+ sm, rx_fib_index, tx_sw_if_index, proto, thread_index, l_addr,
+ r_addr, tsm->snat_thread_index, s, &outside_addr, &outside_port))
{
nat_elog_notice (sm, "addresses exhausted");
b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS];
@@ -463,17 +544,17 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
s->out2in.port = outside_port = sm_port;
s->in2out.addr = l_addr;
s->in2out.port = l_port;
- s->nat_proto = nat_proto;
+ s->proto = proto;
s->in2out.fib_index = rx_fib_index;
- s->out2in.fib_index = outside_fib_index;
+ s->out2in.fib_index = tx_fib_index;
s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
// hairpinning?
- int is_hairpinning = nat44_ed_external_sm_lookup (
- sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport);
+ int is_hairpinning = nat44_ed_external_sm_lookup (sm, r_addr, r_port,
+ proto, &daddr, &dport);
s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr,
sm_port, s->out2in.fib_index, proto);
@@ -487,6 +568,7 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
}
nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32);
nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
+ nat_6t_flow_saddr_rewrite_set (&s->o2i, r_addr.as_u32);
if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2))
{
nat_elog_notice (sm, "out2in key add failed");
@@ -496,7 +578,6 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
if (lb)
s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
- s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
s->ext_host_addr = r_addr;
s->ext_host_port = r_port;
@@ -505,7 +586,7 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32);
nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32);
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port);
}
@@ -514,7 +595,7 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port);
nat_6t_flow_dport_rewrite_set (&s->i2o, dport);
}
- nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
+ nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1))
{
@@ -523,17 +604,14 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
}
/* log NAT event */
- nat_ipfix_logging_nat44_ses_create (thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port, s->in2out.fib_index);
+ nat_ipfix_logging_nat44_ses_create (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
s->in2out.port, &s->ext_host_nat_addr,
s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
- &s->ext_host_addr, s->ext_host_port, s->nat_proto, 0);
+ &s->ext_host_addr, s->ext_host_port, s->proto, 0);
per_vrf_sessions_register_session (s, thread_index);
@@ -542,12 +620,6 @@ slow_path_ed (vlib_main_t *vm, snat_main_t *sm, vlib_buffer_t *b,
error:
if (s)
{
- if (!is_sm)
- {
- snat_free_outside_address_and_port (sm->addresses, thread_index,
- &outside_addr, outside_port,
- nat_proto);
- }
nat_ed_session_delete (sm, s, thread_index, 1);
}
*sessionp = s = NULL;
@@ -555,38 +627,55 @@ error:
}
static_always_inline int
-nat44_ed_not_translate (vlib_main_t *vm, snat_main_t *sm,
- vlib_node_runtime_t *node, u32 sw_if_index,
- vlib_buffer_t *b, ip4_header_t *ip, u32 proto,
- u32 rx_fib_index, u32 thread_index)
+nat44_ed_not_translate (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 sw_if_index, vlib_buffer_t *b, ip4_header_t *ip,
+ u32 proto, u32 rx_fib_index)
{
+ snat_main_t *sm = &snat_main;
+
clib_bihash_kv_16_8_t kv, value;
+ ip4_address_t placeholder_addr;
+ u32 placeholder_fib_index;
+ u16 placeholder_port;
+
+ init_ed_k (&kv, ip->dst_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_dst_port, ip->src_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_src_port, sm->outside_fib_index,
+ ip->protocol);
+
+ // do nat if active session or is static mapping
+ if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value) ||
+ !snat_static_mapping_match (
+ vm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
+ sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
+ &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
+ {
+ return 0;
+ }
- init_ed_k (&kv, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
- ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
- sm->outside_fib_index, ip->protocol);
+ // do not nat if forwarding enabled
+ if (sm->forwarding_enabled)
+ {
+ return 1;
+ }
- /* NAT packet aimed at external address if has active sessions */
- if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
+ // do not nat packet aimed at the interface address
+ if (PREDICT_FALSE (
+ is_interface_addr (sm, node, sw_if_index, ip->dst_address.as_u32)))
{
- /* or is static mappings */
- ip4_address_t placeholder_addr;
- u16 placeholder_port;
- u32 placeholder_fib_index;
- if (!snat_static_mapping_match (
- vm, sm, ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port,
- sm->outside_fib_index, proto, &placeholder_addr, &placeholder_port,
- &placeholder_fib_index, 1, 0, 0, 0, 0, 0, 0))
- return 0;
+ return 1;
}
- else
- return 0;
- if (sm->forwarding_enabled)
- return 1;
+ // do nat packets with resolvable destination
+ // destination can be resolved either by:
+ // a) vrf routing table entry
+ // b) (non output feature) outside interface fib
+ if (is_destination_resolvable (rx_fib_index, ip->dst_address))
+ {
+ return 0;
+ }
- return snat_not_translate_fast (sm, node, sw_if_index, ip, proto,
- rx_fib_index);
+ return 1;
}
static_always_inline int
@@ -610,18 +699,18 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
&lookup_sport, &lookup_daddr,
&lookup_dport, &lookup_protocol))
return 0;
- init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
- 0, lookup_protocol);
+ init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
+ lookup_dport, 0, lookup_protocol);
}
else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP)
{
- init_ed_k (&kv, ip->src_address, vnet_buffer (b)->ip.reass.l4_src_port,
- ip->dst_address, vnet_buffer (b)->ip.reass.l4_dst_port, 0,
- ip->protocol);
+ init_ed_k (&kv, ip->src_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_src_port, ip->dst_address.as_u32,
+ vnet_buffer (b)->ip.reass.l4_dst_port, 0, ip->protocol);
}
else
{
- init_ed_k (&kv, ip->src_address, 0, ip->dst_address, 0, 0,
+ init_ed_k (&kv, ip->src_address.as_u32, 0, ip->dst_address.as_u32, 0, 0,
ip->protocol);
}
@@ -632,11 +721,13 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
pool_elt_at_index (tsm->sessions,
ed_value_get_session_index (&value));
- if (is_fwd_bypass_session (s))
+ if (na44_ed_is_fwd_bypass_session (s))
{
if (ip->protocol == IP_PROTOCOL_TCP)
{
- nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
+ nat44_set_tcp_session_state_i2o (
+ sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
}
/* Accounting */
nat44_session_update_counters (s, now,
@@ -658,7 +749,7 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
ip4_header_t *ip, u16 src_port,
u16 dst_port, u32 thread_index,
u32 rx_sw_if_index, u32 tx_sw_if_index,
- f64 now, int is_multi_worker)
+ int is_multi_worker)
{
clib_bihash_kv_16_8_t kv, value;
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
@@ -668,20 +759,14 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
u32 tx_fib_index = ip4_fib_table_get_index_for_sw_if_index (tx_sw_if_index);
/* src NAT check */
- init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
- tx_fib_index, ip->protocol);
+ init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
+ dst_port, tx_fib_index, ip->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
{
ASSERT (thread_index == ed_value_get_thread_index (&value));
s =
pool_elt_at_index (tsm->sessions,
ed_value_get_session_index (&value));
- if (nat44_is_ses_closed (s)
- && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
- {
- nat_free_session_data (sm, s, thread_index, 0);
- nat_ed_session_delete (sm, s, thread_index, 1);
- }
return 1;
}
@@ -706,8 +791,8 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
s = NULL;
}
- init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port,
- rx_fib_index, ip->protocol);
+ init_ed_k (&kv, ip->dst_address.as_u32, dst_port, ip->src_address.as_u32,
+ src_port, rx_fib_index, ip->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
{
ASSERT (thread_index == ed_value_get_thread_index (&value));
@@ -716,15 +801,16 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
ed_value_get_session_index (&value));
skip_dst_nat_lookup:
- if (is_fwd_bypass_session (s))
+ if (na44_ed_is_fwd_bypass_session (s))
return 0;
/* hairpinning */
pool_foreach (i, sm->output_feature_interfaces)
- {
- if ((nat_interface_is_inside (i)) && (rx_sw_if_index == i->sw_if_index))
- return 0;
- }
+ {
+ if ((nat44_ed_is_interface_inside (i)) &&
+ (rx_sw_if_index == i->sw_if_index))
+ return 0;
+ }
return 1;
}
@@ -734,9 +820,9 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
static inline u32
icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
icmp46_header_t *icmp, u32 sw_if_index,
- u32 rx_fib_index, vlib_node_runtime_t *node,
- u32 next, f64 now, u32 thread_index,
- nat_protocol_t nat_proto, snat_session_t **s_p,
+ u32 tx_sw_if_index, u32 rx_fib_index,
+ vlib_node_runtime_t *node, u32 next, f64 now,
+ u32 thread_index, snat_session_t **s_p,
int is_multi_worker)
{
vlib_main_t *vm = vlib_get_main ();
@@ -756,20 +842,19 @@ icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
return NAT_NEXT_DROP;
}
- if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0)
+ if (tx_sw_if_index != ~0)
{
if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
- vnet_buffer (b)->sw_if_index[VLIB_TX], now, is_multi_worker)))
+ tx_sw_if_index, is_multi_worker)))
{
return next;
}
}
else
{
- if (PREDICT_FALSE (nat44_ed_not_translate (vm, sm, node, sw_if_index, b,
- ip, NAT_PROTOCOL_ICMP,
- rx_fib_index, thread_index)))
+ if (PREDICT_FALSE (nat44_ed_not_translate (
+ vm, node, sw_if_index, b, ip, IP_PROTOCOL_ICMP, rx_fib_index)))
{
return next;
}
@@ -782,9 +867,10 @@ icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
return NAT_NEXT_DROP;
}
- next = slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address,
- lookup_sport, lookup_dport, ip->protocol, rx_fib_index,
- &s, node, next, thread_index, vlib_time_now (vm));
+ next =
+ slow_path_ed (vm, sm, b, ip->src_address, ip->dst_address, lookup_sport,
+ lookup_dport, ip->protocol, rx_fib_index, tx_sw_if_index, &s,
+ node, next, thread_index, vlib_time_now (vm));
if (NAT_NEXT_DROP == next)
goto out;
@@ -822,12 +908,11 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
vlib_main_t *vm,
vlib_node_runtime_t *node)
{
- clib_bihash_kv_8_8_t kv, value;
clib_bihash_kv_16_8_t s_kv, s_value;
snat_static_mapping_t *m = NULL;
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
snat_session_t *s = NULL;
- u32 outside_fib_index = sm->outside_fib_index;
+ u32 tx_fib_index;
int i;
ip4_address_t new_src_addr = { 0 };
ip4_address_t new_dst_addr = ip->dst_address;
@@ -842,25 +927,13 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
return 0;
}
- switch (vec_len (sm->outside_fibs))
- {
- case 0:
- outside_fib_index = sm->outside_fib_index;
- break;
- case 1:
- outside_fib_index = sm->outside_fibs[0].fib_index;
- break;
- default:
- outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address);
- break;
- }
+ tx_fib_index = get_tx_fib_index (rx_fib_index, ip->dst_address);
- init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0);
-
- /* Try to find static mapping first */
- if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value))
+ // Try to find static mapping first
+ m = nat44_ed_sm_i2o_lookup (sm, ip->src_address, 0, rx_fib_index,
+ ip->protocol);
+ if (m)
{
- m = pool_elt_at_index (sm->static_mappings, value.value);
new_src_addr = m->external_addr;
}
else
@@ -869,8 +942,9 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
{
if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32)
{
- init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0,
- outside_fib_index, ip->protocol);
+ init_ed_k (&s_kv, s->out2in.addr.as_u32, 0,
+ ip->dst_address.as_u32, 0, tx_fib_index,
+ ip->protocol);
if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
{
new_src_addr = s->out2in.addr;
@@ -883,8 +957,9 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
{
for (i = 0; i < vec_len (sm->addresses); i++)
{
- init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0,
- outside_fib_index, ip->protocol);
+ init_ed_k (&s_kv, sm->addresses[i].addr.as_u32, 0,
+ ip->dst_address.as_u32, 0, tx_fib_index,
+ ip->protocol);
if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value))
{
new_src_addr = sm->addresses[i].addr;
@@ -910,28 +985,25 @@ nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0,
ip->dst_address, 0, rx_fib_index, ip->protocol);
nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32);
- nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
+ nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
// hairpinning?
- int is_hairpinning =
- nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER,
- outside_fib_index, &new_dst_addr, NULL);
+ int is_hairpinning = nat44_ed_external_sm_lookup (
+ sm, ip->dst_address, 0, ip->protocol, &new_dst_addr, NULL);
s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING;
nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32);
- nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index);
+ nat_6t_flow_txfib_rewrite_set (&s->i2o, tx_fib_index);
nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0,
- outside_fib_index, ip->protocol);
+ tx_fib_index, ip->protocol);
nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32);
nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32);
nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index);
s->ext_host_addr.as_u32 = ip->dst_address.as_u32;
- s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
- s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
s->out2in.addr.as_u32 = new_src_addr.as_u32;
- s->out2in.fib_index = outside_fib_index;
+ s->out2in.fib_index = tx_fib_index;
s->in2out.addr.as_u32 = ip->src_address.as_u32;
s->in2out.fib_index = rx_fib_index;
s->in2out.port = s->out2in.port = ip->protocol;
@@ -988,11 +1060,13 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
while (n_left_from > 0)
{
vlib_buffer_t *b0;
- u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
- nat_protocol_t proto0;
+ u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
+ u32 tx_sw_if_index0;
+ u32 cntr_sw_if_index0;
+ ip_protocol_t proto0;
ip4_header_t *ip0;
snat_session_t *s0 = 0;
- clib_bihash_kv_16_8_t kv0, value0;
+ clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
nat_6t_flow_t *f = 0;
nat_6t_t lookup;
@@ -1023,9 +1097,12 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
ip0 =
(ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) + iph_offset0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- rx_fib_index0 =
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
+ rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ cntr_sw_if_index0 =
+ is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
+ rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ rx_sw_if_index0);
lookup.fib_index = rx_fib_index0;
if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
@@ -1038,7 +1115,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
goto trace0;
}
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
+ proto0 = ip0->protocol;
if (is_output_feature)
{
@@ -1048,7 +1125,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
goto trace0;
}
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
+ if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
{
if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
ICMP4_echo_request &&
@@ -1102,8 +1179,8 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
s0 = NULL;
}
- init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
- lookup.fib_index, lookup.proto);
+ init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
+ lookup.dport, lookup.fib_index, lookup.proto);
// lookup flow
if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
@@ -1125,25 +1202,10 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
{
// session is closed, go slow path
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
- next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
- goto trace0;
- }
-
- if (s0->tcp_closed_timestamp)
- {
- if (now >= s0->tcp_closed_timestamp)
- {
- // session is closed, go slow path, freed in slow path
- next[0] = def_slow;
- }
- else
- {
- // session in transitory timeout, drop
- b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
- next[0] = NAT_NEXT_DROP;
- }
+ s0 = 0;
+ next[0] = def_slow;
goto trace0;
}
@@ -1153,8 +1215,9 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
s0->last_heard + (f64) nat44_session_get_timeout (sm, s0);
if (now >= sess_timeout_time)
{
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
// session is closed, go slow path
next[0] = def_slow;
goto trace0;
@@ -1174,8 +1237,9 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
else
{
translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
goto trace0;
@@ -1185,8 +1249,9 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
(translation_error = nat_6t_flow_buf_translate_i2o (
vm, sm, b0, ip0, f, proto0, is_output_feature)))
{
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
goto trace0;
@@ -1194,22 +1259,24 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
switch (proto0)
{
- case NAT_PROTOCOL_TCP:
+ case IP_PROTOCOL_TCP:
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
- thread_index, sw_if_index0, 1);
- nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
+ thread_index, cntr_sw_if_index0, 1);
+ nat44_set_tcp_session_state_i2o (
+ sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
break;
- case NAT_PROTOCOL_UDP:
+ case IP_PROTOCOL_UDP:
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
break;
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.icmp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
break;
- case NAT_PROTOCOL_OTHER:
+ default:
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.other,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
break;
}
@@ -1227,7 +1294,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
{
nat_in2out_ed_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sw_if_index = sw_if_index0;
+ t->sw_if_index = rx_sw_if_index0;
t->next_index = next[0];
t->is_slow_path = 0;
t->translation_error = translation_error;
@@ -1240,6 +1307,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
t->translation_via_i2of = (&s0->i2o == f);
+ t->tcp_state = s0->tcp_state;
}
else
{
@@ -1250,7 +1318,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
if (next[0] == NAT_NEXT_DROP)
{
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.drops,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
n_left_from--;
@@ -1285,13 +1353,15 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
while (n_left_from > 0)
{
vlib_buffer_t *b0;
- u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0;
- nat_protocol_t proto0;
+ u32 rx_sw_if_index0, rx_fib_index0, iph_offset0 = 0;
+ u32 tx_sw_if_index0;
+ u32 cntr_sw_if_index0;
+ ip_protocol_t proto0;
ip4_header_t *ip0;
udp_header_t *udp0;
icmp46_header_t *icmp0;
snat_session_t *s0 = 0;
- clib_bihash_kv_16_8_t kv0, value0;
+ clib_bihash_kv_16_8_t kv0 = { 0 }, value0;
int translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
b0 = *b;
@@ -1304,9 +1374,12 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
ip0 = (ip4_header_t *) ((u8 *) vlib_buffer_get_current (b0) +
iph_offset0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- rx_fib_index0 =
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index0);
+ rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ cntr_sw_if_index0 =
+ is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
+ rx_fib_index0 = fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4,
+ rx_sw_if_index0);
if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
{
@@ -1320,9 +1393,9 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
udp0 = ip4_next_header (ip0);
icmp0 = (icmp46_header_t *) udp0;
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
+ proto0 = ip0->protocol;
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
+ if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
{
s0 = nat44_ed_in2out_slowpath_unknown_proto (
sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
@@ -1334,57 +1407,57 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
(translation_error = nat_6t_flow_buf_translate_i2o (
vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
{
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
goto trace0;
}
vlib_increment_simple_counter (&sm->counters.slowpath.in2out.other,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
goto trace0;
}
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
+ if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
{
next[0] = icmp_in2out_ed_slow_path (
- sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next[0],
- now, thread_index, proto0, &s0, is_multi_worker);
+ sm, b0, ip0, icmp0, rx_sw_if_index0, tx_sw_if_index0,
+ rx_fib_index0, node, next[0], now, thread_index, &s0,
+ is_multi_worker);
if (NAT_NEXT_DROP != next[0] && s0 &&
NAT_ED_TRNSL_ERR_SUCCESS !=
(translation_error = nat_6t_flow_buf_translate_i2o (
vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
{
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
goto trace0;
}
- vlib_increment_simple_counter (&sm->counters.slowpath.in2out.icmp,
- thread_index, sw_if_index0, 1);
+ if (NAT_NEXT_DROP != next[0])
+ {
+ vlib_increment_simple_counter (
+ &sm->counters.slowpath.in2out.icmp, thread_index,
+ cntr_sw_if_index0, 1);
+ }
goto trace0;
}
- init_ed_k (&kv0, ip0->src_address,
- vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
- ip0->protocol);
+ init_ed_k (
+ &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
+ ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
+ rx_fib_index0, ip0->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
{
ASSERT (thread_index == ed_value_get_thread_index (&value0));
s0 =
pool_elt_at_index (tsm->sessions,
ed_value_get_session_index (&value0));
-
- if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
- {
- nat_free_session_data (sm, s0, thread_index, 0);
- nat_ed_session_delete (sm, s0, thread_index, 1);
- s0 = NULL;
- }
}
if (!s0)
@@ -1394,34 +1467,34 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
- sw_if_index0, vnet_buffer (b0)->sw_if_index[VLIB_TX], now,
- is_multi_worker)))
+ rx_sw_if_index0, tx_sw_if_index0, is_multi_worker)))
goto trace0;
/*
* Send DHCP packets to the ipv4 stack, or we won't
* be able to use dhcp client on the outside interface
*/
- if (PREDICT_FALSE
- (proto0 == NAT_PROTOCOL_UDP
- && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
- clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server))
- && ip0->dst_address.as_u32 == 0xffffffff))
+ if (PREDICT_FALSE (
+ proto0 == IP_PROTOCOL_UDP &&
+ (vnet_buffer (b0)->ip.reass.l4_dst_port ==
+ clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_server)) &&
+ ip0->dst_address.as_u32 == 0xffffffff))
goto trace0;
}
else
{
- if (PREDICT_FALSE (nat44_ed_not_translate (
- vm, sm, node, sw_if_index0, b0, ip0, proto0, rx_fib_index0,
- thread_index)))
+ if (PREDICT_FALSE (
+ nat44_ed_not_translate (vm, node, rx_sw_if_index0, b0, ip0,
+ proto0, rx_fib_index0)))
goto trace0;
}
- next[0] = slow_path_ed (
- vm, sm, b0, ip0->src_address, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->protocol,
- rx_fib_index0, &s0, node, next[0], thread_index, now);
+ next[0] =
+ slow_path_ed (vm, sm, b0, ip0->src_address, ip0->dst_address,
+ vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port,
+ ip0->protocol, rx_fib_index0, tx_sw_if_index0, &s0,
+ node, next[0], thread_index, now);
if (PREDICT_FALSE (next[0] == NAT_NEXT_DROP))
goto trace0;
@@ -1437,23 +1510,26 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
(translation_error = nat_6t_flow_buf_translate_i2o (
vm, sm, b0, ip0, &s0->i2o, proto0, is_output_feature)))
{
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TRNSL_FAILED];
goto trace0;
}
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
+ if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
{
vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
- thread_index, sw_if_index0, 1);
- nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
+ thread_index, cntr_sw_if_index0, 1);
+ nat44_set_tcp_session_state_i2o (
+ sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
}
else
{
vlib_increment_simple_counter (&sm->counters.slowpath.in2out.udp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
/* Accounting */
@@ -1469,7 +1545,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
{
nat_in2out_ed_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sw_if_index = sw_if_index0;
+ t->sw_if_index = rx_sw_if_index0;
t->next_index = next[0];
t->is_slow_path = 1;
t->translation_error = translation_error;
@@ -1481,6 +1557,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
t->translation_via_i2of = 1;
+ t->tcp_state = s0->tcp_state;
}
else
@@ -1492,7 +1569,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
if (next[0] == NAT_NEXT_DROP)
{
vlib_increment_simple_counter (&sm->counters.slowpath.in2out.drops,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
n_left_from--;
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
index 0d75e736849..04e5236b7f9 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
+++ b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
@@ -12,6 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/**
* @brief The NAT inline functions
*/
@@ -24,116 +25,53 @@
#include <vnet/fib/ip4_fib.h>
#include <nat/lib/log.h>
+#include <nat/lib/ipfix_logging.h>
#include <nat/nat44-ed/nat44_ed.h>
-always_inline u64
-calc_nat_key (ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
-{
- ASSERT (fib_index <= (1 << 14) - 1);
- ASSERT (proto <= (1 << 3) - 1);
- return (u64) addr.as_u32 << 32 | (u64) port << 16 | fib_index << 3 |
- (proto & 0x7);
-}
-
-always_inline void
-split_nat_key (u64 key, ip4_address_t *addr, u16 *port, u32 *fib_index,
- nat_protocol_t *proto)
-{
- if (addr)
- {
- addr->as_u32 = key >> 32;
- }
- if (port)
- {
- *port = (key >> 16) & (u16) ~0;
- }
- if (fib_index)
- {
- *fib_index = key >> 3 & ((1 << 13) - 1);
- }
- if (proto)
- {
- *proto = key & 0x7;
- }
-}
-
always_inline void
-init_nat_k (clib_bihash_kv_8_8_t *kv, ip4_address_t addr, u16 port,
- u32 fib_index, nat_protocol_t proto)
+init_ed_k (clib_bihash_kv_16_8_t *kv, u32 l_addr, u16 l_port, u32 r_addr,
+ u16 r_port, u32 fib_index, ip_protocol_t proto)
{
- kv->key = calc_nat_key (addr, port, fib_index, proto);
- kv->value = ~0ULL;
+ kv->key[0] = (u64) r_addr << 32 | l_addr;
+ kv->key[1] =
+ (u64) r_port << 48 | (u64) l_port << 32 | fib_index << 8 | proto;
}
always_inline void
-init_nat_kv (clib_bihash_kv_8_8_t *kv, ip4_address_t addr, u16 port,
- u32 fib_index, nat_protocol_t proto, u32 thread_index,
- u32 session_index)
+init_ed_kv (clib_bihash_kv_16_8_t *kv, u32 l_addr, u16 l_port, u32 r_addr,
+ u16 r_port, u32 fib_index, u8 proto, u32 thread_index,
+ u32 session_index)
{
- init_nat_k (kv, addr, port, fib_index, proto);
+ init_ed_k (kv, l_addr, l_port, r_addr, r_port, fib_index, proto);
kv->value = (u64) thread_index << 32 | session_index;
}
always_inline void
-init_nat_i2o_k (clib_bihash_kv_8_8_t *kv, snat_session_t *s)
+nat44_ed_sm_init_i2o_kv (clib_bihash_kv_16_8_t *kv, u32 addr, u16 port,
+ u32 fib_index, u8 proto, u32 sm_index)
{
- return init_nat_k (kv, s->in2out.addr, s->in2out.port, s->in2out.fib_index,
- s->nat_proto);
+ return init_ed_kv (kv, addr, port, 0, 0, fib_index, proto, 0, sm_index);
}
always_inline void
-init_nat_i2o_kv (clib_bihash_kv_8_8_t *kv, snat_session_t *s, u32 thread_index,
- u32 session_index)
+nat44_ed_sm_init_o2i_kv (clib_bihash_kv_16_8_t *kv, u32 e_addr, u16 e_port,
+ u32 fib_index, u8 proto, u32 sm_index)
{
- init_nat_k (kv, s->in2out.addr, s->in2out.port, s->in2out.fib_index,
- s->nat_proto);
- kv->value = (u64) thread_index << 32 | session_index;
+ return init_ed_kv (kv, 0, 0, e_addr, e_port, fib_index, proto, 0, sm_index);
}
always_inline void
-init_nat_o2i_k (clib_bihash_kv_8_8_t *kv, snat_session_t *s)
+nat44_ed_sm_init_i2o_k (clib_bihash_kv_16_8_t *kv, u32 addr, u16 port,
+ u32 fib_index, u8 proto)
{
- return init_nat_k (kv, s->out2in.addr, s->out2in.port, s->out2in.fib_index,
- s->nat_proto);
+ return nat44_ed_sm_init_i2o_kv (kv, addr, port, fib_index, proto, 0);
}
always_inline void
-init_nat_o2i_kv (clib_bihash_kv_8_8_t *kv, snat_session_t *s, u32 thread_index,
- u32 session_index)
+nat44_ed_sm_init_o2i_k (clib_bihash_kv_16_8_t *kv, u32 e_addr, u16 e_port,
+ u32 fib_index, u8 proto)
{
- init_nat_k (kv, s->out2in.addr, s->out2in.port, s->out2in.fib_index,
- s->nat_proto);
- kv->value = (u64) thread_index << 32 | session_index;
-}
-
-always_inline u32
-nat_value_get_thread_index (clib_bihash_kv_8_8_t *value)
-{
- return value->value >> 32;
-}
-
-always_inline u32
-nat_value_get_session_index (clib_bihash_kv_8_8_t *value)
-{
- return value->value & ~(u32) 0;
-}
-
-always_inline void
-init_ed_k (clib_bihash_kv_16_8_t *kv, ip4_address_t l_addr, u16 l_port,
- ip4_address_t r_addr, u16 r_port, u32 fib_index, u8 proto)
-{
- kv->key[0] = (u64) r_addr.as_u32 << 32 | l_addr.as_u32;
- kv->key[1] =
- (u64) r_port << 48 | (u64) l_port << 32 | fib_index << 8 | proto;
-}
-
-always_inline void
-init_ed_kv (clib_bihash_kv_16_8_t *kv, ip4_address_t l_addr, u16 l_port,
- ip4_address_t r_addr, u16 r_port, u32 fib_index, u8 proto,
- u32 thread_index, u32 session_index)
-{
- init_ed_k (kv, l_addr, l_port, r_addr, r_port, fib_index, proto);
- kv->value = (u64) thread_index << 32 | session_index;
+ return nat44_ed_sm_init_o2i_kv (kv, e_addr, e_port, fib_index, proto, 0);
}
always_inline u32
@@ -187,13 +125,13 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
u16 *lookup_dport, u8 *lookup_protocol)
{
icmp46_header_t *icmp0;
- icmp_echo_header_t *echo0, *inner_echo0 = 0;
+ nat_icmp_echo_header_t *echo0, *inner_echo0 = 0;
ip4_header_t *inner_ip0 = 0;
void *l4_header = 0;
icmp46_header_t *inner_icmp0;
icmp0 = (icmp46_header_t *) ip4_next_header (ip0);
- echo0 = (icmp_echo_header_t *) (icmp0 + 1);
+ echo0 = (nat_icmp_echo_header_t *) (icmp0 + 1);
// avoid warning about unused variables in caller by setting to bogus values
*lookup_sport = 0;
@@ -215,18 +153,18 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
*lookup_protocol = inner_ip0->protocol;
lookup_saddr->as_u32 = inner_ip0->dst_address.as_u32;
lookup_daddr->as_u32 = inner_ip0->src_address.as_u32;
- switch (ip_proto_to_nat_proto (inner_ip0->protocol))
+ switch (inner_ip0->protocol)
{
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
inner_icmp0 = (icmp46_header_t *) l4_header;
- inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1);
+ inner_echo0 = (nat_icmp_echo_header_t *) (inner_icmp0 + 1);
*lookup_sport = inner_echo0->identifier;
*lookup_dport = inner_echo0->identifier;
break;
- case NAT_PROTOCOL_UDP:
- case NAT_PROTOCOL_TCP:
- *lookup_sport = ((tcp_udp_header_t *) l4_header)->dst_port;
- *lookup_dport = ((tcp_udp_header_t *) l4_header)->src_port;
+ case IP_PROTOCOL_UDP:
+ case IP_PROTOCOL_TCP:
+ *lookup_sport = ((nat_tcp_udp_header_t *) l4_header)->dst_port;
+ *lookup_dport = ((nat_tcp_udp_header_t *) l4_header)->src_port;
break;
default:
return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL;
@@ -235,21 +173,29 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
return 0;
}
+always_inline int
+nat44_ed_tcp_is_established (nat44_ed_tcp_state_e state)
+{
+ return state == NAT44_ED_TCP_STATE_ESTABLISHED ? 1 : 0;
+}
+
always_inline u32
nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s)
{
- switch (s->nat_proto)
+ switch (s->proto)
{
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
+ /* fallthrough */
+ case IP_PROTOCOL_ICMP6:
return sm->timeouts.icmp;
- case NAT_PROTOCOL_UDP:
+ case IP_PROTOCOL_UDP:
return sm->timeouts.udp;
- case NAT_PROTOCOL_TCP:
+ case IP_PROTOCOL_TCP:
{
- if (s->state)
- return sm->timeouts.tcp.transitory;
- else
+ if (nat44_ed_tcp_is_established (s->tcp_state))
return sm->timeouts.tcp.established;
+ else
+ return sm->timeouts.tcp.transitory;
}
default:
return sm->timeouts.udp;
@@ -300,7 +246,7 @@ nat_ed_lru_insert (snat_main_per_thread_data_t *tsm, snat_session_t *s,
static_always_inline void
nat_6t_flow_to_ed_k (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f)
{
- init_ed_k (kv, f->match.saddr, f->match.sport, f->match.daddr,
+ init_ed_k (kv, f->match.saddr.as_u32, f->match.sport, f->match.daddr.as_u32,
f->match.dport, f->match.fib_index, f->match.proto);
}
@@ -308,7 +254,7 @@ static_always_inline void
nat_6t_flow_to_ed_kv (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f,
u32 thread_idx, u32 session_idx)
{
- init_ed_kv (kv, f->match.saddr, f->match.sport, f->match.daddr,
+ init_ed_kv (kv, f->match.saddr.as_u32, f->match.sport, f->match.daddr.as_u32,
f->match.dport, f->match.fib_index, f->match.proto, thread_idx,
session_idx);
}
@@ -348,6 +294,15 @@ nat_ed_ses_o2i_flow_hash_add_del (snat_main_t *sm, u32 thread_idx,
else
{
nat_6t_flow_to_ed_kv (&kv, &s->o2i, thread_idx, s - tsm->sessions);
+ if (!(s->flags & SNAT_SESSION_FLAG_STATIC_MAPPING))
+ {
+ if (nat44_ed_sm_o2i_lookup (sm, s->o2i.match.daddr,
+ s->o2i.match.dport, 0,
+ s->o2i.match.proto))
+ {
+ return -1;
+ }
+ }
nat_6t_l3_l4_csum_calc (&s->o2i);
}
ASSERT (thread_idx == s->thread_index);
@@ -393,10 +348,9 @@ nat_lru_free_one_with_head (snat_main_t *sm, int thread_index, f64 now,
sess_timeout_time =
s->last_heard + (f64) nat44_session_get_timeout (sm, s);
- if (now >= sess_timeout_time ||
- (s->tcp_closed_timestamp && now >= s->tcp_closed_timestamp))
+ if (now >= sess_timeout_time)
{
- nat_free_session_data (sm, s, thread_index, 0);
+ nat44_ed_free_session_data (sm, s, thread_index, 0);
nat_ed_session_delete (sm, s, thread_index, 0);
return 1;
}
@@ -460,23 +414,16 @@ per_vrf_sessions_cleanup (u32 thread_index)
per_vrf_sessions_t *per_vrf_sessions;
u32 *to_free = 0, *i;
- vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
+ pool_foreach (per_vrf_sessions, tsm->per_vrf_sessions_pool)
{
- if (per_vrf_sessions->expired)
- {
- if (per_vrf_sessions->ses_count == 0)
- {
- vec_add1 (to_free, per_vrf_sessions - tsm->per_vrf_sessions_vec);
- }
- }
+ if (per_vrf_sessions->expired && per_vrf_sessions->ses_count == 0)
+ vec_add1 (to_free, per_vrf_sessions - tsm->per_vrf_sessions_pool);
}
- if (vec_len (to_free))
+ vec_foreach (i, to_free)
{
- vec_foreach (i, to_free)
- {
- vec_del1 (tsm->per_vrf_sessions_vec, *i);
- }
+ per_vrf_sessions = pool_elt_at_index (tsm->per_vrf_sessions_pool, *i);
+ pool_put (tsm->per_vrf_sessions_pool, per_vrf_sessions);
}
vec_free (to_free);
@@ -495,7 +442,7 @@ per_vrf_sessions_register_session (snat_session_t *s, u32 thread_index)
// s->per_vrf_sessions_index == ~0 ... reuse of old session
- vec_foreach (per_vrf_sessions, tsm->per_vrf_sessions_vec)
+ pool_foreach (per_vrf_sessions, tsm->per_vrf_sessions_pool)
{
// ignore already expired registrations
if (per_vrf_sessions->expired)
@@ -514,14 +461,13 @@ per_vrf_sessions_register_session (snat_session_t *s, u32 thread_index)
}
// create a new registration
- vec_add2 (tsm->per_vrf_sessions_vec, per_vrf_sessions, 1);
+ pool_get (tsm->per_vrf_sessions_pool, per_vrf_sessions);
clib_memset (per_vrf_sessions, 0, sizeof (*per_vrf_sessions));
-
per_vrf_sessions->rx_fib_index = s->in2out.fib_index;
per_vrf_sessions->tx_fib_index = s->out2in.fib_index;
done:
- s->per_vrf_sessions_index = per_vrf_sessions - tsm->per_vrf_sessions_vec;
+ s->per_vrf_sessions_index = per_vrf_sessions - tsm->per_vrf_sessions_pool;
per_vrf_sessions->ses_count++;
}
@@ -537,7 +483,7 @@ per_vrf_sessions_unregister_session (snat_session_t *s, u32 thread_index)
tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
per_vrf_sessions =
- vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index);
+ pool_elt_at_index (tsm->per_vrf_sessions_pool, s->per_vrf_sessions_index);
ASSERT (per_vrf_sessions->ses_count != 0);
@@ -557,7 +503,7 @@ per_vrf_sessions_is_expired (snat_session_t *s, u32 thread_index)
tsm = vec_elt_at_index (sm->per_thread_data, thread_index);
per_vrf_sessions =
- vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index);
+ pool_elt_at_index (tsm->per_vrf_sessions_pool, s->per_vrf_sessions_index);
return per_vrf_sessions->expired;
}
@@ -754,100 +700,120 @@ is_interface_addr (snat_main_t *sm, vlib_node_runtime_t *node,
}
always_inline void
-nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses,
- vlib_buffer_t *b, u32 thread_index)
+nat44_ed_session_reopen (u32 thread_index, snat_session_t *s)
+{
+ nat_syslog_nat44_sdel (0, s->in2out.fib_index, &s->in2out.addr,
+ s->in2out.port, &s->ext_host_nat_addr,
+ s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
+ &s->ext_host_addr, s->ext_host_port, s->proto,
+ nat44_ed_is_twice_nat_session (s));
+
+ nat_ipfix_logging_nat44_ses_delete (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
+ nat_ipfix_logging_nat44_ses_create (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
+
+ nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
+ s->in2out.port, &s->ext_host_nat_addr,
+ s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
+ &s->ext_host_addr, s->ext_host_port, s->proto, 0);
+ s->total_pkts = 0;
+ s->total_bytes = 0;
+}
+
+/*
+ * "Some rise by SYN, and some by virtue FIN" - William Shakespeare
+ * TCP state tracking patterned after RFC 7857 (and RFC 6146, which is
+ * referenced by RFC 7857). In contrast to the state machine in RFC7857 we only
+ * transition to ESTABLISHED state after seeing a full 3-way handshake (SYNs
+ * and ACKs in both directions). RFC7857 as a means of protecting against
+ * spurious RSTs closing a session, goes back to ESTABLISHED if a data packet
+ * is received after the RST. This state machine will leave the state in
+ * transitory if RST is seen. Our implementation also goes beyond by supporting
+ * creation of a new session while old session is in transitory timeout after
+ * seeing FIN packets from both sides.
+ */
+always_inline void
+nat44_set_tcp_session_state (snat_main_t *sm, f64 now, snat_session_t *ses,
+ u8 tcp_flags, u32 thread_index,
+ nat44_ed_dir_e dir)
{
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
- u8 tcp_flags = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags;
- u32 tcp_ack_number = vnet_buffer (b)->ip.reass.tcp_ack_number;
- u32 tcp_seq_number = vnet_buffer (b)->ip.reass.tcp_seq_number;
- if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
- ses->state = NAT44_SES_RST;
- if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
- ses->state = 0;
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
- (ses->state & NAT44_SES_O2I_SYN))
- ses->state = 0;
- if (tcp_flags & TCP_FLAG_SYN)
- ses->state |= NAT44_SES_I2O_SYN;
- if (tcp_flags & TCP_FLAG_FIN)
- {
- ses->i2o_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
- ses->state |= NAT44_SES_I2O_FIN;
- }
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN))
+ u8 old_flags = ses->tcp_flags[dir];
+ ses->tcp_flags[dir] |=
+ tcp_flags & (TCP_FLAG_FIN | TCP_FLAG_SYN | TCP_FLAG_RST | TCP_FLAG_ACK);
+ if (old_flags == ses->tcp_flags[dir])
+ return;
+
+ u8 old_state = ses->tcp_state;
+
+ switch (old_state)
{
- if (clib_net_to_host_u32 (tcp_ack_number) > ses->o2i_fin_seq)
+ case NAT44_ED_TCP_STATE_CLOSED:
+ // ESTABLISHED when a SYN and ACK is seen from both sides
+ if ((ses->tcp_flags[NAT44_ED_DIR_I2O] &
+ ses->tcp_flags[NAT44_ED_DIR_O2I]) == (TCP_FLAG_SYN | TCP_FLAG_ACK))
{
- ses->state |= NAT44_SES_O2I_FIN_ACK;
- if (nat44_is_ses_closed (ses))
- { // if session is now closed, save the timestamp
- ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory;
- ses->last_lru_update = now;
- }
+ ses->tcp_state = NAT44_ED_TCP_STATE_ESTABLISHED;
+ ses->lru_head_index = tsm->tcp_estab_lru_head_index;
}
+ break;
+ case NAT44_ED_TCP_STATE_ESTABLISHED:
+ // CLOSING when a FIN is seen from either side or session has been RST
+ if ((ses->tcp_flags[dir] & TCP_FLAG_FIN) ||
+ (ses->tcp_flags[dir] & TCP_FLAG_RST))
+ {
+ ses->tcp_state = NAT44_ED_TCP_STATE_CLOSING;
+ ses->tcp_flags[NAT44_ED_DIR_I2O] = 0;
+ ses->tcp_flags[NAT44_ED_DIR_O2I] = 0;
+ // need to update last heard otherwise session might get
+ // immediately timed out if it has been idle longer than
+ // transitory timeout
+ ses->last_heard = now;
+ ses->lru_head_index = tsm->tcp_trans_lru_head_index;
+ }
+ break;
+ case NAT44_ED_TCP_STATE_CLOSING:
+ // Allow a transitory session to reopen
+ if ((ses->tcp_flags[NAT44_ED_DIR_I2O] &
+ ses->tcp_flags[NAT44_ED_DIR_O2I]) == (TCP_FLAG_SYN | TCP_FLAG_ACK))
+ {
+ nat44_ed_session_reopen (thread_index, ses);
+ ses->tcp_state = NAT44_ED_TCP_STATE_ESTABLISHED;
+ ses->lru_head_index = tsm->tcp_estab_lru_head_index;
+ }
+ break;
}
-
- // move the session to proper LRU
- if (ses->state)
- {
- ses->lru_head_index = tsm->tcp_trans_lru_head_index;
- }
- else
- {
- ses->lru_head_index = tsm->tcp_estab_lru_head_index;
- }
+ if (old_state == ses->tcp_state)
+ return;
+ ses->last_lru_update = now;
clib_dlist_remove (tsm->lru_pool, ses->lru_index);
clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
}
always_inline void
+nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses,
+ u8 tcp_flags, u32 thread_index)
+{
+ return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index,
+ NAT44_ED_DIR_I2O);
+}
+
+always_inline void
nat44_set_tcp_session_state_o2i (snat_main_t *sm, f64 now, snat_session_t *ses,
- u8 tcp_flags, u32 tcp_ack_number,
- u32 tcp_seq_number, u32 thread_index)
+ u8 tcp_flags, u32 thread_index)
{
- snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
- if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
- ses->state = NAT44_SES_RST;
- if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
- ses->state = 0;
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
- (ses->state & NAT44_SES_O2I_SYN))
- ses->state = 0;
- if (tcp_flags & TCP_FLAG_SYN)
- ses->state |= NAT44_SES_O2I_SYN;
- if (tcp_flags & TCP_FLAG_FIN)
- {
- ses->o2i_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
- ses->state |= NAT44_SES_O2I_FIN;
- }
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN))
- {
- if (clib_net_to_host_u32 (tcp_ack_number) > ses->i2o_fin_seq)
- ses->state |= NAT44_SES_I2O_FIN_ACK;
- if (nat44_is_ses_closed (ses))
- { // if session is now closed, save the timestamp
- ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory;
- ses->last_lru_update = now;
- }
- }
- // move the session to proper LRU
- if (ses->state)
- {
- ses->lru_head_index = tsm->tcp_trans_lru_head_index;
- }
- else
- {
- ses->lru_head_index = tsm->tcp_estab_lru_head_index;
- }
- clib_dlist_remove (tsm->lru_pool, ses->lru_index);
- clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
+ return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index,
+ NAT44_ED_DIR_O2I);
}
always_inline void
nat44_session_update_counters (snat_session_t *s, f64 now, uword bytes,
u32 thread_index)
{
+ // regardless of TCP state, reset the timer if data packet is seen.
s->last_heard = now;
s->total_pkts++;
s->total_bytes += bytes;
@@ -868,6 +834,19 @@ nat44_session_update_lru (snat_main_t *sm, snat_session_t *s, u32 thread_index)
}
}
+static_always_inline int
+nat44_ed_is_unk_proto (u8 proto)
+{
+ static const int lookup_table[256] = {
+ [IP_PROTOCOL_TCP] = 1,
+ [IP_PROTOCOL_UDP] = 1,
+ [IP_PROTOCOL_ICMP] = 1,
+ [IP_PROTOCOL_ICMP6] = 1,
+ };
+
+ return 1 - lookup_table[proto];
+}
+
#endif /* __included_nat44_ed_inlines_h__ */
/*
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_out2in.c b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c
index 186d1d6c004..fe4a41c5e08 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_out2in.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c
@@ -25,7 +25,6 @@
#include <vnet/udp/udp_local.h>
#include <vppinfra/error.h>
-#include <nat/lib/nat_syslog.h>
#include <nat/lib/ipfix_logging.h>
#include <nat/nat44-ed/nat44_ed.h>
@@ -42,7 +41,6 @@ typedef enum
NAT_ED_SP_REASON_NO_REASON,
NAT_ED_SP_REASON_LOOKUP_FAILED,
NAT_ED_SP_REASON_VRF_EXPIRED,
- NAT_ED_SP_TCP_CLOSED,
NAT_ED_SP_SESS_EXPIRED,
} nat_slow_path_reason_e;
@@ -58,6 +56,7 @@ typedef struct
u8 is_slow_path;
u8 translation_via_i2of;
u8 lookup_skipped;
+ u8 tcp_state;
nat_slow_path_reason_e slow_path_reason;
} nat44_ed_out2in_trace_t;
@@ -73,8 +72,6 @@ format_slow_path_reason (u8 *s, va_list *args)
return format (s, "slow path because lookup failed");
case NAT_ED_SP_REASON_VRF_EXPIRED:
return format (s, "slow path because vrf expired");
- case NAT_ED_SP_TCP_CLOSED:
- return format (s, "slow path because tcp closed");
case NAT_ED_SP_SESS_EXPIRED:
return format (s, "slow path because session expired");
}
@@ -108,14 +105,19 @@ format_nat44_ed_out2in_trace (u8 * s, va_list * args)
{
if (t->lookup_skipped)
{
- s = format (s, "\n lookup skipped - cached session index used");
+ s = format (s, "\n lookup skipped - cached session index used");
}
else
{
s = format (s, "\n search key %U", format_ed_session_kvp,
&t->search_key);
}
- s = format (s, "\n %U", format_slow_path_reason, t->slow_path_reason);
+ s = format (s, "\n %U", format_slow_path_reason, t->slow_path_reason);
+ }
+ if (IP_PROTOCOL_TCP == t->i2of.match.proto)
+ {
+ s = format (s, "\n TCP state: %U", format_nat44_ed_tcp_state,
+ t->tcp_state);
}
return s;
@@ -123,12 +125,12 @@ format_nat44_ed_out2in_trace (u8 * s, va_list * args)
static int
next_src_nat (snat_main_t *sm, ip4_header_t *ip, u16 src_port, u16 dst_port,
- u32 thread_index, u32 rx_fib_index)
+ u32 rx_fib_index)
{
clib_bihash_kv_16_8_t kv, value;
- init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port,
- rx_fib_index, ip->protocol);
+ init_ed_k (&kv, ip->src_address.as_u32, src_port, ip->dst_address.as_u32,
+ dst_port, rx_fib_index, ip->protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
return 1;
@@ -142,8 +144,8 @@ static void create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b,
static snat_session_t *create_session_for_static_mapping_ed (
snat_main_t *sm, vlib_buffer_t *b, ip4_address_t i2o_addr, u16 i2o_port,
u32 i2o_fib_index, ip4_address_t o2i_addr, u16 o2i_port, u32 o2i_fib_index,
- nat_protocol_t nat_proto, vlib_node_runtime_t *node, u32 rx_fib_index,
- u32 thread_index, twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now,
+ ip_protocol_t proto, vlib_node_runtime_t *node, u32 thread_index,
+ twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now,
snat_static_mapping_t *mapping);
static inline u32
@@ -180,10 +182,10 @@ icmp_out2in_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
goto out;
}
- if (snat_static_mapping_match (
- vm, sm, ip->dst_address, lookup_sport, rx_fib_index,
- ip_proto_to_nat_proto (ip->protocol), &sm_addr, &sm_port,
- &sm_fib_index, 1, &is_addr_only, 0, 0, 0, &identity_nat, &m))
+ if (snat_static_mapping_match (vm, ip->dst_address, lookup_sport,
+ rx_fib_index, ip->protocol, &sm_addr,
+ &sm_port, &sm_fib_index, 1, &is_addr_only, 0,
+ 0, 0, &identity_nat, &m))
{
// static mapping not matched
if (!sm->forwarding_enabled)
@@ -198,8 +200,7 @@ icmp_out2in_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
}
else
{
- if (next_src_nat (sm, ip, lookup_sport, lookup_dport, thread_index,
- rx_fib_index))
+ if (next_src_nat (sm, ip, lookup_sport, lookup_dport, rx_fib_index))
{
next = NAT_NEXT_IN2OUT_ED_FAST_PATH;
}
@@ -230,8 +231,8 @@ icmp_out2in_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
/* Create session initiated by host from external network */
s = create_session_for_static_mapping_ed (
sm, b, sm_addr, sm_port, sm_fib_index, ip->dst_address, lookup_sport,
- rx_fib_index, ip_proto_to_nat_proto (lookup_protocol), node, rx_fib_index,
- thread_index, 0, 0, vlib_time_now (vm), m);
+ rx_fib_index, lookup_protocol, node, thread_index, 0, 0,
+ vlib_time_now (vm), m);
if (!s)
next = NAT_NEXT_DROP;
@@ -266,44 +267,30 @@ out:
return next;
}
-// allocate exact address based on preference
static_always_inline int
-nat_alloc_addr_and_port_exact (snat_address_t * a,
- u32 thread_index,
- nat_protocol_t proto,
- ip4_address_t * addr,
- u16 * port,
- u16 port_per_thread, u32 snat_thread_index)
+nat44_ed_alloc_i2o_port (snat_main_t *sm, snat_address_t *a, snat_session_t *s,
+ ip4_address_t i2o_addr, u16 i2o_port,
+ u32 i2o_fib_index, ip_protocol_t proto,
+ u32 thread_index, u32 snat_thread_index,
+ ip4_address_t *outside_addr, u16 *outside_port)
{
- snat_main_t *sm = &snat_main;
u32 portnum;
- switch (proto)
+ for (int i = 0; i < ED_PORT_ALLOC_ATTEMPTS; ++i)
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
- { \
- while (1) \
- { \
- portnum = (port_per_thread * \
- snat_thread_index) + \
- snat_random_port(0, port_per_thread - 1) + 1024; \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- --a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16(portnum); \
- return 0; \
- } \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return 1;
+ portnum = (sm->port_per_thread * snat_thread_index) +
+ snat_random_port (0, sm->port_per_thread - 1) +
+ ED_USER_PORT_OFFSET;
+ portnum = clib_host_to_net_u16 (portnum);
+ nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, a->addr,
+ portnum, i2o_fib_index, proto);
+ if (!nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s,
+ 1 /* is_add */))
+ {
+ *outside_addr = a->addr;
+ *outside_port = portnum;
+ return 0;
+ }
}
/* Totally out of translations to use... */
@@ -312,80 +299,56 @@ nat_alloc_addr_and_port_exact (snat_address_t * a,
}
static_always_inline int
-nat44_ed_alloc_outside_addr_and_port (snat_address_t *addresses, u32 fib_index,
- u32 thread_index, nat_protocol_t proto,
- ip4_address_t *addr, u16 *port,
- u16 port_per_thread,
- u32 snat_thread_index)
+nat44_ed_alloc_i2o_addr_and_port (snat_main_t *sm, snat_address_t *addresses,
+ snat_session_t *s, ip4_address_t i2o_addr,
+ u16 i2o_port, u32 i2o_fib_index,
+ ip_protocol_t proto, u32 thread_index,
+ u32 snat_thread_index,
+ ip4_address_t *outside_addr,
+ u16 *outside_port)
{
- snat_main_t *sm = &snat_main;
snat_address_t *a, *ga = 0;
- u32 portnum;
int i;
- for (i = 0; i < vec_len (addresses); i++)
+ if (vec_len (addresses) > 0)
{
- a = addresses + i;
- switch (proto)
+ int s_addr_offset = i2o_addr.as_u32 % vec_len (addresses);
+
+ for (i = s_addr_offset; i < vec_len (addresses); ++i)
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
- { \
- if (a->fib_index == fib_index) \
- { \
- while (1) \
- { \
- portnum = (port_per_thread * snat_thread_index) + \
- snat_random_port (0, port_per_thread - 1) + 1024; \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- --a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16 (portnum); \
- return 0; \
- } \
- } \
- else if (a->fib_index == ~0) \
- { \
- ga = a; \
- } \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return 1;
+ a = addresses + i;
+ if (a->fib_index == i2o_fib_index)
+ {
+ return nat44_ed_alloc_i2o_port (
+ sm, a, s, i2o_addr, i2o_port, i2o_fib_index, proto,
+ thread_index, snat_thread_index, outside_addr, outside_port);
+ }
+ else if (a->fib_index == ~0)
+ {
+ ga = a;
+ }
}
- }
- if (ga)
- {
- a = ga;
- switch (proto)
+ for (i = 0; i < s_addr_offset; ++i)
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- while (1) \
- { \
- portnum = (port_per_thread * snat_thread_index) + \
- snat_random_port (0, port_per_thread - 1) + 1024; \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- ++a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16 (portnum); \
- return 0; \
- }
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (sm, "unknown protocol");
- return 1;
+ a = addresses + i;
+ if (a->fib_index == i2o_fib_index)
+ {
+ return nat44_ed_alloc_i2o_port (
+ sm, a, s, i2o_addr, i2o_port, i2o_fib_index, proto,
+ thread_index, snat_thread_index, outside_addr, outside_port);
+ }
+ else if (a->fib_index == ~0)
+ {
+ ga = a;
+ }
+ }
+
+ if (ga)
+ {
+ return nat44_ed_alloc_i2o_port (
+ sm, a, s, i2o_addr, i2o_port, i2o_fib_index, proto, thread_index,
+ snat_thread_index, outside_addr, outside_port);
}
}
@@ -398,23 +361,23 @@ static snat_session_t *
create_session_for_static_mapping_ed (
snat_main_t *sm, vlib_buffer_t *b, ip4_address_t i2o_addr, u16 i2o_port,
u32 i2o_fib_index, ip4_address_t o2i_addr, u16 o2i_port, u32 o2i_fib_index,
- nat_protocol_t nat_proto, vlib_node_runtime_t *node, u32 rx_fib_index,
- u32 thread_index, twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now,
+ ip_protocol_t proto, vlib_node_runtime_t *node, u32 thread_index,
+ twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now,
snat_static_mapping_t *mapping)
{
snat_session_t *s;
ip4_header_t *ip;
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
- if (PREDICT_FALSE
- (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index)))
+ if (PREDICT_FALSE (
+ nat44_ed_maximum_sessions_exceeded (sm, o2i_fib_index, thread_index)))
{
b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
nat_elog_notice (sm, "maximum sessions exceeded");
return 0;
}
- s = nat_ed_session_alloc (sm, thread_index, now, nat_proto);
+ s = nat_ed_session_alloc (sm, thread_index, now, proto);
if (!s)
{
b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED];
@@ -426,22 +389,21 @@ create_session_for_static_mapping_ed (
s->ext_host_addr.as_u32 = ip->src_address.as_u32;
s->ext_host_port =
- nat_proto == NAT_PROTOCOL_ICMP ? 0 : vnet_buffer (b)->ip.reass.l4_src_port;
+ proto == IP_PROTOCOL_ICMP ? 0 : vnet_buffer (b)->ip.reass.l4_src_port;
s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
if (lb_nat)
s->flags |= SNAT_SESSION_FLAG_LOAD_BALANCING;
if (lb_nat == AFFINITY_LB_NAT)
s->flags |= SNAT_SESSION_FLAG_AFFINITY;
- s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
s->out2in.addr = o2i_addr;
s->out2in.port = o2i_port;
s->out2in.fib_index = o2i_fib_index;
s->in2out.addr = i2o_addr;
s->in2out.port = i2o_port;
s->in2out.fib_index = i2o_fib_index;
- s->nat_proto = nat_proto;
+ s->proto = proto;
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_o2i_flow_init (sm, thread_index, s, s->ext_host_addr, o2i_port,
o2i_addr, o2i_port, o2i_fib_index, ip->protocol);
@@ -487,33 +449,23 @@ create_session_for_static_mapping_ed (
if (filter)
{
- rc = nat_alloc_addr_and_port_exact (filter,
- thread_index,
- nat_proto,
- &s->ext_host_nat_addr,
- &s->ext_host_nat_port,
- sm->port_per_thread,
- tsm->snat_thread_index);
+ rc = nat44_ed_alloc_i2o_port (
+ sm, filter, s, i2o_addr, i2o_port, i2o_fib_index, proto,
+ thread_index, tsm->snat_thread_index, &s->ext_host_nat_addr,
+ &s->ext_host_nat_port);
s->flags |= SNAT_SESSION_FLAG_EXACT_ADDRESS;
}
else
{
- rc = nat44_ed_alloc_outside_addr_and_port (
- sm->twice_nat_addresses, 0, thread_index, nat_proto,
- &s->ext_host_nat_addr, &s->ext_host_nat_port, sm->port_per_thread,
- tsm->snat_thread_index);
+ rc = nat44_ed_alloc_i2o_addr_and_port (
+ sm, sm->twice_nat_addresses, s, i2o_addr, i2o_port, i2o_fib_index,
+ proto, thread_index, tsm->snat_thread_index, &s->ext_host_nat_addr,
+ &s->ext_host_nat_port);
}
if (rc)
{
b->error = node->errors[NAT_OUT2IN_ED_ERROR_OUT_OF_PORTS];
- if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0))
- {
- nat_elog_warn (sm, "out2in flow hash del failed");
- }
- snat_free_outside_address_and_port (
- sm->twice_nat_addresses, thread_index, &s->ext_host_nat_addr,
- s->ext_host_nat_port, s->nat_proto);
nat_ed_session_delete (sm, s, thread_index, 1);
return 0;
}
@@ -521,7 +473,7 @@ create_session_for_static_mapping_ed (
s->flags |= SNAT_SESSION_FLAG_TWICE_NAT;
nat_6t_flow_saddr_rewrite_set (&s->o2i, s->ext_host_nat_addr.as_u32);
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_flow_icmp_id_rewrite_set (&s->o2i, s->ext_host_nat_port);
}
@@ -532,11 +484,8 @@ create_session_for_static_mapping_ed (
nat_6t_l3_l4_csum_calc (&s->o2i);
- nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port,
- s->ext_host_nat_addr, s->ext_host_nat_port,
- i2o_fib_index, ip->protocol);
nat_6t_flow_daddr_rewrite_set (&s->i2o, s->ext_host_addr.as_u32);
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_flow_icmp_id_rewrite_set (&s->i2o, s->ext_host_port);
}
@@ -544,10 +493,21 @@ create_session_for_static_mapping_ed (
{
nat_6t_flow_dport_rewrite_set (&s->i2o, s->ext_host_port);
}
+
+ nat_6t_flow_saddr_rewrite_set (&s->i2o, o2i_addr.as_u32);
+ if (IP_PROTOCOL_ICMP == proto)
+ {
+ nat_6t_flow_icmp_id_rewrite_set (&s->i2o, o2i_port);
+ }
+ else
+ {
+ nat_6t_flow_sport_rewrite_set (&s->i2o, o2i_port);
+ }
+ nat_6t_l3_l4_csum_calc (&s->i2o);
}
else
{
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port,
s->ext_host_addr, i2o_port, i2o_fib_index,
@@ -559,10 +519,9 @@ create_session_for_static_mapping_ed (
s->ext_host_addr, s->ext_host_port,
i2o_fib_index, ip->protocol);
}
- }
nat_6t_flow_saddr_rewrite_set (&s->i2o, o2i_addr.as_u32);
- if (NAT_PROTOCOL_ICMP == nat_proto)
+ if (IP_PROTOCOL_ICMP == proto)
{
nat_6t_flow_icmp_id_rewrite_set (&s->i2o, o2i_port);
}
@@ -581,19 +540,16 @@ create_session_for_static_mapping_ed (
nat_ed_session_delete (sm, s, thread_index, 1);
return 0;
}
-
- nat_ipfix_logging_nat44_ses_create (thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port, s->in2out.fib_index);
+ }
+ nat_ipfix_logging_nat44_ses_create (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
s->in2out.port, &s->ext_host_nat_addr,
s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
- &s->ext_host_addr, s->ext_host_port, s->nat_proto,
- is_twice_nat_session (s));
+ &s->ext_host_addr, s->ext_host_port, s->proto,
+ nat44_ed_is_twice_nat_session (s));
per_vrf_sessions_register_session (s, thread_index);
@@ -636,8 +592,8 @@ create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s,
lookup_protocol = ip->protocol;
}
- init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport,
- rx_fib_index, lookup_protocol);
+ init_ed_k (&kv, lookup_saddr.as_u32, lookup_sport, lookup_daddr.as_u32,
+ lookup_dport, rx_fib_index, lookup_protocol);
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value))
{
@@ -654,8 +610,6 @@ create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s,
}
else
{
- u32 proto;
-
if (PREDICT_FALSE
(nat44_ed_maximum_sessions_exceeded
(sm, rx_fib_index, thread_index)))
@@ -668,19 +622,12 @@ create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s,
return;
}
- proto = ip_proto_to_nat_proto (ip->protocol);
-
s->ext_host_addr = ip->src_address;
s->ext_host_port = lookup_dport;
s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS;
s->out2in.addr = ip->dst_address;
s->out2in.port = lookup_sport;
- s->nat_proto = proto;
- if (proto == NAT_PROTOCOL_OTHER)
- {
- s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
- s->out2in.port = ip->protocol;
- }
+ s->proto = ip->protocol;
s->out2in.fib_index = rx_fib_index;
s->in2out.addr = s->out2in.addr;
s->in2out.port = s->out2in.port;
@@ -702,10 +649,9 @@ create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s,
if (ip->protocol == IP_PROTOCOL_TCP)
{
- tcp_header_t *tcp = ip4_next_header (ip);
- nat44_set_tcp_session_state_o2i (sm, now, s, tcp->flags,
- tcp->ack_number, tcp->seq_number,
- thread_index);
+ nat44_set_tcp_session_state_o2i (
+ sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
}
/* Accounting */
@@ -721,7 +667,6 @@ nat44_ed_out2in_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
vlib_main_t *vm,
vlib_node_runtime_t *node)
{
- clib_bihash_kv_8_8_t kv, value;
snat_static_mapping_t *m;
snat_session_t *s;
@@ -733,15 +678,13 @@ nat44_ed_out2in_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
return 0;
}
- init_nat_k (&kv, ip->dst_address, 0, 0, 0);
- if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value))
+ m = nat44_ed_sm_o2i_lookup (sm, ip->dst_address, 0, 0, ip->protocol);
+ if (!m)
{
b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION];
return 0;
}
- m = pool_elt_at_index (sm->static_mappings, value.value);
-
/* Create a new session */
s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol);
if (!s)
@@ -752,9 +695,7 @@ nat44_ed_out2in_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b,
}
s->ext_host_addr.as_u32 = ip->src_address.as_u32;
- s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO;
s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING;
- s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT;
s->out2in.addr.as_u32 = ip->dst_address.as_u32;
s->out2in.fib_index = rx_fib_index;
s->in2out.addr.as_u32 = m->local_addr.as_u32;
@@ -816,10 +757,10 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
{
vlib_buffer_t *b0;
u32 sw_if_index0, rx_fib_index0;
- nat_protocol_t proto0;
+ ip_protocol_t proto0;
ip4_header_t *ip0;
snat_session_t *s0 = 0;
- clib_bihash_kv_16_8_t kv0, value0;
+ clib_bihash_kv_16_8_t kv0 = {}, value0;
nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS;
nat_slow_path_reason_e slow_path_reason = NAT_ED_SP_REASON_NO_REASON;
nat_6t_flow_t *f = 0;
@@ -865,9 +806,9 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
goto trace0;
}
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
+ proto0 = ip0->protocol;
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
+ if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
{
if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags !=
ICMP4_echo_request &&
@@ -916,8 +857,8 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
s0 = NULL;
}
- init_ed_k (&kv0, lookup.saddr, lookup.sport, lookup.daddr, lookup.dport,
- lookup.fib_index, lookup.proto);
+ init_ed_k (&kv0, lookup.saddr.as_u32, lookup.sport, lookup.daddr.as_u32,
+ lookup.dport, lookup.fib_index, lookup.proto);
// lookup flow
if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
@@ -938,30 +879,14 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index)))
{
// session is closed, go slow path
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
slow_path_reason = NAT_ED_SP_REASON_VRF_EXPIRED;
next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
goto trace0;
}
- if (s0->tcp_closed_timestamp)
- {
- if (now >= s0->tcp_closed_timestamp)
- {
- // session is closed, go slow path, freed in slow path
- slow_path_reason = NAT_ED_SP_TCP_CLOSED;
- next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
- }
- else
- {
- // session in transitory timeout, drop
- b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TCP_CLOSED];
- next[0] = NAT_NEXT_DROP;
- }
- goto trace0;
- }
-
// drop if session expired
u64 sess_timeout_time;
sess_timeout_time =
@@ -969,8 +894,9 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
if (now >= sess_timeout_time)
{
// session is closed, go slow path
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
slow_path_reason = NAT_ED_SP_SESS_EXPIRED;
next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
goto trace0;
@@ -992,7 +918,7 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
* be able to use dhcp client on the outside interface
*/
if (PREDICT_FALSE (
- proto0 == NAT_PROTOCOL_UDP &&
+ proto0 == IP_PROTOCOL_UDP &&
(vnet_buffer (b0)->ip.reass.l4_dst_port ==
clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client))))
{
@@ -1017,8 +943,9 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
// create_bypass_for_fwd (sm, b0, s0, ip0, rx_fib_index0,
// thread_index);
translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH;
- nat_free_session_data (sm, s0, thread_index, 0);
+ nat44_ed_free_session_data (sm, s0, thread_index, 0);
nat_ed_session_delete (sm, s0, thread_index, 1);
+ s0 = 0;
next[0] = NAT_NEXT_DROP;
b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TRNSL_FAILED];
goto trace0;
@@ -1037,27 +964,23 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
switch (proto0)
{
- case NAT_PROTOCOL_TCP:
+ case IP_PROTOCOL_TCP:
vlib_increment_simple_counter (&sm->counters.fastpath.out2in.tcp,
thread_index, sw_if_index0, 1);
nat44_set_tcp_session_state_o2i (sm, now, s0,
vnet_buffer (b0)->ip.
reass.icmp_type_or_tcp_flags,
- vnet_buffer (b0)->ip.
- reass.tcp_ack_number,
- vnet_buffer (b0)->ip.
- reass.tcp_seq_number,
thread_index);
break;
- case NAT_PROTOCOL_UDP:
+ case IP_PROTOCOL_UDP:
vlib_increment_simple_counter (&sm->counters.fastpath.out2in.udp,
thread_index, sw_if_index0, 1);
break;
- case NAT_PROTOCOL_ICMP:
+ case IP_PROTOCOL_ICMP:
vlib_increment_simple_counter (&sm->counters.fastpath.out2in.icmp,
thread_index, sw_if_index0, 1);
break;
- case NAT_PROTOCOL_OTHER:
+ default:
vlib_increment_simple_counter (&sm->counters.fastpath.out2in.other,
thread_index, sw_if_index0, 1);
break;
@@ -1090,6 +1013,7 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
t->translation_via_i2of = (&s0->i2o == f);
+ t->tcp_state = s0->tcp_state;
}
else
{
@@ -1135,12 +1059,12 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
{
vlib_buffer_t *b0;
u32 sw_if_index0, rx_fib_index0;
- nat_protocol_t proto0;
+ ip_protocol_t proto0;
ip4_header_t *ip0;
udp_header_t *udp0;
icmp46_header_t *icmp0;
snat_session_t *s0 = 0;
- clib_bihash_kv_16_8_t kv0, value0;
+ clib_bihash_kv_16_8_t kv0 = {}, value0;
lb_nat_type_t lb_nat0;
twice_nat_type_t twice_nat0;
u8 identity_nat0;
@@ -1171,9 +1095,9 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
udp0 = ip4_next_header (ip0);
icmp0 = (icmp46_header_t *) udp0;
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
+ proto0 = ip0->protocol;
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
+ if (PREDICT_FALSE (nat44_ed_is_unk_proto (proto0)))
{
s0 = nat44_ed_out2in_slowpath_unknown_proto (
sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node);
@@ -1198,7 +1122,7 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
goto trace0;
}
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
+ if (PREDICT_FALSE (proto0 == IP_PROTOCOL_ICMP))
{
next[0] = icmp_out2in_ed_slow_path
(sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node,
@@ -1215,15 +1139,19 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
goto trace0;
}
- vlib_increment_simple_counter (&sm->counters.slowpath.out2in.icmp,
- thread_index, sw_if_index0, 1);
+ if (NAT_NEXT_DROP != next[0])
+ {
+ vlib_increment_simple_counter (
+ &sm->counters.slowpath.out2in.icmp, thread_index, sw_if_index0,
+ 1);
+ }
goto trace0;
}
- init_ed_k (&kv0, ip0->src_address,
- vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0,
- ip0->protocol);
+ init_ed_k (
+ &kv0, ip0->src_address.as_u32, vnet_buffer (b0)->ip.reass.l4_src_port,
+ ip0->dst_address.as_u32, vnet_buffer (b0)->ip.reass.l4_dst_port,
+ rx_fib_index0, ip0->protocol);
s0 = NULL;
if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0))
@@ -1232,13 +1160,6 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
s0 =
pool_elt_at_index (tsm->sessions,
ed_value_get_session_index (&value0));
-
- if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
- {
- nat_free_session_data (sm, s0, thread_index, 0);
- nat_ed_session_delete (sm, s0, thread_index, 1);
- s0 = NULL;
- }
}
if (!s0)
@@ -1247,19 +1168,18 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
destination address and port in packet */
if (snat_static_mapping_match (
- vm, sm, ip0->dst_address,
- vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, proto0,
- &sm_addr, &sm_port, &sm_fib_index, 1, 0, &twice_nat0, &lb_nat0,
- &ip0->src_address, &identity_nat0, &m))
+ vm, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port,
+ rx_fib_index0, proto0, &sm_addr, &sm_port, &sm_fib_index, 1, 0,
+ &twice_nat0, &lb_nat0, &ip0->src_address, &identity_nat0, &m))
{
/*
* Send DHCP packets to the ipv4 stack, or we won't
* be able to use dhcp client on the outside interface
*/
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_UDP
- && (vnet_buffer (b0)->ip.reass.l4_dst_port ==
- clib_host_to_net_u16
- (UDP_DST_PORT_dhcp_to_client))))
+ if (PREDICT_FALSE (
+ proto0 == IP_PROTOCOL_UDP &&
+ (vnet_buffer (b0)->ip.reass.l4_dst_port ==
+ clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client))))
{
goto trace0;
}
@@ -1272,10 +1192,9 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
}
else
{
- if (next_src_nat
- (sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port,
- thread_index, rx_fib_index0))
+ if (next_src_nat (
+ sm, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0))
{
next[0] = NAT_NEXT_IN2OUT_ED_FAST_PATH;
}
@@ -1291,9 +1210,9 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
if (PREDICT_FALSE (identity_nat0))
goto trace0;
- if ((proto0 == NAT_PROTOCOL_TCP)
- && !tcp_flags_is_init (vnet_buffer (b0)->ip.
- reass.icmp_type_or_tcp_flags))
+ if ((proto0 == IP_PROTOCOL_TCP) &&
+ !tcp_flags_is_init (
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
{
b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NON_SYN];
next[0] = NAT_NEXT_DROP;
@@ -1301,16 +1220,10 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
}
/* Create session initiated by host from external network */
- s0 = create_session_for_static_mapping_ed (sm, b0,
- sm_addr, sm_port,
- sm_fib_index,
- ip0->dst_address,
- vnet_buffer (b0)->
- ip.reass.l4_dst_port,
- rx_fib_index0, proto0,
- node, rx_fib_index0,
- thread_index, twice_nat0,
- lb_nat0, now, m);
+ s0 = create_session_for_static_mapping_ed (
+ sm, b0, sm_addr, sm_port, sm_fib_index, ip0->dst_address,
+ vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, proto0,
+ node, thread_index, twice_nat0, lb_nat0, now, m);
if (!s0)
{
next[0] = NAT_NEXT_DROP;
@@ -1326,17 +1239,13 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
goto trace0;
}
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
+ if (PREDICT_TRUE (proto0 == IP_PROTOCOL_TCP))
{
vlib_increment_simple_counter (&sm->counters.slowpath.out2in.tcp,
thread_index, sw_if_index0, 1);
nat44_set_tcp_session_state_o2i (sm, now, s0,
vnet_buffer (b0)->ip.
reass.icmp_type_or_tcp_flags,
- vnet_buffer (b0)->ip.
- reass.tcp_ack_number,
- vnet_buffer (b0)->ip.
- reass.tcp_seq_number,
thread_index);
}
else
@@ -1369,6 +1278,7 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
t->session_index = s0 - tsm->sessions;
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
+ t->tcp_state = s0->tcp_state;
}
else
{
diff --git a/src/plugins/nat/nat44-ed/tcp_conn_track.rst b/src/plugins/nat/nat44-ed/tcp_conn_track.rst
new file mode 100644
index 00000000000..faf0dec8b06
--- /dev/null
+++ b/src/plugins/nat/nat44-ed/tcp_conn_track.rst
@@ -0,0 +1,65 @@
+NAT44ED TCP connection tracking
+===============================
+
+TCP connection tracking in endpoint-dependent NAT is based on RFC 7857
+and RFC 6146, which RFC 7857 references.
+
+See RFC 7857 for the original graph - our graph is slightly different,
+allowing creation of new session, while an old session is in transitory
+timeout after seeing FIN packets from both sides:
+
+After discussion on vpp-dev and with Andrew Yourtschenko we agreed that
+it's friendly behaviour to allow creating a new session while the old
+one is closed and in transitory timeout. The alternative means VPP is
+insisting that a 5-tuple connection cannot be created while an old one
+is finished and timing out. There is no apparent reason why our change
+would break anything and we agreed that it could only help users.
+
+::
+
+
+ +------------transitory timeout----------------+
+ | |
+ | +-------------+ |
+ | session created---->+ CLOSED | |
+ | +-------------+ |
+ | | | |
++-----+ | SYN SYN |
+| v v IN2OUT OUT2IN |
+| +->session removed | | |
+| | ^ ^ ^ ^ ^ v v |
+| | | | | | | +-------+ +-------+ |
+| | | | | | +----transitory timeout---+SYN_I2O| |SYN_O2I+--+
+| | | | | | +---------+ |-------| |-------|
+| | | | | +-transitory---+RST_TRANS| | |
+| | | | | timeout +---------+ SYN SYN
+| | | | | | ^ OUT2IN IN2OUT
+| | | | | | | | |
+| | | | | | | v v
+| | | | | | | +-----------+
+| | | | | | +--RST----+ESTABLISHED+<-SYN IN2OUT-+
+| | | | | | +-----------+ |
+| | | | | +---data pkt-----^ | | | ^ |
+| | | | | | | | | |
+| | | | +----established timeout---------------+ | | | |
+| | | | | | | |
+| | | | +-----FIN IN2OUT---------+ | | |
+| | | | v | | |
+| | | | +-------+ +--FIN OUT2IN----+ | |
+| | | +--established---+FIN_I2O| | | |
+| | | timeout +-------+ v +-SYN OUT2IN-+ |
+| | | | +-------+ | |
+| | +----established-------------+FIN_O2I| +--------------+ |
+| | timeout | +-------+ |REOPEN_SYN_I2O| +--------------+
+| | | | +--------------+ |REOPEN_SYN_O2I|
+| | FIN FIN ^ | +--------------+
+| | OUT2IN IN2OUT | | ^ |
+| | | | | | | |
+| | v v | | | |
+| | +-------------+ | | | |
+| +--transitory timeout---+ FIN_TRANS +-SYN IN2OUT-+ | | |
+| +-------------+ | | |
+| | | | |
+| +--------SYN OUT2IN----|-----------+ |
+| v |
++------------------transitory timeout-------------------+<-------------+
diff --git a/src/plugins/nat/nat44-ei/nat44_ei.api b/src/plugins/nat/nat44-ei/nat44_ei.api
index 9ea1a3a1dde..6d24b541e8d 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei.api
+++ b/src/plugins/nat/nat44-ei/nat44_ei.api
@@ -550,6 +550,45 @@ define nat44_ei_interface_output_feature_details {
vl_api_interface_index_t sw_if_index;
};
+/** \brief add/del NAT output interface (postrouting
+ in2out translation)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - true if add, false if delete
+ @param sw_if_index - software index of the interface
+*/
+autoendian autoreply define nat44_ei_add_del_output_interface {
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_interface_index_t sw_if_index;
+};
+
+service {
+ rpc nat44_ei_output_interface_get returns nat44_ei_output_interface_get_reply
+ stream nat44_ei_output_interface_details;
+};
+
+define nat44_ei_output_interface_get
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+};
+
+define nat44_ei_output_interface_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+define nat44_ei_output_interface_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+
/** \brief Add/delete NAT44 static mapping
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -784,6 +823,52 @@ define nat44_ei_user_session_details {
u16 ext_host_port;
};
+/** \brief NAT44 user's sessions
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip_address - IPv4 address of the user to dump
+ @param vrf_id - VRF_ID
+*/
+define nat44_ei_user_session_v2_dump {
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_ip4_address_t ip_address;
+ u32 vrf_id;
+};
+
+/** \brief NAT44 user's sessions response
+ @param context - sender context, to match reply w/ request
+ @param outside_ip_address - outside IPv4 address
+ @param outside_port - outside port
+ @param inside_ip_address - inside IPv4 address
+ @param inside_port - inside port
+ @param protocol - protocol
+ @param flags - flag NAT_IS_STATIC if session is static
+ @param last_heard - last heard timer since VPP start
+ @param time_since_last_heard - difference between current vpp time and last_heard value
+ @param total_bytes - count of bytes sent through session
+ @param total_pkts - count of pakets sent through session
+ @param ext_host_address - external host IPv4 address
+ @param ext_host_port - external host port
+*/
+define nat44_ei_user_session_v2_details {
+ option in_progress;
+ u32 context;
+ vl_api_ip4_address_t outside_ip_address;
+ u16 outside_port;
+ vl_api_ip4_address_t inside_ip_address;
+ u16 inside_port;
+ u16 protocol;
+ vl_api_nat44_ei_config_flags_t flags;
+ u64 last_heard;
+ u64 time_since_last_heard;
+ u64 total_bytes;
+ u32 total_pkts;
+ vl_api_ip4_address_t ext_host_address;
+ u16 ext_host_port;
+};
+
/** \brief Delete NAT44 session
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/plugins/nat/nat44-ei/nat44_ei.c b/src/plugins/nat/nat44-ei/nat44_ei.c
index 3c9a9a85346..e16625a2946 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei.c
@@ -40,7 +40,6 @@
nat44_ei_main_t nat44_ei_main;
extern vlib_node_registration_t nat44_ei_hairpinning_node;
-extern vlib_node_registration_t nat44_ei_hairpin_dst_node;
extern vlib_node_registration_t
nat44_ei_in2out_hairpinning_finish_ip4_lookup_node;
extern vlib_node_registration_t
@@ -62,7 +61,7 @@ extern vlib_node_registration_t
if (PREDICT_FALSE (nm->enabled)) \
{ \
nat44_ei_log_err ("plugin enabled"); \
- return 1; \
+ return VNET_API_ERROR_FEATURE_ALREADY_ENABLED; \
} \
} \
while (0)
@@ -74,7 +73,7 @@ extern vlib_node_registration_t
if (PREDICT_FALSE (!nm->enabled)) \
{ \
nat44_ei_log_err ("plugin disabled"); \
- return 1; \
+ return VNET_API_ERROR_FEATURE_ALREADY_DISABLED; \
} \
} \
while (0)
@@ -111,31 +110,6 @@ VNET_FEATURE_INIT (ip4_nat44_ei_in2out_output, static) = {
.runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa",
"ip4-sv-reassembly-output-feature"),
};
-VNET_FEATURE_INIT (ip4_nat44_ei_in2out_fast, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-ei-in2out-fast",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
- "ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_nat44_ei_out2in_fast, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-ei-out2in-fast",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
- "ip4-sv-reassembly-feature",
- "ip4-dhcp-client-detect"),
-};
-VNET_FEATURE_INIT (ip4_nat44_ei_hairpin_dst, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "nat44-ei-hairpin-dst",
- .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa",
- "ip4-sv-reassembly-feature"),
-};
-VNET_FEATURE_INIT (ip4_nat44_ei_hairpin_src, static) = {
- .arc_name = "ip4-output",
- .node_name = "nat44-ei-hairpin-src",
- .runs_after = VNET_FEATURES ("acl-plugin-out-ip4-fa",
- "ip4-sv-reassembly-output-feature"),
-};
VNET_FEATURE_INIT (ip4_nat44_ei_hairpinning, static) = {
.arc_name = "ip4-local",
.node_name = "nat44-ei-hairpinning",
@@ -200,6 +174,39 @@ typedef struct
void nat44_ei_add_del_addr_to_fib (ip4_address_t *addr, u8 p_len,
u32 sw_if_index, int is_add);
+static void nat44_ei_worker_db_free (nat44_ei_main_per_thread_data_t *tnm);
+
+static int nat44_ei_add_static_mapping_internal (
+ ip4_address_t l_addr, ip4_address_t e_addr, u16 l_port, u16 e_port,
+ nat_protocol_t proto, u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag);
+
+static int nat44_ei_del_static_mapping_internal (
+ ip4_address_t l_addr, ip4_address_t e_addr, u16 l_port, u16 e_port,
+ nat_protocol_t proto, u32 vrf_id, u32 sw_if_index, u32 flags);
+
+always_inline bool
+nat44_ei_port_is_used (nat44_ei_address_t *a, u8 proto, u16 port)
+{
+ return clib_bitmap_get (a->busy_port_bitmap[proto], port);
+}
+
+always_inline void
+nat44_ei_port_get (nat44_ei_address_t *a, u8 proto, u16 port)
+{
+ ASSERT (!nat44_ei_port_is_used (a, proto, port));
+ a->busy_port_bitmap[proto] =
+ clib_bitmap_set (a->busy_port_bitmap[proto], port, 1);
+}
+
+always_inline void
+nat44_ei_port_put (nat44_ei_address_t *a, u8 proto, u16 port)
+{
+ ASSERT (nat44_ei_port_is_used (a, proto, port));
+ a->busy_port_bitmap[proto] =
+ clib_bitmap_set (a->busy_port_bitmap[proto], port, 0);
+}
+
static u8 *
format_nat44_ei_classify_trace (u8 *s, va_list *args)
{
@@ -219,8 +226,6 @@ format_nat44_ei_classify_trace (u8 *s, va_list *args)
return s;
}
-static void nat44_ei_db_free ();
-
static void nat44_ei_db_init (u32 translations, u32 translation_buckets,
u32 user_buckets);
@@ -304,6 +309,76 @@ nat_validate_interface_counters (nat44_ei_main_t *nm, u32 sw_if_index)
nat_validate_simple_counter (nm->counters.hairpinning, sw_if_index);
}
+static void
+nat44_ei_add_del_addr_to_fib_foreach_out_if (ip4_address_t *addr, u8 is_add)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_interface_t *i;
+
+ pool_foreach (i, nm->interfaces)
+ {
+ if (nat44_ei_interface_is_outside (i) && !nm->out2in_dpo)
+ {
+ nat44_ei_add_del_addr_to_fib (addr, 32, i->sw_if_index, is_add);
+ }
+ }
+ pool_foreach (i, nm->output_feature_interfaces)
+ {
+ if (nat44_ei_interface_is_outside (i) && !nm->out2in_dpo)
+ {
+ nat44_ei_add_del_addr_to_fib (addr, 32, i->sw_if_index, is_add);
+ }
+ }
+}
+
+static_always_inline void
+nat44_ei_add_del_addr_to_fib_foreach_addr (u32 sw_if_index, u8 is_add)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_address_t *ap;
+
+ vec_foreach (ap, nm->addresses)
+ {
+ nat44_ei_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, is_add);
+ }
+}
+
+static_always_inline void
+nat44_ei_add_del_addr_to_fib_foreach_addr_only_sm (u32 sw_if_index, u8 is_add)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_static_mapping_t *m;
+
+ pool_foreach (m, nm->static_mappings)
+ {
+ if (is_sm_addr_only (m->flags) &&
+ !(m->local_addr.as_u32 == m->external_addr.as_u32))
+ {
+ nat44_ei_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index,
+ is_add);
+ }
+ }
+}
+
+static int
+nat44_ei_is_address_used_in_static_mapping (ip4_address_t addr)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_static_mapping_t *m;
+ pool_foreach (m, nm->static_mappings)
+ {
+ if (is_sm_addr_only (m->flags) || is_sm_identity_nat (m->flags))
+ {
+ continue;
+ }
+ if (m->external_addr.as_u32 == addr.as_u32)
+ {
+ return 1;
+ }
+ }
+ return 0;
+}
+
clib_error_t *
nat44_ei_init (vlib_main_t *vm)
{
@@ -372,14 +447,15 @@ nat44_ei_init (vlib_main_t *vm)
/* Use all available workers by default */
if (nm->num_workers > 1)
{
-
for (i = 0; i < nm->num_workers; i++)
bitmap = clib_bitmap_set (bitmap, i, 1);
nat44_ei_set_workers (bitmap);
clib_bitmap_free (bitmap);
}
else
- nm->per_thread_data[0].snat_thread_index = 0;
+ {
+ nm->per_thread_data[0].snat_thread_index = 0;
+ }
/* callbacks to call when interface address changes. */
cbi.function = nat44_ei_ip4_add_del_interface_address_cb;
@@ -402,8 +478,6 @@ nat44_ei_init (vlib_main_t *vm)
nm->hairpinning_fq_index =
vlib_frame_queue_main_init (nat44_ei_hairpinning_node.index, 0);
- nm->hairpin_dst_fq_index =
- vlib_frame_queue_main_init (nat44_ei_hairpin_dst_node.index, 0);
nm->in2out_hairpinning_finish_ip4_lookup_node_fq_index =
vlib_frame_queue_main_init (
nat44_ei_in2out_hairpinning_finish_ip4_lookup_node.index, 0);
@@ -466,43 +540,104 @@ nat44_ei_plugin_enable (nat44_ei_config_t c)
nm->user_buckets);
nat44_ei_set_alloc_default ();
- // TODO: zero simple counter for all counters missing
-
vlib_zero_simple_counter (&nm->total_users, 0);
vlib_zero_simple_counter (&nm->total_sessions, 0);
vlib_zero_simple_counter (&nm->user_limit_reached, 0);
+ if (nm->num_workers > 1)
+ {
+ if (nm->fq_in2out_index == ~0)
+ {
+ nm->fq_in2out_index = vlib_frame_queue_main_init (
+ nm->in2out_node_index, nm->frame_queue_nelts);
+ }
+ if (nm->fq_out2in_index == ~0)
+ {
+ nm->fq_out2in_index = vlib_frame_queue_main_init (
+ nm->out2in_node_index, nm->frame_queue_nelts);
+ }
+ if (nm->fq_in2out_output_index == ~0)
+ {
+ nm->fq_in2out_output_index = vlib_frame_queue_main_init (
+ nm->in2out_output_node_index, nm->frame_queue_nelts);
+ }
+ }
+
nat_ha_enable ();
nm->enabled = 1;
return 0;
}
-void
-nat44_ei_addresses_free (nat44_ei_address_t **addresses)
+static_always_inline nat44_ei_outside_fib_t *
+nat44_ei_get_outside_fib (nat44_ei_outside_fib_t *outside_fibs, u32 fib_index)
{
- nat44_ei_address_t *ap;
- vec_foreach (ap, *addresses)
+ nat44_ei_outside_fib_t *f;
+ vec_foreach (f, outside_fibs)
{
-#define _(N, i, n, s) vec_free (ap->busy_##n##_ports_per_thread);
- foreach_nat_protocol
-#undef _
+ if (f->fib_index == fib_index)
+ {
+ return f;
+ }
}
- vec_free (*addresses);
- *addresses = 0;
+ return 0;
+}
+
+static_always_inline nat44_ei_interface_t *
+nat44_ei_get_interface (nat44_ei_interface_t *interfaces, u32 sw_if_index)
+{
+ nat44_ei_interface_t *i;
+ pool_foreach (i, interfaces)
+ {
+ if (i->sw_if_index == sw_if_index)
+ {
+ return i;
+ }
+ }
+ return 0;
+}
+
+static_always_inline int
+nat44_ei_hairpinning_enable (u8 is_enable)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ u32 sw_if_index = 0; // local0
+
+ if (is_enable)
+ {
+ nm->hairpin_reg += 1;
+ if (1 == nm->hairpin_reg)
+ {
+ return vnet_feature_enable_disable (
+ "ip4-local", "nat44-ei-hairpinning", sw_if_index, is_enable, 0, 0);
+ }
+ }
+ else
+ {
+ if (0 == nm->hairpin_reg)
+ return 1;
+
+ nm->hairpin_reg -= 1;
+ if (0 == nm->hairpin_reg)
+ {
+ return vnet_feature_enable_disable (
+ "ip4-local", "nat44-ei-hairpinning", sw_if_index, is_enable, 0, 0);
+ }
+ }
+
+ return 0;
}
int
-nat44_ei_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
+nat44_ei_add_interface (u32 sw_if_index, u8 is_inside)
{
const char *feature_name, *del_feature_name;
nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_interface_t *i;
- nat44_ei_address_t *ap;
- nat44_ei_static_mapping_t *m;
+
nat44_ei_outside_fib_t *outside_fib;
- u32 fib_index =
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+ nat44_ei_interface_t *i;
+ u32 fib_index;
+ int rv;
fail_if_disabled ();
@@ -512,470 +647,665 @@ nat44_ei_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del)
return VNET_API_ERROR_UNSUPPORTED;
}
- pool_foreach (i, nm->output_feature_interfaces)
+ if (nat44_ei_get_interface (nm->output_feature_interfaces, sw_if_index))
{
- if (i->sw_if_index == sw_if_index)
+ nat44_ei_log_err ("error interface already configured");
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+ i = nat44_ei_get_interface (nm->interfaces, sw_if_index);
+ if (i)
+ {
+ if ((nat44_ei_interface_is_inside (i) && is_inside) ||
+ (nat44_ei_interface_is_outside (i) && !is_inside))
{
- nat44_ei_log_err ("error interface already configured");
- return VNET_API_ERROR_VALUE_EXIST;
+ return 0;
+ }
+ if (nm->num_workers > 1)
+ {
+ del_feature_name = !is_inside ? "nat44-ei-in2out-worker-handoff" :
+ "nat44-ei-out2in-worker-handoff";
+ feature_name = "nat44-ei-handoff-classify";
+ }
+ else
+ {
+ del_feature_name =
+ !is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
+
+ feature_name = "nat44-ei-classify";
}
- }
- if (nm->static_mapping_only && !(nm->static_mapping_connection_tracking))
- feature_name = is_inside ? "nat44-ei-in2out-fast" : "nat44-ei-out2in-fast";
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
+ sw_if_index, 0, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
+ sw_if_index, 1, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ if (!is_inside)
+ {
+ rv = nat44_ei_hairpinning_enable (0);
+ if (rv)
+ {
+ return rv;
+ }
+ }
+ }
else
{
if (nm->num_workers > 1)
- feature_name = is_inside ? "nat44-ei-in2out-worker-handoff" :
- "nat44-ei-out2in-worker-handoff";
+ {
+ feature_name = is_inside ? "nat44-ei-in2out-worker-handoff" :
+ "nat44-ei-out2in-worker-handoff";
+ }
else
- feature_name = is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
- }
+ {
+ feature_name = is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
+ }
+ nat_validate_interface_counters (nm, sw_if_index);
+
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
+ sw_if_index, 1, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ if (is_inside && !nm->out2in_dpo)
+ {
+ rv = nat44_ei_hairpinning_enable (1);
+ if (rv)
+ {
+ return rv;
+ }
+ }
- if (nm->fq_in2out_index == ~0 && nm->num_workers > 1)
- nm->fq_in2out_index = vlib_frame_queue_main_init (nm->in2out_node_index,
- nm->frame_queue_nelts);
+ pool_get (nm->interfaces, i);
+ i->sw_if_index = sw_if_index;
+ i->flags = 0;
+ }
- if (nm->fq_out2in_index == ~0 && nm->num_workers > 1)
- nm->fq_out2in_index = vlib_frame_queue_main_init (nm->out2in_node_index,
- nm->frame_queue_nelts);
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
if (!is_inside)
{
- vec_foreach (outside_fib, nm->outside_fibs)
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+
+ outside_fib = nat44_ei_get_outside_fib (nm->outside_fibs, fib_index);
+ if (outside_fib)
{
- if (outside_fib->fib_index == fib_index)
- {
- if (is_del)
- {
- outside_fib->refcount--;
- if (!outside_fib->refcount)
- vec_del1 (nm->outside_fibs,
- outside_fib - nm->outside_fibs);
- }
- else
- outside_fib->refcount++;
- goto feature_set;
- }
+ outside_fib->refcount++;
}
- if (!is_del)
+ else
{
vec_add2 (nm->outside_fibs, outside_fib, 1);
- outside_fib->refcount = 1;
outside_fib->fib_index = fib_index;
+ outside_fib->refcount = 1;
}
- }
-feature_set:
- pool_foreach (i, nm->interfaces)
+ nat44_ei_add_del_addr_to_fib_foreach_addr (sw_if_index, 1);
+ nat44_ei_add_del_addr_to_fib_foreach_addr_only_sm (sw_if_index, 1);
+ }
+ else
{
- if (i->sw_if_index == sw_if_index)
- {
- if (is_del)
- {
- if (nat44_ei_interface_is_inside (i) &&
- nat44_ei_interface_is_outside (i))
- {
- if (is_inside)
- i->flags &= ~NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
- else
- i->flags &= ~NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
+ }
- if (nm->num_workers > 1)
- {
- del_feature_name = "nat44-ei-handoff-classify";
- clib_warning (
- "del_feature_name = nat44-ei-handoff-classify");
- feature_name = !is_inside ?
- "nat44-ei-in2out-worker-handoff" :
- "nat44-ei-out2in-worker-handoff";
- }
- else
- {
- del_feature_name = "nat44-ei-classify";
- clib_warning ("del_feature_name = nat44-ei-classify");
- feature_name =
- !is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
- }
+ return 0;
+}
- int rv =
- ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
- if (rv)
- return rv;
- rv = vnet_feature_enable_disable (
- "ip4-unicast", del_feature_name, sw_if_index, 0, 0, 0);
- if (rv)
- return rv;
- rv = vnet_feature_enable_disable (
- "ip4-unicast", feature_name, sw_if_index, 1, 0, 0);
- if (rv)
- return rv;
- if (!is_inside)
- {
- rv = vnet_feature_enable_disable ("ip4-local",
- "nat44-ei-hairpinning",
- sw_if_index, 1, 0, 0);
- if (rv)
- return rv;
- }
- }
- else
- {
- int rv =
- ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
- if (rv)
- return rv;
- rv = vnet_feature_enable_disable (
- "ip4-unicast", feature_name, sw_if_index, 0, 0, 0);
- if (rv)
- return rv;
- pool_put (nm->interfaces, i);
- if (is_inside)
- {
- rv = vnet_feature_enable_disable ("ip4-local",
- "nat44-ei-hairpinning",
- sw_if_index, 0, 0, 0);
- if (rv)
- return rv;
- }
- }
- }
- else
- {
- if ((nat44_ei_interface_is_inside (i) && is_inside) ||
- (nat44_ei_interface_is_outside (i) && !is_inside))
- return 0;
+int
+nat44_ei_del_interface (u32 sw_if_index, u8 is_inside)
+{
+ const char *feature_name, *del_feature_name;
+ nat44_ei_main_t *nm = &nat44_ei_main;
- if (nm->num_workers > 1)
- {
- del_feature_name = !is_inside ?
- "nat44-ei-in2out-worker-handoff" :
- "nat44-ei-out2in-worker-handoff";
- feature_name = "nat44-ei-handoff-classify";
- clib_warning ("feature_name = nat44-ei-handoff-classify");
- }
- else
- {
- del_feature_name =
- !is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
- feature_name = "nat44-ei-classify";
- clib_warning ("feature_name = nat44-ei-classify");
- }
+ nat44_ei_outside_fib_t *outside_fib;
+ nat44_ei_interface_t *i;
+ u32 fib_index;
+ int rv;
- int rv =
- ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
- if (rv)
- return rv;
- rv = vnet_feature_enable_disable (
- "ip4-unicast", del_feature_name, sw_if_index, 0, 0, 0);
- if (rv)
- return rv;
- rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
- sw_if_index, 1, 0, 0);
- if (rv)
- return rv;
- if (!is_inside)
- {
- rv = vnet_feature_enable_disable (
- "ip4-local", "nat44-ei-hairpinning", sw_if_index, 0, 0, 0);
- if (rv)
- return rv;
- }
- goto set_flags;
- }
+ fail_if_disabled ();
- goto fib;
- }
+ if (nm->out2in_dpo && !is_inside)
+ {
+ nat44_ei_log_err ("error unsupported");
+ return VNET_API_ERROR_UNSUPPORTED;
}
- if (is_del)
+ i = nat44_ei_get_interface (nm->interfaces, sw_if_index);
+ if (i == 0)
{
nat44_ei_log_err ("error interface couldn't be found");
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
- pool_get (nm->interfaces, i);
- i->sw_if_index = sw_if_index;
- i->flags = 0;
- nat_validate_interface_counters (nm, sw_if_index);
-
- int rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
- sw_if_index, 1, 0, 0);
- if (rv)
- return rv;
-
- rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
- if (rv)
- return rv;
-
- if (is_inside && !nm->out2in_dpo)
+ if (nat44_ei_interface_is_inside (i) && nat44_ei_interface_is_outside (i))
{
- rv = vnet_feature_enable_disable ("ip4-local", "nat44-ei-hairpinning",
+ if (nm->num_workers > 1)
+ {
+ del_feature_name = "nat44-ei-handoff-classify";
+ feature_name = !is_inside ? "nat44-ei-in2out-worker-handoff" :
+ "nat44-ei-out2in-worker-handoff";
+ }
+ else
+ {
+ del_feature_name = "nat44-ei-classify";
+ feature_name = !is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
+ }
+
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", del_feature_name,
+ sw_if_index, 0, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
sw_if_index, 1, 0, 0);
if (rv)
- return rv;
- }
-
-set_flags:
- if (is_inside)
- {
- i->flags |= NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
- return 0;
+ {
+ return rv;
+ }
+ if (is_inside)
+ {
+ i->flags &= ~NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
+ }
+ else
+ {
+ rv = nat44_ei_hairpinning_enable (1);
+ if (rv)
+ {
+ return rv;
+ }
+ i->flags &= ~NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+ }
}
else
- i->flags |= NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+ {
+ if (nm->num_workers > 1)
+ {
+ feature_name = is_inside ? "nat44-ei-in2out-worker-handoff" :
+ "nat44-ei-out2in-worker-handoff";
+ }
+ else
+ {
+ feature_name = is_inside ? "nat44-ei-in2out" : "nat44-ei-out2in";
+ }
- /* Add/delete external addresses to FIB */
-fib:
- vec_foreach (ap, nm->addresses)
- nat44_ei_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, !is_del);
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", feature_name,
+ sw_if_index, 0, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ if (is_inside)
+ {
+ rv = nat44_ei_hairpinning_enable (0);
+ if (rv)
+ {
+ return rv;
+ }
+ }
- pool_foreach (m, nm->static_mappings)
+ // remove interface
+ pool_put (nm->interfaces, i);
+ }
+
+ if (!is_inside)
{
- if (!(nat44_ei_is_addr_only_static_mapping (m)) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- continue;
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+ outside_fib = nat44_ei_get_outside_fib (nm->outside_fibs, fib_index);
+ if (outside_fib)
+ {
+ outside_fib->refcount--;
+ if (!outside_fib->refcount)
+ {
+ vec_del1 (nm->outside_fibs, outside_fib - nm->outside_fibs);
+ }
+ }
- nat44_ei_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index,
- !is_del);
+ nat44_ei_add_del_addr_to_fib_foreach_addr (sw_if_index, 0);
+ nat44_ei_add_del_addr_to_fib_foreach_addr_only_sm (sw_if_index, 0);
}
return 0;
}
int
-nat44_ei_interface_add_del_output_feature (u32 sw_if_index, u8 is_inside,
- int is_del)
+nat44_ei_add_output_interface (u32 sw_if_index)
{
nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_interface_t *i;
- nat44_ei_address_t *ap;
- nat44_ei_static_mapping_t *m;
+
nat44_ei_outside_fib_t *outside_fib;
- u32 fib_index =
- fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+ nat44_ei_interface_t *i;
+ u32 fib_index;
+ int rv;
fail_if_disabled ();
- if (nm->static_mapping_only && !(nm->static_mapping_connection_tracking))
+ if (nat44_ei_get_interface (nm->interfaces, sw_if_index))
{
- nat44_ei_log_err ("error unsupported");
- return VNET_API_ERROR_UNSUPPORTED;
+ nat44_ei_log_err ("error interface already configured");
+ return VNET_API_ERROR_VALUE_EXIST;
}
- pool_foreach (i, nm->interfaces)
+ if (nat44_ei_get_interface (nm->output_feature_interfaces, sw_if_index))
{
- if (i->sw_if_index == sw_if_index)
- {
- nat44_ei_log_err ("error interface already configured");
- return VNET_API_ERROR_VALUE_EXIST;
- }
+ nat44_ei_log_err ("error interface already configured");
+ return VNET_API_ERROR_VALUE_EXIST;
}
- if (!is_inside)
+ if (nm->num_workers > 1)
{
- vec_foreach (outside_fib, nm->outside_fibs)
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
+ if (rv)
{
- if (outside_fib->fib_index == fib_index)
- {
- if (is_del)
- {
- outside_fib->refcount--;
- if (!outside_fib->refcount)
- vec_del1 (nm->outside_fibs,
- outside_fib - nm->outside_fibs);
- }
- else
- outside_fib->refcount++;
- goto feature_set;
- }
+ return rv;
}
- if (!is_del)
+ rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 1);
+ if (rv)
{
- vec_add2 (nm->outside_fibs, outside_fib, 1);
- outside_fib->refcount = 1;
- outside_fib->fib_index = fib_index;
+ return rv;
+ }
+ rv = vnet_feature_enable_disable (
+ "ip4-unicast", "nat44-ei-out2in-worker-handoff", sw_if_index, 1, 0, 0);
+ if (rv)
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable (
+ "ip4-output", "nat44-ei-in2out-output-worker-handoff", sw_if_index, 1,
+ 0, 0);
+ if (rv)
+ {
+ return rv;
}
}
-
-feature_set:
- if (is_inside)
+ else
{
- int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1);
if (rv)
- return rv;
- rv =
- ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
+ {
+ return rv;
+ }
+ rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 1);
if (rv)
- return rv;
- rv = vnet_feature_enable_disable ("ip4-unicast", "nat44-ei-hairpin-dst",
- sw_if_index, !is_del, 0, 0);
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-unicast", "nat44-ei-out2in",
+ sw_if_index, 1, 0, 0);
if (rv)
- return rv;
- rv = vnet_feature_enable_disable ("ip4-output", "nat44-ei-hairpin-src",
- sw_if_index, !is_del, 0, 0);
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable ("ip4-output", "nat44-ei-in2out-output",
+ sw_if_index, 1, 0, 0);
if (rv)
- return rv;
- goto fq;
+ {
+ return rv;
+ }
+ }
+
+ nat_validate_interface_counters (nm, sw_if_index);
+
+ pool_get (nm->output_feature_interfaces, i);
+ i->sw_if_index = sw_if_index;
+ i->flags = 0;
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+ outside_fib = nat44_ei_get_outside_fib (nm->outside_fibs, fib_index);
+ if (outside_fib)
+ {
+ outside_fib->refcount++;
+ }
+ else
+ {
+ vec_add2 (nm->outside_fibs, outside_fib, 1);
+ outside_fib->fib_index = fib_index;
+ outside_fib->refcount = 1;
+ }
+
+ nat44_ei_add_del_addr_to_fib_foreach_addr (sw_if_index, 1);
+ nat44_ei_add_del_addr_to_fib_foreach_addr_only_sm (sw_if_index, 1);
+
+ return 0;
+}
+
+int
+nat44_ei_del_output_interface (u32 sw_if_index)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+
+ nat44_ei_outside_fib_t *outside_fib;
+ nat44_ei_interface_t *i;
+ u32 fib_index;
+ int rv;
+
+ fail_if_disabled ();
+
+ i = nat44_ei_get_interface (nm->output_feature_interfaces, sw_if_index);
+ if (!i)
+ {
+ nat44_ei_log_err ("error interface couldn't be found");
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
}
if (nm->num_workers > 1)
{
- int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
if (rv)
- return rv;
- rv =
- ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
+ {
+ return rv;
+ }
+ rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 0);
if (rv)
- return rv;
- rv = vnet_feature_enable_disable ("ip4-unicast",
- "nat44-ei-out2in-worker-handoff",
- sw_if_index, !is_del, 0, 0);
+ {
+ return rv;
+ }
+ rv = vnet_feature_enable_disable (
+ "ip4-unicast", "nat44-ei-out2in-worker-handoff", sw_if_index, 0, 0, 0);
if (rv)
- return rv;
+ {
+ return rv;
+ }
rv = vnet_feature_enable_disable (
- "ip4-output", "nat44-ei-in2out-output-worker-handoff", sw_if_index,
- !is_del, 0, 0);
+ "ip4-output", "nat44-ei-in2out-output-worker-handoff", sw_if_index, 0,
+ 0, 0);
if (rv)
- return rv;
+ {
+ return rv;
+ }
}
else
{
- int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, !is_del);
+ rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0);
if (rv)
- return rv;
- rv =
- ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, !is_del);
+ {
+ return rv;
+ }
+ rv = ip4_sv_reass_output_enable_disable_with_refcnt (sw_if_index, 0);
if (rv)
- return rv;
+ {
+ return rv;
+ }
rv = vnet_feature_enable_disable ("ip4-unicast", "nat44-ei-out2in",
- sw_if_index, !is_del, 0, 0);
+ sw_if_index, 0, 0, 0);
if (rv)
- return rv;
+ {
+ return rv;
+ }
rv = vnet_feature_enable_disable ("ip4-output", "nat44-ei-in2out-output",
- sw_if_index, !is_del, 0, 0);
+ sw_if_index, 0, 0, 0);
if (rv)
- return rv;
+ {
+ return rv;
+ }
}
-fq:
- if (nm->fq_in2out_output_index == ~0 && nm->num_workers > 1)
- nm->fq_in2out_output_index =
- vlib_frame_queue_main_init (nm->in2out_output_node_index, 0);
+ pool_put (nm->output_feature_interfaces, i);
- if (nm->fq_out2in_index == ~0 && nm->num_workers > 1)
- nm->fq_out2in_index =
- vlib_frame_queue_main_init (nm->out2in_node_index, 0);
-
- pool_foreach (i, nm->output_feature_interfaces)
+ fib_index =
+ fib_table_get_index_for_sw_if_index (FIB_PROTOCOL_IP4, sw_if_index);
+ outside_fib = nat44_ei_get_outside_fib (nm->outside_fibs, fib_index);
+ if (outside_fib)
{
- if (i->sw_if_index == sw_if_index)
+ outside_fib->refcount--;
+ if (!outside_fib->refcount)
{
- if (is_del)
- pool_put (nm->output_feature_interfaces, i);
- else
- return VNET_API_ERROR_VALUE_EXIST;
-
- goto fib;
+ vec_del1 (nm->outside_fibs, outside_fib - nm->outside_fibs);
}
}
+ nat44_ei_add_del_addr_to_fib_foreach_addr (sw_if_index, 0);
+ nat44_ei_add_del_addr_to_fib_foreach_addr_only_sm (sw_if_index, 0);
+
+ return 0;
+}
+
+int
+nat44_ei_add_del_output_interface (u32 sw_if_index, int is_del)
+{
if (is_del)
{
- nat44_ei_log_err ("error interface couldn't be found");
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ return nat44_ei_del_output_interface (sw_if_index);
}
-
- pool_get (nm->output_feature_interfaces, i);
- i->sw_if_index = sw_if_index;
- i->flags = 0;
- nat_validate_interface_counters (nm, sw_if_index);
- if (is_inside)
- i->flags |= NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
else
- i->flags |= NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
-
- /* Add/delete external addresses to FIB */
-fib:
- if (is_inside)
- return 0;
+ {
+ return nat44_ei_add_output_interface (sw_if_index);
+ }
+}
- vec_foreach (ap, nm->addresses)
- nat44_ei_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, !is_del);
+int
+nat44_ei_del_addresses ()
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_address_t *a, *vec;
+ int error = 0;
- pool_foreach (m, nm->static_mappings)
+ vec = vec_dup (nm->addresses);
+ vec_foreach (a, vec)
{
- if (!((nat44_ei_is_addr_only_static_mapping (m))) ||
- (m->local_addr.as_u32 == m->external_addr.as_u32))
- continue;
+ error = nat44_ei_del_address (a->addr, 0);
- nat44_ei_add_del_addr_to_fib (&m->external_addr, 32, sw_if_index,
- !is_del);
+ if (error)
+ {
+ nat44_ei_log_err ("error occurred while removing adderess");
+ }
}
+ vec_free (vec);
+ vec_free (nm->addresses);
+ nm->addresses = 0;
- return 0;
+ vec_free (nm->auto_add_sw_if_indices);
+ nm->auto_add_sw_if_indices = 0;
+ return error;
}
int
-nat44_ei_plugin_disable ()
+nat44_ei_del_interfaces ()
{
nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_interface_t *i, *vec;
+ nat44_ei_interface_t *i, *pool;
int error = 0;
- // first unregister all nodes from interfaces
- vec = vec_dup (nm->interfaces);
- vec_foreach (i, vec)
+ pool = pool_dup (nm->interfaces);
+ pool_foreach (i, pool)
{
if (nat44_ei_interface_is_inside (i))
- error = nat44_ei_interface_add_del (i->sw_if_index, 1, 1);
+ {
+ error = nat44_ei_del_interface (i->sw_if_index, 1);
+ }
if (nat44_ei_interface_is_outside (i))
- error = nat44_ei_interface_add_del (i->sw_if_index, 0, 1);
+ {
+ error = nat44_ei_del_interface (i->sw_if_index, 0);
+ }
if (error)
{
- nat44_ei_log_err ("error occurred while removing interface %u",
- i->sw_if_index);
+ nat44_ei_log_err ("error occurred while removing interface");
}
}
- vec_free (vec);
+ pool_free (pool);
+ pool_free (nm->interfaces);
nm->interfaces = 0;
+ return error;
+}
- vec = vec_dup (nm->output_feature_interfaces);
- vec_foreach (i, vec)
- {
- if (nat44_ei_interface_is_inside (i))
- error =
- nat44_ei_interface_add_del_output_feature (i->sw_if_index, 1, 1);
- if (nat44_ei_interface_is_outside (i))
- error =
- nat44_ei_interface_add_del_output_feature (i->sw_if_index, 0, 1);
+int
+nat44_ei_del_output_interfaces ()
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_interface_t *i, *pool;
+ int error = 0;
+ pool = pool_dup (nm->output_feature_interfaces);
+ pool_foreach (i, pool)
+ {
+ error = nat44_ei_del_output_interface (i->sw_if_index);
if (error)
{
- nat44_ei_log_err ("error occurred while removing interface %u",
- i->sw_if_index);
+ nat44_ei_log_err ("error occurred while removing output interface");
}
}
- vec_free (vec);
+ pool_free (pool);
+ pool_free (nm->output_feature_interfaces);
nm->output_feature_interfaces = 0;
+ return error;
+}
- nat_ha_disable ();
- nat44_ei_db_free ();
+static clib_error_t *
+nat44_ei_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_interface_t *i;
+ int error = 0;
- nat44_ei_addresses_free (&nm->addresses);
+ if (is_add)
+ return 0;
- vec_free (nm->to_resolve);
- vec_free (nm->auto_add_sw_if_indices);
+ if (!nm->enabled)
+ return 0;
+
+ i = nat44_ei_get_interface (nm->interfaces, sw_if_index);
+ if (i)
+ {
+ bool is_inside = nat44_ei_interface_is_inside (i);
+ bool is_outside = nat44_ei_interface_is_outside (i);
+
+ if (is_inside)
+ {
+ error |= nat44_ei_del_interface (sw_if_index, 1);
+ }
+ if (is_outside)
+ {
+ error |= nat44_ei_del_interface (sw_if_index, 0);
+ }
+
+ if (error)
+ {
+ nat44_ei_log_err ("error occurred while removing interface");
+ }
+ }
+
+ i = nat44_ei_get_interface (nm->output_feature_interfaces, sw_if_index);
+ if (i)
+ {
+ error = nat44_ei_del_output_interface (sw_if_index);
+ if (error)
+ {
+ nat44_ei_log_err ("error occurred while removing output interface");
+ }
+ }
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (nat44_ei_sw_interface_add_del);
+
+int
+nat44_ei_del_static_mappings ()
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_static_mapping_t *m, *pool;
+ int error = 0;
+ pool = pool_dup (nm->static_mappings);
+ pool_foreach (m, pool)
+ {
+ error = nat44_ei_del_static_mapping_internal (
+ m->local_addr, m->external_addr, m->local_port, m->external_port,
+ m->proto, m->vrf_id, ~0, m->flags);
+ if (error)
+ {
+ nat44_ei_log_err ("error occurred while removing mapping");
+ }
+ }
+ pool_free (pool);
+ pool_free (nm->static_mappings);
+ nm->static_mappings = 0;
+
+ vec_free (nm->to_resolve);
nm->to_resolve = 0;
- nm->auto_add_sw_if_indices = 0;
- nm->forwarding_enabled = 0;
+ clib_bihash_free_8_8 (&nm->static_mapping_by_local);
+ clib_bihash_free_8_8 (&nm->static_mapping_by_external);
+
+ return error;
+}
+
+int
+nat44_ei_plugin_disable ()
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_main_per_thread_data_t *tnm;
+ int rc, error = 0;
+
+ fail_if_disabled ();
+
+ nat_ha_disable ();
+
+ rc = nat44_ei_del_static_mappings ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ei_del_addresses ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ei_del_interfaces ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ rc = nat44_ei_del_output_interfaces ();
+ if (rc)
+ error = VNET_API_ERROR_BUG;
+
+ if (nm->pat)
+ {
+ clib_bihash_free_8_8 (&nm->in2out);
+ clib_bihash_free_8_8 (&nm->out2in);
+
+ vec_foreach (tnm, nm->per_thread_data)
+ {
+ nat44_ei_worker_db_free (tnm);
+ }
+ }
- nm->enabled = 0;
clib_memset (&nm->rconfig, 0, sizeof (nm->rconfig));
+ nm->forwarding_enabled = 0;
+ nm->enabled = 0;
+
return error;
}
@@ -984,7 +1314,6 @@ nat44_ei_set_outside_address_and_port (nat44_ei_address_t *addresses,
u32 thread_index, ip4_address_t addr,
u16 port, nat_protocol_t protocol)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
nat44_ei_address_t *a = 0;
u32 address_index;
u16 port_host_byte_order = clib_net_to_host_u16 (port);
@@ -995,21 +1324,13 @@ nat44_ei_set_outside_address_and_port (nat44_ei_address_t *addresses,
continue;
a = addresses + address_index;
- switch (protocol)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_port_refcounts[port_host_byte_order]) \
- return VNET_API_ERROR_INSTANCE_IN_USE; \
- ++a->busy_##n##_port_refcounts[port_host_byte_order]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- return 0;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (nm, "unknown protocol");
- return 1;
- }
+ if (nat44_ei_port_is_used (a, protocol, port_host_byte_order))
+ return VNET_API_ERROR_INSTANCE_IN_USE;
+
+ nat44_ei_port_get (a, protocol, port_host_byte_order);
+ a->busy_ports_per_thread[protocol][thread_index]++;
+ a->busy_ports[protocol]++;
+ return 0;
}
return VNET_API_ERROR_NO_SUCH_ENTRY;
@@ -1044,7 +1365,6 @@ nat44_ei_free_outside_address_and_port (nat44_ei_address_t *addresses,
u32 thread_index, ip4_address_t *addr,
u16 port, nat_protocol_t protocol)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
nat44_ei_address_t *a;
u32 address_index;
u16 port_host_byte_order = clib_net_to_host_u16 (port);
@@ -1058,21 +1378,9 @@ nat44_ei_free_outside_address_and_port (nat44_ei_address_t *addresses,
ASSERT (address_index < vec_len (addresses));
a = addresses + address_index;
-
- switch (protocol)
- {
-#define _(N, i, n, s) \
- case NAT_PROTOCOL_##N: \
- ASSERT (a->busy_##n##_port_refcounts[port_host_byte_order] >= 1); \
- --a->busy_##n##_port_refcounts[port_host_byte_order]; \
- a->busy_##n##_ports--; \
- a->busy_##n##_ports_per_thread[thread_index]--; \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (nm, "unknown protocol");
- return;
- }
+ nat44_ei_port_put (a, protocol, port_host_byte_order);
+ a->busy_ports[protocol]--;
+ a->busy_ports_per_thread[protocol][thread_index]--;
}
void
@@ -1102,7 +1410,8 @@ nat44_ei_free_session_data_v2 (nat44_ei_main_t *nm, nat44_ei_session_t *s,
/* log NAT event */
nat_ipfix_logging_nat44_ses_delete (
thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
- s->nat_proto, s->in2out.port, s->out2in.port, s->in2out.fib_index);
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
s->ext_host_port, s->nat_proto, s->out2in.fib_index,
@@ -1270,7 +1579,8 @@ nat44_ei_free_session_data (nat44_ei_main_t *nm, nat44_ei_session_t *s,
nat_ipfix_logging_nat44_ses_delete (
thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
- s->nat_proto, s->in2out.port, s->out2in.port, s->in2out.fib_index);
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_ha_sdel (&s->out2in.addr, s->out2in.port, &s->ext_host_addr,
s->ext_host_port, s->nat_proto, s->out2in.fib_index,
@@ -1425,6 +1735,20 @@ nat44_ei_get_in2out_worker_index (ip4_header_t *ip0, u32 rx_fib_index0,
}
u32
+nat44_ei_get_thread_idx_by_port (u16 e_port)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ u32 thread_idx = nm->num_workers;
+ if (nm->num_workers > 1)
+ {
+ thread_idx = nm->first_worker_index +
+ nm->workers[(e_port - 1024) / nm->port_per_thread %
+ _vec_len (nm->workers)];
+ }
+ return thread_idx;
+}
+
+u32
nat44_ei_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip0,
u32 rx_fib_index0, u8 is_output)
{
@@ -1502,9 +1826,8 @@ nat44_ei_get_out2in_worker_index (vlib_buffer_t *b, ip4_header_t *ip0,
}
/* worker by outside port */
- next_worker_index = nm->first_worker_index;
- next_worker_index +=
- nm->workers[(clib_net_to_host_u16 (port) - 1024) / nm->port_per_thread];
+ next_worker_index =
+ nat44_ei_get_thread_idx_by_port (clib_net_to_host_u16 (port));
return next_worker_index;
}
@@ -1522,75 +1845,95 @@ nat44_ei_alloc_default_cb (nat44_ei_address_t *addresses, u32 fib_index,
if (vec_len (addresses) > 0)
{
-
int s_addr_offset = s_addr.as_u32 % vec_len (addresses);
for (i = s_addr_offset; i < vec_len (addresses); ++i)
{
a = addresses + i;
- switch (proto)
+
+ if (a->busy_ports_per_thread[proto][thread_index] < port_per_thread)
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_ports_per_thread[thread_index] < port_per_thread) \
- { \
- if (a->fib_index == fib_index) \
- { \
- while (1) \
- { \
- portnum = (port_per_thread * snat_thread_index) + \
- nat_random_port (&nm->random_seed, 0, \
- port_per_thread - 1) + \
- 1024; \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- --a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports_per_thread[thread_index]++; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16 (portnum); \
- return 0; \
- } \
- } \
- else if (a->fib_index == ~0) \
- { \
- ga = a; \
- } \
- } \
- break;
- foreach_nat_protocol;
- default:
- nat_elog_info (nm, "unknown protocol");
- return 1;
+ if (a->fib_index == fib_index)
+ {
+ while (1)
+ {
+ portnum = (port_per_thread * snat_thread_index) +
+ nat_random_port (&nm->random_seed, 0,
+ port_per_thread - 1) +
+ 1024;
+ if (nat44_ei_port_is_used (a, proto, portnum))
+ continue;
+ nat44_ei_port_get (a, proto, portnum);
+ a->busy_ports_per_thread[proto][thread_index]++;
+ a->busy_ports[proto]++;
+ *addr = a->addr;
+ *port = clib_host_to_net_u16 (portnum);
+ return 0;
+ }
+ }
+ else if (a->fib_index == ~0)
+ {
+ ga = a;
+ }
}
}
for (i = 0; i < s_addr_offset; ++i)
{
a = addresses + i;
- switch (proto)
+ if (a->busy_ports_per_thread[proto][thread_index] < port_per_thread)
{
- foreach_nat_protocol;
- default:
- nat_elog_info (nm, "unknown protocol");
- return 1;
+ if (a->fib_index == fib_index)
+ {
+ while (1)
+ {
+ portnum = (port_per_thread * snat_thread_index) +
+ nat_random_port (&nm->random_seed, 0,
+ port_per_thread - 1) +
+ 1024;
+ if (nat44_ei_port_is_used (a, proto, portnum))
+ continue;
+ nat44_ei_port_get (a, proto, portnum);
+ a->busy_ports_per_thread[proto][thread_index]++;
+ a->busy_ports[proto]++;
+ *addr = a->addr;
+ *port = clib_host_to_net_u16 (portnum);
+ return 0;
+ }
+ }
+ else if (a->fib_index == ~0)
+ {
+ ga = a;
+ }
}
}
- if (ga)
- {
- a = ga;
- // fake fib index to reuse macro
- fib_index = ~0;
- switch (proto)
+
+ if (ga)
{
- foreach_nat_protocol;
- default : nat_elog_info (nm, "unknown protocol");
- return 1;
+ a = ga;
+ if (a->busy_ports_per_thread[proto][thread_index] < port_per_thread)
+ {
+ if (a->fib_index == ~0)
+ {
+ while (1)
+ {
+ portnum = (port_per_thread * snat_thread_index) +
+ nat_random_port (&nm->random_seed, 0,
+ port_per_thread - 1) +
+ 1024;
+ if (nat44_ei_port_is_used (a, proto, portnum))
+ continue;
+ nat44_ei_port_get (a, proto, portnum);
+ a->busy_ports_per_thread[proto][thread_index]++;
+ a->busy_ports[proto]++;
+ *addr = a->addr;
+ *port = clib_host_to_net_u16 (portnum);
+ return 0;
+ }
+ }
+ }
}
}
- }
-
-#undef _
/* Totally out of translations to use... */
nat_ipfix_logging_addresses_exhausted (thread_index, 0);
@@ -1612,30 +1955,20 @@ nat44_ei_alloc_range_cb (nat44_ei_address_t *addresses, u32 fib_index,
if (!vec_len (addresses))
goto exhausted;
- switch (proto)
- {
-#define _(N, i, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_ports < ports) \
- { \
- while (1) \
- { \
- portnum = nat_random_port (&nm->random_seed, nm->start_port, \
- nm->end_port); \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- ++a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16 (portnum); \
- return 0; \
- } \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (nm, "unknown protocol");
- return 1;
+ if (a->busy_ports[proto] < ports)
+ {
+ while (1)
+ {
+ portnum =
+ nat_random_port (&nm->random_seed, nm->start_port, nm->end_port);
+ if (nat44_ei_port_is_used (a, proto, portnum))
+ continue;
+ nat44_ei_port_get (a, proto, portnum);
+ a->busy_ports[proto]++;
+ *addr = a->addr;
+ *port = clib_host_to_net_u16 (portnum);
+ return 0;
+ }
}
exhausted:
@@ -1659,32 +1992,22 @@ nat44_ei_alloc_mape_cb (nat44_ei_address_t *addresses, u32 fib_index,
if (!vec_len (addresses))
goto exhausted;
- switch (proto)
- {
-#define _(N, i, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_ports < ports) \
- { \
- while (1) \
- { \
- A = nat_random_port (&nm->random_seed, 1, \
- pow2_mask (nm->psid_offset)); \
- j = nat_random_port (&nm->random_seed, 0, pow2_mask (m)); \
- portnum = A | (nm->psid << nm->psid_offset) | (j << (16 - m)); \
- if (a->busy_##n##_port_refcounts[portnum]) \
- continue; \
- ++a->busy_##n##_port_refcounts[portnum]; \
- a->busy_##n##_ports++; \
- *addr = a->addr; \
- *port = clib_host_to_net_u16 (portnum); \
- return 0; \
- } \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (nm, "unknown protocol");
- return 1;
+ if (a->busy_ports[proto] < ports)
+ {
+ while (1)
+ {
+ A =
+ nat_random_port (&nm->random_seed, 1, pow2_mask (nm->psid_offset));
+ j = nat_random_port (&nm->random_seed, 0, pow2_mask (m));
+ portnum = A | (nm->psid << nm->psid_offset) | (j << (16 - m));
+ if (nat44_ei_port_is_used (a, proto, portnum))
+ continue;
+ nat44_ei_port_get (a, proto, portnum);
+ a->busy_ports[proto]++;
+ *addr = a->addr;
+ *port = clib_host_to_net_u16 (portnum);
+ return 0;
+ }
}
exhausted:
@@ -1725,30 +2048,6 @@ nat44_ei_set_alloc_mape (u16 psid, u16 psid_offset, u16 psid_length)
nm->psid_length = psid_length;
}
-static void
-nat44_ei_add_static_mapping_when_resolved (ip4_address_t l_addr, u16 l_port,
- u16 e_port, nat_protocol_t proto,
- u32 sw_if_index, u32 vrf_id,
- int addr_only, int identity_nat,
- u8 *tag)
-{
- nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_static_map_resolve_t *rp;
-
- vec_add2 (nm->to_resolve, rp, 1);
- clib_memset (rp, 0, sizeof (*rp));
-
- rp->l_addr.as_u32 = l_addr.as_u32;
- rp->l_port = l_port;
- rp->e_port = e_port;
- rp->sw_if_index = sw_if_index;
- rp->vrf_id = vrf_id;
- rp->proto = proto;
- rp->addr_only = addr_only;
- rp->identity_nat = identity_nat;
- rp->tag = vec_dup (tag);
-}
-
void
nat44_ei_delete_session (nat44_ei_main_t *nm, nat44_ei_session_t *ses,
u32 thread_index)
@@ -1787,10 +2086,13 @@ nat44_ei_del_session (nat44_ei_main_t *nm, ip4_address_t *addr, u16 port,
{
nat44_ei_main_per_thread_data_t *tnm;
clib_bihash_kv_8_8_t kv, value;
- u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
+ u32 fib_index;
nat44_ei_session_t *s;
clib_bihash_8_8_t *t;
+ fail_if_disabled ();
+
+ fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id);
init_nat_k (&kv, *addr, port, fib_index, proto);
t = is_in ? &nm->in2out : &nm->out2in;
if (!clib_bihash_search_8_8 (t, &kv, &value))
@@ -1812,19 +2114,6 @@ nat44_ei_del_session (nat44_ei_main_t *nm, ip4_address_t *addr, u16 port,
return VNET_API_ERROR_NO_SUCH_ENTRY;
}
-u32
-nat44_ei_get_thread_idx_by_port (u16 e_port)
-{
- nat44_ei_main_t *nm = &nat44_ei_main;
- u32 thread_idx = nm->num_workers;
- if (nm->num_workers > 1)
- {
- thread_idx = nm->first_worker_index +
- nm->workers[(e_port - 1024) / nm->port_per_thread];
- }
- return thread_idx;
-}
-
void
nat44_ei_add_del_addr_to_fib (ip4_address_t *addr, u8 p_len, u32 sw_if_index,
int is_add)
@@ -1840,412 +2129,560 @@ nat44_ei_add_del_addr_to_fib (ip4_address_t *addr, u8 p_len, u32 sw_if_index,
u32 fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
if (is_add)
- fib_table_entry_update_one_path (
- fib_index, &prefix, nm->fib_src_low,
- (FIB_ENTRY_FLAG_CONNECTED | FIB_ENTRY_FLAG_LOCAL |
- FIB_ENTRY_FLAG_EXCLUSIVE),
- DPO_PROTO_IP4, NULL, sw_if_index, ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
+ {
+ fib_table_entry_update_one_path (fib_index, &prefix, nm->fib_src_low,
+ (FIB_ENTRY_FLAG_CONNECTED |
+ FIB_ENTRY_FLAG_LOCAL |
+ FIB_ENTRY_FLAG_EXCLUSIVE),
+ DPO_PROTO_IP4, NULL, sw_if_index, ~0, 1,
+ NULL, FIB_ROUTE_PATH_FLAG_NONE);
+ }
else
- fib_table_entry_delete (fib_index, &prefix, nm->fib_src_low);
+ {
+ fib_table_entry_delete (fib_index, &prefix, nm->fib_src_low);
+ }
}
int
-nat44_ei_add_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
- u16 l_port, u16 e_port, nat_protocol_t proto,
- u32 sw_if_index, u32 vrf_id, u8 addr_only,
- u8 identity_nat, u8 *tag, u8 is_add)
+nat44_ei_reserve_port (ip4_address_t addr, u16 port, nat_protocol_t proto)
{
+ u32 ti = nat44_ei_get_thread_idx_by_port (port);
nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_static_mapping_t *m = 0;
- clib_bihash_kv_8_8_t kv, value;
nat44_ei_address_t *a = 0;
- u32 fib_index = ~0;
- nat44_ei_interface_t *interface;
- nat44_ei_main_per_thread_data_t *tnm;
- nat44_ei_user_key_t u_key;
- nat44_ei_user_t *u;
- dlist_elt_t *head, *elt;
- u32 elt_index, head_index;
- u32 ses_index;
- u64 user_index;
- nat44_ei_session_t *s;
- nat44_ei_static_map_resolve_t *rp, *rp_match = 0;
- nat44_ei_lb_addr_port_t *local;
- u32 find = ~0;
int i;
- if (sw_if_index != ~0)
+ for (i = 0; i < vec_len (nm->addresses); i++)
{
- ip4_address_t *first_int_addr;
+ a = nm->addresses + i;
- for (i = 0; i < vec_len (nm->to_resolve); i++)
- {
- rp = nm->to_resolve + i;
- if (rp->sw_if_index != sw_if_index ||
- rp->l_addr.as_u32 != l_addr.as_u32 || rp->vrf_id != vrf_id ||
- rp->addr_only != addr_only)
- continue;
+ if (a->addr.as_u32 != addr.as_u32)
+ continue;
- if (!addr_only)
- {
- if ((rp->l_port != l_port && rp->e_port != e_port) ||
- rp->proto != proto)
- continue;
- }
+ if (nat44_ei_port_is_used (a, proto, port))
+ continue;
- rp_match = rp;
- break;
+ nat44_ei_port_get (a, proto, port);
+ if (port > 1024)
+ {
+ a->busy_ports[proto]++;
+ a->busy_ports_per_thread[proto][ti]++;
}
+ return 0;
+ }
- /* Might be already set... */
- first_int_addr = ip4_interface_first_address (
- nm->ip4_main, sw_if_index, 0 /* just want the address */);
+ return 1;
+}
- if (is_add)
- {
- if (rp_match)
- return VNET_API_ERROR_VALUE_EXIST;
+int
+nat44_ei_free_port (ip4_address_t addr, u16 port, nat_protocol_t proto)
+{
+ u32 ti = nat44_ei_get_thread_idx_by_port (port);
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_address_t *a = 0;
+ int i;
- nat44_ei_add_static_mapping_when_resolved (
- l_addr, l_port, e_port, proto, sw_if_index, vrf_id, addr_only,
- identity_nat, tag);
+ for (i = 0; i < vec_len (nm->addresses); i++)
+ {
+ a = nm->addresses + i;
- /* DHCP resolution required? */
- if (!first_int_addr)
- return 0;
+ if (a->addr.as_u32 != addr.as_u32)
+ continue;
- e_addr.as_u32 = first_int_addr->as_u32;
- /* Identity mapping? */
- if (l_addr.as_u32 == 0)
- l_addr.as_u32 = e_addr.as_u32;
- }
- else
+ nat44_ei_port_put (a, proto, port);
+ if (port > 1024)
{
- if (!rp_match)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- vec_del1 (nm->to_resolve, i);
-
- if (!first_int_addr)
- return 0;
-
- e_addr.as_u32 = first_int_addr->as_u32;
- /* Identity mapping? */
- if (l_addr.as_u32 == 0)
- l_addr.as_u32 = e_addr.as_u32;
+ a->busy_ports[proto]--;
+ a->busy_ports_per_thread[proto][ti]--;
}
+ return 0;
}
- init_nat_k (&kv, e_addr, addr_only ? 0 : e_port, 0, addr_only ? 0 : proto);
- if (!clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
- m = pool_elt_at_index (nm->static_mappings, value.value);
+ return 1;
+}
- if (is_add)
- {
- if (m)
- {
- // identity mapping for second vrf
- if (nat44_ei_is_identity_static_mapping (m))
- {
- pool_foreach (local, m->locals)
- {
- if (local->vrf_id == vrf_id)
- return VNET_API_ERROR_VALUE_EXIST;
- }
- pool_get (m->locals, local);
- local->vrf_id = vrf_id;
- local->fib_index = fib_table_find_or_create_and_lock (
- FIB_PROTOCOL_IP4, vrf_id, nm->fib_src_low);
- init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
- m->proto, 0, m - nm->static_mappings);
- clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 1);
- return 0;
- }
- return VNET_API_ERROR_VALUE_EXIST;
- }
+void
+nat44_ei_add_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
+ nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
+ u32 flags, ip4_address_t pool_addr, u8 *tag)
+{
+ nat44_ei_static_map_resolve_t *rp;
+ nat44_ei_main_t *nm = &nat44_ei_main;
- /* Convert VRF id to FIB index */
- if (vrf_id != ~0)
- {
- fib_index = fib_table_find_or_create_and_lock (
- FIB_PROTOCOL_IP4, vrf_id, nm->fib_src_low);
- }
- /* If not specified use inside VRF id from NAT44 plugin config */
- else
- {
- fib_index = nm->inside_fib_index;
- vrf_id = nm->inside_vrf_id;
- fib_table_lock (fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
- }
+ vec_add2 (nm->to_resolve, rp, 1);
+ rp->l_addr.as_u32 = l_addr.as_u32;
+ rp->l_port = l_port;
+ rp->e_port = e_port;
+ rp->sw_if_index = sw_if_index;
+ rp->vrf_id = vrf_id;
+ rp->proto = proto;
+ rp->flags = flags;
+ rp->pool_addr = pool_addr;
+ rp->tag = vec_dup (tag);
+}
- if (!identity_nat)
- {
- init_nat_k (&kv, l_addr, addr_only ? 0 : l_port, fib_index,
- addr_only ? 0 : proto);
- if (!clib_bihash_search_8_8 (&nm->static_mapping_by_local, &kv,
- &value))
- return VNET_API_ERROR_VALUE_EXIST;
- }
+int
+nat44_ei_get_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
+ nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
+ u32 flags, int *out)
+{
+ nat44_ei_static_map_resolve_t *rp;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ int i;
+
+ for (i = 0; i < vec_len (nm->to_resolve); i++)
+ {
+ rp = nm->to_resolve + i;
- /* Find external address in allocated addresses and reserve port for
- address and port pair mapping when dynamic translations enabled */
- if (!(addr_only || nm->static_mapping_only))
+ if (rp->sw_if_index == sw_if_index && rp->vrf_id == vrf_id)
{
- for (i = 0; i < vec_len (nm->addresses); i++)
+ if (is_sm_identity_nat (rp->flags) && is_sm_identity_nat (flags))
{
- if (nm->addresses[i].addr.as_u32 == e_addr.as_u32)
+ if (!(is_sm_addr_only (rp->flags) && is_sm_addr_only (flags)))
{
- a = nm->addresses + i;
- /* External port must be unused */
- switch (proto)
+ if (rp->e_port != e_port || rp->proto != proto)
{
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- if (a->busy_##n##_port_refcounts[e_port]) \
- return VNET_API_ERROR_INVALID_VALUE; \
- ++a->busy_##n##_port_refcounts[e_port]; \
- if (e_port > 1024) \
- { \
- a->busy_##n##_ports++; \
- a->busy_##n##_ports_per_thread[nat44_ei_get_thread_idx_by_port ( \
- e_port)]++; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : nat_elog_info (nm, "unknown protocol");
- return VNET_API_ERROR_INVALID_VALUE_2;
+ continue;
}
- break;
}
}
- /* External address must be allocated */
- if (!a && (l_addr.as_u32 != e_addr.as_u32))
+ else if (rp->l_addr.as_u32 == l_addr.as_u32)
{
- if (sw_if_index != ~0)
+ if (!(is_sm_addr_only (rp->flags) && is_sm_addr_only (flags)))
{
- for (i = 0; i < vec_len (nm->to_resolve); i++)
+ if (rp->l_port != l_port || rp->e_port != e_port ||
+ rp->proto != proto)
{
- rp = nm->to_resolve + i;
- if (rp->addr_only)
- continue;
- if (rp->sw_if_index != sw_if_index &&
- rp->l_addr.as_u32 != l_addr.as_u32 &&
- rp->vrf_id != vrf_id && rp->l_port != l_port &&
- rp->e_port != e_port && rp->proto != proto)
- continue;
-
- vec_del1 (nm->to_resolve, i);
- break;
+ continue;
}
}
- return VNET_API_ERROR_NO_SUCH_ENTRY;
}
+ else
+ {
+ continue;
+ }
+ if (out)
+ {
+ *out = i;
+ }
+ return 0;
}
+ }
+ return 1;
+}
- pool_get (nm->static_mappings, m);
- clib_memset (m, 0, sizeof (*m));
- m->tag = vec_dup (tag);
- m->local_addr = l_addr;
- m->external_addr = e_addr;
+int
+nat44_ei_del_resolve_record (ip4_address_t l_addr, u16 l_port, u16 e_port,
+ nat_protocol_t proto, u32 vrf_id, u32 sw_if_index,
+ u32 flags)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ int i;
+ if (!nat44_ei_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags, &i))
+ {
+ vec_del1 (nm->to_resolve, i);
+ return 0;
+ }
+ return 1;
+}
- if (addr_only)
- m->flags |= NAT44_EI_STATIC_MAPPING_FLAG_ADDR_ONLY;
- else
+void
+delete_matching_dynamic_sessions (const nat44_ei_static_mapping_t *m,
+ u32 worker_index)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ clib_bihash_kv_8_8_t kv, value;
+ nat44_ei_session_t *s;
+ nat44_ei_user_key_t u_key;
+ nat44_ei_user_t *u;
+ nat44_ei_main_per_thread_data_t *tnm;
+ dlist_elt_t *head, *elt;
+ u32 elt_index, head_index;
+ u32 ses_index;
+ u64 user_index;
+
+ if (nm->static_mapping_only)
+ return;
+
+ tnm = vec_elt_at_index (nm->per_thread_data, worker_index);
+
+ u_key.addr = m->local_addr;
+ u_key.fib_index = m->fib_index;
+ kv.key = u_key.as_u64;
+ if (!clib_bihash_search_8_8 (&tnm->user_hash, &kv, &value))
+ {
+ user_index = value.value;
+ u = pool_elt_at_index (tnm->users, user_index);
+ if (u->nsessions)
{
- m->local_port = l_port;
- m->external_port = e_port;
- m->proto = proto;
+ head_index = u->sessions_per_user_list_head_index;
+ head = pool_elt_at_index (tnm->list_pool, head_index);
+ elt_index = head->next;
+ elt = pool_elt_at_index (tnm->list_pool, elt_index);
+ ses_index = elt->value;
+ while (ses_index != ~0)
+ {
+ s = pool_elt_at_index (tnm->sessions, ses_index);
+ elt = pool_elt_at_index (tnm->list_pool, elt->next);
+ ses_index = elt->value;
+
+ if (nat44_ei_is_session_static (s))
+ continue;
+
+ if (!is_sm_addr_only (m->flags) &&
+ s->in2out.port != m->local_port)
+ continue;
+
+ nat44_ei_free_session_data_v2 (nm, s, tnm - nm->per_thread_data,
+ 0);
+ nat44_ei_delete_session (nm, s, tnm - nm->per_thread_data);
+
+ if (!is_sm_addr_only (m->flags))
+ break;
+ }
}
+ }
+}
+
+int
+nat44_ei_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag)
+
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
- if (identity_nat)
+ if (is_sm_switch_address (flags))
+ {
+ if (!nat44_ei_get_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags, 0))
{
- m->flags |= NAT44_EI_STATIC_MAPPING_FLAG_IDENTITY_NAT;
- pool_get (m->locals, local);
- local->vrf_id = vrf_id;
- local->fib_index = fib_index;
+ return VNET_API_ERROR_VALUE_EXIST;
}
- else
+
+ nat44_ei_add_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags, pool_addr, tag);
+
+ ip4_address_t *first_int_addr =
+ ip4_interface_first_address (nm->ip4_main, sw_if_index, 0);
+ if (!first_int_addr)
{
- m->vrf_id = vrf_id;
- m->fib_index = fib_index;
+ // dhcp resolution required
+ return 0;
}
- if (nm->num_workers > 1)
+ e_addr.as_u32 = first_int_addr->as_u32;
+ }
+
+ return nat44_ei_add_static_mapping_internal (l_addr, e_addr, l_port, e_port,
+ proto, vrf_id, sw_if_index,
+ flags, pool_addr, tag);
+}
+
+int
+nat44_ei_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+
+ if (is_sm_switch_address (flags))
+ {
+
+ if (nat44_ei_del_resolve_record (l_addr, l_port, e_port, proto, vrf_id,
+ sw_if_index, flags))
{
- ip4_header_t ip = {
- .src_address = m->local_addr,
- };
- vec_add1 (m->workers,
- nat44_ei_get_in2out_worker_index (&ip, m->fib_index, 0));
- tnm = vec_elt_at_index (nm->per_thread_data, m->workers[0]);
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
}
- else
- tnm = vec_elt_at_index (nm->per_thread_data, nm->num_workers);
- init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto, 0,
- m - nm->static_mappings);
- clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 1);
+ ip4_address_t *first_int_addr =
+ ip4_interface_first_address (nm->ip4_main, sw_if_index, 0);
+ if (!first_int_addr)
+ {
+ // dhcp resolution required
+ return 0;
+ }
- init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
- m - nm->static_mappings);
- clib_bihash_add_del_8_8 (&nm->static_mapping_by_external, &kv, 1);
+ e_addr.as_u32 = first_int_addr->as_u32;
+ }
- /* Delete dynamic sessions matching local address (+ local port) */
- // TODO: based on type of NAT EI/ED
- if (!(nm->static_mapping_only))
- {
- u_key.addr = m->local_addr;
- u_key.fib_index = m->fib_index;
- kv.key = u_key.as_u64;
- if (!clib_bihash_search_8_8 (&tnm->user_hash, &kv, &value))
- {
- user_index = value.value;
- u = pool_elt_at_index (tnm->users, user_index);
- if (u->nsessions)
- {
- head_index = u->sessions_per_user_list_head_index;
- head = pool_elt_at_index (tnm->list_pool, head_index);
- elt_index = head->next;
- elt = pool_elt_at_index (tnm->list_pool, elt_index);
- ses_index = elt->value;
- while (ses_index != ~0)
- {
- s = pool_elt_at_index (tnm->sessions, ses_index);
- elt = pool_elt_at_index (tnm->list_pool, elt->next);
- ses_index = elt->value;
+ return nat44_ei_del_static_mapping_internal (
+ l_addr, e_addr, l_port, e_port, proto, vrf_id, sw_if_index, flags);
+}
- if (nat44_ei_is_session_static (s))
- continue;
+static int
+nat44_ei_add_static_mapping_internal (ip4_address_t l_addr,
+ ip4_address_t e_addr, u16 l_port,
+ u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ clib_bihash_kv_8_8_t kv, value;
+ nat44_ei_lb_addr_port_t *local;
+ nat44_ei_static_mapping_t *m;
+ u32 fib_index = ~0;
+ u32 worker_index;
- if (!addr_only && s->in2out.port != m->local_port)
- continue;
+ fail_if_disabled ();
- nat44_ei_free_session_data_v2 (
- nm, s, tnm - nm->per_thread_data, 0);
- nat44_ei_delete_session (nm, s,
- tnm - nm->per_thread_data);
+ if (is_sm_addr_only (flags))
+ {
+ e_port = l_port = proto = 0;
+ }
- if (!addr_only)
- break;
- }
- }
+ if (is_sm_identity_nat (flags))
+ {
+ l_port = e_port;
+ l_addr.as_u32 = e_addr.as_u32;
+ }
+
+ // fib index 0
+ init_nat_k (&kv, e_addr, e_port, 0, proto);
+
+ if (!clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
+ {
+ m = pool_elt_at_index (nm->static_mappings, value.value);
+ if (!is_sm_identity_nat (m->flags))
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+
+ // case:
+ // adding local identity nat record for different vrf table
+ pool_foreach (local, m->locals)
+ {
+ if (local->vrf_id == vrf_id)
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
}
}
+
+ pool_get (m->locals, local);
+
+ local->vrf_id = vrf_id;
+ local->fib_index = fib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, vrf_id, nm->fib_src_low);
+
+ init_nat_kv (&kv, m->local_addr, m->local_port, local->fib_index,
+ m->proto, 0, m - nm->static_mappings);
+ clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 1);
+
+ return 0;
+ }
+
+ if (vrf_id != ~0)
+ {
+ fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4, vrf_id,
+ nm->fib_src_low);
}
else
{
- if (!m)
+ // fallback to default vrf
+ vrf_id = nm->inside_vrf_id;
+ fib_index = nm->inside_fib_index;
+ fib_table_lock (fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
+ }
+
+ if (!is_sm_identity_nat (flags))
+ {
+ init_nat_k (&kv, l_addr, l_port, fib_index, proto);
+ if (!clib_bihash_search_8_8 (&nm->static_mapping_by_local, &kv, &value))
{
- if (sw_if_index != ~0)
- return 0;
- else
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ return VNET_API_ERROR_VALUE_EXIST;
}
+ }
- if (identity_nat)
+ if (!(is_sm_addr_only (flags) || nm->static_mapping_only))
+ {
+ if (nat44_ei_reserve_port (e_addr, e_port, proto))
{
- if (vrf_id == ~0)
- vrf_id = nm->inside_vrf_id;
-
- pool_foreach (local, m->locals)
+ // remove resolve record
+ if ((is_sm_switch_address (flags)) && !is_sm_identity_nat (flags))
{
- if (local->vrf_id == vrf_id)
- find = local - m->locals;
+ nat44_ei_del_resolve_record (l_addr, l_port, e_port, proto,
+ vrf_id, sw_if_index, flags);
}
- if (find == ~0)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+ }
+
+ pool_get (nm->static_mappings, m);
+ clib_memset (m, 0, sizeof (*m));
- local = pool_elt_at_index (m->locals, find);
- fib_index = local->fib_index;
- pool_put (m->locals, local);
+ m->flags = flags;
+ m->local_addr = l_addr;
+ m->external_addr = e_addr;
+
+ m->tag = vec_dup (tag);
+
+ if (!is_sm_addr_only (flags))
+ {
+ m->local_port = l_port;
+ m->external_port = e_port;
+ m->proto = proto;
+ }
+
+ if (is_sm_identity_nat (flags))
+ {
+ pool_get (m->locals, local);
+
+ local->vrf_id = vrf_id;
+ local->fib_index = fib_index;
+ }
+ else
+ {
+ m->vrf_id = vrf_id;
+ m->fib_index = fib_index;
+ }
+
+ init_nat_kv (&kv, m->local_addr, m->local_port, fib_index, m->proto, 0,
+ m - nm->static_mappings);
+ clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 1);
+
+ init_nat_kv (&kv, m->external_addr, m->external_port, 0, m->proto, 0,
+ m - nm->static_mappings);
+ clib_bihash_add_del_8_8 (&nm->static_mapping_by_external, &kv, 1);
+
+ if (nm->num_workers > 1)
+ {
+ // store worker index for this record
+ ip4_header_t ip = {
+ .src_address = m->local_addr,
+ };
+ worker_index = nat44_ei_get_in2out_worker_index (&ip, m->fib_index, 0);
+ vec_add1 (m->workers, worker_index);
+ }
+ else
+ {
+ worker_index = nm->num_workers;
+ }
+ delete_matching_dynamic_sessions (m, worker_index);
+
+ if (is_sm_addr_only (flags))
+ {
+ nat44_ei_add_del_addr_to_fib_foreach_out_if (&e_addr, 1);
+ }
+
+ return 0;
+}
+
+static int
+nat44_ei_del_static_mapping_internal (ip4_address_t l_addr,
+ ip4_address_t e_addr, u16 l_port,
+ u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags)
+{
+ nat44_ei_main_per_thread_data_t *tnm;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ clib_bihash_kv_8_8_t kv, value;
+ nat44_ei_lb_addr_port_t *local;
+ nat44_ei_static_mapping_t *m;
+ u32 fib_index = ~0;
+ nat44_ei_user_key_t u_key;
+
+ fail_if_disabled ();
+
+ if (is_sm_addr_only (flags))
+ {
+ e_port = l_port = proto = 0;
+ }
+
+ if (is_sm_identity_nat (flags))
+ {
+ l_port = e_port;
+ l_addr.as_u32 = e_addr.as_u32;
+ }
+
+ // fib index 0
+ init_nat_k (&kv, e_addr, e_port, 0, proto);
+
+ if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
+ {
+ if (is_sm_switch_address (flags))
+ {
+ return 0;
}
- else
- fib_index = m->fib_index;
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+
+ m = pool_elt_at_index (nm->static_mappings, value.value);
- /* Free external address port */
- if (!(addr_only || nm->static_mapping_only))
+ if (is_sm_identity_nat (flags))
+ {
+ u8 found = 0;
+
+ if (vrf_id == ~0)
{
- for (i = 0; i < vec_len (nm->addresses); i++)
+ vrf_id = nm->inside_vrf_id;
+ }
+
+ pool_foreach (local, m->locals)
+ {
+ if (local->vrf_id == vrf_id)
{
- if (nm->addresses[i].addr.as_u32 == e_addr.as_u32)
- {
- a = nm->addresses + i;
- switch (proto)
- {
-#define _(N, j, n, s) \
- case NAT_PROTOCOL_##N: \
- --a->busy_##n##_port_refcounts[e_port]; \
- if (e_port > 1024) \
- { \
- a->busy_##n##_ports--; \
- a->busy_##n##_ports_per_thread[nat44_ei_get_thread_idx_by_port ( \
- e_port)]--; \
- } \
- break;
- foreach_nat_protocol
-#undef _
- default : return VNET_API_ERROR_INVALID_VALUE_2;
- }
- break;
- }
+ local = pool_elt_at_index (m->locals, local - m->locals);
+ fib_index = local->fib_index;
+ pool_put (m->locals, local);
+ found = 1;
}
}
+ if (!found)
+ {
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ }
+ }
+ else
+ {
+ fib_index = m->fib_index;
+ }
+
+ if (!(is_sm_addr_only (flags) || nm->static_mapping_only))
+ {
+ if (nat44_ei_free_port (e_addr, e_port, proto))
+ {
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+ }
+ init_nat_k (&kv, l_addr, l_port, fib_index, proto);
+ clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 0);
+
+ if (!nm->static_mapping_only || nm->static_mapping_connection_tracking)
+ {
+ // delete sessions for static mapping
if (nm->num_workers > 1)
tnm = vec_elt_at_index (nm->per_thread_data, m->workers[0]);
else
tnm = vec_elt_at_index (nm->per_thread_data, nm->num_workers);
- init_nat_k (&kv, m->local_addr, m->local_port, fib_index, m->proto);
- clib_bihash_add_del_8_8 (&nm->static_mapping_by_local, &kv, 0);
-
- /* Delete session(s) for static mapping if exist */
- if (!(nm->static_mapping_only) ||
- (nm->static_mapping_only && nm->static_mapping_connection_tracking))
- {
- u_key.addr = m->local_addr;
- u_key.fib_index = fib_index;
- kv.key = u_key.as_u64;
- nat44_ei_static_mapping_del_sessions (nm, tnm, u_key, addr_only,
- e_addr, e_port);
- }
+ u_key.addr = m->local_addr;
+ u_key.fib_index = fib_index;
+ kv.key = u_key.as_u64;
+ nat44_ei_static_mapping_del_sessions (
+ nm, tnm, u_key, is_sm_addr_only (flags), e_addr, e_port);
+ }
- fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
- if (pool_elts (m->locals))
- return 0;
+ fib_table_unlock (fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
- init_nat_k (&kv, m->external_addr, m->external_port, 0, m->proto);
+ if (!pool_elts (m->locals))
+ {
+ // this is last record remove all required stuff
+ // fib_index 0
+ init_nat_k (&kv, e_addr, e_port, 0, proto);
clib_bihash_add_del_8_8 (&nm->static_mapping_by_external, &kv, 0);
vec_free (m->tag);
vec_free (m->workers);
- /* Delete static mapping from pool */
pool_put (nm->static_mappings, m);
- }
- if (!addr_only || (l_addr.as_u32 == e_addr.as_u32))
- return 0;
-
- /* Add/delete external address to FIB */
- pool_foreach (interface, nm->interfaces)
- {
- if (nat44_ei_interface_is_inside (interface) || nm->out2in_dpo)
- continue;
-
- nat44_ei_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index,
- is_add);
- break;
+ if (is_sm_addr_only (flags) && !is_sm_identity_nat (flags))
+ {
+ nat44_ei_add_del_addr_to_fib_foreach_out_if (&e_addr, 0);
+ }
}
- pool_foreach (interface, nm->output_feature_interfaces)
- {
- if (nat44_ei_interface_is_inside (interface) || nm->out2in_dpo)
- continue;
- nat44_ei_add_del_addr_to_fib (&e_addr, 32, interface->sw_if_index,
- is_add);
- break;
- }
return 0;
}
@@ -2300,16 +2737,16 @@ nat44_ei_static_mapping_match (ip4_address_t match_addr, u16 match_port,
}
/* Address only mapping doesn't change port */
- if (nat44_ei_is_addr_only_static_mapping (m))
+ if (is_sm_addr_only (m->flags))
*mapping_port = match_port;
else
*mapping_port = port;
if (PREDICT_FALSE (is_addr_only != 0))
- *is_addr_only = nat44_ei_is_addr_only_static_mapping (m);
+ *is_addr_only = is_sm_addr_only (m->flags);
if (PREDICT_FALSE (is_identity_nat != 0))
- *is_identity_nat = nat44_ei_is_identity_static_mapping (m);
+ *is_identity_nat = is_sm_identity_nat (m->flags);
return 0;
}
@@ -2418,27 +2855,6 @@ nat44_ei_worker_db_init (nat44_ei_main_per_thread_data_t *tnm,
}
static void
-nat44_ei_db_free ()
-{
- nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_main_per_thread_data_t *tnm;
-
- pool_free (nm->static_mappings);
- clib_bihash_free_8_8 (&nm->static_mapping_by_local);
- clib_bihash_free_8_8 (&nm->static_mapping_by_external);
-
- if (nm->pat)
- {
- clib_bihash_free_8_8 (&nm->in2out);
- clib_bihash_free_8_8 (&nm->out2in);
- vec_foreach (tnm, nm->per_thread_data)
- {
- nat44_ei_worker_db_free (tnm);
- }
- }
-}
-
-static void
nat44_ei_db_init (u32 translations, u32 translation_buckets, u32 user_buckets)
{
nat44_ei_main_t *nm = &nat44_ei_main;
@@ -2572,11 +2988,13 @@ nat44_ei_update_outside_fib (ip4_main_t *im, uword opaque, u32 sw_if_index,
}
int
-nat44_ei_add_address (nat44_ei_main_t *nm, ip4_address_t *addr, u32 vrf_id)
+nat44_ei_add_address (ip4_address_t *addr, u32 vrf_id)
{
- nat44_ei_address_t *ap;
- nat44_ei_interface_t *i;
+ nat44_ei_main_t *nm = &nat44_ei_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ nat44_ei_address_t *ap;
+
+ fail_if_disabled ();
/* Check if address already exists */
vec_foreach (ap, nm->addresses)
@@ -2590,137 +3008,49 @@ nat44_ei_add_address (nat44_ei_main_t *nm, ip4_address_t *addr, u32 vrf_id)
vec_add2 (nm->addresses, ap, 1);
+ ap->fib_index = ~0;
ap->addr = *addr;
- if (vrf_id != ~0)
- ap->fib_index = fib_table_find_or_create_and_lock (
- FIB_PROTOCOL_IP4, vrf_id, nm->fib_src_low);
- else
- ap->fib_index = ~0;
-
-#define _(N, i, n, s) \
- clib_memset (ap->busy_##n##_port_refcounts, 0, \
- sizeof (ap->busy_##n##_port_refcounts)); \
- ap->busy_##n##_ports = 0; \
- ap->busy_##n##_ports_per_thread = 0; \
- vec_validate_init_empty (ap->busy_##n##_ports_per_thread, \
- tm->n_vlib_mains - 1, 0);
- foreach_nat_protocol
-#undef _
- /* Add external address to FIB */
- pool_foreach (i, nm->interfaces)
- {
- if (nat44_ei_interface_is_inside (i) || nm->out2in_dpo)
- continue;
-
- nat44_ei_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
- break;
- }
- pool_foreach (i, nm->output_feature_interfaces)
+ if (vrf_id != ~0)
{
- if (nat44_ei_interface_is_inside (i) || nm->out2in_dpo)
- continue;
-
- nat44_ei_add_del_addr_to_fib (addr, 32, i->sw_if_index, 1);
- break;
+ ap->fib_index = fib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, vrf_id, nm->fib_src_low);
}
- return 0;
-}
-
-int
-nat44_ei_add_interface_address (nat44_ei_main_t *nm, u32 sw_if_index,
- int is_del)
-{
- ip4_main_t *ip4_main = nm->ip4_main;
- ip4_address_t *first_int_addr;
- nat44_ei_static_map_resolve_t *rp;
- u32 *indices_to_delete = 0;
- int i, j;
- u32 *auto_add_sw_if_indices = nm->auto_add_sw_if_indices;
-
- first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index,
- 0 /* just want the address */);
-
- for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
+ nat_protocol_t proto;
+ for (proto = 0; proto < NAT_N_PROTOCOLS; ++proto)
{
- if (auto_add_sw_if_indices[i] == sw_if_index)
- {
- if (is_del)
- {
- /* if have address remove it */
- if (first_int_addr)
- (void) nat44_ei_del_address (nm, first_int_addr[0], 1);
- else
- {
- for (j = 0; j < vec_len (nm->to_resolve); j++)
- {
- rp = nm->to_resolve + j;
- if (rp->sw_if_index == sw_if_index)
- vec_add1 (indices_to_delete, j);
- }
- if (vec_len (indices_to_delete))
- {
- for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
- vec_del1 (nm->to_resolve, j);
- vec_free (indices_to_delete);
- }
- }
- vec_del1 (nm->auto_add_sw_if_indices, i);
- }
- else
- return VNET_API_ERROR_VALUE_EXIST;
-
- return 0;
- }
+ ap->busy_port_bitmap[proto] = 0;
+ ap->busy_ports[proto] = 0;
+ ap->busy_ports_per_thread[proto] = 0;
+ vec_validate_init_empty (ap->busy_ports_per_thread[proto],
+ tm->n_vlib_mains - 1, 0);
}
- if (is_del)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- /* add to the auto-address list */
- vec_add1 (nm->auto_add_sw_if_indices, sw_if_index);
-
- /* If the address is already bound - or static - add it now */
- if (first_int_addr)
- (void) nat44_ei_add_address (nm, first_int_addr, ~0);
+ nat44_ei_add_del_addr_to_fib_foreach_out_if (addr, 1);
return 0;
}
-static int
-nat44_ei_is_address_used_in_static_mapping (ip4_address_t addr)
-{
- nat44_ei_main_t *nm = &nat44_ei_main;
- nat44_ei_static_mapping_t *m;
- pool_foreach (m, nm->static_mappings)
- {
- if (nat44_ei_is_addr_only_static_mapping (m) ||
- nat44_ei_is_identity_static_mapping (m))
- continue;
- if (m->external_addr.as_u32 == addr.as_u32)
- return 1;
- }
- return 0;
-}
-
int
-nat44_ei_del_address (nat44_ei_main_t *nm, ip4_address_t addr, u8 delete_sm)
+nat44_ei_del_address (ip4_address_t addr, u8 delete_sm)
{
+ nat44_ei_main_t *nm = &nat44_ei_main;
nat44_ei_address_t *a = 0;
nat44_ei_session_t *ses;
u32 *ses_to_be_removed = 0, *ses_index;
nat44_ei_main_per_thread_data_t *tnm;
- nat44_ei_interface_t *interface;
nat44_ei_static_mapping_t *m;
- int i;
+ int j;
+
+ fail_if_disabled ();
/* Find SNAT address */
- for (i = 0; i < vec_len (nm->addresses); i++)
+ for (j = 0; j < vec_len (nm->addresses); j++)
{
- if (nm->addresses[i].addr.as_u32 == addr.as_u32)
+ if (nm->addresses[j].addr.as_u32 == addr.as_u32)
{
- a = nm->addresses + i;
+ a = nm->addresses + j;
break;
}
}
@@ -2735,11 +3065,9 @@ nat44_ei_del_address (nat44_ei_main_t *nm, ip4_address_t addr, u8 delete_sm)
pool_foreach (m, nm->static_mappings)
{
if (m->external_addr.as_u32 == addr.as_u32)
- (void) nat44_ei_add_del_static_mapping (
+ nat44_ei_del_static_mapping_internal (
m->local_addr, m->external_addr, m->local_port, m->external_port,
- m->proto, ~0 /* sw_if_index */, m->vrf_id,
- nat44_ei_is_addr_only_static_mapping (m),
- nat44_ei_is_identity_static_mapping (m), m->tag, 0);
+ m->proto, m->vrf_id, ~0, m->flags);
}
}
else
@@ -2752,11 +3080,9 @@ nat44_ei_del_address (nat44_ei_main_t *nm, ip4_address_t addr, u8 delete_sm)
}
}
- if (a->fib_index != ~0)
- fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
-
/* Delete sessions using address */
- if (a->busy_tcp_ports || a->busy_udp_ports || a->busy_icmp_ports)
+ if (a->busy_ports[NAT_PROTOCOL_TCP] || a->busy_ports[NAT_PROTOCOL_UDP] ||
+ a->busy_ports[NAT_PROTOCOL_ICMP])
{
vec_foreach (tnm, nm->per_thread_data)
{
@@ -2778,28 +3104,116 @@ nat44_ei_del_address (nat44_ei_main_t *nm, ip4_address_t addr, u8 delete_sm)
}
}
-#define _(N, i, n, s) vec_free (a->busy_##n##_ports_per_thread);
- foreach_nat_protocol
-#undef _
- vec_del1 (nm->addresses, i);
+ nat44_ei_add_del_addr_to_fib_foreach_out_if (&addr, 0);
- /* Delete external address from FIB */
- pool_foreach (interface, nm->interfaces)
+ if (a->fib_index != ~0)
{
- if (nat44_ei_interface_is_inside (interface) || nm->out2in_dpo)
- continue;
- nat44_ei_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
- break;
+ fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP4, nm->fib_src_low);
}
- pool_foreach (interface, nm->output_feature_interfaces)
+ nat_protocol_t proto;
+ for (proto = 0; proto < NAT_N_PROTOCOLS; ++proto)
{
- if (nat44_ei_interface_is_inside (interface) || nm->out2in_dpo)
- continue;
- nat44_ei_add_del_addr_to_fib (&addr, 32, interface->sw_if_index, 0);
- break;
+ vec_free (a->busy_ports_per_thread[proto]);
+ }
+
+ vec_del1 (nm->addresses, j);
+ return 0;
+}
+
+int
+nat44_ei_add_interface_address (u32 sw_if_index)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ ip4_main_t *ip4_main = nm->ip4_main;
+ ip4_address_t *first_int_addr;
+ u32 *auto_add_sw_if_indices = nm->auto_add_sw_if_indices;
+ int i;
+
+ for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
+ {
+ if (auto_add_sw_if_indices[i] == sw_if_index)
+ {
+ return VNET_API_ERROR_VALUE_EXIST;
+ }
+ }
+
+ /* add to the auto-address list */
+ vec_add1 (nm->auto_add_sw_if_indices, sw_if_index);
+
+ // if the address is already bound - or static - add it now
+ first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
+ if (first_int_addr)
+ {
+ (void) nat44_ei_add_address (first_int_addr, ~0);
+ }
+
+ return 0;
+}
+
+int
+nat44_ei_del_interface_address (u32 sw_if_index)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ ip4_main_t *ip4_main = nm->ip4_main;
+ ip4_address_t *first_int_addr;
+ nat44_ei_static_map_resolve_t *rp;
+ u32 *indices_to_delete = 0;
+ int i, j;
+ u32 *auto_add_sw_if_indices = nm->auto_add_sw_if_indices;
+
+ fail_if_disabled ();
+
+ first_int_addr = ip4_interface_first_address (ip4_main, sw_if_index, 0);
+
+ for (i = 0; i < vec_len (auto_add_sw_if_indices); i++)
+ {
+ if (auto_add_sw_if_indices[i] == sw_if_index)
+ {
+ first_int_addr =
+ ip4_interface_first_address (ip4_main, sw_if_index, 0);
+ if (first_int_addr)
+ {
+ (void) nat44_ei_del_address (first_int_addr[0], 1);
+ }
+ else
+ {
+ for (j = 0; j < vec_len (nm->to_resolve); j++)
+ {
+ rp = nm->to_resolve + j;
+ if (rp->sw_if_index == sw_if_index)
+ {
+ vec_add1 (indices_to_delete, j);
+ }
+ }
+ if (vec_len (indices_to_delete))
+ {
+ for (j = vec_len (indices_to_delete) - 1; j >= 0; j--)
+ {
+ vec_del1 (nm->to_resolve, j);
+ }
+ vec_free (indices_to_delete);
+ }
+ }
+
+ vec_del1 (nm->auto_add_sw_if_indices, i);
+ return 0;
+ }
}
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+}
+static_always_inline int
+is_sw_if_index_reg_for_auto_resolve (u32 *sw_if_indices, u32 sw_if_index)
+{
+ u32 *i;
+ vec_foreach (i, sw_if_indices)
+ {
+ if (*i == sw_if_index)
+ {
+ return 1;
+ }
+ }
return 0;
}
@@ -2812,61 +3226,59 @@ nat44_ei_ip4_add_del_interface_address_cb (ip4_main_t *im, uword opaque,
{
nat44_ei_main_t *nm = &nat44_ei_main;
nat44_ei_static_map_resolve_t *rp;
- ip4_address_t l_addr;
- int i, j;
- int rv;
nat44_ei_address_t *addresses = nm->addresses;
+ int rv, i;
if (!nm->enabled)
- return;
-
- for (i = 0; i < vec_len (nm->auto_add_sw_if_indices); i++)
{
- if (sw_if_index == nm->auto_add_sw_if_indices[i])
- goto match;
+ return;
}
- return;
+ if (!is_sw_if_index_reg_for_auto_resolve (nm->auto_add_sw_if_indices,
+ sw_if_index))
+ {
+ return;
+ }
-match:
if (!is_delete)
{
/* Don't trip over lease renewal, static config */
- for (j = 0; j < vec_len (addresses); j++)
- if (addresses[j].addr.as_u32 == address->as_u32)
- return;
+ for (i = 0; i < vec_len (addresses); i++)
+ {
+ if (addresses[i].addr.as_u32 == address->as_u32)
+ {
+ return;
+ }
+ }
+
+ (void) nat44_ei_add_address (address, ~0);
- (void) nat44_ei_add_address (nm, address, ~0);
/* Scan static map resolution vector */
- for (j = 0; j < vec_len (nm->to_resolve); j++)
+ for (i = 0; i < vec_len (nm->to_resolve); i++)
{
- rp = nm->to_resolve + j;
- if (rp->addr_only)
- continue;
+ rp = nm->to_resolve + i;
+ if (is_sm_addr_only (rp->flags))
+ {
+ continue;
+ }
/* On this interface? */
if (rp->sw_if_index == sw_if_index)
{
- /* Indetity mapping? */
- if (rp->l_addr.as_u32 == 0)
- l_addr.as_u32 = address[0].as_u32;
- else
- l_addr.as_u32 = rp->l_addr.as_u32;
- /* Add the static mapping */
- rv = nat44_ei_add_del_static_mapping (
- l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
- ~0 /* sw_if_index */, rp->vrf_id, rp->addr_only,
- rp->identity_nat, rp->tag, 1);
+ rv = nat44_ei_add_static_mapping_internal (
+ rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
+ rp->vrf_id, ~0, rp->flags, rp->pool_addr, rp->tag);
if (rv)
- nat_elog_notice_X1 (
- nm, "nat44_ei_add_del_static_mapping returned %d", "i4", rv);
+ {
+ nat_elog_notice_X1 (
+ nm, "add_static_mapping_internal returned %d", "i4", rv);
+ }
}
}
- return;
}
else
{
- (void) nat44_ei_del_address (nm, address[0], 1);
- return;
+ // remove all static mapping records
+ (void) nat44_ei_del_address (address[0], 1);
}
}
@@ -2889,57 +3301,64 @@ nat44_ei_ip4_add_del_addr_only_sm_cb (ip4_main_t *im, uword opaque,
nat44_ei_static_map_resolve_t *rp;
nat44_ei_static_mapping_t *m;
clib_bihash_kv_8_8_t kv, value;
- int i, rv;
- ip4_address_t l_addr;
+ int i, rv = 0, match = 0;
if (!nm->enabled)
- return;
+ {
+ return;
+ }
for (i = 0; i < vec_len (nm->to_resolve); i++)
{
rp = nm->to_resolve + i;
- if (rp->addr_only == 0)
- continue;
- if (rp->sw_if_index == sw_if_index)
- goto match;
+
+ if (is_sm_addr_only (rp->flags) && rp->sw_if_index == sw_if_index)
+ {
+ match = 1;
+ break;
+ }
}
- return;
+ if (!match)
+ {
+ return;
+ }
-match:
- init_nat_k (&kv, *address, rp->addr_only ? 0 : rp->e_port,
- nm->outside_fib_index, rp->addr_only ? 0 : rp->proto);
+ init_nat_k (&kv, *address, is_sm_addr_only (rp->flags) ? 0 : rp->e_port,
+ nm->outside_fib_index,
+ is_sm_addr_only (rp->flags) ? 0 : rp->proto);
if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
m = 0;
else
m = pool_elt_at_index (nm->static_mappings, value.value);
- if (!is_delete)
+ if (is_delete)
{
- /* Don't trip over lease renewal, static config */
- if (m)
+ if (!m)
return;
+ rv = nat44_ei_del_static_mapping_internal (
+ rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto, rp->vrf_id,
+ ~0, rp->flags);
+ if (rv)
+ {
+ nat_elog_notice_X1 (nm, "nat44_ei_del_static_mapping returned %d",
+ "i4", rv);
+ }
}
else
{
- if (!m)
+ if (m)
return;
- }
+ rv = nat44_ei_add_static_mapping_internal (
+ rp->l_addr, address[0], rp->l_port, rp->e_port, rp->proto, rp->vrf_id,
+ ~0, rp->flags, rp->pool_addr, rp->tag);
- /* Indetity mapping? */
- if (rp->l_addr.as_u32 == 0)
- l_addr.as_u32 = address[0].as_u32;
- else
- l_addr.as_u32 = rp->l_addr.as_u32;
- /* Add the static mapping */
-
- rv = nat44_ei_add_del_static_mapping (
- l_addr, address[0], rp->l_port, rp->e_port, rp->proto,
- ~0 /* sw_if_index */, rp->vrf_id, rp->addr_only, rp->identity_nat, rp->tag,
- !is_delete);
- if (rv)
- nat_elog_notice_X1 (nm, "nat44_ei_add_del_static_mapping returned %d",
- "i4", rv);
+ if (rv)
+ {
+ nat_elog_notice_X1 (nm, "nat44_ei_add_static_mapping returned %d",
+ "i4", rv);
+ }
+ }
}
static_always_inline uword
diff --git a/src/plugins/nat/nat44-ei/nat44_ei.h b/src/plugins/nat/nat44-ei/nat44_ei.h
index 055f81c069b..b4aa0f26c0b 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei.h
+++ b/src/plugins/nat/nat44-ei/nat44_ei.h
@@ -36,6 +36,7 @@
#include <nat/lib/lib.h>
#include <nat/lib/inlines.h>
+#include <nat/lib/nat_proto.h>
/* default number of worker handoff frame queue elements */
#define NAT_FQ_NELTS_DEFAULT 64
@@ -62,19 +63,17 @@ typedef enum
#define NAT44_EI_SESSION_FLAG_UNKNOWN_PROTO (1 << 1)
/* Static mapping flags */
-#define NAT44_EI_STATIC_MAPPING_FLAG_ADDR_ONLY (1 << 0)
-#define NAT44_EI_STATIC_MAPPING_FLAG_IDENTITY_NAT (1 << 1)
+#define NAT44_EI_SM_FLAG_ADDR_ONLY (1 << 0)
+#define NAT44_EI_SM_FLAG_IDENTITY_NAT (1 << 1)
+#define NAT44_EI_SM_FLAG_SWITCH_ADDRESS (1 << 2)
typedef struct
{
ip4_address_t addr;
u32 fib_index;
-#define _(N, i, n, s) \
- u32 busy_##n##_ports; \
- u32 *busy_##n##_ports_per_thread; \
- u32 busy_##n##_port_refcounts[0xffff + 1];
- foreach_nat_protocol
-#undef _
+ u32 busy_ports[NAT_N_PROTOCOLS];
+ u32 *busy_ports_per_thread[NAT_N_PROTOCOLS];
+ uword *busy_port_bitmap[NAT_N_PROTOCOLS];
} nat44_ei_address_t;
clib_error_t *nat44_ei_api_hookup (vlib_main_t *vm);
@@ -138,13 +137,9 @@ typedef struct
u32 vrf_id;
u32 flags;
nat_protocol_t proto;
- u8 addr_only;
- u8 identity_nat;
- u8 exact;
u8 *tag;
} nat44_ei_static_map_resolve_t;
-// TODO: cleanup/redo (there is no lb in EI nat)
typedef struct
{
/* backend IP address */
@@ -161,7 +156,7 @@ typedef struct
typedef struct
{
- /* prefered pool address */
+ /* preferred pool address */
ip4_address_t pool_addr;
/* local IP address */
ip4_address_t local_addr;
@@ -339,6 +334,8 @@ typedef struct nat44_ei_main_s
/* Interface pool */
nat44_ei_interface_t *interfaces;
nat44_ei_interface_t *output_feature_interfaces;
+ // broken api backward compatibility
+ nat44_ei_interface_t *output_feature_dummy_interfaces;
/* Is translation memory size calculated or user defined */
u8 translation_memory_size_set;
@@ -470,12 +467,14 @@ typedef struct nat44_ei_main_s
/* nat44 plugin enabled */
u8 enabled;
+ /* hairpinning registration counter */
+ u32 hairpin_reg;
+
nat44_ei_config_t rconfig;
u32 in2out_hairpinning_finish_ip4_lookup_node_fq_index;
u32 in2out_hairpinning_finish_interface_output_node_fq_index;
u32 hairpinning_fq_index;
- u32 hairpin_dst_fq_index;
vnet_main_t *vnet_main;
} nat44_ei_main_t;
@@ -483,9 +482,17 @@ typedef struct nat44_ei_main_s
extern nat44_ei_main_t nat44_ei_main;
int nat44_ei_plugin_enable (nat44_ei_config_t c);
-
int nat44_ei_plugin_disable ();
+int nat44_ei_add_interface (u32 sw_if_index, u8 is_inside);
+int nat44_ei_del_interface (u32 sw_if_index, u8 is_inside);
+int nat44_ei_add_output_interface (u32 sw_if_index);
+int nat44_ei_del_output_interface (u32 sw_if_index);
+int nat44_ei_add_address (ip4_address_t *addr, u32 vrf_id);
+int nat44_ei_del_address (ip4_address_t addr, u8 delete_sm);
+int nat44_ei_add_interface_address (u32 sw_if_index);
+int nat44_ei_del_interface_address (u32 sw_if_index);
+
/**
* @brief Delete specific NAT44 EI user and his sessions
*
@@ -532,29 +539,14 @@ void nat44_ei_set_alloc_mape (u16 psid, u16 psid_offset, u16 psid_length);
*/
void nat44_ei_set_alloc_range (u16 start_port, u16 end_port);
-/**
- * @brief Add/delete NAT44-EI static mapping
- *
- * @param l_addr local IPv4 address
- * @param e_addr external IPv4 address
- * @param l_port local port number
- * @param e_port external port number
- * @param proto L4 protocol
- * @param sw_if_index use interface address as external IPv4 address
- * @param vrf_id local VRF ID
- * @param addr_only 1 = 1:1NAT, 0 = 1:1NAPT
- * @param identity_nat identity NAT
- * @param tag opaque string tag
- * @param is_add 1 = add, 0 = delete
- *
- * @return 0 on success, non-zero value otherwise
+int nat44_ei_add_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags,
+ ip4_address_t pool_addr, u8 *tag);
- */
-int nat44_ei_add_del_static_mapping (ip4_address_t l_addr,
- ip4_address_t e_addr, u16 l_port,
- u16 e_port, nat_protocol_t proto,
- u32 sw_if_index, u32 vrf_id, u8 addr_only,
- u8 identity_nat, u8 *tag, u8 is_add);
+int nat44_ei_del_static_mapping (ip4_address_t l_addr, ip4_address_t e_addr,
+ u16 l_port, u16 e_port, nat_protocol_t proto,
+ u32 vrf_id, u32 sw_if_index, u32 flags);
/**
* @brief Delete NAT44-EI session
@@ -619,9 +611,6 @@ int nat44_ei_set_outside_address_and_port (nat44_ei_address_t *addresses,
ip4_address_t addr, u16 port,
nat_protocol_t protocol);
-int nat44_ei_del_address (nat44_ei_main_t *nm, ip4_address_t addr,
- u8 delete_sm);
-
void nat44_ei_free_session_data (nat44_ei_main_t *nm, nat44_ei_session_t *s,
u32 thread_index, u8 is_ha);
@@ -629,55 +618,56 @@ int nat44_ei_set_workers (uword *bitmap);
void nat44_ei_add_del_address_dpo (ip4_address_t addr, u8 is_add);
-int nat44_ei_add_address (nat44_ei_main_t *nm, ip4_address_t *addr,
- u32 vrf_id);
-
void nat44_ei_delete_session (nat44_ei_main_t *nm, nat44_ei_session_t *ses,
u32 thread_index);
-int nat44_ei_interface_add_del (u32 sw_if_index, u8 is_inside, int is_del);
-
-int nat44_ei_interface_add_del_output_feature (u32 sw_if_index, u8 is_inside,
- int is_del);
-
-int nat44_ei_add_interface_address (nat44_ei_main_t *nm, u32 sw_if_index,
- int is_del);
-
/* Call back functions for clib_bihash_add_or_overwrite_stale */
int nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t *kv, void *arg);
int nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t *kv, void *arg);
-int nat44_ei_hairpinning (vlib_main_t *vm, vlib_node_runtime_t *node,
- nat44_ei_main_t *nm, u32 thread_index,
- vlib_buffer_t *b0, ip4_header_t *ip0,
- udp_header_t *udp0, tcp_header_t *tcp0, u32 proto0,
- int do_trace, u32 *required_thread_index);
+int nat44_ei_set_frame_queue_nelts (u32 frame_queue_nelts);
-void nat44_ei_hairpinning_sm_unknown_proto (nat44_ei_main_t *nm,
- vlib_buffer_t *b,
- ip4_header_t *ip);
+always_inline bool
+nat44_ei_is_session_static (nat44_ei_session_t *s)
+{
+ return (s->flags & NAT44_EI_SESSION_FLAG_STATIC_MAPPING);
+}
-u32 nat44_ei_icmp_hairpinning (nat44_ei_main_t *nm, vlib_buffer_t *b0,
- u32 thread_index, ip4_header_t *ip0,
- icmp46_header_t *icmp0,
- u32 *required_thread_index);
+always_inline bool
+nat44_ei_is_unk_proto_session (nat44_ei_session_t *s)
+{
+ return (s->flags & NAT44_EI_SESSION_FLAG_UNKNOWN_PROTO);
+}
-int nat44_ei_set_frame_queue_nelts (u32 frame_queue_nelts);
+always_inline bool
+nat44_ei_interface_is_inside (nat44_ei_interface_t *i)
+{
+ return (i->flags & NAT44_EI_INTERFACE_FLAG_IS_INSIDE);
+}
+
+always_inline bool
+nat44_ei_interface_is_outside (nat44_ei_interface_t *i)
+{
+ return (i->flags & NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE);
+}
-#define nat44_ei_is_session_static(sp) \
- (sp->flags & NAT44_EI_SESSION_FLAG_STATIC_MAPPING)
-#define nat44_ei_is_unk_proto_session(sp) \
- (sp->flags & NAT44_EI_SESSION_FLAG_UNKNOWN_PROTO)
+always_inline bool
+is_sm_addr_only (u32 f)
+{
+ return (f & NAT44_EI_SM_FLAG_ADDR_ONLY);
+}
-#define nat44_ei_interface_is_inside(ip) \
- (ip->flags & NAT44_EI_INTERFACE_FLAG_IS_INSIDE)
-#define nat44_ei_interface_is_outside(ip) \
- (ip->flags & NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE)
+always_inline bool
+is_sm_identity_nat (u32 f)
+{
+ return (f & NAT44_EI_SM_FLAG_IDENTITY_NAT);
+}
-#define nat44_ei_is_addr_only_static_mapping(mp) \
- (mp->flags & NAT44_EI_STATIC_MAPPING_FLAG_ADDR_ONLY)
-#define nat44_ei_is_identity_static_mapping(mp) \
- (mp->flags & NAT44_EI_STATIC_MAPPING_FLAG_IDENTITY_NAT)
+always_inline bool
+is_sm_switch_address (u32 f)
+{
+ return (f & NAT44_EI_SM_FLAG_SWITCH_ADDRESS);
+}
/* logging */
#define nat44_ei_log_err(...) \
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_api.c b/src/plugins/nat/nat44-ei/nat44_ei_api.c
index 427140ffb92..8671a556929 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_api.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_api.c
@@ -173,7 +173,9 @@ vl_api_nat44_ei_plugin_enable_disable_t_handler (
rv = nat44_ei_plugin_enable (c);
}
else
- rv = nat44_ei_plugin_disable ();
+ {
+ rv = nat44_ei_plugin_disable ();
+ }
REPLY_MACRO (VL_API_NAT44_EI_PLUGIN_ENABLE_DISABLE_REPLY);
}
@@ -469,9 +471,9 @@ vl_api_nat44_ei_add_del_address_range_t_handler (
for (i = 0; i < count; i++)
{
if (is_add)
- rv = nat44_ei_add_address (nm, &this_addr, vrf_id);
+ rv = nat44_ei_add_address (&this_addr, vrf_id);
else
- rv = nat44_ei_del_address (nm, this_addr, 0);
+ rv = nat44_ei_del_address (this_addr, 0);
if (rv)
goto send_reply;
@@ -533,18 +535,22 @@ vl_api_nat44_ei_interface_add_del_feature_t_handler (
nat44_ei_main_t *nm = &nat44_ei_main;
vl_api_nat44_ei_interface_add_del_feature_reply_t *rmp;
u32 sw_if_index = ntohl (mp->sw_if_index);
- u8 is_del;
int rv = 0;
- is_del = !mp->is_add;
-
VALIDATE_SW_IF_INDEX (mp);
- rv = nat44_ei_interface_add_del (sw_if_index, mp->flags & NAT44_EI_IF_INSIDE,
- is_del);
+ if (mp->is_add)
+ {
+ rv =
+ nat44_ei_add_interface (sw_if_index, mp->flags & NAT44_EI_IF_INSIDE);
+ }
+ else
+ {
+ rv =
+ nat44_ei_del_interface (sw_if_index, mp->flags & NAT44_EI_IF_INSIDE);
+ }
BAD_SW_IF_INDEX_LABEL;
-
REPLY_MACRO (VL_API_NAT44_EI_INTERFACE_ADD_DEL_FEATURE_REPLY);
}
@@ -588,19 +594,75 @@ vl_api_nat44_ei_interface_dump_t_handler (vl_api_nat44_ei_interface_dump_t *mp)
}
}
+static_always_inline int
+add_del_dummy_output_interface (u32 sw_if_index, u8 is_inside, u8 is_add)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_interface_t *i;
+ int rv = 1;
+
+ pool_foreach (i, nm->output_feature_dummy_interfaces)
+ {
+ if (i->sw_if_index == sw_if_index)
+ {
+ if (!is_add)
+ {
+ pool_put (nm->output_feature_dummy_interfaces, i);
+ rv = 0;
+ }
+ goto done;
+ }
+ }
+
+ if (is_add)
+ {
+ pool_get (nm->output_feature_dummy_interfaces, i);
+ i->sw_if_index = sw_if_index;
+
+ if (is_inside)
+ {
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_INSIDE;
+ }
+ else
+ {
+ i->flags |= NAT44_EI_INTERFACE_FLAG_IS_OUTSIDE;
+ }
+
+ rv = 0;
+ }
+
+done:
+ return rv;
+}
+
static void
vl_api_nat44_ei_interface_add_del_output_feature_t_handler (
vl_api_nat44_ei_interface_add_del_output_feature_t *mp)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
vl_api_nat44_ei_interface_add_del_output_feature_reply_t *rmp;
- u32 sw_if_index = ntohl (mp->sw_if_index);
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ u32 sw_if_index;
int rv = 0;
VALIDATE_SW_IF_INDEX (mp);
- rv = nat44_ei_interface_add_del_output_feature (
- sw_if_index, mp->flags & NAT44_EI_IF_INSIDE, !mp->is_add);
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ // register all interfaces in the dummy structure
+ rv = add_del_dummy_output_interface (
+ sw_if_index, mp->flags & NAT44_EI_IF_INSIDE, mp->is_add);
+
+ if (!(mp->flags & NAT44_EI_IF_INSIDE))
+ {
+ if (mp->is_add)
+ {
+ rv = nat44_ei_add_output_interface (sw_if_index);
+ }
+ else
+ {
+ rv = nat44_ei_del_output_interface (sw_if_index);
+ }
+ }
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_NAT44_EI_INTERFACE_ADD_DEL_OUTPUT_FEATURE_REPLY);
@@ -622,7 +684,9 @@ send_nat44_ei_interface_output_feature_details (nat44_ei_interface_t *i,
rmp->context = context;
if (nat44_ei_interface_is_inside (i))
- rmp->flags |= NAT44_EI_IF_INSIDE;
+ {
+ rmp->flags |= NAT44_EI_IF_INSIDE;
+ }
vl_api_send_msg (reg, (u8 *) rmp);
}
@@ -639,49 +703,135 @@ vl_api_nat44_ei_interface_output_feature_dump_t_handler (
if (!reg)
return;
- pool_foreach (i, nm->output_feature_interfaces)
+ pool_foreach (i, nm->output_feature_dummy_interfaces)
{
send_nat44_ei_interface_output_feature_details (i, reg, mp->context);
}
}
static void
+vl_api_nat44_ei_add_del_output_interface_t_handler (
+ vl_api_nat44_ei_add_del_output_interface_t *mp)
+{
+ vl_api_nat44_ei_add_del_output_interface_reply_t *rmp;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ if (mp->is_add)
+ {
+ rv = nat44_ei_add_output_interface (mp->sw_if_index);
+ }
+ else
+ {
+ rv = nat44_ei_del_output_interface (mp->sw_if_index);
+ }
+
+bad_sw_if_index:
+ REPLY_MACRO_END (VL_API_NAT44_EI_ADD_DEL_OUTPUT_INTERFACE_REPLY);
+}
+
+#define vl_endianfun
+#include <nat/nat44-ei/nat44_ei.api.h>
+#undef vl_endianfun
+static void
+send_nat44_ei_output_interface_details (u32 index, vl_api_registration_t *rp,
+ u32 context)
+{
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ vl_api_nat44_ei_output_interface_details_t *rmp;
+ nat44_ei_interface_t *i =
+ pool_elt_at_index (nm->output_feature_interfaces, index);
+
+ /* Make sure every field is initiated (or don't skip the clib_memset()) */
+ REPLY_MACRO_DETAILS4 (
+ VL_API_NAT44_EI_OUTPUT_INTERFACE_DETAILS, rp, context, ({
+ rmp->sw_if_index = i->sw_if_index;
+
+ /* Endian hack until apigen registers _details
+ * endian functions */
+ vl_api_nat44_ei_output_interface_details_t_endian (rmp);
+ rmp->_vl_msg_id = htons (rmp->_vl_msg_id);
+ rmp->context = htonl (rmp->context);
+ }));
+}
+
+static void
+vl_api_nat44_ei_output_interface_get_t_handler (
+ vl_api_nat44_ei_output_interface_get_t *mp)
+{
+ vl_api_nat44_ei_output_interface_get_reply_t *rmp;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ i32 rv = 0;
+
+ if (pool_elts (nm->output_feature_interfaces) == 0)
+ {
+ REPLY_MACRO (VL_API_NAT44_EI_OUTPUT_INTERFACE_GET_REPLY);
+ return;
+ }
+
+ REPLY_AND_DETAILS_MACRO (
+ VL_API_NAT44_EI_OUTPUT_INTERFACE_GET_REPLY, nm->output_feature_interfaces,
+ ({ send_nat44_ei_output_interface_details (cursor, rp, mp->context); }));
+}
+
+static void
vl_api_nat44_ei_add_del_static_mapping_t_handler (
vl_api_nat44_ei_add_del_static_mapping_t *mp)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
vl_api_nat44_ei_add_del_static_mapping_reply_t *rmp;
- ip4_address_t local_addr, external_addr;
- u16 local_port = 0, external_port = 0;
- u32 vrf_id, external_sw_if_index;
+
+ nat44_ei_main_t *nm = &nat44_ei_main;
int rv = 0;
- nat_protocol_t proto;
+
+ ip4_address_t l_addr, e_addr, pool_addr = { 0 };
+ u32 sw_if_index, flags = 0, vrf_id;
+ u16 l_port = 0, e_port = 0;
+ nat_protocol_t proto = 0;
u8 *tag = 0;
- memcpy (&local_addr.as_u8, mp->local_ip_address, 4);
- memcpy (&external_addr.as_u8, mp->external_ip_address, 4);
+ memcpy (&l_addr.as_u8, mp->local_ip_address, 4);
- if (!(mp->flags & NAT44_EI_ADDR_ONLY_MAPPING))
+ if (mp->flags & NAT44_EI_ADDR_ONLY_MAPPING)
{
- local_port = mp->local_port;
- external_port = mp->external_port;
+ flags |= NAT44_EI_SM_FLAG_ADDR_ONLY;
+ }
+ else
+ {
+ l_port = mp->local_port;
+ e_port = mp->external_port;
+ proto = ip_proto_to_nat_proto (mp->protocol);
}
- vrf_id = clib_net_to_host_u32 (mp->vrf_id);
- external_sw_if_index = clib_net_to_host_u32 (mp->external_sw_if_index);
- proto = ip_proto_to_nat_proto (mp->protocol);
-
- mp->tag[sizeof (mp->tag) - 1] = 0;
- tag = format (0, "%s", mp->tag);
- vec_terminate_c_string (tag);
-
- rv = nat44_ei_add_del_static_mapping (
- local_addr, external_addr, local_port, external_port, proto,
- external_sw_if_index, vrf_id, mp->flags & NAT44_EI_ADDR_ONLY_MAPPING, 0,
- tag, mp->is_add);
+ sw_if_index = clib_net_to_host_u32 (mp->external_sw_if_index);
+ if (sw_if_index != ~0)
+ {
+ flags |= NAT44_EI_SM_FLAG_SWITCH_ADDRESS;
+ }
+ else
+ {
+ memcpy (&e_addr.as_u8, mp->external_ip_address, 4);
+ }
- vec_free (tag);
+ vrf_id = clib_net_to_host_u32 (mp->vrf_id);
+ if (mp->is_add)
+ {
+ mp->tag[sizeof (mp->tag) - 1] = 0;
+ tag = format (0, "%s", mp->tag);
+ vec_terminate_c_string (tag);
+
+ rv = nat44_ei_add_static_mapping (l_addr, e_addr, l_port, e_port, proto,
+ vrf_id, sw_if_index, flags, pool_addr,
+ tag);
+ vec_free (tag);
+ }
+ else
+ {
+ rv = nat44_ei_del_static_mapping (l_addr, e_addr, l_port, e_port, proto,
+ vrf_id, sw_if_index, flags);
+ }
REPLY_MACRO (VL_API_NAT44_EI_ADD_DEL_STATIC_MAPPING_REPLY);
}
@@ -704,7 +854,7 @@ send_nat44_ei_static_mapping_details (nat44_ei_static_mapping_t *m,
rmp->vrf_id = htonl (m->vrf_id);
rmp->context = context;
- if (nat44_ei_is_addr_only_static_mapping (m))
+ if (is_sm_addr_only (m->flags))
{
rmp->flags |= NAT44_EI_ADDR_ONLY_MAPPING;
}
@@ -738,7 +888,7 @@ send_nat44_ei_static_map_resolve_details (nat44_ei_static_map_resolve_t *m,
rmp->vrf_id = htonl (m->vrf_id);
rmp->context = context;
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
{
rmp->flags |= NAT44_EI_ADDR_ONLY_MAPPING;
}
@@ -770,14 +920,14 @@ vl_api_nat44_ei_static_mapping_dump_t_handler (
pool_foreach (m, nm->static_mappings)
{
- if (!nat44_ei_is_identity_static_mapping (m))
+ if (!is_sm_identity_nat (m->flags))
send_nat44_ei_static_mapping_details (m, reg, mp->context);
}
for (j = 0; j < vec_len (nm->to_resolve); j++)
{
rp = nm->to_resolve + j;
- if (!rp->identity_nat)
+ if (!is_sm_identity_nat (rp->flags))
send_nat44_ei_static_map_resolve_details (rp, reg, mp->context);
}
}
@@ -786,35 +936,56 @@ static void
vl_api_nat44_ei_add_del_identity_mapping_t_handler (
vl_api_nat44_ei_add_del_identity_mapping_t *mp)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
vl_api_nat44_ei_add_del_identity_mapping_reply_t *rmp;
- ip4_address_t addr;
- u16 port = 0;
- u32 vrf_id, sw_if_index;
+
+ nat44_ei_main_t *nm = &nat44_ei_main;
int rv = 0;
- nat_protocol_t proto = NAT_PROTOCOL_OTHER;
+
+ ip4_address_t addr, pool_addr = { 0 };
+ u32 sw_if_index, flags, vrf_id;
+ nat_protocol_t proto = 0;
+ u16 port = 0;
u8 *tag = 0;
- if (!(mp->flags & NAT44_EI_ADDR_ONLY_MAPPING))
+ flags = NAT44_EI_SM_FLAG_IDENTITY_NAT;
+
+ if (mp->flags & NAT44_EI_ADDR_ONLY_MAPPING)
+ {
+ flags |= NAT44_EI_SM_FLAG_ADDR_ONLY;
+ }
+ else
{
port = mp->port;
proto = ip_proto_to_nat_proto (mp->protocol);
}
- vrf_id = clib_net_to_host_u32 (mp->vrf_id);
+
sw_if_index = clib_net_to_host_u32 (mp->sw_if_index);
if (sw_if_index != ~0)
- addr.as_u32 = 0;
+ {
+ flags |= NAT44_EI_SM_FLAG_SWITCH_ADDRESS;
+ }
else
- memcpy (&addr.as_u8, mp->ip_address, 4);
- mp->tag[sizeof (mp->tag) - 1] = 0;
- tag = format (0, "%s", mp->tag);
- vec_terminate_c_string (tag);
+ {
+ memcpy (&addr.as_u8, mp->ip_address, 4);
+ }
- rv = nat44_ei_add_del_static_mapping (
- addr, addr, port, port, proto, sw_if_index, vrf_id,
- mp->flags & NAT44_EI_ADDR_ONLY_MAPPING, 1, tag, mp->is_add);
+ vrf_id = clib_net_to_host_u32 (mp->vrf_id);
- vec_free (tag);
+ if (mp->is_add)
+ {
+ mp->tag[sizeof (mp->tag) - 1] = 0;
+ tag = format (0, "%s", mp->tag);
+ vec_terminate_c_string (tag);
+
+ rv = nat44_ei_add_static_mapping (addr, addr, port, port, proto, vrf_id,
+ sw_if_index, flags, pool_addr, tag);
+ vec_free (tag);
+ }
+ else
+ {
+ rv = nat44_ei_del_static_mapping (addr, addr, port, port, proto, vrf_id,
+ sw_if_index, flags);
+ }
REPLY_MACRO (VL_API_NAT44_EI_ADD_DEL_IDENTITY_MAPPING_REPLY);
}
@@ -833,7 +1004,7 @@ send_nat44_ei_identity_mapping_details (nat44_ei_static_mapping_t *m,
rmp->_vl_msg_id =
ntohs (VL_API_NAT44_EI_IDENTITY_MAPPING_DETAILS + nm->msg_id_base);
- if (nat44_ei_is_addr_only_static_mapping (m))
+ if (is_sm_addr_only (m->flags))
rmp->flags |= NAT44_EI_ADDR_ONLY_MAPPING;
clib_memcpy (rmp->ip_address, &(m->local_addr), 4);
@@ -860,7 +1031,7 @@ send_nat44_ei_identity_map_resolve_details (nat44_ei_static_map_resolve_t *m,
rmp->_vl_msg_id =
ntohs (VL_API_NAT44_EI_IDENTITY_MAPPING_DETAILS + nm->msg_id_base);
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
rmp->flags = (vl_api_nat44_ei_config_flags_t) NAT44_EI_ADDR_ONLY_MAPPING;
rmp->port = m->l_port;
@@ -890,7 +1061,7 @@ vl_api_nat44_ei_identity_mapping_dump_t_handler (
pool_foreach (m, nm->static_mappings)
{
- if (nat44_ei_is_identity_static_mapping (m))
+ if (is_sm_identity_nat (m->flags))
{
pool_foreach_index (j, m->locals)
{
@@ -902,7 +1073,7 @@ vl_api_nat44_ei_identity_mapping_dump_t_handler (
for (j = 0; j < vec_len (nm->to_resolve); j++)
{
rp = nm->to_resolve + j;
- if (rp->identity_nat)
+ if (is_sm_identity_nat (rp->flags))
send_nat44_ei_identity_map_resolve_details (rp, reg, mp->context);
}
}
@@ -915,13 +1086,17 @@ vl_api_nat44_ei_add_del_interface_addr_t_handler (
vl_api_nat44_ei_add_del_interface_addr_reply_t *rmp;
u32 sw_if_index = ntohl (mp->sw_if_index);
int rv = 0;
- u8 is_del;
-
- is_del = !mp->is_add;
VALIDATE_SW_IF_INDEX (mp);
- rv = nat44_ei_add_interface_address (nm, sw_if_index, is_del);
+ if (mp->is_add)
+ {
+ rv = nat44_ei_add_interface_address (sw_if_index);
+ }
+ else
+ {
+ rv = nat44_ei_del_interface_address (sw_if_index);
+ }
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_NAT44_EI_ADD_DEL_INTERFACE_ADDR_REPLY);
@@ -1044,6 +1219,44 @@ send_nat44_ei_user_session_details (nat44_ei_session_t *s,
}
static void
+send_nat44_ei_user_session_v2_details (nat44_ei_session_t *s,
+ vl_api_registration_t *reg, u32 context)
+{
+ vl_api_nat44_ei_user_session_v2_details_t *rmp;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (VL_API_NAT44_EI_USER_SESSION_V2_DETAILS + nm->msg_id_base);
+ clib_memcpy (rmp->outside_ip_address, (&s->out2in.addr), 4);
+ clib_memcpy (rmp->inside_ip_address, (&s->in2out.addr), 4);
+
+ if (nat44_ei_is_session_static (s))
+ rmp->flags |= NAT44_EI_STATIC_MAPPING;
+
+ rmp->last_heard = clib_host_to_net_u64 ((u64) s->last_heard);
+ rmp->time_since_last_heard = clib_host_to_net_u64 (
+ (u64) (vlib_time_now (vlib_get_main ()) - s->last_heard));
+ rmp->total_bytes = clib_host_to_net_u64 (s->total_bytes);
+ rmp->total_pkts = ntohl (s->total_pkts);
+ rmp->context = context;
+ if (nat44_ei_is_unk_proto_session (s))
+ {
+ rmp->outside_port = 0;
+ rmp->inside_port = 0;
+ rmp->protocol = ntohs (s->in2out.port);
+ }
+ else
+ {
+ rmp->outside_port = s->out2in.port;
+ rmp->inside_port = s->in2out.port;
+ rmp->protocol = ntohs (nat_proto_to_ip_proto (s->nat_proto));
+ }
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
vl_api_nat44_ei_user_session_dump_t_handler (
vl_api_nat44_ei_user_session_dump_t *mp)
{
@@ -1097,6 +1310,59 @@ vl_api_nat44_ei_user_session_dump_t_handler (
}
static void
+vl_api_nat44_ei_user_session_v2_dump_t_handler (
+ vl_api_nat44_ei_user_session_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ nat44_ei_main_t *nm = &nat44_ei_main;
+ nat44_ei_main_per_thread_data_t *tnm;
+ nat44_ei_session_t *s;
+ clib_bihash_kv_8_8_t key, value;
+ nat44_ei_user_key_t ukey;
+ nat44_ei_user_t *u;
+ u32 session_index, head_index, elt_index;
+ dlist_elt_t *head, *elt;
+ ip4_header_t ip;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ clib_memcpy (&ukey.addr, mp->ip_address, 4);
+ ip.src_address.as_u32 = ukey.addr.as_u32;
+ ukey.fib_index = fib_table_find (FIB_PROTOCOL_IP4, ntohl (mp->vrf_id));
+ key.key = ukey.as_u64;
+ if (nm->num_workers > 1)
+ tnm = vec_elt_at_index (
+ nm->per_thread_data,
+ nat44_ei_get_in2out_worker_index (&ip, ukey.fib_index, 0));
+ else
+ tnm = vec_elt_at_index (nm->per_thread_data, nm->num_workers);
+
+ if (clib_bihash_search_8_8 (&tnm->user_hash, &key, &value))
+ return;
+ u = pool_elt_at_index (tnm->users, value.value);
+ if (!u->nsessions && !u->nstaticsessions)
+ return;
+
+ head_index = u->sessions_per_user_list_head_index;
+ head = pool_elt_at_index (tnm->list_pool, head_index);
+ elt_index = head->next;
+ elt = pool_elt_at_index (tnm->list_pool, elt_index);
+ session_index = elt->value;
+ while (session_index != ~0)
+ {
+ s = pool_elt_at_index (tnm->sessions, session_index);
+
+ send_nat44_ei_user_session_v2_details (s, reg, mp->context);
+
+ elt_index = elt->next;
+ elt = pool_elt_at_index (tnm->list_pool, elt_index);
+ session_index = elt->value;
+ }
+}
+
+static void
vl_api_nat44_ei_del_session_t_handler (vl_api_nat44_ei_del_session_t *mp)
{
nat44_ei_main_t *nm = &nat44_ei_main;
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_cli.c b/src/plugins/nat/nat44-ei/nat44_ei_cli.c
index a009f0292d3..eab50a4bc6c 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_cli.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_cli.c
@@ -112,9 +112,9 @@ format_nat44_ei_static_mapping (u8 *s, va_list *args)
nat44_ei_static_mapping_t *m = va_arg (*args, nat44_ei_static_mapping_t *);
nat44_ei_lb_addr_port_t *local;
- if (nat44_ei_is_identity_static_mapping (m))
+ if (is_sm_identity_nat (m->flags))
{
- if (nat44_ei_is_addr_only_static_mapping (m))
+ if (is_sm_addr_only (m->flags))
s = format (s, "identity mapping %U", format_ip4_address,
&m->local_addr);
else
@@ -130,7 +130,7 @@ format_nat44_ei_static_mapping (u8 *s, va_list *args)
return s;
}
- if (nat44_ei_is_addr_only_static_mapping (m))
+ if (is_sm_addr_only (m->flags))
{
s = format (s, "local %U external %U vrf %d", format_ip4_address,
&m->local_addr, format_ip4_address, &m->external_addr,
@@ -154,7 +154,7 @@ format_nat44_ei_static_map_to_resolve (u8 *s, va_list *args)
va_arg (*args, nat44_ei_static_map_resolve_t *);
vnet_main_t *vnm = vnet_get_main ();
- if (m->addr_only)
+ if (is_sm_addr_only (m->flags))
s =
format (s, "local %U external %U vrf %d", format_ip4_address, &m->l_addr,
format_vnet_sw_if_index_name, vnm, m->sw_if_index, m->vrf_id);
@@ -312,7 +312,7 @@ done:
}
static clib_error_t *
-nat_show_workers_commnad_fn (vlib_main_t *vm, unformat_input_t *input,
+nat_show_workers_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
nat44_ei_main_t *nm = &nat44_ei_main;
@@ -338,7 +338,7 @@ nat44_ei_set_log_level_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
nat44_ei_main_t *nm = &nat44_ei_main;
- u8 log_level = NAT_LOG_NONE;
+ u32 log_level = NAT_LOG_NONE;
clib_error_t *error = 0;
if (!unformat_user (input, unformat_line_input, line_input))
@@ -790,9 +790,9 @@ add_address_command_fn (vlib_main_t *vm, unformat_input_t *input,
for (i = 0; i < count; i++)
{
if (is_add)
- rv = nat44_ei_add_address (nm, &this_addr, vrf_id);
+ rv = nat44_ei_add_address (&this_addr, vrf_id);
else
- rv = nat44_ei_del_address (nm, this_addr, 0);
+ rv = nat44_ei_del_address (this_addr, 0);
switch (rv)
{
@@ -841,7 +841,7 @@ nat44_ei_show_addresses_command_fn (vlib_main_t *vm, unformat_input_t *input,
else
vlib_cli_output (vm, " tenant VRF independent");
#define _(N, i, n, s) \
- vlib_cli_output (vm, " %d busy %s ports", ap->busy_##n##_ports, s);
+ vlib_cli_output (vm, " %d busy %s ports", ap->busy_ports[i], s);
foreach_nat_protocol
#undef _
}
@@ -859,8 +859,7 @@ nat44_ei_feature_command_fn (vlib_main_t *vm, unformat_input_t *input,
u32 *inside_sw_if_indices = 0;
u32 *outside_sw_if_indices = 0;
u8 is_output_feature = 0;
- int is_del = 0;
- int i;
+ int i, rv, is_del = 0;
sw_if_index = ~0;
@@ -894,8 +893,15 @@ nat44_ei_feature_command_fn (vlib_main_t *vm, unformat_input_t *input,
sw_if_index = inside_sw_if_indices[i];
if (is_output_feature)
{
- if (nat44_ei_interface_add_del_output_feature (sw_if_index, 1,
- is_del))
+ if (is_del)
+ {
+ rv = nat44_ei_del_output_interface (sw_if_index);
+ }
+ else
+ {
+ rv = nat44_ei_add_output_interface (sw_if_index);
+ }
+ if (rv)
{
error = clib_error_return (
0, "%s %U failed", is_del ? "del" : "add",
@@ -905,7 +911,15 @@ nat44_ei_feature_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
else
{
- if (nat44_ei_interface_add_del (sw_if_index, 1, is_del))
+ if (is_del)
+ {
+ rv = nat44_ei_del_interface (sw_if_index, 1);
+ }
+ else
+ {
+ rv = nat44_ei_add_interface (sw_if_index, 1);
+ }
+ if (rv)
{
error = clib_error_return (
0, "%s %U failed", is_del ? "del" : "add",
@@ -923,8 +937,15 @@ nat44_ei_feature_command_fn (vlib_main_t *vm, unformat_input_t *input,
sw_if_index = outside_sw_if_indices[i];
if (is_output_feature)
{
- if (nat44_ei_interface_add_del_output_feature (sw_if_index, 0,
- is_del))
+ if (is_del)
+ {
+ rv = nat44_ei_del_output_interface (sw_if_index);
+ }
+ else
+ {
+ rv = nat44_ei_add_output_interface (sw_if_index);
+ }
+ if (rv)
{
error = clib_error_return (
0, "%s %U failed", is_del ? "del" : "add",
@@ -934,7 +955,15 @@ nat44_ei_feature_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
else
{
- if (nat44_ei_interface_add_del (sw_if_index, 0, is_del))
+ if (is_del)
+ {
+ rv = nat44_ei_del_interface (sw_if_index, 0);
+ }
+ else
+ {
+ rv = nat44_ei_add_interface (sw_if_index, 0);
+ }
+ if (rv)
{
error = clib_error_return (
0, "%s %U failed", is_del ? "del" : "add",
@@ -990,14 +1019,16 @@ add_static_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- clib_error_t *error = 0;
- ip4_address_t l_addr, e_addr;
- u32 l_port = 0, e_port = 0, vrf_id = ~0;
- int is_add = 1, addr_only = 1, rv;
- u32 sw_if_index = ~0;
vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error = 0;
+ int rv;
+
nat_protocol_t proto = NAT_PROTOCOL_OTHER;
- u8 proto_set = 0;
+ ip4_address_t l_addr, e_addr, pool_addr = { 0 };
+ u32 l_port = 0, e_port = 0, vrf_id = ~0;
+ u8 l_port_set = 0, e_port_set = 0;
+ u32 sw_if_index = ~0, flags = 0;
+ int is_add = 1;
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_EI_EXPECTED_ARGUMENT);
@@ -1006,29 +1037,37 @@ add_static_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "local %U %u", unformat_ip4_address, &l_addr,
&l_port))
- addr_only = 0;
+ {
+ l_port_set = 1;
+ }
else if (unformat (line_input, "local %U", unformat_ip4_address,
&l_addr))
;
else if (unformat (line_input, "external %U %u", unformat_ip4_address,
&e_addr, &e_port))
- addr_only = 0;
+ {
+ e_port_set = 1;
+ }
else if (unformat (line_input, "external %U", unformat_ip4_address,
&e_addr))
;
else if (unformat (line_input, "external %U %u",
unformat_vnet_sw_interface, vnm, &sw_if_index,
&e_port))
- addr_only = 0;
+ {
+ e_port_set = 1;
+ }
else if (unformat (line_input, "external %U", unformat_vnet_sw_interface,
vnm, &sw_if_index))
;
else if (unformat (line_input, "vrf %u", &vrf_id))
;
else if (unformat (line_input, "%U", unformat_nat_protocol, &proto))
- proto_set = 1;
+ ;
else if (unformat (line_input, "del"))
- is_add = 0;
+ {
+ is_add = 0;
+ }
else
{
error = clib_error_return (0, "unknown input: '%U'",
@@ -1037,25 +1076,38 @@ add_static_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
- if (addr_only)
+ if (l_port_set != e_port_set)
{
- if (proto_set)
- {
- error = clib_error_return (
- 0, "address only mapping doesn't support protocol");
- goto done;
- }
+ error = clib_error_return (0, "Either both ports are set or none.");
+ goto done;
}
- else if (!proto_set)
+
+ if (!l_port_set)
{
- error = clib_error_return (0, "protocol is required");
- goto done;
+ flags |= NAT44_EI_SM_FLAG_ADDR_ONLY;
+ }
+ else
+ {
+ l_port = clib_host_to_net_u16 (l_port);
+ e_port = clib_host_to_net_u16 (e_port);
+ }
+
+ if (sw_if_index != ~0)
+ {
+ flags |= NAT44_EI_SM_FLAG_SWITCH_ADDRESS;
}
- rv = nat44_ei_add_del_static_mapping (
- l_addr, e_addr, clib_host_to_net_u16 (l_port),
- clib_host_to_net_u16 (e_port), proto, sw_if_index, vrf_id, addr_only, 0, 0,
- is_add);
+ if (is_add)
+ {
+ rv =
+ nat44_ei_add_static_mapping (l_addr, e_addr, l_port, e_port, proto,
+ vrf_id, sw_if_index, flags, pool_addr, 0);
+ }
+ else
+ {
+ rv = nat44_ei_del_static_mapping (l_addr, e_addr, l_port, e_port, proto,
+ vrf_id, sw_if_index, flags);
+ }
switch (rv)
{
@@ -1091,17 +1143,15 @@ add_identity_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
clib_error_t *error = 0;
- u32 port = 0, vrf_id = ~0;
+
+ int rv, is_add = 1, port_set = 0;
+ u32 sw_if_index = ~0, port, flags, vrf_id = ~0;
+ nat_protocol_t proto = NAT_PROTOCOL_OTHER;
ip4_address_t addr;
- int is_add = 1;
- int addr_only = 1;
- u32 sw_if_index = ~0;
- vnet_main_t *vnm = vnet_get_main ();
- int rv;
- nat_protocol_t proto;
- addr.as_u32 = 0;
+ flags = NAT44_EI_SM_FLAG_IDENTITY_NAT;
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_EI_EXPECTED_ARGUMENT);
@@ -1117,9 +1167,13 @@ add_identity_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "%U %u", unformat_nat_protocol, &proto,
&port))
- addr_only = 0;
+ {
+ port_set = 1;
+ }
else if (unformat (line_input, "del"))
- is_add = 0;
+ {
+ is_add = 0;
+ }
else
{
error = clib_error_return (0, "unknown input: '%U'",
@@ -1128,9 +1182,31 @@ add_identity_mapping_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
- rv = nat44_ei_add_del_static_mapping (
- addr, addr, clib_host_to_net_u16 (port), clib_host_to_net_u16 (port),
- proto, sw_if_index, vrf_id, addr_only, 1, 0, is_add);
+ if (!port_set)
+ {
+ flags |= NAT44_EI_SM_FLAG_ADDR_ONLY;
+ }
+ else
+ {
+ port = clib_host_to_net_u16 (port);
+ }
+
+ if (sw_if_index != ~0)
+ {
+ flags |= NAT44_EI_SM_FLAG_SWITCH_ADDRESS;
+ }
+
+ if (is_add)
+ {
+
+ rv = nat44_ei_add_static_mapping (addr, addr, port, port, proto, vrf_id,
+ sw_if_index, flags, addr, 0);
+ }
+ else
+ {
+ rv = nat44_ei_del_static_mapping (addr, addr, port, port, proto, vrf_id,
+ sw_if_index, flags);
+ }
switch (rv)
{
@@ -1184,12 +1260,11 @@ nat44_ei_add_interface_address_command_fn (vlib_main_t *vm,
unformat_input_t *input,
vlib_cli_command_t *cmd)
{
- nat44_ei_main_t *nm = &nat44_ei_main;
unformat_input_t _line_input, *line_input = &_line_input;
- u32 sw_if_index;
- int rv;
- int is_del = 0;
+ nat44_ei_main_t *nm = &nat44_ei_main;
clib_error_t *error = 0;
+ int rv, is_del = 0;
+ u32 sw_if_index;
if (!unformat_user (input, unformat_line_input, line_input))
return clib_error_return (0, NAT44_EI_EXPECTED_ARGUMENT);
@@ -1200,7 +1275,9 @@ nat44_ei_add_interface_address_command_fn (vlib_main_t *vm,
nm->vnet_main, &sw_if_index))
;
else if (unformat (line_input, "del"))
- is_del = 1;
+ {
+ is_del = 1;
+ }
else
{
error = clib_error_return (0, "unknown input '%U'",
@@ -1209,17 +1286,21 @@ nat44_ei_add_interface_address_command_fn (vlib_main_t *vm,
}
}
- rv = nat44_ei_add_interface_address (nm, sw_if_index, is_del);
-
- switch (rv)
+ if (!is_del)
{
- case 0:
- break;
-
- default:
- error = clib_error_return (
- 0, "nat44_ei_add_interface_address returned %d", rv);
- goto done;
+ rv = nat44_ei_add_interface_address (sw_if_index);
+ if (rv)
+ {
+ error = clib_error_return (0, "add address returned %d", rv);
+ }
+ }
+ else
+ {
+ rv = nat44_ei_del_interface_address (sw_if_index);
+ if (rv)
+ {
+ error = clib_error_return (0, "del address returned %d", rv);
+ }
}
done:
@@ -1252,6 +1333,8 @@ nat44_ei_show_sessions_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = 0;
+ ip4_address_t saddr;
+ u8 filter_saddr = 0;
nat44_ei_main_per_thread_data_t *tnm;
nat44_ei_main_t *nm = &nat44_ei_main;
@@ -1266,6 +1349,9 @@ nat44_ei_show_sessions_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "detail"))
detail = 1;
+ else if (unformat (line_input, "filter saddr %U", unformat_ip4_address,
+ &saddr))
+ filter_saddr = 1;
else
{
error = clib_error_return (0, "unknown input '%U'",
@@ -1288,6 +1374,8 @@ print:
nat44_ei_user_t *u;
pool_foreach (u, tnm->users)
{
+ if (filter_saddr && saddr.as_u32 != u->addr.as_u32)
+ continue;
vlib_cli_output (vm, " %U", format_nat44_ei_user, tnm, u, detail);
}
}
@@ -1486,7 +1574,6 @@ nat_show_timeouts_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
nat44_ei_main_t *nm = &nat44_ei_main;
- // TODO: make format timeout function
vlib_cli_output (vm, "udp timeout: %dsec", nm->timeouts.udp);
vlib_cli_output (vm, "tcp-established timeout: %dsec",
nm->timeouts.tcp.established);
@@ -1502,23 +1589,23 @@ nat_show_timeouts_command_fn (vlib_main_t *vm, unformat_input_t *input,
* @cliexstart{nat44 ei}
* Enable nat44 ei plugin
* To enable nat44-ei, use:
- * vpp# nat44 ei enable
+ * vpp# nat44 ei plugin enable
* To disable nat44-ei, use:
- * vpp# nat44 ei disable
+ * vpp# nat44 ei plugin disable
* To enable nat44 ei static mapping only, use:
- * vpp# nat44 ei enable static-mapping
+ * vpp# nat44 ei plugin enable static-mapping
* To enable nat44 ei static mapping with connection tracking, use:
- * vpp# nat44 ei enable static-mapping connection-tracking
+ * vpp# nat44 ei plugin enable static-mapping connection-tracking
* To enable nat44 ei out2in dpo, use:
- * vpp# nat44 ei enable out2in-dpo
+ * vpp# nat44 ei plugin enable out2in-dpo
* To set inside-vrf outside-vrf, use:
- * vpp# nat44 ei enable inside-vrf <id> outside-vrf <id>
+ * vpp# nat44 ei plugin enable inside-vrf <id> outside-vrf <id>
* @cliexend
?*/
VLIB_CLI_COMMAND (nat44_ei_enable_disable_command, static) = {
- .path = "nat44 ei",
+ .path = "nat44 ei plugin",
.short_help =
- "nat44 ei <enable [sessions <max-number>] [users <max-number>] "
+ "nat44 ei plugin <enable [sessions <max-number>] [users <max-number>] "
"[static-mappig-only [connection-tracking]|out2in-dpo] [inside-vrf "
"<vrf-id>] [outside-vrf <vrf-id>] [user-sessions <max-number>]>|disable",
.function = nat44_ei_enable_disable_command_fn,
@@ -1550,7 +1637,7 @@ VLIB_CLI_COMMAND (set_workers_command, static) = {
VLIB_CLI_COMMAND (nat_show_workers_command, static) = {
.path = "show nat44 ei workers",
.short_help = "show nat44 ei workers",
- .function = nat_show_workers_commnad_fn,
+ .function = nat_show_workers_command_fn,
};
/*?
@@ -1930,7 +2017,7 @@ VLIB_CLI_COMMAND (nat44_ei_show_interface_address_command, static) = {
?*/
VLIB_CLI_COMMAND (nat44_ei_show_sessions_command, static) = {
.path = "show nat44 ei sessions",
- .short_help = "show nat44 ei sessions [detail]",
+ .short_help = "show nat44 ei sessions [detail] [filter saddr <ip>]",
.function = nat44_ei_show_sessions_command_fn,
};
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_ha.c b/src/plugins/nat/nat44-ei/nat44_ei_ha.c
index 39bce255bd6..9546a595cc2 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_ha.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_ha.c
@@ -926,14 +926,12 @@ nat_ha_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_ha_worker_node) = {
.function = nat_ha_worker_fn,
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
.name = "nat44-ei-ha-worker",
};
-/* *INDENT-ON* */
/* periodically send interrupt to each thread */
static uword
@@ -969,13 +967,11 @@ nat_ha_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_ha_process_node) = {
.function = nat_ha_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "nat44-ei-ha-process",
};
-/* *INDENT-ON* */
void
nat_ha_get_resync_status (u8 * in_resync, u32 * resync_ack_missed)
@@ -1166,7 +1162,6 @@ nat_ha_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_ha_node) = {
.function = nat_ha_node_fn,
.name = "nat44-ei-ha",
@@ -1181,7 +1176,6 @@ VLIB_REGISTER_NODE (nat_ha_node) = {
[NAT_HA_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
typedef struct
{
@@ -1286,7 +1280,6 @@ nat_ha_resync (u32 client_index, u32 pid,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat_ha_handoff_node) = {
.function = nat_ha_handoff_node_fn,
.name = "nat44-ei-ha-handoff",
@@ -1300,7 +1293,6 @@ VLIB_REGISTER_NODE (nat_ha_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md b/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md
deleted file mode 100644
index f0ea209e250..00000000000
--- a/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# Active-Passive NAT HA {#nat_ha_doc}
-
-## Introduction
-
-One NAT node actively manages traffic while the other is synchronized and ready to transition to the active state and takes over seamlessly and enforces the same NAT sessions when failure occur. Both nodes share the same configuration settings.
-
-## Configuration
-
-### NAT HA protocol
-Session synchronization traffic is distributed through an IPv4 UDP connection. The active node sends NAT HA protocol events to passive node. To achieve reliable transfer NAT HA protocol uses acknowledgement with re-transmission. This require the passive node to respond with an acknowledgement message as it receives the data. The active node keeps a record of each packet it sends and maintains a timer from when the packet was sent. The active node re-transmits a packet if the timer expires before receiving the acknowledgement.
-
-### Topology
-
-The two NAT nodes have a dedicated link (interface GE0/0/3 on both) to synchronize NAT sessions using NAT HA protocol.
-
-```
- +-----------------------+
- | outside network |
- +-----------------------+
- / \
- / \
- / \
- / \
- / \
-+---------+ +---------+
-| GE0/0/1 | Active Passive | GE0/0/1 |
-| | | |
-| GE0/0/3|-------------------|GE0/0/3 |
-| | sync network | |
-| GE0/0/0 | | GE0/0/0 |
-+---------+ +---------+
- \ /
- \ /
- \ /
- \ /
- \ /
- +-----------------------+
- | inside network |
- +-----------------------+
-```
-
-### Active node configuration
-
-```
-set interface ip address GigabitEthernet0/0/1 10.15.7.101/24
-set interface ip address GigabitEthernet0/0/0 172.16.10.101/24
-set interface ip address GigabitEthernet0/0/3 10.0.0.1/24
-set interface state GigabitEthernet0/0/0 up
-set interface state GigabitEthernet0/0/1 up
-set interface state GigabitEthernet0/0/3 up
-set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1
-nat44 add address 10.15.7.100
-nat ha listener 10.0.0.1:1234
-nat ha failover 10.0.0.2:2345
-```
-
-### Passive node configuration
-
-```
-set interface ip address GigabitEthernet0/0/1 10.15.7.102/24
-set interface ip address GigabitEthernet0/0/0 172.16.10.102/24
-set interface ip address GigabitEthernet0/0/3 10.0.0.2/24
-set interface state GigabitEthernet0/0/0 up
-set interface state GigabitEthernet0/0/1 up
-set interface state GigabitEthernet0/0/3 up
-set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1
-nat44 add address 10.15.7.100
-nat ha listener 10.0.0.2:2345
-```
-
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.rst b/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.rst
new file mode 100644
index 00000000000..46befc52351
--- /dev/null
+++ b/src/plugins/nat/nat44-ei/nat44_ei_ha_doc.rst
@@ -0,0 +1,88 @@
+Active-Passive NAT HA
+=====================
+
+Introduction
+------------
+
+One NAT node actively manages traffic while the other is synchronized
+and ready to transition to the active state and takes over seamlessly
+and enforces the same NAT sessions when failure occur. Both nodes share
+the same configuration settings.
+
+Configuration
+-------------
+
+NAT HA protocol
+~~~~~~~~~~~~~~~
+
+Session synchronization traffic is distributed through an IPv4 UDP
+connection. The active node sends NAT HA protocol events to passive
+node. To achieve reliable transfer NAT HA protocol uses acknowledgment
+with re-transmission. This require the passive node to respond with an
+acknowledgment message as it receives the data. The active node keeps a
+record of each packet it sends and maintains a timer from when the
+packet was sent. The active node re-transmits a packet if the timer
+expires before receiving the acknowledgment.
+
+Topology
+~~~~~~~~
+
+The two NAT nodes have a dedicated link (interface GE0/0/3 on both) to
+synchronize NAT sessions using NAT HA protocol.
+
+::
+
+ +-----------------------+
+ | outside network |
+ +-----------------------+
+ / \
+ / \
+ / \
+ / \
+ / \
+ +---------+ +---------+
+ | GE0/0/1 | Active Passive | GE0/0/1 |
+ | | | |
+ | GE0/0/3|-------------------|GE0/0/3 |
+ | | sync network | |
+ | GE0/0/0 | | GE0/0/0 |
+ +---------+ +---------+
+ \ /
+ \ /
+ \ /
+ \ /
+ \ /
+ +-----------------------+
+ | inside network |
+ +-----------------------+
+
+Active node configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ set interface ip address GigabitEthernet0/0/1 10.15.7.101/24
+ set interface ip address GigabitEthernet0/0/0 172.16.10.101/24
+ set interface ip address GigabitEthernet0/0/3 10.0.0.1/24
+ set interface state GigabitEthernet0/0/0 up
+ set interface state GigabitEthernet0/0/1 up
+ set interface state GigabitEthernet0/0/3 up
+ set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1
+ nat44 add address 10.15.7.100
+ nat ha listener 10.0.0.1:1234
+ nat ha failover 10.0.0.2:2345
+
+Passive node configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ set interface ip address GigabitEthernet0/0/1 10.15.7.102/24
+ set interface ip address GigabitEthernet0/0/0 172.16.10.102/24
+ set interface ip address GigabitEthernet0/0/3 10.0.0.2/24
+ set interface state GigabitEthernet0/0/0 up
+ set interface state GigabitEthernet0/0/1 up
+ set interface state GigabitEthernet0/0/3 up
+ set interface nat44 in GigabitEthernet0/0/0 out GigabitEthernet0/0/1
+ nat44 add address 10.15.7.100
+ nat ha listener 10.0.0.2:2345
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.c b/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.c
deleted file mode 100644
index a049e4659a7..00000000000
--- a/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.c
+++ /dev/null
@@ -1,756 +0,0 @@
-/*
- * nat44_ei.c - nat44 endpoint dependent plugin
- * * Copyright (c) 2020 Cisco and/or its affiliates. * Licensed under the
- * Apache License, Version 2.0 (the "License"); you may not use this file
- * except in compliance with the License. You may obtain a copy of the License
- * at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- * License for the specific language governing permissions and limitations
- * under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/fib/ip4_fib.h>
-
-#include <nat/nat44-ei/nat44_ei.h>
-#include <nat/nat44-ei/nat44_ei_inlines.h>
-#include <nat/nat44-ei/nat44_ei_hairpinning.h>
-
-/* NAT buffer flags */
-#define NAT44_EI_FLAG_HAIRPINNING (1 << 0)
-
-typedef enum
-{
- NAT44_EI_HAIRPIN_SRC_NEXT_DROP,
- NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT,
- NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH,
- NAT44_EI_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT,
- NAT44_EI_HAIRPIN_SRC_N_NEXT,
-} nat44_ei_hairpin_src_next_t;
-
-typedef enum
-{
- NAT44_EI_HAIRPIN_NEXT_LOOKUP,
- NAT44_EI_HAIRPIN_NEXT_DROP,
- NAT44_EI_HAIRPIN_NEXT_HANDOFF,
- NAT44_EI_HAIRPIN_N_NEXT,
-} nat44_ei_hairpin_next_t;
-
-typedef struct
-{
- ip4_address_t addr;
- u16 port;
- u32 fib_index;
- u32 session_index;
-} nat44_ei_hairpin_trace_t;
-
-static u8 *
-format_nat44_ei_hairpin_trace (u8 *s, va_list *args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- nat44_ei_hairpin_trace_t *t = va_arg (*args, nat44_ei_hairpin_trace_t *);
-
- s = format (s, "new dst addr %U port %u fib-index %u", format_ip4_address,
- &t->addr, clib_net_to_host_u16 (t->port), t->fib_index);
- if (~0 == t->session_index)
- {
- s = format (s, " is-static-mapping");
- }
- else
- {
- s = format (s, " session-index %u", t->session_index);
- }
-
- return s;
-}
-
-extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
-
-static_always_inline int
-nat44_ei_is_hairpinning (nat44_ei_main_t *nm, ip4_address_t *dst_addr)
-{
- nat44_ei_address_t *ap;
- clib_bihash_kv_8_8_t kv, value;
-
- vec_foreach (ap, nm->addresses)
- {
- if (ap->addr.as_u32 == dst_addr->as_u32)
- return 1;
- }
-
- init_nat_k (&kv, *dst_addr, 0, 0, 0);
- if (!clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
- return 1;
-
- return 0;
-}
-
-#ifndef CLIB_MARCH_VARIANT
-void
-nat44_ei_hairpinning_sm_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
- ip4_header_t *ip)
-{
- clib_bihash_kv_8_8_t kv, value;
- nat44_ei_static_mapping_t *m;
- u32 old_addr, new_addr;
- ip_csum_t sum;
-
- init_nat_k (&kv, ip->dst_address, 0, 0, 0);
- if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
- return;
-
- m = pool_elt_at_index (nm->static_mappings, value.value);
-
- old_addr = ip->dst_address.as_u32;
- new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
- sum = ip->checksum;
- sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
- ip->checksum = ip_csum_fold (sum);
-
- if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
- vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index;
-}
-#endif
-
-#ifndef CLIB_MARCH_VARIANT
-int
-nat44_ei_hairpinning (vlib_main_t *vm, vlib_node_runtime_t *node,
- nat44_ei_main_t *nm, u32 thread_index, vlib_buffer_t *b0,
- ip4_header_t *ip0, udp_header_t *udp0,
- tcp_header_t *tcp0, u32 proto0, int do_trace,
- u32 *required_thread_index)
-{
- nat44_ei_session_t *s0 = NULL;
- clib_bihash_kv_8_8_t kv0, value0;
- ip_csum_t sum0;
- u32 new_dst_addr0 = 0, old_dst_addr0, si = ~0;
- u16 new_dst_port0 = ~0, old_dst_port0;
- int rv;
- ip4_address_t sm0_addr;
- u16 sm0_port;
- u32 sm0_fib_index;
- u32 old_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
-
- /* Check if destination is static mappings */
- if (!nat44_ei_static_mapping_match (
- ip0->dst_address, udp0->dst_port, nm->outside_fib_index, proto0,
- &sm0_addr, &sm0_port, &sm0_fib_index, 1 /* by external */, 0, 0))
- {
- new_dst_addr0 = sm0_addr.as_u32;
- new_dst_port0 = sm0_port;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm0_fib_index;
- }
- /* or active session */
- else
- {
- init_nat_k (&kv0, ip0->dst_address, udp0->dst_port,
- nm->outside_fib_index, proto0);
- rv = clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0);
- if (rv)
- {
- rv = 0;
- goto trace;
- }
-
- if (thread_index != nat_value_get_thread_index (&value0))
- {
- *required_thread_index = nat_value_get_thread_index (&value0);
- return 0;
- }
-
- si = nat_value_get_session_index (&value0);
- s0 = pool_elt_at_index (nm->per_thread_data[thread_index].sessions, si);
- new_dst_addr0 = s0->in2out.addr.as_u32;
- new_dst_port0 = s0->in2out.port;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
- }
-
- /* Check if anything has changed and if not, then return 0. This
- helps avoid infinite loop, repeating the three nodes
- nat44-hairpinning-->ip4-lookup-->ip4-local, in case nothing has
- changed. */
- old_dst_addr0 = ip0->dst_address.as_u32;
- old_dst_port0 = tcp0->dst;
- if (new_dst_addr0 == old_dst_addr0 && new_dst_port0 == old_dst_port0 &&
- vnet_buffer (b0)->sw_if_index[VLIB_TX] == old_sw_if_index)
- return 0;
-
- /* Destination is behind the same NAT, use internal address and port */
- if (new_dst_addr0)
- {
- old_dst_addr0 = ip0->dst_address.as_u32;
- ip0->dst_address.as_u32 = new_dst_addr0;
- sum0 = ip0->checksum;
- sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t,
- dst_address);
- ip0->checksum = ip_csum_fold (sum0);
-
- old_dst_port0 = tcp0->dst;
- if (PREDICT_TRUE (new_dst_port0 != old_dst_port0))
- {
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- tcp0->dst = new_dst_port0;
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
- ip4_header_t, dst_address);
- sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
- ip4_header_t /* cheat */, length);
- tcp0->checksum = ip_csum_fold (sum0);
- }
- else
- {
- udp0->dst_port = new_dst_port0;
- udp0->checksum = 0;
- }
- }
- else
- {
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
- ip4_header_t, dst_address);
- tcp0->checksum = ip_csum_fold (sum0);
- }
- }
- rv = 1;
- goto trace;
- }
- rv = 0;
-trace:
- if (do_trace && PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
- (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- nat44_ei_hairpin_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
- t->addr.as_u32 = new_dst_addr0;
- t->port = new_dst_port0;
- t->fib_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
- if (s0)
- {
- t->session_index = si;
- }
- else
- {
- t->session_index = ~0;
- }
- }
- return rv;
-}
-#endif
-
-#ifndef CLIB_MARCH_VARIANT
-u32
-nat44_ei_icmp_hairpinning (nat44_ei_main_t *nm, vlib_buffer_t *b0,
- u32 thread_index, ip4_header_t *ip0,
- icmp46_header_t *icmp0, u32 *required_thread_index)
-{
- clib_bihash_kv_8_8_t kv0, value0;
- u32 old_dst_addr0, new_dst_addr0;
- u32 old_addr0, new_addr0;
- u16 old_port0, new_port0;
- u16 old_checksum0, new_checksum0;
- u32 si, ti = 0;
- ip_csum_t sum0;
- nat44_ei_session_t *s0;
- nat44_ei_static_mapping_t *m0;
-
- if (icmp_type_is_error_message (
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
- {
- ip4_header_t *inner_ip0 = 0;
- tcp_udp_header_t *l4_header = 0;
-
- inner_ip0 = (ip4_header_t *) ((icmp_echo_header_t *) (icmp0 + 1) + 1);
- l4_header = ip4_next_header (inner_ip0);
- u32 protocol = ip_proto_to_nat_proto (inner_ip0->protocol);
-
- if (protocol != NAT_PROTOCOL_TCP && protocol != NAT_PROTOCOL_UDP)
- return 1;
-
- init_nat_k (&kv0, ip0->dst_address, l4_header->src_port,
- nm->outside_fib_index, protocol);
- if (clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0))
- return 1;
- ti = nat_value_get_thread_index (&value0);
- if (ti != thread_index)
- {
- *required_thread_index = ti;
- return 1;
- }
- si = nat_value_get_session_index (&value0);
- s0 = pool_elt_at_index (nm->per_thread_data[ti].sessions, si);
- new_dst_addr0 = s0->in2out.addr.as_u32;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
-
- /* update inner source IP address */
- old_addr0 = inner_ip0->src_address.as_u32;
- inner_ip0->src_address.as_u32 = new_dst_addr0;
- new_addr0 = inner_ip0->src_address.as_u32;
- sum0 = icmp0->checksum;
- sum0 =
- ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address);
- icmp0->checksum = ip_csum_fold (sum0);
-
- /* update inner IP header checksum */
- old_checksum0 = inner_ip0->checksum;
- sum0 = inner_ip0->checksum;
- sum0 =
- ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address);
- inner_ip0->checksum = ip_csum_fold (sum0);
- new_checksum0 = inner_ip0->checksum;
- sum0 = icmp0->checksum;
- sum0 = ip_csum_update (sum0, old_checksum0, new_checksum0, ip4_header_t,
- checksum);
- icmp0->checksum = ip_csum_fold (sum0);
-
- /* update inner source port */
- old_port0 = l4_header->src_port;
- l4_header->src_port = s0->in2out.port;
- new_port0 = l4_header->src_port;
- sum0 = icmp0->checksum;
- sum0 = ip_csum_update (sum0, old_port0, new_port0, tcp_udp_header_t,
- src_port);
- icmp0->checksum = ip_csum_fold (sum0);
- }
- else
- {
- init_nat_k (&kv0, ip0->dst_address, 0, nm->outside_fib_index, 0);
- if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv0,
- &value0))
- {
- icmp_echo_header_t *echo0 = (icmp_echo_header_t *) (icmp0 + 1);
- u16 icmp_id0 = echo0->identifier;
- init_nat_k (&kv0, ip0->dst_address, icmp_id0, nm->outside_fib_index,
- NAT_PROTOCOL_ICMP);
- int rv = clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0);
- if (!rv)
- {
- ti = nat_value_get_thread_index (&value0);
- if (ti != thread_index)
- {
- *required_thread_index = ti;
- return 1;
- }
- si = nat_value_get_session_index (&value0);
- s0 = pool_elt_at_index (nm->per_thread_data[ti].sessions, si);
- new_dst_addr0 = s0->in2out.addr.as_u32;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
- echo0->identifier = s0->in2out.port;
- sum0 = icmp0->checksum;
- sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
- icmp_echo_header_t, identifier);
- icmp0->checksum = ip_csum_fold (sum0);
- goto change_addr;
- }
-
- return 1;
- }
-
- m0 = pool_elt_at_index (nm->static_mappings, value0.value);
-
- new_dst_addr0 = m0->local_addr.as_u32;
- if (vnet_buffer (b0)->sw_if_index[VLIB_TX] == ~0)
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = m0->fib_index;
- }
-change_addr:
- /* Destination is behind the same NAT, use internal address and port */
- if (new_dst_addr0)
- {
- old_dst_addr0 = ip0->dst_address.as_u32;
- ip0->dst_address.as_u32 = new_dst_addr0;
- sum0 = ip0->checksum;
- sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t,
- dst_address);
- ip0->checksum = ip_csum_fold (sum0);
- }
- return 0;
-}
-#endif
-
-void nat44_ei_hairpinning_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
- ip4_header_t *ip);
-
-#ifndef CLIB_MARCH_VARIANT
-void
-nat44_ei_hairpinning_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
- ip4_header_t *ip)
-{
- clib_bihash_kv_8_8_t kv, value;
- nat44_ei_static_mapping_t *m;
- u32 old_addr, new_addr;
- ip_csum_t sum;
-
- init_nat_k (&kv, ip->dst_address, 0, 0, 0);
- if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
- return;
-
- m = pool_elt_at_index (nm->static_mappings, value.value);
-
- old_addr = ip->dst_address.as_u32;
- new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
- sum = ip->checksum;
- sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
- ip->checksum = ip_csum_fold (sum);
-
- if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
- vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index;
-}
-#endif
-
-VLIB_NODE_FN (nat44_ei_hairpin_src_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- u32 n_left_from, *from, *to_next;
- nat44_ei_hairpin_src_next_t next_index;
- nat44_ei_main_t *nm = &nat44_ei_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- nat44_ei_interface_t *i;
- u32 sw_if_index0;
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- pool_foreach (i, nm->output_feature_interfaces)
- {
- /* Only packets from NAT inside interface */
- if ((nat44_ei_interface_is_inside (i)) &&
- (sw_if_index0 == i->sw_if_index))
- {
- if (PREDICT_FALSE ((vnet_buffer (b0)->snat.flags) &
- NAT44_EI_FLAG_HAIRPINNING))
- {
- if (PREDICT_TRUE (nm->num_workers > 1))
- {
- next0 = NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH;
- goto skip_feature_next;
- }
- else
- {
- next0 = NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT;
- goto skip_feature_next;
- }
- }
- break;
- }
- }
-
- vnet_feature_next (&next0, b0);
- skip_feature_next:
-
- if (next0 != NAT44_EI_HAIRPIN_SRC_NEXT_DROP)
- {
- vlib_increment_simple_counter (
- &nm->counters.hairpinning, vm->thread_index, sw_if_index0, 1);
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (nat44_ei_hairpin_dst_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- u32 n_left_from, *from, *to_next;
- u32 thread_index = vm->thread_index;
- nat44_ei_hairpin_next_t next_index;
- nat44_ei_main_t *nm = &nat44_ei_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- ip4_header_t *ip0;
- u32 proto0;
- u32 sw_if_index0;
- u32 required_thread_index = thread_index;
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- next0 = NAT44_EI_HAIRPIN_NEXT_LOOKUP;
- ip0 = vlib_buffer_get_current (b0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
-
- vnet_buffer (b0)->snat.flags = 0;
- if (PREDICT_FALSE (nat44_ei_is_hairpinning (nm, &ip0->dst_address)))
- {
- if (proto0 == NAT_PROTOCOL_TCP || proto0 == NAT_PROTOCOL_UDP)
- {
- udp_header_t *udp0 = ip4_next_header (ip0);
- tcp_header_t *tcp0 = (tcp_header_t *) udp0;
-
- nat44_ei_hairpinning (vm, node, nm, thread_index, b0, ip0,
- udp0, tcp0, proto0, 1 /* do_trace */,
- &required_thread_index);
- }
- else if (proto0 == NAT_PROTOCOL_ICMP)
- {
- icmp46_header_t *icmp0 = ip4_next_header (ip0);
-
- nat44_ei_icmp_hairpinning (nm, b0, thread_index, ip0, icmp0,
- &required_thread_index);
- }
- else
- {
- nat44_ei_hairpinning_unknown_proto (nm, b0, ip0);
- }
-
- vnet_buffer (b0)->snat.flags = NAT44_EI_FLAG_HAIRPINNING;
- }
-
- if (thread_index != required_thread_index)
- {
- vnet_buffer (b0)->snat.required_thread_index =
- required_thread_index;
- next0 = NAT44_EI_HAIRPIN_NEXT_HANDOFF;
- }
-
- if (next0 != NAT44_EI_HAIRPIN_NEXT_DROP)
- {
- vlib_increment_simple_counter (
- &nm->counters.hairpinning, vm->thread_index, sw_if_index0, 1);
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (nat44_ei_hairpinning_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- u32 n_left_from, *from, *to_next;
- u32 thread_index = vm->thread_index;
- nat44_ei_hairpin_next_t next_index;
- nat44_ei_main_t *nm = &nat44_ei_main;
- vnet_feature_main_t *fm = &feature_main;
- u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
- vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- ip4_header_t *ip0;
- u32 proto0;
- udp_header_t *udp0;
- tcp_header_t *tcp0;
- u32 sw_if_index0;
- u32 required_thread_index = thread_index;
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- ip0 = vlib_buffer_get_current (b0);
- udp0 = ip4_next_header (ip0);
- tcp0 = (tcp_header_t *) udp0;
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
- int next0_resolved = 0;
-
- if (nat44_ei_hairpinning (vm, node, nm, thread_index, b0, ip0, udp0,
- tcp0, proto0, 1 /* do_trace */,
- &required_thread_index))
- {
- next0 = NAT44_EI_HAIRPIN_NEXT_LOOKUP;
- next0_resolved = 1;
- }
-
- if (thread_index != required_thread_index)
- {
- vnet_buffer (b0)->snat.required_thread_index =
- required_thread_index;
- next0 = NAT44_EI_HAIRPIN_NEXT_HANDOFF;
- next0_resolved = 1;
- }
-
- if (!next0_resolved)
- vnet_get_config_data (&cm->config_main, &b0->current_config_index,
- &next0, 0);
-
- if (next0 != NAT44_EI_HAIRPIN_NEXT_DROP)
- {
- vlib_increment_simple_counter (
- &nm->counters.hairpinning, vm->thread_index, sw_if_index0, 1);
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (nat44_ei_hairpinning_dst_handoff_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_hairpinning_handoff_fn_inline (
- vm, node, frame, nat44_ei_main.hairpin_dst_fq_index);
-}
-
-VLIB_NODE_FN (nat44_ei_hairpinning_handoff_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_hairpinning_handoff_fn_inline (
- vm, node, frame, nat44_ei_main.hairpinning_fq_index);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_hairpinning_dst_handoff_node) = {
- .name = "nat44-ei-hairpin-dst-handoff",
- .vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
- .error_strings = nat44_ei_hairpinning_handoff_error_strings,
- .format_trace = format_nat44_ei_hairpinning_handoff_trace,
-
- .n_next_nodes = 1,
-
- .next_nodes = {
- [0] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (nat44_ei_hairpinning_handoff_node) = {
- .name = "nat44-ei-hairpinning-handoff",
- .vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
- .error_strings = nat44_ei_hairpinning_handoff_error_strings,
- .format_trace = format_nat44_ei_hairpinning_handoff_trace,
-
- .n_next_nodes = 1,
-
- .next_nodes = {
- [0] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (nat44_ei_hairpin_src_node) = {
- .name = "nat44-ei-hairpin-src",
- .vector_size = sizeof (u32),
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_next_nodes = NAT44_EI_HAIRPIN_SRC_N_NEXT,
- .next_nodes = {
- [NAT44_EI_HAIRPIN_SRC_NEXT_DROP] = "error-drop",
- [NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-ei-in2out-output",
- [NAT44_EI_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output",
- [NAT44_EI_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-ei-in2out-output-worker-handoff",
- },
-};
-
-VLIB_REGISTER_NODE (nat44_ei_hairpin_dst_node) = {
- .name = "nat44-ei-hairpin-dst",
- .vector_size = sizeof (u32),
- .type = VLIB_NODE_TYPE_INTERNAL,
- .format_trace = format_nat44_ei_hairpin_trace,
- .n_next_nodes = NAT44_EI_HAIRPIN_N_NEXT,
- .next_nodes = {
- [NAT44_EI_HAIRPIN_NEXT_DROP] = "error-drop",
- [NAT44_EI_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_HAIRPIN_NEXT_HANDOFF] = "nat44-ei-hairpin-dst-handoff",
- },
-};
-
-VLIB_REGISTER_NODE (nat44_ei_hairpinning_node) = {
- .name = "nat44-ei-hairpinning",
- .vector_size = sizeof (u32),
- .type = VLIB_NODE_TYPE_INTERNAL,
- .format_trace = format_nat44_ei_hairpin_trace,
- .n_next_nodes = NAT44_EI_HAIRPIN_N_NEXT,
- .next_nodes = {
- [NAT44_EI_HAIRPIN_NEXT_DROP] = "error-drop",
- [NAT44_EI_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_HAIRPIN_NEXT_HANDOFF] = "nat44-ei-hairpinning-handoff",
- },
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.h b/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.h
deleted file mode 100644
index 908e6b2cfc9..00000000000
--- a/src/plugins/nat/nat44-ei/nat44_ei_hairpinning.h
+++ /dev/null
@@ -1,92 +0,0 @@
-#ifndef __included_nat44_ei_hairpinning_h__
-#define __included_nat44_ei_hairpinning_h__
-
-#include <nat/nat44-ei/nat44_ei.h>
-
-#define foreach_nat44_ei_hairpinning_handoff_error \
- _ (CONGESTION_DROP, "congestion drop")
-
-typedef enum
-{
-#define _(sym, str) NAT44_EI_HAIRPINNING_HANDOFF_ERROR_##sym,
- foreach_nat44_ei_hairpinning_handoff_error
-#undef _
- NAT44_EI_HAIRPINNING_HANDOFF_N_ERROR,
-} nat44_ei_hairpinning_handoff_error_t;
-
-static char *nat44_ei_hairpinning_handoff_error_strings[] = {
-#define _(sym, string) string,
- foreach_nat44_ei_hairpinning_handoff_error
-#undef _
-};
-
-typedef struct
-{
- u32 next_worker_index;
-} nat44_ei_hairpinning_handoff_trace_t;
-
-static u8 *
-format_nat44_ei_hairpinning_handoff_trace (u8 *s, va_list *args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- nat44_ei_hairpinning_handoff_trace_t *t =
- va_arg (*args, nat44_ei_hairpinning_handoff_trace_t *);
-
- s = format (s, "nat44-ei-hairpinning-handoff: next-worker %d",
- t->next_worker_index);
-
- return s;
-}
-
-always_inline uword
-nat44_ei_hairpinning_handoff_fn_inline (vlib_main_t *vm,
- vlib_node_runtime_t *node,
- vlib_frame_t *frame, u32 fq_index)
-{
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 n_enq, n_left_from, *from;
- u16 thread_indices[VLIB_FRAME_SIZE], *ti;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- b = bufs;
- ti = thread_indices;
-
- while (n_left_from > 0)
- {
- ti[0] = vnet_buffer (b[0])->snat.required_thread_index;
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
- (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
- {
- nat44_ei_hairpinning_handoff_trace_t *t =
- vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->next_worker_index = ti[0];
- }
-
- n_left_from -= 1;
- ti += 1;
- b += 1;
- }
- n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
- thread_indices, frame->n_vectors, 1);
-
- if (n_enq < frame->n_vectors)
- vlib_node_increment_counter (
- vm, node->node_index, NAT44_EI_HAIRPINNING_HANDOFF_ERROR_CONGESTION_DROP,
- frame->n_vectors - n_enq);
- return frame->n_vectors;
-}
-
-#endif // __included_nat44_ei_hairpinning_h__
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_handoff.c b/src/plugins/nat/nat44-ei/nat44_ei_handoff.c
index c7a1317026b..f1821d7721f 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_handoff.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_handoff.c
@@ -15,7 +15,6 @@
#include <vlib/vlib.h>
#include <vnet/vnet.h>
-#include <vnet/handoff.h>
#include <vnet/fib/ip4_fib.h>
#include <vppinfra/error.h>
@@ -83,8 +82,6 @@ nat44_ei_worker_handoff_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_get_buffers (vm, from, b, n_left_from);
- // TODO: move to nm
- // TODO: remove callbacks and use inlines that should be moved here
if (is_in2out)
{
fq_index = is_output ? nm->fq_in2out_output_index : nm->fq_in2out_index;
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
index 7ac1a92a61b..01b333a5234 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c
@@ -34,50 +34,8 @@
#include <nat/lib/nat_inlines.h>
#include <nat/nat44-ei/nat44_ei_inlines.h>
#include <nat/nat44-ei/nat44_ei.h>
-#include <nat/nat44-ei/nat44_ei_hairpinning.h>
-typedef struct
-{
- u32 sw_if_index;
- u32 next_index;
- u32 session_index;
- u32 is_slow_path;
- u32 is_hairpinning;
-} nat44_ei_in2out_trace_t;
-
-/* packet trace format function */
-static u8 *
-format_nat44_ei_in2out_trace (u8 *s, va_list *args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- nat44_ei_in2out_trace_t *t = va_arg (*args, nat44_ei_in2out_trace_t *);
- char *tag;
-
- tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
-
- s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
- t->sw_if_index, t->next_index, t->session_index);
- if (t->is_hairpinning)
- {
- s = format (s, ", with-hairpinning");
- }
-
- return s;
-}
-
-static u8 *
-format_nat44_ei_in2out_fast_trace (u8 *s, va_list *args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- nat44_ei_in2out_trace_t *t = va_arg (*args, nat44_ei_in2out_trace_t *);
-
- s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
- t->sw_if_index, t->next_index);
-
- return s;
-}
+extern vnet_feature_arc_registration_t vnet_feat_arc_ip4_local;
#define foreach_nat44_ei_in2out_error \
_ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \
@@ -88,6 +46,9 @@ format_nat44_ei_in2out_fast_trace (u8 *s, va_list *args)
_ (MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \
_ (CANNOT_CREATE_USER, "cannot create NAT user")
+#define foreach_nat44_ei_hairpinning_handoff_error \
+ _ (CONGESTION_DROP, "congestion drop")
+
typedef enum
{
#define _(sym, str) NAT44_EI_IN2OUT_ERROR_##sym,
@@ -104,6 +65,20 @@ static char *nat44_ei_in2out_error_strings[] = {
typedef enum
{
+#define _(sym, str) NAT44_EI_HAIRPINNING_HANDOFF_ERROR_##sym,
+ foreach_nat44_ei_hairpinning_handoff_error
+#undef _
+ NAT44_EI_HAIRPINNING_HANDOFF_N_ERROR,
+} nat44_ei_hairpinning_handoff_error_t;
+
+static char *nat44_ei_hairpinning_handoff_error_strings[] = {
+#define _(sym, string) string,
+ foreach_nat44_ei_hairpinning_handoff_error
+#undef _
+};
+
+typedef enum
+{
NAT44_EI_IN2OUT_NEXT_LOOKUP,
NAT44_EI_IN2OUT_NEXT_DROP,
NAT44_EI_IN2OUT_NEXT_ICMP_ERROR,
@@ -119,7 +94,98 @@ typedef enum
NAT44_EI_IN2OUT_HAIRPINNING_FINISH_N_NEXT,
} nat44_ei_in2out_hairpinnig_finish_next_t;
-static inline int
+typedef enum
+{
+ NAT44_EI_HAIRPIN_NEXT_LOOKUP,
+ NAT44_EI_HAIRPIN_NEXT_DROP,
+ NAT44_EI_HAIRPIN_NEXT_HANDOFF,
+ NAT44_EI_HAIRPIN_N_NEXT,
+} nat44_ei_hairpin_next_t;
+
+typedef struct
+{
+ u32 sw_if_index;
+ u32 next_index;
+ u32 session_index;
+ u32 is_slow_path;
+ u32 is_hairpinning;
+} nat44_ei_in2out_trace_t;
+
+typedef struct
+{
+ ip4_address_t addr;
+ u16 port;
+ u32 fib_index;
+ u32 session_index;
+} nat44_ei_hairpin_trace_t;
+
+typedef struct
+{
+ u32 next_worker_index;
+} nat44_ei_hairpinning_handoff_trace_t;
+
+static u8 *
+format_nat44_ei_in2out_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nat44_ei_in2out_trace_t *t = va_arg (*args, nat44_ei_in2out_trace_t *);
+ char *tag;
+ tag = t->is_slow_path ? "NAT44_IN2OUT_SLOW_PATH" : "NAT44_IN2OUT_FAST_PATH";
+ s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag,
+ t->sw_if_index, t->next_index, t->session_index);
+ if (t->is_hairpinning)
+ s = format (s, ", with-hairpinning");
+ return s;
+}
+
+static u8 *
+format_nat44_ei_in2out_fast_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nat44_ei_in2out_trace_t *t = va_arg (*args, nat44_ei_in2out_trace_t *);
+ s = format (s, "NAT44_IN2OUT_FAST: sw_if_index %d, next index %d",
+ t->sw_if_index, t->next_index);
+ return s;
+}
+
+static u8 *
+format_nat44_ei_hairpin_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nat44_ei_hairpin_trace_t *t = va_arg (*args, nat44_ei_hairpin_trace_t *);
+
+ s = format (s, "new dst addr %U port %u fib-index %u", format_ip4_address,
+ &t->addr, clib_net_to_host_u16 (t->port), t->fib_index);
+ if (~0 == t->session_index)
+ {
+ s = format (s, " is-static-mapping");
+ }
+ else
+ {
+ s = format (s, " session-index %u", t->session_index);
+ }
+
+ return s;
+}
+
+static u8 *
+format_nat44_ei_hairpinning_handoff_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ nat44_ei_hairpinning_handoff_trace_t *t =
+ va_arg (*args, nat44_ei_hairpinning_handoff_trace_t *);
+
+ s = format (s, "nat44-ei-hairpinning-handoff: next-worker %d",
+ t->next_worker_index);
+
+ return s;
+}
+
+static_always_inline int
nat44_ei_not_translate_fast (vlib_node_runtime_t *node, u32 sw_if_index0,
ip4_header_t *ip0, u32 proto0, u32 rx_fib_index0)
{
@@ -177,7 +243,7 @@ nat44_ei_not_translate_fast (vlib_node_runtime_t *node, u32 sw_if_index0,
return 1;
}
-static inline int
+static_always_inline int
nat44_ei_not_translate (nat44_ei_main_t *nm, vlib_node_runtime_t *node,
u32 sw_if_index0, ip4_header_t *ip0, u32 proto0,
u32 rx_fib_index0, u32 thread_index)
@@ -212,7 +278,7 @@ nat44_ei_not_translate (nat44_ei_main_t *nm, vlib_node_runtime_t *node,
rx_fib_index0);
}
-static inline int
+static_always_inline int
nat44_ei_not_translate_output_feature (nat44_ei_main_t *nm, ip4_header_t *ip0,
u32 proto0, u16 src_port, u16 dst_port,
u32 thread_index, u32 sw_if_index)
@@ -271,13 +337,10 @@ nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg)
if (clib_bihash_add_del_8_8 (&nm->out2in, &s_kv, 0))
nat_elog_warn (nm, "out2in key del failed");
- nat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port,
- s->in2out.fib_index);
+ nat_ipfix_logging_nat44_ses_delete (
+ ctx->thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
&s->in2out.addr, s->in2out.port,
@@ -430,8 +493,9 @@ slow_path (nat44_ei_main_t *nm, vlib_buffer_t *b0, ip4_header_t *ip0,
/* log NAT event */
nat_ipfix_logging_nat44_ses_create (
- thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->nat_proto,
- s->in2out.port, s->out2in.port, s->in2out.fib_index);
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_syslog_nat44_apmadd (s->user_index, s->in2out.fib_index, &s->in2out.addr,
s->in2out.port, &s->out2in.addr, s->out2in.port,
@@ -445,7 +509,6 @@ slow_path (nat44_ei_main_t *nm, vlib_buffer_t *b0, ip4_header_t *ip0,
return next0;
}
-#ifndef CLIB_MARCH_VARIANT
static_always_inline nat44_ei_in2out_error_t
icmp_get_key (vlib_buffer_t *b, ip4_header_t *ip0, ip4_address_t *addr,
u16 *port, nat_protocol_t *nat_proto)
@@ -490,22 +553,7 @@ icmp_get_key (vlib_buffer_t *b, ip4_header_t *ip0, ip4_address_t *addr,
return -1; /* success */
}
-/**
- * Get address and port values to be used for ICMP packet translation
- * and create session if needed
- *
- * @param[in,out] nm NAT main
- * @param[in,out] node NAT node runtime
- * @param[in] thread_index thread index
- * @param[in,out] b0 buffer containing packet to be translated
- * @param[in,out] ip0 ip header
- * @param[out] p_proto protocol used for matching
- * @param[out] p_value address and port after NAT translation
- * @param[out] p_dont_translate if packet should not be translated
- * @param d optional parameter
- * @param e optional parameter
- */
-u32
+static_always_inline u32
nat44_ei_icmp_match_in2out_slow (vlib_node_runtime_t *node, u32 thread_index,
vlib_buffer_t *b0, ip4_header_t *ip0,
ip4_address_t *addr, u16 *port,
@@ -607,10 +655,8 @@ out:
*p_s0 = s0;
return next0;
}
-#endif
-#ifndef CLIB_MARCH_VARIANT
-u32
+static_always_inline u32
nat44_ei_icmp_match_in2out_fast (vlib_node_runtime_t *node, u32 thread_index,
vlib_buffer_t *b0, ip4_header_t *ip0,
ip4_address_t *addr, u16 *port,
@@ -676,16 +722,135 @@ nat44_ei_icmp_match_in2out_fast (vlib_node_runtime_t *node, u32 thread_index,
out:
return next0;
}
-#endif
-u32 nat44_ei_icmp_in2out (vlib_buffer_t *b0, ip4_header_t *ip0,
- icmp46_header_t *icmp0, u32 sw_if_index0,
- u32 rx_fib_index0, vlib_node_runtime_t *node,
- u32 next0, u32 thread_index,
- nat44_ei_session_t **p_s0);
+static_always_inline u32
+nat44_ei_icmp_hairpinning (nat44_ei_main_t *nm, vlib_buffer_t *b0,
+ u32 thread_index, ip4_header_t *ip0,
+ icmp46_header_t *icmp0, u32 *required_thread_index)
+{
+ clib_bihash_kv_8_8_t kv0, value0;
+ u32 old_dst_addr0, new_dst_addr0;
+ u32 old_addr0, new_addr0;
+ u16 old_port0, new_port0;
+ u16 old_checksum0, new_checksum0;
+ u32 si, ti = 0;
+ ip_csum_t sum0;
+ nat44_ei_session_t *s0;
+ nat44_ei_static_mapping_t *m0;
-#ifndef CLIB_MARCH_VARIANT
-u32
+ if (icmp_type_is_error_message (
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags))
+ {
+ ip4_header_t *inner_ip0 = 0;
+ tcp_udp_header_t *l4_header = 0;
+
+ inner_ip0 = (ip4_header_t *) ((icmp_echo_header_t *) (icmp0 + 1) + 1);
+ l4_header = ip4_next_header (inner_ip0);
+ u32 protocol = ip_proto_to_nat_proto (inner_ip0->protocol);
+
+ if (protocol != NAT_PROTOCOL_TCP && protocol != NAT_PROTOCOL_UDP)
+ return 1;
+
+ init_nat_k (&kv0, ip0->dst_address, l4_header->src_port,
+ nm->outside_fib_index, protocol);
+ if (clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0))
+ return 1;
+ ti = nat_value_get_thread_index (&value0);
+ if (ti != thread_index)
+ {
+ *required_thread_index = ti;
+ return 1;
+ }
+ si = nat_value_get_session_index (&value0);
+ s0 = pool_elt_at_index (nm->per_thread_data[ti].sessions, si);
+ new_dst_addr0 = s0->in2out.addr.as_u32;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+
+ /* update inner source IP address */
+ old_addr0 = inner_ip0->src_address.as_u32;
+ inner_ip0->src_address.as_u32 = new_dst_addr0;
+ new_addr0 = inner_ip0->src_address.as_u32;
+ sum0 = icmp0->checksum;
+ sum0 =
+ ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address);
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ /* update inner IP header checksum */
+ old_checksum0 = inner_ip0->checksum;
+ sum0 = inner_ip0->checksum;
+ sum0 =
+ ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, src_address);
+ inner_ip0->checksum = ip_csum_fold (sum0);
+ new_checksum0 = inner_ip0->checksum;
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_checksum0, new_checksum0, ip4_header_t,
+ checksum);
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ /* update inner source port */
+ old_port0 = l4_header->src_port;
+ l4_header->src_port = s0->in2out.port;
+ new_port0 = l4_header->src_port;
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, old_port0, new_port0, tcp_udp_header_t,
+ src_port);
+ icmp0->checksum = ip_csum_fold (sum0);
+ }
+ else
+ {
+ init_nat_k (&kv0, ip0->dst_address, 0, nm->outside_fib_index, 0);
+ if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv0,
+ &value0))
+ {
+ icmp_echo_header_t *echo0 = (icmp_echo_header_t *) (icmp0 + 1);
+ u16 icmp_id0 = echo0->identifier;
+ init_nat_k (&kv0, ip0->dst_address, icmp_id0, nm->outside_fib_index,
+ NAT_PROTOCOL_ICMP);
+ int rv = clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0);
+ if (!rv)
+ {
+ ti = nat_value_get_thread_index (&value0);
+ if (ti != thread_index)
+ {
+ *required_thread_index = ti;
+ return 1;
+ }
+ si = nat_value_get_session_index (&value0);
+ s0 = pool_elt_at_index (nm->per_thread_data[ti].sessions, si);
+ new_dst_addr0 = s0->in2out.addr.as_u32;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+ echo0->identifier = s0->in2out.port;
+ sum0 = icmp0->checksum;
+ sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port,
+ icmp_echo_header_t, identifier);
+ icmp0->checksum = ip_csum_fold (sum0);
+ goto change_addr;
+ }
+
+ return 1;
+ }
+
+ m0 = pool_elt_at_index (nm->static_mappings, value0.value);
+
+ new_dst_addr0 = m0->local_addr.as_u32;
+ if (vnet_buffer (b0)->sw_if_index[VLIB_TX] == ~0)
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = m0->fib_index;
+ }
+change_addr:
+ /* Destination is behind the same NAT, use internal address and port */
+ if (new_dst_addr0)
+ {
+ old_dst_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address.as_u32 = new_dst_addr0;
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t,
+ dst_address);
+ ip0->checksum = ip_csum_fold (sum0);
+ }
+ return 0;
+}
+
+static_always_inline u32
nat44_ei_icmp_in2out (vlib_buffer_t *b0, ip4_header_t *ip0,
icmp46_header_t *icmp0, u32 sw_if_index0,
u32 rx_fib_index0, vlib_node_runtime_t *node, u32 next0,
@@ -856,7 +1021,6 @@ nat44_ei_icmp_in2out (vlib_buffer_t *b0, ip4_header_t *ip0,
out:
return next0;
}
-#endif
static_always_inline u32
nat44_ei_icmp_in2out_slow_path (nat44_ei_main_t *nm, vlib_buffer_t *b0,
@@ -881,6 +1045,31 @@ nat44_ei_icmp_in2out_slow_path (nat44_ei_main_t *nm, vlib_buffer_t *b0,
return next0;
}
+static_always_inline void
+nat44_ei_hairpinning_sm_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
+ ip4_header_t *ip)
+{
+ clib_bihash_kv_8_8_t kv, value;
+ nat44_ei_static_mapping_t *m;
+ u32 old_addr, new_addr;
+ ip_csum_t sum;
+
+ init_nat_k (&kv, ip->dst_address, 0, 0, 0);
+ if (clib_bihash_search_8_8 (&nm->static_mapping_by_external, &kv, &value))
+ return;
+
+ m = pool_elt_at_index (nm->static_mappings, value.value);
+
+ old_addr = ip->dst_address.as_u32;
+ new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32;
+ sum = ip->checksum;
+ sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address);
+ ip->checksum = ip_csum_fold (sum);
+
+ if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0)
+ vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index;
+}
+
static int
nat_in2out_sm_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
ip4_header_t *ip, u32 rx_fib_index)
@@ -913,7 +1102,174 @@ nat_in2out_sm_unknown_proto (nat44_ei_main_t *nm, vlib_buffer_t *b,
return 0;
}
-static inline uword
+static_always_inline int
+nat44_ei_hairpinning (vlib_main_t *vm, vlib_node_runtime_t *node,
+ nat44_ei_main_t *nm, u32 thread_index, vlib_buffer_t *b0,
+ ip4_header_t *ip0, udp_header_t *udp0,
+ tcp_header_t *tcp0, u32 proto0, int do_trace,
+ u32 *required_thread_index)
+{
+ nat44_ei_session_t *s0 = NULL;
+ clib_bihash_kv_8_8_t kv0, value0;
+ ip_csum_t sum0;
+ u32 new_dst_addr0 = 0, old_dst_addr0, si = ~0;
+ u16 new_dst_port0 = ~0, old_dst_port0;
+ int rv;
+ ip4_address_t sm0_addr;
+ u16 sm0_port;
+ u32 sm0_fib_index;
+ u32 old_sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ /* Check if destination is static mappings */
+ if (!nat44_ei_static_mapping_match (
+ ip0->dst_address, udp0->dst_port, nm->outside_fib_index, proto0,
+ &sm0_addr, &sm0_port, &sm0_fib_index, 1 /* by external */, 0, 0))
+ {
+ new_dst_addr0 = sm0_addr.as_u32;
+ new_dst_port0 = sm0_port;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm0_fib_index;
+ }
+ /* or active session */
+ else
+ {
+ init_nat_k (&kv0, ip0->dst_address, udp0->dst_port,
+ nm->outside_fib_index, proto0);
+ rv = clib_bihash_search_8_8 (&nm->out2in, &kv0, &value0);
+ if (rv)
+ {
+ rv = 0;
+ goto trace;
+ }
+
+ if (thread_index != nat_value_get_thread_index (&value0))
+ {
+ *required_thread_index = nat_value_get_thread_index (&value0);
+ return 0;
+ }
+
+ si = nat_value_get_session_index (&value0);
+ s0 = pool_elt_at_index (nm->per_thread_data[thread_index].sessions, si);
+ new_dst_addr0 = s0->in2out.addr.as_u32;
+ new_dst_port0 = s0->in2out.port;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index;
+ }
+
+ /* Check if anything has changed and if not, then return 0. This
+ helps avoid infinite loop, repeating the three nodes
+ nat44-hairpinning-->ip4-lookup-->ip4-local, in case nothing has
+ changed. */
+ old_dst_addr0 = ip0->dst_address.as_u32;
+ old_dst_port0 = tcp0->dst;
+ if (new_dst_addr0 == old_dst_addr0 && new_dst_port0 == old_dst_port0 &&
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] == old_sw_if_index)
+ return 0;
+
+ /* Destination is behind the same NAT, use internal address and port */
+ if (new_dst_addr0)
+ {
+ old_dst_addr0 = ip0->dst_address.as_u32;
+ ip0->dst_address.as_u32 = new_dst_addr0;
+ sum0 = ip0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0, ip4_header_t,
+ dst_address);
+ ip0->checksum = ip_csum_fold (sum0);
+
+ old_dst_port0 = tcp0->dst;
+ if (PREDICT_TRUE (new_dst_port0 != old_dst_port0))
+ {
+ if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
+ {
+ tcp0->dst = new_dst_port0;
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+ ip4_header_t, dst_address);
+ sum0 = ip_csum_update (sum0, old_dst_port0, new_dst_port0,
+ ip4_header_t /* cheat */, length);
+ tcp0->checksum = ip_csum_fold (sum0);
+ }
+ else
+ {
+ udp0->dst_port = new_dst_port0;
+ udp0->checksum = 0;
+ }
+ }
+ else
+ {
+ if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
+ {
+ sum0 = tcp0->checksum;
+ sum0 = ip_csum_update (sum0, old_dst_addr0, new_dst_addr0,
+ ip4_header_t, dst_address);
+ tcp0->checksum = ip_csum_fold (sum0);
+ }
+ }
+ rv = 1;
+ goto trace;
+ }
+ rv = 0;
+trace:
+ if (do_trace && PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ nat44_ei_hairpin_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
+ t->addr.as_u32 = new_dst_addr0;
+ t->port = new_dst_port0;
+ t->fib_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ if (s0)
+ {
+ t->session_index = si;
+ }
+ else
+ {
+ t->session_index = ~0;
+ }
+ }
+ return rv;
+}
+
+static_always_inline uword
+nat44_ei_hairpinning_handoff_fn_inline (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u32 fq_index)
+{
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u32 n_enq, n_left_from, *from;
+ u16 thread_indices[VLIB_FRAME_SIZE], *ti;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+
+ b = bufs;
+ ti = thread_indices;
+
+ while (n_left_from > 0)
+ {
+ ti[0] = vnet_buffer (b[0])->snat.required_thread_index;
+
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ nat44_ei_hairpinning_handoff_trace_t *t =
+ vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->next_worker_index = ti[0];
+ }
+
+ n_left_from -= 1;
+ ti += 1;
+ b += 1;
+ }
+ n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
+ thread_indices, frame->n_vectors, 1);
+
+ if (n_enq < frame->n_vectors)
+ vlib_node_increment_counter (
+ vm, node->node_index, NAT44_EI_HAIRPINNING_HANDOFF_ERROR_CONGESTION_DROP,
+ frame->n_vectors - n_enq);
+ return frame->n_vectors;
+}
+
+static_always_inline uword
nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, int is_slow_path,
int is_output_feature)
@@ -934,7 +1290,9 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
vlib_buffer_t *b0, *b1;
u32 next0, next1;
- u32 sw_if_index0, sw_if_index1;
+ u32 rx_sw_if_index0, rx_sw_if_index1;
+ u32 tx_sw_if_index0, tx_sw_if_index1;
+ u32 cntr_sw_if_index0, cntr_sw_if_index1;
ip4_header_t *ip0, *ip1;
ip_csum_t sum0, sum1;
u32 new_addr0, old_addr0, new_addr1, old_addr1;
@@ -978,13 +1336,16 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp0 = (tcp_header_t *) udp0;
icmp0 = (icmp46_header_t *) udp0;
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ cntr_sw_if_index0 =
+ is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
rx_fib_index0 =
- vec_elt (nm->ip4_main->fib_index_by_sw_if_index, sw_if_index0);
+ vec_elt (nm->ip4_main->fib_index_by_sw_if_index, rx_sw_if_index0);
next0 = next1 = NAT44_EI_IN2OUT_NEXT_LOOKUP;
- if (PREDICT_FALSE (ip0->ttl == 1))
+ if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
{
vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
@@ -1010,19 +1371,19 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.other :
&nm->counters.fastpath.in2out.other,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
goto trace00;
}
if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
{
next0 = nat44_ei_icmp_in2out_slow_path (
- nm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0,
- now, thread_index, &s0);
+ nm, b0, ip0, icmp0, rx_sw_if_index0, rx_fib_index0, node,
+ next0, now, thread_index, &s0);
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.icmp :
&nm->counters.fastpath.in2out.icmp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
goto trace00;
}
}
@@ -1055,7 +1416,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
nm, ip0, proto0,
vnet_buffer (b0)->ip.reass.l4_src_port,
vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
- sw_if_index0)))
+ rx_sw_if_index0)))
goto trace00;
/*
@@ -1073,7 +1434,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
{
if (PREDICT_FALSE (nat44_ei_not_translate (
- nm, node, sw_if_index0, ip0, proto0, rx_fib_index0,
+ nm, node, rx_sw_if_index0, ip0, proto0, rx_fib_index0,
thread_index)))
goto trace00;
}
@@ -1131,7 +1492,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.tcp :
&nm->counters.fastpath.in2out.tcp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
else
{
@@ -1155,7 +1516,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.udp :
&nm->counters.fastpath.in2out.udp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
/* Accounting */
@@ -1171,7 +1532,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
nat44_ei_in2out_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
t->is_slow_path = is_slow_path;
- t->sw_if_index = sw_if_index0;
+ t->sw_if_index = rx_sw_if_index0;
t->next_index = next0;
t->session_index = ~0;
if (s0)
@@ -1183,7 +1544,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.drops :
&nm->counters.fastpath.in2out.drops,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
if (is_output_feature)
@@ -1196,11 +1557,14 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp1 = (tcp_header_t *) udp1;
icmp1 = (icmp46_header_t *) udp1;
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ rx_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ tx_sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_TX];
+ cntr_sw_if_index1 =
+ is_output_feature ? tx_sw_if_index1 : rx_sw_if_index1;
rx_fib_index1 =
- vec_elt (nm->ip4_main->fib_index_by_sw_if_index, sw_if_index1);
+ vec_elt (nm->ip4_main->fib_index_by_sw_if_index, rx_sw_if_index1);
- if (PREDICT_FALSE (ip1->ttl == 1))
+ if (PREDICT_FALSE (!is_output_feature && ip1->ttl == 1))
{
vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
icmp4_error_set_vnet_buffer (b1, ICMP4_time_exceeded,
@@ -1226,19 +1590,19 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.other :
&nm->counters.fastpath.in2out.other,
- thread_index, sw_if_index1, 1);
+ thread_index, cntr_sw_if_index1, 1);
goto trace01;
}
if (PREDICT_FALSE (proto1 == NAT_PROTOCOL_ICMP))
{
next1 = nat44_ei_icmp_in2out_slow_path (
- nm, b1, ip1, icmp1, sw_if_index1, rx_fib_index1, node, next1,
- now, thread_index, &s1);
+ nm, b1, ip1, icmp1, rx_sw_if_index1, rx_fib_index1, node,
+ next1, now, thread_index, &s1);
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.icmp :
&nm->counters.fastpath.in2out.icmp,
- thread_index, sw_if_index1, 1);
+ thread_index, cntr_sw_if_index1, 1);
goto trace01;
}
}
@@ -1271,7 +1635,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
nm, ip1, proto1,
vnet_buffer (b1)->ip.reass.l4_src_port,
vnet_buffer (b1)->ip.reass.l4_dst_port, thread_index,
- sw_if_index1)))
+ rx_sw_if_index1)))
goto trace01;
/*
@@ -1289,7 +1653,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
{
if (PREDICT_FALSE (nat44_ei_not_translate (
- nm, node, sw_if_index1, ip1, proto1, rx_fib_index1,
+ nm, node, rx_sw_if_index1, ip1, proto1, rx_fib_index1,
thread_index)))
goto trace01;
}
@@ -1346,7 +1710,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.tcp :
&nm->counters.fastpath.in2out.tcp,
- thread_index, sw_if_index1, 1);
+ thread_index, cntr_sw_if_index1, 1);
}
else
{
@@ -1370,7 +1734,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.udp :
&nm->counters.fastpath.in2out.udp,
- thread_index, sw_if_index1, 1);
+ thread_index, cntr_sw_if_index1, 1);
}
/* Accounting */
@@ -1385,7 +1749,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
nat44_ei_in2out_trace_t *t =
vlib_add_trace (vm, node, b1, sizeof (*t));
- t->sw_if_index = sw_if_index1;
+ t->sw_if_index = rx_sw_if_index1;
t->next_index = next1;
t->session_index = ~0;
if (s1)
@@ -1397,7 +1761,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.drops :
&nm->counters.fastpath.in2out.drops,
- thread_index, sw_if_index1, 1);
+ thread_index, cntr_sw_if_index1, 1);
}
n_left_from -= 2;
@@ -1410,7 +1774,9 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
vlib_buffer_t *b0;
u32 next0;
- u32 sw_if_index0;
+ u32 rx_sw_if_index0;
+ u32 tx_sw_if_index0;
+ u32 cntr_sw_if_index0;
ip4_header_t *ip0;
ip_csum_t sum0;
u32 new_addr0, old_addr0;
@@ -1438,11 +1804,14 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp0 = (tcp_header_t *) udp0;
icmp0 = (icmp46_header_t *) udp0;
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ rx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ tx_sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ cntr_sw_if_index0 =
+ is_output_feature ? tx_sw_if_index0 : rx_sw_if_index0;
rx_fib_index0 =
- vec_elt (nm->ip4_main->fib_index_by_sw_if_index, sw_if_index0);
+ vec_elt (nm->ip4_main->fib_index_by_sw_if_index, rx_sw_if_index0);
- if (PREDICT_FALSE (ip0->ttl == 1))
+ if (PREDICT_FALSE (!is_output_feature && ip0->ttl == 1))
{
vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
@@ -1468,19 +1837,19 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.other :
&nm->counters.fastpath.in2out.other,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
goto trace0;
}
if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
{
next0 = nat44_ei_icmp_in2out_slow_path (
- nm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next0,
- now, thread_index, &s0);
+ nm, b0, ip0, icmp0, rx_sw_if_index0, rx_fib_index0, node,
+ next0, now, thread_index, &s0);
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.icmp :
&nm->counters.fastpath.in2out.icmp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
goto trace0;
}
}
@@ -1513,7 +1882,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
nm, ip0, proto0,
vnet_buffer (b0)->ip.reass.l4_src_port,
vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
- sw_if_index0)))
+ rx_sw_if_index0)))
goto trace0;
/*
@@ -1531,7 +1900,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
{
if (PREDICT_FALSE (nat44_ei_not_translate (
- nm, node, sw_if_index0, ip0, proto0, rx_fib_index0,
+ nm, node, rx_sw_if_index0, ip0, proto0, rx_fib_index0,
thread_index)))
goto trace0;
}
@@ -1590,7 +1959,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.tcp :
&nm->counters.fastpath.in2out.tcp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
else
{
@@ -1615,7 +1984,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (is_slow_path ?
&nm->counters.slowpath.in2out.udp :
&nm->counters.fastpath.in2out.udp,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
/* Accounting */
@@ -1631,7 +2000,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
nat44_ei_in2out_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
t->is_slow_path = is_slow_path;
- t->sw_if_index = sw_if_index0;
+ t->sw_if_index = rx_sw_if_index0;
t->next_index = next0;
t->session_index = ~0;
if (s0)
@@ -1643,7 +2012,7 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_increment_simple_counter (
is_slow_path ? &nm->counters.slowpath.in2out.drops :
&nm->counters.fastpath.in2out.drops,
- thread_index, sw_if_index0, 1);
+ thread_index, cntr_sw_if_index0, 1);
}
n_left_from--;
@@ -1656,128 +2025,10 @@ nat44_ei_in2out_node_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
return frame->n_vectors;
}
-VLIB_NODE_FN (nat44_ei_in2out_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */,
- 0);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_node) = {
- .name = "nat44-ei-in2out",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
- .error_strings = nat44_ei_in2out_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
- [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-slowpath",
- [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_output_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_in2out_node_fn_inline (vm, node, frame, 0 /* is_slow_path */,
- 1);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_output_node) = {
- .name = "nat44-ei-in2out-output",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
- .error_strings = nat44_ei_in2out_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
- [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "interface-output",
- [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-output-slowpath",
- [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-interface-output",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_slowpath_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */,
- 0);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_slowpath_node) = {
- .name = "nat44-ei-in2out-slowpath",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
- .error_strings = nat44_ei_in2out_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
- [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-slowpath",
- [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_output_slowpath_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_in2out_node_fn_inline (vm, node, frame, 1 /* is_slow_path */,
- 1);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_output_slowpath_node) = {
- .name = "nat44-ei-in2out-output-slowpath",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
- .error_strings = nat44_ei_in2out_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
- [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "interface-output",
- [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-output-slowpath",
- [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-interface-output",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_fast_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+static_always_inline uword
+nat44_ei_in2out_hairpinning_finish_inline (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
{
u32 n_left_from, *from, *to_next;
u32 thread_index = vm->thread_index;
@@ -1802,20 +2053,12 @@ VLIB_NODE_FN (nat44_ei_in2out_fast_node)
u32 next0;
u32 sw_if_index0;
ip4_header_t *ip0;
- ip_csum_t sum0;
- u32 new_addr0, old_addr0;
- u16 old_port0, new_port0;
udp_header_t *udp0;
tcp_header_t *tcp0;
icmp46_header_t *icmp0;
u32 proto0;
- u32 rx_fib_index0;
- ip4_address_t sm0_addr;
- u16 sm0_port;
- u32 sm0_fib_index;
u32 required_thread_index = thread_index;
- /* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
to_next[0] = bi0;
from += 1;
@@ -1824,7 +2067,7 @@ VLIB_NODE_FN (nat44_ei_in2out_fast_node)
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- next0 = NAT44_EI_IN2OUT_NEXT_LOOKUP;
+ next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP;
ip0 = vlib_buffer_get_current (b0);
udp0 = ip4_next_header (ip0);
@@ -1832,117 +2075,36 @@ VLIB_NODE_FN (nat44_ei_in2out_fast_node)
icmp0 = (icmp46_header_t *) udp0;
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- rx_fib_index0 =
- ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
-
- if (PREDICT_FALSE (ip0->ttl == 1))
- {
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
- ICMP4_time_exceeded_ttl_exceeded_in_transit,
- 0);
- next0 = NAT44_EI_IN2OUT_NEXT_ICMP_ERROR;
- goto trace0;
- }
-
proto0 = ip_proto_to_nat_proto (ip0->protocol);
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
- goto trace0;
-
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
- {
- next0 = nat44_ei_icmp_in2out (b0, ip0, icmp0, sw_if_index0,
- rx_fib_index0, node, next0, ~0, 0);
- goto trace0;
- }
-
- if (nat44_ei_static_mapping_match (
- ip0->src_address, udp0->src_port, rx_fib_index0, proto0,
- &sm0_addr, &sm0_port, &sm0_fib_index, 0, 0, 0))
- {
- b0->error = node->errors[NAT44_EI_IN2OUT_ERROR_NO_TRANSLATION];
- next0 = NAT44_EI_IN2OUT_NEXT_DROP;
- goto trace0;
- }
-
- new_addr0 = sm0_addr.as_u32;
- new_port0 = sm0_port;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm0_fib_index;
- old_addr0 = ip0->src_address.as_u32;
- ip0->src_address.as_u32 = new_addr0;
-
- sum0 = ip0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- src_address /* changed member */ );
- ip0->checksum = ip_csum_fold (sum0);
-
- if (PREDICT_FALSE (new_port0 != udp0->dst_port))
- {
- old_port0 = udp0->src_port;
- udp0->src_port = new_port0;
-
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- sum0 = ip_csum_update (sum0, old_port0, new_port0,
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- mss_clamping (nm->mss_clamping, tcp0, &sum0);
- tcp0->checksum = ip_csum_fold (sum0);
- }
- else if (udp0->checksum)
- {
- sum0 = udp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- sum0 = ip_csum_update (sum0, old_port0, new_port0,
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- udp0->checksum = ip_csum_fold (sum0);
- }
- }
- else
+ switch (proto0)
{
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- mss_clamping (nm->mss_clamping, tcp0, &sum0);
- tcp0->checksum = ip_csum_fold (sum0);
- }
- else if (udp0->checksum)
- {
- sum0 = udp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- udp0->checksum = ip_csum_fold (sum0);
- }
+ case NAT_PROTOCOL_TCP:
+ // fallthrough
+ case NAT_PROTOCOL_UDP:
+ is_hairpinning = nat44_ei_hairpinning (
+ vm, node, nm, thread_index, b0, ip0, udp0, tcp0, proto0,
+ 0 /* do_trace */, &required_thread_index);
+ break;
+ case NAT_PROTOCOL_ICMP:
+ is_hairpinning = (0 == nat44_ei_icmp_hairpinning (
+ nm, b0, thread_index, ip0, icmp0,
+ &required_thread_index));
+ break;
+ case NAT_PROTOCOL_OTHER:
+ // this should never happen
+ next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP;
+ break;
}
- /* Hairpinning */
- is_hairpinning = nat44_ei_hairpinning (
- vm, node, nm, thread_index, b0, ip0, udp0, tcp0, proto0,
- 0 /* do_trace */, &required_thread_index);
-
if (thread_index != required_thread_index)
{
- vnet_buffer (b0)->snat.required_thread_index =
- required_thread_index;
- next0 = NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF;
+ // but we already did a handoff ...
+ next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP;
}
- trace0:
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b0->flags & VLIB_BUFFER_IS_TRACED)))
{
nat44_ei_in2out_trace_t *t =
vlib_add_trace (vm, node, b0, sizeof (*t));
@@ -1951,18 +2113,15 @@ VLIB_NODE_FN (nat44_ei_in2out_fast_node)
t->is_hairpinning = is_hairpinning;
}
- if (next0 != NAT44_EI_IN2OUT_NEXT_DROP)
+ if (next0 != NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP)
{
-
vlib_increment_simple_counter (
&nm->counters.fastpath.in2out.other, sw_if_index0,
vm->thread_index, 1);
}
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -1971,83 +2130,16 @@ VLIB_NODE_FN (nat44_ei_in2out_fast_node)
return frame->n_vectors;
}
-VLIB_REGISTER_NODE (nat44_ei_in2out_fast_node) = {
- .name = "nat44-ei-in2out-fast",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_fast_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
- .error_strings = nat44_ei_in2out_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
- [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-slowpath",
- [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_hairpinning_handoff_ip4_lookup_node)
+VLIB_NODE_FN (nat44_ei_hairpinning_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return nat44_ei_hairpinning_handoff_fn_inline (
- vm, node, frame,
- nat44_ei_main.in2out_hairpinning_finish_ip4_lookup_node_fq_index);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_handoff_ip4_lookup_node) = {
- .name = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
- .vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
- .error_strings = nat44_ei_hairpinning_handoff_error_strings,
- .format_trace = format_nat44_ei_hairpinning_handoff_trace,
-
- .n_next_nodes = 1,
-
- .next_nodes = {
- [0] = "error-drop",
- },
-};
-
-VLIB_NODE_FN (nat44_ei_in2out_hairpinning_handoff_interface_output_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_hairpinning_handoff_fn_inline (
- vm, node, frame,
- nat44_ei_main.in2out_hairpinning_finish_interface_output_node_fq_index);
-}
-
-VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_handoff_interface_output_node) = {
- .name = "nat44-ei-in2out-hairpinning-handoff-interface-output",
- .vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
- .error_strings = nat44_ei_hairpinning_handoff_error_strings,
- .format_trace = format_nat44_ei_hairpinning_handoff_trace,
-
- .n_next_nodes = 1,
-
- .next_nodes = {
- [0] = "error-drop",
- },
-};
-
-static_always_inline int
-nat44_ei_in2out_hairpinning_finish_inline (vlib_main_t *vm,
- vlib_node_runtime_t *node,
- vlib_frame_t *frame)
-{
u32 n_left_from, *from, *to_next;
u32 thread_index = vm->thread_index;
- nat44_ei_in2out_next_t next_index;
+ nat44_ei_hairpin_next_t next_index;
nat44_ei_main_t *nm = &nat44_ei_main;
- int is_hairpinning = 0;
+ vnet_feature_main_t *fm = &feature_main;
+ u8 arc_index = vnet_feat_arc_ip4_local.feature_arc_index;
+ vnet_feature_config_main_t *cm = &fm->feature_config_mains[arc_index];
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -2064,15 +2156,13 @@ nat44_ei_in2out_hairpinning_finish_inline (vlib_main_t *vm,
u32 bi0;
vlib_buffer_t *b0;
u32 next0;
- u32 sw_if_index0;
ip4_header_t *ip0;
+ u32 proto0;
udp_header_t *udp0;
tcp_header_t *tcp0;
- icmp46_header_t *icmp0;
- u32 proto0;
+ u32 sw_if_index0;
u32 required_thread_index = thread_index;
- /* speculatively enqueue b0 to the current next frame */
bi0 = from[0];
to_next[0] = bi0;
from += 1;
@@ -2081,60 +2171,39 @@ nat44_ei_in2out_hairpinning_finish_inline (vlib_main_t *vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP;
-
ip0 = vlib_buffer_get_current (b0);
udp0 = ip4_next_header (ip0);
tcp0 = (tcp_header_t *) udp0;
- icmp0 = (icmp46_header_t *) udp0;
-
sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
proto0 = ip_proto_to_nat_proto (ip0->protocol);
+ int next0_resolved = 0;
- switch (proto0)
+ if (nat44_ei_hairpinning (vm, node, nm, thread_index, b0, ip0, udp0,
+ tcp0, proto0, 1, &required_thread_index))
{
- case NAT_PROTOCOL_TCP:
- // fallthrough
- case NAT_PROTOCOL_UDP:
- is_hairpinning = nat44_ei_hairpinning (
- vm, node, nm, thread_index, b0, ip0, udp0, tcp0, proto0,
- 0 /* do_trace */, &required_thread_index);
- break;
- case NAT_PROTOCOL_ICMP:
- is_hairpinning = (0 == nat44_ei_icmp_hairpinning (
- nm, b0, thread_index, ip0, icmp0,
- &required_thread_index));
- break;
- case NAT_PROTOCOL_OTHER:
- // this should never happen
- next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP;
- break;
+ next0 = NAT44_EI_HAIRPIN_NEXT_LOOKUP;
+ next0_resolved = 1;
}
if (thread_index != required_thread_index)
{
- // but we already did a handoff ...
- next0 = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP;
+ vnet_buffer (b0)->snat.required_thread_index =
+ required_thread_index;
+ next0 = NAT44_EI_HAIRPIN_NEXT_HANDOFF;
+ next0_resolved = 1;
}
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
- (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- nat44_ei_in2out_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sw_if_index = sw_if_index0;
- t->next_index = next0;
- t->is_hairpinning = is_hairpinning;
- }
+ if (!next0_resolved)
+ vnet_get_config_data (&cm->config_main, &b0->current_config_index,
+ &next0, 0);
- if (next0 != NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP)
+ if (next0 != NAT44_EI_HAIRPIN_NEXT_DROP)
{
vlib_increment_simple_counter (
- &nm->counters.fastpath.in2out.other, sw_if_index0,
- vm->thread_index, 1);
+ &nm->counters.hairpinning, vm->thread_index, sw_if_index0, 1);
}
- /* verify speculative enqueue, maybe switch current next frame */
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}
@@ -2145,58 +2214,216 @@ nat44_ei_in2out_hairpinning_finish_inline (vlib_main_t *vm,
return frame->n_vectors;
}
+VLIB_NODE_FN (nat44_ei_in2out_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_in2out_node_fn_inline (vm, node, frame, 0, 0);
+}
+
+VLIB_NODE_FN (nat44_ei_in2out_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_in2out_node_fn_inline (vm, node, frame, 0, 1);
+}
+
+VLIB_NODE_FN (nat44_ei_in2out_slowpath_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_in2out_node_fn_inline (vm, node, frame, 1, 0);
+}
+
+VLIB_NODE_FN (nat44_ei_in2out_output_slowpath_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_in2out_node_fn_inline (vm, node, frame, 1, 1);
+}
+
+VLIB_NODE_FN (nat44_ei_in2out_hairpinning_handoff_ip4_lookup_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_hairpinning_handoff_fn_inline (
+ vm, node, frame,
+ nat44_ei_main.in2out_hairpinning_finish_ip4_lookup_node_fq_index);
+}
+
+VLIB_NODE_FN (nat44_ei_in2out_hairpinning_handoff_interface_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_hairpinning_handoff_fn_inline (
+ vm, node, frame,
+ nat44_ei_main.in2out_hairpinning_finish_interface_output_node_fq_index);
+}
+
VLIB_NODE_FN (nat44_ei_in2out_hairpinning_finish_ip4_lookup_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
return nat44_ei_in2out_hairpinning_finish_inline (vm, node, frame);
}
-VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_finish_ip4_lookup_node) = {
- .name = "nat44-ei-in2out-hairpinning-finish-ip4-lookup",
+VLIB_NODE_FN (nat44_ei_in2out_hairpinning_finish_interface_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_in2out_hairpinning_finish_inline (vm, node, frame);
+}
+
+VLIB_NODE_FN (nat44_ei_hairpinning_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return nat44_ei_hairpinning_handoff_fn_inline (
+ vm, node, frame, nat44_ei_main.hairpinning_fq_index);
+}
+
+VLIB_REGISTER_NODE (nat44_ei_in2out_node) = {
+ .name = "nat44-ei-in2out",
.vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_in2out_fast_trace,
+ .format_trace = format_nat44_ei_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
+ .error_strings = nat44_ei_in2out_error_strings,
+ .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
+ .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
+ .next_nodes = {
+ [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
+ [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-slowpath",
+ [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
+ },
+};
+
+VLIB_REGISTER_NODE (nat44_ei_in2out_output_node) = {
+ .name = "nat44-ei-in2out-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nat44_ei_in2out_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
+ .error_strings = nat44_ei_in2out_error_strings,
+ .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
+ .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
+ .next_nodes = {
+ [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
+ [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "interface-output",
+ [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-output-slowpath",
+ [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-interface-output",
+ },
+};
+VLIB_REGISTER_NODE (nat44_ei_in2out_slowpath_node) = {
+ .name = "nat44-ei-in2out-slowpath",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nat44_ei_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
.error_strings = nat44_ei_in2out_error_strings,
+ .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
+ .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
+ .next_nodes = {
+ [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
+ [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "ip4-lookup",
+ [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-slowpath",
+ [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
+ },
+};
+VLIB_REGISTER_NODE (nat44_ei_in2out_output_slowpath_node) = {
+ .name = "nat44-ei-in2out-output-slowpath",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nat44_ei_in2out_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
+ .error_strings = nat44_ei_in2out_error_strings,
.runtime_data_bytes = sizeof (nat44_ei_runtime_t),
+ .n_next_nodes = NAT44_EI_IN2OUT_N_NEXT,
+ .next_nodes = {
+ [NAT44_EI_IN2OUT_NEXT_DROP] = "error-drop",
+ [NAT44_EI_IN2OUT_NEXT_LOOKUP] = "interface-output",
+ [NAT44_EI_IN2OUT_NEXT_SLOW_PATH] = "nat44-ei-in2out-output-slowpath",
+ [NAT44_EI_IN2OUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [NAT44_EI_IN2OUT_NEXT_HAIRPINNING_HANDOFF] = "nat44-ei-in2out-hairpinning-handoff-interface-output",
+ },
+};
- .n_next_nodes = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_N_NEXT,
+VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_handoff_ip4_lookup_node) = {
+ .name = "nat44-ei-in2out-hairpinning-handoff-ip4-lookup",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
+ .error_strings = nat44_ei_hairpinning_handoff_error_strings,
+ .format_trace = format_nat44_ei_hairpinning_handoff_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
- /* edit / add dispositions here */
+VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_handoff_interface_output_node) = {
+ .name = "nat44-ei-in2out-hairpinning-handoff-interface-output",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
+ .error_strings = nat44_ei_hairpinning_handoff_error_strings,
+ .format_trace = format_nat44_ei_hairpinning_handoff_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_finish_ip4_lookup_node) = {
+ .name = "nat44-ei-in2out-hairpinning-finish-ip4-lookup",
+ .vector_size = sizeof (u32),
+ .format_trace = format_nat44_ei_in2out_fast_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
+ .error_strings = nat44_ei_in2out_error_strings,
+ .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
+ .n_next_nodes = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_N_NEXT,
.next_nodes = {
[NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP] = "error-drop",
[NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP] = "ip4-lookup",
},
};
-VLIB_NODE_FN (nat44_ei_in2out_hairpinning_finish_interface_output_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- return nat44_ei_in2out_hairpinning_finish_inline (vm, node, frame);
-}
-
VLIB_REGISTER_NODE (nat44_ei_in2out_hairpinning_finish_interface_output_node) = {
.name = "nat44-ei-in2out-hairpinning-finish-interface-output",
.vector_size = sizeof (u32),
.format_trace = format_nat44_ei_in2out_fast_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
-
.n_errors = ARRAY_LEN(nat44_ei_in2out_error_strings),
.error_strings = nat44_ei_in2out_error_strings,
-
.runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
.n_next_nodes = NAT44_EI_IN2OUT_HAIRPINNING_FINISH_N_NEXT,
-
- /* edit / add dispositions here */
.next_nodes = {
[NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_DROP] = "error-drop",
[NAT44_EI_IN2OUT_HAIRPINNING_FINISH_NEXT_LOOKUP] = "interface-output",
},
};
+VLIB_REGISTER_NODE (nat44_ei_hairpinning_handoff_node) = {
+ .name = "nat44-ei-hairpinning-handoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(nat44_ei_hairpinning_handoff_error_strings),
+ .error_strings = nat44_ei_hairpinning_handoff_error_strings,
+ .format_trace = format_nat44_ei_hairpinning_handoff_trace,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (nat44_ei_hairpinning_node) = {
+ .name = "nat44-ei-hairpinning",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .format_trace = format_nat44_ei_hairpin_trace,
+ .n_next_nodes = NAT44_EI_HAIRPIN_N_NEXT,
+ .next_nodes = {
+ [NAT44_EI_HAIRPIN_NEXT_DROP] = "error-drop",
+ [NAT44_EI_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup",
+ [NAT44_EI_HAIRPIN_NEXT_HANDOFF] = "nat44-ei-hairpinning-handoff",
+ },
+};
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_inlines.h b/src/plugins/nat/nat44-ei/nat44_ei_inlines.h
index 672927256d1..399486c77dc 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_inlines.h
+++ b/src/plugins/nat/nat44-ei/nat44_ei_inlines.h
@@ -20,6 +20,7 @@
#include <nat/nat44-ei/nat44_ei.h>
#include <nat/nat44-ei/nat44_ei_ha.h>
+#include <nat/lib/nat_proto.h>
always_inline u64
calc_nat_key (ip4_address_t addr, u16 port, u32 fib_index, u8 proto)
@@ -220,6 +221,29 @@ nat44_ei_session_update_counters (nat44_ei_session_t *s, f64 now, uword bytes,
&s->ha_last_refreshed, now);
}
+static_always_inline u32
+nat_session_get_timeout (nat_timeouts_t *timeouts, nat_protocol_t proto,
+ u8 state)
+{
+ switch (proto)
+ {
+ case NAT_PROTOCOL_ICMP:
+ return timeouts->icmp;
+ case NAT_PROTOCOL_UDP:
+ return timeouts->udp;
+ case NAT_PROTOCOL_TCP:
+ {
+ if (state)
+ return timeouts->tcp.transitory;
+ else
+ return timeouts->tcp.established;
+ }
+ default:
+ return timeouts->udp;
+ }
+ return 0;
+}
+
#endif /* __included_nat44_ei_inlines_h__ */
/*
diff --git a/src/plugins/nat/nat44-ei/nat44_ei_out2in.c b/src/plugins/nat/nat44-ei/nat44_ei_out2in.c
index 7796b11cfd7..5d91cb04f7c 100644
--- a/src/plugins/nat/nat44-ei/nat44_ei_out2in.c
+++ b/src/plugins/nat/nat44-ei/nat44_ei_out2in.c
@@ -56,18 +56,6 @@ format_nat44_ei_out2in_trace (u8 *s, va_list *args)
return s;
}
-static u8 *
-format_nat44_ei_out2in_fast_trace (u8 *s, va_list *args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- nat44_ei_out2in_trace_t *t = va_arg (*args, nat44_ei_out2in_trace_t *);
-
- s = format (s, "NAT44_OUT2IN_FAST: sw_if_index %d, next index %d",
- t->sw_if_index, t->next_index);
- return s;
-}
-
#define foreach_nat44_ei_out2in_error \
_ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \
_ (OUT_OF_PORTS, "out of ports") \
@@ -124,13 +112,10 @@ nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg)
if (clib_bihash_add_del_8_8 (&nm->in2out, &s_kv, 0))
nat_elog_warn (nm, "out2in key del failed");
- nat_ipfix_logging_nat44_ses_delete (ctx->thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port,
- s->in2out.fib_index);
+ nat_ipfix_logging_nat44_ses_delete (
+ ctx->thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_syslog_nat44_apmdel (s->user_index, s->in2out.fib_index,
&s->in2out.addr, s->in2out.port,
@@ -233,12 +218,10 @@ create_session_for_static_mapping (
nat_elog_notice (nm, "out2in key add failed");
/* log NAT event */
- nat_ipfix_logging_nat44_ses_create (thread_index,
- s->in2out.addr.as_u32,
- s->out2in.addr.as_u32,
- s->nat_proto,
- s->in2out.port,
- s->out2in.port, s->in2out.fib_index);
+ nat_ipfix_logging_nat44_ses_create (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32,
+ nat_proto_to_ip_proto (s->nat_proto), s->in2out.port, s->out2in.port,
+ s->in2out.fib_index);
nat_syslog_nat44_apmadd (s->user_index, s->in2out.fib_index,
&s->in2out.addr, s->in2out.port, &s->out2in.addr,
@@ -1341,7 +1324,6 @@ VLIB_NODE_FN (nat44_ei_out2in_node)
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat44_ei_out2in_node) = {
.name = "nat44-ei-out2in",
.vector_size = sizeof (u32),
@@ -1362,190 +1344,6 @@ VLIB_REGISTER_NODE (nat44_ei_out2in_node) = {
[NAT44_EI_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
},
};
-/* *INDENT-ON* */
-
-VLIB_NODE_FN (nat44_ei_out2in_fast_node)
-(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
-{
- u32 n_left_from, *from;
- nat44_ei_main_t *nm = &nat44_ei_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
-
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
- vlib_get_buffers (vm, from, b, n_left_from);
- while (n_left_from > 0)
- {
- vlib_buffer_t *b0;
- u32 next0 = NAT44_EI_OUT2IN_NEXT_DROP;
- u32 sw_if_index0;
- ip4_header_t *ip0;
- ip_csum_t sum0;
- u32 new_addr0, old_addr0;
- u16 new_port0, old_port0;
- udp_header_t *udp0;
- tcp_header_t *tcp0;
- icmp46_header_t *icmp0;
- u32 proto0;
- u32 rx_fib_index0;
- ip4_address_t sm_addr0;
- u16 sm_port0;
- u32 sm_fib_index0;
-
- b0 = *b;
- b++;
-
- ip0 = vlib_buffer_get_current (b0);
- udp0 = ip4_next_header (ip0);
- tcp0 = (tcp_header_t *) udp0;
- icmp0 = (icmp46_header_t *) udp0;
-
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- rx_fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
-
- vnet_feature_next (&next0, b0);
-
- if (PREDICT_FALSE (ip0->ttl == 1))
- {
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- icmp4_error_set_vnet_buffer (b0, ICMP4_time_exceeded,
- ICMP4_time_exceeded_ttl_exceeded_in_transit,
- 0);
- next0 = NAT44_EI_OUT2IN_NEXT_ICMP_ERROR;
- goto trace00;
- }
-
- proto0 = ip_proto_to_nat_proto (ip0->protocol);
-
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER))
- goto trace00;
-
- if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP))
- {
- next0 = nat44_ei_icmp_out2in (b0, ip0, icmp0, sw_if_index0,
- rx_fib_index0, node, next0, ~0, 0);
- goto trace00;
- }
-
- if (nat44_ei_static_mapping_match (ip0->dst_address, udp0->dst_port,
- rx_fib_index0, proto0, &sm_addr0,
- &sm_port0, &sm_fib_index0, 1, 0, 0))
- {
- b0->error = node->errors[NAT44_EI_OUT2IN_ERROR_NO_TRANSLATION];
- goto trace00;
- }
-
- new_addr0 = sm_addr0.as_u32;
- new_port0 = sm_port0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sm_fib_index0;
- old_addr0 = ip0->dst_address.as_u32;
- ip0->dst_address.as_u32 = new_addr0;
-
- sum0 = ip0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t, dst_address /* changed member */ );
- ip0->checksum = ip_csum_fold (sum0);
-
- if (PREDICT_FALSE (new_port0 != udp0->dst_port))
- {
- old_port0 = udp0->dst_port;
- udp0->dst_port = new_port0;
-
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- sum0 = ip_csum_update (sum0, old_port0, new_port0,
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- tcp0->checksum = ip_csum_fold (sum0);
- }
- else if (udp0->checksum)
- {
- sum0 = udp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- sum0 = ip_csum_update (sum0, old_port0, new_port0,
- ip4_header_t /* cheat */ ,
- length /* changed member */ );
- udp0->checksum = ip_csum_fold (sum0);
- }
- }
- else
- {
- if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP))
- {
- sum0 = tcp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- tcp0->checksum = ip_csum_fold (sum0);
- }
- else if (udp0->checksum)
- {
- sum0 = udp0->checksum;
- sum0 = ip_csum_update (sum0, old_addr0, new_addr0,
- ip4_header_t,
- dst_address /* changed member */ );
- udp0->checksum = ip_csum_fold (sum0);
- }
- }
-
- trace00:
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- nat44_ei_out2in_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sw_if_index = sw_if_index0;
- t->next_index = next0;
- }
-
- if (next0 == NAT44_EI_OUT2IN_NEXT_DROP)
- {
- vlib_increment_simple_counter (&nm->counters.fastpath.out2in.drops,
- vm->thread_index, sw_if_index0, 1);
- }
-
- n_left_from--;
- next[0] = next0;
- next++;
- }
-
- vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
- frame->n_vectors);
-
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (nat44_ei_out2in_fast_node) = {
- .name = "nat44-ei-out2in-fast",
- .vector_size = sizeof (u32),
- .format_trace = format_nat44_ei_out2in_fast_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(nat44_ei_out2in_error_strings),
- .error_strings = nat44_ei_out2in_error_strings,
-
- .runtime_data_bytes = sizeof (nat44_ei_runtime_t),
-
- .n_next_nodes = NAT44_EI_OUT2IN_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [NAT44_EI_OUT2IN_NEXT_LOOKUP] = "ip4-lookup",
- [NAT44_EI_OUT2IN_NEXT_DROP] = "error-drop",
- [NAT44_EI_OUT2IN_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- },
-};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat64/nat64.c b/src/plugins/nat/nat64/nat64.c
index 1c1cdfba3fb..950eea60e5e 100644
--- a/src/plugins/nat/nat64/nat64.c
+++ b/src/plugins/nat/nat64/nat64.c
@@ -26,7 +26,6 @@
nat64_main_t nat64_main;
-/* *INDENT-OFF* */
/* Hook up input features */
VNET_FEATURE_INIT (nat64_in2out, static) = {
.arc_name = "ip6-unicast",
@@ -62,7 +61,6 @@ static u8 well_known_prefix[] = {
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00
};
-/* *INDENT-ON* */
#define nat_elog_str(_str) \
do \
@@ -135,6 +133,20 @@ nat64_get_worker_in2out (ip6_address_t * addr)
return next_worker_index;
}
+static u32
+get_thread_idx_by_port (u16 e_port)
+{
+ nat64_main_t *nm = &nat64_main;
+ u32 thread_idx = nm->num_workers;
+ if (nm->num_workers > 1)
+ {
+ thread_idx = nm->first_worker_index +
+ nm->workers[(e_port - 1024) / nm->port_per_thread %
+ _vec_len (nm->workers)];
+ }
+ return thread_idx;
+}
+
u32
nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip)
{
@@ -157,14 +169,12 @@ nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip)
clib_memset (&daddr, 0, sizeof (daddr));
daddr.ip4.as_u32 = ip->dst_address.as_u32;
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
bibe = nat64_db_bib_entry_find (db, &daddr, 0, ip->protocol, 0, 0);
if (bibe)
return (u32) (db - nm->db);
}
- /* *INDENT-ON* */
return vlib_get_thread_index ();
}
@@ -202,7 +212,7 @@ nat64_get_worker_out2in (vlib_buffer_t * b, ip4_header_t * ip)
/* worker by outside port (TCP/UDP) */
port = clib_net_to_host_u16 (port);
if (port > 1024)
- return nm->first_worker_index + ((port - 1024) / nm->port_per_thread);
+ return get_thread_idx_by_port (port);
return vlib_get_thread_index ();
}
@@ -282,12 +292,10 @@ nat64_init (vlib_main_t * vm)
for (i = 0; i < nm->num_workers; i++)
bitmap = clib_bitmap_set (bitmap, i, 1);
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, bitmap)
{
vec_add1(nm->workers, i);
}
- /* *INDENT-ON* */
clib_bitmap_free (bitmap);
@@ -323,7 +331,6 @@ nat64_init_hash (nat64_config_t c)
vec_validate (nm->db, tm->n_vlib_mains - 1);
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
if (nat64_db_init (db, c, nat64_free_out_addr_and_port))
@@ -332,7 +339,6 @@ nat64_init_hash (nat64_config_t c)
rv = 1;
}
}
- /* *INDENT-ON* */
return rv;
}
@@ -344,7 +350,6 @@ nat64_free_hash ()
nat64_db_t *db;
int rv = 0;
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
if (nat64_db_free (db))
@@ -353,7 +358,6 @@ nat64_free_hash ()
rv = 1;
}
}
- /* *INDENT-ON* */
vec_free (nm->db);
@@ -408,7 +412,6 @@ nat64_add_del_pool_addr (u32 thread_index,
if (a->fib_index != ~0)
fib_table_unlock (a->fib_index, FIB_PROTOCOL_IP6, nm->fib_src_hi);
/* Delete sessions using address */
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
nat64_db_free_out_addr (thread_index, db, &a->addr);
@@ -417,12 +420,10 @@ nat64_add_del_pool_addr (u32 thread_index,
vlib_set_simple_counter (&nm->total_sessions, db - nm->db, 0,
db->st.st_entries_num);
}
- /* *INDENT-ON* */
vec_del1 (nm->addr_pool, i);
}
/* Add/del external address to FIB */
- /* *INDENT-OFF* */
pool_foreach (interface, nm->interfaces)
{
if (nat64_interface_is_inside(interface))
@@ -431,7 +432,6 @@ nat64_add_del_pool_addr (u32 thread_index,
nat64_add_del_addr_to_fib (addr, 32, interface->sw_if_index, is_add);
break;
}
- /* *INDENT-ON* */
return 0;
}
@@ -442,13 +442,11 @@ nat64_pool_addr_walk (nat64_pool_addr_walk_fn_t fn, void *ctx)
nat64_main_t *nm = &nat64_main;
nat64_address_t *a = 0;
- /* *INDENT-OFF* */
vec_foreach (a, nm->addr_pool)
{
if (fn (a, ctx))
break;
};
- /* *INDENT-ON* */
}
int
@@ -546,7 +544,6 @@ nat64_interface_add_del (u32 sw_if_index, u8 is_inside, u8 is_add)
// TODO: is enabled ? we can't signal if it is not
/* Check if interface already exists */
- /* *INDENT-OFF* */
pool_foreach (i, nm->interfaces)
{
if (i->sw_if_index == sw_if_index)
@@ -555,7 +552,6 @@ nat64_interface_add_del (u32 sw_if_index, u8 is_inside, u8 is_add)
break;
}
}
- /* *INDENT-ON* */
if (is_add)
{
@@ -596,10 +592,8 @@ nat64_interface_add_del (u32 sw_if_index, u8 is_inside, u8 is_add)
if (!is_inside)
{
- /* *INDENT-OFF* */
vec_foreach (ap, nm->addr_pool)
nat64_add_del_addr_to_fib (&ap->addr, 32, sw_if_index, is_add);
- /* *INDENT-ON* */
}
if (nm->num_workers > 1)
@@ -641,13 +635,11 @@ nat64_interfaces_walk (nat64_interface_walk_fn_t fn, void *ctx)
nat64_main_t *nm = &nat64_main;
nat64_interface_t *i = 0;
- /* *INDENT-OFF* */
pool_foreach (i, nm->interfaces)
{
if (fn (i, ctx))
break;
}
- /* *INDENT-ON* */
}
// TODO: plugin independent
@@ -822,7 +814,6 @@ nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
nat64_db_bib_entry_t *bibe;
ip46_address_t addr;
- /* *INDENT-OFF* */
pool_foreach (static_bib, nm->static_bibs)
{
if ((static_bib->thread_index != thread_index) || (static_bib->done))
@@ -859,21 +850,18 @@ nat64_static_bib_worker_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
static_bib->done = 1;
}
- /* *INDENT-ON* */
return 0;
}
static vlib_node_registration_t nat64_static_bib_worker_node;
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_static_bib_worker_node, static) = {
.function = nat64_static_bib_worker_fn,
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
.name = "nat64-static-bib-worker",
};
-/* *INDENT-ON* */
int
nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
@@ -916,7 +904,7 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
/* outside port must be assigned to same thread as internall address */
if ((out_port > 1024) && (nm->num_workers > 1))
{
- if (thread_index != ((out_port - 1024) / nm->port_per_thread))
+ if (thread_index != get_thread_idx_by_port (out_port))
return VNET_API_ERROR_INVALID_VALUE_2;
}
@@ -977,7 +965,6 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
if (nm->num_workers)
{
- /* *INDENT-OFF* */
pool_foreach (static_bib, nm->static_bibs)
{
if (static_bib->done)
@@ -985,7 +972,6 @@ nat64_add_del_static_bib_entry (ip6_address_t * in_addr,
}
vec_foreach (index, to_be_free)
pool_put_index (nm->static_bibs, index[0]);
- /* *INDENT-ON* */
vec_free (to_be_free);
pool_get (nm->static_bibs, static_bib);
static_bib->in_addr.as_u64[0] = in_addr->as_u64[0];
@@ -1258,13 +1244,11 @@ nat64_prefix_walk (nat64_prefix_walk_fn_t fn, void *ctx)
nat64_main_t *nm = &nat64_main;
nat64_prefix_t *p = 0;
- /* *INDENT-OFF* */
vec_foreach (p, nm->pref64)
{
if (fn (p, ctx))
break;
};
- /* *INDENT-ON* */
}
void
@@ -1273,7 +1257,6 @@ nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
nat64_main_t *nm = &nat64_main;
nat64_prefix_t *p, *gp = 0, *prefix = 0;
- /* *INDENT-OFF* */
vec_foreach (p, nm->pref64)
{
if (p->fib_index == fib_index)
@@ -1285,7 +1268,6 @@ nat64_compose_ip6 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
if (p->fib_index == 0)
gp = p;
};
- /* *INDENT-ON* */
if (!prefix)
prefix = gp;
@@ -1344,7 +1326,6 @@ nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
nat64_prefix_t *p, *gp = 0;
u8 plen = 0;
- /* *INDENT-OFF* */
vec_foreach (p, nm->pref64)
{
if (p->fib_index == fib_index)
@@ -1356,7 +1337,6 @@ nat64_extract_ip4 (ip6_address_t * ip6, ip4_address_t * ip4, u32 fib_index)
if (p->vrf_id == 0)
gp = p;
};
- /* *INDENT-ON* */
if (!plen)
{
@@ -1431,14 +1411,12 @@ nat64_expire_worker_walk_fn (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_expire_worker_walk_node, static) = {
.function = nat64_expire_worker_walk_fn,
.type = VLIB_NODE_TYPE_INPUT,
.state = VLIB_NODE_STATE_INTERRUPT,
.name = "nat64-expire-worker-walk",
};
-/* *INDENT-ON* */
/**
* @brief Centralized process to drive per worker expire walk.
@@ -1566,12 +1544,10 @@ nat64_plugin_disable ()
}
nm->enabled = 0;
- /* *INDENT-OFF* */
pool_foreach (i, nm->interfaces)
{
vec_add1 (interfaces, *i);
}
- /* *INDENT-ON* */
vec_foreach (i, interfaces)
{
rv = nat64_interface_add_del (i->sw_if_index, i->flags, 0);
diff --git a/src/plugins/nat/nat64/nat64.h b/src/plugins/nat/nat64/nat64.h
index 1180f9df778..9eb8d915390 100644
--- a/src/plugins/nat/nat64/nat64.h
+++ b/src/plugins/nat/nat64/nat64.h
@@ -93,14 +93,12 @@ typedef struct
{
ip4_address_t addr;
u32 fib_index;
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
u16 busy_##n##_ports; \
u16 * busy_##n##_ports_per_thread; \
u32 busy_##n##_port_refcounts[65535];
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
} nat64_address_t;
typedef struct
diff --git a/src/plugins/nat/nat64/nat64_api.c b/src/plugins/nat/nat64/nat64_api.c
index e64b6434fd2..87cca01b59b 100644
--- a/src/plugins/nat/nat64/nat64_api.c
+++ b/src/plugins/nat/nat64/nat64_api.c
@@ -69,7 +69,6 @@ vl_api_nat64_get_timeouts_t_handler (vl_api_nat64_get_timeouts_t * mp)
vl_api_nat64_get_timeouts_reply_t *rmp;
int rv = 0;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_NAT64_GET_TIMEOUTS_REPLY,
({
rmp->udp = htonl (nm->udp_timeout);
@@ -77,7 +76,6 @@ vl_api_nat64_get_timeouts_t_handler (vl_api_nat64_get_timeouts_t * mp)
rmp->tcp_transitory = htonl (nm->tcp_trans_timeout);
rmp->icmp = htonl (nm->icmp_timeout);
}))
- /* *INDENT-ON* */
}
static void
@@ -298,10 +296,8 @@ vl_api_nat64_bib_dump_t_handler (vl_api_nat64_bib_dump_t * mp)
.context = mp->context,
};
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
nat64_db_bib_walk (db, mp->proto, nat64_api_bib_walk, &ctx);
- /* *INDENT-ON* */
}
static int
@@ -356,13 +352,11 @@ vl_api_nat64_st_dump_t_handler (vl_api_nat64_st_dump_t * mp)
.context = mp->context,
};
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
ctx.db = db;
nat64_db_st_walk (db, mp->proto, nat64_api_st_walk, &ctx);
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/plugins/nat/nat64/nat64_cli.c b/src/plugins/nat/nat64/nat64_cli.c
index 3af715c2457..2cef71080f9 100644
--- a/src/plugins/nat/nat64/nat64_cli.c
+++ b/src/plugins/nat/nat64/nat64_cli.c
@@ -484,10 +484,8 @@ nat64_show_bib_command_fn (vlib_main_t * vm,
else
vlib_cli_output (vm, "NAT64 %U BIB entries:", format_nat_protocol, proto);
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
nat64_db_bib_walk (db, p, nat64_cli_bib_walk, vm);
- /* *INDENT-ON* */
done:
unformat_free (line_input);
@@ -586,13 +584,11 @@ nat64_show_st_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "NAT64 sessions:");
else
vlib_cli_output (vm, "NAT64 %U sessions:", format_nat_protocol, proto);
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
ctx.db = db;
nat64_db_st_walk (db, p, nat64_cli_st_walk, &ctx);
}
- /* *INDENT-ON* */
done:
unformat_free (line_input);
@@ -775,7 +771,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
/*?
* @cliexpar
* @cliexstart{nat64 plugin}
@@ -983,7 +978,6 @@ VLIB_CLI_COMMAND (nat64_add_interface_address_command, static) = {
.short_help = "nat64 add interface address <interface> [del]",
.function = nat64_add_interface_address_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat64/nat64_db.c b/src/plugins/nat/nat64/nat64_db.c
index 82ef70de5cf..e4e9febcb12 100644
--- a/src/plugins/nat/nat64/nat64_db.c
+++ b/src/plugins/nat/nat64/nat64_db.c
@@ -54,13 +54,11 @@ nat64_db_free (nat64_db_t * db)
clib_bihash_free_48_8 (&db->st.in2out);
clib_bihash_free_48_8 (&db->st.out2in);
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
pool_free (db->bib._##n##_bib); \
pool_free (db->st._##n##_st);
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
pool_free (db->bib._unk_proto_bib);
pool_free (db->st._unk_proto_st);
@@ -82,14 +80,13 @@ nat64_db_bib_entry_create (u32 thread_index, nat64_db_t * db,
if (db->bib.bib_entries_num >= db->bib.limit)
{
db->free_addr_port_cb (db, out_addr, out_port, proto);
- //nat_ipfix_logging_max_bibs (thread_index, db->bib.limit);
+ nat_ipfix_logging_max_bibs (thread_index, db->bib.limit);
return 0;
}
/* create pool entry */
switch (ip_proto_to_nat_proto (proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
pool_get (db->bib._##n##_bib, bibe); \
@@ -97,7 +94,6 @@ nat64_db_bib_entry_create (u32 thread_index, nat64_db_t * db,
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
pool_get (db->bib._unk_proto_bib, bibe);
kv.value = bibe - db->bib._unk_proto_bib;
@@ -155,7 +151,6 @@ nat64_db_bib_entry_free (u32 thread_index, nat64_db_t * db,
switch (ip_proto_to_nat_proto (bibe->proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
bib = db->bib._##n##_bib; \
@@ -163,7 +158,6 @@ nat64_db_bib_entry_free (u32 thread_index, nat64_db_t * db,
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
bib = db->bib._unk_proto_bib;
st = db->st._unk_proto_st;
@@ -232,14 +226,12 @@ nat64_db_bib_entry_find (nat64_db_t * db, ip46_address_t * addr, u16 port,
switch (ip_proto_to_nat_proto (proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
bib = db->bib._##n##_bib; \
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
bib = db->bib._unk_proto_bib;
break;
@@ -271,7 +263,6 @@ nat64_db_bib_walk (nat64_db_t * db, u8 proto,
if (proto == 255)
{
- /* *INDENT-OFF* */
#define _(N, i, n, s) \
bib = db->bib._##n##_bib; \
pool_foreach (bibe, bib) { \
@@ -285,32 +276,27 @@ nat64_db_bib_walk (nat64_db_t * db, u8 proto,
if (fn (bibe, ctx))
return;
}
- /* *INDENT-ON* */
}
else
{
switch (ip_proto_to_nat_proto (proto))
{
- /* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
bib = db->bib._##n##_bib; \
break;
foreach_nat_protocol
#undef _
- /* *INDENT-ON* */
default:
bib = db->bib._unk_proto_bib;
break;
}
- /* *INDENT-OFF* */
pool_foreach (bibe, bib)
{
if (fn (bibe, ctx))
return;
}
- /* *INDENT-ON* */
}
}
@@ -321,14 +307,12 @@ nat64_db_bib_entry_by_index (nat64_db_t * db, u8 proto, u32 bibe_index)
switch (ip_proto_to_nat_proto (proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
bib = db->bib._##n##_bib; \
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
bib = db->bib._unk_proto_bib;
break;
@@ -345,7 +329,6 @@ nat64_db_st_walk (nat64_db_t * db, u8 proto,
if (proto == 255)
{
- /* *INDENT-OFF* */
#define _(N, i, n, s) \
st = db->st._##n##_st; \
pool_foreach (ste, st) { \
@@ -359,32 +342,27 @@ nat64_db_st_walk (nat64_db_t * db, u8 proto,
if (fn (ste, ctx))
return;
}
- /* *INDENT-ON* */
}
else
{
switch (ip_proto_to_nat_proto (proto))
{
- /* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
st = db->st._##n##_st; \
break;
foreach_nat_protocol
#undef _
- /* *INDENT-ON* */
default:
st = db->st._unk_proto_st;
break;
}
- /* *INDENT-OFF* */
pool_foreach (ste, st)
{
if (fn (ste, ctx))
return;
}
- /* *INDENT-ON* */
}
}
@@ -401,14 +379,13 @@ nat64_db_st_entry_create (u32 thread_index, nat64_db_t * db,
if (db->st.st_entries_num >= db->st.limit)
{
- //nat_ipfix_logging_max_sessions (thread_index, db->st.limit);
+ nat_ipfix_logging_max_sessions (thread_index, db->st.limit);
return 0;
}
/* create pool entry */
switch (ip_proto_to_nat_proto (bibe->proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
pool_get (db->st._##n##_st, ste); \
@@ -417,7 +394,6 @@ nat64_db_st_entry_create (u32 thread_index, nat64_db_t * db,
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
pool_get (db->st._unk_proto_st, ste);
kv.value = ste - db->st._unk_proto_st;
@@ -494,7 +470,6 @@ nat64_db_st_entry_free (u32 thread_index,
switch (ip_proto_to_nat_proto (ste->proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
st = db->st._##n##_st; \
@@ -502,7 +477,6 @@ nat64_db_st_entry_free (u32 thread_index,
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
st = db->st._unk_proto_st;
bib = db->bib._unk_proto_bib;
@@ -579,14 +553,12 @@ nat64_db_st_entry_find (nat64_db_t * db, ip46_address_t * l_addr,
switch (ip_proto_to_nat_proto (proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
st = db->st._##n##_st; \
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
st = db->st._unk_proto_st;
break;
@@ -622,14 +594,12 @@ nat64_db_st_entry_get_index (nat64_db_t * db, nat64_db_st_entry_t * ste)
switch (ip_proto_to_nat_proto (ste->proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
st = db->st._##n##_st; \
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
st = db->st._unk_proto_st;
return (u32) ~ 0;
@@ -645,14 +615,12 @@ nat64_db_st_entry_by_index (nat64_db_t * db, u8 proto, u32 ste_index)
switch (ip_proto_to_nat_proto (proto))
{
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
case NAT_PROTOCOL_##N: \
st = db->st._##n##_st; \
break;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
default:
st = db->st._unk_proto_st;
break;
@@ -667,7 +635,6 @@ nad64_db_st_free_expired (u32 thread_index, nat64_db_t * db, u32 now)
u32 *ste_to_be_free = 0, *ste_index;
nat64_db_st_entry_t *st, *ste;
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
st = db->st._##n##_st; \
pool_foreach (ste, st) {\
@@ -692,7 +659,6 @@ nad64_db_st_free_expired (u32 thread_index, nat64_db_t * db, u32 now)
nat64_db_st_entry_free (thread_index, db,
pool_elt_at_index(st, ste_index[0]));
vec_free (ste_to_be_free);
-/* *INDENT-ON* */
}
void
@@ -704,7 +670,6 @@ nat64_db_free_out_addr (u32 thread_index,
nat64_db_bib_entry_t *bibe;
db->addr_free = 1;
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
st = db->st._##n##_st; \
pool_foreach (ste, st) { \
@@ -730,7 +695,6 @@ nat64_db_free_out_addr (u32 thread_index,
pool_elt_at_index(st, ste_index[0]));
vec_free (ste_to_be_free);
db->addr_free = 0;
-/* *INDENT-ON* */
}
/*
diff --git a/src/plugins/nat/nat64/nat64_db.h b/src/plugins/nat/nat64/nat64_db.h
index 711b6bf6b03..a7d433fb8ea 100644
--- a/src/plugins/nat/nat64/nat64_db.h
+++ b/src/plugins/nat/nat64/nat64_db.h
@@ -18,6 +18,7 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
#include <vnet/fib/fib_source.h>
+#include <nat/lib/nat_proto.h>
#include <vppinfra/bihash_24_8.h>
#include <vppinfra/bihash_48_8.h>
@@ -46,7 +47,6 @@ typedef struct
};
} nat64_db_bib_entry_key_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct
{
ip6_address_t in_addr;
@@ -58,17 +58,14 @@ typedef CLIB_PACKED(struct
u8 proto;
u8 is_static;
}) nat64_db_bib_entry_t;
-/* *INDENT-ON* */
typedef struct
{
/* BIBs */
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
nat64_db_bib_entry_t *_##n##_bib;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
nat64_db_bib_entry_t *_unk_proto_bib;
/* BIB lookup */
@@ -97,7 +94,6 @@ typedef struct
};
} nat64_db_st_entry_key_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct
{
ip6_address_t in_r_addr;
@@ -108,17 +104,14 @@ typedef CLIB_PACKED(struct
u8 proto;
u8 tcp_state;
}) nat64_db_st_entry_t;
-/* *INDENT-ON* */
typedef struct
{
/* session tables */
-/* *INDENT-OFF* */
#define _(N, i, n, s) \
nat64_db_st_entry_t *_##n##_st;
foreach_nat_protocol
#undef _
-/* *INDENT-ON* */
nat64_db_st_entry_t *_unk_proto_st;
/* session lookup */
diff --git a/src/plugins/nat/nat64/nat64_doc.md b/src/plugins/nat/nat64/nat64_doc.md
deleted file mode 100644
index f65b46338b0..00000000000
--- a/src/plugins/nat/nat64/nat64_doc.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# Stateful NAT64: Network Address and Protocol Translation from IPv6 Clients to IPv4 Servers {#nat64_doc}
-
-## Introduction
-
-Stateful NAT64 in VPP allows IPv6-only clients to contact IPv4 servers using unicast UDP, TCP, or ICMP based on RFC 6146.
-
-## Configuration
-
-### Enable/disable NAT64 feature on the interface
-
-> set interface nat64 in|out <intfc> [del]
-
-in: inside/local/IPv6 network
-out: outside/external/IPv4 network
-intfc: interface name
-
-### Add/delete NAT64 pool address
-
-One or more public IPv4 addresses assigned to a NAT64 are shared among several IPv6-only clients.
-
-> nat64 add pool address <ip4-range-start> [- <ip4-range-end>] [tenant-vrf <tenant-vrf-id>] [del]
-
-ip4-range-start: First IPv4 address of the range
-ip4-range-end: Last IPv4 address of the range (optional, not used for single address)
-tenant-vrf-id: VRF id of the tenant associated with the pool address (optional, if not set pool address is global)
-
-### Add/delete static BIB entry
-
-Stateful NAT64 also supports IPv4-initiated communications to a subset of the IPv6 hosts through staticaly configured bindings.
-
-> nat64 add static bib <ip6-addr> <in-port> <ip4-addr> <out-port> tcp|udp|icmp [vfr <table-id>] [del]
-
-ip6-addr: inside IPv6 address of the host
-in-port: inside port or ICMPv6 identifier
-ip4-addr: outside IPv4 address of the host
-out-port: outside port or ICMPv4 identifier
-table-id: VRF id of the tenant associated with the BIB entry (optional, default use global VRF)
-
-### Set NAT64 session timeouts
-
-Session is deleted when timer expires. If all sessions corresponding to a dynamically create BIB entry are deleted, then the BIB entry is also deleted. When packets are flowing sessiom timer is refreshed to keep the session alive.
-
-> set nat64 timeouts udp <sec> icmp <sec> tcp-trans <sec> tcp-est <sec> tcp-incoming-syn <sec> | reset
-
-udp: UDP session timeout value (default 300sec)
-icmp: ICMP session timeout value (default 60sec)
-tcp-trans: transitory TCP session timeout value (default 240sec)
-tcp-est: established TCP session timeout value (default 7440sec)
-tcp-incoming-syn: incoming SYN TCP session timeout value (default 6sec)
-reset: reset timers to default values
-
-### Set NAT64 prefix
-
-Stateful NAT64 support the algorithm for generating IPv6 representations of IPv4 addresses defined in RFC 6052. If no prefix is configured, Well-Known Prefix (64:ff9b::/96) is used.
-
-> nat64 add prefix <ip6-prefix>/<plen> [tenant-vrf <vrf-id>] [del]
-
-ip6-prefix: IPv6 prefix
-plen: prefix length (valid values: 32, 40, 48, 56, 64, or 96)
-tenant-vrf: VRF id of the tenant associated with the prefix
-
-### Show commands
-
-> show nat64 pool
-> show nat64 interfaces
-> show nat64 bib tcp|udp|icmp
-> show nat64 session table tcp|udp|icmp
-> show nat64 timeouts
-> show nat64 prefix
-
-## Notes
-
-Multi thread is not supported yet (CLI/API commands are disabled when VPP runs with multiple threads).
diff --git a/src/plugins/nat/nat64/nat64_doc.rst b/src/plugins/nat/nat64/nat64_doc.rst
new file mode 100644
index 00000000000..f375fba68bd
--- /dev/null
+++ b/src/plugins/nat/nat64/nat64_doc.rst
@@ -0,0 +1,91 @@
+Stateful NAT64
+==============
+
+This document describes stateful NAT64 Network Address and Protocol
+Translation
+
+Introduction
+------------
+
+Stateful NAT64 in VPP allows IPv6-only clients to contact IPv4 servers
+using unicast UDP, TCP, or ICMP based on RFC 6146.
+
+Configuration
+-------------
+
+Enable/disable NAT64 feature on the interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ set interface nat64 in|out [del]
+
+in: inside/local/IPv6 network out: outside/external/IPv4 network intfc:
+interface name
+
+Add/delete NAT64 pool address
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One or more public IPv4 addresses assigned to a NAT64 are shared among
+several IPv6-only clients.
+
+ nat64 add pool address [- ] [tenant-vrf ] [del]
+
+ip4-range-start: First IPv4 address of the range ip4-range-end: Last
+IPv4 address of the range (optional, not used for single address)
+tenant-vrf-id: VRF id of the tenant associated with the pool address
+(optional, if not set pool address is global)
+
+Add/delete static BIB entry
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Stateful NAT64 also supports IPv4-initiated communications to a subset
+of the IPv6 hosts through statically configured bindings.
+
+ nat64 add static bib tcp|udp|icmp [vfr ] [del]
+
+ip6-addr: inside IPv6 address of the host in-port: inside port or ICMPv6
+identifier ip4-addr: outside IPv4 address of the host out-port: outside
+port or ICMPv4 identifier table-id: VRF id of the tenant associated with
+the BIB entry (optional, default use global VRF)
+
+Set NAT64 session timeouts
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Session is deleted when timer expires. If all sessions corresponding to
+a dynamically create BIB entry are deleted, then the BIB entry is also
+deleted. When packets are flowing session timer is refreshed to keep the
+session alive.
+
+ set nat64 timeouts udp icmp tcp-trans tcp-est tcp-incoming-syn \|
+ reset
+
+udp: UDP session timeout value (default 300sec) icmp: ICMP session
+timeout value (default 60sec) tcp-trans: transitory TCP session timeout
+value (default 240sec) tcp-est: established TCP session timeout value
+(default 7440sec) tcp-incoming-syn: incoming SYN TCP session timeout
+value (default 6sec) reset: reset timers to default values
+
+Set NAT64 prefix
+~~~~~~~~~~~~~~~~
+
+Stateful NAT64 support the algorithm for generating IPv6 representations
+of IPv4 addresses defined in RFC 6052. If no prefix is configured,
+Well-Known Prefix (64:ff9b::/96) is used.
+
+ nat64 add prefix / [tenant-vrf ] [del]
+
+ip6-prefix: IPv6 prefix plen: prefix length (valid values: 32, 40, 48,
+56, 64, or 96) tenant-vrf: VRF id of the tenant associated with the
+prefix
+
+Show commands
+~~~~~~~~~~~~~
+
+ show nat64 pool show nat64 interfaces show nat64 bib tcp|udp|icmp
+ show nat64 session table tcp|udp|icmp show nat64 timeouts show nat64
+ prefix
+
+Notes
+-----
+
+Multi thread is not supported yet (CLI/API commands are disabled when
+VPP runs with multiple threads).
diff --git a/src/plugins/nat/nat64/nat64_in2out.c b/src/plugins/nat/nat64/nat64_in2out.c
index 52d3c7f0a3b..63eab72a7fb 100644
--- a/src/plugins/nat/nat64/nat64_in2out.c
+++ b/src/plugins/nat/nat64/nat64_in2out.c
@@ -86,14 +86,12 @@ nat64_not_translate (u32 sw_if_index, ip6_address_t ip6_addr)
ip_lookup_main_t *lm6 = &im6->lookup_main;
ip_interface_address_t *ia = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
({
addr = ip_interface_address_get_address (lm6, ia);
if (0 == ip6_address_compare (addr, &ip6_addr))
return 1;
}));
- /* *INDENT-ON* */
return 0;
}
@@ -744,7 +742,6 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
bibe = 0;
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
bibe = nat64_db_bib_entry_find (db, &daddr, dport, proto, 0, 0);
@@ -752,7 +749,6 @@ nat64_in2out_tcp_udp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
if (bibe)
break;
}
- /* *INDENT-ON* */
if (!bibe)
return -1;
@@ -851,7 +847,6 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
daddr.ip4.as_u32 = bibe->out_addr.as_u32;
ste = 0;
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
ste = nat64_db_st_entry_find (db, &saddr, &daddr, sport, dport, proto,
@@ -860,7 +855,6 @@ nat64_in2out_icmp_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
if (ste)
break;
}
- /* *INDENT-ON* */
if (!ste)
return -1;
@@ -1006,7 +1000,6 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
daddr.ip4.as_u32 = ste->out_r_addr.as_u32;
bibe = 0;
- /* *INDENT-OFF* */
vec_foreach (db, nm->db)
{
bibe = nat64_db_bib_entry_find (db, &daddr, 0, proto, 0, 0);
@@ -1014,7 +1007,6 @@ nat64_in2out_unk_proto_hairpinning (vlib_main_t * vm, vlib_buffer_t * b,
if (bibe)
break;
}
- /* *INDENT-ON* */
if (!bibe)
return -1;
@@ -1226,7 +1218,6 @@ VLIB_NODE_FN (nat64_in2out_node) (vlib_main_t * vm,
return nat64_in2out_node_fn_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_in2out_node) = {
.name = "nat64-in2out",
.vector_size = sizeof (u32),
@@ -1243,7 +1234,6 @@ VLIB_REGISTER_NODE (nat64_in2out_node) = {
[NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (nat64_in2out_slowpath_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1252,7 +1242,6 @@ VLIB_NODE_FN (nat64_in2out_slowpath_node) (vlib_main_t * vm,
return nat64_in2out_node_fn_inline (vm, node, frame, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
.name = "nat64-in2out-slowpath",
.vector_size = sizeof (u32),
@@ -1269,7 +1258,6 @@ VLIB_REGISTER_NODE (nat64_in2out_slowpath_node) = {
[NAT64_IN2OUT_NEXT_SLOWPATH] = "nat64-in2out-slowpath",
},
};
-/* *INDENT-ON* */
typedef struct nat64_in2out_frag_set_ctx_t_
{
@@ -1384,7 +1372,6 @@ VLIB_NODE_FN (nat64_in2out_handoff_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
.name = "nat64-in2out-handoff",
.vector_size = sizeof (u32),
@@ -1399,7 +1386,6 @@ VLIB_REGISTER_NODE (nat64_in2out_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat64/nat64_out2in.c b/src/plugins/nat/nat64/nat64_out2in.c
index 7cc2d3d3b70..a8faead4470 100644
--- a/src/plugins/nat/nat64/nat64_out2in.c
+++ b/src/plugins/nat/nat64/nat64_out2in.c
@@ -639,7 +639,6 @@ VLIB_NODE_FN (nat64_out2in_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_node) = {
.name = "nat64-out2in",
.vector_size = sizeof (u32),
@@ -655,7 +654,6 @@ VLIB_REGISTER_NODE (nat64_out2in_node) = {
[NAT64_OUT2IN_NEXT_IP4_LOOKUP] = "ip4-lookup",
},
};
-/* *INDENT-ON* */
typedef struct nat64_out2in_frag_set_ctx_t_
{
@@ -769,7 +767,6 @@ VLIB_NODE_FN (nat64_out2in_handoff_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = {
.name = "nat64-out2in-handoff",
.vector_size = sizeof (u32),
@@ -784,7 +781,6 @@ VLIB_REGISTER_NODE (nat64_out2in_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat66/nat66_cli.c b/src/plugins/nat/nat66/nat66_cli.c
index 0ca40875584..9ef8d056685 100644
--- a/src/plugins/nat/nat66/nat66_cli.c
+++ b/src/plugins/nat/nat66/nat66_cli.c
@@ -330,16 +330,16 @@ nat66_show_static_mappings_command_fn (vlib_main_t * vm,
* @cliexpar
* @cliexstart{nat66}
* To enable NAT66 plugin
- * vpp# nat66 enable
+ * vpp# nat66 plugin enable
* To disable NAT66 plugin
- * vpp# nat66 disable
+ * vpp# nat66 plugin disable
* To enable NAT66 plugin with outside-vrf id 10
- * vpp# nat66 enable outside-vrf 10
+ * vpp# nat66 plugin enable outside-vrf 10
* @cliexend
?*/
VLIB_CLI_COMMAND (nat66_enable_disable_command, static) = {
- .path = "nat66",
- .short_help = "nat66 <enable [outside-vrf <vrf-id>]>|disable",
+ .path = "nat66 plugin",
+ .short_help = "nat66 plugin <enable [outside-vrf <vrf-id>]>|disable",
.function = nat66_enable_disable_command_fn,
};
diff --git a/src/plugins/nat/nat66/nat66_in2out.c b/src/plugins/nat/nat66/nat66_in2out.c
index 356100f89ef..aa2229e1997 100644
--- a/src/plugins/nat/nat66/nat66_in2out.c
+++ b/src/plugins/nat/nat66/nat66_in2out.c
@@ -94,14 +94,12 @@ nat66_not_translate (u32 rx_fib_index, ip6_address_t ip6_addr)
sw_if_index = fib_entry_get_resolving_interface (fei);
}
- /* *INDENT-OFF* */
pool_foreach (i, nm->interfaces)
{
/* NAT packet aimed at outside interface */
if (nat66_interface_is_outside (i) && sw_if_index == i->sw_if_index)
return 0;
}
- /* *INDENT-ON* */
return 1;
}
@@ -235,7 +233,6 @@ VLIB_NODE_FN (nat66_in2out_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat66_in2out_node) = {
.name = "nat66-in2out",
.vector_size = sizeof (u32),
@@ -250,7 +247,6 @@ VLIB_REGISTER_NODE (nat66_in2out_node) = {
[NAT66_IN2OUT_NEXT_IP6_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/nat66/nat66_out2in.c b/src/plugins/nat/nat66/nat66_out2in.c
index 9d44b4880eb..820e0c79033 100644
--- a/src/plugins/nat/nat66/nat66_out2in.c
+++ b/src/plugins/nat/nat66/nat66_out2in.c
@@ -193,7 +193,6 @@ VLIB_NODE_FN (nat66_out2in_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nat66_out2in_node) = {
.name = "nat66-out2in",
.vector_size = sizeof (u32),
@@ -208,7 +207,6 @@ VLIB_REGISTER_NODE (nat66_out2in_node) = {
[NAT66_OUT2IN_NEXT_IP6_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nat/pnat/pnat.api b/src/plugins/nat/pnat/pnat.api
index b6632159d7c..de555c41412 100644
--- a/src/plugins/nat/pnat/pnat.api
+++ b/src/plugins/nat/pnat/pnat.api
@@ -26,6 +26,7 @@ enum pnat_mask
PNAT_DPORT = 0x8,
PNAT_COPY_BYTE = 0x10,
PNAT_CLEAR_BYTE = 0x20,
+ PNAT_PROTO = 0x40,
};
enum pnat_attachment_point
@@ -65,6 +66,7 @@ autoendian define pnat_binding_add
vl_api_pnat_rewrite_tuple_t rewrite;
};
+
autoendian define pnat_binding_add_reply
{
u32 context;
@@ -72,6 +74,22 @@ autoendian define pnat_binding_add_reply
u32 binding_index;
};
+autoendian define pnat_binding_add_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_pnat_match_tuple_t match;
+ vl_api_pnat_rewrite_tuple_t rewrite;
+};
+
+
+autoendian define pnat_binding_add_v2_reply
+{
+ u32 context;
+ i32 retval;
+ u32 binding_index;
+};
+
autoendian autoreply define pnat_binding_del
{
u32 client_index;
diff --git a/src/plugins/nat/pnat/pnat.c b/src/plugins/nat/pnat/pnat.c
index 547b063f286..2b4a6b49e96 100644
--- a/src/plugins/nat/pnat/pnat.c
+++ b/src/plugins/nat/pnat/pnat.c
@@ -56,7 +56,9 @@ static pnat_mask_fast_t pnat_mask2fast(pnat_mask_t lookup_mask) {
m.as_u64[0] = 0xffffffff00000000;
if (lookup_mask & PNAT_DA)
m.as_u64[0] |= 0x00000000ffffffff;
- m.as_u64[1] = 0xffffffff00000000;
+ m.as_u64[1] = 0x00ffffff00000000;
+ if (lookup_mask & PNAT_PROTO)
+ m.as_u64[1] |= 0xff00000000000000;
if (lookup_mask & PNAT_SPORT)
m.as_u64[1] |= 0x00000000ffff0000;
if (lookup_mask & PNAT_DPORT)
diff --git a/src/plugins/nat/pnat/pnat.md b/src/plugins/nat/pnat/pnat.md
deleted file mode 100644
index 1e6bc130848..00000000000
--- a/src/plugins/nat/pnat/pnat.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# PNAT: 1:1 match and rewrite programmable NAT {#pnat_doc}
-
-PNAT is a stateless statically configured, match and rewrite plugin.
-It uses a set of match and rewrite rules that are applied on the IP
-input and output feature paths. A PNAT rule is unidirectional.
-
-The match is done using up to a 6-tuple; IP source and destination address,
-IP protocol, transport layer source and destination ports, and FIB table / interface index.
-
-While multiple match/rewrite rules can be applied to an interface (per direction), the match
-pattern must be the same across all rules on that interface/direction.
-
-If required in the future, matching could be done using the general classifier, allowing matching
-on any protocol field, as well having an ordered set of match patterns.
-
-If the packet does not match, it will by default be passed to the next graph node in the feature chain.
-If desired a different miss behaviour could be implemented, e.g. similarly to dynamic NAT, the packet punted to a slow path.
-
-## Rewrite instructions
-
-``` c
-typedef enum {
- PNAT_INSTR_NONE = 1 << 0,
- PNAT_INSTR_SOURCE_ADDRESS = 1 << 1,
- PNAT_INSTR_SOURCE_PORT = 1 << 2,
- PNAT_INSTR_DESTINATION_ADDRESS = 1 << 3,
- PNAT_INSTR_DESTINATION_PORT = 1 << 4,
-} pnat_instructions_t;
-```
-
-These are the supported rewrite instructions.
-The IP checksum and the TCP/UDP checksum are incrementally updated as required.
-
-There are only a few "sanity checks" on the rewrites. For example, the rewrite in the outbound direction
-is applied on the ip-output feature chain. If one were to rewrite the IP destination address, the routing
-decision and determination of the next-hop has already been done, and the packet would still be forwarded
-to the original next-hop.
diff --git a/src/plugins/nat/pnat/pnat.rst b/src/plugins/nat/pnat/pnat.rst
new file mode 100644
index 00000000000..5cac047a236
--- /dev/null
+++ b/src/plugins/nat/pnat/pnat.rst
@@ -0,0 +1,45 @@
+PNAT 1:1 match & rewrite NAT
+============================
+
+PNAT is a stateless statically configured, match and rewrite plugin. It
+uses a set of match and rewrite rules that are applied on the IP input
+and output feature paths. A PNAT rule is unidirectional.
+
+The match is done using up to a 6-tuple; IP source and destination
+address, IP protocol, transport layer source and destination ports, and
+FIB table / interface index.
+
+While multiple match/rewrite rules can be applied to an interface (per
+direction), the match pattern must be the same across all rules on that
+interface/direction.
+
+If required in the future, matching could be done using the general
+classifier, allowing matching on any protocol field, as well having an
+ordered set of match patterns.
+
+If the packet does not match, it will by default be passed to the next
+graph node in the feature chain. If desired a different miss behaviour
+could be implemented, e.g. similarly to dynamic NAT, the packet punted
+to a slow path.
+
+Rewrite instructions
+--------------------
+
+.. code:: c
+
+ typedef enum {
+ PNAT_INSTR_NONE = 1 << 0,
+ PNAT_INSTR_SOURCE_ADDRESS = 1 << 1,
+ PNAT_INSTR_SOURCE_PORT = 1 << 2,
+ PNAT_INSTR_DESTINATION_ADDRESS = 1 << 3,
+ PNAT_INSTR_DESTINATION_PORT = 1 << 4,
+ } pnat_instructions_t;
+
+These are the supported rewrite instructions. The IP checksum and the
+TCP/UDP checksum are incrementally updated as required.
+
+There are only a few “sanity checks” on the rewrites. For example, the
+rewrite in the outbound direction is applied on the ip-output feature
+chain. If one were to rewrite the IP destination address, the routing
+decision and determination of the next-hop has already been done, and
+the packet would still be forwarded to the original next-hop.
diff --git a/src/plugins/nat/pnat/pnat_api.c b/src/plugins/nat/pnat/pnat_api.c
index eaefbaf4738..02e61219d1e 100644
--- a/src/plugins/nat/pnat/pnat_api.c
+++ b/src/plugins/nat/pnat/pnat_api.c
@@ -13,6 +13,7 @@
* limitations under the License.
*/
#include "pnat.h"
+#include <vnet/vnet.h>
#include <pnat/pnat.api_enum.h>
#include <pnat/pnat.api_types.h>
#include <vlibmemory/api.h>
@@ -22,6 +23,7 @@
#include <vnet/ip/reass/ip4_sv_reass.h>
#include <vnet/ip/reass/ip6_full_reass.h>
#include <vnet/ip/reass/ip6_sv_reass.h>
+#include <vpp/app/version.h>
/*
* This file contains the API handlers for the pnat.api
@@ -34,22 +36,33 @@ static void vl_api_pnat_binding_add_t_handler(vl_api_pnat_binding_add_t *mp) {
pnat_main_t *pm = &pnat_main;
vl_api_pnat_binding_add_reply_t *rmp;
u32 binding_index;
+
+ // for backward compatibility
+ if (mp->match.proto == 0)
+ mp->match.mask |= PNAT_PROTO;
+
int rv = pnat_binding_add(&mp->match, &mp->rewrite, &binding_index);
REPLY_MACRO2_END(VL_API_PNAT_BINDING_ADD_REPLY,
({ rmp->binding_index = binding_index; }));
}
static void
+vl_api_pnat_binding_add_v2_t_handler(vl_api_pnat_binding_add_t *mp) {
+ pnat_main_t *pm = &pnat_main;
+ vl_api_pnat_binding_add_reply_t *rmp;
+ u32 binding_index;
+ int rv = pnat_binding_add(&mp->match, &mp->rewrite, &binding_index);
+ REPLY_MACRO2_END(VL_API_PNAT_BINDING_ADD_V2_REPLY,
+ ({ rmp->binding_index = binding_index; }));
+}
+
+static void
vl_api_pnat_binding_attach_t_handler(vl_api_pnat_binding_attach_t *mp) {
pnat_main_t *pm = &pnat_main;
vl_api_pnat_binding_attach_reply_t *rmp;
int rv;
- /* Ensure that the interface exists */
- if (!vnet_sw_if_index_is_api_valid(mp->sw_if_index)) {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
- }
+ VALIDATE_SW_IF_INDEX_END(mp);
rv =
pnat_binding_attach(mp->sw_if_index, mp->attachment, mp->binding_index);
@@ -64,11 +77,7 @@ vl_api_pnat_binding_detach_t_handler(vl_api_pnat_binding_detach_t *mp) {
vl_api_pnat_binding_detach_reply_t *rmp;
int rv;
- /* Ensure that the interface exists */
- if (!vnet_sw_if_index_is_api_valid(mp->sw_if_index)) {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto bad_sw_if_index;
- }
+ VALIDATE_SW_IF_INDEX_END(mp);
rv =
pnat_binding_detach(mp->sw_if_index, mp->attachment, mp->binding_index);
@@ -193,7 +202,7 @@ clib_error_t *pnat_plugin_api_hookup(vlib_main_t *vm) {
*/
#include <vnet/plugin/plugin.h>
VLIB_PLUGIN_REGISTER() = {
- .version = "0.0.1",
+ .version = VPP_BUILD_VER,
.description = "Policy 1:1 NAT",
};
diff --git a/src/plugins/nat/pnat/pnat_cli.c b/src/plugins/nat/pnat/pnat_cli.c
index 082f0778acb..ce9beee540d 100644
--- a/src/plugins/nat/pnat/pnat_cli.c
+++ b/src/plugins/nat/pnat/pnat_cli.c
@@ -122,6 +122,8 @@ uword unformat_pnat_match_tuple(unformat_input_t *input, va_list *args) {
t->mask |= PNAT_SA;
else if (unformat(input, "dst %U", unformat_ip4_address, &t->dst))
t->mask |= PNAT_DA;
+ else if (unformat(input, "proto %U", unformat_ip_protocol, &t->proto))
+ t->mask |= PNAT_PROTO;
else if (unformat(input, "sport %d", &sport)) {
if (sport == 0 || sport > 65535)
return 0;
@@ -132,9 +134,7 @@ uword unformat_pnat_match_tuple(unformat_input_t *input, va_list *args) {
return 0;
t->mask |= PNAT_DPORT;
t->dport = dport;
- } else if (unformat(input, "proto %U", unformat_ip_protocol, &t->proto))
- ;
- else
+ } else
break;
}
return 1;
diff --git a/src/plugins/nat/pnat/pnat_node.h b/src/plugins/nat/pnat/pnat_node.h
index 595189c2efb..f1afb450934 100644
--- a/src/plugins/nat/pnat/pnat_node.h
+++ b/src/plugins/nat/pnat/pnat_node.h
@@ -20,6 +20,7 @@
#include <pnat/pnat.api_enum.h>
#include <vnet/feature/feature.h>
#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
#include <vnet/ip/format.h>
/* PNAT next-nodes */
diff --git a/src/plugins/nat/pnat/tests/pnat_test.c b/src/plugins/nat/pnat/tests/pnat_test.c
index e7d946941b9..f515dd6d376 100644
--- a/src/plugins/nat/pnat/tests/pnat_test.c
+++ b/src/plugins/nat/pnat/tests/pnat_test.c
@@ -25,6 +25,11 @@
#include <vnet/fib/ip4_fib.h>
#include "../pnat.h"
#include <pnat/pnat.api_enum.h> /* For error counters */
+#ifdef __FreeBSD__
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#endif /* __FreeBSD__ */
#include <arpa/inet.h>
#include "pnat_test_stubs.h"
@@ -569,7 +574,8 @@ int main(int argc, char **argv) {
ip_checksum_init(vm);
- u32 node_index = vlib_register_node(vm, &pnat_input_node);
+ u32 node_index =
+ vlib_register_node(vm, &pnat_input_node, "%s", pnat_input_node.name);
node = vlib_node_get_runtime(vm, node_index);
assert(node);
diff --git a/src/plugins/nat/pnat/tests/pnat_test_stubs.h b/src/plugins/nat/pnat/tests/pnat_test_stubs.h
index bfe1838ffa4..2dc59ac8586 100644
--- a/src/plugins/nat/pnat/tests/pnat_test_stubs.h
+++ b/src/plugins/nat/pnat/tests/pnat_test_stubs.h
@@ -19,12 +19,6 @@
void os_panic(void) {}
void os_exit(int code) {}
u32 ip4_fib_table_get_index_for_sw_if_index(u32 sw_if_index) { return 0; }
-#include <vpp/stats/stat_segment.h>
-clib_error_t *stat_segment_register_gauge(u8 *names,
- stat_segment_update_fn update_fn,
- u32 index) {
- return 0;
-};
#include <vnet/feature/feature.h>
vnet_feature_main_t feature_main;
void classify_get_trace_chain(void){};
diff --git a/src/plugins/nat/pnat/tests/test_genpackets.py b/src/plugins/nat/pnat/tests/test_genpackets.py
index 9d32d3e3656..40867317078 100755
--- a/src/plugins/nat/pnat/tests/test_genpackets.py
+++ b/src/plugins/nat/pnat/tests/test_genpackets.py
@@ -6,33 +6,35 @@ from importlib.machinery import SourceFileLoader
from scapy.all import *
from scapy.contrib.geneve import GENEVE
+
def hexstring(p):
s = bytes(p.__class__(p))
return ",".join("0x{:02x}".format(c) for c in s)
+
def output_test(filename, tests):
(name, ext) = os.path.basename(filename).split(".")
- print('/* DO NOT EDIT: automatically generated by test_genpackets.py */')
- print('/* clang-format off */')
- print('test_t tests_{}[] = {{'.format(name))
+ print("/* DO NOT EDIT: automatically generated by test_genpackets.py */")
+ print("/* clang-format off */")
+ print("test_t tests_{}[] = {{".format(name))
for t in tests:
- print(' {')
+ print(" {")
print(' .name = "{}",'.format(t[0]))
- print(' .nsend = {},'.format(len(t[1])))
- print(' .send = (char []){{{}}},'.format(hexstring(t[1])))
- print(' .nexpect = {},'.format(len(t[2])))
- print(' .expect = (char []){{{}}},'.format(hexstring(t[2])))
- print(' .expect_next_index = {}'.format(t[3]))
- print(' },')
- print('};')
- print('/* clang-format on */')
+ print(" .nsend = {},".format(len(t[1])))
+ print(" .send = (char []){{{}}},".format(hexstring(t[1])))
+ print(" .nexpect = {},".format(len(t[2])))
+ print(" .expect = (char []){{{}}},".format(hexstring(t[2])))
+ print(" .expect_next_index = {}".format(t[3]))
+ print(" },")
+ print("};")
+ print("/* clang-format on */")
+
# Read tests from file
for filename in sys.argv[1:]:
with open(filename) as f:
- content = f.read().replace('\n', '')
+ content = f.read().replace("\n", "")
tests = eval(content)
output_test(filename, tests)
-
diff --git a/src/plugins/npt66/CMakeLists.txt b/src/plugins/npt66/CMakeLists.txt
new file mode 100644
index 00000000000..aee784d96f0
--- /dev/null
+++ b/src/plugins/npt66/CMakeLists.txt
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright(c) 2023 Cisco Systems, Inc.
+
+add_vpp_plugin(npt66
+ SOURCES
+ npt66.c
+ npt66_api.c
+ npt66_cli.c
+ npt66_node.c
+
+
+ MULTIARCH_SOURCES
+ npt66_node.c
+
+ API_FILES
+ npt66.api
+)
diff --git a/src/plugins/npt66/FEATURE.yaml b/src/plugins/npt66/FEATURE.yaml
new file mode 100644
index 00000000000..8874ae22017
--- /dev/null
+++ b/src/plugins/npt66/FEATURE.yaml
@@ -0,0 +1,16 @@
+---
+name: NPTv6
+maintainer: Ole Troan <otroan@employees.org>
+features:
+ - NPTv6
+
+description: "This plugin implements NPTv6 as described in RFC6296.
+ It supports arbitrary prefix lengths. And performs an
+ algorithmic mapping between internal and external IPv6 prefixes.
+ The mapping is checksum neutral.
+ The implementation is currently limited to a single statically configured binding
+ per interface.
+ A typical IPv6 CE use case, the external prefix would be learnt via DHCP PD
+ "
+state: development
+properties: [API, CLI, MULTITHREAD]
diff --git a/src/plugins/npt66/npt66.api b/src/plugins/npt66/npt66.api
new file mode 100644
index 00000000000..63640ac2097
--- /dev/null
+++ b/src/plugins/npt66/npt66.api
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+option version = "0.0.1";
+
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
+
+autoendian autoreply define npt66_binding_add_del
+{
+ u32 client_index;
+ u32 context;
+
+ bool is_add;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_ip6_prefix_t internal;
+ vl_api_ip6_prefix_t external;
+};
+
+counters npt66 {
+ rx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "packets translated from external to internal";
+ };
+ tx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "packets translated from internal to external";
+ };
+ translation {
+ severity error;
+ type counter64;
+ units "packets";
+ description "packet translation failed";
+ };
+
+}; \ No newline at end of file
diff --git a/src/plugins/npt66/npt66.c b/src/plugins/npt66/npt66.c
new file mode 100644
index 00000000000..277fce496fc
--- /dev/null
+++ b/src/plugins/npt66/npt66.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+/*
+ * npt66.c: NPT66 plugin
+ * An implementation of Network Prefix Translation for IPv6-to-IPv6 (NPTv6) as
+ * specified in RFC6296.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <vlib/vlib.h>
+#include <vnet/feature/feature.h>
+#include <vppinfra/pool.h>
+#include "npt66.h"
+
+static int
+npt66_feature_enable_disable (u32 sw_if_index, bool is_add)
+{
+ if (vnet_feature_enable_disable ("ip6-unicast", "npt66-input", sw_if_index,
+ is_add, 0, 0) != 0)
+ return -1;
+ if (vnet_feature_enable_disable ("ip6-output", "npt66-output", sw_if_index,
+ is_add, 0, 0) != 0)
+ return -1;
+ return 0;
+}
+
+static void
+ipv6_prefix_zero (ip6_address_t *address, int prefix_len)
+{
+ int byte_index = prefix_len / 8;
+ int bit_offset = prefix_len % 8;
+ uint8_t mask = (1 << (8 - bit_offset)) - 1;
+ if (byte_index < 16)
+ {
+ address->as_u8[byte_index] &= mask;
+ for (int i = byte_index + 1; i < 16; i++)
+ {
+ address->as_u8[i] = 0;
+ }
+ }
+}
+
+int
+npt66_binding_add_del (u32 sw_if_index, ip6_address_t *internal,
+ int internal_plen, ip6_address_t *external,
+ int external_plen, bool is_add)
+{
+ npt66_main_t *nm = &npt66_main;
+ int rv = 0;
+
+ /* Currently limited to a single binding per interface */
+ npt66_binding_t *b = npt66_interface_by_sw_if_index (sw_if_index);
+
+ if (is_add)
+ {
+ bool configure_feature = false;
+ /* Ensure prefix lengths are less than or equal to a /64 */
+ if (internal_plen > 64 || external_plen > 64)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ /* Create a binding entry (or update existing) */
+ if (!b)
+ {
+ pool_get_zero (nm->bindings, b);
+ configure_feature = true;
+ }
+ b->internal = *internal;
+ b->internal_plen = internal_plen;
+ b->external = *external;
+ b->external_plen = external_plen;
+ b->sw_if_index = sw_if_index;
+
+ ipv6_prefix_zero (&b->internal, internal_plen);
+ ipv6_prefix_zero (&b->external, external_plen);
+ vec_validate_init_empty (nm->interface_by_sw_if_index, sw_if_index, ~0);
+ nm->interface_by_sw_if_index[sw_if_index] = b - nm->bindings;
+
+ uword delta = 0;
+ delta = ip_csum_add_even (delta, b->external.as_u64[0]);
+ delta = ip_csum_add_even (delta, b->external.as_u64[1]);
+ delta = ip_csum_sub_even (delta, b->internal.as_u64[0]);
+ delta = ip_csum_sub_even (delta, b->internal.as_u64[1]);
+ delta = ip_csum_fold (delta);
+ b->delta = delta;
+
+ if (configure_feature)
+ rv = npt66_feature_enable_disable (sw_if_index, is_add);
+ }
+ else
+ {
+ /* Delete a binding entry */
+ npt66_binding_t *b = npt66_interface_by_sw_if_index (sw_if_index);
+ if (!b)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+ nm->interface_by_sw_if_index[sw_if_index] = ~0;
+ pool_put (nm->bindings, b);
+ rv = npt66_feature_enable_disable (sw_if_index, is_add);
+ }
+
+ return rv;
+}
+
+/*
+ * Do a lookup in the interface vector (interface_by_sw_if_index)
+ * and return pool entry.
+ */
+npt66_binding_t *
+npt66_interface_by_sw_if_index (u32 sw_if_index)
+{
+ npt66_main_t *nm = &npt66_main;
+
+ if (!nm->interface_by_sw_if_index ||
+ sw_if_index > (vec_len (nm->interface_by_sw_if_index) - 1))
+ return 0;
+ u32 index = nm->interface_by_sw_if_index[sw_if_index];
+ if (index == ~0)
+ return 0;
+ if (pool_is_free_index (nm->bindings, index))
+ return 0;
+ return pool_elt_at_index (nm->bindings, index);
+}
diff --git a/src/plugins/npt66/npt66.h b/src/plugins/npt66/npt66.h
new file mode 100644
index 00000000000..428dadb1672
--- /dev/null
+++ b/src/plugins/npt66/npt66.h
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip6_packet.h>
+
+typedef struct
+{
+ u32 sw_if_index;
+ ip6_address_t internal;
+ ip6_address_t external;
+ u8 internal_plen;
+ u8 external_plen;
+ uword delta;
+} npt66_binding_t;
+typedef struct
+{
+ u32 *interface_by_sw_if_index;
+ npt66_binding_t *bindings;
+ u16 msg_id_base;
+} npt66_main_t;
+
+extern npt66_main_t npt66_main;
+
+int npt66_binding_add_del (u32 sw_if_index, ip6_address_t *internal,
+ int internal_plen, ip6_address_t *external,
+ int external_plen, bool is_add);
+npt66_binding_t *npt66_interface_by_sw_if_index (u32 sw_if_index);
diff --git a/src/plugins/npt66/npt66_api.c b/src/plugins/npt66/npt66_api.c
new file mode 100644
index 00000000000..ab27cec616e
--- /dev/null
+++ b/src/plugins/npt66/npt66_api.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+#include <stdbool.h>
+#include <npt66/npt66.h>
+#include <vnet/vnet.h>
+#include <npt66/npt66.api_enum.h>
+#include <npt66/npt66.api_types.h>
+#include <vlibmemory/api.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_types_api.h>
+#include <vpp/app/version.h>
+
+npt66_main_t npt66_main;
+
+/*
+ * This file contains the API handlers for the pnat.api
+ */
+
+#define REPLY_MSG_ID_BASE npt66_main.msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static void
+vl_api_npt66_binding_add_del_t_handler (vl_api_npt66_binding_add_del_t *mp)
+{
+ vl_api_npt66_binding_add_del_reply_t *rmp;
+ int rv;
+ clib_warning ("Interface index: %d", mp->sw_if_index);
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ rv = npt66_binding_add_del (
+ mp->sw_if_index, (ip6_address_t *) &mp->internal.address, mp->internal.len,
+ (ip6_address_t *) &mp->external.address, mp->external.len, mp->is_add);
+
+bad_sw_if_index:
+ REPLY_MACRO_END (VL_API_NPT66_BINDING_ADD_DEL_REPLY);
+}
+
+/* API definitions */
+#include <vnet/format_fns.h>
+#include <npt66/npt66.api.c>
+
+/* Set up the API message handling tables */
+clib_error_t *
+npt66_plugin_api_hookup (vlib_main_t *vm)
+{
+ npt66_main_t *nm = &npt66_main;
+
+ nm->msg_id_base = setup_message_id_table ();
+ return 0;
+}
+
+/*
+ * Register the plugin and hook up the API
+ */
+#include <vnet/plugin/plugin.h>
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "NPTv6",
+ .default_disabled = 1,
+};
+
+clib_error_t *
+npt66_init (vlib_main_t *vm)
+{
+ npt66_main_t *nm = &npt66_main;
+ memset (nm, 0, sizeof (*nm));
+
+ return npt66_plugin_api_hookup (vm);
+}
+
+VLIB_INIT_FUNCTION (npt66_init);
diff --git a/src/plugins/npt66/npt66_cli.c b/src/plugins/npt66/npt66_cli.c
new file mode 100644
index 00000000000..b875eb924c6
--- /dev/null
+++ b/src/plugins/npt66/npt66_cli.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+#include <stdbool.h>
+#include <vlib/vlib.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ip/ip.h>
+#include <vppinfra/clib_error.h>
+#include "npt66.h"
+
+static clib_error_t *
+set_npt66_binding_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ bool internal_set = false, external_set = false;
+ bool add = true;
+ u32 sw_if_index = ~0;
+ ip6_address_t internal, external;
+ int internal_plen = 0, external_plen = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "internal %U/%d", unformat_ip6_address,
+ &internal, &internal_plen))
+ internal_set = true;
+ else if (unformat (line_input, "external %U/%d", unformat_ip6_address,
+ &external, &external_plen))
+ external_set = true;
+ else if (unformat (line_input, "interface %U",
+ unformat_vnet_sw_interface, vnet_get_main (),
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "del"))
+ {
+ add = false;
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+ if (sw_if_index == ~0)
+ {
+ error = clib_error_return (0, "interface is required `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ if (!internal_set)
+ {
+ error = clib_error_return (0, "missing parameter: internal `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ if (!external_set)
+ {
+ error = clib_error_return (0, "missing parameter: external `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+
+ int rv = npt66_binding_add_del (sw_if_index, &internal, internal_plen,
+ &external, external_plen, add);
+ if (rv)
+ {
+ error = clib_error_return (0, "Adding binding failed %d", rv);
+ goto done;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (set_npt66_binding_command, static) = {
+ .path = "set npt66 binding",
+ .short_help = "set npt66 binding interface <name> internal <pfx> "
+ "external <pfx> [del]",
+ .function = set_npt66_binding_command_fn,
+};
+
+static u8 *
+format_npt66_binding (u8 *s, va_list *args)
+{
+ u32 index = va_arg (*args, u32);
+ npt66_binding_t *b = va_arg (*args, npt66_binding_t *);
+ s = format (s, "[%d] internal: %U/%d external: %U/%d", index,
+ format_ip6_address, &b->internal, b->internal_plen,
+ format_ip6_address, &b->external, b->external_plen);
+ return s;
+}
+
+static clib_error_t *
+show_npt66_bindings_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ npt66_main_t *nm = &npt66_main;
+ npt66_binding_t *b;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ pool_foreach (b, nm->bindings)
+ {
+ vlib_cli_output (vm, "%U", format_npt66_binding, b - nm->bindings, b);
+ }
+ return error;
+}
+
+VLIB_CLI_COMMAND (show_npt66_bindings_command, static) = {
+ .path = "show npt66 bindings",
+ .short_help = "show npt66 bindings",
+ .function = show_npt66_bindings_command_fn,
+};
diff --git a/src/plugins/npt66/npt66_node.c b/src/plugins/npt66/npt66_node.c
new file mode 100644
index 00000000000..f74f9143998
--- /dev/null
+++ b/src/plugins/npt66/npt66_node.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: Apache-2.0
+// Copyright(c) 2023 Cisco Systems, Inc.
+
+// This file contains the implementation of the NPT66 node.
+// RFC6296: IPv6-to-IPv6 Network Prefix Translation (NPTv6)
+
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_packet.h>
+
+#include <npt66/npt66.h>
+#include <npt66/npt66.api_enum.h>
+
+typedef struct
+{
+ u32 pool_index;
+ ip6_address_t internal;
+ ip6_address_t external;
+} npt66_trace_t;
+
+static inline u8 *
+format_npt66_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ npt66_trace_t *t = va_arg (*args, npt66_trace_t *);
+
+ if (t->pool_index != ~0)
+ s = format (s, "npt66: index %d internal: %U external: %U\n",
+ t->pool_index, format_ip6_address, &t->internal,
+ format_ip6_address, &t->external);
+ else
+ s = format (s, "npt66: index %d (binding not found)\n", t->pool_index);
+ return s;
+}
+
+/* NPT66 next-nodes */
+typedef enum
+{
+ NPT66_NEXT_DROP,
+ NPT66_N_NEXT
+} npt66_next_t;
+
+static ip6_address_t
+ip6_prefix_copy (ip6_address_t dest, ip6_address_t src, int plen)
+{
+ int bytes_to_copy = plen / 8;
+ int residual_bits = plen % 8;
+
+ // Copy full bytes
+ for (int i = 0; i < bytes_to_copy; i++)
+ {
+ dest.as_u8[i] = src.as_u8[i];
+ }
+
+ // Handle the residual bits, if any
+ if (residual_bits)
+ {
+ uint8_t mask = 0xFF << (8 - residual_bits);
+ dest.as_u8[bytes_to_copy] = (dest.as_u8[bytes_to_copy] & ~mask) |
+ (src.as_u8[bytes_to_copy] & mask);
+ }
+ return dest;
+}
+static int
+ip6_prefix_cmp (ip6_address_t a, ip6_address_t b, int plen)
+{
+ int bytes_to_compare = plen / 8;
+ int residual_bits = plen % 8;
+
+ // Compare full bytes
+ for (int i = 0; i < bytes_to_compare; i++)
+ {
+ if (a.as_u8[i] != b.as_u8[i])
+ {
+ return 0; // prefixes are not identical
+ }
+ }
+
+ // Compare the residual bits, if any
+ if (residual_bits)
+ {
+ uint8_t mask = 0xFF << (8 - residual_bits);
+ if ((a.as_u8[bytes_to_compare] & mask) !=
+ (b.as_u8[bytes_to_compare] & mask))
+ {
+ return 0; // prefixes are not identical
+ }
+ }
+ return 1; // prefixes are identical
+}
+
+static int
+npt66_adjust_checksum (int plen, bool add, ip_csum_t delta,
+ ip6_address_t *address)
+{
+ if (plen <= 48)
+ {
+ // TODO: Check for 0xFFFF
+ if (address->as_u16[3] == 0xffff)
+ return -1;
+ address->as_u16[3] = add ? ip_csum_add_even (address->as_u16[3], delta) :
+ ip_csum_sub_even (address->as_u16[3], delta);
+ }
+ else
+ {
+ /* For prefixes longer than 48 find a 16-bit word in the interface id */
+ for (int i = 4; i < 8; i++)
+ {
+ if (address->as_u16[i] == 0xffff)
+ continue;
+ address->as_u16[i] = add ?
+ ip_csum_add_even (address->as_u16[i], delta) :
+ ip_csum_sub_even (address->as_u16[i], delta);
+ break;
+ }
+ }
+ return 0;
+}
+
+static int
+npt66_translate (ip6_header_t *ip, npt66_binding_t *binding, int dir)
+{
+ int rv = 0;
+ if (dir == VLIB_TX)
+ {
+ if (!ip6_prefix_cmp (ip->src_address, binding->internal,
+ binding->internal_plen))
+ {
+ clib_warning (
+ "npt66_translate: src address is not internal (%U -> %U)",
+ format_ip6_address, &ip->src_address, format_ip6_address,
+ &ip->dst_address);
+ goto done;
+ }
+ ip->src_address = ip6_prefix_copy (ip->src_address, binding->external,
+ binding->external_plen);
+ /* Checksum neutrality */
+ rv = npt66_adjust_checksum (binding->internal_plen, false,
+ binding->delta, &ip->src_address);
+ }
+ else
+ {
+ if (!ip6_prefix_cmp (ip->dst_address, binding->external,
+ binding->external_plen))
+ {
+ clib_warning (
+ "npt66_translate: dst address is not external (%U -> %U)",
+ format_ip6_address, &ip->src_address, format_ip6_address,
+ &ip->dst_address);
+ goto done;
+ }
+ ip->dst_address = ip6_prefix_copy (ip->dst_address, binding->internal,
+ binding->internal_plen);
+ rv = npt66_adjust_checksum (binding->internal_plen, true, binding->delta,
+ &ip->dst_address);
+ }
+done:
+ return rv;
+}
+
+static int
+npt66_icmp6_translate (vlib_buffer_t *b, ip6_header_t *outer_ip,
+ icmp46_header_t *icmp, npt66_binding_t *binding,
+ int dir)
+{
+ ip6_header_t *ip = (ip6_header_t *) (icmp + 2);
+ int rv = 0;
+ vlib_main_t *vm = vlib_get_main ();
+
+ if (clib_net_to_host_u16 (outer_ip->payload_length) <
+ sizeof (icmp46_header_t) + 4 + sizeof (ip6_header_t))
+ {
+ clib_warning ("ICMP6 payload too short");
+ return -1;
+ }
+
+ // Validate checksums
+ int bogus_length;
+ u16 sum16;
+ sum16 = ip6_tcp_udp_icmp_compute_checksum (vm, b, outer_ip, &bogus_length);
+ if (sum16 != 0 && sum16 != 0xffff)
+ {
+ clib_warning ("ICMP6 checksum failed");
+ return -1;
+ }
+ if (dir == VLIB_RX)
+ {
+ if (!ip6_prefix_cmp (ip->src_address, binding->external,
+ binding->external_plen))
+ {
+ clib_warning (
+ "npt66_icmp6_translate: src address is not internal (%U -> %U)",
+ format_ip6_address, &ip->src_address, format_ip6_address,
+ &ip->dst_address);
+ goto done;
+ }
+ ip->src_address = ip6_prefix_copy (ip->src_address, binding->internal,
+ binding->internal_plen);
+ /* Checksum neutrality */
+ rv = npt66_adjust_checksum (binding->internal_plen, true, binding->delta,
+ &ip->src_address);
+ }
+ else
+ {
+ if (!ip6_prefix_cmp (ip->dst_address, binding->external,
+ binding->external_plen))
+ {
+ clib_warning (
+ "npt66_icmp6_translate: dst address is not external (%U -> %U)",
+ format_ip6_address, &ip->src_address, format_ip6_address,
+ &ip->dst_address);
+ goto done;
+ }
+ ip->dst_address = ip6_prefix_copy (ip->dst_address, binding->internal,
+ binding->internal_plen);
+ rv = npt66_adjust_checksum (binding->internal_plen, false,
+ binding->delta, &ip->dst_address);
+ }
+done:
+
+ return rv;
+}
+
+/*
+ * Lookup the packet tuple in the flow cache, given the lookup mask.
+ * If a binding is found, rewrite the packet according to instructions,
+ * otherwise follow configured default action (forward, punt or drop)
+ */
+// TODO: Make use of SVR configurable
+static_always_inline uword
+npt66_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int dir)
+{
+ npt66_main_t *nm = &npt66_main;
+ u32 n_left_from, *from;
+ u16 nexts[VLIB_FRAME_SIZE] = { 0 }, *next = nexts;
+ u32 pool_indicies[VLIB_FRAME_SIZE], *pi = pool_indicies;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ ip6_header_t *ip;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ vlib_get_buffers (vm, from, b, n_left_from);
+ npt66_binding_t *binding;
+
+ /* Stage 1: build vector of flow hash (based on lookup mask) */
+ while (n_left_from > 0)
+ {
+ u32 sw_if_index = vnet_buffer (b[0])->sw_if_index[dir];
+ u32 iph_offset =
+ dir == VLIB_TX ? vnet_buffer (b[0])->ip.save_rewrite_length : 0;
+ ip = (ip6_header_t *) (vlib_buffer_get_current (b[0]) + iph_offset);
+ binding = npt66_interface_by_sw_if_index (sw_if_index);
+ ASSERT (binding);
+ *pi = binding - nm->bindings;
+
+ /* By default pass packet to next node in the feature chain */
+ vnet_feature_next_u16 (next, b[0]);
+ int rv;
+ icmp46_header_t *icmp = (icmp46_header_t *) (ip + 1);
+ if (ip->protocol == IP_PROTOCOL_ICMP6 && icmp->type < 128)
+ {
+ rv = npt66_icmp6_translate (b[0], ip, icmp, binding, dir);
+ if (rv < 0)
+ {
+ clib_warning ("ICMP6 npt66_translate failed");
+ *next = NPT66_NEXT_DROP;
+ goto next;
+ }
+ }
+ rv = npt66_translate (ip, binding, dir);
+
+ if (rv < 0)
+ {
+ vlib_node_increment_counter (vm, node->node_index,
+ NPT66_ERROR_TRANSLATION, 1);
+ *next = NPT66_NEXT_DROP;
+ goto next;
+ }
+ else if (dir == VLIB_TX)
+ vlib_node_increment_counter (vm, node->node_index, NPT66_ERROR_TX, 1);
+ else
+ vlib_node_increment_counter (vm, node->node_index, NPT66_ERROR_RX, 1);
+
+ next:
+ next += 1;
+ n_left_from -= 1;
+ b += 1;
+ pi += 1;
+ }
+
+ /* Packet trace */
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+ {
+ u32 i;
+ b = bufs;
+ pi = pool_indicies;
+
+ for (i = 0; i < frame->n_vectors; i++)
+ {
+ if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ npt66_trace_t *t = vlib_add_trace (vm, node, b[0], sizeof (*t));
+ if (*pi != ~0)
+ {
+ if (!pool_is_free_index (nm->bindings, *pi))
+ {
+ npt66_binding_t *tr =
+ pool_elt_at_index (nm->bindings, *pi);
+ t->internal = tr->internal;
+ t->external = tr->external;
+ }
+ }
+ t->pool_index = *pi;
+
+ b += 1;
+ pi += 1;
+ }
+ else
+ break;
+ }
+ }
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (npt66_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return npt66_node_inline (vm, node, frame, VLIB_RX);
+}
+VLIB_NODE_FN (npt66_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return npt66_node_inline (vm, node, frame, VLIB_TX);
+}
+
+VLIB_REGISTER_NODE(npt66_input_node) = {
+ .name = "npt66-input",
+ .vector_size = sizeof(u32),
+ .format_trace = format_npt66_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = NPT66_N_ERROR,
+ .error_counters = npt66_error_counters,
+ .n_next_nodes = NPT66_N_NEXT,
+ .next_nodes =
+ {
+ [NPT66_NEXT_DROP] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (npt66_output_node) = {
+ .name = "npt66-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_npt66_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = NPT66_N_ERROR,
+ .error_counters = npt66_error_counters,
+ .sibling_of = "npt66-input",
+};
+
+/* Hook up features */
+VNET_FEATURE_INIT (npt66_input, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "npt66-input",
+};
+VNET_FEATURE_INIT (npt66_output, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "npt66-output",
+};
diff --git a/src/plugins/nsh/FEATURE.yaml b/src/plugins/nsh/FEATURE.yaml
index a6ef3749952..986008e41a5 100644
--- a/src/plugins/nsh/FEATURE.yaml
+++ b/src/plugins/nsh/FEATURE.yaml
@@ -8,6 +8,7 @@ features:
- NSH Proxy
- NSH OAM
- NSH Metadata
+ - Requires vxlan_plugin.so to run
description: "NSH for SFC"
state: production
diff --git a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export.c b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export.c
index cb2bb2107c9..d2b59ababa9 100644
--- a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export.c
+++ b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export.c
@@ -136,14 +136,12 @@ set_nsh_md2_ioam_export_ipfix_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_nsh_md2_ioam_ipfix_command, static) =
{
.path = "set nsh-md2-ioam export ipfix",
.short_help = "set nsh-md2-ioam export ipfix collector <ip4-address> src <ip4-address>",
.function = set_nsh_md2_ioam_export_ipfix_command_fn,
};
-/* *INDENT-ON* */
#define IPFIX_NSH_MD2_IOAM_EXPORT_ID 274 // TODO: Move this to ioam/ioam_export.h
diff --git a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export_thread.c b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export_thread.c
index 54dc01bc021..40ff8c087dd 100644
--- a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export_thread.c
+++ b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_export_thread.c
@@ -33,14 +33,12 @@ nsh_md2_ioam_export_process (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nsh_md2_ioam_export_process_node, static) =
{
.function = nsh_md2_ioam_export_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "nsh-md2-ioam-export-process",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_node.c b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_node.c
index 06874f64a73..b3bf8c59b96 100644
--- a/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_node.c
+++ b/src/plugins/nsh/nsh-md2-ioam/export-nsh-md2-ioam/nsh_md2_ioam_node.c
@@ -129,7 +129,6 @@ nsh_md2_ioam_export_node_fn (vlib_main_t * vm,
/*
* Node for iOAM export
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nsh_md2_ioam_export_node) =
{
.function = nsh_md2_ioam_export_node_fn,
@@ -144,7 +143,6 @@ VLIB_REGISTER_NODE (nsh_md2_ioam_export_node) =
.next_nodes =
{[EXPORT_NEXT_NSH_MD2_IOAM_INPUT] = "nsh-pop"},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsh/nsh-md2-ioam/md2_ioam_transit.c b/src/plugins/nsh/nsh-md2-ioam/md2_ioam_transit.c
index 876e00324e3..6c372a5bd4e 100644
--- a/src/plugins/nsh/nsh-md2-ioam/md2_ioam_transit.c
+++ b/src/plugins/nsh/nsh-md2-ioam/md2_ioam_transit.c
@@ -53,13 +53,11 @@ typedef enum
} nsh_md2_ioam_encap_transit_next_t;
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (nsh_md2_ioam_encap_transit, static) =
{
.arc_name = "ip4-output",
.node_name = "nsh-md2-ioam-encap-transit",
};
-/* *INDENT-ON* */
static uword
@@ -162,7 +160,6 @@ nsh_md2_ioam_encap_transit (vlib_main_t * vm,
}
extern u8 *format_nsh_node_map_trace (u8 * s, va_list * args);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (nsh_md2_ioam_encap_transit_node) = {
.function = nsh_md2_ioam_encap_transit,
.name = "nsh-md2-ioam-encap-transit",
@@ -181,7 +178,6 @@ VLIB_REGISTER_NODE (nsh_md2_ioam_encap_transit_node) = {
},
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam.c b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam.c
index 63e6a98fe27..2aac6760546 100644
--- a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam.c
+++ b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam.c
@@ -449,7 +449,6 @@ static clib_error_t *nsh_md2_ioam_set_transit_rewrite_command_fn
return rv;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (nsh_md2_ioam_set_transit_rewrite_cmd, static) = {
.path = "set nsh-md2-ioam-transit",
.short_help = "set nsh-ioam-lisp-gpe-transit dst-ip <dst_ip> [outer-fib-index <outer_fib_index>] [disable]",
@@ -500,7 +499,7 @@ void
nsh_md2_ioam_interface_init (void)
{
nsh_md2_ioam_main_t *hm = &nsh_md2_ioam_main;
- hm->fib_entry_type = fib_node_register_new_type (&nsh_md2_ioam_vft);
+ hm->fib_entry_type = fib_node_register_new_type ("nsh", &nsh_md2_ioam_vft);
return;
}
diff --git a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_api.c b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_api.c
index 9ed835bd98f..36c221619e5 100644
--- a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_api.c
+++ b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_api.c
@@ -38,7 +38,6 @@
#undef vl_endianfun
/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_printfun
#include <nsh/nsh.api.h>
#undef vl_printfun
diff --git a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_trace.c b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_trace.c
index aad3cffb1db..2c553b39e45 100644
--- a/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_trace.c
+++ b/src/plugins/nsh/nsh-md2-ioam/nsh_md2_ioam_trace.c
@@ -38,7 +38,6 @@ typedef union
} time_u64_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
u16 class;
u8 type;
@@ -48,7 +47,6 @@ typedef CLIB_PACKED(struct {
u8 reserve;
u32 elts[0]; /* Variable type. So keep it generic */
}) nsh_md2_ioam_trace_option_t;
-/* *INDENT-ON* */
#define foreach_nsh_md2_ioam_trace_stats \
@@ -333,13 +331,11 @@ nsh_md2_ioam_show_ioam_trace_cmd_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (nsh_md2_ioam_show_ioam_trace_cmd, static) = {
.path = "show ioam nsh-lisp-gpe trace",
.short_help = "iOAM trace statistics",
.function = nsh_md2_ioam_show_ioam_trace_cmd_fn,
};
-/* *INDENT-ON* */
int
@@ -376,12 +372,10 @@ nsh_md2_ioam_trace_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (nsh_md2_ioam_trace_init) =
{
.runs_after = VLIB_INITS ("nsh_init", "nsh_md2_ioam_init"),
};
-/* *INDENT-ON* */
int
nsh_md2_ioam_trace_profile_cleanup (void)
diff --git a/src/plugins/nsh/nsh.c b/src/plugins/nsh/nsh.c
index ea084e4a553..a2c24e27b26 100644
--- a/src/plugins/nsh/nsh.c
+++ b/src/plugins/nsh/nsh.c
@@ -18,8 +18,8 @@
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
#include <nsh/nsh.h>
-#include <vnet/gre/gre.h>
-#include <vnet/vxlan/vxlan.h>
+#include <gre/gre.h>
+#include <vxlan/vxlan.h>
#include <vnet/vxlan-gpe/vxlan_gpe.h>
#include <vnet/l2/l2_classify.h>
#include <vnet/adj/adj.h>
@@ -164,14 +164,12 @@ format_nsh_tunnel_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (nsh_hw_class) = {
.name = "NSH",
.format_header = format_nsh_tunnel_with_length,
.build_rewrite = default_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
void
nsh_md2_set_next_ioam_export_override (uword next)
@@ -184,10 +182,11 @@ nsh_md2_set_next_ioam_export_override (uword next)
clib_error_t *
nsh_init (vlib_main_t * vm)
{
- vlib_node_t *node;
+ vlib_node_t *node, *gre4_input, *gre6_input;
nsh_main_t *nm = &nsh_main;
clib_error_t *error = 0;
uword next_node;
+ vlib_node_registration_t *vxlan4_input, *vxlan6_input;
/* Init the main structures from VPP */
nm->vlib_main = vm;
@@ -239,19 +238,32 @@ nsh_init (vlib_main_t * vm)
vlib_node_add_next (vm, vxlan6_gpe_input_node.index,
nsh_aware_vnf_proxy_node.index);
- vlib_node_add_next (vm, gre4_input_node.index, nm->nsh_input_node_index);
- vlib_node_add_next (vm, gre4_input_node.index, nm->nsh_proxy_node_index);
- vlib_node_add_next (vm, gre4_input_node.index,
- nsh_aware_vnf_proxy_node.index);
-
- vlib_node_add_next (vm, gre6_input_node.index, nm->nsh_input_node_index);
- vlib_node_add_next (vm, gre6_input_node.index, nm->nsh_proxy_node_index);
- vlib_node_add_next (vm, gre6_input_node.index,
- nsh_aware_vnf_proxy_node.index);
+ gre4_input = vlib_get_node_by_name (vm, (u8 *) "gre4-input");
+ gre6_input = vlib_get_node_by_name (vm, (u8 *) "gre6-input");
+ if (gre4_input == 0 || gre6_input == 0)
+ {
+ error = clib_error_return (0, "gre_plugin.so is not loaded");
+ return error;
+ }
+ vlib_node_add_next (vm, gre4_input->index, nm->nsh_input_node_index);
+ vlib_node_add_next (vm, gre4_input->index, nm->nsh_proxy_node_index);
+ vlib_node_add_next (vm, gre4_input->index, nsh_aware_vnf_proxy_node.index);
+ vlib_node_add_next (vm, gre6_input->index, nm->nsh_input_node_index);
+ vlib_node_add_next (vm, gre6_input->index, nm->nsh_proxy_node_index);
+ vlib_node_add_next (vm, gre6_input->index, nsh_aware_vnf_proxy_node.index);
/* Add NSH-Proxy support */
- vlib_node_add_next (vm, vxlan4_input_node.index, nm->nsh_proxy_node_index);
- vlib_node_add_next (vm, vxlan6_input_node.index, nm->nsh_proxy_node_index);
+ vxlan4_input =
+ vlib_get_plugin_symbol ("vxlan_plugin.so", "vxlan4_input_node");
+ vxlan6_input =
+ vlib_get_plugin_symbol ("vxlan_plugin.so", "vxlan6_input_node");
+ if (vxlan4_input == 0 || vxlan6_input == 0)
+ {
+ error = clib_error_return (0, "vxlan_plugin.so is not loaded");
+ return error;
+ }
+ vlib_node_add_next (vm, vxlan4_input->index, nm->nsh_proxy_node_index);
+ vlib_node_add_next (vm, vxlan6_input->index, nm->nsh_proxy_node_index);
/* Add NSH-Classifier support */
vlib_node_add_next (vm, ip4_classify_node.index,
@@ -270,12 +282,10 @@ nsh_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (nsh_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Network Service Header (NSH)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsh/nsh_api.c b/src/plugins/nsh/nsh_api.c
index 1faaea36c50..c7a686c8b83 100644
--- a/src/plugins/nsh/nsh_api.c
+++ b/src/plugins/nsh/nsh_api.c
@@ -64,13 +64,11 @@ format_nsh_name (u8 * s, va_list * args)
return format (s, "nsh_tunnel%d", dev_instance);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (nsh_device_class, static) = {
.name = "NSH",
.format_device_name = format_nsh_name,
.admin_up_down_function = nsh_interface_admin_up_down,
};
-/* *INDENT-ON* */
static void send_nsh_entry_details
(nsh_entry_t * t, vl_api_registration_t * rp, u32 context)
@@ -169,7 +167,7 @@ vl_api_nsh_add_del_map_t_handler (vl_api_nsh_add_del_map_t * mp)
{
vl_api_nsh_add_del_map_reply_t *rmp;
int rv;
- nsh_add_del_map_args_t _a, *a = &_a;
+ nsh_add_del_map_args_t _a = { 0 }, *a = &_a;
u32 map_index = ~0;
a->is_add = mp->is_add;
@@ -346,7 +344,7 @@ nsh_add_del_map (nsh_add_del_map_args_t * a, u32 * map_indexp)
{
nsh_hw_if = nm->free_nsh_tunnel_hw_if_indices
[vec_len (nm->free_nsh_tunnel_hw_if_indices) - 1];
- _vec_len (nm->free_nsh_tunnel_hw_if_indices) -= 1;
+ vec_dec_len (nm->free_nsh_tunnel_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, nsh_hw_if);
hi->dev_instance = map_index;
@@ -550,7 +548,7 @@ static void vl_api_nsh_add_del_entry_t_handler
{
vl_api_nsh_add_del_entry_reply_t *rmp;
int rv;
- nsh_add_del_entry_args_t _a, *a = &_a;
+ nsh_add_del_entry_args_t _a = { 0 }, *a = &_a;
u32 entry_index = ~0;
u8 tlvs_len = 0;
u8 *data = 0;
diff --git a/src/plugins/nsh/nsh_cli.c b/src/plugins/nsh/nsh_cli.c
index 7bcaf1c51ad..4288a9654c5 100644
--- a/src/plugins/nsh/nsh_cli.c
+++ b/src/plugins/nsh/nsh_cli.c
@@ -140,7 +140,6 @@ nsh_get_adj_by_sw_if_index (u32 sw_if_index)
{
adj_index_t ai = ~0;
- /* *INDENT-OFF* */
pool_foreach_index (ai, adj_pool)
{
if (sw_if_index == adj_get_sw_if_index(ai))
@@ -148,7 +147,6 @@ nsh_get_adj_by_sw_if_index (u32 sw_if_index)
return ai;
}
}
- /* *INDENT-ON* */
return ~0;
}
@@ -290,7 +288,6 @@ nsh_add_del_map_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_nsh_map_command, static) = {
.path = "create nsh map",
.short_help =
@@ -299,7 +296,6 @@ VLIB_CLI_COMMAND (create_nsh_map_command, static) = {
" encap-vxlan4-intf <nn> | encap-vxlan6-intf <nn>| encap-eth-intf <nn> | encap-none]\n",
.function = nsh_add_del_map_command_fn,
};
-/* *INDENT-ON* */
/**
* CLI command for showing the mapping between NSH entries
@@ -322,12 +318,10 @@ show_nsh_map_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_nsh_map_command, static) = {
.path = "show nsh map",
.function = show_nsh_map_command_fn,
};
-/* *INDENT-ON* */
/**
* CLI command for adding NSH entry
@@ -494,7 +488,6 @@ nsh_add_del_entry_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_nsh_entry_command, static) = {
.path = "create nsh entry",
.short_help =
@@ -502,7 +495,6 @@ VLIB_CLI_COMMAND (create_nsh_entry_command, static) = {
" [c1 <nn> c2 <nn> c3 <nn> c4 <nn>] [tlv-ioam-trace] [del]\n",
.function = nsh_add_del_entry_command_fn,
};
-/* *INDENT-ON* */
/* format from network order */
u8 *
@@ -621,12 +613,10 @@ show_nsh_entry_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_nsh_entry_command, static) = {
.path = "show nsh entry",
.function = show_nsh_entry_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsh/nsh_node.c b/src/plugins/nsh/nsh_node.c
index a467d2c34c2..5c084985948 100644
--- a/src/plugins/nsh/nsh_node.c
+++ b/src/plugins/nsh/nsh_node.c
@@ -906,7 +906,6 @@ static char *nsh_node_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
/* register nsh-input node */
VLIB_REGISTER_NODE (nsh_input_node) = {
@@ -978,7 +977,6 @@ VLIB_REGISTER_NODE (nsh_aware_vnf_proxy_node) =
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsh/nsh_output.c b/src/plugins/nsh/nsh_output.c
index 3a3da02f764..9b7014a0ed0 100644
--- a/src/plugins/nsh/nsh_output.c
+++ b/src/plugins/nsh/nsh_output.c
@@ -366,7 +366,6 @@ VNET_FEATURE_INIT (nsh_interface_output, static) = {
};
/* Built-in ip4 tx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (nsh_eth_output, static) =
{
.arc_name = "nsh-eth-output",
@@ -379,7 +378,6 @@ VNET_FEATURE_INIT (nsh_eth_tx_drop, static) =
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
/**
* @brief Next index values from the NSH incomplete adj node
*/
diff --git a/src/plugins/nsh/nsh_pop.c b/src/plugins/nsh/nsh_pop.c
index 90b8a73b5fb..8de319e158b 100644
--- a/src/plugins/nsh/nsh_pop.c
+++ b/src/plugins/nsh/nsh_pop.c
@@ -19,7 +19,6 @@
#include <vnet/plugin/plugin.h>
#include <nsh/nsh.h>
#include <vnet/gre/packet.h>
-#include <vnet/vxlan/vxlan.h>
#include <vnet/vxlan-gpe/vxlan_gpe.h>
#include <vnet/l2/l2_classify.h>
diff --git a/src/plugins/nsim/node.c b/src/plugins/nsim/node.c
index 159db4a1cc7..a8ba909ab07 100644
--- a/src/plugins/nsim/node.c
+++ b/src/plugins/nsim/node.c
@@ -299,7 +299,6 @@ VLIB_NODE_FN (nsim_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
0 /* is_trace */ , 1 /* is_cross_connect */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (nsim_node) =
{
@@ -319,7 +318,6 @@ VLIB_REGISTER_NODE (nsim_node) =
},
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
VLIB_NODE_FN (nsim_feature_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -333,7 +331,6 @@ VLIB_NODE_FN (nsim_feature_node) (vlib_main_t * vm,
0 /* is_trace */ , 0 /* is_cross_connect */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (nsim_feature_node) =
{
@@ -353,7 +350,6 @@ VLIB_REGISTER_NODE (nsim_feature_node) =
},
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsim/nsim.c b/src/plugins/nsim/nsim.c
index fc8ffebd4a2..1c5b26bbaaa 100644
--- a/src/plugins/nsim/nsim.c
+++ b/src/plugins/nsim/nsim.c
@@ -127,20 +127,20 @@ nsim_output_feature_enable_disable (nsim_main_t * nsm, u32 sw_if_index,
}
static nsim_wheel_t *
-nsim_wheel_alloc (nsim_main_t * nsm, u32 wheel_slots)
+nsim_wheel_alloc (nsim_main_t *nsm)
{
u32 pagesize = getpagesize ();
nsim_wheel_t *wp;
- nsm->mmap_size = sizeof (nsim_wheel_t)
- + wheel_slots * sizeof (nsim_wheel_entry_t);
+ nsm->mmap_size = sizeof (nsim_wheel_t) +
+ nsm->wheel_slots_per_wrk * sizeof (nsim_wheel_entry_t);
nsm->mmap_size += pagesize - 1;
nsm->mmap_size &= ~(pagesize - 1);
wp = clib_mem_vm_alloc (nsm->mmap_size);
ASSERT (wp != 0);
- wp->wheel_size = wheel_slots;
+ wp->wheel_size = nsm->wheel_slots_per_wrk;
wp->cursize = 0;
wp->head = 0;
wp->tail = 0;
@@ -150,7 +150,7 @@ nsim_wheel_alloc (nsim_main_t * nsm, u32 wheel_slots)
}
static int
-nsim_configure (nsim_main_t * nsm, f64 bandwidth, f64 delay, f64 packet_size,
+nsim_configure (nsim_main_t *nsm, f64 bandwidth, f64 delay, u32 packet_size,
f64 drop_fraction, f64 reorder_fraction)
{
u64 total_buffer_size_in_bytes, per_worker_buffer_size, wheel_slots_per_wrk;
@@ -163,7 +163,7 @@ nsim_configure (nsim_main_t * nsm, f64 bandwidth, f64 delay, f64 packet_size,
if (delay == 0.0)
return VNET_API_ERROR_INVALID_VALUE_2;
- if (packet_size < 64.0 || packet_size > 9000.0)
+ if (packet_size < 64 || packet_size > 9000)
return VNET_API_ERROR_INVALID_VALUE_3;
if (reorder_fraction > 0.0 && delay == 0.0)
@@ -201,13 +201,14 @@ nsim_configure (nsim_main_t * nsm, f64 bandwidth, f64 delay, f64 packet_size,
/* Save these for the show command */
nsm->bandwidth = bandwidth;
nsm->packet_size = packet_size;
+ nsm->wheel_slots_per_wrk = wheel_slots_per_wrk;
vec_validate (nsm->wheel_by_thread, num_workers);
/* Initialize the output scheduler wheels */
i = (!nsm->poll_main_thread && num_workers) ? 1 : 0;
for (; i < num_workers + 1; i++)
- nsm->wheel_by_thread[i] = nsim_wheel_alloc (nsm, wheel_slots_per_wrk);
+ nsm->wheel_by_thread[i] = nsim_wheel_alloc (nsm);
vlib_worker_thread_barrier_sync (vm);
@@ -333,7 +334,6 @@ VLIB_CONFIG_FUNCTION (nsim_config, "nsim");
* @cliend
* @cliexcmd{nsim enable-disable <intfc> <intfc> [disable]}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (nsim_enable_disable_command, static) =
{
.path = "nsim cross-connect enable-disable",
@@ -342,7 +342,6 @@ VLIB_CLI_COMMAND (nsim_enable_disable_command, static) =
"<interface-name-2> [disable]",
.function = nsim_cross_connect_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_nsim_cross_connect_enable_disable_t_handler
@@ -522,7 +521,6 @@ nsim_output_feature_enable_disable_command_fn (vlib_main_t * vm,
* @cliend
* @cliexcmd{nsim output-feature enable-disable <intfc> [disable]}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (nsim_output_feature_enable_disable_command, static) =
{
.path = "nsim output-feature enable-disable",
@@ -530,7 +528,6 @@ VLIB_CLI_COMMAND (nsim_output_feature_enable_disable_command, static) =
"nsim output-feature enable-disable <interface-name> [disable]",
.function = nsim_output_feature_enable_disable_command_fn,
};
-/* *INDENT-ON* */
#include <nsim/nsim.api.c>
static clib_error_t *
@@ -549,30 +546,24 @@ nsim_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (nsim_init);
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (nsim, static) =
{
.arc_name = "device-input",
.node_name = "nsim",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (nsim_feature, static) = {
.arc_name = "interface-output",
.node_name = "nsim-output-feature",
.runs_before = VNET_FEATURES ("interface-output-arc-end"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Network Delay Simulator",
};
-/* *INDENT-ON* */
static uword
unformat_delay (unformat_input_t * input, va_list * args)
@@ -602,19 +593,59 @@ unformat_bandwidth (unformat_input_t * input, va_list * args)
*result = tmp * 1e9;
else if (unformat (input, "%f gbyte", &tmp))
*result = tmp * 8e9;
+ else if (unformat (input, "%f gbps", &tmp))
+ *result = tmp * 1e9;
+ else if (unformat (input, "%f mbps", &tmp))
+ *result = tmp * 1e6;
+ else if (unformat (input, "%f kbps", &tmp))
+ *result = tmp * 1e3;
+ else if (unformat (input, "%f bps", &tmp))
+ *result = tmp;
else
return 0;
return 1;
}
static u8 *
+format_delay (u8 *s, va_list *args)
+{
+ f64 delay = va_arg (*args, f64);
+
+ if (delay < 1e-3)
+ s = format (s, "%.1f us", delay * 1e6);
+ else if (delay < 1)
+ s = format (s, "%.1f ms", delay * 1e3);
+ else
+ s = format (s, "%f sec", delay);
+
+ return s;
+}
+
+static u8 *
+format_bandwidth (u8 *s, va_list *args)
+{
+ f64 bandwidth = va_arg (*args, f64);
+
+ if (bandwidth >= 1e9)
+ s = format (s, "%.1f gbps", bandwidth / 1e9);
+ else if (bandwidth >= 1e6)
+ s = format (s, "%.1f mbps", bandwidth / 1e6);
+ else if (bandwidth >= 1e3)
+ s = format (s, "%.1f kbps", bandwidth / 1e3);
+ else
+ s = format (s, "%f bps", bandwidth);
+
+ return s;
+}
+
+static u8 *
format_nsim_config (u8 * s, va_list * args)
{
int verbose = va_arg (*args, int);
nsim_main_t *nsm = &nsim_main;
s = format (s, "configuration\n");
- s = format (s, " delay (ms): %.2f\n", nsm->delay * 1e3);
+ s = format (s, " delay: %U\n", format_delay, nsm->delay);
if (nsm->drop_fraction)
s = format (s, " drop fraction: %.5f\n", nsm->drop_fraction);
else
@@ -624,7 +655,8 @@ format_nsim_config (u8 * s, va_list * args)
else
s = format (s, " reorder fraction: 0\n");
s = format (s, " packet size: %u\n", nsm->packet_size);
- s = format (s, " throughput (Gbps): %.2f\n", nsm->bandwidth / 1e9);
+ s = format (s, " worker wheel size: %u\n", nsm->wheel_slots_per_wrk);
+ s = format (s, " throughput: %U\n", format_bandwidth, nsm->bandwidth);
if (verbose)
{
@@ -666,9 +698,8 @@ static clib_error_t *
set_nsim_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
- f64 drop_fraction = 0.0, reorder_fraction = 0.0;
- f64 delay, bandwidth, packet_size = 1500.0;
- u32 packets_per_drop, packets_per_reorder;
+ f64 drop_fraction = 0.0, reorder_fraction = 0.0, delay, bandwidth;
+ u32 packets_per_drop, packets_per_reorder, packet_size = 1500;
nsim_main_t *nsm = &nsim_main;
int rv;
@@ -679,7 +710,7 @@ set_nsim_command_fn (vlib_main_t * vm,
else if (unformat (input, "bandwidth %U", unformat_bandwidth,
&bandwidth))
;
- else if (unformat (input, "packet-size %f", &packet_size))
+ else if (unformat (input, "packet-size %u", &packet_size))
;
else if (unformat (input, "packets-per-drop %d", &packets_per_drop))
{
@@ -756,7 +787,6 @@ set_nsim_command_fn (vlib_main_t * vm,
* @cliend
* @cliexcmd{set nsim delay <nn> bandwidth <bb> packet-size <nn>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_nsim_command, static) =
{
.path = "set nsim",
@@ -764,7 +794,6 @@ VLIB_CLI_COMMAND (set_nsim_command, static) =
" [packets-per-drop <nn>][drop-fraction <f64: 0.0 - 1.0>]",
.function = set_nsim_command_fn,
};
-/* *INDENT-ON*/
static clib_error_t *
@@ -801,14 +830,12 @@ show_nsim_command_fn (vlib_main_t * vm,
* @cliexcmd{show nsim}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_nsim_command, static) =
{
.path = "show nsim",
.short_help = "Display network delay simulator configuration",
.function = show_nsim_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/nsim/nsim.h b/src/plugins/nsim/nsim.h
index d7d32b9d234..b35a1c685de 100644
--- a/src/plugins/nsim/nsim.h
+++ b/src/plugins/nsim/nsim.h
@@ -101,9 +101,10 @@ typedef struct
/* Config parameters */
f64 delay;
f64 bandwidth;
- f64 packet_size;
f64 drop_fraction;
f64 reorder_fraction;
+ u32 packet_size;
+ u32 wheel_slots_per_wrk;
u32 poll_main_thread;
u64 mmap_size;
diff --git a/src/plugins/nsim/nsim_input.c b/src/plugins/nsim/nsim_input.c
index 65a7ae11251..889d9c9841f 100644
--- a/src/plugins/nsim/nsim_input.c
+++ b/src/plugins/nsim/nsim_input.c
@@ -126,7 +126,6 @@ VLIB_NODE_FN (nsim_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (nsim_input_node) =
{
@@ -142,7 +141,6 @@ VLIB_REGISTER_NODE (nsim_input_node) =
.error_strings = nsim_tx_error_strings,
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/oddbuf/CMakeLists.txt b/src/plugins/oddbuf/CMakeLists.txt
index 9720e927baf..5ce2b3115dc 100644
--- a/src/plugins/oddbuf/CMakeLists.txt
+++ b/src/plugins/oddbuf/CMakeLists.txt
@@ -26,4 +26,7 @@ add_vpp_plugin(oddbuf
API_TEST_SOURCES
oddbuf_test.c
+
+ COMPONENT
+ vpp-plugin-devtools
)
diff --git a/src/plugins/oddbuf/node.c b/src/plugins/oddbuf/node.c
index a1de52a7e8c..6652baaaec5 100644
--- a/src/plugins/oddbuf/node.c
+++ b/src/plugins/oddbuf/node.c
@@ -183,7 +183,6 @@ VLIB_NODE_FN (oddbuf_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
0 /* is_trace */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (oddbuf_node) =
{
@@ -203,7 +202,6 @@ VLIB_REGISTER_NODE (oddbuf_node) =
},
};
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/oddbuf/oddbuf.c b/src/plugins/oddbuf/oddbuf.c
index 8277836aeb8..44cdd11eaae 100644
--- a/src/plugins/oddbuf/oddbuf.c
+++ b/src/plugins/oddbuf/oddbuf.c
@@ -106,7 +106,6 @@ oddbuf_enable_disable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (oddbuf_enable_disable_command, static) =
{
.path = "oddbuf enable-disable",
@@ -114,7 +113,6 @@ VLIB_CLI_COMMAND (oddbuf_enable_disable_command, static) =
"oddbuf enable-disable <interface-name> [disable]",
.function = oddbuf_enable_disable_command_fn,
};
-/* *INDENT-ON* */
/* API message handler */
static void vl_api_oddbuf_enable_disable_t_handler
@@ -157,23 +155,19 @@ oddbuf_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (oddbuf_init);
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (oddbuf, static) =
{
.arc_name = "device-input",
.node_name = "oddbuf",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Awkward chained buffer geometry generator",
.default_disabled = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -205,7 +199,6 @@ oddbuf_config_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (oddbuf_config_command, static) =
{
.path = "oddbuf configure",
@@ -213,7 +206,6 @@ VLIB_CLI_COMMAND (oddbuf_config_command, static) =
"oddbuf configure n_to_copy <nn> offset <nn> first_offset <nn>",
.function = oddbuf_config_command_fn,
};
-/* *INDENT-ON* */
diff --git a/src/plugins/perfmon/CMakeLists.txt b/src/plugins/perfmon/CMakeLists.txt
index af0bd3c3f51..d820eb2f37e 100644
--- a/src/plugins/perfmon/CMakeLists.txt
+++ b/src/plugins/perfmon/CMakeLists.txt
@@ -11,24 +11,57 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*|aarch64.*")
return()
endif()
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
+ list(APPEND ARCH_PMU_SOURCES
+ intel/dispatch_wrapper.c
+ intel/core.c
+ intel/uncore.c
+ intel/bundle/backend_bound_core.c
+ intel/bundle/backend_bound_mem.c
+ intel/bundle/branch_mispred.c
+ intel/bundle/cache_hit_miss.c
+ intel/bundle/frontend_bound_bw_src.c
+ intel/bundle/frontend_bound_bw_uops.c
+ intel/bundle/frontend_bound_lat.c
+ intel/bundle/iio_bw.c
+ intel/bundle/inst_and_clock.c
+ intel/bundle/load_blocks.c
+ intel/bundle/mem_bw.c
+ intel/bundle/power_license.c
+ intel/bundle/topdown_icelake.c
+ intel/bundle/topdown_metrics.c
+ intel/bundle/topdown_tremont.c
+ )
+endif()
+
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64.*")
+ list(APPEND ARCH_PMU_SOURCES
+ arm/dispatch_wrapper.c
+ arm/events.c
+ arm/bundle/inst_clock.c
+ arm/bundle/cache_data.c
+ arm/bundle/cache_inst.c
+ arm/bundle/cache_data_tlb.c
+ arm/bundle/cache_inst_tlb.c
+ arm/bundle/mem_access.c
+ arm/bundle/branch_pred.c
+ arm/bundle/stall.c
+ )
+endif()
+
add_vpp_plugin(perfmon
SOURCES
cli.c
- dispatch_wrapper.c
linux.c
perfmon.c
- table.c
- intel/core.c
- intel/uncore.c
- intel/bundle/inst_and_clock.c
- intel/bundle/load_blocks.c
- intel/bundle/mem_bw.c
- intel/bundle/cache_hit_miss.c
- intel/bundle/branch_mispred.c
- intel/bundle/power_license.c
- intel/bundle/topdown_metrics.c
+ ${ARCH_PMU_SOURCES}
+
+ COMPONENT
+ vpp-plugin-devtools
+
+ SUPPORTED_OS_LIST Linux
)
diff --git a/src/plugins/perfmon/arm/bundle/branch_pred.c b/src/plugins/perfmon/arm/bundle/branch_pred.c
new file mode 100644
index 00000000000..7ab656f2758
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/branch_pred.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ BR_RETIRED,
+ BR_MIS_PRED_RETIRED,
+ BR_PRED,
+ BR_MIS_PRED
+};
+
+static u8 *
+format_arm_branch_pred (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[BR_RETIRED] / ns->n_calls);
+ break;
+
+ case 1:
+ s = format (s, "%.2f", (f64) ns->value[BR_RETIRED] / ns->n_packets);
+ break;
+
+ case 2:
+ s =
+ format (s, "%.2f", (f64) ns->value[BR_MIS_PRED_RETIRED] / ns->n_calls);
+ break;
+
+ case 3:
+ s = format (s, "%.2f",
+ (f64) ns->value[BR_MIS_PRED_RETIRED] / ns->n_packets);
+ break;
+
+ case 4:
+ s =
+ format (s, "%.2f%%",
+ (ns->value[BR_RETIRED] ? (f64) ns->value[BR_MIS_PRED_RETIRED] /
+ ns->value[BR_RETIRED] * 100 :
+ 0));
+ break;
+
+ case 5:
+ s = format (s, "%.2f", (f64) ns->value[BR_PRED] / ns->n_calls);
+ break;
+
+ case 6:
+ s = format (s, "%.2f", (f64) ns->value[BR_PRED] / ns->n_packets);
+ break;
+
+ case 7:
+ s = format (s, "%.2f", (f64) ns->value[BR_MIS_PRED] / ns->n_calls);
+ break;
+
+ case 8:
+ s = format (s, "%.2f", (f64) ns->value[BR_MIS_PRED] / ns->n_packets);
+ break;
+
+ case 9:
+ s = format (s, "%.2f%%",
+ (ns->value[BR_PRED] ?
+ (f64) ns->value[BR_MIS_PRED] / ns->value[BR_PRED] * 100 :
+ 0));
+ break;
+
+ case 10:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_branch_pred) = {
+ .name = "branch-pred",
+ .description = "Branch (mis)predictions per call/packet",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_BR_RETIRED,
+ .events[1] = ARMV8_PMUV3_BR_MIS_PRED_RETIRED,
+ .events[2] = ARMV8_PMUV3_BR_PRED,
+ .events[3] = ARMV8_PMUV3_BR_MIS_PRED,
+ .n_events = 4,
+ .n_columns = 11,
+ .format_fn = format_arm_branch_pred,
+ .column_headers = PERFMON_STRINGS ("[1.1]", "[1.2]", "[1.3]", "[1.4]", "\%",
+ "[2.1]", "[2.2]", "[2.3]", "[2.4]", "\%",
+ "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (
+ SET_BIT (BR_RETIRED), SET_BIT (BR_RETIRED), SET_BIT (BR_MIS_PRED_RETIRED),
+ SET_BIT (BR_MIS_PRED_RETIRED),
+ SET_BIT (BR_RETIRED) | SET_BIT (BR_MIS_PRED_RETIRED), SET_BIT (BR_PRED),
+ SET_BIT (BR_PRED), SET_BIT (BR_MIS_PRED), SET_BIT (BR_MIS_PRED),
+ SET_BIT (BR_PRED) | SET_BIT (BR_MIS_PRED), 0),
+ .footer =
+ "An instruction that has been executed and retired is defined to\n"
+ "be architecturally executed. When a PE can perform speculative\n"
+ "execution, an instruction is not architecturally executed if the\n"
+ "PE discards the results of the speculative execution.\n\n"
+ "Per node statistics:\n"
+ "[1] Branch instruction architecturally executed\n"
+ " [1.1] Branches/call\n"
+ " [1.2] Branches/pkt\n"
+ " [1.3] Mispredicted/call \n"
+ " [1.4] Mispredicted/pkt\n"
+ " [\%] Percentage of branches mispredicted\n"
+ "[2] Predictable branch speculatively executed\n"
+ " [2.1] Branches/call\n"
+ " [2.2] Branches/pkt\n"
+ " [2.3] Mispredicted/call \n"
+ " [2.4] Mispredicted/pkt\n"
+ " [\%] Percentage of branches mispredicted\n\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/cache_data.c b/src/plugins/perfmon/arm/bundle/cache_data.c
new file mode 100644
index 00000000000..d7587700a8c
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/cache_data.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ L1D_CACHE,
+ L1D_CACHE_REFILL,
+ L2D_CACHE,
+ L2D_CACHE_REFILL,
+ L3D_CACHE,
+ L3D_CACHE_REFILL
+};
+
+static u8 *
+format_arm_cache_data (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[L1D_CACHE] / ns->n_packets);
+ break;
+
+ case 1:
+ s =
+ format (s, "%.2f", (f64) ns->value[L1D_CACHE_REFILL] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%.2f%%",
+ (ns->value[L1D_CACHE] ? (f64) ns->value[L1D_CACHE_REFILL] /
+ ns->value[L1D_CACHE] * 100 :
+ 0));
+ break;
+
+ case 3:
+ s = format (s, "%.2f", (f64) ns->value[L2D_CACHE] / ns->n_packets);
+ break;
+
+ case 4:
+ s =
+ format (s, "%.2f", (f64) ns->value[L2D_CACHE_REFILL] / ns->n_packets);
+ break;
+
+ case 5:
+ s = format (s, "%.2f%%",
+ (ns->value[L2D_CACHE] ? (f64) ns->value[L2D_CACHE_REFILL] /
+ ns->value[L2D_CACHE] * 100 :
+ 0));
+ break;
+
+ case 6:
+ s = format (s, "%.2f", (f64) ns->value[L3D_CACHE] / ns->n_packets);
+ break;
+
+ case 7:
+ s =
+ format (s, "%.2f", (f64) ns->value[L3D_CACHE_REFILL] / ns->n_packets);
+ break;
+
+ case 8:
+ s = format (s, "%.2f%%",
+ (ns->value[L3D_CACHE] ? (f64) ns->value[L3D_CACHE_REFILL] /
+ ns->value[L3D_CACHE] * 100 :
+ 0));
+ break;
+
+ case 9:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_cache_data) = {
+ .name = "cache-data",
+ .description = "L1D/L2D/L3D data cache accesses and refills per packet",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_L1D_CACHE,
+ .events[1] = ARMV8_PMUV3_L1D_CACHE_REFILL,
+ .events[2] = ARMV8_PMUV3_L2D_CACHE,
+ .events[3] = ARMV8_PMUV3_L2D_CACHE_REFILL,
+ .events[4] = ARMV8_PMUV3_L3D_CACHE,
+ .events[5] = ARMV8_PMUV3_L3D_CACHE_REFILL,
+ .n_events = 6,
+ .n_columns = 10,
+ .format_fn = format_arm_cache_data,
+ .column_headers = PERFMON_STRINGS ("L1D: access", "refill", "\%*",
+ "L2D: access", "refill", "\%*",
+ "L3D: access", "refill", "\%*", "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (
+ SET_BIT (L1D_CACHE), SET_BIT (L1D_CACHE_REFILL),
+ SET_BIT (L1D_CACHE) | SET_BIT (L1D_CACHE_REFILL), SET_BIT (L2D_CACHE),
+ SET_BIT (L2D_CACHE_REFILL),
+ SET_BIT (L2D_CACHE) | SET_BIT (L2D_CACHE_REFILL), SET_BIT (L3D_CACHE),
+ SET_BIT (L3D_CACHE_REFILL),
+ SET_BIT (L3D_CACHE) | SET_BIT (L3D_CACHE_REFILL), 0),
+ .footer = "all stats are per packet except refill rate (\%)\n"
+ "*\% percentage shown is total refills/accesses\n\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/cache_data_tlb.c b/src/plugins/perfmon/arm/bundle/cache_data_tlb.c
new file mode 100644
index 00000000000..9adb2bc18b2
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/cache_data_tlb.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ L1D_TLB,
+ L1D_TLB_REFILL,
+ L2D_TLB,
+ L2D_TLB_REFILL
+};
+
+static u8 *
+format_arm_cache_data_tlb (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[L1D_TLB] / ns->n_packets);
+ break;
+
+ case 1:
+ s = format (s, "%.2f", (f64) ns->value[L1D_TLB_REFILL] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%.2f%%",
+ (ns->value[L1D_TLB] ? (f64) ns->value[L1D_TLB_REFILL] /
+ ns->value[L1D_TLB] * 100 :
+ 0));
+ break;
+
+ case 3:
+ s = format (s, "%.2f", (f64) ns->value[L2D_TLB] / ns->n_packets);
+ break;
+
+ case 4:
+ s = format (s, "%.2f", (f64) ns->value[L2D_TLB_REFILL] / ns->n_packets);
+ break;
+
+ case 5:
+ s = format (s, "%.2f%%",
+ (ns->value[L2D_TLB] ? (f64) ns->value[L2D_TLB_REFILL] /
+ ns->value[L2D_TLB] * 100 :
+ 0));
+ break;
+
+ case 6:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_cache_data_tlb) = {
+ .name = "cache-data-tlb",
+ .description = "L1/L2 data TLB cache accesses, refills, walks per packet",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_L1D_TLB,
+ .events[1] = ARMV8_PMUV3_L1D_TLB_REFILL,
+ .events[2] = ARMV8_PMUV3_L2D_TLB,
+ .events[3] = ARMV8_PMUV3_L2D_TLB_REFILL,
+ .n_events = 4,
+ .n_columns = 7,
+ .format_fn = format_arm_cache_data_tlb,
+ .column_headers = PERFMON_STRINGS ("L1D-TLB: access", "refill", "\%*",
+ "L2D-TLB: access", "refill", "\%*",
+ "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (
+ SET_BIT (L1D_TLB), SET_BIT (L1D_TLB_REFILL),
+ SET_BIT (L1D_TLB) | SET_BIT (L1D_TLB_REFILL), SET_BIT (L2D_TLB),
+ SET_BIT (L2D_TLB_REFILL), SET_BIT (L2D_TLB) | SET_BIT (L2D_TLB_REFILL), 0),
+ .footer =
+ "all stats are per packet except refill rates (\%)\n"
+ "*\% percentage shown is total refills/accesses\n\n"
+ "TLB: Memory-read operation or Memory-write operation that"
+ " causes a TLB access to at least the Level 1/2 data or unified TLB.\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/cache_inst.c b/src/plugins/perfmon/arm/bundle/cache_inst.c
new file mode 100644
index 00000000000..b9d49c09e12
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/cache_inst.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ L1I_CACHE,
+ L1I_CACHE_REFILL,
+ L2I_CACHE,
+ L2I_CACHE_REFILL
+};
+
+static u8 *
+format_arm_cache_inst (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[L1I_CACHE] / ns->n_packets);
+ break;
+
+ case 1:
+ s =
+ format (s, "%.2f", (f64) ns->value[L1I_CACHE_REFILL] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%.2f%%",
+ (ns->value[L1I_CACHE] ? (f64) ns->value[L1I_CACHE_REFILL] /
+ ns->value[L1I_CACHE] * 100 :
+ 0));
+ break;
+
+ case 3:
+ s = format (s, "%.2f", (f64) ns->value[L2I_CACHE] / ns->n_packets);
+ break;
+
+ case 4:
+ s =
+ format (s, "%.2f", (f64) ns->value[L2I_CACHE_REFILL] / ns->n_packets);
+ break;
+
+ case 5:
+ s = format (s, "%.2f%%",
+ (ns->value[L2I_CACHE] ? (f64) ns->value[L2I_CACHE_REFILL] /
+ ns->value[L2I_CACHE] * 100 :
+ 0));
+ break;
+
+ case 6:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_cache_inst) = {
+ .name = "cache-inst",
+ .description = "L1I/L2I instruction cache accesses and refills per packet",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_L1I_CACHE,
+ .events[1] = ARMV8_PMUV3_L1I_CACHE_REFILL,
+ .events[2] = ARMV8_PMUV3_L2I_CACHE,
+ .events[3] = ARMV8_PMUV3_L2I_CACHE_REFILL,
+ .n_events = 4,
+ .n_columns = 7,
+ .format_fn = format_arm_cache_inst,
+ .column_headers = PERFMON_STRINGS ("L1I: access", "refill", "\%*",
+ "L2I: access", "refill", "\%*", "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (
+ SET_BIT (L1I_CACHE), SET_BIT (L1I_CACHE_REFILL),
+ SET_BIT (L1I_CACHE) | SET_BIT (L1I_CACHE_REFILL), SET_BIT (L2I_CACHE),
+ SET_BIT (L2I_CACHE_REFILL),
+ SET_BIT (L2I_CACHE) | SET_BIT (L2I_CACHE_REFILL), 0),
+ .footer = "all stats are per packet except refill rate (\%)\n"
+ "*\% percentage shown is total refills/accesses\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/cache_inst_tlb.c b/src/plugins/perfmon/arm/bundle/cache_inst_tlb.c
new file mode 100644
index 00000000000..7366be2fc16
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/cache_inst_tlb.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+enum /* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+{
+ L1I_TLB,
+ L1I_TLB_REFILL,
+ L2I_TLB,
+ L2I_TLB_REFILL,
+};
+
+static u8 *
+format_arm_cache_inst_tlb (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[L1I_TLB] / ns->n_packets);
+ break;
+
+ case 1:
+ s = format (s, "%.2f", (f64) ns->value[L1I_TLB_REFILL] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%.2f%%",
+ (ns->value[L1I_TLB] ? (f64) ns->value[L1I_TLB_REFILL] /
+ ns->value[L1I_TLB] * 100 :
+ 0));
+ break;
+
+ case 3:
+ s = format (s, "%.2f", (f64) ns->value[L2I_TLB] / ns->n_packets);
+ break;
+
+ case 4:
+ s = format (s, "%.2f", (f64) ns->value[L2I_TLB_REFILL] / ns->n_packets);
+ break;
+
+ case 5:
+ s = format (s, "%.2f%%",
+ (ns->value[L2I_TLB] ? (f64) ns->value[L2I_TLB_REFILL] /
+ ns->value[L2I_TLB] * 100 :
+ 0));
+ break;
+
+ case 6:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_cache_inst_tlb) = {
+ .name = "cache-inst-tlb",
+ .description =
+ "L1/L2 instruction TLB cache accesses, refills, walks per packet",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_L1I_TLB,
+ .events[1] = ARMV8_PMUV3_L1I_TLB_REFILL,
+ .events[2] = ARMV8_PMUV3_L2I_TLB,
+ .events[3] = ARMV8_PMUV3_L2I_TLB_REFILL,
+ .n_events = 4,
+ .n_columns = 7,
+ .format_fn = format_arm_cache_inst_tlb,
+ .column_headers = PERFMON_STRINGS ("L1I-TLB: access", "refill", "\%*",
+ "L2I-TLB: access", "refill", "\%*",
+ "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (
+ SET_BIT (L1I_TLB), SET_BIT (L1I_TLB_REFILL),
+ SET_BIT (L1I_TLB) | SET_BIT (L1I_TLB_REFILL), SET_BIT (L2I_TLB),
+ SET_BIT (L2I_TLB_REFILL), SET_BIT (L2I_TLB) | SET_BIT (L2I_TLB_REFILL), 0),
+ .footer = "all stats are per packet except refill rate (\%)\n"
+ "*\% percentage shown is total refills/accesses\n\n"
+ "TLB: Instruction memory access that causes a TLB access to at "
+ "least the Level 1/2 instruction TLB.\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/inst_clock.c b/src/plugins/perfmon/arm/bundle/inst_clock.c
new file mode 100644
index 00000000000..272e524cffc
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/inst_clock.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ CPU_CYCLES,
+ INST_RETIRED
+};
+
+static u8 *
+format_arm_inst_clock (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+
+ case 1:
+ s = format (s, "%llu", ns->n_calls);
+ break;
+
+ case 2:
+ s = format (s, "%llu", ns->value[0]); /* Cycles */
+ break;
+
+ case 3:
+ s = format (s, "%llu", ns->value[1]); /* Inst */
+ break;
+
+ case 4:
+ s = format (s, "%.2f",
+ (f64) ns->n_packets / ns->n_calls); /* Packets/Call */
+ break;
+
+ case 5:
+ s = format (s, "%.2f",
+ (f64) ns->value[0] / ns->n_packets); /* Clocks/Packet */
+ break;
+
+ case 6:
+ s =
+ format (s, "%.2f",
+ (f64) ns->value[1] / ns->n_packets); /* Instructions/Packet */
+ break;
+
+ case 7:
+ s = format (s, "%.2f", (f64) ns->value[1] / ns->value[0]); /* IPC */
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_inst_clock) = {
+ .name = "inst-and-clock",
+ .description =
+ "CPU cycles, instructions, instructions/packet, cycles/packet and IPC",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_CPU_CYCLES,
+ .events[1] = ARMV8_PMUV3_INST_RETIRED,
+ .n_events = 2,
+ .n_columns = 8,
+ .format_fn = format_arm_inst_clock,
+ .column_headers = PERFMON_STRINGS ("Packets", "Calls", "CPU Cycles", "Inst*",
+ "Pkts/Call", "Cycles/Pkt", "Inst/Pkt",
+ "IPC"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events =
+ PERFMON_COLUMN_EVENTS (0, 0, SET_BIT (CPU_CYCLES), SET_BIT (INST_RETIRED),
+ 0, SET_BIT (CPU_CYCLES), SET_BIT (INST_RETIRED),
+ SET_BIT (CPU_CYCLES) | SET_BIT (INST_RETIRED)),
+ .footer = "* Instructions retired: the counter increments for every "
+ "architecturally executed instruction\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/mem_access.c b/src/plugins/perfmon/arm/bundle/mem_access.c
new file mode 100644
index 00000000000..cfe8f4dc425
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/mem_access.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ MEM_ACCESS,
+ BUS_ACCESS,
+ MEMORY_ERROR
+};
+
+static u8 *
+format_arm_memory_access (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%.2f", (f64) ns->value[MEM_ACCESS] / ns->n_packets);
+ break;
+
+ case 1:
+ s = format (s, "%.3f", (f64) ns->value[BUS_ACCESS] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%llu", ns->value[MEMORY_ERROR]);
+ break;
+
+ case 3:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_memory_access) = {
+ .name = "memory-access",
+ .description = "Memory/bus accesses per pkt + total memory errors",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_MEM_ACCESS,
+ .events[1] = ARMV8_PMUV3_BUS_ACCESS,
+ .events[2] = ARMV8_PMUV3_MEMORY_ERROR,
+ .n_events = 3,
+ .n_columns = 4,
+ .format_fn = format_arm_memory_access,
+ .column_headers = PERFMON_STRINGS ("Mem-access/pkt", "Bus-access/pkt",
+ "Total-mem-errors", "pkts"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (SET_BIT (MEM_ACCESS),
+ SET_BIT (BUS_ACCESS),
+ SET_BIT (MEMORY_ERROR), 0),
+ .footer =
+ "Mem-access: The counter counts Memory-read operations and Memory-write"
+ " operations that the PE made\n"
+ "Bus-access: The counter counts Memory-read operations and Memory-write"
+ " operations that access outside of the boundary of the PE and its "
+ "closely-coupled caches\n"
+ "Mem-error: Memory error refers to a physical error in memory closely "
+ "coupled to this PE, and detected by the hardware, such as a parity or"
+ " ECC error\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/bundle/stall.c b/src/plugins/perfmon/arm/bundle/stall.c
new file mode 100644
index 00000000000..deef9045516
--- /dev/null
+++ b/src/plugins/perfmon/arm/bundle/stall.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+
+/* as per .events[n] in PERFMON_REGISTER_BUNDLE */
+enum
+{
+ STALL_BACKEND,
+ STALL_FRONTEND
+};
+
+static u8 *
+format_arm_stall (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ns = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+
+ switch (row)
+ {
+ case 0:
+ s = format (s, "%llu", ns->value[STALL_BACKEND] / ns->n_packets);
+ break;
+
+ case 1:
+ s = format (s, "%llu", ns->value[STALL_FRONTEND] / ns->n_packets);
+ break;
+
+ case 2:
+ s = format (s, "%llu", ns->value[STALL_BACKEND] / ns->n_calls);
+ break;
+
+ case 3:
+ s = format (s, "%llu", ns->value[STALL_FRONTEND] / ns->n_calls);
+ break;
+
+ case 4:
+ s = format (s, "%llu", ns->n_packets);
+ break;
+
+ case 5:
+ s = format (s, "%llu", ns->n_calls);
+ break;
+ }
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (arm_stall) = {
+ .name = "stall",
+ .description = "PE cycle stalls per pkt/call",
+ .source = "arm",
+ .type = PERFMON_BUNDLE_TYPE_NODE,
+ .events[0] = ARMV8_PMUV3_STALL_BACKEND,
+ .events[1] = ARMV8_PMUV3_STALL_FRONTEND,
+ .n_events = 2,
+ .n_columns = 6,
+ .format_fn = format_arm_stall,
+ .column_headers = PERFMON_STRINGS ("Backend/pkt", "Frontend/pkt",
+ "Backend/call", "Frontend/call",
+ "packets", "calls"),
+ /*
+ * set a bit for every event used in each column
+ * this allows us to disable columns at bundle registration if an
+ * event is not supported
+ */
+ .column_events = PERFMON_COLUMN_EVENTS (SET_BIT (STALL_BACKEND),
+ SET_BIT (STALL_FRONTEND),
+ SET_BIT (STALL_BACKEND),
+ SET_BIT (STALL_FRONTEND), 0, 0),
+ .footer =
+ "The stall counter counts every Attributable cycle on which no\n"
+ "Attributable instruction or operation was sent for execution\n"
+ "on this PE.\n\n"
+ " Stall backend: No operation issued due to the backend\n"
+ " Stall frontend: No operation issued due to the frontend\n"
+ "The division between frontend and backend is IMPLEMENTATION DEFINED\n\n"
+ "- See Armv8-A Architecture Reference Manual, D7.10 PMU events and"
+ " event numbers for full description.\n"
+};
diff --git a/src/plugins/perfmon/arm/dispatch_wrapper.c b/src/plugins/perfmon/arm/dispatch_wrapper.c
new file mode 100644
index 00000000000..df79bcd8631
--- /dev/null
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+#include <sys/ioctl.h>
+
+#include <perfmon/perfmon.h>
+
+#define barrier() asm volatile("dmb ish" : : : "memory");
+
+typedef int64_t s64;
+
+static_always_inline u64
+get_pmc_register (u32 pmc_idx)
+{
+ u64 value = 0;
+ if (pmc_idx == 31)
+ /* i.e. CPU Cycle event code 0x11 - need to read via pmccntr_el0 */
+ asm volatile("mrs %x0, pmccntr_el0" : "=r"(value));
+ else
+ {
+ /* set event register 0x0-0x1F */
+ asm volatile("msr pmselr_el0, %x0" : : "r"((pmc_idx)));
+ /* get register value */
+ asm volatile("mrs %x0, pmxevcntr_el0" : "=r"(value));
+ }
+ asm volatile("isb" : : : "memory");
+ return value;
+}
+
+static_always_inline u64
+read_pmc_from_mmap (struct perf_event_mmap_page *pc)
+{
+ u32 seq, idx, width;
+ u64 offset = 0;
+ s64 pmc = 0;
+
+ do
+ {
+ seq = pc->lock;
+ barrier ();
+ idx = pc->index;
+ offset = pc->offset;
+ if (pc->cap_user_rdpmc && idx)
+ {
+ width = pc->pmc_width;
+ pmc = get_pmc_register (idx - 1);
+ /* for 32 bit regs, left shift 32b to zero/discard the top bits */
+ pmc <<= 64 - width;
+ pmc >>= 64 - width;
+ }
+ barrier ();
+ }
+ while (pc->lock != seq);
+
+ return pmc + offset;
+}
+
+static_always_inline void
+perfmon_read_pmcs (u64 *counters, perfmon_thread_runtime_t *rt, u8 n_counters)
+{
+ switch (n_counters)
+ {
+ default:
+ case 7:
+ counters[6] = read_pmc_from_mmap (rt->mmap_pages[6]);
+ case 6:
+ counters[5] = read_pmc_from_mmap (rt->mmap_pages[5]);
+ case 5:
+ counters[4] = read_pmc_from_mmap (rt->mmap_pages[4]);
+ case 4:
+ counters[3] = read_pmc_from_mmap (rt->mmap_pages[3]);
+ case 3:
+ counters[2] = read_pmc_from_mmap (rt->mmap_pages[2]);
+ case 2:
+ counters[1] = read_pmc_from_mmap (rt->mmap_pages[1]);
+ case 1:
+ counters[0] = read_pmc_from_mmap (rt->mmap_pages[0]);
+ break;
+ }
+}
+
+uword
+perfmon_dispatch_wrapper (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ perfmon_main_t *pm = &perfmon_main;
+ perfmon_thread_runtime_t *rt =
+ vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
+ perfmon_node_stats_t *s =
+ vec_elt_at_index (rt->node_stats, node->node_index);
+ u8 n_events = rt->n_events;
+ u64 before[n_events];
+ u64 after[n_events];
+
+ uword rv;
+
+ clib_prefetch_load (s);
+
+ perfmon_read_pmcs (before, rt, n_events);
+ rv = node->function (vm, node, frame);
+ perfmon_read_pmcs (after, rt, n_events);
+
+ if (rv == 0)
+ return rv;
+
+ s->n_calls += 1;
+ s->n_packets += rv;
+
+ for (int i = 0; i < n_events; i++)
+ {
+ s->value[i] += after[i] - before[i];
+ }
+
+ return rv;
+}
+
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+ vlib_node_function_t **dispatch_wrapper)
+{
+ (*dispatch_wrapper) = perfmon_dispatch_wrapper;
+ return 0;
+}
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp_error.def b/src/plugins/perfmon/arm/dispatch_wrapper.h
index 43ad4dac064..903971f8b5e 100644
--- a/src/vnet/vxlan-gbp/vxlan_gbp_error.def
+++ b/src/plugins/perfmon/arm/dispatch_wrapper.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Copyright (c) 2022 Arm and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -12,6 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-vxlan_gbp_error (DECAPSULATED, "good packets decapsulated")
-vxlan_gbp_error (NO_SUCH_TUNNEL, "no such tunnel packets")
-vxlan_gbp_error (BAD_FLAGS, "packets with bad flags field in vxlan gbp header")
+
+clib_error_t *
+arm_config_dispatch_wrapper (perfmon_bundle_t *b,
+ vlib_node_function_t **dispatch_wrapper);
diff --git a/src/plugins/perfmon/arm/events.c b/src/plugins/perfmon/arm/events.c
new file mode 100644
index 00000000000..f2406b3ea05
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/linux/sysfs.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/arm/events.h>
+#include <perfmon/arm/dispatch_wrapper.h>
+#include <linux/perf_event.h>
+#include <dirent.h>
+
+VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
+ .class_name = "perfmon",
+};
+
+#define log_debug(fmt, ...) \
+ vlib_log_debug (if_default_log.class, fmt, __VA_ARGS__)
+#define log_warn(fmt, ...) \
+ vlib_log_warn (if_default_log.class, fmt, __VA_ARGS__)
+#define log_err(fmt, ...) vlib_log_err (if_default_log.class, fmt, __VA_ARGS__)
+
+/*
+ * config1 = 2 : user access enabled and always 32-bit
+ * config1 = 3 : user access enabled and always 64-bit
+ *
+ * Since there is no discovery into whether 64b counters are supported
+ * or not, first attempt to request 64b counters, then fall back to
+ * 32b if perf_event_open returns EOPNOTSUPP
+ */
+static perfmon_event_t events[] = {
+#define _(event, n, desc) \
+ [ARMV8_PMUV3_##n] = { \
+ .type = PERF_TYPE_RAW, \
+ .config = event, \
+ .config1 = 3, \
+ .name = #n, \
+ .description = desc, \
+ .exclude_kernel = 1, \
+ },
+ foreach_perf_arm_event
+#undef _
+};
+
+u8 *
+format_arm_config (u8 *s, va_list *args)
+{
+ u64 config = va_arg (*args, u64);
+
+ s = format (s, "event=0x%02x", config & 0xff);
+
+ return s;
+}
+
+static clib_error_t *
+arm_init (vlib_main_t *vm, perfmon_source_t *src)
+{
+ clib_error_t *err;
+
+ /*
+ check /proc/sys/kernel/perf_user_access flag to check if userspace
+ access to perf counters is enabled (disabled by default)
+ - if this file doesn't exist, we are on an unsupported kernel ver
+ - if the file exists and is 0, user access needs to be granted
+ with 'sudo sysctl kernel/perf_user_access=1'
+ */
+ u32 perf_user_access_enabled;
+ char *path = "/proc/sys/kernel/perf_user_access";
+ err = clib_sysfs_read (path, "%u", &perf_user_access_enabled);
+ if (err)
+ {
+ if (err->code == ENOENT) /* No such file or directory */
+ {
+ return clib_error_create (
+ "linux kernel version is unsupported, please upgrade to v5.17+ "
+ "- user access to perf counters is not possible");
+ }
+ return clib_error_return_unix (0, "failed to read: %s", path);
+ }
+
+ if (perf_user_access_enabled == 1)
+ log_debug ("user access to perf counters is enabled in %s", path);
+ else
+ {
+ return clib_error_create (
+ "user access to perf counters is not enabled: run"
+ " \'sudo sysctl kernel/perf_user_access=1\'");
+ }
+
+ /*
+ perfmon/arm/events.h has up to 0xFF/256 possible PMUv3 event codes
+ supported - create a bitmap to store whether each event is
+ implemented or not
+ */
+ uword *bitmap = NULL;
+ clib_bitmap_alloc (bitmap, 256);
+
+ struct dirent *dir_entry;
+ const char *event_path =
+ "/sys/bus/event_source/devices/armv8_pmuv3_0/events";
+ DIR *event_dir = opendir (event_path);
+
+ if (event_dir == NULL)
+ {
+ err =
+ clib_error_return_unix (0, "error listing directory: %s", event_path);
+ log_err ("%U", format_clib_error, err);
+ return err;
+ }
+
+ while ((dir_entry = readdir (event_dir)) != NULL)
+ {
+ if (dir_entry->d_name[0] != '.')
+ {
+ u8 *s = NULL;
+ u8 *tmpstr = NULL;
+ unformat_input_t input;
+ u32 config;
+
+ s = format (s, "%s/%s%c", event_path, dir_entry->d_name, 0);
+ err = clib_sysfs_read ((char *) s, "%s", &tmpstr);
+ if (err)
+ {
+ log_err ("%U", format_clib_error, err);
+ continue;
+ }
+ unformat_init_vector (&input, tmpstr);
+ if (unformat (&input, "event=0x%x", &config))
+ {
+ /* it's possible to have have event codes up to 0xFFFF */
+ if (config < 0xFF) /* perfmon supports < 0xFF */
+ {
+ clib_bitmap_set (bitmap, config, 1);
+ }
+ log_debug ("found supported event in sysfs: %s \'%s\' 0x%x",
+ dir_entry->d_name, tmpstr, config);
+ }
+ else
+ {
+ err = clib_error_create ("error parsing event: %s %s",
+ dir_entry->d_name, tmpstr);
+ log_err ("%U", format_clib_error, err);
+ continue;
+ }
+ }
+ }
+ closedir (event_dir);
+
+ for (int i = 0; i < ARRAY_LEN (events); i++)
+ {
+ if (clib_bitmap_get (bitmap, events[i].config))
+ events[i].implemented = 1;
+ }
+ clib_bitmap_free (bitmap);
+
+ return 0;
+}
+
+u8
+arm_bundle_supported (perfmon_bundle_t *b)
+{
+ clib_bitmap_alloc (b->event_disabled, b->n_events);
+ for (u32 i = 0; i < b->n_events; i++)
+ {
+ perfmon_event_t *e = b->src->events + b->events[i];
+ if (!e->implemented)
+ {
+ log_debug (
+ "bundle \'%s\': perf event %s is not implemented on this CPU",
+ b->name, e->name);
+ clib_bitmap_set (b->event_disabled, i, 1);
+ }
+ }
+
+ /* if no events are implemented, fail and do not register bundle */
+ if (clib_bitmap_count_set_bits (b->event_disabled) == b->n_events)
+ {
+ return 0;
+ }
+
+ /* disable columns that use unimplemented events */
+ clib_bitmap_alloc (b->column_disabled, b->n_columns);
+ if (b->column_events)
+ {
+ u32 disabled_event;
+ /* iterate through set bits */
+ clib_bitmap_foreach (disabled_event, b->event_disabled)
+ {
+ for (u32 j = 0; j < b->n_columns; j++)
+ {
+ if (clib_bitmap_get (b->column_disabled, j))
+ continue;
+ if (GET_BIT (b->column_events[j], disabled_event))
+ {
+ clib_bitmap_set (b->column_disabled, j, 1);
+ log_debug (
+ "bundle \'%s\': disabling column %d as event unsupported",
+ b->name, j);
+ }
+ }
+ }
+ }
+
+ return 1;
+}
+
+PERFMON_REGISTER_SOURCE (arm) = {
+ .name = "arm",
+ .description = "Arm PMU events",
+ .events = events,
+ .n_events = ARRAY_LEN (events),
+ .init_fn = arm_init,
+ .format_config = format_arm_config,
+ .bundle_support = arm_bundle_supported,
+ .config_dispatch_wrapper = arm_config_dispatch_wrapper,
+};
diff --git a/src/plugins/perfmon/arm/events.h b/src/plugins/perfmon/arm/events.h
new file mode 100644
index 00000000000..5b7c49801d0
--- /dev/null
+++ b/src/plugins/perfmon/arm/events.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __perfmon_arm_h
+#define __perfmon_arm_h
+
+/*
+ * Events from the Armv8 PMUv3 - See "Arm Architecture Reference Manual Armv8,
+ * for Armv8-A architecture profile" D7.10 PMU events and event numbers:
+ * https://developer.arm.com/documentation/ddi0487/latest/
+ * EventCode, name, description
+ */
+#define foreach_perf_arm_event \
+ _ (0x0D, BR_IMMED_RETIRED, "Immediate branch architecturally executed") \
+ _ (0x10, BR_MIS_PRED, \
+ "Mispredicted or not predicted branch Speculatively executed") \
+ _ (0x22, BR_MIS_PRED_RETIRED, \
+ "Instruction architecturally executed, mispredicted branch") \
+ _ (0x12, BR_PRED, "Predictable branch Speculatively executed") \
+ _ (0x21, BR_RETIRED, "Branch instruction architecturally executed") \
+ _ (0x0E, BR_RETURN_RETIRED, \
+ "Function return instruction architecturally executed and the " \
+ "condition code check pass") \
+ _ (0x19, BUS_ACCESS, "Attributable Bus access") \
+ _ (0x1D, BUS_CYCLES, "Bus cycle") \
+ _ (0x1E, CHAIN, \
+ "For an odd numbered counter, increment when an overflow occurs on" \
+ "the preceding even-numbered counter on the same PE") \
+ _ (0x0B, CID_WRITE_RETIRED, \
+ "Instruction architecturally executed, Condition code check pass, " \
+ "write to CONTEXTIDR") \
+ _ (0x11, CPU_CYCLES, "Cycle counter") \
+ _ (0x34, DTLB_WALK, \
+ "Access to data or unified TLB causes a translation table walk") \
+ _ (0x0A, EXC_RETURN, \
+ "Exception return instruction architecturally executed and the " \
+ "condition code check pass") \
+ _ (0x09, EXC_TAKEN, "Exception entry") \
+ _ (0x08, INST_RETIRED, "Instruction architecturally executed") \
+ _ (0x1B, INST_SPEC, "Operation Speculatively executed") \
+ _ (0x35, ITLB_WALK, \
+ "Access to instruction TLB that causes a translation table walk") \
+ _ (0x04, L1D_CACHE, "Level 1 data cache access") \
+ _ (0x1F, L1D_CACHE_ALLOCATE, \
+ "Level 1 data cache allocation without refill") \
+ _ (0x39, L1D_CACHE_LMISS_RD, "Level 1 data cache long-latency read miss") \
+ _ (0x03, L1D_CACHE_REFILL, "Level 1 data cache refill") \
+ _ (0x15, L1D_CACHE_WB, "Attributable Level 1 data cache write-back") \
+ _ (0x25, L1D_TLB, "Level 1 data or unified TLB access") \
+ _ (0x05, L1D_TLB_REFILL, "Level 1 data or unified TLB refill") \
+ _ (0x14, L1I_CACHE, "Level 1 instruction cache access") \
+ _ (0x01, L1I_CACHE_REFILL, "Level 1 instruction cache refill") \
+ _ (0x26, L1I_TLB, "Level 1 instruction TLB access") \
+ _ (0x02, L1I_TLB_REFILL, "Level 1 instruction TLB refill") \
+ _ (0x16, L2D_CACHE, "Level 2 data cache access") \
+ _ (0x20, L2D_CACHE_ALLOCATE, \
+ "Level 2 data cache allocation without refill") \
+ _ (0x17, L2D_CACHE_REFILL, "Level 2 data cache refill") \
+ _ (0x18, L2D_CACHE_WB, "Attributable Level 2 data cache write-back") \
+ _ (0x2F, L2D_TLB, "Level 2 data or unified TLB access") \
+ _ (0x2D, L2D_TLB_REFILL, "Level 2 data or unified TLB refill") \
+ _ (0x27, L2I_CACHE, "Level 2 instruction cache access") \
+ _ (0x28, L2I_CACHE_REFILL, "Attributable Level 2 instruction cache refill") \
+ _ (0x30, L2I_TLB, "Level 2 instruction TLB access") \
+ _ (0x2E, L2I_TLB_REFILL, "Level 2 instruction TLB refill") \
+ _ (0x2B, L3D_CACHE, "Level 3 data cache access") \
+ _ (0x29, L3D_CACHE_ALLOCATE, \
+ "Level 3 data cache allocation without refill") \
+ _ (0x2A, L3D_CACHE_REFILL, "Attributable Level 3 data cache refill") \
+ _ (0x2C, L3D_CACHE_WB, "Attributable Level 3 data cache write-back") \
+ _ (0x06, LD_RETIRED, \
+ "Memory-reading instruction architecturally executed and condition" \
+ " code check pass") \
+ _ (0x32, LL_CACHE, "Last Level cache access") \
+ _ (0x33, LL_CACHE_MISS, "Last Level cache miss") \
+ _ (0x37, LL_CACHE_MISS_RD, "Last level cache miss, read") \
+ _ (0x36, LL_CACHE_RD, "Last level data cache access, read") \
+ _ (0x1A, MEMORY_ERROR, "Local memory error") \
+ _ (0x13, MEM_ACCESS, "Data memory access") \
+ _ (0x3A, OP_RETIRED, "Micro-operation architecturally executed") \
+ _ (0x3B, OP_SPEC, "Micro-operation Speculatively executed") \
+ _ (0x0C, PC_WRITE_RETIRED, \
+ "Software change to the Program Counter (PC). Instruction is " \
+ "architecturally executed and condition code check pass") \
+ _ (0x31, REMOTE_ACCESS, \
+ "Access to another socket in a multi-socket system") \
+ _ (0x38, REMOTE_ACCESS_RD, \
+ "Access to another socket in a multi-socket system, read") \
+ _ (0x3C, STALL, "No operation sent for execution") \
+ _ (0x24, STALL_BACKEND, "No operation issued due to the backend") \
+ _ (0x23, STALL_FRONTEND, "No operation issued due to the frontend") \
+ _ (0x3F, STALL_SLOT, "No operation sent for execution on a Slot") \
+ _ (0x3D, STALL_SLOT_BACKEND, \
+ "No operation sent for execution on a Slot due to the backend") \
+ _ (0x3E, STALL_SLOT_FRONTEND, \
+ "No operation sent for execution on a Slot due to the frontend") \
+ _ (0x07, ST_RETIRED, \
+ "Memory-writing instruction architecturally executed and condition" \
+ " code check pass") \
+ _ (0x00, SW_INCR, \
+ "Instruction architecturally executed, Condition code check pass, " \
+ "software increment") \
+ _ (0x1C, TTBR_WRITE_RETIRED, \
+ "Instruction architecturally executed, Condition code check pass, " \
+ "write to TTBR") \
+ _ (0x0F, UNALIGNED_LDST_RETIRED, \
+ "Unaligned memory memory-reading or memory-writing instruction " \
+ "architecturally executed and condition code check pass")
+
+typedef enum
+{
+#define _(event, n, desc) ARMV8_PMUV3_##n,
+ foreach_perf_arm_event
+#undef _
+ ARM_N_EVENTS,
+} perf_arm_event_t;
+
+#endif
diff --git a/src/plugins/perfmon/cli.c b/src/plugins/perfmon/cli.c
index 3305480031d..2c59ddd478a 100644
--- a/src/plugins/perfmon/cli.c
+++ b/src/plugins/perfmon/cli.c
@@ -15,7 +15,7 @@
#include <vnet/vnet.h>
#include <perfmon/perfmon.h>
-#include <perfmon/table.h>
+#include <vppinfra/format_table.h>
uword
unformat_perfmon_bundle_name (unformat_input_t *input, va_list *args)
@@ -38,6 +38,40 @@ unformat_perfmon_bundle_name (unformat_input_t *input, va_list *args)
}
uword
+unformat_perfmon_active_type (unformat_input_t *input, va_list *args)
+{
+ perfmon_bundle_t *b = va_arg (*args, perfmon_bundle_t *);
+ perfmon_bundle_type_t *bundle_type = va_arg (*args, perfmon_bundle_type_t *);
+ char *str = 0;
+
+ char *_str_types[PERFMON_BUNDLE_TYPE_MAX];
+
+#define _(type, pstr) _str_types[type] = (char *) pstr;
+
+ foreach_perfmon_bundle_type
+#undef _
+
+ if (!b) return 0;
+
+ if (unformat (input, "%s", &str) == 0)
+ return 0;
+
+ for (int i = PERFMON_BUNDLE_TYPE_NODE; i < PERFMON_BUNDLE_TYPE_MAX; i++)
+ {
+ /* match the name and confirm it is available on this cpu */
+ if (strncmp (str, _str_types[i], strlen (_str_types[i])) == 0 &&
+ (b->type_flags & 1 << i))
+ {
+ *bundle_type = i;
+ break;
+ }
+ }
+
+ vec_free (str);
+ return 1;
+}
+
+uword
unformat_perfmon_source_name (unformat_input_t *input, va_list *args)
{
perfmon_main_t *pm = &perfmon_main;
@@ -57,23 +91,33 @@ unformat_perfmon_source_name (unformat_input_t *input, va_list *args)
return p ? 1 : 0;
}
+typedef enum
+{
+ FORMAT_PERFMON_BUNDLE_NONE = 0,
+ FORMAT_PERFMON_BUNDLE_VERBOSE = 1,
+ FORMAT_PERFMON_BUNDLE_SHOW_CONFIG = 2
+} format_perfmon_bundle_args_t;
+
u8 *
format_perfmon_bundle (u8 *s, va_list *args)
{
perfmon_bundle_t *b = va_arg (*args, perfmon_bundle_t *);
- int verbose = va_arg (*args, int);
+ format_perfmon_bundle_args_t cfg =
+ va_arg (*args, format_perfmon_bundle_args_t);
- const char *bundle_type[] = {
- [PERFMON_BUNDLE_TYPE_NODE] = "node",
- [PERFMON_BUNDLE_TYPE_THREAD] = "thread",
- [PERFMON_BUNDLE_TYPE_SYSTEM] = "system",
- };
+ int vl = 0;
- if (b == 0)
- return format (s, "%-20s%-10s%-20s%s", "Name", "Type", "Source",
- "Description");
+ u8 *_bundle_type = 0;
+ const char *bundle_type[PERFMON_BUNDLE_TYPE_MAX];
+#define _(type, pstr) bundle_type[type] = (const char *) pstr;
- if (verbose)
+ foreach_perfmon_bundle_type
+#undef _
+
+ if (b == 0) return format (s, "%-20s%-20s%-20s%s", "Name", "Type(s)",
+ "Source", "Description");
+
+ if (cfg != FORMAT_PERFMON_BUNDLE_NONE)
{
s = format (s, "name: %s\n", b->name);
s = format (s, "description: %s\n", b->description);
@@ -81,16 +125,47 @@ format_perfmon_bundle (u8 *s, va_list *args)
for (int i = 0; i < b->n_events; i++)
{
perfmon_event_t *e = b->src->events + b->events[i];
- s = format (s, "event %u: %s\n", i, e->name);
+ s = format (s, "event %u: %s", i, e->name);
+
+ format_function_t *format_config = b->src->format_config;
+
+ if (format_config && cfg == FORMAT_PERFMON_BUNDLE_SHOW_CONFIG)
+ s = format (s, " (%U)", format_config, e->config);
+
+ s = format (s, "\n");
}
}
else
- s = format (s, "%-20s%-10s%-20s%s", b->name, bundle_type[b->type],
- b->src->name, b->description);
+ {
+ s = format (s, "%-20s", b->name);
+ for (int i = PERFMON_BUNDLE_TYPE_NODE; i < PERFMON_BUNDLE_TYPE_MAX; i++)
+ {
+ /* check the type is available on this uarch*/
+ if (b->type_flags & 1 << i)
+ _bundle_type = format (_bundle_type, "%s,", bundle_type[i]);
+ }
+ /* remove any stray commas */
+ if ((vl = vec_len (_bundle_type)))
+ _bundle_type[vl - 1] = 0;
+
+ s =
+ format (s, "%-20s%-20s%s", _bundle_type, b->src->name, b->description);
+ }
+
+ vec_free (_bundle_type);
return s;
}
+static int
+bundle_name_sort_cmp (void *a1, void *a2)
+{
+ perfmon_bundle_t **n1 = a1;
+ perfmon_bundle_t **n2 = a2;
+
+ return clib_strcmp ((char *) (*n1)->name, (char *) (*n2)->name);
+}
+
static clib_error_t *
show_perfmon_bundle_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
@@ -99,6 +174,7 @@ show_perfmon_bundle_command_fn (vlib_main_t *vm, unformat_input_t *input,
unformat_input_t _line_input, *line_input = &_line_input;
perfmon_bundle_t *b = 0, **vb = 0;
int verbose = 0;
+ format_perfmon_bundle_args_t cfg = FORMAT_PERFMON_BUNDLE_NONE;
if (unformat_user (input, unformat_line_input, line_input))
{
@@ -116,20 +192,31 @@ show_perfmon_bundle_command_fn (vlib_main_t *vm, unformat_input_t *input,
unformat_free (line_input);
}
- if (vb == 0)
+ if (verbose) /* if verbose is specified */
+ cfg = FORMAT_PERFMON_BUNDLE_VERBOSE;
+
+ if (vb)
+ {
+ if (verbose) /* if verbose is specified with a bundle */
+ cfg = FORMAT_PERFMON_BUNDLE_SHOW_CONFIG;
+ else
+ cfg = FORMAT_PERFMON_BUNDLE_VERBOSE;
+ }
+ else
{
char *key;
hash_foreach_mem (key, b, pm->bundle_by_name, vec_add (vb, &b, 1););
}
- else
- verbose = 1;
- if (verbose == 0)
- vlib_cli_output (vm, "%U\n", format_perfmon_bundle, 0, 0);
+ if (cfg == FORMAT_PERFMON_BUNDLE_NONE)
+ vlib_cli_output (vm, "%U\n", format_perfmon_bundle, 0, cfg);
+
+ vec_sort_with_function (vb, bundle_name_sort_cmp);
for (int i = 0; i < vec_len (vb); i++)
- if (!vb[i]->cpu_supports || vb[i]->cpu_supports ())
- vlib_cli_output (vm, "%U\n", format_perfmon_bundle, vb[i], verbose);
+ /* bundle type will be unknown if no cpu_supports matched */
+ if (vb[i]->type_flags)
+ vlib_cli_output (vm, "%U\n", format_perfmon_bundle, vb[i], cfg);
vec_free (vb);
return 0;
@@ -283,7 +370,8 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
vec_validate (readings, n_instances - 1);
/*Only perform read() for THREAD or SYSTEM bundles*/
- for (int i = 0; i < n_instances && b->type != PERFMON_BUNDLE_TYPE_NODE; i++)
+ for (int i = 0;
+ i < n_instances && b->active_type != PERFMON_BUNDLE_TYPE_NODE; i++)
{
in = vec_elt_at_index (it->instances, i);
r = vec_elt_at_index (readings, i);
@@ -312,8 +400,8 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
in = vec_elt_at_index (it->instances, i);
r = vec_elt_at_index (readings, i);
- table_format_cell (t, col, -1, "%s", in->name);
- if (b->type == PERFMON_BUNDLE_TYPE_NODE)
+ table_format_cell (t, col, -1, "%s", in->name, b->active_type);
+ if (b->active_type == PERFMON_BUNDLE_TYPE_NODE)
{
perfmon_thread_runtime_t *tr;
tr = vec_elt_at_index (pm->thread_runtimes, i);
@@ -322,19 +410,80 @@ show_perfmon_stats_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
perfmon_node_stats_t ns;
table_format_cell (t, ++col, -1, "%U", format_vlib_node_name,
- vm, j);
+ vm, j, b->active_type);
table_set_cell_align (t, col, -1, TTAA_RIGHT);
table_set_cell_fg_color (t, col, -1, TTAC_CYAN);
- clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
+
+ if (PREDICT_TRUE (clib_bitmap_is_zero (b->event_disabled)))
+ clib_memcpy_fast (&ns, tr->node_stats + j, sizeof (ns));
+ /* if some events are not implemented, we need to realign these
+ to display under the correct column headers */
+ else
+ {
+ perfmon_node_stats_t *tr_ns = tr->node_stats + j;
+ ns.n_calls = tr_ns->n_calls;
+ ns.n_packets = tr_ns->n_packets;
+ /* loop through all events in bundle + manually copy into
+ the correct place, until we've read all values that are
+ implemented */
+ int num_enabled_events =
+ b->n_events -
+ clib_bitmap_count_set_bits (b->event_disabled);
+ for (int i = 0, k = 0; k < num_enabled_events; i++)
+ {
+ if (!clib_bitmap_get (b->event_disabled, i))
+ {
+ ns.value[i] = tr_ns->value[k];
+ k++;
+ }
+ }
+ }
for (int j = 0; j < n_row; j++)
- table_format_cell (t, col, j, "%U", b->format_fn, &ns, j);
+ {
+ if (clib_bitmap_get (b->column_disabled, j))
+ table_format_cell (t, col, j, "-");
+ else
+ table_format_cell (t, col, j, "%U", b->format_fn, &ns, j,
+ b->active_type);
+ }
}
}
- else
+ else /* b->type != PERFMON_BUNDLE_TYPE_NODE */
{
- for (int j = 0; j < n_row; j++)
- table_format_cell (t, i, j, "%U", b->format_fn, r, j);
+ if (PREDICT_TRUE (clib_bitmap_is_zero (b->event_disabled)))
+ {
+ for (int j = 0; j < n_row; j++)
+ table_format_cell (t, i, j, "%U", b->format_fn, r, j,
+ b->active_type);
+ }
+ /* similarly for THREAD/SYSTEM bundles, if some events are not
+ implemented, we need to realign readings under column headings */
+ else
+ {
+ perfmon_reading_t aligned_r[b->n_events];
+ aligned_r->nr = r->nr;
+ aligned_r->time_enabled = r->time_enabled;
+ aligned_r->time_running = r->time_running;
+ int num_enabled_events =
+ b->n_events - clib_bitmap_count_set_bits (b->event_disabled);
+ for (int i = 0, k = 0; k < num_enabled_events; i++)
+ {
+ if (!clib_bitmap_get (b->event_disabled, i))
+ {
+ aligned_r->value[i] = r->value[k];
+ k++;
+ }
+ }
+ for (int j = 0; j < n_row; j++)
+ {
+ if (clib_bitmap_get (b->column_disabled, j))
+ table_format_cell (t, col, j, "-");
+ else
+ table_format_cell (t, i, j, "%U", b->format_fn, aligned_r,
+ j, b->active_type);
+ }
+ }
}
col++;
}
@@ -380,6 +529,7 @@ perfmon_start_command_fn (vlib_main_t *vm, unformat_input_t *input,
perfmon_main_t *pm = &perfmon_main;
unformat_input_t _line_input, *line_input = &_line_input;
perfmon_bundle_t *b = 0;
+ perfmon_bundle_type_t bundle_type = PERFMON_BUNDLE_TYPE_UNKNOWN;
if (pm->is_running)
return clib_error_return (0, "please stop first");
@@ -391,6 +541,9 @@ perfmon_start_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "bundle %U", unformat_perfmon_bundle_name, &b))
;
+ else if (unformat (line_input, "type %U", unformat_perfmon_active_type,
+ b, &bundle_type))
+ ;
else
return clib_error_return (0, "unknown input '%U'",
format_unformat_error, line_input);
@@ -400,12 +553,28 @@ perfmon_start_command_fn (vlib_main_t *vm, unformat_input_t *input,
if (b == 0)
return clib_error_return (0, "please specify bundle name");
+ /* if there is more than one valid mode */
+ if (count_set_bits (b->type_flags) > 1)
+ {
+ /* what did the user indicate */
+ if (!bundle_type)
+ return clib_error_return (0, "please specify a valid type");
+ }
+ /* otherwise just use the default */
+ else if (!bundle_type)
+ {
+ bundle_type =
+ (perfmon_bundle_type_t) count_trailing_zeros (b->type_flags);
+ }
+
+ b->active_type = bundle_type;
+
return perfmon_start (vm, b);
}
VLIB_CLI_COMMAND (perfmon_start_command, static) = {
.path = "perfmon start",
- .short_help = "perfmon start bundle [<bundle-name>]",
+ .short_help = "perfmon start bundle [<bundle-name>] type [<node|thread>]",
.function = perfmon_start_command_fn,
.is_mp_safe = 1,
};
diff --git a/src/plugins/perfmon/dispatch_wrapper.c b/src/plugins/perfmon/dispatch_wrapper.c
deleted file mode 100644
index fe0a449df99..00000000000
--- a/src/plugins/perfmon/dispatch_wrapper.c
+++ /dev/null
@@ -1,170 +0,0 @@
-/*
- * Copyright (c) 2020 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "vppinfra/string.h"
-#include <vnet/vnet.h>
-
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-#include <vnet/plugin/plugin.h>
-#include <vpp/app/version.h>
-#include <linux/limits.h>
-#include <sys/ioctl.h>
-
-#include <perfmon/perfmon.h>
-
-static_always_inline void
-perfmon_read_pmcs (u64 *counters, int *pmc_index, u8 n_counters)
-{
- switch (n_counters)
- {
- default:
- case 7:
- counters[6] = _rdpmc (pmc_index[6]);
- case 6:
- counters[5] = _rdpmc (pmc_index[5]);
- case 5:
- counters[4] = _rdpmc (pmc_index[4]);
- case 4:
- counters[3] = _rdpmc (pmc_index[3]);
- case 3:
- counters[2] = _rdpmc (pmc_index[2]);
- case 2:
- counters[1] = _rdpmc (pmc_index[1]);
- case 1:
- counters[0] = _rdpmc (pmc_index[0]);
- break;
- }
-}
-
-static_always_inline int
-perfmon_calc_mmap_offset (perfmon_thread_runtime_t *tr, u8 i)
-{
- return (int) (tr->mmap_pages[i]->index + tr->mmap_pages[i]->offset);
-}
-
-static_always_inline int
-perfmon_metric_index (perfmon_bundle_t *b, u8 i)
-{
- return (int) (b->metrics[i]);
-}
-
-uword
-perfmon_dispatch_wrapper_mmap (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame)
-{
- perfmon_main_t *pm = &perfmon_main;
- perfmon_thread_runtime_t *rt =
- vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
- perfmon_node_stats_t *s =
- vec_elt_at_index (rt->node_stats, node->node_index);
-
- u8 n_events = rt->n_events;
-
- u64 before[PERF_MAX_EVENTS];
- u64 after[PERF_MAX_EVENTS];
- int pmc_index[PERF_MAX_EVENTS];
- uword rv;
-
- clib_prefetch_load (s);
-
- switch (n_events)
- {
- default:
- case 7:
- pmc_index[6] = perfmon_calc_mmap_offset (rt, 6);
- case 6:
- pmc_index[5] = perfmon_calc_mmap_offset (rt, 5);
- case 5:
- pmc_index[4] = perfmon_calc_mmap_offset (rt, 4);
- case 4:
- pmc_index[3] = perfmon_calc_mmap_offset (rt, 3);
- case 3:
- pmc_index[2] = perfmon_calc_mmap_offset (rt, 2);
- case 2:
- pmc_index[1] = perfmon_calc_mmap_offset (rt, 1);
- case 1:
- pmc_index[0] = perfmon_calc_mmap_offset (rt, 0);
- break;
- }
-
- perfmon_read_pmcs (&before[0], pmc_index, n_events);
- rv = node->function (vm, node, frame);
- perfmon_read_pmcs (&after[0], pmc_index, n_events);
-
- if (rv == 0)
- return rv;
-
- s->n_calls += 1;
- s->n_packets += rv;
-
- for (int i = 0; i < n_events; i++)
- s->value[i] += after[i] - before[i];
-
- return rv;
-}
-
-uword
-perfmon_dispatch_wrapper_metrics (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame)
-{
- perfmon_main_t *pm = &perfmon_main;
- perfmon_thread_runtime_t *rt =
- vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
- perfmon_node_stats_t *s =
- vec_elt_at_index (rt->node_stats, node->node_index);
-
- u8 n_events = rt->n_events;
-
- u64 before[PERF_MAX_EVENTS];
- int pmc_index[PERF_MAX_EVENTS];
- uword rv;
-
- clib_prefetch_load (s);
-
- switch (n_events)
- {
- default:
- case 7:
- pmc_index[6] = perfmon_metric_index (rt->bundle, 6);
- case 6:
- pmc_index[5] = perfmon_metric_index (rt->bundle, 5);
- case 5:
- pmc_index[4] = perfmon_metric_index (rt->bundle, 4);
- case 4:
- pmc_index[3] = perfmon_metric_index (rt->bundle, 3);
- case 3:
- pmc_index[2] = perfmon_metric_index (rt->bundle, 2);
- case 2:
- pmc_index[1] = perfmon_metric_index (rt->bundle, 1);
- case 1:
- pmc_index[0] = perfmon_metric_index (rt->bundle, 0);
- break;
- }
-
- perfmon_read_pmcs (&before[0], pmc_index, n_events);
- rv = node->function (vm, node, frame);
-
- clib_memcpy_fast (&s->t[0].value[0], &before, sizeof (before));
- perfmon_read_pmcs (&s->t[1].value[0], pmc_index, n_events);
-
- if (rv == 0)
- return rv;
-
- s->n_calls += 1;
- s->n_packets += rv;
-
- return rv;
-}
diff --git a/src/plugins/perfmon/intel/bundle/backend_bound_core.c b/src/plugins/perfmon/intel/bundle/backend_bound_core.c
new file mode 100644
index 00000000000..16905235119
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/backend_bound_core.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+enum
+{
+ PORT0 = 0,
+ PORT1 = 1,
+ PORT5 = 2,
+ PORT6 = 3,
+ PORT2_3 = 4,
+ PORT4_9 = 5,
+ PORT7_8 = 6,
+ DISTRIBUTED = 7,
+};
+
+static u8 *
+format_intel_backend_bound_core (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+ f64 sv = 0;
+
+ if (!ss->n_packets)
+ return s;
+
+ if (0 == row)
+ {
+ sv = ss->value[DISTRIBUTED] / ss->n_packets;
+
+ s = format (s, "%.0f", sv);
+ return s;
+ }
+
+ switch (row)
+ {
+ case 1:
+ sv = ss->value[PORT0] / (f64) ss->value[DISTRIBUTED];
+ break;
+ case 2:
+ sv = ss->value[PORT1] / (f64) ss->value[DISTRIBUTED];
+ break;
+ case 3:
+ sv = ss->value[PORT5] / (f64) ss->value[DISTRIBUTED];
+ break;
+ case 4:
+ sv = ss->value[PORT6] / (f64) ss->value[DISTRIBUTED];
+ break;
+ case 5:
+ sv = (ss->value[PORT2_3]) / (f64) (2 * ss->value[DISTRIBUTED]);
+ break;
+ case 6:
+ sv = (ss->value[PORT4_9] + ss->value[PORT7_8]) /
+ (f64) (4 * ss->value[DISTRIBUTED]);
+ break;
+ }
+
+ sv = clib_max (sv * 100, 0);
+ s = format (s, "%04.1f", sv);
+
+ return s;
+}
+
+static perfmon_cpu_supports_t backend_bound_core_cpu_supports[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_backend_bound_core) = {
+ .name = "td-backend-core",
+ .description = "Topdown BackEnd-bound Core - % cycles core resources busy",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_0, /* 0xFF */
+ .events[1] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_1, /* 0xFF */
+ .events[2] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_5, /* 0xFF */
+ .events[3] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_6, /* 0xFF */
+ .events[4] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_2_3, /* 0xFF */
+ .events[5] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_4_9, /* 0xFF */
+ .events[6] = INTEL_CORE_E_UOPS_DISPATCHED_PORT_7_8, /* 0xFF */
+ .events[7] = INTEL_CORE_E_CPU_CLK_UNHALTED_DISTRIBUTED, /* 0xFF */
+ .n_events = 8,
+ .format_fn = format_intel_backend_bound_core,
+ .cpu_supports = backend_bound_core_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (backend_bound_core_cpu_supports),
+ .column_headers = PERFMON_STRINGS ("Clocks/Packet", "%Port0", "%Port1",
+ "%Port5", "%Port6", "%Load", "%Store"),
+};
diff --git a/src/plugins/perfmon/intel/bundle/backend_bound_mem.c b/src/plugins/perfmon/intel/bundle/backend_bound_mem.c
new file mode 100644
index 00000000000..ccf1ed12153
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/backend_bound_mem.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+enum
+{
+ STALLS_L1D_MISS = 0,
+ STALLS_L2_MISS = 1,
+ STALLS_L3_MISS = 2,
+ STALLS_MEM_ANY = 3,
+ STALLS_TOTAL = 4,
+ BOUND_ON_STORES = 5,
+ FB_FULL = 6,
+ THREAD = 7,
+};
+
+static u8 *
+format_intel_backend_bound_mem (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+ f64 sv = 0;
+
+ if (!ss->n_packets)
+ return s;
+
+ if (0 == row)
+ {
+ sv = ss->value[THREAD] / ss->n_packets;
+
+ s = format (s, "%.0f", sv);
+ return s;
+ }
+
+ switch (row)
+ {
+ case 1:
+ sv = ss->value[BOUND_ON_STORES];
+ break;
+ case 2:
+ sv = ss->value[STALLS_MEM_ANY] - ss->value[STALLS_L1D_MISS];
+ break;
+ case 3:
+ sv = ss->value[FB_FULL];
+ break;
+ case 4:
+ sv = ss->value[STALLS_L1D_MISS] - ss->value[STALLS_L2_MISS];
+ break;
+ case 5:
+ sv = ss->value[STALLS_L2_MISS] - ss->value[STALLS_L3_MISS];
+ break;
+ case 6:
+ sv = ss->value[STALLS_L3_MISS];
+ break;
+ }
+
+ sv = clib_max ((sv / ss->value[THREAD]) * 100, 0);
+
+ s = format (s, "%04.1f", sv);
+
+ return s;
+}
+
+static perfmon_cpu_supports_t backend_bound_mem_cpu_supports[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_backend_bound_mem) = {
+ .name = "td-backend-mem",
+ .description = "Topdown BackEnd-bound Memory - % cycles not retiring "
+ "instructions due to memory stalls",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L1D_MISS, /* 0x0F */
+ .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L2_MISS, /* 0x0F */
+ .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_L3_MISS, /* 0x0F */
+ .events[3] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY, /* 0xFF */
+ .events[4] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL, /* 0xFF */
+ .events[5] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES, /* 0xFF */
+ .events[6] = INTEL_CORE_E_L1D_PEND_MISS_FB_FULL, /* 0x0F */
+ .events[7] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* 0xFF */
+ .n_events = 8,
+ .format_fn = format_intel_backend_bound_mem,
+ .cpu_supports = backend_bound_mem_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (backend_bound_mem_cpu_supports),
+ .column_headers = PERFMON_STRINGS ("Clocks/Packet", "%Store Bound",
+ "%L1 Bound", "%FB Full", "%L2 Bound",
+ "%L3 Bound", "%DRAM Bound"),
+};
diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c
new file mode 100644
index 00000000000..3db4ca9c0f3
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_src.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+enum
+{
+ DSB_UOPS,
+ MS_UOPS,
+ MITE_UOPS,
+ LSD_UOPS,
+};
+
+static u8 *
+format_intel_frontend_bound_bw_src (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+ f64 sv = 0;
+ f64 uops = ss->value[DSB_UOPS] + ss->value[MS_UOPS] + ss->value[MITE_UOPS] +
+ ss->value[LSD_UOPS];
+
+ if (!ss->n_packets)
+ return s;
+
+ if (row == 0)
+ {
+ sv = uops / ss->n_packets;
+ s = format (s, "%.0f", sv);
+
+ return s;
+ }
+
+ switch (row)
+ {
+ case 1:
+ sv = (ss->value[DSB_UOPS] / uops) * 100;
+ break;
+ case 2:
+ sv = (ss->value[MS_UOPS] / uops) * 100;
+ break;
+ case 3:
+ sv = (ss->value[MITE_UOPS] / uops) * 100;
+ break;
+ case 4:
+ sv = (ss->value[LSD_UOPS] / uops) * 100;
+ break;
+ }
+
+ s = format (s, "%04.1f", sv);
+
+ return s;
+}
+
+static perfmon_cpu_supports_t frontend_bound_bw_cpu_supports_src[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw_src) = {
+ .name = "td-frontend-bw-src",
+ .description =
+ "Topdown FrontEnd-bound BandWidth - % uops from each uop fetch source",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_IDQ_DSB_UOPS, /* 0x0F */
+ .events[1] = INTEL_CORE_E_IDQ_MS_UOPS, /* 0x0F */
+ .events[2] = INTEL_CORE_E_IDQ_MITE_UOPS, /* 0x0F */
+ .events[3] = INTEL_CORE_E_LSD_UOPS, /* 0x0F */
+ .n_events = 4,
+ .format_fn = format_intel_frontend_bound_bw_src,
+ .cpu_supports = frontend_bound_bw_cpu_supports_src,
+ .n_cpu_supports = ARRAY_LEN (frontend_bound_bw_cpu_supports_src),
+ .column_headers = PERFMON_STRINGS ("UOPs/PKT", "% DSB UOPS", "% MS UOPS",
+ "% MITE UOPS", "% LSD UOPS"),
+ .footer =
+ "For more information, see the Intel(R) 64 and IA-32 Architectures\n"
+ "Optimization Reference Manual section on the Front End.",
+};
diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c
new file mode 100644
index 00000000000..6bf08af8154
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/frontend_bound_bw_uops.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+enum
+{
+ THREAD_P,
+ THREE_UOP,
+ TWO_UOP,
+ ONE_UOP,
+ NO_UOP,
+ FOUR_UOP,
+};
+
+static u8 *
+format_intel_frontend_bound_bw_uops (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+ f64 sv = 0;
+ f64 cycles = ss->value[THREAD_P];
+
+ switch (row)
+ {
+ case 0:
+ sv = (ss->value[FOUR_UOP] / cycles) * 100;
+ break;
+ case 1:
+ sv = ((ss->value[THREE_UOP] - ss->value[TWO_UOP]) / cycles) * 100;
+ break;
+ case 2:
+ sv = ((ss->value[TWO_UOP] - ss->value[ONE_UOP]) / cycles) * 100;
+ break;
+ case 3:
+ sv = ((ss->value[ONE_UOP] - ss->value[NO_UOP]) / cycles) * 100;
+ break;
+ case 4:
+ sv = (ss->value[NO_UOP] / cycles) * 100;
+ break;
+ }
+
+ s = format (s, "%04.1f", sv);
+
+ return s;
+}
+
+static perfmon_cpu_supports_t frontend_bound_bw_cpu_supports_uops[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_bw_uops) = {
+ .name = "td-frontend-bw-uops",
+ .description = "Topdown FrontEnd-bound BandWidth - distribution of "
+ "uops delivered to frontend",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* 0x0F */
+ .events[1] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_3_UOP_DELIV_CORE, /* 0xFF */
+ .events[2] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_2_UOP_DELIV_CORE, /* 0xFF */
+ .events[3] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_1_UOP_DELIV_CORE, /* 0xFF */
+ .events[4] =
+ INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOP_DELIV_CORE, /* 0xFF */
+ .events[5] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CYCLES_FE_WAS_OK, /* 0xFF */
+ .n_events = 6,
+ .format_fn = format_intel_frontend_bound_bw_uops,
+ .cpu_supports = frontend_bound_bw_cpu_supports_uops,
+ .n_cpu_supports = ARRAY_LEN (frontend_bound_bw_cpu_supports_uops),
+ .column_headers = PERFMON_STRINGS ("% 4 UOPS", "% 3 UOPS", "% 2 UOPS",
+ "% 1 UOPS", "% 0 UOPS"),
+ .footer =
+ "For more information, see the Intel(R) 64 and IA-32 Architectures\n"
+ "Optimization Reference Manual section on the Front End.",
+};
diff --git a/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c b/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c
new file mode 100644
index 00000000000..aea2149663f
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/frontend_bound_lat.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+static const int MS_Switches_Cost = 3;
+static const int BA_Clear_Cost = 10;
+
+enum
+{
+ ICACHE_MISS,
+ DSB_SWITCHES,
+ RESTEER,
+ MS_SWITCHES,
+ BACLEARS,
+ THREAD,
+};
+
+static u8 *
+format_intel_frontend_bound_lat (u8 *s, va_list *args)
+{
+ perfmon_node_stats_t *ss = va_arg (*args, perfmon_node_stats_t *);
+ int row = va_arg (*args, int);
+ f64 sv = 0;
+ f64 cycles = ss->value[THREAD];
+
+ if (!ss->n_packets)
+ return s;
+
+ if (!row)
+ {
+ sv = ss->value[THREAD] / ss->n_packets;
+
+ s = format (s, "%.0f", sv);
+
+ return s;
+ }
+
+ switch (row)
+ {
+ case 1:
+ sv = ss->value[ICACHE_MISS] / cycles;
+ break;
+ case 2:
+ sv = ss->value[DSB_SWITCHES] / cycles;
+ break;
+ case 3:
+ sv =
+ (ss->value[RESTEER] + (ss->value[BACLEARS] * BA_Clear_Cost)) / cycles;
+ break;
+ case 4:
+ sv = (ss->value[MS_SWITCHES] * MS_Switches_Cost) / cycles;
+ break;
+ }
+
+ s = format (s, "%04.1f", sv * 100);
+
+ return s;
+}
+
+static perfmon_cpu_supports_t frontend_bound_lat_cpu_supports[] = {
+ { clib_cpu_supports_avx512_bitalg, PERFMON_BUNDLE_TYPE_NODE },
+};
+
+PERFMON_REGISTER_BUNDLE (intel_core_frontend_bound_lat) = {
+ .name = "td-frontend-lat",
+ .description = "Topdown FrontEnd-bound Latency - % cycles not retiring uops "
+ "due to frontend latency",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_ICACHE_16B_IFDATA_STALL, /* 0x0F */
+ .events[1] = INTEL_CORE_E_DSB2MITE_SWITCHES_PENALTY_CYCLES, /* 0x0F */
+ .events[2] = INTEL_CORE_E_INT_MISC_CLEAR_RESTEER_CYCLES, /* 0xFF */
+ .events[3] = INTEL_CORE_E_IDQ_MS_SWITCHES, /* 0x0F */
+ .events[4] = INTEL_CORE_E_BACLEARS_ANY, /* 0x0F */
+ .events[5] = INTEL_CORE_E_CPU_CLK_UNHALTED_THREAD_P, /* FIXED */
+ .n_events = 6,
+ .format_fn = format_intel_frontend_bound_lat,
+ .cpu_supports = frontend_bound_lat_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (frontend_bound_lat_cpu_supports),
+ .column_headers = PERFMON_STRINGS ("Clocks/Packet", "% iCache Miss",
+ "% DSB Switch", "% Branch Resteer",
+ "% MS Switch"),
+ .footer =
+ "For more information, see the Intel(R) 64 and IA-32 Architectures\n"
+ "Optimization Reference Manual on the Front End.",
+};
diff --git a/src/plugins/perfmon/intel/bundle/iio_bw.c b/src/plugins/perfmon/intel/bundle/iio_bw.c
new file mode 100644
index 00000000000..f746ca2c906
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/iio_bw.c
@@ -0,0 +1,263 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/uncore.h>
+#include <vlib/pci/pci.h>
+#include <vppinfra/format.h>
+#include <linux/limits.h>
+#include <fcntl.h>
+#include <math.h>
+
+typedef struct
+{
+ u8 socket_id;
+ u8 sad_id;
+ u8 iio_unit_id;
+} iio_uncore_sad_t;
+typedef u32 index_t;
+
+static const char *procfs_pci_path = "/proc/bus/pci";
+
+#define PCM_INTEL_PCI_VENDOR_ID 0x8086
+#define SNR_ICX_SAD_CONTROL_CFG_OFFSET 0x3F4
+#define SNR_ICX_MESH2IIO_MMAP_DID 0x09A2
+
+static const u8 icx_sad_to_pmu_id_mapping[] = { 5, 0, 1, 2, 3, 4 };
+
+static const char *iio_bw_footer_message =
+ "* this bundle currently only measures x8 and x16 PCIe devices on Port #0\n"
+ "or Port #2. Please see the \"Intel® Xeon® Processor Scalable Memory\n"
+ "Family Uncore Performance Monitoring Reference Manual(336274)\"\n"
+ "Section 2.4 for more information.";
+
+static u32
+get_sad_ctrl_cfg (vlib_pci_addr_t *addr)
+{
+ int fd = 0;
+ u32 value;
+ u8 *dev_node_name = format (0, "%s/%02x/%02x.%x", procfs_pci_path, addr->bus,
+ addr->slot, addr->function);
+
+ fd = open ((char *) dev_node_name, O_RDWR);
+ if (fd < 0)
+ return -1;
+
+ if (pread (fd, &value, sizeof (u32), SNR_ICX_SAD_CONTROL_CFG_OFFSET) <
+ sizeof (u32))
+ value = -1;
+
+ close (fd);
+
+ return value;
+}
+
+static u64
+get_bus_to_sad_mappings (vlib_main_t *vm, index_t **ph, iio_uncore_sad_t **pp)
+{
+ index_t *h = 0;
+ iio_uncore_sad_t *p = 0, *e = 0;
+ vlib_pci_addr_t *addr = 0, *addrs;
+
+ addrs = vlib_pci_get_all_dev_addrs ();
+
+ vec_foreach (addr, addrs)
+ {
+ vlib_pci_device_info_t *d;
+ d = vlib_pci_get_device_info (vm, addr, 0);
+
+ if (!d)
+ continue;
+
+ if (d->vendor_id == PCM_INTEL_PCI_VENDOR_ID &&
+ d->device_id == SNR_ICX_MESH2IIO_MMAP_DID)
+ {
+
+ u32 sad_ctrl_cfg = get_sad_ctrl_cfg (addr);
+ if (sad_ctrl_cfg == 0xFFFFFFFF)
+ {
+ vlib_pci_free_device_info (d);
+ continue;
+ }
+
+ pool_get_zero (p, e);
+
+ e->socket_id = (sad_ctrl_cfg & 0xf);
+ e->sad_id = (sad_ctrl_cfg >> 4) & 0x7;
+ e->iio_unit_id = icx_sad_to_pmu_id_mapping[e->sad_id];
+
+ hash_set (h, addr->bus, e - p);
+ }
+
+ vlib_pci_free_device_info (d);
+ }
+
+ vec_free (addrs);
+
+ *ph = h;
+ *pp = p;
+
+ return 0;
+}
+
+u8 *
+format_stack_socket (u8 *s, va_list *va)
+{
+ iio_uncore_sad_t *e, *p = va_arg (*va, iio_uncore_sad_t *);
+ index_t *h = va_arg (*va, index_t *);
+ vlib_pci_addr_t root_bus, *addr = va_arg (*va, vlib_pci_addr_t *);
+ clib_error_t *err = vlib_pci_get_device_root_bus (addr, &root_bus);
+ if (err)
+ {
+ clib_error_free (err);
+ return s;
+ }
+
+ uword *pu = hash_get (h, root_bus.bus);
+ if (pu)
+ {
+ e = pool_elt_at_index (p, (index_t) pu[0]);
+
+ s = format (s, "IIO%u/%u", e->socket_id, e->iio_unit_id);
+ }
+ else
+ {
+ s = format (s, "[ERR: hash lookup for bus '%u' failed]", root_bus.bus);
+ }
+ return s;
+}
+
+static clib_error_t *
+init_intel_uncore_iio_bw (vlib_main_t *vm, struct perfmon_bundle *b)
+{
+ index_t *h = 0;
+ iio_uncore_sad_t *p = 0;
+ vlib_pci_addr_t *addr = 0, *addrs;
+ u8 *s = 0;
+
+ get_bus_to_sad_mappings (vm, &h, &p);
+
+ s = format (0, "%-10s%-5s%-13s%-12s%-14s%-16s%s\n", "Stack", "Port",
+ "Address", "VID:PID", "Link Speed", "Driver", "Product Name");
+
+ addrs = vlib_pci_get_all_dev_addrs ();
+
+ vec_foreach (addr, addrs)
+ {
+ vlib_pci_device_info_t *d;
+ d = vlib_pci_get_device_info (vm, addr, 0);
+
+ if (!d)
+ continue;
+
+ if (d->device_class != PCI_CLASS_NETWORK_ETHERNET)
+ continue;
+
+ s = format (
+ s, "%-10U%-5U%-13U%04x:%04x %-14U%-16s%v\n", format_stack_socket, p,
+ h, addr, format_vlib_pci_link_port, &d->config, format_vlib_pci_addr,
+ addr, d->vendor_id, d->device_id, format_vlib_pci_link_speed, d,
+ d->driver_name ? (char *) d->driver_name : "", d->product_name);
+
+ vlib_pci_free_device_info (d);
+ }
+
+ b->footer = (char *) format (s, "\n%s", iio_bw_footer_message);
+
+ vec_free (addrs);
+ pool_free (p);
+ hash_free (h);
+
+ return 0;
+}
+
+static u8 *
+format_intel_uncore_iio_bw (u8 *s, va_list *args)
+{
+ perfmon_reading_t *r = va_arg (*args, perfmon_reading_t *);
+ int col = va_arg (*args, int);
+ f64 tr = r->time_running * 1e-9;
+ f64 value = 0;
+
+ switch (col)
+ {
+ case 0:
+ s = format (s, "%9.2f", tr);
+ break;
+ default:
+ if (r->time_running)
+ {
+ value = r->value[col - 1] * 4 / tr;
+
+ if (value > 1.0e6)
+ s = format (s, "%9.0fM", value * 1e-6);
+ else if (value > 1.0e3)
+ s = format (s, "%9.0fK", value * 1e-3);
+ else
+ s = format (s, "%9.0f ", value);
+ }
+
+ break;
+ }
+
+ return s;
+}
+
+/*
+ * This bundle is currently only supported and tested on Intel Icelake.
+ */
+static int
+is_icelake ()
+{
+ return clib_cpu_supports_avx512_bitalg () && !clib_cpu_supports_movdir64b ();
+}
+
+static perfmon_cpu_supports_t iio_bw_cpu_supports[] = {
+ { is_icelake, PERFMON_BUNDLE_TYPE_SYSTEM }
+};
+
+PERFMON_REGISTER_BUNDLE (intel_uncore_iio_bw_pci) = {
+ .name = "iio-bandwidth-pci",
+ .description = "pci iio memory reads and writes per iio stack *",
+ .source = "intel-uncore",
+ .events[0] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_OF_CPU_PART0_RD,
+ .events[1] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART0_WR,
+ .events[2] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_RD,
+ .events[3] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_WR,
+ .n_events = 4,
+ .cpu_supports = iio_bw_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (iio_bw_cpu_supports),
+ .format_fn = format_intel_uncore_iio_bw,
+ .init_fn = init_intel_uncore_iio_bw,
+ .column_headers = PERFMON_STRINGS ("RunTime", "PCIe Rd/P0", "PCIe Wr/P0",
+ "PCIe Rd/P2", "PCIe Wr/P2")
+};
+
+PERFMON_REGISTER_BUNDLE (intel_uncore_iio_bw_cpu) = {
+ .name = "iio-bandwidth-cpu",
+ .description = "cpu iio memory reads and writes per iio stack *",
+ .source = "intel-uncore",
+ .events[0] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART0_RD,
+ .events[1] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART0_WR,
+ .events[2] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_RD,
+ .events[3] = INTEL_UNCORE_E_IIO_UNC_IIO_DATA_REQ_BY_CPU_PART2_WR,
+ .n_events = 4,
+ .cpu_supports = iio_bw_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (iio_bw_cpu_supports),
+ .format_fn = format_intel_uncore_iio_bw,
+ .init_fn = init_intel_uncore_iio_bw,
+ .column_headers = PERFMON_STRINGS ("RunTime", "CPU Rd/P0", "CPU Wr/P0",
+ "CPU Rd/P2", "CPU Wr/P2")
+};
diff --git a/src/plugins/perfmon/intel/bundle/topdown_icelake.c b/src/plugins/perfmon/intel/bundle/topdown_icelake.c
new file mode 100644
index 00000000000..a3392e52f0a
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/topdown_icelake.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+static int
+is_icelake ()
+{
+ return clib_cpu_supports_avx512_bitalg () && !clib_cpu_supports_movdir64b ();
+}
+
+static perfmon_cpu_supports_t topdown_lvl2_cpu_supports_icx[] = {
+ { is_icelake, PERFMON_BUNDLE_TYPE_THREAD }
+};
+
+#define GET_METRIC(m, i) (f64) (((m) >> (i * 8)) & 0xff)
+
+enum
+{
+ TD_SLOTS = 0,
+ STALLS_MEM_ANY,
+ STALLS_TOTAL,
+ BOUND_ON_STORES,
+ RECOVERY_CYCLES,
+ UOP_DROPPING,
+ UOP_NOT_DELIVERED,
+ TD_RETIRING,
+ TD_BAD_SPEC,
+ TD_FE_BOUND,
+ TD_BE_BOUND,
+};
+
+static_always_inline f64
+memory_bound_fraction (perfmon_reading_t *ss)
+{
+ return (ss->value[STALLS_MEM_ANY] + ss->value[BOUND_ON_STORES]) /
+ (f64) (ss->value[STALLS_TOTAL] + ss->value[BOUND_ON_STORES]);
+}
+
+static_always_inline f64
+perf_metrics_sum (perfmon_reading_t *ss)
+{
+ return ss->value[TD_RETIRING] + ss->value[TD_BAD_SPEC] +
+ ss->value[TD_FE_BOUND] + ss->value[TD_BE_BOUND];
+}
+
+static_always_inline f64
+retiring (perfmon_reading_t *ss)
+{
+ return ss->value[TD_RETIRING] / perf_metrics_sum (ss);
+}
+
+static_always_inline f64
+bad_speculation (perfmon_reading_t *ss)
+{
+ return ss->value[TD_BAD_SPEC] / perf_metrics_sum (ss);
+}
+
+static_always_inline f64
+frontend_bound (perfmon_reading_t *ss)
+{
+ return (ss->value[TD_FE_BOUND] / perf_metrics_sum (ss)) -
+ (ss->value[UOP_DROPPING] / perf_metrics_sum (ss));
+}
+
+static_always_inline f64
+backend_bound (perfmon_reading_t *ss)
+{
+ return (ss->value[TD_BE_BOUND] / perf_metrics_sum (ss)) +
+ ((5 * ss->value[RECOVERY_CYCLES]) / perf_metrics_sum (ss));
+}
+
+static_always_inline f64
+fetch_latency (perfmon_reading_t *ss)
+{
+ f64 r = ((5 * ss->value[UOP_NOT_DELIVERED] - ss->value[UOP_DROPPING]) /
+ (f64) ss->value[TD_SLOTS]);
+ return r;
+}
+
+static_always_inline f64
+fetch_bandwidth (perfmon_reading_t *ss)
+{
+ return clib_max (0, frontend_bound (ss) - fetch_latency (ss));
+}
+
+static_always_inline f64
+memory_bound (perfmon_reading_t *ss)
+{
+ return backend_bound (ss) * memory_bound_fraction (ss);
+}
+
+static_always_inline f64
+core_bound (perfmon_reading_t *ss)
+{
+ return backend_bound (ss) - memory_bound (ss);
+}
+
+static u8 *
+format_topdown_lvl2_icx (u8 *s, va_list *args)
+{
+ perfmon_reading_t *ss = va_arg (*args, perfmon_reading_t *);
+ u64 idx = va_arg (*args, int);
+ f64 sv = 0;
+
+ switch (idx)
+ {
+ case 0:
+ sv = retiring (ss);
+ break;
+ case 1:
+ sv = bad_speculation (ss);
+ break;
+ case 2:
+ sv = frontend_bound (ss);
+ break;
+ case 3:
+ sv = backend_bound (ss);
+ break;
+ case 4:
+ sv = fetch_latency (ss);
+ break;
+ case 5:
+ sv = fetch_bandwidth (ss);
+ break;
+ case 6:
+ sv = memory_bound (ss);
+ break;
+ case 7:
+ sv = core_bound (ss);
+ break;
+ }
+
+ s = format (s, "%f", sv * 100);
+
+ return s;
+}
+
+PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric_icx) = {
+ .name = "topdown",
+ .description = "Top-down Microarchitecture Analysis Level 1 & 2",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
+ .events[1] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_MEM_ANY,
+ .events[2] = INTEL_CORE_E_CYCLE_ACTIVITY_STALLS_TOTAL,
+ .events[3] = INTEL_CORE_E_EXE_ACTIVITY_BOUND_ON_STORES,
+ .events[4] = INTEL_CORE_E_INT_MISC_RECOVERY_CYCLES,
+ .events[5] = INTEL_CORE_E_INT_MISC_UOP_DROPPING,
+ .events[6] = INTEL_CORE_E_IDQ_UOPS_NOT_DELIVERED_CORE,
+ .events[7] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
+ .events[8] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
+ .events[9] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
+ .events[10] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
+ .n_events = 11,
+ .cpu_supports = topdown_lvl2_cpu_supports_icx,
+ .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports_icx),
+ .format_fn = format_topdown_lvl2_icx,
+ .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% FE.FL",
+ "% FE.FB", "% BE.MB", "% BE.CB"),
+ .footer = "Retiring (RT), Bad Speculation (BS),\n"
+ " FrontEnd bound (FE), BackEnd bound (BE),\n"
+ " Fetch Latency (FL), Fetch Bandwidth (FB),\n"
+ " Memory Bound (MB), Core Bound (CB)",
+};
diff --git a/src/plugins/perfmon/intel/bundle/topdown_metrics.c b/src/plugins/perfmon/intel/bundle/topdown_metrics.c
index 386f3843bc3..a464dfe1c88 100644
--- a/src/plugins/perfmon/intel/bundle/topdown_metrics.c
+++ b/src/plugins/perfmon/intel/bundle/topdown_metrics.c
@@ -14,102 +14,202 @@
*/
#include <vnet/vnet.h>
+#include <vppinfra/math.h>
#include <perfmon/perfmon.h>
#include <perfmon/intel/core.h>
#define GET_METRIC(m, i) (((m) >> (i * 8)) & 0xff)
#define GET_RATIO(m, i) (((m) >> (i * 32)) & 0xffffffff)
-#define RDPMC_FIXED_SLOTS (1 << 30) /* fixed slots */
-#define RDPMC_L1_METRICS (1 << 29) /* l1 metric counters */
+#define RDPMC_SLOTS (1 << 30) /* fixed slots */
+#define RDPMC_METRICS (1 << 29) /* l1 & l2 metric counters */
#define FIXED_COUNTER_SLOTS 3
-#define METRIC_COUNTER_TOPDOWN_L1 0
+#define METRIC_COUNTER_TOPDOWN_L1_L2 0
typedef enum
{
- TOPDOWN_E_METRIC_RETIRING = 0,
- TOPDOWN_E_METRIC_BAD_SPEC,
- TOPDOWN_E_METRIC_FE_BOUND,
- TOPDOWN_E_METRIC_BE_BOUND,
-} topdown_lvl1_counters_t;
+ TOPDOWN_E_RETIRING = 0,
+ TOPDOWN_E_BAD_SPEC,
+ TOPDOWN_E_FE_BOUND,
+ TOPDOWN_E_BE_BOUND,
+ TOPDOWN_E_HEAVYOPS,
+ TOPDOWN_E_LIGHTOPS,
+ TOPDOWN_E_BMISPRED,
+ TOPDOWN_E_MCHCLEAR,
+ TOPDOWN_E_FETCHLAT,
+ TOPDOWN_E_FETCH_BW,
+ TOPDOWN_E_MEMBOUND,
+ TOPDOWN_E_CORBOUND,
+ TOPDOWN_E_MAX,
+} topdown_e_t;
enum
{
- TOPDOWN_SLOTS = 0,
- TOPDOWN_METRICS,
-} topdown_lvl1_metrics_t;
+ TOPDOWN_E_RDPMC_SLOTS = 0,
+ TOPDOWN_E_RDPMC_METRICS,
+};
+
+typedef f64 (topdown_lvl1_parse_fn_t) (void *, topdown_e_t);
-static_always_inline f32
-topdown_lvl1_parse_row (perfmon_node_stats_t *ns, topdown_lvl1_counters_t e)
+/* Parse thread level states from perfmon_reading */
+static_always_inline f64
+topdown_lvl1_perf_reading (void *ps, topdown_e_t e)
{
+ perfmon_reading_t *ss = (perfmon_reading_t *) ps;
+
+ /* slots are at value[0], everthing else follows at +1 */
+ return ((f64) ss->value[e + 1] / ss->value[0]) * 100;
+}
+
+static_always_inline f64
+topdown_lvl1_rdpmc_metric (void *ps, topdown_e_t e)
+{
+ perfmon_node_stats_t *ss = (perfmon_node_stats_t *) ps;
f64 slots_t0 =
- ns->t[0].value[TOPDOWN_SLOTS] *
- ((f64) GET_METRIC (ns->t[0].value[TOPDOWN_METRICS], e) / 0xff);
+ ss->t[0].value[TOPDOWN_E_RDPMC_SLOTS] *
+ ((f64) GET_METRIC (ss->t[0].value[TOPDOWN_E_RDPMC_METRICS], e) / 0xff);
f64 slots_t1 =
- ns->t[1].value[TOPDOWN_SLOTS] *
- ((f64) GET_METRIC (ns->t[1].value[TOPDOWN_METRICS], e) / 0xff);
- u64 slots_delta =
- ns->t[1].value[TOPDOWN_SLOTS] - ns->t[0].value[TOPDOWN_SLOTS];
+ ss->t[1].value[TOPDOWN_E_RDPMC_SLOTS] *
+ ((f64) GET_METRIC (ss->t[1].value[TOPDOWN_E_RDPMC_METRICS], e) / 0xff);
+ u64 slots_delta = ss->t[1].value[TOPDOWN_E_RDPMC_SLOTS] -
+ ss->t[0].value[TOPDOWN_E_RDPMC_SLOTS];
slots_t1 = slots_t1 - slots_t0;
return (slots_t1 / slots_delta) * 100;
}
-static u8 *
-format_topdown_lvl1 (u8 *s, va_list *args)
+/* Convert the TopDown enum to the perf reading index */
+#define TO_LVL2_PERF_IDX(e) \
+ ({ \
+ u8 to_idx[TOPDOWN_E_MAX] = { 0, 0, 0, 0, 5, 5, 6, 6, 7, 7, 8, 8 }; \
+ to_idx[e]; \
+ })
+
+/* Parse thread level stats from perfmon_reading */
+static_always_inline f64
+topdown_lvl2_perf_reading (void *ps, topdown_e_t e)
+{
+ perfmon_reading_t *ss = (perfmon_reading_t *) ps;
+ u64 value = ss->value[TO_LVL2_PERF_IDX (e)];
+
+ /* If it is an L1 metric, call L1 format */
+ if (TOPDOWN_E_BE_BOUND >= e)
+ {
+ return topdown_lvl1_perf_reading (ps, e);
+ }
+
+ /* all the odd metrics, are inferred from even and L1 metrics */
+ if (e & 0x1)
+ {
+ topdown_e_t e1 = TO_LVL2_PERF_IDX (e) - 4;
+ value = ss->value[e1] - value;
+ }
+
+ return (f64) value / ss->value[0] * 100;
+}
+
+/* Convert the TopDown enum to the rdpmc metric byte position */
+#define TO_LVL2_METRIC_BYTE(e) \
+ ({ \
+ u8 to_metric[TOPDOWN_E_MAX] = { 0, 0, 0, 0, 4, 4, 5, 5, 6, 6, 7, 7 }; \
+ to_metric[e]; \
+ })
+
+/* Convert the TopDown L2 enum to the reference TopDown L1 enum */
+#define TO_LVL1_REF(e) \
+ ({ \
+ u8 to_lvl1[TOPDOWN_E_MAX] = { -1, \
+ -1, \
+ -1, \
+ -1, \
+ TOPDOWN_E_RETIRING, \
+ TOPDOWN_E_RETIRING, \
+ TOPDOWN_E_BAD_SPEC, \
+ TOPDOWN_E_BAD_SPEC, \
+ TOPDOWN_E_FE_BOUND, \
+ TOPDOWN_E_FE_BOUND, \
+ TOPDOWN_E_BE_BOUND, \
+ TOPDOWN_E_BE_BOUND }; \
+ to_lvl1[e]; \
+ })
+
+static_always_inline f64
+topdown_lvl2_rdpmc_metric (void *ps, topdown_e_t e)
{
- perfmon_node_stats_t *st = va_arg (*args, perfmon_node_stats_t *);
- u64 row = va_arg (*args, int);
+ f64 r, l1_value = 0;
- switch (row)
+ /* If it is an L1 metric, call L1 format */
+ if (TOPDOWN_E_BE_BOUND >= e)
{
- case 0:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BAD_SPEC) +
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_RETIRING));
- break;
- case 1:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BE_BOUND) +
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_FE_BOUND));
- break;
- case 2:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_RETIRING));
- break;
- case 3:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BAD_SPEC));
- break;
- case 4:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_FE_BOUND));
- break;
- case 5:
- s = format (s, "%f",
- topdown_lvl1_parse_row (st, TOPDOWN_E_METRIC_BE_BOUND));
- break;
+ return topdown_lvl1_rdpmc_metric (ps, e);
}
+
+ /* all the odd metrics, are inferred from even and L1 metrics */
+ if (e & 0x1)
+ {
+ /* get the L1 reference metric */
+ l1_value = topdown_lvl1_rdpmc_metric (ps, TO_LVL1_REF (e));
+ }
+
+ /* calculate the l2 metric */
+ r =
+ fabs (l1_value - topdown_lvl1_rdpmc_metric (ps, TO_LVL2_METRIC_BYTE (e)));
+ return r;
+}
+
+static u8 *
+format_topdown_lvl2 (u8 *s, va_list *args)
+{
+ void *ps = va_arg (*args, void *);
+ u64 idx = va_arg (*args, int);
+ perfmon_bundle_type_t type = va_arg (*args, perfmon_bundle_type_t);
+ f64 sv = 0;
+
+ topdown_lvl1_parse_fn_t *parse_fn,
+ *parse_fns[PERFMON_BUNDLE_TYPE_MAX] = { 0, topdown_lvl2_rdpmc_metric,
+ topdown_lvl2_perf_reading, 0 };
+
+ parse_fn = parse_fns[type];
+ ASSERT (parse_fn);
+
+ sv = parse_fn (ps, (topdown_e_t) idx);
+ s = format (s, "%f", sv);
+
return s;
}
-PERFMON_REGISTER_BUNDLE (topdown_lvl1) = {
- .name = "topdown-level1",
- .description = "Top-down Microarchitecture Analysis Level 1",
+static perfmon_cpu_supports_t topdown_lvl2_cpu_supports[] = {
+ /* Intel SPR supports papi/thread or rdpmc/node */
+ { clib_cpu_supports_avx512_fp16, PERFMON_BUNDLE_TYPE_NODE_OR_THREAD }
+};
+
+PERFMON_REGISTER_BUNDLE (topdown_lvl2_metric) = {
+ .name = "topdown",
+ .description = "Top-down Microarchitecture Analysis Level 1 & 2",
.source = "intel-core",
- .type = PERFMON_BUNDLE_TYPE_NODE,
- .offset_type = PERFMON_OFFSET_TYPE_METRICS,
.events[0] = INTEL_CORE_E_TOPDOWN_SLOTS,
- .events[1] = INTEL_CORE_E_TOPDOWN_L1_METRICS,
- .metrics[0] = RDPMC_FIXED_SLOTS | FIXED_COUNTER_SLOTS,
- .metrics[1] = RDPMC_L1_METRICS | METRIC_COUNTER_TOPDOWN_L1,
- .n_events = 2,
- .cpu_supports = clib_cpu_supports_avx512_bitalg,
- .format_fn = format_topdown_lvl1,
- .column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS",
- "% ST.FE", "% ST.BE"),
- .footer = "Not Stalled (NS),STalled (ST),\n"
- " Retiring (RT), Bad Speculation (BS),\n"
- " FrontEnd bound (FE), BackEnd bound (BE)",
+ .events[1] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_METRIC,
+ .events[2] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_METRIC,
+ .events[3] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_METRIC,
+ .events[4] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_METRIC,
+ .events[5] = INTEL_CORE_E_TOPDOWN_L2_HEAVYOPS_METRIC,
+ .events[6] = INTEL_CORE_E_TOPDOWN_L2_BMISPRED_METRIC,
+ .events[7] = INTEL_CORE_E_TOPDOWN_L2_FETCHLAT_METRIC,
+ .events[8] = INTEL_CORE_E_TOPDOWN_L2_MEMBOUND_METRIC,
+ .n_events = 9,
+ .preserve_samples = 0x1FF,
+ .cpu_supports = topdown_lvl2_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (topdown_lvl2_cpu_supports),
+ .format_fn = format_topdown_lvl2,
+ .column_headers = PERFMON_STRINGS ("% RT", "% BS", "% FE", "% BE", "% RT.HO",
+ "% RT.LO", "% BS.BM", "% BS.MC",
+ "% FE.FL", "% FE.FB", "% BE.MB",
+ "% BE.CB"),
+ .footer = "Retiring (RT), Bad Speculation (BS),\n"
+ " FrontEnd bound (1FE), BackEnd bound (BE),\n"
+ " Light Operations (LO), Heavy Operations (HO),\n"
+ " Branch Misprediction (BM), Machine Clears (MC),\n"
+ " Fetch Latency (FL), Fetch Bandwidth (FB),\n"
+ " Memory Bound (MB), Core Bound (CB)",
};
diff --git a/src/plugins/perfmon/intel/bundle/topdown_tremont.c b/src/plugins/perfmon/intel/bundle/topdown_tremont.c
new file mode 100644
index 00000000000..b2626eb0480
--- /dev/null
+++ b/src/plugins/perfmon/intel/bundle/topdown_tremont.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <perfmon/perfmon.h>
+#include <perfmon/intel/core.h>
+
+typedef enum
+{
+ TOPDOWN_E_RETIRING = 0,
+ TOPDOWN_E_BAD_SPEC,
+ TOPDOWN_E_FE_BOUND,
+ TOPDOWN_E_BE_BOUND,
+ TOPDOWN_E_MAX,
+} topdown_lvl1_t;
+
+static u8 *
+format_topdown_lvl1 (u8 *s, va_list *args)
+{
+ perfmon_reading_t *ss = va_arg (*args, perfmon_reading_t *);
+ u64 idx = va_arg (*args, int);
+ f64 sv = 0;
+ u64 total = 0;
+
+ for (int i = 0; i < TOPDOWN_E_MAX; i++)
+ total += ss->value[i];
+
+ switch (idx)
+ {
+ case 0:
+ sv = (f64) ss->value[TOPDOWN_E_RETIRING] + ss->value[TOPDOWN_E_BAD_SPEC];
+ break;
+ case 1:
+ sv = (f64) ss->value[TOPDOWN_E_FE_BOUND] + ss->value[TOPDOWN_E_BE_BOUND];
+ break;
+ default:
+ sv = (f64) ss->value[idx - 2];
+ break;
+ }
+
+ sv = (sv / total) * 100;
+ s = format (s, "%f", sv);
+ return s;
+}
+
+static int
+is_tremont ()
+{
+ return clib_cpu_supports_movdir64b () && !clib_cpu_supports_avx2 ();
+}
+
+static perfmon_cpu_supports_t topdown_lvl1_cpu_supports[] = {
+ { is_tremont, PERFMON_BUNDLE_TYPE_THREAD }
+};
+
+PERFMON_REGISTER_BUNDLE (topdown_lvl1_tremont) = {
+ .name = "topdown-level1",
+ .description = "Top-down Microarchitecture Analysis Level 1",
+ .source = "intel-core",
+ .events[0] = INTEL_CORE_E_TOPDOWN_L1_RETIRING_TREMONT,
+ .events[1] = INTEL_CORE_E_TOPDOWN_L1_BAD_SPEC_TREMONT,
+ .events[2] = INTEL_CORE_E_TOPDOWN_L1_FE_BOUND_TREMONT,
+ .events[3] = INTEL_CORE_E_TOPDOWN_L1_BE_BOUND_TREMONT,
+ .n_events = 4,
+ .cpu_supports = topdown_lvl1_cpu_supports,
+ .n_cpu_supports = ARRAY_LEN (topdown_lvl1_cpu_supports),
+ .format_fn = format_topdown_lvl1,
+ .column_headers = PERFMON_STRINGS ("% NS", "% ST", "% NS.RT", "% NS.BS",
+ "% ST.FE", "% ST.BE"),
+ .footer = "Not Stalled (NS),STalled (ST),\n"
+ " Retiring (RT), Bad Speculation (BS),\n"
+ " FrontEnd bound (FE), BackEnd bound (BE)",
+};
diff --git a/src/plugins/perfmon/intel/core.c b/src/plugins/perfmon/intel/core.c
index cef6f32d7e6..d6a16b2125e 100644
--- a/src/plugins/perfmon/intel/core.c
+++ b/src/plugins/perfmon/intel/core.c
@@ -16,6 +16,7 @@
#include <vnet/vnet.h>
#include <perfmon/perfmon.h>
#include <perfmon/intel/core.h>
+#include <perfmon/intel/dispatch_wrapper.h>
#include <linux/perf_event.h>
static perfmon_event_t events[] = {
@@ -25,9 +26,12 @@ static perfmon_event_t events[] = {
event, umask, edge, any, inv, cmask), \
.name = #n "." #suffix, \
.description = desc, \
+ .implemented = 1, \
.exclude_kernel = 1 },
- foreach_perf_intel_core_event
+ foreach_perf_intel_core_event foreach_perf_intel_peusdo_event
+ foreach_perf_intel_tremont_event
+
#undef _
};
@@ -55,6 +59,10 @@ format_intel_core_config (u8 *s, va_list *args)
if ((v = (config >> 24) & 0xff))
s = format (s, ", cmask=0x%02x", v);
+ /* show the raw config, for convenience sake */
+ if (!((config >> 16) & 0xffff))
+ s = format (s, ", raw=r%x", config & 0xffff);
+
return s;
}
@@ -71,11 +79,79 @@ intel_core_init (vlib_main_t *vm, perfmon_source_t *src)
return 0;
}
+perfmon_event_type_t
+intel_core_get_event_type (u32 event)
+{
+ u64 config = events[event].config;
+ u8 eventcode = (config & 0xFF);
+ u8 umask = ((config >> 8) & 0xFF);
+
+ if (!eventcode) /* is fixed or pseudo */
+ {
+ if (umask >= 0x80) /* is pseudo */
+ return PERFMON_EVENT_TYPE_PSEUDO;
+ else /* is fixed */
+ return PERFMON_EVENT_TYPE_FIXED;
+ }
+ else
+ return PERFMON_EVENT_TYPE_GENERAL;
+}
+
+static u8
+is_enough_counters (perfmon_bundle_t *b)
+{
+ u8 bl[PERFMON_EVENT_TYPE_MAX];
+ u8 cpu[PERFMON_EVENT_TYPE_MAX];
+
+ clib_memset (&bl, 0, sizeof (bl));
+ clib_memset (&cpu, 0, sizeof (cpu));
+
+ /* how many does this uarch support */
+ if (!clib_get_pmu_counter_count (&cpu[PERFMON_EVENT_TYPE_FIXED],
+ &cpu[PERFMON_EVENT_TYPE_GENERAL]))
+ return 0;
+
+ /* how many does the bundle require */
+ for (u16 i = 0; i < b->n_events; i++)
+ {
+ /* if source allows us to identify events, otherwise assume general */
+ if (b->src->get_event_type)
+ bl[b->src->get_event_type (b->events[i])]++;
+ else
+ bl[PERFMON_EVENT_TYPE_GENERAL]++;
+ }
+
+ /* consciously ignoring pseudo events here */
+ return cpu[PERFMON_EVENT_TYPE_GENERAL] >= bl[PERFMON_EVENT_TYPE_GENERAL] &&
+ cpu[PERFMON_EVENT_TYPE_FIXED] >= bl[PERFMON_EVENT_TYPE_FIXED];
+}
+
+u8
+intel_bundle_supported (perfmon_bundle_t *b)
+{
+ perfmon_cpu_supports_t *supports = b->cpu_supports;
+
+ if (!is_enough_counters (b))
+ return 0;
+
+ if (!b->cpu_supports)
+ return 1;
+
+ for (int i = 0; i < b->n_cpu_supports; ++i)
+ if (supports[i].cpu_supports ())
+ return 1;
+
+ return 0;
+}
+
PERFMON_REGISTER_SOURCE (intel_core) = {
.name = "intel-core",
.description = "intel arch core events",
.events = events,
.n_events = ARRAY_LEN (events),
.init_fn = intel_core_init,
+ .get_event_type = intel_core_get_event_type,
.format_config = format_intel_core_config,
+ .bundle_support = intel_bundle_supported,
+ .config_dispatch_wrapper = intel_config_dispatch_wrapper,
};
diff --git a/src/plugins/perfmon/intel/core.h b/src/plugins/perfmon/intel/core.h
index cd5c31ba260..b2b0434acb3 100644
--- a/src/plugins/perfmon/intel/core.h
+++ b/src/plugins/perfmon/intel/core.h
@@ -16,12 +16,46 @@
#ifndef __perfmon_intel_h
#define __perfmon_intel_h
+u8 intel_bundle_supported (perfmon_bundle_t *b);
+
#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask) \
((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 | \
(cmask) << 24)
/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
* counter_unit, name, suffix, description */
+#define foreach_perf_intel_peusdo_event \
+ _ (0x00, 0x80, 0, 0, 0, 0x00, TOPDOWN, L1_RETIRING_METRIC, \
+ "TMA retiring slots for an unhalted logical processor.") \
+ _ (0x00, 0x81, 0, 0, 0, 0x00, TOPDOWN, L1_BAD_SPEC_METRIC, \
+ "TMA bad spec slots or an unhalted logical processor.") \
+ _ (0x00, 0x82, 0, 0, 0, 0x00, TOPDOWN, L1_FE_BOUND_METRIC, \
+ "TMA fe bound slots for an unhalted logical processor.") \
+ _ (0x00, 0x83, 0, 0, 0, 0x00, TOPDOWN, L1_BE_BOUND_METRIC, \
+ "TMA be bound slots for an unhalted logical processor.") \
+ _ (0x00, 0x84, 0, 0, 0, 0x00, TOPDOWN, L2_HEAVYOPS_METRIC, \
+ "TMA heavy operations for an unhalted logical processor.") \
+ _ (0x00, 0x85, 0, 0, 0, 0x00, TOPDOWN, L2_BMISPRED_METRIC, \
+ "TMA branch misprediction slots or an unhalted logical processor.") \
+ _ (0x00, 0x86, 0, 0, 0, 0x00, TOPDOWN, L2_FETCHLAT_METRIC, \
+ "TMA fetch latency slots for an unhalted logical processor.") \
+ _ (0x00, 0x87, 0, 0, 0, 0x00, TOPDOWN, L2_MEMBOUND_METRIC, \
+ "TMA mem bound slots for an unhalted logical processor.")
+
+/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
+ * counter_unit, name, suffix, description */
+#define foreach_perf_intel_tremont_event \
+ _ (0xc2, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_RETIRING_TREMONT, \
+ "TMA retiring slots for an unhalted logical processor.") \
+ _ (0x71, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_FE_BOUND_TREMONT, \
+ "TMA fe bound slots for an unhalted logical processor.") \
+ _ (0x73, 0x06, 0, 0, 0, 0x00, TOPDOWN, L1_BAD_SPEC_TREMONT, \
+ "TMA bad spec slots or an unhalted logical processor.") \
+ _ (0x74, 0x00, 0, 0, 0, 0x00, TOPDOWN, L1_BE_BOUND_TREMONT, \
+ "TMA be bound slots for an unhalted logical processor.")
+
+/* EventCode, UMask, EdgeDetect, AnyThread, Invert, CounterMask
+ * counter_unit, name, suffix, description */
#define foreach_perf_intel_core_event \
_ (0x00, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, THREAD, \
"Core cycles when the thread is not in halt state") \
@@ -29,8 +63,6 @@
"Reference cycles when the core is not in halt state.") \
_ (0x00, 0x04, 0, 0, 0, 0x00, TOPDOWN, SLOTS, \
"TMA slots available for an unhalted logical processor.") \
- _ (0x00, 0x80, 0, 0, 0, 0x00, TOPDOWN, L1_METRICS, \
- "TMA slots metrics for an unhalted logical processor.") \
_ (0x03, 0x02, 0, 0, 0, 0x00, LD_BLOCKS, STORE_FORWARD, \
"Loads blocked due to overlapping with a preceding store that cannot be" \
" forwarded.") \
@@ -59,6 +91,12 @@
_ (0x0D, 0x01, 0, 0, 0, 0x00, INT_MISC, RECOVERY_CYCLES, \
"Core cycles the allocator was stalled due to recovery from earlier " \
"clear event for this thread (e.g. misprediction or memory nuke)") \
+ _ (0x0D, 0x10, 0, 0, 0, 0x00, INT_MISC, UOP_DROPPING, \
+ "Estimated number of Top-down Microarchitecture Analysis slots that got" \
+ " due to non front-end reasons") \
+ _ (0x0D, 0x80, 0, 0, 0, 0x00, INT_MISC, CLEAR_RESTEER_CYCLES, \
+ "Counts cycles after recovery from a branch misprediction or machine" \
+ "clear till the first uop is issued from the resteered path.") \
_ (0x0E, 0x01, 0, 0, 0, 0x00, UOPS_ISSUED, ANY, \
"Uops that Resource Allocation Table (RAT) issues to Reservation " \
"Station (RS)") \
@@ -93,12 +131,80 @@
_ (0x51, 0x01, 0, 0, 0, 0x00, L1D, REPLACEMENT, \
"L1D data line replacements") \
_ (0x51, 0x04, 0, 0, 0, 0x00, L1D, M_EVICT, "L1D data line evictions") \
+ _ (0x79, 0x04, 0, 0, 0, 0x00, IDQ, MITE_UOPS, \
+ "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
+ "from the MITE path.") \
+ _ (0x79, 0x08, 0, 0, 0, 0x00, IDQ, DSB_UOPS, \
+ "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
+ "from the Decode Stream Buffer (DSB) path.") \
+ _ (0x79, 0x30, 0, 0, 0, 0x00, IDQ, MS_UOPS, \
+ "Counts the number of uops delivered to Instruction Decode Queue (IDQ) " \
+ "from the Microcode Sequencer (MS) path.") \
+ _ (0x79, 0x30, 1, 0, 0, 0x01, IDQ, MS_SWITCHES, \
+ "Number of switches from DSB or MITE to the MS") \
+ _ ( \
+ 0x80, 0x04, 0, 0, 0, 0x00, ICACHE_16B, IFDATA_STALL, \
+ "Cycles where a code fetch is stalled due to L1 instruction cache miss.") \
+ _ (0x83, 0x04, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_STALL, \
+ "Cycles where a code fetch is stalled due to L1 instruction cache tag " \
+ "miss.") \
_ (0x83, 0x02, 0, 0, 0, 0x00, ICACHE_64B, IFTAG_MISS, \
"Instruction fetch tag lookups that miss in the instruction cache " \
"(L1I). Counts at 64-byte cache-line granularity.") \
- _ (0x9C, 0x01, 0, 0, 0, 0x00, IDQ_UOPS_NOT_DELIVERED, CORE, \
+ _ (0x9C, 0x01, 0, 0, 0, 0x05, IDQ_UOPS_NOT_DELIVERED, CORE, \
"Uops not delivered to Resource Allocation Table (RAT) per thread when " \
"backend of the machine is not stalled") \
+ _ (0x9C, 0x01, 0, 0, 1, 0x01, IDQ_UOPS_NOT_DELIVERED, CYCLES_FE_WAS_OK, \
+ "Cycles with 4 uops delivered by the front end or Resource Allocation " \
+ "Table (RAT) was stalling FE.x") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x01, IDQ_UOPS_NOT_DELIVERED_CYCLES_3_UOP_DELIV, \
+ CORE, "Cycles with 3 uops delivered by the front end.") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x02, IDQ_UOPS_NOT_DELIVERED_CYCLES_2_UOP_DELIV, \
+ CORE, "Cycles with 2 uops delivered by the front end.") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x03, IDQ_UOPS_NOT_DELIVERED_CYCLES_1_UOP_DELIV, \
+ CORE, "Cycles with 1 uops delivered by the front end.") \
+ _ (0x9C, 0x01, 0, 0, 0, 0x04, IDQ_UOPS_NOT_DELIVERED_CYCLES_0_UOP_DELIV, \
+ CORE, "Cycles with 0 uops delivered by the front end.") \
+ _ (0xA1, 0x01, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_0, \
+ "Number of uops executed on port 0") \
+ _ (0xA1, 0x02, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_1, \
+ "Number of uops executed on port 1") \
+ _ (0xA1, 0x04, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_2_3, \
+ "Number of uops executed on port 2 and 3") \
+ _ (0xA1, 0x10, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_4_9, \
+ "Number of uops executed on port 4 and 9") \
+ _ (0xA1, 0x20, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_5, \
+ "Number of uops executed on port 5") \
+ _ (0xA1, 0x40, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_6, \
+ "Number of uops executed on port 6") \
+ _ (0xA1, 0x80, 0, 0, 0, 0x00, UOPS_DISPATCHED, PORT_7_8, \
+ "Number of uops executed on port 7 and 8") \
+ _ (0xA2, 0x08, 0, 0, 0, 0x00, RESOURCE_STALLS, SB, \
+ "Counts allocation stall cycles caused by the store buffer (SB) being " \
+ "full. This counts cycles that the pipeline back-end blocked uop " \
+ "delivery" \
+ "from the front-end.") \
+ _ (0xA3, 0x04, 0, 0, 0, 0x04, CYCLE_ACTIVITY, STALLS_TOTAL, \
+ "Total execution stalls.") \
+ _ (0xA3, 0x05, 0, 0, 0, 0x05, CYCLE_ACTIVITY, STALLS_L2_MISS, \
+ "Execution stalls while L2 cache miss demand load is outstanding") \
+ _ (0xA3, 0x06, 0, 0, 0, 0x06, CYCLE_ACTIVITY, STALLS_L3_MISS, \
+ "Execution stalls while L3 cache miss demand load is outstanding") \
+ _ (0xA3, 0x0C, 0, 0, 0, 0x0C, CYCLE_ACTIVITY, STALLS_L1D_MISS, \
+ "Execution stalls while L1 cache miss demand load is outstanding") \
+ _ (0xA3, 0x14, 0, 0, 0, 0x14, CYCLE_ACTIVITY, STALLS_MEM_ANY, \
+ "Execution stalls while memory subsystem has an outstanding load.") \
+ _ (0xA6, 0x40, 0, 0, 0, 0x02, EXE_ACTIVITY, BOUND_ON_STORES, \
+ "Cycles where the Store Buffer was full and no loads caused an " \
+ "execution stall.") \
+ _ (0xA8, 0x01, 0, 0, 0, 0x00, LSD, UOPS, \
+ "Counts the number of uops delivered to the back-end by the LSD" \
+ "(Loop Stream Detector)") \
+ _ (0xAB, 0x02, 0, 0, 0, 0x00, DSB2MITE_SWITCHES, PENALTY_CYCLES, \
+ "This event counts fetch penalty cycles when a transition occurs from" \
+ "DSB to MITE.") \
+ _ (0xB1, 0x01, 0, 0, 0, 0x00, UOPS_EXECUTED, THREAD, \
+ "Counts the number of uops to be executed per-thread each cycle.") \
_ (0xC0, 0x00, 0, 0, 0, 0x00, INST_RETIRED, ANY_P, \
"Number of instructions retired. General Counter - architectural event") \
_ (0xC2, 0x02, 0, 0, 0, 0x00, UOPS_RETIRED, RETIRE_SLOTS, \
@@ -109,8 +215,6 @@
"All mispredicted macro branch instructions retired.") \
_ (0xC4, 0x20, 0, 0, 0, 0x00, BR_INST_RETIRED, NEAR_TAKEN, \
"Taken branch instructions retired.") \
- _ (0xD0, 0x81, 0, 0, 0, 0x00, MEM_INST_RETIRED, ALL_LOADS, \
- "All retired load instructions.") \
_ (0xD0, 0x82, 0, 0, 0, 0x00, MEM_INST_RETIRED, ALL_STORES, \
"All retired store instructions.") \
_ (0xD1, 0x01, 0, 0, 0, 0x00, MEM_LOAD_RETIRED, L1_HIT, \
@@ -152,10 +256,20 @@
_ (0xD3, 0x08, 0, 0, 0, 0x00, MEM_LOAD_L3_MISS_RETIRED, REMOTE_FWD, \
"Retired load instructions whose data sources was forwarded from a " \
"remote cache") \
+ _ (0xE6, 0x01, 0, 0, 0, 0x00, BACLEARS, ANY, \
+ "Counts the total number when the front end is resteered, mainly when " \
+ "the BPU cannot provide a correct prediction and this is corrected by " \
+ "other branch handling mechanisms at the front end.") \
+ _ (0xEC, 0x02, 0, 0, 0, 0x00, CPU_CLK_UNHALTED, DISTRIBUTED, \
+ "Cycle counts are evenly distributed between active threads in the " \
+ " Core") \
_ (0xF0, 0x40, 0, 0, 0, 0x00, L2_TRANS, L2_WB, \
"L2 writebacks that access L2 cache") \
_ (0xF1, 0x1F, 0, 0, 0, 0x00, L2_LINES_IN, ALL, \
"L2 cache lines filling L2") \
+ _ (0xF4, 0x04, 0, 0, 0, 0x00, SQ_MISC, SQ_FULL, \
+ "Counts the cycles for which the thread is active and the superQ cannot" \
+ "take any more entries.") \
_ (0xFE, 0x02, 0, 0, 0, 0x00, IDI_MISC, WB_UPGRADE, \
"Counts number of cache lines that are allocated and written back to L3" \
" with the intention that they are more likely to be reused shortly") \
@@ -167,9 +281,10 @@ typedef enum
{
#define _(event, umask, edge, any, inv, cmask, name, suffix, desc) \
INTEL_CORE_E_##name##_##suffix,
- foreach_perf_intel_core_event
+ foreach_perf_intel_core_event foreach_perf_intel_peusdo_event
+ foreach_perf_intel_tremont_event
#undef _
- INTEL_CORE_N_EVENTS,
+ INTEL_CORE_N_EVENTS,
} perf_intel_core_event_t;
#endif
diff --git a/src/plugins/perfmon/intel/dispatch_wrapper.c b/src/plugins/perfmon/intel/dispatch_wrapper.c
new file mode 100644
index 00000000000..d424b54b85f
--- /dev/null
+++ b/src/plugins/perfmon/intel/dispatch_wrapper.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "vppinfra/string.h"
+#include <vnet/vnet.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+#include <linux/limits.h>
+#include <sys/ioctl.h>
+
+#include <perfmon/perfmon.h>
+
+vlib_node_function_t *perfmon_dispatch_wrappers[PERF_MAX_EVENTS + 1];
+
+static_always_inline void
+perfmon_read_pmcs (u64 *counters, u32 *indexes, u8 n_counters)
+{
+ for (int i = 0; i < n_counters; i++)
+ counters[i] = _rdpmc (indexes[i] - 1);
+}
+
+static_always_inline uword
+perfmon_dispatch_wrapper_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u8 n_events)
+{
+ perfmon_main_t *pm = &perfmon_main;
+ perfmon_thread_runtime_t *rt =
+ vec_elt_at_index (pm->thread_runtimes, vm->thread_index);
+ perfmon_node_stats_t *s =
+ vec_elt_at_index (rt->node_stats, node->node_index);
+
+ struct
+ {
+ u64 t[2][PERF_MAX_EVENTS];
+ } samples;
+ uword rv;
+
+ clib_prefetch_load (s);
+
+ perfmon_read_pmcs (&samples.t[0][0], &rt->indexes[0], n_events);
+ rv = node->function (vm, node, frame);
+ perfmon_read_pmcs (&samples.t[1][0], &rt->indexes[0], n_events);
+
+ if (rv == 0)
+ return rv;
+
+ s->n_calls += 1;
+ s->n_packets += rv;
+
+ for (int i = 0; i < n_events; i++)
+ {
+ if (!(rt->preserve_samples & 1 << i))
+ {
+ s->value[i] += samples.t[1][i] - samples.t[0][i];
+ }
+ else
+ {
+ s->t[0].value[i] = samples.t[0][i];
+ s->t[1].value[i] = samples.t[1][i];
+ }
+ }
+
+ return rv;
+}
+
+static_always_inline u32
+perfmon_mmap_read_index (const struct perf_event_mmap_page *mmap_page)
+{
+ u32 idx;
+ u32 seq;
+
+ /* See documentation in /usr/include/linux/perf_event.h, for more details
+ * but the 2 main important things are:
+ * 1) if seq != mmap_page->lock, it means the kernel is currently updating
+ * the user page and we need to read it again
+ * 2) if idx == 0, it means the perf event is currently turned off and we
+ * just need to read the kernel-updated 'offset', otherwise we must also
+ * add the current hw value (hence rdmpc) */
+ do
+ {
+ seq = mmap_page->lock;
+ CLIB_COMPILER_BARRIER ();
+
+ idx = mmap_page->index;
+
+ CLIB_COMPILER_BARRIER ();
+ }
+ while (mmap_page->lock != seq);
+
+ return idx;
+}
+
+static_always_inline clib_error_t *
+read_mmap_indexes (perfmon_bundle_t *b)
+{
+ perfmon_main_t *pm = &perfmon_main;
+ for (int i = 0; i < vec_len (pm->thread_runtimes); i++)
+ {
+ perfmon_thread_runtime_t *tr;
+ tr = vec_elt_at_index (pm->thread_runtimes, i);
+
+ for (int j = 0; j < b->n_events; j++)
+ {
+ tr->indexes[j] = perfmon_mmap_read_index (tr->mmap_pages[j]);
+
+ /* if a zero index is returned generate error */
+ if (!tr->indexes[j])
+ {
+ return clib_error_return (0, "invalid rdpmc index");
+ }
+ }
+ }
+ return 0;
+}
+
+clib_error_t *
+intel_config_dispatch_wrapper (perfmon_bundle_t *b,
+ vlib_node_function_t **dispatch_wrapper)
+{
+ clib_error_t *err = 0;
+ if ((err = read_mmap_indexes (b)) != 0)
+ return err;
+
+ (*dispatch_wrapper) = perfmon_dispatch_wrappers[b->n_events];
+ return 0;
+}
+
+#define foreach_n_events \
+ _ (1) _ (2) _ (3) _ (4) _ (5) _ (6) _ (7) _ (8) _ (9) _ (10) _ (11) _ (12)
+
+#define _(x) \
+ static uword perfmon_dispatch_wrapper##x ( \
+ vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) \
+ { \
+ return perfmon_dispatch_wrapper_inline (vm, node, frame, x); \
+ }
+
+foreach_n_events
+#undef _
+
+ vlib_node_function_t *perfmon_dispatch_wrappers[PERF_MAX_EVENTS + 1] = {
+#define _(x) [x] = &perfmon_dispatch_wrapper##x,
+ foreach_n_events
+#undef _
+ };
diff --git a/src/plugins/perfmon/intel/dispatch_wrapper.h b/src/plugins/perfmon/intel/dispatch_wrapper.h
new file mode 100644
index 00000000000..bcf4885d54d
--- /dev/null
+++ b/src/plugins/perfmon/intel/dispatch_wrapper.h
@@ -0,0 +1,18 @@
+/*
+ * Copyright (c) 2022 Arm and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+clib_error_t *
+intel_config_dispatch_wrapper (perfmon_bundle_t *b,
+ vlib_node_function_t **dispatch_wrapper);
diff --git a/src/plugins/perfmon/intel/uncore.c b/src/plugins/perfmon/intel/uncore.c
index e8939cb67c9..316ebb13571 100644
--- a/src/plugins/perfmon/intel/uncore.c
+++ b/src/plugins/perfmon/intel/uncore.c
@@ -15,6 +15,8 @@
#include <vnet/vnet.h>
#include <vppinfra/linux/sysfs.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/unix.h>
#include <perfmon/perfmon.h>
#include <perfmon/intel/core.h>
#include <perfmon/intel/uncore.h>
@@ -35,14 +37,21 @@ VLIB_REGISTER_LOG_CLASS (if_intel_uncore_log, static) = {
((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 | \
(cmask) << 24)
+static intel_uncore_unit_type_names_t uncore_unit_names[] = {
+ { INTEL_UNCORE_UNIT_IIO,
+ PERFMON_STRINGS ("PCIe0", "PCIe1", "MCP", "PCIe2", "PCIe3", "CBDMA/DMI") }
+};
+
static perfmon_event_t intel_uncore_events[] = {
-#define _(unit, event, umask, n, suffix, desc) \
+#define _(unit, event, umask, ch_mask, fc_mask, n, suffix, desc) \
[INTEL_UNCORE_E_##unit##_##n##_##suffix] = { \
- .config = (event) | (umask) << 8, \
+ .config = \
+ (event) | (umask) << 8 | (u64) (ch_mask) << 36 | (u64) (fc_mask) << 48, \
.name = #n "." #suffix, \
.description = desc, \
.type_from_instance = 1, \
.instance_type = INTEL_UNCORE_UNIT_##unit, \
+ .implemented = 1, \
},
foreach_intel_uncore_event
@@ -57,6 +66,32 @@ intel_uncore_instance_name_cmp (void *v1, void *v2)
return strcmp (i1->name, i2->name);
}
+static u8 *
+format_instance_name (intel_uncore_unit_type_t u, char *unit_fmt, u8 socket_id,
+ u8 ubox)
+{
+ u8 *s = 0;
+
+ /* uncore ubox may have specific names */
+ for (u8 i = 0; i < ARRAY_LEN (uncore_unit_names); i++)
+ {
+ intel_uncore_unit_type_names_t *n = &uncore_unit_names[i];
+
+ if (n->unit_type == u)
+ {
+ u8 *fmt = 0;
+
+ fmt = format (0, "%s (%s)%c", unit_fmt, (n->unit_names[ubox]), 0);
+ s = format (0, (char *) fmt, socket_id, ubox);
+ vec_free (fmt);
+
+ return s;
+ }
+ }
+
+ return format (0, unit_fmt, socket_id, ubox);
+}
+
static void
intel_uncore_add_unit (perfmon_source_t *src, intel_uncore_unit_type_t u,
char *name, char *type_str, char *fmt,
@@ -94,7 +129,8 @@ intel_uncore_add_unit (perfmon_source_t *src, intel_uncore_unit_type_t u,
in->type = perf_type;
in->cpu = j;
in->pid = -1;
- in->name = (char *) format (0, fmt, socket_by_cpu_id[j], i);
+ in->name =
+ (char *) format_instance_name (u, fmt, socket_by_cpu_id[j], i);
vec_terminate_c_string (in->name);
log_debug ("found %s %s", type_str, in->name);
}
@@ -114,12 +150,9 @@ intel_uncore_init (vlib_main_t *vm, perfmon_source_t *src)
u32 i, j;
u8 *s = 0;
- if ((err = clib_sysfs_read ("/sys/devices/system/node/has_cpu", "%U",
- unformat_bitmap_list, &node_bitmap)))
- {
- clib_error_free (err);
- return clib_error_return (0, "failed to discover numa topology");
- }
+ node_bitmap = os_get_online_cpu_node_bitmap ();
+ if (!node_bitmap)
+ return clib_error_return (0, "failed to discover numa topology");
clib_bitmap_foreach (i, node_bitmap)
{
@@ -132,6 +165,14 @@ intel_uncore_init (vlib_main_t *vm, perfmon_source_t *src)
goto done;
}
+ if (!cpumask)
+ {
+ clib_error_free (err);
+ err = clib_error_return (
+ 0, "while discovering numa topology: cpumask unexpectedly NULL");
+ goto done;
+ }
+
clib_bitmap_foreach (j, cpumask)
{
vec_validate_init_empty (numa_by_cpu_id, j, -1);
@@ -179,4 +220,5 @@ PERFMON_REGISTER_SOURCE (intel_uncore) = {
.n_events = INTEL_UNCORE_N_EVENTS,
.init_fn = intel_uncore_init,
.format_config = format_intel_core_config,
+ .bundle_support = intel_bundle_supported,
};
diff --git a/src/plugins/perfmon/intel/uncore.h b/src/plugins/perfmon/intel/uncore.h
index 03227d6069c..4afbffce858 100644
--- a/src/plugins/perfmon/intel/uncore.h
+++ b/src/plugins/perfmon/intel/uncore.h
@@ -18,7 +18,8 @@
#define foreach_intel_uncore_unit_type \
_ (IMC, "imc", "integrated Memory Controller (iMC)", "iMC%u/%u") \
- _ (UPI, "upi", "Ultra Path Interconnect (UPI)", "UPI%u/%u")
+ _ (UPI, "upi", "Ultra Path Interconnect (UPI)", "UPI%u/%u") \
+ _ (IIO, "iio", "Internal IO (IIO)", "IIO%u/%u")
typedef enum
{
@@ -28,21 +29,60 @@ typedef enum
INTEL_UNCORE_N_UNITS,
} intel_uncore_unit_type_t;
+typedef struct
+{
+ intel_uncore_unit_type_t unit_type;
+ char **unit_names;
+} intel_uncore_unit_type_names_t;
+
#define PERF_INTEL_CODE(event, umask, edge, any, inv, cmask) \
((event) | (umask) << 8 | (edge) << 18 | (any) << 21 | (inv) << 23 | \
(cmask) << 24)
-/* Type, EventCode, UMask, name, suffix, description */
+/* Type, EventCode, UMask, ch_mask, fc_mask, name, suffix, description */
#define foreach_intel_uncore_event \
- _ (IMC, 0x04, 0x03, UNC_M_CAS_COUNT, RD, \
+ _ (IMC, 0x04, 0x03, 0, 0, UNC_M_CAS_COUNT, RD, \
"All DRAM Read CAS Commands issued (including underfills)") \
- _ (IMC, 0x04, 0x0c, UNC_M_CAS_COUNT, WR, \
+ _ (IMC, 0x04, 0x0c, 0, 0, UNC_M_CAS_COUNT, WR, \
"All DRAM Write CAS commands issued") \
- _ (IMC, 0x04, 0x0f, UNC_M_CAS_COUNT, ALL, "All DRAM CAS commands issued")
+ _ (IMC, 0x04, 0x0f, 0, 0, UNC_M_CAS_COUNT, ALL, \
+ "All DRAM CAS commands issued") \
+ _ (IIO, 0x83, 0x01, 0x1, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART0, WR, \
+ "Four byte data request of the CPU : Card writing to DRAM") \
+ _ (IIO, 0x83, 0x01, 0x2, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART1, WR, \
+ "Four byte data request of the CPU : Card writing to DRAM") \
+ _ (IIO, 0x83, 0x01, 0x4, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART2, WR, \
+ "Four byte data request of the CPU : Card writing to DRAM") \
+ _ (IIO, 0x83, 0x01, 0x8, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART3, WR, \
+ "Four byte data request of the CPU : Card writing to DRAM") \
+ _ (IIO, 0x83, 0x04, 0x1, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART0, RD, \
+ "Four byte data request of the CPU : Card reading from DRAM") \
+ _ (IIO, 0x83, 0x04, 0x2, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART1, RD, \
+ "Four byte data request of the CPU : Card reading from DRAM") \
+ _ (IIO, 0x83, 0x04, 0x4, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART2, RD, \
+ "Four byte data request of the CPU : Card reading from DRAM") \
+ _ (IIO, 0x83, 0x04, 0x8, 0x7, UNC_IIO_DATA_REQ_OF_CPU_PART3, RD, \
+ "Four byte data request of the CPU : Card reading from DRAM") \
+ _ (IIO, 0xC0, 0x01, 0x1, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART0, WR, \
+ "Data requested by the CPU : Core writing to Card's MMIO space") \
+ _ (IIO, 0xC0, 0x01, 0x2, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART1, WR, \
+ "Data requested by the CPU : Core writing to Card's MMIO space") \
+ _ (IIO, 0xC0, 0x01, 0x4, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART2, WR, \
+ "Data requested by the CPU : Core writing to Card's MMIO space") \
+ _ (IIO, 0xC0, 0x01, 0x8, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART3, WR, \
+ "Data requested by the CPU : Core writing to Card's MMIO space") \
+ _ (IIO, 0x83, 0x80, 0x1, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART0, RD, \
+ "Data requested by the CPU : Core reading from Card's MMIO space") \
+ _ (IIO, 0x83, 0x80, 0x2, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART1, RD, \
+ "Data requested by the CPU : Core reading from Card's MMIO space") \
+ _ (IIO, 0x83, 0x80, 0x4, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART2, RD, \
+ "Data requested by the CPU : Core reading from Card's MMIO space") \
+ _ (IIO, 0x83, 0x80, 0x8, 0x7, UNC_IIO_DATA_REQ_BY_CPU_PART3, RD, \
+ "Data requested by the CPU : Core reading from Card's MMIO space")
typedef enum
{
-#define _(unit, event, umask, name, suffix, desc) \
+#define _(unit, event, umask, ch_mask, fc_mask, name, suffix, desc) \
INTEL_UNCORE_E_##unit##_##name##_##suffix,
foreach_intel_uncore_event
#undef _
diff --git a/src/plugins/perfmon/linux.c b/src/plugins/perfmon/linux.c
index 3715267266a..ef21f2d72fd 100644
--- a/src/plugins/perfmon/linux.c
+++ b/src/plugins/perfmon/linux.c
@@ -39,7 +39,12 @@ typedef enum
static perfmon_event_t events[] = {
#define _(n, s) \
- [n] = { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_##n, .name = s },
+ [n] = { \
+ .type = PERF_TYPE_SOFTWARE, \
+ .config = PERF_COUNT_SW_##n, \
+ .name = s, \
+ .implemented = 1, \
+ },
foreach_perf_sw_counter
#undef _
};
diff --git a/src/plugins/perfmon/perfmon.c b/src/plugins/perfmon/perfmon.c
index 46c8cf9ca04..e618f9b314a 100644
--- a/src/plugins/perfmon/perfmon.c
+++ b/src/plugins/perfmon/perfmon.c
@@ -70,7 +70,7 @@ perfmon_reset (vlib_main_t *vm)
vec_free (tr->node_stats);
for (int j = 0; j < PERF_MAX_EVENTS; j++)
if (tr->mmap_pages[j])
- munmap (tr->mmap_pages, page_size);
+ munmap (tr->mmap_pages[j], page_size);
}
vec_free (pm->thread_runtimes);
@@ -97,7 +97,7 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
s = b->src;
ASSERT (b->n_events);
- if (b->type == PERFMON_BUNDLE_TYPE_NODE)
+ if (b->active_type == PERFMON_BUNDLE_TYPE_NODE)
is_node = 1;
if (s->instances_by_type == 0)
@@ -141,15 +141,19 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
vec_validate (pm->group_fds, i);
pm->group_fds[i] = -1;
+ u8 n_events_opened = 0;
for (int j = 0; j < b->n_events; j++)
{
int fd;
perfmon_event_t *e = s->events + b->events[j];
+ if (!e->implemented)
+ continue;
struct perf_event_attr pe = {
.size = sizeof (struct perf_event_attr),
.type = e->type_from_instance ? in->type : e->type,
.config = e->config,
+ .config1 = e->config1,
.exclude_kernel = e->exclude_kernel,
.read_format =
(PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
@@ -157,6 +161,7 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
.disabled = 1,
};
+ perf_event_open:
log_debug ("perf_event_open pe.type=%u pe.config=0x%x pid=%d "
"cpu=%d group_fd=%d",
pe.type, pe.config, in->pid, in->cpu, pm->group_fds[i]);
@@ -165,8 +170,17 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
if (fd == -1)
{
- err = clib_error_return_unix (0, "perf_event_open");
- goto error;
+ if (errno ==
+ EOPNOTSUPP) /* 64b counters not supported on aarch64 */
+ {
+ pe.config1 = 2; /* retry with 32b counter width */
+ goto perf_event_open;
+ }
+ else
+ {
+ err = clib_error_return_unix (0, "perf_event_open");
+ goto error;
+ }
}
vec_add1 (pm->fds_to_close, fd);
@@ -178,24 +192,26 @@ perfmon_set (vlib_main_t *vm, perfmon_bundle_t *b)
{
perfmon_thread_runtime_t *tr;
tr = vec_elt_at_index (pm->thread_runtimes, i);
- tr->mmap_pages[j] =
+ tr->mmap_pages[n_events_opened] =
mmap (0, page_size, PROT_READ, MAP_SHARED, fd, 0);
- if (tr->mmap_pages[j] == MAP_FAILED)
+ if (tr->mmap_pages[n_events_opened] == MAP_FAILED)
{
err = clib_error_return_unix (0, "mmap");
goto error;
}
}
+ n_events_opened++;
}
- if (is_node)
+ if (is_node && n_events_opened)
{
perfmon_thread_runtime_t *rt;
rt = vec_elt_at_index (pm->thread_runtimes, i);
rt->bundle = b;
- rt->n_events = b->n_events;
+ rt->n_events = n_events_opened;
rt->n_nodes = n_nodes;
+ rt->preserve_samples = b->preserve_samples;
vec_validate_aligned (rt->node_stats, n_nodes - 1,
CLIB_CACHE_LINE_BYTES);
}
@@ -236,22 +252,20 @@ perfmon_start (vlib_main_t *vm, perfmon_bundle_t *b)
return clib_error_return_unix (0, "ioctl(PERF_EVENT_IOC_ENABLE)");
}
}
- if (b->type == PERFMON_BUNDLE_TYPE_NODE)
+ if (b->active_type == PERFMON_BUNDLE_TYPE_NODE)
{
-
- vlib_node_function_t *funcs[PERFMON_OFFSET_TYPE_MAX];
-#define _(type, pfunc) funcs[type] = pfunc;
-
- foreach_permon_offset_type
-#undef _
-
- ASSERT (funcs[b->offset_type]);
+ vlib_node_function_t *dispatch_wrapper = NULL;
+ err = b->src->config_dispatch_wrapper (b, &dispatch_wrapper);
+ if (err || !dispatch_wrapper)
+ {
+ perfmon_reset (vm);
+ return err;
+ }
for (int i = 0; i < vlib_get_n_threads (); i++)
vlib_node_set_dispatch_wrapper (vlib_get_main_by_index (i),
- funcs[b->offset_type]);
+ dispatch_wrapper);
}
-
pm->sample_time = vlib_time_now (vm);
pm->is_running = 1;
@@ -267,7 +281,7 @@ perfmon_stop (vlib_main_t *vm)
if (pm->is_running != 1)
return clib_error_return (0, "not running");
- if (pm->active_bundle->type == PERFMON_BUNDLE_TYPE_NODE)
+ if (pm->active_bundle->active_type == PERFMON_BUNDLE_TYPE_NODE)
{
for (int i = 0; i < vlib_get_n_threads (); i++)
vlib_node_set_dispatch_wrapper (vlib_get_main_by_index (i), 0);
@@ -311,7 +325,7 @@ perfmon_init (vlib_main_t *vm)
}
hash_set_mem (pm->source_by_name, s->name, s);
- log_debug ("source '%s' regisrtered", s->name);
+ log_debug ("source '%s' registered", s->name);
s = s->next;
}
@@ -320,8 +334,6 @@ perfmon_init (vlib_main_t *vm)
{
clib_error_t *err;
uword *p;
- if (hash_get_mem (pm->bundle_by_name, b->name) != 0)
- clib_panic ("duplicate bundle name '%s'", b->name);
if ((p = hash_get_mem (pm->source_by_name, b->source)) == 0)
{
@@ -332,6 +344,13 @@ perfmon_init (vlib_main_t *vm)
}
b->src = (perfmon_source_t *) p[0];
+ if (b->src->bundle_support && !b->src->bundle_support (b))
+ {
+ log_debug ("skipping bundle '%s' - not supported", b->name);
+ b = b->next;
+ continue;
+ }
+
if (b->init_fn && ((err = (b->init_fn) (vm, b))))
{
log_warn ("skipping bundle '%s' - %U", b->name, format_clib_error,
@@ -341,8 +360,11 @@ perfmon_init (vlib_main_t *vm)
continue;
}
+ if (hash_get_mem (pm->bundle_by_name, b->name) != 0)
+ clib_panic ("duplicate bundle name '%s'", b->name);
+
hash_set_mem (pm->bundle_by_name, b->name, b);
- log_debug ("bundle '%s' regisrtered", b->name);
+ log_debug ("bundle '%s' registered", b->name);
b = b->next;
}
diff --git a/src/plugins/perfmon/perfmon.h b/src/plugins/perfmon/perfmon.h
index bba22cf6b1d..b76cf4b2138 100644
--- a/src/plugins/perfmon/perfmon.h
+++ b/src/plugins/perfmon/perfmon.h
@@ -23,7 +23,19 @@
#include <vppinfra/cpu.h>
#include <vlib/vlib.h>
-#define PERF_MAX_EVENTS 7 /* 3 fixed and 4 programmable */
+#if defined(__x86_64__)
+#define PERF_MAX_EVENTS 12 /* 4 fixed and 8 programable on ICX */
+#elif defined(__aarch64__)
+#define PERF_MAX_EVENTS 7 /* 6 events + 1 CPU cycle counter */
+#endif
+
+typedef enum
+{
+ PERFMON_EVENT_TYPE_GENERAL,
+ PERFMON_EVENT_TYPE_FIXED,
+ PERFMON_EVENT_TYPE_PSEUDO,
+ PERFMON_EVENT_TYPE_MAX,
+} perfmon_event_type_t;
typedef enum
{
@@ -31,19 +43,30 @@ typedef enum
PERFMON_BUNDLE_TYPE_NODE,
PERFMON_BUNDLE_TYPE_THREAD,
PERFMON_BUNDLE_TYPE_SYSTEM,
+ PERFMON_BUNDLE_TYPE_MAX,
+ PERFMON_BUNDLE_TYPE_NODE_OR_THREAD,
} perfmon_bundle_type_t;
+#define foreach_perfmon_bundle_type \
+ _ (PERFMON_BUNDLE_TYPE_UNKNOWN, "not supported") \
+ _ (PERFMON_BUNDLE_TYPE_NODE, "node") \
+ _ (PERFMON_BUNDLE_TYPE_THREAD, "thread") \
+ _ (PERFMON_BUNDLE_TYPE_SYSTEM, "system")
+
typedef enum
{
- PERFMON_OFFSET_TYPE_MMAP,
- PERFMON_OFFSET_TYPE_METRICS,
- PERFMON_OFFSET_TYPE_MAX,
-} perfmon_offset_type_t;
+#define _(e, str) e##_FLAG = 1 << e,
+ foreach_perfmon_bundle_type
+#undef _
+
+} perfmon_bundle_type_flag_t;
typedef struct
{
u32 type_from_instance : 1;
u32 exclude_kernel : 1;
+ u32 config1 : 2;
+ u32 implemented : 1;
union
{
u32 type;
@@ -69,15 +92,15 @@ typedef struct
} perfmon_instance_type_t;
struct perfmon_source;
-vlib_node_function_t perfmon_dispatch_wrapper_mmap;
-vlib_node_function_t perfmon_dispatch_wrapper_metrics;
-
-#define foreach_permon_offset_type \
- _ (PERFMON_OFFSET_TYPE_MMAP, perfmon_dispatch_wrapper_mmap) \
- _ (PERFMON_OFFSET_TYPE_METRICS, perfmon_dispatch_wrapper_metrics)
+typedef struct perfmon_bundle perfmon_bundle_t;
typedef clib_error_t *(perfmon_source_init_fn_t) (vlib_main_t *vm,
struct perfmon_source *);
+typedef perfmon_event_type_t (perfmon_source_get_event_type) (u32 event);
+typedef u8 (perfmon_source_bundle_support_t) (perfmon_bundle_t *);
+typedef clib_error_t *(perfmon_source_config_dispatch_wrapper_t) (
+ perfmon_bundle_t *b, vlib_node_function_t **dispatch_wrapper);
+
typedef struct perfmon_source
{
char *name;
@@ -87,31 +110,52 @@ typedef struct perfmon_source
u32 n_events;
perfmon_instance_type_t *instances_by_type;
format_function_t *format_config;
+ perfmon_source_get_event_type *get_event_type;
perfmon_source_init_fn_t *init_fn;
+ perfmon_source_bundle_support_t *bundle_support;
+ perfmon_source_config_dispatch_wrapper_t *config_dispatch_wrapper;
} perfmon_source_t;
-struct perfmon_bundle;
-
typedef clib_error_t *(perfmon_bundle_init_fn_t) (vlib_main_t *vm,
struct perfmon_bundle *);
+typedef struct
+{
+ clib_cpu_supports_func_t cpu_supports;
+ perfmon_bundle_type_t bundle_type;
+} perfmon_cpu_supports_t;
+
typedef struct perfmon_bundle
{
char *name;
char *description;
char *source;
char *footer;
- perfmon_bundle_type_t type;
- perfmon_offset_type_t offset_type;
+
+ union
+ {
+ perfmon_bundle_type_flag_t type_flags;
+ perfmon_bundle_type_t type;
+ };
+ perfmon_bundle_type_t active_type;
+
u32 events[PERF_MAX_EVENTS];
- u32 metrics[PERF_MAX_EVENTS];
u32 n_events;
+ u32 n_columns;
+
+ uword *event_disabled;
+ uword *column_disabled;
+ u8 *column_events;
+
+ u16 preserve_samples;
+
+ perfmon_cpu_supports_t *cpu_supports;
+ u32 n_cpu_supports;
perfmon_bundle_init_fn_t *init_fn;
char **column_headers;
format_function_t *format_fn;
- clib_cpu_supports_func_t cpu_supports;
/* do not set manually */
perfmon_source_t *src;
@@ -147,6 +191,8 @@ typedef struct
u16 n_nodes;
perfmon_node_stats_t *node_stats;
perfmon_bundle_t *bundle;
+ u32 indexes[PERF_MAX_EVENTS];
+ u16 preserve_samples;
struct perf_event_mmap_page *mmap_pages[PERF_MAX_EVENTS];
} perfmon_thread_runtime_t;
@@ -168,6 +214,41 @@ typedef struct
extern perfmon_main_t perfmon_main;
+#define PERFMON_BUNDLE_TYPE_TO_FLAGS(type) \
+ ({ \
+ uword rtype = 0; \
+ if (type == PERFMON_BUNDLE_TYPE_NODE_OR_THREAD) \
+ rtype = \
+ 1 << PERFMON_BUNDLE_TYPE_THREAD | 1 << PERFMON_BUNDLE_TYPE_NODE; \
+ else \
+ rtype = 1 << type; \
+ rtype; \
+ })
+
+always_inline uword
+perfmon_cpu_update_bundle_type (perfmon_bundle_t *b)
+{
+ perfmon_cpu_supports_t *supports = b->cpu_supports;
+ uword type = 0;
+
+ /* either supports or b->type should be set, but not both */
+ ASSERT (!!supports ^ !!b->type);
+
+ /* if nothing specific for this bundle, go with the defaults */
+ if (!supports)
+ type = PERFMON_BUNDLE_TYPE_TO_FLAGS (b->type);
+ else
+ {
+ /* more than one type may be supported by a given bundle */
+ for (int i = 0; i < b->n_cpu_supports; ++i)
+ if (supports[i].cpu_supports ())
+ type |= PERFMON_BUNDLE_TYPE_TO_FLAGS (supports[i].bundle_type);
+ }
+
+ return type;
+}
+#undef PERFMON_BUNDLE_TYPE_TO_FLAGS
+
#define PERFMON_REGISTER_SOURCE(x) \
perfmon_source_t __perfmon_source_##x; \
static void __clib_constructor __perfmon_source_registration_##x (void) \
@@ -184,6 +265,8 @@ extern perfmon_main_t perfmon_main;
{ \
perfmon_main_t *pm = &perfmon_main; \
__perfmon_bundle_##x.next = pm->bundles; \
+ __perfmon_bundle_##x.type_flags = \
+ perfmon_cpu_update_bundle_type (&__perfmon_bundle_##x); \
pm->bundles = &__perfmon_bundle_##x; \
} \
perfmon_bundle_t __perfmon_bundle_##x
@@ -195,4 +278,7 @@ clib_error_t *perfmon_stop (vlib_main_t *vm);
#define PERFMON_STRINGS(...) \
(char *[]) { __VA_ARGS__, 0 }
+#define PERFMON_COLUMN_EVENTS(...) \
+ (u8[]) { __VA_ARGS__ }
+
#endif
diff --git a/src/plugins/ping/CMakeLists.txt b/src/plugins/ping/CMakeLists.txt
index 2828f769fcc..d0040ff373a 100644
--- a/src/plugins/ping/CMakeLists.txt
+++ b/src/plugins/ping/CMakeLists.txt
@@ -14,4 +14,9 @@
add_vpp_plugin(ping
SOURCES
ping.c
+ ping.h
+ ping_api.c
+
+ API_FILES
+ ping.api
)
diff --git a/src/plugins/l2e/l2e.api b/src/plugins/ping/ping.api
index 586e2bae5ca..4cf043f5c31 100644
--- a/src/plugins/l2e/l2e.api
+++ b/src/plugins/ping/ping.api
@@ -1,6 +1,6 @@
/* Hey Emacs use -*- mode: C -*- */
/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2023 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -14,22 +14,29 @@
* limitations under the License.
*/
-option version = "1.0.0";
+option version = "0.1.0";
import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
-/** \brief L2 emulation at L3
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param sw_if_index - interface the operation is applied to
- @param enable - Turn the service on or off
-*/
-autoreply define l2_emulation
+autoreply define want_ping_finished_events
{
- option status="in_progress";
u32 client_index;
u32 context;
- vl_api_interface_index_t sw_if_index;
- bool enable;
+ vl_api_address_t address;
+ u32 repeat [default=1];
+ f64 interval [default=1.0];
+};
+
+define ping_finished_event
+{
+ u32 client_index;
+ u32 request_count;
+ u32 reply_count;
+};
+
+service {
+ rpc want_ping_finished_events returns want_ping_finished_events_reply
+ events ping_finished_event;
};
/*
diff --git a/src/plugins/ping/ping.c b/src/plugins/ping/ping.c
index d09babd0be2..40e4495aaf2 100644
--- a/src/plugins/ping/ping.c
+++ b/src/plugins/ping/ping.c
@@ -19,8 +19,9 @@
#include <vlib/unix/unix.h>
#include <vnet/fib/ip6_fib.h>
#include <vnet/fib/ip4_fib.h>
-#include <vnet/fib/fib_sas.h>
+#include <vnet/ip/ip_sas.h>
#include <vnet/ip/ip6_link.h>
+#include <vnet/ip/ip6_ll_table.h>
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
@@ -98,70 +99,6 @@ format_ip46_ping_result (u8 * s, va_list * args)
*
*/
-
-static_always_inline uword
-get_cli_process_id_by_icmp_id_mt (vlib_main_t * vm, u16 icmp_id)
-{
- ping_main_t *pm = &ping_main;
- uword cli_process_id = PING_CLI_UNKNOWN_NODE;
- ping_run_t *pr;
-
- clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
- vec_foreach (pr, pm->active_ping_runs)
- {
- if (pr->icmp_id == icmp_id)
- {
- cli_process_id = pr->cli_process_id;
- break;
- }
- }
- clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
- return cli_process_id;
-}
-
-
-static_always_inline void
-set_cli_process_id_by_icmp_id_mt (vlib_main_t * vm, u16 icmp_id,
- uword cli_process_id)
-{
- ping_main_t *pm = &ping_main;
- ping_run_t *pr;
-
- clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
- vec_foreach (pr, pm->active_ping_runs)
- {
- if (pr->icmp_id == icmp_id)
- {
- pr->cli_process_id = cli_process_id;
- goto have_found_and_set;
- }
- }
- /* no such key yet - add a new one */
- ping_run_t new_pr = {.icmp_id = icmp_id,.cli_process_id = cli_process_id };
- vec_add1 (pm->active_ping_runs, new_pr);
-have_found_and_set:
- clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
-}
-
-
-static_always_inline void
-clear_cli_process_id_by_icmp_id_mt (vlib_main_t * vm, u16 icmp_id)
-{
- ping_main_t *pm = &ping_main;
- ping_run_t *pr;
-
- clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
- vec_foreach (pr, pm->active_ping_runs)
- {
- if (pr->icmp_id == icmp_id)
- {
- vec_del1 (pm->active_ping_runs, pm->active_ping_runs - pr);
- break;
- }
- }
- clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
-}
-
static_always_inline int
ip46_get_icmp_id_and_seq (vlib_main_t * vm, vlib_buffer_t * b0,
u16 * out_icmp_id, u16 * out_icmp_seq, int is_ip6)
@@ -338,7 +275,6 @@ ip6_icmp_echo_reply_node_fn (vlib_main_t * vm,
1 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_echo_reply_node, static) =
{
.function = ip6_icmp_echo_reply_node_fn,
@@ -364,7 +300,6 @@ VLIB_REGISTER_NODE (ip4_icmp_echo_reply_node, static) =
[ICMP46_ECHO_REPLY_NEXT_PUNT] = "ip4-punt",
},
};
-/* *INDENT-ON* */
static uword
ip4_icmp_echo_request (vlib_main_t * vm,
@@ -559,7 +494,6 @@ format_icmp_input_trace (u8 * s, va_list * va)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
.function = ip4_icmp_echo_request,
.name = "ip4-icmp-echo-request",
@@ -573,7 +507,200 @@ VLIB_REGISTER_NODE (ip4_icmp_echo_request_node,static) = {
[0] = "ip4-load-balance",
},
};
-/* *INDENT-ON* */
+
+typedef enum
+{
+ ICMP6_ECHO_REQUEST_NEXT_LOOKUP,
+ ICMP6_ECHO_REQUEST_NEXT_OUTPUT,
+ ICMP6_ECHO_REQUEST_N_NEXT,
+} icmp6_echo_request_next_t;
+
+static uword
+ip6_icmp_echo_request (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u32 *from, *to_next;
+ u32 n_left_from, n_left_to_next, next_index;
+ ip6_main_t *im = &ip6_main;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1,
+ sizeof (icmp6_input_trace_t));
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 2 && n_left_to_next > 2)
+ {
+ vlib_buffer_t *p0, *p1;
+ ip6_header_t *ip0, *ip1;
+ icmp46_header_t *icmp0, *icmp1;
+ ip6_address_t tmp0, tmp1;
+ ip_csum_t sum0, sum1;
+ u32 bi0, bi1;
+ u32 fib_index0, fib_index1;
+ u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+ u32 next1 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+
+ bi0 = to_next[0] = from[0];
+ bi1 = to_next[1] = from[1];
+
+ from += 2;
+ n_left_from -= 2;
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ p1 = vlib_get_buffer (vm, bi1);
+ ip0 = vlib_buffer_get_current (p0);
+ ip1 = vlib_buffer_get_current (p1);
+ icmp0 = ip6_next_header (ip0);
+ icmp1 = ip6_next_header (ip1);
+
+ /* Check icmp type to echo reply and update icmp checksum. */
+ sum0 = icmp0->checksum;
+ sum1 = icmp1->checksum;
+
+ ASSERT (icmp0->type == ICMP6_echo_request);
+ ASSERT (icmp1->type == ICMP6_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+ sum1 = ip_csum_update (sum1, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+
+ icmp0->checksum = ip_csum_fold (sum0);
+ icmp1->checksum = ip_csum_fold (sum1);
+
+ icmp0->type = ICMP6_echo_reply;
+ icmp1->type = ICMP6_echo_reply;
+
+ /* Swap source and destination address. */
+ tmp0 = ip0->src_address;
+ tmp1 = ip1->src_address;
+
+ ip0->src_address = ip0->dst_address;
+ ip1->src_address = ip1->dst_address;
+
+ ip0->dst_address = tmp0;
+ ip1->dst_address = tmp1;
+
+ /* New hop count. */
+ ip0->hop_limit = im->host_config.ttl;
+ ip1->hop_limit = im->host_config.ttl;
+
+ if (ip6_address_is_link_local_unicast (&ip0->src_address) &&
+ !ip6_address_is_link_local_unicast (&ip0->dst_address))
+ {
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ }
+ if (ip6_address_is_link_local_unicast (&ip1->src_address) &&
+ !ip6_address_is_link_local_unicast (&ip1->dst_address))
+ {
+ fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p1)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p1)->sw_if_index[VLIB_TX] = fib_index1;
+ }
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ p1->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+ /* verify speculative enqueues, maybe switch current next frame */
+ /* if next0==next1==next_index then nothing special needs to be done
+ */
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t *p0;
+ ip6_header_t *ip0;
+ icmp46_header_t *icmp0;
+ u32 bi0;
+ ip6_address_t tmp0;
+ ip_csum_t sum0;
+ u32 fib_index0;
+ u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
+
+ bi0 = to_next[0] = from[0];
+
+ from += 1;
+ n_left_from -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, bi0);
+ ip0 = vlib_buffer_get_current (p0);
+ icmp0 = ip6_next_header (ip0);
+
+ /* Check icmp type to echo reply and update icmp checksum. */
+ sum0 = icmp0->checksum;
+
+ ASSERT (icmp0->type == ICMP6_echo_request);
+ sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
+ icmp46_header_t, type);
+
+ icmp0->checksum = ip_csum_fold (sum0);
+
+ icmp0->type = ICMP6_echo_reply;
+
+ /* Swap source and destination address. */
+ tmp0 = ip0->src_address;
+ ip0->src_address = ip0->dst_address;
+ ip0->dst_address = tmp0;
+
+ ip0->hop_limit = im->host_config.ttl;
+
+ if (ip6_address_is_link_local_unicast (&ip0->src_address) &&
+ !ip6_address_is_link_local_unicast (&ip0->dst_address))
+ {
+ /* if original packet was to the link local, then the
+ * fib index is that of the LL table, we can't use that
+ * to foward the response if the new destination
+ * is global, so reset to the fib index of the link.
+ * In other case, the fib index we need has been written
+ * to the buffer already. */
+ fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
+ vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+ vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ }
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ /* Verify speculative enqueue, maybe switch current next frame */
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ vlib_error_count (vm, ip6_icmp_input_node.index,
+ ICMP6_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
+
+ return frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = {
+ .function = ip6_icmp_echo_request,
+ .name = "ip6-icmp-echo-request",
+
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_icmp6_input_trace,
+
+ .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT,
+ .next_nodes = {
+ [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = "ip6-lookup",
+ [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output",
+ },
+};
/*
* A swarm of address-family agnostic helper functions
@@ -682,13 +809,16 @@ ip46_get_resolving_interface (u32 fib_index, ip46_address_t * pa46,
}
static u32
-ip46_fib_table_get_index_for_sw_if_index (u32 sw_if_index, int is_ip6)
+ip46_fib_table_get_index_for_sw_if_index (u32 sw_if_index, int is_ip6,
+ ip46_address_t *pa46)
{
- u32 fib_table_index = is_ip6 ?
- ip6_fib_table_get_index_for_sw_if_index (sw_if_index) :
- ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
- return fib_table_index;
-
+ if (is_ip6)
+ {
+ if (ip6_address_is_link_local_unicast (&pa46->ip6))
+ return ip6_ll_fib_get (sw_if_index);
+ return ip6_fib_table_get_index_for_sw_if_index (sw_if_index);
+ }
+ return ip4_fib_table_get_index_for_sw_if_index (sw_if_index);
}
@@ -735,13 +865,15 @@ ip46_set_src_address (u32 sw_if_index, vlib_buffer_t * b0, int is_ip6)
{
ip6_header_t *ip6 = vlib_buffer_get_current (b0);
- res = fib_sas6_get (sw_if_index, &ip6->dst_address, &ip6->src_address);
+ res = ip6_sas_by_sw_if_index (sw_if_index, &ip6->dst_address,
+ &ip6->src_address);
}
else
{
ip4_header_t *ip4 = vlib_buffer_get_current (b0);
- res = fib_sas4_get (sw_if_index, &ip4->dst_address, &ip4->src_address);
+ res = ip4_sas_by_sw_if_index (sw_if_index, &ip4->dst_address,
+ &ip4->src_address);
}
return res;
}
@@ -870,12 +1002,10 @@ at_most_a_frame (u32 count)
}
static int
-ip46_enqueue_packet (vlib_main_t * vm, vlib_buffer_t * b0, u32 burst,
- int is_ip6)
+ip46_enqueue_packet (vlib_main_t *vm, vlib_buffer_t *b0, u32 burst,
+ u32 lookup_node_index)
{
vlib_frame_t *f = 0;
- u32 lookup_node_index =
- is_ip6 ? ip6_lookup_node.index : ip4_lookup_node.index;
int n_sent = 0;
u16 n_to_send;
@@ -978,7 +1108,7 @@ send_ip46_ping (vlib_main_t * vm,
}
else
fib_index =
- ip46_fib_table_get_index_for_sw_if_index (sw_if_index, is_ip6);
+ ip46_fib_table_get_index_for_sw_if_index (sw_if_index, is_ip6, pa46);
if (~0 == fib_index)
ERROR_OUT (SEND_PING_NO_TABLE);
@@ -1002,7 +1132,23 @@ send_ip46_ping (vlib_main_t * vm,
ip46_fix_len_and_csum (vm, l4_header_offset, data_len, b0, is_ip6);
- int n_sent = ip46_enqueue_packet (vm, b0, burst, is_ip6);
+ u32 node_index = ip6_lookup_node.index;
+ if (is_ip6)
+ {
+ if (pa46->ip6.as_u32[0] == clib_host_to_net_u32 (0xff020000))
+ {
+ node_index = ip6_rewrite_mcast_node.index;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = sw_if_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index;
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX] =
+ ip6_link_get_mcast_adj (sw_if_index);
+ }
+ }
+ else
+ {
+ node_index = ip4_lookup_node.index;
+ }
+ int n_sent = ip46_enqueue_packet (vm, b0, burst, node_index);
if (n_sent < burst)
err = SEND_PING_NO_BUFFERS;
@@ -1015,9 +1161,8 @@ done:
return err;
}
-static send_ip46_ping_result_t
-send_ip6_ping (vlib_main_t * vm,
- u32 table_id, ip6_address_t * pa6,
+send_ip46_ping_result_t
+send_ip6_ping (vlib_main_t *vm, u32 table_id, ip6_address_t *pa6,
u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len,
u32 burst, u8 verbose)
{
@@ -1027,9 +1172,8 @@ send_ip6_ping (vlib_main_t * vm,
id_host, data_len, burst, verbose, 1 /* is_ip6 */ );
}
-static send_ip46_ping_result_t
-send_ip4_ping (vlib_main_t * vm,
- u32 table_id, ip4_address_t * pa4,
+send_ip46_ping_result_t
+send_ip4_ping (vlib_main_t *vm, u32 table_id, ip4_address_t *pa4,
u32 sw_if_index, u16 seq_host, u16 id_host, u16 data_len,
u32 burst, u8 verbose)
{
@@ -1432,7 +1576,6 @@ done:
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ping_command, static) =
{
.path = "ping",
@@ -1443,7 +1586,6 @@ VLIB_CLI_COMMAND (ping_command, static) =
" [burst <count:1>] [verbose]",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
ping_cli_init (vlib_main_t * vm)
@@ -1461,18 +1603,20 @@ ping_cli_init (vlib_main_t * vm)
ip4_icmp_register_type (vm, ICMP4_echo_request,
ip4_icmp_echo_request_node.index);
+ icmp6_register_type (vm, ICMP6_echo_request,
+ ip6_icmp_echo_request_node.index);
+
+ ping_plugin_api_hookup (vm);
return 0;
}
VLIB_INIT_FUNCTION (ping_cli_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Ping (ping)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/ping/ping.h b/src/plugins/ping/ping.h
index 7826945ea8b..fdccd07b57d 100644
--- a/src/plugins/ping/ping.h
+++ b/src/plugins/ping/ping.h
@@ -52,6 +52,9 @@ typedef struct ping_run_t
typedef struct ping_main_t
{
+ /* API message ID base */
+ u16 msg_id_base;
+
ip6_main_t *ip6_main;
ip4_main_t *ip4_main;
/* a vector of current ping runs. */
@@ -69,7 +72,6 @@ extern ping_main_t ping_main;
#define PING_CLI_UNKNOWN_NODE (~0)
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u16 id;
@@ -78,7 +80,6 @@ typedef CLIB_PACKED (struct {
u8 data[0];
}) icmp46_echo_request_t;
-/* *INDENT-ON* */
typedef enum
@@ -88,4 +89,74 @@ typedef enum
ICMP46_ECHO_REPLY_N_NEXT,
} icmp46_echo_reply_next_t;
+static_always_inline uword
+get_cli_process_id_by_icmp_id_mt (vlib_main_t *vm, u16 icmp_id)
+{
+ ping_main_t *pm = &ping_main;
+ uword cli_process_id = PING_CLI_UNKNOWN_NODE;
+ ping_run_t *pr;
+
+ clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
+ vec_foreach (pr, pm->active_ping_runs)
+ {
+ if (pr->icmp_id == icmp_id)
+ {
+ cli_process_id = pr->cli_process_id;
+ break;
+ }
+ }
+ clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
+ return cli_process_id;
+}
+
+static_always_inline void
+set_cli_process_id_by_icmp_id_mt (vlib_main_t *vm, u16 icmp_id,
+ uword cli_process_id)
+{
+ ping_main_t *pm = &ping_main;
+ ping_run_t *pr;
+
+ clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
+ vec_foreach (pr, pm->active_ping_runs)
+ {
+ if (pr->icmp_id == icmp_id)
+ {
+ pr->cli_process_id = cli_process_id;
+ goto have_found_and_set;
+ }
+ }
+ /* no such key yet - add a new one */
+ ping_run_t new_pr = { .icmp_id = icmp_id, .cli_process_id = cli_process_id };
+ vec_add1 (pm->active_ping_runs, new_pr);
+have_found_and_set:
+ clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
+}
+
+static_always_inline void
+clear_cli_process_id_by_icmp_id_mt (vlib_main_t *vm, u16 icmp_id)
+{
+ ping_main_t *pm = &ping_main;
+ ping_run_t *pr;
+
+ clib_spinlock_lock_if_init (&pm->ping_run_check_lock);
+ vec_foreach (pr, pm->active_ping_runs)
+ {
+ if (pr->icmp_id == icmp_id)
+ {
+ vec_del1 (pm->active_ping_runs, pr - pm->active_ping_runs);
+ break;
+ }
+ }
+ clib_spinlock_unlock_if_init (&pm->ping_run_check_lock);
+}
+clib_error_t *ping_plugin_api_hookup (vlib_main_t *vm);
+send_ip46_ping_result_t send_ip4_ping (vlib_main_t *vm, u32 table_id,
+ ip4_address_t *pa4, u32 sw_if_index,
+ u16 seq_host, u16 id_host, u16 data_len,
+ u32 burst, u8 verbose);
+send_ip46_ping_result_t send_ip6_ping (vlib_main_t *vm, u32 table_id,
+ ip6_address_t *pa6, u32 sw_if_index,
+ u16 seq_host, u16 id_host, u16 data_len,
+ u32 burst, u8 verbose);
+
#endif /* included_ping_ping_h */
diff --git a/src/plugins/ping/ping_api.c b/src/plugins/ping/ping_api.c
new file mode 100644
index 00000000000..5578fa560f2
--- /dev/null
+++ b/src/plugins/ping/ping_api.c
@@ -0,0 +1,155 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/format_fns.h>
+#include <vnet/ip/ip_types_api.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+#include <ping/ping.h>
+
+/* define message IDs */
+#include <ping/ping.api_enum.h>
+#include <ping/ping.api_types.h>
+
+#define REPLY_MSG_ID_BASE pm->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static void
+ping_api_send_ping_event (vl_api_want_ping_finished_events_t *mp,
+ u32 request_count, u32 reply_count)
+{
+ ping_main_t *pm = &ping_main;
+
+ vl_api_registration_t *rp;
+ rp = vl_api_client_index_to_registration (mp->client_index);
+
+ vl_api_ping_finished_event_t *e = vl_msg_api_alloc (sizeof (*e));
+ clib_memset (e, 0, sizeof (*e));
+
+ e->_vl_msg_id = htons (VL_API_PING_FINISHED_EVENT + pm->msg_id_base);
+ e->request_count = htonl (request_count);
+ e->reply_count = htonl (reply_count);
+
+ vl_api_send_msg (rp, (u8 *) e);
+}
+
+void
+vl_api_want_ping_finished_events_t_handler (
+ vl_api_want_ping_finished_events_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ ping_main_t *pm = &ping_main;
+ vl_api_want_ping_finished_events_reply_t *rmp;
+
+ uword curr_proc = vlib_current_process (vm);
+
+ u16 icmp_id;
+ static u32 rand_seed = 0;
+
+ if (PREDICT_FALSE (!rand_seed))
+ rand_seed = random_default_seed ();
+
+ icmp_id = random_u32 (&rand_seed) & 0xffff;
+
+ while (~0 != get_cli_process_id_by_icmp_id_mt (vm, icmp_id))
+ icmp_id++;
+
+ set_cli_process_id_by_icmp_id_mt (vm, icmp_id, curr_proc);
+
+ int rv = 0;
+ u32 request_count = 0;
+ u32 reply_count = 0;
+
+ u32 table_id = 0;
+ ip_address_t dst_addr = { 0 };
+ u32 sw_if_index = ~0;
+ f64 ping_interval = clib_net_to_host_f64 (mp->interval);
+ u32 ping_repeat = ntohl (mp->repeat);
+ u32 data_len = PING_DEFAULT_DATA_LEN;
+ u32 ping_burst = 1;
+ u32 verbose = 0;
+ ip_address_decode2 (&mp->address, &dst_addr);
+
+ vl_api_registration_t *rp;
+ rp = vl_api_client_index_to_registration (mp->client_index);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id =
+ htons ((VL_API_WANT_PING_FINISHED_EVENTS_REPLY) + (REPLY_MSG_ID_BASE));
+ rmp->context = mp->context;
+ rmp->retval = ntohl (rv);
+ vl_api_send_msg (rp, (u8 *) rmp);
+
+ int i;
+ send_ip46_ping_result_t res = SEND_PING_OK;
+ for (i = 1; i <= ping_repeat; i++)
+ {
+ f64 sleep_interval;
+ f64 time_ping_sent = vlib_time_now (vm);
+
+ if (dst_addr.version == AF_IP4)
+ res = send_ip4_ping (vm, table_id, &dst_addr.ip.ip4, sw_if_index, i,
+ icmp_id, data_len, ping_burst, verbose);
+ else
+ res = send_ip6_ping (vm, table_id, &dst_addr.ip.ip6, sw_if_index, i,
+ icmp_id, data_len, ping_burst, verbose);
+
+ if (SEND_PING_OK == res)
+ request_count += 1;
+ else
+ continue;
+
+ while ((sleep_interval =
+ time_ping_sent + ping_interval - vlib_time_now (vm)) > 0.0)
+ {
+ uword event_type;
+ vlib_process_wait_for_event_or_clock (vm, sleep_interval);
+ event_type = vlib_process_get_events (vm, 0);
+
+ if (event_type == ~0)
+ break;
+
+ if (event_type == PING_RESPONSE_IP4 ||
+ event_type == PING_RESPONSE_IP6)
+ reply_count += 1;
+ }
+ }
+
+ ping_api_send_ping_event (mp, request_count, reply_count);
+
+ clear_cli_process_id_by_icmp_id_mt (vm, icmp_id);
+}
+
+/* set tup the API message handling tables */
+#include <ping/ping.api.c>
+
+clib_error_t *
+ping_plugin_api_hookup (vlib_main_t *vm)
+{
+ ping_main_t *pm = &ping_main;
+
+ /* ask for a correctly-sized block of API message decode slots */
+ pm->msg_id_base = setup_message_id_table ();
+
+ return 0;
+} \ No newline at end of file
diff --git a/src/plugins/pppoe/pppoe.c b/src/plugins/pppoe/pppoe.c
index 1589725eddd..0d5f9c1aeac 100644
--- a/src/plugins/pppoe/pppoe.c
+++ b/src/plugins/pppoe/pppoe.c
@@ -77,13 +77,11 @@ pppoe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
return /* no error */ 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (pppoe_device_class,static) = {
.name = "PPPoE",
.format_device_name = format_pppoe_name,
.admin_up_down_function = pppoe_interface_admin_up_down,
};
-/* *INDENT-ON* */
static u8 *
format_pppoe_header_with_length (u8 * s, va_list * args)
@@ -256,7 +254,6 @@ pppoe_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
dpo_reset (&dpo);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (pppoe_hw_class) =
{
.name = "PPPoE",
@@ -265,7 +262,6 @@ VNET_HW_INTERFACE_CLASS (pppoe_hw_class) =
.update_adjacency = pppoe_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
#define foreach_copy_field \
_(session_id) \
@@ -353,7 +349,7 @@ int vnet_pppoe_add_del_session
pool_get_aligned (pem->sessions, t, CLIB_CACHE_LINE_BYTES);
clib_memset (t, 0, sizeof (*t));
- clib_memcpy (t->local_mac, hi->hw_address, 6);
+ clib_memcpy (t->local_mac, hi->hw_address, vec_len (hi->hw_address));
/* copy from arg structure */
#define _(x) t->x = a->x;
@@ -374,7 +370,7 @@ int vnet_pppoe_add_del_session
vnet_interface_main_t *im = &vnm->interface_main;
hw_if_index = pem->free_pppoe_session_hw_if_indices
[vec_len (pem->free_pppoe_session_hw_if_indices) - 1];
- _vec_len (pem->free_pppoe_session_hw_if_indices) -= 1;
+ vec_dec_len (pem->free_pppoe_session_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, hw_if_index);
hi->dev_instance = t - pem->sessions;
@@ -413,6 +409,8 @@ int vnet_pppoe_add_del_session
si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
vnet_sw_interface_set_flags (vnm, sw_if_index,
VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ vnet_set_interface_l3_output_node (vnm->vlib_main, sw_if_index,
+ (u8 *) "tunnel-output");
/* add reverse route for client ip */
fib_table_entry_path_add (a->decap_fib_index, &pfx,
@@ -431,6 +429,7 @@ int vnet_pppoe_add_del_session
t = pool_elt_at_index (pem->sessions, result.fields.session_index);
sw_if_index = t->sw_if_index;
+ vnet_reset_interface_l3_output_node (vnm->vlib_main, sw_if_index);
vnet_sw_interface_set_flags (vnm, t->sw_if_index, 0 /* down */ );
vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, t->sw_if_index);
si->flags |= VNET_SW_INTERFACE_FLAG_HIDDEN;
@@ -610,7 +609,6 @@ done:
* @cliexcmd{create pppoe session client-ip 10.0.3.1 session-id 13
* client-mac 00:01:02:03:04:05 del }
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_pppoe_session_command, static) = {
.path = "create pppoe session",
.short_help =
@@ -618,9 +616,7 @@ VLIB_CLI_COMMAND (create_pppoe_session_command, static) = {
" client-mac <client-mac> [decap-vrf-id <nn>] [del]",
.function = pppoe_add_del_session_command_fn,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
static clib_error_t *
show_pppoe_session_command_fn (vlib_main_t * vm,
unformat_input_t * input,
@@ -639,7 +635,6 @@ show_pppoe_session_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-ON* */
/*?
* Display all the PPPoE Session entries.
@@ -651,13 +646,11 @@ show_pppoe_session_command_fn (vlib_main_t * vm,
* local-mac a0:b0:c0:d0:e0:f0 client-mac 00:01:02:03:04:05
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_pppoe_session_command, static) = {
.path = "show pppoe session",
.short_help = "show pppoe session",
.function = show_pppoe_session_command_fn,
};
-/* *INDENT-ON* */
typedef struct pppoe_show_walk_ctx_t_
{
@@ -721,7 +714,7 @@ show_pppoe_fib_command_fn (vlib_main_t * vm,
}
/*?
- * This command dispays the MAC Address entries of the PPPoE FIB table.
+ * This command displays the MAC Address entries of the PPPoE FIB table.
* Output can be filtered to just get the number of MAC Addresses or display
* each MAC Address.
*
@@ -729,18 +722,16 @@ show_pppoe_fib_command_fn (vlib_main_t * vm,
* Example of how to display the number of MAC Address entries in the PPPoE
* FIB table:
* @cliexstart{show pppoe fib}
- * Mac Address session_id Interface sw_if_index session_index
- * 52:54:00:53:18:33 1 GigabitEthernet0/8/0 2 0
- * 52:54:00:53:18:55 2 GigabitEthernet0/8/1 3 1
+ * Mac Address session_id Interface sw_if_index session_index
+ * 52:54:00:53:18:33 1 GigabitEthernet0/8/0 2 0
+ * 52:54:00:53:18:55 2 GigabitEthernet0/8/1 3 1
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_pppoe_fib_command, static) = {
.path = "show pppoe fib",
.short_help = "show pppoe fib",
.function = show_pppoe_fib_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
pppoe_init (vlib_main_t * vm)
@@ -772,12 +763,10 @@ pppoe_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (pppoe_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "PPP over Ethernet (PPPoE)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/pppoe/pppoe.h b/src/plugins/pppoe/pppoe.h
index a72b7349328..444de42f4a5 100644
--- a/src/plugins/pppoe/pppoe.h
+++ b/src/plugins/pppoe/pppoe.h
@@ -106,7 +106,6 @@ extern char *pppoe_error_strings[];
#define PPPOE_NUM_BUCKETS (64 * 1024)
#define PPPOE_MEMORY_SIZE (8<<20)
-/* *INDENT-OFF* */
/*
* The PPPoE key is the mac address and session ID
*/
@@ -127,9 +126,7 @@ typedef struct
u64 raw;
};
} pppoe_entry_key_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
/*
* The PPPoE entry results
*/
@@ -147,7 +144,6 @@ typedef struct
u64 raw;
};
} pppoe_entry_result_t;
-/* *INDENT-ON* */
typedef struct
{
diff --git a/src/plugins/pppoe/pppoe_api.c b/src/plugins/pppoe/pppoe_api.c
index 6705fb6acea..c7099a3491f 100644
--- a/src/plugins/pppoe/pppoe_api.c
+++ b/src/plugins/pppoe/pppoe_api.c
@@ -64,12 +64,10 @@ static void vl_api_pppoe_add_del_session_t_handler
rv = vnet_pppoe_add_del_session (&a, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_PPPOE_ADD_DEL_SESSION_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void send_pppoe_session_details
@@ -120,12 +118,10 @@ vl_api_pppoe_session_dump_t_handler (vl_api_pppoe_session_dump_t * mp)
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (t, pem->sessions)
{
send_pppoe_session_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
else
{
@@ -148,9 +144,7 @@ vl_api_pppoe_add_del_cp_t_handler (vl_api_pppoe_add_del_cp_t * mp)
rv = pppoe_add_del_cp (ntohl (mp->sw_if_index), mp->is_add);
- /* *INDENT-OFF* */
REPLY_MACRO(VL_API_PPPOE_ADD_DEL_CP_REPLY);
- /* *INDENT-ON* */
}
#include <pppoe/pppoe.api.c>
diff --git a/src/plugins/pppoe/pppoe_cp.c b/src/plugins/pppoe/pppoe_cp.c
index 6c6ba249fcc..82891d5b654 100644
--- a/src/plugins/pppoe/pppoe_cp.c
+++ b/src/plugins/pppoe/pppoe_cp.c
@@ -97,14 +97,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_pppoe_cp_cmd, static) =
{
.path = "create pppoe cp",
.short_help = "create pppoe cp-if-index <intfc> [del]",
.function = pppoe_add_del_cp_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/pppoe/pppoe_decap.c b/src/plugins/pppoe/pppoe_decap.c
index 71b9874081e..7c456a7a9cc 100644
--- a/src/plugins/pppoe/pppoe_decap.c
+++ b/src/plugins/pppoe/pppoe_decap.c
@@ -133,8 +133,10 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
pppoe0 = (pppoe_header_t*)(vlan0+1);
if( type0 != ETHERNET_TYPE_PPPOE_DISCOVERY && type0 != ETHERNET_TYPE_PPPOE_SESSION ) {
error0 = PPPOE_ERROR_BAD_VER_TYPE;
- next0 = PPPOE_INPUT_NEXT_DROP;
- goto trace0;
+ result0.fields.session_index =
+ ~0; // avoid tracing random data
+ next0 = PPPOE_INPUT_NEXT_DROP;
+ goto trace0;
}
} else {
pppoe0 = (pppoe_header_t*)(h0+1);
@@ -152,6 +154,7 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
vlib_buffer_advance(b0, sizeof(*h0)+sizeof(*vlan0));
error0 = PPPOE_ERROR_CONTROL_PLANE;
next0 = PPPOE_INPUT_NEXT_CP_INPUT;
+ result0.fields.session_index = ~0;
goto trace0;
}
@@ -228,8 +231,10 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
pppoe1 = (pppoe_header_t*)(vlan1+1);
if( type1 != ETHERNET_TYPE_PPPOE_DISCOVERY && type1 != ETHERNET_TYPE_PPPOE_SESSION ) {
error1 = PPPOE_ERROR_BAD_VER_TYPE;
- next1 = PPPOE_INPUT_NEXT_DROP;
- goto trace1;
+ result1.fields.session_index =
+ ~0; // avoid tracing random data
+ next1 = PPPOE_INPUT_NEXT_DROP;
+ goto trace1;
}
} else {
pppoe1 = (pppoe_header_t*)(h1+1);
@@ -247,6 +252,7 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
vlib_buffer_advance(b1, sizeof(*h1)+sizeof(*vlan1));
error1 = PPPOE_ERROR_CONTROL_PLANE;
next1 = PPPOE_INPUT_NEXT_CP_INPUT;
+ result1.fields.session_index = ~0;
goto trace1;
}
@@ -354,8 +360,10 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
pppoe0 = (pppoe_header_t*)(vlan0+1);
if( type0 != ETHERNET_TYPE_PPPOE_DISCOVERY && type0 != ETHERNET_TYPE_PPPOE_SESSION ) {
error0 = PPPOE_ERROR_BAD_VER_TYPE;
- next0 = PPPOE_INPUT_NEXT_DROP;
- goto trace00;
+ result0.fields.session_index =
+ ~0; // avoid tracing random data
+ next0 = PPPOE_INPUT_NEXT_DROP;
+ goto trace00;
}
} else {
pppoe0 = (pppoe_header_t*)(h0+1);
@@ -372,6 +380,7 @@ VLIB_NODE_FN (pppoe_input_node) (vlib_main_t * vm,
vlib_buffer_advance(b0, sizeof(*h0)+sizeof(*vlan0));
error0 = PPPOE_ERROR_CONTROL_PLANE;
next0 = PPPOE_INPUT_NEXT_CP_INPUT;
+ result0.fields.session_index = ~0;
goto trace00;
}
@@ -485,11 +494,9 @@ VLIB_REGISTER_NODE (pppoe_input_node) = {
.format_trace = format_pppoe_rx_trace,
};
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (pppoe_input_node, static) =
{
.arc_name = "device-input",
.node_name = "pppoe-input",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON */
diff --git a/src/plugins/prom/CMakeLists.txt b/src/plugins/prom/CMakeLists.txt
new file mode 100644
index 00000000000..6c1976c74f3
--- /dev/null
+++ b/src/plugins/prom/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(prom
+ SOURCES
+ prom.c
+ prom_cli.c
+
+ LINK_LIBRARIES
+ vppapiclient
+)
diff --git a/src/plugins/prom/FEATURE.yaml b/src/plugins/prom/FEATURE.yaml
new file mode 100644
index 00000000000..65fefa7f177
--- /dev/null
+++ b/src/plugins/prom/FEATURE.yaml
@@ -0,0 +1,10 @@
+---
+name: Prom (Prometheus Exporter)
+maintainer: Florin Coras <fcoras@cisco.com>
+features:
+ - Stats scraper
+ - Prometheus exporter
+description: "HTTP static server url handler that scrapes stats and exports
+ them in Prometheus format"
+state: experimental
+properties: [MULTITHREAD]
diff --git a/src/plugins/prom/prom.c b/src/plugins/prom/prom.c
new file mode 100644
index 00000000000..934e8480d3c
--- /dev/null
+++ b/src/plugins/prom/prom.c
@@ -0,0 +1,436 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+#include <prom/prom.h>
+#include <vpp-api/client/stat_client.h>
+#include <vlib/stats/stats.h>
+#include <ctype.h>
+
+static prom_main_t prom_main;
+
+static u8 *
+make_stat_name (char *name)
+{
+ prom_main_t *pm = &prom_main;
+ char *p = name;
+
+ while (*p)
+ {
+ if (!isalnum (*p))
+ *p = '_';
+ p++;
+ }
+
+ /* Reuse vector, instead of always allocating, when building a name. */
+ vec_reset_length (pm->name_scratch_pad);
+ pm->name_scratch_pad =
+ format (pm->name_scratch_pad, "%v%s", pm->stat_name_prefix, name);
+ return pm->name_scratch_pad;
+}
+
+static u8 *
+dump_counter_vector_simple (stat_segment_data_t *res, u8 *s, u8 used_only)
+{
+ u8 need_header = 1;
+ int j, k;
+ u8 *name;
+
+ name = make_stat_name (res->name);
+
+ for (k = 0; k < vec_len (res->simple_counter_vec); k++)
+ for (j = 0; j < vec_len (res->simple_counter_vec[k]); j++)
+ {
+ if (used_only && !res->simple_counter_vec[k][j])
+ continue;
+ if (need_header)
+ {
+ s = format (s, "# TYPE %v counter\n", name);
+ need_header = 0;
+ }
+ s = format (s, "%v{thread=\"%d\",interface=\"%d\"} %lld\n", name, k, j,
+ res->simple_counter_vec[k][j]);
+ }
+
+ return s;
+}
+
+static u8 *
+dump_counter_vector_combined (stat_segment_data_t *res, u8 *s, u8 used_only)
+{
+ u8 need_header = 1;
+ int j, k;
+ u8 *name;
+
+ name = make_stat_name (res->name);
+
+ for (k = 0; k < vec_len (res->simple_counter_vec); k++)
+ for (j = 0; j < vec_len (res->combined_counter_vec[k]); j++)
+ {
+ if (used_only && !res->combined_counter_vec[k][j].packets)
+ continue;
+ if (need_header)
+ {
+ s = format (s, "# TYPE %v_packets counter\n", name);
+ s = format (s, "# TYPE %v_bytes counter\n", name);
+ need_header = 0;
+ }
+ s = format (s, "%v_packets{thread=\"%d\",interface=\"%d\"} %lld\n",
+ name, k, j, res->combined_counter_vec[k][j].packets);
+ s = format (s, "%v_bytes{thread=\"%d\",interface=\"%d\"} %lld\n", name,
+ k, j, res->combined_counter_vec[k][j].bytes);
+ }
+
+ return s;
+}
+
+static u8 *
+dump_scalar_index (stat_segment_data_t *res, u8 *s, u8 used_only)
+{
+ u8 *name;
+
+ if (used_only && !res->scalar_value)
+ return s;
+
+ name = make_stat_name (res->name);
+
+ s = format (s, "# TYPE %v counter\n", name);
+ s = format (s, "%v %.2f\n", name, res->scalar_value);
+
+ return s;
+}
+
+static u8 *
+dump_name_vector (stat_segment_data_t *res, u8 *s, u8 used_only)
+{
+ u8 *name;
+ int k;
+
+ name = make_stat_name (res->name);
+
+ s = format (s, "# TYPE %v_info gauge\n", name);
+ for (k = 0; k < vec_len (res->name_vector); k++)
+ s = format (s, "%v_info{index=\"%d\",name=\"%s\"} 1\n", name, k,
+ res->name_vector[k]);
+
+ return s;
+}
+
+static u8 *
+scrape_stats_segment (u8 *s, u8 **patterns, u8 used_only)
+{
+ stat_segment_data_t *res;
+ static u32 *stats = 0;
+ int i;
+
+ stats = stat_segment_ls (patterns);
+
+retry:
+ res = stat_segment_dump (stats);
+ if (res == 0)
+ { /* Memory layout has changed */
+ if (stats)
+ vec_free (stats);
+ stats = stat_segment_ls (patterns);
+ goto retry;
+ }
+
+ for (i = 0; i < vec_len (res); i++)
+ {
+ switch (res[i].type)
+ {
+ case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE:
+ s = dump_counter_vector_simple (&res[i], s, used_only);
+ break;
+
+ case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED:
+ s = dump_counter_vector_combined (&res[i], s, used_only);
+ break;
+
+ case STAT_DIR_TYPE_SCALAR_INDEX:
+ s = dump_scalar_index (&res[i], s, used_only);
+ break;
+
+ case STAT_DIR_TYPE_NAME_VECTOR:
+ s = dump_name_vector (&res[i], s, used_only);
+ break;
+
+ case STAT_DIR_TYPE_EMPTY:
+ break;
+
+ default:
+ clib_warning ("Unknown value %d\n", res[i].type);
+ ;
+ }
+ }
+ stat_segment_data_free (res);
+ vec_free (stats);
+
+ return s;
+}
+
+static void
+send_data_to_hss (hss_session_handle_t sh)
+{
+ hss_url_handler_args_t args = {};
+ prom_main_t *pm = &prom_main;
+
+ args.sh = sh;
+ args.data = vec_dup (pm->stats);
+ args.data_len = vec_len (pm->stats);
+ args.sc = HTTP_STATUS_OK;
+ args.free_vec_data = 1;
+
+ pm->send_data (&args);
+}
+
+static void
+send_data_to_hss_rpc (void *rpc_args)
+{
+ send_data_to_hss (*(hss_session_handle_t *) rpc_args);
+}
+
+static uword
+prom_scraper_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ vlib_frame_t *f)
+{
+ uword *event_data = 0, event_type;
+ prom_main_t *pm = &prom_main;
+ hss_session_handle_t sh;
+ f64 timeout = 10000.0;
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, timeout);
+ event_type = vlib_process_get_events (vm, (uword **) &event_data);
+ switch (event_type)
+ {
+ case ~0:
+ /* timeout, do nothing */
+ break;
+ case PROM_SCRAPER_EVT_RUN:
+ sh.as_u64 = event_data[0];
+ vec_reset_length (pm->stats);
+ pm->stats = scrape_stats_segment (pm->stats, pm->stats_patterns,
+ pm->used_only);
+ session_send_rpc_evt_to_thread_force (sh.thread_index,
+ send_data_to_hss_rpc, &sh);
+ pm->last_scrape = vlib_time_now (vm);
+ break;
+ default:
+ clib_warning ("unexpected event %u", event_type);
+ break;
+ }
+
+ vec_reset_length (event_data);
+ }
+ return 0;
+}
+
+VLIB_REGISTER_NODE (prom_scraper_process_node) = {
+ .function = prom_scraper_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "prom-scraper-process",
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+static void
+prom_scraper_process_enable (vlib_main_t *vm)
+{
+ prom_main_t *pm = &prom_main;
+ vlib_node_t *n;
+
+ vlib_node_set_state (vm, prom_scraper_process_node.index,
+ VLIB_NODE_STATE_POLLING);
+ n = vlib_get_node (vm, prom_scraper_process_node.index);
+ vlib_start_process (vm, n->runtime_index);
+
+ pm->scraper_node_index = n->index;
+}
+
+static void
+signal_run_to_scraper (uword *args)
+{
+ prom_main_t *pm = &prom_main;
+ ASSERT (vlib_get_thread_index () == 0);
+ vlib_process_signal_event (pm->vm, pm->scraper_node_index,
+ PROM_SCRAPER_EVT_RUN, *args);
+}
+
+hss_url_handler_rc_t
+prom_stats_dump (hss_url_handler_args_t *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ f64 now = vlib_time_now (vm);
+ prom_main_t *pm = &prom_main;
+
+ /* If we've recently scraped stats, return data */
+ if ((now - pm->last_scrape) < pm->min_scrape_interval)
+ {
+ send_data_to_hss (args->sh);
+ return HSS_URL_HANDLER_ASYNC;
+ }
+
+ if (vm->thread_index != 0)
+ vl_api_rpc_call_main_thread (signal_run_to_scraper, (u8 *) &args->sh,
+ sizeof (args->sh));
+ else
+ signal_run_to_scraper (&args->sh.as_u64);
+
+ return HSS_URL_HANDLER_ASYNC;
+}
+
+void
+prom_stat_patterns_add (u8 **patterns)
+{
+ prom_main_t *pm = &prom_main;
+
+ u8 **pattern, **existing;
+ u8 found;
+ u32 len;
+
+ vec_foreach (pattern, patterns)
+ {
+ found = 0;
+ len = vec_len (*pattern);
+ if (len == 0)
+ continue;
+ vec_foreach (existing, pm->stats_patterns)
+ {
+ if (vec_len (*existing) != len)
+ continue;
+ if (!memcmp (*existing, *pattern, len - 1))
+ {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ vec_add1 (pm->stats_patterns, *pattern);
+ }
+}
+
+void
+prom_stat_patterns_free (void)
+{
+ prom_main_t *pm = &prom_main;
+ u8 **pattern;
+
+ vec_foreach (pattern, pm->stats_patterns)
+ vec_free (*pattern);
+ vec_free (pm->stats_patterns);
+}
+
+void
+prom_stat_patterns_set (u8 **patterns)
+{
+ prom_stat_patterns_free ();
+ prom_stat_patterns_add (patterns);
+}
+
+u8 **
+prom_stat_patterns_get (void)
+{
+ return prom_main.stats_patterns;
+}
+
+void
+prom_stat_name_prefix_set (u8 *prefix)
+{
+ prom_main_t *pm = &prom_main;
+
+ vec_free (pm->stat_name_prefix);
+ pm->stat_name_prefix = prefix;
+}
+
+void
+prom_report_used_only (u8 used_only)
+{
+ prom_main_t *pm = &prom_main;
+
+ pm->used_only = used_only;
+}
+
+static void
+prom_stat_segment_client_init (void)
+{
+ stat_client_main_t *scm = &stat_client_main;
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ uword size;
+
+ size = sm->memory_size ? sm->memory_size : STAT_SEGMENT_DEFAULT_SIZE;
+ scm->memory_size = size;
+ scm->shared_header = sm->shared_header;
+ scm->directory_vector =
+ stat_segment_adjust (scm, (void *) scm->shared_header->directory_vector);
+}
+
+void
+prom_enable (vlib_main_t *vm)
+{
+ prom_main_t *pm = &prom_main;
+
+ pm->register_url = vlib_get_plugin_symbol ("http_static_plugin.so",
+ "hss_register_url_handler");
+ pm->send_data =
+ vlib_get_plugin_symbol ("http_static_plugin.so", "hss_session_send_data");
+ pm->register_url (prom_stats_dump, "stats.prom", HTTP_REQ_GET);
+
+ pm->is_enabled = 1;
+ pm->vm = vm;
+ if (!pm->stat_name_prefix)
+ pm->stat_name_prefix = format (0, "vpp");
+
+ prom_scraper_process_enable (vm);
+ prom_stat_segment_client_init ();
+}
+
+static clib_error_t *
+prom_init (vlib_main_t *vm)
+{
+ prom_main_t *pm = &prom_main;
+
+ pm->is_enabled = 0;
+ pm->min_scrape_interval = 1;
+ pm->used_only = 0;
+ pm->stat_name_prefix = 0;
+
+ return 0;
+}
+
+prom_main_t *
+prom_get_main (void)
+{
+ return &prom_main;
+}
+
+VLIB_INIT_FUNCTION (prom_init) = {
+ .runs_after = VLIB_INITS ("hss_main_init"),
+};
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Prometheus Stats Exporter",
+ .default_disabled = 0,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/prom/prom.h b/src/plugins/prom/prom.h
new file mode 100644
index 00000000000..898e4c209d1
--- /dev/null
+++ b/src/plugins/prom/prom.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_PLUGINS_PROM_PROM_H_
+#define SRC_PLUGINS_PROM_PROM_H_
+
+#include <vnet/session/session.h>
+#include <http_static/http_static.h>
+
+typedef struct prom_main_
+{
+ u8 *stats;
+ f64 last_scrape;
+ hss_register_url_fn register_url;
+ hss_session_send_fn send_data;
+ u32 scraper_node_index;
+ u8 is_enabled;
+ u8 *name_scratch_pad;
+ vlib_main_t *vm;
+
+ /*
+ * Configs
+ */
+ u8 **stats_patterns;
+ u8 *stat_name_prefix;
+ f64 min_scrape_interval;
+ u8 used_only;
+} prom_main_t;
+
+typedef enum prom_process_evt_codes_
+{
+ PROM_SCRAPER_EVT_RUN,
+} prom_process_evt_codes_t;
+
+void prom_enable (vlib_main_t *vm);
+prom_main_t *prom_get_main (void);
+
+void prom_stat_patterns_set (u8 **patterns);
+void prom_stat_patterns_add (u8 **patterns);
+u8 **prom_stat_patterns_get (void);
+void prom_stat_patterns_free (void);
+
+void prom_stat_name_prefix_set (u8 *prefix);
+void prom_report_used_only (u8 used_only);
+
+#endif /* SRC_PLUGINS_PROM_PROM_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/prom/prom_cli.c b/src/plugins/prom/prom_cli.c
new file mode 100644
index 00000000000..705e54ac1b8
--- /dev/null
+++ b/src/plugins/prom/prom_cli.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <prom/prom.h>
+
+static uword
+unformat_stats_patterns (unformat_input_t *input, va_list *args)
+{
+ u8 ***patterns = va_arg (*args, u8 ***);
+ u8 *pattern;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%s", &pattern))
+ vec_add1 (*patterns, pattern);
+ else
+ return 0;
+ }
+ return 1;
+}
+
+static clib_error_t *
+prom_patterns_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 is_clear = 0, is_show = 0, **pattern = 0;
+ clib_error_t *error = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "show"))
+ is_show = 1;
+ else if (unformat (line_input, "clear"))
+ is_clear = 1;
+ else if (unformat (line_input, "add %U", unformat_stats_patterns,
+ &pattern))
+ {
+ prom_stat_patterns_add (pattern);
+ vec_free (pattern);
+ }
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+ unformat_free (line_input);
+
+ if (error)
+ return error;
+
+ if (is_clear)
+ prom_stat_patterns_free ();
+
+ if (is_show)
+ {
+ u8 **patterns = prom_stat_patterns_get ();
+ vec_foreach (pattern, patterns)
+ vlib_cli_output (vm, " %v\n", *pattern);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (prom_patterns_command, static) = {
+ .path = "prom patterns",
+ .short_help = "prom patterns [show] [clear] [add <patterns>...]",
+ .function = prom_patterns_command_fn,
+};
+
+static clib_error_t *
+prom_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u8 **patterns = 0, *stat_name_prefix = 0;
+ prom_main_t *pm = prom_get_main ();
+ clib_error_t *error = 0;
+ u8 is_enable = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ goto no_input;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "enable"))
+ is_enable = 1;
+ else if (unformat (line_input, "min-scrape-interval %f",
+ &pm->min_scrape_interval))
+ ;
+ else if (unformat (line_input, "used-only"))
+ prom_report_used_only (1 /* used only */);
+ else if (unformat (line_input, "all-stats"))
+ prom_report_used_only (0 /* used only */);
+ else if (unformat (line_input, "stat-name-prefix %_%v%_",
+ &stat_name_prefix))
+ prom_stat_name_prefix_set (stat_name_prefix);
+ else if (unformat (line_input, "stat-patterns %U",
+ unformat_stats_patterns, &patterns))
+ prom_stat_patterns_set (patterns);
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ break;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (error)
+ return error;
+
+no_input:
+
+ if (is_enable && !pm->is_enabled)
+ prom_enable (vm);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (prom_enable_command, static) = {
+ .path = "prom",
+ .short_help = "prom [enable] [min-scrape-interval <n>] [used-only] "
+ "[all-stats] [stat-name-prefix <prefix>] "
+ "[stat-patterns <patterns>...]",
+ .function = prom_command_fn,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/quic/CMakeLists.txt b/src/plugins/quic/CMakeLists.txt
index dfed91f51d9..65bdc32a239 100644
--- a/src/plugins/quic/CMakeLists.txt
+++ b/src/plugins/quic/CMakeLists.txt
@@ -12,8 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+if(NOT OPENSSL_FOUND)
+ message(WARNING "OpenSSL not found - quic plugin disabled")
+ return()
+endif()
+
unset(QUIC_LINK_LIBRARIES)
-set(EXPECTED_QUICLY_VERSION "0.1.3-vpp")
+set(EXPECTED_QUICLY_VERSION "0.1.4-vpp")
vpp_find_path(QUICLY_INCLUDE_DIR NAMES quicly.h)
vpp_find_path(PICOTLS_INCLUDE_DIR NAMES picotls.h)
diff --git a/src/plugins/quic/quic.c b/src/plugins/quic/quic.c
index 26f2216a3d8..60d4ac21c19 100644
--- a/src/plugins/quic/quic.c
+++ b/src/plugins/quic/quic.c
@@ -14,6 +14,9 @@
*/
#include <sys/socket.h>
+#include <sys/syscall.h>
+
+#include <openssl/rand.h>
#include <vnet/session/application.h>
#include <vnet/session/transport.h>
@@ -103,7 +106,6 @@ quic_app_cert_key_pair_delete_callback (app_cert_key_pair_t * ckpair)
for (i = 0; i < num_threads; i++)
{
- /* *INDENT-OFF* */
pool_foreach (crctx, qm->wrk_ctx[i].crypto_ctx_pool) {
if (crctx->ckpair_index == ckpair->cert_key_index)
{
@@ -111,7 +113,6 @@ quic_app_cert_key_pair_delete_callback (app_cert_key_pair_t * ckpair)
clib_bihash_add_del_24_8 (&qm->wrk_ctx[i].crypto_context_hash, &kv, 0 /* is_add */ );
}
}
- /* *INDENT-ON* */
}
return 0;
}
@@ -151,11 +152,9 @@ quic_list_crypto_context_command_fn (vlib_main_t * vm,
int i, num_threads = 1 /* main thread */ + vtm->n_threads;
for (i = 0; i < num_threads; i++)
{
- /* *INDENT-OFF* */
pool_foreach (crctx, qm->wrk_ctx[i].crypto_ctx_pool) {
vlib_cli_output (vm, "[%d][Q]%U", i, format_crypto_context, crctx);
}
- /* *INDENT-ON* */
}
return 0;
}
@@ -388,7 +387,8 @@ quic_ctx_alloc (u32 thread_index)
quic_main_t *qm = &quic_main;
quic_ctx_t *ctx;
- pool_get (qm->ctx_pool[thread_index], ctx);
+ pool_get_aligned_safe (qm->ctx_pool[thread_index], ctx,
+ CLIB_CACHE_LINE_BYTES);
clib_memset (ctx, 0, sizeof (quic_ctx_t));
ctx->c_thread_index = thread_index;
@@ -675,6 +675,7 @@ quic_send_datagram (session_t *udp_session, struct iovec *packet,
hdr.is_ip4 = tc->is_ip4;
clib_memcpy (&hdr.lcl_ip, &tc->lcl_ip, sizeof (ip46_address_t));
hdr.lcl_port = tc->lcl_port;
+ hdr.gso_size = 0;
/* Read dest address from quicly-provided sockaddr */
if (hdr.is_ip4)
@@ -782,12 +783,10 @@ quic_on_stream_destroy (quicly_stream_t * stream, int err)
quic_stream_data_t *stream_data = (quic_stream_data_t *) stream->data;
quic_ctx_t *sctx = quic_ctx_get (stream_data->ctx_id,
stream_data->thread_index);
- session_t *stream_session = session_get (sctx->c_s_index,
- sctx->c_thread_index);
QUIC_DBG (2, "DESTROYED_STREAM: session 0x%lx (%U)",
session_handle (stream_session), quic_format_err, err);
- stream_session->session_state = SESSION_STATE_CLOSED;
+ session_transport_closing_notify (&sctx->connection);
session_transport_delete_notify (&sctx->connection);
quic_increment_counter (QUIC_ERROR_CLOSED_STREAM, 1);
@@ -830,12 +829,13 @@ quic_on_receive (quicly_stream_t * stream, size_t off, const void *src,
size_t len)
{
QUIC_DBG (3, "received data: %lu bytes, offset %lu", len, off);
- u32 max_enq, rlen, rv;
+ u32 max_enq;
quic_ctx_t *sctx;
session_t *stream_session;
app_worker_t *app_wrk;
svm_fifo_t *f;
quic_stream_data_t *stream_data;
+ int rlen;
if (!len)
return;
@@ -876,6 +876,14 @@ quic_on_receive (quicly_stream_t * stream, size_t off, const void *src,
{
/* Streams live on the same thread so (f, stream_data) should stay consistent */
rlen = svm_fifo_enqueue (f, len, (u8 *) src);
+ if (PREDICT_FALSE (rlen < 0))
+ {
+ /*
+ * drop, fifo full
+ * drop, fifo grow
+ */
+ return;
+ }
QUIC_DBG (3, "Session [idx %u, app_wrk %u, ti %u, rx-fifo 0x%llx]: "
"Enqueuing %u (rlen %u) at off %u in %u space, ",
stream_session->session_index,
@@ -886,10 +894,7 @@ quic_on_receive (quicly_stream_t * stream, size_t off, const void *src,
app_wrk = app_worker_get_if_valid (stream_session->app_wrk_index);
if (PREDICT_TRUE (app_wrk != 0))
{
- rv = app_worker_lock_and_send_event (app_wrk, stream_session,
- SESSION_IO_EVT_RX);
- if (rv)
- QUIC_ERR ("Failed to ping app for RX");
+ app_worker_rx_notify (app_wrk, stream_session);
}
quic_ack_rx_data (stream_session);
}
@@ -898,6 +903,14 @@ quic_on_receive (quicly_stream_t * stream, size_t off, const void *src,
rlen = svm_fifo_enqueue_with_offset (f,
off - stream_data->app_rx_data_len,
len, (u8 *) src);
+ if (PREDICT_FALSE (rlen < 0))
+ {
+ /*
+ * drop, fifo full
+ * drop, fifo grow
+ */
+ return;
+ }
QUIC_ASSERT (rlen == 0);
}
return;
@@ -1031,6 +1044,8 @@ quic_on_stream_open (quicly_stream_open_t * self, quicly_stream_t * stream)
stream_session->session_type =
session_type_from_proto_and_ip (TRANSPORT_PROTO_QUIC, qctx->udp_is_ip4);
quic_session = session_get (qctx->c_s_index, qctx->c_thread_index);
+ /* Make sure quic session is in listening state */
+ quic_session->session_state = SESSION_STATE_LISTENING;
stream_session->listener_handle = listen_session_get_handle (quic_session);
app_wrk = app_worker_get (stream_session->app_wrk_index);
@@ -1044,6 +1059,7 @@ quic_on_stream_open (quicly_stream_open_t * self, quicly_stream_t * stream)
SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL |
SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY);
+ stream_session->session_state = SESSION_STATE_ACCEPTING;
if ((rv = app_worker_accept_notify (app_wrk, stream_session)))
{
QUIC_ERR ("failed to notify accept worker app");
@@ -1139,9 +1155,8 @@ quic_update_timer (quic_ctx_t * ctx)
quic_session = session_get (ctx->c_s_index, ctx->c_thread_index);
if (svm_fifo_set_event (quic_session->tx_fifo))
{
- rv = session_send_io_evt_to_thread_custom (quic_session,
- quic_session->thread_index,
- SESSION_IO_EVT_BUILTIN_TX);
+ rv = session_send_io_evt_to_thread_custom (
+ quic_session, quic_session->thread_index, SESSION_IO_EVT_TX);
if (PREDICT_FALSE (rv))
QUIC_ERR ("Failed to enqueue builtin_tx %d", rv);
}
@@ -1277,6 +1292,7 @@ quic_connect_stream (session_t * quic_session, session_endpoint_cfg_t * sep)
stream_data->app_rx_data_len = 0;
stream_data->app_tx_data_len = 0;
stream_session->session_state = SESSION_STATE_READY;
+ stream_session->opaque = sep->opaque;
/* For now we only reset streams. Cleanup will be triggered by timers */
if ((rv = app_worker_init_connected (app_wrk, stream_session)))
@@ -1441,7 +1457,8 @@ quic_proto_on_close (u32 ctx_index, u32 thread_index)
}
static u32
-quic_start_listen (u32 quic_listen_session_index, transport_endpoint_t * tep)
+quic_start_listen (u32 quic_listen_session_index,
+ transport_endpoint_cfg_t *tep)
{
vnet_listen_args_t _bargs, *args = &_bargs;
transport_endpt_crypto_cfg_t *ccfg;
@@ -1552,7 +1569,7 @@ format_quic_ctx (u8 * s, va_list * args)
if (!ctx)
return s;
- str = format (str, "[#%d][Q] ", ctx->c_thread_index);
+ str = format (str, "[%d:%d][Q] ", ctx->c_thread_index, ctx->c_s_index);
if (quic_ctx_is_listener (ctx))
str = format (str, "Listener, UDP %ld", ctx->udp_session_handle);
@@ -1670,15 +1687,6 @@ quic_on_quic_session_connected (quic_ctx_t * ctx)
quic_proto_on_close (ctx_id, thread_index);
return;
}
-
- /* If the app opens a stream in its callback it may invalidate ctx */
- ctx = quic_ctx_get (ctx_id, thread_index);
- /*
- * app_worker_connect_notify() might have reallocated pool, reload
- * quic_session pointer
- */
- quic_session = session_get (ctx->c_s_index, thread_index);
- quic_session->session_state = SESSION_STATE_LISTENING;
}
static void
@@ -2105,7 +2113,6 @@ quic_accept_connection (quic_rx_packet_ctx_t * pctx)
quic_session = session_alloc (ctx->c_thread_index);
QUIC_DBG (2, "Allocated quic_session, 0x%lx ctx %u",
session_handle (quic_session), ctx->c_c_index);
- quic_session->session_state = SESSION_STATE_LISTENING;
ctx->c_s_index = quic_session->session_index;
lctx = quic_ctx_get (ctx->listener_ctx_id, 0);
@@ -2131,6 +2138,7 @@ quic_accept_connection (quic_rx_packet_ctx_t * pctx)
}
app_wrk = app_worker_get (quic_session->app_wrk_index);
+ quic_session->session_state = SESSION_STATE_ACCEPTING;
if ((rv = app_worker_accept_notify (app_wrk, quic_session)))
{
QUIC_ERR ("failed to notify accept worker app");
@@ -2416,7 +2424,6 @@ quic_get_transport_endpoint (u32 ctx_index, u32 thread_index,
quic_common_get_transport_endpoint (ctx, tep, is_lcl);
}
-/* *INDENT-OFF* */
static session_cb_vft_t quic_app_cb_vft = {
.session_accept_callback = quic_udp_session_accepted_callback,
.session_disconnect_callback = quic_udp_session_disconnect_callback,
@@ -2452,7 +2459,6 @@ static const transport_proto_vft_t quic_proto = {
.service_type = TRANSPORT_SERVICE_APP,
},
};
-/* *INDENT-ON* */
static quicly_stream_open_t on_stream_open = { quic_on_stream_open };
static quicly_closed_by_remote_t on_closed_by_remote = {
@@ -2498,6 +2504,11 @@ quic_init (vlib_main_t * vm)
u64 options[APP_OPTIONS_N_OPTIONS];
quic_main_t *qm = &quic_main;
u32 num_threads, i;
+ u8 seed[32];
+
+ if (syscall (SYS_getrandom, &seed, sizeof (seed), 0) != sizeof (seed))
+ return clib_error_return_unix (0, "getrandom() failed");
+ RAND_seed (seed, sizeof (seed));
num_threads = 1 /* main thread */ + vtm->n_threads;
@@ -2550,6 +2561,7 @@ quic_init (vlib_main_t * vm)
transport_register_protocol (TRANSPORT_PROTO_QUIC, &quic_proto,
FIB_PROTOCOL_IP6, ~0);
+ quic_load_openssl3_legacy_provider ();
clib_bitmap_alloc (qm->available_crypto_engines,
app_crypto_engine_n_types ());
quic_register_cipher_suite (CRYPTO_ENGINE_PICOTLS,
@@ -2563,14 +2575,19 @@ quic_init (vlib_main_t * vm)
qm->vnet_crypto_enabled = 1;
if (qm->vnet_crypto_enabled == 1)
{
+ u8 empty_key[32] = {};
quic_register_cipher_suite (CRYPTO_ENGINE_VPP,
quic_crypto_cipher_suites);
qm->default_crypto_engine = CRYPTO_ENGINE_VPP;
+ vec_validate (qm->per_thread_crypto_key_indices, num_threads);
+ for (i = 0; i < num_threads; i++)
+ {
+ qm->per_thread_crypto_key_indices[i] = vnet_crypto_key_add (
+ vm, VNET_CRYPTO_ALG_AES_256_CTR, empty_key, 32);
+ }
}
qm->max_packets_per_key = DEFAULT_MAX_PACKETS_PER_KEY;
- clib_rwlock_init (&qm->crypto_keys_quic_rw_lock);
-
qm->default_quic_cc = QUIC_CC_RENO;
vec_free (a->name);
@@ -2651,7 +2668,6 @@ quic_get_counter_value (u32 event_code)
u32 code, i;
u64 c, sum = 0;
- int index = 0;
vm = vlib_get_main ();
em = &vm->error_main;
@@ -2666,7 +2682,6 @@ quic_get_counter_value (u32 event_code)
if (i < vec_len (em->counters_last_clear))
c -= em->counters_last_clear[i];
sum += c;
- index++;
}
return sum;
}
@@ -2683,7 +2698,6 @@ quic_show_aggregated_stats (vlib_main_t * vm)
clib_memset (&agg_stats, 0, sizeof (agg_stats));
for (i = 0; i < num_workers + 1; i++)
{
- /* *INDENT-OFF* */
pool_foreach (ctx, qm->ctx_pool[i])
{
if (quic_ctx_is_conn (ctx) && ctx->conn)
@@ -2703,7 +2717,6 @@ quic_show_aggregated_stats (vlib_main_t * vm)
else if (quic_ctx_is_stream (ctx))
nstream++;
}
- /* *INDENT-ON* */
}
vlib_cli_output (vm, "-------- Connections --------");
vlib_cli_output (vm, "Current: %u", nconn);
@@ -2878,7 +2891,6 @@ quic_show_connections_command_fn (vlib_main_t * vm,
for (int i = 0; i < num_workers + 1; i++)
{
- /* *INDENT-OFF* */
pool_foreach (ctx, qm->ctx_pool[i])
{
if (quic_ctx_is_stream (ctx) && show_stream)
@@ -2888,7 +2900,6 @@ quic_show_connections_command_fn (vlib_main_t * vm,
else if (quic_ctx_is_conn (ctx) && show_conn)
vlib_cli_output (vm, "%U", quic_format_connection_ctx, ctx);
}
- /* *INDENT-ON* */
}
done:
@@ -2896,7 +2907,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (quic_plugin_crypto_command, static) = {
.path = "quic set crypto api",
.short_help = "quic set crypto api [picotls|vpp]",
@@ -2937,7 +2947,6 @@ VLIB_PLUGIN_REGISTER () =
.description = "Quic transport protocol",
.default_disabled = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
quic_config_fn (vlib_main_t * vm, unformat_input_t * input)
@@ -2957,7 +2966,7 @@ quic_config_fn (vlib_main_t * vm, unformat_input_t * input)
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "fifo-size %U", unformat_memory_size, &tmp))
+ if (unformat (line_input, "fifo-size %U", unformat_memory_size, &tmp))
{
if (tmp >= 0x100000000ULL)
{
@@ -2968,9 +2977,9 @@ quic_config_fn (vlib_main_t * vm, unformat_input_t * input)
}
qm->udp_fifo_size = tmp;
}
- else if (unformat (input, "conn-timeout %u", &i))
+ else if (unformat (line_input, "conn-timeout %u", &i))
qm->connection_timeout = i;
- else if (unformat (input, "fifo-prealloc %u", &i))
+ else if (unformat (line_input, "fifo-prealloc %u", &i))
qm->udp_fifo_prealloc = i;
else
{
@@ -2993,7 +3002,6 @@ quic_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (quic_input_node) =
{
.function = quic_node_fn,
@@ -3003,7 +3011,6 @@ VLIB_REGISTER_NODE (quic_input_node) =
.n_errors = ARRAY_LEN (quic_error_strings),
.error_strings = quic_error_strings,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/quic/quic.h b/src/plugins/quic/quic.h
index 901bdbc39b2..2c5a21c01a4 100644
--- a/src/plugins/quic/quic.h
+++ b/src/plugins/quic/quic.h
@@ -263,8 +263,7 @@ typedef struct quic_main_
u32 connection_timeout;
u8 vnet_crypto_enabled;
-
- clib_rwlock_t crypto_keys_quic_rw_lock;
+ u32 *per_thread_crypto_key_indices;
} quic_main_t;
#endif /* __included_quic_h__ */
diff --git a/src/plugins/quic/quic_crypto.c b/src/plugins/quic/quic_crypto.c
index 602b3f8570c..c5cc5a4a714 100644
--- a/src/plugins/quic/quic_crypto.c
+++ b/src/plugins/quic/quic_crypto.c
@@ -15,22 +15,31 @@
#include <quic/quic.h>
#include <quic/quic_crypto.h>
+#include <vnet/session/session.h>
#include <quicly.h>
#include <picotls/openssl.h>
+#include <pthread.h>
#define QUICLY_EPOCH_1RTT 3
extern quic_main_t quic_main;
-extern quic_ctx_t *quic_get_conn_ctx (quicly_conn_t * conn);
+extern quic_ctx_t *quic_get_conn_ctx (quicly_conn_t *conn);
vnet_crypto_main_t *cm = &crypto_main;
+typedef struct crypto_key_
+{
+ vnet_crypto_alg_t algo;
+ u8 key[32];
+ u16 key_len;
+} crypto_key_t;
+
struct cipher_context_t
{
ptls_cipher_context_t super;
vnet_crypto_op_t op;
vnet_crypto_op_id_t id;
- u32 key_index;
+ crypto_key_t key;
};
struct aead_crypto_context_t
@@ -39,7 +48,8 @@ struct aead_crypto_context_t
EVP_CIPHER_CTX *evp_ctx;
uint8_t static_iv[PTLS_MAX_IV_SIZE];
vnet_crypto_op_t op;
- u32 key_index;
+ crypto_key_t key;
+
vnet_crypto_op_id_t id;
uint8_t iv[PTLS_MAX_IV_SIZE];
};
@@ -114,6 +124,29 @@ Exit:
return ret;
}
+static u32
+quic_crypto_set_key (crypto_key_t *key)
+{
+ u8 thread_index = vlib_get_thread_index ();
+ u32 key_id = quic_main.per_thread_crypto_key_indices[thread_index];
+ vnet_crypto_key_t *vnet_key = vnet_crypto_get_key (key_id);
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_crypto_engine_t *engine;
+
+ vec_foreach (engine, cm->engines)
+ if (engine->key_op_handler)
+ engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_DEL, key_id);
+
+ vnet_key->alg = key->algo;
+ clib_memcpy (vnet_key->data, key->key, key->key_len);
+
+ vec_foreach (engine, cm->engines)
+ if (engine->key_op_handler)
+ engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_ADD, key_id);
+
+ return key_id;
+}
+
static size_t
quic_crypto_aead_decrypt (quic_ctx_t *qctx, ptls_aead_context_t *_ctx,
void *_output, const void *input, size_t inlen,
@@ -132,7 +165,7 @@ quic_crypto_aead_decrypt (quic_ctx_t *qctx, ptls_aead_context_t *_ctx,
decrypted_pn);
ctx->op.src = (u8 *) input;
ctx->op.dst = _output;
- ctx->op.key_index = ctx->key_index;
+ ctx->op.key_index = quic_crypto_set_key (&ctx->key);
ctx->op.len = inlen - ctx->super.algo->tag_size;
ctx->op.tag_len = ctx->super.algo->tag_size;
ctx->op.tag = ctx->op.src + ctx->op.len;
@@ -143,7 +176,7 @@ quic_crypto_aead_decrypt (quic_ctx_t *qctx, ptls_aead_context_t *_ctx,
}
void
-quic_crypto_decrypt_packet (quic_ctx_t * qctx, quic_rx_packet_ctx_t * pctx)
+quic_crypto_decrypt_packet (quic_ctx_t *qctx, quic_rx_packet_ctx_t *pctx)
{
ptls_cipher_context_t *header_protection = NULL;
ptls_aead_context_t *aead = NULL;
@@ -172,28 +205,26 @@ quic_crypto_decrypt_packet (quic_ctx_t * qctx, quic_rx_packet_ctx_t * pctx)
/* decipher the header protection, as well as obtaining pnbits, pnlen */
if (encrypted_len < header_protection->algo->iv_size + QUICLY_MAX_PN_SIZE)
return;
- ptls_cipher_init (header_protection,
- pctx->packet.octets.base + pctx->packet.encrypted_off +
- QUICLY_MAX_PN_SIZE);
+ ptls_cipher_init (header_protection, pctx->packet.octets.base +
+ pctx->packet.encrypted_off +
+ QUICLY_MAX_PN_SIZE);
ptls_cipher_encrypt (header_protection, hpmask, hpmask, sizeof (hpmask));
pctx->packet.octets.base[0] ^=
- hpmask[0] & (QUICLY_PACKET_IS_LONG_HEADER (pctx->packet.octets.base[0]) ?
- 0xf : 0x1f);
+ hpmask[0] &
+ (QUICLY_PACKET_IS_LONG_HEADER (pctx->packet.octets.base[0]) ? 0xf : 0x1f);
pnlen = (pctx->packet.octets.base[0] & 0x3) + 1;
for (i = 0; i != pnlen; ++i)
{
pctx->packet.octets.base[pctx->packet.encrypted_off + i] ^=
hpmask[i + 1];
- pnbits =
- (pnbits << 8) | pctx->packet.octets.base[pctx->packet.encrypted_off +
- i];
+ pnbits = (pnbits << 8) |
+ pctx->packet.octets.base[pctx->packet.encrypted_off + i];
}
size_t aead_off = pctx->packet.encrypted_off + pnlen;
- pn =
- quicly_determine_packet_number (pnbits, pnlen * 8,
- next_expected_packet_number);
+ pn = quicly_determine_packet_number (pnbits, pnlen * 8,
+ next_expected_packet_number);
int key_phase_bit =
(pctx->packet.octets.base[0] & QUICLY_KEY_PHASE_BIT) != 0;
@@ -203,7 +234,7 @@ quic_crypto_decrypt_packet (quic_ctx_t * qctx, quic_rx_packet_ctx_t * pctx)
pctx->packet.octets.base[0] ^=
hpmask[0] &
(QUICLY_PACKET_IS_LONG_HEADER (pctx->packet.octets.base[0]) ? 0xf :
- 0x1f);
+ 0x1f);
for (i = 0; i != pnlen; ++i)
{
pctx->packet.octets.base[pctx->packet.encrypted_off + i] ^=
@@ -218,8 +249,8 @@ quic_crypto_decrypt_packet (quic_ctx_t * qctx, quic_rx_packet_ctx_t * pctx)
pctx->packet.octets.len - aead_off, pn, pctx->packet.octets.base,
aead_off)) == SIZE_MAX)
{
- fprintf (stderr,
- "%s: aead decryption failure (pn: %d)\n", __FUNCTION__, pn);
+ fprintf (stderr, "%s: aead decryption failure (pn: %d)\n", __FUNCTION__,
+ pn);
return;
}
@@ -260,7 +291,7 @@ quic_crypto_encrypt_packet (struct st_quicly_crypto_engine_t *engine,
aead_ctx->op.iv = aead_ctx->iv;
ptls_aead__build_iv (aead_ctx->super.algo, aead_ctx->op.iv,
aead_ctx->static_iv, packet_number);
- aead_ctx->op.key_index = aead_ctx->key_index;
+ aead_ctx->op.key_index = quic_crypto_set_key (&aead_ctx->key);
aead_ctx->op.src = (u8 *) input;
aead_ctx->op.dst = output;
aead_ctx->op.len = inlen;
@@ -280,7 +311,8 @@ quic_crypto_encrypt_packet (struct st_quicly_crypto_engine_t *engine,
vnet_crypto_op_init (&hp_ctx->op, hp_ctx->id);
memset (supp.output, 0, sizeof (supp.output));
hp_ctx->op.iv = (u8 *) supp.input;
- hp_ctx->op.key_index = hp_ctx->key_index;
+ hp_ctx->op.key_index = quic_crypto_set_key (&hp_ctx->key);
+ ;
hp_ctx->op.src = (u8 *) supp.output;
hp_ctx->op.dst = (u8 *) supp.output;
hp_ctx->op.len = sizeof (supp.output);
@@ -301,7 +333,6 @@ quic_crypto_cipher_setup_crypto (ptls_cipher_context_t *_ctx, int is_enc,
{
struct cipher_context_t *ctx = (struct cipher_context_t *) _ctx;
- vlib_main_t *vm = vlib_get_main ();
vnet_crypto_alg_t algo;
if (!strcmp (ctx->super.algo->name, "AES128-CTR"))
{
@@ -326,24 +357,26 @@ quic_crypto_cipher_setup_crypto (ptls_cipher_context_t *_ctx, int is_enc,
if (quic_main.vnet_crypto_enabled)
{
- clib_rwlock_writer_lock (&quic_main.crypto_keys_quic_rw_lock);
- ctx->key_index =
- vnet_crypto_key_add (vm, algo, (u8 *) key, _ctx->algo->key_size);
- clib_rwlock_writer_unlock (&quic_main.crypto_keys_quic_rw_lock);
+ // ctx->key_index =
+ // quic_crypto_go_setup_key (algo, key, _ctx->algo->key_size);
+ ctx->key.algo = algo;
+ ctx->key.key_len = _ctx->algo->key_size;
+ assert (ctx->key.key_len <= 32);
+ clib_memcpy (&ctx->key.key, key, ctx->key.key_len);
}
return 0;
}
static int
-quic_crypto_aes128ctr_setup_crypto (ptls_cipher_context_t * ctx, int is_enc,
+quic_crypto_aes128ctr_setup_crypto (ptls_cipher_context_t *ctx, int is_enc,
const void *key)
{
return quic_crypto_cipher_setup_crypto (ctx, 1, key, EVP_aes_128_ctr ());
}
static int
-quic_crypto_aes256ctr_setup_crypto (ptls_cipher_context_t * ctx, int is_enc,
+quic_crypto_aes256ctr_setup_crypto (ptls_cipher_context_t *ctx, int is_enc,
const void *key)
{
return quic_crypto_cipher_setup_crypto (ctx, 1, key, EVP_aes_256_ctr ());
@@ -354,7 +387,6 @@ quic_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc,
const void *key, const void *iv,
const EVP_CIPHER *cipher)
{
- vlib_main_t *vm = vlib_get_main ();
struct aead_crypto_context_t *ctx = (struct aead_crypto_context_t *) _ctx;
vnet_crypto_alg_t algo;
@@ -382,11 +414,12 @@ quic_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc,
if (quic_main.vnet_crypto_enabled)
{
clib_memcpy (ctx->static_iv, iv, ctx->super.algo->iv_size);
-
- clib_rwlock_writer_lock (&quic_main.crypto_keys_quic_rw_lock);
- ctx->key_index = vnet_crypto_key_add (vm, algo,
- (u8 *) key, _ctx->algo->key_size);
- clib_rwlock_writer_unlock (&quic_main.crypto_keys_quic_rw_lock);
+ // ctx->key_index =
+ // quic_crypto_go_setup_key (algo, key, _ctx->algo->key_size);
+ ctx->key.algo = algo;
+ ctx->key.key_len = _ctx->algo->key_size;
+ assert (ctx->key.key_len <= 32);
+ clib_memcpy (&ctx->key.key, key, ctx->key.key_len);
}
return 0;
@@ -469,6 +502,7 @@ ptls_cipher_algorithm_t quic_crypto_aes256ctr = {
quic_crypto_aes256ctr_setup_crypto
};
+#define PTLS_X86_CACHE_LINE_ALIGN_BITS 6
ptls_aead_algorithm_t quic_crypto_aes128gcm = {
"AES128-GCM",
PTLS_AESGCM_CONFIDENTIALITY_LIMIT,
@@ -478,6 +512,9 @@ ptls_aead_algorithm_t quic_crypto_aes128gcm = {
PTLS_AES128_KEY_SIZE,
PTLS_AESGCM_IV_SIZE,
PTLS_AESGCM_TAG_SIZE,
+ { PTLS_TLS12_AESGCM_FIXED_IV_SIZE, PTLS_TLS12_AESGCM_RECORD_IV_SIZE },
+ 1,
+ PTLS_X86_CACHE_LINE_ALIGN_BITS,
sizeof (struct aead_crypto_context_t),
quic_crypto_aead_aes128gcm_setup_crypto
};
@@ -491,18 +528,21 @@ ptls_aead_algorithm_t quic_crypto_aes256gcm = {
PTLS_AES256_KEY_SIZE,
PTLS_AESGCM_IV_SIZE,
PTLS_AESGCM_TAG_SIZE,
+ { PTLS_TLS12_AESGCM_FIXED_IV_SIZE, PTLS_TLS12_AESGCM_RECORD_IV_SIZE },
+ 1,
+ PTLS_X86_CACHE_LINE_ALIGN_BITS,
sizeof (struct aead_crypto_context_t),
quic_crypto_aead_aes256gcm_setup_crypto
};
ptls_cipher_suite_t quic_crypto_aes128gcmsha256 = {
- PTLS_CIPHER_SUITE_AES_128_GCM_SHA256,
- &quic_crypto_aes128gcm, &ptls_openssl_sha256
+ PTLS_CIPHER_SUITE_AES_128_GCM_SHA256, &quic_crypto_aes128gcm,
+ &ptls_openssl_sha256
};
ptls_cipher_suite_t quic_crypto_aes256gcmsha384 = {
- PTLS_CIPHER_SUITE_AES_256_GCM_SHA384,
- &quic_crypto_aes256gcm, &ptls_openssl_sha384
+ PTLS_CIPHER_SUITE_AES_256_GCM_SHA384, &quic_crypto_aes256gcm,
+ &ptls_openssl_sha384
};
ptls_cipher_suite_t *quic_crypto_cipher_suites[] = {
diff --git a/src/plugins/quic/quic_crypto.h b/src/plugins/quic/quic_crypto.h
index 2adb20237a3..7299b613053 100644
--- a/src/plugins/quic/quic_crypto.h
+++ b/src/plugins/quic/quic_crypto.h
@@ -18,6 +18,19 @@
#include <quicly.h>
+#if OPENSSL_VERSION_NUMBER >= 0x30000000L
+#include <openssl/provider.h>
+
+#define quic_load_openssl3_legacy_provider() \
+ do \
+ { \
+ (void) OSSL_PROVIDER_load (NULL, "legacy"); \
+ } \
+ while (0)
+#else
+#define quic_load_openssl3_legacy_provider()
+#endif
+
struct quic_ctx_t;
extern ptls_cipher_suite_t *quic_crypto_cipher_suites[];
diff --git a/src/plugins/rdma/CMakeLists.txt b/src/plugins/rdma/CMakeLists.txt
index f598ff8c701..ef8bc90c6dd 100644
--- a/src/plugins/rdma/CMakeLists.txt
+++ b/src/plugins/rdma/CMakeLists.txt
@@ -19,17 +19,16 @@ if (NOT IBVERBS_INCLUDE_DIR)
endif()
vpp_plugin_find_library(rdma IBVERBS_LIB libibverbs.a)
-vpp_plugin_find_library(rdma RDMA_UTIL_LIB librdma_util.a)
vpp_plugin_find_library(rdma MLX5_LIB libmlx5.a)
-if (NOT IBVERBS_LIB OR NOT RDMA_UTIL_LIB OR NOT MLX5_LIB)
+if (NOT IBVERBS_LIB OR NOT MLX5_LIB)
message(WARNING "rdma plugin - ibverbs not found - rdma plugin disabled")
return()
endif()
-string_append(RDMA_LINK_FLAGS "-Wl,--whole-archive,${MLX5_LIB},--no-whole-archive")
+string_append(RDMA_LINK_FLAGS "-Wl,--whole-archive,${MLX5_LIB},--no-whole-archive -Wl,--exclude-libs,ALL")
-set(CMAKE_REQUIRED_FLAGS "-fPIC -shared -pthread -Wno-unused-command-line-argument ${RDMA_LINK_FLAGS} ${IBVERBS_LIB} ${RDMA_UTIL_LIB}")
+set(CMAKE_REQUIRED_FLAGS "-fPIC -shared -pthread -Wno-unused-command-line-argument ${RDMA_LINK_FLAGS} ${IBVERBS_LIB}")
set(CMAKE_REQUIRED_INCLUDES "${IBVERBS_INCLUDE_DIR}")
set(CMAKE_REQUIRED_LIBRARIES "c") # force linkage by including libc explicitely
CHECK_C_SOURCE_COMPILES("
@@ -73,5 +72,4 @@ add_vpp_plugin(rdma
LINK_LIBRARIES
${IBVERBS_LIB}
- ${RDMA_UTIL_LIB}
)
diff --git a/src/plugins/rdma/api.c b/src/plugins/rdma/api.c
index 7fe77105596..3fb17ff6ee0 100644
--- a/src/plugins/rdma/api.c
+++ b/src/plugins/rdma/api.c
@@ -27,6 +27,7 @@
#include <rdma/rdma.api_enum.h>
#include <rdma/rdma.api_types.h>
+#define REPLY_MSG_ID_BASE (rm->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static rdma_mode_t
@@ -41,6 +42,8 @@ rdma_api_mode (vl_api_rdma_mode_t mode)
case RDMA_API_MODE_DV:
return RDMA_MODE_DV;
}
+ /* Fail the debug build. Useful for investigating endian issues. */
+ ASSERT (0);
return RDMA_MODE_AUTO;
}
@@ -79,6 +82,35 @@ rdma_api_rss6 (const vl_api_rdma_rss6_t rss6)
}
static void
+vl_api_rdma_create_v4_t_handler (vl_api_rdma_create_v4_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ rdma_main_t *rm = &rdma_main;
+ vl_api_rdma_create_v4_reply_t *rmp;
+ rdma_create_if_args_t args;
+ int rv;
+
+ clib_memset (&args, 0, sizeof (rdma_create_if_args_t));
+
+ args.ifname = mp->host_if;
+ args.name = mp->name;
+ args.rxq_num = mp->rxq_num;
+ args.rxq_size = mp->rxq_size;
+ args.txq_size = mp->txq_size;
+ args.mode = rdma_api_mode (mp->mode);
+ args.disable_striding_rq = 0;
+ args.no_multi_seg = mp->no_multi_seg;
+ args.max_pktlen = mp->max_pktlen;
+ args.rss4 = rdma_api_rss4 (mp->rss4);
+ args.rss6 = rdma_api_rss6 (mp->rss6);
+ rdma_create_if (vm, &args);
+ rv = args.rv;
+
+ REPLY_MACRO2_END (VL_API_RDMA_CREATE_V4_REPLY,
+ ({ rmp->sw_if_index = args.sw_if_index; }));
+}
+
+static void
vl_api_rdma_create_v3_t_handler (vl_api_rdma_create_v3_t *mp)
{
vlib_main_t *vm = vlib_get_main ();
@@ -103,7 +135,7 @@ vl_api_rdma_create_v3_t_handler (vl_api_rdma_create_v3_t *mp)
rdma_create_if (vm, &args);
rv = args.rv;
- REPLY_MACRO2 (VL_API_RDMA_CREATE_V3_REPLY + rm->msg_id_base,
+ REPLY_MACRO2 (VL_API_RDMA_CREATE_V3_REPLY,
({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
@@ -130,12 +162,8 @@ vl_api_rdma_create_v2_t_handler (vl_api_rdma_create_v2_t * mp)
rdma_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_RDMA_CREATE_V2_REPLY + rm->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_RDMA_CREATE_V2_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -162,12 +190,8 @@ vl_api_rdma_create_t_handler (vl_api_rdma_create_t * mp)
rdma_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_RDMA_CREATE_REPLY + rm->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_RDMA_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -195,7 +219,7 @@ vl_api_rdma_delete_t_handler (vl_api_rdma_delete_t * mp)
rdma_delete_if (vm, rd);
reply:
- REPLY_MACRO (VL_API_RDMA_DELETE_REPLY + rm->msg_id_base);
+ REPLY_MACRO (VL_API_RDMA_DELETE_REPLY);
}
/* set tup the API message handling tables */
diff --git a/src/plugins/rdma/cli.c b/src/plugins/rdma/cli.c
index 8f191e34b63..bcedd625220 100644
--- a/src/plugins/rdma/cli.c
+++ b/src/plugins/rdma/cli.c
@@ -44,17 +44,15 @@ rdma_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (rdma_create_command, static) = {
.path = "create interface rdma",
.short_help = "create interface rdma <host-if ifname> [name <name>]"
- " [rx-queue-size <size>] [tx-queue-size <size>]"
- " [num-rx-queues <size>] [mode <auto|ibv|dv]"
- " [no-multi-seg] [no-striding]"
- " [max-pktlen <size>]",
+ " [rx-queue-size <size>] [tx-queue-size <size>]"
+ " [num-rx-queues <size>] [mode <auto|ibv|dv>]"
+ " [no-multi-seg] [no-striding]"
+ " [max-pktlen <size>]",
.function = rdma_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
rdma_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -99,14 +97,12 @@ rdma_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (rdma_delete_command, static) = {
.path = "delete interface rdma",
.short_help = "delete interface rdma "
"{<interface> | sw_if_index <sw_idx>}",
.function = rdma_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_rdma_dump_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -162,13 +158,11 @@ test_rdma_dump_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_rdma_mlx5dv_dump_command, static) = {
.path = "test rdma dump",
.short_help = "test rdma dump {<interface> | sw_if_index <sw_idx>}",
.function = test_rdma_dump_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
rdma_cli_init (vlib_main_t * vm)
diff --git a/src/plugins/rdma/device.c b/src/plugins/rdma/device.c
index 1198d99b14e..8aeb586a42d 100644
--- a/src/plugins/rdma/device.c
+++ b/src/plugins/rdma/device.c
@@ -183,11 +183,11 @@ rdma_mac_change (vnet_hw_interface_t * hw, const u8 * old, const u8 * new)
return 0;
}
-static u32
-rdma_dev_change_mtu (rdma_device_t * rd)
+static clib_error_t *
+rdma_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+ u32 frame_size)
{
- rdma_log__ (VLIB_LOG_LEVEL_ERR, rd, "MTU change not supported");
- return ~0;
+ return vnet_error (VNET_ERR_UNSUPPORTED, 0);
}
static u32
@@ -202,8 +202,6 @@ rdma_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
return rdma_dev_set_ucast (rd);
case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
return rdma_dev_set_promisc (rd);
- case ETHERNET_INTERFACE_FLAG_MTU:
- return rdma_dev_change_mtu (rd);
}
rdma_log__ (VLIB_LOG_LEVEL_ERR, rd, "unknown flag %x requested", flags);
@@ -355,18 +353,20 @@ rdma_async_event_cleanup (rdma_device_t * rd)
static clib_error_t *
rdma_register_interface (vnet_main_t * vnm, rdma_device_t * rd)
{
- clib_error_t *err =
- ethernet_register_interface (vnm, rdma_device_class.index,
- rd->dev_instance, rd->hwaddr.bytes,
- &rd->hw_if_index, rdma_flag_change);
-
+ vnet_eth_interface_registration_t eir = {};
+
+ eir.dev_class_index = rdma_device_class.index;
+ eir.dev_instance = rd->dev_instance;
+ eir.address = rd->hwaddr.bytes;
+ eir.cb.flag_change = rdma_flag_change;
+ eir.cb.set_max_frame_size = rdma_set_max_frame_size;
+ rd->hw_if_index = vnet_eth_register_interface (vnm, &eir);
/* Indicate ability to support L3 DMAC filtering and
* initialize interface to L3 non-promisc mode */
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, rd->hw_if_index);
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER;
+ vnet_hw_if_set_caps (vnm, rd->hw_if_index, VNET_HW_IF_CAP_MAC_FILTER);
ethernet_set_flags (vnm, rd->hw_if_index,
ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
- return err;
+ return 0;
}
static void
@@ -445,9 +445,10 @@ rdma_rxq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc,
if (is_mlx5dv)
{
struct mlx5dv_cq_init_attr dvcq = { };
- dvcq.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
+ dvcq.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE |
+ MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE;
dvcq.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
-
+ dvcq.cqe_size = 64;
if ((cqex = mlx5dv_create_cq (rd->ctx, &cqa, &dvcq)) == 0)
return clib_error_return_unix (0, "Create mlx5dv rx CQ Failed");
}
@@ -717,15 +718,30 @@ rdma_txq_init (vlib_main_t * vm, rdma_device_t * rd, u16 qid, u32 n_desc)
struct ibv_qp_init_attr qpia;
struct ibv_qp_attr qpa;
int qp_flags;
+ int is_mlx5dv = !!(rd->flags & RDMA_DEVICE_F_MLX5DV);
vec_validate_aligned (rd->txqs, qid, CLIB_CACHE_LINE_BYTES);
txq = vec_elt_at_index (rd->txqs, qid);
ASSERT (is_pow2 (n_desc));
txq->bufs_log2sz = min_log2 (n_desc);
vec_validate_aligned (txq->bufs, n_desc - 1, CLIB_CACHE_LINE_BYTES);
-
- if ((txq->cq = ibv_create_cq (rd->ctx, n_desc, NULL, NULL, 0)) == 0)
- return clib_error_return_unix (0, "Create CQ Failed");
+ if (is_mlx5dv)
+ {
+ struct ibv_cq_init_attr_ex cqa = {};
+ struct ibv_cq_ex *cqex;
+ struct mlx5dv_cq_init_attr dvcq = {};
+ dvcq.comp_mask = MLX5DV_CQ_INIT_ATTR_MASK_CQE_SIZE;
+ dvcq.cqe_size = 64;
+ cqa.cqe = n_desc;
+ if ((cqex = mlx5dv_create_cq (rd->ctx, &cqa, &dvcq)) == 0)
+ return clib_error_return_unix (0, "Create mlx5dv tx CQ Failed");
+ txq->cq = ibv_cq_ex_to_cq (cqex);
+ }
+ else
+ {
+ if ((txq->cq = ibv_create_cq (rd->ctx, n_desc, NULL, NULL, 0)) == 0)
+ return clib_error_return_unix (0, "Create CQ Failed");
+ }
memset (&qpia, 0, sizeof (qpia));
qpia.send_cq = txq->cq;
@@ -866,7 +882,7 @@ sysfs_path_to_pci_addr (char *path, vlib_pci_addr_t * addr)
unformat_input_t in;
u8 *s;
- s = clib_sysfs_link_to_name (path);
+ s = clib_file_get_resolved_basename (path);
if (!s)
return 0;
@@ -1022,7 +1038,7 @@ are explicitly disabled, and if the interface supports it.*/
/*
* FIXME: add support for interrupt mode
* vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, rd->hw_if_index);
- * hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ * hw->caps |= VNET_HW_IF_CAP_INT_MODE;
*/
vnet_hw_if_set_input_node (vnm, rd->hw_if_index, rdma_input_node.index);
@@ -1136,15 +1152,4 @@ rdma_init (vlib_main_t * vm)
return 0;
}
-VLIB_INIT_FUNCTION (rdma_init) =
-{
- .runs_after = VLIB_INITS ("pci_bus_init"),
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+VLIB_INIT_FUNCTION (rdma_init);
diff --git a/src/plugins/rdma/format.c b/src/plugins/rdma/format.c
index aada52a1ec3..a999460bd55 100644
--- a/src/plugins/rdma/format.c
+++ b/src/plugins/rdma/format.c
@@ -58,13 +58,13 @@ format_rdma_bit_flag (u8 * s, va_list * args)
while (flags)
{
- if ((flags & (1 << i)))
+ if ((flags & ((u64) 1 << i)))
{
if (i < n_strs && strs[i] != 0)
s = format (s, " %s", strs[i]);
else
s = format (s, " unknown(%u)", i);
- flags ^= 1 << i;
+ flags ^= (u64) 1 << i;
}
i++;
}
@@ -122,8 +122,8 @@ format_rdma_device (u8 * s, va_list * args)
format_vlib_pci_addr, &rd->pci->addr);
if ((d = vlib_pci_get_device_info (vm, &rd->pci->addr, 0)))
{
- s = format (s, "%Uproduct name: %s\n", format_white_space, indent,
- d->product_name ? (char *) d->product_name : "");
+ s = format (s, "%Uproduct name: %v\n", format_white_space, indent,
+ d->product_name);
s = format (s, "%Upart number: %U\n", format_white_space, indent,
format_vlib_pci_vpd, d->vpd_r, "PN");
s = format (s, "%Urevision: %U\n", format_white_space, indent,
@@ -281,7 +281,7 @@ format_rdma_rxq (u8 * s, va_list * args)
if (rd->flags & RDMA_DEVICE_F_MLX5DV)
{
- u32 next_cqe_index = rxq->cq_ci & (rxq->size - 1);
+ u32 next_cqe_index = rxq->cq_ci & ((1 << rxq->log2_cq_size) - 1);
s = format (s, "\n%Uwq: stride %u wqe-cnt %u",
format_white_space, indent + 2, rxq->wq_stride,
rxq->wqe_cnt);
@@ -292,9 +292,8 @@ format_rdma_rxq (u8 * s, va_list * args)
next_cqe_index);
s = format (s, "\n%U%U", format_white_space, indent + 6,
format_mlx5_cqe_rx, rxq->cqes + next_cqe_index);
- s = format (s, "\n%U%U", format_white_space, indent + 6,
- format_hexdump, rxq->cqes + next_cqe_index,
- sizeof (mlx5dv_cqe_t));
+ s = format (s, "\n%U%U", format_white_space, indent + 6, format_hexdump,
+ rxq->cqes + next_cqe_index, (u32) sizeof (mlx5dv_cqe_t));
}
return s;
diff --git a/src/plugins/rdma/input.c b/src/plugins/rdma/input.c
index f1c508affa2..a7d41a1684d 100644
--- a/src/plugins/rdma/input.c
+++ b/src/plugins/rdma/input.c
@@ -228,7 +228,6 @@ rdma_device_input_refill (vlib_main_t * vm, rdma_device_t * rd,
about what RDMA core does (CYCLIC_RQ or LINKED_LIST_RQ). In cyclic
mode, the SRQ header is ignored anyways... */
-/* *INDENT-OFF* */
if (is_striding && !(current_data_seg & (wqe_sz - 1)))
*(mlx5dv_wqe_srq_next_t *) wqe = (mlx5dv_wqe_srq_next_t)
{
@@ -237,7 +236,6 @@ rdma_device_input_refill (vlib_main_t * vm, rdma_device_t * rd,
.signature = 0,
.rsvd1 = {0}
};
-/* *INDENT-ON* */
/* TODO: when log_skip_wqe > 2, hw_prefetcher doesn't work, lots of LLC store
misses occur for wqes, to be fixed... */
@@ -609,6 +607,7 @@ rdma_device_poll_cq_mlx5dv (rdma_device_t * rd, rdma_rxq_t * rxq,
n_rx_packets++;
cq_ci++;
byte_cnt++;
+ cqe_flags++;
continue;
}
@@ -670,46 +669,77 @@ rdma_device_mlx5dv_l3_validate_and_swap_bc (rdma_per_thread_data_t
* ptd, int n_rx_packets, u32 * bc)
{
u16 mask = CQE_FLAG_L3_HDR_TYPE_MASK | CQE_FLAG_L3_OK;
- u16 match = CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT;
+ u16 match =
+ CQE_FLAG_L3_HDR_TYPE_IP4 << CQE_FLAG_L3_HDR_TYPE_SHIFT | CQE_FLAG_L3_OK;
+
+ /* convert mask/match to big endian for subsequant comparison */
+ mask = clib_host_to_net_u16 (mask);
+ match = clib_host_to_net_u16 (match);
/* verify that all ip4 packets have l3_ok flag set and convert packet
length from network to host byte order */
int skip_ip4_cksum = 1;
+ int n_left = n_rx_packets;
+ u16 *cqe_flags = ptd->cqe_flags;
#if defined CLIB_HAVE_VEC256
- u16x16 mask16 = u16x16_splat (mask);
- u16x16 match16 = u16x16_splat (match);
- u16x16 r = { };
+ if (n_left >= 16)
+ {
+ u16x16 mask16 = u16x16_splat (mask);
+ u16x16 match16 = u16x16_splat (match);
+ u16x16 r16 = {};
+
+ while (n_left >= 16)
+ {
+ r16 |= (*(u16x16 *) cqe_flags & mask16) != match16;
- for (int i = 0; i * 16 < n_rx_packets; i++)
- r |= (ptd->cqe_flags16[i] & mask16) != match16;
+ *(u32x8 *) bc = u32x8_byte_swap (*(u32x8 *) bc);
+ *(u32x8 *) (bc + 8) = u32x8_byte_swap (*(u32x8 *) (bc + 8));
- if (!u16x16_is_all_zero (r))
- skip_ip4_cksum = 0;
+ cqe_flags += 16;
+ bc += 16;
+ n_left -= 16;
+ }
- for (int i = 0; i < n_rx_packets; i += 8)
- *(u32x8 *) (bc + i) = u32x8_byte_swap (*(u32x8 *) (bc + i));
+ if (!u16x16_is_all_zero (r16))
+ skip_ip4_cksum = 0;
+ }
#elif defined CLIB_HAVE_VEC128
- u16x8 mask8 = u16x8_splat (mask);
- u16x8 match8 = u16x8_splat (match);
- u16x8 r = { };
+ if (n_left >= 8)
+ {
+ u16x8 mask8 = u16x8_splat (mask);
+ u16x8 match8 = u16x8_splat (match);
+ u16x8 r8 = {};
- for (int i = 0; i * 8 < n_rx_packets; i++)
- r |= (ptd->cqe_flags8[i] & mask8) != match8;
+ while (n_left >= 8)
+ {
+ r8 |= (*(u16x8 *) cqe_flags & mask8) != match8;
- if (!u16x8_is_all_zero (r))
- skip_ip4_cksum = 0;
+ *(u32x4 *) bc = u32x4_byte_swap (*(u32x4 *) bc);
+ *(u32x4 *) (bc + 4) = u32x4_byte_swap (*(u32x4 *) (bc + 4));
- for (int i = 0; i < n_rx_packets; i += 4)
- *(u32x4 *) (bc + i) = u32x4_byte_swap (*(u32x4 *) (bc + i));
-#else
- for (int i = 0; i < n_rx_packets; i++)
- if ((ptd->cqe_flags[i] & mask) != match)
- skip_ip4_cksum = 0;
+ cqe_flags += 8;
+ bc += 8;
+ n_left -= 8;
+ }
- for (int i = 0; i < n_rx_packets; i++)
- bc[i] = clib_net_to_host_u32 (bc[i]);
+ if (!u16x8_is_all_zero (r8))
+ skip_ip4_cksum = 0;
+ }
#endif
+
+ while (n_left >= 1)
+ {
+ if ((cqe_flags[0] & mask) != match)
+ skip_ip4_cksum = 0;
+
+ bc[0] = clib_net_to_host_u32 (bc[0]);
+
+ cqe_flags += 1;
+ bc += 1;
+ n_left -= 1;
+ }
+
return skip_ip4_cksum;
}
@@ -945,7 +975,7 @@ rdma_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* update buffer template for input feature arcs if any */
next_index = rd->per_interface_next_index;
if (PREDICT_FALSE (vnet_device_input_have_features (rd->sw_if_index)))
- vnet_feature_start_device_input_x1 (rd->sw_if_index, &next_index, &bt);
+ vnet_feature_start_device_input (rd->sw_if_index, &next_index, &bt);
vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -1028,7 +1058,7 @@ VLIB_NODE_FN (rdma_input_node) (vlib_main_t * vm,
if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_ADMIN_UP) == 0)
continue;
- if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_ERROR))
+ if (PREDICT_FALSE (rd->flags & RDMA_DEVICE_F_ERROR))
continue;
if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
@@ -1041,7 +1071,6 @@ VLIB_NODE_FN (rdma_input_node) (vlib_main_t * vm,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (rdma_input_node) = {
.name = "rdma-input",
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -1053,7 +1082,6 @@ VLIB_REGISTER_NODE (rdma_input_node) = {
.error_strings = rdma_input_error_strings,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/rdma/output.c b/src/plugins/rdma/output.c
index 3cc3ab74437..8574ac32e61 100644
--- a/src/plugins/rdma/output.c
+++ b/src/plugins/rdma/output.c
@@ -480,20 +480,20 @@ rdma_device_output_tx_ibverb (vlib_main_t * vm,
* common tx/free functions
*/
-static_always_inline void
-rdma_device_output_free (vlib_main_t * vm, const vlib_node_runtime_t * node,
- rdma_txq_t * txq, int is_mlx5dv)
+static void
+rdma_device_output_free (vlib_main_t *vm, const vlib_node_runtime_t *node,
+ const rdma_device_t *rd, rdma_txq_t *txq)
{
- if (is_mlx5dv)
+ if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
rdma_device_output_free_mlx5 (vm, node, txq);
else
rdma_device_output_free_ibverb (vm, node, txq);
}
-static_always_inline u32
-rdma_device_output_tx_try (vlib_main_t * vm, const vlib_node_runtime_t * node,
- const rdma_device_t * rd, rdma_txq_t * txq,
- u32 n_left_from, u32 * bi, int is_mlx5dv)
+static u32
+rdma_device_output_tx_try (vlib_main_t *vm, const vlib_node_runtime_t *node,
+ const rdma_device_t *rd, rdma_txq_t *txq,
+ u32 n_left_from, u32 *bi)
{
vlib_buffer_t *b[VLIB_FRAME_SIZE];
const u32 mask = pow2_mask (txq->bufs_log2sz);
@@ -511,30 +511,28 @@ rdma_device_output_tx_try (vlib_main_t * vm, const vlib_node_runtime_t * node,
vlib_get_buffers (vm, bi, b, n_left_from);
- n_left_from = is_mlx5dv ?
- rdma_device_output_tx_mlx5 (vm, node, rd, txq, n_left_from, bi,
- b) : rdma_device_output_tx_ibverb (vm, node,
- rd, txq,
- n_left_from,
- bi, b);
+ if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
+ n_left_from =
+ rdma_device_output_tx_mlx5 (vm, node, rd, txq, n_left_from, bi, b);
+ else
+ n_left_from =
+ rdma_device_output_tx_ibverb (vm, node, rd, txq, n_left_from, bi, b);
return n_left_from;
}
-static_always_inline uword
+static uword
rdma_device_output_tx (vlib_main_t *vm, vlib_node_runtime_t *node,
rdma_device_t *rd, rdma_txq_t *txq, u32 *from,
- u32 n_left_from, int is_mlx5dv)
+ u32 n_left_from)
{
int i;
for (i = 0; i < RDMA_TX_RETRIES && n_left_from > 0; i++)
{
u32 n_enq;
- rdma_device_output_free (vm, node, txq, is_mlx5dv);
- n_enq = rdma_device_output_tx_try (vm, node, rd, txq, n_left_from, from,
- is_mlx5dv);
-
+ rdma_device_output_free (vm, node, rd, txq);
+ n_enq = rdma_device_output_tx_try (vm, node, rd, txq, n_left_from, from);
n_left_from -= n_enq;
from += n_enq;
}
@@ -560,12 +558,7 @@ VNET_DEVICE_CLASS_TX_FN (rdma_device_class) (vlib_main_t * vm,
clib_spinlock_lock_if_init (&txq->lock);
- if (PREDICT_TRUE (rd->flags & RDMA_DEVICE_F_MLX5DV))
- n_left = rdma_device_output_tx (vm, node, rd, txq, from, n_buffers,
- 1 /* is_mlx5dv */);
- else
- n_left = rdma_device_output_tx (vm, node, rd, txq, from, n_buffers,
- 0 /* is_mlx5dv */);
+ n_left = rdma_device_output_tx (vm, node, rd, txq, from, n_buffers);
clib_spinlock_unlock_if_init (&txq->lock);
diff --git a/src/plugins/rdma/plugin.c b/src/plugins/rdma/plugin.c
index b0dddee42b6..0d2cccc96f8 100644
--- a/src/plugins/rdma/plugin.c
+++ b/src/plugins/rdma/plugin.c
@@ -19,12 +19,10 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "RDMA IBverbs Device Driver",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/rdma/rdma.api b/src/plugins/rdma/rdma.api
index f2c70c7e514..4c06d8c6658 100644
--- a/src/plugins/rdma/rdma.api
+++ b/src/plugins/rdma/rdma.api
@@ -98,6 +98,8 @@ enum rdma_rss6
};
/** \brief
+ Same as v4, just not an autoendian (expect buggy handling of flag values).
+
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param host_if - Linux netdev interface name
@@ -114,6 +116,9 @@ enum rdma_rss6
define rdma_create_v3
{
+ option deprecated;
+ option replaced_by="rdma_create_v4";
+
u32 client_index;
u32 context;
@@ -130,6 +135,38 @@ define rdma_create_v3
option vat_help = "<host-if ifname> [name <name>] [rx-queue-size <size>] [tx-queue-size <size>] [num-rx-queues <size>] [mode <auto|ibv|dv>] [no-multi-seg] [max-pktlen <size>] [rss <ipv4|ipv4-udp|ipv4-tcp>] [rss <ipv6|ipv6-udp|ipv6-tcp>]";
};
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param host_if - Linux netdev interface name
+ @param name - new rdma interface name
+ @param rxq_num - number of receive queues (optional)
+ @param rxq_size - receive queue size (optional)
+ @param txq_size - transmit queue size (optional)
+ @param mode - operation mode (optional)
+ @param no_multi_seg (optional) - disable chained buffer RX
+ @param max_pktlen (optional) - maximal RX packet size.
+ @param rss4 (optional) - IPv4 RSS
+ @param rss6 (optional) - IPv6 RSS
+*/
+
+autoendian define rdma_create_v4
+{
+ u32 client_index;
+ u32 context;
+
+ string host_if[64];
+ string name[64];
+ u16 rxq_num [default=1];
+ u16 rxq_size [default=1024];
+ u16 txq_size [default=1024];
+ vl_api_rdma_mode_t mode [default=0];
+ bool no_multi_seg [default=0];
+ u16 max_pktlen [default=0];
+ vl_api_rdma_rss4_t rss4 [default=0];
+ vl_api_rdma_rss6_t rss6 [default=0];
+ option vat_help = "<host-if ifname> [name <name>] [rx-queue-size <size>] [tx-queue-size <size>] [num-rx-queues <size>] [mode <auto|ibv|dv>] [no-multi-seg] [max-pktlen <size>] [rss <ipv4|ipv4-udp|ipv4-tcp>] [rss <ipv6|ipv6-udp|ipv6-tcp>]";
+};
/** \brief
@param context - sender context, to match reply w/ request
@@ -139,6 +176,8 @@ define rdma_create_v3
define rdma_create_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
@@ -152,6 +191,8 @@ define rdma_create_reply
define rdma_create_v2_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
@@ -176,6 +217,19 @@ define rdma_create_v3_reply
@param sw_if_index - interface index
*/
+autoendian define rdma_create_v4_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index
+*/
+
autoreply define rdma_delete
{
u32 client_index;
diff --git a/src/plugins/rdma/rdma_doc.md b/src/plugins/rdma/rdma_doc.md
deleted file mode 100644
index 3fed5b6fc49..00000000000
--- a/src/plugins/rdma/rdma_doc.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# RDMA (ibverb) Ethernet driver {#rdma_doc}
-
-This driver relies on Linux rdma-core (libibverb) userspace poll-mode driver
-to rx/tx Ethernet packets. Despite using the RDMA APIs, this is **not** about
-RDMA (no Infiniband, no RoCE, no iWARP), just pure traditional Ethernet
-packets.
-
-## Maturity level
-Under development: it should work, but has not been thoroughly tested.
-
-## Supported Hardware
- - Mellanox ConnectX-4
- - Mellanox ConnectX-5
-
-## Features
- - bifurcation: MAC based flow steering for transparent sharing of a single
-physical port between multiple virtual interfaces including Linux netdev
- - multiqueue
-
-## Security considerations
-When creating a rdma interface, it will receive all packets to the MAC address
-attributed to the interface plus a copy of all broadcast and multicast
-traffic.
-The MAC address is under the control of VPP: **the user controlling VPP can
-divert all traffic of any MAC address to the VPP process, including the Linux
-netdev MAC address as long as it can create a rdma interface**.
-The rights to create a rdma interface are controlled by the access rights of
-the `/dev/infiniband/uverbs[0-9]+`device nodes.
-
-## Quickstart
-1. Make sure the `ib_uverbs` module is loaded:
-```
-~# modprobe ib_uverbs
-```
-2. In VPP, create a new rdma virtual interface tied to the Linux netdev of the
-physical port you want to use (`enp94s0f0` in this example):
-```
-vpp# create int rdma host-if enp94s0f0 name rdma-0
-```
-3. Use the interface as usual, eg.:
-```
-vpp# set int ip addr rdma-0 1.1.1.1/24
-vpp# set int st rdma-0 up
-vpp# ping 1.1.1.100`
-```
-
-## Containers support
-It should work in containers as long as:
- - the `ib_uverbs` module is loaded
- - the device nodes `/dev/infiniband/uverbs[0-9]+` are usable from the
- container (but see [security considerations](#Security considerations))
-
-## SR-IOV VFs support
-It should work on SR-IOV VFs the same way it does with PFs. Because of VFs
-security containment features, make sure the MAC address of the rdma VPP
-interface matches the MAC address assigned to the underlying VF.
-For example:
-```
-host# echo 1 > /sys/class/infiniband/mlx5_0/device/sriov_numvfs
-host# ip l set dev enp94s0f0 vf 0 mac 92:5d:f5:df:b1:6f spoof on trust off
-host# ip l set dev enp94s0f2 up
-vpp# create int rdma host-if enp94s0f2 name rdma-0
-vpp# set int mac address rdma-0 92:5d:f5:df:b1:6f
-```
-If you plan to use L2 features such as switching, make sure the underlying
-VF is configured in trusted mode and spoof-checking is disabled (of course, be
-aware of the [security considerations](#Security considerations)):
-```
-host# ip l set dev enp94s0f0 vf 0 spoof off trust on
-```
-
-## Direct Verb mode
-Direct Verb allows the driver to access the NIC HW RX/TX rings directly
-instead of having to go through libibverb and suffering associated overhead.
-It will be automatically selected if the adapter supports it.
diff --git a/src/plugins/rdma/rdma_doc.rst b/src/plugins/rdma/rdma_doc.rst
new file mode 100644
index 00000000000..c22ea550a75
--- /dev/null
+++ b/src/plugins/rdma/rdma_doc.rst
@@ -0,0 +1,102 @@
+RDMA (ibverb) device driver
+===========================
+
+This driver relies on Linux rdma-core (libibverb) userspace poll-mode
+driver to rx/tx Ethernet packets. Despite using the RDMA APIs, this is
+**not** about RDMA (no Infiniband, no RoCE, no iWARP), just pure
+traditional Ethernet packets.
+
+Maturity level
+--------------
+
+Under development: it should work, but has not been thoroughly tested.
+
+Supported Hardware
+------------------
+
+- Mellanox ConnectX-4
+- Mellanox ConnectX-5
+
+Features
+--------
+
+- bifurcation: MAC based flow steering for transparent sharing of a
+ single physical port between multiple virtual interfaces including
+ Linux netdev
+- multiqueue
+
+Security considerations
+-----------------------
+
+When creating a rdma interface, it will receive all packets to the MAC
+address attributed to the interface plus a copy of all broadcast and
+multicast traffic. The MAC address is under the control of VPP: **the
+user controlling VPP can divert all traffic of any MAC address to the
+VPP process, including the Linux netdev MAC address as long as it can
+create a rdma interface**. The rights to create a rdma interface are
+controlled by the access rights of the
+``/dev/infiniband/uverbs[0-9]+``\ device nodes.
+
+Quickstart
+----------
+
+1. Make sure the ``ib_uverbs`` module is loaded:
+
+::
+
+ ~# modprobe ib_uverbs
+
+2. In VPP, create a new rdma virtual interface tied to the Linux netdev
+ of the physical port you want to use (``enp94s0f0`` in this example):
+
+::
+
+ vpp# create int rdma host-if enp94s0f0 name rdma-0
+
+3. Use the interface as usual, e.g.:
+
+::
+
+ vpp# set int ip addr rdma-0 1.1.1.1/24
+ vpp# set int st rdma-0 up
+ vpp# ping 1.1.1.100`
+
+Containers support
+------------------
+
+It should work in containers as long as: - the ``ib_uverbs`` module is
+loaded - the device nodes ``/dev/infiniband/uverbs[0-9]+`` are usable
+from the container (but see `security
+considerations <#Security%20considerations>`__)
+
+SR-IOV VFs support
+------------------
+
+It should work on SR-IOV VFs the same way it does with PFs. Because of
+VFs security containment features, make sure the MAC address of the rdma
+VPP interface matches the MAC address assigned to the underlying VF. For
+example:
+
+::
+
+ host# echo 1 > /sys/class/infiniband/mlx5_0/device/sriov_numvfs
+ host# ip l set dev enp94s0f0 vf 0 mac 92:5d:f5:df:b1:6f spoof on trust off
+ host# ip l set dev enp94s0f2 up
+ vpp# create int rdma host-if enp94s0f2 name rdma-0
+ vpp# set int mac address rdma-0 92:5d:f5:df:b1:6f
+
+If you plan to use L2 features such as switching, make sure the
+underlying VF is configured in trusted mode and spoof-checking is
+disabled (of course, be aware of the `security
+considerations <#Security%20considerations>`__):
+
+::
+
+ host# ip l set dev enp94s0f0 vf 0 spoof off trust on
+
+Direct Verb mode
+----------------
+
+Direct Verb allows the driver to access the NIC HW RX/TX rings directly
+instead of having to go through libibverb and suffering associated
+overhead. It will be automatically selected if the adapter supports it.
diff --git a/src/plugins/rdma/rdma_mlx5dv.h b/src/plugins/rdma/rdma_mlx5dv.h
index efcefe7fbf7..bf01a3a37d6 100644
--- a/src/plugins/rdma/rdma_mlx5dv.h
+++ b/src/plugins/rdma/rdma_mlx5dv.h
@@ -24,16 +24,16 @@
#include <vppinfra/types.h>
#include <vppinfra/error.h>
/* CQE flags - bits 16-31 of qword at offset 0x1c */
-#define CQE_FLAG_L4_OK 10
-#define CQE_FLAG_L3_OK 9
-#define CQE_FLAG_L2_OK 8
-#define CQE_FLAG_IP_FRAG 7
+#define CQE_FLAG_L4_OK (1 << 10)
+#define CQE_FLAG_L3_OK (1 << 9)
+#define CQE_FLAG_L2_OK (1 << 8)
+#define CQE_FLAG_IP_FRAG (1 << 7)
#define CQE_FLAG_L4_HDR_TYPE(f) (((f) >> 4) & 7)
#define CQE_FLAG_L3_HDR_TYPE_SHIFT (2)
#define CQE_FLAG_L3_HDR_TYPE_MASK (3 << CQE_FLAG_L3_HDR_TYPE_SHIFT)
#define CQE_FLAG_L3_HDR_TYPE(f) (((f) & CQE_FLAG_L3_HDR_TYPE_MASK) >> CQE_FLAG_L3_HDR_TYPE_SHIFT)
-#define CQE_FLAG_L3_HDR_TYPE_IP4 1
-#define CQE_FLAG_L3_HDR_TYPE_IP6 2
+#define CQE_FLAG_L3_HDR_TYPE_IP4 2
+#define CQE_FLAG_L3_HDR_TYPE_IP6 1
#define CQE_FLAG_IP_EXT_OPTS 1
/* CQE byte count (Striding RQ) */
diff --git a/src/plugins/rdma/test_api.c b/src/plugins/rdma/test_api.c
index e9d5fcaad98..4ec4d3bf345 100644
--- a/src/plugins/rdma/test_api.c
+++ b/src/plugins/rdma/test_api.c
@@ -189,6 +189,41 @@ api_rdma_create_v3 (vat_main_t *vam)
return ret;
}
+static int
+api_rdma_create_v4 (vat_main_t *vam)
+{
+ vl_api_rdma_create_v4_t *mp;
+ rdma_create_if_args_t args;
+ int ret;
+
+ if (!unformat_user (vam->input, unformat_rdma_create_if_args, &args))
+ {
+ clib_warning ("unknown input `%U'", format_unformat_error, vam->input);
+ return -99;
+ }
+
+ M (RDMA_CREATE_V4, mp);
+
+ snprintf ((char *) mp->host_if, sizeof (mp->host_if), "%s", args.ifname);
+ if (args.name)
+ snprintf ((char *) mp->name, sizeof (mp->name), "%s", args.name);
+ else
+ mp->name[0] = 0;
+ mp->rxq_num = args.rxq_num;
+ mp->rxq_size = args.rxq_size;
+ mp->txq_size = args.txq_size;
+ mp->mode = api_rdma_mode (args.mode);
+ mp->no_multi_seg = args.no_multi_seg;
+ mp->max_pktlen = args.max_pktlen;
+ mp->rss4 = api_rdma_rss4 (args.rss4);
+ mp->rss6 = api_rdma_rss6 (args.rss6);
+
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
/* rdma-create reply handler */
static void
vl_api_rdma_create_reply_t_handler (vl_api_rdma_create_reply_t * mp)
@@ -243,6 +278,24 @@ vl_api_rdma_create_v3_reply_t_handler (vl_api_rdma_create_v3_reply_t *mp)
vam->regenerate_interface_table = 1;
}
+/* rdma-create reply handler v4 */
+static void
+vl_api_rdma_create_v4_reply_t_handler (vl_api_rdma_create_v4_reply_t *mp)
+{
+ vat_main_t *vam = rdma_test_main.vat_main;
+ i32 retval = mp->retval;
+
+ if (retval == 0)
+ {
+ fformat (vam->ofp, "created rdma with sw_if_index %d\n",
+ ntohl (mp->sw_if_index));
+ }
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+ vam->regenerate_interface_table = 1;
+}
+
/* rdma delete API */
static int
api_rdma_delete (vat_main_t * vam)
diff --git a/src/plugins/snort/cli.c b/src/plugins/snort/cli.c
index cbb33c7abe8..08740f41b37 100644
--- a/src/plugins/snort/cli.c
+++ b/src/plugins/snort/cli.c
@@ -85,6 +85,7 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input,
clib_error_t *err = 0;
u8 *name = 0;
u32 sw_if_index = ~0;
+ snort_attach_dir_t dir = SNORT_INOUT;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -97,6 +98,12 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input,
;
else if (unformat (line_input, "instance %s", &name))
;
+ else if (unformat (line_input, "input"))
+ dir = SNORT_INPUT;
+ else if (unformat (line_input, "output"))
+ dir = SNORT_OUTPUT;
+ else if (unformat (line_input, "inout"))
+ dir = SNORT_INOUT;
else
{
err = clib_error_return (0, "unknown input `%U'",
@@ -117,7 +124,8 @@ snort_attach_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
}
- err = snort_interface_enable_disable (vm, (char *) name, sw_if_index, 1);
+ err =
+ snort_interface_enable_disable (vm, (char *) name, sw_if_index, 1, dir);
done:
vec_free (name);
@@ -127,7 +135,8 @@ done:
VLIB_CLI_COMMAND (snort_attach_command, static) = {
.path = "snort attach",
- .short_help = "snort attach instance <name> interface <if-name>",
+ .short_help = "snort attach instance <name> interface <if-name> "
+ "[input|ouput|inout]",
.function = snort_attach_command_fn,
};
@@ -163,7 +172,7 @@ snort_detach_command_fn (vlib_main_t *vm, unformat_input_t *input,
goto done;
}
- err = snort_interface_enable_disable (vm, 0, sw_if_index, 0);
+ err = snort_interface_enable_disable (vm, 0, sw_if_index, 0, SNORT_INOUT);
done:
unformat_free (line_input);
diff --git a/src/plugins/snort/daq_vpp.c b/src/plugins/snort/daq_vpp.c
index 090b28af6f4..386092a0382 100644
--- a/src/plugins/snort/daq_vpp.c
+++ b/src/plugins/snort/daq_vpp.c
@@ -113,6 +113,7 @@ typedef struct _vpp_context
daq_vpp_input_mode_t input_mode;
const char *socket_name;
+ volatile bool interrupted;
} VPP_Context_t;
static VPP_Context_t *global_vpp_ctx = 0;
@@ -480,6 +481,16 @@ vpp_daq_start (void *handle)
}
static int
+vpp_daq_interrupt (void *handle)
+{
+ VPP_Context_t *vc = (VPP_Context_t *) handle;
+
+ vc->interrupted = true;
+
+ return DAQ_SUCCESS;
+}
+
+static int
vpp_daq_get_stats (void *handle, DAQ_Stats_t *stats)
{
memset (stats, 0, sizeof (DAQ_Stats_t));
@@ -532,6 +543,7 @@ vpp_daq_msg_receive_one (VPP_Context_t *vc, VPPQueuePair *qp,
dd->pkthdr.pktlen = d->length;
dd->pkthdr.address_space_id = d->address_space_id;
dd->msg.data = vc->bpools[d->buffer_pool].base + d->offset;
+ dd->msg.data_len = d->length;
next = next + 1;
msgs[0] = &dd->msg;
@@ -550,7 +562,16 @@ vpp_daq_msg_receive (void *handle, const unsigned max_recv,
{
VPP_Context_t *vc = (VPP_Context_t *) handle;
uint32_t n_qpairs_left = vc->num_qpairs;
- uint32_t n, n_events, n_recv = 0;
+ uint32_t n, n_recv = 0;
+ int32_t n_events;
+
+ /* If the receive has been interrupted, break out of loop and return. */
+ if (vc->interrupted)
+ {
+ vc->interrupted = false;
+ *rstat = DAQ_RSTAT_INTERRUPTED;
+ return 0;
+ }
/* first, we visit all qpairs. If we find any work there then we can give
* it back immediatelly. To avoid bias towards qpair 0 we remeber what
@@ -586,9 +607,14 @@ vpp_daq_msg_receive (void *handle, const unsigned max_recv,
n_events = epoll_wait (vc->epoll_fd, vc->epoll_events, vc->num_qpairs, 1000);
- if (n_events < 1)
+ if (n_events == 0)
{
- *rstat = n_events == -1 ? DAQ_RSTAT_ERROR : DAQ_RSTAT_TIMEOUT;
+ *rstat = DAQ_RSTAT_TIMEOUT;
+ return 0;
+ }
+ if (n_events < 0)
+ {
+ *rstat = errno == EINTR ? DAQ_RSTAT_TIMEOUT : DAQ_RSTAT_ERROR;
return 0;
}
@@ -602,8 +628,7 @@ vpp_daq_msg_receive (void *handle, const unsigned max_recv,
msgs += n;
n_recv += n;
}
-
- (void) read (qp->enq_fd, &ctr, sizeof (ctr));
+ ssize_t __clib_unused size = read (qp->enq_fd, &ctr, sizeof (ctr));
}
*rstat = DAQ_RSTAT_OK;
@@ -676,7 +701,7 @@ const DAQ_ModuleAPI_t DAQ_MODULE_DATA = {
/* .start = */ vpp_daq_start,
/* .inject = */ NULL,
/* .inject_relative = */ NULL,
- /* .interrupt = */ NULL,
+ /* .interrupt = */ vpp_daq_interrupt,
/* .stop = */ NULL,
/* .ioctl = */ NULL,
/* .get_stats = */ vpp_daq_get_stats,
diff --git a/src/plugins/snort/daq_vpp.h b/src/plugins/snort/daq_vpp.h
index 3b875aa15ad..ebec55435f3 100644
--- a/src/plugins/snort/daq_vpp.h
+++ b/src/plugins/snort/daq_vpp.h
@@ -67,7 +67,7 @@ typedef enum
typedef struct
{
- uint32_t offset;
+ uint64_t offset;
uint16_t length;
uint16_t address_space_id;
uint8_t buffer_pool;
diff --git a/src/plugins/snort/dequeue.c b/src/plugins/snort/dequeue.c
index d597b88f7a8..31745de404c 100644
--- a/src/plugins/snort/dequeue.c
+++ b/src/plugins/snort/dequeue.c
@@ -187,9 +187,9 @@ snort_deq_node_interrupt (vlib_main_t *vm, vlib_node_runtime_t *node,
snort_instance_t *si;
int inst = -1;
- while ((inst = clib_interrupt_get_next (ptd->interrupts, inst)) != -1)
+ while ((inst = clib_interrupt_get_next_and_clear (ptd->interrupts, inst)) !=
+ -1)
{
- clib_interrupt_clear (ptd->interrupts, inst);
si = vec_elt_at_index (sm->instances, inst);
qp = vec_elt_at_index (si->qpairs, vm->thread_index);
u32 ready = __atomic_load_n (&qp->ready, __ATOMIC_ACQUIRE);
diff --git a/src/plugins/snort/enqueue.c b/src/plugins/snort/enqueue.c
index 3f44e8013fd..409c0e49078 100644
--- a/src/plugins/snort/enqueue.c
+++ b/src/plugins/snort/enqueue.c
@@ -75,9 +75,16 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
while (n_left)
{
+ u64 fa_data;
u32 instance_index, next_index, n;
- instance_index =
- *(u32 *) vnet_feature_next_with_data (&next_index, b[0], sizeof (u32));
+ u32 l3_offset;
+
+ fa_data =
+ *(u64 *) vnet_feature_next_with_data (&next_index, b[0], sizeof (u64));
+
+ instance_index = (u32) (fa_data & 0xffffffff);
+ l3_offset =
+ (fa_data >> 32) ? vnet_buffer (b[0])->ip.save_rewrite_length : 0;
si = vec_elt_at_index (sm->instances, instance_index);
/* if client isn't connected skip enqueue and take default action */
@@ -108,7 +115,7 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
/* fill descriptor */
d->buffer_pool = b[0]->buffer_pool_index;
d->length = b[0]->current_length;
- d->offset = (u8 *) b[0]->data + b[0]->current_data -
+ d->offset = (u8 *) b[0]->data + b[0]->current_data + l3_offset -
sm->buffer_pool_base_addrs[d->buffer_pool];
d->address_space_id = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
}
@@ -190,7 +197,7 @@ snort_enq_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
__atomic_store_n (qp->enq_head, head, __ATOMIC_RELEASE);
- _vec_len (qp->freelist) = freelist_len;
+ vec_set_len (qp->freelist, freelist_len);
if (sm->input_mode == VLIB_NODE_STATE_INTERRUPT)
{
if (write (qp->enq_fd, &ctr, sizeof (ctr)) < 0)
diff --git a/src/plugins/snort/main.c b/src/plugins/snort/main.c
index 37b517215bc..2430fcdc5c2 100644
--- a/src/plugins/snort/main.c
+++ b/src/plugins/snort/main.c
@@ -13,7 +13,6 @@ snort_main_t snort_main;
VLIB_REGISTER_LOG_CLASS (snort_log, static) = {
.class_name = "snort",
- .default_syslog_level = VLIB_LOG_LEVEL_DEBUG,
};
#define log_debug(fmt, ...) vlib_log_debug (snort_log.class, fmt, __VA_ARGS__)
@@ -196,9 +195,18 @@ snort_deq_ready (clib_file_t *uf)
snort_per_thread_data_t *ptd =
vec_elt_at_index (sm->per_thread_data, vm->thread_index);
u64 counter;
+ ssize_t bytes_read;
- if (read (uf->file_descriptor, &counter, sizeof (counter)) < 0)
- return clib_error_return (0, "client closed socket");
+ bytes_read = read (uf->file_descriptor, &counter, sizeof (counter));
+ if (bytes_read < 0)
+ {
+ return clib_error_return (0, "client closed socket");
+ }
+
+ if (bytes_read < sizeof (counter))
+ {
+ return clib_error_return (0, "unexpected truncated read");
+ }
clib_interrupt_set (ptd->interrupts, uf->private_data);
vlib_node_set_interrupt_pending (vm, snort_deq_node.index);
@@ -251,8 +259,10 @@ snort_listener_init (vlib_main_t *vm)
s = clib_mem_alloc (sizeof (clib_socket_t));
clib_memset (s, 0, sizeof (clib_socket_t));
s->config = (char *) sm->socket_name;
- s->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_ALLOW_GROUP_WRITE |
- CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_PASSCRED;
+ s->is_server = 1;
+ s->allow_group_write = 1;
+ s->is_seqpacket = 1;
+ s->passcred = 1;
if ((err = clib_socket_init (s)))
{
@@ -299,8 +309,8 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz,
/* enq and deq head pointer */
qpair_mem_sz += 2 * round_pow2 (sizeof (u32), align);
- size =
- round_pow2 (tm->n_vlib_mains * qpair_mem_sz, clib_mem_get_page_size ());
+ size = round_pow2 ((uword) tm->n_vlib_mains * qpair_mem_sz,
+ clib_mem_get_page_size ());
fd = clib_mem_vm_create_fd (CLIB_MEM_PAGE_SZ_DEFAULT, "snort instance %s",
name);
@@ -386,7 +396,7 @@ snort_instance_create (vlib_main_t *vm, char *name, u8 log2_queue_sz,
for (i = 0; i < vlib_get_n_threads (); i++)
vlib_node_set_state (vlib_get_main_by_index (i), snort_deq_node.index,
- VLIB_NODE_STATE_INTERRUPT);
+ sm->input_mode);
done:
if (err)
@@ -401,12 +411,14 @@ done:
clib_error_t *
snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
- u32 sw_if_index, int is_enable)
+ u32 sw_if_index, int is_enable,
+ snort_attach_dir_t snort_dir)
{
snort_main_t *sm = &snort_main;
vnet_main_t *vnm = vnet_get_main ();
snort_instance_t *si;
clib_error_t *err = 0;
+ u64 fa_data;
u32 index;
if (is_enable)
@@ -432,8 +444,18 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
}
index = sm->instance_by_sw_if_index[sw_if_index] = si->index;
- vnet_feature_enable_disable ("ip4-unicast", "snort-enq", sw_if_index, 1,
- &index, sizeof (index));
+ if (snort_dir & SNORT_INPUT)
+ {
+ fa_data = (u64) index;
+ vnet_feature_enable_disable ("ip4-unicast", "snort-enq", sw_if_index,
+ 1, &fa_data, sizeof (fa_data));
+ }
+ if (snort_dir & SNORT_OUTPUT)
+ {
+ fa_data = (1LL << 32 | index);
+ vnet_feature_enable_disable ("ip4-output", "snort-enq", sw_if_index,
+ 1, &fa_data, sizeof (fa_data));
+ }
}
else
{
@@ -451,8 +473,18 @@ snort_interface_enable_disable (vlib_main_t *vm, char *instance_name,
si = vec_elt_at_index (sm->instances, index);
sm->instance_by_sw_if_index[sw_if_index] = ~0;
- vnet_feature_enable_disable ("ip4-unicast", "snort-enq", sw_if_index, 0,
- &index, sizeof (index));
+ if (snort_dir & SNORT_INPUT)
+ {
+ fa_data = (u64) index;
+ vnet_feature_enable_disable ("ip4-unicast", "snort-enq", sw_if_index,
+ 0, &fa_data, sizeof (fa_data));
+ }
+ if (snort_dir & SNORT_OUTPUT)
+ {
+ fa_data = (1LL << 32 | index);
+ vnet_feature_enable_disable ("ip4-output", "snort-enq", sw_if_index,
+ 0, &fa_data, sizeof (fa_data));
+ }
}
done:
@@ -490,6 +522,7 @@ static clib_error_t *
snort_init (vlib_main_t *vm)
{
snort_main_t *sm = &snort_main;
+ sm->input_mode = VLIB_NODE_STATE_INTERRUPT;
sm->instance_by_name = hash_create_string (0, sizeof (uword));
vlib_buffer_pool_t *bp;
@@ -518,3 +551,9 @@ VNET_FEATURE_INIT (snort_enq, static) = {
.node_name = "snort-enq",
.runs_before = VNET_FEATURES ("ip4-lookup"),
};
+
+VNET_FEATURE_INIT (snort_enq_out, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "snort-enq",
+ .runs_before = VNET_FEATURES ("interface-output"),
+};
diff --git a/src/plugins/snort/snort.h b/src/plugins/snort/snort.h
index d069fa01661..79299aa6d91 100644
--- a/src/plugins/snort/snort.h
+++ b/src/plugins/snort/snort.h
@@ -90,6 +90,13 @@ typedef enum
SNORT_ENQ_N_NEXT_NODES,
} snort_enq_next_t;
+typedef enum
+{
+ SNORT_INPUT = 1,
+ SNORT_OUTPUT = 2,
+ SNORT_INOUT = 3
+} snort_attach_dir_t;
+
#define SNORT_ENQ_NEXT_NODES \
{ \
[SNORT_ENQ_NEXT_DROP] = "error-drop", \
@@ -100,7 +107,8 @@ clib_error_t *snort_instance_create (vlib_main_t *vm, char *name,
u8 log2_queue_sz, u8 drop_on_disconnect);
clib_error_t *snort_interface_enable_disable (vlib_main_t *vm,
char *instance_name,
- u32 sw_if_index, int is_enable);
+ u32 sw_if_index, int is_enable,
+ snort_attach_dir_t dir);
clib_error_t *snort_set_node_mode (vlib_main_t *vm, u32 mode);
always_inline void
diff --git a/src/plugins/srtp/srtp.c b/src/plugins/srtp/srtp.c
index 58a35c31606..bb54e672918 100644
--- a/src/plugins/srtp/srtp.c
+++ b/src/plugins/srtp/srtp.c
@@ -26,7 +26,9 @@ static inline u32
srtp_ctx_alloc_w_thread (u32 thread_index)
{
srtp_tc_t *ctx;
- pool_get_zero (srtp_main.ctx_pool[thread_index], ctx);
+ pool_get_aligned_safe (srtp_main.ctx_pool[thread_index], ctx,
+ CLIB_CACHE_LINE_BYTES);
+ clib_memset (ctx, 0, sizeof (*ctx));
ctx->c_thread_index = thread_index;
ctx->srtp_ctx_handle = ctx - srtp_main.ctx_pool[thread_index];
ctx->app_session_handle = SESSION_INVALID_HANDLE;
@@ -50,6 +52,7 @@ srtp_init_policy (srtp_tc_t *ctx, transport_endpt_cfg_srtp_t *cfg)
{
sp = &ctx->srtp_policy[i];
sp_cfg = &cfg->policies[i];
+ clib_memset (sp, 0, sizeof (*sp));
srtp_crypto_policy_set_rtp_default (&sp->rtp);
srtp_crypto_policy_set_rtcp_default (&sp->rtcp);
@@ -57,7 +60,6 @@ srtp_init_policy (srtp_tc_t *ctx, transport_endpt_cfg_srtp_t *cfg)
sp->ssrc.value = sp_cfg->ssrc_value;
sp->key = clib_mem_alloc (sp_cfg->key_len);
clib_memcpy (sp->key, sp_cfg->key, sp_cfg->key_len);
- sp->ekt = 0;
sp->next = i < 1 ? &ctx->srtp_policy[i + 1] : 0;
sp->window_size = sp_cfg->window_size;
sp->allow_repeat_tx = sp_cfg->allow_repeat_tx;
@@ -84,7 +86,8 @@ srtp_ctx_attach (u32 thread_index, void *ctx_ptr)
{
srtp_tc_t *ctx;
- pool_get (srtp_main.ctx_pool[thread_index], ctx);
+ pool_get_aligned_safe (srtp_main.ctx_pool[thread_index], ctx,
+ CLIB_CACHE_LINE_BYTES);
clib_memcpy (ctx, ctx_ptr, sizeof (*ctx));
ctx->c_thread_index = thread_index;
@@ -151,6 +154,7 @@ srtp_ctx_init_client (srtp_tc_t *ctx)
app_session = session_get (ctx->c_s_index, ctx->c_thread_index);
app_session->app_wrk_index = ctx->parent_app_wrk_index;
app_session->connection_index = ctx->srtp_ctx_handle;
+ app_session->opaque = ctx->parent_app_api_context;
app_session->session_type =
session_type_from_proto_and_ip (TRANSPORT_PROTO_SRTP, ctx->udp_is_ip4);
@@ -227,7 +231,7 @@ srtp_ctx_write (srtp_tc_t *ctx, session_t *app_session,
{
u32 n_wrote = 0, to_deq, dgram_sz;
session_dgram_pre_hdr_t hdr;
- app_session_transport_t at;
+ app_session_transport_t at = {};
svm_msg_q_t *mq;
session_t *us;
u8 buf[2000];
@@ -238,12 +242,13 @@ srtp_ctx_write (srtp_tc_t *ctx, session_t *app_session,
us = session_get_from_handle (ctx->srtp_session_handle);
to_deq = svm_fifo_max_dequeue_cons (app_session->tx_fifo);
mq = session_main_get_vpp_event_queue (us->thread_index);
+ sp->bytes_dequeued = to_deq;
while (to_deq > 0)
{
/* Peeking only pre-header dgram because the session is connected */
rv = svm_fifo_peek (app_session->tx_fifo, 0, sizeof (hdr), (u8 *) &hdr);
- ASSERT (rv == sizeof (hdr) && hdr.data_length < vec_len (buf));
+ ASSERT (rv == sizeof (hdr) && hdr.data_length < 2000);
ASSERT (to_deq >= hdr.data_length + SESSION_CONN_HDR_LEN);
dgram_sz = hdr.data_length + SESSION_CONN_HDR_LEN;
@@ -296,14 +301,16 @@ done:
session_transport_closed_notify (&ctx->connection);
}
+ ASSERT (sp->bytes_dequeued >= to_deq);
+ sp->bytes_dequeued -= to_deq;
+
return n_wrote > 0 ? clib_max (n_wrote / TRANSPORT_PACER_MIN_MSS, 1) : 0;
}
int
srtp_add_vpp_q_builtin_rx_evt (session_t *s)
{
- if (svm_fifo_set_event (s->rx_fifo))
- session_send_io_evt_to_thread (s->rx_fifo, SESSION_IO_EVT_BUILTIN_RX);
+ session_enqueue_notify (s);
return 0;
}
@@ -313,7 +320,7 @@ srtp_notify_app_enqueue (srtp_tc_t *ctx, session_t *app_session)
app_worker_t *app_wrk;
app_wrk = app_worker_get_if_valid (app_session->app_wrk_index);
if (PREDICT_TRUE (app_wrk != 0))
- app_worker_lock_and_send_event (app_wrk, app_session, SESSION_IO_EVT_RX);
+ app_worker_rx_notify (app_wrk, app_session);
}
static inline int
@@ -649,6 +656,7 @@ srtp_connect (transport_endpoint_cfg_t *tep)
ctx->parent_app_api_context = sep->opaque;
ctx->udp_is_ip4 = sep->is_ip4;
ctx->srtp_ctx_handle = ctx_index;
+ ctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
srtp_init_policy (ctx, (transport_endpt_cfg_srtp_t *) sep->ext_cfg->data);
@@ -702,7 +710,7 @@ srtp_disconnect (u32 ctx_handle, u32 thread_index)
}
static u32
-srtp_start_listen (u32 app_listener_index, transport_endpoint_t *tep)
+srtp_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
{
vnet_listen_args_t _bargs, *args = &_bargs;
session_handle_t udp_al_handle;
@@ -745,6 +753,8 @@ srtp_start_listen (u32 app_listener_index, transport_endpoint_t *tep)
lctx->srtp_session_handle = udp_al_handle;
lctx->app_session_handle = listen_session_get_handle (app_listener);
lctx->udp_is_ip4 = sep->is_ip4;
+ lctx->c_s_index = app_listener_index;
+ lctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
srtp_init_policy (lctx, (transport_endpt_cfg_srtp_t *) sep->ext_cfg->data);
@@ -812,7 +822,6 @@ srtp_custom_tx_callback (void *session, transport_send_params_t *sp)
SESSION_STATE_TRANSPORT_CLOSED))
return 0;
- sp->flags = 0;
ctx = srtp_ctx_get_w_thread (app_session->connection_index,
app_session->thread_index);
if (PREDICT_FALSE (ctx->is_migrated))
diff --git a/src/plugins/srtp/srtp_plugin.md b/src/plugins/srtp/srtp_plugin.md
deleted file mode 100644
index 81185864dbe..00000000000
--- a/src/plugins/srtp/srtp_plugin.md
+++ /dev/null
@@ -1,72 +0,0 @@
-# SRTP (Secure Real-time Transport Protocol) {#srtp_doc}
-
-libsrtp2 based SRTP transport protocol implementation.
-
-## Maturity level
-Experimental
-
-## Quickstart
-
-1. Install libsrtp2-dev. On debian based OS:
-
-```
-sudo apt get install libsrtp2-dev
-```
-
-2. Build vpp
-
-```
-make build
-```
-
-3. Test protocol using vcl test server and client. On server side, start vpp and server app:
-
-```
-export VT_PATH=$WS/build-root/build-vpp_debug-native/vpp/bin
-$VT_PATH/vcl_test_server 1234 -p srtp
-```
-
-On client side:
-
-```
-export VT_PATH=$WS/build-root/build-vpp_debug-native/vpp/bin
-$VT_PATH/vcl_test_client <server-ip> 1234 -U -X -S -N 10000 -T 128 -p srtp
-```
-
-## Custom libsrtp2 build
-
-1. Create `build/external/packages/srtp.mk` with following example contents:
-
-```
-srtp_version := 2.3.0
-srtp_tarball := srtp_$(srtp_version).tar.gz
-srtp_tarball_md5sum := da38ee5d9c31be212a12964c22d7f795
-srtp_tarball_strip_dirs := 1
-srtp_url := https://github.com/cisco/libsrtp/archive/v$(srtp_version).tar.gz
-
-define srtp_build_cmds
- @cd $(srtp_build_dir) && \
- $(CMAKE) -DCMAKE_INSTALL_PREFIX:PATH=$(srtp_install_dir) \
- -DCMAKE_C_FLAGS='-fPIC -fvisibility=hidden' $(srtp_src_dir) > $(srtp_build_log)
- @$(MAKE) $(MAKE_ARGS) -C $(srtp_build_dir) > $(srtp_build_log)
-endef
-
-define srtp_config_cmds
- @true
-endef
-
-define srtp_install_cmds
- @$(MAKE) $(MAKE_ARGS) -C $(srtp_build_dir) install > $(srtp_install_log)
-endef
-
-
-$(eval $(call package,srtp))
-```
-
-2. Include `srtp.mk` in `build/external/Makefile` and add to install target.
-
-3. Rebuild external dependencies:
-
-```
-make install-ext-deps
-```
diff --git a/src/plugins/srtp/srtp_plugin.rst b/src/plugins/srtp/srtp_plugin.rst
new file mode 100644
index 00000000000..568ebb66f01
--- /dev/null
+++ b/src/plugins/srtp/srtp_plugin.rst
@@ -0,0 +1,82 @@
+SRTP Protocol
+=============
+
+This document describe the VPP SRTP (Secure Real-time Transport
+Protocol) implementation libsrtp2 based SRTP transport protocol
+implementation.
+
+Maturity level
+--------------
+
+Experimental
+
+Quickstart
+----------
+
+1. Install libsrtp2-dev. On debian based OS:
+
+::
+
+ sudo apt get install libsrtp2-dev
+
+2. Build vpp
+
+::
+
+ make build
+
+3. Test protocol using vcl test server and client. On server side, start
+ vpp and server app:
+
+::
+
+ export VT_PATH=$WS/build-root/build-vpp_debug-native/vpp/bin
+ $VT_PATH/vcl_test_server 1234 -p srtp
+
+On client side:
+
+::
+
+ export VT_PATH=$WS/build-root/build-vpp_debug-native/vpp/bin
+ $VT_PATH/vcl_test_client <server-ip> 1234 -U -X -S -N 10000 -T 128 -p srtp
+
+Custom libsrtp2 build
+---------------------
+
+1. Create ``build/external/packages/srtp.mk`` with following example
+ contents:
+
+::
+
+ srtp_version := 2.3.0
+ srtp_tarball := srtp_$(srtp_version).tar.gz
+ srtp_tarball_md5sum := da38ee5d9c31be212a12964c22d7f795
+ srtp_tarball_strip_dirs := 1
+ srtp_url := https://github.com/cisco/libsrtp/archive/v$(srtp_version).tar.gz
+
+ define srtp_build_cmds
+ @cd $(srtp_build_dir) && \
+ $(CMAKE) -DCMAKE_INSTALL_PREFIX:PATH=$(srtp_install_dir) \
+ -DCMAKE_C_FLAGS='-fPIC -fvisibility=hidden' $(srtp_src_dir) > $(srtp_build_log)
+ @$(MAKE) $(MAKE_ARGS) -C $(srtp_build_dir) > $(srtp_build_log)
+ endef
+
+ define srtp_config_cmds
+ @true
+ endef
+
+ define srtp_install_cmds
+ @$(MAKE) $(MAKE_ARGS) -C $(srtp_build_dir) install > $(srtp_install_log)
+ endef
+
+
+ $(eval $(call package,srtp))
+
+2. Include ``srtp.mk`` in ``build/external/Makefile`` and add to install
+ target.
+
+3. Rebuild external dependencies:
+
+::
+
+ make install-ext-deps
diff --git a/src/plugins/srv6-ad-flow/ad-flow.c b/src/plugins/srv6-ad-flow/ad-flow.c
index fd9706dabe1..d13a1c95969 100644
--- a/src/plugins/srv6-ad-flow/ad-flow.c
+++ b/src/plugins/srv6-ad-flow/ad-flow.c
@@ -94,14 +94,6 @@ srv6_ad_flow_localsid_creation_fn (ip6_sr_localsid_t *localsid)
return SID_CREATE_INVALID_IFACE_INDEX;
}
- vnet_sw_interface_t *sw =
- vnet_get_sw_interface (sm->vnet_main, ls_mem->sw_if_index_in);
- if (sw->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
- {
- adj_unlock (ls_mem->nh_adj);
- clib_mem_free (ls_mem);
- return SID_CREATE_INVALID_IFACE_TYPE;
- }
if (ls_mem->inner_type == AD_TYPE_IP4)
{
@@ -366,7 +358,7 @@ unformat_srv6_ad_flow_localsid (unformat_input_t *input, va_list *args)
}
/* Allocate and initialize memory block for local SID parameters */
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
clib_memset (ls_mem, 0, sizeof *ls_mem);
*plugin_mem_p = ls_mem;
diff --git a/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.md b/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.md
deleted file mode 100644
index 1f58fc2b663..00000000000
--- a/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.md
+++ /dev/null
@@ -1,25 +0,0 @@
-# SRv6 endpoint to SR-unaware appliance via per-flow dynamic proxy {#srv6_ad_flow_plugin_doc}
-
-## Overview
-
-TBD
-
-## CLI configuration
-
-The following command instantiates a new End.AD.Flow segment that sends the inner
-packets on interface `IFACE-OUT` towards an appliance at address `S-ADDR` and
-restores the encapsulation headers of the packets coming back on interface
-`IFACE-IN`.
-
-```
-sr localsid address SID behavior end.ad.flow nh S-ADDR oif IFACE-OUT iif IFACE-IN
-```
-
-For example, the below command configures the SID `1::A1` with an End.AD.Flow
-function for sending traffic on interface `GigabitEthernet0/8/0` to the
-appliance at address `A1::`, and receiving it back on interface
-`GigabitEthernet0/9/0`.
-
-```
-sr localsid address 1::A1 behavior end.ad.flow nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
-```
diff --git a/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.rst b/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.rst
new file mode 100644
index 00000000000..7e628742f84
--- /dev/null
+++ b/src/plugins/srv6-ad-flow/ad_flow_plugin_doc.rst
@@ -0,0 +1,31 @@
+SRv6 per-flow dynamic proxy
+===========================
+
+This document describes SRv6 endpoint to SR-unaware appliance via
+per-flow dynamic proxy
+
+Overview
+--------
+
+TBD
+
+CLI configuration
+-----------------
+
+The following command instantiates a new End.AD.Flow segment that sends
+the inner packets on interface ``IFACE-OUT`` towards an appliance at
+address ``S-ADDR`` and restores the encapsulation headers of the packets
+coming back on interface ``IFACE-IN``.
+
+::
+
+ sr localsid address SID behavior end.ad.flow nh S-ADDR oif IFACE-OUT iif IFACE-IN
+
+For example, the below command configures the SID ``1::A1`` with an
+End.AD.Flow function for sending traffic on interface
+``GigabitEthernet0/8/0`` to the appliance at address ``A1::``, and
+receiving it back on interface ``GigabitEthernet0/9/0``.
+
+::
+
+ sr localsid address 1::A1 behavior end.ad.flow nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
diff --git a/src/plugins/srv6-ad/ad.c b/src/plugins/srv6-ad/ad.c
index 045ddeb466d..fc8527d0f82 100644
--- a/src/plugins/srv6-ad/ad.c
+++ b/src/plugins/srv6-ad/ad.c
@@ -362,7 +362,7 @@ unformat_srv6_ad_localsid (unformat_input_t * input, va_list * args)
}
/* Allocate and initialize memory block for local SID parameters */
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
clib_memset (ls_mem, 0, sizeof *ls_mem);
*plugin_mem_p = ls_mem;
@@ -447,7 +447,6 @@ srv6_ad_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_ad2_rewrite, static) =
{
.arc_name = "device-input",
@@ -475,7 +474,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Dynamic Segment Routing for IPv6 (SRv6) Proxy",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-ad/ad_plugin_doc.md b/src/plugins/srv6-ad/ad_plugin_doc.md
deleted file mode 100644
index 993eeb63589..00000000000
--- a/src/plugins/srv6-ad/ad_plugin_doc.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# SRv6 endpoint to SR-unaware appliance via dynamic proxy (End.AD) {#srv6_ad_plugin_doc}
-
-## Overview
-
-The dynamic proxy is an improvement over the static proxy (@ref srv6_as_plugin_doc)
-that dynamically learns the SR information before removing it from the incoming
-traffic. The same information can then be re-attached to the traffic returning
-from the SF. As opposed to the static SR proxy, no CACHE information needs to be
-configured. Instead, the dynamic SR proxy relies on a local caching mechanism on
-the node instantiating this segment. Therefore, a dynamic proxy segment cannot
-be the last segment in an SR SC policy. A different SR behavior should thus be
-used if the SF is meant to be the final destination of an SR SC policy.
-
-Upon receiving a packet whose active segment matches a dynamic SR proxy
-function, the proxy node pops the top MPLS label or applies the SRv6 End
-behavior, then compares the updated SR information with the cache entry for the
-current segment. If the cache is empty or different, it is updated with the new
-SR information. The SR information is then removed and the inner packet is sent
-towards the SF.
-
-The cache entry is not mapped to any particular packet, but instead to an SR SC
-policy identified by the receiving interface (IFACE-IN). Any non-link-local IP
-packet or non-local Ethernet frame received on that interface will be
-re-encapsulated with the cached headers as described in @ref srv6_as_plugin_doc. The
-SF may thus drop, modify or generate new packets without affecting the proxy.
-
-For more information, please see
-[draft-xuclad-spring-sr-service-chaining](https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/).
-
-## CLI configuration
-
-The following command instantiates a new End.AD segment that sends the inner
-packets on interface `IFACE-OUT` towards an appliance at address `S-ADDR` and
-restores the encapsulation headers of the packets coming back on interface
-`IFACE-IN`.
-
-```
-sr localsid address SID behavior end.ad nh S-ADDR oif IFACE-OUT iif IFACE-IN
-```
-
-For example, the below command configures the SID `1::A1` with an End.AD
-function for sending traffic on interface `GigabitEthernet0/8/0` to the
-appliance at address `A1::`, and receiving it back on interface
-`GigabitEthernet0/9/0`.
-
-```
-sr localsid address 1::A1 behavior end.ad nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
-```
-
-## Pseudocode
-
-The dynamic proxy SRv6 pseudocode is obtained by inserting the following
-instructions between lines 1 and 2 of the static proxy SRv6 pseudocode.
-
-```
-IF NH=SRH & SL > 0 THEN
- Decrement SL and update the IPv6 DA with SRH[SL]
- IF C(IFACE-IN) different from IPv6 encaps THEN ;; Ref1
- Copy the IPv6 encaps into C(IFACE-IN) ;; Ref2
-ELSE
- Drop the packet
-```
-
-**Ref1:** "IPv6 encaps" represents the IPv6 header and any attached extension
-header.
-
-**Ref2:** C(IFACE-IN) represents the cache entry associated to the dynamic SR proxy
-segment. It is identified with IFACE-IN in order to efficiently retrieve the
-right SR information when a packet arrives on this interface.
-
-In addition, the inbound policy should check that C(IFACE-IN) has been defined
-before attempting to restore the IPv6 encapsulation, and drop the packet
-otherwise.
diff --git a/src/plugins/srv6-ad/ad_plugin_doc.rst b/src/plugins/srv6-ad/ad_plugin_doc.rst
new file mode 100644
index 00000000000..cfb6cea7a15
--- /dev/null
+++ b/src/plugins/srv6-ad/ad_plugin_doc.rst
@@ -0,0 +1,86 @@
+.. _srv6_ad_plugin_doc:
+
+SRv6 dynamic proxy
+==================
+
+SRv6 endpoint to SR-unaware appliance via dynamic proxy (End.AD)
+----------------------------------------------------------------
+
+Overview
+~~~~~~~~
+
+The dynamic proxy is an improvement over the static proxy (@ref
+srv6_as_plugin_doc) that dynamically learns the SR information before
+removing it from the incoming traffic. The same information can then be
+re-attached to the traffic returning from the SF. As opposed to the
+static SR proxy, no CACHE information needs to be configured. Instead,
+the dynamic SR proxy relies on a local caching mechanism on the node
+instantiating this segment. Therefore, a dynamic proxy segment cannot be
+the last segment in an SR SC policy. A different SR behavior should thus
+be used if the SF is meant to be the final destination of an SR SC
+policy.
+
+Upon receiving a packet whose active segment matches a dynamic SR proxy
+function, the proxy node pops the top MPLS label or applies the SRv6 End
+behavior, then compares the updated SR information with the cache entry
+for the current segment. If the cache is empty or different, it is
+updated with the new SR information. The SR information is then removed
+and the inner packet is sent towards the SF.
+
+The cache entry is not mapped to any particular packet, but instead to
+an SR SC policy identified by the receiving interface (IFACE-IN). Any
+non-link-local IP packet or non-local Ethernet frame received on that
+interface will be re-encapsulated with the cached headers as described
+in @ref srv6_as_plugin_doc. The SF may thus drop, modify or generate new
+packets without affecting the proxy.
+
+For more information, please see
+`draft-xuclad-spring-sr-service-chaining <https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/>`__.
+
+CLI configuration
+~~~~~~~~~~~~~~~~~
+
+The following command instantiates a new End.AD segment that sends the
+inner packets on interface ``IFACE-OUT`` towards an appliance at address
+``S-ADDR`` and restores the encapsulation headers of the packets coming
+back on interface ``IFACE-IN``.
+
+::
+
+ sr localsid address SID behavior end.ad nh S-ADDR oif IFACE-OUT iif IFACE-IN
+
+For example, the below command configures the SID ``1::A1`` with an
+End.AD function for sending traffic on interface
+``GigabitEthernet0/8/0`` to the appliance at address ``A1::``, and
+receiving it back on interface ``GigabitEthernet0/9/0``.
+
+::
+
+ sr localsid address 1::A1 behavior end.ad nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
+
+Pseudocode
+~~~~~~~~~~
+
+The dynamic proxy SRv6 pseudocode is obtained by inserting the following
+instructions between lines 1 and 2 of the static proxy SRv6 pseudocode.
+
+::
+
+ IF NH=SRH & SL > 0 THEN
+ Decrement SL and update the IPv6 DA with SRH[SL]
+ IF C(IFACE-IN) different from IPv6 encaps THEN ;; Ref1
+ Copy the IPv6 encaps into C(IFACE-IN) ;; Ref2
+ ELSE
+ Drop the packet
+
+**Ref1:** “IPv6 encaps” represents the IPv6 header and any attached
+extension header.
+
+**Ref2:** C(IFACE-IN) represents the cache entry associated to the
+dynamic SR proxy segment. It is identified with IFACE-IN in order to
+efficiently retrieve the right SR information when a packet arrives on
+this interface.
+
+In addition, the inbound policy should check that C(IFACE-IN) has been
+defined before attempting to restore the IPv6 encapsulation, and drop
+the packet otherwise.
diff --git a/src/plugins/srv6-ad/node.c b/src/plugins/srv6-ad/node.c
index 9d4ea44e19b..2b1d56b6570 100644
--- a/src/plugins/srv6-ad/node.c
+++ b/src/plugins/srv6-ad/node.c
@@ -203,7 +203,6 @@ srv6_ad_localsid_fn (vlib_main_t * vm,
{
ip6_sr_main_t *sm = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
- u32 cnt_packets = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -264,7 +263,6 @@ srv6_ad_localsid_fn (vlib_main_t * vm,
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
- cnt_packets++;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -273,7 +271,6 @@ srv6_ad_localsid_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_ad_localsid_node) = {
.function = srv6_ad_localsid_fn,
.name = "srv6-ad-localsid",
@@ -288,7 +285,6 @@ VLIB_REGISTER_NODE (srv6_ad_localsid_node) = {
[SRV6_AD_LOCALSID_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/******************************* Rewriting node *******************************/
@@ -409,7 +405,6 @@ srv6_ad2_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_ad2_rewrite_node) = {
.function = srv6_ad2_rewrite_fn,
.name = "srv6-ad2-rewrite",
@@ -424,7 +419,6 @@ VLIB_REGISTER_NODE (srv6_ad2_rewrite_node) = {
[SRV6_AD_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/**
@@ -552,7 +546,6 @@ srv6_ad4_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_ad4_rewrite_node) = {
.function = srv6_ad4_rewrite_fn,
.name = "srv6-ad4-rewrite",
@@ -567,7 +560,6 @@ VLIB_REGISTER_NODE (srv6_ad4_rewrite_node) = {
[SRV6_AD_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/**
@@ -690,7 +682,6 @@ srv6_ad6_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_ad6_rewrite_node) = {
.function = srv6_ad6_rewrite_fn,
.name = "srv6-ad6-rewrite",
@@ -705,7 +696,6 @@ VLIB_REGISTER_NODE (srv6_ad6_rewrite_node) = {
[SRV6_AD_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-am/am.c b/src/plugins/srv6-am/am.c
index 1408ebc470b..f308b5167d9 100644
--- a/src/plugins/srv6-am/am.c
+++ b/src/plugins/srv6-am/am.c
@@ -139,7 +139,7 @@ unformat_srv6_am_localsid (unformat_input_t * input, va_list * args)
unformat_vnet_sw_interface, vnm, &sw_if_index_in))
{
/* Allocate a portion of memory */
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
/* Set to zero the memory */
clib_memset (ls_mem, 0, sizeof *ls_mem);
@@ -226,7 +226,6 @@ srv6_am_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_am_rewrite, static) =
{
.arc_name = "ip6-unicast",
@@ -240,7 +239,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Masquerading Segment Routing for IPv6 (SRv6) Proxy",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-am/am_plugin_doc.md b/src/plugins/srv6-am/am_plugin_doc.md
deleted file mode 100644
index 11aad855408..00000000000
--- a/src/plugins/srv6-am/am_plugin_doc.md
+++ /dev/null
@@ -1,100 +0,0 @@
-# SRv6 endpoint to SR-unaware appliance via masquerading (End.AM) {#srv6_am_plugin_doc}
-
-The masquerading proxy is an SR endpoint behavior for processing SRv6 traffic on
-behalf of an SR-unaware SF. This proxy thus receives SR traffic that is formed
-of an IPv6 header and an SRH on top of an inner payload. The masquerading
-behavior is independent from the inner payload type. Hence, the inner payload
-can be of any type but it is usually expected to be a transport layer packet,
-such as TCP or UDP.
-
-A masquerading SR proxy segment is associated with the following mandatory
-parameters:
-
-- S-ADDR: Ethernet or IPv6 address of the SF
-- IFACE-OUT: Local interface for sending traffic towards the SF
-- IFACE-IN: Local interface receiving the traffic coming back from the SF
-
-A masquerading SR proxy segment is thus defined for a specific SF and bound to a
-pair of directed interfaces or sub-interfaces on the proxy. As opposed to the
-static and dynamic SR proxies, a masquerading segment can be present at the same
-time in any number of SR SC policies and the same interfaces can be bound to
-multiple masquerading proxy segments. The only restriction is that a
-masquerading proxy segment cannot be the last segment in an SR SC policy.
-
-The first part of the masquerading behavior is triggered when the proxy node
-receives an IPv6 packet whose Destination Address matches a masquerading proxy
-segment. The proxy inspects the IPv6 extension headers and substitutes the
-Destination Address with the last segment in the SRH attached to the IPv6
-header, which represents the final destination of the IPv6 packet. The packet is
-then sent out towards the SF.
-
-The SF receives an IPv6 packet whose source and destination addresses are
-respectively the original source and final destination. It does not attempt to
-inspect the SRH, as RFC8200 specifies that routing extension headers are not
-examined or processed by transit nodes. Instead, the SF simply forwards the
-packet based on its current Destination Address. In this scenario, we assume
-that the SF can only inspect, drop or perform limited changes to the packets.
-For example, Intrusion Detection Systems, Deep Packet Inspectors and non-NAT
-Firewalls are among the SFs that can be supported by a masquerading SR proxy.
-
-The second part of the masquerading behavior, also called de- masquerading, is
-an inbound policy attached to the proxy interface receiving the traffic
-returning from the SF, IFACE-IN. This policy inspects the incoming traffic and
-triggers a regular SRv6 endpoint processing (End) on any IPv6 packet that
-contains an SRH. This processing occurs before any lookup on the packet
-Destination Address is performed and it is sufficient to restore the right
-active segment as the Destination Address of the IPv6 packet.
-
-For more information, please see
-[draft-xuclad-spring-sr-service-chaining](https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/).
-
-## CLI configuration
-
-The following command instantiates a new End.AM segment that sends masqueraded
-traffic on interface `IFACE-OUT` towards an appliance at address `S-ADDR` and
-restores the active segment in the IPv6 header of the packets coming back on
-interface `IFACE-IN`.
-
-```
-sr localsid address SID behavior end.am nh S-ADDR oif IFACE-OUT iif IFACE-IN
-```
-
-For example, the below command configures the SID `1::A1` with an End.AM
-function for sending traffic on interface `GigabitEthernet0/8/0` to the
-appliance at address `A1::`, and receiving it back on interface
-`GigabitEthernet0/9/0`.
-
-```
-sr localsid address 1::A1 behavior end.am nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
-```
-
-## Pseudocode
-
-### Masquerading
-
-Upon receiving a packet destined for S, where S is an IPv6 masquerading proxy
-segment, a node N processes it as follows.
-
-```
-IF NH=SRH & SL > 0 THEN
- Update the IPv6 DA with SRH[0]
- Forward the packet on IFACE-OUT
-ELSE
- Drop the packet
-```
-
-### De-masquerading
-
-Upon receiving a non-link-local IPv6 packet on IFACE-IN, a node N processes it
-as follows.
-
-```
-IF NH=SRH & SL > 0 THEN
- Decrement SL
- Update the IPv6 DA with SRH[SL] ;; Ref1
- Lookup DA in appropriate table and proceed accordingly
-```
-
-**Ref1:** This pseudocode can be augmented to support the Penultimate Segment
-Popping (PSP) endpoint flavor. The exact pseudocode modification are provided in
-[draft-filsfils-spring-srv6-network-programming](https://datatracker.ietf.org/doc/draft-filsfils-spring-srv6-network-programming/).
diff --git a/src/plugins/srv6-am/am_plugin_doc.rst b/src/plugins/srv6-am/am_plugin_doc.rst
new file mode 100644
index 00000000000..576379868fd
--- /dev/null
+++ b/src/plugins/srv6-am/am_plugin_doc.rst
@@ -0,0 +1,116 @@
+.. _srv6_am_plugin_doc:
+
+SRv6 masquerading
+=================
+
+SRv6 endpoint to SR-unaware appliance via masquerading (End.AM)
+---------------------------------------------------------------
+
+The masquerading proxy is an SR endpoint behavior for processing SRv6
+traffic on behalf of an SR-unaware SF. This proxy thus receives SR
+traffic that is formed of an IPv6 header and an SRH on top of an inner
+payload. The masquerading behavior is independent from the inner payload
+type. Hence, the inner payload can be of any type but it is usually
+expected to be a transport layer packet, such as TCP or UDP.
+
+A masquerading SR proxy segment is associated with the following
+mandatory parameters:
+
+- S-ADDR: Ethernet or IPv6 address of the SF
+- IFACE-OUT: Local interface for sending traffic towards the SF
+- IFACE-IN: Local interface receiving the traffic coming back from the
+ SF
+
+A masquerading SR proxy segment is thus defined for a specific SF and
+bound to a pair of directed interfaces or sub-interfaces on the proxy.
+As opposed to the static and dynamic SR proxies, a masquerading segment
+can be present at the same time in any number of SR SC policies and the
+same interfaces can be bound to multiple masquerading proxy segments.
+The only restriction is that a masquerading proxy segment cannot be the
+last segment in an SR SC policy.
+
+The first part of the masquerading behavior is triggered when the proxy
+node receives an IPv6 packet whose Destination Address matches a
+masquerading proxy segment. The proxy inspects the IPv6 extension
+headers and substitutes the Destination Address with the last segment in
+the SRH attached to the IPv6 header, which represents the final
+destination of the IPv6 packet. The packet is then sent out towards the
+SF.
+
+The SF receives an IPv6 packet whose source and destination addresses
+are respectively the original source and final destination. It does not
+attempt to inspect the SRH, as RFC8200 specifies that routing extension
+headers are not examined or processed by transit nodes. Instead, the SF
+simply forwards the packet based on its current Destination Address. In
+this scenario, we assume that the SF can only inspect, drop or perform
+limited changes to the packets. For example, Intrusion Detection
+Systems, Deep Packet Inspectors and non-NAT Firewalls are among the SFs
+that can be supported by a masquerading SR proxy.
+
+The second part of the masquerading behavior, also called de-
+masquerading, is an inbound policy attached to the proxy interface
+receiving the traffic returning from the SF, IFACE-IN. This policy
+inspects the incoming traffic and triggers a regular SRv6 endpoint
+processing (End) on any IPv6 packet that contains an SRH. This
+processing occurs before any lookup on the packet Destination Address is
+performed and it is sufficient to restore the right active segment as
+the Destination Address of the IPv6 packet.
+
+For more information, please see
+`draft-xuclad-spring-sr-service-chaining <https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/>`__.
+
+CLI configuration
+~~~~~~~~~~~~~~~~~
+
+The following command instantiates a new End.AM segment that sends
+masqueraded traffic on interface ``IFACE-OUT`` towards an appliance at
+address ``S-ADDR`` and restores the active segment in the IPv6 header of
+the packets coming back on interface ``IFACE-IN``.
+
+::
+
+ sr localsid address SID behavior end.am nh S-ADDR oif IFACE-OUT iif IFACE-IN
+
+For example, the below command configures the SID ``1::A1`` with an
+End.AM function for sending traffic on interface
+``GigabitEthernet0/8/0`` to the appliance at address ``A1::``, and
+receiving it back on interface ``GigabitEthernet0/9/0``.
+
+::
+
+ sr localsid address 1::A1 behavior end.am nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0
+
+Pseudocode
+~~~~~~~~~~
+
+Masquerading
+^^^^^^^^^^^^
+
+Upon receiving a packet destined for S, where S is an IPv6 masquerading
+proxy segment, a node N processes it as follows.
+
+::
+
+ IF NH=SRH & SL > 0 THEN
+ Update the IPv6 DA with SRH[0]
+ Forward the packet on IFACE-OUT
+ ELSE
+ Drop the packet
+
+De-masquerading
+^^^^^^^^^^^^^^^
+
+Upon receiving a non-link-local IPv6 packet on IFACE-IN, a node N
+processes it as follows.
+
+::
+
+ IF NH=SRH & SL > 0 THEN
+ Decrement SL
+ Update the IPv6 DA with SRH[SL] ;; Ref1
+ Lookup DA in appropriate table and proceed accordingly
+
+**Ref1:** This pseudocode can be augmented to support the Penultimate
+Segment Popping (PSP) endpoint flavor. The exact pseudocode modification
+are provided in
+`draft-filsfils-spring-srv6-network-programming <https://datatracker.ietf.org/doc/draft-filsfils-spring-srv6-network-programming/>`__.
diff --git a/src/plugins/srv6-am/node.c b/src/plugins/srv6-am/node.c
index dd71e60d5a2..beef6a30910 100644
--- a/src/plugins/srv6-am/node.c
+++ b/src/plugins/srv6-am/node.c
@@ -142,7 +142,6 @@ srv6_am_localsid_fn (vlib_main_t * vm,
{
ip6_sr_main_t *sm = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
- u32 cnt_packets = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -203,8 +202,6 @@ srv6_am_localsid_fn (vlib_main_t * vm,
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
-
- cnt_packets++;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -213,7 +210,6 @@ srv6_am_localsid_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_am_localsid_node) = {
.function = srv6_am_localsid_fn,
.name = "srv6-am-localsid",
@@ -226,7 +222,6 @@ VLIB_REGISTER_NODE (srv6_am_localsid_node) = {
[SRV6_AM_LOCALSID_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/******************************* Rewriting node *******************************/
@@ -329,7 +324,6 @@ srv6_am_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_am_rewrite_node) = {
.function = srv6_am_rewrite_fn,
.name = "srv6-am-rewrite",
@@ -344,7 +338,6 @@ VLIB_REGISTER_NODE (srv6_am_rewrite_node) = {
[SRV6_AM_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-as/as.c b/src/plugins/srv6-as/as.c
index d9dbd8aa608..bdf17527ae8 100644
--- a/src/plugins/srv6-as/as.c
+++ b/src/plugins/srv6-as/as.c
@@ -470,7 +470,7 @@ unformat_srv6_as_localsid (unformat_input_t * input, va_list * args)
}
/* Allocate and initialize memory block for local SID parameters */
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
clib_memset (ls_mem, 0, sizeof *ls_mem);
*plugin_mem_p = ls_mem;
@@ -557,7 +557,6 @@ srv6_as_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_as2_rewrite, static) =
{
.arc_name = "device-input",
@@ -585,7 +584,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Static Segment Routing for IPv6 (SRv6) Proxy",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-as/as_plugin_doc.md b/src/plugins/srv6-as/as_plugin_doc.md
deleted file mode 100644
index 7cda08b60d9..00000000000
--- a/src/plugins/srv6-as/as_plugin_doc.md
+++ /dev/null
@@ -1,152 +0,0 @@
-# SRv6 endpoint to SR-unaware appliance via static proxy (End.AS) {#srv6_as_plugin_doc}
-
-## Overview
-
-The static proxy is an SR endpoint behavior for processing SR-MPLS or SRv6
-encapsulated traffic on behalf of an SR-unaware SF. This proxy thus receives SR
-traffic that is formed of an MPLS label stack or an IPv6 header on top of an
-inner packet, which can be Ethernet, IPv4 or IPv6.
-
-A static SR proxy segment is associated with the following mandatory parameters:
-
-- INNER-TYPE: Inner packet type
-- S-ADDR: Ethernet or IP address of the SF (only for inner type IPv4 and IPv6)
-- IFACE-OUT: Local interface for sending traffic towards the SF
-- IFACE-IN: Local interface receiving the traffic coming back from the SF
-- CACHE: SR information to be attached on the traffic coming back from the SF,
-including at least
- * CACHE.SA: IPv6 source address (SRv6 only)
- * CACHE.LIST: Segment list expressed as MPLS labels or IPv6 address
-
-A static SR proxy segment is thus defined for a specific SF, inner packet type
-and cached SR information. It is also bound to a pair of directed interfaces on
-the proxy. These may be both directions of a single interface, or opposite
-directions of two different interfaces. The latter is recommended in case the SF
-is to be used as part of a bi-directional SR SC policy. If the proxy and the SF
-both support 802.1Q, IFACE-OUT and IFACE-IN can also represent sub-interfaces.
-
-The first part of this behavior is triggered when the proxy node receives a
-packet whose active segment matches a segment associated with the static proxy
-behavior. It removes the SR information from the packet then sends it on a
-specific interface towards the associated SF. This SR information corresponds to
-the full label stack for SR-MPLS or to the encapsulation IPv6 header with any
-attached extension header in the case of SRv6.
-
-The second part is an inbound policy attached to the proxy interface receiving
-the traffic returning from the SF, IFACE-IN. This policy attaches to the
-incoming traffic the cached SR information associated with the SR proxy segment.
-If the proxy segment uses the SR-MPLS data plane, CACHE contains a stack of
-labels to be pushed on top the packets. With the SRv6 data plane, CACHE is
-defined as a source address, an active segment and an optional SRH (tag,
-segments left, segment list and metadata). The proxy encapsulates the packets
-with an IPv6 header that has the source address, the active segment as
-destination address and the SRH as a routing extension header. After the SR
-information has been attached, the packets are forwarded according to the active
-segment, which is represented by the top MPLS label or the IPv6 Destination
-Address.
-
-In this scenario, there are no restrictions on the operations that can be
-performed by the SF on the stream of packets. It may operate at all protocol
-layers, terminate transport layer connections, generate new packets and initiate
-transport layer connections. This behavior may also be used to integrate an
-IPv4-only SF into an SRv6 policy. However, a static SR proxy segment can be used
-in only one service chain at a time. As opposed to most other segment types, a
-static SR proxy segment is bound to a unique list of segments, which represents
-a directed SR SC policy. This is due to the cached SR information being defined
-in the segment configuration. This limitation only prevents multiple segment
-lists from using the same static SR proxy segment at the same time, but a single
-segment list can be shared by any number of traffic flows. Besides, since the
-returning traffic from the SF is re-classified based on the incoming interface,
-an interface can be used as receiving interface (IFACE-IN) only for a single SR
-proxy segment at a time. In the case of a bi-directional SR SC policy, a
-different SR proxy segment and receiving interface are required for the return
-direction.
-
-For more information, please see
-[draft-xuclad-spring-sr-service-chaining](https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/).
-
-## CLI configuration
-
-The following command instantiates a new End.AS segment that sends the inner
-packets on interface `IFACE-OUT` towards an appliance at address `S-ADDR` and
-restores the segment list ``<S1, S2, S3>`` with a source address `SRC-ADDR` on
-the packets coming back on interface `IFACE-IN`.
-
-```
-sr localsid address SID behavior end.ad nh S-ADDR oif IFACE-OUT iif IFACE-IN src SRC-ADDR next S1 next S2 next S3
-```
-
-For example, the below command configures the SID `1::A1` with an End.AS
-function for sending traffic on interface `GigabitEthernet0/8/0` to the
-appliance at address `A1::`, and receiving it back on interface
-`GigabitEthernet0/9/0`.
-
-```
-sr localsid address 1::A1 behavior end.ad nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0 src 1:: next 2::20 next 3::30 next 4::40
-```
-
-## Pseudocode
-
-### Static proxy for inner type IPv4
-
-Upon receiving an IPv6 packet destined for S, where S is an IPv6 static proxy
-segment for IPv4 traffic, a node N does:
-
-```
-IF ENH == 4 THEN ;; Ref1
- Remove the (outer) IPv6 header and its extension headers
- Forward the exposed packet on IFACE-OUT towards S-ADDR
-ELSE
- Drop the packet
-```
-
-**Ref1:** 4 refers to IPv4 encapsulation as defined by IANA allocation for Internet
-Protocol Numbers.
-
-Upon receiving a non link-local IPv4 packet on IFACE-IN, a node N does:
-
-```
-Decrement TTL and update checksum
-IF CACHE.SRH THEN ;; Ref2
- Push CACHE.SRH on top of the existing IPv4 header
- Set NH value of the pushed SRH to 4
-Push outer IPv6 header with SA, DA and traffic class from CACHE
-Set outer payload length and flow label
-Set NH value to 43 if an SRH was added, or 4 otherwise
-Lookup outer DA in appropriate table and proceed accordingly
-```
-
-**Ref2:** CACHE.SRH represents the SRH defined in CACHE, if any, for the static SR
-proxy segment associated with IFACE-IN.
-
-### Static proxy for inner type IPv6
-
-Upon receiving an IPv6 packet destined for S, where S is an IPv6 static proxy
-segment for IPv6 traffic, a node N does:
-
-```
-IF ENH == 41 THEN ;; Ref1
- Remove the (outer) IPv6 header and its extension headers
- Forward the exposed packet on IFACE-OUT towards S-ADDR
-ELSE
- Drop the packet
-```
-
-**Ref1:** 41 refers to IPv6 encapsulation as defined by IANA allocation for Internet
-Protocol Numbers.
-
-Upon receiving a non-link-local IPv6 packet on IFACE-IN, a node N does:
-
-```
-Decrement Hop Limit
-IF CACHE.SRH THEN ;; Ref2
- Push CACHE.SRH on top of the existing IPv6 header
- Set NH value of the pushed SRH to 41
-Push outer IPv6 header with SA, DA and traffic class from CACHE
-Set outer payload length and flow label
-Set NH value to 43 if an SRH was added, or 41 otherwise
-Lookup outer DA in appropriate table and proceed accordingly
-```
-
-**Ref2:** CACHE.SRH represents the SRH defined in CACHE, if any, for the static SR
-proxy segment associated with IFACE-IN.
diff --git a/src/plugins/srv6-as/as_plugin_doc.rst b/src/plugins/srv6-as/as_plugin_doc.rst
new file mode 100644
index 00000000000..9fa7f8fc19e
--- /dev/null
+++ b/src/plugins/srv6-as/as_plugin_doc.rst
@@ -0,0 +1,172 @@
+.. _srv6_as_plugin_doc:
+
+SRv6 static proxy
+=================
+
+The document describes SRv6 endpoint to SR-unaware appliance via static
+proxy (End.AS)
+
+Overview
+--------
+
+The static proxy is an SR endpoint behavior for processing SR-MPLS or
+SRv6 encapsulated traffic on behalf of an SR-unaware SF. This proxy thus
+receives SR traffic that is formed of an MPLS label stack or an IPv6
+header on top of an inner packet, which can be Ethernet, IPv4 or IPv6.
+
+A static SR proxy segment is associated with the following mandatory
+parameters:
+
+- INNER-TYPE: Inner packet type
+- S-ADDR: Ethernet or IP address of the SF (only for inner type IPv4
+ and IPv6)
+- IFACE-OUT: Local interface for sending traffic towards the SF
+- IFACE-IN: Local interface receiving the traffic coming back from the
+ SF
+- CACHE: SR information to be attached on the traffic coming back from
+ the SF, including at least
+
+ - CACHE.SA: IPv6 source address (SRv6 only)
+ - CACHE.LIST: Segment list expressed as MPLS labels or IPv6 address
+
+A static SR proxy segment is thus defined for a specific SF, inner
+packet type and cached SR information. It is also bound to a pair of
+directed interfaces on the proxy. These may be both directions of a
+single interface, or opposite directions of two different interfaces.
+The latter is recommended in case the SF is to be used as part of a
+bi-directional SR SC policy. If the proxy and the SF both support
+802.1Q, IFACE-OUT and IFACE-IN can also represent sub-interfaces.
+
+The first part of this behavior is triggered when the proxy node
+receives a packet whose active segment matches a segment associated with
+the static proxy behavior. It removes the SR information from the packet
+then sends it on a specific interface towards the associated SF. This SR
+information corresponds to the full label stack for SR-MPLS or to the
+encapsulation IPv6 header with any attached extension header in the case
+of SRv6.
+
+The second part is an inbound policy attached to the proxy interface
+receiving the traffic returning from the SF, IFACE-IN. This policy
+attaches to the incoming traffic the cached SR information associated
+with the SR proxy segment. If the proxy segment uses the SR-MPLS data
+plane, CACHE contains a stack of labels to be pushed on top the packets.
+With the SRv6 data plane, CACHE is defined as a source address, an
+active segment and an optional SRH (tag, segments left, segment list and
+metadata). The proxy encapsulates the packets with an IPv6 header that
+has the source address, the active segment as destination address and
+the SRH as a routing extension header. After the SR information has been
+attached, the packets are forwarded according to the active segment,
+which is represented by the top MPLS label or the IPv6 Destination
+Address.
+
+In this scenario, there are no restrictions on the operations that can
+be performed by the SF on the stream of packets. It may operate at all
+protocol layers, terminate transport layer connections, generate new
+packets and initiate transport layer connections. This behavior may also
+be used to integrate an IPv4-only SF into an SRv6 policy. However, a
+static SR proxy segment can be used in only one service chain at a time.
+As opposed to most other segment types, a static SR proxy segment is
+bound to a unique list of segments, which represents a directed SR SC
+policy. This is due to the cached SR information being defined in the
+segment configuration. This limitation only prevents multiple segment
+lists from using the same static SR proxy segment at the same time, but
+a single segment list can be shared by any number of traffic flows.
+Besides, since the returning traffic from the SF is re-classified based
+on the incoming interface, an interface can be used as receiving
+interface (IFACE-IN) only for a single SR proxy segment at a time. In
+the case of a bi-directional SR SC policy, a different SR proxy segment
+and receiving interface are required for the return direction.
+
+For more information, please see
+`draft-xuclad-spring-sr-service-chaining <https://datatracker.ietf.org/doc/draft-xuclad-spring-sr-service-chaining/>`__.
+
+CLI configuration
+-----------------
+
+The following command instantiates a new End.AS segment that sends the
+inner packets on interface ``IFACE-OUT`` towards an appliance at address
+``S-ADDR`` and restores the segment list ``<S1, S2, S3>`` with a source
+address ``SRC-ADDR`` on the packets coming back on interface
+``IFACE-IN``.
+
+::
+
+ sr localsid address SID behavior end.as nh S-ADDR oif IFACE-OUT iif IFACE-IN src SRC-ADDR next S1 next S2 next S3
+
+For example, the below command configures the SID ``1::A1`` with an
+End.AS function for sending traffic on interface
+``GigabitEthernet0/8/0`` to the appliance at address ``A1::``, and
+receiving it back on interface ``GigabitEthernet0/9/0``.
+
+::
+
+ sr localsid address 1::A1 behavior end.as nh A1:: oif GigabitEthernet0/8/0 iif GigabitEthernet0/9/0 src 1:: next 2::20 next 3::30 next 4::40
+
+Pseudocode
+----------
+
+Static proxy for inner type IPv4
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Upon receiving an IPv6 packet destined for S, where S is an IPv6 static
+proxy segment for IPv4 traffic, a node N does:
+
+::
+
+ IF ENH == 4 THEN ;; Ref1
+ Remove the (outer) IPv6 header and its extension headers
+ Forward the exposed packet on IFACE-OUT towards S-ADDR
+ ELSE
+ Drop the packet
+
+**Ref1:** 4 refers to IPv4 encapsulation as defined by IANA allocation
+for Internet Protocol Numbers.
+
+Upon receiving a non link-local IPv4 packet on IFACE-IN, a node N does:
+
+::
+
+ Decrement TTL and update checksum
+ IF CACHE.SRH THEN ;; Ref2
+ Push CACHE.SRH on top of the existing IPv4 header
+ Set NH value of the pushed SRH to 4
+ Push outer IPv6 header with SA, DA and traffic class from CACHE
+ Set outer payload length and flow label
+ Set NH value to 43 if an SRH was added, or 4 otherwise
+ Lookup outer DA in appropriate table and proceed accordingly
+
+**Ref2:** CACHE.SRH represents the SRH defined in CACHE, if any, for the
+static SR proxy segment associated with IFACE-IN.
+
+Static proxy for inner type IPv6
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Upon receiving an IPv6 packet destined for S, where S is an IPv6 static
+proxy segment for IPv6 traffic, a node N does:
+
+::
+
+ IF ENH == 41 THEN ;; Ref1
+ Remove the (outer) IPv6 header and its extension headers
+ Forward the exposed packet on IFACE-OUT towards S-ADDR
+ ELSE
+ Drop the packet
+
+**Ref1:** 41 refers to IPv6 encapsulation as defined by IANA allocation
+for Internet Protocol Numbers.
+
+Upon receiving a non-link-local IPv6 packet on IFACE-IN, a node N does:
+
+::
+
+ Decrement Hop Limit
+ IF CACHE.SRH THEN ;; Ref2
+ Push CACHE.SRH on top of the existing IPv6 header
+ Set NH value of the pushed SRH to 41
+ Push outer IPv6 header with SA, DA and traffic class from CACHE
+ Set outer payload length and flow label
+ Set NH value to 43 if an SRH was added, or 41 otherwise
+ Lookup outer DA in appropriate table and proceed accordingly
+
+**Ref2:** CACHE.SRH represents the SRH defined in CACHE, if any, for the
+static SR proxy segment associated with IFACE-IN.
diff --git a/src/plugins/srv6-as/node.c b/src/plugins/srv6-as/node.c
index 9e84a9848e5..e81881982af 100644
--- a/src/plugins/srv6-as/node.c
+++ b/src/plugins/srv6-as/node.c
@@ -169,7 +169,6 @@ srv6_as_localsid_fn (vlib_main_t * vm,
{
ip6_sr_main_t *sm = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
- u32 cnt_packets = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -227,8 +226,6 @@ srv6_as_localsid_fn (vlib_main_t * vm,
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
-
- cnt_packets++;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -237,7 +234,6 @@ srv6_as_localsid_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_as_localsid_node) = {
.function = srv6_as_localsid_fn,
.name = "srv6-as-localsid",
@@ -252,7 +248,6 @@ VLIB_REGISTER_NODE (srv6_as_localsid_node) = {
[SRV6_AS_LOCALSID_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/******************************* Rewriting node *******************************/
@@ -373,7 +368,6 @@ srv6_as2_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_as2_rewrite_node) = {
.function = srv6_as2_rewrite_fn,
.name = "srv6-as2-rewrite",
@@ -388,7 +382,6 @@ VLIB_REGISTER_NODE (srv6_as2_rewrite_node) = {
[SRV6_AS_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/**
@@ -517,7 +510,6 @@ srv6_as4_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_as4_rewrite_node) = {
.function = srv6_as4_rewrite_fn,
.name = "srv6-as4-rewrite",
@@ -532,7 +524,6 @@ VLIB_REGISTER_NODE (srv6_as4_rewrite_node) = {
[SRV6_AS_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/**
@@ -656,7 +647,6 @@ srv6_as6_rewrite_fn (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (srv6_as6_rewrite_node) = {
.function = srv6_as6_rewrite_fn,
.name = "srv6-as6-rewrite",
@@ -671,7 +661,6 @@ VLIB_REGISTER_NODE (srv6_as6_rewrite_node) = {
[SRV6_AS_REWRITE_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/CMakeLists.txt b/src/plugins/srv6-mobile/CMakeLists.txt
index 5a9945c2e4f..a917c8ded82 100644
--- a/src/plugins/srv6-mobile/CMakeLists.txt
+++ b/src/plugins/srv6-mobile/CMakeLists.txt
@@ -21,6 +21,11 @@ add_vpp_plugin(srv6mobile
gtp6_d_di.c
gtp6_dt.c
node.c
+ sr_mobile_api.c
+
+ API_FILES
+ sr_mobile.api
+ sr_mobile_types.api
INSTALL_HEADERS
mobile.h
diff --git a/src/plugins/srv6-mobile/FEATURE.yaml b/src/plugins/srv6-mobile/FEATURE.yaml
index 3289b890506..45fb7da0201 100644
--- a/src/plugins/srv6-mobile/FEATURE.yaml
+++ b/src/plugins/srv6-mobile/FEATURE.yaml
@@ -1,5 +1,5 @@
---
-name: SRv6 Mobuile
+name: SRv6 Mobile
maintainer: Tetsuya Murakami <tetsuya.mrk@gmail.com>
features:
- GTP4.D
diff --git a/src/plugins/srv6-mobile/extra/Dockerfile.j2 b/src/plugins/srv6-mobile/extra/Dockerfile.j2
index 8e42af09d56..e8120bb736b 100644
--- a/src/plugins/srv6-mobile/extra/Dockerfile.j2
+++ b/src/plugins/srv6-mobile/extra/Dockerfile.j2
@@ -12,6 +12,7 @@ RUN set -eux; \
net-tools \
iproute2 \
tcpdump \
+ python3-cffi \
asciidoc \
xmlto \
libssl-dev \
diff --git a/src/plugins/srv6-mobile/extra/Dockerfile.j2.release b/src/plugins/srv6-mobile/extra/Dockerfile.j2.release
index aec520bfb85..7507f50403b 100644
--- a/src/plugins/srv6-mobile/extra/Dockerfile.j2.release
+++ b/src/plugins/srv6-mobile/extra/Dockerfile.j2.release
@@ -15,6 +15,7 @@ RUN set -eux; \
net-tools \
iproute2 \
tcpdump \
+ python3-cffi \
python2.7 \
libssl-dev \
netcat; \
diff --git a/src/plugins/srv6-mobile/extra/runner.py b/src/plugins/srv6-mobile/extra/runner.py
index b18fdc32766..31201935120 100755
--- a/src/plugins/srv6-mobile/extra/runner.py
+++ b/src/plugins/srv6-mobile/extra/runner.py
@@ -1,10 +1,8 @@
#!/usr/bin/env python3
-from os.path import dirname, realpath, split,\
- join, isdir, exists
+from os.path import dirname, realpath, split, join, isdir, exists
from os import remove, system, mkdir
-from logging import getLogger, basicConfig,\
- DEBUG, INFO, ERROR
+from logging import getLogger, basicConfig, DEBUG, INFO, ERROR
from argparse import ArgumentParser
from atexit import register
from shutil import rmtree
@@ -18,10 +16,7 @@ from scapy.contrib.gtp import *
from scapy.all import *
-verbose_levels = {
- 'error': ERROR,
- 'debug': DEBUG,
- 'info': INFO}
+verbose_levels = {"error": ERROR, "debug": DEBUG, "info": INFO}
class ContainerStartupError(Exception):
@@ -29,7 +24,6 @@ class ContainerStartupError(Exception):
class Container(object):
-
tmp = "/tmp"
cmd = "vppctl -s 0:5002"
cmd_bash = "/bin/bash"
@@ -74,7 +68,6 @@ class Container(object):
@classmethod
def new(cls, client, image, name):
-
temp = join(cls.tmp, name)
if isdir(temp):
rmtree(temp)
@@ -87,10 +80,8 @@ class Container(object):
image=image,
name=name,
privileged=True,
- volumes={
- temp: {
- 'bind': '/mnt',
- 'mode': 'rw'}})
+ volumes={temp: {"bind": "/mnt", "mode": "rw"}},
+ )
obj = cls.get(client, name)
if not obj:
@@ -119,7 +110,7 @@ class Container(object):
def vppctl_exec(self, cmd):
ec, resp = self._ref.exec_run(cmd="{} {}".format(self.cmd, cmd))
- assert(ec == 0)
+ assert ec == 0
return resp
def setup_host_interface(self, name, ip):
@@ -134,8 +125,7 @@ class Container(object):
self.vppctl_exec("create packet-generator interface pg0")
self.vppctl_exec("set int mac address pg0 {}".format(local_mac))
self.vppctl_exec("set int ip addr pg0 {}".format(local_ip))
- self.vppctl_exec(
- "set ip neighbor pg0 {} {}".format(remote_ip, remote_mac))
+ self.vppctl_exec("set ip neighbor pg0 {} {}".format(remote_ip, remote_mac))
self.vppctl_exec("set int state pg0 up")
def pg_create_interface4(self, local_ip, remote_ip, local_mac, remote_mac):
@@ -158,24 +148,32 @@ class Container(object):
self.vppctl_exec("set ip neighbor pg0 {} {}".format(remote_ip, remote_mac))
self.vppctl_exec("set int state pg0 up")
- def pg_create_interface4_name(self, ifname, local_ip, remote_ip, local_mac, remote_mac):
+ def pg_create_interface4_name(
+ self, ifname, local_ip, remote_ip, local_mac, remote_mac
+ ):
# remote_ip can't have subnet mask
time.sleep(2)
self.vppctl_exec("create packet-generator interface {}".format(ifname))
self.vppctl_exec("set int mac address {} {}".format(ifname, local_mac))
self.vppctl_exec("set int ip addr {} {}".format(ifname, local_ip))
- self.vppctl_exec("set ip neighbor {} {} {}".format(ifname, remote_ip, remote_mac))
+ self.vppctl_exec(
+ "set ip neighbor {} {} {}".format(ifname, remote_ip, remote_mac)
+ )
self.vppctl_exec("set int state {} up".format(ifname))
- def pg_create_interface6_name(self, ifname, local_ip, remote_ip, local_mac, remote_mac):
+ def pg_create_interface6_name(
+ self, ifname, local_ip, remote_ip, local_mac, remote_mac
+ ):
# remote_ip can't have subnet mask
time.sleep(2)
self.vppctl_exec("create packet-generator interface {}".format(ifname))
self.vppctl_exec("set int mac address {} {}".format(ifname, local_mac))
self.vppctl_exec("set int ip addr {} {}".format(ifname, local_ip))
- self.vppctl_exec("set ip neighbor {} {} {}".format(ifname, remote_ip, remote_mac))
+ self.vppctl_exec(
+ "set ip neighbor {} {} {}".format(ifname, remote_ip, remote_mac)
+ )
self.vppctl_exec("set int state {} up".format(ifname))
def pg_enable(self):
@@ -186,98 +184,106 @@ class Container(object):
wrpcap(self.pg_input_file, stream)
self.vppctl_exec(
"packet-generator new name pg-stream "
- "node ethernet-input pcap {}".format(
- self.pg_input_file_in))
+ "node ethernet-input pcap {}".format(self.pg_input_file_in)
+ )
def pg_start_capture(self):
if exists(self.pg_output_file):
remove(self.pg_output_file)
self.vppctl_exec(
- "packet-generator capture pg0 pcap {}".format(
- self.pg_output_file_in))
+ "packet-generator capture pg0 pcap {}".format(self.pg_output_file_in)
+ )
def pg_start_capture_name(self, ifname):
if exists(self.pg_output_file):
remove(self.pg_output_file)
self.vppctl_exec(
- "packet-generator capture {} pcap {}".format(
- ifname, self.pg_output_file_in))
+ "packet-generator capture {} pcap {}".format(ifname, self.pg_output_file_in)
+ )
def pg_read_packets(self):
return rdpcap(self.pg_output_file)
def set_ipv6_route(self, out_if_name, next_hop_ip, subnet):
self.vppctl_exec(
- "ip route add {} via host-{} {}".format(
- subnet, out_if_name, next_hop_ip))
+ "ip route add {} via host-{} {}".format(subnet, out_if_name, next_hop_ip)
+ )
def set_ipv6_route2(self, out_if_name, next_hop_ip, subnet):
self.vppctl_exec(
- "ip route add {} via {} {}".format(
- subnet, out_if_name, next_hop_ip))
+ "ip route add {} via {} {}".format(subnet, out_if_name, next_hop_ip)
+ )
def set_ip_pgroute(self, out_if_name, next_hop_ip, subnet):
- self.vppctl_exec("ip route add {} via {} {}".format(
- subnet, out_if_name, next_hop_ip))
+ self.vppctl_exec(
+ "ip route add {} via {} {}".format(subnet, out_if_name, next_hop_ip)
+ )
def set_ipv6_pgroute(self, out_if_name, next_hop_ip, subnet):
- self.vppctl_exec("ip route add {} via {} {}".format(
- subnet, out_if_name, next_hop_ip))
+ self.vppctl_exec(
+ "ip route add {} via {} {}".format(subnet, out_if_name, next_hop_ip)
+ )
def set_ipv6_default_route(self, out_if_name, next_hop_ip):
self.vppctl_exec(
- "ip route add ::/0 via host-{} {}".format(
- out_if_name, next_hop_ip))
+ "ip route add ::/0 via host-{} {}".format(out_if_name, next_hop_ip)
+ )
def enable_trace(self, count):
self.vppctl_exec("trace add af-packet-input {}".format(count))
class Containers(object):
-
def __init__(self, client, image):
self.client = client
self.image = image
def tmp_render(self, path, template, kwargs):
-
with open(path, "w") as fo:
fo.write(template.render(**kwargs))
register(lambda: remove(path))
def build(self, path, vpp_path):
- env = Environment(loader=FileSystemLoader(path),
- autoescape=True,
- trim_blocks=True)
-
- self.tmp_render(join(vpp_path, "Dockerfile"),
- env.get_template("Dockerfile.j2"),
- {'vpp_path': vpp_path})
-
- self.tmp_render(join(vpp_path, "startup.conf"),
- env.get_template("startup.conf.j2"),
- {'vpp_path': vpp_path})
-
- ref, _ = self.client.images.build(path=vpp_path,
- tag=self.image, rm=True)
+ env = Environment(
+ loader=FileSystemLoader(path), autoescape=True, trim_blocks=True
+ )
+
+ self.tmp_render(
+ join(vpp_path, "Dockerfile"),
+ env.get_template("Dockerfile.j2"),
+ {"vpp_path": vpp_path},
+ )
+
+ self.tmp_render(
+ join(vpp_path, "startup.conf"),
+ env.get_template("startup.conf.j2"),
+ {"vpp_path": vpp_path},
+ )
+
+ ref, _ = self.client.images.build(path=vpp_path, tag=self.image, rm=True)
return ref
def release(self, path, vpp_path):
- env = Environment(loader=FileSystemLoader(path),
- autoescape=True,
- trim_blocks=True)
-
- self.tmp_render(join(vpp_path, "Dockerfile"),
- env.get_template("Dockerfile.j2.release"),
- {'vpp_path': vpp_path})
-
- self.tmp_render(join(vpp_path, "startup.conf"),
- env.get_template("startup.conf.j2"),
- {'vpp_path': vpp_path})
-
- ref, _ = self.client.images.build(path=vpp_path,
- tag="srv6m-release-image", rm=True)
+ env = Environment(
+ loader=FileSystemLoader(path), autoescape=True, trim_blocks=True
+ )
+
+ self.tmp_render(
+ join(vpp_path, "Dockerfile"),
+ env.get_template("Dockerfile.j2.release"),
+ {"vpp_path": vpp_path},
+ )
+
+ self.tmp_render(
+ join(vpp_path, "startup.conf"),
+ env.get_template("startup.conf.j2"),
+ {"vpp_path": vpp_path},
+ )
+
+ ref, _ = self.client.images.build(
+ path=vpp_path, tag="srv6m-release-image", rm=True
+ )
return ref
def new(self, name):
@@ -299,7 +305,6 @@ class Containers(object):
class Network(object):
-
def __init__(self, ref, name):
self._name = name
self._ref = ref
@@ -310,8 +315,7 @@ class Network(object):
@classmethod
def new(cls, client, name):
- ref = client.networks.create(name, driver="bridge",
- check_duplicate=True)
+ ref = client.networks.create(name, driver="bridge", check_duplicate=True)
return cls(ref, name)
@classmethod
@@ -331,7 +335,6 @@ class Network(object):
class Networks(object):
-
def __init__(self, client):
self.client = client
@@ -343,7 +346,6 @@ class Networks(object):
class Program(object):
-
image = "srv6m-image"
name_prefix = "hck"
@@ -352,14 +354,9 @@ class Program(object):
# for exmaple what the vpp is supposed to be
# in our topoloty overview
- instance_names = ["vpp-1",
- "vpp-2",
- "vpp-3",
- "vpp-4"]
+ instance_names = ["vpp-1", "vpp-2", "vpp-3", "vpp-4"]
- network_names = ["net-1",
- "net-2",
- "net-3"]
+ network_names = ["net-1", "net-2", "net-3"]
def __init__(self, image=None, prefix=None):
self.path = dirname(realpath(__file__))
@@ -385,7 +382,6 @@ class Program(object):
return "{}-{}".format(self.name_prefix, name)
def stop_containers(self):
-
for name in self.instance_names:
instance = self.containers.get(self.get_name(name))
if instance:
@@ -397,7 +393,6 @@ class Program(object):
network.rem()
def start_containers(self):
-
self.stop_containers()
networks = list()
@@ -469,15 +464,20 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="B::2") / ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="B::2")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -516,16 +516,17 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr D1::")
- c1.vppctl_exec(
- "sr policy add bsid D1::999:1 next D2:: next D3:: next D4::")
+ c1.vppctl_exec("sr policy add bsid D1::999:1 next D2:: next D3:: next D4::")
c1.vppctl_exec("sr steer l3 B::/120 via bsid D1::999:1")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -539,8 +540,11 @@ class Program(object):
c3.set_ipv6_route("eth2", "A3::2", "D4::/128")
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="B::2") / ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="B::2")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -561,7 +565,7 @@ class Program(object):
for p in c4.pg_read_packets():
p.show2()
- ''' T.Map is obsolete
+ """ T.Map is obsolete
def test_tmap(self):
# TESTS:
# trace add af-packet-input 10
@@ -844,7 +848,7 @@ class Program(object):
print("Receiving packet on {}:".format(c4.name))
for p in c4.pg_read_packets():
p.show2()
- '''
+ """
def test_gtp4(self):
# TESTS:
@@ -863,16 +867,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -880,8 +888,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -889,17 +897,19 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.99.0.1", dst="172.99.0.2") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.99.0.1", dst="172.99.0.2")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
- time.sleep(10)
+ time.sleep(10)
c1.enable_trace(10)
c4.enable_trace(10)
@@ -933,16 +943,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:1111:aaaa:bbbb::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid prefix D2:1111:aaaa::/48 behavior end usid 16")
@@ -950,8 +964,8 @@ class Program(object):
c3.vppctl_exec("sr localsid prefix D2:1111:bbbb::/48 behavior end usid 16")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D2:1111:bbbb::/48")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -959,17 +973,19 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.99.0.1", dst="172.99.0.2") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.99.0.1", dst="172.99.0.2")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
- time.sleep(10)
+ time.sleep(10)
c1.enable_trace(10)
c4.enable_trace(10)
@@ -1003,16 +1019,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1020,8 +1040,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1029,13 +1049,15 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(type=1, R=1, QFI=3) /
- IP(src="172.99.0.1", dst="172.99.0.2") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(type=1, R=1, QFI=3)
+ / IP(src="172.99.0.1", dst="172.99.0.2")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1073,16 +1095,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1090,8 +1116,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1099,10 +1125,12 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="echo_request", S=1, teid=200, seq=200))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="echo_request", S=1, teid=200, seq=200)
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1140,16 +1168,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1157,8 +1189,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1166,10 +1198,12 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="echo_response", S=1, teid=200, seq=200))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="echo_response", S=1, teid=200, seq=200)
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1207,16 +1241,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1224,8 +1262,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1233,12 +1271,15 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="error_indication", S=1, teid=200, seq=200)/
- IE_TEIDI(TEIDI=65535)/IE_GSNAddress(address="1.1.1.1")/
- IE_PrivateExtension(extention_value="z"))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="error_indication", S=1, teid=200, seq=200)
+ / IE_TEIDI(TEIDI=65535)
+ / IE_GSNAddress(address="1.1.1.1")
+ / IE_PrivateExtension(extention_value="z")
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1276,16 +1317,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1293,8 +1338,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1302,12 +1347,14 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1345,16 +1392,20 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2/30",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64")
+ c1.vppctl_exec(
+ "sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64"
+ )
c1.vppctl_exec("sr steer l3 172.20.0.1/32 via bsid D5::")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1362,8 +1413,8 @@ class Program(object):
c3.vppctl_exec("sr localsid address D3:: behavior end")
c4.vppctl_exec(
- "sr localsid prefix D4::/32 "
- "behavior end.m.gtp4.e v4src_position 64")
+ "sr localsid prefix D4::/32 " "behavior end.m.gtp4.e v4src_position 64"
+ )
c2.set_ipv6_route("eth2", "A2::2", "D3::/128")
c2.set_ipv6_route("eth1", "A1::1", "C::/120")
@@ -1371,13 +1422,15 @@ class Program(object):
c3.set_ipv6_route("eth1", "A2::1", "C::/120")
c4.set_ip_pgroute("pg0", "1.0.0.1", "172.20.0.1/32")
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(R=1, QFI=3) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(R=1, QFI=3)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1419,18 +1472,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1447,12 +1501,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.99.0.1", dst="172.99.0.2") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.99.0.1", dst="172.99.0.2")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1494,18 +1550,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1522,13 +1579,15 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(type=1, R=1, QFI=3) /
- IP(src="172.99.0.1", dst="172.99.0.2") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(type=1, R=1, QFI=3)
+ / IP(src="172.99.0.1", dst="172.99.0.2")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1570,18 +1629,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1598,10 +1658,12 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="echo_request", S=1, teid=200, seq=300))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="echo_request", S=1, teid=200, seq=300)
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1643,18 +1705,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1671,10 +1734,12 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="echo_response", S=1, teid=200, seq=300))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="echo_response", S=1, teid=200, seq=300)
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1716,18 +1781,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1744,12 +1810,15 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="error_indication", S=1, teid=200, seq=300)/
- IE_TEIDI(TEIDI=65535)/IE_GSNAddress(address="1.1.1.1")/
- IE_PrivateExtension(extention_value="z"))
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="error_indication", S=1, teid=200, seq=300)
+ / IE_TEIDI(TEIDI=65535)
+ / IE_GSNAddress(address="1.1.1.1")
+ / IE_PrivateExtension(extention_value="z")
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1791,18 +1860,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1819,12 +1889,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1866,18 +1938,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d.di D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1894,13 +1967,15 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(R=1, QFI=3) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(R=1, QFI=3)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -1942,18 +2017,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -1971,12 +2047,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.100.0.1", dst="172.200.0.1") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.100.0.1", dst="172.200.0.1")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2018,18 +2096,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface4(
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -2047,13 +2126,15 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(R=1, QFI=3) /
- IP(src="172.100.0.1", dst="172.200.0.1") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(R=1, QFI=3)
+ / IP(src="172.100.0.1", dst="172.200.0.1")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2095,18 +2176,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -2124,12 +2206,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2171,18 +2255,19 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c4.pg_create_interface(
local_ip="B::1/120",
remote_ip="B::2",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D4:: next D2:: next D3::")
- c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
+ c1.vppctl_exec("sr localsid prefix D::/64 behavior end.m.gtp6.d D4::/64")
c2.vppctl_exec("sr localsid address D2:: behavior end")
@@ -2200,13 +2285,15 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- GTPPDUSessionContainer(R=1, QFI=3) /
- IPv6(src="2001::1", dst="2002::1") /
- ICMPv6EchoRequest())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / GTPPDUSessionContainer(R=1, QFI=3)
+ / IPv6(src="2001::1", dst="2002::1")
+ / ICMPv6EchoRequest()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2246,19 +2333,22 @@ class Program(object):
local_ip="C::1/120",
remote_ip="C::2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c1.pg_create_interface4_name(
ifname="pg1",
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec(
- "sr localsid prefix D::/64 behavior end.m.gtp6.dt46 fib-table 0 local-fib-table 0")
+ "sr localsid prefix D::/64 behavior end.m.gtp6.dt46 fib-table 0 local-fib-table 0"
+ )
c1.vppctl_exec("set ip neighbor pg1 1.0.0.1 aa:bb:cc:dd:ee:22")
c1.set_ip_pgroute("pg1", "1.0.0.1", "172.200.0.1/32")
@@ -2266,12 +2356,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IPv6(src="C::2", dst="D::2") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.100.0.1", dst="172.200.0.1") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IPv6(src="C::2", dst="D::2")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.100.0.1", dst="172.200.0.1")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2310,14 +2402,16 @@ class Program(object):
local_ip="172.16.0.1/30",
remote_ip="172.16.0.2",
local_mac="aa:bb:cc:dd:ee:01",
- remote_mac="aa:bb:cc:dd:ee:02")
+ remote_mac="aa:bb:cc:dd:ee:02",
+ )
c1.pg_create_interface4_name(
ifname="pg1",
local_ip="1.0.0.2/30",
remote_ip="1.0.0.1",
local_mac="aa:bb:cc:dd:ee:11",
- remote_mac="aa:bb:cc:dd:ee:22")
+ remote_mac="aa:bb:cc:dd:ee:22",
+ )
c1.vppctl_exec("set sr encaps source addr A1::1")
c1.vppctl_exec("sr policy add bsid D5:: behavior t.m.gtp4.dt4 fib-table 0")
@@ -2329,12 +2423,14 @@ class Program(object):
print("Waiting...")
time.sleep(30)
- p = (Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01") /
- IP(src="172.20.0.2", dst="172.20.0.1") /
- UDP(sport=2152, dport=2152) /
- GTP_U_Header(gtp_type="g_pdu", teid=200) /
- IP(src="172.100.0.1", dst="172.200.0.1") /
- ICMP())
+ p = (
+ Ether(src="aa:bb:cc:dd:ee:02", dst="aa:bb:cc:dd:ee:01")
+ / IP(src="172.20.0.2", dst="172.20.0.1")
+ / UDP(sport=2152, dport=2152)
+ / GTP_U_Header(gtp_type="g_pdu", teid=200)
+ / IP(src="172.100.0.1", dst="172.200.0.1")
+ / ICMP()
+ )
print("Sending packet on {}:".format(c1.name))
p.show2()
@@ -2355,22 +2451,25 @@ class Program(object):
p.show2()
def status_containers(self):
-
print("Instances:")
for i, name in enumerate(self.instance_names):
name = self.get_name(name)
- print("\t[{}] {} - {}".format(
- i, name,
- "running" if self.containers.get(name) else "missing"))
+ print(
+ "\t[{}] {} - {}".format(
+ i, name, "running" if self.containers.get(name) else "missing"
+ )
+ )
print("Networks:")
for i, name in enumerate(self.network_names):
name = self.get_name(name)
- print("\t[{}] {} - {}".format(
- i, name,
- "running" if self.networks.get(name) else "missing"))
+ print(
+ "\t[{}] {} - {}".format(
+ i, name, "running" if self.networks.get(name) else "missing"
+ )
+ )
def build_image(self):
print("VPP Path (build): {}".format(self.vpp_path))
@@ -2382,7 +2481,9 @@ class Program(object):
system(
"docker cp release-build:{}/vpp-package.tgz {}/".format(
- self.vpp_path, self.vpp_path))
+ self.vpp_path, self.vpp_path
+ )
+ )
instance.rem()
@@ -2408,39 +2509,30 @@ class Program(object):
def get_args():
parser = ArgumentParser()
- parser.add_argument("--verbose", choices=['error', 'debug', 'info'])
+ parser.add_argument("--verbose", choices=["error", "debug", "info"])
- parser.add_argument('--image', choices=['debug', 'release'])
+ parser.add_argument("--image", choices=["debug", "release"])
subparsers = parser.add_subparsers()
- p1 = subparsers.add_parser(
- "infra", help="Infrastructure related commands.")
+ p1 = subparsers.add_parser("infra", help="Infrastructure related commands.")
p1.add_argument(
- "op",
- choices=[
- 'stop',
- 'start',
- 'status',
- 'restart',
- 'build',
- 'release'])
+ "op", choices=["stop", "start", "status", "restart", "build", "release"]
+ )
p1.add_argument("--prefix")
p1.add_argument("--image")
p2 = subparsers.add_parser("cmd", help="Instance related commands.")
- p2.add_argument("op", choices=['vppctl', 'bash'])
+ p2.add_argument("op", choices=["vppctl", "bash"])
p2.add_argument(
- "index",
- type=int,
- help="Container instance index. (./runner.py infra status)")
+ "index", type=int, help="Container instance index. (./runner.py infra status)"
+ )
- p2.add_argument(
- "--command", help="Only vppctl supports this optional argument.")
+ p2.add_argument("--command", help="Only vppctl supports this optional argument.")
p3 = subparsers.add_parser("test", help="Test related commands.")
@@ -2473,7 +2565,9 @@ def get_args():
"gtp6_ipv6",
"gtp6_ipv6_5g",
"gtp6_dt",
- "gtp4_dt"])
+ "gtp4_dt",
+ ],
+ )
args = parser.parse_args()
if not hasattr(args, "op") or not args.op:
@@ -2483,15 +2577,13 @@ def get_args():
return vars(args)
-def main(op=None, prefix=None, verbose=None,
- image=None, index=None, command=None):
-
+def main(op=None, prefix=None, verbose=None, image=None, index=None, command=None):
if verbose:
basicConfig(level=verbose_levels[verbose])
- if image == 'release':
+ if image == "release":
image = "srv6m-release-image"
- elif image == 'debug':
+ elif image == "debug":
image = "srv6m-image"
else:
image = "srv6m-image"
@@ -2501,23 +2593,23 @@ def main(op=None, prefix=None, verbose=None,
program = Program(image, prefix)
try:
- if op == 'build':
+ if op == "build":
program.build_image()
- elif op == 'release':
+ elif op == "release":
program.release_image()
- elif op == 'stop':
+ elif op == "stop":
program.stop_containers()
- elif op == 'start':
+ elif op == "start":
program.start_containers()
- elif op == 'status':
+ elif op == "status":
program.status_containers()
- elif op == 'vppctl':
+ elif op == "vppctl":
program.vppctl(index, command)
- elif op == 'bash':
+ elif op == "bash":
program.bash(index)
- elif op == 'ping':
+ elif op == "ping":
program.test_ping()
- elif op == 'srv6':
+ elif op == "srv6":
program.test_srv6()
# elif op == 'tmap':
# program.test_tmap()
@@ -2527,47 +2619,47 @@ def main(op=None, prefix=None, verbose=None,
# program.test_tmap_ipv6()
# elif op == 'tmap_ipv6_5g':
# program.test_tmap_ipv6_5g()
- elif op == 'gtp4':
+ elif op == "gtp4":
program.test_gtp4()
- elif op == 'gtp4_usid':
+ elif op == "gtp4_usid":
program.test_gtp4_usid()
- elif op == 'gtp4_5g':
+ elif op == "gtp4_5g":
program.test_gtp4_5g()
- elif op == 'gtp4_echo':
+ elif op == "gtp4_echo":
program.test_gtp4_echo()
- elif op == 'gtp4_reply':
+ elif op == "gtp4_reply":
program.test_gtp4_reply()
- elif op == 'gtp4_error':
+ elif op == "gtp4_error":
program.test_gtp4_error()
- elif op == 'gtp4_ipv6':
+ elif op == "gtp4_ipv6":
program.test_gtp4_ipv6()
- elif op == 'gtp4_ipv6_5g':
+ elif op == "gtp4_ipv6_5g":
program.test_gtp4_ipv6_5g()
- elif op == 'gtp6_drop_in':
+ elif op == "gtp6_drop_in":
program.test_gtp6_drop_in()
- elif op == 'gtp6_drop_in_5g':
+ elif op == "gtp6_drop_in_5g":
program.test_gtp6_drop_in_5g()
- elif op == 'gtp6_drop_in_echo':
+ elif op == "gtp6_drop_in_echo":
program.test_gtp6_drop_in_echo()
- elif op == 'gtp6_drop_in_reply':
+ elif op == "gtp6_drop_in_reply":
program.test_gtp6_drop_in_reply()
- elif op == 'gtp6_drop_in_error':
+ elif op == "gtp6_drop_in_error":
program.test_gtp6_drop_in_error()
- elif op == 'gtp6_drop_in_ipv6':
+ elif op == "gtp6_drop_in_ipv6":
program.test_gtp6_drop_in_ipv6()
- elif op == 'gtp6_drop_in_ipv6_5g':
+ elif op == "gtp6_drop_in_ipv6_5g":
program.test_gtp6_drop_in_ipv6_5g()
- elif op == 'gtp6':
+ elif op == "gtp6":
program.test_gtp6()
- elif op == 'gtp6_5g':
+ elif op == "gtp6_5g":
program.test_gtp6_5g()
- elif op == 'gtp6_ipv6':
+ elif op == "gtp6_ipv6":
program.test_gtp6_ipv6()
- elif op == 'gtp6_ipv6_5g':
+ elif op == "gtp6_ipv6_5g":
program.test_gtp6_ipv6_5g()
- elif op == 'gtp6_dt':
+ elif op == "gtp6_dt":
program.test_gtp6_dt()
- elif op == 'gtp4_dt':
+ elif op == "gtp4_dt":
program.test_gtp4_dt()
except Exception:
diff --git a/src/plugins/srv6-mobile/extra/runner_doc.md b/src/plugins/srv6-mobile/extra/runner_doc.md
deleted file mode 100644
index 64f06d77299..00000000000
--- a/src/plugins/srv6-mobile/extra/runner_doc.md
+++ /dev/null
@@ -1,105 +0,0 @@
-# What's `runner.py` doing? {#srv6_mobile_runner_doc}
-
-## Common configurations
-
-### VPP1
-```
-create host-interface name eth1
-set int ip addr host-eth1 A1::1/120
-set int state host-eth1 up
-ip route add ::/0 via host-eth1 A1::2
-```
-
-
-### VPP2
-
-```
-create host-interface name eth1
-set int ip addr host-eth1 A1::2/120
-create host-interface name eth2
-set int ip addr host-eth2 A2::1/120
-set int state host-eth1 up
-set int state host-eth2 up
-ip route add ::/0 via host-eth2 A2::2
-```
-
-
-### VPP3
-
-```
-create host-interface name eth1
-set int ip addr host-eth1 A2::2/120
-create host-interface name eth2
-set int ip addr host-eth2 A3::1/120
-set int state host-eth1 up
-set int state host-eth2 up
-ip route add ::/0 via host-eth1 A2::1
-```
-
-### VPP4
-
-```
-create host-interface name eth1
-set int ip addr host-eth1 A3::2/120
-set int state host-eth1 up
-ip route add ::/0 via host-eth1 A3::1
-```
-
-
-## Drop-in for GTP-U over IPv4
-
-Drop-in mode is handy to test both GTP-U-to-SRv6 and SRv6-to-GTP-U functions at same time. Let's see what's happened when you run `test gtp4`:
-
- $ ./runner.py test gtp4
-
-
-Setting up a virtual interface of packet generator:
-
-#### VPP1
-
-```
-create packet-generator interface pg0
-set int mac address pg0 aa:bb:cc:dd:ee:01
-set int ip addr pg0 172.16.0.1/30
-set ip arp pg0 172.16.0.2/30 aa:bb:cc:dd:ee:02
-```
-
-#### VPP4
-
-```
-create packet-generator interface pg0
-set int mac address pg0 aa:bb:cc:dd:ee:11
-set int ip addr pg0 1.0.0.2/30
-set ip arp pg0 1.0.0.1 aa:bb:cc:dd:ee:22
-```
-
-SRv6 and IP routing settings:
-
-#### VPP1
-
-```
-sr policy add bsid D4:: next D2:: next D3::
-sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4
-sr steer l3 172.20.0.1/32 via bsid D5::
-```
-
-#### VPP2
-
-```
-sr localsid address D2:: behavior end
-ip route add D3::/128 via host-eth2 A2::2
-```
-
-#### VPP3
-
-```
-sr localsid address D3:: behavior end
-ip route add D4::/32 via host-eth2 A3::2
-```
-
-#### VPP4
-
-```
-sr localsid prefix D4::/32 behavior end.m.gtp4.e v4src_position 64
-ip route add 172.20.0.1/32 via pg0 1.0.0.1
-```
diff --git a/src/plugins/srv6-mobile/extra/runner_doc.rst b/src/plugins/srv6-mobile/extra/runner_doc.rst
new file mode 100644
index 00000000000..b5be91cbfc8
--- /dev/null
+++ b/src/plugins/srv6-mobile/extra/runner_doc.rst
@@ -0,0 +1,135 @@
+.. _srv6_mobile_runner_doc:
+
+SRv6 Mobile Runner
+==================
+
+What’s ``runner.py`` doing?
+
+Common configurations
+---------------------
+
+VPP1
+~~~~
+
+::
+
+ create host-interface name eth1
+ set int ip addr host-eth1 A1::1/120
+ set int state host-eth1 up
+ ip route add ::/0 via host-eth1 A1::2
+
+VPP2
+~~~~
+
+::
+
+ create host-interface name eth1
+ set int ip addr host-eth1 A1::2/120
+ create host-interface name eth2
+ set int ip addr host-eth2 A2::1/120
+ set int state host-eth1 up
+ set int state host-eth2 up
+ ip route add ::/0 via host-eth2 A2::2
+
+VPP3
+~~~~
+
+::
+
+ create host-interface name eth1
+ set int ip addr host-eth1 A2::2/120
+ create host-interface name eth2
+ set int ip addr host-eth2 A3::1/120
+ set int state host-eth1 up
+ set int state host-eth2 up
+ ip route add ::/0 via host-eth1 A2::1
+
+VPP4
+~~~~
+
+::
+
+ create host-interface name eth1
+ set int ip addr host-eth1 A3::2/120
+ set int state host-eth1 up
+ ip route add ::/0 via host-eth1 A3::1
+
+Drop-in for GTP-U over IPv4
+---------------------------
+
+Drop-in mode is handy to test both GTP-U-to-SRv6 and SRv6-to-GTP-U
+functions at same time. Let’s see what’s happened when you run
+``test gtp4``:
+
+::
+
+ $ ./runner.py test gtp4
+
+Setting up a virtual interface of packet generator:
+
+.. _vpp1-1:
+
+VPP1
+~~~~
+
+::
+
+ create packet-generator interface pg0
+ set int mac address pg0 aa:bb:cc:dd:ee:01
+ set int ip addr pg0 172.16.0.1/30
+ set ip arp pg0 172.16.0.2/30 aa:bb:cc:dd:ee:02
+
+.. _vpp4-1:
+
+VPP4
+~~~~
+
+::
+
+ create packet-generator interface pg0
+ set int mac address pg0 aa:bb:cc:dd:ee:11
+ set int ip addr pg0 1.0.0.2/30
+ set ip arp pg0 1.0.0.1 aa:bb:cc:dd:ee:22
+
+SRv6 and IP routing settings:
+
+.. _vpp1-2:
+
+VPP1
+~~~~
+
+::
+
+ sr policy add bsid D4:: next D2:: next D3::
+ sr policy add bsid D5:: behavior t.m.gtp4.d D4::/32 v6src_prefix C1::/64 nhtype ipv4
+ sr steer l3 172.20.0.1/32 via bsid D5::
+
+.. _vpp2-1:
+
+VPP2
+~~~~
+
+::
+
+ sr localsid address D2:: behavior end
+ ip route add D3::/128 via host-eth2 A2::2
+
+.. _vpp3-1:
+
+VPP3
+~~~~
+
+::
+
+ sr localsid address D3:: behavior end
+ ip route add D4::/32 via host-eth2 A3::2
+
+.. _vpp4-2:
+
+VPP4
+~~~~
+
+::
+
+ sr localsid prefix D4::/32 behavior end.m.gtp4.e v4src_position 64
+ ip route add 172.20.0.1/32 via pg0 1.0.0.1
diff --git a/src/plugins/srv6-mobile/gtp4_d.c b/src/plugins/srv6-mobile/gtp4_d.c
index 7bafa560810..f519b4840cc 100644
--- a/src/plugins/srv6-mobile/gtp4_d.c
+++ b/src/plugins/srv6-mobile/gtp4_d.c
@@ -68,12 +68,13 @@ static u8 keyword_str[] = "t.m.gtp4.d";
static u8 def_str[] =
"Transit function with decapsulation for IPv4/GTP tunnel";
static u8 param_str[] =
- "<sr-prefix>/<sr-prefixlen> v6src_prefix <v6src_prefix>/<prefixlen> [nhtype <nhtype>]";
+ "<sr-prefix>/<sr-prefixlen> v6src_prefix <v6src_prefix>/<prefixlen> [nhtype "
+ "<nhtype>] fib-table <id>";
static u8 *
clb_format_srv6_t_m_gtp4_d (u8 * s, va_list * args)
{
- srv6_end_gtp4_param_t *ls_mem = va_arg (*args, void *);
+ srv6_end_gtp4_d_param_t *ls_mem = va_arg (*args, void *);
s = format (s, "SRv6 T.M.GTP4.D\n\t");
@@ -88,75 +89,114 @@ clb_format_srv6_t_m_gtp4_d (u8 * s, va_list * args)
if (ls_mem->nhtype != SRV6_NHTYPE_NONE)
{
if (ls_mem->nhtype == SRV6_NHTYPE_IPV4)
- s = format (s, ", NHType IPv4\n");
+ s = format (s, ", NHType IPv4");
else if (ls_mem->nhtype == SRV6_NHTYPE_IPV6)
- s = format (s, ", NHType IPv6\n");
+ s = format (s, ", NHType IPv6");
else if (ls_mem->nhtype == SRV6_NHTYPE_NON_IP)
- s = format (s, ", NHType Non-IP\n");
+ s = format (s, ", NHType Non-IP");
else
- s = format (s, ", NHType Unknow(%d)\n", ls_mem->nhtype);
+ s = format (s, ", NHType Unknow(%d)", ls_mem->nhtype);
}
- else
- s = format (s, "\n");
+
+ s = format (s, ", FIB table %d", ls_mem->fib_table);
+
+ s = format (s, ", Drop In %d\n", ls_mem->drop_in);
return s;
}
+void
+alloc_param_srv6_t_m_gtp4_d (void **plugin_mem_p, const void *v6src_prefix,
+ const u32 v6src_prefixlen, const void *sr_prefix,
+ const u32 sr_prefixlen, const u32 fib_index,
+ const u8 nhtype, const bool drop_in)
+{
+ srv6_end_gtp4_d_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->v6src_prefixlen = v6src_prefixlen;
+ memcpy (&ls_mem->v6src_prefix, v6src_prefix, sizeof (ip6_address_t));
+ ls_mem->sr_prefixlen = sr_prefixlen;
+ memcpy (&ls_mem->sr_prefix, sr_prefix, sizeof (ip6_address_t));
+
+ ls_mem->nhtype = nhtype;
+ ls_mem->drop_in = drop_in;
+ ls_mem->fib_table = fib_index;
+ ls_mem->fib4_index = ip4_fib_index_from_table_id (fib_index);
+ ls_mem->fib6_index = ip6_fib_index_from_table_id (fib_index);
+}
+
static uword
clb_unformat_srv6_t_m_gtp4_d (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_end_gtp4_param_t *ls_mem;
ip6_address_t sr_prefix;
u32 sr_prefixlen;
ip6_address_t v6src_prefix;
u32 v6src_prefixlen;
- u8 nhtype;
+ u32 fib_table = 0;
+ bool drop_in = false;
+ u8 nhtype = SRV6_NHTYPE_NONE;
+ bool config = false;
- if (unformat (input, "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype ipv4",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen,
- unformat_ip6_address, &v6src_prefix, &v6src_prefixlen))
- {
- nhtype = SRV6_NHTYPE_IPV4;
- }
- else
- if (unformat
- (input, "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype ipv6",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen,
- unformat_ip6_address, &v6src_prefix, &v6src_prefixlen))
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- nhtype = SRV6_NHTYPE_IPV6;
- }
- else
- if (unformat
- (input, "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype non-ip",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen,
- unformat_ip6_address, &v6src_prefix, &v6src_prefixlen))
- {
- nhtype = SRV6_NHTYPE_NON_IP;
- }
- else if (unformat (input, "t.m.gtp4.d %U/%d v6src_prefix %U/%d",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen,
- unformat_ip6_address, &v6src_prefix, &v6src_prefixlen))
- {
- nhtype = SRV6_NHTYPE_NONE;
+ if (unformat (
+ input,
+ "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype ipv4 fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ unformat_ip6_address, &v6src_prefix, &v6src_prefixlen, &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_IPV4;
+ }
+ else if (unformat (input,
+ "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype ipv6 "
+ "fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ unformat_ip6_address, &v6src_prefix, &v6src_prefixlen,
+ &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_IPV6;
+ }
+ else if (unformat (
+ input, "t.m.gtp4.d %U/%d v6src_prefix %U/%d nhtype non-ip",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ unformat_ip6_address, &v6src_prefix, &v6src_prefixlen))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_NON_IP;
+ }
+ else if (unformat (input,
+ "t.m.gtp4.d %U/%d v6src_prefix %U/%d fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ unformat_ip6_address, &v6src_prefix, &v6src_prefixlen,
+ &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_NONE;
+ }
+ else if (unformat (input, "drop-in"))
+ {
+ drop_in = true;
+ }
+ else
+ {
+ return 0;
+ }
}
- else
+
+ if (!config)
{
return 0;
}
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->sr_prefix = sr_prefix;
- ls_mem->sr_prefixlen = sr_prefixlen;
-
- ls_mem->v6src_prefix = v6src_prefix;
- ls_mem->v6src_prefixlen = v6src_prefixlen;
-
- ls_mem->nhtype = nhtype;
+ alloc_param_srv6_t_m_gtp4_d (plugin_mem_p, &v6src_prefix, v6src_prefixlen,
+ &sr_prefix, sr_prefixlen, fib_table, nhtype,
+ drop_in);
return 1;
}
@@ -170,9 +210,9 @@ clb_creation_srv6_t_m_gtp4_d (ip6_sr_policy_t * sr_policy)
static int
clb_removal_srv6_t_m_gtp4_d (ip6_sr_policy_t * sr_policy)
{
- srv6_end_gtp4_param_t *ls_mem;
+ srv6_end_gtp4_d_param_t *ls_mem;
- ls_mem = (srv6_end_gtp4_param_t *) sr_policy->plugin_mem;
+ ls_mem = (srv6_end_gtp4_d_param_t *) sr_policy->plugin_mem;
clib_mem_free (ls_mem);
@@ -220,7 +260,6 @@ srv6_t_m_gtp4_d_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_t_m_gtp4_d, static) =
{
.arc_name = "ip4-unicast",
@@ -229,7 +268,6 @@ VNET_FEATURE_INIT (srv6_t_m_gtp4_d, static) =
};
VLIB_INIT_FUNCTION (srv6_t_m_gtp4_d_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp4_dt.c b/src/plugins/srv6-mobile/gtp4_dt.c
index 44a4af34c2f..10cea640036 100644
--- a/src/plugins/srv6-mobile/gtp4_dt.c
+++ b/src/plugins/srv6-mobile/gtp4_dt.c
@@ -90,11 +90,31 @@ clb_format_srv6_t_m_gtp4_dt (u8 * s, va_list * args)
return s;
}
+void
+alloc_param_srv6_t_m_gtp4_dt (void **plugin_mem_p, const u32 fib_index,
+ const u32 local_fib_index, const u8 type)
+{
+ srv6_t_gtp4_dt_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->fib4_index = fib_table_find (FIB_PROTOCOL_IP4, fib_index);
+ ls_mem->fib6_index = fib_table_find (FIB_PROTOCOL_IP6, fib_index);
+
+ if (type == SRV6_GTP4_DT6 || type == SRV6_GTP4_DT46)
+ {
+ ls_mem->local_fib_index =
+ fib_table_find (FIB_PROTOCOL_IP6, local_fib_index);
+ }
+
+ ls_mem->type = type;
+}
+
static uword
clb_unformat_srv6_t_m_gtp4_dt (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_t_gtp4_dt_param_t *ls_mem;
u32 fib_index = 0;
u32 local_fib_index = 0;
u32 type;
@@ -118,20 +138,8 @@ clb_unformat_srv6_t_m_gtp4_dt (unformat_input_t * input, va_list * args)
return 0;
}
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->fib4_index = fib_table_find (FIB_PROTOCOL_IP4, fib_index);
- ls_mem->fib6_index = fib_table_find (FIB_PROTOCOL_IP6, fib_index);
-
- if (type == SRV6_GTP4_DT6 || type == SRV6_GTP4_DT46)
- {
- ls_mem->local_fib_index =
- fib_table_find (FIB_PROTOCOL_IP6, local_fib_index);
- }
-
- ls_mem->type = type;
+ alloc_param_srv6_t_m_gtp4_dt (plugin_mem_p, fib_index, local_fib_index,
+ type);
return 1;
}
@@ -185,7 +193,6 @@ srv6_t_m_gtp4_dt_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_t_m_gtp4_dt, static) =
{
.arc_name = "ip4-unicast",
@@ -194,7 +201,6 @@ VNET_FEATURE_INIT (srv6_t_m_gtp4_dt, static) =
};
VLIB_INIT_FUNCTION (srv6_t_m_gtp4_dt_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp4_e.c b/src/plugins/srv6-mobile/gtp4_e.c
index 211e95d11de..52f1615aa85 100644
--- a/src/plugins/srv6-mobile/gtp4_e.c
+++ b/src/plugins/srv6-mobile/gtp4_e.c
@@ -66,30 +66,70 @@ static u8 param_str[] = "";
static u8 *
clb_format_srv6_end_m_gtp4_e (u8 * s, va_list * args)
{
- srv6_end_gtp4_param_t *ls_mem = va_arg (*args, void *);
+ srv6_end_gtp4_e_param_t *ls_mem = va_arg (*args, void *);
- s = format (s, "SRv6 End gtp4.e\n\t");
+ s = format (s, "SRv6 End gtp4.e\n");
- s = format (s, "IPv4 address position: %d\n", ls_mem->v4src_position);
+ s = format (s, "\tIPv4 address position: %d\n", ls_mem->v4src_position);
+
+ s = format (s, "\tIPv4 source address: %U\n", format_ip4_address,
+ &ls_mem->v4src_addr);
+
+ s = format (s, "\tFib Table %d\n", ls_mem->fib_table);
return s;
}
+void
+alloc_param_srv6_end_m_gtp4_e (void **plugin_mem_p, const void *v4src_addr,
+ const u32 v4src_position, const u32 fib_table)
+{
+ srv6_end_gtp4_e_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+ ls_mem->v4src_position = v4src_position;
+ memcpy (&ls_mem->v4src_addr, v4src_addr, sizeof (ip4_address_t));
+
+ ls_mem->fib_table = fib_table;
+ ls_mem->fib4_index = ip4_fib_index_from_table_id (fib_table);
+ ls_mem->fib6_index = ip6_fib_index_from_table_id (fib_table);
+}
+
static uword
clb_unformat_srv6_end_m_gtp4_e (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_end_gtp4_param_t *ls_mem;
- u32 v4src_position;
-
- if (!unformat (input, "end.m.gtp4.e v4src_position %d", &v4src_position))
+ ip4_address_t v4src_addr;
+ u32 v4src_position = 0;
+ u32 fib_table;
+ bool config = false;
+
+ memset (&v4src_addr, 0, sizeof (ip4_address_t));
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "end.m.gtp4.e v4src_position %d fib-table %d",
+ &v4src_position, &fib_table))
+ {
+ config = true;
+ }
+ else if (unformat (input, "end.m.gtp4.e v4src_addr %U fib-table %d",
+ unformat_ip4_address, &v4src_addr, &fib_table))
+ {
+ config = true;
+ }
+ else
+ {
+ return 0;
+ }
+ }
+
+ if (!config)
return 0;
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->v4src_position = v4src_position;
+ alloc_param_srv6_end_m_gtp4_e (plugin_mem_p, &v4src_addr, v4src_position,
+ fib_table);
return 1;
}
@@ -103,7 +143,7 @@ clb_creation_srv6_end_m_gtp4_e (ip6_sr_localsid_t * localsid)
static int
clb_removal_srv6_end_m_gtp4_e (ip6_sr_localsid_t * localsid)
{
- srv6_end_gtp4_param_t *ls_mem;
+ srv6_end_gtp4_e_param_t *ls_mem;
ls_mem = localsid->plugin_mem;
@@ -163,7 +203,6 @@ srv6_end_m_gtp4_e_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_end_m_gtp4_e, static) =
{
.arc_name = "ip6-unicast",
@@ -177,7 +216,6 @@ VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "SRv6 GTP Endpoint Functions",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp6_d.c b/src/plugins/srv6-mobile/gtp6_d.c
index c62320b33fd..ef831ba750c 100644
--- a/src/plugins/srv6-mobile/gtp6_d.c
+++ b/src/plugins/srv6-mobile/gtp6_d.c
@@ -61,12 +61,13 @@ static u8 fn_name[] = "SRv6-End.M.GTP6.D-plugin";
static u8 keyword_str[] = "end.m.gtp6.d";
static u8 def_str[] =
"Endpoint function with dencapsulation for IPv6/GTP tunnel";
-static u8 param_str[] = "<sr-prefix>/<sr-prefixlen> [nhtype <nhtype>]";
+static u8 param_str[] =
+ "<sr-prefix>/<sr-prefixlen> [nhtype <nhtype>] fib-table <id>";
static u8 *
clb_format_srv6_end_m_gtp6_d (u8 * s, va_list * args)
{
- srv6_end_gtp6_param_t *ls_mem = va_arg (*args, void *);
+ srv6_end_gtp6_d_param_t *ls_mem = va_arg (*args, void *);
s = format (s, "SRv6 End gtp6.d\n\t");
@@ -77,62 +78,98 @@ clb_format_srv6_end_m_gtp6_d (u8 * s, va_list * args)
if (ls_mem->nhtype != SRV6_NHTYPE_NONE)
{
if (ls_mem->nhtype == SRV6_NHTYPE_IPV4)
- s = format (s, ", NHType IPv4\n");
+ s = format (s, ", NHType IPv4");
else if (ls_mem->nhtype == SRV6_NHTYPE_IPV6)
- s = format (s, ", NHType IPv6\n");
+ s = format (s, ", NHType IPv6");
else if (ls_mem->nhtype == SRV6_NHTYPE_NON_IP)
- s = format (s, ", NHType Non-IP\n");
+ s = format (s, ", NHType Non-IP");
else
- s = format (s, ", NHType Unknow(%d)\n", ls_mem->nhtype);
+ s = format (s, ", NHType Unknow(%d)", ls_mem->nhtype);
}
- else
- s = format (s, "\n");
+
+ s = format (s, " FIB table %d", ls_mem->fib_table);
+
+ s = format (s, " Drop In %d", ls_mem->drop_in);
return s;
}
+void
+alloc_param_srv6_end_m_gtp6_d (void **plugin_mem_p, const void *sr_prefix,
+ const u32 sr_prefixlen, const u8 nhtype,
+ const bool drop_in, const u32 fib_table)
+{
+ srv6_end_gtp6_d_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->sr_prefixlen = sr_prefixlen;
+ memcpy (&ls_mem->sr_prefix, sr_prefix, sizeof (ip6_address_t));
+ ls_mem->nhtype = nhtype;
+ ls_mem->drop_in = drop_in;
+ ls_mem->fib_table = fib_table;
+ ls_mem->fib4_index = ip4_fib_index_from_table_id (fib_table);
+ ls_mem->fib6_index = ip6_fib_index_from_table_id (fib_table);
+}
+
static uword
clb_unformat_srv6_end_m_gtp6_d (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_end_gtp6_param_t *ls_mem;
ip6_address_t sr_prefix;
u32 sr_prefixlen;
- u8 nhtype;
+ u8 nhtype = SRV6_NHTYPE_NONE;
+ bool drop_in = false;
+ bool config = false;
+ u32 fib_table = 0;
- if (unformat (input, "end.m.gtp6.d %U/%d nh-type ipv4",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen))
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- nhtype = SRV6_NHTYPE_IPV4;
- }
- else if (unformat (input, "end.m.gtp6.d %U/%d nh-type ipv6",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen))
- {
- nhtype = SRV6_NHTYPE_IPV6;
- }
- else if (unformat (input, "end.m.gtp6.d %U/%d nh-type none",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen))
- {
- nhtype = SRV6_NHTYPE_NON_IP;
- }
- else if (unformat (input, "end.m.gtp6.d %U/%d",
- unformat_ip6_address, &sr_prefix, &sr_prefixlen))
- {
- nhtype = SRV6_NHTYPE_NONE;
+ if (unformat (input, "end.m.gtp6.d %U/%d nh-type ipv4 fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_IPV4;
+ }
+ else if (unformat (input, "end.m.gtp6.d %U/%d nh-type ipv6 fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_IPV6;
+ }
+ else if (unformat (input, "end.m.gtp6.d %U/%d nh-type none",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_NON_IP;
+ }
+ else if (unformat (input, "end.m.gtp6.d %U/%d fib-table %d",
+ unformat_ip6_address, &sr_prefix, &sr_prefixlen,
+ &fib_table))
+ {
+ config = true;
+ nhtype = SRV6_NHTYPE_NONE;
+ }
+ else if (unformat (input, "drop-in"))
+ {
+ drop_in = true;
+ }
+ else
+ {
+ return 0;
+ }
}
- else
+
+ if (!config)
{
return 0;
}
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->sr_prefix = sr_prefix;
- ls_mem->sr_prefixlen = sr_prefixlen;
-
- ls_mem->nhtype = nhtype;
+ alloc_param_srv6_end_m_gtp6_d (plugin_mem_p, &sr_prefix, sr_prefixlen,
+ nhtype, drop_in, fib_table);
return 1;
}
@@ -144,9 +181,15 @@ clb_creation_srv6_end_m_gtp6_d (ip6_sr_localsid_t * localsid)
}
static int
+clb_creation_srv6_end_m_gtp6_d_2 (ip6_sr_policy_t *sr_policy)
+{
+ return 0;
+}
+
+static int
clb_removal_srv6_end_m_gtp6_d (ip6_sr_localsid_t * localsid)
{
- srv6_end_gtp6_param_t *ls_mem;
+ srv6_end_gtp6_d_param_t *ls_mem;
ls_mem = localsid->plugin_mem;
@@ -155,6 +198,18 @@ clb_removal_srv6_end_m_gtp6_d (ip6_sr_localsid_t * localsid)
return 0;
}
+static int
+clb_removal_srv6_end_m_gtp6_d_2 (ip6_sr_policy_t *sr_policy)
+{
+ srv6_end_gtp6_d_param_t *ls_mem;
+
+ ls_mem = sr_policy->plugin_mem;
+
+ clib_mem_free (ls_mem);
+
+ return 0;
+}
+
static clib_error_t *
srv6_end_m_gtp6_d_init (vlib_main_t * vm)
{
@@ -193,10 +248,18 @@ srv6_end_m_gtp6_d_init (vlib_main_t * vm)
if (rc < 0)
clib_error_return (0, "SRv6 Endpoint GTP6.D LocalSID function"
"couldn't be registered");
+
+ rc = sr_policy_register_function (
+ vm, fn_name, keyword_str, def_str, param_str, 128, // prefix len
+ &dpo_type, clb_format_srv6_end_m_gtp6_d, clb_unformat_srv6_end_m_gtp6_d,
+ clb_creation_srv6_end_m_gtp6_d_2, clb_removal_srv6_end_m_gtp6_d_2);
+ if (rc < 0)
+ clib_error_return (0, "SRv6 GTP6.D Steering function"
+ "couldn't be registered");
+
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_end_m_gtp6_d, static) =
{
.arc_name = "ip6-unicast",
@@ -205,7 +268,6 @@ VNET_FEATURE_INIT (srv6_end_m_gtp6_d, static) =
};
VLIB_INIT_FUNCTION (srv6_end_m_gtp6_d_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp6_d_di.c b/src/plugins/srv6-mobile/gtp6_d_di.c
index 14318562e84..94bc684161d 100644
--- a/src/plugins/srv6-mobile/gtp6_d_di.c
+++ b/src/plugins/srv6-mobile/gtp6_d_di.c
@@ -66,7 +66,7 @@ static u8 param_str[] = "<sr-prefix>/<sr-prefixlen> [nhtype <nhtype>]";
static u8 *
clb_format_srv6_end_m_gtp6_d_di (u8 * s, va_list * args)
{
- srv6_end_gtp6_param_t *ls_mem = va_arg (*args, void *);
+ srv6_end_gtp6_d_param_t *ls_mem = va_arg (*args, void *);
s = format (s, "SRv6 End gtp6.d Drop-in\n\t");
@@ -91,11 +91,24 @@ clb_format_srv6_end_m_gtp6_d_di (u8 * s, va_list * args)
return s;
}
+void
+alloc_param_srv6_end_m_gtp6_di (void **plugin_mem_p, const void *sr_prefix,
+ const u32 sr_prefixlen, const u8 nhtype)
+{
+ srv6_end_gtp6_d_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->sr_prefixlen = sr_prefixlen;
+ memcpy (&ls_mem->sr_prefix, sr_prefix, sizeof (ip6_address_t));
+ ls_mem->nhtype = nhtype;
+}
+
static uword
clb_unformat_srv6_end_m_gtp6_d_di (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_end_gtp6_param_t *ls_mem;
ip6_address_t sr_prefix;
u32 sr_prefixlen = 0;
u8 nhtype;
@@ -125,13 +138,8 @@ clb_unformat_srv6_end_m_gtp6_d_di (unformat_input_t * input, va_list * args)
return 0;
}
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->sr_prefix = sr_prefix;
- ls_mem->sr_prefixlen = sr_prefixlen;
- ls_mem->nhtype = nhtype;
+ alloc_param_srv6_end_m_gtp6_di (plugin_mem_p, &sr_prefix, sr_prefixlen,
+ nhtype);
return 1;
}
@@ -145,7 +153,7 @@ clb_creation_srv6_end_m_gtp6_d_di (ip6_sr_localsid_t * localsid)
static int
clb_removal_srv6_end_m_gtp6_d_di (ip6_sr_localsid_t * localsid)
{
- srv6_end_gtp6_param_t *ls_mem;
+ srv6_end_gtp6_d_param_t *ls_mem;
ls_mem = localsid->plugin_mem;
@@ -198,7 +206,6 @@ srv6_end_m_gtp6_d_di_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_end_m_gtp6_d_di, static) =
{
.arc_name = "ip6-unicast",
@@ -207,7 +214,6 @@ VNET_FEATURE_INIT (srv6_end_m_gtp6_d_di, static) =
};
VLIB_INIT_FUNCTION (srv6_end_m_gtp6_d_di_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp6_dt.c b/src/plugins/srv6-mobile/gtp6_dt.c
index cbd2327cc47..c4d4175e321 100644
--- a/src/plugins/srv6-mobile/gtp6_dt.c
+++ b/src/plugins/srv6-mobile/gtp6_dt.c
@@ -84,11 +84,31 @@ clb_format_srv6_end_m_gtp6_dt (u8 * s, va_list * args)
return s;
}
+void
+alloc_param_srv6_end_m_gtp6_dt (void **plugin_mem_p, const u32 fib_index,
+ const u32 local_fib_index, const u32 type)
+{
+ srv6_end_gtp6_dt_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->fib4_index = fib_table_find (FIB_PROTOCOL_IP4, fib_index);
+ ls_mem->fib6_index = fib_table_find (FIB_PROTOCOL_IP6, fib_index);
+
+ if (type == SRV6_GTP6_DT6 || type == SRV6_GTP6_DT46)
+ {
+ ls_mem->local_fib_index =
+ fib_table_find (FIB_PROTOCOL_IP6, local_fib_index);
+ }
+
+ ls_mem->type = type;
+}
+
static uword
clb_unformat_srv6_end_m_gtp6_dt (unformat_input_t * input, va_list * args)
{
void **plugin_mem_p = va_arg (*args, void **);
- srv6_end_gtp6_dt_param_t *ls_mem;
u32 fib_index = 0;
u32 local_fib_index = 0;
u32 type;
@@ -111,22 +131,8 @@ clb_unformat_srv6_end_m_gtp6_dt (unformat_input_t * input, va_list * args)
{
return 0;
}
-
- ls_mem = clib_mem_alloc_aligned_at_offset (sizeof *ls_mem, 0, 0, 1);
- clib_memset (ls_mem, 0, sizeof *ls_mem);
- *plugin_mem_p = ls_mem;
-
- ls_mem->fib4_index = fib_table_find (FIB_PROTOCOL_IP4, fib_index);
- ls_mem->fib6_index = fib_table_find (FIB_PROTOCOL_IP6, fib_index);
-
- if (type == SRV6_GTP6_DT6 || type == SRV6_GTP6_DT46)
- {
- ls_mem->local_fib_index =
- fib_table_find (FIB_PROTOCOL_IP6, local_fib_index);
- }
-
- ls_mem->type = type;
-
+ alloc_param_srv6_end_m_gtp6_dt (plugin_mem_p, fib_index, local_fib_index,
+ type);
return 1;
}
@@ -179,7 +185,6 @@ srv6_end_m_gtp6_dt_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_end_m_gtp6_dt, static) =
{
.arc_name = "ip6-unicast",
@@ -188,7 +193,6 @@ VNET_FEATURE_INIT (srv6_end_m_gtp6_dt, static) =
};
VLIB_INIT_FUNCTION (srv6_end_m_gtp6_dt_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/gtp6_e.c b/src/plugins/srv6-mobile/gtp6_e.c
index d139a649409..dd3a889928b 100644
--- a/src/plugins/srv6-mobile/gtp6_e.c
+++ b/src/plugins/srv6-mobile/gtp6_e.c
@@ -66,15 +66,40 @@ static u8 param_str[] = "";
static u8 *
clb_format_srv6_end_m_gtp6_e (u8 * s, va_list * args)
{
- s = format (s, "SRv6 End format function unsupported.");
+ srv6_end_gtp6_e_param_t *ls_mem = va_arg (*args, void *);
+ ;
+
+ s = format (s, "SRv6 End.M.GTP6.E function.");
+
+ s = format (s, "\tFib Table %d\n", ls_mem->fib_table);
+
return s;
}
+void
+alloc_param_srv6_end_m_gtp6_e (void **plugin_mem_p, const u32 fib_table)
+{
+ srv6_end_gtp6_e_param_t *ls_mem;
+ ls_mem = clib_mem_alloc (sizeof *ls_mem);
+ clib_memset (ls_mem, 0, sizeof *ls_mem);
+ *plugin_mem_p = ls_mem;
+
+ ls_mem->fib_table = fib_table;
+ ls_mem->fib4_index = ip4_fib_index_from_table_id (fib_table);
+ ls_mem->fib6_index = ip6_fib_index_from_table_id (fib_table);
+}
+
static uword
-clb_unformat_srv6_end_m_gtp6_e (unformat_input_t * input, va_list * args)
+clb_unformat_srv6_end_m_gtp6_e (unformat_input_t *input, va_list *args)
{
- if (!unformat (input, "end.m.gtp6.e"))
+ void **plugin_mem_p = va_arg (*args, void **);
+ u32 fib_table;
+
+ if (!unformat (input, "end.m.gtp6.e fib-table %d", &fib_table))
return 0;
+
+ alloc_param_srv6_end_m_gtp6_e (plugin_mem_p, fib_table);
+
return 1;
}
@@ -87,6 +112,12 @@ clb_creation_srv6_end_m_gtp6_e (ip6_sr_localsid_t * localsid)
static int
clb_removal_srv6_end_m_gtp6_e (ip6_sr_localsid_t * localsid)
{
+ srv6_end_gtp6_e_param_t *ls_mem;
+
+ ls_mem = localsid->plugin_mem;
+
+ clib_mem_free (ls_mem);
+
return 0;
}
@@ -137,7 +168,6 @@ srv6_end_m_gtp6_e_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (srv6_end_m_gtp6_e, static) =
{
.arc_name = "ip6-unicast",
@@ -146,7 +176,6 @@ VNET_FEATURE_INIT (srv6_end_m_gtp6_e, static) =
};
VLIB_INIT_FUNCTION (srv6_end_m_gtp6_e_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/srv6-mobile/mobile.h b/src/plugins/srv6-mobile/mobile.h
index 517e7c8f84c..a305a25b811 100644
--- a/src/plugins/srv6-mobile/mobile.h
+++ b/src/plugins/srv6-mobile/mobile.h
@@ -20,6 +20,8 @@
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
#include <vnet/srv6/sr.h>
#include <vnet/srv6/sr_packet.h>
@@ -69,24 +71,41 @@
#define GTPU_IE_MAX_SIZ 256
#define SRH_TLV_USER_PLANE_CONTAINER 0x0a /* tentative */
-/* *INDENT-OFF* */
+typedef enum mobile_policy_function_list
+{
+ SRV6_MOBILE_POLICY_UNKNOWN_FUNCTION = 0,
+ SRV6_MOBILE_POLICY_T_M_GTP4_D,
+ SRV6_MOBILE_POLICY_T_M_GTP4_DT4,
+ SRV6_MOBILE_POLICY_T_M_GTP4_DT6,
+ SRV6_MOBILE_POLICY_T_M_GTP4_DT46,
+ SRV6_MOBILE_POLICY_END_M_GTP6_D,
+} mobile_policy_function_list_t;
+
+typedef enum mobile_localsid_function_list
+{
+ SRV6_MOBILE_LOCALSID_UNKNOWN_FUNCTION = 0,
+ SRV6_MOBILE_LOCALSID_END_M_GTP4_E,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_E,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_D,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DI,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT4,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT6,
+ SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT46,
+} mobile_localsid_function_list_t;
+
typedef struct
{
u8 type;
u8 restart_counter;
} __attribute__ ((packed)) gtpu_recovery_ie;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef struct
{
u16 seq;
u8 npdu_num;
u8 nextexthdr;
} __attribute__ ((packed)) gtpu_exthdr_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef struct
{
u8 ver_flags;
@@ -95,7 +114,6 @@ typedef struct
u32 teid;
gtpu_exthdr_t ext[0];
} __attribute__ ((packed)) gtpu_header_t;
-/* *INDENT-ON* */
#define GTPU_TYPE_ECHO_REQUEST 1
#define GTPU_TYPE_ECHO_REPLY 2
@@ -103,7 +121,6 @@ typedef struct
#define GTPU_TYPE_END_MARKER 254
#define GTPU_TYPE_GTPU 255
-/* *INDENT-OFF* */
typedef struct
{
BITALIGN2 (u8 ppi:3,
@@ -111,9 +128,7 @@ typedef struct
u8 padding[3];
} __attribute__ ((packed)) gtpu_paging_policy_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef struct
{
u8 exthdrlen;
@@ -131,7 +146,6 @@ typedef struct
gtpu_paging_policy_t paging[0];
u8 nextexthdr;
} __attribute__ ((packed)) gtpu_pdu_session_t;
-/* *INDENT-ON* */
#define GTPU_PDU_SESSION_P_BIT_MASK 0x80
#define GTPU_PDU_SESSION_R_BIT_MASK 0x40
@@ -141,47 +155,51 @@ typedef struct
#define SRV6_PDU_SESSION_R_BIT_MASK 0x02
#define SRV6_PDU_SESSION_QFI_MASK 0xfC
-/* *INDENT-OFF* */
typedef struct
{
ip4_header_t ip4; /* 20 bytes */
udp_header_t udp; /* 8 bytes */
gtpu_header_t gtpu; /* 8 bytes */
} __attribute__ ((packed)) ip4_gtpu_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef struct
{
ip6_header_t ip6; /* 40 bytes */
udp_header_t udp; /* 8 bytes */
gtpu_header_t gtpu; /* 8 bytes */
} __attribute__ ((packed)) ip6_gtpu_header_t;
-/* *INDENT-ON* */
#define GTPU_V1_VER (1<<5)
#define GTPU_PT_GTP (1<<4)
-/* *INDENT-OFF* */
typedef struct
{
u8 type;
u8 length;
u8 value[0];
} __attribute__ ((packed)) user_plane_sub_tlv_t;
-/* *INDENT-ON* */
#define USER_PLANE_SUB_TLV_IE 0x01
-typedef struct srv6_end_gtp6_param_s
+/* SRv6 mobile Plugin Params */
+
+/* GTP6.D, GTP6.Di */
+typedef struct srv6_end_gtp6_d_param_s
{
u8 nhtype;
ip6_address_t sr_prefix;
u32 sr_prefixlen;
-} srv6_end_gtp6_param_t;
+ bool drop_in;
+
+ u32 fib_table;
+ u32 fib4_index;
+ u32 fib6_index;
+} srv6_end_gtp6_d_param_t;
+
+/* GTP6.DT */
typedef struct srv6_end_gtp6_dt_param_s
{
u8 type;
@@ -191,6 +209,15 @@ typedef struct srv6_end_gtp6_dt_param_s
u32 local_fib_index;
} srv6_end_gtp6_dt_param_t;
+/* GTP6.E */
+typedef struct srv6_end_gtp6_e_param_s
+{
+ u32 fib_table;
+ u32 fib4_index;
+ u32 fib6_index;
+} srv6_end_gtp6_e_param_t;
+
+/* GTP4.DT */
typedef struct srv6_t_gtp4_dt_param_s
{
u8 type;
@@ -200,7 +227,19 @@ typedef struct srv6_t_gtp4_dt_param_s
u32 local_fib_index;
} srv6_t_gtp4_dt_param_t;
-typedef struct srv6_end_gtp4_param_s
+/* GTP4.E */
+typedef struct srv6_end_gtp4_e_param_s
+{
+ u32 v4src_position;
+ ip4_address_t v4src_addr;
+
+ u32 fib_table;
+ u32 fib4_index;
+ u32 fib6_index;
+} srv6_end_gtp4_e_param_t;
+
+/* GTP4.D */
+typedef struct srv6_end_gtp4_d_param_s
{
u8 nhtype;
@@ -210,8 +249,12 @@ typedef struct srv6_end_gtp4_param_s
ip6_address_t v6src_prefix;
u32 v6src_prefixlen;
- u32 v4src_position;
-} srv6_end_gtp4_param_t;
+ bool drop_in;
+
+ u32 fib_table;
+ u32 fib4_index;
+ u32 fib6_index;
+} srv6_end_gtp4_d_param_t;
typedef struct srv6_end_main_v4_s
{
diff --git a/src/plugins/srv6-mobile/mobile_plugin_doc.md b/src/plugins/srv6-mobile/mobile_plugin_doc.md
deleted file mode 100644
index 3a44e795838..00000000000
--- a/src/plugins/srv6-mobile/mobile_plugin_doc.md
+++ /dev/null
@@ -1,201 +0,0 @@
-SRv6 Mobile User Plane Plugins {#srv6_mobile_plugin_doc}
-========================
-
-# Introduction
-
-This plugin module can provide the stateless mobile user plane protocols translation between GTP-U and SRv6. The plugin also provides FIB table lookup for an IPv4/IPv6 packet encapsulated in GTP-U. These plugin functions take advantage of SRv6 network programmability.
-
-[SRv6 Mobile User Plane](https://tools.ietf.org/html/draft-ietf-dmm-srv6-mobile-uplane) defines the user plane protocol using SRv6
-including following stateless translation functions:
-
-- **T.M.GTP4.D:**
- GTP-U over UDP/IPv4 -> SRv6
-- **End.M.GTP4.E:**
- SRv6 -> GTP-U over UDP/IPv4
-- **End.M.GTP6.D:**
- GTP-U over UDP/IPv6 -> SRv6
-- **End.M.GTP6.E:**
- SRv6 -> GTP-U over UDP/IPv6
-
-These functions benefit user plane(overlay) to be able to utilize data plane(underlay) networks properly. And also it benefits data plane to be able to handle user plane in routing paradigm.
-
-In addition to the above functions, the plugin supports following functions:
-
-- **T.M.GTP4.DT{4|6|46}:**
- FIB table lookup for IPv4/IP6 encapsulated in GTP-U over UDP/IPv4
-- **End.M.GTP6.DT{4|6|46}:**
- FIB table lookup for IPv4/IP6 encapsulated in GTP-U over UDP/IPv6
-
-Noted that the prefix of function names follow naming convention of SRv6 network programming. "T" means transit function, "End" means end function, "M" means Mobility specific function. The suffix "D" and "E" mean that "decapsulation" and "encapsulation" respectively.
-
-
-# Implementation
-
-All SRv6 mobile functions are implemented as VPP plugin modules. The plugin modules leverage the sr_policy and sr_localsid mechanisms.
-
-# Configurations
-
-## GTP-U to SRv6
-
-The GTP-U tunnel and flow identifiers of a receiving packet are mapped to a Segment Identifier(SID) of sending SRv6 packets.
-
-### IPv4 infrastructure case
-
-In case that **IPv4** networks are the infrastructure of GTP-U, T.M.GTP4.D function translates the receiving GTP-U packets to SRv6 packets.
-
-A T.M.GTP4.D function is associated with the following mandatory parameters:
-
-- SID: A SRv6 SID to represents the function
-- DST-PREFIX: Prefix of remote SRv6 segment. The destination address or last SID of out packets consists of the prefix followed by dst IPv4 address, QFI and TEID of the receiving packets.
-- SRC-PREFIX: Prefix for src address of sending packets. The src IPv6 address consists of the prefix followed by the src IPv4 address of the receiving packets.
-
-The following command instantiates a new T.M.GTP4.D function.
-
-```
-sr policy add bsid SID behavior t.m.gtp4.d DST-PREFIX v6src_prefix SRC-PREFIX [nhtype {ipv4|ipv6|non-ip}]
-```
-
-For example, the below command configures the SID 2001:db8::1 with `t.m.gtp4.d` behavior for translating receiving GTP-U over IPv4 packets to SRv6 packets with next-header type is IPv4.
-
-```
-sr policy add bsid 2001:db8::1 behavior t.m.gtp4.d D1::/32 v6src_prefix A1::/64 nhtype ipv4
-```
-
-It should be interesting how a SRv6 BSID works to decapsulate the receiving GTP-U packets over IPv4 header. To utilize ```t.m.gtp4.d``` function, you need to configure some SR steering policy like:
-
-```
-sr steer l3 172.20.0.1/32 via bsid 2001:db8::1
-```
-
-The above steering policy with the BSID of `t.m.gtp4.d` would work properly for the GTP-U packets destined to 172.20.0.1.
-
-If you have a SID(s) list of SR policy which the configured gtp4.d function to be applied, the SR Policy can be configured as following:
-
-```
-sr policy add bsid D1:: next A1:: next B1:: next C1::
-```
-
-### IPv6 infrastructure case
-
-In case that GTP-U is deployed over **IPv6** infrastructure, you don't need to configure T.M.GTP4.D function and associated SR steering policy. Instead of that, you just need to configure a localsid of End.M.GTP6.D segment.
-
-An End.M.GTP6.D segment is associated with the following mandatory parameters:
-
-- SID-PREFIX: SRv6 SID prefix to represent the function. In this function, it should be the dst address of receiving GTP-U packets.
-- DST-PREFIX: Prefix of remote SRv6 Segment. The destination address or last SID of output packets consists of the prefix followed by QFI and TEID of the receiving packets.
-
-The following command instantiates a new End.M.GTP6.D function.
-
-```
-sr localsid prefix SID-PREFIX behavior end.m.gtp6.d DST-PREFIX [nhtype {ipv4|ipv6|non-ip}]
-```
-For example, the below command configures the SID prefix 2001:db8::/64 with `end.m.gtp6.d` behavior for translating receiving GTP-U over IPv6 packets which have IPv6 destination addresses within 2001:db8::/64 to SRv6 packets. The dst IPv6 address of the outgoing packets consists of D4::/64 followed by QFI and TEID.
-
-```
-sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.d D4::/64
-```
-
-In another case, the translated packets from GTP-U over IPv6 to SRv6 will be re-translated back to GTP-U, which is so called 'Drop-In' mode.
-
-In Drop-In mode, an additional IPv6 specific end segment is required, named End.M.GTP6.D.Di. It is because that unlike `end.m.gtp6.d`, it needs to preserve original IPv6 dst address as the last SID in the SRH.
-
-Regardless of that difference exists, the required configuration parameters are same as `end.m.gtp6.d`.
-
-The following command instantiates a new End.M.GTP6.D.Di function.
-
-```
-sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.d.di D4::/64
-```
-
-
-## SRv6 to GTP-U
-
-The SRv6 Mobile functions on SRv6 to GTP-U direction are End.M.GTP4.E and End.M.GTP6.D.
-
-In this direction with GTP-U over IPv4 infrastructure, an End.M.GTP4.E segment is associated with the following mandatory parameters:
-
-- SID-PREFIX: SRv6 SID prefix to represent the function.
-- V4SRC-ADDR-POSITION: Integer number indicates bit position where IPv4 src address embedded.
-
-The following command instantiates a new End.M.GTP4.E function.
-
-```
-sr localsid prefix SID-PREFIX behavior end.m.gtp4.e v4src_position V4SRC-ADDR-POSITION
-```
-
-For example, the below command configures the SID prefix 2001:db8::/32 with `end.m.gtp4.e` behavior for translating the receiving SRv6 packets to GTP-U packets encapsulated with UDP/IPv4 header. All the GTP-U tunnel and flow identifiers are extracted from the active SID in the receiving packets. The src IPv4 address of sending GTP-U packets is extracted from the configured bit position in the src IPv6 address.
-
-```
-sr localsid prefix 2001:db8::/32 behavior end.m.gtp4.e v4src_position 64
-```
-
-In IPv6 infrastructure case, an End.M.GTP6.E segment is associated with the following mandatory parameters:
-
-- SID-PREFIX: SRv6 SID prefix to represent the function.
-
-The following command instantiates a new End.M.GTP6.E function.
-
-```
-sr localsid prefix SID-PREFIX behavior end.m.gtp6.e
-```
-
-For example, the below command configures the SID prefix 2001:db8::/64 with `end.m.gtp6.e` behavior for translating the receiving SRv6 packets to GTP-U packets encapsulated with UDP/IPv6 header. While the last SID indicates GTP-U dst IPv6 address, 32-bits GTP-U TEID and 6-bits QFI are extracted from the active SID in the receiving packets.
-
-```
-sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.e
-```
-
-## FIB Table Lookup for Inner IPv4/IPv6 packet
-
-SRv6 Mobile functions of `t.m.gtp4.dt*` and `end.m.gtp6.dt*` support decapsulating outer IP/UDP/GTP-U headers and forwarding inner IP packet based on specific fib table.
-
-In case of the both outer and inner IP address families are IPv4, `t.m.gtp4.dt4` function supports GTP-U decapsulation and fib lookup for inner IPv4 with an associated steering policy and the following parameters:
-
-- SID: A SRv6 SID to represents the function
-- FIB: fib-table number for inner IPv4 packet lookup and forwarding
-
-The following command instantiates a new T.M.GTP4.DT4 function.
-
-```
-sr policy add bsid SID behavior t.m.gtp4.dt4 fib-table FIB
-```
-
-For example, the below commands configure D5:: as the SID instantiates `t.m.gtp4.dt4` function. A steering policy for packets destine to 172.20.0.1 binds to the SID.
-
-```
-sr steer l3 172.20.0.1/32 via bsid D5::
-sr policy add bsid D5:: behavior t.m.gtp4.dt4 fib-table 0
-```
-
-In addition, inner IPv6, or mix of IPv4 and IPv6 inner packet cases require the function to be configured with local-fib table.
-
-- LOCAL-FIB: fib-table number for lookup and forward GTP-U packet based on outer IP destination address
-
-This is inner IPv6 case specific. The reason is that GTP-U encapsulates link local IPv6 packet for NDP (Neighber Discovery Protocol). Outer GTP-U header should be kept until the packets reach to the node responsible for NDP handling. It is typically UPF(User Plane Function) node.
-
-The following command instantiate a new T.M.GTP4.DT6 function.
-
-```
-sr policy add bsid D5:: behavior t.m.gtp4.dt6 fib-table 0 local-fib-table LOCAL-FIB
-```
-
-Following example configures fib 0 for inner packet and fib 1 for outer GTP-U packet forwarding:
-
-```
-sr policy add bsid D5:: behavior t.m.gtp4.dt6 fib-table 0 local-fib-table 1
-```
-
-If you need to suport both IPv4 and IPv6 inner packet lookup with just one SID, you can configure `t.m.gtp4.dt46` function:
-
-```
-sr policy add bsid D5:: behavior t.m.gtp4.dt46 fib-table 0 local-fib-table 1
-```
-
-In case of GTP-U over IPv6 case, `end.m.gtp6.dt4`, `end.m.gtp6.dt6` and `end.m.gtp6.dt46` functions support inner IPv4, IPv6 and IPv4/IPv6 lookup and forwarding respectively. Specifiyng fib table for inner IP packet forwarding is required as same as GTP-U over IPv4 case, and local-fib table for inner IPv6 and IPv4/IPv6 cases as well.
-
-```
-sr localsid prefix D::/64 behavior end.m.gtp6.dt46 fib-table 0 local-fib-table 0
-```
-
-To run some demo setup please refer to: @subpage srv6_mobile_runner_doc
-
diff --git a/src/plugins/srv6-mobile/mobile_plugin_doc.rst b/src/plugins/srv6-mobile/mobile_plugin_doc.rst
new file mode 100644
index 00000000000..1aca3aaf229
--- /dev/null
+++ b/src/plugins/srv6-mobile/mobile_plugin_doc.rst
@@ -0,0 +1,278 @@
+.. _srv6_mobile_plugin_doc:
+
+SRv6 Mobile User Plane
+======================
+
+Introduction
+------------
+
+This plugin module can provide the stateless mobile user plane protocols
+translation between GTP-U and SRv6. The plugin also provides FIB table
+lookup for an IPv4/IPv6 packet encapsulated in GTP-U. These plugin
+functions take advantage of SRv6 network programmability.
+
+`SRv6 Mobile User
+Plane <https://tools.ietf.org/html/draft-ietf-dmm-srv6-mobile-uplane>`__
+defines the user plane protocol using SRv6 including following stateless
+translation functions:
+
+- **T.M.GTP4.D:** GTP-U over UDP/IPv4 -> SRv6
+- **End.M.GTP4.E:** SRv6 -> GTP-U over UDP/IPv4
+- **End.M.GTP6.D:** GTP-U over UDP/IPv6 -> SRv6
+- **End.M.GTP6.E:** SRv6 -> GTP-U over UDP/IPv6
+
+These functions benefit user plane(overlay) to be able to utilize data
+plane(underlay) networks properly. And also it benefits data plane to be
+able to handle user plane in routing paradigm.
+
+In addition to the above functions, the plugin supports following
+functions:
+
+- **T.M.GTP4.DT{4|6|46}:** FIB table lookup for IPv4/IP6 encapsulated
+ in GTP-U over UDP/IPv4
+- **End.M.GTP6.DT{4|6|46}:** FIB table lookup for IPv4/IP6 encapsulated
+ in GTP-U over UDP/IPv6
+
+Noted that the prefix of function names follow naming convention of SRv6
+network programming. “T” means transit function, “End” means end
+function, “M” means Mobility specific function. The suffix “D” and “E”
+mean that “decapsulation” and “encapsulation” respectively.
+
+Implementation
+--------------
+
+All SRv6 mobile functions are implemented as VPP plugin modules. The
+plugin modules leverage the sr_policy and sr_localsid mechanisms.
+
+Configurations
+--------------
+
+GTP-U to SRv6
+~~~~~~~~~~~~~
+
+The GTP-U tunnel and flow identifiers of a receiving packet are mapped
+to a Segment Identifier(SID) of sending SRv6 packets.
+
+IPv4 infrastructure case
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+In case that **IPv4** networks are the infrastructure of GTP-U,
+T.M.GTP4.D function translates the receiving GTP-U packets to SRv6
+packets.
+
+A T.M.GTP4.D function is associated with the following mandatory
+parameters:
+
+- SID: A SRv6 SID to represents the function
+- DST-PREFIX: Prefix of remote SRv6 segment. The destination address or
+ last SID of out packets consists of the prefix followed by dst IPv4
+ address, QFI and TEID of the receiving packets.
+- SRC-PREFIX: Prefix for src address of sending packets. The src IPv6
+ address consists of the prefix followed by the src IPv4 address of
+ the receiving packets.
+
+The following command instantiates a new T.M.GTP4.D function.
+
+::
+
+ sr policy add bsid SID behavior t.m.gtp4.d DST-PREFIX v6src_prefix SRC-PREFIX [nhtype {ipv4|ipv6|non-ip}]
+
+For example, the below command configures the SID 2001:db8::1 with
+``t.m.gtp4.d`` behavior for translating receiving GTP-U over IPv4
+packets to SRv6 packets with next-header type is IPv4.
+
+::
+
+ sr policy add bsid 2001:db8::1 behavior t.m.gtp4.d D1::/32 v6src_prefix A1::/64 nhtype ipv4
+
+It should be interesting how a SRv6 BSID works to decapsulate the
+receiving GTP-U packets over IPv4 header. To utilize ``t.m.gtp4.d``
+function, you need to configure some SR steering policy like:
+
+::
+
+ sr steer l3 172.20.0.1/32 via bsid 2001:db8::1
+
+The above steering policy with the BSID of ``t.m.gtp4.d`` would work
+properly for the GTP-U packets destined to 172.20.0.1.
+
+If you have a SID(s) list of SR policy which the configured gtp4.d
+function to be applied, the SR Policy can be configured as following:
+
+::
+
+ sr policy add bsid D1:: next A1:: next B1:: next C1::
+
+IPv6 infrastructure case
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+In case that GTP-U is deployed over **IPv6** infrastructure, you don’t
+need to configure T.M.GTP4.D function and associated SR steering policy.
+Instead of that, you just need to configure a localsid of End.M.GTP6.D
+segment.
+
+An End.M.GTP6.D segment is associated with the following mandatory
+parameters:
+
+- SID-PREFIX: SRv6 SID prefix to represent the function. In this
+ function, it should be the dst address of receiving GTP-U packets.
+- DST-PREFIX: Prefix of remote SRv6 Segment. The destination address or
+ last SID of output packets consists of the prefix followed by QFI and
+ TEID of the receiving packets.
+
+The following command instantiates a new End.M.GTP6.D function.
+
+::
+
+ sr localsid prefix SID-PREFIX behavior end.m.gtp6.d DST-PREFIX [nhtype {ipv4|ipv6|non-ip}]
+
+For example, the below command configures the SID prefix 2001:db8::/64
+with ``end.m.gtp6.d`` behavior for translating receiving GTP-U over IPv6
+packets which have IPv6 destination addresses within 2001:db8::/64 to
+SRv6 packets. The dst IPv6 address of the outgoing packets consists of
+D4::/64 followed by QFI and TEID.
+
+::
+
+ sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.d D4::/64
+
+In another case, the translated packets from GTP-U over IPv6 to SRv6
+will be re-translated back to GTP-U, which is so called ‘Drop-In’ mode.
+
+In Drop-In mode, an additional IPv6 specific end segment is required,
+named End.M.GTP6.D.Di. It is because that unlike ``end.m.gtp6.d``, it
+needs to preserve original IPv6 dst address as the last SID in the SRH.
+
+Regardless of that difference exists, the required configuration
+parameters are same as ``end.m.gtp6.d``.
+
+The following command instantiates a new End.M.GTP6.D.Di function.
+
+::
+
+ sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.d.di D4::/64
+
+SRv6 to GTP-U
+~~~~~~~~~~~~~
+
+The SRv6 Mobile functions on SRv6 to GTP-U direction are End.M.GTP4.E
+and End.M.GTP6.D.
+
+In this direction with GTP-U over IPv4 infrastructure, an End.M.GTP4.E
+segment is associated with the following mandatory parameters:
+
+- SID-PREFIX: SRv6 SID prefix to represent the function.
+- V4SRC-ADDR-POSITION: Integer number indicates bit position where IPv4
+ src address embedded.
+
+The following command instantiates a new End.M.GTP4.E function.
+
+::
+
+ sr localsid prefix SID-PREFIX behavior end.m.gtp4.e v4src_position V4SRC-ADDR-POSITION
+
+For example, the below command configures the SID prefix 2001:db8::/32
+with ``end.m.gtp4.e`` behavior for translating the receiving SRv6
+packets to GTP-U packets encapsulated with UDP/IPv4 header. All the
+GTP-U tunnel and flow identifiers are extracted from the active SID in
+the receiving packets. The src IPv4 address of sending GTP-U packets is
+extracted from the configured bit position in the src IPv6 address.
+
+::
+
+ sr localsid prefix 2001:db8::/32 behavior end.m.gtp4.e v4src_position 64
+
+In IPv6 infrastructure case, an End.M.GTP6.E segment is associated with
+the following mandatory parameters:
+
+- SID-PREFIX: SRv6 SID prefix to represent the function.
+
+The following command instantiates a new End.M.GTP6.E function.
+
+::
+
+ sr localsid prefix SID-PREFIX behavior end.m.gtp6.e
+
+For example, the below command configures the SID prefix 2001:db8::/64
+with ``end.m.gtp6.e`` behavior for translating the receiving SRv6
+packets to GTP-U packets encapsulated with UDP/IPv6 header. While the
+last SID indicates GTP-U dst IPv6 address, 32-bits GTP-U TEID and 6-bits
+QFI are extracted from the active SID in the receiving packets.
+
+::
+
+ sr localsid prefix 2001:db8::/64 behavior end.m.gtp6.e
+
+FIB Table Lookup for Inner IPv4/IPv6 packet
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+SRv6 Mobile functions of ``t.m.gtp4.dt*`` and ``end.m.gtp6.dt*`` support
+decapsulating outer IP/UDP/GTP-U headers and forwarding inner IP packet
+based on specific fib table.
+
+In case of the both outer and inner IP address families are IPv4,
+``t.m.gtp4.dt4`` function supports GTP-U decapsulation and fib lookup
+for inner IPv4 with an associated steering policy and the following
+parameters:
+
+- SID: A SRv6 SID to represents the function
+- FIB: fib-table number for inner IPv4 packet lookup and forwarding
+
+The following command instantiates a new T.M.GTP4.DT4 function.
+
+::
+
+ sr policy add bsid SID behavior t.m.gtp4.dt4 fib-table FIB
+
+For example, the below commands configure D5:: as the SID instantiates
+``t.m.gtp4.dt4`` function. A steering policy for packets destine to
+172.20.0.1 binds to the SID.
+
+::
+
+ sr steer l3 172.20.0.1/32 via bsid D5::
+ sr policy add bsid D5:: behavior t.m.gtp4.dt4 fib-table 0
+
+In addition, inner IPv6, or mix of IPv4 and IPv6 inner packet cases
+require the function to be configured with local-fib table.
+
+- LOCAL-FIB: fib-table number for lookup and forward GTP-U packet based
+ on outer IP destination address
+
+This is inner IPv6 case specific. The reason is that GTP-U encapsulates
+link local IPv6 packet for NDP (Neighbor Discovery Protocol). Outer
+GTP-U header should be kept until the packets reach to the node
+responsible for NDP handling. It is typically UPF(User Plane Function)
+node.
+
+The following command instantiate a new T.M.GTP4.DT6 function.
+
+::
+
+ sr policy add bsid D5:: behavior t.m.gtp4.dt6 fib-table 0 local-fib-table LOCAL-FIB
+
+Following example configures fib 0 for inner packet and fib 1 for outer
+GTP-U packet forwarding:
+
+::
+
+ sr policy add bsid D5:: behavior t.m.gtp4.dt6 fib-table 0 local-fib-table 1
+
+If you need to support both IPv4 and IPv6 inner packet lookup with just
+one SID, you can configure ``t.m.gtp4.dt46`` function:
+
+::
+
+ sr policy add bsid D5:: behavior t.m.gtp4.dt46 fib-table 0 local-fib-table 1
+
+In case of GTP-U over IPv6 case, ``end.m.gtp6.dt4``, ``end.m.gtp6.dt6``
+and ``end.m.gtp6.dt46`` functions support inner IPv4, IPv6 and IPv4/IPv6
+lookup and forwarding respectively. Specifying fib table for inner IP
+packet forwarding is required as same as GTP-U over IPv4 case, and
+local-fib table for inner IPv6 and IPv4/IPv6 cases as well.
+
+::
+
+ sr localsid prefix D::/64 behavior end.m.gtp6.dt46 fib-table 0 local-fib-table 0
+
+To run some demo setup please refer to: :ref:`srv6_mobile_runner_doc`
diff --git a/src/plugins/srv6-mobile/node.c b/src/plugins/srv6-mobile/node.c
index 448d6332b15..ed0697a8009 100644
--- a/src/plugins/srv6-mobile/node.c
+++ b/src/plugins/srv6-mobile/node.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020 Arrcus Inc and/or its affiliates.
+ * Copyright (c) 2019 Arrcus Inc and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -102,11 +102,11 @@ format_srv6_end_rewrite_trace6 (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
srv6_end_rewrite_trace_t *t = va_arg (*args, srv6_end_rewrite_trace_t *);
- return format (s,
- "SRv6-END-rewrite: src %U dst %U\n\tTEID: 0x%x\n\tsr_prefix: %U/%d",
- format_ip6_address, &t->src, format_ip6_address, &t->dst,
- clib_net_to_host_u32 (t->teid), format_ip6_address,
- &t->sr_prefix, t->sr_prefixlen);
+ return format (
+ s, "SRv6-END-rewrite: src %U dst %U\n\tTEID: 0x%x\n\tsr_prefix: %U/%d",
+ format_ip6_address, &t->src, format_ip6_address, &t->dst,
+ clib_net_to_host_u32 (t->teid), format_ip6_address, &t->sr_prefix,
+ t->sr_prefixlen);
}
#define foreach_srv6_end_v4_error \
@@ -245,7 +245,8 @@ typedef enum
typedef enum
{
SRV6_T_M_GTP4_D_NEXT_DROP,
- SRV6_T_M_GTP4_D_NEXT_LOOKUP,
+ SRV6_T_M_GTP4_D_NEXT_LOOKUP4,
+ SRV6_T_M_GTP4_D_NEXT_LOOKUP6,
SRV6_T_M_GTP4_D_N_NEXT,
} srv6_T_m_gtp4_d_next_t;
@@ -259,7 +260,8 @@ typedef enum
typedef enum
{
SRV6_END_M_GTP6_D_NEXT_DROP,
- SRV6_END_M_GTP6_D_NEXT_LOOKUP,
+ SRV6_END_M_GTP6_D_NEXT_LOOKUP4,
+ SRV6_END_M_GTP6_D_NEXT_LOOKUP6,
SRV6_END_M_GTP6_D_N_NEXT,
} srv6_end_m_gtp6_d_next_t;
@@ -317,9 +319,8 @@ gtpu_type_get (u16 tag)
}
// Function for SRv6 GTP4.E function.
-VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (srv6_end_m_gtp4_e)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
srv6_end_main_v4_t *sm = &srv6_end_main_v4;
ip6_sr_main_t *sm2 = &sr_main;
@@ -343,7 +344,7 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
u32 bi0;
vlib_buffer_t *b0;
ip6_sr_localsid_t *ls0;
- srv6_end_gtp4_param_t *ls_param;
+ srv6_end_gtp4_e_param_t *ls_param;
ip6srv_combo_header_t *ip6srv0;
ip6_address_t src0, dst0;
@@ -362,11 +363,10 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->localsids,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
- ls_param = (srv6_end_gtp4_param_t *) ls0->plugin_mem;
+ ls_param = (srv6_end_gtp4_e_param_t *) ls0->plugin_mem;
ip6srv0 = vlib_buffer_get_current (b0);
src0 = ip6srv0->ip.src_address;
@@ -374,10 +374,10 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
len0 = vlib_buffer_length_in_chain (vm, b0);
- if ((ip6srv0->ip.protocol == IPPROTO_IPV6_ROUTE
- && len0 <
- sizeof (ip6srv_combo_header_t) + ip6srv0->sr.length * 8)
- || (len0 < sizeof (ip6_header_t)))
+ if ((ip6srv0->ip.protocol == IPPROTO_IPV6_ROUTE &&
+ len0 <
+ sizeof (ip6srv_combo_header_t) + ip6srv0->sr.length * 8) ||
+ (len0 < sizeof (ip6_header_t)))
{
next0 = SRV6_END_M_GTP4_E_NEXT_DROP;
@@ -388,7 +388,7 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
u8 gtpu_type = 0;
u16 tag = 0;
u32 teid = 0;
- u8 *teid8p = (u8 *) & teid;
+ u8 *teid8p = (u8 *) &teid;
u8 qfi = 0;
u16 seq = 0;
u32 index;
@@ -418,9 +418,9 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
qfi = dst0.as_u8[offset + 4];
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
clib_memcpy_fast (&seq, &dst0.as_u8[offset + 5], 2);
}
@@ -443,11 +443,11 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
qfi |= dst0.as_u8[offset + 4] << shift;
qfi |= dst0.as_u8[offset + 5] >> (8 - shift);
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
- sp = (u8 *) & seq;
+ sp = (u8 *) &seq;
for (index = 0; index < 2; index++)
{
sp[index] = dst0.as_u8[offset + 5 + index] << shift;
@@ -472,9 +472,9 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
hdrlen =
sizeof (gtpu_exthdr_t) + sizeof (gtpu_pdu_session_t);
}
- else if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ else if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
hdrlen = sizeof (gtpu_exthdr_t);
}
@@ -494,11 +494,10 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
if (ext_len >
sizeof (ip6_address_t) * (ip6srv0->sr.last_entry + 1))
{
- tlv =
- (ip6_sr_tlv_t *) ((u8 *) & ip6srv0->sr +
- sizeof (ip6_sr_header_t) +
- sizeof (ip6_address_t) *
- (ip6srv0->sr.last_entry + 1));
+ tlv = (ip6_sr_tlv_t *) ((u8 *) &ip6srv0->sr +
+ sizeof (ip6_sr_header_t) +
+ sizeof (ip6_address_t) *
+ (ip6srv0->sr.last_entry + 1));
if (tlv->type == SRH_TLV_USER_PLANE_CONTAINER)
{
@@ -518,7 +517,7 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
{
vlib_buffer_advance (b0,
(word) sizeof (ip6srv_combo_header_t) +
- ip6srv0->sr.length * 8);
+ ip6srv0->sr.length * 8);
}
else
{
@@ -549,38 +548,9 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
hdr0->gtpu.type = gtpu_type;
- if (qfi)
- {
- u8 type = 0;
- gtpu_pdu_session_t *sess;
-
- hdr0->gtpu.ver_flags |= GTPU_EXTHDR_FLAG;
-
- hdr0->gtpu.ext->seq = 0;
-
- hdr0->gtpu.ext->npdu_num = 0;
- hdr0->gtpu.ext->nextexthdr = GTPU_EXTHDR_PDU_SESSION;
-
- type = qfi & SRV6_PDU_SESSION_U_BIT_MASK;
-
- qfi =
- ((qfi & SRV6_PDU_SESSION_QFI_MASK) >> 2) |
- ((qfi & SRV6_PDU_SESSION_R_BIT_MASK) << 5);
-
- sess =
- (gtpu_pdu_session_t *) (((char *) hdr0) +
- sizeof (ip4_gtpu_header_t) +
- sizeof (gtpu_exthdr_t));
- sess->exthdrlen = 1;
- sess->type = type;
- sess->spare = 0;
- sess->u.val = qfi;
- sess->nextexthdr = 0;
- }
-
- if (gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
hdr0->gtpu.ver_flags |= GTPU_SEQ_FLAG;
hdr0->gtpu.ext->seq = seq;
@@ -609,41 +579,80 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
}
}
}
+ else
+ {
+ if (qfi)
+ {
+ hdr0->gtpu.ext->seq = 0;
+ hdr0->gtpu.ext->npdu_num = 0;
+ }
+ }
+
+ if (qfi)
+ {
+ u8 type = 0;
+ gtpu_pdu_session_t *sess;
+
+ hdr0->gtpu.ver_flags |= GTPU_EXTHDR_FLAG;
+
+ hdr0->gtpu.ext->nextexthdr = GTPU_EXTHDR_PDU_SESSION;
+
+ type = qfi & SRV6_PDU_SESSION_U_BIT_MASK;
+
+ qfi = ((qfi & SRV6_PDU_SESSION_QFI_MASK) >> 2) |
+ ((qfi & SRV6_PDU_SESSION_R_BIT_MASK) << 5);
+
+ sess = (gtpu_pdu_session_t *) (((char *) hdr0) +
+ sizeof (ip4_gtpu_header_t) +
+ sizeof (gtpu_exthdr_t));
+ sess->exthdrlen = 1;
+ sess->type = type;
+ sess->spare = 0;
+ sess->u.val = qfi;
+ sess->nextexthdr = 0;
+ }
- offset = ls_param->v4src_position / 8;
- shift = ls_param->v4src_position % 8;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls_param->fib4_index;
- if (PREDICT_TRUE (shift == 0))
+ if (ls_param->v4src_position)
{
- for (index = 0; index < 4; index++)
+ offset = ls_param->v4src_position / 8;
+ shift = ls_param->v4src_position % 8;
+
+ if (PREDICT_TRUE (shift == 0))
+ {
+ for (index = 0; index < 4; index++)
+ {
+ hdr0->ip4.src_address.as_u8[index] =
+ src0.as_u8[offset + index];
+ }
+ }
+ else
{
- hdr0->ip4.src_address.as_u8[index] =
- src0.as_u8[offset + index];
+ for (index = 0; index < 4; index++)
+ {
+ hdr0->ip4.src_address.as_u8[index] =
+ src0.as_u8[offset + index] << shift;
+ hdr0->ip4.src_address.as_u8[index] |=
+ src0.as_u8[offset + index + 1] >> (8 - shift);
+ }
}
}
else
{
- for (index = 0; index < 4; index++)
- {
- hdr0->ip4.src_address.as_u8[index] =
- src0.as_u8[offset + index] << shift;
- hdr0->ip4.src_address.as_u8[index] |=
- src0.as_u8[offset + index + 1] >> (8 - shift);
- }
+ clib_memcpy_fast (&hdr0->ip4.src_address,
+ &ls_param->v4src_addr, 4);
}
key = hash_memory (p, plen < 40 ? plen : 40, 0);
port = hash_uword_to_u16 (&key);
hdr0->udp.src_port = port;
- hdr0->udp.length = clib_host_to_net_u16 (len0 +
- sizeof (udp_header_t) +
- sizeof
- (gtpu_header_t));
+ hdr0->udp.length = clib_host_to_net_u16 (
+ len0 + sizeof (udp_header_t) + sizeof (gtpu_header_t));
- hdr0->ip4.length = clib_host_to_net_u16 (len0 +
- sizeof
- (ip4_gtpu_header_t));
+ hdr0->ip4.length =
+ clib_host_to_net_u16 (len0 + sizeof (ip4_gtpu_header_t));
hdr0->ip4.checksum = ip4_header_checksum (&hdr0->ip4);
@@ -662,11 +671,12 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
}
}
- vlib_increment_combined_counter
- (((next0 ==
- SRV6_END_M_GTP4_E_NEXT_DROP) ? &(sm2->sr_ls_invalid_counters) :
- &(sm2->sr_ls_valid_counters)), thread_index,
- ls0 - sm2->localsids, 1, vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (
+ ((next0 == SRV6_END_M_GTP4_E_NEXT_DROP) ?
+ &(sm2->sr_ls_invalid_counters) :
+ &(sm2->sr_ls_valid_counters)),
+ thread_index, ls0 - sm2->localsids, 1,
+ vlib_buffer_length_in_chain (vm, b0));
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
@@ -685,515 +695,590 @@ VLIB_NODE_FN (srv6_end_m_gtp4_e) (vlib_main_t * vm,
}
// Function for SRv6 GTP4.D function.
-VLIB_NODE_FN (srv6_t_m_gtp4_d) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+static inline u32
+srv6_gtp4_decap_processing (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b0)
{
srv6_t_main_v4_decap_t *sm = &srv6_t_main_v4_decap;
ip6_sr_main_t *sm2 = &sr_main;
- u32 n_left_from, next_index, *from, *to_next;
- u32 good_n = 0, bad_n = 0;
+ ip6_sr_sl_t *sl0;
+ srv6_end_gtp4_d_param_t *ls_param;
+ ip4_header_t *ip4;
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
+ uword len0;
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
+ u32 next0 = SRV6_T_M_GTP4_D_NEXT_LOOKUP6;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ sl0 = pool_elt_at_index (sm2->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
- while (n_left_from > 0 && n_left_to_next > 0)
+ ls_param = (srv6_end_gtp4_d_param_t *) sl0->plugin_mem;
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ ip4 = vlib_buffer_get_current (b0);
+
+ if (ip4->protocol != IP_PROTOCOL_UDP || len0 < sizeof (ip4_gtpu_header_t))
+ {
+ next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ }
+ else
+ {
+ uword *p;
+ ip6_sr_policy_t *sr_policy = NULL;
+ ip6_sr_sl_t *sl = NULL;
+ u32 *sl_index;
+ u32 hdr_len;
+
+ ip4_gtpu_header_t *hdr;
+ ip4_address_t src, dst;
+ u8 *srcp, *dstp;
+ ip6_header_t *encap = NULL;
+ ip6_address_t seg;
+ ip6_address_t src6;
+ u8 gtpu_type;
+ u32 teid;
+ u8 *teidp;
+ u8 qfi = 0;
+ u8 *qfip = NULL;
+ u16 seq = 0;
+ u8 *seqp;
+ u32 offset, shift, index;
+ ip6srv_combo_header_t *ip6srv;
+ gtpu_pdu_session_t *sess = NULL;
+ int ie_size = 0;
+ u16 tlv_siz = 0;
+ u8 ie_buf[GTPU_IE_MAX_SIZ];
+
+ // Decap from GTP-U.
+ hdr = (ip4_gtpu_header_t *) ip4;
+
+ hdr_len = sizeof (ip4_gtpu_header_t);
+
+ teid = hdr->gtpu.teid;
+ teidp = (u8 *) &teid;
+
+ seqp = (u8 *) &seq;
+
+ gtpu_type = hdr->gtpu.type;
+
+ if (hdr->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
{
- u32 bi0;
- vlib_buffer_t *b0;
- ip6_sr_sl_t *sl0;
- srv6_end_gtp4_param_t *ls_param;
- ip4_header_t *ip4;
+ // Extention header.
+ hdr_len += sizeof (gtpu_exthdr_t);
- uword len0;
+ seq = hdr->gtpu.ext->seq;
- u32 next0 = SRV6_T_M_GTP4_D_NEXT_LOOKUP;
+ if (hdr->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
+ {
+ // PDU Session Container.
+ sess = (gtpu_pdu_session_t *) (((char *) hdr) + hdr_len);
+ qfi = sess->u.val & ~GTPU_PDU_SESSION_P_BIT_MASK;
+ qfip = (u8 *) &qfi;
- // defaults
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
+ hdr_len += sizeof (gtpu_pdu_session_t);
- b0 = vlib_get_buffer (vm, bi0);
+ if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
+ {
+ hdr_len += sizeof (gtpu_paging_policy_t);
+ }
+ }
+ }
- sl0 =
- pool_elt_at_index (sm2->sid_lists,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ src = hdr->ip4.src_address;
+ srcp = (u8 *) &src;
- ls_param = (srv6_end_gtp4_param_t *) sl0->plugin_mem;
+ dst = hdr->ip4.dst_address;
+ dstp = (u8 *) &dst;
- len0 = vlib_buffer_length_in_chain (vm, b0);
+ seg = ls_param->sr_prefix;
- ip4 = vlib_buffer_get_current (b0);
+ offset = ls_param->sr_prefixlen / 8;
+ shift = ls_param->sr_prefixlen % 8;
+
+ if (PREDICT_TRUE (shift == 0))
+ {
+ clib_memcpy_fast (&seg.as_u8[offset], dstp, 4);
- if (ip4->protocol != IP_PROTOCOL_UDP
- || len0 < sizeof (ip4_gtpu_header_t))
+ if (qfip)
{
- next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+ ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
- bad_n++;
+ if (sess->type)
+ {
+ qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
+ }
+
+ seg.as_u8[offset + 4] = qfi;
+ }
+
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ {
+ clib_memcpy_fast (&seg.as_u8[offset + 5], seqp, 2);
}
else
{
- uword *p;
- ip6_sr_policy_t *sr_policy = NULL;
- ip6_sr_sl_t *sl = NULL;
- u32 *sl_index;
- u32 hdr_len;
-
- ip4_gtpu_header_t *hdr;
- ip4_address_t src, dst;
- u8 *srcp, *dstp;
- ip6_header_t *encap = NULL;
- ip6_address_t seg;
- ip6_address_t src6;
- u8 gtpu_type;
- u32 teid;
- u8 *teidp;
- u8 qfi = 0;
- u8 *qfip = NULL;
- u16 seq = 0;
- u8 *seqp;
- u32 offset, shift, index;
- ip6srv_combo_header_t *ip6srv;
- gtpu_pdu_session_t *sess = NULL;
- int ie_size = 0;
- u16 tlv_siz = 0;
- u8 ie_buf[GTPU_IE_MAX_SIZ];
+ clib_memcpy_fast (&seg.as_u8[offset + 5], teidp, 4);
+ }
+ }
+ else
+ {
+ for (index = 0; index < 4; index++)
+ {
+ seg.as_u8[offset + index] |= dstp[index] >> shift;
+ seg.as_u8[offset + index + 1] |= dstp[index] << (8 - shift);
+ }
- // Decap from GTP-U.
- hdr = (ip4_gtpu_header_t *) ip4;
+ if (qfip)
+ {
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+ ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
- hdr_len = sizeof (ip4_gtpu_header_t);
+ if (sess->type)
+ {
+ qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
+ }
- teid = hdr->gtpu.teid;
- teidp = (u8 *) & teid;
+ seg.as_u8[offset + 4] |= qfi >> shift;
+ seg.as_u8[offset + 5] |= qfi << (8 - shift);
+ }
- seqp = (u8 *) & seq;
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ {
+ for (index = 0; index < 2; index++)
+ {
+ seg.as_u8[offset + 5 + index] |= seqp[index] >> shift;
+ seg.as_u8[offset + 6 + index] |= seqp[index] << (8 - shift);
+ }
+ }
+ else
+ {
+ for (index = 0; index < 4; index++)
+ {
+ seg.as_u8[offset + index + 5] |= teidp[index] >> shift;
+ seg.as_u8[offset + index + 6] |= teidp[index] << (8 - shift);
+ }
+ }
+ }
- gtpu_type = hdr->gtpu.type;
+ if (PREDICT_FALSE (gtpu_type == GTPU_TYPE_ERROR_INDICATION))
+ {
+ u16 payload_len;
- if (hdr->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
+ payload_len = clib_net_to_host_u16 (hdr->gtpu.length);
+ if (payload_len != 0)
+ {
+ ie_size = payload_len - (hdr_len - sizeof (ip4_gtpu_header_t));
+ if (ie_size > 0)
{
- // Extention header.
- hdr_len += sizeof (gtpu_exthdr_t);
+ u8 *ies;
- seq = hdr->gtpu.ext->seq;
+ ies = (u8 *) ((u8 *) hdr + hdr_len);
+ clib_memcpy_fast (ie_buf, ies, ie_size);
+ hdr_len += ie_size;
+ }
+ }
+ }
- if (hdr->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
- {
- // PDU Session Container.
- sess =
- (gtpu_pdu_session_t *) (((char *) hdr) + hdr_len);
- qfi = sess->u.val & ~GTPU_PDU_SESSION_P_BIT_MASK;
- qfip = (u8 *) & qfi;
+ src6 = ls_param->v6src_prefix;
- hdr_len += sizeof (gtpu_pdu_session_t);
+ offset = ls_param->v6src_prefixlen / 8;
+ shift = ls_param->v6src_prefixlen % 8;
- if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
- {
- hdr_len += sizeof (gtpu_paging_policy_t);
- }
- }
- }
+ if (PREDICT_TRUE (shift == 0))
+ {
+ clib_memcpy_fast (&src6.as_u8[offset], srcp, 4);
+ }
+ else
+ {
+ for (index = 0; index < 4; index++)
+ {
+ src6.as_u8[offset + index] |= srcp[offset] >> shift;
+ src6.as_u8[offset + index + 1] |= srcp[offset] << (8 - shift);
+ }
+ }
- src = hdr->ip4.src_address;
- srcp = (u8 *) & src;
+ vlib_buffer_advance (b0, (word) hdr_len);
- dst = hdr->ip4.dst_address;
- dstp = (u8 *) & dst;
+ // Encap to SRv6.
+ if (PREDICT_TRUE (gtpu_type == GTPU_TYPE_GTPU))
+ {
+ encap = vlib_buffer_get_current (b0);
+ }
- seg = ls_param->sr_prefix;
+ len0 = vlib_buffer_length_in_chain (vm, b0);
- offset = ls_param->sr_prefixlen / 8;
- shift = ls_param->sr_prefixlen % 8;
+ p = mhash_get (&sm2->sr_policies_index_hash, &ls_param->sr_prefix);
+ if (p)
+ {
+ sr_policy = pool_elt_at_index (sm2->sr_policies, p[0]);
+ }
- if (PREDICT_TRUE (shift == 0))
- {
- clib_memcpy_fast (&seg.as_u8[offset], dstp, 4);
+ if (sr_policy)
+ {
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
+ if (sl != NULL)
+ break;
+ }
+ }
- if (qfip)
- {
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ if (sl)
+ {
+ hdr_len = sizeof (ip6srv_combo_header_t);
+ hdr_len += vec_len (sl->segments) * sizeof (ip6_address_t);
+ hdr_len += sizeof (ip6_address_t);
+ }
+ else
+ {
+ hdr_len = sizeof (ip6_header_t);
- if (sess->type)
- {
- qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
- }
+ if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU))
+ {
+ hdr_len += sizeof (ip6_sr_header_t);
+ hdr_len += sizeof (ip6_address_t);
+ }
+ }
- seg.as_u8[offset + 4] = qfi;
- }
+ if (ie_size)
+ {
+ tlv_siz =
+ sizeof (ip6_sr_tlv_t) + sizeof (user_plane_sub_tlv_t) + ie_size;
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
- {
- clib_memcpy_fast (&seg.as_u8[offset + 5], seqp, 2);
- }
- else
- {
- clib_memcpy_fast (&seg.as_u8[offset + 5], teidp, 4);
- }
- }
- else
- {
- for (index = 0; index < 4; index++)
- {
- seg.as_u8[offset + index] |= dstp[index] >> shift;
- seg.as_u8[offset + index + 1] |=
- dstp[index] << (8 - shift);
- }
+ tlv_siz = (tlv_siz & ~0x07) + (tlv_siz & 0x07 ? 0x08 : 0x0);
+ hdr_len += tlv_siz;
+ }
- if (qfip)
- {
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ vlib_buffer_advance (b0, -(word) hdr_len);
+ ip6srv = vlib_buffer_get_current (b0);
- if (sess->type)
- {
- qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
- }
+ if (sl)
+ {
+ clib_memcpy_fast (ip6srv, sl->rewrite, vec_len (sl->rewrite));
- seg.as_u8[offset + 4] |= qfi >> shift;
- seg.as_u8[offset + 5] |= qfi << (8 - shift);
- }
+ if (vec_len (sl->segments) > 1)
+ {
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
- {
- for (index = 0; index < 2; index++)
- {
- seg.as_u8[offset + 5 + index] |=
- seqp[index] >> shift;
- seg.as_u8[offset + 6 + index] |=
- seqp[index] << (8 - shift);
- }
- }
- else
- {
- for (index = 0; index < 4; index++)
- {
- seg.as_u8[offset + index + 5] |=
- teidp[index] >> shift;
- seg.as_u8[offset + index + 6] |=
- teidp[index] << (8 - shift);
- }
- }
- }
+ ip6srv->sr.segments_left += 1;
+ ip6srv->sr.last_entry += 1;
- if (PREDICT_FALSE (gtpu_type == GTPU_TYPE_ERROR_INDICATION))
- {
- u16 payload_len;
+ ip6srv->sr.length += sizeof (ip6_address_t) / 8;
+ ip6srv->sr.segments[0] = seg;
- payload_len = clib_net_to_host_u16 (hdr->gtpu.length);
- if (payload_len != 0)
- {
- ie_size =
- payload_len - (hdr_len - sizeof (ip4_gtpu_header_t));
- if (ie_size > 0)
- {
- u8 *ies;
+ clib_memcpy_fast (&ip6srv->sr.segments[1],
+ (u8 *) (sl->rewrite + sizeof (ip6_header_t) +
+ sizeof (ip6_sr_header_t)),
+ vec_len (sl->segments) *
+ sizeof (ip6_address_t));
+ }
+ else
+ {
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
- ies = (u8 *) ((u8 *) hdr + hdr_len);
- clib_memcpy_fast (ie_buf, ies, ie_size);
- hdr_len += ie_size;
- }
- }
- }
+ ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
- src6 = ls_param->v6src_prefix;
+ ip6srv->sr.segments_left = 1;
+ ip6srv->sr.last_entry = 0;
- offset = ls_param->v6src_prefixlen / 8;
- shift = ls_param->v6src_prefixlen % 8;
+ ip6srv->sr.length =
+ ((sizeof (ip6_sr_header_t) + sizeof (ip6_address_t)) / 8) - 1;
+ ip6srv->sr.flags = 0;
- if (PREDICT_TRUE (shift == 0))
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+
+ ip6srv->sr.segments[0] = seg;
+ if (vec_len (sl->segments))
{
- clib_memcpy_fast (&src6.as_u8[offset], srcp, 4);
+ ip6srv->sr.segments[1] = sl->segments[0];
+ ip6srv->sr.length += sizeof (ip6_address_t) / 8;
+ ip6srv->sr.last_entry++;
}
- else
+ }
+
+ if (PREDICT_TRUE (encap != NULL))
+ {
+ if (ls_param->nhtype == SRV6_NHTYPE_NONE)
{
- for (index = 0; index < 4; index++)
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
+ ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
+ else
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
+ }
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 4)
{
- src6.as_u8[offset + index] |= srcp[offset] >> shift;
- src6.as_u8[offset + index + 1] |=
- srcp[offset] << (8 - shift);
+ // Bad encap packet.
+ next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ goto DONE;
}
}
-
- vlib_buffer_advance (b0, (word) hdr_len);
-
- // Encap to SRv6.
- if (PREDICT_TRUE (gtpu_type == GTPU_TYPE_GTPU))
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
{
- encap = vlib_buffer_get_current (b0);
+ ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
+ {
+ // Bad encap packet.
+ next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ goto DONE;
+ }
}
-
- len0 = vlib_buffer_length_in_chain (vm, b0);
-
- p =
- mhash_get (&sm2->sr_policies_index_hash,
- &ls_param->sr_prefix);
- if (p)
+ else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
{
- sr_policy = pool_elt_at_index (sm2->sr_policies, p[0]);
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
}
+ }
+ else
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ }
+ }
+ else
+ {
+ clib_memcpy_fast (ip6srv, &sm->cache_hdr, sizeof (ip6_header_t));
- if (sr_policy)
+ ip6srv->ip.dst_address = seg;
+
+ if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU))
+ {
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+
+ ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
+
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+
+ ip6srv->sr.segments_left = 0;
+ ip6srv->sr.last_entry = 0;
+
+ ip6srv->sr.length = sizeof (ip6_address_t) / 8;
+ ip6srv->sr.segments[0] = seg;
+ }
+ else
+ {
+ if (ls_param->nhtype == SRV6_NHTYPE_NONE)
{
- vec_foreach (sl_index, sr_policy->segments_lists)
- {
- sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
- if (sl != NULL)
- break;
- }
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6;
+ else
+ ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
}
-
- if (sl)
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
{
- hdr_len = sizeof (ip6srv_combo_header_t);
- hdr_len += vec_len (sl->segments) * sizeof (ip6_address_t);
- hdr_len += sizeof (ip6_address_t);
+ ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 4)
+ {
+ // Bad encap packet.
+ next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ goto DONE;
+ }
}
- else
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
{
- hdr_len = sizeof (ip6_header_t);
-
- if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU))
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
{
- hdr_len += sizeof (ip6_sr_header_t);
- hdr_len += sizeof (ip6_address_t);
+ // Bad encap packet.
+ next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
+ goto DONE;
}
}
-
- if (ie_size)
+ else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
{
- tlv_siz =
- sizeof (ip6_sr_tlv_t) + sizeof (user_plane_sub_tlv_t) +
- ie_size;
-
- tlv_siz = (tlv_siz & ~0x07) + (tlv_siz & 0x07 ? 0x08 : 0x0);
- hdr_len += tlv_siz;
+ ip6srv->ip.protocol = IP_PROTOCOL_IP6_ETHERNET;
}
+ }
+ }
- vlib_buffer_advance (b0, -(word) hdr_len);
- ip6srv = vlib_buffer_get_current (b0);
+ ip6srv->ip.src_address = src6;
- if (sl)
- {
- clib_memcpy_fast (ip6srv, sl->rewrite,
- vec_len (sl->rewrite));
+ if (PREDICT_FALSE (ie_size))
+ {
+ ip6_sr_tlv_t *tlv;
+ user_plane_sub_tlv_t *sub_tlv;
- if (vec_len (sl->segments) > 1)
- {
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ tlv = (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
+ tlv->type = SRH_TLV_USER_PLANE_CONTAINER;
+ tlv->length = (u8) (tlv_siz - sizeof (ip6_sr_tlv_t));
+ clib_memset (tlv->value, 0, tlv->length);
- ip6srv->sr.segments_left += 1;
- ip6srv->sr.last_entry += 1;
+ sub_tlv = (user_plane_sub_tlv_t *) tlv->value;
+ sub_tlv->type = USER_PLANE_SUB_TLV_IE;
+ sub_tlv->length = (u8) ie_size;
+ clib_memcpy_fast (sub_tlv->value, ie_buf, ie_size);
- ip6srv->sr.length += sizeof (ip6_address_t) / 8;
- ip6srv->sr.segments[0] = seg;
+ ip6srv->sr.length += (u8) (tlv_siz / 8);
+ }
- clib_memcpy_fast (&ip6srv->sr.segments[1],
- (u8 *) (sl->rewrite +
- sizeof (ip6_header_t) +
- sizeof (ip6_sr_header_t)),
- vec_len (sl->segments) *
- sizeof (ip6_address_t));
- }
- else
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+ ip6srv->ip.payload_length =
+ clib_host_to_net_u16 (len0 + hdr_len - sizeof (ip6_header_t));
- ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; /* default FIB */
- ip6srv->sr.segments_left = 1;
- ip6srv->sr.last_entry = 0;
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ srv6_end_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip6srv->ip.src_address.as_u8,
+ sizeof (tr->src.as_u8));
+ clib_memcpy (tr->dst.as_u8, ip6srv->ip.dst_address.as_u8,
+ sizeof (tr->dst.as_u8));
+ }
+ }
- ip6srv->sr.length =
- ((sizeof (ip6_sr_header_t) +
- sizeof (ip6_address_t)) / 8) - 1;
- ip6srv->sr.flags = 0;
+DONE:
+ return next0;
+}
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+VLIB_NODE_FN (srv6_t_m_gtp4_d)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ srv6_t_main_v4_decap_t *sm = &srv6_t_main_v4_decap;
+ ip6_sr_main_t *sm2 = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
- ip6srv->sr.segments[0] = seg;
- if (vec_len (sl->segments))
- {
- ip6srv->sr.segments[1] = sl->segments[0];
- ip6srv->sr.length += sizeof (ip6_address_t) / 8;
- ip6srv->sr.last_entry++;
- }
- }
+ ip6_sr_sl_t *sl0;
+ srv6_end_gtp4_d_param_t *ls_param;
- if (PREDICT_TRUE (encap != NULL))
- {
- if (ls_param->nhtype == SRV6_NHTYPE_NONE)
- {
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) == 6)
- ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
- else
- ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 4)
- {
- // Bad encap packet.
- next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 6)
- {
- // Bad encap packet.
- next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
- }
- }
- else
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
- }
- }
- else
- {
- clib_memcpy_fast (ip6srv, &sm->cache_hdr,
- sizeof (ip6_header_t));
+ u32 good_n = 0, bad_n = 0;
- ip6srv->ip.dst_address = seg;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
- if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU))
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
- ip6srv->sr.segments_left = 0;
- ip6srv->sr.last_entry = 0;
+ u32 next0;
- ip6srv->sr.length = sizeof (ip6_address_t) / 8;
- ip6srv->sr.segments[0] = seg;
- }
- else
+ ip4_gtpu_header_t *hdr;
+ u32 hdrlen;
+ u8 gtpu_type;
+ bool gtp4;
+ bool ipv4;
+
+ // defaults
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ sl0 = pool_elt_at_index (sm2->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+ ls_param = (srv6_end_gtp4_d_param_t *) sl0->plugin_mem;
+
+ hdr = vlib_buffer_get_current (b0);
+ gtpu_type = hdr->gtpu.type;
+
+ gtp4 = false;
+ ipv4 = true;
+
+ if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU || ls_param->drop_in))
+ {
+ gtp4 = true;
+ }
+ else
+ {
+ ip6_header_t *ip6;
+
+ hdrlen = sizeof (ip4_gtpu_header_t);
+
+ if (hdr->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
+ {
+ hdrlen += sizeof (gtpu_exthdr_t);
+ if (hdr->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
{
- if (ls_param->nhtype == SRV6_NHTYPE_NONE)
- {
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) == 6)
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6;
- else
- ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 4)
- {
- // Bad encap packet.
- next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 6)
- {
- // Bad encap packet.
- next0 = SRV6_T_M_GTP4_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
+ gtpu_pdu_session_t *sess;
+ sess = (gtpu_pdu_session_t *) (((char *) hdr) + hdrlen);
+ hdrlen += sizeof (gtpu_pdu_session_t);
+
+ if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
{
- ip6srv->ip.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ hdrlen += sizeof (gtpu_paging_policy_t);
}
}
}
- ip6srv->ip.src_address = src6;
-
- if (PREDICT_FALSE (ie_size))
+ ip6 = (ip6_header_t *) (((char *) hdr) + hdrlen);
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
{
- ip6_sr_tlv_t *tlv;
- user_plane_sub_tlv_t *sub_tlv;
-
- tlv =
- (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
- tlv->type = SRH_TLV_USER_PLANE_CONTAINER;
- tlv->length = (u8) (tlv_siz - sizeof (ip6_sr_tlv_t));
- clib_memset (tlv->value, 0, tlv->length);
-
- sub_tlv = (user_plane_sub_tlv_t *) tlv->value;
- sub_tlv->type = USER_PLANE_SUB_TLV_IE;
- sub_tlv->length = (u8) ie_size;
- clib_memcpy_fast (sub_tlv->value, ie_buf, ie_size);
-
- ip6srv->sr.length += (u8) (tlv_siz / 8);
+ ipv4 = false;
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
+ {
+ // Inner desitnation is IPv6 link local
+ gtp4 = true;
+ }
}
+ }
- ip6srv->ip.payload_length =
- clib_host_to_net_u16 (len0 + hdr_len - sizeof (ip6_header_t));
-
- good_n++;
+ if (gtp4)
+ {
+ next0 = srv6_gtp4_decap_processing (vm, node, b0);
+ if (PREDICT_TRUE (next0 == SRV6_T_M_GTP4_D_NEXT_LOOKUP6))
+ good_n++;
+ else
+ bad_n++;
+ }
+ else
+ {
+ /* Strip off the outer header (IPv4 + GTP + UDP + IEs) */
+ vlib_buffer_advance (b0, (word) hdrlen);
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
- PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ if (ipv4)
{
- srv6_end_rewrite_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- clib_memcpy (tr->src.as_u8, ip6srv->ip.src_address.as_u8,
- sizeof (tr->src.as_u8));
- clib_memcpy (tr->dst.as_u8, ip6srv->ip.dst_address.as_u8,
- sizeof (tr->dst.as_u8));
+ next0 = SRV6_T_M_GTP4_D_NEXT_LOOKUP4;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ ls_param->fib4_index;
+ }
+ else
+ {
+ next0 = SRV6_T_M_GTP4_D_NEXT_LOOKUP6;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ ls_param->fib6_index;
}
}
- DONE:
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}
@@ -1218,8 +1303,8 @@ VLIB_REGISTER_NODE (srv6_end_m_gtp4_e) =
srv6_end_error_v4_strings,.n_next_nodes =
SRV6_END_M_GTP4_E_N_NEXT,.next_nodes =
{
- [SRV6_END_M_GTP4_E_NEXT_DROP] =
- "error-drop",[SRV6_END_M_GTP4_E_NEXT_LOOKUP] = "ip4-lookup",}
+ [SRV6_END_M_GTP4_E_NEXT_DROP] = "error-drop",
+ [SRV6_END_M_GTP4_E_NEXT_LOOKUP] = "ip4-lookup",}
,};
VLIB_REGISTER_NODE (srv6_t_m_gtp4_d) =
@@ -1230,14 +1315,14 @@ VLIB_REGISTER_NODE (srv6_t_m_gtp4_d) =
srv6_t_error_v4_d_strings,.n_next_nodes =
SRV6_T_M_GTP4_D_N_NEXT,.next_nodes =
{
- [SRV6_T_M_GTP4_D_NEXT_DROP] =
- "error-drop",[SRV6_T_M_GTP4_D_NEXT_LOOKUP] = "ip6-lookup",}
+ [SRV6_T_M_GTP4_D_NEXT_DROP] = "error-drop",
+ [SRV6_T_M_GTP4_D_NEXT_LOOKUP4] = "ip4-lookup",
+ [SRV6_T_M_GTP4_D_NEXT_LOOKUP6] = "ip6-lookup",}
,};
// Function for SRv6 GTP6.E function
-VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (srv6_end_m_gtp6_e)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
srv6_end_main_v6_t *sm = &srv6_end_main_v6;
ip6_sr_main_t *sm2 = &sr_main;
@@ -1261,6 +1346,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
u32 bi0;
vlib_buffer_t *b0;
ip6_sr_localsid_t *ls0;
+ srv6_end_gtp6_e_param_t *ls_param;
ip6srv_combo_header_t *ip6srv0;
ip6_address_t dst0, src0, seg0;
@@ -1284,9 +1370,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->localsids,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+ ls_param = (srv6_end_gtp6_e_param_t *) ls0->plugin_mem;
ip6srv0 = vlib_buffer_get_current (b0);
dst0 = ip6srv0->ip.dst_address;
@@ -1297,9 +1384,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
len0 = vlib_buffer_length_in_chain (vm, b0);
- if ((ip6srv0->ip.protocol != IPPROTO_IPV6_ROUTE)
- || (len0 <
- sizeof (ip6srv_combo_header_t) + 8 * ip6srv0->sr.length))
+ if ((ip6srv0->ip.protocol != IPPROTO_IPV6_ROUTE) ||
+ (len0 < sizeof (ip6srv_combo_header_t) + 8 * ip6srv0->sr.length))
{
next0 = SRV6_END_M_GTP6_E_NEXT_DROP;
@@ -1313,7 +1399,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
// logic
u32 teid = 0;
- u8 *teid8p = (u8 *) & teid;
+ u8 *teid8p = (u8 *) &teid;
u8 qfi = 0;
u16 seq = 0;
u8 gtpu_type = 0;
@@ -1332,10 +1418,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
if (PREDICT_TRUE (shift == 0))
{
qfi = dst0.as_u8[offset];
-
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
clib_memcpy_fast (&seq, &dst0.as_u8[offset + 1], 2);
}
@@ -1351,14 +1436,14 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
qfi |= dst0.as_u8[offset] << shift;
qfi |= dst0.as_u8[offset + 1] >> (8 - shift);
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
- sp = (u8 *) & seq;
+ sp = (u8 *) &seq;
for (index = 0; index < 2; index++)
{
- sp[index] = dst0.as_u8[offset + index + 1] << shift;
+ sp[index] = dst0.as_u8[offset + 1 + index] << shift;
sp[index] |=
dst0.as_u8[offset + index + 2] >> (8 - shift);
}
@@ -1380,9 +1465,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
hdrlen =
sizeof (gtpu_exthdr_t) + sizeof (gtpu_pdu_session_t);
}
- else if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ else if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
hdrlen = sizeof (gtpu_exthdr_t);
}
@@ -1402,11 +1487,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
if (ext_len >
sizeof (ip6_address_t) * (ip6srv0->sr.last_entry + 1))
{
- tlv =
- (ip6_sr_tlv_t *) ((u8 *) & ip6srv0->sr +
- sizeof (ip6_sr_header_t) +
- sizeof (ip6_address_t) *
- (ip6srv0->sr.last_entry + 1));
+ tlv = (ip6_sr_tlv_t *) ((u8 *) &ip6srv0->sr +
+ sizeof (ip6_sr_header_t) +
+ sizeof (ip6_address_t) *
+ (ip6srv0->sr.last_entry + 1));
if (tlv->type == SRH_TLV_USER_PLANE_CONTAINER)
{
@@ -1422,9 +1506,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
}
}
- vlib_buffer_advance (b0,
- (word) sizeof (ip6srv_combo_header_t) +
- ip6srv0->sr.length * 8);
+ vlib_buffer_advance (b0, (word) sizeof (ip6srv_combo_header_t) +
+ ip6srv0->sr.length * 8);
// get length of encapsulated IPv6 packet (the remaining part)
p = vlib_buffer_get_current (b0);
@@ -1447,37 +1530,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
hdr0->gtpu.type = gtpu_type;
- if (qfi)
- {
- u8 type = 0;
- gtpu_pdu_session_t *sess;
-
- hdr0->gtpu.ver_flags |= GTPU_EXTHDR_FLAG;
-
- hdr0->gtpu.ext->seq = 0;
- hdr0->gtpu.ext->npdu_num = 0;
- hdr0->gtpu.ext->nextexthdr = GTPU_EXTHDR_PDU_SESSION;
-
- type = qfi & SRV6_PDU_SESSION_U_BIT_MASK;
-
- qfi =
- ((qfi & SRV6_PDU_SESSION_QFI_MASK) >> 2) |
- ((qfi & SRV6_PDU_SESSION_R_BIT_MASK) << 5);
-
- sess =
- (gtpu_pdu_session_t *) (((char *) hdr0) +
- sizeof (ip6_gtpu_header_t) +
- sizeof (gtpu_exthdr_t));
- sess->exthdrlen = 1;
- sess->type = type;
- sess->spare = 0;
- sess->u.val = qfi;
- sess->nextexthdr = 0;
- }
-
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
hdr0->gtpu.ver_flags |= GTPU_SEQ_FLAG;
hdr0->gtpu.ext->seq = seq;
@@ -1506,29 +1561,57 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
}
}
}
+ else
+ {
+ if (qfi)
+ {
+ hdr0->gtpu.ext->seq = 0;
+ hdr0->gtpu.ext->npdu_num = 0;
+ }
+ }
+
+ if (qfi)
+ {
+ u8 type = 0;
+ gtpu_pdu_session_t *sess;
+
+ hdr0->gtpu.ver_flags |= GTPU_EXTHDR_FLAG;
+
+ hdr0->gtpu.ext->nextexthdr = GTPU_EXTHDR_PDU_SESSION;
+
+ type = qfi & SRV6_PDU_SESSION_U_BIT_MASK;
+
+ qfi = ((qfi & SRV6_PDU_SESSION_QFI_MASK) >> 2) |
+ ((qfi & SRV6_PDU_SESSION_R_BIT_MASK) << 5);
+
+ sess = (gtpu_pdu_session_t *) (((char *) hdr0) +
+ sizeof (ip6_gtpu_header_t) +
+ sizeof (gtpu_exthdr_t));
+ sess->exthdrlen = 1;
+ sess->type = type;
+ sess->spare = 0;
+ sess->u.val = qfi;
+ sess->nextexthdr = 0;
+ }
- hdr0->udp.length = clib_host_to_net_u16 (len0 +
- sizeof (udp_header_t) +
- sizeof
- (gtpu_header_t));
+ hdr0->udp.length = clib_host_to_net_u16 (
+ len0 + sizeof (udp_header_t) + sizeof (gtpu_header_t));
clib_memcpy_fast (hdr0->ip6.src_address.as_u8, src0.as_u8,
sizeof (ip6_address_t));
clib_memcpy_fast (hdr0->ip6.dst_address.as_u8, &seg0.as_u8,
sizeof (ip6_address_t));
- hdr0->ip6.payload_length = clib_host_to_net_u16 (len0 +
- sizeof
- (udp_header_t)
- +
- sizeof
- (gtpu_header_t));
+ hdr0->ip6.payload_length = clib_host_to_net_u16 (
+ len0 + sizeof (udp_header_t) + sizeof (gtpu_header_t));
// UDP source port.
key = hash_memory (p, plen < 40 ? plen : 40, 0);
port = hash_uword_to_u16 (&key);
hdr0->udp.src_port = port;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = ls_param->fib6_index;
+
good_n++;
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
@@ -1544,11 +1627,12 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
}
}
- vlib_increment_combined_counter
- (((next0 ==
- SRV6_END_M_GTP6_E_NEXT_DROP) ? &(sm2->sr_ls_invalid_counters) :
- &(sm2->sr_ls_valid_counters)), thread_index,
- ls0 - sm2->localsids, 1, vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (
+ ((next0 == SRV6_END_M_GTP6_E_NEXT_DROP) ?
+ &(sm2->sr_ls_invalid_counters) :
+ &(sm2->sr_ls_valid_counters)),
+ thread_index, ls0 - sm2->localsids, 1,
+ vlib_buffer_length_in_chain (vm, b0));
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
@@ -1567,491 +1651,570 @@ VLIB_NODE_FN (srv6_end_m_gtp6_e) (vlib_main_t * vm,
}
// Function for SRv6 GTP6.D function
-VLIB_NODE_FN (srv6_end_m_gtp6_d) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+static inline u32
+srv6_gtp6_decap_processing (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b0)
{
srv6_end_main_v6_decap_t *sm = &srv6_end_main_v6_decap;
ip6_sr_main_t *sm2 = &sr_main;
- u32 n_left_from, next_index, *from, *to_next;
- u32 thread_index = vm->thread_index;
- u32 good_n = 0, bad_n = 0;
+ ip6_sr_localsid_t *ls0;
+ srv6_end_gtp6_d_param_t *ls_param;
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
+ ip6_gtpu_header_t *hdr0 = NULL;
+ uword len0;
- while (n_left_from > 0)
+ ip6_address_t seg0, src0, dst0;
+ u32 teid = 0;
+ u8 *teidp;
+ u8 gtpu_type = 0;
+ u8 qfi;
+ u8 *qfip = NULL;
+ u16 seq = 0;
+ u8 *seqp;
+ u32 offset, shift;
+ u32 hdrlen;
+ ip6_header_t *encap = NULL;
+ gtpu_pdu_session_t *sess = NULL;
+ int ie_size = 0;
+ u16 tlv_siz = 0;
+ u8 ie_buf[GTPU_IE_MAX_SIZ];
+
+ u32 next0 = SRV6_END_M_GTP6_D_NEXT_LOOKUP6;
+
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+
+ ls_param = (srv6_end_gtp6_d_param_t *) ls0->plugin_mem;
+
+ hdr0 = vlib_buffer_get_current (b0);
+
+ hdrlen = sizeof (ip6_gtpu_header_t);
+
+ len0 = vlib_buffer_length_in_chain (vm, b0);
+
+ if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP) ||
+ (hdr0->udp.dst_port != clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT)) ||
+ (len0 < sizeof (ip6_gtpu_header_t)))
{
- u32 n_left_to_next;
+ next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ }
+ else
+ {
+ seg0 = ls_param->sr_prefix;
+ src0 = hdr0->ip6.src_address;
+ dst0 = hdr0->ip6.dst_address;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ gtpu_type = hdr0->gtpu.type;
- while (n_left_from > 0 && n_left_to_next > 0)
+ teid = hdr0->gtpu.teid;
+ teidp = (u8 *) &teid;
+
+ seqp = (u8 *) &seq;
+
+ if (hdr0->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
{
- u32 bi0;
- vlib_buffer_t *b0;
- ip6_sr_localsid_t *ls0;
- srv6_end_gtp6_param_t *ls_param;
+ // Extention header.
+ hdrlen += sizeof (gtpu_exthdr_t);
- ip6_gtpu_header_t *hdr0 = NULL;
- uword len0;
+ seq = hdr0->gtpu.ext->seq;
- ip6_address_t seg0, src0;
- u32 teid = 0;
- u8 *teidp;
- u8 gtpu_type = 0;
- u8 qfi;
- u8 *qfip = NULL;
- u16 seq = 0;
- u8 *seqp;
- u32 offset, shift;
- u32 hdrlen;
- ip6_header_t *encap = NULL;
- gtpu_pdu_session_t *sess = NULL;
- int ie_size = 0;
- u16 tlv_siz = 0;
- u8 ie_buf[GTPU_IE_MAX_SIZ];
+ if (hdr0->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
+ {
+ // PDU Session Container.
+ sess = (gtpu_pdu_session_t *) (((char *) hdr0) +
+ sizeof (ip6_gtpu_header_t) +
+ sizeof (gtpu_exthdr_t));
+ qfi = sess->u.val & ~GTPU_PDU_SESSION_P_BIT_MASK;
+ qfip = (u8 *) &qfi;
- u32 next0 = SRV6_END_M_GTP6_D_NEXT_LOOKUP;
+ hdrlen += sizeof (gtpu_pdu_session_t);
- // defaults
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
+ if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
+ {
+ hdrlen += sizeof (gtpu_paging_policy_t);
+ }
+ }
+ }
- b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->localsids,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ offset = ls_param->sr_prefixlen / 8;
+ shift = ls_param->sr_prefixlen % 8;
- ls_param = (srv6_end_gtp6_param_t *) ls0->plugin_mem;
+ if (PREDICT_TRUE (shift == 0))
+ {
+ if (qfip)
+ {
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+ ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
- hdr0 = vlib_buffer_get_current (b0);
+ if (sess->type)
+ {
+ qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
+ }
- hdrlen = sizeof (ip6_gtpu_header_t);
+ seg0.as_u8[offset] = qfi;
+ }
- len0 = vlib_buffer_length_in_chain (vm, b0);
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ {
+ clib_memcpy_fast (&seg0.as_u8[offset + 1], seqp, 2);
+ }
+ else
+ {
+ clib_memcpy_fast (&seg0.as_u8[offset + 1], teidp, 4);
+ }
+ }
+ else
+ {
+ int idx;
- if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP)
- || (hdr0->udp.dst_port !=
- clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT))
- || (len0 < sizeof (ip6_gtpu_header_t)))
+ if (qfip)
{
- next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+ ((qfi & ~GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
- bad_n++;
+ if (sess->type)
+ {
+ qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
+ }
+
+ seg0.as_u8[offset] |= qfi >> shift;
+ seg0.as_u8[offset + 1] |= qfi << (8 - shift);
+ }
+
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ {
+ for (idx = 0; idx < 2; idx++)
+ {
+ seg0.as_u8[offset + idx + 1] |= seqp[idx] >> shift;
+ seg0.as_u8[offset + idx + 2] |= seqp[idx] << (8 - shift);
+ }
}
else
{
- seg0 = ls_param->sr_prefix;
- src0 = hdr0->ip6.src_address;
+ for (idx = 0; idx < 4; idx++)
+ {
+ seg0.as_u8[offset + idx + 1] |= teidp[idx] >> shift;
+ seg0.as_u8[offset + idx + 2] |= teidp[idx] << (8 - shift);
+ }
+ }
+ }
- gtpu_type = hdr0->gtpu.type;
+ if (PREDICT_FALSE (gtpu_type == GTPU_TYPE_ERROR_INDICATION))
+ {
+ u16 payload_len;
- teid = hdr0->gtpu.teid;
- teidp = (u8 *) & teid;
+ payload_len = clib_net_to_host_u16 (hdr0->gtpu.length);
+ if (payload_len != 0)
+ {
+ ie_size = payload_len - (hdrlen - sizeof (ip6_gtpu_header_t));
+ if (ie_size > 0)
+ {
+ u8 *ies;
- seqp = (u8 *) & seq;
+ ies = (u8 *) ((u8 *) hdr0 + hdrlen);
+ clib_memcpy_fast (ie_buf, ies, ie_size);
+ hdrlen += ie_size;
+ }
+ }
+ }
- if (hdr0->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
- {
- // Extention header.
- hdrlen += sizeof (gtpu_exthdr_t);
+ // jump over variable length data
+ vlib_buffer_advance (b0, (word) hdrlen);
- seq = hdr0->gtpu.ext->seq;
+ // get length of encapsulated IPv6 packet (the remaining part)
+ len0 = vlib_buffer_length_in_chain (vm, b0);
- if (hdr0->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
- {
- // PDU Session Container.
- sess =
- (gtpu_pdu_session_t *) (((char *) hdr0) +
- sizeof (ip6_gtpu_header_t) +
- sizeof (gtpu_exthdr_t));
- qfi = sess->u.val & ~GTPU_PDU_SESSION_P_BIT_MASK;
- qfip = (u8 *) & qfi;
+ if (PREDICT_TRUE (gtpu_type == GTPU_TYPE_GTPU))
+ {
+ encap = vlib_buffer_get_current (b0);
+ }
- hdrlen += sizeof (gtpu_pdu_session_t);
+ uword *p;
+ ip6srv_combo_header_t *ip6srv;
+ ip6_sr_policy_t *sr_policy = NULL;
+ ip6_sr_sl_t *sl = NULL;
+ u32 *sl_index;
+ u32 hdr_len;
- if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
- {
- hdrlen += sizeof (gtpu_paging_policy_t);
- }
- }
- }
+ p = mhash_get (&sm2->sr_policies_index_hash, &ls_param->sr_prefix);
+ if (p)
+ {
+ sr_policy = pool_elt_at_index (sm2->sr_policies, p[0]);
+ }
- offset = ls_param->sr_prefixlen / 8;
- shift = ls_param->sr_prefixlen % 8;
+ if (sr_policy)
+ {
+ vec_foreach (sl_index, sr_policy->segments_lists)
+ {
+ sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
+ if (sl != NULL)
+ break;
+ }
+ }
- if (PREDICT_TRUE (shift == 0))
- {
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
- {
- clib_memcpy_fast (&seg0.as_u8[offset + 1], seqp, 2);
- }
- else
- {
- clib_memcpy_fast (&seg0.as_u8[offset + 1], teidp, 4);
- }
+ if (sl)
+ {
+ hdr_len = sizeof (ip6srv_combo_header_t);
+ hdr_len += vec_len (sl->segments) * sizeof (ip6_address_t);
+ hdr_len += sizeof (ip6_address_t) * 2;
+ }
+ else
+ {
+ hdr_len = sizeof (ip6_header_t);
+ hdr_len += sizeof (ip6_sr_header_t);
+ hdr_len += sizeof (ip6_address_t) * 2;
+ }
- if (qfip)
- {
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ if (ie_size)
+ {
+ tlv_siz =
+ sizeof (ip6_sr_tlv_t) + sizeof (user_plane_sub_tlv_t) + ie_size;
- if (sess->type)
- {
- qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
- }
+ tlv_siz = (tlv_siz & ~0x07) + (tlv_siz & 0x07 ? 0x08 : 0x0);
+ hdr_len += tlv_siz;
+ }
- seg0.as_u8[offset] = qfi;
- }
- }
- else
- {
- int idx;
+ // jump back to data[0] or pre_data if required
+ vlib_buffer_advance (b0, -(word) hdr_len);
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
- {
- for (idx = 0; idx < 2; idx++)
- {
- seg0.as_u8[offset + idx + 1] |= seqp[idx] >> shift;
- seg0.as_u8[offset + idx + 2] |=
- seqp[idx] << (8 - shift);
- }
- }
- else
- {
- for (idx = 0; idx < 4; idx++)
- {
- seg0.as_u8[offset + idx + 1] |= teidp[idx] >> shift;
- seg0.as_u8[offset + idx + 2] |=
- teidp[idx] << (8 - shift);
- }
- }
+ ip6srv = vlib_buffer_get_current (b0);
- if (qfip)
- {
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & ~GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ if (sl)
+ {
+ clib_memcpy_fast (ip6srv, sl->rewrite, vec_len (sl->rewrite));
- if (sess->type)
- {
- qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
- }
+ if (vec_len (sl->segments) > 1)
+ {
+ ip6srv->ip.src_address = src0;
- seg0.as_u8[offset] |= qfi >> shift;
- seg0.as_u8[offset + 1] |= qfi << (8 - shift);
- }
- }
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
- if (PREDICT_FALSE (gtpu_type == GTPU_TYPE_ERROR_INDICATION))
- {
- u16 payload_len;
+ ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
+ ip6srv->sr.segments_left += 2;
+ ip6srv->sr.last_entry += 2;
- payload_len = clib_net_to_host_u16 (hdr0->gtpu.length);
- if (payload_len != 0)
- {
- ie_size =
- payload_len - (hdrlen - sizeof (ip6_gtpu_header_t));
- if (ie_size > 0)
- {
- u8 *ies;
+ ip6srv->sr.length += (sizeof (ip6_address_t) * 2) / 8;
+ ip6srv->sr.segments[0] = dst0;
+ ip6srv->sr.segments[1] = seg0;
- ies = (u8 *) ((u8 *) hdr0 + hdrlen);
- clib_memcpy_fast (ie_buf, ies, ie_size);
- hdrlen += ie_size;
- }
- }
- }
+ clib_memcpy_fast (&ip6srv->sr.segments[2],
+ (u8 *) (sl->rewrite + sizeof (ip6_header_t) +
+ sizeof (ip6_sr_header_t)),
+ vec_len (sl->segments) *
+ sizeof (ip6_address_t));
+ }
+ else
+ {
+ ip6srv->ip.src_address = src0;
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
- // jump over variable length data
- vlib_buffer_advance (b0, (word) hdrlen);
+ ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
+ ip6srv->sr.segments_left = 2;
+ ip6srv->sr.last_entry = 1;
+ ip6srv->sr.length = (sizeof (ip6_address_t) * 2) / 8;
+ ip6srv->sr.flags = 0;
- // get length of encapsulated IPv6 packet (the remaining part)
- len0 = vlib_buffer_length_in_chain (vm, b0);
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
- if (PREDICT_TRUE (gtpu_type == GTPU_TYPE_GTPU))
+ ip6srv->sr.segments[0] = dst0;
+ ip6srv->sr.segments[1] = seg0;
+
+ if (vec_len (sl->segments))
{
- encap = vlib_buffer_get_current (b0);
+ ip6srv->sr.segments[2] = sl->segments[0];
+ ip6srv->sr.last_entry++;
+ ip6srv->sr.length += sizeof (ip6_address_t) / 8;
}
+ }
- uword *p;
- ip6srv_combo_header_t *ip6srv;
- ip6_sr_policy_t *sr_policy = NULL;
- ip6_sr_sl_t *sl = NULL;
- u32 *sl_index;
- u32 hdr_len;
-
- p =
- mhash_get (&sm2->sr_policies_index_hash,
- &ls_param->sr_prefix);
- if (p)
+ if (PREDICT_TRUE (encap != NULL))
+ {
+ if (ls_param->nhtype == SRV6_NHTYPE_NONE)
{
- sr_policy = pool_elt_at_index (sm2->sr_policies, p[0]);
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
+ ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
+ else
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
}
-
- if (sr_policy)
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
{
- vec_foreach (sl_index, sr_policy->segments_lists)
- {
- sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
- if (sl != NULL)
- break;
- }
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 4)
+ {
+ // Bad encap packet.
+ next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ goto DONE;
+ }
+ }
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
+ {
+ // Bad encap packet.
+ next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ goto DONE;
+ }
+ }
+ else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
}
+ }
+ else
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ }
+ }
+ else
+ {
+ clib_memcpy_fast (ip6srv, &sm->cache_hdr, sizeof (ip6_header_t));
- if (sl)
+ ip6srv->ip.src_address = src0;
+ ip6srv->ip.dst_address = seg0;
+
+ ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+
+ ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
+ ip6srv->sr.segments_left = 1;
+ ip6srv->sr.last_entry = 1;
+
+ ip6srv->sr.length = (sizeof (ip6_address_t) * 2) / 8;
+ ip6srv->sr.segments[0] = dst0;
+ ip6srv->sr.segments[1] = seg0;
+
+ if (PREDICT_FALSE (gtpu_type) != GTPU_TYPE_GTPU)
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ ip6srv->sr.tag = clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ }
+ else
+ {
+ if (ls_param->nhtype == SRV6_NHTYPE_NONE)
{
- hdr_len = sizeof (ip6srv_combo_header_t);
- hdr_len += vec_len (sl->segments) * sizeof (ip6_address_t);
- hdr_len += sizeof (ip6_address_t);
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
}
- else
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
{
- hdr_len = sizeof (ip6_header_t);
- if (PREDICT_FALSE (gtpu_type) != GTPU_TYPE_GTPU)
+ ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 4)
{
- hdr_len += sizeof (ip6_sr_header_t);
- hdr_len += sizeof (ip6_address_t);
+ // Bad encap packet.
+ next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ goto DONE;
}
}
-
- if (ie_size)
+ else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
{
- tlv_siz =
- sizeof (ip6_sr_tlv_t) + sizeof (user_plane_sub_tlv_t) +
- ie_size;
-
- tlv_siz = (tlv_siz & ~0x07) + (tlv_siz & 0x07 ? 0x08 : 0x0);
- hdr_len += tlv_siz;
+ ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
+ {
+ // Bad encap packet.
+ next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
+ goto DONE;
+ }
+ }
+ else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
+ {
+ ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
}
+ }
+ }
- // jump back to data[0] or pre_data if required
- vlib_buffer_advance (b0, -(word) hdr_len);
+ if (PREDICT_FALSE (ie_size))
+ {
+ ip6_sr_tlv_t *tlv;
+ user_plane_sub_tlv_t *sub_tlv;
- ip6srv = vlib_buffer_get_current (b0);
+ tlv = (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
+ tlv->type = SRH_TLV_USER_PLANE_CONTAINER;
+ tlv->length = (u8) (tlv_siz - sizeof (ip6_sr_tlv_t));
+ clib_memset (tlv->value, 0, tlv->length);
- if (sl)
- {
- clib_memcpy_fast (ip6srv, sl->rewrite,
- vec_len (sl->rewrite));
+ sub_tlv = (user_plane_sub_tlv_t *) tlv->value;
+ sub_tlv->type = USER_PLANE_SUB_TLV_IE;
+ sub_tlv->length = (u8) ie_size;
+ clib_memcpy_fast (sub_tlv->value, ie_buf, ie_size);
- if (vec_len (sl->segments) > 1)
- {
- ip6srv->ip.src_address = src0;
+ ip6srv->sr.length += (u8) (tlv_siz / 8);
+ }
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ ip6srv->ip.payload_length =
+ clib_host_to_net_u16 (len0 + hdr_len - sizeof (ip6_header_t));
- ip6srv->sr.segments_left += 1;
- ip6srv->sr.last_entry += 1;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0; /* default FIB */
- ip6srv->sr.length += sizeof (ip6_address_t) / 8;
- ip6srv->sr.segments[0] = seg0;
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ srv6_end_rewrite_trace_t *tr =
+ vlib_add_trace (vm, node, b0, sizeof (*tr));
+ clib_memcpy (tr->src.as_u8, ip6srv->ip.src_address.as_u8,
+ sizeof (ip6_address_t));
+ clib_memcpy (tr->dst.as_u8, ip6srv->ip.dst_address.as_u8,
+ sizeof (ip6_address_t));
+ tr->teid = teid;
+ clib_memcpy (tr->sr_prefix.as_u8, ls_param->sr_prefix.as_u8,
+ sizeof (ip6_address_t));
+ tr->sr_prefixlen = ls_param->sr_prefixlen;
+ }
+ }
- clib_memcpy_fast (&ip6srv->sr.segments[1],
- (u8 *) (sl->rewrite +
- sizeof (ip6_header_t) +
- sizeof (ip6_sr_header_t)),
- vec_len (sl->segments) *
- sizeof (ip6_address_t));
- }
- else
- {
- ip6srv->ip.src_address = src0;
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+DONE:
+ return next0;
+}
- ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
- ip6srv->sr.segments_left = 1;
- ip6srv->sr.last_entry = 0;
- ip6srv->sr.length =
- ((sizeof (ip6_sr_header_t) +
- sizeof (ip6_address_t)) / 8) - 1;
- ip6srv->sr.flags = 0;
+VLIB_NODE_FN (srv6_end_m_gtp6_d)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ srv6_end_main_v6_decap_t *sm = &srv6_end_main_v6_decap;
+ ip6_sr_main_t *sm2 = &sr_main;
+ u32 n_left_from, next_index, *from, *to_next;
+ u32 thread_index = vm->thread_index;
+ ip6_sr_localsid_t *ls0;
+ srv6_end_gtp6_d_param_t *ls_param;
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ u32 good_n = 0, bad_n = 0;
- ip6srv->sr.segments[0] = seg0;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
- if (vec_len (sl->segments))
- {
- ip6srv->sr.segments[1] = sl->segments[0];
- ip6srv->sr.last_entry++;
- ip6srv->sr.length += sizeof (ip6_address_t) / 8;
- }
- }
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
- if (PREDICT_TRUE (encap != NULL))
- {
- if (ls_param->nhtype == SRV6_NHTYPE_NONE)
- {
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) == 6)
- ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
- else
- ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 4)
- {
- // Bad encap packet.
- next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 6)
- {
- // Bad encap packet.
- next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
- }
- }
- else
- {
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
- }
- }
- else
- {
- clib_memcpy_fast (ip6srv, &sm->cache_hdr,
- sizeof (ip6_header_t));
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- ip6srv->ip.src_address = src0;
- ip6srv->ip.dst_address = seg0;
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
- if (PREDICT_FALSE (gtpu_type) != GTPU_TYPE_GTPU)
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6_ROUTE;
+ u32 next0;
- ip6srv->sr.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ ip6_gtpu_header_t *hdr;
+ u32 hdrlen;
+ u8 gtpu_type;
+ bool gtp6;
+ bool ipv4;
- ip6srv->sr.tag =
- clib_host_to_net_u16 (srh_tagfield[gtpu_type]);
+ // defaults
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
- ip6srv->sr.segments_left = 0;
- ip6srv->sr.last_entry = 0;
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
- ip6srv->sr.length = sizeof (ip6_address_t) / 8;
- ip6srv->sr.segments[0] = seg0;
- }
- else
+ ls_param = (srv6_end_gtp6_d_param_t *) ls0->plugin_mem;
+
+ hdr = vlib_buffer_get_current (b0);
+ gtpu_type = hdr->gtpu.type;
+
+ gtp6 = false;
+ ipv4 = true;
+
+ if (PREDICT_FALSE (gtpu_type != GTPU_TYPE_GTPU || ls_param->drop_in))
+ {
+ gtp6 = true;
+ }
+ else
+ {
+ ip6_header_t *ip6;
+
+ hdrlen = sizeof (ip6_gtpu_header_t);
+
+ if (hdr->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
+ {
+ hdrlen += sizeof (gtpu_exthdr_t);
+ if (hdr->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
{
- if (ls_param->nhtype == SRV6_NHTYPE_NONE)
- {
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 6)
- ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IP_IN_IP;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 4)
- {
- // Bad encap packet.
- next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
- {
- ip6srv->ip.protocol = IP_PROTOCOL_IPV6;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label)
- >> 28) != 6)
- {
- // Bad encap packet.
- next0 = SRV6_END_M_GTP6_D_NEXT_DROP;
- bad_n++;
- goto DONE;
- }
- }
- else if (ls_param->nhtype == SRV6_NHTYPE_NON_IP)
+ gtpu_pdu_session_t *sess;
+ sess = (gtpu_pdu_session_t *) (((char *) hdr) + hdrlen);
+ hdrlen += sizeof (gtpu_pdu_session_t);
+
+ if (sess->u.val & GTPU_PDU_SESSION_P_BIT_MASK)
{
- ip6srv->ip.protocol = IP_PROTOCOL_IP6_ETHERNET;
+ hdrlen += sizeof (gtpu_paging_policy_t);
}
}
}
- if (PREDICT_FALSE (ie_size))
+ ip6 = (ip6_header_t *) (((char *) hdr) + hdrlen);
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
{
- ip6_sr_tlv_t *tlv;
- user_plane_sub_tlv_t *sub_tlv;
-
- tlv =
- (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
- tlv->type = SRH_TLV_USER_PLANE_CONTAINER;
- tlv->length = (u8) (tlv_siz - sizeof (ip6_sr_tlv_t));
- clib_memset (tlv->value, 0, tlv->length);
-
- sub_tlv = (user_plane_sub_tlv_t *) tlv->value;
- sub_tlv->type = USER_PLANE_SUB_TLV_IE;
- sub_tlv->length = (u8) ie_size;
- clib_memcpy_fast (sub_tlv->value, ie_buf, ie_size);
-
- ip6srv->sr.length += (u8) (tlv_siz / 8);
+ ipv4 = false;
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
+ {
+ // Inner desitnation is IPv6 link local
+ gtp6 = true;
+ }
}
+ }
- ip6srv->ip.payload_length =
- clib_host_to_net_u16 (len0 + hdr_len - sizeof (ip6_header_t));
-
- good_n++;
+ if (gtp6)
+ {
+ next0 = srv6_gtp6_decap_processing (vm, node, b0);
+ if (PREDICT_TRUE (next0 == SRV6_END_M_GTP6_D_NEXT_LOOKUP6))
+ good_n++;
+ else
+ bad_n++;
+
+ vlib_increment_combined_counter (
+ ((next0 == SRV6_END_M_GTP6_D_NEXT_DROP) ?
+ &(sm2->sr_ls_invalid_counters) :
+ &(sm2->sr_ls_valid_counters)),
+ thread_index, ls0 - sm2->localsids, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+ else
+ {
+ /* Strip off the outer header (IPv6 + GTP + UDP + IEs) */
+ vlib_buffer_advance (b0, (word) hdrlen);
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
- PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ if (ipv4)
{
- srv6_end_rewrite_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- clib_memcpy (tr->src.as_u8, ip6srv->ip.src_address.as_u8,
- sizeof (ip6_address_t));
- clib_memcpy (tr->dst.as_u8, ip6srv->ip.dst_address.as_u8,
- sizeof (ip6_address_t));
- tr->teid = teid;
- clib_memcpy (tr->sr_prefix.as_u8, ls_param->sr_prefix.as_u8,
- sizeof (ip6_address_t));
- tr->sr_prefixlen = ls_param->sr_prefixlen;
+ next0 = SRV6_END_M_GTP6_D_NEXT_LOOKUP4;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ ls_param->fib4_index;
+ }
+ else
+ {
+ next0 = SRV6_END_M_GTP6_D_NEXT_LOOKUP6;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] =
+ ls_param->fib6_index;
}
}
- DONE:
- vlib_increment_combined_counter
- (((next0 ==
- SRV6_END_M_GTP6_D_NEXT_DROP) ? &(sm2->sr_ls_invalid_counters) :
- &(sm2->sr_ls_valid_counters)), thread_index,
- ls0 - sm2->localsids, 1, vlib_buffer_length_in_chain (vm, b0));
-
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}
@@ -2069,15 +2232,14 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d) (vlib_main_t * vm,
}
// Function for SRv6 GTP6.D.DI function
-VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (srv6_end_m_gtp6_d_di)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
srv6_end_main_v6_decap_di_t *sm = &srv6_end_main_v6_decap_di;
ip6_sr_main_t *sm2 = &sr_main;
u32 n_left_from, next_index, *from, *to_next;
u32 thread_index = vm->thread_index;
- srv6_end_gtp6_param_t *ls_param;
+ srv6_end_gtp6_d_param_t *ls_param;
u32 good_n = 0, bad_n = 0;
@@ -2129,11 +2291,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->localsids,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
- ls_param = (srv6_end_gtp6_param_t *) ls0->plugin_mem;
+ ls_param = (srv6_end_gtp6_d_param_t *) ls0->plugin_mem;
hdr0 = vlib_buffer_get_current (b0);
@@ -2141,10 +2302,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
len0 = vlib_buffer_length_in_chain (vm, b0);
- if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP)
- || (hdr0->udp.dst_port !=
- clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT))
- || (len0 < sizeof (ip6_gtpu_header_t)))
+ if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP) ||
+ (hdr0->udp.dst_port !=
+ clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT)) ||
+ (len0 < sizeof (ip6_gtpu_header_t)))
{
next0 = SRV6_END_M_GTP6_D_DI_NEXT_DROP;
@@ -2159,9 +2320,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
seg0 = ls_param->sr_prefix;
teid = hdr0->gtpu.teid;
- teidp = (u8 *) & teid;
+ teidp = (u8 *) &teid;
- seqp = (u8 *) & seq;
+ seqp = (u8 *) &seq;
if (hdr0->gtpu.ver_flags & (GTPU_EXTHDR_FLAG | GTPU_SEQ_FLAG))
{
@@ -2173,8 +2334,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
if (hdr0->gtpu.ext->nextexthdr == GTPU_EXTHDR_PDU_SESSION)
{
// PDU Session Container.
- sess =
- (gtpu_pdu_session_t *) (((char *) hdr0) + hdrlen);
+ sess = (gtpu_pdu_session_t *) (((char *) hdr0) + hdrlen);
qfi = sess->u.val & ~GTPU_PDU_SESSION_P_BIT_MASK;
qfip = &qfi;
@@ -2190,71 +2350,70 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
offset = ls_param->sr_prefixlen / 8;
shift = ls_param->sr_prefixlen % 8;
+ offset += 1;
if (PREDICT_TRUE (shift == 0))
{
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
- clib_memcpy_fast (&seg0.as_u8[offset + 1], seqp, 2);
+ clib_memcpy_fast (&seg0.as_u8[offset], seqp, 2);
}
else
{
- clib_memcpy_fast (&seg0.as_u8[offset + 1], teidp, 4);
+ clib_memcpy_fast (&seg0.as_u8[offset], teidp, 4);
}
if (qfip)
{
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+ ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
if (sess->type)
{
qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
}
- seg0.as_u8[offset] = qfi;
+ seg0.as_u8[offset + 4] = qfi;
}
}
else
{
int idx;
- if (gtpu_type == GTPU_TYPE_ECHO_REQUEST
- || gtpu_type == GTPU_TYPE_ECHO_REPLY
- || gtpu_type == GTPU_TYPE_ERROR_INDICATION)
+ if (gtpu_type == GTPU_TYPE_ECHO_REQUEST ||
+ gtpu_type == GTPU_TYPE_ECHO_REPLY ||
+ gtpu_type == GTPU_TYPE_ERROR_INDICATION)
{
for (idx = 0; idx < 2; idx++)
{
- seg0.as_u8[offset + idx + 1] |= seqp[idx] >> shift;
- seg0.as_u8[offset + idx + 2] |=
- seqp[idx] << (8 - shift);
+ seg0.as_u8[offset + idx] |= seqp[idx] >> shift;
+ seg0.as_u8[offset + idx + 1] |= seqp[idx]
+ << (8 - shift);
}
}
else
{
for (idx = 0; idx < 4; idx++)
{
- seg0.as_u8[offset + idx + 1] |= teidp[idx] >> shift;
- seg0.as_u8[offset + idx + 2] |=
- teidp[idx] << (8 - shift);
+ seg0.as_u8[offset + idx] |= teidp[idx] >> shift;
+ seg0.as_u8[offset + idx + 1] |= teidp[idx]
+ << (8 - shift);
}
}
if (qfip)
{
- qfi =
- ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
- ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
+ qfi = ((qfi & GTPU_PDU_SESSION_QFI_MASK) << 2) |
+ ((qfi & GTPU_PDU_SESSION_R_BIT_MASK) >> 5);
if (sess->type)
{
qfi |= SRV6_PDU_SESSION_U_BIT_MASK;
}
- seg0.as_u8[offset] |= qfi >> shift;
- seg0.as_u8[offset + 1] |= qfi << (8 - shift);
+ seg0.as_u8[offset + 4] |= qfi >> shift;
+ seg0.as_u8[offset + 5] |= qfi << (8 - shift);
}
}
@@ -2297,8 +2456,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
u32 hdr_len;
p =
- mhash_get (&sm2->sr_policies_index_hash,
- &ls_param->sr_prefix);
+ mhash_get (&sm2->sr_policies_index_hash, &ls_param->sr_prefix);
if (p)
{
sr_policy = pool_elt_at_index (sm2->sr_policies, p[0]);
@@ -2307,11 +2465,11 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
if (sr_policy)
{
vec_foreach (sl_index, sr_policy->segments_lists)
- {
- sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
- if (sl != NULL)
- break;
- }
+ {
+ sl = pool_elt_at_index (sm2->sid_lists, *sl_index);
+ if (sl != NULL)
+ break;
+ }
}
hdr_len = sizeof (ip6srv_combo_header_t);
@@ -2323,9 +2481,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
if (ie_size)
{
- tlv_siz =
- sizeof (ip6_sr_tlv_t) + sizeof (user_plane_sub_tlv_t) +
- ie_size;
+ tlv_siz = sizeof (ip6_sr_tlv_t) +
+ sizeof (user_plane_sub_tlv_t) + ie_size;
tlv_siz = (tlv_siz & ~0x07) + (tlv_siz & 0x07 ? 0x08 : 0x0);
hdr_len += tlv_siz;
@@ -2356,12 +2513,11 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
ip6srv->sr.segments[0] = dst0;
ip6srv->sr.segments[1] = seg0;
- clib_memcpy_fast (&ip6srv->sr.segments[2],
- (u8 *) (sl->rewrite +
- sizeof (ip6_header_t) +
- sizeof (ip6_sr_header_t)),
- vec_len (sl->segments) *
- sizeof (ip6_address_t));
+ clib_memcpy_fast (
+ &ip6srv->sr.segments[2],
+ (u8 *) (sl->rewrite + sizeof (ip6_header_t) +
+ sizeof (ip6_sr_header_t)),
+ vec_len (sl->segments) * sizeof (ip6_address_t));
}
else
{
@@ -2371,9 +2527,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
ip6srv->sr.type = ROUTING_HEADER_TYPE_SR;
ip6srv->sr.segments_left = 2;
ip6srv->sr.last_entry = 1;
- ip6srv->sr.length =
- ((sizeof (ip6_sr_header_t) +
- 2 * sizeof (ip6_address_t)) / 8) - 1;
+ ip6srv->sr.length = ((sizeof (ip6_sr_header_t) +
+ 2 * sizeof (ip6_address_t)) /
+ 8) -
+ 1;
ip6srv->sr.flags = 0;
ip6srv->sr.tag =
@@ -2402,8 +2559,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
ip6srv->sr.segments_left = 1;
ip6srv->sr.last_entry = 0;
ip6srv->sr.length =
- ((sizeof (ip6_sr_header_t) +
- sizeof (ip6_address_t)) / 8) - 1;
+ ((sizeof (ip6_sr_header_t) + sizeof (ip6_address_t)) / 8) -
+ 1;
ip6srv->sr.flags = 0;
ip6srv->sr.tag =
@@ -2417,8 +2574,7 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
ip6_sr_tlv_t *tlv;
user_plane_sub_tlv_t *sub_tlv;
- tlv =
- (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
+ tlv = (ip6_sr_tlv_t *) ((u8 *) ip6srv + (hdr_len - tlv_siz));
tlv->type = SRH_TLV_USER_PLANE_CONTAINER;
tlv->length = (u8) (tlv_siz - sizeof (ip6_sr_tlv_t));
clib_memset (tlv->value, 0, tlv->length);
@@ -2438,8 +2594,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
{
if (ls_param->nhtype == SRV6_NHTYPE_NONE)
{
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label) >>
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
28) == 6)
ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
else
@@ -2448,8 +2604,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
else if (ls_param->nhtype == SRV6_NHTYPE_IPV4)
{
ip6srv->sr.protocol = IP_PROTOCOL_IP_IN_IP;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label) >>
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
28) != 4)
{
// Bad encap packet.
@@ -2461,8 +2617,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
else if (ls_param->nhtype == SRV6_NHTYPE_IPV6)
{
ip6srv->sr.protocol = IP_PROTOCOL_IPV6;
- if ((clib_net_to_host_u32
- (encap->ip_version_traffic_class_and_flow_label) >>
+ if ((clib_net_to_host_u32 (
+ encap->ip_version_traffic_class_and_flow_label) >>
28) != 6)
{
// Bad encap packet.
@@ -2500,12 +2656,12 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
}
DONE:
- vlib_increment_combined_counter
- (((next0 ==
- SRV6_END_M_GTP6_D_DI_NEXT_DROP) ?
- &(sm2->sr_ls_invalid_counters) : &(sm2->sr_ls_valid_counters)),
- thread_index, ls0 - sm2->localsids, 1,
- vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (
+ ((next0 == SRV6_END_M_GTP6_D_DI_NEXT_DROP) ?
+ &(sm2->sr_ls_invalid_counters) :
+ &(sm2->sr_ls_valid_counters)),
+ thread_index, ls0 - sm2->localsids, 1,
+ vlib_buffer_length_in_chain (vm, b0));
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
@@ -2524,9 +2680,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_d_di) (vlib_main_t * vm,
}
// Function for SRv6 GTP6.DT function
-VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (srv6_end_m_gtp6_dt)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
srv6_end_main_v6_dt_t *sm = &srv6_end_main_v6_dt;
ip6_sr_main_t *sm2 = &sr_main;
@@ -2570,9 +2725,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->localsids,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls0 = pool_elt_at_index (sm2->localsids,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
ls_param = (srv6_end_gtp6_dt_param_t *) ls0->plugin_mem;
@@ -2582,10 +2736,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
len0 = vlib_buffer_length_in_chain (vm, b0);
- if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP)
- || (hdr0->udp.dst_port !=
- clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT))
- || (len0 < sizeof (ip6_gtpu_header_t)))
+ if ((hdr0->ip6.protocol != IP_PROTOCOL_UDP) ||
+ (hdr0->udp.dst_port !=
+ clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT)) ||
+ (len0 < sizeof (ip6_gtpu_header_t)))
{
next0 = SRV6_END_M_GTP6_DT_NEXT_DROP;
@@ -2638,9 +2792,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
else if (ls_param->type == SRV6_GTP6_DT6)
{
ip6 = (ip6_header_t *) ((u8 *) hdr0 + hdrlen);
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- != 6)
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
{
next0 = SRV6_END_M_GTP6_DT_NEXT_DROP;
bad_n++;
@@ -2648,8 +2802,10 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
}
next0 = SRV6_END_M_GTP6_DT_NEXT_LOOKUP6;
- if ((ip6->dst_address.as_u8[0] == 0xff)
- && ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80))
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
{
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
ls_param->local_fib_index;
@@ -2664,13 +2820,15 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
else if (ls_param->type == SRV6_GTP6_DT46)
{
ip6 = (ip6_header_t *) ((u8 *) hdr0 + hdrlen);
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- == 6)
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
{
next0 = SRV6_END_M_GTP6_DT_NEXT_LOOKUP6;
- if ((ip6->dst_address.as_u8[0] == 0xff)
- && ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80))
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
{
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
ls_param->local_fib_index;
@@ -2682,10 +2840,9 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
ls_param->fib6_index;
}
}
- else
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- == 4)
+ else if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 4)
{
vlib_buffer_advance (b0, (word) hdrlen);
next0 = SRV6_END_M_GTP6_DT_NEXT_LOOKUP4;
@@ -2722,11 +2879,12 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
}
DONE:
- vlib_increment_combined_counter
- (((next0 ==
- SRV6_END_M_GTP6_DT_NEXT_DROP) ? &(sm2->sr_ls_invalid_counters)
- : &(sm2->sr_ls_valid_counters)), thread_index,
- ls0 - sm2->localsids, 1, vlib_buffer_length_in_chain (vm, b0));
+ vlib_increment_combined_counter (
+ ((next0 == SRV6_END_M_GTP6_DT_NEXT_DROP) ?
+ &(sm2->sr_ls_invalid_counters) :
+ &(sm2->sr_ls_valid_counters)),
+ thread_index, ls0 - sm2->localsids, 1,
+ vlib_buffer_length_in_chain (vm, b0));
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
@@ -2745,9 +2903,8 @@ VLIB_NODE_FN (srv6_end_m_gtp6_dt) (vlib_main_t * vm,
}
// Function for SRv6 GTP4.DT function
-VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (srv6_t_m_gtp4_dt)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
srv6_t_main_v4_dt_t *sm = &srv6_t_main_v4_dt;
ip6_sr_main_t *sm2 = &sr_main;
@@ -2790,9 +2947,8 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
n_left_to_next -= 1;
b0 = vlib_get_buffer (vm, bi0);
- ls0 =
- pool_elt_at_index (sm2->sid_lists,
- vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
+ ls0 = pool_elt_at_index (sm2->sid_lists,
+ vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
ls_param = (srv6_t_gtp4_dt_param_t *) ls0->plugin_mem;
@@ -2802,10 +2958,10 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
len0 = vlib_buffer_length_in_chain (vm, b0);
- if ((hdr0->ip4.protocol != IP_PROTOCOL_UDP)
- || (hdr0->udp.dst_port !=
- clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT))
- || (len0 < sizeof (ip4_gtpu_header_t)))
+ if ((hdr0->ip4.protocol != IP_PROTOCOL_UDP) ||
+ (hdr0->udp.dst_port !=
+ clib_host_to_net_u16 (SRV6_GTP_UDP_DST_PORT)) ||
+ (len0 < sizeof (ip4_gtpu_header_t)))
{
next0 = SRV6_T_M_GTP4_DT_NEXT_DROP;
@@ -2858,9 +3014,9 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
else if (ls_param->type == SRV6_GTP4_DT6)
{
ip6 = (ip6_header_t *) ((u8 *) hdr0 + hdrlen);
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- != 6)
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) != 6)
{
next0 = SRV6_T_M_GTP4_DT_NEXT_DROP;
bad_n++;
@@ -2868,8 +3024,10 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
}
next0 = SRV6_T_M_GTP4_DT_NEXT_LOOKUP6;
- if ((ip6->dst_address.as_u8[0] == 0xff)
- && ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80))
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
{
next0 = SRV6_T_M_GTP4_DT_NEXT_LOOKUP4;
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
@@ -2885,13 +3043,15 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
else if (ls_param->type == SRV6_GTP4_DT46)
{
ip6 = (ip6_header_t *) ((u8 *) hdr0 + hdrlen);
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- == 6)
+ if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 6)
{
next0 = SRV6_T_M_GTP4_DT_NEXT_LOOKUP6;
- if ((ip6->dst_address.as_u8[0] == 0xff)
- && ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80))
+ if (((ip6->dst_address.as_u8[0] == 0xff) &&
+ (ip6->dst_address.as_u8[1] == 0x02)) ||
+ ((ip6->dst_address.as_u8[0] == 0xfe) &&
+ ((ip6->dst_address.as_u8[1] & 0xc0) == 0x80)))
{
next0 = SRV6_T_M_GTP4_DT_NEXT_LOOKUP4;
vnet_buffer (b0)->sw_if_index[VLIB_TX] =
@@ -2904,10 +3064,9 @@ VLIB_NODE_FN (srv6_t_m_gtp4_dt) (vlib_main_t * vm,
ls_param->fib6_index;
}
}
- else
- if ((clib_net_to_host_u32
- (ip6->ip_version_traffic_class_and_flow_label) >> 28)
- == 4)
+ else if ((clib_net_to_host_u32 (
+ ip6->ip_version_traffic_class_and_flow_label) >>
+ 28) == 4)
{
vlib_buffer_advance (b0, (word) hdrlen);
next0 = SRV6_T_M_GTP4_DT_NEXT_LOOKUP4;
@@ -2968,8 +3127,8 @@ VLIB_REGISTER_NODE (srv6_end_m_gtp6_e) =
srv6_end_error_v6_e_strings,.n_next_nodes =
SRV6_END_M_GTP6_E_N_NEXT,.next_nodes =
{
- [SRV6_END_M_GTP6_E_NEXT_DROP] =
- "error-drop",[SRV6_END_M_GTP6_E_NEXT_LOOKUP] = "ip6-lookup",}
+ [SRV6_END_M_GTP6_E_NEXT_DROP] = "error-drop",
+ [SRV6_END_M_GTP6_E_NEXT_LOOKUP] = "ip6-lookup",}
,};
VLIB_REGISTER_NODE (srv6_end_m_gtp6_d) =
@@ -2980,8 +3139,9 @@ VLIB_REGISTER_NODE (srv6_end_m_gtp6_d) =
srv6_end_error_v6_d_strings,.n_next_nodes =
SRV6_END_M_GTP6_D_N_NEXT,.next_nodes =
{
- [SRV6_END_M_GTP6_D_NEXT_DROP] =
- "error-drop",[SRV6_END_M_GTP6_D_NEXT_LOOKUP] = "ip6-lookup",}
+ [SRV6_END_M_GTP6_D_NEXT_DROP] = "error-drop",
+ [SRV6_END_M_GTP6_D_NEXT_LOOKUP4] = "ip4-lookup",
+ [SRV6_END_M_GTP6_D_NEXT_LOOKUP6] = "ip6-lookup",}
,};
VLIB_REGISTER_NODE (srv6_end_m_gtp6_d_di) =
@@ -2993,7 +3153,7 @@ VLIB_REGISTER_NODE (srv6_end_m_gtp6_d_di) =
SRV6_END_M_GTP6_D_DI_N_NEXT,.next_nodes =
{
[SRV6_END_M_GTP6_D_DI_NEXT_DROP] = "error-drop",
- [SRV6_END_M_GTP6_D_DI_NEXT_LOOKUP] = "ip6-lookup",}
+ [SRV6_END_M_GTP6_D_DI_NEXT_LOOKUP] = "ip6-lookup",}
,};
VLIB_REGISTER_NODE (srv6_end_m_gtp6_dt) =
@@ -3004,10 +3164,9 @@ VLIB_REGISTER_NODE (srv6_end_m_gtp6_dt) =
srv6_end_error_v6_dt_strings,.n_next_nodes =
SRV6_END_M_GTP6_DT_N_NEXT,.next_nodes =
{
- [SRV6_END_M_GTP6_DT_NEXT_DROP] =
- "error-drop",
- [SRV6_END_M_GTP6_DT_NEXT_LOOKUP4]
- = "ip4-lookup",[SRV6_END_M_GTP6_DT_NEXT_LOOKUP6] = "ip6-lookup",}
+ [SRV6_END_M_GTP6_DT_NEXT_DROP] = "error-drop",
+ [SRV6_END_M_GTP6_DT_NEXT_LOOKUP4] = "ip4-lookup",
+ [SRV6_END_M_GTP6_DT_NEXT_LOOKUP6] = "ip6-lookup",}
,};
VLIB_REGISTER_NODE (srv6_t_m_gtp4_dt) =
@@ -3018,10 +3177,9 @@ VLIB_REGISTER_NODE (srv6_t_m_gtp4_dt) =
srv6_t_error_v4_dt_strings,.n_next_nodes =
SRV6_T_M_GTP4_DT_N_NEXT,.next_nodes =
{
- [SRV6_T_M_GTP4_DT_NEXT_DROP] =
- "error-drop",
- [SRV6_T_M_GTP4_DT_NEXT_LOOKUP4] =
- "ip4-lookup",[SRV6_T_M_GTP4_DT_NEXT_LOOKUP6] = "ip6-lookup",}
+ [SRV6_T_M_GTP4_DT_NEXT_DROP] = "error-drop",
+ [SRV6_T_M_GTP4_DT_NEXT_LOOKUP4] = "ip4-lookup",
+ [SRV6_T_M_GTP4_DT_NEXT_LOOKUP6] = "ip6-lookup",}
,};
/*
diff --git a/src/plugins/srv6-mobile/sr_mobile.api b/src/plugins/srv6-mobile/sr_mobile.api
new file mode 100644
index 00000000000..1487085a695
--- /dev/null
+++ b/src/plugins/srv6-mobile/sr_mobile.api
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 BBSakura Networks Inc and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "0.1.0";
+
+import "vnet/interface_types.api";
+import "vnet/ip/ip_types.api";
+import "vnet/srv6/sr_types.api";
+import "vnet/srv6/sr.api";
+import "plugins/srv6-mobile/sr_mobile_types.api";
+
+/** \brief IPv6 SR for Mobile LocalSID add/del request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_del - Boolean of whether its a delete instruction
+ @param localsid_prefix - IPv6 address of the localsid
+ @param behavior - the behavior of the SR policy.
+ @param fib_table - FIB table in which we should install the localsid entry
+ @param local_fib_table - lookup and forward GTP-U packet based on outer IP destination address. optional
+ @param drop_in - that reconverts to GTPv1 mode. optional
+ @param nhtype - next-header type. optional.
+ @param sr_prefix - v6 src ip encoding prefix.optional.
+ @param v4src_position - bit position where IPv4 src address embedded. optional.
+*/
+autoreply define sr_mobile_localsid_add_del
+{
+ u32 client_index;
+ u32 context;
+ bool is_del [default=false];
+ vl_api_ip6_prefix_t localsid_prefix;
+ string behavior[64];
+ u32 fib_table;
+ u32 local_fib_table;
+ bool drop_in;
+ vl_api_sr_mobile_nhtype_t nhtype;
+ vl_api_ip6_prefix_t sr_prefix;
+ vl_api_ip4_address_t v4src_addr;
+ u32 v4src_position;
+};
+
+/** \brief IPv6 SR for Mobile policy add
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid - the bindingSID of the SR Policy
+ @param sr_prefix - v6 dst ip encoding prefix. optional
+ @param v6src_position - v6 src prefix. optional
+ @param behavior - the behavior of the SR policy.
+ @param fib_table - the VRF where to install the FIB entry for the BSID
+ @param encap_src is a encaps IPv6 source addr. optional
+ @param local_fib_table - lookup and forward GTP-U packet based on outer IP destination address. optional
+ @param drop_in - that reconverts to GTPv1 mode. optional
+ @param nhtype - next-header type.
+*/
+autoreply define sr_mobile_policy_add
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ip6_address_t bsid_addr;
+ vl_api_ip6_prefix_t sr_prefix;
+ vl_api_ip6_prefix_t v6src_prefix;
+ string behavior[64];
+ u32 fib_table;
+ u32 local_fib_table;
+ vl_api_ip6_address_t encap_src;
+ bool drop_in;
+ vl_api_sr_mobile_nhtype_t nhtype;
+};
diff --git a/src/plugins/srv6-mobile/sr_mobile_api.c b/src/plugins/srv6-mobile/sr_mobile_api.c
new file mode 100644
index 00000000000..51199317a3b
--- /dev/null
+++ b/src/plugins/srv6-mobile/sr_mobile_api.c
@@ -0,0 +1,339 @@
+/*
+ *------------------------------------------------------------------
+ * sr_mobile_api.c - ipv6 segment routing for mobile u-plane api
+ *
+ * Copyright (c) 2022 BBSakura Networks Inc and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdint.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vlibmemory/api.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+#include <vnet/feature/feature.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/ip/ip_types_api.h>
+
+#include <vnet/format_fns.h>
+#include <vnet/srv6/sr.api_enum.h>
+#include <vnet/srv6/sr.api_types.h>
+
+#include <srv6-mobile/mobile.h>
+#include <srv6-mobile/sr_mobile.api_types.h>
+#include <srv6-mobile/sr_mobile_types.api_types.h>
+#include <srv6-mobile/sr_mobile.api_enum.h>
+
+#include <srv6-mobile/sr_mobile_api.h>
+
+u16 msg_id_base;
+#define REPLY_MSG_ID_BASE msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static inline uint16_t
+sr_plugin_localsid_fn_num_find_by (ip6_sr_main_t *sm, const char *keyword_str,
+ size_t keyword_len)
+{
+ sr_localsid_fn_registration_t *plugin = 0, **vec_plugins = 0;
+ sr_localsid_fn_registration_t **plugin_it = 0;
+ pool_foreach (plugin, sm->plugin_functions)
+ {
+ vec_add1 (vec_plugins, plugin);
+ }
+
+ vec_foreach (plugin_it, vec_plugins)
+ {
+ if (!srv6_mobile_strcmp_with_size (keyword_str, keyword_len,
+ (char *) (*plugin_it)->keyword_str))
+ {
+ return (*plugin_it)->sr_localsid_function_number;
+ }
+ }
+ return UINT16_MAX;
+}
+
+static inline uint16_t
+sr_plugin_policy_fn_num_find_by (ip6_sr_main_t *sm, const char *keyword_str,
+ size_t keyword_len)
+{
+ sr_policy_fn_registration_t *plugin = 0, **vec_plugins = 0;
+ sr_policy_fn_registration_t **plugin_it = 0;
+ pool_foreach (plugin, sm->policy_plugin_functions)
+ {
+ vec_add1 (vec_plugins, plugin);
+ }
+
+ vec_foreach (plugin_it, vec_plugins)
+ {
+ if (!srv6_mobile_strcmp_with_size (keyword_str, keyword_len,
+ (char *) (*plugin_it)->keyword_str))
+ {
+ return (*plugin_it)->sr_policy_function_number;
+ }
+ }
+ return UINT16_MAX;
+}
+
+static void
+vl_api_sr_mobile_localsid_add_del_t_handler (
+ vl_api_sr_mobile_localsid_add_del_t *mp)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ vl_api_sr_mobile_localsid_add_del_reply_t *rmp;
+ int rv = 0;
+ ip6_address_t localsid;
+ u16 localsid_prefix_len = 128;
+ void *ls_plugin_mem = 0;
+ u16 behavior = 0;
+ u32 dt_type;
+ size_t behavior_size = 0;
+ mobile_localsid_function_list_t kind_fn =
+ SRV6_MOBILE_LOCALSID_UNKNOWN_FUNCTION;
+
+ mp->behavior[sizeof (mp->behavior) - 1] = '\0';
+ behavior_size = sizeof (mp->behavior);
+ // search behavior index
+ if (mp->behavior[0])
+ {
+ if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior, behavior_size,
+ "end.m.gtp4.e"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP4_E;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "end.m.gtp6.e"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_E;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "end.m.gtp6.d"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_D;
+ }
+ else if (!srv6_mobile_strcmp_with_size (
+ (char *) mp->behavior, behavior_size, "end.m.gtp6.d.di"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DI;
+ }
+ else if (!srv6_mobile_strcmp_with_size (
+ (char *) mp->behavior, behavior_size, "end.m.gtp6.d.dt4"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT4;
+ dt_type = SRV6_GTP6_DT4;
+ }
+ else if (!srv6_mobile_strcmp_with_size (
+ (char *) mp->behavior, behavior_size, "end.m.gtp6.d.dt6"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT6;
+ dt_type = SRV6_GTP6_DT6;
+ }
+ else if (!srv6_mobile_strcmp_with_size (
+ (char *) mp->behavior, behavior_size, "end.m.gtp6.d.dt46"))
+ {
+ kind_fn = SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT46;
+ dt_type = SRV6_GTP6_DT46;
+ }
+ else
+ {
+ return;
+ }
+ switch (kind_fn)
+ {
+ case SRV6_MOBILE_LOCALSID_END_M_GTP4_E:
+ alloc_param_srv6_end_m_gtp4_e (&ls_plugin_mem, &mp->v4src_addr,
+ ntohl (mp->v4src_position),
+ ntohl (mp->fib_table));
+ break;
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_E:
+ alloc_param_srv6_end_m_gtp6_e (&ls_plugin_mem,
+ ntohl (mp->fib_table));
+ break;
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_D:
+ alloc_param_srv6_end_m_gtp6_d (
+ &ls_plugin_mem, &mp->sr_prefix.address, mp->sr_prefix.len,
+ (u8) ntohl (mp->nhtype), mp->drop_in, ntohl (mp->fib_table));
+ break;
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DI:
+ alloc_param_srv6_end_m_gtp6_di (
+ &ls_plugin_mem, &mp->sr_prefix.address, mp->sr_prefix.len,
+ (u8) ntohl (mp->nhtype));
+ break;
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT4:
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT6:
+ case SRV6_MOBILE_LOCALSID_END_M_GTP6_D_DT46:
+ alloc_param_srv6_end_m_gtp6_dt (
+ &ls_plugin_mem, ntohl (mp->fib_table), ntohl (mp->local_fib_table),
+ dt_type);
+ break;
+ case SRV6_MOBILE_LOCALSID_UNKNOWN_FUNCTION:
+ default:
+ return; // error
+ }
+ behavior = sr_plugin_localsid_fn_num_find_by (sm, (char *) mp->behavior,
+ behavior_size);
+ if (behavior == UINT16_MAX)
+ return;
+ }
+ else
+ {
+ return;
+ }
+ ip6_address_decode (mp->localsid_prefix.address, &localsid);
+ localsid_prefix_len = mp->localsid_prefix.len;
+
+ rv = sr_cli_localsid (mp->is_del, &localsid, localsid_prefix_len,
+ 0, // ignore end_psp
+ behavior,
+ 0, // ignore sw_if_index
+ 0, // ignore vlan_index
+ ntohl (mp->fib_table),
+ NULL, // ignore nh_addr
+ 0, // ignore usid_len
+ ls_plugin_mem);
+
+ REPLY_MACRO (VL_API_SR_MOBILE_LOCALSID_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_sr_mobile_policy_add_t_handler (vl_api_sr_mobile_policy_add_t *mp)
+{
+ ip6_sr_main_t *sm = &sr_main;
+ vl_api_sr_mobile_policy_add_reply_t *rmp;
+ ip6_address_t bsid_addr;
+ ip6_address_t encap_src;
+ void *ls_plugin_mem = 0;
+ u16 behavior = 0;
+ size_t behavior_size = 0;
+
+ u32 dt_type;
+ mobile_policy_function_list_t kind_fn = SRV6_MOBILE_POLICY_UNKNOWN_FUNCTION;
+
+ ip6_address_decode (mp->bsid_addr, &bsid_addr);
+ ip6_address_decode (mp->encap_src, &encap_src);
+ if (ip6_address_is_zero (&encap_src))
+ {
+ encap_src = *sr_get_encaps_source ();
+ }
+ mp->behavior[sizeof (mp->behavior) - 1] = '\0';
+ behavior_size = sizeof (mp->behavior);
+
+ // search behavior index
+ if (mp->behavior[0])
+ {
+ if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior, behavior_size,
+ "t.m.gtp4.d"))
+ {
+ kind_fn = SRV6_MOBILE_POLICY_T_M_GTP4_D;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "t.m.gtp4.dt4"))
+ {
+ kind_fn = SRV6_MOBILE_POLICY_T_M_GTP4_DT4;
+ dt_type = SRV6_GTP4_DT4;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "t.m.gtp4.dt6"))
+ {
+ kind_fn = SRV6_MOBILE_POLICY_T_M_GTP4_DT6;
+ dt_type = SRV6_GTP4_DT6;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "t.m.gtp4.dt46"))
+ {
+ kind_fn = SRV6_MOBILE_POLICY_T_M_GTP4_DT46;
+ dt_type = SRV6_GTP4_DT46;
+ }
+ else if (!srv6_mobile_strcmp_with_size ((char *) mp->behavior,
+ behavior_size, "end.m.gtp6.d"))
+ {
+ kind_fn = SRV6_MOBILE_POLICY_END_M_GTP6_D;
+ }
+ else
+ {
+ return;
+ }
+
+ switch (kind_fn)
+ {
+ case SRV6_MOBILE_POLICY_T_M_GTP4_D:
+ alloc_param_srv6_t_m_gtp4_d (
+ &ls_plugin_mem, &mp->v6src_prefix.address, mp->v6src_prefix.len,
+ &mp->sr_prefix.address, mp->sr_prefix.len, ntohl (mp->fib_table),
+ mp->nhtype, mp->drop_in);
+ break;
+ case SRV6_MOBILE_POLICY_END_M_GTP6_D:
+ alloc_param_srv6_end_m_gtp6_d (
+ &ls_plugin_mem, &mp->sr_prefix.address, mp->sr_prefix.len,
+ mp->nhtype, mp->drop_in, ntohl (mp->fib_table));
+ break;
+ case SRV6_MOBILE_POLICY_T_M_GTP4_DT4:
+ case SRV6_MOBILE_POLICY_T_M_GTP4_DT6:
+ case SRV6_MOBILE_POLICY_T_M_GTP4_DT46:
+ alloc_param_srv6_t_m_gtp4_dt (&ls_plugin_mem, ntohl (mp->fib_table),
+ ntohl (mp->local_fib_table), dt_type);
+ break;
+ case SRV6_MOBILE_POLICY_UNKNOWN_FUNCTION:
+ default:
+ return; // error
+ }
+
+ behavior = sr_plugin_policy_fn_num_find_by (sm, (char *) mp->behavior,
+ behavior_size);
+ if (behavior == UINT16_MAX)
+ return;
+ }
+ else
+ {
+ return;
+ }
+
+ int rv = 0;
+ ip6_address_t *segments = 0, *this_seg;
+ vec_add2 (segments, this_seg, 1);
+ clib_memset (this_seg, 0, sizeof (*this_seg));
+
+ rv = sr_policy_add (&bsid_addr,
+ segments, // ignore segments
+ &encap_src,
+ (u32) ~0, // ignore weight
+ SR_POLICY_TYPE_DEFAULT, // ignore type
+ (u32) ~0, // ignore fib_table
+ 1, // ignore is_encap,
+ behavior, ls_plugin_mem);
+ vec_free (segments);
+ REPLY_MACRO (VL_API_SR_MOBILE_POLICY_ADD_REPLY);
+}
+
+#include <srv6-mobile/sr_mobile.api.c>
+static clib_error_t *
+sr_mobile_api_hookup (vlib_main_t *vm)
+{
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (sr_mobile_api_hookup);
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/srv6-mobile/sr_mobile_api.h b/src/plugins/srv6-mobile/sr_mobile_api.h
new file mode 100644
index 00000000000..28979b1875c
--- /dev/null
+++ b/src/plugins/srv6-mobile/sr_mobile_api.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2022 BBSakura Networks Inc and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ * @brief Segment Routing for mobile u-plane api
+ *
+ */
+
+#ifndef included_sr_mobile_api_h
+#define included_sr_mobile_api_h
+#include <stdint.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip_types_api.h>
+
+#define srv6_mobile_strcmp_with_size(s1, s1len, s2) \
+ ({ \
+ int __indicator = 0; \
+ strcmp_s_inline (s1, s1len, s2, &__indicator); \
+ __indicator; \
+ })
+
+void alloc_param_srv6_end_m_gtp4_e (void **plugin_mem_p,
+ const void *v4src_addr,
+ const u32 v4src_position,
+ const u32 fib_table);
+
+void alloc_param_srv6_end_m_gtp6_e (void **plugin_mem_p, const u32 fib_table);
+
+void alloc_param_srv6_end_m_gtp6_d (void **plugin_mem_p, const void *sr_prefix,
+ const u32 sr_prefixlen, const u8 nhtype,
+ const bool drop_in, const u32 fib_table);
+
+void alloc_param_srv6_end_m_gtp6_di (void **plugin_mem_p,
+ const void *sr_prefix,
+ const u32 sr_prefixlen, const u8 nhtype);
+
+void alloc_param_srv6_end_m_gtp6_dt (void **plugin_mem_p, const u32 fib_index,
+ const u32 local_fib_index,
+ const u32 type);
+
+void alloc_param_srv6_t_m_gtp4_d (void **plugin_mem_p,
+ const void *v6src_prefix,
+ const u32 v6src_prefixlen,
+ const void *sr_prefix,
+ const u32 sr_prefixlen, const u32 fib_index,
+ const u8 nhtype, const bool drop_in);
+
+void alloc_param_srv6_t_m_gtp4_dt (void **plugin_mem_p, const u32 fib_index,
+ const u32 local_fib_index, const u8 type);
+
+#endif /* included_sr_mobile_api_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/srv6-mobile/sr_mobile_types.api b/src/plugins/srv6-mobile/sr_mobile_types.api
new file mode 100644
index 00000000000..f2dbe302d00
--- /dev/null
+++ b/src/plugins/srv6-mobile/sr_mobile_types.api
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2022 BBSakura Networks Inc and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "0.1.0";
+
+enum sr_mobile_nhtype : u8
+{
+ SRV6_NHTYPE_API_NONE = 0,
+ SRV6_NHTYPE_API_IPV4 = 1,
+ SRV6_NHTYPE_API_IPV6 = 2,
+ SRV6_NHTYPE_API_NON_IP = 3,
+}; \ No newline at end of file
diff --git a/src/plugins/stn/stn.c b/src/plugins/stn/stn.c
index 241f7169e8b..c0ac0d0b3a6 100644
--- a/src/plugins/stn/stn.c
+++ b/src/plugins/stn/stn.c
@@ -49,7 +49,7 @@ format_stn_rule (u8 * s, va_list * args)
s = format (s, "%Uiface: %U (%d)\n", format_white_space, indent,
format_vnet_sw_if_index_name, vnet_get_main(), r->sw_if_index,
r->sw_if_index);
- s = format (s, "%Unext_node: %s (%d)", format_white_space, indent,
+ s = format (s, "%Unext_node: %v (%d)", format_white_space, indent,
next_node->name, next_node->index);
return s;
}
@@ -195,7 +195,6 @@ stn_ip6_punt_fn (vlib_main_t * vm,
return stn_ip46_punt_fn(vm, node, frame, 0);
}
-/** *INDENT-OFF* */
VLIB_REGISTER_NODE (stn_ip6_punt, static) =
{
.function = stn_ip6_punt_fn,
@@ -215,7 +214,6 @@ VNET_FEATURE_INIT (stn_ip6_punt_feat_node, static) = {
.node_name = "stn-ip6-punt",
.runs_before = VNET_FEATURES("ip6-punt-redirect"),
};
-/** *INDENT-ON* */
u8 *
format_stn_ip4_punt_trace (u8 * s, va_list * args)
@@ -230,7 +228,6 @@ stn_ip4_punt_fn (vlib_main_t * vm,
return stn_ip46_punt_fn(vm, node, frame, 1);
}
-/** *INDENT-OFF* */
VLIB_REGISTER_NODE (stn_ip4_punt, static) =
{
.function = stn_ip4_punt_fn,
@@ -250,7 +247,6 @@ VNET_FEATURE_INIT (stn_ip4_punt_feat_node, static) = {
.node_name = "stn-ip4-punt",
.runs_before = VNET_FEATURES("ip4-punt-redirect"),
};
-/** *INDENT-ON* */
clib_error_t *
stn_init (vlib_main_t * vm)
@@ -275,12 +271,10 @@ stn_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (stn_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "VPP Steals the NIC (STN) for Container Integration",
};
-/* *INDENT-ON* */
int stn_rule_add_del (stn_rule_add_del_args_t *args)
{
diff --git a/src/plugins/stn/stn_api.c b/src/plugins/stn/stn_api.c
index e8685931db5..4d1af36d448 100644
--- a/src/plugins/stn/stn_api.c
+++ b/src/plugins/stn/stn_api.c
@@ -29,13 +29,11 @@
#define REPLY_MSG_ID_BASE stn_main.msg_id_base
#include <vlibapi/api_helper_macros.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-/* Macro to finish up custom dump fns */
-#define FINISH \
- vec_add1 (s, 0); \
- vl_print (handle, (char *)s); \
- vec_free (s); \
- return handle;
+#define FINISH \
+ vec_add1 (s, 0); \
+ vlib_cli_output (handle, (char *) s); \
+ vec_free (s); \
+ return handle;
static void
vl_api_stn_add_del_rule_t_handler (vl_api_stn_add_del_rule_t * mp)
@@ -84,11 +82,9 @@ vl_api_stn_rules_dump_t_handler (vl_api_stn_rules_dump_t * mp)
if (reg == 0)
return;
- /* *INDENT-OFF* */
pool_foreach (r, stn->rules) {
send_stn_rules_details (r, reg, mp->context);
}
- /* *INDENT-ON* */
}
#include <stn/stn.api.c>
diff --git a/src/plugins/stn/stn_test.c b/src/plugins/stn/stn_test.c
index c7514cf77c5..2499ba7b6ec 100644
--- a/src/plugins/stn/stn_test.c
+++ b/src/plugins/stn/stn_test.c
@@ -23,9 +23,9 @@
#include <vlibapi/vat_helper_macros.h>
/* Declare message IDs */
-#include <vpp/api/vpe.api_types.h>
#include <stn/stn.api_enum.h>
#include <stn/stn.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
diff --git a/src/plugins/svs/svs.c b/src/plugins/svs/svs.c
index 555283397ff..4da7fb9263d 100644
--- a/src/plugins/svs/svs.c
+++ b/src/plugins/svs/svs.c
@@ -363,7 +363,6 @@ format_svs_input_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (svs_ip4_node) =
{
.function = svs_input_ip4,
@@ -402,7 +401,6 @@ VNET_FEATURE_INIT (svs_ip6_feat, static) =
.arc_name = "ip6-unicast",
.node_name = "svs-ip6",
};
-/* *INDENT-ON* */
static clib_error_t *
svs_table_cli (vlib_main_t * vm,
@@ -443,13 +441,11 @@ svs_table_cli (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (svs_table_cmd_cli, static) = {
.path = "svs table",
.short_help = "Source VRF select table [add|delete] [ip4|ip6] table-id X",
.function = svs_table_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
svs_enable_cli (vlib_main_t * vm,
@@ -497,13 +493,11 @@ svs_enable_cli (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (svs_enable_cli_cmd, static) = {
.path = "svs enable",
.short_help = "Source VRF select [enable|disable] [ip4|ip6] <table-id> X <interface>",
.function = svs_enable_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
svs_route_cli (vlib_main_t * vm,
@@ -559,13 +553,11 @@ svs_route_cli (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (svs_route_cmd_cli, static) = {
.path = "svs route",
.short_help = "Source VRF select route [add|delete] <table-id> <prefix> <src-table-id>",
.function = svs_route_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
svs_show_cli (vlib_main_t * vm,
@@ -588,13 +580,11 @@ svs_show_cli (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (svs_show_cli_cmd, static) = {
.path = "show svs",
.short_help = "Source VRF select show",
.function = svs_show_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
svs_init (vlib_main_t * vm)
diff --git a/src/plugins/svs/svs_api.c b/src/plugins/svs/svs_api.c
index 628acd01b01..b1660bc97dc 100644
--- a/src/plugins/svs/svs_api.c
+++ b/src/plugins/svs/svs_api.c
@@ -35,6 +35,7 @@
* Base message ID fot the plugin
*/
static u32 svs_base_msg_id;
+#define REPLY_MSG_ID_BASE (svs_base_msg_id)
#include <vlibapi/api_helper_macros.h>
static void
@@ -80,7 +81,7 @@ vl_api_svs_table_add_del_t_handler (vl_api_svs_table_add_del_t * mp)
}
error:
- REPLY_MACRO (VL_API_SVS_TABLE_ADD_DEL_REPLY + svs_base_msg_id);
+ REPLY_MACRO (VL_API_SVS_TABLE_ADD_DEL_REPLY);
}
static void
@@ -102,7 +103,7 @@ vl_api_svs_route_add_del_t_handler (vl_api_svs_route_add_del_t * mp)
rv = svs_route_delete (ntohl (mp->table_id), &pfx);
}
- REPLY_MACRO (VL_API_SVS_ROUTE_ADD_DEL_REPLY + svs_base_msg_id);
+ REPLY_MACRO (VL_API_SVS_ROUTE_ADD_DEL_REPLY);
}
static void
@@ -130,7 +131,7 @@ vl_api_svs_enable_disable_t_handler (vl_api_svs_enable_disable_t * mp)
BAD_SW_IF_INDEX_LABEL;
error:
- REPLY_MACRO (VL_API_SVS_ENABLE_DISABLE_REPLY + svs_base_msg_id);
+ REPLY_MACRO (VL_API_SVS_ENABLE_DISABLE_REPLY);
}
typedef struct svs_dump_walk_ctx_t_
@@ -191,12 +192,10 @@ svs_api_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (svs_api_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
- .version = VPP_BUILD_VER,
- .description = "Source Virtual Routing and Fowarding (VRF) Select",
+ .version = VPP_BUILD_VER,
+ .description = "Source Virtual Routing and Forwarding (VRF) Select",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tlsmbedtls/tls_mbedtls.c b/src/plugins/tlsmbedtls/tls_mbedtls.c
index 3fccba2ec5a..af04f1adeb0 100644
--- a/src/plugins/tlsmbedtls/tls_mbedtls.c
+++ b/src/plugins/tlsmbedtls/tls_mbedtls.c
@@ -74,7 +74,8 @@ mbedtls_ctx_alloc (void)
mbedtls_main_t *tm = &mbedtls_main;
mbedtls_ctx_t **ctx;
- pool_get (tm->ctx_pool[thread_index], ctx);
+ pool_get_aligned_safe (tm->ctx_pool[thread_index], ctx,
+ CLIB_CACHE_LINE_BYTES);
if (!(*ctx))
*ctx = clib_mem_alloc (sizeof (mbedtls_ctx_t));
@@ -90,7 +91,8 @@ mbedtls_ctx_free (tls_ctx_t * ctx)
{
mbedtls_ctx_t *mc = (mbedtls_ctx_t *) ctx;
- if (mc->ssl.state == MBEDTLS_SSL_HANDSHAKE_OVER && !ctx->is_passive_close)
+ if (mc->ssl.state == MBEDTLS_SSL_HANDSHAKE_OVER &&
+ !(ctx->flags & TLS_CONN_F_PASSIVE_CLOSE))
mbedtls_ssl_close_notify (&mc->ssl);
if (mc->ssl.conf->endpoint == MBEDTLS_SSL_IS_SERVER)
{
@@ -550,11 +552,32 @@ mbedtls_transport_close (tls_ctx_t * ctx)
}
static int
+mbedtls_transport_reset (tls_ctx_t *ctx)
+{
+ if (!mbedtls_handshake_is_over (ctx))
+ {
+ session_close (session_get_from_handle (ctx->tls_session_handle));
+ return 0;
+ }
+
+ session_transport_reset_notify (&ctx->connection);
+ session_transport_closed_notify (&ctx->connection);
+ tls_disconnect_transport (ctx);
+ return 0;
+}
+
+static int
mbedtls_app_close (tls_ctx_t * ctx)
{
tls_disconnect_transport (ctx);
session_transport_delete_notify (&ctx->connection);
- mbedtls_ctx_free (ctx);
+ return 0;
+}
+
+static int
+mbedtls_reinit_ca_chain (void)
+{
+ /* Not supported Yet */
return 0;
}
@@ -571,7 +594,9 @@ const static tls_engine_vft_t mbedtls_engine = {
.ctx_start_listen = mbedtls_start_listen,
.ctx_stop_listen = mbedtls_stop_listen,
.ctx_transport_close = mbedtls_transport_close,
+ .ctx_transport_reset = mbedtls_transport_reset,
.ctx_app_close = mbedtls_app_close,
+ .ctx_reinit_cachain = mbedtls_reinit_ca_chain,
};
int
@@ -663,19 +688,15 @@ tls_mbedtls_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (tls_mbedtls_init) =
{
.runs_after = VLIB_INITS("tls_init"),
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Transport Layer Security (TLS) Engine, Mbedtls Based",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tlsopenssl/CMakeLists.txt b/src/plugins/tlsopenssl/CMakeLists.txt
index eb67e4cceaf..70a62aedf9c 100644
--- a/src/plugins/tlsopenssl/CMakeLists.txt
+++ b/src/plugins/tlsopenssl/CMakeLists.txt
@@ -14,6 +14,7 @@
include (CheckFunctionExists)
if(OPENSSL_FOUND AND OPENSSL_VERSION VERSION_GREATER_EQUAL "1.1.0")
include_directories(${OPENSSL_INCLUDE_DIR})
+ add_compile_definitions(OPENSSL_SUPPRESS_DEPRECATED)
add_vpp_plugin(tlsopenssl
SOURCES
tls_bio.c
diff --git a/src/plugins/tlsopenssl/tls_async.c b/src/plugins/tlsopenssl/tls_async.c
index 89b4f77e331..d85af686d21 100644
--- a/src/plugins/tlsopenssl/tls_async.c
+++ b/src/plugins/tlsopenssl/tls_async.c
@@ -437,7 +437,7 @@ tls_async_do_job (int eidx, u32 thread_index)
if (ctx)
{
- ctx->resume = 1;
+ ctx->flags |= TLS_CONN_F_RESUME;
session_send_rpc_evt_to_thread (thread_index, event_handler, event);
}
return 1;
@@ -510,7 +510,6 @@ tls_async_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
VLIB_INIT_FUNCTION (tls_async_init);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tls_async_process_node,static) = {
.function = tls_async_process,
.type = VLIB_NODE_TYPE_INPUT,
@@ -518,7 +517,6 @@ VLIB_REGISTER_NODE (tls_async_process_node,static) = {
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tlsopenssl/tls_openssl.c b/src/plugins/tlsopenssl/tls_openssl.c
index 05cd13c9970..5d172a0adcf 100644
--- a/src/plugins/tlsopenssl/tls_openssl.c
+++ b/src/plugins/tlsopenssl/tls_openssl.c
@@ -27,6 +27,8 @@
#include <ctype.h>
#include <tlsopenssl/tls_openssl.h>
#include <tlsopenssl/tls_bios.h>
+#include <openssl/x509_vfy.h>
+#include <openssl/x509v3.h>
#define MAX_CRYPTO_LEN 64
@@ -38,7 +40,8 @@ openssl_ctx_alloc_w_thread (u32 thread_index)
openssl_main_t *om = &openssl_main;
openssl_ctx_t **ctx;
- pool_get (om->ctx_pool[thread_index], ctx);
+ pool_get_aligned_safe (om->ctx_pool[thread_index], ctx, 0);
+
if (!(*ctx))
*ctx = clib_mem_alloc (sizeof (openssl_ctx_t));
@@ -62,14 +65,15 @@ openssl_ctx_free (tls_ctx_t * ctx)
openssl_ctx_t *oc = (openssl_ctx_t *) ctx;
/* Cleanup ssl ctx unless migrated */
- if (!ctx->is_migrated)
+ if (!(ctx->flags & TLS_CONN_F_MIGRATED))
{
- if (SSL_is_init_finished (oc->ssl) && !ctx->is_passive_close)
+ if (SSL_is_init_finished (oc->ssl) &&
+ !(ctx->flags & TLS_CONN_F_PASSIVE_CLOSE))
SSL_shutdown (oc->ssl);
SSL_free (oc->ssl);
vec_free (ctx->srv_hostname);
-
+ SSL_CTX_free (oc->client_ssl_ctx);
#ifdef HAVE_OPENSSL_ASYNC
openssl_evt_free (ctx->evt_index, ctx->c_thread_index);
#endif
@@ -97,7 +101,7 @@ openssl_ctx_attach (u32 thread_index, void *ctx_ptr)
session_handle_t sh;
openssl_ctx_t **oc;
- pool_get (om->ctx_pool[thread_index], oc);
+ pool_get_aligned_safe (om->ctx_pool[thread_index], oc, 0);
/* Free the old instance instead of looking for an empty spot */
if (*oc)
clib_mem_free (*oc);
@@ -155,8 +159,12 @@ openssl_lctx_get (u32 lctx_index)
return pool_elt_at_index (openssl_main.lctx_pool, lctx_index);
}
+#define ossl_check_err_is_fatal(_ssl, _rv) \
+ if (PREDICT_FALSE (_rv < 0 && SSL_get_error (_ssl, _rv) == SSL_ERROR_SSL)) \
+ return -1;
+
static int
-openssl_read_from_ssl_into_fifo (svm_fifo_t * f, SSL * ssl)
+openssl_read_from_ssl_into_fifo (svm_fifo_t *f, SSL *ssl, u32 max_len)
{
int read, rv, n_fs, i;
const int n_segs = 2;
@@ -167,6 +175,7 @@ openssl_read_from_ssl_into_fifo (svm_fifo_t * f, SSL * ssl)
if (!max_enq)
return 0;
+ max_enq = clib_min (max_len, max_enq);
n_fs = svm_fifo_provision_chunks (f, fs, n_segs, max_enq);
if (n_fs < 0)
return 0;
@@ -174,17 +183,25 @@ openssl_read_from_ssl_into_fifo (svm_fifo_t * f, SSL * ssl)
/* Return early if we can't read anything */
read = SSL_read (ssl, fs[0].data, fs[0].len);
if (read <= 0)
- return 0;
+ {
+ ossl_check_err_is_fatal (ssl, read);
+ return 0;
+ }
- for (i = 1; i < n_fs; i++)
+ if (read == (int) fs[0].len)
{
- rv = SSL_read (ssl, fs[i].data, fs[i].len);
- read += rv > 0 ? rv : 0;
+ for (i = 1; i < n_fs; i++)
+ {
+ rv = SSL_read (ssl, fs[i].data, fs[i].len);
+ read += rv > 0 ? rv : 0;
- if (rv < (int) fs[i].len)
- break;
+ if (rv < (int) fs[i].len)
+ {
+ ossl_check_err_is_fatal (ssl, rv);
+ break;
+ }
+ }
}
-
svm_fifo_enqueue_nocopy (f, read);
return read;
@@ -194,10 +211,10 @@ static int
openssl_write_from_fifo_into_ssl (svm_fifo_t *f, SSL *ssl, u32 max_len)
{
int wrote = 0, rv, i = 0, len;
- const int n_segs = 2;
+ u32 n_segs = 2;
svm_fifo_seg_t fs[n_segs];
- len = svm_fifo_segments (f, 0, fs, n_segs, max_len);
+ len = svm_fifo_segments (f, 0, fs, &n_segs, max_len);
if (len <= 0)
return 0;
@@ -206,7 +223,10 @@ openssl_write_from_fifo_into_ssl (svm_fifo_t *f, SSL *ssl, u32 max_len)
rv = SSL_write (ssl, fs[i].data, fs[i].len);
wrote += (rv > 0) ? rv : 0;
if (rv < (int) fs[i].len)
- break;
+ {
+ ossl_check_err_is_fatal (ssl, rv);
+ break;
+ }
i++;
}
@@ -243,22 +263,18 @@ openssl_check_async_status (tls_ctx_t * ctx, openssl_resume_handler * handler,
static void
openssl_handle_handshake_failure (tls_ctx_t * ctx)
{
- session_t *app_session;
+ /* Failed to renegotiate handshake */
+ if (ctx->flags & TLS_CONN_F_HS_DONE)
+ {
+ tls_notify_app_io_error (ctx);
+ tls_disconnect_transport (ctx);
+ return;
+ }
if (SSL_is_server (((openssl_ctx_t *) ctx)->ssl))
{
- /*
- * Cleanup pre-allocated app session and close transport
- */
- app_session =
- session_get_if_valid (ctx->c_s_index, ctx->c_thread_index);
- if (app_session)
- {
- session_free (app_session);
- ctx->no_app_session = 1;
- ctx->c_s_index = SESSION_INVALID_INDEX;
- tls_disconnect_transport (ctx);
- }
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
+ tls_disconnect_transport (ctx);
}
else
{
@@ -266,6 +282,7 @@ openssl_handle_handshake_failure (tls_ctx_t * ctx)
* Also handles cleanup of the pre-allocated session
*/
tls_notify_app_connected (ctx, SESSION_E_TLS_HANDSHAKE);
+ tls_disconnect_transport (ctx);
}
}
@@ -277,9 +294,9 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session)
while (SSL_in_init (oc->ssl))
{
- if (ctx->resume)
+ if (ctx->flags & TLS_CONN_F_RESUME)
{
- ctx->resume = 0;
+ ctx->flags &= ~TLS_CONN_F_RESUME;
}
else if (!svm_fifo_max_dequeue_cons (tls_session->rx_fifo))
break;
@@ -313,6 +330,10 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session)
if (SSL_in_init (oc->ssl))
return -1;
+ /* Renegotiated handshake, app must not be notified */
+ if (PREDICT_FALSE (ctx->flags & TLS_CONN_F_HS_DONE))
+ return 0;
+
/*
* Handshake complete
*/
@@ -331,16 +352,20 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session)
*/
if (ctx->srv_hostname)
{
- tls_notify_app_connected (ctx, SESSION_E_TLS_HANDSHAKE);
+ openssl_handle_handshake_failure (ctx);
return -1;
}
}
- tls_notify_app_connected (ctx, SESSION_E_NONE);
+ if (tls_notify_app_connected (ctx, SESSION_E_NONE))
+ {
+ tls_disconnect_transport (ctx);
+ return -1;
+ }
}
else
{
/* Need to check transport status */
- if (ctx->is_passive_close)
+ if (ctx->flags & TLS_CONN_F_PASSIVE_CLOSE)
{
openssl_handle_handshake_failure (ctx);
return -1;
@@ -354,7 +379,7 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session)
return -1;
}
}
-
+ ctx->flags |= TLS_CONN_F_HS_DONE;
TLS_DBG (1, "Handshake for %u complete. TLS cipher is %s",
oc->openssl_ctx_index, SSL_get_cipher (oc->ssl));
return rv;
@@ -363,6 +388,8 @@ openssl_ctx_handshake_rx (tls_ctx_t * ctx, session_t * tls_session)
static void
openssl_confirm_app_close (tls_ctx_t * ctx)
{
+ openssl_ctx_t *oc = (openssl_ctx_t *) ctx;
+ SSL_shutdown (oc->ssl);
tls_disconnect_transport (ctx);
session_transport_closed_notify (&ctx->connection);
}
@@ -399,6 +426,14 @@ openssl_ctx_write_tls (tls_ctx_t *ctx, session_t *app_session,
goto check_tls_fifo;
wrote = openssl_write_from_fifo_into_ssl (f, oc->ssl, deq_max);
+
+ /* Unrecoverable protocol error. Reset connection */
+ if (PREDICT_FALSE (wrote < 0))
+ {
+ tls_notify_app_io_error (ctx);
+ return 0;
+ }
+
if (!wrote)
goto check_tls_fifo;
@@ -407,7 +442,8 @@ openssl_ctx_write_tls (tls_ctx_t *ctx, session_t *app_session,
check_tls_fifo:
- if (PREDICT_FALSE (ctx->app_closed && BIO_ctrl_pending (oc->rbio) <= 0))
+ if (PREDICT_FALSE ((ctx->flags & TLS_CONN_F_APP_CLOSED) &&
+ BIO_ctrl_pending (oc->rbio) <= 0))
openssl_confirm_app_close (ctx);
/* Deschedule and wait for deq notification if fifo is almost full */
@@ -419,8 +455,11 @@ check_tls_fifo:
sp->flags |= TRANSPORT_SND_F_DESCHED;
}
else
- /* Request tx reschedule of the app session */
- app_session->flags |= SESSION_F_CUSTOM_TX;
+ {
+ /* Request tx reschedule of the app session */
+ if (wrote)
+ app_session->flags |= SESSION_F_CUSTOM_TX;
+ }
return wrote;
}
@@ -479,7 +518,7 @@ done:
if (read)
tls_add_vpp_q_tx_evt (us);
- if (PREDICT_FALSE (ctx->app_closed &&
+ if (PREDICT_FALSE ((ctx->flags & TLS_CONN_F_APP_CLOSED) &&
!svm_fifo_max_enqueue_prod (us->rx_fifo)))
openssl_confirm_app_close (ctx);
@@ -500,23 +539,33 @@ static inline int
openssl_ctx_read_tls (tls_ctx_t *ctx, session_t *tls_session)
{
openssl_ctx_t *oc = (openssl_ctx_t *) ctx;
+ const u32 max_len = 128 << 10;
session_t *app_session;
- int read;
svm_fifo_t *f;
+ int read;
if (PREDICT_FALSE (SSL_in_init (oc->ssl)))
{
if (openssl_ctx_handshake_rx (ctx, tls_session) < 0)
return 0;
+
+ /* Application might force a session pool realloc on accept */
+ tls_session = session_get_from_handle (ctx->tls_session_handle);
}
app_session = session_get_from_handle (ctx->app_session_handle);
f = app_session->rx_fifo;
- read = openssl_read_from_ssl_into_fifo (f, oc->ssl);
+ read = openssl_read_from_ssl_into_fifo (f, oc->ssl, max_len);
- /* If handshake just completed, session may still be in accepting state */
- if (read && app_session->session_state >= SESSION_STATE_READY)
+ /* Unrecoverable protocol error. Reset connection */
+ if (PREDICT_FALSE (read < 0))
+ {
+ tls_notify_app_io_error (ctx);
+ return 0;
+ }
+
+ if (read)
tls_notify_app_enqueue (ctx, app_session);
if ((SSL_pending (oc->ssl) > 0) ||
@@ -597,6 +646,88 @@ openssl_ctx_read (tls_ctx_t *ctx, session_t *ts)
}
static int
+openssl_set_ckpair (SSL *ssl, u32 ckpair_index)
+{
+ app_cert_key_pair_t *ckpair;
+ BIO *cert_bio;
+ EVP_PKEY *pkey;
+ X509 *srvcert;
+
+ /* Configure a ckpair index only if non-default/test provided */
+ if (ckpair_index == 0)
+ return 0;
+
+ ckpair = app_cert_key_pair_get_if_valid (ckpair_index);
+ if (!ckpair)
+ return -1;
+
+ if (!ckpair->cert || !ckpair->key)
+ {
+ TLS_DBG (1, "tls cert and/or key not configured");
+ return -1;
+ }
+ /*
+ * Set the key and cert
+ */
+ cert_bio = BIO_new (BIO_s_mem ());
+ BIO_write (cert_bio, ckpair->cert, vec_len (ckpair->cert));
+ srvcert = PEM_read_bio_X509 (cert_bio, NULL, NULL, NULL);
+ if (!srvcert)
+ {
+ clib_warning ("unable to parse certificate");
+ return -1;
+ }
+ SSL_use_certificate (ssl, srvcert);
+ BIO_free (cert_bio);
+
+ cert_bio = BIO_new (BIO_s_mem ());
+ BIO_write (cert_bio, ckpair->key, vec_len (ckpair->key));
+ pkey = PEM_read_bio_PrivateKey (cert_bio, NULL, NULL, NULL);
+ if (!pkey)
+ {
+ clib_warning ("unable to parse pkey");
+ return -1;
+ }
+ SSL_use_PrivateKey (ssl, pkey);
+ BIO_free (cert_bio);
+ TLS_DBG (1, "TLS client using ckpair index: %d", ckpair_index);
+ return 0;
+}
+
+static int
+openssl_client_init_verify (SSL *ssl, const char *srv_hostname,
+ int set_hostname_verification,
+ int set_hostname_strict_check)
+{
+ if (set_hostname_verification)
+ {
+ X509_VERIFY_PARAM *param = SSL_get0_param (ssl);
+ if (!param)
+ {
+ TLS_DBG (1, "Couldn't fetch SSL param");
+ return -1;
+ }
+
+ if (set_hostname_strict_check)
+ X509_VERIFY_PARAM_set_hostflags (param,
+ X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS);
+
+ if (!X509_VERIFY_PARAM_set1_host (param, srv_hostname, 0))
+ {
+ TLS_DBG (1, "Couldn't set hostname for verification");
+ return -1;
+ }
+ SSL_set_verify (ssl, SSL_VERIFY_PEER, 0);
+ }
+ if (!SSL_set_tlsext_host_name (ssl, srv_hostname))
+ {
+ TLS_DBG (1, "Couldn't set hostname");
+ return -1;
+ }
+ return 0;
+}
+
+static int
openssl_ctx_init_client (tls_ctx_t * ctx)
{
long flags = SSL_OP_NO_SSLv2 | SSL_OP_NO_SSLv3 | SSL_OP_NO_COMPRESSION;
@@ -613,30 +744,31 @@ openssl_ctx_init_client (tls_ctx_t * ctx)
return -1;
}
- oc->ssl_ctx = SSL_CTX_new (method);
- if (oc->ssl_ctx == NULL)
+ oc->client_ssl_ctx = SSL_CTX_new (method);
+ if (oc->client_ssl_ctx == NULL)
{
TLS_DBG (1, "SSL_CTX_new returned null");
return -1;
}
- SSL_CTX_set_ecdh_auto (oc->ssl_ctx, 1);
- SSL_CTX_set_mode (oc->ssl_ctx, SSL_MODE_ENABLE_PARTIAL_WRITE);
+ SSL_CTX_set_ecdh_auto (oc->client_ssl_ctx, 1);
+ SSL_CTX_set_mode (oc->client_ssl_ctx, SSL_MODE_ENABLE_PARTIAL_WRITE);
#ifdef HAVE_OPENSSL_ASYNC
if (om->async)
- SSL_CTX_set_mode (oc->ssl_ctx, SSL_MODE_ASYNC);
+ SSL_CTX_set_mode (oc->client_ssl_ctx, SSL_MODE_ASYNC);
#endif
- rv = SSL_CTX_set_cipher_list (oc->ssl_ctx, (const char *) om->ciphers);
+ rv =
+ SSL_CTX_set_cipher_list (oc->client_ssl_ctx, (const char *) om->ciphers);
if (rv != 1)
{
TLS_DBG (1, "Couldn't set cipher");
return -1;
}
- SSL_CTX_set_options (oc->ssl_ctx, flags);
- SSL_CTX_set_cert_store (oc->ssl_ctx, om->cert_store);
+ SSL_CTX_set_options (oc->client_ssl_ctx, flags);
+ SSL_CTX_set1_cert_store (oc->client_ssl_ctx, om->cert_store);
- oc->ssl = SSL_new (oc->ssl_ctx);
+ oc->ssl = SSL_new (oc->client_ssl_ctx);
if (oc->ssl == NULL)
{
TLS_DBG (1, "Couldn't initialize ssl struct");
@@ -657,12 +789,18 @@ openssl_ctx_init_client (tls_ctx_t * ctx)
SSL_set_bio (oc->ssl, oc->wbio, oc->rbio);
SSL_set_connect_state (oc->ssl);
- rv = SSL_set_tlsext_host_name (oc->ssl, ctx->srv_hostname);
- if (rv != 1)
+ /* Hostname validation and strict check by name are disabled by default */
+ rv = openssl_client_init_verify (oc->ssl, (const char *) ctx->srv_hostname,
+ 0, 0);
+ if (rv)
{
- TLS_DBG (1, "Couldn't set hostname");
+ TLS_DBG (1, "ERROR:verify init failed:%d", rv);
return -1;
}
+ if (openssl_set_ckpair (oc->ssl, ctx->ckpair_index))
+ {
+ TLS_DBG (1, "Couldn't set client certificate-key pair");
+ }
/*
* 2. Do the first steps in the handshake.
@@ -749,29 +887,59 @@ openssl_start_listen (tls_ctx_t * lctx)
return -1;
}
+ /* use the default OpenSSL built-in DH parameters */
+ rv = SSL_CTX_set_dh_auto (ssl_ctx, 1);
+ if (rv != 1)
+ {
+ TLS_DBG (1, "Couldn't set temp DH parameters");
+ return -1;
+ }
+
/*
* Set the key and cert
*/
cert_bio = BIO_new (BIO_s_mem ());
+ if (!cert_bio)
+ {
+ clib_warning ("unable to allocate memory");
+ return -1;
+ }
BIO_write (cert_bio, ckpair->cert, vec_len (ckpair->cert));
srvcert = PEM_read_bio_X509 (cert_bio, NULL, NULL, NULL);
if (!srvcert)
{
clib_warning ("unable to parse certificate");
- return -1;
+ goto err;
}
- SSL_CTX_use_certificate (ssl_ctx, srvcert);
+ rv = SSL_CTX_use_certificate (ssl_ctx, srvcert);
+ if (rv != 1)
+ {
+ clib_warning ("unable to use SSL certificate");
+ goto err;
+ }
+
BIO_free (cert_bio);
cert_bio = BIO_new (BIO_s_mem ());
+ if (!cert_bio)
+ {
+ clib_warning ("unable to allocate memory");
+ return -1;
+ }
BIO_write (cert_bio, ckpair->key, vec_len (ckpair->key));
pkey = PEM_read_bio_PrivateKey (cert_bio, NULL, NULL, NULL);
if (!pkey)
{
clib_warning ("unable to parse pkey");
- return -1;
+ goto err;
}
- SSL_CTX_use_PrivateKey (ssl_ctx, pkey);
+ rv = SSL_CTX_use_PrivateKey (ssl_ctx, pkey);
+ if (rv != 1)
+ {
+ clib_warning ("unable to use SSL PrivateKey");
+ goto err;
+ }
+
BIO_free (cert_bio);
olc_index = openssl_listen_ctx_alloc ();
@@ -785,6 +953,10 @@ openssl_start_listen (tls_ctx_t * lctx)
return 0;
+err:
+ if (cert_bio)
+ BIO_free (cert_bio);
+ return -1;
}
static int
@@ -892,6 +1064,22 @@ openssl_transport_close (tls_ctx_t * ctx)
}
static int
+openssl_transport_reset (tls_ctx_t *ctx)
+{
+ if (!openssl_handshake_is_over (ctx))
+ {
+ openssl_handle_handshake_failure (ctx);
+ return 0;
+ }
+
+ session_transport_reset_notify (&ctx->connection);
+ session_transport_closed_notify (&ctx->connection);
+ tls_disconnect_transport (ctx);
+
+ return 0;
+}
+
+static int
openssl_app_close (tls_ctx_t * ctx)
{
openssl_ctx_t *oc = (openssl_ctx_t *) ctx;
@@ -902,30 +1090,9 @@ openssl_app_close (tls_ctx_t * ctx)
if (BIO_ctrl_pending (oc->rbio) <= 0
&& !svm_fifo_max_dequeue_cons (app_session->tx_fifo))
openssl_confirm_app_close (ctx);
- else
- ctx->app_closed = 1;
return 0;
}
-const static tls_engine_vft_t openssl_engine = {
- .ctx_alloc = openssl_ctx_alloc,
- .ctx_alloc_w_thread = openssl_ctx_alloc_w_thread,
- .ctx_free = openssl_ctx_free,
- .ctx_attach = openssl_ctx_attach,
- .ctx_detach = openssl_ctx_detach,
- .ctx_get = openssl_ctx_get,
- .ctx_get_w_thread = openssl_ctx_get_w_thread,
- .ctx_init_server = openssl_ctx_init_server,
- .ctx_init_client = openssl_ctx_init_client,
- .ctx_write = openssl_ctx_write,
- .ctx_read = openssl_ctx_read,
- .ctx_handshake_is_over = openssl_handshake_is_over,
- .ctx_start_listen = openssl_start_listen,
- .ctx_stop_listen = openssl_stop_listen,
- .ctx_transport_close = openssl_transport_close,
- .ctx_app_close = openssl_app_close,
-};
-
int
tls_init_ca_chain (void)
{
@@ -975,21 +1142,50 @@ tls_init_ca_chain (void)
}
int
+openssl_reinit_ca_chain (void)
+{
+ openssl_main_t *om = &openssl_main;
+
+ /* Remove/free existing x509_store */
+ if (om->cert_store)
+ {
+ X509_STORE_free (om->cert_store);
+ }
+ return tls_init_ca_chain ();
+}
+
+const static tls_engine_vft_t openssl_engine = {
+ .ctx_alloc = openssl_ctx_alloc,
+ .ctx_alloc_w_thread = openssl_ctx_alloc_w_thread,
+ .ctx_free = openssl_ctx_free,
+ .ctx_attach = openssl_ctx_attach,
+ .ctx_detach = openssl_ctx_detach,
+ .ctx_get = openssl_ctx_get,
+ .ctx_get_w_thread = openssl_ctx_get_w_thread,
+ .ctx_init_server = openssl_ctx_init_server,
+ .ctx_init_client = openssl_ctx_init_client,
+ .ctx_write = openssl_ctx_write,
+ .ctx_read = openssl_ctx_read,
+ .ctx_handshake_is_over = openssl_handshake_is_over,
+ .ctx_start_listen = openssl_start_listen,
+ .ctx_stop_listen = openssl_stop_listen,
+ .ctx_transport_close = openssl_transport_close,
+ .ctx_transport_reset = openssl_transport_reset,
+ .ctx_app_close = openssl_app_close,
+ .ctx_reinit_cachain = openssl_reinit_ca_chain,
+};
+
+int
tls_openssl_set_ciphers (char *ciphers)
{
openssl_main_t *om = &openssl_main;
- int i;
if (!ciphers)
{
return -1;
}
- vec_validate (om->ciphers, strlen (ciphers) - 1);
- for (i = 0; i < vec_len (om->ciphers); i++)
- {
- om->ciphers[i] = toupper (ciphers[i]);
- }
+ vec_validate_init_c_string (om->ciphers, ciphers, strlen (ciphers));
return 0;
@@ -1009,12 +1205,6 @@ tls_openssl_init (vlib_main_t * vm)
SSL_library_init ();
SSL_load_error_strings ();
- if (tls_init_ca_chain ())
- {
- clib_warning ("failed to initialize TLS CA chain");
- return 0;
- }
-
vec_validate (om->ctx_pool, num_threads - 1);
vec_validate (om->rx_bufs, num_threads - 1);
vec_validate (om->tx_bufs, num_threads - 1);
@@ -1031,14 +1221,18 @@ tls_openssl_init (vlib_main_t * vm)
tls_openssl_set_ciphers
("ALL:!ADH:!LOW:!EXP:!MD5:!RC4-SHA:!DES-CBC3-SHA:@STRENGTH");
+ if (tls_init_ca_chain ())
+ {
+ clib_warning ("failed to initialize TLS CA chain");
+ return 0;
+ }
+
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (tls_openssl_init) =
{
.runs_after = VLIB_INITS("tls_init"),
};
-/* *INDENT-ON* */
#ifdef HAVE_OPENSSL_ASYNC
static clib_error_t *
@@ -1109,22 +1303,18 @@ tls_openssl_set_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tls_openssl_set_command, static) =
{
.path = "tls openssl set",
.short_help = "tls openssl set [engine <engine name>] [alg [algorithm] [async]",
.function = tls_openssl_set_command_fn,
};
-/* *INDENT-ON* */
#endif
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Transport Layer Security (TLS) Engine, OpenSSL Based",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tlsopenssl/tls_openssl.h b/src/plugins/tlsopenssl/tls_openssl.h
index a4beecc8ec1..1600cd77aba 100644
--- a/src/plugins/tlsopenssl/tls_openssl.h
+++ b/src/plugins/tlsopenssl/tls_openssl.h
@@ -33,7 +33,7 @@ typedef struct tls_ctx_openssl_
{
tls_ctx_t ctx; /**< First */
u32 openssl_ctx_index;
- SSL_CTX *ssl_ctx;
+ SSL_CTX *client_ssl_ctx;
SSL *ssl;
BIO *rbio;
BIO *wbio;
diff --git a/src/plugins/tlsopenssl/tls_openssl_api.c b/src/plugins/tlsopenssl/tls_openssl_api.c
index c34829f0b29..0b17271313d 100644
--- a/src/plugins/tlsopenssl/tls_openssl_api.c
+++ b/src/plugins/tlsopenssl/tls_openssl_api.c
@@ -23,7 +23,6 @@
#include <tlsopenssl/tls_openssl.api_enum.h>
#include <tlsopenssl/tls_openssl.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define REPLY_MSG_ID_BASE om->msg_id_base
#include <vlibapi/api_helper_macros.h>
diff --git a/src/plugins/tlspicotls/CMakeLists.txt b/src/plugins/tlspicotls/CMakeLists.txt
index f23ae6ccb8a..e60a0e0ebd4 100644
--- a/src/plugins/tlspicotls/CMakeLists.txt
+++ b/src/plugins/tlspicotls/CMakeLists.txt
@@ -1,11 +1,16 @@
include (CheckFunctionExists)
+if(NOT OPENSSL_FOUND)
+ message(WARNING "OpenSSL not found - tlspicotls plugin disabled")
+ return()
+endif()
+
# We should rely on a picotls specific version, but as
# we expect dependancies to be built with vpp-ext-deps
# it's reasonable to make this check to avoid breaking
# existing builds when upgrading the quicly/picotls
# versions
-set(EXPECTED_QUICLY_VERSION "0.1.3-vpp")
+set(EXPECTED_QUICLY_VERSION "0.1.4-vpp")
vpp_find_path(QUICLY_INCLUDE_DIR NAMES quicly.h)
diff --git a/src/plugins/tlspicotls/pico_vpp_crypto.c b/src/plugins/tlspicotls/pico_vpp_crypto.c
index 9af0f2f4d92..3d28d50b352 100644
--- a/src/plugins/tlspicotls/pico_vpp_crypto.c
+++ b/src/plugins/tlspicotls/pico_vpp_crypto.c
@@ -31,19 +31,22 @@ struct cipher_context_t
{
ptls_cipher_context_t super;
vnet_crypto_op_t op;
+ vnet_crypto_op_id_t id;
u32 key_index;
};
struct vpp_aead_context_t
{
ptls_aead_context_t super;
+ EVP_CIPHER_CTX *evp_ctx;
+ uint8_t static_iv[PTLS_MAX_IV_SIZE];
vnet_crypto_op_t op;
+ u32 key_index;
+ vnet_crypto_op_id_t id;
vnet_crypto_op_chunk_t chunks[2];
vnet_crypto_alg_t alg;
- u32 key_index;
u32 chunk_index;
uint8_t iv[PTLS_MAX_IV_SIZE];
- uint8_t static_iv[PTLS_MAX_IV_SIZE];
};
static void
@@ -51,23 +54,7 @@ ptls_vpp_crypto_cipher_do_init (ptls_cipher_context_t * _ctx, const void *iv)
{
struct cipher_context_t *ctx = (struct cipher_context_t *) _ctx;
- vnet_crypto_op_id_t id;
- if (!strcmp (ctx->super.algo->name, "AES128-CTR"))
- {
- id = VNET_CRYPTO_OP_AES_128_CTR_ENC;
- }
- else if (!strcmp (ctx->super.algo->name, "AES256-CTR"))
- {
- id = VNET_CRYPTO_OP_AES_256_CTR_ENC;
- }
- else
- {
- TLS_DBG (1, "%s, Invalid crypto cipher : ", __FUNCTION__,
- _ctx->algo->name);
- assert (0);
- }
-
- vnet_crypto_op_init (&ctx->op, id);
+ vnet_crypto_op_init (&ctx->op, ctx->id);
ctx->op.iv = (u8 *) iv;
ctx->op.key_index = ctx->key_index;
}
@@ -109,10 +96,14 @@ ptls_vpp_crypto_cipher_setup_crypto (ptls_cipher_context_t * _ctx, int is_enc,
if (!strcmp (ctx->super.algo->name, "AES128-CTR"))
{
algo = VNET_CRYPTO_ALG_AES_128_CTR;
+ ctx->id = is_enc ? VNET_CRYPTO_OP_AES_128_CTR_ENC :
+ VNET_CRYPTO_OP_AES_128_CTR_DEC;
}
else if (!strcmp (ctx->super.algo->name, "AES256-CTR"))
{
algo = VNET_CRYPTO_ALG_AES_256_CTR;
+ ctx->id = is_enc ? VNET_CRYPTO_OP_AES_256_CTR_ENC :
+ VNET_CRYPTO_OP_AES_256_CTR_DEC;
}
else
{
@@ -138,20 +129,22 @@ ptls_vpp_crypto_aead_decrypt (ptls_aead_context_t *_ctx, void *_output,
struct vpp_aead_context_t *ctx = (struct vpp_aead_context_t *) _ctx;
int tag_size = ctx->super.algo->tag_size;
- ctx->op.dst = _output;
- ctx->op.src = (void *) input;
- ctx->op.len = inlen - tag_size;;
+ vnet_crypto_op_init (&ctx->op, ctx->id);
+ ctx->op.aad = (u8 *) aad;
+ ctx->op.aad_len = aadlen;
ctx->op.iv = ctx->iv;
ptls_aead__build_iv (ctx->super.algo, ctx->op.iv, ctx->static_iv, seq);
- ctx->op.aad = (void *) aad;
- ctx->op.aad_len = aadlen;
- ctx->op.tag = (void *) input + inlen - tag_size;
+ ctx->op.src = (u8 *) input;
+ ctx->op.dst = _output;
+ ctx->op.key_index = ctx->key_index;
+ ctx->op.len = inlen - tag_size;
ctx->op.tag_len = tag_size;
+ ctx->op.tag = ctx->op.src + ctx->op.len;
vnet_crypto_process_ops (vm, &(ctx->op), 1);
assert (ctx->op.status == VNET_CRYPTO_OP_STATUS_COMPLETED);
- return inlen - tag_size;
+ return ctx->op.len;
}
static void
@@ -159,10 +152,13 @@ ptls_vpp_crypto_aead_encrypt_init (ptls_aead_context_t *_ctx, uint64_t seq,
const void *aad, size_t aadlen)
{
struct vpp_aead_context_t *ctx = (struct vpp_aead_context_t *) _ctx;
- ctx->op.iv = ctx->iv;
- ptls_aead__build_iv (ctx->super.algo, ctx->op.iv, ctx->static_iv, seq);
+
+ vnet_crypto_op_init (&ctx->op, ctx->id);
ctx->op.aad = (void *) aad;
ctx->op.aad_len = aadlen;
+ ctx->op.iv = ctx->iv;
+ ptls_aead__build_iv (ctx->super.algo, ctx->op.iv, ctx->static_iv, seq);
+ ctx->op.key_index = ctx->key_index;
ctx->op.n_chunks = 2;
ctx->op.chunk_index = 0;
@@ -201,7 +197,12 @@ ptls_vpp_crypto_aead_encrypt_final (ptls_aead_context_t * _ctx, void *_output)
static void
ptls_vpp_crypto_aead_dispose_crypto (ptls_aead_context_t * _ctx)
{
- /* Do nothing */
+ vlib_main_t *vm = vlib_get_main ();
+ struct vpp_aead_context_t *ctx = (struct vpp_aead_context_t *) _ctx;
+
+ clib_rwlock_writer_lock (&picotls_main.crypto_keys_rw_lock);
+ vnet_crypto_key_del (vm, ctx->key_index);
+ clib_rwlock_writer_unlock (&picotls_main.crypto_keys_rw_lock);
}
static int
@@ -213,23 +214,15 @@ ptls_vpp_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc,
struct vpp_aead_context_t *ctx = (struct vpp_aead_context_t *) _ctx;
u16 key_len = ctx->super.algo->key_size;
- memset (&(ctx->op), 0, sizeof (vnet_crypto_op_t));
-
if (alg == VNET_CRYPTO_ALG_AES_128_GCM)
{
- if (is_enc)
- vnet_crypto_op_init (&(ctx->op), VNET_CRYPTO_OP_AES_128_GCM_ENC);
- else
- vnet_crypto_op_init (&(ctx->op), VNET_CRYPTO_OP_AES_128_GCM_DEC);
+ ctx->id = is_enc ? VNET_CRYPTO_OP_AES_128_GCM_ENC :
+ VNET_CRYPTO_OP_AES_128_GCM_DEC;
}
else if (alg == VNET_CRYPTO_ALG_AES_256_GCM)
{
- if (is_enc)
- {
- vnet_crypto_op_init (&(ctx->op), VNET_CRYPTO_OP_AES_256_GCM_ENC);
- }
- else
- vnet_crypto_op_init (&(ctx->op), VNET_CRYPTO_OP_AES_256_GCM_DEC);
+ ctx->id = is_enc ? VNET_CRYPTO_OP_AES_256_GCM_ENC :
+ VNET_CRYPTO_OP_AES_256_GCM_DEC;
}
else
{
@@ -239,18 +232,23 @@ ptls_vpp_crypto_aead_setup_crypto (ptls_aead_context_t *_ctx, int is_enc,
}
ctx->alg = alg;
+ ctx->chunk_index = 0;
+ clib_memcpy (ctx->static_iv, iv, ctx->super.algo->iv_size);
clib_rwlock_writer_lock (&picotls_main.crypto_keys_rw_lock);
- ctx->op.key_index =
- vnet_crypto_key_add (vm, ctx->alg, (void *) key, key_len);
+ ctx->key_index = vnet_crypto_key_add (vm, alg, (void *) key, key_len);
clib_rwlock_writer_unlock (&picotls_main.crypto_keys_rw_lock);
- ctx->chunk_index = 0;
- clib_memcpy (ctx->static_iv, iv, ctx->super.algo->iv_size);
- ctx->super.do_decrypt = ptls_vpp_crypto_aead_decrypt;
- ctx->super.do_encrypt_init = ptls_vpp_crypto_aead_encrypt_init;
- ctx->super.do_encrypt_update = ptls_vpp_crypto_aead_encrypt_update;
- ctx->super.do_encrypt_final = ptls_vpp_crypto_aead_encrypt_final;
+ if (is_enc)
+ {
+ ctx->super.do_encrypt_init = ptls_vpp_crypto_aead_encrypt_init;
+ ctx->super.do_encrypt_update = ptls_vpp_crypto_aead_encrypt_update;
+ ctx->super.do_encrypt_final = ptls_vpp_crypto_aead_encrypt_final;
+ }
+ else
+ {
+ ctx->super.do_decrypt = ptls_vpp_crypto_aead_decrypt;
+ }
ctx->super.dispose_crypto = ptls_vpp_crypto_aead_dispose_crypto;
return 0;
@@ -308,6 +306,7 @@ ptls_cipher_algorithm_t ptls_vpp_crypto_aes256ctr = {
ptls_vpp_crypto_aes256ctr_setup_crypto
};
+#define PTLS_X86_CACHE_LINE_ALIGN_BITS 6
ptls_aead_algorithm_t ptls_vpp_crypto_aes128gcm = {
"AES128-GCM",
PTLS_AESGCM_CONFIDENTIALITY_LIMIT,
@@ -317,6 +316,9 @@ ptls_aead_algorithm_t ptls_vpp_crypto_aes128gcm = {
PTLS_AES128_KEY_SIZE,
PTLS_AESGCM_IV_SIZE,
PTLS_AESGCM_TAG_SIZE,
+ { PTLS_TLS12_AESGCM_FIXED_IV_SIZE, PTLS_TLS12_AESGCM_RECORD_IV_SIZE },
+ 1,
+ PTLS_X86_CACHE_LINE_ALIGN_BITS,
sizeof (struct vpp_aead_context_t),
ptls_vpp_crypto_aead_aes128gcm_setup_crypto
};
@@ -330,6 +332,9 @@ ptls_aead_algorithm_t ptls_vpp_crypto_aes256gcm = {
PTLS_AES256_KEY_SIZE,
PTLS_AESGCM_IV_SIZE,
PTLS_AESGCM_TAG_SIZE,
+ { PTLS_TLS12_AESGCM_FIXED_IV_SIZE, PTLS_TLS12_AESGCM_RECORD_IV_SIZE },
+ 1,
+ PTLS_X86_CACHE_LINE_ALIGN_BITS,
sizeof (struct vpp_aead_context_t),
ptls_vpp_crypto_aead_aes256gcm_setup_crypto
};
diff --git a/src/plugins/tlspicotls/tls_picotls.c b/src/plugins/tlspicotls/tls_picotls.c
index ef02f66a552..7375b928206 100644
--- a/src/plugins/tlspicotls/tls_picotls.c
+++ b/src/plugins/tlspicotls/tls_picotls.c
@@ -27,11 +27,11 @@ static ptls_key_exchange_algorithm_t *default_key_exchange[] = {
static u32
picotls_ctx_alloc (void)
{
- u8 thread_id = vlib_get_thread_index ();
+ u32 thread_id = vlib_get_thread_index ();
picotls_main_t *pm = &picotls_main;
picotls_ctx_t **ctx;
- pool_get (pm->ctx_pool[thread_id], ctx);
+ pool_get_aligned_safe (pm->ctx_pool[thread_id], ctx, CLIB_CACHE_LINE_BYTES);
if (!(*ctx))
*ctx = clib_mem_alloc (sizeof (picotls_ctx_t));
@@ -48,7 +48,7 @@ picotls_ctx_free (tls_ctx_t * ctx)
{
picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
vec_free (ptls_ctx->rx_content);
- vec_free (ptls_ctx->write_content);
+ ptls_free (ptls_ctx->tls);
pool_put_index (picotls_main.ctx_pool[ctx->c_thread_index],
ptls_ctx->ptls_ctx_idx);
}
@@ -179,8 +179,7 @@ picotls_stop_listen (tls_ctx_t * lctx)
static void
picotls_handle_handshake_failure (tls_ctx_t * ctx)
{
- session_free (session_get (ctx->c_s_index, ctx->c_thread_index));
- ctx->no_app_session = 1;
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
ctx->c_s_index = SESSION_INVALID_INDEX;
tls_disconnect_transport (ctx);
}
@@ -200,13 +199,27 @@ picotls_transport_close (tls_ctx_t * ctx)
picotls_handle_handshake_failure (ctx);
return 0;
}
- picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
- ptls_free (ptls_ctx->tls);
session_transport_closing_notify (&ctx->connection);
return 0;
}
static int
+picotls_transport_reset (tls_ctx_t *ctx)
+{
+ if (!picotls_handshake_is_over (ctx))
+ {
+ picotls_handle_handshake_failure (ctx);
+ return 0;
+ }
+
+ session_transport_reset_notify (&ctx->connection);
+ session_transport_closed_notify (&ctx->connection);
+ tls_disconnect_transport (ctx);
+
+ return 0;
+}
+
+static int
picotls_app_close (tls_ctx_t * ctx)
{
session_t *app_session;
@@ -215,309 +228,435 @@ picotls_app_close (tls_ctx_t * ctx)
if (!svm_fifo_max_dequeue_cons (app_session->tx_fifo))
picotls_confirm_app_close (ctx);
else
- ctx->app_closed = 1;
+ ctx->flags |= TLS_CONN_F_APP_CLOSED;
return 0;
}
static inline int
-picotls_do_handshake (picotls_ctx_t * ptls_ctx, session_t * tls_session,
- u8 * input, int input_len)
+picotls_do_handshake (picotls_ctx_t *ptls_ctx, session_t *tcp_session)
{
+ int rv = PTLS_ERROR_IN_PROGRESS, write = 0, i = 0, read = 0, len;
+ svm_fifo_t *tcp_rx_fifo = tcp_session->rx_fifo;
+ ptls_buffer_t *buf = &ptls_ctx->read_buffer;
+ u32 n_segs = 2, max_len = 16384;
ptls_t *tls = ptls_ctx->tls;
- ptls_buffer_t buf;
- int rv = PTLS_ERROR_IN_PROGRESS;
- int write = 0, off;
+ svm_fifo_seg_t fs[n_segs];
+ uword deq_now;
+
+ ptls_buffer_init (buf, "", 0);
+
+ len = svm_fifo_segments (tcp_rx_fifo, 0, fs, &n_segs, max_len);
+ if (len <= 0)
+ return 0;
- do
+ while (read < len && i < n_segs)
{
- off = 0;
- do
+ deq_now = fs[i].len;
+ rv = ptls_handshake (tls, buf, fs[i].data, &deq_now, NULL);
+
+ write += picotls_try_handshake_write (ptls_ctx, tcp_session, buf);
+ read += deq_now;
+
+ if (!(rv == 0 || rv == PTLS_ERROR_IN_PROGRESS))
{
- ptls_buffer_init (&buf, "", 0);
- size_t consumed = input_len - off;
- rv = ptls_handshake (tls, &buf, input + off, &consumed, NULL);
- off += consumed;
- ptls_ctx->rx_offset += consumed;
- if ((rv == 0 || rv == PTLS_ERROR_IN_PROGRESS) && buf.off != 0)
- {
- write = picotls_try_handshake_write (ptls_ctx, tls_session,
- &buf);
- }
- ptls_buffer_dispose (&buf);
+ clib_error ("unexpected error %u", rv);
+ break;
}
- while (rv == PTLS_ERROR_IN_PROGRESS && input_len != off);
+
+ if (!rv)
+ break;
+
+ if (deq_now < fs[i].len)
+ {
+ fs[i].data += deq_now;
+ fs[i].len -= deq_now;
+ }
+ else
+ i++;
}
- while (rv == PTLS_ERROR_IN_PROGRESS);
+
+ if (read)
+ svm_fifo_dequeue_drop (tcp_rx_fifo, read);
+
+ ptls_buffer_dispose (buf);
return write;
}
static inline int
-picotls_ctx_read (tls_ctx_t * ctx, session_t * tls_session)
+ptls_copy_buf_to_fs (ptls_buffer_t *buf, u32 to_copy, svm_fifo_seg_t *fs,
+ u32 *fs_idx, u32 max_fs)
{
- picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
- int from_tls_len = 0, off, crypto_len, ret;
- u32 deq_max, deq_now;
- u32 enq_max;
- ptls_buffer_t *buf = &ptls_ctx->read_buffer;
- svm_fifo_t *tls_rx_fifo, *app_rx_fifo;
- session_t *app_session;
-
- tls_rx_fifo = tls_session->rx_fifo;
+ u32 idx = *fs_idx;
- if (!picotls_handshake_is_over (ctx))
+ while (to_copy)
{
- deq_max = svm_fifo_max_dequeue_cons (tls_rx_fifo);
- if (!deq_max)
- goto done_hs;
-
- vec_validate (ptls_ctx->rx_content, deq_max);
- ptls_ctx->rx_offset = 0;
- ptls_ctx->rx_len = 0;
+ if (fs[idx].len <= to_copy)
+ {
+ clib_memcpy_fast (fs[idx].data, buf->base + (buf->off - to_copy),
+ fs[idx].len);
+ to_copy -= fs[idx].len;
+ idx += 1;
+ /* no more space in the app's rx fifo */
+ if (idx == max_fs)
+ break;
+ }
+ else
+ {
+ clib_memcpy_fast (fs[idx].data, buf->base + (buf->off - to_copy),
+ to_copy);
+ fs[idx].len -= to_copy;
+ fs[idx].data += to_copy;
+ to_copy = 0;
+ }
+ }
- off = svm_fifo_dequeue (tls_rx_fifo, deq_max, TLS_RX_LEN (ptls_ctx));
- from_tls_len += off;
- ptls_ctx->rx_len += off;
+ *fs_idx = idx;
- picotls_do_handshake (ptls_ctx, tls_session, TLS_RX_OFFSET (ptls_ctx),
- from_tls_len);
- if (picotls_handshake_is_over (ctx))
- ret = ptls_is_server (ptls_ctx->tls) ?
- tls_notify_app_accept (ctx) :
- tls_notify_app_connected (ctx, SESSION_E_NONE);
+ return to_copy;
+}
- done_hs:
- if (!TLS_RX_IS_LEFT (ptls_ctx))
- return 0;
- }
+static u32
+ptls_tcp_to_app_write (picotls_ctx_t *ptls_ctx, svm_fifo_t *app_rx_fifo,
+ svm_fifo_t *tcp_rx_fifo)
+{
+ u32 ai = 0, thread_index, min_buf_len, to_copy, left, wrote = 0;
+ ptls_buffer_t *buf = &ptls_ctx->read_buffer;
+ int ret, i = 0, read = 0, tcp_len, n_fs_app;
+ u32 n_segs = 4, max_len = 1 << 16;
+ svm_fifo_seg_t tcp_fs[n_segs], app_fs[n_segs];
+ picotls_main_t *pm = &picotls_main;
+ uword deq_now;
+ u8 is_nocopy;
- app_session = session_get_from_handle (ctx->app_session_handle);
- app_rx_fifo = app_session->rx_fifo;
+ thread_index = ptls_ctx->ctx.c_thread_index;
- if (TLS_READ_IS_LEFT (ptls_ctx))
- goto enq_buf;
+ n_fs_app = svm_fifo_provision_chunks (app_rx_fifo, app_fs, n_segs, max_len);
+ if (n_fs_app <= 0)
+ return 0;
- ptls_buffer_init (buf, "", 0);
- ptls_ctx->read_buffer_offset = 0;
+ tcp_len = svm_fifo_segments (tcp_rx_fifo, 0, tcp_fs, &n_segs, max_len);
+ if (tcp_len <= 0)
+ return 0;
- if (!TLS_RX_IS_LEFT (ptls_ctx))
+ if (ptls_ctx->read_buffer_offset)
{
- deq_max = svm_fifo_max_dequeue_cons (tls_rx_fifo);
- if (!deq_max)
- goto app_fifo;
-
- deq_now = clib_min (deq_max, svm_fifo_max_read_chunk (tls_rx_fifo));
+ to_copy = buf->off - ptls_ctx->read_buffer_offset;
+ left = ptls_copy_buf_to_fs (buf, to_copy, app_fs, &ai, n_fs_app);
+ wrote += to_copy - left;
+ if (left)
+ {
+ ptls_ctx->read_buffer_offset = buf->off - left;
+ goto do_checks;
+ }
+ ptls_ctx->read_buffer_offset = 0;
+ }
- if (PREDICT_FALSE (deq_now < deq_max))
+ while (ai < n_fs_app && read < tcp_len)
+ {
+ deq_now = clib_min (tcp_fs[i].len, tcp_len - read);
+ min_buf_len = deq_now + (16 << 10);
+ is_nocopy = app_fs[ai].len < min_buf_len ? 0 : 1;
+ if (is_nocopy)
{
- off =
- svm_fifo_dequeue (tls_rx_fifo, deq_max, TLS_RX_LEN (ptls_ctx));
- from_tls_len += off;
- ptls_ctx->rx_len += off;
+ ptls_buffer_init (buf, app_fs[ai].data, app_fs[ai].len);
+ ret = ptls_receive (ptls_ctx->tls, buf, tcp_fs[i].data, &deq_now);
+ assert (ret == 0 || ret == PTLS_ERROR_IN_PROGRESS);
+
+ wrote += buf->off;
+ if (buf->off == app_fs[ai].len)
+ {
+ ai++;
+ }
+ else
+ {
+ app_fs[ai].len -= buf->off;
+ app_fs[ai].data += buf->off;
+ }
}
else
{
- ret =
- ptls_receive (ptls_ctx->tls, buf, svm_fifo_head (tls_rx_fifo),
- (size_t *) & deq_now);
- svm_fifo_dequeue_drop (tls_rx_fifo, deq_now);
- goto enq_buf;
- }
- }
+ vec_validate (pm->rx_bufs[thread_index], min_buf_len);
+ ptls_buffer_init (buf, pm->rx_bufs[thread_index], min_buf_len);
+ ret = ptls_receive (ptls_ctx->tls, buf, tcp_fs[i].data, &deq_now);
+ assert (ret == 0 || ret == PTLS_ERROR_IN_PROGRESS);
-app_fifo:
+ left = ptls_copy_buf_to_fs (buf, buf->off, app_fs, &ai, n_fs_app);
+ if (!left)
+ {
+ ptls_ctx->read_buffer_offset = 0;
+ wrote += buf->off;
+ }
+ else
+ {
+ ptls_ctx->read_buffer_offset = buf->off - left;
+ wrote += ptls_ctx->read_buffer_offset;
+ }
+ }
- enq_max = svm_fifo_max_enqueue_prod (app_rx_fifo);
- if (!enq_max)
- goto final;
+ assert (deq_now <= tcp_fs[i].len);
+ read += deq_now;
+ if (deq_now < tcp_fs[i].len)
+ {
+ tcp_fs[i].data += deq_now;
+ tcp_fs[i].len -= deq_now;
+ }
+ else
+ i++;
+ }
- crypto_len = clib_min (enq_max, TLS_RX_LEFT_LEN (ptls_ctx));
- off = 0;
+do_checks:
- do
+ if (read)
{
- size_t consumed = crypto_len - off;
- ret =
- ptls_receive (ptls_ctx->tls, buf,
- TLS_RX_OFFSET (ptls_ctx), &consumed);
- off += consumed;
- ptls_ctx->rx_offset += off;
+ svm_fifo_dequeue_drop (tcp_rx_fifo, read);
+ if (svm_fifo_needs_deq_ntf (tcp_rx_fifo, read))
+ {
+ svm_fifo_clear_deq_ntf (tcp_rx_fifo);
+ session_send_io_evt_to_thread (tcp_rx_fifo, SESSION_IO_EVT_RX);
+ }
}
- while (ret == 0 && off < crypto_len);
-enq_buf:
+ if (wrote)
+ svm_fifo_enqueue_nocopy (app_rx_fifo, wrote);
- off =
- svm_fifo_enqueue (app_rx_fifo, TLS_READ_LEFT_LEN (ptls_ctx),
- TLS_READ_OFFSET (ptls_ctx));
- if (off < 0)
- {
- tls_add_vpp_q_builtin_rx_evt (tls_session);
- return 0;
- }
+ return wrote;
+}
+
+static inline int
+picotls_ctx_read (tls_ctx_t *ctx, session_t *tcp_session)
+{
+ picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
+ svm_fifo_t *tcp_rx_fifo;
+ session_t *app_session;
+ int wrote;
- ptls_ctx->read_buffer_offset += off;
- if (!TLS_RX_IS_LEFT (ptls_ctx))
+ if (PREDICT_FALSE (!ptls_handshake_is_complete (ptls_ctx->tls)))
{
- ptls_ctx->rx_len = 0;
- ptls_ctx->rx_offset = 0;
+ picotls_do_handshake (ptls_ctx, tcp_session);
+ if (picotls_handshake_is_over (ctx))
+ {
+ if (ptls_is_server (ptls_ctx->tls))
+ {
+ if (tls_notify_app_accept (ctx))
+ {
+ ctx->c_s_index = SESSION_INVALID_INDEX;
+ tls_disconnect_transport (ctx);
+ return -1;
+ }
+ }
+ else
+ {
+ tls_notify_app_connected (ctx, SESSION_E_NONE);
+ }
+ }
+
+ ctx->flags |= TLS_CONN_F_HS_DONE;
+ if (!svm_fifo_max_dequeue (tcp_session->rx_fifo))
+ return 0;
}
-final:
- ptls_buffer_dispose (buf);
+ tcp_rx_fifo = tcp_session->rx_fifo;
+ app_session = session_get_from_handle (ctx->app_session_handle);
+ wrote = ptls_tcp_to_app_write (ptls_ctx, app_session->rx_fifo, tcp_rx_fifo);
- if (app_session->session_state >= SESSION_STATE_READY)
+ if (wrote)
tls_notify_app_enqueue (ctx, app_session);
- if (TLS_RX_IS_LEFT (ptls_ctx) || TLS_READ_IS_LEFT (ptls_ctx)
- || svm_fifo_max_dequeue (tls_rx_fifo))
- tls_add_vpp_q_builtin_rx_evt (tls_session);
+ if (ptls_ctx->read_buffer_offset || svm_fifo_max_dequeue (tcp_rx_fifo))
+ tls_add_vpp_q_builtin_rx_evt (tcp_session);
- return from_tls_len;
+ return wrote;
}
-static inline int
-picotls_content_process (picotls_ctx_t * ptls_ctx, svm_fifo_t * src_fifo,
- svm_fifo_t * dst_fifo, int content_len,
- int total_record_overhead, int is_no_copy)
+static inline u32
+ptls_compute_deq_len (picotls_ctx_t *ptls_ctx, u32 dst_chunk, u32 src_chunk,
+ u32 dst_space, u8 *is_nocopy)
{
- ptls_buffer_t *buf = &ptls_ctx->write_buffer;
- int total_length = content_len + total_record_overhead;
- int to_dst_len;
- if (is_no_copy)
- {
- ptls_buffer_init (buf, svm_fifo_tail (dst_fifo), total_length);
- ptls_send (ptls_ctx->tls, buf, svm_fifo_head (src_fifo), content_len);
-
- assert (!buf->is_allocated);
- assert (buf->base == svm_fifo_tail (dst_fifo));
+ int record_overhead = ptls_get_record_overhead (ptls_ctx->tls);
+ int num_records;
+ u32 deq_len, total_overhead;
- svm_fifo_dequeue_drop (src_fifo, content_len);
- svm_fifo_enqueue_nocopy (dst_fifo, buf->off);
- to_dst_len = buf->off;
+ if (dst_chunk >= clib_min (8192, src_chunk + record_overhead))
+ {
+ *is_nocopy = 1;
+ deq_len = clib_min (src_chunk, dst_chunk);
+ num_records = ceil ((f64) deq_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
+ total_overhead = num_records * record_overhead;
+ if (deq_len + total_overhead > dst_chunk)
+ deq_len = dst_chunk - total_overhead;
}
else
{
- assert (!TLS_WRITE_IS_LEFT (ptls_ctx));
- vec_validate (ptls_ctx->write_content, total_length);
- ptls_buffer_init (buf, ptls_ctx->write_content, total_length);
-
- ptls_send (ptls_ctx->tls, buf, svm_fifo_head (src_fifo), content_len);
- svm_fifo_dequeue_drop (src_fifo, content_len);
-
- to_dst_len = svm_fifo_enqueue (dst_fifo, buf->off, buf->base);
+ deq_len = clib_min (src_chunk, dst_space);
+ num_records = ceil ((f64) deq_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
+ total_overhead = num_records * record_overhead;
+ if (deq_len + total_overhead > dst_space)
+ deq_len = dst_space - total_overhead;
}
- ptls_ctx->write_buffer_offset += to_dst_len;
- return to_dst_len;
+
+ return deq_len;
}
-static inline int
-picotls_ctx_write (tls_ctx_t * ctx, session_t * app_session,
- transport_send_params_t * sp)
+static u32
+ptls_app_to_tcp_write (picotls_ctx_t *ptls_ctx, session_t *app_session,
+ svm_fifo_t *tcp_tx_fifo, u32 max_len)
{
- picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
- u32 deq_max, deq_now;
- u32 enq_max, enq_now;
- int from_app_len = 0, to_tls_len = 0, is_nocopy = 0;
- svm_fifo_t *tls_tx_fifo, *app_tx_fifo;
- session_t *tls_session;
-
- int record_overhead = ptls_get_record_overhead (ptls_ctx->tls);
- int num_records, total_overhead;
+ u32 wrote = 0, max_enq, thread_index, app_buf_len, left, ti = 0;
+ int read = 0, rv, i = 0, len, n_tcp_segs = 4, deq_len;
+ u32 n_app_segs = 2, min_chunk = 2048;
+ svm_fifo_seg_t app_fs[n_app_segs], tcp_fs[n_tcp_segs];
+ picotls_main_t *pm = &picotls_main;
+ ptls_buffer_t _buf, *buf = &_buf;
+ svm_fifo_t *app_tx_fifo;
+ u8 is_nocopy, *app_buf;
+ u32 first_chunk_len;
- tls_session = session_get_from_handle (ctx->tls_session_handle);
- tls_tx_fifo = tls_session->tx_fifo;
+ thread_index = app_session->thread_index;
app_tx_fifo = app_session->tx_fifo;
- if (PREDICT_FALSE (TLS_WRITE_IS_LEFT (ptls_ctx)))
+ len = svm_fifo_segments (app_tx_fifo, 0, app_fs, &n_app_segs, max_len);
+ if (len <= 0)
+ return 0;
+
+ n_tcp_segs = svm_fifo_provision_chunks (tcp_tx_fifo, tcp_fs, n_tcp_segs,
+ 1000 + max_len);
+ if (n_tcp_segs <= 0)
+ return 0;
+
+ while ((left = len - read) && ti < n_tcp_segs)
{
- enq_max = svm_fifo_max_enqueue_prod (tls_tx_fifo);
- int to_write = clib_min (enq_max,
- ptls_ctx->write_buffer.off -
- ptls_ctx->write_buffer_offset);
- to_tls_len =
- svm_fifo_enqueue (tls_tx_fifo, to_write, TLS_WRITE_OFFSET (ptls_ctx));
- if (to_tls_len < 0)
- {
- app_session->flags |= SESSION_F_CUSTOM_TX;
- return 0;
- }
- ptls_ctx->write_buffer_offset += to_tls_len;
+ /* If we wrote something and are left with few bytes, postpone write
+ * as we may be able to encrypt a bigger chunk next time */
+ if (wrote && left < min_chunk)
+ break;
- if (TLS_WRITE_IS_LEFT (ptls_ctx))
+ /* Avoid short records if possible */
+ if (app_fs[i].len < min_chunk && min_chunk < left)
{
- app_session->flags |= SESSION_F_CUSTOM_TX;
- return to_tls_len;
+ app_buf_len = app_fs[i].len + app_fs[i + 1].len;
+ app_buf = pm->rx_bufs[thread_index];
+ vec_validate (pm->rx_bufs[thread_index], app_buf_len);
+ clib_memcpy_fast (pm->rx_bufs[thread_index], app_fs[i].data,
+ app_fs[i].len);
+ clib_memcpy_fast (pm->rx_bufs[thread_index] + app_fs[i].len,
+ app_fs[i + 1].data, app_buf_len - app_fs[i].len);
+ first_chunk_len = app_fs[i].len;
+ i += 1;
}
else
{
- ptls_buffer_init (&ptls_ctx->write_buffer, "", 0);
- ptls_ctx->write_buffer_offset = 0;
+ app_buf = app_fs[i].data;
+ app_buf_len = app_fs[i].len;
+ first_chunk_len = 0;
}
- }
+ is_nocopy = 0;
+ max_enq = tcp_fs[ti].len;
+ max_enq += ti < (n_tcp_segs - 1) ? tcp_fs[ti + 1].len : 0;
- deq_max = svm_fifo_max_dequeue_cons (app_tx_fifo);
- if (!deq_max)
- return deq_max;
+ deq_len = ptls_compute_deq_len (ptls_ctx, tcp_fs[ti].len, app_buf_len,
+ max_enq, &is_nocopy);
+ if (is_nocopy)
+ {
+ ptls_buffer_init (buf, tcp_fs[ti].data, tcp_fs[ti].len);
+ rv = ptls_send (ptls_ctx->tls, buf, app_buf, deq_len);
- deq_now = clib_min (deq_max, sp->max_burst_size);
- deq_now = clib_min (deq_now, svm_fifo_max_read_chunk (app_tx_fifo));
+ assert (rv == 0);
+ wrote += buf->off;
- enq_max = svm_fifo_max_enqueue_prod (tls_tx_fifo);
- /** There is no engough enqueue space for one record **/
- if (enq_max <= record_overhead)
- {
- app_session->flags |= SESSION_F_CUSTOM_TX;
- return 0;
- }
+ tcp_fs[ti].len -= buf->off;
+ tcp_fs[ti].data += buf->off;
+ if (!tcp_fs[ti].len)
+ ti += 1;
+ }
+ else
+ {
+ vec_validate (pm->tx_bufs[thread_index], max_enq);
+ ptls_buffer_init (buf, pm->tx_bufs[thread_index], max_enq);
+ rv = ptls_send (ptls_ctx->tls, buf, app_buf, deq_len);
- enq_now = clib_min (enq_max, svm_fifo_max_write_chunk (tls_tx_fifo));
+ assert (rv == 0);
+ wrote += buf->off;
- /** Allowed to execute no-copy crypto operation **/
- if (enq_now > record_overhead)
- {
- is_nocopy = 1;
- from_app_len = clib_min (deq_now, enq_now);
- num_records =
- ceil ((f64) from_app_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
- total_overhead = num_records * record_overhead;
- if (from_app_len + total_overhead > enq_now)
- from_app_len = enq_now - total_overhead;
+ left = ptls_copy_buf_to_fs (buf, buf->off, tcp_fs, &ti, n_tcp_segs);
+ assert (left == 0);
+ }
+
+ read += deq_len;
+ ASSERT (deq_len >= first_chunk_len);
+
+ if (deq_len == app_buf_len)
+ {
+ i += 1;
+ }
+ else
+ {
+ app_fs[i].len -= deq_len - first_chunk_len;
+ app_fs[i].data += deq_len - first_chunk_len;
+ }
}
- else
+
+ if (read)
{
- from_app_len = clib_min (deq_now, enq_max);
- num_records =
- ceil ((f64) from_app_len / PTLS_MAX_PLAINTEXT_RECORD_SIZE);
- total_overhead = num_records * record_overhead;
- if (from_app_len + total_overhead > enq_max)
- from_app_len = enq_max - total_overhead;
+ svm_fifo_dequeue_drop (app_tx_fifo, read);
+ if (svm_fifo_needs_deq_ntf (app_tx_fifo, read))
+ session_dequeue_notify (app_session);
}
- to_tls_len =
- picotls_content_process (ptls_ctx, app_tx_fifo, tls_tx_fifo,
- from_app_len, total_overhead, is_nocopy);
- if (!TLS_WRITE_IS_LEFT (ptls_ctx))
+ if (wrote)
{
- ptls_ctx->write_buffer_offset = 0;
- ptls_buffer_init (&ptls_ctx->write_buffer, "", 0);
+ svm_fifo_enqueue_nocopy (tcp_tx_fifo, wrote);
+ if (svm_fifo_set_event (tcp_tx_fifo))
+ session_send_io_evt_to_thread (tcp_tx_fifo, SESSION_IO_EVT_TX);
}
- if (svm_fifo_needs_deq_ntf (app_tx_fifo, from_app_len))
- session_dequeue_notify (app_session);
+ return wrote;
+}
+
+static inline int
+picotls_ctx_write (tls_ctx_t *ctx, session_t *app_session,
+ transport_send_params_t *sp)
+{
+ picotls_ctx_t *ptls_ctx = (picotls_ctx_t *) ctx;
+ u32 deq_max, deq_now, enq_max, enq_buf, wrote = 0;
+ svm_fifo_t *tcp_tx_fifo;
+ session_t *tcp_session;
- if (to_tls_len)
- tls_add_vpp_q_tx_evt (tls_session);
+ tcp_session = session_get_from_handle (ctx->tls_session_handle);
+ tcp_tx_fifo = tcp_session->tx_fifo;
- if (from_app_len < deq_max || TLS_WRITE_IS_LEFT (ptls_ctx))
- app_session->flags |= SESSION_F_CUSTOM_TX;
+ enq_max = svm_fifo_max_enqueue_prod (tcp_tx_fifo);
+ if (enq_max < 2048)
+ goto check_tls_fifo;
+
+ deq_max = svm_fifo_max_dequeue_cons (app_session->tx_fifo);
+ deq_max = clib_min (deq_max, enq_max);
+ if (!deq_max)
+ goto check_tls_fifo;
+
+ deq_now = clib_min (deq_max, sp->max_burst_size);
+ wrote = ptls_app_to_tcp_write (ptls_ctx, app_session, tcp_tx_fifo, deq_now);
+
+check_tls_fifo:
- if (ctx->app_closed)
+ if (ctx->flags & TLS_CONN_F_APP_CLOSED)
picotls_app_close (ctx);
- return to_tls_len;
+ /* Deschedule and wait for deq notification if fifo is almost full */
+ enq_buf = clib_min (svm_fifo_size (tcp_tx_fifo) / 2, TLSP_MIN_ENQ_SPACE);
+ if (enq_max < wrote + enq_buf)
+ {
+ svm_fifo_add_want_deq_ntf (tcp_tx_fifo, SVM_FIFO_WANT_DEQ_NOTIF);
+ transport_connection_deschedule (&ctx->connection);
+ sp->flags |= TRANSPORT_SND_F_DESCHED;
+ }
+ else
+ /* Request tx reschedule of the app session */
+ app_session->flags |= SESSION_F_CUSTOM_TX;
+
+ return wrote;
}
static int
@@ -538,7 +677,6 @@ picotls_ctx_init_server (tls_ctx_t * ctx)
ptls_ctx->rx_len = 0;
ptls_ctx->rx_offset = 0;
- ptls_ctx->write_buffer_offset = 0;
return 0;
}
@@ -562,7 +700,6 @@ picotls_ctx_init_client (tls_ctx_t *ctx)
ptls_ctx->rx_len = 0;
ptls_ctx->rx_offset = 0;
- ptls_ctx->write_buffer_offset = 0;
ptls_buffer_init (&hs_buf, "", 0);
if (ptls_handshake (ptls_ctx->tls, &hs_buf, NULL, NULL, &hsprop) !=
@@ -601,6 +738,13 @@ picotls_init_client_ptls_ctx (ptls_context_t **client_ptls_ctx)
return 0;
}
+int
+picotls_reinit_ca_chain (void)
+{
+ /* Not supported yet */
+ return 0;
+}
+
const static tls_engine_vft_t picotls_engine = {
.ctx_alloc = picotls_ctx_alloc,
.ctx_free = picotls_ctx_free,
@@ -614,7 +758,9 @@ const static tls_engine_vft_t picotls_engine = {
.ctx_read = picotls_ctx_read,
.ctx_write = picotls_ctx_write,
.ctx_transport_close = picotls_transport_close,
+ .ctx_transport_reset = picotls_transport_reset,
.ctx_app_close = picotls_app_close,
+ .ctx_reinit_cachain = picotls_reinit_ca_chain,
};
static clib_error_t *
@@ -628,6 +774,8 @@ tls_picotls_init (vlib_main_t * vm)
num_threads = 1 + vtm->n_threads;
vec_validate (pm->ctx_pool, num_threads - 1);
+ vec_validate (pm->rx_bufs, num_threads - 1);
+ vec_validate (pm->tx_bufs, num_threads - 1);
clib_rwlock_init (&picotls_main.crypto_keys_rw_lock);
@@ -638,18 +786,14 @@ tls_picotls_init (vlib_main_t * vm)
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (tls_picotls_init) = {
.runs_after = VLIB_INITS ("tls_init"),
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Transport Layer Security (TLS) Engine, Picotls Based",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tlspicotls/tls_picotls.h b/src/plugins/tlspicotls/tls_picotls.h
index 29b279c7a83..d24d7e2f172 100644
--- a/src/plugins/tlspicotls/tls_picotls.h
+++ b/src/plugins/tlspicotls/tls_picotls.h
@@ -16,9 +16,7 @@
#define TLS_READ_IS_LEFT(x) ((x)->read_buffer.off != 0 && (x)->read_buffer.off != (x)->read_buffer_offset)
#define TLS_READ_LEFT_LEN(x) ((x)->read_buffer.off - (x)->read_buffer_offset)
-#define TLS_WRITE_OFFSET(x) ((x)->write_buffer.base + (x)->write_buffer_offset)
-#define TLS_WRITE_IS_LEFT(x) ((x)->write_buffer.off != 0 && (x)->write_buffer.off != (x)->write_buffer_offset)
-
+#define TLSP_MIN_ENQ_SPACE (1 << 16)
typedef struct tls_ctx_picotls_
{
@@ -29,10 +27,7 @@ typedef struct tls_ctx_picotls_
int rx_offset;
int rx_len;
ptls_buffer_t read_buffer;
- ptls_buffer_t write_buffer;
- uint8_t *write_content;
int read_buffer_offset;
- int write_buffer_offset;
} picotls_ctx_t;
typedef struct tls_listen_ctx_picotls_
@@ -45,6 +40,8 @@ typedef struct picotls_main_
{
picotls_ctx_t ***ctx_pool;
picotls_listen_ctx_t *lctx_pool;
+ u8 **tx_bufs;
+ u8 **rx_bufs;
ptls_context_t *client_ptls_ctx;
clib_rwlock_t crypto_keys_rw_lock;
} picotls_main_t;
diff --git a/src/plugins/tracedump/CMakeLists.txt b/src/plugins/tracedump/CMakeLists.txt
index 7860d95bc11..6dffdedcc81 100644
--- a/src/plugins/tracedump/CMakeLists.txt
+++ b/src/plugins/tracedump/CMakeLists.txt
@@ -26,7 +26,7 @@ add_vpp_plugin(tracedump
API_TEST_SOURCES
graph_test.c
tracedump_test.c
-)
-# API_TEST_SOURCES
-# tracedump_test.c
+ COMPONENT
+ vpp-plugin-devtools
+)
diff --git a/src/plugins/tracedump/graph_api.c b/src/plugins/tracedump/graph_api.c
index 0626f7d6b42..20eb1b920fc 100644
--- a/src/plugins/tracedump/graph_api.c
+++ b/src/plugins/tracedump/graph_api.c
@@ -252,7 +252,7 @@ graph_api_hookup (vlib_main_t * vm)
gmp->msg_id_base = setup_message_id_table ();
- am->is_mp_safe[gmp->msg_id_base + VL_API_GRAPH_NODE_GET] = 1;
+ vl_api_set_msg_thread_safe (am, gmp->msg_id_base + VL_API_GRAPH_NODE_GET, 1);
return 0;
}
diff --git a/src/plugins/tracedump/graph_cli.c b/src/plugins/tracedump/graph_cli.c
index 2440295a1a7..6af4706f87d 100644
--- a/src/plugins/tracedump/graph_cli.c
+++ b/src/plugins/tracedump/graph_cli.c
@@ -16,7 +16,11 @@
*/
#include <sys/socket.h>
+#ifdef __linux__
#include <linux/if.h>
+#else
+#include <net/if.h>
+#endif /* __linux__ */
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
@@ -75,9 +79,9 @@ graph_node_show_cmd (vlib_main_t * vm,
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "node %d", &index))
- n = vlib_get_node (vm, index);
- else if (unformat (input, "node %v", &name))
- n = vlib_get_node_by_name (vm, name);
+ n = vlib_get_node (vm, index);
+ else if (unformat (input, "node %s", &name))
+ n = vlib_get_node_by_name (vm, name);
else if (unformat (input, "want_arcs"))
want_arcs = true;
@@ -132,13 +136,11 @@ graph_node_show_cmd (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (graph_node_show_command, static) = {
.path = "show graph",
.short_help = "show graph [node <index>|<name>] [want_arcs] [input|trace_supported] [drop] [output] [punt] [handoff] [no_free] [polling] [interrupt]",
.function = graph_node_show_cmd,
};
-/* *INDENT-ON* */
/*
diff --git a/src/plugins/tracedump/graph_test.c b/src/plugins/tracedump/graph_test.c
index 79e1df61c5f..37dfbcdcaa0 100644
--- a/src/plugins/tracedump/graph_test.c
+++ b/src/plugins/tracedump/graph_test.c
@@ -27,7 +27,7 @@
#include <vnet/format_fns.h>
#include <tracedump/graph.api_enum.h>
#include <tracedump/graph.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
diff --git a/src/plugins/tracedump/setup.pg b/src/plugins/tracedump/setup.pg
index 91d5ebe2d2a..1ebdb0dd7f0 100644
--- a/src/plugins/tracedump/setup.pg
+++ b/src/plugins/tracedump/setup.pg
@@ -1,33 +1,33 @@
set term pag off
-packet-generator new {
- name worker0
- worker 0
- limit 12
- rate 1.2e7
- size 128-128
- interface local0
- node ethernet-input
- data { IP4: 1.2.40 -> 3cfd.fed0.b6c8
- UDP: 192.168.40.1 - 192.168.40.100 -> 192.168.50.10
- UDP: 1234 -> 2345
- incrementing 114
- }
+packet-generator new { \
+ name worker0 \
+ worker 0 \
+ limit 12 \
+ rate 1.2e7 \
+ size 128-128 \
+ interface local0 \
+ node ethernet-input \
+ data { IP4: 1.2.40 -> 3cfd.fed0.b6c8 \
+ UDP: 192.168.40.1 - 192.168.40.100 -> 192.168.50.10 \
+ UDP: 1234 -> 2345 \
+ incrementing 114 \
+ } \
}
-packet-generator new {
- name worker1
- worker 1
- limit 12
- rate 1.2e7
- size 128-128
- interface local0
- node ethernet-input
- data { IP4: 1.2.4 -> 3cfd.fed0.b6c9
- UDP: 192.168.41.1 - 192.168.41.100 -> 192.168.51.10
- UDP: 1234 -> 2345
- incrementing 114
- }
+packet-generator new { \
+ name worker1 \
+ worker 1 \
+ limit 12 \
+ rate 1.2e7 \
+ size 128-128 \
+ interface local0 \
+ node ethernet-input \
+ data { IP4: 1.2.4 -> 3cfd.fed0.b6c9 \
+ UDP: 192.168.41.1 - 192.168.41.100 -> 192.168.51.10 \
+ UDP: 1234 -> 2345 \
+ incrementing 114 \
+ } \
}
trace add pg-input 20
diff --git a/src/plugins/tracedump/tracedump.api b/src/plugins/tracedump/tracedump.api
index 540b0664074..1b3813fb184 100644
--- a/src/plugins/tracedump/tracedump.api
+++ b/src/plugins/tracedump/tracedump.api
@@ -25,7 +25,7 @@
*/
-option version = "0.1.0";
+option version = "0.2.0";
enum trace_filter_flag : u32
{
@@ -147,3 +147,69 @@ define trace_details {
u32 packet_number;
string trace_data[];
};
+
+/** \brief trace_clear_cache
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+autoreply define trace_clear_cache {
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief trace_v2_dump
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param thread_id - specific thread to dump from, ~0 to dump from all
+ @param position - position of the first packet to dump in the per thread cache, ~0 to only clear the cache
+ @param max - maximum of packets to dump from each thread
+ @param clear_cache - dispose of any cached data before we begin
+*/
+define trace_v2_dump {
+ u32 client_index;
+ u32 context;
+
+ u32 thread_id [default=0xffffffff];
+ u32 position;
+ u32 max [default=50];
+ bool clear_cache;
+
+ option vat_help = "trace_v2_dump [thread_id <tid>] [position <pos>] [max <max>]";
+};
+
+/** \brief trace_v2_details
+ @param context - sender context, to match reply w/ request
+ @param thread_id - thread index from which the packet come from
+ @param position - position of the packet in its thread cache
+ @param more - true if there is still more packets to dump for this thread
+ @param trace_data - string packet data
+*/
+define trace_v2_details {
+ u32 context;
+
+ u32 thread_id;
+ u32 position;
+ bool more;
+
+ string trace_data[];
+};
+
+autoreply define trace_set_filter_function
+{
+ u32 client_index;
+ u32 context;
+
+ string filter_function_name[];
+};
+
+define trace_filter_function_dump {
+ u32 client_index;
+ u32 context;
+};
+
+define trace_filter_function_details {
+ u32 context;
+
+ bool selected;
+ string name[];
+}; \ No newline at end of file
diff --git a/src/plugins/tracedump/tracedump.c b/src/plugins/tracedump/tracedump.c
index f1073fe247d..6a26865c1f0 100644
--- a/src/plugins/tracedump/tracedump.c
+++ b/src/plugins/tracedump/tracedump.c
@@ -213,12 +213,15 @@ vl_api_trace_dump_t_handler (vl_api_trace_dump_t * mp)
iterator_position = clib_net_to_host_u32 (mp->position);
max_records = clib_net_to_host_u32 (mp->max_records);
- /* Don't overflow the existing queue space. */
- svm_queue_t *q = rp->vl_input_queue;
- u32 queue_slots_available = q->maxsize - q->cursize;
- int chunk = (queue_slots_available > 0) ? queue_slots_available - 1 : 0;
- if (chunk < max_records)
- max_records = chunk;
+ /* Don't overflow the existing queue space for shared memory API clients. */
+ if (rp->vl_input_queue)
+ {
+ svm_queue_t *q = rp->vl_input_queue;
+ u32 queue_slots_available = q->maxsize - q->cursize;
+ int chunk = (queue_slots_available > 0) ? queue_slots_available - 1 : 0;
+ if (chunk < max_records)
+ max_records = chunk;
+ }
/* Need a fresh cache for this client? */
if (vec_len (client_trace_cache) == 0
@@ -285,9 +288,9 @@ vl_api_trace_dump_t_handler (vl_api_trace_dump_t * mp)
{
/* More threads, but not more in this thread? */
if (j == (vec_len (client_trace_cache[i]) - 1))
- dmp->more_threads = 1;
+ last_more_threads = dmp->more_threads = 1;
else
- dmp->more_this_thread = 1;
+ last_more_this_thread = dmp->more_this_thread = 1;
}
/* Done, may or may not be at the end of a batch. */
dmp->done = 0;
@@ -332,6 +335,199 @@ doublebreak:;
vec_free (s);
}
+/* API message handler */
+static void
+vl_api_trace_v2_dump_t_handler (vl_api_trace_v2_dump_t *mp)
+{
+ vl_api_registration_t *rp;
+ vl_api_trace_v2_details_t *dmp;
+ tracedump_main_t *tdmp = &tracedump_main;
+ vlib_trace_header_t ***client_trace_cache, **th;
+ int i, j;
+ u32 client_index;
+ u32 first_position, max, first_thread_id, last_thread_id;
+ u32 n_threads = vlib_get_n_threads ();
+ u8 *s = 0;
+
+ rp = vl_api_client_index_to_registration (mp->client_index);
+ if (rp == 0)
+ return;
+
+ client_index = rp->vl_api_registration_pool_index;
+
+ vec_validate_init_empty (tdmp->traces, client_index, 0);
+
+ client_trace_cache = tdmp->traces[client_index];
+
+ if (mp->clear_cache)
+ {
+ toss_client_cache (tdmp, client_index, client_trace_cache);
+ client_trace_cache = 0;
+ }
+
+ /* Now, where were we? */
+ first_thread_id = last_thread_id = clib_net_to_host_u32 (mp->thread_id);
+ first_position = clib_net_to_host_u32 (mp->position);
+ max = clib_net_to_host_u32 (mp->max);
+
+ if (first_thread_id == ~0)
+ {
+ first_thread_id = 0;
+ last_thread_id = n_threads - 1;
+ }
+
+ /* Don't overflow the existing queue space for shared memory API clients. */
+ if (rp->vl_input_queue)
+ {
+ svm_queue_t *q = rp->vl_input_queue;
+ u32 queue_slots_available = q->maxsize - q->cursize;
+ int chunk = (queue_slots_available > 0) ? queue_slots_available - 1 : 0;
+ /* split available slots among requested threads */
+ if (chunk < max * (last_thread_id - first_thread_id + 1))
+ max = chunk / (last_thread_id - first_thread_id + 1);
+ }
+
+ /* Need a fresh cache for this client? */
+ if (vec_len (client_trace_cache) == 0 && first_position != ~0)
+ {
+ vlib_worker_thread_barrier_sync (vlib_get_first_main ());
+
+ /* Make a slot for each worker thread */
+ vec_validate (client_trace_cache, n_threads - 1);
+ i = 0;
+
+ foreach_vlib_main ()
+ {
+ vlib_trace_main_t *tm = &this_vlib_main->trace_main;
+
+ /* Filter as directed */
+ trace_apply_filter (this_vlib_main);
+
+ pool_foreach (th, tm->trace_buffer_pool)
+ {
+ vec_add1 (client_trace_cache[i], th[0]);
+ }
+
+ /* Sort them by increasing time. */
+ if (vec_len (client_trace_cache[i]))
+ vec_sort_with_function (client_trace_cache[i], trace_cmp);
+
+ i++;
+ }
+ vlib_worker_thread_barrier_release (vlib_get_first_main ());
+ }
+
+ /* Save the cache, one way or the other */
+ tdmp->traces[client_index] = client_trace_cache;
+
+ for (i = first_thread_id;
+ i <= last_thread_id && i < vec_len (client_trace_cache); i++)
+ {
+ // dump a number of 'max' packets per thead
+ for (j = first_position;
+ j < vec_len (client_trace_cache[i]) && j < first_position + max;
+ j++)
+ {
+ th = &client_trace_cache[i][j];
+
+ vec_reset_length (s);
+
+ s =
+ format (s, "%U", format_vlib_trace, vlib_get_first_main (), th[0]);
+
+ dmp = vl_msg_api_alloc (sizeof (*dmp) + vec_len (s));
+ dmp->_vl_msg_id =
+ htons (VL_API_TRACE_V2_DETAILS + (tdmp->msg_id_base));
+ dmp->context = mp->context;
+ dmp->thread_id = ntohl (i);
+ dmp->position = ntohl (j);
+ dmp->more = j < vec_len (client_trace_cache[i]) - 1;
+ vl_api_vec_to_api_string (s, &dmp->trace_data);
+
+ vl_api_send_msg (rp, (u8 *) dmp);
+ }
+ }
+
+ vec_free (s);
+}
+
+static void
+vl_api_trace_clear_cache_t_handler (vl_api_trace_clear_cache_t *mp)
+{
+ vl_api_registration_t *rp;
+ tracedump_main_t *tdmp = &tracedump_main;
+ vlib_trace_header_t ***client_trace_cache;
+ vl_api_trace_clear_cache_reply_t *rmp;
+ u32 client_index;
+
+ rp = vl_api_client_index_to_registration (mp->client_index);
+ if (rp == 0)
+ return;
+
+ client_index = rp->vl_api_registration_pool_index;
+ vec_validate_init_empty (tdmp->traces, client_index, 0);
+ client_trace_cache = tdmp->traces[client_index];
+ toss_client_cache (tdmp, client_index, client_trace_cache);
+
+ int rv = 0;
+ REPLY_MACRO (VL_API_TRACE_CLEAR_CACHE_REPLY);
+}
+
+static void
+vl_api_trace_set_filter_function_t_handler (
+ vl_api_trace_set_filter_function_t *mp)
+{
+ vl_api_trace_set_filter_function_reply_t *rmp;
+ tracedump_main_t *tdmp = &tracedump_main;
+ unformat_input_t input = { 0 };
+ vlib_is_packet_traced_fn_t *f;
+ char *filter_name;
+ int rv = 0;
+ filter_name = vl_api_from_api_to_new_c_string (&mp->filter_function_name);
+ unformat_init_cstring (&input, filter_name);
+ if (unformat (&input, "%U", unformat_vlib_trace_filter_function, &f) == 0)
+ {
+ rv = -1;
+ goto done;
+ }
+ vlib_set_trace_filter_function (f);
+done:
+ unformat_free (&input);
+ vec_free (filter_name);
+ REPLY_MACRO (VL_API_TRACE_SET_FILTER_FUNCTION_REPLY);
+}
+
+static void
+vl_api_trace_filter_function_dump_t_handler (
+ vl_api_trace_filter_function_dump_t *mp)
+{
+ vl_api_registration_t *rp;
+ vl_api_trace_filter_function_details_t *dmp;
+ tracedump_main_t *tdmp = &tracedump_main;
+ vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main;
+ vlib_trace_filter_function_registration_t *reg =
+ tfm->trace_filter_registration;
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_is_packet_traced_fn_t *current =
+ vm->trace_main.current_trace_filter_function;
+ rp = vl_api_client_index_to_registration (mp->client_index);
+
+ if (rp == 0)
+ return;
+
+ while (reg)
+ {
+ dmp = vl_msg_api_alloc (sizeof (*dmp) + strlen (reg->name));
+ dmp->_vl_msg_id =
+ htons (VL_API_TRACE_FILTER_FUNCTION_DETAILS + (tdmp->msg_id_base));
+ dmp->context = mp->context;
+ vl_api_c_string_to_api_string (reg->name, &dmp->name);
+ dmp->selected = current == reg->function;
+ vl_api_send_msg (rp, (u8 *) dmp);
+ reg = reg->next;
+ }
+}
+
/* API definitions */
#include <tracedump/tracedump.api.c>
@@ -349,19 +545,18 @@ tracedump_init (vlib_main_t * vm)
/* Add our API messages to the global name_crc hash table */
tdmp->msg_id_base = setup_message_id_table ();
- am->is_mp_safe[tdmp->msg_id_base + VL_API_TRACE_DUMP] = 1;
+ vl_api_set_msg_thread_safe (am, tdmp->msg_id_base + VL_API_TRACE_DUMP, 1);
+ vl_api_set_msg_thread_safe (am, tdmp->msg_id_base + VL_API_TRACE_V2_DUMP, 1);
return error;
}
VLIB_INIT_FUNCTION (tracedump_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Streaming packet trace dump plugin",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tracedump/tracedump_test.c b/src/plugins/tracedump/tracedump_test.c
index 3bf50efb4ac..b813acc3ecc 100644
--- a/src/plugins/tracedump/tracedump_test.c
+++ b/src/plugins/tracedump/tracedump_test.c
@@ -155,6 +155,18 @@ vl_api_trace_details_t_handler (vl_api_trace_details_t * dmp)
packet_number, vl_api_format_string, (&dmp->trace_data));
}
+static void
+vl_api_trace_v2_details_t_handler (vl_api_trace_v2_details_t *dmp)
+{
+ u32 thread_id, position;
+
+ thread_id = clib_net_to_host_u32 (dmp->thread_id);
+ position = clib_net_to_host_u32 (dmp->position);
+ fformat (stdout, "thread %d position %d more %d", thread_id, position,
+ dmp->more);
+ fformat (stdout, "Packet %d\n%U\n\n", position, vl_api_format_string,
+ (&dmp->trace_data));
+}
static void
vl_api_trace_dump_reply_t_handler (vl_api_trace_dump_reply_t * rmp)
@@ -203,7 +215,7 @@ vl_api_trace_dump_reply_t_handler (vl_api_trace_dump_reply_t * rmp)
}
static int
-api_trace_dump (vat_main_t * vam)
+api_trace_dump (vat_main_t *vam)
{
vl_api_trace_dump_t *mp;
int ret;
@@ -220,8 +232,26 @@ api_trace_dump (vat_main_t * vam)
return ret;
}
+static int
+api_trace_v2_dump (vat_main_t *vam)
+{
+ vl_api_trace_v2_dump_t *mp;
+ int ret;
+
+ M (TRACE_V2_DUMP, mp);
+ mp->clear_cache = 1;
+ mp->thread_id = ~0;
+ mp->position = 0;
+ mp->max = clib_host_to_net_u32 (10);
+
+ S (mp);
+
+ W (ret);
+ return ret;
+}
+
int
-api_trace_clear_capture (vat_main_t * vam)
+api_trace_clear_capture (vat_main_t *vam)
{
vl_api_trace_clear_capture_t *mp;
int ret;
@@ -232,26 +262,75 @@ api_trace_clear_capture (vat_main_t * vam)
return ret;
}
+static int
+api_trace_clear_cache (vat_main_t *vam)
+{
+ vl_api_trace_clear_capture_t *mp;
+ int ret;
+
+ M (TRACE_CLEAR_CACHE, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_trace_set_filter_function (vat_main_t *vam)
+{
+ vl_api_trace_set_filter_function_t *mp;
+ int ret;
+ M (TRACE_SET_FILTER_FUNCTION, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+static int
+api_trace_filter_function_dump (vat_main_t *vam)
+{
+ vl_api_trace_filter_function_dump_t *mp;
+ int ret;
+
+ M (TRACE_FILTER_FUNCTION_DUMP, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_trace_filter_function_details_t_handler (
+ vl_api_trace_filter_function_details_t *dmp)
+{
+ fformat (stdout, "name: %U, selected: %u\n\n", vl_api_format_string,
+ &dmp->name, dmp->selected);
+}
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_endianfun
#include <tracedump/tracedump.api.h>
#undef vl_endianfun
#define vl_printfun
#include <tracedump/tracedump.api.h>
#undef vl_printfun
+#define vl_calcsizefun
+#include <tracedump/tracedump.api.h>
+#undef vl_calcsizefun
void
manual_setup_message_id_table (vat_main_t * vam)
{
- vl_msg_api_set_handlers (VL_API_TRACE_DETAILS
- + tracedump_test_main.msg_id_base, "trace_details",
- vl_api_trace_details_t_handler, vl_noop_handler,
- vl_api_trace_details_t_endian,
- vl_api_trace_details_t_print,
- sizeof (vl_api_trace_details_t), 1);
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){
+ .id = VL_API_TRACE_DETAILS + tracedump_test_main.msg_id_base,
+ .name = "trace_details",
+ .handler = vl_api_trace_details_t_handler,
+ .endian = vl_api_trace_details_t_endian,
+ .format_fn = vl_api_trace_details_t_format,
+ .size = sizeof (vl_api_trace_details_t),
+ .traced = 1,
+ .tojson = vl_api_trace_details_t_tojson,
+ .fromjson = vl_api_trace_details_t_fromjson,
+ .calc_size = vl_api_trace_details_t_calc_size,
+ });
}
#define VL_API_LOCAL_SETUP_MESSAGE_ID_TABLE manual_setup_message_id_table
diff --git a/src/plugins/tracenode/CMakeLists.txt b/src/plugins/tracenode/CMakeLists.txt
new file mode 100644
index 00000000000..6b6ba2e9865
--- /dev/null
+++ b/src/plugins/tracenode/CMakeLists.txt
@@ -0,0 +1,37 @@
+
+# Copyright (c) 2023 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(tracenode
+ SOURCES
+ node.c
+ api.c
+ cli.c
+ plugin.c
+ tracenode.c
+
+ MULTIARCH_SOURCES
+ node.c
+
+ API_FILES
+ tracenode.api
+
+ INSTALL_HEADERS
+ tracenode.h
+
+ API_TEST_SOURCES
+ test.c
+
+ COMPONENT
+ vpp-plugin-devtools
+)
diff --git a/src/plugins/tracenode/FEATURE.yaml b/src/plugins/tracenode/FEATURE.yaml
new file mode 100644
index 00000000000..c405dd11d59
--- /dev/null
+++ b/src/plugins/tracenode/FEATURE.yaml
@@ -0,0 +1,8 @@
+---
+name: Trace node
+maintainer: Maxime Peim <mpeim@cisco.com>
+features:
+ - allow trace filtering on encapsulated (inner) packets
+description: "Allow tracing on IP feature arc. Encapsulated packets can then be traced and filtered."
+state: experimental
+properties: [CLI, API]
diff --git a/src/plugins/tracenode/api.c b/src/plugins/tracenode/api.c
new file mode 100644
index 00000000000..0b01ad8b9f5
--- /dev/null
+++ b/src/plugins/tracenode/api.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <tracenode/tracenode.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <tracenode/tracenode.api_enum.h>
+#include <tracenode/tracenode.api_types.h>
+
+#define REPLY_MSG_ID_BASE (tnm->msg_id_base)
+#include <vlibapi/api_helper_macros.h>
+
+static void
+vl_api_tracenode_enable_disable_t_handler (
+ vl_api_tracenode_enable_disable_t *mp)
+{
+ tracenode_main_t *tnm = &tracenode_main;
+ vl_api_tracenode_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = tracenode_feature_enable_disable (ntohl (mp->sw_if_index), mp->is_pcap,
+ mp->enable);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_TRACENODE_ENABLE_DISABLE_REPLY);
+}
+
+#include <tracenode/tracenode.api.c>
+
+clib_error_t *
+tracenode_plugin_api_hookup (vlib_main_t *vm)
+{
+ tracenode_main_t *tnm = &tracenode_main;
+
+ /* ask for a correctly-sized block of API message decode slots */
+ tnm->msg_id_base = setup_message_id_table ();
+
+ return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/plugins/tracenode/cli.c b/src/plugins/tracenode/cli.c
new file mode 100644
index 00000000000..8d0ed4176d6
--- /dev/null
+++ b/src/plugins/tracenode/cli.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <tracenode/tracenode.h>
+
+static clib_error_t *
+tracenode_feature_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 sw_if_index = ~0;
+ int enable = 1, is_pcap = 0;
+ int rv;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "disable"))
+ enable = 0;
+ else if (unformat (line_input, "pcap"))
+ is_pcap = 1;
+ else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
+ vnet_get_main (), &sw_if_index))
+ {
+ if (sw_if_index == 0)
+ return clib_error_return (0, "Local interface not supported...");
+ }
+
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ return clib_error_return (0, "Software interface required");
+
+ if ((rv = tracenode_feature_enable_disable (sw_if_index, is_pcap, enable)) !=
+ 0)
+ return clib_error_return (
+ 0, "vnet_enable_disable_tracenode_feature returned %d", rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (tracenode_feature, static) = {
+ .path = "tracenode feature",
+ .short_help = "tracenode feature <intfc> [disable] [pcap]",
+ .function = tracenode_feature_cmd_fn,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/tracenode/node.c b/src/plugins/tracenode/node.c
new file mode 100644
index 00000000000..444d93f1708
--- /dev/null
+++ b/src/plugins/tracenode/node.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vnet/feature/feature.h>
+#include <vnet/classify/pcap_classify.h>
+
+typedef struct
+{
+ u32 sw_if_index;
+} tracenode_trace_t;
+
+static u8 *
+format_tracenode_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ tracenode_trace_t *t = va_arg (*args, tracenode_trace_t *);
+
+ s = format (s, "Packet traced from interface %U added",
+ format_vnet_sw_if_index_name, vnm, t->sw_if_index);
+ return s;
+}
+
+static_always_inline u32
+tracenode_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int is_pcap)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_pcap_t *pp = &vnm->pcap;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+ u32 *from = vlib_frame_vector_args (frame), *from0 = from;
+ const u32 n_tot = frame->n_vectors;
+ u32 n_left = n_tot;
+
+ vlib_get_buffers (vm, from, b, n_tot);
+
+ while (n_left > 0)
+ {
+ /* TODO: dual/quad loop */
+
+ /* enqueue b0 to the current next frame */
+ vnet_feature_next_u16 (next, b[0]);
+
+ /* buffer already traced */
+ if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ goto skip;
+
+ if (is_pcap && vnet_is_packet_pcaped (pp, b[0], ~0))
+ {
+ pcap_add_buffer (&pp->pcap_main, vm, from0[0],
+ pp->max_bytes_per_pkt);
+ }
+ else if (!is_pcap && vlib_trace_buffer (vm, node, next[0], b[0],
+ 1 /* follow_chain */))
+ {
+ tracenode_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof *tr);
+ tr->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ }
+
+ skip:
+ b++;
+ from0++;
+ next++;
+ n_left--;
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, n_tot);
+ return n_tot;
+}
+
+VLIB_NODE_FN (trace_filtering_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return tracenode_inline (vm, node, frame, 0 /* is_pcap */);
+}
+
+VLIB_NODE_FN (pcap_filtering_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return tracenode_inline (vm, node, frame, 1 /* is_pcap */);
+}
+
+VLIB_REGISTER_NODE (trace_filtering_node) = {
+ .name = "trace-filtering",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .format_trace = format_tracenode_trace,
+};
+
+VLIB_REGISTER_NODE (pcap_filtering_node) = {
+ .name = "pcap-filtering",
+ .vector_size = sizeof (u32),
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .format_trace = format_tracenode_trace,
+};
+
+VNET_FEATURE_INIT (trace_filtering4, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "trace-filtering",
+ .runs_after = VNET_FEATURES ("ip4-full-reassembly-feature",
+ "ip4-sv-reassembly-feature"),
+};
+
+VNET_FEATURE_INIT (trace_filtering6, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "trace-filtering",
+ .runs_after = VNET_FEATURES ("ip6-full-reassembly-feature",
+ "ip6-sv-reassembly-feature"),
+};
+
+VNET_FEATURE_INIT (pcap_filtering4, static) = {
+ .arc_name = "ip4-unicast",
+ .node_name = "pcap-filtering",
+ .runs_after = VNET_FEATURES ("ip4-full-reassembly-feature",
+ "ip4-sv-reassembly-feature"),
+};
+
+VNET_FEATURE_INIT (pcap_filtering6, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "pcap-filtering",
+ .runs_after = VNET_FEATURES ("ip6-full-reassembly-feature",
+ "ip6-sv-reassembly-feature"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/gbp/gbp_types.h b/src/plugins/tracenode/plugin.c
index ac983b1cdd2..19ce6ba5610 100644
--- a/src/plugins/gbp/gbp_types.h
+++ b/src/plugins/tracenode/plugin.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Copyright (c) 2023 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -13,19 +13,14 @@
* limitations under the License.
*/
-#ifndef __GBP_TYPES_H__
-#define __GBP_TYPES_H__
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
-#include <vnet/vnet.h>
-
-typedef u32 vnid_t;
-#define VNID_INVALID ((u16)~0)
-
-typedef u16 gbp_scope_t;
-typedef u16 sclass_t;
-#define SCLASS_INVALID ((u16)~0)
-
-#endif
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Tracing packet node",
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/tracenode/test.c b/src/plugins/tracenode/test.c
new file mode 100644
index 00000000000..a409fd2a59a
--- /dev/null
+++ b/src/plugins/tracenode/test.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vnet/api_errno.h>
+#include <stdbool.h>
+
+#define __plugin_msg_base tracenode_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <tracenode/tracenode.api_enum.h>
+#include <tracenode/tracenode.api_types.h>
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} tracenode_test_main_t;
+
+tracenode_test_main_t tracenode_test_main;
+
+int
+api_tracenode_enable_disable (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_tracenode_enable_disable_t *mp;
+ u32 sw_if_index;
+ bool is_pcap, enable;
+
+ sw_if_index = ~0;
+ is_pcap = false;
+ enable = true;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "disable"))
+ enable = 0;
+ else if (unformat (i, "pcap"))
+ is_pcap = 1;
+ else if (unformat (i, "%U", unformat_vnet_sw_interface, vnet_get_main (),
+ &sw_if_index))
+ {
+ if (sw_if_index == 0)
+ {
+ clib_warning ("Local interface not supported...");
+ return -99;
+ }
+ }
+
+ else
+ {
+ clib_warning ("Unknown input: %U\n", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (TRACENODE_ENABLE_DISABLE, mp);
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->is_pcap = is_pcap;
+ mp->enable = enable;
+
+ int ret = 0;
+ S (mp);
+ W (ret);
+
+ return ret;
+}
+
+#include <tracenode/tracenode.api_test.c>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/tracenode/tracenode.api b/src/plugins/tracenode/tracenode.api
new file mode 100644
index 00000000000..198f8218b55
--- /dev/null
+++ b/src/plugins/tracenode/tracenode.api
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "0.1.0";
+
+import "vnet/interface_types.api";
+
+/** \brief Enable/disable trace filtering feature
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface on which to enable/disable trace filtering feature
+ @param is_pcap - if non-zero enable the feature for pcap capture, else for trace
+ @param enable - if non-zero then enable the feature, else disable it
+*/
+autoreply define tracenode_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ bool is_pcap [default=false];
+ bool enable [default=true];
+
+ option vat_help = "tracenode_enable_disable <intfc> [disable] [pcap]";
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/tracenode/tracenode.c b/src/plugins/tracenode/tracenode.c
new file mode 100644
index 00000000000..e292c7da95c
--- /dev/null
+++ b/src/plugins/tracenode/tracenode.c
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <tracenode/tracenode.h>
+
+tracenode_main_t tracenode_main;
+
+int
+tracenode_feature_enable_disable (u32 sw_if_index, bool is_pcap, bool enable)
+{
+ tracenode_main_t *tnm = &tracenode_main;
+ char *node_name = is_pcap ? "pcap-filtering" : "trace-filtering";
+ int rv = 0;
+
+ if (pool_is_free_index (tnm->vnet_main->interface_main.sw_interfaces,
+ sw_if_index))
+ return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+
+ if (clib_bitmap_get (tnm->feature_enabled_by_sw_if, sw_if_index) == enable)
+ return 0;
+
+ if ((rv = vnet_feature_enable_disable ("ip4-unicast", node_name, sw_if_index,
+ enable, 0, 0)) != 0)
+ return rv;
+
+ if ((rv = vnet_feature_enable_disable ("ip6-unicast", node_name, sw_if_index,
+ enable, 0, 0)) != 0)
+ return rv;
+
+ tnm->feature_enabled_by_sw_if =
+ clib_bitmap_set (tnm->feature_enabled_by_sw_if, sw_if_index, enable);
+
+ return 0;
+}
+
+static clib_error_t *
+tracenode_init (vlib_main_t *vm)
+{
+ tracenode_main_t *tnm = &tracenode_main;
+ clib_error_t *error = 0;
+
+ memset (tnm, 0, sizeof (*tnm));
+
+ tnm->vnet_main = vnet_get_main ();
+
+ error = tracenode_plugin_api_hookup (vm);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (tracenode_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/tracenode/tracenode.h b/src/plugins/tracenode/tracenode.h
new file mode 100644
index 00000000000..7af60aa20b1
--- /dev/null
+++ b/src/plugins/tracenode/tracenode.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef _TRACENODE_H_
+#define _TRACENODE_H_
+#include <vlib/vlib.h>
+#include <vnet/feature/feature.h>
+#include <stdbool.h>
+
+typedef struct
+{
+ vnet_main_t *vnet_main;
+ uword *feature_enabled_by_sw_if;
+ u16 msg_id_base;
+} tracenode_main_t;
+
+extern tracenode_main_t tracenode_main;
+
+clib_error_t *tracenode_plugin_api_hookup (vlib_main_t *vm);
+
+int tracenode_feature_enable_disable (u32 sw_if_index, bool is_pcap,
+ bool enable);
+
+#endif /* _TRACENODE_H_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/unittest/CMakeLists.txt b/src/plugins/unittest/CMakeLists.txt
index 115ced3393b..0382841379c 100644
--- a/src/plugins/unittest/CMakeLists.txt
+++ b/src/plugins/unittest/CMakeLists.txt
@@ -15,9 +15,10 @@ set(chacha20_poly1305)
if (OPENSSL_VERSION VERSION_GREATER_EQUAL 1.1.0)
set(chacha20_poly1305 crypto/chacha20_poly1305.c)
endif()
-
+include_directories(${CMAKE_SOURCE_DIR}/vpp-api ${CMAKE_CURRENT_BINARY_DIR}/../../vpp-api)
add_vpp_plugin(unittest
SOURCES
+ api_test.c
api_fuzz_test.c
bier_test.c
bihash_test.c
@@ -25,6 +26,7 @@ add_vpp_plugin(unittest
crypto/aes_cbc.c
crypto/aes_ctr.c
crypto/aes_gcm.c
+ crypto/aes_gmac.c
${chacha20_poly1305}
crypto/rfc2202_hmac_md5.c
crypto/rfc2202_hmac_sha1.c
@@ -32,8 +34,11 @@ add_vpp_plugin(unittest
crypto/sha.c
crypto_test.c
fib_test.c
+ gso_test.c
+ hash_test.c
interface_test.c
ipsec_test.c
+ ip_psh_cksum_test.c
llist_test.c
mactime_test.c
mem_bulk_test.c
@@ -54,4 +59,8 @@ add_vpp_plugin(unittest
util_test.c
vlib_test.c
counter_test.c
+
+ COMPONENT
+ vpp-plugin-devtools
+ LINK_LIBRARIES vapiclient
)
diff --git a/src/plugins/unittest/api_fuzz_test.c b/src/plugins/unittest/api_fuzz_test.c
index 113835300bb..121c52a310b 100644
--- a/src/plugins/unittest/api_fuzz_test.c
+++ b/src/plugins/unittest/api_fuzz_test.c
@@ -27,7 +27,7 @@ static u32 fuzz_seed = 0xdeaddabe;
static u16 fuzz_first;
static u16 fuzz_cli_first, fuzz_cli_last;
-extern void (*vl_msg_api_fuzz_hook) (u16, void *);
+extern void (*vl_mem_api_fuzz_hook) (u16, void *);
static void
fuzz_hook (u16 id, void *the_msg)
@@ -114,10 +114,10 @@ test_api_fuzz_command_fn (vlib_main_t * vm,
if (fuzz_first == 0xFFFF)
{
- vl_msg_api_fuzz_hook = 0;
+ vl_mem_api_fuzz_hook = 0;
return clib_error_return (0, "fuzz_first is ~0, fuzzing disabled");
}
- vl_msg_api_fuzz_hook = fuzz_hook;
+ vl_mem_api_fuzz_hook = fuzz_hook;
vlib_cli_output (vm, "Fuzzing enabled: first %d, skip cli range %d - %d",
(u32) fuzz_first, (u32) fuzz_cli_first,
@@ -126,14 +126,12 @@ test_api_fuzz_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_api_fuzz, static) = {
.path = "test api fuzz",
.short_help = "test api fuzz [disable][seed nnn]\n"
" [fuzz-first nn][fuzz-cli-first nn][fuzz-cli-last nn]",
.function = test_api_fuzz_command_fn,
};
-/* *INDENT-ON* */
static u8 main_loop_enter_enable_api_fuzz;
@@ -172,7 +170,7 @@ api_fuzz_api_init (vlib_main_t * vm)
(0, "Couldn't find 'memclnt_keepalive_reply' ID");
}
/* Turn on fuzzing */
- vl_msg_api_fuzz_hook = fuzz_hook;
+ vl_mem_api_fuzz_hook = fuzz_hook;
return 0;
}
diff --git a/src/plugins/unittest/api_test.c b/src/plugins/unittest/api_test.c
new file mode 100644
index 00000000000..515bafefa36
--- /dev/null
+++ b/src/plugins/unittest/api_test.c
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+#include <stdbool.h>
+#include <vapi/vapi.h>
+
+#include <vapi/memclnt.api.vapi.h>
+#include <vapi/vlib.api.vapi.h>
+#include <vapi/vpe.api.vapi.h>
+
+/*
+ * Example of how to call the VPP binary API from an internal API client.
+ * Using the VAPI C language binding.
+ */
+
+DEFINE_VAPI_MSG_IDS_VPE_API_JSON;
+
+/*
+ * Connect an VPP binary API client to VPP API
+ */
+static vapi_ctx_t
+connect_to_vpp (void)
+{
+ vapi_ctx_t ctx;
+ if (vapi_ctx_alloc (&ctx) != VAPI_OK)
+ {
+ clib_warning ("ctx_alloc failed");
+ return 0;
+ }
+ if (vapi_connect_from_vpp (ctx, "apifromplugin", 64, 32, VAPI_MODE_BLOCKING,
+ true) != VAPI_OK)
+ {
+ clib_warning ("vapi_connect failed");
+ vapi_ctx_free (ctx);
+ return 0;
+ }
+ return ctx;
+}
+
+/*
+ * Gets called when the show_version_reply message is received
+ */
+vapi_error_e
+show_version_cb (vapi_ctx_t ctx, void *caller_ctx, vapi_error_e rv,
+ bool is_last, vapi_payload_show_version_reply *p)
+{
+ if (rv != VAPI_OK)
+ clib_warning ("Return value: %d", rv);
+ fformat (
+ stdout,
+ "show_version_reply: program: `%s', version: `%s', build directory: "
+ "`%s', build date: `%s'\n",
+ p->program, p->version, p->build_directory, p->build_date);
+ return VAPI_OK;
+}
+
+static void *
+api_show_version_blocking_fn (void *args)
+{
+ vapi_ctx_t ctx;
+
+ if ((ctx = connect_to_vpp ()) == 0)
+ return clib_error_return (0, "API connection failed");
+
+ int called;
+ vapi_msg_show_version *sv = vapi_alloc_show_version (ctx);
+ vapi_error_e vapi_rv = vapi_show_version (ctx, sv, show_version_cb, &called);
+ if (vapi_rv != VAPI_OK)
+ clib_warning ("call failed");
+
+ vapi_disconnect_from_vpp (ctx);
+ vapi_ctx_free (ctx);
+
+ return 0;
+}
+
+static clib_error_t *
+test_api_test_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ /* Run call in a pthread */
+ pthread_t thread;
+ int rv = pthread_create (&thread, NULL, api_show_version_blocking_fn, 0);
+ if (rv)
+ {
+ return clib_error_return (0, "API call failed");
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (test_api_command, static) = {
+ .path = "test api internal",
+ .short_help = "test internal api client",
+ .function = test_api_test_command_fn,
+};
diff --git a/src/plugins/unittest/bier_test.c b/src/plugins/unittest/bier_test.c
index 9b4ba67e7cd..924c9212929 100644
--- a/src/plugins/unittest/bier_test.c
+++ b/src/plugins/unittest/bier_test.c
@@ -118,30 +118,29 @@ bier_test_mk_intf (u32 ninterfaces)
for (i = 0; i < ninterfaces; i++)
{
- hw_address[5] = i;
-
- error = ethernet_register_interface(vnet_get_main(),
- test_interface_device_class.index,
- i /* instance */,
- hw_address,
- &tm->hw_if_indicies[i],
- /* flag change */ 0);
-
- error = vnet_hw_interface_set_flags(vnet_get_main(),
- tm->hw_if_indicies[i],
- VNET_HW_INTERFACE_FLAG_LINK_UP);
- BIER_TEST((NULL == error), "ADD interface %d", i);
-
- tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
- tm->hw_if_indicies[i]);
- vec_validate (ip4_main.fib_index_by_sw_if_index, tm->hw[i]->sw_if_index);
- vec_validate (ip6_main.fib_index_by_sw_if_index, tm->hw[i]->sw_if_index);
- ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- error = vnet_sw_interface_set_flags(vnet_get_main(),
- tm->hw[i]->sw_if_index,
- VNET_SW_INTERFACE_FLAG_ADMIN_UP);
- BIER_TEST((NULL == error), "UP interface %d", i);
+ vnet_eth_interface_registration_t eir = {};
+ vnet_main_t *vnm = vnet_get_main ();
+
+ hw_address[5] = i;
+
+ eir.dev_class_index = test_interface_device_class.index;
+ eir.dev_instance = i;
+ eir.address = hw_address;
+ tm->hw_if_indicies[i] = vnet_eth_register_interface (vnm, &eir);
+
+ error =
+ vnet_hw_interface_set_flags (vnet_get_main (), tm->hw_if_indicies[i],
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ BIER_TEST ((NULL == error), "ADD interface %d", i);
+
+ tm->hw[i] =
+ vnet_get_hw_interface (vnet_get_main (), tm->hw_if_indicies[i]);
+ ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ error =
+ vnet_sw_interface_set_flags (vnet_get_main (), tm->hw[i]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ BIER_TEST ((NULL == error), "UP interface %d", i);
}
/*
* re-eval after the inevitable realloc
@@ -770,10 +769,9 @@ bier_test_mpls_imp (void)
.frp_flags = FIB_ROUTE_PATH_BIER_IMP,
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
};
- mfib_table_entry_path_update(0, // default table
- &pfx_1_1_1_1_c_239_1_1_1 ,
- MFIB_SOURCE_API,
- &path_via_bier_imp_1);
+ mfib_table_entry_path_update (0, // default table
+ &pfx_1_1_1_1_c_239_1_1_1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_bier_imp_1);
mfib_table_entry_delete(0,
&pfx_1_1_1_1_c_239_1_1_1 ,
MFIB_SOURCE_API);
diff --git a/src/plugins/unittest/bihash_test.c b/src/plugins/unittest/bihash_test.c
index c5cc752ade0..1d3aeeca23d 100644
--- a/src/plugins/unittest/bihash_test.c
+++ b/src/plugins/unittest/bihash_test.c
@@ -207,16 +207,16 @@ test_bihash_threads (bihash_test_main_t * tm)
tm->thread_barrier = 1;
/* Start the worker threads */
+ tm->threads_running = 0;
for (i = 0; i < tm->nthreads; i++)
{
rv = pthread_create (&handle, NULL, test_bihash_thread_fn,
(void *) (uword) i);
if (rv)
- {
- clib_unix_warning ("pthread_create returned %d", rv);
- }
+ clib_unix_warning ("pthread_create returned %d", rv);
+ else
+ tm->threads_running++;
}
- tm->threads_running = i;
tm->sequence_number = 0;
CLIB_MEMORY_BARRIER ();
@@ -338,14 +338,18 @@ test_bihash (bihash_test_main_t * tm)
{
kv.key = tm->keys[i];
if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
- if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
- clib_warning
- ("[%d] search for key %lld failed unexpectedly\n", i,
- tm->keys[i]);
+ {
+ if (BV (clib_bihash_search) (h, &kv, &kv) < 0)
+ {
+ return clib_error_return (
+ 0, "[%d] search for key %lld failed unexpectedly\n", i,
+ tm->keys[i]);
+ }
+ }
if (kv.value != (u64) (i + 1))
- clib_warning
- ("[%d] search for key %lld returned %lld, not %lld\n", i,
- tm->keys, kv.value, (u64) (i + 1));
+ return clib_error_return (
+ 0, "[%d] search for key %lld returned %lld, not %lld\n", i,
+ tm->keys, kv.value, (u64) (i + 1));
}
}
@@ -373,7 +377,8 @@ test_bihash (bihash_test_main_t * tm)
{
p = hash_get (tm->key_hash, tm->keys[i]);
if (p == 0 || p[0] != (uword) (i + 1))
- clib_warning ("ugh, couldn't find %lld\n", tm->keys[i]);
+ return clib_error_return (0, "ugh, couldn't find %lld\n",
+ tm->keys[i]);
}
}
@@ -401,8 +406,8 @@ test_bihash (bihash_test_main_t * tm)
rv = BV (clib_bihash_add_del) (h, &kv, 0 /* is_add */ );
if (rv < 0)
- clib_warning ("delete key %lld not ok but should be",
- tm->keys[i]);
+ return clib_error_return (
+ 0, "delete key %lld not ok but should be", tm->keys[i]);
if (tm->careful_delete_tests)
{
@@ -412,14 +417,14 @@ test_bihash (bihash_test_main_t * tm)
rv = BV (clib_bihash_search) (h, &kv, &kv);
if (j <= i && rv >= 0)
{
- clib_warning
- ("i %d j %d search ok but should not be, value %lld",
- i, j, kv.value);
+ return clib_error_return (
+ 0, "i %d j %d search ok but should not be, value %lld",
+ i, j, kv.value);
}
if (j > i && rv < 0)
{
- clib_warning ("i %d j %d search not ok but should be",
- i, j);
+ return clib_error_return (
+ 0, "i %d j %d search not ok but should be", i, j);
}
}
}
@@ -471,6 +476,7 @@ test_bihash_command_fn (vlib_main_t * vm,
tm->ncycles = 10;
tm->report_every_n = 50000;
tm->seed = 0x1badf00d;
+ tm->search_iter = 1;
memset (&tm->stats, 0, sizeof (tm->stats));
@@ -512,7 +518,7 @@ test_bihash_command_fn (vlib_main_t * vm,
/* Preallocate hash table, key vector */
tm->key_hash = hash_create (tm->nitems, sizeof (uword));
vec_validate (tm->keys, tm->nitems - 1);
- _vec_len (tm->keys) = 0;
+ vec_set_len (tm->keys, 0);
switch (which)
{
@@ -535,14 +541,12 @@ test_bihash_command_fn (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_bihash_command, static) =
{
.path = "test bihash",
.short_help = "test bihash",
.function = test_bihash_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
bihash_test_init (vlib_main_t * vm)
diff --git a/src/plugins/unittest/bitmap_test.c b/src/plugins/unittest/bitmap_test.c
index 04a06d39a18..1b05be7b333 100644
--- a/src/plugins/unittest/bitmap_test.c
+++ b/src/plugins/unittest/bitmap_test.c
@@ -12,60 +12,219 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+#include <stdbool.h>
#include <vlib/vlib.h>
#include <vppinfra/bitmap.h>
static clib_error_t *
+check_bitmap (const char *test_name, const uword *bm, u32 expected_len, ...)
+{
+ clib_error_t *error = 0;
+ u32 i;
+ uword expected_value;
+
+ va_list va;
+ va_start (va, expected_len);
+
+ if (vec_len (bm) != expected_len)
+ {
+ error = clib_error_create ("%s failed, wrong "
+ "bitmap's size (%u != %u expected)",
+ test_name, vec_len (bm), expected_len);
+ goto done;
+ }
+
+ for (i = 0; i < expected_len; ++i)
+ {
+ expected_value = va_arg (va, uword);
+ if (bm[i] != expected_value)
+ {
+ error = clib_error_create (
+ "%s failed, wrong "
+ "bitmap's value at index %u (%u != %u expected)",
+ test_name, i, bm[i], expected_value);
+ break;
+ }
+ }
+
+done:
+ va_end (va);
+ return error;
+}
+
+static clib_error_t *
+check_bitmap_will_expand (const char *test_name, uword **bm, uword index,
+ bool expected_will_expand)
+{
+ uword max_bytes = vec_max_bytes (*bm);
+ bool result;
+
+ result = clib_bitmap_will_expand (*bm, index);
+ if (result != expected_will_expand)
+ {
+ return clib_error_create (
+ "%s failed, wrong "
+ "bitmap's expansion before set (%u != %u expected)",
+ test_name, result, expected_will_expand);
+ }
+
+ *bm = clib_bitmap_set (*bm, index, 1);
+ result = vec_max_bytes (*bm) > max_bytes;
+ if (result != expected_will_expand)
+ {
+ return clib_error_create (
+ "%s failed, wrong "
+ "bitmap's expansion after set (%u != %u expected)",
+ test_name, result, expected_will_expand);
+ }
+
+ return 0;
+}
+
+static clib_error_t *
test_bitmap_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
- u64 *bm = 0;
- u64 *bm2 = 0;
- u64 *dup;
- uword junk;
+ clib_error_t *error = 0;
+ uword *bm = 0;
+ uword *bm2 = 0;
+ uword *bm3 = 0;
+ uword *dup = 0;
- bm = clib_bitmap_set_multiple (bm, 2, ~0ULL, BITS (uword));
+ /* bm should look like:
+ * bm[0] bm[1]
+ * LSB |0011...11|1100...00| MSB
+ */
+ bm = clib_bitmap_set_multiple (0, 2, ~0ULL, BITS (uword));
+ error = check_bitmap ("clib_bitmap_set_multiple 1", bm, 2, ~0ULL << 2, 3);
+ if (error != 0)
+ goto done;
- junk = clib_bitmap_next_clear (bm, 3);
- junk = clib_bitmap_next_clear (bm, 65);
+ /* bm2 should look like:
+ * bm2[0]
+ * LSB |11...11| MSB
+ */
+ bm2 = clib_bitmap_set_multiple (0, 0, ~0ULL, BITS (uword));
+ error = check_bitmap ("clib_bitmap_set_multiple 2", bm2, 1, ~0ULL);
+ if (error != 0)
+ goto done;
- bm2 = clib_bitmap_set_multiple (bm2, 0, ~0ULL, BITS (uword));
- _vec_len (bm2) = 1;
- junk = clib_bitmap_next_clear (bm2, 0);
+ /* bm should look like:
+ * bm[0] bm[1]
+ * LSB |0011...1100|000...000| MSB
+ */
+ bm = clib_bitmap_set_multiple (bm, 2, pow2_mask (BITS (uword) - 3),
+ BITS (uword));
+ error = check_bitmap ("clib_bitmap_set_multiple 3", bm, 2,
+ pow2_mask (BITS (uword) - 3) << 2, 0);
+ if (error != 0)
+ goto done;
+ /* bm2 should look like:
+ * bm2[0]
+ * LSB |101...111| MSB
+ */
+ bm2 = clib_bitmap_xori (bm2, 1);
+ error = check_bitmap ("clib_bitmap_xori 1", bm2, 1, ~0ULL ^ 2);
+ if (error != 0)
+ goto done;
- bm = clib_bitmap_set_multiple (bm, 2, ~0ULL, BITS (uword) - 3);
- junk = clib_bitmap_get_multiple (bm, 2, BITS (uword));
- junk = clib_bitmap_first_set (bm);
- junk = 1 << 3;
- bm = clib_bitmap_xori (bm, junk);
- bm = clib_bitmap_andi (bm, junk);
- bm = clib_bitmap_xori_notrim (bm, junk);
- bm = clib_bitmap_andi_notrim (bm, junk);
+ /* bm should look like:
+ * bm[0] bm[1]
+ * LSB |0011...1100|000...001| MSB
+ */
+ bm = clib_bitmap_xori (bm, 2 * BITS (uword) - 1);
+ error = check_bitmap ("clib_bitmap_xori 2", bm, 2,
+ pow2_mask (BITS (uword) - 3) << 2,
+ 1ULL << (BITS (uword) - 1));
+ if (error != 0)
+ goto done;
- bm = clib_bitmap_set_multiple (bm, 2, ~0ULL, BITS (uword) - 3);
- bm2 = clib_bitmap_set_multiple (bm2, 2, ~0ULL, BITS (uword) - 3);
+ /* bm should look like:
+ * bm[0] bm[1]
+ * LSB |00100...00|000...001| MSB
+ */
+ bm = clib_bitmap_andi (bm, 2);
+ error =
+ check_bitmap ("clib_bitmap_andi", bm, 2, 4, 1ULL << (BITS (uword) - 1));
+ if (error != 0)
+ goto done;
+ /* bm should look like:
+ * bm[0]
+ * LSB |00100...00| MSB
+ */
+ bm = clib_bitmap_xori (bm, 2 * BITS (uword) - 1);
+ error = check_bitmap ("clib_bitmap_xori 3", bm, 1, 4);
+ if (error != 0)
+ goto done;
+
+ /* bm and bm2 should look like:
+ * bm[0] bm[1]
+ * LSB |0011...11|1100...00| MSB
+ * bm2[0] bm2[1]
+ * LSB |101...111|0011...11| MSB
+ */
+ bm = clib_bitmap_set_multiple (bm, 2, ~0ULL, BITS (uword));
+ bm2 =
+ clib_bitmap_set_multiple (bm2, BITS (uword) + 2, ~0ULL, BITS (uword) - 3);
dup = clib_bitmap_dup_and (bm, bm2);
- vec_free (dup);
- dup = clib_bitmap_dup_andnot (bm, bm2);
- vec_free (dup);
+ error = check_bitmap ("clib_bitmap_dup_and", dup, 1, bm[0] & bm2[0]);
+ if (error != 0)
+ goto done;
+
+ /* bm should look like:
+ * bm[0] bm[1] ... bm[3]
+ * LSB |0011...11|11...11| ... |11...11| MSB
+ */
+ bm = clib_bitmap_set_region (bm, 5, 1, 4 * BITS (uword) - 5);
+ error = check_bitmap ("clib_bitmap_set_region 1", bm, 4, ~0ULL << 2, ~0ULL,
+ ~0ULL, ~0ULL);
+ if (error != 0)
+ goto done;
+
+ /* bm should look like:
+ * bm[0] bm[1] ... bm[3]
+ * LSB |0011...11|11...11| ... |11...1100000| MSB
+ */
+ bm = clib_bitmap_set_region (bm, 4 * BITS (uword) - 5, 0, 5);
+ error = check_bitmap ("clib_bitmap_set_region 2", bm, 4, ~0ULL << 2, ~0ULL,
+ ~0ULL, pow2_mask (BITS (uword) - 5));
+ if (error != 0)
+ goto done;
+
+ error = check_bitmap_will_expand ("clib_bitmap_will_expand 1", &bm, 0, 0);
+ if (error != 0)
+ goto done;
+
+ error = check_bitmap_will_expand ("clib_bitmap_will_expand 2", &bm,
+ vec_max_len (bm) * BITS (uword) - 1, 0);
+ if (error != 0)
+ goto done;
+
+ error = check_bitmap_will_expand ("clib_bitmap_will_expand 3", &bm,
+ vec_max_len (bm) * BITS (uword), 1);
+ if (error != 0)
+ goto done;
+
+ error = check_bitmap_will_expand ("clib_bitmap_will_expand 4", &bm3, 0, 1);
+ if (error != 0)
+ goto done;
+
+done:
vec_free (bm);
vec_free (bm2);
+ vec_free (bm3);
+ vec_free (dup);
- return 0;
+ return error;
}
-
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (test_bihash_command, static) =
-{
+VLIB_CLI_COMMAND (test_bitmap_command, static) = {
.path = "test bitmap",
.short_help = "Coverage test for bitmap.h",
.function = test_bitmap_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/counter_test.c b/src/plugins/unittest/counter_test.c
index 24b9e1e386e..71f8f93f94d 100644
--- a/src/plugins/unittest/counter_test.c
+++ b/src/plugins/unittest/counter_test.c
@@ -19,7 +19,7 @@
#include <vppinfra/error.h>
#include <vlib/counter.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/stats/stats.h>
enum
{
@@ -38,31 +38,10 @@ enum
static uint64_t
get_stats_epoch ()
{
- stat_segment_main_t *sm = &stat_segment_main;
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
return sm->shared_header->epoch;
}
-/*
- * Return the maximum element count of the vector based on its allocated
- * memory.
- */
-static int
-get_vec_mem_size (void *v, uword data_size)
-{
- stat_segment_main_t *sm = &stat_segment_main;
-
- if (v == 0)
- return 0;
-
- uword aligned_header_bytes = vec_header_bytes (0);
- void *p = v - aligned_header_bytes;
- void *oldheap = clib_mem_set_heap (sm->heap);
- int mem_size = (clib_mem_size (p) - aligned_header_bytes) / data_size;
- clib_mem_set_heap (oldheap);
-
- return mem_size;
-}
-
/* number of times to repeat the counter expand tests */
#define EXPAND_TEST_ROUNDS 3
@@ -90,8 +69,7 @@ test_simple_counter_expand (vlib_main_t *vm)
// Check how many elements fit into the counter vector without expanding
// that. The next validate calls should not increase the stats segment
// epoch.
- int mem_size = get_vec_mem_size (counter.counters[0],
- sizeof ((counter.counters[0])[0]));
+ int mem_size = vec_max_len (counter.counters[0]);
for (index = 1; index <= mem_size - 1; index++)
{
vlib_validate_simple_counter (&counter, index);
@@ -111,6 +89,9 @@ test_simple_counter_expand (vlib_main_t *vm)
epoch = new_epoch;
}
+ vlib_free_simple_counter (&counter);
+ vlib_validate_simple_counter (&counter, 0);
+
return 0;
}
@@ -138,8 +119,7 @@ test_combined_counter_expand (vlib_main_t *vm)
// Check how many elements fit into the counter vector without expanding
// that. The next validate calls should not increase the stats segment
// epoch.
- int mem_size = get_vec_mem_size (counter.counters[0],
- sizeof ((counter.counters[0])[0]));
+ int mem_size = vec_max_len (counter.counters[0]);
for (index = 1; index <= mem_size - 1; index++)
{
vlib_validate_combined_counter (&counter, index);
@@ -159,6 +139,9 @@ test_combined_counter_expand (vlib_main_t *vm)
epoch = new_epoch;
}
+ vlib_free_combined_counter (&counter);
+ vlib_validate_combined_counter (&counter, 0);
+
return 0;
}
diff --git a/src/plugins/unittest/crypto/aes_cbc.c b/src/plugins/unittest/crypto/aes_cbc.c
index 05a16c29a5c..89b0163207f 100644
--- a/src/plugins/unittest/crypto/aes_cbc.c
+++ b/src/plugins/unittest/crypto/aes_cbc.c
@@ -53,7 +53,6 @@ static u8 ciphertext128[] = {
0x12, 0x0E, 0xCA, 0x30, 0x75, 0x86, 0xE1, 0xA7,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes128_cbc) = {
.name = "NIST SP 800-38A",
.alg = VNET_CRYPTO_ALG_AES_128_CBC,
@@ -63,7 +62,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes128_cbc) = {
.ciphertext = TEST_DATA (ciphertext128),
};
-/* *INDENT-ON* */
static u8 key192[24] = {
0x8E, 0x73, 0xB0, 0xF7, 0xDA, 0x0E, 0x64, 0x52,
@@ -82,7 +80,6 @@ static u8 ciphertext192[64] = {
0xD9, 0x20, 0xA9, 0xE6, 0x4F, 0x56, 0x15, 0xCD,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes192_cbc) = {
.name = "NIST SP 800-38A",
.alg = VNET_CRYPTO_ALG_AES_192_CBC,
@@ -92,7 +89,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes192_cbc) = {
.ciphertext = TEST_DATA (ciphertext192),
};
-/* *INDENT-ON* */
static u8 key256[32] = {
0x60, 0x3D, 0xEB, 0x10, 0x15, 0xCA, 0x71, 0xBE,
@@ -112,7 +108,6 @@ static u8 ciphertext256[64] = {
0xDA, 0x6C, 0x19, 0x07, 0x8C, 0x6A, 0x9D, 0x1B,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes256_cbc) = {
.name = "NIST SP 800-38A",
.alg = VNET_CRYPTO_ALG_AES_256_CBC,
@@ -151,7 +146,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes256_incr2) = {
.key.length = 32,
.plaintext_incremental = 1056,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto/aes_ctr.c b/src/plugins/unittest/crypto/aes_ctr.c
index 80e66111c12..581e283b0c3 100644
--- a/src/plugins/unittest/crypto/aes_ctr.c
+++ b/src/plugins/unittest/crypto/aes_ctr.c
@@ -41,7 +41,6 @@ static u8 tc1_ciphertext[] = {
0x1b, 0xef, 0x68, 0x64, 0x99, 0x0d, 0xb6, 0xce,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes128_ctr_tc1) = {
.name = "CTR-AES128 TC1",
.alg = VNET_CRYPTO_ALG_AES_128_CTR,
@@ -50,7 +49,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes128_ctr_tc1) = {
.plaintext = TEST_DATA (tc1_plaintext),
.ciphertext = TEST_DATA (tc1_ciphertext),
};
-/* *INDENT-ON* */
static u8 tc1_192_key[] = {
0x8e, 0x73, 0xb0, 0xf7, 0xda, 0x0e, 0x64, 0x52,
@@ -68,7 +66,6 @@ static u8 tc1_192_ciphertext[] = {
0x4f, 0x2b, 0x04, 0x59, 0xfe, 0x7e, 0x6e, 0x0b,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes192_ctr_tc1) = {
.name = "CTR-AES192 TC1",
.alg = VNET_CRYPTO_ALG_AES_192_CTR,
@@ -77,7 +74,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes192_ctr_tc1) = {
.plaintext = TEST_DATA (tc1_plaintext),
.ciphertext = TEST_DATA (tc1_192_ciphertext),
};
-/* *INDENT-ON* */
static u8 tc1_256_key[] = {
0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe,
@@ -96,7 +92,6 @@ static u8 tc1_256_ciphertext[] = {
0xb7, 0xa7, 0xf5, 0x04, 0xbb, 0xf3, 0xd2, 0x28,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (nist_aes256_ctr_tc1) = {
.name = "CTR-AES256 TC1",
.alg = VNET_CRYPTO_ALG_AES_256_CTR,
@@ -105,7 +100,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (nist_aes256_ctr_tc1) = {
.plaintext = TEST_DATA (tc1_plaintext),
.ciphertext = TEST_DATA (tc1_256_ciphertext),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto/aes_gcm.c b/src/plugins/unittest/crypto/aes_gcm.c
index 3d1b221bf32..daa9ef11dcc 100644
--- a/src/plugins/unittest/crypto/aes_gcm.c
+++ b/src/plugins/unittest/crypto/aes_gcm.c
@@ -166,7 +166,6 @@ static u8 tc4_tag256[] = {
0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (aes_gcm128_tc1) = {
.name = "128-GCM Spec. TC1",
.alg = VNET_CRYPTO_ALG_AES_128_GCM,
@@ -318,7 +317,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (aes_gcm256_inc5) = {
.aad.length = 20,
.tag.length = 16,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto/aes_gmac.c b/src/plugins/unittest/crypto/aes_gmac.c
new file mode 100644
index 00000000000..f58b8cdc389
--- /dev/null
+++ b/src/plugins/unittest/crypto/aes_gmac.c
@@ -0,0 +1,3029 @@
+/* Copyright (c) 2023 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. */
+
+/* Test vectors published in NIST Cryptographic Algorithm Validation Program
+ * (CAVP)
+ * https://csrc.nist.gov/Projects/Cryptographic-Algorithm-Validation-Program/CAVP-TESTING-BLOCK-CIPHER-MODES#GCMVS
+ */
+
+#include <vppinfra/clib.h>
+#include <vnet/crypto/crypto.h>
+#include <unittest/crypto/crypto.h>
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc0) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x11\x75\x4c\xd7\x2a\xec\x30\x9b\xf5\x2f\x76\x87\x21\x2e\x89\x57"),
+ .iv = TEST_DATA_STR ("\x3c\x81\x9d\x9a\x9b\xed\x08\x76\x15\x03\x0b\x65"),
+ .tag = TEST_DATA_STR (
+ "\x25\x03\x27\xc6\x74\xaa\xf4\x77\xae\xf2\x67\x57\x48\xcf\x69\x71"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc1) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xca\x47\x24\x8a\xc0\xb6\xf8\x37\x2a\x97\xac\x43\x50\x83\x08\xed"),
+ .iv = TEST_DATA_STR ("\xff\xd2\xb5\x98\xfe\xab\xc9\x01\x92\x62\xd2\xbe"),
+ .tag = TEST_DATA_STR (
+ "\x60\xd2\x04\x04\xaf\x52\x7d\x24\x8d\x89\x3a\xe4\x95\x70\x7d\x1a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc2) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xdb\x1a\xd0\xbd\x1c\xf6\xdb\x0b\x5d\x86\xef\xdd\x89\x14\xb2\x18"),
+ .iv = TEST_DATA_STR ("\x36\xfa\xd6\xac\xb3\xc9\x8e\x01\x38\xae\xb9\xb1"),
+ .tag = TEST_DATA_STR (
+ "\x5e\xe2\xba\x73\x7d\x3f\x2a\x94\x4b\x33\x5a\x81\xf6\x65\x3c\xce"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc3) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x1c\x71\x35\xaf\x62\x7c\x04\xc3\x29\x57\xf3\x3f\x9a\xc0\x85\x90"),
+ .iv = TEST_DATA_STR ("\x35\x5c\x09\x4f\xa0\x9c\x8e\x92\x81\x17\x8d\x34"),
+ .tag = TEST_DATA_STR (
+ "\xb6\xab\x2c\x7d\x90\x6c\x9d\x9e\xc4\xc1\x49\x8d\x2c\xbb\x50\x29"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc4) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x6c\xa2\xc1\x12\x05\xa6\xe5\x5a\xb5\x04\xdb\xf3\x49\x1f\x8b\xdc"),
+ .iv = TEST_DATA_STR ("\xb1\x00\x8b\x65\x0a\x2f\xee\x64\x21\x75\xc6\x0d"),
+ .tag = TEST_DATA_STR (
+ "\x7a\x9a\x22\x5d\x5f\x9a\x0e\xbf\xe0\xe6\x9f\x37\x18\x71\xa6\x72"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc5) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x69\xf2\xca\x78\xbb\x56\x90\xac\xc6\x58\x73\x02\x62\x88\x28\xd5"),
+ .iv = TEST_DATA_STR ("\x70\x1d\xa2\x82\xcb\x6b\x60\x18\xda\xbd\x00\xd3"),
+ .tag = TEST_DATA_STR (
+ "\xab\x1d\x40\xdd\xa1\x79\x8d\x56\x68\x78\x92\xe2\x15\x9d\xec\xfd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc6) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xdc\xf4\xe3\x39\xc4\x87\xb6\x79\x7a\xac\xa9\x31\x72\x5f\x7b\xbd"),
+ .iv = TEST_DATA_STR ("\x2c\x1d\x95\x5e\x35\x36\x67\x60\xea\xd8\x81\x7c"),
+ .tag = TEST_DATA_STR (
+ "\x32\xb5\x42\xc5\xf3\x44\xcc\xec\xeb\x46\x0a\x02\x93\x8d\x6b\x0c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc7) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x76\x58\xcd\xbb\x81\x57\x2a\x23\xa7\x8e\xe4\x59\x6f\x84\x4e\xe9"),
+ .iv = TEST_DATA_STR ("\x1c\x3b\xaa\xe9\xb9\x06\x59\x61\x84\x2c\xbe\x52"),
+ .tag = TEST_DATA_STR (
+ "\x70\xc7\x12\x3f\xc8\x19\xaa\x06\x0e\xd2\xd3\xc1\x59\xb6\xea\x41"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc8) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x28\x1a\x57\x0b\x1e\x8f\x26\x5e\xe0\x93\x03\xec\xae\x0c\xc4\x6d"),
+ .iv = TEST_DATA_STR ("\x8c\x29\x41\xf7\x3c\xf8\x71\x3a\xd5\xbc\x13\xdf"),
+ .tag = TEST_DATA_STR (
+ "\xa4\x2e\x5e\x5f\x6f\xb0\x0a\x9f\x12\x06\xb3\x02\xed\xbf\xd8\x7c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc9) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xcd\x33\x2a\x98\x6f\x82\xd9\x8c\x21\x52\x78\x13\x1a\xd3\x87\xb7"),
+ .iv = TEST_DATA_STR ("\x1d\x12\xb2\x59\xf4\x4b\x87\x3d\x39\x42\xbc\x11"),
+ .tag = TEST_DATA_STR (
+ "\x34\x23\x80\x23\x64\x81\x85\xd7\xef\x0c\xfc\xf5\x83\x6e\x93\xcc"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc10) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x80\xe1\xd9\x8d\x10\xb2\x72\x37\x38\x6f\x02\x91\x89\xec\x04\x48"),
+ .iv = TEST_DATA_STR ("\x23\x9e\xba\xb2\xf5\x24\xfd\x62\xc5\x54\xa1\x90"),
+ .tag = TEST_DATA_STR (
+ "\x4c\x0f\x29\xd9\x63\xf0\xed\x68\xdc\xcf\x34\x49\x6c\xf4\x3d\x00"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc11) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x40\x65\x0c\xdb\x61\xe3\xe1\x9a\x1a\x98\xfb\x4e\x05\x37\x7d\x35"),
+ .iv = TEST_DATA_STR ("\x69\xf0\xa8\x1a\xaf\x6b\xb8\x48\x62\x82\xf1\xb9"),
+ .tag = TEST_DATA_STR (
+ "\x26\x57\xe1\x2d\xec\x21\xc3\xec\xf0\x71\xaf\x61\x79\x52\x9f\xb4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc12) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x1e\x89\xa6\xcd\x75\x28\xcc\xe1\xe2\xb2\xb5\xf7\xfd\x2b\x6b\x52"),
+ .iv = TEST_DATA_STR ("\xe1\x1f\xd4\x27\xa7\x82\xd5\x43\xf7\x8e\xfc\x60"),
+ .tag = TEST_DATA_STR (
+ "\xee\xed\xff\x87\x4c\x8e\xde\xea\x53\xe8\xbe\x2a\x13\xaf\xd8\x1b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc13) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x2a\x7a\xd6\x14\x66\x76\x05\x7d\xb7\x77\xde\xa4\x68\x3d\x0d\x45"),
+ .iv = TEST_DATA_STR ("\xed\x72\x1e\xa6\x74\x56\xd4\x59\x4a\xaf\xbd\x51"),
+ .tag = TEST_DATA_STR (
+ "\xee\x3c\xab\x57\x78\x88\x84\x39\xd9\x0f\xa7\x18\xb7\x57\x38\xad"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad0_tc14) = {
+ .name = "128-GMAC 0-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa3\x64\xf4\x94\xa4\xcd\x01\x47\xc3\x47\x31\x07\x4d\xc1\xa8\x5b"),
+ .iv = TEST_DATA_STR ("\x4a\xa8\x47\x0d\xd4\x04\xe4\x05\x4b\x30\x09\x3a"),
+ .tag = TEST_DATA_STR (
+ "\xd8\xa7\xbb\xa3\xa4\x51\x90\x2e\x3a\xdc\x01\x06\x0c\x3c\x91\xa7"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc0) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x77\xbe\x63\x70\x89\x71\xc4\xe2\x40\xd1\xcb\x79\xe8\xd7\x7f\xeb"),
+ .iv = TEST_DATA_STR ("\xe0\xe0\x0f\x19\xfe\xd7\xba\x01\x36\xa7\x97\xf3"),
+ .aad = TEST_DATA_STR (
+ "\x7a\x43\xec\x1d\x9c\x0a\x5a\x78\xa0\xb1\x65\x33\xa6\x21\x3c\xab"),
+ .tag = TEST_DATA_STR (
+ "\x20\x9f\xcc\x8d\x36\x75\xed\x93\x8e\x9c\x71\x66\x70\x9d\xd9\x46"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc1) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x76\x80\xc5\xd3\xca\x61\x54\x75\x8e\x51\x0f\x4d\x25\xb9\x88\x20"),
+ .iv = TEST_DATA_STR ("\xf8\xf1\x05\xf9\xc3\xdf\x49\x65\x78\x03\x21\xf8"),
+ .aad = TEST_DATA_STR (
+ "\xc9\x4c\x41\x01\x94\xc7\x65\xe3\xdc\xc7\x96\x43\x79\x75\x8e\xd3"),
+ .tag = TEST_DATA_STR (
+ "\x94\xdc\xa8\xed\xfc\xf9\x0b\xb7\x4b\x15\x3c\x8d\x48\xa1\x79\x30"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc2) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa8\x2b\xb1\xed\xc7\xc0\x1a\x36\x89\x00\x6f\x34\xbf\xed\x78\x3e"),
+ .iv = TEST_DATA_STR ("\x96\x38\x36\xb6\x7b\x18\x8b\xec\xf9\xba\x14\x11"),
+ .aad = TEST_DATA_STR (
+ "\x9d\x11\x5b\xb9\xbb\xd1\x19\xfb\x77\x7b\x63\x16\x06\x5a\x9a\xc8"),
+ .tag = TEST_DATA_STR (
+ "\xc4\x91\x88\x9f\xa3\xec\xa4\x54\x4b\xa0\xd5\x1b\x8e\x0f\x38\x37"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc3) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xb9\x78\x2d\x0a\x59\x86\xc6\x3f\x35\x2d\x3b\xc4\xc7\xec\xc9\x6d"),
+ .iv = TEST_DATA_STR ("\x45\x41\xe1\x5b\x92\xed\xea\x44\xec\xeb\x1f\x2a"),
+ .aad = TEST_DATA_STR (
+ "\xf1\xa9\xf0\x72\x34\x29\xc5\xb2\x61\x85\xac\x3e\xa7\xe1\x3d\x7a"),
+ .tag = TEST_DATA_STR (
+ "\x74\xd0\xd3\x69\x49\xf0\x27\x66\x70\xf9\xdd\xc5\x79\xe9\x4f\x3a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc4) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x59\xb9\x57\x85\xb3\x0f\x20\x56\x79\xfc\x4f\x3f\x9a\x90\x10\x2f"),
+ .iv = TEST_DATA_STR ("\x19\x08\x78\x7c\xc1\xe1\x88\x0a\x6e\xf5\xdd\x17"),
+ .aad = TEST_DATA_STR (
+ "\x39\x85\x2d\x31\x82\x94\x4a\x51\x77\xdb\x27\x7b\x63\x91\x07\x02"),
+ .tag = TEST_DATA_STR (
+ "\x8f\x9a\x96\xc0\x13\x99\x24\x85\xb4\x3e\x2b\x62\x74\x5a\xd1\x73"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc5) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x34\xdd\x79\x26\xab\x13\xd4\x07\x81\x60\xd8\x7d\xe2\xe3\xc7\x24"),
+ .iv = TEST_DATA_STR ("\xc1\x1c\xcd\xaf\x79\x8a\xb0\x3a\xf2\xd9\x7e\xf9"),
+ .aad = TEST_DATA_STR (
+ "\xaf\x69\x87\x17\xa6\xd7\x90\xb3\xbf\xc3\x91\x95\x85\x7b\xb5\xff"),
+ .tag = TEST_DATA_STR (
+ "\x48\x11\x60\x50\xbb\xd9\x11\x82\x70\xd0\xbe\x25\x2d\x29\xd5\xd4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc6) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x8e\xc8\x6f\xab\x55\xaa\xab\x0e\x77\x45\x5e\x9c\xd3\xdb\xc7\x8e"),
+ .iv = TEST_DATA_STR ("\x15\xfd\x90\xa9\x86\x7e\x14\xf0\xd6\x3b\x53\xb9"),
+ .aad = TEST_DATA_STR (
+ "\xe7\x50\x9e\x27\x62\x09\xa6\xd3\xec\xfa\xbb\x53\xcc\xdc\xd2\x36"),
+ .tag = TEST_DATA_STR (
+ "\xd9\x6d\x6a\xc0\xd3\x09\xce\xbe\xde\xba\x2a\xf9\xf2\x62\x13\x2f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc7) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x66\xb2\x47\x3d\x9e\x01\x21\x66\x6d\x47\x63\x3f\x70\x08\xeb\x1c"),
+ .iv = TEST_DATA_STR ("\xc1\x71\x6c\x68\xa2\x4d\x57\x77\x0b\x86\x7e\x51"),
+ .aad = TEST_DATA_STR (
+ "\xc2\x0f\x68\x63\x17\xd6\x7e\x53\xdd\x79\xba\xe5\xc4\x6d\xc1\x11"),
+ .tag = TEST_DATA_STR (
+ "\x9a\x08\x61\x68\x09\xcf\x15\x24\x7d\xfe\xb9\x75\x6b\xa4\xf6\x09"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc8) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x5b\x26\x2a\x9d\x00\x90\x4d\x30\xa2\x58\x7c\xaa\xde\x09\x13\x81"),
+ .iv = TEST_DATA_STR ("\xf7\xbc\x15\x4c\xa5\x62\xe8\xf2\xc1\x84\x55\x98"),
+ .aad = TEST_DATA_STR (
+ "\x23\x11\x2d\x07\x8c\x99\x14\xfa\x3d\xfe\x52\x18\xcd\x19\x10\x16"),
+ .tag = TEST_DATA_STR (
+ "\x98\x85\x4d\x19\x3a\x06\xdb\xe3\x2c\xe4\x49\x7e\xec\x5c\x9a\x8b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc9) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x2e\x4f\xb9\xcc\x32\x01\x88\xa6\xf1\xfa\x89\xa7\xa2\x52\x27\x3a"),
+ .iv = TEST_DATA_STR ("\x7a\x6d\x4e\xe6\x9c\x72\x56\xc1\x4f\xba\x8f\x5e"),
+ .aad = TEST_DATA_STR (
+ "\x80\xba\x4a\x20\x2a\x68\xc3\x59\x0d\x65\x57\x91\x2c\x6f\x87\x8e"),
+ .tag = TEST_DATA_STR (
+ "\x92\x80\x31\x32\x73\xbe\xfb\x8a\xfa\x0b\xce\xca\x5a\x96\x6d\x85"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc10) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x5e\xa9\x49\x73\xd8\x61\x6d\xaf\xa7\xf3\x1d\xb0\x71\x6d\x17\x29"),
+ .iv = TEST_DATA_STR ("\xa0\x5b\x62\x66\x9d\x25\x0e\x61\xb0\x77\xd2\x8a"),
+ .aad = TEST_DATA_STR (
+ "\x96\x20\xba\xf2\xf5\x8d\x01\x3f\x8a\x4c\x48\x71\x98\x9c\x1b\x17"),
+ .tag = TEST_DATA_STR (
+ "\x7e\x55\x03\x98\xde\xe7\x28\x25\x6d\x69\x28\xcd\xaa\xc4\x3b\x73"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc11) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x91\x03\x85\xf6\xf0\x7f\x9e\x57\xe4\x83\xc4\x7d\xd5\x20\x6b\xcc"),
+ .iv = TEST_DATA_STR ("\x51\x8f\x56\xe3\x36\x58\xdf\x31\x1d\x42\xd9\xfe"),
+ .aad = TEST_DATA_STR (
+ "\x5d\x15\x79\x09\xa2\xa4\x60\x71\x17\xe7\x7d\xa0\xe4\x49\x3b\x88"),
+ .tag = TEST_DATA_STR (
+ "\xa7\x04\x1e\xa4\xa1\xd7\x4d\x9e\x66\xb9\x57\x1b\x59\xb6\xa1\xd8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc12) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xca\xb3\xaf\x7a\x15\xb4\x30\xe0\x34\xe7\x93\xbb\x30\xdb\x8a\xb2"),
+ .iv = TEST_DATA_STR ("\x96\x3a\x56\xe2\xe1\x2f\x38\x70\x62\xe1\x84\x98"),
+ .aad = TEST_DATA_STR (
+ "\xa0\x94\xa1\xdd\x11\x21\xd3\xaa\x52\xc8\x1e\x8f\x10\xbf\x9f\x0c"),
+ .tag = TEST_DATA_STR (
+ "\x1a\x31\xd2\x95\x60\x1e\xb3\xc8\x2a\x54\xb2\x34\x98\x4f\xfd\xf5"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc13) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x89\xc9\x49\xe9\xc8\x04\xaf\x01\x4d\x56\x04\xb3\x94\x59\xf2\xc8"),
+ .iv = TEST_DATA_STR ("\xd1\xb1\x04\xc8\x15\xbf\x1e\x94\xe2\x8c\x8f\x16"),
+ .aad = TEST_DATA_STR (
+ "\x82\xad\xcd\x63\x8d\x3f\xa9\xd9\xf3\xe8\x41\x00\xd6\x1e\x07\x77"),
+ .tag = TEST_DATA_STR (
+ "\x88\xdb\x9d\x62\x17\x2e\xd0\x43\xaa\x10\xf1\x6d\x22\x7d\xc4\x1b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad128_tc14) = {
+ .name = "128-GMAC 128-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa4\xd9\x94\xc4\xac\x5a\xc0\xf0\x29\x13\x24\x57\x14\xfb\xe2\x35"),
+ .iv = TEST_DATA_STR ("\xa9\x47\x2d\xad\xcc\xa8\xd7\xe0\xe3\xb8\x08\x4d"),
+ .aad = TEST_DATA_STR (
+ "\xeb\x31\x8b\x9e\x17\x57\x52\x03\xdd\x29\xeb\xed\x20\xec\x82\xf9"),
+ .tag = TEST_DATA_STR (
+ "\x32\x3d\xf7\xf3\x36\x94\x10\x6f\x56\x73\x9d\xe0\x97\x32\x16\xa3"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc0) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x2f\xb4\x5e\x5b\x8f\x99\x3a\x2b\xfe\xbc\x4b\x15\xb5\x33\xe0\xb4"),
+ .iv = TEST_DATA_STR ("\x5b\x05\x75\x5f\x98\x4d\x2b\x90\xf9\x4b\x80\x27"),
+ .aad = TEST_DATA_STR ("\xe8\x54\x91\xb2\x20\x2c\xaf\x1d\x7d\xce\x03\xb9\x7e"
+ "\x09\x33\x1c\x32\x47\x39\x41"),
+ .tag = TEST_DATA_STR (
+ "\xc7\x5b\x78\x32\xb2\xa2\xd9\xbd\x82\x74\x12\xb6\xef\x57\x69\xdb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc1) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x95\x21\x17\x04\x8f\x77\xe2\x76\xc2\xef\x65\x80\x53\x7c\x14\x03"),
+ .iv = TEST_DATA_STR ("\x07\x0b\x8f\xb4\x6a\x7a\xd5\x28\x85\xbe\x1b\x26"),
+ .aad = TEST_DATA_STR ("\x34\xb0\x88\xf9\x82\x81\x8b\x5f\x07\xda\xbe\x2b\x62"
+ "\xf9\x54\x7f\x4e\xd0\x99\x12"),
+ .tag = TEST_DATA_STR (
+ "\xbe\xdd\x4c\xf3\x0f\xd7\xa4\xab\xc4\x9b\xdc\xc3\xf3\xb2\x48\xb1"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc2) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x7f\x64\x53\xb3\x9b\xde\x01\x85\x60\xa1\x6a\x27\x04\x21\x75\x43"),
+ .iv = TEST_DATA_STR ("\x0f\x3e\xec\xf4\x8d\x68\x35\x32\x26\xa7\x7f\xe4"),
+ .aad = TEST_DATA_STR ("\x11\xe4\xec\xb2\x56\xeb\xff\x56\x45\x3f\xa2\xe7\x5e"
+ "\x43\xeb\x9d\x64\x10\x49\xe6"),
+ .tag = TEST_DATA_STR (
+ "\xb5\x12\x62\x3a\x12\xd5\x49\x2b\x7d\x76\xd3\x9b\xe0\xdf\x57\x77"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc3) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x93\x32\xe4\x33\xbf\x61\x00\xc6\xcc\x23\xb0\x87\x10\x62\x7c\x40"),
+ .iv = TEST_DATA_STR ("\xaa\xb3\xdb\x30\x15\xb2\x9d\x24\xf3\x29\xbe\xb4"),
+ .aad = TEST_DATA_STR ("\xbd\x84\x3a\x08\xf0\xa8\x22\xf8\xf4\xf7\x6c\x36\x48"
+ "\x38\x0a\xab\x76\x22\xe7\x19"),
+ .tag = TEST_DATA_STR (
+ "\xe5\x4f\x1d\x18\xc6\x1d\x8b\xe1\x54\x84\x72\x76\x05\xb5\xa5\xdc"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc4) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x57\x73\x75\x0a\x49\x30\x96\xa9\x9d\x84\xc0\x56\x3f\xc2\x93\xe9"),
+ .iv = TEST_DATA_STR ("\xc3\x90\xed\x70\xdc\x94\x97\x23\x44\x13\xad\x52"),
+ .aad = TEST_DATA_STR ("\x60\x12\x51\x72\x58\x71\x6c\x1f\x00\x35\xef\xa6\x0a"
+ "\x0f\x36\xb5\xc6\x5e\x73\x79"),
+ .tag = TEST_DATA_STR (
+ "\xb0\x11\xb2\x64\x61\x0e\x58\x08\x27\x05\x47\x6f\x04\x0b\x8c\x86"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc5) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x41\xb0\xd0\xfc\xe5\xd3\x13\x59\xcf\xd5\xdb\x40\x64\xe2\xd4\x6b"),
+ .iv = TEST_DATA_STR ("\xb9\x03\xe9\xd0\xce\xa2\x57\x95\xa8\x2e\x73\xe3"),
+ .aad = TEST_DATA_STR ("\x4c\xba\x50\x18\x76\xf3\x3e\x1f\xda\x9c\xd4\x56\xe3"
+ "\x18\x06\x83\xe3\x86\x3b\xd9"),
+ .tag = TEST_DATA_STR (
+ "\x18\xbc\x39\xd0\xb9\x5c\xf0\x59\xcd\x8c\x25\x00\x4f\x5e\x50\x7c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc6) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x47\x48\xb7\x82\xe3\xfe\x5e\x4e\xff\xeb\x7c\x67\x23\x2d\x2b\x07"),
+ .iv = TEST_DATA_STR ("\xc5\xe4\xdc\xf1\x8f\x86\x07\x6b\x88\xa5\xd5\xe9"),
+ .aad = TEST_DATA_STR ("\x3b\x2f\xca\xd8\x73\x9e\xd8\x7e\x1d\x02\xe8\x08\x45"
+ "\xf1\x20\xe2\x49\xea\x92\xb1"),
+ .tag = TEST_DATA_STR (
+ "\xb8\xae\x71\x8e\x28\x79\xc9\xcb\x65\x8d\x5d\x11\x22\xe6\x9b\xb7"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc7) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xe3\x0c\xc2\x20\x77\xd5\x95\x12\x16\xd0\x7f\x37\xc5\x1b\x58\xf9"),
+ .iv = TEST_DATA_STR ("\xfc\x58\x3a\xd1\x59\xb5\x2e\x0b\x63\x78\x15\x7e"),
+ .aad = TEST_DATA_STR ("\xc3\xcb\x7b\xe8\x88\x8e\xf4\x4c\xa5\xaa\x93\xdd\xe2"
+ "\x6d\x27\x51\x28\x8e\x1f\x5a"),
+ .tag = TEST_DATA_STR (
+ "\xa8\xce\x25\xb5\xdc\x8f\x84\xe2\xf5\xda\xe5\xf0\x85\xaa\xcc\xd4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc8) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x7c\x8b\x10\xba\x75\xee\x6a\xb4\xa9\x97\xd3\xf5\x98\xb7\x9d\x40"),
+ .iv = TEST_DATA_STR ("\x6f\xb5\x51\x88\xdd\xf0\x0d\xde\x09\x59\x65\x87"),
+ .aad = TEST_DATA_STR ("\x2d\xdc\x0a\xcf\x97\x05\xf8\xd1\x8f\x90\x5b\x8f\x9d"
+ "\x47\x2e\x7d\xbf\x6b\x91\xe3"),
+ .tag = TEST_DATA_STR (
+ "\x57\x91\xd3\x80\x51\x09\xc5\xe1\x8a\xdf\xf4\xe8\x09\x06\xa0\x18"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc9) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x72\xc7\xdb\x6c\xa2\x9f\x83\x64\x1c\x3f\xff\x5b\x71\xc4\xbc\x30"),
+ .iv = TEST_DATA_STR ("\xf2\x00\x07\x42\xe2\x49\xac\x56\xd5\xb2\xf6\x5f"),
+ .aad = TEST_DATA_STR ("\xcd\x99\x4d\x2d\x08\x23\x27\x70\x92\x7d\x85\x4e\xf2"
+ "\xb6\xca\x2f\x08\x73\x70\xcf"),
+ .tag = TEST_DATA_STR (
+ "\xa5\x96\x6d\xf3\x9f\xee\xba\x03\x36\xf0\xb9\xa3\xf4\xff\xe6\xc3"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc10) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x28\x33\xcc\x10\x19\x50\x30\xe4\xa1\x15\x55\x32\x66\x6c\xb0\x49"),
+ .iv = TEST_DATA_STR ("\xad\x80\x2b\x9a\x5c\x94\x09\xfa\x3e\x7d\xcf\xcc"),
+ .aad = TEST_DATA_STR ("\xb3\xec\xbe\xa2\x79\x7d\x00\x6c\x07\xb8\xce\x62\x1b"
+ "\xe3\xb0\xec\xcd\x37\xc3\xec"),
+ .tag = TEST_DATA_STR (
+ "\x81\xde\xab\x8b\xde\xe0\xd3\x91\x49\x5e\xed\x40\x29\xa6\xd2\x05"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc11) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xd8\x98\x5b\xb5\xac\x02\x58\xad\xad\x86\x66\x0e\xbb\xc6\xd1\x9f"),
+ .iv = TEST_DATA_STR ("\xb5\xee\x26\xf8\xc4\x63\xbb\xfc\x27\x11\x5b\x0a"),
+ .aad = TEST_DATA_STR ("\x61\x3f\x51\xf8\x32\xfb\xf4\x34\xb8\xe3\xfe\x94\x54"
+ "\xae\x46\xa8\x62\xd8\x31\xf0"),
+ .tag = TEST_DATA_STR (
+ "\xfe\x9f\x0b\x1b\xdc\x68\xde\xe6\xe8\xdc\x2c\xe1\x26\x65\xd3\x36"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc12) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x9b\x8f\x69\x24\xdc\x22\xf1\x07\x3c\x1a\x38\x44\x8a\x2f\x04\x47"),
+ .iv = TEST_DATA_STR ("\x09\xcd\xab\xf8\x7d\x82\x82\x8e\xca\x1c\x0c\x7f"),
+ .aad = TEST_DATA_STR ("\x69\x21\x0e\x4e\x0a\x1c\xfd\x50\x38\x75\x66\x52\x79"
+ "\x0b\x9a\x8c\xfb\xbd\x94\x3d"),
+ .tag = TEST_DATA_STR (
+ "\xa6\x0c\x10\x4a\x6f\xb4\x63\x84\x27\xa8\x8a\x86\xc0\x49\x23\xbd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc13) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x72\x13\x22\x13\xd5\xd9\x53\x09\xbf\x7e\x10\xf8\x31\x8d\x7c\x20"),
+ .iv = TEST_DATA_STR ("\xfb\x90\xbf\x28\x3c\x54\x11\x23\x03\x55\xd7\xa1"),
+ .aad = TEST_DATA_STR ("\xa3\x0b\xb1\x7c\x80\x89\xc6\xf5\xf6\x1b\x25\x0a\x94"
+ "\xcb\xbb\xfd\xf5\xf2\xa3\xe6"),
+ .tag = TEST_DATA_STR (
+ "\x09\x19\x1a\xf4\x18\x94\x9f\xe6\xbe\x8d\xbf\x13\xe0\x06\x52\x7a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad160_tc14) = {
+ .name = "128-GMAC 160-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x65\x2f\xfb\xad\x4e\x1f\xcb\xe7\x55\x64\x39\x5e\x6c\x1c\x39\x24"),
+ .iv = TEST_DATA_STR ("\x11\x13\x49\x63\x6d\x10\x6f\xd5\xf6\xa1\xe0\x88"),
+ .aad = TEST_DATA_STR ("\x5f\x52\xaa\x85\xdc\x3a\xc0\x42\x64\x7e\x32\xad\xa0"
+ "\x50\xd6\x7e\x59\xb5\x19\xaa"),
+ .tag = TEST_DATA_STR (
+ "\x28\xd9\x80\xd7\xbf\xd8\x78\xc2\x27\xc1\x40\xde\x34\x82\x76\x5b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc0) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x99\xe3\xe8\x79\x3e\x68\x6e\x57\x1d\x82\x85\xc5\x64\xf7\x5e\x2b"),
+ .iv = TEST_DATA_STR ("\xc2\xdd\x0a\xb8\x68\xda\x6a\xa8\xad\x9c\x0d\x23"),
+ .aad = TEST_DATA_STR (
+ "\xb6\x68\xe4\x2d\x4e\x44\x4c\xa8\xb2\x3c\xfd\xd9\x5a\x9f\xed\xd5\x17\x8a"
+ "\xa5\x21\x14\x48\x90\xb0\x93\x73\x3c\xf5\xcf\x22\x52\x6c\x59\x17\xee\x47"
+ "\x65\x41\x80\x9a\xc6\x86\x7a\x8c\x39\x93\x09\xfc"),
+ .tag = TEST_DATA_STR (
+ "\x3f\x4f\xba\x10\x0e\xaf\x1f\x34\xb0\xba\xad\xaa\xe9\x99\x5d\x85"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc1) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xf8\xe2\x9e\xfd\x00\xa4\x23\xc4\xea\x94\x56\x86\x3f\x83\xc5\x4f"),
+ .iv = TEST_DATA_STR ("\x2d\x3c\xf6\x7c\xbc\xe6\x9d\x63\x9b\xd1\xc0\x92"),
+ .aad = TEST_DATA_STR (
+ "\x02\xc7\x0f\xc8\xa2\x54\x46\x19\xc1\xc3\xe9\xfc\xe6\xb3\xc6\xc3\xbc\x24"
+ "\x64\x3e\x0f\x14\x0e\x6b\x48\xac\x50\x5e\xa6\x66\xcd\x9a\x20\x10\xc3\xa8"
+ "\xe2\xf5\xf1\x04\x37\x88\x7f\xe8\x03\xb5\x4d\xb3"),
+ .tag = TEST_DATA_STR (
+ "\x96\x3c\xb5\x0a\xca\x3e\x09\xdd\x0d\x9a\x01\x3c\x87\x34\x15\x5f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc2) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x00\xe3\x49\x1d\xfc\xf3\xbe\xc3\x9c\x89\xcc\xfd\x80\xa5\xa8\x96"),
+ .iv = TEST_DATA_STR ("\x29\xf6\xff\x4e\xdc\x4a\xc3\xe9\x7f\xfb\x16\x80"),
+ .aad = TEST_DATA_STR (
+ "\x73\x81\x33\x51\xb3\x9f\x5e\x40\x00\xa9\xee\x8d\x2b\x85\xf1\x31\x63\x4a"
+ "\xca\xed\xe0\xdd\x25\xd6\x91\xa2\xb8\x29\xad\x4f\xe9\xea\x69\x9f\x12\x24"
+ "\x25\x19\x84\x7c\xb0\x83\xb0\xb4\xd3\xd8\xb3\xbc"),
+ .tag = TEST_DATA_STR (
+ "\x01\xb2\xe9\xba\x71\x9a\xd7\x7c\x75\x3b\x36\x4e\xcc\x5a\xab\xeb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc3) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x0a\xd0\x6f\x4c\x19\xaf\x1d\x5f\x60\x2b\x38\xf8\x6e\x56\x29\x1c"),
+ .iv = TEST_DATA_STR ("\x0b\x23\x5c\x6a\x75\xce\xcd\xfc\xba\x90\x01\xce"),
+ .aad = TEST_DATA_STR (
+ "\x7d\x4f\x26\xf7\x89\x5b\x2e\xf3\xda\x2e\x4f\x93\xe4\x11\xcd\xb7\x40\x25"
+ "\xc7\x75\x9c\x03\x8d\x87\x23\x44\xa4\x5c\xe5\x6d\x92\xa5\x81\x86\x2c\x3b"
+ "\xac\xe0\x39\x09\x0a\x2c\xcf\xa4\x3b\x62\x3d\xcb"),
+ .tag = TEST_DATA_STR (
+ "\xb4\xbc\x9c\xe1\x47\x5d\x0c\x93\xdf\xd5\xa5\xd8\xd4\x5b\xd8\xe5"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc4) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xee\xeb\x33\xe0\xc8\xa4\x06\xea\x23\x6a\x07\x5c\xdb\xe9\xd6\xf9"),
+ .iv = TEST_DATA_STR ("\xb9\x35\xe8\xee\xd6\x62\x27\x83\x6e\xde\x18\x9a"),
+ .aad = TEST_DATA_STR (
+ "\x9a\x42\x91\xac\xb9\x92\x4b\xba\x42\x41\xb0\xc9\xc3\xc2\xe1\x26\x2b\x25"
+ "\xa7\xc7\xf0\x2c\x92\xad\xea\xdf\x92\x25\x4d\x61\x8a\xb5\x93\x88\xaa\x30"
+ "\xb4\x7e\xaf\xa5\x88\x99\xc3\x57\xcf\x28\x1e\x31"),
+ .tag = TEST_DATA_STR (
+ "\x14\x3d\x69\x54\xeb\x6f\xe7\x0a\xff\x70\xda\x97\x8c\xcd\x45\x09"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc5) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x60\x0b\x54\x42\xa0\xb5\x50\xa3\x8f\x85\xd2\xfb\x0a\xcc\x9c\x96"),
+ .iv = TEST_DATA_STR ("\x5e\x65\xdd\x6e\x8b\x20\xd6\xb2\x93\x1f\xe6\xc2"),
+ .aad = TEST_DATA_STR (
+ "\x46\x1e\x54\xa0\x92\xf8\x39\x24\x66\x84\x9f\xb0\x37\x0a\xe3\x0c\x14\xc1"
+ "\xbf\x39\x87\xab\x2e\xbb\xe9\x8e\x18\xd1\x3f\x04\x1d\x09\xd0\x43\xf7\xae"
+ "\xa7\x8b\xfc\xc4\x2f\x86\x4a\x9f\xb4\x0f\x00\x31"),
+ .tag = TEST_DATA_STR (
+ "\x2c\xd6\x26\xf9\xa0\x68\x63\x00\xcf\x23\xc0\xbc\x59\x7c\x63\xb4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc6) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xce\x8d\x11\x03\x10\x0f\xa2\x90\xf9\x53\xfb\xb4\x39\xef\xde\xe4"),
+ .iv = TEST_DATA_STR ("\x48\x74\xc6\xf8\x08\x23\x66\xfc\x7e\x49\xb9\x33"),
+ .aad = TEST_DATA_STR (
+ "\xd6\x9d\x03\x3c\x32\x02\x97\x89\x26\x3c\x68\x9e\x11\xff\x7e\x9e\x8e\xef"
+ "\xc4\x8d\xdb\xc4\xe1\x0e\xea\xe1\xc9\xed\xbb\x44\xf0\x4e\x7c\xc6\x47\x15"
+ "\x01\xea\xdd\xa3\x94\x0a\xb4\x33\xd0\xa8\xc2\x10"),
+ .tag = TEST_DATA_STR (
+ "\xa5\x96\x4b\x77\xaf\x0b\x8a\xec\xd8\x44\xd6\xad\xec\x8b\x7b\x1c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc7) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xae\x71\x14\xc0\x9f\xfa\x04\x29\x88\x34\x41\x2f\x6a\x8d\xe4\x53"),
+ .iv = TEST_DATA_STR ("\xf3\x80\xc2\xd8\x60\xbe\x2a\xf4\x1e\x1b\xe5\xc6"),
+ .aad = TEST_DATA_STR (
+ "\x7e\x16\x08\x2f\x68\x9c\x63\xe8\xad\xdd\xd5\xcb\x2d\xa6\x10\xbb\xfb\x88"
+ "\xd0\x73\xcf\x8b\x20\x43\x84\xa9\x37\xaa\xb0\x37\x65\x23\xa5\x0d\x3d\x5f"
+ "\x13\x92\x97\x8f\x79\x60\x9f\x12\xdf\x8f\xc2\x88"),
+ .tag = TEST_DATA_STR (
+ "\x40\xd3\xa3\x63\x58\xa6\xf6\xca\xaa\x6a\xf9\x2c\xfd\x87\x4a\x22"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc8) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xd8\xf5\x20\xb6\xf3\xcf\x6b\x83\x5c\xe4\xcc\xe4\x8f\x4c\xb0\x33"),
+ .iv = TEST_DATA_STR ("\x01\x9a\x55\xc9\x86\x15\xc0\x22\xaf\xff\x96\x44"),
+ .aad = TEST_DATA_STR (
+ "\xc3\xfb\x51\x8d\xdb\x2d\x73\x41\x7e\x24\x33\x59\xa0\xed\x8c\x12\x67\x50"
+ "\xeb\x16\x3e\x7b\xd8\x45\x63\x71\x59\x39\x70\x75\xe3\xdb\x1d\xb7\x2f\xe2"
+ "\xf0\xe1\x3b\x59\x9c\x33\x3c\x47\x3f\xeb\x22\x45"),
+ .tag = TEST_DATA_STR (
+ "\x46\x7c\xfa\xd5\xaf\x11\x85\x2d\x6e\xca\x28\x9c\x86\xf9\x67\xad"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc9) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x13\xba\x95\x60\x6b\x01\xaf\x03\x5b\xf9\x61\xe3\x98\x52\xe3\x4b"),
+ .iv = TEST_DATA_STR ("\x9e\xc9\xcf\x3b\x00\x2c\xfe\xd9\xe7\x61\x93\x4f"),
+ .aad = TEST_DATA_STR (
+ "\xbb\x9d\xe5\x63\x83\x6d\x1f\x1b\x1d\xe9\x64\x51\x4e\xce\xbb\x8a\xd1\x05"
+ "\x01\xdb\x56\x22\x80\xb7\xbd\x98\x80\x48\x14\x73\x58\x17\x90\x8b\x28\x56"
+ "\xca\xfa\xde\xcd\x40\xb0\x48\x32\xfb\xde\x2b\xfb"),
+ .tag = TEST_DATA_STR (
+ "\x17\x2a\x3b\xcb\xc5\x00\x1d\xfd\x38\x15\x17\x5a\x88\xf7\x05\x6c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc10) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x1c\x97\xda\x5f\xc5\xa9\x64\x0f\x28\x96\x22\x84\x24\x08\xcb\xa2"),
+ .iv = TEST_DATA_STR ("\x6d\x76\x5a\x98\x8e\x93\x45\x88\x16\x3e\x29\xb7"),
+ .aad = TEST_DATA_STR (
+ "\x10\x26\xa5\x90\x81\x6d\x2e\x1a\xa6\x7a\xa0\xd1\x3d\x50\xa8\x41\x3a\xf4"
+ "\xd8\xee\x9b\x1f\xa5\xce\xb8\xde\xac\xc9\xf4\x1e\x8e\x76\x4b\x3a\xc1\x5f"
+ "\x98\x29\x5e\x88\x00\xad\xf6\xa7\x17\x54\x48\xcd"),
+ .tag = TEST_DATA_STR (
+ "\x49\x45\xa7\x9d\x5e\xdb\xb9\x34\xc5\xcf\x94\x39\x5c\x35\x9d\xeb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc11) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x8d\xd4\x6f\x27\x1a\x20\x1c\xc2\x1c\xa0\x82\x32\x48\x15\x7e\x6b"),
+ .iv = TEST_DATA_STR ("\x18\x21\xb3\x10\xce\x2d\xba\x99\x9c\xdf\x75\x76"),
+ .aad = TEST_DATA_STR (
+ "\x34\xba\x40\x99\x97\xce\xba\x06\x5f\x4a\x54\x57\x07\x8a\x9e\x23\x2a\x84"
+ "\xf5\x94\x01\x1a\xec\xfd\xbf\xbd\x24\xa8\x02\xca\x12\x9e\x01\xcb\x13\x27"
+ "\xe2\x65\xb4\xa9\x00\x4f\xb4\xc5\x00\x3f\xff\xd3"),
+ .tag = TEST_DATA_STR (
+ "\x30\x4c\xc2\xcd\x2f\xcd\xd4\xab\xc8\x44\xbc\x9c\x1c\xbe\x02\x41"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc12) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x0c\x54\x5d\x95\x33\x3b\x6a\xcf\x8b\x29\x28\xf3\xef\xd0\x83\xde"),
+ .iv = TEST_DATA_STR ("\x31\xde\x89\xd0\x7e\x75\x77\x95\x6f\xa9\x5e\xf3"),
+ .aad = TEST_DATA_STR (
+ "\x55\x74\xd6\x5f\x5a\xff\xfb\x2d\x31\xcc\xa8\xf5\x8c\xf5\x94\x5b\x83\x55"
+ "\x3c\xd4\x5d\x2d\xba\x0e\x05\xfa\x54\xe4\x2a\xa3\xf5\xa0\x51\xe1\x62\x4d"
+ "\xe1\x6d\x4b\x93\xcb\xab\x79\x88\xc6\xd9\x5f\x8c"),
+ .tag = TEST_DATA_STR (
+ "\x4e\xd9\x1c\xfe\x90\xa4\x99\x00\xe0\x56\x56\x97\xbc\x82\xb6\x59"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc13) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x79\x0b\x39\xf3\x01\x38\x3a\x82\xb3\x77\xf5\x85\xd3\xbf\x0f\x26"),
+ .iv = TEST_DATA_STR ("\x2f\xd9\xc1\x42\xb5\xfc\x62\xe8\x7e\xff\xf1\xfd"),
+ .aad = TEST_DATA_STR (
+ "\x45\x63\x4e\x0a\xfc\x59\xae\x9f\x6e\x30\xf7\xf5\xfe\x43\xcf\x5a\x4e\x1f"
+ "\x78\xd0\xae\xbb\x9e\x5a\x7a\xd9\xd8\x6f\x25\x27\x8e\x52\x1f\x48\x45\xd4"
+ "\x9d\x6c\xb5\x33\xca\xc6\x43\x98\x39\x64\x7f\xd0"),
+ .tag = TEST_DATA_STR (
+ "\x69\x63\x7c\x3f\x92\x33\xda\x23\xf8\xdf\x7b\x09\xe8\xcf\xb2\x52"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad384_tc14) = {
+ .name = "128-GMAC 384-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x8f\x63\x65\x26\x32\xd0\x7b\x2a\x4a\x83\xc2\x6d\xed\xd3\x26\x57"),
+ .iv = TEST_DATA_STR ("\x74\x7b\xee\x0e\x1d\x46\x2a\x90\x16\xf1\x46\x8d"),
+ .aad = TEST_DATA_STR (
+ "\x9c\x00\xff\x96\x9b\x55\xa4\x97\xdc\x52\x3f\xa0\xce\xda\xa3\x39\xdc\x3c"
+ "\x6c\xe1\x8e\x61\xc7\xbf\x80\x0c\x36\x12\x01\x35\x1b\xc4\x97\x28\xc3\xbb"
+ "\x15\x06\x7e\x90\x61\x62\xee\x79\x1b\x8d\x33\x3a"),
+ .tag = TEST_DATA_STR (
+ "\xbd\x5a\x0c\xbf\x85\x9a\x61\x33\xa7\xf2\xd5\x04\xd9\x7c\xae\x05"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc0) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x20\xb5\xb6\xb8\x54\xe1\x87\xb0\x58\xa8\x4d\x57\xbc\x15\x38\xb6"),
+ .iv = TEST_DATA_STR ("\x94\xc1\x93\x5a\xfc\x06\x1c\xbf\x25\x4b\x93\x6f"),
+ .aad = TEST_DATA_STR (
+ "\xca\x41\x8e\x71\xdb\xf8\x10\x03\x81\x74\xea\xa3\x71\x9b\x3f\xcb\x80\x53"
+ "\x1c\x71\x10\xad\x91\x92\xd1\x05\xee\xaa\xfa\x15\xb8\x19\xac\x00\x56\x68"
+ "\x75\x2b\x34\x4e\xd1\xb2\x2f\xaf\x77\x04\x8b\xaf\x03\xdb\xdd\xb3\xb4\x7d"
+ "\x6b\x00\xe9\x5c\x4f\x00\x5e\x0c\xc9\xb7\x62\x7c\xca\xfd\x3f\x21\xb3\x31"
+ "\x2a\xa8\xd9\x1d\x3f\xa0\x89\x3f\xe5\xbf\xf7\xd4\x4c\xa4\x6f\x23\xaf"
+ "\xe0"),
+ .tag = TEST_DATA_STR (
+ "\xb3\x72\x86\xeb\xaf\x4a\x54\xe0\xff\xc2\xa1\xde\xaf\xc9\xf6\xdb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc1) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x7a\xa5\x31\x88\xa9\xc5\x97\x12\x6a\x10\xd2\x48\x60\x3e\xbb\x62"),
+ .iv = TEST_DATA_STR ("\xaa\x45\xca\x5d\xac\x41\xa8\x25\xc4\x5d\x36\xbf"),
+ .aad = TEST_DATA_STR (
+ "\x41\x7f\xd5\x14\x7d\x56\xde\x0c\x74\x32\x95\x97\x82\x4e\xc2\x78\x8a\x34"
+ "\x4f\xb6\x0b\x40\x3e\xdf\x01\x87\xaf\xa1\x2e\x72\xa0\x50\x09\xbb\x70\xf8"
+ "\x3c\xca\xd1\x1e\xfa\x48\x7c\x19\x65\xcf\x84\xfe\xac\x06\x7c\x1f\xfd\xbf"
+ "\x53\x1f\xca\x97\xc5\x54\xf8\x75\xc4\xa1\xa1\xd3\xab\x3c\x53\xc8\xa7\x4e"
+ "\xf3\xee\x94\x15\xa8\x7e\x23\x16\x99\xc8\x2d\x76\x4d\xeb\xed\xa1\x81"
+ "\x32"),
+ .tag = TEST_DATA_STR (
+ "\x99\x7b\xf8\x46\x54\xbb\x96\x16\xc0\xcc\x9b\x45\xf8\x2c\x76\x73"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc2) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x72\xb5\x84\x8e\xd1\xd2\xba\xdb\xd4\x27\xe1\x6f\xc3\xb3\xe4\x4d"),
+ .iv = TEST_DATA_STR ("\xa8\x4c\x7e\x92\x8d\xc6\xe6\x37\x9a\x51\x3a\x20"),
+ .aad = TEST_DATA_STR (
+ "\x1c\x0d\xfc\xec\xbd\x7b\xb0\xe6\x80\xce\x04\x2d\x08\xb2\xd9\xa7\x41\x26"
+ "\x7b\xd1\xda\x76\x8d\xf2\xba\x08\x37\x92\x33\xa9\x97\x3f\x14\x92\x8e\x9d"
+ "\xa6\x35\x37\x68\xb9\xb2\x60\x1c\x03\x3f\xd9\x64\xb1\x6a\x16\xda\xaa\x3e"
+ "\xa3\x5a\xd7\xce\xf7\xe3\x1e\xb1\xf7\x34\x0a\xa3\x4e\x8b\xfc\x08\xb0\xa6"
+ "\xe6\x20\x52\x92\x57\x0c\xed\x43\x31\x68\x76\xd0\xd4\x99\xd9\x19\x2e"
+ "\x6b"),
+ .tag = TEST_DATA_STR (
+ "\x27\x0c\xd7\x86\xb9\x5e\x68\x20\xcd\xb6\x5a\x23\x1b\x75\x30\xed"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc3) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x6d\x05\x12\xeb\xf2\xe7\x3d\x63\xf4\x28\x49\xc5\x7f\x07\x3f\xd0"),
+ .iv = TEST_DATA_STR ("\xc1\xc4\x69\x27\xc7\x4c\x03\xf1\x93\x42\xc3\x3a"),
+ .aad = TEST_DATA_STR (
+ "\x28\xbf\x89\x03\xb2\xdf\xb7\xe6\x9f\x1a\x73\x51\x21\xc7\xef\xe9\xa4\xc4"
+ "\x2b\x6a\x29\x53\x27\xbc\xeb\x02\x46\xc8\x5d\x78\x2c\xe6\x2b\xf0\x75\xdb"
+ "\xdf\x6e\x8e\xc6\x58\x9c\x26\xd3\x06\x96\xcc\xce\xef\x03\x87\x0b\xd0\xab"
+ "\xfd\x26\xd3\x06\x00\xea\xfc\x65\x61\x37\x40\xb5\x4d\x77\x7d\x37\x9e\x8a"
+ "\xac\xf2\x41\xec\xfb\xa1\x1b\x06\x01\x86\xac\x06\x5d\xb1\x71\xaa\xb0"
+ "\x99"),
+ .tag = TEST_DATA_STR (
+ "\xa6\x86\xf5\x94\x1c\xeb\x51\x0e\x12\x6a\x63\x16\xe3\x40\x4d\xc0"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc4) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x64\x38\xbc\x79\x52\x0d\xef\x5d\xb5\x8e\x49\x63\x97\x74\x68\x7a"),
+ .iv = TEST_DATA_STR ("\xd6\x82\xb4\x74\x18\xce\xb5\xbc\x09\xc7\x13\xc2"),
+ .aad = TEST_DATA_STR (
+ "\xd2\x52\xb1\x64\xae\x55\x9e\xd1\x55\xc8\x41\x7b\x96\x65\x25\x29\xdf\x15"
+ "\x1f\x24\xcc\xf1\xce\x98\xd0\xc7\xdd\xf2\x93\xf4\xf1\x23\x66\x30\xa1\x9b"
+ "\x24\xdc\x23\x97\x8d\x33\x77\xa0\x99\x06\x5d\x0b\xa7\x1d\x4b\xb8\xa7\xdc"
+ "\x0c\xb7\x67\x60\xca\x7c\x4a\x0e\x12\xc8\xcb\x56\xc6\x10\x26\x46\x32\x3c"
+ "\x08\xc4\xf4\xf5\x62\x26\xfd\x5b\x71\xa8\x45\x90\x91\x3a\xd2\x0d\xa2"
+ "\x87"),
+ .tag = TEST_DATA_STR (
+ "\x04\xe7\x87\x96\xdb\xf4\x2e\x9f\xfa\x6b\xb9\xe3\x46\x58\x1f\x13"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc5) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x11\x7a\x0a\xa5\x92\xff\xf1\x7a\xe3\x6c\x94\x91\x7d\xb1\x6c\x65"),
+ .iv = TEST_DATA_STR ("\xc3\x53\x7b\xe6\x02\x9d\x54\xff\xef\xab\x27\x30"),
+ .aad = TEST_DATA_STR (
+ "\x29\xe9\x59\xb9\x68\x17\x54\x7a\xe0\x6b\xf8\x5f\xe1\x64\xe8\x2a\x26\x93"
+ "\xf8\x2a\x7a\xeb\x66\xd5\x35\xf0\xd2\xc3\xbf\xfd\x1b\xa1\x8e\x94\xef\x45"
+ "\x79\x39\xf0\xc0\x73\x3e\xda\x47\x38\xd1\x36\x38\x0f\xc8\x76\x07\x5c\x49"
+ "\x43\x22\x02\x37\xa5\x92\x9b\x01\xb3\x2d\xa2\xbc\x2a\x6a\xfd\x6a\xe1\xd8"
+ "\x9f\xd4\x70\x09\x38\x35\x96\x2f\xf6\x70\x8b\xb3\x9b\xa3\x65\x20\x2f"
+ "\x56"),
+ .tag = TEST_DATA_STR (
+ "\xb8\x7f\xcc\x4d\x5c\x48\x4e\x68\xea\x52\xc0\x1b\x55\xff\xa4\x38"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc6) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x5d\x99\x5a\x33\x8e\xd6\x0f\x8a\xb0\xb5\x9d\xa6\xc9\xa4\x0c\x52"),
+ .iv = TEST_DATA_STR ("\x27\x23\xc5\x4e\x31\xc5\xc5\x7f\x02\x36\xe8\x16"),
+ .aad = TEST_DATA_STR (
+ "\x23\x9c\x80\x68\x3f\xeb\x6a\xfd\x38\xf8\x75\x9a\x27\xcb\x5f\x35\x0f\xbc"
+ "\x2f\x75\x78\x38\xc4\x08\x58\xc9\xd0\x8f\x69\x9c\xc5\x6c\x42\x36\xf4\xa7"
+ "\x7b\xd8\x0d\xf0\xe8\xe4\x1d\x5f\x9b\xa7\x32\xdb\x2e\x0a\x3a\x5e\x95\x2e"
+ "\xde\x7b\xfd\xd5\xfc\xbe\xbd\x23\xd0\x72\x71\x13\x4d\xb5\xb8\x24\x61\x53"
+ "\x7c\x47\xe2\xca\x51\xb3\x48\xb0\x83\x0f\x5e\xe5\x75\xad\x4b\x44\x14"
+ "\xdc"),
+ .tag = TEST_DATA_STR (
+ "\x94\x35\x6a\x3b\xfa\xf0\x7f\x2e\xf0\xeb\xe3\xa5\x07\x07\x6b\x16"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc7) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xc8\xa8\x63\xa1\xeb\xaf\x10\xc0\xfc\x0e\x80\xdf\x12\x44\x4e\x6e"),
+ .iv = TEST_DATA_STR ("\xc3\xe8\xcd\xf0\x86\x82\x7f\xee\x70\x95\xd0\xea"),
+ .aad = TEST_DATA_STR (
+ "\x99\x27\xda\x88\xc5\xd3\x36\x25\x66\x99\xc7\x68\x45\xe9\x46\xdc\x53\xc8"
+ "\x7b\xf0\xe1\x1e\x4b\xec\x94\x50\x98\x16\x02\xb3\x20\x10\xd2\xb5\x2b\xfc"
+ "\x91\x28\x3a\x63\x29\xd4\x55\x59\x89\x98\xed\xe2\xe6\x1e\x35\x2e\x55\x31"
+ "\x10\x15\x4b\x4d\xa5\xce\x66\x8d\x66\x4b\x83\xf6\x71\xc0\x10\xbf\x22\x0b"
+ "\x7d\x32\xb3\x4f\x4c\xa6\x9b\x66\xcc\x87\x23\x3d\x79\x23\x37\xcb\x2b"
+ "\xff"),
+ .tag = TEST_DATA_STR (
+ "\x09\x88\x37\xde\x27\x70\x7e\xa3\x59\x3e\x31\xce\xb8\x27\x67\x32"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc8) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x69\xcc\x28\xb1\x61\xf2\x14\xa5\x80\xe6\xba\x4b\xc2\xe3\xde\x9d"),
+ .iv = TEST_DATA_STR ("\xf2\xa5\x66\xf9\xcf\x83\xfd\x28\x0c\x8f\xe0\x8e"),
+ .aad = TEST_DATA_STR (
+ "\xf8\xc5\x26\x3a\x4e\x06\xb4\x9e\x18\x45\x89\xa1\xe0\x71\x97\x86\x43\xc3"
+ "\x53\xaa\x27\xb4\x81\x7f\xe3\x9e\x45\xab\xc4\x42\xe2\x2a\xb5\xd6\x83\xbc"
+ "\xee\x5d\xbb\xd5\x89\xfa\x58\x3f\x17\x1b\xb5\x95\x36\xad\xdd\x2b\x6c\xef"
+ "\xd4\x98\x23\x41\x30\x05\xef\xb2\xa6\x65\xe2\x6a\x60\x29\xc9\x27\xd3\x89"
+ "\x1c\xb0\xd4\xf2\x3e\x8c\xcc\x60\xcf\xd0\x2c\xe8\x97\x8c\x45\x1d\xdc"
+ "\x11"),
+ .tag = TEST_DATA_STR (
+ "\xc9\xc8\x06\xcb\x8b\x1a\x88\x98\x09\x69\x5c\x2e\xc5\xa7\xa8\x6e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc9) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xbb\xf3\x59\x20\xfc\xab\x2c\xed\xaa\xfd\xf3\xf0\x03\x21\xf5\x44"),
+ .iv = TEST_DATA_STR ("\x2c\x7e\xe3\xff\x1d\xf8\x4f\x36\x50\xbc\x92\x98"),
+ .aad = TEST_DATA_STR (
+ "\xa7\x5f\x50\xba\x9a\x50\xf4\x87\x99\x59\x4b\x61\x95\xb3\x12\x5e\xd9\x2d"
+ "\xf7\x31\x44\xbf\xcb\x62\x4c\xe6\x73\x23\xd8\x34\xba\x1a\xfa\xf0\xdf\x4c"
+ "\x6c\x02\x2c\x11\xd4\x8b\xd7\x5c\x86\x67\x5a\x59\x27\xac\x12\x50\x03\x0f"
+ "\x72\x0f\x97\x49\x8d\x4f\xe0\x78\x7b\xae\x65\x5d\xc5\x53\x7a\xc1\xbc\xac"
+ "\x19\x8a\x89\x3f\x9a\xf7\xc2\xef\x9b\x97\x1d\xd6\x4f\x7e\x7b\x62\x60"
+ "\x3e"),
+ .tag = TEST_DATA_STR (
+ "\xc7\xcd\x3f\x93\x8f\x4a\xb1\x86\x42\xd8\x62\x34\xed\xfc\x17\xed"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc10) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x96\x90\xde\x66\x97\x02\xba\x72\xae\xb9\x34\xf5\xac\x50\xe0\x3c"),
+ .iv = TEST_DATA_STR ("\xda\x87\x13\xfe\x2b\x20\x58\xc4\x38\xaf\xf2\x60"),
+ .aad = TEST_DATA_STR (
+ "\xf3\x0e\xe9\x50\xda\x37\xc7\x22\x4b\x5c\x93\xe9\xa2\x9c\xaf\xdb\xf8\xe2"
+ "\x07\x0f\x65\xc2\x26\x24\x4b\x1a\x68\x34\x59\xe0\xc5\xc1\x1c\x9b\x77\xc8"
+ "\xfc\x28\x6d\x42\x98\xa5\xb9\xcd\x1f\xee\x3e\x13\xd4\x69\x0a\x88\x78\x0d"
+ "\x35\xb5\x58\xb5\xd9\xe5\x2b\x1a\x67\xfc\x88\x57\x07\x66\x91\xdc\xa7\xf5"
+ "\xfe\x8e\xf2\x20\x65\xcc\x5d\x9c\x00\x3f\xfd\x25\xeb\xe2\x3e\x61\x44"
+ "\x0e"),
+ .tag = TEST_DATA_STR (
+ "\x7f\x92\x91\x45\x18\xdd\xbe\x84\x2b\x06\x77\x1f\x64\xc4\x0f\x59"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc11) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xe5\xd8\xc6\xe2\xac\x69\x35\xc8\x5e\x81\xee\x0e\xf7\x23\xea\xcf"),
+ .iv = TEST_DATA_STR ("\xc7\x31\x40\xee\x90\xcc\x1d\xcf\x88\x45\x7d\xa2"),
+ .aad = TEST_DATA_STR (
+ "\xf6\xc2\x67\xa6\xae\x5c\xe3\xcf\x4b\xcd\xf5\x9c\xfd\x1f\x77\x7c\x66\x13"
+ "\x3e\x0e\xc4\x77\x27\x85\xf3\x3e\x5f\xa8\x00\xd3\x10\xb2\x4b\x57\x73\xbc"
+ "\x60\x3a\x76\xb3\x0f\xc3\x23\x28\xa8\xe4\x0f\x02\xf8\x23\xa8\x13\xa9\xe4"
+ "\xb4\xfa\xc7\x26\xe9\x92\xc1\x83\xbd\x08\x15\x11\x1c\x1d\x3a\x35\x88\x4a"
+ "\x4e\xff\x32\x02\x7b\xa6\x0d\xba\x67\x9b\x46\x9a\xf3\x1b\xc5\x0c\x05"
+ "\x91"),
+ .tag = TEST_DATA_STR (
+ "\xf9\x38\xfd\x0d\x8c\x14\x8d\x81\x76\x51\x09\xdf\x66\xda\xc9\xaa"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc12) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xe2\x34\x58\xf6\xb3\x04\xc2\xd8\xfe\xb3\xde\xdd\x37\x41\xbc\x24"),
+ .iv = TEST_DATA_STR ("\x46\x19\x03\x6b\x50\xba\x01\x2f\xe5\x0b\xe1\xd7"),
+ .aad = TEST_DATA_STR (
+ "\x74\xbf\xdc\x6b\xc4\xbf\xc3\x8d\x66\x6b\x98\x5c\xfe\x04\x3c\x67\x79\x8b"
+ "\x2d\xb9\x8f\x14\x92\x68\xdb\xa2\x44\x36\xca\xb8\x3e\x9a\x91\xf2\x44\xff"
+ "\xc5\x74\x8c\x93\xf8\xdf\x33\x9a\xe2\x4b\xa4\x31\x8c\x50\xda\x01\x1a\xb3"
+ "\x68\xd3\x16\x7c\x16\xe5\x03\x30\x9b\x01\x35\x1a\x11\xf1\x4d\x06\x7c\xc6"
+ "\x76\x9b\x99\x89\xc7\xd9\x52\xe3\x31\x50\x11\xee\x2e\xa0\x34\xdb\x8c"
+ "\xb8"),
+ .tag = TEST_DATA_STR (
+ "\x60\x53\xab\x80\xc7\x46\x82\x1e\xc5\x0c\x97\xe5\xa1\x42\x4a\x85"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc13) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x53\x72\xac\x5d\x3b\x08\xd8\x60\x91\x91\x10\xbd\xeb\x7f\x31\xdf"),
+ .iv = TEST_DATA_STR ("\x06\xca\x97\x9d\x8c\x25\x0d\x9b\x7b\xe4\x55\x73"),
+ .aad = TEST_DATA_STR (
+ "\xe1\xf9\x58\x83\x4e\x63\xc7\x5c\x8c\x75\x8b\xaf\xaa\x2f\x25\x7e\xa5\x68"
+ "\x9d\x0d\x55\xb8\x77\xb4\xd6\x7b\x8b\x73\xc2\x5c\xe2\x4e\x9b\x09\x4b\x97"
+ "\x6d\xb9\x20\xa1\x59\x96\x8d\xa9\xd3\x3c\x51\x1a\xa8\x99\x9a\xba\x42\xb8"
+ "\xbb\x88\x6e\x65\x45\xdd\x10\x86\x93\x15\x0a\xf3\x57\x49\x6b\xb5\x89\x8b"
+ "\x4e\x8f\x72\x5d\x50\xef\x47\x4a\xfb\x83\x6a\x33\x58\xda\x22\x17\xbb"
+ "\x93"),
+ .tag = TEST_DATA_STR (
+ "\x93\x38\xe1\x4f\xe0\xb0\x8a\x96\x9a\x10\x4c\x82\x85\x28\xa6\xa4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac128_aad720_tc14) = {
+ .name = "128-GMAC 720-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_128_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xbf\x1c\xb4\x9e\x98\x0c\xec\x0b\x15\x3f\xe3\x57\x38\x75\xac\x6c"),
+ .iv = TEST_DATA_STR ("\x54\x26\x66\x9d\x25\x52\x40\x36\xfb\xe8\x1e\x89"),
+ .aad = TEST_DATA_STR (
+ "\xb3\x36\x94\x97\x66\xe9\x94\x8a\x7e\x6f\x36\xa2\xd3\x77\xb8\x4a\x25\xc4"
+ "\xb4\x98\x87\x94\xf3\xde\xab\x7a\xf4\xb1\x4a\x12\xda\xc6\x41\xe2\x5f\xe2"
+ "\xae\x9f\xf5\x34\x50\xac\xe1\x51\x3a\xcd\x0b\x28\x4a\x49\x0b\x45\x5f\x04"
+ "\xf4\x0a\xf9\x44\x18\xc8\x79\x2e\xc1\xa0\x98\x3f\xb1\xd9\xa3\x1d\x93\xdc"
+ "\x3e\xd2\xc7\x5e\x6a\x6c\xe0\x92\x11\x1e\xab\xad\x03\x9b\xac\x2a\x49"
+ "\xf6"),
+ .tag = TEST_DATA_STR (
+ "\xe2\x99\x6a\x2b\x3b\x6b\xf5\x22\x17\xcf\xc4\xd0\xf5\xbb\x35\x1b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc0) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xaa\x74\x0a\xbf\xad\xcd\xa7\x79\x22\x0d\x3b\x40\x6c"
+ "\x5d\x7e\xc0\x9a\x77\xfe\x9d\x94\x10\x45\x39"),
+ .iv = TEST_DATA_STR ("\xab\x22\x65\xb4\xc1\x68\x95\x55\x61\xf0\x43\x15"),
+ .tag = TEST_DATA_STR (
+ "\xf1\x49\xe2\xb5\xf0\xad\xaa\x98\x42\xca\x5f\x45\xb7\x68\xa8\xfc"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc1) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x1b\xb1\xd6\xa3\xbf\xc7\x48\x78\x6f\x39\x51\xe4\x3c"
+ "\x18\x05\x4b\xfc\x8c\xe6\xab\x3d\xc3\xd3\x98"),
+ .iv = TEST_DATA_STR ("\xfe\xa5\x6a\x5c\xe5\xf7\xd4\xc8\x16\x80\x19\x5d"),
+ .tag = TEST_DATA_STR (
+ "\x0b\x0b\xc0\x76\x8b\x02\xf1\x26\xa2\x9b\xcb\x14\x4a\xbc\x6e\x4b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc2) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xb6\x28\x0d\x41\xff\x79\xec\x0a\x8d\xb1\x7c\x1e\x22"
+ "\xf4\x3c\x5f\xdc\x92\x89\x88\x46\xe6\x46\xb5"),
+ .iv = TEST_DATA_STR ("\xc4\x78\x88\xc1\x48\xb7\xdc\x05\x5f\x99\xaa\x08"),
+ .tag = TEST_DATA_STR (
+ "\xe3\x05\xfa\x02\x47\x24\x60\x6e\x14\x03\x26\x95\x9b\xfc\x53\x18"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc3) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x02\x87\xa6\xe3\xf9\x00\x56\xcb\x4b\x3d\x3f\x72\x23"
+ "\xe8\xd0\xaf\x58\xc0\x9f\x15\x6c\xb3\xb8\x05"),
+ .iv = TEST_DATA_STR ("\xea\x0c\xd8\x6c\x79\x7c\x34\x2c\xfb\xb5\xa9\xcc"),
+ .tag = TEST_DATA_STR (
+ "\x54\x9e\xc0\xd0\xff\xcf\xc1\x38\x1e\xd7\x09\xc3\xea\x8e\xac\xda"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc4) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x47\x3e\xa0\xd2\x98\xf6\x45\xcb\x01\xfa\x64\x83\x7b"
+ "\x47\x09\xb2\x83\x74\xf3\xe9\x55\xe7\xfb\xa2"),
+ .iv = TEST_DATA_STR ("\x25\x5b\x70\x4f\x33\xad\x24\xbb\x93\xd2\x67\x3a"),
+ .tag = TEST_DATA_STR (
+ "\xec\xbe\xe0\x40\xa6\xba\x2b\xd2\x71\x0f\x0d\xa5\x8b\x10\x96\xc2"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc5) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x6d\xa0\xbd\x47\x43\x24\x4d\xc4\xb3\xbe\x99\x3a\xb3"
+ "\x95\x4c\xcd\xc9\x00\x77\xff\x31\x1e\xa6\x57"),
+ .iv = TEST_DATA_STR ("\xd1\x9f\xaf\x29\xfe\xaf\xd0\x70\x83\x94\x38\x16"),
+ .tag = TEST_DATA_STR (
+ "\xbd\xd8\xdb\xa9\x1c\xf8\xaf\xfc\x30\x9f\x91\xb3\xe2\x39\x18\xaa"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc6) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xc9\xaa\x36\xf1\x7b\x9e\x57\xcf\xac\xb9\x28\xe1\x34"
+ "\x66\x00\xc2\x2d\x36\x7d\x18\x28\x54\xb4\x64"),
+ .iv = TEST_DATA_STR ("\x6d\x12\x0c\x4a\x05\xe2\xb4\x52\xcc\x22\x13\xc4"),
+ .tag = TEST_DATA_STR (
+ "\xd2\x3a\x67\x29\x61\x9f\xa7\xc3\x58\x79\x4b\x63\xbf\x5e\xe8\xd7"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc7) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x5e\xde\xf8\xc4\x17\x1c\x44\x58\x7c\xbb\xf0\xf4\x2d"
+ "\xc7\xdc\x05\xb3\x6c\x84\x91\xd6\x7c\x75\x82"),
+ .iv = TEST_DATA_STR ("\x21\xf2\x65\xfe\x1e\x26\xff\xe4\x91\xa0\xe5\x94"),
+ .tag = TEST_DATA_STR (
+ "\x9e\x73\xbc\x5f\x26\xd5\xf1\xc8\x5f\xd5\xc0\xdf\x48\x63\x61\x04"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc8) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x44\x17\x16\x09\x6e\x80\xa1\xa8\x62\x86\xd2\xe1\xdf"
+ "\x48\x94\xae\xf5\xfa\x91\x4d\x7f\x6b\xde\xda"),
+ .iv = TEST_DATA_STR ("\xb0\x10\x72\xd8\x25\xeb\x24\xba\x3c\x0a\xbc\x95"),
+ .tag = TEST_DATA_STR (
+ "\x28\xaf\x88\xfe\x13\x40\x68\xe9\xc8\x44\x5a\x19\x47\x84\x3e\xd2"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc9) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xd3\x6e\x99\x04\xe0\xab\x25\x51\xc8\xa4\x12\x56\xbc"
+ "\x66\xad\x25\x37\xf4\xd7\x8c\x56\x18\x33\x73"),
+ .iv = TEST_DATA_STR ("\xfc\xc8\x50\xfa\xc3\x38\x79\x4f\x3a\xdf\x50\x50"),
+ .tag = TEST_DATA_STR (
+ "\xa8\x8c\x92\xd6\xec\x0a\xbe\x95\x0c\x14\x7a\xf3\xb6\xbf\xae\xca"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc10) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x50\x04\xb2\x3a\xa5\x4b\x45\x3c\xe7\x98\xd0\xfa\xe7"
+ "\x07\x35\x00\xc4\xfb\xd4\x94\xc3\x50\xc5\xd0"),
+ .iv = TEST_DATA_STR ("\x31\x11\x9a\xc3\x7e\x06\x63\x25\x05\x48\xd8\x9a"),
+ .tag = TEST_DATA_STR (
+ "\x1b\x47\x45\x91\x10\x76\x4a\xae\x49\xf9\x44\xaf\x0c\x74\xd1\xf3"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc11) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xd7\xfe\x59\x41\x1c\x9a\x7e\xf5\x5c\xf5\x6e\xee\xe2"
+ "\xd9\x60\xc5\xcd\x65\x5f\x4f\xab\xce\x69\x69"),
+ .iv = TEST_DATA_STR ("\x56\x00\x39\x99\x1d\x07\x81\x71\xb8\x6e\x2e\x36"),
+ .tag = TEST_DATA_STR (
+ "\x33\x25\xa6\x83\x1b\x9a\x8d\xa5\x26\xad\x3a\x9c\x30\xbf\x89\x64"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc12) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x00\x60\xc6\x4b\x6b\x16\x81\xe4\x60\x30\x0a\x17\x63"
+ "\x39\x88\xee\xfc\x6f\xc2\xcb\xd4\x7a\xe6\xc9"),
+ .iv = TEST_DATA_STR ("\x14\xd0\x54\x6f\xda\x5d\x9c\x36\x46\x18\x9d\xd4"),
+ .tag = TEST_DATA_STR (
+ "\x66\x78\x4d\x25\xfb\x39\xfa\xcb\xdd\x80\xae\xfa\x7d\xa0\xf0\x2f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc13) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x9c\x4f\x21\xe3\x7b\x91\xfe\x41\x9e\x35\xb6\xfc\xdc"
+ "\x4e\x70\xd0\x32\x55\x75\xf9\x11\xc1\x3b\x43"),
+ .iv = TEST_DATA_STR ("\xf5\x7a\xf6\x28\x46\xb2\x71\xe8\x02\xd6\x70\x1f"),
+ .tag = TEST_DATA_STR (
+ "\x6b\x1e\x00\x9e\x6e\xdf\x78\x9f\xc9\x43\x85\x73\x4d\xd5\x7d\x2f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad0_tc14) = {
+ .name = "192-GMAC 0-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x8b\xed\xa5\x6e\xe0\xe1\xe6\xd8\x09\x8b\xa2\x67\xf0"
+ "\x9b\x96\xd8\x9a\x3a\x46\x22\xa6\x41\xe7\x9d"),
+ .iv = TEST_DATA_STR ("\x81\x29\x96\x6d\x15\xbd\xb7\x0e\x0d\x2f\xcc\xef"),
+ .tag = TEST_DATA_STR (
+ "\x28\x73\xdf\x0e\x03\x54\x86\x46\x81\x46\x30\xe0\xca\xc1\xe4\xe5"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc0) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x41\xc5\xda\x86\x67\xef\x72\x52\x20\xff\xe3\x9a\xe0"
+ "\xac\x59\x0a\xc9\xfc\xa7\x29\xab\x60\xad\xa0"),
+ .iv = TEST_DATA_STR ("\x05\xad\x13\xa5\xe2\xc2\xab\x66\x7e\x1a\x6f\xbc"),
+ .aad = TEST_DATA_STR (
+ "\x8b\x5c\x12\x4b\xef\x6e\x2f\x0f\xe4\xd8\xc9\x5c\xd5\xfa\x4c\xf1"),
+ .tag = TEST_DATA_STR (
+ "\x20\x4b\xdb\x1b\xd6\x21\x54\xbf\x08\x92\x2a\xaa\x54\xee\xd7\x05"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc1) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xae\xf7\x60\xf0\xcb\x88\x11\xf4\xab\x4a\x05\xfa\xa3"
+ "\x5f\xe8\xb9\x85\x4a\xde\x54\x8e\x04\x0e\x7f"),
+ .iv = TEST_DATA_STR ("\xd5\xda\xed\xc1\xd4\x95\x9a\x5d\x74\x4b\xc5\xf2"),
+ .aad = TEST_DATA_STR (
+ "\xae\xa8\xce\x76\xe3\xcf\x40\xd4\x73\xf6\x1a\x08\xd5\x9e\x53\xf5"),
+ .tag = TEST_DATA_STR (
+ "\x92\x6c\x61\xde\xf0\x2e\xd3\x0e\xd1\x58\xe3\x55\xac\x5d\x57\x10"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc2) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x12\x00\x0b\x16\x46\x22\x25\x44\x6c\xfe\x6e\x80\xd8"
+ "\x4d\x47\x1d\xbc\x9a\xa3\xf4\x78\xd4\x65\x83"),
+ .iv = TEST_DATA_STR ("\x6c\x88\x3c\xb8\xf9\xff\x7e\x57\x90\x34\x52\x4e"),
+ .aad = TEST_DATA_STR (
+ "\x23\x27\x13\xb8\xde\x07\x44\xb0\x82\x51\x54\x9a\xaa\x19\x15\x4f"),
+ .tag = TEST_DATA_STR (
+ "\x20\x94\xf4\x98\x9f\x85\x0a\xf3\xbb\xfc\x48\xb1\x89\x5e\xc1\xde"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc3) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xb3\xe8\xa0\xd0\x5d\xbc\xd4\x53\xbd\xc8\xec\x09\x75"
+ "\xf2\xbb\x06\x3a\x21\xd0\x39\x1d\xc9\x46\x45"),
+ .iv = TEST_DATA_STR ("\x3f\xaf\x8b\xf9\x1d\x4d\x95\xa7\xf9\x62\x8a\x65"),
+ .aad = TEST_DATA_STR (
+ "\x6e\x69\x26\x61\x76\x14\xbe\xf6\x15\x3a\x4c\xe6\x29\xa9\x1b\x69"),
+ .tag = TEST_DATA_STR (
+ "\xac\xbb\x55\xb7\x10\x2e\x86\x17\x75\x42\xbc\x5a\x7f\xc7\x17\xa1"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc4) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xdf\x91\x67\x43\xd0\x21\x80\xa2\x88\x80\xbc\x45\xe0"
+ "\xd0\xb7\x37\x47\x74\x69\x26\x01\xeb\x55\x56"),
+ .iv = TEST_DATA_STR ("\x48\x31\x18\x9f\x72\x75\x17\xd7\xf4\x22\xf1\x2f"),
+ .aad = TEST_DATA_STR (
+ "\x4a\x4e\x9b\x8c\xda\x53\x57\xf9\x5c\x37\x26\x48\x64\xe3\x89\xa9"),
+ .tag = TEST_DATA_STR (
+ "\xde\x3b\x10\xaf\x6d\x8c\x1e\x3c\xd5\x80\x20\xce\xac\x9c\x5f\x41"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc5) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xcd\x58\xa2\xca\x49\x5b\x28\x14\x79\x61\x61\x91\x1a"
+ "\xf0\x82\xa8\x52\xc8\xcb\xd3\xfe\x1c\xed\xb4"),
+ .iv = TEST_DATA_STR ("\x78\x66\xd4\x58\x8a\xce\x52\xed\x1d\x07\xd3\x46"),
+ .aad = TEST_DATA_STR (
+ "\xe0\xe6\xf8\x5c\x52\xab\xa6\x87\x3d\x7d\xb5\x0d\x80\x2e\xd6\x16"),
+ .tag = TEST_DATA_STR (
+ "\x0d\x2b\xd7\xc5\x1f\x7f\x88\x16\x20\xbf\x50\x8f\x4b\x66\x2d\xa6"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc6) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xd8\x0f\x1f\x01\xe6\x6c\xa7\x04\x1f\x12\xa9\xde\xc4"
+ "\x6c\xed\xfd\xf7\x5a\xef\x66\x4c\x58\xb2\x33"),
+ .iv = TEST_DATA_STR ("\x24\x08\xb3\x5e\x9b\xa6\x93\xe0\x89\x31\xf7\xf3"),
+ .aad = TEST_DATA_STR (
+ "\x28\x03\x5a\x77\xe8\xb1\xdf\x98\x20\x9b\xd5\x29\xe4\x72\xbe\x1c"),
+ .tag = TEST_DATA_STR (
+ "\xc1\xa2\x9c\xb9\x1f\x13\x12\xb8\xc6\xc8\x6a\xd8\x33\xa9\x73\x74"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc7) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x58\x29\x1d\xe8\x38\xf4\x7f\x6b\x30\xf2\xeb\x4f\x55"
+ "\x6b\xf2\xfd\x81\xb8\x49\xb9\xe8\x76\xf0\x48"),
+ .iv = TEST_DATA_STR ("\xdb\x4f\xa1\xcc\xc0\xed\x55\xdb\xe5\x33\xee\x90"),
+ .aad = TEST_DATA_STR (
+ "\x57\x6b\x6e\xaf\x76\x21\x1b\xe4\xd5\x40\x36\x3b\x23\xac\x29\x9d"),
+ .tag = TEST_DATA_STR (
+ "\x9d\x3f\xc9\x6b\xa6\xcc\x39\xaa\x30\x74\xb0\x1e\xe2\xcb\xa4\xd4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc8) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xab\x3c\x5c\x4a\x37\x2e\xc0\x5f\xeb\x74\x23\xa5\x55"
+ "\xed\x6c\xc6\x6c\x5d\x3b\xd8\x55\x7e\xff\xa7"),
+ .iv = TEST_DATA_STR ("\xfb\x79\x7a\x5f\xa6\x3a\x38\x88\x0e\xd3\x80\xc6"),
+ .aad = TEST_DATA_STR (
+ "\x67\x63\xc5\x0c\x5d\xe0\xdb\x7f\x67\x5f\xe1\x6d\x0a\x5d\x5a\x79"),
+ .tag = TEST_DATA_STR (
+ "\x6a\xe6\xc7\x8d\xe5\xdf\xea\x5c\xb3\xe9\x6e\xe9\x59\x71\x37\x41"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc9) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x86\xcb\xf5\x79\xe9\xb0\x77\x0e\xc2\xa1\x37\x21\x8e"
+ "\x9f\xf7\x1a\xeb\xf0\x51\xf6\x4a\x31\x8c\x74"),
+ .iv = TEST_DATA_STR ("\x4d\xf9\xe3\xdd\x72\x0d\xce\x9e\xcc\xb3\x81\x76"),
+ .aad = TEST_DATA_STR (
+ "\xba\xbf\x21\xb7\x2e\x05\x67\xf2\x2e\x6f\xb1\x72\x11\x5b\x61\x2f"),
+ .tag = TEST_DATA_STR (
+ "\x51\x86\xbd\x05\x73\x93\x81\x1b\xc9\xc2\x8e\x8e\xb7\x71\x4b\x32"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc10) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xfd\x39\xed\xc5\x1d\xf0\x8e\x69\xf3\x97\x68\xdd\xff"
+ "\x3f\xa9\xa7\xf6\x76\x42\xd7\x3b\x2c\x33\xdd"),
+ .iv = TEST_DATA_STR ("\xf0\xf8\x07\x50\x24\xbb\x50\x97\x82\x79\xc5\x37"),
+ .aad = TEST_DATA_STR (
+ "\x7c\x95\xd4\xa5\x59\x15\xcf\x13\x7d\x3f\xa2\xbc\x0b\x9d\x5e\x99"),
+ .tag = TEST_DATA_STR (
+ "\xe9\x61\xe7\x9c\xd3\x49\x46\x1a\x14\x3b\x13\xe6\x2c\xf6\x9d\x3f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc11) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x31\xc4\x45\x71\x32\x99\x96\x28\x7e\x98\xfc\x3d\xc7"
+ "\x18\x15\x68\xcd\x48\xa3\x35\xfd\x37\x97\x2f"),
+ .iv = TEST_DATA_STR ("\xb2\x34\x1f\xaa\x66\x1d\xc0\x49\x25\xf5\xa6\xb5"),
+ .aad = TEST_DATA_STR (
+ "\x2a\x0e\x83\xf4\xff\x96\x7e\xdd\xdc\x09\xdd\xc4\xc1\x69\xd5\x5d"),
+ .tag = TEST_DATA_STR (
+ "\x9b\xd9\x1d\x5d\xf6\x8a\xfc\x6d\x45\xbe\xbd\xe9\x0f\xcd\xb1\xee"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc12) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x23\xf9\x54\x34\x4d\x93\xa2\x7d\x5a\xbb\xb1\x92\x0a"
+ "\xd8\xe8\x9a\x05\x10\x67\x21\x71\x31\x64\x48"),
+ .iv = TEST_DATA_STR ("\x56\x0f\x42\x9c\x3d\xf4\x31\x41\x3e\x08\x6a\x75"),
+ .aad = TEST_DATA_STR (
+ "\x2d\x78\x32\xa2\xc4\x6b\x63\x44\xfe\x35\xf1\x48\xb5\xbf\x64\x1d"),
+ .tag = TEST_DATA_STR (
+ "\x6d\xd3\x5d\x46\x8e\xfd\xc9\xc9\x73\x97\x82\x33\x20\xc9\xb0\x69"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc13) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x11\xfa\xfc\xf1\x40\x9c\x57\x52\x05\x3d\x5c\xd1\x07"
+ "\x70\xce\xf2\x70\x77\xdf\x64\x55\xfb\x27\x3b"),
+ .iv = TEST_DATA_STR ("\xa6\x90\xc8\x70\x54\x65\x8e\xdc\x49\x94\x14\xd8"),
+ .aad = TEST_DATA_STR (
+ "\xd1\x51\x2c\x14\x46\x12\xb4\x5a\x77\x42\x65\x88\xc1\xc0\x25\x4d"),
+ .tag = TEST_DATA_STR (
+ "\xb1\x40\x66\x1e\xa4\xa7\x93\xbc\x67\xda\xa0\xfa\x00\x9a\x18\x5b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad128_tc14) = {
+ .name = "192-GMAC 128-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xd7\x1e\x1c\x94\xb2\x11\x84\x08\x8e\x6a\x63\xf3\xca"
+ "\xba\x9a\x9c\xcf\x4a\x15\xf0\xbc\x53\xfb\x02"),
+ .iv = TEST_DATA_STR ("\x82\x7c\xd7\x65\xa6\xdc\x8e\x4d\xe2\xe7\x66\x49"),
+ .aad = TEST_DATA_STR (
+ "\x4f\xc6\x66\xa1\xcf\x04\xcf\xdb\x0f\x5f\x68\x1b\x6f\x19\x86\xbb"),
+ .tag = TEST_DATA_STR (
+ "\x9c\xf4\x07\xee\x84\x47\x6d\x54\x8e\x05\x93\x9c\x3b\xeb\x9f\x53"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc0) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x09\x3e\xf7\x55\x1e\xbb\xff\x8e\xb0\xc0\xa8\xa4\xa6"
+ "\x2b\x19\x8f\x0c\x2e\x83\x8d\xe1\x0e\xee\xee"),
+ .iv = TEST_DATA_STR ("\xe6\x56\xe9\x39\x30\xed\x52\x10\xba\x3f\x03\x22"),
+ .aad = TEST_DATA_STR ("\x3d\xa2\x2d\xac\xfd\x11\xb2\x1b\x0a\x71\x31\x57\xf6"
+ "\x0a\xec\x0c\xd2\x2f\x1a\xdd"),
+ .tag = TEST_DATA_STR (
+ "\x1b\x2d\x27\x64\x57\x3e\x20\xae\x64\x0b\xf2\x9d\x48\xe5\xfe\x05"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc1) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x08\x63\xe6\xe0\xe9\x76\x3a\x30\x21\xed\x49\x45\x81"
+ "\x2b\xec\xf2\x7b\x82\x20\xd4\xb3\x29\x73\x57"),
+ .iv = TEST_DATA_STR ("\xad\xb4\xe4\xe6\x29\xcf\x4a\x86\x54\x0e\xfe\x1e"),
+ .aad = TEST_DATA_STR ("\x58\x14\x24\xd6\x33\xf3\xf9\x69\xd1\xb4\xf8\x35\x19"
+ "\x7a\x74\x0a\x69\x5b\x2c\x3b"),
+ .tag = TEST_DATA_STR (
+ "\x79\x47\x52\x19\xe6\x34\x9b\x68\xac\x71\x27\xfb\x55\x11\xe9\x20"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc2) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xb4\x33\x29\x9e\x54\xca\xcc\x5e\x6f\x7b\x34\xc0\xa1"
+ "\xe0\x55\x52\x24\xa6\xa7\x66\xf8\xae\x21\x01"),
+ .iv = TEST_DATA_STR ("\x8f\x36\x11\xd4\xf6\x97\xae\x52\x48\x25\xd2\x39"),
+ .aad = TEST_DATA_STR ("\xee\xbc\x05\x29\x17\xef\xb0\x31\x65\x0f\x38\xce\x70"
+ "\x4e\x92\xd4\xbf\x59\xf9\x41"),
+ .tag = TEST_DATA_STR (
+ "\x91\xd1\x76\x2a\x7f\x19\xfe\x1e\x75\xdc\xa5\x1e\xc8\x5c\xe3\x19"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc3) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xf3\x73\xef\xaf\xb2\x39\x44\x55\xe0\x64\xb4\x2d\x23"
+ "\x4d\x21\xbd\xf4\x52\xdf\x03\x64\x52\xcf\x2c"),
+ .iv = TEST_DATA_STR ("\xd6\x63\x86\x02\x84\xd5\xb8\x33\x32\xa3\xa0\x25"),
+ .aad = TEST_DATA_STR ("\xee\x1e\x7f\x47\x29\x85\xa6\x39\x74\x39\xb2\x8a\x52"
+ "\x6b\x6c\xed\xf5\x95\x3a\xb1"),
+ .tag = TEST_DATA_STR (
+ "\xd8\x19\x77\x43\x49\xbf\x96\x3a\x9d\xf7\xed\x46\x26\x1f\xc5\xca"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc4) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xd5\x04\x53\xfd\x22\x3b\x11\x0d\x2d\xf3\x12\xc4\x81"
+ "\x7d\x97\x6a\x59\x19\x4a\xda\x77\x7f\x97\x41"),
+ .iv = TEST_DATA_STR ("\x6a\x88\xcd\x46\x85\x86\xcb\x22\x7b\x92\x85\x40"),
+ .aad = TEST_DATA_STR ("\x3d\x7e\x66\x93\xe1\x63\x9d\xed\x24\x2e\x3c\x0b\x93"
+ "\x1b\x32\xe7\x2a\xdc\x70\x5f"),
+ .tag = TEST_DATA_STR (
+ "\xb3\x88\xde\x6f\x83\x48\xee\xea\xda\x78\xf6\xe9\xe0\x04\xdb\xe5"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc5) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xe6\xe6\x86\xc7\xbe\xf0\x1b\x17\x04\xab\x1b\xf1\x14"
+ "\x99\x0c\xea\xad\x41\xbe\x84\x80\x17\x74\x60"),
+ .iv = TEST_DATA_STR ("\xbe\x55\x59\xdc\x38\xe9\x79\x4c\x24\xac\x7a\x83"),
+ .aad = TEST_DATA_STR ("\xd8\x21\x8c\xc4\x56\xa5\x4f\x79\x1a\x3f\xd8\x78\x90"
+ "\x89\x09\x46\x35\x3b\x9b\xfb"),
+ .tag = TEST_DATA_STR (
+ "\xe3\x94\xe6\xff\x9e\x9a\x75\x40\xb7\x42\xff\xf8\xdc\x92\x3a\x59"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc6) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x85\xba\x70\xce\x5a\xa7\xcb\x81\x95\x70\x74\xfa\xb4"
+ "\xdf\x72\xc4\x6c\x9e\xc4\x46\x6b\xa0\xb4\x8a"),
+ .iv = TEST_DATA_STR ("\xb9\x1d\x11\xf7\x2f\x6c\x5f\xab\x56\x53\x5e\x5c"),
+ .aad = TEST_DATA_STR ("\x4e\x8a\xfc\x98\xf8\x27\x20\xcd\x8e\x35\xea\x8f\x8a"
+ "\xff\x20\xd2\x39\xde\x14\x07"),
+ .tag = TEST_DATA_STR (
+ "\x41\x3f\xc7\x28\x3d\x57\x8b\xaa\x1e\x62\x13\xf5\x41\x59\x04\x62"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc7) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x7e\xd5\xb2\xa4\xbf\x74\x27\xdb\xb2\xf6\x4e\xfb\xcd"
+ "\x06\x76\xab\x91\x2f\x12\xaf\xae\x85\xf8\xf7"),
+ .iv = TEST_DATA_STR ("\x1a\x06\xb0\xa2\x07\xb8\x9c\x19\x3c\xfb\xdb\x20"),
+ .aad = TEST_DATA_STR ("\x93\xbf\x5a\xbe\x39\x7e\xe6\xa9\x79\xc3\x88\x7c\xb5"
+ "\x7a\xf0\x1d\xf8\x3d\xf2\x91"),
+ .tag = TEST_DATA_STR (
+ "\x10\xca\x8e\xe1\x68\x70\xb9\x51\xc9\x18\x0e\xa1\x85\x36\x50\xbb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc8) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x28\x1c\x87\xcc\x27\x80\x53\x75\x78\x6e\x11\x78\xd1"
+ "\xb0\xe2\x2a\x7a\x85\x46\xcf\x6f\x2f\xe1\x2b"),
+ .iv = TEST_DATA_STR ("\xb9\xc5\x70\x39\x2f\x02\x53\x89\x05\x5c\x9c\x35"),
+ .aad = TEST_DATA_STR ("\x54\xc8\x26\xa0\xca\x02\x76\x33\x51\x59\xa7\x54\x2e"
+ "\x22\x8c\x3d\xae\xbd\x38\x9a"),
+ .tag = TEST_DATA_STR (
+ "\x56\x0a\x32\x1c\xff\x6a\x8c\x1e\xac\x06\x01\x49\xc5\x95\x5f\xf8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc9) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x90\xa1\x96\xe8\xc5\xd5\x77\xa6\xc5\x14\x38\x1b\xad"
+ "\xdb\xba\x7e\xd8\xe4\xd1\xe0\xa7\x96\x1f\x32"),
+ .iv = TEST_DATA_STR ("\x1c\x2c\x7c\x8b\xd0\x15\x33\x68\xb2\xa8\xc3\x49"),
+ .aad = TEST_DATA_STR ("\x0f\x40\x9b\xa3\x68\xc2\xef\x04\x33\xb9\xbd\x96\xff"
+ "\x73\x51\x1f\xce\x63\x93\x18"),
+ .tag = TEST_DATA_STR (
+ "\x7a\x7a\xc7\x70\xa5\xa4\xc6\x6b\x78\x7a\xa3\xa1\x26\x7b\xa3\x45"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc10) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x36\x37\x62\x07\xa2\xc1\xf2\x5e\xc2\x0f\x62\x1b\x06"
+ "\xdb\x26\x0c\x20\xbe\x7b\x39\xe7\x0b\x89\x3c"),
+ .iv = TEST_DATA_STR ("\xc9\xe9\x2a\x66\x62\xa7\xd6\xbb\x84\x94\x5c\x95"),
+ .aad = TEST_DATA_STR ("\xb2\xcd\xe6\x02\x9e\x0f\x93\x30\x92\xe9\x74\x74\xdc"
+ "\x8b\x0b\x17\x4d\xe5\x53\x52"),
+ .tag = TEST_DATA_STR (
+ "\x80\x3b\x69\x53\x80\x1b\xf9\x79\x09\x64\x37\xe0\x2f\x3c\xb1\x31"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc11) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x68\xe6\x21\xb4\x76\xdd\x7d\xda\xda\x51\x56\xc0\xc5"
+ "\x65\xc4\xc0\x05\xbc\xf9\x93\x08\x2a\x6c\x68"),
+ .iv = TEST_DATA_STR ("\xac\x0b\xd5\x9b\xf7\xb9\x34\x5d\x01\xec\x7d\x99"),
+ .aad = TEST_DATA_STR ("\xe3\x18\xce\x39\xbe\xae\x93\x72\xde\xe2\xba\xc3\x56"
+ "\x8c\xa3\x7e\xf8\x71\x4b\x1f"),
+ .tag = TEST_DATA_STR (
+ "\xf8\x42\x6c\x2c\x39\x0a\x5b\xd5\xde\x2f\x4f\x31\xb8\x9a\x8f\xf8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc12) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xe5\x76\x06\xe3\x9d\xc3\x6d\xca\xcc\xfb\x3a\x13\x23"
+ "\xb5\xc1\x80\x3c\xea\x6d\x76\xcd\x96\x44\x18"),
+ .iv = TEST_DATA_STR ("\xf6\x35\xc6\xe1\x2b\xf9\x39\x46\x5c\xd7\x10\x45"),
+ .aad = TEST_DATA_STR ("\xdf\x1b\xef\x3c\xeb\x77\xb6\x7f\xf6\xdc\x7a\x16\x63"
+ "\xc5\x72\xfb\x00\x22\x05\x59"),
+ .tag = TEST_DATA_STR (
+ "\x59\xd5\xf4\x63\x40\x2a\x08\x95\x4f\xa9\xd1\x65\x44\x9d\x95\x1c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc13) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x09\xab\x6b\x91\x2e\x3b\x17\x4d\xea\x38\xa7\x27\x0b"
+ "\x36\xc3\x88\xe1\x08\xbc\x76\x0b\xa4\x81\xbf"),
+ .iv = TEST_DATA_STR ("\x13\x2e\x01\x26\x95\xf1\xe9\xb7\x99\x50\x5c\xef"),
+ .aad = TEST_DATA_STR ("\xd7\x91\xd5\x0c\xd3\x13\xdb\x40\x60\x75\xc9\x7b\x12"
+ "\x8b\x07\x8d\xa5\xb6\x8c\xa1"),
+ .tag = TEST_DATA_STR (
+ "\x72\x15\x2f\x6d\x3a\x95\x0d\x32\x3f\xd6\x19\xbe\x3d\x5b\x0c\x6f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad160_tc14) = {
+ .name = "192-GMAC 160-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xc1\x3e\xf7\x2b\x80\xac\xfd\x6a\xdc\x0b\xb8\xf0\x37"
+ "\x72\x23\xcb\xa7\x33\x93\x9b\x50\x58\xf3\x36"),
+ .iv = TEST_DATA_STR ("\xc4\x57\x6f\x76\xb3\x8c\x9c\x91\xbb\x08\xb8\x3f"),
+ .aad = TEST_DATA_STR ("\x3f\x89\x65\x17\x15\x6c\xde\x96\xb2\x39\x09\xf2\x98"
+ "\xa7\x6c\xde\x59\x04\x7a\xe0"),
+ .tag = TEST_DATA_STR (
+ "\xba\x43\xda\x6f\x40\xaa\x9c\x3a\x66\xdc\x37\x2e\x3b\x3f\x94\x0c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc0) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xa8\x3a\xc8\x25\x54\xd5\xc3\x47\x5d\x03\x51\x4a\x94"
+ "\x21\xc4\x09\xcc\xad\x9b\xc4\x4a\x5f\x83\x07"),
+ .iv = TEST_DATA_STR ("\x38\xaa\x26\xf7\xb6\x8d\xc6\x74\xca\xe9\x0b\x84"),
+ .aad = TEST_DATA_STR (
+ "\x03\x97\xa7\x14\x93\x9f\x55\xc1\x8d\xa0\x13\x27\x13\x48\xbd\x23\x1e\x14"
+ "\xd0\x7f\x39\x7c\xa0\xdb\x20\xd3\xa7\x7c\x42\xf3\xf4\x1c\x25\xc6\x4f\xd9"
+ "\x3b\xd3\xbd\x9f\xcd\x35\x5a\x0b\xde\x4f\x19\x61"),
+ .tag = TEST_DATA_STR (
+ "\xe4\x8b\x36\xdc\x68\x3f\x32\xdb\xae\x3b\x13\xc3\xad\xb1\xb7\x89"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc1) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x5f\x1a\x7f\x44\x39\xf8\xc7\xbc\x30\x1e\xa5\xb8\x95"
+ "\x5f\x3c\x67\x98\x26\x3b\xe4\x7d\xac\xe3\x9c"),
+ .iv = TEST_DATA_STR ("\x2d\xcc\x19\xde\x07\x65\x5f\x72\x43\xfa\xb0\x45"),
+ .aad = TEST_DATA_STR (
+ "\x7e\x76\x78\x36\xe5\xd1\x6d\xfc\x44\x26\x23\x7e\xfc\x91\x4f\xc4\x0b\xfe"
+ "\x59\x4b\x54\x94\x6e\xd5\xf2\x00\x20\x3c\x93\xce\x58\x5c\x4c\xb4\xa2\x4a"
+ "\x33\x64\xcc\xb9\x80\x3a\x64\xac\x4e\x38\xde\x2d"),
+ .tag = TEST_DATA_STR (
+ "\x9d\x34\xef\xdb\x37\x01\x49\x49\x13\xe3\x86\x36\x81\xa9\xb4\x02"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc2) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x8d\xce\x48\xb9\x16\x91\x63\x83\x5a\x0b\x6a\x4a\x62"
+ "\x7c\x29\x03\x0b\x5b\xef\x3f\xf3\xba\xe1\xca"),
+ .iv = TEST_DATA_STR ("\xa0\x0d\x44\x38\xe9\x6e\x7a\x22\xe5\x72\x65\xce"),
+ .aad = TEST_DATA_STR (
+ "\x7c\xc8\x27\x3e\x62\x59\x55\x83\xd4\x27\xbb\xf4\x59\x2c\xd2\xc2\x52\x5a"
+ "\x28\xbb\x9e\x14\x3a\x9c\x9a\xf0\x63\x41\x10\xf2\xb6\x9c\xcb\x4e\xc0\x0c"
+ "\xc2\xaf\xaa\x86\xc9\x86\xd3\xef\x2c\x44\x76\xa9"),
+ .tag = TEST_DATA_STR (
+ "\xe2\x76\xfe\xda\x74\x32\xfa\xa0\xe4\xab\xd4\x6d\x59\x2b\x8f\xee"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc3) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x0e\xce\x58\x69\x34\x92\x86\xef\x12\x2b\xb4\xf0\x5a"
+ "\x0c\x0d\xf5\xc7\xc6\xdf\x49\x60\x79\x20\xff"),
+ .iv = TEST_DATA_STR ("\x8d\xa2\x36\x71\xb7\x2e\xc6\xaa\xab\x27\x46\x3e"),
+ .aad = TEST_DATA_STR (
+ "\x95\x82\x4e\xa2\xb8\x0c\x4e\x97\x91\x1f\xff\xa3\x9e\x3f\x0c\x21\xfc\x81"
+ "\xed\xd2\x68\x8a\x5a\x1e\x58\x3b\xa3\x62\xb4\x7b\x97\x97\x31\xbb\x25\x6c"
+ "\xff\x1a\x47\x9f\x27\xa3\x12\x40\x89\x1e\x57\xe2"),
+ .tag = TEST_DATA_STR (
+ "\xfb\xd7\x57\xb8\x96\x3b\xbb\x32\x6c\xda\x80\xf3\xd5\x08\xf8\x9b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc4) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x0a\xbd\x4a\xac\x60\x34\x31\x26\x17\x38\x61\x53\x4e"
+ "\x8f\x46\xfc\x46\x0f\x8f\x3e\x21\x69\xf5\xc5"),
+ .iv = TEST_DATA_STR ("\x5e\x46\xfe\x13\xe6\xc2\x44\xe8\x34\x19\x8d\x3d"),
+ .aad = TEST_DATA_STR (
+ "\x62\x48\x64\xae\xa6\x0c\x08\xe9\xa1\x3a\x8a\x9c\x09\x44\x57\xc9\xda\x22"
+ "\x26\x24\x7a\x77\x1a\xae\xd5\x97\xc5\xe2\xcb\xc3\xd6\xe6\x17\x9d\xef\x86"
+ "\xc9\xd0\x4f\x1f\x6e\x8c\xe5\xb9\x9f\x78\x9e\x3b"),
+ .tag = TEST_DATA_STR (
+ "\x67\x74\x56\xc4\x84\xab\x6b\xb1\xc3\x22\xf1\x00\xff\x9f\x8c\x43"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc5) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x82\xea\xe3\xc1\xde\xac\x84\x84\xe9\x9e\x97\xe6\x97"
+ "\xc7\xa4\x2d\xb0\x26\xd7\x11\xc3\xdb\x60\x0e"),
+ .iv = TEST_DATA_STR ("\x8f\xa3\xf1\x6b\xb6\xce\xf8\x75\x2c\x8e\x31\xef"),
+ .aad = TEST_DATA_STR (
+ "\x61\xe8\xf8\x8a\xe8\xc0\x55\xf7\xd9\xe6\x7e\x0f\x1d\x49\x93\xa3\xe5\xf7"
+ "\x3f\x36\x62\xdc\x1c\xa8\x88\x66\x33\xab\x9b\x2a\x8c\x69\x28\xdb\x5b\x7a"
+ "\x30\xfd\xec\xaa\x29\xdb\xbe\x01\xfd\xb1\x20\xbb"),
+ .tag = TEST_DATA_STR (
+ "\x7d\xe2\x16\x8f\x5c\x43\x4c\x06\xb7\xc4\xaf\x15\x37\x27\x45\x22"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc6) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x04\x2d\x69\x65\x4b\x27\xa1\x38\x0a\xde\xcc\x9b\xa7"
+ "\x50\x90\xf1\xca\x42\x2b\x72\x5a\x47\x93\xe0"),
+ .iv = TEST_DATA_STR ("\x87\xdb\x23\x7e\x9b\xf6\xcc\xbd\x08\x69\xf0\xf9"),
+ .aad = TEST_DATA_STR (
+ "\x49\x6e\xff\x4c\x74\xac\x08\xbc\xcd\xec\xec\x7a\x49\x40\xdd\xbe\xb8\x0b"
+ "\xa1\xa5\x58\x24\x7e\xaa\x18\xa4\x66\x72\xd8\x74\xd7\xde\x6d\xd3\xa5\x77"
+ "\x9e\xbc\xd9\x84\xc2\x29\x91\x3d\x10\xf6\xf7\xcc"),
+ .tag = TEST_DATA_STR (
+ "\xba\x06\xea\xab\x5b\x16\x66\x20\xef\xc8\x07\x2f\xa3\xa5\xb4\xb8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc7) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xc1\x79\x23\x25\x76\xee\xb3\x8c\x98\xf8\x47\x87\x3d"
+ "\x00\x4b\x96\x46\x65\xa3\x87\xa0\xa7\xf0\x14"),
+ .iv = TEST_DATA_STR ("\x85\xd2\x99\x6d\x00\x3e\xf9\xfd\xc4\xa5\x4c\xe9"),
+ .aad = TEST_DATA_STR (
+ "\x92\x68\x24\x53\x5c\x61\x3f\xde\x98\x69\xdf\x1a\xaf\x76\x4a\x54\xc1\x36"
+ "\x16\x67\x7f\x09\x92\x09\x14\x2d\xa4\xb6\x5d\x9a\x86\x64\xd1\x78\x53\xec"
+ "\x10\x2f\xfa\x1b\x16\x88\x80\x6d\xbe\x50\x3a\x33"),
+ .tag = TEST_DATA_STR (
+ "\xdc\x13\x50\x36\xf7\x4e\x62\x34\xc4\xe3\x27\xfb\xb0\xae\xb9\x25"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc8) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xda\x39\xc0\xef\xb1\x00\xfd\x9c\xf2\xd9\x01\x70\x5a"
+ "\xa6\x35\x90\x3c\xe5\x3b\x66\x9e\xbd\xb5\xca"),
+ .iv = TEST_DATA_STR ("\x0e\x95\xbd\xcb\x66\x98\x24\xdb\xd7\xff\xc8\x8f"),
+ .aad = TEST_DATA_STR (
+ "\x46\x42\x87\x5e\x8e\x20\xc1\x65\xb5\xb1\x7f\x12\xfd\xc6\x30\x99\x6b\x58"
+ "\xb8\x57\x1c\x5a\x15\x94\x4c\xe1\x94\x50\x8c\x87\x12\x3a\xd5\x00\x41\xf5"
+ "\x9a\xfe\x02\xea\xc3\xac\x1e\x6b\xa5\xed\x92\x8b"),
+ .tag = TEST_DATA_STR (
+ "\x59\xf9\x96\xe9\xa7\x23\x14\xfc\x76\x75\xe5\xa9\x13\xfe\x8e\x36"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc9) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x5a\x41\x3f\xa7\x5a\x13\xb0\x36\x53\x81\x82\xad\x51"
+ "\x50\x6f\xdd\x77\x33\xf1\xba\x39\x00\x04\x84"),
+ .iv = TEST_DATA_STR ("\xa6\xcd\xa5\xb0\x22\xec\xfc\x5a\x2b\x75\x90\x13"),
+ .aad = TEST_DATA_STR (
+ "\x1e\xed\x51\xef\xc1\xf5\xca\xe5\x76\x90\xe0\x32\x06\xb4\x5a\x7b\x5c\xb4"
+ "\x58\x56\xab\x36\x31\x32\x34\x94\x85\x01\xdd\x02\xea\x4f\x24\xae\x90\xb5"
+ "\xb2\x46\x28\x91\xe4\x93\x3a\x1b\xd0\x38\x74\x63"),
+ .tag = TEST_DATA_STR (
+ "\x57\x29\x61\xb6\xe8\x50\xad\xb4\x60\x16\x64\xe0\xeb\x3e\x07\x36"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc10) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x80\x08\xf9\xd2\x5a\x1a\x70\x5b\x5f\x00\x79\xd3\xe3"
+ "\x9c\x49\x87\x28\x65\x37\x10\x06\x61\xde\x6e"),
+ .iv = TEST_DATA_STR ("\xa9\xdd\x20\xd7\x51\x2c\xe5\xb3\x54\x83\xa0\x82"),
+ .aad = TEST_DATA_STR (
+ "\xfb\xd2\x16\x02\x37\x74\x2f\x4c\xa7\x2f\x0b\x7e\xd6\x16\xa8\x47\xaf\x65"
+ "\xed\xd2\x81\x67\x97\xb1\xc9\xc8\xb0\xb7\x37\x7b\x57\x59\x3c\x56\xc5\x80"
+ "\x63\xc9\x6a\x30\x69\x8c\x51\xbe\xb6\x78\x6e\x74"),
+ .tag = TEST_DATA_STR (
+ "\xb2\xe2\x25\x89\x00\xd7\xfd\x7a\xc4\xe9\x63\x92\x38\xd6\x63\x8a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc11) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xcc\x95\x61\xf6\xd3\x0d\x9a\x2f\x25\x75\x07\x52\xd3"
+ "\x9a\x1f\x0b\xc8\x00\xe0\xe7\x24\x42\x7e\x64"),
+ .iv = TEST_DATA_STR ("\x3c\x56\x51\x80\x3f\xee\x90\x98\xbd\x69\x04\xed"),
+ .aad = TEST_DATA_STR (
+ "\x0e\x28\x55\x40\x35\x82\x98\xa1\x87\xd4\xf6\x82\x3f\xf8\x6c\xea\xb1\x23"
+ "\x4d\xbc\xef\xc0\x9b\x23\x33\xe7\x45\xf2\x3b\xb6\x0e\x63\x65\xcd\x36\x3d"
+ "\x9e\x9b\x3d\xfa\x9f\xb9\x27\x0d\x6a\x9a\x52\xab"),
+ .tag = TEST_DATA_STR (
+ "\x01\x53\xf9\x5c\x4c\x0b\x4b\x47\x98\x9d\xa7\x1e\xe7\x2c\x34\xc6"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc12) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x61\xab\xa2\x81\xeb\x81\x20\x5d\xc6\xd9\xbb\x6b\x17"
+ "\x87\xb7\x42\x51\x63\x87\x24\x1c\x15\x3c\xc2"),
+ .iv = TEST_DATA_STR ("\x72\x4b\x42\x24\x31\x2a\x59\x6f\xf2\x30\x03\x93"),
+ .aad = TEST_DATA_STR (
+ "\xdd\x06\xc9\xe0\x6a\x6f\xd9\xd8\xfe\xa3\x56\x25\x5c\xbf\x90\x93\x86\xf7"
+ "\xac\x5e\x9b\x5e\xaa\x5c\x55\x28\x20\x54\x82\x7f\x74\xe9\xe7\x43\x46\xac"
+ "\xff\x57\x25\x09\x73\x53\xe8\x6b\xff\xeb\x6d\xc6"),
+ .tag = TEST_DATA_STR (
+ "\x26\x44\xe8\xe6\x52\xc2\x58\xab\x02\x8b\x86\xcd\x7e\xf5\x5f\x5c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc13) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x73\xa4\x90\x52\xef\x91\x18\xf0\x54\x88\x81\x0c\x20"
+ "\x80\xd7\x38\x99\x39\x38\x6c\x18\x6d\x92\xb3"),
+ .iv = TEST_DATA_STR ("\x38\xf0\x06\xb2\xe8\x5e\x7a\xa2\xf4\xc8\x81\x89"),
+ .aad = TEST_DATA_STR (
+ "\x21\xfa\x5a\xbb\x18\xb2\xfb\xcc\xe3\xa1\x9b\x2e\xac\x8b\xe7\xa3\x01\x92"
+ "\x3f\xa2\x58\x10\x52\x86\x13\x3e\xd5\xf4\x78\x34\x84\x2a\x63\x84\xc4\xfc"
+ "\x0a\x39\x86\xe1\xa2\x5b\xba\x83\x47\x9f\x68\x16"),
+ .tag = TEST_DATA_STR (
+ "\x9d\xbd\x74\x84\xc9\xaa\xed\x54\xdf\x7e\xd6\x4b\xbe\xd2\x0c\x68"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad384_tc14) = {
+ .name = "192-GMAC 384-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x1e\x11\x8d\x10\x94\x26\xb2\xab\x64\x46\xb0\x65\x99"
+ "\xa4\xc9\x71\xf6\x68\x3a\x34\x35\x68\xef\x97"),
+ .iv = TEST_DATA_STR ("\xcc\x87\x23\x42\x15\xc9\x74\xfd\x44\x68\x9e\x25"),
+ .aad = TEST_DATA_STR (
+ "\x48\x67\x4b\xf3\x86\x06\x46\x02\xd0\x0f\xd7\x2a\x17\x39\x20\xaf\x9b\x4c"
+ "\x4f\x9a\xfb\xf1\x9e\xa7\x63\xff\x44\xe4\x7e\xf8\x9a\x10\x65\x80\xc2\x89"
+ "\xc3\x98\xf9\x7f\xaa\x60\xba\xf4\x9d\xc1\xa2\xaf"),
+ .tag = TEST_DATA_STR (
+ "\x97\xf1\x3f\x94\x2a\xf7\xb7\x79\x7e\xa0\x9c\xea\xbd\xc7\xdc\x9c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc0) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xc2\x5d\x34\x7f\xfb\x5b\x7b\xa0\x79\xbe\x22\x79\xa0"
+ "\xa7\xf2\x20\xf1\x9c\x74\xbb\x9c\x5a\x15\xb6"),
+ .iv = TEST_DATA_STR ("\xb3\x5f\x14\x21\x82\xfe\xa6\x5c\x64\x23\x68\xed"),
+ .aad = TEST_DATA_STR (
+ "\x19\x67\xa0\xbd\x80\xcf\x2c\x9c\x58\xe4\x41\xe1\x2c\xba\x78\x8f\x9c\x07"
+ "\x21\x77\xe1\xce\x02\xf3\x0d\x58\xae\x98\x1a\xb3\x7e\xac\x45\x2c\x0d\x9f"
+ "\x1c\x5f\x34\x85\xd7\xb1\x6a\xe0\x93\x66\x82\x1d\x23\xd4\x44\x79\xd5\x2c"
+ "\xcc\x4a\xcd\x8f\xa6\xf5\xb9\x01\x38\x45\xc6\x29\xf6\x9c\x61\x2c\x9c\xbb"
+ "\xcd\xca\x3b\xdf\x43\x85\x5f\xa7\xc7\x1b\xff\x45\x8a\x7d\x4c\x01\x9a"
+ "\xd9"),
+ .tag = TEST_DATA_STR (
+ "\xf5\xa0\xd6\x49\x24\xae\xab\x15\xa6\x36\xc7\xce\x4d\xb5\x22\x43"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc1) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xcb\x00\x0f\xdd\xd6\x7b\xf5\xa2\x4b\x03\xc8\xb0\x89"
+ "\x65\xfc\x56\x89\x62\xd7\xb2\xa0\xb4\xe6\x8e"),
+ .iv = TEST_DATA_STR ("\xac\xad\xc8\xf8\x22\xb8\x37\xb8\xfc\xd5\xac\x53"),
+ .aad = TEST_DATA_STR (
+ "\xce\x0e\x3e\x4e\x6f\xfe\xae\x66\xc5\x35\x66\x7e\x8a\x8c\xf1\x2f\xca\x0e"
+ "\x9d\xae\x69\x87\x83\x5e\x8e\xc6\x2f\xb9\x5b\x38\xf3\x1e\xc5\xe9\x37\xbd"
+ "\xfe\xd5\xb5\x51\x74\x83\x4b\x03\x8b\xa3\x32\x2b\x4a\x25\x65\xac\x41\x3b"
+ "\x6e\x20\x4f\x88\xc3\xa9\x32\x16\xb8\x81\x06\x49\x4e\xaa\x14\xa8\x20\x68"
+ "\xf0\x0a\x3b\xf2\x27\xb6\x27\x07\x53\x83\x68\x2b\xd6\xbe\xd6\x23\x1e"
+ "\xaf"),
+ .tag = TEST_DATA_STR (
+ "\x2c\x1c\xdf\xc8\xaf\xb7\x56\x9b\x87\x7b\xa5\xae\x13\xd6\x23\x5b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc2) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x94\x9f\x77\x6b\x66\xb5\x28\x34\xde\x80\xe2\x4d\xa2"
+ "\xc6\x68\x3c\x00\x94\x74\x3c\x6b\x4b\x57\xd1"),
+ .iv = TEST_DATA_STR ("\x75\x57\xf7\xb9\xa8\x55\x4e\x79\xf8\x69\x52\x9b"),
+ .aad = TEST_DATA_STR (
+ "\xe3\x6d\xb9\xd5\x1d\xed\xe1\x0f\x17\xe4\xba\x3a\xa2\x0e\xee\x49\xc2\x06"
+ "\x24\x4f\x89\xf6\x7f\xfa\x7d\x49\x94\x58\x93\xa0\x5f\xb6\xb5\x94\x8c\x53"
+ "\x61\xdc\x84\xb3\x3a\x4c\x35\x76\x8c\xb6\x54\x74\x08\xba\x61\x7e\xdb\xa4"
+ "\x17\x82\xa6\x5e\x4f\xca\x1a\x02\x79\x68\xf4\x4c\x43\x3f\x84\x53\xdb\xef"
+ "\xb3\x5a\xa4\xc2\x1b\x6c\x52\x0b\x10\x2a\xe4\xfd\xf2\x07\x9f\x81\xdd"
+ "\x0c"),
+ .tag = TEST_DATA_STR (
+ "\x88\xc0\x61\x2c\x1d\xde\xf9\x14\xb3\x43\x95\x05\x3f\x7f\x63\x2e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc3) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x25\x87\x2c\x71\xd8\x70\x0c\x6e\x0a\x74\xf4\x4e\x95"
+ "\x46\x8b\x12\xf2\xdc\xeb\x94\xc2\x57\x57\x5d"),
+ .iv = TEST_DATA_STR ("\x20\xa8\x7b\xaf\xff\x89\x83\xae\x72\x5a\x6f\xf1"),
+ .aad = TEST_DATA_STR (
+ "\xc0\x9c\x11\x84\xd0\xfb\xe3\xaf\x22\x20\x2a\x59\xdf\xef\xd6\x6f\xcd\xa2"
+ "\x29\x3c\x90\x62\x6f\x14\x93\xd6\xfd\x79\xed\x5b\x5d\x01\xbf\x8a\xc9\x09"
+ "\x5f\x44\xa3\x1f\x9d\xb4\xa2\x6f\x79\x75\x4d\x75\xec\xf4\xfe\x02\x5f\x2c"
+ "\x1a\xdf\x3c\xe5\xf3\xae\x76\x72\x1d\xaf\x3d\xcc\x9d\xd8\x99\xe3\xf9\x6c"
+ "\x82\x73\xb2\x9b\xc1\x8f\xc3\x8a\xae\x1a\xaa\x12\x4d\xb3\x71\xaa\x47"
+ "\xfd"),
+ .tag = TEST_DATA_STR (
+ "\xb1\x66\x3e\xb5\xb6\x98\xae\x8a\x7a\x18\xa6\xee\x74\x81\xb9\x8b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc4) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x18\x8c\xa6\x91\x49\x83\xd3\xc1\xe5\x6c\x05\x9d\x0d"
+ "\x70\x1d\x57\x3a\x61\xdf\x2d\xea\xee\xb1\xa6"),
+ .iv = TEST_DATA_STR ("\x6c\x2f\xed\xb5\xf7\xf9\xf1\x15\x3a\xc3\x6c\xd8"),
+ .aad = TEST_DATA_STR (
+ "\x1d\xb4\xb3\x12\x70\x44\x94\x98\xba\x03\x97\x31\xb7\x33\x07\x68\xd1\x4c"
+ "\x27\xe3\x73\xb7\xde\xbd\xb9\x8f\x2a\x41\xb6\xae\xc3\xb2\x98\xa0\x3e\xa5"
+ "\xde\x8f\xed\x8f\xf2\x17\x96\x75\xea\x08\xe3\xc9\x81\x2c\x3f\x4f\x63\x76"
+ "\x5f\x40\x39\x53\x4c\x5c\xcf\x98\xfd\xc3\xe7\x0c\xb1\x30\x9a\xd4\x16\x1e"
+ "\x37\xe7\x14\xe6\x97\x28\x72\xfa\x65\x83\x72\x83\x25\xac\x52\x0d\x56"
+ "\x69"),
+ .tag = TEST_DATA_STR (
+ "\x29\xc5\x6f\x77\xd8\x26\x0c\xa2\x94\x83\x37\xb2\x1c\x0c\x37\xa2"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc5) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xce\xcc\xe8\xae\x97\x77\x18\x7e\x5a\x87\xec\xb2\xd7"
+ "\x35\xf7\x82\xf9\xf7\xaa\xb8\xb8\x7b\x13\x7d"),
+ .iv = TEST_DATA_STR ("\x7d\x56\x4d\xb2\x02\xd0\xfa\xb3\x8d\xed\x36\xdd"),
+ .aad = TEST_DATA_STR (
+ "\xf6\xfb\xd1\xb5\x75\x5d\x70\x91\x54\x31\x2e\x11\x0f\xd4\x60\x85\xa4\xb6"
+ "\xf6\x17\xc1\x27\xfe\xa7\x76\x36\xbf\xb8\xa5\x8a\x6a\x6d\x90\x30\xb2\xa6"
+ "\xc4\xe7\x0d\x7a\x3a\x89\x4a\x75\x96\x7f\x65\x02\xe0\xc8\x16\xfb\x30\x69"
+ "\xf2\xed\x94\xc8\x88\xd3\x07\x4c\x1c\x63\xc5\x95\x12\xbe\x45\x3e\x57\x5c"
+ "\xec\x11\x5c\x49\xeb\x4d\xba\x44\xd2\xf7\xc7\x8b\x33\x55\xb1\xe6\x77"
+ "\x87"),
+ .tag = TEST_DATA_STR (
+ "\xb2\x7c\x0b\xe6\x89\x85\x66\x26\xe5\x5e\x03\x77\xa0\x83\x34\x13"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc6) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x94\x47\x03\x91\xde\xf6\x95\xfe\x5d\xe5\xb8\x23\x3a"
+ "\x20\xfe\x52\x11\xbb\x1d\xbb\xb2\x73\x31\x3f"),
+ .iv = TEST_DATA_STR ("\x57\xce\x3a\x88\xf6\xd2\x72\x15\xc9\x43\x7c\x30"),
+ .aad = TEST_DATA_STR (
+ "\x51\x05\x96\x5c\xed\xe3\x1c\x1e\x2f\xbb\x1f\x5f\xb6\x41\xaa\x45\x65\xf8"
+ "\x15\xbf\x18\x1a\x42\x9c\xdc\x35\x3b\xcf\x41\x7a\x0e\x57\xb9\x57\x49\xb4"
+ "\x88\x6a\x80\x19\x01\x37\xf7\x7b\x99\xff\xe2\x80\x88\xa8\xa7\xf9\xf1\x2f"
+ "\xf4\xc6\x16\x53\xdf\x30\x57\x2b\xde\xed\x92\xf2\xfa\xc5\xc4\x93\xce\x6f"
+ "\xad\x20\xc0\xee\xd6\x6f\x95\x02\x6c\x76\x33\x48\x89\x20\xb9\x02\x32"
+ "\xa0"),
+ .tag = TEST_DATA_STR (
+ "\x50\x31\x79\x2c\xa7\x0d\xc4\x9e\xeb\xd8\xea\xd3\x76\xe6\xe3\x33"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc7) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x22\x0a\x24\x93\x89\x79\x3c\x97\xfb\xe2\x8b\xa6\xaf"
+ "\xeb\xf1\x2a\xc0\xde\x55\xed\x71\xaf\xfa\x68"),
+ .iv = TEST_DATA_STR ("\xb5\xa5\x71\x95\x1a\x37\x30\x30\xfc\xf0\xeb\x4d"),
+ .aad = TEST_DATA_STR (
+ "\xa2\x75\x20\x58\xa8\x46\x9b\x60\xd6\x99\x7a\x31\x5e\x5c\x88\x25\xec\xb2"
+ "\xf6\xfd\x1f\x60\x8d\x1a\xe5\xb5\xa4\xf5\xb4\xb9\x28\x62\xb8\x4d\x6b\x3e"
+ "\x74\x4e\x92\x3b\x02\x44\xb7\xb0\xfd\x6d\x6f\x36\xa8\xc1\x73\xd4\x6a\xd2"
+ "\x01\xdd\x8d\x8a\x55\xc0\x8d\x95\x49\x30\x26\x69\xb9\xd3\x3f\x46\x61\x80"
+ "\xf0\x58\x1e\xb3\x00\xbb\x8a\xb8\xb0\x61\x11\x32\x34\xd9\x68\xce\xcc"
+ "\xce"),
+ .tag = TEST_DATA_STR (
+ "\xb2\xcf\x3f\xa8\xca\x8d\x3e\xea\xaa\x3f\x82\x41\x10\x64\xc9\x87"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc8) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xe5\x4c\x36\xdb\xb4\x67\xfe\xb4\x30\xf4\x08\x7f\xe4"
+ "\xcf\x12\xba\xfc\x94\xa1\x78\x00\x68\x38\xe8"),
+ .iv = TEST_DATA_STR ("\x31\x9e\x97\x14\xb4\x92\x5c\xb1\x61\xbc\xfd\x91"),
+ .aad = TEST_DATA_STR (
+ "\xfc\x50\x39\x7c\xc9\x92\xfd\xe3\xd4\x44\xd2\xfd\xf3\x87\x77\xf2\x9a\xb6"
+ "\x04\x99\x63\xea\x08\xc4\xe2\xf0\x0c\x15\x98\xb8\xc0\xbe\xa7\xe9\x4f\x59"
+ "\x1b\xb8\x3e\xb5\x35\x1f\xfa\x4b\xff\xef\x3e\x3e\xc3\x57\xfe\x47\xb1\x7d"
+ "\xb7\xee\xc0\x4a\xd4\x66\x9b\x92\x13\x02\xe5\xc4\x1a\xc6\x9f\xe4\x45\x83"
+ "\x8f\xcf\xd5\xb8\xd5\x1e\x89\xb3\xef\xdf\x2e\x7a\xf4\xf0\x57\x6d\xfc"
+ "\x69"),
+ .tag = TEST_DATA_STR (
+ "\x45\x35\x3a\x04\x31\x39\x23\x75\x54\xb5\x11\x7d\x0b\x8d\x52\xa7"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc9) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xaa\xd8\x0b\x3b\xb6\xe2\x2d\x9d\x18\xf1\x07\x8f\x54"
+ "\x29\x73\xaa\x8f\xff\x28\xab\xfa\x2e\xd6\x37"),
+ .iv = TEST_DATA_STR ("\x6b\x53\x35\x92\x9a\x6f\xc7\xd3\x4c\x3e\x72\x8f"),
+ .aad = TEST_DATA_STR (
+ "\x31\x4a\x33\x07\xa6\x41\x8a\xd2\x29\xaf\x5b\x03\x25\xd2\xbd\x41\x98\xfe"
+ "\x82\xd8\xc5\xa8\x96\x02\xe9\x26\x84\x8c\x09\x6f\xd0\x1e\xa3\x94\x84\xdf"
+ "\x6e\x4a\xae\xd1\x8f\x2e\x2b\x07\x0c\xa3\x6e\xe5\xed\x66\xcd\xa3\xc0\x4a"
+ "\xb6\xeb\x41\xb3\x27\x52\x49\x4b\xa3\x56\xef\x13\x27\xd8\xfd\x6a\x83\x52"
+ "\xa6\x21\xe1\xbb\x0b\x20\x66\x3f\xc7\x04\x89\x9a\x85\x5d\x32\x77\x77"
+ "\x0c"),
+ .tag = TEST_DATA_STR (
+ "\x8f\xda\x0e\x49\x52\xbe\xef\x47\xbe\xa6\xf4\x8d\x9b\xdb\x3e\x79"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc10) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x69\xd3\xd0\x0e\x9a\xef\xe5\xb3\xa9\xaf\x64\x83\x8b"
+ "\x40\x45\x79\xd4\x59\x2f\x9c\xfe\xe8\x64\x57"),
+ .iv = TEST_DATA_STR ("\x05\x4c\xc3\x74\x8c\xd8\x44\x24\x10\x50\x3a\xd5"),
+ .aad = TEST_DATA_STR (
+ "\x10\x31\xcc\x7d\x96\x77\xc5\xf9\x57\x45\xc3\xdc\xc2\x6d\x62\x52\x76\x32"
+ "\x35\x56\x7d\x56\xc6\x13\x86\x7b\xce\x17\xec\x09\x9d\xef\x27\x8a\x64\x37"
+ "\xd1\xb7\x02\x64\x2b\xea\x5c\xfd\xed\x9a\xf6\xd0\xc5\xe0\x20\xf7\x04\x92"
+ "\xad\x7f\x04\xa1\xb4\xba\xd3\x95\x3b\x96\x13\x57\x4c\x2a\x18\xce\x5f\x14"
+ "\xd4\x36\x68\x79\xd1\x1e\x0b\x0a\x58\xfe\x09\x2f\x3c\xf0\xe0\x1a\xc0"
+ "\x3d"),
+ .tag = TEST_DATA_STR (
+ "\x1b\xff\xd2\x07\x47\xb2\x5e\x87\x25\x18\x44\x68\x28\x81\xf5\x3e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc11) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x03\x5e\x18\x64\xfc\xaa\x90\x78\xd1\xc8\x30\x99\xb2"
+ "\x3f\xd7\x17\x8c\x94\x6a\x58\x69\xc3\x15\x77"),
+ .iv = TEST_DATA_STR ("\xe2\xc4\x07\xa6\xaa\xd6\xd9\x04\x0e\x5b\x67\x49"),
+ .aad = TEST_DATA_STR (
+ "\x2f\xc4\x1f\x0f\xd5\xe3\xec\xef\x75\xa1\xf1\xa0\xf0\x33\x51\x5e\x6f\x96"
+ "\x19\xb8\x7a\x8c\xa1\x68\x7b\xb2\xd6\x37\x52\xcc\x3d\x47\x36\x77\xdb\x30"
+ "\x0e\x76\x97\x8c\xd3\x42\xc5\x1f\x57\x6b\x15\x98\x56\x75\x02\xaf\x0e\xd1"
+ "\xca\x85\xc5\xde\x2d\x84\xc2\xa3\x21\x19\x61\x53\x8d\xf5\x15\x25\x0a\x69"
+ "\xe8\xd6\x7e\xa2\xe8\x77\xd8\xf5\x2e\x69\x7f\xc9\x0b\xad\x33\x0b\x97"
+ "\xe4"),
+ .tag = TEST_DATA_STR (
+ "\x3c\x90\x68\x75\x7b\xda\x60\x22\xea\xb5\xb1\x98\x75\x0b\xad\xc4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc12) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x47\xcf\x37\x7a\x1d\xb1\x10\x6f\x8d\xd4\x54\x88\x4f"
+ "\x71\xef\x93\xf4\xa6\x45\xe0\xe3\xc9\xd4\x30"),
+ .iv = TEST_DATA_STR ("\xf4\x39\x46\xec\x30\x3f\x1e\xfc\x19\xdc\x21\xc2"),
+ .aad = TEST_DATA_STR (
+ "\x2e\xab\xfa\xdb\x99\x7d\x15\x4b\xea\x95\xd4\x5f\x7c\x4d\x5c\x5f\x18\x2b"
+ "\x1e\xd9\x89\x7a\xb0\x12\x41\xf6\x15\xf0\x4b\x8a\x16\xf7\xa9\x65\x2b\x34"
+ "\xa0\xee\x70\x52\xff\x5a\x20\x9a\xd4\xd2\x4a\x2b\xfc\x5e\x5e\xbc\x42\x4f"
+ "\x6d\xbb\xf0\x33\xf0\x59\x51\x24\x7a\xb3\x73\xcb\x9c\xce\x73\x5d\x7f\xb1"
+ "\x80\xa4\xf6\x2a\xd5\xa4\x12\x1e\xb7\xaa\x47\x26\x9f\x95\x41\xbd\xd9"
+ "\x5a"),
+ .tag = TEST_DATA_STR (
+ "\xfe\xed\xe5\x21\x2f\x35\xea\xa8\xfa\xa9\xe2\xe6\xbb\x7b\x1e\x18"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc13) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\x64\x98\xf9\x61\x00\xe7\xb0\xb6\xed\xd7\x2b\x61\xf8"
+ "\x64\xd3\x38\x23\xbc\xbd\x0b\x58\x51\xc5\x2a"),
+ .iv = TEST_DATA_STR ("\x81\xf0\x05\xdf\x39\x2a\xc0\x25\x0a\xe0\x7a\x69"),
+ .aad = TEST_DATA_STR (
+ "\xd1\x83\x82\x41\x68\x23\x15\xdc\x27\x3a\xe8\xc2\xd5\x9d\x71\x27\x17\x48"
+ "\xbf\x1e\xf0\x38\x5d\xe4\x05\xfc\x5c\x2f\xe5\xca\xcf\x57\xc8\xd5\x1d\x72"
+ "\xdf\x09\x6d\x2c\x3e\x46\x63\xf1\xc5\x9b\xd4\xda\x3c\xfe\xe9\x4e\x53\xab"
+ "\xa8\x7e\x49\x3a\xad\x38\x6b\xb3\x28\x3d\xd3\x37\xa0\xba\x57\xb8\x4f\x2d"
+ "\x35\xa8\xb6\xbf\xb2\x07\x7d\x22\xb8\x23\x98\xff\x6c\x34\x31\xec\xc4"
+ "\xf6"),
+ .tag = TEST_DATA_STR (
+ "\xe1\x49\xfb\xaa\x73\xf0\x50\x9d\x34\xbd\xdf\x03\x1c\x4c\xc4\x76"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac192_aad720_tc14) = {
+ .name = "192-GMAC 720-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_192_NULL_GMAC,
+ .key = TEST_DATA_STR ("\xba\xff\x99\xa6\xdd\x4d\x29\x81\x04\x3a\x48\xb5\x2f"
+ "\x36\xba\x5d\xbb\x73\x80\xca\xa7\x5b\xc6\x5d"),
+ .iv = TEST_DATA_STR ("\x98\x38\xd9\xf9\xb8\x63\x2c\xbd\x48\xa2\xba\x35"),
+ .aad = TEST_DATA_STR (
+ "\xe7\x81\xf8\xf1\xf5\xbf\xad\x3a\x50\xc4\x7e\x36\x33\x5e\x7a\x22\x5d\xbf"
+ "\x32\xbc\x15\x96\x7d\x66\xdd\x30\x06\xdd\x42\x4b\xa9\x71\xd8\xf1\xa9\xca"
+ "\x90\x61\x94\x50\xbd\xa4\x56\x29\x39\x01\x5f\x75\xb4\x67\xd6\x33\xbb\x57"
+ "\x43\xbb\xf3\x7c\x9a\x2b\x24\x15\xd7\x30\x65\xfa\xd7\x1d\xa3\x31\x2d\x81"
+ "\x7b\xa2\xe6\x24\xc6\x88\x63\xf7\x22\x78\x05\x2a\x4d\xb0\xe7\x3d\xbf"
+ "\x10"),
+ .tag = TEST_DATA_STR (
+ "\xf8\xed\xe3\x60\x48\x26\x1d\x8a\x3b\xf7\x8b\x19\x33\xf3\x3b\x22"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc0) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xb5\x2c\x50\x5a\x37\xd7\x8e\xda\x5d\xd3\x4f\x20\xc2\x25\x40\xea\x1b\x58"
+ "\x96\x3c\xf8\xe5\xbf\x8f\xfa\x85\xf9\xf2\x49\x25\x05\xb4"),
+ .iv = TEST_DATA_STR ("\x51\x6c\x33\x92\x9d\xf5\xa3\x28\x4f\xf4\x63\xd7"),
+ .tag = TEST_DATA_STR (
+ "\xbd\xc1\xac\x88\x4d\x33\x24\x57\xa1\xd2\x66\x4f\x16\x8c\x76\xf0"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc1) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x5f\xe0\x86\x1c\xdc\x26\x90\xce\x69\xb3\x65\x8c\x7f\x26\xf8\x45\x8e\xec"
+ "\x1c\x92\x43\xc5\xba\x08\x45\x30\x5d\x89\x7e\x96\xca\x0f"),
+ .iv = TEST_DATA_STR ("\x77\x0a\xc1\xa5\xa3\xd4\x76\xd5\xd9\x69\x44\xa1"),
+ .tag = TEST_DATA_STR (
+ "\x19\x6d\x69\x1e\x10\x47\x09\x3c\xa4\xb3\xd2\xef\x4b\xab\xa2\x16"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc2) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x76\x20\xb7\x9b\x17\xb2\x1b\x06\xd9\x70\x19\xaa\x70\xe1\xca\x10\x5e\x1c"
+ "\x03\xd2\xa0\xcf\x8b\x20\xb5\xa0\xce\x5c\x39\x03\xe5\x48"),
+ .iv = TEST_DATA_STR ("\x60\xf5\x6e\xb7\xa4\xb3\x8d\x4f\x03\x39\x55\x11"),
+ .tag = TEST_DATA_STR (
+ "\xf5\x70\xc3\x82\x02\xd9\x45\x64\xba\xb3\x9f\x75\x61\x7b\xc8\x7a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc3) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x7e\x2d\xb0\x03\x21\x18\x94\x76\xd1\x44\xc5\xf2\x7e\x78\x70\x87\x30\x2a"
+ "\x48\xb5\xf7\x78\x6c\xd9\x1e\x93\x64\x16\x28\xc2\x32\x8b"),
+ .iv = TEST_DATA_STR ("\xea\x9d\x52\x5b\xf0\x1d\xe7\xb2\x23\x4b\x60\x6a"),
+ .tag = TEST_DATA_STR (
+ "\xdb\x9d\xf5\xf1\x4f\x6c\x9f\x2a\xe8\x1f\xd4\x21\x41\x2d\xdb\xbb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc4) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa2\x3d\xfb\x84\xb5\x97\x6b\x46\xb1\x83\x0d\x93\xbc\xf6\x19\x41\xca\xe5"
+ "\xe4\x09\xe4\xf5\x55\x1d\xc6\x84\xbd\xce\xf9\x87\x64\x80"),
+ .iv = TEST_DATA_STR ("\x5a\xa3\x45\x90\x80\x48\xde\x10\xa2\xbd\x3d\x32"),
+ .tag = TEST_DATA_STR (
+ "\xf2\x82\x17\x64\x92\x30\xbd\x7a\x40\xa9\xa4\xdd\xab\xc6\x7c\x43"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc5) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xdf\xe9\x28\xf8\x64\x30\xb7\x8a\xdd\x7b\xb7\x69\x60\x23\xe6\x15\x3d\x76"
+ "\x97\x7e\x56\x10\x3b\x18\x02\x53\x49\x0a\xff\xb9\x43\x1c"),
+ .iv = TEST_DATA_STR ("\x1d\xd0\x78\x5a\xf9\xf5\x89\x79\xa1\x0b\xd6\x2d"),
+ .tag = TEST_DATA_STR (
+ "\xa5\x5e\xb0\x9e\x9e\xde\xf5\x8d\x9f\x67\x1d\x72\x20\x7f\x8b\x3c"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc6) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x34\x04\x8d\xb8\x15\x91\xee\x68\x22\x49\x56\xbd\x69\x89\xe1\x63\x0f\xcf"
+ "\x06\x8d\x7f\xf7\x26\xae\x81\xe5\xb2\x9f\x54\x8c\xfc\xfb"),
+ .iv = TEST_DATA_STR ("\x16\x21\xd3\x4c\xff\x2a\x5b\x25\x0c\x7b\x76\xfc"),
+ .tag = TEST_DATA_STR (
+ "\x49\x92\xec\x3d\x57\xcc\xcf\xa5\x8f\xd8\x91\x6c\x59\xb7\x0b\x11"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc7) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa1\x11\x4f\x87\x49\xc7\x2b\x8c\xef\x62\xe7\x50\x3f\x1a\xd9\x21\xd3\x3e"
+ "\xee\xde\x32\xb0\xb5\xb8\xe0\xd6\x80\x7a\xa2\x33\xd0\xad"),
+ .iv = TEST_DATA_STR ("\xa1\x90\xed\x3f\xf2\xe2\x38\xbe\x56\xf9\x0b\xd6"),
+ .tag = TEST_DATA_STR (
+ "\xc8\x46\x4d\x95\xd5\x40\xfb\x19\x11\x56\xfb\xbc\x16\x08\x84\x2a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc8) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xdd\xbb\x99\xdc\x31\x02\xd3\x11\x02\xc0\xe1\x4b\x23\x85\x18\x60\x57\x66"
+ "\xc5\xb2\x3d\x9b\xea\x52\xc7\xc5\xa7\x71\x04\x2c\x85\xa0"),
+ .iv = TEST_DATA_STR ("\x95\xd1\x5e\xd7\x5c\x6a\x10\x9a\xac\x1b\x1d\x86"),
+ .tag = TEST_DATA_STR (
+ "\x81\x3d\x1d\xa3\x77\x5c\xac\xd7\x8e\x96\xd8\x6f\x03\x6c\xff\x96"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc9) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x1f\xaa\x50\x6b\x8f\x13\xa2\xe6\x66\x0a\xf7\x8d\x92\x91\x5a\xdf\x33\x36"
+ "\x58\xf7\x48\xf4\xe4\x8f\xa2\x01\x35\xa2\x9e\x9a\xbe\x5f"),
+ .iv = TEST_DATA_STR ("\xe5\x0f\x27\x8d\x36\x62\xc9\x9d\x75\x0f\x60\xd3"),
+ .tag = TEST_DATA_STR (
+ "\xae\xc7\xec\xe6\x6b\x73\x44\xaf\xd6\xf6\xcc\x74\x19\xcf\x60\x27"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc10) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xf3\x0b\x59\x42\xfa\xf5\x7d\x4c\x13\xe7\xa8\x24\x95\xae\xdf\x1b\x4e\x60"
+ "\x35\x39\xb2\xe1\x59\x93\x17\xcc\x6e\x53\x22\x5a\x24\x93"),
+ .iv = TEST_DATA_STR ("\x33\x6c\x38\x8e\x18\xe6\xab\xf9\x2b\xb7\x39\xa9"),
+ .tag = TEST_DATA_STR (
+ "\xdd\xaf\x8e\xf4\xcb\x2f\x8a\x6d\x40\x1f\x3b\xe5\xff\x0b\xaf\x6a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc11) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xda\xf4\xd9\xc1\x2c\x5d\x29\xfc\x3f\xa9\x36\x53\x2c\x96\x19\x6e\x56\xae"
+ "\x84\x2e\x47\x06\x3a\x4b\x29\xbf\xff\x2a\x35\xed\x92\x80"),
+ .iv = TEST_DATA_STR ("\x53\x81\xf2\x11\x97\xe0\x93\xb9\x6c\xda\xc4\xfa"),
+ .tag = TEST_DATA_STR (
+ "\x7f\x18\x32\xc7\xf7\xcd\x78\x12\xa0\x04\xb7\x9c\x3d\x39\x94\x73"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc12) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x6b\x52\x47\x54\x14\x9c\x81\x40\x1d\x29\xa4\xb8\xa6\xf4\xa4\x78\x33\x37"
+ "\x28\x06\xb2\xd4\x08\x3f\xf1\x7f\x2d\xb3\xbf\xc1\x7b\xca"),
+ .iv = TEST_DATA_STR ("\xac\x7d\x3d\x61\x8a\xb6\x90\x55\x5e\xc2\x44\x08"),
+ .tag = TEST_DATA_STR (
+ "\xdb\x07\xa8\x85\xe2\xbd\x39\xda\x74\x11\x6d\x06\xc3\x16\xa5\xc9"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc13) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xcf\xf0\x83\x30\x3f\xf4\x0a\x1f\x66\xc4\xae\xd1\xac\x7f\x50\x62\x8f\xe7"
+ "\xe9\x31\x1f\x5d\x03\x7e\xbf\x49\xf4\xa4\xb9\xf0\x22\x3f"),
+ .iv = TEST_DATA_STR ("\x45\xd4\x6e\x1b\xaa\xdc\xfb\xc8\xf0\xe9\x22\xff"),
+ .tag = TEST_DATA_STR (
+ "\x16\x87\xc6\xd4\x59\xea\x48\x1b\xf8\x8e\x4b\x22\x63\x22\x79\x06"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad0_tc14) = {
+ .name = "256-GMAC 0-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x39\x54\xf6\x0c\xdd\xbb\x39\xd2\xd8\xb0\x58\xad\xf5\x45\xd5\xb8\x24\x90"
+ "\xc8\xae\x92\x83\xaf\xa5\x27\x86\x89\x04\x1d\x41\x5a\x3a"),
+ .iv = TEST_DATA_STR ("\x8f\xb3\xd9\x8e\xf2\x4f\xba\x03\x74\x6a\xc8\x4f"),
+ .tag = TEST_DATA_STR (
+ "\x7f\xb1\x30\x85\x5d\xfe\x7a\x37\x33\x13\x36\x1f\x33\xf5\x52\x37"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc0) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x78\xdc\x4e\x0a\xaf\x52\xd9\x35\xc3\xc0\x1e\xea\x57\x42\x8f\x00\xca\x1f"
+ "\xd4\x75\xf5\xda\x86\xa4\x9c\x8d\xd7\x3d\x68\xc8\xe2\x23"),
+ .iv = TEST_DATA_STR ("\xd7\x9c\xf2\x2d\x50\x4c\xc7\x93\xc3\xfb\x6c\x8a"),
+ .aad = TEST_DATA_STR (
+ "\xb9\x6b\xaa\x8c\x1c\x75\xa6\x71\xbf\xb2\xd0\x8d\x06\xbe\x5f\x36"),
+ .tag = TEST_DATA_STR (
+ "\x3e\x5d\x48\x6a\xa2\xe3\x0b\x22\xe0\x40\xb8\x57\x23\xa0\x6e\x76"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc1) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x44\x57\xff\x33\x68\x3c\xca\x6c\xa4\x93\x87\x8b\xdc\x00\x37\x38\x93\xa9"
+ "\x76\x34\x12\xee\xf8\xcd\xdb\x54\xf9\x13\x18\xe0\xda\x88"),
+ .iv = TEST_DATA_STR ("\x69\x9d\x1f\x29\xd7\xb8\xc5\x53\x00\xbb\x1f\xd2"),
+ .aad = TEST_DATA_STR (
+ "\x67\x49\xda\xee\xa3\x67\xd0\xe9\x80\x9e\x2d\xc2\xf3\x09\xe6\xe3"),
+ .tag = TEST_DATA_STR (
+ "\xd6\x0c\x74\xd2\x51\x7f\xde\x4a\x74\xe0\xcd\x47\x09\xed\x43\xa9"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc2) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x4d\x01\xc9\x6e\xf9\xd9\x8d\x4f\xb4\xe9\xb6\x1b\xe5\xef\xa7\x72\xc9\x78"
+ "\x85\x45\xb3\xea\xc3\x9e\xb1\xca\xcb\x99\x7a\x5f\x07\x92"),
+ .iv = TEST_DATA_STR ("\x32\x12\x4a\x4d\x9e\x57\x6a\xea\x25\x89\xf2\x38"),
+ .aad = TEST_DATA_STR (
+ "\xd7\x2b\xad\x0c\x38\x49\x5e\xda\x50\xd5\x58\x11\x94\x5e\xe2\x05"),
+ .tag = TEST_DATA_STR (
+ "\x6d\x63\x97\xc9\xe2\x03\x0f\x5b\x80\x53\xbf\xe5\x10\xf3\xf2\xcf"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc3) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x83\x78\x19\x3a\x4c\xe6\x41\x80\x81\x4b\xd6\x05\x91\xd1\x05\x4a\x04\xdb"
+ "\xc4\xda\x02\xaf\xde\x45\x37\x99\xcd\x68\x88\xee\x0c\x6c"),
+ .iv = TEST_DATA_STR ("\xbd\x8b\x4e\x35\x2c\x7f\x69\x87\x8a\x47\x54\x35"),
+ .aad = TEST_DATA_STR (
+ "\x1c\x6b\x34\x3c\x4d\x04\x5c\xbb\xa5\x62\xba\xe3\xe5\xff\x1b\x18"),
+ .tag = TEST_DATA_STR (
+ "\x08\x33\x96\x7a\x6a\x53\xba\x24\xe7\x5c\x03\x72\xa6\xa1\x7b\xda"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc4) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x22\xfc\x82\xdb\x5b\x60\x69\x98\xad\x45\x09\x9b\x79\x78\xb5\xb4\xf9\xdd"
+ "\x4e\xa6\x01\x7e\x57\x37\x0a\xc5\x61\x41\xca\xaa\xbd\x12"),
+ .iv = TEST_DATA_STR ("\x88\x0d\x05\xc5\xee\x59\x9e\x5f\x15\x1e\x30\x2f"),
+ .aad = TEST_DATA_STR (
+ "\x3e\x3e\xb5\x74\x7e\x39\x0f\x7b\xc8\x0e\x74\x82\x33\x48\x4f\xfc"),
+ .tag = TEST_DATA_STR (
+ "\x2e\x12\x2a\x47\x8e\x64\x46\x32\x86\xf8\xb4\x89\xdc\xdd\x09\xc8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc5) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xfc\x00\x96\x0d\xdd\x69\x8d\x35\x72\x8c\x5a\xc6\x07\x59\x6b\x51\xb3\xf8"
+ "\x97\x41\xd1\x4c\x25\xb8\xba\xda\xc9\x19\x76\x12\x0d\x99"),
+ .iv = TEST_DATA_STR ("\xa4\x24\xa3\x2a\x23\x7f\x0d\xf5\x30\xf0\x5e\x30"),
+ .aad = TEST_DATA_STR (
+ "\xcf\xb7\xe0\x5e\x31\x57\xf0\xc9\x05\x49\xd5\xc7\x86\x50\x63\x11"),
+ .tag = TEST_DATA_STR (
+ "\xdc\xdc\xb9\xe4\x00\x4b\x85\x2a\x0d\xa1\x2b\xdf\x25\x5b\x4d\xdd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc6) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x69\x74\x99\x43\x09\x2f\x56\x05\xbf\x97\x1e\x18\x5c\x19\x1c\x61\x82\x61"
+ "\xb2\xc7\xcc\x16\x93\xcd\xa1\x08\x0c\xa2\xfd\x8d\x51\x11"),
+ .iv = TEST_DATA_STR ("\xbd\x0d\x62\xc0\x2e\xe6\x82\x06\x9b\xd1\xe1\x28"),
+ .aad = TEST_DATA_STR (
+ "\x69\x67\xdc\xe8\x78\xf0\x3b\x64\x3b\xf5\xcd\xba\x59\x6a\x7a\xf3"),
+ .tag = TEST_DATA_STR (
+ "\x37\x8f\x79\x6a\xe5\x43\xe1\xb2\x91\x15\xcc\x18\xac\xd1\x93\xf4"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc7) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xfc\x48\x75\xdb\x84\x81\x98\x34\xb1\xcb\x43\x82\x8d\x2f\x0a\xe3\x47\x3a"
+ "\xa3\x80\x11\x1c\x27\x37\xe8\x2a\x9a\xb1\x1f\xea\x1f\x19"),
+ .iv = TEST_DATA_STR ("\xda\x6a\x68\x4d\x3f\xf6\x3a\x2d\x10\x9d\xec\xd6"),
+ .aad = TEST_DATA_STR (
+ "\x91\xb6\xfa\x2a\xb4\xde\x44\x28\x2f\xfc\x86\xc8\xcd\xe6\xe7\xf5"),
+ .tag = TEST_DATA_STR (
+ "\x50\x4e\x81\xd2\xe7\x87\x7e\x4d\xad\x6f\x31\xcd\xeb\x07\xbd\xbd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc8) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x9f\x9f\xe7\xd2\xa2\x6d\xcf\x59\xd6\x84\xf1\xc0\x94\x5b\x5f\xfa\xfe\x0a"
+ "\x47\x46\x84\x5e\xd3\x17\xd3\x5f\x3e\xd7\x6c\x93\x04\x4d"),
+ .iv = TEST_DATA_STR ("\x13\xb5\x99\x71\xcd\x4d\xd3\x6b\x19\xac\x71\x04"),
+ .aad = TEST_DATA_STR (
+ "\x19\x0a\x69\x34\xf4\x5f\x89\xc9\x00\x67\xc2\xf6\x2e\x04\xc5\x3b"),
+ .tag = TEST_DATA_STR (
+ "\x4f\x63\x6a\x29\x4b\xfb\xf5\x1f\xc0\xe1\x31\xd6\x94\xd5\xc2\x22"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc9) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xab\x91\x55\xd7\xd8\x1b\xa6\xf3\x31\x93\x69\x5c\xf4\x56\x6a\x9b\x6e\x97"
+ "\xa3\xe4\x09\xf5\x71\x59\xae\x6c\xa4\x96\x55\xcc\xa0\x71"),
+ .iv = TEST_DATA_STR ("\x26\xa9\xf8\xd6\x65\xd1\x63\xdd\xb9\x2d\x03\x5d"),
+ .aad = TEST_DATA_STR (
+ "\x4a\x20\x3a\xc2\x6b\x95\x1a\x1f\x67\x3c\x66\x05\x65\x3e\xc0\x2d"),
+ .tag = TEST_DATA_STR (
+ "\x43\x7e\xa7\x7a\x38\x79\xf0\x10\x69\x1e\x28\x8d\x62\x69\xa9\x96"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc10) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x0f\x1c\x62\xdd\x80\xb4\xa6\xd0\x9e\xe9\xd7\x87\xb1\xb0\x43\x27\xaa\x36"
+ "\x15\x29\xff\xa3\x40\x75\x60\x41\x4a\xc4\x7b\x7e\xf7\xbc"),
+ .iv = TEST_DATA_STR ("\xc8\x76\x13\xa3\xb7\x0d\x2a\x04\x8f\x32\xcb\x9a"),
+ .aad = TEST_DATA_STR (
+ "\x8f\x23\xd4\x04\xbe\x2d\x9e\x88\x8d\x21\x9f\x1b\x40\xaa\x29\xe8"),
+ .tag = TEST_DATA_STR (
+ "\x36\xd8\xa3\x09\xac\xbb\x87\x16\xc9\xc0\x8c\x7f\x5d\xe4\x91\x1e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc11) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xf3\xe9\x54\xa3\x89\x56\xdf\x89\x02\x55\xf0\x17\x09\xe4\x57\xb3\x3f\x4b"
+ "\xfe\x7e\xcb\x36\xd0\xee\x50\xf2\x50\x04\x71\xee\xbc\xde"),
+ .iv = TEST_DATA_STR ("\x97\x99\xab\xd3\xc5\x21\x10\xc7\x04\xb0\xf3\x6a"),
+ .aad = TEST_DATA_STR (
+ "\xdd\xb7\x01\x73\xf4\x41\x57\x75\x5b\x6c\x9b\x70\x58\xf4\x0c\xb7"),
+ .tag = TEST_DATA_STR (
+ "\xb3\x23\xae\x3a\xbc\xb4\x15\xc7\xf4\x20\x87\x6c\x98\x0f\x48\x58"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc12) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x06\x25\x31\x65\x34\xfb\xd8\x2f\xe8\xfd\xea\x50\xfa\x57\x3c\x46\x20\x22"
+ "\xc4\x2f\x79\xe8\xb2\x13\x60\xe5\xa6\xdc\xe6\x6d\xde\x28"),
+ .iv = TEST_DATA_STR ("\xda\x64\xa6\x74\x90\x7c\xd6\xcf\x24\x8f\x5f\xbb"),
+ .aad = TEST_DATA_STR (
+ "\xf2\x4d\x48\xe0\x4f\x5a\x0d\x98\x7b\xa7\xc7\x45\xb7\x3b\x03\x64"),
+ .tag = TEST_DATA_STR (
+ "\xdf\x36\x0b\x81\x0f\x27\xe7\x94\x67\x3a\x8b\xb2\xdc\x0d\x68\xb0"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc13) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x28\xf0\x45\xac\x7c\x4f\xe5\xd4\xb0\x1a\x9d\xcd\x5f\x1a\xd3\xef\xff\x1c"
+ "\x4f\x17\x0f\xc8\xab\x87\x58\xd9\x72\x92\x86\x8d\x58\x28"),
+ .iv = TEST_DATA_STR ("\x5d\x85\xde\x95\xb0\xbd\xc4\x45\x14\x14\x39\x19"),
+ .aad = TEST_DATA_STR (
+ "\x60\x1d\x21\x58\xf1\x7a\xb3\xc7\xb4\xdc\xb6\x95\x0f\xbd\xcd\xde"),
+ .tag = TEST_DATA_STR (
+ "\x42\xc3\xf5\x27\x41\x8c\xf2\xc3\xf5\xd5\x01\x0c\xcb\xa8\xf2\x71"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad128_tc14) = {
+ .name = "256-GMAC 128-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x19\x31\x0e\xed\x5f\x5f\x44\xeb\x47\x07\x5c\x10\x5e\xb3\x1e\x36\xbb\xfd"
+ "\x13\x10\xf7\x41\xb9\xba\xa6\x6a\x81\x13\x8d\x35\x72\x42"),
+ .iv = TEST_DATA_STR ("\xa1\x24\x71\x20\x13\x8f\xa4\xf0\xe9\x6c\x99\x2c"),
+ .aad = TEST_DATA_STR (
+ "\x29\xd7\x46\x41\x43\x33\xe0\xf7\x2b\x4c\x3f\x44\xec\x6b\xfe\x42"),
+ .tag = TEST_DATA_STR (
+ "\xd5\x99\x7e\x2f\x95\x6d\xf3\xfa\x2c\x23\x88\xe2\x0f\x30\xc4\x80"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc0) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x88\x6c\xff\x5f\x3e\x6b\x8d\x0e\x1a\xd0\xa3\x8f\xcd\xb2\x6d\xe9\x7e\x8a"
+ "\xcb\xe7\x9f\x6b\xed\x66\x95\x9a\x59\x8f\xa5\x04\x7d\x65"),
+ .iv = TEST_DATA_STR ("\x3a\x8e\xfa\x1c\xd7\x4b\xba\xb5\x44\x8f\x99\x45"),
+ .aad = TEST_DATA_STR ("\x51\x9f\xee\x51\x9d\x25\xc7\xa3\x04\xd6\xc6\xaa\x18"
+ "\x97\xee\x1e\xb8\xc5\x96\x55"),
+ .tag = TEST_DATA_STR (
+ "\xf6\xd4\x75\x05\xec\x96\xc9\x8a\x42\xdc\x3a\xe7\x19\x87\x7b\x87"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc1) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x69\x37\xa5\x7d\x35\xfe\x6d\xc3\xfc\x42\x0b\x12\x3b\xcc\xdc\xe8\x74\xbd"
+ "\x4c\x18\xf2\xe7\xc0\x1c\xe2\xfa\xf3\x3d\x39\x44\xfd\x9d"),
+ .iv = TEST_DATA_STR ("\xa8\x72\x47\x79\x7b\x75\x84\x67\xb9\x63\x10\xf3"),
+ .aad = TEST_DATA_STR ("\xea\xd9\x61\x93\x9a\x33\xdd\x57\x8f\x8e\x93\xdb\x8b"
+ "\x28\xa1\xc8\x53\x62\x90\x5f"),
+ .tag = TEST_DATA_STR (
+ "\x59\x9d\xe3\xec\xf2\x2c\xb8\x67\xf0\x3f\x7f\x6d\x9f\xd7\x42\x8a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc2) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xe6\x5a\x33\x17\x76\xc9\xdc\xdf\x5e\xba\x6c\x59\xe0\x5e\xc0\x79\xd9\x74"
+ "\x73\xbc\xdc\xe8\x4d\xaf\x83\x6b\xe3\x23\x45\x62\x63\xa0"),
+ .iv = TEST_DATA_STR ("\xca\x73\x1f\x76\x8d\xa0\x1d\x02\xeb\x8e\x72\x7e"),
+ .aad = TEST_DATA_STR ("\xd7\x27\x45\x86\x51\x7b\xf1\xd8\xda\x86\x6f\x4a\x47"
+ "\xad\x0b\xcf\x29\x48\xa8\x62"),
+ .tag = TEST_DATA_STR (
+ "\xa8\xab\xe7\xa8\x08\x5f\x25\x13\x0a\x72\x06\xd3\x7a\x8a\xaf\x6d"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc3) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x77\xbb\x1b\x6e\xf8\x98\x68\x3c\x98\x1b\x2f\xc8\x99\x31\x9f\xfb\xb6\x00"
+ "\x0e\xdc\xa2\x25\x66\xb6\x34\xdb\x3a\x3c\x80\x40\x59\xe5"),
+ .iv = TEST_DATA_STR ("\x35\x4a\x19\x28\x37\x69\xb3\xb9\x91\xb0\x5a\x4c"),
+ .aad = TEST_DATA_STR ("\xb5\x56\x62\x51\xa8\xa8\xbe\xc2\x12\xdc\x08\x11\x32"
+ "\x29\xff\x85\x90\x16\x88\x00"),
+ .tag = TEST_DATA_STR (
+ "\xe5\xc2\xdc\xcf\x8f\xc7\xf2\x96\xca\xc9\x5d\x70\x71\xcb\x8d\x7d"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc4) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x2a\x43\x30\x8d\x52\x0a\x59\xed\x51\xe4\x7a\x3a\x91\x5e\x1d\xbf\x20\xa9"
+ "\x1f\x08\x86\x50\x6e\x48\x1a\xd3\xde\x65\xd5\x09\x75\xb4"),
+ .iv = TEST_DATA_STR ("\xbc\xbf\x99\x73\x3d\x8e\xc9\x0c\xb2\x3e\x6c\xe6"),
+ .aad = TEST_DATA_STR ("\xeb\x88\x28\x87\x29\x28\x9d\x26\xfe\x0e\x75\x7a\x99"
+ "\xad\x8e\xec\x96\x10\x60\x53"),
+ .tag = TEST_DATA_STR (
+ "\x01\xb0\x19\x69\x33\xaa\x49\x12\x3e\xab\x4e\x15\x71\x25\x03\x83"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc5) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x23\x79\xb3\x5f\x85\x10\x2d\xb4\xe7\xae\xcc\x52\xb7\x05\xbc\x69\x5d\x47"
+ "\x68\xd4\x12\xe2\xd7\xbe\xbe\x99\x92\x36\x78\x39\x72\xff"),
+ .iv = TEST_DATA_STR ("\x91\x89\x98\xc4\x80\x10\x37\xb1\xcd\x10\x2f\xaa"),
+ .aad = TEST_DATA_STR ("\xb3\x72\x23\x09\xe0\xf0\x66\x22\x5e\x8d\x16\x59\x08"
+ "\x4e\xbb\x07\xa9\x3b\x43\x5d"),
+ .tag = TEST_DATA_STR (
+ "\xdf\xb1\x8a\xee\x99\xd1\xf6\x7f\x57\x48\xd4\xb4\x84\x3c\xb6\x49"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc6) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x98\xb3\xcb\x75\x37\x16\x7e\x6d\x14\xa2\xa8\xb2\x31\x0f\xe9\x4b\x71\x5c"
+ "\x72\x9f\xdf\x85\x21\x65\x68\x15\x0b\x55\x6d\x07\x97\xba"),
+ .iv = TEST_DATA_STR ("\xbc\xa5\xe2\xe5\xa6\xb3\x0f\x18\xd2\x63\xc6\xb2"),
+ .aad = TEST_DATA_STR ("\x26\x0d\x3d\x72\xdb\x70\xd6\x77\xa4\xe3\xe1\xf3\xe1"
+ "\x14\x31\x21\x7a\x2e\x47\x13"),
+ .tag = TEST_DATA_STR (
+ "\xd6\xb7\x56\x0f\x8a\xc2\xf0\xa9\x0b\xad\x42\xa6\xa0\x72\x04\xbc"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc7) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x30\x34\x1a\xe0\xf1\x99\xb1\x0a\x15\x17\x5d\x00\x91\x3d\x50\x29\x52\x6a"
+ "\xb7\xf7\x61\xc0\xb9\x36\xa7\xdd\x5f\x1b\x15\x83\x42\x9d"),
+ .iv = TEST_DATA_STR ("\xdb\xe1\x09\xa8\xce\x5f\x7b\x24\x1e\x99\xf7\xaf"),
+ .aad = TEST_DATA_STR ("\xfe\x4b\xde\xe5\xca\x9c\x48\x06\xfa\x02\x47\x15\xfb"
+ "\xf6\x6a\xb8\x45\x28\x5f\xa7"),
+ .tag = TEST_DATA_STR (
+ "\xae\x91\xda\xed\x65\x8e\x26\xc0\xd1\x26\x57\x51\x47\xaf\x98\x99"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc8) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x82\x32\xb6\xa1\xd2\xe3\x67\xe9\xce\x1e\xa8\xd4\x2f\xcf\xc8\x3a\x4b\xc8"
+ "\xbd\xec\x46\x5c\x6b\xa3\x26\xe3\x53\xad\x92\x55\xf2\x07"),
+ .iv = TEST_DATA_STR ("\xcd\x2f\xb5\xff\x9c\xf0\xf3\x98\x68\xad\x86\x85"),
+ .aad = TEST_DATA_STR ("\x02\x41\x8b\x3d\xde\x54\x92\x4a\x96\x28\xde\x06\x00"
+ "\x4c\x08\x82\xae\x4e\xc3\xbb"),
+ .tag = TEST_DATA_STR (
+ "\xd5\x30\x8f\x63\x70\x86\x75\xce\xd1\x9b\x27\x10\xaf\xd2\xdb\x49"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc9) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xf9\xa1\x32\xa5\x0a\x50\x81\x45\xff\xd8\x29\x4e\x68\x94\x4e\xa4\x36\xce"
+ "\x0f\x9a\x97\xe1\x81\xf5\xe0\xd6\xc5\xd2\x72\x31\x1f\xc1"),
+ .iv = TEST_DATA_STR ("\x89\x29\x91\xb5\x4e\x94\xb9\xd5\x74\x42\xcc\xaf"),
+ .aad = TEST_DATA_STR ("\x4e\x0f\xbd\x37\x99\xda\x25\x0f\xa2\x79\x11\xb7\xe6"
+ "\x8d\x76\x23\xbf\xe6\x0a\x53"),
+ .tag = TEST_DATA_STR (
+ "\x89\x88\x1d\x5f\x78\x6e\x6d\x53\xe0\xd1\x9c\x3b\x4e\x68\x87\xd8"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc10) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x0e\x37\x46\xe5\x06\x46\x33\xea\x93\x11\xb2\xb8\x42\x7c\x53\x6a\xf9\x27"
+ "\x17\xde\x20\xee\xb6\x26\x0d\xb1\x33\x3c\x3d\x8a\x81\x14"),
+ .iv = TEST_DATA_STR ("\xf8\x4c\x3a\x1c\x94\x53\x3f\x7f\x25\xce\xc0\xac"),
+ .aad = TEST_DATA_STR ("\x8c\x0d\x41\xe6\x13\x53\x38\xc8\xd3\xe6\x3e\x2a\x5f"
+ "\xa0\xa9\x66\x7e\xc9\xa5\x80"),
+ .tag = TEST_DATA_STR (
+ "\x47\x9c\xcf\xe9\x24\x1d\xe2\xc4\x74\xf2\xed\xeb\xbb\x38\x5c\x09"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc11) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xb9\x97\xe9\xb0\x74\x6a\xba\xae\xd6\xe6\x4b\x63\xbd\xf6\x48\x82\x52\x6a"
+ "\xd9\x2e\x24\xa2\xf5\x64\x9d\xf0\x55\xc9\xec\x0f\x1d\xaa"),
+ .iv = TEST_DATA_STR ("\xf1\x41\xd8\xd7\x1b\x03\x37\x55\x02\x2f\x0a\x7d"),
+ .aad = TEST_DATA_STR ("\x68\x1d\x65\x83\xf5\x27\xb1\xa9\x2f\x66\xca\xae\x9b"
+ "\x1d\x4d\x02\x8e\x2e\x63\x1e"),
+ .tag = TEST_DATA_STR (
+ "\xb3\x04\x42\xa6\x39\x5e\xc1\x32\x46\xc4\x8b\x21\xff\xc6\x55\x09"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc12) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x87\x66\x0e\xc1\x70\x0d\x4e\x9f\x88\xa3\x23\xa4\x9f\x0b\x87\x1e\x6a\xaf"
+ "\x43\x4a\x2d\x84\x48\xd0\x4d\x4a\x22\xf6\x56\x10\x28\xe0"),
+ .iv = TEST_DATA_STR ("\x2a\x07\xb4\x25\x93\xcd\x24\xf0\xa6\xfe\x40\x6c"),
+ .aad = TEST_DATA_STR ("\x1d\xd2\x39\xb5\x71\x85\xb7\xe4\x57\xce\xd7\x3e\xbb"
+ "\xa0\x43\x05\x7f\x04\x9e\xdd"),
+ .tag = TEST_DATA_STR (
+ "\xdf\x7a\x50\x10\x49\xb3\x7a\x53\x40\x98\xcb\x45\xcb\x9c\x21\xb7"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc13) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xea\x47\x92\xe1\xf1\x71\x7b\x77\xa0\x0d\xe4\xd1\x09\xe6\x27\x54\x9b\x16"
+ "\x5c\x82\xaf\x35\xf3\x3c\xa7\xe1\xa6\xb8\xed\x62\xf1\x4f"),
+ .iv = TEST_DATA_STR ("\x74\x53\xcc\x8b\x46\xfe\x4b\x93\xbc\xc4\x83\x81"),
+ .aad = TEST_DATA_STR ("\x46\xd9\x89\x70\xa6\x36\xe7\xcd\x7b\x76\xfc\x36\x2a"
+ "\xe8\x82\x98\x43\x6f\x83\x4f"),
+ .tag = TEST_DATA_STR (
+ "\x51\x8d\xba\xcd\x36\xbe\x6f\xba\x5c\x12\x87\x16\x78\xa5\x55\x16"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad160_tc14) = {
+ .name = "256-GMAC 160-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x34\x89\x2c\xdd\x1d\x48\xca\x16\x6f\x7b\xa7\x31\x82\xcb\x97\x33\x6c\x2c"
+ "\x75\x4a\xc1\x60\xa3\xe3\x71\x83\xd6\xfb\x50\x78\xce\xc3"),
+ .iv = TEST_DATA_STR ("\xed\x31\x98\xc5\x86\x1b\x78\xc7\x1a\x6a\x4e\xec"),
+ .aad = TEST_DATA_STR ("\xa6\xfa\x6d\x0d\xd1\xe0\xb9\x5b\x46\x09\x95\x1b\xbb"
+ "\xe7\x14\xde\x0a\xe0\xcc\xfa"),
+ .tag = TEST_DATA_STR (
+ "\xc6\x38\x77\x95\x09\x6b\x34\x8e\xcf\x1d\x1f\x6c\xaa\xa3\xc8\x13"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc0) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xf4\x06\x9b\xb7\x39\xd0\x7d\x0c\xaf\xdc\xbc\x60\x9c\xa0\x15\x97\xf9\x85"
+ "\xc4\x3d\xb6\x3b\xba\xaa\x0d\xeb\xbb\x04\xd3\x84\xe4\x9c"),
+ .iv = TEST_DATA_STR ("\xd2\x5f\xf3\x0f\xdc\x3d\x46\x4f\xe1\x73\xe8\x05"),
+ .aad = TEST_DATA_STR (
+ "\x3e\x14\x49\xc4\x83\x7f\x08\x92\xf9\xd5\x51\x27\xc7\x5c\x4b\x25\xd6\x9b"
+ "\xe3\x34\xba\xf5\xf1\x93\x94\xd2\xd8\xbb\x46\x0c\xbf\x21\x20\xe1\x47\x36"
+ "\xd0\xf6\x34\xaa\x79\x2f\xec\xa2\x0e\x45\x5f\x11"),
+ .tag = TEST_DATA_STR (
+ "\x80\x5e\xc2\x93\x1c\x21\x81\xe5\xbf\xb7\x4f\xa0\xa9\x75\xf0\xcf"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc1) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x62\x18\x9d\xcc\x4b\xeb\x97\x46\x2d\x6c\x09\x27\xd8\xa2\x70\xd3\x9a\x1b"
+ "\x07\xd7\x2d\x0a\xd2\x88\x40\xba\xdd\x4f\x68\xcf\x9c\x8b"),
+ .iv = TEST_DATA_STR ("\x85\x9f\xda\x52\x47\xc8\x88\x82\x3a\x4b\x80\x32"),
+ .aad = TEST_DATA_STR (
+ "\xb2\x8d\x16\x21\xee\x11\x0f\x4c\x9d\x70\x9f\xad\x76\x4b\xba\x2d\xd6\xd2"
+ "\x91\xbc\x00\x37\x48\xfa\xac\x6d\x90\x19\x37\x12\x0d\x41\xc1\xb7\xce\x67"
+ "\x63\x37\x63\xe9\x9e\x05\xc7\x13\x63\xfc\xec\xa8"),
+ .tag = TEST_DATA_STR (
+ "\x27\x33\x09\x07\xd0\x00\x28\x80\xbb\xb4\xc1\xa1\xd2\x3c\x0b\xe2"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc2) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x59\x01\x2d\x85\xa1\xb9\x0a\xeb\x03\x59\xe6\x38\x4c\x99\x91\xe7\xbe\x21"
+ "\x93\x19\xf5\xb8\x91\xc9\x2c\x38\x4a\xde\x2f\x37\x18\x16"),
+ .iv = TEST_DATA_STR ("\x3c\x9c\xde\x00\xc2\x39\x12\xcf\xf9\x68\x9c\x7c"),
+ .aad = TEST_DATA_STR (
+ "\xe5\xda\xf4\x73\xa4\x70\x86\x0b\x55\x21\x0a\x48\x3c\x0d\x1a\x97\x8d\x8a"
+ "\xdd\x84\x3c\x2c\x09\x7f\x73\xa3\xcd\xa4\x9a\xc4\xa6\x14\xc8\xe8\x87\xd9"
+ "\x4e\x66\x92\x30\x9d\x2e\xd9\x7e\xbe\x1e\xaf\x5d"),
+ .tag = TEST_DATA_STR (
+ "\x04\x82\x39\xe4\xe5\xc2\xc8\xb3\x38\x90\xa7\xc9\x50\xcd\xa8\x52"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc3) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x4b\xe0\x9b\x40\x8a\xd6\x8b\x89\x0f\x94\xbe\x5e\xfa\x7f\xe9\xc9\x17\x36"
+ "\x27\x12\xa3\x48\x0c\x57\xcd\x38\x44\x93\x5f\x35\xac\xb7"),
+ .iv = TEST_DATA_STR ("\x8f\x35\x0b\xd3\xb8\xee\xa1\x73\xfc\x73\x70\xbc"),
+ .aad = TEST_DATA_STR (
+ "\x28\x19\xd6\x5a\xec\x94\x21\x98\xca\x97\xd4\x43\x5e\xfd\x9d\xd4\xd4\x39"
+ "\x3b\x96\xcf\x5b\xa4\x4f\x09\xbc\xe4\xba\x13\x5f\xc8\x63\x6e\x82\x75\xdc"
+ "\xb5\x15\x41\x4b\x8b\xef\xd3\x2f\x91\xfc\x48\x22"),
+ .tag = TEST_DATA_STR (
+ "\xa1\x33\xcb\x7a\x7d\x04\x71\xdb\xac\x61\xfb\x41\x58\x9a\x2e\xfe"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc4) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x13\xcb\x96\x5a\x4d\x9d\x1a\x36\xef\xad\x9f\x6c\xa1\xba\x76\x38\x6a\x5b"
+ "\xb1\x60\xd8\x0b\x09\x17\x27\x71\x02\x35\x7a\xc7\xaf\xc8"),
+ .iv = TEST_DATA_STR ("\xf3\x13\xad\xec\x42\xa6\x6d\x13\xc3\x95\x81\x80"),
+ .aad = TEST_DATA_STR (
+ "\x71\x7b\x48\x35\x88\x98\xe5\xcc\xfe\xa4\x28\x90\x49\xad\xcc\x1b\xb0\xdb"
+ "\x3b\x3e\xbd\x17\x67\xac\x24\xfb\x2b\x7d\x37\xdc\x80\xea\x23\x16\xc1\x7f"
+ "\x14\xfb\x51\xb5\xe1\x8c\xd5\xbb\x09\xaf\xe4\x14"),
+ .tag = TEST_DATA_STR (
+ "\x81\xb4\xef\x7a\x84\xdc\x4a\x0b\x1f\xdd\xbe\xfe\x37\xf5\x38\x52"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc5) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xd2\x7f\x1b\xeb\xbb\xde\xf0\xed\xca\x39\x3a\x62\x61\xb0\x33\x8a\xbb\xc4"
+ "\x91\x26\x2e\xab\x07\x37\xf5\x52\x46\x45\x8f\x66\x68\xcc"),
+ .iv = TEST_DATA_STR ("\xfc\x06\x2f\x85\x78\x86\xe2\x78\xf3\xa5\x67\xd2"),
+ .aad = TEST_DATA_STR (
+ "\x2b\xae\x92\xde\xa6\x4a\xa9\x91\x89\xde\x8e\xa4\xc0\x46\x74\x53\x06\x00"
+ "\x2e\x02\xcf\xb4\x6a\x41\x44\x4c\xe8\xbf\xcc\x32\x9b\xd4\x20\x59\x63\xd9"
+ "\xab\x53\x57\xb0\x26\xa4\xa3\x4b\x1a\x86\x17\x71"),
+ .tag = TEST_DATA_STR (
+ "\x5c\x5a\x6c\x46\x13\xf1\xe5\x22\x59\x63\x30\xd4\x5f\x24\x3f\xdd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc6) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x7b\x4d\x19\xcd\x35\x69\xf7\x4c\x7b\x5d\xf6\x1a\xb7\x83\x79\xee\x6b\xfa"
+ "\x15\x10\x5d\x21\xb1\x0b\xf6\x09\x66\x99\x53\x90\x06\xd0"),
+ .iv = TEST_DATA_STR ("\xfb\xed\x56\x95\xc4\xa7\x39\xed\xed\x97\xb1\xe3"),
+ .aad = TEST_DATA_STR (
+ "\xc6\xf2\xe5\xd6\x63\xbf\xaf\x66\x8d\x01\x45\x50\xef\x2e\x66\xbf\x89\x97"
+ "\x87\x99\xa7\x85\xf1\xf2\xc7\x9a\x2c\xb3\xeb\x3f\x2f\xd4\x07\x62\x07\xd5"
+ "\xf7\xe1\xc2\x84\xb4\xaf\x5c\xff\xc4\xe4\x61\x98"),
+ .tag = TEST_DATA_STR (
+ "\x71\x01\xb4\x34\xfb\x90\xc7\xf9\x5b\x9b\x7a\x0d\xee\xeb\x5c\x81"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc7) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xd3\x43\x14\x88\xd8\xf0\x48\x59\x0b\xd7\x6e\xc6\x6e\x71\x42\x1e\xf0\x9f"
+ "\x65\x5d\x7c\xf8\x04\x3b\xf3\x2f\x75\xb4\xb2\xe7\xef\xcc"),
+ .iv = TEST_DATA_STR ("\xcc\x76\x6e\x98\xb4\x0a\x81\x51\x9f\xa4\x63\x92"),
+ .aad = TEST_DATA_STR (
+ "\x93\x32\x01\x79\xfd\xb4\x0c\xbc\x1c\xcf\x00\xb8\x72\xa3\xb4\xa5\xf6\xc7"
+ "\x0b\x56\xe4\x3a\x84\xfc\xac\x5e\xb4\x54\xa0\xa1\x9a\x74\x7d\x45\x20\x42"
+ "\x61\x1b\xf3\xbb\xaa\xfd\x92\x5e\x80\x6f\xfe\x8e"),
+ .tag = TEST_DATA_STR (
+ "\x3a\xfc\xc3\x36\xce\x8b\x71\x91\xea\xb0\x4a\xd6\x79\x16\x3c\x2a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc8) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa4\x40\x94\x8c\x03\x78\x56\x1c\x39\x56\x81\x3c\x03\x1f\x81\x57\x32\x08"
+ "\xc7\xff\xa8\x15\x11\x4e\xf2\xee\xe1\xeb\x64\x2e\x74\xc6"),
+ .iv = TEST_DATA_STR ("\xc1\xf4\xff\xe5\x4b\x86\x80\x83\x2e\xed\x88\x19"),
+ .aad = TEST_DATA_STR (
+ "\x25\x34\x38\xf1\x32\xb1\x8e\x84\x83\x07\x45\x61\x89\x8c\x56\x52\xb4\x3a"
+ "\x82\xcc\x94\x1e\x8b\x4a\xe3\x7e\x79\x2a\x8e\xd6\xec\x5c\xe2\xbc\xec\x9f"
+ "\x1f\xfc\xf4\x21\x6e\x46\x69\x63\x07\xbb\x77\x4a"),
+ .tag = TEST_DATA_STR (
+ "\x12\x94\x45\xf0\xa3\xc9\x79\xa1\x12\xa3\xaf\xb1\x0a\x24\xe2\x45"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc9) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x79\x87\x06\xb6\x51\x03\x3d\x9e\x9b\xf2\xce\x06\x4f\xb1\x2b\xe7\xdf\x73"
+ "\x08\xcf\x45\xdf\x44\x77\x65\x88\xcd\x39\x1c\x49\xff\x85"),
+ .iv = TEST_DATA_STR ("\x5a\x43\x36\x8a\x39\xe7\xff\xb7\x75\xed\xfa\xf4"),
+ .aad = TEST_DATA_STR (
+ "\x92\x6b\x74\xfe\x63\x81\xeb\xd3\x57\x57\xe4\x2e\x8e\x55\x76\x01\xf2\x28"
+ "\x7b\xfc\x13\x3a\x13\xfd\x86\xd6\x1c\x01\xaa\x84\xf3\x97\x13\xbf\x99\xa8"
+ "\xdc\x07\xb8\x12\xf0\x27\x4c\x9d\x32\x80\xa1\x38"),
+ .tag = TEST_DATA_STR (
+ "\x89\xfe\x48\x1a\x3d\x95\xc0\x3a\x0a\x9d\x4e\xe3\xe3\xf0\xed\x4a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc10) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xc3\xaa\x2a\x39\xa9\xfe\xf4\xa4\x66\x61\x8d\x12\x88\xbb\x62\xf8\xda\x7b"
+ "\x1c\xb7\x60\xcc\xc8\xf1\xbe\x3e\x99\xe0\x76\xf0\x8e\xff"),
+ .iv = TEST_DATA_STR ("\x99\x65\xba\x5e\x23\xd9\x45\x3d\x72\x67\xca\x5b"),
+ .aad = TEST_DATA_STR (
+ "\x93\xef\xb6\xa2\xaf\xfc\x30\x4c\xb2\x5d\xfd\x49\xaa\x3e\x3c\xcd\xb2\x5c"
+ "\xea\xc3\xd3\xce\xa9\x0d\xd9\x9e\x38\x97\x69\x78\x21\x7a\xd5\xf2\xb9\x90"
+ "\xd1\x0b\x91\x72\x5c\x7f\xd2\x03\x5e\xcc\x6a\x30"),
+ .tag = TEST_DATA_STR (
+ "\x00\xa9\x4c\x18\xa4\x57\x2d\xcf\x4f\x9e\x22\x26\xa0\x3d\x4c\x07"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc11) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x14\xe0\x68\x58\x00\x8f\x7e\x77\x18\x6a\x2b\x3a\x79\x28\xa0\xc7\xfc\xee"
+ "\x22\x13\x6b\xc3\x6f\x53\x55\x3f\x20\xfa\x5c\x37\xed\xcd"),
+ .iv = TEST_DATA_STR ("\x32\xeb\xe0\xdc\x9a\xda\x84\x9b\x5e\xda\x7b\x48"),
+ .aad = TEST_DATA_STR (
+ "\x6c\x01\x52\xab\xfa\x48\x5b\x8c\xd6\x7c\x15\x4a\x5f\x04\x11\xf2\x21\x21"
+ "\x37\x97\x74\xd7\x45\xf4\x0e\xe5\x77\xb0\x28\xfd\x0e\x18\x82\x97\x58\x15"
+ "\x61\xae\x97\x22\x23\xd7\x5a\x24\xb4\x88\xae\xd7"),
+ .tag = TEST_DATA_STR (
+ "\x26\x25\xb0\xba\x6e\xe0\x2b\x58\xbc\x52\x9e\x43\xe2\xeb\x47\x1b"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc12) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xfb\xb5\x6b\x11\xc5\x1a\x09\x3c\xe1\x69\xa6\x99\x03\x99\xc4\xd7\x41\xf6"
+ "\x2b\x3c\xc6\x1f\x9e\x8a\x60\x9a\x1b\x6a\xe8\xe7\xe9\x65"),
+ .iv = TEST_DATA_STR ("\x9c\x5a\x95\x32\x47\xe9\x1a\xce\xce\xb9\xde\xfb"),
+ .aad = TEST_DATA_STR (
+ "\x46\xcb\x5c\x4f\x61\x79\x16\xa9\xb1\xb2\xe0\x32\x72\xcb\x05\x90\xce\x71"
+ "\x64\x98\x53\x30\x47\xd7\x3c\x81\xe4\xcb\xe9\x27\x8a\x36\x86\x11\x6f\x56"
+ "\x32\x75\x3e\xa2\xdf\x52\xef\xb3\x55\x1a\xea\x2d"),
+ .tag = TEST_DATA_STR (
+ "\x4f\x3b\x82\xe6\xbe\x4f\x08\x75\x60\x71\xf2\xc4\x6c\x31\xfe\xdf"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc13) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xb3\x03\xbf\x02\xf6\xa8\xdb\xb5\xbc\x4b\xac\xca\xb0\x80\x0d\xb5\xee\x06"
+ "\xde\x64\x8e\x2f\xae\x29\x9b\x95\xf1\x35\xc9\xb1\x07\xcc"),
+ .iv = TEST_DATA_STR ("\x90\x64\x95\xb6\x7e\xf4\xce\x00\xb4\x44\x22\xfa"),
+ .aad = TEST_DATA_STR (
+ "\x87\x2c\x6c\x37\x09\x26\x53\x5c\x3f\xa1\xba\xec\x03\x1e\x31\xe7\xc6\xc8"
+ "\x28\x08\xc8\xa0\x60\x74\x2d\xbe\xf1\x14\x96\x1c\x31\x4f\x19\x86\xb2\x13"
+ "\x1a\x9d\x91\xf3\x0f\x53\x06\x7e\xc0\x12\xc6\xb7"),
+ .tag = TEST_DATA_STR (
+ "\x64\xdd\xe3\x71\x69\x08\x2d\x18\x1a\x69\x10\x7f\x60\xc5\xc6\xbb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad384_tc14) = {
+ .name = "256-GMAC 384-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x29\xf5\xf8\x07\x59\x03\x06\x3c\xb6\xd7\x05\x06\x69\xb1\xf7\x4e\x08\xa3"
+ "\xf7\x9e\xf5\x66\x29\x2d\xfd\xef\x1c\x06\xa4\x08\xe1\xab"),
+ .iv = TEST_DATA_STR ("\x35\xf2\x5c\x48\xb4\xb5\x35\x5e\x78\xb9\xfb\x3a"),
+ .aad = TEST_DATA_STR (
+ "\x10\x7e\x2e\x23\x15\x9f\xc5\xc0\x74\x8c\xa7\xa0\x77\xe5\xcc\x05\x3f\xa5"
+ "\xc6\x82\xff\x52\x69\xd3\x50\xee\x81\x7f\x8b\x5d\xe4\xd3\x97\x20\x41\xd1"
+ "\x07\xb1\xe2\xf2\xe5\x4c\xa9\x3b\x72\xcd\x04\x08"),
+ .tag = TEST_DATA_STR (
+ "\xfe\xe5\xa9\xba\xeb\xb5\xbe\x01\x65\xde\xaa\x86\x7e\x96\x7a\x9e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc0) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC0",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x03\xcc\xb7\xdb\xc7\xb8\x42\x54\x65\xc2\xc3\xfc\x39\xed\x05\x93\x92\x9f"
+ "\xfd\x02\xa4\x5f\xf5\x83\xbd\x89\xb7\x9c\x6f\x64\x6f\xe9"),
+ .iv = TEST_DATA_STR ("\xfd\x11\x99\x85\x53\x3b\xd5\x52\x0b\x30\x1d\x12"),
+ .aad = TEST_DATA_STR (
+ "\x98\xe6\x8c\x10\xbf\x4b\x5a\xe6\x2d\x43\x49\x28\xfc\x64\x05\x14\x7c\x63"
+ "\x01\x41\x73\x03\xef\x3a\x70\x3d\xcf\xd2\xc0\xc3\x39\xa4\xd0\xa8\x9b\xd2"
+ "\x9f\xe6\x1f\xec\xf1\x06\x6a\xb0\x6d\x7a\x5c\x31\xa4\x8f\xfb\xfe\xd2\x2f"
+ "\x74\x9b\x17\xe9\xbd\x0d\xc1\xc6\xf8\xfb\xd6\xfd\x45\x87\x18\x4d\xb9\x64"
+ "\xd5\x45\x61\x32\x10\x6d\x78\x23\x38\xc3\xf1\x17\xec\x05\x22\x9b\x08"
+ "\x99"),
+ .tag = TEST_DATA_STR (
+ "\xcf\x54\xe7\x14\x13\x49\xb6\x6f\x24\x81\x54\x42\x78\x10\xc8\x7a"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc1) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC1",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x57\xe1\x12\xcd\x45\xf2\xc5\x7d\xdb\x81\x9e\xa6\x51\xc2\x06\x76\x31\x63"
+ "\xef\x01\x6c\xee\xad\x5c\x4e\xae\x40\xf2\xbb\xe0\xe4\xb4"),
+ .iv = TEST_DATA_STR ("\x18\x80\x22\xc2\x12\x5d\x2b\x1f\xcf\x9e\x47\x69"),
+ .aad = TEST_DATA_STR (
+ "\x09\xc8\xf4\x45\xce\x5b\x71\x46\x56\x95\xf8\x38\xc4\xbb\x2b\x00\x62\x4a"
+ "\x1c\x91\x85\xa3\xd5\x52\x54\x6d\x9d\x2e\xe4\x87\x00\x07\xaa\xf3\x00\x70"
+ "\x08\xf8\xae\x9a\xff\xb7\x58\x8b\x88\xd0\x9a\x90\xe5\x8b\x45\x7f\x88\xf1"
+ "\xe3\x75\x2e\x3f\xb9\x49\xce\x37\x86\x70\xb6\x7a\x95\xf8\xcf\x7f\x5c\x7c"
+ "\xeb\x65\x0e\xfd\x73\x5d\xbc\x65\x2c\xae\x06\xe5\x46\xa5\xdb\xd8\x61"
+ "\xbd"),
+ .tag = TEST_DATA_STR (
+ "\x9e\xfc\xdd\xfa\x0b\xe2\x15\x82\xa0\x57\x49\xf4\x05\x0d\x29\xfe"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc2) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC2",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xa4\xdd\xf3\xca\xb7\x45\x3a\xae\xfa\xd6\x16\xfd\x65\xd6\x3d\x13\x00\x5e"
+ "\x94\x59\xc1\x7d\x31\x73\xcd\x6e\xd7\xf2\xa8\x6c\x92\x1f"),
+ .iv = TEST_DATA_STR ("\x06\x17\x7b\x24\xc5\x8f\x3b\xe4\xf3\xdd\x49\x20"),
+ .aad = TEST_DATA_STR (
+ "\xf9\x5b\x04\x6d\x80\x48\x5e\x41\x1c\x56\xb8\x34\x20\x9d\x3a\xbd\x5a\x8a"
+ "\x9d\xdf\x72\xb1\xb9\x16\x67\x9a\xdf\xdd\xe8\x93\x04\x43\x15\xa5\xf4\x96"
+ "\x7f\xd0\x40\x5e\xc2\x97\xaa\x33\x2f\x67\x6f\xf0\xfa\x5b\xd7\x95\xeb\x60"
+ "\x9b\x2e\x4f\x08\x8d\xb1\xcd\xf3\x7c\xcf\xf0\x73\x5a\x5e\x53\xc4\xc1\x21"
+ "\x73\xa0\x02\x6a\xea\x42\x38\x8a\x7d\x71\x53\xa8\x83\x0b\x8a\x90\x1c"
+ "\xf9"),
+ .tag = TEST_DATA_STR (
+ "\x9d\x1b\xd8\xec\xb3\x27\x69\x06\x13\x8d\x0b\x03\xfc\xb8\xc1\xbb"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc3) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC3",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x24\xa9\x2b\x24\xe8\x59\x03\xcd\x4a\xaa\xbf\xe0\x7c\x31\x0d\xf5\xa4\xf8"
+ "\xf4\x59\xe0\x3a\x63\xcb\xd1\xb4\x78\x55\xb0\x9c\x0b\xe8"),
+ .iv = TEST_DATA_STR ("\x22\xe7\x56\xdc\x89\x8d\x4c\xf1\x22\x08\x06\x12"),
+ .aad = TEST_DATA_STR (
+ "\x2e\x01\xb2\x53\x6d\xbe\x37\x6b\xe1\x44\x29\x6f\x5c\x38\xfb\x09\x9e\x00"
+ "\x8f\x96\x2b\x9f\x0e\x89\x63\x34\xb6\x40\x83\x93\xbf\xf1\x02\x0a\x0e\x44"
+ "\x24\x77\xab\xfd\xb1\x72\x72\x13\xb6\xcc\xc5\x77\xf5\xe1\x6c\xb0\x57\xc8"
+ "\x94\x5a\x07\xe3\x07\x26\x4b\x65\x97\x9a\xed\x96\xb5\x99\x5f\x40\x25\x0f"
+ "\xfb\xaa\xa1\xa1\xf0\xec\xcf\x39\x40\x15\xf6\x29\x0f\x5e\x64\xdf\xe5"
+ "\xca"),
+ .tag = TEST_DATA_STR (
+ "\x0d\x7f\x1a\xed\x47\x08\xa0\x3b\x0c\x80\xb2\xa1\x87\x85\xc9\x6d"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc4) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC4",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x15\x27\x6f\xc6\x44\x38\x57\x8e\x0e\xc5\x33\x66\xb9\x0a\x0e\x23\xd9\x39"
+ "\x10\xfe\xc1\x0d\xc3\x00\x3d\x9b\x3f\x3f\xa7\x2d\xb7\x02"),
+ .iv = TEST_DATA_STR ("\xc5\xe9\x31\x94\x6d\x5c\xae\xbc\x22\x76\x56\xd2"),
+ .aad = TEST_DATA_STR (
+ "\x3f\x96\x7c\x83\xba\x02\xe7\x7c\x14\xe9\xd4\x11\x85\xeb\x87\xf1\x72\x25"
+ "\x0e\x93\xed\xb0\xf8\x2b\x67\x42\xc1\x24\x29\x8a\xb6\x94\x18\x35\x8e\xdd"
+ "\xef\xa3\x9f\xed\xc3\xca\xde\x9d\x80\xf0\x36\xd8\x64\xa5\x9e\xad\x37\xc8"
+ "\x77\x27\xc5\x6c\x70\x1a\x8c\xd9\x63\x44\x69\xff\x31\xc7\x04\xf5\xee\x39"
+ "\x35\x41\x57\xe6\x55\x84\x67\xb9\x28\x24\xda\x36\xb1\xc0\x71\xbe\xdf"
+ "\xe9"),
+ .tag = TEST_DATA_STR (
+ "\xa0\xff\xa1\x9a\xdc\xf3\x1d\x06\x1c\xd0\xdd\x46\xd2\x40\x15\xef"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc5) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC5",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xec\x09\x80\x4a\x04\x8b\xb8\x54\xc7\x16\x18\xb5\xa3\xa1\xc5\x90\x91\x0f"
+ "\xc8\xa6\x84\x55\x13\x9b\x71\x94\x86\xd2\x28\x0e\xa5\x9a"),
+ .iv = TEST_DATA_STR ("\xd0\xb1\x24\x7e\x71\x21\xa9\x27\x6a\xc1\x8c\xa3"),
+ .aad = TEST_DATA_STR (
+ "\x66\xb1\xd3\x9d\x41\x45\x96\x30\x8e\x86\x6b\x04\x47\x6e\x05\x3b\x71\xac"
+ "\xd1\xcd\x07\xce\x80\x93\x95\x77\xeb\xbe\xac\xe0\x43\x0f\x7e\x4c\x0c\x18"
+ "\x5f\xe1\xd9\x7a\xc7\x56\x99\x50\xc8\x3d\xb4\x0b\xbe\xd0\xf1\xd1\x73\xe1"
+ "\xaa\x0d\xc2\x8b\x47\x73\x70\x50\x32\xd9\x75\x51\xf7\xfc\xef\x7f\x55\xe4"
+ "\xb6\x9f\x88\xdf\x65\x00\x32\xdf\xc5\x23\x2c\x15\x66\x41\x10\x4b\x53"
+ "\x97"),
+ .tag = TEST_DATA_STR (
+ "\x84\x40\xe6\xd8\x64\xab\x77\x8f\x9b\xe4\x78\xf2\x03\x16\x2d\x86"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc6) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC6",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x4a\xdf\x86\xbf\xa5\x47\x72\x5e\x4b\x80\x36\x5a\x5a\x32\x7c\x10\x70\x40"
+ "\xfa\xcf\xff\x00\x7d\xc3\x51\x02\x06\x6b\xd6\xa9\x95\xc4"),
+ .iv = TEST_DATA_STR ("\xb1\x01\x8c\xc3\x31\x91\x12\x55\xa5\x5a\x07\x95"),
+ .aad = TEST_DATA_STR (
+ "\x05\x3c\xa4\x42\x8c\x99\x0b\x44\x56\xd3\xc1\x89\x5d\x5d\x52\xde\xff\x67"
+ "\x58\x96\xde\x9f\xaa\x53\xd8\xcf\x24\x12\x55\xf4\xa3\x1d\xc3\x39\x9f\x15"
+ "\xd8\x3b\xe3\x80\x25\x66\x16\xe5\xaf\x04\x3a\xbf\xb3\x75\x52\x65\x5a\xdf"
+ "\x4f\x2e\x68\xdd\xa2\x4b\xc3\x73\x69\x51\x13\x4f\x35\x9d\x9c\x0e\x28\x8b"
+ "\xb7\x98\xb6\xc3\xea\x46\x23\x92\x31\xa3\xcb\x28\x00\x66\xdb\x98\x62"
+ "\xe7"),
+ .tag = TEST_DATA_STR (
+ "\xc7\x42\x4f\x38\x08\x49\x30\xbf\xc5\xed\xc1\xfc\xf1\xe7\x60\x8d"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc7) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC7",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x3c\x92\xe0\xd1\xe3\x9a\x3c\x76\x65\x73\xc4\x64\x6c\x76\x8c\x40\x2c\xcf"
+ "\xf4\x8a\x56\x68\x2a\x93\x43\x35\x12\xab\xf0\x45\x6e\x00"),
+ .iv = TEST_DATA_STR ("\xd5\x7f\x31\x9e\x59\x01\x91\x84\x1d\x2b\x98\xbd"),
+ .aad = TEST_DATA_STR (
+ "\x84\x0d\x93\x94\xaa\x24\x0e\x52\xba\x15\x21\x51\xc1\x2a\xcd\x1c\xd4\x48"
+ "\x81\xe8\x54\x9d\xc8\x32\xb7\x1a\x45\xda\x7e\xfc\xc7\x4f\xb7\xe8\x44\xd9"
+ "\xfe\xc2\x5e\x5d\x49\x7b\x8f\xb8\xf4\x7f\x32\x8c\x8d\x99\x04\x5a\x19\xe3"
+ "\x66\xe6\xce\x5e\x19\xdc\x26\xf6\x7a\x81\xa9\x4f\xa6\xc9\x7c\x31\x4d\x88"
+ "\x6e\x7b\x56\xef\xf1\x44\xc0\x9f\x6f\xa5\x19\xdb\x63\x08\xbc\x73\x42"
+ "\x2e"),
+ .tag = TEST_DATA_STR (
+ "\xcb\x4e\xf7\x2d\xbd\xa4\x91\x4d\x74\x34\xf9\x68\x6f\x82\x3e\x2f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc8) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC8",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xb6\x6b\xa3\x97\x33\x88\x8a\x9e\x0a\x2e\x30\x45\x28\x44\x16\x1d\xc3\x3c"
+ "\xb3\x83\xc0\x2c\xe1\x6c\x4e\xfa\xd5\x45\x25\x09\xb5\xb5"),
+ .iv = TEST_DATA_STR ("\x93\x7c\xb6\x65\xe3\x70\x59\xb2\xe4\x03\x59\xf2"),
+ .aad = TEST_DATA_STR (
+ "\xdb\xcd\x96\x94\xa8\x83\x48\x60\x03\x4e\x8e\xde\x3a\x5b\xd4\x19\xfc\xf9"
+ "\x1c\x00\x5a\xd9\x9f\x48\x8a\xa6\x23\xf5\x81\x62\x20\x93\xf9\xd4\x1e\x6a"
+ "\x68\xe2\x0f\xd2\x02\xf3\x02\xbc\xfc\x44\x17\xca\x89\x09\x0b\xfc\xd4\xd5"
+ "\x22\x4e\x8f\xf4\xeb\x5b\xba\xe4\xec\xb2\x7b\xaa\x23\x9f\x59\xc2\xf9\x9c"
+ "\xd4\x7c\x0a\x26\x9c\x49\x79\x06\xb4\x1a\x8f\x32\x0a\x3d\xd2\xdc\x2d"
+ "\xe2"),
+ .tag = TEST_DATA_STR (
+ "\xbd\xc8\x24\x93\x02\xd9\xd6\x66\xcf\x71\x68\x31\x7c\x11\x87\x43"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc9) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC9",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x2f\x9f\xcd\x10\x43\x45\x56\x95\x63\x8c\x99\x1a\x1b\x1d\x35\xad\x57\xc1"
+ "\x8e\xf0\x72\x73\x22\x74\x7b\x79\x91\xab\xc3\xd7\x87\xf3"),
+ .iv = TEST_DATA_STR ("\xd0\x6c\xf5\x48\xf6\x28\x69\xf4\xbe\xd7\xa3\x18"),
+ .aad = TEST_DATA_STR (
+ "\x43\x20\x23\xc1\x2c\xf1\xf6\x14\xe1\x00\x51\x12\xa1\x7d\xbe\x6c\x5d\x54"
+ "\x02\x2a\x95\xcf\x63\x35\xa5\xbc\x55\x00\x4c\x75\xf0\x9a\x56\x99\x73\x9e"
+ "\xcf\x92\x8e\x1c\x78\xd0\x3d\xad\x50\x96\xa1\x7a\x08\x4a\xfe\x1c\xc2\x20"
+ "\x41\xbb\xdf\xb5\x98\x5b\xd0\x8b\x0d\xcc\x59\xd2\xb0\x8c\xd8\x6b\x7a\xad"
+ "\x59\x7c\x4c\xd7\xb4\xba\x6d\x6a\x73\x70\xb8\x39\x95\xa6\x51\x1a\x1f"
+ "\x9e"),
+ .tag = TEST_DATA_STR (
+ "\x32\x2e\xb8\x4f\xb6\x88\x4f\x10\xcf\xb7\x66\xc2\xe3\xec\x77\x9e"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc10) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC10",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x21\xc5\x83\x9a\x63\xe1\x23\x0c\x06\xb0\x86\x34\x1c\x96\xab\x74\x58\x5e"
+ "\x69\xbc\xed\x94\x33\x2c\xae\xb1\xfa\x77\xd5\x10\xc2\x4f"),
+ .iv = TEST_DATA_STR ("\x5a\xb6\xe5\xed\x6e\xe7\x33\xbe\x72\x50\x85\x8c"),
+ .aad = TEST_DATA_STR (
+ "\xc9\x2f\x08\xe3\x0f\x67\xd4\x25\x16\x13\x3c\x48\xe9\x7b\x65\xcc\x9e\x12"
+ "\x43\x65\xe1\x10\xab\xa5\xe7\xb2\xcb\xe8\x3d\xeb\xcc\x99\xed\xf4\xeb\x00"
+ "\x07\xaf\x05\x2b\xda\x22\xd8\x59\x00\x27\x1b\x18\x97\xaf\x4f\xd9\xac\xe6"
+ "\xa2\xd0\x9d\x98\x4a\xc3\xde\x79\xd0\x5d\xe0\xb1\x05\xa8\x1b\x12\x54\x2b"
+ "\x2c\x48\xe2\x7d\x40\x9f\xd6\x99\x2d\xd0\x62\xd6\x05\x5d\x6f\xc6\x68"
+ "\x42"),
+ .tag = TEST_DATA_STR (
+ "\x53\xb0\xe4\x50\x30\x9d\x14\x64\x59\xf2\xa1\xe4\x6c\x9d\x9e\x23"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc11) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC11",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x25\xa1\x44\xf0\xfd\xba\x18\x41\x25\xd8\x1a\x87\xe7\xed\x82\xfa\xd3\x3c"
+ "\x70\x1a\x09\x4a\x67\xa8\x1f\xe4\x69\x2d\xc6\x9a\xfa\x31"),
+ .iv = TEST_DATA_STR ("\x8b\xf5\x75\xc5\xc2\xb4\x5b\x4e\xfc\x67\x46\xe4"),
+ .aad = TEST_DATA_STR (
+ "\x2a\x36\x7c\xb0\xd3\xb7\xc5\xb8\x32\x0b\x3c\xf9\x5e\x82\xb6\xba\x0b\xba"
+ "\x1d\x09\xa2\x05\x58\x85\xde\xdd\x9e\xf5\x64\x16\x23\x68\x22\x12\x10\x32"
+ "\x38\xb8\xf7\x75\xcc\xe4\x2d\xdf\xd4\xf6\x63\x82\xf2\xc3\xa5\xe8\xd6\xdf"
+ "\xf9\x16\x3c\xed\x83\x58\x0a\x75\x70\x55\x74\x02\x6b\x55\xdb\x90\xf7\x5f"
+ "\x8a\xbb\x30\x14\xc9\xa7\x07\x02\x1d\xed\xc0\x75\xda\x38\xbe\xbb\xf0"
+ "\xa0"),
+ .tag = TEST_DATA_STR (
+ "\x0e\x2c\xe9\xca\xc8\xdf\xce\xdb\x05\x72\xec\x6c\xab\x62\x1e\xfd"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc12) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC12",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\x42\xbc\x84\x1b\x3b\x03\xa8\x07\xcd\x36\x6a\x35\xec\xec\x8a\x6a\xeb\xef"
+ "\x7c\x4c\xba\x0e\xc8\xcb\x8d\xa0\xda\x41\xdf\x8c\xce\xf1"),
+ .iv = TEST_DATA_STR ("\x1b\xd4\x6f\x85\xdf\x5f\x4b\x3a\x12\x6e\xe3\x15"),
+ .aad = TEST_DATA_STR (
+ "\xed\xe3\xdc\xdd\xbd\xc7\xd8\xe5\xd0\x34\xc0\x16\x61\x33\x2e\xc3\x49\xcb"
+ "\x4e\x7a\x9f\xba\xaf\x7a\xbe\x2c\x64\x75\x87\xdb\x86\xcd\x42\x7c\xe6\x69"
+ "\x08\xe0\x70\xbc\x49\xef\x83\x87\x47\xe0\x6b\x45\xac\x48\x6d\xfb\xea\x6f"
+ "\x86\x98\xb4\x62\x5e\x21\xe6\x9d\xb8\x32\x7e\xc0\x5c\xfd\x74\xac\xcb\xe6"
+ "\x7a\xb6\x44\x94\x8c\xdb\x55\x4a\xf1\x79\xa1\xe2\x64\xe0\x8f\xe1\x66"
+ "\x41"),
+ .tag = TEST_DATA_STR (
+ "\x63\x3a\xb6\xaa\xf5\xb3\x2b\x53\xa7\x94\xf6\xbe\x62\x62\xfc\x5f"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc13) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC13",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xc2\x5b\x85\x00\xbe\x73\x21\x05\x96\xfc\x4a\x9f\xb4\xd8\x4d\x1a\x33\x79"
+ "\xa9\x1e\x3f\x0a\x6c\xc4\x17\x7d\x99\x60\x46\x62\x76\x79"),
+ .iv = TEST_DATA_STR ("\xb5\x6c\x48\xc0\xc4\xcd\x31\x8b\x20\x43\x70\x02"),
+ .aad = TEST_DATA_STR (
+ "\xbc\xd1\x4d\xd0\x43\xfd\xc8\xc3\x27\x95\x7e\x1c\x14\x28\x69\x85\x43\xec"
+ "\x86\x02\x52\x1a\x7c\x74\x78\x8d\x29\x6d\x37\xd4\x82\x8f\x10\xf9\x06\x56"
+ "\x88\x3d\x25\x31\xc7\x02\xeb\xda\x2d\xc0\xa6\x8d\xab\x00\x15\x45\x77\x45"
+ "\x44\x55\xfa\xd9\x86\xff\x8e\x09\x73\x09\x8d\xbf\x37\x0f\xf7\x03\xed\x98"
+ "\x22\x2b\x94\x57\x26\xed\x9b\xe7\x90\x92\x10\xdd\xbc\x67\x2e\x99\xfd"
+ "\xd9"),
+ .tag = TEST_DATA_STR (
+ "\x81\x71\xd4\xff\x60\xfe\x7e\xf6\xde\x02\x88\x32\x6a\xa7\x32\x23"),
+};
+
+UNITTEST_REGISTER_CRYPTO_TEST (aes_gmac256_aad720_tc14) = {
+ .name = "256-GMAC 720-aad NIST CAVS TC14",
+ .alg = VNET_CRYPTO_ALG_AES_256_NULL_GMAC,
+ .key = TEST_DATA_STR (
+ "\xdd\x95\x25\x9b\xc8\xee\xfa\x3e\x49\x3c\xb1\xa6\xba\x1d\x8e\xe2\xb3\x41"
+ "\xd5\x23\x0d\x50\x36\x30\x94\xa2\xcc\x34\x33\xb3\xd9\xb9"),
+ .iv = TEST_DATA_STR ("\xa1\xa6\xce\xd0\x84\xf4\xf1\x39\x90\x75\x0a\x9e"),
+ .aad = TEST_DATA_STR (
+ "\xd4\x6d\xb9\x0e\x13\x68\x4b\x26\x14\x9c\xb3\xb7\xf7\x76\xe2\x28\xa0\x53"
+ "\x8f\xa1\x89\x2c\x41\x8a\xaa\xd0\x7a\xa0\x8d\x30\x76\xf4\xa5\x2b\xee\x8f"
+ "\x13\x0f\xf5\x60\xdb\x2b\x8d\x10\x09\xe9\x26\x0f\xa6\x23\x3f\xc2\x27\x33"
+ "\xe0\x50\xc9\xe4\xf7\xcc\x69\x90\x62\x76\x5e\x26\x1d\xff\xff\x11\x59\xe9"
+ "\x06\x0b\x26\xc8\x06\x5d\xfa\xb0\x40\x55\xb5\x8c\x82\xc3\x40\xd9\x87"
+ "\xc9"),
+ .tag = TEST_DATA_STR (
+ "\x9e\x12\x0b\x01\x89\x9f\xe2\xcb\x3e\x3a\x0b\x0c\x05\x04\x59\x40"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/unittest/crypto/chacha20_poly1305.c b/src/plugins/unittest/crypto/chacha20_poly1305.c
index 650898524fd..740d6304b8f 100644
--- a/src/plugins/unittest/crypto/chacha20_poly1305.c
+++ b/src/plugins/unittest/crypto/chacha20_poly1305.c
@@ -61,7 +61,6 @@ static u8 tc1_ciphertext[] = {
0x61, 0x16
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc1) = {
.name = "CHACHA20-POLY1305 TC1",
.alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305,
@@ -72,7 +71,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc1) = {
.plaintext = TEST_DATA (tc1_plaintext),
.ciphertext = TEST_DATA (tc1_ciphertext),
};
-/* *INDENT-ON* */
static u8 tc2_key[] = {
0x2d, 0xb0, 0x5d, 0x40, 0xc8, 0xed, 0x44, 0x88,
@@ -100,7 +98,6 @@ static u8 tc2_plaintext[] = { };
static u8 tc2_ciphertext[] = { };
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc2) = {
.name = "CHACHA20-POLY1305 TC2",
.alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305,
@@ -111,7 +108,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc2) = {
.plaintext = TEST_DATA (tc2_plaintext),
.ciphertext = TEST_DATA (tc2_ciphertext),
};
-/* *INDENT-ON* */
static u8 tc3_key[] = {
0x4c, 0xf5, 0x96, 0x83, 0x38, 0xe6, 0xae, 0x7f,
@@ -137,7 +133,6 @@ static u8 tc3_plaintext[] = { };
static u8 tc3_ciphertext[] = { };
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc3) = {
.name = "CHACHA20-POLY1305 TC3",
.alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305,
@@ -148,5 +143,4 @@ UNITTEST_REGISTER_CRYPTO_TEST (chacha20_poly1305_tc3) = {
.plaintext = TEST_DATA (tc3_plaintext),
.ciphertext = TEST_DATA (tc3_ciphertext),
};
-/* *INDENT-ON* */
diff --git a/src/plugins/unittest/crypto/crypto.h b/src/plugins/unittest/crypto/crypto.h
index 5e09a3ab0ec..90f75dbcfac 100644
--- a/src/plugins/unittest/crypto/crypto.h
+++ b/src/plugins/unittest/crypto/crypto.h
@@ -61,6 +61,10 @@ typedef struct
extern crypto_test_main_t crypto_test_main;
#define TEST_DATA(n) { .data = (u8 *) n, .length = sizeof (n)}
+#define TEST_DATA_STR(n) \
+ { \
+ .data = (u8 *) n, .length = sizeof (n) - 1 \
+ }
#define TEST_DATA_CHUNK(s,off,n) { .data = (u8 *) s + off, .length = n}
#define UNITTEST_REGISTER_CRYPTO_TEST(x) \
diff --git a/src/plugins/unittest/crypto/rfc2202_hmac_md5.c b/src/plugins/unittest/crypto/rfc2202_hmac_md5.c
index 7a39aed3030..c9604b84c1d 100644
--- a/src/plugins/unittest/crypto/rfc2202_hmac_md5.c
+++ b/src/plugins/unittest/crypto/rfc2202_hmac_md5.c
@@ -31,7 +31,6 @@ static u8 md5_tc1_digest[] = {
0x13, 0xf4, 0x8e, 0xf8, 0x15, 0x8b, 0xfc, 0x9d
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc1) = {
.name = "RFC2202 HMAC-MD5 TC1",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -39,7 +38,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc1) = {
.plaintext = TEST_DATA (md5_tc1_data),
.digest = TEST_DATA (md5_tc1_digest),
};
-/* *INDENT-ON* */
static char md5_tc2_key[4] = "Jefe";
@@ -50,7 +48,6 @@ static u8 md5_tc2_digest[] = {
0xea, 0xa8, 0x6e, 0x31, 0x0a, 0x5d, 0xb7, 0x38,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc2) = {
.name = "RFC2202 HMAC-MD5 TC2",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -58,7 +55,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc2) = {
.plaintext = TEST_DATA (md5_tc2_data),
.digest = TEST_DATA (md5_tc2_digest),
};
-/* *INDENT-ON* */
static char md5_tc3_key[16] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -80,7 +76,6 @@ static u8 md5_tc3_digest[] = {
0xdb, 0xb8, 0xc7, 0x33, 0xf0, 0xe8, 0xb3, 0xf6,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc3) = {
.name = "RFC2202 HMAC-MD5 TC3",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -88,7 +83,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc3) = {
.plaintext = TEST_DATA (md5_tc3_data),
.digest = TEST_DATA (md5_tc3_digest),
};
-/* *INDENT-ON* */
static u8 md5_tc4_key[25] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@@ -112,7 +106,6 @@ static u8 md5_tc4_digest[] = {
0x3a, 0x75, 0x16, 0x47, 0x46, 0xff, 0xaa, 0x79,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc4) = {
.name = "RFC2202 HMAC-MD5 TC4",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -120,7 +113,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc4) = {
.plaintext = TEST_DATA (md5_tc4_data),
.digest = TEST_DATA (md5_tc4_digest),
};
-/* *INDENT-ON* */
static u8 md5_tc5_key[16] = {
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
@@ -134,7 +126,6 @@ static u8 md5_tc5_digest[] = {
0xf9, 0xba, 0xb9, 0x95, 0x69, 0x0e, 0xfd, 0x4c,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc5) = {
.name = "RFC2202 HMAC-MD5 TC5",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -142,7 +133,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc5) = {
.plaintext = TEST_DATA (md5_tc5_data),
.digest = TEST_DATA (md5_tc5_digest),
};
-/* *INDENT-ON* */
static u8 md5_tc6_key[80] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -165,7 +155,6 @@ static u8 md5_tc6_digest[] = {
0x0b, 0x62, 0xe6, 0xce, 0x61, 0xb9, 0xd0, 0xcd,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc6) = {
.name = "RFC2202 HMAC-MD5 TC6",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -173,7 +162,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc6) = {
.plaintext = TEST_DATA (md5_tc6_data),
.digest = TEST_DATA (md5_tc6_digest),
};
-/* *INDENT-ON* */
static char md5_tc7_data[73] =
"Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data";
@@ -183,7 +171,6 @@ static u8 md5_tc7_digest[] = {
0x1f, 0xb1, 0xf5, 0x62, 0xdb, 0x3a, 0xa5, 0x3e,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc7) = {
.name = "RFC2202 HMAC-MD5 TC7",
.alg = VNET_CRYPTO_ALG_HMAC_MD5,
@@ -203,7 +190,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_md5_tc7_chained) = {
TEST_DATA_CHUNK (md5_tc7_data, 40, 33)
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto/rfc2202_hmac_sha1.c b/src/plugins/unittest/crypto/rfc2202_hmac_sha1.c
index 2513c5ebad2..aa440625cc6 100644
--- a/src/plugins/unittest/crypto/rfc2202_hmac_sha1.c
+++ b/src/plugins/unittest/crypto/rfc2202_hmac_sha1.c
@@ -33,7 +33,6 @@ static u8 sha1_tc1_digest[] = {
0xf1, 0x46, 0xbe, 0x00
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc1) = {
.name = "RFC2202 HMAC-SHA-1 TC1",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -41,7 +40,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc1) = {
.plaintext = TEST_DATA (sha1_tc1_data),
.digest = TEST_DATA (sha1_tc1_digest),
};
-/* *INDENT-ON* */
static char sha1_tc2_key[4] = "Jefe";
@@ -53,7 +51,6 @@ static u8 sha1_tc2_digest[] = {
0x25, 0x9a, 0x7c, 0x79
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc2) = {
.name = "RFC2202 HMAC-SHA-1 TC2",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -61,7 +58,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc2) = {
.plaintext = TEST_DATA (sha1_tc2_data),
.digest = TEST_DATA (sha1_tc2_digest),
};
-/* *INDENT-ON* */
static u8 sha1_tc3_key[20] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -85,7 +81,6 @@ static u8 sha1_tc3_digest[] = {
0x63, 0xf1, 0x75, 0xd3,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc3) = {
.name = "RFC2202 HMAC-SHA-1 TC3",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -93,7 +88,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc3) = {
.plaintext = TEST_DATA (sha1_tc3_data),
.digest = TEST_DATA (sha1_tc3_digest),
};
-/* *INDENT-ON* */
static u8 sha1_tc4_key[25] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@@ -118,7 +112,6 @@ static u8 sha1_tc4_digest[] = {
0x2d, 0x72, 0x35, 0xda,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc4) = {
.name = "RFC2202 HMAC-SHA-1 TC4",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -126,7 +119,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc4) = {
.plaintext = TEST_DATA (sha1_tc4_data),
.digest = TEST_DATA (sha1_tc4_digest),
};
-/* *INDENT-ON* */
static u8 sha1_tc5_key[20] = {
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
@@ -142,7 +134,6 @@ static u8 sha1_tc5_digest[] = {
0x4a, 0x9a, 0x5a, 0x04
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc5) = {
.name = "RFC2202 HMAC-SHA-1 TC5",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -150,14 +141,12 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc5) = {
.plaintext = TEST_DATA (sha1_tc5_data),
.digest = TEST_DATA (sha1_tc5_digest),
};
-/* *INDENT-ON* */
static u8 sha1_tc5_digest_96[12] = {
0x4c, 0x1a, 0x03, 0x42, 0x4b, 0x55, 0xe0, 0x7f,
0xe7, 0xf2, 0x7b, 0xe1
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc5_trunc) = {
.name = "RFC2202 HMAC-SHA-1-96 TC5-trunc",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -165,7 +154,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc5_trunc) = {
.plaintext = TEST_DATA (sha1_tc5_data),
.digest = TEST_DATA (sha1_tc5_digest_96),
};
-/* *INDENT-ON* */
static u8 sha1_tc6_key[80] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -189,7 +177,6 @@ static u8 sha1_tc6_digest[] = {
0xed, 0x40, 0x21, 0x12
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc6) = {
.name = "RFC2202 HMAC-SHA-1 TC6",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -197,7 +184,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc6) = {
.plaintext = TEST_DATA (sha1_tc6_data),
.digest = TEST_DATA (sha1_tc6_digest),
};
-/* *INDENT-ON* */
static char sha1_tc7_data[73] =
"Test Using Larger Than Block-Size Key and Larger Than One Block-Size Data";
@@ -208,7 +194,6 @@ static u8 sha1_tc7_digest[20] = {
0xbb, 0xff, 0x1a, 0x91
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc7) = {
.name = "RFC2202 HMAC-SHA-1 TC7",
.alg = VNET_CRYPTO_ALG_HMAC_SHA1,
@@ -237,7 +222,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc_2202_sha1_tc7_inc) = {
.key.length = 80,
.digest.length = 12,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto/rfc4231.c b/src/plugins/unittest/crypto/rfc4231.c
index 127e1bfe521..edd502e0609 100644
--- a/src/plugins/unittest/crypto/rfc4231.c
+++ b/src/plugins/unittest/crypto/rfc4231.c
@@ -61,7 +61,6 @@ static u8 tc1_digest_sha512[] = {
0x2e, 0x69, 0x6c, 0x20, 0x3a, 0x12, 0x68, 0x54
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc1_sha224) = {
.name = "RFC4231 TC1",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -93,7 +92,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc1_sha512) = {
.plaintext = TEST_DATA (tc1_data),
.digest = TEST_DATA (tc1_digest_sha512),
};
-/* *INDENT-ON* */
static char tc2_key[4] = "Jefe";
@@ -133,7 +131,6 @@ static u8 tc2_digest_sha512[] = {
0x63, 0x6e, 0x07, 0x0a, 0x38, 0xbc, 0xe7, 0x37,
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc2_sha224) = {
.name = "RFC4231 TC2",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -165,7 +162,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc2_sha512) = {
.plaintext = TEST_DATA (tc2_data),
.digest = TEST_DATA (tc2_digest_sha512),
};
-/* *INDENT-ON* */
static u8 tc3_key[20] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -217,7 +213,6 @@ static u8 tc3_digest_sha512[] = {
0x74, 0x27, 0x88, 0x59, 0xe1, 0x32, 0x92, 0xfb
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc3_sha224) = {
.name = "RFC4231 TC3",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -249,7 +244,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc3_sha512) = {
.plaintext = TEST_DATA (tc3_data),
.digest = TEST_DATA (tc3_digest_sha512),
};
-/* *INDENT-ON* */
static u8 tc4_key[25] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@@ -302,7 +296,6 @@ static u8 tc4_digest_sha512[] = {
0xe2, 0xad, 0xeb, 0xeb, 0x10, 0xa2, 0x98, 0xdd
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc4_sha224) = {
.name = "RFC4231 TC4",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -334,7 +327,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc4_sha512) = {
.plaintext = TEST_DATA (tc4_data),
.digest = TEST_DATA (tc4_digest_sha512),
};
-/* *INDENT-ON* */
static u8 tc5_key[20] = {
0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
@@ -368,7 +360,6 @@ static u8 tc5_digest_sha512[16] = {
0x1d, 0x41, 0x79, 0xbc, 0x89, 0x1d, 0x87, 0xa6
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc5_sha224) = {
.name = "RFC4231 TC5",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -400,7 +391,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc5_sha512) = {
.plaintext = TEST_DATA (tc5_data),
.digest = TEST_DATA (tc5_digest_sha512),
};
-/* *INDENT-ON* */
static u8 tc6_key[131] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -459,7 +449,6 @@ static u8 tc6_digest_sha512[] = {
0x8b, 0x91, 0x5a, 0x98, 0x5d, 0x78, 0x65, 0x98
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc6_sha224) = {
.name = "RFC4231 TC6",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -491,7 +480,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc6_sha512) = {
.plaintext = TEST_DATA (tc6_data),
.digest = TEST_DATA (tc6_digest_sha512),
};
-/* *INDENT-ON* */
static u8 tc7_key[131] = {
0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
@@ -552,7 +540,6 @@ static u8 tc7_digest_sha512[] = {
0x65, 0xc9, 0x74, 0x40, 0xfa, 0x8c, 0x6a, 0x58
};
-/* *INDENT-OFF* */
UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc7_sha224) = {
.name = "RFC4231 TC7",
.alg = VNET_CRYPTO_ALG_HMAC_SHA224,
@@ -598,7 +585,6 @@ UNITTEST_REGISTER_CRYPTO_TEST (rfc4231_tc7_sha512_chain) = {
TEST_DATA_CHUNK (tc7_data, 150, 2),
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/crypto_test.c b/src/plugins/unittest/crypto_test.c
index ed21e86b8d7..4bc06f71c79 100644
--- a/src/plugins/unittest/crypto_test.c
+++ b/src/plugins/unittest/crypto_test.c
@@ -139,8 +139,7 @@ print_results (vlib_main_t * vm, unittest_crypto_test_registration_t ** rv,
if (vec_len (err))
fail = 1;
- vlib_cli_output (vm, "%-60v%s%v", s, vec_len (err) ? "FAIL: " : "OK",
- err);
+ vlib_cli_output (vm, "%-65v%s%v", s, vec_len (err) ? "FAIL: " : "OK", err);
if (tm->verbose)
{
if (tm->verbose == 2)
@@ -455,7 +454,6 @@ test_crypto_static (vlib_main_t * vm, crypto_test_main_t * tm,
current_op = ops;
current_chained_op = chained_ops;
- /* *INDENT-OFF* */
vec_foreach_index (i, rv)
{
r = rv[i];
@@ -645,7 +643,6 @@ test_crypto_static (vlib_main_t * vm, crypto_test_main_t * tm,
op->user_data = i;
}
}
- /* *INDENT-ON* */
vnet_crypto_process_ops (vm, ops, vec_len (ops));
vnet_crypto_process_chained_ops (vm, chained_ops, chunks,
@@ -671,10 +668,8 @@ test_crypto_get_key_sz (vnet_crypto_alg_t alg)
#define _(n, s, l) \
case VNET_CRYPTO_ALG_##n: \
return l;
- /* *INDENT-OFF* */
foreach_crypto_cipher_alg
foreach_crypto_aead_alg
- /* *INDENT-ON* */
#undef _
case VNET_CRYPTO_ALG_HMAC_MD5:
case VNET_CRYPTO_ALG_HMAC_SHA1:
@@ -857,7 +852,7 @@ test_crypto_perf (vlib_main_t * vm, crypto_test_main_t * tm)
vnet_crypto_op_t *ops1 = 0, *ops2 = 0, *op1, *op2;
vnet_crypto_alg_data_t *ad = vec_elt_at_index (cm->algs, tm->alg);
vnet_crypto_key_index_t key_index = ~0;
- u8 key[32];
+ u8 key[64];
int buffer_size = vlib_buffer_get_default_data_size (vm);
u64 seed = clib_cpu_time_now ();
u64 t0[5], t1[5], t2[5], n_bytes = 0;
@@ -925,7 +920,6 @@ test_crypto_perf (vlib_main_t * vm, crypto_test_main_t * tm)
ad->op_by_type[VNET_CRYPTO_OP_TYPE_ENCRYPT]);
vnet_crypto_op_init (op2,
ad->op_by_type[VNET_CRYPTO_OP_TYPE_DECRYPT]);
- op1->flags = VNET_CRYPTO_OP_FLAG_INIT_IV;
op1->src = op2->src = op1->dst = op2->dst = b->data;
op1->key_index = op2->key_index = key_index;
op1->iv = op2->iv = b->data - 64;
@@ -1062,14 +1056,12 @@ test_crypto_command_fn (vlib_main_t * vm,
return test_crypto (vm, tm);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_crypto_command, static) =
{
.path = "test crypto",
.short_help = "test crypto",
.function = test_crypto_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
crypto_test_init (vlib_main_t * vm)
diff --git a/src/plugins/unittest/fib_test.c b/src/plugins/unittest/fib_test.c
index 76b675bca83..fbac809d726 100644
--- a/src/plugins/unittest/fib_test.c
+++ b/src/plugins/unittest/fib_test.c
@@ -142,28 +142,21 @@ fib_test_mk_intf (u32 ninterfaces)
for (i = 0; i < ninterfaces; i++)
{
- hw_address[5] = i;
+ vnet_eth_interface_registration_t eir = {};
+ vnet_main_t *vnm = vnet_get_main();
- error = ethernet_register_interface(vnet_get_main(),
- test_interface_device_class.index,
- i /* instance */,
- hw_address,
- &tm->hw_if_indicies[i],
- /* flag change */ 0);
+ hw_address[5] = i;
- FIB_TEST((NULL == error), "ADD interface %d", i);
+ eir.dev_class_index = test_interface_device_class.index;
+ eir.dev_instance = i;
+ eir.address = hw_address;
+ tm->hw_if_indicies[i] = vnet_eth_register_interface (vnm, &eir);
error = vnet_hw_interface_set_flags(vnet_get_main(),
tm->hw_if_indicies[i],
VNET_HW_INTERFACE_FLAG_LINK_UP);
tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
tm->hw_if_indicies[i]);
- vec_validate (ip4_main.fib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- vec_validate (ip6_main.fib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
error = vnet_sw_interface_set_flags(vnet_get_main(),
tm->hw[i]->sw_if_index,
@@ -788,6 +781,69 @@ fib_test_validate_entry (fib_node_index_t fei,
}
static int
+fib_test_multipath_v4 (const test_main_t *tm, const u32 fib_index,
+ const fib_prefix_t *pfx, const int n_paths,
+ const int expected_n_buckets)
+{
+ const int path_list_pool_size = fib_path_list_pool_size();
+ const int path_list_db_size = fib_path_list_db_size();
+ const int entry_pool_size = fib_entry_pool_size();
+ fib_route_path_t *r_paths = NULL;
+ const load_balance_t *lb;
+ const dpo_id_t *dpo;
+ u32 fei;
+ int res = 0;
+ int i;
+
+ for (i = 0; i < n_paths; i++)
+ {
+ fib_route_path_t r_path = {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = {
+ .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + i),
+ },
+ .frp_sw_if_index = tm->hw[0]->sw_if_index,
+ .frp_weight = 1,
+ .frp_fib_index = ~0,
+ .frp_flags = FIB_ROUTE_PATH_ATTACHED,
+ };
+ vec_add1(r_paths, r_path);
+ }
+
+ fib_table_entry_update(fib_index,
+ pfx,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ r_paths);
+
+ fei = fib_table_lookup_exact_match(fib_index, pfx);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "prefix present");
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ lb = load_balance_get(dpo->dpoi_index);
+ FIB_TEST((lb->lb_n_buckets == expected_n_buckets),
+ "prefix lb over %d paths", lb->lb_n_buckets);
+
+ fib_table_entry_delete(fib_index,
+ pfx,
+ FIB_SOURCE_API);
+ FIB_TEST(FIB_NODE_INDEX_INVALID ==
+ fib_table_lookup_exact_match(fib_index, pfx), "prefix removed");
+ vec_free(r_paths);
+
+ /*
+ * add-remove test. no change.
+ */
+ FIB_TEST((path_list_db_size == fib_path_list_db_size()),
+ "path list DB population:%d", fib_path_list_db_size());
+ FIB_TEST((path_list_pool_size == fib_path_list_pool_size()),
+ "path list pool size is %d", fib_path_list_pool_size());
+ FIB_TEST((entry_pool_size == fib_entry_pool_size()),
+ "entry pool size is %d", fib_entry_pool_size());
+ return res;
+}
+
+static int
fib_test_v4 (void)
{
/*
@@ -826,9 +882,7 @@ fib_test_v4 (void)
FIB_SOURCE_API);
for (ii = 0; ii < 4; ii++)
- {
- ip4_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index;
- }
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[ii]->sw_if_index, fib_index);
fib_prefix_t pfx_0_0_0_0_s_0 = {
.fp_len = 0,
@@ -3623,52 +3677,26 @@ fib_test_v4 (void)
/*
* A route with multiple paths at once
*/
- fib_route_path_t *r_paths = NULL;
-
- for (ii = 0; ii < 4; ii++)
- {
- fib_route_path_t r_path = {
- .frp_proto = DPO_PROTO_IP4,
- .frp_addr = {
- .ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a02 + ii),
- },
- .frp_sw_if_index = tm->hw[0]->sw_if_index,
- .frp_weight = 1,
- .frp_fib_index = ~0,
- };
- vec_add1(r_paths, r_path);
- }
-
- fib_table_entry_update(fib_index,
- &pfx_4_4_4_4_s_32,
- FIB_SOURCE_API,
- FIB_ENTRY_FLAG_NONE,
- r_paths);
-
- fei = fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32);
- FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "4.4.4.4/32 present");
- dpo = fib_entry_contribute_ip_forwarding(fei);
+ FIB_TEST(0 ==
+ fib_test_multipath_v4(tm, fib_index, &pfx_4_4_4_4_s_32, 4, 4),
+ "multipath with 4 nexthops");
- lb = load_balance_get(dpo->dpoi_index);
- FIB_TEST((lb->lb_n_buckets == 4), "4.4.4.4/32 lb over %d paths", lb->lb_n_buckets);
-
- fib_table_entry_delete(fib_index,
- &pfx_4_4_4_4_s_32,
- FIB_SOURCE_API);
- FIB_TEST(FIB_NODE_INDEX_INVALID ==
- fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
- "4.4.4.4/32 removed");
- vec_free(r_paths);
+ /*
+ * A route with lots of multiple paths that will overflow max supported
+ * lb buckets because of normalization
+ */
+ FIB_TEST(0 ==
+ fib_test_multipath_v4(tm, fib_index, &pfx_4_4_4_4_s_32,
+ LB_MAX_BUCKETS / 2 + 23, LB_MAX_BUCKETS),
+ "multipath with too many nexthops");
/*
- * add-remove test. no change.
+ * A route with more paths than max supported lb buckets
*/
- FIB_TEST((1 == fib_path_list_db_size()), "path list DB population:%d",
- fib_path_list_db_size());
- FIB_TEST((PNBR+5 == fib_path_list_pool_size()), "path list pool size is %d",
- fib_path_list_pool_size());
- FIB_TEST((ENBR+7 == fib_entry_pool_size()), "entry pool size is %d",
- fib_entry_pool_size());
+ FIB_TEST(0 ==
+ fib_test_multipath_v4 (tm, fib_index, &pfx_4_4_4_4_s_32,
+ LB_MAX_BUCKETS + 13, LB_MAX_BUCKETS),
+ "multipath with too many nexthops");
/*
* A route deag route
@@ -3707,7 +3735,6 @@ fib_test_v4 (void)
FIB_TEST(FIB_NODE_INDEX_INVALID ==
fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
"4.4.4.4/32 removed");
- vec_free(r_paths);
/*
* A route deag route in a source lookup table
@@ -3746,7 +3773,6 @@ fib_test_v4 (void)
FIB_TEST(FIB_NODE_INDEX_INVALID ==
fib_table_lookup_exact_match(fib_index, &pfx_4_4_4_4_s_32),
"4.4.4.4/32 removed");
- vec_free(r_paths);
/*
* add-remove test. no change.
@@ -4397,6 +4423,9 @@ fib_test_v4 (void)
FIB_SOURCE_INTERFACE)),
"NO INterface Source'd prefixes");
+ for (ii = 0; ii < 4; ii++)
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[ii]->sw_if_index, 0);
+
fib_table_unlock(fib_index, FIB_PROTOCOL_IP4, FIB_SOURCE_API);
FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
@@ -4455,9 +4484,7 @@ fib_test_v6 (void)
FIB_SOURCE_API);
for (ii = 0; ii < 4; ii++)
- {
- ip6_main.fib_index_by_sw_if_index[tm->hw[ii]->sw_if_index] = fib_index;
- }
+ fib_table_bind (FIB_PROTOCOL_IP6, tm->hw[ii]->sw_if_index, fib_index);
fib_prefix_t pfx_0_0 = {
.fp_len = 0,
@@ -5176,12 +5203,11 @@ fib_test_v6 (void)
/*
* Add the interface back. routes stay unresolved.
*/
- error = ethernet_register_interface(vnet_get_main(),
- test_interface_device_class.index,
- 0 /* instance */,
- hw_address,
- &tm->hw_if_indicies[0],
- /* flag change */ 0);
+ vnet_eth_interface_registration_t eir = {};
+ eir.dev_class_index = test_interface_device_class.index;
+ eir.dev_instance = 0;
+ eir.address = hw_address;
+ tm->hw_if_indicies[0] = vnet_eth_register_interface (vnet_get_main(), &eir);
fei = fib_table_lookup_exact_match(fib_index, &pfx_2001_b_s_64);
FIB_TEST(load_balance_is_drop(fib_entry_contribute_ip_forwarding(fei)),
@@ -5276,6 +5302,10 @@ fib_test_v6 (void)
/*
* now remove the VRF
*/
+
+ for (ii = 0; ii < 4; ii++)
+ fib_table_bind (FIB_PROTOCOL_IP6, tm->hw[ii]->sw_if_index, 0);
+
fib_table_unlock(fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_API);
FIB_TEST((0 == fib_path_list_db_size()), "path list DB population:%d",
@@ -5314,14 +5344,12 @@ fib_test_ae (void)
{
const dpo_id_t *dpo, *dpo_drop;
const u32 fib_index = 0;
- fib_node_index_t fei;
+ fib_node_index_t dfrt, fei;
test_main_t *tm;
- ip4_main_t *im;
int res;
res = 0;
tm = &test_main;
- im = &ip4_main;
FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
adj_nbr_db_size());
@@ -5341,8 +5369,7 @@ fib_test_ae (void)
},
};
- vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index);
- im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index;
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[0]->sw_if_index, fib_index);
dpo_drop = drop_dpo_get(DPO_PROTO_IP4);
@@ -5415,6 +5442,44 @@ fib_test_ae (void)
import_fib_index1 = fib_table_find_or_create_and_lock(FIB_PROTOCOL_IP4,
11,
FIB_SOURCE_CLI);
+ /*
+ * Add default route in the import FIB
+ */
+ fib_prefix_t pfx_0_0_0_0_s_0 = {
+ .fp_len = 0,
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_addr = {
+ .ip4 = {
+ {0}
+ },
+ },
+ };
+
+ dfrt = fib_table_lookup(import_fib_index1, &pfx_0_0_0_0_s_0);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != dfrt), "default route present");
+
+ fib_table_entry_path_add(import_fib_index1,
+ &pfx_0_0_0_0_s_0,
+ FIB_SOURCE_API,
+ FIB_ENTRY_FLAG_NONE,
+ DPO_PROTO_IP4,
+ NULL,
+ tm->hw[0]->sw_if_index,
+ ~0, // invalid fib index
+ 1,
+ NULL,
+ FIB_ROUTE_PATH_FLAG_NONE);
+ fei = fib_table_lookup(fib_index, &pfx_0_0_0_0_s_0);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "default route present");
+ FIB_TEST((fei != dfrt), "default route added");
+
+ /*
+ * delete default route and check for the presence in the import table
+ */
+ fib_table_entry_delete(import_fib_index1, &pfx_0_0_0_0_s_0, FIB_SOURCE_API);
+ fei = fib_table_lookup(import_fib_index1, &pfx_0_0_0_0_s_0);
+ FIB_TEST((FIB_NODE_INDEX_INVALID != fei), "default route present");
+ FIB_TEST((fei == dfrt), "default route removed");
/*
* Add an attached route in the import FIB
@@ -5871,11 +5936,9 @@ static int
fib_test_pref (void)
{
test_main_t *tm;
- ip4_main_t *im;
int res, i;
tm = &test_main;
- im = &ip4_main;
res = 0;
const fib_prefix_t pfx_1_1_1_1_s_32 = {
@@ -5888,10 +5951,8 @@ fib_test_pref (void)
},
};
- vec_validate(im->fib_index_by_sw_if_index, tm->hw[2]->sw_if_index);
-
for (i = 0; i <= 2; i++)
- im->fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[i]->sw_if_index, 0);
/*
* 2 high, 2 medium and 2 low preference non-recursive paths
@@ -6340,12 +6401,10 @@ fib_test_label (void)
const u32 fib_index = 0;
int lb_count, ii, res;
test_main_t *tm;
- ip4_main_t *im;
res = 0;
lb_count = pool_elts(load_balance_pool);
tm = &test_main;
- im = &ip4_main;
/*
* add interface routes. We'll assume this works. It's more rigorously
@@ -6365,8 +6424,7 @@ fib_test_label (void)
FIB_TEST((0 == adj_nbr_db_size()), "ADJ DB size is %d",
adj_nbr_db_size());
- vec_validate(im->fib_index_by_sw_if_index, tm->hw[0]->sw_if_index);
- im->fib_index_by_sw_if_index[tm->hw[0]->sw_if_index] = fib_index;
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[0]->sw_if_index, fib_index);
fib_table_entry_update_one_path(fib_index, &local0_pfx,
FIB_SOURCE_INTERFACE,
@@ -6411,8 +6469,7 @@ fib_test_label (void)
},
};
- vec_validate(im->fib_index_by_sw_if_index, tm->hw[1]->sw_if_index);
- im->fib_index_by_sw_if_index[tm->hw[1]->sw_if_index] = fib_index;
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[1]->sw_if_index, fib_index);
fib_table_entry_update_one_path(fib_index, &local1_pfx,
FIB_SOURCE_INTERFACE,
@@ -7557,6 +7614,7 @@ fib_test_child_get_node (fib_node_index_t index)
}
static int fib_test_walk_spawns_walks;
+static fib_node_type_t test_node_type;
static fib_node_back_walk_rc_t
fib_test_child_back_walk_notify (fib_node_t *node,
@@ -7567,9 +7625,9 @@ fib_test_child_back_walk_notify (fib_node_t *node,
vec_add1(tc->ctxs, *ctx);
if (1 == fib_test_walk_spawns_walks)
- fib_walk_sync(FIB_NODE_TYPE_TEST, tc->index, ctx);
+ fib_walk_sync(test_node_type, tc->index, ctx);
if (2 == fib_test_walk_spawns_walks)
- fib_walk_async(FIB_NODE_TYPE_TEST, tc->index,
+ fib_walk_async(test_node_type, tc->index,
FIB_WALK_PRIORITY_HIGH, ctx);
return (FIB_NODE_BACK_WALK_CONTINUE);
@@ -7610,23 +7668,23 @@ fib_test_walk (void)
res = 0;
vm = vlib_get_main();
- fib_node_register_type(FIB_NODE_TYPE_TEST, &fib_test_child_vft);
+ test_node_type = fib_node_register_new_type("fib-test", &fib_test_child_vft);
/*
* init a fake node on which we will add children
*/
fib_node_init(&fib_test_nodes[PARENT_INDEX].node,
- FIB_NODE_TYPE_TEST);
+ test_node_type);
FOR_EACH_TEST_CHILD(tc)
{
- fib_node_init(&tc->node, FIB_NODE_TYPE_TEST);
+ fib_node_init(&tc->node, test_node_type);
fib_node_lock(&tc->node);
tc->ctxs = NULL;
tc->index = ii;
- tc->sibling = fib_node_child_add(FIB_NODE_TYPE_TEST,
+ tc->sibling = fib_node_child_add(test_node_type,
PARENT_INDEX,
- FIB_NODE_TYPE_TEST, ii);
+ test_node_type, ii);
}
/*
@@ -7634,7 +7692,7 @@ fib_test_walk (void)
*/
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
FIB_TEST(N_TEST_CHILDREN+1 == fib_node_list_get_size(PARENT()->fn_children),
"Parent has %d children pre-walk",
@@ -7680,9 +7738,9 @@ fib_test_walk (void)
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_LOW, &low_ctx);
fib_walk_process_queues(vm, 1);
@@ -7708,9 +7766,9 @@ fib_test_walk (void)
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &low_ctx);
fib_walk_process_queues(vm, 1);
@@ -7736,9 +7794,9 @@ fib_test_walk (void)
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
low_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &low_ctx);
fib_walk_process_queues(vm, 1);
@@ -7764,7 +7822,7 @@ fib_test_walk (void)
*/
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
fib_walk_process_queues(vm, 0);
@@ -7818,7 +7876,7 @@ fib_test_walk (void)
/*
* schedule another walk that will catch-up and merge.
*/
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
fib_walk_process_queues(vm, 1);
@@ -7851,13 +7909,13 @@ fib_test_walk (void)
*/
high_ctx.fnbw_reason = FIB_NODE_BW_REASON_FLAG_RESOLVE;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
fib_walk_process_queues(vm, 0);
fib_walk_process_queues(vm, 0);
- fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx);
+ fib_walk_sync(test_node_type, PARENT_INDEX, &high_ctx);
FOR_EACH_TEST_CHILD(tc)
{
@@ -7886,9 +7944,9 @@ fib_test_walk (void)
* make the parent a child of one of its children, thus inducing a routing loop.
*/
fib_test_nodes[PARENT_INDEX].sibling =
- fib_node_child_add(FIB_NODE_TYPE_TEST,
+ fib_node_child_add(test_node_type,
1, // the first child
- FIB_NODE_TYPE_TEST,
+ test_node_type,
PARENT_INDEX);
/*
@@ -7897,7 +7955,7 @@ fib_test_walk (void)
*/
fib_test_walk_spawns_walks = 1;
- fib_walk_sync(FIB_NODE_TYPE_TEST, PARENT_INDEX, &high_ctx);
+ fib_walk_sync(test_node_type, PARENT_INDEX, &high_ctx);
FOR_EACH_TEST_CHILD(tc)
{
@@ -7938,7 +7996,7 @@ fib_test_walk (void)
* execute an async walk of the graph loop, with each child spawns sync walks
*/
high_ctx.fnbw_depth = 0;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
fib_walk_process_queues(vm, 1);
@@ -7960,7 +8018,7 @@ fib_test_walk (void)
*/
fib_test_walk_spawns_walks = 2;
high_ctx.fnbw_depth = 0;
- fib_walk_async(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_walk_async(test_node_type, PARENT_INDEX,
FIB_WALK_PRIORITY_HIGH, &high_ctx);
fib_walk_process_queues(vm, 1);
@@ -7978,7 +8036,7 @@ fib_test_walk (void)
}
- fib_node_child_remove(FIB_NODE_TYPE_TEST,
+ fib_node_child_remove(test_node_type,
1, // the first child
fib_test_nodes[PARENT_INDEX].sibling);
@@ -7987,7 +8045,7 @@ fib_test_walk (void)
*/
FOR_EACH_TEST_CHILD(tc)
{
- fib_node_child_remove(FIB_NODE_TYPE_TEST, PARENT_INDEX,
+ fib_node_child_remove(test_node_type, PARENT_INDEX,
tc->sibling);
fib_node_deinit(&tc->node);
fib_node_unlock(&tc->node);
@@ -8385,12 +8443,14 @@ fib_test_bfd (void)
bfd_10_10_10_1.hop_type = BFD_HOP_TYPE_SINGLE;
bfd_10_10_10_1.udp.key.sw_if_index = tm->hw[0]->sw_if_index;
- adj_bfd_notify(BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1);
-
ai_10_10_10_1 = adj_nbr_add_or_lock(FIB_PROTOCOL_IP4,
VNET_LINK_IP4,
&nh_10_10_10_1,
tm->hw[0]->sw_if_index);
+ bfd_10_10_10_1.udp.adj_index = ai_10_10_10_1;
+
+ adj_bfd_notify(BFD_LISTEN_EVENT_CREATE, &bfd_10_10_10_1);
+
/*
* whilst the BFD session is not signalled, the adj is up
*/
@@ -8535,7 +8595,7 @@ lfib_test (void)
mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API, NULL);
mpls_sw_interface_enable_disable(&mpls_main,
tm->hw[0]->sw_if_index,
- 1, 1);
+ 1);
ip46_address_t nh_10_10_10_1 = {
.ip4.as_u32 = clib_host_to_net_u32(0x0a0a0a01),
@@ -9106,7 +9166,7 @@ lfib_test (void)
*/
mpls_sw_interface_enable_disable(&mpls_main,
tm->hw[0]->sw_if_index,
- 0, 1);
+ 0);
mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API);
FIB_TEST(0 == pool_elts(mpls_disp_dpo_pool),
@@ -9128,22 +9188,15 @@ fib_test_inherit (void)
fib_node_index_t fei;
int n_feis, res, i;
test_main_t *tm;
- ip4_main_t *im4;
- ip6_main_t *im6;
tm = &test_main;
- im4 = &ip4_main;
- im6 = &ip6_main;
res = 0;
- vec_validate(im4->fib_index_by_sw_if_index, tm->hw[2]->sw_if_index);
- vec_validate(im6->fib_index_by_sw_if_index, tm->hw[2]->sw_if_index);
-
for (i = 0; i <= 2; i++)
- {
- im4->fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- im6->fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- }
+ {
+ fib_table_bind (FIB_PROTOCOL_IP4, tm->hw[i]->sw_if_index, 0);
+ fib_table_bind (FIB_PROTOCOL_IP6, tm->hw[i]->sw_if_index, 0);
+ }
n_feis = fib_entry_pool_size();
const ip46_address_t nh_10_10_10_1 = {
@@ -10610,7 +10663,7 @@ fib_test_sticky (void)
fib_route_path_t *r_paths2 = NULL;
r_paths2 = vec_dup(r_paths);
- _vec_len(r_paths2) = 3;
+ vec_set_len (r_paths2, 3);
pl_index = fib_path_list_create(FIB_PATH_LIST_FLAG_SHARED, r_paths2);
fib_path_list_lock(pl_index);
@@ -10681,7 +10734,7 @@ fib_test_sticky (void)
fib_route_path_t *r_paths3 = NULL;
r_paths3 = vec_dup(r_paths);
- _vec_len(r_paths3) = 3;
+ vec_set_len (r_paths3, 3);
r_paths3[0].frp_weight = 3;
diff --git a/src/plugins/unittest/gso_test.c b/src/plugins/unittest/gso_test.c
new file mode 100644
index 00000000000..43c614341d2
--- /dev/null
+++ b/src/plugins/unittest/gso_test.c
@@ -0,0 +1,456 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/time.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/gso/gso.h>
+#include <vnet/gso/hdr_offset_parser.h>
+#include <vnet/tcp/tcp_packet.h>
+
+#define MAX_GSO_PACKET_SIZE (TCP_MAX_GSO_SZ - 1)
+#define MIN_GSO_SEGMENT_SIZE 128
+#define MAX_GSO_SEGMENT_SIZE 2048
+#define DEFAULT_GSO_SEGMENT_SIZE 1448
+
+typedef struct _gso_test_data
+{
+ const char *name;
+ const char *description;
+ u8 *data;
+ u32 data_size;
+ u32 l4_hdr_len;
+ u8 is_l2;
+ u8 is_ip6;
+ struct _gso_test_data *next;
+} gso_test_data_t;
+
+typedef struct
+{
+ int verbose;
+
+ char *gso_name;
+ u32 warmup_rounds;
+ u32 rounds;
+ u32 n_buffers;
+ u32 buffer_size;
+ u32 packet_size;
+ u32 gso_size;
+ gso_test_data_t *gso_test_data;
+} gso_test_main_t;
+
+gso_test_main_t gso_test_main;
+
+#define GSO_TEST_REGISTER_DATA(x, ...) \
+ __VA_ARGS__ gso_test_data_t __gso_test_data_##x; \
+ static void __clib_constructor __gso_test_data_fn_##x (void) \
+ { \
+ gso_test_main_t *gtm = &gso_test_main; \
+ __gso_test_data_##x.next = gtm->gso_test_data; \
+ gtm->gso_test_data = &__gso_test_data_##x; \
+ } \
+ __VA_ARGS__ gso_test_data_t __gso_test_data_##x
+
+// ipv4
+u8 gso_ipv4_tcp_data[64] = {
+ 0x02, 0xfe, 0x39, 0xe5, 0x09, 0x8f, 0x02, 0xfe, 0x2d, 0x18, 0x63, 0x18, 0x08,
+ 0x00, 0x45, 0x00, 0x05, 0xdc, 0xdb, 0x42, 0x40, 0x00, 0x40, 0x06, 0xc4, 0x85,
+ 0xc0, 0xa8, 0x0a, 0x02, 0xc0, 0xa8, 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34,
+ 0x93, 0xa8, 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03,
+ 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+GSO_TEST_REGISTER_DATA (gso_ipv4_tcp, static) = {
+ .name = "ipv4-tcp",
+ .description = "IPv4 TCP",
+ .data = gso_ipv4_tcp_data,
+ .data_size = sizeof (gso_ipv4_tcp_data),
+ .l4_hdr_len = sizeof (tcp_header_t),
+ .is_l2 = 1,
+ .is_ip6 = 0,
+};
+
+// ipv6
+u8 gso_ipv6_tcp_data[] = {
+ 0x02, 0xfe, 0x39, 0xe5, 0x09, 0x8f, 0x02, 0xfe, 0x2d, 0x18, 0x63, 0x18,
+ 0x08, 0x00, 0x60, 0x0d, 0xf4, 0x97, 0x00, 0x40, 0x06, 0x40, 0xfd, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x10, 0x00, 0xfd, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x10, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34, 0x93,
+ 0xa8, 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03,
+ 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+GSO_TEST_REGISTER_DATA (gso_ipv6_tcp, static) = {
+ .name = "ipv6-tcp",
+ .description = "IPv6 TCP",
+ .data = gso_ipv6_tcp_data,
+ .data_size = sizeof (gso_ipv6_tcp_data),
+ .l4_hdr_len = sizeof (tcp_header_t),
+ .is_l2 = 1,
+ .is_ip6 = 1,
+};
+
+/*
+ * this does not support tunnel packets
+ */
+static void
+set_hdr_offsets (vlib_buffer_t *b0, u8 is_l2)
+{
+ u16 ethertype = 0, l2hdr_sz = 0;
+ vnet_buffer_oflags_t oflags = 0;
+ u8 l4_proto = 0;
+
+ if (!is_l2)
+ {
+ switch (b0->data[0] & 0xf0)
+ {
+ case 0x40:
+ ethertype = ETHERNET_TYPE_IP4;
+ break;
+ case 0x60:
+ ethertype = ETHERNET_TYPE_IP6;
+ break;
+ }
+ }
+ else
+ {
+ ethernet_header_t *eh = (ethernet_header_t *) b0->data;
+ ethertype = clib_net_to_host_u16 (eh->type);
+ l2hdr_sz = sizeof (ethernet_header_t);
+
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eh + 1);
+
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ if (ethertype == ETHERNET_TYPE_VLAN)
+ {
+ vlan++;
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ }
+ }
+ }
+
+ vnet_buffer (b0)->l2_hdr_offset = 0;
+ vnet_buffer (b0)->l3_hdr_offset = l2hdr_sz;
+
+ if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4))
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l2hdr_sz);
+ vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
+ l4_proto = ip4->protocol;
+ oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
+ b0->flags |= (VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+ }
+ else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l2hdr_sz);
+ vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + sizeof (ip6_header_t);
+ /* FIXME IPv6 EH traversal */
+ l4_proto = ip6->protocol;
+ b0->flags |= (VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
+ VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
+ }
+ if (l4_proto == IP_PROTOCOL_TCP)
+ {
+ oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
+ }
+ else if (l4_proto == IP_PROTOCOL_UDP)
+ {
+ oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
+ }
+ if (oflags)
+ vnet_buffer_offload_flags_set (b0, oflags);
+}
+
+static u32
+fill_buffers (vlib_main_t *vm, u32 *buffer_indices,
+ gso_test_data_t *gso_test_data, u32 n_buffers, u32 buffer_size,
+ u32 packet_size, u32 gso_size)
+{
+ u32 i;
+ u8 *data = gso_test_data->data;
+ u32 data_size = gso_test_data->data_size;
+ u32 l4_hdr_len = gso_test_data->l4_hdr_len;
+ u8 is_l2 = gso_test_data->is_l2;
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ u64 seed = clib_cpu_time_now ();
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
+ u32 len = 0;
+ u32 remaining_data =
+ (packet_size > buffer_size) ? (packet_size - buffer_size) : 0;
+
+ clib_memcpy_fast (b->data, data, data_size);
+ b->current_data = 0;
+
+ for (u32 j = data_size; j < buffer_size; j += 8)
+ *(u64 *) (b->data + j) = 1 + random_u64 (&seed);
+ b->current_length = buffer_size;
+
+ if (remaining_data)
+ {
+ vlib_buffer_t *pb = b;
+ u32 n_alloc,
+ n_bufs = ((remaining_data + buffer_size - 1) / buffer_size);
+ u32 *buffers = 0;
+ u32 fill_data_size;
+ u32 k = 0;
+
+ vec_validate (buffers, n_bufs - 1);
+ n_alloc = vlib_buffer_alloc (vm, buffers, n_bufs);
+ if (n_alloc < n_bufs)
+ {
+ vlib_buffer_free (vm, buffers, n_alloc);
+ vlib_cli_output (
+ vm, "vlib buffer alloc failed at %u requested %u actual %u", i,
+ n_bufs, n_alloc);
+ return i;
+ }
+
+ do
+ {
+ pb->next_buffer = buffers[k];
+ pb->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ pb = vlib_get_buffer (vm, buffers[k]);
+ pb->current_data = 0;
+ fill_data_size = clib_min (buffer_size, remaining_data);
+ remaining_data -= fill_data_size;
+ for (u32 l = 0; l < fill_data_size; l += 8)
+ *(u64 *) (pb->data + l) = 1 + random_u64 (&seed);
+ pb->current_length = fill_data_size;
+ k++;
+ len += fill_data_size;
+ }
+ while (k < n_bufs);
+
+ set_hdr_offsets (b, is_l2);
+ b->flags |= VNET_BUFFER_F_GSO;
+ vnet_buffer2 (b)->gso_size = gso_size;
+ vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_len;
+ }
+ b->total_length_not_including_first_buffer = len;
+ b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ }
+ return i;
+}
+
+static_always_inline u32
+gso_segment_buffer_test (vlib_main_t *vm, u32 bi,
+ vnet_interface_per_thread_data_t *ptd, u8 is_l2)
+{
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ u32 n_tx_bytes = 0;
+
+ if (PREDICT_TRUE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ n_tx_bytes = gso_segment_buffer_inline (vm, ptd, b, is_l2);
+ }
+
+ return n_tx_bytes;
+}
+
+static clib_error_t *
+test_gso_perf (vlib_main_t *vm, gso_test_main_t *gtm)
+{
+ clib_error_t *err = 0;
+ vnet_interface_per_thread_data_t *ptd = 0;
+ u32 packet_size = MAX_GSO_PACKET_SIZE;
+ u32 buffer_size = vlib_buffer_get_default_data_size (vm);
+ u32 gso_size;
+ u32 n_buffers, warmup_rounds, rounds;
+ u32 *buffer_indices = 0;
+ u64 t0, t1, t2[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ gso_test_data_t *gso_test_data = gtm->gso_test_data;
+ int i, j, k;
+
+ if (gtm->buffer_size > buffer_size)
+ return clib_error_return (0, "buffer size must be <= %u", buffer_size);
+
+ if (gtm->packet_size > packet_size)
+ return clib_error_return (0, "gso packet size must be <= %u", packet_size);
+
+ if ((gtm->gso_size > MAX_GSO_SEGMENT_SIZE) ||
+ (gtm->gso_size < MIN_GSO_SEGMENT_SIZE))
+ return clib_error_return (
+ 0, "gso segment size must be in between %u >= and <= %u",
+ MIN_GSO_SEGMENT_SIZE, MAX_GSO_SEGMENT_SIZE);
+
+ rounds = gtm->rounds ? gtm->rounds : 256;
+ n_buffers = gtm->n_buffers ? gtm->n_buffers : 256;
+ warmup_rounds = gtm->warmup_rounds ? gtm->warmup_rounds : 256;
+ buffer_size = gtm->buffer_size ? gtm->buffer_size : buffer_size;
+ gso_size = gtm->gso_size;
+ packet_size = gtm->packet_size ? gtm->packet_size : packet_size;
+
+ vec_validate_aligned (ptd, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (buffer_indices, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+
+ vlib_cli_output (vm,
+ "GSO Segmentation: packet-size %u gso-size %u buffer-size "
+ "%u n_buffers %u rounds %u "
+ "warmup-rounds %u",
+ packet_size, gso_size, buffer_size, n_buffers, rounds,
+ warmup_rounds);
+ vlib_cli_output (vm, " cpu-freq %.2f GHz",
+ (f64) vm->clib_time.clocks_per_second * 1e-9);
+
+ while (gso_test_data)
+ {
+ u32 n_filled = 0;
+ u32 n_alloc = vlib_buffer_alloc (vm, buffer_indices, n_buffers);
+ if (n_alloc != n_buffers)
+ {
+ vlib_cli_output (vm, " Test: %s FAILED", gso_test_data->description);
+ err = clib_error_return (0, "buffer alloc failure");
+ vlib_buffer_free (vm, buffer_indices, n_alloc);
+ goto done;
+ }
+ n_filled = fill_buffers (vm, buffer_indices, gso_test_data, n_buffers,
+ buffer_size, packet_size, gso_size);
+
+ u8 is_l2 = gso_test_data->is_l2;
+
+ for (k = 0; k < warmup_rounds; k++)
+ {
+ for (j = 0; j < n_filled; j++)
+ gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j], is_l2);
+
+ for (j = 0; j < n_filled; j++)
+ {
+ vlib_buffer_free (vm, ptd[j].split_buffers,
+ vec_len (ptd[j].split_buffers));
+ vec_free (ptd[j].split_buffers);
+ }
+ }
+
+ for (i = 0; i < 10; i++)
+ {
+ for (k = 0; k < rounds; k++)
+ {
+ t0 = clib_cpu_time_now ();
+ for (j = 0; j < n_filled; j++)
+ gso_segment_buffer_test (vm, buffer_indices[j], &ptd[j],
+ is_l2);
+
+ t1 = clib_cpu_time_now ();
+ t2[i] += (t1 - t0);
+ for (j = 0; j < n_filled; j++)
+ {
+ vlib_buffer_free (vm, ptd[j].split_buffers,
+ vec_len (ptd[j].split_buffers));
+ vec_free (ptd[j].split_buffers);
+ }
+ }
+ }
+
+ vlib_cli_output (
+ vm, "===========================================================");
+ vlib_cli_output (vm, " Test: %s", gso_test_data->description);
+ vlib_cli_output (
+ vm, "===========================================================");
+ for (i = 0; i < 10; i++)
+ {
+ // ticks per packet
+ f64 tpp1 = (f64) (t2[i]) / (n_filled * rounds);
+ // ticks per Byte
+ f64 tpB1 = (f64) (t2[i]) / (n_filled * rounds * packet_size);
+ // Packets per second
+ f64 Kpps1 = vm->clib_time.clocks_per_second * 1e-3 / tpp1;
+ // Throughput Giga-bits per second
+ f64 Gbps1 = vm->clib_time.clocks_per_second * 8 * 1e-9 / tpB1;
+
+ vlib_cli_output (
+ vm, "%-2u: %.03f ticks/packet, %.02f Kpps, %.02f Gbps\n", i + 1,
+ tpp1, Kpps1, Gbps1);
+ }
+ if (n_alloc)
+ vlib_buffer_free (vm, buffer_indices, n_alloc);
+ clib_memset (t2, 0, sizeof (t2));
+ gso_test_data = gso_test_data->next;
+ }
+
+done:
+
+ vec_free (ptd);
+ vec_free (buffer_indices);
+ return err;
+}
+
+static clib_error_t *
+test_gso_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ gso_test_main_t *gtm = &gso_test_main;
+ clib_error_t *err = 0;
+ f64 end, start, total_time;
+
+ gtm->gso_size = DEFAULT_GSO_SEGMENT_SIZE;
+ gtm->warmup_rounds = 0;
+ gtm->rounds = 0;
+ gtm->n_buffers = 0;
+ gtm->buffer_size = 0;
+ gtm->packet_size = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ gtm->verbose = 1;
+ else if (unformat (input, "detail"))
+ gtm->verbose = 2;
+ else if (unformat (input, "buffers %u", &gtm->n_buffers))
+ ;
+ else if (unformat (input, "buffer-size %u", &gtm->buffer_size))
+ ;
+ else if (unformat (input, "packet-size %u", &gtm->packet_size))
+ ;
+ else if (unformat (input, "gso-size %u", &gtm->gso_size))
+ ;
+ else if (unformat (input, "rounds %u", &gtm->rounds))
+ ;
+ else if (unformat (input, "warmup-rounds %u", &gtm->warmup_rounds))
+ ;
+ else
+ {
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ }
+
+ start = clib_cpu_time_now ();
+ err = test_gso_perf (vm, gtm);
+ end = clib_cpu_time_now ();
+
+ total_time = (f64) (end - start) / vm->clib_time.clocks_per_second;
+ vlib_cli_output (vm, "Total Time Test Took %.02f seconds", total_time);
+
+ return err;
+}
+
+VLIB_CLI_COMMAND (test_gso_command, static) = {
+ .path = "test gso",
+ .short_help = "test gso [buffers <n>] [buffer-size <size>] [packet-size "
+ "<size>] [gso-size <size>] [rounds <n>] "
+ "[warmup-rounds <n>]",
+ .function = test_gso_command_fn,
+};
+
+static clib_error_t *
+gso_test_init (vlib_main_t *vm)
+{
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (gso_test_init);
diff --git a/src/plugins/unittest/hash_test.c b/src/plugins/unittest/hash_test.c
new file mode 100644
index 00000000000..3b0a3cf04b9
--- /dev/null
+++ b/src/plugins/unittest/hash_test.c
@@ -0,0 +1,331 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/time.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+#include <vnet/hash/hash.h>
+#include <vnet/ethernet/ethernet.h>
+
+#define HASH_TEST_DATA_SIZE 2048
+
+typedef struct _hash_test_data
+{
+ const char *name;
+ const char *description;
+ u8 *data;
+ u32 data_size;
+ vnet_hash_fn_type_t ftype;
+ struct _hash_test_data *next;
+} hash_test_data_t;
+
+typedef struct
+{
+ int verbose;
+
+ char *hash_name;
+ u32 warmup_rounds;
+ u32 rounds;
+ u32 n_buffers;
+
+ hash_test_data_t *hash_test_data;
+} hash_test_main_t;
+
+hash_test_main_t hash_test_main;
+
+#define HASH_TEST_REGISTER_DATA(x, ...) \
+ __VA_ARGS__ hash_test_data_t __hash_test_data_##x; \
+ static void __clib_constructor __hash_test_data_fn_##x (void) \
+ { \
+ hash_test_main_t *htm = &hash_test_main; \
+ __hash_test_data_##x.next = htm->hash_test_data; \
+ htm->hash_test_data = &__hash_test_data_##x; \
+ } \
+ __VA_ARGS__ hash_test_data_t __hash_test_data_##x
+
+// qinq
+u8 eth_qinq_ipv4_tcp_data[72] = {
+ 0x02, 0xfe, 0x39, 0xe5, 0x09, 0x8f, 0x02, 0xfe, 0x2d, 0x18, 0x63, 0x18,
+ 0x88, 0xa8, 0x03, 0xe8, 0x81, 0x00, 0x03, 0xe8, 0x08, 0x00, 0x45, 0x00,
+ 0x05, 0xdc, 0xdb, 0x42, 0x40, 0x00, 0x40, 0x06, 0xc4, 0x85, 0xc0, 0xa8,
+ 0x0a, 0x02, 0xc0, 0xa8, 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34, 0x93,
+ 0xa8, 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03,
+ 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+HASH_TEST_REGISTER_DATA (eth_qinq_ipv4_tcp, static) = {
+ .name = "eth-qinq-ipv4-tcp",
+ .description = "Ethernet QinQ IPv4 TCP",
+ .data = eth_qinq_ipv4_tcp_data,
+ .data_size = sizeof (eth_qinq_ipv4_tcp_data),
+ .ftype = VNET_HASH_FN_TYPE_ETHERNET,
+};
+
+// vlan
+u8 eth_vlan_ipv4_tcp_data[68] = {
+ 0x02, 0xfe, 0x39, 0xe5, 0x09, 0x8f, 0x02, 0xfe, 0x2d, 0x18, 0x63, 0x18,
+ 0x81, 0x00, 0x03, 0xe8, 0x08, 0x00, 0x45, 0x00, 0x05, 0xdc, 0xdb, 0x42,
+ 0x40, 0x00, 0x40, 0x06, 0xc4, 0x85, 0xc0, 0xa8, 0x0a, 0x02, 0xc0, 0xa8,
+ 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34, 0x93, 0xa8, 0x1b, 0x7b, 0xef,
+ 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03, 0x00, 0x00, 0x01, 0x01,
+ 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+HASH_TEST_REGISTER_DATA (eth_vlan_ipv4_tcp, static) = {
+ .name = "eth-vlan-ipv4-tcp",
+ .description = "Ethernet Vlan IPv4 TCP",
+ .data = eth_vlan_ipv4_tcp_data,
+ .data_size = sizeof (eth_vlan_ipv4_tcp_data),
+ .ftype = VNET_HASH_FN_TYPE_ETHERNET,
+};
+
+// ethernet
+u8 eth_ipv4_tcp_data[64] = {
+ 0x02, 0xfe, 0x39, 0xe5, 0x09, 0x8f, 0x02, 0xfe, 0x2d, 0x18, 0x63, 0x18, 0x08,
+ 0x00, 0x45, 0x00, 0x05, 0xdc, 0xdb, 0x42, 0x40, 0x00, 0x40, 0x06, 0xc4, 0x85,
+ 0xc0, 0xa8, 0x0a, 0x02, 0xc0, 0xa8, 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34,
+ 0x93, 0xa8, 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03,
+ 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+HASH_TEST_REGISTER_DATA (eth_ipv4_tcp, static) = {
+ .name = "eth-ipv4-tcp",
+ .description = "Ethernet IPv4 TCP",
+ .data = eth_ipv4_tcp_data,
+ .data_size = sizeof (eth_ipv4_tcp_data),
+ .ftype = VNET_HASH_FN_TYPE_ETHERNET,
+};
+
+// udp
+u8 eth_ipv4_udp_data[42] = { 0x62, 0x36, 0xbe, 0xff, 0x91, 0x20, 0x5e,
+ 0x2c, 0xaf, 0x2e, 0x1e, 0x51, 0x08, 0x00,
+ 0x45, 0x00, 0x05, 0xc4, 0x9d, 0xc3, 0x40,
+ 0x00, 0x33, 0x11, 0x49, 0x61, 0x3e, 0xd2,
+ 0x12, 0x28, 0x0a, 0x09, 0x00, 0x02, 0x14,
+ 0x58, 0xc0, 0xd8, 0x05, 0xb0, 0x75, 0xbd };
+
+HASH_TEST_REGISTER_DATA (eth_ipv4_udp, static) = {
+ .name = "eth-ipv4-udp",
+ .description = "Ethernet IPv4 UDP",
+ .data = eth_ipv4_udp_data,
+ .data_size = sizeof (eth_ipv4_udp_data),
+ .ftype = VNET_HASH_FN_TYPE_ETHERNET,
+};
+
+// ipv4
+u8 ipv4_tcp_data[50] = { 0x45, 0x00, 0x05, 0xdc, 0xdb, 0x42, 0x40, 0x00, 0x40,
+ 0x06, 0xc4, 0x85, 0xc0, 0xa8, 0x0a, 0x02, 0xc0, 0xa8,
+ 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34, 0x93, 0xa8,
+ 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5,
+ 0xc7, 0x03, 0x00, 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce,
+ 0xaa, 0x00, 0x2f, 0xf2, 0xc3 };
+
+HASH_TEST_REGISTER_DATA (ipv4_tcp, static) = {
+ .name = "ipv4-tcp",
+ .description = "IPv4 TCP",
+ .data = ipv4_tcp_data,
+ .data_size = sizeof (ipv4_tcp_data),
+ .ftype = VNET_HASH_FN_TYPE_IP,
+};
+
+u8 ipv4_icmp_data[84] = {
+ 0x45, 0x00, 0x00, 0x54, 0xb7, 0xe6, 0x40, 0x00, 0x40, 0x01, 0xed, 0x6e,
+ 0xc0, 0xa8, 0x0a, 0x01, 0xc0, 0xa8, 0x0a, 0x02, 0x08, 0x00, 0xc7, 0x84,
+ 0x00, 0x16, 0x00, 0x92, 0xfd, 0xdb, 0xd9, 0x60, 0x00, 0x00, 0x00, 0x00,
+ 0x91, 0xc3, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
+ 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
+
+};
+
+HASH_TEST_REGISTER_DATA (ipv4_icmp, static) = {
+ .name = "ipv4-icmp",
+ .description = "IPv4 ICMP",
+ .data = ipv4_icmp_data,
+ .data_size = sizeof (ipv4_icmp_data),
+ .ftype = VNET_HASH_FN_TYPE_IP,
+};
+
+// ip6
+u8 ipv6_icmp6_data[104] = {
+ 0x60, 0x0d, 0xf4, 0x97, 0x00, 0x40, 0x3a, 0x40, 0xfd, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0xfd, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+ 0x01, 0x80, 0x00, 0x10, 0x84, 0xb1, 0x25, 0x00, 0x01, 0x22, 0x57, 0xf0, 0x60,
+ 0x00, 0x00, 0x00, 0x00, 0xcb, 0x4a, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
+ 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a,
+ 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
+};
+
+HASH_TEST_REGISTER_DATA (ipv6_icmp6, static) = {
+ .name = "ipv6-icmp6",
+ .description = "IPv6 ICMP6",
+ .data = ipv6_icmp6_data,
+ .data_size = sizeof (ipv6_icmp6_data),
+ .ftype = VNET_HASH_FN_TYPE_IP,
+};
+
+void
+fill_buffers (vlib_main_t *vm, u32 *buffer_indices, u8 *data, u32 data_size,
+ u32 n_buffers)
+{
+ int i, j;
+ u64 seed = clib_cpu_time_now ();
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
+ clib_memcpy_fast (b->data, data, data_size);
+ b->current_data = 0;
+ for (j = data_size; j < HASH_TEST_DATA_SIZE; j += 8)
+ *(u64 *) (b->data + j) = 1 + random_u64 (&seed);
+ b->current_length = HASH_TEST_DATA_SIZE;
+ }
+}
+
+static clib_error_t *
+test_hash_perf (vlib_main_t *vm, hash_test_main_t *htm)
+{
+ clib_error_t *err = 0;
+ u32 n_buffers, n_alloc = 0, warmup_rounds, rounds;
+ u32 *buffer_indices = 0;
+ u64 t0[5], t1[5];
+ vnet_hash_fn_t hf;
+ hash_test_data_t *hash_test_data = htm->hash_test_data;
+ void **p = 0;
+ int i, j;
+
+ rounds = htm->rounds ? htm->rounds : 100;
+ n_buffers = htm->n_buffers ? htm->n_buffers : 256;
+ warmup_rounds = htm->warmup_rounds ? htm->warmup_rounds : 100;
+
+ vec_validate_aligned (p, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (buffer_indices, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ n_alloc = vlib_buffer_alloc (vm, buffer_indices, n_buffers);
+ if (n_alloc != n_buffers)
+ {
+ err = clib_error_return (0, "buffer alloc failure");
+ goto done;
+ }
+
+ vlib_cli_output (vm,
+ "%s: n_buffers %u rounds %u "
+ "warmup-rounds %u",
+ htm->hash_name, n_buffers, rounds, warmup_rounds);
+ vlib_cli_output (vm, " cpu-freq %.2f GHz",
+ (f64) vm->clib_time.clocks_per_second * 1e-9);
+
+ while (hash_test_data)
+ {
+ fill_buffers (vm, buffer_indices, hash_test_data->data,
+ hash_test_data->data_size, n_buffers);
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
+ p[i] = vlib_buffer_get_current (b);
+ }
+
+ hf =
+ vnet_hash_function_from_name (htm->hash_name, hash_test_data->ftype);
+
+ if (!hf)
+ {
+ err = clib_error_return (0, "wrong hash name");
+ goto done;
+ }
+
+ for (i = 0; i < 5; i++)
+ {
+ u32 h[n_buffers];
+ for (j = 0; j < warmup_rounds; j++)
+ {
+ hf (p, h, n_buffers);
+ }
+
+ t0[i] = clib_cpu_time_now ();
+ for (j = 0; j < rounds; j++)
+ hf (p, h, n_buffers);
+ t1[i] = clib_cpu_time_now ();
+ }
+
+ vlib_cli_output (
+ vm, "===========================================================");
+ vlib_cli_output (vm, " Test: %s", hash_test_data->description);
+ vlib_cli_output (
+ vm, "===========================================================");
+ for (i = 0; i < 5; i++)
+ {
+ f64 tpp1 = (f64) (t1[i] - t0[i]) / (n_buffers * rounds);
+ f64 Mpps1 = vm->clib_time.clocks_per_second * 1e-6 / tpp1;
+
+ vlib_cli_output (vm, "%-2u: %.03f ticks/packet, %.02f Mpps\n", i + 1,
+ tpp1, Mpps1);
+ }
+ hash_test_data = hash_test_data->next;
+ }
+
+done:
+ if (n_alloc)
+ vlib_buffer_free (vm, buffer_indices, n_alloc);
+
+ vec_free (p);
+ vec_free (buffer_indices);
+ return err;
+}
+
+static clib_error_t *
+test_hash_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ hash_test_main_t *tm = &hash_test_main;
+ clib_error_t *err = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ tm->verbose = 1;
+ else if (unformat (input, "detail"))
+ tm->verbose = 2;
+ else if (unformat (input, "perf %s", &tm->hash_name))
+ ;
+ else if (unformat (input, "buffers %u", &tm->n_buffers))
+ ;
+ else if (unformat (input, "rounds %u", &tm->rounds))
+ ;
+ else if (unformat (input, "warmup-rounds %u", &tm->warmup_rounds))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ goto error;
+ }
+ }
+
+ err = test_hash_perf (vm, tm);
+
+error:
+ vec_free (tm->hash_name);
+
+ return err;
+}
+
+VLIB_CLI_COMMAND (test_hash_command, static) = {
+ .path = "test hash",
+ .short_help = "test hash [perf <hash-name>] [buffers <n>] [rounds <n>] "
+ "[warmup-rounds <n>]",
+ .function = test_hash_command_fn,
+};
+
+static clib_error_t *
+hash_test_init (vlib_main_t *vm)
+{
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (hash_test_init);
diff --git a/src/plugins/unittest/interface_test.c b/src/plugins/unittest/interface_test.c
index 4cf5ae43b3c..b5d5b6f776e 100644
--- a/src/plugins/unittest/interface_test.c
+++ b/src/plugins/unittest/interface_test.c
@@ -57,14 +57,12 @@ test_interface_command_fn (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_interface_command, static) =
{
.path = "test interface link-state",
.short_help = "test interface link-state <interface> [up] [down]",
.function = test_interface_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/ip_psh_cksum_test.c b/src/plugins/unittest/ip_psh_cksum_test.c
new file mode 100644
index 00000000000..7a0e1c3a1f4
--- /dev/null
+++ b/src/plugins/unittest/ip_psh_cksum_test.c
@@ -0,0 +1,266 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/time.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_psh_cksum.h>
+
+static_always_inline void
+compute_ip_phc (void *p)
+{
+ if ((((u8 *) p)[0] & 0xf0) == 0x40)
+ ip4_pseudo_header_cksum (p);
+ else if ((((u8 *) p)[0] & 0xf0) == 0x60)
+ ip6_pseudo_header_cksum (p);
+}
+
+void
+compute_ip_phc_func (void **p, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ compute_ip_phc (p[0]);
+ compute_ip_phc (p[1]);
+ compute_ip_phc (p[2]);
+ compute_ip_phc (p[3]);
+
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ compute_ip_phc (p[0]);
+
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+typedef struct _phc_test_data
+{
+ const char *name;
+ const char *description;
+ u8 *data;
+ u32 data_size;
+ struct _phc_test_data *next;
+} phc_test_data_t;
+
+typedef struct
+{
+ int verbose;
+
+ char *phc_name;
+ u32 warmup_rounds;
+ u32 rounds;
+ u32 n_buffers;
+ u32 buffer_size;
+ phc_test_data_t *phc_test_data;
+} phc_test_main_t;
+
+phc_test_main_t phc_test_main;
+
+#define PHC_TEST_REGISTER_DATA(x, ...) \
+ __VA_ARGS__ phc_test_data_t __phc_test_data_##x; \
+ static void __clib_constructor __phc_test_data_fn_##x (void) \
+ { \
+ phc_test_main_t *ptm = &phc_test_main; \
+ __phc_test_data_##x.next = ptm->phc_test_data; \
+ ptm->phc_test_data = &__phc_test_data_##x; \
+ } \
+ __VA_ARGS__ phc_test_data_t __phc_test_data_##x
+
+// ipv4
+u8 phc_ipv4_tcp_data[50] = {
+ 0x45, 0x00, 0x05, 0xdc, 0xdb, 0x42, 0x40, 0x00, 0x40, 0x06, 0xc4, 0x85, 0xc0,
+ 0xa8, 0x0a, 0x02, 0xc0, 0xa8, 0x0a, 0x01, 0xd8, 0xde, 0x14, 0x51, 0x34, 0x93,
+ 0xa8, 0x1b, 0x7b, 0xef, 0x2e, 0x7e, 0x80, 0x10, 0x00, 0xe5, 0xc7, 0x03, 0x00,
+ 0x00, 0x01, 0x01, 0x08, 0x0a, 0xce, 0xaa, 0x00, 0x2f, 0xf2, 0xc3
+};
+
+PHC_TEST_REGISTER_DATA (ipv4_tcp, static) = {
+ .name = "ipv4-tcp",
+ .description = "IPv4 TCP",
+ .data = phc_ipv4_tcp_data,
+ .data_size = sizeof (phc_ipv4_tcp_data),
+};
+
+// ip6
+u8 phc_ipv6_udp_data[65] = {
+ 0x60, 0x0d, 0xf4, 0x97, 0x00, 0x40, 0x3a, 0x40, 0xfd, 0x01, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0xfd, 0x01,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+ 0x01, 0x80, 0x00, 0x10, 0x84, 0xb1, 0x25, 0x00, 0x01, 0x22, 0x57, 0xf0, 0x60,
+ 0x00, 0x00, 0x00, 0x00, 0xcb, 0x4a, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
+};
+
+PHC_TEST_REGISTER_DATA (ipv6_udp, static) = {
+ .name = "ipv6-udp",
+ .description = "IPv6 UDP",
+ .data = phc_ipv6_udp_data,
+ .data_size = sizeof (phc_ipv6_udp_data),
+};
+
+static void
+fill_buffers (vlib_main_t *vm, u32 *buffer_indices, u8 *data, u32 data_size,
+ u32 n_buffers, u32 buffer_size)
+{
+ int i, j;
+ u64 seed = clib_cpu_time_now ();
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
+ clib_memcpy_fast (b->data, data, data_size);
+ b->current_data = 0;
+ for (j = data_size; j < buffer_size; j += 8)
+ *(u64 *) (b->data + j) = 1 + random_u64 (&seed);
+ b->current_length = buffer_size;
+ }
+}
+
+static clib_error_t *
+test_phc_perf (vlib_main_t *vm, phc_test_main_t *ptm)
+{
+ clib_error_t *err = 0;
+ u32 buffer_size = vlib_buffer_get_default_data_size (vm);
+ u32 n_buffers, n_alloc = 0, warmup_rounds, rounds;
+ u32 *buffer_indices = 0;
+ u64 t0[5], t1[5];
+ phc_test_data_t *phc_test_data = ptm->phc_test_data;
+ void **p = 0;
+ int i, j;
+
+ if (ptm->buffer_size > buffer_size)
+ return clib_error_return (0, "buffer size must be <= %u", buffer_size);
+
+ rounds = ptm->rounds ? ptm->rounds : 100;
+ n_buffers = ptm->n_buffers ? ptm->n_buffers : 256;
+ warmup_rounds = ptm->warmup_rounds ? ptm->warmup_rounds : 100;
+ buffer_size = ptm->buffer_size ? ptm->buffer_size : buffer_size;
+
+ vec_validate_aligned (p, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (buffer_indices, n_buffers - 1, CLIB_CACHE_LINE_BYTES);
+ n_alloc = vlib_buffer_alloc (vm, buffer_indices, n_buffers);
+ if (n_alloc != n_buffers)
+ {
+ err = clib_error_return (0, "buffer alloc failure");
+ goto done;
+ }
+
+ vlib_cli_output (
+ vm,
+ "pseudo header checksum: buffer-size %u, n_buffers %u rounds %u "
+ "warmup-rounds %u",
+ buffer_size, n_buffers, rounds, warmup_rounds);
+ vlib_cli_output (vm, " cpu-freq %.2f GHz",
+ (f64) vm->clib_time.clocks_per_second * 1e-9);
+
+ while (phc_test_data)
+ {
+ fill_buffers (vm, buffer_indices, phc_test_data->data,
+ phc_test_data->data_size, n_buffers, buffer_size);
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, buffer_indices[i]);
+ p[i] = vlib_buffer_get_current (b);
+ }
+
+ for (i = 0; i < 5; i++)
+ {
+ for (j = 0; j < warmup_rounds; j++)
+ {
+ compute_ip_phc_func (p, n_buffers);
+ }
+
+ t0[i] = clib_cpu_time_now ();
+ for (j = 0; j < rounds; j++)
+ compute_ip_phc_func (p, n_buffers);
+ t1[i] = clib_cpu_time_now ();
+ }
+
+ vlib_cli_output (
+ vm, "===========================================================");
+ vlib_cli_output (vm, " Test: %s", phc_test_data->description);
+ vlib_cli_output (
+ vm, "===========================================================");
+ for (i = 0; i < 5; i++)
+ {
+ f64 tpp1 = (f64) (t1[i] - t0[i]) / (n_buffers * rounds);
+ f64 Mpps1 = vm->clib_time.clocks_per_second * 1e-6 / tpp1;
+
+ vlib_cli_output (vm, "%-2u: %.03f ticks/packet, %.02f Mpps\n", i + 1,
+ tpp1, Mpps1);
+ }
+ phc_test_data = phc_test_data->next;
+ }
+
+done:
+ if (n_alloc)
+ vlib_buffer_free (vm, buffer_indices, n_alloc);
+
+ vec_free (p);
+ vec_free (buffer_indices);
+ return err;
+}
+
+static clib_error_t *
+test_phc_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ phc_test_main_t *ptm = &phc_test_main;
+ clib_error_t *err = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ ptm->verbose = 1;
+ else if (unformat (input, "detail"))
+ ptm->verbose = 2;
+ else if (unformat (input, "buffers %u", &ptm->n_buffers))
+ ;
+ else if (unformat (input, "buffer-size %u", &ptm->buffer_size))
+ ;
+ else if (unformat (input, "rounds %u", &ptm->rounds))
+ ;
+ else if (unformat (input, "warmup-rounds %u", &ptm->warmup_rounds))
+ ;
+ else
+ {
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+ }
+
+ test_phc_perf (vm, ptm);
+
+ return err;
+}
+
+VLIB_CLI_COMMAND (test_phc_command, static) = {
+ .path = "test phc",
+ .short_help = "test phc [buffers <n>] [buffer-size <size>] [rounds <n>] "
+ "[warmup-rounds <n>]",
+ .function = test_phc_command_fn,
+};
+
+static clib_error_t *
+phc_test_init (vlib_main_t *vm)
+{
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (phc_test_init);
diff --git a/src/plugins/unittest/ipsec_test.c b/src/plugins/unittest/ipsec_test.c
index 0e9865052b4..98253eeb12a 100644
--- a/src/plugins/unittest/ipsec_test.c
+++ b/src/plugins/unittest/ipsec_test.c
@@ -15,10 +15,11 @@
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/ipsec_sa.h>
+#include <vnet/ipsec/ipsec_output.h>
static clib_error_t *
-test_ipsec_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
+test_ipsec_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
u64 seq_num;
u32 sa_id;
@@ -47,25 +48,336 @@ test_ipsec_command_fn (vlib_main_t * vm,
sa->seq = seq_num & 0xffffffff;
sa->seq_hi = seq_num >> 32;
+ /* clear the window */
+ if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
+ clib_bitmap_zero (sa->replay_window_huge);
+ else
+ sa->replay_window = 0;
+
ipsec_sa_unlock (sa_index);
}
else
{
- return clib_error_return (0, "unknown SA `%U'",
- format_unformat_error, input);
+ return clib_error_return (0, "unknown SA `%U'", format_unformat_error,
+ input);
}
return (NULL);
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (test_ipsec_command, static) =
+static clib_error_t *
+test_ipsec_spd_outbound_perf_command_fn (vlib_main_t *vm,
+ unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
+ clib_error_t *err = 0;
+ ipsec_crypto_alg_t crypto_alg = IPSEC_CRYPTO_ALG_AES_GCM_128;
+ ipsec_integ_alg_t integ_alg = IPSEC_INTEG_ALG_NONE;
+ ipsec_protocol_t proto = IPSEC_PROTOCOL_ESP;
+ ipsec_sa_flags_t sa_flags = IPSEC_SA_FLAG_NONE;
+ ipsec_key_t ck = { 0 };
+ u8 key_data[] = { 31, 32, 33, 34, 35, 36, 37, 38,
+ 39, 30, 31, 32, 33, 34, 35, 36 };
+ ipsec_mk_key (&ck, key_data, 16);
+ ipsec_key_t ik = { 0 };
+ u32 sa_id = 123456, spi = 654321, salt = 1234, sai;
+ u16 udp_src = IPSEC_UDP_PORT_NONE, udp_dst = IPSEC_UDP_PORT_NONE;
+ tunnel_t tun = {};
+
+ /* SPD policy */
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p0 = NULL;
+ ipsec_spd_t *spd0;
+ uword *pp;
+ u32 stat_index, spd_idx, spd_id = 1;
+ int is_add = 1;
+ int rv;
+ ipsec_policy_t *p_vec = NULL;
+ u64 i;
+ u64 flows = 100;
+
+ u64 t_add_0 = 0;
+ u64 t_add_1 = 0;
+ u64 t_add = 0;
+ u64 t_look_0 = 0;
+ u64 t_look_1 = 0;
+ u64 t_look = 0;
+ u8 flow_cache_enabled = im->output_flow_cache_flag;
+ u32 count_cached = 0;
+ u32 count_slow_path = 0;
+ u32 seed = random_default_seed ();
+ u32 *rand_val = NULL;
+ u32 ip4_start;
+#define BURST_MAX_SIZE 256
+ ipsec_policy_t *policies[BURST_MAX_SIZE];
+ ipsec4_spd_5tuple_t ip4_5tuples[BURST_MAX_SIZE];
+ u32 burst_size = 10;
+ int burst_enabled = 0;
+ u64 t0 = clib_cpu_time_now ();
+ u64 t1 = 0;
+ u32 k = 0, m;
+ u64 burst_counter = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "flows %d", &flows))
+ ;
+ else if (unformat (input, "burst %d", &burst_size))
+ {
+ if (burst_size == 0)
+ burst_enabled = 0;
+ else
+ {
+ burst_enabled = 1;
+ burst_size = clib_min (burst_size, BURST_MAX_SIZE);
+ }
+ }
+ else
+ break;
+ }
+
+ vlib_cli_output (vm, "Create env:");
+ /* creating a new SA */
+ rv = ipsec_sa_add_and_lock (sa_id, spi, proto, crypto_alg, &ck, integ_alg,
+ &ik, sa_flags, clib_host_to_net_u32 (salt),
+ udp_src, udp_dst, 0, &tun, &sai);
+ if (rv)
+ {
+ err = clib_error_return (0, "create sa failure");
+ goto done;
+ }
+ else
+ vlib_cli_output (vm, "\tAdd a new SA");
+
+ /* creating a new SPD */
+ rv = ipsec_add_del_spd (vm, spd_id, is_add);
+ if (rv)
+ {
+ err = clib_error_return (0, "create spd failure");
+ goto done;
+ }
+ else
+ vlib_cli_output (vm, "\tAdd a new SPD");
+
+ /* vector for spd_policy */
+ vec_validate (p_vec, flows + 1);
+ vec_validate (rand_val, flows + 1);
+
+ /* fill spd policy */
+ for (i = 0; i < flows; i++)
+ {
+ rand_val[i] = random_u32 (&seed) % flows;
+
+ p_vec[i].type = IPSEC_SPD_POLICY_IP4_OUTBOUND;
+ p_vec[i].priority = flows - i;
+ p_vec[i].policy = IPSEC_POLICY_ACTION_PROTECT;
+ p_vec[i].id = spd_id;
+ p_vec[i].sa_id = sa_id;
+ p_vec[i].protocol = IP_PROTOCOL_UDP;
+ p_vec[i].lport.start = 1;
+ p_vec[i].lport.stop = 1;
+ p_vec[i].rport.start = 1;
+ p_vec[i].rport.stop = 1;
+ /* address: 1.0.0.0 as u32 */
+ ip4_start = 16777216;
+ p_vec[i].laddr.start.ip4.data_u32 =
+ clib_host_to_net_u32 (ip4_start + i * 32);
+ p_vec[i].laddr.stop.ip4.data_u32 =
+ clib_host_to_net_u32 (ip4_start + i * 32);
+ p_vec[i].raddr.start.ip4.data_u32 =
+ clib_host_to_net_u32 (ip4_start + i * 32);
+ p_vec[i].raddr.stop.ip4.data_u32 =
+ clib_host_to_net_u32 (ip4_start + i * 32);
+ }
+
+ vlib_cli_output (vm, "Add SPD Policy");
+ t_add_0 = clib_cpu_time_now ();
+ for (i = 0; i < flows; i++)
+ {
+ rv = ipsec_add_del_policy (vm, &p_vec[i], is_add, &stat_index);
+ if (rv)
+ {
+ clib_warning ("No add SPD Policy: %u", stat_index);
+ err = clib_error_return (0, "add SPD Policy failure");
+ goto done;
+ }
+ }
+ t_add_1 = clib_cpu_time_now ();
+
+ pp = hash_get (im->spd_index_by_spd_id, spd_id);
+ spd_idx = pp[0];
+ spd0 = pool_elt_at_index (im->spds, spd_idx);
+
+ vlib_cli_output (vm, "Lookup SPD Policy");
+ u64 j = 0;
+ u64 n_lookup = 1000 * 1000;
+ t_look_0 = clib_cpu_time_now ();
+ for (i = 0; i < n_lookup; i++)
+ {
+ if (flows == j)
+ j = 0;
+
+ p0 = NULL;
+ if (flow_cache_enabled)
+ {
+ p0 = ipsec4_out_spd_find_flow_cache_entry (
+ im, 0,
+ clib_net_to_host_u32 (ip4_start +
+ ((flows - 1) - rand_val[j]) * 32),
+ clib_net_to_host_u32 (ip4_start +
+ ((flows - 1) - rand_val[j]) * 32),
+ clib_net_to_host_u16 (1), clib_net_to_host_u16 (1));
+ if (p0)
+ count_cached++;
+ }
+ if (p0 == NULL)
+ {
+ if (burst_enabled)
+ {
+ u32 src_addr = (ip4_start + ((flows - 1) - rand_val[j]) * 32);
+ u32 dst_addr = (ip4_start + ((flows - 1) - rand_val[j]) * 32);
+ ipsec4_spd_5tuple_t ip4_5tuple = {
+ .ip4_addr = { (ip4_address_t) src_addr,
+ (ip4_address_t) dst_addr },
+ .port = { 1, 1 },
+ .proto = IP_PROTOCOL_UDP
+ };
+
+ if (k == burst_size)
+ {
+ k = 0;
+ clib_memset (policies, 0,
+ burst_size * sizeof (ipsec_policy_t *));
+ burst_counter += ipsec_output_policy_match_n (
+ spd0, ip4_5tuples, policies, burst_size,
+ flow_cache_enabled);
+ for (m = 0; m < burst_size; m++)
+ {
+ ASSERT (policies[m] != 0);
+ }
+ }
+
+ clib_memcpy (ip4_5tuples + k, &ip4_5tuple,
+ sizeof (ipsec4_spd_5tuple_t));
+ k++;
+ }
+ else
+ {
+
+ p0 = ipsec_output_policy_match (
+ spd0, IP_PROTOCOL_UDP,
+ (ip4_start + ((flows - 1) - rand_val[j]) * 32),
+ (ip4_start + ((flows - 1) - rand_val[j]) * 32), 1, 1,
+ flow_cache_enabled);
+ }
+
+ count_slow_path++;
+ }
+ j++;
+ if (!burst_enabled)
+ ASSERT (p0 != 0);
+ }
+
+ if (burst_enabled && k > 0)
+ {
+ clib_memset (policies, 0, k * sizeof (ipsec_policy_t *));
+ burst_counter += ipsec_output_policy_match_n (
+ spd0, ip4_5tuples, policies, k, flow_cache_enabled);
+ for (m = 0; m < k; m++)
+ {
+ ASSERT (policies[m] != 0);
+ }
+ }
+ t_look_1 = clib_cpu_time_now ();
+
+ t_add = (t_add_1 - t_add_0);
+ t_look = (t_look_1 - t_look_0);
+
+ vlib_cli_output (vm, "Results Outbound:");
+ vlib_cli_output (vm, "Time to add %u flows: \t\t%12.10f s", flows,
+ (t_add / vm->clib_time.clocks_per_second));
+ vlib_cli_output (vm, "Average time to add 1 flow: \t\t%12.10f s",
+ ((t_add / flows) / vm->clib_time.clocks_per_second));
+ vlib_cli_output (vm, "Time to lookup %u flows: \t\t%12.10f s", flows,
+ (t_look / vm->clib_time.clocks_per_second));
+ vlib_cli_output (vm, "Average time to lookup 1 flow: \t\t%12.10f s",
+ ((t_look / n_lookup) / vm->clib_time.clocks_per_second));
+
+ vlib_cli_output (vm, " ");
+
+ vlib_cli_output (vm, "Cycle CPU to add %u flows: \t\t%32lu cycles", flows,
+ t_add);
+ vlib_cli_output (vm, "Average cycle CPU to add 1 flow: \t%32lu cycles",
+ t_add / flows);
+ vlib_cli_output (vm, "Cycle CPU to lookup %u flows: \t%32lu cycles", flows,
+ t_look);
+ vlib_cli_output (vm, "Average cycle CPU to lookup 1 flow: \t%32lu cycles",
+ t_look / n_lookup);
+
+ if (count_slow_path || count_cached)
+ vlib_cli_output (
+ vm, "flow cache hit rate: \t\t%12.10f\n cached: \t%d\n slow_path: \t%d",
+ ((float) count_cached) / ((float) count_cached + count_slow_path),
+ count_cached, count_slow_path);
+
+ if (burst_enabled)
+ vlib_cli_output (vm, "Total number of packets matched in bursts: \t\t%d\n",
+ burst_counter);
+
+done:
+ vlib_cli_output (vm, "Cleaning:");
+ /* delete SPD policy */
+ is_add = 0;
+ for (i = 0; i < flows; i++)
+ {
+ rv = ipsec_add_del_policy (vm, &p_vec[i], is_add, &stat_index);
+ if (rv)
+ {
+ clib_warning ("No delete SPD Policy: %u", i);
+ err = clib_error_return (0, "delete SPD Policy failure");
+ }
+ }
+ vlib_cli_output (vm, "\tDelete all SPD Policy");
+
+ /* delete SPD */
+ rv = ipsec_add_del_spd (vm, spd_id, is_add);
+ if (rv)
+ {
+ err = clib_error_return (0, "delete spd failure");
+ }
+ else
+ vlib_cli_output (vm, "\tDelete SPD");
+
+ /* delete SA */
+ rv = ipsec_sa_unlock_id (sa_id);
+ if (rv)
+ {
+ err = clib_error_return (0, "delete sa failure");
+ }
+ else
+ vlib_cli_output (vm, "\tDelete SA");
+
+ t1 = clib_cpu_time_now ();
+ vlib_cli_output (vm, "Time for test: \t%12.10f s",
+ ((t1 - t0) / vm->clib_time.clocks_per_second));
+
+ vec_free (p_vec);
+ vlib_cli_output (vm, "End");
+
+ return (err);
+}
+
+VLIB_CLI_COMMAND (test_ipsec_spd_perf_command, static) = {
+ .path = "test ipsec_spd_outbound_perf",
+ .short_help = "test ipsec_spd_outbound_perf flows <n_flows>",
+ .function = test_ipsec_spd_outbound_perf_command_fn,
+};
+
+VLIB_CLI_COMMAND (test_ipsec_command, static) = {
.path = "test ipsec",
.short_help = "test ipsec sa <ID> seq-num <VALUE>",
.function = test_ipsec_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/llist_test.c b/src/plugins/unittest/llist_test.c
index a67075de44e..5a712cde33e 100644
--- a/src/plugins/unittest/llist_test.c
+++ b/src/plugins/unittest/llist_test.c
@@ -132,13 +132,11 @@ llist_test_basic (vlib_main_t * vm, unformat_input_t * input)
list_test_is_sane (pelts, ll_test, he);
i--;
- /* *INDENT-OFF* */
clib_llist_foreach (pelts, ll_test, he, e, ({
if (i != e->data)
LLIST_TEST (0, "incorrect element i = %u data = %u", i, e->data);
i--;
}));
- /* *INDENT-ON* */
LLIST_TEST (i == -1, "head insertion works i = %d", i);
@@ -180,13 +178,11 @@ llist_test_basic (vlib_main_t * vm, unformat_input_t * input)
"list should not be empty");
i--;
- /* *INDENT-OFF* */
clib_llist_foreach_reverse (pelts, ll_test2, he2, e, ({
if (i != e->data)
LLIST_TEST (0, "incorrect element i = %u data = %u", i, e->data);
i--;
}));
- /* *INDENT-ON* */
LLIST_TEST (i == -1, "tail insertion works");
/*
@@ -217,13 +213,11 @@ llist_test_basic (vlib_main_t * vm, unformat_input_t * input)
i = 0;
- /* *INDENT-OFF* */
clib_llist_foreach (pelts, ll_test, he, e, ({
if (i != e->data)
LLIST_TEST (0, "incorrect element i = %u data = %u", i, e->data);
i++;
}));
- /* *INDENT-ON* */
LLIST_TEST (i == 100, "move from ll_test2 to ll_test worked i %u", i);
@@ -335,14 +329,12 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (llist_test_command, static) =
{
.path = "test llist",
.short_help = "internal llist unit tests",
.function = llist_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/mactime_test.c b/src/plugins/unittest/mactime_test.c
index 46d6263b938..5bc195c9694 100644
--- a/src/plugins/unittest/mactime_test.c
+++ b/src/plugins/unittest/mactime_test.c
@@ -165,14 +165,12 @@ test_time_range_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_time_range_command, static) =
{
.path = "test time-range",
.short_help = "test time-range",
.function = test_time_range_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/mfib_test.c b/src/plugins/unittest/mfib_test.c
index c456d59d7db..5cf821fb5f9 100644
--- a/src/plugins/unittest/mfib_test.c
+++ b/src/plugins/unittest/mfib_test.c
@@ -123,40 +123,31 @@ mfib_test_mk_intf (u32 ninterfaces)
for (i = 0; i < ninterfaces; i++)
{
- hw_address[5] = i;
-
- error = ethernet_register_interface(vnet_get_main(),
- test_interface_device_class.index,
- i /* instance */,
- hw_address,
- &tm->hw_if_indicies[i],
- /* flag change */ 0);
-
- MFIB_TEST((NULL == error), "ADD interface %d", i);
-
- error = vnet_hw_interface_set_flags(vnet_get_main(),
- tm->hw_if_indicies[i],
- VNET_HW_INTERFACE_FLAG_LINK_UP);
- tm->hw[i] = vnet_get_hw_interface(vnet_get_main(),
- tm->hw_if_indicies[i]);
- vec_validate (ip4_main.fib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- vec_validate (ip6_main.fib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
-
- vec_validate (ip4_main.mfib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- vec_validate (ip6_main.mfib_index_by_sw_if_index,
- tm->hw[i]->sw_if_index);
- ip4_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
- ip6_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
-
- error = vnet_sw_interface_set_flags(vnet_get_main(),
- tm->hw[i]->sw_if_index,
- VNET_SW_INTERFACE_FLAG_ADMIN_UP);
- MFIB_TEST((NULL == error), "UP interface %d", i);
+ vnet_eth_interface_registration_t eir = {};
+ vnet_main_t *vnm = vnet_get_main ();
+
+ hw_address[5] = i;
+
+ eir.dev_class_index = test_interface_device_class.index;
+ eir.dev_instance = i;
+ eir.address = hw_address;
+ tm->hw_if_indicies[i] = vnet_eth_register_interface (vnm, &eir);
+
+ error =
+ vnet_hw_interface_set_flags (vnet_get_main (), tm->hw_if_indicies[i],
+ VNET_HW_INTERFACE_FLAG_LINK_UP);
+ tm->hw[i] =
+ vnet_get_hw_interface (vnet_get_main (), tm->hw_if_indicies[i]);
+ ip4_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ ip6_main.fib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+
+ ip4_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+ ip6_main.mfib_index_by_sw_if_index[tm->hw[i]->sw_if_index] = 0;
+
+ error =
+ vnet_sw_interface_set_flags (vnet_get_main (), tm->hw[i]->sw_if_index,
+ VNET_SW_INTERFACE_FLAG_ADMIN_UP);
+ MFIB_TEST ((NULL == error), "UP interface %d", i);
}
/*
* re-eval after the inevitable realloc
@@ -413,10 +404,8 @@ mfib_test_i (fib_protocol_t PROTO,
.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT,
};
- mfib_table_entry_path_update(fib_index,
- pfx_no_forward,
- MFIB_SOURCE_API,
- &path_via_if0);
+ mfib_table_entry_path_update (fib_index, pfx_no_forward, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
mfei_no_f = mfib_table_lookup_exact_match(fib_index, pfx_no_forward);
MFIB_TEST(!mfib_test_entry(mfei_no_f,
@@ -464,18 +453,12 @@ mfib_test_i (fib_protocol_t PROTO,
/*
* An (S,G) with 1 accepting and 3 forwarding paths
*/
- mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_via_if0);
- mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_via_if1);
- mfib_table_entry_paths_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- two_paths);
+ mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
+ mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if1);
+ mfib_table_entry_paths_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, two_paths);
mfei_s_g = mfib_table_lookup_exact_match(fib_index, pfx_s_g);
@@ -504,14 +487,11 @@ mfib_test_i (fib_protocol_t PROTO,
* A (*,G), which the same G as the (S,G).
* different paths. test our LPM.
*/
- mfei_g_1 = mfib_table_entry_path_update(fib_index,
- pfx_star_g_1,
- MFIB_SOURCE_API,
- &path_via_if0);
- mfib_table_entry_path_update(fib_index,
- pfx_star_g_1,
- MFIB_SOURCE_API,
- &path_via_if1);
+ mfei_g_1 =
+ mfib_table_entry_path_update (fib_index, pfx_star_g_1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
+ mfib_table_entry_path_update (fib_index, pfx_star_g_1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if1);
/*
* test we find the *,G and S,G via LPM and exact matches
@@ -574,14 +554,12 @@ mfib_test_i (fib_protocol_t PROTO,
* different paths. test our LPM.
*/
path_via_if2.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei_g_m = mfib_table_entry_path_update(fib_index,
- pfx_star_g_slash_m,
- MFIB_SOURCE_API,
- &path_via_if2);
- mfib_table_entry_path_update(fib_index,
- pfx_star_g_slash_m,
- MFIB_SOURCE_API,
- &path_via_if3);
+ mfei_g_m = mfib_table_entry_path_update (
+ fib_index, pfx_star_g_slash_m, MFIB_SOURCE_API, MFIB_ENTRY_FLAG_NONE,
+ &path_via_if2);
+ mfib_table_entry_path_update (fib_index, pfx_star_g_slash_m,
+ MFIB_SOURCE_API, MFIB_ENTRY_FLAG_NONE,
+ &path_via_if3);
/*
* test we find the (*,G/m), (*,G) and (S,G) via LPM and exact matches
@@ -655,10 +633,8 @@ mfib_test_i (fib_protocol_t PROTO,
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
};
- mfei = mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_for_us);
+ mfei = mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_for_us);
MFIB_TEST(!mfib_test_entry(mfei,
MFIB_ENTRY_FLAG_NONE,
@@ -692,10 +668,8 @@ mfib_test_i (fib_protocol_t PROTO,
* - expect it to be removed from the replication set.
*/
path_via_if3.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_via_if3);
+ mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if3);
MFIB_TEST(!mfib_test_entry(mfei,
MFIB_ENTRY_FLAG_NONE,
@@ -719,10 +693,8 @@ mfib_test_i (fib_protocol_t PROTO,
path_via_if3.frp_mitf_flags = (MFIB_ITF_FLAG_FORWARD |
MFIB_ITF_FLAG_ACCEPT |
MFIB_ITF_FLAG_NEGATE_SIGNAL);
- mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_via_if3);
+ mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if3);
mfei = mfib_table_lookup_exact_match(fib_index,
pfx_s_g);
@@ -824,7 +796,7 @@ mfib_test_i (fib_protocol_t PROTO,
/* MFIB_TEST_NS(!mfib_test_entry_no_itf(mfei, tm->hw[3]->sw_if_index)); */
/*
- * remove the last path and the accpeting only interface,
+ * remove the last path and the accepting only interface,
* the entry still has flags so it remains
*/
vec_reset_length(two_paths);
@@ -863,10 +835,9 @@ mfib_test_i (fib_protocol_t PROTO,
*/
path_via_if0.frp_mitf_flags = (MFIB_ITF_FLAG_ACCEPT |
MFIB_ITF_FLAG_NEGATE_SIGNAL);
- mfei_g_2 = mfib_table_entry_path_update(fib_index,
- pfx_star_g_2,
- MFIB_SOURCE_API,
- &path_via_if0);
+ mfei_g_2 =
+ mfib_table_entry_path_update (fib_index, pfx_star_g_2, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
MFIB_TEST(!mfib_test_entry(mfei_g_2,
MFIB_ENTRY_FLAG_NONE,
0),
@@ -891,10 +862,9 @@ mfib_test_i (fib_protocol_t PROTO,
*/
path_via_if0.frp_mitf_flags = (MFIB_ITF_FLAG_ACCEPT |
MFIB_ITF_FLAG_NEGATE_SIGNAL);
- mfei_g_3 = mfib_table_entry_path_update(fib_index,
- pfx_star_g_3,
- MFIB_SOURCE_API,
- &path_via_if0);
+ mfei_g_3 =
+ mfib_table_entry_path_update (fib_index, pfx_star_g_3, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
MFIB_TEST(!mfib_test_entry(mfei_g_3,
MFIB_ENTRY_FLAG_NONE,
0),
@@ -1073,14 +1043,12 @@ mfib_test_i (fib_protocol_t PROTO,
.frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
};
- mfei_g_1 = mfib_table_entry_path_update(fib_index,
- pfx_star_g_1,
- MFIB_SOURCE_API,
- &path_via_nbr1);
- mfei_g_1 = mfib_table_entry_path_update(fib_index,
- pfx_star_g_1,
- MFIB_SOURCE_API,
- &path_via_nbr2);
+ mfei_g_1 =
+ mfib_table_entry_path_update (fib_index, pfx_star_g_1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_nbr1);
+ mfei_g_1 =
+ mfib_table_entry_path_update (fib_index, pfx_star_g_1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_nbr2);
MFIB_TEST(!mfib_test_entry(mfei_g_1,
MFIB_ENTRY_FLAG_NONE,
2,
@@ -1201,9 +1169,7 @@ mfib_test_i (fib_protocol_t PROTO,
* MPLS enable an interface so we get the MPLS table created
*/
mpls_table_create(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API, NULL);
- mpls_sw_interface_enable_disable(&mpls_main,
- tm->hw[0]->sw_if_index,
- 1, 0);
+ mpls_sw_interface_enable_disable (&mpls_main, tm->hw[0]->sw_if_index, 1);
lfei = fib_table_entry_update_one_path(0, // default MPLS Table
&pfx_3500,
@@ -1241,10 +1207,8 @@ mfib_test_i (fib_protocol_t PROTO,
FIB_FORW_CHAIN_TYPE_MPLS_EOS,
&mldp_dpo);
- mfei = mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_via_mldp);
+ mfei = mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_mldp);
MFIB_TEST(!mfib_test_entry(mfei,
MFIB_ENTRY_FLAG_NONE,
@@ -1256,10 +1220,8 @@ mfib_test_i (fib_protocol_t PROTO,
/*
* add a for-us path. this tests two types of non-attached paths on one entry
*/
- mfei = mfib_table_entry_path_update(fib_index,
- pfx_s_g,
- MFIB_SOURCE_API,
- &path_for_us);
+ mfei = mfib_table_entry_path_update (fib_index, pfx_s_g, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_for_us);
MFIB_TEST(!mfib_test_entry(mfei,
MFIB_ENTRY_FLAG_NONE,
2,
@@ -1299,9 +1261,7 @@ mfib_test_i (fib_protocol_t PROTO,
/*
* MPLS disable the interface
*/
- mpls_sw_interface_enable_disable(&mpls_main,
- tm->hw[0]->sw_if_index,
- 0, 0);
+ mpls_sw_interface_enable_disable (&mpls_main, tm->hw[0]->sw_if_index, 0);
mpls_table_delete(MPLS_FIB_DEFAULT_TABLE_ID, FIB_SOURCE_API);
/*
@@ -1585,10 +1545,8 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* Insert the less specific /28
*/
- mfib_table_entry_path_update(fib_index,
- pfx_cover,
- MFIB_SOURCE_API,
- &path_via_if1);
+ mfib_table_entry_path_update (fib_index, pfx_cover, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if1);
mfei_cover = mfib_table_lookup_exact_match(fib_index, pfx_cover);
@@ -1612,10 +1570,8 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* add another path to the cover
*/
- mfib_table_entry_path_update(fib_index,
- pfx_cover,
- MFIB_SOURCE_API,
- &path_via_if2);
+ mfib_table_entry_path_update (fib_index, pfx_cover, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if2);
/*
* expect the /32 and /28 to be via both boths
@@ -1669,10 +1625,8 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* add an accepting path to the cover
*/
- mfib_table_entry_path_update(fib_index,
- pfx_cover,
- MFIB_SOURCE_API,
- &path_via_if0);
+ mfib_table_entry_path_update (fib_index, pfx_cover, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if0);
/*
* expect the /32 and /28 to be via both boths
@@ -1707,10 +1661,8 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* add a for-us path to the cover
*/
- mfib_table_entry_path_update(fib_index,
- pfx_cover,
- MFIB_SOURCE_API,
- &path_for_us);
+ mfib_table_entry_path_update (fib_index, pfx_cover, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_for_us);
/*
* expect the /32 and /28 to be via all three paths
@@ -1778,10 +1730,9 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* source the /32 with its own path
*/
- mfei_host1 = mfib_table_entry_path_update(fib_index,
- pfx_host1,
- MFIB_SOURCE_API,
- &path_via_if2);
+ mfei_host1 =
+ mfib_table_entry_path_update (fib_index, pfx_host1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if2);
MFIB_TEST(!mfib_test_entry(mfei_host1,
MFIB_ENTRY_FLAG_NONE,
1,
@@ -1811,10 +1762,9 @@ mfib_test_rr_i (fib_protocol_t FPROTO,
/*
* add the RR back then remove the path and RR
*/
- mfei_host1 = mfib_table_entry_path_update(fib_index,
- pfx_host1,
- MFIB_SOURCE_API,
- &path_via_if2);
+ mfei_host1 =
+ mfib_table_entry_path_update (fib_index, pfx_host1, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &path_via_if2);
MFIB_TEST(!mfib_test_entry(mfei_host1,
MFIB_ENTRY_FLAG_NONE,
1,
diff --git a/src/plugins/unittest/mpcap_node.c b/src/plugins/unittest/mpcap_node.c
index 083c22e32df..12c62cf13cd 100644
--- a/src/plugins/unittest/mpcap_node.c
+++ b/src/plugins/unittest/mpcap_node.c
@@ -215,7 +215,6 @@ VLIB_NODE_FN (mpcap_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
0 /* is_trace */ );
}
-/* *INDENT-OFF* */
#ifndef CLIB_MARCH_VARIANT
VLIB_REGISTER_NODE (mpcap_node) =
{
@@ -249,7 +248,6 @@ mpcap_node_init (vlib_main_t *vm)
VLIB_INIT_FUNCTION (mpcap_node_init);
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/pool_test.c b/src/plugins/unittest/pool_test.c
index 237b6beea09..23ac6d6d95f 100644
--- a/src/plugins/unittest/pool_test.c
+++ b/src/plugins/unittest/pool_test.c
@@ -19,29 +19,37 @@ static clib_error_t *
test_pool_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
- int i;
+ static int sizes[] = { 3, 31, 2042, 2048 };
+
+ int i, j;
u64 *pool;
+ uword this_size;
- pool_init_fixed (pool, 2048);
+ for (j = 0; j < ARRAY_LEN (sizes); j++)
+ {
+ this_size = sizes[j];
- i = 0;
+ pool_init_fixed (pool, this_size);
- while (pool_free_elts (pool) > 0)
- {
- u64 *p __attribute__ ((unused));
+ i = 0;
- pool_get (pool, p);
- i++;
- }
+ while (pool_free_elts (pool) > 0)
+ {
+ u64 *p __attribute__ ((unused));
- vlib_cli_output (vm, "allocated %d elts\n", i);
+ pool_get (pool, p);
+ i++;
+ }
- for (--i; i >= 0; i--)
- {
- pool_put_index (pool, i);
- }
+ vlib_cli_output (vm, "allocated %d elts\n", i);
- ALWAYS_ASSERT (pool_free_elts (pool) == 2048);
+ for (--i; i >= 0; i--)
+ {
+ pool_put_index (pool, i);
+ }
+
+ ALWAYS_ASSERT (pool_free_elts (pool) == this_size);
+ }
vlib_cli_output (vm, "Test succeeded...\n");
return 0;
diff --git a/src/plugins/unittest/punt_test.c b/src/plugins/unittest/punt_test.c
index 0c4622283e0..7d00e5b8920 100644
--- a/src/plugins/unittest/punt_test.c
+++ b/src/plugins/unittest/punt_test.c
@@ -129,7 +129,6 @@ punt_test_pg1_ip6 (vlib_main_t * vm,
return (punt_test_fwd (vm, node, frame, FIB_PROTOCOL_IP6, SW_IF_INDEX_PG1));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (punt_test_pg0_ip4_node) = {
.function = punt_test_pg0_ip4,
.name = "punt-test-pg0-ip4",
@@ -154,7 +153,6 @@ VLIB_REGISTER_NODE (punt_test_pg1_ip6_node) = {
.vector_size = sizeof (u32),
.format_trace = format_punt_trace,
};
-/* *INDENT-ON* */
typedef struct punt_feat_trace_t_
{
@@ -242,7 +240,6 @@ punt_test_feat_ip6 (vlib_main_t * vm,
return (punt_test_feat_inline (vm, node, frame, 0));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (punt_test_feat_ip6_node) = {
.function = punt_test_feat_ip6,
.name = "punt-test-feat-ip6",
@@ -273,7 +270,6 @@ VNET_FEATURE_INIT (punt_test_feat_ip4_feature, static) =
.arc_name = "ip4-unicast",
.node_name = "punt-test-feat-ip4",
};
-/* *INDENT-ON* */
static clib_error_t *
punt_test (vlib_main_t * vm,
@@ -382,14 +378,12 @@ punt_test (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_fib_command, static) =
{
.path = "test punt",
.short_help = "punt unit tests - DO NOT RUN ON A LIVE SYSTEM",
.function = punt_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/rbtree_test.c b/src/plugins/unittest/rbtree_test.c
index bfab98c3cd7..4a1fcc4dd70 100644
--- a/src/plugins/unittest/rbtree_test.c
+++ b/src/plugins/unittest/rbtree_test.c
@@ -238,14 +238,12 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (rbtree_test_command, static) =
{
.path = "test rbtree",
.short_help = "internal rbtree unit tests",
.function = rbtree_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/segment_manager_test.c b/src/plugins/unittest/segment_manager_test.c
index 31b417aef24..a106470ee48 100644
--- a/src/plugins/unittest/segment_manager_test.c
+++ b/src/plugins/unittest/segment_manager_test.c
@@ -79,7 +79,6 @@ placeholder_server_rx_callback (session_t * s)
return -1;
}
-/* *INDENT-OFF* */
static session_cb_vft_t placeholder_session_cbs = {
.session_reset_callback = placeholder_session_reset_callback,
.session_connected_callback = placeholder_session_connected_callback,
@@ -89,7 +88,6 @@ static session_cb_vft_t placeholder_session_cbs = {
.add_segment_callback = placeholder_add_segment_callback,
.del_segment_callback = placeholder_del_segment_callback,
};
-/* *INDENT-ON* */
static char *states_str[] = {
#define _(sym,str) str,
@@ -178,14 +176,14 @@ segment_manager_test_pressure_1 (vlib_main_t * vm, unformat_input_t * input)
svm_fifo_enqueue (rx_fifo, fifo_size, data);
svm_fifo_enqueue (tx_fifo, fifo_size, data);
svm_fifo_enqueue (tx_fifo, fifo_size, data);
- svm_fifo_enqueue (tx_fifo, fifo_size, data);
- /* 8 chunks : 49% */
+ /* 7 chunks : ~44% */
rv = fifo_segment_get_mem_status (fs);
SEG_MGR_TEST ((rv == MEMORY_PRESSURE_NO_PRESSURE),
"fifo_segment_get_mem_status %s", states_str[rv]);
/* grow fifos */
+ svm_fifo_enqueue (tx_fifo, fifo_size, data);
svm_fifo_enqueue (rx_fifo, fifo_size, data);
svm_fifo_enqueue (tx_fifo, fifo_size, data);
@@ -212,7 +210,7 @@ segment_manager_test_pressure_1 (vlib_main_t * vm, unformat_input_t * input)
svm_fifo_dequeue_drop (tx_fifo, fifo_size);
svm_fifo_dequeue_drop (tx_fifo, fifo_size);
- /* 10 chunks : 61% */
+ /* 10 chunks : 63% */
rv = fifo_segment_get_mem_status (fs);
SEG_MGR_TEST ((rv == MEMORY_PRESSURE_LOW_PRESSURE),
"fifo_segment_get_mem_status %s", states_str[rv]);
@@ -224,7 +222,7 @@ segment_manager_test_pressure_1 (vlib_main_t * vm, unformat_input_t * input)
svm_fifo_enqueue (tx_fifo, fifo_size, data);
svm_fifo_enqueue (tx_fifo, fifo_size, data);
- /* 14 chunks : 85% */
+ /* 14 chunks : 88% */
rv = fifo_segment_get_mem_status (fs);
SEG_MGR_TEST ((rv == MEMORY_PRESSURE_HIGH_PRESSURE),
"fifo_segment_get_mem_status %s", states_str[rv]);
@@ -234,8 +232,7 @@ segment_manager_test_pressure_1 (vlib_main_t * vm, unformat_input_t * input)
svm_fifo_dequeue_drop (tx_fifo, fifo_size);
svm_fifo_dequeue_drop (tx_fifo, fifo_size);
-
- /* 10 chunks : 61% */
+ /* 10 chunks : 63% */
rv = fifo_segment_get_mem_status (fs);
SEG_MGR_TEST ((rv == MEMORY_PRESSURE_LOW_PRESSURE),
"fifo_segment_get_mem_status %s", states_str[rv]);
@@ -285,7 +282,7 @@ segment_manager_test_pressure_2 (vlib_main_t * vm, unformat_input_t * input)
.options = options,
.namespace_id = 0,
.session_cb_vft = &placeholder_session_cbs,
- .name = format (0, "segment_manager_test_pressure_1"),
+ .name = format (0, "segment_manager_test_pressure_2"),
};
attach_args.options[APP_OPTIONS_SEGMENT_SIZE] = app_seg_size;
@@ -313,8 +310,8 @@ segment_manager_test_pressure_2 (vlib_main_t * vm, unformat_input_t * input)
&rx_fifo, &tx_fifo);
SEG_MGR_TEST ((rv == 0), "segment_manager_alloc_session_fifos %d", rv);
- svm_fifo_set_size (rx_fifo, size_2MB);
- svm_fifo_set_size (tx_fifo, size_2MB);
+ svm_fifo_set_size (rx_fifo, size_1MB);
+ svm_fifo_set_size (tx_fifo, size_1MB);
/* fill fifos (but not add chunks) */
svm_fifo_enqueue (rx_fifo, fifo_size - 1, data);
@@ -326,9 +323,10 @@ segment_manager_test_pressure_2 (vlib_main_t * vm, unformat_input_t * input)
for (i = 0; i < 509; ++i)
{
svm_fifo_enqueue (rx_fifo, fifo_size, data);
+ svm_fifo_enqueue (tx_fifo, fifo_size, data);
}
- /* 510 chunks : 100% of 2MB */
+ /* 100% of 2MB */
rv = fifo_segment_get_mem_status (fs);
SEG_MGR_TEST ((rv == MEMORY_PRESSURE_HIGH_PRESSURE),
"fifo_segment_get_mem_status %s", states_str[rv]);
@@ -337,24 +335,22 @@ segment_manager_test_pressure_2 (vlib_main_t * vm, unformat_input_t * input)
rv = svm_fifo_enqueue (rx_fifo, fifo_size, data);
SEG_MGR_TEST ((rv == SVM_FIFO_EGROW), "svm_fifo_enqueue %d", rv);
- /* then, no-memory is detected */
- rv = fifo_segment_get_mem_status (fs);
- SEG_MGR_TEST ((rv == MEMORY_PRESSURE_NO_MEMORY),
- "fifo_segment_get_mem_status %s", states_str[rv]);
-
/* shrink fifos */
for (i = 0; i < 20; ++i)
{
svm_fifo_dequeue_drop (rx_fifo, fifo_size);
+ svm_fifo_dequeue_drop (tx_fifo, fifo_size);
}
/* 489 chunks : 96%, it is high-pressure level
* but the reached-mem-limit record is not reset
* so the no-memory state lasts.
*/
- rv = fifo_segment_get_mem_status (fs);
- SEG_MGR_TEST ((rv == MEMORY_PRESSURE_NO_MEMORY),
- "fifo_segment_get_mem_status %s", states_str[rv]);
+ /*
+ rv = fifo_segment_get_mem_status (fs);
+ SEG_MGR_TEST ((rv == MEMORY_PRESSURE_NO_MEMORY),
+ "fifo_segment_get_mem_status %s", states_str[rv]);
+ */
/* shrink fifos */
for (i = 0; i < 133; ++i)
@@ -368,9 +364,10 @@ segment_manager_test_pressure_2 (vlib_main_t * vm, unformat_input_t * input)
"fifo_segment_get_mem_status %s", states_str[rv]);
/* shrink fifos */
- for (i = 0; i < 354; ++i)
+ for (i = 0; i < 360; ++i)
{
svm_fifo_dequeue_drop (rx_fifo, fifo_size);
+ svm_fifo_dequeue_drop (tx_fifo, fifo_size);
}
/* 2 chunks : 3% of 2MB */
@@ -409,7 +406,7 @@ segment_manager_test_fifo_balanced_alloc (vlib_main_t * vm,
.options = options,
.namespace_id = 0,
.session_cb_vft = &placeholder_session_cbs,
- .name = format (0, "segment_manager_test_pressure_1"),
+ .name = format (0, "segment_manager_test_fifo_balanced_alloc"),
};
attach_args.options[APP_OPTIONS_SEGMENT_SIZE] = app_seg_size;
@@ -509,8 +506,9 @@ segment_manager_test_fifo_balanced_alloc (vlib_main_t * vm,
return 0;
}
-static int
-segment_manager_test_fifo_ops (vlib_main_t * vm, unformat_input_t * input)
+/* disabled until fifo tuning and memory pressure are properly working */
+__clib_unused static int
+segment_manager_test_fifo_ops (vlib_main_t *vm, unformat_input_t *input)
{
int rv, i;
segment_manager_t *sm;
@@ -689,7 +687,7 @@ segment_manager_test_prealloc_hdrs (vlib_main_t * vm,
{
u32 fifo_size = size_4KB, prealloc_hdrs, sm_index, fs_index;
u64 options[APP_OPTIONS_N_OPTIONS];
- uword app_seg_size = size_2MB;
+ uword app_seg_size = size_2MB * 2;
segment_manager_t *sm;
fifo_segment_t *fs;
int rv;
@@ -701,10 +699,10 @@ segment_manager_test_prealloc_hdrs (vlib_main_t * vm,
.options = options,
.namespace_id = 0,
.session_cb_vft = &placeholder_session_cbs,
- .name = format (0, "segment_manager_prealloc_hdrs"),
+ .name = format (0, "segment_manager_test_prealloc_hdrs"),
};
- prealloc_hdrs = (app_seg_size - (16 << 10)) / sizeof (svm_fifo_t);
+ prealloc_hdrs = 64;
attach_args.options[APP_OPTIONS_SEGMENT_SIZE] = app_seg_size;
attach_args.options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
@@ -752,8 +750,6 @@ segment_manager_test (vlib_main_t * vm,
res = segment_manager_test_pressure_2 (vm, input);
else if (unformat (input, "alloc"))
res = segment_manager_test_fifo_balanced_alloc (vm, input);
- else if (unformat (input, "fifo_ops"))
- res = segment_manager_test_fifo_ops (vm, input);
else if (unformat (input, "prealloc_hdrs"))
res = segment_manager_test_prealloc_hdrs (vm, input);
@@ -765,8 +761,6 @@ segment_manager_test (vlib_main_t * vm,
goto done;
if ((res = segment_manager_test_fifo_balanced_alloc (vm, input)))
goto done;
- if ((res = segment_manager_test_fifo_ops (vm, input)))
- goto done;
if ((res = segment_manager_test_prealloc_hdrs (vm, input)))
goto done;
}
@@ -780,7 +774,6 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_test_command, static) =
{
.path = "test segment-manager",
diff --git a/src/plugins/unittest/session_test.c b/src/plugins/unittest/session_test.c
index b54712c3885..b7627acc129 100644
--- a/src/plugins/unittest/session_test.c
+++ b/src/plugins/unittest/session_test.c
@@ -107,7 +107,6 @@ placeholder_server_rx_callback (session_t * s)
return -1;
}
-/* *INDENT-OFF* */
static session_cb_vft_t placeholder_session_cbs = {
.session_reset_callback = placeholder_session_reset_callback,
.session_connected_callback = placeholder_session_connected_callback,
@@ -117,7 +116,6 @@ static session_cb_vft_t placeholder_session_cbs = {
.add_segment_callback = placeholder_add_segment_callback,
.del_segment_callback = placeholder_del_segment_callback,
};
-/* *INDENT-ON* */
static int
session_create_lookpback (u32 table_id, u32 * sw_if_index,
@@ -136,7 +134,7 @@ session_create_lookpback (u32 table_id, u32 * sw_if_index,
if (table_id != 0)
{
ip_table_create (FIB_PROTOCOL_IP4, table_id, 0, 0);
- ip_table_bind (FIB_PROTOCOL_IP4, *sw_if_index, table_id, 0);
+ ip_table_bind (FIB_PROTOCOL_IP4, *sw_if_index, table_id);
}
vnet_sw_interface_set_flags (vnet_get_main (), *sw_if_index,
@@ -290,11 +288,11 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input)
/*
* Create the loopbacks
*/
- intf_addr[0].as_u32 = clib_host_to_net_u32 (0x01010101),
- session_create_lookpback (0, &sw_if_index[0], &intf_addr[0]);
+ intf_addr[0].as_u32 = clib_host_to_net_u32 (0x01010101);
+ session_create_lookpback (0, &sw_if_index[0], &intf_addr[0]);
- intf_addr[1].as_u32 = clib_host_to_net_u32 (0x02020202),
- session_create_lookpback (1, &sw_if_index[1], &intf_addr[1]);
+ intf_addr[1].as_u32 = clib_host_to_net_u32 (0x02020202);
+ session_create_lookpback (1, &sw_if_index[1], &intf_addr[1]);
session_add_del_route_via_lookup_in_table (0, 1, &intf_addr[1], 32,
1 /* is_add */ );
@@ -404,14 +402,6 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input)
SESSION_TEST ((tc->lcl_port == placeholder_client_port),
"ports should be equal");
- /* These sessions, because of the way they're established are pinned to
- * main thread, even when we have workers and we avoid polling main thread,
- * i.e., we can't cleanup pending disconnects, so force cleanup for both
- */
- session_transport_cleanup (s);
- s = session_get (accepted_session_index, accepted_session_thread);
- session_transport_cleanup (s);
-
vnet_app_detach_args_t detach_args = {
.app_index = server_index,
.api_client_index = ~0,
@@ -420,6 +410,10 @@ session_test_endpoint_cfg (vlib_main_t * vm, unformat_input_t * input)
detach_args.app_index = client_index;
vnet_application_detach (&detach_args);
+ ns_args.is_add = 0;
+ error = vnet_app_namespace_add_del (&ns_args);
+ SESSION_TEST ((error == 0), "app ns delete should succeed: %d", error);
+
/* Allow the disconnects to finish before removing the routes. */
vlib_process_suspend (vm, 10e-3);
@@ -531,7 +525,7 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input)
error = vnet_application_attach (&attach_args);
SESSION_TEST ((error != 0), "app attachment should fail");
- SESSION_TEST ((error == VNET_API_ERROR_APP_WRONG_NS_SECRET),
+ SESSION_TEST ((error == SESSION_E_WRONG_NS_SECRET),
"code should be wrong ns secret: %d", error);
/*
@@ -766,6 +760,10 @@ session_test_namespace (vlib_main_t * vm, unformat_input_t * input)
detach_args.app_index = server_index;
vnet_application_detach (&detach_args);
+ ns_args.is_add = 0;
+ error = vnet_app_namespace_add_del (&ns_args);
+ SESSION_TEST ((error == 0), "app ns delete should succeed: %d", error);
+
/*
* Cleanup
*/
@@ -1601,6 +1599,10 @@ session_test_rules (vlib_main_t * vm, unformat_input_t * input)
detach_args.app_index = server_index2;
vnet_application_detach (&detach_args);
+ ns_args.is_add = 0;
+ error = vnet_app_namespace_add_del (&ns_args);
+ SESSION_TEST ((error == 0), "app ns delete should succeed: %d", error);
+
vec_free (ns_id);
vec_free (attach_args.name);
return 0;
@@ -1621,6 +1623,7 @@ session_test_proxy (vlib_main_t * vm, unformat_input_t * input)
u16 lcl_port = 1234, rmt_port = 4321;
app_namespace_t *app_ns;
int verbose = 0, error = 0;
+ app_listener_t *al;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -1695,8 +1698,9 @@ session_test_proxy (vlib_main_t * vm, unformat_input_t * input)
SESSION_TEST ((tc != 0), "lookup 1.2.3.4 1234 5.6.7.8 4321 should be "
"successful");
s = listen_session_get (tc->s_index);
- SESSION_TEST ((s->app_index == server_index), "lookup should return"
- " the server");
+ al = app_listener_get (s->al_index);
+ SESSION_TEST ((al->app_index == server_index), "lookup should return"
+ " the server");
tc = session_lookup_connection_wt4 (0, &rmt_ip, &rmt_ip, lcl_port, rmt_port,
TRANSPORT_PROTO_TCP, 0, &is_filtered);
@@ -1767,6 +1771,74 @@ wait_for_event (svm_msg_q_t * mq, int fd, int epfd, u8 use_eventfd)
}
}
+/* Used to be part of application_worker.c prior to adding support for
+ * async rx
+ */
+static int
+test_mq_try_lock_and_alloc_msg (svm_msg_q_t *mq, session_mq_rings_e ring,
+ svm_msg_q_msg_t *msg)
+{
+ int rv, n_try = 0;
+
+ while (n_try < 75)
+ {
+ rv = svm_msg_q_lock_and_alloc_msg_w_ring (mq, ring, SVM_Q_NOWAIT, msg);
+ if (!rv)
+ return 0;
+ /*
+ * Break the loop if mq is full, usually this is because the
+ * app has crashed or is hanging on somewhere.
+ */
+ if (rv != -1)
+ break;
+ n_try += 1;
+ usleep (1);
+ }
+
+ return -1;
+}
+
+/* Used to be part of application_worker.c prior to adding support for
+ * async rx and was used for delivering io events over mq
+ * NB: removed handling of mq congestion
+ */
+static inline int
+test_app_send_io_evt_rx (app_worker_t *app_wrk, session_t *s)
+{
+ svm_msg_q_msg_t _mq_msg = { 0 }, *mq_msg = &_mq_msg;
+ session_event_t *evt;
+ svm_msg_q_t *mq;
+ u32 app_session;
+ int rv;
+
+ if (app_worker_application_is_builtin (app_wrk))
+ return app_worker_rx_notify (app_wrk, s);
+
+ if (svm_fifo_has_event (s->rx_fifo))
+ return 0;
+
+ app_session = s->rx_fifo->shr->client_session_index;
+ mq = app_wrk->event_queue;
+
+ rv = test_mq_try_lock_and_alloc_msg (mq, SESSION_MQ_IO_EVT_RING, mq_msg);
+
+ if (PREDICT_FALSE (rv))
+ {
+ clib_warning ("failed to alloc mq message");
+ return -1;
+ }
+
+ evt = svm_msg_q_msg_data (mq, mq_msg);
+ evt->event_type = SESSION_IO_EVT_RX;
+ evt->session_index = app_session;
+
+ (void) svm_fifo_set_event (s->rx_fifo);
+
+ svm_msg_q_add_and_unlock (mq, mq_msg);
+
+ return 0;
+}
+
static int
session_test_mq_speed (vlib_main_t * vm, unformat_input_t * input)
{
@@ -1881,7 +1953,7 @@ session_test_mq_speed (vlib_main_t * vm, unformat_input_t * input)
{
while (svm_fifo_has_event (rx_fifo))
;
- app_worker_lock_and_send_event (app_wrk, &s, SESSION_IO_EVT_RX);
+ test_app_send_io_evt_rx (app_wrk, &s);
}
}
@@ -1930,7 +2002,7 @@ session_test_mq_basic (vlib_main_t * vm, unformat_input_t * input)
smq = svm_msg_q_alloc (cfg);
svm_msg_q_attach (mq, smq);
- SESSION_TEST (mq != 0, "svm_msg_q_alloc");
+ SESSION_TEST (smq != 0, "svm_msg_q_alloc");
SESSION_TEST (vec_len (mq->rings) == 2, "ring allocation");
rings_ptr = (u8 *) mq->rings[0].shr->data;
vec_foreach (ring, mq->rings)
@@ -2056,14 +2128,12 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_test_command, static) =
{
.path = "test session",
.short_help = "internal session unit tests",
.function = session_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/sparse_vec_test.c b/src/plugins/unittest/sparse_vec_test.c
index bb875452cdf..b2239c64a18 100644
--- a/src/plugins/unittest/sparse_vec_test.c
+++ b/src/plugins/unittest/sparse_vec_test.c
@@ -55,14 +55,12 @@ test_sparse_vec_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_sparse_vec_command, static) =
{
.path = "test sparse_vec",
.short_help = "test sparse_vec",
.function = test_sparse_vec_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/string_test.c b/src/plugins/unittest/string_test.c
index d3924185232..3b39bf56478 100644
--- a/src/plugins/unittest/string_test.c
+++ b/src/plugins/unittest/string_test.c
@@ -508,7 +508,7 @@ test_clib_strncmp (vlib_main_t * vm, unformat_input_t * input)
/* unterminated s1 */
s1[s1len] = 0x1;
- CLIB_MEM_UNPOISON (s1, CLIB_STRING_MACRO_MAX);
+ clib_mem_unpoison (s1, CLIB_STRING_MACRO_MAX);
indicator = clib_strncmp (s1, "Every moment is a fresh beginning",
sizeof ("every moment is a fresh beginning") - 1);
if (indicator != 0)
@@ -574,60 +574,6 @@ test_strcpy_s (vlib_main_t * vm, unformat_input_t * input)
}
static int
-test_clib_strcpy (vlib_main_t * vm, unformat_input_t * input)
-{
- char src[] = "The journey of a one thousand miles begins with one step.";
- char dst[100];
- int indicator;
- errno_t err;
-
- vlib_cli_output (vm, "Test clib_strcpy...");
-
- err = clib_strcpy (dst, src);
- if (err != EOK)
- return -1;
-
- /* This better not fail but check anyhow */
- if (strcmp_s (dst, clib_strnlen (dst, sizeof (dst)), src, &indicator) !=
- EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* verify it against strcpy */
- strcpy (dst, src); //NOSONAR
-
- /* This better not fail but check anyhow */
- if (strcmp_s (dst, clib_strnlen (dst, sizeof (dst)), src, &indicator) !=
- EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* Negative tests */
-
- err = clib_strcpy (0, 0);
- if (err == EOK)
- return -1;
-
- /* overlap fail */
-#if __GNUC__ < 8
- /* GCC 8 flunks this one at compile time... */
- err = clib_strcpy (dst, dst);
- if (err == EOK)
- return -1;
-#endif
-
- /* overlap fail */
- err = clib_strcpy (dst, dst + 1);
- if (err == EOK)
- return -1;
-
- /* OK, seems to work */
- return 0;
-}
-
-static int
test_strncpy_s (vlib_main_t * vm, unformat_input_t * input)
{
char src[] = "Those who dare to fail miserably can achieve greatly.";
@@ -904,71 +850,6 @@ test_strcat_s (vlib_main_t * vm, unformat_input_t * input)
}
static int
-test_clib_strcat (vlib_main_t * vm, unformat_input_t * input)
-{
- char src[100], dst[100], old_dst[100];
- size_t s1size = sizeof (dst); // including null
- errno_t err;
- int indicator;
-
- vlib_cli_output (vm, "Test clib_strcat...");
-
- strcpy_s (dst, sizeof (dst), "Tough time never last ");
- strcpy_s (src, sizeof (src), "but tough people do");
- err = clib_strcat (dst, src);
- if (err != EOK)
- return -1;
- if (strcmp_s (dst, s1size - 1,
- "Tough time never last but tough people do",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
- /* verify it against strcat */
- strcpy_s (dst, sizeof (dst), "Tough time never last ");
- strcpy_s (src, sizeof (src), "but tough people do");
- strcat (dst, src);
- if (strcmp_s (dst, s1size - 1,
- "Tough time never last but tough people do",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* empty string concatenation */
- clib_strncpy (old_dst, dst, clib_strnlen (dst, sizeof (dst)));
- err = clib_strcat (dst, "");
- if (err != EOK)
- return -1;
- /* verify dst is untouched */
- if (strcmp_s (dst, s1size - 1, old_dst, &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* negative stuff */
- err = clib_strcat (0, 0);
- if (err != EINVAL)
- return -1;
-
- /* overlap fail */
- err = clib_strcat (dst, dst + 1);
- if (err != EINVAL)
- return -1;
-
- /* overlap fail */
-#if __GNUC__ < 8
- /* GCC 8 flunks this one at compile time... */
- err = clib_strcat (dst, dst);
- if (err != EINVAL)
- return -1;
-#endif
-
- /* OK, seems to work */
- return 0;
-}
-
-static int
test_strncat_s (vlib_main_t * vm, unformat_input_t * input)
{
char src[100], dst[100], old_dst[100];
@@ -1096,126 +977,6 @@ test_strncat_s (vlib_main_t * vm, unformat_input_t * input)
}
static int
-test_clib_strncat (vlib_main_t * vm, unformat_input_t * input)
-{
- char src[100], dst[100], old_dst[100];
- size_t s1size = sizeof (dst); // including null
- errno_t err;
- char s1[] = "Two things are infinite: ";
- char s2[] = "the universe and human stupidity; ";
- int indicator;
-
- vlib_cli_output (vm, "Test clib_strncat...");
-
- /* n == strlen src */
- strcpy_s (dst, sizeof (dst), s1);
- strcpy_s (src, sizeof (src), s2);
- err = clib_strncat (dst, src, clib_strnlen (src, sizeof (src)));
- if (err != EOK)
- return -1;
- if (strcmp_s (dst, s1size - 1,
- "Two things are infinite: the universe and human stupidity; ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
- /* verify it against strncat */
- strcpy_s (dst, sizeof (dst), s1);
- strncat (dst, src, clib_strnlen (src, sizeof (src)));
- if (strcmp_s (dst, s1size - 1,
- "Two things are infinite: the universe and human stupidity; ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* n > strlen src */
- strcpy_s (dst, sizeof (dst), s1);
- err = clib_strncat (dst, src, clib_strnlen (src, sizeof (src)) + 10);
- if (err != EOK)
- return -1;
- if (strcmp_s (dst, s1size - 1,
- "Two things are infinite: the universe and human stupidity; ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
- /* verify it against strncat */
- strcpy_s (dst, sizeof (dst), s1);
- strncat (dst, src, clib_strnlen (src, sizeof (src)));
- if (strcmp_s (dst, s1size - 1,
- "Two things are infinite: the universe and human stupidity; ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* zero length strncat */
- clib_strncpy (old_dst, dst, clib_strnlen (dst, sizeof (dst)));
- err = clib_strncat (dst, src, 0);
- if (err != EOK)
- return -1;
- /* verify dst is untouched */
- if (strcmp_s (dst, s1size - 1, old_dst, &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* empty string, wrong n concatenation */
- err = clib_strncat (dst, "", 10);
- if (err != EOK)
- return -1;
- /* verify dst is untouched */
- if (strcmp_s (dst, s1size - 1, old_dst, &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* limited concatenation, string > n, copy up to n */
- strcpy_s (dst, sizeof (dst), s1);
- err = clib_strncat (dst, s2, 13);
- if (err != EOK)
- return -1;
- if (strcmp_s (dst, s1size - 1, "Two things are infinite: the universe ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
- /* verify it against strncat */
-#if __GNUC__ < 8
- /* GCC 8 debian flunks this one at compile time */
- strcpy_s (dst, sizeof (dst), s1);
- strncat (dst, s2, 13);
- if (strcmp_s (dst, s1size - 1, "Two things are infinite: the universe ",
- &indicator) != EOK)
- return -1;
- if (indicator != 0)
- return -1;
-#endif
-
- /* negative stuff */
- err = clib_strncat (0, 0, 1);
- if (err != EINVAL)
- return -1;
-
- /* overlap fail */
- err = clib_strncat (dst, dst + 1, s1size - 1);
- if (err != EINVAL)
- return -1;
-
- /* overlap fail */
-#if __GNUC__ < 8
- /* GCC 8 flunks this one at compile time... */
- err = clib_strncat (dst, dst, clib_strnlen (dst, sizeof (dst)));
- if (err != EINVAL)
- return -1;
-#endif
-
- /* OK, seems to work */
- return 0;
-}
-
-static int
test_strtok_s (vlib_main_t * vm, unformat_input_t * input)
{
int indicator;
@@ -1540,191 +1301,27 @@ test_strstr_s (vlib_main_t * vm, unformat_input_t * input)
return 0;
}
-static int
-test_clib_strstr (vlib_main_t * vm, unformat_input_t * input)
-{
- char *sub, *s;
- char s1[64];
- size_t s1len = sizeof (s1) - 1; // excluding null
- int indicator;
-
- vlib_cli_output (vm, "Test clib_strstr...");
-
- /* substring not present */
- strcpy_s (s1, s1len, "success is not final, failure is not fatal.");
- sub = clib_strstr (s1, "failures");
- if (sub != 0)
- return -1;
- /* verify it against strstr */
- sub = strstr (s1, "failures");
- if (sub != 0)
- return -1;
-
- /* substring present */
- sub = clib_strstr (s1, "failure");
- if (sub == 0)
- return -1;
- if (strcmp_s (sub, strlen (sub), "failure is not fatal.", &indicator) !=
- EOK)
- return -1;
- if (indicator != 0)
- return -1;
- /* verify it against strstr */
- sub = strstr (s1, "failure");
- if (sub == 0)
- return -1;
- if (strcmp_s (sub, strlen (sub), "failure is not fatal.", &indicator) !=
- EOK)
- return -1;
- if (indicator != 0)
- return -1;
-
- /* negative stuff */
-
- /* Null pointers test */
- s = 0;
- sub = clib_strstr (s, s);
- if (sub != 0)
- return -1;
- /*
- * Can't verify it against strstr for this test. Null pointers cause strstr
- * to crash. Go figure!
- */
-
- /* unterminated s1 and s2 */
- memset_s (s1, ARRAY_LEN (s1), 0xfe, ARRAY_LEN (s1));
- CLIB_MEM_UNPOISON (s1, CLIB_STRING_MACRO_MAX);
- sub = clib_strstr (s1, s1);
- if (sub == 0)
- return -1;
- /*
- * Can't verify it against strstr for this test. Unterminated string causes
- * strstr to crash. Go figure!
- */
-
- /* OK, seems to work */
- return 0;
-}
-
-static int
-test_clib_count_equal (vlib_main_t * vm, unformat_input_t * input)
-{
- u64 s64[15];
- u32 s32[31];
- u16 s16[63];
- u8 s8[127];
- uword count;
-
- vlib_cli_output (vm, "Test clib_count_equal_u64...");
- memset (s64, 0, sizeof (s64));
- count = clib_count_equal_u64 (s64, 0);
- if (0 != count)
- return -1;
- count = clib_count_equal_u64 (s64, 1);
- if (1 != count)
- return -1;
- count = clib_count_equal_u64 (s64, 3);
- if (3 != count)
- return -1;
- count = clib_count_equal_u64 (s64, 15);
- if (15 != count)
- return -1;
- s64[10] = 0xcafe;
- count = clib_count_equal_u64 (s64, 13);
- if (10 != count)
- return -1;
- s64[10] = 0;
-
- vlib_cli_output (vm, "Test clib_count_equal_u32...");
- memset (s32, 0, sizeof (s32));
- count = clib_count_equal_u32 (s32, 0);
- if (0 != count)
- return -1;
- count = clib_count_equal_u32 (s32, 1);
- if (1 != count)
- return -1;
- count = clib_count_equal_u32 (s32, 3);
- if (3 != count)
- return -1;
- count = clib_count_equal_u32 (s32, 31);
- if (31 != count)
- return -1;
- s32[10] = 0xcafe;
- count = clib_count_equal_u32 (s32, 13);
- if (10 != count)
- return -1;
- s32[10] = 0;
-
- vlib_cli_output (vm, "Test clib_count_equal_u16...");
- memset (s16, 0, sizeof (s16));
- count = clib_count_equal_u16 (s16, 0);
- if (0 != count)
- return -1;
- count = clib_count_equal_u16 (s16, 1);
- if (1 != count)
- return -1;
- count = clib_count_equal_u16 (s16, 3);
- if (3 != count)
- return -1;
- count = clib_count_equal_u16 (s16, 63);
- if (63 != count)
- return -1;
- s16[10] = 0xcafe;
- count = clib_count_equal_u16 (s16, 13);
- if (10 != count)
- return -1;
- s16[10] = 0;
-
- vlib_cli_output (vm, "Test clib_count_equal_u8...");
- memset (s8, 0, sizeof (s8));
- count = clib_count_equal_u8 (s8, 0);
- if (0 != count)
- return -1;
- count = clib_count_equal_u8 (s8, 1);
- if (1 != count)
- return -1;
- count = clib_count_equal_u8 (s8, 3);
- if (3 != count)
- return -1;
- count = clib_count_equal_u8 (s8, 127);
- if (127 != count)
- return -1;
- s8[10] = 0xfe;
- count = clib_count_equal_u8 (s8, 13);
- if (10 != count)
- return -1;
- s8[10] = 0;
-
- return 0;
-}
-
-
-#define foreach_string_test \
- _ (0, MEMCPY_S, "memcpy_s", memcpy_s) \
- _ (1, CLIB_MEMCPY, "clib_memcpy", clib_memcpy) \
- _ (2, MEMSET_S , "memset_s", memset_s) \
- _ (3, CLIB_MEMSET , "clib_memset", clib_memset) \
- _ (4, MEMCMP_S, "memcmp_s", memcmp_s) \
- _ (5, CLIB_MEMCMP, "clib_memcmp", clib_memcmp) \
- _ (6, STRCMP_S, "strcmp_s", strcmp_s) \
- _ (7, CLIB_STRCMP, "clib_strcmp", clib_strcmp) \
- _ (8, STRNCMP_S, "strncmp_s", strncmp_s) \
- _ (9, CLIB_STRNCMP, "clib_strncmp", clib_strncmp) \
- _ (10, STRCPY_S, "strcpy_s", strcpy_s) \
- _ (11, CLIB_STRCPY, "clib_strcpy", clib_strcpy) \
- _ (12, STRNCPY_S, "strncpy_s", strncpy_s) \
- _ (13, CLIB_STRNCPY, "clib_strncpy", clib_strncpy) \
- _ (14, STRCAT_S, "strcat_s", strcat_s) \
- _ (15, CLIB_STRCAT, "clib_strcat", clib_strcat) \
- _ (16, STRNCAT_S, "strncat_s", strncat_s) \
- _ (17, CLIB_STRNCAT, "clib_strncat", clib_strncat) \
- _ (18, STRTOK_S, "strtok_s", strtok_s) \
- _ (19, CLIB_STRTOK, "clib_strtok", clib_strtok) \
- _ (20, STRNLEN_S, "strnlen_s", strnlen_s) \
- _ (21, CLIB_STRNLEN, "clib_strnlen", clib_strnlen) \
- _ (22, STRSTR_S, "strstr_s", strstr_s) \
- _ (23, CLIB_STRSTR, "clib_strstr", clib_strstr) \
- _ (24, CLIB_COUNT_EQUAL, "clib_count_equal", clib_count_equal)
+#define foreach_string_test \
+ _ (0, MEMCPY_S, "memcpy_s", memcpy_s) \
+ _ (1, CLIB_MEMCPY, "clib_memcpy", clib_memcpy) \
+ _ (2, MEMSET_S, "memset_s", memset_s) \
+ _ (3, CLIB_MEMSET, "clib_memset", clib_memset) \
+ _ (4, MEMCMP_S, "memcmp_s", memcmp_s) \
+ _ (5, CLIB_MEMCMP, "clib_memcmp", clib_memcmp) \
+ _ (6, STRCMP_S, "strcmp_s", strcmp_s) \
+ _ (7, CLIB_STRCMP, "clib_strcmp", clib_strcmp) \
+ _ (8, STRNCMP_S, "strncmp_s", strncmp_s) \
+ _ (9, CLIB_STRNCMP, "clib_strncmp", clib_strncmp) \
+ _ (10, STRCPY_S, "strcpy_s", strcpy_s) \
+ _ (11, STRNCPY_S, "strncpy_s", strncpy_s) \
+ _ (12, CLIB_STRNCPY, "clib_strncpy", clib_strncpy) \
+ _ (13, STRCAT_S, "strcat_s", strcat_s) \
+ _ (14, STRNCAT_S, "strncat_s", strncat_s) \
+ _ (15, STRTOK_S, "strtok_s", strtok_s) \
+ _ (16, CLIB_STRTOK, "clib_strtok", clib_strtok) \
+ _ (17, STRNLEN_S, "strnlen_s", strnlen_s) \
+ _ (18, CLIB_STRNLEN, "clib_strnlen", clib_strnlen) \
+ _ (19, STRSTR_S, "strstr_s", strstr_s)
typedef enum
{
@@ -1732,7 +1329,7 @@ typedef enum
foreach_string_test
#undef _
#define STRING_TEST_FIRST STRING_TEST_MEMCPY_S
-#define STRING_TEST_LAST STRING_TEST_CLIB_COUNT_EQUAL
+#define STRING_TEST_LAST STRING_TEST_STRSTR_S
} string_test_t;
static uword
@@ -1806,19 +1403,16 @@ string_test_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (string_test_command, static) =
-{
+VLIB_CLI_COMMAND (string_test_command, static) = {
.path = "test string",
- .short_help = "test string [memcpy_s | clib_memcpy | memset_s | "
- "clib_memset | memcmp_s | clib_memcmp | strcmp_s | clib_strcmp | "
- "strncmp_s | clib_strncmp | strcpy_s | clib_strcpy | strncpy_s | "
- "clib_strncpy | strcat_s | clib_strcat | strncat_s | clib_strncat | "
- "strtok_s | clib_strtok | strnlen_s | clib_strnlen | strstr_s | "
- "clib_strstr | clib_count_equal ]",
+ .short_help =
+ "test string [memcpy_s | clib_memcpy | memset_s | "
+ "clib_memset | memcmp_s | clib_memcmp | strcmp_s | clib_strcmp | "
+ "strncmp_s | clib_strncmp | strcpy_s | strncpy_s | "
+ "clib_strncpy | strcat_s | strncat_s | "
+ "strtok_s | clib_strtok | strnlen_s | clib_strnlen | strstr_s ]",
.function = string_test_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/svm_fifo_test.c b/src/plugins/unittest/svm_fifo_test.c
index 4351bced921..9feb37cbc25 100644
--- a/src/plugins/unittest/svm_fifo_test.c
+++ b/src/plugins/unittest/svm_fifo_test.c
@@ -43,7 +43,6 @@ typedef struct
u32 len;
} test_pattern_t;
-/* *INDENT-OFF* */
test_pattern_t test_pattern[] = {
{380, 8}, {768, 8}, {1156, 8}, {1544, 8}, {1932, 8}, {2320, 8}, {2708, 8},
{2992, 8}, {372, 8}, {760, 8}, {1148, 8}, {1536, 8}, {1924, 8}, {2312, 8},
@@ -102,7 +101,6 @@ test_pattern_t test_pattern[] = {
/* missing from original data set */
{388, 4}, {776, 4}, {1164, 4}, {1552, 4}, {1940, 4}, {2328, 4},
};
-/* *INDENT-ON* */
int
pattern_cmp (const void *arg1, const void *arg2)
@@ -195,7 +193,7 @@ fifo_prepare (fifo_segment_t * fs, u32 fifo_size)
f = fifo_segment_alloc_fifo (fs, fifo_size, FIFO_SEGMENT_RX_FIFO);
/* Paint 1st fifo chunk with -1's */
- c = svm_fifo_head_chunk (f);
+ c = f_head_cptr (f);
clib_memset (c->data, 0xFF, c->length);
svm_fifo_init_ooo_lookup (f, 1 /* deq ooo */ );
@@ -1958,7 +1956,7 @@ sfifo_test_fifo_indirect (vlib_main_t * vm, unformat_input_t * input)
svm_fifo_enqueue_nocopy (f, 4096);
SFIFO_TEST (svm_fifo_is_sane (f), "fifo should be sane");
- c = svm_fifo_tail_chunk (f);
+ c = f_tail_cptr (f);
SFIFO_TEST (c == f_end_cptr (f), "tail is end chunk");
/* Initialize head chunk */
@@ -1972,7 +1970,7 @@ sfifo_test_fifo_indirect (vlib_main_t * vm, unformat_input_t * input)
rv = svm_fifo_dequeue (f, 4096, data_buf);
SFIFO_TEST (rv == 4096, "dequeue should work");
- c = svm_fifo_head_chunk (f);
+ c = f_head_cptr (f);
SFIFO_TEST (c == f_end_cptr (f), "head chunk should be last");
rv = svm_fifo_max_read_chunk (f);
@@ -1993,9 +1991,7 @@ sfifo_test_fifo_indirect (vlib_main_t * vm, unformat_input_t * input)
return 0;
}
-/* *INDENT-OFF* */
svm_fifo_trace_elem_t fifo_trace[] = {};
-/* *INDENT-ON* */
static int
sfifo_test_fifo_replay (vlib_main_t * vm, unformat_input_t * input)
@@ -2519,7 +2515,7 @@ sfifo_test_fifo_segment_mempig (int verbose)
fifo_segment_free_fifo (sp, f);
}
- _vec_len (flist) = 0;
+ vec_set_len (flist, 0);
for (i = 0; i < 1000; i++)
{
@@ -2863,14 +2859,12 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (svm_fifo_test_command, static) =
{
.path = "test svm fifo",
.short_help = "internal svm fifo unit tests",
.function = svm_fifo_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/tcp_test.c b/src/plugins/unittest/tcp_test.c
index 25b6744244e..34033a0b622 100644
--- a/src/plugins/unittest/tcp_test.c
+++ b/src/plugins/unittest/tcp_test.c
@@ -35,9 +35,7 @@
} \
}
-/* *INDENT-OFF* */
scoreboard_trace_elt_t sb_trace[] = {};
-/* *INDENT-ON* */
static int
tcp_test_scoreboard_replay (vlib_main_t * vm, unformat_input_t * input)
@@ -1596,14 +1594,12 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_test_command, static) =
{
.path = "test tcp",
.short_help = "internal tcp unit tests",
.function = tcp_test,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/test_buffer.c b/src/plugins/unittest/test_buffer.c
index 18938d888bb..24c86cd8697 100644
--- a/src/plugins/unittest/test_buffer.c
+++ b/src/plugins/unittest/test_buffer.c
@@ -16,48 +16,237 @@
#include <vlib/vlib.h>
#include <vlib/buffer_funcs.h>
-#define TEST_I(_cond, _comment, _args...) \
-({ \
- int _evald = (_cond); \
- if (!(_evald)) { \
- fformat(stderr, "FAIL:%d: " _comment "\n", \
- __LINE__, ##_args); \
- } else { \
- fformat(stderr, "PASS:%d: " _comment "\n", \
- __LINE__, ##_args); \
- } \
- _evald; \
-})
-
-#define TEST(_cond, _comment, _args...) \
-{ \
- if (!TEST_I(_cond, _comment, ##_args)) { \
- return 1; \
- } \
+#define TEST_I(_cond, _comment, _args...) \
+ ({ \
+ int _evald = (0 == (_cond)); \
+ if (_evald) \
+ { \
+ fformat (stderr, "FAIL:%d: " _comment "\n", __LINE__, ##_args); \
+ } \
+ else \
+ { \
+ fformat (stderr, "PASS:%d: " _comment "\n", __LINE__, ##_args); \
+ } \
+ _evald; \
+ })
+
+#define TEST(_cond, _comment, _args...) \
+ { \
+ if (TEST_I (_cond, _comment, ##_args)) \
+ { \
+ goto err; \
+ } \
+ }
+
+typedef struct
+{
+ i16 current_data;
+ u16 current_length;
+ u8 ref_count;
+} chained_buffer_template_t;
+
+static int
+build_chain (vlib_main_t *vm, const chained_buffer_template_t *tmpl, u32 n,
+ clib_random_buffer_t *randbuf, u8 **rand, vlib_buffer_t **b_,
+ u32 *bi_)
+{
+ vlib_buffer_t *bufs[2 * VLIB_BUFFER_LINEARIZE_MAX], **b = bufs;
+ u32 bis[2 * VLIB_BUFFER_LINEARIZE_MAX + 1], *bi = bis;
+ u32 n_alloc;
+
+ if (rand)
+ vec_reset_length (*rand);
+
+ ASSERT (n <= ARRAY_LEN (bufs));
+ n_alloc = vlib_buffer_alloc (vm, bi, n);
+ if (n_alloc != n)
+ {
+ vlib_buffer_free (vm, bi, n_alloc);
+ return 0;
+ }
+
+ vlib_get_buffers (vm, bis, bufs, n);
+
+ while (n > 0)
+ {
+ b[0]->next_buffer = bi[1];
+ b[0]->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b[0]->current_data = tmpl->current_data;
+ b[0]->current_length = tmpl->current_length;
+ b[0]->ref_count = 0xff == tmpl->ref_count ? 1 : tmpl->ref_count;
+
+ if (rand)
+ {
+ const u16 len = b[0]->current_length;
+ if (len)
+ {
+ vec_add (*rand, clib_random_buffer_get_data (randbuf, len), len);
+ void *dst = vlib_buffer_get_current (b[0]);
+ const void *src =
+ vec_elt_at_index (*rand, vec_len (*rand) - len);
+ clib_memcpy_fast (dst, src, len);
+ }
+ }
+
+ b++;
+ bi++;
+ tmpl++;
+ n--;
+ }
+
+ b[-1]->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+
+ *b_ = bufs[0];
+ *bi_ = bis[0];
+ return 1;
+}
+
+static int
+check_chain (vlib_main_t *vm, vlib_buffer_t *b, const u8 *rand)
+{
+ int len_chain = vlib_buffer_length_in_chain (vm, b);
+ int len;
+
+ /* check for data corruption */
+ if (clib_memcmp (vlib_buffer_get_current (b), vec_elt_at_index (rand, 0),
+ b->current_length))
+ return 0;
+ len = b->current_length;
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ if (clib_memcmp (vlib_buffer_get_current (b),
+ vec_elt_at_index (rand, len), b->current_length))
+ return 0;
+ len += b->current_length;
+ }
+
+ /* check for data truncation */
+ if (len != vec_len (rand))
+ return 0;
+
+ /* check total length update is correct */
+ if (len != len_chain)
+ return 0;
+
+ return 1;
+}
+
+static int
+test_chain (vlib_main_t *vm, const chained_buffer_template_t *tmpl,
+ const u32 n, const int clone_off, clib_random_buffer_t *randbuf,
+ u8 **rand)
+{
+ vlib_buffer_t *b;
+ u32 bi[2];
+ int ret = 0;
+
+ if (!build_chain (vm, tmpl, n, randbuf, rand, &b, bi))
+ goto err0;
+
+ if (clone_off)
+ {
+ if (2 != vlib_buffer_clone (vm, bi[0], bi, 2, clone_off))
+ goto err1;
+ b = vlib_get_buffer (vm, bi[0]);
+ }
+
+ if (!(ret = vlib_buffer_chain_linearize (vm, b)))
+ goto err2;
+
+ if (!check_chain (vm, b, *rand))
+ {
+ ret = 0;
+ goto err2;
+ }
+
+err2:
+ if (clone_off)
+ vlib_buffer_free_one (vm, bi[1]);
+err1:
+ vlib_buffer_free_one (vm, bi[0]);
+err0:
+ return ret;
}
-/* test function for a specific case where current_data is negative, verify
- * that there is no crash */
static int
-linearize_negative_current_data (vlib_main_t * vm)
+linearize_test (vlib_main_t *vm)
{
- u32 bi[32];
- TEST (ARRAY_LEN (bi) == vlib_buffer_alloc (vm, bi, ARRAY_LEN (bi)),
- "buff alloc");
+ chained_buffer_template_t tmpl[VLIB_BUFFER_LINEARIZE_MAX];
+ clib_random_buffer_t randbuf;
u32 data_size = vlib_buffer_get_default_data_size (vm);
- u32 i;
- for (i = 0; i < ARRAY_LEN (bi) - 1; ++i)
+ u8 *rand = 0;
+ int ret = 0;
+ int i;
+
+ clib_random_buffer_init (&randbuf, 0);
+
+ clib_memset (tmpl, 0xff, sizeof (tmpl));
+ for (i = 0; i < 2; i++)
{
- vlib_buffer_t *b = vlib_get_buffer (vm, bi[i]);
- b->next_buffer = bi[i + 1];
- b->flags |= VLIB_BUFFER_NEXT_PRESENT;
- b->current_data = -14;
- b->current_length = 14 + data_size;
+ tmpl[i].current_data = -14;
+ tmpl[i].current_length = 14 + data_size;
}
+ TEST (2 == test_chain (vm, tmpl, 2, 0, &randbuf, &rand),
+ "linearize chain with negative current data");
- (void) vlib_buffer_chain_linearize (vm, vlib_get_buffer (vm, bi[0]));
+ clib_memset (tmpl, 0xff, sizeof (tmpl));
+ tmpl[0].current_data = 12;
+ tmpl[0].current_length = data_size - 12;
+ tmpl[1].current_data = 0;
+ tmpl[1].current_length = 0;
+ TEST (1 == test_chain (vm, tmpl, 2, 0, &randbuf, &rand),
+ "linearize chain with empty next");
- return 0;
+ clib_memset (tmpl, 0xff, sizeof (tmpl));
+ tmpl[0].current_data = 0;
+ tmpl[0].current_length = data_size - 17;
+ tmpl[1].current_data = -5;
+ tmpl[1].current_length = 3;
+ tmpl[2].current_data = 17;
+ tmpl[2].current_length = 9;
+ tmpl[3].current_data = 3;
+ tmpl[3].current_length = 5;
+ TEST (1 == test_chain (vm, tmpl, 4, 0, &randbuf, &rand),
+ "linearize chain into a single buffer");
+
+ clib_memset (tmpl, 0xff, sizeof (tmpl));
+ tmpl[0].current_data = 0;
+ tmpl[0].current_length = data_size - 2;
+ tmpl[1].current_data = -VLIB_BUFFER_PRE_DATA_SIZE;
+ tmpl[1].current_length = 20;
+ tmpl[2].current_data = data_size - 10;
+ tmpl[2].current_length = 10;
+ tmpl[3].current_data = 0;
+ tmpl[3].current_length = data_size;
+ TEST (2 == test_chain (vm, tmpl, 4, data_size - 1, &randbuf, &rand),
+ "linearize cloned chain");
+
+ clib_memset (tmpl, 0xff, sizeof (tmpl));
+ for (i = 0; i < 100; i++)
+ {
+ u8 *r = clib_random_buffer_get_data (&randbuf, 1);
+ int n = clib_max (r[0] % ARRAY_LEN (tmpl), 1);
+ int j;
+ for (j = 0; j < n; j++)
+ {
+ r = clib_random_buffer_get_data (&randbuf, 3);
+ i16 current_data = (i16) r[0] - VLIB_BUFFER_PRE_DATA_SIZE;
+ u16 current_length = *(u16 *) (r + 1) % (data_size - current_data);
+ tmpl[j].current_data = current_data;
+ tmpl[j].current_length = current_length;
+ }
+ r = clib_random_buffer_get_data (&randbuf, 1);
+ TEST (
+ test_chain (vm, tmpl, n, r[0] > 250 ? r[0] % 128 : 0, &randbuf, &rand),
+ "linearize random chain %d", i);
+ }
+
+ ret = 1;
+err:
+ clib_random_buffer_free (&randbuf);
+ vec_free (rand);
+ return ret;
}
static clib_error_t *
@@ -65,22 +254,67 @@ test_linearize_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- if (linearize_negative_current_data (vm))
+ if (!linearize_test (vm))
{
- return clib_error_return (0, "linearize_negative_current_data failed");
+ return clib_error_return (0, "linearize test failed");
}
- return (NULL);
+ return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_linearize_command, static) =
{
.path = "test chained-buffer-linearization",
.short_help = "test chained-buffer-linearization",
.function = test_linearize_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+test_linearize_speed_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ /* typical 9000-bytes TCP jumbo frames */
+ const chained_buffer_template_t tmpl[5] = { { 14, 2034, 1 },
+ { 0, 2048, 1 },
+ { 0, 2048, 1 },
+ { 0, 2048, 1 },
+ { 0, 808, 1 } };
+ int i, j;
+
+ for (i = 0; i < 10; i++)
+ {
+ u64 tot = 0;
+ for (j = 0; j < 100000; j++)
+ {
+ vlib_buffer_t *b;
+ u32 bi;
+
+ if (!build_chain (vm, tmpl, 5, 0, 0, &b, &bi))
+ return clib_error_create ("build_chain() failed");
+
+ CLIB_COMPILER_BARRIER ();
+ u64 start = clib_cpu_time_now ();
+ CLIB_COMPILER_BARRIER ();
+
+ vlib_buffer_chain_linearize (vm, b);
+
+ CLIB_COMPILER_BARRIER ();
+ tot += clib_cpu_time_now () - start;
+ CLIB_COMPILER_BARRIER ();
+
+ vlib_buffer_free_one (vm, bi);
+ }
+ vlib_cli_output (vm, "%.03f ticks/call", (f64) tot / j);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (test_linearize_speed_command, static) = {
+ .path = "test chained-buffer-linearization speed",
+ .short_help = "test chained-buffer-linearization speed",
+ .function = test_linearize_speed_fn,
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/unittest.c b/src/plugins/unittest/unittest.c
index 60ebac130da..555516fc9de 100644
--- a/src/plugins/unittest/unittest.c
+++ b/src/plugins/unittest/unittest.c
@@ -19,14 +19,12 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "C unit tests",
.default_disabled = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/util_test.c b/src/plugins/unittest/util_test.c
index 8dce270f814..53384e55494 100644
--- a/src/plugins/unittest/util_test.c
+++ b/src/plugins/unittest/util_test.c
@@ -22,13 +22,11 @@ test_crash_command_fn (vlib_main_t * vm,
{
u64 *p = (u64 *) 0xdefec8ed;
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "deliberate crash: touching %x",
.format_args = "i4",
};
- /* *INDENT-ON* */
elog (&vlib_global_main.elog_main, &e, 0xdefec8ed);
*p = 0xdeadbeef;
@@ -37,14 +35,12 @@ test_crash_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_crash_command, static) =
{
.path = "test crash",
.short_help = "crash the bus!",
.function = test_crash_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_hash_command_fn (vlib_main_t * vm,
@@ -98,14 +94,12 @@ test_hash_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_hash_command, static) =
{
.path = "test hash_memory",
.short_help = "page boundary crossing test",
.function = test_hash_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/unittest/vlib_test.c b/src/plugins/unittest/vlib_test.c
index 3154b7d21cf..c9c46901a4d 100644
--- a/src/plugins/unittest/vlib_test.c
+++ b/src/plugins/unittest/vlib_test.c
@@ -129,14 +129,12 @@ test_vlib_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_vlib_command, static) =
{
.path = "test vlib",
.short_help = "vlib code coverage unit test",
.function = test_vlib_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_format_vlib_command_fn (vlib_main_t * vm,
@@ -180,14 +178,12 @@ test_format_vlib_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_format_vlib_command, static) =
{
.path = "test format-vlib",
.short_help = "vlib format code coverate unit test",
.function = test_format_vlib_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
test_vlib2_command_fn (vlib_main_t * vm,
@@ -215,14 +211,12 @@ test_vlib2_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_vlib2_command, static) =
{
.path = "test vlib2",
.short_help = "vlib code coverage unit test #2",
.function = test_vlib2_command_fn,
};
-/* *INDENT-ON* */
diff --git a/src/plugins/urpf/ip4_urpf.c b/src/plugins/urpf/ip4_urpf.c
index 1d329029478..7cbf81c50c3 100644
--- a/src/plugins/urpf/ip4_urpf.c
+++ b/src/plugins/urpf/ip4_urpf.c
@@ -74,7 +74,6 @@ VLIB_NODE_FN (ip4_tx_urpf_strict) (vlib_main_t * vm,
return (urpf_inline (vm, node, frame, AF_IP4, VLIB_TX, URPF_MODE_STRICT));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_rx_urpf_loose) = {
.name = "ip4-rx-urpf-loose",
.vector_size = sizeof (u32),
@@ -160,7 +159,6 @@ VNET_FEATURE_INIT (ip4_tx_urpf_strict_feat, static) =
.arc_name = "ip4-output",
.node_name = "ip4-tx-urpf-strict",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/urpf/ip6_urpf.c b/src/plugins/urpf/ip6_urpf.c
index 48d991573b5..d278289b5e2 100644
--- a/src/plugins/urpf/ip6_urpf.c
+++ b/src/plugins/urpf/ip6_urpf.c
@@ -74,7 +74,6 @@ VLIB_NODE_FN (ip6_tx_urpf_strict) (vlib_main_t * vm,
return (urpf_inline (vm, node, frame, AF_IP6, VLIB_TX, URPF_MODE_STRICT));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_rx_urpf_loose) = {
.name = "ip6-rx-urpf-loose",
.vector_size = sizeof (u32),
@@ -160,7 +159,6 @@ VNET_FEATURE_INIT (ip6_tx_urpf_strict_feat, static) =
.arc_name = "ip6-output",
.node_name = "ip6-tx-urpf-strict",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/urpf/urpf.api b/src/plugins/urpf/urpf.api
index 944db08cc94..a4e897afd17 100644
--- a/src/plugins/urpf/urpf.api
+++ b/src/plugins/urpf/urpf.api
@@ -50,6 +50,51 @@ autoreply define urpf_update
vl_api_interface_index_t sw_if_index;
};
+/**
+ * @brief Enable uRPF on a given interface in a given direction
+ * @param client_index - opaque cookie to identify the sender
+ * @param context - sender context, to match reply w/ request
+ * @param mode - Mode
+ * @param af - Address Family
+ * @param sw_if_index - Interface
+ * @param is_input - Direction.
+ * @param table-id - Table ID
+ */
+autoreply define urpf_update_v2
+{
+ u32 client_index;
+ u32 context;
+ bool is_input[default = true];
+ vl_api_urpf_mode_t mode;
+ vl_api_address_family_t af;
+ vl_api_interface_index_t sw_if_index;
+ u32 table_id [default=0xffffffff];
+};
+
+/** @brief Dump uRPF enabled interface(s) in zero or more urpf_interface_details replies
+ @param client_index - opaque cookie to identify the sender
+ @param sw_if_index - sw_if_index of a specific interface, or -1 (default)
+ to return all uRPF enabled interfaces
+*/
+define urpf_interface_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+};
+
+/** @brief uRPF enabled interface details
+*/
+define urpf_interface_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ bool is_input;
+ vl_api_urpf_mode_t mode;
+ vl_api_address_family_t af;
+ u32 table_id;
+};
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/urpf/urpf.c b/src/plugins/urpf/urpf.c
index 7e1986a4250..e5209caafb4 100644
--- a/src/plugins/urpf/urpf.c
+++ b/src/plugins/urpf/urpf.c
@@ -17,7 +17,6 @@
#include <vnet/fib/fib_table.h>
-/* *INDENT-OFF* */
static const char *urpf_feat_arcs[N_AF][VLIB_N_DIR] =
{
[AF_IP4] = {
@@ -53,12 +52,12 @@ static const char *urpf_feats[N_AF][VLIB_N_DIR][URPF_N_MODES] =
},
},
};
-/* *INDENT-ON* */
/**
* Per-af, per-direction, per-interface uRPF configs
*/
-static urpf_mode_t *urpf_cfgs[N_AF][VLIB_N_DIR];
+
+urpf_data_t *urpf_cfgs[N_AF][VLIB_N_DIR];
u8 *
format_urpf_mode (u8 * s, va_list * a)
@@ -95,34 +94,105 @@ unformat_urpf_mode (unformat_input_t * input, va_list * args)
return 0;
}
-void
-urpf_update (urpf_mode_t mode,
- u32 sw_if_index, ip_address_family_t af, vlib_dir_t dir)
+int
+urpf_update (urpf_mode_t mode, u32 sw_if_index, ip_address_family_t af,
+ vlib_dir_t dir, u32 table_id)
{
- urpf_mode_t old;
+ fib_protocol_t proto;
+ u32 fib_index;
+ if (table_id != ~0)
+ {
+ proto = ip_address_family_to_fib_proto (af);
+ fib_index = fib_table_find (proto, table_id);
+ if (fib_index == (~0))
+ return VNET_API_ERROR_INVALID_VALUE;
+ }
+ else
+ {
+ bool is_ip4 = (AF_IP4 == af);
+ u32 *fib_index_by_sw_if_index = is_ip4 ?
+ ip4_main.fib_index_by_sw_if_index :
+ ip6_main.fib_index_by_sw_if_index;
- vec_validate_init_empty (urpf_cfgs[af][dir], sw_if_index, URPF_MODE_OFF);
+ fib_index = fib_index_by_sw_if_index[sw_if_index];
+ }
+ urpf_data_t old;
+ urpf_mode_t off = URPF_MODE_OFF;
+ urpf_data_t empty = { .fib_index = 0, .mode = off };
+ vec_validate_init_empty (urpf_cfgs[af][dir], sw_if_index, empty);
old = urpf_cfgs[af][dir][sw_if_index];
- if (mode != old)
+ urpf_data_t data = { .fib_index = fib_index,
+ .mode = mode,
+ .fib_index_is_custom = (table_id != ~0) };
+ urpf_cfgs[af][dir][sw_if_index] = data;
+ if (data.mode != old.mode || data.fib_index != old.fib_index)
{
- if (URPF_MODE_OFF != old)
+ if (URPF_MODE_OFF != old.mode)
/* disable what we have */
vnet_feature_enable_disable (urpf_feat_arcs[af][dir],
- urpf_feats[af][dir][old],
+ urpf_feats[af][dir][old.mode],
sw_if_index, 0, 0, 0);
- if (URPF_MODE_OFF != mode)
+ if (URPF_MODE_OFF != data.mode)
/* enable what's new */
vnet_feature_enable_disable (urpf_feat_arcs[af][dir],
- urpf_feats[af][dir][mode],
+ urpf_feats[af][dir][data.mode],
sw_if_index, 1, 0, 0);
}
/* else - no change to existing config */
+ return 0;
+}
- urpf_cfgs[af][dir][sw_if_index] = mode;
+static void
+urpf_table_bind_v4 (ip4_main_t *im, uword opaque, u32 sw_if_index,
+ u32 new_fib_index, u32 old_fib_index)
+{
+ vlib_dir_t dir;
+ urpf_data_t empty = { .fib_index = 0, .mode = URPF_MODE_OFF };
+ FOREACH_VLIB_DIR (dir)
+ {
+ vec_validate_init_empty (urpf_cfgs[AF_IP4][dir], sw_if_index, empty);
+ if (!urpf_cfgs[AF_IP4][dir][sw_if_index].fib_index_is_custom)
+ {
+ urpf_cfgs[AF_IP4][dir][sw_if_index].fib_index = new_fib_index;
+ }
+ }
}
+static void
+urpf_table_bind_v6 (ip6_main_t *im, uword opaque, u32 sw_if_index,
+ u32 new_fib_index, u32 old_fib_index)
+{
+ vlib_dir_t dir;
+ urpf_data_t empty = { .fib_index = 0, .mode = URPF_MODE_OFF };
+ FOREACH_VLIB_DIR (dir)
+ {
+ vec_validate_init_empty (urpf_cfgs[AF_IP6][dir], sw_if_index, empty);
+ if (!urpf_cfgs[AF_IP6][dir][sw_if_index].fib_index_is_custom)
+ {
+ urpf_cfgs[AF_IP6][dir][sw_if_index].fib_index = new_fib_index;
+ }
+ }
+}
+
+static clib_error_t *
+urpf_init (vlib_main_t *vm)
+{
+ ip4_table_bind_callback_t cb4 = {
+ .function = urpf_table_bind_v4,
+ };
+ vec_add1 (ip4_main.table_bind_callbacks, cb4);
+
+ ip6_table_bind_callback_t cb6 = {
+ .function = urpf_table_bind_v6,
+ };
+ vec_add1 (ip6_main.table_bind_callbacks, cb6);
+ return (NULL);
+}
+
+VLIB_INIT_FUNCTION (urpf_init);
+
static clib_error_t *
urpf_cli_update (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -134,11 +204,13 @@ urpf_cli_update (vlib_main_t * vm,
urpf_mode_t mode;
u32 sw_if_index;
vlib_dir_t dir;
+ u32 table_id;
sw_if_index = ~0;
af = AF_IP4;
dir = VLIB_RX;
mode = URPF_MODE_STRICT;
+ table_id = ~0;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -150,6 +222,8 @@ urpf_cli_update (vlib_main_t * vm,
;
else if (unformat (line_input, "%U", unformat_urpf_mode, &mode))
;
+ else if (unformat (line_input, "table %d", &table_id))
+ ;
else if (unformat (line_input, "%U", unformat_ip_address_family, &af))
;
else if (unformat (line_input, "%U", unformat_vlib_rx_tx, &dir))
@@ -168,7 +242,13 @@ urpf_cli_update (vlib_main_t * vm,
goto done;
}
- urpf_update (mode, sw_if_index, af, dir);
+ int rv = 0;
+ rv = urpf_update (mode, sw_if_index, af, dir, table_id);
+ if (rv)
+ {
+ error = clib_error_return (0, "unknown table id");
+ goto done;
+ }
done:
unformat_free (line_input);
@@ -196,12 +276,12 @@ done:
*
* Example of graph node after range checking is enabled:
* @cliexstart{show vlib graph ip4-rx-urpf-loose}
- * Name Next Previous
- * ip4-rx-urpf-loose ip4-drop [0] ip4-input-no-checksum
- * ip4-source-and-port-range- ip4-input
+ * Name Next Previous
+ * ip4-rx-urpf-loose ip4-drop [0] ip4-input-no-checksum
+ * ip4-source-and-port-range- ip4-input
* @cliexend
*
- * Example of how to display the feature enabed on an interface:
+ * Example of how to display the feature enabled on an interface:
* @cliexstart{show ip interface features GigabitEthernet2/0/0}
* IP feature paths configured on GigabitEthernet2/0/0...
*
@@ -229,13 +309,12 @@ done:
* @cliexcmd{set urpf ip4 off GigabitEthernet2/0/0}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_source_check_command, static) = {
.path = "set urpf",
.function = urpf_cli_update,
- .short_help = "set urpf [ip4|ip6] [rx|tx] [off|strict|loose] <INTERFACE>",
+ .short_help = "set urpf [ip4|ip6] [rx|tx] [off|strict|loose] "
+ "<INTERFACE> [table <table>]",
};
-/* *INDENT-ON* */
static clib_error_t *
urpf_cli_accept (vlib_main_t * vm,
@@ -306,13 +385,11 @@ done:
* loose RPF tests:
* @cliexcmd{set urpf-accept table 7 10.0.0.0/8 add}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (urpf_accept_command, static) = {
.path = "set urpf-accept",
.function = urpf_cli_accept,
.short_help = "urpf-accept [table <table-id>] [add|del] <PREFIX>",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/urpf/urpf.h b/src/plugins/urpf/urpf.h
index 941cda25f4b..6983a2b440c 100644
--- a/src/plugins/urpf/urpf.h
+++ b/src/plugins/urpf/urpf.h
@@ -18,10 +18,10 @@
#include <vnet/ip/ip_types.h>
-#define foreach_urpf_mode \
- _(OFF, "off") \
- _(LOOSE, "loose") \
- _(STRICT, "strict") \
+#define foreach_urpf_mode \
+ _ (OFF, "off") \
+ _ (LOOSE, "loose") \
+ _ (STRICT, "strict")
typedef enum urpf_mode_t_
{
@@ -34,10 +34,17 @@ typedef enum urpf_mode_t_
extern u8 *format_urpf_mode (u8 * s, va_list * a);
-extern void urpf_update (urpf_mode_t mode,
- u32 sw_if_index,
- ip_address_family_t af, vlib_dir_t dir);
+typedef struct
+{
+ urpf_mode_t mode;
+ u32 fib_index;
+ u8 fib_index_is_custom;
+} urpf_data_t;
+
+extern urpf_data_t *urpf_cfgs[N_AF][VLIB_N_DIR];
+extern int urpf_update (urpf_mode_t mode, u32 sw_if_index,
+ ip_address_family_t af, vlib_dir_t dir, u32 table_id);
#endif
diff --git a/src/plugins/urpf/urpf_api.c b/src/plugins/urpf/urpf_api.c
index ad060399347..3d0f4b4e8d4 100644
--- a/src/plugins/urpf/urpf_api.c
+++ b/src/plugins/urpf/urpf_api.c
@@ -26,6 +26,8 @@
#include <vnet/format_fns.h>
#include <urpf/urpf.api_enum.h>
#include <urpf/urpf.api_types.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/ip/ip_types.h>
/**
* Base message ID fot the plugin
@@ -62,7 +64,34 @@ vl_api_urpf_update_t_handler (vl_api_urpf_update_t * mp)
VALIDATE_SW_IF_INDEX (mp);
rv = urpf_mode_decode (mp->mode, &mode);
+ if (rv)
+ goto done;
+ rv = ip_address_family_decode (mp->af, &af);
+ if (rv)
+ goto done;
+
+ rv = urpf_update (mode, htonl (mp->sw_if_index), af,
+ (mp->is_input ? VLIB_RX : VLIB_TX), 0);
+ if (rv)
+ goto done;
+
+ BAD_SW_IF_INDEX_LABEL;
+done:
+ REPLY_MACRO (VL_API_URPF_UPDATE_REPLY);
+}
+
+static void
+vl_api_urpf_update_v2_t_handler (vl_api_urpf_update_v2_t *mp)
+{
+ vl_api_urpf_update_reply_t *rmp;
+ ip_address_family_t af;
+ urpf_mode_t mode;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = urpf_mode_decode (mp->mode, &mode);
if (rv)
goto done;
@@ -71,12 +100,85 @@ vl_api_urpf_update_t_handler (vl_api_urpf_update_t * mp)
if (rv)
goto done;
- urpf_update (mode, htonl (mp->sw_if_index), af,
- (mp->is_input ? VLIB_RX : VLIB_TX));
+ rv = urpf_update (mode, htonl (mp->sw_if_index), af,
+ (mp->is_input ? VLIB_RX : VLIB_TX), ntohl (mp->table_id));
+
+ if (rv)
+ goto done;
BAD_SW_IF_INDEX_LABEL;
done:
- REPLY_MACRO (VL_API_URPF_UPDATE_REPLY);
+ REPLY_MACRO (VL_API_URPF_UPDATE_V2_REPLY);
+}
+
+static void
+send_urpf_interface_details (vpe_api_main_t *am, vl_api_registration_t *reg,
+ u32 context, const u32 sw_if_index,
+ const urpf_data_t *ud,
+ const ip_address_family_t af,
+ const vlib_dir_t dir)
+{
+ vl_api_urpf_interface_details_t *mp;
+
+ mp = vl_msg_api_alloc_zero (sizeof (*mp));
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_URPF_INTERFACE_DETAILS);
+ mp->context = context;
+
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->table_id = htonl (fib_table_get_table_id (
+ ud->fib_index, (af == AF_IP4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6)));
+ mp->af = (vl_api_address_family_t) af;
+ mp->mode = (vl_api_urpf_mode_t) ud->mode;
+ mp->is_input = (dir == VLIB_RX);
+
+ vl_api_send_msg (reg, (u8 *) mp);
+}
+
+static void
+send_urpf_interface (vpe_api_main_t *am, vl_api_registration_t *reg,
+ u32 context, const u32 sw_if_index)
+{
+ urpf_data_t *ud;
+ vlib_dir_t dir;
+ ip_address_family_t af;
+
+ FOR_EACH_IP_ADDRESS_FAMILY (af)
+ FOREACH_VLIB_DIR (dir)
+ if (sw_if_index < vec_len (urpf_cfgs[af][dir]))
+ {
+ ud = &urpf_cfgs[af][dir][sw_if_index];
+ if (ud->mode || ud->fib_index_is_custom)
+ send_urpf_interface_details (am, reg, context, sw_if_index, ud, af,
+ dir);
+ }
+}
+
+static void
+vl_api_urpf_interface_dump_t_handler (vl_api_urpf_interface_dump_t *mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vl_api_registration_t *reg;
+ vnet_interface_main_t *im = &vnet_main.interface_main;
+ vnet_sw_interface_t *si;
+ u32 sw_if_index = ~0;
+ int __attribute__ ((unused)) rv = 0;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (sw_if_index == ~0)
+ {
+ pool_foreach (si, im->sw_interfaces)
+ {
+ send_urpf_interface (am, reg, mp->context, si->sw_if_index);
+ }
+ return;
+ }
+ VALIDATE_SW_IF_INDEX (mp);
+ send_urpf_interface (am, reg, mp->context, sw_if_index);
+ BAD_SW_IF_INDEX_LABEL;
}
#include <urpf/urpf.api.c>
@@ -92,12 +194,10 @@ urpf_api_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (urpf_api_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "Unicast Reverse Path Forwarding (uRPF)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/urpf/urpf_dp.h b/src/plugins/urpf/urpf_dp.h
index bfe1f659171..816d8b70b90 100644
--- a/src/plugins/urpf/urpf_dp.h
+++ b/src/plugins/urpf/urpf_dp.h
@@ -128,6 +128,11 @@ urpf_inline (vlib_main_t * vm,
h1 += vnet_buffer (b[1])->ip.save_rewrite_length;
}
+ fib_index0 =
+ urpf_cfgs[af][dir][vnet_buffer (b[0])->sw_if_index[dir]].fib_index;
+ fib_index1 =
+ urpf_cfgs[af][dir][vnet_buffer (b[1])->sw_if_index[dir]].fib_index;
+
if (AF_IP4 == af)
{
const ip4_header_t *ip0, *ip1;
@@ -135,11 +140,6 @@ urpf_inline (vlib_main_t * vm,
ip0 = (ip4_header_t *) h0;
ip1 = (ip4_header_t *) h1;
- fib_index0 = ip4_main.fib_index_by_sw_if_index
- [vnet_buffer (b[0])->sw_if_index[dir]];
- fib_index1 = ip4_main.fib_index_by_sw_if_index
- [vnet_buffer (b[1])->sw_if_index[dir]];
-
ip4_fib_forwarding_lookup_x2 (fib_index0,
fib_index1,
&ip0->src_address,
@@ -155,11 +155,6 @@ urpf_inline (vlib_main_t * vm,
{
const ip6_header_t *ip0, *ip1;
- fib_index0 = ip6_main.fib_index_by_sw_if_index
- [vnet_buffer (b[0])->sw_if_index[dir]];
- fib_index1 = ip6_main.fib_index_by_sw_if_index
- [vnet_buffer (b[1])->sw_if_index[dir]];
-
ip0 = (ip6_header_t *) h0;
ip1 = (ip6_header_t *) h1;
@@ -255,12 +250,13 @@ urpf_inline (vlib_main_t * vm,
if (VLIB_TX == dir)
h0 += vnet_buffer (b[0])->ip.save_rewrite_length;
+ fib_index0 =
+ urpf_cfgs[af][dir][vnet_buffer (b[0])->sw_if_index[dir]].fib_index;
+
if (AF_IP4 == af)
{
const ip4_header_t *ip0;
- fib_index0 = ip4_main.fib_index_by_sw_if_index
- [vnet_buffer (b[0])->sw_if_index[dir]];
ip0 = (ip4_header_t *) h0;
lb_index0 = ip4_fib_forwarding_lookup (fib_index0,
@@ -275,8 +271,6 @@ urpf_inline (vlib_main_t * vm,
const ip6_header_t *ip0;
ip0 = (ip6_header_t *) h0;
- fib_index0 = ip6_main.fib_index_by_sw_if_index
- [vnet_buffer (b[0])->sw_if_index[dir]];
lb_index0 = ip6_fib_table_fwding_lookup (fib_index0,
&ip0->src_address);
diff --git a/src/plugins/vhost/CMakeLists.txt b/src/plugins/vhost/CMakeLists.txt
new file mode 100644
index 00000000000..6b86c8c98d1
--- /dev/null
+++ b/src/plugins/vhost/CMakeLists.txt
@@ -0,0 +1,34 @@
+# Copyright (c) 2020 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(vhost
+ SOURCES
+ plugin.c
+ vhost_user.c
+ vhost_user_api.c
+ vhost_user_input.c
+ vhost_user_output.c
+ vhost_std.h
+ vhost_user.h
+ vhost_user_inline.h
+ virtio_std.h
+
+ MULTIARCH_SOURCES
+ vhost_user_input.c
+ vhost_user_output.c
+
+ API_FILES
+ vhost_user.api
+
+ SUPPORTED_OS_LIST Linux
+)
diff --git a/src/plugins/vhost/FEATURE.yaml b/src/plugins/vhost/FEATURE.yaml
new file mode 100644
index 00000000000..7104dda1dc5
--- /dev/null
+++ b/src/plugins/vhost/FEATURE.yaml
@@ -0,0 +1,13 @@
+---
+name: Vhost-user Device Driver
+maintainer: sluong@cisco.com
+features:
+ - Device mode to emulate vhost-user interface presented to VPP from the
+ guest VM.
+ - Support virtio 1.0 in virtio
+ - Support virtio 1.1 packed ring in virtio [experimental]
+ - Support multi-queue, GSO, checksum offload, indirect descriptor,
+ jumbo frame, and packed ring.
+description: "Vhost-user implementation"
+state: production
+properties: [API, CLI, STATS, MULTITHREAD]
diff --git a/src/plugins/vhost/plugin.c b/src/plugins/vhost/plugin.c
new file mode 100644
index 00000000000..0e6158ba7d8
--- /dev/null
+++ b/src/plugins/vhost/plugin.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "Vhost-User",
+};
diff --git a/src/plugins/vhost/vhost_std.h b/src/plugins/vhost/vhost_std.h
new file mode 100644
index 00000000000..7799093bac3
--- /dev/null
+++ b/src/plugins/vhost/vhost_std.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __VHOST_STD_H__
+#define __VHOST_STD_H__
+
+typedef struct
+{
+ u64 guest_phys_addr;
+ u64 memory_size;
+ u64 userspace_addr;
+ u64 mmap_offset;
+} vhost_memory_region_t;
+
+typedef struct
+{
+ u32 nregions;
+ u32 padding;
+ vhost_memory_region_t regions[0];
+} vhost_memory_t;
+
+typedef struct
+{
+ u32 index;
+ u32 num;
+} vhost_vring_state_t;
+
+typedef struct
+{
+ u32 index;
+ int fd;
+} vhost_vring_file_t;
+
+typedef struct
+{
+ u32 index;
+ u32 flags;
+ u64 desc_user_addr;
+ u64 used_user_addr;
+ u64 avail_user_addr;
+ u64 log_guest_addr;
+} vhost_vring_addr_t;
+
+typedef struct
+{
+ u64 size;
+ u64 offset;
+} vhost_user_log_t;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/vhost_user.api b/src/plugins/vhost/vhost_user.api
index b026ba768a9..b026ba768a9 100644
--- a/src/vnet/devices/virtio/vhost_user.api
+++ b/src/plugins/vhost/vhost_user.api
diff --git a/src/vnet/devices/virtio/vhost_user.c b/src/plugins/vhost/vhost_user.c
index 3217c72f95d..fdee984f97b 100644
--- a/src/vnet/devices/virtio/vhost_user.c
+++ b/src/plugins/vhost/vhost_user.c
@@ -37,9 +37,10 @@
#include <vnet/devices/devices.h>
#include <vnet/feature/feature.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
-#include <vnet/devices/virtio/vhost_user.h>
-#include <vnet/devices/virtio/vhost_user_inline.h>
+#include <vhost/vhost_user.h>
+#include <vhost/vhost_user_inline.h>
/**
* @file
@@ -51,7 +52,6 @@
vlib_node_registration_t vhost_user_send_interrupt_node;
-/* *INDENT-OFF* */
vhost_user_main_t vhost_user_main = {
.mtu_bytes = 1518,
};
@@ -59,7 +59,6 @@ vhost_user_main_t vhost_user_main = {
VNET_HW_INTERFACE_CLASS (vhost_interface_class, static) = {
.name = "vhost-user",
};
-/* *INDENT-ON* */
static long
get_huge_page_size (int fd)
@@ -106,50 +105,55 @@ unmap_all_mem_regions (vhost_user_intf_t * vui)
}
vui->nregions = 0;
- for (q = 0; q < vui->num_qid; q++)
- {
- vq = &vui->vrings[q];
- vq->avail = 0;
- vq->used = 0;
- vq->desc = 0;
- }
+ FOR_ALL_VHOST_RX_TXQ (q, vui)
+ {
+ vq = &vui->vrings[q];
+ vq->avail = 0;
+ vq->used = 0;
+ vq->desc = 0;
+ }
}
static_always_inline void
-vhost_user_tx_thread_placement (vhost_user_intf_t * vui)
+vhost_user_tx_thread_placement (vhost_user_intf_t *vui, u32 qid)
{
- //Let's try to assign one queue to each thread
- u32 qid;
- u32 thread_index = 0;
+ vnet_main_t *vnm = vnet_get_main ();
+ vhost_user_vring_t *rxvq = &vui->vrings[qid];
+ u32 q = qid >> 1, rxvq_count;
- vui->use_tx_spinlock = 0;
- while (1)
+ ASSERT ((qid & 1) == 0);
+ if (!rxvq->started || !rxvq->enabled)
+ return;
+
+ rxvq_count = (qid >> 1) + 1;
+ if (rxvq->queue_index == ~0)
{
- for (qid = 0; qid < vui->num_qid / 2; qid++)
- {
- vhost_user_vring_t *rxvq = &vui->vrings[VHOST_VRING_IDX_RX (qid)];
- if (!rxvq->started || !rxvq->enabled)
- continue;
-
- vui->per_cpu_tx_qid[thread_index] = qid;
- thread_index++;
- if (thread_index == vlib_get_thread_main ()->n_vlib_mains)
- return;
- }
- //We need to loop, meaning the spinlock has to be used
- vui->use_tx_spinlock = 1;
- if (thread_index == 0)
- {
- //Could not find a single valid one
- for (thread_index = 0;
- thread_index < vlib_get_thread_main ()->n_vlib_mains;
- thread_index++)
- {
- vui->per_cpu_tx_qid[thread_index] = 0;
- }
- return;
- }
+ rxvq->queue_index =
+ vnet_hw_if_register_tx_queue (vnm, vui->hw_if_index, q);
+ rxvq->qid = q;
}
+
+ FOR_ALL_VHOST_RXQ (q, vui)
+ {
+ vhost_user_vring_t *rxvq = &vui->vrings[q];
+ u32 qi = rxvq->queue_index;
+
+ if (rxvq->queue_index == ~0)
+ break;
+ for (u32 i = 0; i < vlib_get_n_threads (); i++)
+ vnet_hw_if_tx_queue_unassign_thread (vnm, qi, i);
+ }
+
+ for (u32 i = 0; i < vlib_get_n_threads (); i++)
+ {
+ vhost_user_vring_t *rxvq =
+ &vui->vrings[VHOST_VRING_IDX_RX (i % rxvq_count)];
+ u32 qi = rxvq->queue_index;
+
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, i);
+ }
+
+ vnet_hw_if_update_runtime_data (vnm, vui->hw_if_index);
}
/**
@@ -163,16 +167,28 @@ vhost_user_rx_thread_placement (vhost_user_intf_t * vui, u32 qid)
vnet_main_t *vnm = vnet_get_main ();
int rv;
u32 q = qid >> 1;
+ vhost_user_main_t *vum = &vhost_user_main;
ASSERT ((qid & 1) == 1); // should be odd
// Assign new queue mappings for the interface
+ if (txvq->queue_index != ~0)
+ return;
vnet_hw_if_set_input_node (vnm, vui->hw_if_index,
vhost_user_input_node.index);
txvq->queue_index = vnet_hw_if_register_rx_queue (vnm, vui->hw_if_index, q,
VNET_HW_IF_RXQ_THREAD_ANY);
+ txvq->thread_index =
+ vnet_hw_if_get_rx_queue_thread_index (vnm, txvq->queue_index);
+
if (txvq->mode == VNET_HW_IF_RX_MODE_UNKNOWN)
/* Set polling as the default */
txvq->mode = VNET_HW_IF_RX_MODE_POLLING;
+ if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ {
+ vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
+ /* Keep a polling queue count for each thread */
+ cpu->polling_q_count++;
+ }
txvq->qid = q;
rv = vnet_hw_if_set_rx_queue_mode (vnm, txvq->queue_index, txvq->mode);
if (rv)
@@ -227,11 +243,11 @@ vhost_user_thread_placement (vhost_user_intf_t * vui, u32 qid)
{
if (qid & 1) // RX is odd, TX is even
{
- if (vui->vrings[qid].qid == -1)
+ if (vui->vrings[qid].queue_index == ~0)
vhost_user_rx_thread_placement (vui, qid);
}
else
- vhost_user_tx_thread_placement (vui);
+ vhost_user_tx_thread_placement (vui, qid);
}
static clib_error_t *
@@ -258,10 +274,17 @@ vhost_user_kickfd_read_ready (clib_file_t * uf)
vq->kickfd_idx);
}
- if (is_txq && (vhost_user_intf_ready (vui) &&
- ((vq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE) ||
- (vq->mode == VNET_HW_IF_RX_MODE_INTERRUPT))))
- vnet_hw_if_rx_queue_set_int_pending (vnm, vq->queue_index);
+ if (is_txq && (vq->mode != VNET_HW_IF_RX_MODE_POLLING) &&
+ vhost_user_intf_ready (vui))
+ {
+ vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, vq->thread_index);
+ /*
+ * If the thread has more than 1 queue and the other queue is in polling
+ * mode, there is no need to trigger an interrupt
+ */
+ if (cpu->polling_q_count == 0)
+ vnet_hw_if_rx_queue_set_int_pending (vnm, vq->queue_index);
+ }
return 0;
}
@@ -276,6 +299,9 @@ vhost_user_vring_init (vhost_user_intf_t * vui, u32 qid)
vring->callfd_idx = ~0;
vring->errfd = -1;
vring->qid = -1;
+ vring->queue_index = ~0;
+ vring->thread_index = ~0;
+ vring->mode = VNET_HW_IF_RX_MODE_POLLING;
clib_spinlock_init (&vring->vring_lock);
@@ -319,11 +345,16 @@ vhost_user_vring_close (vhost_user_intf_t * vui, u32 qid)
clib_spinlock_free (&vring->vring_lock);
- // save the qid so that we don't need to unassign and assign_rx_thread
- // when the interface comes back up. They are expensive calls.
+ // save the needed information in vrings prior to being wiped out
u16 q = vui->vrings[qid].qid;
+ u32 queue_index = vui->vrings[qid].queue_index;
+ u32 mode = vui->vrings[qid].mode;
+ u32 thread_index = vui->vrings[qid].thread_index;
vhost_user_vring_init (vui, qid);
vui->vrings[qid].qid = q;
+ vui->vrings[qid].queue_index = queue_index;
+ vui->vrings[qid].mode = mode;
+ vui->vrings[qid].thread_index = thread_index;
}
static_always_inline void
@@ -342,13 +373,38 @@ vhost_user_if_disconnect (vhost_user_intf_t * vui)
vui->is_ready = 0;
- for (q = 0; q < vui->num_qid; q++)
- vhost_user_vring_close (vui, q);
+ FOR_ALL_VHOST_RX_TXQ (q, vui) { vhost_user_vring_close (vui, q); }
unmap_all_mem_regions (vui);
vu_log_debug (vui, "interface ifindex %d disconnected", vui->sw_if_index);
}
+void
+vhost_user_set_operation_mode (vhost_user_intf_t *vui,
+ vhost_user_vring_t *txvq)
+{
+ if (vhost_user_is_packed_ring_supported (vui))
+ {
+ if (txvq->used_event)
+ {
+ if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ txvq->used_event->flags = VRING_EVENT_F_DISABLE;
+ else
+ txvq->used_event->flags = 0;
+ }
+ }
+ else
+ {
+ if (txvq->used)
+ {
+ if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ txvq->used->flags = VRING_USED_F_NO_NOTIFY;
+ else
+ txvq->used->flags = 0;
+ }
+ }
+}
+
static clib_error_t *
vhost_user_socket_read (clib_file_t * uf)
{
@@ -495,19 +551,20 @@ vhost_user_socket_read (clib_file_t * uf)
(vui->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) ? 1 : 0;
ASSERT (vui->virtio_net_hdr_sz < VLIB_BUFFER_PRE_DATA_SIZE);
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vui->hw_if_index);
if (vui->enable_gso &&
((vui->features & FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS)
== FEATURE_VIRTIO_NET_F_HOST_GUEST_TSO_FEATURE_BITS))
{
- hw->caps |= (VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM);
+ vnet_hw_if_set_caps (vnm, vui->hw_if_index,
+ VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM);
}
else
{
- hw->caps &= ~(VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM);
+ vnet_hw_if_unset_caps (vnm, vui->hw_if_index,
+ VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_L4_TX_CKSUM);
}
vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
vui->is_ready = 0;
@@ -579,19 +636,19 @@ vhost_user_socket_read (clib_file_t * uf)
* Re-compute desc, used, and avail descriptor table if vring address
* is set.
*/
- for (q = 0; q < vui->num_qid; q++)
- {
- if (vui->vrings[q].desc_user_addr &&
- vui->vrings[q].used_user_addr && vui->vrings[q].avail_user_addr)
- {
- vui->vrings[q].desc =
- map_user_mem (vui, vui->vrings[q].desc_user_addr);
- vui->vrings[q].used =
- map_user_mem (vui, vui->vrings[q].used_user_addr);
- vui->vrings[q].avail =
- map_user_mem (vui, vui->vrings[q].avail_user_addr);
- }
- }
+ FOR_ALL_VHOST_RX_TXQ (q, vui)
+ {
+ if (vui->vrings[q].desc_user_addr && vui->vrings[q].used_user_addr &&
+ vui->vrings[q].avail_user_addr)
+ {
+ vui->vrings[q].desc =
+ map_user_mem (vui, vui->vrings[q].desc_user_addr);
+ vui->vrings[q].used =
+ map_user_mem (vui, vui->vrings[q].used_user_addr);
+ vui->vrings[q].avail =
+ map_user_mem (vui, vui->vrings[q].avail_user_addr);
+ }
+ }
vlib_worker_thread_barrier_release (vm);
break;
@@ -630,9 +687,12 @@ vhost_user_socket_read (clib_file_t * uf)
goto close_socket;
}
- vring_desc_t *desc = map_user_mem (vui, msg.addr.desc_user_addr);
- vring_used_t *used = map_user_mem (vui, msg.addr.used_user_addr);
- vring_avail_t *avail = map_user_mem (vui, msg.addr.avail_user_addr);
+ vnet_virtio_vring_desc_t *desc =
+ map_user_mem (vui, msg.addr.desc_user_addr);
+ vnet_virtio_vring_used_t *used =
+ map_user_mem (vui, msg.addr.used_user_addr);
+ vnet_virtio_vring_avail_t *avail =
+ map_user_mem (vui, msg.addr.avail_user_addr);
if ((desc == NULL) || (used == NULL) || (avail == NULL))
{
@@ -665,12 +725,8 @@ vhost_user_socket_read (clib_file_t * uf)
vui->vrings[msg.state.index].last_kick =
vui->vrings[msg.state.index].last_used_idx;
- /* tell driver that we don't want interrupts */
- if (vhost_user_is_packed_ring_supported (vui))
- vui->vrings[msg.state.index].used_event->flags =
- VRING_EVENT_F_DISABLE;
- else
- vui->vrings[msg.state.index].used->flags = VRING_USED_F_NO_NOTIFY;
+ /* tell driver that we want interrupts or not */
+ vhost_user_set_operation_mode (vui, &vui->vrings[msg.state.index]);
vlib_worker_thread_barrier_release (vm);
vhost_user_update_iface_state (vui);
break;
@@ -1135,12 +1191,10 @@ vhost_user_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (vhost_user_init) =
{
.runs_after = VLIB_INITS("ip4_init"),
};
-/* *INDENT-ON* */
static uword
vhost_user_send_interrupt_process (vlib_main_t * vm,
@@ -1180,42 +1234,32 @@ vhost_user_send_interrupt_process (vlib_main_t * vm,
case VHOST_USER_EVENT_START_TIMER:
stop_timer = 0;
+ timeout = 1e-3;
if (!vlib_process_suspend_time_is_zero (poll_time_remaining))
break;
/* fall through */
case ~0:
- /* *INDENT-OFF* */
pool_foreach (vui, vum->vhost_user_interfaces) {
next_timeout = timeout;
- for (qid = 0; qid < vui->num_qid / 2; qid += 2)
- {
- vhost_user_vring_t *rxvq = &vui->vrings[qid];
- vhost_user_vring_t *txvq = &vui->vrings[qid + 1];
-
- if (txvq->qid == -1)
- continue;
- if (txvq->n_since_last_int)
- {
- if (now >= txvq->int_deadline)
- vhost_user_send_call (vm, vui, txvq);
- else
- next_timeout = txvq->int_deadline - now;
- }
-
- if (rxvq->n_since_last_int)
- {
- if (now >= rxvq->int_deadline)
- vhost_user_send_call (vm, vui, rxvq);
- else
- next_timeout = rxvq->int_deadline - now;
- }
-
- if ((next_timeout < timeout) && (next_timeout > 0.0))
- timeout = next_timeout;
- }
+ FOR_ALL_VHOST_RX_TXQ (qid, vui)
+ {
+ vhost_user_vring_t *vq = &vui->vrings[qid];
+
+ if (vq->started == 0)
+ continue;
+ if (vq->n_since_last_int)
+ {
+ if (now >= vq->int_deadline)
+ vhost_user_send_call (vm, vui, vq);
+ else
+ next_timeout = vq->int_deadline - now;
+ }
+
+ if ((next_timeout < timeout) && (next_timeout > 0.0))
+ timeout = next_timeout;
+ }
}
- /* *INDENT-ON* */
break;
default:
@@ -1231,13 +1275,11 @@ vhost_user_send_interrupt_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vhost_user_send_interrupt_node) = {
.function = vhost_user_send_interrupt_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "vhost-user-send-interrupt-process",
};
-/* *INDENT-ON* */
static uword
vhost_user_process (vlib_main_t * vm,
@@ -1264,7 +1306,6 @@ vhost_user_process (vlib_main_t * vm,
timeout = 3.0;
- /* *INDENT-OFF* */
pool_foreach (vui, vum->vhost_user_interfaces) {
if (vui->unix_server_index == ~0) { //Nothing to do for server sockets
@@ -1336,18 +1377,15 @@ vhost_user_process (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vhost_user_process_node,static) = {
.function = vhost_user_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "vhost-user-process",
};
-/* *INDENT-ON* */
/**
* Disables and reset interface structure.
@@ -1364,10 +1402,8 @@ vhost_user_term_if (vhost_user_intf_t * vui)
vhost_user_update_gso_interface_count (vui, 0 /* delete */ );
vhost_user_update_iface_state (vui);
- for (q = 0; q < vui->num_qid; q++)
- {
- clib_spinlock_free (&vui->vrings[q].vring_lock);
- }
+ for (q = 0; q < vec_len (vui->vrings); q++)
+ clib_spinlock_free (&vui->vrings[q].vring_lock);
if (vui->unix_server_index != ~0)
{
@@ -1403,28 +1439,33 @@ vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm, u32 sw_if_index)
vu_log_debug (vui, "Deleting vhost-user interface %s (instance %d)",
hwif->name, hwif->dev_instance);
- for (qid = 1; qid < vui->num_qid / 2; qid += 2)
- {
- vhost_user_vring_t *txvq = &vui->vrings[qid];
+ FOR_ALL_VHOST_TXQ (qid, vui)
+ {
+ vhost_user_vring_t *txvq = &vui->vrings[qid];
- if (txvq->qid == -1)
- continue;
- if ((vum->ifq_count > 0) &&
- ((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
- (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)))
- {
- vum->ifq_count--;
- // Stop the timer if there is no more interrupt interface/queue
- if ((vum->ifq_count == 0) &&
- (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
- {
- vlib_process_signal_event (vm,
- vhost_user_send_interrupt_node.index,
- VHOST_USER_EVENT_STOP_TIMER, 0);
- break;
- }
- }
- }
+ if ((txvq->mode == VNET_HW_IF_RX_MODE_POLLING) &&
+ (txvq->thread_index != ~0))
+ {
+ vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
+ ASSERT (cpu->polling_q_count != 0);
+ cpu->polling_q_count--;
+ }
+
+ if ((vum->ifq_count > 0) &&
+ ((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
+ (txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)))
+ {
+ vum->ifq_count--;
+ // Stop the timer if there is no more interrupt interface/queue
+ if (vum->ifq_count == 0)
+ {
+ vlib_process_signal_event (vm,
+ vhost_user_send_interrupt_node.index,
+ VHOST_USER_EVENT_STOP_TIMER, 0);
+ break;
+ }
+ }
+ }
// Disable and reset interface
vhost_user_term_if (vui);
@@ -1454,11 +1495,9 @@ vhost_user_exit (vlib_main_t * vm)
vhost_user_intf_t *vui;
vlib_worker_thread_barrier_sync (vlib_get_main ());
- /* *INDENT-OFF* */
pool_foreach (vui, vum->vhost_user_interfaces) {
vhost_user_delete_if (vnm, vm, vui->sw_if_index);
}
- /* *INDENT-ON* */
vlib_worker_thread_barrier_release (vlib_get_main ());
return 0;
}
@@ -1514,8 +1553,8 @@ vhost_user_create_ethernet (vnet_main_t *vnm, vlib_main_t *vm,
vhost_user_create_if_args_t *args)
{
vhost_user_main_t *vum = &vhost_user_main;
+ vnet_eth_interface_registration_t eir = {};
u8 hwaddr[6];
- clib_error_t *error;
/* create hw and sw interface */
if (args->use_custom_mac)
@@ -1530,15 +1569,10 @@ vhost_user_create_ethernet (vnet_main_t *vnm, vlib_main_t *vm,
hwaddr[1] = 0xfe;
}
- error = ethernet_register_interface
- (vnm,
- vhost_user_device_class.index,
- vui - vum->vhost_user_interfaces /* device instance */ ,
- hwaddr /* ethernet address */ ,
- &vui->hw_if_index, 0 /* flag change */ );
-
- if (error)
- clib_error_report (error);
+ eir.dev_class_index = vhost_user_device_class.index;
+ eir.dev_instance = vui - vum->vhost_user_interfaces /* device instance */,
+ eir.address = hwaddr;
+ vui->hw_if_index = vnet_eth_register_interface (vnm, &eir);
}
/*
@@ -1552,9 +1586,7 @@ vhost_user_vui_init (vnet_main_t * vnm, vhost_user_intf_t * vui,
vnet_sw_interface_t *sw;
int q;
vhost_user_main_t *vum = &vhost_user_main;
- vnet_hw_interface_t *hw;
- hw = vnet_get_hw_interface (vnm, vui->hw_if_index);
sw = vnet_get_hw_sw_interface (vnm, vui->hw_if_index);
if (server_sock_fd != -1)
{
@@ -1607,15 +1639,11 @@ vhost_user_vui_init (vnet_main_t * vnm, vhost_user_intf_t * vui,
for (q = 0; q < vec_len (vui->vrings); q++)
vhost_user_vring_init (vui, q);
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ vnet_hw_if_set_caps (vnm, vui->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
vnet_hw_interface_set_flags (vnm, vui->hw_if_index, 0);
if (sw_if_index)
*sw_if_index = vui->sw_if_index;
-
- vec_validate (vui->per_cpu_tx_qid,
- vlib_get_thread_main ()->n_vlib_mains - 1);
- vhost_user_tx_thread_placement (vui);
}
int
@@ -1889,7 +1917,8 @@ format_vhost_user_desc (u8 * s, va_list * args)
{
char *fmt = va_arg (*args, char *);
vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
- vring_desc_t *desc_table = va_arg (*args, vring_desc_t *);
+ vnet_virtio_vring_desc_t *desc_table =
+ va_arg (*args, vnet_virtio_vring_desc_t *);
int idx = va_arg (*args, int);
u32 *mem_hint = va_arg (*args, u32 *);
@@ -1918,13 +1947,15 @@ vhost_user_show_desc (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
u32 mem_hint = 0;
u32 idx;
u32 n_entries;
- vring_desc_t *desc_table;
+ vnet_virtio_vring_desc_t *desc_table;
vhost_user_vring_t *vq = &vui->vrings[q];
if (vq->avail && vq->used)
- vlib_cli_output (vm, " avail.flags %x avail event idx %u avail.idx %d "
- "used event idx %u used.idx %d\n", vq->avail->flags,
- vhost_user_avail_event_idx (vq), vq->avail->idx,
+ vlib_cli_output (vm,
+ " avail.flags %x avail event idx %u avail.idx %d "
+ "used.flags %x used event idx %u used.idx %d\n",
+ vq->avail->flags, vhost_user_avail_event_idx (vq),
+ vq->avail->idx, vq->used->flags,
vhost_user_used_event_idx (vq), vq->used->idx);
vhost_user_show_fds (vm, vq);
@@ -1946,7 +1977,8 @@ vhost_user_show_desc (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
desc_table, j, &mem_hint);
if (show_verbose && (desc_table[j].flags & VRING_DESC_F_INDIRECT))
{
- n_entries = desc_table[j].len / sizeof (vring_desc_t);
+ n_entries =
+ desc_table[j].len / sizeof (vnet_virtio_vring_desc_t);
desc_table = map_guest_mem (vui, desc_table[j].addr, &mem_hint);
if (desc_table)
{
@@ -1971,7 +2003,8 @@ format_vhost_user_packed_desc (u8 * s, va_list * args)
{
char *fmt = va_arg (*args, char *);
vhost_user_intf_t *vui = va_arg (*args, vhost_user_intf_t *);
- vring_packed_desc_t *desc_table = va_arg (*args, vring_packed_desc_t *);
+ vnet_virtio_vring_packed_desc_t *desc_table =
+ va_arg (*args, vnet_virtio_vring_packed_desc_t *);
int idx = va_arg (*args, int);
u32 *mem_hint = va_arg (*args, u32 *);
@@ -2013,7 +2046,7 @@ vhost_user_show_desc_packed (vlib_main_t * vm, vhost_user_intf_t * vui, int q,
u32 mem_hint = 0;
u32 idx;
u32 n_entries;
- vring_packed_desc_t *desc_table;
+ vnet_virtio_vring_packed_desc_t *desc_table;
vhost_user_vring_t *vq = &vui->vrings[q];
u16 off_wrap, event_idx;
@@ -2083,7 +2116,6 @@ show_vhost_user_command_fn (vlib_main_t * vm,
u32 hw_if_index, *hw_if_indices = 0;
vnet_hw_interface_t *hi;
u16 qid;
- u32 ci;
int i, j, q;
int show_descr = 0;
int show_verbose = 0;
@@ -2148,6 +2180,12 @@ show_vhost_user_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, " Number of rx virtqueues in interrupt mode: %d",
vum->ifq_count);
vlib_cli_output (vm, " Number of GSO interfaces: %d", vum->gso_count);
+ for (u32 tid = 0; tid <= vlib_num_workers (); tid++)
+ {
+ vhost_cpu_t *cpu = vec_elt_at_index (vum->cpus, tid);
+ vlib_cli_output (vm, " Thread %u: Polling queue count %u", tid,
+ cpu->polling_q_count);
+ }
for (i = 0; i < vec_len (hw_if_indices); i++)
{
@@ -2199,27 +2237,31 @@ show_vhost_user_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, " rx placement: ");
- for (qid = 1; qid < vui->num_qid / 2; qid += 2)
- {
- vnet_main_t *vnm = vnet_get_main ();
- uword thread_index;
- vhost_user_vring_t *txvq = &vui->vrings[qid];
-
- if (txvq->qid == -1)
- continue;
- thread_index =
- vnet_hw_if_get_rx_queue_thread_index (vnm, txvq->queue_index);
- vlib_cli_output (vm, " thread %d on vring %d, %U\n", thread_index,
- qid, format_vnet_hw_if_rx_mode, txvq->mode);
- }
+ FOR_ALL_VHOST_TXQ (qid, vui)
+ {
+ vhost_user_vring_t *txvq = &vui->vrings[qid];
+
+ if (txvq->qid == -1)
+ continue;
+ vlib_cli_output (vm, " thread %d on vring %d, %U\n",
+ txvq->thread_index, qid, format_vnet_hw_if_rx_mode,
+ txvq->mode);
+ }
- vlib_cli_output (vm, " tx placement: %s\n",
- vui->use_tx_spinlock ? "spin-lock" : "lock-free");
+ vlib_cli_output (vm, " tx placement\n");
- vec_foreach_index (ci, vui->per_cpu_tx_qid)
+ FOR_ALL_VHOST_RXQ (qid, vui)
{
- vlib_cli_output (vm, " thread %d on vring %d\n", ci,
- VHOST_VRING_IDX_RX (vui->per_cpu_tx_qid[ci]));
+ vhost_user_vring_t *rxvq = &vui->vrings[qid];
+ vnet_hw_if_tx_queue_t *txq;
+
+ if (rxvq->queue_index == ~0)
+ continue;
+ txq = vnet_hw_if_get_tx_queue (vnm, rxvq->queue_index);
+ if (txq->threads)
+ vlib_cli_output (vm, " threads %U on vring %u: %s\n",
+ format_bitmap_list, txq->threads, qid,
+ txq->shared_queue ? "spin-lock" : "lock-free");
}
vlib_cli_output (vm, "\n");
@@ -2244,29 +2286,29 @@ show_vhost_user_command_fn (vlib_main_t * vm,
vui->regions[j].mmap_offset,
pointer_to_uword (vui->region_mmap_addr[j]));
}
- for (q = 0; q < vui->num_qid; q++)
- {
- if (!vui->vrings[q].started)
- continue;
-
- vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q,
- (q & 1) ? "RX" : "TX",
- vui->vrings[q].enabled ? "" : " disabled");
-
- vlib_cli_output (vm,
- " qsz %d last_avail_idx %d last_used_idx %d"
- " last_kick %u\n",
- vui->vrings[q].qsz_mask + 1,
- vui->vrings[q].last_avail_idx,
- vui->vrings[q].last_used_idx,
- vui->vrings[q].last_kick);
-
- if (vhost_user_is_packed_ring_supported (vui))
- vhost_user_show_desc_packed (vm, vui, q, show_descr,
- show_verbose);
- else
- vhost_user_show_desc (vm, vui, q, show_descr, show_verbose);
- }
+ FOR_ALL_VHOST_RX_TXQ (q, vui)
+ {
+ if (!vui->vrings[q].started)
+ continue;
+
+ vlib_cli_output (vm, "\n Virtqueue %d (%s%s)\n", q,
+ (q & 1) ? "RX" : "TX",
+ vui->vrings[q].enabled ? "" : " disabled");
+ vlib_cli_output (vm, " global %s queue index %u\n",
+ (q & 1) ? "RX" : "TX", vui->vrings[q].queue_index);
+
+ vlib_cli_output (
+ vm,
+ " qsz %d last_avail_idx %d last_used_idx %d"
+ " last_kick %u\n",
+ vui->vrings[q].qsz_mask + 1, vui->vrings[q].last_avail_idx,
+ vui->vrings[q].last_used_idx, vui->vrings[q].last_kick);
+
+ if (vhost_user_is_packed_ring_supported (vui))
+ vhost_user_show_desc_packed (vm, vui, q, show_descr, show_verbose);
+ else
+ vhost_user_show_desc (vm, vui, q, show_descr, show_verbose);
+ }
vlib_cli_output (vm, "\n");
}
done:
@@ -2285,23 +2327,25 @@ done:
*
* There are several parameters associated with a vHost interface:
*
- * - <b>socket <socket-filename></b> - Name of the linux socket used by hypervisor
- * and VPP to manage the vHost interface. If in '<em>server</em>' mode, VPP will
- * create the socket if it does not already exist. If in '<em>client</em>' mode,
- * hypervisor will create the socket if it does not already exist. The VPP code
- * is indifferent to the file location. However, if SELinux is enabled, then the
- * socket needs to be created in '<em>/var/run/vpp/</em>'.
+ * - <b>socket <socket-filename></b> - Name of the linux socket used by
+ * hypervisor and VPP to manage the vHost interface. If in <em>server</em>
+ * mode, VPP will create the socket if it does not already exist. If in
+ * <em>client</em> mode, hypervisor will create the socket if it does not
+ * already exist. The VPP code is indifferent to the file location. However,
+ * if SELinux is enabled, then the socket needs to be created in
+ * <em>/var/run/vpp/</em>.
*
- * - <b>server</b> - Optional flag to indicate that VPP should be the server for
- * the linux socket. If not provided, VPP will be the client. In '<em>server</em>'
- * mode, the VM can be reset without tearing down the vHost Interface. In
- * '<em>client</em>' mode, VPP can be reset without bringing down the VM and
- * tearing down the vHost Interface.
+ * - <b>server</b> - Optional flag to indicate that VPP should be the server
+ * for the linux socket. If not provided, VPP will be the client. In
+ * <em>server</em> mode, the VM can be reset without tearing down the vHost
+ * Interface. In <em>client</em> mode, VPP can be reset without bringing down
+ * the VM and tearing down the vHost Interface.
*
- * - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated at
- * startup. <b>This is intended for degugging only.</b> It is recommended that this
- * parameter not be used except by experienced users. By default, all supported
- * features will be advertised. Otherwise, provide the set of features desired.
+ * - <b>feature-mask <hex></b> - Optional virtio/vhost feature set negotiated
+ * at startup. <b>This is intended for degugging only.</b> It is recommended
+ * that this parameter not be used except by experienced users. By default,
+ * all supported features will be advertised. Otherwise, provide the set of
+ * features desired.
* - 0x000008000 (15) - VIRTIO_NET_F_MRG_RXBUF
* - 0x000020000 (17) - VIRTIO_NET_F_CTRL_VQ
* - 0x000200000 (21) - VIRTIO_NET_F_GUEST_ANNOUNCE
@@ -2315,24 +2359,26 @@ done:
* - <b>hwaddr <mac-addr></b> - Optional ethernet address, can be in either
* X:X:X:X:X:X unix or X.X.X cisco format.
*
- * - <b>renumber <dev_instance></b> - Optional parameter which allows the instance
- * in the name to be specified. If instance already exists, name will be used
- * anyway and multiple instances will have the same name. Use with caution.
+ * - <b>renumber <dev_instance></b> - Optional parameter which allows the
+ * instance in the name to be specified. If instance already exists, name
+ * will be used anyway and multiple instances will have the same name. Use
+ * with caution.
*
* @cliexpar
- * Example of how to create a vhost interface with VPP as the client and all features enabled:
+ * Example of how to create a vhost interface with VPP as the client and all
+ * features enabled:
* @cliexstart{create vhost-user socket /var/run/vpp/vhost1.sock}
* VirtualEthernet0/0/0
* @cliexend
- * Example of how to create a vhost interface with VPP as the server and with just
- * multiple queues enabled:
- * @cliexstart{create vhost-user socket /var/run/vpp/vhost2.sock server feature-mask 0x40400000}
+ * Example of how to create a vhost interface with VPP as the server and with
+ * just multiple queues enabled:
+ * @cliexstart{create vhost-user socket /var/run/vpp/vhost2.sock server
+ * feature-mask 0x40400000}
* VirtualEthernet0/0/1
* @cliexend
* Once the vHost interface is created, enable the interface using:
* @cliexcmd{set interface state VirtualEthernet0/0/0 up}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
.path = "create vhost-user",
.short_help = "create vhost-user socket <socket-filename> [server] "
@@ -2341,7 +2387,6 @@ VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
.function = vhost_user_connect_command_fn,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*?
* Delete a vHost User interface using the interface name or the
@@ -2355,7 +2400,6 @@ VLIB_CLI_COMMAND (vhost_user_connect_command, static) = {
* Example of how to delete a vhost interface by software interface index:
* @cliexcmd{delete vhost-user sw_if_index 1}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
.path = "delete vhost-user",
.short_help = "delete vhost-user {<interface> | sw_if_index <sw_idx>}",
@@ -2364,9 +2408,9 @@ VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
/*?
* Display the attributes of a single vHost User interface (provide interface
- * name), multiple vHost User interfaces (provide a list of interface names seperated
- * by spaces) or all Vhost User interfaces (omit an interface name to display all
- * vHost interfaces).
+ * name), multiple vHost User interfaces (provide a list of interface names
+ * separated by spaces) or all Vhost User interfaces (omit an interface name
+ * to display all vHost interfaces).
*
* @cliexpar
* @parblock
@@ -2400,10 +2444,10 @@ VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
* thread 2 on vring 0
*
* Memory regions (total 2)
- * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr
- * ====== ===== ================== ================== ================== ================== ==================
- * 0 60 0x0000000000000000 0x00000000000a0000 0x00002aaaaac00000 0x0000000000000000 0x00002aab2b400000
- * 1 61 0x00000000000c0000 0x000000003ff40000 0x00002aaaaacc0000 0x00000000000c0000 0x00002aababcc0000
+ * region fd guest_phys_addr memory_size userspace_addr mmap_offset mmap_addr
+ * ====== == =============== =========== ============== =========== ==========
+ * 0 60 0x00000000 0x000a0000 0xaac00000 0x00000000 0x2b400000
+ * 1 61 0x000c0000 0x3ff40000 0xaacc0000 0x000c0000 0xabcc0000
*
* Virtqueue 0 (TX)
* qsz 256 last_avail_idx 0 last_used_idx 0
@@ -2447,8 +2491,9 @@ VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
*
* @cliexend
*
- * The optional '<em>descriptors</em>' parameter will display the same output as
- * the previous example but will include the descriptor table for each queue.
+ * The optional '<em>descriptors</em>' parameter will display the same output
+ * as the previous example but will include the descriptor table for each
+ * queue.
* The output is truncated below:
* @cliexstart{show vhost-user VirtualEthernet0/0/0 descriptors}
* Virtio vhost-user interfaces
@@ -2495,14 +2540,12 @@ VLIB_CLI_COMMAND (vhost_user_delete_command, static) = {
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vhost_user_command, static) = {
.path = "show vhost-user",
.short_help = "show vhost-user [<interface> [<interface> [..]]] "
"[[descriptors] [verbose]]",
.function = show_vhost_user_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
diff --git a/src/vnet/devices/virtio/vhost_user.h b/src/plugins/vhost/vhost_user.h
index 63a0bc0da48..a3582affb4b 100644
--- a/src/vnet/devices/virtio/vhost_user.h
+++ b/src/plugins/vhost/vhost_user.h
@@ -15,8 +15,8 @@
#ifndef __VIRTIO_VHOST_USER_H__
#define __VIRTIO_VHOST_USER_H__
-#include <vnet/devices/virtio/virtio_std.h>
-#include <vnet/devices/virtio/vhost_std.h>
+#include <vhost/virtio_std.h>
+#include <vhost/vhost_std.h>
/* vhost-user data structures */
@@ -30,8 +30,8 @@
* The max number for q pair is naturally 128.
*/
#define VHOST_VRING_MAX_MQ_PAIR_SZ 128
-#define VHOST_VRING_IDX_RX(qid) (2*qid)
-#define VHOST_VRING_IDX_TX(qid) (2*qid + 1)
+#define VHOST_VRING_IDX_RX(qid) (2 * (qid))
+#define VHOST_VRING_IDX_TX(qid) (2 * (qid) + 1)
#define VHOST_USER_VRING_NOFD_MASK 0x100
@@ -122,7 +122,6 @@ int vhost_user_modify_if (vnet_main_t * vnm, vlib_main_t * vm,
int vhost_user_delete_if (vnet_main_t * vnm, vlib_main_t * vm,
u32 sw_if_index);
-/* *INDENT-OFF* */
typedef struct vhost_user_memory_region
{
u64 guest_phys_addr;
@@ -175,7 +174,6 @@ typedef struct vhost_user_msg {
vhost_user_log_t log;
};
} __attribute ((packed)) vhost_user_msg_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -186,18 +184,18 @@ typedef struct
u16 n_since_last_int;
union
{
- vring_desc_t *desc;
- vring_packed_desc_t *packed_desc;
+ vnet_virtio_vring_desc_t *desc;
+ vnet_virtio_vring_packed_desc_t *packed_desc;
};
union
{
- vring_avail_t *avail;
- vring_desc_event_t *avail_event;
+ vnet_virtio_vring_avail_t *avail;
+ vnet_virtio_vring_desc_event_t *avail_event;
};
union
{
- vring_used_t *used;
- vring_desc_event_t *used_event;
+ vnet_virtio_vring_used_t *used;
+ vnet_virtio_vring_desc_event_t *used_event;
};
uword desc_user_addr;
uword used_user_addr;
@@ -231,6 +229,7 @@ typedef struct
u16 last_kick;
u8 first_kick;
u32 queue_index;
+ u32 thread_index;
} vhost_user_vring_t;
#define VHOST_USER_EVENT_START_TIMER 1
@@ -278,10 +277,6 @@ typedef struct
void *log_base_addr;
u64 log_size;
- /* Whether to use spinlock or per_cpu_tx_qid assignment */
- u8 use_tx_spinlock;
- u16 *per_cpu_tx_qid;
-
u8 enable_gso;
/* Packed ring configured */
@@ -290,6 +285,12 @@ typedef struct
u8 enable_event_idx;
} vhost_user_intf_t;
+#define FOR_ALL_VHOST_TXQ(qid, vui) for (qid = 1; qid < vui->num_qid; qid += 2)
+
+#define FOR_ALL_VHOST_RXQ(qid, vui) for (qid = 0; qid < vui->num_qid; qid += 2)
+
+#define FOR_ALL_VHOST_RX_TXQ(qid, vui) for (qid = 0; qid < vui->num_qid; qid++)
+
typedef struct
{
uword dst;
@@ -303,7 +304,7 @@ typedef struct
u16 device_index; /** The device index */
u32 virtio_ring_flags; /** Runtime queue flags **/
u16 first_desc_len; /** Length of the first data descriptor **/
- virtio_net_hdr_mrg_rxbuf_t hdr; /** Virtio header **/
+ vnet_virtio_net_hdr_mrg_rxbuf_t hdr; /** Virtio header **/
} vhost_trace_t;
#define VHOST_USER_RX_BUFFERS_N (2 * VLIB_FRAME_SIZE + 2)
@@ -314,7 +315,7 @@ typedef struct
u32 rx_buffers_len;
u32 rx_buffers[VHOST_USER_RX_BUFFERS_N];
- virtio_net_hdr_mrg_rxbuf_t tx_headers[VLIB_FRAME_SIZE];
+ vnet_virtio_net_hdr_mrg_rxbuf_t tx_headers[VLIB_FRAME_SIZE];
vhost_copy_t copy[VHOST_USER_COPY_ARRAY_N];
/* This is here so it doesn't end-up
@@ -323,6 +324,7 @@ typedef struct
u32 *to_next_list;
vlib_buffer_t **rx_buffers_pdesc;
+ u32 polling_q_count;
} vhost_cpu_t;
typedef struct
@@ -365,6 +367,8 @@ typedef struct
int vhost_user_dump_ifs (vnet_main_t * vnm, vlib_main_t * vm,
vhost_user_intf_details_t ** out_vuids);
+void vhost_user_set_operation_mode (vhost_user_intf_t *vui,
+ vhost_user_vring_t *txvq);
extern vlib_node_registration_t vhost_user_send_interrupt_node;
extern vnet_device_class_t vhost_user_device_class;
diff --git a/src/vnet/devices/virtio/vhost_user_api.c b/src/plugins/vhost/vhost_user_api.c
index df6768d4cde..33447c556a8 100644
--- a/src/vnet/devices/virtio/vhost_user_api.c
+++ b/src/plugins/vhost/vhost_user_api.c
@@ -22,14 +22,13 @@
#include <vnet/interface.h>
#include <vnet/api_errno.h>
-#include <vnet/devices/virtio/vhost_user.h>
+#include <vhost/vhost_user.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/ethernet_types_api.h>
-#include <vnet/devices/virtio/virtio_types_api.h>
#include <vnet/format_fns.h>
-#include <vnet/devices/virtio/vhost_user.api_enum.h>
-#include <vnet/devices/virtio/vhost_user.api_types.h>
+#include <vhost/vhost_user.api_enum.h>
+#include <vhost/vhost_user.api_types.h>
#define REPLY_MSG_ID_BASE msg_id_base
#include <vlibapi/api_helper_macros.h>
@@ -91,12 +90,10 @@ vl_api_create_vhost_user_if_t_handler (vl_api_create_vhost_user_if_t * mp)
}
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_REPLY,
({
rmp->sw_if_index = ntohl (args.sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -168,6 +165,7 @@ vl_api_create_vhost_user_if_v2_t_handler (vl_api_create_vhost_user_if_v2_t *
if (mp->use_custom_mac)
mac_address_decode (mp->mac_address, (mac_address_t *) args.hwaddr);
+ args.use_custom_mac = mp->use_custom_mac;
args.is_server = mp->is_server;
args.sock_filename = (char *) mp->sock_filename;
args.renumber = mp->renumber;
@@ -190,12 +188,10 @@ vl_api_create_vhost_user_if_v2_t_handler (vl_api_create_vhost_user_if_v2_t *
}
}
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CREATE_VHOST_USER_IF_V2_REPLY,
({
rmp->sw_if_index = ntohl (args.sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -260,6 +256,13 @@ vl_api_delete_vhost_user_if_t_handler (vl_api_delete_vhost_user_if_t * mp)
}
static void
+vhost_user_features_encode (u64 features, u32 *first, u32 *last)
+{
+ *first = clib_net_to_host_u32 (features);
+ *last = clib_net_to_host_u32 (features >> 32);
+}
+
+static void
send_sw_interface_vhost_user_details (vpe_api_main_t * am,
vl_api_registration_t * reg,
vhost_user_intf_details_t * vui,
@@ -273,8 +276,8 @@ send_sw_interface_vhost_user_details (vpe_api_main_t * am,
ntohs (REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_VHOST_USER_DETAILS);
mp->sw_if_index = ntohl (vui->sw_if_index);
mp->virtio_net_hdr_sz = ntohl (vui->virtio_net_hdr_sz);
- virtio_features_encode (vui->features, (u32 *) & mp->features_first_32,
- (u32 *) & mp->features_last_32);
+ vhost_user_features_encode (vui->features, (u32 *) &mp->features_first_32,
+ (u32 *) &mp->features_last_32);
mp->is_server = vui->is_server;
mp->num_regions = ntohl (vui->num_regions);
mp->sock_errno = ntohl (vui->sock_errno);
@@ -323,20 +326,23 @@ static void
vec_free (ifaces);
}
-#include <vnet/devices/virtio/vhost_user.api.c>
+#include <vhost/vhost_user.api.c>
static clib_error_t *
vhost_user_api_hookup (vlib_main_t * vm)
{
api_main_t *am = vlibapi_get_main ();
- /* Mark CREATE_VHOST_USER_IF as mp safe */
- am->is_mp_safe[VL_API_CREATE_VHOST_USER_IF] = 1;
- am->is_mp_safe[VL_API_CREATE_VHOST_USER_IF_V2] = 1;
/*
* Set up the (msg_name, crc, message-id) table
*/
REPLY_MSG_ID_BASE = setup_message_id_table ();
+ /* Mark CREATE_VHOST_USER_IF as mp safe */
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_CREATE_VHOST_USER_IF, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_CREATE_VHOST_USER_IF_V2, 1);
+
return 0;
}
diff --git a/src/vnet/devices/virtio/vhost_user_inline.h b/src/plugins/vhost/vhost_user_inline.h
index 5297453c317..e27f819e96d 100644
--- a/src/vnet/devices/virtio/vhost_user_inline.h
+++ b/src/plugins/vhost/vhost_user_inline.h
@@ -135,13 +135,11 @@ vhost_map_guest_mem_done:
}
}
#endif
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (el) =
{
.format = "failed to map guest mem addr %lx",
.format_args = "i8",
};
- /* *INDENT-ON* */
struct
{
uword addr;
@@ -199,12 +197,15 @@ vhost_user_log_dirty_pages_2 (vhost_user_intf_t * vui,
}
}
-
-#define vhost_user_log_dirty_ring(vui, vq, member) \
- if (PREDICT_FALSE(vq->log_used)) { \
- vhost_user_log_dirty_pages_2(vui, vq->log_guest_addr + STRUCT_OFFSET_OF(vring_used_t, member), \
- sizeof(vq->used->member), 0); \
- }
+#define vhost_user_log_dirty_ring(vui, vq, member) \
+ if (PREDICT_FALSE (vq->log_used)) \
+ { \
+ vhost_user_log_dirty_pages_2 ( \
+ vui, \
+ vq->log_guest_addr + \
+ STRUCT_OFFSET_OF (vnet_virtio_vring_used_t, member), \
+ sizeof (vq->used->member), 0); \
+ }
static_always_inline u8 *
format_vhost_trace (u8 * s, va_list * va)
@@ -425,7 +426,7 @@ vhost_user_advance_last_avail_table_idx (vhost_user_intf_t * vui,
{
if (chained)
{
- vring_packed_desc_t *desc_table = vring->packed_desc;
+ vnet_virtio_vring_packed_desc_t *desc_table = vring->packed_desc;
/* pick up the slot of the next avail idx */
while (desc_table[vring->last_avail_idx & vring->qsz_mask].flags &
@@ -449,9 +450,9 @@ vhost_user_undo_advanced_last_avail_idx (vhost_user_vring_t * vring)
}
static_always_inline void
-vhost_user_dequeue_descs (vhost_user_vring_t * rxvq,
- virtio_net_hdr_mrg_rxbuf_t * hdr,
- u16 * n_descs_processed)
+vhost_user_dequeue_descs (vhost_user_vring_t *rxvq,
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr,
+ u16 *n_descs_processed)
{
u16 i;
diff --git a/src/vnet/devices/virtio/vhost_user_input.c b/src/plugins/vhost/vhost_user_input.c
index bdb3d27245b..ca5072485ff 100644
--- a/src/vnet/devices/virtio/vhost_user_input.c
+++ b/src/plugins/vhost/vhost_user_input.c
@@ -37,10 +37,11 @@
#include <vnet/devices/devices.h>
#include <vnet/feature/feature.h>
#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
#include <vnet/interface/rx_queue_funcs.h>
-#include <vnet/devices/virtio/vhost_user.h>
-#include <vnet/devices/virtio/vhost_user_inline.h>
+#include <vhost/vhost_user.h>
+#include <vhost/vhost_user_inline.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
@@ -101,8 +102,8 @@ vhost_user_rx_trace (vhost_trace_t * t,
{
vhost_user_main_t *vum = &vhost_user_main;
u32 desc_current = txvq->avail->ring[last_avail_idx & txvq->qsz_mask];
- vring_desc_t *hdr_desc = 0;
- virtio_net_hdr_mrg_rxbuf_t *hdr;
+ vnet_virtio_vring_desc_t *hdr_desc = 0;
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
u32 hint = 0;
clib_memset (t, 0, sizeof (*t));
@@ -248,8 +249,8 @@ vhost_user_input_rewind_buffers (vlib_main_t * vm,
}
static_always_inline void
-vhost_user_handle_rx_offload (vlib_buffer_t * b0, u8 * b0_data,
- virtio_net_hdr_t * hdr)
+vhost_user_handle_rx_offload (vlib_buffer_t *b0, u8 *b0_data,
+ vnet_virtio_net_hdr_t *hdr)
{
u8 l4_hdr_sz = 0;
u8 l4_proto = 0;
@@ -516,7 +517,7 @@ vhost_user_if_input (vlib_main_t *vm, vhost_user_main_t *vum,
u32 bi_current;
u16 desc_current;
u32 desc_data_offset;
- vring_desc_t *desc_table = txvq->desc;
+ vnet_virtio_vring_desc_t *desc_table = txvq->desc;
if (PREDICT_FALSE (cpu->rx_buffers_len <= 1))
{
@@ -579,7 +580,7 @@ vhost_user_if_input (vlib_main_t *vm, vhost_user_main_t *vum,
if (enable_csum)
{
- virtio_net_hdr_mrg_rxbuf_t *hdr;
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
u8 *b_data;
u16 current;
@@ -768,7 +769,7 @@ vhost_user_mark_desc_consumed (vhost_user_intf_t * vui,
vhost_user_vring_t * txvq, u16 desc_head,
u16 n_descs_processed)
{
- vring_packed_desc_t *desc_table = txvq->packed_desc;
+ vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
u16 desc_idx;
u16 mask = txvq->qsz_mask;
@@ -790,8 +791,8 @@ vhost_user_rx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
u16 desc_current)
{
vhost_user_main_t *vum = &vhost_user_main;
- vring_packed_desc_t *hdr_desc;
- virtio_net_hdr_mrg_rxbuf_t *hdr;
+ vnet_virtio_vring_packed_desc_t *hdr_desc;
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
u32 hint = 0;
clib_memset (t, 0, sizeof (*t));
@@ -922,12 +923,13 @@ one_by_one:
}
static_always_inline u32
-vhost_user_do_offload (vhost_user_intf_t * vui,
- vring_packed_desc_t * desc_table, u16 desc_current,
- u16 mask, vlib_buffer_t * b_head, u32 * map_hint)
+vhost_user_do_offload (vhost_user_intf_t *vui,
+ vnet_virtio_vring_packed_desc_t *desc_table,
+ u16 desc_current, u16 mask, vlib_buffer_t *b_head,
+ u32 *map_hint)
{
u32 rc = VHOST_USER_INPUT_FUNC_ERROR_NO_ERROR;
- virtio_net_hdr_mrg_rxbuf_t *hdr;
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr;
u8 *b_data;
u32 desc_data_offset = vui->virtio_net_hdr_sz;
@@ -988,7 +990,7 @@ vhost_user_compute_indirect_desc_len (vhost_user_intf_t * vui,
u32 buffer_data_size, u16 desc_current,
u32 * map_hint)
{
- vring_packed_desc_t *desc_table = txvq->packed_desc;
+ vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
u32 desc_len = 0;
u16 desc_data_offset = vui->virtio_net_hdr_sz;
u16 desc_idx = desc_current;
@@ -1014,7 +1016,7 @@ vhost_user_compute_chained_desc_len (vhost_user_intf_t * vui,
u32 buffer_data_size, u16 * current,
u16 * n_left)
{
- vring_packed_desc_t *desc_table = txvq->packed_desc;
+ vnet_virtio_vring_packed_desc_t *desc_table = txvq->packed_desc;
u32 desc_len = 0;
u16 mask = txvq->qsz_mask;
@@ -1037,14 +1039,13 @@ vhost_user_compute_chained_desc_len (vhost_user_intf_t * vui,
}
static_always_inline void
-vhost_user_assemble_packet (vring_packed_desc_t * desc_table,
- u16 * desc_idx, vlib_buffer_t * b_head,
- vlib_buffer_t ** b_current, u32 ** next,
- vlib_buffer_t *** b, u32 * bi_current,
- vhost_cpu_t * cpu, u16 * copy_len,
- u32 * buffers_used, u32 buffers_required,
- u32 * desc_data_offset, u32 buffer_data_size,
- u16 mask)
+vhost_user_assemble_packet (vnet_virtio_vring_packed_desc_t *desc_table,
+ u16 *desc_idx, vlib_buffer_t *b_head,
+ vlib_buffer_t **b_current, u32 **next,
+ vlib_buffer_t ***b, u32 *bi_current,
+ vhost_cpu_t *cpu, u16 *copy_len, u32 *buffers_used,
+ u32 buffers_required, u32 *desc_data_offset,
+ u32 buffer_data_size, u16 mask)
{
u32 desc_data_l;
@@ -1107,7 +1108,7 @@ vhost_user_if_input_packed (vlib_main_t *vm, vhost_user_main_t *vum,
u32 current_config_index = ~0;
u16 mask = txvq->qsz_mask;
u16 desc_current, desc_head, last_used_idx;
- vring_packed_desc_t *desc_table = 0;
+ vnet_virtio_vring_packed_desc_t *desc_table = 0;
u32 n_descs_processed = 0;
u32 rv;
vlib_buffer_t **b;
@@ -1446,7 +1447,6 @@ VLIB_NODE_FN (vhost_user_input_node) (vlib_main_t * vm,
return n_rx_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vhost_user_input_node) = {
.type = VLIB_NODE_TYPE_INPUT,
.name = "vhost-user-input",
@@ -1462,7 +1462,6 @@ VLIB_REGISTER_NODE (vhost_user_input_node) = {
.n_errors = VHOST_USER_INPUT_FUNC_N_ERROR,
.error_strings = vhost_user_input_func_error_strings,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/devices/virtio/vhost_user_output.c b/src/plugins/vhost/vhost_user_output.c
index 40faefadb42..58fd4309f8c 100644
--- a/src/vnet/devices/virtio/vhost_user_output.c
+++ b/src/plugins/vhost/vhost_user_output.c
@@ -37,9 +37,10 @@
#include <vnet/ethernet/ethernet.h>
#include <vnet/devices/devices.h>
#include <vnet/feature/feature.h>
+#include <vnet/ip/ip_psh_cksum.h>
-#include <vnet/devices/virtio/vhost_user.h>
-#include <vnet/devices/virtio/vhost_user_inline.h>
+#include <vhost/vhost_user.h>
+#include <vhost/vhost_user_inline.h>
#include <vnet/gso/hdr_offset_parser.h>
/*
@@ -118,24 +119,6 @@ vhost_user_name_renumber (vnet_hw_interface_t * hi, u32 new_dev_instance)
return 0;
}
-/**
- * @brief Spin until the vring is successfully locked
- */
-static_always_inline void
-vhost_user_vring_lock (vhost_user_intf_t * vui, u32 qid)
-{
- clib_spinlock_lock_if_init (&vui->vrings[qid].vring_lock);
-}
-
-/**
- * @brief Unlock the vring lock
- */
-static_always_inline void
-vhost_user_vring_unlock (vhost_user_intf_t * vui, u32 qid)
-{
- clib_spinlock_unlock_if_init (&vui->vrings[qid].vring_lock);
-}
-
static_always_inline void
vhost_user_tx_trace (vhost_trace_t * t,
vhost_user_intf_t * vui, u16 qid,
@@ -144,7 +127,7 @@ vhost_user_tx_trace (vhost_trace_t * t,
vhost_user_main_t *vum = &vhost_user_main;
u32 last_avail_idx = rxvq->last_avail_idx;
u32 desc_current = rxvq->avail->ring[last_avail_idx & rxvq->qsz_mask];
- vring_desc_t *hdr_desc = 0;
+ vnet_virtio_vring_desc_t *hdr_desc = 0;
u32 hint = 0;
clib_memset (t, 0, sizeof (*t));
@@ -219,34 +202,47 @@ vhost_user_tx_copy (vhost_user_intf_t * vui, vhost_copy_t * cpy,
}
static_always_inline void
-vhost_user_handle_tx_offload (vhost_user_intf_t * vui, vlib_buffer_t * b,
- virtio_net_hdr_t * hdr)
+vhost_user_handle_tx_offload (vhost_user_intf_t *vui, vlib_buffer_t *b,
+ vnet_virtio_net_hdr_t *hdr)
{
generic_header_offset_t gho = { 0 };
int is_ip4 = b->flags & VNET_BUFFER_F_IS_IP4;
int is_ip6 = b->flags & VNET_BUFFER_F_IS_IP6;
vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+ u16 psh_cksum = 0;
+ ip4_header_t *ip4 = 0;
+ ip6_header_t *ip6 = 0;
ASSERT (!(is_ip4 && is_ip6));
vnet_generic_header_offset_parser (b, &gho, 1 /* l2 */ , is_ip4, is_ip6);
if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
{
- ip4_header_t *ip4;
-
ip4 =
(ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
ip4->checksum = ip4_header_checksum (ip4);
+ psh_cksum = ip4_pseudo_header_cksum (ip4);
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
+ psh_cksum = ip6_pseudo_header_cksum (ip6);
}
/* checksum offload */
if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
{
+ udp_header_t *udp =
+ (udp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
+ udp->checksum = psh_cksum;
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
hdr->csum_start = gho.l4_hdr_offset;
hdr->csum_offset = offsetof (udp_header_t, checksum);
}
else if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
{
+ tcp_header_t *tcp =
+ (tcp_header_t *) (vlib_buffer_get_current (b) + gho.l4_hdr_offset);
+ tcp->checksum = psh_cksum;
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
hdr->csum_start = gho.l4_hdr_offset;
hdr->csum_offset = offsetof (tcp_header_t, checksum);
@@ -286,7 +282,7 @@ vhost_user_mark_desc_available (vlib_main_t * vm, vhost_user_intf_t * vui,
vlib_frame_t * frame, u32 n_left)
{
u16 desc_idx, flags;
- vring_packed_desc_t *desc_table = rxvq->packed_desc;
+ vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc;
u16 last_used_idx = rxvq->last_used_idx;
if (PREDICT_FALSE (*n_descs_processed == 0))
@@ -318,7 +314,7 @@ vhost_user_mark_desc_available (vlib_main_t * vm, vhost_user_intf_t * vui,
if (chained)
{
- vring_packed_desc_t *desc_table = rxvq->packed_desc;
+ vnet_virtio_vring_packed_desc_t *desc_table = rxvq->packed_desc;
while (desc_table[rxvq->last_used_idx & rxvq->qsz_mask].flags &
VRING_DESC_F_NEXT)
@@ -348,7 +344,7 @@ vhost_user_tx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
vhost_user_main_t *vum = &vhost_user_main;
u32 last_avail_idx = rxvq->last_avail_idx;
u32 desc_current = last_avail_idx & rxvq->qsz_mask;
- vring_packed_desc_t *hdr_desc = 0;
+ vnet_virtio_vring_packed_desc_t *hdr_desc = 0;
u32 hint = 0;
clib_memset (t, 0, sizeof (*t));
@@ -377,17 +373,14 @@ vhost_user_tx_trace_packed (vhost_trace_t * t, vhost_user_intf_t * vui,
}
static_always_inline uword
-vhost_user_device_class_packed (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+vhost_user_device_class_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, vhost_user_intf_t *vui,
+ vhost_user_vring_t *rxvq)
{
u32 *buffers = vlib_frame_vector_args (frame);
u32 n_left = frame->n_vectors;
vhost_user_main_t *vum = &vhost_user_main;
- vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
- vhost_user_intf_t *vui =
- pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
- u32 qid;
- vhost_user_vring_t *rxvq;
+ u32 qid = rxvq->qid;
u8 error;
u32 thread_index = vm->thread_index;
vhost_cpu_t *cpu = &vum->cpus[thread_index];
@@ -395,16 +388,12 @@ vhost_user_device_class_packed (vlib_main_t * vm, vlib_node_runtime_t * node,
u8 retry = 8;
u16 copy_len;
u16 tx_headers_len;
- vring_packed_desc_t *desc_table;
+ vnet_virtio_vring_packed_desc_t *desc_table;
u32 or_flags;
u16 desc_head, desc_index, desc_len;
u16 n_descs_processed;
u8 indirect, chained;
- qid = VHOST_VRING_IDX_RX (*vec_elt_at_index (vui->per_cpu_tx_qid,
- thread_index));
- rxvq = &vui->vrings[qid];
-
retry:
error = VHOST_USER_TX_FUNC_ERROR_NONE;
tx_headers_len = 0;
@@ -449,7 +438,7 @@ retry:
{
indirect = 1;
if (PREDICT_FALSE (desc_table[desc_head].len <
- sizeof (vring_packed_desc_t)))
+ sizeof (vnet_virtio_vring_packed_desc_t)))
{
error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
goto done;
@@ -472,7 +461,7 @@ retry:
buffer_len = desc_table[desc_index].len;
/* Get a header from the header array */
- virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
tx_headers_len++;
hdr->hdr.flags = 0;
hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
@@ -556,7 +545,7 @@ retry:
* MRG is available
* This is the default setting for the guest VM
*/
- virtio_net_hdr_mrg_rxbuf_t *hdr =
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
&cpu->tx_headers[tx_headers_len - 1];
desc_table[desc_index].len = desc_len;
@@ -682,7 +671,7 @@ done:
goto retry;
}
- vhost_user_vring_unlock (vui, qid);
+ clib_spinlock_unlock (&rxvq->vring_lock);
if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
{
@@ -706,7 +695,7 @@ VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
vhost_user_intf_t *vui =
pool_elt_at_index (vum->vhost_user_interfaces, rd->dev_instance);
- u32 qid = ~0;
+ u32 qid;
vhost_user_vring_t *rxvq;
u8 error;
u32 thread_index = vm->thread_index;
@@ -716,6 +705,7 @@ VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
u16 copy_len;
u16 tx_headers_len;
u32 or_flags;
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
if (PREDICT_FALSE (!vui->admin_up))
{
@@ -729,20 +719,20 @@ VNET_DEVICE_CLASS_TX_FN (vhost_user_device_class) (vlib_main_t * vm,
goto done3;
}
- qid = VHOST_VRING_IDX_RX (*vec_elt_at_index (vui->per_cpu_tx_qid,
- thread_index));
+ qid = VHOST_VRING_IDX_RX (tf->queue_id);
rxvq = &vui->vrings[qid];
+ ASSERT (tf->queue_id == rxvq->qid);
+
if (PREDICT_FALSE (rxvq->avail == 0))
{
error = VHOST_USER_TX_FUNC_ERROR_MMAP_FAIL;
goto done3;
}
-
- if (PREDICT_FALSE (vui->use_tx_spinlock))
- vhost_user_vring_lock (vui, qid);
+ if (tf->shared_queue)
+ clib_spinlock_lock (&rxvq->vring_lock);
if (vhost_user_is_packed_ring_supported (vui))
- return (vhost_user_device_class_packed (vm, node, frame));
+ return (vhost_user_device_class_packed (vm, node, frame, vui, rxvq));
retry:
error = VHOST_USER_TX_FUNC_ERROR_NONE;
@@ -752,7 +742,7 @@ retry:
{
vlib_buffer_t *b0, *current_b0;
u16 desc_head, desc_index, desc_len;
- vring_desc_t *desc_table;
+ vnet_virtio_vring_desc_t *desc_table;
uword buffer_map_addr;
u32 buffer_len;
u16 bytes_left;
@@ -783,8 +773,8 @@ retry:
* I don't know of any driver providing indirect for RX. */
if (PREDICT_FALSE (rxvq->desc[desc_head].flags & VRING_DESC_F_INDIRECT))
{
- if (PREDICT_FALSE
- (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
+ if (PREDICT_FALSE (rxvq->desc[desc_head].len <
+ sizeof (vnet_virtio_vring_desc_t)))
{
error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
goto done;
@@ -806,7 +796,8 @@ retry:
{
// Get a header from the header array
- virtio_net_hdr_mrg_rxbuf_t *hdr = &cpu->tx_headers[tx_headers_len];
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
+ &cpu->tx_headers[tx_headers_len];
tx_headers_len++;
hdr->hdr.flags = 0;
hdr->hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
@@ -845,7 +836,7 @@ retry:
}
else if (vui->virtio_net_hdr_sz == 12) //MRG is available
{
- virtio_net_hdr_mrg_rxbuf_t *hdr =
+ vnet_virtio_net_hdr_mrg_rxbuf_t *hdr =
&cpu->tx_headers[tx_headers_len - 1];
//Move from available to used buffer
@@ -880,8 +871,8 @@ retry:
{
//It is seriously unlikely that a driver will put indirect descriptor
//after non-indirect descriptor.
- if (PREDICT_FALSE
- (rxvq->desc[desc_head].len < sizeof (vring_desc_t)))
+ if (PREDICT_FALSE (rxvq->desc[desc_head].len <
+ sizeof (vnet_virtio_vring_desc_t)))
{
error = VHOST_USER_TX_FUNC_ERROR_INDIRECT_OVERFLOW;
goto done;
@@ -1020,7 +1011,7 @@ done:
vhost_user_send_call (vm, vui, rxvq);
}
- vhost_user_vring_unlock (vui, qid);
+ clib_spinlock_unlock (&rxvq->vring_lock);
done3:
if (PREDICT_FALSE (n_left && error != VHOST_USER_TX_FUNC_ERROR_NONE))
@@ -1046,7 +1037,24 @@ vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
vhost_user_intf_t *vui =
pool_elt_at_index (vum->vhost_user_interfaces, hif->dev_instance);
vhost_user_vring_t *txvq = &vui->vrings[VHOST_VRING_IDX_TX (qid)];
+ vhost_cpu_t *cpu;
+
+ if (mode == txvq->mode)
+ return 0;
+
+ if ((mode != VNET_HW_IF_RX_MODE_POLLING) &&
+ (mode != VNET_HW_IF_RX_MODE_ADAPTIVE) &&
+ (mode != VNET_HW_IF_RX_MODE_INTERRUPT))
+ {
+ vu_log_err (vui, "unhandled mode %d changed for if %d queue %d", mode,
+ hw_if_index, qid);
+ return clib_error_return (0, "unsupported");
+ }
+ if (txvq->thread_index == ~0)
+ return clib_error_return (0, "Queue initialization is not finished yet");
+
+ cpu = vec_elt_at_index (vum->cpus, txvq->thread_index);
if ((mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
(mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
{
@@ -1057,11 +1065,14 @@ vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
}
if (txvq->mode == VNET_HW_IF_RX_MODE_POLLING)
{
+ ASSERT (cpu->polling_q_count != 0);
+ if (cpu->polling_q_count)
+ cpu->polling_q_count--;
vum->ifq_count++;
// Start the timer if this is the first encounter on interrupt
// interface/queue
if ((vum->ifq_count == 1) &&
- (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+ ((vum->coalesce_time > 0.0) || (vum->coalesce_frames > 0)))
vlib_process_signal_event (vm,
vhost_user_send_interrupt_node.index,
VHOST_USER_EVENT_START_TIMER, 0);
@@ -1072,10 +1083,10 @@ vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
if (((txvq->mode == VNET_HW_IF_RX_MODE_INTERRUPT) ||
(txvq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)) && vum->ifq_count)
{
+ cpu->polling_q_count++;
vum->ifq_count--;
// Stop the timer if there is no more interrupt interface/queue
- if ((vum->ifq_count == 0) &&
- (vum->coalesce_time > 0.0) && (vum->coalesce_frames > 0))
+ if (vum->ifq_count == 0)
vlib_process_signal_event (vm,
vhost_user_send_interrupt_node.index,
VHOST_USER_EVENT_STOP_TIMER, 0);
@@ -1083,17 +1094,7 @@ vhost_user_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index,
}
txvq->mode = mode;
- if (mode == VNET_HW_IF_RX_MODE_POLLING)
- txvq->used->flags = VRING_USED_F_NO_NOTIFY;
- else if ((mode == VNET_HW_IF_RX_MODE_ADAPTIVE) ||
- (mode == VNET_HW_IF_RX_MODE_INTERRUPT))
- txvq->used->flags = 0;
- else
- {
- vu_log_err (vui, "unhandled mode %d changed for if %d queue %d", mode,
- hw_if_index, qid);
- return clib_error_return (0, "unsupported");
- }
+ vhost_user_set_operation_mode (vui, txvq);
return 0;
}
@@ -1121,7 +1122,6 @@ vhost_user_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
return /* no error */ 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vhost_user_device_class) = {
.name = "vhost-user",
.tx_function_n_errors = VHOST_USER_TX_FUNC_N_ERROR,
@@ -1133,7 +1133,6 @@ VNET_DEVICE_CLASS (vhost_user_device_class) = {
.format_tx_trace = format_vhost_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vhost/virtio_std.h b/src/plugins/vhost/virtio_std.h
new file mode 100644
index 00000000000..fa826933a9c
--- /dev/null
+++ b/src/plugins/vhost/virtio_std.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __VIRTIO_STD_H__
+#define __VIRTIO_STD_H__
+
+#define foreach_virtio_net_features \
+ _ (VIRTIO_NET_F_CSUM, 0) /* Host handles pkts w/ partial csum */ \
+ _ (VIRTIO_NET_F_GUEST_CSUM, 1) /* Guest handles pkts w/ partial csum */ \
+ _ (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
+ 2) /* Dynamic offload configuration. */ \
+ _ (VIRTIO_NET_F_MTU, 3) /* Initial MTU advice. */ \
+ _ (VIRTIO_NET_F_MAC, 5) /* Host has given MAC address. */ \
+ _ (VIRTIO_NET_F_GSO, 6) /* Host handles pkts w/ any GSO. */ \
+ _ (VIRTIO_NET_F_GUEST_TSO4, 7) /* Guest can handle TSOv4 in. */ \
+ _ (VIRTIO_NET_F_GUEST_TSO6, 8) /* Guest can handle TSOv6 in. */ \
+ _ (VIRTIO_NET_F_GUEST_ECN, 9) /* Guest can handle TSO[6] w/ ECN in. */ \
+ _ (VIRTIO_NET_F_GUEST_UFO, 10) /* Guest can handle UFO in. */ \
+ _ (VIRTIO_NET_F_HOST_TSO4, 11) /* Host can handle TSOv4 in. */ \
+ _ (VIRTIO_NET_F_HOST_TSO6, 12) /* Host can handle TSOv6 in. */ \
+ _ (VIRTIO_NET_F_HOST_ECN, 13) /* Host can handle TSO[6] w/ ECN in. */ \
+ _ (VIRTIO_NET_F_HOST_UFO, 14) /* Host can handle UFO in. */ \
+ _ (VIRTIO_NET_F_MRG_RXBUF, 15) /* Host can merge receive buffers. */ \
+ _ (VIRTIO_NET_F_STATUS, 16) /* virtio_net_config.status available */ \
+ _ (VIRTIO_NET_F_CTRL_VQ, 17) /* Control channel available */ \
+ _ (VIRTIO_NET_F_CTRL_RX, 18) /* Control channel RX mode support */ \
+ _ (VIRTIO_NET_F_CTRL_VLAN, 19) /* Control channel VLAN filtering */ \
+ _ (VIRTIO_NET_F_CTRL_RX_EXTRA, 20) /* Extra RX mode control support */ \
+ _ (VIRTIO_NET_F_GUEST_ANNOUNCE, \
+ 21) /* Guest can announce device on the network */ \
+ _ (VIRTIO_NET_F_MQ, 22) /* Device supports Receive Flow Steering */ \
+ _ (VIRTIO_NET_F_CTRL_MAC_ADDR, 23) /* Set MAC address */ \
+ _ (VIRTIO_F_NOTIFY_ON_EMPTY, 24) \
+ _ (VHOST_F_LOG_ALL, 26) /* Log all write descriptors */ \
+ _ (VIRTIO_F_ANY_LAYOUT, \
+ 27) /* Can the device handle any descriptor layout */ \
+ _ (VIRTIO_RING_F_INDIRECT_DESC, \
+ 28) /* Support indirect buffer descriptors */ \
+ _ (VIRTIO_RING_F_EVENT_IDX, \
+ 29) /* The Guest publishes the used index for which it expects an \
+ * interrupt at the end of the avail ring. Host should ignore the \
+ * avail->flags field. */ \
+ /* The Host publishes the avail index for which it expects a kick \
+ * at the end of the used ring. Guest should ignore the used->flags field. \
+ */ \
+ _ (VHOST_USER_F_PROTOCOL_FEATURES, 30) \
+ _ (VIRTIO_F_VERSION_1, 32) /* v1.0 compliant. */ \
+ _ (VIRTIO_F_IOMMU_PLATFORM, 33) \
+ _ (VIRTIO_F_RING_PACKED, 34) \
+ _ (VIRTIO_F_IN_ORDER, 35) /* all buffers are used by the device in the */ \
+ /* same order in which they have been made available */ \
+ _ (VIRTIO_F_ORDER_PLATFORM, 36) /* memory accesses by the driver and the */ \
+ /* device are ordered in a way described by the platfor */ \
+ _ (VIRTIO_F_NOTIFICATION_DATA, \
+ 38) /* the driver passes extra data (besides */ \
+ /* identifying the virtqueue) in its device notifications. */ \
+ _ (VIRTIO_NET_F_SPEED_DUPLEX, 63) /* Device set linkspeed and duplex */
+
+typedef enum
+{
+#define _(f, n) f = n,
+ foreach_virtio_net_features
+#undef _
+} vnet_virtio_net_feature_t;
+
+#define VIRTIO_FEATURE(X) (1ULL << X)
+
+#define VRING_MAX_SIZE 32768
+
+#define VRING_DESC_F_NEXT 1
+#define VRING_DESC_F_WRITE 2
+#define VRING_DESC_F_INDIRECT 4
+
+#define VRING_DESC_F_AVAIL (1 << 7)
+#define VRING_DESC_F_USED (1 << 15)
+
+#define foreach_virtio_event_idx_flags \
+ _ (VRING_EVENT_F_ENABLE, 0) \
+ _ (VRING_EVENT_F_DISABLE, 1) \
+ _ (VRING_EVENT_F_DESC, 2)
+
+typedef enum
+{
+#define _(f, n) f = n,
+ foreach_virtio_event_idx_flags
+#undef _
+} vnet_virtio_event_idx_flags_t;
+
+#define VRING_USED_F_NO_NOTIFY 1
+#define VRING_AVAIL_F_NO_INTERRUPT 1
+
+typedef struct
+{
+ u64 addr;
+ u32 len;
+ u16 flags;
+ u16 next;
+} vnet_virtio_vring_desc_t;
+
+typedef struct
+{
+ u16 flags;
+ u16 idx;
+ u16 ring[0];
+ /* u16 used_event; */
+} vnet_virtio_vring_avail_t;
+
+typedef struct
+{
+ u32 id;
+ u32 len;
+} vnet_virtio_vring_used_elem_t;
+
+typedef struct
+{
+ u16 flags;
+ u16 idx;
+ vnet_virtio_vring_used_elem_t ring[0];
+ /* u16 avail_event; */
+} vnet_virtio_vring_used_t;
+
+typedef CLIB_PACKED (struct {
+ u64 addr; // packet data buffer address
+ u32 len; // packet data buffer size
+ u16 id; // buffer id
+ u16 flags; // flags
+}) vnet_virtio_vring_packed_desc_t;
+
+STATIC_ASSERT_SIZEOF (vnet_virtio_vring_packed_desc_t, 16);
+
+typedef CLIB_PACKED (struct {
+ u16 off_wrap;
+ u16 flags;
+}) vnet_virtio_vring_desc_event_t;
+
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
+#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
+
+#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
+
+typedef CLIB_PACKED (struct {
+ u8 flags;
+ u8 gso_type;
+ u16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */
+ u16 gso_size; /* Bytes to append to hdr_len per frame */
+ u16 csum_start; /* Position to start checksumming from */
+ u16 csum_offset; /* Offset after that to place checksum */
+ u16 num_buffers; /* Number of merged rx buffers */
+}) vnet_virtio_net_hdr_v1_t;
+
+typedef CLIB_PACKED (struct {
+ u8 flags;
+ u8 gso_type;
+ u16 hdr_len;
+ u16 gso_size;
+ u16 csum_start;
+ u16 csum_offset;
+}) vnet_virtio_net_hdr_t;
+
+typedef CLIB_PACKED (struct {
+ vnet_virtio_net_hdr_t hdr;
+ u16 num_buffers;
+}) vnet_virtio_net_hdr_mrg_rxbuf_t;
+
+#endif
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/vmxnet3/README.md b/src/plugins/vmxnet3/README.md
deleted file mode 100644
index 6e9fb194c94..00000000000
--- a/src/plugins/vmxnet3/README.md
+++ /dev/null
@@ -1,64 +0,0 @@
-# VMWARE vmxnet3 device driver plugin {#vmxnet3_doc}
-
-##Overview
-This plugin provides native PCI driver support for VMWare vmxnet3.
-
-##Prerequisites
- * This code is tested with vfio-pci driver installed with Ubuntu 18.04 which
-has kernel version 4.15.0-33-generic.
-
- * This driver is tested with ESXi vSwitch version 6.5/6.7 for LRO/TSO support, VMware Workstation 15 Pro (no LRO/TSO), and VMware Fusion 11 Pro (no LRO/TSO)
-
- * Driver requires MSI-X interrupt support, which is not supported by
-uio_pci_generic driver. So vfio-pci must be used. On systems without IOMMU,
-vfio driver can still be used with 4.15.0-33-generic kernel (Ubuntu 18.04) which supports no-iommu mode.
-
-##Known issues
-
-* VLAN filter
-
-## Usage
-### System setup
-
-1. load VFIO driver
-```
-sudo modprobe vfio-pci
-```
-
-2. Make sure the interface is down
-```
-sudo ifconfig <if-name> down
-```
-
-Steps 3 and 4 are optional. They can be accomplished by specifying the optional keyword "bind" when creating the vmxnet3 interface.
-
-3. (systems without IOMMU only) enable unsafe NOIOMMU mode
-```
-echo Y | sudo tee /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
-```
-
-4. Bind interface to vfio-pci
-```
-sudo dpdk-devbind.py --bind vfio-pci 0b:00.0
-```
-
-### Interface Creation
-Interface can be dynamically created with following CLI, with or without the bind option. If step 3 and 4 were executed, bind can be omitted.
-```
-create interface vmxnet3 0000:0b:00.0 bind
-set int state vmxnet3-0/b/0/0 up
-```
-
-### Interface Deletion
-Interface can be deleted with following CLI:
-```
-delete interface vmxnet3 <if-name>
-```
-
-### Interface Statistics
-Interface statistics can be displayed with `show hardware-interface <if-name>`
-command.
-
-### Show Interface CLI
-Interface and ring information can be obtained with
-`show vmxnet3 [if-name] [desc]`
diff --git a/src/plugins/vmxnet3/README.rst b/src/plugins/vmxnet3/README.rst
new file mode 100644
index 00000000000..14430433c17
--- /dev/null
+++ b/src/plugins/vmxnet3/README.rst
@@ -0,0 +1,86 @@
+VMWARE vmxnet3 device driver
+============================
+
+##Overview This plugin provides native PCI driver support for VMWare
+vmxnet3.
+
+##Prerequisites \* This code is tested with vfio-pci driver installed
+with Ubuntu 18.04 which has kernel version 4.15.0-33-generic.
+
+- This driver is tested with ESXi vSwitch version 6.5/6.7 for LRO/TSO
+ support, VMware Workstation 15 Pro (no LRO/TSO), and VMware Fusion 11
+ Pro (no LRO/TSO)
+
+- Driver requires MSI-X interrupt support, which is not supported by
+ uio_pci_generic driver. So vfio-pci must be used. On systems without
+ IOMMU, vfio driver can still be used with 4.15.0-33-generic kernel
+ (Ubuntu 18.04) which supports no-iommu mode.
+
+##Known issues
+
+- VLAN filter
+
+Usage
+-----
+
+System setup
+~~~~~~~~~~~~
+
+1. load VFIO driver
+
+::
+
+ sudo modprobe vfio-pci
+
+2. Make sure the interface is down
+
+::
+
+ sudo ifconfig <if-name> down
+
+Steps 3 and 4 are optional. They can be accomplished by specifying the
+optional keyword “bind” when creating the vmxnet3 interface.
+
+3. (systems without IOMMU only) enable unsafe NOIOMMU mode
+
+::
+
+ echo Y | sudo tee /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
+
+4. Bind interface to vfio-pci
+
+::
+
+ sudo dpdk-devbind.py --bind vfio-pci 0b:00.0
+
+Interface Creation
+~~~~~~~~~~~~~~~~~~
+
+Interface can be dynamically created with following CLI, with or without
+the bind option. If step 3 and 4 were executed, bind can be omitted.
+
+::
+
+ create interface vmxnet3 0000:0b:00.0 bind
+ set int state vmxnet3-0/b/0/0 up
+
+Interface Deletion
+~~~~~~~~~~~~~~~~~~
+
+Interface can be deleted with following CLI:
+
+::
+
+ delete interface vmxnet3 <if-name>
+
+Interface Statistics
+~~~~~~~~~~~~~~~~~~~~
+
+Interface statistics can be displayed with
+``show hardware-interface <if-name>`` command.
+
+Show Interface CLI
+~~~~~~~~~~~~~~~~~~
+
+Interface and ring information can be obtained with
+``show vmxnet3 [if-name] [desc]``
diff --git a/src/plugins/vmxnet3/cli.c b/src/plugins/vmxnet3/cli.c
index 71342bd535c..d682e3ec2c9 100644
--- a/src/plugins/vmxnet3/cli.c
+++ b/src/plugins/vmxnet3/cli.c
@@ -47,8 +47,10 @@ vmxnet3_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
args.enable_gso = 1;
else if (unformat (line_input, "elog"))
args.enable_elog = 1;
+ else if (unformat (line_input, "bind force"))
+ args.bind = VMXNET3_BIND_FORCE;
else if (unformat (line_input, "bind"))
- args.bind = 1;
+ args.bind = VMXNET3_BIND_DEFAULT;
else if (unformat (line_input, "rx-queue-size %u", &size))
args.rxq_size = size;
else if (unformat (line_input, "tx-queue-size %u", &size))
@@ -58,12 +60,14 @@ vmxnet3_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (line_input, "num-rx-queues %u", &size))
args.rxq_num = size;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
}
unformat_free (line_input);
-
vmxnet3_create_if (vm, &args);
if (args.error == 0)
vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
@@ -72,16 +76,15 @@ vmxnet3_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vmxnet3_create_command, static) = {
.path = "create interface vmxnet3",
- .short_help = "create interface vmxnet3 <pci-address>"
- " [rx-queue-size <size>] [tx-queue-size <size>]"
- " [num-tx-queues <number>] [num-rx-queues <number>] [bind]"
- " [gso]",
+ .short_help =
+ "create interface vmxnet3 <pci-address>"
+ " [rx-queue-size <size>] [tx-queue-size <size>]"
+ " [num-tx-queues <number>] [num-rx-queues <number>] [bind [force]]"
+ " [gso]",
.function = vmxnet3_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vmxnet3_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -126,14 +129,12 @@ vmxnet3_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vmxnet3_delete_command, static) = {
.path = "delete interface vmxnet3",
.short_help = "delete interface vmxnet3 "
"{<interface> | sw_if_index <sw_idx>}",
.function = vmxnet3_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vmxnet3_test_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -187,14 +188,12 @@ vmxnet3_test_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vmxnet3_test_command, static) = {
.path = "test vmxnet3",
.short_help = "test vmxnet3 <interface> | sw_if_index <sw_idx> [irq] "
"[elog-on] [elog-off]",
.function = vmxnet3_test_command_fn,
};
-/* *INDENT-ON* */
static void
show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr,
@@ -213,6 +212,15 @@ show_vmxnet3 (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr,
vmxnet3_tx_comp *tx_comp;
u16 qid;
+ vlib_cli_output (vm, "Global:");
+ for (u32 tid = 0; tid <= vlib_num_workers (); tid++)
+ {
+ vmxnet3_per_thread_data_t *ptd =
+ vec_elt_at_index (vmxm->per_thread_data, tid);
+ vlib_cli_output (vm, " Thread %u: polling queue count %u", tid,
+ ptd->polling_q_count);
+ }
+
if (!hw_if_indices)
return;
@@ -568,24 +576,25 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vmxnet3_command, static) = {
.path = "show vmxnet3",
.short_help = "show vmxnet3 [[<interface>] ([desc] | ([rx-comp] | "
"[rx-desc-0] | [rx-desc-1] | [tx-comp] | [tx-desc]) [<slot>])]",
.function = show_vmxnet3_fn,
};
-/* *INDENT-ON* */
clib_error_t *
vmxnet3_cli_init (vlib_main_t * vm)
{
vmxnet3_main_t *vmxm = &vmxnet3_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
/* initialize binary API */
vmxnet3_plugin_api_hookup (vm);
vmxm->log_default = vlib_log_register_class ("vmxnet3", 0);
+
+ vec_validate (vmxm->per_thread_data, tm->n_vlib_mains - 1);
return 0;
}
diff --git a/src/plugins/vmxnet3/format.c b/src/plugins/vmxnet3/format.c
index d463feb3bec..43d790d31eb 100644
--- a/src/plugins/vmxnet3/format.c
+++ b/src/plugins/vmxnet3/format.c
@@ -164,7 +164,7 @@ format_vmxnet3_input_trace (u8 * s, va_list * args)
s = format (s, "vmxnet3: %v (%d) next-node %U",
hi->name, t->hw_if_index, format_vlib_next_node_name, vm,
node->index, t->next_index);
- s = format (s, "\n buffer %U", format_vnet_buffer, &t->buffer);
+ s = format (s, "\n buffer %U", format_vnet_buffer_no_chain, &t->buffer);
return s;
}
diff --git a/src/plugins/vmxnet3/input.c b/src/plugins/vmxnet3/input.c
index 3015fb116ca..25632546b6d 100644
--- a/src/plugins/vmxnet3/input.c
+++ b/src/plugins/vmxnet3/input.c
@@ -23,6 +23,7 @@
#include <vnet/ip/ip6_packet.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
#include <vnet/interface/rx_queue_funcs.h>
#include <vmxnet3/vmxnet3.h>
@@ -106,19 +107,11 @@ vmxnet3_handle_offload (vmxnet3_rx_comp * rx_comp, vlib_buffer_t * hb,
{
if (rx_comp->flags & VMXNET3_RXCF_TCP)
{
- tcp_header_t *tcp =
- (tcp_header_t *) (hb->data +
- vnet_buffer (hb)->l4_hdr_offset);
oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
- tcp->checksum = 0;
}
else if (rx_comp->flags & VMXNET3_RXCF_UDP)
{
- udp_header_t *udp =
- (udp_header_t *) (hb->data +
- vnet_buffer (hb)->l4_hdr_offset);
oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- udp->checksum = 0;
}
}
}
@@ -384,8 +377,8 @@ vmxnet3_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (PREDICT_FALSE
(vnet_device_input_have_features (vd->sw_if_index)))
{
- vnet_feature_start_device_input_x1 (vd->sw_if_index,
- &next_index, hb);
+ vnet_feature_start_device_input (vd->sw_if_index, &next_index,
+ hb);
known_next = 1;
}
@@ -487,7 +480,6 @@ VLIB_NODE_FN (vmxnet3_input_node) (vlib_main_t * vm,
}
#ifndef CLIB_MARCH_VARIANT
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vmxnet3_input_node) = {
.name = "vmxnet3-input",
.sibling_of = "device-input",
@@ -500,7 +492,6 @@ VLIB_REGISTER_NODE (vmxnet3_input_node) = {
};
#endif
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vmxnet3/plugin.c b/src/plugins/vmxnet3/plugin.c
index 76f1cfc5e3e..20caf97f612 100644
--- a/src/plugins/vmxnet3/plugin.c
+++ b/src/plugins/vmxnet3/plugin.c
@@ -19,12 +19,10 @@
#include <vnet/plugin/plugin.h>
#include <vpp/app/version.h>
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () = {
.version = VPP_BUILD_VER,
.description = "VMWare Vmxnet3 Device Driver",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vmxnet3/vmxnet3.c b/src/plugins/vmxnet3/vmxnet3.c
index ff0a7dc706b..e64e0d135d6 100644
--- a/src/plugins/vmxnet3/vmxnet3.c
+++ b/src/plugins/vmxnet3/vmxnet3.c
@@ -69,11 +69,23 @@ vmxnet3_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
vmxnet3_device_t *vd = pool_elt_at_index (vmxm->devices, hw->dev_instance);
vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid);
+ vmxnet3_per_thread_data_t *ptd;
- if (mode == VNET_HW_IF_RX_MODE_POLLING)
- rxq->int_mode = 0;
+ if (mode == rxq->mode)
+ return 0;
+ if ((mode != VNET_HW_IF_RX_MODE_POLLING) &&
+ (mode != VNET_HW_IF_RX_MODE_INTERRUPT))
+ return clib_error_return (0, "Rx mode %U not supported",
+ format_vnet_hw_if_rx_mode, mode);
+ rxq->mode = mode;
+ ptd = vec_elt_at_index (vmxm->per_thread_data, rxq->thread_index);
+ if (rxq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ ptd->polling_q_count++;
else
- rxq->int_mode = 1;
+ {
+ ASSERT (ptd->polling_q_count != 0);
+ ptd->polling_q_count--;
+ }
return 0;
}
@@ -133,7 +145,6 @@ static char *vmxnet3_tx_func_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vmxnet3_device_class,) =
{
.name = "VMXNET3 interface",
@@ -146,7 +157,6 @@ VNET_DEVICE_CLASS (vmxnet3_device_class,) =
.tx_function_n_errors = VMXNET3_TX_N_ERROR,
.tx_function_error_strings = vmxnet3_tx_func_error_strings,
};
-/* *INDENT-ON* */
static u32
vmxnet3_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hw, u32 flags)
@@ -288,6 +298,7 @@ vmxnet3_rxq_init (vlib_main_t * vm, vmxnet3_device_t * vd, u16 qid, u16 qsz)
rxq = vec_elt_at_index (vd->rxqs, qid);
clib_memset (rxq, 0, sizeof (*rxq));
rxq->size = qsz;
+ rxq->mode = VNET_HW_IF_RX_MODE_POLLING;
for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++)
{
rxq->rx_desc[rid] = vlib_physmem_alloc_aligned_on_numa
@@ -534,8 +545,13 @@ vmxnet3_rxq_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 line)
u16 qid = line;
vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid);
- if (vec_len (vd->rxqs) > qid && vd->rxqs[qid].int_mode != 0)
- vnet_hw_if_rx_queue_set_int_pending (vnm, rxq->queue_index);
+ if (vec_len (vd->rxqs) > qid && (rxq->mode != VNET_HW_IF_RX_MODE_POLLING))
+ {
+ vmxnet3_per_thread_data_t *ptd =
+ vec_elt_at_index (vmxm->per_thread_data, rxq->thread_index);
+ if (ptd->polling_q_count == 0)
+ vnet_hw_if_rx_queue_set_int_pending (vnm, rxq->queue_index);
+ }
}
static void
@@ -554,8 +570,9 @@ vmxnet3_event_irq_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
{
vd->flags |= VMXNET3_DEVICE_F_LINK_UP;
vd->link_speed = ret >> 16;
- vnet_hw_interface_set_link_speed (vnm, vd->hw_if_index,
- vd->link_speed * 1000);
+ vnet_hw_interface_set_link_speed (
+ vnm, vd->hw_if_index,
+ (vd->link_speed == UINT32_MAX) ? UINT32_MAX : vd->link_speed * 1000);
vnet_hw_interface_set_flags (vnm, vd->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
}
@@ -599,8 +616,11 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
{
vnet_main_t *vnm = vnet_get_main ();
vmxnet3_main_t *vmxm = &vmxnet3_main;
+ vnet_eth_interface_registration_t eir = {};
+
vmxnet3_device_t *vd;
vlib_pci_dev_handle_t h;
+ vnet_hw_if_caps_change_t cc = {};
clib_error_t *error = 0;
u16 qid;
u32 num_intr;
@@ -653,7 +673,6 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
return;
}
- /* *INDENT-OFF* */
pool_foreach (vd, vmxm->devices) {
if (vd->pci_addr.as_u32 == args->addr.as_u32)
{
@@ -666,11 +685,11 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
return;
}
}
- /* *INDENT-ON* */
if (args->bind)
{
- error = vlib_pci_bind_to_uio (vm, &args->addr, (char *) "auto");
+ error = vlib_pci_bind_to_uio (vm, &args->addr, (char *) "auto",
+ VMXNET3_BIND_FORCE == args->bind);
if (error)
{
args->rv = VNET_API_ERROR_INVALID_INTERFACE;
@@ -784,29 +803,24 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
}
/* create interface */
- error = ethernet_register_interface (vnm, vmxnet3_device_class.index,
- vd->dev_instance, vd->mac_addr,
- &vd->hw_if_index, vmxnet3_flag_change);
-
- if (error)
- {
- vmxnet3_log_error (vd,
- "error encountered on ethernet register interface");
- goto error;
- }
+ eir.dev_class_index = vmxnet3_device_class.index;
+ eir.dev_instance = vd->dev_instance;
+ eir.address = vd->mac_addr;
+ eir.cb.flag_change = vmxnet3_flag_change;
+ vd->hw_if_index = vnet_eth_register_interface (vnm, &eir);
vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, vd->hw_if_index);
vd->sw_if_index = sw->sw_if_index;
args->sw_if_index = sw->sw_if_index;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vd->hw_if_index);
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ cc.mask = VNET_HW_IF_CAP_INT_MODE | VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
if (vd->gso_enable)
- {
- hw->caps |= (VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM);
- }
+ cc.val = cc.mask;
+ else
+ cc.val = VNET_HW_IF_CAP_INT_MODE;
+
+ vnet_hw_if_change_caps (vnm, vd->hw_if_index, &cc);
vnet_hw_if_set_input_node (vnm, vd->hw_if_index, vmxnet3_input_node.index);
/* Disable interrupts */
@@ -815,12 +829,20 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
{
vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, qid);
u32 qi, fi;
+ vmxnet3_per_thread_data_t *ptd;
qi = vnet_hw_if_register_rx_queue (vnm, vd->hw_if_index, qid,
VNET_HW_IF_RXQ_THREAD_ANY);
fi = vlib_pci_get_msix_file_index (vm, vd->pci_dev_handle, qid);
vnet_hw_if_set_rx_queue_file_index (vnm, qi, fi);
rxq->queue_index = qi;
+ rxq->thread_index =
+ vnet_hw_if_get_rx_queue_thread_index (vnm, rxq->queue_index);
+ if (rxq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ {
+ ptd = vec_elt_at_index (vmxm->per_thread_data, rxq->thread_index);
+ ptd->polling_q_count++;
+ }
rxq->buffer_pool_index =
vnet_hw_if_get_rx_queue_numa_node (vnm, rxq->queue_index);
vmxnet3_rxq_refill_ring0 (vm, vd, rxq);
@@ -843,8 +865,9 @@ vmxnet3_create_if (vlib_main_t * vm, vmxnet3_create_if_args_t * args)
vd->flags |= VMXNET3_DEVICE_F_INITIALIZED;
vmxnet3_enable_interrupt (vd);
- vnet_hw_interface_set_link_speed (vnm, vd->hw_if_index,
- vd->link_speed * 1000);
+ vnet_hw_interface_set_link_speed (
+ vnm, vd->hw_if_index,
+ (vd->link_speed == UINT32_MAX) ? UINT32_MAX : vd->link_speed * 1000);
if (vd->flags & VMXNET3_DEVICE_F_LINK_UP)
vnet_hw_interface_set_flags (vnm, vd->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
@@ -880,13 +903,19 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd)
vlib_pci_device_close (vm, vd->pci_dev_handle);
- /* *INDENT-OFF* */
vec_foreach_index (i, vd->rxqs)
{
vmxnet3_rxq_t *rxq = vec_elt_at_index (vd->rxqs, i);
u16 mask = rxq->size - 1;
u16 rid;
+ vmxnet3_per_thread_data_t *ptd =
+ vec_elt_at_index (vmxm->per_thread_data, rxq->thread_index);
+ if (rxq->mode == VNET_HW_IF_RX_MODE_POLLING)
+ {
+ ASSERT (ptd->polling_q_count != 0);
+ ptd->polling_q_count--;
+ }
for (rid = 0; rid < VMXNET3_RX_RING_SIZE; rid++)
{
vmxnet3_rx_ring *ring;
@@ -900,11 +929,9 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd)
}
vlib_physmem_free (vm, rxq->rx_comp);
}
- /* *INDENT-ON* */
vec_free (vd->rxqs);
vec_free (vd->rx_stats);
- /* *INDENT-OFF* */
vec_foreach_index (i, vd->txqs)
{
vmxnet3_txq_t *txq = vec_elt_at_index (vd->txqs, i);
@@ -925,7 +952,6 @@ vmxnet3_delete_if (vlib_main_t * vm, vmxnet3_device_t * vd)
vlib_physmem_free (vm, txq->tx_desc);
vlib_physmem_free (vm, txq->tx_comp);
}
- /* *INDENT-ON* */
vec_free (vd->txqs);
vec_free (vd->tx_stats);
diff --git a/src/plugins/vmxnet3/vmxnet3.h b/src/plugins/vmxnet3/vmxnet3.h
index 75107689443..89602f8ee9e 100644
--- a/src/plugins/vmxnet3/vmxnet3.h
+++ b/src/plugins/vmxnet3/vmxnet3.h
@@ -513,10 +513,17 @@ typedef struct
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u32 polling_q_count;
+} vmxnet3_per_thread_data_t;
+
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
u16 size;
- u8 int_mode;
+ u32 mode;
u8 buffer_pool_index;
u32 queue_index;
+ u32 thread_index;
vmxnet3_rx_ring rx_ring[VMXNET3_RX_RING_SIZE];
vmxnet3_rx_desc *rx_desc[VMXNET3_RX_RING_SIZE];
vmxnet3_rx_comp *rx_comp;
@@ -594,10 +601,18 @@ typedef struct
vmxnet3_device_t *devices;
u16 msg_id_base;
vlib_log_class_t log_default;
+ vmxnet3_per_thread_data_t *per_thread_data;
} vmxnet3_main_t;
extern vmxnet3_main_t vmxnet3_main;
+typedef enum
+{
+ VMXNET3_BIND_NONE = 0,
+ VMXNET3_BIND_DEFAULT = 1,
+ VMXNET3_BIND_FORCE = 2,
+} __clib_packed vmxnet3_bind_t;
+
typedef struct
{
vlib_pci_addr_t addr;
@@ -606,7 +621,7 @@ typedef struct
u16 rxq_num;
u16 txq_size;
u16 txq_num;
- u8 bind;
+ vmxnet3_bind_t bind;
u8 enable_gso;
/* return */
i32 rv;
diff --git a/src/plugins/vmxnet3/vmxnet3_api.c b/src/plugins/vmxnet3/vmxnet3_api.c
index cef0770a63b..c51c07b705d 100644
--- a/src/plugins/vmxnet3/vmxnet3_api.c
+++ b/src/plugins/vmxnet3/vmxnet3_api.c
@@ -29,6 +29,7 @@
#include <vmxnet3/vmxnet3.api_enum.h>
#include <vmxnet3/vmxnet3.api_types.h>
+#define REPLY_MSG_ID_BASE (vmxm->msg_id_base)
#include <vlibapi/api_helper_macros.h>
static void
@@ -54,12 +55,8 @@ vl_api_vmxnet3_create_t_handler (vl_api_vmxnet3_create_t * mp)
vmxnet3_create_if (vm, &args);
rv = args.rv;
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_VMXNET3_CREATE_REPLY + vmxm->msg_id_base,
- ({
- rmp->sw_if_index = ntohl (args.sw_if_index);
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO2 (VL_API_VMXNET3_CREATE_REPLY,
+ ({ rmp->sw_if_index = ntohl (args.sw_if_index); }));
}
static void
@@ -87,7 +84,7 @@ vl_api_vmxnet3_delete_t_handler (vl_api_vmxnet3_delete_t * mp)
vmxnet3_delete_if (vm, vd);
reply:
- REPLY_MACRO (VL_API_VMXNET3_DELETE_REPLY + vmxm->msg_id_base);
+ REPLY_MACRO (VL_API_VMXNET3_DELETE_REPLY);
}
static void
@@ -173,16 +170,14 @@ vl_api_vmxnet3_dump_t_handler (vl_api_vmxnet3_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (vd, vmxm->devices)
{
swif = vnet_get_sw_interface (vnm, vd->sw_if_index);
if_name = format (if_name, "%U%c", format_vnet_sw_interface_name, vnm,
swif, 0);
send_vmxnet3_details (reg, vd, swif, if_name, mp->context);
- _vec_len (if_name) = 0;
+ vec_set_len (if_name, 0);
}
- /* *INDENT-ON* */
vec_free (if_name);
}
@@ -211,7 +206,6 @@ static void vl_api_sw_vmxnet3_interface_dump_t_handler
(vnet_sw_interface_is_api_valid (vnm, filter_sw_if_index) == 0))
goto bad_sw_if_index;
- /* *INDENT-OFF* */
pool_foreach (vd, vmxm->devices)
{
if ((filter_sw_if_index == ~0) ||
@@ -221,10 +215,9 @@ static void vl_api_sw_vmxnet3_interface_dump_t_handler
if_name = format (if_name, "%U%c", format_vnet_sw_interface_name, vnm,
swif, 0);
send_vmxnet3_details (reg, vd, swif, if_name, mp->context);
- _vec_len (if_name) = 0;
+ vec_set_len (if_name, 0);
}
}
- /* *INDENT-ON* */
BAD_SW_IF_INDEX_LABEL;
vec_free (if_name);
diff --git a/src/plugins/vmxnet3/vmxnet3_test.c b/src/plugins/vmxnet3/vmxnet3_test.c
index 6fa9373486c..9b73c09d03c 100644
--- a/src/plugins/vmxnet3/vmxnet3_test.c
+++ b/src/plugins/vmxnet3/vmxnet3_test.c
@@ -34,7 +34,7 @@
/* declare message IDs */
#include <vmxnet3/vmxnet3.api_enum.h>
#include <vmxnet3/vmxnet3.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
diff --git a/src/plugins/vrrp/node.c b/src/plugins/vrrp/node.c
index 7ba18c4f75c..d5594ae0e43 100644
--- a/src/plugins/vrrp/node.c
+++ b/src/plugins/vrrp/node.c
@@ -86,22 +86,16 @@ typedef enum
VRRP_INPUT_N_NEXT,
} vrrp_next_t;
-typedef struct vrrp_input_process_args
-{
- u32 vr_index;
- vrrp_header_t *pkt;
-} vrrp_input_process_args_t;
-
/* Given a VR and a pointer to the VRRP header of an incoming packet,
* compare the local src address to the peers. Return < 0 if the local
* address < the peer address, 0 if they're equal, > 0 if
* the local address > the peer address
*/
static int
-vrrp_vr_addr_cmp (vrrp_vr_t * vr, vrrp_header_t * pkt)
+vrrp_vr_addr_cmp (vrrp_vr_t *vr, ip46_address_t *peer_addr)
{
vrrp_vr_config_t *vrc = &vr->config;
- void *peer_addr, *local_addr;
+ void *peer_addr_bytes, *local_addr;
ip46_address_t addr;
int addr_size;
@@ -109,7 +103,7 @@ vrrp_vr_addr_cmp (vrrp_vr_t * vr, vrrp_header_t * pkt)
if (vrrp_vr_is_ipv6 (vr))
{
- peer_addr = &(((ip6_header_t *) pkt) - 1)->src_address;
+ peer_addr_bytes = &peer_addr->ip6;
local_addr = &addr.ip6;
addr_size = 16;
ip6_address_copy (local_addr,
@@ -117,25 +111,26 @@ vrrp_vr_addr_cmp (vrrp_vr_t * vr, vrrp_header_t * pkt)
}
else
{
- peer_addr = &(((ip4_header_t *) pkt) - 1)->src_address;
+ peer_addr_bytes = &peer_addr->ip4;
local_addr = &addr.ip4;
addr_size = 4;
fib_sas4_get (vrc->sw_if_index, NULL, local_addr);
}
- return memcmp (local_addr, peer_addr, addr_size);
+ return memcmp (local_addr, peer_addr_bytes, addr_size);
}
static void
-vrrp_input_process_master (vrrp_vr_t * vr, vrrp_header_t * pkt)
+vrrp_input_process_master (vrrp_vr_t *vr, vrrp_input_process_args_t *args)
{
/* received priority 0, another VR is shutting down. send an adv and
* remain in the master state
*/
- if (pkt->priority == 0)
+ if (args->priority == 0)
{
clib_warning ("Received shutdown message from a peer on VR %U",
format_vrrp_vr_key, vr);
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_PRIO0_RCVD, vr->stat_index);
vrrp_adv_send (vr, 0);
vrrp_vr_timer_set (vr, VRRP_VR_TIMER_ADV);
return;
@@ -146,11 +141,11 @@ vrrp_input_process_master (vrrp_vr_t * vr, vrrp_header_t * pkt)
* - received priority == adjusted priority and peer addr > local addr
* allow the local VR to be preempted by the peer
*/
- if ((pkt->priority > vrrp_vr_priority (vr)) ||
- ((pkt->priority == vrrp_vr_priority (vr)) &&
- (vrrp_vr_addr_cmp (vr, pkt) < 0)))
+ if ((args->priority > vrrp_vr_priority (vr)) ||
+ ((args->priority == vrrp_vr_priority (vr)) &&
+ (vrrp_vr_addr_cmp (vr, &args->src_addr) < 0)))
{
- vrrp_vr_transition (vr, VRRP_VR_STATE_BACKUP, pkt);
+ vrrp_vr_transition (vr, VRRP_VR_STATE_BACKUP, args);
return;
}
@@ -163,16 +158,17 @@ vrrp_input_process_master (vrrp_vr_t * vr, vrrp_header_t * pkt)
/* RFC 5798 section 6.4.2 */
static void
-vrrp_input_process_backup (vrrp_vr_t * vr, vrrp_header_t * pkt)
+vrrp_input_process_backup (vrrp_vr_t *vr, vrrp_input_process_args_t *args)
{
vrrp_vr_config_t *vrc = &vr->config;
vrrp_vr_runtime_t *vrt = &vr->runtime;
/* master shutting down, ready for election */
- if (pkt->priority == 0)
+ if (args->priority == 0)
{
clib_warning ("Master for VR %U is shutting down", format_vrrp_vr_key,
vr);
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_PRIO0_RCVD, vr->stat_index);
vrt->master_down_int = vrt->skew;
vrrp_vr_timer_set (vr, VRRP_VR_TIMER_MASTER_DOWN);
return;
@@ -180,10 +176,9 @@ vrrp_input_process_backup (vrrp_vr_t * vr, vrrp_header_t * pkt)
/* no preempt set or adv from a higher priority router, update timers */
if (!(vrc->flags & VRRP_VR_PREEMPT) ||
- (pkt->priority >= vrrp_vr_priority (vr)))
+ (args->priority >= vrrp_vr_priority (vr)))
{
- vrt->master_adv_int = clib_net_to_host_u16 (pkt->rsvd_and_max_adv_int);
- vrt->master_adv_int &= ((u16) 0x0fff); /* ignore rsvd bits */
+ vrt->master_adv_int = args->max_adv_int;
vrrp_vr_skew_compute (vr);
vrrp_vr_master_down_compute (vr);
@@ -208,19 +203,21 @@ vrrp_input_process (vrrp_input_process_args_t * args)
return;
}
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_ADV_RCVD, vr->stat_index);
+
switch (vr->runtime.state)
{
case VRRP_VR_STATE_INIT:
return;
case VRRP_VR_STATE_BACKUP:
/* this is usually the only state an advertisement should be received */
- vrrp_input_process_backup (vr, args->pkt);
+ vrrp_input_process_backup (vr, args);
break;
case VRRP_VR_STATE_MASTER:
/* might be getting preempted. or have a misbehaving peer */
clib_warning ("Received advertisement for master VR %U",
format_vrrp_vr_key, vr);
- vrrp_input_process_master (vr, args->pkt);
+ vrrp_input_process_master (vr, args);
break;
default:
clib_warning ("Received advertisement for VR %U in unknown state %d",
@@ -489,7 +486,6 @@ VLIB_NODE_FN (vrrp4_arp_input_node) (vlib_main_t * vm,
return vrrp_arp_nd_input_inline (vm, node, frame, 0 /* is_ipv6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vrrp4_arp_input_node) =
{
.name = "vrrp4-arp-input",
@@ -522,7 +518,6 @@ VLIB_NODE_FN (vrrp6_nd_input_node) (vlib_main_t * vm,
return vrrp_arp_nd_input_inline (vm, node, frame, 1 /* is_ipv6 */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vrrp6_nd_input_node) =
{
.name = "vrrp6-nd-input",
@@ -586,6 +581,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
addr_len = 16;
payload_len0 = clib_net_to_host_u16 (ip6->payload_length);
vlib_buffer_advance (b0, sizeof (*ip6));
+ clib_memcpy_fast (&args0.src_addr.ip6, &ip6->src_address, addr_len);
}
else
{
@@ -596,6 +592,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
addr_len = 4;
payload_len0 = clib_net_to_host_u16 (ip4->length) - sizeof(*ip4);
vlib_buffer_advance (b0, sizeof (*ip4));
+ clib_memcpy_fast (&args0.src_addr.ip4, &ip4->src_address, addr_len);
}
next0 = VRRP_INPUT_NEXT_DROP;
@@ -612,6 +609,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (*ttl0 != 255)
{
error0 = VRRP_ERROR_BAD_TTL;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_TTL);
goto trace;
}
@@ -619,6 +617,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if ((vrrp0->vrrp_version_and_type >> 4) != 3)
{
error0 = VRRP_ERROR_NOT_VERSION_3;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_VERSION);
goto trace;
}
@@ -627,6 +626,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
((u32) vrrp0->n_addrs) * addr_len)
{
error0 = VRRP_ERROR_INCOMPLETE_PKT;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_PKT_LEN);
goto trace;
}
@@ -634,6 +634,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (rx_csum0 != vrrp_adv_csum (ip0, vrrp0, is_ipv6, payload_len0))
{
error0 = VRRP_ERROR_BAD_CHECKSUM;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_CHKSUM);
goto trace;
}
@@ -643,6 +644,7 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vrrp0->vr_id, is_ipv6)))
{
error0 = VRRP_ERROR_UNKNOWN_VR;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_VRID);
goto trace;
}
@@ -651,12 +653,14 @@ vrrp_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (vrrp0->n_addrs != vec_len (vr0->config.vr_addrs))
{
error0 = VRRP_ERROR_ADDR_MISMATCH;
+ vrrp_incr_err_counter (VRRP_ERR_COUNTER_ADDR_LIST);
goto trace;
}
/* signal main thread to process contents of packet */
args0.vr_index = vr0 - vmp->vrs;
- args0.pkt = vrrp0;
+ args0.priority = vrrp0->priority;
+ args0.max_adv_int = vrrp_adv_int_from_packet (vrrp0);
vl_api_rpc_call_main_thread (vrrp_input_process, (u8 *) &args0,
sizeof (args0));
@@ -693,7 +697,6 @@ VLIB_NODE_FN (vrrp4_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return vrrp_input_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vrrp4_input_node) =
{
.name = "vrrp4-input",
@@ -1098,7 +1101,6 @@ vrrp_input_init (vlib_main_t *vm)
VLIB_INIT_FUNCTION (vrrp_input_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vrrp/setup.pg b/src/plugins/vrrp/setup.pg
index 9275fcc46c6..6328448d79e 100644
--- a/src/plugins/vrrp/setup.pg
+++ b/src/plugins/vrrp/setup.pg
@@ -7,14 +7,14 @@ set int state loop0 up
comment { Packet generator script. Src MAC 00:de:ad:be:ef:01 }
comment { Dst mac 01:ba:db:ab:be:01 ethtype 0800 }
-packet-generator new {
- name simple
- limit 1
- size 128-128
- interface loop0
- node vrrp
- data {
- hex 0x00deadbeef0001badbabbe010800
- incrementing 30
- }
+packet-generator new { \
+ name simple \
+ limit 1 \
+ size 128-128 \
+ interface loop0 \
+ node vrrp \
+ data { \
+ hex 0x00deadbeef0001badbabbe010800 \
+ incrementing 30 \
+ } \
}
diff --git a/src/plugins/vrrp/vrrp.api b/src/plugins/vrrp/vrrp.api
index a34b06ffc57..03193e99a2c 100644
--- a/src/plugins/vrrp/vrrp.api
+++ b/src/plugins/vrrp/vrrp.api
@@ -5,7 +5,7 @@
*
*/
-option version = "1.0.1";
+option version = "1.1.1";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
@@ -60,6 +60,55 @@ autoreply define vrrp_vr_add_del {
vl_api_address_t addrs[n_addrs];
};
+/** @brief Replace an existing VRRP virtual router in-place or create a new one
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param vrrp_index - an existing VRRP entry to replace, or 0xffffffff to crate a new one
+ @param sw_if_index - interface backed up by this vr
+ @param vr_id - the VR ID advertised by this vr
+ @param priority - the priority advertised for this vr
+ @param interval - interval between advertisements in centiseconds
+ @param flags - bit flags for booleans - preempt, accept, unicast, ipv6
+ @param n_addrs - number of addresses being backed up by this vr
+ @param addrs - the addresses backed up by this vr
+*/
+define vrrp_vr_update {
+ u32 client_index;
+ u32 context;
+ u32 vrrp_index;
+ vl_api_interface_index_t sw_if_index;
+ u8 vr_id;
+ u8 priority;
+ u16 interval;
+ vl_api_vrrp_vr_flags_t flags;
+ u8 n_addrs;
+ vl_api_address_t addrs[n_addrs];
+};
+
+/**
+ * @brief Reply to a VRRP add/replace
+ * @param context - returned sender context, to match reply w/ request
+ * @param vrrp_index - index of the updated or newly created VRRP instance
+ * @param retval 0 - no error
+ */
+define vrrp_vr_update_reply {
+ u32 context;
+ i32 retval;
+ u32 vrrp_index;
+};
+
+/**
+ * @brief Delete an existing VRRP instance
+ * @param client_index - opaque cookie to identify the sender
+ * @param context - returned sender context, to match reply w/ request
+ * @param vrrp_index - index of the VRRP instance to delete
+ */
+autoreply define vrrp_vr_del {
+ u32 client_index;
+ u32 context;
+ u32 vrrp_index;
+};
+
/** \brief VRRP: dump virtual router data
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/plugins/vrrp/vrrp.c b/src/plugins/vrrp/vrrp.c
index 5ee011cceb1..fb0659605c7 100644
--- a/src/plugins/vrrp/vrrp.c
+++ b/src/plugins/vrrp/vrrp.c
@@ -32,6 +32,97 @@ static const mac_address_t ipv6_vmac = {
.bytes = {0x00, 0x00, 0x5e, 0x00, 0x02, 0x00}
};
+vlib_simple_counter_main_t vrrp_errs[] = {
+ /* Total number of VRRP packets received with invalid checksum */
+ {
+ .name = "CHKSUM_ERRS",
+ .stat_segment_name = "/net/vrrp/chksum-errs",
+ },
+ /* Total number of VRRP packets received with unknown or unsupported version
+ */
+ {
+ .name = "VERSION_ERRS",
+ .stat_segment_name = "/net/vrrp/version-errs",
+ },
+ /* Total number of VRRP packets received with invalid VRID */
+ {
+ .name = "VRID_ERRS",
+ .stat_segment_name = "/net/vrrp/vrid-errs",
+ },
+ /* Total number of VRRP packets received with TTL/Hop limit != 255 */
+ {
+ .name = "TTL_ERRS",
+ .stat_segment_name = "/net/vrrp/ttl-errs",
+ },
+ /* Number of packets received with an address list not matching the locally
+ configured one */
+ {
+ .name = "ADDR_LIST_ERRS",
+ .stat_segment_name = "/net/vrrp/addr-list-errs",
+ },
+ /* Number of packets received with a length less than the VRRP header */
+ {
+ .name = "PACKET_LEN_ERRS",
+ .stat_segment_name = "/net/vrrp/packet-len-errs",
+ },
+};
+
+void
+vrrp_incr_err_counter (vrrp_err_counter_t err_type)
+{
+ if (err_type >= VRRP_ERR_COUNTER_MAX)
+ {
+ clib_warning ("Attempt to increse error counter of unknown type %u",
+ err_type);
+ return;
+ }
+ vlib_increment_simple_counter (&vrrp_errs[err_type],
+ vlib_get_main ()->thread_index, 0, 1);
+}
+
+// per-VRRP statistics
+
+/* Number of times a VRRP instance has transitioned to master */
+vlib_simple_counter_main_t vrrp_stats[] = {
+ {
+ .name = "MASTER_TRANS",
+ .stat_segment_name = "/net/vrrp/master-trans",
+ },
+ /* Number of VRRP advertisements sent by a VRRP instance */
+ {
+ .name = "ADV_SENT",
+ .stat_segment_name = "/net/vrrp/adv-sent",
+ },
+ /* Number of VRRP advertisements received by a VRRP instance */
+ {
+ .name = "ADV_RCVD",
+ .stat_segment_name = "/net/vrrp/adv-rcvd",
+ },
+ /* Number of VRRP priority-0 packets sent by a VRRP instance */
+ {
+ .name = "PRIO0_SENT",
+ .stat_segment_name = "/net/vrrp/prio0-sent",
+ },
+ /* Number of VRRP priority-0 packets received by a VRRP instance */
+ {
+ .name = "PRIO0_RCVD",
+ .stat_segment_name = "/net/vrrp/prio0-rcvd",
+ },
+};
+
+void
+vrrp_incr_stat_counter (vrrp_stat_counter_t stat_type, u32 stat_index)
+{
+ if (stat_type >= VRRP_STAT_COUNTER_MAX)
+ {
+ clib_warning ("Attempt to increse stat counter of unknown type %u",
+ stat_type);
+ return;
+ }
+ vlib_increment_simple_counter (
+ &vrrp_stats[stat_type], vlib_get_main ()->thread_index, stat_index, 1);
+}
+
typedef struct
{
vrrp_vr_key_t key;
@@ -227,9 +318,6 @@ vrrp_vr_transition_addrs (vrrp_vr_t * vr, vrrp_vr_state_t new_state)
if (vrrp_vr_is_owner (vr))
return;
- if (vrrp_vr_is_unicast (vr))
- return;
-
/* only need to do something if entering or leaving master state */
if ((vr->runtime.state != VRRP_VR_STATE_MASTER) &&
(new_state != VRRP_VR_STATE_MASTER))
@@ -293,6 +381,7 @@ vrrp_vr_transition (vrrp_vr_t * vr, vrrp_vr_state_t new_state, void *data)
if (new_state == VRRP_VR_STATE_MASTER)
{
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_MASTER_TRANS, vr->stat_index);
/* RFC 5798 sec 6.4.1 (105) - startup event for VR with priority 255
* sec 6.4.2 (365) - master down timer fires on backup VR
*/
@@ -313,9 +402,10 @@ vrrp_vr_transition (vrrp_vr_t * vr, vrrp_vr_state_t new_state, void *data)
if (vr->runtime.state == VRRP_VR_STATE_MASTER)
{
- vrrp_header_t *pkt = data;
- vr->runtime.master_adv_int = vrrp_adv_int_from_packet (pkt);
+ vrrp_input_process_args_t *args = data;
+ if (args)
+ vr->runtime.master_adv_int = args->max_adv_int;
}
else /* INIT, INTF_DOWN */
vr->runtime.master_adv_int = vr->config.adv_interval;
@@ -384,10 +474,9 @@ static int
vrrp_intf_enable_disable_mcast (u8 enable, u32 sw_if_index, u8 is_ipv6)
{
vrrp_main_t *vrm = &vrrp_main;
- vrrp_vr_t *vr;
vrrp_intf_t *intf;
- u32 fib_index;
- u32 n_vrs = 0;
+ u32 fib_index, i;
+ u32 n_vrs_in_fib = 0;
const mfib_prefix_t *vrrp_prefix;
fib_protocol_t proto;
vnet_link_t link_type;
@@ -422,30 +511,29 @@ vrrp_intf_enable_disable_mcast (u8 enable, u32 sw_if_index, u8 is_ipv6)
via_itf.frp_proto = fib_proto_to_dpo (proto);
fib_index = mfib_table_get_index_for_sw_if_index (proto, sw_if_index);
- /* *INDENT-OFF* */
- pool_foreach (vr, vrm->vrs)
- {
- if (vrrp_vr_is_ipv6 (vr) == is_ipv6)
- n_vrs++;
- }
- /* *INDENT-ON* */
+ vec_foreach_index (i, vrm->vrrp_intfs)
+ {
+ if (mfib_table_get_index_for_sw_if_index (proto, i) != fib_index)
+ continue;
+
+ n_vrs_in_fib += vrrp_intf_num_vrs (i, is_ipv6);
+ }
if (enable)
{
- /* If this is the first VR configured, add the local mcast routes */
- if (n_vrs == 1)
- mfib_table_entry_path_update (fib_index, vrrp_prefix, MFIB_SOURCE_API,
- &for_us);
+ /* ensure that the local mcast route exists */
+ mfib_table_entry_path_update (fib_index, vrrp_prefix, MFIB_SOURCE_API,
+ MFIB_ENTRY_FLAG_NONE, &for_us);
mfib_table_entry_path_update (fib_index, vrrp_prefix, MFIB_SOURCE_API,
- &via_itf);
+ MFIB_ENTRY_FLAG_NONE, &via_itf);
intf->mcast_adj_index[! !is_ipv6] =
adj_mcast_add_or_lock (proto, link_type, sw_if_index);
}
else
{
/* Remove mcast local routes if this is the last VR being deleted */
- if (n_vrs == 0)
+ if (n_vrs_in_fib == 0)
mfib_table_entry_path_remove (fib_index, vrrp_prefix, MFIB_SOURCE_API,
&for_us);
@@ -509,7 +597,7 @@ vrrp_vr_valid_addrs_owner (vrrp_vr_config_t * vr_conf)
}
static int
-vrrp_vr_valid_addrs_unused (vrrp_vr_config_t * vr_conf)
+vrrp_vr_valid_addrs_unused (vrrp_vr_config_t *vr_conf, index_t vrrp_index)
{
ip46_address_t *vr_addr;
u8 is_ipv6 = (vr_conf->flags & VRRP_VR_IPV6) != 0;
@@ -521,7 +609,7 @@ vrrp_vr_valid_addrs_unused (vrrp_vr_config_t * vr_conf)
addr = (is_ipv6) ? (void *) &vr_addr->ip6 : (void *) &vr_addr->ip4;
vr_index = vrrp_vr_lookup_address (vr_conf->sw_if_index, is_ipv6, addr);
- if (vr_index != ~0)
+ if (vr_index != ~0 && vrrp_index != vr_index)
return VNET_API_ERROR_ADDRESS_IN_USE;
}
@@ -529,7 +617,7 @@ vrrp_vr_valid_addrs_unused (vrrp_vr_config_t * vr_conf)
}
static int
-vrrp_vr_valid_addrs (vrrp_vr_config_t * vr_conf)
+vrrp_vr_valid_addrs (vrrp_vr_config_t *vr_conf, index_t vrrp_index)
{
int ret = 0;
@@ -539,7 +627,7 @@ vrrp_vr_valid_addrs (vrrp_vr_config_t * vr_conf)
return ret;
/* make sure no other VR has already configured any of the VR addresses */
- ret = vrrp_vr_valid_addrs_unused (vr_conf);
+ ret = vrrp_vr_valid_addrs_unused (vr_conf, vrrp_index);
return ret;
}
@@ -574,7 +662,7 @@ vrrp_vr_addr_add_del (vrrp_vr_t * vr, u8 is_add, ip46_address_t * vr_addr)
{
if (!ip46_address_cmp (addr, vr_addr))
{
- vec_del1 (vr->config.vr_addrs, vr->config.vr_addrs - addr);
+ vec_del1 (vr->config.vr_addrs, addr - vr->config.vr_addrs);
break;
}
}
@@ -596,7 +684,7 @@ vrrp_vr_addr_add_del (vrrp_vr_t * vr, u8 is_add, ip46_address_t * vr_addr)
{
if (!ip46_address_cmp (addr, vr_addr))
{
- vec_del1 (vr->config.vr_addrs, vr->config.vr_addrs - addr);
+ vec_del1 (vr->config.vr_addrs, addr - vr->config.vr_addrs);
break;
}
}
@@ -617,9 +705,153 @@ vrrp_vr_addrs_add_del (vrrp_vr_t * vr, u8 is_add, ip46_address_t * vr_addrs)
}
}
+int
+vrrp_vr_update (index_t *vrrp_index, vrrp_vr_config_t *vr_conf)
+{
+ index_t index = *vrrp_index;
+ vrrp_main_t *vrm = &vrrp_main;
+ vrrp_vr_t *vr = NULL;
+ vrrp_vr_key_t key = { 0 };
+ uint8_t must_restart = 0;
+ int ret = 0;
+
+ /* no valid index -> create and return allocated index */
+ if (index == INDEX_INVALID)
+ {
+ return vrrp_vr_add_del (1, vr_conf, vrrp_index);
+ }
+ /* update: lookup vrrp instance */
+ if (pool_is_free_index (vrm->vrs, index))
+ return (VNET_API_ERROR_NO_SUCH_ENTRY);
+
+ /* fetch existing VR */
+ vr = pool_elt_at_index (vrm->vrs, index);
+
+ /* populate key */
+ key.vr_id = vr->config.vr_id;
+ key.is_ipv6 = !!(vr->config.flags & VRRP_VR_IPV6);
+ ;
+ key.sw_if_index = vr->config.sw_if_index;
+
+ /* Do not allow changes to the keys of the VRRP instance */
+ if (vr_conf->vr_id != key.vr_id || vr_conf->sw_if_index != key.sw_if_index ||
+ !!(vr_conf->flags & VRRP_VR_IPV6) != key.is_ipv6)
+ {
+ clib_warning ("Attempt to change VR ID, IP version or interface index "
+ "for VRRP instance with index %u",
+ index);
+ return VNET_API_ERROR_INVALID_ARGUMENT;
+ }
+
+ /* were IPvX addresses included ? */
+ if (!vec_len (vr_conf->vr_addrs))
+ {
+ clib_warning ("Conf of VR %u for IPv%d on sw_if_index %u "
+ " does not contain IP addresses",
+ key.vr_id, key.is_ipv6 ? 6 : 4, key.sw_if_index);
+ return VNET_API_ERROR_INVALID_SRC_ADDRESS;
+ }
+
+ /* Make sure the addresses are ok to use */
+ if ((ret = vrrp_vr_valid_addrs (vr_conf, index)) < 0)
+ return ret;
+
+ /* stop it if needed */
+ must_restart = (vr->runtime.state != VRRP_VR_STATE_INIT);
+ if (must_restart)
+ vrrp_vr_start_stop (0, &key);
+
+ /* overwrite new config */
+ vr->config.priority = vr_conf->priority;
+ vr->config.adv_interval = vr_conf->adv_interval;
+ vr->config.flags = vr_conf->flags;
+
+ /* check if any address has changed */
+ ip46_address_t *vr_addr, *conf_addr;
+ uint8_t found;
+ vec_foreach (vr_addr, vr->config.vr_addrs)
+ {
+ found = 0;
+ vec_foreach (conf_addr, vr_conf->vr_addrs)
+ {
+ if (ip46_address_is_equal (vr_addr, conf_addr))
+ {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ {
+ vrrp_vr_addr_add_del (vr, 0, vr_addr);
+ }
+ }
+ vec_foreach (conf_addr, vr_conf->vr_addrs)
+ {
+ found = 0;
+ vec_foreach (vr_addr, vr->config.vr_addrs)
+ {
+ if (ip46_address_is_equal (vr_addr, conf_addr))
+ {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ {
+ vrrp_vr_addr_add_del (vr, 1, conf_addr);
+ }
+ }
+
+ /* restart it if needed */
+ if (must_restart)
+ vrrp_vr_start_stop (1, &key);
+
+ return 0;
+}
+
+static void
+vrrp_vr_del_common (vrrp_vr_t *vr, vrrp_vr_key_t *key)
+{
+ vrrp_main_t *vrm = &vrrp_main;
+ ip46_address_t *vr_addrs_del_copy;
+
+ vrrp_vr_timer_cancel (vr);
+ vrrp_vr_tracking_ifs_add_del (vr, vr->tracking.interfaces, 0);
+ vr_addrs_del_copy = vec_dup (vr->config.vr_addrs);
+ vrrp_vr_addrs_add_del (vr, 0, vr_addrs_del_copy);
+ mhash_unset (&vrm->vr_index_by_key, key, 0);
+ vec_free (vr_addrs_del_copy);
+ vec_free (vr->config.peer_addrs);
+ vec_free (vr->config.vr_addrs);
+ vec_free (vr->tracking.interfaces);
+ pool_put (vrm->vrs, vr);
+}
+
+int
+vrrp_vr_del (index_t vrrp_index)
+{
+ vrrp_main_t *vrm = &vrrp_main;
+ vrrp_vr_key_t key;
+ vrrp_vr_t *vr = 0;
+
+ if (pool_is_free_index (vrm->vrs, vrrp_index))
+ {
+ return (VNET_API_ERROR_NO_SUCH_ENTRY);
+ }
+ else
+ {
+ vr = pool_elt_at_index (vrm->vrs, vrrp_index);
+ key.sw_if_index = vr->config.sw_if_index;
+ key.vr_id = vr->config.vr_id;
+ key.is_ipv6 = vrrp_vr_is_ipv6 (vr);
+ vrrp_vr_del_common (vr, &key);
+ return 0;
+ }
+}
+
/* Action function shared between message handler and debug CLI */
int
-vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t * vr_conf)
+vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t *vr_conf, index_t *ret_index)
{
vrrp_main_t *vrm = &vrrp_main;
vnet_main_t *vnm = vnet_get_main ();
@@ -661,7 +893,7 @@ vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t * vr_conf)
}
/* Make sure the addresses are ok to use */
- if ((ret = vrrp_vr_valid_addrs (vr_conf)) < 0)
+ if ((ret = vrrp_vr_valid_addrs (vr_conf, INDEX_INVALID)) < 0)
return ret;
pool_get_zero (vrm->vrs, vr);
@@ -679,6 +911,20 @@ vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t * vr_conf)
vr->runtime.mac = (key.is_ipv6) ? ipv6_vmac : ipv4_vmac;
vr->runtime.mac.bytes[5] = vr_conf->vr_id;
+ /* recall pool index for stats */
+ vr->stat_index = vr_index;
+ /* and return it if we were asked to */
+ if (ret_index != NULL)
+ {
+ *ret_index = vr_index;
+ }
+ /* allocate & reset stats */
+ for (int i = 0; i < VRRP_STAT_COUNTER_MAX; i++)
+ {
+ vlib_validate_simple_counter (&vrrp_stats[i], vr_index);
+ vlib_zero_simple_counter (&vrrp_stats[i], vr_index);
+ }
+
mhash_set (&vrm->vr_index_by_key, &key, vr_index, 0);
}
else
@@ -692,13 +938,7 @@ vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t * vr_conf)
vr_index = p[0];
vr = pool_elt_at_index (vrm->vrs, vr_index);
-
- vrrp_vr_tracking_ifs_add_del (vr, vr->tracking.interfaces, is_add);
- vrrp_vr_addrs_add_del (vr, is_add, vr->config.vr_addrs);
- mhash_unset (&vrm->vr_index_by_key, &key, 0);
- vec_free (vr->config.vr_addrs);
- vec_free (vr->tracking.interfaces);
- pool_put (vrm->vrs, vr);
+ vrrp_vr_del_common (vr, &key);
}
vrrp_intf_vr_add_del (is_add, vr_conf->sw_if_index, vr_index, key.is_ipv6);
@@ -1266,19 +1506,24 @@ vrrp_init (vlib_main_t * vm)
vrrp_ip6_delegate_id = ip6_link_delegate_register (&vrrp_ip6_delegate_vft);
+ /* allocate & reset error counters */
+ for (int i = 0; i < VRRP_ERR_COUNTER_MAX; i++)
+ {
+ vlib_validate_simple_counter (&vrrp_errs[i], 0);
+ vlib_zero_simple_counter (&vrrp_errs[i], 0);
+ }
+
return error;
}
VLIB_INIT_FUNCTION (vrrp_init);
-/* *INDENT-OFF* */
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "VRRP v3 (RFC 5798)",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vrrp/vrrp.h b/src/plugins/vrrp/vrrp.h
index c9325921959..acab7440ead 100644
--- a/src/plugins/vrrp/vrrp.h
+++ b/src/plugins/vrrp/vrrp.h
@@ -33,7 +33,6 @@ typedef struct vrrp_vr_key
u8 is_ipv6;
} vrrp_vr_key_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct vrrp4_arp_key {
union {
@@ -44,15 +43,12 @@ typedef CLIB_PACKED
u64 as_u64;
};
}) vrrp4_arp_key_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct vrrp6_nd_key {
u32 sw_if_index;
ip6_address_t addr;
}) vrrp6_nd_key_t;
-/* *INDENT-ON* */
typedef struct vrrp_vr_tracking_if
{
@@ -108,6 +104,7 @@ typedef struct vrrp_vr
vrrp_vr_config_t config;
vrrp_vr_runtime_t runtime;
vrrp_vr_tracking_t tracking;
+ u32 stat_index;
} vrrp_vr_t;
/* Timers */
@@ -185,9 +182,46 @@ extern vlib_node_registration_t vrrp_periodic_node;
#define VRRP_EVENT_VR_STOP 2
#define VRRP_EVENT_PERIODIC_ENABLE_DISABLE 3
+/* global error counter types */
+#define foreach_vrrp_err_counter \
+ _ (CHKSUM, 0) \
+ _ (VERSION, 1) \
+ _ (VRID, 2) \
+ _ (TTL, 3) \
+ _ (ADDR_LIST, 4) \
+ _ (PKT_LEN, 5)
+
+typedef enum vrrp_err_counter_
+{
+#define _(sym, val) VRRP_ERR_COUNTER_##sym = val,
+ foreach_vrrp_err_counter
+#undef _
+} vrrp_err_counter_t;
+
+#define VRRP_ERR_COUNTER_MAX 6
+
+/* per-instance stats */
+#define foreach_vrrp_stat_counter \
+ _ (MASTER_TRANS, 0) \
+ _ (ADV_SENT, 1) \
+ _ (ADV_RCVD, 2) \
+ _ (PRIO0_SENT, 3) \
+ _ (PRIO0_RCVD, 4)
+
+typedef enum vrrp_stat_counter_
+{
+#define _(sym, val) VRRP_STAT_COUNTER_##sym = val,
+ foreach_vrrp_stat_counter
+#undef _
+} vrrp_stat_counter_t;
+
+#define VRRP_STAT_COUNTER_MAX 5
+
clib_error_t *vrrp_plugin_api_hookup (vlib_main_t * vm);
-int vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t * conf);
+int vrrp_vr_add_del (u8 is_add, vrrp_vr_config_t *conf, index_t *ret_index);
+int vrrp_vr_update (index_t *vrrp_index, vrrp_vr_config_t *vr_conf);
+int vrrp_vr_del (index_t vrrp_index);
int vrrp_vr_start_stop (u8 is_start, vrrp_vr_key_t * vr_key);
extern u8 *format_vrrp_vr (u8 * s, va_list * args);
extern u8 *format_vrrp_vr_key (u8 * s, va_list * args);
@@ -209,6 +243,9 @@ int vrrp_vr_tracking_ifs_add_del (vrrp_vr_t * vr,
u8 is_add);
void vrrp_vr_event (vrrp_vr_t * vr, vrrp_vr_state_t new_state);
+// stats
+void vrrp_incr_err_counter (vrrp_err_counter_t err_type);
+void vrrp_incr_stat_counter (vrrp_stat_counter_t stat_type, u32 stat_index);
always_inline void
vrrp_vr_skew_compute (vrrp_vr_t * vr)
diff --git a/src/plugins/vrrp/vrrp_all_api_h.h b/src/plugins/vrrp/vrrp_all_api_h.h
deleted file mode 100644
index 4f45909de70..00000000000
--- a/src/plugins/vrrp/vrrp_all_api_h.h
+++ /dev/null
@@ -1,11 +0,0 @@
-
-/*
- * vrrp_all_api_h.h - vrrp plug-in api #include file
- *
- * Copyright 2019-2020 Rubicon Communications, LLC (Netgate)
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-/* Include the generated file, see BUILT_SOURCES in Makefile.am */
-#include <vrrp/vrrp.api.h>
diff --git a/src/plugins/vrrp/vrrp_api.c b/src/plugins/vrrp/vrrp_api.c
index 9a206fa6cdc..e31e0a74c14 100644
--- a/src/plugins/vrrp/vrrp_api.c
+++ b/src/plugins/vrrp/vrrp_api.c
@@ -25,6 +25,109 @@
/* API message handlers */
static void
+vl_api_vrrp_vr_update_t_handler (vl_api_vrrp_vr_update_t *mp)
+{
+ vl_api_vrrp_vr_update_reply_t *rmp;
+ vrrp_vr_config_t vr_conf;
+ u32 api_flags;
+ u32 vrrp_index = INDEX_INVALID;
+ ip46_address_t *addrs = 0;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ api_flags = htonl (mp->flags);
+
+ clib_memset (&vr_conf, 0, sizeof (vr_conf));
+
+ vr_conf.sw_if_index = ntohl (mp->sw_if_index);
+ vr_conf.vr_id = mp->vr_id;
+ vr_conf.priority = mp->priority;
+ vr_conf.adv_interval = ntohs (mp->interval);
+
+ if (api_flags & VRRP_API_VR_PREEMPT)
+ vr_conf.flags |= VRRP_VR_PREEMPT;
+
+ if (api_flags & VRRP_API_VR_ACCEPT)
+ vr_conf.flags |= VRRP_VR_ACCEPT;
+
+ if (api_flags & VRRP_API_VR_UNICAST)
+ vr_conf.flags |= VRRP_VR_UNICAST;
+
+ if (api_flags & VRRP_API_VR_IPV6)
+ vr_conf.flags |= VRRP_VR_IPV6;
+
+ int i;
+ for (i = 0; i < mp->n_addrs; i++)
+ {
+ ip46_address_t *addr;
+ void *src, *dst;
+ int len;
+
+ vec_add2 (addrs, addr, 1);
+
+ if (ntohl (mp->addrs[i].af) == ADDRESS_IP4)
+ {
+ src = &mp->addrs[i].un.ip4;
+ dst = &addr->ip4;
+ len = sizeof (addr->ip4);
+ }
+ else
+ {
+ src = &mp->addrs[i].un.ip6;
+ dst = &addr->ip6;
+ len = sizeof (addr->ip6);
+ }
+
+ clib_memcpy (dst, src, len);
+ }
+
+ vr_conf.vr_addrs = addrs;
+
+ if (vr_conf.priority == 0)
+ {
+ clib_warning ("VR priority must be > 0");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+ else if (vr_conf.adv_interval == 0)
+ {
+ clib_warning ("VR advertisement interval must be > 0");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+ else if (vr_conf.vr_id == 0)
+ {
+ clib_warning ("VR ID must be > 0");
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ }
+ else
+ {
+ vrrp_index = ntohl (mp->vrrp_index);
+ rv = vrrp_vr_update (&vrrp_index, &vr_conf);
+ }
+
+ vec_free (addrs);
+
+ BAD_SW_IF_INDEX_LABEL;
+ // clang-format off
+ REPLY_MACRO2 (VL_API_VRRP_VR_UPDATE_REPLY,
+ ({
+ rmp->vrrp_index = htonl (vrrp_index);
+ }));
+ // clang-format on
+}
+
+static void
+vl_api_vrrp_vr_del_t_handler (vl_api_vrrp_vr_del_t *mp)
+{
+ vl_api_vrrp_vr_del_reply_t *rmp;
+ int rv;
+
+ rv = vrrp_vr_del (ntohl (mp->vrrp_index));
+
+ REPLY_MACRO (VL_API_VRRP_VR_DEL_REPLY);
+}
+
+static void
vl_api_vrrp_vr_add_del_t_handler (vl_api_vrrp_vr_add_del_t * mp)
{
vl_api_vrrp_vr_add_del_reply_t *rmp;
@@ -103,7 +206,7 @@ vl_api_vrrp_vr_add_del_t_handler (vl_api_vrrp_vr_add_del_t * mp)
rv = VNET_API_ERROR_INVALID_VALUE;
}
else
- rv = vrrp_vr_add_del (mp->is_add, &vr_conf);
+ rv = vrrp_vr_add_del (mp->is_add, &vr_conf, NULL);
vec_free (addrs);
@@ -215,7 +318,6 @@ vl_api_vrrp_vr_dump_t_handler (vl_api_vrrp_vr_dump_t * mp)
sw_if_index = htonl (mp->sw_if_index);
- /* *INDENT-OFF* */
pool_foreach (vr, vmp->vrs) {
if (sw_if_index && (sw_if_index != ~0) &&
@@ -224,7 +326,6 @@ vl_api_vrrp_vr_dump_t_handler (vl_api_vrrp_vr_dump_t * mp)
send_vrrp_vr_details (vr, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -363,7 +464,6 @@ vl_api_vrrp_vr_peer_dump_t_handler (vl_api_vrrp_vr_peer_dump_t * mp)
return;
}
- /* *INDENT-OFF* */
pool_foreach (vr, vmp->vrs) {
if (!vec_len (vr->config.peer_addrs))
@@ -372,7 +472,6 @@ vl_api_vrrp_vr_peer_dump_t_handler (vl_api_vrrp_vr_peer_dump_t * mp)
send_vrrp_vr_details (vr, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -467,7 +566,6 @@ vl_api_vrrp_vr_track_if_dump_t_handler (vl_api_vrrp_vr_track_if_dump_t * mp)
return;
}
- /* *INDENT-OFF* */
pool_foreach (vr, vmp->vrs) {
if (!vec_len (vr->tracking.interfaces))
@@ -476,7 +574,6 @@ vl_api_vrrp_vr_track_if_dump_t_handler (vl_api_vrrp_vr_track_if_dump_t * mp)
send_vrrp_vr_track_if_details (vr, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -510,14 +607,12 @@ vrrp_vr_event (vrrp_vr_t * vr, vrrp_vr_state_t new_state)
vpe_client_registration_t *reg;
vl_api_registration_t *vl_reg;
- /* *INDENT-OFF* */
pool_foreach (reg, vam->vrrp_vr_events_registrations)
{
vl_reg = vl_api_client_index_to_registration (reg->client_index);
if (vl_reg)
send_vrrp_vr_event (reg, vl_reg, vr, new_state);
}
- /* *INDENT-ON* */
}
pub_sub_handler (vrrp_vr_events, VRRP_VR_EVENTS);
@@ -535,7 +630,6 @@ vrrp_plugin_api_hookup (vlib_main_t * vm)
return 0;
}
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vrrp/vrrp_cli.c b/src/plugins/vrrp/vrrp_cli.c
index a154a11a8fa..fb52da474fa 100644
--- a/src/plugins/vrrp/vrrp_cli.c
+++ b/src/plugins/vrrp/vrrp_cli.c
@@ -102,7 +102,7 @@ vrrp_vr_add_del_command_fn (vlib_main_t * vm,
vr_conf.adv_interval = (u16) interval;
vr_conf.vr_addrs = addrs;
- rv = vrrp_vr_add_del (is_add, &vr_conf);
+ rv = vrrp_vr_add_del (is_add, &vr_conf, NULL);
switch (rv)
{
@@ -151,7 +151,6 @@ vrrp_vr_add_command_fn (vlib_main_t * vm, unformat_input_t * input,
return vrrp_vr_add_del_command_fn (vm, input, cmd, 1 /* is_add */ );
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_vr_add_command, static) =
{
.path = "vrrp vr add",
@@ -159,7 +158,6 @@ VLIB_CLI_COMMAND (vrrp_vr_add_command, static) =
"vrrp vr add <interface> [vr_id <n>] [ipv6] [priority <value>] [interval <value>] [no_preempt] [accept_mode] [unicast] [<ip_addr> ...]",
.function = vrrp_vr_add_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vrrp_vr_del_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -168,14 +166,12 @@ vrrp_vr_del_command_fn (vlib_main_t * vm, unformat_input_t * input,
return vrrp_vr_add_del_command_fn (vm, input, cmd, 0 /* is_add */ );
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_vr_del_command, static) =
{
.path = "vrrp vr del",
.short_help = "vrrp vr del <interface> [vr_id <n>] [ipv6]",
.function = vrrp_vr_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vrrp_show_vr_command_fn (vlib_main_t * vm,
@@ -208,7 +204,6 @@ vrrp_show_vr_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_show_vr_command, static) =
{
.path = "show vrrp vr",
@@ -216,7 +211,6 @@ VLIB_CLI_COMMAND (vrrp_show_vr_command, static) =
"show vrrp vr [(<intf_name>|sw_if_index <n>)]",
.function = vrrp_show_vr_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vrrp_proto_start_stop_command_fn (vlib_main_t * vm,
@@ -242,6 +236,8 @@ vrrp_proto_start_stop_command_fn (vlib_main_t * vm,
if (unformat (input, "%U", unformat_vnet_sw_interface, vmp->vnet_main,
&sw_if_index))
;
+ else if (unformat (input, "sw_if_index %u", &sw_if_index))
+ ;
else if (unformat (input, "vr_id %u", &vr_id))
;
else if (unformat (input, "ipv6"))
@@ -311,6 +307,8 @@ vrrp_peers_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (unformat (input, "%U", unformat_vnet_sw_interface, vmp->vnet_main,
&sw_if_index))
;
+ else if (unformat (input, "sw_if_index %u", &sw_if_index))
+ ;
else if (unformat (input, "vr_id %u", &vr_id))
;
else if (unformat (input, "ipv6"))
@@ -373,7 +371,6 @@ done:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_proto_start_stop_command, static) =
{
.path = "vrrp proto",
@@ -381,9 +378,7 @@ VLIB_CLI_COMMAND (vrrp_proto_start_stop_command, static) =
"vrrp proto (start|stop) (<intf_name>|sw_if_index <n>) vr_id <n> [ipv6]",
.function = vrrp_proto_start_stop_command_fn,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_peers_command, static) =
{
.path = "vrrp peers",
@@ -391,7 +386,6 @@ VLIB_CLI_COMMAND (vrrp_peers_command, static) =
"vrrp peers (<intf_name>|sw_if_index <n>) vr_id <n> [ipv6] <peer1_addr> [<peer2_addr> ...]",
.function = vrrp_peers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vrrp_vr_track_if_command_fn (vlib_main_t * vm,
@@ -418,6 +412,8 @@ vrrp_vr_track_if_command_fn (vlib_main_t * vm,
if (unformat (input, "%U", unformat_vnet_sw_interface, vmp->vnet_main,
&sw_if_index))
;
+ else if (unformat (input, "sw_if_index %u", &sw_if_index))
+ ;
else if (unformat (input, "add"))
is_add = 1;
else if (unformat (input, "del"))
@@ -487,7 +483,6 @@ done:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vrrp_vr_track_if_command, static) =
{
.path = "vrrp vr track-if",
@@ -495,7 +490,6 @@ VLIB_CLI_COMMAND (vrrp_vr_track_if_command, static) =
"vrrp vr track-if (add|del) (<intf_name>|sw_if_index <n>) vr_id <n> [ipv6] track-index <n> priority <n> [ track-index <n> priority <n> ...]",
.function = vrrp_vr_track_if_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vrrp/vrrp_msg_enum.h b/src/plugins/vrrp/vrrp_msg_enum.h
deleted file mode 100644
index 48ae619205a..00000000000
--- a/src/plugins/vrrp/vrrp_msg_enum.h
+++ /dev/null
@@ -1,23 +0,0 @@
-
-/*
- * vrrp_msg_enum.h - vrrp plug-in message enumeration
- *
- * Copyright 2019-2020 Rubicon Communications, LLC (Netgate)
- *
- * SPDX-License-Identifier: Apache-2.0
- *
- */
-#ifndef included_vrrp_msg_enum_h
-#define included_vrrp_msg_enum_h
-
-#include <vppinfra/byte_order.h>
-
-#define vl_msg_id(n,h) n,
-typedef enum {
-#include <vrrp/vrrp_all_api_h.h>
- /* We'll want to know how many messages IDs we need... */
- VL_MSG_FIRST_AVAILABLE,
-} vl_msg_id_t;
-#undef vl_msg_id
-
-#endif /* included_vrrp_msg_enum_h */
diff --git a/src/plugins/vrrp/vrrp_packet.c b/src/plugins/vrrp/vrrp_packet.c
index b470ddeba08..69e635f811a 100644
--- a/src/plugins/vrrp/vrrp_packet.c
+++ b/src/plugins/vrrp/vrrp_packet.c
@@ -102,13 +102,24 @@ vrrp_adv_l3_build (vrrp_vr_t * vr, vlib_buffer_t * b,
if (!vrrp_vr_is_ipv6 (vr)) /* IPv4 */
{
ip4_header_t *ip4 = vlib_buffer_get_current (b);
+ ip4_address_t *src4;
clib_memset (ip4, 0, sizeof (*ip4));
ip4->ip_version_and_header_length = 0x45;
ip4->ttl = 255;
ip4->protocol = IP_PROTOCOL_VRRP;
clib_memcpy (&ip4->dst_address, &dst->ip4, sizeof (dst->ip4));
- fib_sas4_get (vr->config.sw_if_index, NULL, &ip4->src_address);
+
+ /* RFC 5798 Section 5.1.1.1 - Source Address "is the primary IPv4
+ * address of the interface the packet is being sent from". Assume
+ * this is the first address on the interface.
+ */
+ src4 = ip_interface_get_first_ip (vr->config.sw_if_index, 1);
+ if (!src4)
+ {
+ return -1;
+ }
+ ip4->src_address.as_u32 = src4->as_u32;
ip4->length = clib_host_to_net_u16 (sizeof (*ip4) +
vrrp_adv_payload_len (vr));
ip4->checksum = ip4_header_checksum (ip4);
@@ -325,7 +336,12 @@ vrrp_adv_send (vrrp_vr_t * vr, int shutdown)
else
vrrp_adv_l2_build_multicast (vr, b);
- vrrp_adv_l3_build (vr, b, dst);
+ if (-1 == vrrp_adv_l3_build (vr, b, dst))
+ {
+ vlib_frame_free (vm, to_frame);
+ vlib_buffer_free (vm, bi, n_buffers);
+ return -1;
+ }
vrrp_adv_payload_build (vr, b, shutdown);
vlib_buffer_reset (b);
@@ -337,6 +353,12 @@ vrrp_adv_send (vrrp_vr_t * vr, int shutdown)
vlib_put_frame_to_node (vm, node_index, to_frame);
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_ADV_SENT, vr->stat_index);
+ if (shutdown)
+ {
+ vrrp_incr_stat_counter (VRRP_STAT_COUNTER_PRIO0_SENT, vr->stat_index);
+ }
+
vec_free (bi);
return 0;
@@ -516,6 +538,8 @@ vrrp_garp_or_na_send (vrrp_vr_t * vr)
vlib_put_frame_to_node (vm, vmp->intf_output_node_idx, to_frame);
+ vec_free (bi);
+
return 0;
}
@@ -529,17 +553,25 @@ static const ip4_header_t igmp_ip4_mcast = {
.dst_address = {.as_u8 = IGMP4_MCAST_ADDR_AS_U8,},
};
-static void
-vrrp_igmp_pkt_build (vrrp_vr_t * vr, vlib_buffer_t * b)
+static int
+vrrp_igmp_pkt_build (vrrp_vr_t *vr, vlib_buffer_t *b)
{
ip4_header_t *ip4;
u8 *ip4_options;
igmp_membership_report_v3_t *report;
igmp_membership_group_v3_t *group;
+ ip4_address_t *src4;
ip4 = vlib_buffer_get_current (b);
clib_memcpy (ip4, &igmp_ip4_mcast, sizeof (*ip4));
- fib_sas4_get (vr->config.sw_if_index, NULL, &ip4->src_address);
+
+ /* Use the source address advertisements will use to join mcast group */
+ src4 = ip_interface_get_first_ip (vr->config.sw_if_index, 1);
+ if (!src4)
+ {
+ return -1;
+ }
+ ip4->src_address.as_u32 = src4->as_u32;
vlib_buffer_chain_increase_length (b, b, sizeof (*ip4));
vlib_buffer_advance (b, sizeof (*ip4));
@@ -581,6 +613,7 @@ vrrp_igmp_pkt_build (vrrp_vr_t * vr, vlib_buffer_t * b)
~ip_csum_fold (ip_incremental_checksum (0, report, payload_len));
vlib_buffer_reset (b);
+ return 0;
}
/* multicast listener report packet format for ethernet. */
@@ -720,7 +753,13 @@ vrrp_vr_multicast_group_join (vrrp_vr_t * vr)
}
else
{
- vrrp_igmp_pkt_build (vr, b);
+ if (-1 == vrrp_igmp_pkt_build (vr, b))
+ {
+ clib_warning ("IGMP packet build failed for %U", format_vrrp_vr_key,
+ vr);
+ vlib_buffer_free (vm, &bi, 1);
+ return -1;
+ }
node_index = ip4_rewrite_mcast_node.index;
}
diff --git a/src/plugins/vrrp/vrrp_packet.h b/src/plugins/vrrp/vrrp_packet.h
index 1cbf62d7c72..d5725b6a1a5 100644
--- a/src/plugins/vrrp/vrrp_packet.h
+++ b/src/plugins/vrrp/vrrp_packet.h
@@ -47,6 +47,15 @@ vrrp_adv_int_from_packet (vrrp_header_t * pkt)
return clib_net_to_host_u16 (pkt->rsvd_and_max_adv_int) & ((u16) 0x0fff);
}
+/* Fields from VRRP advertisement packets needed by main thread */
+typedef struct vrrp_input_process_args
+{
+ u32 vr_index;
+ ip46_address_t src_addr;
+ u8 priority;
+ u8 max_adv_int;
+} vrrp_input_process_args_t;
+
#endif /* __included_vrrp_packet_h__ */
/*
diff --git a/src/plugins/vrrp/vrrp_periodic.c b/src/plugins/vrrp/vrrp_periodic.c
index 9c1b76ae59d..5f9d7ae938e 100644
--- a/src/plugins/vrrp/vrrp_periodic.c
+++ b/src/plugins/vrrp/vrrp_periodic.c
@@ -210,14 +210,12 @@ vrrp_periodic_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vrrp_periodic_node) = {
.function = vrrp_periodic_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "vrrp-periodic-process",
.process_log2_n_stack_bytes = 17,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vrrp/vrrp_test.c b/src/plugins/vrrp/vrrp_test.c
index 199f5417f1a..d2f79f65c3f 100644
--- a/src/plugins/vrrp/vrrp_test.c
+++ b/src/plugins/vrrp/vrrp_test.c
@@ -19,8 +19,7 @@ uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
#include <vnet/format_fns.h>
#include <vrrp/vrrp.api_enum.h>
#include <vrrp/vrrp.api_types.h>
-#include <vpp/api/vpe.api_types.h>
-
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -36,6 +35,176 @@ vrrp_test_main_t vrrp_test_main;
#include <vlibapi/vat_helper_macros.h>
static int
+api_vrrp_vr_update (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ u32 sw_if_index = ~0;
+ u32 vr_id, priority, interval, vrrp_index;
+ u8 is_ipv6, no_preempt, accept_mode, vr_unicast;
+ u8 n_addrs4, n_addrs6;
+ vl_api_vrrp_vr_update_t *mp;
+ vl_api_address_t *api_addr;
+ ip46_address_t *ip_addr, *ip_addrs = 0;
+ ip46_address_t addr;
+ int ret = 0;
+
+ interval = priority = 100;
+ n_addrs4 = n_addrs6 = 0;
+ vr_id = is_ipv6 = no_preempt = accept_mode = vr_unicast = 0;
+ vrrp_index = INDEX_INVALID;
+
+ clib_memset (&addr, 0, sizeof (addr));
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %u", &sw_if_index))
+ ;
+ else if (unformat (i, "vr_id %u", &vr_id))
+ ;
+ else if (unformat (i, "vrrp_index %u", &vrrp_index))
+ ;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else if (unformat (i, "priority %u", &priority))
+ ;
+ else if (unformat (i, "interval %u", &interval))
+ ;
+ else if (unformat (i, "no_preempt"))
+ no_preempt = 1;
+ else if (unformat (i, "accept_mode"))
+ accept_mode = 1;
+ else if (unformat (i, "unicast"))
+ vr_unicast = 1;
+ else if (unformat (i, "%U", unformat_ip4_address, &addr.ip4))
+ {
+ vec_add1 (ip_addrs, addr);
+ n_addrs4++;
+ clib_memset (&addr, 0, sizeof (addr));
+ }
+ else if (unformat (i, "%U", unformat_ip6_address, &addr.ip6))
+ {
+ vec_add1 (ip_addrs, addr);
+ n_addrs6++;
+ clib_memset (&addr, 0, sizeof (addr));
+ }
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("Interface not set\n");
+ ret = -99;
+ }
+ else if (n_addrs4 && (n_addrs6 || is_ipv6))
+ {
+ errmsg ("Address family mismatch\n");
+ ret = -99;
+ }
+
+ if (ret)
+ goto done;
+
+ /* Construct the API message */
+ M2 (VRRP_VR_UPDATE, mp, vec_len (ip_addrs) * sizeof (*api_addr));
+
+ mp->vrrp_index = htonl (vrrp_index);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->vr_id = vr_id;
+ mp->priority = priority;
+ mp->interval = htons (interval);
+ mp->flags = VRRP_API_VR_PREEMPT; /* preempt by default */
+
+ if (no_preempt)
+ mp->flags &= ~VRRP_API_VR_PREEMPT;
+
+ if (accept_mode)
+ mp->flags |= VRRP_API_VR_ACCEPT;
+
+ if (vr_unicast)
+ mp->flags |= VRRP_API_VR_UNICAST;
+
+ if (is_ipv6)
+ mp->flags |= VRRP_API_VR_IPV6;
+
+ mp->flags = htonl (mp->flags);
+
+ mp->n_addrs = n_addrs4 + n_addrs6;
+ api_addr = mp->addrs;
+
+ vec_foreach (ip_addr, ip_addrs)
+ {
+ void *src, *dst;
+ int len;
+
+ if (is_ipv6)
+ {
+ api_addr->af = ADDRESS_IP6;
+ src = &ip_addr->ip6;
+ dst = &api_addr->un.ip6;
+ len = sizeof (api_addr->un.ip6);
+ }
+ else
+ {
+ api_addr->af = ADDRESS_IP4;
+ src = &ip_addr->ip4;
+ dst = &api_addr->un.ip4;
+ len = sizeof (api_addr->un.ip4);
+ }
+ clib_memcpy (dst, src, len);
+ api_addr++;
+ }
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+
+done:
+ vec_free (ip_addrs);
+
+ return ret;
+}
+
+static void
+vl_api_vrrp_vr_update_reply_t_handler (vl_api_vrrp_vr_update_reply_t *mp)
+{
+}
+
+static int
+api_vrrp_vr_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_vrrp_vr_del_t *mp;
+ u32 vrrp_index = INDEX_INVALID;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "vrrp_index %u", &vrrp_index))
+ ;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (VRRP_VR_DEL, mp);
+ mp->vrrp_index = htonl (vrrp_index);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+static int
api_vrrp_vr_add_del (vat_main_t * vam)
{
unformat_input_t *i = vam->input;
diff --git a/src/plugins/vxlan/CMakeLists.txt b/src/plugins/vxlan/CMakeLists.txt
new file mode 100644
index 00000000000..bd0272a868e
--- /dev/null
+++ b/src/plugins/vxlan/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright (c) 2022 Cisco and/or its affiliates.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at:
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+add_vpp_plugin(vxlan
+ SOURCES
+ vxlan.c
+ encap.c
+ decap.c
+ vxlan_api.c
+ plugin.c
+ vxlan.h
+ vxlan_packet.h
+
+ MULTIARCH_SOURCES
+ encap.c
+
+ API_FILES
+ vxlan.api
+)
diff --git a/src/vnet/vxlan/FEATURE.yaml b/src/plugins/vxlan/FEATURE.yaml
index dc7d21b010e..dc7d21b010e 100644
--- a/src/vnet/vxlan/FEATURE.yaml
+++ b/src/plugins/vxlan/FEATURE.yaml
diff --git a/src/vnet/vxlan/decap.c b/src/plugins/vxlan/decap.c
index 729293fb3e5..5f28c5e97bb 100644
--- a/src/vnet/vxlan/decap.c
+++ b/src/plugins/vxlan/decap.c
@@ -16,12 +16,12 @@
*/
#include <vlib/vlib.h>
-#include <vnet/vxlan/vxlan.h>
+#include <vxlan/vxlan.h>
#include <vnet/udp/udp_local.h>
#ifndef CLIB_MARCH_VARIANT
-vlib_node_registration_t vxlan4_input_node;
-vlib_node_registration_t vxlan6_input_node;
+__clib_export vlib_node_registration_t vxlan4_input_node;
+__clib_export vlib_node_registration_t vxlan6_input_node;
#endif
typedef struct
@@ -408,11 +408,10 @@ VLIB_NODE_FN (vxlan6_input_node) (vlib_main_t * vm,
static char *vxlan_error_strings[] = {
#define vxlan_error(n,s) s,
-#include <vnet/vxlan/vxlan_error.def>
+#include <vxlan/vxlan_error.def>
#undef vxlan_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan4_input_node) =
{
.name = "vxlan4-input",
@@ -442,7 +441,6 @@ VLIB_REGISTER_NODE (vxlan6_input_node) =
},
.format_trace = format_vxlan_rx_trace,
};
-/* *INDENT-ON* */
typedef enum
{
@@ -875,7 +873,6 @@ VLIB_NODE_FN (ip4_vxlan_bypass_node) (vlib_main_t * vm,
return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_vxlan_bypass_node) =
{
.name = "ip4-vxlan-bypass",
@@ -889,7 +886,6 @@ VLIB_REGISTER_NODE (ip4_vxlan_bypass_node) =
.format_trace = format_ip4_forward_next_trace,
};
-/* *INDENT-ON* */
/* Dummy init function to get us linked in. */
static clib_error_t *
@@ -907,7 +903,6 @@ VLIB_NODE_FN (ip6_vxlan_bypass_node) (vlib_main_t * vm,
return ip_vxlan_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_vxlan_bypass_node) =
{
.name = "ip6-vxlan-bypass",
@@ -921,7 +916,6 @@ VLIB_REGISTER_NODE (ip6_vxlan_bypass_node) =
.format_trace = format_ip6_forward_next_trace,
};
-/* *INDENT-ON* */
/* Dummy init function to get us linked in. */
static clib_error_t *
@@ -1299,7 +1293,6 @@ VLIB_NODE_FN (vxlan4_flow_input_node) (vlib_main_t * vm,
return f->n_vectors;
}
-/* *INDENT-OFF* */
#ifndef CLIB_MULTIARCH_VARIANT
VLIB_REGISTER_NODE (vxlan4_flow_input_node) = {
.name = "vxlan-flow-input",
@@ -1319,7 +1312,6 @@ VLIB_REGISTER_NODE (vxlan4_flow_input_node) = {
},
};
#endif
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/vxlan/dir.dox b/src/plugins/vxlan/dir.dox
index 31a9e2b6112..31a9e2b6112 100644
--- a/src/vnet/vxlan/dir.dox
+++ b/src/plugins/vxlan/dir.dox
diff --git a/src/vnet/vxlan/encap.c b/src/plugins/vxlan/encap.c
index 476e0f2b3b7..98464d809ba 100644
--- a/src/vnet/vxlan/encap.c
+++ b/src/plugins/vxlan/encap.c
@@ -19,7 +19,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface_output.h>
-#include <vnet/vxlan/vxlan.h>
+#include <vxlan/vxlan.h>
#include <vnet/qos/qos_types.h>
#include <vnet/adj/rewrite.h>
@@ -68,9 +68,8 @@ format_vxlan_encap_trace (u8 * s, va_list * args)
#endif
always_inline uword
-vxlan_encap_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip4, u8 csum_offload)
+vxlan_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *from_frame, u8 is_ip4)
{
u32 n_left_from, next_index, *from, *to_next;
vxlan_main_t *vxm = &vxlan_main;
@@ -98,18 +97,11 @@ vxlan_encap_inline (vlib_main_t * vm,
u8 const underlay_hdr_len = is_ip4 ?
sizeof (ip4_vxlan_header_t) : sizeof (ip6_vxlan_header_t);
u16 const l3_len = is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
- u32 const csum_flags =
- is_ip4 ? VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID :
- VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
u32 const outer_packet_csum_offload_flags =
- is_ip4 ? VNET_BUFFER_OFFLOAD_F_IP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM :
- VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- u32 const inner_packet_removed_flags =
- VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_IS_IP6 |
- VNET_BUFFER_F_L2_HDR_OFFSET_VALID | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
+ is_ip4 ? (VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN) :
+ (VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN);
vlib_get_buffers (vm, from, bufs, n_left_from);
@@ -143,30 +135,6 @@ vxlan_encap_inline (vlib_main_t * vm,
vlib_buffer_t *b1 = b[1];
b += 2;
- u32 or_flags = b0->flags | b1->flags;
- if (csum_offload && (or_flags & VNET_BUFFER_F_OFFLOAD))
- {
- /* Only calculate the non-GSO packet csum offload */
- if ((b0->flags & VNET_BUFFER_F_GSO) == 0)
- {
- vnet_calc_checksums_inline (vm, b0,
- b0->flags &
- VNET_BUFFER_F_IS_IP4,
- b0->flags &
- VNET_BUFFER_F_IS_IP6);
- b0->flags &= ~inner_packet_removed_flags;
- }
- if ((b1->flags & VNET_BUFFER_F_GSO) == 0)
- {
- vnet_calc_checksums_inline (vm, b1,
- b1->flags &
- VNET_BUFFER_F_IS_IP4,
- b1->flags &
- VNET_BUFFER_F_IS_IP6);
- b1->flags &= ~inner_packet_removed_flags;
- }
- }
-
u32 flow_hash0 = vnet_l2_compute_flow_hash (b0);
u32 flow_hash1 = vnet_l2_compute_flow_hash (b1);
@@ -279,38 +247,55 @@ vxlan_encap_inline (vlib_main_t * vm,
udp1->length = payload_l1;
udp1->src_port = flow_hash1;
- if (csum_offload)
+ if (b0->flags & VNET_BUFFER_F_OFFLOAD)
{
- b0->flags |= csum_flags;
- vnet_buffer (b0)->l3_hdr_offset = l3_0 - b0->data;
- vnet_buffer (b0)->l4_hdr_offset = (u8 *) udp0 - b0->data;
+ vnet_buffer2 (b0)->outer_l3_hdr_offset = l3_0 - b0->data;
+ vnet_buffer2 (b0)->outer_l4_hdr_offset = (u8 *) udp0 - b0->data;
vnet_buffer_offload_flags_set (b0,
outer_packet_csum_offload_flags);
- b1->flags |= csum_flags;
- vnet_buffer (b1)->l3_hdr_offset = l3_1 - b1->data;
- vnet_buffer (b1)->l4_hdr_offset = (u8 *) udp1 - b1->data;
- vnet_buffer_offload_flags_set (b1,
- outer_packet_csum_offload_flags);
}
- /* IPv4 UDP checksum only if checksum offload is used */
+ /* IPv4 checksum only */
else if (is_ip4)
{
ip_csum_t sum0 = ip4_0->checksum;
sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
- length /* changed member */ );
+ length /* changed member */);
if (PREDICT_FALSE (ip4_0_tos))
{
sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
- tos /* changed member */ );
+ tos /* changed member */);
}
ip4_0->checksum = ip_csum_fold (sum0);
+ }
+ /* IPv6 UDP checksum is mandatory */
+ else
+ {
+ int bogus = 0;
+
+ udp0->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6_0, &bogus);
+ ASSERT (bogus == 0);
+ if (udp0->checksum == 0)
+ udp0->checksum = 0xffff;
+ }
+
+ if (b1->flags & VNET_BUFFER_F_OFFLOAD)
+ {
+ vnet_buffer2 (b1)->outer_l3_hdr_offset = l3_1 - b1->data;
+ vnet_buffer2 (b1)->outer_l4_hdr_offset = (u8 *) udp1 - b1->data;
+ vnet_buffer_offload_flags_set (b1,
+ outer_packet_csum_offload_flags);
+ }
+ /* IPv4 checksum only */
+ else if (is_ip4)
+ {
ip_csum_t sum1 = ip4_1->checksum;
sum1 = ip_csum_update (sum1, 0, ip4_1->length, ip4_header_t,
- length /* changed member */ );
+ length /* changed member */);
if (PREDICT_FALSE (ip4_1_tos))
{
sum1 = ip_csum_update (sum1, 0, ip4_1_tos, ip4_header_t,
- tos /* changed member */ );
+ tos /* changed member */);
}
ip4_1->checksum = ip_csum_fold (sum1);
}
@@ -319,11 +304,6 @@ vxlan_encap_inline (vlib_main_t * vm,
{
int bogus = 0;
- udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
- (vm, b0, ip6_0, &bogus);
- ASSERT (bogus == 0);
- if (udp0->checksum == 0)
- udp0->checksum = 0xffff;
udp1->checksum = ip6_tcp_udp_icmp_compute_checksum
(vm, b1, ip6_1, &bogus);
ASSERT (bogus == 0);
@@ -381,20 +361,6 @@ vxlan_encap_inline (vlib_main_t * vm,
vlib_buffer_t *b0 = b[0];
b += 1;
- if (csum_offload && (b0->flags & VNET_BUFFER_F_OFFLOAD))
- {
- /* Only calculate the non-GSO packet csum offload */
- if ((b0->flags & VNET_BUFFER_F_GSO) == 0)
- {
- vnet_calc_checksums_inline (vm, b0,
- b0->flags &
- VNET_BUFFER_F_IS_IP4,
- b0->flags &
- VNET_BUFFER_F_IS_IP6);
- b0->flags &= ~inner_packet_removed_flags;
- }
- }
-
u32 flow_hash0 = vnet_l2_compute_flow_hash (b0);
/* Get next node index and adj index from tunnel next_dpo */
@@ -458,24 +424,23 @@ vxlan_encap_inline (vlib_main_t * vm,
udp0->length = payload_l0;
udp0->src_port = flow_hash0;
- if (csum_offload)
+ if (b0->flags & VNET_BUFFER_F_OFFLOAD)
{
- b0->flags |= csum_flags;
- vnet_buffer (b0)->l3_hdr_offset = l3_0 - b0->data;
- vnet_buffer (b0)->l4_hdr_offset = (u8 *) udp0 - b0->data;
+ vnet_buffer2 (b0)->outer_l3_hdr_offset = l3_0 - b0->data;
+ vnet_buffer2 (b0)->outer_l4_hdr_offset = (u8 *) udp0 - b0->data;
vnet_buffer_offload_flags_set (b0,
outer_packet_csum_offload_flags);
}
- /* IPv4 UDP checksum only if checksum offload is used */
+ /* IPv4 checksum only */
else if (is_ip4)
{
ip_csum_t sum0 = ip4_0->checksum;
sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
- length /* changed member */ );
+ length /* changed member */);
if (PREDICT_FALSE (ip4_0_tos))
{
sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
- tos /* changed member */ );
+ tos /* changed member */);
}
ip4_0->checksum = ip_csum_fold (sum0);
}
@@ -527,8 +492,7 @@ VLIB_NODE_FN (vxlan4_encap_node) (vlib_main_t * vm,
{
/* Disable chksum offload as setup overhead in tx node is not worthwhile
for ip4 header checksum only, unless udp checksum is also required */
- return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 1,
- /* csum_offload */ 0);
+ return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 1);
}
VLIB_NODE_FN (vxlan6_encap_node) (vlib_main_t * vm,
@@ -536,11 +500,9 @@ VLIB_NODE_FN (vxlan6_encap_node) (vlib_main_t * vm,
vlib_frame_t * from_frame)
{
/* Enable checksum offload for ip6 as udp checksum is mandatory, */
- return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 0,
- /* csum_offload */ 1);
+ return vxlan_encap_inline (vm, node, from_frame, /* is_ip4 */ 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan4_encap_node) = {
.name = "vxlan4-encap",
.vector_size = sizeof (u32),
@@ -566,7 +528,6 @@ VLIB_REGISTER_NODE (vxlan6_encap_node) = {
[VXLAN_ENCAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/vxlan/plugin.c b/src/plugins/vxlan/plugin.c
new file mode 100644
index 00000000000..eae82830524
--- /dev/null
+++ b/src/plugins/vxlan/plugin.c
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/plugin/plugin.h>
+#include <vpp/app/version.h>
+
+VLIB_PLUGIN_REGISTER () = {
+ .version = VPP_BUILD_VER,
+ .description = "VxLan Tunnels",
+};
diff --git a/src/vnet/vxlan/vxlan.api b/src/plugins/vxlan/vxlan.api
index b7e678595d8..9c617ff22c8 100644
--- a/src/vnet/vxlan/vxlan.api
+++ b/src/plugins/vxlan/vxlan.api
@@ -32,6 +32,8 @@ import "vnet/ip/ip_types.api";
*/
define vxlan_add_del_tunnel
{
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add [default=true];
@@ -60,6 +62,8 @@ define vxlan_add_del_tunnel
*/
define vxlan_add_del_tunnel_v2
{
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add [default=true];
@@ -108,12 +112,16 @@ define vxlan_add_del_tunnel_v3
define vxlan_add_del_tunnel_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
};
define vxlan_add_del_tunnel_v2_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
@@ -127,6 +135,8 @@ define vxlan_add_del_tunnel_v3_reply
define vxlan_tunnel_dump
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_interface_index_t sw_if_index;
@@ -140,6 +150,8 @@ define vxlan_tunnel_v2_dump
define vxlan_tunnel_details
{
+ option deprecated;
+
u32 context;
vl_api_interface_index_t sw_if_index;
u32 instance;
diff --git a/src/vnet/vxlan/vxlan.c b/src/plugins/vxlan/vxlan.c
index dcf480578a7..0885550d257 100644
--- a/src/vnet/vxlan/vxlan.c
+++ b/src/plugins/vxlan/vxlan.c
@@ -12,7 +12,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include <vnet/vxlan/vxlan.h>
+#include <vxlan/vxlan.h>
#include <vnet/ip/format.h>
#include <vnet/fib/fib_entry.h>
#include <vnet/fib/fib_table.h>
@@ -52,6 +52,14 @@ vxlan_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
return 0;
}
+static clib_error_t *
+vxlan_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+ u32 frame_size)
+{
+ /* nothing for now */
+ return 0;
+}
+
static u8 *
format_decap_next (u8 * s, va_list * args)
{
@@ -120,14 +128,12 @@ vxlan_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
return /* no error */ 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vxlan_device_class, static) = {
.name = "VXLAN",
.format_device_name = format_vxlan_name,
.format_tx_trace = format_vxlan_encap_trace,
.admin_up_down_function = vxlan_interface_admin_up_down,
};
-/* *INDENT-ON* */
static u8 *
format_vxlan_header_with_length (u8 * s, va_list * args)
@@ -137,13 +143,11 @@ format_vxlan_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (vxlan_hw_class) = {
.name = "VXLAN",
.format_header = format_vxlan_header_with_length,
.build_rewrite = default_build_rewrite,
};
-/* *INDENT-ON* */
static void
vxlan_tunnel_restack_dpo (vxlan_tunnel_t * t)
@@ -310,7 +314,6 @@ vxlan_decap_next_is_valid (vxlan_main_t * vxm, u32 is_ip6,
return decap_next_index < r->n_next_nodes;
}
-/* *INDENT-OFF* */
typedef CLIB_PACKED(union
{
struct
@@ -320,7 +323,6 @@ typedef CLIB_PACKED(union
};
u64 as_u64;
}) mcast_shared_t;
-/* *INDENT-ON* */
static inline mcast_shared_t
mcast_shared_get (ip46_address_t * ip)
@@ -454,6 +456,7 @@ int vnet_vxlan_add_del_tunnel
vxlan_hw_class.index, dev_instance);
else
{
+ vnet_eth_interface_registration_t eir = {};
f64 now = vlib_time_now (vm);
u32 rnd;
rnd = (u32) (now * 1e6);
@@ -461,15 +464,13 @@ int vnet_vxlan_add_del_tunnel
memcpy (hw_addr + 2, &rnd, sizeof (rnd));
hw_addr[0] = 2;
hw_addr[1] = 0xfe;
- if (ethernet_register_interface (
- vnm, vxlan_device_class.index, dev_instance, hw_addr,
- &t->hw_if_index, vxlan_eth_flag_change))
- {
- hash_unset (vxm->instance_used, t->user_instance);
- pool_put (vxm->tunnels, t);
- return VNET_API_ERROR_SYSCALL_ERROR_2;
- }
+ eir.dev_class_index = vxlan_device_class.index;
+ eir.dev_instance = dev_instance;
+ eir.address = hw_addr;
+ eir.cb.flag_change = vxlan_eth_flag_change;
+ eir.cb.set_max_frame_size = vxlan_eth_set_max_frame_size;
+ t->hw_if_index = vnet_eth_register_interface (vnm, &eir);
}
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
@@ -530,7 +531,8 @@ int vnet_vxlan_add_del_tunnel
fib_prefix_t tun_dst_pfx;
vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
- fib_prefix_from_ip46_addr (&t->dst, &tun_dst_pfx);
+ fib_protocol_t fp = fib_ip_proto (is_ip6);
+ fib_prefix_from_ip46_addr (fp, &t->dst, &tun_dst_pfx);
if (!ip46_address_is_multicast (&t->dst))
{
/* Unicast tunnel -
@@ -554,8 +556,6 @@ int vnet_vxlan_add_del_tunnel
* with different VNIs, create the output fib adjacency only if
* it does not already exist
*/
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
if (vtep_addr_ref (&vxm->vtep_table,
t->encap_fib_index, &t->dst) == 1)
{
@@ -581,15 +581,16 @@ int vnet_vxlan_add_del_tunnel
* - the forwarding interface is for-us
* - the accepting interface is that from the API
*/
- mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx, MFIB_SOURCE_VXLAN, &path);
+ mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
+ MFIB_SOURCE_VXLAN,
+ MFIB_ENTRY_FLAG_NONE, &path);
path.frp_sw_if_index = a->mcast_sw_if_index;
path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei = mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx,
- MFIB_SOURCE_VXLAN, &path);
+ mfei = mfib_table_entry_path_update (
+ t->encap_fib_index, &mpfx, MFIB_SOURCE_VXLAN,
+ MFIB_ENTRY_FLAG_NONE, &path);
/*
* Create the mcast adjacency to send traffic to the group
@@ -908,7 +909,6 @@ vxlan_add_del_tunnel_command_fn (vlib_main_t * vm,
* Example of how to delete a VXLAN Tunnel:
* @cliexcmd{create vxlan tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = {
.path = "create vxlan tunnel",
.short_help =
@@ -919,7 +919,6 @@ VLIB_CLI_COMMAND (create_vxlan_tunnel_command, static) = {
" [src_port <local-vtep-udp-port>] [dst_port <remote-vtep-udp-port>]",
.function = vxlan_add_del_tunnel_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_vxlan_tunnel_command_fn (vlib_main_t * vm,
@@ -942,12 +941,10 @@ show_vxlan_tunnel_command_fn (vlib_main_t * vm,
if (pool_elts (vxm->tunnels) == 0)
vlib_cli_output (vm, "No vxlan tunnels configured...");
-/* *INDENT-OFF* */
pool_foreach (t, vxm->tunnels)
{
vlib_cli_output (vm, "%U", format_vxlan_tunnel, t);
}
-/* *INDENT-ON* */
if (raw)
{
@@ -972,13 +969,11 @@ show_vxlan_tunnel_command_fn (vlib_main_t * vm,
encap_fib_index 0 sw_if_index 5 decap_next l2
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vxlan_tunnel_command, static) = {
.path = "show vxlan tunnel",
.short_help = "show vxlan tunnel [raw]",
.function = show_vxlan_tunnel_command_fn,
};
-/* *INDENT-ON* */
void
@@ -1073,7 +1068,7 @@ set_ip4_vxlan_bypass (vlib_main_t * vm,
/*?
* This command adds the 'ip4-vxlan-bypass' graph node for a given interface.
* By adding the IPv4 vxlan-bypass graph node to an interface, the node checks
- * for and validate input vxlan packet and bypass ip4-lookup, ip4-local,
+ * for and validate input vxlan packet and bypass ip4-lookup, ip4-local,
* ip4-udp-lookup nodes to speedup vxlan packet forwarding. This node will
* cause extra overhead to for non-vxlan packets which is kept at a minimum.
*
@@ -1112,13 +1107,11 @@ set_ip4_vxlan_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip vxlan-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_vxlan_bypass_command, static) = {
.path = "set interface ip vxlan-bypass",
.function = set_ip4_vxlan_bypass,
.short_help = "set interface ip vxlan-bypass <interface> [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_ip6_vxlan_bypass (vlib_main_t * vm,
@@ -1130,7 +1123,7 @@ set_ip6_vxlan_bypass (vlib_main_t * vm,
/*?
* This command adds the 'ip6-vxlan-bypass' graph node for a given interface.
* By adding the IPv6 vxlan-bypass graph node to an interface, the node checks
- * for and validate input vxlan packet and bypass ip6-lookup, ip6-local,
+ * for and validate input vxlan packet and bypass ip6-lookup, ip6-local,
* ip6-udp-lookup nodes to speedup vxlan packet forwarding. This node will
* cause extra overhead to for non-vxlan packets which is kept at a minimum.
*
@@ -1169,13 +1162,11 @@ set_ip6_vxlan_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip6 vxlan-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_vxlan_bypass_command, static) = {
.path = "set interface ip6 vxlan-bypass",
.function = set_ip6_vxlan_bypass,
.short_help = "set interface ip6 vxlan-bypass <interface> [del]",
};
-/* *INDENT-ON* */
int
vnet_vxlan_add_del_rx_flow (u32 hw_if_index, u32 t_index, int is_add)
@@ -1290,14 +1281,12 @@ vxlan_offload_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vxlan_offload_command, static) = {
.path = "set flow-offload vxlan",
.short_help =
"set flow-offload vxlan hw <interface-name> rx <tunnel-name> [del]",
.function = vxlan_offload_command_fn,
};
-/* *INDENT-ON* */
#define VXLAN_HASH_NUM_BUCKETS (2 * 1024)
#define VXLAN_HASH_MEMORY_SIZE (1 << 20)
diff --git a/src/vnet/vxlan/vxlan.h b/src/plugins/vxlan/vxlan.h
index be819ab1069..ccddedeb279 100644
--- a/src/vnet/vxlan/vxlan.h
+++ b/src/plugins/vxlan/vxlan.h
@@ -26,14 +26,13 @@
#include <vnet/l2/l2_output.h>
#include <vnet/l2/l2_bd.h>
#include <vnet/ethernet/ethernet.h>
-#include <vnet/vxlan/vxlan_packet.h>
+#include <vxlan/vxlan_packet.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/dpo/dpo.h>
#include <vnet/adj/adj_types.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4; /* 20 bytes */
udp_header_t udp; /* 8 bytes */
@@ -45,7 +44,6 @@ typedef CLIB_PACKED (struct {
udp_header_t udp; /* 8 bytes */
vxlan_header_t vxlan; /* 8 bytes */
}) ip6_vxlan_header_t;
-/* *INDENT-ON* */
/*
* Key fields: remote ip, vni on incoming VXLAN packet
@@ -134,7 +132,7 @@ typedef struct
u32 dev_instance; /* Real device instance in tunnel vector */
u32 user_instance; /* Instance name being shown to user */
- VNET_DECLARE_REWRITE;
+ VNET_DECLARE_REWRITE;
} vxlan_tunnel_t;
#define foreach_vxlan_input_next \
@@ -152,7 +150,7 @@ typedef enum
typedef enum
{
#define vxlan_error(n,s) VXLAN_ERROR_##n,
-#include <vnet/vxlan/vxlan_error.def>
+#include <vxlan/vxlan_error.def>
#undef vxlan_error
VXLAN_N_ERROR,
} vxlan_input_error_t;
@@ -191,9 +189,7 @@ typedef struct
u32 flow_id_start;
/* cache for last 8 vxlan tunnel */
-#ifdef CLIB_HAVE_VEC512
vtep4_cache_t vtep4_u512;
-#endif
} vxlan_main_t;
diff --git a/src/vnet/vxlan/vxlan_api.c b/src/plugins/vxlan/vxlan_api.c
index c97597a2ef2..8fd0928cc63 100644
--- a/src/vnet/vxlan/vxlan_api.c
+++ b/src/plugins/vxlan/vxlan_api.c
@@ -23,7 +23,7 @@
#include <vnet/interface.h>
#include <vnet/api_errno.h>
#include <vnet/feature/feature.h>
-#include <vnet/vxlan/vxlan.h>
+#include <vxlan/vxlan.h>
#include <vnet/fib/fib_table.h>
#include <vnet/ip/ip_types_api.h>
#include <vnet/udp/udp_local.h>
@@ -354,7 +354,8 @@ vxlan_api_hookup (vlib_main_t * vm)
{
api_main_t *am = vlibapi_get_main ();
- am->api_trace_cfg[VL_API_VXLAN_ADD_DEL_TUNNEL].size += 16 * sizeof (u32);
+ vl_api_increase_msg_trace_size (am, VL_API_VXLAN_ADD_DEL_TUNNEL,
+ 16 * sizeof (u32));
/*
* Set up the (msg_name, crc, message-id) table
diff --git a/src/vnet/vxlan/vxlan_error.def b/src/plugins/vxlan/vxlan_error.def
index 17f905950f5..17f905950f5 100644
--- a/src/vnet/vxlan/vxlan_error.def
+++ b/src/plugins/vxlan/vxlan_error.def
diff --git a/src/vnet/vxlan/vxlan_packet.h b/src/plugins/vxlan/vxlan_packet.h
index d1d1ed813e5..d1d1ed813e5 100644
--- a/src/vnet/vxlan/vxlan_packet.h
+++ b/src/plugins/vxlan/vxlan_packet.h
diff --git a/src/plugins/wireguard/CMakeLists.txt b/src/plugins/wireguard/CMakeLists.txt
index 6dddc67298d..710b6a3b04a 100755..100644
--- a/src/plugins/wireguard/CMakeLists.txt
+++ b/src/plugins/wireguard/CMakeLists.txt
@@ -12,7 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+if(NOT OPENSSL_FOUND)
+ message(WARNING "OpenSSL not found - wireguard plugin disabled")
+ return()
+endif()
+
if (OPENSSL_VERSION VERSION_LESS 1.1.0)
+ message(WARNING "OpenSSL too old - wireguard plugin disabled")
return()
endif()
@@ -33,8 +39,11 @@ add_vpp_plugin(wireguard
wireguard_input.c
wireguard_output_tun.c
wireguard_handoff.c
+ wireguard_hchacha20.h
wireguard_key.c
wireguard_key.h
+ wireguard_chachapoly.c
+ wireguard_chachapoly.h
wireguard_cli.c
wireguard_messages.h
wireguard_noise.c
@@ -51,7 +60,7 @@ add_vpp_plugin(wireguard
wireguard_index_table.h
wireguard_api.c
- LINK_LIBRARIES ${OPENSSL_LIBRARIES}
+ LINK_LIBRARIES ${OPENSSL_CRYPTO_LIBRARIES}
API_FILES
wireguard.api
diff --git a/src/plugins/wireguard/FEATURE.yaml b/src/plugins/wireguard/FEATURE.yaml
index cf8b6d7f3c4..5c0a588a484 100644
--- a/src/plugins/wireguard/FEATURE.yaml
+++ b/src/plugins/wireguard/FEATURE.yaml
@@ -7,6 +7,3 @@ features:
description: "Wireguard protocol implementation"
state: development
properties: [API, CLI]
-missing:
- - IPv6 support
- - DoS protection as in the original protocol
diff --git a/src/plugins/wireguard/README.md b/src/plugins/wireguard/README.md
deleted file mode 100755
index df69d93789f..00000000000
--- a/src/plugins/wireguard/README.md
+++ /dev/null
@@ -1,55 +0,0 @@
-# Wireguard vpp-plugin {#wireguard_plugin_doc}
-
-## Overview
-This plugin is an implementation of [wireguard protocol](https://www.wireguard.com/) for VPP. It allows one to create secure VPN tunnels.
-This implementation is based on [wireguard-openbsd](https://git.zx2c4.com/wireguard-openbsd/).
-
-## Crypto
-
-The crypto protocols:
-
-- blake2s [[Source]](https://github.com/BLAKE2/BLAKE2)
-
-OpenSSL:
-
-- curve25519
-- chachapoly1305
-
-## Plugin usage example
-
-### Create wireguard interface
-
-```
-> vpp# wireguard create listen-port <port> private-key <priv_key> src <src_ip4> [generate-key]
-> *wg_interface*
-> vpp# set int state <wg_interface> up
-> vpp# set int ip address <wg_interface> <wg_ip4>
-```
-
-### Add a peer configuration:
-```
-> vpp# wireguard peer add <wg_interface> public-key <pub_key_other> endpoint <ip4_dst> allowed-ip <prefix> dst-port <port_dst> persistent-keepalive [keepalive_interval]
-> vpp# *peer_idx*
-```
-
-### Show config
-```
-> vpp# show wireguard interface
-> vpp# show wireguard peer
-```
-
-### Remove peer
-```
-> vpp# wireguard peer remove <peer_idx>
-```
-
-
-### Delete interface
-```
-> vpp# wireguard delete <wg_interface>
-```
-
-## Main next steps for improving this implementation
-1. Use all benefits of VPP-engine.
-2. Add IPv6 support (currently only supports IPv4)
-3. Add DoS protection as in original protocol (using cookie)
diff --git a/src/plugins/wireguard/README.rst b/src/plugins/wireguard/README.rst
new file mode 100644
index 00000000000..35dd2c41382
--- /dev/null
+++ b/src/plugins/wireguard/README.rst
@@ -0,0 +1,79 @@
+.. _wireguard_plugin_doc:
+
+Wireguard vpp-plugin
+====================
+
+Overview
+--------
+
+This plugin is an implementation of `wireguard
+protocol <https://www.wireguard.com/>`__ for VPP. It allows one to
+create secure VPN tunnels. This implementation is based on
+`wireguard-openbsd <https://git.zx2c4.com/wireguard-openbsd/>`__.
+
+Crypto
+------
+
+The crypto protocols:
+
+- blake2s `[Source] <https://github.com/BLAKE2/BLAKE2>`__
+
+OpenSSL:
+
+- curve25519
+- chachapoly1305
+
+Plugin usage example
+--------------------
+
+Create wireguard interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ > vpp# wireguard create listen-port <port> private-key <priv_key> src <src_ip4> [generate-key]
+ > *wg_interface*
+ > vpp# set int state <wg_interface> up
+ > vpp# set int ip address <wg_interface> <wg_ip4>
+
+Add a peer configuration:
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ > vpp# wireguard peer add <wg_interface> public-key <pub_key_other> endpoint <ip4_dst> allowed-ip <prefix> port <port_dst> persistent-keepalive [keepalive_interval]
+ > vpp# *peer_idx*
+
+Add routes for allowed-ip:
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ > ip route add <prefix> via <wg_ip4> <wg_interface>
+
+Show config
+~~~~~~~~~~~
+
+::
+
+ > vpp# show wireguard interface
+ > vpp# show wireguard peer
+
+Remove peer
+~~~~~~~~~~~
+
+::
+
+ > vpp# wireguard peer remove <peer_idx>
+
+Delete interface
+~~~~~~~~~~~~~~~~
+
+::
+
+ > vpp# wireguard delete <wg_interface>
+
+Main next steps for improving this implementation
+-------------------------------------------------
+
+1. Use all benefits of VPP-engine.
diff --git a/src/plugins/wireguard/blake/blake2-impl.h b/src/plugins/wireguard/blake/blake2-impl.h
index ad60b4a5775..ad60b4a5775 100755..100644
--- a/src/plugins/wireguard/blake/blake2-impl.h
+++ b/src/plugins/wireguard/blake/blake2-impl.h
diff --git a/src/plugins/wireguard/blake/blake2s.c b/src/plugins/wireguard/blake/blake2s.c
index 3ff312a1322..3ff312a1322 100755..100644
--- a/src/plugins/wireguard/blake/blake2s.c
+++ b/src/plugins/wireguard/blake/blake2s.c
diff --git a/src/plugins/wireguard/blake/blake2s.h b/src/plugins/wireguard/blake/blake2s.h
index 37da0acf28a..37da0acf28a 100755..100644
--- a/src/plugins/wireguard/blake/blake2s.h
+++ b/src/plugins/wireguard/blake/blake2s.h
diff --git a/src/plugins/wireguard/wireguard.api b/src/plugins/wireguard/wireguard.api
index e290fc41ffc..55a36c6f6e5 100755..100644
--- a/src/plugins/wireguard/wireguard.api
+++ b/src/plugins/wireguard/wireguard.api
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-option version = "0.1.0";
+option version = "1.3.0";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
@@ -83,19 +83,24 @@ define wireguard_interface_details
enum wireguard_peer_flags : u8
{
WIREGUARD_PEER_STATUS_DEAD = 0x1,
+ WIREGUARD_PEER_ESTABLISHED = 0x2,
};
-/** \brief Create new peer
+/** \brief Peer structure
+ @param peer_index - peer pool index
@param public_key - public key (in binary format) of destination peer
@param port - destination port
+ @param persistent_keepalive - keepalive packet timeout
@param table_id - The IP table in which 'endpoint' is reachable
@param endpoint - destination ip
- @param allowed_ip - allowed incoming ip tunnel
- @param tun_sw_if_index - tunnel interface
- @param persistent_keepalive - keepalive packet timeout
+ @param sw_if_index - tunnel SW interface
+ @param flags - peer status flags
+ @param n_allowed_ips - number of prefixes in allowed_ips
+ @param allowed_ips - allowed incoming tunnel prefixes
*/
typedef wireguard_peer
{
+ u32 peer_index;
u8 public_key[32];
u16 port;
u16 persistent_keepalive;
@@ -107,6 +112,41 @@ typedef wireguard_peer
vl_api_prefix_t allowed_ips[n_allowed_ips];
};
+service {
+ rpc want_wireguard_peer_events returns want_wireguard_peer_events_reply
+ events wireguard_peer_event;
+};
+/** \brief Register for wireguard peer events
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to dump peer info on, ~0 if on all
+ @param peer_index - index of the peer to dump info on, ~0 if on all
+ @param enable_disable - 1 => register for events, 0 => cancel registration
+ @param pid - sender's pid
+*/
+autoreply define want_wireguard_peer_events
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index [default=0xFFFFFFFF];
+ u32 peer_index [default=0xFFFFFFFF];
+ u32 enable_disable;
+ u32 pid;
+};
+/** \brief Interface Event generated by want_wireguard_peer_events
+ @param client_index - opaque cookie to identify the sender
+ @param pid - client pid registered to receive notification
+ @param peer_index - index of the peer for this event
+ @param deleted - interface was deleted
+*/
+define wireguard_peer_event
+{
+ u32 client_index;
+ u32 pid;
+ u32 peer_index;
+ vl_api_wireguard_peer_flags_t flags;
+};
+
/** \brief Create new peer
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -118,6 +158,12 @@ define wireguard_peer_add
u32 context;
vl_api_wireguard_peer_t peer;
};
+
+/** \brief Create new peer
+ @param context - sender context, to match reply w/ request
+ @param retval - return status
+ @param peer_index - Created or existing peer pool index
+*/
define wireguard_peer_add_reply
{
u32 context;
@@ -125,10 +171,10 @@ define wireguard_peer_add_reply
u32 peer_index;
};
-/** \brief Remove peer by public_key
+/** \brief Remove peer
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param public_key
+ @param peer_index - peer to be removed
*/
autoreply define wireguard_peer_remove
{
@@ -140,23 +186,34 @@ autoreply define wireguard_peer_remove
/** \brief Dump all peers
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
+ @param peer_index - peer index to be dumped. If 0xFFFFFFFF dumps all peers
*/
define wireguard_peers_dump {
u32 client_index;
u32 context;
+ u32 peer_index [default=0xFFFFFFFF];
};
-/** \brief Dump peers response
+/** \brief Dump peer details
@param context - sender context, to match reply w/ request
- @param is_dead - is peer valid yet
- @param public_key - peer public_key
- @param ip4_address - ip4 endpoint address
+ @param peer - peer details
*/
define wireguard_peers_details {
u32 context;
vl_api_wireguard_peer_t peer;
};
+/** \brief Wireguard Set Async mode
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param async_enable - wireguard async mode on or off, default off
+*/
+autoreply define wg_set_async_mode {
+ u32 client_index;
+ u32 context;
+ bool async_enable [default=false];
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/plugins/wireguard/wireguard.c b/src/plugins/wireguard/wireguard.c
index 58422299069..b1c8bc79870 100755..100644
--- a/src/plugins/wireguard/wireguard.c
+++ b/src/plugins/wireguard/wireguard.c
@@ -15,8 +15,8 @@
#include <vnet/vnet.h>
#include <vnet/plugin/plugin.h>
-#include <vnet/ipip/ipip.h>
#include <vpp/app/version.h>
+#include <vnet/crypto/crypto.h>
#include <wireguard/wireguard_send.h>
#include <wireguard/wireguard_key.h>
@@ -24,6 +24,45 @@
#include <wireguard/wireguard.h>
wg_main_t wg_main;
+wg_async_post_next_t wg_encrypt_async_next;
+wg_async_post_next_t wg_decrypt_async_next;
+
+void
+wg_set_async_mode (u32 is_enabled)
+{
+ if (is_enabled)
+ wg_op_mode_set_ASYNC ();
+ else
+ wg_op_mode_unset_ASYNC ();
+}
+
+static void
+wireguard_register_post_node (vlib_main_t *vm)
+
+{
+ wg_async_post_next_t *eit;
+ wg_async_post_next_t *dit;
+
+ eit = &wg_encrypt_async_next;
+ dit = &wg_decrypt_async_next;
+
+ eit->wg4_post_next =
+ vnet_crypto_register_post_node (vm, "wg4-output-tun-post-node");
+ eit->wg6_post_next =
+ vnet_crypto_register_post_node (vm, "wg6-output-tun-post-node");
+
+ dit->wg4_post_next =
+ vnet_crypto_register_post_node (vm, "wg4-input-post-node");
+ dit->wg6_post_next =
+ vnet_crypto_register_post_node (vm, "wg6-input-post-node");
+}
+
+void
+wg_secure_zero_memory (void *v, size_t n)
+{
+ static void *(*const volatile memset_v) (void *, int, size_t) = &memset;
+ memset_v (v, 0, n);
+}
static clib_error_t *
wg_init (vlib_main_t * vm)
@@ -32,9 +71,12 @@ wg_init (vlib_main_t * vm)
wmp->vlib_main = vm;
- wmp->in_fq_index = vlib_frame_queue_main_init (wg_input_node.index, 0);
- wmp->out_fq_index =
- vlib_frame_queue_main_init (wg_output_tun_node.index, 0);
+ wmp->in4_fq_index = vlib_frame_queue_main_init (wg4_input_node.index, 0);
+ wmp->in6_fq_index = vlib_frame_queue_main_init (wg6_input_node.index, 0);
+ wmp->out4_fq_index =
+ vlib_frame_queue_main_init (wg4_output_tun_node.index, 0);
+ wmp->out6_fq_index =
+ vlib_frame_queue_main_init (wg6_output_tun_node.index, 0);
vlib_thread_main_t *tm = vlib_get_thread_main ();
@@ -42,27 +84,32 @@ wg_init (vlib_main_t * vm)
CLIB_CACHE_LINE_BYTES);
wg_timer_wheel_init ();
+ wireguard_register_post_node (vm);
+ wmp->op_mode_flags = 0;
return (NULL);
}
VLIB_INIT_FUNCTION (wg_init);
-/* *INDENT-OFF* */
-VNET_FEATURE_INIT (wg_output_tun, static) =
-{
+VNET_FEATURE_INIT (wg4_output_tun, static) = {
.arc_name = "ip4-output",
- .node_name = "wg-output-tun",
+ .node_name = "wg4-output-tun",
.runs_after = VNET_FEATURES ("gso-ip4"),
};
+VNET_FEATURE_INIT (wg6_output_tun, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "wg6-output-tun",
+ .runs_after = VNET_FEATURES ("gso-ip6"),
+};
+
VLIB_PLUGIN_REGISTER () =
{
.version = VPP_BUILD_VER,
.description = "Wireguard Protocol",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/wireguard/wireguard.h b/src/plugins/wireguard/wireguard.h
index ef308c4c397..05cefc4f073 100755..100644
--- a/src/plugins/wireguard/wireguard.h
+++ b/src/plugins/wireguard/wireguard.h
@@ -18,16 +18,25 @@
#include <wireguard/wireguard_index_table.h>
#include <wireguard/wireguard_messages.h>
#include <wireguard/wireguard_timer.h>
+#include <vnet/buffer.h>
#define WG_DEFAULT_DATA_SIZE 2048
-extern vlib_node_registration_t wg_input_node;
-extern vlib_node_registration_t wg_output_tun_node;
+extern vlib_node_registration_t wg4_input_node;
+extern vlib_node_registration_t wg6_input_node;
+extern vlib_node_registration_t wg4_output_tun_node;
+extern vlib_node_registration_t wg6_output_tun_node;
typedef struct wg_per_thread_data_t_
{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_crypto_op_t *crypto_ops;
+ vnet_crypto_op_t *chained_crypto_ops;
+ vnet_crypto_op_chunk_t *chunks;
+ vnet_crypto_async_frame_t **async_frames;
u8 data[WG_DEFAULT_DATA_SIZE];
} wg_per_thread_data_t;
+
typedef struct
{
/* convenience */
@@ -37,19 +46,81 @@ typedef struct
wg_index_table_t index_table;
- u32 in_fq_index;
- u32 out_fq_index;
+ u32 in4_fq_index;
+ u32 in6_fq_index;
+ u32 out4_fq_index;
+ u32 out6_fq_index;
wg_per_thread_data_t *per_thread_data;
u8 feature_init;
tw_timer_wheel_16t_2w_512sl_t timer_wheel;
+
+ /* operation mode flags (e.g. async) */
+ u8 op_mode_flags;
} wg_main_t;
+typedef struct
+{
+ /* wg post node index for async crypto */
+ u32 wg4_post_next;
+ u32 wg6_post_next;
+} wg_async_post_next_t;
+
+extern wg_async_post_next_t wg_encrypt_async_next;
+extern wg_async_post_next_t wg_decrypt_async_next;
extern wg_main_t wg_main;
+/**
+ * Wireguard operation mode
+ **/
+#define foreach_wg_op_mode_flags _ (0, ASYNC, "async")
+
+/**
+ * Helper function to set/unset and check op modes
+ **/
+typedef enum wg_op_mode_flags_t_
+{
+#define _(v, f, s) WG_OP_MODE_FLAG_##f = 1 << v,
+ foreach_wg_op_mode_flags
+#undef _
+} __clib_packed wg_op_mode_flags_t;
+
+#define _(a, v, s) \
+ always_inline int wg_op_mode_set_##v (void) \
+ { \
+ return (wg_main.op_mode_flags |= WG_OP_MODE_FLAG_##v); \
+ } \
+ always_inline int wg_op_mode_unset_##v (void) \
+ { \
+ return (wg_main.op_mode_flags &= ~WG_OP_MODE_FLAG_##v); \
+ } \
+ always_inline int wg_op_mode_is_set_##v (void) \
+ { \
+ return (wg_main.op_mode_flags & WG_OP_MODE_FLAG_##v); \
+ }
+foreach_wg_op_mode_flags
+#undef _
+
+ typedef struct
+{
+ u8 __pad[22];
+ u16 next_index;
+} wg_post_data_t;
+
+STATIC_ASSERT (sizeof (wg_post_data_t) <=
+ STRUCT_SIZE_OF (vnet_buffer_opaque_t, unused),
+ "Custom meta-data too large for vnet_buffer_opaque_t");
+
+#define wg_post_data(b) \
+ ((wg_post_data_t *) ((u8 *) ((b)->opaque) + \
+ STRUCT_OFFSET_OF (vnet_buffer_opaque_t, unused)))
+
#define WG_START_EVENT 1
void wg_feature_init (wg_main_t * wmp);
+void wg_set_async_mode (u32 is_enabled);
+
+void wg_secure_zero_memory (void *v, size_t n);
#endif /* __included_wg_h__ */
diff --git a/src/plugins/wireguard/wireguard_api.c b/src/plugins/wireguard/wireguard_api.c
index 36cc2509463..e736efcd6c0 100755..100644
--- a/src/plugins/wireguard/wireguard_api.c
+++ b/src/plugins/wireguard/wireguard_api.c
@@ -27,9 +27,9 @@
#include <wireguard/wireguard_key.h>
#include <wireguard/wireguard.h>
#include <wireguard/wireguard_if.h>
-#include <wireguard/wireguard_peer.h>
#define REPLY_MSG_ID_BASE wmp->msg_id_base
+#include <wireguard/wireguard_peer.h>
#include <vlibapi/api_helper_macros.h>
static void
@@ -47,26 +47,18 @@ static void
ip_address_decode2 (&mp->interface.src_ip, &src);
- if (AF_IP6 == ip_addr_version (&src))
- rv = VNET_API_ERROR_INVALID_PROTOCOL;
+ if (mp->generate_key)
+ curve25519_gen_secret (private_key);
else
- {
- if (mp->generate_key)
- curve25519_gen_secret (private_key);
- else
- clib_memcpy (private_key, mp->interface.private_key,
- NOISE_PUBLIC_KEY_LEN);
-
- rv = wg_if_create (ntohl (mp->interface.user_instance), private_key,
- ntohs (mp->interface.port), &src, &sw_if_index);
- }
+ clib_memcpy (private_key, mp->interface.private_key, NOISE_PUBLIC_KEY_LEN);
+
+ rv = wg_if_create (ntohl (mp->interface.user_instance), private_key,
+ ntohs (mp->interface.port), &src, &sw_if_index);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_WIREGUARD_INTERFACE_CREATE_REPLY,
{
rmp->sw_if_index = htonl(sw_if_index);
});
- /* *INDENT-ON* */
}
static void
@@ -85,9 +77,7 @@ static void
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO(VL_API_WIREGUARD_INTERFACE_DELETE_REPLY);
- /* *INDENT-ON* */
}
typedef struct wg_deatils_walk_t_
@@ -119,6 +109,7 @@ wireguard_if_send_details (index_t wgii, void *data)
local->l_public, NOISE_PUBLIC_KEY_LEN);
rmp->interface.sw_if_index = htonl (wgi->sw_if_index);
rmp->interface.port = htons (wgi->port);
+ rmp->interface.user_instance = htonl (wgi->user_instance);
ip_address_encode2 (&wgi->src_ip, &rmp->interface.src_ip);
rmp->context = ctx->context;
@@ -147,7 +138,15 @@ vl_api_wireguard_interface_dump_t_handler (vl_api_wireguard_interface_dump_t *
.show_private_key = mp->show_private_key,
};
- wg_if_walk (wireguard_if_send_details, &ctx);
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ if (sw_if_index == ~0)
+ wg_if_walk (wireguard_if_send_details, &ctx);
+ else
+ {
+ index_t wgii = wg_if_find_by_sw_if_index (sw_if_index);
+ if (wgii != INDEX_INVALID)
+ wireguard_if_send_details (wgii, &ctx);
+ }
}
static void
@@ -177,29 +176,19 @@ vl_api_wireguard_peer_add_t_handler (vl_api_wireguard_peer_add_t * mp)
for (ii = 0; ii < mp->peer.n_allowed_ips; ii++)
ip_prefix_decode (&mp->peer.allowed_ips[ii], &allowed_ips[ii]);
- if (AF_IP6 == ip_addr_version (&endpoint) ||
- FIB_PROTOCOL_IP6 == allowed_ips[0].fp_proto)
- /* ip6 currently not supported, but the API needs to support it
- * else we'll need to change it later, and that's a PITA */
- rv = VNET_API_ERROR_INVALID_PROTOCOL;
- else
- rv = wg_peer_add (ntohl (mp->peer.sw_if_index),
- mp->peer.public_key,
- ntohl (mp->peer.table_id),
- &ip_addr_46 (&endpoint),
- allowed_ips,
- ntohs (mp->peer.port),
- ntohs (mp->peer.persistent_keepalive), &peeri);
+ rv = wg_peer_add (ntohl (mp->peer.sw_if_index), mp->peer.public_key,
+ ntohl (mp->peer.table_id), &ip_addr_46 (&endpoint),
+ allowed_ips, ntohs (mp->peer.port),
+ ntohs (mp->peer.persistent_keepalive), &peeri);
vec_free (allowed_ips);
done:
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
+
REPLY_MACRO2(VL_API_WIREGUARD_PEER_ADD_REPLY,
{
rmp->peer_index = ntohl (peeri);
});
- /* *INDENT-ON* */
}
static void
@@ -213,13 +202,11 @@ vl_api_wireguard_peer_remove_t_handler (vl_api_wireguard_peer_remove_t * mp)
rv = wg_peer_remove (ntohl (mp->peer_index));
- /* *INDENT-OFF* */
REPLY_MACRO(VL_API_WIREGUARD_PEER_REMOVE_REPLY);
- /* *INDENT-ON* */
}
static walk_rc_t
-send_wg_peers_details (index_t peeri, void *data)
+wg_api_send_peers_details (index_t peeri, void *data)
{
vl_api_wireguard_peers_details_t *rmp;
wg_deatils_walk_t *ctx = data;
@@ -227,7 +214,11 @@ send_wg_peers_details (index_t peeri, void *data)
u8 n_allowed_ips;
size_t ss;
+ if (pool_is_free_index (wg_peer_pool, peeri))
+ return (WALK_CONTINUE);
+
peer = wg_peer_get (peeri);
+
n_allowed_ips = vec_len (peer->allowed_ips);
ss = (sizeof (*rmp) + (n_allowed_ips * sizeof (rmp->peer.allowed_ips[0])));
@@ -237,8 +228,8 @@ send_wg_peers_details (index_t peeri, void *data)
rmp->_vl_msg_id = htons (VL_API_WIREGUARD_PEERS_DETAILS +
wg_main.msg_id_base);
- if (peer->is_dead)
- rmp->peer.flags = WIREGUARD_PEER_STATUS_DEAD;
+ rmp->peer.peer_index = htonl (peeri);
+ rmp->peer.flags = peer->flags;
clib_memcpy (rmp->peer.public_key,
peer->remote.r_public, NOISE_PUBLIC_KEY_LEN);
@@ -246,11 +237,12 @@ send_wg_peers_details (index_t peeri, void *data)
rmp->peer.port = htons (peer->dst.port);
rmp->peer.n_allowed_ips = n_allowed_ips;
rmp->peer.sw_if_index = htonl (peer->wg_sw_if_index);
+ rmp->peer.persistent_keepalive = htons (peer->persistent_keepalive_interval);
+ rmp->peer.table_id = htonl (peer->table_id);
int ii;
for (ii = 0; ii < n_allowed_ips; ii++)
- ip_prefix_encode (&peer->allowed_ips[ii].prefix,
- &rmp->peer.allowed_ips[ii]);
+ ip_prefix_encode (&peer->allowed_ips[ii], &rmp->peer.allowed_ips[ii]);
rmp->context = ctx->context;
@@ -276,7 +268,143 @@ vl_api_wireguard_peers_dump_t_handler (vl_api_wireguard_peers_dump_t * mp)
.context = mp->context,
};
- wg_peer_walk (send_wg_peers_details, &ctx);
+ if (mp->peer_index == ~0)
+ wg_peer_walk (wg_api_send_peers_details, &ctx);
+ else
+ wg_api_send_peers_details (ntohl (mp->peer_index), &ctx);
+}
+
+static vpe_client_registration_t *
+wg_api_client_lookup (wg_peer_t *peer, u32 client_index)
+{
+ uword *p;
+ vpe_client_registration_t *api_client = NULL;
+
+ p = hash_get (peer->api_client_by_client_index, client_index);
+ if (p)
+ api_client = vec_elt_at_index (peer->api_clients, p[0]);
+
+ return api_client;
+}
+
+static walk_rc_t
+wg_api_update_peer_api_client (index_t peeri, void *data)
+{
+ if (pool_is_free_index (wg_peer_pool, peeri))
+ return (WALK_CONTINUE);
+
+ vl_api_want_wireguard_peer_events_t *mp = data;
+ wg_peer_t *peer = wg_peer_get (peeri);
+
+ if (ntohl (mp->sw_if_index) != ~0 &&
+ ntohl (mp->sw_if_index) != peer->wg_sw_if_index)
+ {
+ return (WALK_CONTINUE);
+ }
+
+ vpe_client_registration_t *api_client;
+
+ api_client = wg_api_client_lookup (peer, mp->client_index);
+
+ if (api_client)
+ {
+ if (mp->enable_disable)
+ {
+ return (WALK_CONTINUE);
+ }
+ hash_unset (peer->api_client_by_client_index, api_client->client_index);
+ pool_put (peer->api_clients, api_client);
+ }
+ if (mp->enable_disable)
+ {
+ pool_get (peer->api_clients, api_client);
+ clib_memset (api_client, 0, sizeof (vpe_client_registration_t));
+ api_client->client_index = mp->client_index;
+ api_client->client_pid = mp->pid;
+ hash_set (peer->api_client_by_client_index, mp->client_index,
+ api_client - peer->api_clients);
+ }
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_want_wireguard_peer_events_t_handler (
+ vl_api_want_wireguard_peer_events_t *mp)
+{
+ wg_main_t *wmp = &wg_main;
+ vl_api_want_wireguard_peer_events_reply_t *rmp;
+ int rv = 0;
+
+ wg_feature_init (wmp);
+
+ if (mp->peer_index == ~0)
+ wg_peer_walk (wg_api_update_peer_api_client, mp);
+ else
+ wg_api_update_peer_api_client (ntohl (mp->peer_index), mp);
+
+ REPLY_MACRO (VL_API_WANT_WIREGUARD_PEER_EVENTS_REPLY);
+}
+
+static void
+wg_api_send_peer_event (vl_api_registration_t *rp, index_t peer_index,
+ wg_peer_flags flags)
+{
+ vl_api_wireguard_peer_event_t *mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+
+ mp->_vl_msg_id = htons (VL_API_WIREGUARD_PEER_EVENT + wg_main.msg_id_base);
+ mp->peer_index = htonl (peer_index);
+ mp->flags = flags;
+
+ vl_api_send_msg (rp, (u8 *) mp);
+}
+
+typedef struct
+{
+ index_t peeri;
+ wg_peer_flags flags;
+} wg_api_peer_event_args_t;
+
+static void
+wg_api_peer_event_cb (wg_api_peer_event_args_t *args)
+{
+ wg_peer_t *peer = wg_peer_get (args->peeri);
+ vpe_client_registration_t *api_client;
+ vl_api_registration_t *rp;
+
+ pool_foreach (api_client, peer->api_clients)
+ {
+ rp = vl_api_client_index_to_registration (api_client->client_index);
+ if (rp)
+ {
+ wg_api_send_peer_event (rp, args->peeri, args->flags);
+ }
+ };
+}
+
+void
+wg_api_peer_event (index_t peeri, wg_peer_flags flags)
+{
+ wg_api_peer_event_args_t args = {
+ .peeri = peeri,
+ .flags = flags,
+ };
+
+ vl_api_rpc_call_main_thread (wg_api_peer_event_cb, (u8 *) &args,
+ sizeof (args));
+}
+
+static void
+vl_api_wg_set_async_mode_t_handler (vl_api_wg_set_async_mode_t *mp)
+{
+ wg_main_t *wmp = &wg_main;
+ vl_api_wg_set_async_mode_reply_t *rmp;
+ int rv = 0;
+
+ wg_set_async_mode (mp->async_enable);
+
+ REPLY_MACRO (VL_API_WG_SET_ASYNC_MODE_REPLY);
}
/* set tup the API message handling tables */
diff --git a/src/plugins/wireguard/wireguard_chachapoly.c b/src/plugins/wireguard/wireguard_chachapoly.c
new file mode 100644
index 00000000000..0dd7908d2e2
--- /dev/null
+++ b/src/plugins/wireguard/wireguard_chachapoly.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2022 Rubicon Communications, LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <wireguard/wireguard.h>
+#include <wireguard/wireguard_chachapoly.h>
+#include <wireguard/wireguard_hchacha20.h>
+
+bool
+wg_chacha20poly1305_calc (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
+ u8 *aad, u32 aad_len, u64 nonce,
+ vnet_crypto_op_id_t op_id,
+ vnet_crypto_key_index_t key_index)
+{
+ vnet_crypto_op_t _op, *op = &_op;
+ u8 iv[12];
+ u8 tag_[NOISE_AUTHTAG_LEN] = {};
+ u8 src_[] = {};
+
+ clib_memset (iv, 0, 12);
+ clib_memcpy (iv + 4, &nonce, sizeof (nonce));
+
+ vnet_crypto_op_init (op, op_id);
+
+ op->tag_len = NOISE_AUTHTAG_LEN;
+ if (op_id == VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC)
+ {
+ op->tag = src + src_len - NOISE_AUTHTAG_LEN;
+ src_len -= NOISE_AUTHTAG_LEN;
+ op->flags |= VNET_CRYPTO_OP_FLAG_HMAC_CHECK;
+ }
+ else
+ op->tag = tag_;
+
+ op->src = !src ? src_ : src;
+ op->len = src_len;
+
+ op->dst = dst;
+ op->key_index = key_index;
+ op->aad = aad;
+ op->aad_len = aad_len;
+ op->iv = iv;
+
+ vnet_crypto_process_ops (vm, op, 1);
+ if (op_id == VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC)
+ {
+ clib_memcpy (dst + src_len, op->tag, NOISE_AUTHTAG_LEN);
+ }
+
+ return (op->status == VNET_CRYPTO_OP_STATUS_COMPLETED);
+}
+
+void
+wg_xchacha20poly1305_encrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
+ u8 *aad, u32 aad_len,
+ u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ int i;
+ u32 derived_key[CHACHA20POLY1305_KEY_SIZE / sizeof (u32)];
+ u64 h_nonce;
+
+ clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce));
+ h_nonce = le64toh (h_nonce);
+ hchacha20 (derived_key, nonce, key);
+
+ for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++)
+ (derived_key[i]) = htole32 ((derived_key[i]));
+
+ uint32_t key_idx;
+
+ key_idx =
+ vnet_crypto_key_add (vm, VNET_CRYPTO_ALG_CHACHA20_POLY1305,
+ (uint8_t *) derived_key, CHACHA20POLY1305_KEY_SIZE);
+
+ wg_chacha20poly1305_calc (vm, src, src_len, dst, aad, aad_len, h_nonce,
+ VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC, key_idx);
+
+ vnet_crypto_key_del (vm, key_idx);
+ wg_secure_zero_memory (derived_key, CHACHA20POLY1305_KEY_SIZE);
+}
+
+bool
+wg_xchacha20poly1305_decrypt (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
+ u8 *aad, u32 aad_len,
+ u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ u8 key[CHACHA20POLY1305_KEY_SIZE])
+{
+ int ret, i;
+ u32 derived_key[CHACHA20POLY1305_KEY_SIZE / sizeof (u32)];
+ u64 h_nonce;
+
+ clib_memcpy (&h_nonce, nonce + 16, sizeof (h_nonce));
+ h_nonce = le64toh (h_nonce);
+ hchacha20 (derived_key, nonce, key);
+
+ for (i = 0; i < (sizeof (derived_key) / sizeof (derived_key[0])); i++)
+ (derived_key[i]) = htole32 ((derived_key[i]));
+
+ uint32_t key_idx;
+
+ key_idx =
+ vnet_crypto_key_add (vm, VNET_CRYPTO_ALG_CHACHA20_POLY1305,
+ (uint8_t *) derived_key, CHACHA20POLY1305_KEY_SIZE);
+
+ ret =
+ wg_chacha20poly1305_calc (vm, src, src_len, dst, aad, aad_len, h_nonce,
+ VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC, key_idx);
+
+ vnet_crypto_key_del (vm, key_idx);
+ wg_secure_zero_memory (derived_key, CHACHA20POLY1305_KEY_SIZE);
+
+ return ret;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/wireguard/wireguard_chachapoly.h b/src/plugins/wireguard/wireguard_chachapoly.h
new file mode 100644
index 00000000000..f09b2c8dd9d
--- /dev/null
+++ b/src/plugins/wireguard/wireguard_chachapoly.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2022 Rubicon Communications, LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_wg_chachapoly_h__
+#define __included_wg_chachapoly_h__
+
+#include <vlib/vlib.h>
+#include <vnet/crypto/crypto.h>
+
+#define XCHACHA20POLY1305_NONCE_SIZE 24
+#define CHACHA20POLY1305_KEY_SIZE 32
+
+bool wg_chacha20poly1305_calc (vlib_main_t *vm, u8 *src, u32 src_len, u8 *dst,
+ u8 *aad, u32 aad_len, u64 nonce,
+ vnet_crypto_op_id_t op_id,
+ vnet_crypto_key_index_t key_index);
+
+void wg_xchacha20poly1305_encrypt (vlib_main_t *vm, u8 *src, u32 src_len,
+ u8 *dst, u8 *aad, u32 aad_len,
+ u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
+bool wg_xchacha20poly1305_decrypt (vlib_main_t *vm, u8 *src, u32 src_len,
+ u8 *dst, u8 *aad, u32 aad_len,
+ u8 nonce[XCHACHA20POLY1305_NONCE_SIZE],
+ u8 key[CHACHA20POLY1305_KEY_SIZE]);
+
+#endif /* __included_wg_chachapoly_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/wireguard/wireguard_cli.c b/src/plugins/wireguard/wireguard_cli.c
index 3b4bf56a3dc..e412fa36c44 100755..100644
--- a/src/plugins/wireguard/wireguard_cli.c
+++ b/src/plugins/wireguard/wireguard_cli.c
@@ -25,7 +25,7 @@ wg_if_create_cli (vlib_main_t * vm,
{
wg_main_t *wmp = &wg_main;
unformat_input_t _line_input, *line_input = &_line_input;
- u8 private_key[NOISE_PUBLIC_KEY_LEN];
+ u8 private_key[NOISE_PUBLIC_KEY_LEN + 1];
u32 instance, sw_if_index;
ip_address_t src_ip;
clib_error_t *error;
@@ -94,14 +94,12 @@ wg_if_create_cli (vlib_main_t * vm,
/*?
* Create a Wireguard interface.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (wg_if_create_command, static) = {
.path = "wireguard create",
.short_help = "wireguard create listen-port <port> "
"private-key <key> src <IP> [generate-key]",
.function = wg_if_create_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
wg_if_delete_cli (vlib_main_t * vm,
@@ -143,13 +141,11 @@ wg_if_delete_cli (vlib_main_t * vm,
/*?
* Delete a Wireguard interface.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (wg_if_delete_command, static) = {
.path = "wireguard delete",
.short_help = "wireguard delete <interface>",
.function = wg_if_delete_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -162,10 +158,10 @@ wg_peer_add_command_fn (vlib_main_t * vm,
unformat_input_t _line_input, *line_input = &_line_input;
u8 *public_key_64 = 0;
- u8 public_key[NOISE_PUBLIC_KEY_LEN];
+ u8 public_key[NOISE_PUBLIC_KEY_LEN + 1];
fib_prefix_t allowed_ip, *allowed_ips = NULL;
ip_prefix_t pfx;
- ip_address_t ip;
+ ip_address_t ip = ip_address_initializer;
u32 portDst = 0, table_id = 0;
u32 persistent_keepalive = 0;
u32 tun_sw_if_index = ~0;
@@ -192,7 +188,7 @@ wg_peer_add_command_fn (vlib_main_t * vm,
;
else if (unformat (line_input, "table-id %d", &table_id))
;
- else if (unformat (line_input, "port %d", &portDst))
+ else if (unformat (line_input, "dst-port %d", &portDst))
;
else if (unformat (line_input, "persistent-keepalive %d",
&persistent_keepalive))
@@ -213,16 +209,14 @@ wg_peer_add_command_fn (vlib_main_t * vm,
}
}
- if (AF_IP6 == ip_addr_version (&ip) ||
- FIB_PROTOCOL_IP6 == allowed_ip.fp_proto)
- rv = VNET_API_ERROR_INVALID_PROTOCOL;
- else
- rv = wg_peer_add (tun_sw_if_index,
- public_key,
- table_id,
- &ip_addr_46 (&ip),
- allowed_ips,
- portDst, persistent_keepalive, &peer_index);
+ if (0 == vec_len (allowed_ips))
+ {
+ error = clib_error_return (0, "Allowed IPs are not specified");
+ goto done;
+ }
+
+ rv = wg_peer_add (tun_sw_if_index, public_key, table_id, &ip_addr_46 (&ip),
+ allowed_ips, portDst, persistent_keepalive, &peer_index);
switch (rv)
{
@@ -253,16 +247,14 @@ done:
return error;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (wg_peer_add_command, static) =
-{
+VLIB_CLI_COMMAND (wg_peer_add_command, static) = {
.path = "wireguard peer add",
- .short_help = "wireguard peer add <wg_int> public-key <pub_key_other>"
- "endpoint <ip4_dst> allowed-ip <prefix>"
- "dst-port [port_dst] persistent-keepalive [keepalive_interval]",
+ .short_help =
+ "wireguard peer add <wg_int> public-key <pub_key_other> "
+ "endpoint <ip4_dst> allowed-ip <prefix> "
+ "dst-port [port_dst] persistent-keepalive [keepalive_interval]",
.function = wg_peer_add_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
wg_peer_remove_command_fn (vlib_main_t * vm,
@@ -301,14 +293,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (wg_peer_remove_command, static) =
{
.path = "wireguard peer remove",
.short_help = "wireguard peer remove <index>",
.function = wg_peer_remove_command_fn,
};
-/* *INDENT-ON* */
static walk_rc_t
wg_peer_show_one (index_t peeri, void *arg)
@@ -327,14 +317,12 @@ wg_show_peer_command_fn (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (wg_show_peers_command, static) =
{
.path = "show wireguard peer",
.short_help = "show wireguard peer",
.function = wg_show_peer_command_fn,
};
-/* *INDENT-ON* */
static walk_rc_t
wg_if_show_one (index_t itfi, void *arg)
@@ -357,14 +345,67 @@ wg_show_if_command_fn (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (wg_show_itfs_command, static) =
{
.path = "show wireguard interface",
.short_help = "show wireguard",
.function = wg_show_if_command_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+wg_set_async_mode_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ int async_enable = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "on"))
+ async_enable = 1;
+ else if (unformat (line_input, "off"))
+ async_enable = 0;
+ else
+ return (clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, line_input));
+ }
+
+ wg_set_async_mode (async_enable);
+
+ unformat_free (line_input);
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (wg_set_async_mode_command, static) = {
+ .path = "set wireguard async mode",
+ .short_help = "set wireguard async mode on|off",
+ .function = wg_set_async_mode_command_fn,
+};
+
+static clib_error_t *
+wg_show_mode_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vlib_cli_output (vm, "Wireguard mode");
+
+#define _(v, f, s) \
+ vlib_cli_output (vm, "\t%s: %s", s, \
+ (wg_op_mode_is_set_##f () ? "enabled" : "disabled"));
+ foreach_wg_op_mode_flags
+#undef _
+
+ return (NULL);
+}
+
+VLIB_CLI_COMMAND (wg_show_modemode_command, static) = {
+ .path = "show wireguard mode",
+ .short_help = "show wireguard mode",
+ .function = wg_show_mode_command_fn,
+};
+
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/wireguard/wireguard_cookie.c b/src/plugins/wireguard/wireguard_cookie.c
index f54ce715906..4ebbfa0fa63 100755..100644
--- a/src/plugins/wireguard/wireguard_cookie.c
+++ b/src/plugins/wireguard/wireguard_cookie.c
@@ -20,6 +20,7 @@
#include <vlib/vlib.h>
#include <wireguard/wireguard_cookie.h>
+#include <wireguard/wireguard_chachapoly.h>
#include <wireguard/wireguard.h>
static void cookie_precompute_key (uint8_t *,
@@ -29,9 +30,14 @@ static void cookie_macs_mac1 (message_macs_t *, const void *, size_t,
const uint8_t[COOKIE_KEY_SIZE]);
static void cookie_macs_mac2 (message_macs_t *, const void *, size_t,
const uint8_t[COOKIE_COOKIE_SIZE]);
-static void cookie_checker_make_cookie (vlib_main_t * vm, cookie_checker_t *,
+static void cookie_checker_make_cookie (vlib_main_t *vm, cookie_checker_t *,
uint8_t[COOKIE_COOKIE_SIZE],
- ip4_address_t ip4, u16 udp_port);
+ ip46_address_t *ip, u16 udp_port);
+
+static void ratelimit_init (ratelimit_t *, ratelimit_entry_t *);
+static void ratelimit_deinit (ratelimit_t *);
+static void ratelimit_gc (ratelimit_t *, bool);
+static bool ratelimit_allow (ratelimit_t *, ip46_address_t *);
/* Public Functions */
void
@@ -43,6 +49,14 @@ cookie_maker_init (cookie_maker_t * cp, const uint8_t key[COOKIE_INPUT_SIZE])
}
void
+cookie_checker_init (cookie_checker_t *cc, ratelimit_entry_t *pool)
+{
+ clib_memset (cc, 0, sizeof (*cc));
+ ratelimit_init (&cc->cc_ratelimit_v4, pool);
+ ratelimit_init (&cc->cc_ratelimit_v6, pool);
+}
+
+void
cookie_checker_update (cookie_checker_t * cc, uint8_t key[COOKIE_INPUT_SIZE])
{
if (key)
@@ -58,6 +72,58 @@ cookie_checker_update (cookie_checker_t * cc, uint8_t key[COOKIE_INPUT_SIZE])
}
void
+cookie_checker_deinit (cookie_checker_t *cc)
+{
+ ratelimit_deinit (&cc->cc_ratelimit_v4);
+ ratelimit_deinit (&cc->cc_ratelimit_v6);
+}
+
+void
+cookie_checker_create_payload (vlib_main_t *vm, cookie_checker_t *cc,
+ message_macs_t *cm,
+ uint8_t nonce[COOKIE_NONCE_SIZE],
+ uint8_t ecookie[COOKIE_ENCRYPTED_SIZE],
+ ip46_address_t *ip, u16 udp_port)
+{
+ uint8_t cookie[COOKIE_COOKIE_SIZE];
+
+ cookie_checker_make_cookie (vm, cc, cookie, ip, udp_port);
+ RAND_bytes (nonce, COOKIE_NONCE_SIZE);
+
+ wg_xchacha20poly1305_encrypt (vm, cookie, COOKIE_COOKIE_SIZE, ecookie,
+ cm->mac1, COOKIE_MAC_SIZE, nonce,
+ cc->cc_cookie_key);
+
+ wg_secure_zero_memory (cookie, sizeof (cookie));
+}
+
+bool
+cookie_maker_consume_payload (vlib_main_t *vm, cookie_maker_t *cp,
+ uint8_t nonce[COOKIE_NONCE_SIZE],
+ uint8_t ecookie[COOKIE_ENCRYPTED_SIZE])
+{
+ uint8_t cookie[COOKIE_COOKIE_SIZE];
+
+ if (cp->cp_mac1_valid == 0)
+ {
+ return false;
+ }
+
+ if (!wg_xchacha20poly1305_decrypt (vm, ecookie, COOKIE_ENCRYPTED_SIZE,
+ cookie, cp->cp_mac1_last, COOKIE_MAC_SIZE,
+ nonce, cp->cp_cookie_key))
+ {
+ return false;
+ }
+
+ clib_memcpy (cp->cp_cookie, cookie, COOKIE_COOKIE_SIZE);
+ cp->cp_birthdate = vlib_time_now (vm);
+ cp->cp_mac1_valid = 0;
+
+ return true;
+}
+
+void
cookie_maker_mac (cookie_maker_t * cp, message_macs_t * cm, void *buf,
size_t len)
{
@@ -76,9 +142,9 @@ cookie_maker_mac (cookie_maker_t * cp, message_macs_t * cm, void *buf,
}
enum cookie_mac_state
-cookie_checker_validate_macs (vlib_main_t * vm, cookie_checker_t * cc,
- message_macs_t * cm, void *buf, size_t len,
- bool busy, ip4_address_t ip4, u16 udp_port)
+cookie_checker_validate_macs (vlib_main_t *vm, cookie_checker_t *cc,
+ message_macs_t *cm, void *buf, size_t len,
+ bool busy, ip46_address_t *ip, u16 udp_port)
{
message_macs_t our_cm;
uint8_t cookie[COOKIE_COOKIE_SIZE];
@@ -93,13 +159,20 @@ cookie_checker_validate_macs (vlib_main_t * vm, cookie_checker_t * cc,
if (!busy)
return VALID_MAC_BUT_NO_COOKIE;
- cookie_checker_make_cookie (vm, cc, cookie, ip4, udp_port);
+ cookie_checker_make_cookie (vm, cc, cookie, ip, udp_port);
cookie_macs_mac2 (&our_cm, buf, len, cookie);
/* If the mac2 is invalid, we want to send a cookie response */
if (clib_memcmp (our_cm.mac2, cm->mac2, COOKIE_MAC_SIZE) != 0)
return VALID_MAC_BUT_NO_COOKIE;
+ /* If the mac2 is valid, we may want to rate limit the peer */
+ ratelimit_t *rl;
+ rl = ip46_address_is_ip4 (ip) ? &cc->cc_ratelimit_v4 : &cc->cc_ratelimit_v6;
+
+ if (!ratelimit_allow (rl, ip))
+ return VALID_MAC_WITH_COOKIE_BUT_RATELIMITED;
+
return VALID_MAC_WITH_COOKIE;
}
@@ -139,9 +212,9 @@ cookie_macs_mac2 (message_macs_t * cm, const void *buf, size_t len,
}
static void
-cookie_checker_make_cookie (vlib_main_t * vm, cookie_checker_t * cc,
+cookie_checker_make_cookie (vlib_main_t *vm, cookie_checker_t *cc,
uint8_t cookie[COOKIE_COOKIE_SIZE],
- ip4_address_t ip4, u16 udp_port)
+ ip46_address_t *ip, u16 udp_port)
{
blake2s_state_t state;
@@ -155,11 +228,138 @@ cookie_checker_make_cookie (vlib_main_t * vm, cookie_checker_t * cc,
blake2s_init_key (&state, COOKIE_COOKIE_SIZE, cc->cc_secret,
COOKIE_SECRET_SIZE);
- blake2s_update (&state, ip4.as_u8, sizeof (ip4_address_t)); //TODO: IP6
+ if (ip46_address_is_ip4 (ip))
+ {
+ blake2s_update (&state, ip->ip4.as_u8, sizeof (ip4_address_t));
+ }
+ else
+ {
+ blake2s_update (&state, ip->ip6.as_u8, sizeof (ip6_address_t));
+ }
blake2s_update (&state, (u8 *) & udp_port, sizeof (u16));
blake2s_final (&state, cookie, COOKIE_COOKIE_SIZE);
}
+static void
+ratelimit_init (ratelimit_t *rl, ratelimit_entry_t *pool)
+{
+ rl->rl_pool = pool;
+}
+
+static void
+ratelimit_deinit (ratelimit_t *rl)
+{
+ ratelimit_gc (rl, /* force */ true);
+ hash_free (rl->rl_table);
+}
+
+static void
+ratelimit_gc (ratelimit_t *rl, bool force)
+{
+ u32 r_key;
+ u32 r_idx;
+ ratelimit_entry_t *r;
+
+ if (force)
+ {
+ /* clang-format off */
+ hash_foreach (r_key, r_idx, rl->rl_table, {
+ r = pool_elt_at_index (rl->rl_pool, r_idx);
+ pool_put (rl->rl_pool, r);
+ });
+ /* clang-format on */
+ return;
+ }
+
+ f64 now = vlib_time_now (vlib_get_main ());
+
+ if ((rl->rl_last_gc + ELEMENT_TIMEOUT) < now)
+ {
+ u32 *r_key_to_del = NULL;
+ u32 *pr_key;
+
+ rl->rl_last_gc = now;
+
+ /* clang-format off */
+ hash_foreach (r_key, r_idx, rl->rl_table, {
+ r = pool_elt_at_index (rl->rl_pool, r_idx);
+ if ((r->r_last_time + ELEMENT_TIMEOUT) < now)
+ {
+ vec_add1 (r_key_to_del, r_key);
+ pool_put (rl->rl_pool, r);
+ }
+ });
+ /* clang-format on */
+
+ vec_foreach (pr_key, r_key_to_del)
+ {
+ hash_unset (rl->rl_table, *pr_key);
+ }
+
+ vec_free (r_key_to_del);
+ }
+}
+
+static bool
+ratelimit_allow (ratelimit_t *rl, ip46_address_t *ip)
+{
+ u32 r_key;
+ uword *p;
+ u32 r_idx;
+ ratelimit_entry_t *r;
+ f64 now = vlib_time_now (vlib_get_main ());
+
+ if (ip46_address_is_ip4 (ip))
+ /* Use all 4 bytes of IPv4 address */
+ r_key = ip->ip4.as_u32;
+ else
+ /* Use top 8 bytes (/64) of IPv6 address */
+ r_key = ip->ip6.as_u32[0] ^ ip->ip6.as_u32[1];
+
+ /* Check if there is already an entry for the IP address */
+ p = hash_get (rl->rl_table, r_key);
+ if (p)
+ {
+ u64 tokens;
+ f64 diff;
+
+ r_idx = p[0];
+ r = pool_elt_at_index (rl->rl_pool, r_idx);
+
+ diff = now - r->r_last_time;
+ r->r_last_time = now;
+
+ tokens = r->r_tokens + diff * NSEC_PER_SEC;
+
+ if (tokens > TOKEN_MAX)
+ tokens = TOKEN_MAX;
+
+ if (tokens >= INITIATION_COST)
+ {
+ r->r_tokens = tokens - INITIATION_COST;
+ return true;
+ }
+
+ r->r_tokens = tokens;
+ return false;
+ }
+
+ /* No entry for the IP address */
+ ratelimit_gc (rl, /* force */ false);
+
+ if (hash_elts (rl->rl_table) >= RATELIMIT_SIZE_MAX)
+ return false;
+
+ pool_get (rl->rl_pool, r);
+ r_idx = r - rl->rl_pool;
+ hash_set (rl->rl_table, r_key, r_idx);
+
+ r->r_last_time = now;
+ r->r_tokens = TOKEN_MAX - INITIATION_COST;
+
+ return true;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/wireguard/wireguard_cookie.h b/src/plugins/wireguard/wireguard_cookie.h
index 489cce81325..7467cf2ed4a 100755..100644
--- a/src/plugins/wireguard/wireguard_cookie.h
+++ b/src/plugins/wireguard/wireguard_cookie.h
@@ -18,14 +18,15 @@
#ifndef __included_wg_cookie_h__
#define __included_wg_cookie_h__
-#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip46_address.h>
#include <wireguard/wireguard_noise.h>
enum cookie_mac_state
{
INVALID_MAC,
VALID_MAC_BUT_NO_COOKIE,
- VALID_MAC_WITH_COOKIE
+ VALID_MAC_WITH_COOKIE,
+ VALID_MAC_WITH_COOKIE_BUT_RATELIMITED,
};
#define COOKIE_MAC_SIZE 16
@@ -50,8 +51,6 @@ enum cookie_mac_state
#define INITIATION_COST (NSEC_PER_SEC / INITIATIONS_PER_SECOND)
#define TOKEN_MAX (INITIATION_COST * INITIATIONS_BURSTABLE)
#define ELEMENT_TIMEOUT 1
-#define IPV4_MASK_SIZE 4 /* Use all 4 bytes of IPv4 address */
-#define IPV6_MASK_SIZE 8 /* Use top 8 bytes (/64) of IPv6 address */
typedef struct cookie_macs
{
@@ -59,6 +58,19 @@ typedef struct cookie_macs
uint8_t mac2[COOKIE_MAC_SIZE];
} message_macs_t;
+typedef struct ratelimit_entry
+{
+ f64 r_last_time;
+ u64 r_tokens;
+} ratelimit_entry_t;
+
+typedef struct ratelimit
+{
+ ratelimit_entry_t *rl_pool;
+ uword *rl_table;
+ f64 rl_last_gc;
+} ratelimit_t;
+
typedef struct cookie_maker
{
uint8_t cp_mac1_key[COOKIE_KEY_SIZE];
@@ -72,6 +84,9 @@ typedef struct cookie_maker
typedef struct cookie_checker
{
+ ratelimit_t cc_ratelimit_v4;
+ ratelimit_t cc_ratelimit_v6;
+
uint8_t cc_mac1_key[COOKIE_KEY_SIZE];
uint8_t cc_cookie_key[COOKIE_KEY_SIZE];
@@ -81,14 +96,22 @@ typedef struct cookie_checker
void cookie_maker_init (cookie_maker_t *, const uint8_t[COOKIE_INPUT_SIZE]);
+void cookie_checker_init (cookie_checker_t *, ratelimit_entry_t *);
void cookie_checker_update (cookie_checker_t *, uint8_t[COOKIE_INPUT_SIZE]);
+void cookie_checker_deinit (cookie_checker_t *);
+void cookie_checker_create_payload (vlib_main_t *vm, cookie_checker_t *cc,
+ message_macs_t *cm,
+ uint8_t nonce[COOKIE_NONCE_SIZE],
+ uint8_t ecookie[COOKIE_ENCRYPTED_SIZE],
+ ip46_address_t *ip, u16 udp_port);
+bool cookie_maker_consume_payload (vlib_main_t *vm, cookie_maker_t *cp,
+ uint8_t nonce[COOKIE_NONCE_SIZE],
+ uint8_t ecookie[COOKIE_ENCRYPTED_SIZE]);
void cookie_maker_mac (cookie_maker_t *, message_macs_t *, void *, size_t);
-enum cookie_mac_state cookie_checker_validate_macs (vlib_main_t * vm,
- cookie_checker_t *,
- message_macs_t *, void *,
- size_t, bool,
- ip4_address_t ip4,
- u16 udp_port);
+enum cookie_mac_state
+cookie_checker_validate_macs (vlib_main_t *vm, cookie_checker_t *,
+ message_macs_t *, void *, size_t, bool,
+ ip46_address_t *ip, u16 udp_port);
#endif /* __included_wg_cookie_h__ */
diff --git a/src/plugins/wireguard/wireguard_handoff.c b/src/plugins/wireguard/wireguard_handoff.c
index d3e37b30c88..195baf209a0 100644
--- a/src/plugins/wireguard/wireguard_handoff.c
+++ b/src/plugins/wireguard/wireguard_handoff.c
@@ -129,40 +129,77 @@ wg_handoff (vlib_main_t * vm,
return n_enq;
}
-VLIB_NODE_FN (wg_handshake_handoff) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (wg4_handshake_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
wg_main_t *wmp = &wg_main;
- return wg_handoff (vm, node, from_frame, wmp->in_fq_index,
+ return wg_handoff (vm, node, from_frame, wmp->in4_fq_index,
WG_HANDOFF_HANDSHAKE);
}
-VLIB_NODE_FN (wg_input_data_handoff) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (wg6_handshake_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
wg_main_t *wmp = &wg_main;
- return wg_handoff (vm, node, from_frame, wmp->in_fq_index,
+ return wg_handoff (vm, node, from_frame, wmp->in6_fq_index,
+ WG_HANDOFF_HANDSHAKE);
+}
+
+VLIB_NODE_FN (wg4_input_data_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ wg_main_t *wmp = &wg_main;
+
+ return wg_handoff (vm, node, from_frame, wmp->in4_fq_index,
+ WG_HANDOFF_INP_DATA);
+}
+
+VLIB_NODE_FN (wg6_input_data_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ wg_main_t *wmp = &wg_main;
+
+ return wg_handoff (vm, node, from_frame, wmp->in6_fq_index,
WG_HANDOFF_INP_DATA);
}
-VLIB_NODE_FN (wg_output_tun_handoff) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
+VLIB_NODE_FN (wg4_output_tun_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
wg_main_t *wmp = &wg_main;
- return wg_handoff (vm, node, from_frame, wmp->out_fq_index,
+ return wg_handoff (vm, node, from_frame, wmp->out4_fq_index,
WG_HANDOFF_OUT_TUN);
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (wg_handshake_handoff) =
+VLIB_NODE_FN (wg6_output_tun_handoff)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ wg_main_t *wmp = &wg_main;
+
+ return wg_handoff (vm, node, from_frame, wmp->out6_fq_index,
+ WG_HANDOFF_OUT_TUN);
+}
+
+VLIB_REGISTER_NODE (wg4_handshake_handoff) =
+{
+ .name = "wg4-handshake-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (wg_handoff_error_strings),
+ .error_strings = wg_handoff_error_strings,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (wg6_handshake_handoff) =
{
- .name = "wg-handshake-handoff",
+ .name = "wg6-handshake-handoff",
.vector_size = sizeof (u32),
.format_trace = format_wg_handoff_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
@@ -174,9 +211,9 @@ VLIB_REGISTER_NODE (wg_handshake_handoff) =
},
};
-VLIB_REGISTER_NODE (wg_input_data_handoff) =
+VLIB_REGISTER_NODE (wg4_input_data_handoff) =
{
- .name = "wg-input-data-handoff",
+ .name = "wg4-input-data-handoff",
.vector_size = sizeof (u32),
.format_trace = format_wg_handoff_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
@@ -188,9 +225,37 @@ VLIB_REGISTER_NODE (wg_input_data_handoff) =
},
};
-VLIB_REGISTER_NODE (wg_output_tun_handoff) =
+VLIB_REGISTER_NODE (wg6_input_data_handoff) =
+{
+ .name = "wg6-input-data-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (wg_handoff_error_strings),
+ .error_strings = wg_handoff_error_strings,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (wg4_output_tun_handoff) =
+{
+ .name = "wg4-output-tun-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (wg_handoff_error_strings),
+ .error_strings = wg_handoff_error_strings,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (wg6_output_tun_handoff) =
{
- .name = "wg-output-tun-handoff",
+ .name = "wg6-output-tun-handoff",
.vector_size = sizeof (u32),
.format_trace = format_wg_handoff_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
@@ -201,7 +266,6 @@ VLIB_REGISTER_NODE (wg_output_tun_handoff) =
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/wireguard/wireguard_hchacha20.h b/src/plugins/wireguard/wireguard_hchacha20.h
new file mode 100644
index 00000000000..a2d139621c9
--- /dev/null
+++ b/src/plugins/wireguard/wireguard_hchacha20.h
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2022 Rubicon Communications, LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * chacha-merged.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#ifndef __included_wg_hchacha20_h__
+#define __included_wg_hchacha20_h__
+
+#include <vlib/vlib.h>
+
+/* clang-format off */
+#define U32C(v) (v##U)
+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
+
+#define ROTL32(v, n) \
+ (U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+#define U8TO32_LITTLE(p) \
+ (((u32)((p)[0]) ) | \
+ ((u32)((p)[1]) << 8) | \
+ ((u32)((p)[2]) << 16) | \
+ ((u32)((p)[3]) << 24))
+
+#define ROTATE(v,c) (ROTL32(v,c))
+#define XOR(v,w) ((v) ^ (w))
+#define PLUS(v,w) (U32V((v) + (w)))
+
+#define QUARTERROUND(a,b,c,d) \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
+ a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
+ c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
+/* clang-format on */
+
+static const char sigma[16] = "expand 32-byte k";
+
+static inline void
+hchacha20 (u32 derived_key[8], const u8 nonce[16], const u8 key[32])
+{
+ int i;
+ u32 x[] = { U8TO32_LITTLE (sigma + 0), U8TO32_LITTLE (sigma + 4),
+ U8TO32_LITTLE (sigma + 8), U8TO32_LITTLE (sigma + 12),
+ U8TO32_LITTLE (key + 0), U8TO32_LITTLE (key + 4),
+ U8TO32_LITTLE (key + 8), U8TO32_LITTLE (key + 12),
+ U8TO32_LITTLE (key + 16), U8TO32_LITTLE (key + 20),
+ U8TO32_LITTLE (key + 24), U8TO32_LITTLE (key + 28),
+ U8TO32_LITTLE (nonce + 0), U8TO32_LITTLE (nonce + 4),
+ U8TO32_LITTLE (nonce + 8), U8TO32_LITTLE (nonce + 12) };
+
+ for (i = 20; i > 0; i -= 2)
+ {
+ QUARTERROUND (x[0], x[4], x[8], x[12])
+ QUARTERROUND (x[1], x[5], x[9], x[13])
+ QUARTERROUND (x[2], x[6], x[10], x[14])
+ QUARTERROUND (x[3], x[7], x[11], x[15])
+ QUARTERROUND (x[0], x[5], x[10], x[15])
+ QUARTERROUND (x[1], x[6], x[11], x[12])
+ QUARTERROUND (x[2], x[7], x[8], x[13])
+ QUARTERROUND (x[3], x[4], x[9], x[14])
+ }
+
+ clib_memcpy (derived_key + 0, x + 0, sizeof (u32) * 4);
+ clib_memcpy (derived_key + 4, x + 12, sizeof (u32) * 4);
+}
+
+#endif /* __included_wg_hchacha20_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/wireguard/wireguard_if.c b/src/plugins/wireguard/wireguard_if.c
index f7eb5a1d9e0..afeeda1dd2b 100644
--- a/src/plugins/wireguard/wireguard_if.c
+++ b/src/plugins/wireguard/wireguard_if.c
@@ -32,13 +32,17 @@ static uword *wg_if_instances;
static index_t *wg_if_index_by_sw_if_index;
/* vector of interfaces key'd on their UDP port (in network order) */
-index_t *wg_if_index_by_port;
+index_t **wg_if_indexes_by_port;
+
+/* pool of ratelimit entries */
+static ratelimit_entry_t *wg_ratelimit_pool;
static u8 *
format_wg_if_name (u8 * s, va_list * args)
{
u32 dev_instance = va_arg (*args, u32);
- return format (s, "wg%d", dev_instance);
+ wg_if_t *wgi = wg_if_get (dev_instance);
+ return format (s, "wg%d", wgi->user_instance);
}
u8 *
@@ -49,7 +53,6 @@ format_wg_if (u8 * s, va_list * args)
noise_local_t *local = noise_local_get (wgi->local_idx);
u8 key[NOISE_KEY_LEN_BASE64];
-
s = format (s, "[%d] %U src:%U port:%d",
wgii,
format_vnet_sw_if_index_name, vnet_get_main (),
@@ -113,20 +116,20 @@ wg_remote_get (const uint8_t public[NOISE_PUBLIC_KEY_LEN])
}
static uint32_t
-wg_index_set (noise_remote_t * remote)
+wg_index_set (vlib_main_t *vm, noise_remote_t *remote)
{
wg_main_t *wmp = &wg_main;
u32 rnd_seed = (u32) (vlib_time_now (wmp->vlib_main) * 1e6);
u32 ret =
- wg_index_table_add (&wmp->index_table, remote->r_peer_idx, rnd_seed);
+ wg_index_table_add (vm, &wmp->index_table, remote->r_peer_idx, rnd_seed);
return ret;
}
static void
-wg_index_drop (uint32_t key)
+wg_index_drop (vlib_main_t *vm, uint32_t key)
{
wg_main_t *wmp = &wg_main;
- wg_index_table_del (&wmp->index_table, key);
+ wg_index_table_del (vm, &wmp->index_table, key);
}
static clib_error_t *
@@ -151,11 +154,21 @@ wg_if_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
void
wg_if_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
{
- /* The peers manage the adjacencies */
+ index_t wgii;
+
+ /* Convert any neighbour adjacency that has a next-hop reachable through
+ * the wg interface into a midchain. This is to avoid sending ARP/ND to
+ * resolve the next-hop address via the wg interface. Then, if one of the
+ * peers has matching prefix among allowed prefixes, the midchain will be
+ * updated to the corresponding one.
+ */
+ adj_nbr_midchain_update_rewrite (ai, NULL, NULL, ADJ_FLAG_NONE, NULL);
+
+ wgii = wg_if_find_by_sw_if_index (sw_if_index);
+ wg_if_peer_walk (wg_if_get (wgii), wg_peer_if_adj_change, &ai);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (wg_if_device_class) = {
.name = "Wireguard Tunnel",
.format_device_name = format_wg_if_name,
@@ -167,7 +180,6 @@ VNET_HW_INTERFACE_CLASS(wg_hw_interface_class) = {
.update_adjacency = wg_if_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated wg_if instance numbers.
@@ -251,13 +263,6 @@ wg_if_create (u32 user_instance,
*sw_if_indexp = (u32) ~ 0;
/*
- * Check if the required port is already in use
- */
- udp_dst_port_info_t *pi = udp_get_dst_port_info (&udp_main, port, UDP_IP4);
- if (pi)
- return VNET_API_ERROR_UDP_PORT_TAKEN;
-
- /*
* Allocate a wg_if instance. Either select on dynamically
* or try to use the desired user_instance number.
*/
@@ -265,13 +270,11 @@ wg_if_create (u32 user_instance,
if (instance == ~0)
return VNET_API_ERROR_INVALID_REGISTRATION;
- /* *INDENT-OFF* */
struct noise_upcall upcall = {
.u_remote_get = wg_remote_get,
.u_index_set = wg_index_set,
.u_index_drop = wg_index_drop,
};
- /* *INDENT-ON* */
pool_get (noise_local_pool, local);
@@ -283,7 +286,7 @@ wg_if_create (u32 user_instance,
return VNET_API_ERROR_INVALID_REGISTRATION;
}
- pool_get (wg_if_pool, wg_if);
+ pool_get_zero (wg_if_pool, wg_if);
/* tunnel index (or instance) */
u32 t_idx = wg_if - wg_if_pool;
@@ -292,13 +295,20 @@ wg_if_create (u32 user_instance,
if (~0 == wg_if->user_instance)
wg_if->user_instance = t_idx;
- udp_register_dst_port (vlib_get_main (), port, wg_input_node.index, 1);
+ vec_validate_init_empty (wg_if_indexes_by_port, port, NULL);
+ if (vec_len (wg_if_indexes_by_port[port]) == 0)
+ {
+ udp_register_dst_port (vlib_get_main (), port, wg4_input_node.index,
+ UDP_IP4);
+ udp_register_dst_port (vlib_get_main (), port, wg6_input_node.index,
+ UDP_IP6);
+ }
- vec_validate_init_empty (wg_if_index_by_port, port, INDEX_INVALID);
- wg_if_index_by_port[port] = wg_if - wg_if_pool;
+ vec_add1 (wg_if_indexes_by_port[port], t_idx);
wg_if->port = port;
wg_if->local_idx = local - noise_local_pool;
+ cookie_checker_init (&wg_if->cookie_checker, wg_ratelimit_pool);
cookie_checker_update (&wg_if->cookie_checker, local->l_public);
hw_if_index = vnet_register_interface (vnm,
@@ -314,6 +324,8 @@ wg_if_create (u32 user_instance,
ip_address_copy (&wg_if->src_ip, src_ip);
wg_if->sw_if_index = *sw_if_indexp = hi->sw_if_index;
+ vnet_set_interface_l3_output_node (vnm->vlib_main, hi->sw_if_index,
+ (u8 *) "tunnel-output");
return 0;
}
@@ -331,15 +343,38 @@ wg_if_delete (u32 sw_if_index)
return VNET_API_ERROR_INVALID_VALUE;
wg_if_t *wg_if;
- wg_if = wg_if_get (wg_if_find_by_sw_if_index (sw_if_index));
+ index_t wgii = wg_if_find_by_sw_if_index (sw_if_index);
+ wg_if = wg_if_get (wgii);
if (NULL == wg_if)
return VNET_API_ERROR_INVALID_SW_IF_INDEX_2;
if (wg_if_instance_free (wg_if->user_instance) < 0)
return VNET_API_ERROR_INVALID_VALUE_2;
- udp_unregister_dst_port (vlib_get_main (), wg_if->port, 1);
- wg_if_index_by_port[wg_if->port] = INDEX_INVALID;
+ // Remove peers before interface deletion
+ wg_if_peer_walk (wg_if, wg_peer_if_delete, NULL);
+
+ hash_free (wg_if->peers);
+
+ index_t *ii;
+ index_t *ifs = wg_if_indexes_get_by_port (wg_if->port);
+ vec_foreach (ii, ifs)
+ {
+ if (*ii == wgii)
+ {
+ vec_del1 (ifs, ifs - ii);
+ break;
+ }
+ }
+ if (vec_len (ifs) == 0)
+ {
+ udp_unregister_dst_port (vlib_get_main (), wg_if->port, 1);
+ udp_unregister_dst_port (vlib_get_main (), wg_if->port, 0);
+ }
+
+ cookie_checker_deinit (&wg_if->cookie_checker);
+
+ vnet_reset_interface_l3_output_node (vnm->vlib_main, sw_if_index);
vnet_delete_hw_interface (vnm, hw->hw_if_index);
pool_put_index (noise_local_pool, wg_if->local_idx);
pool_put (wg_if_pool, wg_if);
@@ -353,8 +388,12 @@ wg_if_peer_add (wg_if_t * wgi, index_t peeri)
hash_set (wgi->peers, peeri, peeri);
if (1 == hash_elts (wgi->peers))
- vnet_feature_enable_disable ("ip4-output", "wg-output-tun",
- wgi->sw_if_index, 1, 0, 0);
+ {
+ vnet_feature_enable_disable ("ip4-output", "wg4-output-tun",
+ wgi->sw_if_index, 1, 0, 0);
+ vnet_feature_enable_disable ("ip6-output", "wg6-output-tun",
+ wgi->sw_if_index, 1, 0, 0);
+ }
}
void
@@ -363,8 +402,12 @@ wg_if_peer_remove (wg_if_t * wgi, index_t peeri)
hash_unset (wgi->peers, peeri);
if (0 == hash_elts (wgi->peers))
- vnet_feature_enable_disable ("ip4-output", "wg-output-tun",
- wgi->sw_if_index, 0, 0, 0);
+ {
+ vnet_feature_enable_disable ("ip4-output", "wg4-output-tun",
+ wgi->sw_if_index, 0, 0, 0);
+ vnet_feature_enable_disable ("ip6-output", "wg6-output-tun",
+ wgi->sw_if_index, 0, 0, 0);
+ }
}
void
@@ -372,13 +415,11 @@ wg_if_walk (wg_if_walk_cb_t fn, void *data)
{
index_t wgii;
- /* *INDENT-OFF* */
pool_foreach_index (wgii, wg_if_pool)
{
if (WALK_STOP == fn(wgii, data))
break;
}
- /* *INDENT-ON* */
}
index_t
@@ -386,85 +427,14 @@ wg_if_peer_walk (wg_if_t * wgi, wg_if_peer_walk_cb_t fn, void *data)
{
index_t peeri, val;
- /* *INDENT-OFF* */
- hash_foreach (peeri, val, wgi->peers,
- {
- if (WALK_STOP == fn(wgi, peeri, data))
+ hash_foreach (peeri, val, wgi->peers, {
+ if (WALK_STOP == fn (peeri, data))
return peeri;
});
- /* *INDENT-ON* */
return INDEX_INVALID;
}
-
-static void
-wg_if_table_bind_v4 (ip4_main_t * im,
- uword opaque,
- u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
-{
- wg_if_t *wg_if;
-
- wg_if = wg_if_get (wg_if_find_by_sw_if_index (sw_if_index));
- if (NULL == wg_if)
- return;
-
- wg_peer_table_bind_ctx_t ctx = {
- .af = AF_IP4,
- .old_fib_index = old_fib_index,
- .new_fib_index = new_fib_index,
- };
-
- wg_if_peer_walk (wg_if, wg_peer_if_table_change, &ctx);
-}
-
-static void
-wg_if_table_bind_v6 (ip6_main_t * im,
- uword opaque,
- u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
-{
- wg_if_t *wg_if;
-
- wg_if = wg_if_get (wg_if_find_by_sw_if_index (sw_if_index));
- if (NULL == wg_if)
- return;
-
- wg_peer_table_bind_ctx_t ctx = {
- .af = AF_IP6,
- .old_fib_index = old_fib_index,
- .new_fib_index = new_fib_index,
- };
-
- wg_if_peer_walk (wg_if, wg_peer_if_table_change, &ctx);
-}
-
-static clib_error_t *
-wg_if_module_init (vlib_main_t * vm)
-{
- {
- ip4_table_bind_callback_t cb = {
- .function = wg_if_table_bind_v4,
- };
- vec_add1 (ip4_main.table_bind_callbacks, cb);
- }
- {
- ip6_table_bind_callback_t cb = {
- .function = wg_if_table_bind_v6,
- };
- vec_add1 (ip6_main.table_bind_callbacks, cb);
- }
-
- return (NULL);
-}
-
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION (wg_if_module_init) =
-{
- .runs_after = VLIB_INITS("ip_main_init"),
-};
-/* *INDENT-ON* */
-
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/wireguard/wireguard_if.h b/src/plugins/wireguard/wireguard_if.h
index 7c11ad9b281..2a6ab8e4be5 100644
--- a/src/plugins/wireguard/wireguard_if.h
+++ b/src/plugins/wireguard/wireguard_if.h
@@ -31,13 +31,15 @@ typedef struct wg_if_t_
cookie_checker_t cookie_checker;
u16 port;
- wg_index_table_t index_table;
-
/* Source IP address for originated packets */
ip_address_t src_ip;
/* hash table of peers on this link */
uword *peers;
+
+ /* Under load params */
+ f64 handshake_counting_end;
+ u32 handshake_num;
} wg_if_t;
@@ -52,8 +54,7 @@ u8 *format_wg_if (u8 * s, va_list * va);
typedef walk_rc_t (*wg_if_walk_cb_t) (index_t wgi, void *data);
void wg_if_walk (wg_if_walk_cb_t fn, void *data);
-typedef walk_rc_t (*wg_if_peer_walk_cb_t) (wg_if_t * wgi, index_t peeri,
- void *data);
+typedef walk_rc_t (*wg_if_peer_walk_cb_t) (index_t peeri, void *data);
index_t wg_if_peer_walk (wg_if_t * wgi, wg_if_peer_walk_cb_t fn, void *data);
void wg_if_peer_add (wg_if_t * wgi, index_t peeri);
@@ -72,18 +73,56 @@ wg_if_get (index_t wgii)
return (pool_elt_at_index (wg_if_pool, wgii));
}
-extern index_t *wg_if_index_by_port;
+extern index_t **wg_if_indexes_by_port;
-static_always_inline wg_if_t *
-wg_if_get_by_port (u16 port)
+static_always_inline index_t *
+wg_if_indexes_get_by_port (u16 port)
{
- if (vec_len (wg_if_index_by_port) < port)
+ if (vec_len (wg_if_indexes_by_port) == 0)
return (NULL);
- if (INDEX_INVALID == wg_if_index_by_port[port])
+ if (vec_len (wg_if_indexes_by_port[port]) == 0)
return (NULL);
- return (wg_if_get (wg_if_index_by_port[port]));
+ return (wg_if_indexes_by_port[port]);
}
+#define HANDSHAKE_COUNTING_INTERVAL 0.5
+#define UNDER_LOAD_INTERVAL 1.0
+#define HANDSHAKE_NUM_PER_PEER_UNTIL_UNDER_LOAD 40
+
+static_always_inline bool
+wg_if_is_under_load (vlib_main_t *vm, wg_if_t *wgi)
+{
+ static f64 wg_under_load_end;
+ f64 now = vlib_time_now (vm);
+ u32 num_until_under_load =
+ hash_elts (wgi->peers) * HANDSHAKE_NUM_PER_PEER_UNTIL_UNDER_LOAD;
+
+ if (wgi->handshake_counting_end < now)
+ {
+ wgi->handshake_counting_end = now + HANDSHAKE_COUNTING_INTERVAL;
+ wgi->handshake_num = 0;
+ }
+ wgi->handshake_num++;
+
+ if (wgi->handshake_num >= num_until_under_load)
+ {
+ wg_under_load_end = now + UNDER_LOAD_INTERVAL;
+ return true;
+ }
+
+ if (wg_under_load_end > now)
+ {
+ return true;
+ }
+
+ return false;
+}
+
+static_always_inline void
+wg_if_dec_handshake_num (wg_if_t *wgi)
+{
+ wgi->handshake_num--;
+}
#endif
diff --git a/src/plugins/wireguard/wireguard_index_table.c b/src/plugins/wireguard/wireguard_index_table.c
index 5f81204b4c0..da53bfd75f1 100755..100644
--- a/src/plugins/wireguard/wireguard_index_table.c
+++ b/src/plugins/wireguard/wireguard_index_table.c
@@ -13,13 +13,15 @@
* limitations under the License.
*/
+#include <vlib/vlib.h>
#include <vppinfra/hash.h>
#include <vppinfra/pool.h>
#include <vppinfra/random.h>
#include <wireguard/wireguard_index_table.h>
u32
-wg_index_table_add (wg_index_table_t * table, u32 peer_pool_idx, u32 rnd_seed)
+wg_index_table_add (vlib_main_t *vm, wg_index_table_t *table,
+ u32 peer_pool_idx, u32 rnd_seed)
{
u32 key;
@@ -29,19 +31,25 @@ wg_index_table_add (wg_index_table_t * table, u32 peer_pool_idx, u32 rnd_seed)
if (hash_get (table->hash, key))
continue;
+ vlib_worker_thread_barrier_sync (vm);
hash_set (table->hash, key, peer_pool_idx);
+ vlib_worker_thread_barrier_release (vm);
break;
}
return key;
}
void
-wg_index_table_del (wg_index_table_t * table, u32 key)
+wg_index_table_del (vlib_main_t *vm, wg_index_table_t *table, u32 key)
{
uword *p;
p = hash_get (table->hash, key);
if (p)
- hash_unset (table->hash, key);
+ {
+ vlib_worker_thread_barrier_sync (vm);
+ hash_unset (table->hash, key);
+ vlib_worker_thread_barrier_release (vm);
+ }
}
u32 *
diff --git a/src/plugins/wireguard/wireguard_index_table.h b/src/plugins/wireguard/wireguard_index_table.h
index 67cae1f49d5..e9aa374c0ca 100755..100644
--- a/src/plugins/wireguard/wireguard_index_table.h
+++ b/src/plugins/wireguard/wireguard_index_table.h
@@ -16,6 +16,7 @@
#ifndef __included_wg_index_table_h__
#define __included_wg_index_table_h__
+#include <vlib/vlib.h>
#include <vppinfra/types.h>
typedef struct
@@ -23,9 +24,9 @@ typedef struct
uword *hash;
} wg_index_table_t;
-u32 wg_index_table_add (wg_index_table_t * table, u32 peer_pool_idx,
- u32 rnd_seed);
-void wg_index_table_del (wg_index_table_t * table, u32 key);
+u32 wg_index_table_add (vlib_main_t *vm, wg_index_table_t *table,
+ u32 peer_pool_idx, u32 rnd_seed);
+void wg_index_table_del (vlib_main_t *vm, wg_index_table_t *table, u32 key);
u32 *wg_index_table_lookup (const wg_index_table_t * table, u32 key);
#endif //__included_wg_index_table_h__
diff --git a/src/plugins/wireguard/wireguard_input.c b/src/plugins/wireguard/wireguard_input.c
index 5db814292f8..1eb7fbfed0b 100644
--- a/src/plugins/wireguard/wireguard_input.c
+++ b/src/plugins/wireguard/wireguard_input.c
@@ -25,14 +25,18 @@
#define foreach_wg_input_error \
_ (NONE, "No error") \
_ (HANDSHAKE_MAC, "Invalid MAC handshake") \
+ _ (HANDSHAKE_RATELIMITED, "Handshake ratelimited") \
_ (PEER, "Peer error") \
_ (INTERFACE, "Interface error") \
_ (DECRYPTION, "Failed during decryption") \
_ (KEEPALIVE_SEND, "Failed while sending Keepalive") \
_ (HANDSHAKE_SEND, "Failed while sending Handshake") \
_ (HANDSHAKE_RECEIVE, "Failed while receiving Handshake") \
- _ (TOO_BIG, "Packet too big") \
- _ (UNDEFINED, "Undefined error")
+ _ (COOKIE_DECRYPTION, "Failed during Cookie decryption") \
+ _ (COOKIE_SEND, "Failed during sending Cookie") \
+ _ (NO_BUFFERS, "No buffers") \
+ _ (UNDEFINED, "Undefined error") \
+ _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)")
typedef enum
{
@@ -56,6 +60,12 @@ typedef struct
index_t peer;
} wg_input_trace_t;
+typedef struct
+{
+ index_t peer;
+ u16 next;
+} wg_input_post_trace_t;
+
u8 *
format_wg_message_type (u8 * s, va_list * args)
{
@@ -79,11 +89,27 @@ format_wg_input_trace (u8 * s, va_list * args)
wg_input_trace_t *t = va_arg (*args, wg_input_trace_t *);
- s = format (s, "WG input: \n");
- s = format (s, " Type: %U\n", format_wg_message_type, t->type);
- s = format (s, " peer: %d\n", t->peer);
- s = format (s, " Length: %d\n", t->current_length);
- s = format (s, " Keepalive: %s", t->is_keepalive ? "true" : "false");
+ s = format (s, "Wireguard input: \n");
+ s = format (s, " Type: %U\n", format_wg_message_type, t->type);
+ s = format (s, " Peer: %d\n", t->peer);
+ s = format (s, " Length: %d\n", t->current_length);
+ s = format (s, " Keepalive: %s", t->is_keepalive ? "true" : "false");
+
+ return s;
+}
+
+/* post-node packet trace format function */
+static u8 *
+format_wg_input_post_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+
+ wg_input_post_trace_t *t = va_arg (*args, wg_input_post_trace_t *);
+
+ s = format (s, "WG input post: \n");
+ s = format (s, " peer: %u\n", t->peer);
+ s = format (s, " next: %u\n", t->next);
return s;
}
@@ -93,48 +119,52 @@ typedef enum
WG_INPUT_NEXT_HANDOFF_HANDSHAKE,
WG_INPUT_NEXT_HANDOFF_DATA,
WG_INPUT_NEXT_IP4_INPUT,
+ WG_INPUT_NEXT_IP6_INPUT,
WG_INPUT_NEXT_PUNT,
WG_INPUT_NEXT_ERROR,
WG_INPUT_N_NEXT,
} wg_input_next_t;
-/* static void */
-/* set_peer_address (wg_peer_t * peer, ip4_address_t ip4, u16 udp_port) */
-/* { */
-/* if (peer) */
-/* { */
-/* ip46_address_set_ip4 (&peer->dst.addr, &ip4); */
-/* peer->dst.port = udp_port; */
-/* } */
-/* } */
+static u8
+is_ip4_header (u8 *data)
+{
+ return (data[0] >> 4) == 0x4;
+}
static wg_input_error_t
-wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
+wg_handshake_process (vlib_main_t *vm, wg_main_t *wmp, vlib_buffer_t *b,
+ u32 node_idx, u8 is_ip4)
{
ASSERT (vm->thread_index == 0);
enum cookie_mac_state mac_state;
bool packet_needs_cookie;
bool under_load;
+ index_t *wg_ifs;
wg_if_t *wg_if;
wg_peer_t *peer = NULL;
void *current_b_data = vlib_buffer_get_current (b);
+ ip46_address_t src_ip;
+ if (is_ip4)
+ {
+ ip4_header_t *iph4 =
+ current_b_data - sizeof (udp_header_t) - sizeof (ip4_header_t);
+ ip46_address_set_ip4 (&src_ip, &iph4->src_address);
+ }
+ else
+ {
+ ip6_header_t *iph6 =
+ current_b_data - sizeof (udp_header_t) - sizeof (ip6_header_t);
+ ip46_address_set_ip6 (&src_ip, &iph6->src_address);
+ }
+
udp_header_t *uhd = current_b_data - sizeof (udp_header_t);
- ip4_header_t *iph =
- current_b_data - sizeof (udp_header_t) - sizeof (ip4_header_t);
- ip4_address_t ip4_src = iph->src_address;
- u16 udp_src_port = clib_host_to_net_u16 (uhd->src_port);;
- u16 udp_dst_port = clib_host_to_net_u16 (uhd->dst_port);;
+ u16 udp_src_port = clib_host_to_net_u16 (uhd->src_port);
+ u16 udp_dst_port = clib_host_to_net_u16 (uhd->dst_port);
message_header_t *header = current_b_data;
- under_load = false;
-
- wg_if = wg_if_get_by_port (udp_dst_port);
-
- if (NULL == wg_if)
- return WG_INPUT_ERROR_INTERFACE;
if (PREDICT_FALSE (header->type == MESSAGE_HANDSHAKE_COOKIE))
{
@@ -147,7 +177,9 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
else
return WG_INPUT_ERROR_PEER;
- // TODO: Implement cookie_maker_consume_payload
+ if (!cookie_maker_consume_payload (
+ vm, &peer->cookie_maker, packet->nonce, packet->encrypted_cookie))
+ return WG_INPUT_ERROR_COOKIE_DECRYPTION;
return WG_INPUT_ERROR_NONE;
}
@@ -159,16 +191,40 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
message_macs_t *macs = (message_macs_t *)
((u8 *) current_b_data + len - sizeof (*macs));
- mac_state =
- cookie_checker_validate_macs (vm, &wg_if->cookie_checker, macs,
- current_b_data, len, under_load, ip4_src,
- udp_src_port);
+ index_t *ii;
+ wg_ifs = wg_if_indexes_get_by_port (udp_dst_port);
+ if (NULL == wg_ifs)
+ return WG_INPUT_ERROR_INTERFACE;
+
+ vec_foreach (ii, wg_ifs)
+ {
+ wg_if = wg_if_get (*ii);
+ if (NULL == wg_if)
+ continue;
+
+ under_load = wg_if_is_under_load (vm, wg_if);
+ mac_state = cookie_checker_validate_macs (
+ vm, &wg_if->cookie_checker, macs, current_b_data, len, under_load,
+ &src_ip, udp_src_port);
+ if (mac_state == INVALID_MAC)
+ {
+ wg_if_dec_handshake_num (wg_if);
+ wg_if = NULL;
+ continue;
+ }
+ break;
+ }
+
+ if (NULL == wg_if)
+ return WG_INPUT_ERROR_HANDSHAKE_MAC;
if ((under_load && mac_state == VALID_MAC_WITH_COOKIE)
|| (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE))
packet_needs_cookie = false;
else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)
packet_needs_cookie = true;
+ else if (mac_state == VALID_MAC_WITH_COOKIE_BUT_RATELIMITED)
+ return WG_INPUT_ERROR_HANDSHAKE_RATELIMITED;
else
return WG_INPUT_ERROR_HANDSHAKE_MAC;
@@ -180,8 +236,16 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
if (packet_needs_cookie)
{
- // TODO: Add processing
+
+ if (!wg_send_handshake_cookie (vm, message->sender_index,
+ &wg_if->cookie_checker, macs,
+ &ip_addr_46 (&wg_if->src_ip),
+ wg_if->port, &src_ip, udp_src_port))
+ return WG_INPUT_ERROR_COOKIE_SEND;
+
+ return WG_INPUT_ERROR_NONE;
}
+
noise_remote_t *rp;
if (noise_consume_initiation
(vm, noise_local_get (wg_if->local_idx), &rp,
@@ -195,10 +259,11 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
return WG_INPUT_ERROR_PEER;
}
- // set_peer_address (peer, ip4_src, udp_src_port);
+ wg_peer_update_endpoint (rp->r_peer_idx, &src_ip, udp_src_port);
+
if (PREDICT_FALSE (!wg_send_handshake_response (vm, peer)))
{
- vlib_node_increment_counter (vm, wg_input_node.index,
+ vlib_node_increment_counter (vm, node_idx,
WG_INPUT_ERROR_HANDSHAKE_SEND, 1);
}
break;
@@ -206,13 +271,27 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
case MESSAGE_HANDSHAKE_RESPONSE:
{
message_handshake_response_t *resp = current_b_data;
+
+ if (packet_needs_cookie)
+ {
+ if (!wg_send_handshake_cookie (vm, resp->sender_index,
+ &wg_if->cookie_checker, macs,
+ &ip_addr_46 (&wg_if->src_ip),
+ wg_if->port, &src_ip, udp_src_port))
+ return WG_INPUT_ERROR_COOKIE_SEND;
+
+ return WG_INPUT_ERROR_NONE;
+ }
+
+ index_t peeri = INDEX_INVALID;
u32 *entry =
wg_index_table_lookup (&wmp->index_table, resp->receiver_index);
if (PREDICT_TRUE (entry != NULL))
{
- peer = wg_peer_get (*entry);
- if (peer->is_dead)
+ peeri = *entry;
+ peer = wg_peer_get (peeri);
+ if (wg_peer_is_dead (peer))
return WG_INPUT_ERROR_PEER;
}
else
@@ -225,12 +304,9 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
{
return WG_INPUT_ERROR_PEER;
}
- if (packet_needs_cookie)
- {
- // TODO: Add processing
- }
- // set_peer_address (peer, ip4_src, udp_src_port);
+ wg_peer_update_endpoint (peeri, &src_ip, udp_src_port);
+
if (noise_remote_begin_session (vm, &peer->remote))
{
@@ -238,9 +314,12 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
wg_timers_handshake_complete (peer);
if (PREDICT_FALSE (!wg_send_keepalive (vm, peer)))
{
- vlib_node_increment_counter (vm, wg_input_node.index,
- WG_INPUT_ERROR_KEEPALIVE_SEND,
- 1);
+ vlib_node_increment_counter (vm, node_idx,
+ WG_INPUT_ERROR_KEEPALIVE_SEND, 1);
+ }
+ else
+ {
+ wg_peer_update_flags (peeri, WG_PEER_ESTABLISHED, true);
}
}
break;
@@ -254,68 +333,450 @@ wg_handshake_process (vlib_main_t * vm, wg_main_t * wmp, vlib_buffer_t * b)
return WG_INPUT_ERROR_NONE;
}
-static_always_inline bool
-fib_prefix_is_cover_addr_4 (const fib_prefix_t * p1,
- const ip4_address_t * ip4)
+static_always_inline int
+wg_input_post_process (vlib_main_t *vm, vlib_buffer_t *b, u16 *next,
+ wg_peer_t *peer, message_data_t *data,
+ bool *is_keepalive)
{
- switch (p1->fp_proto)
+ next[0] = WG_INPUT_NEXT_PUNT;
+ noise_keypair_t *kp;
+ vlib_buffer_t *lb;
+
+ if ((kp = wg_get_active_keypair (&peer->remote, data->receiver_index)) ==
+ NULL)
+ return -1;
+
+ if (!noise_counter_recv (&kp->kp_ctr, data->counter))
{
- case FIB_PROTOCOL_IP4:
- return (ip4_destination_matches_route (&ip4_main,
- &p1->fp_addr.ip4,
- ip4, p1->fp_len) != 0);
- case FIB_PROTOCOL_IP6:
- return (false);
- case FIB_PROTOCOL_MPLS:
- break;
+ return -1;
+ }
+
+ lb = b;
+ /* Find last buffer in the chain */
+ while (lb->flags & VLIB_BUFFER_NEXT_PRESENT)
+ lb = vlib_get_buffer (vm, lb->next_buffer);
+
+ u16 encr_len = vlib_buffer_length_in_chain (vm, b) - sizeof (message_data_t);
+ u16 decr_len = encr_len - NOISE_AUTHTAG_LEN;
+
+ vlib_buffer_advance (b, sizeof (message_data_t));
+ vlib_buffer_chain_increase_length (b, lb, -NOISE_AUTHTAG_LEN);
+ vnet_buffer_offload_flags_clear (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
+
+ /* Keepalive packet has zero length */
+ if (decr_len == 0)
+ {
+ *is_keepalive = true;
+ return 0;
+ }
+
+ wg_timers_data_received (peer);
+
+ ip46_address_t src_ip;
+ u8 is_ip4_inner = is_ip4_header (vlib_buffer_get_current (b));
+ if (is_ip4_inner)
+ {
+ ip46_address_set_ip4 (
+ &src_ip, &((ip4_header_t *) vlib_buffer_get_current (b))->src_address);
+ }
+ else
+ {
+ ip46_address_set_ip6 (
+ &src_ip, &((ip6_header_t *) vlib_buffer_get_current (b))->src_address);
}
- return (false);
+
+ const fib_prefix_t *allowed_ip;
+ bool allowed = false;
+
+ /*
+ * we could make this into an ACL, but the expectation
+ * is that there aren't many allowed IPs and thus a linear
+ * walk is faster than an ACL
+ */
+ vec_foreach (allowed_ip, peer->allowed_ips)
+ {
+ if (fib_prefix_is_cover_addr_46 (allowed_ip, &src_ip))
+ {
+ allowed = true;
+ break;
+ }
+ }
+ if (allowed)
+ {
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = peer->wg_sw_if_index;
+ next[0] =
+ is_ip4_inner ? WG_INPUT_NEXT_IP4_INPUT : WG_INPUT_NEXT_IP6_INPUT;
+ }
+
+ return 0;
}
-VLIB_NODE_FN (wg_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+static_always_inline void
+wg_input_process_ops (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_crypto_op_t *ops, vlib_buffer_t *b[], u16 *nexts,
+ u16 drop_next)
{
- message_type_t header_type;
- u32 n_left_from;
- u32 *from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u16 nexts[VLIB_FRAME_SIZE], *next;
- u32 thread_index = vm->thread_index;
+ u32 n_fail, n_ops = vec_len (ops);
+ vnet_crypto_op_t *op = ops;
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- b = bufs;
- next = nexts;
+ if (n_ops == 0)
+ return;
- vlib_get_buffers (vm, from, bufs, n_left_from);
+ n_fail = n_ops - vnet_crypto_process_ops (vm, op, n_ops);
+
+ while (n_fail)
+ {
+ ASSERT (op - ops < n_ops);
+
+ if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ {
+ u32 bi = op->user_data;
+ b[bi]->error = node->errors[WG_INPUT_ERROR_DECRYPTION];
+ nexts[bi] = drop_next;
+ n_fail--;
+ }
+ op++;
+ }
+}
+
+static_always_inline void
+wg_input_process_chained_ops (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_crypto_op_t *ops, vlib_buffer_t *b[],
+ u16 *nexts, vnet_crypto_op_chunk_t *chunks,
+ u16 drop_next)
+{
+ u32 n_fail, n_ops = vec_len (ops);
+ vnet_crypto_op_t *op = ops;
+ if (n_ops == 0)
+ return;
+
+ n_fail = n_ops - vnet_crypto_process_chained_ops (vm, op, chunks, n_ops);
+
+ while (n_fail)
+ {
+ ASSERT (op - ops < n_ops);
+
+ if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ {
+ u32 bi = op->user_data;
+ b[bi]->error = node->errors[WG_INPUT_ERROR_DECRYPTION];
+ nexts[bi] = drop_next;
+ n_fail--;
+ }
+ op++;
+ }
+}
+
+static_always_inline void
+wg_input_chain_crypto (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vlib_buffer_t *b, vlib_buffer_t *lb, u8 *start,
+ u32 start_len, u16 *n_ch)
+{
+ vnet_crypto_op_chunk_t *ch;
+ vlib_buffer_t *cb = b;
+ u32 n_chunks = 1;
+
+ vec_add2 (ptd->chunks, ch, 1);
+ ch->len = start_len;
+ ch->src = ch->dst = start;
+ cb = vlib_get_buffer (vm, cb->next_buffer);
+
+ while (1)
+ {
+ vec_add2 (ptd->chunks, ch, 1);
+ n_chunks += 1;
+ if (lb == cb)
+ ch->len = cb->current_length - NOISE_AUTHTAG_LEN;
+ else
+ ch->len = cb->current_length;
+
+ ch->src = ch->dst = vlib_buffer_get_current (cb);
+
+ if (!(cb->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+
+ cb = vlib_get_buffer (vm, cb->next_buffer);
+ }
+
+ if (n_ch)
+ *n_ch = n_chunks;
+}
+
+always_inline void
+wg_prepare_sync_dec_op (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vlib_buffer_t *b, vlib_buffer_t *lb,
+ vnet_crypto_op_t **crypto_ops, u8 *src, u32 src_len,
+ u8 *dst, u8 *aad, u32 aad_len,
+ vnet_crypto_key_index_t key_index, u32 bi, u8 *iv)
+{
+ vnet_crypto_op_t _op, *op = &_op;
+ u8 src_[] = {};
+
+ vec_add2_aligned (crypto_ops[0], op, 1, CLIB_CACHE_LINE_BYTES);
+ vnet_crypto_op_init (op, VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC);
+
+ op->tag_len = NOISE_AUTHTAG_LEN;
+ op->tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
+ op->key_index = key_index;
+ op->aad = aad;
+ op->aad_len = aad_len;
+ op->iv = iv;
+ op->user_data = bi;
+ op->flags |= VNET_CRYPTO_OP_FLAG_HMAC_CHECK;
+
+ if (b != lb)
+ {
+ /* Chained buffers */
+ op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+ op->chunk_index = vec_len (ptd->chunks);
+ wg_input_chain_crypto (vm, ptd, b, lb, src, src_len + NOISE_AUTHTAG_LEN,
+ &op->n_chunks);
+ }
+ else
+ {
+ op->src = !src ? src_ : src;
+ op->len = src_len;
+ op->dst = dst;
+ }
+}
+
+static_always_inline void
+wg_input_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
+ u32 key_index, u32 crypto_len, i16 crypto_start_offset,
+ u32 buffer_index, u16 next_node, u8 *iv, u8 *tag,
+ u8 flags)
+{
+ vnet_crypto_async_frame_elt_t *fe;
+ u16 index;
+
+ ASSERT (f->n_elts < VNET_CRYPTO_FRAME_SIZE);
+
+ index = f->n_elts;
+ fe = &f->elts[index];
+ f->n_elts++;
+ fe->key_index = key_index;
+ fe->crypto_total_length = crypto_len;
+ fe->crypto_start_offset = crypto_start_offset;
+ fe->iv = iv;
+ fe->tag = tag;
+ fe->flags = flags;
+ f->buffer_indices[index] = buffer_index;
+ f->next_node_index[index] = next_node;
+}
+
+static_always_inline enum noise_state_crypt
+wg_input_process (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vnet_crypto_op_t **crypto_ops,
+ vnet_crypto_async_frame_t **async_frame, vlib_buffer_t *b,
+ vlib_buffer_t *lb, u32 buf_idx, noise_remote_t *r,
+ uint32_t r_idx, uint64_t nonce, uint8_t *src, size_t srclen,
+ size_t srclen_total, uint8_t *dst, u32 from_idx, u8 *iv,
+ f64 time, u8 is_async, u16 async_next_node)
+{
+ noise_keypair_t *kp;
+ enum noise_state_crypt ret = SC_FAILED;
+
+ if ((kp = wg_get_active_keypair (r, r_idx)) == NULL)
+ {
+ goto error;
+ }
+
+ /* We confirm that our values are within our tolerances. These values
+ * are the same as the encrypt routine.
+ *
+ * kp_ctr isn't locked here, we're happy to accept a racy read. */
+ if (wg_birthdate_has_expired_opt (kp->kp_birthdate, REJECT_AFTER_TIME,
+ time) ||
+ kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES)
+ goto error;
+
+ /* Decrypt, then validate the counter. We don't want to validate the
+ * counter before decrypting as we do not know the message is authentic
+ * prior to decryption. */
+
+ clib_memset (iv, 0, 4);
+ clib_memcpy (iv + 4, &nonce, sizeof (nonce));
+
+ if (is_async)
+ {
+ u8 flags = VNET_CRYPTO_OP_FLAG_HMAC_CHECK;
+ u8 *tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
+
+ if (b != lb)
+ flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+
+ if (NULL == *async_frame ||
+ vnet_crypto_async_frame_is_full (*async_frame))
+ {
+ *async_frame = vnet_crypto_async_get_frame (
+ vm, VNET_CRYPTO_OP_CHACHA20_POLY1305_TAG16_AAD0_DEC);
+ if (PREDICT_FALSE (NULL == *async_frame))
+ goto error;
+ /* Save the frame to the list we'll submit at the end */
+ vec_add1 (ptd->async_frames, *async_frame);
+ }
+
+ wg_input_add_to_frame (vm, *async_frame, kp->kp_recv_index, srclen_total,
+ src - b->data, buf_idx, async_next_node, iv, tag,
+ flags);
+ }
+ else
+ {
+ wg_prepare_sync_dec_op (vm, ptd, b, lb, crypto_ops, src, srclen, dst,
+ NULL, 0, kp->kp_recv_index, from_idx, iv);
+ }
+
+ /* If we've received the handshake confirming data packet then move the
+ * next keypair into current. If we do slide the next keypair in, then
+ * we skip the REKEY_AFTER_TIME_RECV check. This is safe to do as a
+ * data packet can't confirm a session that we are an INITIATOR of. */
+ if (kp == r->r_next)
+ {
+ clib_rwlock_writer_lock (&r->r_keypair_lock);
+ if (kp == r->r_next && kp->kp_local_index == r_idx)
+ {
+ noise_remote_keypair_free (vm, r, &r->r_previous);
+ r->r_previous = r->r_current;
+ r->r_current = r->r_next;
+ r->r_next = NULL;
+
+ ret = SC_CONN_RESET;
+ clib_rwlock_writer_unlock (&r->r_keypair_lock);
+ goto error;
+ }
+ clib_rwlock_writer_unlock (&r->r_keypair_lock);
+ }
+
+ /* Similar to when we encrypt, we want to notify the caller when we
+ * are approaching our tolerances. We notify if:
+ * - we're the initiator and the current keypair is older than
+ * REKEY_AFTER_TIME_RECV seconds. */
+ ret = SC_KEEP_KEY_FRESH;
+ kp = r->r_current;
+ if (kp != NULL && kp->kp_valid && kp->kp_is_initiator &&
+ wg_birthdate_has_expired_opt (kp->kp_birthdate, REKEY_AFTER_TIME_RECV,
+ time))
+ goto error;
+
+ ret = SC_OK;
+error:
+ return ret;
+}
+
+static_always_inline void
+wg_find_outer_addr_port (vlib_buffer_t *b, ip46_address_t *addr, u16 *port,
+ u8 is_ip4)
+{
+ if (is_ip4)
+ {
+ ip4_udp_header_t *ip4_udp_hdr =
+ vlib_buffer_get_current (b) - sizeof (ip4_udp_header_t);
+ ip46_address_set_ip4 (addr, &ip4_udp_hdr->ip4.src_address);
+ *port = clib_net_to_host_u16 (ip4_udp_hdr->udp.src_port);
+ }
+ else
+ {
+ ip6_udp_header_t *ip6_udp_hdr =
+ vlib_buffer_get_current (b) - sizeof (ip6_udp_header_t);
+ ip46_address_set_ip6 (addr, &ip6_udp_hdr->ip6.src_address);
+ *port = clib_net_to_host_u16 (ip6_udp_hdr->udp.src_port);
+ }
+}
+
+always_inline uword
+wg_input_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u8 is_ip4, u16 async_next_node)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
wg_main_t *wmp = &wg_main;
+ wg_per_thread_data_t *ptd =
+ vec_elt_at_index (wmp->per_thread_data, vm->thread_index);
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from = frame->n_vectors;
+
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ vlib_buffer_t *lb;
+ u32 thread_index = vm->thread_index;
+ vnet_crypto_op_t **crypto_ops;
+ const u16 drop_next = WG_INPUT_NEXT_PUNT;
+ message_type_t header_type;
+ vlib_buffer_t *data_bufs[VLIB_FRAME_SIZE];
+ u32 data_bi[VLIB_FRAME_SIZE]; /* buffer index for data */
+ u32 other_bi[VLIB_FRAME_SIZE]; /* buffer index for drop or handoff */
+ u16 other_nexts[VLIB_FRAME_SIZE], *other_next = other_nexts, n_other = 0;
+ u16 data_nexts[VLIB_FRAME_SIZE], *data_next = data_nexts, n_data = 0;
+ u16 n_async = 0;
+ const u8 is_async = wg_op_mode_is_set_ASYNC ();
+ vnet_crypto_async_frame_t *async_frame = NULL;
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+ vec_reset_length (ptd->crypto_ops);
+ vec_reset_length (ptd->chained_crypto_ops);
+ vec_reset_length (ptd->chunks);
+ vec_reset_length (ptd->async_frames);
+
+ f64 time = clib_time_now (&vm->clib_time) + vm->time_offset;
+
wg_peer_t *peer = NULL;
+ u32 *last_peer_time_idx = NULL;
+ u32 last_rec_idx = ~0;
+
+ bool is_keepalive = false;
+ u32 *peer_idx = NULL;
+ index_t peeri = INDEX_INVALID;
while (n_left_from > 0)
{
- bool is_keepalive = false;
- next[0] = WG_INPUT_NEXT_PUNT;
+ if (n_left_from > 2)
+ {
+ u8 *p;
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ p = vlib_buffer_get_current (b[1]);
+ CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (vlib_buffer_get_tail (b[1]), CLIB_CACHE_LINE_BYTES,
+ LOAD);
+ }
+
+ other_next[n_other] = WG_INPUT_NEXT_PUNT;
+ data_nexts[n_data] = WG_INPUT_N_NEXT;
+
header_type =
((message_header_t *) vlib_buffer_get_current (b[0]))->type;
- u32 *peer_idx;
if (PREDICT_TRUE (header_type == MESSAGE_DATA))
{
message_data_t *data = vlib_buffer_get_current (b[0]);
-
+ u8 *iv_data = b[0]->pre_data;
+ u32 buf_idx = from[b - bufs];
+ u32 n_bufs;
peer_idx = wg_index_table_lookup (&wmp->index_table,
data->receiver_index);
- if (peer_idx)
+ if (data->receiver_index != last_rec_idx)
{
- peer = wg_peer_get (*peer_idx);
+ peer_idx = wg_index_table_lookup (&wmp->index_table,
+ data->receiver_index);
+ if (PREDICT_TRUE (peer_idx != NULL))
+ {
+ peeri = *peer_idx;
+ peer = wg_peer_get (peeri);
+ last_rec_idx = data->receiver_index;
+ }
+ else
+ {
+ peer = NULL;
+ last_rec_idx = ~0;
+ }
}
- else
+
+ if (PREDICT_FALSE (!peer_idx))
{
- next[0] = WG_INPUT_NEXT_ERROR;
+ other_next[n_other] = WG_INPUT_NEXT_ERROR;
b[0]->error = node->errors[WG_INPUT_ERROR_PEER];
+ other_bi[n_other] = buf_idx;
+ n_other += 1;
goto out;
}
@@ -330,128 +791,445 @@ VLIB_NODE_FN (wg_input_node) (vlib_main_t * vm,
if (PREDICT_TRUE (thread_index != peer->input_thread_index))
{
- next[0] = WG_INPUT_NEXT_HANDOFF_DATA;
+ other_next[n_other] = WG_INPUT_NEXT_HANDOFF_DATA;
+ other_bi[n_other] = buf_idx;
+ n_other += 1;
goto next;
}
- u16 encr_len = b[0]->current_length - sizeof (message_data_t);
- u16 decr_len = encr_len - NOISE_AUTHTAG_LEN;
- if (PREDICT_FALSE (decr_len >= WG_DEFAULT_DATA_SIZE))
+ lb = b[0];
+ n_bufs = vlib_buffer_chain_linearize (vm, b[0]);
+ if (n_bufs == 0)
{
- b[0]->error = node->errors[WG_INPUT_ERROR_TOO_BIG];
+ other_next[n_other] = WG_INPUT_NEXT_ERROR;
+ b[0]->error = node->errors[WG_INPUT_ERROR_NO_BUFFERS];
+ other_bi[n_other] = buf_idx;
+ n_other += 1;
goto out;
}
- u8 *decr_data = wmp->per_thread_data[thread_index].data;
+ if (n_bufs > 1)
+ {
+ vlib_buffer_t *before_last = b[0];
+
+ /* Find last and before last buffer in the chain */
+ while (lb->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ before_last = lb;
+ lb = vlib_get_buffer (vm, lb->next_buffer);
+ }
+
+ /* Ensure auth tag is contiguous and not splitted into two last
+ * buffers */
+ if (PREDICT_FALSE (lb->current_length < NOISE_AUTHTAG_LEN))
+ {
+ u32 len_diff = NOISE_AUTHTAG_LEN - lb->current_length;
+
+ before_last->current_length -= len_diff;
+ if (before_last == b[0])
+ before_last->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ vlib_buffer_advance (lb, (signed) -len_diff);
+
+ clib_memcpy_fast (vlib_buffer_get_current (lb),
+ vlib_buffer_get_tail (before_last),
+ len_diff);
+ }
+ }
+
+ u16 encr_len = b[0]->current_length - sizeof (message_data_t);
+ u16 decr_len = encr_len - NOISE_AUTHTAG_LEN;
+ u16 encr_len_total =
+ vlib_buffer_length_in_chain (vm, b[0]) - sizeof (message_data_t);
+ u16 decr_len_total = encr_len_total - NOISE_AUTHTAG_LEN;
+
+ if (lb != b[0])
+ crypto_ops = &ptd->chained_crypto_ops;
+ else
+ crypto_ops = &ptd->crypto_ops;
- enum noise_state_crypt state_cr = noise_remote_decrypt (vm,
- &peer->remote,
- data->receiver_index,
- data->counter,
- data->encrypted_data,
- encr_len,
- decr_data);
+ enum noise_state_crypt state_cr =
+ wg_input_process (vm, ptd, crypto_ops, &async_frame, b[0], lb,
+ buf_idx, &peer->remote, data->receiver_index,
+ data->counter, data->encrypted_data, decr_len,
+ decr_len_total, data->encrypted_data, n_data,
+ iv_data, time, is_async, async_next_node);
- if (PREDICT_FALSE (state_cr == SC_CONN_RESET))
+ if (PREDICT_FALSE (state_cr == SC_FAILED))
{
- wg_timers_handshake_complete (peer);
+ wg_peer_update_flags (*peer_idx, WG_PEER_ESTABLISHED, false);
+ other_next[n_other] = WG_INPUT_NEXT_ERROR;
+ b[0]->error = node->errors[WG_INPUT_ERROR_DECRYPTION];
+ other_bi[n_other] = buf_idx;
+ n_other += 1;
+ goto out;
}
- else if (PREDICT_FALSE (state_cr == SC_KEEP_KEY_FRESH))
+ if (!is_async)
{
- wg_send_handshake_from_mt (*peer_idx, false);
+ data_bufs[n_data] = b[0];
+ data_bi[n_data] = buf_idx;
+ n_data += 1;
}
- else if (PREDICT_FALSE (state_cr == SC_FAILED))
+ else
{
- next[0] = WG_INPUT_NEXT_ERROR;
- b[0]->error = node->errors[WG_INPUT_ERROR_DECRYPTION];
- goto out;
+ n_async += 1;
}
- clib_memcpy (vlib_buffer_get_current (b[0]), decr_data, decr_len);
- b[0]->current_length = decr_len;
- vnet_buffer_offload_flags_clear (b[0],
- VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
-
- wg_timers_any_authenticated_packet_received (peer);
- wg_timers_any_authenticated_packet_traversal (peer);
-
- /* Keepalive packet has zero length */
- if (decr_len == 0)
+ if (PREDICT_FALSE (state_cr == SC_CONN_RESET))
{
- is_keepalive = true;
- goto out;
+ wg_timers_handshake_complete (peer);
+ goto next;
}
-
- wg_timers_data_received (peer);
-
- ip4_header_t *iph = vlib_buffer_get_current (b[0]);
-
- const wg_peer_allowed_ip_t *allowed_ip;
- bool allowed = false;
-
- /*
- * we could make this into an ACL, but the expectation
- * is that there aren't many allowed IPs and thus a linear
- * walk is fater than an ACL
- */
- vec_foreach (allowed_ip, peer->allowed_ips)
- {
- if (fib_prefix_is_cover_addr_4 (&allowed_ip->prefix,
- &iph->src_address))
- {
- allowed = true;
- break;
- }
- }
- if (allowed)
+ else if (PREDICT_FALSE (state_cr == SC_KEEP_KEY_FRESH))
{
- vnet_buffer (b[0])->sw_if_index[VLIB_RX] = peer->wg_sw_if_index;
- next[0] = WG_INPUT_NEXT_IP4_INPUT;
+ wg_send_handshake_from_mt (peeri, false);
+ goto next;
}
+ else if (PREDICT_TRUE (state_cr == SC_OK))
+ goto next;
}
else
{
- peer_idx = NULL;
-
/* Handshake packets should be processed in main thread */
if (thread_index != 0)
{
- next[0] = WG_INPUT_NEXT_HANDOFF_HANDSHAKE;
+ other_next[n_other] = WG_INPUT_NEXT_HANDOFF_HANDSHAKE;
+ other_bi[n_other] = from[b - bufs];
+ n_other += 1;
goto next;
}
- wg_input_error_t ret = wg_handshake_process (vm, wmp, b[0]);
+ wg_input_error_t ret =
+ wg_handshake_process (vm, wmp, b[0], node->node_index, is_ip4);
if (ret != WG_INPUT_ERROR_NONE)
{
- next[0] = WG_INPUT_NEXT_ERROR;
+ other_next[n_other] = WG_INPUT_NEXT_ERROR;
b[0]->error = node->errors[ret];
+ other_bi[n_other] = from[b - bufs];
+ n_other += 1;
+ }
+ else
+ {
+ other_bi[n_other] = from[b - bufs];
+ n_other += 1;
}
}
out:
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
{
wg_input_trace_t *t = vlib_add_trace (vm, node, b[0], sizeof (*t));
t->type = header_type;
t->current_length = b[0]->current_length;
t->is_keepalive = is_keepalive;
- t->peer = peer_idx ? *peer_idx : INDEX_INVALID;
+ t->peer = peer_idx ? peeri : INDEX_INVALID;
}
+
next:
n_left_from -= 1;
- next += 1;
b += 1;
}
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
+ /* decrypt packets */
+ wg_input_process_ops (vm, node, ptd->crypto_ops, data_bufs, data_nexts,
+ drop_next);
+ wg_input_process_chained_ops (vm, node, ptd->chained_crypto_ops, data_bufs,
+ data_nexts, ptd->chunks, drop_next);
+
+ /* process after decryption */
+ b = data_bufs;
+ n_left_from = n_data;
+ last_rec_idx = ~0;
+ last_peer_time_idx = NULL;
+
+ while (n_left_from > 0)
+ {
+ bool is_keepalive = false;
+ u32 *peer_idx = NULL;
+
+ if (PREDICT_FALSE (data_next[0] == WG_INPUT_NEXT_PUNT))
+ {
+ goto trace;
+ }
+ if (n_left_from > 2)
+ {
+ u8 *p;
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ p = vlib_buffer_get_current (b[1]);
+ CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (vlib_buffer_get_tail (b[1]), CLIB_CACHE_LINE_BYTES,
+ LOAD);
+ }
+
+ message_data_t *data = vlib_buffer_get_current (b[0]);
+ ip46_address_t out_src_ip;
+ u16 out_udp_src_port;
+
+ wg_find_outer_addr_port (b[0], &out_src_ip, &out_udp_src_port, is_ip4);
+
+ if (data->receiver_index != last_rec_idx)
+ {
+ peer_idx =
+ wg_index_table_lookup (&wmp->index_table, data->receiver_index);
+ if (PREDICT_TRUE (peer_idx != NULL))
+ {
+ peeri = *peer_idx;
+ peer = wg_peer_get (peeri);
+ last_rec_idx = data->receiver_index;
+ }
+ else
+ {
+ peer = NULL;
+ last_rec_idx = ~0;
+ }
+ }
+
+ if (PREDICT_TRUE (peer != NULL))
+ {
+ if (PREDICT_FALSE (wg_input_post_process (vm, b[0], data_next, peer,
+ data, &is_keepalive) < 0))
+ goto trace;
+ }
+ else
+ {
+ data_next[0] = WG_INPUT_NEXT_PUNT;
+ goto trace;
+ }
+
+ if (PREDICT_FALSE (peer_idx && (last_peer_time_idx != peer_idx)))
+ {
+ if (PREDICT_FALSE (
+ !ip46_address_is_equal (&peer->dst.addr, &out_src_ip) ||
+ peer->dst.port != out_udp_src_port))
+ wg_peer_update_endpoint_from_mt (peeri, &out_src_ip,
+ out_udp_src_port);
+ wg_timers_any_authenticated_packet_received_opt (peer, time);
+ wg_timers_any_authenticated_packet_traversal (peer);
+ wg_peer_update_flags (*peer_idx, WG_PEER_ESTABLISHED, true);
+ last_peer_time_idx = peer_idx;
+ }
+
+ vlib_increment_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_RX,
+ vm->thread_index, peer->wg_sw_if_index,
+ 1 /* packets */, b[0]->current_length);
+
+ trace:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ wg_input_trace_t *t = vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->type = header_type;
+ t->current_length = b[0]->current_length;
+ t->is_keepalive = is_keepalive;
+ t->peer = peer_idx ? peeri : INDEX_INVALID;
+ }
+
+ b += 1;
+ n_left_from -= 1;
+ data_next += 1;
+ }
+
+ if (n_async)
+ {
+ /* submit all of the open frames */
+ vnet_crypto_async_frame_t **async_frame;
+ vec_foreach (async_frame, ptd->async_frames)
+ {
+ if (PREDICT_FALSE (
+ vnet_crypto_async_submit_open_frame (vm, *async_frame) < 0))
+ {
+ u32 n_drop = (*async_frame)->n_elts;
+ u32 *bi = (*async_frame)->buffer_indices;
+ u16 index = n_other;
+ while (n_drop--)
+ {
+ other_bi[index] = bi[0];
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi[0]);
+ other_nexts[index] = drop_next;
+ b->error = node->errors[WG_INPUT_ERROR_CRYPTO_ENGINE_ERROR];
+ bi++;
+ index++;
+ }
+ n_other += (*async_frame)->n_elts;
+
+ vnet_crypto_async_reset_frame (*async_frame);
+ vnet_crypto_async_free_frame (vm, *async_frame);
+ }
+ }
+ }
+
+ /* enqueue other bufs */
+ if (n_other)
+ vlib_buffer_enqueue_to_next (vm, node, other_bi, other_next, n_other);
+
+ /* enqueue data bufs */
+ if (n_data)
+ vlib_buffer_enqueue_to_next (vm, node, data_bi, data_nexts, n_data);
return frame->n_vectors;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (wg_input_node) =
+always_inline uword
+wg_input_post (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
+ u8 is_ip4)
{
- .name = "wg-input",
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ wg_main_t *wmp = &wg_main;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left = frame->n_vectors;
+ wg_peer_t *peer = NULL;
+ u32 *peer_idx = NULL;
+ u32 *last_peer_time_idx = NULL;
+ index_t peeri = INDEX_INVALID;
+ u32 last_rec_idx = ~0;
+ f64 time = clib_time_now (&vm->clib_time) + vm->time_offset;
+
+ vlib_get_buffers (vm, from, b, n_left);
+
+ if (n_left >= 2)
+ {
+ vlib_prefetch_buffer_header (b[0], LOAD);
+ vlib_prefetch_buffer_header (b[1], LOAD);
+ }
+
+ while (n_left > 0)
+ {
+ if (n_left > 2)
+ {
+ u8 *p;
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ p = vlib_buffer_get_current (b[1]);
+ CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+ }
+
+ bool is_keepalive = false;
+ message_data_t *data = vlib_buffer_get_current (b[0]);
+ ip46_address_t out_src_ip;
+ u16 out_udp_src_port;
+
+ wg_find_outer_addr_port (b[0], &out_src_ip, &out_udp_src_port, is_ip4);
+
+ if (data->receiver_index != last_rec_idx)
+ {
+ peer_idx =
+ wg_index_table_lookup (&wmp->index_table, data->receiver_index);
+
+ if (PREDICT_TRUE (peer_idx != NULL))
+ {
+ peeri = *peer_idx;
+ peer = wg_peer_get (peeri);
+ last_rec_idx = data->receiver_index;
+ }
+ else
+ {
+ peer = NULL;
+ last_rec_idx = ~0;
+ }
+ }
+
+ if (PREDICT_TRUE (peer != NULL))
+ {
+ if (PREDICT_FALSE (wg_input_post_process (vm, b[0], next, peer, data,
+ &is_keepalive) < 0))
+ goto trace;
+ }
+ else
+ {
+ next[0] = WG_INPUT_NEXT_PUNT;
+ goto trace;
+ }
+
+ if (PREDICT_FALSE (peer_idx && (last_peer_time_idx != peer_idx)))
+ {
+ if (PREDICT_FALSE (
+ !ip46_address_is_equal (&peer->dst.addr, &out_src_ip) ||
+ peer->dst.port != out_udp_src_port))
+ wg_peer_update_endpoint_from_mt (peeri, &out_src_ip,
+ out_udp_src_port);
+ wg_timers_any_authenticated_packet_received_opt (peer, time);
+ wg_timers_any_authenticated_packet_traversal (peer);
+ wg_peer_update_flags (*peer_idx, WG_PEER_ESTABLISHED, true);
+ last_peer_time_idx = peer_idx;
+ }
+
+ vlib_increment_combined_counter (im->combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_RX,
+ vm->thread_index, peer->wg_sw_if_index,
+ 1 /* packets */, b[0]->current_length);
+
+ trace:
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ wg_input_post_trace_t *t =
+ vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->next = next[0];
+ t->peer = peer_idx ? peeri : INDEX_INVALID;
+ }
+
+ b += 1;
+ next += 1;
+ n_left -= 1;
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (wg4_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return wg_input_inline (vm, node, frame, /* is_ip4 */ 1,
+ wg_decrypt_async_next.wg4_post_next);
+}
+
+VLIB_NODE_FN (wg6_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return wg_input_inline (vm, node, frame, /* is_ip4 */ 0,
+ wg_decrypt_async_next.wg6_post_next);
+}
+
+VLIB_NODE_FN (wg4_input_post_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return wg_input_post (vm, node, from_frame, /* is_ip4 */ 1);
+}
+
+VLIB_NODE_FN (wg6_input_post_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return wg_input_post (vm, node, from_frame, /* is_ip4 */ 0);
+}
+
+VLIB_REGISTER_NODE (wg4_input_node) =
+{
+ .name = "wg4-input",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_input_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (wg_input_error_strings),
+ .error_strings = wg_input_error_strings,
+ .n_next_nodes = WG_INPUT_N_NEXT,
+ /* edit / add dispositions here */
+ .next_nodes = {
+ [WG_INPUT_NEXT_HANDOFF_HANDSHAKE] = "wg4-handshake-handoff",
+ [WG_INPUT_NEXT_HANDOFF_DATA] = "wg4-input-data-handoff",
+ [WG_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [WG_INPUT_NEXT_IP6_INPUT] = "ip6-input",
+ [WG_INPUT_NEXT_PUNT] = "error-punt",
+ [WG_INPUT_NEXT_ERROR] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (wg6_input_node) =
+{
+ .name = "wg6-input",
.vector_size = sizeof (u32),
.format_trace = format_wg_input_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
@@ -460,14 +1238,37 @@ VLIB_REGISTER_NODE (wg_input_node) =
.n_next_nodes = WG_INPUT_N_NEXT,
/* edit / add dispositions here */
.next_nodes = {
- [WG_INPUT_NEXT_HANDOFF_HANDSHAKE] = "wg-handshake-handoff",
- [WG_INPUT_NEXT_HANDOFF_DATA] = "wg-input-data-handoff",
+ [WG_INPUT_NEXT_HANDOFF_HANDSHAKE] = "wg6-handshake-handoff",
+ [WG_INPUT_NEXT_HANDOFF_DATA] = "wg6-input-data-handoff",
[WG_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum",
+ [WG_INPUT_NEXT_IP6_INPUT] = "ip6-input",
[WG_INPUT_NEXT_PUNT] = "error-punt",
[WG_INPUT_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
+
+VLIB_REGISTER_NODE (wg4_input_post_node) = {
+ .name = "wg4-input-post-node",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_input_post_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .sibling_of = "wg4-input",
+
+ .n_errors = ARRAY_LEN (wg_input_error_strings),
+ .error_strings = wg_input_error_strings,
+};
+
+VLIB_REGISTER_NODE (wg6_input_post_node) = {
+ .name = "wg6-input-post-node",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_input_post_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .sibling_of = "wg6-input",
+
+ .n_errors = ARRAY_LEN (wg_input_error_strings),
+ .error_strings = wg_input_error_strings,
+};
+
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/wireguard/wireguard_key.c b/src/plugins/wireguard/wireguard_key.c
index 1ef1d8bf743..1ef1d8bf743 100755..100644
--- a/src/plugins/wireguard/wireguard_key.c
+++ b/src/plugins/wireguard/wireguard_key.c
diff --git a/src/plugins/wireguard/wireguard_key.h b/src/plugins/wireguard/wireguard_key.h
index ed96fb1da91..ed96fb1da91 100755..100644
--- a/src/plugins/wireguard/wireguard_key.h
+++ b/src/plugins/wireguard/wireguard_key.h
diff --git a/src/plugins/wireguard/wireguard_messages.h b/src/plugins/wireguard/wireguard_messages.h
index 3587c5c8a45..3587c5c8a45 100755..100644
--- a/src/plugins/wireguard/wireguard_messages.h
+++ b/src/plugins/wireguard/wireguard_messages.h
diff --git a/src/plugins/wireguard/wireguard_noise.c b/src/plugins/wireguard/wireguard_noise.c
index 850be2c86c8..5fe2e44b03b 100755..100644
--- a/src/plugins/wireguard/wireguard_noise.c
+++ b/src/plugins/wireguard/wireguard_noise.c
@@ -17,6 +17,7 @@
#include <openssl/hmac.h>
#include <wireguard/wireguard.h>
+#include <wireguard/wireguard_chachapoly.h>
/* This implements Noise_IKpsk2:
*
@@ -32,11 +33,13 @@ noise_local_t *noise_local_pool;
static noise_keypair_t *noise_remote_keypair_allocate (noise_remote_t *);
static void noise_remote_keypair_free (vlib_main_t * vm, noise_remote_t *,
noise_keypair_t **);
-static uint32_t noise_remote_handshake_index_get (noise_remote_t *);
-static void noise_remote_handshake_index_drop (noise_remote_t *);
+static uint32_t noise_remote_handshake_index_get (vlib_main_t *vm,
+ noise_remote_t *);
+static void noise_remote_handshake_index_drop (vlib_main_t *vm,
+ noise_remote_t *);
static uint64_t noise_counter_send (noise_counter_t *);
-static bool noise_counter_recv (noise_counter_t *, uint64_t);
+bool noise_counter_recv (noise_counter_t *, uint64_t);
static void noise_kdf (uint8_t *, uint8_t *, uint8_t *, const uint8_t *,
size_t, size_t, size_t, size_t,
@@ -67,8 +70,6 @@ static void noise_msg_ephemeral (uint8_t[NOISE_HASH_LEN],
static void noise_tai64n_now (uint8_t[NOISE_TIMESTAMP_LEN]);
-static void secure_zero_memory (void *v, size_t n);
-
/* Set/Get noise parameters */
void
noise_local_init (noise_local_t * l, struct noise_upcall *upcall)
@@ -87,7 +88,7 @@ noise_local_set_private (noise_local_t * l,
}
void
-noise_remote_init (noise_remote_t * r, uint32_t peer_pool_idx,
+noise_remote_init (vlib_main_t *vm, noise_remote_t *r, uint32_t peer_pool_idx,
const uint8_t public[NOISE_PUBLIC_KEY_LEN],
u32 noise_local_idx)
{
@@ -98,19 +99,19 @@ noise_remote_init (noise_remote_t * r, uint32_t peer_pool_idx,
r->r_local_idx = noise_local_idx;
r->r_handshake.hs_state = HS_ZEROED;
- noise_remote_precompute (r);
+ noise_remote_precompute (vm, r);
}
void
-noise_remote_precompute (noise_remote_t * r)
+noise_remote_precompute (vlib_main_t *vm, noise_remote_t *r)
{
noise_local_t *l = noise_local_get (r->r_local_idx);
if (!curve25519_gen_shared (r->r_ss, l->l_private, r->r_public))
clib_memset (r->r_ss, 0, NOISE_PUBLIC_KEY_LEN);
- noise_remote_handshake_index_drop (r);
- secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
+ noise_remote_handshake_index_drop (vm, r);
+ wg_secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
}
/* Handshake functions */
@@ -122,7 +123,7 @@ noise_create_initiation (vlib_main_t * vm, noise_remote_t * r,
{
noise_handshake_t *hs = &r->r_handshake;
noise_local_t *l = noise_local_get (r->r_local_idx);
- uint8_t _key[NOISE_SYMMETRIC_KEY_LEN];
+ uint8_t _key[NOISE_SYMMETRIC_KEY_LEN] = { 0 };
uint32_t key_idx;
uint8_t *key;
int ret = false;
@@ -143,6 +144,7 @@ noise_create_initiation (vlib_main_t * vm, noise_remote_t * r,
/* es */
if (!noise_mix_dh (hs->hs_ck, key, hs->hs_e, r->r_public))
goto error;
+ vnet_crypto_key_update (vm, key_idx);
/* s */
noise_msg_encrypt (vm, es, l->l_public, NOISE_PUBLIC_KEY_LEN, key_idx,
@@ -151,17 +153,18 @@ noise_create_initiation (vlib_main_t * vm, noise_remote_t * r,
/* ss */
if (!noise_mix_ss (hs->hs_ck, key, r->r_ss))
goto error;
+ vnet_crypto_key_update (vm, key_idx);
/* {t} */
noise_tai64n_now (ets);
noise_msg_encrypt (vm, ets, ets, NOISE_TIMESTAMP_LEN, key_idx, hs->hs_hash);
- noise_remote_handshake_index_drop (r);
+ noise_remote_handshake_index_drop (vm, r);
hs->hs_state = CREATED_INITIATION;
- hs->hs_local_index = noise_remote_handshake_index_get (r);
+ hs->hs_local_index = noise_remote_handshake_index_get (vm, r);
*s_idx = hs->hs_local_index;
ret = true;
error:
- secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
+ wg_secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
vnet_crypto_key_del (vm, key_idx);
return ret;
}
@@ -177,9 +180,9 @@ noise_consume_initiation (vlib_main_t * vm, noise_local_t * l,
{
noise_remote_t *r;
noise_handshake_t hs;
- uint8_t _key[NOISE_SYMMETRIC_KEY_LEN];
- uint8_t r_public[NOISE_PUBLIC_KEY_LEN];
- uint8_t timestamp[NOISE_TIMESTAMP_LEN];
+ uint8_t _key[NOISE_SYMMETRIC_KEY_LEN] = { 0 };
+ uint8_t r_public[NOISE_PUBLIC_KEY_LEN] = { 0 };
+ uint8_t timestamp[NOISE_TIMESTAMP_LEN] = { 0 };
u32 key_idx;
uint8_t *key;
int ret = false;
@@ -197,6 +200,7 @@ noise_consume_initiation (vlib_main_t * vm, noise_local_t * l,
/* es */
if (!noise_mix_dh (hs.hs_ck, key, l->l_private, ue))
goto error;
+ vnet_crypto_key_update (vm, key_idx);
/* s */
@@ -212,6 +216,7 @@ noise_consume_initiation (vlib_main_t * vm, noise_local_t * l,
/* ss */
if (!noise_mix_ss (hs.hs_ck, key, r->r_ss))
goto error;
+ vnet_crypto_key_update (vm, key_idx);
/* {t} */
if (!noise_msg_decrypt (vm, timestamp, ets,
@@ -238,15 +243,15 @@ noise_consume_initiation (vlib_main_t * vm, noise_local_t * l,
goto error;
/* Ok, we're happy to accept this initiation now */
- noise_remote_handshake_index_drop (r);
+ noise_remote_handshake_index_drop (vm, r);
r->r_handshake = hs;
*rp = r;
ret = true;
error:
- secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
+ wg_secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
vnet_crypto_key_del (vm, key_idx);
- secure_zero_memory (&hs, sizeof (hs));
+ wg_secure_zero_memory (&hs, sizeof (hs));
return ret;
}
@@ -256,8 +261,8 @@ noise_create_response (vlib_main_t * vm, noise_remote_t * r, uint32_t * s_idx,
uint8_t en[0 + NOISE_AUTHTAG_LEN])
{
noise_handshake_t *hs = &r->r_handshake;
- uint8_t _key[NOISE_SYMMETRIC_KEY_LEN];
- uint8_t e[NOISE_PUBLIC_KEY_LEN];
+ uint8_t _key[NOISE_SYMMETRIC_KEY_LEN] = { 0 };
+ uint8_t e[NOISE_PUBLIC_KEY_LEN] = { 0 };
uint32_t key_idx;
uint8_t *key;
int ret = false;
@@ -286,20 +291,21 @@ noise_create_response (vlib_main_t * vm, noise_remote_t * r, uint32_t * s_idx,
/* psk */
noise_mix_psk (hs->hs_ck, hs->hs_hash, key, r->r_psk);
+ vnet_crypto_key_update (vm, key_idx);
/* {} */
noise_msg_encrypt (vm, en, NULL, 0, key_idx, hs->hs_hash);
hs->hs_state = CREATED_RESPONSE;
- hs->hs_local_index = noise_remote_handshake_index_get (r);
+ hs->hs_local_index = noise_remote_handshake_index_get (vm, r);
*r_idx = hs->hs_remote_index;
*s_idx = hs->hs_local_index;
ret = true;
error:
- secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
+ wg_secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
vnet_crypto_key_del (vm, key_idx);
- secure_zero_memory (e, NOISE_PUBLIC_KEY_LEN);
+ wg_secure_zero_memory (e, NOISE_PUBLIC_KEY_LEN);
return ret;
}
@@ -310,8 +316,8 @@ noise_consume_response (vlib_main_t * vm, noise_remote_t * r, uint32_t s_idx,
{
noise_local_t *l = noise_local_get (r->r_local_idx);
noise_handshake_t hs;
- uint8_t _key[NOISE_SYMMETRIC_KEY_LEN];
- uint8_t preshared_key[NOISE_PUBLIC_KEY_LEN];
+ uint8_t _key[NOISE_SYMMETRIC_KEY_LEN] = { 0 };
+ uint8_t preshared_key[NOISE_PUBLIC_KEY_LEN] = { 0 };
uint32_t key_idx;
uint8_t *key;
int ret = false;
@@ -340,6 +346,7 @@ noise_consume_response (vlib_main_t * vm, noise_remote_t * r, uint32_t s_idx,
/* psk */
noise_mix_psk (hs.hs_ck, hs.hs_hash, key, preshared_key);
+ vnet_crypto_key_update (vm, key_idx);
/* {} */
@@ -358,8 +365,8 @@ noise_consume_response (vlib_main_t * vm, noise_remote_t * r, uint32_t s_idx,
ret = true;
}
error:
- secure_zero_memory (&hs, sizeof (hs));
- secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
+ wg_secure_zero_memory (&hs, sizeof (hs));
+ wg_secure_zero_memory (key, NOISE_SYMMETRIC_KEY_LEN);
vnet_crypto_key_del (vm, key_idx);
return ret;
}
@@ -407,6 +414,8 @@ noise_remote_begin_session (vlib_main_t * vm, noise_remote_t * r)
/* Now we need to add_new_keypair */
clib_rwlock_writer_lock (&r->r_keypair_lock);
+ /* Activate barrier to synchronization keys between threads */
+ vlib_worker_thread_barrier_sync (vm);
next = r->r_next;
current = r->r_current;
previous = r->r_previous;
@@ -438,19 +447,20 @@ noise_remote_begin_session (vlib_main_t * vm, noise_remote_t * r)
r->r_next = noise_remote_keypair_allocate (r);
*r->r_next = kp;
}
+ vlib_worker_thread_barrier_release (vm);
clib_rwlock_writer_unlock (&r->r_keypair_lock);
- secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
+ wg_secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
- secure_zero_memory (&kp, sizeof (kp));
+ wg_secure_zero_memory (&kp, sizeof (kp));
return true;
}
void
noise_remote_clear (vlib_main_t * vm, noise_remote_t * r)
{
- noise_remote_handshake_index_drop (r);
- secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
+ noise_remote_handshake_index_drop (vm, r);
+ wg_secure_zero_memory (&r->r_handshake, sizeof (r->r_handshake));
clib_rwlock_writer_lock (&r->r_keypair_lock);
noise_remote_keypair_free (vm, r, &r->r_next);
@@ -492,54 +502,6 @@ noise_remote_ready (noise_remote_t * r)
return ret;
}
-static bool
-chacha20poly1305_calc (vlib_main_t * vm,
- u8 * src,
- u32 src_len,
- u8 * dst,
- u8 * aad,
- u32 aad_len,
- u64 nonce,
- vnet_crypto_op_id_t op_id,
- vnet_crypto_key_index_t key_index)
-{
- vnet_crypto_op_t _op, *op = &_op;
- u8 iv[12];
- u8 tag_[NOISE_AUTHTAG_LEN] = { };
- u8 src_[] = { };
-
- clib_memset (iv, 0, 12);
- clib_memcpy (iv + 4, &nonce, sizeof (nonce));
-
- vnet_crypto_op_init (op, op_id);
-
- op->tag_len = NOISE_AUTHTAG_LEN;
- if (op_id == VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC)
- {
- op->tag = src + src_len - NOISE_AUTHTAG_LEN;
- src_len -= NOISE_AUTHTAG_LEN;
- }
- else
- op->tag = tag_;
-
- op->src = !src ? src_ : src;
- op->len = src_len;
-
- op->dst = dst;
- op->key_index = key_index;
- op->aad = aad;
- op->aad_len = aad_len;
- op->iv = iv;
-
- vnet_crypto_process_ops (vm, op, 1);
- if (op_id == VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC)
- {
- clib_memcpy (dst + src_len, op->tag, NOISE_AUTHTAG_LEN);
- }
-
- return (op->status == VNET_CRYPTO_OP_STATUS_COMPLETED);
-}
-
enum noise_state_crypt
noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
uint64_t * nonce, uint8_t * src, size_t srclen,
@@ -548,7 +510,6 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
noise_keypair_t *kp;
enum noise_state_crypt ret = SC_FAILED;
- clib_rwlock_reader_lock (&r->r_keypair_lock);
if ((kp = r->r_current) == NULL)
goto error;
@@ -569,9 +530,9 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
* are passed back out to the caller through the provided data pointer. */
*r_idx = kp->kp_remote_index;
- chacha20poly1305_calc (vm, src, srclen, dst, NULL, 0, *nonce,
- VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC,
- kp->kp_send_index);
+ wg_chacha20poly1305_calc (vm, src, srclen, dst, NULL, 0, *nonce,
+ VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC,
+ kp->kp_send_index);
/* If our values are still within tolerances, but we are approaching
* the tolerances, we notify the caller with ESTALE that they should
@@ -588,94 +549,6 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t * r_idx,
ret = SC_OK;
error:
- clib_rwlock_reader_unlock (&r->r_keypair_lock);
- return ret;
-}
-
-enum noise_state_crypt
-noise_remote_decrypt (vlib_main_t * vm, noise_remote_t * r, uint32_t r_idx,
- uint64_t nonce, uint8_t * src, size_t srclen,
- uint8_t * dst)
-{
- noise_keypair_t *kp;
- enum noise_state_crypt ret = SC_FAILED;
- clib_rwlock_reader_lock (&r->r_keypair_lock);
-
- if (r->r_current != NULL && r->r_current->kp_local_index == r_idx)
- {
- kp = r->r_current;
- }
- else if (r->r_previous != NULL && r->r_previous->kp_local_index == r_idx)
- {
- kp = r->r_previous;
- }
- else if (r->r_next != NULL && r->r_next->kp_local_index == r_idx)
- {
- kp = r->r_next;
- }
- else
- {
- goto error;
- }
-
- /* We confirm that our values are within our tolerances. These values
- * are the same as the encrypt routine.
- *
- * kp_ctr isn't locked here, we're happy to accept a racy read. */
- if (wg_birthdate_has_expired (kp->kp_birthdate, REJECT_AFTER_TIME) ||
- kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES)
- goto error;
-
- /* Decrypt, then validate the counter. We don't want to validate the
- * counter before decrypting as we do not know the message is authentic
- * prior to decryption. */
- if (!chacha20poly1305_calc (vm, src, srclen, dst, NULL, 0, nonce,
- VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC,
- kp->kp_recv_index))
- goto error;
-
- if (!noise_counter_recv (&kp->kp_ctr, nonce))
- goto error;
-
- /* If we've received the handshake confirming data packet then move the
- * next keypair into current. If we do slide the next keypair in, then
- * we skip the REKEY_AFTER_TIME_RECV check. This is safe to do as a
- * data packet can't confirm a session that we are an INITIATOR of. */
- if (kp == r->r_next)
- {
- clib_rwlock_reader_unlock (&r->r_keypair_lock);
- clib_rwlock_writer_lock (&r->r_keypair_lock);
- if (kp == r->r_next && kp->kp_local_index == r_idx)
- {
- noise_remote_keypair_free (vm, r, &r->r_previous);
- r->r_previous = r->r_current;
- r->r_current = r->r_next;
- r->r_next = NULL;
-
- ret = SC_CONN_RESET;
- clib_rwlock_writer_unlock (&r->r_keypair_lock);
- clib_rwlock_reader_lock (&r->r_keypair_lock);
- goto error;
- }
- clib_rwlock_writer_unlock (&r->r_keypair_lock);
- clib_rwlock_reader_lock (&r->r_keypair_lock);
- }
-
- /* Similar to when we encrypt, we want to notify the caller when we
- * are approaching our tolerances. We notify if:
- * - we're the initiator and the current keypair is older than
- * REKEY_AFTER_TIME_RECV seconds. */
- ret = SC_KEEP_KEY_FRESH;
- kp = r->r_current;
- if (kp != NULL &&
- kp->kp_valid &&
- kp->kp_is_initiator &&
- wg_birthdate_has_expired (kp->kp_birthdate, REKEY_AFTER_TIME_RECV))
- goto error;
-
- ret = SC_OK;
-error:
- clib_rwlock_reader_unlock (&r->r_keypair_lock);
return ret;
}
@@ -689,86 +562,22 @@ noise_remote_keypair_allocate (noise_remote_t * r)
return kp;
}
-static void
-noise_remote_keypair_free (vlib_main_t * vm, noise_remote_t * r,
- noise_keypair_t ** kp)
-{
- noise_local_t *local = noise_local_get (r->r_local_idx);
- struct noise_upcall *u = &local->l_upcall;
- if (*kp)
- {
- u->u_index_drop ((*kp)->kp_local_index);
- vnet_crypto_key_del (vm, (*kp)->kp_send_index);
- vnet_crypto_key_del (vm, (*kp)->kp_recv_index);
- clib_mem_free (*kp);
- }
-}
-
static uint32_t
-noise_remote_handshake_index_get (noise_remote_t * r)
+noise_remote_handshake_index_get (vlib_main_t *vm, noise_remote_t *r)
{
noise_local_t *local = noise_local_get (r->r_local_idx);
struct noise_upcall *u = &local->l_upcall;
- return u->u_index_set (r);
+ return u->u_index_set (vm, r);
}
static void
-noise_remote_handshake_index_drop (noise_remote_t * r)
+noise_remote_handshake_index_drop (vlib_main_t *vm, noise_remote_t *r)
{
noise_handshake_t *hs = &r->r_handshake;
noise_local_t *local = noise_local_get (r->r_local_idx);
struct noise_upcall *u = &local->l_upcall;
if (hs->hs_state != HS_ZEROED)
- u->u_index_drop (hs->hs_local_index);
-}
-
-static uint64_t
-noise_counter_send (noise_counter_t * ctr)
-{
- uint64_t ret;
- ret = ctr->c_send++;
- return ret;
-}
-
-static bool
-noise_counter_recv (noise_counter_t * ctr, uint64_t recv)
-{
- uint64_t i, top, index_recv, index_ctr;
- unsigned long bit;
- bool ret = false;
-
- /* Check that the recv counter is valid */
- if (ctr->c_recv >= REJECT_AFTER_MESSAGES || recv >= REJECT_AFTER_MESSAGES)
- goto error;
-
- /* If the packet is out of the window, invalid */
- if (recv + COUNTER_WINDOW_SIZE < ctr->c_recv)
- goto error;
-
- /* If the new counter is ahead of the current counter, we'll need to
- * zero out the bitmap that has previously been used */
- index_recv = recv / COUNTER_BITS;
- index_ctr = ctr->c_recv / COUNTER_BITS;
-
- if (recv > ctr->c_recv)
- {
- top = clib_min (index_recv - index_ctr, COUNTER_NUM);
- for (i = 1; i <= top; i++)
- ctr->c_backtrack[(i + index_ctr) & (COUNTER_NUM - 1)] = 0;
- ctr->c_recv = recv;
- }
-
- index_recv %= COUNTER_NUM;
- bit = 1ul << (recv % COUNTER_BITS);
-
- if (ctr->c_backtrack[index_recv] & bit)
- goto error;
-
- ctr->c_backtrack[index_recv] |= bit;
-
- ret = true;
-error:
- return ret;
+ u->u_index_drop (vm, hs->hs_local_index);
}
static void
@@ -815,8 +624,8 @@ noise_kdf (uint8_t * a, uint8_t * b, uint8_t * c, const uint8_t * x,
out:
/* Clear sensitive data from stack */
- secure_zero_memory (sec, BLAKE2S_HASH_SIZE);
- secure_zero_memory (out, BLAKE2S_HASH_SIZE + 1);
+ wg_secure_zero_memory (sec, BLAKE2S_HASH_SIZE);
+ wg_secure_zero_memory (out, BLAKE2S_HASH_SIZE + 1);
}
static bool
@@ -831,7 +640,7 @@ noise_mix_dh (uint8_t ck[NOISE_HASH_LEN],
noise_kdf (ck, key, NULL, dh,
NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
ck);
- secure_zero_memory (dh, NOISE_PUBLIC_KEY_LEN);
+ wg_secure_zero_memory (dh, NOISE_PUBLIC_KEY_LEN);
return true;
}
@@ -872,7 +681,7 @@ noise_mix_psk (uint8_t ck[NOISE_HASH_LEN], uint8_t hash[NOISE_HASH_LEN],
NOISE_HASH_LEN, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN,
NOISE_SYMMETRIC_KEY_LEN, ck);
noise_mix_hash (hash, tmp, NOISE_HASH_LEN);
- secure_zero_memory (tmp, NOISE_HASH_LEN);
+ wg_secure_zero_memory (tmp, NOISE_HASH_LEN);
}
static void
@@ -899,8 +708,8 @@ noise_msg_encrypt (vlib_main_t * vm, uint8_t * dst, uint8_t * src,
uint8_t hash[NOISE_HASH_LEN])
{
/* Nonce always zero for Noise_IK */
- chacha20poly1305_calc (vm, src, src_len, dst, hash, NOISE_HASH_LEN, 0,
- VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC, key_idx);
+ wg_chacha20poly1305_calc (vm, src, src_len, dst, hash, NOISE_HASH_LEN, 0,
+ VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC, key_idx);
noise_mix_hash (hash, dst, src_len + NOISE_AUTHTAG_LEN);
}
@@ -910,8 +719,9 @@ noise_msg_decrypt (vlib_main_t * vm, uint8_t * dst, uint8_t * src,
uint8_t hash[NOISE_HASH_LEN])
{
/* Nonce always zero for Noise_IK */
- if (!chacha20poly1305_calc (vm, src, src_len, dst, hash, NOISE_HASH_LEN, 0,
- VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC, key_idx))
+ if (!wg_chacha20poly1305_calc (vm, src, src_len, dst, hash, NOISE_HASH_LEN,
+ 0, VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC,
+ key_idx))
return false;
noise_mix_hash (hash, src, src_len);
return true;
@@ -949,13 +759,6 @@ noise_tai64n_now (uint8_t output[NOISE_TIMESTAMP_LEN])
clib_memcpy (output + sizeof (sec), &nsec, sizeof (nsec));
}
-static void
-secure_zero_memory (void *v, size_t n)
-{
- static void *(*const volatile memset_v) (void *, int, size_t) = &memset;
- memset_v (v, 0, n);
-}
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/wireguard/wireguard_noise.h b/src/plugins/wireguard/wireguard_noise.h
index 5b5a88fa250..fd2c09ebfa5 100755..100644
--- a/src/plugins/wireguard/wireguard_noise.h
+++ b/src/plugins/wireguard/wireguard_noise.h
@@ -121,8 +121,8 @@ typedef struct noise_local
{
void *u_arg;
noise_remote_t *(*u_remote_get) (const uint8_t[NOISE_PUBLIC_KEY_LEN]);
- uint32_t (*u_index_set) (noise_remote_t *);
- void (*u_index_drop) (uint32_t);
+ uint32_t (*u_index_set) (vlib_main_t *, noise_remote_t *);
+ void (*u_index_drop) (vlib_main_t *, uint32_t);
} l_upcall;
} noise_local_t;
@@ -136,15 +136,23 @@ noise_local_get (uint32_t locali)
return (pool_elt_at_index (noise_local_pool, locali));
}
+static_always_inline uint64_t
+noise_counter_send (noise_counter_t *ctr)
+{
+ uint64_t ret;
+ ret = ctr->c_send++;
+ return ret;
+}
+
void noise_local_init (noise_local_t *, struct noise_upcall *);
bool noise_local_set_private (noise_local_t *,
const uint8_t[NOISE_PUBLIC_KEY_LEN]);
-void noise_remote_init (noise_remote_t *, uint32_t,
+void noise_remote_init (vlib_main_t *, noise_remote_t *, uint32_t,
const uint8_t[NOISE_PUBLIC_KEY_LEN], uint32_t);
/* Should be called anytime noise_local_set_private is called */
-void noise_remote_precompute (noise_remote_t *);
+void noise_remote_precompute (vlib_main_t *, noise_remote_t *);
/* Cryptographic functions */
bool noise_create_initiation (vlib_main_t * vm, noise_remote_t *,
@@ -187,12 +195,83 @@ noise_remote_encrypt (vlib_main_t * vm, noise_remote_t *,
uint32_t * r_idx,
uint64_t * nonce,
uint8_t * src, size_t srclen, uint8_t * dst);
-enum noise_state_crypt
-noise_remote_decrypt (vlib_main_t * vm, noise_remote_t *,
- uint32_t r_idx,
- uint64_t nonce,
- uint8_t * src, size_t srclen, uint8_t * dst);
+static_always_inline noise_keypair_t *
+wg_get_active_keypair (noise_remote_t *r, uint32_t r_idx)
+{
+ if (r->r_current != NULL && r->r_current->kp_local_index == r_idx)
+ {
+ return r->r_current;
+ }
+ else if (r->r_previous != NULL && r->r_previous->kp_local_index == r_idx)
+ {
+ return r->r_previous;
+ }
+ else if (r->r_next != NULL && r->r_next->kp_local_index == r_idx)
+ {
+ return r->r_next;
+ }
+ else
+ {
+ return NULL;
+ }
+}
+
+inline bool
+noise_counter_recv (noise_counter_t *ctr, uint64_t recv)
+{
+ uint64_t i, top, index_recv, index_ctr;
+ unsigned long bit;
+ bool ret = false;
+
+ /* Check that the recv counter is valid */
+ if (ctr->c_recv >= REJECT_AFTER_MESSAGES || recv >= REJECT_AFTER_MESSAGES)
+ goto error;
+
+ /* If the packet is out of the window, invalid */
+ if (recv + COUNTER_WINDOW_SIZE < ctr->c_recv)
+ goto error;
+
+ /* If the new counter is ahead of the current counter, we'll need to
+ * zero out the bitmap that has previously been used */
+ index_recv = recv / COUNTER_BITS;
+ index_ctr = ctr->c_recv / COUNTER_BITS;
+
+ if (recv > ctr->c_recv)
+ {
+ top = clib_min (index_recv - index_ctr, COUNTER_NUM);
+ for (i = 1; i <= top; i++)
+ ctr->c_backtrack[(i + index_ctr) & (COUNTER_NUM - 1)] = 0;
+ ctr->c_recv = recv;
+ }
+
+ index_recv %= COUNTER_NUM;
+ bit = 1ul << (recv % COUNTER_BITS);
+
+ if (ctr->c_backtrack[index_recv] & bit)
+ goto error;
+
+ ctr->c_backtrack[index_recv] |= bit;
+
+ ret = true;
+error:
+ return ret;
+}
+
+static_always_inline void
+noise_remote_keypair_free (vlib_main_t *vm, noise_remote_t *r,
+ noise_keypair_t **kp)
+{
+ noise_local_t *local = noise_local_get (r->r_local_idx);
+ struct noise_upcall *u = &local->l_upcall;
+ if (*kp)
+ {
+ u->u_index_drop (vm, (*kp)->kp_local_index);
+ vnet_crypto_key_del (vm, (*kp)->kp_send_index);
+ vnet_crypto_key_del (vm, (*kp)->kp_recv_index);
+ clib_mem_free (*kp);
+ }
+}
#endif /* __included_wg_noise_h__ */
diff --git a/src/plugins/wireguard/wireguard_output_tun.c b/src/plugins/wireguard/wireguard_output_tun.c
index 53a8797c973..c9411f6ff20 100755..100644
--- a/src/plugins/wireguard/wireguard_output_tun.c
+++ b/src/plugins/wireguard/wireguard_output_tun.c
@@ -21,11 +21,12 @@
#include <wireguard/wireguard.h>
#include <wireguard/wireguard_send.h>
-#define foreach_wg_output_error \
- _(NONE, "No error") \
- _(PEER, "Peer error") \
- _(KEYPAIR, "Keypair error") \
- _(TOO_BIG, "packet too big") \
+#define foreach_wg_output_error \
+ _ (NONE, "No error") \
+ _ (PEER, "Peer error") \
+ _ (KEYPAIR, "Keypair error") \
+ _ (NO_BUFFERS, "No buffers") \
+ _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)")
typedef enum
{
@@ -51,18 +52,34 @@ typedef enum
typedef struct
{
- ip4_udp_header_t hdr;
index_t peer;
+ u8 header[sizeof (ip6_udp_header_t)];
+ u8 is_ip4;
} wg_output_tun_trace_t;
+typedef struct
+{
+ index_t peer;
+ u32 next_index;
+} wg_output_tun_post_trace_t;
+
u8 *
format_ip4_udp_header (u8 * s, va_list * args)
{
- ip4_udp_header_t *hdr = va_arg (*args, ip4_udp_header_t *);
+ ip4_udp_header_t *hdr4 = va_arg (*args, ip4_udp_header_t *);
+
+ s = format (s, "%U:$U", format_ip4_header, &hdr4->ip4, format_udp_header,
+ &hdr4->udp);
+ return (s);
+}
- s = format (s, "%U:$U",
- format_ip4_header, &hdr->ip4, format_udp_header, &hdr->udp);
+u8 *
+format_ip6_udp_header (u8 *s, va_list *args)
+{
+ ip6_udp_header_t *hdr6 = va_arg (*args, ip6_udp_header_t *);
+ s = format (s, "%U:$U", format_ip6_header, &hdr6->ip6, format_udp_header,
+ &hdr6->udp);
return (s);
}
@@ -76,50 +93,415 @@ format_wg_output_tun_trace (u8 * s, va_list * args)
wg_output_tun_trace_t *t = va_arg (*args, wg_output_tun_trace_t *);
s = format (s, "peer: %d\n", t->peer);
- s = format (s, " Encrypted packet: %U", format_ip4_udp_header, &t->hdr);
+ s = format (s, " Encrypted packet: ");
+
+ s = t->is_ip4 ? format (s, "%U", format_ip4_udp_header, t->header) :
+ format (s, "%U", format_ip6_udp_header, t->header);
return s;
}
-VLIB_NODE_FN (wg_output_tun_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+/* post node - packet trace format function */
+static u8 *
+format_wg_output_tun_post_trace (u8 *s, va_list *args)
{
- u32 n_left_from;
- u32 *from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u16 nexts[VLIB_FRAME_SIZE], *next;
- u32 thread_index = vm->thread_index;
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- b = bufs;
- next = nexts;
+ wg_output_tun_post_trace_t *t = va_arg (*args, wg_output_tun_post_trace_t *);
- vlib_get_buffers (vm, from, bufs, n_left_from);
+ s = format (s, "peer: %d\n", t->peer);
+ s = format (s, " wg-post: next node index %u", t->next_index);
+ return s;
+}
+
+static_always_inline void
+wg_output_chain_crypto (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vlib_buffer_t *b, vlib_buffer_t *lb, u8 *start,
+ u32 start_len, u16 *n_ch)
+{
+ vnet_crypto_op_chunk_t *ch;
+ vlib_buffer_t *cb = b;
+ u32 n_chunks = 1;
+
+ vec_add2 (ptd->chunks, ch, 1);
+ ch->len = start_len;
+ ch->src = ch->dst = start;
+ cb = vlib_get_buffer (vm, cb->next_buffer);
+
+ while (1)
+ {
+ vec_add2 (ptd->chunks, ch, 1);
+ n_chunks += 1;
+ if (lb == cb)
+ ch->len = cb->current_length - NOISE_AUTHTAG_LEN;
+ else
+ ch->len = cb->current_length;
+
+ ch->src = ch->dst = vlib_buffer_get_current (cb);
+
+ if (!(cb->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+
+ cb = vlib_get_buffer (vm, cb->next_buffer);
+ }
+
+ if (n_ch)
+ *n_ch = n_chunks;
+}
+
+static_always_inline void
+wg_prepare_sync_enc_op (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vlib_buffer_t *b, vlib_buffer_t *lb,
+ vnet_crypto_op_t **crypto_ops, u8 *src, u32 src_len,
+ u8 *dst, u8 *aad, u32 aad_len, u64 nonce,
+ vnet_crypto_key_index_t key_index, u32 bi, u8 *iv)
+{
+ vnet_crypto_op_t _op, *op = &_op;
+ u8 src_[] = {};
+
+ clib_memset (iv, 0, 4);
+ clib_memcpy (iv + 4, &nonce, sizeof (nonce));
+
+ vec_add2_aligned (crypto_ops[0], op, 1, CLIB_CACHE_LINE_BYTES);
+ vnet_crypto_op_init (op, VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC);
+
+ op->tag_len = NOISE_AUTHTAG_LEN;
+ op->tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
+ op->key_index = key_index;
+ op->aad = aad;
+ op->aad_len = aad_len;
+ op->iv = iv;
+ op->user_data = bi;
+
+ if (b != lb)
+ {
+ /* Chained buffers */
+ op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+ op->chunk_index = vec_len (ptd->chunks);
+ wg_output_chain_crypto (vm, ptd, b, lb, src, src_len, &op->n_chunks);
+ }
+ else
+ {
+ op->src = !src ? src_ : src;
+ op->len = src_len;
+ op->dst = dst;
+ }
+}
+
+static_always_inline void
+wg_output_process_chained_ops (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_crypto_op_t *ops, vlib_buffer_t *b[],
+ u16 *nexts, vnet_crypto_op_chunk_t *chunks,
+ u16 drop_next)
+{
+ u32 n_fail, n_ops = vec_len (ops);
+ vnet_crypto_op_t *op = ops;
+
+ if (n_ops == 0)
+ return;
+
+ n_fail = n_ops - vnet_crypto_process_chained_ops (vm, op, chunks, n_ops);
+
+ while (n_fail)
+ {
+ ASSERT (op - ops < n_ops);
+
+ if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ {
+ u32 bi = op->user_data;
+ b[bi]->error = node->errors[WG_OUTPUT_ERROR_CRYPTO_ENGINE_ERROR];
+ nexts[bi] = drop_next;
+ n_fail--;
+ }
+ op++;
+ }
+}
+
+static_always_inline void
+wg_output_process_ops (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_crypto_op_t *ops, vlib_buffer_t *b[], u16 *nexts,
+ u16 drop_next)
+{
+ u32 n_fail, n_ops = vec_len (ops);
+ vnet_crypto_op_t *op = ops;
+
+ if (n_ops == 0)
+ return;
+ n_fail = n_ops - vnet_crypto_process_ops (vm, op, n_ops);
+
+ while (n_fail)
+ {
+ ASSERT (op - ops < n_ops);
+
+ if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
+ {
+ u32 bi = op->user_data;
+ b[bi]->error = node->errors[WG_OUTPUT_ERROR_CRYPTO_ENGINE_ERROR];
+ nexts[bi] = drop_next;
+ n_fail--;
+ }
+ op++;
+ }
+}
+
+static_always_inline void
+wg_output_tun_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
+ u32 key_index, u32 crypto_len,
+ i16 crypto_start_offset, u32 buffer_index,
+ u16 next_node, u8 *iv, u8 *tag, u8 flags)
+{
+ vnet_crypto_async_frame_elt_t *fe;
+ u16 index;
+
+ ASSERT (f->n_elts < VNET_CRYPTO_FRAME_SIZE);
+
+ index = f->n_elts;
+ fe = &f->elts[index];
+ f->n_elts++;
+ fe->key_index = key_index;
+ fe->crypto_total_length = crypto_len;
+ fe->crypto_start_offset = crypto_start_offset;
+ fe->iv = iv;
+ fe->tag = tag;
+ fe->flags = flags;
+ f->buffer_indices[index] = buffer_index;
+ f->next_node_index[index] = next_node;
+}
+
+static_always_inline enum noise_state_crypt
+wg_output_tun_process (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vlib_buffer_t *b, vlib_buffer_t *lb,
+ vnet_crypto_op_t **crypto_ops, noise_remote_t *r,
+ uint32_t *r_idx, uint64_t *nonce, uint8_t *src,
+ size_t srclen, uint8_t *dst, u32 bi, u8 *iv, f64 time)
+{
+ noise_keypair_t *kp;
+ enum noise_state_crypt ret = SC_FAILED;
+
+ if ((kp = r->r_current) == NULL)
+ goto error;
+
+ /* We confirm that our values are within our tolerances. We want:
+ * - a valid keypair
+ * - our keypair to be less than REJECT_AFTER_TIME seconds old
+ * - our receive counter to be less than REJECT_AFTER_MESSAGES
+ * - our send counter to be less than REJECT_AFTER_MESSAGES
+ */
+ if (!kp->kp_valid ||
+ wg_birthdate_has_expired_opt (kp->kp_birthdate, REJECT_AFTER_TIME,
+ time) ||
+ kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES ||
+ ((*nonce = noise_counter_send (&kp->kp_ctr)) > REJECT_AFTER_MESSAGES))
+ goto error;
+
+ /* We encrypt into the same buffer, so the caller must ensure that buf
+ * has NOISE_AUTHTAG_LEN bytes to store the MAC. The nonce and index
+ * are passed back out to the caller through the provided data pointer. */
+ *r_idx = kp->kp_remote_index;
+
+ wg_prepare_sync_enc_op (vm, ptd, b, lb, crypto_ops, src, srclen, dst, NULL,
+ 0, *nonce, kp->kp_send_index, bi, iv);
+
+ /* If our values are still within tolerances, but we are approaching
+ * the tolerances, we notify the caller with ESTALE that they should
+ * establish a new keypair. The current keypair can continue to be used
+ * until the tolerances are hit. We notify if:
+ * - our send counter is valid and not less than REKEY_AFTER_MESSAGES
+ * - we're the initiator and our keypair is older than
+ * REKEY_AFTER_TIME seconds */
+ ret = SC_KEEP_KEY_FRESH;
+ if ((kp->kp_valid && *nonce >= REKEY_AFTER_MESSAGES) ||
+ (kp->kp_is_initiator && wg_birthdate_has_expired_opt (
+ kp->kp_birthdate, REKEY_AFTER_TIME, time)))
+ goto error;
+
+ ret = SC_OK;
+error:
+ return ret;
+}
+
+static_always_inline enum noise_state_crypt
+wg_add_to_async_frame (vlib_main_t *vm, wg_per_thread_data_t *ptd,
+ vnet_crypto_async_frame_t **async_frame,
+ vlib_buffer_t *b, vlib_buffer_t *lb, u8 *payload,
+ u32 payload_len, u32 bi, u16 next, u16 async_next,
+ noise_remote_t *r, uint32_t *r_idx, uint64_t *nonce,
+ u8 *iv, f64 time)
+{
+ wg_post_data_t *post = wg_post_data (b);
+ u8 flag = 0;
+ u8 *tag;
+ noise_keypair_t *kp;
+
+ post->next_index = next;
+
+ /* crypto */
+ enum noise_state_crypt ret = SC_FAILED;
+
+ if ((kp = r->r_current) == NULL)
+ goto error;
+
+ /* We confirm that our values are within our tolerances. We want:
+ * - a valid keypair
+ * - our keypair to be less than REJECT_AFTER_TIME seconds old
+ * - our receive counter to be less than REJECT_AFTER_MESSAGES
+ * - our send counter to be less than REJECT_AFTER_MESSAGES
+ */
+ if (!kp->kp_valid ||
+ wg_birthdate_has_expired_opt (kp->kp_birthdate, REJECT_AFTER_TIME,
+ time) ||
+ kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES ||
+ ((*nonce = noise_counter_send (&kp->kp_ctr)) > REJECT_AFTER_MESSAGES))
+ goto error;
+
+ /* We encrypt into the same buffer, so the caller must ensure that buf
+ * has NOISE_AUTHTAG_LEN bytes to store the MAC. The nonce and index
+ * are passed back out to the caller through the provided data pointer. */
+ *r_idx = kp->kp_remote_index;
+
+ clib_memset (iv, 0, 4);
+ clib_memcpy (iv + 4, nonce, sizeof (*nonce));
+
+ /* get a frame for this op if we don't yet have one or it's full */
+ if (NULL == *async_frame || vnet_crypto_async_frame_is_full (*async_frame))
+ {
+ *async_frame = vnet_crypto_async_get_frame (
+ vm, VNET_CRYPTO_OP_CHACHA20_POLY1305_TAG16_AAD0_ENC);
+ if (PREDICT_FALSE (NULL == *async_frame))
+ goto error;
+ /* Save the frame to the list we'll submit at the end */
+ vec_add1 (ptd->async_frames, *async_frame);
+ }
+
+ if (b != lb)
+ flag |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
+
+ tag = vlib_buffer_get_tail (lb) - NOISE_AUTHTAG_LEN;
+
+ /* this always succeeds because we know the frame is not full */
+ wg_output_tun_add_to_frame (vm, *async_frame, kp->kp_send_index, payload_len,
+ payload - b->data, bi, async_next, iv, tag,
+ flag);
+
+ /* If our values are still within tolerances, but we are approaching
+ * the tolerances, we notify the caller with ESTALE that they should
+ * establish a new keypair. The current keypair can continue to be used
+ * until the tolerances are hit. We notify if:
+ * - our send counter is valid and not less than REKEY_AFTER_MESSAGES
+ * - we're the initiator and our keypair is older than
+ * REKEY_AFTER_TIME seconds */
+ ret = SC_KEEP_KEY_FRESH;
+ if ((kp->kp_valid && *nonce >= REKEY_AFTER_MESSAGES) ||
+ (kp->kp_is_initiator && wg_birthdate_has_expired_opt (
+ kp->kp_birthdate, REKEY_AFTER_TIME, time)))
+ goto error;
+
+ ret = SC_OK;
+error:
+ return ret;
+}
+
+static_always_inline void
+wg_calc_checksum (vlib_main_t *vm, vlib_buffer_t *b)
+{
+ int bogus = 0;
+ u8 ip_ver_out = (*((u8 *) vlib_buffer_get_current (b)) >> 4);
+
+ /* IPv6 UDP checksum is mandatory */
+ if (ip_ver_out == 6)
+ {
+ ip6_header_t *ip6 =
+ (ip6_header_t *) ((u8 *) vlib_buffer_get_current (b));
+ udp_header_t *udp = ip6_next_header (ip6);
+ udp->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ }
+}
+
+/* is_ip4 - inner header flag */
+always_inline uword
+wg_output_tun_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u8 is_ip4, u16 async_next_node)
+{
wg_main_t *wmp = &wg_main;
+ wg_per_thread_data_t *ptd =
+ vec_elt_at_index (wmp->per_thread_data, vm->thread_index);
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left_from = frame->n_vectors;
+ ip4_udp_wg_header_t *hdr4_out = NULL;
+ ip6_udp_wg_header_t *hdr6_out = NULL;
+ message_data_t *message_data_wg = NULL;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ vlib_buffer_t *lb;
+ vnet_crypto_op_t **crypto_ops;
+ u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+ vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE];
+ u32 thread_index = vm->thread_index;
+ u16 n_sync = 0;
+ const u16 drop_next = WG_OUTPUT_NEXT_ERROR;
+ const u8 is_async = wg_op_mode_is_set_ASYNC ();
+ vnet_crypto_async_frame_t *async_frame = NULL;
+ u16 n_async = 0;
+ u16 noop_nexts[VLIB_FRAME_SIZE], *noop_next = noop_nexts, n_noop = 0;
+ u16 err = !0;
+ u32 sync_bi[VLIB_FRAME_SIZE];
+ u32 noop_bi[VLIB_FRAME_SIZE];
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+ vec_reset_length (ptd->crypto_ops);
+ vec_reset_length (ptd->chained_crypto_ops);
+ vec_reset_length (ptd->chunks);
+ vec_reset_length (ptd->async_frames);
+
wg_peer_t *peer = NULL;
+ u32 adj_index = 0;
+ u32 last_adj_index = ~0;
+ index_t peeri = INDEX_INVALID;
+
+ f64 time = clib_time_now (&vm->clib_time) + vm->time_offset;
while (n_left_from > 0)
{
- ip4_udp_header_t *hdr = vlib_buffer_get_current (b[0]);
- u8 *plain_data = (vlib_buffer_get_current (b[0]) +
- sizeof (ip4_udp_header_t));
- u16 plain_data_len =
- clib_net_to_host_u16 (((ip4_header_t *) plain_data)->length);
- index_t peeri;
+ u8 iph_offset = 0;
+ u8 is_ip4_out = 1;
+ u8 *plain_data;
+ u16 plain_data_len;
+ u16 plain_data_len_total;
+ u16 n_bufs;
+ u16 b_space_left_at_beginning;
+ u32 bi = from[b - bufs];
+
+ if (n_left_from > 2)
+ {
+ u8 *p;
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ p = vlib_buffer_get_current (b[1]);
+ CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (vlib_buffer_get_tail (b[1]), CLIB_CACHE_LINE_BYTES,
+ LOAD);
+ }
- next[0] = WG_OUTPUT_NEXT_ERROR;
- peeri =
- wg_peer_get_by_adj_index (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
- peer = wg_peer_get (peeri);
+ noop_next[0] = WG_OUTPUT_NEXT_ERROR;
+ err = WG_OUTPUT_NEXT_ERROR;
- if (!peer || peer->is_dead)
+ adj_index = vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
+
+ if (PREDICT_FALSE (last_adj_index != adj_index))
+ {
+ peeri = wg_peer_get_by_adj_index (adj_index);
+ if (peeri == INDEX_INVALID)
+ {
+ b[0]->error = node->errors[WG_OUTPUT_ERROR_PEER];
+ goto out;
+ }
+ peer = wg_peer_get (peeri);
+ }
+
+ if (!peer || wg_peer_is_dead (peer))
{
b[0]->error = node->errors[WG_OUTPUT_ERROR_PEER];
goto out;
}
-
if (PREDICT_FALSE (~0 == peer->output_thread_index))
{
/* this is the first packet to use this peer, claim the peer
@@ -129,9 +511,10 @@ VLIB_NODE_FN (wg_output_tun_node) (vlib_main_t * vm,
wg_peer_assign_thread (thread_index));
}
- if (PREDICT_TRUE (thread_index != peer->output_thread_index))
+ if (PREDICT_FALSE (thread_index != peer->output_thread_index))
{
- next[0] = WG_OUTPUT_NEXT_HANDOFF;
+ noop_next[0] = WG_OUTPUT_NEXT_HANDOFF;
+ err = WG_OUTPUT_NEXT_HANDOFF;
goto next;
}
@@ -141,31 +524,119 @@ VLIB_NODE_FN (wg_output_tun_node) (vlib_main_t * vm,
b[0]->error = node->errors[WG_OUTPUT_ERROR_KEYPAIR];
goto out;
}
- size_t encrypted_packet_len = message_data_len (plain_data_len);
- /*
- * Ensure there is enough space to write the encrypted data
- * into the packet
- */
- if (PREDICT_FALSE (encrypted_packet_len >= WG_DEFAULT_DATA_SIZE) ||
- PREDICT_FALSE ((b[0]->current_data + encrypted_packet_len) >=
- vlib_buffer_get_default_data_size (vm)))
+ lb = b[0];
+ n_bufs = vlib_buffer_chain_linearize (vm, b[0]);
+ if (n_bufs == 0)
{
- b[0]->error = node->errors[WG_OUTPUT_ERROR_TOO_BIG];
+ b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS];
goto out;
}
- message_data_t *encrypted_packet =
- (message_data_t *) wmp->per_thread_data[thread_index].data;
+ if (n_bufs > 1)
+ {
+ /* Find last buffer in the chain */
+ while (lb->flags & VLIB_BUFFER_NEXT_PRESENT)
+ lb = vlib_get_buffer (vm, lb->next_buffer);
+ }
+
+ /* Ensure there is enough free space at the beginning of the first buffer
+ * to write ethernet header (e.g. IPv6 VxLAN over IPv6 Wireguard will
+ * trigger this)
+ */
+ ASSERT ((signed) b[0]->current_data >=
+ (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
+ b_space_left_at_beginning =
+ b[0]->current_data + VLIB_BUFFER_PRE_DATA_SIZE;
+ if (PREDICT_FALSE (b_space_left_at_beginning <
+ sizeof (ethernet_header_t)))
+ {
+ u32 size_diff =
+ sizeof (ethernet_header_t) - b_space_left_at_beginning;
+
+ /* Can only move buffer when it's single and has enough free space*/
+ if (lb == b[0] &&
+ vlib_buffer_space_left_at_end (vm, b[0]) >= size_diff)
+ {
+ vlib_buffer_move (vm, b[0],
+ b[0]->current_data + (signed) size_diff);
+ }
+ else
+ {
+ b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS];
+ goto out;
+ }
+ }
+
+ /*
+ * Ensure there is enough free space at the end of the last buffer to
+ * write auth tag */
+ if (PREDICT_FALSE (vlib_buffer_space_left_at_end (vm, lb) <
+ NOISE_AUTHTAG_LEN))
+ {
+ u32 tmp_bi = 0;
+ if (vlib_buffer_alloc (vm, &tmp_bi, 1) != 1)
+ {
+ b[0]->error = node->errors[WG_OUTPUT_ERROR_NO_BUFFERS];
+ goto out;
+ }
+ lb = vlib_buffer_chain_buffer (vm, lb, tmp_bi);
+ }
+
+ iph_offset = vnet_buffer (b[0])->ip.save_rewrite_length;
+ plain_data = vlib_buffer_get_current (b[0]) + iph_offset;
+ plain_data_len = b[0]->current_length - iph_offset;
+ plain_data_len_total =
+ vlib_buffer_length_in_chain (vm, b[0]) - iph_offset;
+ size_t encrypted_packet_len = message_data_len (plain_data_len_total);
+ vlib_buffer_chain_increase_length (b[0], lb, NOISE_AUTHTAG_LEN);
+ u8 *iv_data = b[0]->pre_data;
+
+ is_ip4_out = ip46_address_is_ip4 (&peer->src.addr);
+ if (is_ip4_out)
+ {
+ hdr4_out = vlib_buffer_get_current (b[0]);
+ message_data_wg = &hdr4_out->wg;
+ }
+ else
+ {
+ hdr6_out = vlib_buffer_get_current (b[0]);
+ message_data_wg = &hdr6_out->wg;
+ }
+
+ if (PREDICT_FALSE (last_adj_index != adj_index))
+ {
+ wg_timers_any_authenticated_packet_sent_opt (peer, time);
+ wg_timers_data_sent_opt (peer, time);
+ wg_timers_any_authenticated_packet_traversal (peer);
+ last_adj_index = adj_index;
+ }
+
+ /* Here we are sure that can send packet to next node */
+ next[0] = WG_OUTPUT_NEXT_INTERFACE_OUTPUT;
+
+ if (lb != b[0])
+ crypto_ops = &ptd->chained_crypto_ops;
+ else
+ crypto_ops = &ptd->crypto_ops;
enum noise_state_crypt state;
- state =
- noise_remote_encrypt (vm,
- &peer->remote,
- &encrypted_packet->receiver_index,
- &encrypted_packet->counter, plain_data,
- plain_data_len,
- encrypted_packet->encrypted_data);
+
+ if (is_async)
+ {
+ state = wg_add_to_async_frame (
+ vm, ptd, &async_frame, b[0], lb, plain_data, plain_data_len_total,
+ bi, next[0], async_next_node, &peer->remote,
+ &message_data_wg->receiver_index, &message_data_wg->counter,
+ iv_data, time);
+ }
+ else
+ {
+ state = wg_output_tun_process (
+ vm, ptd, b[0], lb, crypto_ops, &peer->remote,
+ &message_data_wg->receiver_index, &message_data_wg->counter,
+ plain_data, plain_data_len, plain_data, n_sync, iv_data, time);
+ }
if (PREDICT_FALSE (state == SC_KEEP_KEY_FRESH))
{
@@ -173,27 +644,31 @@ VLIB_NODE_FN (wg_output_tun_node) (vlib_main_t * vm,
}
else if (PREDICT_FALSE (state == SC_FAILED))
{
- //TODO: Maybe wrong
+ // TODO: Maybe wrong
wg_send_handshake_from_mt (peeri, false);
+ wg_peer_update_flags (peeri, WG_PEER_ESTABLISHED, false);
+ noop_next[0] = WG_OUTPUT_NEXT_ERROR;
goto out;
}
- /* Here we are sure that can send packet to next node */
- next[0] = WG_OUTPUT_NEXT_INTERFACE_OUTPUT;
- encrypted_packet->header.type = MESSAGE_DATA;
-
- clib_memcpy (plain_data, (u8 *) encrypted_packet, encrypted_packet_len);
+ err = WG_OUTPUT_NEXT_INTERFACE_OUTPUT;
- hdr->udp.length = clib_host_to_net_u16 (encrypted_packet_len +
- sizeof (udp_header_t));
- b[0]->current_length = (encrypted_packet_len +
- sizeof (ip4_header_t) + sizeof (udp_header_t));
- ip4_header_set_len_w_chksum
- (&hdr->ip4, clib_host_to_net_u16 (b[0]->current_length));
-
- wg_timers_any_authenticated_packet_sent (peer);
- wg_timers_data_sent (peer);
- wg_timers_any_authenticated_packet_traversal (peer);
+ if (is_ip4_out)
+ {
+ hdr4_out->wg.header.type = MESSAGE_DATA;
+ hdr4_out->udp.length = clib_host_to_net_u16 (encrypted_packet_len +
+ sizeof (udp_header_t));
+ ip4_header_set_len_w_chksum (
+ &hdr4_out->ip4, clib_host_to_net_u16 (encrypted_packet_len +
+ sizeof (ip4_udp_header_t)));
+ }
+ else
+ {
+ hdr6_out->wg.header.type = MESSAGE_DATA;
+ hdr6_out->ip6.payload_length = hdr6_out->udp.length =
+ clib_host_to_net_u16 (encrypted_packet_len +
+ sizeof (udp_header_t));
+ }
out:
if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
@@ -201,23 +676,262 @@ VLIB_NODE_FN (wg_output_tun_node) (vlib_main_t * vm,
{
wg_output_tun_trace_t *t =
vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->hdr = *hdr;
+
t->peer = peeri;
+ t->is_ip4 = is_ip4_out;
+ if (hdr4_out)
+ clib_memcpy (t->header, hdr4_out, sizeof (ip4_udp_header_t));
+ else if (hdr6_out)
+ clib_memcpy (t->header, hdr6_out, sizeof (ip6_udp_header_t));
}
+
next:
+ if (PREDICT_FALSE (err != WG_OUTPUT_NEXT_INTERFACE_OUTPUT))
+ {
+ noop_bi[n_noop] = bi;
+ n_noop++;
+ noop_next++;
+ goto next_left;
+ }
+ if (!is_async)
+ {
+ sync_bi[n_sync] = bi;
+ sync_bufs[n_sync] = b[0];
+ n_sync += 1;
+ next += 1;
+ }
+ else
+ {
+ n_async++;
+ }
+ next_left:
n_left_from -= 1;
- next += 1;
b += 1;
}
+ if (n_sync)
+ {
+ /* wg-output-process-ops */
+ wg_output_process_ops (vm, node, ptd->crypto_ops, sync_bufs, nexts,
+ drop_next);
+ wg_output_process_chained_ops (vm, node, ptd->chained_crypto_ops,
+ sync_bufs, nexts, ptd->chunks, drop_next);
+
+ int n_left_from_sync_bufs = n_sync;
+ while (n_left_from_sync_bufs > 0)
+ {
+ n_left_from_sync_bufs--;
+ wg_calc_checksum (vm, sync_bufs[n_left_from_sync_bufs]);
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, sync_bi, nexts, n_sync);
+ }
+ if (n_async)
+ {
+ /* submit all of the open frames */
+ vnet_crypto_async_frame_t **async_frame;
+
+ vec_foreach (async_frame, ptd->async_frames)
+ {
+ if (PREDICT_FALSE (
+ vnet_crypto_async_submit_open_frame (vm, *async_frame) < 0))
+ {
+ u32 n_drop = (*async_frame)->n_elts;
+ u32 *bi = (*async_frame)->buffer_indices;
+ u16 index = n_noop;
+ while (n_drop--)
+ {
+ noop_bi[index] = bi[0];
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi[0]);
+ noop_nexts[index] = drop_next;
+ b->error = node->errors[WG_OUTPUT_ERROR_CRYPTO_ENGINE_ERROR];
+ bi++;
+ index++;
+ }
+ n_noop += (*async_frame)->n_elts;
+
+ vnet_crypto_async_reset_frame (*async_frame);
+ vnet_crypto_async_free_frame (vm, *async_frame);
+ }
+ }
+ }
+ if (n_noop)
+ {
+ vlib_buffer_enqueue_to_next (vm, node, noop_bi, noop_nexts, n_noop);
+ }
+
+ return frame->n_vectors;
+}
+
+always_inline uword
+wg_output_tun_post (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ u16 nexts[VLIB_FRAME_SIZE], *next = nexts;
+ u32 *from = vlib_frame_vector_args (frame);
+ u32 n_left = frame->n_vectors;
+
+ index_t peeri = ~0;
+
+ vlib_get_buffers (vm, from, b, n_left);
+
+ if (n_left >= 4)
+ {
+ vlib_prefetch_buffer_header (b[0], LOAD);
+ vlib_prefetch_buffer_header (b[1], LOAD);
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ vlib_prefetch_buffer_header (b[3], LOAD);
+ }
+
+ while (n_left > 8)
+ {
+ vlib_prefetch_buffer_header (b[4], LOAD);
+ vlib_prefetch_buffer_header (b[5], LOAD);
+ vlib_prefetch_buffer_header (b[6], LOAD);
+ vlib_prefetch_buffer_header (b[7], LOAD);
+
+ next[0] = (wg_post_data (b[0]))->next_index;
+ next[1] = (wg_post_data (b[1]))->next_index;
+ next[2] = (wg_post_data (b[2]))->next_index;
+ next[3] = (wg_post_data (b[3]))->next_index;
+
+ wg_calc_checksum (vm, b[0]);
+ wg_calc_checksum (vm, b[1]);
+ wg_calc_checksum (vm, b[2]);
+ wg_calc_checksum (vm, b[3]);
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ {
+ if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ wg_output_tun_post_trace_t *tr =
+ vlib_add_trace (vm, node, b[0], sizeof (*tr));
+ peeri = wg_peer_get_by_adj_index (
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+ tr->peer = peeri;
+ tr->next_index = next[0];
+ }
+ if (b[1]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ wg_output_tun_post_trace_t *tr =
+ vlib_add_trace (vm, node, b[1], sizeof (*tr));
+ peeri = wg_peer_get_by_adj_index (
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
+ tr->next_index = next[1];
+ }
+ if (b[2]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ wg_output_tun_post_trace_t *tr =
+ vlib_add_trace (vm, node, b[2], sizeof (*tr));
+ peeri = wg_peer_get_by_adj_index (
+ vnet_buffer (b[2])->ip.adj_index[VLIB_TX]);
+ tr->next_index = next[2];
+ }
+ if (b[3]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ wg_output_tun_post_trace_t *tr =
+ vlib_add_trace (vm, node, b[3], sizeof (*tr));
+ peeri = wg_peer_get_by_adj_index (
+ vnet_buffer (b[3])->ip.adj_index[VLIB_TX]);
+ tr->next_index = next[3];
+ }
+ }
+
+ b += 4;
+ next += 4;
+ n_left -= 4;
+ }
+
+ while (n_left > 0)
+ {
+ wg_calc_checksum (vm, b[0]);
+
+ next[0] = (wg_post_data (b[0]))->next_index;
+ if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) &&
+ (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
+ {
+ wg_output_tun_post_trace_t *tr =
+ vlib_add_trace (vm, node, b[0], sizeof (*tr));
+ peeri = wg_peer_get_by_adj_index (
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+ tr->next_index = next[0];
+ }
+
+ b += 1;
+ next += 1;
+ n_left -= 1;
+ }
+
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
return frame->n_vectors;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (wg_output_tun_node) =
+VLIB_REGISTER_NODE (wg4_output_tun_post_node) = {
+ .name = "wg4-output-tun-post-node",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_output_tun_post_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .sibling_of = "wg4-output-tun",
+ .n_errors = ARRAY_LEN (wg_output_error_strings),
+ .error_strings = wg_output_error_strings,
+};
+
+VLIB_REGISTER_NODE (wg6_output_tun_post_node) = {
+ .name = "wg6-output-tun-post-node",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_output_tun_post_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .sibling_of = "wg6-output-tun",
+ .n_errors = ARRAY_LEN (wg_output_error_strings),
+ .error_strings = wg_output_error_strings,
+};
+
+VLIB_NODE_FN (wg4_output_tun_post_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return wg_output_tun_post (vm, node, from_frame);
+}
+
+VLIB_NODE_FN (wg6_output_tun_post_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return wg_output_tun_post (vm, node, from_frame);
+}
+
+VLIB_NODE_FN (wg4_output_tun_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return wg_output_tun_inline (vm, node, frame, /* is_ip4 */ 1,
+ wg_encrypt_async_next.wg4_post_next);
+}
+
+VLIB_NODE_FN (wg6_output_tun_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return wg_output_tun_inline (vm, node, frame, /* is_ip4 */ 0,
+ wg_encrypt_async_next.wg6_post_next);
+}
+
+VLIB_REGISTER_NODE (wg4_output_tun_node) =
+{
+ .name = "wg4-output-tun",
+ .vector_size = sizeof (u32),
+ .format_trace = format_wg_output_tun_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN (wg_output_error_strings),
+ .error_strings = wg_output_error_strings,
+ .n_next_nodes = WG_OUTPUT_N_NEXT,
+ .next_nodes = {
+ [WG_OUTPUT_NEXT_HANDOFF] = "wg4-output-tun-handoff",
+ [WG_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
+ [WG_OUTPUT_NEXT_ERROR] = "error-drop",
+ },
+};
+
+VLIB_REGISTER_NODE (wg6_output_tun_node) =
{
- .name = "wg-output-tun",
+ .name = "wg6-output-tun",
.vector_size = sizeof (u32),
.format_trace = format_wg_output_tun_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
@@ -225,12 +939,11 @@ VLIB_REGISTER_NODE (wg_output_tun_node) =
.error_strings = wg_output_error_strings,
.n_next_nodes = WG_OUTPUT_N_NEXT,
.next_nodes = {
- [WG_OUTPUT_NEXT_HANDOFF] = "wg-output-tun-handoff",
+ [WG_OUTPUT_NEXT_HANDOFF] = "wg6-output-tun-handoff",
[WG_OUTPUT_NEXT_INTERFACE_OUTPUT] = "adj-midchain-tx",
[WG_OUTPUT_NEXT_ERROR] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/plugins/wireguard/wireguard_peer.c b/src/plugins/wireguard/wireguard_peer.c
index f47019b110a..e71db86de0b 100644
--- a/src/plugins/wireguard/wireguard_peer.c
+++ b/src/plugins/wireguard/wireguard_peer.c
@@ -22,8 +22,8 @@
#include <wireguard/wireguard_key.h>
#include <wireguard/wireguard_send.h>
#include <wireguard/wireguard.h>
+#include <vnet/tunnel/tunnel_dp.h>
-static fib_source_t wg_fib_source;
wg_peer_t *wg_peer_pool;
index_t *wg_peer_by_adj_index;
@@ -36,48 +36,20 @@ wg_peer_endpoint_reset (wg_peer_endpoint_t * ep)
}
static void
-wg_peer_endpoint_init (wg_peer_endpoint_t * ep,
- const ip46_address_t * addr, u16 port)
+wg_peer_endpoint_init (wg_peer_endpoint_t *ep, const ip46_address_t *addr,
+ u16 port)
{
ip46_address_copy (&ep->addr, addr);
ep->port = port;
}
static void
-wg_peer_fib_flush (wg_peer_t * peer)
-{
- wg_peer_allowed_ip_t *allowed_ip;
-
- vec_foreach (allowed_ip, peer->allowed_ips)
- {
- fib_table_entry_delete_index (allowed_ip->fib_entry_index, wg_fib_source);
- allowed_ip->fib_entry_index = FIB_NODE_INDEX_INVALID;
- }
-}
-
-static void
-wg_peer_fib_populate (wg_peer_t * peer, u32 fib_index)
-{
- wg_peer_allowed_ip_t *allowed_ip;
-
- vec_foreach (allowed_ip, peer->allowed_ips)
- {
- allowed_ip->fib_entry_index =
- fib_table_entry_path_add (fib_index,
- &allowed_ip->prefix,
- wg_fib_source,
- FIB_ENTRY_FLAG_NONE,
- fib_proto_to_dpo (allowed_ip->
- prefix.fp_proto),
- &peer->dst.addr, peer->wg_sw_if_index, ~0, 1,
- NULL, FIB_ROUTE_PATH_FLAG_NONE);
- }
-}
-
-static void
wg_peer_clear (vlib_main_t * vm, wg_peer_t * peer)
{
+ index_t perri = peer - wg_peer_pool;
wg_timers_stop (peer);
+ wg_peer_update_flags (perri, WG_PEER_ESTABLISHED, false);
+ wg_peer_update_flags (perri, WG_PEER_STATUS_DEAD, true);
for (int i = 0; i < WG_N_TIMERS; i++)
{
peer->timers[i] = ~0;
@@ -91,16 +63,16 @@ wg_peer_clear (vlib_main_t * vm, wg_peer_t * peer)
wg_peer_endpoint_reset (&peer->src);
wg_peer_endpoint_reset (&peer->dst);
- if (INDEX_INVALID != peer->adj_index)
+ adj_index_t *adj_index;
+ vec_foreach (adj_index, peer->adj_indices)
{
- adj_unlock (peer->adj_index);
- wg_peer_by_adj_index[peer->adj_index] = INDEX_INVALID;
- }
- wg_peer_fib_flush (peer);
+ wg_peer_by_adj_index[*adj_index] = INDEX_INVALID;
+ if (adj_is_valid (*adj_index))
+ adj_midchain_delegate_unstack (*adj_index);
+ }
peer->input_thread_index = ~0;
peer->output_thread_index = ~0;
- peer->adj_index = INDEX_INVALID;
peer->timer_wheel = 0;
peer->persistent_keepalive_interval = 0;
peer->timer_handshake_attempts = 0;
@@ -111,107 +83,251 @@ wg_peer_clear (vlib_main_t * vm, wg_peer_t * peer)
peer->new_handshake_interval_tick = 0;
peer->rehandshake_interval_tick = 0;
peer->timer_need_another_keepalive = false;
- peer->is_dead = true;
+ peer->handshake_is_sent = false;
+ vec_free (peer->rewrite);
vec_free (peer->allowed_ips);
+ vec_free (peer->adj_indices);
}
static void
wg_peer_init (vlib_main_t * vm, wg_peer_t * peer)
{
- peer->adj_index = INDEX_INVALID;
+ peer->api_client_by_client_index = hash_create (0, sizeof (u32));
+ peer->api_clients = NULL;
wg_peer_clear (vm, peer);
}
-static u8 *
-wg_peer_build_rewrite (const wg_peer_t * peer)
-{
- // v4 only for now
- ip4_udp_header_t *hdr;
- u8 *rewrite = NULL;
-
- vec_validate (rewrite, sizeof (*hdr) - 1);
- hdr = (ip4_udp_header_t *) rewrite;
-
- hdr->ip4.ip_version_and_header_length = 0x45;
- hdr->ip4.ttl = 64;
- hdr->ip4.src_address = peer->src.addr.ip4;
- hdr->ip4.dst_address = peer->dst.addr.ip4;
- hdr->ip4.protocol = IP_PROTOCOL_UDP;
- hdr->ip4.checksum = ip4_header_checksum (&hdr->ip4);
-
- hdr->udp.src_port = clib_host_to_net_u16 (peer->src.port);
- hdr->udp.dst_port = clib_host_to_net_u16 (peer->dst.port);
- hdr->udp.checksum = 0;
-
- return (rewrite);
-}
-
static void
-wg_peer_adj_stack (wg_peer_t * peer)
+wg_peer_adj_stack (wg_peer_t *peer, adj_index_t ai)
{
ip_adjacency_t *adj;
u32 sw_if_index;
wg_if_t *wgi;
+ fib_protocol_t fib_proto;
- adj = adj_get (peer->adj_index);
+ if (!adj_is_valid (ai))
+ return;
+
+ adj = adj_get (ai);
sw_if_index = adj->rewrite_header.sw_if_index;
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->src.addr);
+ fib_proto = is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
wgi = wg_if_get (wg_if_find_by_sw_if_index (sw_if_index));
if (!wgi)
return;
- if (!vnet_sw_interface_is_admin_up (vnet_get_main (), wgi->sw_if_index))
+ if (!vnet_sw_interface_is_admin_up (vnet_get_main (), wgi->sw_if_index) ||
+ !wg_peer_can_send (peer))
{
- adj_midchain_delegate_unstack (peer->adj_index);
+ adj_midchain_delegate_unstack (ai);
}
else
{
- /* *INDENT-OFF* */
fib_prefix_t dst = {
- .fp_len = 32,
- .fp_proto = FIB_PROTOCOL_IP4,
- .fp_addr = peer->dst.addr,
+ .fp_len = is_ip4 ? 32 : 128,
+ .fp_proto = fib_proto,
+ .fp_addr = peer->dst.addr,
};
- /* *INDENT-ON* */
u32 fib_index;
- fib_index = fib_table_find (FIB_PROTOCOL_IP4, peer->table_id);
+ fib_index = fib_table_find (fib_proto, peer->table_id);
+
+ adj_midchain_delegate_stack (ai, fib_index, &dst);
+ }
+}
+
+static void
+wg_peer_adj_reset_stacking (adj_index_t ai)
+{
+ adj_midchain_delegate_remove (ai);
+}
+
+static void
+wg_peer_66_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b,
+ const void *data)
+{
+ u8 iph_offset = 0;
+ ip6_header_t *ip6_out;
+ ip6_header_t *ip6_in;
+
+ /* Must set locally originated otherwise we're not allowed to
+ fragment the packet later */
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+ ip6_out = vlib_buffer_get_current (b);
+ iph_offset = vnet_buffer (b)->ip.save_rewrite_length;
+ ip6_in = vlib_buffer_get_current (b) + iph_offset;
+
+ ip6_out->ip_version_traffic_class_and_flow_label =
+ ip6_in->ip_version_traffic_class_and_flow_label;
+}
+
+static void
+wg_peer_46_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b,
+ const void *data)
+{
+ u8 iph_offset = 0;
+ ip6_header_t *ip6_out;
+ ip4_header_t *ip4_in;
+
+ /* Must set locally originated otherwise we're not allowed to
+ fragment the packet later */
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+ ip6_out = vlib_buffer_get_current (b);
+ iph_offset = vnet_buffer (b)->ip.save_rewrite_length;
+ ip4_in = vlib_buffer_get_current (b) + iph_offset;
+
+ u32 vtcfl = 0x6 << 28;
+ vtcfl |= ip4_in->tos << 20;
+ vtcfl |= vnet_buffer (b)->ip.flow_hash & 0x000fffff;
- adj_midchain_delegate_stack (peer->adj_index, fib_index, &dst);
+ ip6_out->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (vtcfl);
+}
+
+static adj_midchain_fixup_t
+wg_peer_get_fixup (wg_peer_t *peer, vnet_link_t lt)
+{
+ if (!ip46_address_is_ip4 (&peer->dst.addr))
+ {
+ if (lt == VNET_LINK_IP4)
+ return (wg_peer_46_fixup);
+ if (lt == VNET_LINK_IP6)
+ return (wg_peer_66_fixup);
}
+ return (NULL);
+}
+
+static void
+wg_peer_disable (vlib_main_t *vm, wg_peer_t *peer)
+{
+ index_t peeri = peer - wg_peer_pool;
+
+ wg_timers_stop (peer);
+ wg_peer_update_flags (peeri, WG_PEER_ESTABLISHED, false);
+
+ for (int i = 0; i < WG_N_TIMERS; i++)
+ {
+ peer->timers[i] = ~0;
+ peer->timers_dispatched[i] = 0;
+ }
+ peer->timer_handshake_attempts = 0;
+
+ peer->last_sent_handshake = vlib_time_now (vm) - (REKEY_TIMEOUT + 1);
+ peer->last_sent_packet = 0;
+ peer->last_received_packet = 0;
+ peer->session_derived = 0;
+ peer->rehandshake_started = 0;
+
+ peer->new_handshake_interval_tick = 0;
+ peer->rehandshake_interval_tick = 0;
+
+ peer->timer_need_another_keepalive = false;
+
+ noise_remote_clear (vm, &peer->remote);
+}
+
+static void
+wg_peer_enable (vlib_main_t *vm, wg_peer_t *peer)
+{
+ index_t peeri = peer - wg_peer_pool;
+ wg_if_t *wg_if;
+ u8 public_key[NOISE_PUBLIC_KEY_LEN];
+
+ wg_if = wg_if_get (wg_if_find_by_sw_if_index (peer->wg_sw_if_index));
+ clib_memcpy (public_key, peer->remote.r_public, NOISE_PUBLIC_KEY_LEN);
+
+ noise_remote_init (vm, &peer->remote, peeri, public_key, wg_if->local_idx);
+
+ wg_timers_send_first_handshake (peer);
}
walk_rc_t
-wg_peer_if_admin_state_change (wg_if_t * wgi, index_t peeri, void *data)
+wg_peer_if_admin_state_change (index_t peeri, void *data)
{
- wg_peer_adj_stack (wg_peer_get (peeri));
+ wg_peer_t *peer;
+ adj_index_t *adj_index;
+ vlib_main_t *vm = vlib_get_main ();
+
+ peer = wg_peer_get (peeri);
+ vec_foreach (adj_index, peer->adj_indices)
+ {
+ wg_peer_adj_stack (peer, *adj_index);
+ }
+
+ if (vnet_sw_interface_is_admin_up (vnet_get_main (), peer->wg_sw_if_index))
+ {
+ wg_peer_enable (vm, peer);
+ }
+ else
+ {
+ wg_peer_disable (vm, peer);
+ }
return (WALK_CONTINUE);
}
walk_rc_t
-wg_peer_if_table_change (wg_if_t * wgi, index_t peeri, void *data)
+wg_peer_if_adj_change (index_t peeri, void *data)
{
- wg_peer_table_bind_ctx_t *ctx = data;
+ adj_index_t *adj_index = data;
+ adj_midchain_fixup_t fixup;
+ ip_adjacency_t *adj;
wg_peer_t *peer;
+ fib_prefix_t *allowed_ip;
+
+ adj = adj_get (*adj_index);
peer = wg_peer_get (peeri);
+ vec_foreach (allowed_ip, peer->allowed_ips)
+ {
+ if (fib_prefix_is_cover_addr_46 (allowed_ip,
+ &adj->sub_type.nbr.next_hop))
+ {
+ vec_add1 (peer->adj_indices, *adj_index);
+
+ vec_validate_init_empty (wg_peer_by_adj_index, *adj_index,
+ INDEX_INVALID);
+ wg_peer_by_adj_index[*adj_index] = peeri;
+
+ fixup = wg_peer_get_fixup (peer, adj_get_link_type (*adj_index));
+ adj_nbr_midchain_update_rewrite (*adj_index, fixup, NULL,
+ ADJ_FLAG_MIDCHAIN_IP_STACK,
+ vec_dup (peer->rewrite));
+
+ wg_peer_adj_stack (peer, *adj_index);
+ return (WALK_STOP);
+ }
+ }
- wg_peer_fib_flush (peer);
- wg_peer_fib_populate (peer, ctx->new_fib_index);
+ return (WALK_CONTINUE);
+}
+adj_walk_rc_t
+wg_peer_adj_walk (adj_index_t ai, void *data)
+{
+ return wg_peer_if_adj_change ((*(index_t *) (data)), &ai) == WALK_CONTINUE ?
+ ADJ_WALK_RC_CONTINUE :
+ ADJ_WALK_RC_STOP;
+}
+
+walk_rc_t
+wg_peer_if_delete (index_t peeri, void *data)
+{
+ wg_peer_remove (peeri);
return (WALK_CONTINUE);
}
static int
-wg_peer_fill (vlib_main_t * vm, wg_peer_t * peer,
- u32 table_id,
- const ip46_address_t * dst,
- u16 port,
+wg_peer_fill (vlib_main_t *vm, wg_peer_t *peer, u32 table_id,
+ const ip46_address_t *dst, u16 port,
u16 persistent_keepalive_interval,
- const fib_prefix_t * allowed_ips, u32 wg_sw_if_index)
+ const fib_prefix_t *allowed_ips, u32 wg_sw_if_index)
{
+ index_t perri = peer - wg_peer_pool;
wg_peer_endpoint_init (&peer->dst, dst, port);
peer->table_id = table_id;
@@ -219,7 +335,7 @@ wg_peer_fill (vlib_main_t * vm, wg_peer_t * peer,
peer->timer_wheel = &wg_main.timer_wheel;
peer->persistent_keepalive_interval = persistent_keepalive_interval;
peer->last_sent_handshake = vlib_time_now (vm) - (REKEY_TIMEOUT + 1);
- peer->is_dead = false;
+ wg_peer_update_flags (perri, WG_PEER_STATUS_DEAD, false);
const wg_if_t *wgi = wg_if_get (wg_if_find_by_sw_if_index (wg_sw_if_index));
@@ -229,53 +345,102 @@ wg_peer_fill (vlib_main_t * vm, wg_peer_t * peer,
ip_address_to_46 (&wgi->src_ip, &peer->src.addr);
peer->src.port = wgi->port;
- /*
- * and an adjacency for the endpoint address in the overlay
- * on the wg interface
- */
- peer->rewrite = wg_peer_build_rewrite (peer);
-
- peer->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4,
- VNET_LINK_IP4,
- &peer->dst.addr, wgi->sw_if_index);
-
- vec_validate_init_empty (wg_peer_by_adj_index,
- peer->adj_index, INDEX_INVALID);
- wg_peer_by_adj_index[peer->adj_index] = peer - wg_peer_pool;
-
- adj_nbr_midchain_update_rewrite (peer->adj_index,
- NULL,
- NULL,
- ADJ_FLAG_MIDCHAIN_IP_STACK,
- vec_dup (peer->rewrite));
- wg_peer_adj_stack (peer);
-
- /*
- * add a route in the overlay to each of the allowed-ips
- */
- u32 ii;
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->dst.addr);
+ peer->rewrite = wg_build_rewrite (&peer->src.addr, peer->src.port,
+ &peer->dst.addr, peer->dst.port, is_ip4);
+ u32 ii;
vec_validate (peer->allowed_ips, vec_len (allowed_ips) - 1);
-
vec_foreach_index (ii, allowed_ips)
{
- peer->allowed_ips[ii].prefix = allowed_ips[ii];
+ peer->allowed_ips[ii] = allowed_ips[ii];
}
- wg_peer_fib_populate (peer,
- fib_table_get_index_for_sw_if_index
- (FIB_PROTOCOL_IP4, peer->wg_sw_if_index));
-
+ fib_protocol_t proto;
+ FOR_EACH_FIB_IP_PROTOCOL (proto)
+ {
+ adj_nbr_walk (wg_sw_if_index, proto, wg_peer_adj_walk, &perri);
+ }
return (0);
}
+void
+wg_peer_update_flags (index_t peeri, wg_peer_flags flag, bool add_del)
+{
+ wg_peer_t *peer = wg_peer_get (peeri);
+ if ((add_del && (peer->flags & flag)) || (!add_del && !(peer->flags & flag)))
+ {
+ return;
+ }
+
+ peer->flags ^= flag;
+ wg_api_peer_event (peeri, peer->flags);
+}
+
+void
+wg_peer_update_endpoint (index_t peeri, const ip46_address_t *addr, u16 port)
+{
+ wg_peer_t *peer = wg_peer_get (peeri);
+
+ if (ip46_address_is_equal (&peer->dst.addr, addr) && peer->dst.port == port)
+ return;
+
+ wg_peer_endpoint_init (&peer->dst, addr, port);
+
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->dst.addr);
+ vec_free (peer->rewrite);
+ peer->rewrite = wg_build_rewrite (&peer->src.addr, peer->src.port,
+ &peer->dst.addr, peer->dst.port, is_ip4);
+
+ adj_index_t *adj_index;
+ vec_foreach (adj_index, peer->adj_indices)
+ {
+ if (adj_is_valid (*adj_index))
+ {
+ adj_midchain_fixup_t fixup =
+ wg_peer_get_fixup (peer, adj_get_link_type (*adj_index));
+ adj_nbr_midchain_update_rewrite (*adj_index, fixup, NULL,
+ ADJ_FLAG_MIDCHAIN_IP_STACK,
+ vec_dup (peer->rewrite));
+
+ wg_peer_adj_reset_stacking (*adj_index);
+ wg_peer_adj_stack (peer, *adj_index);
+ }
+ }
+}
+
+typedef struct wg_peer_upd_ep_args_t_
+{
+ index_t peeri;
+ ip46_address_t addr;
+ u16 port;
+} wg_peer_upd_ep_args_t;
+
+static void
+wg_peer_update_endpoint_thread_fn (wg_peer_upd_ep_args_t *args)
+{
+ wg_peer_update_endpoint (args->peeri, &args->addr, args->port);
+}
+
+void
+wg_peer_update_endpoint_from_mt (index_t peeri, const ip46_address_t *addr,
+ u16 port)
+{
+ wg_peer_upd_ep_args_t args = {
+ .peeri = peeri,
+ .port = port,
+ };
+
+ ip46_address_copy (&args.addr, addr);
+ vlib_rpc_call_main_thread (wg_peer_update_endpoint_thread_fn, (u8 *) &args,
+ sizeof (args));
+}
+
int
-wg_peer_add (u32 tun_sw_if_index,
- const u8 public_key[NOISE_PUBLIC_KEY_LEN],
- u32 table_id,
- const ip46_address_t * endpoint,
- const fib_prefix_t * allowed_ips,
- u16 port, u16 persistent_keepalive, u32 * peer_index)
+wg_peer_add (u32 tun_sw_if_index, const u8 public_key[NOISE_PUBLIC_KEY_LEN],
+ u32 table_id, const ip46_address_t *endpoint,
+ const fib_prefix_t *allowed_ips, u16 port,
+ u16 persistent_keepalive, u32 *peer_index)
{
wg_if_t *wg_if;
wg_peer_t *peer;
@@ -290,7 +455,6 @@ wg_peer_add (u32 tun_sw_if_index,
if (!wg_if)
return (VNET_API_ERROR_INVALID_SW_IF_INDEX);
- /* *INDENT-OFF* */
pool_foreach (peer, wg_peer_pool)
{
if (!memcmp (peer->remote.r_public, public_key, NOISE_PUBLIC_KEY_LEN))
@@ -298,12 +462,11 @@ wg_peer_add (u32 tun_sw_if_index,
return (VNET_API_ERROR_ENTRY_ALREADY_EXISTS);
}
}
- /* *INDENT-ON* */
if (pool_elts (wg_peer_pool) > MAX_PEERS)
return (VNET_API_ERROR_LIMIT_EXCEEDED);
- pool_get (wg_peer_pool, peer);
+ pool_get_zero (wg_peer_pool, peer);
wg_peer_init (vm, peer);
@@ -317,13 +480,13 @@ wg_peer_add (u32 tun_sw_if_index,
return (rv);
}
- noise_remote_init (&peer->remote, peer - wg_peer_pool, public_key,
+ noise_remote_init (vm, &peer->remote, peer - wg_peer_pool, public_key,
wg_if->local_idx);
cookie_maker_init (&peer->cookie_maker, public_key);
- if (peer->persistent_keepalive_interval != 0)
+ if (vnet_sw_interface_is_admin_up (vnet_get_main (), tun_sw_if_index))
{
- wg_send_keepalive (vm, peer);
+ wg_timers_send_first_handshake (peer);
}
*peer_index = peer - wg_peer_pool;
@@ -347,9 +510,6 @@ wg_peer_remove (index_t peeri)
wgi = wg_if_get (wg_if_find_by_sw_if_index (peer->wg_sw_if_index));
wg_if_peer_remove (wgi, peeri);
- vnet_feature_enable_disable ("ip4-output", "wg-output-tun",
- peer->wg_sw_if_index, 0, 0, 0);
-
noise_remote_clear (wmp->vlib_main, &peer->remote);
wg_peer_clear (wmp->vlib_main, peer);
pool_put (wg_peer_pool, peer);
@@ -362,13 +522,11 @@ wg_peer_walk (wg_peer_walk_cb_t fn, void *data)
{
index_t peeri;
- /* *INDENT-OFF* */
pool_foreach_index (peeri, wg_peer_pool)
{
if (WALK_STOP == fn(peeri, data))
return peeri;
}
- /* *INDENT-ON* */
return INDEX_INVALID;
}
@@ -377,8 +535,8 @@ format_wg_peer_endpoint (u8 * s, va_list * args)
{
wg_peer_endpoint_t *ep = va_arg (*args, wg_peer_endpoint_t *);
- s = format (s, "%U:%d",
- format_ip46_address, &ep->addr, IP46_TYPE_ANY, ep->port);
+ s = format (s, "%U:%d", format_ip46_address, &ep->addr, IP46_TYPE_ANY,
+ ep->port);
return (s);
}
@@ -387,48 +545,37 @@ u8 *
format_wg_peer (u8 * s, va_list * va)
{
index_t peeri = va_arg (*va, index_t);
- wg_peer_allowed_ip_t *allowed_ip;
+ fib_prefix_t *allowed_ip;
+ adj_index_t *adj_index;
u8 key[NOISE_KEY_LEN_BASE64];
wg_peer_t *peer;
peer = wg_peer_get (peeri);
key_to_base64 (peer->remote.r_public, NOISE_PUBLIC_KEY_LEN, key);
- s = format (s, "[%d] endpoint:[%U->%U] %U keep-alive:%d adj:%d",
- peeri,
- format_wg_peer_endpoint, &peer->src,
- format_wg_peer_endpoint, &peer->dst,
- format_vnet_sw_if_index_name, vnet_get_main (),
- peer->wg_sw_if_index,
- peer->persistent_keepalive_interval, peer->adj_index);
- s = format (s, "\n key:%=s %U",
- key, format_hex_bytes, peer->remote.r_public,
- NOISE_PUBLIC_KEY_LEN);
+ s = format (
+ s,
+ "[%d] endpoint:[%U->%U] %U keep-alive:%d flags: %d, api-clients count: %d",
+ peeri, format_wg_peer_endpoint, &peer->src, format_wg_peer_endpoint,
+ &peer->dst, format_vnet_sw_if_index_name, vnet_get_main (),
+ peer->wg_sw_if_index, peer->persistent_keepalive_interval, peer->flags,
+ pool_elts (peer->api_clients));
+ s = format (s, "\n adj:");
+ vec_foreach (adj_index, peer->adj_indices)
+ {
+ s = format (s, " %d", *adj_index);
+ }
+ s = format (s, "\n key:%=s %U", key, format_hex_bytes,
+ peer->remote.r_public, NOISE_PUBLIC_KEY_LEN);
s = format (s, "\n allowed-ips:");
vec_foreach (allowed_ip, peer->allowed_ips)
{
- s = format (s, " %U", format_fib_prefix, &allowed_ip->prefix);
+ s = format (s, " %U", format_fib_prefix, allowed_ip);
}
return s;
}
-static clib_error_t *
-wg_peer_module_init (vlib_main_t * vm)
-{
- /*
- * use a priority better than interface source, so that
- * if the same subnet is added to the wg interface and is
- * used as an allowed IP, then the wireguard soueced prefix
- * wins and traffic is routed to the endpoint rather than dropped
- */
- wg_fib_source = fib_source_allocate ("wireguard", 0x2, FIB_SOURCE_BH_API);
-
- return (NULL);
-}
-
-VLIB_INIT_FUNCTION (wg_peer_module_init);
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/wireguard/wireguard_peer.h b/src/plugins/wireguard/wireguard_peer.h
index b60c669ac0f..613c2640ad1 100644
--- a/src/plugins/wireguard/wireguard_peer.h
+++ b/src/plugins/wireguard/wireguard_peer.h
@@ -17,6 +17,8 @@
#ifndef __included_wg_peer_h__
#define __included_wg_peer_h__
+#include <vlibapi/api_helper_macros.h>
+
#include <vnet/ip/ip.h>
#include <wireguard/wireguard_cookie.h>
@@ -31,13 +33,28 @@ typedef struct ip4_udp_header_t_
udp_header_t udp;
} __clib_packed ip4_udp_header_t;
-u8 *format_ip4_udp_header (u8 * s, va_list * va);
+typedef struct ip4_udp_wg_header_t_
+{
+ ip4_header_t ip4;
+ udp_header_t udp;
+ message_data_t wg;
+} __clib_packed ip4_udp_wg_header_t;
+
+typedef struct ip6_udp_header_t_
+{
+ ip6_header_t ip6;
+ udp_header_t udp;
+} __clib_packed ip6_udp_header_t;
-typedef struct wg_peer_allowed_ip_t_
+typedef struct ip6_udp_wg_header_t_
{
- fib_prefix_t prefix;
- fib_node_index_t fib_entry_index;
-} wg_peer_allowed_ip_t;
+ ip6_header_t ip6;
+ udp_header_t udp;
+ message_data_t wg;
+} __clib_packed ip6_udp_wg_header_t;
+
+u8 *format_ip4_udp_header (u8 * s, va_list * va);
+u8 *format_ip6_udp_header (u8 *s, va_list *va);
typedef struct wg_peer_endpoint_t_
{
@@ -45,6 +62,12 @@ typedef struct wg_peer_endpoint_t_
u16 port;
} wg_peer_endpoint_t;
+typedef enum
+{
+ WG_PEER_STATUS_DEAD = 0x1,
+ WG_PEER_ESTABLISHED = 0x2,
+} wg_peer_flags;
+
typedef struct wg_peer
{
noise_remote_t remote;
@@ -57,17 +80,22 @@ typedef struct wg_peer
wg_peer_endpoint_t dst;
wg_peer_endpoint_t src;
u32 table_id;
- adj_index_t adj_index;
+ adj_index_t *adj_indices;
/* rewrite built from address information */
u8 *rewrite;
/* Vector of allowed-ips */
- wg_peer_allowed_ip_t *allowed_ips;
+ fib_prefix_t *allowed_ips;
/* The WG interface this peer is attached to */
u32 wg_sw_if_index;
+ /* API client registered for events */
+ vpe_client_registration_t *api_clients;
+ uword *api_client_by_client_index;
+ wg_peer_flags flags;
+
/* Timers */
tw_timer_wheel_16t_2w_512sl_t *timer_wheel;
u32 timers[WG_N_TIMERS];
@@ -88,7 +116,8 @@ typedef struct wg_peer
bool timer_need_another_keepalive;
- bool is_dead;
+ /* Handshake is sent to main thread? */
+ bool handshake_is_sent;
} wg_peer_t;
typedef struct wg_peer_table_bind_ctx_t_
@@ -111,9 +140,23 @@ index_t wg_peer_walk (wg_peer_walk_cb_t fn, void *data);
u8 *format_wg_peer (u8 * s, va_list * va);
-walk_rc_t wg_peer_if_admin_state_change (wg_if_t * wgi, index_t peeri,
- void *data);
-walk_rc_t wg_peer_if_table_change (wg_if_t * wgi, index_t peeri, void *data);
+walk_rc_t wg_peer_if_admin_state_change (index_t peeri, void *data);
+walk_rc_t wg_peer_if_delete (index_t peeri, void *data);
+walk_rc_t wg_peer_if_adj_change (index_t peeri, void *data);
+adj_walk_rc_t wg_peer_adj_walk (adj_index_t ai, void *data);
+
+void wg_api_peer_event (index_t peeri, wg_peer_flags flags);
+void wg_peer_update_flags (index_t peeri, wg_peer_flags flag, bool add_del);
+void wg_peer_update_endpoint (index_t peeri, const ip46_address_t *addr,
+ u16 port);
+void wg_peer_update_endpoint_from_mt (index_t peeri,
+ const ip46_address_t *addr, u16 port);
+
+static inline bool
+wg_peer_is_dead (wg_peer_t *peer)
+{
+ return peer && peer->flags & WG_PEER_STATUS_DEAD;
+}
/*
* Expoed for the data-plane
@@ -130,6 +173,8 @@ wg_peer_get (index_t peeri)
static inline index_t
wg_peer_get_by_adj_index (index_t ai)
{
+ if (ai >= vec_len (wg_peer_by_adj_index))
+ return INDEX_INVALID;
return (wg_peer_by_adj_index[ai]);
}
@@ -145,6 +190,29 @@ wg_peer_assign_thread (u32 thread_id)
1) : thread_id));
}
+static_always_inline bool
+fib_prefix_is_cover_addr_46 (const fib_prefix_t *p1, const ip46_address_t *ip)
+{
+ switch (p1->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ return (ip4_destination_matches_route (&ip4_main, &p1->fp_addr.ip4,
+ &ip->ip4, p1->fp_len) != 0);
+ case FIB_PROTOCOL_IP6:
+ return (ip6_destination_matches_route (&ip6_main, &p1->fp_addr.ip6,
+ &ip->ip6, p1->fp_len) != 0);
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+ return (false);
+}
+
+static inline bool
+wg_peer_can_send (wg_peer_t *peer)
+{
+ return peer && peer->rewrite;
+}
+
#endif // __included_wg_peer_h__
/*
diff --git a/src/plugins/wireguard/wireguard_send.c b/src/plugins/wireguard/wireguard_send.c
index f492e05c175..41b2e7706a1 100755..100644
--- a/src/plugins/wireguard/wireguard_send.c
+++ b/src/plugins/wireguard/wireguard_send.c
@@ -22,11 +22,11 @@
#include <wireguard/wireguard_send.h>
static int
-ip46_enqueue_packet (vlib_main_t * vm, u32 bi0, int is_ip6)
+ip46_enqueue_packet (vlib_main_t *vm, u32 bi0, int is_ip4)
{
vlib_frame_t *f = 0;
u32 lookup_node_index =
- is_ip6 ? ip6_lookup_node.index : ip4_lookup_node.index;
+ is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
f = vlib_get_frame_to_node (vm, lookup_node_index);
/* f can not be NULL here - frame allocation failure causes panic */
@@ -41,25 +41,51 @@ ip46_enqueue_packet (vlib_main_t * vm, u32 bi0, int is_ip6)
}
static void
-wg_buffer_prepend_rewrite (vlib_buffer_t * b0, const wg_peer_t * peer)
+wg_buffer_prepend_rewrite (vlib_main_t *vm, vlib_buffer_t *b0,
+ const u8 *rewrite, u8 is_ip4)
{
- ip4_udp_header_t *hdr;
+ if (is_ip4)
+ {
+ ip4_udp_header_t *hdr4;
+
+ vlib_buffer_advance (b0, -sizeof (*hdr4));
+
+ hdr4 = vlib_buffer_get_current (b0);
+
+ /* copy only ip4 and udp header; wireguard header not needed */
+ clib_memcpy (hdr4, rewrite, sizeof (ip4_udp_header_t));
+
+ hdr4->udp.length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (ip4_header_t));
+ ip4_header_set_len_w_chksum (&hdr4->ip4,
+ clib_host_to_net_u16 (b0->current_length));
+ }
+ else
+ {
+ ip6_udp_header_t *hdr6;
+
+ vlib_buffer_advance (b0, -sizeof (*hdr6));
- vlib_buffer_advance (b0, -sizeof (*hdr));
+ hdr6 = vlib_buffer_get_current (b0);
- hdr = vlib_buffer_get_current (b0);
- clib_memcpy (hdr, peer->rewrite, vec_len (peer->rewrite));
+ /* copy only ip6 and udp header; wireguard header not needed */
+ clib_memcpy (hdr6, rewrite, sizeof (ip6_udp_header_t));
- hdr->udp.length =
- clib_host_to_net_u16 (b0->current_length - sizeof (ip4_header_t));
- ip4_header_set_len_w_chksum (&hdr->ip4,
- clib_host_to_net_u16 (b0->current_length));
+ hdr6->ip6.payload_length = hdr6->udp.length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (ip6_header_t));
+
+ /* IPv6 UDP checksum is mandatory */
+ int bogus = 0;
+ ip6_header_t *ip6_0 = &(hdr6->ip6);
+ hdr6->udp.checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6_0, &bogus);
+ ASSERT (bogus == 0);
+ }
}
static bool
-wg_create_buffer (vlib_main_t * vm,
- const wg_peer_t * peer,
- const u8 * packet, u32 packet_len, u32 * bi)
+wg_create_buffer (vlib_main_t *vm, const u8 *rewrite, const u8 *packet,
+ u32 packet_len, u32 *bi, u8 is_ip4)
{
u32 n_buf0 = 0;
vlib_buffer_t *b0;
@@ -75,23 +101,75 @@ wg_create_buffer (vlib_main_t * vm,
b0->current_length = packet_len;
- wg_buffer_prepend_rewrite (b0, peer);
+ wg_buffer_prepend_rewrite (vm, b0, rewrite, is_ip4);
return true;
}
+u8 *
+wg_build_rewrite (ip46_address_t *src_addr, u16 src_port,
+ ip46_address_t *dst_addr, u16 dst_port, u8 is_ip4)
+{
+ if (ip46_address_is_zero (dst_addr) || 0 == dst_port)
+ return NULL;
+
+ u8 *rewrite = NULL;
+ if (is_ip4)
+ {
+ ip4_udp_header_t *hdr;
+
+ /* reserve space for ip4, udp and wireguard headers */
+ vec_validate (rewrite, sizeof (ip4_udp_wg_header_t) - 1);
+ hdr = (ip4_udp_header_t *) rewrite;
+
+ hdr->ip4.ip_version_and_header_length = 0x45;
+ hdr->ip4.ttl = 64;
+ hdr->ip4.src_address = src_addr->ip4;
+ hdr->ip4.dst_address = dst_addr->ip4;
+ hdr->ip4.protocol = IP_PROTOCOL_UDP;
+ hdr->ip4.checksum = ip4_header_checksum (&hdr->ip4);
+
+ hdr->udp.src_port = clib_host_to_net_u16 (src_port);
+ hdr->udp.dst_port = clib_host_to_net_u16 (dst_port);
+ hdr->udp.checksum = 0;
+ }
+ else
+ {
+ ip6_udp_header_t *hdr;
+
+ /* reserve space for ip6, udp and wireguard headers */
+ vec_validate (rewrite, sizeof (ip6_udp_wg_header_t) - 1);
+ hdr = (ip6_udp_header_t *) rewrite;
+
+ hdr->ip6.ip_version_traffic_class_and_flow_label = 0x60;
+ ip6_address_copy (&hdr->ip6.src_address, &src_addr->ip6);
+ ip6_address_copy (&hdr->ip6.dst_address, &dst_addr->ip6);
+ hdr->ip6.protocol = IP_PROTOCOL_UDP;
+ hdr->ip6.hop_limit = 64;
+
+ hdr->udp.src_port = clib_host_to_net_u16 (src_port);
+ hdr->udp.dst_port = clib_host_to_net_u16 (dst_port);
+ hdr->udp.checksum = 0;
+ }
+
+ return (rewrite);
+}
+
bool
wg_send_handshake (vlib_main_t * vm, wg_peer_t * peer, bool is_retry)
{
ASSERT (vm->thread_index == 0);
+ if (!wg_peer_can_send (peer))
+ return false;
+
message_handshake_initiation_t packet;
if (!is_retry)
peer->timer_handshake_attempts = 0;
- if (!wg_birthdate_has_expired (peer->last_sent_handshake,
- REKEY_TIMEOUT) || peer->is_dead)
+ if (!wg_birthdate_has_expired (peer->last_sent_handshake, REKEY_TIMEOUT) ||
+ wg_peer_is_dead (peer))
return true;
if (noise_create_initiation (vm,
@@ -113,11 +191,13 @@ wg_send_handshake (vlib_main_t * vm, wg_peer_t * peer, bool is_retry)
else
return false;
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->dst.addr);
u32 bi0 = 0;
- if (!wg_create_buffer (vm, peer, (u8 *) & packet, sizeof (packet), &bi0))
+ if (!wg_create_buffer (vm, peer->rewrite, (u8 *) &packet, sizeof (packet),
+ &bi0, is_ip4))
return false;
- ip46_enqueue_packet (vm, bi0, false);
+ ip46_enqueue_packet (vm, bi0, is_ip4);
return true;
}
@@ -134,8 +214,11 @@ wg_send_handshake_thread_fn (void *arg)
wg_main_t *wmp = &wg_main;
wg_peer_t *peer = wg_peer_get (a->peer_idx);
+ bool handshake;
wg_send_handshake (wmp->vlib_main, peer, a->is_retry);
+ handshake = false;
+ __atomic_store_n (&peer->handshake_is_sent, handshake, __ATOMIC_RELEASE);
return 0;
}
@@ -147,8 +230,18 @@ wg_send_handshake_from_mt (u32 peer_idx, bool is_retry)
.is_retry = is_retry,
};
- vl_api_rpc_call_main_thread (wg_send_handshake_thread_fn,
- (u8 *) & a, sizeof (a));
+ wg_peer_t *peer = wg_peer_get (peer_idx);
+
+ bool handshake =
+ __atomic_load_n (&peer->handshake_is_sent, __ATOMIC_ACQUIRE);
+
+ if (handshake == false)
+ {
+ handshake = true;
+ __atomic_store_n (&peer->handshake_is_sent, handshake, __ATOMIC_RELEASE);
+ vl_api_rpc_call_main_thread (wg_send_handshake_thread_fn, (u8 *) &a,
+ sizeof (a));
+ }
}
bool
@@ -156,6 +249,9 @@ wg_send_keepalive (vlib_main_t * vm, wg_peer_t * peer)
{
ASSERT (vm->thread_index == 0);
+ if (!wg_peer_can_send (peer))
+ return false;
+
u32 size_of_packet = message_data_len (0);
message_data_t *packet =
(message_data_t *) wg_main.per_thread_data[vm->thread_index].data;
@@ -181,19 +277,22 @@ wg_send_keepalive (vlib_main_t * vm, wg_peer_t * peer)
}
else if (PREDICT_FALSE (state == SC_FAILED))
{
+ wg_peer_update_flags (peer - wg_peer_pool, WG_PEER_ESTABLISHED, false);
ret = false;
goto out;
}
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->dst.addr);
packet->header.type = MESSAGE_DATA;
- if (!wg_create_buffer (vm, peer, (u8 *) packet, size_of_packet, &bi0))
+ if (!wg_create_buffer (vm, peer->rewrite, (u8 *) packet, size_of_packet,
+ &bi0, is_ip4))
{
ret = false;
goto out;
}
- ip46_enqueue_packet (vm, bi0, false);
+ ip46_enqueue_packet (vm, bi0, is_ip4);
wg_timers_any_authenticated_packet_sent (peer);
wg_timers_any_authenticated_packet_traversal (peer);
@@ -207,6 +306,9 @@ wg_send_handshake_response (vlib_main_t * vm, wg_peer_t * peer)
{
message_handshake_response_t packet;
+ if (!wg_peer_can_send (peer))
+ return false;
+
if (noise_create_response (vm,
&peer->remote,
&packet.sender_index,
@@ -223,20 +325,52 @@ wg_send_handshake_response (vlib_main_t * vm, wg_peer_t * peer)
wg_timers_session_derived (peer);
wg_timers_any_authenticated_packet_sent (peer);
wg_timers_any_authenticated_packet_traversal (peer);
- peer->last_sent_handshake = vlib_time_now (vm);
u32 bi0 = 0;
- if (!wg_create_buffer (vm, peer, (u8 *) & packet,
- sizeof (packet), &bi0))
+ u8 is_ip4 = ip46_address_is_ip4 (&peer->dst.addr);
+ if (!wg_create_buffer (vm, peer->rewrite, (u8 *) &packet,
+ sizeof (packet), &bi0, is_ip4))
return false;
- ip46_enqueue_packet (vm, bi0, false);
+ ip46_enqueue_packet (vm, bi0, is_ip4);
+ return true;
}
- else
- return false;
+ return false;
}
- else
+ return false;
+}
+
+bool
+wg_send_handshake_cookie (vlib_main_t *vm, u32 sender_index,
+ cookie_checker_t *cookie_checker,
+ message_macs_t *macs, ip46_address_t *wg_if_addr,
+ u16 wg_if_port, ip46_address_t *remote_addr,
+ u16 remote_port)
+{
+ message_handshake_cookie_t packet;
+ u8 *rewrite;
+
+ packet.header.type = MESSAGE_HANDSHAKE_COOKIE;
+ packet.receiver_index = sender_index;
+
+ cookie_checker_create_payload (vm, cookie_checker, macs, packet.nonce,
+ packet.encrypted_cookie, remote_addr,
+ remote_port);
+
+ u32 bi0 = 0;
+ u8 is_ip4 = ip46_address_is_ip4 (remote_addr);
+ bool ret;
+ rewrite = wg_build_rewrite (wg_if_addr, wg_if_port, remote_addr, remote_port,
+ is_ip4);
+
+ ret = wg_create_buffer (vm, rewrite, (u8 *) &packet, sizeof (packet), &bi0,
+ is_ip4);
+ vec_free (rewrite);
+ if (!ret)
return false;
+
+ ip46_enqueue_packet (vm, bi0, is_ip4);
+
return true;
}
diff --git a/src/plugins/wireguard/wireguard_send.h b/src/plugins/wireguard/wireguard_send.h
index 9575b84b659..419783a5db2 100755..100644
--- a/src/plugins/wireguard/wireguard_send.h
+++ b/src/plugins/wireguard/wireguard_send.h
@@ -19,10 +19,17 @@
#include <wireguard/wireguard_peer.h>
+u8 *wg_build_rewrite (ip46_address_t *src_addr, u16 src_port,
+ ip46_address_t *dst_addr, u16 dst_port, u8 is_ip4);
bool wg_send_keepalive (vlib_main_t * vm, wg_peer_t * peer);
bool wg_send_handshake (vlib_main_t * vm, wg_peer_t * peer, bool is_retry);
void wg_send_handshake_from_mt (u32 peer_index, bool is_retry);
bool wg_send_handshake_response (vlib_main_t * vm, wg_peer_t * peer);
+bool wg_send_handshake_cookie (vlib_main_t *vm, u32 sender_index,
+ cookie_checker_t *cookie_checker,
+ message_macs_t *macs,
+ ip46_address_t *wg_if_addr, u16 wg_if_port,
+ ip46_address_t *remote_addr, u16 remote_port);
always_inline void
ip4_header_set_len_w_chksum (ip4_header_t * ip4, u16 len)
diff --git a/src/plugins/wireguard/wireguard_timer.c b/src/plugins/wireguard/wireguard_timer.c
index b245b853fb5..237e67c1f06 100644
--- a/src/plugins/wireguard/wireguard_timer.c
+++ b/src/plugins/wireguard/wireguard_timer.c
@@ -26,6 +26,13 @@ get_random_u32_max (u32 max)
return random_u32 (&seed) % max;
}
+static u32
+get_random_u32_max_opt (u32 max, f64 time)
+{
+ u32 seed = (u32) (time * 1e6);
+ return random_u32 (&seed) % max;
+}
+
static void
stop_timer (wg_peer_t * peer, u32 timer_id)
{
@@ -66,7 +73,7 @@ start_timer_thread_fn (void *arg)
return 0;
}
-static void
+static_always_inline void
start_timer_from_mt (u32 peer_idx, u32 timer_id, u32 interval_ticks)
{
wg_timers_args a = {
@@ -191,14 +198,14 @@ wg_expired_zero_key_material (vlib_main_t * vm, wg_peer_t * peer)
return;
}
- if (!peer->is_dead)
+ if (!wg_peer_is_dead (peer))
{
noise_remote_clear (vm, &peer->remote);
}
}
-void
-wg_timers_any_authenticated_packet_traversal (wg_peer_t * peer)
+inline void
+wg_timers_any_authenticated_packet_traversal (wg_peer_t *peer)
{
if (peer->persistent_keepalive_interval)
{
@@ -214,6 +221,12 @@ wg_timers_any_authenticated_packet_sent (wg_peer_t * peer)
peer->last_sent_packet = vlib_time_now (vlib_get_main ());
}
+inline void
+wg_timers_any_authenticated_packet_sent_opt (wg_peer_t *peer, f64 time)
+{
+ peer->last_sent_packet = time;
+}
+
void
wg_timers_handshake_initiated (wg_peer_t * peer)
{
@@ -226,6 +239,16 @@ wg_timers_handshake_initiated (wg_peer_t * peer)
}
void
+wg_timers_send_first_handshake (wg_peer_t *peer)
+{
+ // zero value is not allowed
+ peer->new_handshake_interval_tick =
+ get_random_u32_max (REKEY_TIMEOUT_JITTER) + 1;
+ start_timer_from_mt (peer - wg_peer_pool, WG_TIMER_NEW_HANDSHAKE,
+ peer->new_handshake_interval_tick);
+}
+
+void
wg_timers_session_derived (wg_peer_t * peer)
{
peer->session_derived = vlib_time_now (vlib_get_main ());
@@ -246,6 +269,17 @@ wg_timers_data_sent (wg_peer_t * peer)
peer->new_handshake_interval_tick);
}
+inline void
+wg_timers_data_sent_opt (wg_peer_t *peer, f64 time)
+{
+ peer->new_handshake_interval_tick =
+ (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * WHZ +
+ get_random_u32_max_opt (REKEY_TIMEOUT_JITTER, time);
+
+ start_timer_from_mt (peer - wg_peer_pool, WG_TIMER_NEW_HANDSHAKE,
+ peer->new_handshake_interval_tick);
+}
+
/* Should be called after an authenticated data packet is received. */
void
wg_timers_data_received (wg_peer_t * peer)
@@ -275,6 +309,12 @@ wg_timers_any_authenticated_packet_received (wg_peer_t * peer)
peer->last_received_packet = vlib_time_now (vlib_get_main ());
}
+inline void
+wg_timers_any_authenticated_packet_received_opt (wg_peer_t *peer, f64 time)
+{
+ peer->last_received_packet = time;
+}
+
static vlib_node_registration_t wg_timer_mngr_node;
static void
@@ -394,14 +434,12 @@ wg_timers_stop (wg_peer_t * peer)
}
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (wg_timer_mngr_node, static) = {
.function = wg_timer_mngr_fn,
.type = VLIB_NODE_TYPE_PROCESS,
.name =
"wg-timer-manager",
};
-/* *INDENT-ON* */
void
wg_feature_init (wg_main_t * wmp)
diff --git a/src/plugins/wireguard/wireguard_timer.h b/src/plugins/wireguard/wireguard_timer.h
index 6b59a39f815..47638bfd74d 100755..100644
--- a/src/plugins/wireguard/wireguard_timer.h
+++ b/src/plugins/wireguard/wireguard_timer.h
@@ -41,11 +41,16 @@ typedef struct wg_peer wg_peer_t;
void wg_timer_wheel_init ();
void wg_timers_stop (wg_peer_t * peer);
void wg_timers_data_sent (wg_peer_t * peer);
+void wg_timers_data_sent_opt (wg_peer_t *peer, f64 time);
void wg_timers_data_received (wg_peer_t * peer);
void wg_timers_any_authenticated_packet_sent (wg_peer_t * peer);
+void wg_timers_any_authenticated_packet_sent_opt (wg_peer_t *peer, f64 time);
void wg_timers_any_authenticated_packet_received (wg_peer_t * peer);
+void wg_timers_any_authenticated_packet_received_opt (wg_peer_t *peer,
+ f64 time);
void wg_timers_handshake_initiated (wg_peer_t * peer);
void wg_timers_handshake_complete (wg_peer_t * peer);
+void wg_timers_send_first_handshake (wg_peer_t *peer);
void wg_timers_session_derived (wg_peer_t * peer);
void wg_timers_any_authenticated_packet_traversal (wg_peer_t * peer);
@@ -53,10 +58,19 @@ void wg_timers_any_authenticated_packet_traversal (wg_peer_t * peer);
static inline bool
wg_birthdate_has_expired (f64 birthday_seconds, f64 expiration_seconds)
{
+ if (birthday_seconds == 0.0)
+ return true;
f64 now_seconds = vlib_time_now (vlib_get_main ());
return (birthday_seconds + expiration_seconds) < now_seconds;
}
+static_always_inline bool
+wg_birthdate_has_expired_opt (f64 birthday_seconds, f64 expiration_seconds,
+ f64 time)
+{
+ return (birthday_seconds + expiration_seconds) < time;
+}
+
#endif /* __included_wg_timer_h__ */
/*
diff --git a/src/scripts/fts.py b/src/scripts/fts.py
index b579190e433..e8ff477c788 100755
--- a/src/scripts/fts.py
+++ b/src/scripts/fts.py
@@ -2,6 +2,7 @@
import sys
import os
+import os.path
import ipaddress
import yaml
from pprint import pprint
@@ -10,6 +11,7 @@ from jsonschema import validate, exceptions
import argparse
from subprocess import run, PIPE
from io import StringIO
+import urllib.parse
# VPP feature JSON schema
schema = {
@@ -19,25 +21,28 @@ schema = {
"name": {"type": "string"},
"description": {"type": "string"},
"maintainer": {"$ref": "#/definitions/maintainers"},
- "state": {"type": "string",
- "enum": ["production", "experimental", "development"]},
+ "state": {
+ "type": "string",
+ "enum": ["production", "experimental", "development"],
+ },
"features": {"$ref": "#/definitions/features"},
"missing": {"$ref": "#/definitions/features"},
- "properties": {"type": "array",
- "items": {"type": "string",
- "enum": ["API", "CLI", "STATS",
- "MULTITHREAD"]},
- },
+ "properties": {
+ "type": "array",
+ "items": {"type": "string", "enum": ["API", "CLI", "STATS", "MULTITHREAD"]},
+ },
},
"additionalProperties": False,
"definitions": {
"maintainers": {
- "anyof": [{
- "type": "array",
- "items": {"type": "string"},
- "minItems": 1,
- },
- {"type": "string"}],
+ "anyof": [
+ {
+ "type": "array",
+ "items": {"type": "string"},
+ "minItems": 1,
+ },
+ {"type": "string"},
+ ],
},
"featureobject": {
"type": "object",
@@ -47,23 +52,28 @@ schema = {
},
"features": {
"type": "array",
- "items": {"anyOf": [{"$ref": "#/definitions/featureobject"},
- {"type": "string"},
- ]},
+ "items": {
+ "anyOf": [
+ {"$ref": "#/definitions/featureobject"},
+ {"type": "string"},
+ ]
+ },
"minItems": 1,
},
},
}
+DEFAULT_REPO_LINK = "https://github.com/FDio/vpp/blob/master/"
+
def filelist_from_git_status():
filelist = []
- git_status = 'git status --porcelain */FEATURE*.yaml'
+ git_status = "git status --porcelain */FEATURE*.yaml"
rv = run(git_status.split(), stdout=PIPE, stderr=PIPE)
if rv.returncode != 0:
sys.exit(rv.returncode)
- for l in rv.stdout.decode('ascii').split('\n'):
+ for l in rv.stdout.decode("ascii").split("\n"):
if len(l):
filelist.append(l.split()[1])
return filelist
@@ -71,24 +81,26 @@ def filelist_from_git_status():
def filelist_from_git_ls():
filelist = []
- git_ls = 'git ls-files :(top)*/FEATURE*.yaml'
+ git_ls = "git ls-files :(top)*/FEATURE*.yaml"
rv = run(git_ls.split(), stdout=PIPE, stderr=PIPE)
if rv.returncode != 0:
sys.exit(rv.returncode)
- for l in rv.stdout.decode('ascii').split('\n'):
+ for l in rv.stdout.decode("ascii").split("\n"):
if len(l):
filelist.append(l)
return filelist
+
def version_from_git():
- git_describe = 'git describe'
+ git_describe = "git describe"
rv = run(git_describe.split(), stdout=PIPE, stderr=PIPE)
if rv.returncode != 0:
sys.exit(rv.returncode)
- return rv.stdout.decode('ascii').split('\n')[0]
+ return rv.stdout.decode("ascii").split("\n")[0]
+
-class MarkDown():
+class MarkDown:
_dispatch = {}
def __init__(self, stream):
@@ -98,105 +110,115 @@ class MarkDown():
def print_maintainer(self, o):
write = self.stream.write
if type(o) is list:
- write('Maintainers: ' +
- ', '.join('{m}'.format(m=m) for m in
- o) + ' \n')
+ write("Maintainers: " + ", ".join("{m}".format(m=m) for m in o) + " \n")
else:
- write('Maintainer: {o} \n'.format(o=o))
+ write("Maintainer: {o} \n".format(o=o))
- _dispatch['maintainer'] = print_maintainer
+ _dispatch["maintainer"] = print_maintainer
def print_features(self, o, indent=0):
write = self.stream.write
for f in o:
- indentstr = ' ' * indent
+ indentstr = " " * indent
if type(f) is dict:
for k, v in f.items():
- write('{indentstr}- {k}\n'.format(indentstr=indentstr, k=k))
+ write("{indentstr}- {k}\n".format(indentstr=indentstr, k=k))
self.print_features(v, indent + 2)
else:
- write('{indentstr}- {f}\n'.format(indentstr=indentstr, f=f))
- write('\n')
- _dispatch['features'] = print_features
+ write("{indentstr}- {f}\n".format(indentstr=indentstr, f=f))
+ write("\n")
+
+ _dispatch["features"] = print_features
def print_markdown_header(self, o):
write = self.stream.write
- write('## {o}\n'.format(o=o))
- version = version_from_git()
- write('VPP version: {version}\n\n'.format(version=version))
- _dispatch['markdown_header'] = print_markdown_header
+ write("## {o}\n".format(o=o))
+
+ _dispatch["markdown_header"] = print_markdown_header
def print_name(self, o):
write = self.stream.write
- write('### {o}\n'.format(o=o))
+ write("### {o}\n".format(o=o))
self.toc.append(o)
- _dispatch['name'] = print_name
+
+ _dispatch["name"] = print_name
def print_description(self, o):
write = self.stream.write
- write('\n{o}\n\n'.format(o=o))
- _dispatch['description'] = print_description
+ write("\n{o}\n\n".format(o=o))
+
+ _dispatch["description"] = print_description
def print_state(self, o):
write = self.stream.write
- write('Feature maturity level: {o} \n'.format(o=o))
- _dispatch['state'] = print_state
+ write("Feature maturity level: {o} \n".format(o=o))
+
+ _dispatch["state"] = print_state
def print_properties(self, o):
write = self.stream.write
- write('Supports: {s} \n'.format(s=" ".join(o)))
- _dispatch['properties'] = print_properties
+ write("Supports: {s} \n".format(s=" ".join(o)))
+
+ _dispatch["properties"] = print_properties
def print_missing(self, o):
write = self.stream.write
- write('\nNot yet implemented: \n')
+ write("\nNot yet implemented: \n")
self.print_features(o)
- _dispatch['missing'] = print_missing
+
+ _dispatch["missing"] = print_missing
def print_code(self, o):
write = self.stream.write
- write('Source Code: [{o}]({o}) \n'.format(o=o))
- _dispatch['code'] = print_code
+ write("Source Code: [{o}]({o}) \n".format(o=o))
+
+ _dispatch["code"] = print_code
def print(self, t, o):
write = self.stream.write
if t in self._dispatch:
- self._dispatch[t](self, o,)
+ self._dispatch[t](
+ self,
+ o,
+ )
else:
- write('NOT IMPLEMENTED: {t}\n')
+ write("NOT IMPLEMENTED: {t}\n")
+
def output_toc(toc, stream):
write = stream.write
- write('## VPP Feature list:\n')
+ write("# VPP Supported Features\n")
for t in toc:
- ref = t.lower().replace(' ', '-')
- write('[{t}](#{ref}) \n'.format(t=t, ref=ref))
+ ref = t.lower().replace(" ", "-")
+ write("[{t}](#{ref}) \n".format(t=t, ref=ref))
+
def featuresort(k):
- return k[1]['name']
+ return k[1]["name"]
+
def featurelistsort(k):
orderedfields = {
- 'name': 0,
- 'maintainer': 1,
- 'description': 2,
- 'features': 3,
- 'state': 4,
- 'properties': 5,
- 'missing': 6,
- 'code': 7,
+ "name": 0,
+ "maintainer": 1,
+ "description": 2,
+ "features": 3,
+ "state": 4,
+ "properties": 5,
+ "missing": 6,
+ "code": 7,
}
return orderedfields[k[0]]
-def output_markdown(features, fields, notfields):
+
+def output_markdown(features, fields, notfields, repository_url):
stream = StringIO()
m = MarkDown(stream)
- m.print('markdown_header', 'Feature Details:')
+ m.print("markdown_header", "Feature Details:")
for path, featuredef in sorted(features.items(), key=featuresort):
- codeurl = 'https://git.fd.io/vpp/tree/src/' + \
- '/'.join(os.path.normpath(path).split('/')[1:-1])
- featuredef['code'] = codeurl
+ codeurl = urllib.parse.urljoin(repository_url, os.path.dirname(path))
+ featuredef["code"] = codeurl
for k, v in sorted(featuredef.items(), key=featurelistsort):
if notfields:
if k not in notfields:
@@ -211,21 +233,45 @@ def output_markdown(features, fields, notfields):
output_toc(m.toc, tocstream)
return tocstream, stream
+
def main():
- parser = argparse.ArgumentParser(description='VPP Feature List.')
- parser.add_argument('--validate', dest='validate', action='store_true',
- help='validate the FEATURE.yaml file')
- parser.add_argument('--git-status', dest='git_status', action='store_true',
- help='Get filelist from git status')
- parser.add_argument('--all', dest='all', action='store_true',
- help='Validate all files in repository')
- parser.add_argument('--markdown', dest='markdown', action='store_true',
- help='Output feature table in markdown')
- parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
- default=sys.stdin)
+ parser = argparse.ArgumentParser(description="VPP Feature List.")
+ parser.add_argument(
+ "--validate",
+ dest="validate",
+ action="store_true",
+ help="validate the FEATURE.yaml file",
+ )
+ parser.add_argument(
+ "--repolink",
+ metavar="repolink",
+ default=DEFAULT_REPO_LINK,
+ help="Link to public repository [%s]" % DEFAULT_REPO_LINK,
+ )
+ parser.add_argument(
+ "--git-status",
+ dest="git_status",
+ action="store_true",
+ help="Get filelist from git status",
+ )
+ parser.add_argument(
+ "--all",
+ dest="all",
+ action="store_true",
+ help="Validate all files in repository",
+ )
+ parser.add_argument(
+ "--markdown",
+ dest="markdown",
+ action="store_true",
+ help="Output feature table in markdown",
+ )
+ parser.add_argument(
+ "infile", nargs="?", type=argparse.FileType("r"), default=sys.stdin
+ )
group = parser.add_mutually_exclusive_group()
- group.add_argument('--include', help='List of fields to include')
- group.add_argument('--exclude', help='List of fields to exclude')
+ group.add_argument("--include", help="List of fields to include")
+ group.add_argument("--exclude", help="List of fields to exclude")
args = parser.parse_args()
features = {}
@@ -237,11 +283,11 @@ def main():
filelist = args.infile
if args.include:
- fields = args.include.split(',')
+ fields = args.include.split(",")
else:
fields = []
if args.exclude:
- notfields = args.exclude.split(',')
+ notfields = args.exclude.split(",")
else:
notfields = []
@@ -249,23 +295,25 @@ def main():
featurefile = featurefile.rstrip()
# Load configuration file
- with open(featurefile, encoding='utf-8') as f:
+ with open(featurefile, encoding="utf-8") as f:
cfg = yaml.load(f, Loader=yaml.SafeLoader)
try:
validate(instance=cfg, schema=schema)
except exceptions.ValidationError:
- print('File does not validate: {featurefile}' \
- .format(featurefile=featurefile), file=sys.stderr)
+ print(
+ "File does not validate: {featurefile}".format(featurefile=featurefile),
+ file=sys.stderr,
+ )
raise
features[featurefile] = cfg
if args.markdown:
stream = StringIO()
- tocstream, stream = output_markdown(features, fields, notfields)
+ tocstream, stream = output_markdown(features, fields, notfields, args.repolink)
print(tocstream.getvalue())
print(stream.getvalue())
stream.close()
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/src/scripts/generate_version_h b/src/scripts/generate_version_h
index 59123684d8d..cdcdf5dd42a 100755
--- a/src/scripts/generate_version_h
+++ b/src/scripts/generate_version_h
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
: ${VPP_BUILD_USER:=$(whoami)}
: ${VPP_BUILD_HOST:=$(hostname)}
DATE_FMT="+%Y-%m-%dT%H:%M:%S"
diff --git a/src/scripts/host-stack/cc_plots.py b/src/scripts/host-stack/cc_plots.py
index 20f5bd6a3f1..f7953f223d4 100755
--- a/src/scripts/host-stack/cc_plots.py
+++ b/src/scripts/host-stack/cc_plots.py
@@ -6,214 +6,241 @@ import argparse
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
-class Point():
+
+class Point:
"CC event"
+
def __init__(self, x, y):
self.x = x
self.y = y
+
def listx(points):
- return list(map(lambda pt: pt.x, points))
+ return list(map(lambda pt: pt.x, points))
+
def listy(points):
- return list(map(lambda pt: pt.y, points))
+ return list(map(lambda pt: pt.y, points))
-def plot_data(d):
- plt.figure(1)
-
- cwndx = listx(d["cwnd"])
- cwndy = listy(d["cwnd"])
- congx = listx(d["congestion"])
- congy = listy(d["congestion"])
- rcvrdx = listx(d["recovered"])
- rcvrdy = listy(d["recovered"])
- rxttx = listx(d["rxtTimeout"])
- rxtty = listy(d["rxtTimeout"])
-
- # cwnd/ssthresh/cc events
- plt.subplot(311)
- plt.title("cwnd/ssthresh")
- pcwnd = plt.plot(cwndx, cwndy, 'r')
- psst = plt.plot(cwndx, d["ssthresh"], 'y-')
- pcong = plt.plot(congx, congy,'yo')
- precov = plt.plot(rcvrdx, rcvrdy,'co')
- prxtt = plt.plot(rxttx, rxtty,'mo')
-
- marker1 = Line2D(range(1), range(1), color="r")
- marker2 = Line2D(range(1), range(1), color="y")
- marker3 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="y")
- marker4 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="c")
- marker5 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="m")
- plt.legend((marker1, marker2, marker3, marker4, marker5),
- ('cwnd', 'ssthresh', 'congestion', 'recovered', 'rxt-timeout'),
- loc=4)
- axes = plt.gca()
- axes.set_ylim([-20e4, max(cwndy) + 20e4])
-
- # snd variables
- plt.subplot(312)
- plt.title("cc variables")
- plt.plot(cwndx, d["space"], 'g-', markersize=1)
- plt.plot(cwndx, d["flight"], 'b-', markersize=1)
- plt.plot(cwndx, d["sacked"], 'm:', markersize=1)
- plt.plot(cwndx, d["lost"], 'y:', markersize=1)
- plt.plot(cwndx, d["cc-space"], 'k:', markersize=1)
- plt.plot(cwndx, cwndy, 'ro', markersize=2)
-
- plt.plot(congx, congy, 'y^', markersize=10, markerfacecolor="y")
- plt.plot(rcvrdx, rcvrdy, 'c^', markersize=10, markerfacecolor="c")
- plt.plot(rxttx, rxtty, 'm^', markersize=10, markerfacecolor="m")
-
- #plt.plot(cwndx, d["snd_wnd"], 'ko', markersize=1)
- plt.legend(("snd-space", "flight", "sacked", "lost", "cc-space", "cwnd",
- "congestion", "recovered", "rxt-timeout"),
- loc=1)
-
- # rto/srrt/rttvar
- plt.subplot(313)
- plt.title("rtt")
- plt.plot(cwndx, d["srtt"], 'g-')
- plt.plot(cwndx, [x/1000 for x in d["mrtt-us"]], 'r-')
- plt.plot(cwndx, d["rttvar"], 'b-')
- plt.legend(["srtt", "mrtt-us", "rttvar"])
- axes = plt.gca()
- #plt.plot(cwndx, rto, 'r-')
- #axes.set_ylim([0, int(max(rto[2:len(rto)])) + 50])
-
- # show
- plt.show()
-
-def find_pattern(file_path,session_idx):
+def plot_data(d):
+ plt.figure(1)
+
+ cwndx = listx(d["cwnd"])
+ cwndy = listy(d["cwnd"])
+ congx = listx(d["congestion"])
+ congy = listy(d["congestion"])
+ rcvrdx = listx(d["recovered"])
+ rcvrdy = listy(d["recovered"])
+ rxttx = listx(d["rxtTimeout"])
+ rxtty = listy(d["rxtTimeout"])
+
+ # cwnd/ssthresh/cc events
+ plt.subplot(311)
+ plt.title("cwnd/ssthresh")
+ pcwnd = plt.plot(cwndx, cwndy, "r")
+ psst = plt.plot(cwndx, d["ssthresh"], "y-")
+ pcong = plt.plot(congx, congy, "yo")
+ precov = plt.plot(rcvrdx, rcvrdy, "co")
+ prxtt = plt.plot(rxttx, rxtty, "mo")
+
+ marker1 = Line2D(range(1), range(1), color="r")
+ marker2 = Line2D(range(1), range(1), color="y")
+ marker3 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="y")
+ marker4 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="c")
+ marker5 = Line2D(range(1), range(1), color="w", marker="o", markerfacecolor="m")
+ plt.legend(
+ (marker1, marker2, marker3, marker4, marker5),
+ ("cwnd", "ssthresh", "congestion", "recovered", "rxt-timeout"),
+ loc=4,
+ )
+ axes = plt.gca()
+ axes.set_ylim([-20e4, max(cwndy) + 20e4])
+
+ # snd variables
+ plt.subplot(312)
+ plt.title("cc variables")
+ plt.plot(cwndx, d["space"], "g-", markersize=1)
+ plt.plot(cwndx, d["flight"], "b-", markersize=1)
+ plt.plot(cwndx, d["sacked"], "m:", markersize=1)
+ plt.plot(cwndx, d["lost"], "y:", markersize=1)
+ plt.plot(cwndx, d["cc-space"], "k:", markersize=1)
+ plt.plot(cwndx, cwndy, "ro", markersize=2)
+
+ plt.plot(congx, congy, "y^", markersize=10, markerfacecolor="y")
+ plt.plot(rcvrdx, rcvrdy, "c^", markersize=10, markerfacecolor="c")
+ plt.plot(rxttx, rxtty, "m^", markersize=10, markerfacecolor="m")
+
+ # plt.plot(cwndx, d["snd_wnd"], 'ko', markersize=1)
+ plt.legend(
+ (
+ "snd-space",
+ "flight",
+ "sacked",
+ "lost",
+ "cc-space",
+ "cwnd",
+ "congestion",
+ "recovered",
+ "rxt-timeout",
+ ),
+ loc=1,
+ )
+
+ # rto/srrt/rttvar
+ plt.subplot(313)
+ plt.title("rtt")
+ plt.plot(cwndx, d["srtt"], "g-")
+ plt.plot(cwndx, [x / 1000 for x in d["mrtt-us"]], "r-")
+ plt.plot(cwndx, d["rttvar"], "b-")
+ plt.legend(["srtt", "mrtt-us", "rttvar"])
+ axes = plt.gca()
+ # plt.plot(cwndx, rto, 'r-')
+ # axes.set_ylim([0, int(max(rto[2:len(rto)])) + 50])
+
+ # show
+ plt.show()
+
+
+def find_pattern(file_path, session_idx):
is_active_open = 1
listener_pattern = "l\[\d\]"
- if (is_active_open):
- initial_pattern = "\[\d\](\.\d+:\d+\->\.\d+:\d+)\s+open:\s"
+ if is_active_open:
+ initial_pattern = "\[\d\](\.\d+:\d+\->\.\d+:\d+)\s+open:\s"
else:
- initial_pattern = "\[\d\](\.\d+:\d+\->\.\d+:\d+)\s"
+ initial_pattern = "\[\d\](\.\d+:\d+\->\.\d+:\d+)\s"
idx = 0
- f = open(file_path, 'r')
+ f = open(file_path, "r")
for line in f:
- # skip listener lines (server)
- if (re.search(listener_pattern, line) != None):
- continue
- match = re.search(initial_pattern, line)
- if (match == None):
- continue
- if (idx < session_idx):
- idx += 1
- continue
- filter_pattern = str(match.group(1)) + "\s+(.+)"
- print ("pattern is %s" % filter_pattern)
- f.close()
- return filter_pattern
- raise Exception ("Could not find initial pattern")
+ # skip listener lines (server)
+ if re.search(listener_pattern, line) != None:
+ continue
+ match = re.search(initial_pattern, line)
+ if match == None:
+ continue
+ if idx < session_idx:
+ idx += 1
+ continue
+ filter_pattern = str(match.group(1)) + "\s+(.+)"
+ print("pattern is %s" % filter_pattern)
+ f.close()
+ return filter_pattern
+ raise Exception("Could not find initial pattern")
+
def compute_time(min, sec, msec):
- return int(min)*60 + int(sec) + int(msec)/1000.0
+ return int(min) * 60 + int(sec) + int(msec) / 1000.0
+
def run(file_path, session_idx):
filter_sessions = 1
filter_pattern = ""
patterns = {
- "time" : "^\d+:(\d+):(\d+):(\d+):\d+",
- "listener" : "l\[\d\]",
- "cc" : "cwnd (\d+) flight (\d+) space (\d+) ssthresh (\d+) snd_wnd (\d+)",
- "cc-snd" : "cc_space (\d+) sacked (\d+) lost (\d+)",
- "rtt" : "rto (\d+) srtt (\d+) mrtt-us (\d+) rttvar (\d+)",
- "rxtt" : "rxt-timeout",
- "congestion": "congestion",
- "recovered" : "recovered",
+ "time": "^\d+:(\d+):(\d+):(\d+):\d+",
+ "listener": "l\[\d\]",
+ "cc": "cwnd (\d+) flight (\d+) space (\d+) ssthresh (\d+) snd_wnd (\d+)",
+ "cc-snd": "cc_space (\d+) sacked (\d+) lost (\d+)",
+ "rtt": "rto (\d+) srtt (\d+) mrtt-us (\d+) rttvar (\d+)",
+ "rxtt": "rxt-timeout",
+ "congestion": "congestion",
+ "recovered": "recovered",
}
d = {
- "cwnd" : [],
- "space" : [],
- "flight" : [],
- "ssthresh" : [],
- "snd_wnd" : [],
- "cc-space" : [],
- "lost" : [],
- "sacked" : [],
- "rto" : [],
- "srtt" : [],
- "mrtt-us" : [],
- "rttvar" : [],
- "rxtTimeout" : [],
- "congestion" : [],
- "recovered" : [],
+ "cwnd": [],
+ "space": [],
+ "flight": [],
+ "ssthresh": [],
+ "snd_wnd": [],
+ "cc-space": [],
+ "lost": [],
+ "sacked": [],
+ "rto": [],
+ "srtt": [],
+ "mrtt-us": [],
+ "rttvar": [],
+ "rxtTimeout": [],
+ "congestion": [],
+ "recovered": [],
}
- if (filter_sessions):
+ if filter_sessions:
filter_pattern = find_pattern(file_path, session_idx)
- f = open(file_path, 'r')
+ f = open(file_path, "r")
stats_index = 0
start_time = 0
for line in f:
# skip listener lines (server)
- if (re.search(patterns["listener"], line) != None):
+ if re.search(patterns["listener"], line) != None:
continue
# filter sessions
- if (filter_sessions):
+ if filter_sessions:
match = re.search(filter_pattern, line)
- if (match == None):
+ if match == None:
continue
original_line = line
line = match.group(1)
- match = re.search (patterns["time"], original_line)
- if (match == None):
- print "something went wrong! no time!"
- continue
- time = compute_time (match.group(1), match.group(2), match.group(3))
- if (start_time == 0):
- start_time = time
+ match = re.search(patterns["time"], original_line)
+ if match == None:
+ print("something went wrong! no time!")
+ continue
+ time = compute_time(match.group(1), match.group(2), match.group(3))
+ if start_time == 0:
+ start_time = time
time = time - start_time
match = re.search(patterns["cc"], line)
- if (match != None):
- d["cwnd"].append(Point(time, int(match.group(1))))
- d["flight"].append(int(match.group(2)))
- d["space"].append(int(match.group(3)))
- d["ssthresh"].append(int(match.group(4)))
- d["snd_wnd"].append(int(match.group(5)))
- stats_index += 1
- continue
+ if match != None:
+ d["cwnd"].append(Point(time, int(match.group(1))))
+ d["flight"].append(int(match.group(2)))
+ d["space"].append(int(match.group(3)))
+ d["ssthresh"].append(int(match.group(4)))
+ d["snd_wnd"].append(int(match.group(5)))
+ stats_index += 1
+ continue
match = re.search(patterns["cc-snd"], line)
- if (match != None):
- d["cc-space"].append(int(match.group(1)))
- d["sacked"].append(int(match.group(2)))
- d["lost"].append(int(match.group(3)))
+ if match != None:
+ d["cc-space"].append(int(match.group(1)))
+ d["sacked"].append(int(match.group(2)))
+ d["lost"].append(int(match.group(3)))
match = re.search(patterns["rtt"], line)
- if (match != None):
- d["rto"].append(int(match.group(1)))
- d["srtt"].append(int(match.group(2)))
- d["mrtt-us"].append(int(match.group(3)))
- d["rttvar"].append(int(match.group(4)))
- if (stats_index == 0):
- continue
+ if match != None:
+ d["rto"].append(int(match.group(1)))
+ d["srtt"].append(int(match.group(2)))
+ d["mrtt-us"].append(int(match.group(3)))
+ d["rttvar"].append(int(match.group(4)))
+ if stats_index == 0:
+ continue
match = re.search(patterns["rxtt"], line)
- if (match != None):
- d["rxtTimeout"].append(Point(time, d["cwnd"][stats_index - 1].y + 1e4))
- continue
+ if match != None:
+ d["rxtTimeout"].append(Point(time, d["cwnd"][stats_index - 1].y + 1e4))
+ continue
match = re.search(patterns["congestion"], line)
- if (match != None):
- d["congestion"].append(Point(time, d["cwnd"][stats_index - 1].y - 1e4))
- continue
+ if match != None:
+ d["congestion"].append(Point(time, d["cwnd"][stats_index - 1].y - 1e4))
+ continue
match = re.search(patterns["recovered"], line)
- if (match != None):
- d["recovered"].append(Point(time, d["cwnd"][stats_index - 1].y))
- continue
+ if match != None:
+ d["recovered"].append(Point(time, d["cwnd"][stats_index - 1].y))
+ continue
plot_data(d)
+
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Plot tcp cc logs")
- parser.add_argument('-f', action='store', dest='file', required=True,
- help="elog file in txt format")
- parser.add_argument('-s', action='store', dest='session_index', default=0,
- help="session index for which to plot cc logs" )
+ parser.add_argument(
+ "-f", action="store", dest="file", required=True, help="elog file in txt format"
+ )
+ parser.add_argument(
+ "-s",
+ action="store",
+ dest="session_index",
+ default=0,
+ help="session index for which to plot cc logs",
+ )
results = parser.parse_args()
run(results.file, int(results.session_index))
diff --git a/src/scripts/remove-rpath b/src/scripts/remove-rpath
index 3e20b06dbfa..b6df461e464 100755
--- a/src/scripts/remove-rpath
+++ b/src/scripts/remove-rpath
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/src/scripts/version b/src/scripts/version
index 455ab760352..b8748af1588 100755
--- a/src/scripts/version
+++ b/src/scripts/version
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# Copyright (c) 2015 Cisco and/or its affiliates.
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,7 +20,7 @@ cd "$path"
if [ -f .version ]; then
vstring=$(cat .version)
else
- vstring=$(git describe --long)
+ vstring=$(git describe --long --match "v*")
if [ $? != 0 ]; then
exit 1
fi
diff --git a/src/scripts/vnet/arp4 b/src/scripts/vnet/arp4
index 372d82a9ccd..24a688549bf 100644
--- a/src/scripts/vnet/arp4
+++ b/src/scripts/vnet/arp4
@@ -1,13 +1,13 @@
-packet-generator new {
- name x
- limit 1
- node ip4-input
- size 64-64
- data {
- ICMP: 1.0.0.2 -> 2.0.0.2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ ICMP: 1.0.0.2 -> 2.0.0.2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
trace add pg-input 100
diff --git a/src/scripts/vnet/arp4-mpls b/src/scripts/vnet/arp4-mpls
index 3e1561157ea..fbb68eb4711 100644
--- a/src/scripts/vnet/arp4-mpls
+++ b/src/scripts/vnet/arp4-mpls
@@ -1,13 +1,13 @@
-packet-generator new {
- name x
- limit 1
- node ip4-input
- size 64-64
- data {
- ICMP: 1.0.0.2 -> 2.2.2.2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ ICMP: 1.0.0.2 -> 2.2.2.2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
loop create
diff --git a/src/scripts/vnet/arp6 b/src/scripts/vnet/arp6
index df58fb6478f..2ade2962db8 100644
--- a/src/scripts/vnet/arp6
+++ b/src/scripts/vnet/arp6
@@ -1,13 +1,13 @@
-packet-generator new {
- name x
- limit 1
- node ip6-input
- size 64-64
- data {
- ICMP6: 2000::2 -> 2001::2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip6-input \
+ size 64-64 \
+ data { \
+ ICMP6: 2000::2 -> 2001::2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
tr add pg-input 100
diff --git a/src/scripts/vnet/bvi b/src/scripts/vnet/bvi
index e1caf6606d6..cf55d63c2f2 100644
--- a/src/scripts/vnet/bvi
+++ b/src/scripts/vnet/bvi
@@ -3,12 +3,12 @@
set int state tuntap-0 down
set int ip address GigabitEthernet2/1/0 1.2.3.4/24
-set int state GigabitEthernet2/1/0 up
-cre sub GigabitEthernet2/1/0 1 dot1q 7
+set int state GigabitEthernet2/1/0 up
+cre sub GigabitEthernet2/1/0 1 dot1q 7
set int state GigabitEthernet2/1/0.1 up
set int state GigabitEthernet2/2/0 up
-cre sub GigabitEthernet2/2/0 1 dot1q 9
+cre sub GigabitEthernet2/2/0 1 dot1q 9
set int state GigabitEthernet2/2/0.1 up
@@ -22,7 +22,7 @@ set int l2 bridge GigabitEthernet2/1/0.1 0
set int l2 bridge GigabitEthernet2/2/0.1 0
set int l2 tag-rewrite GigabitEthernet2/1/0.1 pop 1
-set int l2 tag-rewrite GigabitEthernet2/2/0.1 pop 1
+set int l2 tag-rewrite GigabitEthernet2/2/0.1 pop 1
l2fib add 00:22:44:06:08:0a 0 GigabitEthernet2/1/0.1 static
l2fib add 00:02:04:06:08:0a 0 GigabitEthernet2/2/0.1 static
@@ -38,36 +38,36 @@ cle er
cle int
cle run
-packet-generator new {
- name bvi_to_l2
- limit 100
- node ethernet-input
- interface GigabitEthernet2/1/0
- data {
- IP4: 0050.56b7.7c83 -> 0050.56b7.296d
- GRE: 1.2.3.3 -> 8.0.0.1 mpls_unicast
+packet-generator new { \
+ name bvi_to_l2 \
+ limit 100 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ data { \
+ IP4: 0050.56b7.7c83 -> 0050.56b7.296d \
+ GRE: 1.2.3.3 -> 8.0.0.1 mpls_unicast \
}
}
-packet-generator new {
- name l2_to_bvi
- limit 50
- node ethernet-input
- interface GigabitEthernet2/2/0
- data {
- IP4: 0050.56b7.7c83 -> dead.0000.0000 vlan 9
- GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
- }
+packet-generator new { \
+ name l2_to_bvi \
+ limit 50 \
+ node ethernet-input \
+ interface GigabitEthernet2/2/0 \
+ data { \
+ IP4: 0050.56b7.7c83 -> dead.0000.0000 vlan 9 \
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast \
+ } \
}
-packet-generator new {
- name l2_to_bvi_via_flood
- limit 25
- node ethernet-input
- interface GigabitEthernet2/2/0
- data {
- IP4: 0050.56b7.7c83 -> ffff.ffff.ffff vlan 9
- GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
- }
+packet-generator new { \
+ name l2_to_bvi_via_flood \
+ limit 25 \
+ node ethernet-input \
+ interface GigabitEthernet2/2/0 \
+ data { \
+ IP4: 0050.56b7.7c83 -> ffff.ffff.ffff vlan 9 \
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast \
+ } \
}
diff --git a/src/scripts/vnet/dhcp/left-ping-target.sh b/src/scripts/vnet/dhcp/left-ping-target.sh
index 2edc2a50eaa..4dab842b819 100644
--- a/src/scripts/vnet/dhcp/left-ping-target.sh
+++ b/src/scripts/vnet/dhcp/left-ping-target.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
# to obtain dhcp address from leftpeer
dhclient -d -v eth1
diff --git a/src/scripts/vnet/gre b/src/scripts/vnet/gre
index 820fc09f469..bab26353d40 100644
--- a/src/scripts/vnet/gre
+++ b/src/scripts/vnet/gre
@@ -26,60 +26,60 @@ set int ip address gre1 ::2/128
ip route add 192:168:3::0/64 via gre1
set int state gre1 up
-packet-generator new {
- name gre4-tx
- limit -1
- node ip4-input
- size 64-64
- interface pg0
- data {
- UDP: 192.168.1.2 -> 192.168.3.1
- UDP: 4321 -> 1234
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name gre4-tx \
+ limit -1 \
+ node ip4-input \
+ size 64-64 \
+ interface pg0 \
+ data { \
+ UDP: 192.168.1.2 -> 192.168.3.1 \
+ UDP: 4321 -> 1234 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name gre4-rx
- limit -1
- node ip6-input
- size 72-72
- interface pg1
- data {
- GRE: 192:168:2::2 -> 192:168:2::1
- ip4
- UDP: 192.168.3.1 -> 192.168.1.2
- UDP: 1234 -> 4321
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name gre4-rx \
+ limit -1 \
+ node ip6-input \
+ size 72-72 \
+ interface pg1 \
+ data { \
+ GRE: 192:168:2::2 -> 192:168:2::1 \
+ ip4 \
+ UDP: 192.168.3.1 -> 192.168.1.2 \
+ UDP: 1234 -> 4321 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name gre6-tx
- limit -1
- node ip6-input
- size 64-64
- interface pg0
- data {
- UDP: 192:168:1::2 -> 192:168:3::1
- UDP: 4321 -> 1234
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name gre6-tx \
+ limit -1 \
+ node ip6-input \
+ size 64-64 \
+ interface pg0 \
+ data { \
+ UDP: 192:168:1::2 -> 192:168:3::1 \
+ UDP: 4321 -> 1234 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name gre6-rx
- limit -1
- node ip4-input
- size 72-72
- interface pg1
- data {
- GRE: 192.168.2.2 -> 192.168.2.1
- ip6
- UDP: 192:168:3::1 -> 192:168:1::2
- UDP: 1234 -> 4321
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name gre6-rx \
+ limit -1 \
+ node ip4-input \
+ size 72-72 \
+ interface pg1 \
+ data { \
+ GRE: 192.168.2.2 -> 192.168.2.1 \
+ ip6 \
+ UDP: 192:168:3::1 -> 192:168:1::2 \
+ UDP: 1234 -> 4321 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
packet-generator enable
diff --git a/src/scripts/vnet/gre-teb b/src/scripts/vnet/gre-teb
index 992005cfa83..015ce73b891 100644
--- a/src/scripts/vnet/gre-teb
+++ b/src/scripts/vnet/gre-teb
@@ -10,30 +10,30 @@ set int state gre0 up
set int l2 xconnect pg0 gre0
set int l2 xconnect gre0 pg0
-packet-generator new {
- name tx
- limit -1
- node ethernet-input
- size 64-64
- interface pg0
- data {
- 0x8881: 4:4:4:4:4:4 -> 5:5:5:5:5:5
- incrementing 100
- }
+packet-generator new { \
+ name tx \
+ limit -1 \
+ node ethernet-input \
+ size 64-64 \
+ interface pg0 \
+ data { \
+ 0x8881: 4:4:4:4:4:4 -> 5:5:5:5:5:5 \
+ incrementing 100 \
+ } \
}
-packet-generator new {
- name rx
- limit -1
- node ip4-input
- size 72-72
- interface pg1
- data {
- GRE: 192.168.2.2 -> 192.168.2.1
- teb
- 0x8881: 4:4:4:4:4:4 -> 5:5:5:5:5:5
- incrementing 100
- }
+packet-generator new { \
+ name rx \
+ limit -1 \
+ node ip4-input \
+ size 72-72 \
+ interface pg1 \
+ data { \
+ GRE: 192.168.2.2 -> 192.168.2.1 \
+ teb \
+ 0x8881: 4:4:4:4:4:4 -> 5:5:5:5:5:5 \
+ incrementing 100 \
+ } \
}
packet-generator enable
diff --git a/src/scripts/vnet/icmp b/src/scripts/vnet/icmp
index 889eca82c8f..603d9d77700 100644
--- a/src/scripts/vnet/icmp
+++ b/src/scripts/vnet/icmp
@@ -1,13 +1,13 @@
-packet-generator new {
- name x
- limit 1
- node ip4-input
- size 64-64
- data {
- ICMP: 1.2.3.4 -> 5.6.7.8
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ ICMP: 1.2.3.4 -> 5.6.7.8 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
tr add pg-input 100
diff --git a/src/scripts/vnet/icmp6 b/src/scripts/vnet/icmp6
index 4fcb600594c..eb41470f448 100644
--- a/src/scripts/vnet/icmp6
+++ b/src/scripts/vnet/icmp6
@@ -1,13 +1,13 @@
-packet-generator new {
- name x
- limit 1
- node ip6-input
- size 64-64
- data {
- ICMP6: ::1 -> ::2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip6-input \
+ size 64-64 \
+ data { \
+ ICMP6: ::1 -> ::2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
tr add pg-input 100
diff --git a/src/scripts/vnet/ige b/src/scripts/vnet/ige
index 80d045af36d..f721e597678 100644
--- a/src/scripts/vnet/ige
+++ b/src/scripts/vnet/ige
@@ -1,12 +1,12 @@
-packet-generator new {
- name x
- limit 1
- node ip4-lookup
- size 50-50
- data {
- ICMP: 1.0.0.1 -> 1.0.0.3 ttl 1
- incrementing 30
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip4-lookup \
+ size 50-50 \
+ data { \
+ ICMP: 1.0.0.1 -> 1.0.0.3 ttl 1 \
+ incrementing 30 \
+ } \
}
comment { tr add pg-input 100 }
diff --git a/src/scripts/vnet/ip6 b/src/scripts/vnet/ip6
index e03c6fd2c91..c7e4566b2bf 100644
--- a/src/scripts/vnet/ip6
+++ b/src/scripts/vnet/ip6
@@ -1,14 +1,14 @@
-packet-generator new {
- name x
- limit 1
- node ethernet-input
- size 64-64
- data {
- IP6: 1.2.3 -> 4.5.6
- ICMP: 3002::2 -> 3001::2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ethernet-input \
+ size 64-64 \
+ data { \
+ IP6: 1.2.3 -> 4.5.6 \
+ ICMP: 3002::2 -> 3001::2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
diff --git a/src/scripts/vnet/ip6-hbh b/src/scripts/vnet/ip6-hbh
index 87be118eadd..7bb0be1a513 100644
--- a/src/scripts/vnet/ip6-hbh
+++ b/src/scripts/vnet/ip6-hbh
@@ -1,78 +1,78 @@
tap connect tap0
set int state tap-0 up
set int ip address tap-0 1::1/64
-packet-generator new {
- name hbh1
- limit 1
- node ip6-input
- size 48-48
- data {
- IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
- hex 0x3B00010403040506
- incrementing 100
- }
+packet-generator new { \
+ name hbh1 \
+ limit 1 \
+ node ip6-input \
+ size 48-48 \
+ data { \
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2 \
+ hex 0x3B00010403040506 \
+ incrementing 100 \
+ } \
}
-packet-generator new {
- name hbh2
- limit 1
- node ip6-input
- size 48-48
- data {
- IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
- hex 0x3B00C10403040506
- incrementing 100
- }
+packet-generator new { \
+ name hbh2 \
+ limit 1 \
+ node ip6-input \
+ size 48-48 \
+ data { \
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2 \
+ hex 0x3B00C10403040506 \
+ incrementing 100 \
+ } \
}
-packet-generator new {
- name hbh3
- limit 1
- node ip6-input
- size 48-48
- data {
- IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
- hex 0x3BffC10403040506
- incrementing 100
- }
+packet-generator new { \
+ name hbh3 \
+ limit 1 \
+ node ip6-input \
+ size 48-48 \
+ data { \
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2 \
+ hex 0x3BffC10403040506 \
+ incrementing 100 \
+ } \
}
-packet-generator new {
- name hbh4
- limit 1
- node ip6-input
- size 64-64
- data {
- IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
- hex 0x3BffC10403040506
- incrementing 100
- }
+packet-generator new { \
+ name hbh4 \
+ limit 1 \
+ node ip6-input \
+ size 64-64 \
+ data { \
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2 \
+ hex 0x3BffC10403040506 \
+ incrementing 100 \
+ } \
}
-packet-generator new {
- name hbh5
- limit 1
- node ip6-input
- size 56-56
- data {
- IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
- length 16
- hex 0x3B010104030405060106030405060708
- incrementing 100
- }
+packet-generator new { \
+ name hbh5 \
+ limit 1 \
+ node ip6-input \
+ size 56-56 \
+ data { \
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2 \
+ length 16 \
+ hex 0x3B010104030405060106030405060708 \
+ incrementing 100 \
+ } \
}
-packet-generator new {
- name hbh6
- limit 1
- node ip6-input
- size 56-56
- data {
- IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2
- length 16
- hex 0x3a00050200000100
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name hbh6 \
+ limit 1 \
+ node ip6-input \
+ size 56-56 \
+ data { \
+ IP6_HOP_BY_HOP_OPTIONS: 1::2 -> 1::2 \
+ length 16 \
+ hex 0x3a00050200000100 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
tr add pg-input 100
diff --git a/src/scripts/vnet/ipsec b/src/scripts/vnet/ipsec
index 55cd914c4f7..5d186c87cb9 100644
--- a/src/scripts/vnet/ipsec
+++ b/src/scripts/vnet/ipsec
@@ -18,49 +18,54 @@ set int state pg0 up
set int state pg1 up
set int state pipe0 up
-create ipsec tunnel local-ip 10.0.0.1 remote-ip 10.0.0.2 local-spi 100 remote-spi 101 local-crypto-key 6541686776336961656264656f6f6579 remote-crypto-key 6541686776336961656264656f6f6579 crypto-alg aes-cbc-128
+ipsec sa add 20 spi 200 crypto-key 6541686776336961656264656f6f6579 crypto-alg aes-cbc-128
+ipsec sa add 30 spi 300 crypto-key 6541686776336961656264656f6f6579 crypto-alg aes-cbc-128
-set int state ipsec0 up
-set int unnum ipsec0 use pg0
+create ipip tunnel src 10.0.0.1 dst 10.0.0.2
+create ipip tunnel src 10.0.0.2 dst 10.0.0.1 outer-table-id 1
-create ipsec tunnel local-ip 10.0.0.2 remote-ip 10.0.0.1 local-spi 101 remote-spi 100 tx-table 1 local-crypto-key 6541686776336961656264656f6f6579 remote-crypto-key 6541686776336961656264656f6f6579 crypto-alg aes-cbc-128
+ipsec tunnel protect ipip0 sa-in 20 sa-out 30
+ipsec tunnel protect ipip1 sa-in 30 sa-out 20
-set int state ipsec1 up
-set int ip table ipsec1 1
-set int unnum ipsec1 use pg1
+set int state ipip0 up
+set int unnum ipip0 use pg0
-ip route add 192.168.1.0/24 via ipsec0
+set int state ipip1 up
+set int ip table ipip1 1
+set int unnum ipip1 use pg1
+
+ip route add 192.168.1.0/24 via ipip0
set ip neighbor pg1 192.168.1.2 00:11:22:33:44:55
-ip route add table 1 192.168.0.0/24 via ipsec1
+ip route add table 1 192.168.0.0/24 via ipip1
set ip neighbor pg0 192.168.0.2 00:11:22:33:44:66
trace add pg-input 100
-packet-generator new {
- name ipsec1
- limit 1
- rate 1e4
- node ip4-input
- interface pg0
- size 100-100
- data {
- UDP: 192.168.0.2 -> 192.168.1.2
- UDP: 4321 -> 1234
- length 72
- incrementing 100
- }
+packet-generator new { \
+ name ipsec1 \
+ limit 1 \
+ rate 1e4 \
+ node ip4-input \
+ interface pg0 \
+ size 100-100 \
+ data { \
+ UDP: 192.168.0.2 -> 192.168.1.2 \
+ UDP: 4321 -> 1234 \
+ length 72 \
+ incrementing 100 \
+ } \
}
-packet-generator new {
- name ipsec2
- limit 1
- rate 1e4
- node ip4-input
- interface pg1
- size 100-100
- data {
- UDP: 192.168.1.2 -> 192.168.0.2
- UDP: 4321 -> 1234
- length 72
- incrementing 100
- }
+packet-generator new { \
+ name ipsec2 \
+ limit 1 \
+ rate 1e4 \
+ node ip4-input \
+ interface pg1 \
+ size 100-100 \
+ data { \
+ UDP: 192.168.1.2 -> 192.168.0.2 \
+ UDP: 4321 -> 1234 \
+ length 72 \
+ incrementing 100 \
+ } \
}
diff --git a/src/scripts/vnet/ipsec_spd b/src/scripts/vnet/ipsec_spd
index 5acced65793..04b925c08ac 100644
--- a/src/scripts/vnet/ipsec_spd
+++ b/src/scripts/vnet/ipsec_spd
@@ -39,17 +39,17 @@ set ip neighbor pg1 192.168.1.2 00:11:22:33:44:55
trace add pg-input 100
-packet-generator new {
- name ipsec1
- limit 1
- rate 1e4
- node ip4-input
- interface pg0
- size 100-100
- data {
- UDP: 192.168.0.2 -> 10.6.0.1
- UDP: 4321 -> 1234
- length 72
- incrementing 100
- }
+packet-generator new { \
+ name ipsec1 \
+ limit 1 \
+ rate 1e4 \
+ node ip4-input \
+ interface pg0 \
+ size 100-100 \
+ data { \
+ UDP: 192.168.0.2 -> 10.6.0.1 \
+ UDP: 4321 -> 1234 \
+ length 72 \
+ incrementing 100 \
+ } \
}
diff --git a/src/scripts/vnet/ipsec_spd_vrf b/src/scripts/vnet/ipsec_spd_vrf
index faa891dc2c4..f4420eccac4 100644
--- a/src/scripts/vnet/ipsec_spd_vrf
+++ b/src/scripts/vnet/ipsec_spd_vrf
@@ -39,34 +39,34 @@ set ip neighbor pg0.3 192.168.0.2 00:11:22:33:44:55
trace add pg-input 100
-packet-generator new {
- name ipsec2
- limit 1
- rate 1e4
- node ethernet-input
- interface pg0
- size 100-100
- data {
- IP4: 1.2.3 -> 4.5.6 vlan 1
- UDP: 10.5.0.1 -> 10.6.0.1
- UDP: 4321 -> 1234
- length 72
- incrementing 100
- }
+packet-generator new { \
+ name ipsec2 \
+ limit 1 \
+ rate 1e4 \
+ node ethernet-input \
+ interface pg0 \
+ size 100-100 \
+ data { \
+ IP4: 1.2.3 -> 4.5.6 vlan 1 \
+ UDP: 10.5.0.1 -> 10.6.0.1 \
+ UDP: 4321 -> 1234 \
+ length 72 \
+ incrementing 100 \
+ } \
}
-packet-generator new {
- name ipsec3
- limit 1
- rate 1e4
- node ethernet-input
- interface pg0
- size 100-100
- data {
- IP4: 1.2.3 -> 4.5.6 vlan 1
- UDP: 10.5.0.1 -> 10.6.0.22
- UDP: 4321 -> 1234
- length 72
- incrementing 100
- }
+packet-generator new { \
+ name ipsec3 \
+ limit 1 \
+ rate 1e4 \
+ node ethernet-input \
+ interface pg0 \
+ size 100-100 \
+ data { \
+ IP4: 1.2.3 -> 4.5.6 vlan 1 \
+ UDP: 10.5.0.1 -> 10.6.0.22 \
+ UDP: 4321 -> 1234 \
+ length 72 \
+ incrementing 100 \
+ } \
}
diff --git a/src/scripts/vnet/ipsec_tun_protect b/src/scripts/vnet/ipsec_tun_protect
deleted file mode 100644
index 6dc4dd7b780..00000000000
--- a/src/scripts/vnet/ipsec_tun_protect
+++ /dev/null
@@ -1,71 +0,0 @@
-
-create packet-generator interface pg0
-create packet-generator interface pg1
-
-pipe create
-
-ip table add 1
-set int ip table pg1 1
-set int ip table pipe0.1 1
-
-set int ip address pg0 192.168.0.1/24
-set int ip address pg1 192.168.1.1/24
-
-set int ip address pipe0.0 10.0.0.1/24
-set int ip address pipe0.1 10.0.0.2/24
-
-set int state pg0 up
-set int state pg1 up
-set int state pipe0 up
-
-ipsec sa add 20 spi 200 crypto-key 6541686776336961656264656f6f6579 crypto-alg aes-cbc-128
-ipsec sa add 30 spi 300 crypto-key 6541686776336961656264656f6f6579 crypto-alg aes-cbc-128
-
-create ipip tunnel src 10.0.0.1 dst 10.0.0.2
-create ipip tunnel src 10.0.0.2 dst 10.0.0.1 outer-table-id 1
-
-ipsec tunnel protect ipip0 sa-in 20 sa-out 30
-ipsec tunnel protect ipip1 sa-in 30 sa-out 20
-
-set int state ipip0 up
-set int unnum ipip0 use pg0
-
-set int state ipip1 up
-set int ip table ipip1 1
-set int unnum ipip1 use pg1
-
-ip route add 192.168.1.0/24 via ipip0
-set ip neighbor pg1 192.168.1.2 00:11:22:33:44:55
-ip route add table 1 192.168.0.0/24 via ipip1
-set ip neighbor pg0 192.168.0.2 00:11:22:33:44:66
-
-trace add pg-input 100
-
-packet-generator new {
- name ipsec1
- limit 1
- rate 1e4
- node ip4-input
- interface pg0
- size 100-100
- data {
- UDP: 192.168.0.2 -> 192.168.1.2
- UDP: 4321 -> 1234
- length 72
- incrementing 100
- }
-}
-packet-generator new {
- name ipsec2
- limit 1
- rate 1e4
- node ip4-input
- interface pg1
- size 100-100
- data {
- UDP: 192.168.1.2 -> 192.168.0.2
- UDP: 4321 -> 1234
- length 72
- incrementing 100
- }
-}
diff --git a/src/scripts/vnet/ixge b/src/scripts/vnet/ixge
deleted file mode 100644
index 6722b5369cd..00000000000
--- a/src/scripts/vnet/ixge
+++ /dev/null
@@ -1,15 +0,0 @@
-packet-generator new {
- name x
- limit 1
- node ip4-lookup
- size 50-50
- data {
- ICMP: 1.0.0.1 -> 1.0.0.3 ttl 1
- incrementing 30
- }
-}
-
-comment { tr add pg-input 100 }
-set int ip address TenGigabitEthernet5/0/0 33.0.1.1/8
-set int state TenGigabitEthernet5/0/0 up
-
diff --git a/src/scripts/vnet/l2efpfilter b/src/scripts/vnet/l2efpfilter
index d1074ce5b0c..4a17c15779d 100644
--- a/src/scripts/vnet/l2efpfilter
+++ b/src/scripts/vnet/l2efpfilter
@@ -45,36 +45,36 @@ clear error
clear run
clear int
-packet-generator new {
- name pre_vtr_fail
- limit 10
- node ethernet-input
- interface GigabitEthernet2/1/0
- data {
- IP4: 0050.56b7.7c83 -> 00:00:00:00:00:11 vlan 1
- GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
- }
+packet-generator new { \
+ name pre_vtr_fail \
+ limit 10 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ data { \
+ IP4: 0050.56b7.7c83 -> 00:00:00:00:00:11 vlan 1 \
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast \
+ } \
}
-packet-generator new {
- name post_vtr_pass
- limit 20
- node ethernet-input
- interface GigabitEthernet2/1/0
- data {
- IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 2
- GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
- }
+packet-generator new { \
+ name post_vtr_pass \
+ limit 20 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ data { \
+ IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 2 \
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast \
+ } \
}
-packet-generator new {
- name post_vtr_fail
- limit 50
- node ethernet-input
- interface GigabitEthernet2/1/0
- data {
- IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 3
- GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
- }
+packet-generator new { \
+ name post_vtr_fail \
+ limit 50 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ data { \
+ IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 3 \
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast \
+ } \
}
diff --git a/src/scripts/vnet/l2efpfilter_perf b/src/scripts/vnet/l2efpfilter_perf
index 08c1b46a01c..3856d44e8c0 100644
--- a/src/scripts/vnet/l2efpfilter_perf
+++ b/src/scripts/vnet/l2efpfilter_perf
@@ -45,13 +45,13 @@ clear error
clear run
clear int
-packet-generator new {
- name post_vtr_pass
- limit 9111003
- node ethernet-input
- interface GigabitEthernet2/1/0
- data {
- IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 2
- GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
- }
+packet-generator new { \
+ name post_vtr_pass \
+ limit 9111003 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ data { \
+ IP4: 0050.56b7.7c83 -> 00:00:00:00:00:22 vlan 2 \
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast \
+ } \
}
diff --git a/src/scripts/vnet/l2fib b/src/scripts/vnet/l2fib
index dd5c5a803f8..4a167158260 100644
--- a/src/scripts/vnet/l2fib
+++ b/src/scripts/vnet/l2fib
@@ -25,20 +25,20 @@ clear error
clear run
clear int
-packet-generator new {
- name new_input_if_index_mac_move
- limit 4
- node ethernet-input
- interface GigabitEthernet2/1/0
- size 98-98
- data { hex 0x00010203040500020406080a080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff }
+packet-generator new { \
+ name new_input_if_index_mac_move \
+ limit 4 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ size 98-98 \
+ data { hex 0x00010203040500020406080a080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff } \
}
-packet-generator new {
- name dmac_hit
- limit 7
- node ethernet-input
- interface GigabitEthernet2/2/0
- size 98-98
- data { hex 0x00020406080a00224406080a8100000981000011080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff }
+packet-generator new { \
+ name dmac_hit \
+ limit 7 \
+ node ethernet-input \
+ interface GigabitEthernet2/2/0 \
+ size 98-98 \
+ data { hex 0x00020406080a00224406080a8100000981000011080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff } \
}
diff --git a/src/scripts/vnet/l2fib_perf b/src/scripts/vnet/l2fib_perf
index 55565061dae..0f456c1932f 100644
--- a/src/scripts/vnet/l2fib_perf
+++ b/src/scripts/vnet/l2fib_perf
@@ -17,12 +17,12 @@ cle er
cle int
cle run
-packet-generator new {
- name perf
- limit 9111003
- node ethernet-input
- interface GigabitEthernet2/2/0.1
- size 98-98
- data { hex 0x00224406080a00020406080a81000009080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff }
+packet-generator new { \
+ name perf \
+ limit 9111003 \
+ node ethernet-input \
+ interface GigabitEthernet2/2/0.1 \
+ size 98-98 \
+ data { hex 0x00224406080a00020406080a81000009080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff } \
}
diff --git a/src/scripts/vnet/l2fib_xc b/src/scripts/vnet/l2fib_xc
index bb25bd35247..8083da008e5 100644
--- a/src/scripts/vnet/l2fib_xc
+++ b/src/scripts/vnet/l2fib_xc
@@ -19,12 +19,12 @@ clear error
clear run
clear int
-packet-generator new {
- name xc
- limit 11
- node ethernet-input
- interface GigabitEthernet2/1/0
- size 98-98
- data { hex 0x00010203040500020406080a080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff }
+packet-generator new { \
+ name xc \
+ limit 11 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ size 98-98 \
+ data { hex 0x00010203040500020406080a080045006402b46b96000100096978676265000500bf436973636f20494f5320536f6674776172652c2040160011000000010101cc000400000000001a00100000000100000000ffffffff } \
}
diff --git a/src/scripts/vnet/l2flood b/src/scripts/vnet/l2flood
index ec82306d487..41c86975119 100644
--- a/src/scripts/vnet/l2flood
+++ b/src/scripts/vnet/l2flood
@@ -27,15 +27,14 @@ clear error
clear run
clear int
-packet-generator new {
- name flood
- limit 1
- node ethernet-input
- interface GigabitEthernet2/1/0
- data {
- IP4: 0050.56b7.7c83 -> ffff.ffff.ffff vlan 1
- GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast
- }
-
+packet-generator new { \
+ name flood \
+ limit 1 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ data { \
+ IP4: 0050.56b7.7c83 -> ffff.ffff.ffff vlan 1 \
+ GRE: 8.0.0.1 -> 1.2.3.3 mpls_unicast \
+ } \
}
diff --git a/src/scripts/vnet/l2tp b/src/scripts/vnet/l2tp
index e9a73d579b7..c4a1fe4ad13 100644
--- a/src/scripts/vnet/l2tp
+++ b/src/scripts/vnet/l2tp
@@ -38,91 +38,91 @@ clear error
clear run
clear int
-packet-generator new {
- name decap
- limit 10
- size 200-200
- node ethernet-input
- interface GigabitEthernet2/2/0
- data {
- IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a
- L2TP: 11::1 -> 22::2
- L2TP: session_id 1 cookie 0xffffffffffffffff
- IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01
- UDP: 1.2.3.4 -> 5.6.7.8
- incrementing 8
- }
+packet-generator new { \
+ name decap \
+ limit 10 \
+ size 200-200 \
+ node ethernet-input \
+ interface GigabitEthernet2/2/0 \
+ data { \
+ IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a \
+ L2TP: 11::1 -> 22::2 \
+ L2TP: session_id 1 cookie 0xffffffffffffffff \
+ IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01 \
+ UDP: 1.2.3.4 -> 5.6.7.8 \
+ incrementing 8 \
+ } \
}
-packet-generator new {
- name decap_bad_sid
- limit 30
- size 200-200
- node ethernet-input
- interface GigabitEthernet2/2/0
- data {
- IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a
- L2TP: 11::1 -> 22::2
- L2TP: session_id 0x999 cookie 0xffffffffffffffff
- IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01
- UDP: 1.2.3.4 -> 5.6.7.8
- incrementing 8
- }
+packet-generator new { \
+ name decap_bad_sid \
+ limit 30 \
+ size 200-200 \
+ node ethernet-input \
+ interface GigabitEthernet2/2/0 \
+ data { \
+ IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a \
+ L2TP: 11::1 -> 22::2 \
+ L2TP: session_id 0x999 cookie 0xffffffffffffffff \
+ IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01 \
+ UDP: 1.2.3.4 -> 5.6.7.8 \
+ incrementing 8 \
+ } \
}
-packet-generator new {
- name decap_bad_cookie
- limit 50
- size 200-200
- node ethernet-input
- interface GigabitEthernet2/2/0
- data {
- IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a
- L2TP: 11::1 -> 22::2
- L2TP: session_id 1 cookie 0x3333ffffffffffff
- IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01
- UDP: 1.2.3.4 -> 5.6.7.8
- incrementing 8
- }
+packet-generator new { \
+ name decap_bad_cookie \
+ limit 50 \
+ size 200-200 \
+ node ethernet-input \
+ interface GigabitEthernet2/2/0 \
+ data { \
+ IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a \
+ L2TP: 11::1 -> 22::2 \
+ L2TP: session_id 1 cookie 0x3333ffffffffffff \
+ IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01 \
+ UDP: 1.2.3.4 -> 5.6.7.8 \
+ incrementing 8 \
+ } \
}
-packet-generator new {
- name encap
- limit 100
- node ethernet-input
- interface GigabitEthernet2/1/0
- data {
- IP4: 0000.5555.0002 -> 00:00:dd:dd:00:02 vlan 1
- UDP: 1.2.3.4 -> 5.6.7.8
- incrementing 8
- }
+packet-generator new { \
+ name encap \
+ limit 100 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ data { \
+ IP4: 0000.5555.0002 -> 00:00:dd:dd:00:02 vlan 1 \
+ UDP: 1.2.3.4 -> 5.6.7.8 \
+ incrementing 8 \
+ } \
}
-packet-generator new {
- name decap_sublayer
- limit 300
- size 200-200
- node ethernet-input
- interface GigabitEthernet2/2/0
- data {
- IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a
- L2TP: 11::1 -> 22::3
- L2TP: session_id 2 cookie 0xffffffffffffffff l2_sublayer 0
- IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01
- UDP: 1.2.3.4 -> 5.6.7.8
- incrementing 8
- }
+packet-generator new { \
+ name decap_sublayer \
+ limit 300 \
+ size 200-200 \
+ node ethernet-input \
+ interface GigabitEthernet2/2/0 \
+ data { \
+ IP6: 00:50:00:00:00:01 -> 00:50:56:b7:29:7a \
+ L2TP: 11::1 -> 22::3 \
+ L2TP: session_id 2 cookie 0xffffffffffffffff l2_sublayer 0 \
+ IP4: 00:55:55:55:00:01 -> 00:dd:dd:dd:00:01 \
+ UDP: 1.2.3.4 -> 5.6.7.8 \
+ incrementing 8 \
+ } \
}
-packet-generator new {
- name encap_sublayer
- limit 700
- node ethernet-input
- interface GigabitEthernet2/1/0
- data {
- IP4: 0000.5555.0002 -> 00:00:dd:dd:00:02 vlan 2
- UDP: 1.2.3.4 -> 5.6.7.8
- incrementing 8
- }
+packet-generator new { \
+ name encap_sublayer \
+ limit 700 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ data { \
+ IP4: 0000.5555.0002 -> 00:00:dd:dd:00:02 vlan 2 \
+ UDP: 1.2.3.4 -> 5.6.7.8 \
+ incrementing 8 \
+ } \
}
diff --git a/src/scripts/vnet/l3fwd b/src/scripts/vnet/l3fwd
index 8d0aaa99e8c..1b8bff1b4d0 100644
--- a/src/scripts/vnet/l3fwd
+++ b/src/scripts/vnet/l3fwd
@@ -12,56 +12,56 @@ set ip neighbor pg1 192.168.2.2 6:5:4:3:2:1 static
set ip neighbor pg1 192:168:2::2 6:5:4:3:2:1 static
set int state pg1 up
-packet-generator new {
- name v4-1-to-2
- limit -1
- node ip4-input
- size 64-64
- interface pg0
- data {
- UDP: 192.168.1.2 -> 192.168.2.2
- UDP: 4321 -> 1234
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name v4-1-to-2 \
+ limit -1 \
+ node ip4-input \
+ size 64-64 \
+ interface pg0 \
+ data { \
+ UDP: 192.168.1.2 -> 192.168.2.2 \
+ UDP: 4321 -> 1234 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name v4-2-to-1
- limit -1
- node ip4-input
- size 64-64
- interface pg0
- data {
- UDP: 192.168.2.2 -> 192.168.1.2
- UDP: 1234 -> 4321
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name v4-2-to-1 \
+ limit -1 \
+ node ip4-input \
+ size 64-64 \
+ interface pg0 \
+ data { \
+ UDP: 192.168.2.2 -> 192.168.1.2 \
+ UDP: 1234 -> 4321 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name v6-1-to-2
- limit -1
- node ip6-input
- size 64-64
- interface pg0
- data {
- UDP: 192:168:1::2 -> 192:168:2::2
- UDP: 4321 -> 1234
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name v6-1-to-2 \
+ limit -1 \
+ node ip6-input \
+ size 64-64 \
+ interface pg0 \
+ data { \
+ UDP: 192:168:1::2 -> 192:168:2::2 \
+ UDP: 4321 -> 1234 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name v6-2-to-1
- limit -1
- node ip6-input
- size 64-64
- interface pg0
- data {
- UDP: 192:168:2::2 -> 192:168:1::2
- UDP: 1234 -> 4321
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name v6-2-to-1 \
+ limit -1 \
+ node ip6-input \
+ size 64-64 \
+ interface pg0 \
+ data { \
+ UDP: 192:168:2::2 -> 192:168:1::2 \
+ UDP: 1234 -> 4321 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
packet-generator enable
diff --git a/src/scripts/vnet/lfib/ip4-to-mpls b/src/scripts/vnet/lfib/ip4-to-mpls
index 5ec412aa21d..1170684fd1a 100644
--- a/src/scripts/vnet/lfib/ip4-to-mpls
+++ b/src/scripts/vnet/lfib/ip4-to-mpls
@@ -1,13 +1,13 @@
-packet-generator new {
- name x
- limit 1
- node ip4-input
- size 64-64
- data {
- ICMP: 1.0.0.2 -> 2.2.2.2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ ICMP: 1.0.0.2 -> 2.2.2.2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
loop create
diff --git a/src/scripts/vnet/lfib/mpls-pop-to-mpls b/src/scripts/vnet/lfib/mpls-pop-to-mpls
index 34a57901c03..83b66fd5e56 100644
--- a/src/scripts/vnet/lfib/mpls-pop-to-mpls
+++ b/src/scripts/vnet/lfib/mpls-pop-to-mpls
@@ -1,11 +1,11 @@
-packet-generator new {
- name x
- limit 1
- node mpls-input
- size 72-72
- data {
- hex 0x0001e0ff0001f1ff4500004000000000400177ba010000020202020208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node mpls-input \
+ size 72-72 \
+ data { \
+ hex 0x0001e0ff0001f1ff4500004000000000400177ba010000020202020208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627 \
+ } \
}
loop create
diff --git a/src/scripts/vnet/lfib/mpls-to-ip4 b/src/scripts/vnet/lfib/mpls-to-ip4
index 5acf5fd0e9e..2fa9b9f4025 100644
--- a/src/scripts/vnet/lfib/mpls-to-ip4
+++ b/src/scripts/vnet/lfib/mpls-to-ip4
@@ -1,11 +1,11 @@
-packet-generator new {
- name x
- limit 1
- node mpls-input
- size 68-68
- data {
- hex 0x0001e1ff4500004000000000400177ba010000020202020208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node mpls-input \
+ size 68-68 \
+ data { \
+ hex 0x0001e1ff4500004000000000400177ba010000020202020208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627 \
+ } \
}
loop create
diff --git a/src/scripts/vnet/lfib/mpls-to-mpls b/src/scripts/vnet/lfib/mpls-to-mpls
index 5de3d31979a..5e24698ab41 100644
--- a/src/scripts/vnet/lfib/mpls-to-mpls
+++ b/src/scripts/vnet/lfib/mpls-to-mpls
@@ -1,11 +1,11 @@
-packet-generator new {
- name x
- limit 1
- node mpls-input
- size 68-68
- data {
- hex 0x0001e1ff4500004000000000400177ba010000020200000208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node mpls-input \
+ size 68-68 \
+ data { \
+ hex 0x0001e1ff4500004000000000400177ba010000020200000208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627 \
+ } \
}
loop create
diff --git a/src/scripts/vnet/mcast/ip4 b/src/scripts/vnet/mcast/ip4
index 101f09b087a..e3db0ce6be1 100644
--- a/src/scripts/vnet/mcast/ip4
+++ b/src/scripts/vnet/mcast/ip4
@@ -1,13 +1,13 @@
-packet-generator new {
- name x
- limit 1
- node ip4-input
- size 512-512
- data {
- ICMP: 1.0.0.2 -> 232.1.1.1
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip4-input \
+ size 512-512 \
+ data { \
+ ICMP: 1.0.0.2 -> 232.1.1.1 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
create packet-generator interface pg1
diff --git a/src/scripts/vnet/mpls-o-ethernet/pg b/src/scripts/vnet/mpls-o-ethernet/pg
index 4070cde861f..f83f55adf39 100644
--- a/src/scripts/vnet/mpls-o-ethernet/pg
+++ b/src/scripts/vnet/mpls-o-ethernet/pg
@@ -1,9 +1,9 @@
-packet-generator new {
- name x
- limit 1
- node mpls-ethernet-input
- size 68-68
- data {
- hex 0x0001e1ff4500004000000000400177ba010000020200000208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627
- }
-} \ No newline at end of file
+packet-generator new { \
+ name x \
+ limit 1 \
+ node mpls-ethernet-input \
+ size 68-68 \
+ data { \
+ hex 0x0001e1ff4500004000000000400177ba010000020200000208007a6e000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f2021222324252627 \
+ } \
+}
diff --git a/src/scripts/vnet/mpls-to-dot1ad b/src/scripts/vnet/mpls-to-dot1ad
index ea8418e70e8..cfe0a76834e 100644
--- a/src/scripts/vnet/mpls-to-dot1ad
+++ b/src/scripts/vnet/mpls-to-dot1ad
@@ -29,24 +29,24 @@ mpls local-label add eos 33 via l2-input-on mpls-tunnel0
trace add pg-input 100
-packet-generator new {
- name g2v
- limit 1
- node ip4-input
- interface pg0
- data {
- hex 0x4500002000000000fe2f3b5bc0a80002c0a800010000884700021140000000000011005056b77c83010203040506
- }
+packet-generator new { \
+ name g2v \
+ limit 1 \
+ node ip4-input \
+ interface pg0 \
+ data { \
+ hex 0x4500002000000000fe2f3b5bc0a80002c0a800010000884700021140000000000011005056b77c83010203040506 \
+ } \
}
-packet-generator new {
- name v2g
- limit 1
- node ethernet-input
- interface pg1
- data {
- hex 0x000000000011005056b77c8388a80020810000210102030405060708
- }
+packet-generator new { \
+ name v2g \
+ limit 1 \
+ node ethernet-input \
+ interface pg1 \
+ data { \
+ hex 0x000000000011005056b77c8388a80020810000210102030405060708 \
+ } \
}
pack en g2v
diff --git a/src/scripts/vnet/mpls-tunnel b/src/scripts/vnet/mpls-tunnel
index 1c395f4c324..863379f60da 100644
--- a/src/scripts/vnet/mpls-tunnel
+++ b/src/scripts/vnet/mpls-tunnel
@@ -1,46 +1,46 @@
-packet-generator new {
- name x0
- limit 1
- node ip4-input
- size 64-64
- data {
- ICMP: 1.0.0.2 -> 2.0.0.2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x0 \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ ICMP: 1.0.0.2 -> 2.0.0.2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
-packet-generator new {
- name x1
- limit 1
- node ip4-input
- size 64-64
- data {
- ICMP: 1.0.0.2 -> 2.0.1.2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x1 \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ ICMP: 1.0.0.2 -> 2.0.1.2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
-packet-generator new {
- name x2
- limit 1
- node ip4-input
- size 64-64
- data {
- ICMP: 1.0.0.2 -> 2.0.2.2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x2 \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ ICMP: 1.0.0.2 -> 2.0.2.2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
-packet-generator new {
- name x3
- limit 1
- node ip4-input
- size 64-64
- data {
- ICMP: 1.0.0.2 -> 2.0.3.2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x3 \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ ICMP: 1.0.0.2 -> 2.0.3.2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
diff --git a/src/scripts/vnet/nat44 b/src/scripts/vnet/nat44
index 5c593233d53..3a4bfe9c961 100644
--- a/src/scripts/vnet/nat44
+++ b/src/scripts/vnet/nat44
@@ -1,31 +1,31 @@
create packet-generator interface pg0
create packet-generator interface pg1
-packet-generator new {
- name f1
- limit 1000000
- node ip4-input
- size 64-64
- interface pg0
- data {
- UDP: 10.0.0.3 -> 172.16.1.2
- UDP: 3000 -> 3001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f1 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ interface pg0 \
+ data { \
+ UDP: 10.0.0.3 -> 172.16.1.2 \
+ UDP: 3000 -> 3001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name f2
- limit 1000000
- node ip4-input
- size 64-64
- interface pg0
- data {
- UDP: 10.0.0.3 -> 172.16.1.2
- UDP: 3005 -> 3006
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f2 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ interface pg0 \
+ data { \
+ UDP: 10.0.0.3 -> 172.16.1.2 \
+ UDP: 3005 -> 3006 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
nat44 add address 172.16.1.3
diff --git a/src/scripts/vnet/nat44_det b/src/scripts/vnet/nat44_det
index 9b28738dc8f..9579ce10a95 100644
--- a/src/scripts/vnet/nat44_det
+++ b/src/scripts/vnet/nat44_det
@@ -1,93 +1,93 @@
create packet-generator interface pg0
create packet-generator interface pg1
-packet-generator new {
- name f1
- limit 1000000
- node ip4-input
- size 64-64
- worker 0
- interface pg0
- data {
- UDP: 10.0.0.3 -> 172.16.1.2
- UDP: 3000 -> 3001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f1 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 0 \
+ interface pg0 \
+ data { \
+ UDP: 10.0.0.3 -> 172.16.1.2 \
+ UDP: 3000 -> 3001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name f2
- limit 1000000
- node ip4-input
- size 64-64
- worker 1
- interface pg0
- data {
- UDP: 10.0.0.3 -> 172.16.1.2
- UDP: 3005 -> 3006
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f2 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 1 \
+ interface pg0 \
+ data { \
+ UDP: 10.0.0.3 -> 172.16.1.2 \
+ UDP: 3005 -> 3006 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name f3
- limit 1000000
- node ip4-input
- size 64-64
- worker 0
- interface pg1
- data {
- UDP: 172.16.1.2 -> 1.1.1.2
- UDP: 3001 -> 1141
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f3 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 0 \
+ interface pg1 \
+ data { \
+ UDP: 172.16.1.2 -> 1.1.1.2 \
+ UDP: 3001 -> 1141 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name f4
- limit 1000000
- node ip4-input
- size 64-64
- worker 1
- interface pg1
- data {
- UDP: 172.16.1.2 -> 1.1.1.2
- UDP: 3006 -> 1146
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f4 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 1 \
+ interface pg1 \
+ data { \
+ UDP: 172.16.1.2 -> 1.1.1.2 \
+ UDP: 3006 -> 1146 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name f5
- limit 1000000
- node ip4-input
- size 64-64
- worker 2
- interface pg0
- data {
- UDP: 10.0.0.4 -> 172.16.1.2
- UDP: 3005 -> 3006
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f5 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 2 \
+ interface pg0 \
+ data { \
+ UDP: 10.0.0.4 -> 172.16.1.2 \
+ UDP: 3005 -> 3006 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name f6
- limit 1000000
- node ip4-input
- size 64-64
- worker 1
- interface pg1
- data {
- UDP: 172.16.1.2 -> 1.1.1.2
- UDP: 3006 -> 1177
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f6 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 1 \
+ interface pg1 \
+ data { \
+ UDP: 172.16.1.2 -> 1.1.1.2 \
+ UDP: 3006 -> 1177 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
nat44 deterministic add in 10.0.0.0/21 out 1.1.1.2/32
diff --git a/src/scripts/vnet/nat44_lb b/src/scripts/vnet/nat44_lb
index 6b6d4e1df94..7ccd0932070 100644
--- a/src/scripts/vnet/nat44_lb
+++ b/src/scripts/vnet/nat44_lb
@@ -1,33 +1,33 @@
create packet-generator interface pg0
create packet-generator interface pg1
-packet-generator new {
- name f1
- limit 1000000
- node ip4-input
- size 64-64
- worker 0
- interface pg1
- data {
- UDP: 172.16.1.11 -> 172.16.1.3
- UDP: 3001 -> 3000
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f1 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 0 \
+ interface pg1 \
+ data { \
+ UDP: 172.16.1.11 -> 172.16.1.3 \
+ UDP: 3001 -> 3000 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name f2
- limit 1000000
- node ip4-input
- size 64-64
- worker 1
- interface pg1
- data {
- UDP: 172.16.1.10 -> 172.16.1.3
- UDP: 3001 -> 3000
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f2 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 1 \
+ interface pg1 \
+ data { \
+ UDP: 172.16.1.10 -> 172.16.1.3 \
+ UDP: 3001 -> 3000 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
nat44 add address 172.16.1.3
diff --git a/src/scripts/vnet/nat44_static b/src/scripts/vnet/nat44_static
index 3c761875b2c..34ec854adf2 100644
--- a/src/scripts/vnet/nat44_static
+++ b/src/scripts/vnet/nat44_static
@@ -1,33 +1,33 @@
create packet-generator interface pg0
create packet-generator interface pg1
-packet-generator new {
- name f1
- limit 1000000
- node ip4-input
- size 64-64
- worker 0
- interface pg0
- data {
- UDP: 10.0.0.3 -> 172.16.1.2
- UDP: 3000 -> 3001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f1 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 0 \
+ interface pg0 \
+ data { \
+ UDP: 10.0.0.3 -> 172.16.1.2 \
+ UDP: 3000 -> 3001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name f2
- limit 1000000
- node ip4-input
- size 64-64
- worker 1
- interface pg1
- data {
- UDP: 172.16.1.2 -> 172.16.1.3
- UDP: 3001 -> 3000
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f2 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 1 \
+ interface pg1 \
+ data { \
+ UDP: 172.16.1.2 -> 172.16.1.3 \
+ UDP: 3001 -> 3000 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
nat44 add address 172.16.1.3
diff --git a/src/scripts/vnet/nat44_static_with_port b/src/scripts/vnet/nat44_static_with_port
index 9087fbbd993..ef53e6d2966 100644
--- a/src/scripts/vnet/nat44_static_with_port
+++ b/src/scripts/vnet/nat44_static_with_port
@@ -1,33 +1,33 @@
create packet-generator interface pg0
create packet-generator interface pg1
-packet-generator new {
- name f1
- limit 1000000
- node ip4-input
- size 64-64
- worker 0
- interface pg0
- data {
- UDP: 10.0.0.3 -> 172.16.1.2
- UDP: 3000 -> 3001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f1 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 0 \
+ interface pg0 \
+ data { \
+ UDP: 10.0.0.3 -> 172.16.1.2 \
+ UDP: 3000 -> 3001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name f2
- limit 1000000
- node ip4-input
- size 64-64
- worker 1
- interface pg1
- data {
- UDP: 172.16.1.2 -> 172.16.1.3
- UDP: 3001 -> 3000
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f2 \
+ limit 1000000 \
+ node ip4-input \
+ size 64-64 \
+ worker 1 \
+ interface pg1 \
+ data { \
+ UDP: 172.16.1.2 -> 172.16.1.3 \
+ UDP: 3001 -> 3000 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
nat44 add address 172.16.1.3
diff --git a/src/scripts/vnet/nat64 b/src/scripts/vnet/nat64
index 7e37dd88ba7..87c9de339ff 100644
--- a/src/scripts/vnet/nat64
+++ b/src/scripts/vnet/nat64
@@ -1,33 +1,33 @@
create packet-generator interface pg0
create packet-generator interface pg1
-packet-generator new {
- name f1
- limit 10
- node ip6-input
- size 64-64
- worker 0
- interface pg0
- data {
- UDP: 2000::3 -> 64:ff9b::ac10:102
- UDP: 3000 -> 3001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f1 \
+ limit 10 \
+ node ip6-input \
+ size 64-64 \
+ worker 0 \
+ interface pg0 \
+ data { \
+ UDP: 2000::3 -> 64:ff9b::ac10:102 \
+ UDP: 3000 -> 3001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name f2
- limit 10
- node ip6-input
- size 64-64
- interface pg0
- worker 1
- data {
- UDP: 2000::3 -> 64:ff9b::ac10:102
- UDP: 3005 -> 3006
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f2 \
+ limit 10 \
+ node ip6-input \
+ size 64-64 \
+ interface pg0 \
+ worker 1 \
+ data { \
+ UDP: 2000::3 -> 64:ff9b::ac10:102 \
+ UDP: 3005 -> 3006 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
nat64 add pool address 172.16.1.3
diff --git a/src/scripts/vnet/nat64_static b/src/scripts/vnet/nat64_static
index 3c4dc48bd3a..d5820841862 100644
--- a/src/scripts/vnet/nat64_static
+++ b/src/scripts/vnet/nat64_static
@@ -1,33 +1,33 @@
create packet-generator interface pg0
create packet-generator interface pg1
-packet-generator new {
- name f1
- limit 10
- node ip4-input
- size 64-64
- worker 0
- interface pg1
- data {
- UDP: 172.16.1.2 -> 172.16.1.3
- UDP: 3000 -> 37678
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f1 \
+ limit 10 \
+ node ip4-input \
+ size 64-64 \
+ worker 0 \
+ interface pg1 \
+ data { \
+ UDP: 172.16.1.2 -> 172.16.1.3 \
+ UDP: 3000 -> 37678 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name f2
- limit 10
- node ip4-input
- size 64-64
- interface pg1
- worker 1
- data {
- UDP: 172.16.1.2 -> 172.16.1.3
- UDP: 3005 -> 38678
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name f2 \
+ limit 10 \
+ node ip4-input \
+ size 64-64 \
+ interface pg1 \
+ worker 1 \
+ data { \
+ UDP: 172.16.1.2 -> 172.16.1.3 \
+ UDP: 3005 -> 38678 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
nat64 add pool address 172.16.1.3
diff --git a/src/scripts/vnet/pcap b/src/scripts/vnet/pcap
index d8f47c69074..c52ddb74587 100644
--- a/src/scripts/vnet/pcap
+++ b/src/scripts/vnet/pcap
@@ -1,15 +1,15 @@
-packet-generator new {
- name x
- limit 1
- node ethernet-input
- pcap /home/eliot/pcap-data/ISIS_level1_adjacency.cap
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ethernet-input \
+ pcap /home/eliot/pcap-data/ISIS_level1_adjacency.cap \
}
-packet-generator new {
- name y
- limit 1
- node hdlc-input
- pcap /home/eliot/pcap-data/ISIS_p2p_adjacency.cap
+packet-generator new { \
+ name y \
+ limit 1 \
+ node hdlc-input \
+ pcap /home/eliot/pcap-data/ISIS_p2p_adjacency.cap \
}
tr add pg-input 10
diff --git a/src/scripts/vnet/rewrite b/src/scripts/vnet/rewrite
index d05ce89e6b8..1cb699409b1 100644
--- a/src/scripts/vnet/rewrite
+++ b/src/scripts/vnet/rewrite
@@ -23,37 +23,37 @@ cle er
cle int
cle run
-packet-generator new {
- name toport
- limit 2
- node ethernet-input
- interface GigabitEthernet2/1/0
- data {
- IP4: 0050.56b7.7c83 -> 0050.56b7.296d
- GRE: 8.0.0.1 -> 1.2.5.6 mpls_unicast
- }
+packet-generator new { \
+ name toport \
+ limit 2 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ data { \
+ IP4: 0050.56b7.7c83 -> 0050.56b7.296d \
+ GRE: 8.0.0.1 -> 1.2.5.6 mpls_unicast \
+ } \
}
-packet-generator new {
- name tovlan
- limit 2
- node ethernet-input
- interface GigabitEthernet2/1/0
- data {
- IP4: 0050.56b7.7c83 -> 0050.56b7.296d
- GRE: 8.0.0.1 -> 1.2.6.6 mpls_unicast
- }
+packet-generator new { \
+ name tovlan \
+ limit 2 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ data { \
+ IP4: 0050.56b7.7c83 -> 0050.56b7.296d \
+ GRE: 8.0.0.1 -> 1.2.6.6 mpls_unicast \
+ } \
}
-packet-generator new {
- name toqinq
- limit 2
- node ethernet-input
- interface GigabitEthernet2/1/0
- data {
- IP4: 0050.56b7.7c83 -> 0050.56b7.296d
- GRE: 8.0.0.1 -> 1.2.7.6 mpls_unicast
- }
+packet-generator new { \
+ name toqinq \
+ limit 2 \
+ node ethernet-input \
+ interface GigabitEthernet2/1/0 \
+ data { \
+ IP4: 0050.56b7.7c83 -> 0050.56b7.296d \
+ GRE: 8.0.0.1 -> 1.2.7.6 mpls_unicast \
+ } \
}
diff --git a/src/scripts/vnet/rpf b/src/scripts/vnet/rpf
index 7a6608e2667..80aa9bc3b67 100644
--- a/src/scripts/vnet/rpf
+++ b/src/scripts/vnet/rpf
@@ -1,13 +1,13 @@
-packet-generator new {
- name x
- limit 1
- node ip4-input
- size 64-64
- data {
- ICMP: 1.2.3.4 -> 5.6.7.8
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ ICMP: 1.2.3.4 -> 5.6.7.8 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
tr add pg-input 100
diff --git a/src/scripts/vnet/rtt-test b/src/scripts/vnet/rtt-test
index 5501b99dc45..73016331430 100644
--- a/src/scripts/vnet/rtt-test
+++ b/src/scripts/vnet/rtt-test
@@ -16,16 +16,16 @@ comment { trace add rtt-test-tx 100 }
comment { trace add ixge-input 100 }
comment { te rtt { 1.0.0.2 -> 2.0.0.2 count 1e4 rate 1e9 size 100 histogram-time 1e-5 } }
-packet-generator new {
- name x
- limit 1
- node ip4-input
- size 170-170
- data {
- ICMP: 1.0.0.2 -> 2.0.0.2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip4-input \
+ size 170-170 \
+ data { \
+ ICMP: 1.0.0.2 -> 2.0.0.2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
diff --git a/src/scripts/vnet/source_and_port_range_check b/src/scripts/vnet/source_and_port_range_check
index 7eabde3127f..abe7034c576 100644
--- a/src/scripts/vnet/source_and_port_range_check
+++ b/src/scripts/vnet/source_and_port_range_check
@@ -4,40 +4,40 @@ create loop int
set int state loop0 up
set int ip addr loop0 10.10.10.10/32
-packet-generator new {
- name deny-from-default-route
- limit 1
- node ip4-input
- size 64-64
- data {
- UDP: 1.2.3.4 -> 5.6.7.8
- UDP: 3000 -> 3001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name deny-from-default-route \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ UDP: 1.2.3.4 -> 5.6.7.8 \
+ UDP: 3000 -> 3001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name allow
- limit 1
- node ip4-input
- size 64-64
- data {
- UDP: 1.1.1.1 -> 5.6.7.8
- UDP: 3000 -> 3001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name allow \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ UDP: 1.1.1.1 -> 5.6.7.8 \
+ UDP: 3000 -> 3001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name deny-from-port-range
- limit 1
- node ip4-input
- size 64-64
- data {
- UDP: 1.1.1.1 -> 5.6.7.8
- UDP: 6000 -> 6001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name deny-from-port-range \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ UDP: 1.1.1.1 -> 5.6.7.8 \
+ UDP: 6000 -> 6001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
set ip source-and-port-range-check 1.1.1.0/24 range 2000 - 3000 vrf 99
diff --git a/src/scripts/vnet/speed b/src/scripts/vnet/speed
index d60d671f29f..eecc1d3d1e9 100644
--- a/src/scripts/vnet/speed
+++ b/src/scripts/vnet/speed
@@ -1,13 +1,13 @@
-packet-generator new {
- name x
- limit 1
- node ip4-input
- size 50-50
- data {
- ICMP: 1.2.3.4 -> 5.6.7.8
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip4-input \
+ size 50-50 \
+ data { \
+ ICMP: 1.2.3.4 -> 5.6.7.8 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
tr add pg-input 100
diff --git a/src/scripts/vnet/srp b/src/scripts/vnet/srp
index c20b38a6d20..d77b0bd27a8 100644
--- a/src/scripts/vnet/srp
+++ b/src/scripts/vnet/srp
@@ -3,16 +3,16 @@ srp create-interfaces
set int hw-class fake-srp0 srp
set int hw-class fake-srp1 srp
-packet-generator new {
- name x
- limit 1
- node ip4-input
- size 64-64
- data {
- ICMP: 1.0.0.2 -> 2.0.0.2
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ ICMP: 1.0.0.2 -> 2.0.0.2 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
tr add pg-input 100
diff --git a/src/scripts/vnet/tcp b/src/scripts/vnet/tcp
index 8c6c1077aa4..eaba0903c1b 100644
--- a/src/scripts/vnet/tcp
+++ b/src/scripts/vnet/tcp
@@ -2,16 +2,16 @@ loop create
set int ip address loop0 192.168.1.1/8
set int state loop0 up
-packet-generator new {
- name x
- limit 2048
- node ip4-input
- size 100-100
- interface loop0
- data {
- TCP: 192.168.1.2 -> 192.168.1.1
- TCP: 32415 -> 80
- SYN
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 2048 \
+ node ip4-input \
+ size 100-100 \
+ interface loop0 \
+ data { \
+ TCP: 192.168.1.2 -> 192.168.1.1 \
+ TCP: 32415 -> 80 \
+ SYN \
+ incrementing 100 \
+ } \
}
diff --git a/src/scripts/vnet/udp b/src/scripts/vnet/udp
index 8e3b75b1be1..3a8ead8c631 100644
--- a/src/scripts/vnet/udp
+++ b/src/scripts/vnet/udp
@@ -2,17 +2,17 @@ loop create
set int ip address loop0 192.168.1.1/8
set int state loop0 up
-packet-generator new {
- name udp
- limit 512
- rate 1e4
- node ip4-input
- size 100-100
- interface loop0
- data {
- UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1
- UDP: 4321 -> 1234
- length 72
- incrementing 100
- }
+packet-generator new { \
+ name udp \
+ limit 512 \
+ rate 1e4 \
+ node ip4-input \
+ size 100-100 \
+ interface loop0 \
+ data { \
+ UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1 \
+ UDP: 4321 -> 1234 \
+ length 72 \
+ incrementing 100 \
+ } \
}
diff --git a/src/scripts/vnet/uri/dummy_app.py b/src/scripts/vnet/uri/dummy_app.py
index d96a378a193..7fab2d766ad 100755
--- a/src/scripts/vnet/uri/dummy_app.py
+++ b/src/scripts/vnet/uri/dummy_app.py
@@ -5,34 +5,41 @@ import sys
import time
import argparse
-# action can be reflect or drop
+# action can be reflect or drop
action = "drop"
test = 0
-def test_data (data, n_rcvd):
- n_read = len (data);
+
+def test_data(data, n_rcvd):
+ n_read = len(data)
for i in range(n_read):
- expected = (n_rcvd + i) & 0xff
- byte_got = ord (data[i])
- if (byte_got != expected):
- print("Difference at byte {}. Expected {} got {}"
- .format(n_rcvd + i, expected, byte_got))
+ expected = (n_rcvd + i) & 0xFF
+ byte_got = ord(data[i])
+ if byte_got != expected:
+ print(
+ "Difference at byte {}. Expected {} got {}".format(
+ n_rcvd + i, expected, byte_got
+ )
+ )
return n_read
-def handle_connection (connection, client_address):
+
+def handle_connection(connection, client_address):
print("Received connection from {}".format(repr(client_address)))
n_rcvd = 0
try:
while True:
data = connection.recv(4096)
if not data:
- break;
- if (test == 1):
- n_rcvd += test_data (data, n_rcvd)
- if (action != "drop"):
+ break
+ if test == 1:
+ n_rcvd += test_data(data, n_rcvd)
+ if action != "drop":
connection.sendall(data)
finally:
connection.close()
+
+
def run_tcp_server(ip, port):
print("Starting TCP server {}:{}".format(repr(ip), repr(port)))
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -42,7 +49,9 @@ def run_tcp_server(ip, port):
sock.listen(1)
while True:
connection, client_address = sock.accept()
- handle_connection (connection, client_address)
+ handle_connection(connection, client_address)
+
+
def run_udp_server(ip, port):
print("Starting UDP server {}:{}".format(repr(ip), repr(port)))
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
@@ -51,22 +60,25 @@ def run_udp_server(ip, port):
sock.bind(server_address)
while True:
data, addr = sock.recvfrom(4096)
- if (action != "drop"):
- #snd_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
- sock.sendto (data, addr)
+ if action != "drop":
+ # snd_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ sock.sendto(data, addr)
+
def run_server(ip, port, proto):
- if (proto == "tcp"):
+ if proto == "tcp":
run_tcp_server(ip, port)
- elif (proto == "udp"):
+ elif proto == "udp":
run_udp_server(ip, port)
+
def prepare_data(power):
buf = []
- for i in range (0, pow(2, power)):
- buf.append(i & 0xff)
+ for i in range(0, pow(2, power)):
+ buf.append(i & 0xFF)
return bytearray(buf)
+
def run_tcp_client(ip, port):
print("Starting TCP client {}:{}".format(repr(ip), repr(port)))
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -75,28 +87,33 @@ def run_tcp_client(ip, port):
data = prepare_data(16)
n_rcvd = 0
- n_sent = len (data)
+ n_sent = len(data)
try:
sock.sendall(data)
timeout = time.time() + 2
while n_rcvd < n_sent and time.time() < timeout:
tmp = sock.recv(1500)
- tmp = bytearray (tmp)
+ tmp = bytearray(tmp)
n_read = len(tmp)
for i in range(n_read):
- if (data[n_rcvd + i] != tmp[i]):
- print("Difference at byte {}. Sent {} got {}"
- .format(n_rcvd + i, data[n_rcvd + i], tmp[i]))
+ if data[n_rcvd + i] != tmp[i]:
+ print(
+ "Difference at byte {}. Sent {} got {}".format(
+ n_rcvd + i, data[n_rcvd + i], tmp[i]
+ )
+ )
n_rcvd += n_read
- if (n_rcvd < n_sent or n_rcvd > n_sent):
+ if n_rcvd < n_sent or n_rcvd > n_sent:
print("Sent {} and got back {}".format(n_sent, n_rcvd))
else:
- print("Got back what we've sent!!");
+ print("Got back what we've sent!!")
finally:
sock.close()
+
+
def run_udp_client(ip, port):
print("Starting UDP client {}:{}".format(repr(ip), repr(port)))
n_packets = 100
@@ -104,38 +121,43 @@ def run_udp_client(ip, port):
server_address = (ip, int(port))
data = prepare_data(10)
try:
- for i in range (0, n_packets):
+ for i in range(0, n_packets):
sock.sendto(data, server_address)
finally:
sock.close()
+
+
def run_client(ip, port, proto):
- if (proto == "tcp"):
+ if proto == "tcp":
run_tcp_client(ip, port)
- elif (proto == "udp"):
+ elif proto == "udp":
run_udp_client(ip, port)
+
+
def run(mode, ip, port, proto):
- if (mode == "server"):
- run_server (ip, port, proto)
- elif (mode == "client"):
- run_client (ip, port, proto)
+ if mode == "server":
+ run_server(ip, port, proto)
+ elif mode == "client":
+ run_client(ip, port, proto)
else:
raise Exception("Unknown mode. Only client and server supported")
+
if __name__ == "__main__":
parser = argparse.ArgumentParser()
- parser.add_argument('-m', action='store', dest='mode')
- parser.add_argument('-i', action='store', dest='ip')
- parser.add_argument('-p', action='store', dest='port')
- parser.add_argument('-proto', action='store', dest='proto')
- parser.add_argument('-a', action='store', dest='action')
- parser.add_argument('-t', action='store', dest='test')
+ parser.add_argument("-m", action="store", dest="mode")
+ parser.add_argument("-i", action="store", dest="ip")
+ parser.add_argument("-p", action="store", dest="port")
+ parser.add_argument("-proto", action="store", dest="proto")
+ parser.add_argument("-a", action="store", dest="action")
+ parser.add_argument("-t", action="store", dest="test")
results = parser.parse_args()
action = results.action
test = results.test
run(results.mode, results.ip, results.port, results.proto)
- #if (len(sys.argv)) < 4:
+ # if (len(sys.argv)) < 4:
# raise Exception("Usage: ./dummy_app <mode> <ip> <port> [<action> <test>]")
- #if (len(sys.argv) == 6):
+ # if (len(sys.argv) == 6):
# action = sys.argv[4]
# test = int(sys.argv[5])
- #run (sys.argv[1], sys.argv[2], int(sys.argv[3]))
+ # run (sys.argv[1], sys.argv[2], int(sys.argv[3]))
diff --git a/src/scripts/vnet/uri/udp b/src/scripts/vnet/uri/udp
index bbbe2c1431d..4430b0f10cb 100644
--- a/src/scripts/vnet/uri/udp
+++ b/src/scripts/vnet/uri/udp
@@ -8,18 +8,18 @@ create host-interface name vpp1
set int state host-vpp1 up
set int ip address host-vpp1 6.0.1.1/24
-packet-generator new {
- name udp
- limit 512
- rate 1e4
- node ip4-input
- size 100-100
- interface loop0
- data {
- UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1
- UDP: 4321 -> 1234
- length 72
- incrementing 100
- }
+packet-generator new { \
+ name udp \
+ limit 512 \
+ rate 1e4 \
+ node ip4-input \
+ size 100-100 \
+ interface loop0 \
+ data { \
+ UDP: 192.168.1.2 - 192.168.2.255 -> 192.168.1.1 \
+ UDP: 4321 -> 1234 \
+ length 72 \
+ incrementing 100 \
+ } \
}
session enable
diff --git a/src/scripts/vnet/urpf b/src/scripts/vnet/urpf
index 1268ff50b72..48855e4b165 100644
--- a/src/scripts/vnet/urpf
+++ b/src/scripts/vnet/urpf
@@ -4,64 +4,64 @@ create loop int
set int state loop0 up
set int ip addr loop0 10.10.10.10/24
-packet-generator new {
- name transit-deny
- limit 1
- node ip4-input
- size 64-64
- data {
- UDP: 1.2.3.4 -> 2.2.2.2
- UDP: 3000 -> 3001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name transit-deny \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ UDP: 1.2.3.4 -> 2.2.2.2 \
+ UDP: 3000 -> 3001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name transit-allow
- limit 1
- node ip4-input
- size 64-64
- data {
- UDP: 1.1.1.1 -> 2.2.2.2
- UDP: 3000 -> 3001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name transit-allow \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ UDP: 1.1.1.1 -> 2.2.2.2 \
+ UDP: 3000 -> 3001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name transit-allow-from-excemption
- limit 1
- node ip4-input
- size 64-64
- data {
- UDP: 11.11.12.13 -> 2.2.2.2
- UDP: 6000 -> 6001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name transit-allow-from-excemption \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ UDP: 11.11.12.13 -> 2.2.2.2 \
+ UDP: 6000 -> 6001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name for-us-allow-from-excemption
- limit 1
- node ip4-input
- size 64-64
- data {
- UDP: 11.11.12.13 -> 10.10.10.10
- UDP: 6000 -> 6001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name for-us-allow-from-excemption \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ UDP: 11.11.12.13 -> 10.10.10.10 \
+ UDP: 6000 -> 6001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
-packet-generator new {
- name for-us-allow
- limit 1
- node ip4-input
- size 64-64
- data {
- UDP: 1.1.1.1 -> 10.10.10.10
- UDP: 3000 -> 3001
- length 128 checksum 0 incrementing 1
- }
+packet-generator new { \
+ name for-us-allow \
+ limit 1 \
+ node ip4-input \
+ size 64-64 \
+ data { \
+ UDP: 1.1.1.1 -> 10.10.10.10 \
+ UDP: 3000 -> 3001 \
+ length 128 checksum 0 incrementing 1 \
+ } \
}
tr add pg-input 100
diff --git a/src/scripts/vnet/vlan b/src/scripts/vnet/vlan
index eb3359ea499..130027b4b32 100644
--- a/src/scripts/vnet/vlan
+++ b/src/scripts/vnet/vlan
@@ -3,18 +3,18 @@ int create-sub fake-eth0 1
set int state fake-eth0 up
set int state fake-eth0.1 up
-packet-generator new {
- name x
- limit 1
- node ethernet-input
- interface fake-eth0
- size 64-64
- data {
- IP4: 1.2.3 -> 4.5.6 vlan 1
- ICMP: 1.2.3.4 -> 5.6.7.8
- ICMP echo_request
- incrementing 100
- }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ethernet-input \
+ interface fake-eth0 \
+ size 64-64 \
+ data { \
+ IP4: 1.2.3 -> 4.5.6 vlan 1 \
+ ICMP: 1.2.3.4 -> 5.6.7.8 \
+ ICMP echo_request \
+ incrementing 100 \
+ } \
}
tr add pg-input 100
diff --git a/src/svm/fifo_segment.c b/src/svm/fifo_segment.c
index 6ab07974ff7..d5f62913082 100644
--- a/src/svm/fifo_segment.c
+++ b/src/svm/fifo_segment.c
@@ -105,13 +105,14 @@ fsh_n_active_fifos (fifo_segment_header_t * fsh)
}
static inline uword
-fsh_virtual_mem (fifo_segment_header_t * fsh)
+fs_virtual_mem (fifo_segment_t *fs)
{
+ fifo_segment_header_t *fsh = fs->h;
fifo_segment_slice_t *fss;
uword total_vm = 0;
int i;
- for (i = 0; i < fsh->n_slices; i++)
+ for (i = 0; i < fs->n_slices; i++)
{
fss = fsh_slice_get (fsh, i);
total_vm += clib_atomic_load_relax_n (&fss->virtual_mem);
@@ -294,7 +295,7 @@ fss_fl_chunk_bytes_sub (fifo_segment_slice_t * fss, uword size)
int
fifo_segment_init (fifo_segment_t * fs)
{
- u32 align = 8, offset = 2 * 4096, slices_sz, i;
+ u32 align = 8, offset = FIFO_SEGMENT_ALLOC_OVERHEAD, slices_sz, i;
uword max_fifo, seg_start, seg_sz;
fifo_segment_header_t *fsh;
ssvm_shared_header_t *sh;
@@ -311,7 +312,7 @@ fifo_segment_init (fifo_segment_t * fs)
seg_start = round_pow2_u64 (pointer_to_uword (seg_data), align);
fsh = uword_to_pointer (seg_start, void *);
- CLIB_MEM_UNPOISON (fsh, seg_sz);
+ clib_mem_unpoison (fsh, seg_sz);
memset (fsh, 0, sizeof (*fsh) + slices_sz);
fsh->byte_index = sizeof (*fsh) + slices_sz;
@@ -386,6 +387,8 @@ fifo_segment_attach (fifo_segment_main_t * sm, fifo_segment_create_args_t * a)
pool_get_zero (sm->segments, fs);
+ fs->fs_index = fs - sm->segments;
+ fs->sm_index = ~0;
fs->ssvm.ssvm_size = a->segment_size;
fs->ssvm.my_pid = getpid ();
fs->ssvm.name = format (0, "%s%c", a->segment_name, 0);
@@ -778,7 +781,7 @@ fsh_slice_collect_chunks (fifo_segment_header_t * fsh,
while (c)
{
- CLIB_MEM_UNPOISON (c, sizeof (*c));
+ clib_mem_unpoison (c, sizeof (*c));
next = fs_chunk_ptr (fsh, c->next);
fl_index = fs_freelist_for_size (c->length);
fss_chunk_free_list_push (fsh, fss, fl_index, c);
@@ -833,7 +836,7 @@ fifo_segment_cleanup (fifo_segment_t *fs)
vec_free (fs->slices);
- vec_foreach (fs->mqs, mq)
+ vec_foreach (mq, fs->mqs)
svm_msg_q_cleanup (mq);
vec_free (fs->mqs);
@@ -867,6 +870,9 @@ fifo_segment_alloc_fifo_w_slice (fifo_segment_t * fs, u32 slice_index,
svm_fifo_init (f, data_bytes);
+ f->segment_manager = fs->sm_index;
+ f->segment_index = fs->fs_index;
+
fss = fsh_slice_get (fsh, slice_index);
pfss = fs_slice_private_get (fs, slice_index);
@@ -1092,6 +1098,9 @@ fifo_segment_msg_q_alloc (fifo_segment_t *fs, u32 mq_index,
size = svm_msg_q_size_to_alloc (cfg);
base = fsh_alloc_aligned (fsh, size, 8);
+ if (!base)
+ return 0;
+
fsh->n_reserved_bytes += size;
smq = svm_msg_q_init (base, cfg);
@@ -1376,16 +1385,16 @@ fifo_segment_size (fifo_segment_t * fs)
return fs->h->max_byte_index - fs->h->n_reserved_bytes;
}
-u8
-fsh_has_reached_mem_limit (fifo_segment_header_t * fsh)
+static u8
+fs_has_reached_mem_limit (fifo_segment_t *fs)
{
- return (fsh->flags & FIFO_SEGMENT_F_MEM_LIMIT) ? 1 : 0;
+ return (fs->flags & FIFO_SEGMENT_F_MEM_LIMIT) ? 1 : 0;
}
-void
-fsh_reset_mem_limit (fifo_segment_header_t * fsh)
+static void
+fs_reset_mem_limit (fifo_segment_t *fs)
{
- fsh->flags &= ~FIFO_SEGMENT_F_MEM_LIMIT;
+ fs->flags &= ~FIFO_SEGMENT_F_MEM_LIMIT;
}
void *
@@ -1460,26 +1469,26 @@ fifo_segment_get_mem_usage (fifo_segment_t * fs)
}
fifo_segment_mem_status_t
-fifo_segment_determine_status (fifo_segment_header_t * fsh, u8 usage)
+fifo_segment_determine_status (fifo_segment_t *fs, u8 usage)
{
- if (!fsh->high_watermark || !fsh->low_watermark)
+ if (!fs->high_watermark || !fs->low_watermark)
return MEMORY_PRESSURE_NO_PRESSURE;
/* once the no-memory is detected, the status continues
* until memory usage gets below the high watermark
*/
- if (fsh_has_reached_mem_limit (fsh))
+ if (fs_has_reached_mem_limit (fs))
{
- if (usage >= fsh->high_watermark)
+ if (usage >= fs->high_watermark)
return MEMORY_PRESSURE_NO_MEMORY;
else
- fsh_reset_mem_limit (fsh);
+ fs_reset_mem_limit (fs);
}
- if (usage >= fsh->high_watermark)
+ if (usage >= fs->high_watermark)
return MEMORY_PRESSURE_HIGH_PRESSURE;
- else if (usage >= fsh->low_watermark)
+ else if (usage >= fs->low_watermark)
return MEMORY_PRESSURE_LOW_PRESSURE;
return MEMORY_PRESSURE_NO_PRESSURE;
@@ -1488,10 +1497,9 @@ fifo_segment_determine_status (fifo_segment_header_t * fsh, u8 usage)
fifo_segment_mem_status_t
fifo_segment_get_mem_status (fifo_segment_t * fs)
{
- fifo_segment_header_t *fsh = fs->h;
u8 usage = fifo_segment_get_mem_usage (fs);
- return fifo_segment_determine_status (fsh, usage);
+ return fifo_segment_determine_status (fs, usage);
}
u8 *
@@ -1536,22 +1544,15 @@ format_fifo_segment (u8 * s, va_list * args)
f64 usage;
fifo_segment_mem_status_t mem_st;
- indent = format_get_indent (s) + 2;
-
- if (fs == 0)
- {
- s = format (s, "%-20s%10s%15s%15s%15s%15s", "Name", "Type",
- "HeapSize (M)", "ActiveFifos", "FreeFifos", "Address");
- return s;
- }
+ indent = format_get_indent (s);
fifo_segment_info (fs, &address, &size);
active_fifos = fifo_segment_num_fifos (fs);
free_fifos = fifo_segment_num_free_fifos (fs);
- s = format (s, "%-20v%10U%15llu%15u%15u%15llx", ssvm_name (&fs->ssvm),
- format_fifo_segment_type, fs, size >> 20ULL, active_fifos,
- free_fifos, address);
+ s = format (s, "%U%v type: %U size: %U active fifos: %u", format_white_space,
+ 2, ssvm_name (&fs->ssvm), format_fifo_segment_type, fs,
+ format_memory_size, size, active_fifos);
if (!verbose)
return s;
@@ -1560,9 +1561,8 @@ format_fifo_segment (u8 * s, va_list * args)
free_chunks = fifo_segment_num_free_chunks (fs, ~0);
if (free_chunks)
- s =
- format (s, "\n\n%UFree/Allocated chunks by size:\n", format_white_space,
- indent + 2);
+ s = format (s, "\n\n%UFree/Allocated chunks by size:\n",
+ format_white_space, indent + 2);
else
s = format (s, "\n");
@@ -1598,7 +1598,7 @@ format_fifo_segment (u8 * s, va_list * args)
in_use = fifo_segment_size (fs) - est_free_seg_bytes - tracked_cached_bytes;
usage = (100.0 * in_use) / allocated;
mem_st = fifo_segment_get_mem_status (fs);
- virt = fsh_virtual_mem (fsh);
+ virt = fs_virtual_mem (fs);
reserved = fsh->n_reserved_bytes;
s = format (s, "\n%Useg free bytes: %U (%lu) estimated: %U (%lu) reserved:"
diff --git a/src/svm/fifo_segment.h b/src/svm/fifo_segment.h
index f76798fed90..ec184207269 100644
--- a/src/svm/fifo_segment.h
+++ b/src/svm/fifo_segment.h
@@ -20,6 +20,8 @@
#include <svm/message_queue.h>
#include <svm/svm_fifo.h>
+#define FIFO_SEGMENT_ALLOC_OVERHEAD (2 * clib_mem_get_page_size ())
+
typedef enum
{
FIFO_SEGMENT_FTYPE_NONE = -1,
@@ -69,10 +71,15 @@ typedef struct
{
ssvm_private_t ssvm; /**< ssvm segment data */
fifo_segment_header_t *h; /**< fifo segment data */
- uword max_byte_index;
- u8 n_slices; /**< number of fifo segment slices */
fifo_slice_private_t *slices; /**< private slice information */
svm_msg_q_t *mqs; /**< private vec of attached mqs */
+ uword max_byte_index; /**< max byte index for segment */
+ u32 sm_index; /**< owner segment manager index */
+ u32 fs_index; /**< fs index in sm pool */
+ u8 n_slices; /**< number of fifo segment slices */
+ u8 flags; /**< private fifo segment flags */
+ u8 high_watermark; /**< memory pressure watermark high */
+ u8 low_watermark; /**< memory pressure watermark low */
} fifo_segment_t;
typedef struct
@@ -91,7 +98,7 @@ typedef struct
u32 *new_segment_indices; /**< return vec of new seg indices */
} fifo_segment_create_args_t;
-#define fifo_segment_flags(_fs) _fs->h->flags
+#define fifo_segment_flags(_fs) _fs->flags
int fifo_segment_init (fifo_segment_t * fs);
int fifo_segment_create (fifo_segment_main_t * sm,
@@ -263,21 +270,6 @@ void fsh_collect_chunks (fifo_segment_header_t * fsh, u32 slice_index,
svm_fifo_chunk_t * c);
/**
- * Fifo segment has reached mem limit
- *
- * @param fsh fifo segment header
- * @return 1 (if reached) or 0 (otherwise)
- */
-u8 fsh_has_reached_mem_limit (fifo_segment_header_t * fsh);
-
-/**
- * Fifo segment reset mem limit flag
- *
- * @param fs fifo segment
- */
-void fsh_reset_mem_limit (fifo_segment_header_t * fsh);
-
-/**
* Fifo segment reset mem limit flag
*
* @param fs fifo segment
@@ -349,8 +341,6 @@ uword fifo_segment_chunk_offset (fifo_segment_t *fs, svm_fifo_chunk_t *c);
u32 fifo_segment_num_free_chunks (fifo_segment_t * fs, u32 size);
u8 fifo_segment_get_mem_usage (fifo_segment_t * fs);
-fifo_segment_mem_status_t fifo_segment_determine_status
- (fifo_segment_header_t * fsh, u8 usage);
fifo_segment_mem_status_t fifo_segment_get_mem_status (fifo_segment_t * fs);
void fifo_segment_main_init (fifo_segment_main_t * sm, u64 baseva,
diff --git a/src/svm/fifo_types.h b/src/svm/fifo_types.h
index aa8c3616317..742351b1764 100644
--- a/src/svm/fifo_types.h
+++ b/src/svm/fifo_types.h
@@ -78,6 +78,7 @@ typedef struct svm_fifo_shr_
u32 head; /**< fifo head position/byte */
volatile u32 want_deq_ntf; /**< producer wants nudge */
volatile u32 has_deq_ntf;
+ u32 deq_thresh; /**< fifo threshold used for notifications */
CLIB_CACHE_LINE_ALIGN_MARK (producer);
u32 tail; /**< fifo tail position/byte */
@@ -140,10 +141,7 @@ struct fifo_segment_header_
u32 n_active_fifos; /**< Number of active fifos */
u32 n_reserved_bytes; /**< Bytes not to be allocated */
u32 max_log2_fifo_size; /**< Max log2(chunk size) for fs */
- u8 flags; /**< Segment flags */
u8 n_slices; /**< Number of slices */
- u8 high_watermark; /**< Memory pressure watermark high */
- u8 low_watermark; /**< Memory pressure watermark low */
u8 pct_first_alloc; /**< Pct of fifo size to alloc */
u8 n_mqs; /**< Num mqs for mqs segment */
CLIB_CACHE_LINE_ALIGN_MARK (allocator);
diff --git a/src/svm/message_queue.c b/src/svm/message_queue.c
index a6af7962f73..ab0d230b1f0 100644
--- a/src/svm/message_queue.c
+++ b/src/svm/message_queue.c
@@ -243,8 +243,7 @@ svm_msg_q_lock_and_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index,
{
if (svm_msg_q_try_lock (mq))
return -1;
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)
- || svm_msg_q_ring_is_full (mq, ring_index)))
+ if (PREDICT_FALSE (svm_msg_q_or_ring_is_full (mq, ring_index)))
{
svm_msg_q_unlock (mq);
return -2;
@@ -254,9 +253,8 @@ svm_msg_q_lock_and_alloc_msg_w_ring (svm_msg_q_t * mq, u32 ring_index,
else
{
svm_msg_q_lock (mq);
- while (svm_msg_q_is_full (mq)
- || svm_msg_q_ring_is_full (mq, ring_index))
- svm_msg_q_wait_prod (mq);
+ while (svm_msg_q_or_ring_is_full (mq, ring_index))
+ svm_msg_q_or_ring_wait_prod (mq, ring_index);
*msg = svm_msg_q_alloc_msg_w_ring (mq, ring_index);
}
return 0;
@@ -342,15 +340,15 @@ svm_msq_q_msg_is_valid (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
return (dist1 < dist2);
}
-static void
-svm_msg_q_add_raw (svm_msg_q_t *mq, u8 *elem)
+void
+svm_msg_q_add_raw (svm_msg_q_t *mq, svm_msg_q_msg_t *msg)
{
svm_msg_q_shared_queue_t *sq = mq->q.shr;
i8 *tailp;
u32 sz;
tailp = (i8 *) (&sq->data[0] + sq->elsize * sq->tail);
- clib_memcpy_fast (tailp, elem, sq->elsize);
+ clib_memcpy_fast (tailp, msg, sq->elsize);
sq->tail = (sq->tail + 1) % sq->maxsize;
@@ -383,7 +381,7 @@ svm_msg_q_add (svm_msg_q_t * mq, svm_msg_q_msg_t * msg, int nowait)
svm_msg_q_wait_prod (mq);
}
- svm_msg_q_add_raw (mq, (u8 *) msg);
+ svm_msg_q_add_raw (mq, msg);
svm_msg_q_unlock (mq);
@@ -394,7 +392,7 @@ void
svm_msg_q_add_and_unlock (svm_msg_q_t * mq, svm_msg_q_msg_t * msg)
{
ASSERT (svm_msq_q_msg_is_valid (mq, msg));
- svm_msg_q_add_raw (mq, (u8 *) msg);
+ svm_msg_q_add_raw (mq, msg);
svm_msg_q_unlock (mq);
}
@@ -569,6 +567,35 @@ svm_msg_q_wait_prod (svm_msg_q_t *mq)
}
int
+svm_msg_q_or_ring_wait_prod (svm_msg_q_t *mq, u32 ring_index)
+{
+ if (mq->q.evtfd == -1)
+ {
+ while (svm_msg_q_or_ring_is_full (mq, ring_index))
+ pthread_cond_wait (&mq->q.shr->condvar, &mq->q.shr->mutex);
+ }
+ else
+ {
+ u64 buf;
+ int rv;
+
+ while (svm_msg_q_or_ring_is_full (mq, ring_index))
+ {
+ while ((rv = read (mq->q.evtfd, &buf, sizeof (buf))) < 0)
+ {
+ if (errno != EAGAIN)
+ {
+ clib_unix_warning ("read error");
+ return rv;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
+int
svm_msg_q_timedwait (svm_msg_q_t *mq, double timeout)
{
if (mq->q.evtfd == -1)
diff --git a/src/svm/message_queue.h b/src/svm/message_queue.h
index bd76eda5d88..4473c44f4e3 100644
--- a/src/svm/message_queue.h
+++ b/src/svm/message_queue.h
@@ -193,6 +193,17 @@ void svm_msg_q_free_msg (svm_msg_q_t * mq, svm_msg_q_msg_t * msg);
/**
* Producer enqueue one message to queue
*
+ * Must be called with mq locked. Prior to calling this, the producer should've
+ * obtained a message buffer from one of the rings.
+ *
+ * @param mq message queue
+ * @param msg message to be enqueued
+ */
+void svm_msg_q_add_raw (svm_msg_q_t *mq, svm_msg_q_msg_t *msg);
+
+/**
+ * Producer enqueue one message to queue
+ *
* Prior to calling this, the producer should've obtained a message buffer
* from one of the rings by calling @ref svm_msg_q_alloc_msg.
*
@@ -328,6 +339,12 @@ svm_msg_q_ring_is_full (svm_msg_q_t * mq, u32 ring_index)
return (clib_atomic_load_relax_n (&ring->shr->cursize) >= ring->nitems);
}
+static inline u8
+svm_msg_q_or_ring_is_full (svm_msg_q_t *mq, u32 ring_index)
+{
+ return (svm_msg_q_is_full (mq) || svm_msg_q_ring_is_full (mq, ring_index));
+}
+
/**
* Check if message queue is empty
*/
@@ -418,6 +435,14 @@ int svm_msg_q_wait (svm_msg_q_t *mq, svm_msg_q_wait_type_t type);
int svm_msg_q_wait_prod (svm_msg_q_t *mq);
/**
+ * Wait for message queue or ring event as producer
+ *
+ * Similar to @ref svm_msg_q_wait but lock (mutex or spinlock) must
+ * be held. Should only be called by producers.
+ */
+int svm_msg_q_or_ring_wait_prod (svm_msg_q_t *mq, u32 ring_index);
+
+/**
* Timed wait for message queue event
*
* Must be called with mutex held.
diff --git a/src/svm/queue.c b/src/svm/queue.c
index 864d97e3de4..78444d8ede4 100644
--- a/src/svm/queue.c
+++ b/src/svm/queue.c
@@ -323,14 +323,14 @@ svm_queue_add2 (svm_queue_t * q, u8 * elem, u8 * elem2, int nowait)
else
svm_queue_lock (q);
- if (PREDICT_FALSE (q->cursize + 1 == q->maxsize))
+ if (PREDICT_FALSE (q->cursize + 1 >= q->maxsize))
{
if (nowait)
{
svm_queue_unlock (q);
return (-2);
}
- while (q->cursize + 1 == q->maxsize)
+ while (q->cursize + 1 >= q->maxsize)
svm_queue_wait_inline (q);
}
diff --git a/src/svm/ssvm.c b/src/svm/ssvm.c
index f93f40d0526..bf0a1361e4a 100644
--- a/src/svm/ssvm.c
+++ b/src/svm/ssvm.c
@@ -95,7 +95,7 @@ ssvm_server_init_shm (ssvm_private_t * ssvm)
close (ssvm_fd);
- CLIB_MEM_UNPOISON (sh, sizeof (*sh));
+ clib_mem_unpoison (sh, sizeof (*sh));
sh->server_pid = ssvm->my_pid;
sh->ssvm_size = ssvm->ssvm_size;
sh->ssvm_va = pointer_to_uword (sh);
@@ -183,6 +183,7 @@ re_map_it:
return SSVM_API_ERROR_MMAP;
}
sh->client_pid = getpid ();
+ close (ssvm_fd);
return 0;
}
@@ -226,8 +227,12 @@ ssvm_server_init_memfd (ssvm_private_t * memfd)
ASSERT (vec_c_string_is_terminated (memfd->name));
- memfd->fd = clib_mem_vm_create_fd (CLIB_MEM_PAGE_SZ_DEFAULT,
- (char *) memfd->name);
+ if (memfd->huge_page)
+ memfd->fd = clib_mem_vm_create_fd (CLIB_MEM_PAGE_SZ_DEFAULT_HUGE,
+ (char *) memfd->name);
+ else
+ memfd->fd =
+ clib_mem_vm_create_fd (CLIB_MEM_PAGE_SZ_DEFAULT, (char *) memfd->name);
if (memfd->fd == CLIB_MEM_ERROR)
{
@@ -269,7 +274,7 @@ ssvm_server_init_memfd (ssvm_private_t * memfd)
sh->ssvm_va = pointer_to_uword (sh);
sh->type = SSVM_SEGMENT_MEMFD;
- page_size = 1ULL << log2_page_size;
+ page_size = clib_mem_get_page_size ();
sh->heap = clib_mem_create_heap (((u8 *) sh) + page_size,
memfd->ssvm_size - page_size,
1 /* locked */ , "ssvm server memfd");
diff --git a/src/svm/ssvm.h b/src/svm/ssvm.h
index 9bd16a9b462..ef982a1b304 100644
--- a/src/svm/ssvm.h
+++ b/src/svm/ssvm.h
@@ -87,7 +87,7 @@ typedef struct
u8 *name;
u8 numa; /**< UNUSED: numa requested at alloc time */
int is_server;
-
+ int huge_page;
union
{
int fd; /**< memfd segments */
diff --git a/src/svm/svm.c b/src/svm/svm.c
index b844e20b4cc..d32c0a5d4db 100644
--- a/src/svm/svm.c
+++ b/src/svm/svm.c
@@ -327,7 +327,7 @@ svm_data_region_create (svm_map_region_args_t * a, svm_region_t * rp)
return -3;
}
close (fd);
- CLIB_MEM_UNPOISON (rp->data_base, map_size);
+ clib_mem_unpoison (rp->data_base, map_size);
rp->backing_file = (char *) format (0, "%s%c", a->backing_file, 0);
rp->flags |= SVM_FLAGS_FILE;
}
@@ -412,7 +412,7 @@ svm_data_region_map (svm_map_region_args_t * a, svm_region_t * rp)
return -3;
}
close (fd);
- CLIB_MEM_UNPOISON (rp->data_base, map_size);
+ clib_mem_unpoison (rp->data_base, map_size);
}
return 0;
}
@@ -551,7 +551,6 @@ svm_map_region (svm_map_region_args_t * a)
int svm_fd;
svm_region_t *rp;
int deadman = 0;
- u8 junk = 0;
void *oldheap;
int rv;
int pid_holding_region_lock;
@@ -582,6 +581,15 @@ svm_map_region (svm_map_region_args_t * a)
vec_free (shm_name);
+#ifdef __FreeBSD__
+ if (ftruncate (svm_fd, a->size) < 0)
+ {
+ clib_warning ("ftruncate region size");
+ close (svm_fd);
+ return (0);
+ }
+#else
+ u8 junk = 0;
if (lseek (svm_fd, a->size, SEEK_SET) == (off_t) - 1)
{
clib_warning ("seek region size");
@@ -594,6 +602,7 @@ svm_map_region (svm_map_region_args_t * a)
close (svm_fd);
return (0);
}
+#endif /* __FreeBSD__ */
rp = mmap (uword_to_pointer (a->baseva, void *), a->size,
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);
@@ -605,7 +614,7 @@ svm_map_region (svm_map_region_args_t * a)
return (0);
}
close (svm_fd);
- CLIB_MEM_UNPOISON (rp, a->size);
+ clib_mem_unpoison (rp, a->size);
svm_region_init_mapped_region (a, rp);
@@ -663,7 +672,7 @@ svm_map_region (svm_map_region_args_t * a)
return (0);
}
- CLIB_MEM_UNPOISON (rp, MMAP_PAGESIZE);
+ clib_mem_unpoison (rp, MMAP_PAGESIZE);
/*
* We lost the footrace to create this region; make sure
@@ -701,7 +710,7 @@ svm_map_region (svm_map_region_args_t * a)
close (svm_fd);
- CLIB_MEM_UNPOISON (rp, a->size);
+ clib_mem_unpoison (rp, a->size);
if ((uword) rp != rp->virtual_base)
{
@@ -1051,7 +1060,7 @@ svm_region_unmap_internal (void *rp_arg, u8 is_client)
oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */
/* Remove the caller from the list of mappers */
- CLIB_MEM_UNPOISON (rp->client_pids, vec_bytes (rp->client_pids));
+ clib_mem_unpoison (rp->client_pids, vec_bytes (rp->client_pids));
for (i = 0; i < vec_len (rp->client_pids); i++)
{
if (rp->client_pids[i] == mypid)
@@ -1184,7 +1193,7 @@ svm_region_exit_internal (u8 is_client)
virtual_base = root_rp->virtual_base;
virtual_size = root_rp->virtual_size;
- CLIB_MEM_UNPOISON (root_rp->client_pids, vec_bytes (root_rp->client_pids));
+ clib_mem_unpoison (root_rp->client_pids, vec_bytes (root_rp->client_pids));
for (i = 0; i < vec_len (root_rp->client_pids); i++)
{
if (root_rp->client_pids[i] == mypid)
@@ -1291,12 +1300,10 @@ svm_client_scan (const char *root_path)
* Snapshoot names, can't hold root rp mutex across
* find_or_create.
*/
- /* *INDENT-OFF* */
pool_foreach (subp, mp->subregions) {
name = vec_dup (subp->subregion_name);
vec_add1(svm_names, name);
}
- /* *INDENT-ON* */
pthread_mutex_unlock (&root_rp->mutex);
diff --git a/src/svm/svm.h b/src/svm/svm.h
index 8bf561e9a81..cdc9d90cab0 100644
--- a/src/svm/svm.h
+++ b/src/svm/svm.h
@@ -43,23 +43,6 @@ svm_mem_alloc (svm_region_t * rp, uword size)
return (rv);
}
-static inline void *
-svm_mem_alloc_aligned_at_offset (svm_region_t * rp,
- uword size, uword align, uword offset)
-{
- clib_mem_heap_t *oldheap;
- ASSERT (rp->flags & SVM_FLAGS_MHEAP);
- u8 *rv;
-
- pthread_mutex_lock (&rp->mutex);
- oldheap = clib_mem_set_heap (rp->data_heap);
- rv = clib_mem_alloc_aligned_at_offset (size, align, offset,
- 1 /* yes, call os_out_of_memory */ );
- clib_mem_set_heap (oldheap);
- pthread_mutex_unlock (&rp->mutex);
- return (rv);
-}
-
static inline void
svm_mem_free (svm_region_t * rp, void *ptr)
{
diff --git a/src/svm/svm_common.h b/src/svm/svm_common.h
index 1f1132afdc2..0e19ffd3f76 100644
--- a/src/svm/svm_common.h
+++ b/src/svm/svm_common.h
@@ -19,8 +19,14 @@
#define __included_svm_common_h__
#include <stdarg.h>
+#ifdef __FreeBSD__
+#include <stdint.h>
+#endif /* __FreeBSD__ */
#include <pthread.h>
+#ifdef __linux__
#include <sys/user.h>
+#endif /* __linux__ */
+#include <vppinfra/clib.h>
#include <vppinfra/types.h>
#define SVM_VERSION ((1<<16) | 1) /* set to declare region ready. */
diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c
index 2150694ef46..49b3d1728f3 100644
--- a/src/svm/svm_fifo.c
+++ b/src/svm/svm_fifo.c
@@ -72,8 +72,8 @@ CLIB_MARCH_FN (svm_fifo_copy_from_chunk, void, svm_fifo_t *f,
c = f_cptr (f, c->next);
while ((to_copy -= n_chunk))
{
- CLIB_MEM_UNPOISON (c, sizeof (*c));
- CLIB_MEM_UNPOISON (c->data, c->length);
+ clib_mem_unpoison (c, sizeof (*c));
+ clib_mem_unpoison (c->data, c->length);
n_chunk = clib_min (c->length, to_copy);
clib_memcpy_fast (dst + (len - to_copy), &c->data[0], n_chunk);
c = c->length <= to_copy ? f_cptr (f, c->next) : c;
@@ -1010,25 +1010,26 @@ svm_fifo_enqueue_segments (svm_fifo_t * f, const svm_fifo_seg_t segs[],
}
else
{
- len = clib_min (free_count, len);
+ u32 n_left = clib_min (free_count, len);
- if (f_pos_gt (tail + len, f_chunk_end (f_end_cptr (f))))
+ if (f_pos_gt (tail + n_left, f_chunk_end (f_end_cptr (f))))
{
- if (PREDICT_FALSE (f_try_chunk_alloc (f, head, tail, len)))
+ if (PREDICT_FALSE (f_try_chunk_alloc (f, head, tail, n_left)))
{
- len = f_chunk_end (f_end_cptr (f)) - tail;
- if (!len)
+ n_left = f_chunk_end (f_end_cptr (f)) - tail;
+ if (!n_left)
return SVM_FIFO_EGROW;
}
}
+ len = n_left;
i = 0;
- while (len)
+ while (n_left)
{
- u32 to_copy = clib_min (segs[i].len, len);
+ u32 to_copy = clib_min (segs[i].len, n_left);
svm_fifo_copy_to_chunk (f, f_tail_cptr (f), tail, segs[i].data,
to_copy, &f->shr->tail_chunk);
- len -= to_copy;
+ n_left -= to_copy;
tail += to_copy;
i++;
}
@@ -1154,7 +1155,7 @@ svm_fifo_peek (svm_fifo_t * f, u32 offset, u32 len, u8 * dst)
len = clib_min (cursize - offset, len);
head_idx = head + offset;
- CLIB_MEM_UNPOISON (f->ooo_deq, sizeof (*f->ooo_deq));
+ clib_mem_unpoison (f->ooo_deq, sizeof (*f->ooo_deq));
if (!f->ooo_deq || !f_chunk_includes_pos (f->ooo_deq, head_idx))
f_update_ooo_deq (f, head_idx, head_idx + len);
@@ -1280,8 +1281,8 @@ svm_fifo_provision_chunks (svm_fifo_t *f, svm_fifo_seg_t *fs, u32 n_segs,
}
int
-svm_fifo_segments (svm_fifo_t * f, u32 offset, svm_fifo_seg_t * fs,
- u32 n_segs, u32 max_bytes)
+svm_fifo_segments (svm_fifo_t *f, u32 offset, svm_fifo_seg_t *fs, u32 *n_segs,
+ u32 max_bytes)
{
u32 cursize, to_read, head, tail, fs_index = 1;
u32 n_bytes, head_pos, len, start;
@@ -1314,7 +1315,7 @@ svm_fifo_segments (svm_fifo_t * f, u32 offset, svm_fifo_seg_t * fs,
fs[0].len = clib_min (c->length - head_pos, to_read);
n_bytes = fs[0].len;
- while (n_bytes < to_read && fs_index < n_segs)
+ while (n_bytes < to_read && fs_index < *n_segs)
{
c = f_cptr (f, c->next);
len = clib_min (c->length, to_read - n_bytes);
@@ -1323,6 +1324,7 @@ svm_fifo_segments (svm_fifo_t * f, u32 offset, svm_fifo_seg_t * fs,
n_bytes += len;
fs_index += 1;
}
+ *n_segs = fs_index;
return n_bytes;
}
diff --git a/src/svm/svm_fifo.h b/src/svm/svm_fifo.h
index 560628d2d07..7ea114f8702 100644
--- a/src/svm/svm_fifo.h
+++ b/src/svm/svm_fifo.h
@@ -34,7 +34,7 @@ typedef enum svm_fifo_deq_ntf_
SVM_FIFO_NO_DEQ_NOTIF = 0, /**< No notification requested */
SVM_FIFO_WANT_DEQ_NOTIF = 1, /**< Notify on dequeue */
SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL = 2, /**< Notify on transition from full */
- SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY = 4, /**< Notify on transition to empty */
+ SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY = 4, /**< Notify on transition to empty */
} svm_fifo_deq_ntf_t;
typedef enum svm_fifo_flag_
@@ -431,8 +431,8 @@ void svm_fifo_dequeue_drop_all (svm_fifo_t * f);
* @param max_bytes max bytes to be mapped to fifo segments
* @return number of bytes in fifo segments or SVM_FIFO_EEMPTY
*/
-int svm_fifo_segments (svm_fifo_t * f, u32 offset, svm_fifo_seg_t * fs,
- u32 n_segs, u32 max_bytes);
+int svm_fifo_segments (svm_fifo_t *f, u32 offset, svm_fifo_seg_t *fs,
+ u32 *n_segs, u32 max_bytes);
/**
* Add io events subscriber to list
*
@@ -640,63 +640,6 @@ u32 svm_fifo_max_read_chunk (svm_fifo_t * f);
u32 svm_fifo_max_write_chunk (svm_fifo_t * f);
/**
- * Fifo head chunk getter
- *
- * @param f fifo
- * @return head chunk pointer
- */
-static inline svm_fifo_chunk_t *
-svm_fifo_head_chunk (svm_fifo_t * f)
-{
- return f_head_cptr (f);
-}
-
-/**
- * Fifo head pointer getter
- *
- * @param f fifo
- * @return head pointer
- */
-static inline u8 *
-svm_fifo_head (svm_fifo_t * f)
-{
- svm_fifo_chunk_t *head_chunk;
- if (!f->shr->head_chunk)
- return 0;
- /* load-relaxed: consumer owned index */
- head_chunk = f_head_cptr (f);
- return (head_chunk->data + (f->shr->head - head_chunk->start_byte));
-}
-
-/**
- * Fifo tail chunk getter
- *
- * @param f fifo
- * @return tail chunk pointer
- */
-static inline svm_fifo_chunk_t *
-svm_fifo_tail_chunk (svm_fifo_t * f)
-{
- return f_tail_cptr (f);
-}
-
-/**
- * Fifo tail pointer getter
- *
- * @param f fifo
- * @return tail pointer
- */
-static inline u8 *
-svm_fifo_tail (svm_fifo_t * f)
-{
- svm_fifo_chunk_t *tail_chunk;
-
- /* load-relaxed: producer owned index */
- tail_chunk = f_tail_cptr (f);
- return (tail_chunk->data + (f->shr->tail - tail_chunk->start_byte));
-}
-
-/**
* Fifo number of subscribers getter
*
* @param f fifo
@@ -816,7 +759,7 @@ svm_fifo_unset_event (svm_fifo_t * f)
static inline void
svm_fifo_add_want_deq_ntf (svm_fifo_t * f, u8 ntf_type)
{
- f->shr->want_deq_ntf |= ntf_type;
+ __atomic_or_fetch (&f->shr->want_deq_ntf, ntf_type, __ATOMIC_RELEASE);
}
/**
@@ -830,7 +773,21 @@ svm_fifo_add_want_deq_ntf (svm_fifo_t * f, u8 ntf_type)
static inline void
svm_fifo_del_want_deq_ntf (svm_fifo_t * f, u8 ntf_type)
{
- f->shr->want_deq_ntf &= ~ntf_type;
+ __atomic_and_fetch (&f->shr->want_deq_ntf, ~ntf_type, __ATOMIC_RELEASE);
+}
+
+/**
+ * Get want notification flag
+ *
+ * Done atomically with acquire memory ordering
+ *
+ * @param f fifo
+ * @return value of want_deq_ntf flag
+ */
+static inline u32
+svm_fifo_get_want_deq_ntf (svm_fifo_t *f)
+{
+ return clib_atomic_load_acq_n (&f->shr->want_deq_ntf);
}
/**
@@ -847,10 +804,27 @@ svm_fifo_del_want_deq_ntf (svm_fifo_t * f, u8 ntf_type)
static inline void
svm_fifo_clear_deq_ntf (svm_fifo_t * f)
{
- /* Set the flag if want_notif_if_full was the only ntf requested */
- f->shr->has_deq_ntf =
- f->shr->want_deq_ntf == SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL;
- svm_fifo_del_want_deq_ntf (f, SVM_FIFO_WANT_DEQ_NOTIF);
+ u32 want_deq_ntf = svm_fifo_get_want_deq_ntf (f);
+ /* Set the flag if want ntf if full or empty was requested */
+ if (want_deq_ntf &
+ (SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL | SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY))
+ clib_atomic_store_rel_n (&f->shr->has_deq_ntf, 1);
+ if (want_deq_ntf & SVM_FIFO_WANT_DEQ_NOTIF)
+ svm_fifo_del_want_deq_ntf (f, SVM_FIFO_WANT_DEQ_NOTIF);
+}
+
+/**
+ * Get has dequeue notification flag
+ *
+ * Done atomically with acquire memory ordering
+ *
+ * @param f fifo
+ * @return has_deq_ntf flag
+ */
+static inline u32
+svm_fifo_has_deq_ntf (svm_fifo_t *f)
+{
+ return clib_atomic_load_acq_n (&f->shr->has_deq_ntf);
}
/**
@@ -881,28 +855,40 @@ svm_fifo_reset_has_deq_ntf (svm_fifo_t * f)
static inline u8
svm_fifo_needs_deq_ntf (svm_fifo_t * f, u32 n_last_deq)
{
- u8 want_ntf = f->shr->want_deq_ntf;
+ u32 want_ntf = svm_fifo_get_want_deq_ntf (f);
- if (PREDICT_TRUE (want_ntf == SVM_FIFO_NO_DEQ_NOTIF))
+ if (want_ntf == SVM_FIFO_NO_DEQ_NOTIF)
return 0;
else if (want_ntf & SVM_FIFO_WANT_DEQ_NOTIF)
- return 1;
+ return (svm_fifo_max_enqueue (f) >= f->shr->deq_thresh);
if (want_ntf & SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL)
{
u32 max_deq = svm_fifo_max_dequeue_cons (f);
u32 size = f->shr->size;
- if (!f->shr->has_deq_ntf && max_deq < size &&
- max_deq + n_last_deq >= size)
+ if (max_deq < size && max_deq + n_last_deq >= size &&
+ !svm_fifo_has_deq_ntf (f))
return 1;
}
if (want_ntf & SVM_FIFO_WANT_DEQ_NOTIF_IF_EMPTY)
{
- if (!f->shr->has_deq_ntf && svm_fifo_is_empty (f))
+ if (!svm_fifo_has_deq_ntf (f) && svm_fifo_is_empty (f))
return 1;
}
return 0;
}
+/**
+ * Set the fifo dequeue threshold which will be used for notifications.
+ *
+ * Note: If not set, by default threshold is zero, equivalent to
+ * generating notification on each dequeue event.
+ */
+static inline void
+svm_fifo_set_deq_thresh (svm_fifo_t *f, u32 thresh)
+{
+ f->shr->deq_thresh = thresh;
+}
+
#endif /* __included_ssvm_fifo_h__ */
/*
diff --git a/src/svm/svmdb.c b/src/svm/svmdb.c
index 2c3d351f0c7..3c69dbf45ba 100644
--- a/src/svm/svmdb.c
+++ b/src/svm/svmdb.c
@@ -281,7 +281,7 @@ local_unset_variable_nolock (svmdb_client_t * client,
if (vec_len (oldvalue->notifications))
notify_value (oldvalue, SVMDB_ACTION_UNSET);
/* zero length value means unset */
- _vec_len (oldvalue->value) = 0;
+ vec_set_len (oldvalue->value, 0);
}
client->shm->namespaces[namespace] = h;
}
@@ -317,7 +317,7 @@ local_set_variable_nolock (svmdb_client_t * client,
oldvalue = pool_elt_at_index (client->shm->values, hp->value[0]);
vec_alloc (oldvalue->value, vec_len (val) * elsize);
clib_memcpy (oldvalue->value, val, vec_len (val) * elsize);
- _vec_len (oldvalue->value) = vec_len (val);
+ vec_set_len (oldvalue->value, vec_len (val));
notify_value (oldvalue, SVMDB_ACTION_SET);
}
else
@@ -328,7 +328,7 @@ local_set_variable_nolock (svmdb_client_t * client,
newvalue->elsize = elsize;
vec_alloc (newvalue->value, vec_len (val) * elsize);
clib_memcpy (newvalue->value, val, vec_len (val) * elsize);
- _vec_len (newvalue->value) = vec_len (val);
+ vec_set_len (newvalue->value, vec_len (val));
name = format (0, "%s%c", var, 0);
hash_set_mem (h, name, newvalue - shm->values);
}
@@ -414,7 +414,6 @@ svmdb_local_dump_strings (svmdb_client_t * client)
h = client->shm->namespaces[SVMDB_NAMESPACE_STRING];
- /* *INDENT-OFF* */
hash_foreach_mem(key, value, h,
({
svmdb_value_t *v = pool_elt_at_index (shm->values, value);
@@ -422,7 +421,6 @@ svmdb_local_dump_strings (svmdb_client_t * client)
fformat(stdout, "%s: %s\n", key,
vec_len(v->value) ? v->value : (u8 *)"(nil)");
}));
- /* *INDENT-ON* */
region_unlock (client->db_rp);
}
@@ -433,7 +431,7 @@ svmdb_local_serialize_strings (svmdb_client_t * client, char *filename)
u8 *key;
u32 value;
svmdb_shm_hdr_t *shm = client->shm;
- serialize_main_t _sm, *sm = &_sm;
+ serialize_main_t _sm = { 0 }, *sm = &_sm;
clib_error_t *error = 0;
u8 *sanitized_name = 0;
int fd = 0;
@@ -463,7 +461,6 @@ svmdb_local_serialize_strings (svmdb_client_t * client, char *filename)
serialize_likely_small_unsigned_integer (sm, hash_elts (h));
- /* *INDENT-OFF* */
hash_foreach_mem(key, value, h,
({
svmdb_value_t *v = pool_elt_at_index (shm->values, value);
@@ -475,7 +472,6 @@ svmdb_local_serialize_strings (svmdb_client_t * client, char *filename)
serialize_cstring (sm, (char *)v->value);
}
}));
- /* *INDENT-ON* */
region_unlock (client->db_rp);
serialize_close (sm);
@@ -495,7 +491,7 @@ out:
int
svmdb_local_unserialize_strings (svmdb_client_t * client, char *filename)
{
- serialize_main_t _sm, *sm = &_sm;
+ serialize_main_t _sm = { 0 }, *sm = &_sm;
void *oldheap;
clib_error_t *error = 0;
u8 *key, *value;
@@ -589,7 +585,7 @@ svmdb_local_get_vec_variable (svmdb_client_t * client, char *var, u32 elsize)
/* Make a copy in process-local memory */
vec_alloc (copy, vec_len (rv) * elsize);
clib_memcpy (copy, rv, vec_len (rv) * elsize);
- _vec_len (copy) = vec_len (rv);
+ vec_set_len (copy, vec_len (rv));
region_unlock (client->db_rp);
return (copy);
}
@@ -610,7 +606,6 @@ svmdb_local_dump_vecs (svmdb_client_t * client)
h = client->shm->namespaces[SVMDB_NAMESPACE_VEC];
- /* *INDENT-OFF* */
hash_foreach_mem(key, value, h,
({
svmdb_value_t *v = pool_elt_at_index (shm->values, value);
@@ -618,7 +613,6 @@ svmdb_local_dump_vecs (svmdb_client_t * client)
format_hex_bytes, v->value,
vec_len(v->value)*v->elsize, ((f64 *)(v->value))[0]);
}));
- /* *INDENT-ON* */
region_unlock (client->db_rp);
}
@@ -653,7 +647,7 @@ svmdb_local_find_or_add_vec_variable (svmdb_client_t * client,
clib_memset (newvalue, 0, sizeof (*newvalue));
newvalue->elsize = 1;
vec_alloc (newvalue->value, nbytes);
- _vec_len (newvalue->value) = nbytes;
+ vec_set_len (newvalue->value, nbytes);
name = format (0, "%s%c", var, 0);
hash_set_mem (h, name, newvalue - shm->values);
shm->namespaces[SVMDB_NAMESPACE_VEC] = h;
diff --git a/src/svm/svmdbtool.c b/src/svm/svmdbtool.c
index feb7eed07ef..b60b86db29d 100644
--- a/src/svm/svmdbtool.c
+++ b/src/svm/svmdbtool.c
@@ -248,11 +248,16 @@ static void
sigaction_handler (int signum, siginfo_t * i, void *notused)
{
u32 action, opaque;
+#ifdef __linux__
action = (u32) (uword) i->si_ptr;
action >>= 28;
opaque = (u32) (uword) i->si_ptr;
opaque &= ~(0xF0000000);
+#elif __FreeBSD__
+ action = i->si_code;
+ opaque = 0;
+#endif /* __linux__ */
clib_warning ("signal %d, action %d, opaque %x", signum, action, opaque);
}
diff --git a/src/svm/svmtool.c b/src/svm/svmtool.c
index 60859674298..521ddab7eb3 100644
--- a/src/svm/svmtool.c
+++ b/src/svm/svmtool.c
@@ -72,12 +72,10 @@ format_all_svm_regions (u8 * s, va_list * args)
* Snapshoot names, can't hold root rp mutex across
* find_or_create.
*/
- /* *INDENT-OFF* */
pool_foreach (subp, mp->subregions) {
name = vec_dup (subp->subregion_name);
vec_add1(svm_names, name);
}
- /* *INDENT-ON* */
pthread_mutex_unlock (&root_rp->mutex);
@@ -328,12 +326,10 @@ subregion_repair (char *chroot_path)
* Snapshoot names, can't hold root rp mutex across
* find_or_create.
*/
- /* *INDENT-OFF* */
pool_foreach (subp, mp->subregions) {
name = vec_dup (subp->subregion_name);
vec_add1(svm_names, name);
}
- /* *INDENT-ON* */
pthread_mutex_unlock (&root_rp->mutex);
diff --git a/src/tools/appimage/CMakeLists.txt b/src/tools/appimage/CMakeLists.txt
index 1b83656dbf8..26ef77d1c91 100644
--- a/src/tools/appimage/CMakeLists.txt
+++ b/src/tools/appimage/CMakeLists.txt
@@ -18,7 +18,7 @@ if(VPP_BUILD_APPIMAGE)
WORLD_READ WORLD_EXECUTE)
install(FILES vpp.desktop DESTINATION .)
install(FILES vpp.png DESTINATION .)
- install(FILES vpp.svg DESTINATION share/icons/hicolor/scalable/vpp.svg)
+ install(FILES vpp.svg DESTINATION ${CMAKE_INSTALL_DATADIR}/icons/hicolor/scalable/vpp.svg)
install(CODE "EXECUTE_PROCESS(COMMAND ln -s . ./usr
WORKING_DIRECTORY ${CMAKE_INSTALL_PREFIX})")
install(CODE "EXECUTE_PROCESS(
diff --git a/src/tools/g2/clib.c b/src/tools/g2/clib.c
index 3cfc2637673..bb1f2026a43 100644
--- a/src/tools/g2/clib.c
+++ b/src/tools/g2/clib.c
@@ -21,7 +21,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <sys/fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <ctype.h>
diff --git a/src/tools/g2/cpel.c b/src/tools/g2/cpel.c
index 8bcc91e674e..0d1873431b7 100644
--- a/src/tools/g2/cpel.c
+++ b/src/tools/g2/cpel.c
@@ -21,7 +21,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <sys/fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <ctype.h>
diff --git a/src/tools/g2/events.c b/src/tools/g2/events.c
index 09054b71324..ef85c208b3c 100644
--- a/src/tools/g2/events.c
+++ b/src/tools/g2/events.c
@@ -17,7 +17,6 @@
#include <stdlib.h>
#include <unistd.h>
#include <sys/stat.h>
-#include <sys/fcntl.h>
#include <sys/mman.h>
#include <arpa/inet.h>
#include <stdio.h>
diff --git a/src/tools/g2/pointsel.c b/src/tools/g2/pointsel.c
index 59822377219..fae93365e3d 100644
--- a/src/tools/g2/pointsel.c
+++ b/src/tools/g2/pointsel.c
@@ -169,7 +169,7 @@ static void down_button(void)
static void button_click_callback(GtkButton *item, gpointer data)
{
int i;
- enum button_click click = (enum button_click)data;
+ enum button_click click = (enum button_click) (long int) data;
switch (click) {
case ALL_BUTTON:
diff --git a/src/tools/g2/view1.c b/src/tools/g2/view1.c
index 3902c0a2dc1..7a6ae714e3f 100644
--- a/src/tools/g2/view1.c
+++ b/src/tools/g2/view1.c
@@ -2329,21 +2329,22 @@ out:
static void view1_button_click_callback(GtkButton *item, gpointer data)
{
- enum view1_button_click click = (enum view1_button_click) data;
- event_t *ep;
- ulonglong event_incdec;
- ulonglong current_width;
- ulonglong zoom_delta;
+ enum view1_button_click click = (enum view1_button_click) (long int) data;
+ event_t *ep;
+ ulonglong event_incdec;
+ ulonglong current_width;
+ ulonglong zoom_delta;
- current_width = s_v1->maxvistime - s_v1->minvistime;
- event_incdec = (current_width) / 3;
+ current_width = s_v1->maxvistime - s_v1->minvistime;
+ event_incdec = (current_width) / 3;
- if (event_incdec == 0LL)
- event_incdec = 1;
+ if (event_incdec == 0LL)
+ event_incdec = 1;
- zoom_delta = (s_v1->maxvistime - s_v1->minvistime) / 6;
+ zoom_delta = (s_v1->maxvistime - s_v1->minvistime) / 6;
- switch(click) {
+ switch (click)
+ {
case TOP_BUTTON:
/* First PID to top of window */
s_v1->first_pid_index = 0;
diff --git a/src/tools/perftool/c2cpel.c b/src/tools/perftool/c2cpel.c
index 72049054ae1..b02c506bd59 100644
--- a/src/tools/perftool/c2cpel.c
+++ b/src/tools/perftool/c2cpel.c
@@ -20,7 +20,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <sys/fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <ctype.h>
diff --git a/src/tools/perftool/cpel_util.c b/src/tools/perftool/cpel_util.c
index 4f5f98a51cc..9667f080919 100644
--- a/src/tools/perftool/cpel_util.c
+++ b/src/tools/perftool/cpel_util.c
@@ -20,7 +20,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <sys/fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <ctype.h>
@@ -424,7 +423,7 @@ void alpha_sort_tracks(void)
alpha_compare_tracks);
vec_validate(track_alpha_map, vec_len(the_tracks));
- _vec_len(track_alpha_map) = vec_len(the_tracks);
+ vec_set_len (track_alpha_map, vec_len (the_tracks));
for (i = 0; i < vec_len(the_tracks); i++) {
this_track = &the_tracks[i];
diff --git a/src/tools/perftool/cpelatency.c b/src/tools/perftool/cpelatency.c
index 7b87d606cda..6a3d4f79b8a 100644
--- a/src/tools/perftool/cpelatency.c
+++ b/src/tools/perftool/cpelatency.c
@@ -21,7 +21,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <sys/fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <ctype.h>
diff --git a/src/tools/perftool/cpeldump.c b/src/tools/perftool/cpeldump.c
index be0a70df24e..1ccfd6a91df 100644
--- a/src/tools/perftool/cpeldump.c
+++ b/src/tools/perftool/cpeldump.c
@@ -21,7 +21,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <sys/fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <ctype.h>
diff --git a/src/tools/perftool/cpelinreg.c b/src/tools/perftool/cpelinreg.c
index 115afad7fb2..007e727d1bf 100644
--- a/src/tools/perftool/cpelinreg.c
+++ b/src/tools/perftool/cpelinreg.c
@@ -27,7 +27,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <sys/fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <ctype.h>
@@ -617,12 +616,12 @@ int event_pass (cpel_section_header_t *sh, int verbose, FILE *ofp)
} else {
dup_events++;
}
- _vec_len(tp->start_datum) = sp;
- _vec_len(tp->start_time) = sp;
- _vec_len(tp->dup_event) = sp;
- }
+ vec_set_len (tp->start_datum, sp);
+ vec_set_len (tp->start_time, sp);
+ vec_set_len (tp->dup_event, sp);
+ }
- ep++;
+ ep++;
}
last_end_time = now;
diff --git a/src/tools/perftool/cpelstate.c b/src/tools/perftool/cpelstate.c
index 3fd9ccb9c79..78d9c9752fd 100644
--- a/src/tools/perftool/cpelstate.c
+++ b/src/tools/perftool/cpelstate.c
@@ -21,7 +21,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <sys/fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <ctype.h>
diff --git a/src/tools/perftool/delsvec.c b/src/tools/perftool/delsvec.c
index 724935d331e..d49ba98b94d 100644
--- a/src/tools/perftool/delsvec.c
+++ b/src/tools/perftool/delsvec.c
@@ -1,4 +1,4 @@
-/*
+/*
*------------------------------------------------------------------
* Copyright (c) 2006-2016 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,13 +24,13 @@
/*
* #define UNIT_TESTS 1
- * #define MATCH_TRACE 1
+ * #define MATCH_TRACE 1
*/
/*
* delsvec
* break up an input string into a vector of [null-terminated] u8 *'s
- *
+ *
* Each supplied delimiter character results in a string in the output
* vector, unless the delimiters occur back-to-back. When matched,
* a whitespace character in the delimiter consumes an arbitrary
@@ -46,270 +46,316 @@
static u8 **string_cache;
static u8 **svec_cache;
-void delsvec_recycle_this_string (u8 *s)
+void
+delsvec_recycle_this_string (u8 *s)
{
- if (s) {
- _vec_len (s) = 0;
- vec_add1(string_cache, s);
+ if (s)
+ {
+ vec_set_len (s, 0);
+ vec_add1 (string_cache, s);
}
}
-void delsvec_recycle_this_svec (u8 **svec)
+void
+delsvec_recycle_this_svec (u8 **svec)
{
- if (svec) {
- if (svec_cache) {
- vec_free (svec_cache);
- }
- _vec_len (svec) = 0;
- svec_cache = svec;
+ if (svec)
+ {
+ if (svec_cache)
+ {
+ vec_free (svec_cache);
+ }
+ vec_set_len (svec, 0);
+ svec_cache = svec;
}
}
-int pvl (char *a)
+int
+pvl (char *a)
{
- return vec_len(a);
+ return vec_len (a);
}
-u8 **delsvec(void *input_arg, char *fmt)
+u8 **
+delsvec (void *input_arg, char *fmt)
{
- u8 **rv = 0;
- int input_index=0;
- u8 *this;
- int dirflag=0;
- int i;
- u8 *input = input_arg;
+ u8 **rv = 0;
+ int input_index = 0;
+ u8 *this;
+ int dirflag = 0;
+ int i;
+ u8 *input = input_arg;
- if (svec_cache) {
- rv = svec_cache;
- svec_cache = 0;
+ if (svec_cache)
+ {
+ rv = svec_cache;
+ svec_cache = 0;
}
- while (fmt) {
- dirflag=0;
- if (vec_len (string_cache) > 0) {
- this = string_cache [vec_len(string_cache)-1];
- _vec_len (string_cache) = vec_len (string_cache) - 1;
- } else
- this = 0;
- /*
- * '*' means one of two things: match the rest of the input,
- * or match as many characters as possible
- */
- if (fmt[0] == '*') {
- fmt++;
- dirflag=1;
- /*
- * no more format: eat rest of string...
- */
- if (!fmt[0]) {
- for (;input[input_index]; input_index++)
- vec_add1(this, input[input_index]);
- if (vec_len(this)) {
- vec_add1(this, 0);
+ while (fmt)
+ {
+ dirflag = 0;
+ if (vec_len (string_cache) > 0)
+ {
+ this = string_cache[vec_len (string_cache) - 1];
+ vec_set_len (string_cache, vec_len (string_cache) - 1);
+ }
+ else
+ this = 0;
+ /*
+ * '*' means one of two things: match the rest of the input,
+ * or match as many characters as possible
+ */
+ if (fmt[0] == '*')
+ {
+ fmt++;
+ dirflag = 1;
+ /*
+ * no more format: eat rest of string...
+ */
+ if (!fmt[0])
+ {
+ for (; input[input_index]; input_index++)
+ vec_add1 (this, input[input_index]);
+ if (vec_len (this))
+ {
+ vec_add1 (this, 0);
#ifdef MATCH_TRACE
- printf("final star-match adds: '%s'\n", this);
+ printf ("final star-match adds: '%s'\n", this);
#endif
- vec_add1(rv, this);
- } else {
- vec_add1(string_cache, this);
- }
+ vec_add1 (rv, this);
+ }
+ else
+ {
+ vec_add1 (string_cache, this);
+ }
- return(rv);
- }
- }
- /*
- * Left-to-right scan, adding chars until next delimiter char
- * appears.
- */
- if (!dirflag) {
- while (input[input_index]) {
- if (input[input_index] == fmt[0]) {
- /* If we just (exact) matched a whitespace delimiter */
- if (fmt[0] == ' '){
- /* scan forward eating whitespace */
- while (input[input_index] == ' ' ||
- input[input_index] == '\t' ||
- input[input_index] == '\n')
- input_index++;
- input_index--;
- }
- goto found;
- }
- /* If we're looking for whitespace */
- if (fmt[0] == ' ') {
- /* and we have whitespace */
- if (input[input_index] == ' ' ||
- input[input_index] == '\t' ||
- input[input_index] == '\n') {
- /* scan forward eating whitespace */
- while (input[input_index] == ' ' ||
- input[input_index] == '\t' ||
- input[input_index] == '\n') {
- input_index++;
- }
- input_index--;
- goto found;
- }
- }
- /* Not a delimiter, save it */
- vec_add1(this, input[input_index]);
- input_index++;
- }
- /*
- * Fell off the wagon, clean up and bail out
- */
- bail:
+ return (rv);
+ }
+ }
+ /*
+ * Left-to-right scan, adding chars until next delimiter char
+ * appears.
+ */
+ if (!dirflag)
+ {
+ while (input[input_index])
+ {
+ if (input[input_index] == fmt[0])
+ {
+ /* If we just (exact) matched a whitespace delimiter */
+ if (fmt[0] == ' ')
+ {
+ /* scan forward eating whitespace */
+ while (input[input_index] == ' ' ||
+ input[input_index] == '\t' ||
+ input[input_index] == '\n')
+ input_index++;
+ input_index--;
+ }
+ goto found;
+ }
+ /* If we're looking for whitespace */
+ if (fmt[0] == ' ')
+ {
+ /* and we have whitespace */
+ if (input[input_index] == ' ' ||
+ input[input_index] == '\t' || input[input_index] == '\n')
+ {
+ /* scan forward eating whitespace */
+ while (input[input_index] == ' ' ||
+ input[input_index] == '\t' ||
+ input[input_index] == '\n')
+ {
+ input_index++;
+ }
+ input_index--;
+ goto found;
+ }
+ }
+ /* Not a delimiter, save it */
+ vec_add1 (this, input[input_index]);
+ input_index++;
+ }
+ /*
+ * Fell off the wagon, clean up and bail out
+ */
+ bail:
#ifdef MATCH_TRACE
- printf("failed, fmt[0] = '%c', input[%d]='%s'\n",
- fmt[0], input_index, &input[input_index]);
+ printf ("failed, fmt[0] = '%c', input[%d]='%s'\n", fmt[0],
+ input_index, &input[input_index]);
#endif
- delsvec_recycle_this_string(this);
- for (i = 0; i < vec_len(rv); i++)
- delsvec_recycle_this_string(rv[i]);
- delsvec_recycle_this_svec(rv);
- return(0);
-
- found:
- /*
- * Delimiter matched
- */
- input_index++;
- fmt++;
- /*
- * If we actually accumulated non-delimiter characters,
- * add them to the result vector
- */
- if (vec_len(this)) {
- vec_add1(this, 0);
+ delsvec_recycle_this_string (this);
+ for (i = 0; i < vec_len (rv); i++)
+ delsvec_recycle_this_string (rv[i]);
+ delsvec_recycle_this_svec (rv);
+ return (0);
+
+ found:
+ /*
+ * Delimiter matched
+ */
+ input_index++;
+ fmt++;
+ /*
+ * If we actually accumulated non-delimiter characters,
+ * add them to the result vector
+ */
+ if (vec_len (this))
+ {
+ vec_add1 (this, 0);
#ifdef MATCH_TRACE
- printf("match: add '%s'\n", this);
+ printf ("match: add '%s'\n", this);
#endif
- vec_add1(rv, this);
- } else {
- vec_add1(string_cache, this);
- }
- } else {
- /*
- * right-to-left scan, '*' not at
- * the end of the delimiter string
- */
- i = input_index;
- while (input[++i])
- ; /* scan forward */
- i--;
- while (i > input_index) {
- if (input[i] == fmt[0])
- goto found2;
-
- if (fmt[0] == ' ' || fmt[0] == '\t' ||
- fmt[0] == '\n') {
- if (input[i] == ' ' ||
- input[i] == '\t' ||
- input[i] == '\n')
- goto found2;
- }
- i--;
- }
- goto bail;
+ vec_add1 (rv, this);
+ }
+ else
+ {
+ vec_add1 (string_cache, this);
+ }
+ }
+ else
+ {
+ /*
+ * right-to-left scan, '*' not at
+ * the end of the delimiter string
+ */
+ i = input_index;
+ while (input[++i])
+ ; /* scan forward */
+ i--;
+ while (i > input_index)
+ {
+ if (input[i] == fmt[0])
+ goto found2;
- found2:
- for (; input_index < i; input_index++) {
- vec_add1(this, input[input_index]);
- }
- input_index++;
- fmt++;
- vec_add1(this, 0);
+ if (fmt[0] == ' ' || fmt[0] == '\t' || fmt[0] == '\n')
+ {
+ if (input[i] == ' ' || input[i] == '\t' || input[i] == '\n')
+ goto found2;
+ }
+ i--;
+ }
+ goto bail;
+
+ found2:
+ for (; input_index < i; input_index++)
+ {
+ vec_add1 (this, input[input_index]);
+ }
+ input_index++;
+ fmt++;
+ vec_add1 (this, 0);
#ifdef MATCH_TRACE
- printf("inner '*' match: add '%s'\n", this);
+ printf ("inner '*' match: add '%s'\n", this);
#endif
- vec_add1(rv, this);
- }
+ vec_add1 (rv, this);
+ }
}
- return (rv);
+ return (rv);
}
#ifdef UNIT_TESTS
-typedef struct utest_ {
- char *string;
- char *fmt;
+typedef struct utest_
+{
+ char *string;
+ char *fmt;
} utest_t;
utest_t tests[] = {
#ifdef NOTDEF
- {"Dec 7 08:56",
- " :*"},
- {"Dec 17 08:56",
- " :*"},
- {"Dec 7 08:56:41.239 install/inst_repl 0/9/CPU0 t1 [40989] File List:Successfully blobbified file list. Took 1 milliseconds",
- " ::. / // [] *"},
- {"RP/0/9/CPU0:Dec 7 08:55:28.550 : sam_server[291]: SAM backs up digest list to memory file",
- "///: ::. : []: *"},
- /* Expected to fail */
- {"Dec 7 08:56:41.239 install/inst_repl 0/9/CPU0 t1 [40989] File List:Successfully blobbified file list. Took 1 milliseconds",
- "///: ::. : : *"},
- /* Expected to fail */
- {"RP/0/9/CPU0:Dec 7 08:55:28.550 : sam_server[291]: SAM backs up digest list to memory file",
- " ::. / // [] *"},
- {"THIS that and + theother", "*+ *"},
- {"Dec 12 15:33:07.103 ifmgr/errors 0/RP0/CPU0 3# t2 Failed to open IM connection: No such file or directory", " ::. / // *"},
- {"Dec 16 21:43:47.328 ifmgr/bulk 0/3/CPU0 t8 Bulk DPC async download complete. Partitions 1, node_count 1, total_out 0, out_offset 0, out_expected 0: No error"," ::. / // *"},
- {"t:0x53034bd6 CPU:00 PROCESS :PROCCREATE_NAME",
- ": : :*"},
- {" pid:1", " *"},
- {"t:0x53034cbb CPU:00 THREAD :THCREATE pid:1 tid:1",
- ": : : pid: tid:*"},
- {"t:0x5303f950 CPU:00 COMM :REC_PULSE scoid:0x40000003 pid:364659",
- ": : : *"},
- {"/hfr-base-3.3.85/lib/libttyconnection.dll 0xfc000000 0x0000306c 0xfc027000 0x000001c8 1",
- " *"},
- {"Feb 28 02:38:26.123 seqtrace 0/1/CPU0 t8 :msg_receive:ifmgr/t8:IMC_MSG_MTU_UPDATE:ppp_ma/t1",
- " ::. // ::::*"},
+ { "Dec 7 08:56", " :*" },
+ { "Dec 17 08:56", " :*" },
+ { "Dec 7 08:56:41.239 install/inst_repl 0/9/CPU0 t1 [40989] File "
+ "List:Successfully blobbified file list. Took 1 milliseconds",
+ " ::. / // [] *" },
+ { "RP/0/9/CPU0:Dec 7 08:55:28.550 : sam_server[291]: SAM backs up digest "
+ "list to memory file",
+ "///: ::. : []: *" },
+ /* Expected to fail */
+ { "Dec 7 08:56:41.239 install/inst_repl 0/9/CPU0 t1 [40989] File "
+ "List:Successfully blobbified file list. Took 1 milliseconds",
+ "///: ::. : : *" },
+ /* Expected to fail */
+ { "RP/0/9/CPU0:Dec 7 08:55:28.550 : sam_server[291]: SAM backs up digest "
+ "list to memory file",
+ " ::. / // [] *" },
+ { "THIS that and + theother", "*+ *" },
+ { "Dec 12 15:33:07.103 ifmgr/errors 0/RP0/CPU0 3# t2 Failed to open IM "
+ "connection: No such file or directory",
+ " ::. / // *" },
+ { "Dec 16 21:43:47.328 ifmgr/bulk 0/3/CPU0 t8 Bulk DPC async download "
+ "complete. Partitions 1, node_count 1, total_out 0, out_offset 0, "
+ "out_expected 0: No error",
+ " ::. / // *" },
+ { "t:0x53034bd6 CPU:00 PROCESS :PROCCREATE_NAME", ": : :*" },
+ { " pid:1", " *" },
+ { "t:0x53034cbb CPU:00 THREAD :THCREATE pid:1 tid:1",
+ ": : : pid: tid:*" },
+ { "t:0x5303f950 CPU:00 COMM :REC_PULSE scoid:0x40000003 pid:364659",
+ ": : : *" },
+ { "/hfr-base-3.3.85/lib/libttyconnection.dll 0xfc000000 0x0000306c "
+ "0xfc027000 0x000001c8 1",
+ " *" },
+ { "Feb 28 02:38:26.123 seqtrace 0/1/CPU0 t8 "
+ ":msg_receive:ifmgr/t8:IMC_MSG_MTU_UPDATE:ppp_ma/t1",
+ " ::. // ::::*" },
- {"Feb 28 02:38:26.123 seqtrace 0/1/CPU0 t8 :msg_send_event:call:ifmgr/t8:124/0:cdp/t1",
- " ::. // :msg_send_event::::*"},
+ { "Feb 28 02:38:26.123 seqtrace 0/1/CPU0 t8 "
+ ":msg_send_event:call:ifmgr/t8:124/0:cdp/t1",
+ " ::. // :msg_send_event::::*" },
- {"Feb 28 02:38:26.125 seqtrace 0/1/CPU0 t1 :msg_receive_event:cdp/t1:124/0",
- " ::. // :msg_receive_event::*"}
- {"t:0x645dd86d CPU:00 USREVENT:EVENT:100, d0:0x00000002 d1:0x00000000",
- ": : USREVENT:EVENT:, d0: *"}
- {"t:0x5303f950 CPU:00 COMM :REC_PULSE scoid:0x40000003 pid:364659",
- ": : : *"},
- {"t:0x2ccf9f5a CPU:00 INT_ENTR:0x80000000 (-2147483648) IP:0x002d8b18",
- ": : INT_ENTR: IP:*"}
- {"t:0xd473951c CPU:00 KER_EXIT:SCHED_GET/88 ret_val:2 sched_priority:10",
- ": : KER_EXIT:SCHED_GET : sched_priority:*"}
- {"t:0x00000123 CPU:01 SYSTEM :FUNC_ENTER thisfn:0x40e62048 call_site:0x00000000",
- ": : SYSTEM :FUNC_ thisfn: *"},
- {"t:0x5af8de95 CPU:00 INT_HANDLER_ENTR:0x0000004d (77) PID:8200 IP:0x00000000 AREA:0x0bf9b290", ": : INT_HANDLER_*"},
+ { "Feb 28 02:38:26.125 seqtrace 0/1/CPU0 t1 "
+ ":msg_receive_event:cdp/t1:124/0",
+ " ::. // :msg_receive_event::*" } {
+ "t:0x645dd86d CPU:00 USREVENT:EVENT:100, d0:0x00000002 d1:0x00000000",
+ ": : USREVENT:EVENT:, d0: *" } {
+ "t:0x5303f950 CPU:00 COMM :REC_PULSE scoid:0x40000003 pid:364659",
+ ": : : *" },
+ { "t:0x2ccf9f5a CPU:00 INT_ENTR:0x80000000 (-2147483648) "
+ "IP:0x002d8b18",
+ ": : INT_ENTR: IP:*" } {
+ "t:0xd473951c CPU:00 KER_EXIT:SCHED_GET/88 ret_val:2 sched_priority:10",
+ ": : KER_EXIT:SCHED_GET : sched_priority:*" } {
+ "t:0x00000123 CPU:01 SYSTEM :FUNC_ENTER thisfn:0x40e62048 "
+ "call_site:0x00000000",
+ ": : SYSTEM :FUNC_ thisfn: *" },
+ { "t:0x5af8de95 CPU:00 INT_HANDLER_ENTR:0x0000004d (77) PID:8200 "
+ "IP:0x00000000 AREA:0x0bf9b290",
+ ": : INT_HANDLER_*" },
#endif
- {"t:0x6d1ff92f CPU:00 CONTROL: BUFFER sequence = 1053, num_events = 714",
- ": : CONTROL*"},
- {"t:0x6d1ff92f CPU:00 CONTROL :TIME msb:0x0000003c lsb(offset):0x6d1ff921",
- ": : CONTROL*"},
+ { "t:0x6d1ff92f CPU:00 CONTROL: BUFFER sequence = 1053, num_events = 714",
+ ": : CONTROL*" },
+ { "t:0x6d1ff92f CPU:00 CONTROL :TIME msb:0x0000003c lsb(offset):0x6d1ff921",
+ ": : CONTROL*" },
};
-int main (int argc, char **argv)
+int
+main (int argc, char **argv)
{
- int i, j;
- u8 **svec;
+ int i, j;
+ u8 **svec;
- for (j = 0; j < ARRAY_LEN(tests); j++) {
- printf ("input string: '%s'\n", tests[j].string);
- printf ("delimiter arg: '%s'\n", tests[j].fmt);
- printf ("parse trace:\n");
- svec = delsvec(tests[j].string, tests[j].fmt);
- if (!svec) {
- printf("index %d failed\n", j);
- continue;
- }
- printf("%d substring vectors\n", vec_len(svec));
- for (i = 0; i < vec_len(svec); i++) {
- printf("[%d]: '%s'\n", i, svec[i]);
- }
- printf ("-------------------\n");
+ for (j = 0; j < ARRAY_LEN (tests); j++)
+ {
+ printf ("input string: '%s'\n", tests[j].string);
+ printf ("delimiter arg: '%s'\n", tests[j].fmt);
+ printf ("parse trace:\n");
+ svec = delsvec (tests[j].string, tests[j].fmt);
+ if (!svec)
+ {
+ printf ("index %d failed\n", j);
+ continue;
+ }
+ printf ("%d substring vectors\n", vec_len (svec));
+ for (i = 0; i < vec_len (svec); i++)
+ {
+ printf ("[%d]: '%s'\n", i, svec[i]);
+ }
+ printf ("-------------------\n");
}
- exit(0);
+ exit (0);
}
#endif
diff --git a/src/tools/vppapigen/CMakeLists.txt b/src/tools/vppapigen/CMakeLists.txt
index bfabc3a670c..97a6d35f9b5 100644
--- a/src/tools/vppapigen/CMakeLists.txt
+++ b/src/tools/vppapigen/CMakeLists.txt
@@ -11,6 +11,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+find_package(
+ Python3
+ REQUIRED
+ COMPONENTS Interpreter
+)
+
+execute_process(
+ COMMAND ${Python3_EXECUTABLE} -c "import ply"
+ RESULT_VARIABLE _rv
+ OUTPUT_QUIET
+)
+
+if (NOT ${_rv} EQUAL 0)
+ message( FATAL_ERROR "The \"ply\" Python3 package is not installed.")
+endif()
+
install(
FILES vppapigen.py
RENAME vppapigen
@@ -27,7 +43,7 @@ install(
vppapigen_json.py
generate_json.py
DESTINATION
- share/vpp
+ ${CMAKE_INSTALL_DATADIR}/vpp
COMPONENT
vpp-dev
)
diff --git a/src/tools/vppapigen/VPPAPI.md b/src/tools/vppapigen/VPPAPI.md
deleted file mode 100644
index df211d866a0..00000000000
--- a/src/tools/vppapigen/VPPAPI.md
+++ /dev/null
@@ -1,346 +0,0 @@
-# VPP API Language {#api_lang_doc}
-
-The VPP binary API is a message passing API.
-The VPP API language is used to define a RPC interface between VPP and its
-control plane. The API messages supports shared memory transport and
-Unix domain sockets (SOCK_STREAM).
-
-The wire format is essentially that of a network formatted (big-endian) packed C struct.
-
-The VPP API compiler is located in *src/tools/vppapigen* and can currently
-compile to JSON or C (used by the VPP binary itself).
-
-## Language definition
-
-### Defining a messages
-
-There are 3 types of message exchanges:
-
-* Request/Reply
-The client sends a request message and the server replies with a
-single reply message. The convention is that the reply message is
-named as method_name + \_reply.
-
-* Dump/Detail
-The client sends a "bulk" request message to the server, and the
-server replies with a set of detail messages. These messages may be of
-different type. A dump/detail call must be enclosed in a control ping
-block (Otherwise the client will not know the end of the bulk
-transmission). The method name must end with method + "\_dump", the
-reply message should be named method + "\_details". The exception here
-is for the methods that return multiple message types
-(e.g. sw_interface_dump). The Dump/Detail methods are typically used
-for acquiring bulk information, like the complete FIB table.
-
-* Events
-The client can register for getting asynchronous notifications from
-the server. This is useful for getting interface state changes, and so
-on. The method name for requesting notifications is conventionally
-prefixed with "want_". E.g. "want_interface_events". Which
-notification types results from an event registration is defined in
-the service definition.
-
-A message from a client must include the 'client_index', an opaque
-cookie identifying the sender, and a 'context' field to let the client
-match request with reply.
-
-An example of a message definition. The client sends the show_version request,
-the server replies with the show_version_reply.
-
-The *client_index* and *context* fields are required in all requests.
-The *context* is returned by the server and is used by the client to
-match up request and reply messages.
-
-```
-define show_version
-{
- u32 client_index;
- u32 context;
-};
-define show_version_reply
-{
- u32 context;
- i32 retval;
- string program [32];
- string version [32];
- string build_date [32];
- /* The final field can be a variable length argument */
- string build_directory [];
-};
-
-```
-
-The flags are not used by the clients, but have special meaning
-for some of the tracing and debugging of the API.
-The *autoreply* flag is a shorthand for a reply message with just a
-*retval* field.
-
-```
- define : DEFINE ID '{' block_statements_opt '}' ';'
- define : flist DEFINE ID '{' block_statements_opt '}' ';'
- flist : flag
- | flist flag
- flag : MANUAL_PRINT
- | MANUAL_ENDIAN
- | DONT_TRACE
- | AUTOREPLY
-
- block_statements_opt : block_statements
- block_statements : block_statement
- | block_statements block_statement
- block_statement : declaration
- | option
- declaration : type_specifier ID ';'
- | type_specifier ID '[' ID '=' assignee ']' ';'
- declaration : type_specifier ID '[' NUM ']' ';'
- | type_specifier ID '[' ID ']' ';'
- type_specifier : U8
- | U16
- | U32
- | U64
- | I8
- | I16
- | I32
- | I64
- | F64
- | BOOL
- | STRING
- type_specifier : ID
-```
-
-
-### Options
-The *option* word is used to specify meta information.
-The only current use is to specify a semantic version of the .api file itself.
-
-Example:
-```
-option version = "1.0.0";
-```
-
-```
-
- option : OPTION ID '=' assignee ';'
- assignee : NUM
- | TRUE
- | FALSE
- | STRING_LITERAL
-```
-
-### Defining new types
-
-New user defined types are defined just like messages.
-A typedef has two forms. It can either define an alias for a
-different type (or array).
-
-Example:
-
-```
-typedef u8 ip4_address[4];
-typedef u8 ip6_address[16];
-```
-
-Where the above defines two new types *vl_api_ip4_address_t* and
-*vl_api_ip6_address_t*. These are aliases for the underlying
-u8 array.
-
-In the other form, it is used to specify an abstract data type.
-
-```
-enum address_family {
- ADDRESS_IP4 = 0,
- ADDRESS_IP6,
-};
-
-union address_union {
- vl_api_ip4_address_t ip4;
- vl_api_ip6_address_t ip6;
-};
-
-typedef address {
- vl_api_address_family_t af;
- vl_api_address_union_t un;
-};
-```
-
-Where the new type *vl_api_address_t*
-
-```
- typedef : TYPEDEF ID '{' block_statements_opt '}' ';'
- typedef : TYPEDEF declaration
-```
-
-
-### Importing Definitions
-You can use definitions from other .api files by importing them.
-To import another .api's definitions, you add an import statement
-to the top of your file:
-
-import "vnet/ip/ip_types.api";
-
-By default you can only use definitions from directly imported .api files.
-
-The API compiler searches for imported files in a set of directories
-specified on the API compiler command line using the --includedir flag.
-```
-import : IMPORT STRING_LITERAL ';'
-```
-
-### Comments
-
-The API language uses C style comments.
-```
-/* */
-//
-```
-
-### Enumerations
-Enums are similar to enums in C.
-
-Every enum definition must contain a constant that maps to zero
-as its first element. This is because:
-
-There must be a zero value, so that we can use 0 as a numeric default value.
-The zero value needs to be the first element.
-
-As in C, enums can be used as flags or just as numbers.
-The on-wire, and in memory representation size of an enum can be specified.
-Not all language bindings will support that. The default size is 4 (u32).
-
-Example
-```
-enum ip_neighbor_flags
-{
- IP_API_NEIGHBOR_FLAG_NONE = 0,
- IP_API_NEIGHBOR_FLAG_STATIC = 0x1,
- IP_API_NEIGHBOR_FLAG_NO_FIB_ENTRY = 0x2,
-};
-```
-
-Which generates the vl_api_ip_neighbor_flags_t in the C binding.
-In Python that is represented as an IntFlag object
-VppEnum.vl_api_ip_neighbor_flags_t.
-
-```
- enum : ENUM ID '{' enum_statements '}' ';'
- enum : ENUM ID ':' enum_size '{' enum_statements '}' ';'
- enum_size : U8
- | U16
- | U32
- enum_statements : enum_statement
- | enum_statements enum_statement
- enum_statement : ID '=' NUM ','
- | ID ','
-```
-
-### Services
-The service statement defines the relationship between messages.
-For request/response and dump/details messages it ties the
-request with the reply. For events, it specifies which events
-that can be received for a given want_* call.
-
-Example:
-```
-service {
- rpc want_interface_events returns want_interface_events_reply
- events sw_interface_event;
-};
-
-```
-
-Which states that the request want_interface_events returns a
-want_interface_events_reply and if enabled the client will
-receive sw_interface_event messages whenever interface states changes.
-
-```
- service : SERVICE '{' service_statements '}' ';'
- service_statements : service_statement
- | service_statements service_statement
- service_statement : RPC ID RETURNS NULL ';'
- | RPC ID RETURNS ID ';'
- | RPC ID RETURNS STREAM ID ';'
- | RPC ID RETURNS ID EVENTS event_list ';'
- event_list : events
- | event_list events
- events : ID
- | ID ','
-```
-
-
-## Types
-### Scalar Value Types
-
-.api type|size|C type|Python type
----------|----|------|-----------
-i8 | 1|i8 |int
-u8 | 1|u8 |int
-i16 | 2|i16 |int
-u16 | 2|u16 |int
-i32 | 4|i32 |int
-u32 | 4|u32 |int
-i64 | 8|i64 |int
-u64 | 8|u64 |int
-f64 | 8|f64 |float
-bool | 1|bool |boolean
-string |variable|vl_api_string_t|str
-
-### User Defined Types
-#### vnet/ip/ip_types.api
-
-.api type|size|C type|Python type
----------|----|------|-----------
-vl_api_address_t|20|vl_api_address_t|`<class 'ipaddress.IPv4Address'> or <class 'ipaddress.IPv6Address'>`
-vl_api_ip4_address_t|4|vl_api_ip4_address_t|`<class 'ipaddress.IPv4Address'>`
-vl_api_ip6_address_t|16|vl_api_ip6_address_t|`<class 'ipaddress.IPv6Address'>`
-vl_api_prefix_t|21|vl_api_prefix_t|`<class 'ipaddress.IPv4Network'> or <class 'ipaddress.IPv6Network'>`
-vl_api_ip4_prefix_t|5|vl_api_ip4_prefix_t|`<class 'ipaddress.IPv4Network'>`
-vl_api_ip6_prefix_t|17|vl_api_ip6_prefix_t|`<class 'ipaddress.IPv6Network'>`
-vl_api_ip4_address_with_prefix_t|5|vl_api_ip4_address_with_prefix_t|`<class 'ipaddress.IPv4Interface'>`
-vl_api_ip6_address_with_prefix_t|17|vl_api_ip6_address_with_prefix_t|`<class 'ipaddress.IPv6Interface'>`
-
-#### vnet/ethernet/ethernet_types.api
-.api type|size|C type|Python type
----------|----|------|-----------
-vl_api_mac_address_t|6|vl_api_mac_address_t|`class 'vpp_papi.MACAddress'>`
-
-#### vnet/interface_types.api
-.api type|size|C type|Python type
----------|----|------|-----------
-vl_api_interface_index_t|4|vl_api_interface_index_t|int
-
-### New explicit types
-
-#### String versus bytes
-A byte string with a maximum length of 64:
-```
-u8 name[64];
-```
-Before the "string" type was added, text string were defined like this.
-The implications of that was the user would have to know if the field
-represented a \0 ended C-string or a fixed length byte string.
-The wire format of the 'string' type is a u32 length
-
-An IPv4 or IPv6 address was previously defined like:
-```
-u8 is_ip6;
-u8 address[16];
-```
-
-Which made it hard for language bindings to represent the
-address as anything but a byte string.
-The new explicit address types are shown above.
-
-## Language generators
-
-The VPP API compiler currently has two output modules. One generating JSON
-and one generating C header files that are directly used by the VPP
-infrastructure and plugins.
-
-The C/C++, Python, Go Lua, and Java language bindings are generated based
-on the JSON files.
-
-### Future considerations
-- [ ] Generate C/C++ (vapi) client code directly from vppapigen
-- [ ] Embed JSON definitions into the API server, so dynamic languages
- can download them directly without going via the filesystem and JSON
- files.
diff --git a/src/tools/vppapigen/VPPAPI.rst b/src/tools/vppapigen/VPPAPI.rst
new file mode 100644
index 00000000000..e8144803a87
--- /dev/null
+++ b/src/tools/vppapigen/VPPAPI.rst
@@ -0,0 +1,597 @@
+VPP API Language
+================
+
+The VPP binary API is a message passing API. The VPP API language is
+used to define a RPC interface between VPP and its control plane. The
+API messages supports shared memory transport and Unix domain sockets
+(SOCK_STREAM).
+
+The wire format is essentially that of a network formatted (big-endian)
+packed C struct.
+
+The VPP API compiler is located in *src/tools/vppapigen* and can
+currently compile to JSON or C (used by the VPP binary itself).
+
+Language definition
+-------------------
+
+Defining a messages
+~~~~~~~~~~~~~~~~~~~
+
+There are 3 types of message exchanges:
+
+- Request/Reply The client sends a request message and the server
+ replies with a single reply message. The convention is that the reply
+ message is named as method_name + \_reply.
+
+- Dump/Detail The client sends a “bulk” request message to the server,
+ and the server replies with a set of detail messages. These messages
+ may be of different type. A dump/detail call must be enclosed in a
+ control ping block (Otherwise the client will not know the end of the
+ bulk transmission). The method name must end with method + “\_dump”,
+ the reply message should be named method + “\_details”. The exception
+ here is for the methods that return multiple message types
+ (e.g. sw_interface_dump). The Dump/Detail methods are typically used
+ for acquiring bulk information, like the complete FIB table.
+
+- Events The client can register for getting asynchronous notifications
+ from the server. This is useful for getting interface state changes,
+ and so on. The method name for requesting notifications is
+ conventionally prefixed with “want\_”. E.g. “want_interface_events”.
+ Which notification types results from an event registration is
+ defined in the service definition.
+
+A message from a client must include the ‘client_index’, an opaque
+cookie identifying the sender, and a ‘context’ field to let the client
+match request with reply.
+
+An example of a message definition. The client sends the show_version
+request, the server replies with the show_version_reply.
+
+The *client_index* and *context* fields are required in all requests.
+The *context* is returned by the server and is used by the client to
+match up request and reply messages.
+
+.. code-block:: c
+
+ define show_version
+ {
+ u32 client_index;
+ u32 context;
+ };
+ define show_version_reply
+ {
+ u32 context;
+ i32 retval;
+ string program [32];
+ string version [32];
+ string build_date [32];
+ /* The final field can be a variable length argument */
+ string build_directory [];
+ };
+
+The flags are not used by the clients, but have special meaning for some
+of the tracing and debugging of the API. The *autoreply* flag is a
+shorthand for a reply message with just a *retval* field.
+
+.. code-block:: c
+
+ define : DEFINE ID '{' block_statements_opt '}' ';'
+ define : flist DEFINE ID '{' block_statements_opt '}' ';'
+ flist : flag
+ | flist flag
+ flag : MANUAL_PRINT
+ | MANUAL_ENDIAN
+ | DONT_TRACE
+ | AUTOREPLY
+
+ block_statements_opt : block_statements
+ block_statements : block_statement
+ | block_statements block_statement
+ block_statement : declaration
+ | option
+ declaration : type_specifier ID ';'
+ | type_specifier ID '[' ID '=' assignee ']' ';'
+ declaration : type_specifier ID '[' NUM ']' ';'
+ | type_specifier ID '[' ID ']' ';'
+ type_specifier : U8
+ | U16
+ | U32
+ | U64
+ | I8
+ | I16
+ | I32
+ | I64
+ | F64
+ | BOOL
+ | STRING
+ type_specifier : ID
+
+Options
+~~~~~~~
+
+The *option* word is used to specify meta information. The only current
+use is to specify a semantic version of the .api file itself.
+
+Example:
+
+.. code-block:: c
+
+ option version = "1.0.0";
+
+.. code-block:: c
+
+
+ option : OPTION ID '=' assignee ';'
+ assignee : NUM
+ | TRUE
+ | FALSE
+ | STRING_LITERAL
+
+Defining new types
+~~~~~~~~~~~~~~~~~~
+
+New user defined types are defined just like messages. A typedef has two
+forms. It can either define an alias for a different type (or array).
+
+Example:
+
+.. code-block:: c
+
+ typedef u8 ip4_address[4];
+ typedef u8 ip6_address[16];
+
+Where the above defines two new types *vl_api_ip4_address_t* and
+*vl_api_ip6_address_t*. These are aliases for the underlying u8 array.
+
+In the other form, it is used to specify an abstract data type.
+
+.. code-block:: c
+
+ enum address_family {
+ ADDRESS_IP4 = 0,
+ ADDRESS_IP6,
+ };
+
+ union address_union {
+ vl_api_ip4_address_t ip4;
+ vl_api_ip6_address_t ip6;
+ };
+
+ typedef address {
+ vl_api_address_family_t af;
+ vl_api_address_union_t un;
+ };
+
+Where the new type *vl_api_address_t*
+
+.. code-block:: c
+
+ typedef : TYPEDEF ID '{' block_statements_opt '}' ';'
+ typedef : TYPEDEF declaration
+
+Importing Definitions
+~~~~~~~~~~~~~~~~~~~~~
+
+You can use definitions from other .api files by importing them. To
+import another .api’s definitions, you add an import statement to the
+top of your file:
+
+import “vnet/ip/ip_types.api”;
+
+By default you can only use definitions from directly imported .api
+files.
+
+The API compiler searches for imported files in a set of directories
+specified on the API compiler command line using the –includedir flag.
+
+.. code-block:: c
+
+ import : IMPORT STRING_LITERAL ';'
+
+Comments
+~~~~~~~~
+
+The API language uses C style comments.
+
+.. code-block:: c
+
+ /* */
+ //
+
+Enumerations
+~~~~~~~~~~~~
+
+Enums are similar to enums in C.
+
+Every enum definition must contain a constant that maps to zero as its
+first element. This is because:
+
+There must be a zero value, so that we can use 0 as a numeric default
+value. The zero value needs to be the first element.
+
+As in C, enums can be used as flags or just as numbers. The on-wire, and
+in memory representation size of an enum can be specified. Not all
+language bindings will support that. The default size is 4 (u32).
+
+Example
+
+.. code-block:: c
+
+ enum ip_neighbor_flags
+ {
+ IP_API_NEIGHBOR_FLAG_NONE = 0,
+ IP_API_NEIGHBOR_FLAG_STATIC = 0x1,
+ IP_API_NEIGHBOR_FLAG_NO_FIB_ENTRY = 0x2,
+ };
+
+Which generates the vl_api_ip_neighbor_flags_t in the C binding. In
+Python that is represented as an IntFlag object
+VppEnum.vl_api_ip_neighbor_flags_t.
+
+.. code-block:: c
+
+ enum : ENUM ID '{' enum_statements '}' ';'
+ enum : ENUM ID ':' enum_size '{' enum_statements '}' ';'
+ enum_size : U8
+ | U16
+ | U32
+ enum_statements : enum_statement
+ | enum_statements enum_statement
+ enum_statement : ID '=' NUM ','
+ | ID ','
+
+Services
+~~~~~~~~
+
+The service statement defines the relationship between messages. For
+request/response and dump/details messages it ties the request with the
+reply. For events, it specifies which events that can be received for a
+given ``want_*`` call.
+
+Example:
+
+.. code-block:: c
+
+ service {
+ rpc want_interface_events returns want_interface_events_reply
+ events sw_interface_event;
+ };
+
+Which states that the request want_interface_events returns a
+want_interface_events_reply and if enabled the client will receive
+sw_interface_event messages whenever interface states changes.
+
+.. code-block:: c
+
+ service : SERVICE '{' service_statements '}' ';'
+ service_statements : service_statement
+ | service_statements service_statement
+ service_statement : RPC ID RETURNS NULL ';'
+ | RPC ID RETURNS ID ';'
+ | RPC ID RETURNS STREAM ID ';'
+ | RPC ID RETURNS ID EVENTS event_list ';'
+ event_list : events
+ | event_list events
+ events : ID
+ | ID ','
+
+Types
+-----
+
+Scalar Value Types
+~~~~~~~~~~~~~~~~~~
+
+========= ======== =============== ===========
+.api type size C type Python type
+========= ======== =============== ===========
+i8 1 i8 int
+u8 1 u8 int
+i16 2 i16 int
+u16 2 u16 int
+i32 4 i32 int
+u32 4 u32 int
+i64 8 i64 int
+u64 8 u64 int
+f64 8 f64 float
+bool 1 bool boolean
+string variable vl_api_string_t str
+========= ======== =============== ===========
+
+User Defined Types
+~~~~~~~~~~~~~~~~~~
+
+vnet/ip/ip_types.api
+^^^^^^^^^^^^^^^^^^^^
+
++--------------------+--------+-------------+-------------------------+
+| .api type | size | C type | Python type |
++====================+========+=============+=========================+
+| vl_api_address_t | 20 | vl_ap | ` |
+| | | i_address_t | `<class 'ipaddress.IPv4 |
+| | | | Address'> or <class 'ip |
+| | | | address.IPv6Address'>`` |
++--------------------+--------+-------------+-------------------------+
+| vl | 4 | vl_api_ip | ``<class 'ip |
+| _api_ip4_address_t | | 4_address_t | address.IPv4Address'>`` |
++--------------------+--------+-------------+-------------------------+
+| vl | 16 | vl_api_ip | ``<class 'ip |
+| _api_ip6_address_t | | 6_address_t | address.IPv6Address'>`` |
++--------------------+--------+-------------+-------------------------+
+| vl_api_prefix_t | 21 | vl_a | ` |
+| | | pi_prefix_t | `<class 'ipaddress.IPv4 |
+| | | | Network'> or <class 'ip |
+| | | | address.IPv6Network'>`` |
++--------------------+--------+-------------+-------------------------+
+| v | 5 | vl_api_i | ``<class 'ip |
+| l_api_ip4_prefix_t | | p4_prefix_t | address.IPv4Network'>`` |
++--------------------+--------+-------------+-------------------------+
+| v | 17 | vl_api_i | ``<class 'ip |
+| l_api_ip6_prefix_t | | p6_prefix_t | address.IPv6Network'>`` |
++--------------------+--------+-------------+-------------------------+
+| vl_api_ip4_add | 5 | vl_api_ip4 | ``<class 'ipad |
+| ress_with_prefix_t | | _address_wi | dress.IPv4Interface'>`` |
+| | | th_prefix_t | |
++--------------------+--------+-------------+-------------------------+
+| vl_api_ip6_add | 17 | vl_api_ip6 | ``<class 'ipad |
+| ress_with_prefix_t | | _address_wi | dress.IPv6Interface'>`` |
+| | | th_prefix_t | |
++--------------------+--------+-------------+-------------------------+
+
+vnet/ethernet/ethernet_types.api
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
++---------------------+------+---------------------+-------------------+
+| .api type | size | C type | Python type |
++=====================+======+=====================+===================+
+| ``vl_ | 6 | ``vl_ | ``class 'vpp_pa |
+| api_mac_address_t`` | | api_mac_address_t`` | pi.MACAddress'>`` |
++---------------------+------+---------------------+-------------------+
+
+vnet/interface_types.api
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+======================== ==== ======================== ===========
+.api type size C type Python type
+======================== ==== ======================== ===========
+vl_api_interface_index_t 4 vl_api_interface_index_t int
+======================== ==== ======================== ===========
+
+New explicit types
+~~~~~~~~~~~~~~~~~~
+
+String versus bytes
+^^^^^^^^^^^^^^^^^^^
+
+A byte string with a maximum length of 64:
+
+.. code-block:: c
+
+ u8 name[64];
+
+Before the “string” type was added, text string were defined like this.
+The implications of that was the user would have to know if the field
+represented a \\0 ended C-string or a fixed length byte string. The wire
+format of the ‘string’ type is a u32 length
+
+An IPv4 or IPv6 address was previously defined like:
+
+.. code-block:: c
+
+ u8 is_ip6;
+ u8 address[16];
+
+Which made it hard for language bindings to represent the address as
+anything but a byte string. The new explicit address types are shown
+above.
+
+Language generators
+-------------------
+
+The VPP API compiler currently has two output modules. One generating
+JSON and one generating C header files that are directly used by the VPP
+infrastructure and plugins.
+
+The C/C++, Python, Go Lua, and Java language bindings are generated
+based on the JSON files.
+
+Future considerations
+~~~~~~~~~~~~~~~~~~~~~
+
+- Generate C/C++ (vapi) client code directly from vppapigen
+- Embed JSON definitions into the API server, so dynamic languages
+ can download them directly without going via the filesystem and JSON
+ files.
+
+API Change Process
+------------------
+
+Purpose
+~~~~~~~
+
+To minimize the disruptions to the consumers of the VPP API, while permitting
+the innovation for the VPP itself.
+
+Historically, API changes in VPP master branch were allowed at any point in time
+outside of a small window between the API freeze milestone and RC1 milestone.
+The API changes on the throttle branches were not permitted at all. This model
+proved workable, however all the production use cases ended up on throttle
+branches, with a lot of forklift activity when it is the time to upgrade to the
+next branch.
+
+This formally structured API change process harmonizes the behavior across all
+the VPP branches, and allows more flexibility for the consumer, while permitting
+the innovation in the VPP itself.
+
+The Core Promise
+~~~~~~~~~~~~~~~~
+
+"If a user is running a VPP version N and does not use any deprecated APIs, they
+should be able to simply upgrade the VPP to version N+1 and there should be no
+API breakage".
+
+In-Progress, Production and Deprecated APIs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This proposal adds a classification of stability of an API call:
+
+- "In-Progress": APIs in the process of the development, experimentation, and
+ limited testing.
+
+- "Production": tested as part of the "make test", considered stable for general
+ usage.
+
+- "Deprecated": used as a flag on Production APIs which are slated to be
+ deprecated in the future release.
+
+The "In-Progress" APIs or the APIs with the semantic version of 0.x.y are not
+subject to any stability checks, thus the developers are free to introduce them,
+modify their signatures, and as well remove them completely at will. The users
+should not use the in-progress APIs without the interactions with its
+maintainers, nor base the production code on those APIs. The goal of
+"in-progress" APIs to allow rapid iteration and modifications to ensure the API
+signature and function is stabilized. These API calls may be used for testing or
+experimentation and prototyping.
+
+When the maintainer is satisfied with the quality of the APIs, and ensures that
+they are tested as part of the "Make test" runs, they can transition their
+status to "Production".
+
+The "Production" APIs can *NOT* be changed in any way that modifies their
+representation on the wire and the signature (thus CRC). The only change that
+they may incur is to be marked as "Deprecated". These are the APIs that the
+downstream users can use for production purposes. They exist to fulfill a core
+promise of this process: The "Deprecated" APIs are the "Production" APIs that
+are about to be deleted. To ensure the above core promise is maintained, if the
+API call was marked as deprecated at any point between RC1 of release N and RC1
+of release N+1, it MUST NOT be deleted until the RC1 milestone of the
+release N+2. The deprecated API SHOULD specify a replacement API - which MUST
+be a Production API, so as not to decrease the level of stability.
+
+
+The time interval between a commit that marks an API as deprecated and a commit
+that deletes that API MUST be at least equal the time between the two subsequent
+releases (currently 4 months).
+
+
+Doing so allows a for a good heads-up to those who are using the
+"one free upgrade" property to proactively catch and test the transition from
+the deprecated APIs using the master.
+
+
+Marking an API as deprecated just 1 day before RC1 branch pull and then deleting
+that API one day after does *technically* satisfy "one free upgrade" promise,
+but is rather hostile to the users that are proactively tracking it.
+
+Semantic API Versioning
+~~~~~~~~~~~~~~~~~~~~~~~
+
+VPP APIs use semantic versioning according to semver.org, with the compatibility
+logic being applied at the moment the messages are marked as deprecated.
+
+To discuss: i.e. if message_2 is being introduced which deprecates the
+message_1, then that same commit should increase the major version of the API.
+
+The 0.x.x API versions, by virtue of being in-progress, are exempt from this
+treatment.
+
+Tooling
+~~~~~~~
+
+See https://gerrit.fd.io/r/c/vpp/+/26881:
+
+crcchecker.py is a tool to enforce the policy, with a few other bonus uses:
+
+extras/scripts/crcchecker.py --check-patchset # returns -1 if backwards incompatible extras/scripts/crcchecker.py --dump-manifest extras/scripts/crcchecker.py --git-revision v20.01 <files> extras/scripts/crcchecker.py -- diff <oldfile> <newfile>
+
+Notice that you can use this tool to get the list of API changes since a given past release.
+
+The policy:
+
+.. highlight:: none
+
+.. code-block::
+
+ 1. Production APIs should never change.
+ The definition of a "production API" is if the major version in
+ the API file is > 0 that is not marked as "in-progress".
+ 2. APIs that are experimental / not released are not checked.
+ An API message can be individually marked as in progress,
+ by adding the following in the API definition:
+ option in_progress;
+ 3. An API can be deprecated in three-to-six steps (the steps
+ with letters can be combined or split, depending on situation):
+ Step 1a: A new "in-progress" API new_api_2 is added that
+ is deemed to be a replacement.
+ Step 1b: The existing API is marked as "replaced_by" this new API:
+ option replaced_by="new_api_2";
+ Step 2a: The new_api_2 is marked as production by deleting its in-progress status,
+ provided that this API does have sufficient test coverage to deem it well tested.
+ Step 2b: the existing API is marked as "deprecated":
+ option deprecated="optional short message to humans reading it";
+ Step 3: the deprecated API is deleted.
+
+There is a time constraint that the minimum interval between the steps 2 and 3
+must be at least 4 months. The proposal is to have step 2 around a couple of
+weeks before the F0 milestone for a release, as triggered by the release manager
+(and in the future by an automated means).
+
+Use Cases
+~~~~~~~~~
+
+Adding A New Field To A Production API
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The simplest way to add a new field to a Production API message *foo_message* is
+to create a new In-Progress message *foo_message_v2*, and add the field to that
+one. Typically it will be an extension - so the API message handlers are
+trivially chained. If there are changes/adjustments that are needed, this new
+message can be freely altered without bothering the users of the Production API.
+
+When the maintainer is happy with the quality of the implementation, and the
+foo_message_v2 is tested in "make test" to the same extent as the foo_message,
+they can make two commits: one, removing the in-progress status for
+foo_message_v2, and the second one - deprecating foo_message and pointing the
+foo_message_v2 as the replacement. Technically after the next throttle pull,
+they can delete the foo_message - the deprecation and the replacement will be
+already in the corresponding branch.
+
+Rapid Experimentation For A New Feature
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Add a message that is in-progress, and keep iterating with this message. This
+message is not subject to the change control process.
+
+An In-progress API Accidentally Marked As "production"
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This is expected to mainly apply during the initial period of 20.05->20.09, the
+proposal is to have it active for 4 weeks from Jun 17 till July 15th, with the
+following process.
+
+If a developer finds that a given API or a set of APIs is not ready for
+production due to lack of tests and/or the general API stability, then they:
+
+- Create a new gerrit change with *just* the marking of the API as
+ in_progress, subject being: "api: <feature> api message downgrade" and
+ a comment identifying which APIs are being downgraded and why.
+
+- Add ayourtch@gmail.com or the current Release Manager as a reviewer --
+ for help in guiding the process and to ensure that the gerrit change is not
+ forgotten.
+
+- Send an email to vpp-dev mailing list with the subject being the same as the
+ one-liner commit message, reference to the gerrit change, and the reasoning.
+
+- Wait for the timeout period of two weeks for the feedback.
+
+- If no feedback received, assume the community agreement and commit the
+ change to master branch.
+
+This needs to be highlighted that this process is an *exception* - normally the
+transition is always in_progress => production => deprecated.
+
+API Change Examples
+~~~~~~~~~~~~~~~~~~~
+
+https://gerrit.fd.io/r/q/+is:merged+message:%2522%255Eapi:.*%2524%2522
diff --git a/src/tools/vppapigen/generate_go.py b/src/tools/vppapigen/generate_go.py
index 1e072ef1e1c..4ed507b5d73 100755
--- a/src/tools/vppapigen/generate_go.py
+++ b/src/tools/vppapigen/generate_go.py
@@ -5,6 +5,7 @@ import os
import pathlib
import subprocess
import tarfile
+import shutil
import requests
import sys
@@ -13,58 +14,46 @@ import sys
# GoVPP API generator generates Go bindings compatible with the local VPP
#
-parser = argparse.ArgumentParser()
-parser.add_argument("-govpp-commit", help="GoVPP commit or branch (defaults to v0.3.5-45-g671f16c)",
- default="671f16c", # fixed GoVPP version
- type=str)
-parser.add_argument("-output-dir", help="output target directory for generated bindings", type=str)
-parser.add_argument("-api-files", help="api files to generate (without commas)", nargs="+", type=str)
-parser.add_argument("-import-prefix", help="prefix imports in the generated go code", type=str)
-parser.add_argument("-no-source-path-info", help="disable source path info in generated files", nargs='?', const=True,
- default=False)
-args = parser.parse_args()
-
-
-# Check input arguments
-def validate_args(vpp_dir, o, f, c, i):
- if o is not None:
- if not os.path.exists(o) or os.path.isfile(o):
- print(o + " is not a valid output path")
- sys.exit(1)
- else:
- o = vpp_dir
- if f is None:
- f = []
- if c is None:
- c = "671f16c"
- if i is None:
- i = ""
-
- return str(o), f, c, i
+DefaultGoVppCommit = "16a47ef937b3a5ce6acf45885386062b323c8d25"
+
+
+def version_geq(ver_a, ver_b):
+ major_a, minor_a, patch_a = ver_a.split(".")
+ major_b, minor_b, patch_b = ver_b.split(".")
+ if major_a > major_b:
+ return True
+ elif major_a == major_b and minor_a > minor_b:
+ return True
+ elif major_a == major_b and minor_a == minor_b and patch_a >= patch_b:
+ return True
+ return False
+
+
+def execute(cli, cwd=None):
+ p = subprocess.Popen(
+ cli.split(),
+ cwd=cwd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ universal_newlines=True,
+ )
+ output, error = p.communicate()
+ if p.returncode != 0:
+ print("Command `%s` failed: %d %s" % (cli, p.returncode, error))
+ sys.exit(1)
+ return output, error
-# Returns version of the installed Go
def get_go_version(go_root):
- p = subprocess.Popen(["./go", "version"],
- cwd=go_root + "/bin",
- stdout=subprocess.PIPE,
- universal_newlines=True, )
- output, _ = p.communicate()
- output_fmt = output.replace("go version go", "", 1)
-
- return output_fmt.rstrip("\n")
+ # Returns version of the installed Go
+ output, _ = execute("./go version", cwd=go_root + "/bin")
+ return output.replace("go version go", "", 1).rstrip("\n")
# Returns version of the installed binary API generator
def get_binapi_gen_version(go_path):
- p = subprocess.Popen(["./binapi-generator", "-version"],
- cwd=go_path + "/bin",
- stdout=subprocess.PIPE,
- universal_newlines=True, )
- output, _ = p.communicate()
- output_fmt = output.replace("govpp", "", 1)
-
- return output_fmt.rstrip("\n")
+ output, _ = execute("./binapi-generator -version", cwd=go_path + "/bin")
+ return output.replace("govpp", "", 1).rstrip("\n")
# Verifies local Go installation and installs the latest
@@ -73,20 +62,30 @@ def install_golang(go_root):
go_bin = go_root + "/bin/go"
if os.path.exists(go_bin) and os.path.isfile(go_bin):
- print('Go ' + get_go_version(go_root) + ' is already installed')
+ print("Go " + get_go_version(go_root) + " is already installed")
return
+ filename = (
+ requests.get("https://golang.org/VERSION?m=text").text + ".linux-amd64.tar.gz"
+ )
+ url = "https://dl.google.com/go/" + filename
+
print("Go binary not found, installing the latest version...")
- go_folders = ['src', 'pkg', 'bin']
+ print("Download url = %s" % url)
+ print("Install directory = %s" % go_root)
+ text = input("[Y/n] ?")
+
+ if text.strip().lower() != "y" and text.strip().lower() != "yes":
+ print("Aborting...")
+ exit(1)
+
+ go_folders = ["src", "pkg", "bin"]
for f in go_folders:
if not os.path.exists(os.path.join(go_root, f)):
os.makedirs(os.path.join(go_root, f))
-
- filename = requests.get('https://golang.org/VERSION?m=text').text + ".linux-amd64.tar.gz"
- url = "https://dl.google.com/go/" + filename
r = requests.get(url)
- with open("/tmp/" + filename, 'wb') as f:
+ with open("/tmp/" + filename, "wb") as f:
f.write(r.content)
go_tf = tarfile.open("/tmp/" + filename)
@@ -97,29 +96,29 @@ def install_golang(go_root):
go_tf.close()
os.remove("/tmp/" + filename)
- print('Go ' + get_go_version(go_root) + ' was installed')
+ print("Go " + get_go_version(go_root) + " was installed")
# Installs latest binary API generator
def install_binapi_gen(c, go_root, go_path):
- os.environ['GO111MODULE'] = "on"
- if os.path.exists(go_root + "/bin/go") & os.path.isfile(go_root + "/bin/go"):
- p = subprocess.Popen(["./go", "get", "git.fd.io/govpp.git/cmd/binapi-generator@" + c],
- cwd=go_root + "/bin",
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- universal_newlines=True, )
- _, error = p.communicate()
- if p.returncode != 0:
- print("binapi generator installation failed: %d %s" % (p.returncode, error))
- sys.exit(1)
+ go_version = get_go_version(go_root)
+ if version_geq(go_version, "1.18.0"):
+ execute(
+ "./go install git.fd.io/govpp.git/cmd/binapi-generator@" + c,
+ cwd=go_root + "/bin",
+ )
+ else:
+ os.environ["GO111MODULE"] = "on"
+ execute(
+ "./go get git.fd.io/govpp.git/cmd/binapi-generator@" + c,
+ cwd=go_root + "/bin",
+ )
bg_ver = get_binapi_gen_version(go_path)
- print('Installed binary API generator ' + bg_ver)
+ print("Installed binary API generator " + bg_ver)
# Creates generated bindings using GoVPP binapigen to the target folder
def generate_api(output_dir, vpp_dir, api_list, import_prefix, no_source, go_path):
- output_binapi = output_dir + "binapi" if output_dir[-1] == "/" else output_dir + "/binapi"
json_dir = vpp_dir + "/build-root/install-vpp-native/vpp/share/vpp/api"
if not os.path.exists(json_dir):
@@ -127,25 +126,19 @@ def generate_api(output_dir, vpp_dir, api_list, import_prefix, no_source, go_pat
sys.exit(1)
print("Generating API")
- cmd = ["./binapi-generator", "--output-dir=" + output_binapi, "--input-dir=" + json_dir]
+ cmd = ["./binapi-generator", "--input-dir=" + json_dir]
+ if output_dir:
+ cmd += ["--output-dir=" + output_dir]
if len(api_list):
print("Following API files were requested by 'GO_API_FILES': " + str(api_list))
print("Note that dependency requirements may generate additional API files")
cmd.append(api_list)
- if not import_prefix == "":
+ if import_prefix:
cmd.append("-import-prefix=" + import_prefix)
if no_source:
cmd.append("-no-source-path-info")
- p = subprocess.Popen(cmd, cwd=go_path + "/bin",
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- universal_newlines=True, )
-
- out = p.communicate()[1]
- if p.returncode != 0:
- print("go api generate failed: %d %s" % (p.returncode, out))
- sys.exit(1)
+ _, out = execute(" ".join(cmd), cwd=go_path + "/bin")
# Print nice output of the binapi generator
for msg in out.split():
if "=" in msg:
@@ -153,30 +146,91 @@ def generate_api(output_dir, vpp_dir, api_list, import_prefix, no_source, go_pat
print(msg, end=" ")
print("\n")
- print("Go API bindings were generated to " + output_binapi)
+ print("Go API bindings were generated to " + output_dir)
-def main():
- # project root directory
- root = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
- vpp_dir: str = root.parent.parent.parent
-
- o, f, c, i = validate_args(vpp_dir, args.output_dir, args.api_files, args.govpp_commit,
- args.import_prefix)
-
+def get_go_variables():
# go specific environment variables
if "GOROOT" in os.environ:
- go_root = os.environ['GOROOT']
+ go_root = os.environ["GOROOT"]
else:
- go_root = os.environ['HOME'] + "/.go"
+ go_binary = shutil.which("go")
+ if go_binary != "":
+ go_binary_dir, _ = os.path.split(go_binary)
+ go_root = os.path.join(go_binary_dir, "..")
+ else:
+ go_root = os.environ["HOME"] + "/.go"
if "GOPATH" in os.environ:
- go_path = os.environ['GOPATH']
+ go_path = os.environ["GOPATH"]
else:
- go_path = os.environ['HOME'] + "/go"
+ go_path = os.environ["HOME"] + "/go"
+
+ return go_root, go_path
+
+def main():
+ # project root directory
+ root = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
+ vpp_dir = root.parent.parent.parent
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "-govpp-commit",
+ "--govpp-commit",
+ help="GoVPP commit or branch ",
+ default=DefaultGoVppCommit,
+ type=str,
+ )
+ parser.add_argument(
+ "-output-dir",
+ "--output-dir",
+ help="output target directory for generated bindings",
+ type=str,
+ default=os.path.join(vpp_dir, "vppbinapi"),
+ )
+ parser.add_argument(
+ "-api-files",
+ "--api-files",
+ help="api files to generate (without commas)",
+ nargs="+",
+ type=str,
+ default=[],
+ )
+ parser.add_argument(
+ "-import-prefix",
+ "--import-prefix",
+ help="prefix imports in the generated go code",
+ default="",
+ type=str,
+ )
+ parser.add_argument(
+ "-no-source-path-info",
+ "--no-source-path-info",
+ help="disable source path info in generated files",
+ nargs="?",
+ const=True,
+ default=True,
+ )
+ args = parser.parse_args()
+
+ go_root, go_path = get_go_variables()
install_golang(go_root)
- install_binapi_gen(c, go_root, go_path)
- generate_api(o, str(vpp_dir), f, i, args.no_source_path_info, go_path)
+
+ if not (
+ os.path.exists(go_root + "/bin/go") and os.path.isfile(go_root + "/bin/go")
+ ):
+ print(go_root + "/bin/go does not exist")
+ sys.exit(1)
+
+ install_binapi_gen(args.govpp_commit, go_root, go_path)
+ generate_api(
+ args.output_dir,
+ str(vpp_dir),
+ args.api_files,
+ args.import_prefix,
+ args.no_source_path_info,
+ go_path,
+ )
if __name__ == "__main__":
diff --git a/src/tools/vppapigen/generate_json.py b/src/tools/vppapigen/generate_json.py
index 288e519edd2..dc5cf9c1bbf 100755
--- a/src/tools/vppapigen/generate_json.py
+++ b/src/tools/vppapigen/generate_json.py
@@ -16,30 +16,37 @@
import argparse
import pathlib
import subprocess
-BASE_DIR = subprocess.check_output('git rev-parse --show-toplevel',
- shell=True).strip().decode()
-vppapigen_bin = pathlib.Path(
- '%s/src/tools/vppapigen/vppapigen.py' % BASE_DIR).as_posix()
+import vppapigen
+import os
+from multiprocessing import Pool
+
+BASE_DIR = (
+ subprocess.check_output("git rev-parse --show-toplevel", shell=True)
+ .strip()
+ .decode()
+)
src_dir_depth = 3
output_path = pathlib.Path(
- '%s/build-root/install-vpp-native/vpp/share/vpp/api/' % BASE_DIR)
+ "%s/build-root/install-vpp-native/vpp/share/vpp/api/" % BASE_DIR
+)
output_path_debug = pathlib.Path(
- '%s/build-root/install-vpp_debug-native/vpp/share/vpp/api/' % BASE_DIR)
+ "%s/build-root/install-vpp_debug-native/vpp/share/vpp/api/" % BASE_DIR
+)
output_dir_map = {
- 'plugins': 'plugins',
- 'vlibmemory': 'core',
- 'vnet': 'core',
- 'vlib': 'core',
- 'vpp': 'core',
+ "plugins": "plugins",
+ "vlibmemory": "core",
+ "vnet": "core",
+ "vlib": "core",
+ "vpp": "core",
}
def api_search_globs(src_dir):
globs = []
for g in output_dir_map:
- globs.extend(list(src_dir.glob('%s/**/*.api' % g)))
+ globs.extend(list(src_dir.glob("%s/**/*.api" % g)))
return globs
@@ -48,31 +55,30 @@ def api_files(src_dir):
return [x for x in api_search_globs(src_dir)]
-def vppapigen(vppapigen_bin, output_path, src_dir, src_file):
- try:
- subprocess.check_output(
- [vppapigen_bin, '--includedir', src_dir.as_posix(),
- '--input', src_file.as_posix(), 'JSON',
- '--output', '%s/%s/%s.json' % (
- output_path,
- output_dir_map[src_file.as_posix().split('/')[
- src_dir_depth + BASE_DIR.count('/') - 1]],
- src_file.name)])
- except KeyError:
- print('src_file: %s' % src_file)
- raise
+def get_n_parallel(n_parallel):
+ if n_parallel == 0:
+ n_parallel = os.environ.get("MAKE_PARALLEL_JOBS", os.cpu_count())
+ try:
+ n_parallel = int(n_parallel)
+ except ValueError:
+ return os.cpu_count()
+ return n_parallel or os.cpu_count()
def main():
- cliparser = argparse.ArgumentParser(
- description='VPP API JSON definition generator')
- cliparser.add_argument('--srcdir', action='store',
- default='%s/src' % BASE_DIR),
- cliparser.add_argument('--output', action='store',
- help='directory to store files'),
- cliparser.add_argument('--debug-target', action='store_true',
- default=False,
- help="'True' if -debug target"),
+ cliparser = argparse.ArgumentParser(description="VPP API JSON definition generator")
+ cliparser.add_argument("--srcdir", action="store", default="%s/src" % BASE_DIR),
+ cliparser.add_argument("--output", action="store", help="directory to store files"),
+ cliparser.add_argument(
+ "--parallel", type=int, default=0, help="Number of parallel processes"
+ ),
+ cliparser.add_argument(
+ "--debug-target",
+ action="store_true",
+ default=False,
+ help="'True' if -debug target",
+ ),
+
args = cliparser.parse_args()
src_dir = pathlib.Path(args.srcdir)
@@ -86,13 +92,43 @@ def main():
for d in output_dir_map.values():
output_dir.joinpath(d).mkdir(exist_ok=True, parents=True)
- for f in output_dir.glob('**/*.api.json'):
+ for f in output_dir.glob("**/*.api.json"):
f.unlink()
- for f in api_files(src_dir):
- vppapigen(vppapigen_bin, output_dir, src_dir, f)
- print('json files written to: %s/.' % output_dir)
-
-
-if __name__ == '__main__':
+ with Pool(get_n_parallel(args.parallel)) as p:
+ p.map(
+ vppapigen.run_kw_vppapigen,
+ [
+ {
+ "output": "%s/%s/%s.json"
+ % (
+ output_path,
+ output_dir_map[
+ f.as_posix().split("/")[
+ src_dir_depth + BASE_DIR.count("/") - 1
+ ]
+ ],
+ f.name,
+ ),
+ "outputdir": "%s/%s/"
+ % (
+ output_path,
+ output_dir_map[
+ f.as_posix().split("/")[
+ src_dir_depth + BASE_DIR.count("/") - 1
+ ]
+ ],
+ ),
+ "input_file": f.as_posix(),
+ "includedir": [src_dir.as_posix()],
+ "output_module": "JSON",
+ }
+ for f in api_files(src_dir)
+ ],
+ )
+
+ print("json files written to: %s/." % output_dir)
+
+
+if __name__ == "__main__":
main()
diff --git a/src/tools/vppapigen/test_vppapigen.py b/src/tools/vppapigen/test_vppapigen.py
index c454ffc8638..20f6c6da10d 100755
--- a/src/tools/vppapigen/test_vppapigen.py
+++ b/src/tools/vppapigen/test_vppapigen.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python3
import unittest
-from vppapigen import VPPAPI, Option, ParseError, Union, foldup_crcs, \
- global_types
+from vppapigen import VPPAPI, Option, ParseError, Union, foldup_crcs, global_types
import vppapigen
@@ -28,17 +27,17 @@ class TestUnion(unittest.TestCase):
cls.parser = VPPAPI()
def test_union(self):
- test_string = '''
+ test_string = """
union foo_union {
u32 a;
u8 b;
};
- '''
+ """
r = self.parser.parse_string(test_string)
self.assertTrue(isinstance(r[0], Union))
def test_union_vla(self):
- test_string = '''
+ test_string = """
union foo_union_vla {
u32 a;
u8 b[a];
@@ -46,13 +45,13 @@ class TestUnion(unittest.TestCase):
autoreply define foo {
vl_api_foo_union_vla_t v;
};
- '''
+ """
r = self.parser.parse_string(test_string)
self.assertTrue(isinstance(r[0], Union))
self.assertTrue(r[0].vla)
s = self.parser.process(r)
- test_string2 = '''
+ test_string2 = """
union foo_union_vla2 {
u32 a;
u8 b[a];
@@ -61,10 +60,10 @@ class TestUnion(unittest.TestCase):
autoreply define foo2 {
vl_api_foo_union_vla2_t v;
};
- '''
+ """
self.assertRaises(ValueError, self.parser.parse_string, test_string2)
- test_string3 = '''
+ test_string3 = """
union foo_union_vla3 {
u32 a;
u8 b[a];
@@ -73,7 +72,7 @@ class TestUnion(unittest.TestCase):
vl_api_foo_union_vla3_t v;
u32 x;
};
- '''
+ """
self.assertRaises(ValueError, self.parser.parse_string, test_string3)
@@ -83,10 +82,10 @@ class TestTypedef(unittest.TestCase):
cls.parser = VPPAPI()
def test_duplicatetype(self):
- test_string = '''
+ test_string = """
typedef foo1 { u8 dummy; };
typedef foo1 { u8 dummy; };
- '''
+ """
self.assertRaises(KeyError, self.parser.parse_string, test_string)
@@ -96,42 +95,42 @@ class TestDefine(unittest.TestCase):
cls.parser = VPPAPI()
def test_unknowntype(self):
- test_string = 'define foo { foobar foo;};'
+ test_string = "define foo { foobar foo;};"
with self.assertRaises(ParseError) as ctx:
self.parser.parse_string(test_string)
- self.assertIn('Undefined type: foobar', str(ctx.exception))
+ self.assertIn("Undefined type: foobar", str(ctx.exception))
- test_string = 'define { u8 foo;};'
+ test_string = "define { u8 foo;};"
with self.assertRaises(ParseError) as ctx:
self.parser.parse_string(test_string)
def test_flags(self):
- test_string = '''
+ test_string = """
manual_print dont_trace manual_endian define foo { u8 foo; };
define foo_reply {u32 context; i32 retval; };
- '''
+ """
r = self.parser.parse_string(test_string)
self.assertIsNotNone(r)
s = self.parser.process(r)
self.assertIsNotNone(s)
- for d in s['Define']:
- if d.name == 'foo':
+ for d in s["Define"]:
+ if d.name == "foo":
self.assertTrue(d.dont_trace)
self.assertTrue(d.manual_endian)
self.assertTrue(d.manual_print)
self.assertFalse(d.autoreply)
- test_string = '''
+ test_string = """
nonexisting_flag define foo { u8 foo; };
- '''
+ """
with self.assertRaises(ParseError):
self.parser.parse_string(test_string)
def test_options(self):
- test_string = '''
+ test_string = """
define foo { option deprecated; u8 foo; };
define foo_reply {u32 context; i32 retval; };
- '''
+ """
r = self.parser.parse_string(test_string)
self.assertIsNotNone(r)
s = self.parser.process(r)
@@ -144,14 +143,14 @@ class TestService(unittest.TestCase):
cls.parser = VPPAPI()
def test_service(self):
- test_string = '''
+ test_string = """
autoreply define show_version { u8 foo;};
service { rpc show_version returns show_version_reply; };
- '''
+ """
r = self.parser.parse_string(test_string)
s = self.parser.process(r)
- self.assertEqual(s['Service'][0].caller, 'show_version')
- self.assertEqual(s['Service'][0].reply, 'show_version_reply')
+ self.assertEqual(s["Service"][0].caller, "show_version")
+ self.assertEqual(s["Service"][0].reply, "show_version_reply")
def get_crc(apistring, name):
@@ -159,52 +158,52 @@ def get_crc(apistring, name):
parser = vppapigen.VPPAPI()
r = parser.parse_string(apistring)
s = parser.process(r)
- foldup_crcs(s['Define'])
- d = [f for f in s['Define'] if f.name == name]
+ foldup_crcs(s["Define"])
+ d = [f for f in s["Define"] if f.name == name]
return d[0].crc
class TestCRC(unittest.TestCase):
def test_crc(self):
- test_string = '''
+ test_string = """
typedef list { u8 foo; };
autoreply define foo { u8 foo; vl_api_list_t l;};
- '''
- crc = get_crc(test_string, 'foo')
+ """
+ crc = get_crc(test_string, "foo")
# modify underlying type
- test_string = '''
+ test_string = """
typedef list { u8 foo2; };
autoreply define foo { u8 foo; vl_api_list_t l;};
- '''
- crc2 = get_crc(test_string, 'foo')
+ """
+ crc2 = get_crc(test_string, "foo")
self.assertNotEqual(crc, crc2)
# two user-defined types
- test_string = '''
+ test_string = """
typedef address { u8 foo2; };
typedef list { u8 foo2; vl_api_address_t add; };
autoreply define foo { u8 foo; vl_api_list_t l;};
- '''
- crc3 = get_crc(test_string, 'foo')
+ """
+ crc3 = get_crc(test_string, "foo")
- test_string = '''
+ test_string = """
typedef address { u8 foo3; };
typedef list { u8 foo2; vl_api_address_t add; };
autoreply define foo { u8 foo; vl_api_list_t l;};
- '''
- crc4 = get_crc(test_string, 'foo')
+ """
+ crc4 = get_crc(test_string, "foo")
self.assertNotEqual(crc3, crc4)
- test_string = '''
+ test_string = """
typedef address { u8 foo3; };
typedef list { u8 foo2; vl_api_address_t add; u8 foo3; };
autoreply define foo { u8 foo; vl_api_list_t l;};
- '''
- crc5 = get_crc(test_string, 'foo')
+ """
+ crc5 = get_crc(test_string, "foo")
self.assertNotEqual(crc4, crc5)
- test_string = '''
+ test_string = """
typedef ip6_address
{
u8 foo;
@@ -227,11 +226,11 @@ autoreply define sr_policy_add
u32 fib_table;
vl_api_srv6_sid_list_t sids;
};
-'''
+"""
- crc = get_crc(test_string, 'sr_policy_add')
+ crc = get_crc(test_string, "sr_policy_add")
- test_string = '''
+ test_string = """
typedef ip6_address
{
u8 foo;
@@ -253,14 +252,13 @@ autoreply define sr_policy_add
u32 fib_table;
vl_api_srv6_sid_list_t sids;
};
-'''
- crc2 = get_crc(test_string, 'sr_policy_add')
+"""
+ crc2 = get_crc(test_string, "sr_policy_add")
self.assertNotEqual(crc, crc2)
class TestEnum(unittest.TestCase):
-
@classmethod
def setUpClass(cls):
cls.parser = VPPAPI()
@@ -278,8 +276,8 @@ enum tunnel_mode : u8
r = self.parser.parse_string(test_string)
self.assertIsNotNone(r)
s = self.parser.process(r)
- for o in s['types']:
- if o.type == 'Enum':
+ for o in s["types"]:
+ if o.type == "Enum":
self.assertEqual(o.name, "tunnel_mode")
break
else:
@@ -298,8 +296,8 @@ enum virtio_flags {
r = self.parser.parse_string(test_string)
self.assertIsNotNone(r)
s = self.parser.process(r)
- for o in s['types']:
- if o.type == 'Enum':
+ for o in s["types"]:
+ if o.type == "Enum":
self.assertEqual(o.name, "virtio_flags")
break
else:
@@ -307,7 +305,6 @@ enum virtio_flags {
class TestEnumFlag(unittest.TestCase):
-
@classmethod
def setUpClass(cls):
cls.parser = VPPAPI()
@@ -326,8 +323,9 @@ enumflag tunnel_mode_ef : u8
with self.assertRaises(TypeError) as ctx:
r = self.parser.parse_string(test_string)
- self.assertTrue(str(ctx.exception).startswith(
- 'tunnel_mode_ef is not a flag enum.'))
+ self.assertTrue(
+ str(ctx.exception).startswith("tunnel_mode_ef is not a flag enum.")
+ )
def test_enumflag_as_enumflag(self):
test_string = """\
@@ -342,13 +340,13 @@ enumflag virtio_flags_ef {
r = self.parser.parse_string(test_string)
self.assertIsNotNone(r)
s = self.parser.process(r)
- for o in s['types']:
- if o.type == 'EnumFlag':
+ for o in s["types"]:
+ if o.type == "EnumFlag":
self.assertEqual(o.name, "virtio_flags_ef")
break
else:
self.fail()
-if __name__ == '__main__':
+if __name__ == "__main__":
unittest.main(verbosity=2)
diff --git a/src/tools/vppapigen/vppapigen.py b/src/tools/vppapigen/vppapigen.py
index 8415c28fb7b..2b0ce9999d7 100755
--- a/src/tools/vppapigen/vppapigen.py
+++ b/src/tools/vppapigen/vppapigen.py
@@ -10,9 +10,10 @@ from subprocess import Popen, PIPE
import ply.lex as lex
import ply.yacc as yacc
-assert sys.version_info >= (3, 5), \
- "Not supported Python version: {}".format(sys.version)
-log = logging.getLogger('vppapigen')
+assert sys.version_info >= (3, 5), "Not supported Python version: {}".format(
+ sys.version
+)
+log = logging.getLogger("vppapigen")
# Ensure we don't leave temporary files around
sys.dont_write_bytecode = True
@@ -28,11 +29,10 @@ seen_imports = {}
def global_type_add(name, obj):
- '''Add new type to the dictionary of types '''
- type_name = 'vl_api_' + name + '_t'
+ """Add new type to the dictionary of types"""
+ type_name = "vl_api_" + name + "_t"
if type_name in global_types:
- raise KeyError("Attempted redefinition of {!r} with {!r}.".format(
- name, obj))
+ raise KeyError("Attempted redefinition of {!r} with {!r}.".format(name, obj))
global_types[type_name] = obj
@@ -49,104 +49,105 @@ class VPPAPILexer:
self.filename = filename
reserved = {
- 'service': 'SERVICE',
- 'rpc': 'RPC',
- 'returns': 'RETURNS',
- 'null': 'NULL',
- 'stream': 'STREAM',
- 'events': 'EVENTS',
- 'define': 'DEFINE',
- 'typedef': 'TYPEDEF',
- 'enum': 'ENUM',
- 'enumflag': 'ENUMFLAG',
- 'typeonly': 'TYPEONLY',
- 'manual_print': 'MANUAL_PRINT',
- 'manual_endian': 'MANUAL_ENDIAN',
- 'dont_trace': 'DONT_TRACE',
- 'autoreply': 'AUTOREPLY',
- 'autoendian': 'AUTOENDIAN',
- 'option': 'OPTION',
- 'u8': 'U8',
- 'u16': 'U16',
- 'u32': 'U32',
- 'u64': 'U64',
- 'i8': 'I8',
- 'i16': 'I16',
- 'i32': 'I32',
- 'i64': 'I64',
- 'f64': 'F64',
- 'bool': 'BOOL',
- 'string': 'STRING',
- 'import': 'IMPORT',
- 'true': 'TRUE',
- 'false': 'FALSE',
- 'union': 'UNION',
- 'counters': 'COUNTERS',
- 'paths': 'PATHS',
- 'units': 'UNITS',
- 'severity': 'SEVERITY',
- 'type': 'TYPE',
- 'description': 'DESCRIPTION',
+ "service": "SERVICE",
+ "rpc": "RPC",
+ "returns": "RETURNS",
+ "null": "NULL",
+ "stream": "STREAM",
+ "events": "EVENTS",
+ "define": "DEFINE",
+ "typedef": "TYPEDEF",
+ "enum": "ENUM",
+ "enumflag": "ENUMFLAG",
+ "typeonly": "TYPEONLY",
+ "manual_print": "MANUAL_PRINT",
+ "manual_endian": "MANUAL_ENDIAN",
+ "dont_trace": "DONT_TRACE",
+ "autoreply": "AUTOREPLY",
+ "autoendian": "AUTOENDIAN",
+ "option": "OPTION",
+ "u8": "U8",
+ "u16": "U16",
+ "u32": "U32",
+ "u64": "U64",
+ "i8": "I8",
+ "i16": "I16",
+ "i32": "I32",
+ "i64": "I64",
+ "f64": "F64",
+ "bool": "BOOL",
+ "string": "STRING",
+ "import": "IMPORT",
+ "true": "TRUE",
+ "false": "FALSE",
+ "union": "UNION",
+ "counters": "COUNTERS",
+ "paths": "PATHS",
+ "units": "UNITS",
+ "severity": "SEVERITY",
+ "type": "TYPE",
+ "description": "DESCRIPTION",
}
- tokens = ['STRING_LITERAL',
- 'ID', 'NUM'] + list(reserved.values())
+ tokens = ["STRING_LITERAL", "COMMENT", "ID", "NUM"] + list(reserved.values())
- t_ignore_LINE_COMMENT = '//.*'
+ t_ignore_LINE_COMMENT = "//.*"
def t_FALSE(self, t):
- r'false'
+ r"false"
t.value = False
return t
def t_TRUE(self, t):
- r'false'
+ r"false"
t.value = True
return t
def t_NUM(self, t):
- r'0[xX][0-9a-fA-F]+|-?\d+\.?\d*'
- base = 16 if t.value.startswith('0x') else 10
- if '.' in t.value:
+ r"0[xX][0-9a-fA-F]+|-?\d+\.?\d*"
+ base = 16 if t.value.startswith("0x") else 10
+ if "." in t.value:
t.value = float(t.value)
else:
t.value = int(t.value, base)
return t
def t_ID(self, t):
- r'[a-zA-Z_][a-zA-Z_0-9]*'
+ r"[a-zA-Z_][a-zA-Z_0-9]*"
# Check for reserved words
- t.type = VPPAPILexer.reserved.get(t.value, 'ID')
+ t.type = VPPAPILexer.reserved.get(t.value, "ID")
return t
# C string
def t_STRING_LITERAL(self, t):
- r'\"([^\\\n]|(\\.))*?\"'
- t.value = str(t.value).replace("\"", "")
+ r"\"([^\\\n]|(\\.))*?\" "
+ t.value = str(t.value).replace('"', "")
return t
# C or C++ comment (ignore)
- def t_comment(self, t):
- r'(/\*(.|\n)*?\*/)|(//.*)'
- t.lexer.lineno += t.value.count('\n')
+ def t_COMMENT(self, t):
+ r"(/\*(.|\n)*?\*/)|(//.*)"
+ t.lexer.lineno += t.value.count("\n")
+ return t
# Error handling rule
def t_error(self, t):
- raise ParseError("Illegal character '{}' ({})"
- "in {}: line {}".format(t.value[0],
- hex(ord(t.value[0])),
- self.filename,
- t.lexer.lineno))
+ raise ParseError(
+ "Illegal character '{}' ({})"
+ "in {}: line {}".format(
+ t.value[0], hex(ord(t.value[0])), self.filename, t.lexer.lineno
+ )
+ )
# Define a rule so we can track line numbers
def t_newline(self, t):
- r'\n+'
+ r"\n+"
t.lexer.lineno += len(t.value)
literals = ":{}[];=.,"
# A string containing ignored characters (spaces and tabs)
- t_ignore = ' \t'
+ t_ignore = " \t"
def vla_mark_length_field(block):
@@ -164,23 +165,25 @@ def vla_is_last_check(name, block):
vla = True
if i + 1 < len(block):
raise ValueError(
- 'VLA field "{}" must be the last field in message "{}"'
- .format(b.fieldname, name))
- elif b.fieldtype.startswith('vl_api_'):
+ 'VLA field "{}" must be the last field in message "{}"'.format(
+ b.fieldname, name
+ )
+ )
+ elif b.fieldtype.startswith("vl_api_"):
if global_types[b.fieldtype].vla:
vla = True
if i + 1 < len(block):
raise ValueError(
'VLA field "{}" must be the last '
- 'field in message "{}"'
- .format(b.fieldname, name))
- elif b.fieldtype == 'string' and b.length == 0:
+ 'field in message "{}"'.format(b.fieldname, name)
+ )
+ elif b.fieldtype == "string" and b.length == 0:
vla = True
if i + 1 < len(block):
raise ValueError(
'VLA field "{}" must be the last '
- 'field in message "{}"'
- .format(b.fieldname, name))
+ 'field in message "{}"'.format(b.fieldname, name)
+ )
return vla
@@ -192,10 +195,9 @@ class Processable:
class Service(Processable):
- type = 'Service'
+ type = "Service"
- def __init__(self, caller, reply, events=None, stream_message=None,
- stream=False):
+ def __init__(self, caller, reply, events=None, stream_message=None, stream=False):
self.caller = caller
self.reply = reply
self.stream = stream
@@ -204,7 +206,7 @@ class Service(Processable):
class Typedef(Processable):
- type = 'Typedef'
+ type = "Typedef"
def __init__(self, name, flags, block):
self.name = name
@@ -214,9 +216,9 @@ class Typedef(Processable):
self.manual_print = False
self.manual_endian = False
for f in flags:
- if f == 'manual_print':
+ if f == "manual_print":
self.manual_print = True
- elif f == 'manual_endian':
+ elif f == "manual_endian":
self.manual_endian = True
global_type_add(name, self)
@@ -224,14 +226,14 @@ class Typedef(Processable):
vla_mark_length_field(self.block)
def process(self, result):
- result['types'].append(self)
+ result["types"].append(self)
def __repr__(self):
return self.name + str(self.flags) + str(self.block)
class Using(Processable):
- type = 'Using'
+ type = "Using"
def __init__(self, name, flags, alias):
self.name = name
@@ -243,16 +245,15 @@ class Using(Processable):
self.manual_print = False
self.manual_endian = False
for f in flags:
- if f == 'manual_print':
+ if f == "manual_print":
self.manual_print = True
- elif f == 'manual_endian':
+ elif f == "manual_endian":
self.manual_endian = True
if isinstance(alias, Array):
- a = {'type': alias.fieldtype,
- 'length': alias.length}
+ a = {"type": alias.fieldtype, "length": alias.length}
else:
- a = {'type': alias.fieldtype}
+ a = {"type": alias.fieldtype}
self.alias = a
self.using = alias
@@ -265,14 +266,14 @@ class Using(Processable):
global_type_add(name, self)
def process(self, result): # -> Dict
- result['types'].append(self)
+ result["types"].append(self)
def __repr__(self):
return self.name + str(self.alias)
class Union(Processable):
- type = 'Union'
+ type = "Union"
def __init__(self, name, flags, block):
self.manual_print = False
@@ -280,9 +281,9 @@ class Union(Processable):
self.name = name
for f in flags:
- if f == 'manual_print':
+ if f == "manual_print":
self.manual_print = True
- elif f == 'manual_endian':
+ elif f == "manual_endian":
self.manual_endian = True
self.block = block
@@ -292,16 +293,16 @@ class Union(Processable):
global_type_add(name, self)
def process(self, result):
- result['types'].append(self)
+ result["types"].append(self)
def __repr__(self):
return str(self.block)
class Define(Processable):
- type = 'Define'
+ type = "Define"
- def __init__(self, name, flags, block):
+ def __init__(self, name, flags, block, comment=None):
self.name = name
self.flags = flags
self.block = block
@@ -311,16 +312,17 @@ class Define(Processable):
self.autoreply = False
self.autoendian = 0
self.options = {}
+ self.comment = comment
for f in flags:
- if f == 'dont_trace':
+ if f == "dont_trace":
self.dont_trace = True
- elif f == 'manual_print':
+ elif f == "manual_print":
self.manual_print = True
- elif f == 'manual_endian':
+ elif f == "manual_endian":
self.manual_endian = True
- elif f == 'autoreply':
+ elif f == "autoreply":
self.autoreply = True
- elif f == 'autoendian':
+ elif f == "autoendian":
self.autoendian = 1
remove = []
@@ -337,12 +339,11 @@ class Define(Processable):
self.crc = str(block).encode()
def autoreply_block(self, name, parent):
- block = [Field('u32', 'context'),
- Field('i32', 'retval')]
+ block = [Field("u32", "context"), Field("i32", "retval")]
# inherit the parent's options
for k, v in parent.options.items():
block.append(Option(k, v))
- return Define(name + '_reply', [], block)
+ return Define(name + "_reply", [], block)
def process(self, result): # -> Dict
tname = self.__class__.__name__
@@ -355,61 +356,64 @@ class Define(Processable):
class Enum(Processable):
- type = 'Enum'
+ type = "Enum"
- def __init__(self, name, block, enumtype='u32'):
+ def __init__(self, name, block, enumtype="u32"):
self.name = name
self.enumtype = enumtype
self.vla = False
self.manual_print = False
- count = 0
+ count = -1
block2 = []
block3 = []
bc_set = False
for b in block:
- if 'value' in b:
- count = b['value']
+ if "value" in b:
+ count = b["value"]
else:
count += 1
- block2.append([b['id'], count])
+ block2.append([b["id"], count])
try:
- if b['option']['backwards_compatible']:
+ if b["option"]["backwards_compatible"]:
pass
bc_set = True
except KeyError:
- block3.append([b['id'], count])
+ block3.append([b["id"], count])
if bc_set:
- raise ValueError("Backward compatible enum must "
- "be last {!r} {!r}"
- .format(name, b['id']))
+ raise ValueError(
+ "Backward compatible enum must "
+ "be last {!r} {!r}".format(name, b["id"])
+ )
self.block = block2
self.crc = str(block3).encode()
global_type_add(name, self)
def process(self, result):
- result['types'].append(self)
+ result["types"].append(self)
def __repr__(self):
return self.name + str(self.block)
class EnumFlag(Enum):
- type = 'EnumFlag'
+ type = "EnumFlag"
- def __init__(self, name, block, enumtype='u32'):
+ def __init__(self, name, block, enumtype="u32"):
super(EnumFlag, self).__init__(name, block, enumtype)
for b in self.block:
if bin(b[1])[2:].count("1") > 1:
- raise TypeError("%s is not a flag enum. No element in a "
- "flag enum may have more than a "
- "single bit set." % self.name)
+ raise TypeError(
+ "%s is not a flag enum. No element in a "
+ "flag enum may have more than a "
+ "single bit set." % self.name
+ )
class Import(Processable):
- type = 'Import'
+ type = "Import"
_initialized = False
def __new__(cls, *args, **kwargs):
@@ -440,7 +444,7 @@ class Import(Processable):
class Option(Processable):
- type = 'Option'
+ type = "Option"
def __init__(self, option, value=None):
self.option = option
@@ -458,7 +462,7 @@ class Option(Processable):
class Array(Processable):
- type = 'Array'
+ type = "Array"
def __init__(self, fieldtype, name, length, modern_vla=False):
self.fieldtype = fieldtype
@@ -474,12 +478,11 @@ class Array(Processable):
self.vla = False
def __repr__(self):
- return str([self.fieldtype, self.fieldname, self.length,
- self.lengthfield])
+ return str([self.fieldtype, self.fieldname, self.length, self.lengthfield])
class Field(Processable):
- type = 'Field'
+ type = "Field"
def __init__(self, fieldtype, name, limit=None):
# limit field has been expanded to an options dict.
@@ -487,13 +490,14 @@ class Field(Processable):
self.fieldtype = fieldtype
self.is_lengthfield = False
- if self.fieldtype == 'string':
- raise ValueError("The string type {!r} is an "
- "array type ".format(name))
+ if self.fieldtype == "string":
+ raise ValueError("The string type {!r} is an " "array type ".format(name))
if name in keyword.kwlist:
- raise ValueError("Fieldname {!r} is a python keyword and is not "
- "accessible via the python API. ".format(name))
+ raise ValueError(
+ "Fieldname {!r} is a python keyword and is not "
+ "accessible via the python API. ".format(name)
+ )
self.fieldname = name
self.limit = limit
@@ -502,35 +506,34 @@ class Field(Processable):
class Counter(Processable):
- type = 'Counter'
+ type = "Counter"
def __init__(self, path, counter):
self.name = path
self.block = counter
def process(self, result): # -> Dict
- result['Counters'].append(self)
+ result["Counters"].append(self)
class Paths(Processable):
- type = 'Paths'
+ type = "Paths"
def __init__(self, pathset):
self.paths = pathset
def __repr__(self):
- return "%s(paths=%s)" % (
- self.__class__.__name__, self.paths
- )
+ return "%s(paths=%s)" % (self.__class__.__name__, self.paths)
class Coord:
- """ Coordinates of a syntactic element. Consists of:
- - File name
- - Line number
- - (optional) column number, for the Lexer
+ """Coordinates of a syntactic element. Consists of:
+ - File name
+ - Line number
+ - (optional) column number, for the Lexer
"""
- __slots__ = ('file', 'line', 'column', '__weakref__')
+
+ __slots__ = ("file", "line", "column", "__weakref__")
def __init__(self, file, line, column=None):
self.file = file
@@ -559,6 +562,7 @@ class VPPAPIParser:
self.logger = logger
self.fields = []
self.revision = revision
+ self.last_comment = None
def _parse_error(self, msg, coord):
raise ParseError("%s: %s" % (coord, msg))
@@ -568,49 +572,48 @@ class VPPAPIParser:
self.logger.warning("%s: %s" % (coord, msg))
def _coord(self, lineno, column=None):
- return Coord(
- file=self.filename,
- line=lineno, column=column)
+ return Coord(file=self.filename, line=lineno, column=column)
def _token_coord(self, p, token_idx):
- """ Returns the coordinates for the YaccProduction object 'p' indexed
- with 'token_idx'. The coordinate includes the 'lineno' and
- 'column'. Both follow the lex semantic, starting from 1.
+ """Returns the coordinates for the YaccProduction object 'p' indexed
+ with 'token_idx'. The coordinate includes the 'lineno' and
+ 'column'. Both follow the lex semantic, starting from 1.
"""
- last_cr = p.lexer.lexdata.rfind('\n', 0, p.lexpos(token_idx))
+ last_cr = p.lexer.lexdata.rfind("\n", 0, p.lexpos(token_idx))
if last_cr < 0:
last_cr = -1
- column = (p.lexpos(token_idx) - (last_cr))
+ column = p.lexpos(token_idx) - (last_cr)
return self._coord(p.lineno(token_idx), column)
def p_slist(self, p):
- '''slist : stmt
- | slist stmt'''
+ """slist : stmt
+ | slist stmt"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[2]]
def p_stmt(self, p):
- '''stmt : define
- | typedef
- | option
- | import
- | enum
- | enumflag
- | union
- | service
- | paths
- | counters'''
+ """stmt : define
+ | typedef
+ | option
+ | import
+ | enum
+ | enumflag
+ | union
+ | service
+ | paths
+ | comment
+ | counters"""
p[0] = p[1]
def p_import(self, p):
- '''import : IMPORT STRING_LITERAL ';' '''
+ """import : IMPORT STRING_LITERAL ';'"""
p[0] = Import(p[2], revision=self.revision)
def p_path_elements(self, p):
- '''path_elements : path_element
- | path_elements path_element'''
+ """path_elements : path_element
+ | path_elements path_element"""
if len(p) == 2:
p[0] = p[1]
else:
@@ -620,20 +623,20 @@ class VPPAPIParser:
p[0] = p[1] + [p[2]]
def p_path_element(self, p):
- '''path_element : STRING_LITERAL STRING_LITERAL ';' '''
- p[0] = {'path': p[1], 'counter': p[2]}
+ """path_element : STRING_LITERAL STRING_LITERAL ';'"""
+ p[0] = {"path": p[1], "counter": p[2]}
def p_paths(self, p):
- '''paths : PATHS '{' path_elements '}' ';' '''
+ """paths : PATHS '{' path_elements '}' ';'"""
p[0] = Paths(p[3])
def p_counters(self, p):
- '''counters : COUNTERS ID '{' counter_elements '}' ';' '''
+ """counters : COUNTERS ID '{' counter_elements '}' ';'"""
p[0] = Counter(p[2], p[4])
def p_counter_elements(self, p):
- '''counter_elements : counter_element
- | counter_elements counter_element'''
+ """counter_elements : counter_element
+ | counter_elements counter_element"""
if len(p) == 2:
p[0] = [p[1]]
else:
@@ -643,46 +646,47 @@ class VPPAPIParser:
p[0] = p[1] + [p[2]]
def p_counter_element(self, p):
- '''counter_element : ID '{' counter_statements '}' ';' '''
- p[0] = {**{'name': p[1]}, **p[3]}
+ """counter_element : ID '{' counter_statements '}' ';'"""
+ p[0] = {**{"name": p[1]}, **p[3]}
def p_counter_statements(self, p):
- '''counter_statements : counter_statement
- | counter_statements counter_statement'''
+ """counter_statements : counter_statement
+ | counter_statements counter_statement"""
if len(p) == 2:
p[0] = p[1]
else:
p[0] = {**p[1], **p[2]}
def p_counter_statement(self, p):
- '''counter_statement : SEVERITY ID ';'
- | UNITS STRING_LITERAL ';'
- | DESCRIPTION STRING_LITERAL ';'
- | TYPE ID ';' '''
+ """counter_statement : SEVERITY ID ';'
+ | UNITS STRING_LITERAL ';'
+ | DESCRIPTION STRING_LITERAL ';'
+ | TYPE ID ';'"""
p[0] = {p[1]: p[2]}
def p_service(self, p):
- '''service : SERVICE '{' service_statements '}' ';' '''
+ """service : SERVICE '{' service_statements '}' ';'"""
p[0] = p[3]
def p_service_statements(self, p):
- '''service_statements : service_statement
- | service_statements service_statement'''
+ """service_statements : service_statement
+ | service_statements service_statement"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[2]]
def p_service_statement(self, p):
- '''service_statement : RPC ID RETURNS NULL ';'
- | RPC ID RETURNS ID ';'
- | RPC ID RETURNS STREAM ID ';'
- | RPC ID RETURNS ID EVENTS event_list ';' '''
+ """service_statement : RPC ID RETURNS NULL ';'
+ | RPC ID RETURNS ID ';'
+ | RPC ID RETURNS STREAM ID ';'
+ | RPC ID RETURNS ID EVENTS event_list ';'"""
if p[2] == p[4]:
# Verify that caller and reply differ
self._parse_error(
- 'Reply ID ({}) should not be equal to Caller ID'.format(p[2]),
- self._token_coord(p, 1))
+ "Reply ID ({}) should not be equal to Caller ID".format(p[2]),
+ self._token_coord(p, 1),
+ )
if len(p) == 8:
p[0] = Service(p[2], p[4], p[6])
elif len(p) == 7:
@@ -691,287 +695,300 @@ class VPPAPIParser:
p[0] = Service(p[2], p[4])
def p_service_statement2(self, p):
- '''service_statement : RPC ID RETURNS ID STREAM ID ';' '''
+ """service_statement : RPC ID RETURNS ID STREAM ID ';'"""
p[0] = Service(p[2], p[4], stream_message=p[6], stream=True)
def p_event_list(self, p):
- '''event_list : events
- | event_list events '''
+ """event_list : events
+ | event_list events"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[2]]
def p_event(self, p):
- '''events : ID
- | ID ',' '''
+ """events : ID
+ | ID ','"""
p[0] = p[1]
def p_enum(self, p):
- '''enum : ENUM ID '{' enum_statements '}' ';' '''
+ """enum : ENUM ID '{' enum_statements '}' ';'"""
p[0] = Enum(p[2], p[4])
def p_enum_type(self, p):
- ''' enum : ENUM ID ':' enum_size '{' enum_statements '}' ';' '''
+ """enum : ENUM ID ':' enum_size '{' enum_statements '}' ';'"""
if len(p) == 9:
p[0] = Enum(p[2], p[6], enumtype=p[4])
else:
p[0] = Enum(p[2], p[4])
def p_enumflag(self, p):
- '''enumflag : ENUMFLAG ID '{' enum_statements '}' ';' '''
+ """enumflag : ENUMFLAG ID '{' enum_statements '}' ';'"""
p[0] = EnumFlag(p[2], p[4])
def p_enumflag_type(self, p):
- ''' enumflag : ENUMFLAG ID ':' enumflag_size '{' enum_statements '}' ';' ''' # noqa : E502
+ """enumflag : ENUMFLAG ID ':' enumflag_size '{' enum_statements '}' ';'""" # noqa : E502
if len(p) == 9:
p[0] = EnumFlag(p[2], p[6], enumtype=p[4])
else:
p[0] = EnumFlag(p[2], p[4])
def p_enum_size(self, p):
- ''' enum_size : U8
- | U16
- | U32
- | I8
- | I16
- | I32 '''
+ """enum_size : U8
+ | U16
+ | U32
+ | I8
+ | I16
+ | I32"""
p[0] = p[1]
def p_enumflag_size(self, p):
- ''' enumflag_size : U8
- | U16
- | U32 '''
+ """enumflag_size : U8
+ | U16
+ | U32"""
p[0] = p[1]
def p_define(self, p):
- '''define : DEFINE ID '{' block_statements_opt '}' ';' '''
+ """define : DEFINE ID '{' block_statements_opt '}' ';'"""
self.fields = []
- p[0] = Define(p[2], [], p[4])
+ p[0] = Define(p[2], [], p[4], self.last_comment)
+ self.last_comment = None
def p_define_flist(self, p):
- '''define : flist DEFINE ID '{' block_statements_opt '}' ';' '''
+ """define : flist DEFINE ID '{' block_statements_opt '}' ';'"""
# Legacy typedef
- if 'typeonly' in p[1]:
- self._parse_error('legacy typedef. use typedef: {} {}[{}];'
- .format(p[1], p[2], p[4]),
- self._token_coord(p, 1))
+ if "typeonly" in p[1]:
+ self._parse_error(
+ "legacy typedef. use typedef: {} {}[{}];".format(p[1], p[2], p[4]),
+ self._token_coord(p, 1),
+ )
else:
- p[0] = Define(p[3], p[1], p[5])
+ p[0] = Define(p[3], p[1], p[5], self.last_comment)
+ self.last_comment = None
def p_flist(self, p):
- '''flist : flag
- | flist flag'''
+ """flist : flag
+ | flist flag"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[2]]
def p_flag(self, p):
- '''flag : MANUAL_PRINT
- | MANUAL_ENDIAN
- | DONT_TRACE
- | TYPEONLY
- | AUTOENDIAN
- | AUTOREPLY'''
+ """flag : MANUAL_PRINT
+ | MANUAL_ENDIAN
+ | DONT_TRACE
+ | TYPEONLY
+ | AUTOENDIAN
+ | AUTOREPLY"""
if len(p) == 1:
return
p[0] = p[1]
def p_typedef(self, p):
- '''typedef : TYPEDEF ID '{' block_statements_opt '}' ';' '''
+ """typedef : TYPEDEF ID '{' block_statements_opt '}' ';'"""
p[0] = Typedef(p[2], [], p[4])
def p_typedef_flist(self, p):
- '''typedef : flist TYPEDEF ID '{' block_statements_opt '}' ';' '''
+ """typedef : flist TYPEDEF ID '{' block_statements_opt '}' ';'"""
p[0] = Typedef(p[3], p[1], p[5])
def p_typedef_alias(self, p):
- '''typedef : TYPEDEF declaration '''
+ """typedef : TYPEDEF declaration"""
p[0] = Using(p[2].fieldname, [], p[2])
def p_typedef_alias_flist(self, p):
- '''typedef : flist TYPEDEF declaration '''
+ """typedef : flist TYPEDEF declaration"""
p[0] = Using(p[3].fieldname, p[1], p[3])
def p_block_statements_opt(self, p):
- '''block_statements_opt : block_statements '''
+ """block_statements_opt : block_statements"""
p[0] = p[1]
def p_block_statements(self, p):
- '''block_statements : block_statement
- | block_statements block_statement'''
+ """block_statements : block_statement
+ | block_statements block_statement"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[2]]
def p_block_statement(self, p):
- '''block_statement : declaration
- | option '''
+ """block_statement : declaration
+ | option"""
p[0] = p[1]
def p_enum_statements(self, p):
- '''enum_statements : enum_statement
- | enum_statements enum_statement'''
+ """enum_statements : enum_statement
+ | enum_statements enum_statement"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[2]]
def p_enum_statement(self, p):
- '''enum_statement : ID '=' NUM ','
- | ID ','
- | ID '[' field_options ']' ','
- | ID '=' NUM '[' field_options ']' ',' '''
+ """enum_statement : ID '=' NUM ','
+ | ID ','
+ | ID '[' field_options ']' ','
+ | ID '=' NUM '[' field_options ']' ','"""
if len(p) == 3:
- p[0] = {'id': p[1]}
+ p[0] = {"id": p[1]}
elif len(p) == 5:
- p[0] = {'id': p[1], 'value': p[3]}
+ p[0] = {"id": p[1], "value": p[3]}
elif len(p) == 6:
- p[0] = {'id': p[1], 'option': p[3]}
+ p[0] = {"id": p[1], "option": p[3]}
elif len(p) == 8:
- p[0] = {'id': p[1], 'value': p[3], 'option': p[5]}
+ p[0] = {"id": p[1], "value": p[3], "option": p[5]}
else:
- self._parse_error('ERROR', self._token_coord(p, 1))
+ self._parse_error("ERROR", self._token_coord(p, 1))
def p_field_options(self, p):
- '''field_options : field_option
- | field_options field_option'''
+ """field_options : field_option
+ | field_options field_option"""
if len(p) == 2:
p[0] = p[1]
else:
p[0] = {**p[1], **p[2]}
def p_field_option(self, p):
- '''field_option : ID
- | ID '=' assignee ','
- | ID '=' assignee
+ """field_option : ID
+ | ID '=' assignee ','
+ | ID '=' assignee
- '''
+ """
if len(p) == 2:
p[0] = {p[1]: None}
else:
p[0] = {p[1]: p[3]}
def p_variable_name(self, p):
- '''variable_name : ID
- | TYPE
- | SEVERITY
- | DESCRIPTION
- | COUNTERS
- | PATHS
- '''
+ """variable_name : ID
+ | TYPE
+ | SEVERITY
+ | DESCRIPTION
+ | COUNTERS
+ | PATHS
+ """
p[0] = p[1]
+ def p_comment(self, p):
+ """comment : COMMENT"""
+ self.last_comment = p[1]
+ p[0] = []
+
def p_declaration(self, p):
- '''declaration : type_specifier variable_name ';'
- | type_specifier variable_name '[' field_options ']' ';'
- '''
+ """declaration : type_specifier variable_name ';'
+ | type_specifier variable_name '[' field_options ']' ';'
+ """
if len(p) == 7:
p[0] = Field(p[1], p[2], p[4])
elif len(p) == 4:
p[0] = Field(p[1], p[2])
else:
- self._parse_error('ERROR', self._token_coord(p, 1))
+ self._parse_error("ERROR", self._token_coord(p, 1))
self.fields.append(p[2])
def p_declaration_array_vla(self, p):
- '''declaration : type_specifier variable_name '[' ']' ';' '''
+ """declaration : type_specifier variable_name '[' ']' ';'"""
p[0] = Array(p[1], p[2], 0, modern_vla=True)
def p_declaration_array(self, p):
- '''declaration : type_specifier variable_name '[' NUM ']' ';'
- | type_specifier variable_name '[' ID ']' ';' '''
+ """declaration : type_specifier variable_name '[' NUM ']' ';'
+ | type_specifier variable_name '[' ID ']' ';'"""
if len(p) != 7:
return self._parse_error(
- 'array: %s' % p.value,
- self._coord(lineno=p.lineno))
+ "array: %s" % p.value, self._coord(lineno=p.lineno)
+ )
# Make this error later
if type(p[4]) is int and p[4] == 0:
# XXX: Line number is wrong
- self._parse_warning('Old Style VLA: {} {}[{}];'
- .format(p[1], p[2], p[4]),
- self._token_coord(p, 1))
+ self._parse_warning(
+ "Old Style VLA: {} {}[{}];".format(p[1], p[2], p[4]),
+ self._token_coord(p, 1),
+ )
if type(p[4]) is str and p[4] not in self.fields:
# Verify that length field exists
- self._parse_error('Missing length field: {} {}[{}];'
- .format(p[1], p[2], p[4]),
- self._token_coord(p, 1))
+ self._parse_error(
+ "Missing length field: {} {}[{}];".format(p[1], p[2], p[4]),
+ self._token_coord(p, 1),
+ )
p[0] = Array(p[1], p[2], p[4])
def p_option(self, p):
- '''option : OPTION ID '=' assignee ';'
- | OPTION ID ';' '''
+ """option : OPTION ID '=' assignee ';'
+ | OPTION ID ';'"""
if len(p) == 4:
p[0] = Option(p[2])
else:
p[0] = Option(p[2], p[4])
def p_assignee(self, p):
- '''assignee : NUM
- | TRUE
- | FALSE
- | STRING_LITERAL '''
+ """assignee : NUM
+ | TRUE
+ | FALSE
+ | STRING_LITERAL"""
p[0] = p[1]
def p_type_specifier(self, p):
- '''type_specifier : U8
- | U16
- | U32
- | U64
- | I8
- | I16
- | I32
- | I64
- | F64
- | BOOL
- | STRING'''
+ """type_specifier : U8
+ | U16
+ | U32
+ | U64
+ | I8
+ | I16
+ | I32
+ | I64
+ | F64
+ | BOOL
+ | STRING"""
p[0] = p[1]
# Do a second pass later to verify that user defined types are defined
def p_typedef_specifier(self, p):
- '''type_specifier : ID '''
+ """type_specifier : ID"""
if p[1] not in global_types:
- self._parse_error('Undefined type: {}'.format(p[1]),
- self._token_coord(p, 1))
+ self._parse_error(
+ "Undefined type: {}".format(p[1]), self._token_coord(p, 1)
+ )
p[0] = p[1]
def p_union(self, p):
- '''union : UNION ID '{' block_statements_opt '}' ';' '''
+ """union : UNION ID '{' block_statements_opt '}' ';'"""
p[0] = Union(p[2], [], p[4])
def p_union_flist(self, p):
- '''union : flist UNION ID '{' block_statements_opt '}' ';' '''
+ """union : flist UNION ID '{' block_statements_opt '}' ';'"""
p[0] = Union(p[3], p[1], p[5])
# Error rule for syntax errors
def p_error(self, p):
if p:
- self._parse_error(
- 'before: %s' % p.value,
- self._coord(lineno=p.lineno))
+ if p.type == "COMMENT":
+ self.parser.errok()
+ return
+ self._parse_error("before: %s" % p.value, self._coord(lineno=p.lineno))
else:
- self._parse_error('At end of input', self.filename)
+ self._parse_error("At end of input", self.filename)
+ def build(self, **kwargs):
+ self.parser = yacc.yacc(module=self, **kwargs)
-class VPPAPI():
- def __init__(self, debug=False, filename='', logger=None, revision=None):
+class VPPAPI:
+ def __init__(self, debug=False, filename="", logger=None, revision=None):
self.lexer = lex.lex(module=VPPAPILexer(filename), debug=debug)
- self.parser = yacc.yacc(module=VPPAPIParser(filename, logger,
- revision=revision),
- write_tables=False, debug=debug)
+ self.parser = VPPAPIParser(filename, logger, revision=revision)
+ self.parser.build(write_tables=False, debug=debug)
self.logger = logger
self.revision = revision
self.filename = filename
def parse_string(self, code, debug=0, lineno=1):
self.lexer.lineno = lineno
- return self.parser.parse(code, lexer=self.lexer, debug=debug)
+ return self.parser.parser.parse(code, lexer=self.lexer, debug=debug)
def parse_fd(self, fd, debug=0):
data = fd.read()
@@ -979,38 +996,40 @@ class VPPAPI():
def parse_filename(self, filename, debug=0):
if self.revision:
- git_show = 'git show {}:{}'.format(self.revision, filename)
- proc = Popen(git_show.split(), stdout=PIPE, encoding='utf-8')
+ git_show = "git show {}:{}".format(self.revision, filename)
+ proc = Popen(git_show.split(), stdout=PIPE, encoding="utf-8")
try:
data, errs = proc.communicate()
if proc.returncode != 0:
- print('File not found: {}:{}'
- .format(self.revision, filename), file=sys.stderr)
+ print(
+ "File not found: {}:{}".format(self.revision, filename),
+ file=sys.stderr,
+ )
sys.exit(2)
return self.parse_string(data, debug=debug)
except Exception:
sys.exit(3)
else:
try:
- with open(filename, encoding='utf-8') as fd:
+ with open(filename, encoding="utf-8") as fd:
return self.parse_fd(fd, None)
except FileNotFoundError:
- print('File not found: {}'.format(filename), file=sys.stderr)
+ print("File not found: {}".format(filename), file=sys.stderr)
sys.exit(2)
def process(self, objs):
s = {}
- s['Option'] = {}
- s['Define'] = []
- s['Service'] = []
- s['types'] = []
- s['Import'] = []
- s['Counters'] = []
- s['Paths'] = []
+ s["Option"] = {}
+ s["Define"] = []
+ s["Service"] = []
+ s["types"] = []
+ s["Import"] = []
+ s["Counters"] = []
+ s["Paths"] = []
crc = 0
for o in objs:
try:
- crc = binascii.crc32(o.crc, crc) & 0xffffffff
+ crc = binascii.crc32(o.crc, crc) & 0xFFFFFFFF
except AttributeError:
pass
@@ -1021,82 +1040,84 @@ class VPPAPI():
else:
o.process(s)
- msgs = {d.name: d for d in s['Define']}
- svcs = {s.caller: s for s in s['Service']}
- replies = {s.reply: s for s in s['Service']}
+ msgs = {d.name: d for d in s["Define"]}
+ svcs = {s.caller: s for s in s["Service"]}
+ replies = {s.reply: s for s in s["Service"]}
seen_services = {}
- s['file_crc'] = crc
+ s["file_crc"] = crc
for service in svcs:
if service not in msgs:
raise ValueError(
- 'Service definition refers to unknown message'
- ' definition: {}'.format(service))
- if svcs[service].reply != 'null' and \
- svcs[service].reply not in msgs:
- raise ValueError('Service definition refers to unknown message'
- ' definition in reply: {}'
- .format(svcs[service].reply))
+ "Service definition refers to unknown message"
+ " definition: {}".format(service)
+ )
+ if svcs[service].reply != "null" and svcs[service].reply not in msgs:
+ raise ValueError(
+ "Service definition refers to unknown message"
+ " definition in reply: {}".format(svcs[service].reply)
+ )
if service in replies:
- raise ValueError('Service definition refers to message'
- ' marked as reply: {}'.format(service))
+ raise ValueError(
+ "Service definition refers to message"
+ " marked as reply: {}".format(service)
+ )
for event in svcs[service].events:
if event not in msgs:
- raise ValueError('Service definition refers to unknown '
- 'event: {} in message: {}'
- .format(event, service))
+ raise ValueError(
+ "Service definition refers to unknown "
+ "event: {} in message: {}".format(event, service)
+ )
seen_services[event] = True
# Create services implicitly
for d in msgs:
if d in seen_services:
continue
- if d.endswith('_reply'):
+ if d.endswith("_reply"):
if d[:-6] in svcs:
continue
if d[:-6] not in msgs:
- raise ValueError('{} missing calling message'
- .format(d))
+ raise ValueError("{} missing calling message".format(d))
continue
- if d.endswith('_dump'):
+ if d.endswith("_dump"):
if d in svcs:
continue
- if d[:-5]+'_details' in msgs:
- s['Service'].append(Service(d, d[:-5]+'_details',
- stream=True))
+ if d[:-5] + "_details" in msgs:
+ s["Service"].append(Service(d, d[:-5] + "_details", stream=True))
else:
- raise ValueError('{} missing details message'
- .format(d))
+ raise ValueError("{} missing details message".format(d))
continue
- if d.endswith('_details'):
- if d[:-8]+'_get' in msgs:
- if d[:-8]+'_get' in svcs:
+ if d.endswith("_details"):
+ if d[:-8] + "_get" in msgs:
+ if d[:-8] + "_get" in svcs:
continue
- raise ValueError('{} should be in a stream service'
- .format(d[:-8]+'_get'))
- if d[:-8]+'_dump' in msgs:
+ raise ValueError(
+ "{} should be in a stream service".format(d[:-8] + "_get")
+ )
+ if d[:-8] + "_dump" in msgs:
continue
- raise ValueError('{} missing dump or get message'
- .format(d))
+ raise ValueError("{} missing dump or get message".format(d))
if d in svcs:
continue
- if d+'_reply' in msgs:
- s['Service'].append(Service(d, d+'_reply'))
+ if d + "_reply" in msgs:
+ s["Service"].append(Service(d, d + "_reply"))
else:
raise ValueError(
- '{} missing reply message ({}) or service definition'
- .format(d, d+'_reply'))
+ "{} missing reply message ({}) or service definition".format(
+ d, d + "_reply"
+ )
+ )
return s
def process_imports(self, objs, in_import, result): # -> List
for o in objs:
# Only allow the following object types from imported file
- if in_import and not isinstance(o, (Enum, Import, Typedef,
- Union, Using)):
+ if in_import and not isinstance(o, (Enum, Import, Typedef, Union, Using)):
continue
if isinstance(o, Import):
result.append(o)
@@ -1109,7 +1130,7 @@ class VPPAPI():
# Add message ids to each message.
def add_msg_id(s):
for o in s:
- o.block.insert(0, Field('u16', '_vl_msg_id'))
+ o.block.insert(0, Field("u16", "_vl_msg_id"))
return s
@@ -1129,11 +1150,11 @@ def dirlist_get():
def foldup_blocks(block, crc):
for b in block:
# Look up CRC in user defined types
- if b.fieldtype.startswith('vl_api_'):
+ if b.fieldtype.startswith("vl_api_"):
# Recursively
t = global_types[b.fieldtype]
try:
- crc = binascii.crc32(t.crc, crc) & 0xffffffff
+ crc = binascii.crc32(t.crc, crc) & 0xFFFFFFFF
crc = foldup_blocks(t.block, crc)
except AttributeError:
pass
@@ -1142,49 +1163,38 @@ def foldup_blocks(block, crc):
def foldup_crcs(s):
for f in s:
- f.crc = foldup_blocks(f.block,
- binascii.crc32(f.crc) & 0xffffffff)
-
-
-#
-# Main
-#
-def main():
- if sys.version_info < (3, 5,):
- log.exception('vppapigen requires a supported version of python. '
- 'Please use version 3.5 or greater. '
- 'Using %s', sys.version)
- return 1
-
- cliparser = argparse.ArgumentParser(description='VPP API generator')
- cliparser.add_argument('--pluginpath', default="")
- cliparser.add_argument('--includedir', action='append')
- cliparser.add_argument('--outputdir', action='store')
- cliparser.add_argument('--input')
- cliparser.add_argument('--output', nargs='?',
- type=argparse.FileType('w', encoding='UTF-8'),
- default=sys.stdout)
-
- cliparser.add_argument('output_module', nargs='?', default='C')
- cliparser.add_argument('--debug', action='store_true')
- cliparser.add_argument('--show-name', nargs=1)
- cliparser.add_argument('--git-revision',
- help="Git revision to use for opening files")
- args = cliparser.parse_args()
-
- dirlist_add(args.includedir)
- if not args.debug:
+ f.crc = foldup_blocks(f.block, binascii.crc32(f.crc) & 0xFFFFFFFF)
+
+
+def run_vppapigen(
+ input_file=None,
+ output=sys.stdout,
+ includedir=None,
+ debug=False,
+ show_name=None,
+ output_module="C",
+ outputdir=None,
+ pluginpath="",
+ git_revision=None,
+):
+ # reset globals
+ dirlist.clear()
+ global_types.clear()
+ seen_imports.clear()
+
+ dirlist_add(includedir)
+ if not debug:
sys.excepthook = exception_handler
# Filename
- if args.show_name:
- filename = args.show_name[0]
- elif args.input:
- filename = args.input
+ if show_name:
+ filename = show_name[0]
+ elif input_file:
+ filename = input_file
else:
- filename = ''
+ filename = ""
- if args.debug:
+ if debug:
logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
else:
logging.basicConfig()
@@ -1195,44 +1205,38 @@ def main():
from importlib.machinery import SourceFileLoader
# Default path
- pluginpath = ''
- if not args.pluginpath:
+ pluginpath = ""
+ if not pluginpath:
cand = []
cand.append(os.path.dirname(os.path.realpath(__file__)))
- cand.append(os.path.dirname(os.path.realpath(__file__)) +
- '/../share/vpp/')
+ cand.append(os.path.dirname(os.path.realpath(__file__)) + "/../share/vpp/")
for c in cand:
- c += '/'
- if os.path.isfile('{}vppapigen_{}.py'
- .format(c, args.output_module.lower())):
+ c += "/"
+ if os.path.isfile("{}vppapigen_{}.py".format(c, output_module.lower())):
pluginpath = c
break
else:
- pluginpath = args.pluginpath + '/'
- if pluginpath == '':
- log.exception('Output plugin not found')
+ pluginpath = pluginpath + "/"
+ if pluginpath == "":
+ log.exception("Output plugin not found")
return 1
- module_path = '{}vppapigen_{}.py'.format(pluginpath,
- args.output_module.lower())
+ module_path = "{}vppapigen_{}.py".format(pluginpath, output_module.lower())
try:
- plugin = SourceFileLoader(args.output_module,
- module_path).load_module()
+ plugin = SourceFileLoader(output_module, module_path).load_module()
except Exception as err:
- log.exception('Error importing output plugin: %s, %s',
- module_path, err)
+ log.exception("Error importing output plugin: %s, %s", module_path, err)
return 1
- parser = VPPAPI(debug=args.debug, filename=filename, logger=log,
- revision=args.git_revision)
+ parser = VPPAPI(debug=debug, filename=filename, logger=log, revision=git_revision)
try:
- if not args.input:
+ if not input_file:
parsed_objects = parser.parse_fd(sys.stdin, log)
else:
- parsed_objects = parser.parse_filename(args.input, log)
+ parsed_objects = parser.parse_filename(input_file, log)
except ParseError as e:
- print('Parse error: ', e, file=sys.stderr)
+ print("Parse error: ", e, file=sys.stderr)
sys.exit(1)
# Build a list of objects. Hash of lists.
@@ -1250,32 +1254,90 @@ def main():
else:
s = parser.process(parsed_objects)
imports = parser.process_imports(parsed_objects, False, result)
- s['imported'] = parser.process(imports)
+ s["imported"] = parser.process(imports)
# Add msg_id field
- s['Define'] = add_msg_id(s['Define'])
+ s["Define"] = add_msg_id(s["Define"])
# Fold up CRCs
- foldup_crcs(s['Define'])
+ foldup_crcs(s["Define"])
#
# Debug
- if args.debug:
+ if debug:
import pprint
+
pp = pprint.PrettyPrinter(indent=4, stream=sys.stderr)
- for t in s['Define']:
+ for t in s["Define"]:
pp.pprint([t.name, t.flags, t.block])
- for t in s['types']:
+ for t in s["types"]:
pp.pprint([t.name, t.block])
- result = plugin.run(args, filename, s)
+ result = plugin.run(outputdir, filename, s)
if result:
- print(result, file=args.output)
+ if isinstance(output, str):
+ with open(output, "w", encoding="UTF-8") as f:
+ print(result, file=f)
+ else:
+ print(result, file=output)
else:
- log.exception('Running plugin failed: %s %s', filename, result)
+ log.exception("Running plugin failed: %s %s", filename, result)
return 1
return 0
-if __name__ == '__main__':
+def run_kw_vppapigen(kwargs):
+ return run_vppapigen(**kwargs)
+
+
+#
+# Main
+#
+def main():
+ if sys.version_info < (
+ 3,
+ 5,
+ ):
+ log.exception(
+ "vppapigen requires a supported version of python. "
+ "Please use version 3.5 or greater. "
+ "Using %s",
+ sys.version,
+ )
+ return 1
+
+ cliparser = argparse.ArgumentParser(description="VPP API generator")
+ cliparser.add_argument("--pluginpath", default="")
+ cliparser.add_argument("--includedir", action="append")
+ cliparser.add_argument("--outputdir", action="store")
+ cliparser.add_argument("--input")
+ cliparser.add_argument(
+ "--output",
+ nargs="?",
+ type=argparse.FileType("w", encoding="UTF-8"),
+ default=sys.stdout,
+ )
+
+ cliparser.add_argument("output_module", nargs="?", default="C")
+ cliparser.add_argument("--debug", action="store_true")
+ cliparser.add_argument("--show-name", nargs=1)
+ cliparser.add_argument(
+ "--git-revision", help="Git revision to use for opening files"
+ )
+ args = cliparser.parse_args()
+
+ return run_vppapigen(
+ includedir=args.includedir,
+ debug=args.debug,
+ outputdir=args.outputdir,
+ show_name=args.show_name,
+ input_file=args.input,
+ output_module=args.output_module,
+ pluginpath=args.pluginpath,
+ git_revision=args.git_revision,
+ output=args.output,
+ )
+
+
+if __name__ == "__main__":
sys.exit(main())
diff --git a/src/tools/vppapigen/vppapigen_c.py b/src/tools/vppapigen/vppapigen_c.py
index 8564ade1080..fb7de0a023f 100644..100755
--- a/src/tools/vppapigen/vppapigen_c.py
+++ b/src/tools/vppapigen/vppapigen_c.py
@@ -18,12 +18,13 @@
# binary API format
#
-'''
+"""
This module creates C code for core VPP, VPP plugins and client side VAT and
VAT2 tests.
-'''
+"""
import datetime
+import itertools
import os
import time
import sys
@@ -34,95 +35,102 @@ process_imports = False
###############################################################################
-class ToJSON():
- '''Class to generate functions converting from VPP binary API to JSON.'''
+class ToJSON:
+ """Class to generate functions converting from VPP binary API to JSON."""
+
_dispatch = {}
- noprint_fields = {'_vl_msg_id': None,
- 'client_index': None,
- 'context': None}
- is_number = {'u8': None,
- 'i8': None,
- 'u16': None,
- 'i16': None,
- 'u32': None,
- 'i32': None,
- 'u64': None,
- 'i64': None,
- 'f64': None,
- }
+ noprint_fields = {"_vl_msg_id": None, "client_index": None, "context": None}
+ is_number = {
+ "u8": None,
+ "i8": None,
+ "u16": None,
+ "i16": None,
+ "u32": None,
+ "i32": None,
+ "u64": None,
+ "i64": None,
+ "f64": None,
+ }
def __init__(self, module, types, defines, imported_types, stream):
self.stream = stream
self.module = module
self.defines = defines
self.types = types
- self.types_hash = {'vl_api_'+d.name+'_t':
- d for d in types + imported_types}
+ self.types_hash = {"vl_api_" + d.name + "_t": d for d in types + imported_types}
self.defines_hash = {d.name: d for d in defines}
def header(self):
- '''Output the top boilerplate.'''
+ """Output the top boilerplate."""
write = self.stream.write
- write('#ifndef included_{}_api_tojson_h\n'.format(self.module))
- write('#define included_{}_api_tojson_h\n'.format(self.module))
- write('#include <vppinfra/cJSON.h>\n\n')
- write('#include <vat2/jsonconvert.h>\n\n')
+ write("#ifndef included_{}_api_tojson_h\n".format(self.module))
+ write("#define included_{}_api_tojson_h\n".format(self.module))
+ write("#include <vppinfra/cJSON.h>\n\n")
+ write("#include <vppinfra/jsonformat.h>\n\n")
+ if self.module == "interface_types":
+ write("#define vl_printfun\n")
+ write("#include <vnet/interface_types.api.h>\n\n")
def footer(self):
- '''Output the bottom boilerplate.'''
+ """Output the bottom boilerplate."""
write = self.stream.write
- write('#endif\n')
+ write("#endif\n")
def get_base_type(self, t):
vt_type = None
try:
vt = self.types_hash[t]
- if vt.type == 'Using' and 'length' not in vt.alias:
- vt_type = vt.alias['type']
+ if vt.type == "Using" and "length" not in vt.alias:
+ vt_type = vt.alias["type"]
except KeyError:
vt = t
return vt, vt_type
def get_json_func(self, t):
- '''Given the type, returns the function to use to create a
- cJSON object'''
+ """Given the type, returns the function to use to create a
+ cJSON object"""
vt, vt_type = self.get_base_type(t)
if t in self.is_number or vt_type in self.is_number:
- return 'cJSON_AddNumberToObject', '', False
- if t == 'bool':
- return 'cJSON_AddBoolToObject', '', False
+ return "cJSON_AddNumberToObject", "", False
+ if t == "bool":
+ return "cJSON_AddBoolToObject", "", False
# Lookup type name check if it's enum
- if vt.type == 'Enum' or vt.type == 'EnumFlag':
- return '{t}_tojson'.format(t=t), '', True
- return '{t}_tojson'.format(t=t), '&', True
+ if vt.type == "Enum" or vt.type == "EnumFlag":
+ return "{t}_tojson".format(t=t), "", True
+ return "{t}_tojson".format(t=t), "&", True
def get_json_array_func(self, t):
- '''Given a type returns the function to create a cJSON object
- for arrays.'''
+ """Given a type returns the function to create a cJSON object
+ for arrays."""
if t in self.is_number:
- return 'cJSON_CreateNumber', ''
- if t == 'bool':
- return 'cJSON_CreateBool', ''
+ return "cJSON_CreateNumber", ""
+ if t == "bool":
+ return "cJSON_CreateBool", ""
vt, vt_type = self.get_base_type(t)
- if vt.type == 'Enum' or vt.type == 'EnumFlag':
- return '{t}_tojson'.format(t=t), ''
- return '{t}_tojson'.format(t=t), '&'
+ if vt.type == "Enum" or vt.type == "EnumFlag":
+ return "{t}_tojson".format(t=t), ""
+ return "{t}_tojson".format(t=t), "&"
def print_string(self, o):
- '''Create cJSON object from vl_api_string_t'''
+ """Create cJSON object from vl_api_string_t"""
write = self.stream.write
if o.modern_vla:
- write(' vl_api_string_cJSON_AddToObject(o, "{n}", &a->{n});\n'
- .format(n=o.fieldname))
+ write(
+ ' vl_api_string_cJSON_AddToObject(o, "{n}", &a->{n});\n'.format(
+ n=o.fieldname
+ )
+ )
else:
-
- write(' cJSON_AddStringToObject(o, "{n}", (char *)a->{n});\n'
- .format(n=o.fieldname))
+ write(
+ ' cJSON_AddStringToObject(o, "{n}", (char *)a->{n});\n'.format(
+ n=o.fieldname
+ )
+ )
def print_field(self, o):
- '''Called for every field in a typedef or define.'''
+ """Called for every field in a typedef or define."""
write = self.stream.write
if o.fieldname in self.noprint_fields:
return
@@ -130,19 +138,21 @@ class ToJSON():
f, p, newobj = self.get_json_func(o.fieldtype)
if newobj:
- write(' cJSON_AddItemToObject(o, "{n}", {f}({p}a->{n}));\n'
- .format(f=f, p=p, n=o.fieldname))
+ write(
+ ' cJSON_AddItemToObject(o, "{n}", {f}({p}a->{n}));\n'.format(
+ f=f, p=p, n=o.fieldname
+ )
+ )
else:
- write(' {f}(o, "{n}", {p}a->{n});\n'
- .format(f=f, p=p, n=o.fieldname))
+ write(' {f}(o, "{n}", {p}a->{n});\n'.format(f=f, p=p, n=o.fieldname))
- _dispatch['Field'] = print_field
+ _dispatch["Field"] = print_field
def print_array(self, o):
- '''Converts a VPP API array to cJSON array.'''
+ """Converts a VPP API array to cJSON array."""
write = self.stream.write
- forloop = '''\
+ forloop = """\
{{
int i;
cJSON *array = cJSON_AddArrayToObject(o, "{n}");
@@ -150,232 +160,263 @@ class ToJSON():
cJSON_AddItemToArray(array, {f}({p}a->{n}[i]));
}}
}}
-'''
+"""
- if o.fieldtype == 'string':
+ if o.fieldtype == "string":
self.print_string(o)
return
- lfield = 'a->' + o.lengthfield if o.lengthfield else o.length
- if o.fieldtype == 'u8':
- write(' {\n')
+ lfield = "a->" + o.lengthfield if o.lengthfield else o.length
+ if o.fieldtype == "u8":
+ write(" {\n")
# What is length field doing here?
- write(' u8 *s = format(0, "0x%U", format_hex_bytes, '
- '&a->{n}, {lfield});\n'
- .format(n=o.fieldname, lfield=lfield))
- write(' cJSON_AddStringToObject(o, "{n}", (char *)s);\n'
- .format(n=o.fieldname))
- write(' vec_free(s);\n')
- write(' }\n')
+ write(
+ ' u8 *s = format(0, "0x%U", format_hex_bytes, '
+ "&a->{n}, {lfield});\n".format(n=o.fieldname, lfield=lfield)
+ )
+ write(
+ ' cJSON_AddStringToObject(o, "{n}", (char *)s);\n'.format(
+ n=o.fieldname
+ )
+ )
+ write(" vec_free(s);\n")
+ write(" }\n")
return
f, p = self.get_json_array_func(o.fieldtype)
- write(forloop.format(lfield=lfield,
- t=o.fieldtype,
- n=o.fieldname,
- f=f,
- p=p))
+ write(forloop.format(lfield=lfield, t=o.fieldtype, n=o.fieldname, f=f, p=p))
- _dispatch['Array'] = print_array
+ _dispatch["Array"] = print_array
def print_enum(self, o):
- '''Create cJSON object (string) for VPP API enum'''
+ """Create cJSON object (string) for VPP API enum"""
write = self.stream.write
- write('static inline cJSON *vl_api_{name}_t_tojson '
- '(vl_api_{name}_t a) {{\n'.format(name=o.name))
+ write(
+ "static inline cJSON *vl_api_{name}_t_tojson "
+ "(vl_api_{name}_t a) {{\n".format(name=o.name)
+ )
write(" switch(a) {\n")
for b in o.block:
write(" case %s:\n" % b[1])
write(' return cJSON_CreateString("{}");\n'.format(b[0]))
write(' default: return cJSON_CreateString("Invalid ENUM");\n')
- write(' }\n')
- write(' return 0;\n')
- write('}\n')
+ write(" }\n")
+ write(" return 0;\n")
+ write("}\n")
- _dispatch['Enum'] = print_enum
+ _dispatch["Enum"] = print_enum
def print_enum_flag(self, o):
- '''Create cJSON object (string) for VPP API enum'''
+ """Create cJSON object (string) for VPP API enum"""
write = self.stream.write
- write('static inline cJSON *vl_api_{name}_t_tojson '
- '(vl_api_{name}_t a) {{\n'.format(name=o.name))
- write(' cJSON *array = cJSON_CreateArray();\n')
+ write(
+ "static inline cJSON *vl_api_{name}_t_tojson "
+ "(vl_api_{name}_t a) {{\n".format(name=o.name)
+ )
+ write(" cJSON *array = cJSON_CreateArray();\n")
for b in o.block:
- write(' if (a & {})\n'.format(b[0]))
- write(' cJSON_AddItemToArray(array, cJSON_CreateString("{}"));\n'.format(b[0]))
- write(' return array;\n')
- write('}\n')
+ if b[1] == 0:
+ continue
+ write(" if (a & {})\n".format(b[0]))
+ write(
+ ' cJSON_AddItemToArray(array, cJSON_CreateString("{}"));\n'.format(
+ b[0]
+ )
+ )
+ write(" return array;\n")
+ write("}\n")
- _dispatch['EnumFlag'] = print_enum_flag
+ _dispatch["EnumFlag"] = print_enum_flag
def print_typedef(self, o):
- '''Create cJSON (dictionary) object from VPP API typedef'''
+ """Create cJSON (dictionary) object from VPP API typedef"""
write = self.stream.write
- write('static inline cJSON *vl_api_{name}_t_tojson '
- '(vl_api_{name}_t *a) {{\n'.format(name=o.name))
- write(' cJSON *o = cJSON_CreateObject();\n')
+ write(
+ "static inline cJSON *vl_api_{name}_t_tojson "
+ "(vl_api_{name}_t *a) {{\n".format(name=o.name)
+ )
+ write(" cJSON *o = cJSON_CreateObject();\n")
for t in o.block:
self._dispatch[t.type](self, t)
- write(' return o;\n')
- write('}\n')
+ write(" return o;\n")
+ write("}\n")
def print_define(self, o):
- '''Create cJSON (dictionary) object from VPP API define'''
+ """Create cJSON (dictionary) object from VPP API define"""
write = self.stream.write
- write('static inline cJSON *vl_api_{name}_t_tojson '
- '(vl_api_{name}_t *a) {{\n'.format(name=o.name))
- write(' cJSON *o = cJSON_CreateObject();\n')
- write(' cJSON_AddStringToObject(o, "_msgname", "{}");\n'
- .format(o.name))
+ write(
+ "static inline cJSON *vl_api_{name}_t_tojson "
+ "(vl_api_{name}_t *a) {{\n".format(name=o.name)
+ )
+ write(" cJSON *o = cJSON_CreateObject();\n")
+ write(' cJSON_AddStringToObject(o, "_msgname", "{}");\n'.format(o.name))
+ write(
+ ' cJSON_AddStringToObject(o, "_crc", "{crc:08x}");\n'.format(crc=o.crc)
+ )
for t in o.block:
self._dispatch[t.type](self, t)
- write(' return o;\n')
- write('}\n')
+ write(" return o;\n")
+ write("}\n")
def print_using(self, o):
- '''Create cJSON (dictionary) object from VPP API aliased type'''
+ """Create cJSON (dictionary) object from VPP API aliased type"""
if o.manual_print:
return
write = self.stream.write
- write('static inline cJSON *vl_api_{name}_t_tojson '
- '(vl_api_{name}_t *a) {{\n'.format(name=o.name))
-
- write(' u8 *s = format(0, "%U", format_vl_api_{}_t, a);\n'
- .format(o.name))
- write(' cJSON *o = cJSON_CreateString((char *)s);\n')
- write(' vec_free(s);\n')
- write(' return o;\n')
- write('}\n')
-
- _dispatch['Typedef'] = print_typedef
- _dispatch['Define'] = print_define
- _dispatch['Using'] = print_using
- _dispatch['Union'] = print_typedef
+ write(
+ "static inline cJSON *vl_api_{name}_t_tojson "
+ "(vl_api_{name}_t *a) {{\n".format(name=o.name)
+ )
+
+ write(' u8 *s = format(0, "%U", format_vl_api_{}_t, a);\n'.format(o.name))
+ write(" cJSON *o = cJSON_CreateString((char *)s);\n")
+ write(" vec_free(s);\n")
+ write(" return o;\n")
+ write("}\n")
+
+ _dispatch["Typedef"] = print_typedef
+ _dispatch["Define"] = print_define
+ _dispatch["Using"] = print_using
+ _dispatch["Union"] = print_typedef
def generate_function(self, t):
- '''Main entry point'''
+ """Main entry point"""
write = self.stream.write
if t.manual_print:
- write('/* Manual print {} */\n'.format(t.name))
+ write("/* Manual print {} */\n".format(t.name))
return
self._dispatch[t.type](self, t)
def generate_types(self):
- '''Main entry point'''
+ """Main entry point"""
for t in self.types:
self.generate_function(t)
def generate_defines(self):
- '''Main entry point'''
+ """Main entry point"""
for t in self.defines:
self.generate_function(t)
-class FromJSON():
- '''
+class FromJSON:
+ """
Parse JSON objects into VPP API binary message structures.
- '''
+ """
+
_dispatch = {}
- noprint_fields = {'_vl_msg_id': None,
- 'client_index': None,
- 'context': None}
- is_number = {'u8': None,
- 'i8': None,
- 'u16': None,
- 'i16': None,
- 'u32': None,
- 'i32': None,
- 'u64': None,
- 'i64': None,
- 'f64': None,
- }
+ noprint_fields = {"_vl_msg_id": None, "client_index": None, "context": None}
+ is_number = {
+ "u8": None,
+ "i8": None,
+ "u16": None,
+ "i16": None,
+ "u32": None,
+ "i32": None,
+ "u64": None,
+ "i64": None,
+ "f64": None,
+ }
def __init__(self, module, types, defines, imported_types, stream):
self.stream = stream
self.module = module
self.defines = defines
self.types = types
- self.types_hash = {'vl_api_'+d.name+'_t':
- d for d in types + imported_types}
+ self.types_hash = {"vl_api_" + d.name + "_t": d for d in types + imported_types}
self.defines_hash = {d.name: d for d in defines}
def header(self):
- '''Output the top boilerplate.'''
+ """Output the top boilerplate."""
write = self.stream.write
- write('#ifndef included_{}_api_fromjson_h\n'.format(self.module))
- write('#define included_{}_api_fromjson_h\n'.format(self.module))
- write('#include <vppinfra/cJSON.h>\n\n')
- write('#include <vat2/jsonconvert.h>\n\n')
+ write("#ifndef included_{}_api_fromjson_h\n".format(self.module))
+ write("#define included_{}_api_fromjson_h\n".format(self.module))
+ write("#include <vppinfra/cJSON.h>\n\n")
+ write("#include <vppinfra/jsonformat.h>\n\n")
write('#pragma GCC diagnostic ignored "-Wunused-label"\n')
def is_base_type(self, t):
- '''Check if a type is one of the VPP API base types'''
+ """Check if a type is one of the VPP API base types"""
if t in self.is_number:
return True
- if t == 'bool':
+ if t == "bool":
return True
return False
def footer(self):
- '''Output the bottom boilerplate.'''
+ """Output the bottom boilerplate."""
write = self.stream.write
- write('#endif\n')
+ write("#endif\n")
def print_string(self, o, toplevel=False):
- '''Convert JSON string to vl_api_string_t'''
+ """Convert JSON string to vl_api_string_t"""
write = self.stream.write
msgvar = "a" if toplevel else "*mp"
msgsize = "l" if toplevel else "*len"
if o.modern_vla:
- write(' char *p = cJSON_GetStringValue(item);\n')
- write(' size_t plen = strlen(p);\n')
- write(' {msgvar} = realloc({msgvar}, {msgsize} + plen);\n'
- .format(msgvar=msgvar, msgsize=msgsize))
- write(' if ({msgvar} == 0) goto error;\n'.format(msgvar=msgvar))
- write(' vl_api_c_string_to_api_string(p, (void *){msgvar} + '
- '{msgsize} - sizeof(vl_api_string_t));\n'
- .format(msgvar=msgvar, msgsize=msgsize))
- write(' {msgsize} += plen;\n'.format(msgsize=msgsize))
+ write(" char *p = cJSON_GetStringValue(item);\n")
+ write(" size_t plen = strlen(p);\n")
+ write(
+ " {msgvar} = cJSON_realloc({msgvar}, {msgsize} + plen, {msgsize});\n".format(
+ msgvar=msgvar, msgsize=msgsize
+ )
+ )
+ write(" if ({msgvar} == 0) goto error;\n".format(msgvar=msgvar))
+ write(
+ " vl_api_c_string_to_api_string(p, (void *){msgvar} + "
+ "{msgsize} - sizeof(vl_api_string_t));\n".format(
+ msgvar=msgvar, msgsize=msgsize
+ )
+ )
+ write(" {msgsize} += plen;\n".format(msgsize=msgsize))
else:
- write(' strncpy_s((char *)a->{n}, sizeof(a->{n}), '
- 'cJSON_GetStringValue(item), sizeof(a->{n}) - 1);\n'
- .format(n=o.fieldname))
+ write(
+ " strncpy_s((char *)a->{n}, sizeof(a->{n}), "
+ "cJSON_GetStringValue(item), sizeof(a->{n}) - 1);\n".format(
+ n=o.fieldname
+ )
+ )
def print_field(self, o, toplevel=False):
- '''Called for every field in a typedef or define.'''
+ """Called for every field in a typedef or define."""
write = self.stream.write
if o.fieldname in self.noprint_fields:
return
is_bt = self.is_base_type(o.fieldtype)
- t = 'vl_api_{}'.format(o.fieldtype) if is_bt else o.fieldtype
+ t = "vl_api_{}".format(o.fieldtype) if is_bt else o.fieldtype
msgvar = "(void **)&a" if toplevel else "mp"
msgsize = "&l" if toplevel else "len"
if is_bt:
- write(' vl_api_{t}_fromjson(item, &a->{n});\n'
- .format(t=o.fieldtype, n=o.fieldname))
+ write(
+ " vl_api_{t}_fromjson(item, &a->{n});\n".format(
+ t=o.fieldtype, n=o.fieldname
+ )
+ )
else:
- write(' if ({t}_fromjson({msgvar}, '
- '{msgsize}, item, &a->{n}) < 0) goto error;\n'
- .format(t=t, n=o.fieldname, msgvar=msgvar, msgsize=msgsize))
+ write(
+ " if ({t}_fromjson({msgvar}, "
+ "{msgsize}, item, &a->{n}) < 0) goto error;\n".format(
+ t=t, n=o.fieldname, msgvar=msgvar, msgsize=msgsize
+ )
+ )
- _dispatch['Field'] = print_field
+ _dispatch["Field"] = print_field
def print_array(self, o, toplevel=False):
- '''Convert JSON array to VPP API array'''
+ """Convert JSON array to VPP API array"""
write = self.stream.write
- forloop = '''\
+ forloop = """\
{{
int i;
cJSON *array = cJSON_GetObjectItem(o, "{n}");
@@ -386,14 +427,14 @@ class FromJSON():
{call}
}}
}}
-'''
- forloop_vla = '''\
+"""
+ forloop_vla = """\
{{
int i;
cJSON *array = cJSON_GetObjectItem(o, "{n}");
int size = cJSON_GetArraySize(array);
{lfield} = size;
- {realloc} = realloc({realloc}, {msgsize} + sizeof({t}) * size);
+ {realloc} = cJSON_realloc({realloc}, {msgsize} + sizeof({t}) * size, {msgsize});
{t} *d = (void *){realloc} + {msgsize};
{msgsize} += sizeof({t}) * size;
for (i = 0; i < size; i++) {{
@@ -401,271 +442,296 @@ class FromJSON():
{call}
}}
}}
-'''
+"""
t = o.fieldtype
- if o.fieldtype == 'string':
+ if o.fieldtype == "string":
self.print_string(o, toplevel)
return
- lfield = 'a->' + o.lengthfield if o.lengthfield else o.length
+ lfield = "a->" + o.lengthfield if o.lengthfield else o.length
msgvar = "(void **)&a" if toplevel else "mp"
realloc = "a" if toplevel else "*mp"
msgsize = "l" if toplevel else "*len"
- if o.fieldtype == 'u8':
+ if o.fieldtype == "u8":
if o.lengthfield:
- write(' s = u8string_fromjson(o, "{}");\n'
- .format(o.fieldname))
- write(' if (!s) goto error;\n')
- write(' {} = vec_len(s);\n'.format(lfield))
-
- write(' {realloc} = realloc({realloc}, {msgsize} + '
- 'vec_len(s));\n'.format(msgvar=msgvar, msgsize=msgsize, realloc=realloc))
- write(' memcpy((void *){realloc} + {msgsize}, s, '
- 'vec_len(s));\n'.format(realloc=realloc, msgsize=msgsize))
- write(' {msgsize} += vec_len(s);\n'.format(msgsize=msgsize))
-
- write(' vec_free(s);\n')
+ write(' s = u8string_fromjson(o, "{}");\n'.format(o.fieldname))
+ write(" if (!s) goto error;\n")
+ write(" {} = vec_len(s);\n".format(lfield))
+
+ write(
+ " {realloc} = cJSON_realloc({realloc}, {msgsize} + "
+ "vec_len(s), {msgsize});\n".format(
+ msgvar=msgvar, msgsize=msgsize, realloc=realloc
+ )
+ )
+ write(
+ " memcpy((void *){realloc} + {msgsize}, s, "
+ "vec_len(s));\n".format(realloc=realloc, msgsize=msgsize)
+ )
+ write(" {msgsize} += vec_len(s);\n".format(msgsize=msgsize))
+
+ write(" vec_free(s);\n")
else:
- write(' if (u8string_fromjson2(o, "{n}", a->{n}) < 0) goto error;\n'
- .format(n=o.fieldname))
+ write(
+ ' if (u8string_fromjson2(o, "{n}", a->{n}) < 0) goto error;\n'.format(
+ n=o.fieldname
+ )
+ )
return
is_bt = self.is_base_type(o.fieldtype)
if o.lengthfield:
if is_bt:
- call = ('vl_api_{t}_fromjson(e, &d[i]);'
- .format(t=o.fieldtype))
+ call = "vl_api_{t}_fromjson(e, &d[i]);".format(t=o.fieldtype)
else:
- call = ('if ({t}_fromjson({msgvar}, len, e, &d[i]) < 0) goto error; '
- .format(t=o.fieldtype, msgvar=msgvar))
- write(forloop_vla.format(lfield=lfield,
- t=o.fieldtype,
- n=o.fieldname,
- call=call,
- realloc=realloc,
- msgsize=msgsize))
+ call = "if ({t}_fromjson({msgvar}, len, e, &d[i]) < 0) goto error; ".format(
+ t=o.fieldtype, msgvar=msgvar
+ )
+ write(
+ forloop_vla.format(
+ lfield=lfield,
+ t=o.fieldtype,
+ n=o.fieldname,
+ call=call,
+ realloc=realloc,
+ msgsize=msgsize,
+ )
+ )
else:
if is_bt:
- call = ('vl_api_{t}_fromjson(e, &a->{n}[i]);'
- .format(t=t, n=o.fieldname))
+ call = "vl_api_{t}_fromjson(e, &a->{n}[i]);".format(t=t, n=o.fieldname)
else:
- call = ('if ({}_fromjson({}, len, e, &a->{}[i]) < 0) goto error;'
- .format(t, msgvar, o.fieldname))
- write(forloop.format(lfield=lfield,
- t=t,
- n=o.fieldname,
- call=call,
- msgvar=msgvar,
- realloc=realloc,
- msgsize=msgsize))
-
- _dispatch['Array'] = print_array
+ call = "if ({}_fromjson({}, len, e, &a->{}[i]) < 0) goto error;".format(
+ t, msgvar, o.fieldname
+ )
+ write(
+ forloop.format(
+ lfield=lfield,
+ t=t,
+ n=o.fieldname,
+ call=call,
+ msgvar=msgvar,
+ realloc=realloc,
+ msgsize=msgsize,
+ )
+ )
+
+ _dispatch["Array"] = print_array
def print_enum(self, o):
- '''Convert to JSON enum(string) to VPP API enum (int)'''
+ """Convert to JSON enum(string) to VPP API enum (int)"""
write = self.stream.write
- write('static inline int vl_api_{n}_t_fromjson'
- '(void **mp, int *len, cJSON *o, vl_api_{n}_t *a) {{\n'
- .format(n=o.name))
- write(' char *p = cJSON_GetStringValue(o);\n')
+ write(
+ "static inline int vl_api_{n}_t_fromjson"
+ "(void **mp, int *len, cJSON *o, vl_api_{n}_t *a) {{\n".format(n=o.name)
+ )
+ write(" char *p = cJSON_GetStringValue(o);\n")
for b in o.block:
- write(' if (strcmp(p, "{}") == 0) {{*a = {}; return 0;}}\n'
- .format(b[0], b[1]))
- write(' *a = 0;\n')
- write(' return -1;\n')
- write('}\n')
+ write(
+ ' if (strcmp(p, "{}") == 0) {{*a = {}; return 0;}}\n'.format(
+ b[0], b[1]
+ )
+ )
+ write(" *a = 0;\n")
+ write(" return -1;\n")
+ write("}\n")
- _dispatch['Enum'] = print_enum
+ _dispatch["Enum"] = print_enum
def print_enum_flag(self, o):
- '''Convert to JSON enum(string) to VPP API enum (int)'''
+ """Convert to JSON enum(string) to VPP API enum (int)"""
write = self.stream.write
- write('static inline int vl_api_{n}_t_fromjson '
- '(void **mp, int *len, cJSON *o, vl_api_{n}_t *a) {{\n'
- .format(n=o.name))
- write(' int i;\n')
- write(' *a = 0;\n')
- write(' for (i = 0; i < cJSON_GetArraySize(o); i++) {\n')
- write(' cJSON *e = cJSON_GetArrayItem(o, i);\n')
- write(' char *p = cJSON_GetStringValue(e);\n')
- write(' if (!p) return -1;\n')
+ write(
+ "static inline int vl_api_{n}_t_fromjson "
+ "(void **mp, int *len, cJSON *o, vl_api_{n}_t *a) {{\n".format(n=o.name)
+ )
+ write(" int i;\n")
+ write(" *a = 0;\n")
+ write(" for (i = 0; i < cJSON_GetArraySize(o); i++) {\n")
+ write(" cJSON *e = cJSON_GetArrayItem(o, i);\n")
+ write(" char *p = cJSON_GetStringValue(e);\n")
+ write(" if (!p) return -1;\n")
for b in o.block:
- write(' if (strcmp(p, "{}") == 0) *a |= {};\n'
- .format(b[0], b[1]))
- write(' }\n')
- write(' return 0;\n')
- write('}\n')
+ write(' if (strcmp(p, "{}") == 0) *a |= {};\n'.format(b[0], b[1]))
+ write(" }\n")
+ write(" return 0;\n")
+ write("}\n")
- _dispatch['EnumFlag'] = print_enum_flag
+ _dispatch["EnumFlag"] = print_enum_flag
def print_typedef(self, o):
- '''Convert from JSON object to VPP API binary representation'''
+ """Convert from JSON object to VPP API binary representation"""
write = self.stream.write
- write('static inline int vl_api_{name}_t_fromjson (void **mp, '
- 'int *len, cJSON *o, vl_api_{name}_t *a) {{\n'
- .format(name=o.name))
- write(' cJSON *item __attribute__ ((unused));\n')
- write(' u8 *s __attribute__ ((unused));\n')
+ write(
+ "static inline int vl_api_{name}_t_fromjson (void **mp, "
+ "int *len, cJSON *o, vl_api_{name}_t *a) {{\n".format(name=o.name)
+ )
+ write(" cJSON *item __attribute__ ((unused));\n")
+ write(" u8 *s __attribute__ ((unused));\n")
for t in o.block:
- if t.type == 'Field' and t.is_lengthfield:
+ if t.type == "Field" and t.is_lengthfield:
continue
- write('\n item = cJSON_GetObjectItem(o, "{}");\n'
- .format(t.fieldname))
- write(' if (!item) goto error;\n')
+ write('\n item = cJSON_GetObjectItem(o, "{}");\n'.format(t.fieldname))
+ write(" if (!item) goto error;\n")
self._dispatch[t.type](self, t)
- write('\n return 0;\n')
- write('\n error:\n')
- write(' return -1;\n')
- write('}\n')
+ write("\n return 0;\n")
+ write("\n error:\n")
+ write(" return -1;\n")
+ write("}\n")
def print_union(self, o):
- '''Convert JSON object to VPP API binary union'''
+ """Convert JSON object to VPP API binary union"""
write = self.stream.write
- write('static inline int vl_api_{name}_t_fromjson (void **mp, '
- 'int *len, cJSON *o, vl_api_{name}_t *a) {{\n'
- .format(name=o.name))
- write(' cJSON *item __attribute__ ((unused));\n')
- write(' u8 *s __attribute__ ((unused));\n')
+ write(
+ "static inline int vl_api_{name}_t_fromjson (void **mp, "
+ "int *len, cJSON *o, vl_api_{name}_t *a) {{\n".format(name=o.name)
+ )
+ write(" cJSON *item __attribute__ ((unused));\n")
+ write(" u8 *s __attribute__ ((unused));\n")
for t in o.block:
- if t.type == 'Field' and t.is_lengthfield:
+ if t.type == "Field" and t.is_lengthfield:
continue
- write(' item = cJSON_GetObjectItem(o, "{}");\n'
- .format(t.fieldname))
- write(' if (item) {\n')
+ write(' item = cJSON_GetObjectItem(o, "{}");\n'.format(t.fieldname))
+ write(" if (item) {\n")
self._dispatch[t.type](self, t)
- write(' };\n')
- write('\n return 0;\n')
- write('\n error:\n')
- write(' return -1;\n')
- write('}\n')
+ write(" };\n")
+ write("\n return 0;\n")
+ write("\n error:\n")
+ write(" return -1;\n")
+ write("}\n")
def print_define(self, o):
- '''Convert JSON object to VPP API message'''
+ """Convert JSON object to VPP API message"""
write = self.stream.write
error = 0
- write('static inline vl_api_{name}_t *vl_api_{name}_t_fromjson '
- '(cJSON *o, int *len) {{\n'.format(name=o.name))
- write(' cJSON *item __attribute__ ((unused));\n')
- write(' u8 *s __attribute__ ((unused));\n')
- write(' int l = sizeof(vl_api_{}_t);\n'.format(o.name))
- write(' vl_api_{}_t *a = malloc(l);\n'.format(o.name))
- write('\n')
+ write(
+ "static inline vl_api_{name}_t *vl_api_{name}_t_fromjson "
+ "(cJSON *o, int *len) {{\n".format(name=o.name)
+ )
+ write(" cJSON *item __attribute__ ((unused));\n")
+ write(" u8 *s __attribute__ ((unused));\n")
+ write(" int l = sizeof(vl_api_{}_t);\n".format(o.name))
+ write(" vl_api_{}_t *a = cJSON_malloc(l);\n".format(o.name))
+ write("\n")
for t in o.block:
if t.fieldname in self.noprint_fields:
continue
- if t.type == 'Field' and t.is_lengthfield:
+ if t.type == "Field" and t.is_lengthfield:
continue
- write(' item = cJSON_GetObjectItem(o, "{}");\n'
- .format(t.fieldname))
- write(' if (!item) goto error;\n')
+ write(' item = cJSON_GetObjectItem(o, "{}");\n'.format(t.fieldname))
+ write(" if (!item) goto error;\n")
error += 1
self._dispatch[t.type](self, t, toplevel=True)
- write('\n')
+ write("\n")
- write(' *len = l;\n')
- write(' return a;\n')
+ write(" *len = l;\n")
+ write(" return a;\n")
if error:
- write('\n error:\n')
- write(' free(a);\n')
- write(' return 0;\n')
- write('}\n')
+ write("\n error:\n")
+ write(" cJSON_free(a);\n")
+ write(" return 0;\n")
+ write("}\n")
def print_using(self, o):
- '''Convert JSON field to VPP type alias'''
+ """Convert JSON field to VPP type alias"""
write = self.stream.write
if o.manual_print:
return
t = o.using
- write('static inline int vl_api_{name}_t_fromjson (void **mp, '
- 'int *len, cJSON *o, vl_api_{name}_t *a) {{\n'
- .format(name=o.name))
- if 'length' in o.alias:
- if t.fieldtype != 'u8':
- raise ValueError("Error in processing type {} for {}"
- .format(t.fieldtype, o.name))
- write(' vl_api_u8_string_fromjson(o, (u8 *)a, {});\n'
- .format(o.alias['length']))
+ write(
+ "static inline int vl_api_{name}_t_fromjson (void **mp, "
+ "int *len, cJSON *o, vl_api_{name}_t *a) {{\n".format(name=o.name)
+ )
+ if "length" in o.alias:
+ if t.fieldtype != "u8":
+ raise ValueError(
+ "Error in processing type {} for {}".format(t.fieldtype, o.name)
+ )
+ write(
+ " vl_api_u8_string_fromjson(o, (u8 *)a, {});\n".format(
+ o.alias["length"]
+ )
+ )
else:
- write(' vl_api_{t}_fromjson(o, ({t} *)a);\n'
- .format(t=t.fieldtype))
+ write(" vl_api_{t}_fromjson(o, ({t} *)a);\n".format(t=t.fieldtype))
- write(' return 0;\n')
- write('}\n')
+ write(" return 0;\n")
+ write("}\n")
- _dispatch['Typedef'] = print_typedef
- _dispatch['Define'] = print_define
- _dispatch['Using'] = print_using
- _dispatch['Union'] = print_union
+ _dispatch["Typedef"] = print_typedef
+ _dispatch["Define"] = print_define
+ _dispatch["Using"] = print_using
+ _dispatch["Union"] = print_union
def generate_function(self, t):
- '''Main entry point'''
+ """Main entry point"""
write = self.stream.write
if t.manual_print:
- write('/* Manual print {} */\n'.format(t.name))
+ write("/* Manual print {} */\n".format(t.name))
return
self._dispatch[t.type](self, t)
def generate_types(self):
- '''Main entry point'''
+ """Main entry point"""
for t in self.types:
self.generate_function(t)
def generate_defines(self):
- '''Main entry point'''
+ """Main entry point"""
for t in self.defines:
self.generate_function(t)
def generate_tojson(s, modulename, stream):
- '''Generate all functions to convert from API to JSON'''
+ """Generate all functions to convert from API to JSON"""
write = stream.write
- write('/* Imported API files */\n')
- for i in s['Import']:
- f = i.filename.replace('plugins/', '')
- write('#include <{}_tojson.h>\n'.format(f))
+ write("/* Imported API files */\n")
+ for i in s["Import"]:
+ f = i.filename.replace("plugins/", "")
+ write("#include <{}_tojson.h>\n".format(f))
- pp = ToJSON(modulename, s['types'], s['Define'], s['imported']['types'],
- stream)
+ pp = ToJSON(modulename, s["types"], s["Define"], s["imported"]["types"], stream)
pp.header()
pp.generate_types()
pp.generate_defines()
pp.footer()
- return ''
+ return ""
def generate_fromjson(s, modulename, stream):
- '''Generate all functions to convert from JSON to API'''
+ """Generate all functions to convert from JSON to API"""
write = stream.write
- write('/* Imported API files */\n')
- for i in s['Import']:
- f = i.filename.replace('plugins/', '')
- write('#include <{}_fromjson.h>\n'.format(f))
+ write("/* Imported API files */\n")
+ for i in s["Import"]:
+ f = i.filename.replace("plugins/", "")
+ write("#include <{}_fromjson.h>\n".format(f))
- pp = FromJSON(modulename, s['types'], s['Define'], s['imported']['types'],
- stream)
+ pp = FromJSON(modulename, s["types"], s["Define"], s["imported"]["types"], stream)
pp.header()
pp.generate_types()
pp.generate_defines()
pp.footer()
- return ''
+ return ""
+
###############################################################################
DATESTRING = datetime.datetime.utcfromtimestamp(
- int(os.environ.get('SOURCE_DATE_EPOCH', time.time())))
-TOP_BOILERPLATE = '''\
+ int(os.environ.get("SOURCE_DATE_EPOCH", time.time()))
+)
+TOP_BOILERPLATE = """\
/*
* VLIB API definitions {datestring}
* Input file: {input_filename}
@@ -677,52 +743,65 @@ TOP_BOILERPLATE = '''\
|| defined(vl_printfun) ||defined(vl_endianfun) \\
|| defined(vl_api_version)||defined(vl_typedefs) \\
|| defined(vl_msg_name)||defined(vl_msg_name_crc_list) \\
- || defined(vl_api_version_tuple)
+ || defined(vl_api_version_tuple) || defined(vl_calcsizefun)
/* ok, something was selected */
#else
#warning no content included from {input_filename}
#endif
#define VL_API_PACKED(x) x __attribute__ ((packed))
-'''
-BOTTOM_BOILERPLATE = '''\
+/*
+ * Note: VL_API_MAX_ARRAY_SIZE is set to an arbitrarily large limit.
+ *
+ * However, any message with a ~2 billion element array is likely to break the
+ * api handling long before this limit causes array element endian issues.
+ *
+ * Applications should be written to create reasonable api messages.
+ */
+#define VL_API_MAX_ARRAY_SIZE 0x7fffffff
+
+"""
+
+BOTTOM_BOILERPLATE = """\
/****** API CRC (whole file) *****/
#ifdef vl_api_version
vl_api_version({input_filename}, {file_crc:#08x})
#endif
-'''
+"""
def msg_ids(s):
- '''Generate macro to map API message id to handler'''
- output = '''\
+ """Generate macro to map API message id to handler"""
+ output = """\
/****** Message ID / handler enum ******/
#ifdef vl_msg_id
-'''
+"""
- for t in s['Define']:
- output += "vl_msg_id(VL_API_%s, vl_api_%s_t_handler)\n" % \
- (t.name.upper(), t.name)
+ for t in s["Define"]:
+ output += "vl_msg_id(VL_API_%s, vl_api_%s_t_handler)\n" % (
+ t.name.upper(),
+ t.name,
+ )
output += "#endif"
return output
def msg_names(s):
- '''Generate calls to name mapping macro'''
- output = '''\
+ """Generate calls to name mapping macro"""
+ output = """\
/****** Message names ******/
#ifdef vl_msg_name
-'''
+"""
- for t in s['Define']:
+ for t in s["Define"]:
dont_trace = 0 if t.dont_trace else 1
output += "vl_msg_name(vl_api_%s_t, %d)\n" % (t.name, dont_trace)
output += "#endif"
@@ -731,190 +810,215 @@ def msg_names(s):
def msg_name_crc_list(s, suffix):
- '''Generate list of names to CRC mappings'''
- output = '''\
+ """Generate list of names to CRC mappings"""
+ output = """\
/****** Message name, crc list ******/
#ifdef vl_msg_name_crc_list
-'''
+"""
output += "#define foreach_vl_msg_name_crc_%s " % suffix
- for t in s['Define']:
- output += "\\\n_(VL_API_%s, %s, %08x) " % \
- (t.name.upper(), t.name, t.crc)
+ for t in s["Define"]:
+ output += "\\\n_(VL_API_%s, %s, %08x) " % (t.name.upper(), t.name, t.crc)
output += "\n#endif"
return output
def api2c(fieldtype):
- '''Map between API type names and internal VPP type names'''
- mappingtable = {'string': 'vl_api_string_t', }
+ """Map between API type names and internal VPP type names"""
+ mappingtable = {
+ "string": "vl_api_string_t",
+ }
if fieldtype in mappingtable:
return mappingtable[fieldtype]
return fieldtype
def typedefs(filename):
- '''Include in the main files to the types file'''
- output = '''\
+ """Include in the main files to the types file"""
+ output = """\
/****** Typedefs ******/
#ifdef vl_typedefs
#include "{include}.api_types.h"
#endif
-'''.format(include=filename)
+""".format(
+ include=filename
+ )
return output
-FORMAT_STRINGS = {'u8': '%u',
- 'bool': '%u',
- 'i8': '%d',
- 'u16': '%u',
- 'i16': '%d',
- 'u32': '%u',
- 'i32': '%ld',
- 'u64': '%llu',
- 'i64': '%lld',
- 'f64': '%.2f'}
+FORMAT_STRINGS = {
+ "u8": "%u",
+ "bool": "%u",
+ "i8": "%d",
+ "u16": "%u",
+ "i16": "%d",
+ "u32": "%u",
+ "i32": "%ld",
+ "u64": "%llu",
+ "i64": "%lld",
+ "f64": "%.2f",
+}
+
+class Printfun:
+ """Functions for pretty printing VPP API messages"""
-class Printfun():
- '''Functions for pretty printing VPP API messages'''
_dispatch = {}
- noprint_fields = {'_vl_msg_id': None,
- 'client_index': None,
- 'context': None}
+ noprint_fields = {"_vl_msg_id": None, "client_index": None, "context": None}
def __init__(self, stream):
self.stream = stream
@staticmethod
def print_string(o, stream):
- '''Pretty print a vl_api_string_t'''
+ """Pretty print a vl_api_string_t"""
write = stream.write
if o.modern_vla:
- write(' if (vl_api_string_len(&a->{f}) > 0) {{\n'
- .format(f=o.fieldname))
- write(' s = format(s, "\\n%U{f}: %U", '
- 'format_white_space, indent, '
- 'vl_api_format_string, (&a->{f}));\n'.format(f=o.fieldname))
- write(' } else {\n')
- write(' s = format(s, "\\n%U{f}:", '
- 'format_white_space, indent);\n'.format(f=o.fieldname))
- write(' }\n')
+ write(" if (vl_api_string_len(&a->{f}) > 0) {{\n".format(f=o.fieldname))
+ write(
+ ' s = format(s, "\\n%U{f}: %U", '
+ "format_white_space, indent, "
+ "vl_api_format_string, (&a->{f}));\n".format(f=o.fieldname)
+ )
+ write(" } else {\n")
+ write(
+ ' s = format(s, "\\n%U{f}:", '
+ "format_white_space, indent);\n".format(f=o.fieldname)
+ )
+ write(" }\n")
else:
- write(' s = format(s, "\\n%U{f}: %s", '
- 'format_white_space, indent, a->{f});\n'
- .format(f=o.fieldname))
+ write(
+ ' s = format(s, "\\n%U{f}: %s", '
+ "format_white_space, indent, a->{f});\n".format(f=o.fieldname)
+ )
def print_field(self, o, stream):
- '''Pretty print API field'''
+ """Pretty print API field"""
write = stream.write
if o.fieldname in self.noprint_fields:
return
if o.fieldtype in FORMAT_STRINGS:
f = FORMAT_STRINGS[o.fieldtype]
- write(' s = format(s, "\\n%U{n}: {f}", '
- 'format_white_space, indent, a->{n});\n'
- .format(n=o.fieldname, f=f))
+ write(
+ ' s = format(s, "\\n%U{n}: {f}", '
+ "format_white_space, indent, a->{n});\n".format(n=o.fieldname, f=f)
+ )
else:
- write(' s = format(s, "\\n%U{n}: %U", '
- 'format_white_space, indent, '
- 'format_{t}, &a->{n}, indent);\n'
- .format(n=o.fieldname, t=o.fieldtype))
+ write(
+ ' s = format(s, "\\n%U{n}: %U", '
+ "format_white_space, indent, "
+ "format_{t}, &a->{n}, indent);\n".format(n=o.fieldname, t=o.fieldtype)
+ )
- _dispatch['Field'] = print_field
+ _dispatch["Field"] = print_field
def print_array(self, o, stream):
- '''Pretty print API array'''
+ """Pretty print API array"""
write = stream.write
- forloop = '''\
+ forloop = """\
for (i = 0; i < {lfield}; i++) {{
s = format(s, "\\n%U{n}: %U",
format_white_space, indent, format_{t}, &a->{n}[i], indent);
}}
-'''
+"""
- forloop_format = '''\
+ forloop_format = """\
for (i = 0; i < {lfield}; i++) {{
s = format(s, "\\n%U{n}: {t}",
format_white_space, indent, a->{n}[i]);
}}
-'''
+"""
- if o.fieldtype == 'string':
+ if o.fieldtype == "string":
self.print_string(o, stream)
return
- if o.fieldtype == 'u8':
+ if o.fieldtype == "u8":
if o.lengthfield:
- write(' s = format(s, "\\n%U{n}: %U", format_white_space, '
- 'indent, format_hex_bytes, a->{n}, a->{lfield});\n'
- .format(n=o.fieldname, lfield=o.lengthfield))
+ write(
+ ' s = format(s, "\\n%U{n}: %U", format_white_space, '
+ "indent, format_hex_bytes, a->{n}, a->{lfield});\n".format(
+ n=o.fieldname, lfield=o.lengthfield
+ )
+ )
else:
- write(' s = format(s, "\\n%U{n}: %U", format_white_space, '
- 'indent, format_hex_bytes, a, {lfield});\n'
- .format(n=o.fieldname, lfield=o.length))
+ write(
+ ' s = format(s, "\\n%U{n}: %U", format_white_space, '
+ "indent, format_hex_bytes, a, {lfield});\n".format(
+ n=o.fieldname, lfield=o.length
+ )
+ )
return
- lfield = 'a->' + o.lengthfield if o.lengthfield else o.length
+ lfield = "a->" + o.lengthfield if o.lengthfield else o.length
if o.fieldtype in FORMAT_STRINGS:
- write(forloop_format.format(lfield=lfield,
- t=FORMAT_STRINGS[o.fieldtype],
- n=o.fieldname))
+ write(
+ forloop_format.format(
+ lfield=lfield, t=FORMAT_STRINGS[o.fieldtype], n=o.fieldname
+ )
+ )
else:
write(forloop.format(lfield=lfield, t=o.fieldtype, n=o.fieldname))
- _dispatch['Array'] = print_array
+ _dispatch["Array"] = print_array
@staticmethod
def print_alias(k, v, stream):
- '''Pretty print type alias'''
+ """Pretty print type alias"""
write = stream.write
- if ('length' in v.alias and v.alias['length'] and
- v.alias['type'] == 'u8'):
- write(' return format(s, "%U", format_hex_bytes, a, {});\n'
- .format(v.alias['length']))
- elif v.alias['type'] in FORMAT_STRINGS:
- write(' return format(s, "{}", *a);\n'
- .format(FORMAT_STRINGS[v.alias['type']]))
+ if "length" in v.alias and v.alias["length"] and v.alias["type"] == "u8":
+ write(
+ ' return format(s, "%U", format_hex_bytes, a, {});\n'.format(
+ v.alias["length"]
+ )
+ )
+ elif v.alias["type"] in FORMAT_STRINGS:
+ write(
+ ' return format(s, "{}", *a);\n'.format(
+ FORMAT_STRINGS[v.alias["type"]]
+ )
+ )
else:
- write(' return format(s, "{} (print not implemented)");\n'
- .format(k))
+ write(' return format(s, "{} (print not implemented)");\n'.format(k))
@staticmethod
def print_enum(o, stream):
- '''Pretty print API enum'''
+ """Pretty print API enum"""
write = stream.write
write(" switch(*a) {\n")
for b in o:
write(" case %s:\n" % b[1])
write(' return format(s, "{}");\n'.format(b[0]))
- write(' }\n')
+ write(" }\n")
- _dispatch['Enum'] = print_enum
- _dispatch['EnumFlag'] = print_enum
+ _dispatch["Enum"] = print_enum
+ _dispatch["EnumFlag"] = print_enum
def print_obj(self, o, stream):
- '''Entry point'''
+ """Entry point"""
write = stream.write
if o.type in self._dispatch:
self._dispatch[o.type](self, o, stream)
else:
- write(' s = format(s, "\\n{} {} {} (print not implemented");\n'
- .format(o.type, o.fieldtype, o.fieldname))
+ write(
+ ' s = format(s, "\\n{} {} {} (print not implemented");\n'.format(
+ o.type, o.fieldtype, o.fieldname
+ )
+ )
def printfun(objs, stream, modulename):
- '''Main entry point for pretty print function generation'''
+ """Main entry point for pretty print function generation"""
write = stream.write
- h = '''\
+ h = """\
/****** Print functions *****/
#ifdef vl_printfun
#ifndef included_{module}_printfun
@@ -928,15 +1032,18 @@ def printfun(objs, stream, modulename):
#define _uword_cast long
#endif
-'''
+#include "{module}.api_tojson.h"
+#include "{module}.api_fromjson.h"
+
+"""
- signature = '''\
-static inline void *vl_api_{name}_t_print (vl_api_{name}_t *a, void *handle)
+ signature = """\
+static inline u8 *vl_api_{name}_t_format (u8 *s, va_list *args)
{{
- u8 *s = 0;
+ __attribute__((unused)) vl_api_{name}_t *a = va_arg (*args, vl_api_{name}_t *);
u32 indent __attribute__((unused)) = 2;
int i __attribute__((unused));
-'''
+"""
h = h.format(module=modulename)
write(h)
@@ -944,163 +1051,189 @@ static inline void *vl_api_{name}_t_print (vl_api_{name}_t *a, void *handle)
pp = Printfun(stream)
for t in objs:
if t.manual_print:
- write("/***** manual: vl_api_%s_t_print *****/\n\n" % t.name)
+ write("/***** manual: vl_api_%s_t_format *****/\n\n" % t.name)
continue
- write(signature.format(name=t.name))
- write(' /* Message definition: vl_api_{}_t: */\n'.format(t.name))
- write(" s = format(s, \"vl_api_%s_t:\");\n" % t.name)
+ write(signature.format(name=t.name, suffix=""))
+ write(" /* Message definition: vl_api_{}_t: */\n".format(t.name))
+ write(' s = format(s, "vl_api_%s_t:");\n' % t.name)
for o in t.block:
pp.print_obj(o, stream)
- write(' vec_add1(s, 0);\n')
- write(' vl_print (handle, (char *)s);\n')
- write(' vec_free (s);\n')
- write(' return handle;\n')
- write('}\n\n')
+ write(" return s;\n")
+ write("}\n\n")
write("\n#endif")
write("\n#endif /* vl_printfun */\n")
- return ''
+ return ""
def printfun_types(objs, stream, modulename):
- '''Pretty print API types'''
+ """Pretty print API types"""
write = stream.write
pp = Printfun(stream)
- h = '''\
+ h = """\
/****** Print functions *****/
#ifdef vl_printfun
#ifndef included_{module}_printfun_types
#define included_{module}_printfun_types
-'''
+"""
h = h.format(module=modulename)
write(h)
- signature = '''\
+ signature = """\
static inline u8 *format_vl_api_{name}_t (u8 *s, va_list * args)
{{
vl_api_{name}_t *a = va_arg (*args, vl_api_{name}_t *);
u32 indent __attribute__((unused)) = va_arg (*args, u32);
int i __attribute__((unused));
indent += 2;
-'''
+"""
for t in objs:
- if t.__class__.__name__ == 'Enum' or t.__class__.__name__ == 'EnumFlag':
+ if t.__class__.__name__ == "Enum" or t.__class__.__name__ == "EnumFlag":
write(signature.format(name=t.name))
pp.print_enum(t.block, stream)
- write(' return s;\n')
- write('}\n\n')
+ write(" return s;\n")
+ write("}\n\n")
continue
if t.manual_print:
- write("/***** manual: vl_api_%s_t_print *****/\n\n" % t.name)
+ write("/***** manual: vl_api_%s_t_format *****/\n\n" % t.name)
continue
- if t.__class__.__name__ == 'Using':
+ if t.__class__.__name__ == "Using":
write(signature.format(name=t.name))
pp.print_alias(t.name, t, stream)
- write('}\n\n')
+ write("}\n\n")
continue
write(signature.format(name=t.name))
for o in t.block:
pp.print_obj(o, stream)
- write(' return s;\n')
- write('}\n\n')
+ write(" return s;\n")
+ write("}\n\n")
write("\n#endif")
write("\n#endif /* vl_printfun_types */\n")
def generate_imports(imports):
- '''Add #include matching the API import statements'''
- output = '/* Imported API files */\n'
- output += '#ifndef vl_api_version\n'
+ """Add #include matching the API import statements"""
+ output = "/* Imported API files */\n"
+ output += "#ifndef vl_api_version\n"
for i in imports:
- s = i.filename.replace('plugins/', '')
- output += '#include <{}.h>\n'.format(s)
- output += '#endif\n'
+ s = i.filename.replace("plugins/", "")
+ output += "#include <{}.h>\n".format(s)
+ output += "#endif\n"
return output
ENDIAN_STRINGS = {
- 'u16': 'clib_net_to_host_u16',
- 'u32': 'clib_net_to_host_u32',
- 'u64': 'clib_net_to_host_u64',
- 'i16': 'clib_net_to_host_i16',
- 'i32': 'clib_net_to_host_i32',
- 'i64': 'clib_net_to_host_i64',
- 'f64': 'clib_net_to_host_f64',
+ "u16": "clib_net_to_host_u16",
+ "u32": "clib_net_to_host_u32",
+ "u64": "clib_net_to_host_u64",
+ "i16": "clib_net_to_host_i16",
+ "i32": "clib_net_to_host_i32",
+ "i64": "clib_net_to_host_i64",
+ "f64": "clib_net_to_host_f64",
}
+def get_endian_string(o, type):
+ """Return proper endian string conversion function"""
+ try:
+ if o.to_network:
+ return ENDIAN_STRINGS[type].replace("net_to_host", "host_to_net")
+ except:
+ pass
+ return ENDIAN_STRINGS[type]
+
+
def endianfun_array(o):
- '''Generate endian functions for arrays'''
- forloop = '''\
+ """Generate endian functions for arrays"""
+ forloop = """\
+ {comment}
+ ASSERT((u32){length} <= (u32)VL_API_MAX_ARRAY_SIZE);
for (i = 0; i < {length}; i++) {{
a->{name}[i] = {format}(a->{name}[i]);
}}
-'''
+"""
- forloop_format = '''\
+ forloop_format = """\
for (i = 0; i < {length}; i++) {{
{type}_endian(&a->{name}[i]);
}}
-'''
-
- output = ''
- if o.fieldtype == 'u8' or o.fieldtype == 'string' or o.fieldtype == 'bool':
- output += ' /* a->{n} = a->{n} (no-op) */\n'.format(n=o.fieldname)
+"""
+
+ to_network_comment = ""
+ try:
+ if o.to_network:
+ to_network_comment = """/*
+ * Array fields processed first to handle variable length arrays and size
+ * field endian conversion in the proper order for to-network messages.
+ * Message fields have been sorted by type in the code generator, thus fields
+ * in this generated code may be converted in a different order than specified
+ * in the *.api file.
+ */"""
+ except:
+ pass
+
+ output = ""
+ if o.fieldtype == "u8" or o.fieldtype == "string" or o.fieldtype == "bool":
+ output += " /* a->{n} = a->{n} (no-op) */\n".format(n=o.fieldname)
else:
- lfield = 'a->' + o.lengthfield if o.lengthfield else o.length
+ lfield = "a->" + o.lengthfield if o.lengthfield else o.length
if o.fieldtype in ENDIAN_STRINGS:
- output += (forloop
- .format(length=lfield,
- format=ENDIAN_STRINGS[o.fieldtype],
- name=o.fieldname))
+ output += forloop.format(
+ comment=to_network_comment,
+ length=lfield,
+ format=get_endian_string(o, o.fieldtype),
+ name=o.fieldname,
+ )
else:
- output += (forloop_format
- .format(length=lfield, type=o.fieldtype,
- name=o.fieldname))
+ output += forloop_format.format(
+ length=lfield, type=o.fieldtype, name=o.fieldname
+ )
return output
-NO_ENDIAN_CONVERSION = {'client_index': None}
+NO_ENDIAN_CONVERSION = {"client_index": None}
def endianfun_obj(o):
- '''Generate endian conversion function for type'''
- output = ''
- if o.type == 'Array':
+ """Generate endian conversion function for type"""
+ output = ""
+ if o.type == "Array":
return endianfun_array(o)
- if o.type != 'Field':
- output += (' s = format(s, "\\n{} {} {} (print not implemented");\n'
- .format(o.type, o.fieldtype, o.fieldname))
+ if o.type != "Field":
+ output += ' s = format(s, "\\n{} {} {} (print not implemented");\n'.format(
+ o.type, o.fieldtype, o.fieldname
+ )
return output
if o.fieldname in NO_ENDIAN_CONVERSION:
- output += ' /* a->{n} = a->{n} (no-op) */\n'.format(n=o.fieldname)
+ output += " /* a->{n} = a->{n} (no-op) */\n".format(n=o.fieldname)
return output
if o.fieldtype in ENDIAN_STRINGS:
- output += (' a->{name} = {format}(a->{name});\n'
- .format(name=o.fieldname,
- format=ENDIAN_STRINGS[o.fieldtype]))
- elif o.fieldtype.startswith('vl_api_'):
- output += (' {type}_endian(&a->{name});\n'
- .format(type=o.fieldtype, name=o.fieldname))
+ output += " a->{name} = {format}(a->{name});\n".format(
+ name=o.fieldname, format=get_endian_string(o, o.fieldtype)
+ )
+ elif o.fieldtype.startswith("vl_api_"):
+ output += " {type}_endian(&a->{name});\n".format(
+ type=o.fieldtype, name=o.fieldname
+ )
else:
- output += ' /* a->{n} = a->{n} (no-op) */\n'.format(n=o.fieldname)
+ output += " /* a->{n} = a->{n} (no-op) */\n".format(n=o.fieldname)
return output
def endianfun(objs, modulename):
- '''Main entry point for endian function generation'''
- output = '''\
+ """Main entry point for endian function generation"""
+ output = """\
/****** Endian swap functions *****/\n\
#ifdef vl_endianfun
@@ -1108,57 +1241,76 @@ def endianfun(objs, modulename):
#define included_{module}_endianfun
#undef clib_net_to_host_uword
+#undef clib_host_to_net_uword
#ifdef LP64
#define clib_net_to_host_uword clib_net_to_host_u64
+#define clib_host_to_net_uword clib_host_to_net_u64
#else
#define clib_net_to_host_uword clib_net_to_host_u32
+#define clib_host_to_net_uword clib_host_to_net_u32
#endif
-'''
+"""
output = output.format(module=modulename)
- signature = '''\
+ signature = """\
static inline void vl_api_{name}_t_endian (vl_api_{name}_t *a)
{{
int i __attribute__((unused));
-'''
+"""
for t in objs:
- if t.__class__.__name__ == 'Enum' or t.__class__.__name__ == 'EnumFlag' :
+ # Outbound (to network) messages are identified by message nomenclature
+ # i.e. message names ending with these suffixes are 'to network'
+ if t.name.endswith("_reply") or t.name.endswith("_details"):
+ t.to_network = True
+ else:
+ t.to_network = False
+
+ if t.__class__.__name__ == "Enum" or t.__class__.__name__ == "EnumFlag":
output += signature.format(name=t.name)
if t.enumtype in ENDIAN_STRINGS:
- output += (' *a = {}(*a);\n'
- .format(ENDIAN_STRINGS[t.enumtype]))
+ output += " *a = {}(*a);\n".format(get_endian_string(t, t.enumtype))
else:
- output += (' /* a->{name} = a->{name} (no-op) */\n'
- .format(name=t.name))
+ output += " /* a->{name} = a->{name} (no-op) */\n".format(
+ name=t.name
+ )
- output += '}\n\n'
+ output += "}\n\n"
continue
if t.manual_endian:
output += "/***** manual: vl_api_%s_t_endian *****/\n\n" % t.name
continue
- if t.__class__.__name__ == 'Using':
+ if t.__class__.__name__ == "Using":
output += signature.format(name=t.name)
- if ('length' in t.alias and t.alias['length'] and
- t.alias['type'] == 'u8'):
- output += (' /* a->{name} = a->{name} (no-op) */\n'
- .format(name=t.name))
- elif t.alias['type'] in FORMAT_STRINGS:
- output += (' *a = {}(*a);\n'
- .format(ENDIAN_STRINGS[t.alias['type']]))
+ if "length" in t.alias and t.alias["length"] and t.alias["type"] == "u8":
+ output += " /* a->{name} = a->{name} (no-op) */\n".format(
+ name=t.name
+ )
+ elif t.alias["type"] in FORMAT_STRINGS:
+ output += " *a = {}(*a);\n".format(
+ get_endian_string(t, t.alias["type"])
+ )
else:
- output += ' /* Not Implemented yet {} */'.format(t.name)
- output += '}\n\n'
+ output += " /* Not Implemented yet {} */".format(t.name)
+ output += "}\n\n"
continue
output += signature.format(name=t.name)
+ # For outbound (to network) messages:
+ # some arrays have dynamic length -- iterate over
+ # them before changing endianness for the length field
+ # by making the Array types show up first
+ if t.to_network:
+ t.block.sort(key=lambda x: x.type)
+
for o in t.block:
+ o.to_network = t.to_network
output += endianfun_obj(o)
- output += '}\n\n'
+ output += "}\n\n"
output += "\n#endif"
output += "\n#endif /* vl_endianfun */\n\n"
@@ -1166,19 +1318,103 @@ static inline void vl_api_{name}_t_endian (vl_api_{name}_t *a)
return output
+def calc_size_fun(objs, modulename):
+ """Main entry point for calculate size function generation"""
+ output = """\
+
+/****** Calculate size functions *****/\n\
+#ifdef vl_calcsizefun
+#ifndef included_{module}_calcsizefun
+#define included_{module}_calcsizefun
+
+"""
+ output = output.format(module=modulename)
+
+ signature = """\
+/* calculate message size of message in network byte order */
+static inline uword vl_api_{name}_t_calc_size (vl_api_{name}_t *a)
+{{
+"""
+
+ for o in objs:
+ tname = o.__class__.__name__
+
+ output += signature.format(name=o.name)
+ output += f" return sizeof(*a)"
+ if tname == "Using":
+ if "length" in o.alias:
+ try:
+ tmp = int(o.alias["length"])
+ if tmp == 0:
+ raise (f"Unexpected length '0' for alias {o}")
+ except:
+ # output += f" + vl_api_{o.alias.name}_t_calc_size({o.name})"
+ print("culprit:")
+ print(o)
+ print(dir(o.alias))
+ print(o.alias)
+ raise
+ elif tname == "Enum" or tname == "EnumFlag":
+ pass
+ else:
+ for b in o.block:
+ if b.type == "Option":
+ continue
+ elif b.type == "Field":
+ if b.fieldtype.startswith("vl_api_"):
+ output += f" - sizeof(a->{b.fieldname})"
+ output += f" + {b.fieldtype}_calc_size(&a->{b.fieldname})"
+ elif b.type == "Array":
+ if b.lengthfield:
+ m = list(
+ filter(lambda x: x.fieldname == b.lengthfield, o.block)
+ )
+ if len(m) != 1:
+ raise Exception(
+ f"Expected 1 match for field '{b.lengthfield}', got '{m}'"
+ )
+ lf = m[0]
+ if lf.fieldtype in ENDIAN_STRINGS:
+ output += f" + {get_endian_string(b, lf.fieldtype)}(a->{b.lengthfield}) * sizeof(a->{b.fieldname}[0])"
+ elif lf.fieldtype == "u8":
+ output += (
+ f" + a->{b.lengthfield} * sizeof(a->{b.fieldname}[0])"
+ )
+ else:
+ raise Exception(
+ f"Don't know how to endian swap {lf.fieldtype}"
+ )
+ else:
+ # Fixed length strings decay to nul terminated u8
+ if b.fieldtype == "string":
+ if b.modern_vla:
+ output += f" + vl_api_string_len(&a->{b.fieldname})"
+
+ output += ";\n"
+ output += "}\n\n"
+ output += "\n#endif"
+ output += "\n#endif /* vl_calcsizefun */\n\n"
+
+ return output
+
+
def version_tuple(s, module):
- '''Generate semantic version string'''
- output = '''\
+ """Generate semantic version string"""
+ output = """\
/****** Version tuple *****/
#ifdef vl_api_version_tuple
-'''
- if 'version' in s['Option']:
- v = s['Option']['version']
- (major, minor, patch) = v.split('.')
- output += "vl_api_version_tuple(%s, %s, %s, %s)\n" % \
- (module, major, minor, patch)
+"""
+ if "version" in s["Option"]:
+ v = s["Option"]["version"]
+ (major, minor, patch) = v.split(".")
+ output += "vl_api_version_tuple(%s, %s, %s, %s)\n" % (
+ module,
+ major,
+ minor,
+ patch,
+ )
output += "\n#endif /* vl_api_version_tuple */\n\n"
@@ -1186,224 +1422,288 @@ def version_tuple(s, module):
def generate_include_enum(s, module, stream):
- '''Generate <name>.api_enum.h'''
+ """Generate <name>.api_enum.h"""
write = stream.write
- if 'Define' in s:
- write('typedef enum {\n')
- for t in s['Define']:
- write(' VL_API_{},\n'.format(t.name.upper()))
- write(' VL_MSG_{}_LAST\n'.format(module.upper()))
- write('}} vl_api_{}_enum_t;\n'.format(module))
+ if "Define" in s:
+ write("typedef enum {\n")
+ for t in s["Define"]:
+ write(" VL_API_{},\n".format(t.name.upper()))
+ write(" VL_MSG_{}_LAST\n".format(module.upper()))
+ write("}} vl_api_{}_enum_t;\n".format(module))
def generate_include_counters(s, stream):
- '''Include file for the counter data model types.'''
+ """Include file for the counter data model types."""
write = stream.write
for counters in s:
csetname = counters.name
- write('typedef enum {\n')
+ write("typedef enum {\n")
for c in counters.block:
- write(' {}_ERROR_{},\n'
- .format(csetname.upper(), c['name'].upper()))
- write(' {}_N_ERROR\n'.format(csetname.upper()))
- write('}} vl_counter_{}_enum_t;\n'.format(csetname))
+ write(" {}_ERROR_{},\n".format(csetname.upper(), c["name"].upper()))
+ write(" {}_N_ERROR\n".format(csetname.upper()))
+ write("}} vl_counter_{}_enum_t;\n".format(csetname))
- write('extern vlib_error_desc_t {}_error_counters[];\n'.format(csetname))
+ write("extern vlib_error_desc_t {}_error_counters[];\n".format(csetname))
def generate_include_types(s, module, stream):
- '''Generate separate API _types file.'''
+ """Generate separate API _types file."""
write = stream.write
- write('#ifndef included_{module}_api_types_h\n'.format(module=module))
- write('#define included_{module}_api_types_h\n'.format(module=module))
-
- if 'version' in s['Option']:
- v = s['Option']['version']
- (major, minor, patch) = v.split('.')
- write('#define VL_API_{m}_API_VERSION_MAJOR {v}\n'
- .format(m=module.upper(), v=major))
- write('#define VL_API_{m}_API_VERSION_MINOR {v}\n'
- .format(m=module.upper(), v=minor))
- write('#define VL_API_{m}_API_VERSION_PATCH {v}\n'
- .format(m=module.upper(), v=patch))
-
- if 'Import' in s:
- write('/* Imported API files */\n')
- for i in s['Import']:
- filename = i.filename.replace('plugins/', '')
- write('#include <{}_types.h>\n'.format(filename))
-
- for o in s['types'] + s['Define']:
+ write("#ifndef included_{module}_api_types_h\n".format(module=module))
+ write("#define included_{module}_api_types_h\n".format(module=module))
+
+ if "version" in s["Option"]:
+ v = s["Option"]["version"]
+ (major, minor, patch) = v.split(".")
+ write(
+ "#define VL_API_{m}_API_VERSION_MAJOR {v}\n".format(
+ m=module.upper(), v=major
+ )
+ )
+ write(
+ "#define VL_API_{m}_API_VERSION_MINOR {v}\n".format(
+ m=module.upper(), v=minor
+ )
+ )
+ write(
+ "#define VL_API_{m}_API_VERSION_PATCH {v}\n".format(
+ m=module.upper(), v=patch
+ )
+ )
+
+ if "Import" in s:
+ write("/* Imported API files */\n")
+ for i in s["Import"]:
+ filename = i.filename.replace("plugins/", "")
+ write("#include <{}_types.h>\n".format(filename))
+
+ for o in itertools.chain(s["types"], s["Define"]):
tname = o.__class__.__name__
- if tname == 'Using':
- if 'length' in o.alias:
- write('typedef %s vl_api_%s_t[%s];\n' %
- (o.alias['type'], o.name, o.alias['length']))
+ if tname == "Using":
+ if "length" in o.alias:
+ write(
+ "typedef %s vl_api_%s_t[%s];\n"
+ % (o.alias["type"], o.name, o.alias["length"])
+ )
else:
- write('typedef %s vl_api_%s_t;\n' % (o.alias['type'], o.name))
- elif tname == 'Enum' or tname == 'EnumFlag':
- if o.enumtype == 'u32':
+ write("typedef %s vl_api_%s_t;\n" % (o.alias["type"], o.name))
+ elif tname == "Enum" or tname == "EnumFlag":
+ if o.enumtype == "u32":
write("typedef enum {\n")
else:
write("typedef enum __attribute__((packed)) {\n")
for b in o.block:
write(" %s = %s,\n" % (b[0], b[1]))
- write('} vl_api_%s_t;\n' % o.name)
- if o.enumtype != 'u32':
- size1 = 'sizeof(vl_api_%s_t)' % o.name
- size2 = 'sizeof(%s)' % o.enumtype
- err_str = 'size of API enum %s is wrong' % o.name
- write('STATIC_ASSERT(%s == %s, "%s");\n'
- % (size1, size2, err_str))
+ write("} vl_api_%s_t;\n" % o.name)
+ if o.enumtype != "u32":
+ size1 = "sizeof(vl_api_%s_t)" % o.name
+ size2 = "sizeof(%s)" % o.enumtype
+ err_str = "size of API enum %s is wrong" % o.name
+ write('STATIC_ASSERT(%s == %s, "%s");\n' % (size1, size2, err_str))
else:
- if tname == 'Union':
- write("typedef union __attribute__ ((packed)) _vl_api_%s {\n"
- % o.name)
+ if tname == "Union":
+ write("typedef union __attribute__ ((packed)) _vl_api_%s {\n" % o.name)
else:
- write(("typedef struct __attribute__ ((packed)) _vl_api_%s {\n")
- % o.name)
+ write(
+ ("typedef struct __attribute__ ((packed)) _vl_api_%s {\n") % o.name
+ )
for b in o.block:
- if b.type == 'Option':
+ if b.type == "Option":
continue
- if b.type == 'Field':
- write(" %s %s;\n" % (api2c(b.fieldtype),
- b.fieldname))
- elif b.type == 'Array':
+ if b.type == "Field":
+ write(" %s %s;\n" % (api2c(b.fieldtype), b.fieldname))
+ elif b.type == "Array":
if b.lengthfield:
- write(" %s %s[0];\n" % (api2c(b.fieldtype),
- b.fieldname))
+ write(" %s %s[0];\n" % (api2c(b.fieldtype), b.fieldname))
else:
# Fixed length strings decay to nul terminated u8
- if b.fieldtype == 'string':
+ if b.fieldtype == "string":
if b.modern_vla:
- write(' {} {};\n'
- .format(api2c(b.fieldtype),
- b.fieldname))
+ write(
+ " {} {};\n".format(
+ api2c(b.fieldtype), b.fieldname
+ )
+ )
else:
- write(' u8 {}[{}];\n'
- .format(b.fieldname, b.length))
+ write(" u8 {}[{}];\n".format(b.fieldname, b.length))
else:
- write(" %s %s[%s];\n" %
- (api2c(b.fieldtype), b.fieldname,
- b.length))
+ write(
+ " %s %s[%s];\n"
+ % (api2c(b.fieldtype), b.fieldname, b.length)
+ )
else:
- raise ValueError("Error in processing type {} for {}"
- .format(b, o.name))
-
- write('} vl_api_%s_t;\n' % o.name)
-
- for t in s['Define']:
- write('#define VL_API_{ID}_CRC "{n}_{crc:08x}"\n'
- .format(n=t.name, ID=t.name.upper(), crc=t.crc))
+ raise ValueError(
+ "Error in processing type {} for {}".format(b, o.name)
+ )
+
+ write("} vl_api_%s_t;\n" % o.name)
+ write(
+ f"#define VL_API_{o.name.upper()}_IS_CONSTANT_SIZE ({0 if o.vla else 1})\n\n"
+ )
+
+ for t in s["Define"]:
+ write(
+ '#define VL_API_{ID}_CRC "{n}_{crc:08x}"\n'.format(
+ n=t.name, ID=t.name.upper(), crc=t.crc
+ )
+ )
write("\n#endif\n")
-def generate_c_boilerplate(services, defines, counters, file_crc,
- module, stream):
- '''VPP side plugin.'''
+def generate_c_boilerplate(services, defines, counters, file_crc, module, stream):
+ """VPP side plugin."""
write = stream.write
define_hash = {d.name: d for d in defines}
- hdr = '''\
+ hdr = """\
#define vl_endianfun /* define message structures */
#include "{module}.api.h"
#undef vl_endianfun
+#define vl_calcsizefun
+#include "{module}.api.h"
+#undef vl_calsizefun
+
/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_printfun
#include "{module}.api.h"
#undef vl_printfun
-'''
+#include "{module}.api_json.h"
+"""
write(hdr.format(module=module))
- write('static u16\n')
- write('setup_message_id_table (void) {\n')
- write(' api_main_t *am = my_api_main;\n')
- write(' vl_msg_api_msg_config_t c;\n')
- write(' u16 msg_id_base = vl_msg_api_get_msg_ids ("{}_{crc:08x}", '
- 'VL_MSG_{m}_LAST);\n'
- .format(module, crc=file_crc, m=module.upper()))
+ if len(defines) > 0:
+ write("static u16\n")
+ write("setup_message_id_table (void) {\n")
+ write(" api_main_t *am = my_api_main;\n")
+ write(" vl_msg_api_msg_config_t c;\n")
+ write(
+ ' u16 msg_id_base = vl_msg_api_get_msg_ids ("{}_{crc:08x}", '
+ "VL_MSG_{m}_LAST);\n".format(module, crc=file_crc, m=module.upper())
+ )
+ write(f" vec_add1(am->json_api_repr, (u8 *)json_api_repr_{module});\n")
for d in defines:
- write(' vl_msg_api_add_msg_name_crc (am, "{n}_{crc:08x}",\n'
- ' VL_API_{ID} + msg_id_base);\n'
- .format(n=d.name, ID=d.name.upper(), crc=d.crc))
+ write(
+ ' vl_msg_api_add_msg_name_crc (am, "{n}_{crc:08x}",\n'
+ " VL_API_{ID} + msg_id_base);\n".format(
+ n=d.name, ID=d.name.upper(), crc=d.crc
+ )
+ )
for s in services:
d = define_hash[s.caller]
- write(' c = (vl_msg_api_msg_config_t) '
- ' {{.id = VL_API_{ID} + msg_id_base,\n'
- ' .name = "{n}",\n'
- ' .handler = vl_api_{n}_t_handler,\n'
- ' .cleanup = vl_noop_handler,\n'
- ' .endian = vl_api_{n}_t_endian,\n'
- ' .print = vl_api_{n}_t_print,\n'
- ' .traced = 1,\n'
- ' .replay = 1,\n'
- ' .is_autoendian = {auto}}};\n'
- .format(n=s.caller, ID=s.caller.upper(),
- auto=d.autoendian))
- write(' vl_msg_api_config (&c);\n')
+ write(
+ " c = (vl_msg_api_msg_config_t) "
+ " {{.id = VL_API_{ID} + msg_id_base,\n"
+ ' .name = "{n}",\n'
+ " .handler = vl_api_{n}_t_handler,\n"
+ " .endian = vl_api_{n}_t_endian,\n"
+ " .format_fn = vl_api_{n}_t_format,\n"
+ " .traced = 1,\n"
+ " .replay = 1,\n"
+ " .tojson = vl_api_{n}_t_tojson,\n"
+ " .fromjson = vl_api_{n}_t_fromjson,\n"
+ " .calc_size = vl_api_{n}_t_calc_size,\n"
+ " .is_autoendian = {auto}}};\n".format(
+ n=s.caller, ID=s.caller.upper(), auto=d.autoendian
+ )
+ )
+ write(" vl_msg_api_config (&c);\n")
try:
d = define_hash[s.reply]
- write(' c = (vl_msg_api_msg_config_t) '
- '{{.id = VL_API_{ID} + msg_id_base,\n'
- ' .name = "{n}",\n'
- ' .handler = 0,\n'
- ' .cleanup = vl_noop_handler,\n'
- ' .endian = vl_api_{n}_t_endian,\n'
- ' .print = vl_api_{n}_t_print,\n'
- ' .is_autoendian = {auto}}};\n'
- .format(n=s.reply, ID=s.reply.upper(),
- auto=d.autoendian))
- write(' vl_msg_api_config (&c);\n')
+ write(
+ " c = (vl_msg_api_msg_config_t) "
+ "{{.id = VL_API_{ID} + msg_id_base,\n"
+ ' .name = "{n}",\n'
+ " .handler = 0,\n"
+ " .endian = vl_api_{n}_t_endian,\n"
+ " .format_fn = vl_api_{n}_t_format,\n"
+ " .traced = 1,\n"
+ " .replay = 1,\n"
+ " .tojson = vl_api_{n}_t_tojson,\n"
+ " .fromjson = vl_api_{n}_t_fromjson,\n"
+ " .calc_size = vl_api_{n}_t_calc_size,\n"
+ " .is_autoendian = {auto}}};\n".format(
+ n=s.reply, ID=s.reply.upper(), auto=d.autoendian
+ )
+ )
+ write(" vl_msg_api_config (&c);\n")
except KeyError:
pass
- write(' return msg_id_base;\n')
- write('}\n')
+ try:
+ if s.stream:
+ d = define_hash[s.stream_message]
+ write(
+ " c = (vl_msg_api_msg_config_t) "
+ "{{.id = VL_API_{ID} + msg_id_base,\n"
+ ' .name = "{n}",\n'
+ " .handler = 0,\n"
+ " .endian = vl_api_{n}_t_endian,\n"
+ " .format_fn = vl_api_{n}_t_format,\n"
+ " .traced = 1,\n"
+ " .replay = 1,\n"
+ " .tojson = vl_api_{n}_t_tojson,\n"
+ " .fromjson = vl_api_{n}_t_fromjson,\n"
+ " .calc_size = vl_api_{n}_t_calc_size,\n"
+ " .is_autoendian = {auto}}};\n".format(
+ n=s.stream_message,
+ ID=s.stream_message.upper(),
+ auto=d.autoendian,
+ )
+ )
+ write(" vl_msg_api_config (&c);\n")
+ except KeyError:
+ pass
+ if len(defines) > 0:
+ write(" return msg_id_base;\n")
+ write("}\n")
- severity = {'error': 'VL_COUNTER_SEVERITY_ERROR',
- 'info': 'VL_COUNTER_SEVERITY_INFO',
- 'warn': 'VL_COUNTER_SEVERITY_WARN'}
+ severity = {
+ "error": "VL_COUNTER_SEVERITY_ERROR",
+ "info": "VL_COUNTER_SEVERITY_INFO",
+ "warn": "VL_COUNTER_SEVERITY_WARN",
+ }
for cnt in counters:
csetname = cnt.name
- write('vlib_error_desc_t {}_error_counters[] = {{\n'.format(csetname))
+ write("vlib_error_desc_t {}_error_counters[] = {{\n".format(csetname))
for c in cnt.block:
- write(' {\n')
- write(' .name = "{}",\n'.format(c['name']))
- write(' .desc = "{}",\n'.format(c['description']))
- write(' .severity = {},\n'.format(severity[c['severity']]))
- write(' },\n')
- write('};\n')
+ write(" {\n")
+ write(' .name = "{}",\n'.format(c["name"]))
+ write(' .desc = "{}",\n'.format(c["description"]))
+ write(" .severity = {},\n".format(severity[c["severity"]]))
+ write(" },\n")
+ write("};\n")
-def generate_c_test_boilerplate(services, defines, file_crc, module, plugin,
- stream):
- '''Generate code for legacy style VAT. To be deleted.'''
+def generate_c_test_boilerplate(services, defines, file_crc, module, plugin, stream):
+ """Generate code for legacy style VAT. To be deleted."""
write = stream.write
define_hash = {d.name: d for d in defines}
- hdr = '''\
+ hdr = """\
#define vl_endianfun /* define message structures */
#include "{module}.api.h"
#undef vl_endianfun
+#define vl_calcsizefun
+#include "{module}.api.h"
+#undef vl_calsizefun
+
/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_printfun
#include "{module}.api.h"
#undef vl_printfun
-'''
+"""
write(hdr.format(module=module))
for s in services:
@@ -1412,109 +1712,133 @@ def generate_c_test_boilerplate(services, defines, file_crc, module, plugin,
except KeyError:
continue
if d.manual_print:
- write('/*\n'
- ' * Manual definition requested for: \n'
- ' * vl_api_{n}_t_handler()\n'
- ' */\n'
- .format(n=s.reply))
+ write(
+ "/*\n"
+ " * Manual definition requested for: \n"
+ " * vl_api_{n}_t_handler()\n"
+ " */\n".format(n=s.reply)
+ )
continue
if not define_hash[s.caller].autoreply:
- write('/* Generation not supported (vl_api_{n}_t_handler()) */\n'
- .format(n=s.reply))
+ write(
+ "/* Generation not supported (vl_api_{n}_t_handler()) */\n".format(
+ n=s.reply
+ )
+ )
continue
- write('#ifndef VL_API_{n}_T_HANDLER\n'.format(n=s.reply.upper()))
- write('static void\n')
- write('vl_api_{n}_t_handler (vl_api_{n}_t * mp) {{\n'
- .format(n=s.reply))
- write(' vat_main_t * vam = {}_test_main.vat_main;\n'.format(module))
- write(' i32 retval = ntohl(mp->retval);\n')
- write(' if (vam->async_mode) {\n')
- write(' vam->async_errors += (retval < 0);\n')
- write(' } else {\n')
- write(' vam->retval = retval;\n')
- write(' vam->result_ready = 1;\n')
- write(' }\n')
- write('}\n')
- write('#endif\n')
+ write("#ifndef VL_API_{n}_T_HANDLER\n".format(n=s.reply.upper()))
+ write("static void\n")
+ write("vl_api_{n}_t_handler (vl_api_{n}_t * mp) {{\n".format(n=s.reply))
+ write(" vat_main_t * vam = {}_test_main.vat_main;\n".format(module))
+ write(" i32 retval = ntohl(mp->retval);\n")
+ write(" if (vam->async_mode) {\n")
+ write(" vam->async_errors += (retval < 0);\n")
+ write(" } else {\n")
+ write(" vam->retval = retval;\n")
+ write(" vam->result_ready = 1;\n")
+ write(" }\n")
+ write("}\n")
+ write("#endif\n")
for e in s.events:
if define_hash[e].manual_print:
continue
- write('static void\n')
- write('vl_api_{n}_t_handler (vl_api_{n}_t * mp) {{\n'.format(n=e))
- write(' vl_print(0, "{n} event called:");\n'.format(n=e))
- write(' vl_api_{n}_t_print(mp, 0);\n'.format(n=e))
- write('}\n')
-
- write('static void\n')
- write('setup_message_id_table (vat_main_t * vam, u16 msg_id_base) {\n')
+ write("static void\n")
+ write("vl_api_{n}_t_handler (vl_api_{n}_t * mp) {{\n".format(n=e))
+ write(' vlib_cli_output(0, "{n} event called:");\n'.format(n=e))
+ write(
+ ' vlib_cli_output(0, "%U", vl_api_{n}_t_format, mp);\n'.format(n=e)
+ )
+ write("}\n")
+
+ write("static void\n")
+ write("setup_message_id_table (vat_main_t * vam, u16 msg_id_base) {\n")
for s in services:
- write(' vl_msg_api_set_handlers(VL_API_{ID} + msg_id_base, '
- ' "{n}",\n'
- ' vl_api_{n}_t_handler, '
- ' vl_noop_handler,\n'
- ' vl_api_{n}_t_endian, '
- ' vl_api_{n}_t_print,\n'
- ' sizeof(vl_api_{n}_t), 1);\n'
- .format(n=s.reply, ID=s.reply.upper()))
- write(' hash_set_mem (vam->function_by_name, "{n}", api_{n});\n'
- .format(n=s.caller))
+ write(
+ " vl_msg_api_config (&(vl_msg_api_msg_config_t){{\n"
+ " .id = VL_API_{ID} + msg_id_base,\n"
+ ' .name = "{n}",\n'
+ " .handler = vl_api_{n}_t_handler,\n"
+ " .endian = vl_api_{n}_t_endian,\n"
+ " .format_fn = vl_api_{n}_t_format,\n"
+ " .size = sizeof(vl_api_{n}_t),\n"
+ " .traced = 1,\n"
+ " .tojson = vl_api_{n}_t_tojson,\n"
+ " .fromjson = vl_api_{n}_t_fromjson,\n"
+ " .calc_size = vl_api_{n}_t_calc_size,\n"
+ " }});".format(n=s.reply, ID=s.reply.upper())
+ )
+ write(
+ ' hash_set_mem (vam->function_by_name, "{n}", api_{n});\n'.format(
+ n=s.caller
+ )
+ )
try:
- write(' hash_set_mem (vam->help_by_name, "{n}", "{help}");\n'
- .format(n=s.caller,
- help=define_hash[s.caller].options['vat_help']))
+ write(
+ ' hash_set_mem (vam->help_by_name, "{n}", "{help}");\n'.format(
+ n=s.caller, help=define_hash[s.caller].options["vat_help"]
+ )
+ )
except KeyError:
pass
# Events
for e in s.events:
- write(' vl_msg_api_set_handlers(VL_API_{ID} + msg_id_base, '
- ' "{n}",\n'
- ' vl_api_{n}_t_handler, '
- ' vl_noop_handler,\n'
- ' vl_api_{n}_t_endian, '
- ' vl_api_{n}_t_print,\n'
- ' sizeof(vl_api_{n}_t), 1);\n'
- .format(n=e, ID=e.upper()))
-
- write('}\n')
- if plugin:
- write('clib_error_t * vat_plugin_register (vat_main_t *vam)\n')
- else:
- write('clib_error_t * vat_{}_plugin_register (vat_main_t *vam)\n'
- .format(module))
- write('{\n')
- write(' {n}_test_main_t * mainp = &{n}_test_main;\n'.format(n=module))
- write(' mainp->vat_main = vam;\n')
- write(' mainp->msg_id_base = vl_client_get_first_plugin_msg_id '
- ' ("{n}_{crc:08x}");\n'
- .format(n=module, crc=file_crc))
- write(' if (mainp->msg_id_base == (u16) ~0)\n')
- write(' return clib_error_return (0, "{} plugin not loaded...");\n'
- .format(module))
- write(' setup_message_id_table (vam, mainp->msg_id_base);\n')
- write('#ifdef VL_API_LOCAL_SETUP_MESSAGE_ID_TABLE\n')
- write(' VL_API_LOCAL_SETUP_MESSAGE_ID_TABLE(vam);\n')
- write('#endif\n')
- write(' return 0;\n')
- write('}\n')
+ write(
+ " vl_msg_api_config (&(vl_msg_api_msg_config_t){{\n"
+ " .id = VL_API_{ID} + msg_id_base,\n"
+ ' .name = "{n}",\n'
+ " .handler = vl_api_{n}_t_handler,\n"
+ " .endian = vl_api_{n}_t_endian,\n"
+ " .format_fn = vl_api_{n}_t_format,\n"
+ " .size = sizeof(vl_api_{n}_t),\n"
+ " .traced = 1,\n"
+ " .tojson = vl_api_{n}_t_tojson,\n"
+ " .fromjson = vl_api_{n}_t_fromjson,\n"
+ " .calc_size = vl_api_{n}_t_calc_size,\n"
+ " }});".format(n=e, ID=e.upper())
+ )
+
+ write("}\n")
+ write("clib_error_t * vat_plugin_register (vat_main_t *vam)\n")
+ write("{\n")
+ write(" {n}_test_main_t * mainp = &{n}_test_main;\n".format(n=module))
+ write(" mainp->vat_main = vam;\n")
+ write(
+ " mainp->msg_id_base = vl_client_get_first_plugin_msg_id "
+ ' ("{n}_{crc:08x}");\n'.format(n=module, crc=file_crc)
+ )
+ write(" if (mainp->msg_id_base == (u16) ~0)\n")
+ write(
+ ' return clib_error_return (0, "{} plugin not loaded...");\n'.format(
+ module
+ )
+ )
+ write(" setup_message_id_table (vam, mainp->msg_id_base);\n")
+ write("#ifdef VL_API_LOCAL_SETUP_MESSAGE_ID_TABLE\n")
+ write(" VL_API_LOCAL_SETUP_MESSAGE_ID_TABLE(vam);\n")
+ write("#endif\n")
+ write(" return 0;\n")
+ write("}\n")
def apifunc(func):
- '''Check if a method is generated already.'''
+ """Check if a method is generated already."""
+
def _f(module, d, processed, *args):
if d.name in processed:
return None
processed[d.name] = True
return func(module, d, *args)
+
return _f
def c_test_api_service(s, dump, stream):
- '''Generate JSON code for a service.'''
+ """Generate JSON code for a service."""
write = stream.write
- req_reply_template = '''\
+ req_reply_template = """\
static cJSON *
api_{n} (cJSON *o)
{{
@@ -1530,7 +1854,7 @@ api_{n} (cJSON *o)
mp->_vl_msg_id = vac_get_msg_index(VL_API_{N}_CRC);
vl_api_{n}_t_endian(mp);
vac_write((char *)mp, len);
- free(mp);
+ cJSON_free(mp);
/* Read reply */
char *p;
@@ -1547,8 +1871,8 @@ api_{n} (cJSON *o)
return vl_api_{r}_t_tojson(rmp);
}}
-'''
- dump_details_template = '''\
+"""
+ dump_details_template = """\
static cJSON *
api_{n} (cJSON *o)
{{
@@ -1563,7 +1887,7 @@ api_{n} (cJSON *o)
mp->_vl_msg_id = msg_id;
vl_api_{n}_t_endian(mp);
vac_write((char *)mp, len);
- free(mp);
+ cJSON_free(mp);
vat2_control_ping(123); // FIX CONTEXT
cJSON *reply = cJSON_CreateArray();
@@ -1602,8 +1926,8 @@ api_{n} (cJSON *o)
return reply;
}}
-'''
- gets_details_reply_template = '''\
+"""
+ gets_details_reply_template = """\
static cJSON *
api_{n} (cJSON *o)
{{
@@ -1619,7 +1943,7 @@ api_{n} (cJSON *o)
vl_api_{n}_t_endian(mp);
vac_write((char *)mp, len);
- free(mp);
+ cJSON_free(mp);
cJSON *reply = cJSON_CreateArray();
@@ -1652,32 +1976,42 @@ api_{n} (cJSON *o)
return reply;
}}
-'''
+"""
if dump:
if s.stream_message:
- write(gets_details_reply_template
- .format(n=s.caller, r=s.reply, N=s.caller.upper(),
- R=s.reply.upper(), d=s.stream_message,
- D=s.stream_message.upper()))
+ write(
+ gets_details_reply_template.format(
+ n=s.caller,
+ r=s.reply,
+ N=s.caller.upper(),
+ R=s.reply.upper(),
+ d=s.stream_message,
+ D=s.stream_message.upper(),
+ )
+ )
else:
- write(dump_details_template.format(n=s.caller, r=s.reply,
- N=s.caller.upper(),
- R=s.reply.upper()))
+ write(
+ dump_details_template.format(
+ n=s.caller, r=s.reply, N=s.caller.upper(), R=s.reply.upper()
+ )
+ )
else:
- write(req_reply_template.format(n=s.caller, r=s.reply,
- N=s.caller.upper(),
- R=s.reply.upper()))
+ write(
+ req_reply_template.format(
+ n=s.caller, r=s.reply, N=s.caller.upper(), R=s.reply.upper()
+ )
+ )
def generate_c_test2_boilerplate(services, defines, module, stream):
- '''Generate code for VAT2 plugin.'''
+ """Generate code for VAT2 plugin."""
write = stream.write
define_hash = {d.name: d for d in defines}
# replies = {}
- hdr = '''\
+ hdr = """\
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
#include <vppinfra/error.h>
@@ -1686,8 +2020,8 @@ def generate_c_test2_boilerplate(services, defines, module, stream):
#define vl_typedefs /* define message structures */
#include <vlibmemory/vl_memory_api_h.h>
-#include <vpp/api/vpe_types.api.h>
-#include <vpp/api/vpe.api.h>
+#include <vlibmemory/vlib.api_types.h>
+#include <vlibmemory/vlib.api.h>
#undef vl_typedefs
#include "{module}.api_enum.h"
@@ -1697,7 +2031,10 @@ def generate_c_test2_boilerplate(services, defines, module, stream):
#include "{module}.api.h"
#undef vl_endianfun
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_calcsizefun
+#include "{module}.api.h"
+#undef vl_calsizefun
+
#define vl_printfun
#include "{module}.api.h"
#undef vl_printfun
@@ -1708,7 +2045,7 @@ def generate_c_test2_boilerplate(services, defines, module, stream):
#include <vat2/vat2_helpers.h>
-'''
+"""
write(hdr.format(module=module))
@@ -1717,119 +2054,123 @@ def generate_c_test2_boilerplate(services, defines, module, stream):
continue
c_test_api_service(s, s.stream, stream)
- write('void vat2_register_function(char *, cJSON * (*)(cJSON *));\n')
+ write(
+ "void vat2_register_function(char *, cJSON * (*)(cJSON *), cJSON * (*)(void *), u32);\n"
+ )
# write('__attribute__((constructor))')
- write('clib_error_t *\n')
- write('vat2_register_plugin (void) {\n')
+ write("clib_error_t *\n")
+ write("vat2_register_plugin (void) {\n")
for s in services:
- write(' vat2_register_function("{n}", api_{n});\n'
- .format(n=s.caller))
- write(' return 0;\n')
- write('}\n')
+ if s.reply not in define_hash:
+ continue
+ crc = define_hash[s.caller].crc
+ write(
+ ' vat2_register_function("{n}", api_{n}, (cJSON * (*)(void *))vl_api_{n}_t_tojson, 0x{crc:08x});\n'.format(
+ n=s.caller, crc=crc
+ )
+ )
+ write(" return 0;\n")
+ write("}\n")
#
# Plugin entry point
#
-def run(args, apifilename, s):
- '''Main plugin entry point.'''
+def run(output_dir, apifilename, s):
+ """Main plugin entry point."""
stream = StringIO()
- if not args.outputdir:
- sys.stderr.write('Missing --outputdir argument')
+ if not output_dir:
+ sys.stderr.write("Missing --outputdir argument")
return None
basename = os.path.basename(apifilename)
filename, _ = os.path.splitext(basename)
- modulename = filename.replace('.', '_')
- filename_enum = os.path.join(args.outputdir + '/' + basename + '_enum.h')
- filename_types = os.path.join(args.outputdir + '/' + basename + '_types.h')
- filename_c = os.path.join(args.outputdir + '/' + basename + '.c')
- filename_c_test = os.path.join(args.outputdir + '/' + basename + '_test.c')
- filename_c_test2 = (os.path.join(args.outputdir + '/' + basename +
- '_test2.c'))
- filename_c_tojson = (os.path.join(args.outputdir +
- '/' + basename + '_tojson.h'))
- filename_c_fromjson = (os.path.join(args.outputdir + '/' +
- basename + '_fromjson.h'))
+ modulename = filename.replace(".", "_")
+ filename_enum = os.path.join(output_dir + "/" + basename + "_enum.h")
+ filename_types = os.path.join(output_dir + "/" + basename + "_types.h")
+ filename_c = os.path.join(output_dir + "/" + basename + ".c")
+ filename_c_test = os.path.join(output_dir + "/" + basename + "_test.c")
+ filename_c_test2 = os.path.join(output_dir + "/" + basename + "_test2.c")
+ filename_c_tojson = os.path.join(output_dir + "/" + basename + "_tojson.h")
+ filename_c_fromjson = os.path.join(output_dir + "/" + basename + "_fromjson.h")
# Generate separate types file
st = StringIO()
generate_include_types(s, modulename, st)
- with open(filename_types, 'w') as fd:
+ with open(filename_types, "w") as fd:
st.seek(0)
shutil.copyfileobj(st, fd)
st.close()
# Generate separate enum file
st = StringIO()
- st.write('#ifndef included_{}_api_enum_h\n'.format(modulename))
- st.write('#define included_{}_api_enum_h\n'.format(modulename))
+ st.write("#ifndef included_{}_api_enum_h\n".format(modulename))
+ st.write("#define included_{}_api_enum_h\n".format(modulename))
generate_include_enum(s, modulename, st)
- generate_include_counters(s['Counters'], st)
- st.write('#endif\n')
- with open(filename_enum, 'w') as fd:
+ generate_include_counters(s["Counters"], st)
+ st.write("#endif\n")
+ with open(filename_enum, "w") as fd:
st.seek(0)
shutil.copyfileobj(st, fd)
st.close()
# Generate separate C file
st = StringIO()
- generate_c_boilerplate(s['Service'], s['Define'], s['Counters'],
- s['file_crc'], modulename, st)
- with open(filename_c, 'w') as fd:
+ generate_c_boilerplate(
+ s["Service"], s["Define"], s["Counters"], s["file_crc"], modulename, st
+ )
+ with open(filename_c, "w") as fd:
st.seek(0)
shutil.copyfileobj(st, fd)
st.close()
# Generate separate C test file
st = StringIO()
- plugin = bool('plugin' in apifilename)
- generate_c_test_boilerplate(s['Service'], s['Define'],
- s['file_crc'],
- modulename, plugin, st)
- with open(filename_c_test, 'w') as fd:
+ plugin = bool("plugin" in apifilename)
+ generate_c_test_boilerplate(
+ s["Service"], s["Define"], s["file_crc"], modulename, plugin, st
+ )
+ with open(filename_c_test, "w") as fd:
st.seek(0)
shutil.copyfileobj(st, fd)
st.close()
# Fully autogenerated VATv2 C test file
st = StringIO()
- generate_c_test2_boilerplate(s['Service'], s['Define'],
- modulename, st)
- with open(filename_c_test2, 'w') as fd:
+ generate_c_test2_boilerplate(s["Service"], s["Define"], modulename, st)
+ with open(filename_c_test2, "w") as fd:
st.seek(0)
shutil.copyfileobj(st, fd)
- st.close() #
+ st.close() #
# Generate separate JSON file
st = StringIO()
generate_tojson(s, modulename, st)
- with open(filename_c_tojson, 'w') as fd:
+ with open(filename_c_tojson, "w") as fd:
st.seek(0)
shutil.copyfileobj(st, fd)
st.close()
st = StringIO()
generate_fromjson(s, modulename, st)
- with open(filename_c_fromjson, 'w') as fd:
+ with open(filename_c_fromjson, "w") as fd:
st.seek(0)
shutil.copyfileobj(st, fd)
st.close()
- output = TOP_BOILERPLATE.format(datestring=DATESTRING,
- input_filename=basename)
- output += generate_imports(s['Import'])
+ output = TOP_BOILERPLATE.format(datestring=DATESTRING, input_filename=basename)
+ output += generate_imports(s["Import"])
output += msg_ids(s)
output += msg_names(s)
output += msg_name_crc_list(s, filename)
output += typedefs(modulename)
- printfun_types(s['types'], stream, modulename)
- printfun(s['Define'], stream, modulename)
+ printfun_types(s["types"], stream, modulename)
+ printfun(s["Define"], stream, modulename)
output += stream.getvalue()
stream.close()
- output += endianfun(s['types'] + s['Define'], modulename)
+ output += endianfun(s["types"] + s["Define"], modulename)
+ output += calc_size_fun(s["types"] + s["Define"], modulename)
output += version_tuple(s, basename)
- output += BOTTOM_BOILERPLATE.format(input_filename=basename,
- file_crc=s['file_crc'])
+ output += BOTTOM_BOILERPLATE.format(input_filename=basename, file_crc=s["file_crc"])
return output
diff --git a/src/tools/vppapigen/vppapigen_crc.py b/src/tools/vppapigen/vppapigen_crc.py
index 791e347292e..f7e8296af3e 100644
--- a/src/tools/vppapigen/vppapigen_crc.py
+++ b/src/tools/vppapigen/vppapigen_crc.py
@@ -7,16 +7,15 @@ process_imports = True
#
# Plugin entry point
#
-def run(args, input_filename, s):
+def run(output_dir, input_filename, s):
j = {}
major = 0
minor = 0
patch = 0
- if 'version' in s['Option']:
- v = s['Option']['version']
- (major, minor, patch) = v.split('.')
- j['_version'] = {'major': major, 'minor': minor, 'patch': patch}
- for t in s['Define']:
- j[t.name] = {'crc': f'{t.crc:#08x}', 'version': major,
- 'options': t.options}
- return json.dumps(j, indent=4, separators=(',', ': '))
+ if "version" in s["Option"]:
+ v = s["Option"]["version"]
+ (major, minor, patch) = v.split(".")
+ j["_version"] = {"major": major, "minor": minor, "patch": patch}
+ for t in s["Define"]:
+ j[t.name] = {"crc": f"{t.crc:#08x}", "version": major, "options": t.options}
+ return json.dumps(j, indent=4, separators=(",", ": "))
diff --git a/src/tools/vppapigen/vppapigen_json.py b/src/tools/vppapigen/vppapigen_json.py
index 5fa839f9854..7239d1ea732 100644
--- a/src/tools/vppapigen/vppapigen_json.py
+++ b/src/tools/vppapigen/vppapigen_json.py
@@ -1,5 +1,7 @@
# JSON generation
import json
+import sys
+import os
process_imports = True
@@ -14,7 +16,7 @@ def walk_imports(s):
def walk_counters(s, pathset):
r = []
for e in s:
- r2 = {'name': e.name, 'elements': e.block}
+ r2 = {"name": e.name, "elements": e.block}
r.append(r2)
r3 = []
@@ -31,7 +33,7 @@ def walk_enums(s):
d.append(e.name)
for b in e.block:
d.append(b)
- d.append({'enumtype': e.enumtype})
+ d.append({"enumtype": e.enumtype})
r.append(d)
return r
@@ -39,13 +41,13 @@ def walk_enums(s):
def walk_services(s):
r = {}
for e in s:
- d = {'reply': e.reply}
+ d = {"reply": e.reply}
if e.stream:
- d['stream'] = True
+ d["stream"] = True
if e.stream_message:
- d['stream_msg'] = e.stream_message
+ d["stream_msg"] = e.stream_message
if e.events:
- d['events'] = e.events
+ d["events"] = e.events
r[e.caller] = d
return r
@@ -56,28 +58,29 @@ def walk_defs(s, is_message=False):
d = []
d.append(t.name)
for b in t.block:
- if b.type == 'Option':
+ if b.type == "Option":
continue
- if b.type == 'Field':
+ if b.type == "Field":
if b.limit:
d.append([b.fieldtype, b.fieldname, b.limit])
else:
d.append([b.fieldtype, b.fieldname])
- elif b.type == 'Array':
+ elif b.type == "Array":
if b.lengthfield:
- d.append([b.fieldtype, b.fieldname,
- b.length, b.lengthfield])
+ d.append([b.fieldtype, b.fieldname, b.length, b.lengthfield])
else:
d.append([b.fieldtype, b.fieldname, b.length])
- elif b.type == 'Union':
+ elif b.type == "Union":
pass
else:
raise ValueError("Error in processing array type %s" % b)
if is_message and t.crc:
c = {}
- c['crc'] = "{0:#0{1}x}".format(t.crc, 10)
- c['options'] = t.options
+ c["crc"] = "{0:#0{1}x}".format(t.crc, 10)
+ c["options"] = t.options
+ if t.comment:
+ c["comment"] = t.comment
d.append(c)
r.append(d)
@@ -87,22 +90,46 @@ def walk_defs(s, is_message=False):
#
# Plugin entry point
#
-def run(args, filename, s):
+
+
+def contents_to_c_string(contents):
+ # Escape backslashes and double quotes
+ contents = contents.replace("\\", "\\\\").replace('"', '\\"')
+ # Replace newlines with \n
+ contents = contents.replace("\n", "\\n")
+ return '"' + contents + '"'
+
+
+def run(output_dir, apifilename, s):
+ if not output_dir:
+ sys.stderr.write("Missing --outputdir argument")
+ return None
+
+ basename = os.path.basename(apifilename)
+ filename_json_repr = os.path.join(output_dir + "/" + basename + "_json.h")
+ filename, _ = os.path.splitext(basename)
+ modulename = filename.replace(".", "_")
+
j = {}
- j['types'] = (walk_defs([o for o in s['types']
- if o.__class__.__name__ == 'Typedef']))
- j['messages'] = walk_defs(s['Define'], True)
- j['unions'] = (walk_defs([o for o in s['types']
- if o.__class__.__name__ == 'Union']))
- j['enums'] = (walk_enums([o for o in s['types']
- if o.__class__.__name__ == 'Enum']))
- j['enumflags'] = (walk_enums([o for o in s['types']
- if o.__class__.__name__ == 'EnumFlag']))
- j['services'] = walk_services(s['Service'])
- j['options'] = s['Option']
- j['aliases'] = {o.name:o.alias for o in s['types'] if o.__class__.__name__ == 'Using'}
- j['vl_api_version'] = hex(s['file_crc'])
- j['imports'] = walk_imports(i for i in s['Import'])
- j['counters'], j['paths'] = walk_counters(s['Counters'], s['Paths'])
- return json.dumps(j, indent=4, separators=(',', ': '))
+ j["types"] = walk_defs([o for o in s["types"] if o.__class__.__name__ == "Typedef"])
+ j["messages"] = walk_defs(s["Define"], True)
+ j["unions"] = walk_defs([o for o in s["types"] if o.__class__.__name__ == "Union"])
+ j["enums"] = walk_enums([o for o in s["types"] if o.__class__.__name__ == "Enum"])
+ j["enumflags"] = walk_enums(
+ [o for o in s["types"] if o.__class__.__name__ == "EnumFlag"]
+ )
+ j["services"] = walk_services(s["Service"])
+ j["options"] = s["Option"]
+ j["aliases"] = {
+ o.name: o.alias for o in s["types"] if o.__class__.__name__ == "Using"
+ }
+ j["vl_api_version"] = hex(s["file_crc"])
+ j["imports"] = walk_imports(i for i in s["Import"])
+ j["counters"], j["paths"] = walk_counters(s["Counters"], s["Paths"])
+ r = json.dumps(j, indent=4, separators=(",", ": "))
+ c_string = contents_to_c_string(r)
+ with open(filename_json_repr, "w", encoding="UTF-8") as f:
+ print(f"const char *json_api_repr_{modulename} = {c_string};", file=f)
+ # return json.dumps(j, indent=4, separators=(",", ": "))
+ return r
diff --git a/src/tools/vppapitrace/vppapitrace b/src/tools/vppapitrace/vppapitrace
deleted file mode 120000
index d0ece85a809..00000000000
--- a/src/tools/vppapitrace/vppapitrace
+++ /dev/null
@@ -1 +0,0 @@
-vppapitrace.py \ No newline at end of file
diff --git a/src/tools/vppapitrace/vppapitrace.py b/src/tools/vppapitrace/vppapitrace.py
deleted file mode 100755
index 8089b3a2236..00000000000
--- a/src/tools/vppapitrace/vppapitrace.py
+++ /dev/null
@@ -1,492 +0,0 @@
-#!/usr/bin/env python3
-
-#
-# Copyright (c) 2019 Cisco and/or its affiliates.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at:
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-#
-# Convert from VPP API trace to JSON.
-
-import argparse
-import struct
-import sys
-import logging
-import json
-from ipaddress import *
-from collections import namedtuple
-from vpp_papi import MACAddress, VPPApiJSONFiles
-import base64
-import os
-import textwrap
-
-def serialize_likely_small_unsigned_integer(x):
- r = x
-
- # Low bit set means it fits into 1 byte.
- if r < (1 << 7):
- return struct.pack("B", 1 + 2 * r)
-
- # Low 2 bits 1 0 means it fits into 2 bytes.
- r -= (1 << 7)
- if r < (1 << 14):
- return struct.pack("<H", 4 * r + 2)
-
- r -= (1 << 14)
- if r < (1 << 29):
- return struct.pack("<I", 8 * r + 4)
-
- return struct.pack("<BQ", 0, x)
-
-
-def unserialize_likely_small_unsigned_integer(data, offset):
- y = struct.unpack_from("B", data, offset)[0]
- if y & 1:
- return y // 2, 1
- r = 1 << 7
- if y & 2:
- p = struct.unpack_from("B", data, offset + 1)[0]
- r += (y // 4) + (p << 6)
- return r, 2
- r += 1 << 14
- if y & 4:
- (p1, p2, p3) = struct.unpack_from("BBB", data, offset+1)
- r += ((y // 8) + (p1 << (5 + 8 * 0))
- + (p2 << (5 + 8 * 1)) + (p3 << (5 + 8 * 2)))
- return r, 3
- return struct.unpack_from(">Q", data, offset+1)[0], 8
-
-
-def serialize_cstring(s):
- bstring = s.encode('utf8')
- l = len(bstring)
- b = serialize_likely_small_unsigned_integer(l)
- b += struct.pack('{}s'.format(l), bstring)
- return b
-
-
-def unserialize_cstring(data, offset):
- l, size = unserialize_likely_small_unsigned_integer(data, offset)
- name = struct.unpack_from('{}s'.format(l), data, offset+size)[0]
- return name.decode('utf8'), size + len(name)
-
-
-def unserialize_msgtbl(data, offset):
- msgtable_by_id = {}
- msgtable_by_name = {}
- i = 0
- nmsg = struct.unpack_from(">I", data, offset)[0]
- o = 4
- while i < nmsg:
- (msgid, size) = unserialize_likely_small_unsigned_integer(
- data, offset + o)
- o += size
- (name, size) = unserialize_cstring(data, offset + o)
- o += size
- msgtable_by_id[msgid] = name
- msgtable_by_name[name] = msgid
-
- i += 1
- return msgtable_by_id, msgtable_by_name, o
-
-
-def serialize_msgtbl(messages):
- offset = 0
- # XXX 100K?
- data = bytearray(100000)
- nmsg = len(messages)
- data = struct.pack(">I", nmsg)
-
- for k, v in messages.items():
- name = k + '_' + v.crc[2:]
- data += serialize_likely_small_unsigned_integer(v._vl_msg_id)
- data += serialize_cstring(name)
- return data
-
-
-def apitrace2json(messages, filename):
- result = []
- with open(filename, 'rb') as file:
- bytes_read = file.read()
- # Read header
- (nitems, msgtbl_size, wrapped) = struct.unpack_from(">IIB",
- bytes_read, 0)
- logging.debug('nitems: {} message table size: {} wrapped: {}'
- .format(nitems, msgtbl_size, wrapped))
- if wrapped:
- sys.stdout.write('Wrapped/incomplete trace, results may vary')
- offset = 9
-
- msgtbl_by_id, msgtbl_by_name, size = unserialize_msgtbl(bytes_read,
- offset)
- offset += size
-
- i = 0
- while i < nitems:
- size = struct.unpack_from(">I", bytes_read, offset)[0]
- offset += 4
- if size == 0:
- break
- msgid = struct.unpack_from(">H", bytes_read, offset)[0]
- name = msgtbl_by_id[msgid]
- n = name[:name.rfind("_")]
- msgobj = messages[n]
- if n + '_' + msgobj.crc[2:] != name:
- sys.exit("CRC Mismatch between JSON API definition "
- "and trace. {}".format(name))
-
- x, s = msgobj.unpack(bytes_read[offset:offset+size])
- msgname = type(x).__name__
- offset += size
- # Replace named tuple illegal _0
- y = x._asdict()
- y.pop('_0')
- result.append({'name': msgname, 'args': y})
- i += 1
-
- file.close()
- return result
-
-
-def json2apitrace(messages, filename):
- """Input JSON file and API message definition. Output API trace
- bytestring."""
-
- msgs = []
- with open(filename, 'r') as file:
- msgs = json.load(file, object_hook=vpp_decode)
- result = b''
- for m in msgs:
- name = m['name']
- msgobj = messages[name]
- m['args']['_vl_msg_id'] = messages[name]._vl_msg_id
- b = msgobj.pack(m['args'])
-
- result += struct.pack('>I', len(b))
- result += b
- return len(msgs), result
-
-
-class VPPEncoder(json.JSONEncoder):
- def default(self, o):
- if type(o) is bytes:
- return "base64:" + base64.b64encode(o).decode('utf-8')
- # Let the base class default method raise the TypeError
- return json.JSONEncoder.default(self, o)
-
- def encode(self, obj):
- def hint_tuples(item):
- if isinstance(item, tuple):
- return hint_tuples(item._asdict())
- if isinstance(item, list):
- return [hint_tuples(e) for e in item]
- if isinstance(item, dict):
- return {key: hint_tuples(value) for key, value in item.items()}
- else:
- return item
-
- return super(VPPEncoder, self).encode(hint_tuples(obj))
-
-
-def vpp_decode(obj):
- for k, v in obj.items():
- if type(v) is str and v.startswith('base64:'):
- s = v.lstrip('base64:')
- obj[k] = base64.b64decode(v[7:])
- return obj
-
-
-def vpp_encoder(obj):
- if isinstance(obj, IPv6Network):
- return str(obj)
- if isinstance(obj, IPv4Network):
- return str(obj)
- if isinstance(obj, IPv6Address):
- return str(obj)
- if isinstance(obj, IPv4Address):
- return str(obj)
- if isinstance(obj, MACAddress):
- return str(obj)
- if type(obj) is bytes:
- return "base64:" + base64.b64encode(obj).decode('ascii')
- raise TypeError('Unknown object {} {}\n'.format(type(obj), obj))
-
-message_filter = {
- 'control_ping',
- 'memclnt_create',
- 'memclnt_delete',
- 'get_first_msg_id',
-}
-
-argument_filter = {
- 'client_index',
- 'context',
-}
-
-def topython(messages, services):
- import pprint
- pp = pprint.PrettyPrinter()
-
- s = '''\
-#!/usr/bin/env python3
-from vpp_papi import VPP, VppEnum
-vpp = VPP(use_socket=True)
-vpp.connect(name='vppapitrace')
-'''
-
- for m in messages:
- if m['name'] not in services:
- s += '# ignoring reply message: {}\n'.format(m['name'])
- continue
- if m['name'] in message_filter:
- s += '# ignoring message {}\n'.format(m['name'])
- continue
- for k in argument_filter:
- try:
- m['args'].pop(k)
- except KeyError:
- pass
- a = pp.pformat(m['args'])
- s += 'rv = vpp.api.{}(**{})\n'.format(m['name'], a)
- s += 'print("RV:", rv)\n'
- s += 'vpp.disconnect()\n'
-
- return s
-
-def todump_items(k, v, level):
- klen = len(k) if k else 0
- spaces = ' ' * level + ' ' * (klen + 3)
- wrapper = textwrap.TextWrapper(initial_indent="", subsequent_indent=spaces, width=60)
- s = ''
- if type(v) is dict:
- if k:
- s += ' ' * level + '{}:\n'.format(k)
- for k2, v2 in v.items():
- s += todump_items(k2, v2, level + 1)
- return s
-
- if type(v) is list:
- for v2 in v:
- s += '{}'.format(todump_items(k, v2, level))
- return s
-
- if type(v) is bytes:
- w = wrapper.fill(bytes.hex(v))
- s += ' ' * level + '{}: {}\n'.format(k, w)
- else:
- if type(v) is str:
- v = wrapper.fill(v)
- s += ' ' * level + '{}: {}\n'.format(k, v)
- return s
-
-
-def todump(messages, services):
- import pprint
- pp = pprint.PrettyPrinter()
-
- s = ''
- for m in messages:
- if m['name'] not in services:
- s += '# ignoring reply message: {}\n'.format(m['name'])
- continue
- #if m['name'] in message_filter:
- # s += '# ignoring message {}\n'.format(m['name'])
- # continue
- for k in argument_filter:
- try:
- m['args'].pop(k)
- except KeyError:
- pass
- a = pp.pformat(m['args'])
- s += '{}:\n'.format(m['name'])
- s += todump_items(None, m['args'], 0)
- return s
-
-
-def init_api(apidir):
- # Read API definitions
- apifiles = VPPApiJSONFiles.find_api_files(api_dir=apidir)
- messages = {}
- services = {}
- for file in apifiles:
- with open(file) as apidef_file:
- m, s = VPPApiJSONFiles.process_json_file(apidef_file)
- messages.update(m)
- services.update(s)
- return messages, services
-
-
-def replaymsgs(vpp, msgs):
- for m in msgs:
- name = m['name']
- if name not in vpp.services:
- continue
- if name == 'control_ping':
- continue
- try:
- m['args'].pop('client_index')
- except KeyError:
- pass
- if m['args']['context'] == 0:
- m['args']['context'] = 1
- f = vpp.get_function(name)
- rv = f(**m['args'])
- print('RV {}'.format(rv))
-
-
-def replay(args):
- """Replay into running VPP instance"""
-
- from vpp_papi import VPP
-
- JSON = 1
- APITRACE = 2
-
- filename, file_extension = os.path.splitext(args.input)
- input_type = JSON if file_extension == '.json' else APITRACE
-
- vpp = VPP(use_socket=args.socket)
- rv = vpp.connect(name='vppapireplay', chroot_prefix=args.shmprefix)
- if rv != 0:
- sys.exit('Cannot connect to VPP')
-
- if input_type == JSON:
- with open(args.input, 'r') as file:
- msgs = json.load(file, object_hook=vpp_decode)
- else:
- msgs = apitrace2json(messages, args.input)
-
- replaymsgs(vpp, msgs)
-
- vpp.disconnect()
-
-
-def generate(args):
- """Generate JSON"""
-
- JSON = 1
- APITRACE = 2
- PYTHON = 3
- DUMP = 4
-
- filename, file_extension = os.path.splitext(args.input)
- input_type = JSON if file_extension == '.json' else APITRACE
- filename, file_extension = os.path.splitext(args.output)
-
- if args.todump:
- output_type = DUMP
- else:
- if file_extension == '.json' or filename == '-':
- output_type = JSON
- elif file_extension == '.py':
- output_type = PYTHON
- else:
- output_type = APITRACE
-
- if input_type == output_type:
- sys.exit("error: Nothing to convert between")
-
- if input_type != JSON and output_type == APITRACE:
- sys.exit("error: Input file must be JSON file: {}".format(args.input))
-
- messages, services = init_api(args.apidir)
-
- if input_type == JSON and output_type == APITRACE:
- i = 0
- for k, v in messages.items():
- v._vl_msg_id = i
- i += 1
-
- n, result = json2apitrace(messages, args.input)
- msgtbl = serialize_msgtbl(messages)
-
- print('API messages: {}'.format(n))
- header = struct.pack(">IIB", n, len(msgtbl), 0)
-
- with open(args.output, 'wb') as outfile:
- outfile.write(header)
- outfile.write(msgtbl)
- outfile.write(result)
-
- return
-
- if input_type == APITRACE:
- result = apitrace2json(messages, args.input)
- if output_type == PYTHON:
- s = json.dumps(result, cls=VPPEncoder, default=vpp_encoder)
- x = json.loads(s, object_hook=vpp_decode)
- s = topython(x, services)
- elif output_type == DUMP:
- s = json.dumps(result, cls=VPPEncoder, default=vpp_encoder)
- x = json.loads(s, object_hook=vpp_decode)
- s = todump(x, services)
- else:
- s = json.dumps(result, cls=VPPEncoder,
- default=vpp_encoder, indent=4 * ' ')
- elif output_type == PYTHON:
- with open(args.input, 'r') as file:
- x = json.load(file, object_hook=vpp_decode)
- s = topython(x, services)
- else:
- sys.exit('Input file must be API trace file: {}'.format(args.input))
-
- if args.output == '-':
- sys.stdout.write(s + '\n')
- else:
- print('Generating {} from API trace: {}'
- .format(args.output, args.input))
- with open(args.output, 'w') as outfile:
- outfile.write(s)
-
-def general(args):
- return
-
-def main():
- parser = argparse.ArgumentParser()
- parser.add_argument('--debug', action='store_true',
- help='enable debug mode')
- parser.add_argument('--apidir',
- help='Location of JSON API definitions')
-
- parser.set_defaults(func=general)
- subparsers = parser.add_subparsers(title='subcommands',
- description='valid subcommands',
- help='additional help')
-
- parser_convert = subparsers.add_parser('convert',
- help='Convert API trace to JSON or Python and back')
- parser_convert.add_argument('input',
- help='Input file (API trace | JSON)')
- parser_convert.add_argument('--todump', action='store_true', help='Output text format')
- parser_convert.add_argument('output',
- help='Output file (Python | JSON | API trace)')
- parser_convert.set_defaults(func=generate)
-
-
- parser_replay = subparsers.add_parser('replay',
- help='Replay messages to running VPP instance')
- parser_replay.add_argument('input', help='Input file (API trace | JSON)')
- parser_replay.add_argument('--socket', action='store_true',
- help='use default socket to connect to VPP')
- parser_replay.add_argument('--shmprefix',
- help='connect to VPP on shared memory prefix')
- parser_replay.set_defaults(func=replay)
-
- args = parser.parse_args()
- if args.debug:
- logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
-
- args.func(args)
-
-
-main()
diff --git a/src/vat/CMakeLists.txt b/src/vat/CMakeLists.txt
index ee53386f114..e5945b20dec 100644
--- a/src/vat/CMakeLists.txt
+++ b/src/vat/CMakeLists.txt
@@ -32,7 +32,6 @@ add_vpp_executable(vpp_api_test ENABLE_EXPORTS
ip_types_api.c
ip_types.c
protocols.def
- ../vnet/arp/arp_test.c
DEPENDS api_headers
@@ -42,7 +41,7 @@ add_vpp_executable(vpp_api_test ENABLE_EXPORTS
vatplugin
vppinfra
Threads::Threads
- rt m dl crypto
+ dl
)
##############################################################################
@@ -58,7 +57,7 @@ add_vpp_executable(vpp_json_test ENABLE_EXPORTS NO_INSTALL
##############################################################################
install(
FILES vat.h json_format.h
- DESTINATION include/vat
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vat
COMPONENT vpp-dev
)
diff --git a/src/vat/api_format.c b/src/vat/api_format.c
index bb168f8459e..45ba025f191 100644
--- a/src/vat/api_format.c
+++ b/src/vat/api_format.c
@@ -29,7 +29,6 @@
#include <vnet/l2/l2_input.h>
#include <vnet/udp/udp_local.h>
-#include <vpp/api/vpe_msg_enum.h>
#include <vnet/l2/l2_classify.h>
#include <vnet/l2/l2_vtr.h>
#include <vnet/classify/in_out_acl.h>
@@ -56,24 +55,22 @@
#include <inttypes.h>
#include <sys/stat.h>
-#define vl_typedefs /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_typedefs
-
-/* declare message handlers for each api */
+#include <vlibmemory/memclnt.api_enum.h>
+#include <vlibmemory/memclnt.api_types.h>
+#include <vlibmemory/memclnt.api_tojson.h>
+#include <vlibmemory/memclnt.api_fromjson.h>
#define vl_endianfun /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
+#include <vlibmemory/memclnt.api.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vlibmemory/memclnt.api.h>
+#undef vl_calcsizefun
+
/* instantiate all the print functions we know about */
-#if VPP_API_TEST_BUILTIN == 0
-#define vl_print(handle, ...)
-#else
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#endif
#define vl_printfun
-#include <vpp/api/vpe_all_api_h.h>
+#include <vlibmemory/memclnt.api.h>
#undef vl_printfun
#define __plugin_msg_base 0
@@ -174,23 +171,6 @@ errmsg (char *fmt, ...)
}
#if VPP_API_TEST_BUILTIN == 0
-static uword
-api_unformat_sw_if_index (unformat_input_t * input, va_list * args)
-{
- vat_main_t *vam = va_arg (*args, vat_main_t *);
- u32 *result = va_arg (*args, u32 *);
- u8 *if_name;
- uword *p;
-
- if (!unformat (input, "%s", &if_name))
- return 0;
-
- p = hash_get_mem (vam->sw_if_index_by_interface_name, if_name);
- if (p == 0)
- return 0;
- *result = p[0];
- return 1;
-}
/* Parse an IP4 address %d.%d.%d.%d. */
uword
@@ -579,155 +559,12 @@ ip_set (ip46_address_t * dst, void *src, u8 is_ip4)
}
-static void
-vl_api_cli_reply_t_handler (vl_api_cli_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- i32 retval = ntohl (mp->retval);
-
- vam->retval = retval;
- vam->shmem_result = uword_to_pointer (mp->reply_in_shmem, u8 *);
- vam->result_ready = 1;
-}
-
-static void
-vl_api_cli_reply_t_handler_json (vl_api_cli_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- vat_json_node_t node;
- void *oldheap;
- u8 *reply;
-
- vat_json_init_object (&node);
- vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
- vat_json_object_add_uint (&node, "reply_in_shmem",
- ntohl (mp->reply_in_shmem));
- /* Toss the shared-memory original... */
- oldheap = vl_msg_push_heap ();
-
- reply = uword_to_pointer (mp->reply_in_shmem, u8 *);
- vec_free (reply);
-
- vl_msg_pop_heap (oldheap);
-
- vat_json_print (vam->ofp, &node);
- vat_json_free (&node);
-
- vam->retval = ntohl (mp->retval);
- vam->result_ready = 1;
-}
-
-static void
-vl_api_cli_inband_reply_t_handler (vl_api_cli_inband_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- i32 retval = ntohl (mp->retval);
-
- vec_reset_length (vam->cmd_reply);
-
- vam->retval = retval;
- if (retval == 0)
- vam->cmd_reply = vl_api_from_api_to_new_vec (mp, &mp->reply);
- vam->result_ready = 1;
-}
-
-static void
-vl_api_cli_inband_reply_t_handler_json (vl_api_cli_inband_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- vat_json_node_t node;
- u8 *reply = 0; /* reply vector */
-
- reply = vl_api_from_api_to_new_vec (mp, &mp->reply);
- vec_reset_length (vam->cmd_reply);
-
- vat_json_init_object (&node);
- vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
- vat_json_object_add_string_copy (&node, "reply", reply);
-
- vat_json_print (vam->ofp, &node);
- vat_json_free (&node);
- vec_free (reply);
-
- vam->retval = ntohl (mp->retval);
- vam->result_ready = 1;
-}
-
-static void vl_api_get_node_index_reply_t_handler
- (vl_api_get_node_index_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- i32 retval = ntohl (mp->retval);
- if (vam->async_mode)
- {
- vam->async_errors += (retval < 0);
- }
- else
- {
- vam->retval = retval;
- if (retval == 0)
- errmsg ("node index %d", ntohl (mp->node_index));
- vam->result_ready = 1;
- }
-}
-
-static void vl_api_get_node_index_reply_t_handler_json
- (vl_api_get_node_index_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- vat_json_node_t node;
-
- vat_json_init_object (&node);
- vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
- vat_json_object_add_uint (&node, "node_index", ntohl (mp->node_index));
-
- vat_json_print (vam->ofp, &node);
- vat_json_free (&node);
-
- vam->retval = ntohl (mp->retval);
- vam->result_ready = 1;
-}
-
-static void vl_api_get_next_index_reply_t_handler
- (vl_api_get_next_index_reply_t * mp)
+static void vl_api_get_first_msg_id_reply_t_handler
+ (vl_api_get_first_msg_id_reply_t * mp)
{
vat_main_t *vam = &vat_main;
i32 retval = ntohl (mp->retval);
- if (vam->async_mode)
- {
- vam->async_errors += (retval < 0);
- }
- else
- {
- vam->retval = retval;
- if (retval == 0)
- errmsg ("next node index %d", ntohl (mp->next_index));
- vam->result_ready = 1;
- }
-}
-
-static void vl_api_get_next_index_reply_t_handler_json
- (vl_api_get_next_index_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- vat_json_node_t node;
-
- vat_json_init_object (&node);
- vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
- vat_json_object_add_uint (&node, "next_index", ntohl (mp->next_index));
-
- vat_json_print (vam->ofp, &node);
- vat_json_free (&node);
- vam->retval = ntohl (mp->retval);
- vam->result_ready = 1;
-}
-
-static void vl_api_add_node_next_reply_t_handler
- (vl_api_add_node_next_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- i32 retval = ntohl (mp->retval);
if (vam->async_mode)
{
vam->async_errors += (retval < 0);
@@ -735,59 +572,24 @@ static void vl_api_add_node_next_reply_t_handler
else
{
vam->retval = retval;
- if (retval == 0)
- errmsg ("next index %d", ntohl (mp->next_index));
vam->result_ready = 1;
}
-}
-
-static void vl_api_add_node_next_reply_t_handler_json
- (vl_api_add_node_next_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- vat_json_node_t node;
-
- vat_json_init_object (&node);
- vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
- vat_json_object_add_uint (&node, "next_index", ntohl (mp->next_index));
-
- vat_json_print (vam->ofp, &node);
- vat_json_free (&node);
-
- vam->retval = ntohl (mp->retval);
- vam->result_ready = 1;
-}
-
-static void vl_api_show_version_reply_t_handler
- (vl_api_show_version_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- i32 retval = ntohl (mp->retval);
-
if (retval >= 0)
{
- errmsg (" program: %s", mp->program);
- errmsg (" version: %s", mp->version);
- errmsg (" build date: %s", mp->build_date);
- errmsg ("build directory: %s", mp->build_directory);
+ errmsg ("first message id %d", ntohs (mp->first_msg_id));
}
- vam->retval = retval;
- vam->result_ready = 1;
}
-static void vl_api_show_version_reply_t_handler_json
- (vl_api_show_version_reply_t * mp)
+static void vl_api_get_first_msg_id_reply_t_handler_json
+ (vl_api_get_first_msg_id_reply_t * mp)
{
vat_main_t *vam = &vat_main;
vat_json_node_t node;
vat_json_init_object (&node);
vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
- vat_json_object_add_string_copy (&node, "program", mp->program);
- vat_json_object_add_string_copy (&node, "version", mp->version);
- vat_json_object_add_string_copy (&node, "build_date", mp->build_date);
- vat_json_object_add_string_copy (&node, "build_directory",
- mp->build_directory);
+ vat_json_object_add_uint (&node, "first_msg_id",
+ (uint) ntohs (mp->first_msg_id));
vat_json_print (vam->ofp, &node);
vat_json_free (&node);
@@ -796,85 +598,35 @@ static void vl_api_show_version_reply_t_handler_json
vam->result_ready = 1;
}
-static void vl_api_show_threads_reply_t_handler
- (vl_api_show_threads_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- i32 retval = ntohl (mp->retval);
- int i, count = 0;
-
- if (retval >= 0)
- count = ntohl (mp->count);
-
- for (i = 0; i < count; i++)
- print (vam->ofp,
- "\n%-2d %-11s %-11s %-5d %-6d %-4d %-6d",
- ntohl (mp->thread_data[i].id), mp->thread_data[i].name,
- mp->thread_data[i].type, ntohl (mp->thread_data[i].pid),
- ntohl (mp->thread_data[i].cpu_id), ntohl (mp->thread_data[i].core),
- ntohl (mp->thread_data[i].cpu_socket));
-
- vam->retval = retval;
- vam->result_ready = 1;
-}
-
-static void vl_api_show_threads_reply_t_handler_json
- (vl_api_show_threads_reply_t * mp)
+/* Format hex dump. */
+u8 *
+format_hex_bytes (u8 * s, va_list * va)
{
- vat_main_t *vam = &vat_main;
- vat_json_node_t node;
- vl_api_thread_data_t *td;
- i32 retval = ntohl (mp->retval);
- int i, count = 0;
+ u8 *bytes = va_arg (*va, u8 *);
+ int n_bytes = va_arg (*va, int);
+ uword i;
- if (retval >= 0)
- count = ntohl (mp->count);
+ /* Print short or long form depending on byte count. */
+ uword short_form = n_bytes <= 32;
+ u32 indent = format_get_indent (s);
- vat_json_init_object (&node);
- vat_json_object_add_int (&node, "retval", retval);
- vat_json_object_add_uint (&node, "count", count);
+ if (n_bytes == 0)
+ return s;
- for (i = 0; i < count; i++)
+ for (i = 0; i < n_bytes; i++)
{
- td = &mp->thread_data[i];
- vat_json_object_add_uint (&node, "id", ntohl (td->id));
- vat_json_object_add_string_copy (&node, "name", td->name);
- vat_json_object_add_string_copy (&node, "type", td->type);
- vat_json_object_add_uint (&node, "pid", ntohl (td->pid));
- vat_json_object_add_int (&node, "cpu_id", ntohl (td->cpu_id));
- vat_json_object_add_int (&node, "core", ntohl (td->id));
- vat_json_object_add_int (&node, "cpu_socket", ntohl (td->cpu_socket));
+ if (!short_form && (i % 32) == 0)
+ s = format (s, "%08x: ", i);
+ s = format (s, "%02x", bytes[i]);
+ if (!short_form && ((i + 1) % 32) == 0 && (i + 1) < n_bytes)
+ s = format (s, "\n%U", format_white_space, indent);
}
- vat_json_print (vam->ofp, &node);
- vat_json_free (&node);
-
- vam->retval = retval;
- vam->result_ready = 1;
-}
-
-static int
-api_show_threads (vat_main_t * vam)
-{
- vl_api_show_threads_t *mp;
- int ret;
-
- print (vam->ofp,
- "\n%-2s %-11s %-11s %-5s %-6s %-4s %-6s",
- "ID", "Name", "Type", "LWP", "cpu_id", "Core", "Socket");
-
- M (SHOW_THREADS, mp);
-
- S (mp);
- W (ret);
- return ret;
+ return s;
}
-#define vl_api_bridge_domain_details_t_endian vl_noop_handler
-#define vl_api_bridge_domain_details_t_print vl_noop_handler
-
-static void vl_api_control_ping_reply_t_handler
- (vl_api_control_ping_reply_t * mp)
+static void
+vl_api_control_ping_reply_t_handler (vl_api_control_ping_reply_t *mp)
{
vat_main_t *vam = &vat_main;
i32 retval = ntohl (mp->retval);
@@ -891,8 +643,8 @@ static void vl_api_control_ping_reply_t_handler
vam->socket_client_main->control_pings_outstanding--;
}
-static void vl_api_control_ping_reply_t_handler_json
- (vl_api_control_ping_reply_t * mp)
+static void
+vl_api_control_ping_reply_t_handler_json (vl_api_control_ping_reply_t *mp)
{
vat_main_t *vam = &vat_main;
i32 retval = ntohl (mp->retval);
@@ -915,162 +667,6 @@ static void vl_api_control_ping_reply_t_handler_json
vam->result_ready = 1;
}
-
-static void vl_api_get_first_msg_id_reply_t_handler
- (vl_api_get_first_msg_id_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- i32 retval = ntohl (mp->retval);
-
- if (vam->async_mode)
- {
- vam->async_errors += (retval < 0);
- }
- else
- {
- vam->retval = retval;
- vam->result_ready = 1;
- }
- if (retval >= 0)
- {
- errmsg ("first message id %d", ntohs (mp->first_msg_id));
- }
-}
-
-static void vl_api_get_first_msg_id_reply_t_handler_json
- (vl_api_get_first_msg_id_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- vat_json_node_t node;
-
- vat_json_init_object (&node);
- vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
- vat_json_object_add_uint (&node, "first_msg_id",
- (uint) ntohs (mp->first_msg_id));
-
- vat_json_print (vam->ofp, &node);
- vat_json_free (&node);
-
- vam->retval = ntohl (mp->retval);
- vam->result_ready = 1;
-}
-
-static void vl_api_get_node_graph_reply_t_handler
- (vl_api_get_node_graph_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- i32 retval = ntohl (mp->retval);
- u8 *pvt_copy, *reply;
- void *oldheap;
- vlib_node_t *node;
- int i;
-
- if (vam->async_mode)
- {
- vam->async_errors += (retval < 0);
- }
- else
- {
- vam->retval = retval;
- vam->result_ready = 1;
- }
-
- /* "Should never happen..." */
- if (retval != 0)
- return;
-
- reply = uword_to_pointer (mp->reply_in_shmem, u8 *);
- pvt_copy = vec_dup (reply);
-
- /* Toss the shared-memory original... */
- oldheap = vl_msg_push_heap ();
-
- vec_free (reply);
-
- vl_msg_pop_heap (oldheap);
-
- if (vam->graph_nodes)
- {
- hash_free (vam->graph_node_index_by_name);
-
- for (i = 0; i < vec_len (vam->graph_nodes[0]); i++)
- {
- node = vam->graph_nodes[0][i];
- vec_free (node->name);
- vec_free (node->next_nodes);
- vec_free (node);
- }
- vec_free (vam->graph_nodes[0]);
- vec_free (vam->graph_nodes);
- }
-
- vam->graph_node_index_by_name = hash_create_string (0, sizeof (uword));
- vam->graph_nodes = vlib_node_unserialize (pvt_copy);
- vec_free (pvt_copy);
-
- for (i = 0; i < vec_len (vam->graph_nodes[0]); i++)
- {
- node = vam->graph_nodes[0][i];
- hash_set_mem (vam->graph_node_index_by_name, node->name, i);
- }
-}
-
-static void vl_api_get_node_graph_reply_t_handler_json
- (vl_api_get_node_graph_reply_t * mp)
-{
- vat_main_t *vam = &vat_main;
- void *oldheap;
- vat_json_node_t node;
- u8 *reply;
-
- /* $$$$ make this real? */
- vat_json_init_object (&node);
- vat_json_object_add_int (&node, "retval", ntohl (mp->retval));
- vat_json_object_add_uint (&node, "reply_in_shmem", mp->reply_in_shmem);
-
- reply = uword_to_pointer (mp->reply_in_shmem, u8 *);
-
- /* Toss the shared-memory original... */
- oldheap = vl_msg_push_heap ();
-
- vec_free (reply);
-
- vl_msg_pop_heap (oldheap);
-
- vat_json_print (vam->ofp, &node);
- vat_json_free (&node);
-
- vam->retval = ntohl (mp->retval);
- vam->result_ready = 1;
-}
-
-/* Format hex dump. */
-u8 *
-format_hex_bytes (u8 * s, va_list * va)
-{
- u8 *bytes = va_arg (*va, u8 *);
- int n_bytes = va_arg (*va, int);
- uword i;
-
- /* Print short or long form depending on byte count. */
- uword short_form = n_bytes <= 32;
- u32 indent = format_get_indent (s);
-
- if (n_bytes == 0)
- return s;
-
- for (i = 0; i < n_bytes; i++)
- {
- if (!short_form && (i % 32) == 0)
- s = format (s, "%08x: ", i);
- s = format (s, "%02x", bytes[i]);
- if (!short_form && ((i + 1) % 32) == 0 && (i + 1) < n_bytes)
- s = format (s, "\n%U", format_white_space, indent);
- }
-
- return s;
-}
-
/*
* Generate boilerplate reply handlers, which
* dig the return value out of the xxx_reply_t API message,
@@ -1120,15 +716,7 @@ foreach_standard_reply_retval_handler;
#define foreach_vpe_api_reply_msg \
_ (GET_FIRST_MSG_ID_REPLY, get_first_msg_id_reply) \
- _ (GET_NODE_GRAPH_REPLY, get_node_graph_reply) \
- _ (CONTROL_PING_REPLY, control_ping_reply) \
- _ (CLI_REPLY, cli_reply) \
- _ (CLI_INBAND_REPLY, cli_inband_reply) \
- _ (GET_NODE_INDEX_REPLY, get_node_index_reply) \
- _ (GET_NEXT_INDEX_REPLY, get_next_index_reply) \
- _ (ADD_NODE_NEXT_REPLY, add_node_next_reply) \
- _ (SHOW_VERSION_REPLY, show_version_reply) \
- _ (SHOW_THREADS_REPLY, show_threads_reply) \
+ _ (CONTROL_PING_REPLY, control_ping_reply)
#define foreach_standalone_reply_msg \
@@ -1142,57 +730,23 @@ typedef struct
case L2_VTR_ ## op: \
return "" # op;
-/*
- * Pass CLI buffers directly in the CLI_INBAND API message,
- * instead of an additional shared memory area.
- */
-static int
-exec_inband (vat_main_t * vam)
+static const char *
+str_vtr_op (u32 vtr_op)
{
- vl_api_cli_inband_t *mp;
- unformat_input_t *i = vam->input;
- int ret;
-
- if (vec_len (i->buffer) == 0)
- return -1;
-
- if (vam->exec_mode == 0 && unformat (i, "mode"))
+ switch (vtr_op)
{
- vam->exec_mode = 1;
- return 0;
- }
- if (vam->exec_mode == 1 && (unformat (i, "exit") || unformat (i, "quit")))
- {
- vam->exec_mode = 0;
- return 0;
+ STR_VTR_OP_CASE (DISABLED);
+ STR_VTR_OP_CASE (PUSH_1);
+ STR_VTR_OP_CASE (PUSH_2);
+ STR_VTR_OP_CASE (POP_1);
+ STR_VTR_OP_CASE (POP_2);
+ STR_VTR_OP_CASE (TRANSLATE_1_1);
+ STR_VTR_OP_CASE (TRANSLATE_1_2);
+ STR_VTR_OP_CASE (TRANSLATE_2_1);
+ STR_VTR_OP_CASE (TRANSLATE_2_2);
}
- /*
- * In order for the CLI command to work, it
- * must be a vector ending in \n, not a C-string ending
- * in \n\0.
- */
- M2 (CLI_INBAND, mp, vec_len (vam->input->buffer));
- vl_api_vec_to_api_string (vam->input->buffer, &mp->cmd);
-
- S (mp);
- W (ret);
- /* json responses may or may not include a useful reply... */
- if (vec_len (vam->cmd_reply))
- print (vam->ofp, "%v", (char *) (vam->cmd_reply));
- return ret;
-}
-
-int
-exec (vat_main_t *vam)
-{
- return exec_inband (vam);
-}
-
-int
-api_sw_interface_dump (vat_main_t *vam)
-{
- return 0;
+ return "UNKNOWN";
}
uword
@@ -1865,7 +1419,7 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
if (match == 0)
clib_warning ("BUG: match 0");
- _vec_len (mask) = match * sizeof (u32x4);
+ vec_set_len (mask, match * sizeof (u32x4));
*matchp = match;
*maskp = mask;
@@ -2432,7 +1986,7 @@ api_unformat_classify_match (unformat_input_t * input, va_list * args)
sizeof (u32x4));
/* Set size, include skipped vectors */
- _vec_len (match) = (match_n_vectors + skip_n_vectors) * sizeof (u32x4);
+ vec_set_len (match, (match_n_vectors + skip_n_vectors) * sizeof (u32x4));
*matchp = match;
@@ -2442,140 +1996,6 @@ api_unformat_classify_match (unformat_input_t * input, va_list * args)
return 0;
}
-static int
-api_get_node_index (vat_main_t *vam)
-{
- unformat_input_t *i = vam->input;
- vl_api_get_node_index_t *mp;
- u8 *name = 0;
- int ret;
-
- while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (i, "node %s", &name))
- ;
- else
- break;
- }
- if (name == 0)
- {
- errmsg ("node name required");
- return -99;
- }
- if (vec_len (name) >= ARRAY_LEN (mp->node_name))
- {
- errmsg ("node name too long, max %d", ARRAY_LEN (mp->node_name));
- return -99;
- }
-
- M (GET_NODE_INDEX, mp);
- clib_memcpy (mp->node_name, name, vec_len (name));
- vec_free (name);
-
- S (mp);
- W (ret);
- return ret;
-}
-
-static int
-api_get_next_index (vat_main_t *vam)
-{
- unformat_input_t *i = vam->input;
- vl_api_get_next_index_t *mp;
- u8 *node_name = 0, *next_node_name = 0;
- int ret;
-
- while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (i, "node-name %s", &node_name))
- ;
- else if (unformat (i, "next-node-name %s", &next_node_name))
- break;
- }
-
- if (node_name == 0)
- {
- errmsg ("node name required");
- return -99;
- }
- if (vec_len (node_name) >= ARRAY_LEN (mp->node_name))
- {
- errmsg ("node name too long, max %d", ARRAY_LEN (mp->node_name));
- return -99;
- }
-
- if (next_node_name == 0)
- {
- errmsg ("next node name required");
- return -99;
- }
- if (vec_len (next_node_name) >= ARRAY_LEN (mp->next_name))
- {
- errmsg ("next node name too long, max %d", ARRAY_LEN (mp->next_name));
- return -99;
- }
-
- M (GET_NEXT_INDEX, mp);
- clib_memcpy (mp->node_name, node_name, vec_len (node_name));
- clib_memcpy (mp->next_name, next_node_name, vec_len (next_node_name));
- vec_free (node_name);
- vec_free (next_node_name);
-
- S (mp);
- W (ret);
- return ret;
-}
-
-static int
-api_add_node_next (vat_main_t *vam)
-{
- unformat_input_t *i = vam->input;
- vl_api_add_node_next_t *mp;
- u8 *name = 0;
- u8 *next = 0;
- int ret;
-
- while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (i, "node %s", &name))
- ;
- else if (unformat (i, "next %s", &next))
- ;
- else
- break;
- }
- if (name == 0)
- {
- errmsg ("node name required");
- return -99;
- }
- if (vec_len (name) >= ARRAY_LEN (mp->node_name))
- {
- errmsg ("node name too long, max %d", ARRAY_LEN (mp->node_name));
- return -99;
- }
- if (next == 0)
- {
- errmsg ("next node required");
- return -99;
- }
- if (vec_len (next) >= ARRAY_LEN (mp->next_name))
- {
- errmsg ("next name too long, max %d", ARRAY_LEN (mp->next_name));
- return -99;
- }
-
- M (ADD_NODE_NEXT, mp);
- clib_memcpy (mp->node_name, name, vec_len (name));
- clib_memcpy (mp->next_name, next, vec_len (next));
- vec_free (name);
- vec_free (next);
-
- S (mp);
- W (ret);
- return ret;
-}
-
#define foreach_vtr_op \
_ ("disable", L2_VTR_DISABLED) \
_ ("push-1", L2_VTR_PUSH_1) \
@@ -2588,19 +2008,6 @@ api_add_node_next (vat_main_t *vam)
_ ("translate-2-2", L2_VTR_TRANSLATE_2_2)
static int
-api_show_version (vat_main_t *vam)
-{
- vl_api_show_version_t *mp;
- int ret;
-
- M (SHOW_VERSION, mp);
-
- S (mp);
- W (ret);
- return ret;
-}
-
-static int
api_get_first_msg_id (vat_main_t *vam)
{
vl_api_get_first_msg_id_t *mp;
@@ -2637,21 +2044,6 @@ api_get_first_msg_id (vat_main_t *vam)
return ret;
}
-static int
-api_get_node_graph (vat_main_t *vam)
-{
- vl_api_get_node_graph_t *mp;
- int ret;
-
- M (GET_NODE_GRAPH, mp);
-
- /* send it... */
- S (mp);
- /* Wait for the reply */
- W (ret);
- return ret;
-}
-
#define foreach_pbb_vtr_op \
_("disable", L2_VTR_DISABLED) \
_("pop", L2_VTR_POP_2) \
@@ -2944,12 +2336,10 @@ help (vat_main_t * vam)
print (vam->ofp, "Help is available for the following:");
- /* *INDENT-OFF* */
hash_foreach_pair (p, vam->function_by_name,
({
vec_add1 (cmds, (u8 *)(p->key));
}));
- /* *INDENT-ON* */
vec_sort_with_function (cmds, cmd_cmp);
@@ -3022,14 +2412,11 @@ dump_macro_table (vat_main_t * vam)
int i;
hash_pair_t *p;
- /* *INDENT-OFF* */
- hash_foreach_pair (p, vam->macro_main.the_value_table_hash,
- ({
- vec_add2 (sort_me, sm, 1);
- sm->name = (u8 *)(p->key);
- sm->value = (u8 *) (p->value[0]);
- }));
- /* *INDENT-ON* */
+ hash_foreach_pair (p, vam->macro_main.the_value_table_hash, ({
+ vec_add2 (sort_me, sm, 1);
+ sm->name = (u8 *) (p->key);
+ sm->value = (u8 *) (p->value[0]);
+ }));
vec_sort_with_function (sort_me, macro_sort_cmp);
@@ -3065,14 +2452,12 @@ dump_msg_api_table (vat_main_t * vam)
hash_pair_t *hp;
int i;
- /* *INDENT-OFF* */
hash_foreach_pair (hp, am->msg_index_by_name_and_crc,
({
vec_add2 (nses, ns, 1);
ns->name = (u8 *)(hp->key);
ns->value = (u32) hp->value[0];
}));
- /* *INDENT-ON* */
vec_sort_with_function (nses, value_sort_cmp);
@@ -3218,37 +2603,114 @@ echo (vat_main_t * vam)
return 0;
}
+int exec (vat_main_t *vam) __attribute__ ((weak));
+int
+exec (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+name_sort_cmp (void *a1, void *a2)
+{
+ name_sort_t *n1 = a1;
+ name_sort_t *n2 = a2;
+
+ return strcmp ((char *) n1->name, (char *) n2->name);
+}
+
+static int
+dump_interface_table (vat_main_t *vam)
+{
+ hash_pair_t *p;
+ name_sort_t *nses = 0, *ns;
+
+ if (vam->json_output)
+ {
+ clib_warning (
+ "JSON output supported only for VPE API calls and dump_stats_table");
+ return -99;
+ }
+
+ hash_foreach_pair (p, vam->sw_if_index_by_interface_name, ({
+ vec_add2 (nses, ns, 1);
+ ns->name = (u8 *) (p->key);
+ ns->value = (u32) p->value[0];
+ }));
+
+ vec_sort_with_function (nses, name_sort_cmp);
+
+ print (vam->ofp, "%-25s%-15s", "Interface", "sw_if_index");
+ vec_foreach (ns, nses)
+ {
+ print (vam->ofp, "%-25s%-15d", ns->name, ns->value);
+ }
+ vec_free (nses);
+ return 0;
+}
+
+static int
+dump_sub_interface_table (vat_main_t *vam)
+{
+ const sw_interface_subif_t *sub = NULL;
+
+ if (vam->json_output)
+ {
+ clib_warning (
+ "JSON output supported only for VPE API calls and dump_stats_table");
+ return -99;
+ }
+
+ print (vam->ofp, "%-30s%-12s%-11s%-7s%-5s%-9s%-9s%-6s%-8s%-10s%-10s",
+ "Interface", "sw_if_index", "sub id", "dot1ad", "tags", "outer id",
+ "inner id", "exact", "default", "outer any", "inner any");
+
+ vec_foreach (sub, vam->sw_if_subif_table)
+ {
+ print (vam->ofp, "%-30s%-12d%-11d%-7s%-5d%-9d%-9d%-6d%-8d%-10d%-10d",
+ sub->interface_name, sub->sw_if_index, sub->sub_id,
+ sub->sub_dot1ad ? "dot1ad" : "dot1q", sub->sub_number_of_tags,
+ sub->sub_outer_vlan_id, sub->sub_inner_vlan_id,
+ sub->sub_exact_match, sub->sub_default,
+ sub->sub_outer_vlan_id_any, sub->sub_inner_vlan_id_any);
+ if (sub->vtr_op != L2_VTR_DISABLED)
+ {
+ print (vam->ofp,
+ " vlan-tag-rewrite - op: %-14s [ dot1q: %d "
+ "tag1: %d tag2: %d ]",
+ str_vtr_op (sub->vtr_op), sub->vtr_push_dot1q, sub->vtr_tag1,
+ sub->vtr_tag2);
+ }
+ }
+
+ return 0;
+}
+
/* List of API message constructors, CLI names map to api_xxx */
#define foreach_vpe_api_msg \
-_(get_node_index, "node <node-name") \
-_(add_node_next, "node <node-name> next <next-node-name>") \
-_(show_version, "") \
-_(show_threads, "") \
_(get_first_msg_id, "client <name>") \
-_(get_node_graph, " ") \
-_(get_next_index, "node-name <node-name> next-node-name <node-name>") \
_(sock_init_shm, "size <nnn>") \
/* List of command functions, CLI names map directly to functions */
-#define foreach_cli_function \
-_(comment, "usage: comment <ignore-rest-of-line>") \
-_(dump_macro_table, "usage: dump_macro_table ") \
-_(dump_msg_api_table, "usage: dump_msg_api_table") \
-_(elog_setup, "usage: elog_setup [nevents, default 128K]") \
-_(elog_disable, "usage: elog_disable") \
-_(elog_enable, "usage: elog_enable") \
-_(elog_save, "usage: elog_save <filename>") \
-_(get_msg_id, "usage: get_msg_id name_and_crc") \
-_(echo, "usage: echo <message>") \
-_(exec, "usage: exec <vpe-debug-CLI-command>") \
-_(exec_inband, "usage: exec_inband <vpe-debug-CLI-command>") \
-_(help, "usage: help") \
-_(q, "usage: quit") \
-_(quit, "usage: quit") \
-_(search_node_table, "usage: search_node_table <name>...") \
-_(set, "usage: set <variable-name> <value>") \
-_(script, "usage: script <file-name>") \
-_(statseg, "usage: statseg") \
-_(unset, "usage: unset <variable-name>")
+#define foreach_cli_function \
+ _ (comment, "usage: comment <ignore-rest-of-line>") \
+ _ (dump_interface_table, "usage: dump_interface_table") \
+ _ (dump_sub_interface_table, "usage: dump_sub_interface_table") \
+ _ (dump_macro_table, "usage: dump_macro_table ") \
+ _ (dump_msg_api_table, "usage: dump_msg_api_table") \
+ _ (elog_setup, "usage: elog_setup [nevents, default 128K]") \
+ _ (elog_disable, "usage: elog_disable") \
+ _ (elog_enable, "usage: elog_enable") \
+ _ (elog_save, "usage: elog_save <filename>") \
+ _ (get_msg_id, "usage: get_msg_id name_and_crc") \
+ _ (echo, "usage: echo <message>") \
+ _ (help, "usage: help") \
+ _ (q, "usage: quit") \
+ _ (quit, "usage: quit") \
+ _ (search_node_table, "usage: search_node_table <name>...") \
+ _ (set, "usage: set <variable-name> <value>") \
+ _ (script, "usage: script <file-name>") \
+ _ (statseg, "usage: statseg") \
+ _ (unset, "usage: unset <variable-name>")
#define _(N,n) \
static void vl_api_##n##_t_handler_uni \
@@ -3270,13 +2732,19 @@ foreach_standalone_reply_msg;
void
vat_api_hookup (vat_main_t * vam)
{
-#define _(N,n) \
- vl_msg_api_set_handlers(VL_API_##N, #n, \
- vl_api_##n##_t_handler_uni, \
- vl_noop_handler, \
- vl_api_##n##_t_endian, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 1);
+#define _(N, n) \
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = VL_API_##N + 1, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler_uni, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = 1, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ });
foreach_vpe_api_reply_msg;
#if VPP_API_TEST_BUILTIN == 0
foreach_standalone_reply_msg;
@@ -3284,7 +2752,7 @@ vat_api_hookup (vat_main_t * vam)
#undef _
#if (VPP_API_TEST_BUILTIN==0)
- vl_msg_api_set_first_available_msg_id (VL_MSG_FIRST_AVAILABLE);
+ vl_msg_api_set_first_available_msg_id (VL_MSG_MEMCLNT_LAST + 1);
vam->sw_if_index_by_interface_name = hash_create_string (0, sizeof (uword));
diff --git a/src/vat/ip_types.c b/src/vat/ip_types.c
index 8edcb133f33..248205287a4 100644
--- a/src/vat/ip_types.c
+++ b/src/vat/ip_types.c
@@ -41,14 +41,16 @@ uword
unformat_ip_address (unformat_input_t * input, va_list * args)
{
ip_address_t *a = va_arg (*args, ip_address_t *);
+ ip_address_t tmp, *p_tmp = &tmp;
- clib_memset (a, 0, sizeof (*a));
- if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (a)))
- ip_addr_version (a) = AF_IP4;
- else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (a)))
- ip_addr_version (a) = AF_IP6;
+ clib_memset (p_tmp, 0, sizeof (*p_tmp));
+ if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (p_tmp)))
+ ip_addr_version (p_tmp) = AF_IP4;
+ else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (p_tmp)))
+ ip_addr_version (p_tmp) = AF_IP6;
else
return 0;
+ *a = *p_tmp;
return 1;
}
@@ -203,9 +205,9 @@ ip_address_family_to_link_type (ip_address_family_t af)
return (VNET_LINK_IP4);
}
-
void
-ip_address_set (ip_address_t * dst, const void *src, u8 version)
+ip_address_set (ip_address_t *dst, const void *src,
+ ip_address_family_t version)
{
ip_addr_version (dst) = version;
@@ -344,23 +346,24 @@ ip_prefix_copy (void *dst, void *src)
}
int
-ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2)
+ip_prefix_cmp (const ip_prefix_t *ipp1, const ip_prefix_t *ipp2)
{
+ ip_prefix_t p1 = *ipp1, p2 = *ipp2;
int cmp = 0;
- ip_prefix_normalize (p1);
- ip_prefix_normalize (p2);
+ ip_prefix_normalize (&p1);
+ ip_prefix_normalize (&p2);
- cmp = ip_address_cmp (&ip_prefix_addr (p1), &ip_prefix_addr (p2));
+ cmp = ip_address_cmp (&ip_prefix_addr (&p1), &ip_prefix_addr (&p2));
if (cmp == 0)
{
- if (ip_prefix_len (p1) < ip_prefix_len (p2))
+ if (ip_prefix_len (&p1) < ip_prefix_len (&p2))
{
cmp = 1;
}
else
{
- if (ip_prefix_len (p1) > ip_prefix_len (p2))
+ if (ip_prefix_len (&p1) > ip_prefix_len (&p2))
cmp = 2;
}
}
diff --git a/src/vat/json_format.h b/src/vat/json_format.h
index 71db79eacf5..77128621d21 100644
--- a/src/vat/json_format.h
+++ b/src/vat/json_format.h
@@ -22,6 +22,11 @@
#include <vppinfra/clib.h>
#include <vppinfra/format.h>
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#endif /* __FreeBSD__ */
#include <netinet/ip.h>
/* JSON value type */
diff --git a/src/vat/main.c b/src/vat/main.c
index 70352e6bf95..f4bb0cddee4 100644
--- a/src/vat/main.c
+++ b/src/vat/main.c
@@ -13,6 +13,7 @@
* limitations under the License.
*/
#include "vat.h"
+#include <dlfcn.h>
#include "plugin.h"
#include <signal.h>
#include <limits.h>
@@ -42,7 +43,6 @@ connect_to_vpe (char *name)
return 0;
}
-/* *INDENT-OFF* */
vlib_global_main_t vlib_global_main;
@@ -97,7 +97,7 @@ do_one_file (vat_main_t * vam)
rv = write (1, "exec# ", 6);
}
- _vec_len (vam->inbuf) = 4096;
+ vec_set_len (vam->inbuf, 4096);
if (vam->do_exit ||
fgets ((char *) vam->inbuf, vec_len (vam->inbuf), vam->ifp) == 0)
@@ -181,7 +181,7 @@ do_one_file (vat_main_t * vam)
if (vam->regenerate_interface_table)
{
vam->regenerate_interface_table = 0;
- api_sw_interface_dump (vam);
+ vam->api_sw_interface_dump (vam);
}
/* Hack to pick up new client index after memfd_segment_create pivot */
@@ -266,6 +266,7 @@ setup_signal_handlers (void)
/* these signals take the default action */
case SIGABRT:
case SIGKILL:
+ case SIGCONT:
case SIGSTOP:
case SIGUSR1:
case SIGUSR2:
@@ -313,8 +314,7 @@ vat_find_plugin_path ()
return;
*p = 0;
- s = format (0, "%s/lib/" CLIB_TARGET_TRIPLET "/vpp_api_test_plugins:"
- "%s/lib/vpp_api_test_plugins", path, path);
+ s = format (0, "%s/" CLIB_LIB_DIR "/vpp_api_test_plugins", path, path);
vec_add1 (s, 0);
vat_plugin_path = (char *) s;
}
@@ -381,6 +381,30 @@ vlib_call_init_exit_functions (vlib_main_t *vm,
1 /* do_sort */, is_global);
}
+static void
+vat_register_interface_dump (vat_main_t *vam)
+{
+ void *handle;
+ plugin_info_t *pi;
+
+ vec_foreach (pi, vat_plugin_main.plugin_info)
+ {
+ handle = dlsym (pi->handle, "api_sw_interface_dump");
+ if (handle)
+ {
+ vam->api_sw_interface_dump = handle;
+ break;
+ }
+ }
+
+ if (!vam->api_sw_interface_dump)
+ {
+ fformat (stderr,
+ "sw_interface_dump not found in interface_test plugin!\n");
+ exit (1);
+ }
+}
+
int
main (int argc, char **argv)
{
@@ -485,9 +509,6 @@ main (int argc, char **argv)
vam->json_output = json_output;
- if (!json_output)
- api_sw_interface_dump (vam);
-
vec_validate (vam->inbuf, 4096);
load_features ();
@@ -495,6 +516,11 @@ main (int argc, char **argv)
vam->current_file = (u8 *) "plugin-init";
vat_plugin_init (vam);
+ vat_register_interface_dump (vam);
+
+ if (!json_output)
+ vam->api_sw_interface_dump (vam);
+
/* Set up the init function hash table */
vgm->init_functions_called = hash_create (0, 0);
diff --git a/src/vat/plugin.c b/src/vat/plugin.c
index 5c7045415a8..bc780001bbd 100644
--- a/src/vat/plugin.c
+++ b/src/vat/plugin.c
@@ -148,16 +148,16 @@ vat_load_new_plugins (plugin_main_t * pm)
if (p == 0)
{
vec_add2 (pm->plugin_info, pi, 1);
+ clib_memset (pi, 0, sizeof (*pi));
pi->name = plugin_name;
pi->file_info = statb;
if (load_one_plugin (pm, pi))
{
vec_free (plugin_name);
- _vec_len (pm->plugin_info) = vec_len (pm->plugin_info) - 1;
+ vec_set_len (pm->plugin_info, vec_len (pm->plugin_info) - 1);
continue;
}
- clib_memset (pi, 0, sizeof (*pi));
hash_set_mem (pm->plugin_by_name_hash, plugin_name,
pi - pm->plugin_info);
}
diff --git a/src/vat/vat.h b/src/vat/vat.h
index 32de90e4411..d8045752b35 100644
--- a/src/vat/vat.h
+++ b/src/vat/vat.h
@@ -33,6 +33,8 @@
#include <vlib/unix/unix.h>
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
+#include <vlibmemory/memclnt.api_enum.h>
+#include <vlibmemory/memclnt.api_types.h>
#include "vat/json_format.h"
@@ -129,7 +131,7 @@ typedef struct
struct vat_registered_features_t;
-typedef struct
+typedef struct vat_main_
{
/* vpe input queue */
svm_queue_t *vl_input_queue;
@@ -232,6 +234,8 @@ typedef struct
struct vat_registered_features_t *feature_function_registrations;
+ int (*api_sw_interface_dump) (struct vat_main_ *);
+
/* Convenience */
vlib_main_t *vlib_main;
} vat_main_t;
@@ -295,6 +299,25 @@ static void __vlib_add_config_function_##x (void) \
.function = x, \
}
+#if VPP_API_TEST_BUILTIN == 0
+static_always_inline uword
+api_unformat_sw_if_index (unformat_input_t *input, va_list *args)
+{
+ vat_main_t *vam = va_arg (*args, vat_main_t *);
+ u32 *result = va_arg (*args, u32 *);
+ u8 *if_name;
+ uword *p;
+
+ if (!unformat (input, "%s", &if_name))
+ return 0;
+
+ p = hash_get_mem (vam->sw_if_index_by_interface_name, if_name);
+ if (p == 0)
+ return 0;
+ *result = p[0];
+ return 1;
+}
+#endif /* VPP_API_TEST_BUILTIN */
#endif /* __included_vat_h__ */
diff --git a/src/vat2/CMakeLists.txt b/src/vat2/CMakeLists.txt
index 9069d8f6b62..6f843c34661 100644
--- a/src/vat2/CMakeLists.txt
+++ b/src/vat2/CMakeLists.txt
@@ -14,11 +14,10 @@
##############################################################################
# vat2
##############################################################################
-add_vpp_executable(vat2 ENABLE_EXPORTS NO_INSTALL
+add_vpp_executable(vat2 ENABLE_EXPORTS
SOURCES
main.c
plugin.c
- jsonconvert.c
DEPENDS api_headers
@@ -28,7 +27,7 @@ add_vpp_executable(vat2 ENABLE_EXPORTS NO_INSTALL
vppinfra
vppapiclient
Threads::Threads
- rt m dl crypto
+ dl
)
#
@@ -41,23 +40,21 @@ vpp_generate_api_c_header (test/vat2_test.api)
add_vpp_executable(test_vat2 ENABLE_EXPORTS NO_INSTALL
SOURCES
test/vat2_test.c
- jsonconvert.c
DEPENDS api_headers
LINK_LIBRARIES
+ vppinfra
vlibmemoryclient
+ vlibapi
svm
- vppinfra
vppapiclient
Threads::Threads
- rt m dl crypto
+ dl
)
#target_link_options(test_vat2 PUBLIC "LINKER:-fsanitize=address")
-
-if("${CMAKE_VERSION}" VERSION_GREATER_EQUAL "3.13" AND "${CMAKE_C_COMPILER_ID}" MATCHES "(Apple)?[Cc]lang")
+if(VPP_BUILD_TESTS_WITH_COVERAGE)
set(TARGET_NAME test_vat2)
- set(COV_SOURCES ${CMAKE_SOURCE_DIR}/vat2/jsonconvert.c)
message("Building with llvm Code Coverage Tools ${TARGET_NAME}")
target_compile_options(${TARGET_NAME} PRIVATE -fprofile-instr-generate -fcoverage-mapping)
@@ -96,8 +93,7 @@ endif()
##############################################################################
install(
FILES
- jsonconvert.h
vat2_helpers.h
- DESTINATION include/vat2
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vat2
COMPONENT vpp-dev
)
diff --git a/src/vat2/jsonconvert.h b/src/vat2/jsonconvert.h
deleted file mode 100644
index 038ad74ac0e..00000000000
--- a/src/vat2/jsonconvert.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2020 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_json_convert_h
-#define included_json_convert_h
-
-#include <stdbool.h>
-#include <vppinfra/cJSON.h>
-#include <vnet/ethernet/mac_address.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/ip/ip_types.api_types.h>
-#include <vnet/ethernet/ethernet_types.api_types.h>
-
-#define foreach_vat2_fromjson \
- _(i8) \
- _(u8) \
- _(i16) \
- _(u16) \
- _(i32) \
- _(u32) \
- _(u64) \
- _(f64)
-
-#define _(T) \
- int vl_api_ ##T## _fromjson(cJSON *o, T *d);
-foreach_vat2_fromjson
-#undef _
-
- /* Prototypes */
- int
- vl_api_bool_fromjson (cJSON *o, bool *d);
-int vl_api_ip4_address_t_fromjson (void **mp, int *len, cJSON *o,
- vl_api_ip4_address_t *a);
-int vl_api_ip4_prefix_t_fromjson (void **mp, int *len, cJSON *o,
- vl_api_ip4_prefix_t *a);
-int vl_api_ip4_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
- vl_api_ip4_prefix_t *a);
-int vl_api_ip6_address_t_fromjson (void **mp, int *len, cJSON *o,
- vl_api_ip6_address_t *a);
-int vl_api_ip6_prefix_t_fromjson (void **mp, int *len, cJSON *o,
- vl_api_ip6_prefix_t *a);
-int vl_api_ip6_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
- vl_api_ip6_prefix_t *a);
-int vl_api_address_t_fromjson (void **mp, int *len, cJSON *o,
- vl_api_address_t *a);
-int vl_api_prefix_t_fromjson (void **mp, int *len, cJSON *o,
- vl_api_prefix_t *a);
-int vl_api_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
- vl_api_prefix_t *a);
-int vl_api_mac_address_t_fromjson (void **mp, int *len, cJSON *o,
- vl_api_mac_address_t *a);
-
-uword unformat_ip4_address (unformat_input_t *input, va_list *args);
-uword unformat_ip6_address (unformat_input_t *input, va_list *args);
-u8 *format_ip6_address (u8 *s, va_list *args);
-uword unformat_mac_address (unformat_input_t *input, va_list *args);
-u8 *format_ip4_address (u8 *s, va_list *args);
-u8 *format_vl_api_interface_index_t (u8 *s, va_list *args);
-u8 *format_vl_api_timestamp_t (u8 *s, va_list *args);
-u8 *format_vl_api_timedelta_t (u8 *s, va_list *args);
-uword unformat_vl_api_timedelta_t (unformat_input_t *input, va_list *args);
-uword unformat_vl_api_timestamp_t (unformat_input_t *input, va_list *args);
-u8 *format_vl_api_gbp_scope_t (u8 *s, va_list *args);
-uword unformat_vl_api_gbp_scope_t (unformat_input_t *input, va_list *args);
-
-int vl_api_c_string_to_api_string (const char *buf, vl_api_string_t *str);
-void vl_api_string_cJSON_AddToObject (cJSON *const object,
- const char *const name,
- vl_api_string_t *astr);
-
-u8 *u8string_fromjson (cJSON *o, char *fieldname);
-int u8string_fromjson2 (cJSON *o, char *fieldname, u8 *data);
-int vl_api_u8_string_fromjson (cJSON *o, u8 *s, int len);
-
-#define foreach_vat2_tojson \
- _(ip4_address) \
- _(ip4_prefix) \
- _(ip6_address) \
- _(ip6_prefix) \
- _(address) \
- _(prefix) \
- _(mac_address)
-
-#define _(T) \
- cJSON *vl_api_ ##T## _t_tojson(vl_api_ ##T## _t *);
- foreach_vat2_tojson
-#undef _
-
-cJSON *vl_api_ip4_address_with_prefix_t_tojson (vl_api_ip4_prefix_t *a);
-cJSON *vl_api_ip6_address_with_prefix_t_tojson (vl_api_ip6_prefix_t *a);
-cJSON *vl_api_address_with_prefix_t_tojson (vl_api_prefix_t *a);
-
-#endif
diff --git a/src/vat2/main.c b/src/vat2/main.c
index 5b042e23503..bf415854db1 100644
--- a/src/vat2/main.c
+++ b/src/vat2/main.c
@@ -14,10 +14,14 @@
*/
#include <stdio.h>
+#include <stdlib.h>
#include <stdbool.h>
#include <ctype.h>
+#include <getopt.h>
+#include <string.h>
#include <vlib/vlib.h>
#include <vlibapi/api_types.h>
+#include <vppinfra/hash.h>
#include <vppinfra/cJSON.h>
/* VPP API client includes */
@@ -26,12 +30,40 @@
#include <limits.h>
#include "vat2.h"
+bool vat2_debug;
+
+/*
+ * Filter these messages as they are used to manage the API connection to VPP
+ */
+char *filter_messages_strings[] = { "memclnt_create",
+ "memclnt_delete",
+ "sockclnt_create",
+ "sockclnt_delete",
+ "memclnt_rx_thread_suspend",
+ "memclnt_read_timeout",
+ "rx_thread_exit",
+ "trace_plugin_msg_ids",
+ 0 };
+
+static bool
+filter_message (char *msgname)
+{
+ char **p = filter_messages_strings;
+
+ while (*p)
+ {
+ if (strcmp (*p, msgname) == 0)
+ return true;
+ p++;
+ }
+ return false;
+}
+
uword *function_by_name;
bool debug = false;
-char *vat2_plugin_path;
-static void
-vat2_find_plugin_path ()
+static u8 *
+vat2_find_plugin_path (void)
{
char *p, path[PATH_MAX];
int rv;
@@ -39,189 +71,384 @@ vat2_find_plugin_path ()
/* find executable path */
if ((rv = readlink ("/proc/self/exe", path, PATH_MAX - 1)) == -1)
- return;
+ return 0;
/* readlink doesn't provide null termination */
path[rv] = 0;
/* strip filename */
if ((p = strrchr (path, '/')) == 0)
- return;
+ return 0;
*p = 0;
/* strip bin/ */
if ((p = strrchr (path, '/')) == 0)
- return;
+ return 0;
*p = 0;
- s = format (0, "%s/lib/" CLIB_TARGET_TRIPLET "/vat2_plugins:"
- "%s/lib/vat2_plugins", path, path);
+ s = format (0, "%s/" CLIB_LIB_DIR "/vat2_plugins", path, path);
vec_add1 (s, 0);
- vat2_plugin_path = (char *) s;
+ return s;
}
void
vac_callback (unsigned char *data, int len)
{
- u16 result_msg_id = ntohs(*((u16 *)data));
- DBG("Received something async: %d\n", result_msg_id);
+ u16 result_msg_id = ntohs (*((u16 *) data));
+ DBG ("Received something async: %d\n", result_msg_id);
}
-int vat2_load_plugins (char *path, char *filter, int *loaded);
+int vat2_load_plugins (u8 *path, char *filter, int *loaded);
static int
-register_function (void)
+register_function (char *pluginpath)
{
int loaded;
+ u8 *vat2_plugin_path = 0;
+
+ if (pluginpath == 0)
+ {
+ vat2_plugin_path = vat2_find_plugin_path ();
+ }
+ else
+ {
+ vat2_plugin_path = format (0, "%s", pluginpath);
+ vec_add1 (vat2_plugin_path, 0);
+ }
+ DBG ("Plugin Path %s\n", vat2_plugin_path);
+ int rv = vat2_load_plugins (vat2_plugin_path, 0, &loaded);
+ DBG ("Loaded %u plugins\n", loaded);
+
+ vec_free (vat2_plugin_path);
- vat2_find_plugin_path();
- DBG("Plugin Path %s\n", vat2_plugin_path);
- int rv = vat2_load_plugins(vat2_plugin_path, 0, &loaded);
- DBG("Loaded %u plugins\n", loaded);
return rv;
}
+struct apifuncs_s
+{
+ cJSON (*f) (cJSON *);
+ cJSON (*tojson) (void *);
+ u32 crc;
+};
+
+struct apifuncs_s *apifuncs = 0;
+
void
-vat2_register_function(char *name, cJSON (*f)(cJSON *))
+vat2_register_function (char *name, cJSON (*f) (cJSON *),
+ cJSON (*tojson) (void *), u32 crc)
+{
+ struct apifuncs_s funcs = { .f = f, .tojson = tojson, .crc = crc };
+ vec_add1 (apifuncs, funcs);
+ hash_set_mem (function_by_name, name, vec_len (apifuncs) - 1);
+}
+
+static int
+vat2_exec_command_by_name (char *msgname, cJSON *o)
+{
+ u32 crc = 0;
+ if (filter_message (msgname))
+ return 0;
+
+ cJSON *crc_obj = cJSON_GetObjectItem (o, "_crc");
+ if (crc_obj)
+ {
+ char *crc_str = cJSON_GetStringValue (crc_obj);
+ crc = (u32) strtol (crc_str, NULL, 16);
+ }
+
+ uword *p = hash_get_mem (function_by_name, msgname);
+ if (!p)
+ {
+ fprintf (stderr, "No such command %s\n", msgname);
+ return -1;
+ }
+ if (crc && crc != apifuncs[p[0]].crc)
+ {
+ fprintf (stderr, "API CRC does not match: %s!\n", msgname);
+ }
+
+ cJSON *(*fp) (cJSON *);
+ fp = (void *) apifuncs[p[0]].f;
+ cJSON *r = (*fp) (o);
+
+ if (r)
+ {
+ char *output = cJSON_Print (r);
+ cJSON_Delete (r);
+ printf ("%s\n", output);
+ free (output);
+ }
+ else
+ {
+ fprintf (stderr, "Call failed: %s\n", msgname);
+ return -1;
+ }
+ return 0;
+}
+
+static int
+vat2_exec_command (cJSON *o)
+{
+
+ cJSON *msg_id_obj = cJSON_GetObjectItem (o, "_msgname");
+ if (!msg_id_obj)
+ {
+ fprintf (stderr, "Missing '_msgname' element!\n");
+ return -1;
+ }
+
+ char *name = cJSON_GetStringValue (msg_id_obj);
+
+ return vat2_exec_command_by_name (name, o);
+}
+
+static void
+print_template (char *msgname)
+{
+ uword *p = hash_get_mem (function_by_name, msgname);
+ if (!p)
+ goto error;
+
+ cJSON *(*fp) (void *);
+ fp = (void *) apifuncs[p[0]].tojson;
+ if (!fp)
+ goto error;
+
+ void *scratch = malloc (2048);
+ if (!scratch)
+ goto error;
+
+ memset (scratch, 0, 2048);
+ cJSON *t = fp (scratch);
+ if (!t)
+ goto error;
+ free (scratch);
+ char *output = cJSON_Print (t);
+ if (!output)
+ goto error;
+
+ cJSON_Delete (t);
+ printf ("%s\n", output);
+ free (output);
+
+ return;
+
+error:
+ fprintf (stderr, "error printing template for: %s\n", msgname);
+}
+
+static void
+dump_apis (void)
+{
+ char *name;
+ u32 *i;
+ hash_foreach_mem (name, i, function_by_name, ({ printf ("%s\n", name); }));
+}
+
+static void
+print_help (void)
{
- hash_set_mem(function_by_name, name, f);
+ char *help_string =
+ "Usage: vat2 [OPTION] <message-name> <JSON object>\n"
+ "Send API message to VPP and print reply\n"
+ "\n"
+ "-d, --debug Print additional information\n"
+ "-p, --prefix <prefix> Specify shared memory prefix to connect "
+ "to a given VPP instance\n"
+ "-f, --file <filename> File containing a JSON object with the "
+ "arguments for the message to send\n"
+ "-t, --template <message-name> Print a template JSON object for given API"
+ " message\n"
+ "--dump-apis List all APIs available in VAT2 (might "
+ "not reflect running VPP)\n"
+ "--plugin-path Pluing path"
+ "\n";
+ printf ("%s", help_string);
}
-int main (int argc, char **argv)
+int
+main (int argc, char **argv)
{
/* Create a heap of 64MB */
clib_mem_init (0, 64 << 20);
- char *filename = 0;
+ char *filename = 0, *prefix = 0, *template = 0, *pluginpath = 0;
int index;
int c;
opterr = 0;
cJSON *o = 0;
- uword *p = 0;
-
- while ((c = getopt (argc, argv, "df:")) != -1) {
- switch (c) {
- case 'd':
- debug = true;
- break;
- case 'f':
- filename = optarg;
- break;
- case '?':
- if (optopt == 'f')
- fprintf (stderr, "Option -%c requires an argument.\n", optopt);
- else if (isprint (optopt))
- fprintf (stderr, "Unknown option `-%c'.\n", optopt);
- else
- fprintf (stderr,
- "Unknown option character `\\x%x'.\n",
- optopt);
- return 1;
- default:
- abort ();
- }
- }
-
- DBG("debug = %d, filename = %s\n", debug, filename);
+ int option_index = 0;
+ bool dump_api = false;
+ char *msgname = 0;
+ static struct option long_options[] = {
+ { "debug", no_argument, 0, 'd' },
+ { "prefix", required_argument, 0, 's' },
+ { "file", required_argument, 0, 'f' },
+ { "dump-apis", no_argument, 0, 0 },
+ { "template", required_argument, 0, 't' },
+ { "plugin-path", required_argument, 0, 'p' },
+ { 0, 0, 0, 0 }
+ };
+
+ while ((c = getopt_long (argc, argv, "hdp:f:t:", long_options,
+ &option_index)) != -1)
+ {
+ switch (c)
+ {
+ case 0:
+ if (option_index == 3)
+ dump_api = true;
+ break;
+ case 'd':
+ vat2_debug = true;
+ break;
+ case 't':
+ template = optarg;
+ break;
+ case 's':
+ prefix = optarg;
+ break;
+ case 'f':
+ filename = optarg;
+ break;
+ case 'p':
+ pluginpath = optarg;
+ break;
+ case '?':
+ print_help ();
+ return 1;
+ default:
+ abort ();
+ }
+ }
+ DBG ("debug = %d, filename = %s, template = %s, shared memory prefix: %s\n",
+ vat2_debug, filename, template, prefix);
for (index = optind; index < argc; index++)
DBG ("Non-option argument %s\n", argv[index]);
index = optind;
+ if (argc > index + 2)
+ {
+ fprintf (stderr, "%s: Too many arguments\n", argv[0]);
+ exit (-1);
+ }
+
/* Load plugins */
function_by_name = hash_create_string (0, sizeof (uword));
- int res = register_function();
- if (res < 0) {
- fprintf(stderr, "%s: loading plugins failed\n", argv[0]);
- exit(-1);
- }
-
- if (argc > index + 2) {
- fprintf(stderr, "%s: Too many arguments\n", argv[0]);
- exit(-1);
- }
-
- /* Read JSON from stdin, command line or file */
- if (argc >= (index + 1)) {
- p = hash_get_mem (function_by_name, argv[index]);
- if (p == 0) {
- fprintf(stderr, "%s: Unknown command: %s\n", argv[0], argv[index]);
- exit(-1);
- }
- }
-
- if (argc == (index + 2)) {
- o = cJSON_Parse(argv[index+1]);
- if (!o) {
- fprintf(stderr, "%s: Failed parsing JSON input: %s\n", argv[0], cJSON_GetErrorPtr());
- exit(-1);
- }
- }
-
- if (filename) {
- if (argc > index + 1) {
- fprintf(stderr, "%s: Superfluous arguments when filename given\n", argv[0]);
- exit(-1);
- }
-
- FILE *f = fopen(filename, "r");
- size_t bufsize = 1024;
- size_t n_read = 0;
- size_t n;
-
- if (!f) {
- fprintf(stderr, "%s: can't open file: %s\n", argv[0], filename);
- exit(-1);
- }
- char *buf = malloc(bufsize);
- while ((n = fread(buf, 1, bufsize, f))) {
- n_read += n;
- if (n == bufsize)
- buf = realloc(buf, bufsize);
- }
- fclose(f);
- if (n_read) {
- o = cJSON_Parse(buf);
- free(buf);
- if (!o) {
- fprintf(stderr, "%s: Failed parsing JSON input: %s\n", argv[0], cJSON_GetErrorPtr());
- exit(-1);
- }
- }
- }
-
- if (!o) {
- fprintf(stderr, "%s: Failed parsing JSON input\n", argv[0]);
- exit(-1);
- }
-
- if (vac_connect("vat2", 0, 0, 1024)) {
- fprintf(stderr, "Failed connecting to VPP\n");
- exit(-1);
- }
- if (!p) {
- fprintf(stderr, "No such command\n");
- exit(-1);
- }
-
- cJSON * (*fp) (cJSON *);
- fp = (void *) p[0];
- cJSON *r = (*fp) (o);
+ int res = register_function (pluginpath);
+ if (res < 0)
+ {
+ fprintf (stderr, "%s: loading plugins failed\n", argv[0]);
+ exit (-1);
+ }
- if (o)
- cJSON_Delete(o);
+ if (template)
+ {
+ print_template (template);
+ exit (0);
+ }
- if (r) {
- char *output = cJSON_Print(r);
- cJSON_Delete(r);
- printf("%s\n", output);
- free(output);
- } else {
- fprintf(stderr, "Call failed\n");
- exit(-1);
- }
+ if (dump_api)
+ {
+ dump_apis ();
+ exit (0);
+ }
- vac_disconnect();
- exit (0);
+ /* Read message arguments from command line */
+ if (argc >= (index + 1))
+ {
+ msgname = argv[index];
+ }
+ if (argc == (index + 2))
+ {
+ o = cJSON_Parse (argv[index + 1]);
+ if (!o)
+ {
+ fprintf (stderr, "%s: Failed parsing JSON input: %s\n", argv[0],
+ cJSON_GetErrorPtr ());
+ exit (-1);
+ }
+ }
+
+ if (!msgname && !filename)
+ {
+ print_help ();
+ exit (-1);
+ }
+ /* Read message from file */
+ if (filename)
+ {
+ if (argc > index)
+ {
+ fprintf (stderr, "%s: Superfluous arguments when filename given\n",
+ argv[0]);
+ exit (-1);
+ }
+
+ FILE *f = fopen (filename, "r");
+ size_t chunksize, bufsize;
+ size_t n_read = 0;
+ size_t n;
+
+ if (!f)
+ {
+ fprintf (stderr, "%s: can't open file: %s\n", argv[0], filename);
+ exit (-1);
+ }
+
+ chunksize = bufsize = 1024;
+ char *buf = malloc (bufsize);
+ while ((n = fread (buf + n_read, 1, chunksize, f)))
+ {
+ n_read += n;
+ if (n == chunksize)
+ {
+ bufsize += chunksize;
+ buf = realloc (buf, bufsize);
+ }
+ }
+ fclose (f);
+ if (n_read)
+ {
+ o = cJSON_Parse (buf);
+ if (!o)
+ {
+ fprintf (stderr, "%s: Failed parsing JSON input: %s\n", argv[0],
+ cJSON_GetErrorPtr ());
+ exit (-1);
+ }
+ }
+ free (buf);
+ }
+
+ if (!o)
+ {
+ fprintf (stderr, "%s: Failed parsing JSON input\n", argv[0]);
+ exit (-1);
+ }
+
+ if (vac_connect ("vat2", prefix, 0, 1024))
+ {
+ fprintf (stderr, "Failed connecting to VPP\n");
+ exit (-1);
+ }
+
+ if (msgname)
+ {
+ vat2_exec_command_by_name (msgname, o);
+ }
+ else
+ {
+ if (cJSON_IsArray (o))
+ {
+ size_t size = cJSON_GetArraySize (o);
+ for (int i = 0; i < size; i++)
+ vat2_exec_command (cJSON_GetArrayItem (o, i));
+ }
+ }
+ cJSON_Delete (o);
+ vac_disconnect ();
+ exit (0);
}
diff --git a/src/vat2/plugin.c b/src/vat2/plugin.c
index 6b6d55ac9b0..aaaf6940ef3 100644
--- a/src/vat2/plugin.c
+++ b/src/vat2/plugin.c
@@ -76,8 +76,9 @@ load_one_plugin (plugin_info_t * pi)
return 0;
}
+/* Takes a vector as argument */
static u8 **
-split_plugin_path (char *plugin_path)
+split_plugin_path (u8 *plugin_path)
{
int i;
u8 **rv = 0;
@@ -104,7 +105,7 @@ split_plugin_path (char *plugin_path)
}
int
-vat2_load_plugins (char *path, char *filter, int *loaded)
+vat2_load_plugins (u8 *path, char *filter, int *loaded)
{
DIR *dp;
struct dirent *entry;
@@ -165,7 +166,7 @@ vat2_load_plugins (char *path, char *filter, int *loaded)
{
res = -1;
vec_free (plugin_name);
- _vec_len (plugin_info) = vec_len (plugin_info) - 1;
+ vec_set_len (plugin_info, vec_len (plugin_info) - 1);
continue;
}
clib_memset (pi, 0, sizeof (*pi));
diff --git a/src/vat2/test/vat2_test.c b/src/vat2/test/vat2_test.c
index 1ac46527b3c..7aa5e71296e 100644
--- a/src/vat2/test/vat2_test.c
+++ b/src/vat2/test/vat2_test.c
@@ -196,6 +196,7 @@ struct tests tests[] = {
"[\"2001:db8::23\", \"2001:db8::23\"] }" },
{ .s = "{\"_msgname\": \"test_empty\"}" },
{ .s = "{\"_msgname\": \"test_interface\", \"sw_if_index\": 100 }" },
+ { .s = "{\"_msgname\": \"test_interface\", \"sw_if_index\": 4294967295 }" },
};
int main (int argc, char **argv)
diff --git a/src/vat2/vat2.h b/src/vat2/vat2.h
index d477b7279b3..acdb85368cc 100644
--- a/src/vat2/vat2.h
+++ b/src/vat2/vat2.h
@@ -3,9 +3,15 @@
#include <stdbool.h>
-extern bool debug;
+extern bool vat2_debug;
-#define DBG(fmt, args...) do {if (debug) fprintf(stderr, fmt, ## args); } while(0)
+#define DBG(fmt, args...) \
+ do \
+ { \
+ if (vat2_debug) \
+ fprintf (stderr, fmt, ##args); \
+ } \
+ while (0)
#define ERR(fmt, args...) fprintf(stderr, "VAT2: %s:%d:%s(): " fmt, \
__FILE__, __LINE__, __func__, ##args)
diff --git a/src/vat2/vat2_helpers.h b/src/vat2/vat2_helpers.h
index 929c012485f..7b197608a7b 100644
--- a/src/vat2/vat2_helpers.h
+++ b/src/vat2/vat2_helpers.h
@@ -16,9 +16,11 @@
#ifndef included_vat2_helpers_h
#define included_vat2_helpers_h
+#include <vlibmemory/vlib.api_types.h>
+
/* For control ping */
#define vl_endianfun
-#include <vpp/api/vpe.api.h>
+#include <vlibmemory/memclnt.api.h>
#undef vl_endianfun
static inline void
diff --git a/src/vcl/CMakeLists.txt b/src/vcl/CMakeLists.txt
index e6d8f98ffed..c8835e771c1 100644
--- a/src/vcl/CMakeLists.txt
+++ b/src/vcl/CMakeLists.txt
@@ -30,6 +30,14 @@ add_vpp_library(vppcom
api_headers
)
+option(LDP_HAS_GNU_SOURCE "LDP configured to use _GNU_SOURCE" ON)
+if (LDP_HAS_GNU_SOURCE)
+ add_compile_definitions(HAVE_GNU_SOURCE)
+endif(LDP_HAS_GNU_SOURCE)
+
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
+ message("WARNING: vcl_ldpreload isn't supported on FreeBSD - disabled")
+else()
add_vpp_library(vcl_ldpreload
SOURCES
ldp_socket_wrapper.c
@@ -38,6 +46,7 @@ add_vpp_library(vcl_ldpreload
LINK_LIBRARIES
vppinfra svm vlibmemoryclient rt pthread vppcom dl
)
+endif()
add_vpp_headers(vcl
ldp.h
@@ -45,4 +54,4 @@ add_vpp_headers(vcl
vppcom.h
vcl_locked.h
ldp_socket_wrapper.h
-) \ No newline at end of file
+)
diff --git a/src/vcl/ldp.c b/src/vcl/ldp.c
index 5edb935006f..bd3457fa8fd 100644
--- a/src/vcl/ldp.c
+++ b/src/vcl/ldp.c
@@ -12,6 +12,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+#ifdef HAVE_GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
@@ -21,6 +26,7 @@
#include <stdarg.h>
#include <sys/resource.h>
#include <netinet/tcp.h>
+#include <netinet/udp.h>
#include <vcl/ldp_socket_wrapper.h>
#include <vcl/ldp.h>
@@ -51,6 +57,20 @@
#define LDP_MAX_NWORKERS 32
+#ifdef HAVE_GNU_SOURCE
+#define SOCKADDR_GET_SA(__addr) __addr.__sockaddr__;
+#else
+#define SOCKADDR_GET_SA(__addr) _addr;
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#ifndef SO_ORIGINAL_DST
+/* from <linux/netfilter_ipv4.h> */
+#define SO_ORIGINAL_DST 80
+#endif
typedef struct ldp_worker_ctx_
{
u8 *io_buffer;
@@ -143,15 +163,14 @@ ldp_worker_get_current (void)
static inline void
ldp_set_app_name (char *app_name)
{
- snprintf (ldp->app_name, LDP_APP_NAME_MAX,
- "ldp-%d-%s", getpid (), app_name);
+ snprintf (ldp->app_name, LDP_APP_NAME_MAX, "%s-ldp-%d", app_name, getpid ());
}
static inline char *
ldp_get_app_name ()
{
if (ldp->app_name[0] == '\0')
- ldp_set_app_name ("app");
+ ldp_set_app_name (program_invocation_short_name);
return ldp->app_name;
}
@@ -176,34 +195,12 @@ ldp_alloc_workers (void)
{
if (ldp->workers)
return;
- pool_alloc (ldp->workers, LDP_MAX_NWORKERS);
+ ldp->workers = vec_new (ldp_worker_ctx_t, LDP_MAX_NWORKERS);
}
-static int
-ldp_init (void)
+static void
+ldp_init_cfg (void)
{
- ldp_worker_ctx_t *ldpw;
- int rv;
-
- ASSERT (!ldp->init);
-
- ldp->init = 1;
- ldp->vcl_needs_real_epoll = 1;
- rv = vls_app_create (ldp_get_app_name ());
- if (rv != VPPCOM_OK)
- {
- ldp->vcl_needs_real_epoll = 0;
- if (rv == VPPCOM_EEXIST)
- return 0;
- LDBG (2, "\nERROR: ldp_init: vppcom_app_create()"
- " failed! rv = %d (%s)\n", rv, vppcom_retval_str (rv));
- ldp->init = 0;
- return rv;
- }
- ldp->vcl_needs_real_epoll = 0;
- ldp_alloc_workers ();
- ldpw = ldp_worker_get_current ();
-
char *env_var_str = getenv (LDP_ENV_DEBUG);
if (env_var_str)
{
@@ -271,10 +268,11 @@ ldp_init (void)
/* Make sure there are enough bits in the fd set for vcl sessions */
if (ldp->vlsh_bit_val > FD_SETSIZE / 2)
{
- LDBG (0, "ERROR: LDP vlsh bit value %d > FD_SETSIZE/2 %d!",
+ /* Only valid for select/pselect, so just WARNING and not exit */
+ LDBG (0,
+ "WARNING: LDP vlsh bit value %d > FD_SETSIZE/2 %d, "
+ "select/pselect not supported now!",
ldp->vlsh_bit_val, FD_SETSIZE / 2);
- ldp->init = 0;
- return -1;
}
}
env_var_str = getenv (LDP_ENV_TLS_TRANS);
@@ -282,12 +280,41 @@ ldp_init (void)
{
ldp->transparent_tls = 1;
}
+}
+
+static int
+ldp_init (void)
+{
+ ldp_worker_ctx_t *ldpw;
+ int rv;
- /* *INDENT-OFF* */
- pool_foreach (ldpw, ldp->workers) {
+ if (ldp->init)
+ {
+ LDBG (0, "LDP is initialized already");
+ return 0;
+ }
+
+ ldp_init_cfg ();
+ ldp->init = 1;
+ ldp->vcl_needs_real_epoll = 1;
+ rv = vls_app_create (ldp_get_app_name ());
+ if (rv != VPPCOM_OK)
+ {
+ ldp->vcl_needs_real_epoll = 0;
+ if (rv == VPPCOM_EEXIST)
+ return 0;
+ LDBG (2,
+ "\nERROR: ldp_init: vppcom_app_create()"
+ " failed! rv = %d (%s)\n",
+ rv, vppcom_retval_str (rv));
+ ldp->init = 0;
+ return rv;
+ }
+ ldp->vcl_needs_real_epoll = 0;
+ ldp_alloc_workers ();
+
+ vec_foreach (ldpw, ldp->workers)
clib_memset (&ldpw->clib_time, 0, sizeof (ldpw->clib_time));
- }
- /* *INDENT-ON* */
LDBG (0, "LDP initialization: done!");
@@ -315,16 +342,16 @@ close (int fd)
epfd = vls_attr (vlsh, VPPCOM_ATTR_GET_LIBC_EPFD, 0, 0);
if (epfd > 0)
{
+ ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
+ u32 size = sizeof (epfd);
+
LDBG (0, "fd %d: calling libc_close: epfd %u", fd, epfd);
- rv = libc_close (epfd);
- if (rv < 0)
- {
- u32 size = sizeof (epfd);
- epfd = 0;
+ libc_close (epfd);
+ ldpw->mq_epfd_added = 0;
- (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size);
- }
+ epfd = 0;
+ (void) vls_attr (vlsh, VPPCOM_ATTR_SET_LIBC_EPFD, &epfd, &size);
}
else if (PREDICT_FALSE (epfd < 0))
{
@@ -586,10 +613,16 @@ ioctl (int fd, unsigned long int cmd, ...)
case FIONREAD:
rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
break;
-
+ case TIOCOUTQ:
+ {
+ u32 *buf = va_arg (ap, void *);
+ u32 *buflen = va_arg (ap, u32 *);
+ rv = vls_attr (vlsh, VPPCOM_ATTR_GET_NWRITEQ, buf, buflen);
+ }
+ break;
case FIONBIO:
{
- u32 flags = va_arg (ap, int) ? O_NONBLOCK : 0;
+ u32 flags = *(va_arg (ap, int *)) ? O_NONBLOCK : 0;
u32 size = sizeof (flags);
/* TBD: When VPPCOM_ATTR_[GS]ET_FLAGS supports flags other than
@@ -635,7 +668,6 @@ ldp_select_init_maps (fd_set * __restrict original,
clib_memcpy_fast (*resultb, original, n_bytes);
memset (original, 0, n_bytes);
- /* *INDENT-OFF* */
clib_bitmap_foreach (fd, *resultb) {
if (fd > nfds)
break;
@@ -645,7 +677,6 @@ ldp_select_init_maps (fd_set * __restrict original,
else
*vclb = clib_bitmap_set (*vclb, vlsh_to_session_index (vlsh), 1);
}
- /* *INDENT-ON* */
si_bits_set = clib_bitmap_last_set (*vclb) + 1;
*si_bits = (si_bits_set > *si_bits) ? si_bits_set : *si_bits;
@@ -665,7 +696,6 @@ ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb)
if (!libcb)
return 0;
- /* *INDENT-OFF* */
clib_bitmap_foreach (si, vclb) {
vlsh = vls_session_index_to_vlsh (si);
ASSERT (vlsh != VLS_INVALID_HANDLE);
@@ -677,7 +707,6 @@ ldp_select_vcl_map_to_libc (clib_bitmap_t * vclb, fd_set * __restrict libcb)
}
FD_SET (fd, libcb);
}
- /* *INDENT-ON* */
return 0;
}
@@ -690,10 +719,8 @@ ldp_select_libc_map_merge (clib_bitmap_t * result, fd_set * __restrict libcb)
if (!libcb)
return;
- /* *INDENT-OFF* */
clib_bitmap_foreach (fd, result)
FD_SET ((int)fd, libcb);
- /* *INDENT-ON* */
}
int
@@ -724,10 +751,11 @@ ldp_pselect (int nfds, fd_set * __restrict readfds,
time_out = (timeout->tv_sec == 0 && timeout->tv_nsec == 0) ?
(f64) 0 : (f64) timeout->tv_sec + (f64) timeout->tv_nsec / (f64) 1e9;
+ time_out += clib_time_now (&ldpw->clib_time);
+
/* select as fine grained sleep */
if (!nfds)
{
- time_out += clib_time_now (&ldpw->clib_time);
while (clib_time_now (&ldpw->clib_time) < time_out)
;
return 0;
@@ -966,9 +994,7 @@ assign_cert_key_pair (vls_handle_t vlsh)
return -1;
ckp_len = sizeof (ldp->ckpair_index);
- return vppcom_session_attr (vlsh_to_session_index (vlsh),
- VPPCOM_ATTR_SET_CKPAIR, &ldp->ckpair_index,
- &ckp_len);
+ return vls_attr (vlsh, VPPCOM_ATTR_SET_CKPAIR, &ldp->ckpair_index, &ckp_len);
}
int
@@ -1051,8 +1077,9 @@ socketpair (int domain, int type, int protocol, int fds[2])
}
int
-bind (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len)
+bind (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len)
{
+ const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t vlsh;
int rv;
@@ -1123,11 +1150,10 @@ done:
}
static inline int
-ldp_copy_ep_to_sockaddr (__SOCKADDR_ARG addr, socklen_t * __restrict len,
- vppcom_endpt_t * ep)
+ldp_copy_ep_to_sockaddr (struct sockaddr *addr, socklen_t *__restrict len,
+ vppcom_endpt_t *ep)
{
- int rv = 0;
- int sa_len, copy_len;
+ int rv = 0, sa_len, copy_len;
ldp_init_check ();
@@ -1168,8 +1194,9 @@ ldp_copy_ep_to_sockaddr (__SOCKADDR_ARG addr, socklen_t * __restrict len,
}
int
-getsockname (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len)
+getsockname (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len)
{
+ struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t vlsh;
int rv;
@@ -1202,15 +1229,16 @@ getsockname (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len)
}
else
{
- rv = libc_getsockname (fd, addr, len);
+ rv = libc_getsockname (fd, _addr, len);
}
return rv;
}
int
-connect (int fd, __CONST_SOCKADDR_ARG addr, socklen_t len)
+connect (int fd, __CONST_SOCKADDR_ARG _addr, socklen_t len)
{
+ const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t vlsh;
int rv;
@@ -1290,8 +1318,9 @@ done:
}
int
-getpeername (int fd, __SOCKADDR_ARG addr, socklen_t * __restrict len)
+getpeername (int fd, __SOCKADDR_ARG _addr, socklen_t *__restrict len)
{
+ struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t vlsh;
int rv;
@@ -1530,13 +1559,26 @@ recv (int fd, void *buf, size_t n, int flags)
return size;
}
-static int
-ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, int flags,
- __CONST_SOCKADDR_ARG addr, socklen_t addr_len)
+ssize_t
+__recv_chk (int fd, void *buf, size_t n, size_t buflen, int flags)
+{
+ if (n > buflen)
+ return -1;
+
+ return recv (fd, buf, n, flags);
+}
+
+static inline int
+ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n,
+ vppcom_endpt_tlv_t *app_tlvs, int flags,
+ __CONST_SOCKADDR_ARG _addr, socklen_t addr_len)
{
+ const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vppcom_endpt_t *ep = 0;
vppcom_endpt_t _ep;
+ _ep.app_tlvs = app_tlvs;
+
if (addr)
{
ep = &_ep;
@@ -1566,11 +1608,11 @@ ldp_vls_sendo (vls_handle_t vlsh, const void *buf, size_t n, int flags,
}
static int
-ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n,
- int flags, __SOCKADDR_ARG addr,
- socklen_t * __restrict addr_len)
+ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n, int flags,
+ __SOCKADDR_ARG _addr, socklen_t *__restrict addr_len)
{
u8 src_addr[sizeof (struct sockaddr_in6)];
+ struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vppcom_endpt_t ep;
ssize_t size;
int rv;
@@ -1595,8 +1637,9 @@ ldp_vls_recvfrom (vls_handle_t vlsh, void *__restrict buf, size_t n,
ssize_t
sendto (int fd, const void *buf, size_t n, int flags,
- __CONST_SOCKADDR_ARG addr, socklen_t addr_len)
+ __CONST_SOCKADDR_ARG _addr, socklen_t addr_len)
{
+ const struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t vlsh;
ssize_t size;
@@ -1605,7 +1648,7 @@ sendto (int fd, const void *buf, size_t n, int flags,
vlsh = ldp_fd_to_vlsh (fd);
if (vlsh != VLS_INVALID_HANDLE)
{
- size = ldp_vls_sendo (vlsh, buf, n, flags, addr, addr_len);
+ size = ldp_vls_sendo (vlsh, buf, n, NULL, flags, addr, addr_len);
if (size < 0)
{
errno = -size;
@@ -1647,6 +1690,98 @@ recvfrom (int fd, void *__restrict buf, size_t n, int flags,
return size;
}
+static int
+ldp_parse_cmsg (vls_handle_t vlsh, const struct msghdr *msg,
+ vppcom_endpt_tlv_t **app_tlvs)
+{
+ uint8_t *ad, *at = (uint8_t *) *app_tlvs;
+ vppcom_endpt_tlv_t *adh;
+ struct in_pktinfo *pi;
+ struct cmsghdr *cmsg;
+
+ cmsg = CMSG_FIRSTHDR (msg);
+
+ while (cmsg != NULL)
+ {
+ switch (cmsg->cmsg_level)
+ {
+ case SOL_UDP:
+ switch (cmsg->cmsg_type)
+ {
+ case UDP_SEGMENT:
+ vec_add2 (at, adh, sizeof (*adh));
+ adh->data_type = VCL_UDP_SEGMENT;
+ adh->data_len = sizeof (uint16_t);
+ vec_add2 (at, ad, sizeof (uint16_t));
+ *(uint16_t *) ad = *(uint16_t *) CMSG_DATA (cmsg);
+ break;
+ default:
+ LDBG (1, "SOL_UDP cmsg_type %u not supported", cmsg->cmsg_type);
+ break;
+ }
+ break;
+ case SOL_IP:
+ switch (cmsg->cmsg_type)
+ {
+ case IP_PKTINFO:
+ vec_add2 (at, adh, sizeof (*adh));
+ adh->data_type = VCL_IP_PKTINFO;
+ adh->data_len = sizeof (struct in_addr);
+ vec_add2 (at, ad, sizeof (struct in_addr));
+ pi = (void *) CMSG_DATA (cmsg);
+ clib_memcpy_fast (ad, &pi->ipi_spec_dst,
+ sizeof (struct in_addr));
+ break;
+ default:
+ LDBG (1, "SOL_IP cmsg_type %u not supported", cmsg->cmsg_type);
+ break;
+ }
+ break;
+ default:
+ LDBG (1, "cmsg_level %u not supported", cmsg->cmsg_level);
+ break;
+ }
+ cmsg = CMSG_NXTHDR ((struct msghdr *) msg, cmsg);
+ }
+ *app_tlvs = (vppcom_endpt_tlv_t *) at;
+ return 0;
+}
+
+static int
+ldp_make_cmsg (vls_handle_t vlsh, struct msghdr *msg)
+{
+ u32 optval, optlen = sizeof (optval);
+ struct cmsghdr *cmsg;
+
+ cmsg = CMSG_FIRSTHDR (msg);
+ memset (cmsg, 0, sizeof (*cmsg));
+
+ if (!vls_attr (vlsh, VPPCOM_ATTR_GET_IP_PKTINFO, (void *) &optval, &optlen))
+ return 0;
+
+ if (optval)
+ {
+ vppcom_endpt_t ep;
+ u8 addr_buf[sizeof (struct in_addr)];
+ u32 size = sizeof (ep);
+
+ ep.ip = addr_buf;
+
+ if (!vls_attr (vlsh, VPPCOM_ATTR_GET_LCL_ADDR, &ep, &size))
+ {
+ struct in_pktinfo pi = {};
+
+ clib_memcpy (&pi.ipi_addr, ep.ip, sizeof (struct in_addr));
+ cmsg->cmsg_level = SOL_IP;
+ cmsg->cmsg_type = IP_PKTINFO;
+ cmsg->cmsg_len = CMSG_LEN (sizeof (pi));
+ clib_memcpy (CMSG_DATA (cmsg), &pi, sizeof (pi));
+ }
+ }
+
+ return 0;
+}
+
ssize_t
sendmsg (int fd, const struct msghdr * msg, int flags)
{
@@ -1658,14 +1793,17 @@ sendmsg (int fd, const struct msghdr * msg, int flags)
vlsh = ldp_fd_to_vlsh (fd);
if (vlsh != VLS_INVALID_HANDLE)
{
+ vppcom_endpt_tlv_t *app_tlvs = 0;
struct iovec *iov = msg->msg_iov;
ssize_t total = 0;
- int i, rv;
+ int i, rv = 0;
+
+ ldp_parse_cmsg (vlsh, msg, &app_tlvs);
for (i = 0; i < msg->msg_iovlen; ++i)
{
- rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, flags,
- msg->msg_name, msg->msg_namelen);
+ rv = ldp_vls_sendo (vlsh, iov[i].iov_base, iov[i].iov_len, app_tlvs,
+ flags, msg->msg_name, msg->msg_namelen);
if (rv < 0)
break;
else
@@ -1676,6 +1814,8 @@ sendmsg (int fd, const struct msghdr * msg, int flags)
}
}
+ vec_free (app_tlvs);
+
if (rv < 0 && total == 0)
{
errno = -rv;
@@ -1692,7 +1832,7 @@ sendmsg (int fd, const struct msghdr * msg, int flags)
return size;
}
-#ifdef USE_GNU
+#ifdef _GNU_SOURCE
int
sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
{
@@ -1725,7 +1865,6 @@ sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
if (size < 0)
{
int errno_val = errno;
- perror (func_str);
clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
"rv %d, errno = %d", getpid (), fd, fd,
func_str, size, errno_val);
@@ -1752,7 +1891,7 @@ recvmsg (int fd, struct msghdr * msg, int flags)
{
struct iovec *iov = msg->msg_iov;
ssize_t max_deq, total = 0;
- int i, rv;
+ int i, rv = 0;
max_deq = vls_attr (vlsh, VPPCOM_ATTR_GET_NREAD, 0, 0);
if (!max_deq)
@@ -1781,7 +1920,11 @@ recvmsg (int fd, struct msghdr * msg, int flags)
size = -1;
}
else
- size = total;
+ {
+ if (msg->msg_controllen)
+ ldp_make_cmsg (vlsh, msg);
+ size = total;
+ }
}
else
{
@@ -1791,52 +1934,60 @@ recvmsg (int fd, struct msghdr * msg, int flags)
return size;
}
-#ifdef USE_GNU
+#ifdef _GNU_SOURCE
int
recvmmsg (int fd, struct mmsghdr *vmessages,
unsigned int vlen, int flags, struct timespec *tmo)
{
- ssize_t size;
- const char *func_str;
- u32 sh = ldp_fd_to_vlsh (fd);
+ ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
+ u32 sh;
ldp_init_check ();
+ sh = ldp_fd_to_vlsh (fd);
+
if (sh != VLS_INVALID_HANDLE)
{
- clib_warning ("LDP<%d>: LDP-TBD", getpid ());
- errno = ENOSYS;
- size = -1;
- }
- else
- {
- func_str = "libc_recvmmsg";
+ struct mmsghdr *mh;
+ ssize_t rv = 0;
+ u32 nvecs = 0;
+ f64 time_out;
- if (LDP_DEBUG > 2)
- clib_warning ("LDP<%d>: fd %d (0x%x): calling %s(): "
- "vmessages %p, vlen %u, flags 0x%x, tmo %p",
- getpid (), fd, fd, func_str, vmessages, vlen,
- flags, tmo);
-
- size = libc_recvmmsg (fd, vmessages, vlen, flags, tmo);
- }
-
- if (LDP_DEBUG > 2)
- {
- if (size < 0)
+ if (PREDICT_FALSE (ldpw->clib_time.init_cpu_time == 0))
+ clib_time_init (&ldpw->clib_time);
+ if (tmo)
{
- int errno_val = errno;
- perror (func_str);
- clib_warning ("LDP<%d>: ERROR: fd %d (0x%x): %s() failed! "
- "rv %d, errno = %d", getpid (), fd, fd,
- func_str, size, errno_val);
- errno = errno_val;
+ time_out = (f64) tmo->tv_sec + (f64) tmo->tv_nsec / (f64) 1e9;
+ time_out += clib_time_now (&ldpw->clib_time);
}
else
- clib_warning ("LDP<%d>: fd %d (0x%x): returning %d (0x%x)",
- getpid (), fd, fd, size, size);
+ {
+ time_out = (f64) ~0;
+ }
+
+ while (nvecs < vlen)
+ {
+ mh = &vmessages[nvecs];
+ rv = recvmsg (fd, &mh->msg_hdr, flags);
+ if (rv > 0)
+ {
+ mh->msg_len = rv;
+ nvecs += 1;
+ continue;
+ }
+
+ if (!time_out || clib_time_now (&ldpw->clib_time) >= time_out)
+ break;
+
+ usleep (1);
+ }
+
+ return nvecs > 0 ? nvecs : rv;
+ }
+ else
+ {
+ return libc_recvmmsg (fd, vmessages, vlen, flags, tmo);
}
- return size;
}
#endif
@@ -1897,6 +2048,21 @@ getsockopt (int fd, int level, int optname,
break;
}
break;
+ case SOL_IP:
+ switch (optname)
+ {
+ case SO_ORIGINAL_DST:
+ rv =
+ vls_attr (vlsh, VPPCOM_ATTR_GET_ORIGINAL_DST, optval, optlen);
+ break;
+ default:
+ LDBG (0,
+ "ERROR: fd %d: getsockopt SOL_IP: vlsh %u "
+ "optname %d unsupported!",
+ fd, vlsh, optname);
+ break;
+ }
+ break;
case SOL_IPV6:
switch (optname)
{
@@ -2059,6 +2225,21 @@ setsockopt (int fd, int level, int optname,
break;
}
break;
+ case SOL_IP:
+ switch (optname)
+ {
+ case IP_PKTINFO:
+ rv = vls_attr (vlsh, VPPCOM_ATTR_SET_IP_PKTINFO, (void *) optval,
+ &optlen);
+ break;
+ default:
+ LDBG (0,
+ "ERROR: fd %d: setsockopt SOL_IP: vlsh %u optname %d"
+ "unsupported!",
+ fd, vlsh, optname);
+ break;
+ }
+ break;
default:
break;
}
@@ -2108,9 +2289,10 @@ listen (int fd, int n)
}
static inline int
-ldp_accept4 (int listen_fd, __SOCKADDR_ARG addr,
- socklen_t * __restrict addr_len, int flags)
+ldp_accept4 (int listen_fd, __SOCKADDR_ARG _addr,
+ socklen_t *__restrict addr_len, int flags)
{
+ struct sockaddr *addr = SOCKADDR_GET_SA (_addr);
vls_handle_t listen_vlsh, accept_vlsh;
int rv;
@@ -2257,8 +2439,10 @@ epoll_ctl (int epfd, int op, int fd, struct epoll_event *event)
* was acquired outside of the LD_PRELOAD process context.
* In any case, if we get one, punt it to libc_epoll_ctl.
*/
- LDBG (1, "epfd %d: calling libc_epoll_ctl: op %d, fd %d"
- " event %p", epfd, op, fd, event);
+ LDBG (1,
+ "epfd %d: calling libc_epoll_ctl: op %d, fd %d"
+ " events 0x%x",
+ epfd, op, fd, event ? event->events : 0);
rv = libc_epoll_ctl (epfd, op, fd, event);
goto done;
@@ -2271,8 +2455,10 @@ epoll_ctl (int epfd, int op, int fd, struct epoll_event *event)
if (vlsh != VLS_INVALID_HANDLE)
{
- LDBG (1, "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u,"
- " event %p", epfd, vep_vlsh, op, vlsh, event);
+ LDBG (1,
+ "epfd %d: calling vls_epoll_ctl: ep_vlsh %d op %d, vlsh %u,"
+ " events 0x%x",
+ epfd, vep_vlsh, op, vlsh, event ? event->events : 0);
rv = vls_epoll_ctl (vep_vlsh, op, vlsh, event);
if (rv != VPPCOM_OK)
@@ -2329,7 +2515,7 @@ static inline int
ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents,
int timeout, const sigset_t * sigmask)
{
- ldp_worker_ctx_t *ldpw = ldp_worker_get_current ();
+ ldp_worker_ctx_t *ldpw;
double time_to_wait = (double) 0, max_time;
int libc_epfd, rv = 0;
vls_handle_t ep_vlsh;
@@ -2342,6 +2528,10 @@ ldp_epoll_pwait (int epfd, struct epoll_event *events, int maxevents,
return -1;
}
+ if (PREDICT_FALSE (vppcom_worker_index () == ~0))
+ vls_register_vcl_worker ();
+
+ ldpw = ldp_worker_get_current ();
if (epfd == ldpw->vcl_mq_epfd)
return libc_epoll_pwait (epfd, events, maxevents, timeout, sigmask);
@@ -2406,8 +2596,9 @@ static inline int
ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
int maxevents, int timeout, const sigset_t * sigmask)
{
+ int libc_epfd, rv = 0, num_ev, libc_num_ev, vcl_wups = 0;
+ struct epoll_event *libc_evts;
ldp_worker_ctx_t *ldpw;
- int libc_epfd, rv = 0, num_ev;
vls_handle_t ep_vlsh;
ldp_init_check ();
@@ -2483,7 +2674,12 @@ ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
/* Request to only drain unhandled to prevent libc_epoll_wait starved */
rv = vls_epoll_wait (ep_vlsh, events, maxevents, -2);
if (rv > 0)
- goto done;
+ {
+ timeout = 0;
+ if (rv >= maxevents)
+ goto done;
+ maxevents -= rv;
+ }
else if (PREDICT_FALSE (rv < 0))
{
errno = -rv;
@@ -2491,27 +2687,41 @@ ldp_epoll_pwait_eventfd (int epfd, struct epoll_event *events,
goto done;
}
- rv = libc_epoll_pwait (libc_epfd, events, maxevents, timeout, sigmask);
- if (rv <= 0)
- goto done;
- for (int i = 0; i < rv; i++)
+epoll_again:
+
+ libc_evts = &events[rv];
+ libc_num_ev =
+ libc_epoll_pwait (libc_epfd, libc_evts, maxevents, timeout, sigmask);
+ if (libc_num_ev <= 0)
+ {
+ rv = rv >= 0 ? rv : -1;
+ goto done;
+ }
+
+ for (int i = 0; i < libc_num_ev; i++)
{
- if (events[i].data.fd == ldpw->vcl_mq_epfd)
+ if (libc_evts[i].data.fd == ldpw->vcl_mq_epfd)
{
/* We should remove mq epoll fd from events. */
- rv--;
- if (i != rv)
+ libc_num_ev--;
+ if (i != libc_num_ev)
{
- events[i].events = events[rv].events;
- events[i].data.u64 = events[rv].data.u64;
+ libc_evts[i].events = libc_evts[libc_num_ev].events;
+ libc_evts[i].data.u64 = libc_evts[libc_num_ev].data.u64;
}
- num_ev = vls_epoll_wait (ep_vlsh, &events[rv], maxevents - rv, 0);
+ num_ev = vls_epoll_wait (ep_vlsh, &libc_evts[libc_num_ev],
+ maxevents - libc_num_ev, 0);
if (PREDICT_TRUE (num_ev > 0))
rv += num_ev;
+ /* Woken up by vcl but no events generated. Accept it once */
+ if (rv == 0 && libc_num_ev == 0 && timeout && vcl_wups++ < 1)
+ goto epoll_again;
break;
}
}
+ rv += libc_num_ev;
+
done:
return rv;
}
@@ -2640,7 +2850,7 @@ done:
return rv;
}
-#ifdef USE_GNU
+#ifdef _GNU_SOURCE
int
ppoll (struct pollfd *fds, nfds_t nfds,
const struct timespec *timeout, const sigset_t * sigmask)
diff --git a/src/vcl/ldp.h b/src/vcl/ldp.h
index 8d78ead0875..327e73c80c0 100644
--- a/src/vcl/ldp.h
+++ b/src/vcl/ldp.h
@@ -23,9 +23,9 @@
#define LDP_DEBUG_INIT 0
#endif
+#include <vcl/ldp_glibc_socket.h>
#include <vppinfra/error.h>
#include <vppinfra/types.h>
-#include <vcl/ldp_glibc_socket.h>
#define LDP_ENV_DEBUG "LDP_DEBUG"
#define LDP_ENV_APP_NAME "LDP_APP_NAME"
diff --git a/src/vcl/ldp_glibc_socket.h b/src/vcl/ldp_glibc_socket.h
index 0890a644488..dcd37208ff6 100644
--- a/src/vcl/ldp_glibc_socket.h
+++ b/src/vcl/ldp_glibc_socket.h
@@ -163,6 +163,14 @@ extern ssize_t sendfile (int __out_fd, int __in_fd, off_t * __offset,
__THROW. */
extern ssize_t recv (int __fd, void *__buf, size_t __n, int __flags);
+/* Read N bytes into BUF from socket FD with buffer overflow checking.
+ Returns the number read or -1 for errors.
+
+ This function is a cancellation point and therefore not marked with
+ __THROW. */
+extern ssize_t __recv_chk (int fd, void *buf, size_t n, size_t buflen,
+ int flags);
+
/* Send N bytes of BUF on socket FD to peer at address ADDR (which is
ADDR_LEN bytes long). Returns the number sent, or -1 for errors.
@@ -192,15 +200,14 @@ recvfrom (int __fd, void *__restrict __buf,
extern ssize_t
sendmsg (int __fd, const struct msghdr *__message, int __flags);
-#ifdef __USE_GNU
+#ifdef _GNU_SOURCE
/* Send a VLEN messages as described by VMESSAGES to socket FD.
Returns the number of datagrams successfully written or -1 for errors.
This function is a cancellation point and therefore not marked with
__THROW. */
-extern int
-sendmmsg (int __fd, struct mmsghdr *__vmessages,
- unsigned int __vlen, int __flags);
+extern int sendmmsg (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags);
#endif
/* Receive a message as described by MESSAGE from socket FD.
@@ -210,7 +217,7 @@ sendmmsg (int __fd, struct mmsghdr *__vmessages,
__THROW. */
extern ssize_t recvmsg (int __fd, struct msghdr *__message, int __flags);
-#ifdef __USE_GNU
+#ifdef _GNU_SOURCE
/* Receive up to VLEN messages as described by VMESSAGES from socket FD.
Returns the number of messages received or -1 for errors.
@@ -329,7 +336,7 @@ epoll_pwait (int __epfd, struct epoll_event *__events,
__THROW. */
extern int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout);
-#ifdef __USE_GNU
+#ifdef _GNU_SOURCE
/* Like poll, but before waiting the threads signal mask is replaced
with that specified in the fourth parameter. For better usability,
the timeout value is specified using a TIMESPEC object.
diff --git a/src/vcl/ldp_socket_wrapper.c b/src/vcl/ldp_socket_wrapper.c
index 253847136ae..7ae2a226641 100644
--- a/src/vcl/ldp_socket_wrapper.c
+++ b/src/vcl/ldp_socket_wrapper.c
@@ -55,6 +55,10 @@
is set.
*/
+#ifdef HAVE_GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
#include <signal.h>
#include <dlfcn.h>
@@ -63,8 +67,9 @@
#include <unistd.h>
#include <pthread.h>
-#include <vcl/ldp_socket_wrapper.h>
+#include <vppinfra/clib.h>
+#include <vcl/ldp_socket_wrapper.h>
enum swrap_dbglvl_e
{
@@ -154,16 +159,14 @@ PRINTF_ATTRIBUTE (3, 4);
* SWRAP LOADING LIBC FUNCTIONS
*********************************************************/
-typedef int (*__libc_accept4) (int sockfd,
- struct sockaddr * addr,
- socklen_t * addrlen, int flags);
-typedef int (*__libc_accept) (int sockfd,
- struct sockaddr * addr, socklen_t * addrlen);
-typedef int (*__libc_bind) (int sockfd,
- const struct sockaddr * addr, socklen_t addrlen);
+typedef int (*__libc_accept4) (int sockfd, __SOCKADDR_ARG addr,
+ socklen_t *addrlen, int flags);
+typedef int (*__libc_accept) (int sockfd, __SOCKADDR_ARG addr,
+ socklen_t *addrlen);
+typedef int (*__libc_bind) (int sockfd, __CONST_SOCKADDR_ARG addr,
+ socklen_t addrlen);
typedef int (*__libc_close) (int fd);
-typedef int (*__libc_connect) (int sockfd,
- const struct sockaddr * addr,
+typedef int (*__libc_connect) (int sockfd, __CONST_SOCKADDR_ARG addr,
socklen_t addrlen);
#if 0
@@ -183,16 +186,12 @@ typedef FILE *(*__libc_fopen64) (const char *name, const char *mode);
#ifdef HAVE_EVENTFD
typedef int (*__libc_eventfd) (int count, int flags);
#endif
-typedef int (*__libc_getpeername) (int sockfd,
- struct sockaddr * addr,
- socklen_t * addrlen);
-typedef int (*__libc_getsockname) (int sockfd,
- struct sockaddr * addr,
- socklen_t * addrlen);
-typedef int (*__libc_getsockopt) (int sockfd,
- int level,
- int optname,
- void *optval, socklen_t * optlen);
+typedef int (*__libc_getpeername) (int sockfd, __SOCKADDR_ARG addr,
+ socklen_t *addrlen);
+typedef int (*__libc_getsockname) (int sockfd, __SOCKADDR_ARG addr,
+ socklen_t *addrlen);
+typedef int (*__libc_getsockopt) (int sockfd, int level, int optname,
+ void *optval, socklen_t *optlen);
typedef int (*__libc_ioctl) (int d, unsigned long int request, ...);
typedef int (*__libc_listen) (int sockfd, int backlog);
typedef int (*__libc_open) (const char *pathname, int flags, mode_t mode);
@@ -202,25 +201,29 @@ typedef int (*__libc_open64) (const char *pathname, int flags, mode_t mode);
typedef int (*__libc_openat) (int dirfd, const char *path, int flags, ...);
typedef int (*__libc_pipe) (int pipefd[2]);
typedef int (*__libc_read) (int fd, void *buf, size_t count);
-typedef ssize_t (*__libc_readv) (int fd, const struct iovec * iov,
- int iovcnt);
+typedef ssize_t (*__libc_readv) (int fd, const struct iovec *iov, int iovcnt);
typedef int (*__libc_recv) (int sockfd, void *buf, size_t len, int flags);
-typedef int (*__libc_recvfrom) (int sockfd,
- void *buf,
- size_t len,
- int flags,
- struct sockaddr * src_addr,
- socklen_t * addrlen);
-typedef int (*__libc_recvmsg) (int sockfd, const struct msghdr * msg,
+typedef int (*__libc_recvfrom) (int sockfd, void *buf, size_t len, int flags,
+ __SOCKADDR_ARG src_addr, socklen_t *addrlen);
+typedef int (*__libc_recvmsg) (int sockfd, const struct msghdr *msg,
int flags);
+#ifdef _GNU_SOURCE
+typedef int (*__libc_recvmmsg) (int fd, struct mmsghdr *vmessages,
+ unsigned int vlen, int flags,
+ struct timespec *tmo);
+#endif
typedef int (*__libc_send) (int sockfd, const void *buf, size_t len,
int flags);
typedef ssize_t (*__libc_sendfile) (int out_fd, int in_fd, off_t * offset,
size_t len);
typedef int (*__libc_sendmsg) (int sockfd, const struct msghdr * msg,
int flags);
+#ifdef _GNU_SOURCE
+typedef int (*__libc_sendmmsg) (int __fd, struct mmsghdr *__vmessages,
+ unsigned int __vlen, int __flags);
+#endif
typedef int (*__libc_sendto) (int sockfd, const void *buf, size_t len,
- int flags, const struct sockaddr * dst_addr,
+ int flags, __CONST_SOCKADDR_ARG dst_addr,
socklen_t addrlen);
typedef int (*__libc_setsockopt) (int sockfd, int level, int optname,
const void *optval, socklen_t optlen);
@@ -269,7 +272,7 @@ typedef int (*__libc_epoll_pwait) (int __epfd, struct epoll_event * __events,
typedef int (*__libc_poll) (struct pollfd * __fds, nfds_t __nfds,
int __timeout);
-#ifdef __USE_GNU
+#ifdef _GNU_SOURCE
typedef int (*__libc_ppoll) (struct pollfd * __fds, nfds_t __nfds,
const struct timespec * __timeout,
const __sigset_t * __ss);
@@ -321,9 +324,15 @@ struct swrap_libc_symbols
SWRAP_SYMBOL_ENTRY (recv);
SWRAP_SYMBOL_ENTRY (recvfrom);
SWRAP_SYMBOL_ENTRY (recvmsg);
+#ifdef _GNU_SOURCE
+ SWRAP_SYMBOL_ENTRY (recvmmsg);
+#endif
SWRAP_SYMBOL_ENTRY (send);
SWRAP_SYMBOL_ENTRY (sendfile);
SWRAP_SYMBOL_ENTRY (sendmsg);
+#ifdef _GNU_SOURCE
+ SWRAP_SYMBOL_ENTRY (sendmmsg);
+#endif
SWRAP_SYMBOL_ENTRY (sendto);
SWRAP_SYMBOL_ENTRY (setsockopt);
#ifdef HAVE_SIGNALFD
@@ -348,7 +357,7 @@ struct swrap_libc_symbols
SWRAP_SYMBOL_ENTRY (epoll_wait);
SWRAP_SYMBOL_ENTRY (epoll_pwait);
SWRAP_SYMBOL_ENTRY (poll);
-#ifdef __USE_GNU
+#ifdef _GNU_SOURCE
SWRAP_SYMBOL_ENTRY (ppoll);
#endif
};
@@ -478,8 +487,7 @@ _swrap_bind_symbol (enum swrap_lib lib, const char *fn_name)
* So we need load each function at the point it is called the first time.
*/
int
-libc_accept4 (int sockfd,
- struct sockaddr *addr, socklen_t * addrlen, int flags)
+libc_accept4 (int sockfd, __SOCKADDR_ARG addr, socklen_t *addrlen, int flags)
{
swrap_bind_symbol_libc (accept4);
@@ -487,7 +495,7 @@ libc_accept4 (int sockfd,
}
int
-libc_accept (int sockfd, struct sockaddr *addr, socklen_t * addrlen)
+libc_accept (int sockfd, __SOCKADDR_ARG addr, socklen_t *addrlen)
{
swrap_bind_symbol_libc (accept);
@@ -495,7 +503,7 @@ libc_accept (int sockfd, struct sockaddr *addr, socklen_t * addrlen)
}
int
-libc_bind (int sockfd, const struct sockaddr *addr, socklen_t addrlen)
+libc_bind (int sockfd, __CONST_SOCKADDR_ARG addr, socklen_t addrlen)
{
swrap_bind_symbol_libc (bind);
@@ -511,7 +519,7 @@ libc_close (int fd)
}
int
-libc_connect (int sockfd, const struct sockaddr *addr, socklen_t addrlen)
+libc_connect (int sockfd, __CONST_SOCKADDR_ARG addr, socklen_t addrlen)
{
swrap_bind_symbol_libc (connect);
@@ -585,7 +593,7 @@ libc_vioctl (int fd, int cmd, va_list ap)
}
int
-libc_getpeername (int sockfd, struct sockaddr *addr, socklen_t * addrlen)
+libc_getpeername (int sockfd, __SOCKADDR_ARG addr, socklen_t *addrlen)
{
swrap_bind_symbol_libc (getpeername);
@@ -593,7 +601,7 @@ libc_getpeername (int sockfd, struct sockaddr *addr, socklen_t * addrlen)
}
int
-libc_getsockname (int sockfd, struct sockaddr *addr, socklen_t * addrlen)
+libc_getsockname (int sockfd, __SOCKADDR_ARG addr, socklen_t *addrlen)
{
swrap_bind_symbol_libc (getsockname);
@@ -645,10 +653,8 @@ libc_recv (int sockfd, void *buf, size_t len, int flags)
}
int
-libc_recvfrom (int sockfd,
- void *buf,
- size_t len,
- int flags, struct sockaddr *src_addr, socklen_t * addrlen)
+libc_recvfrom (int sockfd, void *buf, size_t len, int flags,
+ __SOCKADDR_ARG src_addr, socklen_t *addrlen)
{
swrap_bind_symbol_libc (recvfrom);
@@ -665,6 +671,17 @@ libc_recvmsg (int sockfd, struct msghdr *msg, int flags)
return swrap.libc.symbols._libc_recvmsg.f (sockfd, msg, flags);
}
+#ifdef _GNU_SOURCE
+int
+libc_recvmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags,
+ struct timespec *tmo)
+{
+ swrap_bind_symbol_libc (recvmmsg);
+
+ return swrap.libc.symbols._libc_recvmmsg.f (fd, vmessages, vlen, flags, tmo);
+}
+#endif
+
int
libc_send (int sockfd, const void *buf, size_t len, int flags)
{
@@ -689,11 +706,19 @@ libc_sendmsg (int sockfd, const struct msghdr *msg, int flags)
return swrap.libc.symbols._libc_sendmsg.f (sockfd, msg, flags);
}
+#ifdef _GNU_SOURCE
+int
+libc_sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen, int flags)
+{
+ swrap_bind_symbol_libc (sendmmsg);
+
+ return swrap.libc.symbols._libc_sendmmsg.f (fd, vmessages, vlen, flags);
+}
+#endif
+
int
-libc_sendto (int sockfd,
- const void *buf,
- size_t len,
- int flags, const struct sockaddr *dst_addr, socklen_t addrlen)
+libc_sendto (int sockfd, const void *buf, size_t len, int flags,
+ __CONST_SOCKADDR_ARG dst_addr, socklen_t addrlen)
{
swrap_bind_symbol_libc (sendto);
@@ -836,7 +861,7 @@ libc_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout)
return swrap.libc.symbols._libc_poll.f (__fds, __nfds, __timeout);
}
-#ifdef __USE_GNU
+#ifdef _GNU_SOURCE
int
libc_ppoll (struct pollfd *__fds, nfds_t __nfds,
const struct timespec *__timeout, const __sigset_t * __ss)
diff --git a/src/vcl/ldp_socket_wrapper.h b/src/vcl/ldp_socket_wrapper.h
index 0d167cf1aeb..bf1b0e3f17c 100644
--- a/src/vcl/ldp_socket_wrapper.h
+++ b/src/vcl/ldp_socket_wrapper.h
@@ -98,16 +98,16 @@
* has probably something todo with with the linker.
* So we need load each function at the point it is called the first time.
*/
-int libc_accept4 (int sockfd, struct sockaddr *addr, socklen_t * addrlen,
+int libc_accept4 (int sockfd, __SOCKADDR_ARG addr, socklen_t *addrlen,
int flags);
-int libc_accept (int sockfd, struct sockaddr *addr, socklen_t * addrlen);
+int libc_accept (int sockfd, __SOCKADDR_ARG addr, socklen_t *addrlen);
-int libc_bind (int sockfd, const struct sockaddr *addr, socklen_t addrlen);
+int libc_bind (int sockfd, __CONST_SOCKADDR_ARG addr, socklen_t addrlen);
int libc_close (int fd);
-int libc_connect (int sockfd, const struct sockaddr *addr, socklen_t addrlen);
+int libc_connect (int sockfd, __CONST_SOCKADDR_ARG addr, socklen_t addrlen);
#if 0
/* TBD: dup and dup2 to be implemented later */
@@ -128,9 +128,9 @@ int libc_vfcntl64 (int fd, int cmd, va_list ap);
int libc_vioctl (int fd, int cmd, va_list ap);
-int libc_getpeername (int sockfd, struct sockaddr *addr, socklen_t * addrlen);
+int libc_getpeername (int sockfd, __SOCKADDR_ARG addr, socklen_t *addrlen);
-int libc_getsockname (int sockfd, struct sockaddr *addr, socklen_t * addrlen);
+int libc_getsockname (int sockfd, __SOCKADDR_ARG addr, socklen_t *addrlen);
int
libc_getsockopt (int sockfd,
@@ -144,25 +144,29 @@ ssize_t libc_readv (int fd, const struct iovec *iov, int iovcnt);
int libc_recv (int sockfd, void *buf, size_t len, int flags);
-int
-libc_recvfrom (int sockfd,
- void *buf,
- size_t len,
- int flags, struct sockaddr *src_addr, socklen_t * addrlen);
+int libc_recvfrom (int sockfd, void *buf, size_t len, int flags,
+ __SOCKADDR_ARG src_addr, socklen_t *addrlen);
int libc_recvmsg (int sockfd, struct msghdr *msg, int flags);
+#ifdef _GNU_SOURCE
+int libc_recvmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen,
+ int flags, struct timespec *tmo);
+#endif
+
int libc_send (int sockfd, const void *buf, size_t len, int flags);
ssize_t libc_sendfile (int out_fd, int in_fd, off_t * offset, size_t len);
int libc_sendmsg (int sockfd, const struct msghdr *msg, int flags);
-int
-libc_sendto (int sockfd,
- const void *buf,
- size_t len,
- int flags, const struct sockaddr *dst_addr, socklen_t addrlen);
+#ifdef _GNU_SOURCE
+int libc_sendmmsg (int fd, struct mmsghdr *vmessages, unsigned int vlen,
+ int flags);
+#endif
+
+int libc_sendto (int sockfd, const void *buf, size_t len, int flags,
+ __CONST_SOCKADDR_ARG dst_addr, socklen_t addrlen);
int
libc_setsockopt (int sockfd,
@@ -210,7 +214,7 @@ int libc_epoll_pwait (int __epfd, struct epoll_event *__events,
int libc_poll (struct pollfd *__fds, nfds_t __nfds, int __timeout);
-#ifdef __USE_GNU
+#ifdef _GNU_SOURCE
int libc_ppoll (struct pollfd *__fds, nfds_t __nfds,
const struct timespec *__timeout, const __sigset_t * __ss);
#endif
diff --git a/src/vcl/vcl_bapi.c b/src/vcl/vcl_bapi.c
index 32ded0d9eff..42704f42c53 100644
--- a/src/vcl/vcl_bapi.c
+++ b/src/vcl/vcl_bapi.c
@@ -271,11 +271,14 @@ vl_api_app_del_cert_key_pair_reply_t_handler (
_ (APP_DEL_CERT_KEY_PAIR_REPLY, app_del_cert_key_pair_reply) \
_ (APP_WORKER_ADD_DEL_REPLY, app_worker_add_del_reply)
-#define vl_print(handle, ...) fformat (handle, __VA_ARGS__)
#define vl_endianfun /* define message structures */
#include <vnet/session/session.api.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vnet/session/session.api.h>
+#undef vl_calcsizefun
+
/* instantiate all the print functions we know about */
#define vl_printfun
#include <vnet/session/session.api.h>
@@ -299,10 +302,18 @@ vcl_bapi_hookup (void)
return;
#define _(N, n) \
- vl_msg_api_set_handlers (REPLY_MSG_ID_BASE + VL_API_##N, #n, \
- vl_api_##n##_t_handler, vl_noop_handler, \
- vl_api_##n##_t_endian, vl_api_##n##_t_print, \
- sizeof (vl_api_##n##_t), 1);
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = REPLY_MSG_ID_BASE + VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = (u32) 1, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ });
foreach_sock_msg;
#undef _
}
@@ -348,7 +359,9 @@ vcl_bapi_send_attach (void)
(vcm->cfg.app_scope_local ? APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE : 0) |
(vcm->cfg.app_scope_global ? APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE : 0) |
(app_is_proxy ? APP_OPTIONS_FLAGS_IS_PROXY : 0) |
- (vcm->cfg.use_mq_eventfd ? APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD : 0);
+ (vcm->cfg.use_mq_eventfd ? APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD : 0) |
+ (vcm->cfg.huge_page ? APP_OPTIONS_FLAGS_USE_HUGE_PAGE : 0) |
+ (vcm->cfg.app_original_dst ? APP_OPTIONS_FLAGS_GET_ORIGINAL_DST : 0);
bmp->options[APP_OPTIONS_PROXY_TRANSPORT] =
(u64) ((vcm->cfg.app_proxy_transport_tcp ? 1 << TRANSPORT_PROTO_TCP : 0) |
(vcm->cfg.app_proxy_transport_udp ? 1 << TRANSPORT_PROTO_UDP : 0));
@@ -425,8 +438,8 @@ static void
vcl_bapi_send_app_add_cert_key_pair (vppcom_cert_key_pair_t *ckpair)
{
vcl_worker_t *wrk = vcl_worker_get_current ();
- u32 cert_len = test_srv_crt_rsa_len;
- u32 key_len = test_srv_key_rsa_len;
+ u32 cert_len = ckpair->cert_len;
+ u32 key_len = ckpair->key_len;
vl_api_app_add_cert_key_pair_t *bmp;
bmp = vl_msg_api_alloc (sizeof (*bmp) + cert_len + key_len);
@@ -437,8 +450,8 @@ vcl_bapi_send_app_add_cert_key_pair (vppcom_cert_key_pair_t *ckpair)
bmp->context = wrk->wrk_index;
bmp->cert_len = clib_host_to_net_u16 (cert_len);
bmp->certkey_len = clib_host_to_net_u16 (key_len + cert_len);
- clib_memcpy_fast (bmp->certkey, test_srv_crt_rsa, cert_len);
- clib_memcpy_fast (bmp->certkey + cert_len, test_srv_key_rsa, key_len);
+ clib_memcpy_fast (bmp->certkey, ckpair->cert, cert_len);
+ clib_memcpy_fast (bmp->certkey + cert_len, ckpair->key, key_len);
vl_msg_api_send_shmem (wrk->vl_input_queue, (u8 *) &bmp);
}
diff --git a/src/vcl/vcl_cfg.c b/src/vcl/vcl_cfg.c
index 7b0710f5faf..edea60dc60a 100644
--- a/src/vcl/vcl_cfg.c
+++ b/src/vcl/vcl_cfg.c
@@ -34,18 +34,14 @@ vppcom_cfg_init (vppcom_cfg_t * vcl_cfg)
vcl_cfg->heapsize = (256ULL << 20);
vcl_cfg->max_workers = 16;
- vcl_cfg->segment_baseva = HIGH_SEGMENT_BASEVA;
vcl_cfg->segment_size = (256 << 20);
vcl_cfg->add_segment_size = (128 << 20);
vcl_cfg->preallocated_fifo_pairs = 8;
vcl_cfg->rx_fifo_size = (1 << 20);
vcl_cfg->tx_fifo_size = (1 << 20);
vcl_cfg->event_queue_size = 2048;
- vcl_cfg->listen_queue_size = CLIB_CACHE_LINE_BYTES / sizeof (u32);
vcl_cfg->app_timeout = 10 * 60.0;
vcl_cfg->session_timeout = 10 * 60.0;
- vcl_cfg->accept_timeout = 60.0;
- vcl_cfg->event_ring_size = (128 << 10);
vcl_cfg->event_log_path = "/dev/shm";
}
@@ -256,6 +252,8 @@ vppcom_cfg_read_file (char *conf_fname)
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
+ /* skip empty newlines as they confuse unformat_line_input */
+ unformat_skip_white_space (input);
(void) unformat_user (input, unformat_line_input, line_input);
unformat_skip_white_space (line_input);
@@ -305,12 +303,6 @@ vppcom_cfg_read_file (char *conf_fname)
vl_set_memory_gid (gid);
VCFG_DBG (0, "VCL<%d>: configured gid %d", getpid (), gid);
}
- else if (unformat (line_input, "segment-baseva 0x%lx",
- &vcl_cfg->segment_baseva))
- {
- VCFG_DBG (0, "VCL<%d>: configured segment_baseva 0x%lx",
- getpid (), (unsigned long) vcl_cfg->segment_baseva);
- }
else if (unformat (line_input, "segment-size 0x%lx",
&vcl_cfg->segment_size))
{
@@ -388,20 +380,6 @@ vppcom_cfg_read_file (char *conf_fname)
getpid (), vcl_cfg->event_queue_size,
vcl_cfg->event_queue_size);
}
- else if (unformat (line_input, "listen-queue-size 0x%x",
- &vcl_cfg->listen_queue_size))
- {
- VCFG_DBG (0, "VCL<%d>: configured listen_queue_size 0x%x (%u)",
- getpid (), vcl_cfg->listen_queue_size,
- vcl_cfg->listen_queue_size);
- }
- else if (unformat (line_input, "listen-queue-size %u",
- &vcl_cfg->listen_queue_size))
- {
- VCFG_DBG (0, "VCL<%d>: configured listen_queue_size %u (0x%x)",
- getpid (), vcl_cfg->listen_queue_size,
- vcl_cfg->listen_queue_size);
- }
else if (unformat (line_input, "app-timeout %f",
&vcl_cfg->app_timeout))
{
@@ -414,12 +392,6 @@ vppcom_cfg_read_file (char *conf_fname)
VCFG_DBG (0, "VCL<%d>: configured session_timeout %f",
getpid (), vcl_cfg->session_timeout);
}
- else if (unformat (line_input, "accept-timeout %f",
- &vcl_cfg->accept_timeout))
- {
- VCFG_DBG (0, "VCL<%d>: configured accept_timeout %f",
- getpid (), vcl_cfg->accept_timeout);
- }
else if (unformat (line_input, "app-proxy-transport-tcp"))
{
vcl_cfg->app_proxy_transport_tcp = 1;
@@ -444,6 +416,12 @@ vppcom_cfg_read_file (char *conf_fname)
VCFG_DBG (0, "VCL<%d>: configured app_scope_global (%d)",
getpid (), vcl_cfg->app_scope_global);
}
+ else if (unformat (line_input, "huge_page"))
+ {
+ vcl_cfg->huge_page = 1;
+ VCFG_DBG (0, "VCL<%d>: configured huge_page (%d)", getpid (),
+ vcl_cfg->huge_page);
+ }
else if (unformat (line_input, "namespace-secret %lu",
&vcl_cfg->namespace_secret))
{
@@ -459,7 +437,7 @@ vppcom_cfg_read_file (char *conf_fname)
u32 nsid_vec_len = vec_len (vcl_cfg->namespace_id);
if (nsid_vec_len > max_nsid_vec_len)
{
- _vec_len (vcl_cfg->namespace_id) = max_nsid_vec_len;
+ vec_set_len (vcl_cfg->namespace_id, max_nsid_vec_len);
VCFG_DBG (0, "VCL<%d>: configured namespace_id is too long,"
" truncated to %d characters!",
getpid (), max_nsid_vec_len);
@@ -486,6 +464,11 @@ vppcom_cfg_read_file (char *conf_fname)
VCFG_DBG (0, "VCL<%d>: configured with multithread workers",
getpid ());
}
+ else if (unformat (line_input, "app_original_dst"))
+ {
+ vcl_cfg->app_original_dst = 1;
+ VCFG_DBG (0, "VCL<%d>: support original destination", getpid ());
+ }
else if (unformat (line_input, "}"))
{
vc_cfg_input = 0;
@@ -619,6 +602,13 @@ vppcom_cfg (vppcom_cfg_t * vcl_cfg)
VCFG_DBG (0, "VCL<%d>: configured app-socket-api (%s)", getpid (),
vcl_cfg->vpp_app_socket_api);
}
+ env_var_str = getenv (VPPCOM_ENV_APP_USE_MQ_EVENTFD);
+ if (env_var_str)
+ {
+ vcm->cfg.use_mq_eventfd = 1;
+ VCFG_DBG (0, "VCL<%d>: configured " VPPCOM_ENV_APP_USE_MQ_EVENTFD,
+ getpid ());
+ }
}
/*
diff --git a/src/vcl/vcl_locked.c b/src/vcl/vcl_locked.c
index 4330d4179c6..69dd15b0ef4 100644
--- a/src/vcl/vcl_locked.c
+++ b/src/vcl/vcl_locked.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Cisco and/or its affiliates.
+ * Copyright (c) 2021 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this
* You may obtain a copy of the License at:
@@ -13,46 +13,115 @@
* limitations under the License.
*/
+/**
+ * VCL Locked Sessions (VLS) is a wrapper that synchronizes access to VCL APIs
+ * which are, by construction, not thread safe. To this end, VLS uses
+ * configuration and heuristics to detect how applications use sessions in
+ * an attempt to optimize the locking strategy. The modes of operation
+ * currently supported are the following:
+ *
+ * 1) per-process workers
+ *
+ * +----------+ +----------+
+ * | | | |
+ * | process0 | | process1 |
+ * | | | |
+ * +-----+----+ +-----+----+
+ * | |
+ * | |
+ * +-----+----+ +-----+----+
+ * | | | |
+ * | vls_wrk0 | | vls_wrk1 |
+ * | | | |
+ * +-----+----+ +-----+----+
+ * | |
+ * | |
+ * +-----+----+ +-----+----+
+ * | | | |
+ * | vcl_wrk0 | | vcl_wrk1 |
+ * | | | |
+ * +----------+ +----------+
+ *
+ * 2) per-thread workers 3) single-worker multi-thread
+ *
+ * +---------+ +---------+ +---------+ +---------+
+ * | | | | | | | |
+ * | thread0 | | thread1 | | thread0 | | thread1 |
+ * | | | | | | | |
+ * +--------++ +-+-------+ +--------++ +-+-------+
+ * | | | |
+ * | | | |
+ * +-+------+-+ +-+------+-+
+ * | | | |
+ * | vls_wrk0 | | vls_wrk0 |
+ * | | | |
+ * +-+------+-+ +----+-----+
+ * | | |
+ * | | |
+ * +--------+-+ +-+--------+ +----+-----+
+ * | | | | | |
+ * | vcl_wrk0 | | vcl_wrk1 | | vcl_wrk0 |
+ * | | | | | |
+ * +----------+ +----------+ +----------+
+ *
+ * 1) per-process workers: intercept fork calls and assume all children
+ * processes are new workers that must be registered with vcl. VLS
+ * sessions are cloned and shared between workers. Only shared sessions
+ * are locked on use and thereby only one process can interact with
+ * them at a time (explicit sharing).
+ *
+ * 2) per-thread workers: each newly detected pthread is assumed to be a new
+ * worker and is registered with vcl. Enabled via configuration.
+ * When a thread tries to access a session it does not own, a clone and
+ * share rpc request is sent to the owning thread via vcl and vpp.
+ * Consequently, a vls session can map to multiple vcl sessions, one per
+ * vcl worker. VLS sessions are locked on use (implicit sharing).
+ *
+ * 3) single-worker multi-thread: vls does not make any assumptions about
+ * application threads and therefore implements an aggressive locking
+ * strategy that limits access to underlying vcl resources based on type
+ * of interaction and locks vls session on use (implicit sharing).
+ */
+
#include <vcl/vcl_locked.h>
#include <vcl/vcl_private.h>
typedef struct vls_shared_data_
{
- clib_spinlock_t lock;
- u32 owner_wrk_index;
- u32 *workers_subscribed;
- clib_bitmap_t *listeners;
+ clib_spinlock_t lock; /**< shared data lock */
+ u32 owner_wrk_index; /**< vcl wrk that owns session */
+ u32 *workers_subscribed; /**< vec of wrks subscribed to session */
+ clib_bitmap_t *listeners; /**< bitmap of wrks actively listening */
} vls_shared_data_t;
typedef struct vcl_locked_session_
{
- clib_spinlock_t lock;
- u32 session_index;
- u32 worker_index;
- u32 vls_index;
- u32 shared_data_index;
- /** VCL session owned by different workers because of migration */
- u32 owner_vcl_wrk_index;
- uword *vcl_wrk_index_to_session_index;
+ clib_spinlock_t lock; /**< vls lock when in use */
+ u32 session_index; /**< vcl session index */
+ u32 vcl_wrk_index; /**< vcl worker index */
+ u32 vls_index; /**< index in vls pool */
+ u32 shared_data_index; /**< shared data index if any */
+ u32 owner_vcl_wrk_index; /**< vcl wrk of the vls wrk at alloc */
+ uword *vcl_wrk_index_to_session_index; /**< map vcl wrk to session */
} vcl_locked_session_t;
typedef struct vls_worker_
{
- clib_rwlock_t sh_to_vlsh_table_lock; /** valid for multithread workers */
- vcl_locked_session_t *vls_pool;
- uword *session_handle_to_vlsh_table;
- u32 wrk_index;
- /** Vector of child wrk to cleanup */
- u32 *pending_wrk_cleanup;
+ clib_rwlock_t sh_to_vlsh_table_lock; /**< ht rwlock with mt workers */
+ vcl_locked_session_t *vls_pool; /**< pool of vls session */
+ uword *sh_to_vlsh_table; /**< map from vcl sh to vls sh */
+ u32 *pending_vcl_wrk_cleanup; /**< child vcl wrks to cleanup */
+ u32 vcl_wrk_index; /**< if 1:1 map vls to vcl wrk */
} vls_worker_t;
typedef struct vls_local_
{
- int vls_wrk_index;
- volatile int vls_mt_n_threads;
- pthread_mutex_t vls_mt_mq_mlock;
- pthread_mutex_t vls_mt_spool_mlock;
- volatile u8 select_mp_check;
+ int vls_wrk_index; /**< vls wrk index, 1 per process */
+ volatile int vls_mt_n_threads; /**< number of threads detected */
+ clib_rwlock_t vls_pool_lock; /**< per process/wrk vls pool locks */
+ pthread_mutex_t vls_mt_mq_mlock; /**< vcl mq lock */
+ pthread_mutex_t vls_mt_spool_mlock; /**< vcl select or pool lock */
+ volatile u8 select_mp_check; /**< flag set if select checks done */
} vls_process_local_t;
static vls_process_local_t vls_local;
@@ -60,13 +129,10 @@ static vls_process_local_t *vlsl = &vls_local;
typedef struct vls_main_
{
- vls_worker_t *workers;
- clib_rwlock_t vls_table_lock;
- /** Pool of data shared by sessions owned by different workers */
- vls_shared_data_t *shared_data_pool;
- clib_rwlock_t shared_data_lock;
- /** Lock to protect rpc among workers */
- clib_spinlock_t worker_rpc_lock;
+ vls_worker_t *workers; /**< pool of vls workers */
+ vls_shared_data_t *shared_data_pool; /**< inter proc pool of shared data */
+ clib_rwlock_t shared_data_lock; /**< shared data pool lock */
+ clib_spinlock_t worker_rpc_lock; /**< lock for inter-worker rpcs */
} vls_main_t;
vls_main_t *vlsm;
@@ -119,10 +185,7 @@ static void vls_handle_pending_wrk_cleanup (void);
static inline u32
vls_get_worker_index (void)
{
- if (vls_mt_wrk_supported ())
- return vlsl->vls_wrk_index;
- else
- return vcl_get_worker_index ();
+ return vlsl->vls_wrk_index;
}
static u32
@@ -181,31 +244,31 @@ vls_shared_data_pool_runlock (void)
}
static inline void
-vls_mt_table_rlock (void)
+vls_mt_pool_rlock (void)
{
if (vlsl->vls_mt_n_threads > 1)
- clib_rwlock_reader_lock (&vlsm->vls_table_lock);
+ clib_rwlock_reader_lock (&vlsl->vls_pool_lock);
}
static inline void
-vls_mt_table_runlock (void)
+vls_mt_pool_runlock (void)
{
if (vlsl->vls_mt_n_threads > 1)
- clib_rwlock_reader_unlock (&vlsm->vls_table_lock);
+ clib_rwlock_reader_unlock (&vlsl->vls_pool_lock);
}
static inline void
-vls_mt_table_wlock (void)
+vls_mt_pool_wlock (void)
{
if (vlsl->vls_mt_n_threads > 1)
- clib_rwlock_writer_lock (&vlsm->vls_table_lock);
+ clib_rwlock_writer_lock (&vlsl->vls_pool_lock);
}
static inline void
-vls_mt_table_wunlock (void)
+vls_mt_pool_wunlock (void)
{
if (vlsl->vls_mt_n_threads > 1)
- clib_rwlock_writer_unlock (&vlsm->vls_table_lock);
+ clib_rwlock_writer_unlock (&vlsl->vls_pool_lock);
}
typedef enum
@@ -301,7 +364,7 @@ vls_to_sh_tu (vcl_locked_session_t * vls)
{
vcl_session_handle_t sh;
sh = vls_to_sh (vls);
- vls_mt_table_runlock ();
+ vls_mt_pool_runlock ();
return sh;
}
@@ -319,15 +382,15 @@ vls_worker_alloc (void)
pool_get_zero (vlsm->workers, wrk);
if (vls_mt_wrk_supported ())
clib_rwlock_init (&wrk->sh_to_vlsh_table_lock);
- wrk->wrk_index = vcl_get_worker_index ();
- vec_validate (wrk->pending_wrk_cleanup, 16);
- vec_reset_length (wrk->pending_wrk_cleanup);
+ wrk->vcl_wrk_index = vcl_get_worker_index ();
+ vec_validate (wrk->pending_vcl_wrk_cleanup, 16);
+ vec_reset_length (wrk->pending_vcl_wrk_cleanup);
}
static void
vls_worker_free (vls_worker_t * wrk)
{
- hash_free (wrk->session_handle_to_vlsh_table);
+ hash_free (wrk->sh_to_vlsh_table);
if (vls_mt_wrk_supported ())
clib_rwlock_free (&wrk->sh_to_vlsh_table_lock);
pool_free (wrk->vls_pool);
@@ -347,7 +410,7 @@ vls_sh_to_vlsh_table_add (vls_worker_t *wrk, vcl_session_handle_t sh, u32 vlsh)
{
if (vls_mt_wrk_supported ())
clib_rwlock_writer_lock (&wrk->sh_to_vlsh_table_lock);
- hash_set (wrk->session_handle_to_vlsh_table, sh, vlsh);
+ hash_set (wrk->sh_to_vlsh_table, sh, vlsh);
if (vls_mt_wrk_supported ())
clib_rwlock_writer_unlock (&wrk->sh_to_vlsh_table_lock);
}
@@ -357,7 +420,7 @@ vls_sh_to_vlsh_table_del (vls_worker_t *wrk, vcl_session_handle_t sh)
{
if (vls_mt_wrk_supported ())
clib_rwlock_writer_lock (&wrk->sh_to_vlsh_table_lock);
- hash_unset (wrk->session_handle_to_vlsh_table, sh);
+ hash_unset (wrk->sh_to_vlsh_table, sh);
if (vls_mt_wrk_supported ())
clib_rwlock_writer_unlock (&wrk->sh_to_vlsh_table_lock);
}
@@ -367,7 +430,7 @@ vls_sh_to_vlsh_table_get (vls_worker_t *wrk, vcl_session_handle_t sh)
{
if (vls_mt_wrk_supported ())
clib_rwlock_reader_lock (&wrk->sh_to_vlsh_table_lock);
- uword *vlshp = hash_get (wrk->session_handle_to_vlsh_table, sh);
+ uword *vlshp = hash_get (wrk->sh_to_vlsh_table, sh);
if (vls_mt_wrk_supported ())
clib_rwlock_reader_unlock (&wrk->sh_to_vlsh_table_lock);
return vlshp;
@@ -379,23 +442,23 @@ vls_alloc (vcl_session_handle_t sh)
vls_worker_t *wrk = vls_worker_get_current ();
vcl_locked_session_t *vls;
- vls_mt_table_wlock ();
+ vls_mt_pool_wlock ();
pool_get_zero (wrk->vls_pool, vls);
vls->session_index = vppcom_session_index (sh);
- vls->worker_index = vppcom_session_worker (sh);
+ vls->vcl_wrk_index = vppcom_session_worker (sh);
vls->vls_index = vls - wrk->vls_pool;
vls->shared_data_index = ~0;
vls_sh_to_vlsh_table_add (wrk, sh, vls->vls_index);
if (vls_mt_wrk_supported ())
{
- hash_set (vls->vcl_wrk_index_to_session_index, vls->worker_index,
+ hash_set (vls->vcl_wrk_index_to_session_index, vls->vcl_wrk_index,
vls->session_index);
- vls->owner_vcl_wrk_index = vls->worker_index;
+ vls->owner_vcl_wrk_index = vls->vcl_wrk_index;
}
clib_spinlock_init (&vls->lock);
- vls_mt_table_wunlock ();
+ vls_mt_pool_wunlock ();
return vls->vls_index;
}
@@ -436,10 +499,10 @@ static vcl_locked_session_t *
vls_get_w_dlock (vls_handle_t vlsh)
{
vcl_locked_session_t *vls;
- vls_mt_table_rlock ();
+ vls_mt_pool_rlock ();
vls = vls_get_and_lock (vlsh);
if (!vls)
- vls_mt_table_runlock ();
+ vls_mt_pool_runlock ();
return vls;
}
@@ -447,17 +510,17 @@ static inline void
vls_get_and_unlock (vls_handle_t vlsh)
{
vcl_locked_session_t *vls;
- vls_mt_table_rlock ();
+ vls_mt_pool_rlock ();
vls = vls_get (vlsh);
vls_unlock (vls);
- vls_mt_table_runlock ();
+ vls_mt_pool_runlock ();
}
static inline void
vls_dunlock (vcl_locked_session_t * vls)
{
vls_unlock (vls);
- vls_mt_table_runlock ();
+ vls_mt_pool_runlock ();
}
static vcl_locked_session_t *
@@ -506,9 +569,9 @@ vls_session_index_to_vlsh (uint32_t session_index)
{
vls_handle_t vlsh;
- vls_mt_table_rlock ();
+ vls_mt_pool_rlock ();
vlsh = vls_si_wi_to_vlsh (session_index, vcl_get_worker_index ());
- vls_mt_table_runlock ();
+ vls_mt_pool_runlock ();
return vlsh;
}
@@ -556,7 +619,8 @@ vls_listener_wrk_set (vcl_locked_session_t * vls, u32 wrk_index, u8 is_active)
vls_shd = vls_shared_data_get (vls->shared_data_index);
clib_spinlock_lock (&vls_shd->lock);
- clib_bitmap_set (vls_shd->listeners, wrk_index, is_active);
+ vls_shd->listeners =
+ clib_bitmap_set (vls_shd->listeners, wrk_index, is_active);
clib_spinlock_unlock (&vls_shd->lock);
vls_shared_data_pool_runlock ();
@@ -606,8 +670,19 @@ vls_listener_wrk_is_active (vcl_locked_session_t * vls, u32 wrk_index)
static void
vls_listener_wrk_start_listen (vcl_locked_session_t * vls, u32 wrk_index)
{
- vppcom_session_listen (vls_to_sh (vls), ~0);
- vls_listener_wrk_set (vls, wrk_index, 1 /* is_active */ );
+ vcl_worker_t *wrk;
+ vcl_session_t *ls;
+
+ wrk = vcl_worker_get (wrk_index);
+ ls = vcl_session_get (wrk, vls->session_index);
+
+ /* Listen request already sent */
+ if (ls->flags & VCL_SESSION_F_PENDING_LISTEN)
+ return;
+
+ vcl_send_session_listen (wrk, ls);
+
+ vls_listener_wrk_set (vls, wrk_index, 1 /* is_active */);
}
static void
@@ -663,7 +738,7 @@ vls_unshare_session (vcl_locked_session_t * vls, vcl_worker_t * wrk)
if (pos < 0)
{
clib_warning ("worker %u not subscribed for vls %u", wrk->wrk_index,
- vls->worker_index);
+ vls->vcl_wrk_index);
goto done;
}
@@ -710,7 +785,7 @@ vls_unshare_session (vcl_locked_session_t * vls, vcl_worker_t * wrk)
* Check if we can change owner or close
*/
vls_shd->owner_wrk_index = vls_shd->workers_subscribed[0];
- if (s->session_state != VCL_STATE_LISTEN_NO_MQ)
+ if (s->vpp_evt_q)
vcl_send_session_worker_update (wrk, s, vls_shd->owner_wrk_index);
/* XXX is this still needed? */
@@ -735,9 +810,9 @@ vls_init_share_session (vls_worker_t * vls_wrk, vcl_locked_session_t * vls)
vls_shared_data_pool_rlock ();
vls_shd = vls_shared_data_get (vls_shd_index);
- vls_shd->owner_wrk_index = vls_wrk->wrk_index;
+ vls_shd->owner_wrk_index = vls_wrk->vcl_wrk_index;
vls->shared_data_index = vls_shd_index;
- vec_add1 (vls_shd->workers_subscribed, vls_wrk->wrk_index);
+ vec_add1 (vls_shd->workers_subscribed, vls_wrk->vcl_wrk_index);
vls_shared_data_pool_runlock ();
}
@@ -745,7 +820,7 @@ vls_init_share_session (vls_worker_t * vls_wrk, vcl_locked_session_t * vls)
void
vls_share_session (vls_worker_t * vls_wrk, vcl_locked_session_t * vls)
{
- vcl_worker_t *vcl_wrk = vcl_worker_get (vls_wrk->wrk_index);
+ vcl_worker_t *vcl_wrk = vcl_worker_get (vls_wrk->vcl_wrk_index);
vls_shared_data_t *vls_shd;
vcl_session_t *s;
@@ -767,18 +842,19 @@ vls_share_session (vls_worker_t * vls_wrk, vcl_locked_session_t * vls)
vls_shd = vls_shared_data_get (vls->shared_data_index);
clib_spinlock_lock (&vls_shd->lock);
- vec_add1 (vls_shd->workers_subscribed, vls_wrk->wrk_index);
+ vec_add1 (vls_shd->workers_subscribed, vls_wrk->vcl_wrk_index);
clib_spinlock_unlock (&vls_shd->lock);
vls_shared_data_pool_runlock ();
- if (s->rx_fifo)
+ if (s->session_state == VCL_STATE_LISTEN)
{
- vcl_session_share_fifos (s, s->rx_fifo, s->tx_fifo);
+ s->session_state = VCL_STATE_LISTEN_NO_MQ;
+ s->rx_fifo = s->tx_fifo = 0;
}
- else if (s->session_state == VCL_STATE_LISTEN)
+ else if (s->rx_fifo)
{
- s->session_state = VCL_STATE_LISTEN_NO_MQ;
+ vcl_session_share_fifos (s, s->rx_fifo, s->tx_fifo);
}
}
@@ -787,7 +863,6 @@ vls_share_sessions (vls_worker_t * vls_parent_wrk, vls_worker_t * vls_wrk)
{
vcl_locked_session_t *vls, *parent_vls;
- /* *INDENT-OFF* */
pool_foreach (vls, vls_wrk->vls_pool) {
/* Initialize sharing on parent session */
if (vls->shared_data_index == ~0)
@@ -798,7 +873,6 @@ vls_share_sessions (vls_worker_t * vls_parent_wrk, vls_worker_t * vls_wrk)
}
vls_share_session (vls_wrk, vls);
}
- /* *INDENT-ON* */
}
static void
@@ -833,33 +907,42 @@ void
vls_worker_copy_on_fork (vcl_worker_t * parent_wrk)
{
vls_worker_t *vls_wrk = vls_worker_get_current (), *vls_parent_wrk;
- vcl_worker_t *wrk = vcl_worker_get_current ();
+ vcl_worker_t *vcl_wrk = vcl_worker_get_current ();
u32 vls_index, session_index, wrk_index;
vcl_session_handle_t sh;
+ vcl_locked_session_t *vls;
/*
* init vcl worker
*/
- wrk->sessions = pool_dup (parent_wrk->sessions);
- wrk->session_index_by_vpp_handles =
+ vcl_wrk->sessions = pool_dup (parent_wrk->sessions);
+ vcl_wrk->session_index_by_vpp_handles =
hash_dup (parent_wrk->session_index_by_vpp_handles);
/*
* init vls worker
*/
vls_parent_wrk = vls_worker_get (parent_wrk->wrk_index);
- /* *INDENT-OFF* */
- hash_foreach (sh, vls_index, vls_parent_wrk->session_handle_to_vlsh_table,
- ({
- vcl_session_handle_parse (sh, &wrk_index, &session_index);
- hash_set (vls_wrk->session_handle_to_vlsh_table,
- vcl_session_handle_from_index (session_index), vls_index);
- }));
- /* *INDENT-ON* */
+
+ /* clang-format off */
+ hash_foreach (sh, vls_index, vls_parent_wrk->sh_to_vlsh_table, ({
+ vcl_session_handle_parse (sh, &wrk_index, &session_index);
+ hash_set (vls_wrk->sh_to_vlsh_table,
+ vcl_session_handle_from_index (session_index), vls_index);
+ }));
+ /* clang-format on */
vls_wrk->vls_pool = pool_dup (vls_parent_wrk->vls_pool);
+ /*
+ * Detach vls from parent vcl worker and attach them to child.
+ */
+ pool_foreach (vls, vls_wrk->vls_pool)
+ {
+ vls->vcl_wrk_index = vcl_wrk->wrk_index;
+ }
+
/* Validate vep's handle */
- vls_validate_veps (wrk);
+ vls_validate_veps (vcl_wrk);
vls_share_sessions (vls_parent_wrk, vls_wrk);
}
@@ -925,8 +1008,8 @@ vls_mt_rel_locks (int locks_acq)
static inline u8
vls_mt_session_should_migrate (vcl_locked_session_t * vls)
{
- return (vls_mt_wrk_supported ()
- && vls->worker_index != vcl_get_worker_index ());
+ return (vls_mt_wrk_supported () &&
+ vls->vcl_wrk_index != vcl_get_worker_index ());
}
static vcl_locked_session_t *
@@ -939,7 +1022,7 @@ vls_mt_session_migrate (vcl_locked_session_t *vls)
vcl_session_t *session;
uword *p;
- ASSERT (vls_mt_wrk_supported () && vls->worker_index != wrk_index);
+ ASSERT (vls_mt_wrk_supported () && vls->vcl_wrk_index != wrk_index);
/*
* VCL session on current vcl worker already allocated. Update current
@@ -947,7 +1030,7 @@ vls_mt_session_migrate (vcl_locked_session_t *vls)
*/
if ((p = hash_get (vls->vcl_wrk_index_to_session_index, wrk_index)))
{
- vls->worker_index = wrk_index;
+ vls->vcl_wrk_index = wrk_index;
vls->session_index = (u32) p[0];
return vls;
}
@@ -963,7 +1046,7 @@ vls_mt_session_migrate (vcl_locked_session_t *vls)
VERR ("session in owner worker(%u) is free", vls->owner_vcl_wrk_index);
ASSERT (0);
vls_unlock (vls);
- vls_mt_table_runlock ();
+ vls_mt_pool_runlock ();
return 0;
}
@@ -978,7 +1061,7 @@ vls_mt_session_migrate (vcl_locked_session_t *vls)
vls_index = vls->vls_index;
own_vcl_wrk_index = vls->owner_vcl_wrk_index;
vls_unlock (vls);
- vls_mt_table_runlock ();
+ vls_mt_pool_runlock ();
vls_send_clone_and_share_rpc (wrk, vls_index, sid, vls_get_worker_index (),
own_vcl_wrk_index, vls_index, src_sid);
@@ -1016,7 +1099,7 @@ vls_mt_session_migrate (vcl_locked_session_t *vls)
}
session->session_index = sid;
- vls->worker_index = wrk_index;
+ vls->vcl_wrk_index = wrk_index;
vls->session_index = sid;
hash_set (vls->vcl_wrk_index_to_session_index, wrk_index, sid);
vls_sh_to_vlsh_table_add (vls_wrk, vcl_session_handle (session),
@@ -1040,7 +1123,7 @@ vls_mt_detect (void)
if (vls_mt_wrk_supported ()) \
{ \
if (PREDICT_FALSE (_vls && \
- ((vcl_locked_session_t *) _vls)->worker_index != \
+ ((vcl_locked_session_t *) _vls)->vcl_wrk_index != \
vcl_get_worker_index ())) \
{ \
_vls = vls_mt_session_migrate (_vls); \
@@ -1218,23 +1301,30 @@ vls_mp_checks (vcl_locked_session_t * vls, int is_add)
if (vls_mt_wrk_supported ())
return;
+ ASSERT (wrk->wrk_index == vls->vcl_wrk_index);
s = vcl_session_get (wrk, vls->session_index);
switch (s->session_state)
{
case VCL_STATE_LISTEN:
if (is_add)
{
- vls_listener_wrk_set (vls, vls->worker_index, 1 /* is_active */ );
+ vls_listener_wrk_set (vls, vls->vcl_wrk_index, 1 /* is_active */);
break;
}
- vls_listener_wrk_stop_listen (vls, vls->worker_index);
+ /* Although removal from epoll means listener no longer accepts new
+ * sessions, the accept queue built by vpp cannot be drained by stopping
+ * the listener. Morover, some applications, e.g., nginx, might
+ * constantly remove and add listeners to their epfds. Removing
+ * listeners in such situations causes a lot of churn in vpp as segments
+ * and segment managers need to be recreated. */
+ /* vls_listener_wrk_stop_listen (vls, vls->vcl_wrk_index); */
break;
case VCL_STATE_LISTEN_NO_MQ:
if (!is_add)
break;
/* Register worker as listener */
- vls_listener_wrk_start_listen (vls, wrk->wrk_index);
+ vls_listener_wrk_start_listen (vls, vls->vcl_wrk_index);
/* If owner worker did not attempt to accept/xpoll on the session,
* force a listen stop for it, since it may not be interested in
@@ -1307,13 +1397,11 @@ vls_mt_session_cleanup (vcl_locked_session_t * vls)
current_vcl_wrk = vcl_get_worker_index ();
- /* *INDENT-OFF* */
hash_foreach (wrk_index, session_index, vls->vcl_wrk_index_to_session_index,
({
if (current_vcl_wrk != wrk_index)
vls_send_session_cleanup_rpc (wrk, wrk_index, session_index);
}));
- /* *INDENT-ON* */
hash_free (vls->vcl_wrk_index_to_session_index);
}
@@ -1324,12 +1412,12 @@ vls_close (vls_handle_t vlsh)
int rv;
vls_mt_detect ();
- vls_mt_table_wlock ();
+ vls_mt_pool_wlock ();
vls = vls_get_and_lock (vlsh);
if (!vls)
{
- vls_mt_table_wunlock ();
+ vls_mt_pool_wunlock ();
return VPPCOM_EBADFD;
}
@@ -1346,7 +1434,7 @@ vls_close (vls_handle_t vlsh)
vls_free (vls);
vls_mt_unguard ();
- vls_mt_table_wunlock ();
+ vls_mt_pool_wunlock ();
return rv;
}
@@ -1406,30 +1494,46 @@ vls_epoll_ctl (vls_handle_t ep_vlsh, int op, vls_handle_t vlsh,
int rv;
vls_mt_detect ();
- vls_mt_table_rlock ();
+ vls_mt_pool_rlock ();
+
ep_vls = vls_get_and_lock (ep_vlsh);
+ if (PREDICT_FALSE (!ep_vls))
+ {
+ vls_mt_pool_runlock ();
+ return VPPCOM_EBADFD;
+ }
if (vls_mt_session_should_migrate (ep_vls))
{
ep_vls = vls_mt_session_migrate (ep_vls);
if (PREDICT_FALSE (!ep_vls))
- return VPPCOM_EBADFD;
+ {
+ vls_mt_pool_runlock ();
+ return VPPCOM_EBADFD;
+ }
}
- ep_sh = vls_to_sh (ep_vls);
vls = vls_get_and_lock (vlsh);
+ if (PREDICT_FALSE (!vls))
+ {
+ vls_unlock (ep_vls);
+ vls_mt_pool_runlock ();
+ return VPPCOM_EBADFD;
+ }
+
+ ep_sh = vls_to_sh (ep_vls);
sh = vls_to_sh (vls);
vls_epoll_ctl_mp_checks (vls, op);
- vls_mt_table_runlock ();
+ vls_mt_pool_runlock ();
rv = vppcom_epoll_ctl (ep_sh, op, sh, event);
- vls_mt_table_rlock ();
+ vls_mt_pool_rlock ();
ep_vls = vls_get (ep_vlsh);
vls = vls_get (vlsh);
vls_unlock (vls);
vls_unlock (ep_vls);
- vls_mt_table_runlock ();
+ vls_mt_pool_runlock ();
return rv;
}
@@ -1472,7 +1576,6 @@ vls_select_mp_checks (vcl_si_set * read_map)
vlsl->select_mp_check = 1;
wrk = vcl_worker_get_current ();
- /* *INDENT-OFF* */
clib_bitmap_foreach (si, read_map) {
s = vcl_session_get (wrk, si);
if (s->session_state == VCL_STATE_LISTEN)
@@ -1481,7 +1584,6 @@ vls_select_mp_checks (vcl_si_set * read_map)
vls_mp_checks (vls, 1 /* is_add */);
}
}
- /* *INDENT-ON* */
}
int
@@ -1514,13 +1616,11 @@ vls_unshare_vcl_worker_sessions (vcl_worker_t * wrk)
current_wrk = vcl_get_worker_index ();
is_current = current_wrk == wrk->wrk_index;
- /* *INDENT-OFF* */
pool_foreach (s, wrk->sessions) {
vls = vls_get (vls_si_wi_to_vlsh (s->session_index, wrk->wrk_index));
if (vls && (is_current || vls_is_shared_by_wrk (vls, current_wrk)))
vls_unshare_session (vls, wrk);
}
- /* *INDENT-ON* */
}
static void
@@ -1570,18 +1670,18 @@ vls_handle_pending_wrk_cleanup (void)
vcl_worker_t *child_wrk, *wrk;
vls_worker_t *vls_wrk = vls_worker_get_current ();
- if (PREDICT_TRUE (vec_len (vls_wrk->pending_wrk_cleanup) == 0))
+ if (PREDICT_TRUE (vec_len (vls_wrk->pending_vcl_wrk_cleanup) == 0))
return;
wrk = vcl_worker_get_current ();
- vec_foreach (wip, vls_wrk->pending_wrk_cleanup)
+ vec_foreach (wip, vls_wrk->pending_vcl_wrk_cleanup)
{
child_wrk = vcl_worker_get_if_valid (*wip);
if (!child_wrk)
continue;
vls_cleanup_forked_child (wrk, child_wrk);
}
- vec_reset_length (vls_wrk->pending_wrk_cleanup);
+ vec_reset_length (vls_wrk->pending_vcl_wrk_cleanup);
}
static struct sigaction old_sa;
@@ -1621,7 +1721,7 @@ vls_intercept_sigchld_handler (int signum, siginfo_t * si, void *uc)
* So move child wrk cleanup from sighandler to vls_epoll_wait/vls_select.
*/
vls_wrk = vls_worker_get_current ();
- vec_add1 (vls_wrk->pending_wrk_cleanup, child_wrk->wrk_index);
+ vec_add1 (vls_wrk->pending_vcl_wrk_cleanup, child_wrk->wrk_index);
done:
if (old_sa.sa_flags & SA_SIGINFO)
@@ -1691,16 +1791,18 @@ vls_app_fork_child_handler (void)
* Allocate/initialize vls worker and share sessions
*/
vls_worker_alloc ();
- parent_wrk = vcl_worker_get (parent_wrk_index);
- vls_worker_copy_on_fork (parent_wrk);
- parent_wrk->forked_child = vcl_get_worker_index ();
/* Reset number of threads and set wrk index */
vlsl->vls_mt_n_threads = 0;
vlsl->vls_wrk_index = vcl_get_worker_index ();
vlsl->select_mp_check = 0;
+ clib_rwlock_init (&vlsl->vls_pool_lock);
vls_mt_locks_init ();
+ parent_wrk = vcl_worker_get (parent_wrk_index);
+ vls_worker_copy_on_fork (parent_wrk);
+ parent_wrk->forked_child = vcl_get_worker_index ();
+
VDBG (0, "forked child main worker initialized");
vcm->forking = 0;
}
@@ -1866,7 +1968,6 @@ vls_app_create (char *app_name)
vlsm = clib_mem_alloc (sizeof (vls_main_t));
clib_memset (vlsm, 0, sizeof (*vlsm));
- clib_rwlock_init (&vlsm->vls_table_lock);
clib_rwlock_init (&vlsm->shared_data_lock);
clib_spinlock_init (&vlsm->worker_rpc_lock);
pool_alloc (vlsm->workers, vcm->cfg.max_workers);
@@ -1876,6 +1977,7 @@ vls_app_create (char *app_name)
atexit (vls_app_exit);
vls_worker_alloc ();
vlsl->vls_wrk_index = vcl_get_worker_index ();
+ clib_rwlock_init (&vlsl->vls_pool_lock);
vls_mt_locks_init ();
vcm->wrk_rpc_fn = vls_rpc_handler;
return VPPCOM_OK;
diff --git a/src/vcl/vcl_private.c b/src/vcl/vcl_private.c
index 8f8ebf9d2e8..4af79e9ebb0 100644
--- a/src/vcl/vcl_private.c
+++ b/src/vcl/vcl_private.c
@@ -38,6 +38,25 @@ vcl_mq_evt_conn_get (vcl_worker_t * wrk, u32 mq_conn_idx)
return pool_elt_at_index (wrk->mq_evt_conns, mq_conn_idx);
}
+/* Add unix socket to epoll.
+ * Used only to get a notification on socket close
+ * We can't use eventfd because we don't get notifications on that fds
+ */
+static int
+vcl_mq_epoll_add_api_sock (vcl_worker_t *wrk)
+{
+ clib_socket_t *cs = &wrk->app_api_sock;
+ struct epoll_event e = { 0 };
+ int rv;
+
+ e.data.u32 = ~0;
+ rv = epoll_ctl (wrk->mqs_epfd, EPOLL_CTL_ADD, cs->fd, &e);
+ if (rv != EEXIST && rv < 0)
+ return -1;
+
+ return 0;
+}
+
int
vcl_mq_epoll_add_evfd (vcl_worker_t * wrk, svm_msg_q_t * mq)
{
@@ -64,6 +83,12 @@ vcl_mq_epoll_add_evfd (vcl_worker_t * wrk, svm_msg_q_t * mq)
return -1;
}
+ if (vcl_mq_epoll_add_api_sock (wrk))
+ {
+ VDBG (0, "failed to add mq socket to mq epoll fd");
+ return -1;
+ }
+
return mqc_index;
}
@@ -113,6 +138,9 @@ vcl_api_app_worker_add (void)
void
vcl_api_app_worker_del (vcl_worker_t * wrk)
{
+ if (wrk->api_client_handle == ~0)
+ return;
+
if (vcm->cfg.vpp_app_socket_api)
return vcl_sapi_app_worker_del (wrk);
@@ -158,6 +186,48 @@ vcl_worker_cleanup_cb (void *arg)
VDBG (0, "cleaned up worker %u", wrk_index);
}
+void
+vcl_worker_detach_sessions (vcl_worker_t *wrk)
+{
+ session_event_t *e;
+ vcl_session_t *s;
+ uword *seg_indices_map = 0;
+ u32 seg_index, val, *seg_indices = 0;
+
+ close (wrk->app_api_sock.fd);
+ pool_foreach (s, wrk->sessions)
+ {
+ if (s->session_state == VCL_STATE_LISTEN)
+ {
+ s->session_state = VCL_STATE_LISTEN_NO_MQ;
+ continue;
+ }
+ if ((s->flags & VCL_SESSION_F_IS_VEP) ||
+ s->session_state == VCL_STATE_LISTEN_NO_MQ ||
+ s->session_state == VCL_STATE_CLOSED)
+ continue;
+
+ hash_set (seg_indices_map, s->tx_fifo->segment_index, 1);
+
+ s->session_state = VCL_STATE_DETACHED;
+ vec_add2 (wrk->unhandled_evts_vector, e, 1);
+ e->event_type = SESSION_CTRL_EVT_DISCONNECTED;
+ e->session_index = s->session_index;
+ e->postponed = 1;
+ }
+
+ hash_foreach (seg_index, val, seg_indices_map,
+ ({ vec_add1 (seg_indices, seg_index); }));
+
+ vcl_segment_detach_segments (seg_indices);
+
+ /* Detach worker's mqs segment */
+ vcl_segment_detach (vcl_vpp_worker_segment_handle (wrk->wrk_index));
+
+ vec_free (seg_indices);
+ hash_free (seg_indices_map);
+}
+
vcl_worker_t *
vcl_worker_alloc_and_init ()
{
@@ -167,18 +237,22 @@ vcl_worker_alloc_and_init ()
if (vcl_get_worker_index () != ~0)
return 0;
+ /* Grab lock before selecting mem thread index */
+ clib_spinlock_lock (&vcm->workers_lock);
+
/* Use separate heap map entry for worker */
clib_mem_set_thread_index ();
if (pool_elts (vcm->workers) == vcm->cfg.max_workers)
{
VDBG (0, "max-workers %u limit reached", vcm->cfg.max_workers);
- return 0;
+ wrk = 0;
+ goto done;
}
- clib_spinlock_lock (&vcm->workers_lock);
wrk = vcl_worker_alloc ();
vcl_set_worker_index (wrk->wrk_index);
+ wrk->api_client_handle = ~0;
wrk->thread_id = pthread_self ();
wrk->current_pid = getpid ();
@@ -203,9 +277,9 @@ vcl_worker_alloc_and_init ()
vec_reset_length (wrk->mq_msg_vector);
vec_validate (wrk->unhandled_evts_vector, 128);
vec_reset_length (wrk->unhandled_evts_vector);
- clib_spinlock_unlock (&vcm->workers_lock);
done:
+ clib_spinlock_unlock (&vcm->workers_lock);
return wrk;
}
@@ -267,7 +341,8 @@ vcl_session_read_ready (vcl_session_t * s)
if (ph.data_length + SESSION_CONN_HDR_LEN > max_deq)
return 0;
- return ph.data_length;
+ /* Allow zero legth datagrams */
+ return ph.data_length ? ph.data_length : 1;
}
return svm_fifo_max_dequeue_cons (s->rx_fifo);
@@ -361,6 +436,7 @@ vcl_segment_attach (u64 segment_handle, char *name, ssvm_segment_type_t type,
if ((rv = fifo_segment_attach (&vcm->segment_main, a)))
{
clib_warning ("svm_fifo_segment_attach ('%s') failed", name);
+ clib_rwlock_writer_unlock (&vcm->segment_table_lock);
return rv;
}
hash_set (vcm->segment_table, segment_handle, a->new_segment_indices[0]);
@@ -404,7 +480,37 @@ vcl_segment_detach (u64 segment_handle)
clib_rwlock_writer_unlock (&vcm->segment_table_lock);
- VDBG (0, "detached segment %u handle %u", segment_index, segment_handle);
+ VDBG (0, "detached segment %u handle %lx", segment_index, segment_handle);
+}
+
+void
+vcl_segment_detach_segments (u32 *seg_indices)
+{
+ u64 *seg_handles = 0, *seg_handle, key;
+ u32 *seg_index;
+ u32 val;
+
+ clib_rwlock_reader_lock (&vcm->segment_table_lock);
+
+ vec_foreach (seg_index, seg_indices)
+ {
+ /* clang-format off */
+ hash_foreach (key, val, vcm->segment_table, ({
+ if (val == *seg_index)
+ {
+ vec_add1 (seg_handles, key);
+ break;
+ }
+ }));
+ /* clang-format on */
+ }
+
+ clib_rwlock_reader_unlock (&vcm->segment_table_lock);
+
+ vec_foreach (seg_handle, seg_handles)
+ vcl_segment_detach (seg_handle[0]);
+
+ vec_free (seg_handles);
}
int
@@ -591,6 +697,126 @@ vcl_session_share_fifos (vcl_session_t *s, svm_fifo_t *rxf, svm_fifo_t *txf)
return 0;
}
+const char *
+vcl_session_state_str (vcl_session_state_t state)
+{
+ char *st;
+
+ switch (state)
+ {
+ case VCL_STATE_CLOSED:
+ st = "STATE_CLOSED";
+ break;
+ case VCL_STATE_LISTEN:
+ st = "STATE_LISTEN";
+ break;
+ case VCL_STATE_READY:
+ st = "STATE_READY";
+ break;
+ case VCL_STATE_VPP_CLOSING:
+ st = "STATE_VPP_CLOSING";
+ break;
+ case VCL_STATE_DISCONNECT:
+ st = "STATE_DISCONNECT";
+ break;
+ case VCL_STATE_DETACHED:
+ st = "STATE_DETACHED";
+ break;
+ case VCL_STATE_UPDATED:
+ st = "STATE_UPDATED";
+ break;
+ case VCL_STATE_LISTEN_NO_MQ:
+ st = "STATE_LISTEN_NO_MQ";
+ break;
+ default:
+ st = "UNKNOWN_STATE";
+ break;
+ }
+
+ return st;
+}
+
+u8 *
+vcl_format_ip4_address (u8 *s, va_list *args)
+{
+ u8 *a = va_arg (*args, u8 *);
+ return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
+}
+
+u8 *
+vcl_format_ip6_address (u8 *s, va_list *args)
+{
+ ip6_address_t *a = va_arg (*args, ip6_address_t *);
+ u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
+
+ i_max_n_zero = ARRAY_LEN (a->as_u16);
+ max_n_zeros = 0;
+ i_first_zero = i_max_n_zero;
+ n_zeros = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ u32 is_zero = a->as_u16[i] == 0;
+ if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
+ {
+ i_first_zero = i;
+ n_zeros = 0;
+ }
+ n_zeros += is_zero;
+ if ((!is_zero && n_zeros > max_n_zeros) ||
+ (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
+ {
+ i_max_n_zero = i_first_zero;
+ max_n_zeros = n_zeros;
+ i_first_zero = ARRAY_LEN (a->as_u16);
+ n_zeros = 0;
+ }
+ }
+
+ last_double_colon = 0;
+ for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
+ {
+ if (i == i_max_n_zero && max_n_zeros > 1)
+ {
+ s = format (s, "::");
+ i += max_n_zeros - 1;
+ last_double_colon = 1;
+ }
+ else
+ {
+ s = format (s, "%s%x", (last_double_colon || i == 0) ? "" : ":",
+ clib_net_to_host_u16 (a->as_u16[i]));
+ last_double_colon = 0;
+ }
+ }
+
+ return s;
+}
+
+/* Format an IP46 address. */
+u8 *
+vcl_format_ip46_address (u8 *s, va_list *args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ ip46_type_t type = va_arg (*args, ip46_type_t);
+ int is_ip4 = 1;
+
+ switch (type)
+ {
+ case IP46_TYPE_ANY:
+ is_ip4 = ip46_address_is_ip4 (ip46);
+ break;
+ case IP46_TYPE_IP4:
+ is_ip4 = 1;
+ break;
+ case IP46_TYPE_IP6:
+ is_ip4 = 0;
+ break;
+ }
+
+ return is_ip4 ? format (s, "%U", vcl_format_ip4_address, &ip46->ip4) :
+ format (s, "%U", vcl_format_ip6_address, &ip46->ip6);
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vcl/vcl_private.h b/src/vcl/vcl_private.h
index 3233aeca4b2..b89052f96af 100644
--- a/src/vcl/vcl_private.h
+++ b/src/vcl/vcl_private.h
@@ -118,16 +118,17 @@ typedef enum
VCL_SESS_ATTR_CUT_THRU,
VCL_SESS_ATTR_VEP,
VCL_SESS_ATTR_VEP_SESSION,
- VCL_SESS_ATTR_LISTEN, // SOL_SOCKET,SO_ACCEPTCONN
- VCL_SESS_ATTR_NONBLOCK, // fcntl,O_NONBLOCK
- VCL_SESS_ATTR_REUSEADDR, // SOL_SOCKET,SO_REUSEADDR
- VCL_SESS_ATTR_REUSEPORT, // SOL_SOCKET,SO_REUSEPORT
- VCL_SESS_ATTR_BROADCAST, // SOL_SOCKET,SO_BROADCAST
- VCL_SESS_ATTR_V6ONLY, // SOL_TCP,IPV6_V6ONLY
- VCL_SESS_ATTR_KEEPALIVE, // SOL_SOCKET,SO_KEEPALIVE
- VCL_SESS_ATTR_TCP_NODELAY, // SOL_TCP,TCP_NODELAY
- VCL_SESS_ATTR_TCP_KEEPIDLE, // SOL_TCP,TCP_KEEPIDLE
- VCL_SESS_ATTR_TCP_KEEPINTVL, // SOL_TCP,TCP_KEEPINTVL
+ VCL_SESS_ATTR_LISTEN, // SOL_SOCKET,SO_ACCEPTCONN
+ VCL_SESS_ATTR_NONBLOCK, // fcntl,O_NONBLOCK
+ VCL_SESS_ATTR_REUSEADDR, // SOL_SOCKET,SO_REUSEADDR
+ VCL_SESS_ATTR_REUSEPORT, // SOL_SOCKET,SO_REUSEPORT
+ VCL_SESS_ATTR_BROADCAST, // SOL_SOCKET,SO_BROADCAST
+ VCL_SESS_ATTR_V6ONLY, // SOL_TCP,IPV6_V6ONLY
+ VCL_SESS_ATTR_KEEPALIVE, // SOL_SOCKET,SO_KEEPALIVE
+ VCL_SESS_ATTR_TCP_NODELAY, // SOL_TCP,TCP_NODELAY
+ VCL_SESS_ATTR_TCP_KEEPIDLE, // SOL_TCP,TCP_KEEPIDLE
+ VCL_SESS_ATTR_TCP_KEEPINTVL, // SOL_TCP,TCP_KEEPINTVL
+ VCL_SESS_ATTR_IP_PKTINFO, /* IPPROTO_IP, IP_PKTINFO */
VCL_SESS_ATTR_MAX
} vppcom_session_attr_t;
@@ -139,6 +140,9 @@ typedef enum vcl_session_flags_
VCL_SESSION_F_HAS_RX_EVT = 1 << 3,
VCL_SESSION_F_RD_SHUTDOWN = 1 << 4,
VCL_SESSION_F_WR_SHUTDOWN = 1 << 5,
+ VCL_SESSION_F_PENDING_DISCONNECT = 1 << 6,
+ VCL_SESSION_F_PENDING_FREE = 1 << 7,
+ VCL_SESSION_F_PENDING_LISTEN = 1 << 8,
} __clib_packed vcl_session_flags_t;
typedef struct vcl_session_
@@ -155,23 +159,30 @@ typedef struct vcl_session_
svm_fifo_t *ct_tx_fifo;
vcl_session_msg_t *accept_evts_fifo;
- u64 vpp_handle;
- u64 parent_handle;
+ session_handle_t vpp_handle;
+ session_handle_t parent_handle;
u32 listener_index; /**< index of parent listener (if any) */
int n_accepted_sessions; /**< sessions accepted by this listener */
vppcom_epoll_t vep;
u32 attributes; /**< see @ref vppcom_session_attr_t */
int libc_epfd;
u32 vrf;
+ u16 gso_size;
u32 sndbuf_size; // VPP-TBD: Hack until support setsockopt(SO_SNDBUF)
u32 rcvbuf_size; // VPP-TBD: Hack until support setsockopt(SO_RCVBUF)
transport_endpt_ext_cfg_t *ext_config;
+ u8 dscp;
+
+ i32 vpp_error;
#if VCL_ELOG
elog_track_t elog_track;
#endif
+
+ u16 original_dst_port; /**< original dst port (network order) */
+ u32 original_dst_ip4; /**< original dst ip4 (network order) */
} vcl_session_t;
typedef struct vppcom_cfg_t_
@@ -185,7 +196,6 @@ typedef struct vppcom_cfg_t_
u32 rx_fifo_size;
u32 tx_fifo_size;
u32 event_queue_size;
- u32 listen_queue_size;
u8 app_proxy_transport_tcp;
u8 app_proxy_transport_udp;
u8 app_scope_local;
@@ -195,13 +205,13 @@ typedef struct vppcom_cfg_t_
u8 use_mq_eventfd;
f64 app_timeout;
f64 session_timeout;
- f64 accept_timeout;
- u32 event_ring_size;
char *event_log_path;
u8 *vpp_app_socket_api; /**< app socket api socket file name */
u8 *vpp_bapi_socket_name; /**< bapi socket transport socket name */
u32 tls_engine;
u8 mt_wrk_supported;
+ u8 huge_page;
+ u8 app_original_dst;
} vppcom_cfg_t;
void vppcom_cfg (vppcom_cfg_t * vcl_cfg);
@@ -338,6 +348,10 @@ typedef struct vppcom_main_t_
/** Lock to protect worker registrations */
clib_spinlock_t workers_lock;
+ /** Counter to determine order of execution of `vcl_api_retry_attach`
+ * function by multiple workers */
+ int reattach_count;
+
/** Lock to protect segment hash table */
clib_rwlock_t segment_table_lock;
@@ -543,8 +557,6 @@ vcl_session_table_lookup_listener (vcl_worker_t * wrk, u64 handle)
return s;
}
-const char *vppcom_session_state_str (vcl_session_state_t state);
-
static inline u8
vcl_session_is_ct (vcl_session_t * s)
{
@@ -652,6 +664,32 @@ vcl_session_clear_attr (vcl_session_t * s, u8 attr)
s->attributes &= ~(1 << attr);
}
+static inline session_evt_type_t
+vcl_session_dgram_tx_evt (vcl_session_t *s, session_evt_type_t et)
+{
+ return (s->flags & VCL_SESSION_F_CONNECTED) ? et : SESSION_IO_EVT_TX_MAIN;
+}
+
+static inline void
+vcl_session_add_want_deq_ntf (vcl_session_t *s, svm_fifo_deq_ntf_t evt)
+{
+ svm_fifo_t *txf = vcl_session_is_ct (s) ? s->ct_tx_fifo : s->tx_fifo;
+ if (txf)
+ {
+ svm_fifo_add_want_deq_ntf (txf, evt);
+ /* Request tx notification only if 3% of fifo is empty */
+ svm_fifo_set_deq_thresh (txf, 0.03 * svm_fifo_size (txf));
+ }
+}
+
+static inline void
+vcl_session_del_want_deq_ntf (vcl_session_t *s, svm_fifo_deq_ntf_t evt)
+{
+ svm_fifo_t *txf = vcl_session_is_ct (s) ? s->ct_tx_fifo : s->tx_fifo;
+ if (txf)
+ svm_fifo_del_want_deq_ntf (txf, evt);
+}
+
/*
* Helpers
*/
@@ -718,6 +756,8 @@ int vcl_send_worker_rpc (u32 dst_wrk_index, void *data, u32 data_len);
int vcl_segment_attach (u64 segment_handle, char *name,
ssvm_segment_type_t type, int fd);
void vcl_segment_detach (u64 segment_handle);
+void vcl_segment_detach_segments (u32 *seg_indices);
+void vcl_send_session_listen (vcl_worker_t *wrk, vcl_session_t *s);
void vcl_send_session_unlisten (vcl_worker_t * wrk, vcl_session_t * s);
int vcl_segment_attach_session (uword segment_handle, uword rxf_offset,
@@ -731,6 +771,7 @@ svm_fifo_chunk_t *vcl_segment_alloc_chunk (uword segment_handle,
uword *offset);
int vcl_session_share_fifos (vcl_session_t *s, svm_fifo_t *rxf,
svm_fifo_t *txf);
+void vcl_worker_detach_sessions (vcl_worker_t *wrk);
/*
* VCL Binary API
@@ -753,6 +794,16 @@ int vcl_sapi_app_worker_add (void);
void vcl_sapi_app_worker_del (vcl_worker_t * wrk);
void vcl_sapi_detach (vcl_worker_t * wrk);
int vcl_sapi_recv_fds (vcl_worker_t * wrk, int *fds, int n_fds);
+int vcl_sapi_add_cert_key_pair (vppcom_cert_key_pair_t *ckpair);
+int vcl_sapi_del_cert_key_pair (u32 ckpair_index);
+
+/*
+ * Utility functions
+ */
+const char *vcl_session_state_str (vcl_session_state_t state);
+u8 *vcl_format_ip4_address (u8 *s, va_list *args);
+u8 *vcl_format_ip6_address (u8 *s, va_list *args);
+u8 *vcl_format_ip46_address (u8 *s, va_list *args);
#endif /* SRC_VCL_VCL_PRIVATE_H_ */
diff --git a/src/vcl/vcl_sapi.c b/src/vcl/vcl_sapi.c
index 84d56af576b..e3e2b6ac377 100644
--- a/src/vcl/vcl_sapi.c
+++ b/src/vcl/vcl_sapi.c
@@ -30,7 +30,9 @@ vcl_api_connect_app_socket (vcl_worker_t * wrk)
if ((err = clib_socket_init (cs)))
{
- clib_error_report (err);
+ /* don't report the error to avoid flood of error messages during
+ * reconnect */
+ clib_error_free (err);
rv = -1;
goto done;
}
@@ -57,6 +59,8 @@ vcl_api_attach_reply_handler (app_sapi_attach_reply_msg_t * mp, int *fds)
}
wrk->api_client_handle = mp->api_client_handle;
+ /* reattaching via `vcl_api_retry_attach` wants wrk->vpp_wrk_index to be 0 */
+ wrk->vpp_wrk_index = 0;
segment_handle = mp->segment_handle;
if (segment_handle == VCL_INVALID_SEGMENT_HANDLE)
{
@@ -125,7 +129,9 @@ vcl_api_send_attach (clib_socket_t * cs)
(vcm->cfg.app_scope_local ? APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE : 0) |
(vcm->cfg.app_scope_global ? APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE : 0) |
(app_is_proxy ? APP_OPTIONS_FLAGS_IS_PROXY : 0) |
- (vcm->cfg.use_mq_eventfd ? APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD : 0);
+ (vcm->cfg.use_mq_eventfd ? APP_OPTIONS_FLAGS_EVT_MQ_USE_EVENTFD : 0) |
+ (vcm->cfg.huge_page ? APP_OPTIONS_FLAGS_USE_HUGE_PAGE : 0) |
+ (vcm->cfg.app_original_dst ? APP_OPTIONS_FLAGS_GET_ORIGINAL_DST : 0);
mp->options[APP_OPTIONS_PROXY_TRANSPORT] =
(u64) ((vcm->cfg.app_proxy_transport_tcp ? 1 << TRANSPORT_PROTO_TCP : 0) |
(vcm->cfg.app_proxy_transport_udp ? 1 << TRANSPORT_PROTO_UDP : 0));
@@ -356,6 +362,115 @@ vcl_sapi_recv_fds (vcl_worker_t * wrk, int *fds, int n_fds)
return 0;
}
+int
+vcl_sapi_add_cert_key_pair (vppcom_cert_key_pair_t *ckpair)
+{
+ u32 cert_len = ckpair->cert_len, key_len = ckpair->key_len, certkey_len;
+ vcl_worker_t *wrk = vcl_worker_get_current ();
+ app_sapi_msg_t _msg = { 0 }, *msg = &_msg;
+ app_sapi_cert_key_add_del_msg_t *mp;
+ app_sapi_msg_t _rmp, *rmp = &_rmp;
+ clib_error_t *err;
+ clib_socket_t *cs;
+ u8 *certkey = 0;
+ int rv = -1;
+
+ msg->type = APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY;
+ mp = &msg->cert_key_add_del;
+ mp->context = wrk->wrk_index;
+ mp->cert_len = cert_len;
+ mp->certkey_len = cert_len + key_len;
+ mp->is_add = 1;
+
+ certkey_len = cert_len + key_len;
+ vec_validate (certkey, certkey_len - 1);
+ clib_memcpy_fast (certkey, ckpair->cert, cert_len);
+ clib_memcpy_fast (certkey + cert_len, ckpair->key, key_len);
+
+ cs = &wrk->app_api_sock;
+ err = clib_socket_sendmsg (cs, msg, sizeof (*msg), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ goto done;
+ }
+
+ err = clib_socket_sendmsg (cs, certkey, certkey_len, 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ goto done;
+ }
+
+ /*
+ * Wait for reply and process it
+ */
+ err = clib_socket_recvmsg (cs, rmp, sizeof (*rmp), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ goto done;
+ }
+
+ if (rmp->type != APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY)
+ {
+ clib_warning ("unexpected reply type %u", rmp->type);
+ goto done;
+ }
+
+ if (!rmp->cert_key_add_del_reply.retval)
+ rv = rmp->cert_key_add_del_reply.index;
+
+done:
+
+ return rv;
+}
+
+int
+vcl_sapi_del_cert_key_pair (u32 ckpair_index)
+{
+ vcl_worker_t *wrk = vcl_worker_get_current ();
+ app_sapi_msg_t _msg = { 0 }, *msg = &_msg;
+ app_sapi_cert_key_add_del_msg_t *mp;
+ app_sapi_msg_t _rmp, *rmp = &_rmp;
+ clib_error_t *err;
+ clib_socket_t *cs;
+
+ msg->type = APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY;
+ mp = &msg->cert_key_add_del;
+ mp->context = wrk->wrk_index;
+ mp->index = ckpair_index;
+
+ cs = &wrk->app_api_sock;
+ err = clib_socket_sendmsg (cs, msg, sizeof (*msg), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+
+ /*
+ * Wait for reply and process it
+ */
+ err = clib_socket_recvmsg (cs, rmp, sizeof (*rmp), 0, 0);
+ if (err)
+ {
+ clib_error_report (err);
+ return -1;
+ }
+
+ if (rmp->type != APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY)
+ {
+ clib_warning ("unexpected reply type %u", rmp->type);
+ return -1;
+ }
+
+ if (rmp->cert_key_add_del_reply.retval)
+ return -1;
+
+ return 0;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vcl/vppcom.c b/src/vcl/vppcom.c
index 940a8b92dc1..a557093e897 100644
--- a/src/vcl/vppcom.c
+++ b/src/vcl/vppcom.c
@@ -37,131 +37,7 @@ vcl_mq_dequeue_batch (vcl_worker_t * wrk, svm_msg_q_t * mq, u32 n_max_msg)
return n_msgs;
}
-const char *
-vppcom_session_state_str (vcl_session_state_t state)
-{
- char *st;
-
- switch (state)
- {
- case VCL_STATE_CLOSED:
- st = "STATE_CLOSED";
- break;
- case VCL_STATE_LISTEN:
- st = "STATE_LISTEN";
- break;
- case VCL_STATE_READY:
- st = "STATE_READY";
- break;
- case VCL_STATE_VPP_CLOSING:
- st = "STATE_VPP_CLOSING";
- break;
- case VCL_STATE_DISCONNECT:
- st = "STATE_DISCONNECT";
- break;
- case VCL_STATE_DETACHED:
- st = "STATE_DETACHED";
- break;
- case VCL_STATE_UPDATED:
- st = "STATE_UPDATED";
- break;
- case VCL_STATE_LISTEN_NO_MQ:
- st = "STATE_LISTEN_NO_MQ";
- break;
- default:
- st = "UNKNOWN_STATE";
- break;
- }
-
- return st;
-}
-u8 *
-format_ip4_address (u8 * s, va_list * args)
-{
- u8 *a = va_arg (*args, u8 *);
- return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
-}
-
-u8 *
-format_ip6_address (u8 * s, va_list * args)
-{
- ip6_address_t *a = va_arg (*args, ip6_address_t *);
- u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
-
- i_max_n_zero = ARRAY_LEN (a->as_u16);
- max_n_zeros = 0;
- i_first_zero = i_max_n_zero;
- n_zeros = 0;
- for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
- {
- u32 is_zero = a->as_u16[i] == 0;
- if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
- {
- i_first_zero = i;
- n_zeros = 0;
- }
- n_zeros += is_zero;
- if ((!is_zero && n_zeros > max_n_zeros)
- || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
- {
- i_max_n_zero = i_first_zero;
- max_n_zeros = n_zeros;
- i_first_zero = ARRAY_LEN (a->as_u16);
- n_zeros = 0;
- }
- }
-
- last_double_colon = 0;
- for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
- {
- if (i == i_max_n_zero && max_n_zeros > 1)
- {
- s = format (s, "::");
- i += max_n_zeros - 1;
- last_double_colon = 1;
- }
- else
- {
- s = format (s, "%s%x",
- (last_double_colon || i == 0) ? "" : ":",
- clib_net_to_host_u16 (a->as_u16[i]));
- last_double_colon = 0;
- }
- }
-
- return s;
-}
-
-/* Format an IP46 address. */
-u8 *
-format_ip46_address (u8 * s, va_list * args)
-{
- ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
- ip46_type_t type = va_arg (*args, ip46_type_t);
- int is_ip4 = 1;
-
- switch (type)
- {
- case IP46_TYPE_ANY:
- is_ip4 = ip46_address_is_ip4 (ip46);
- break;
- case IP46_TYPE_IP4:
- is_ip4 = 1;
- break;
- case IP46_TYPE_IP6:
- is_ip4 = 0;
- break;
- }
-
- return is_ip4 ?
- format (s, "%U", format_ip4_address, &ip46->ip4) :
- format (s, "%U", format_ip6_address, &ip46->ip6);
-}
-
-/*
- * VPPCOM Utility Functions
- */
static void
vcl_msg_add_ext_config (vcl_session_t *s, uword *offset)
@@ -175,8 +51,8 @@ vcl_msg_add_ext_config (vcl_session_t *s, uword *offset)
clib_memcpy_fast (c->data, s->ext_config, s->ext_config->len);
}
-static void
-vcl_send_session_listen (vcl_worker_t * wrk, vcl_session_t * s)
+void
+vcl_send_session_listen (vcl_worker_t *wrk, vcl_session_t *s)
{
app_session_evt_t _app_evt, *app_evt = &_app_evt;
session_listen_msg_t *mp;
@@ -204,6 +80,7 @@ vcl_send_session_listen (vcl_worker_t * wrk, vcl_session_t * s)
clib_mem_free (s->ext_config);
s->ext_config = 0;
}
+ s->flags |= VCL_SESSION_F_PENDING_LISTEN;
}
static void
@@ -219,6 +96,7 @@ vcl_send_session_connect (vcl_worker_t * wrk, vcl_session_t * s)
memset (mp, 0, sizeof (*mp));
mp->client_index = wrk->api_client_handle;
mp->context = s->session_index;
+ mp->dscp = s->dscp;
mp->wrk_index = wrk->vpp_wrk_index;
mp->is_ip4 = s->transport.is_ip4;
mp->parent_handle = s->parent_handle;
@@ -412,10 +290,17 @@ vcl_session_transport_attr (vcl_worker_t *wrk, vcl_session_t *s, u8 is_get,
f64 timeout;
ASSERT (!wrk->session_attr_op);
+ mq = s->vpp_evt_q;
+ if (PREDICT_FALSE (!mq))
+ {
+ /* FIXME: attribute should be stored and sent once session is
+ * bound/connected to vpp */
+ return 0;
+ }
+
wrk->session_attr_op = 1;
wrk->session_attr_op_rv = -1;
- mq = s->vpp_evt_q;
app_alloc_ctrl_evt_to_vpp (mq, app_evt, SESSION_CTRL_EVT_TRANSPORT_ATTR);
mp = (session_transport_attr_msg_t *) app_evt->evt->data;
memset (mp, 0, sizeof (*mp));
@@ -459,12 +344,18 @@ vcl_session_accepted_handler (vcl_worker_t * wrk, session_accepted_msg_t * mp,
mp->segment_handle, mp->server_rx_fifo, mp->server_tx_fifo,
mp->vpp_event_queue_address, mp->mq_index, 0, session))
{
- VDBG (0, "failed to attach fifos for %u", session->session_index);
+ VDBG (0, "session %u [0x%llx]: failed to attach fifos",
+ session->session_index, mp->handle);
goto error;
}
session->vpp_handle = mp->handle;
session->session_state = VCL_STATE_READY;
+ if (mp->rmt.is_ip4)
+ {
+ session->original_dst_ip4 = mp->original_dst_ip4;
+ session->original_dst_port = mp->original_dst_port;
+ }
session->transport.rmt_port = mp->rmt.port;
session->transport.is_ip4 = mp->rmt.is_ip4;
clib_memcpy_fast (&session->transport.rmt_ip, &mp->rmt.ip,
@@ -475,14 +366,11 @@ vcl_session_accepted_handler (vcl_worker_t * wrk, session_accepted_msg_t * mp,
session->transport.lcl_ip = mp->lcl.ip;
session->session_type = listen_session->session_type;
session->is_dgram = vcl_proto_is_dgram (session->session_type);
+ if (session->is_dgram)
+ session->flags |= (listen_session->flags & VCL_SESSION_F_CONNECTED);
session->listener_index = listen_session->session_index;
listen_session->n_accepted_sessions++;
- VDBG (1, "session %u [0x%llx]: client accept request from %s address %U"
- " port %d queue %p!", session->session_index, mp->handle,
- mp->rmt.is_ip4 ? "IPv4" : "IPv6", format_ip46_address, &mp->rmt.ip,
- mp->rmt.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
- clib_net_to_host_u16 (mp->rmt.port), session->vpp_evt_q);
vcl_evt (VCL_EVT_ACCEPT, session, listen_session, session_index);
vcl_send_session_accepted_reply (session->vpp_evt_q, mp->context,
@@ -508,29 +396,47 @@ vcl_session_connected_handler (vcl_worker_t * wrk,
session_index = mp->context;
session = vcl_session_get (wrk, session_index);
- if (!session)
+ if (PREDICT_FALSE (!session))
{
- VDBG (0, "ERROR: vpp handle 0x%llx has no session index (%u)!",
- mp->handle, session_index);
+ VERR ("vpp handle 0x%llx has no session index (%u)!", mp->handle,
+ session_index);
+ /* Should not happen but if it does, force vpp session cleanup */
+ vcl_session_t tmp_session = {
+ .vpp_handle = mp->handle,
+ .vpp_evt_q = 0,
+ };
+ vcl_segment_attach_session (
+ mp->segment_handle, mp->server_rx_fifo, mp->server_tx_fifo,
+ mp->vpp_event_queue_address, mp->mq_index, 0, session);
+ if (tmp_session.vpp_evt_q)
+ vcl_send_session_disconnect (wrk, &tmp_session);
return VCL_INVALID_SESSION_INDEX;
}
+
if (mp->retval)
{
- VDBG (0, "ERROR: session index %u: connect failed! %U",
- session_index, format_session_error, mp->retval);
+ VDBG (0, "session %u: connect failed! %U", session_index,
+ format_session_error, mp->retval);
session->session_state = VCL_STATE_DETACHED;
- session->vpp_handle = mp->handle;
+ session->vpp_handle = VCL_INVALID_SESSION_HANDLE;
+ session->vpp_error = mp->retval;
return session_index;
}
session->vpp_handle = mp->handle;
+ /* Add to lookup table. Even if something fails, session cannot be
+ * cleaned up prior to notifying vpp and going through the cleanup
+ * "procedure" see @ref vcl_session_cleanup_handler */
+ vcl_session_table_add_vpp_handle (wrk, mp->handle, session_index);
+
if (vcl_segment_attach_session (
mp->segment_handle, mp->server_rx_fifo, mp->server_tx_fifo,
mp->vpp_event_queue_address, mp->mq_index, 0, session))
{
- VDBG (0, "failed to attach fifos for %u", session->session_index);
- session->session_state = VCL_STATE_DETACHED;
+ VDBG (0, "session %u [0x%llx]: failed to attach fifos",
+ session->session_index, session->vpp_handle);
+ session->session_state = VCL_STATE_UPDATED;
vcl_send_session_disconnect (wrk, session);
return session_index;
}
@@ -541,8 +447,9 @@ vcl_session_connected_handler (vcl_worker_t * wrk,
mp->ct_tx_fifo, (uword) ~0, ~0, 1,
session))
{
- VDBG (0, "failed to attach ct fifos for %u", session->session_index);
- session->session_state = VCL_STATE_DETACHED;
+ VDBG (0, "session %u [0x%llx]: failed to attach ct fifos",
+ session->session_index, session->vpp_handle);
+ session->session_state = VCL_STATE_UPDATED;
vcl_send_session_disconnect (wrk, session);
return session_index;
}
@@ -560,12 +467,14 @@ vcl_session_connected_handler (vcl_worker_t * wrk,
else
session->session_state = VCL_STATE_READY;
- /* Add it to lookup table */
- vcl_session_table_add_vpp_handle (wrk, mp->handle, session_index);
-
- VDBG (1, "session %u [0x%llx] connected! rx_fifo %p, refcnt %d, tx_fifo %p,"
- " refcnt %d", session_index, mp->handle, session->rx_fifo,
- session->rx_fifo->refcnt, session->tx_fifo, session->tx_fifo->refcnt);
+ VDBG (0, "session %u [0x%llx] connected local: %U:%u remote %U:%u",
+ session->session_index, session->vpp_handle, vcl_format_ip46_address,
+ &session->transport.lcl_ip,
+ session->transport.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
+ clib_net_to_host_u16 (session->transport.lcl_port),
+ vcl_format_ip46_address, &session->transport.rmt_ip,
+ session->transport.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
+ clib_net_to_host_u16 (session->transport.rmt_port));
return session_index;
}
@@ -604,9 +513,9 @@ vcl_session_reset_handler (vcl_worker_t * wrk,
}
/* Caught a reset before actually accepting the session */
- if (session->session_state == VCL_STATE_LISTEN)
+ if (session->session_state == VCL_STATE_LISTEN ||
+ session->session_state == VCL_STATE_LISTEN_NO_MQ)
{
-
if (!vcl_flag_accepted_session (session, reset_msg->handle,
VCL_ACCEPTED_F_RESET))
VDBG (0, "session was not accepted!");
@@ -615,7 +524,9 @@ vcl_session_reset_handler (vcl_worker_t * wrk,
if (session->session_state != VCL_STATE_CLOSED)
session->session_state = VCL_STATE_DISCONNECT;
- VDBG (0, "reset session %u [0x%llx]", sid, reset_msg->handle);
+
+ session->flags |= (VCL_SESSION_F_RD_SHUTDOWN | VCL_SESSION_F_WR_SHUTDOWN);
+ VDBG (0, "session %u [0x%llx]: reset", sid, reset_msg->handle);
return sid;
}
@@ -634,12 +545,13 @@ vcl_session_bound_handler (vcl_worker_t * wrk, session_bound_msg_t * mp)
{
session->session_state = VCL_STATE_DETACHED;
session->vpp_handle = mp->handle;
+ session->vpp_error = mp->retval;
return sid;
}
else
{
- VDBG (0, "ERROR: session %u [0x%llx]: Invalid session index!",
- sid, mp->handle);
+ VDBG (0, "session %u [0x%llx]: Invalid session index!", sid,
+ mp->handle);
return VCL_INVALID_SESSION_INDEX;
}
}
@@ -651,6 +563,7 @@ vcl_session_bound_handler (vcl_worker_t * wrk, session_bound_msg_t * mp)
session->transport.lcl_port = mp->lcl_port;
vcl_session_table_add_listener (wrk, mp->handle, sid);
session->session_state = VCL_STATE_LISTEN;
+ session->flags &= ~VCL_SESSION_F_PENDING_LISTEN;
if (vcl_session_is_cl (session))
{
@@ -658,7 +571,8 @@ vcl_session_bound_handler (vcl_worker_t * wrk, session_bound_msg_t * mp)
mp->tx_fifo, mp->vpp_evt_q, mp->mq_index,
0, session))
{
- VDBG (0, "failed to attach fifos for %u", session->session_index);
+ VDBG (0, "session %u [0x%llx]: failed to attach fifos",
+ session->session_index, session->vpp_handle);
session->session_state = VCL_STATE_DETACHED;
return VCL_INVALID_SESSION_INDEX;
}
@@ -784,7 +698,7 @@ vcl_session_disconnected_handler (vcl_worker_t * wrk,
session = vcl_session_get_w_vpp_handle (wrk, msg->handle);
if (!session)
{
- VDBG (0, "request to disconnect unknown handle 0x%llx", msg->handle);
+ VWRN ("request to disconnect unknown handle 0x%llx", msg->handle);
return 0;
}
@@ -793,7 +707,8 @@ vcl_session_disconnected_handler (vcl_worker_t * wrk,
return 0;
/* Caught a disconnect before actually accepting the session */
- if (session->session_state == VCL_STATE_LISTEN)
+ if (session->session_state == VCL_STATE_LISTEN ||
+ session->session_state == VCL_STATE_LISTEN_NO_MQ)
{
if (!vcl_flag_accepted_session (session, msg->handle,
VCL_ACCEPTED_F_CLOSED))
@@ -824,7 +739,7 @@ vppcom_session_shutdown (uint32_t session_handle, int how)
state = session->session_state;
VDBG (1, "session %u [0x%llx] state 0x%x (%s)", session->session_index,
- vpp_handle, state, vppcom_session_state_str (state));
+ vpp_handle, state, vcl_session_state_str (state));
if (PREDICT_FALSE (state == VCL_STATE_LISTEN))
{
@@ -866,8 +781,8 @@ vppcom_session_disconnect (u32 session_handle)
vpp_handle = session->vpp_handle;
state = session->session_state;
- VDBG (1, "session %u [0x%llx] state 0x%x (%s)", session->session_index,
- vpp_handle, state, vppcom_session_state_str (state));
+ VDBG (1, "session %u [0x%llx]: disconnecting state (%s)",
+ session->session_index, vpp_handle, vcl_session_state_str (state));
if (PREDICT_FALSE (state == VCL_STATE_LISTEN))
{
@@ -888,7 +803,7 @@ vppcom_session_disconnect (u32 session_handle)
if (PREDICT_FALSE (!session->vpp_evt_q))
return VPPCOM_OK;
- VDBG (1, "session %u [0x%llx]: sending disconnect...",
+ VDBG (1, "session %u [0x%llx]: sending disconnect",
session->session_index, vpp_handle);
vcl_send_session_disconnect (wrk, session);
}
@@ -896,7 +811,8 @@ vppcom_session_disconnect (u32 session_handle)
if (session->listener_index != VCL_INVALID_SESSION_INDEX)
{
listen_session = vcl_session_get (wrk, session->listener_index);
- listen_session->n_accepted_sessions--;
+ if (listen_session)
+ listen_session->n_accepted_sessions--;
}
return VPPCOM_OK;
@@ -912,7 +828,7 @@ vcl_session_cleanup_handler (vcl_worker_t * wrk, void *data)
session = vcl_session_get_w_vpp_handle (wrk, msg->handle);
if (!session)
{
- VDBG (0, "disconnect confirmed for unknown handle 0x%llx", msg->handle);
+ VWRN ("disconnect confirmed for unknown handle 0x%llx", msg->handle);
return;
}
@@ -943,15 +859,28 @@ vcl_session_cleanup_handler (vcl_worker_t * wrk, void *data)
return;
}
+ /* VPP will reuse the handle so clean it up now */
vcl_session_table_del_vpp_handle (wrk, msg->handle);
- /* Should not happen. App did not close the connection so don't free it. */
+
+ /* App did not close the connection yet so don't free it. */
if (session->session_state != VCL_STATE_CLOSED)
{
- VDBG (0, "app did not close session %d", session->session_index);
+ VDBG (0, "session %u: app did not close", session->session_index);
session->session_state = VCL_STATE_DETACHED;
session->vpp_handle = VCL_INVALID_SESSION_HANDLE;
return;
}
+
+ /* Session probably tracked with epoll, disconnect not yet handled and
+ * 1) both transport and session cleanup completed 2) app closed. Wait
+ * until message is drained to free the session.
+ * See @ref vcl_handle_mq_event */
+ if (session->flags & VCL_SESSION_F_PENDING_DISCONNECT)
+ {
+ session->flags |= VCL_SESSION_F_PENDING_FREE;
+ return;
+ }
+
vcl_session_free (wrk, session);
}
@@ -1047,7 +976,7 @@ vcl_session_app_del_segment_handler (vcl_worker_t * wrk, void *data)
{
session_app_del_segment_msg_t *msg = (session_app_del_segment_msg_t *) data;
vcl_segment_detach (msg->segment_handle);
- VDBG (1, "Unmapped segment: %d", msg->segment_handle);
+ VDBG (1, "Unmapped segment: %lx", msg->segment_handle);
}
static void
@@ -1124,9 +1053,24 @@ vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e)
disconnected_msg = (session_disconnected_msg_t *) e->data;
if (!(s = vcl_session_get_w_vpp_handle (wrk, disconnected_msg->handle)))
break;
- if (vcl_session_has_attr (s, VCL_SESS_ATTR_NONBLOCK))
+ if (s->session_state == VCL_STATE_CLOSED)
+ break;
+ /* We do not postpone for blocking sessions or listen sessions because:
+ * 1. Blocking sessions are not part of epoll instead they're used in a
+ * synchronous manner, such as read/write and etc.
+ * 2. Listen sessions that have not yet been accepted can't change to
+ * VPP_CLOSING state instead can been marked as ACCEPTED_F_CLOSED.
+ */
+ if (vcl_session_has_attr (s, VCL_SESS_ATTR_NONBLOCK) &&
+ !(s->session_state == VCL_STATE_LISTEN ||
+ s->session_state == VCL_STATE_LISTEN_NO_MQ))
{
- vec_add1 (wrk->unhandled_evts_vector, *e);
+ s->session_state = VCL_STATE_VPP_CLOSING;
+ s->flags |= VCL_SESSION_F_PENDING_DISCONNECT;
+ vec_add2 (wrk->unhandled_evts_vector, ecpy, 1);
+ *ecpy = *e;
+ ecpy->postponed = 1;
+ ecpy->session_index = s->session_index;
break;
}
if (!(s = vcl_session_disconnected_handler (wrk, disconnected_msg)))
@@ -1138,9 +1082,25 @@ vcl_handle_mq_event (vcl_worker_t * wrk, session_event_t * e)
reset_msg = (session_reset_msg_t *) e->data;
if (!(s = vcl_session_get_w_vpp_handle (wrk, reset_msg->handle)))
break;
- if (vcl_session_has_attr (s, VCL_SESS_ATTR_NONBLOCK))
+ if (s->session_state == VCL_STATE_CLOSED)
+ break;
+ /* We do not postpone for blocking sessions or listen sessions because:
+ * 1. Blocking sessions are not part of epoll instead they're used in a
+ * synchronous manner, such as read/write and etc.
+ * 2. Listen sessions that have not yet been accepted can't change to
+ * DISCONNECT state instead can been marked as ACCEPTED_F_RESET.
+ */
+ if (vcl_session_has_attr (s, VCL_SESS_ATTR_NONBLOCK) &&
+ !(s->session_state == VCL_STATE_LISTEN ||
+ s->session_state == VCL_STATE_LISTEN_NO_MQ))
{
- vec_add1 (wrk->unhandled_evts_vector, *e);
+ s->flags |= VCL_SESSION_F_PENDING_DISCONNECT;
+ s->session_state = VCL_STATE_DISCONNECT;
+ s->flags |= (VCL_SESSION_F_RD_SHUTDOWN | VCL_SESSION_F_WR_SHUTDOWN);
+ vec_add2 (wrk->unhandled_evts_vector, ecpy, 1);
+ *ecpy = *e;
+ ecpy->postponed = 1;
+ ecpy->session_index = s->session_index;
break;
}
vcl_session_reset_handler (wrk, (session_reset_msg_t *) e->data);
@@ -1202,7 +1162,10 @@ vppcom_wait_for_session_state_change (u32 session_index,
}
if (session->session_state == VCL_STATE_DETACHED)
{
- return VPPCOM_ECONNREFUSED;
+ if (session->vpp_error == SESSION_E_ALREADY_LISTENING)
+ return VPPCOM_EADDRINUSE;
+ else
+ return VPPCOM_ECONNREFUSED;
}
if (svm_msg_q_sub (wrk->app_event_queue, &msg, SVM_Q_NOWAIT, 0))
@@ -1217,7 +1180,7 @@ vppcom_wait_for_session_state_change (u32 session_index,
while (clib_time_now (&wrk->clib_time) < timeout);
VDBG (0, "timeout waiting for state 0x%x (%s)", state,
- vppcom_session_state_str (state));
+ vcl_session_state_str (state));
vcl_evt (VCL_EVT_SESSION_TIMEOUT, session, session_state);
return VPPCOM_ETIMEDOUT;
@@ -1299,11 +1262,11 @@ vppcom_session_unbind (u32 session_handle)
vcl_send_session_unlisten (wrk, session);
- VDBG (1, "session %u [0x%llx]: sending unbind!", session->session_index,
+ VDBG (0, "session %u [0x%llx]: sending unbind!", session->session_index,
session->vpp_handle);
vcl_evt (VCL_EVT_UNBIND, session);
- session->vpp_handle = ~0;
+ session->vpp_handle = SESSION_INVALID_HANDLE;
session->session_state = VCL_STATE_DISCONNECT;
return VPPCOM_OK;
@@ -1335,9 +1298,82 @@ vcl_api_attach (void)
return vcl_bapi_attach ();
}
+int
+vcl_is_first_reattach_to_execute ()
+{
+ if (vcm->reattach_count == 0)
+ return 1;
+
+ return 0;
+}
+
+void
+vcl_set_reattach_counter ()
+{
+ ++vcm->reattach_count;
+
+ if (vcm->reattach_count == vec_len (vcm->workers))
+ vcm->reattach_count = 0;
+}
+
+/**
+ * Reattach vcl to vpp after it has previously been disconnected.
+ *
+ * The logic should be:
+ * - first worker to hit `vcl_api_retry_attach` should attach to vpp,
+ * to reproduce the `vcl_api_attach` in `vppcom_app_create`.
+ * - the rest of the workers should `reproduce vcl_worker_register_with_vpp`
+ * from `vppcom_worker_register` since they were already allocated.
+ */
+
+static void
+vcl_api_retry_attach (vcl_worker_t *wrk)
+{
+ vcl_session_t *s;
+
+ clib_spinlock_lock (&vcm->workers_lock);
+ if (vcl_is_first_reattach_to_execute ())
+ {
+ if (vcl_api_attach ())
+ {
+ clib_spinlock_unlock (&vcm->workers_lock);
+ return;
+ }
+ vcl_set_reattach_counter ();
+ clib_spinlock_unlock (&vcm->workers_lock);
+ }
+ else
+ {
+ vcl_set_reattach_counter ();
+ clib_spinlock_unlock (&vcm->workers_lock);
+ vcl_worker_register_with_vpp ();
+ }
+
+ /* Treat listeners as configuration that needs to be re-added to vpp */
+ pool_foreach (s, wrk->sessions)
+ {
+ if (s->flags & VCL_SESSION_F_IS_VEP)
+ continue;
+ if (s->session_state == VCL_STATE_LISTEN_NO_MQ)
+ vppcom_session_listen (vcl_session_handle (s), 10);
+ else
+ VDBG (0, "internal error: unexpected state %d", s->session_state);
+ }
+}
+
+static void
+vcl_api_handle_disconnect (vcl_worker_t *wrk)
+{
+ wrk->api_client_handle = ~0;
+ vcl_worker_detach_sessions (wrk);
+}
+
static void
vcl_api_detach (vcl_worker_t * wrk)
{
+ if (wrk->api_client_handle == ~0)
+ return;
+
vcl_send_app_detach (wrk);
if (vcm->cfg.vpp_app_socket_api)
@@ -1368,8 +1404,8 @@ vppcom_app_create (const char *app_name)
vcm->main_cpu = pthread_self ();
vcm->main_pid = getpid ();
vcm->app_name = format (0, "%s", app_name);
- fifo_segment_main_init (&vcm->segment_main, vcl_cfg->segment_baseva,
- 20 /* timeout in secs */ );
+ fifo_segment_main_init (&vcm->segment_main, (uword) ~0,
+ 20 /* timeout in secs */);
pool_alloc (vcm->workers, vcl_cfg->max_workers);
clib_spinlock_init (&vcm->workers_lock);
clib_rwlock_init (&vcm->segment_table_lock);
@@ -1380,7 +1416,10 @@ vppcom_app_create (const char *app_name)
vcl_worker_alloc_and_init ();
if ((rv = vcl_api_attach ()))
- return rv;
+ {
+ vppcom_app_destroy ();
+ return rv;
+ }
VDBG (0, "app_name '%s', my_client_index %d (0x%x)", app_name,
vcm->workers[0].api_client_handle, vcm->workers[0].api_client_handle);
@@ -1401,15 +1440,14 @@ vppcom_app_destroy (void)
current_wrk = vcl_worker_get_current ();
- /* *INDENT-OFF* */
pool_foreach (wrk, vcm->workers) {
if (current_wrk != wrk)
vcl_worker_cleanup (wrk, 0 /* notify vpp */ );
}
- /* *INDENT-ON* */
vcl_api_detach (current_wrk);
vcl_worker_cleanup (current_wrk, 0 /* notify vpp */ );
+ vcl_set_worker_index (~0);
vcl_elog_stop (vcm);
@@ -1433,8 +1471,9 @@ vppcom_session_create (u8 proto, u8 is_nonblocking)
session->session_type = proto;
session->session_state = VCL_STATE_CLOSED;
- session->vpp_handle = ~0;
+ session->vpp_handle = SESSION_INVALID_HANDLE;
session->is_dgram = vcl_proto_is_dgram (proto);
+ session->vpp_error = SESSION_E_NONE;
if (is_nonblocking)
vcl_session_set_attr (session, VCL_SESS_ATTR_NONBLOCK);
@@ -1452,6 +1491,8 @@ vcl_epoll_lt_add (vcl_worker_t *wrk, vcl_session_t *s)
{
vcl_session_t *cur, *prev;
+ ASSERT (s->vep.lt_next == VCL_INVALID_SESSION_INDEX);
+
if (wrk->ep_lt_current == VCL_INVALID_SESSION_INDEX)
{
wrk->ep_lt_current = s->session_index;
@@ -1475,10 +1516,13 @@ vcl_epoll_lt_del (vcl_worker_t *wrk, vcl_session_t *s)
{
vcl_session_t *prev, *next;
+ ASSERT (s->vep.lt_next != VCL_INVALID_SESSION_INDEX);
+
if (s->vep.lt_next == s->session_index)
{
wrk->ep_lt_current = VCL_INVALID_SESSION_INDEX;
s->vep.lt_next = VCL_INVALID_SESSION_INDEX;
+ s->vep.lt_prev = VCL_INVALID_SESSION_INDEX;
return;
}
@@ -1492,6 +1536,7 @@ vcl_epoll_lt_del (vcl_worker_t *wrk, vcl_session_t *s)
wrk->ep_lt_current = s->vep.lt_next;
s->vep.lt_next = VCL_INVALID_SESSION_INDEX;
+ s->vep.lt_prev = VCL_INVALID_SESSION_INDEX;
}
int
@@ -1557,9 +1602,14 @@ vcl_session_cleanup (vcl_worker_t * wrk, vcl_session_t * s,
}
else if (s->session_state == VCL_STATE_DETACHED)
{
- /* Should not happen. VPP cleaned up before app confirmed close */
VDBG (0, "vpp freed session %d before close", s->session_index);
- goto free_session;
+
+ if (!(s->flags & VCL_SESSION_F_PENDING_DISCONNECT))
+ goto free_session;
+
+ /* Disconnect/reset messages pending but vpp transport and session
+ * cleanups already done. Free only after messages drained. */
+ s->flags |= VCL_SESSION_F_PENDING_FREE;
}
s->session_state = VCL_STATE_CLOSED;
@@ -1618,17 +1668,19 @@ vppcom_session_bind (uint32_t session_handle, vppcom_endpt_t * ep)
sizeof (ip6_address_t));
session->transport.lcl_port = ep->port;
- VDBG (0, "session %u handle %u: binding to local %s address %U port %u, "
- "proto %s", session->session_index, session_handle,
- session->transport.is_ip4 ? "IPv4" : "IPv6",
- format_ip46_address, &session->transport.lcl_ip,
+ VDBG (0,
+ "session %u handle %u: binding to local %s address %U port %u, "
+ "proto %s",
+ session->session_index, session_handle,
+ session->transport.is_ip4 ? "IPv4" : "IPv6", vcl_format_ip46_address,
+ &session->transport.lcl_ip,
session->transport.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
clib_net_to_host_u16 (session->transport.lcl_port),
vppcom_proto_str (session->session_type));
vcl_evt (VCL_EVT_BIND, session);
if (session->session_type == VPPCOM_PROTO_UDP)
- vppcom_session_listen (session_handle, 10);
+ return vppcom_session_listen (session_handle, 10);
return VPPCOM_OK;
}
@@ -1645,9 +1697,6 @@ vppcom_session_listen (uint32_t listen_sh, uint32_t q_len)
if (!listen_session || (listen_session->flags & VCL_SESSION_F_IS_VEP))
return VPPCOM_EBADFD;
- if (q_len == 0 || q_len == ~0)
- q_len = vcm->cfg.listen_queue_size;
-
listen_vpp_handle = listen_session->vpp_handle;
if (listen_session->session_state == VCL_STATE_LISTEN)
{
@@ -1678,27 +1727,6 @@ vppcom_session_listen (uint32_t listen_sh, uint32_t q_len)
return VPPCOM_OK;
}
-static int
-validate_args_session_accept_ (vcl_worker_t * wrk, vcl_session_t * ls)
-{
- if (ls->flags & VCL_SESSION_F_IS_VEP)
- {
- VDBG (0, "ERROR: cannot accept on epoll session %u!",
- ls->session_index);
- return VPPCOM_EBADFD;
- }
-
- if ((ls->session_state != VCL_STATE_LISTEN)
- && (!vcl_session_is_connectable_listener (wrk, ls)))
- {
- VDBG (0, "ERROR: session [0x%llx]: not in listen state! state 0x%x"
- " (%s)", ls->vpp_handle, ls->session_state,
- vppcom_session_state_str (ls->session_state));
- return VPPCOM_EBADFD;
- }
- return VPPCOM_OK;
-}
-
int
vppcom_unformat_proto (uint8_t * proto, char *proto_str)
{
@@ -1732,38 +1760,41 @@ vppcom_unformat_proto (uint8_t * proto, char *proto_str)
}
int
-vppcom_session_accept (uint32_t listen_session_handle, vppcom_endpt_t * ep,
- uint32_t flags)
+vppcom_session_accept (uint32_t ls_handle, vppcom_endpt_t *ep, uint32_t flags)
{
- u32 client_session_index = ~0, listen_session_index, accept_flags = 0;
+ u32 client_session_index = ~0, ls_index, accept_flags = 0;
vcl_worker_t *wrk = vcl_worker_get_current ();
session_accepted_msg_t accepted_msg;
- vcl_session_t *listen_session = 0;
- vcl_session_t *client_session = 0;
+ vcl_session_t *ls, *client_session = 0;
vcl_session_msg_t *evt;
u8 is_nonblocking;
- int rv;
again:
- listen_session = vcl_session_get_w_handle (wrk, listen_session_handle);
- if (!listen_session)
+ ls = vcl_session_get_w_handle (wrk, ls_handle);
+ if (!ls)
return VPPCOM_EBADFD;
- listen_session_index = listen_session->session_index;
- if ((rv = validate_args_session_accept_ (wrk, listen_session)))
- return rv;
+ if ((ls->session_state != VCL_STATE_LISTEN) &&
+ (ls->session_state != VCL_STATE_LISTEN_NO_MQ) &&
+ (!vcl_session_is_connectable_listener (wrk, ls)))
+ {
+ VDBG (0, "ERROR: session [0x%llx]: not in listen state! state (%s)",
+ ls->vpp_handle, vcl_session_state_str (ls->session_state));
+ return VPPCOM_EBADFD;
+ }
- if (clib_fifo_elts (listen_session->accept_evts_fifo))
+ ls_index = ls->session_index;
+
+ if (clib_fifo_elts (ls->accept_evts_fifo))
{
- clib_fifo_sub2 (listen_session->accept_evts_fifo, evt);
+ clib_fifo_sub2 (ls->accept_evts_fifo, evt);
accept_flags = evt->flags;
accepted_msg = evt->accepted_msg;
goto handle;
}
- is_nonblocking = vcl_session_has_attr (listen_session,
- VCL_SESS_ATTR_NONBLOCK);
+ is_nonblocking = vcl_session_has_attr (ls, VCL_SESS_ATTR_NONBLOCK);
while (1)
{
if (svm_msg_q_is_empty (wrk->app_event_queue) && is_nonblocking)
@@ -1776,20 +1807,21 @@ again:
handle:
- client_session_index = vcl_session_accepted_handler (wrk, &accepted_msg,
- listen_session_index);
+ client_session_index =
+ vcl_session_accepted_handler (wrk, &accepted_msg, ls_index);
if (client_session_index == VCL_INVALID_SESSION_INDEX)
return VPPCOM_ECONNABORTED;
- listen_session = vcl_session_get (wrk, listen_session_index);
+ ls = vcl_session_get (wrk, ls_index);
client_session = vcl_session_get (wrk, client_session_index);
if (flags & O_NONBLOCK)
vcl_session_set_attr (client_session, VCL_SESS_ATTR_NONBLOCK);
- VDBG (1, "listener %u [0x%llx]: Got a connect request! session %u [0x%llx],"
- " flags %d, is_nonblocking %u", listen_session->session_index,
- listen_session->vpp_handle, client_session_index,
+ VDBG (1,
+ "listener %u [0x%llx]: Got a connect request! session %u [0x%llx],"
+ " flags %d, is_nonblocking %u",
+ ls->session_index, ls->vpp_handle, client_session_index,
client_session->vpp_handle, flags,
vcl_session_has_attr (client_session, VCL_SESS_ATTR_NONBLOCK));
@@ -1805,17 +1837,18 @@ handle:
sizeof (ip6_address_t));
}
- VDBG (0, "listener %u [0x%llx] accepted %u [0x%llx] peer: %U:%u "
- "local: %U:%u", listen_session_handle, listen_session->vpp_handle,
- client_session_index, client_session->vpp_handle,
- format_ip46_address, &client_session->transport.rmt_ip,
+ VDBG (0,
+ "listener %u [0x%llx] accepted %u [0x%llx] peer: %U:%u "
+ "local: %U:%u",
+ ls_handle, ls->vpp_handle, client_session_index,
+ client_session->vpp_handle, vcl_format_ip46_address,
+ &client_session->transport.rmt_ip,
client_session->transport.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
clib_net_to_host_u16 (client_session->transport.rmt_port),
- format_ip46_address, &client_session->transport.lcl_ip,
+ vcl_format_ip46_address, &client_session->transport.lcl_ip,
client_session->transport.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
clib_net_to_host_u16 (client_session->transport.lcl_port));
- vcl_evt (VCL_EVT_ACCEPT, client_session, listen_session,
- client_session_index);
+ vcl_evt (VCL_EVT_ACCEPT, client_session, ls, client_session_index);
/*
* Session might have been closed already
@@ -1845,22 +1878,21 @@ vppcom_session_connect (uint32_t session_handle, vppcom_endpt_t * server_ep)
if (PREDICT_FALSE (session->flags & VCL_SESSION_F_IS_VEP))
{
- VDBG (0, "ERROR: cannot connect epoll session %u!",
- session->session_index);
+ VWRN ("cannot connect epoll session %u!", session->session_index);
return VPPCOM_EBADFD;
}
if (PREDICT_FALSE (vcl_session_is_ready (session)))
{
- VDBG (0, "session handle %u [0x%llx]: session already "
- "connected to %s %U port %d proto %s, state 0x%x (%s)",
- session_handle, session->vpp_handle,
- session->transport.is_ip4 ? "IPv4" : "IPv6", format_ip46_address,
- &session->transport.rmt_ip, session->transport.is_ip4 ?
- IP46_TYPE_IP4 : IP46_TYPE_IP6,
+ VDBG (0,
+ "session %u [0x%llx]: already connected to %U:%d proto %s,"
+ " state (%s)",
+ session->session_index, session->vpp_handle,
+ vcl_format_ip46_address, &session->transport.rmt_ip,
+ session->transport.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
clib_net_to_host_u16 (session->transport.rmt_port),
- vppcom_proto_str (session->session_type), session->session_state,
- vppcom_session_state_str (session->session_state));
+ vppcom_proto_str (session->session_type),
+ vcl_session_state_str (session->session_state));
return VPPCOM_OK;
}
@@ -1879,13 +1911,10 @@ vppcom_session_connect (uint32_t session_handle, vppcom_endpt_t * server_ep)
session->parent_handle = VCL_INVALID_SESSION_HANDLE;
session->flags |= VCL_SESSION_F_CONNECTED;
- VDBG (0, "session handle %u (%s): connecting to peer %s %U "
- "port %d proto %s", session_handle,
- vppcom_session_state_str (session->session_state),
- session->transport.is_ip4 ? "IPv4" : "IPv6",
- format_ip46_address,
- &session->transport.rmt_ip, session->transport.is_ip4 ?
- IP46_TYPE_IP4 : IP46_TYPE_IP6,
+ VDBG (0, "session %u: connecting to peer %U:%d proto %s",
+ session->session_index, vcl_format_ip46_address,
+ &session->transport.rmt_ip,
+ session->transport.is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
clib_net_to_host_u16 (session->transport.rmt_port),
vppcom_proto_str (session->session_type));
@@ -1940,12 +1969,13 @@ vppcom_session_stream_connect (uint32_t session_handle,
if (PREDICT_FALSE (vcl_session_is_ready (session)))
{
- VDBG (0, "session handle %u [0x%llx]: session already "
+ VDBG (0,
+ "session handle %u [0x%llx]: session already "
"connected to session %u [0x%llx] proto %s, state 0x%x (%s)",
- session_handle, session->vpp_handle,
- parent_session_handle, parent_session->vpp_handle,
+ session_handle, session->vpp_handle, parent_session_handle,
+ parent_session->vpp_handle,
vppcom_proto_str (session->session_type), session->session_state,
- vppcom_session_state_str (session->session_state));
+ vcl_session_state_str (session->session_state));
return VPPCOM_OK;
}
@@ -2000,7 +2030,7 @@ vppcom_session_read_internal (uint32_t session_handle, void *buf, int n,
{
VDBG (0, "session %u[0x%llx] is not open! state 0x%x (%s)",
s->session_index, s->vpp_handle, s->session_state,
- vppcom_session_state_str (s->session_state));
+ vcl_session_state_str (s->session_state));
return vcl_session_closed_error (s);
}
@@ -2024,13 +2054,13 @@ vppcom_session_read_internal (uint32_t session_handle, void *buf, int n,
if (svm_fifo_is_empty_cons (rx_fifo))
{
+ if (is_ct)
+ svm_fifo_unset_event (s->rx_fifo);
+ svm_fifo_unset_event (rx_fifo);
if (is_nonblocking)
{
if (vcl_session_is_closing (s))
return vcl_session_closing_error (s);
- if (is_ct)
- svm_fifo_unset_event (s->rx_fifo);
- svm_fifo_unset_event (rx_fifo);
return VPPCOM_EWOULDBLOCK;
}
while (svm_fifo_is_empty_cons (rx_fifo))
@@ -2057,7 +2087,16 @@ read_again:
ASSERT (rv >= 0);
if (peek)
- return rv;
+ {
+ /* Request new notifications if more data enqueued */
+ if (rv < n || rv == svm_fifo_max_dequeue_cons (rx_fifo))
+ {
+ if (is_ct)
+ svm_fifo_unset_event (s->rx_fifo);
+ svm_fifo_unset_event (rx_fifo);
+ }
+ return rv;
+ }
n_read += rv;
@@ -2136,11 +2175,13 @@ vppcom_session_read_segments (uint32_t session_handle,
if (svm_fifo_is_empty_cons (rx_fifo))
{
+ if (is_ct)
+ svm_fifo_unset_event (s->rx_fifo);
+ svm_fifo_unset_event (rx_fifo);
if (is_nonblocking)
{
- if (is_ct)
- svm_fifo_unset_event (s->rx_fifo);
- svm_fifo_unset_event (rx_fifo);
+ if (vcl_session_is_closing (s))
+ return vcl_session_closing_error (s);
return VPPCOM_EWOULDBLOCK;
}
while (svm_fifo_is_empty_cons (rx_fifo))
@@ -2158,7 +2199,7 @@ vppcom_session_read_segments (uint32_t session_handle,
}
n_read = svm_fifo_segments (rx_fifo, s->rx_bytes_pending,
- (svm_fifo_seg_t *) ds, n_segments, max_bytes);
+ (svm_fifo_seg_t *) ds, &n_segments, max_bytes);
if (n_read < 0)
return VPPCOM_EAGAIN;
@@ -2196,7 +2237,7 @@ vppcom_session_free_segments (uint32_t session_handle, uint32_t n_bytes)
is_ct = vcl_session_is_ct (s);
svm_fifo_dequeue_drop (is_ct ? s->ct_rx_fifo : s->rx_fifo, n_bytes);
- ASSERT (s->rx_bytes_pending < n_bytes);
+ ASSERT (s->rx_bytes_pending >= n_bytes);
s->rx_bytes_pending -= n_bytes;
}
@@ -2211,7 +2252,7 @@ vcl_fifo_is_writeable (svm_fifo_t * f, u32 len, u8 is_dgram)
}
always_inline int
-vppcom_session_write_inline (vcl_worker_t * wrk, vcl_session_t * s, void *buf,
+vppcom_session_write_inline (vcl_worker_t *wrk, vcl_session_t *s, void *buf,
size_t n, u8 is_flush, u8 is_dgram)
{
int n_write, is_nonblocking;
@@ -2238,7 +2279,7 @@ vppcom_session_write_inline (vcl_worker_t * wrk, vcl_session_t * s, void *buf,
{
VDBG (1, "session %u [0x%llx]: is not open! state 0x%x (%s)",
s->session_index, s->vpp_handle, s->session_state,
- vppcom_session_state_str (s->session_state));
+ vcl_session_state_str (s->session_state));
return vcl_session_closed_error (s);;
}
@@ -2246,7 +2287,7 @@ vppcom_session_write_inline (vcl_worker_t * wrk, vcl_session_t * s, void *buf,
{
VDBG (1, "session %u [0x%llx]: is shutdown! state 0x%x (%s)",
s->session_index, s->vpp_handle, s->session_state,
- vppcom_session_state_str (s->session_state));
+ vcl_session_state_str (s->session_state));
return VPPCOM_EPIPE;
}
@@ -2277,12 +2318,17 @@ vppcom_session_write_inline (vcl_worker_t * wrk, vcl_session_t * s, void *buf,
et = SESSION_IO_EVT_TX_FLUSH;
if (is_dgram)
- n_write = app_send_dgram_raw (tx_fifo, &s->transport,
- s->vpp_evt_q, buf, n, et,
- 0 /* do_evt */ , SVM_Q_WAIT);
+ {
+ et = vcl_session_dgram_tx_evt (s, et);
+ n_write =
+ app_send_dgram_raw_gso (tx_fifo, &s->transport, s->vpp_evt_q, buf, n,
+ s->gso_size, et, 0 /* do_evt */, SVM_Q_WAIT);
+ }
else
- n_write = app_send_stream_raw (tx_fifo, s->vpp_evt_q, buf, n, et,
- 0 /* do_evt */ , SVM_Q_WAIT);
+ {
+ n_write = app_send_stream_raw (tx_fifo, s->vpp_evt_q, buf, n, et,
+ 0 /* do_evt */, SVM_Q_WAIT);
+ }
if (svm_fifo_set_event (s->tx_fifo))
app_send_io_evt_to_vpp (
@@ -2308,8 +2354,8 @@ vppcom_session_write (uint32_t session_handle, void *buf, size_t n)
if (PREDICT_FALSE (!s))
return VPPCOM_EBADFD;
- return vppcom_session_write_inline (wrk, s, buf, n,
- 0 /* is_flush */ , s->is_dgram ? 1 : 0);
+ return vppcom_session_write_inline (wrk, s, buf, n, 0 /* is_flush */,
+ s->is_dgram ? 1 : 0);
}
int
@@ -2322,8 +2368,8 @@ vppcom_session_write_msg (uint32_t session_handle, void *buf, size_t n)
if (PREDICT_FALSE (!s))
return VPPCOM_EBADFD;
- return vppcom_session_write_inline (wrk, s, buf, n,
- 1 /* is_flush */ , s->is_dgram ? 1 : 0);
+ return vppcom_session_write_inline (wrk, s, buf, n, 1 /* is_flush */,
+ s->is_dgram ? 1 : 0);
}
#define vcl_fifo_rx_evt_valid_or_break(_s) \
@@ -2405,17 +2451,33 @@ vcl_select_handle_mq_event (vcl_worker_t * wrk, session_event_t * e,
sid = e->session_index;
if (sid == VCL_INVALID_SESSION_INDEX)
break;
- if (sid < n_bits && write_map)
- {
- clib_bitmap_set_no_check ((uword *) write_map, sid, 1);
- *bits_set += 1;
- }
+ if (!(sid < n_bits && write_map))
+ break;
+ clib_bitmap_set_no_check ((uword *) write_map, sid, 1);
+ *bits_set += 1;
+ s = vcl_session_get (wrk, sid);
+ /* We didn't have a fifo when the event was added */
+ vcl_session_add_want_deq_ntf (s, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
break;
case SESSION_CTRL_EVT_DISCONNECTED:
- disconnected_msg = (session_disconnected_msg_t *) e->data;
- s = vcl_session_disconnected_handler (wrk, disconnected_msg);
- if (!s)
- break;
+ if (!e->postponed)
+ {
+ disconnected_msg = (session_disconnected_msg_t *) e->data;
+ s = vcl_session_disconnected_handler (wrk, disconnected_msg);
+ if (!s)
+ break;
+ }
+ else
+ {
+ s = vcl_session_get (wrk, e->session_index);
+ s->flags &= ~VCL_SESSION_F_PENDING_DISCONNECT;
+ }
+ if (vcl_session_is_closed (s))
+ {
+ if (s && (s->flags & VCL_SESSION_F_PENDING_FREE))
+ vcl_session_free (wrk, s);
+ break;
+ }
sid = s->session_index;
if (sid < n_bits && except_map)
{
@@ -2424,7 +2486,24 @@ vcl_select_handle_mq_event (vcl_worker_t * wrk, session_event_t * e,
}
break;
case SESSION_CTRL_EVT_RESET:
- sid = vcl_session_reset_handler (wrk, (session_reset_msg_t *) e->data);
+ if (!e->postponed)
+ {
+ sid =
+ vcl_session_reset_handler (wrk, (session_reset_msg_t *) e->data);
+ s = vcl_session_get (wrk, sid);
+ }
+ else
+ {
+ sid = e->session_index;
+ s = vcl_session_get (wrk, sid);
+ s->flags &= ~VCL_SESSION_F_PENDING_DISCONNECT;
+ }
+ if (vcl_session_is_closed (s))
+ {
+ if (s && (s->flags & VCL_SESSION_F_PENDING_FREE))
+ vcl_session_free (wrk, s);
+ break;
+ }
if (sid < n_bits && except_map)
{
clib_bitmap_set_no_check ((uword *) except_map, sid, 1);
@@ -2542,11 +2621,23 @@ vppcom_select_eventfd (vcl_worker_t * wrk, int n_bits,
int n_mq_evts, i;
u64 buf;
+ if (PREDICT_FALSE (wrk->api_client_handle == ~0))
+ {
+ vcl_api_retry_attach (wrk);
+ return 0;
+ }
+
vec_validate (wrk->mq_events, pool_elts (wrk->mq_evt_conns));
n_mq_evts = epoll_wait (wrk->mqs_epfd, wrk->mq_events,
vec_len (wrk->mq_events), time_to_wait);
for (i = 0; i < n_mq_evts; i++)
{
+ if (PREDICT_FALSE (wrk->mq_events[i].data.u32 == ~0))
+ {
+ vcl_api_handle_disconnect (wrk);
+ continue;
+ }
+
mqc = vcl_mq_evt_conn_get (wrk, wrk->mq_events[i].data.u32);
n_read = read (mqc->mq_fd, &buf, sizeof (buf));
vcl_select_handle_mq (wrk, mqc->mq, n_bits, read_map, write_map,
@@ -2609,8 +2700,7 @@ vppcom_select (int n_bits, vcl_si_set * read_map, vcl_si_set * write_map,
}
else
{
- svm_fifo_t *txf = vcl_session_is_ct (s) ? s->ct_tx_fifo : s->tx_fifo;
- svm_fifo_add_want_deq_ntf (txf, SVM_FIFO_WANT_DEQ_NOTIF);
+ vcl_session_add_want_deq_ntf (s, SVM_FIFO_WANT_DEQ_NOTIF);
}
}
@@ -2660,7 +2750,7 @@ vep_verify_epoll_chain (vcl_worker_t * wrk, u32 vep_handle)
u32 sh = vep_handle;
vcl_session_t *s;
- if (VPPCOM_DEBUG <= 2)
+ if (VPPCOM_DEBUG <= 3)
return;
s = vcl_session_get_w_handle (wrk, vep_handle);
@@ -2737,7 +2827,7 @@ vppcom_epoll_create (void)
vep_session->vep.vep_sh = ~0;
vep_session->vep.next_sh = ~0;
vep_session->vep.prev_sh = ~0;
- vep_session->vpp_handle = ~0;
+ vep_session->vpp_handle = SESSION_INVALID_HANDLE;
vcl_evt (VCL_EVT_EPOLL_CREATE, vep_session, vep_session->session_index);
VDBG (0, "Created vep_idx %u", vep_session->session_index);
@@ -2745,6 +2835,25 @@ vppcom_epoll_create (void)
return vcl_session_handle (vep_session);
}
+static void
+vcl_epoll_ctl_add_unhandled_event (vcl_worker_t *wrk, vcl_session_t *s,
+ u32 is_epollet, session_evt_type_t evt)
+{
+ if (!is_epollet)
+ {
+ if (s->vep.lt_next == VCL_INVALID_SESSION_INDEX)
+ vcl_epoll_lt_add (wrk, s);
+ return;
+ }
+
+ session_event_t e = { 0 };
+ e.session_index = s->session_index;
+ e.event_type = evt;
+ if (evt == SESSION_IO_EVT_RX)
+ s->flags &= ~VCL_SESSION_F_HAS_RX_EVT;
+ vec_add1 (wrk->unhandled_evts_vector, e);
+}
+
int
vppcom_epoll_ctl (uint32_t vep_handle, int op, uint32_t session_handle,
struct epoll_event *event)
@@ -2753,7 +2862,6 @@ vppcom_epoll_ctl (uint32_t vep_handle, int op, uint32_t session_handle,
int rv = VPPCOM_OK, add_evt = 0;
vcl_session_t *vep_session;
vcl_session_t *s;
- svm_fifo_t *txf;
if (vep_handle == session_handle)
{
@@ -2822,31 +2930,33 @@ vppcom_epoll_ctl (uint32_t vep_handle, int op, uint32_t session_handle,
s->vep.et_mask = VEP_DEFAULT_ET_MASK;
s->vep.lt_next = VCL_INVALID_SESSION_INDEX;
s->vep.ev = *event;
+ s->vep.ev.events |= EPOLLHUP | EPOLLERR;
s->flags &= ~VCL_SESSION_F_IS_VEP;
s->flags |= VCL_SESSION_F_IS_VEP_SESSION;
vep_session->vep.next_sh = session_handle;
- txf = vcl_session_is_ct (s) ? s->ct_tx_fifo : s->tx_fifo;
- if (txf && (event->events & EPOLLOUT))
- svm_fifo_add_want_deq_ntf (txf, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
-
- /* Generate EPOLLOUT if tx fifo not full */
- if ((event->events & EPOLLOUT) && (vcl_session_write_ready (s) > 0))
+ if ((event->events & EPOLLOUT))
{
- session_event_t e = { 0 };
- e.event_type = SESSION_IO_EVT_TX;
- e.session_index = s->session_index;
- vec_add1 (wrk->unhandled_evts_vector, e);
- add_evt = 1;
+ int write_ready = vcl_session_write_ready (s);
+
+ vcl_session_add_want_deq_ntf (s, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
+ if (write_ready > 0)
+ {
+ /* Generate EPOLLOUT if tx fifo not full */
+ vcl_epoll_ctl_add_unhandled_event (
+ wrk, s, event->events & EPOLLET, SESSION_IO_EVT_TX);
+ add_evt = 1;
+ }
+ else
+ {
+ vcl_session_add_want_deq_ntf (s, SVM_FIFO_WANT_DEQ_NOTIF);
+ }
}
/* Generate EPOLLIN if rx fifo has data */
if ((event->events & EPOLLIN) && (vcl_session_read_ready (s) > 0))
{
- session_event_t e = { 0 };
- e.event_type = SESSION_IO_EVT_RX;
- e.session_index = s->session_index;
- vec_add1 (wrk->unhandled_evts_vector, e);
- s->flags &= ~VCL_SESSION_F_HAS_RX_EVT;
+ vcl_epoll_ctl_add_unhandled_event (wrk, s, event->events & EPOLLET,
+ SESSION_IO_EVT_RX);
add_evt = 1;
}
if (!add_evt && vcl_session_is_closing (s))
@@ -2886,35 +2996,34 @@ vppcom_epoll_ctl (uint32_t vep_handle, int op, uint32_t session_handle,
goto done;
}
- /* Generate EPOLLOUT if session write ready nd event was not on */
- if ((event->events & EPOLLOUT) && !(s->vep.ev.events & EPOLLOUT) &&
- (vcl_session_write_ready (s) > 0))
+ /* Generate EPOLLOUT if session write ready and event was not on */
+ if ((event->events & EPOLLOUT) && !(s->vep.ev.events & EPOLLOUT))
{
- session_event_t e = { 0 };
- e.event_type = SESSION_IO_EVT_TX;
- e.session_index = s->session_index;
- vec_add1 (wrk->unhandled_evts_vector, e);
+ /* Fifo size load acq synchronized with update store rel */
+ int write_ready = vcl_session_write_ready (s);
+
+ vcl_session_add_want_deq_ntf (s, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
+ if (write_ready > 0)
+ vcl_epoll_ctl_add_unhandled_event (wrk, s, event->events & EPOLLET,
+ SESSION_IO_EVT_TX);
+ else
+ /* Request deq ntf in case dequeue happened while updating flag */
+ vcl_session_add_want_deq_ntf (s, SVM_FIFO_WANT_DEQ_NOTIF);
}
+ else if (!(event->events & EPOLLOUT))
+ vcl_session_del_want_deq_ntf (s, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
+
/* Generate EPOLLIN if session read ready and event was not on */
if ((event->events & EPOLLIN) && !(s->vep.ev.events & EPOLLIN) &&
(vcl_session_read_ready (s) > 0))
{
- session_event_t e = { 0 };
- e.event_type = SESSION_IO_EVT_RX;
- e.session_index = s->session_index;
- vec_add1 (wrk->unhandled_evts_vector, e);
- s->flags &= ~VCL_SESSION_F_HAS_RX_EVT;
+ vcl_epoll_ctl_add_unhandled_event (wrk, s, event->events & EPOLLET,
+ SESSION_IO_EVT_RX);
}
s->vep.et_mask = VEP_DEFAULT_ET_MASK;
s->vep.ev = *event;
- txf = vcl_session_is_ct (s) ? s->ct_tx_fifo : s->tx_fifo;
- if (txf)
- {
- if (event->events & EPOLLOUT)
- svm_fifo_add_want_deq_ntf (txf, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
- else
- svm_fifo_del_want_deq_ntf (txf, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
- }
+ s->vep.ev.events |= EPOLLHUP | EPOLLERR;
+
VDBG (1, "EPOLL_CTL_MOD: vep_sh %u, sh %u, events 0x%x, data 0x%llx!",
vep_handle, session_handle, event->events, event->data.u64);
break;
@@ -2974,11 +3083,7 @@ vppcom_epoll_ctl (uint32_t vep_handle, int op, uint32_t session_handle,
s->flags &= ~VCL_SESSION_F_IS_VEP_SESSION;
if (vcl_session_is_open (s))
- {
- txf = vcl_session_is_ct (s) ? s->ct_tx_fifo : s->tx_fifo;
- if (txf)
- svm_fifo_del_want_deq_ntf (txf, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
- }
+ vcl_session_del_want_deq_ntf (s, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
VDBG (1, "EPOLL_CTL_DEL: vep_idx %u, sh %u!", vep_handle,
session_handle);
@@ -2996,6 +3101,14 @@ done:
return rv;
}
+always_inline u8
+vcl_ep_session_needs_evt (vcl_session_t *s, u32 evt)
+{
+ /* No event if not epolled / events reset on hup or level-trigger on */
+ return ((s->vep.ev.events & evt) &&
+ s->vep.lt_next == VCL_INVALID_SESSION_INDEX);
+}
+
static inline void
vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e,
struct epoll_event *events, u32 * num_ev)
@@ -3015,10 +3128,10 @@ vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e,
if (vcl_session_is_closed (s))
break;
vcl_fifo_rx_evt_valid_or_break (s);
- session_events = s->vep.ev.events;
- if (!(EPOLLIN & s->vep.ev.events)
- || (s->flags & VCL_SESSION_F_HAS_RX_EVT))
+ if (!vcl_ep_session_needs_evt (s, EPOLLIN) ||
+ (s->flags & VCL_SESSION_F_HAS_RX_EVT))
break;
+ session_events = s->vep.ev.events;
add_event = 1;
events[*num_ev].events = EPOLLIN;
session_evt_data = s->vep.ev.data.u64;
@@ -3027,28 +3140,26 @@ vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e,
case SESSION_IO_EVT_TX:
sid = e->session_index;
s = vcl_session_get (wrk, sid);
- if (vcl_session_is_closed (s))
+ if (!s || !vcl_session_is_open (s))
break;
- session_events = s->vep.ev.events;
- if (!(EPOLLOUT & session_events))
+ svm_fifo_reset_has_deq_ntf (vcl_session_is_ct (s) ? s->ct_tx_fifo :
+ s->tx_fifo);
+ if (!vcl_ep_session_needs_evt (s, EPOLLOUT))
break;
+ session_events = s->vep.ev.events;
add_event = 1;
events[*num_ev].events = EPOLLOUT;
session_evt_data = s->vep.ev.data.u64;
- svm_fifo_reset_has_deq_ntf (vcl_session_is_ct (s) ?
- s->ct_tx_fifo : s->tx_fifo);
break;
case SESSION_CTRL_EVT_ACCEPTED:
if (!e->postponed)
s = vcl_session_accepted (wrk, (session_accepted_msg_t *) e->data);
else
s = vcl_session_get (wrk, e->session_index);
- if (!s)
+ if (!s || !vcl_ep_session_needs_evt (s, EPOLLIN))
break;
- session_events = s->vep.ev.events;
sid = s->session_index;
- if (!(EPOLLIN & session_events))
- break;
+ session_events = s->vep.ev.events;
add_event = 1;
events[*num_ev].events = EPOLLIN;
session_evt_data = s->vep.ev.data.u64;
@@ -3062,17 +3173,20 @@ vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e,
else
sid = e->session_index;
s = vcl_session_get (wrk, sid);
- if (vcl_session_is_closed (s))
+ if (vcl_session_is_closed (s) || !vcl_ep_session_needs_evt (s, EPOLLOUT))
break;
+ /* We didn't have a fifo when the event was added */
+ vcl_session_add_want_deq_ntf (s, SVM_FIFO_WANT_DEQ_NOTIF_IF_FULL);
+ add_event = 1;
session_events = s->vep.ev.events;
/* Generate EPOLLOUT because there's no connected event */
- if (!(EPOLLOUT & session_events))
- break;
- add_event = 1;
events[*num_ev].events = EPOLLOUT;
session_evt_data = s->vep.ev.data.u64;
if (s->session_state == VCL_STATE_DETACHED)
- events[*num_ev].events |= EPOLLHUP;
+ {
+ events[*num_ev].events |= EPOLLHUP;
+ s->vep.ev.events = 0;
+ }
break;
case SESSION_CTRL_EVT_DISCONNECTED:
if (!e->postponed)
@@ -3083,29 +3197,70 @@ vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e,
else
{
s = vcl_session_get (wrk, e->session_index);
+ s->flags &= ~VCL_SESSION_F_PENDING_DISCONNECT;
+ }
+ if (vcl_session_is_closed (s) || !vcl_ep_session_needs_evt (s, EPOLLHUP))
+ {
+ if (s && (s->flags & VCL_SESSION_F_PENDING_FREE))
+ vcl_session_free (wrk, s);
+ break;
}
- if (vcl_session_is_closed (s) ||
- !(s->flags & VCL_SESSION_F_IS_VEP_SESSION))
- break;
sid = s->session_index;
session_events = s->vep.ev.events;
add_event = 1;
- events[*num_ev].events = EPOLLHUP | EPOLLRDHUP;
+ if (EPOLLRDHUP & session_events)
+ {
+ /* If app can distinguish between RDHUP and HUP,
+ * we make finer control */
+ events[*num_ev].events = EPOLLRDHUP;
+ if (s->flags & VCL_SESSION_F_WR_SHUTDOWN)
+ {
+ events[*num_ev].events |= EPOLLHUP;
+ }
+ }
+ else
+ {
+ events[*num_ev].events = EPOLLHUP;
+ }
session_evt_data = s->vep.ev.data.u64;
+ s->vep.ev.events = 0;
+ break;
+ case SESSION_CTRL_EVT_BOUND:
+ vcl_session_bound_handler (wrk, (session_bound_msg_t *) e->data);
break;
case SESSION_CTRL_EVT_RESET:
if (!e->postponed)
- sid = vcl_session_reset_handler (wrk, (session_reset_msg_t *) e->data);
+ {
+ sid =
+ vcl_session_reset_handler (wrk, (session_reset_msg_t *) e->data);
+ s = vcl_session_get (wrk, sid);
+ }
else
- sid = e->session_index;
- s = vcl_session_get (wrk, sid);
- if (vcl_session_is_closed (s) ||
- !(s->flags & VCL_SESSION_F_IS_VEP_SESSION))
- break;
+ {
+ sid = e->session_index;
+ s = vcl_session_get (wrk, sid);
+ s->flags &= ~VCL_SESSION_F_PENDING_DISCONNECT;
+ }
+ if (vcl_session_is_closed (s) || !vcl_ep_session_needs_evt (s, EPOLLHUP))
+ {
+ if (s && (s->flags & VCL_SESSION_F_PENDING_FREE))
+ vcl_session_free (wrk, s);
+ break;
+ }
session_events = s->vep.ev.events;
add_event = 1;
- events[*num_ev].events = EPOLLHUP | EPOLLRDHUP;
+ events[*num_ev].events = EPOLLERR | EPOLLHUP;
+ if ((EPOLLRDHUP & session_events) &&
+ (s->flags & VCL_SESSION_F_RD_SHUTDOWN))
+ {
+ events[*num_ev].events |= EPOLLRDHUP;
+ }
+ if ((EPOLLIN & session_events) && (s->flags & VCL_SESSION_F_RD_SHUTDOWN))
+ {
+ events[*num_ev].events |= EPOLLIN;
+ }
session_evt_data = s->vep.ev.data.u64;
+ s->vep.ev.events = 0;
break;
case SESSION_CTRL_EVT_UNLISTEN_REPLY:
vcl_session_unlisten_reply_handler (wrk, e->data);
@@ -3138,11 +3293,13 @@ vcl_epoll_wait_handle_mq_event (vcl_worker_t * wrk, session_event_t * e,
if (add_event)
{
+ ASSERT (s->flags & VCL_SESSION_F_IS_VEP_SESSION);
events[*num_ev].data.u64 = session_evt_data;
if (EPOLLONESHOT & session_events)
{
s = vcl_session_get (wrk, sid);
- s->vep.ev.events = 0;
+ if (!(events[*num_ev].events & EPOLLHUP))
+ s->vep.ev.events = EPOLLHUP | EPOLLERR;
}
else if (!(EPOLLET & session_events))
{
@@ -3231,6 +3388,12 @@ vppcom_epoll_wait_eventfd (vcl_worker_t *wrk, struct epoll_event *events,
double end = -1;
u64 buf;
+ if (PREDICT_FALSE (wrk->api_client_handle == ~0))
+ {
+ vcl_api_retry_attach (wrk);
+ return n_evts;
+ }
+
vec_validate (wrk->mq_events, pool_elts (wrk->mq_evt_conns));
if (!n_evts)
{
@@ -3250,6 +3413,13 @@ vppcom_epoll_wait_eventfd (vcl_worker_t *wrk, struct epoll_event *events,
for (i = 0; i < n_mq_evts; i++)
{
+ if (PREDICT_FALSE (wrk->mq_events[i].data.u32 == ~0))
+ {
+ /* api socket was closed */
+ vcl_api_handle_disconnect (wrk);
+ continue;
+ }
+
mqc = vcl_mq_evt_conn_get (wrk, wrk->mq_events[i].data.u32);
n_read = read (mqc->mq_fd, &buf, sizeof (buf));
vcl_epoll_wait_handle_mq (wrk, mqc->mq, events, maxevents, 0,
@@ -3268,7 +3438,7 @@ static void
vcl_epoll_wait_handle_lt (vcl_worker_t *wrk, struct epoll_event *events,
int maxevents, u32 *n_evts)
{
- u32 add_event = 0, next;
+ u32 add_event = 0, evt_flags = 0, next, *to_remove = 0, *si;
vcl_session_t *s;
u64 evt_data;
int rv;
@@ -3283,31 +3453,40 @@ vcl_epoll_wait_handle_lt (vcl_worker_t *wrk, struct epoll_event *events,
s = vcl_session_get (wrk, next);
next = s->vep.lt_next;
+ if (s->vep.ev.events == 0)
+ {
+ vec_add1 (to_remove, s->session_index);
+ continue;
+ }
if ((s->vep.ev.events & EPOLLIN) && (rv = vcl_session_read_ready (s)))
{
add_event = 1;
- events[*n_evts].events |= rv > 0 ? EPOLLIN : EPOLLHUP | EPOLLRDHUP;
+ evt_flags |= rv > 0 ? EPOLLIN : EPOLLHUP | EPOLLRDHUP;
evt_data = s->vep.ev.data.u64;
}
if ((s->vep.ev.events & EPOLLOUT) && (rv = vcl_session_write_ready (s)))
{
add_event = 1;
- events[*n_evts].events |= rv > 0 ? EPOLLOUT : EPOLLHUP | EPOLLRDHUP;
+ evt_flags |= rv > 0 ? EPOLLOUT : EPOLLHUP | EPOLLRDHUP;
evt_data = s->vep.ev.data.u64;
}
if (!add_event && s->session_state > VCL_STATE_READY)
{
add_event = 1;
- events[*n_evts].events |= EPOLLHUP | EPOLLRDHUP;
+ evt_flags |= EPOLLHUP | EPOLLRDHUP;
evt_data = s->vep.ev.data.u64;
}
if (add_event)
{
+ events[*n_evts].events = evt_flags;
events[*n_evts].data.u64 = evt_data;
- *n_evts += 1;
- add_event = 0;
if (EPOLLONESHOT & s->vep.ev.events)
+ s->vep.ev.events = EPOLLHUP | EPOLLERR;
+ if (evt_flags & EPOLLHUP)
s->vep.ev.events = 0;
+ *n_evts += 1;
+ add_event = 0;
+ evt_flags = 0;
if (*n_evts == maxevents)
{
wrk->ep_lt_current = next;
@@ -3316,12 +3495,17 @@ vcl_epoll_wait_handle_lt (vcl_worker_t *wrk, struct epoll_event *events,
}
else
{
- vcl_epoll_lt_del (wrk, s);
- if (wrk->ep_lt_current == VCL_INVALID_SESSION_INDEX)
- break;
+ vec_add1 (to_remove, s->session_index);
}
}
while (next != wrk->ep_lt_current);
+
+ vec_foreach (si, to_remove)
+ {
+ s = vcl_session_get (wrk, *si);
+ vcl_epoll_lt_del (wrk, s);
+ }
+ vec_free (to_remove);
}
int
@@ -3363,6 +3547,10 @@ vppcom_epoll_wait (uint32_t vep_handle, struct epoll_event *events,
}
vec_reset_length (wrk->unhandled_evts_vector);
}
+
+ if (PREDICT_FALSE (wrk->ep_lt_current != VCL_INVALID_SESSION_INDEX))
+ vcl_epoll_wait_handle_lt (wrk, events, maxevents, &n_evts);
+
/* Request to only drain unhandled */
if ((int) wait_for_time == -2)
return n_evts;
@@ -3375,9 +3563,6 @@ vppcom_epoll_wait (uint32_t vep_handle, struct epoll_event *events,
n_evts = vppcom_epoll_wait_condvar (wrk, events, maxevents, n_evts,
wait_for_time);
- if (PREDICT_FALSE (wrk->ep_lt_current != VCL_INVALID_SESSION_INDEX))
- vcl_epoll_wait_handle_lt (wrk, events, maxevents, &n_evts);
-
return n_evts;
}
@@ -3409,7 +3594,19 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op,
VDBG (2, "VPPCOM_ATTR_GET_NWRITE: sh %u, nwrite = %d", session_handle,
rv);
break;
-
+ case VPPCOM_ATTR_GET_NWRITEQ:
+ if (PREDICT_FALSE (!buffer || !buflen || *buflen != sizeof (int)))
+ {
+ rv = VPPCOM_EINVAL;
+ break;
+ }
+ if (!session->tx_fifo || session->session_state == VCL_STATE_DETACHED)
+ {
+ rv = VPPCOM_EINVAL;
+ break;
+ }
+ *(int *) buffer = svm_fifo_max_dequeue (session->tx_fifo);
+ break;
case VPPCOM_ATTR_GET_FLAGS:
if (PREDICT_TRUE (buffer && buflen && (*buflen >= sizeof (*flags))))
{
@@ -3455,9 +3652,11 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op,
clib_memcpy_fast (ep->ip, &session->transport.rmt_ip.ip6,
sizeof (ip6_address_t));
*buflen = sizeof (*ep);
- VDBG (1, "VPPCOM_ATTR_GET_PEER_ADDR: sh %u, is_ip4 = %u, "
- "addr = %U, port %u", session_handle, ep->is_ip4,
- format_ip46_address, &session->transport.rmt_ip,
+ VDBG (1,
+ "VPPCOM_ATTR_GET_PEER_ADDR: sh %u, is_ip4 = %u, "
+ "addr = %U, port %u",
+ session_handle, ep->is_ip4, vcl_format_ip46_address,
+ &session->transport.rmt_ip,
ep->is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
clib_net_to_host_u16 (ep->port));
}
@@ -3478,8 +3677,10 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op,
clib_memcpy_fast (ep->ip, &session->transport.lcl_ip.ip6,
sizeof (ip6_address_t));
*buflen = sizeof (*ep);
- VDBG (1, "VPPCOM_ATTR_GET_LCL_ADDR: sh %u, is_ip4 = %u, addr = %U"
- " port %d", session_handle, ep->is_ip4, format_ip46_address,
+ VDBG (1,
+ "VPPCOM_ATTR_GET_LCL_ADDR: sh %u, is_ip4 = %u, addr = %U"
+ " port %d",
+ session_handle, ep->is_ip4, vcl_format_ip46_address,
&session->transport.lcl_ip,
ep->is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
clib_net_to_host_u16 (ep->port));
@@ -3488,6 +3689,33 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op,
rv = VPPCOM_EINVAL;
break;
+ case VPPCOM_ATTR_GET_ORIGINAL_DST:
+ if (!session->transport.is_ip4)
+ {
+ /* now original dst only support ipv4*/
+ rv = VPPCOM_EAFNOSUPPORT;
+ break;
+ }
+ if (PREDICT_TRUE (buffer && buflen && (*buflen >= sizeof (*ep)) &&
+ ep->ip))
+ {
+ ep->is_ip4 = session->transport.is_ip4;
+ ep->port = session->original_dst_port;
+ clib_memcpy_fast (ep->ip, &session->original_dst_ip4,
+ sizeof (ip4_address_t));
+ *buflen = sizeof (*ep);
+ VDBG (1,
+ "VPPCOM_ATTR_GET_ORIGINAL_DST: sh %u, is_ip4 = %u, addr = %U"
+ " port %d",
+ session_handle, ep->is_ip4, vcl_format_ip4_address,
+ (ip4_address_t *) (&session->original_dst_ip4),
+ ep->is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
+ clib_net_to_host_u16 (ep->port));
+ }
+ else
+ rv = VPPCOM_EINVAL;
+ break;
+
case VPPCOM_ATTR_SET_LCL_ADDR:
if (PREDICT_TRUE (buffer && buflen &&
(*buflen >= sizeof (*ep)) && ep->ip))
@@ -3496,8 +3724,10 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op,
session->transport.lcl_port = ep->port;
vcl_ip_copy_from_ep (&session->transport.lcl_ip, ep);
*buflen = sizeof (*ep);
- VDBG (1, "VPPCOM_ATTR_SET_LCL_ADDR: sh %u, is_ip4 = %u, addr = %U"
- " port %d", session_handle, ep->is_ip4, format_ip46_address,
+ VDBG (1,
+ "VPPCOM_ATTR_SET_LCL_ADDR: sh %u, is_ip4 = %u, addr = %U"
+ " port %d",
+ session_handle, ep->is_ip4, vcl_format_ip46_address,
&session->transport.lcl_ip,
ep->is_ip4 ? IP46_TYPE_IP4 : IP46_TYPE_IP6,
clib_net_to_host_u16 (ep->port));
@@ -3584,6 +3814,18 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op,
rv = VPPCOM_EINVAL;
break;
+ case VPPCOM_ATTR_SET_DSCP:
+ if (buffer && buflen && (*buflen >= sizeof (u8)))
+ {
+ session->dscp = *(u8 *) buffer;
+
+ VDBG (2, "VPPCOM_ATTR_SET_DSCP: %u (0x%x), buflen %d,",
+ *(u8 *) buffer, *(u8 *) buffer, *buflen);
+ }
+ else
+ rv = VPPCOM_EINVAL;
+ break;
+
case VPPCOM_ATTR_SET_TX_FIFO_LEN:
if (buffer && buflen && (*buflen == sizeof (u32)))
{
@@ -3907,7 +4149,6 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op,
VDBG (2, "VPPCOM_ATTR_GET_TCP_USER_MSS: %d, buflen %d", *(int *) buffer,
*buflen);
break;
-
case VPPCOM_ATTR_SET_TCP_USER_MSS:
if (!(buffer && buflen && (*buflen == sizeof (u32))))
{
@@ -4001,6 +4242,36 @@ vppcom_session_attr (uint32_t session_handle, uint32_t op,
clib_memcpy (session->ext_config->data, buffer, *buflen);
session->ext_config->len = *buflen;
break;
+ case VPPCOM_ATTR_SET_IP_PKTINFO:
+ if (buffer && buflen && (*buflen == sizeof (int)) &&
+ !vcl_session_has_attr (session, VCL_SESS_ATTR_IP_PKTINFO))
+ {
+ if (*(int *) buffer)
+ vcl_session_set_attr (session, VCL_SESS_ATTR_IP_PKTINFO);
+ else
+ vcl_session_clear_attr (session, VCL_SESS_ATTR_IP_PKTINFO);
+
+ VDBG (2, "VCL_SESS_ATTR_IP_PKTINFO: %d, buflen %d",
+ vcl_session_has_attr (session, VCL_SESS_ATTR_IP_PKTINFO),
+ *buflen);
+ }
+ else
+ rv = VPPCOM_EINVAL;
+ break;
+
+ case VPPCOM_ATTR_GET_IP_PKTINFO:
+ if (buffer && buflen && (*buflen >= sizeof (int)))
+ {
+ *(int *) buffer =
+ vcl_session_has_attr (session, VCL_SESS_ATTR_IP_PKTINFO);
+ *buflen = sizeof (int);
+
+ VDBG (2, "VCL_SESS_ATTR_IP_PKTINFO: %d, buflen %d", *(int *) buffer,
+ *buflen);
+ }
+ else
+ rv = VPPCOM_EINVAL;
+ break;
default:
rv = VPPCOM_EINVAL;
@@ -4044,6 +4315,31 @@ vppcom_session_recvfrom (uint32_t session_handle, void *buffer,
return rv;
}
+static void
+vcl_handle_ep_app_tlvs (vcl_session_t *s, vppcom_endpt_t *ep)
+{
+ vppcom_endpt_tlv_t *tlv = ep->app_tlvs;
+
+ do
+ {
+ switch (tlv->data_type)
+ {
+ case VCL_UDP_SEGMENT:
+ s->gso_size = *(u16 *) tlv->data;
+ break;
+ case VCL_IP_PKTINFO:
+ clib_memcpy_fast (&s->transport.lcl_ip, (ip4_address_t *) tlv->data,
+ sizeof (ip4_address_t));
+ break;
+ default:
+ VDBG (0, "Ignorning unsupported app tlv %u", tlv->data_type);
+ break;
+ }
+ tlv = VCL_EP_NEXT_APP_TLV (ep, tlv);
+ }
+ while (tlv);
+}
+
int
vppcom_session_sendto (uint32_t session_handle, void *buffer,
uint32_t buflen, int flags, vppcom_endpt_t * ep)
@@ -4060,6 +4356,13 @@ vppcom_session_sendto (uint32_t session_handle, void *buffer,
if (!vcl_session_is_cl (s))
return VPPCOM_EINVAL;
+ s->transport.is_ip4 = ep->is_ip4;
+ s->transport.rmt_port = ep->port;
+ vcl_ip_copy_from_ep (&s->transport.rmt_ip, ep);
+
+ if (ep->app_tlvs)
+ vcl_handle_ep_app_tlvs (s, ep);
+
/* Session not connected/bound in vpp. Create it by 'connecting' it */
if (PREDICT_FALSE (s->session_state == VCL_STATE_CLOSED))
{
@@ -4075,10 +4378,6 @@ vppcom_session_sendto (uint32_t session_handle, void *buffer,
return rv;
s = vcl_session_get (wrk, session_index);
}
-
- s->transport.is_ip4 = ep->is_ip4;
- s->transport.rmt_port = ep->port;
- vcl_ip_copy_from_ep (&s->transport.rmt_ip, ep);
}
if (flags)
@@ -4376,6 +4675,10 @@ vppcom_retval_str (int retval)
st = "VPPCOM_ETIMEDOUT";
break;
+ case VPPCOM_EADDRINUSE:
+ st = "VPPCOM_EADDRINUSE";
+ break;
+
default:
st = "UNKNOWN_STATE";
break;
@@ -4388,22 +4691,55 @@ int
vppcom_add_cert_key_pair (vppcom_cert_key_pair_t *ckpair)
{
if (vcm->cfg.vpp_app_socket_api)
- {
- clib_warning ("not supported");
- return VPPCOM_EINVAL;
- }
- return vcl_bapi_add_cert_key_pair (ckpair);
+ return vcl_sapi_add_cert_key_pair (ckpair);
+ else
+ return vcl_bapi_add_cert_key_pair (ckpair);
}
int
vppcom_del_cert_key_pair (uint32_t ckpair_index)
{
if (vcm->cfg.vpp_app_socket_api)
+ return vcl_sapi_del_cert_key_pair (ckpair_index);
+ else
+ return vcl_bapi_del_cert_key_pair (ckpair_index);
+}
+
+int
+vppcom_session_get_error (uint32_t session_handle)
+{
+ vcl_worker_t *wrk = vcl_worker_get_current ();
+ vcl_session_t *session = 0;
+
+ session = vcl_session_get_w_handle (wrk, session_handle);
+ if (!session)
+ return VPPCOM_EBADFD;
+
+ if (PREDICT_FALSE (session->flags & VCL_SESSION_F_IS_VEP))
{
- clib_warning ("not supported");
- return VPPCOM_EINVAL;
+ VWRN ("epoll session %u! will not have connect", session->session_index);
+ return VPPCOM_EBADFD;
}
- return vcl_bapi_del_cert_key_pair (ckpair_index);
+
+ if (session->vpp_error == SESSION_E_PORTINUSE)
+ return VPPCOM_EADDRINUSE;
+ else if (session->vpp_error == SESSION_E_REFUSED)
+ return VPPCOM_ECONNREFUSED;
+ else if (session->vpp_error != SESSION_E_NONE)
+ return VPPCOM_EFAULT;
+ else
+ return VPPCOM_OK;
+}
+
+int
+vppcom_worker_is_detached (void)
+{
+ vcl_worker_t *wrk = vcl_worker_get_current ();
+
+ if (!vcm->cfg.use_mq_eventfd)
+ return VPPCOM_ENOTSUP;
+
+ return wrk->api_client_handle == ~0;
}
/*
diff --git a/src/vcl/vppcom.h b/src/vcl/vppcom.h
index 19a01c798b5..386d7d0c3f7 100644
--- a/src/vcl/vppcom.h
+++ b/src/vcl/vppcom.h
@@ -16,18 +16,21 @@
#ifndef included_vppcom_h
#define included_vppcom_h
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#endif /* __FreeBSD__ */
#include <netdb.h>
#include <errno.h>
-#include <sys/fcntl.h>
-#include <sys/poll.h>
+#include <fcntl.h>
+#include <poll.h>
#include <sys/epoll.h>
-/* *INDENT-OFF* */
+/* clang-format off */
+
#ifdef __cplusplus
extern "C"
{
#endif
-/* *INDENT-ON* */
/*
* VPPCOM Public API Definitions, Enums, and Data Structures
@@ -42,34 +45,60 @@ extern "C"
#define VPPCOM_ENV_APP_NAMESPACE_SECRET "VCL_APP_NAMESPACE_SECRET"
#define VPPCOM_ENV_APP_SCOPE_LOCAL "VCL_APP_SCOPE_LOCAL"
#define VPPCOM_ENV_APP_SCOPE_GLOBAL "VCL_APP_SCOPE_GLOBAL"
+#define VPPCOM_ENV_APP_USE_MQ_EVENTFD "VCL_APP_USE_MQ_EVENTFD"
#define VPPCOM_ENV_VPP_API_SOCKET "VCL_VPP_API_SOCKET"
#define VPPCOM_ENV_VPP_SAPI_SOCKET "VCL_VPP_SAPI_SOCKET"
- typedef enum
- {
- VPPCOM_PROTO_TCP = 0,
- VPPCOM_PROTO_UDP,
- VPPCOM_PROTO_NONE,
- VPPCOM_PROTO_TLS,
- VPPCOM_PROTO_QUIC,
- VPPCOM_PROTO_DTLS,
- VPPCOM_PROTO_SRTP,
- } vppcom_proto_t;
-
- typedef enum
- {
- VPPCOM_IS_IP6 = 0,
- VPPCOM_IS_IP4,
- } vppcom_is_ip4_t;
-
- typedef struct vppcom_endpt_t_
- {
- uint8_t is_cut_thru;
- uint8_t is_ip4;
- uint8_t *ip;
- uint16_t port;
- uint64_t parent_handle;
- } vppcom_endpt_t;
+typedef enum vppcom_proto_
+{
+ VPPCOM_PROTO_TCP = 0,
+ VPPCOM_PROTO_UDP,
+ VPPCOM_PROTO_NONE,
+ VPPCOM_PROTO_TLS,
+ VPPCOM_PROTO_QUIC,
+ VPPCOM_PROTO_DTLS,
+ VPPCOM_PROTO_SRTP,
+} vppcom_proto_t;
+
+typedef enum
+{
+ VPPCOM_IS_IP6 = 0,
+ VPPCOM_IS_IP4,
+} vppcom_is_ip4_t;
+
+typedef struct vppcom_endpt_tlv_t_
+{
+ uint32_t data_type;
+ uint32_t data_len;
+ uint8_t data[0];
+} vppcom_endpt_tlv_t;
+
+typedef struct vppcom_endpt_t_
+{
+ uint8_t unused; /**< unused */
+ uint8_t is_ip4; /**< flag set if if ip is ipv4 */
+ uint8_t *ip; /**< pointer to ip address */
+ uint16_t port; /**< transport port */
+ uint64_t unused2; /**< unused */
+ uint32_t app_tlv_len; /**< length of app provided tlvs */
+ vppcom_endpt_tlv_t *app_tlvs; /**< array of app provided tlvs */
+} vppcom_endpt_t;
+
+#define VCL_UDP_OPTS_BASE (VPPCOM_PROTO_UDP << 16)
+#define VCL_UDP_SEGMENT (VCL_UDP_OPTS_BASE + 0)
+
+/* By convention we'll use 127 for IP since we don't support IP as protocol */
+#define VCL_IP_OPTS_BASE (127 << 16)
+#define VCL_IP_PKTINFO (VCL_IP_OPTS_BASE + 1)
+
+#define VCL_EP_APP_TLV_LEN(tlv_) (sizeof (vppcom_endpt_tlv_t) + tlv->data_len)
+#define VCL_EP_APP_TLV_POS(ep_, tlv_) ((void *)ep_->app_tlvs - (void *)tlv_)
+#define VCL_EP_APP_TLV_LEN_LEFT(ep_, tlv_) \
+ (ep_->app_tlv_len - VCL_EP_APP_TLV_POS (ep_, tlv_))
+#define VCL_EP_NEXT_APP_TLV(ep_, tlv_) \
+ (VCL_EP_APP_TLV_LEN (tlv_) < VCL_EP_APP_TLV_POS (ep_, tlv_) ? ( \
+ (vppcom_endpt_tlv_t *)((uint8_t *)tlv_ + VCL_EP_APP_TLV_LEN (tlv_))) \
+ : 0)
typedef uint32_t vcl_session_handle_t;
@@ -90,7 +119,11 @@ typedef enum
VPPCOM_EFAULT = -EFAULT,
VPPCOM_ENOMEM = -ENOMEM,
VPPCOM_EINVAL = -EINVAL,
+#ifdef __linux__
VPPCOM_EBADFD = -EBADFD,
+#else
+ VPPCOM_EBADFD = -EBADF,
+#endif /* __linux__ */
VPPCOM_EAFNOSUPPORT = -EAFNOSUPPORT,
VPPCOM_ECONNABORTED = -ECONNABORTED,
VPPCOM_ECONNRESET = -ECONNRESET,
@@ -101,6 +134,8 @@ typedef enum
VPPCOM_ENOPROTOOPT = -ENOPROTOOPT,
VPPCOM_EPIPE = -EPIPE,
VPPCOM_ENOENT = -ENOENT,
+ VPPCOM_EADDRINUSE = -EADDRINUSE,
+ VPPCOM_ENOTSUP = -ENOTSUP
} vppcom_error_t;
typedef enum
@@ -145,6 +180,11 @@ typedef enum
VPPCOM_ATTR_GET_VRF,
VPPCOM_ATTR_GET_DOMAIN,
VPPCOM_ATTR_SET_ENDPT_EXT_CFG,
+ VPPCOM_ATTR_SET_DSCP,
+ VPPCOM_ATTR_SET_IP_PKTINFO,
+ VPPCOM_ATTR_GET_IP_PKTINFO,
+ VPPCOM_ATTR_GET_ORIGINAL_DST,
+ VPPCOM_ATTR_GET_NWRITEQ,
} vppcom_attr_op_t;
typedef struct _vcl_poll
@@ -262,11 +302,25 @@ extern void vppcom_worker_index_set (int);
*/
extern int vppcom_worker_mqs_epfd (void);
-/* *INDENT-OFF* */
+/**
+ * Returns Session error
+ *
+ * Application can use this API to find the detailed session error
+ */
+extern int vppcom_session_get_error (uint32_t session_handle);
+
+/**
+ * Returns true if current worker is disconnected from vpp
+ *
+ * Application can use this API to check if VPP is disconnected
+ * as long as `use-mq-eventfd` is being set
+ */
+extern int vppcom_worker_is_detached (void);
+
#ifdef __cplusplus
}
#endif
-/* *INDENT-ON* */
+/* clang-format on */
#endif /* included_vppcom_h */
diff --git a/src/vlib/CMakeLists.txt b/src/vlib/CMakeLists.txt
index c8127fbe337..7ec9b2050e9 100644
--- a/src/vlib/CMakeLists.txt
+++ b/src/vlib/CMakeLists.txt
@@ -22,6 +22,12 @@ else()
set(BUFFER_ALLOC_FAULT_INJECTOR 0 CACHE STRING "fault injector off")
endif()
+if(VPP_PLATFORM_BUFFER_ALIGN)
+ set(VLIB_BUFFER_ALIGN ${VPP_PLATFORM_BUFFER_ALIGN})
+else()
+ set(VLIB_BUFFER_ALIGN ${VPP_CACHE_LINE_SIZE})
+endif()
+
set(PRE_DATA_SIZE 128 CACHE STRING "Buffer headroom size.")
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
@@ -41,29 +47,24 @@ configure_file(
)
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h
- DESTINATION include/vlib
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vlib
COMPONENT vpp-dev
)
##############################################################################
-# Find lib and include files
+# vlib shared library
##############################################################################
-message(STATUS "Looking for libuuid")
-vpp_find_path(UUID_INCLUDE_DIR NAMES uuid/uuid.h)
-vpp_find_library(UUID_LIB NAMES uuid)
-if(UUID_INCLUDE_DIR AND UUID_LIB)
- include_directories(${UUID_INCLUDE_DIR})
- set(VMBUS_SOURCE linux/vmbus.c)
- set(VMBUS_LIBS uuid)
- message(STATUS "Found uuid in ${UUID_INCLUDE_DIR}")
-else()
- message(WARNING "-- libuuid not found - vmbus support disabled")
-endif()
+set(PLATFORM_SOURCES
+ linux/pci.c
+ linux/vfio.c
+ linux/vmbus.c
+)
+
+set(PLATFORM_HEADERS
+ linux/vfio.h
+)
-##############################################################################
-# vlib shared library
-##############################################################################
add_vpp_library(vlib
SOURCES
buffer.c
@@ -75,8 +76,6 @@ add_vpp_library(vlib
format.c
handoff_trace.c
init.c
- linux/pci.c
- linux/vfio.c
log.c
main.c
node.c
@@ -88,8 +87,15 @@ add_vpp_library(vlib
physmem.c
punt.c
punt_node.c
+ stats/cli.c
+ stats/collector.c
+ stats/format.c
+ stats/init.c
+ stats/provider_mem.c
+ stats/stats.c
threads.c
threads_cli.c
+ time.c
trace.c
unix/cli.c
unix/input.c
@@ -97,7 +103,9 @@ add_vpp_library(vlib
unix/plugin.c
unix/util.c
vmbus/vmbus.c
- ${VMBUS_SOURCE}
+ dma/dma.c
+ dma/cli.c
+ ${PLATFORM_SOURCES}
MULTIARCH_SOURCES
buffer_funcs.c
@@ -113,12 +121,12 @@ add_vpp_library(vlib
counter.h
counter_types.h
defs.h
+ dma/dma.h
error_funcs.h
error.h
format_funcs.h
global_funcs.h
init.h
- linux/vfio.h
log.h
main.h
node_funcs.h
@@ -129,7 +137,10 @@ add_vpp_library(vlib
physmem_funcs.h
physmem.h
punt.h
+ stats/shared.h
+ stats/stats.h
threads.h
+ time.h
trace_funcs.h
trace.h
unix/mc_socket.h
@@ -137,11 +148,12 @@ add_vpp_library(vlib
unix/unix.h
vlib.h
vmbus/vmbus.h
+ ${PLATFORM_HEADERS}
API_FILES
pci/pci_types.api
- LINK_LIBRARIES vppinfra svm ${VMBUS_LIBS} ${CMAKE_DL_LIBS}
+ LINK_LIBRARIES vppinfra svm ${CMAKE_DL_LIBS} ${EPOLL_LIB}
DEPENDS api_headers
)
diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c
index adaafa36f5d..674f15d5dc6 100644
--- a/src/vlib/buffer.c
+++ b/src/vlib/buffer.c
@@ -43,10 +43,11 @@
* Allocate/free network buffers.
*/
-#include <vppinfra/linux/sysfs.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/unix.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/stats/stats.h>
#define VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA 16384
#define VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA_UNPRIV 8192
@@ -58,22 +59,8 @@ STATIC_ASSERT_FITS_IN (vlib_buffer_t, ref_count, 16);
STATIC_ASSERT_FITS_IN (vlib_buffer_t, buffer_pool_index, 16);
#endif
-/* Make sure that buffer template size is not accidentally changed */
-STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64);
-
u16 __vlib_buffer_external_hdr_size = 0;
-static void
-buffer_gauges_update_cached_fn (stat_segment_directory_entry_t * e,
- u32 index);
-
-static void
-buffer_gauges_update_available_fn (stat_segment_directory_entry_t * e,
- u32 index);
-
-static void
-buffer_gauges_update_used_fn (stat_segment_directory_entry_t * e, u32 index);
-
uword
vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm,
vlib_buffer_t * b_first)
@@ -107,7 +94,7 @@ format_vlib_buffer_no_chain (u8 * s, va_list * args)
"ref-count %u", b->current_data, b->current_length,
b->buffer_pool_index, b->ref_count);
- if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)
+ if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
s = format (s, ", totlen-nifb %d",
b->total_length_not_including_first_buffer);
@@ -485,26 +472,28 @@ static uword
vlib_buffer_alloc_size (uword ext_hdr_size, uword data_size)
{
uword alloc_size = ext_hdr_size + sizeof (vlib_buffer_t) + data_size;
- alloc_size = CLIB_CACHE_LINE_ROUND (alloc_size);
+ alloc_size = round_pow2 (alloc_size, VLIB_BUFFER_ALIGN);
- /* in case when we have even number of cachelines, we add one more for
+ /* in case when we have even number of 'cachelines', we add one more for
* better cache occupancy */
- alloc_size |= CLIB_CACHE_LINE_BYTES;
+ alloc_size |= VLIB_BUFFER_ALIGN;
return alloc_size;
}
u8
-vlib_buffer_pool_create (vlib_main_t * vm, char *name, u32 data_size,
- u32 physmem_map_index)
+vlib_buffer_pool_create (vlib_main_t *vm, u32 data_size, u32 physmem_map_index,
+ char *fmt, ...)
{
vlib_buffer_main_t *bm = vm->buffer_main;
vlib_buffer_pool_t *bp;
vlib_physmem_map_t *m = vlib_physmem_get_map (vm, physmem_map_index);
uword start = pointer_to_uword (m->base);
uword size = (uword) m->n_pages << m->log2_page_size;
- uword i, j;
- u32 alloc_size, n_alloc_per_page;
+ uword page_mask = ~pow2_mask (m->log2_page_size);
+ u8 *p;
+ u32 alloc_size;
+ va_list va;
if (vec_len (bm->buffer_pools) >= 255)
return ~0;
@@ -542,48 +531,57 @@ vlib_buffer_pool_create (vlib_main_t * vm, char *name, u32 data_size,
bp->buffer_template.buffer_pool_index = bp->index;
bp->buffer_template.ref_count = 1;
bp->physmem_map_index = physmem_map_index;
- bp->name = format (0, "%s%c", name, 0);
bp->data_size = data_size;
bp->numa_node = m->numa_node;
+ bp->log2_page_size = m->log2_page_size;
+
+ va_start (va, fmt);
+ bp->name = va_format (0, fmt, &va);
+ va_end (va);
vec_validate_aligned (bp->threads, vlib_get_n_threads () - 1,
CLIB_CACHE_LINE_BYTES);
alloc_size = vlib_buffer_alloc_size (bm->ext_hdr_size, data_size);
- n_alloc_per_page = (1ULL << m->log2_page_size) / alloc_size;
+ bp->alloc_size = alloc_size;
/* preallocate buffer indices memory */
- bp->n_buffers = m->n_pages * n_alloc_per_page;
- bp->buffers = clib_mem_alloc_aligned (bp->n_buffers * sizeof (u32),
- CLIB_CACHE_LINE_BYTES);
+ bp->buffers = clib_mem_alloc_aligned (
+ round_pow2 ((size / alloc_size) * sizeof (u32), CLIB_CACHE_LINE_BYTES),
+ CLIB_CACHE_LINE_BYTES);
clib_spinlock_init (&bp->lock);
- for (j = 0; j < m->n_pages; j++)
- for (i = 0; i < n_alloc_per_page; i++)
- {
- u8 *p;
- u32 bi;
-
- p = m->base + (j << m->log2_page_size) + i * alloc_size;
- p += bm->ext_hdr_size;
-
- /*
- * Waste 1 buffer (maximum) so that 0 is never a valid buffer index.
- * Allows various places to ASSERT (bi != 0). Much easier
- * than debugging downstream crashes in successor nodes.
- */
- if (p == m->base)
- continue;
+ p = m->base;
- vlib_buffer_copy_template ((vlib_buffer_t *) p, &bp->buffer_template);
+ /* start with naturally aligned address */
+ p += alloc_size - (uword) p % alloc_size;
- bi = vlib_get_buffer_index (vm, (vlib_buffer_t *) p);
+ /*
+ * Waste 1 buffer (maximum) so that 0 is never a valid buffer index.
+ * Allows various places to ASSERT (bi != 0). Much easier
+ * than debugging downstream crashes in successor nodes.
+ */
+ if (p == m->base)
+ p += alloc_size;
- bp->buffers[bp->n_avail++] = bi;
+ for (; p < (u8 *) m->base + size - alloc_size; p += alloc_size)
+ {
+ vlib_buffer_t *b;
+ u32 bi;
+
+ /* skip if buffer spans across page boundary */
+ if (((uword) p & page_mask) != ((uword) (p + alloc_size) & page_mask))
+ continue;
+
+ b = (vlib_buffer_t *) (p + bm->ext_hdr_size);
+ b->template = bp->buffer_template;
+ bi = vlib_get_buffer_index (vm, b);
+ bp->buffers[bp->n_avail++] = bi;
+ vlib_get_buffer (vm, bi);
+ }
- vlib_get_buffer (vm, bi);
- }
+ bp->n_buffers = bp->n_avail;
return bp->index;
}
@@ -601,65 +599,62 @@ format_vlib_buffer_pool (u8 * s, va_list * va)
"Pool Name", "Index", "NUMA", "Size", "Data Size",
"Total", "Avail", "Cached", "Used");
- /* *INDENT-OFF* */
vec_foreach (bpt, bp->threads)
cached += bpt->n_cached;
- /* *INDENT-ON* */
- s = format (s, "%-20s%=6d%=6d%=6u%=11u%=6u%=8u%=8u%=8u",
- bp->name, bp->index, bp->numa_node, bp->data_size +
- sizeof (vlib_buffer_t) + vm->buffer_main->ext_hdr_size,
+ s = format (s, "%-20v%=6d%=6d%=6u%=11u%=6u%=8u%=8u%=8u", bp->name, bp->index,
+ bp->numa_node,
+ bp->data_size + sizeof (vlib_buffer_t) +
+ vm->buffer_main->ext_hdr_size,
bp->data_size, bp->n_buffers, bp->n_avail, cached,
bp->n_buffers - bp->n_avail - cached);
return s;
}
-static clib_error_t *
-show_buffers (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
+u8 *
+format_vlib_buffer_pool_all (u8 *s, va_list *va)
{
+ vlib_main_t *vm = va_arg (*va, vlib_main_t *);
vlib_buffer_main_t *bm = vm->buffer_main;
vlib_buffer_pool_t *bp;
- vlib_cli_output (vm, "%U", format_vlib_buffer_pool, vm, 0);
+ s = format (s, "%U", format_vlib_buffer_pool, vm, 0);
- /* *INDENT-OFF* */
vec_foreach (bp, bm->buffer_pools)
- vlib_cli_output (vm, "%U", format_vlib_buffer_pool, vm, bp);
- /* *INDENT-ON* */
+ s = format (s, "\n%U", format_vlib_buffer_pool, vm, bp);
+
+ return s;
+}
+static clib_error_t *
+show_buffers (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vlib_cli_output (vm, "%U", format_vlib_buffer_pool_all, vm);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_buffers_command, static) = {
.path = "show buffers",
.short_help = "Show packet buffer allocation",
.function = show_buffers,
};
-/* *INDENT-ON* */
clib_error_t *
-vlib_buffer_worker_init (vlib_main_t * vm)
+vlib_buffer_num_workers_change (vlib_main_t *vm)
{
vlib_buffer_main_t *bm = vm->buffer_main;
vlib_buffer_pool_t *bp;
- /* *INDENT-OFF* */
vec_foreach (bp, bm->buffer_pools)
- {
- clib_spinlock_lock (&bp->lock);
- vec_validate_aligned (bp->threads, vlib_get_n_threads () - 1,
- CLIB_CACHE_LINE_BYTES);
- clib_spinlock_unlock (&bp->lock);
- }
- /* *INDENT-ON* */
+ vec_validate_aligned (bp->threads, vlib_get_n_threads () - 1,
+ CLIB_CACHE_LINE_BYTES);
return 0;
}
-VLIB_WORKER_INIT_FUNCTION (vlib_buffer_worker_init);
+VLIB_NUM_WORKERS_CHANGE_FN (vlib_buffer_num_workers_change);
static clib_error_t *
vlib_buffer_main_init_numa_alloc (struct vlib_main_t *vm, u32 numa_node,
@@ -705,7 +700,6 @@ vlib_buffer_main_init_numa_node (struct vlib_main_t *vm, u32 numa_node,
vlib_buffer_main_t *bm = vm->buffer_main;
u32 physmem_map_index;
clib_error_t *error;
- u8 *name = 0;
if (bm->log2_page_size == CLIB_MEM_PAGE_SZ_UNKNOWN)
{
@@ -736,14 +730,12 @@ vlib_buffer_main_init_numa_node (struct vlib_main_t *vm, u32 numa_node,
return error;
buffer_pool_create:
- name = format (name, "default-numa-%d%c", numa_node, 0);
- *index = vlib_buffer_pool_create (vm, (char *) name,
- vlib_buffer_get_default_data_size (vm),
- physmem_map_index);
+ *index =
+ vlib_buffer_pool_create (vm, vlib_buffer_get_default_data_size (vm),
+ physmem_map_index, "default-numa-%d", numa_node);
if (*index == (u8) ~ 0)
error = clib_error_return (0, "maximum number of buffer pools reached");
- vec_free (name);
return error;
@@ -770,10 +762,8 @@ buffer_get_cached (vlib_buffer_pool_t * bp)
clib_spinlock_lock (&bp->lock);
- /* *INDENT-OFF* */
vec_foreach (bpt, bp->threads)
cached += bpt->n_cached;
- /* *INDENT-ON* */
clib_spinlock_unlock (&bp->lock);
@@ -792,37 +782,39 @@ buffer_get_by_index (vlib_buffer_main_t * bm, u32 index)
}
static void
-buffer_gauges_update_used_fn (stat_segment_directory_entry_t * e, u32 index)
+buffer_gauges_collect_used_fn (vlib_stats_collector_data_t *d)
{
vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_pool_t *bp = buffer_get_by_index (vm->buffer_main, index);
+ vlib_buffer_pool_t *bp =
+ buffer_get_by_index (vm->buffer_main, d->private_data);
if (!bp)
return;
- e->value = bp->n_buffers - bp->n_avail - buffer_get_cached (bp);
+ d->entry->value = bp->n_buffers - bp->n_avail - buffer_get_cached (bp);
}
static void
-buffer_gauges_update_available_fn (stat_segment_directory_entry_t * e,
- u32 index)
+buffer_gauges_collect_available_fn (vlib_stats_collector_data_t *d)
{
vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_pool_t *bp = buffer_get_by_index (vm->buffer_main, index);
+ vlib_buffer_pool_t *bp =
+ buffer_get_by_index (vm->buffer_main, d->private_data);
if (!bp)
return;
- e->value = bp->n_avail;
+ d->entry->value = bp->n_avail;
}
static void
-buffer_gauges_update_cached_fn (stat_segment_directory_entry_t * e, u32 index)
+buffer_gauges_collect_cached_fn (vlib_stats_collector_data_t *d)
{
vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_pool_t *bp = buffer_get_by_index (vm->buffer_main, index);
+ vlib_buffer_pool_t *bp =
+ buffer_get_by_index (vm->buffer_main, d->private_data);
if (!bp)
return;
- e->value = buffer_get_cached (bp);
+ d->entry->value = buffer_get_cached (bp);
}
clib_error_t *
@@ -843,13 +835,8 @@ vlib_buffer_main_init (struct vlib_main_t * vm)
clib_spinlock_init (&bm->buffer_known_hash_lockp);
- if ((err = clib_sysfs_read ("/sys/devices/system/node/online", "%U",
- unformat_bitmap_list, &bmp)))
- clib_error_free (err);
-
- if ((err = clib_sysfs_read ("/sys/devices/system/node/has_memory", "%U",
- unformat_bitmap_list, &bmp_has_memory)))
- clib_error_free (err);
+ bmp = os_get_online_cpu_node_bitmap ();
+ bmp_has_memory = os_get_cpu_with_memory_bitmap ();
if (bmp && bmp_has_memory)
bmp = clib_bitmap_and (bmp, bmp_has_memory);
@@ -862,7 +849,6 @@ vlib_buffer_main_init (struct vlib_main_t * vm)
clib_panic ("system have more than %u NUMA nodes",
VLIB_BUFFER_MAX_NUMA_NODES);
- /* *INDENT-OFF* */
clib_bitmap_foreach (numa_node, bmp)
{
u8 *index = bm->default_buffer_pool_index_for_numa + numa_node;
@@ -877,7 +863,6 @@ vlib_buffer_main_init (struct vlib_main_t * vm)
if (first_valid_buffer_pool_index == 0xff)
first_valid_buffer_pool_index = index[0];
}
- /* *INDENT-ON* */
if (first_valid_buffer_pool_index == (u8) ~ 0)
{
@@ -885,34 +870,32 @@ vlib_buffer_main_init (struct vlib_main_t * vm)
goto done;
}
- /* *INDENT-OFF* */
clib_bitmap_foreach (numa_node, bmp)
{
if (bm->default_buffer_pool_index_for_numa[numa_node] == (u8) ~0)
bm->default_buffer_pool_index_for_numa[numa_node] =
first_valid_buffer_pool_index;
}
- /* *INDENT-ON* */
vec_foreach (bp, bm->buffer_pools)
{
+ vlib_stats_collector_reg_t reg = { .private_data = bp - bm->buffer_pools };
if (bp->n_buffers == 0)
continue;
- vec_reset_length (name);
- name = format (name, "/buffer-pools/%s/cached%c", bp->name, 0);
- stat_segment_register_gauge (name, buffer_gauges_update_cached_fn,
- bp - bm->buffer_pools);
+ reg.entry_index =
+ vlib_stats_add_gauge ("/buffer-pools/%v/cached", bp->name);
+ reg.collect_fn = buffer_gauges_collect_cached_fn;
+ vlib_stats_register_collector_fn (&reg);
- vec_reset_length (name);
- name = format (name, "/buffer-pools/%s/used%c", bp->name, 0);
- stat_segment_register_gauge (name, buffer_gauges_update_used_fn,
- bp - bm->buffer_pools);
+ reg.entry_index = vlib_stats_add_gauge ("/buffer-pools/%v/used", bp->name);
+ reg.collect_fn = buffer_gauges_collect_used_fn;
+ vlib_stats_register_collector_fn (&reg);
- vec_reset_length (name);
- name = format (name, "/buffer-pools/%s/available%c", bp->name, 0);
- stat_segment_register_gauge (name, buffer_gauges_update_available_fn,
- bp - bm->buffer_pools);
+ reg.entry_index =
+ vlib_stats_add_gauge ("/buffer-pools/%v/available", bp->name);
+ reg.collect_fn = buffer_gauges_collect_available_fn;
+ vlib_stats_register_collector_fn (&reg);
}
done:
@@ -971,6 +954,20 @@ vlib_buffer_alloc_may_fail (vlib_main_t * vm, u32 n_buffers)
}
#endif
+__clib_export int
+vlib_buffer_set_alloc_free_callback (
+ vlib_main_t *vm, vlib_buffer_alloc_free_callback_t *alloc_callback_fn,
+ vlib_buffer_alloc_free_callback_t *free_callback_fn)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ if ((alloc_callback_fn && bm->alloc_callback_fn) ||
+ (free_callback_fn && bm->free_callback_fn))
+ return 1;
+ bm->alloc_callback_fn = alloc_callback_fn;
+ bm->free_callback_fn = free_callback_fn;
+ return 0;
+}
+
/** @endcond */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h
index 349b7324996..7d45689ed19 100644
--- a/src/vlib/buffer.h
+++ b/src/vlib/buffer.h
@@ -47,8 +47,7 @@
#include <vppinfra/lock.h>
#include <vlib/error.h> /* for vlib_error_t */
-#include <vlib/config.h> /* for __PRE_DATA_SIZE */
-#define VLIB_BUFFER_PRE_DATA_SIZE __PRE_DATA_SIZE
+#include <vlib/config.h> /* for VLIB_BUFFER_PRE_DATA_SIZE */
#define VLIB_BUFFER_DEFAULT_DATA_SIZE (2048)
@@ -107,62 +106,78 @@ enum
#define VLIB_BUFFER_TRACE_TRAJECTORY 0
#endif /* VLIB_BUFFER_TRACE_TRAJECTORY */
+#define vlib_buffer_template_fields \
+ /** signed offset in data[], pre_data[] that we are currently \
+ * processing. If negative current header points into predata area. */ \
+ i16 current_data; \
+ \
+ /** Nbytes between current data and the end of this buffer. */ \
+ u16 current_length; \
+ /** buffer flags: \
+ <br> VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list \
+ index, <br> VLIB_BUFFER_IS_TRACED: trace this buffer. <br> \
+ VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer. <br> \
+ VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says <br> \
+ VLIB_BUFFER_EXT_HDR_VALID: buffer contains valid external buffer manager \
+ header, set to avoid adding it to a flow report <br> \
+ VLIB_BUFFER_FLAG_USER(n): user-defined bit N \
+ */ \
+ u32 flags; \
+ \
+ /** Generic flow identifier */ \
+ u32 flow_id; \
+ \
+ /** Reference count for this buffer. */ \
+ volatile u8 ref_count; \
+ \
+ /** index of buffer pool this buffer belongs. */ \
+ u8 buffer_pool_index; \
+ \
+ /** Error code for buffers to be enqueued to error handler. */ \
+ vlib_error_t error; \
+ \
+ /** Next buffer for this linked-list of buffers. Only valid if \
+ * VLIB_BUFFER_NEXT_PRESENT flag is set. */ \
+ u32 next_buffer; \
+ \
+ /** The following fields can be in a union because once a packet enters \
+ * the punt path, it is no longer on a feature arc */ \
+ union \
+ { \
+ /** Used by feature subgraph arcs to visit enabled feature nodes */ \
+ u32 current_config_index; \
+ /* the reason the packet once punted */ \
+ u32 punt_reason; \
+ }; \
+ \
+ /** Opaque data used by sub-graphs for their own purposes. */ \
+ u32 opaque[10];
+
+typedef struct
+{
+ CLIB_ALIGN_MARK (align_mark, 64);
+ vlib_buffer_template_fields
+} vlib_buffer_template_t;
+
+STATIC_ASSERT_SIZEOF (vlib_buffer_template_t, 64);
+
/** VLIB buffer representation. */
typedef union
{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
struct
{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /** signed offset in data[], pre_data[] that we are currently
- * processing. If negative current header points into predata area. */
- i16 current_data;
-
- /** Nbytes between current data and the end of this buffer. */
- u16 current_length;
-
- /** buffer flags:
- <br> VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index,
- <br> VLIB_BUFFER_IS_TRACED: trace this buffer.
- <br> VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer.
- <br> VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says
- <br> VLIB_BUFFER_EXT_HDR_VALID: buffer contains valid external buffer manager header,
- set to avoid adding it to a flow report
- <br> VLIB_BUFFER_FLAG_USER(n): user-defined bit N
- */
- u32 flags;
-
- /** Generic flow identifier */
- u32 flow_id;
-
- /** Reference count for this buffer. */
- volatile u8 ref_count;
-
- /** index of buffer pool this buffer belongs. */
- u8 buffer_pool_index;
-
- /** Error code for buffers to be enqueued to error handler. */
- vlib_error_t error;
-
- /** Next buffer for this linked-list of buffers. Only valid if
- * VLIB_BUFFER_NEXT_PRESENT flag is set. */
- u32 next_buffer;
-
- /** The following fields can be in a union because once a packet enters
- * the punt path, it is no longer on a feature arc */
union
{
- /** Used by feature subgraph arcs to visit enabled feature nodes */
- u32 current_config_index;
- /* the reason the packet once punted */
- u32 punt_reason;
+ struct
+ {
+ vlib_buffer_template_fields
+ };
+ vlib_buffer_template_t template;
};
- /** Opaque data used by sub-graphs for their own purposes. */
- u32 opaque[10];
-
- /** part of buffer metadata which is initialized on alloc ends here. */
- STRUCT_MARK (template_end);
+ /* Data above is initialized or zeroed on alloc, data bellow is not
+ * and it is app responsibility to ensure data is valid */
/** start of 2nd half (2nd cacheline on systems where cacheline size is 64) */
CLIB_ALIGN_MARK (second_half, 64);
@@ -220,6 +235,7 @@ STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE % CLIB_CACHE_LINE_BYTES == 0,
"VLIB_BUFFER_PRE_DATA_SIZE must be divisible by cache line size");
#define VLIB_BUFFER_HDR_SIZE (sizeof(vlib_buffer_t) - VLIB_BUFFER_PRE_DATA_SIZE)
+#define VLIB_BUFFER_INVALID_INDEX 0xffffffff
/** \brief Prefetch buffer metadata.
The first 64 bytes of buffer contains most header information
@@ -452,11 +468,12 @@ typedef struct
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
uword start;
uword size;
- uword log2_page_size;
+ u8 log2_page_size;
u8 index;
- u32 numa_node;
+ u8 numa_node;
u32 physmem_map_index;
u32 data_size;
+ u32 alloc_size;
u32 n_buffers;
u32 n_avail;
u32 *buffers;
@@ -467,11 +484,15 @@ typedef struct
vlib_buffer_pool_thread_t *threads;
/* buffer metadata template */
- vlib_buffer_t buffer_template;
+ vlib_buffer_template_t buffer_template;
} vlib_buffer_pool_t;
#define VLIB_BUFFER_MAX_NUMA_NODES 32
+typedef u32 (vlib_buffer_alloc_free_callback_t) (struct vlib_main_t *vm,
+ u8 buffer_pool_index,
+ u32 *buffers, u32 n_buffers);
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -481,12 +502,9 @@ typedef struct
uword buffer_mem_size;
vlib_buffer_pool_t *buffer_pools;
- /* Hash table mapping buffer index into number
- 0 => allocated but free, 1 => allocated and not-free.
- If buffer index is not in hash table then this buffer
- has never been allocated. */
- uword *buffer_known_hash;
- clib_spinlock_t buffer_known_hash_lockp;
+ vlib_buffer_alloc_free_callback_t *alloc_callback_fn;
+ vlib_buffer_alloc_free_callback_t *free_callback_fn;
+
u8 default_buffer_pool_index_for_numa[VLIB_BUFFER_MAX_NUMA_NODES];
/* config */
@@ -495,12 +513,25 @@ typedef struct
u32 default_data_size;
clib_mem_page_sz_t log2_page_size;
+ /* Hash table mapping buffer index into number
+ 0 => allocated but free, 1 => allocated and not-free.
+ If buffer index is not in hash table then this buffer
+ has never been allocated. */
+ uword *buffer_known_hash;
+ clib_spinlock_t buffer_known_hash_lockp;
+
/* logging */
vlib_log_class_t log_default;
} vlib_buffer_main_t;
clib_error_t *vlib_buffer_main_init (struct vlib_main_t *vm);
+format_function_t format_vlib_buffer_pool_all;
+
+int vlib_buffer_set_alloc_free_callback (
+ struct vlib_main_t *vm, vlib_buffer_alloc_free_callback_t *alloc_callback_fn,
+ vlib_buffer_alloc_free_callback_t *free_callback_fn);
+
extern u16 __vlib_buffer_external_hdr_size;
#define VLIB_BUFFER_SET_EXT_HDR_SIZE(x) \
static void __clib_constructor \
diff --git a/src/vlib/buffer_funcs.c b/src/vlib/buffer_funcs.c
index a661370a141..d910b25afac 100644
--- a/src/vlib/buffer_funcs.c
+++ b/src/vlib/buffer_funcs.c
@@ -8,32 +8,41 @@
#include <vppinfra/vector/compress.h>
static_always_inline u32
-enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp,
- u16 next_index, u32 *buffers, u16 *nexts, u32 n_buffers,
- u32 n_left, u32 *tmp)
+enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_bitmap_t used_elt_bmp, u16 next_index, u32 *buffers,
+ u16 *nexts, u32 n_buffers, u32 n_left, u32 *tmp, u8 maybe_aux,
+ u32 *aux_data, u32 *tmp_aux)
{
- u64 match_bmp[VLIB_FRAME_SIZE / 64];
+ vlib_frame_bitmap_t match_bmp;
vlib_frame_t *f;
u32 n_extracted, n_free;
- u32 *to;
+ u32 *to, *to_aux = 0;
f = vlib_get_next_frame_internal (vm, node, next_index, 0);
+ maybe_aux = maybe_aux && f->aux_offset;
+
n_free = VLIB_FRAME_SIZE - f->n_vectors;
/* if frame contains enough space for worst case scenario, we can avoid
* use of tmp */
if (n_free >= n_left)
- to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
+ {
+ to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
+ if (maybe_aux)
+ to_aux = (u32 *) vlib_frame_aux_args (f) + f->n_vectors;
+ }
else
- to = tmp;
-
+ {
+ to = tmp;
+ if (maybe_aux)
+ to_aux = tmp_aux;
+ }
clib_mask_compare_u16 (next_index, nexts, match_bmp, n_buffers);
-
n_extracted = clib_compress_u32 (to, buffers, match_bmp, n_buffers);
-
- for (int i = 0; i < ARRAY_LEN (match_bmp); i++)
- used_elt_bmp[i] |= match_bmp[i];
+ if (maybe_aux)
+ clib_compress_u32 (to_aux, aux_data, match_bmp, n_buffers);
+ vlib_frame_bitmap_or (used_elt_bmp, match_bmp);
if (to != tmp)
{
@@ -45,6 +54,11 @@ enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp,
/* enough space in the existing frame */
to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
vlib_buffer_copy_indices (to, tmp, n_extracted);
+ if (maybe_aux)
+ {
+ to_aux = (u32 *) vlib_frame_aux_args (f) + f->n_vectors;
+ vlib_buffer_copy_indices (to_aux, tmp_aux, n_extracted);
+ }
vlib_put_next_frame (vm, node, next_index, n_free - n_extracted);
}
else
@@ -52,6 +66,11 @@ enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp,
/* full frame */
to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
vlib_buffer_copy_indices (to, tmp, n_free);
+ if (maybe_aux)
+ {
+ to_aux = (u32 *) vlib_frame_aux_args (f) + f->n_vectors;
+ vlib_buffer_copy_indices (to_aux, tmp_aux, n_free);
+ }
vlib_put_next_frame (vm, node, next_index, 0);
/* second frame */
@@ -59,6 +78,11 @@ enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp,
f = vlib_get_next_frame_internal (vm, node, next_index, 1);
to = vlib_frame_vector_args (f);
vlib_buffer_copy_indices (to, tmp + n_free, n_2nd_frame);
+ if (maybe_aux)
+ {
+ to_aux = vlib_frame_aux_args (f);
+ vlib_buffer_copy_indices (to_aux, tmp_aux + n_free, n_2nd_frame);
+ }
vlib_put_next_frame (vm, node, next_index,
VLIB_FRAME_SIZE - n_2nd_frame);
}
@@ -66,24 +90,27 @@ enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp,
return n_left - n_extracted;
}
-void __clib_section (".vlib_buffer_enqueue_to_next_fn")
-CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn)
-(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts,
- uword count)
+static_always_inline void
+vlib_buffer_enqueue_to_next_fn_inline (vlib_main_t *vm,
+ vlib_node_runtime_t *node, u32 *buffers,
+ u32 *aux_data, u16 *nexts, uword count,
+ u8 maybe_aux)
{
u32 tmp[VLIB_FRAME_SIZE];
+ u32 tmp_aux[VLIB_FRAME_SIZE];
u32 n_left;
u16 next_index;
while (count >= VLIB_FRAME_SIZE)
{
- u64 used_elt_bmp[VLIB_FRAME_SIZE / 64] = {};
+ vlib_frame_bitmap_t used_elt_bmp = {};
n_left = VLIB_FRAME_SIZE;
u32 off = 0;
next_index = nexts[0];
n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers, nexts,
- VLIB_FRAME_SIZE, n_left, tmp);
+ VLIB_FRAME_SIZE, n_left, tmp, maybe_aux, aux_data,
+ tmp_aux);
while (n_left)
{
@@ -96,23 +123,26 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn)
next_index =
nexts[off * 64 + count_trailing_zeros (~used_elt_bmp[off])];
n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers,
- nexts, VLIB_FRAME_SIZE, n_left, tmp);
+ nexts, VLIB_FRAME_SIZE, n_left, tmp, maybe_aux,
+ aux_data, tmp_aux);
}
buffers += VLIB_FRAME_SIZE;
+ if (maybe_aux)
+ aux_data += VLIB_FRAME_SIZE;
nexts += VLIB_FRAME_SIZE;
count -= VLIB_FRAME_SIZE;
}
if (count)
{
- u64 used_elt_bmp[VLIB_FRAME_SIZE / 64] = {};
+ vlib_frame_bitmap_t used_elt_bmp = {};
next_index = nexts[0];
n_left = count;
u32 off = 0;
n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers, nexts,
- count, n_left, tmp);
+ count, n_left, tmp, maybe_aux, aux_data, tmp_aux);
while (n_left)
{
@@ -124,26 +154,55 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn)
next_index =
nexts[off * 64 + count_trailing_zeros (~used_elt_bmp[off])];
- n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers,
- nexts, count, n_left, tmp);
+ n_left =
+ enqueue_one (vm, node, used_elt_bmp, next_index, buffers, nexts,
+ count, n_left, tmp, maybe_aux, aux_data, tmp_aux);
}
}
}
+void __clib_section (".vlib_buffer_enqueue_to_next_fn")
+CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn)
+(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts,
+ uword count)
+{
+ vlib_buffer_enqueue_to_next_fn_inline (vm, node, buffers, NULL, nexts, count,
+ 0 /* maybe_aux */);
+}
+
CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_next_fn);
-void __clib_section (".vlib_buffer_enqueue_to_single_next_fn")
-CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn)
-(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 next_index,
- u32 count)
+void __clib_section (".vlib_buffer_enqueue_to_next_with_aux_fn")
+CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_with_aux_fn)
+(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u32 *aux_data,
+ u16 *nexts, uword count)
{
- u32 *to_next, n_left_to_next, n_enq;
+ vlib_buffer_enqueue_to_next_fn_inline (vm, node, buffers, aux_data, nexts,
+ count, 1 /* maybe_aux */);
+}
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_next_with_aux_fn);
+
+static_always_inline void
+vlib_buffer_enqueue_to_single_next_fn_inline (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ u32 *buffers, u32 *aux_data,
+ u16 next_index, u32 count,
+ u8 with_aux)
+{
+ u32 *to_next, *to_next_aux, n_left_to_next, n_enq;
+
+ if (with_aux)
+ vlib_get_next_frame_with_aux (vm, node, next_index, to_next, to_next_aux,
+ n_left_to_next);
+ else
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
if (PREDICT_TRUE (n_left_to_next >= count))
{
vlib_buffer_copy_indices (to_next, buffers, count);
+ if (with_aux)
+ vlib_buffer_copy_indices (to_next_aux, aux_data, count);
n_left_to_next -= count;
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
return;
@@ -152,22 +211,49 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn)
n_enq = n_left_to_next;
next:
vlib_buffer_copy_indices (to_next, buffers, n_enq);
+ if (with_aux)
+ vlib_buffer_copy_indices (to_next_aux, aux_data, n_enq);
n_left_to_next -= n_enq;
if (PREDICT_FALSE (count > n_enq))
{
count -= n_enq;
buffers += n_enq;
+ if (with_aux)
+ aux_data += n_enq;
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ if (with_aux)
+ vlib_get_next_frame_with_aux (vm, node, next_index, to_next,
+ to_next_aux, n_left_to_next);
+ else
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
n_enq = clib_min (n_left_to_next, count);
goto next;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
+
+void __clib_section (".vlib_buffer_enqueue_to_single_next_fn")
+CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn)
+(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 next_index,
+ u32 count)
+{
+ vlib_buffer_enqueue_to_single_next_fn_inline (
+ vm, node, buffers, NULL, next_index, count, 0 /* with_aux */);
+}
CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_single_next_fn);
+void __clib_section (".vlib_buffer_enqueue_to_single_next_with_aux_fn")
+CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_with_aux_fn)
+(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u32 *aux_data,
+ u16 next_index, u32 count)
+{
+ vlib_buffer_enqueue_to_single_next_fn_inline (
+ vm, node, buffers, aux_data, next_index, count, 1 /* with_aux */);
+}
+CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_single_next_with_aux_fn);
+
static inline vlib_frame_queue_elt_t *
vlib_get_frame_queue_elt (vlib_frame_queue_main_t *fqm, u32 index,
int dont_wait)
@@ -175,7 +261,7 @@ vlib_get_frame_queue_elt (vlib_frame_queue_main_t *fqm, u32 index,
vlib_frame_queue_t *fq;
u64 nelts, tail, new_tail;
- fq = fqm->vlib_frame_queues[index];
+ fq = vec_elt (fqm->vlib_frame_queues, index);
ASSERT (fq);
nelts = fq->nelts;
@@ -205,11 +291,11 @@ vlib_buffer_enqueue_to_thread_inline (vlib_main_t *vm,
vlib_node_runtime_t *node,
vlib_frame_queue_main_t *fqm,
u32 *buffer_indices, u16 *thread_indices,
- u32 n_packets, int drop_on_congestion)
+ u32 n_packets, int drop_on_congestion,
+ int with_aux, u32 *aux_data)
{
u32 drop_list[VLIB_FRAME_SIZE], n_drop = 0;
- u64 used_elts[VLIB_FRAME_SIZE / 64] = {};
- u64 mask[VLIB_FRAME_SIZE / 64];
+ vlib_frame_bitmap_t mask, used_elts = {};
vlib_frame_queue_elt_t *hf = 0;
u16 thread_index;
u32 n_comp, off = 0, n_left = n_packets;
@@ -222,6 +308,9 @@ more:
n_comp = clib_compress_u32 (hf ? hf->buffer_index : drop_list + n_drop,
buffer_indices, mask, n_packets);
+ if (with_aux)
+ clib_compress_u32 (hf ? hf->aux_data : drop_list + n_drop, aux_data, mask,
+ n_packets);
if (hf)
{
@@ -238,8 +327,7 @@ more:
if (n_left)
{
- for (int i = 0; i < ARRAY_LEN (used_elts); i++)
- used_elts[i] |= mask[i];
+ vlib_frame_bitmap_or (used_elts, mask);
while (PREDICT_FALSE (used_elts[off] == ~0))
{
@@ -274,7 +362,7 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
{
n_enq += vlib_buffer_enqueue_to_thread_inline (
vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE,
- drop_on_congestion);
+ drop_on_congestion, 0 /* with_aux */, NULL);
buffer_indices += VLIB_FRAME_SIZE;
thread_indices += VLIB_FRAME_SIZE;
n_packets -= VLIB_FRAME_SIZE;
@@ -283,24 +371,58 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
if (n_packets == 0)
return n_enq;
- n_enq += vlib_buffer_enqueue_to_thread_inline (vm, node, fqm, buffer_indices,
- thread_indices, n_packets,
- drop_on_congestion);
+ n_enq += vlib_buffer_enqueue_to_thread_inline (
+ vm, node, fqm, buffer_indices, thread_indices, n_packets,
+ drop_on_congestion, 0 /* with_aux */, NULL);
+
+ return n_enq;
+}
+
+u32 __clib_section (".vlib_buffer_enqueue_to_thread_with_aux_fn")
+CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_with_aux_fn)
+(vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
+ u32 *buffer_indices, u32 *aux, u16 *thread_indices, u32 n_packets,
+ int drop_on_congestion)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_frame_queue_main_t *fqm;
+ u32 n_enq = 0;
+
+ fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
+
+ while (n_packets >= VLIB_FRAME_SIZE)
+ {
+ n_enq += vlib_buffer_enqueue_to_thread_inline (
+ vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE,
+ drop_on_congestion, 1 /* with_aux */, aux);
+ buffer_indices += VLIB_FRAME_SIZE;
+ thread_indices += VLIB_FRAME_SIZE;
+ n_packets -= VLIB_FRAME_SIZE;
+ }
+
+ if (n_packets == 0)
+ return n_enq;
+
+ n_enq += vlib_buffer_enqueue_to_thread_inline (
+ vm, node, fqm, buffer_indices, thread_indices, n_packets,
+ drop_on_congestion, 1 /* with_aux */, aux);
return n_enq;
}
CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn);
+CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_with_aux_fn);
-u32 __clib_section (".vlib_frame_queue_dequeue_fn")
-CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
-(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+static_always_inline u32
+vlib_frame_queue_dequeue_inline (vlib_main_t *vm, vlib_frame_queue_main_t *fqm,
+ u8 with_aux)
{
u32 thread_id = vm->thread_index;
vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
u32 mask = fq->nelts - 1;
vlib_frame_queue_elt_t *elt;
- u32 n_free, n_copy, *from, *to = 0, processed = 0, vectors = 0;
+ u32 n_free, n_copy, *from, *from_aux, *to = 0, *to_aux = 0, processed = 0,
+ vectors = 0;
vlib_frame_t *f = 0;
ASSERT (fq);
@@ -357,13 +479,16 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
break;
from = elt->buffer_index + elt->offset;
-
+ if (with_aux)
+ from_aux = elt->aux_data + elt->offset;
ASSERT (elt->offset + elt->n_vectors <= VLIB_FRAME_SIZE);
if (f == 0)
{
f = vlib_get_frame_to_node (vm, fqm->node_index);
to = vlib_frame_vector_args (f);
+ if (with_aux)
+ to_aux = vlib_frame_aux_args (f);
n_free = VLIB_FRAME_SIZE;
}
@@ -374,6 +499,12 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
vlib_buffer_copy_indices (to, from, n_copy);
to += n_copy;
+ if (with_aux)
+ {
+ vlib_buffer_copy_indices (to_aux, from_aux, n_copy);
+ to_aux += n_copy;
+ }
+
n_free -= n_copy;
vectors += n_copy;
@@ -413,8 +544,24 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
return processed;
}
+u32 __clib_section (".vlib_frame_queue_dequeue_fn")
+CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
+(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+{
+ return vlib_frame_queue_dequeue_inline (vm, fqm, 0 /* with_aux */);
+}
+
CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_fn);
+u32 __clib_section (".vlib_frame_queue_dequeue_with_aux_fn")
+CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_with_aux_fn)
+(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+{
+ return vlib_frame_queue_dequeue_inline (vm, fqm, 1 /* with_aux */);
+}
+
+CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_with_aux_fn);
+
#ifndef CLIB_MARCH_VARIANT
vlib_buffer_func_main_t vlib_buffer_func_main;
@@ -424,12 +571,16 @@ vlib_buffer_funcs_init (vlib_main_t *vm)
vlib_buffer_func_main_t *bfm = &vlib_buffer_func_main;
bfm->buffer_enqueue_to_next_fn =
CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_next_fn);
+ bfm->buffer_enqueue_to_next_with_aux_fn =
+ CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_next_with_aux_fn);
bfm->buffer_enqueue_to_single_next_fn =
CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
+ bfm->buffer_enqueue_to_single_next_with_aux_fn =
+ CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_with_aux_fn);
bfm->buffer_enqueue_to_thread_fn =
CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
- bfm->frame_queue_dequeue_fn =
- CLIB_MARCH_FN_POINTER (vlib_frame_queue_dequeue_fn);
+ bfm->buffer_enqueue_to_thread_with_aux_fn =
+ CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_with_aux_fn);
return 0;
}
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 8b8a3911776..010289ce2be 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -42,6 +42,7 @@
#include <vppinfra/hash.h>
#include <vppinfra/fifo.h>
+#include <vppinfra/vector/index_to_ptr.h>
#include <vlib/buffer.h>
#include <vlib/physmem_funcs.h>
#include <vlib/main.h>
@@ -55,24 +56,38 @@ typedef void (vlib_buffer_enqueue_to_next_fn_t) (vlib_main_t *vm,
vlib_node_runtime_t *node,
u32 *buffers, u16 *nexts,
uword count);
+typedef void (vlib_buffer_enqueue_to_next_with_aux_fn_t) (
+ vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u32 *aux_data,
+ u16 *nexts, uword count);
typedef void (vlib_buffer_enqueue_to_single_next_fn_t) (
vlib_main_t *vm, vlib_node_runtime_t *node, u32 *ers, u16 next_index,
u32 count);
+typedef void (vlib_buffer_enqueue_to_single_next_with_aux_fn_t) (
+ vlib_main_t *vm, vlib_node_runtime_t *node, u32 *ers, u32 *aux_data,
+ u16 next_index, u32 count);
+
typedef u32 (vlib_buffer_enqueue_to_thread_fn_t) (
vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
u32 *buffer_indices, u16 *thread_indices, u32 n_packets,
int drop_on_congestion);
-typedef u32 (vlib_frame_queue_dequeue_fn_t) (vlib_main_t *vm,
- vlib_frame_queue_main_t *fqm);
+typedef u32 (vlib_buffer_enqueue_to_thread_with_aux_fn_t) (
+ vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
+ u32 *buffer_indices, u32 *aux, u16 *thread_indices, u32 n_packets,
+ int drop_on_congestion);
typedef struct
{
vlib_buffer_enqueue_to_next_fn_t *buffer_enqueue_to_next_fn;
+ vlib_buffer_enqueue_to_next_with_aux_fn_t
+ *buffer_enqueue_to_next_with_aux_fn;
vlib_buffer_enqueue_to_single_next_fn_t *buffer_enqueue_to_single_next_fn;
+ vlib_buffer_enqueue_to_single_next_with_aux_fn_t
+ *buffer_enqueue_to_single_next_with_aux_fn;
vlib_buffer_enqueue_to_thread_fn_t *buffer_enqueue_to_thread_fn;
- vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
+ vlib_buffer_enqueue_to_thread_with_aux_fn_t
+ *buffer_enqueue_to_thread_with_aux_fn;
} vlib_buffer_func_main_t;
extern vlib_buffer_func_main_t vlib_buffer_func_main;
@@ -166,7 +181,6 @@ vlib_buffer_copy_indices_to_ring (u32 * ring, u32 * src, u32 start,
}
}
-STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64);
static_always_inline void
vlib_buffer_copy_template (vlib_buffer_t * b, vlib_buffer_t * bt)
{
@@ -201,102 +215,38 @@ vlib_buffer_pool_get_default_for_numa (vlib_main_t * vm, u32 numa_node)
@param offset - (i32) offset applied to each pointer
*/
static_always_inline void
-vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count,
+vlib_get_buffers_with_offset (vlib_main_t *vm, u32 *bi, void **b, u32 count,
i32 offset)
{
uword buffer_mem_start = vm->buffer_main->buffer_mem_start;
-#ifdef CLIB_HAVE_VEC512
- u64x8 of8 = u64x8_splat (buffer_mem_start + offset);
- u64x4 off = u64x8_extract_lo (of8);
- /* if count is not const, compiler will not unroll while loop
- se we maintain two-in-parallel variant */
- while (count >= 32)
- {
- u64x8 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (bi));
- u64x8 b1 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 8));
- u64x8 b2 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 16));
- u64x8 b3 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 24));
- /* shift and add to get vlib_buffer_t pointer */
- u64x8_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b);
- u64x8_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 8);
- u64x8_store_unaligned ((b2 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 16);
- u64x8_store_unaligned ((b3 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 24);
- b += 32;
- bi += 32;
- count -= 32;
- }
- while (count >= 8)
- {
- u64x8 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (bi));
- /* shift and add to get vlib_buffer_t pointer */
- u64x8_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b);
- b += 8;
- bi += 8;
- count -= 8;
- }
-#elif defined CLIB_HAVE_VEC256
- u64x4 off = u64x4_splat (buffer_mem_start + offset);
- /* if count is not const, compiler will not unroll while loop
- se we maintain two-in-parallel variant */
- while (count >= 32)
- {
- u64x4 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (bi));
- u64x4 b1 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 4));
- u64x4 b2 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 8));
- u64x4 b3 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 12));
- u64x4 b4 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 16));
- u64x4 b5 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 20));
- u64x4 b6 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 24));
- u64x4 b7 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 28));
- /* shift and add to get vlib_buffer_t pointer */
- u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
- u64x4_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 4);
- u64x4_store_unaligned ((b2 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 8);
- u64x4_store_unaligned ((b3 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 12);
- u64x4_store_unaligned ((b4 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 16);
- u64x4_store_unaligned ((b5 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 20);
- u64x4_store_unaligned ((b6 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 24);
- u64x4_store_unaligned ((b7 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 28);
- b += 32;
- bi += 32;
- count -= 32;
- }
-#endif
- while (count >= 4)
- {
-#ifdef CLIB_HAVE_VEC256
- u64x4 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (bi));
- /* shift and add to get vlib_buffer_t pointer */
- u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
-#elif defined (CLIB_HAVE_VEC128)
- u64x2 off = u64x2_splat (buffer_mem_start + offset);
- u32x4 bi4 = u32x4_load_unaligned (bi);
- u64x2 b0 = u64x2_from_u32x4 ((u32x4) bi4);
-#if defined (__aarch64__)
- u64x2 b1 = u64x2_from_u32x4_high ((u32x4) bi4);
-#else
- bi4 = u32x4_shuffle (bi4, 2, 3, 0, 1);
- u64x2 b1 = u64x2_from_u32x4 ((u32x4) bi4);
-#endif
- u64x2_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b);
- u64x2_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 2);
-#else
- b[0] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[0], offset);
- b[1] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[1], offset);
- b[2] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[2], offset);
- b[3] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[3], offset);
-#endif
- b += 4;
- bi += 4;
- count -= 4;
- }
- while (count)
+ void *base = (void *) (buffer_mem_start + offset);
+ int objsize = __builtin_object_size (b, 0);
+ const int sh = CLIB_LOG2_CACHE_LINE_BYTES;
+
+ if (COMPILE_TIME_CONST (count) == 0 && objsize >= 64 * sizeof (b[0]) &&
+ (objsize & ((8 * sizeof (b[0])) - 1)) == 0)
{
- b[0] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[0], offset);
- b += 1;
- bi += 1;
- count -= 1;
+ u32 n = round_pow2 (count, 8);
+ ASSERT (objsize >= count);
+ CLIB_ASSUME (objsize >= count);
+ while (n >= 64)
+ {
+ clib_index_to_ptr_u32 (bi, base, sh, b, 64);
+ b += 64;
+ bi += 64;
+ n -= 64;
+ }
+
+ while (n)
+ {
+ clib_index_to_ptr_u32 (bi, base, sh, b, 8);
+ b += 8;
+ bi += 8;
+ n -= 8;
+ }
}
+ else
+ clib_index_to_ptr_u32 (bi, base, sh, b, count);
}
/** \brief Translate array of buffer indices into buffer pointers
@@ -308,7 +258,7 @@ vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count,
*/
static_always_inline void
-vlib_get_buffers (vlib_main_t * vm, u32 * bi, vlib_buffer_t ** b, int count)
+vlib_get_buffers (vlib_main_t *vm, u32 *bi, vlib_buffer_t **b, u32 count)
{
vlib_get_buffers_with_offset (vm, bi, (void **) b, count, 0);
}
@@ -626,11 +576,7 @@ vlib_buffer_alloc_from_pool (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
src = bpt->cached_buffers + len - n_buffers;
vlib_buffer_copy_indices (dst, src, n_buffers);
bpt->n_cached -= n_buffers;
-
- if (CLIB_DEBUG > 0)
- vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
- VLIB_BUFFER_KNOWN_FREE);
- return n_buffers;
+ goto done;
}
/* alloc bigger than cache - take buffers directly from main pool */
@@ -638,11 +584,7 @@ vlib_buffer_alloc_from_pool (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
{
n_buffers = vlib_buffer_pool_get (vm, buffer_pool_index, buffers,
n_buffers);
-
- if (CLIB_DEBUG > 0)
- vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
- VLIB_BUFFER_KNOWN_FREE);
- return n_buffers;
+ goto done;
}
/* take everything available in the cache */
@@ -670,11 +612,13 @@ vlib_buffer_alloc_from_pool (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
n_buffers -= n_left;
+done:
/* Verify that buffers are known free. */
if (CLIB_DEBUG > 0)
vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
VLIB_BUFFER_KNOWN_FREE);
-
+ if (PREDICT_FALSE (bm->alloc_callback_fn != 0))
+ bm->alloc_callback_fn (vm, buffer_pool_index, buffers, n_buffers);
return n_buffers;
}
@@ -776,6 +720,7 @@ static_always_inline void
vlib_buffer_pool_put (vlib_main_t * vm, u8 buffer_pool_index,
u32 * buffers, u32 n_buffers)
{
+ vlib_buffer_main_t *bm = vm->buffer_main;
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
vlib_buffer_pool_thread_t *bpt = vec_elt_at_index (bp->threads,
vm->thread_index);
@@ -784,6 +729,8 @@ vlib_buffer_pool_put (vlib_main_t * vm, u8 buffer_pool_index,
if (CLIB_DEBUG > 0)
vlib_buffer_validate_alloc_free (vm, buffers, n_buffers,
VLIB_BUFFER_KNOWN_ALLOCATED);
+ if (PREDICT_FALSE (bm->free_callback_fn != 0))
+ bm->free_callback_fn (vm, buffer_pool_index, buffers, n_buffers);
n_cached = bpt->n_cached;
n_empty = VLIB_BUFFER_POOL_PER_THREAD_CACHE_SZ - n_cached;
@@ -806,6 +753,23 @@ vlib_buffer_pool_put (vlib_main_t * vm, u8 buffer_pool_index,
clib_spinlock_unlock (&bp->lock);
}
+/** \brief return unused buffers back to pool
+ This function can be used to return buffers back to pool without going
+ through vlib_buffer_free. Buffer metadata must not be modified in any
+ way before buffers are returned.
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers to free
+ @param buffer_pool_index - (u8) buffer pool index
+*/
+always_inline void
+vlib_buffer_unalloc_to_pool (vlib_main_t *vm, u32 *buffers, u32 n_buffers,
+ u8 buffer_pool_index)
+{
+ vlib_buffer_pool_put (vm, buffer_pool_index, buffers, n_buffers);
+}
+
static_always_inline void
vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
int maybe_next)
@@ -813,8 +777,8 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
const int queue_size = 128;
vlib_buffer_pool_t *bp = 0;
u8 buffer_pool_index = ~0;
- u32 n_queue = 0, queue[queue_size + 4];
- vlib_buffer_t bt = { };
+ u32 n_queue = 0, queue[queue_size + 8];
+ vlib_buffer_template_t bt = {};
#if defined(CLIB_HAVE_VEC128)
vlib_buffer_t bpi_mask = {.buffer_pool_index = ~0 };
vlib_buffer_t bpi_vec = {};
@@ -830,7 +794,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
vlib_buffer_t *b = vlib_get_buffer (vm, buffers[0]);
buffer_pool_index = b->buffer_pool_index;
bp = vlib_get_buffer_pool (vm, buffer_pool_index);
- vlib_buffer_copy_template (&bt, &bp->buffer_template);
+ bt = bp->buffer_template;
#if defined(CLIB_HAVE_VEC128)
bpi_vec.buffer_pool_index = buffer_pool_index;
#endif
@@ -840,9 +804,16 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
vlib_buffer_t *b[8];
u32 bi, sum = 0, flags, next;
+#if defined(CLIB_HAVE_VEC512)
+ if (n_buffers < 8)
+#else
if (n_buffers < 4)
+#endif
goto one_by_one;
+#if defined(CLIB_HAVE_VEC512)
+ vlib_get_buffers (vm, buffers, b, 8);
+#else
vlib_get_buffers (vm, buffers, b, 4);
if (n_buffers >= 12)
@@ -853,8 +824,33 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
vlib_prefetch_buffer_header (b[6], LOAD);
vlib_prefetch_buffer_header (b[7], LOAD);
}
+#endif
-#if defined(CLIB_HAVE_VEC128)
+#if defined(CLIB_HAVE_VEC512)
+ u8x16 p0, p1, p2, p3, p4, p5, p6, p7, r;
+ p0 = u8x16_load_unaligned (b[0]);
+ p1 = u8x16_load_unaligned (b[1]);
+ p2 = u8x16_load_unaligned (b[2]);
+ p3 = u8x16_load_unaligned (b[3]);
+ p4 = u8x16_load_unaligned (b[4]);
+ p5 = u8x16_load_unaligned (b[5]);
+ p6 = u8x16_load_unaligned (b[6]);
+ p7 = u8x16_load_unaligned (b[7]);
+
+ r = p0 ^ bpi_vec.as_u8x16[0];
+ r |= p1 ^ bpi_vec.as_u8x16[0];
+ r |= p2 ^ bpi_vec.as_u8x16[0];
+ r |= p3 ^ bpi_vec.as_u8x16[0];
+ r |= p4 ^ bpi_vec.as_u8x16[0];
+ r |= p5 ^ bpi_vec.as_u8x16[0];
+ r |= p6 ^ bpi_vec.as_u8x16[0];
+ r |= p7 ^ bpi_vec.as_u8x16[0];
+ r &= bpi_mask.as_u8x16[0];
+ r |=
+ (p0 | p1 | p2 | p3 | p4 | p5 | p6 | p7) & flags_refs_mask.as_u8x16[0];
+
+ sum = !u8x16_is_all_zero (r);
+#elif defined(CLIB_HAVE_VEC128)
u8x16 p0, p1, p2, p3, r;
p0 = u8x16_load_unaligned (b[0]);
p1 = u8x16_load_unaligned (b[1]);
@@ -888,11 +884,41 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
if (sum)
goto one_by_one;
+#if defined(CLIB_HAVE_VEC512)
+ vlib_buffer_copy_indices (queue + n_queue, buffers, 8);
+ b[0]->template = bt;
+ b[1]->template = bt;
+ b[2]->template = bt;
+ b[3]->template = bt;
+ b[4]->template = bt;
+ b[5]->template = bt;
+ b[6]->template = bt;
+ b[7]->template = bt;
+ n_queue += 8;
+
+ vlib_buffer_validate (vm, b[0]);
+ vlib_buffer_validate (vm, b[1]);
+ vlib_buffer_validate (vm, b[2]);
+ vlib_buffer_validate (vm, b[3]);
+ vlib_buffer_validate (vm, b[4]);
+ vlib_buffer_validate (vm, b[5]);
+ vlib_buffer_validate (vm, b[6]);
+ vlib_buffer_validate (vm, b[7]);
+
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[4]);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[5]);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[6]);
+ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[7]);
+#else
vlib_buffer_copy_indices (queue + n_queue, buffers, 4);
- vlib_buffer_copy_template (b[0], &bt);
- vlib_buffer_copy_template (b[1], &bt);
- vlib_buffer_copy_template (b[2], &bt);
- vlib_buffer_copy_template (b[3], &bt);
+ b[0]->template = bt;
+ b[1]->template = bt;
+ b[2]->template = bt;
+ b[3]->template = bt;
n_queue += 4;
vlib_buffer_validate (vm, b[0]);
@@ -904,14 +930,20 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]);
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]);
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]);
+#endif
if (n_queue >= queue_size)
{
vlib_buffer_pool_put (vm, buffer_pool_index, queue, n_queue);
n_queue = 0;
}
+#if defined(CLIB_HAVE_VEC512)
+ buffers += 8;
+ n_buffers -= 8;
+#else
buffers += 4;
n_buffers -= 4;
+#endif
continue;
one_by_one:
@@ -936,7 +968,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
bpi_vec.buffer_pool_index = buffer_pool_index;
#endif
bp = vlib_get_buffer_pool (vm, buffer_pool_index);
- vlib_buffer_copy_template (&bt, &bp->buffer_template);
+ bt = bp->buffer_template;
}
vlib_buffer_validate (vm, b[0]);
@@ -945,7 +977,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
if (clib_atomic_sub_fetch (&b[0]->ref_count, 1) == 0)
{
- vlib_buffer_copy_template (b[0], &bt);
+ b[0]->template = bt;
queue[n_queue++] = bi;
}
@@ -1468,139 +1500,148 @@ vlib_buffer_space_left_at_end (vlib_main_t * vm, vlib_buffer_t * b)
((u8 *) vlib_buffer_get_current (b) + b->current_length);
}
+#define VLIB_BUFFER_LINEARIZE_MAX 64
+
always_inline u32
vlib_buffer_chain_linearize (vlib_main_t * vm, vlib_buffer_t * b)
{
- vlib_buffer_t *db = b, *sb, *first = b;
- int is_cloned = 0;
- u32 bytes_left = 0, data_size;
- u16 src_left, dst_left, n_buffers = 1;
- u8 *dp, *sp;
- u32 to_free = 0;
+ vlib_buffer_t *dst_b;
+ u32 n_buffers = 1, to_free = 0;
+ u16 rem_len, dst_len, data_size, src_len = 0;
+ u8 *dst, *src = 0;
if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0))
return 1;
+ ASSERT (1 == b->ref_count);
+ if (PREDICT_FALSE (1 != b->ref_count))
+ return 0;
+
data_size = vlib_buffer_get_default_data_size (vm);
+ rem_len = vlib_buffer_length_in_chain (vm, b) - b->current_length;
- dst_left = vlib_buffer_space_left_at_end (vm, b);
+ dst_b = b;
+ dst = vlib_buffer_get_tail (dst_b);
+ dst_len = vlib_buffer_space_left_at_end (vm, dst_b);
- while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- b = vlib_get_buffer (vm, b->next_buffer);
- if (b->ref_count > 1)
- is_cloned = 1;
- bytes_left += b->current_length;
- n_buffers++;
- }
+ b->total_length_not_including_first_buffer -= dst_len;
- /* if buffer is cloned, create completely new chain - unless everything fits
- * into one buffer */
- if (is_cloned && bytes_left >= dst_left)
+ while (rem_len > 0)
{
- u32 len = 0;
- u32 space_needed = bytes_left - dst_left;
- u32 tail;
+ u16 copy_len;
- if (vlib_buffer_alloc (vm, &tail, 1) == 0)
- return 0;
-
- ++n_buffers;
- len += data_size;
- b = vlib_get_buffer (vm, tail);
-
- while (len < space_needed)
+ while (0 == src_len)
{
- u32 bi;
- if (vlib_buffer_alloc (vm, &bi, 1) == 0)
- {
- vlib_buffer_free_one (vm, tail);
- return 0;
- }
- b->flags = VLIB_BUFFER_NEXT_PRESENT;
- b->next_buffer = bi;
- b = vlib_get_buffer (vm, bi);
- len += data_size;
- n_buffers++;
+ ASSERT (b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ if (PREDICT_FALSE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
+ break; /* malformed chained buffer */
+
+ b = vlib_get_buffer (vm, b->next_buffer);
+ src = vlib_buffer_get_current (b);
+ src_len = b->current_length;
}
- sb = vlib_get_buffer (vm, first->next_buffer);
- to_free = first->next_buffer;
- first->next_buffer = tail;
- }
- else
- sb = vlib_get_buffer (vm, first->next_buffer);
- src_left = sb->current_length;
- sp = vlib_buffer_get_current (sb);
- dp = vlib_buffer_get_tail (db);
+ if (0 == dst_len)
+ {
+ ASSERT (dst_b->flags & VLIB_BUFFER_NEXT_PRESENT);
+ if (PREDICT_FALSE (!(dst_b->flags & VLIB_BUFFER_NEXT_PRESENT)))
+ break; /* malformed chained buffer */
- while (bytes_left)
- {
- u16 bytes_to_copy;
+ vlib_buffer_t *next_dst_b = vlib_get_buffer (vm, dst_b->next_buffer);
- if (dst_left == 0)
- {
- db->current_length = dp - (u8 *) vlib_buffer_get_current (db);
- ASSERT (db->flags & VLIB_BUFFER_NEXT_PRESENT);
- db = vlib_get_buffer (vm, db->next_buffer);
- dst_left = data_size;
- if (db->current_data > 0)
+ if (PREDICT_TRUE (1 == next_dst_b->ref_count))
{
- db->current_data = 0;
+ /* normal case: buffer is not cloned, just use it */
+ dst_b = next_dst_b;
}
else
{
- dst_left += -db->current_data;
+ /* cloned buffer, build a new dest chain from there */
+ vlib_buffer_t *bufs[VLIB_BUFFER_LINEARIZE_MAX];
+ u32 bis[VLIB_BUFFER_LINEARIZE_MAX + 1];
+ const int n = (rem_len + data_size - 1) / data_size;
+ int n_alloc;
+ int i;
+
+ ASSERT (n <= VLIB_BUFFER_LINEARIZE_MAX);
+ if (PREDICT_FALSE (n > VLIB_BUFFER_LINEARIZE_MAX))
+ return 0;
+
+ n_alloc = vlib_buffer_alloc (vm, bis, n);
+ if (PREDICT_FALSE (n_alloc != n))
+ {
+ vlib_buffer_free (vm, bis, n_alloc);
+ return 0;
+ }
+
+ vlib_get_buffers (vm, bis, bufs, n);
+
+ for (i = 0; i < n - 1; i++)
+ {
+ bufs[i]->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ bufs[i]->next_buffer = bis[i + 1];
+ }
+
+ to_free = dst_b->next_buffer;
+ dst_b->next_buffer = bis[0];
+ dst_b = bufs[0];
}
- dp = vlib_buffer_get_current (db);
- }
- while (src_left == 0)
- {
- ASSERT (sb->flags & VLIB_BUFFER_NEXT_PRESENT);
- sb = vlib_get_buffer (vm, sb->next_buffer);
- src_left = sb->current_length;
- sp = vlib_buffer_get_current (sb);
+ n_buffers++;
+
+ dst_b->current_data = clib_min (0, dst_b->current_data);
+ dst_b->current_length = 0;
+
+ dst = dst_b->data + dst_b->current_data;
+ dst_len = data_size - dst_b->current_data;
}
- bytes_to_copy = clib_min (dst_left, src_left);
+ copy_len = clib_min (src_len, dst_len);
- if (dp != sp)
+ if (PREDICT_TRUE (src == dst))
{
- if (sb == db)
- bytes_to_copy = clib_min (bytes_to_copy, sp - dp);
-
- clib_memcpy_fast (dp, sp, bytes_to_copy);
+ /* nothing to do */
+ }
+ else if (src + copy_len > dst && dst + copy_len > src)
+ {
+ /* src and dst overlap */
+ ASSERT (b == dst_b);
+ memmove (dst, src, copy_len);
+ }
+ else
+ {
+ clib_memcpy_fast (dst, src, copy_len);
}
- src_left -= bytes_to_copy;
- dst_left -= bytes_to_copy;
- dp += bytes_to_copy;
- sp += bytes_to_copy;
- bytes_left -= bytes_to_copy;
+ dst_b->current_length += copy_len;
+
+ dst += copy_len;
+ src += copy_len;
+ dst_len -= copy_len;
+ src_len -= copy_len;
+ rem_len -= copy_len;
}
- if (db != first)
- db->current_data = 0;
- db->current_length = dp - (u8 *) vlib_buffer_get_current (db);
- if (is_cloned && to_free)
+ /* in case of a malformed chain buffer, we'll exit early from the loop. */
+ ASSERT (0 == rem_len);
+ b->total_length_not_including_first_buffer -= rem_len;
+
+ if (to_free)
vlib_buffer_free_one (vm, to_free);
- else
+
+ if (dst_b->flags & VLIB_BUFFER_NEXT_PRESENT)
{
- if (db->flags & VLIB_BUFFER_NEXT_PRESENT)
- vlib_buffer_free_one (vm, db->next_buffer);
- db->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
- b = first;
- n_buffers = 1;
- while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ /* the resulting chain is smaller than the original, cut it there */
+ dst_b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ vlib_buffer_free_one (vm, dst_b->next_buffer);
+ if (1 == n_buffers)
{
- b = vlib_get_buffer (vm, b->next_buffer);
- ++n_buffers;
+ /* no longer a chained buffer */
+ dst_b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ dst_b->total_length_not_including_first_buffer = 0;
}
}
- first->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
-
return n_buffers;
}
diff --git a/src/vlib/buffer_node.h b/src/vlib/buffer_node.h
index 9ca43d425fc..c0268b21562 100644
--- a/src/vlib/buffer_node.h
+++ b/src/vlib/buffer_node.h
@@ -236,6 +236,53 @@ do { \
} \
} while (0)
+/** \brief Finish enqueueing one buffer forward in the graph, along with its
+ aux_data if possible. Standard single loop boilerplate element. This is a
+ MACRO, with MULTIPLE SIDE EFFECTS. In the ideal case, <code>next_index ==
+ next0</code>, which means that the speculative enqueue at the top of the
+ single loop has correctly dealt with the packet in hand. In that case, the
+ macro does nothing at all. This function MAY return to_next_aux = NULL if
+ next_index does not support aux data
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+ @param next_index speculated next index used for both packets
+ @param to_next speculated vector pointer used for both packets
+ @param to_next_aux speculated aux_data pointer used for both packets
+ @param n_left_to_next number of slots left in speculated vector
+ @param bi0 first buffer index
+ @param aux0 first aux_data
+ @param next0 actual next index to be used for the first packet
+
+ @return @c next_index -- speculative next index to be used for future packets
+ @return @c to_next -- speculative frame to be used for future packets
+ @return @c n_left_to_next -- number of slots left in speculative frame
+*/
+#define vlib_validate_buffer_enqueue_with_aux_x1( \
+ vm, node, next_index, to_next, to_next_aux, n_left_to_next, bi0, aux0, \
+ next0) \
+ do \
+ { \
+ ASSERT (bi0 != 0); \
+ if (PREDICT_FALSE (next0 != next_index)) \
+ { \
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); \
+ next_index = next0; \
+ vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next, \
+ to_next_aux, n_left_to_next); \
+ \
+ to_next[0] = bi0; \
+ to_next += 1; \
+ if (to_next_aux) \
+ { \
+ to_next_aux[0] = aux0; \
+ to_next_aux += 1; \
+ } \
+ n_left_to_next -= 1; \
+ } \
+ } \
+ while (0)
+
always_inline uword
generic_buffer_node_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -335,6 +382,17 @@ generic_buffer_node_inline (vlib_main_t * vm,
return frame->n_vectors;
}
+/* Minimum size for the 'buffers' and 'nexts' arrays to be used when calling
+ * vlib_buffer_enqueue_to_next().
+ * Because of optimizations, vlib_buffer_enqueue_to_next() will access
+ * past 'count' elements in the 'buffers' and 'nexts' arrays, IOW it
+ * will overflow.
+ * Those overflow elements are ignored in the final result so they do not
+ * need to be properly initialized, however if the array is allocated right
+ * before the end of a page and the next page is not mapped, accessing the
+ * overflow elements will trigger a segfault. */
+#define VLIB_BUFFER_ENQUEUE_MIN_SIZE(n) round_pow2 ((n), 64)
+
static_always_inline void
vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 * buffers, u16 * nexts, uword count)
@@ -345,6 +403,30 @@ vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node,
}
static_always_inline void
+vlib_buffer_enqueue_to_next_with_aux (vlib_main_t *vm,
+ vlib_node_runtime_t *node, u32 *buffers,
+ u32 *aux_data, u16 *nexts, uword count)
+{
+ vlib_buffer_enqueue_to_next_with_aux_fn_t *fn;
+ fn = vlib_buffer_func_main.buffer_enqueue_to_next_with_aux_fn;
+ (fn) (vm, node, buffers, aux_data, nexts, count);
+}
+
+static_always_inline void
+vlib_buffer_enqueue_to_next_vec (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 **buffers, u16 **nexts, uword count)
+{
+ const u32 bl = vec_len (*buffers), nl = vec_len (*nexts);
+ const u32 c = VLIB_BUFFER_ENQUEUE_MIN_SIZE (count);
+ ASSERT (bl >= count && nl >= count);
+ vec_validate (*buffers, c);
+ vec_validate (*nexts, c);
+ vlib_buffer_enqueue_to_next (vm, node, *buffers, *nexts, count);
+ vec_set_len (*buffers, bl);
+ vec_set_len (*nexts, nl);
+}
+
+static_always_inline void
vlib_buffer_enqueue_to_single_next (vlib_main_t * vm,
vlib_node_runtime_t * node, u32 * buffers,
u16 next_index, u32 count)
@@ -354,6 +436,17 @@ vlib_buffer_enqueue_to_single_next (vlib_main_t * vm,
(fn) (vm, node, buffers, next_index, count);
}
+static_always_inline void
+vlib_buffer_enqueue_to_single_next_with_aux (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ u32 *buffers, u32 *aux_data,
+ u16 next_index, u32 count)
+{
+ vlib_buffer_enqueue_to_single_next_with_aux_fn_t *fn;
+ fn = vlib_buffer_func_main.buffer_enqueue_to_single_next_with_aux_fn;
+ (fn) (vm, node, buffers, aux_data, next_index, count);
+}
+
static_always_inline u32
vlib_buffer_enqueue_to_thread (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 frame_queue_index, u32 *buffer_indices,
@@ -366,6 +459,20 @@ vlib_buffer_enqueue_to_thread (vlib_main_t *vm, vlib_node_runtime_t *node,
n_packets, drop_on_congestion);
}
+static_always_inline u32
+vlib_buffer_enqueue_to_thread_with_aux (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ u32 frame_queue_index,
+ u32 *buffer_indices, u32 *aux,
+ u16 *thread_indices, u32 n_packets,
+ int drop_on_congestion)
+{
+ vlib_buffer_enqueue_to_thread_with_aux_fn_t *fn;
+ fn = vlib_buffer_func_main.buffer_enqueue_to_thread_with_aux_fn;
+ return (fn) (vm, node, frame_queue_index, buffer_indices, aux,
+ thread_indices, n_packets, drop_on_congestion);
+}
+
#endif /* included_vlib_buffer_node_h */
/*
diff --git a/src/vlib/cli.c b/src/vlib/cli.c
index a00ae6245f9..98d57c6ccb0 100644
--- a/src/vlib/cli.c
+++ b/src/vlib/cli.c
@@ -38,6 +38,7 @@
*/
#include <vlib/vlib.h>
+#include <vlib/stats/stats.h>
#include <vlib/unix/unix.h>
#include <vppinfra/callback.h>
#include <vppinfra/cpu.h>
@@ -54,36 +55,28 @@ int vl_api_get_elog_trace_api_messages (void);
static void *current_traced_heap;
/* Root of all show commands. */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_command, static) = {
.path = "show",
.short_help = "Show commands",
};
-/* *INDENT-ON* */
/* Root of all clear commands. */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_clear_command, static) = {
.path = "clear",
.short_help = "Clear commands",
};
-/* *INDENT-ON* */
/* Root of all set commands. */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_set_command, static) = {
.path = "set",
.short_help = "Set commands",
};
-/* *INDENT-ON* */
/* Root of all test commands. */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_test_command, static) = {
.path = "test",
.short_help = "Test commands",
};
-/* *INDENT-ON* */
/* Returns bitmap of commands which match key. */
static uword *
@@ -158,6 +151,64 @@ done:
return match;
}
+uword
+unformat_vlib_cli_line (unformat_input_t *i, va_list *va)
+{
+ unformat_input_t *result = va_arg (*va, unformat_input_t *);
+ u8 *line = 0;
+ uword c;
+ int skip;
+
+next_line:
+ skip = 0;
+
+ /* skip leading whitespace if any */
+ unformat_skip_white_space (i);
+
+ if (unformat_is_eof (i))
+ return 0;
+
+ while ((c = unformat_get_input (i)) != UNFORMAT_END_OF_INPUT)
+ {
+ if (c == '\\')
+ {
+ c = unformat_get_input (i);
+
+ if (c == '\n')
+ {
+ if (!skip)
+ vec_add1 (line, '\n');
+ skip = 0;
+ continue;
+ }
+
+ if (!skip)
+ vec_add1 (line, '\\');
+
+ if (c == UNFORMAT_END_OF_INPUT)
+ break;
+
+ if (!skip)
+ vec_add1 (line, c);
+ continue;
+ }
+
+ if (c == '#')
+ skip = 1;
+ else if (c == '\n')
+ break;
+
+ if (!skip)
+ vec_add1 (line, c);
+ }
+
+ if (line == 0)
+ goto next_line;
+
+ unformat_init_vector (result, line);
+ return 1;
+}
+
/* Looks for string based sub-input formatted { SUB-INPUT }. */
uword
unformat_vlib_cli_sub_input (unformat_input_t * i, va_list * args)
@@ -304,7 +355,6 @@ vlib_cli_get_possible_completions (u8 * str)
/* if we have a space at the end of input, and a unique match,
* autocomplete the next level of subcommands */
help_next_level = (vec_len (str) == 0) || isspace (str[vec_len (str) - 1]);
- /* *INDENT-OFF* */
clib_bitmap_foreach (index, match_bitmap) {
if (help_next_level && is_unique) {
c = get_sub_command (vcm, c, index);
@@ -316,7 +366,6 @@ vlib_cli_get_possible_completions (u8 * str)
sc = &c->sub_commands[index];
vec_add1(result, (u8*) sc->name);
}
- /* *INDENT-ON* */
done:
clib_bitmap_free (match_bitmap);
@@ -566,13 +615,11 @@ vlib_cli_dispatch_sub_commands (vlib_main_t * vm,
{
if (PREDICT_FALSE (vm->elog_trace_cli_commands))
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "cli-cmd: %s",
.format_args = "T4",
};
- /* *INDENT-ON* */
struct
{
u32 c;
@@ -598,13 +645,11 @@ vlib_cli_dispatch_sub_commands (vlib_main_t * vm,
if (PREDICT_FALSE (vm->elog_trace_cli_commands))
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "cli-cmd: %s %s",
.format_args = "T4T4",
};
- /* *INDENT-ON* */
struct
{
u32 c, err;
@@ -616,7 +661,7 @@ vlib_cli_dispatch_sub_commands (vlib_main_t * vm,
vec_add1 (c_error->what, 0);
ed->err = elog_string (vlib_get_elog_main (),
(char *) c_error->what);
- _vec_len (c_error->what) -= 1;
+ vec_dec_len (c_error->what, 1);
}
else
ed->err = elog_string (vlib_get_elog_main (), "OK");
@@ -753,13 +798,6 @@ vl_msg_pop_heap (void *oldheap)
{
}
-void *vlib_stats_push_heap (void *) __attribute__ ((weak));
-void *
-vlib_stats_push_heap (void *notused)
-{
- return 0;
-}
-
static clib_error_t *
show_memory_usage (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -820,14 +858,14 @@ show_memory_usage (vlib_main_t * vm,
}
if (stats_segment)
{
- void *oldheap = vlib_stats_push_heap (0);
+ void *oldheap = vlib_stats_set_heap ();
was_enabled = clib_mem_trace_enable_disable (0);
u8 *s_in_svm = format (0, "%U\n", format_clib_mem_heap, 0, 1);
if (oldheap)
clib_mem_set_heap (oldheap);
u8 *s = vec_dup (s_in_svm);
- oldheap = vlib_stats_push_heap (0);
+ oldheap = vlib_stats_set_heap ();
vec_free (s_in_svm);
if (oldheap)
{
@@ -925,14 +963,12 @@ show_memory_usage (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_memory_usage_command, static) = {
.path = "show memory",
.short_help = "show memory [api-segment][stats-segment][verbose]\n"
- " [numa-heaps][map]",
+ " [numa-heaps][map][main-heap]",
.function = show_memory_usage,
};
-/* *INDENT-ON* */
static clib_error_t *
show_cpu (vlib_main_t * vm, unformat_input_t * input,
@@ -959,13 +995,11 @@ show_cpu (vlib_main_t * vm, unformat_input_t * input,
* Base Frequency: 3.20 GHz
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_cpu_command, static) = {
.path = "show cpu",
.short_help = "Show cpu information",
.function = show_cpu,
};
-/* *INDENT-ON* */
static clib_error_t *
enable_disable_memory_trace (vlib_main_t * vm,
@@ -1038,7 +1072,7 @@ enable_disable_memory_trace (vlib_main_t * vm,
/* Stats segment */
if (stats_segment)
{
- oldheap = vlib_stats_push_heap (0);
+ oldheap = vlib_stats_set_heap ();
current_traced_heap = clib_mem_get_heap ();
clib_mem_trace (stats_segment);
/* We don't want to call vlib_stats_pop_heap... */
@@ -1073,14 +1107,12 @@ enable_disable_memory_trace (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_disable_memory_trace_command, static) = {
.path = "memory-trace",
.short_help = "memory-trace on|off [api-segment][stats-segment][main-heap]\n"
" [numa-heap <numa-id>]\n",
.function = enable_disable_memory_trace,
};
-/* *INDENT-ON* */
static clib_error_t *
restart_cmd_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -1094,27 +1126,23 @@ restart_cmd_fn (vlib_main_t * vm, unformat_input_t * input,
extern char **environ;
/* Close all known open files */
- /* *INDENT-OFF* */
pool_foreach (f, fm->file_pool)
{
if (f->file_descriptor > 2)
close(f->file_descriptor);
}
- /* *INDENT-ON* */
/* Exec ourself */
- execve (vgm->name, (char **) vm->argv, environ);
+ execve (vgm->name, (char **) vgm->argv, environ);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (restart_cmd,static) = {
.path = "restart",
.short_help = "restart process",
.function = restart_cmd_fn,
};
-/* *INDENT-ON* */
#ifdef TEST_CODE
/*
@@ -1140,13 +1168,11 @@ sleep_ten_seconds (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ping_command, static) = {
.path = "test sleep",
.function = sleep_ten_seconds,
.short_help = "Sleep for 10 seconds",
};
-/* *INDENT-ON* */
#endif /* ifdef TEST_CODE */
static uword
@@ -1185,7 +1211,7 @@ vlib_cli_normalize_path (char *input, char **result)
/* Remove any extra space at end. */
if (l > 0 && s[l - 1] == ' ')
- _vec_len (s) -= 1;
+ vec_dec_len (s, 1);
*result = s;
return index_of_last_space;
@@ -1597,7 +1623,6 @@ print_status:
* @cliend
* @cliexcmd{event-logger trace [api][cli][barrier][disable]}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (event_logger_trace_command, static) =
{
.path = "event-logger trace",
@@ -1605,7 +1630,6 @@ VLIB_CLI_COMMAND (event_logger_trace_command, static) =
"[circuit-node <name> e.g. ethernet-input][disable]",
.function = event_logger_trace_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
suspend_command_fn (vlib_main_t * vm,
@@ -1615,7 +1639,6 @@ suspend_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (suspend_command, static) =
{
.path = "suspend",
@@ -1623,7 +1646,6 @@ VLIB_CLI_COMMAND (suspend_command, static) =
.function = suspend_command_fn,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static int
@@ -1814,7 +1836,6 @@ show_cli_command_fn (vlib_main_t * vm,
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_cli_command, static) =
{
.path = "show cli",
@@ -1822,7 +1843,6 @@ VLIB_CLI_COMMAND (show_cli_command, static) =
.function = show_cli_command_fn,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
vlib_cli_init (vlib_main_t * vm)
diff --git a/src/vlib/cli.h b/src/vlib/cli.h
index 83b4eab1851..a2f9c24d2ec 100644
--- a/src/vlib/cli.h
+++ b/src/vlib/cli.h
@@ -204,6 +204,7 @@ clib_error_t *vlib_cli_register (struct vlib_main_t *vm,
clib_error_t *vlib_cli_register_parse_rule (struct vlib_main_t *vm,
vlib_cli_parse_rule_t * c);
+unformat_function_t unformat_vlib_cli_line;
uword unformat_vlib_cli_sub_input (unformat_input_t * i, va_list * args);
/* Return an vector of strings consisting of possible auto-completions
diff --git a/src/vlib/config.h.in b/src/vlib/config.h.in
index 19ec10cfcca..b233b327d31 100644
--- a/src/vlib/config.h.in
+++ b/src/vlib/config.h.in
@@ -16,7 +16,8 @@
#ifndef included_vlib_config_h
#define included_vlib_config_h
-#define __PRE_DATA_SIZE @PRE_DATA_SIZE@
+#define VLIB_BUFFER_PRE_DATA_SIZE @PRE_DATA_SIZE@
+#define VLIB_BUFFER_ALIGN @VLIB_BUFFER_ALIGN@
#define VLIB_BUFFER_ALLOC_FAULT_INJECTOR @BUFFER_ALLOC_FAULT_INJECTOR@
#define VLIB_PROCESS_LOG2_STACK_SIZE @VLIB_PROCESS_LOG2_STACK_SIZE@
diff --git a/src/vlib/counter.c b/src/vlib/counter.c
index 186b48d869e..9f14d02909f 100644
--- a/src/vlib/counter.c
+++ b/src/vlib/counter.c
@@ -38,7 +38,7 @@
*/
#include <vlib/vlib.h>
-#include <vlib/stat_weak_inlines.h>
+#include <vlib/stats/stats.h>
void
vlib_clear_simple_counters (vlib_simple_counter_main_t * cm)
@@ -79,66 +79,62 @@ void
vlib_validate_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
{
vlib_thread_main_t *tm = vlib_get_thread_main ();
- int i, resized = 0;
- void *oldheap = vlib_stats_push_heap (cm->counters);
+ char *name = cm->stat_segment_name ? cm->stat_segment_name : cm->name;
- vec_validate (cm->counters, tm->n_vlib_mains - 1);
- for (i = 0; i < tm->n_vlib_mains; i++)
- if (index >= vec_len (cm->counters[i]))
- {
- if (vec_resize_will_expand (cm->counters[i],
- index - vec_len (cm->counters[i]) +
- 1 /* length_increment */))
- resized++;
+ if (name == 0)
+ {
+ if (cm->counters == 0)
+ cm->stats_entry_index = ~0;
+ vec_validate (cm->counters, tm->n_vlib_mains - 1);
+ for (int i = 0; i < tm->n_vlib_mains; i++)
vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES);
- }
+ return;
+ }
- /* Avoid the epoch increase when there was no counter vector resize. */
- if (resized)
- vlib_stats_pop_heap (cm, oldheap, index,
- 2 /* STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE */);
- else
- clib_mem_set_heap (oldheap);
+ if (cm->counters == 0)
+ cm->stats_entry_index = vlib_stats_add_counter_vector ("%s", name);
+
+ vlib_stats_validate (cm->stats_entry_index, tm->n_vlib_mains - 1, index);
+ cm->counters = vlib_stats_get_entry_data_pointer (cm->stats_entry_index);
}
void
vlib_free_simple_counter (vlib_simple_counter_main_t * cm)
{
- int i;
-
- vlib_stats_delete_cm (cm);
-
- void *oldheap = vlib_stats_push_heap (cm->counters);
- for (i = 0; i < vec_len (cm->counters); i++)
- vec_free (cm->counters[i]);
- vec_free (cm->counters);
- clib_mem_set_heap (oldheap);
+ if (cm->stats_entry_index == ~0)
+ {
+ for (int i = 0; i < vec_len (cm->counters); i++)
+ vec_free (cm->counters[i]);
+ vec_free (cm->counters);
+ }
+ else
+ {
+ vlib_stats_remove_entry (cm->stats_entry_index);
+ cm->counters = NULL;
+ }
}
void
vlib_validate_combined_counter (vlib_combined_counter_main_t * cm, u32 index)
{
vlib_thread_main_t *tm = vlib_get_thread_main ();
- int i, resized = 0;
- void *oldheap = vlib_stats_push_heap (cm->counters);
+ char *name = cm->stat_segment_name ? cm->stat_segment_name : cm->name;
- vec_validate (cm->counters, tm->n_vlib_mains - 1);
- for (i = 0; i < tm->n_vlib_mains; i++)
- if (index >= vec_len (cm->counters[i]))
- {
- if (vec_resize_will_expand (cm->counters[i],
- index - vec_len (cm->counters[i]) +
- 1 /* length_increment */))
- resized++;
+ if (name == 0)
+ {
+ if (cm->counters == 0)
+ cm->stats_entry_index = ~0;
+ vec_validate (cm->counters, tm->n_vlib_mains - 1);
+ for (int i = 0; i < tm->n_vlib_mains; i++)
vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES);
- }
+ return;
+ }
- /* Avoid the epoch increase when there was no counter vector resize. */
- if (resized)
- vlib_stats_pop_heap (cm, oldheap, index,
- 3 /*STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED */);
- else
- clib_mem_set_heap (oldheap);
+ if (cm->counters == 0)
+ cm->stats_entry_index = vlib_stats_add_counter_pair_vector ("%s", name);
+
+ vlib_stats_validate (cm->stats_entry_index, tm->n_vlib_mains - 1, index);
+ cm->counters = vlib_stats_get_entry_data_pointer (cm->stats_entry_index);
}
int
@@ -147,7 +143,7 @@ int
{
vlib_thread_main_t *tm = vlib_get_thread_main ();
int i;
- void *oldheap = vlib_stats_push_heap (cm->counters);
+ void *oldheap = vlib_stats_set_heap ();
/* Possibly once in recorded history */
if (PREDICT_FALSE (vec_len (cm->counters) == 0))
@@ -176,15 +172,17 @@ int
void
vlib_free_combined_counter (vlib_combined_counter_main_t * cm)
{
- int i;
-
- vlib_stats_delete_cm (cm);
-
- void *oldheap = vlib_stats_push_heap (cm->counters);
- for (i = 0; i < vec_len (cm->counters); i++)
- vec_free (cm->counters[i]);
- vec_free (cm->counters);
- clib_mem_set_heap (oldheap);
+ if (cm->stats_entry_index == ~0)
+ {
+ for (int i = 0; i < vec_len (cm->counters); i++)
+ vec_free (cm->counters[i]);
+ vec_free (cm->counters);
+ }
+ else
+ {
+ vlib_stats_remove_entry (cm->stats_entry_index);
+ cm->counters = NULL;
+ }
}
u32
diff --git a/src/vlib/counter.h b/src/vlib/counter.h
index 9f5654292b9..f9da576a5f2 100644
--- a/src/vlib/counter.h
+++ b/src/vlib/counter.h
@@ -59,11 +59,26 @@ typedef struct
counter_t **counters; /**< Per-thread u64 non-atomic counters */
char *name; /**< The counter collection's name. */
char *stat_segment_name; /**< Name in stat segment directory */
+ u32 stats_entry_index;
} vlib_simple_counter_main_t;
/** The number of counters (not the number of per-thread counters) */
u32 vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm);
+/** Pre-fetch a per-thread simple counter for the given object index */
+always_inline void
+vlib_prefetch_simple_counter (const vlib_simple_counter_main_t *cm,
+ u32 thread_index, u32 index)
+{
+ counter_t *my_counters;
+
+ /*
+ * This CPU's index is assumed to already be in cache
+ */
+ my_counters = cm->counters[thread_index];
+ clib_prefetch_store (my_counters + index);
+}
+
/** Increment a simple counter
@param cm - (vlib_simple_counter_main_t *) simple counter main pointer
@param thread_index - (u32) the current cpu index
@@ -205,6 +220,7 @@ typedef struct
vlib_counter_t **counters; /**< Per-thread u64 non-atomic counter pairs */
char *name; /**< The counter collection's name. */
char *stat_segment_name; /**< Name in stat segment directory */
+ u32 stats_entry_index;
} vlib_combined_counter_main_t;
/** The number of counters (not the number of per-thread counters) */
diff --git a/src/vlib/dma/cli.c b/src/vlib/dma/cli.c
new file mode 100644
index 00000000000..1db59c81a2f
--- /dev/null
+++ b/src/vlib/dma/cli.c
@@ -0,0 +1,160 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/physmem_funcs.h>
+#include <vlib/dma/dma.h>
+
+static clib_error_t *
+show_dma_backends_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vlib_dma_main_t *dm = &vlib_dma_main;
+
+ if (vec_len (dm->backends))
+ {
+ vlib_dma_backend_t *b;
+ vec_foreach (b, dm->backends)
+ vlib_cli_output (vm, "%s", b->name);
+ }
+ else
+ vlib_cli_output (vm, "No active DMA backends");
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (avf_create_command, static) = {
+ .path = "show dma backends",
+ .short_help = "show dma backends",
+ .function = show_dma_backends_command_fn,
+};
+
+static void
+test_dma_cb_fn (vlib_main_t *vm, vlib_dma_batch_t *b)
+{
+ fformat (stderr, "%s: cb %p cookie %lx\n", __func__, b,
+ vlib_dma_batch_get_cookie (vm, b));
+}
+
+static clib_error_t *
+fill_random_data (void *buffer, uword size)
+{
+ uword seed = random_default_seed ();
+
+ uword remain = size;
+ const uword p = clib_mem_get_page_size ();
+ uword offset = 0;
+
+ clib_random_buffer_t rb;
+ clib_random_buffer_init (&rb, seed);
+
+ while (remain > 0)
+ {
+ uword fill_size = clib_min (p, remain);
+
+ clib_random_buffer_fill (&rb, fill_size);
+ void *rbuf = clib_random_buffer_get_data (&rb, fill_size);
+ clib_memcpy_fast (buffer + offset, rbuf, fill_size);
+ clib_random_buffer_free (&rb);
+
+ offset += fill_size;
+ remain -= fill_size;
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+test_dma_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ clib_error_t *err = 0;
+ vlib_dma_batch_t *b;
+ int config_index = -1;
+ u32 rsz, n_alloc, v;
+ u8 *from = 0, *to = 0;
+ vlib_dma_config_t cfg = { .max_transfers = 256,
+ .max_transfer_size = 4096,
+ .callback_fn = test_dma_cb_fn };
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "transfers %u", &v))
+ cfg.max_transfers = v;
+ else if (unformat (input, "size %u", &v))
+ cfg.max_transfer_size = v;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if ((config_index = vlib_dma_config_add (vm, &cfg)) < 0)
+ {
+ err = clib_error_return (0, "Unable to allocate dma config");
+ return err;
+ }
+
+ rsz = round_pow2 (cfg.max_transfer_size, CLIB_CACHE_LINE_BYTES);
+ n_alloc = rsz * cfg.max_transfers * 2;
+
+ if ((from = vlib_physmem_alloc_aligned_on_numa (
+ vm, n_alloc, CLIB_CACHE_LINE_BYTES, vm->numa_node)) == 0)
+ {
+ err = clib_error_return (0, "Unable to allocate %u bytes of physmem",
+ n_alloc);
+ return err;
+ }
+ to = from + n_alloc / 2;
+
+ u32 port_allocator_seed;
+
+ fill_random_data (from, (uword) cfg.max_transfers * rsz);
+
+ b = vlib_dma_batch_new (vm, config_index);
+ vlib_dma_batch_set_cookie (vm, b, 0x12345678);
+
+ port_allocator_seed = clib_cpu_time_now ();
+ int transfers = random_u32 (&port_allocator_seed) % cfg.max_transfers;
+ if (!transfers)
+ transfers = 1;
+ for (int i = 0; i < transfers; i++)
+ vlib_dma_batch_add (vm, b, to + i * rsz, from + i * rsz,
+ cfg.max_transfer_size);
+
+ vlib_dma_batch_submit (vm, b);
+ return err;
+}
+
+static clib_error_t *
+test_show_dma_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ clib_error_t *err = 0;
+ int config_index = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "config %u", &config_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ for (u32 i = 0; i < vlib_get_n_threads (); i++)
+ vlib_cli_output (vm, "Config %d %U", config_index, vlib_dma_config_info,
+ config_index, vlib_get_main_by_index (i));
+ return err;
+}
+
+VLIB_CLI_COMMAND (test_dma_command, static) = {
+ .path = "test dma",
+ .short_help = "test dma [transfers <x> size <x>]",
+ .function = test_dma_command_fn,
+};
+
+VLIB_CLI_COMMAND (show_dma_command, static) = {
+ .path = "show dma",
+ .short_help = "show dma [config <x>]",
+ .function = test_show_dma_fn,
+};
diff --git a/src/vlib/dma/dma.c b/src/vlib/dma/dma.c
new file mode 100644
index 00000000000..cc8ed94ea8f
--- /dev/null
+++ b/src/vlib/dma/dma.c
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/log.h>
+#include <vlib/dma/dma.h>
+
+VLIB_REGISTER_LOG_CLASS (dma_log) = {
+ .class_name = "dma",
+};
+
+vlib_dma_main_t vlib_dma_main = {};
+
+clib_error_t *
+vlib_dma_register_backend (vlib_main_t *vm, vlib_dma_backend_t *b)
+{
+ vlib_dma_main_t *dm = &vlib_dma_main;
+ vec_add1 (dm->backends, *b);
+ dma_log_info ("backend '%s' registered", b->name);
+ return 0;
+}
+
+int
+vlib_dma_config_add (vlib_main_t *vm, vlib_dma_config_t *c)
+{
+ vlib_dma_main_t *dm = &vlib_dma_main;
+ vlib_dma_backend_t *b;
+ vlib_dma_config_data_t *cd;
+
+ pool_get_zero (dm->configs, cd);
+ cd->config_index = cd - dm->configs;
+
+ clib_memcpy (&cd->cfg, c, sizeof (vlib_dma_config_t));
+
+ vec_foreach (b, dm->backends)
+ {
+ dma_log_info ("calling '%s' config_add_fn", b->name);
+ if (b->config_add_fn (vm, cd))
+ {
+ dma_log_info ("config %u added into backend %s", cd - dm->configs,
+ b->name);
+ cd->backend_index = b - dm->backends;
+ return cd - dm->configs;
+ }
+ }
+
+ pool_put (dm->configs, cd);
+ return -1;
+}
+
+void
+vlib_dma_config_del (vlib_main_t *vm, u32 config_index)
+{
+ vlib_dma_main_t *dm = &vlib_dma_main;
+ vlib_dma_config_data_t *cd = pool_elt_at_index (dm->configs, config_index);
+ vlib_dma_backend_t *b = vec_elt_at_index (dm->backends, cd->backend_index);
+
+ if (b->config_del_fn)
+ b->config_del_fn (vm, cd);
+
+ pool_put (dm->configs, cd);
+ dma_log_info ("config %u deleted from backend %s", config_index, b->name);
+}
+
+u8 *
+vlib_dma_config_info (u8 *s, va_list *args)
+{
+ vlib_dma_main_t *dm = &vlib_dma_main;
+ int config_index = va_arg (*args, int);
+ u32 len = pool_elts (dm->configs);
+ if (config_index >= len)
+ return format (s, "%s", "not found");
+ vlib_dma_config_data_t *cd = pool_elt_at_index (dm->configs, config_index);
+
+ vlib_dma_backend_t *b = vec_elt_at_index (dm->backends, cd->backend_index);
+
+ if (b->info_fn)
+ return b->info_fn (s, args);
+
+ return 0;
+}
diff --git a/src/vlib/dma/dma.h b/src/vlib/dma/dma.h
new file mode 100644
index 00000000000..62d04110aa6
--- /dev/null
+++ b/src/vlib/dma/dma.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2022 Cisco Systems, Inc.
+ */
+
+#ifndef included_vlib_dma_h
+#define included_vlib_dma_h
+#include <vlib/vlib.h>
+
+#define dma_log_debug(f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, dma_log.class, "%s: " f, __func__, \
+ ##__VA_ARGS__)
+
+#define dma_log_info(f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_INFO, dma_log.class, "%s: " f, __func__, \
+ ##__VA_ARGS__)
+
+struct vlib_dma_batch;
+struct vlib_dma_config_data;
+
+typedef int (vlib_dma_config_add_fn) (vlib_main_t *vm,
+ struct vlib_dma_config_data *cfg);
+typedef void (vlib_dma_config_del_fn) (vlib_main_t *vm,
+ struct vlib_dma_config_data *cfg);
+typedef struct vlib_dma_batch *(vlib_dma_batch_new_fn) (
+ vlib_main_t *vm, struct vlib_dma_config_data *);
+typedef int (vlib_dma_batch_submit_fn) (vlib_main_t *vm,
+ struct vlib_dma_batch *b);
+typedef void (vlib_dma_batch_callback_fn) (vlib_main_t *vm,
+ struct vlib_dma_batch *b);
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u32 barrier_before_last : 1;
+ u32 sw_fallback : 1;
+ };
+ u32 features;
+ };
+ u16 max_batches;
+ u16 max_transfers;
+ u32 max_transfer_size;
+ vlib_dma_batch_callback_fn *callback_fn;
+} vlib_dma_config_t;
+
+typedef struct vlib_dma_batch
+{
+ vlib_dma_batch_submit_fn *submit_fn;
+ vlib_dma_batch_callback_fn *callback_fn;
+ uword cookie;
+ u16 src_ptr_off;
+ u16 dst_ptr_off;
+ u16 size_off;
+ u16 stride;
+ u16 n_enq;
+} vlib_dma_batch_t;
+
+typedef struct
+{
+ char *name;
+ vlib_dma_config_add_fn *config_add_fn;
+ vlib_dma_config_del_fn *config_del_fn;
+ format_function_t *info_fn;
+} vlib_dma_backend_t;
+
+typedef struct vlib_dma_config_data
+{
+ vlib_dma_config_t cfg;
+ vlib_dma_batch_new_fn *batch_new_fn;
+ uword private_data;
+ u32 backend_index;
+ u32 config_index;
+} vlib_dma_config_data_t;
+
+typedef struct
+{
+ vlib_dma_backend_t *backends;
+ vlib_dma_config_data_t *configs;
+} vlib_dma_main_t;
+
+extern vlib_dma_main_t vlib_dma_main;
+
+clib_error_t *vlib_dma_register_backend (vlib_main_t *vm,
+ vlib_dma_backend_t *b);
+
+int vlib_dma_config_add (vlib_main_t *vm, vlib_dma_config_t *b);
+void vlib_dma_config_del (vlib_main_t *vm, u32 config_index);
+u8 *vlib_dma_config_info (u8 *s, va_list *args);
+
+static_always_inline vlib_dma_batch_t *
+vlib_dma_batch_new (vlib_main_t *vm, u32 config_index)
+{
+ vlib_dma_main_t *dm = &vlib_dma_main;
+ vlib_dma_config_data_t *cd = pool_elt_at_index (dm->configs, config_index);
+
+ return cd->batch_new_fn (vm, cd);
+}
+
+static_always_inline void
+vlib_dma_batch_set_cookie (vlib_main_t *vm, vlib_dma_batch_t *batch,
+ uword cookie)
+{
+ batch->cookie = cookie;
+}
+
+static_always_inline uword
+vlib_dma_batch_get_cookie (vlib_main_t *vm, vlib_dma_batch_t *batch)
+{
+ return batch->cookie;
+}
+
+static_always_inline void
+vlib_dma_batch_add (vlib_main_t *vm, vlib_dma_batch_t *batch, void *dst,
+ void *src, u32 size)
+{
+ u8 *p = (u8 *) batch + batch->n_enq * batch->stride;
+
+ *((void **) (p + batch->dst_ptr_off)) = dst;
+ *((void **) (p + batch->src_ptr_off)) = src;
+ *((u32 *) (p + batch->size_off)) = size;
+
+ batch->n_enq++;
+}
+
+static_always_inline void
+vlib_dma_batch_submit (vlib_main_t *vm, vlib_dma_batch_t *batch)
+{
+ batch->submit_fn (vm, batch);
+}
+
+#endif
diff --git a/src/vlib/dma/dma.rst b/src/vlib/dma/dma.rst
new file mode 100644
index 00000000000..4048d49b218
--- /dev/null
+++ b/src/vlib/dma/dma.rst
@@ -0,0 +1,70 @@
+.. _dma_plugin:
+
+.. toctree::
+
+DMA plugin
+==========
+
+Overview
+--------
+This plugin utilize platform DMA accelerators like CBDMA/DSA for streaming
+data movement. Modern DMA accelerators has high memory bandwidth and benefit
+cross-numa traffic. Accelerator like DSA has the capability to do IO page
+fault recovery, it will save IOMMU setup for the memory which not pinned.
+
+Terminology & Usage
+-------------------
+
+A ``backend`` is the abstract of resource which inherited from DMA device,
+it support necessary operations for DMA offloading like configuration, DMA
+request and result query.
+
+A ``config`` is the abstract of application DMA capability. Application can
+request a config instance through DMA node. DMA node will check the
+requirements of application and bind suitable backend with it.
+
+Enable DSA work queue:
+----------------------
+
+.. code-block:: console
+ # configure 1 groups, each with one engine
+ accel-config config-engine dsa0/engine0.0 --group-id=0
+
+ # configure 1 queues, putting each in a different group, so each
+ # is backed by a single engine
+ accel-config config-wq dsa0/wq0.0 --group-id=0 --type=user \
+ --priority=10 --max-batch-size=1024 --mode=dedicated -b 1 -a 0 --name=vpp1
+
+DMA transfer:
+-------------
+
+In this sample, application will request DMA capability which can hold
+a batch contained maximum 256 transfers and each transfer hold maximum 4K bytes
+from DMA node. If config_index value is not negative, mean resource has
+been allocated and DMA engine is ready for serve.
+
+.. code-block:: console
+ void dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b);
+
+ vlib_dma_config_args_t args;
+ args->max_transfers = 256;
+ args->max_transfer_size = 4096;
+ args->cpu_fallback = 1;
+ args->barrier_before_last = 1;
+ args->cb = dma_completion_cb;
+ u32 config_index = vlib_dma_config (vm, &args);
+ if (config_index < 0)
+ return;
+
+ u8 *dst[n_transfers];
+ u8 *src[n_transfers];
+ u32 i = 0, size = 4096;
+
+ vlib_dma_batch_t *b;
+ b = vlib_dma_batch_new (vm, config_index);
+ while (wrk_t->config_index >= 0 && n_transfers) {
+ vlib_dma_batch_add (vm, b, dst[i], src[i], size);
+ n_transfers --;
+ i ++;
+ }
+ vlib_dma_batch_submit (vm, config_index);
diff --git a/src/vlib/drop.c b/src/vlib/drop.c
index 3971123839d..3fda1d9b3b6 100644
--- a/src/vlib/drop.c
+++ b/src/vlib/drop.c
@@ -16,6 +16,7 @@
*/
#include <vlib/vlib.h>
+#include <vppinfra/vector/count_equal.h>
typedef enum
{
@@ -73,7 +74,8 @@ counter_index (vlib_main_t * vm, vlib_error_t e)
n = vlib_get_node (vm, ni);
ci = vlib_error_get_code (&vm->node_main, e);
- ASSERT (ci < n->n_errors);
+ if (ci >= n->n_errors)
+ return CLIB_U32_MAX;
ci += n->error_heap_index;
@@ -91,9 +93,12 @@ format_error_trace (u8 * s, va_list * va)
u32 i;
error_node = vlib_get_node (vm, vlib_error_get_node (&vm->node_main, e[0]));
- i = counter_index (vm, vlib_error_get_code (&vm->node_main, e[0])) +
- error_node->error_heap_index;
- s = format (s, "%v: %s", error_node->name, em->counters_heap[i].name);
+ i = counter_index (vm, vlib_error_get_code (&vm->node_main, e[0]));
+ if (i != CLIB_U32_MAX)
+ {
+ i += error_node->error_heap_index;
+ s = format (s, "%v: %s", error_node->name, em->counters_heap[i].desc);
+ }
return s;
}
@@ -221,7 +226,8 @@ process_drop_punt (vlib_main_t * vm,
n_left -= count;
c_index = counter_index (vm, error[0]);
- em->counters[c_index] += count;
+ if (c_index != CLIB_U32_MAX)
+ em->counters[c_index] += count;
vlib_error_elog_count (vm, c_index, count);
}
@@ -232,7 +238,7 @@ process_drop_punt (vlib_main_t * vm,
/* If there is no punt function, free the frame as well. */
if (disposition == ERROR_DISPOSITION_PUNT && !vm->os_punt_frame)
- vlib_frame_free (vm, node, frame);
+ vlib_frame_free (vm, frame);
}
else
vm->os_punt_frame (vm, node, frame);
@@ -254,7 +260,6 @@ VLIB_NODE_FN (error_punt_node) (vlib_main_t * vm,
return process_drop_punt (vm, node, frame, ERROR_DISPOSITION_PUNT);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (error_drop_node) = {
.name = "drop",
.flags = VLIB_NODE_FLAG_IS_DROP,
@@ -262,9 +267,7 @@ VLIB_REGISTER_NODE (error_drop_node) = {
.format_trace = format_error_trace,
.validate_frame = validate_error_frame,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (error_punt_node) = {
.name = "punt",
.flags = (VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH
@@ -273,7 +276,6 @@ VLIB_REGISTER_NODE (error_punt_node) = {
.format_trace = format_error_trace,
.validate_frame = validate_error_frame,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vlib/error.c b/src/vlib/error.c
index 97cb0b52192..3008af307bf 100644
--- a/src/vlib/error.c
+++ b/src/vlib/error.c
@@ -39,7 +39,7 @@
#include <vlib/vlib.h>
#include <vppinfra/heap.h>
-#include <vlib/stat_weak_inlines.h>
+#include <vlib/stats/stats.h>
uword
vlib_error_drop_buffers (vlib_main_t * vm,
@@ -112,6 +112,34 @@ vlib_error_drop_buffers (vlib_main_t * vm,
return n_buffers;
}
+static u8 *
+format_stats_counter_name (u8 *s, va_list *va)
+{
+ u8 *id = va_arg (*va, u8 *);
+
+ for (u32 i = 0; id[i] != 0; i++)
+ vec_add1 (s, id[i] == ' ' ? ' ' : id[i]);
+
+ return s;
+}
+
+void
+vlib_unregister_errors (vlib_main_t *vm, u32 node_index)
+{
+ vlib_error_main_t *em = &vm->error_main;
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_error_desc_t *cd;
+
+ if (n->n_errors > 0)
+ {
+ cd = vec_elt_at_index (em->counters_heap, n->error_heap_index);
+ for (u32 i = 0; i < n->n_errors; i++)
+ vlib_stats_remove_entry (cd[i].stats_entry_index);
+ heap_dealloc (em->counters_heap, n->error_heap_handle);
+ n->n_errors = 0;
+ }
+}
+
/* Reserves given number of error codes for given node. */
void
vlib_register_errors (vlib_main_t *vm, u32 node_index, u32 n_errors,
@@ -119,94 +147,88 @@ vlib_register_errors (vlib_main_t *vm, u32 node_index, u32 n_errors,
{
vlib_error_main_t *em = &vm->error_main;
vlib_node_main_t *nm = &vm->node_main;
-
vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_error_desc_t *cd;
+ u32 n_threads = vlib_get_n_threads ();
+ elog_event_type_t t = {};
uword l;
- void *oldheap;
+ u64 **sc;
ASSERT (vlib_get_thread_index () == 0);
+ vlib_stats_segment_lock ();
+
/* Free up any previous error strings. */
- if (n->n_errors > 0)
- heap_dealloc (em->counters_heap, n->error_heap_handle);
+ vlib_unregister_errors (vm, node_index);
n->n_errors = n_errors;
n->error_counters = counters;
if (n_errors == 0)
- return;
+ goto done;
+
+ n->error_heap_index =
+ heap_alloc (em->counters_heap, n_errors, n->error_heap_handle);
+ l = vec_len (em->counters_heap);
+ cd = vec_elt_at_index (em->counters_heap, n->error_heap_index);
/* Legacy node */
if (!counters)
{
- counters = clib_mem_alloc (sizeof (counters[0]) * n_errors);
- int i;
- for (i = 0; i < n_errors; i++)
+ for (int i = 0; i < n_errors; i++)
{
- counters[i].name = error_strings[i];
- counters[i].desc = error_strings[i];
- counters[i].severity = VL_COUNTER_SEVERITY_ERROR;
+ cd[i].name = error_strings[i];
+ cd[i].desc = error_strings[i];
+ cd[i].severity = VL_COUNTER_SEVERITY_ERROR;
}
}
-
- n->error_heap_index =
- heap_alloc (em->counters_heap, n_errors, n->error_heap_handle);
- l = vec_len (em->counters_heap);
- clib_memcpy (vec_elt_at_index (em->counters_heap, n->error_heap_index),
- counters, n_errors * sizeof (counters[0]));
+ else
+ clib_memcpy (cd, counters, n_errors * sizeof (counters[0]));
vec_validate (vm->error_elog_event_types, l - 1);
- /* Switch to the stats segment ... */
- oldheap = vlib_stats_push_heap (0);
+ if (em->stats_err_entry_index == 0)
+ em->stats_err_entry_index = vlib_stats_add_counter_vector ("/node/errors");
- /* Allocate a counter/elog type for each error. */
- vec_validate (em->counters, l - 1);
+ ASSERT (em->stats_err_entry_index != 0 && em->stats_err_entry_index != ~0);
- /* Zero counters for re-registrations of errors. */
- if (n->error_heap_index + n_errors <= vec_len (em->counters_last_clear))
- clib_memcpy (em->counters + n->error_heap_index,
- em->counters_last_clear + n->error_heap_index,
- n_errors * sizeof (em->counters[0]));
- else
- clib_memset (em->counters + n->error_heap_index,
- 0, n_errors * sizeof (em->counters[0]));
+ vlib_stats_validate (em->stats_err_entry_index, n_threads - 1, l - 1);
+ sc = vlib_stats_get_entry_data_pointer (em->stats_err_entry_index);
+
+ for (int i = 0; i < n_threads; i++)
+ {
+ vlib_main_t *tvm = vlib_get_main_by_index (i);
+ vlib_error_main_t *tem = &tvm->error_main;
+ tem->counters = sc[i];
+
+ /* Zero counters for re-registrations of errors. */
+ if (n->error_heap_index + n_errors <= vec_len (tem->counters_last_clear))
+ clib_memcpy (tem->counters + n->error_heap_index,
+ tem->counters_last_clear + n->error_heap_index,
+ n_errors * sizeof (tem->counters[0]));
+ else
+ clib_memset (tem->counters + n->error_heap_index, 0,
+ n_errors * sizeof (tem->counters[0]));
+ }
/* Register counter indices in the stat segment directory */
- {
- int i;
- u8 *error_name = 0;
-
- for (i = 0; i < n_errors; i++)
- {
- vec_reset_length (error_name);
- error_name =
- format (error_name, "/err/%v/%s%c", n->name, counters[i].name, 0);
- vlib_stats_register_error_index (oldheap, error_name, em->counters,
- n->error_heap_index + i);
- }
-
- vec_free (error_name);
- }
-
- /* (re)register the em->counters base address, switch back to main heap */
- vlib_stats_pop_heap2 (em->counters, vm->thread_index, oldheap, 1);
-
- {
- elog_event_type_t t;
- uword i;
-
- clib_memset (&t, 0, sizeof (t));
- if (n_errors > 0)
- vec_validate (nm->node_by_error, n->error_heap_index + n_errors - 1);
- for (i = 0; i < n_errors; i++)
- {
- t.format = (char *) format (0, "%v %s: %%d",
- n->name, counters[i].name);
- vm->error_elog_event_types[n->error_heap_index + i] = t;
- nm->node_by_error[n->error_heap_index + i] = n->index;
- }
- }
+ for (int i = 0; i < n_errors; i++)
+ cd[i].stats_entry_index = vlib_stats_add_symlink (
+ em->stats_err_entry_index, n->error_heap_index + i, "/err/%v/%U",
+ n->name, format_stats_counter_name, cd[i].name);
+
+ vec_validate (nm->node_by_error, n->error_heap_index + n_errors - 1);
+
+ for (u32 i = 0; i < n_errors; i++)
+ {
+ t.format = (char *) format (0, "%v %s: %%d", n->name, cd[i].name);
+ vec_free (vm->error_elog_event_types[n->error_heap_index + i].format);
+ vm->error_elog_event_types[n->error_heap_index + i] = t;
+ nm->node_by_error[n->error_heap_index + i] = n->index;
+ }
+
+done:
+ vlib_stats_segment_unlock ();
}
uword
@@ -306,11 +328,11 @@ show_errors (vlib_main_t * vm,
if (verbose)
vlib_cli_output (vm, "%10lu%=35v%=35s%=10s%=6d", c, n->name,
- em->counters_heap[i].name,
+ em->counters_heap[i].desc,
sev2str (em->counters_heap[i].severity), i);
else
vlib_cli_output (vm, "%10lu%=35v%=35s%=10s", c, n->name,
- em->counters_heap[i].name,
+ em->counters_heap[i].desc,
sev2str (em->counters_heap[i].severity));
}
}
@@ -330,7 +352,7 @@ show_errors (vlib_main_t * vm,
{
if (verbose)
vlib_cli_output (vm, "%10lu%=40v%=20s%=10d", sums[i], n->name,
- em->counters_heap[i].name, i);
+ em->counters_heap[i].desc, i);
}
}
}
@@ -340,21 +362,17 @@ show_errors (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_errors) = {
.path = "show errors",
.short_help = "Show error counts",
.function = show_errors,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_show_node_counters, static) = {
.path = "show node counters",
.short_help = "Show node counters",
.function = show_errors,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_error_counters (vlib_main_t * vm,
@@ -373,21 +391,17 @@ clear_error_counters (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_clear_error_counters, static) = {
.path = "clear errors",
.short_help = "Clear error counters",
.function = clear_error_counters,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_clear_node_counters, static) = {
.path = "clear node counters",
.short_help = "Clear node counters",
.function = clear_error_counters,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vlib/error.h b/src/vlib/error.h
index b921067ee84..b5cc264b60d 100644
--- a/src/vlib/error.h
+++ b/src/vlib/error.h
@@ -56,6 +56,7 @@ typedef struct
char *name;
char *desc;
enum vl_counter_severity_e severity;
+ u32 stats_entry_index;
} vlib_error_desc_t;
typedef struct
@@ -69,12 +70,16 @@ typedef struct
/* Counter structures in heap. Heap index
indexes counter vector. */
vlib_error_desc_t *counters_heap;
+
+ /* stats segment entry index */
+ u32 stats_err_entry_index;
} vlib_error_main_t;
/* Per node error registration. */
void vlib_register_errors (struct vlib_main_t *vm, u32 node_index,
u32 n_errors, char *error_strings[],
vlib_error_desc_t counters[]);
+void vlib_unregister_errors (struct vlib_main_t *vm, u32 node_index);
unformat_function_t unformat_vlib_error;
diff --git a/src/vlib/freebsd/pci.c b/src/vlib/freebsd/pci.c
new file mode 100644
index 00000000000..a4e9eb2dda6
--- /dev/null
+++ b/src/vlib/freebsd/pci.c
@@ -0,0 +1,380 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2024 Tom Jones <thj@freebsd.org>
+ *
+ * This software was developed by Tom Jones <thj@freebsd.org> under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/eventfd.h>
+
+#include <sys/pciio.h>
+
+#include <fcntl.h>
+#include <dirent.h>
+#include <net/if.h>
+
+extern vlib_pci_main_t freebsd_pci_main;
+
+uword
+vlib_pci_get_private_data (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ return 0;
+}
+
+void
+vlib_pci_set_private_data (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+ uword private_data)
+{
+}
+
+vlib_pci_addr_t *
+vlib_pci_get_addr (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ return NULL;
+}
+
+u32
+vlib_pci_get_numa_node (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ return 0;
+}
+
+u32
+vlib_pci_get_num_msix_interrupts (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ return 0;
+}
+
+/* Call to allocate/initialize the pci subsystem.
+ This is not an init function so that users can explicitly enable
+ pci only when it's needed. */
+clib_error_t *pci_bus_init (vlib_main_t *vm);
+
+vlib_pci_device_info_t *
+vlib_pci_get_device_info (vlib_main_t *vm, vlib_pci_addr_t *addr,
+ clib_error_t **error)
+{
+ /* Populate a vlib_pci_device_info_t from the given address */
+ clib_error_t *err = NULL;
+ vlib_pci_device_info_t *di = NULL;
+
+ int fd = -1;
+ struct pci_conf_io pci;
+ struct pci_conf match;
+ struct pci_match_conf pattern;
+ bzero (&match, sizeof (match));
+ bzero (&pattern, sizeof (pattern));
+
+ pattern.pc_sel.pc_domain = addr->domain;
+ pattern.pc_sel.pc_bus = addr->bus;
+ pattern.pc_sel.pc_dev = addr->slot;
+ pattern.pc_sel.pc_func = addr->function;
+ pattern.flags = PCI_GETCONF_MATCH_DOMAIN | PCI_GETCONF_MATCH_BUS |
+ PCI_GETCONF_MATCH_DEV | PCI_GETCONF_MATCH_FUNC;
+
+ pci.pat_buf_len = sizeof (pattern);
+ pci.num_patterns = 1;
+ pci.patterns = &pattern;
+ pci.match_buf_len = sizeof (match);
+ pci.num_matches = 1;
+ pci.matches = &match;
+ pci.offset = 0;
+ pci.generation = 0;
+ pci.status = 0;
+
+ fd = open ("/dev/pci", 0);
+ if (fd == -1)
+ {
+ err = clib_error_return_unix (0, "open '/dev/pci'");
+ goto error;
+ }
+
+ if (ioctl (fd, PCIOCGETCONF, &pci) == -1)
+ {
+ err = clib_error_return_unix (0, "reading PCIOCGETCONF");
+ goto error;
+ }
+
+ di = clib_mem_alloc (sizeof (vlib_pci_device_info_t));
+ clib_memset (di, 0, sizeof (vlib_pci_device_info_t));
+
+ di->addr.as_u32 = addr->as_u32;
+ di->numa_node = 0; /* TODO: Place holder until we have NUMA on FreeBSD */
+
+ di->device_class = match.pc_class;
+ di->vendor_id = match.pc_vendor;
+ di->device_id = match.pc_device;
+ di->revision = match.pc_revid;
+
+ di->product_name = NULL;
+ di->vpd_r = 0;
+ di->vpd_w = 0;
+ di->driver_name = format (0, "%s", &match.pd_name);
+ di->iommu_group = -1;
+
+ goto done;
+
+error:
+ vlib_pci_free_device_info (di);
+ di = NULL;
+done:
+ if (error)
+ *error = err;
+ close (fd);
+ return di;
+}
+
+clib_error_t *__attribute__ ((weak))
+vlib_pci_get_device_root_bus (vlib_pci_addr_t *addr, vlib_pci_addr_t *root_bus)
+{
+ return NULL;
+}
+
+clib_error_t *
+vlib_pci_bind_to_uio (vlib_main_t *vm, vlib_pci_addr_t *addr,
+ char *uio_drv_name, int force)
+{
+ clib_error_t *error = 0;
+
+ if (error)
+ {
+ return error;
+ }
+
+ if (strncmp ("auto", uio_drv_name, 5) == 0)
+ {
+ /* TODO: We should confirm that nic_uio is loaded and return an error. */
+ uio_drv_name = "nic_uio";
+ }
+ return error;
+}
+
+clib_error_t *
+vlib_pci_register_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+ pci_intx_handler_function_t *intx_handler)
+{
+ return NULL;
+}
+
+clib_error_t *
+vlib_pci_unregister_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ return NULL;
+}
+
+clib_error_t *
+vlib_pci_register_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+ u32 start, u32 count,
+ pci_msix_handler_function_t *msix_handler)
+{
+ return NULL;
+}
+
+clib_error_t *
+vlib_pci_unregister_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+ u32 start, u32 count)
+{
+ return NULL;
+}
+
+clib_error_t *
+vlib_pci_enable_msix_irq (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 start,
+ u16 count)
+{
+ return NULL;
+}
+
+uword
+vlib_pci_get_msix_file_index (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+ u16 index)
+{
+ return 0;
+}
+
+clib_error_t *
+vlib_pci_disable_msix_irq (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 start,
+ u16 count)
+{
+ return NULL;
+}
+
+/* Configuration space read/write. */
+clib_error_t *
+vlib_pci_read_write_config (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+ vlib_read_or_write_t read_or_write, uword address,
+ void *data, u32 n_bytes)
+{
+ return NULL;
+}
+
+clib_error_t *
+vlib_pci_map_region (vlib_main_t *vm, vlib_pci_dev_handle_t h, u32 resource,
+ void **result)
+{
+ return NULL;
+}
+
+clib_error_t *
+vlib_pci_map_region_fixed (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+ u32 resource, u8 *addr, void **result)
+{
+ return NULL;
+}
+
+clib_error_t *
+vlib_pci_io_region (vlib_main_t *vm, vlib_pci_dev_handle_t h, u32 resource)
+{
+ return NULL;
+}
+
+clib_error_t *
+vlib_pci_read_write_io (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+ vlib_read_or_write_t read_or_write, uword offset,
+ void *data, u32 length)
+{
+ return NULL;
+}
+
+clib_error_t *
+vlib_pci_map_dma (vlib_main_t *vm, vlib_pci_dev_handle_t h, void *ptr)
+{
+ return NULL;
+}
+
+int
+vlib_pci_supports_virtual_addr_dma (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ return 0;
+}
+
+clib_error_t *
+vlib_pci_device_open (vlib_main_t *vm, vlib_pci_addr_t *addr,
+ pci_device_id_t ids[], vlib_pci_dev_handle_t *handle)
+{
+ return NULL;
+}
+
+void
+vlib_pci_device_close (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+}
+
+void
+init_device_from_registered (vlib_main_t *vm, vlib_pci_device_info_t *di)
+{
+}
+
+static int
+pci_addr_cmp (void *v1, void *v2)
+{
+ vlib_pci_addr_t *a1 = v1;
+ vlib_pci_addr_t *a2 = v2;
+
+ if (a1->domain > a2->domain)
+ return 1;
+ if (a1->domain < a2->domain)
+ return -1;
+ if (a1->bus > a2->bus)
+ return 1;
+ if (a1->bus < a2->bus)
+ return -1;
+ if (a1->slot > a2->slot)
+ return 1;
+ if (a1->slot < a2->slot)
+ return -1;
+ if (a1->function > a2->function)
+ return 1;
+ if (a1->function < a2->function)
+ return -1;
+ return 0;
+}
+
+vlib_pci_addr_t *
+vlib_pci_get_all_dev_addrs ()
+{
+ vlib_pci_addr_t *addrs = 0;
+
+ int fd = -1;
+ struct pci_conf_io pci;
+ struct pci_conf matches[32];
+ bzero (matches, sizeof (matches));
+
+ pci.pat_buf_len = 0;
+ pci.num_patterns = 0;
+ pci.patterns = NULL;
+ pci.match_buf_len = sizeof (matches);
+ pci.num_matches = 32;
+ pci.matches = (struct pci_conf *) &matches;
+ pci.offset = 0;
+ pci.generation = 0;
+ pci.status = 0;
+
+ fd = open ("/dev/pci", 0);
+ if (fd == -1)
+ {
+ clib_error_return_unix (0, "opening /dev/pci");
+ return (NULL);
+ }
+
+ if (ioctl (fd, PCIOCGETCONF, &pci) == -1)
+ {
+ clib_error_return_unix (0, "reading pci config");
+ close (fd);
+ return (NULL);
+ }
+
+ for (int i = 0; i < pci.num_matches; i++)
+ {
+ struct pci_conf *m = &pci.matches[i];
+ vlib_pci_addr_t addr;
+
+ addr.domain = m->pc_sel.pc_domain;
+ addr.bus = m->pc_sel.pc_bus;
+ addr.slot = m->pc_sel.pc_dev;
+ addr.function = m->pc_sel.pc_func;
+
+ vec_add1 (addrs, addr);
+ }
+
+ vec_sort_with_function (addrs, pci_addr_cmp);
+ close (fd);
+
+ return addrs;
+}
+
+clib_error_t *
+freebsd_pci_init (vlib_main_t *vm)
+{
+ vlib_pci_main_t *pm = &pci_main;
+ vlib_pci_addr_t *addr = 0, *addrs;
+
+ pm->vlib_main = vm;
+
+ ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32));
+
+ addrs = vlib_pci_get_all_dev_addrs ();
+ vec_foreach (addr, addrs)
+ {
+ vlib_pci_device_info_t *d;
+ if ((d = vlib_pci_get_device_info (vm, addr, 0)))
+ {
+ init_device_from_registered (vm, d);
+ vlib_pci_free_device_info (d);
+ }
+ }
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (freebsd_pci_init) = {
+ .runs_after = VLIB_INITS ("unix_input_init"),
+};
diff --git a/src/vlib/handoff_trace.c b/src/vlib/handoff_trace.c
index 964c095b9f6..1370ba2c95a 100644
--- a/src/vlib/handoff_trace.c
+++ b/src/vlib/handoff_trace.c
@@ -75,7 +75,6 @@ typedef enum
HANDOFF_TRACE_N_NEXT,
} tplaceholder_next_t;
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (handoff_trace_node, static) =
{
.name = "handoff_trace",
@@ -94,7 +93,6 @@ VLIB_REGISTER_NODE (handoff_trace_node, static) =
.n_errors = ARRAY_LEN(handoff_trace_error_strings),
.error_strings = handoff_trace_error_strings,
};
-/* *INDENT-ON* */
int
vlib_add_handoff_trace (vlib_main_t * vm, vlib_buffer_t * b)
@@ -117,7 +115,6 @@ vlib_add_handoff_trace (vlib_main_t * vm, vlib_buffer_t * b)
}
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vlib/init.c b/src/vlib/init.c
index 4a56ab20b4a..55f260b74ed 100644
--- a/src/vlib/init.c
+++ b/src/vlib/init.c
@@ -270,12 +270,10 @@ again:
}
/* Finally, clean up all the fine data we allocated */
- /* *INDENT-OFF* */
hash_foreach_pair (hp, index_by_name,
({
vec_add1 (keys_to_delete, (u8 *)hp->key);
}));
- /* *INDENT-ON* */
hash_free (index_by_name);
for (i = 0; i < vec_len (keys_to_delete); i++)
vec_free (keys_to_delete[i]);
@@ -657,12 +655,10 @@ show_init_function_command_fn (vlib_main_t * vm,
}
}
}
- /* *INDENT-OFF* */
hash_foreach_pair (hp, index_by_name,
({
vec_add1 (keys_to_delete, (u8 *)hp->key);
}));
- /* *INDENT-ON* */
hash_free (index_by_name);
for (i = 0; i < vec_len (keys_to_delete); i++)
vec_free (keys_to_delete[i]);
@@ -679,13 +675,11 @@ show_init_function_command_fn (vlib_main_t * vm,
* @cliexstart{show init-function [init | enter | exit] [verbose [nn]]}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_init_function, static) = {
.path = "show init-function",
.short_help = "show init-function [init | enter | exit][verbose [nn]]",
.function = show_init_function_command_fn,
};
-/* *INDENT-ON* */
/*
diff --git a/src/vlib/init.h b/src/vlib/init.h
index e6235652ad1..364989eafe0 100644
--- a/src/vlib/init.h
+++ b/src/vlib/init.h
@@ -171,6 +171,8 @@ static __clib_unused void * __clib_unused_##tag##_##x = x
#define VLIB_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,init)
#define VLIB_WORKER_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,worker_init)
+#define VLIB_NUM_WORKERS_CHANGE_FN(x) \
+ VLIB_DECLARE_INIT_FUNCTION (x, num_workers_change)
#define VLIB_MAIN_LOOP_ENTER_FUNCTION(x) \
VLIB_DECLARE_INIT_FUNCTION(x,main_loop_enter)
diff --git a/src/vlib/lex.c b/src/vlib/lex.c
index 1cc8f1678d2..7facba5fe25 100644
--- a/src/vlib/lex.c
+++ b/src/vlib/lex.c
@@ -113,7 +113,7 @@ vlib_lex_get_token (vlib_lex_main_t * lm, vlib_lex_token_t * rv)
lm->lex_token_names[VLIB_LEX_word],
rv->value.as_pointer);
}
- _vec_len (lm->token_buffer) = 0;
+ vec_set_len (lm->token_buffer, 0);
/* Rescan the character which terminated the keyword/word. */
lm->current_index--;
@@ -233,7 +233,7 @@ void
vlib_lex_reset (vlib_lex_main_t * lm, u8 * input_vector)
{
if (lm->pushback_vector)
- _vec_len (lm->pushback_vector) = 0;
+ vec_set_len (lm->pushback_vector, 0);
lm->pushback_sp = -1;
lm->input_vector = input_vector;
@@ -255,7 +255,7 @@ lex_onetime_init (vlib_main_t * vm)
#undef _
vec_validate (lm->token_buffer, 127);
- _vec_len (lm->token_buffer) = 0;
+ vec_set_len (lm->token_buffer, 0);
return 0;
}
diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c
index 7c18505bbfc..f7c63bc3607 100644
--- a/src/vlib/linux/pci.c
+++ b/src/vlib/linux/pci.c
@@ -38,6 +38,8 @@
*/
#include <vppinfra/linux/sysfs.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/unix.h>
#include <vlib/vlib.h>
#include <vlib/pci/pci.h>
@@ -53,19 +55,26 @@
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <linux/vfio.h>
+#include <limits.h>
#include <sys/eventfd.h>
+#define SYSFS_DEVICES_PCI "/sys/devices/pci"
static const char *sysfs_pci_dev_path = "/sys/bus/pci/devices";
static const char *sysfs_pci_drv_path = "/sys/bus/pci/drivers";
static char *sysfs_mod_vfio_noiommu =
"/sys/module/vfio/parameters/enable_unsafe_noiommu_mode";
-#define pci_log_debug(vm, dev, f, ...) \
- vlib_log(VLIB_LOG_LEVEL_DEBUG, pci_main.log_default, "%U: " f, \
- format_vlib_pci_addr, vlib_pci_get_addr(vm, dev->handle), ## __VA_ARGS__)
-#define pci_log_err(vm, dev, f, ...) \
- vlib_log(VLIB_LOG_LEVEL_ERR, pci_main.log_default, "%U: " f, \
- format_vlib_pci_addr, vlib_pci_get_addr(vm, dev->handle), ## __VA_ARGS__)
+VLIB_REGISTER_LOG_CLASS (pci_log, static) = {
+ .class_name = "pci",
+ .subclass_name = "linux",
+};
+
+#define log_debug(p, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, pci_log.class, "%U: " f, \
+ format_vlib_pci_log, p->handle, ##__VA_ARGS__)
+#define log_err(p, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, pci_log.class, "%U: " f, format_vlib_pci_log, \
+ p->handle, ##__VA_ARGS__)
typedef struct
{
@@ -232,32 +241,14 @@ vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr,
/* You can only read more that 64 bytes of config space as root; so we try to
read the full space but fall back to just the first 64 bytes. */
- if (read (fd, &di->config_data, sizeof (di->config_data)) <
- sizeof (di->config0))
+ if (read (fd, &di->config, sizeof (di->config)) <
+ sizeof (vlib_pci_config_hdr_t))
{
err = clib_error_return_unix (0, "read `%s'", f);
close (fd);
goto error;
}
- {
- static pci_config_header_t all_ones;
- if (all_ones.vendor_id == 0)
- clib_memset (&all_ones, ~0, sizeof (all_ones));
-
- if (!memcmp (&di->config0.header, &all_ones, sizeof (all_ones)))
- {
- err = clib_error_return (0, "invalid PCI config for `%s'", f);
- close (fd);
- goto error;
- }
- }
-
- if (di->config0.header.header_type == 0)
- pci_config_type0_little_to_host (&di->config0);
- else
- pci_config_type1_little_to_host (&di->config1);
-
di->numa_node = -1;
vec_reset_length (f);
f = format (f, "%v/numa_node%c", dev_dir_name, 0);
@@ -269,11 +260,7 @@ vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr,
}
if (di->numa_node == -1)
{
- /* if '/sys/bus/pci/devices/<device id>/numa_node' returns -1 and
- it is a SMP system, set numa_node to 0. */
- if ((err = clib_sysfs_read ("/sys/devices/system/node/online", "%U",
- unformat_bitmap_list, &bmp)))
- clib_error_free (err);
+ bmp = os_get_online_cpu_node_bitmap ();
if (clib_bitmap_count_set_bits (bmp) == 1)
di->numa_node = 0;
}
@@ -300,15 +287,19 @@ vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr,
di->device_id = tmp;
vec_reset_length (f);
- f = format (f, "%v/driver%c", dev_dir_name, 0);
- di->driver_name = clib_sysfs_link_to_name ((char *) f);
+ f = format (f, "%v/revision%c", dev_dir_name, 0);
+ err = clib_sysfs_read ((char *) f, "0x%x", &tmp);
+ if (err)
+ goto error;
+ di->revision = tmp;
+
+ di->driver_name =
+ clib_file_get_resolved_basename ("%v/driver", dev_dir_name);
if (!di->driver_name)
di->driver_name = format (0, "<NONE>%c", 0);
di->iommu_group = -1;
- vec_reset_length (f);
- f = format (f, "%v/iommu_group%c", dev_dir_name, 0);
- tmpstr = clib_sysfs_link_to_name ((char *) f);
+ tmpstr = clib_file_get_resolved_basename ("%v/iommu_group", dev_dir_name);
if (tmpstr)
{
di->iommu_group = atoi ((char *) tmpstr);
@@ -347,7 +338,7 @@ vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr,
break;
len = (tag[2] << 8) | tag[1];
- vec_validate (data, len);
+ vec_validate (data, len - 1);
if (read (fd, data, len) != len)
{
@@ -383,6 +374,64 @@ done:
return di;
}
+clib_error_t *__attribute__ ((weak))
+vlib_pci_get_device_root_bus (vlib_pci_addr_t *addr, vlib_pci_addr_t *root_bus)
+{
+ u8 *rel_path = 0, *abs_path = 0, *link_path = 0;
+ unformat_input_t input;
+ int fd = open (sysfs_pci_dev_path, O_RDONLY);
+ ssize_t size = 0;
+ u32 domain = 0, bus;
+ clib_error_t *err = NULL;
+
+ if (fd < 0)
+ return clib_error_return_unix (0, "failed to open %s", sysfs_pci_dev_path);
+
+ vec_alloc (rel_path, PATH_MAX);
+ vec_alloc (abs_path, PATH_MAX);
+
+ link_path =
+ format (0, "%s/%U", sysfs_pci_dev_path, format_vlib_pci_addr, addr);
+ size = readlinkat (fd, (char *) link_path, (char *) rel_path, PATH_MAX);
+ if (size < 0)
+ {
+ err = clib_error_return_unix (0, "failed to read %s", rel_path);
+ goto done;
+ }
+
+ rel_path[size] = '\0';
+ vec_free (link_path);
+
+ link_path = format (0, "%s/%s", sysfs_pci_dev_path, rel_path);
+ if (!realpath ((char *) link_path, (char *) abs_path))
+ {
+ err = clib_error_return_unix (0, "failed to resolve %s", link_path);
+ goto done;
+ }
+
+ unformat_init_string (&input, (char *) abs_path,
+ clib_strnlen ((char *) abs_path, PATH_MAX));
+
+ if (!unformat (&input, SYSFS_DEVICES_PCI "%x:%x/%s", &domain, &bus,
+ link_path))
+ {
+ err = clib_error_return (0, "unknown input '%U'", format_unformat_error,
+ input);
+ goto done;
+ }
+
+ root_bus->domain = domain;
+ root_bus->bus = bus;
+
+done:
+ vec_free (abs_path);
+ vec_free (link_path);
+ vec_free (rel_path);
+ close (fd);
+
+ return err;
+}
+
static int
directory_exists (char *path)
{
@@ -394,8 +443,8 @@ directory_exists (char *path)
}
clib_error_t *
-vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr,
- char *uio_drv_name)
+vlib_pci_bind_to_uio (vlib_main_t *vm, vlib_pci_addr_t *addr,
+ char *uio_drv_name, int force)
{
clib_error_t *error = 0;
u8 *s = 0, *driver_name = 0;
@@ -427,7 +476,7 @@ vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr,
"is bound to IOMMU group and "
"vfio-pci driver is not loaded",
format_vlib_pci_addr, addr);
- goto done;
+ goto err0;
}
else
uio_drv_name = "vfio-pci";
@@ -448,92 +497,94 @@ vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr,
error = clib_error_return (0, "Skipping PCI device %U: missing "
"kernel VFIO or UIO driver",
format_vlib_pci_addr, addr);
- goto done;
+ goto err0;
}
clib_error_free (error);
}
}
- s = format (s, "%v/driver%c", dev_dir_name, 0);
- driver_name = clib_sysfs_link_to_name ((char *) s);
- vec_reset_length (s);
+ driver_name = clib_file_get_resolved_basename ("%v/driver", dev_dir_name);
if (driver_name &&
((strcmp ("vfio-pci", (char *) driver_name) == 0) ||
(strcmp ("uio_pci_generic", (char *) driver_name) == 0) ||
(strcmp ("igb_uio", (char *) driver_name) == 0)))
- goto done;
-
- /* walk trough all linux interfaces and if interface belonging to
- this device is founf check if interface is admin up */
- dir = opendir ("/sys/class/net");
- s = format (s, "%U%c", format_vlib_pci_addr, addr, 0);
+ goto err0;
- if (!dir)
+ if (!force)
{
- error = clib_error_return (0, "Skipping PCI device %U: failed to "
- "read /sys/class/net",
- format_vlib_pci_addr, addr);
- goto done;
- }
+ /* walk trough all linux interfaces and if interface belonging to
+ this device is found check if interface is admin up */
+ dir = opendir ("/sys/class/net");
+ s = format (s, "%U%c", format_vlib_pci_addr, addr, 0);
- fd = socket (PF_INET, SOCK_DGRAM, 0);
- if (fd < 0)
- {
- error = clib_error_return_unix (0, "socket");
- goto done;
- }
+ if (!dir)
+ {
+ error = clib_error_return (0,
+ "Skipping PCI device %U: failed to "
+ "read /sys/class/net",
+ format_vlib_pci_addr, addr);
+ goto err0;
+ }
- while ((e = readdir (dir)))
- {
- struct ifreq ifr;
- struct ethtool_drvinfo drvinfo;
+ fd = socket (PF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "socket");
+ goto err1;
+ }
- if (e->d_name[0] == '.') /* skip . and .. */
- continue;
+ while ((e = readdir (dir)))
+ {
+ struct ifreq ifr;
+ struct ethtool_drvinfo drvinfo;
- clib_memset (&ifr, 0, sizeof ifr);
- clib_memset (&drvinfo, 0, sizeof drvinfo);
- ifr.ifr_data = (char *) &drvinfo;
- clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1);
+ if (e->d_name[0] == '.') /* skip . and .. */
+ continue;
- drvinfo.cmd = ETHTOOL_GDRVINFO;
- if (ioctl (fd, SIOCETHTOOL, &ifr) < 0)
- {
- /* Some interfaces (eg "lo") don't support this ioctl */
- if ((errno != ENOTSUP) && (errno != ENODEV))
- clib_unix_warning ("ioctl fetch intf %s bus info error",
- e->d_name);
- continue;
- }
+ clib_memset (&ifr, 0, sizeof ifr);
+ clib_memset (&drvinfo, 0, sizeof drvinfo);
+ ifr.ifr_data = (char *) &drvinfo;
+ clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1);
- if (strcmp ((char *) s, drvinfo.bus_info))
- continue;
+ drvinfo.cmd = ETHTOOL_GDRVINFO;
+ if (ioctl (fd, SIOCETHTOOL, &ifr) < 0)
+ {
+ /* Some interfaces (eg "lo") don't support this ioctl */
+ if ((errno != ENOTSUP) && (errno != ENODEV))
+ clib_unix_warning ("ioctl fetch intf %s bus info error",
+ e->d_name);
+ continue;
+ }
- clib_memset (&ifr, 0, sizeof (ifr));
- clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1);
+ if (strcmp ((char *) s, drvinfo.bus_info))
+ continue;
- if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
- {
- error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
- e->d_name);
- close (fd);
- goto done;
- }
+ clib_memset (&ifr, 0, sizeof (ifr));
+ clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1);
- if (ifr.ifr_flags & IFF_UP)
- {
- vlib_log (VLIB_LOG_LEVEL_WARNING, pci_main.log_default,
- "Skipping PCI device %U as host "
- "interface %s is up", format_vlib_pci_addr, addr,
- e->d_name);
- close (fd);
- goto done;
+ if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
+ {
+ error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
+ e->d_name);
+ close (fd);
+ goto err1;
+ }
+
+ if (ifr.ifr_flags & IFF_UP)
+ {
+ vlib_log (VLIB_LOG_LEVEL_WARNING, pci_main.log_default,
+ "Skipping PCI device %U as host "
+ "interface %s is up",
+ format_vlib_pci_addr, addr, e->d_name);
+ close (fd);
+ goto err1;
+ }
}
- }
- close (fd);
- vec_reset_length (s);
+ close (fd);
+ vec_reset_length (s);
+ }
s = format (s, "%v/driver/unbind%c", dev_dir_name, 0);
clib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, addr);
@@ -565,8 +616,9 @@ vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr,
vec_reset_length (s);
}
-done:
+err1:
closedir (dir);
+err0:
vec_free (s);
vec_free (dev_dir_name);
vec_free (driver_name);
@@ -595,34 +647,14 @@ vfio_set_irqs (vlib_main_t * vm, linux_pci_device_t * p, u32 index, u32 start,
{
int data_len = efds ? count * sizeof (int) : 0;
u8 buf[sizeof (struct vfio_irq_set) + data_len];
- struct vfio_irq_info ii = { 0 };
struct vfio_irq_set *irq_set = (struct vfio_irq_set *) buf;
-
- ii.argsz = sizeof (struct vfio_irq_info);
- ii.index = index;
-
- if (ioctl (p->fd, VFIO_DEVICE_GET_IRQ_INFO, &ii) < 0)
- return clib_error_return_unix (0, "ioctl(VFIO_DEVICE_GET_IRQ_INFO) "
- "'%U'", format_vlib_pci_addr, &p->addr);
-
- pci_log_debug (vm, p, "%s index:%u count:%u flags: %s%s%s%s(0x%x)",
- __func__, ii.index, ii.count,
- ii.flags & VFIO_IRQ_INFO_EVENTFD ? "eventfd " : "",
- ii.flags & VFIO_IRQ_INFO_MASKABLE ? "maskable " : "",
- ii.flags & VFIO_IRQ_INFO_AUTOMASKED ? "automasked " : "",
- ii.flags & VFIO_IRQ_INFO_NORESIZE ? "noresize " : "",
- ii.flags);
-
- if (ii.count < start + count)
- return clib_error_return_unix (0, "vfio_set_irq: unexistng interrupt on "
- "'%U'", format_vlib_pci_addr, &p->addr);
-
-
if (efds)
{
+ int *data = (int *) irq_set->data;
flags |= VFIO_IRQ_SET_DATA_EVENTFD;
- clib_memcpy_fast (&irq_set->data, efds, data_len);
+ for (u32 i = 0; i < count; i++)
+ data[i] = efds[i];
}
else
flags |= VFIO_IRQ_SET_DATA_NONE;
@@ -637,11 +669,11 @@ vfio_set_irqs (vlib_main_t * vm, linux_pci_device_t * p, u32 index, u32 start,
irq_set->flags = flags;
if (ioctl (p->fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0)
- return clib_error_return_unix (0, "%U:ioctl(VFIO_DEVICE_SET_IRQS) "
- "[index = %u, start = %u, count = %u, "
- "flags = 0x%x]",
+ return clib_error_return_unix (0, "%U:ioctl(VFIO_DEVICE_SET_IRQS)\n%U",
format_vlib_pci_addr, &p->addr,
- index, start, count, flags);
+ format_vfio_irq_set, irq_set);
+
+ log_debug (p, "%s:\n%U", __func__, format_vfio_irq_set, irq_set);
return 0;
}
@@ -800,13 +832,12 @@ vlib_pci_register_intx_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
if (ioctl (p->fd, VFIO_DEVICE_GET_IRQ_INFO, &ii) < 0)
return clib_error_return_unix (0, "ioctl(VFIO_DEVICE_GET_IRQ_INFO) '"
"%U'", format_vlib_pci_addr, &p->addr);
- pci_log_debug (vm, p, "%s index:%u count:%u flags: %s%s%s%s(0x%x)",
- __func__, ii.index, ii.count,
- ii.flags & VFIO_IRQ_INFO_EVENTFD ? "eventfd " : "",
- ii.flags & VFIO_IRQ_INFO_MASKABLE ? "maskable " : "",
- ii.flags & VFIO_IRQ_INFO_AUTOMASKED ? "automasked " : "",
- ii.flags & VFIO_IRQ_INFO_NORESIZE ? "noresize " : "",
- ii.flags);
+ log_debug (
+ p, "%s index:%u count:%u flags: %s%s%s%s(0x%x)", __func__, ii.index,
+ ii.count, ii.flags & VFIO_IRQ_INFO_EVENTFD ? "eventfd " : "",
+ ii.flags & VFIO_IRQ_INFO_MASKABLE ? "maskable " : "",
+ ii.flags & VFIO_IRQ_INFO_AUTOMASKED ? "automasked " : "",
+ ii.flags & VFIO_IRQ_INFO_NORESIZE ? "noresize " : "", ii.flags);
if (ii.count != 1)
return clib_error_return (0, "INTx interrupt does not exist on device"
"'%U'", format_vlib_pci_addr, &p->addr);
@@ -835,6 +866,27 @@ vlib_pci_register_intx_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
}
clib_error_t *
+vlib_pci_unregister_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ linux_pci_device_t *p = linux_pci_get_device (h);
+ linux_pci_irq_t *irq = &p->intx_irq;
+
+ if (irq->intx_handler == 0)
+ return 0;
+
+ clib_file_del_by_index (&file_main, irq->clib_file_index);
+ if (p->type == LINUX_PCI_DEVICE_TYPE_VFIO)
+ {
+ close (irq->fd);
+ irq->fd = -1;
+ }
+
+ irq->intx_handler = 0;
+
+ return 0;
+}
+
+clib_error_t *
vlib_pci_register_msix_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
u32 start, u32 count,
pci_msix_handler_function_t * msix_handler)
@@ -847,10 +899,8 @@ vlib_pci_register_msix_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
return clib_error_return (0, "vfio driver is needed for MSI-X interrupt "
"support");
- /* *INDENT-OFF* */
vec_validate_init_empty (p->msix_irqs, start + count - 1, (linux_pci_irq_t)
{ .fd = -1});
- /* *INDENT-ON* */
for (i = start; i < start + count; i++)
{
@@ -892,6 +942,33 @@ error:
}
clib_error_t *
+vlib_pci_unregister_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+ u32 start, u32 count)
+{
+ clib_error_t *err = 0;
+ linux_pci_device_t *p = linux_pci_get_device (h);
+ u32 i;
+
+ if (p->type != LINUX_PCI_DEVICE_TYPE_VFIO)
+ return clib_error_return (0, "vfio driver is needed for MSI-X interrupt "
+ "support");
+
+ for (i = start; i < start + count; i++)
+ {
+ linux_pci_irq_t *irq = vec_elt_at_index (p->msix_irqs, i);
+
+ if (irq->fd != -1)
+ {
+ clib_file_del_by_index (&file_main, irq->clib_file_index);
+ close (irq->fd);
+ irq->fd = -1;
+ }
+ }
+
+ return err;
+}
+
+clib_error_t *
vlib_pci_enable_msix_irq (vlib_main_t * vm, vlib_pci_dev_handle_t h,
u16 start, u16 count)
{
@@ -903,9 +980,9 @@ vlib_pci_enable_msix_irq (vlib_main_t * vm, vlib_pci_dev_handle_t h,
return clib_error_return (0, "vfio driver is needed for MSI-X interrupt "
"support");
- for (i = start; i < start + count; i++)
+ for (i = 0; i < count; i++)
{
- linux_pci_irq_t *irq = vec_elt_at_index (p->msix_irqs, i);
+ linux_pci_irq_t *irq = vec_elt_at_index (p->msix_irqs, start + i);
fds[i] = irq->fd;
}
@@ -978,7 +1055,7 @@ add_device_vfio (vlib_main_t * vm, linux_pci_device_t * p,
goto error;
}
- pci_log_debug (vm, p, "%s %U", __func__, format_vfio_region_info, &reg);
+ log_debug (p, "%s %U", __func__, format_vfio_region_info, &reg);
p->config_offset = reg.offset;
p->config_fd = p->fd;
@@ -1001,7 +1078,6 @@ add_device_vfio (vlib_main_t * vm, linux_pci_device_t * p,
if (p->supports_va_dma)
{
vlib_buffer_pool_t *bp;
- /* *INDENT-OFF* */
vec_foreach (bp, vm->buffer_main->buffer_pools)
{
u32 i;
@@ -1010,7 +1086,6 @@ add_device_vfio (vlib_main_t * vm, linux_pci_device_t * p,
for (i = 0; i < pm->n_pages; i++)
vfio_map_physmem_page (vm, pm->base + (i << pm->log2_page_size));
}
- /* *INDENT-ON* */
}
if (r && r->init_function)
@@ -1113,7 +1188,7 @@ vlib_pci_region (vlib_main_t * vm, vlib_pci_dev_handle_t h, u32 bar, int *fd,
_fd = p->fd;
_size = r->size;
_offset = r->offset;
- pci_log_debug (vm, p, "%s %U", __func__, format_vfio_region_info, r);
+ log_debug (p, "%s %U", __func__, format_vfio_region_info, r);
clib_mem_free (r);
}
else
@@ -1133,29 +1208,25 @@ vlib_pci_map_region_int (vlib_main_t * vm, vlib_pci_dev_handle_t h,
linux_pci_device_t *p = linux_pci_get_device (h);
int fd = -1;
clib_error_t *error;
- int flags = MAP_SHARED;
u64 size = 0, offset = 0;
- u16 command;
+ vlib_pci_config_reg_command_t command;
- pci_log_debug (vm, p, "map region %u to va %p", bar, addr);
+ log_debug (p, "map region %u to va %p", bar, addr);
- if ((error = vlib_pci_read_config_u16 (vm, h, 4, &command)))
+ if ((error = vlib_pci_read_config_u16 (vm, h, 4, &command.as_u16)))
return error;
- if (!(command & PCI_COMMAND_MEMORY))
+ if (!(command.mem_space))
{
- pci_log_debug (vm, p, "setting memory enable bit");
- command |= PCI_COMMAND_MEMORY;
- if ((error = vlib_pci_write_config_u16 (vm, h, 4, &command)))
+ log_debug (p, "setting memory enable bit");
+ command.mem_space = 1;
+ if ((error = vlib_pci_write_config_u16 (vm, h, 4, &command.as_u16)))
return error;
}
if ((error = vlib_pci_region (vm, h, bar, &fd, &size, &offset)))
return error;
- if (p->type == LINUX_PCI_DEVICE_TYPE_UIO && addr != 0)
- flags |= MAP_FIXED;
-
*result = clib_mem_vm_map_shared (addr, size, fd, offset,
"PCIe %U region %u", format_vlib_pci_addr,
vlib_pci_get_addr (vm, h), bar);
@@ -1167,10 +1238,8 @@ vlib_pci_map_region_int (vlib_main_t * vm, vlib_pci_dev_handle_t h,
return error;
}
- /* *INDENT-OFF* */
vec_validate_init_empty (p->regions, bar,
(linux_pci_region_t) { .fd = -1});
- /* *INDENT-ON* */
if (p->type == LINUX_PCI_DEVICE_TYPE_UIO)
p->regions[bar].fd = fd;
p->regions[bar].addr = *result;
@@ -1261,12 +1330,19 @@ vlib_pci_device_open (vlib_main_t * vm, vlib_pci_addr_t * addr,
if (err)
return err;
- for (i = ids; i->vendor_id != 0; i++)
- if (i->vendor_id == di->vendor_id && i->device_id == di->device_id)
- break;
- if (i->vendor_id == 0)
- return clib_error_return (0, "Wrong vendor or device id");
+ if (ids)
+ {
+ for (i = ids; i->vendor_id != 0; i++)
+ if (i->vendor_id == di->vendor_id && i->device_id == di->device_id)
+ break;
+
+ if (i->vendor_id == 0)
+ {
+ vlib_pci_free_device_info (di);
+ return clib_error_return (0, "Wrong vendor or device id");
+ }
+ }
pool_get (lpm->linux_pci_devices, p);
p->handle = p - lpm->linux_pci_devices;
@@ -1279,9 +1355,8 @@ vlib_pci_device_open (vlib_main_t * vm, vlib_pci_addr_t * addr,
*/
p->io_fd = -1;
- pci_log_debug (vm, p, "open vid:0x%04x did:0x%04x driver:%s iommu_group:%d",
- di->vendor_id, di->device_id, di->driver_name,
- di->iommu_group);
+ log_debug (p, "open vid:0x%04x did:0x%04x driver:%s iommu_group:%d",
+ di->vendor_id, di->device_id, di->driver_name, di->iommu_group);
if (clib_strncmp ("vfio-pci", (char *) di->driver_name, 8) == 0)
err = add_device_vfio (vm, p, di, 0);
@@ -1299,7 +1374,7 @@ error:
vlib_pci_free_device_info (di);
if (err)
{
- pci_log_err (vm, p, "%U", format_clib_error, err);
+ log_err (p, "%U", format_clib_error, err);
clib_memset (p, 0, sizeof (linux_pci_device_t));
pool_put (lpm->linux_pci_devices, p);
}
@@ -1345,7 +1420,6 @@ vlib_pci_device_close (vlib_main_t * vm, vlib_pci_dev_handle_t h)
err = vfio_set_irqs (vm, p, VFIO_PCI_MSIX_IRQ_INDEX, 0, 0,
VFIO_IRQ_SET_ACTION_TRIGGER, 0);
clib_error_free (err);
- /* *INDENT-OFF* */
vec_foreach (irq, p->msix_irqs)
{
if (irq->fd == -1)
@@ -1353,12 +1427,10 @@ vlib_pci_device_close (vlib_main_t * vm, vlib_pci_dev_handle_t h)
clib_file_del_by_index (&file_main, irq->clib_file_index);
close (irq->fd);
}
- /* *INDENT-ON* */
vec_free (p->msix_irqs);
}
}
- /* *INDENT-OFF* */
vec_foreach (res, p->regions)
{
if (res->size == 0)
@@ -1367,7 +1439,6 @@ vlib_pci_device_close (vlib_main_t * vm, vlib_pci_dev_handle_t h)
if (res->fd != -1)
close (res->fd);
}
- /* *INDENT-ON* */
vec_free (p->regions);
close (p->fd);
@@ -1491,7 +1562,6 @@ linux_pci_init (vlib_main_t * vm)
ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32));
addrs = vlib_pci_get_all_dev_addrs ();
- /* *INDENT-OFF* */
vec_foreach (addr, addrs)
{
vlib_pci_device_info_t *d;
@@ -1501,17 +1571,14 @@ linux_pci_init (vlib_main_t * vm)
vlib_pci_free_device_info (d);
}
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (linux_pci_init) =
{
.runs_after = VLIB_INITS("unix_input_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vlib/linux/vfio.c b/src/vlib/linux/vfio.c
index dc68c52db02..1462cc6c7ca 100644
--- a/src/vlib/linux/vfio.c
+++ b/src/vlib/linux/vfio.c
@@ -185,10 +185,8 @@ linux_vfio_group_get_device_fd (vlib_pci_addr_t * addr, int *fdp,
int fd;
*is_noiommu = 0;
- s =
- format (s, "/sys/bus/pci/devices/%U/iommu_group%c", format_vlib_pci_addr,
- addr, 0);
- tmpstr = clib_sysfs_link_to_name ((char *) s);
+ tmpstr = clib_file_get_resolved_basename (
+ "/sys/bus/pci/devices/%U/iommu_group", format_vlib_pci_addr, addr);
if (tmpstr)
{
iommu_group = atoi ((char *) tmpstr);
@@ -303,10 +301,44 @@ format_vfio_region_info (u8 * s, va_list * args)
return s;
}
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+u8 *
+format_vfio_irq_set (u8 *s, va_list *args)
+{
+ struct vfio_irq_set *is = va_arg (*args, struct vfio_irq_set *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "index:%u start:%u count:%u flags: 0x%x", is->index,
+ is->start, is->count, is->flags);
+
+ s = format (s, " (data:");
+ if (is->flags & VFIO_IRQ_SET_DATA_NONE)
+ s = format (s, " none");
+ if (is->flags & VFIO_IRQ_SET_DATA_BOOL)
+ s = format (s, " bool");
+ if (is->flags & VFIO_IRQ_SET_DATA_EVENTFD)
+ s = format (s, " eventfd");
+
+ s = format (s, ", action:");
+ if (is->flags & VFIO_IRQ_SET_ACTION_MASK)
+ s = format (s, " mask");
+ if (is->flags & VFIO_IRQ_SET_ACTION_UNMASK)
+ s = format (s, " unmask");
+ if (is->flags & VFIO_IRQ_SET_ACTION_TRIGGER)
+ s = format (s, " trigger");
+ vec_add1 (s, ')');
+
+ if (is->flags & VFIO_IRQ_SET_DATA_EVENTFD)
+ {
+ s = format (s, "\n%U eventfd data:", format_white_space, indent);
+ for (u32 i = 0; i < is->count; i++)
+ s = format (s, " %d", ((int *) (is->data))[i]);
+ }
+ if (is->flags & VFIO_IRQ_SET_DATA_BOOL)
+ {
+ s = format (s, "\n%U bool data:", format_white_space, indent);
+ for (u32 i = 0; i < is->count; i++)
+ s = format (s, " %u", is->data);
+ }
+
+ return s;
+}
diff --git a/src/vlib/linux/vfio.h b/src/vlib/linux/vfio.h
index fe4f0f75346..c2bb2e9b667 100644
--- a/src/vlib/linux/vfio.h
+++ b/src/vlib/linux/vfio.h
@@ -50,7 +50,7 @@ clib_error_t *linux_vfio_group_get_device_fd (vlib_pci_addr_t * addr,
int *fd, int *is_noiommu);
format_function_t format_vfio_region_info;
-
+format_function_t format_vfio_irq_set;
#endif /* included_vlib_linux_vfio_h */
diff --git a/src/vlib/linux/vmbus.c b/src/vlib/linux/vmbus.c
index d50b539910b..9dc9d554ebd 100644
--- a/src/vlib/linux/vmbus.c
+++ b/src/vlib/linux/vmbus.c
@@ -31,8 +31,6 @@
#include <linux/ethtool.h>
#include <linux/sockios.h>
-#include <uuid/uuid.h>
-
static const char sysfs_vmbus_dev_path[] = "/sys/bus/vmbus/devices";
static const char sysfs_vmbus_drv_path[] = "/sys/bus/vmbus/drivers";
static const char sysfs_class_net_path[] = "/sys/class/net";
@@ -123,16 +121,39 @@ unformat_vlib_vmbus_addr (unformat_input_t *input, va_list *args)
{
vlib_vmbus_addr_t *addr = va_arg (*args, vlib_vmbus_addr_t *);
uword ret = 0;
- u8 *s;
+ u8 *s = 0;
- if (!unformat (input, "%s", &s))
+ if (!unformat (input, "%U", unformat_token, "a-zA-Z0-9-", &s))
return 0;
- if (uuid_parse ((char *) s, addr->guid) == 0)
- ret = 1;
+ if (vec_len (s) != 36)
+ goto fail;
- vec_free (s);
+ if (s[8] != '-' || s[13] != '-' || s[18] != '-' || s[23] != '-')
+ goto fail;
+
+ clib_memmove (s + 8, s + 9, 4);
+ clib_memmove (s + 12, s + 14, 4);
+ clib_memmove (s + 16, s + 19, 4);
+ clib_memmove (s + 20, s + 24, 12);
+
+ for (int i = 0; i < 32; i++)
+ if (s[i] >= '0' && s[i] <= '9')
+ s[i] -= '0';
+ else if (s[i] >= 'A' && s[i] <= 'F')
+ s[i] -= 'A' - 10;
+ else if (s[i] >= 'a' && s[i] <= 'f')
+ s[i] -= 'a' - 10;
+ else
+ goto fail;
+
+ for (int i = 0; i < 16; i++)
+ addr->guid[i] = s[2 * i] * 16 + s[2 * i + 1];
+
+ ret = 1;
+fail:
+ vec_free (s);
return ret;
}
@@ -141,10 +162,24 @@ u8 *
format_vlib_vmbus_addr (u8 *s, va_list *va)
{
vlib_vmbus_addr_t *addr = va_arg (*va, vlib_vmbus_addr_t *);
- char tmp[40];
-
- uuid_unparse (addr->guid, tmp);
- return format (s, "%s", tmp);
+ u8 *bytes = addr->guid;
+
+ for (int i = 0; i < 4; i++)
+ s = format (s, "%02x", bytes++[0]);
+ vec_add1 (s, '-');
+ for (int i = 0; i < 2; i++)
+ s = format (s, "%02x", bytes++[0]);
+ vec_add1 (s, '-');
+ for (int i = 0; i < 2; i++)
+ s = format (s, "%02x", bytes++[0]);
+ vec_add1 (s, '-');
+ for (int i = 0; i < 2; i++)
+ s = format (s, "%02x", bytes++[0]);
+ vec_add1 (s, '-');
+ for (int i = 0; i < 6; i++)
+ s = format (s, "%02x", bytes++[0]);
+
+ return s;
}
/* workaround for mlx bug, bring lower device up before unbind */
@@ -218,16 +253,14 @@ vlib_vmbus_bind_to_uio (vlib_vmbus_addr_t * addr)
static int uio_new_id_needed = 1;
struct dirent *e;
struct ifreq ifr;
- u8 *s, *driver_name;
+ u8 *s = 0, *driver_name;
DIR *dir;
int fd;
dev_dir_name = format (0, "%s/%U", sysfs_vmbus_dev_path,
format_vlib_vmbus_addr, addr);
- s = format (0, "%v/driver%c", dev_dir_name, 0);
- driver_name = clib_sysfs_link_to_name ((char *) s);
- vec_reset_length (s);
+ driver_name = clib_file_get_resolved_basename ("%v/driver", dev_dir_name);
/* skip if not using the Linux kernel netvsc driver */
if (!driver_name || strcmp ("hv_netvsc", (char *) driver_name) != 0)
@@ -284,9 +317,9 @@ vlib_vmbus_bind_to_uio (vlib_vmbus_addr_t * addr)
if (ifr.ifr_flags & IFF_UP)
{
- error = clib_error_return (0,
- "Skipping VMBUS device %U as host interface %s is up",
- format_vlib_vmbus_addr, addr, e->d_name);
+ error = clib_error_return (
+ 0, "Skipping VMBUS device %U as host interface %s is up",
+ format_vlib_vmbus_addr, addr, ifname);
close (fd);
goto done;
}
@@ -383,7 +416,13 @@ vmbus_addr_cmp (void *v1, void *v2)
vlib_vmbus_addr_t *a1 = v1;
vlib_vmbus_addr_t *a2 = v2;
- return uuid_compare (a1->guid, a2->guid);
+ for (int i = 0; i < ARRAY_LEN (a1->guid); i++)
+ if (a1->guid[i] > a2->guid[i])
+ return 1;
+ else if (a1->guid[i] < a2->guid[i])
+ return -1;
+
+ return 0;
}
vlib_vmbus_addr_t *
@@ -416,12 +455,10 @@ linux_vmbus_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (linux_vmbus_init) =
{
.runs_before = VLIB_INITS("unix_input_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vlib/log.c b/src/vlib/log.c
index fc67a1f8903..60fb9fb5178 100644
--- a/src/vlib/log.c
+++ b/src/vlib/log.c
@@ -25,15 +25,13 @@ vlib_log_main_t log_main = {
.default_syslog_log_level = VLIB_LOG_LEVEL_WARNING,
.unthrottle_time = 3,
.size = 512,
- .add_to_elog = 1,
+ .add_to_elog = 0,
.default_rate_limit = 50,
};
-/* *INDENT-OFF* */
VLIB_REGISTER_LOG_CLASS (log_log, static) = {
.class_name = "log",
};
-/* *INDENT-ON* */
static const int colors[] = {
[VLIB_LOG_LEVEL_EMERG] = 1, /* red */
@@ -70,27 +68,12 @@ last_log_entry ()
i += lm->size;
return i;
}
-
-static vlib_log_class_data_t *
-get_class_data (vlib_log_class_t ci)
-{
- vlib_log_main_t *lm = &log_main;
- return vec_elt_at_index (lm->classes, (ci >> 16));
-}
-
-static vlib_log_subclass_data_t *
-get_subclass_data (vlib_log_class_t ci)
-{
- vlib_log_class_data_t *c = get_class_data (ci);
- return vec_elt_at_index (c->subclasses, (ci & 0xffff));
-}
-
u8 *
format_vlib_log_class (u8 * s, va_list * args)
{
vlib_log_class_t ci = va_arg (*args, vlib_log_class_t);
- vlib_log_class_data_t *c = get_class_data (ci);
- vlib_log_subclass_data_t *sc = get_subclass_data (ci);
+ vlib_log_class_data_t *c = vlib_log_get_class_data (ci);
+ vlib_log_subclass_data_t *sc = vlib_log_get_subclass_data (ci);
if (sc->name)
return format (s, "%v/%v", c->name, sc->name);
@@ -105,7 +88,6 @@ format_indent (u8 * s, va_list * args)
u32 indent = va_arg (*args, u32);
u8 *c;
- /* *INDENT-OFF* */
vec_foreach (c, v)
{
vec_add (s, c, 1);
@@ -113,7 +95,6 @@ format_indent (u8 * s, va_list * args)
for (u32 i = 0; i < indent; i++)
vec_add1 (s, (u8) ' ');
}
- /* *INDENT-ON* */
return s;
}
@@ -133,7 +114,7 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...)
vlib_main_t *vm = vlib_get_main ();
vlib_log_main_t *lm = &log_main;
vlib_log_entry_t *e;
- vlib_log_subclass_data_t *sc = get_subclass_data (class);
+ vlib_log_subclass_data_t *sc = vlib_log_get_subclass_data (class);
va_list va;
f64 t = vlib_time_now (vm);
f64 delta = t - sc->last_event_timestamp;
@@ -226,13 +207,13 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...)
if (lm->add_to_elog)
{
- /* *INDENT-OFF* */
- ELOG_TYPE_DECLARE(ee) =
+ ELOG_TYPE_DECLARE(ee) =
{
.format = "log-%s: %s",
.format_args = "t4T4",
- .n_enum_strings = 9,
+ .n_enum_strings = VLIB_LOG_N_LEVELS,
.enum_strings = {
+ "unknown",
"emerg",
"alert",
"crit",
@@ -244,15 +225,15 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...)
"disabled",
},
};
- struct {
- u32 log_level;
- u32 string_index;
- } *ed;
- /* *INDENT-ON* */
+ struct
+ {
+ u32 log_level;
+ u32 string_index;
+ } * ed;
ed = ELOG_DATA (&vlib_global_main.elog_main, ee);
ed->log_level = level;
ed->string_index =
- elog_string (&vlib_global_main.elog_main, "%v", e->string);
+ elog_string (&vlib_global_main.elog_main, "%v%c", e->string, 0);
}
lm->next = (lm->next + 1) % lm->size;
@@ -366,8 +347,8 @@ format_vlib_log_level (u8 * s, va_list * args)
return format (s, "%s", t);
}
-static clib_error_t *
-vlib_log_init (vlib_main_t * vm)
+clib_error_t *
+vlib_log_init (vlib_main_t *vm)
{
vlib_log_main_t *lm = &log_main;
vlib_log_class_registration_t *r = lm->registrations;
@@ -381,9 +362,10 @@ vlib_log_init (vlib_main_t * vm)
{
r->class = vlib_log_register_class (r->class_name, r->subclass_name);
if (r->default_level)
- get_subclass_data (r->class)->level = r->default_level;
+ vlib_log_get_subclass_data (r->class)->level = r->default_level;
if (r->default_syslog_level)
- get_subclass_data (r->class)->syslog_level = r->default_syslog_level;
+ vlib_log_get_subclass_data (r->class)->syslog_level =
+ r->default_syslog_level;
r = r->next;
}
@@ -396,9 +378,6 @@ vlib_log_init (vlib_main_t * vm)
return 0;
}
-VLIB_INIT_FUNCTION (vlib_log_init);
-
-
static clib_error_t *
show_log (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -416,23 +395,20 @@ show_log (vlib_main_t * vm,
while (count--)
{
e = vec_elt_at_index (lm->entries, i);
- vlib_cli_output (vm, "%U %-10U %-14U %v",
- format_time_float, 0, e->timestamp + time_offset,
- format_vlib_log_level, e->level,
- format_vlib_log_class, e->class, e->string);
+ vlib_cli_output (vm, "%U %-10U %-14U %v", format_time_float, NULL,
+ e->timestamp + time_offset, format_vlib_log_level,
+ e->level, format_vlib_log_class, e->class, e->string);
i = (i + 1) % lm->size;
}
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_show_log, static) = {
.path = "show logging",
.short_help = "show logging",
.function = show_log,
};
-/* *INDENT-ON* */
static clib_error_t *
show_log_config (vlib_main_t * vm,
@@ -474,13 +450,11 @@ show_log_config (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_show_log_config, static) = {
.path = "show logging configuration",
.short_help = "show logging configuration",
.function = show_log_config,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_log (vlib_main_t * vm,
@@ -505,13 +479,11 @@ clear_log (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_clear_log, static) = {
.path = "clear logging",
.short_help = "clear logging",
.function = clear_log,
};
-/* *INDENT-ON* */
static uword
unformat_vlib_log_level (unformat_input_t * input, va_list * args)
@@ -639,14 +611,12 @@ set_log_class (vlib_main_t * vm,
return rv;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_set_log, static) = {
.path = "set logging class",
.short_help = "set logging class <class> [rate-limit <int>] "
"[level <level>] [syslog-level <level>]",
.function = set_log_class,
};
-/* *INDENT-ON* */
static clib_error_t *
set_log_unth_time (vlib_main_t * vm,
@@ -673,13 +643,11 @@ set_log_unth_time (vlib_main_t * vm,
return rv;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_set_log_params, static) = {
.path = "set logging unthrottle-time",
.short_help = "set logging unthrottle-time <int>",
.function = set_log_unth_time,
};
-/* *INDENT-ON* */
static clib_error_t *
set_log_size (vlib_main_t * vm,
@@ -709,13 +677,11 @@ set_log_size (vlib_main_t * vm,
return rv;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_set_log_size, static) = {
.path = "set logging size",
.short_help = "set logging size <int>",
.function = set_log_size,
};
-/* *INDENT-ON* */
static uword
unformat_vlib_log_subclass (unformat_input_t * input, va_list * args)
@@ -788,13 +754,11 @@ test_log_class_subclass (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_test_log, static) = {
.path = "test log",
.short_help = "test log <level> <class> <subclass> <message>",
.function = test_log_class_subclass,
};
-/* *INDENT-ON* */
static clib_error_t *
log_config_class (vlib_main_t * vm, char *name, unformat_input_t * input)
diff --git a/src/vlib/log.h b/src/vlib/log.h
index c3ebb8150ee..45e2b59946c 100644
--- a/src/vlib/log.h
+++ b/src/vlib/log.h
@@ -117,6 +117,7 @@ typedef struct
extern vlib_log_main_t log_main;
+clib_error_t *vlib_log_init (struct vlib_main_t *vm);
vlib_log_class_t vlib_log_register_class (char *vlass, char *subclass);
vlib_log_class_t
vlib_log_register_class_rate_limit (char *class, char *subclass,
@@ -148,6 +149,34 @@ __vlib_add_log_registration_##x (void) \
} \
__VA_ARGS__ vlib_log_class_registration_t x
+static_always_inline vlib_log_class_data_t *
+vlib_log_get_class_data (vlib_log_class_t ci)
+{
+ vlib_log_main_t *lm = &log_main;
+ return vec_elt_at_index (lm->classes, (ci >> 16));
+}
+
+static_always_inline vlib_log_subclass_data_t *
+vlib_log_get_subclass_data (vlib_log_class_t ci)
+{
+ vlib_log_class_data_t *c = vlib_log_get_class_data (ci);
+ return vec_elt_at_index (c->subclasses, (ci & 0xffff));
+}
+
+static_always_inline int
+vlib_log_is_enabled (vlib_log_level_t level, vlib_log_class_t class)
+{
+ vlib_log_subclass_data_t *sc = vlib_log_get_subclass_data (class);
+
+ if (level <= sc->level && sc->level != VLIB_LOG_LEVEL_DISABLED)
+ return 1;
+
+ if (level <= sc->syslog_level && sc->syslog_level != VLIB_LOG_LEVEL_DISABLED)
+ return 1;
+
+ return 0;
+}
+
#endif /* included_vlib_log_h */
/*
diff --git a/src/vlib/main.c b/src/vlib/main.c
index c7c4aba3080..04b58762646 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -41,75 +41,17 @@
#include <vppinfra/format.h>
#include <vlib/vlib.h>
#include <vlib/threads.h>
+#include <vlib/stats/stats.h>
#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
#include <vlib/unix/unix.h>
-/* Actually allocate a few extra slots of vector data to support
- speculative vector enqueues which overflow vector data in next frame. */
-#define VLIB_FRAME_SIZE_ALLOC (VLIB_FRAME_SIZE + 4)
-
-always_inline u32
-vlib_frame_bytes (u32 n_scalar_bytes, u32 n_vector_bytes)
-{
- u32 n_bytes;
-
- /* Make room for vlib_frame_t plus scalar arguments. */
- n_bytes = vlib_frame_vector_byte_offset (n_scalar_bytes);
-
- /* Make room for vector arguments.
- Allocate a few extra slots of vector data to support
- speculative vector enqueues which overflow vector data in next frame. */
-#define VLIB_FRAME_SIZE_EXTRA 4
- n_bytes += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * n_vector_bytes;
-
- /* Magic number is first 32bit number after vector data.
- Used to make sure that vector data is never overrun. */
#define VLIB_FRAME_MAGIC (0xabadc0ed)
- n_bytes += sizeof (u32);
-
- /* Pad to cache line. */
- n_bytes = round_pow2 (n_bytes, CLIB_CACHE_LINE_BYTES);
-
- return n_bytes;
-}
always_inline u32 *
vlib_frame_find_magic (vlib_frame_t * f, vlib_node_t * node)
{
- void *p = f;
-
- p += vlib_frame_vector_byte_offset (node->scalar_size);
-
- p += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * node->vector_size;
-
- return p;
-}
-
-static inline vlib_frame_size_t *
-get_frame_size_info (vlib_node_main_t * nm,
- u32 n_scalar_bytes, u32 n_vector_bytes)
-{
-#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES
- uword key = (n_scalar_bytes << 16) | n_vector_bytes;
- uword *p, i;
-
- p = hash_get (nm->frame_size_hash, key);
- if (p)
- i = p[0];
- else
- {
- i = vec_len (nm->frame_sizes);
- vec_validate (nm->frame_sizes, i);
- hash_set (nm->frame_size_hash, key, i);
- }
-
- return vec_elt_at_index (nm->frame_sizes, i);
-#else
- ASSERT (vlib_frame_bytes (n_scalar_bytes, n_vector_bytes)
- == (vlib_frame_bytes (0, 4)));
- return vec_elt_at_index (nm->frame_sizes, 0);
-#endif
+ return (void *) f + node->magic_offset;
}
static vlib_frame_t *
@@ -120,31 +62,35 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
vlib_frame_size_t *fs;
vlib_node_t *to_node;
vlib_frame_t *f;
- u32 l, n, scalar_size, vector_size;
+ u32 l, n;
ASSERT (vm == vlib_get_main ());
to_node = vlib_get_node (vm, to_node_index);
- scalar_size = to_node->scalar_size;
- vector_size = to_node->vector_size;
+ vec_validate (nm->frame_sizes, to_node->frame_size_index);
+ fs = vec_elt_at_index (nm->frame_sizes, to_node->frame_size_index);
- fs = get_frame_size_info (nm, scalar_size, vector_size);
- n = vlib_frame_bytes (scalar_size, vector_size);
+ if (fs->frame_size == 0)
+ fs->frame_size = to_node->frame_size;
+ else
+ ASSERT (fs->frame_size == to_node->frame_size);
+
+ n = fs->frame_size;
if ((l = vec_len (fs->free_frames)) > 0)
{
/* Allocate from end of free list. */
f = fs->free_frames[l - 1];
- _vec_len (fs->free_frames) = l - 1;
+ vec_set_len (fs->free_frames, l - 1);
}
else
{
- f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN);
+ f = clib_mem_alloc_aligned_no_fail (n, CLIB_CACHE_LINE_BYTES);
}
/* Poison frame when debugging. */
if (CLIB_DEBUG > 0)
- clib_memset (f, 0xfe, n);
+ clib_memset_u8 (f, 0xfe, n);
/* Insert magic number. */
{
@@ -156,9 +102,11 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
f->frame_flags = VLIB_FRAME_IS_ALLOCATED | frame_flags;
f->n_vectors = 0;
- f->scalar_size = scalar_size;
- f->vector_size = vector_size;
+ f->scalar_offset = to_node->scalar_offset;
+ f->vector_offset = to_node->vector_offset;
+ f->aux_offset = to_node->aux_offset;
f->flags = 0;
+ f->frame_size_index = to_node->frame_size_index;
fs->n_alloc_frames += 1;
@@ -239,17 +187,15 @@ vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f)
/* Free given frame. */
void
-vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f)
+vlib_frame_free (vlib_main_t *vm, vlib_frame_t *f)
{
vlib_node_main_t *nm = &vm->node_main;
- vlib_node_t *node;
vlib_frame_size_t *fs;
ASSERT (vm == vlib_get_main ());
ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED);
- node = vlib_get_node (vm, r->node_index);
- fs = get_frame_size_info (nm, node->scalar_size, node->vector_size);
+ fs = vec_elt_at_index (nm->frame_sizes, f->frame_size_index);
ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED);
@@ -261,6 +207,7 @@ vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f)
}
f->frame_flags &= ~(VLIB_FRAME_IS_ALLOCATED | VLIB_FRAME_NO_APPEND);
+ f->flags = 0;
vec_add1 (fs->free_frames, f);
ASSERT (fs->n_alloc_frames > 0);
@@ -271,30 +218,33 @@ static clib_error_t *
show_frame_stats (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
- vlib_node_main_t *nm = &vm->node_main;
vlib_frame_size_t *fs;
- vlib_cli_output (vm, "%=6s%=12s%=12s", "Size", "# Alloc", "# Free");
- vec_foreach (fs, nm->frame_sizes)
- {
- u32 n_alloc = fs->n_alloc_frames;
- u32 n_free = vec_len (fs->free_frames);
+ vlib_cli_output (vm, "%=8s%=6s%=12s%=12s", "Thread", "Size", "# Alloc",
+ "# Free");
+ foreach_vlib_main ()
+ {
+ vlib_node_main_t *nm = &this_vlib_main->node_main;
+ vec_foreach (fs, nm->frame_sizes)
+ {
+ u32 n_alloc = fs->n_alloc_frames;
+ u32 n_free = vec_len (fs->free_frames);
- if (n_alloc + n_free > 0)
- vlib_cli_output (vm, "%=6d%=12d%=12d",
- fs - nm->frame_sizes, n_alloc, n_free);
- }
+ if (n_alloc + n_free > 0)
+ vlib_cli_output (vm, "%=8d%=6d%=12d%=12d",
+ this_vlib_main->thread_index, fs->frame_size,
+ n_alloc, n_free);
+ }
+ }
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_frame_stats_cli, static) = {
.path = "show vlib frame-allocation",
.short_help = "Show node dispatch frame statistics",
.function = show_frame_stats,
};
-/* *INDENT-ON* */
/* Change ownership of enqueue rights to given next node. */
static void
@@ -525,12 +475,8 @@ vlib_put_next_frame (vlib_main_t * vm,
if (!(f->frame_flags & VLIB_FRAME_PENDING))
{
__attribute__ ((unused)) vlib_node_t *node;
- vlib_node_t *next_node;
- vlib_node_runtime_t *next_runtime;
node = vlib_get_node (vm, r->node_index);
- next_node = vlib_get_next_node (vm, r->node_index, next_index);
- next_runtime = vlib_node_get_runtime (vm, next_node->index);
vec_add2 (nm->pending_frames, p, 1);
@@ -539,18 +485,6 @@ vlib_put_next_frame (vlib_main_t * vm,
p->next_frame_index = nf - nm->next_frames;
nf->flags |= VLIB_FRAME_PENDING;
f->frame_flags |= VLIB_FRAME_PENDING;
-
- /*
- * If we're going to dispatch this frame on another thread,
- * force allocation of a new frame. Otherwise, we create
- * a dangling frame reference. Each thread has its own copy of
- * the next_frames vector.
- */
- if (0 && r->thread_index != next_runtime->thread_index)
- {
- nf->frame = NULL;
- nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED);
- }
}
/* Copy trace flag from next_frame and from runtime. */
@@ -698,13 +632,11 @@ vlib_cli_elog_clear (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (elog_clear_cli, static) = {
.path = "event-logger clear",
.short_help = "Clear the event log",
.function = vlib_cli_elog_clear,
};
-/* *INDENT-ON* */
#ifdef CLIB_UNIX
static clib_error_t *
@@ -753,13 +685,11 @@ vlib_post_mortem_dump (void)
(vgm->post_mortem_callbacks[i]) ();
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (elog_save_cli, static) = {
.path = "event-logger save",
.short_help = "event-logger save <filename> (saves log in /tmp/<filename>)",
.function = elog_save_buffer,
};
-/* *INDENT-ON* */
static clib_error_t *
elog_stop (vlib_main_t * vm,
@@ -773,13 +703,11 @@ elog_stop (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (elog_stop_cli, static) = {
.path = "event-logger stop",
.short_help = "Stop the event-logger",
.function = elog_stop,
};
-/* *INDENT-ON* */
static clib_error_t *
elog_restart (vlib_main_t * vm,
@@ -793,13 +721,11 @@ elog_restart (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (elog_restart_cli, static) = {
.path = "event-logger restart",
.short_help = "Restart the event-logger",
.function = elog_restart,
};
-/* *INDENT-ON* */
static clib_error_t *
elog_resize_command_fn (vlib_main_t * vm,
@@ -823,13 +749,11 @@ elog_resize_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (elog_resize_cli, static) = {
.path = "event-logger resize",
.short_help = "event-logger resize <nnn>",
.function = elog_resize_command_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_UNIX */
@@ -882,13 +806,11 @@ elog_show_buffer (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (elog_show_cli, static) = {
.path = "show event-logger",
.short_help = "Show event logger info",
.function = elog_show_buffer,
};
-/* *INDENT-ON* */
void
vlib_gdb_show_event_log (void)
@@ -1045,7 +967,6 @@ dispatch_node (vlib_main_t * vm,
polling mode and vice versa. */
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_ADAPTIVE_MODE))
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.function = (char *) __FUNCTION__,
@@ -1056,7 +977,6 @@ dispatch_node (vlib_main_t * vm,
"interrupt", "polling",
},
};
- /* *INDENT-ON* */
struct
{
u32 node_name, vector_length, is_polling;
@@ -1227,13 +1147,14 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
/* no new frame has been assigned to this node, use the saved one */
nf->frame = restore_frame;
f->n_vectors = 0;
+ f->flags = 0;
}
else
{
/* The node has gained a frame, implying packets from the current frame
were re-queued to this same node. we don't need the saved one
anymore */
- vlib_frame_free (vm, n, f);
+ vlib_frame_free (vm, f);
}
}
else
@@ -1241,7 +1162,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index,
if (f->frame_flags & VLIB_FRAME_FREE_AFTER_DISPATCH)
{
ASSERT (!(n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH));
- vlib_frame_free (vm, n, f);
+ vlib_frame_free (vm, f);
}
}
@@ -1417,7 +1338,8 @@ vlib_start_process (vlib_main_t * vm, uword process_index)
{
vlib_node_main_t *nm = &vm->node_main;
vlib_process_t *p = vec_elt (nm->processes, process_index);
- dispatch_process (vm, p, /* frame */ 0, /* cpu_time_now */ 0);
+ u64 cpu_time_now = clib_cpu_time_now ();
+ dispatch_process (vm, p, /* frame */ 0, cpu_time_now);
}
static u64
@@ -1501,12 +1423,6 @@ dispatch_suspended_process (vlib_main_t * vm,
return t;
}
-void vl_api_send_pending_rpc_requests (vlib_main_t *) __attribute__ ((weak));
-void
-vl_api_send_pending_rpc_requests (vlib_main_t * vm)
-{
-}
-
static_always_inline void
vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
{
@@ -1522,7 +1438,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
if (is_main)
{
vec_resize (nm->pending_frames, 32);
- _vec_len (nm->pending_frames) = 0;
+ vec_set_len (nm->pending_frames, 0);
}
/* Mark time of main loop start. */
@@ -1534,9 +1450,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
else
cpu_time_now = clib_cpu_time_now ();
- /* Pre-allocate interupt runtime indices and lock. */
- vec_alloc_aligned (nm->pending_interrupts, 1, CLIB_CACHE_LINE_BYTES);
-
/* Pre-allocate expired nodes. */
if (!nm->polling_threshold_vector_length)
nm->polling_threshold_vector_length = 10;
@@ -1572,7 +1485,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
if (PREDICT_FALSE (_vec_len (vm->pending_rpc_requests) > 0))
{
if (!is_main)
- vl_api_send_pending_rpc_requests (vm);
+ vlib_worker_flush_pending_rpc_requests (vm);
}
if (!is_main)
@@ -1581,8 +1494,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
if (PREDICT_FALSE (vm->check_frame_queues + frame_queue_check_counter))
{
u32 processed = 0;
- vlib_frame_queue_dequeue_fn_t *fn =
- vlib_buffer_func_main.frame_queue_dequeue_fn;
+ vlib_frame_queue_dequeue_fn_t *fn;
if (vm->check_frame_queues)
{
@@ -1591,7 +1503,10 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
}
vec_foreach (fqm, tm->frame_queue_mains)
- processed += (fn) (vm, fqm);
+ {
+ fn = fqm->frame_queue_dequeue_fn;
+ processed += (fn) (vm, fqm);
+ }
/* No handoff queue work found? */
if (processed)
@@ -1613,6 +1528,22 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
/* frame */ 0,
cpu_time_now);
+ if (clib_interrupt_is_any_pending (nm->pre_input_node_interrupts))
+ {
+ int int_num = -1;
+
+ while ((int_num = clib_interrupt_get_next_and_clear (
+ nm->pre_input_node_interrupts, int_num)) != -1)
+ {
+ vlib_node_runtime_t *n;
+ n = vec_elt_at_index (
+ nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], int_num);
+ cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_PRE_INPUT,
+ VLIB_NODE_STATE_INTERRUPT,
+ /* frame */ 0, cpu_time_now);
+ }
+ }
+
/* Next process input nodes. */
vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT])
cpu_time_now = dispatch_node (vm, n,
@@ -1624,16 +1555,14 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0))
vm->queue_signal_callback (vm);
- if (__atomic_load_n (nm->pending_interrupts, __ATOMIC_ACQUIRE))
+ if (clib_interrupt_is_any_pending (nm->input_node_interrupts))
{
int int_num = -1;
- *nm->pending_interrupts = 0;
- while ((int_num =
- clib_interrupt_get_next (nm->interrupts, int_num)) != -1)
+ while ((int_num = clib_interrupt_get_next_and_clear (
+ nm->input_node_interrupts, int_num)) != -1)
{
vlib_node_runtime_t *n;
- clib_interrupt_clear (nm->interrupts, int_num);
n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
int_num);
cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
@@ -1648,11 +1577,10 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
for (i = 0; i < _vec_len (nm->pending_frames); i++)
cpu_time_now = dispatch_pending_node (vm, i, cpu_time_now);
/* Reset pending vector for next iteration. */
- _vec_len (nm->pending_frames) = 0;
+ vec_set_len (nm->pending_frames, 0);
if (is_main)
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (es) =
{
.format = "process tw start",
@@ -1663,7 +1591,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
.format = "process tw end: %d",
.format_args = "i4",
};
- /* *INDENT-ON* */
struct
{
@@ -1676,10 +1603,8 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
if (PREDICT_FALSE (vm->elog_trace_graph_dispatch))
ed = ELOG_DATA (&vlib_global_main.elog_main, es);
- nm->data_from_advancing_timing_wheel =
- TW (tw_timer_expire_timers_vec)
- ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm),
- nm->data_from_advancing_timing_wheel);
+ TW (tw_timer_expire_timers)
+ ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm));
ASSERT (nm->data_from_advancing_timing_wheel != 0);
@@ -1710,6 +1635,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
vlib_get_node (vm, te->process_node_index);
vlib_process_t *p =
vec_elt (nm->processes, n->runtime_index);
+ p->stop_timer_handle = ~0;
void *data;
data =
vlib_process_signal_event_helper (nm, n, p,
@@ -1734,7 +1660,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
dispatch_suspended_process (vm, di, cpu_time_now);
}
}
- _vec_len (nm->data_from_advancing_timing_wheel) = 0;
+ vec_set_len (nm->data_from_advancing_timing_wheel, 0);
}
}
vlib_increment_main_loop_counter (vm);
@@ -1875,7 +1801,6 @@ placeholder_queue_signal_callback (vlib_main_t * vm)
}
#define foreach_weak_reference_stub \
-_(vlib_map_stat_segment_init) \
_(vpe_api_init) \
_(vlibmemory_init) \
_(map_api_segment_init)
@@ -1909,6 +1834,23 @@ vl_api_get_elog_trace_api_messages (void)
return 0;
}
+static void
+process_expired_timer_cb (u32 *expired_timer_handles)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_node_main_t *nm = &vm->node_main;
+ u32 *handle;
+
+ vec_foreach (handle, expired_timer_handles)
+ {
+ u32 pi = vlib_timing_wheel_data_get_index (*handle);
+ vlib_process_t *p = vec_elt (nm->processes, pi);
+
+ p->stop_timer_handle = ~0;
+ }
+ vec_append (nm->data_from_advancing_timing_wheel, expired_timer_handles);
+}
+
/* Main function. */
int
vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
@@ -1936,7 +1878,13 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
goto done;
}
- if ((error = vlib_map_stat_segment_init (vm)))
+ if ((error = vlib_log_init (vm)))
+ {
+ clib_error_report (error);
+ goto done;
+ }
+
+ if ((error = vlib_stats_init (vm)))
{
clib_error_report (error);
goto done;
@@ -2005,18 +1953,18 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
CLIB_CACHE_LINE_BYTES);
vec_validate (nm->data_from_advancing_timing_wheel, 10);
- _vec_len (nm->data_from_advancing_timing_wheel) = 0;
+ vec_set_len (nm->data_from_advancing_timing_wheel, 0);
/* Create the process timing wheel */
- TW (tw_timer_wheel_init) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
- 0 /* no callback */ ,
- 10e-6 /* timer period 10us */ ,
- ~0 /* max expirations per call */ );
+ TW (tw_timer_wheel_init)
+ ((TWT (tw_timer_wheel) *) nm->timing_wheel,
+ process_expired_timer_cb /* callback */, 10e-6 /* timer period 10us */,
+ ~0 /* max expirations per call */);
vec_validate (vm->pending_rpc_requests, 0);
- _vec_len (vm->pending_rpc_requests) = 0;
+ vec_set_len (vm->pending_rpc_requests, 0);
vec_validate (vm->processing_rpc_requests, 0);
- _vec_len (vm->processing_rpc_requests) = 0;
+ vec_set_len (vm->processing_rpc_requests, 0);
/* Default params for the buffer allocator fault injector, if configured */
if (VLIB_BUFFER_ALLOC_FAULT_INJECTOR > 0)
@@ -2066,7 +2014,9 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
vlib_main_loop (vm);
done:
+ /* Stop worker threads, barrier will not be released */
vlib_worker_thread_barrier_sync (vm);
+
/* Call all exit functions. */
{
clib_error_t *sub_error;
@@ -2074,12 +2024,11 @@ done:
if (sub_error)
clib_error_report (sub_error);
}
- vlib_worker_thread_barrier_release (vm);
if (error)
clib_error_report (error);
- return 0;
+ return vm->main_loop_exit_status;
}
vlib_main_t *
@@ -2094,6 +2043,13 @@ vlib_get_elog_main_not_inline ()
return &vlib_global_main.elog_main;
}
+void
+vlib_exit_with_status (vlib_main_t *vm, int status)
+{
+ vm->main_loop_exit_status = status;
+ __atomic_store_n (&vm->main_loop_exit_now, 1, __ATOMIC_RELEASE);
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vlib/main.h b/src/vlib/main.h
index c655560d08c..94b8c4fa954 100644
--- a/src/vlib/main.h
+++ b/src/vlib/main.h
@@ -40,6 +40,7 @@
#ifndef included_vlib_main_h
#define included_vlib_main_h
+#include <vppinfra/clib.h>
#include <vppinfra/callback_data.h>
#include <vppinfra/elog.h>
#include <vppinfra/format.h>
@@ -143,6 +144,8 @@ typedef struct vlib_main_t
u32 main_loop_exit_set;
/* Set e.g. in the SIGTERM signal handler, checked in a safe place... */
volatile u32 main_loop_exit_now;
+ /* Exit status that will be returned by the process upon exit. */
+ volatile int main_loop_exit_status;
clib_longjmp_t main_loop_exit;
#define VLIB_MAIN_LOOP_EXIT_NONE 0
#define VLIB_MAIN_LOOP_EXIT_PANIC 1
@@ -152,15 +155,6 @@ typedef struct vlib_main_t
/* Error marker to use when exiting main loop. */
clib_error_t *main_loop_error;
- /* Start of the heap. */
- void *heap_base;
-
- /* Truncated version, to create frame indices */
- void *heap_aligned_base;
-
- /* Size of the heap */
- uword heap_size;
-
/* buffer main structure. */
vlib_buffer_main_t *buffer_main;
@@ -218,7 +212,6 @@ typedef struct vlib_main_t
volatile u32 queue_signal_pending;
volatile u32 api_queue_nonempty;
void (*queue_signal_callback) (struct vlib_main_t *);
- u8 **argv;
/* Top of (worker) dispatch loop callback */
void (**volatile worker_thread_main_loop_callbacks)
@@ -281,6 +274,12 @@ typedef struct vlib_global_main_t
/* Name for e.g. syslog. */
char *name;
+ /* full path to main executable */
+ char *exec_path;
+
+ /* command line arguments */
+ u8 **argv;
+
/* post-mortem callbacks */
void (**post_mortem_callbacks) (void);
@@ -308,6 +307,7 @@ typedef struct vlib_global_main_t
_vlib_init_function_list_elt_t *main_loop_enter_function_registrations;
_vlib_init_function_list_elt_t *main_loop_exit_function_registrations;
_vlib_init_function_list_elt_t *worker_init_function_registrations;
+ _vlib_init_function_list_elt_t *num_workers_change_function_registrations;
_vlib_init_function_list_elt_t *api_init_function_registrations;
vlib_config_function_runtime_t *config_function_registrations;
@@ -377,7 +377,13 @@ always_inline void
vlib_panic_with_error (vlib_main_t * vm, clib_error_t * error)
{
vm->main_loop_error = error;
- clib_longjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_PANIC);
+ if (vm->main_loop_exit_set)
+ clib_longjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_PANIC);
+ else
+ {
+ clib_warning ("panic: %U", format_clib_error, error);
+ abort ();
+ }
}
#define vlib_panic_with_msg(vm,args...) \
@@ -389,6 +395,8 @@ vlib_panic (vlib_main_t * vm)
vlib_panic_with_error (vm, 0);
}
+/* Asynchronously requests exit with the given status. */
+void vlib_exit_with_status (vlib_main_t *vm, int status);
always_inline f64
vlib_internal_node_vector_rate (vlib_main_t * vm)
@@ -465,7 +473,7 @@ vlib_main_init ()
vgm->init_functions_called = hash_create (0, /* value bytes */ 0);
vm = clib_mem_alloc_aligned (sizeof (*vm), CLIB_CACHE_LINE_BYTES);
- vec_add1 (vgm->vlib_mains, vm);
+ vec_add1_ha (vgm->vlib_mains, vm, 0, CLIB_CACHE_LINE_BYTES);
}
/* Main routine. */
diff --git a/src/vlib/node.c b/src/vlib/node.c
index f4329e7c503..8f6c852188b 100644
--- a/src/vlib/node.c
+++ b/src/vlib/node.c
@@ -130,12 +130,10 @@ vlib_node_runtime_update (vlib_main_t * vm, u32 node_index, u32 next_index)
&& pf->next_frame_index >= i)
pf->next_frame_index += n_insert;
}
- /* *INDENT-OFF* */
pool_foreach (pf, nm->suspended_process_frames) {
if (pf->next_frame_index != ~0 && pf->next_frame_index >= i)
pf->next_frame_index += n_insert;
}
- /* *INDENT-ON* */
r->n_next_nodes = vec_len (node->next_nodes);
}
@@ -223,7 +221,6 @@ vlib_node_add_next_with_slot (vlib_main_t * vm,
{
uword sib_node_index, sib_slot;
vlib_node_t *sib_node;
- /* *INDENT-OFF* */
clib_bitmap_foreach (sib_node_index, node->sibling_bitmap) {
sib_node = vec_elt (nm->nodes, sib_node_index);
if (sib_node != node)
@@ -232,7 +229,6 @@ vlib_node_add_next_with_slot (vlib_main_t * vm,
ASSERT (sib_slot == slot);
}
}
- /* *INDENT-ON* */
}
vlib_worker_thread_barrier_release (vm);
@@ -329,12 +325,54 @@ vlib_node_get_preferred_node_fn_variant (vlib_main_t *vm,
}
static void
-register_node (vlib_main_t * vm, vlib_node_registration_t * r)
+vlib_node_add_to_sibling_bitmap (vlib_main_t *vm, vlib_node_t *n,
+ vlib_node_t *sib)
{
vlib_node_main_t *nm = &vm->node_main;
- vlib_node_t *n;
+ u32 si;
+
+ clib_bitmap_foreach (si, sib->sibling_bitmap)
+ {
+ vlib_node_t *m = vec_elt (nm->nodes, si);
+
+ /* Connect all of sibling's siblings to us. */
+ m->sibling_bitmap = clib_bitmap_ori (m->sibling_bitmap, n->index);
+
+ /* Connect us to all of sibling's siblings. */
+ n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, si);
+ }
+
+ /* Connect sibling to us. */
+ sib->sibling_bitmap = clib_bitmap_ori (sib->sibling_bitmap, n->index);
+
+ /* Connect us to sibling. */
+ n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, sib->index);
+}
+
+u32
+vlib_register_node (vlib_main_t *vm, vlib_node_registration_t *r, char *fmt,
+ ...)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n, *sib = 0;
+ va_list va;
+ u32 size;
int i;
+ if (r->sibling_of)
+ {
+ if (r->n_next_nodes > 0)
+ clib_error ("sibling node should not have any next nodes `%v'",
+ r->name);
+ if (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED)
+ {
+ sib = vlib_get_node_by_name (vm, (u8 *) r->sibling_of);
+
+ if (sib == 0)
+ clib_error ("unknown sibling node '%s'", r->sibling_of);
+ }
+ }
+
if (CLIB_DEBUG > 0)
{
/* Default (0) type should match INTERNAL. */
@@ -362,11 +400,9 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r)
vec_add1 (nm->nodes, n);
- /* Name is always a vector so it can be formatted with %v. */
- if (clib_mem_is_heap_object (vec_header (r->name, 0)))
- n->name = vec_dup ((u8 *) r->name);
- else
- n->name = format (0, "%s", r->name);
+ va_start (va, fmt);
+ n->name = va_format (0, fmt, &va);
+ va_end (va);
if (!nm->node_by_name)
nm->node_by_name = hash_create_vec ( /* size */ 32,
@@ -387,11 +423,6 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r)
r->index = n->index; /* save index in registration */
n->function = r->function;
- /* Node index of next sibling will be filled in by vlib_node_main_init. */
- n->sibling_of = r->sibling_of;
- if (r->sibling_of && r->n_next_nodes > 0)
- clib_error ("sibling node should not have any next nodes `%v'", n->name);
-
if (r->type == VLIB_NODE_TYPE_INTERNAL)
ASSERT (r->vector_size > 0);
@@ -400,13 +431,66 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r)
_(type);
_(flags);
_(state);
- _(scalar_size);
- _(vector_size);
_(format_buffer);
_(unformat_buffer);
_(format_trace);
_(validate_frame);
+ size = round_pow2 (sizeof (vlib_frame_t), VLIB_FRAME_DATA_ALIGN);
+
+ /* scalar data size */
+ if (r->scalar_size)
+ {
+ n->scalar_offset = size;
+ size += round_pow2 (r->scalar_size, VLIB_FRAME_DATA_ALIGN);
+ }
+ else
+ n->scalar_offset = 0;
+
+ /* Vecor data size */
+ n->vector_offset = size;
+ size += r->vector_size * VLIB_FRAME_SIZE;
+
+ /* Allocate a few extra slots of vector data to support
+ speculative vector enqueues which overflow vector data in next frame. */
+ size += r->vector_size * VLIB_FRAME_SIZE_EXTRA;
+
+ /* space for VLIB_FRAME_MAGIC */
+ n->magic_offset = size;
+ size += sizeof (u32);
+
+ /* round size to VLIB_FRAME_DATA_ALIGN */
+ size = round_pow2 (size, VLIB_FRAME_DATA_ALIGN);
+
+ if (r->aux_size)
+ {
+ n->aux_offset = size;
+ size += r->aux_size * VLIB_FRAME_SIZE;
+ }
+ else
+ n->aux_offset = 0;
+
+ /* final size */
+ n->frame_size = size = round_pow2 (size, CLIB_CACHE_LINE_BYTES);
+ ASSERT (size <= __UINT16_MAX__);
+
+ vlib_frame_size_t *fs = 0;
+
+ n->frame_size_index = (u16) ~0;
+ vec_foreach (fs, nm->frame_sizes)
+ if (fs->frame_size == size)
+ {
+ n->frame_size_index = fs - nm->frame_sizes;
+ break;
+ }
+
+ if (n->frame_size_index == (u16) ~0)
+ {
+ vec_add2 (nm->frame_sizes, fs, 1);
+ fs->frame_size = size;
+ n->frame_size_index = fs - nm->frame_sizes;
+ }
+
/* Register error counters. */
vlib_register_errors (vm, n->index, r->n_errors, r->error_strings,
r->error_counters);
@@ -476,7 +560,10 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r)
vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1,
/* align */ CLIB_CACHE_LINE_BYTES);
if (n->type == VLIB_NODE_TYPE_INPUT)
- clib_interrupt_resize (&nm->interrupts,
+ clib_interrupt_resize (&nm->input_node_interrupts,
+ vec_len (nm->nodes_by_type[n->type]));
+ else if (n->type == VLIB_NODE_TYPE_PRE_INPUT)
+ clib_interrupt_resize (&nm->pre_input_node_interrupts,
vec_len (nm->nodes_by_type[n->type]));
n->runtime_index = rt - nm->nodes_by_type[n->type];
}
@@ -512,13 +599,24 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r)
vec_free (n->runtime_data);
}
#undef _
-}
-/* Register new packet processing node. */
-u32
-vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r)
-{
- register_node (vm, r);
+ if (sib)
+ {
+ u32 slot, i;
+
+ vec_foreach_index (i, sib->next_nodes)
+ {
+ slot =
+ vlib_node_add_next_with_slot (vm, n->index, sib->next_nodes[i], i);
+ ASSERT (slot == i);
+ }
+
+ vlib_node_add_to_sibling_bitmap (vm, n, sib);
+
+ r->n_next_nodes = vec_len (n->next_nodes);
+ }
+ n->sibling_of = r->sibling_of;
+
return r->index;
}
@@ -530,7 +628,7 @@ null_node_fn (vlib_main_t * vm,
vlib_node_increment_counter (vm, node->node_index, 0, n_vectors);
vlib_buffer_free (vm, vlib_frame_vector_args (frame), n_vectors);
- vlib_frame_free (vm, node, frame);
+ vlib_frame_free (vm, frame);
return n_vectors;
}
@@ -582,19 +680,18 @@ vlib_register_all_static_nodes (vlib_main_t * vm)
static vlib_node_registration_t null_node_reg = {
.function = null_node_fn,
.vector_size = sizeof (u32),
- .name = "null-node",
.n_errors = 1,
.error_strings = null_node_error_strings,
};
/* make sure that node index 0 is not used by
real node */
- register_node (vm, &null_node_reg);
+ vlib_register_node (vm, &null_node_reg, "null-node");
r = vgm->node_registrations;
while (r)
{
- register_node (vm, r);
+ vlib_register_node (vm, r, "%s", r->name);
r = r->next_registration;
}
}
@@ -669,16 +766,11 @@ vlib_node_main_init (vlib_main_t * vm)
vlib_node_t *n;
uword ni;
- nm->frame_sizes = vec_new (vlib_frame_size_t, 1);
-#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES
- nm->frame_size_hash = hash_create (0, sizeof (uword));
-#endif
nm->flags |= VLIB_NODE_MAIN_RUNTIME_STARTED;
/* Generate sibling relationships */
{
vlib_node_t *n, *sib;
- uword si;
for (ni = 0; ni < vec_len (nm->nodes); ni++)
{
@@ -695,23 +787,7 @@ vlib_node_main_init (vlib_main_t * vm)
goto done;
}
- /* *INDENT-OFF* */
- clib_bitmap_foreach (si, sib->sibling_bitmap) {
- vlib_node_t * m = vec_elt (nm->nodes, si);
-
- /* Connect all of sibling's siblings to us. */
- m->sibling_bitmap = clib_bitmap_ori (m->sibling_bitmap, n->index);
-
- /* Connect us to all of sibling's siblings. */
- n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, si);
- }
- /* *INDENT-ON* */
-
- /* Connect sibling to us. */
- sib->sibling_bitmap = clib_bitmap_ori (sib->sibling_bitmap, n->index);
-
- /* Connect us to sibling. */
- n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, sib->index);
+ vlib_node_add_to_sibling_bitmap (vm, n, sib);
}
}
@@ -800,14 +876,13 @@ vlib_process_create (vlib_main_t * vm, char *name,
memset (&r, 0, sizeof (r));
- r.name = (char *) format (0, "%s", name, 0);
r.function = f;
r.process_log2_n_stack_bytes = log2_n_stack_bytes;
r.type = VLIB_NODE_TYPE_PROCESS;
vlib_worker_thread_barrier_sync (vm);
- vlib_register_node (vm, &r);
+ vlib_register_node (vm, &r, "%s", name);
vec_free (r.name);
vlib_worker_thread_node_runtime_update ();
diff --git a/src/vlib/node.h b/src/vlib/node.h
index 75a0adba8d1..68813c2c3e1 100644
--- a/src/vlib/node.h
+++ b/src/vlib/node.h
@@ -149,7 +149,8 @@ typedef struct _vlib_node_registration
u8 protocol_hint;
/* Size of scalar and vector arguments in bytes. */
- u16 scalar_size, vector_size;
+ u8 vector_size, aux_size;
+ u16 scalar_size;
/* Number of error codes used by this node. */
u16 n_errors;
@@ -200,7 +201,8 @@ static __clib_unused vlib_node_registration_t __clib_unused_##x
#endif
#define VLIB_NODE_FN(node) \
- uword CLIB_MARCH_SFX (node##_fn) (); \
+ uword CLIB_MARCH_SFX (node##_fn) (vlib_main_t *, vlib_node_runtime_t *, \
+ vlib_frame_t *); \
static vlib_node_fn_registration_t CLIB_MARCH_SFX ( \
node##_fn_registration) = { \
.function = &CLIB_MARCH_SFX (node##_fn), \
@@ -273,7 +275,7 @@ typedef struct vlib_node_t
u32 runtime_index;
/* Runtime data for this node. */
- void *runtime_data;
+ u8 *runtime_data;
/* Node flags. */
u16 flags;
@@ -309,7 +311,8 @@ typedef struct vlib_node_t
u16 n_errors;
/* Size of scalar and vector arguments in bytes. */
- u16 scalar_size, vector_size;
+ u16 frame_size, scalar_offset, vector_offset, magic_offset, aux_offset;
+ u16 frame_size_index;
/* Handle/index in error heap for this node. */
u32 error_heap_handle;
@@ -367,7 +370,10 @@ typedef struct vlib_node_t
/* Max number of vector elements to process at once per node. */
#define VLIB_FRAME_SIZE 256
-#define VLIB_FRAME_ALIGN CLIB_CACHE_LINE_BYTES
+/* Number of extra elements allocated at the end of vecttor. */
+#define VLIB_FRAME_SIZE_EXTRA 4
+/* Frame data alignment */
+#define VLIB_FRAME_DATA_ALIGN 16
/* Calling frame (think stack frame) for a node. */
typedef struct vlib_frame_t
@@ -378,15 +384,15 @@ typedef struct vlib_frame_t
/* User flags. Used for sending hints to the next node. */
u16 flags;
- /* Number of scalar bytes in arguments. */
- u8 scalar_size;
-
- /* Number of bytes per vector argument. */
- u8 vector_size;
+ /* Scalar, vector and aux offsets in this frame. */
+ u16 scalar_offset, vector_offset, aux_offset;
/* Number of vector elements currently in frame. */
u16 n_vectors;
+ /* Index of frame size corresponding to allocated node. */
+ u16 frame_size_index;
+
/* Scalar and vector arguments to next node. */
u8 arguments[0];
} vlib_frame_t;
@@ -501,7 +507,7 @@ typedef struct vlib_node_runtime_t
zero before first run of this
node. */
- u16 thread_index; /**< thread this node runs on */
+ CLIB_ALIGN_MARK (runtime_data_pad, 8);
u8 runtime_data[0]; /**< Function dependent
node-runtime data. This data is
@@ -521,10 +527,15 @@ typedef struct
/* Number of allocated frames for this scalar/vector size. */
u32 n_alloc_frames;
+ /* Frame size */
+ u16 frame_size;
+
/* Vector of free frames for this scalar/vector size. */
vlib_frame_t **free_frames;
} vlib_frame_size_t;
+STATIC_ASSERT_SIZEOF (vlib_frame_size_t, 16);
+
typedef struct
{
/* Users opaque value for event type. */
@@ -566,7 +577,7 @@ typedef struct
u32 n_suspends;
/* Vectors of pending event data indexed by event type index. */
- void **pending_event_data_by_type_index;
+ u8 **pending_event_data_by_type_index;
/* Bitmap of event type-indices with non-empty vectors. */
uword *non_empty_event_type_bitmap;
@@ -679,8 +690,8 @@ typedef struct
vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE];
/* Node runtime indices for input nodes with pending interrupts. */
- void *interrupts;
- volatile u32 *pending_interrupts;
+ void *input_node_interrupts;
+ void *pre_input_node_interrupts;
/* Input nodes are switched from/to interrupt to/from polling mode
when average vector length goes above/below polling/interrupt
@@ -721,9 +732,6 @@ typedef struct
/* Current counts of nodes in each state. */
u32 input_node_counts_by_state[VLIB_N_NODE_STATE];
- /* Hash of (scalar_size,vector_size) to frame_sizes index. */
- uword *frame_size_hash;
-
/* Per-size frame allocation information. */
vlib_frame_size_t *frame_sizes;
diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c
index 8cf57948cc4..d0bdf5b9097 100644
--- a/src/vlib/node_cli.c
+++ b/src/vlib/node_cli.c
@@ -42,6 +42,7 @@
#include <fcntl.h>
#include <vlib/vlib.h>
#include <vlib/threads.h>
+#include <vlib/stats/stats.h>
#include <math.h>
static int
@@ -84,13 +85,11 @@ show_node_graph (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_node_graph_command, static) = {
.path = "show vlib graph",
.short_help = "Show packet processing node graph",
.function = show_node_graph,
};
-/* *INDENT-ON* */
static clib_error_t *
show_node_graphviz (vlib_main_t * vm,
@@ -310,7 +309,7 @@ show_node_graphviz (vlib_main_t * vm,
/*?
* Dump dot files data to draw a graph of all the nodes.
* If the argument 'filter' is provided, only the active nodes (since the last
- * "clear run" comand) are selected and they are scaled and colored according
+ * "clear run" command) are selected and they are scaled and colored according
* to their utilization. You can choose to filter nodes that are called,
* nodes that receive vectors or both (default).
* The 'file' option allows to save data in a temp file.
@@ -323,14 +322,12 @@ show_node_graphviz (vlib_main_t * vm,
* @cliend
* @cliexcmd{show vlib graphviz [filter][calls][vectors][file <filename>]}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_node_graphviz_command, static) = {
.path = "show vlib graphviz",
.short_help = "Dump packet processing node graph as a graphviz dotfile",
.function = show_node_graphviz,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static u8 *
format_vlib_node_state (u8 * s, va_list * va)
@@ -465,13 +462,6 @@ format_vlib_node_stats (u8 * s, va_list * va)
return s;
}
-f64 vlib_get_stat_segment_update_rate (void) __attribute__ ((weak));
-f64
-vlib_get_stat_segment_update_rate (void)
-{
- return 1e70;
-}
-
static clib_error_t *
show_node_runtime (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -498,7 +488,6 @@ show_node_runtime (vlib_main_t * vm,
uword i, j;
f64 dt;
u64 n_input, n_output, n_drop, n_punt;
- u64 n_internal_vectors, n_internal_calls;
u64 n_clocks, l, v, c, d;
int brief = 1;
int summary = 0;
@@ -557,7 +546,6 @@ show_node_runtime (vlib_main_t * vm,
vec_sort_with_function (nodes, node_cmp);
n_input = n_output = n_drop = n_punt = n_clocks = 0;
- n_internal_vectors = n_internal_calls = 0;
for (i = 0; i < vec_len (nodes); i++)
{
n = nodes[i];
@@ -566,7 +554,6 @@ show_node_runtime (vlib_main_t * vm,
n_clocks += l;
v = n->stats_total.vectors - n->stats_last_clear.vectors;
- c = n->stats_total.calls - n->stats_last_clear.calls;
switch (n->type)
{
@@ -577,11 +564,6 @@ show_node_runtime (vlib_main_t * vm,
n_output += (n->flags & VLIB_NODE_FLAG_IS_OUTPUT) ? v : 0;
n_drop += (n->flags & VLIB_NODE_FLAG_IS_DROP) ? v : 0;
n_punt += (n->flags & VLIB_NODE_FLAG_IS_PUNT) ? v : 0;
- if (!(n->flags & VLIB_NODE_FLAG_IS_OUTPUT))
- {
- n_internal_vectors += v;
- n_internal_calls += c;
- }
if (n->flags & VLIB_NODE_FLAG_IS_HANDOFF)
n_input += v;
break;
@@ -606,16 +588,14 @@ show_node_runtime (vlib_main_t * vm,
}
dt = time_now - nm->time_last_runtime_stats_clear;
- vlib_cli_output
- (vm,
- "Time %.1f, %f sec internal node vector rate %.2f loops/sec %.2f\n"
- " vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
- dt,
- vlib_get_stat_segment_update_rate (),
- internal_node_vector_rates[j],
- stat_vm->loops_per_second,
- (f64) n_input / dt,
- (f64) n_output / dt, (f64) n_drop / dt, (f64) n_punt / dt);
+ vlib_cli_output (
+ vm,
+ "Time %.1f, %f sec internal node vector rate %.2f loops/sec %.2f\n"
+ " vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
+ dt, vlib_stats_get_segment_update_rate (),
+ internal_node_vector_rates[j], stat_vm->loops_per_second,
+ (f64) n_input / dt, (f64) n_output / dt, (f64) n_drop / dt,
+ (f64) n_punt / dt);
if (summary == 0)
{
@@ -646,14 +626,12 @@ show_node_runtime (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_node_runtime_command, static) = {
.path = "show runtime",
.short_help = "Show packet processing runtime",
.function = show_node_runtime,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_node_runtime (vlib_main_t * vm,
@@ -692,6 +670,8 @@ clear_node_runtime (vlib_main_t * vm,
nm->time_last_runtime_stats_clear = vlib_time_now (vm);
}
+ vlib_stats_set_timestamp (STAT_COUNTER_LAST_STATS_CLEAR,
+ vm->node_main.time_last_runtime_stats_clear);
vlib_worker_thread_barrier_release (vm);
vec_free (stat_vms);
@@ -699,13 +679,11 @@ clear_node_runtime (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_node_runtime_command, static) = {
.path = "clear runtime",
.short_help = "Clear packet processing runtime statistics",
.function = clear_node_runtime,
};
-/* *INDENT-ON* */
static clib_error_t *
show_node (vlib_main_t * vm, unformat_input_t * input,
@@ -825,7 +803,6 @@ show_node (vlib_main_t * vm, unformat_input_t * input,
if (n->type == VLIB_NODE_TYPE_INTERNAL)
{
int j = 0;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, n->prev_node_bitmap) {
vlib_node_t *pn = vlib_get_node (vm, i);
if (j++ % 3 == 0)
@@ -834,7 +811,6 @@ show_node (vlib_main_t * vm, unformat_input_t * input,
s = format (s, "%-35v", s2);
vec_reset_length (s2);
}
- /* *INDENT-ON* */
if (vec_len (s) == 0)
s = format (s, "\n none");
@@ -869,7 +845,6 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_node_command, static) = {
.path = "show node",
.short_help = "show node [index] <node-name | node-index>",
@@ -922,13 +897,11 @@ done:
return err;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_node_fn_command, static) = {
.path = "set node function",
.short_help = "set node function <node-name> <variant-name>",
.function = set_node_fn,
};
-/* *INDENT-ON* */
/* Dummy function to get us linked in. */
void
diff --git a/src/vlib/node_format.c b/src/vlib/node_format.c
index 54cea9ff804..9e0d1a7de6f 100644
--- a/src/vlib/node_format.c
+++ b/src/vlib/node_format.c
@@ -73,13 +73,11 @@ format_vlib_node_graph (u8 * s, va_list * va)
}
j = 0;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, n->prev_node_bitmap) {
vec_validate_init_empty (tmps, j, empty);
tmps[j].prev_node = i;
j++;
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (tmps); i++)
{
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
index 3c90a88efa0..1beac33cf9b 100644
--- a/src/vlib/node_funcs.h
+++ b/src/vlib/node_funcs.h
@@ -45,6 +45,7 @@
#ifndef included_vlib_node_funcs_h
#define included_vlib_node_funcs_h
+#include <vppinfra/clib.h>
#include <vppinfra/fifo.h>
#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
#include <vppinfra/interrupt.h>
@@ -58,7 +59,8 @@ vlib_process_start_switch_stack (vlib_main_t * vm, vlib_process_t * p)
{
#ifdef CLIB_SANITIZE_ADDR
void *stack = p ? (void *) p->stack : vlib_thread_stacks[vm->thread_index];
- u32 stack_bytes = p ? p->log2_n_stack_bytes : VLIB_THREAD_STACK_SIZE;
+ u32 stack_bytes =
+ p ? (1ULL < p->log2_n_stack_bytes) : VLIB_THREAD_STACK_SIZE;
__sanitizer_start_switch_fiber (&vm->asan_stack_save, stack, stack_bytes);
#endif
}
@@ -250,15 +252,22 @@ vlib_node_set_interrupt_pending (vlib_main_t *vm, u32 node_index)
{
vlib_node_main_t *nm = &vm->node_main;
vlib_node_t *n = vec_elt (nm->nodes, node_index);
+ void *interrupts = 0;
- ASSERT (n->type == VLIB_NODE_TYPE_INPUT);
+ if (n->type == VLIB_NODE_TYPE_INPUT)
+ interrupts = nm->input_node_interrupts;
+ else if (n->type == VLIB_NODE_TYPE_PRE_INPUT)
+ interrupts = nm->pre_input_node_interrupts;
+ else
+ {
+ ASSERT (0);
+ return;
+ }
if (vm != vlib_get_main ())
- clib_interrupt_set_atomic (nm->interrupts, n->runtime_index);
+ clib_interrupt_set_atomic (interrupts, n->runtime_index);
else
- clib_interrupt_set (nm->interrupts, n->runtime_index);
-
- __atomic_store_n (nm->pending_interrupts, 1, __ATOMIC_RELEASE);
+ clib_interrupt_set (interrupts, n->runtime_index);
}
always_inline vlib_process_t *
@@ -283,16 +292,6 @@ vlib_frame_no_append (vlib_frame_t * f)
f->frame_flags |= VLIB_FRAME_NO_APPEND;
}
-/* Byte alignment for vector arguments. */
-#define VLIB_FRAME_VECTOR_ALIGN (1 << 4)
-
-always_inline u32
-vlib_frame_vector_byte_offset (u32 scalar_size)
-{
- return round_pow2 (sizeof (vlib_frame_t) + scalar_size,
- VLIB_FRAME_VECTOR_ALIGN);
-}
-
/** \brief Get pointer to frame vector data.
@param f vlib_frame_t pointer
@return pointer to first vector element in frame
@@ -300,7 +299,19 @@ vlib_frame_vector_byte_offset (u32 scalar_size)
always_inline void *
vlib_frame_vector_args (vlib_frame_t * f)
{
- return (void *) f + vlib_frame_vector_byte_offset (f->scalar_size);
+ ASSERT (f->vector_offset);
+ return (void *) f + f->vector_offset;
+}
+
+/** \brief Get pointer to frame vector aux data.
+ @param f vlib_frame_t pointer
+ @return pointer to first vector aux data element in frame
+*/
+always_inline void *
+vlib_frame_aux_args (vlib_frame_t *f)
+{
+ ASSERT (f->aux_offset);
+ return (void *) f + f->aux_offset;
}
/** \brief Get pointer to frame scalar data.
@@ -314,7 +325,8 @@ vlib_frame_vector_args (vlib_frame_t * f)
always_inline void *
vlib_frame_scalar_args (vlib_frame_t * f)
{
- return vlib_frame_vector_args (f) - f->scalar_size;
+ ASSERT (f->scalar_offset);
+ return (void *) f + f->scalar_offset;
}
always_inline vlib_next_frame_t *
@@ -369,16 +381,34 @@ vlib_frame_t *vlib_get_next_frame_internal (vlib_main_t * vm,
u32 next_index,
u32 alloc_new_frame);
-#define vlib_get_next_frame_macro(vm,node,next_index,vectors,n_vectors_left,alloc_new_frame) \
-do { \
- vlib_frame_t * _f \
- = vlib_get_next_frame_internal ((vm), (node), (next_index), \
- (alloc_new_frame)); \
- u32 _n = _f->n_vectors; \
- (vectors) = vlib_frame_vector_args (_f) + _n * sizeof ((vectors)[0]); \
- (n_vectors_left) = VLIB_FRAME_SIZE - _n; \
-} while (0)
-
+#define vlib_get_next_frame_macro(vm, node, next_index, vectors, \
+ n_vectors_left, alloc_new_frame) \
+ do \
+ { \
+ vlib_frame_t *_f = vlib_get_next_frame_internal ( \
+ (vm), (node), (next_index), (alloc_new_frame)); \
+ u32 _n = _f->n_vectors; \
+ (vectors) = vlib_frame_vector_args (_f) + _n * sizeof ((vectors)[0]); \
+ (n_vectors_left) = VLIB_FRAME_SIZE - _n; \
+ } \
+ while (0)
+
+#define vlib_get_next_frame_macro_with_aux(vm, node, next_index, vectors, \
+ n_vectors_left, alloc_new_frame, \
+ aux_data, maybe_no_aux) \
+ do \
+ { \
+ vlib_frame_t *_f = vlib_get_next_frame_internal ( \
+ (vm), (node), (next_index), (alloc_new_frame)); \
+ u32 _n = _f->n_vectors; \
+ (vectors) = vlib_frame_vector_args (_f) + _n * sizeof ((vectors)[0]); \
+ if ((maybe_no_aux) && (_f)->aux_offset == 0) \
+ (aux_data) = NULL; \
+ else \
+ (aux_data) = vlib_frame_aux_args (_f) + _n * sizeof ((aux_data)[0]); \
+ (n_vectors_left) = VLIB_FRAME_SIZE - _n; \
+ } \
+ while (0)
/** \brief Get pointer to next frame vector data by
(@c vlib_node_runtime_t, @c next_index).
@@ -392,16 +422,69 @@ do { \
@return @c vectors -- pointer to next available vector slot
@return @c n_vectors_left -- number of vector slots available
*/
-#define vlib_get_next_frame(vm,node,next_index,vectors,n_vectors_left) \
- vlib_get_next_frame_macro (vm, node, next_index, \
- vectors, n_vectors_left, \
+#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left) \
+ vlib_get_next_frame_macro (vm, node, next_index, vectors, n_vectors_left, \
/* alloc new frame */ 0)
-#define vlib_get_new_next_frame(vm,node,next_index,vectors,n_vectors_left) \
- vlib_get_next_frame_macro (vm, node, next_index, \
- vectors, n_vectors_left, \
+#define vlib_get_new_next_frame(vm, node, next_index, vectors, \
+ n_vectors_left) \
+ vlib_get_next_frame_macro (vm, node, next_index, vectors, n_vectors_left, \
/* alloc new frame */ 1)
+/** \brief Get pointer to next frame vector data and next frame aux data by
+ (@c vlib_node_runtime_t, @c next_index).
+ Standard single/dual loop boilerplate element.
+ @attention This is a MACRO, with SIDE EFFECTS.
+ @attention This MACRO is unsafe in case the next node does not support
+ aux_data
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+ @param next_index requested graph arc index
+
+ @return @c vectors -- pointer to next available vector slot
+ @return @c aux_data -- pointer to next available aux data slot
+ @return @c n_vectors_left -- number of vector slots available
+*/
+#define vlib_get_next_frame_with_aux(vm, node, next_index, vectors, aux_data, \
+ n_vectors_left) \
+ vlib_get_next_frame_macro_with_aux ( \
+ vm, node, next_index, vectors, n_vectors_left, /* alloc new frame */ 0, \
+ aux_data, /* maybe_no_aux */ 0)
+
+#define vlib_get_new_next_frame_with_aux(vm, node, next_index, vectors, \
+ aux_data, n_vectors_left) \
+ vlib_get_next_frame_macro_with_aux ( \
+ vm, node, next_index, vectors, n_vectors_left, /* alloc new frame */ 1, \
+ aux_data, /* maybe_no_aux */ 0)
+
+/** \brief Get pointer to next frame vector data and next frame aux data by
+ (@c vlib_node_runtime_t, @c next_index).
+ Standard single/dual loop boilerplate element.
+ @attention This is a MACRO, with SIDE EFFECTS.
+ @attention This MACRO is safe in case the next node does not support aux_data.
+ In that case aux_data is set to NULL.
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+ @param next_index requested graph arc index
+
+ @return @c vectors -- pointer to next available vector slot
+ @return @c aux_data -- pointer to next available aux data slot
+ @return @c n_vectors_left -- number of vector slots available
+*/
+#define vlib_get_next_frame_with_aux_safe(vm, node, next_index, vectors, \
+ aux_data, n_vectors_left) \
+ vlib_get_next_frame_macro_with_aux ( \
+ vm, node, next_index, vectors, n_vectors_left, /* alloc new frame */ 0, \
+ aux_data, /* maybe_no_aux */ 1)
+
+#define vlib_get_new_next_frame_with_aux_safe(vm, node, next_index, vectors, \
+ aux_data, n_vectors_left) \
+ vlib_get_next_frame_macro_with_aux ( \
+ vm, node, next_index, vectors, n_vectors_left, /* alloc new frame */ 1, \
+ aux_data, /* maybe_no_aux */ 1)
+
/** \brief Release pointer to next frame vector data.
Standard single/dual loop boilerplate element.
@param vm vlib_main_t pointer, varies by thread
@@ -424,6 +507,16 @@ vlib_put_next_frame (vlib_main_t * vm,
(v); \
})
+#define vlib_set_next_frame_with_aux_safe(vm, node, next_index, v, aux) \
+ ({ \
+ uword _n_left; \
+ vlib_get_next_frame_with_aux_safe ((vm), (node), (next_index), (v), \
+ (aux), _n_left); \
+ ASSERT (_n_left > 0); \
+ vlib_put_next_frame ((vm), (node), (next_index), _n_left - 1); \
+ (v); \
+ })
+
always_inline void
vlib_set_next_frame_buffer (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -434,6 +527,20 @@ vlib_set_next_frame_buffer (vlib_main_t * vm,
p[0] = buffer_index;
}
+always_inline void
+vlib_set_next_frame_buffer_with_aux_safe (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ u32 next_index, u32 buffer_index,
+ u32 aux)
+{
+ u32 *p;
+ u32 *a;
+ p = vlib_set_next_frame_with_aux_safe (vm, node, next_index, p, a);
+ p[0] = buffer_index;
+ if (a)
+ a[0] = aux;
+}
+
vlib_frame_t *vlib_get_frame_to_node (vlib_main_t * vm, u32 to_node_index);
void vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index,
vlib_frame_t * f);
@@ -601,7 +708,7 @@ vlib_process_get_events (vlib_main_t * vm, uword ** data_vector)
l = _vec_len (p->pending_event_data_by_type_index[t]);
if (data_vector)
vec_add (*data_vector, p->pending_event_data_by_type_index[t], l);
- _vec_len (p->pending_event_data_by_type_index[t]) = 0;
+ vec_set_len (p->pending_event_data_by_type_index[t], 0);
et = pool_elt_at_index (p->event_type_pool, t);
@@ -625,7 +732,7 @@ vlib_process_get_events_helper (vlib_process_t * p, uword t,
l = _vec_len (p->pending_event_data_by_type_index[t]);
if (data_vector)
vec_add (*data_vector, p->pending_event_data_by_type_index[t], l);
- _vec_len (p->pending_event_data_by_type_index[t]) = 0;
+ vec_set_len (p->pending_event_data_by_type_index[t], 0);
vlib_process_maybe_free_event_type (p, t);
@@ -832,7 +939,8 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm,
uword n_data_elts, uword n_data_elt_bytes)
{
uword p_flags, add_to_pending, delete_from_wheel;
- void *data_to_be_written_by_caller;
+ u8 *data_to_be_written_by_caller;
+ vec_attr_t va = { .elt_sz = n_data_elt_bytes };
ASSERT (n->type == VLIB_NODE_TYPE_PROCESS);
@@ -842,22 +950,18 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm,
/* Resize data vector and return caller's data to be written. */
{
- void *data_vec = p->pending_event_data_by_type_index[t];
+ u8 *data_vec = p->pending_event_data_by_type_index[t];
uword l;
if (!data_vec && vec_len (nm->recycled_event_data_vectors))
{
data_vec = vec_pop (nm->recycled_event_data_vectors);
- _vec_len (data_vec) = 0;
+ vec_reset_length (data_vec);
}
l = vec_len (data_vec);
- data_vec = _vec_resize (data_vec,
- /* length_increment */ n_data_elts,
- /* total size after increment */
- (l + n_data_elts) * n_data_elt_bytes,
- /* header_bytes */ 0, /* data_align */ 0);
+ data_vec = _vec_realloc_internal (data_vec, l + n_data_elts, &va);
p->pending_event_data_by_type_index[t] = data_vec;
data_to_be_written_by_caller = data_vec + l * n_data_elt_bytes;
@@ -902,8 +1006,11 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm,
p->flags = p_flags | VLIB_PROCESS_RESUME_PENDING;
vec_add1 (nm->data_from_advancing_timing_wheel, x);
if (delete_from_wheel)
- TW (tw_timer_stop) ((TWT (tw_timer_wheel) *) nm->timing_wheel,
- p->stop_timer_handle);
+ {
+ TW (tw_timer_stop)
+ ((TWT (tw_timer_wheel) *) nm->timing_wheel, p->stop_timer_handle);
+ p->stop_timer_handle = ~0;
+ }
}
return data_to_be_written_by_caller;
@@ -1161,8 +1268,7 @@ vlib_node_vectors_per_main_loop_as_integer (vlib_main_t * vm, u32 node_index)
return v >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
}
-void
-vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f);
+void vlib_frame_free (vlib_main_t *vm, vlib_frame_t *f);
/* Return the edge index if present, ~0 otherwise */
uword vlib_node_get_next (vlib_main_t * vm, uword node, uword next_node);
@@ -1208,7 +1314,8 @@ void vlib_node_rename (vlib_main_t * vm, u32 node_index, char *fmt, ...);
/* Register new packet processing node. Nodes can be registered
dynamically via this call or statically via the VLIB_REGISTER_NODE
macro. */
-u32 vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r);
+u32 vlib_register_node (vlib_main_t *vm, vlib_node_registration_t *r,
+ char *fmt, ...);
/* Register all node function variants */
void vlib_register_all_node_march_variants (vlib_main_t *vm);
@@ -1276,6 +1383,121 @@ vlib_node_function_t *
vlib_node_get_preferred_node_fn_variant (vlib_main_t *vm,
vlib_node_fn_registration_t *regs);
+/*
+ * vlib_frame_bitmap functions
+ */
+
+#define VLIB_FRAME_BITMAP_N_UWORDS \
+ (((VLIB_FRAME_SIZE + uword_bits - 1) & ~(uword_bits - 1)) / uword_bits)
+
+typedef uword vlib_frame_bitmap_t[VLIB_FRAME_BITMAP_N_UWORDS];
+
+static_always_inline void
+vlib_frame_bitmap_init (uword *bmp, u32 n_first_bits_set)
+{
+ u32 n_left = VLIB_FRAME_BITMAP_N_UWORDS;
+ while (n_first_bits_set >= (sizeof (uword) * 8) && n_left)
+ {
+ bmp++[0] = ~0;
+ n_first_bits_set -= sizeof (uword) * 8;
+ n_left--;
+ }
+
+ if (n_first_bits_set && n_left)
+ {
+ bmp++[0] = pow2_mask (n_first_bits_set);
+ n_left--;
+ }
+
+ while (n_left--)
+ bmp++[0] = 0;
+}
+
+static_always_inline void
+vlib_frame_bitmap_set_bit_at_index (uword *bmp, uword bit_index)
+{
+ uword_bitmap_set_bits_at_index (bmp, bit_index, 1);
+}
+
+static_always_inline void
+_vlib_frame_bitmap_clear_bit_at_index (uword *bmp, uword bit_index)
+{
+ uword_bitmap_clear_bits_at_index (bmp, bit_index, 1);
+}
+
+static_always_inline void
+vlib_frame_bitmap_set_bits_at_index (uword *bmp, uword bit_index, uword n_bits)
+{
+ uword_bitmap_set_bits_at_index (bmp, bit_index, n_bits);
+}
+
+static_always_inline void
+vlib_frame_bitmap_clear_bits_at_index (uword *bmp, uword bit_index,
+ uword n_bits)
+{
+ uword_bitmap_clear_bits_at_index (bmp, bit_index, n_bits);
+}
+
+static_always_inline void
+vlib_frame_bitmap_clear (uword *bmp)
+{
+ u32 n_left = VLIB_FRAME_BITMAP_N_UWORDS;
+ while (n_left--)
+ bmp++[0] = 0;
+}
+
+static_always_inline void
+vlib_frame_bitmap_xor (uword *bmp, uword *bmp2)
+{
+ u32 n_left = VLIB_FRAME_BITMAP_N_UWORDS;
+ while (n_left--)
+ bmp++[0] ^= bmp2++[0];
+}
+
+static_always_inline void
+vlib_frame_bitmap_or (uword *bmp, uword *bmp2)
+{
+ u32 n_left = VLIB_FRAME_BITMAP_N_UWORDS;
+ while (n_left--)
+ bmp++[0] |= bmp2++[0];
+}
+
+static_always_inline void
+vlib_frame_bitmap_and (uword *bmp, uword *bmp2)
+{
+ u32 n_left = VLIB_FRAME_BITMAP_N_UWORDS;
+ while (n_left--)
+ bmp++[0] &= bmp2++[0];
+}
+
+static_always_inline uword
+vlib_frame_bitmap_count_set_bits (uword *bmp)
+{
+ return uword_bitmap_count_set_bits (bmp, VLIB_FRAME_BITMAP_N_UWORDS);
+}
+
+static_always_inline uword
+vlib_frame_bitmap_is_bit_set (uword *bmp, uword bit_index)
+{
+ return uword_bitmap_is_bit_set (bmp, bit_index);
+}
+
+static_always_inline uword
+vlib_frame_bitmap_find_first_set (uword *bmp)
+{
+ uword rv = uword_bitmap_find_first_set (bmp);
+ ASSERT (rv < VLIB_FRAME_BITMAP_N_UWORDS * uword_bits);
+ return rv;
+}
+
+#define foreach_vlib_frame_bitmap_set_bit_index(i, v) \
+ for (uword _off = 0; _off < ARRAY_LEN (v); _off++) \
+ for (uword _tmp = \
+ (v[_off]) + 0 * (uword) (i = _off * uword_bits + \
+ get_lowest_set_bit_index (v[_off])); \
+ _tmp; i = _off * uword_bits + get_lowest_set_bit_index ( \
+ _tmp = clear_lowest_set_bit (_tmp)))
+
#endif /* included_vlib_node_funcs_h */
/*
diff --git a/src/vlib/pci/pci.c b/src/vlib/pci/pci.c
index 1c1f4b636f5..0bc90c5532d 100644
--- a/src/vlib/pci/pci.c
+++ b/src/vlib/pci/pci.c
@@ -47,26 +47,166 @@
#include <dirent.h>
#include <sys/ioctl.h>
#include <net/if.h>
+#ifdef __linux__
#include <linux/ethtool.h>
#include <linux/sockios.h>
+#endif /* __linux__ */
vlib_pci_main_t pci_main;
-vlib_pci_device_info_t * __attribute__ ((weak))
-vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr,
- clib_error_t ** error)
+VLIB_REGISTER_LOG_CLASS (pci_log, static) = {
+ .class_name = "pci",
+};
+
+#define log_debug(h, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, pci_log.class, "%U: " f, \
+ format_vlib_pci_log, h, ##__VA_ARGS__)
+
+u8 *
+format_vlib_pci_log (u8 *s, va_list *va)
+{
+ vlib_pci_dev_handle_t h = va_arg (*va, vlib_pci_dev_handle_t);
+ return format (s, "%U", format_vlib_pci_addr,
+ vlib_pci_get_addr (vlib_get_main (), h));
+}
+
+vlib_pci_device_info_t *__attribute__ ((weak))
+vlib_pci_get_device_info (vlib_main_t *vm, vlib_pci_addr_t *addr,
+ clib_error_t **error)
{
if (error)
*error = clib_error_return (0, "unsupported");
return 0;
}
+clib_error_t *__attribute__ ((weak))
+vlib_pci_get_device_root_bus (vlib_pci_addr_t *addr, vlib_pci_addr_t *root_bus)
+{
+ return 0;
+}
+
vlib_pci_addr_t * __attribute__ ((weak)) vlib_pci_get_all_dev_addrs ()
{
return 0;
}
static clib_error_t *
+_vlib_pci_config_set_control_bit (vlib_main_t *vm, vlib_pci_dev_handle_t h,
+ u16 bit, int new_val, int *already_set)
+{
+ u16 control, old;
+ clib_error_t *err;
+
+ err = vlib_pci_read_write_config (
+ vm, h, VLIB_READ, STRUCT_OFFSET_OF (vlib_pci_config_t, command), &old,
+ STRUCT_SIZE_OF (vlib_pci_config_t, command));
+
+ if (err)
+ return err;
+
+ control = new_val ? old | bit : old & ~bit;
+ *already_set = old == control;
+ if (*already_set)
+ return 0;
+
+ return vlib_pci_read_write_config (
+ vm, h, VLIB_WRITE, STRUCT_OFFSET_OF (vlib_pci_config_t, command), &control,
+ STRUCT_SIZE_OF (vlib_pci_config_t, command));
+}
+
+clib_error_t *
+vlib_pci_intr_enable (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ const vlib_pci_config_reg_command_t cmd = { .intx_disable = 1 };
+ clib_error_t *err;
+ int already_set;
+
+ err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 0, &already_set);
+ log_debug (h, "interrupt%senabled", already_set ? " " : " already ");
+ return err;
+}
+
+clib_error_t *
+vlib_pci_intr_disable (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ const vlib_pci_config_reg_command_t cmd = { .intx_disable = 1 };
+ clib_error_t *err;
+ int already_set;
+
+ err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 1, &already_set);
+ log_debug (h, "interrupt%sdisabled", already_set ? " " : " already ");
+ return err;
+}
+
+clib_error_t *
+vlib_pci_bus_master_enable (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ const vlib_pci_config_reg_command_t cmd = { .bus_master = 1 };
+ clib_error_t *err;
+ int already_set;
+
+ err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 1, &already_set);
+ log_debug (h, "bus-master%senabled", already_set ? " " : " already ");
+ return err;
+}
+
+clib_error_t *
+vlib_pci_bus_master_disable (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ const vlib_pci_config_reg_command_t cmd = { .bus_master = 1 };
+ clib_error_t *err;
+ int already_set;
+
+ err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 0, &already_set);
+ log_debug (h, "bus-master%sdisabled", already_set ? " " : " already ");
+ return err;
+}
+
+clib_error_t *
+vlib_pci_function_level_reset (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ vlib_pci_config_t cfg;
+ pci_capability_pcie_t *cap;
+ pci_capability_pcie_dev_control_t dev_control;
+ clib_error_t *err;
+ u8 offset;
+
+ log_debug (h, "function level reset");
+
+ err = vlib_pci_read_write_config (vm, h, VLIB_READ, 0, &cfg, sizeof (cfg));
+ if (err)
+ return err;
+
+ offset = cfg.cap_ptr;
+ do
+ {
+ cap = (pci_capability_pcie_t *) (cfg.data + offset);
+
+ if (cap->capability_id == PCI_CAP_ID_PCIE)
+ break;
+
+ offset = cap->next_offset;
+ }
+ while (offset);
+
+ if (cap->capability_id != PCI_CAP_ID_PCIE)
+ return clib_error_return (0, "PCIe capability config not found");
+
+ if (cap->dev_caps.flr_capable == 0)
+ return clib_error_return (0, "PCIe function level reset not supported");
+
+ dev_control = cap->dev_control;
+ dev_control.function_level_reset = 1;
+
+ if ((err = vlib_pci_write_config_u16 (
+ vm, h, offset + STRUCT_OFFSET_OF (pci_capability_pcie_t, dev_control),
+ &dev_control.as_u16)))
+ return err;
+
+ return 0;
+}
+
+static clib_error_t *
show_pci_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
@@ -143,27 +283,56 @@ format_vlib_pci_addr (u8 * s, va_list * va)
}
u8 *
-format_vlib_pci_link_speed (u8 * s, va_list * va)
+format_vlib_pci_link_port (u8 *s, va_list *va)
+{
+ vlib_pci_config_t *c = va_arg (*va, vlib_pci_config_t *);
+ pci_capability_pcie_t *r = pci_config_find_capability (c, PCI_CAP_ID_PCIE);
+
+ if (!r)
+ return format (s, "unknown");
+
+ return format (s, "P%d", r->link_caps.port_number);
+}
+
+static u8 *
+_vlib_pci_link_speed (u8 *s, u8 speed, u8 width)
+{
+ static char *speeds[] = {
+ [1] = "2.5", [2] = "5.0", [3] = "8.0", [4] = "16.0", [5] = "32.0"
+ };
+
+ if (speed >= ARRAY_LEN (speeds) || speeds[speed] == 0)
+ s = format (s, "unknown speed");
+ else
+ s = format (s, "%s GT/s", speeds[speed]);
+
+ return format (s, " x%u", width);
+}
+
+u8 *
+format_vlib_pci_link_speed (u8 *s, va_list *va)
+{
+ vlib_pci_config_t *c = va_arg (*va, vlib_pci_config_t *);
+ pci_capability_pcie_t *r = pci_config_find_capability (c, PCI_CAP_ID_PCIE);
+
+ if (!r)
+ return format (s, "unknown");
+
+ return _vlib_pci_link_speed (s, r->link_status.link_speed,
+ r->link_status.negotiated_link_width);
+}
+
+u8 *
+format_vlib_pci_link_speed_cap (u8 *s, va_list *va)
{
- vlib_pci_device_info_t *d = va_arg (*va, vlib_pci_device_info_t *);
- pcie_config_regs_t *r =
- pci_config_find_capability (&d->config0, PCI_CAP_ID_PCIE);
- int width;
+ vlib_pci_config_t *c = va_arg (*va, vlib_pci_config_t *);
+ pci_capability_pcie_t *r = pci_config_find_capability (c, PCI_CAP_ID_PCIE);
if (!r)
return format (s, "unknown");
- width = (r->link_status >> 4) & 0x3f;
-
- if ((r->link_status & 0xf) == 1)
- return format (s, "2.5 GT/s x%u", width);
- if ((r->link_status & 0xf) == 2)
- return format (s, "5.0 GT/s x%u", width);
- if ((r->link_status & 0xf) == 3)
- return format (s, "8.0 GT/s x%u", width);
- if ((r->link_status & 0xf) == 4)
- return format (s, "16.0 GT/s x%u", width);
- return format (s, "unknown");
+ return _vlib_pci_link_speed (s, r->link_caps.max_link_speed,
+ r->link_caps.max_link_width);
}
u8 *
@@ -238,29 +407,8 @@ format_vlib_pci_vpd (u8 * s, va_list * args)
return s;
}
-
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_pci_command, static) = {
.path = "show pci",
.short_help = "show pci [all]",
.function = show_pci_fn,
};
-/* *INDENT-ON* */
-
-clib_error_t *
-pci_bus_init (vlib_main_t * vm)
-{
- vlib_pci_main_t *pm = &pci_main;
- pm->log_default = vlib_log_register_class ("pci", 0);
- return 0;
-}
-
-VLIB_INIT_FUNCTION (pci_bus_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vlib/pci/pci.h b/src/vlib/pci/pci.h
index 5aae597f825..becfa80f37a 100644
--- a/src/vlib/pci/pci.h
+++ b/src/vlib/pci/pci.h
@@ -43,7 +43,6 @@
#include <vlib/vlib.h>
#include <vlib/pci/pci_config.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (union
{
struct
@@ -55,7 +54,6 @@ typedef CLIB_PACKED (union
};
u32 as_u32;
}) vlib_pci_addr_t;
-/* *INDENT-ON* */
typedef struct vlib_pci_device_info
{
@@ -72,6 +70,7 @@ typedef struct vlib_pci_device_info
u16 device_class;
u16 vendor_id;
u16 device_id;
+ u8 revision;
/* Vital Product Data */
u8 *product_name;
@@ -82,12 +81,7 @@ typedef struct vlib_pci_device_info
u8 *driver_name;
/* First 64 bytes of configuration space. */
- union
- {
- pci_config_type0_regs_t config0;
- pci_config_type1_regs_t config1;
- u8 config_data[256];
- };
+ vlib_pci_config_t config;
/* IOMMU Group */
int iommu_group;
@@ -96,9 +90,11 @@ typedef struct vlib_pci_device_info
typedef u32 vlib_pci_dev_handle_t;
-vlib_pci_device_info_t *vlib_pci_get_device_info (vlib_main_t * vm,
- vlib_pci_addr_t * addr,
- clib_error_t ** error);
+vlib_pci_device_info_t *vlib_pci_get_device_info (vlib_main_t *vm,
+ vlib_pci_addr_t *addr,
+ clib_error_t **error);
+clib_error_t *vlib_pci_get_device_root_bus (vlib_pci_addr_t *addr,
+ vlib_pci_addr_t *root_bus);
vlib_pci_addr_t *vlib_pci_get_all_dev_addrs ();
vlib_pci_addr_t *vlib_pci_get_addr (vlib_main_t * vm,
vlib_pci_dev_handle_t h);
@@ -127,6 +123,12 @@ typedef struct
u16 vendor_id, device_id;
} pci_device_id_t;
+#define PCI_DEVICE_IDS(...) \
+ (pci_device_id_t[]) \
+ { \
+ __VA_ARGS__, {} \
+ }
+
typedef void (pci_intx_handler_function_t) (vlib_main_t * vm,
vlib_pci_dev_handle_t handle);
typedef void (pci_msix_handler_function_t) (vlib_main_t * vm,
@@ -180,8 +182,8 @@ static void __vlib_rm_pci_device_registration_##x (void) \
} \
__VA_ARGS__ pci_device_registration_t x
-clib_error_t *vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr,
- char *uio_driver_name);
+clib_error_t *vlib_pci_bind_to_uio (vlib_main_t *vm, vlib_pci_addr_t *addr,
+ char *uio_driver_name, int force);
/* Configuration space read/write. */
clib_error_t *vlib_pci_read_write_config (vlib_main_t * vm,
@@ -196,15 +198,19 @@ clib_error_t *vlib_pci_read_write_io (vlib_main_t * vm,
vlib_read_or_write_t read_or_write,
uword address, void *data, u32 n_bytes);
-
-#define _(t, x) \
-static inline clib_error_t * \
-vlib_pci_read_##x##_##t (vlib_main_t *vm, vlib_pci_dev_handle_t h, \
- uword address, t * data) \
-{ \
- return vlib_pci_read_write_##x (vm, h, VLIB_READ,address, data, \
- sizeof (data[0])); \
-}
+#define _(t, x) \
+ static inline clib_error_t *vlib_pci_read_##x##_##t ( \
+ vlib_main_t *vm, vlib_pci_dev_handle_t h, uword address, t *data) \
+ { \
+ return vlib_pci_read_write_##x (vm, h, VLIB_READ, address, data, \
+ sizeof (data[0])); \
+ } \
+ static inline clib_error_t *vlib_pci_write_##x##_##t ( \
+ vlib_main_t *vm, vlib_pci_dev_handle_t h, uword address, t *data) \
+ { \
+ return vlib_pci_read_write_##x (vm, h, VLIB_WRITE, address, data, \
+ sizeof (data[0])); \
+ }
_(u32, config);
_(u16, config);
@@ -216,77 +222,6 @@ _(u8, io);
#undef _
-#define _(t, x) \
-static inline clib_error_t * \
-vlib_pci_write_##x##_##t (vlib_main_t *vm, vlib_pci_dev_handle_t h, \
- uword address, t * data) \
-{ \
- return vlib_pci_read_write_##x (vm, h, VLIB_WRITE, \
- address, data, sizeof (data[0])); \
-}
-
-_(u32, config);
-_(u16, config);
-_(u8, config);
-
-_(u32, io);
-_(u16, io);
-_(u8, io);
-
-#undef _
-
-static inline clib_error_t *
-vlib_pci_intr_enable (vlib_main_t * vm, vlib_pci_dev_handle_t h)
-{
- u16 command;
- clib_error_t *err;
-
- err = vlib_pci_read_config_u16 (vm, h, 4, &command);
-
- if (err)
- return err;
-
- command &= ~PCI_COMMAND_INTX_DISABLE;
-
- return vlib_pci_write_config_u16 (vm, h, 4, &command);
-}
-
-static inline clib_error_t *
-vlib_pci_intr_disable (vlib_main_t * vm, vlib_pci_dev_handle_t h)
-{
- u16 command;
- clib_error_t *err;
-
- err = vlib_pci_read_config_u16 (vm, h, 4, &command);
-
- if (err)
- return err;
-
- command |= PCI_COMMAND_INTX_DISABLE;
-
- return vlib_pci_write_config_u16 (vm, h, 4, &command);
-}
-
-static inline clib_error_t *
-vlib_pci_bus_master_enable (vlib_main_t * vm, vlib_pci_dev_handle_t h)
-{
- clib_error_t *err;
- u16 command;
-
- /* Set bus master enable (BME) */
- err = vlib_pci_read_config_u16 (vm, h, 4, &command);
-
- if (err)
- return err;
-
- if (command & PCI_COMMAND_BUS_MASTER)
- return 0;
-
- command |= PCI_COMMAND_BUS_MASTER;
-
- return vlib_pci_write_config_u16 (vm, h, 4, &command);
-}
-
clib_error_t *vlib_pci_device_open (vlib_main_t * vm, vlib_pci_addr_t * addr,
pci_device_id_t ids[],
vlib_pci_dev_handle_t * handle);
@@ -303,11 +238,16 @@ clib_error_t *vlib_pci_register_intx_handler (vlib_main_t * vm,
vlib_pci_dev_handle_t h,
pci_intx_handler_function_t *
intx_handler);
+clib_error_t *vlib_pci_unregister_intx_handler (vlib_main_t *vm,
+ vlib_pci_dev_handle_t h);
clib_error_t *vlib_pci_register_msix_handler (vlib_main_t * vm,
vlib_pci_dev_handle_t h,
u32 start, u32 count,
pci_msix_handler_function_t *
msix_handler);
+clib_error_t *vlib_pci_unregister_msix_handler (vlib_main_t *vm,
+ vlib_pci_dev_handle_t h,
+ u32 start, u32 count);
clib_error_t *vlib_pci_enable_msix_irq (vlib_main_t * vm,
vlib_pci_dev_handle_t h, u16 start,
u16 count);
@@ -321,11 +261,22 @@ uword vlib_pci_get_msix_file_index (vlib_main_t * vm, vlib_pci_dev_handle_t h,
int vlib_pci_supports_virtual_addr_dma (vlib_main_t * vm,
vlib_pci_dev_handle_t h);
+clib_error_t *vlib_pci_intr_enable (vlib_main_t *, vlib_pci_dev_handle_t);
+clib_error_t *vlib_pci_intr_disable (vlib_main_t *, vlib_pci_dev_handle_t);
+clib_error_t *vlib_pci_bus_master_enable (vlib_main_t *,
+ vlib_pci_dev_handle_t);
+clib_error_t *vlib_pci_bus_master_disable (vlib_main_t *,
+ vlib_pci_dev_handle_t);
+clib_error_t *vlib_pci_function_level_reset (vlib_main_t *,
+ vlib_pci_dev_handle_t);
unformat_function_t unformat_vlib_pci_addr;
format_function_t format_vlib_pci_addr;
format_function_t format_vlib_pci_link_speed;
+format_function_t format_vlib_pci_link_speed_cap;
+format_function_t format_vlib_pci_link_port;
format_function_t format_vlib_pci_vpd;
+format_function_t format_vlib_pci_log;
#endif /* included_vlib_pci_h */
diff --git a/src/vlib/pci/pci_config.h b/src/vlib/pci/pci_config.h
index b4c38eb53e4..21b40c0f499 100644
--- a/src/vlib/pci/pci_config.h
+++ b/src/vlib/pci/pci_config.h
@@ -168,210 +168,114 @@ pci_device_class_base (pci_device_class_t c)
#define VIRTIO_PCI_LEGACY_DEVICEID_NET 0x1000
#define VIRTIO_PCI_MODERN_DEVICEID_NET 0x1041
-/*
- * Under PCI, each device has 256 bytes of configuration address space,
- * of which the first 64 bytes are standardized as follows:
- */
-typedef struct
+typedef union
{
- u16 vendor_id;
- u16 device_id;
+ struct
+ {
+ u16 io_space : 1;
+ u16 mem_space : 1;
+ u16 bus_master : 1;
+ u16 special_cycles : 1;
+ u16 mem_write_invalidate : 1;
+ u16 vga_palette_snoop : 1;
+ u16 parity_err_resp : 1;
+ u16 _reserved_7 : 1;
+ u16 serr_enable : 1;
+ u16 fast_b2b_enable : 1;
+ u16 intx_disable : 1;
+ u16 _reserved_11 : 5;
+ };
+ u16 as_u16;
+} vlib_pci_config_reg_command_t;
+
+typedef union
+{
+ struct
+ {
+ u16 _reserved_0 : 3;
+ u16 intx_status : 1;
+ u16 capabilities_list : 1;
+ u16 capaable_66mhz : 1;
+ u16 _reserved_6 : 1;
+ u16 fast_b2b_capable : 1;
+ u16 master_data_parity_error : 1;
+ u16 devsel_timing : 2;
+ u16 sig_target_abort : 1;
+ u16 rec_target_abort : 1;
+ u16 rec_master_abort : 1;
+ u16 sig_system_err : 1;
+ u16 detected_parity_err : 1;
+ };
+ u16 as_u16;
+} vlib_pci_config_reg_status_t;
- u16 command;
-#define PCI_COMMAND_IO (1 << 0) /* Enable response in I/O space */
-#define PCI_COMMAND_MEMORY (1 << 1) /* Enable response in Memory space */
-#define PCI_COMMAND_BUS_MASTER (1 << 2) /* Enable bus mastering */
-#define PCI_COMMAND_SPECIAL (1 << 3) /* Enable response to special cycles */
-#define PCI_COMMAND_WRITE_INVALIDATE (1 << 4) /* Use memory write and invalidate */
-#define PCI_COMMAND_VGA_PALETTE_SNOOP (1 << 5)
-#define PCI_COMMAND_PARITY (1 << 6)
-#define PCI_COMMAND_WAIT (1 << 7) /* Enable address/data stepping */
-#define PCI_COMMAND_SERR (1 << 8) /* Enable SERR */
-#define PCI_COMMAND_BACK_TO_BACK_WRITE (1 << 9)
-#define PCI_COMMAND_INTX_DISABLE (1 << 10) /* INTx Emulation Disable */
-
- u16 status;
-#define PCI_STATUS_INTX_PENDING (1 << 3)
-#define PCI_STATUS_CAPABILITY_LIST (1 << 4)
-#define PCI_STATUS_66MHZ (1 << 5) /* Support 66 Mhz PCI 2.1 bus */
-#define PCI_STATUS_UDF (1 << 6) /* Support User Definable Features (obsolete) */
-#define PCI_STATUS_BACK_TO_BACK_WRITE (1 << 7) /* Accept fast-back to back */
-#define PCI_STATUS_PARITY_ERROR (1 << 8) /* Detected parity error */
-#define PCI_STATUS_DEVSEL_GET(x) ((x >> 9) & 3) /* DEVSEL timing */
-#define PCI_STATUS_DEVSEL_FAST (0 << 9)
-#define PCI_STATUS_DEVSEL_MEDIUM (1 << 9)
-#define PCI_STATUS_DEVSEL_SLOW (2 << 9)
-#define PCI_STATUS_SIG_TARGET_ABORT (1 << 11) /* Set on target abort */
-#define PCI_STATUS_REC_TARGET_ABORT (1 << 12) /* Master ack of " */
-#define PCI_STATUS_REC_MASTER_ABORT (1 << 13) /* Set on master abort */
-#define PCI_STATUS_SIG_SYSTEM_ERROR (1 << 14) /* Set when we drive SERR */
-#define PCI_STATUS_DETECTED_PARITY_ERROR (1 << 15)
-
- u8 revision_id;
- u8 programming_interface_class; /* Reg. Level Programming Interface */
-
- pci_device_class_t device_class:16;
-
- u8 cache_size;
- u8 latency_timer;
-
- u8 header_type;
-#define PCI_HEADER_TYPE_NORMAL 0
-#define PCI_HEADER_TYPE_BRIDGE 1
-#define PCI_HEADER_TYPE_CARDBUS 2
-
- u8 bist;
-#define PCI_BIST_CODE_MASK 0x0f /* Return result */
-#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */
-#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */
-} pci_config_header_t;
-
-/* Byte swap config header. */
-always_inline void
-pci_config_header_little_to_host (pci_config_header_t * r)
+typedef enum
{
- if (!CLIB_ARCH_IS_BIG_ENDIAN)
- return;
-#define _(f,t) r->f = clib_byte_swap_##t (r->f)
- _(vendor_id, u16);
- _(device_id, u16);
- _(command, u16);
- _(status, u16);
- _(device_class, u16);
-#undef _
-}
+ PCI_HEADER_TYPE_NORMAL = 0,
+ PCI_HEADER_TYPE_BRIDGE = 1,
+ PCI_HEADER_TYPE_CARDBUS = 2
+} __clib_packed pci_config_header_type_t;
+
+#define foreach_pci_config_reg \
+ _ (u16, vendor_id) \
+ _ (u16, device_id) \
+ _ (vlib_pci_config_reg_command_t, command) \
+ _ (vlib_pci_config_reg_status_t, status) \
+ _ (u8, revision_id) \
+ _ (u8, prog_if) \
+ _ (u8, subclass) \
+ _ (u8, class) \
+ _ (u8, cache_line_size) \
+ _ (u8, latency_timer) \
+ _ (pci_config_header_type_t, header_type) \
+ _ (u8, bist) \
+ _ (u32, bar, [6]) \
+ _ (u32, cardbus_cis_ptr) \
+ _ (u16, sub_vendor_id) \
+ _ (u16, sub_device_id) \
+ _ (u32, exp_rom_base_addr) \
+ _ (u8, cap_ptr) \
+ _ (u8, _reserved_0x35, [3]) \
+ _ (u32, _reserved_0x38) \
+ _ (u8, intr_line) \
+ _ (u8, intr_pin) \
+ _ (u8, min_grant) \
+ _ (u8, max_latency)
-/* Header type 0 (normal devices) */
typedef struct
{
- pci_config_header_t header;
-
- /*
- * Base addresses specify locations in memory or I/O space.
- * Decoded size can be determined by writing a value of
- * 0xffffffff to the register, and reading it back. Only
- * 1 bits are decoded.
- */
- u32 base_address[6];
-
- u16 cardbus_cis;
-
- u16 subsystem_vendor_id;
- u16 subsystem_id;
-
- u32 rom_address;
-#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */
-#define PCI_ROM_ADDRESS_ENABLE 0x01
-#define PCI_ROM_ADDRESS_MASK (~0x7ffUL)
-
- u8 first_capability_offset;
- CLIB_PAD_FROM_TO (0x35, 0x3c);
-
- u8 interrupt_line;
- u8 interrupt_pin;
- u8 min_grant;
- u8 max_latency;
+#define _(a, b, ...) a b __VA_ARGS__;
+ foreach_pci_config_reg
+#undef _
+} vlib_pci_config_hdr_t;
- u8 capability_data[0];
-} pci_config_type0_regs_t;
+STATIC_ASSERT_SIZEOF (vlib_pci_config_hdr_t, 64);
-always_inline void
-pci_config_type0_little_to_host (pci_config_type0_regs_t * r)
+typedef union
{
- int i;
- if (!CLIB_ARCH_IS_BIG_ENDIAN)
- return;
- pci_config_header_little_to_host (&r->header);
-#define _(f,t) r->f = clib_byte_swap_##t (r->f)
- for (i = 0; i < ARRAY_LEN (r->base_address); i++)
- _(base_address[i], u32);
- _(cardbus_cis, u16);
- _(subsystem_vendor_id, u16);
- _(subsystem_id, u16);
- _(rom_address, u32);
+ struct
+ {
+#define _(a, b, ...) a b __VA_ARGS__;
+ foreach_pci_config_reg
#undef _
-}
-
-/* Header type 1 (PCI-to-PCI bridges) */
-typedef struct
-{
- pci_config_header_t header;
-
- u32 base_address[2];
-
- /* Primary/secondary bus number. */
- u8 primary_bus;
- u8 secondary_bus;
-
- /* Highest bus number behind the bridge */
- u8 subordinate_bus;
-
- u8 secondary_bus_latency_timer;
-
- /* I/O range behind bridge. */
- u8 io_base, io_limit;
+ };
+ u8 data[256];
+} vlib_pci_config_t;
- /* Secondary status register, only bit 14 used */
- u16 secondary_status;
+STATIC_ASSERT_SIZEOF (vlib_pci_config_t, 256);
- /* Memory range behind bridge in units of 64k bytes. */
- u16 memory_base, memory_limit;
-#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
-#define PCI_MEMORY_RANGE_MASK (~0x0fUL)
-
- u16 prefetchable_memory_base, prefetchable_memory_limit;
-#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL
-#define PCI_PREF_RANGE_TYPE_32 0x00
-#define PCI_PREF_RANGE_TYPE_64 0x01
-#define PCI_PREF_RANGE_MASK (~0x0fUL)
-
- u32 prefetchable_memory_base_upper_32bits;
- u32 prefetchable_memory_limit_upper_32bits;
- u16 io_base_upper_16bits;
- u16 io_limit_upper_16bits;
-
- /* Same as for type 0. */
- u8 capability_list_offset;
- CLIB_PAD_FROM_TO (0x35, 0x37);
-
- u32 rom_address;
- CLIB_PAD_FROM_TO (0x3c, 0x3e);
-
- u16 bridge_control;
-#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */
-#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */
-#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */
-#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */
-#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */
-#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */
-#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */
-
- u8 capability_data[0];
-} pci_config_type1_regs_t;
-
-always_inline void
-pci_config_type1_little_to_host (pci_config_type1_regs_t * r)
+typedef union
{
- int i;
- if (!CLIB_ARCH_IS_BIG_ENDIAN)
- return;
- pci_config_header_little_to_host (&r->header);
-#define _(f,t) r->f = clib_byte_swap_##t (r->f)
- for (i = 0; i < ARRAY_LEN (r->base_address); i++)
- _(base_address[i], u32);
- _(secondary_status, u16);
- _(memory_base, u16);
- _(memory_limit, u16);
- _(prefetchable_memory_base, u16);
- _(prefetchable_memory_limit, u16);
- _(prefetchable_memory_base_upper_32bits, u32);
- _(prefetchable_memory_limit_upper_32bits, u32);
- _(io_base_upper_16bits, u16);
- _(io_limit_upper_16bits, u16);
- _(rom_address, u32);
- _(bridge_control, u16);
+ struct
+ {
+#define _(a, b, ...) a b __VA_ARGS__;
+ foreach_pci_config_reg
#undef _
-}
+ };
+ u8 data[4096];
+} vlib_pci_config_ext_t;
+
+STATIC_ASSERT_SIZEOF (vlib_pci_config_ext_t, 4096);
/* Capabilities. */
typedef enum pci_capability_type
@@ -418,16 +322,16 @@ typedef struct
} __clib_packed pci_capability_regs_t;
always_inline void *
-pci_config_find_capability (pci_config_type0_regs_t * t, int cap_type)
+pci_config_find_capability (vlib_pci_config_t *t, int cap_type)
{
pci_capability_regs_t *c;
u32 next_offset;
u32 ttl = 48;
- if (!(t->header.status & PCI_STATUS_CAPABILITY_LIST))
+ if (!(t->status.capabilities_list))
return 0;
- next_offset = t->first_capability_offset;
+ next_offset = t->cap_ptr;
while (ttl-- && next_offset >= 0x40)
{
c = (void *) t + (next_offset & ~3);
@@ -592,77 +496,6 @@ pcie_code_to_size (int code)
return size;
}
-/* PCI Express capability registers */
-typedef struct
-{
- pci_capability_regs_t header;
- u16 pcie_capabilities;
-#define PCIE_CAP_VERSION(x) (((x) >> 0) & 0xf)
-#define PCIE_CAP_DEVICE_TYPE(x) (((x) >> 4) & 0xf)
-#define PCIE_DEVICE_TYPE_ENDPOINT 0
-#define PCIE_DEVICE_TYPE_LEGACY_ENDPOINT 1
-#define PCIE_DEVICE_TYPE_ROOT_PORT 4
- /* Upstream/downstream port of PCI Express switch. */
-#define PCIE_DEVICE_TYPE_SWITCH_UPSTREAM 5
-#define PCIE_DEVICE_TYPE_SWITCH_DOWNSTREAM 6
-#define PCIE_DEVICE_TYPE_PCIE_TO_PCI_BRIDGE 7
-#define PCIE_DEVICE_TYPE_PCI_TO_PCIE_BRIDGE 8
- /* Root complex integrated endpoint. */
-#define PCIE_DEVICE_TYPE_ROOT_COMPLEX_ENDPOINT 9
-#define PCIE_DEVICE_TYPE_ROOT_COMPLEX_EVENT_COLLECTOR 10
-#define PCIE_CAP_SLOW_IMPLEMENTED (1 << 8)
-#define PCIE_CAP_MSI_IRQ(x) (((x) >> 9) & 0x1f)
- u32 dev_capabilities;
-#define PCIE_DEVCAP_MAX_PAYLOAD(x) (128 << (((x) >> 0) & 0x7))
-#define PCIE_DEVCAP_PHANTOM_BITS(x) (((x) >> 3) & 0x3)
-#define PCIE_DEVCAP_EXTENTED_TAG (1 << 5)
-#define PCIE_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */
-#define PCIE_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */
-#define PCIE_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */
-#define PCIE_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */
-#define PCIE_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */
-#define PCIE_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */
-#define PCIE_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */
- u16 dev_control;
-#define PCIE_CTRL_CERE 0x0001 /* Correctable Error Reporting En. */
-#define PCIE_CTRL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */
-#define PCIE_CTRL_FERE 0x0004 /* Fatal Error Reporting Enable */
-#define PCIE_CTRL_URRE 0x0008 /* Unsupported Request Reporting En. */
-#define PCIE_CTRL_RELAX_EN 0x0010 /* Enable relaxed ordering */
-#define PCIE_CTRL_MAX_PAYLOAD(n) (((n) & 7) << 5)
-#define PCIE_CTRL_EXT_TAG 0x0100 /* Extended Tag Field Enable */
-#define PCIE_CTRL_PHANTOM 0x0200 /* Phantom Functions Enable */
-#define PCIE_CTRL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */
-#define PCIE_CTRL_NOSNOOP_EN 0x0800 /* Enable No Snoop */
-#define PCIE_CTRL_MAX_READ_REQUEST(n) (((n) & 7) << 12)
- u16 dev_status;
-#define PCIE_DEVSTA_AUXPD 0x10 /* AUX Power Detected */
-#define PCIE_DEVSTA_TRPND 0x20 /* Transactions Pending */
- u32 link_capabilities;
- u16 link_control;
- u16 link_status;
- u32 slot_capabilities;
- u16 slot_control;
- u16 slot_status;
- u16 root_control;
-#define PCIE_RTCTL_SECEE 0x01 /* System Error on Correctable Error */
-#define PCIE_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */
-#define PCIE_RTCTL_SEFEE 0x04 /* System Error on Fatal Error */
-#define PCIE_RTCTL_PMEIE 0x08 /* PME Interrupt Enable */
-#define PCIE_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */
- u16 root_capabilities;
- u32 root_status;
- u32 dev_capabilities2;
- u16 dev_control2;
- u16 dev_status2;
- u32 link_capabilities2;
- u16 link_control2;
- u16 link_status2;
- u32 slot_capabilities2;
- u16 slot_control2;
- u16 slot_status2;
-} __clib_packed pcie_config_regs_t;
-
/* PCI express extended capabilities. */
typedef enum pcie_capability_type
{
@@ -735,12 +568,178 @@ typedef struct
#define PCI_PWR_CAP 12 /* Capability */
#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */
+#define pci_capability_pcie_dev_caps_t_fields \
+ _ (3, max_payload_sz) \
+ _ (2, phantom_fn_present) \
+ _ (1, ext_tags_supported) \
+ _ (3, acceptable_l0s_latency) \
+ _ (3, acceptable_l1_latency) \
+ _ (1, attention_button_present) \
+ _ (1, attention_indicator_present) \
+ _ (1, power_indicator_present) \
+ _ (1, role_based_error_reporting_supported) \
+ _ (2, _reserved_16) \
+ _ (8, slot_ppower_limit_val) \
+ _ (2, slot_power_limit_scale) \
+ _ (1, flr_capable) \
+ _ (3, _reserved_29)
+
+#define pci_capability_pcie_dev_control_t_fields \
+ _ (1, enable_correctable_error_reporting) \
+ _ (1, enable_non_fatal_error_reporting) \
+ _ (1, enable_fatal_error_reporting) \
+ _ (1, enable_unsupported_request_reporting) \
+ _ (1, enable_relaxed_ordering) \
+ _ (3, maximum_payload_size) \
+ _ (1, extended_tag_field_enable) \
+ _ (1, phantom_fn_denable) \
+ _ (1, aux_power_pm_enable) \
+ _ (1, enable_no_snoop) \
+ _ (3, max_read_request_size) \
+ _ (1, function_level_reset)
+
+#define pci_capability_pcie_dev_status_t_fields \
+ _ (1, correctable_err_detected) \
+ _ (1, non_fatal_err_detected) \
+ _ (1, fatal_err_detected) \
+ _ (1, unsupported_request_detected) \
+ _ (1, aux_power_detected) \
+ _ (1, transaction_pending) \
+ _ (10, _reserved_6)
+
+#define pci_capability_pcie_link_caps_t_fields \
+ _ (4, max_link_speed) \
+ _ (5, max_link_width) \
+ _ (2, aspm_support) \
+ _ (3, l0s_exit_latency) \
+ _ (3, l1_exit_latency) \
+ _ (1, clock_power_mgmt_status) \
+ _ (1, surprise_down_error_reporting_capable_status) \
+ _ (1, data_link_layer_link_active_reporting_capable_status) \
+ _ (1, link_bandwidth_notification_capability_status) \
+ _ (1, aspm_optionality_compliance) \
+ _ (1, _reserved_23) \
+ _ (8, port_number)
+
+#define pci_capability_pcie_link_control_t_fields \
+ _ (2, aspm_control) \
+ _ (1, _reserved_2) \
+ _ (1, read_completion_boundary) \
+ _ (1, link_disable) \
+ _ (1, retrain_clock) \
+ _ (1, common_clock_config) \
+ _ (1, extended_synch) \
+ _ (1, enable_clock_pwr_mgmt) \
+ _ (1, hw_autonomous_width_disable) \
+ _ (1, link_bw_mgmt_intr_enable) \
+ _ (1, link_autonomous_bw_intr_enable) \
+ _ (4, _reserved_12)
+
+#define pci_capability_pcie_link_status_t_fields \
+ _ (4, link_speed) \
+ _ (6, negotiated_link_width) \
+ _ (1, _reserved_10) \
+ _ (1, link_training) \
+ _ (1, slot_clock_config) \
+ _ (1, data_link_layer_link_active) \
+ _ (1, link_bw_mgmt_status) \
+ _ (1, _reserved_15)
+
+#define pci_capability_pcie_dev_caps2_t_fields \
+ _ (4, compl_timeout_ranges_supported) \
+ _ (1, compl_timeout_disable_supported) \
+ _ (1, ari_forwarding_supported) \
+ _ (1, atomic_op_routing_supported) \
+ _ (1, bit32_atomic_op_completer_supported) \
+ _ (1, bit64_atomic_op_completer_supported) \
+ _ (1, bit128_cas_completer_supported) \
+ _ (1, no_ro_enabled_pr_pr_passing) \
+ _ (1, ltr_mechanism_supported) \
+ _ (1, tph_completer_supported) \
+ _ (18, _reserved_14)
+
+#define pci_capability_pcie_dev_control2_t_fields \
+ _ (4, completion_timeout_value) \
+ _ (1, completion_timeout_disable) \
+ _ (1, ari_forwarding_enable) \
+ _ (1, atomic_op_requester_enable) \
+ _ (1, atomic_op_egress_blocking) \
+ _ (1, ido_request_enable) \
+ _ (1, ido_completion_enable) \
+ _ (1, ltr_mechanism_enable) \
+ _ (5, _reserved_11)
+
+#define pci_capability_pcie_link_control2_t_fields \
+ _ (4, target_link_speed) \
+ _ (1, enter_compliance) \
+ _ (1, hw_autonomous_speed_disable) \
+ _ (1, selectable_de_emphasis) \
+ _ (3, transmit_margin) \
+ _ (1, enter_modified_compliance) \
+ _ (1, compliance_sos) \
+ _ (4, compliance_de_emphasis)
+
+#define pci_capability_pcie_link_status2_t_fields \
+ _ (1, current_de_emphasis_level) \
+ _ (15, _reserved_1)
+
+#define __(t, n) \
+ typedef union \
+ { \
+ struct \
+ { \
+ n##_fields; \
+ }; \
+ t as_##t; \
+ } n; \
+ STATIC_ASSERT_SIZEOF (n, sizeof (t))
+
+#define _(b, n) u32 n : b;
+__ (u32, pci_capability_pcie_dev_caps_t);
+__ (u32, pci_capability_pcie_link_caps_t);
+__ (u32, pci_capability_pcie_dev_caps2_t);
+#undef _
+#define _(b, n) u16 n : b;
+__ (u16, pci_capability_pcie_dev_control_t);
+__ (u16, pci_capability_pcie_dev_status_t);
+__ (u16, pci_capability_pcie_link_control_t);
+__ (u16, pci_capability_pcie_link_status_t);
+__ (u16, pci_capability_pcie_dev_control2_t);
+__ (u16, pci_capability_pcie_link_control2_t);
+__ (u16, pci_capability_pcie_link_status2_t);
+#undef _
+#undef __
+
+typedef struct
+{
+ u8 capability_id;
+ u8 next_offset;
+ u16 version_id : 3;
+ u16 _reserved_0_19 : 13;
+ pci_capability_pcie_dev_caps_t dev_caps;
+ pci_capability_pcie_dev_control_t dev_control;
+ pci_capability_pcie_dev_status_t dev_status;
+ pci_capability_pcie_link_caps_t link_caps;
+ pci_capability_pcie_link_control_t link_control;
+ pci_capability_pcie_link_status_t link_status;
+ u32 _reserved_0x14;
+ u16 _reserved_0x18;
+ u16 _reserved_0x1a;
+ u32 _reserved_0x1c;
+ u16 _reserved_0x20;
+ u16 _reserved_0x22;
+ pci_capability_pcie_dev_caps2_t dev_caps2;
+ pci_capability_pcie_dev_control2_t dev_control2;
+ u16 _reserved_0x2a;
+ u32 _reserved_0x2c;
+ pci_capability_pcie_link_control2_t link_control2;
+ pci_capability_pcie_link_status2_t link_status2;
+ u32 _reserved_0x34;
+ u16 _reserved_0x38;
+ u16 _reserved_0x3a;
+} pci_capability_pcie_t;
+
+STATIC_ASSERT_SIZEOF (pci_capability_pcie_t, 60);
+
#endif /* included_vlib_pci_config_h */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vlib/physmem.c b/src/vlib/physmem.c
index a36444fdc9f..84c61d2a44f 100644
--- a/src/vlib/physmem.c
+++ b/src/vlib/physmem.c
@@ -17,11 +17,11 @@
#include <sys/types.h>
#include <sys/mount.h>
#include <sys/mman.h>
-#include <sys/fcntl.h>
+#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
-#include <vppinfra/linux/sysfs.h>
+#include <vppinfra/clib.h>
#include <vlib/vlib.h>
#include <vlib/physmem.h>
#include <vlib/unix/unix.h>
@@ -103,8 +103,10 @@ vlib_physmem_init (vlib_main_t * vm)
vpm->flags |= VLIB_PHYSMEM_MAIN_F_HAVE_PAGEMAP;
vec_free (pt);
+#ifdef __linux__
if ((error = linux_vfio_init (vm)))
return error;
+#endif /* __linux__ */
p = clib_mem_alloc_aligned (sizeof (clib_pmalloc_main_t),
CLIB_CACHE_LINE_BYTES);
@@ -160,13 +162,11 @@ show_physmem (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_physmem_command, static) = {
.path = "show physmem",
.short_help = "show physmem [verbose | detail | map]",
.function = show_physmem,
};
-/* *INDENT-ON* */
static clib_error_t *
vlib_physmem_config (vlib_main_t * vm, unformat_input_t * input)
diff --git a/src/vlib/punt.c b/src/vlib/punt.c
index 4a5e42db203..b59e5d251be 100644
--- a/src/vlib/punt.c
+++ b/src/vlib/punt.c
@@ -254,12 +254,10 @@ punt_reg_mk_dp (vlib_punt_reason_t reason)
old = punt_dp_db[reason];
- /* *INDENT-OFF* */
hash_foreach (key, pri, punt_reg_db,
({
vec_add1(pris, pri);
}));
- /* *INDENT-ON* */
/*
* A check for an empty vector is done in the DP, so the a zero
@@ -594,26 +592,22 @@ punt_client_show (vlib_main_t * vm,
{
u8 *name;
- /* *INDENT-OFF* */
hash_foreach(name, pci, punt_client_db,
({
vlib_cli_output (vm, "%U", format_punt_client, pci,
PUNT_FORMAT_FLAG_NONE);
}));
- /* *INDENT-ON* */
}
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_client_show_command, static) =
{
.path = "show punt client",
.short_help = "show client[s] registered with the punt infra",
.function = punt_client_show,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_reason_show (vlib_main_t * vm,
@@ -629,14 +623,12 @@ punt_reason_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_reason_show_command, static) =
{
.path = "show punt reasons",
.short_help = "show all punt reasons",
.function = punt_reason_show,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_db_show (vlib_main_t * vm,
@@ -645,12 +637,10 @@ punt_db_show (vlib_main_t * vm,
u32 pri, ii, jj;
u64 key;
- /* *INDENT-OFF* */
hash_foreach (key, pri, punt_reg_db,
({
vlib_cli_output (vm, " %U", format_punt_reg, pri);
}));
- /* *INDENT-ON* */
vlib_cli_output (vm, "\nDerived data-plane data-base:");
vlib_cli_output (vm,
@@ -672,14 +662,12 @@ punt_db_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_db_show_command, static) =
{
.path = "show punt db",
.short_help = "show the punt DB",
.function = punt_db_show,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_stats_show (vlib_main_t * vm,
@@ -699,14 +687,12 @@ punt_stats_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_stats_show_command, static) =
{
.path = "show punt stats",
.short_help = "show the punt stats",
.function = punt_stats_show,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_init (vlib_main_t * vm)
diff --git a/src/vlib/punt_node.c b/src/vlib/punt_node.c
index de721046057..4b81a61715a 100644
--- a/src/vlib/punt_node.c
+++ b/src/vlib/punt_node.c
@@ -280,7 +280,6 @@ VLIB_NODE_FN (punt_dispatch_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (punt_dispatch_node) = {
.name = "punt-dispatch",
.vector_size = sizeof (u32),
@@ -293,7 +292,6 @@ VLIB_REGISTER_NODE (punt_dispatch_node) = {
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
diff --git a/src/vlib/stat_weak_inlines.h b/src/vlib/stat_weak_inlines.h
deleted file mode 100644
index a68566d0fdd..00000000000
--- a/src/vlib/stat_weak_inlines.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * NOTE: Only include this file from external components that require
- * a loose coupling to the stats component.
- */
-
-#ifndef included_stat_weak_inlines_h
-#define included_stat_weak_inlines_h
-void *vlib_stats_push_heap (void *) __attribute__ ((weak));
-void *
-vlib_stats_push_heap (void *unused)
-{
- return 0;
-};
-
-void vlib_stats_pop_heap (void *, void *, u32, int) __attribute__ ((weak));
-void
-vlib_stats_pop_heap (void *notused, void *notused2, u32 i, int type)
-{
-};
-void vlib_stats_register_error_index (void *, u8 *, u64 *, u64)
- __attribute__ ((weak));
-void
-vlib_stats_register_error_index (void * notused, u8 * notused2, u64 * notused3, u64 notused4)
-{
-};
-
-void vlib_stats_pop_heap2 (void *, u32, void *, int) __attribute__ ((weak));
-void
-vlib_stats_pop_heap2 (void *notused, u32 notused2, void *notused3,
- int notused4)
-{
-};
-
-void vlib_stat_segment_lock (void) __attribute__ ((weak));
-void
-vlib_stat_segment_lock (void)
-{
-}
-
-void vlib_stat_segment_unlock (void) __attribute__ ((weak));
-void
-vlib_stat_segment_unlock (void)
-{
-}
-void vlib_stats_delete_cm (void *) __attribute__ ((weak));
-void
-vlib_stats_delete_cm (void *notused)
-{
-}
-
-void vlib_stats_register_mem_heap (void *) __attribute__ ((weak));
-void
-vlib_stats_register_mem_heap (void *notused)
-{
-}
-
-#endif
diff --git a/src/vlib/stats/cli.c b/src/vlib/stats/cli.c
new file mode 100644
index 00000000000..94a852ac751
--- /dev/null
+++ b/src/vlib/stats/cli.c
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/stats/stats.h>
+
+static int
+name_sort_cmp (void *a1, void *a2)
+{
+ vlib_stats_entry_t *n1 = a1;
+ vlib_stats_entry_t *n2 = a2;
+
+ return strcmp ((char *) n1->name, (char *) n2->name);
+}
+
+static u8 *
+format_stat_dir_entry (u8 *s, va_list *args)
+{
+ vlib_stats_entry_t *ep = va_arg (*args, vlib_stats_entry_t *);
+ char *type_name;
+ char *format_string;
+
+ format_string = "%-74s %-10s %10lld";
+
+ switch (ep->type)
+ {
+ case STAT_DIR_TYPE_SCALAR_INDEX:
+ type_name = "ScalarPtr";
+ break;
+
+ case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE:
+ case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED:
+ type_name = "CMainPtr";
+ break;
+
+ case STAT_DIR_TYPE_NAME_VECTOR:
+ type_name = "NameVector";
+ break;
+
+ case STAT_DIR_TYPE_EMPTY:
+ type_name = "empty";
+ break;
+
+ case STAT_DIR_TYPE_SYMLINK:
+ type_name = "Symlink";
+ break;
+
+ default:
+ type_name = "illegal!";
+ break;
+ }
+
+ return format (s, format_string, ep->name, type_name, 0);
+}
+static clib_error_t *
+show_stat_segment_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_entry_t *show_data;
+ int i;
+
+ int verbose = 0;
+
+ if (unformat (input, "verbose"))
+ verbose = 1;
+
+ /* Lock even as reader, as this command doesn't handle epoch changes */
+ vlib_stats_segment_lock ();
+ show_data = vec_dup (sm->directory_vector);
+ vlib_stats_segment_unlock ();
+
+ vec_sort_with_function (show_data, name_sort_cmp);
+
+ vlib_cli_output (vm, "%-74s %10s %10s", "Name", "Type", "Value");
+
+ for (i = 0; i < vec_len (show_data); i++)
+ {
+ vlib_stats_entry_t *ep = vec_elt_at_index (show_data, i);
+
+ if (ep->type == STAT_DIR_TYPE_EMPTY)
+ continue;
+
+ vlib_cli_output (vm, "%-100U", format_stat_dir_entry,
+ vec_elt_at_index (show_data, i));
+ }
+
+ if (verbose)
+ {
+ ASSERT (sm->heap);
+ vlib_cli_output (vm, "%U", format_clib_mem_heap, sm->heap,
+ 0 /* verbose */);
+ }
+
+ return 0;
+}
+
+static clib_error_t *
+show_stat_segment_hash_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ char *name;
+ u32 i;
+ hash_foreach_mem (name, i, sm->directory_vector_by_name,
+ ({ vlib_cli_output (vm, "%d: %s\n", i, name); }));
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_stat_segment_hash_command, static) = {
+ .path = "show statistics hash",
+ .short_help = "show statistics hash",
+ .function = show_stat_segment_hash_command_fn,
+};
+
+VLIB_CLI_COMMAND (show_stat_segment_command, static) = {
+ .path = "show statistics segment",
+ .short_help = "show statistics segment [verbose]",
+ .function = show_stat_segment_command_fn,
+};
diff --git a/src/vlib/stats/collector.c b/src/vlib/stats/collector.c
new file mode 100644
index 00000000000..b23f3df5713
--- /dev/null
+++ b/src/vlib/stats/collector.c
@@ -0,0 +1,186 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/stats/stats.h>
+
+enum
+{
+ NODE_CLOCKS,
+ NODE_VECTORS,
+ NODE_CALLS,
+ NODE_SUSPENDS,
+ N_NODE_COUNTERS
+};
+
+struct
+{
+ u32 entry_index;
+ char *name;
+} node_counters[] = {
+ [NODE_CLOCKS] = { .name = "clocks" },
+ [NODE_VECTORS] = { .name = "vectors" },
+ [NODE_CALLS] = { .name = "calls" },
+ [NODE_SUSPENDS] = { .name = "suspends" },
+};
+
+static struct
+{
+ u8 *name;
+ u32 symlinks[N_NODE_COUNTERS];
+} *node_data = 0;
+
+static vlib_stats_string_vector_t node_names = 0;
+
+static inline void
+update_node_counters (vlib_stats_segment_t *sm)
+{
+ clib_bitmap_t *bmp = 0;
+ vlib_main_t **stat_vms = 0;
+ vlib_node_t ***node_dups = 0;
+ u32 n_nodes;
+ int i, j;
+
+ vlib_node_get_nodes (0 /* vm, for barrier sync */,
+ (u32) ~0 /* all threads */, 1 /* include stats */,
+ 0 /* barrier sync */, &node_dups, &stat_vms);
+
+ n_nodes = vec_len (node_dups[0]);
+
+ vec_validate (node_data, n_nodes - 1);
+
+ for (i = 0; i < n_nodes; i++)
+ if (vec_is_equal (node_data[i].name, node_dups[0][i]->name) == 0)
+ bmp = clib_bitmap_set (bmp, i, 1);
+
+ if (bmp)
+ {
+ u32 last_thread = vlib_get_n_threads ();
+ vlib_stats_segment_lock ();
+ clib_bitmap_foreach (i, bmp)
+ {
+ if (node_data[i].name)
+ {
+ vec_free (node_data[i].name);
+ for (j = 0; j < ARRAY_LEN (node_data->symlinks); j++)
+ vlib_stats_remove_entry (node_data[i].symlinks[j]);
+ }
+ }
+ /* We can't merge the loops because a node index corresponding to a given
+ * node name can change between 2 updates. Otherwise, we could add
+ * already existing symlinks or delete valid ones.
+ */
+ clib_bitmap_foreach (i, bmp)
+ {
+ vlib_node_t *n = node_dups[0][i];
+ node_data[i].name = vec_dup (n->name);
+ vlib_stats_set_string_vector (&node_names, n->index, "%v", n->name);
+
+ for (int j = 0; j < ARRAY_LEN (node_counters); j++)
+ {
+ vlib_stats_validate (node_counters[j].entry_index, last_thread,
+ n_nodes - 1);
+ node_data[i].symlinks[j] = vlib_stats_add_symlink (
+ node_counters[j].entry_index, n->index, "/nodes/%U/%s",
+ format_vlib_stats_symlink, n->name, node_counters[j].name);
+ ASSERT (node_data[i].symlinks[j] != CLIB_U32_MAX);
+ }
+ }
+ vlib_stats_segment_unlock ();
+ vec_free (bmp);
+ }
+
+ for (j = 0; j < vec_len (node_dups); j++)
+ {
+ vlib_node_t **nodes = node_dups[j];
+
+ for (i = 0; i < vec_len (nodes); i++)
+ {
+ counter_t **counters;
+ counter_t *c;
+ vlib_node_t *n = nodes[i];
+
+ counters = vlib_stats_get_entry_data_pointer (
+ node_counters[NODE_CLOCKS].entry_index);
+ c = counters[j];
+ c[n->index] = n->stats_total.clocks - n->stats_last_clear.clocks;
+
+ counters = vlib_stats_get_entry_data_pointer (
+ node_counters[NODE_VECTORS].entry_index);
+ c = counters[j];
+ c[n->index] = n->stats_total.vectors - n->stats_last_clear.vectors;
+
+ counters = vlib_stats_get_entry_data_pointer (
+ node_counters[NODE_CALLS].entry_index);
+ c = counters[j];
+ c[n->index] = n->stats_total.calls - n->stats_last_clear.calls;
+
+ counters = vlib_stats_get_entry_data_pointer (
+ node_counters[NODE_SUSPENDS].entry_index);
+ c = counters[j];
+ c[n->index] = n->stats_total.suspends - n->stats_last_clear.suspends;
+ }
+ vec_free (node_dups[j]);
+ }
+ vec_free (node_dups);
+ vec_free (stat_vms);
+}
+
+static void
+do_stat_segment_updates (vlib_main_t *vm, vlib_stats_segment_t *sm)
+{
+ if (sm->node_counters_enabled)
+ update_node_counters (sm);
+
+ vlib_stats_collector_t *c;
+ pool_foreach (c, sm->collectors)
+ {
+ vlib_stats_collector_data_t data = {
+ .entry_index = c->entry_index,
+ .vector_index = c->vector_index,
+ .private_data = c->private_data,
+ .entry = sm->directory_vector + c->entry_index,
+ };
+ c->fn (&data);
+ }
+
+ /* Heartbeat, so clients detect we're still here */
+ sm->directory_vector[STAT_COUNTER_HEARTBEAT].value++;
+}
+
+static uword
+stat_segment_collector_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ vlib_frame_t *f)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+
+ if (sm->node_counters_enabled)
+ {
+ node_names = vlib_stats_add_string_vector ("/sys/node/names");
+ ASSERT (node_names);
+
+ for (int x = 0; x < ARRAY_LEN (node_counters); x++)
+ {
+ node_counters[x].entry_index = vlib_stats_add_counter_vector (
+ "/sys/node/%s", node_counters[x].name);
+ ASSERT (node_counters[x].entry_index != CLIB_U32_MAX);
+ }
+ }
+
+ sm->directory_vector[STAT_COUNTER_BOOTTIME].value = unix_time_now ();
+
+ while (1)
+ {
+ do_stat_segment_updates (vm, sm);
+ vlib_process_suspend (vm, sm->update_interval);
+ }
+ return 0; /* or not */
+}
+
+VLIB_REGISTER_NODE (stat_segment_collector, static) = {
+ .function = stat_segment_collector_process,
+ .name = "statseg-collector-process",
+ .type = VLIB_NODE_TYPE_PROCESS,
+};
diff --git a/src/vlib/stats/format.c b/src/vlib/stats/format.c
new file mode 100644
index 00000000000..54c11aff743
--- /dev/null
+++ b/src/vlib/stats/format.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/stats/stats.h>
+
+u8 *
+format_vlib_stats_symlink (u8 *s, va_list *args)
+{
+ u8 *input = va_arg (*args, u8 *);
+
+ for (int i = 0; i < vec_len (input); i++)
+ if (input[i] == '/')
+ vec_add1 (s, '_');
+ else
+ vec_add1 (s, input[i]);
+
+ return s;
+}
diff --git a/src/vlib/stats/init.c b/src/vlib/stats/init.c
new file mode 100644
index 00000000000..8b382daf333
--- /dev/null
+++ b/src/vlib/stats/init.c
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/stats/stats.h>
+
+#define STAT_SEGMENT_SOCKET_FILENAME "stats.sock"
+
+static u32 vlib_loops_stats_counter_index;
+
+static void
+vector_rate_collector_fn (vlib_stats_collector_data_t *d)
+{
+ vlib_main_t *this_vlib_main;
+ counter_t **counters, **loops_counters;
+ counter_t *cb, *loops_cb;
+ f64 vector_rate = 0.0;
+ u32 i, n_threads = vlib_get_n_threads ();
+
+ vlib_stats_validate (d->entry_index, 0, n_threads - 1);
+ counters = d->entry->data;
+ cb = counters[0];
+
+ vlib_stats_validate (vlib_loops_stats_counter_index, 0, n_threads - 1);
+ loops_counters =
+ vlib_stats_get_entry_data_pointer (vlib_loops_stats_counter_index);
+ loops_cb = loops_counters[0];
+
+ for (i = 0; i < n_threads; i++)
+ {
+ f64 this_vector_rate;
+ this_vlib_main = vlib_get_main_by_index (i);
+
+ this_vector_rate = vlib_internal_node_vector_rate (this_vlib_main);
+ vlib_clear_internal_node_vector_rate (this_vlib_main);
+ cb[i] = this_vector_rate;
+ vector_rate += this_vector_rate;
+
+ loops_cb[i] = this_vlib_main->loops_per_second;
+ }
+
+ /* And set the system average rate */
+ vector_rate /= (f64) (i > 1 ? i - 1 : 1);
+ vlib_stats_set_gauge (d->private_data, vector_rate);
+}
+
+clib_error_t *
+vlib_stats_init (vlib_main_t *vm)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_shared_header_t *shared_header;
+ vlib_stats_collector_reg_t reg = {};
+
+ uword memory_size, sys_page_sz;
+ int mfd;
+ char *mem_name = "stat segment";
+ void *heap, *memaddr;
+
+ memory_size = sm->memory_size;
+ if (memory_size == 0)
+ memory_size = STAT_SEGMENT_DEFAULT_SIZE;
+
+ if (sm->log2_page_sz == CLIB_MEM_PAGE_SZ_UNKNOWN)
+ sm->log2_page_sz = CLIB_MEM_PAGE_SZ_DEFAULT;
+
+ mfd = clib_mem_vm_create_fd (sm->log2_page_sz, mem_name);
+
+ if (mfd == -1)
+ return clib_error_return (0, "stat segment memory fd failure: %U",
+ format_clib_error, clib_mem_get_last_error ());
+ /* Set size */
+ if ((ftruncate (mfd, memory_size)) == -1)
+ {
+ close (mfd);
+ return clib_error_return (0, "stat segment ftruncate failure");
+ }
+
+ memaddr = clib_mem_vm_map_shared (0, memory_size, mfd, 0, mem_name);
+
+ if (memaddr == CLIB_MEM_VM_MAP_FAILED)
+ return clib_error_return (0, "stat segment mmap failure");
+
+ sys_page_sz = clib_mem_get_page_size ();
+
+ heap =
+ clib_mem_create_heap (((u8 *) memaddr) + sys_page_sz,
+ memory_size - sys_page_sz, 1 /* locked */, mem_name);
+ sm->heap = heap;
+ sm->memfd = mfd;
+
+ sm->directory_vector_by_name = hash_create_string (0, sizeof (uword));
+ sm->shared_header = shared_header = memaddr;
+
+ shared_header->version = STAT_SEGMENT_VERSION;
+ shared_header->base = memaddr;
+
+ sm->stat_segment_lockp = clib_mem_alloc (sizeof (clib_spinlock_t));
+ sm->locking_thread_index = ~0;
+ sm->n_locks = 0;
+ clib_spinlock_init (sm->stat_segment_lockp);
+
+ /* Set up the name to counter-vector hash table */
+ sm->directory_vector =
+ vec_new_heap (typeof (sm->directory_vector[0]), STAT_COUNTERS, heap);
+ sm->dir_vector_first_free_elt = CLIB_U32_MAX;
+
+ shared_header->epoch = 1;
+
+ /* Scalar stats and node counters */
+#define _(E, t, n, p) \
+ strcpy (sm->directory_vector[STAT_COUNTER_##E].name, p "/" #n); \
+ sm->directory_vector[STAT_COUNTER_##E].type = STAT_DIR_TYPE_##t;
+ foreach_stat_segment_counter_name
+#undef _
+ /* Save the vector in the shared segment, for clients */
+ shared_header->directory_vector = sm->directory_vector;
+
+ vlib_stats_register_mem_heap (heap);
+
+ reg.collect_fn = vector_rate_collector_fn;
+ reg.private_data = vlib_stats_add_gauge ("/sys/vector_rate");
+ reg.entry_index =
+ vlib_stats_add_counter_vector ("/sys/vector_rate_per_worker");
+ vlib_loops_stats_counter_index =
+ vlib_stats_add_counter_vector ("/sys/loops_per_worker");
+ vlib_stats_register_collector_fn (&reg);
+ vlib_stats_validate (reg.entry_index, 0, vlib_get_n_threads ());
+ vlib_stats_validate (vlib_loops_stats_counter_index, 0,
+ vlib_get_n_threads ());
+
+ return 0;
+}
+
+static clib_error_t *
+statseg_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ sm->update_interval = 10.0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "socket-name %s", &sm->socket_name))
+ ;
+ /* DEPRECATE: default (does nothing) */
+ else if (unformat (input, "default"))
+ ;
+ else if (unformat (input, "size %U", unformat_memory_size,
+ &sm->memory_size))
+ ;
+ else if (unformat (input, "page-size %U", unformat_log2_page_size,
+ &sm->log2_page_sz))
+ ;
+ else if (unformat (input, "per-node-counters on"))
+ sm->node_counters_enabled = 1;
+ else if (unformat (input, "per-node-counters off"))
+ sm->node_counters_enabled = 0;
+ else if (unformat (input, "update-interval %f", &sm->update_interval))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ /*
+ * NULL-terminate socket name string
+ * clib_socket_init()->socket_config() use C str*
+ */
+ if (vec_len (sm->socket_name))
+ vec_terminate_c_string (sm->socket_name);
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (statseg_config, "statseg");
+
+/*
+ * Accept connection on the socket and exchange the fd for the shared
+ * memory segment.
+ */
+static clib_error_t *
+stats_socket_accept_ready (clib_file_t *uf)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ clib_error_t *err;
+ clib_socket_t client = { 0 };
+
+ err = clib_socket_accept (sm->socket, &client);
+ if (err)
+ {
+ clib_error_report (err);
+ return err;
+ }
+
+ /* Send the fd across and close */
+ err = clib_socket_sendmsg (&client, 0, 0, &sm->memfd, 1);
+ if (err)
+ clib_error_report (err);
+ clib_socket_close (&client);
+
+ return 0;
+}
+
+static clib_error_t *
+stats_segment_socket_init (void)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ clib_error_t *error;
+ clib_socket_t *s = clib_mem_alloc (sizeof (clib_socket_t));
+
+ memset (s, 0, sizeof (clib_socket_t));
+ s->config = (char *) sm->socket_name;
+ s->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_SEQPACKET |
+ CLIB_SOCKET_F_ALLOW_GROUP_WRITE | CLIB_SOCKET_F_PASSCRED;
+
+ if ((error = clib_socket_init (s)))
+ return error;
+
+ clib_file_t template = { 0 };
+ template.read_function = stats_socket_accept_ready;
+ template.file_descriptor = s->fd;
+ template.description = format (0, "stats segment listener %s", s->config);
+ clib_file_add (&file_main, &template);
+
+ sm->socket = s;
+
+ return 0;
+}
+
+static clib_error_t *
+stats_segment_socket_exit (vlib_main_t *vm)
+{
+ /*
+ * cleanup the listener socket on exit.
+ */
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ unlink ((char *) sm->socket_name);
+ return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (stats_segment_socket_exit);
+
+static clib_error_t *
+statseg_init (vlib_main_t *vm)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+
+ /* set default socket file name when statseg config stanza is empty. */
+ if (!vec_len (sm->socket_name))
+ sm->socket_name = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (),
+ STAT_SEGMENT_SOCKET_FILENAME, 0);
+ return stats_segment_socket_init ();
+}
+
+VLIB_INIT_FUNCTION (statseg_init) = {
+ .runs_after = VLIB_INITS ("unix_input_init", "linux_epoll_input_init"),
+};
diff --git a/src/vlib/stats/provider_mem.c b/src/vlib/stats/provider_mem.c
new file mode 100644
index 00000000000..f3a3f5d3ed4
--- /dev/null
+++ b/src/vlib/stats/provider_mem.c
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/stats/stats.h>
+
+static clib_mem_heap_t **memory_heaps_vec;
+
+enum
+{
+ STAT_MEM_TOTAL = 0,
+ STAT_MEM_USED,
+ STAT_MEM_FREE,
+ STAT_MEM_USED_MMAP,
+ STAT_MEM_TOTAL_ALLOC,
+ STAT_MEM_FREE_CHUNKS,
+ STAT_MEM_RELEASABLE,
+} stat_mem_usage_e;
+
+/*
+ * Called from the stats periodic process to update memory counters.
+ */
+static void
+stat_provider_mem_usage_update_fn (vlib_stats_collector_data_t *d)
+{
+ clib_mem_usage_t usage;
+ clib_mem_heap_t *heap;
+ counter_t **counters = d->entry->data;
+ counter_t *cb;
+
+ heap = vec_elt (memory_heaps_vec, d->private_data);
+ clib_mem_get_heap_usage (heap, &usage);
+ cb = counters[0];
+ cb[STAT_MEM_TOTAL] = usage.bytes_total;
+ cb[STAT_MEM_USED] = usage.bytes_used;
+ cb[STAT_MEM_FREE] = usage.bytes_free;
+ cb[STAT_MEM_USED_MMAP] = usage.bytes_used_mmap;
+ cb[STAT_MEM_TOTAL_ALLOC] = usage.bytes_max;
+ cb[STAT_MEM_FREE_CHUNKS] = usage.bytes_free_reclaimed;
+ cb[STAT_MEM_RELEASABLE] = usage.bytes_overhead;
+}
+
+/*
+ * Provide memory heap counters.
+ * Two dimensional array of heap index and per-heap gauges.
+ */
+void
+vlib_stats_register_mem_heap (clib_mem_heap_t *heap)
+{
+ vlib_stats_collector_reg_t r = {};
+ u32 idx;
+
+ vec_add1 (memory_heaps_vec, heap);
+
+ r.entry_index = idx = vlib_stats_add_counter_vector ("/mem/%s", heap->name);
+ vlib_stats_validate (idx, 0, STAT_MEM_RELEASABLE);
+
+ /* Create symlink */
+ vlib_stats_add_symlink (idx, STAT_MEM_USED, "/mem/%s/used", heap->name);
+ vlib_stats_add_symlink (idx, STAT_MEM_TOTAL, "/mem/%s/total", heap->name);
+ vlib_stats_add_symlink (idx, STAT_MEM_FREE, "/mem/%s/free", heap->name);
+
+ r.private_data = vec_len (memory_heaps_vec) - 1;
+ r.collect_fn = stat_provider_mem_usage_update_fn;
+ vlib_stats_register_collector_fn (&r);
+}
diff --git a/src/vlib/stats/shared.h b/src/vlib/stats/shared.h
new file mode 100644
index 00000000000..8e44ce3dc86
--- /dev/null
+++ b/src/vlib/stats/shared.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#ifndef included_stat_segment_shared_h
+#define included_stat_segment_shared_h
+
+typedef enum
+{
+ STAT_DIR_TYPE_ILLEGAL = 0,
+ STAT_DIR_TYPE_SCALAR_INDEX,
+ STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE,
+ STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED,
+ STAT_DIR_TYPE_NAME_VECTOR,
+ STAT_DIR_TYPE_EMPTY,
+ STAT_DIR_TYPE_SYMLINK,
+} stat_directory_type_t;
+
+typedef struct
+{
+ stat_directory_type_t type;
+ union
+ {
+ struct
+ {
+ uint32_t index1;
+ uint32_t index2;
+ };
+ uint64_t index;
+ uint64_t value;
+ void *data;
+ uint8_t **string_vector;
+ };
+#define VLIB_STATS_MAX_NAME_SZ 128
+ char name[VLIB_STATS_MAX_NAME_SZ];
+} vlib_stats_entry_t;
+
+/*
+ * Shared header first in the shared memory segment.
+ */
+typedef struct
+{
+ uint64_t version;
+ void *base;
+ volatile uint64_t epoch;
+ volatile uint64_t in_progress;
+ volatile vlib_stats_entry_t *directory_vector;
+} vlib_stats_shared_header_t;
+
+#endif /* included_stat_segment_shared_h */
diff --git a/src/vlib/stats/stats.c b/src/vlib/stats/stats.c
new file mode 100644
index 00000000000..b7743ec70f2
--- /dev/null
+++ b/src/vlib/stats/stats.c
@@ -0,0 +1,574 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/stats/stats.h>
+
+vlib_stats_main_t vlib_stats_main;
+
+void
+vlib_stats_segment_lock (void)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+
+ /* already locked by us */
+ if (sm->shared_header->in_progress &&
+ vm->thread_index == sm->locking_thread_index)
+ goto done;
+
+ ASSERT (sm->locking_thread_index == ~0);
+ ASSERT (sm->shared_header->in_progress == 0);
+ ASSERT (sm->n_locks == 0);
+
+ clib_spinlock_lock (sm->stat_segment_lockp);
+
+ sm->shared_header->in_progress = 1;
+ sm->locking_thread_index = vm->thread_index;
+done:
+ sm->n_locks++;
+}
+
+void
+vlib_stats_segment_unlock (void)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+
+ ASSERT (sm->shared_header->in_progress == 1);
+ ASSERT (sm->locking_thread_index == vm->thread_index);
+ ASSERT (sm->n_locks > 0);
+
+ sm->n_locks--;
+
+ if (sm->n_locks > 0)
+ return;
+
+ sm->shared_header->epoch++;
+ __atomic_store_n (&sm->shared_header->in_progress, 0, __ATOMIC_RELEASE);
+ sm->locking_thread_index = ~0;
+ clib_spinlock_unlock (sm->stat_segment_lockp);
+}
+
+/*
+ * Change heap to the stats shared memory segment
+ */
+void *
+vlib_stats_set_heap ()
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+
+ ASSERT (sm && sm->shared_header);
+ return clib_mem_set_heap (sm->heap);
+}
+
+u32
+vlib_stats_find_entry_index (char *fmt, ...)
+{
+ u8 *name;
+ va_list va;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+ vec_add1 (name, 0);
+
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ hash_pair_t *hp = hash_get_pair (sm->directory_vector_by_name, name);
+ vec_free (name);
+ return hp ? hp->value[0] : STAT_SEGMENT_INDEX_INVALID;
+}
+
+static void
+hash_set_str_key_alloc (uword **h, const char *key, uword v)
+{
+ int size = strlen (key) + 1;
+ void *copy = clib_mem_alloc (size);
+ clib_memcpy_fast (copy, key, size);
+ hash_set_mem (*h, copy, v);
+}
+
+static void
+hash_unset_str_key_free (uword **h, const char *key)
+{
+ hash_pair_t *hp = hash_get_pair_mem (*h, key);
+ if (hp)
+ {
+ void *_k = uword_to_pointer (hp->key, void *);
+ hash_unset_mem (*h, _k);
+ clib_mem_free (_k);
+ }
+}
+
+u32
+vlib_stats_create_counter (vlib_stats_entry_t *e)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ u32 index;
+
+ if (sm->dir_vector_first_free_elt != CLIB_U32_MAX)
+ {
+ index = sm->dir_vector_first_free_elt;
+ sm->dir_vector_first_free_elt = sm->directory_vector[index].index;
+ }
+ else
+ {
+ index = vec_len (sm->directory_vector);
+ vec_validate (sm->directory_vector, index);
+ }
+
+ sm->directory_vector[index] = *e;
+
+ hash_set_str_key_alloc (&sm->directory_vector_by_name, e->name, index);
+
+ return index;
+}
+
+void
+vlib_stats_remove_entry (u32 entry_index)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_entry_t *e = vlib_stats_get_entry (sm, entry_index);
+ counter_t **c;
+ vlib_counter_t **vc;
+ void *oldheap;
+ u32 i;
+
+ if (entry_index >= vec_len (sm->directory_vector))
+ return;
+
+ vlib_stats_segment_lock ();
+
+ switch (e->type)
+ {
+ case STAT_DIR_TYPE_NAME_VECTOR:
+ for (i = 0; i < vec_len (e->string_vector); i++)
+ vec_free (e->string_vector[i]);
+ vec_free (e->string_vector);
+ break;
+
+ case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE:
+ c = e->data;
+ e->data = 0;
+ oldheap = clib_mem_set_heap (sm->heap);
+ for (i = 0; i < vec_len (c); i++)
+ vec_free (c[i]);
+ vec_free (c);
+ clib_mem_set_heap (oldheap);
+ break;
+
+ case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED:
+ vc = e->data;
+ e->data = 0;
+ oldheap = clib_mem_set_heap (sm->heap);
+ for (i = 0; i < vec_len (vc); i++)
+ vec_free (vc[i]);
+ vec_free (vc);
+ clib_mem_set_heap (oldheap);
+ break;
+
+ case STAT_DIR_TYPE_SCALAR_INDEX:
+ case STAT_DIR_TYPE_SYMLINK:
+ break;
+ default:
+ ASSERT (0);
+ }
+
+ vlib_stats_segment_unlock ();
+
+ hash_unset_str_key_free (&sm->directory_vector_by_name, e->name);
+
+ memset (e, 0, sizeof (*e));
+ e->type = STAT_DIR_TYPE_EMPTY;
+
+ e->value = sm->dir_vector_first_free_elt;
+ sm->dir_vector_first_free_elt = entry_index;
+}
+
+static void
+vlib_stats_set_entry_name (vlib_stats_entry_t *e, char *s)
+{
+ u32 i, len = VLIB_STATS_MAX_NAME_SZ - 1;
+
+ for (i = 0; i < len; i++)
+ {
+ e->name[i] = s[i];
+ if (s[i] == 0)
+ return;
+ }
+ ASSERT (i < VLIB_STATS_MAX_NAME_SZ - 1);
+ s[i] = 0;
+}
+
+static u32
+vlib_stats_new_entry_internal (stat_directory_type_t t, u8 *name)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_shared_header_t *shared_header = sm->shared_header;
+ vlib_stats_entry_t e = { .type = t };
+
+ ASSERT (shared_header);
+
+ u32 vector_index = vlib_stats_find_entry_index ("%v", name);
+ if (vector_index != STAT_SEGMENT_INDEX_INVALID) /* Already registered */
+ {
+ vector_index = ~0;
+ goto done;
+ }
+
+ vec_add1 (name, 0);
+ vlib_stats_set_entry_name (&e, (char *) name);
+
+ vlib_stats_segment_lock ();
+ vector_index = vlib_stats_create_counter (&e);
+
+ shared_header->directory_vector = sm->directory_vector;
+
+ vlib_stats_segment_unlock ();
+
+done:
+ vec_free (name);
+ return vector_index;
+}
+
+u32
+vlib_stats_add_gauge (char *fmt, ...)
+{
+ va_list va;
+ u8 *name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+ return vlib_stats_new_entry_internal (STAT_DIR_TYPE_SCALAR_INDEX, name);
+}
+
+void
+vlib_stats_set_gauge (u32 index, u64 value)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+
+ ASSERT (index < vec_len (sm->directory_vector));
+ sm->directory_vector[index].value = value;
+}
+
+u32
+vlib_stats_add_timestamp (char *fmt, ...)
+{
+ va_list va;
+ u8 *name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+ return vlib_stats_new_entry_internal (STAT_DIR_TYPE_SCALAR_INDEX, name);
+}
+
+void
+vlib_stats_set_timestamp (u32 entry_index, f64 value)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+
+ ASSERT (entry_index < vec_len (sm->directory_vector));
+ sm->directory_vector[entry_index].value = value;
+}
+
+vlib_stats_string_vector_t
+vlib_stats_add_string_vector (char *fmt, ...)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ va_list va;
+ vlib_stats_header_t *sh;
+ vlib_stats_string_vector_t sv;
+ u32 index;
+ u8 *name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+
+ index = vlib_stats_new_entry_internal (STAT_DIR_TYPE_NAME_VECTOR, name);
+ if (index == CLIB_U32_MAX)
+ return 0;
+
+ sv = vec_new_generic (vlib_stats_string_vector_t, 0,
+ sizeof (vlib_stats_header_t), 0, sm->heap);
+ sh = vec_header (sv);
+ sh->entry_index = index;
+ sm->directory_vector[index].string_vector = sv;
+ return sv;
+}
+
+void
+vlib_stats_set_string_vector (vlib_stats_string_vector_t *svp,
+ u32 vector_index, char *fmt, ...)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_header_t *sh = vec_header (*svp);
+ vlib_stats_entry_t *e = vlib_stats_get_entry (sm, sh->entry_index);
+ va_list va;
+ u8 *s;
+
+ if (fmt[0] == 0)
+ {
+ if (vec_len (e->string_vector) <= vector_index)
+ return;
+
+ if (e->string_vector[vector_index] == 0)
+ return;
+
+ vlib_stats_segment_lock ();
+ vec_free (e->string_vector[vector_index]);
+ vlib_stats_segment_unlock ();
+ return;
+ }
+
+ vlib_stats_segment_lock ();
+
+ ASSERT (e->string_vector);
+
+ vec_validate (e->string_vector, vector_index);
+ svp[0] = e->string_vector;
+
+ s = e->string_vector[vector_index];
+
+ if (s == 0)
+ s = vec_new_heap (u8 *, 0, sm->heap);
+
+ vec_reset_length (s);
+
+ va_start (va, fmt);
+ s = va_format (s, fmt, &va);
+ va_end (va);
+ vec_add1 (s, 0);
+
+ e->string_vector[vector_index] = s;
+
+ vlib_stats_segment_unlock ();
+}
+
+void
+vlib_stats_free_string_vector (vlib_stats_string_vector_t *sv)
+{
+ vlib_stats_header_t *sh = vec_header (*sv);
+ vlib_stats_remove_entry (sh->entry_index);
+}
+
+u32
+vlib_stats_add_counter_vector (char *fmt, ...)
+{
+ va_list va;
+ u8 *name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+ return vlib_stats_new_entry_internal (STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE,
+ name);
+}
+
+u32
+vlib_stats_add_counter_pair_vector (char *fmt, ...)
+{
+ va_list va;
+ u8 *name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+ return vlib_stats_new_entry_internal (STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED,
+ name);
+}
+
+static int
+vlib_stats_validate_will_expand_internal (u32 entry_index, va_list *va)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_entry_t *e = vlib_stats_get_entry (sm, entry_index);
+ void *oldheap;
+ int rv = 1;
+
+ oldheap = clib_mem_set_heap (sm->heap);
+ if (e->type == STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE)
+ {
+ u32 idx0 = va_arg (*va, u32);
+ u32 idx1 = va_arg (*va, u32);
+ u64 **data = e->data;
+
+ if (idx0 >= vec_len (data))
+ goto done;
+
+ for (u32 i = 0; i <= idx0; i++)
+ if (idx1 >= vec_max_len (data[i]))
+ goto done;
+ }
+ else if (e->type == STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED)
+ {
+ u32 idx0 = va_arg (*va, u32);
+ u32 idx1 = va_arg (*va, u32);
+ vlib_counter_t **data = e->data;
+
+ va_end (*va);
+
+ if (idx0 >= vec_len (data))
+ goto done;
+
+ for (u32 i = 0; i <= idx0; i++)
+ if (idx1 >= vec_max_len (data[i]))
+ goto done;
+ }
+ else
+ ASSERT (0);
+
+ rv = 0;
+done:
+ clib_mem_set_heap (oldheap);
+ return rv;
+}
+
+int
+vlib_stats_validate_will_expand (u32 entry_index, ...)
+{
+ va_list va;
+ int rv;
+
+ va_start (va, entry_index);
+ rv = vlib_stats_validate_will_expand_internal (entry_index, &va);
+ va_end (va);
+ return rv;
+}
+
+void
+vlib_stats_validate (u32 entry_index, ...)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_entry_t *e = vlib_stats_get_entry (sm, entry_index);
+ void *oldheap;
+ va_list va;
+ int will_expand;
+
+ va_start (va, entry_index);
+ will_expand = vlib_stats_validate_will_expand_internal (entry_index, &va);
+ va_end (va);
+
+ if (will_expand)
+ vlib_stats_segment_lock ();
+
+ oldheap = clib_mem_set_heap (sm->heap);
+
+ va_start (va, entry_index);
+
+ if (e->type == STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE)
+ {
+ u32 idx0 = va_arg (va, u32);
+ u32 idx1 = va_arg (va, u32);
+ u64 **data = e->data;
+
+ vec_validate_aligned (data, idx0, CLIB_CACHE_LINE_BYTES);
+
+ for (u32 i = 0; i <= idx0; i++)
+ vec_validate_aligned (data[i], idx1, CLIB_CACHE_LINE_BYTES);
+ e->data = data;
+ }
+ else if (e->type == STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED)
+ {
+ u32 idx0 = va_arg (va, u32);
+ u32 idx1 = va_arg (va, u32);
+ vlib_counter_t **data = e->data;
+
+ vec_validate_aligned (data, idx0, CLIB_CACHE_LINE_BYTES);
+
+ for (u32 i = 0; i <= idx0; i++)
+ vec_validate_aligned (data[i], idx1, CLIB_CACHE_LINE_BYTES);
+ e->data = data;
+ }
+ else
+ ASSERT (0);
+
+ va_end (va);
+
+ clib_mem_set_heap (oldheap);
+
+ if (will_expand)
+ vlib_stats_segment_unlock ();
+}
+
+u32
+vlib_stats_add_symlink (u32 entry_index, u32 vector_index, char *fmt, ...)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_shared_header_t *shared_header = sm->shared_header;
+ vlib_stats_entry_t e;
+ va_list va;
+ u8 *name;
+
+ ASSERT (shared_header);
+ ASSERT (entry_index < vec_len (sm->directory_vector));
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+
+ if (vlib_stats_find_entry_index ("%v", name) == STAT_SEGMENT_INDEX_INVALID)
+ {
+ vec_add1 (name, 0);
+ vlib_stats_set_entry_name (&e, (char *) name);
+ e.type = STAT_DIR_TYPE_SYMLINK;
+ e.index1 = entry_index;
+ e.index2 = vector_index;
+ vector_index = vlib_stats_create_counter (&e);
+
+ /* Warn clients to refresh any pointers they might be holding */
+ shared_header->directory_vector = sm->directory_vector;
+ }
+ else
+ vector_index = ~0;
+
+ vec_free (name);
+ return vector_index;
+}
+
+void
+vlib_stats_rename_symlink (u64 entry_index, char *fmt, ...)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_entry_t *e = vlib_stats_get_entry (sm, entry_index);
+ va_list va;
+ u8 *new_name;
+
+ hash_unset_str_key_free (&sm->directory_vector_by_name, e->name);
+
+ va_start (va, fmt);
+ new_name = va_format (0, fmt, &va);
+ va_end (va);
+
+ vec_add1 (new_name, 0);
+ vlib_stats_set_entry_name (e, (char *) new_name);
+ hash_set_str_key_alloc (&sm->directory_vector_by_name, e->name, entry_index);
+ vec_free (new_name);
+}
+
+f64
+vlib_stats_get_segment_update_rate (void)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ return sm->update_interval;
+}
+
+void
+vlib_stats_register_collector_fn (vlib_stats_collector_reg_t *reg)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_collector_t *c;
+
+ ASSERT (reg->entry_index != ~0);
+
+ pool_get_zero (sm->collectors, c);
+ c->fn = reg->collect_fn;
+ c->entry_index = reg->entry_index;
+ c->vector_index = reg->vector_index;
+ c->private_data = reg->private_data;
+
+ return;
+}
diff --git a/src/vlib/stats/stats.h b/src/vlib/stats/stats.h
new file mode 100644
index 00000000000..ab1e2828c5a
--- /dev/null
+++ b/src/vlib/stats/stats.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#ifndef included_stats_stats_h
+#define included_stats_stats_h
+
+#include <vppinfra/socket.h>
+#include <vppinfra/lock.h>
+#include <vlib/stats/shared.h>
+
+/* Default stat segment 32m */
+#define STAT_SEGMENT_DEFAULT_SIZE (32 << 20)
+
+/* Shared segment memory layout version */
+#define STAT_SEGMENT_VERSION 2
+
+#define STAT_SEGMENT_INDEX_INVALID UINT32_MAX
+
+typedef enum
+{
+ STAT_COUNTER_HEARTBEAT = 0,
+ STAT_COUNTER_LAST_STATS_CLEAR,
+ STAT_COUNTER_BOOTTIME,
+ STAT_COUNTERS
+} stat_segment_counter_t;
+
+#define foreach_stat_segment_counter_name \
+ _ (LAST_STATS_CLEAR, SCALAR_INDEX, last_stats_clear, "/sys") \
+ _ (HEARTBEAT, SCALAR_INDEX, heartbeat, "/sys") \
+ _ (BOOTTIME, SCALAR_INDEX, boottime, "/sys")
+
+typedef struct
+{
+ u32 entry_index;
+ u32 vector_index;
+ u64 private_data;
+ vlib_stats_entry_t *entry;
+} vlib_stats_collector_data_t;
+
+typedef void (*vlib_stats_collector_fn_t) (vlib_stats_collector_data_t *);
+
+typedef struct
+{
+ vlib_stats_collector_fn_t collect_fn;
+ u32 entry_index;
+ u32 vector_index;
+ u64 private_data;
+} vlib_stats_collector_reg_t;
+
+typedef struct
+{
+ vlib_stats_collector_fn_t fn;
+ u32 entry_index;
+ u32 vector_index;
+ u64 private_data;
+} vlib_stats_collector_t;
+
+typedef struct
+{
+ /* internal, does not point to shared memory */
+ vlib_stats_collector_t *collectors;
+
+ /* statistics segment */
+ uword *directory_vector_by_name;
+ vlib_stats_entry_t *directory_vector;
+ u32 dir_vector_first_free_elt;
+
+ /* Update interval */
+ f64 update_interval;
+
+ clib_spinlock_t *stat_segment_lockp;
+ u32 locking_thread_index;
+ u32 n_locks;
+ clib_socket_t *socket;
+ u8 *socket_name;
+ ssize_t memory_size;
+ clib_mem_page_sz_t log2_page_sz;
+ u8 node_counters_enabled;
+ void *heap;
+ vlib_stats_shared_header_t
+ *shared_header; /* pointer to shared memory segment */
+ int memfd;
+
+} vlib_stats_segment_t;
+
+typedef struct
+{
+ u32 entry_index;
+} vlib_stats_header_t;
+
+typedef struct
+{
+ vlib_stats_segment_t segment;
+} vlib_stats_main_t;
+
+extern vlib_stats_main_t vlib_stats_main;
+
+static_always_inline vlib_stats_segment_t *
+vlib_stats_get_segment ()
+{
+ return &vlib_stats_main.segment;
+}
+
+static_always_inline vlib_stats_entry_t *
+vlib_stats_get_entry (vlib_stats_segment_t *sm, u32 entry_index)
+{
+ vlib_stats_entry_t *e;
+ ASSERT (entry_index < vec_len (sm->directory_vector));
+ e = sm->directory_vector + entry_index;
+ ASSERT (e->type != STAT_DIR_TYPE_EMPTY && e->type != STAT_DIR_TYPE_ILLEGAL);
+ return e;
+}
+
+static_always_inline void *
+vlib_stats_get_entry_data_pointer (u32 entry_index)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_entry_t *e = vlib_stats_get_entry (sm, entry_index);
+ return e->data;
+}
+
+clib_error_t *vlib_stats_init (vlib_main_t *vm);
+void *vlib_stats_set_heap ();
+void vlib_stats_segment_lock (void);
+void vlib_stats_segment_unlock (void);
+void vlib_stats_register_mem_heap (clib_mem_heap_t *);
+f64 vlib_stats_get_segment_update_rate (void);
+
+/* gauge */
+u32 vlib_stats_add_gauge (char *fmt, ...);
+void vlib_stats_set_gauge (u32 entry_index, u64 value);
+
+/* timestamp */
+u32 vlib_stats_add_timestamp (char *fmt, ...);
+void vlib_stats_set_timestamp (u32 entry_index, f64 value);
+
+/* counter vector */
+u32 vlib_stats_add_counter_vector (char *fmt, ...);
+
+/* counter pair vector */
+u32 vlib_stats_add_counter_pair_vector (char *fmt, ...);
+
+/* string vector */
+typedef u8 **vlib_stats_string_vector_t;
+vlib_stats_string_vector_t vlib_stats_add_string_vector (char *fmt, ...);
+void vlib_stats_set_string_vector (vlib_stats_string_vector_t *sv, u32 index,
+ char *fmt, ...);
+void vlib_stats_free_string_vector (vlib_stats_string_vector_t *sv);
+
+/* symlink */
+u32 vlib_stats_add_symlink (u32 entry_index, u32 vector_index, char *fmt, ...);
+void vlib_stats_rename_symlink (u64 entry_index, char *fmt, ...);
+
+/* common to all types */
+void vlib_stats_validate (u32 entry_index, ...);
+int vlib_stats_validate_will_expand (u32 entry_index, ...);
+void vlib_stats_remove_entry (u32 entry_index);
+u32 vlib_stats_find_entry_index (char *fmt, ...);
+void vlib_stats_register_collector_fn (vlib_stats_collector_reg_t *r);
+
+format_function_t format_vlib_stats_symlink;
+
+#endif
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index a33e70ab8d4..bbcb4ec2979 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -16,15 +16,19 @@
#include <signal.h>
#include <math.h>
+#ifdef __FreeBSD__
+#include <pthread_np.h>
+#endif /* __FreeBSD__ */
#include <vppinfra/format.h>
#include <vppinfra/time_range.h>
#include <vppinfra/interrupt.h>
-#include <vppinfra/linux/sysfs.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/unix.h>
#include <vlib/vlib.h>
#include <vlib/threads.h>
-#include <vlib/stat_weak_inlines.h>
+#include <vlib/stats/stats.h>
u32
vl (void *p)
@@ -178,71 +182,104 @@ vlib_thread_init (vlib_main_t * vm)
u32 n_vlib_mains = 1;
u32 first_index = 1;
u32 i;
- uword *avail_cpu;
+ pid_t pid;
+ uword *avail_cpu, *affinity_cpu;
+ uword n_cpus;
+ u32 stats_num_worker_threads_dir_index;
+
+ stats_num_worker_threads_dir_index =
+ vlib_stats_add_gauge ("/sys/num_worker_threads");
+ ASSERT (stats_num_worker_threads_dir_index != ~0);
/* get bitmaps of active cpu cores and sockets */
- tm->cpu_core_bitmap =
- clib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online");
- tm->cpu_socket_bitmap =
- clib_sysfs_list_to_bitmap ("/sys/devices/system/node/online");
+ tm->cpu_core_bitmap = os_get_online_cpu_core_bitmap ();
+ tm->cpu_socket_bitmap = os_get_online_cpu_node_bitmap ();
+
+ /* get bitmap of active cpu cores vpp has affinity to */
+ pid = getpid ();
+ tm->cpu_affinity_bitmap = os_get_cpu_affinity_bitmap (pid);
+
+ /* if fetching affinity fails, return online cpu core bmp */
+ if (tm->cpu_affinity_bitmap == 0)
+ tm->cpu_affinity_bitmap = os_get_online_cpu_core_bitmap ();
avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap);
+ affinity_cpu = clib_bitmap_dup (tm->cpu_affinity_bitmap);
/* skip cores */
+ n_cpus = clib_bitmap_count_set_bits (avail_cpu);
+ if (tm->skip_cores >= n_cpus)
+ return clib_error_return (0, "skip-core greater than available cpus");
+ n_cpus = clib_bitmap_count_set_bits (affinity_cpu);
+ if (tm->skip_cores >= n_cpus)
+ return clib_error_return (0, "skip-core greater than affinity cpus");
+
for (i = 0; i < tm->skip_cores; i++)
{
- uword c = clib_bitmap_first_set (avail_cpu);
+ uword c;
+ c = clib_bitmap_first_set (avail_cpu);
if (c == ~0)
return clib_error_return (0, "no available cpus to skip");
avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
+
+ c = clib_bitmap_first_set (affinity_cpu);
+ if (c == ~0)
+ return clib_error_return (0, "no available env cpus to skip");
+
+ affinity_cpu = clib_bitmap_set (affinity_cpu, c, 0);
}
/* grab cpu for main thread */
- if (tm->main_lcore == ~0)
- {
- /* if main-lcore is not set, we try to use lcore 1 */
- if (clib_bitmap_get (avail_cpu, 1))
- tm->main_lcore = 1;
- else
- tm->main_lcore = clib_bitmap_first_set (avail_cpu);
- if (tm->main_lcore == (u8) ~ 0)
- return clib_error_return (0, "no available cpus to be used for the"
- " main thread");
- }
- else
+ if (tm->main_lcore != ~0)
{
if (clib_bitmap_get (avail_cpu, tm->main_lcore) == 0)
return clib_error_return (0, "cpu %u is not available to be used"
" for the main thread", tm->main_lcore);
+ avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0);
+ affinity_cpu = clib_bitmap_set (affinity_cpu, tm->main_lcore, 0);
+ }
+ /* if auto enabled, grab first cpu vpp has affinity to for main thread */
+ else if (tm->use_main_core_auto)
+ {
+ uword c = clib_bitmap_first_set (affinity_cpu);
+ if (c != ~0)
+ tm->main_lcore = c;
+
+ avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0);
+ affinity_cpu = clib_bitmap_set (affinity_cpu, tm->main_lcore, 0);
}
- avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0);
/* assume that there is socket 0 only if there is no data from sysfs */
if (!tm->cpu_socket_bitmap)
tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1);
/* pin main thread to main_lcore */
- if (tm->cb.vlib_thread_set_lcore_cb)
- {
- tm->cb.vlib_thread_set_lcore_cb (0, tm->main_lcore);
- }
- else
+ if (tm->main_lcore != ~0)
{
cpu_set_t cpuset;
CPU_ZERO (&cpuset);
CPU_SET (tm->main_lcore, &cpuset);
- pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset);
+ if (pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t),
+ &cpuset))
+ {
+ return clib_error_return (0, "could not pin main thread to cpu %u",
+ tm->main_lcore);
+ }
}
/* Set up thread 0 */
vec_validate_aligned (vlib_worker_threads, 0, CLIB_CACHE_LINE_BYTES);
- _vec_len (vlib_worker_threads) = 1;
+ vec_set_len (vlib_worker_threads, 1);
w = vlib_worker_threads;
w->thread_mheap = clib_mem_get_heap ();
w->thread_stack = vlib_thread_stacks[0];
w->cpu_id = tm->main_lcore;
+#ifdef __FreeBSD__
+ w->lwp = pthread_getthreadid_np ();
+#else
w->lwp = syscall (SYS_gettid);
+#endif /* __FreeBSD__ */
w->thread_id = pthread_self ();
tm->n_vlib_mains = 1;
@@ -285,7 +322,6 @@ vlib_thread_init (vlib_main_t * vm)
if (tr->coremask)
{
uword c;
- /* *INDENT-OFF* */
clib_bitmap_foreach (c, tr->coremask) {
if (clib_bitmap_get(avail_cpu, c) == 0)
return clib_error_return (0, "cpu %u is not available to be used"
@@ -293,17 +329,26 @@ vlib_thread_init (vlib_main_t * vm)
avail_cpu = clib_bitmap_set(avail_cpu, c, 0);
}
- /* *INDENT-ON* */
}
else
{
+ /* for automatic pinning, use cpu affinity list */
+ uword n_env_cpu = 0;
+ n_env_cpu = clib_bitmap_count_set_bits (affinity_cpu);
+
+ if (n_env_cpu < tr->count)
+ return clib_error_return (0,
+ "no available cpus to be used for"
+ " the '%s' thread #%u",
+ tr->name, n_env_cpu);
+
for (j = 0; j < tr->count; j++)
{
/* Do not use CPU 0 by default - leave it to the host and IRQs */
- uword avail_c0 = clib_bitmap_get (avail_cpu, 0);
- avail_cpu = clib_bitmap_set (avail_cpu, 0, 0);
+ uword avail_c0 = clib_bitmap_get (affinity_cpu, 0);
+ affinity_cpu = clib_bitmap_set (affinity_cpu, 0, 0);
- uword c = clib_bitmap_first_set (avail_cpu);
+ uword c = clib_bitmap_first_set (affinity_cpu);
/* Use CPU 0 as a last resort */
if (c == ~0 && avail_c0)
{
@@ -314,18 +359,21 @@ vlib_thread_init (vlib_main_t * vm)
if (c == ~0)
return clib_error_return (0,
"no available cpus to be used for"
- " the '%s' thread", tr->name);
+ " the '%s' thread #%u",
+ tr->name, tr->count);
- avail_cpu = clib_bitmap_set (avail_cpu, 0, avail_c0);
- avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
+ affinity_cpu = clib_bitmap_set (affinity_cpu, 0, avail_c0);
+ affinity_cpu = clib_bitmap_set (affinity_cpu, c, 0);
tr->coremask = clib_bitmap_set (tr->coremask, c, 1);
}
}
}
clib_bitmap_free (avail_cpu);
+ clib_bitmap_free (affinity_cpu);
tm->n_vlib_mains = n_vlib_mains;
+ vlib_stats_set_gauge (stats_num_worker_threads_dir_index, n_vlib_mains - 1);
/*
* Allocate the remaining worker threads, and thread stack vector slots
@@ -405,52 +453,52 @@ vlib_worker_thread_init (vlib_worker_thread_t * w)
void *
vlib_worker_thread_bootstrap_fn (void *arg)
{
- void *rv;
vlib_worker_thread_t *w = arg;
+#ifdef __FreeBSD__
+ w->lwp = pthread_getthreadid_np ();
+#else
w->lwp = syscall (SYS_gettid);
+#endif /* __FreeBSD__ */
w->thread_id = pthread_self ();
__os_thread_index = w - vlib_worker_threads;
- vlib_process_start_switch_stack (vlib_get_main_by_index (__os_thread_index),
- 0);
- rv = (void *) clib_calljmp
- ((uword (*)(uword)) w->thread_function,
- (uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE);
- /* NOTREACHED, we hope */
- return rv;
+ if (CLIB_DEBUG > 0)
+ {
+ void *frame_addr = __builtin_frame_address (0);
+ if (frame_addr < (void *) w->thread_stack ||
+ frame_addr > (void *) w->thread_stack + VLIB_THREAD_STACK_SIZE)
+ {
+ /* heap is not set yet */
+ fprintf (stderr, "thread stack is not set properly\n");
+ exit (1);
+ }
+ }
+
+ w->thread_function (arg);
+
+ return 0;
}
void
vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id)
{
- const char *sys_cpu_path = "/sys/devices/system/cpu/cpu";
- const char *sys_node_path = "/sys/devices/system/node/node";
clib_bitmap_t *nbmp = 0, *cbmp = 0;
- u32 node;
- u8 *p = 0;
- int core_id = -1, numa_id = -1;
+ int node, core_id = -1, numa_id = -1;
- p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, cpu_id, 0);
- clib_sysfs_read ((char *) p, "%d", &core_id);
- vec_reset_length (p);
+ core_id = os_get_cpu_phys_core_id (cpu_id);
+ nbmp = os_get_online_cpu_node_bitmap ();
- /* *INDENT-OFF* */
- clib_sysfs_read ("/sys/devices/system/node/online", "%U",
- unformat_bitmap_list, &nbmp);
clib_bitmap_foreach (node, nbmp) {
- p = format (p, "%s%u/cpulist%c", sys_node_path, node, 0);
- clib_sysfs_read ((char *) p, "%U", unformat_bitmap_list, &cbmp);
- if (clib_bitmap_get (cbmp, cpu_id))
- numa_id = node;
- vec_reset_length (cbmp);
- vec_reset_length (p);
+ cbmp = os_get_cpu_on_node_bitmap (node);
+ if (clib_bitmap_get (cbmp, cpu_id))
+ numa_id = node;
+ vec_reset_length (cbmp);
}
- /* *INDENT-ON* */
+
vec_free (nbmp);
vec_free (cbmp);
- vec_free (p);
w->core_id = core_id;
w->numa_id = numa_id;
@@ -461,6 +509,9 @@ vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned cpu_id)
{
clib_mem_main_t *mm = &clib_mem_main;
vlib_thread_main_t *tm = &vlib_thread_main;
+ pthread_t worker;
+ pthread_attr_t attr;
+ cpu_set_t cpuset;
void *(*fp_arg) (void *) = fp;
void *numa_heap;
@@ -487,29 +538,33 @@ vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned cpu_id)
}
}
- if (tm->cb.vlib_launch_thread_cb && !w->registration->use_pthreads)
- return tm->cb.vlib_launch_thread_cb (fp, (void *) w, cpu_id);
- else
- {
- pthread_t worker;
- cpu_set_t cpuset;
CPU_ZERO (&cpuset);
CPU_SET (cpu_id, &cpuset);
- if (pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w))
+ if (pthread_attr_init (&attr))
+ return clib_error_return_unix (0, "pthread_attr_init");
+
+ if (pthread_attr_setstack (&attr, w->thread_stack,
+ VLIB_THREAD_STACK_SIZE))
+ return clib_error_return_unix (0, "pthread_attr_setstack");
+
+ if (pthread_create (&worker, &attr, fp_arg, (void *) w))
return clib_error_return_unix (0, "pthread_create");
if (pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset))
return clib_error_return_unix (0, "pthread_setaffinity_np");
+ if (pthread_attr_destroy (&attr))
+ return clib_error_return_unix (0, "pthread_attr_destroy");
+
return 0;
- }
}
static clib_error_t *
start_workers (vlib_main_t * vm)
{
vlib_global_main_t *vgm = vlib_get_global_main ();
+ vlib_main_t *fvm = vlib_get_first_main ();
int i, j;
vlib_worker_thread_t *w;
vlib_main_t *vm_clone;
@@ -519,6 +574,7 @@ start_workers (vlib_main_t * vm)
vlib_node_runtime_t *rt;
u32 n_vlib_mains = tm->n_vlib_mains;
u32 worker_thread_index;
+ u32 stats_err_entry_index = fvm->error_main.stats_err_entry_index;
clib_mem_heap_t *main_heap = clib_mem_get_per_cpu_heap ();
vlib_stats_register_mem_heap (main_heap);
@@ -544,7 +600,7 @@ start_workers (vlib_main_t * vm)
vec_validate_aligned (vgm->vlib_mains, n_vlib_mains - 1,
CLIB_CACHE_LINE_BYTES);
- _vec_len (vgm->vlib_mains) = 0;
+ vec_set_len (vgm->vlib_mains, 0);
vec_add1_aligned (vgm->vlib_mains, vm, CLIB_CACHE_LINE_BYTES);
if (n_vlib_mains > 1)
@@ -588,6 +644,7 @@ start_workers (vlib_main_t * vm)
for (k = 0; k < tr->count; k++)
{
vlib_node_t *n;
+ u64 **c;
vec_add2 (vlib_worker_threads, w, 1);
/* Currently unused, may not really work */
@@ -623,12 +680,9 @@ start_workers (vlib_main_t * vm)
sizeof (*vm_clone));
vm_clone->thread_index = worker_thread_index;
- vm_clone->heap_base = w->thread_mheap;
- vm_clone->heap_aligned_base = (void *)
- (((uword) w->thread_mheap) & ~(VLIB_FRAME_ALIGN - 1));
vm_clone->pending_rpc_requests = 0;
vec_validate (vm_clone->pending_rpc_requests, 0);
- _vec_len (vm_clone->pending_rpc_requests) = 0;
+ vec_set_len (vm_clone->pending_rpc_requests, 0);
clib_memset (&vm_clone->random_buffer, 0,
sizeof (vm_clone->random_buffer));
clib_spinlock_init
@@ -658,7 +712,7 @@ start_workers (vlib_main_t * vm)
/* fork the frame dispatch queue */
nm_clone->pending_frames = 0;
vec_validate (nm_clone->pending_frames, 10);
- _vec_len (nm_clone->pending_frames) = 0;
+ vec_set_len (nm_clone->pending_frames, 0);
/* fork nodes */
nm_clone->nodes = 0;
@@ -684,7 +738,6 @@ start_workers (vlib_main_t * vm)
nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
{
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- rt->thread_index = vm_clone->thread_index;
/* copy initial runtime_data from node */
if (n->runtime_data && n->runtime_data_bytes > 0)
clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -696,12 +749,14 @@ start_workers (vlib_main_t * vm)
vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
CLIB_CACHE_LINE_BYTES);
clib_interrupt_init (
- &nm_clone->interrupts,
+ &nm_clone->input_node_interrupts,
vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]));
+ clib_interrupt_init (
+ &nm_clone->pre_input_node_interrupts,
+ vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]));
vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
{
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- rt->thread_index = vm_clone->thread_index;
/* copy initial runtime_data from node */
if (n->runtime_data && n->runtime_data_bytes > 0)
clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -716,7 +771,6 @@ start_workers (vlib_main_t * vm)
nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
{
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- rt->thread_index = vm_clone->thread_index;
/* copy initial runtime_data from node */
if (n->runtime_data && n->runtime_data_bytes > 0)
clib_memcpy (rt->runtime_data, n->runtime_data,
@@ -728,10 +782,7 @@ start_workers (vlib_main_t * vm)
CLIB_CACHE_LINE_BYTES);
/* Create per-thread frame freelist */
- nm_clone->frame_sizes = vec_new (vlib_frame_size_t, 1);
-#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES
- nm_clone->frame_size_hash = hash_create (0, sizeof (uword));
-#endif
+ nm_clone->frame_sizes = 0;
nm_clone->node_by_error = nm->node_by_error;
/* Packet trace buffers are guaranteed to be empty, nothing to do here */
@@ -741,12 +792,10 @@ start_workers (vlib_main_t * vm)
CLIB_CACHE_LINE_BYTES);
/* Switch to the stats segment ... */
- void *oldheap = vlib_stats_push_heap (0);
- vm_clone->error_main.counters =
- vec_dup_aligned (vlib_get_first_main ()->error_main.counters,
- CLIB_CACHE_LINE_BYTES);
- vlib_stats_pop_heap2 (vm_clone->error_main.counters,
- worker_thread_index, oldheap, 1);
+ vlib_stats_validate (stats_err_entry_index, worker_thread_index,
+ vec_len (fvm->error_main.counters) - 1);
+ c = vlib_stats_get_entry_data_pointer (stats_err_entry_index);
+ vm_clone->error_main.counters = c[worker_thread_index];
vm_clone->error_main.counters_last_clear = vec_dup_aligned (
vlib_get_first_main ()->error_main.counters_last_clear,
@@ -802,28 +851,37 @@ start_workers (vlib_main_t * vm)
{
for (j = 0; j < tr->count; j++)
{
+
w = vlib_worker_threads + worker_thread_index++;
err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn,
w, 0);
if (err)
- clib_error_report (err);
+ clib_unix_error ("%U, thread %s init on cpu %d failed",
+ format_clib_error, err, tr->name, 0);
}
}
else
{
uword c;
- /* *INDENT-OFF* */
clib_bitmap_foreach (c, tr->coremask) {
w = vlib_worker_threads + worker_thread_index++;
err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn,
w, c);
if (err)
- clib_error_report (err);
- }
- /* *INDENT-ON* */
+ clib_unix_error ("%U, thread %s init on cpu %d failed",
+ format_clib_error, err, tr->name, c);
+ }
}
}
vlib_worker_thread_barrier_sync (vm);
+ {
+ clib_error_t *err;
+ err = vlib_call_init_exit_functions (
+ vm, &vgm->num_workers_change_function_registrations, 1 /* call_once */,
+ 1 /* is_global */);
+ if (err)
+ clib_error_report (err);
+ }
vlib_worker_thread_barrier_release (vm);
return 0;
}
@@ -885,6 +943,7 @@ vlib_worker_thread_node_refork (void)
vlib_node_main_t *nm, *nm_clone;
vlib_node_t **old_nodes_clone;
vlib_node_runtime_t *rt, *old_rt;
+ u64 **c;
vlib_node_t *new_n_clone;
@@ -896,24 +955,29 @@ vlib_worker_thread_node_refork (void)
nm_clone = &vm_clone->node_main;
/* Re-clone error heap */
- u64 *old_counters = vm_clone->error_main.counters;
u64 *old_counters_all_clear = vm_clone->error_main.counters_last_clear;
clib_memcpy_fast (&vm_clone->error_main, &vm->error_main,
sizeof (vm->error_main));
j = vec_len (vm->error_main.counters) - 1;
- /* Switch to the stats segment ... */
- void *oldheap = vlib_stats_push_heap (0);
- vec_validate_aligned (old_counters, j, CLIB_CACHE_LINE_BYTES);
- vm_clone->error_main.counters = old_counters;
- vlib_stats_pop_heap2 (vm_clone->error_main.counters, vm_clone->thread_index,
- oldheap, 0);
+ c = vlib_stats_get_entry_data_pointer (vm->error_main.stats_err_entry_index);
+ vm_clone->error_main.counters = c[vm_clone->thread_index];
vec_validate_aligned (old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES);
vm_clone->error_main.counters_last_clear = old_counters_all_clear;
- nm_clone = &vm_clone->node_main;
+ for (j = 0; j < vec_len (nm_clone->next_frames); j++)
+ {
+ vlib_next_frame_t *nf = &nm_clone->next_frames[j];
+ if ((nf->flags & VLIB_FRAME_IS_ALLOCATED) && nf->frame != NULL)
+ {
+ vlib_frame_t *f = nf->frame;
+ nf->frame = NULL;
+ vlib_frame_free (vm_clone, f);
+ }
+ }
+
vec_free (nm_clone->next_frames);
nm_clone->next_frames = vec_dup_aligned (nm->next_frames,
CLIB_CACHE_LINE_BYTES);
@@ -988,7 +1052,6 @@ vlib_worker_thread_node_refork (void)
vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
{
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- rt->thread_index = vm_clone->thread_index;
/* copy runtime_data, will be overwritten later for existing rt */
if (n->runtime_data && n->runtime_data_bytes > 0)
clib_memcpy_fast (rt->runtime_data, n->runtime_data,
@@ -1013,13 +1076,15 @@ vlib_worker_thread_node_refork (void)
vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
CLIB_CACHE_LINE_BYTES);
clib_interrupt_resize (
- &nm_clone->interrupts,
+ &nm_clone->input_node_interrupts,
vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]));
+ clib_interrupt_resize (
+ &nm_clone->pre_input_node_interrupts,
+ vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]));
vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
{
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- rt->thread_index = vm_clone->thread_index;
/* copy runtime_data, will be overwritten later for existing rt */
if (n->runtime_data && n->runtime_data_bytes > 0)
clib_memcpy_fast (rt->runtime_data, n->runtime_data,
@@ -1047,7 +1112,6 @@ vlib_worker_thread_node_refork (void)
vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
{
vlib_node_t *n = vlib_get_node (vm, rt->node_index);
- rt->thread_index = vm_clone->thread_index;
/* copy runtime_data, will be overwritten later for existing rt */
if (n->runtime_data && n->runtime_data_bytes > 0)
clib_memcpy_fast (rt->runtime_data, n->runtime_data,
@@ -1066,6 +1130,7 @@ vlib_worker_thread_node_refork (void)
vec_free (old_rt);
+ vec_free (nm_clone->processes);
nm_clone->processes = vec_dup_aligned (nm->processes,
CLIB_CACHE_LINE_BYTES);
nm_clone->node_by_error = nm->node_by_error;
@@ -1111,6 +1176,7 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input)
tm->sched_policy = ~0;
tm->sched_priority = ~0;
tm->main_lcore = ~0;
+ tm->use_main_core_auto = 0;
tr = tm->next;
@@ -1126,6 +1192,8 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input)
tm->use_pthreads = 1;
else if (unformat (input, "thread-prefix %v", &tm->thread_prefix))
;
+ else if (unformat (input, "main-core auto"))
+ tm->use_main_core_auto = 1;
else if (unformat (input, "main-core %u", &tm->main_lcore))
;
else if (unformat (input, "skip-cores %u", &tm->skip_cores))
@@ -1184,6 +1252,13 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input)
break;
}
+ if (tm->main_lcore != ~0 && tm->use_main_core_auto)
+ {
+ return clib_error_return (
+ 0, "cannot set both 'main-core %u' and 'main-core auto'",
+ tm->main_lcore);
+ }
+
if (tm->sched_priority != ~0)
{
if (tm->sched_policy == SCHED_FIFO || tm->sched_policy == SCHED_RR)
@@ -1404,7 +1479,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
* rebuilding the stat segment node clones from the
* stat thread...
*/
- vlib_stat_segment_lock ();
+ vlib_stats_segment_lock ();
/* Do stats elements on main thread */
worker_thread_node_runtime_update_internal ();
@@ -1455,7 +1530,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
os_panic ();
}
}
- vlib_stat_segment_unlock ();
+ vlib_stats_segment_unlock ();
}
t_closed_total = now - vm->barrier_epoch;
@@ -1479,6 +1554,57 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm)
vm->clib_time.last_cpu_time, 1 /* leave */ );
}
+static void
+vlib_worker_sync_rpc (void *args)
+{
+ ASSERT (vlib_thread_is_main_w_barrier ());
+ vlib_worker_threads->wait_before_barrier = 0;
+}
+
+void
+vlib_workers_sync (void)
+{
+ if (PREDICT_FALSE (!vlib_num_workers ()))
+ return;
+
+ if (!(*vlib_worker_threads->wait_at_barrier) &&
+ !clib_atomic_swap_rel_n (&vlib_worker_threads->wait_before_barrier, 1))
+ {
+ u32 thread_index = vlib_get_thread_index ();
+ vlib_rpc_call_main_thread (vlib_worker_sync_rpc, (u8 *) &thread_index,
+ sizeof (thread_index));
+ vlib_worker_flush_pending_rpc_requests (vlib_get_main ());
+ }
+
+ /* Wait until main thread asks for barrier */
+ while (!(*vlib_worker_threads->wait_at_barrier))
+ ;
+
+ /* Stop before barrier and make sure all threads are either
+ * at worker barrier or the barrier before it */
+ clib_atomic_fetch_add (&vlib_worker_threads->workers_before_barrier, 1);
+ while (vlib_num_workers () > (*vlib_worker_threads->workers_at_barrier +
+ vlib_worker_threads->workers_before_barrier))
+ ;
+}
+
+void
+vlib_workers_continue (void)
+{
+ if (PREDICT_FALSE (!vlib_num_workers ()))
+ return;
+
+ clib_atomic_fetch_add (&vlib_worker_threads->done_work_before_barrier, 1);
+
+ /* Wait until all workers are done with work before barrier */
+ while (vlib_worker_threads->done_work_before_barrier <
+ vlib_worker_threads->workers_before_barrier)
+ ;
+
+ clib_atomic_fetch_add (&vlib_worker_threads->done_work_before_barrier, -1);
+ clib_atomic_fetch_add (&vlib_worker_threads->workers_before_barrier, -1);
+}
+
/**
* Wait until each of the workers has been once around the track
*/
@@ -1516,16 +1642,26 @@ vlib_worker_wait_one_loop (void)
}
void
+vlib_worker_flush_pending_rpc_requests (vlib_main_t *vm)
+{
+ vlib_main_t *vm_global = vlib_get_first_main ();
+
+ ASSERT (vm != vm_global);
+
+ clib_spinlock_lock_if_init (&vm_global->pending_rpc_lock);
+ vec_append (vm_global->pending_rpc_requests, vm->pending_rpc_requests);
+ vec_reset_length (vm->pending_rpc_requests);
+ clib_spinlock_unlock_if_init (&vm_global->pending_rpc_lock);
+}
+
+void
vlib_worker_thread_fn (void *arg)
{
vlib_global_main_t *vgm = vlib_get_global_main ();
vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
vlib_main_t *vm = vlib_get_main ();
clib_error_t *e;
- vlib_process_finish_switch_stack (vm);
-
ASSERT (vm->thread_index == vlib_get_thread_index ());
vlib_worker_thread_init (w);
@@ -1540,27 +1676,27 @@ vlib_worker_thread_fn (void *arg)
if (e)
clib_error_report (e);
- /* Wait until the dpdk init sequence is complete */
- while (tm->extern_thread_mgmt && tm->worker_thread_release == 0)
- vlib_worker_thread_barrier_check ();
-
vlib_worker_loop (vm);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_THREAD (worker_thread_reg, static) = {
.name = "workers",
.short_name = "wk",
.function = vlib_worker_thread_fn,
};
-/* *INDENT-ON* */
+extern clib_march_fn_registration
+ *vlib_frame_queue_dequeue_with_aux_fn_march_fn_registrations;
+extern clib_march_fn_registration
+ *vlib_frame_queue_dequeue_fn_march_fn_registrations;
u32
vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
{
vlib_thread_main_t *tm = vlib_get_thread_main ();
+ vlib_main_t *vm = vlib_get_main ();
vlib_frame_queue_main_t *fqm;
vlib_frame_queue_t *fq;
+ vlib_node_t *node;
int i;
u32 num_threads;
@@ -1572,11 +1708,24 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
vec_add2 (tm->frame_queue_mains, fqm, 1);
+ node = vlib_get_node (vm, fqm->node_index);
+ ASSERT (node);
+ if (node->aux_offset)
+ {
+ fqm->frame_queue_dequeue_fn =
+ CLIB_MARCH_FN_VOID_POINTER (vlib_frame_queue_dequeue_with_aux_fn);
+ }
+ else
+ {
+ fqm->frame_queue_dequeue_fn =
+ CLIB_MARCH_FN_VOID_POINTER (vlib_frame_queue_dequeue_fn);
+ }
+
fqm->node_index = node_index;
fqm->frame_queue_nelts = frame_queue_nelts;
vec_validate (fqm->vlib_frame_queues, tm->n_vlib_mains - 1);
- _vec_len (fqm->vlib_frame_queues) = 0;
+ vec_set_len (fqm->vlib_frame_queues, 0);
for (i = 0; i < tm->n_vlib_mains; i++)
{
fq = vlib_frame_queue_alloc (frame_queue_nelts);
@@ -1586,19 +1735,6 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
return (fqm - tm->frame_queue_mains);
}
-int
-vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb)
-{
- vlib_thread_main_t *tm = vlib_get_thread_main ();
-
- if (tm->extern_thread_mgmt)
- return -1;
-
- tm->cb.vlib_launch_thread_cb = cb->vlib_launch_thread_cb;
- tm->extern_thread_mgmt = 1;
- return 0;
-}
-
void
vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t *
args)
@@ -1625,17 +1761,21 @@ vlib_rpc_call_main_thread (void *callback, u8 * args, u32 arg_size)
clib_error_t *
threads_init (vlib_main_t * vm)
{
+ const vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+ if (tm->main_lcore == ~0 && tm->n_vlib_mains > 1)
+ return clib_error_return (0, "Configuration error, a main core must "
+ "be specified when using worker threads");
+
return 0;
}
VLIB_INIT_FUNCTION (threads_init);
-
static clib_error_t *
show_clock_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
- int i;
int verbose = 0;
clib_timebase_t _tb, *tb = &_tb;
@@ -1648,36 +1788,29 @@ show_clock_command_fn (vlib_main_t * vm,
verbose, format_clib_timebase_time,
clib_timebase_now (tb));
- if (vlib_get_n_threads () == 1)
- return 0;
-
vlib_cli_output (vm, "Time last barrier release %.9f",
vm->time_last_barrier_release);
- for (i = 1; i < vlib_get_n_threads (); i++)
+ foreach_vlib_main ()
{
- vlib_main_t *ovm = vlib_get_main_by_index (i);
- if (ovm == 0)
- continue;
-
- vlib_cli_output (vm, "%d: %U", i, format_clib_time, &ovm->clib_time,
- verbose);
-
- vlib_cli_output (
- vm, "Thread %d offset %.9f error %.9f", i, ovm->time_offset,
- vm->time_last_barrier_release - ovm->time_last_barrier_release);
+ vlib_cli_output (vm, "%d: %U", this_vlib_main->thread_index,
+ format_clib_time, &this_vlib_main->clib_time, verbose);
+
+ vlib_cli_output (vm, "Thread %d offset %.9f error %.9f",
+ this_vlib_main->thread_index,
+ this_vlib_main->time_offset,
+ vm->time_last_barrier_release -
+ this_vlib_main->time_last_barrier_release);
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (f_command, static) =
{
.path = "show clock",
.short_help = "show clock",
.function = show_clock_command_fn,
};
-/* *INDENT-ON* */
vlib_thread_main_t *
vlib_get_thread_main_not_inline (void)
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index 91727bacc23..3072d0e67dd 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -17,7 +17,11 @@
#include <vlib/main.h>
#include <vppinfra/callback.h>
+#ifdef __linux__
#include <linux/sched.h>
+#elif __FreeBSD__
+#include <sys/sched.h>
+#endif /* __linux__ */
void vlib_set_thread_name (char *name);
@@ -45,22 +49,6 @@ typedef struct vlib_thread_registration_
uword *coremask;
} vlib_thread_registration_t;
-/*
- * Frames have their cpu / vlib_main_t index in the low-order N bits
- * Make VLIB_MAX_CPUS a power-of-two, please...
- */
-
-#ifndef VLIB_MAX_CPUS
-#define VLIB_MAX_CPUS 256
-#endif
-
-#if VLIB_MAX_CPUS > CLIB_MAX_MHEAPS
-#error Please increase number of per-cpu mheaps
-#endif
-
-#define VLIB_CPU_MASK (VLIB_MAX_CPUS - 1) /* 0x3f, max */
-#define VLIB_OFFSET_MASK (~VLIB_CPU_MASK)
-
#define VLIB_LOG2_THREAD_STACK_SIZE (21)
#define VLIB_THREAD_STACK_SIZE (1<<VLIB_LOG2_THREAD_STACK_SIZE)
@@ -75,6 +63,7 @@ typedef struct
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
u32 buffer_index[VLIB_FRAME_SIZE];
+ u32 aux_data[VLIB_FRAME_SIZE];
}
vlib_frame_queue_elt_t;
@@ -101,6 +90,9 @@ typedef struct
const char *barrier_caller;
const char *barrier_context;
volatile u32 *node_reforks_required;
+ volatile u32 wait_before_barrier;
+ volatile u32 workers_before_barrier;
+ volatile u32 done_work_before_barrier;
long lwp;
int cpu_id;
@@ -130,7 +122,10 @@ typedef struct
}
vlib_frame_queue_t;
-typedef struct
+struct vlib_frame_queue_main_t_;
+typedef u32 (vlib_frame_queue_dequeue_fn_t) (
+ vlib_main_t *vm, struct vlib_frame_queue_main_t_ *fqm);
+typedef struct vlib_frame_queue_main_t_
{
u32 node_index;
u32 frame_queue_nelts;
@@ -140,6 +135,7 @@ typedef struct
/* for frame queue tracing */
frame_queue_trace_t *frame_queue_traces;
frame_queue_nelt_counter_t *frame_queue_histogram;
+ vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
} vlib_frame_queue_main_t;
typedef struct
@@ -182,6 +178,10 @@ void vlib_worker_thread_node_refork (void);
* Wait until each of the workers has been once around the track
*/
void vlib_worker_wait_one_loop (void);
+/**
+ * Flush worker's pending rpc requests to main thread's rpc queue
+ */
+void vlib_worker_flush_pending_rpc_requests (vlib_main_t *vm);
static_always_inline uword
vlib_get_thread_index (void)
@@ -218,12 +218,20 @@ __foreach_vlib_main_helper (vlib_main_t *ii, vlib_main_t **p)
__foreach_vlib_main_helper (ii, &this_vlib_main); ii++) \
if (this_vlib_main)
-#define foreach_sched_policy \
- _(SCHED_OTHER, OTHER, "other") \
- _(SCHED_BATCH, BATCH, "batch") \
- _(SCHED_IDLE, IDLE, "idle") \
- _(SCHED_FIFO, FIFO, "fifo") \
- _(SCHED_RR, RR, "rr")
+#define foreach_sched_policy_posix \
+ _ (SCHED_OTHER, OTHER, "other") \
+ _ (SCHED_FIFO, FIFO, "fifo") \
+ _ (SCHED_RR, RR, "rr")
+#define foreach_sched_policy_linux \
+ _ (SCHED_BATCH, BATCH, "batch") \
+ _ (SCHED_IDLE, IDLE, "idle")
+
+#ifdef __linux__
+#define foreach_sched_policy \
+ foreach_sched_policy_posix foreach_sched_policy_linux
+#else
+#define foreach_sched_policy foreach_sched_policy_posix
+#endif /* __linux__ */
typedef enum
{
@@ -235,13 +243,6 @@ typedef enum
typedef struct
{
- clib_error_t *(*vlib_launch_thread_cb) (void *fp, vlib_worker_thread_t * w,
- unsigned cpu_id);
- clib_error_t *(*vlib_thread_set_lcore_cb) (u32 thread, u16 cpu);
-} vlib_thread_callbacks_t;
-
-typedef struct
-{
/* Link list of registrations, built by constructors */
vlib_thread_registration_t *next;
@@ -252,12 +253,10 @@ typedef struct
vlib_worker_thread_t *worker_threads;
- /*
- * Launch all threads as pthreads,
- * not eal_rte_launch (strict affinity) threads
- */
int use_pthreads;
+ int use_main_core_auto;
+
/* Number of vlib_main / vnet_main clones */
u32 n_vlib_mains;
@@ -285,6 +284,9 @@ typedef struct
/* Bitmap of available CPU sockets (NUMA nodes) */
uword *cpu_socket_bitmap;
+ /* Bitmap of CPU affinity for VPP process */
+ uword *cpu_affinity_bitmap;
+
/* Worker handoff queues */
vlib_frame_queue_main_t *frame_queue_mains;
@@ -297,10 +299,6 @@ typedef struct
/* scheduling policy priority */
u32 sched_priority;
- /* callbacks */
- vlib_thread_callbacks_t cb;
- int extern_thread_mgmt;
-
/* NUMA-bound heap size */
uword numa_heap_size;
@@ -370,12 +368,10 @@ vlib_worker_thread_barrier_check (void)
if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled))
{
vlib_worker_thread_t *w = vlib_worker_threads + thread_index;
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) = {
.format = "barrier-wait-thread-%d",
.format_args = "i4",
};
- /* *INDENT-ON* */
struct
{
@@ -419,12 +415,10 @@ vlib_worker_thread_barrier_check (void)
{
t = vlib_time_now (vm) - t;
vlib_worker_thread_t *w = vlib_worker_threads + thread_index;
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) = {
.format = "barrier-refork-thread-%d",
.format_args = "i4",
};
- /* *INDENT-ON* */
struct
{
@@ -446,12 +440,10 @@ vlib_worker_thread_barrier_check (void)
{
t = vlib_time_now (vm) - t;
vlib_worker_thread_t *w = vlib_worker_threads + thread_index;
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) = {
.format = "barrier-released-thread-%d: %dus",
.format_args = "i4i4",
};
- /* *INDENT-ON* */
struct
{
@@ -490,8 +482,6 @@ vlib_thread_is_main_w_barrier (void)
}
u8 *vlib_thread_stack_init (uword thread_index);
-int vlib_thread_cb_register (struct vlib_main_t *vm,
- vlib_thread_callbacks_t * cb);
extern void *rpc_call_main_thread_cb_fn;
void
@@ -501,6 +491,17 @@ void vlib_rpc_call_main_thread (void *function, u8 * args, u32 size);
void vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id);
vlib_thread_main_t *vlib_get_thread_main_not_inline (void);
+/**
+ * Force workers sync from within worker
+ *
+ * Must be paired with @ref vlib_workers_continue
+ */
+void vlib_workers_sync (void);
+/**
+ * Release barrier after workers sync
+ */
+void vlib_workers_continue (void);
+
#endif /* included_vlib_threads_h */
/*
diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c
index d14e9c50e27..2872a025d66 100644
--- a/src/vlib/threads_cli.c
+++ b/src/vlib/threads_cli.c
@@ -43,6 +43,7 @@ static clib_error_t *
show_threads_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
+ const vlib_thread_main_t *tm = vlib_get_thread_main ();
vlib_worker_thread_t *w;
int i;
@@ -64,7 +65,7 @@ show_threads_fn (vlib_main_t * vm,
line = format (line, "%-25U", format_sched_policy_and_priority, w->lwp);
int cpu_id = w->cpu_id;
- if (cpu_id > -1)
+ if (cpu_id > -1 && tm->main_lcore != ~0)
{
int core_id = w->core_id;
int numa_id = w->numa_id;
@@ -84,13 +85,11 @@ show_threads_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_threads_command, static) = {
.path = "show threads",
.short_help = "Show threads",
.function = show_threads_fn,
};
-/* *INDENT-ON* */
/*
* Trigger threads to grab frame queue trace data
@@ -180,14 +179,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_trace_frame_queue,static) = {
.path = "trace frame-queue",
.short_help = "trace frame-queue (on|off)",
.function = trace_frame_queue,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
@@ -362,21 +359,17 @@ show_frame_queue_histogram (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_show_frame_queue_trace,static) = {
.path = "show frame-queue",
.short_help = "show frame-queue trace",
.function = show_frame_queue_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_show_frame_queue_histogram,static) = {
.path = "show frame-queue histogram",
.short_help = "show frame-queue histogram",
.function = show_frame_queue_histogram,
};
-/* *INDENT-ON* */
/*
@@ -445,13 +438,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_test_frame_queue_nelts,static) = {
.path = "test frame-queue nelts",
.short_help = "test frame-queue nelts (4,8,16,32)",
.function = test_frame_queue_nelts,
};
-/* *INDENT-ON* */
/*
@@ -524,13 +515,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_test_frame_queue_threshold,static) = {
.path = "test frame-queue threshold",
.short_help = "test frame-queue threshold N (0=no limit)",
.function = test_frame_queue_threshold,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vlib/time.c b/src/vlib/time.c
new file mode 100644
index 00000000000..cfe45a0643c
--- /dev/null
+++ b/src/vlib/time.c
@@ -0,0 +1,84 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+/* Virtual time allows to adjust VPP clock by arbitrary amount of time.
+ * It is done such that the order of timer expirations is maintained,
+ * and if a timer expiration callback reschedule another timer, this
+ * timer will also properly expire in the right order. IOW, the order
+ * of events is preserved.
+ *
+ * When moving time forward, each VPP thread (main and workers) runs an
+ * instance of the input node 'virtual-time-input' below. This node is
+ * responsible of advancing its own VPP thread clock to the next timer
+ * expiration. IOW each thread will move its clock independently one
+ * timer at a time. This also means that while moving time forward, each
+ * thread might not have the exact same view of what 'now' means. Once
+ * the main thread has finished moving its time forward, the worker thread
+ * barrier will ensure the timer between main and workers is synchronized.
+ *
+ * Using an input node in poll-mode has several advantages, including
+ * preventing 'unix-epoll-input' to sleep (as it will not sleep if at
+ * least one polling node is active). */
+
+#include <vlib/vlib.h>
+#include <vlib/time.h>
+
+static f64 vlib_time_virtual_stop;
+
+static uword
+vlib_time_virtual_input (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ const f64 next = vlib_time_get_next_timer (vm);
+ /* each thread will advance its own time. In case a thread is much faster
+ * than another, we must make sure it does not run away... */
+ if (vlib_time_now (vm) + next > vlib_time_virtual_stop)
+ vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_DISABLED);
+ else
+ vlib_time_adjust (vm, next);
+ return 0;
+}
+
+VLIB_REGISTER_NODE (vlib_time_virtual_input_node) = {
+ .function = vlib_time_virtual_input,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .name = "virtual-time-input",
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+static clib_error_t *
+vlib_time_virtual_adjust_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ f64 val;
+
+ if (!unformat (input, "%f", &val))
+ return clib_error_create ("unknown input `%U'", format_unformat_error,
+ input);
+
+ vlib_time_virtual_stop = vlib_time_now (vm) + val;
+
+ foreach_vlib_main ()
+ vlib_node_set_state (this_vlib_main, vlib_time_virtual_input_node.index,
+ VLIB_NODE_STATE_POLLING);
+
+ vlib_worker_thread_barrier_release (vm);
+ while ((val = vlib_process_wait_for_event_or_clock (vm, val)) >= 0.001)
+ ;
+ /* this barrier sync will resynchronize all the clocks, so even if the main
+ * thread was faster than some workers, this will make sure the workers will
+ * disable their virtual-time-input node on their next iteration (as stop
+ * time is reached). If a worker is too slow, there is a slight chance
+ * several of its timers expire at the same time at this point. Time will
+ * tell... */
+ vlib_worker_thread_barrier_sync (vm);
+ return 0;
+}
+
+VLIB_CLI_COMMAND (vlib_time_virtual_command) = {
+ .path = "set clock adjust",
+ .short_help = "set clock adjust <nn>",
+ .function = vlib_time_virtual_adjust_command_fn,
+};
diff --git a/src/vlib/time.h b/src/vlib/time.h
new file mode 100644
index 00000000000..61873bb2ef3
--- /dev/null
+++ b/src/vlib/time.h
@@ -0,0 +1,26 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vlib_time_h
+#define included_vlib_time_h
+
+#include <vlib/vlib.h>
+#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h>
+
+static inline f64
+vlib_time_get_next_timer (vlib_main_t *vm)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ TWT (tw_timer_wheel) *wheel = nm->timing_wheel;
+ return TW (tw_timer_first_expires_in_ticks) (wheel) * wheel->timer_interval;
+}
+
+static inline void
+vlib_time_adjust (vlib_main_t *vm, f64 offset)
+{
+ vm->time_offset += offset;
+}
+
+#endif /* included_vlib_time_h */
diff --git a/src/vlib/trace.c b/src/vlib/trace.c
index 4bbd9505b71..fa085387e4b 100644
--- a/src/vlib/trace.c
+++ b/src/vlib/trace.c
@@ -173,12 +173,10 @@ format_vlib_trace (u8 * s, va_list * va)
}
/* Root of all trace cli commands. */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (trace_cli_command,static) = {
.path = "trace",
.short_help = "Packet tracer commands",
};
-/* *INDENT-ON* */
int
trace_time_cmp (void *a1, void *a2)
@@ -256,7 +254,6 @@ trace_apply_filter (vlib_main_t * vm)
* of any N traces.
*/
n_accepted = 0;
- /* *INDENT-OFF* */
pool_foreach (h, tm->trace_buffer_pool)
{
accept = filter_accept(tm, h[0]);
@@ -266,13 +263,12 @@ trace_apply_filter (vlib_main_t * vm)
else
n_accepted++;
}
- /* *INDENT-ON* */
/* remove all traces that we don't want to keep */
for (index = 0; index < vec_len (traces_to_remove); index++)
{
trace_index = traces_to_remove[index] - tm->trace_buffer_pool;
- _vec_len (tm->trace_buffer_pool[trace_index]) = 0;
+ vec_set_len (tm->trace_buffer_pool[trace_index], 0);
pool_put_index (tm->trace_buffer_pool, trace_index);
}
@@ -357,13 +353,11 @@ cli_show_trace_buffer (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_trace_cli,static) = {
.path = "show trace",
.short_help = "Show trace buffer [max COUNT]",
.function = cli_show_trace_buffer,
};
-/* *INDENT-ON* */
int vlib_enable_disable_pkt_trace_filter (int enable) __attribute__ ((weak));
@@ -463,13 +457,6 @@ cli_add_trace_buffer (vlib_main_t * vm,
goto done;
}
- u32 filter_table = classify_get_trace_chain ();
- if (filter && filter_table == ~0)
- {
- error = clib_error_create ("No packet trace filter configured...");
- goto done;
- }
-
trace_update_capture_options (add, node_index, filter, verbose);
done:
@@ -478,13 +465,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (add_trace_cli,static) = {
.path = "trace add",
.short_help = "trace add <input-graph-node> <add'l-pkts-for-node-> [filter] [verbose]",
.function = cli_add_trace_buffer,
};
-/* *INDENT-ON* */
/*
* Configure a filter for packet traces.
@@ -582,13 +567,11 @@ cli_filter_trace (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (filter_trace_cli,static) = {
.path = "trace filter",
.short_help = "trace filter none | [include|exclude] NODE COUNT",
.function = cli_filter_trace,
};
-/* *INDENT-ON* */
static clib_error_t *
cli_clear_trace_buffer (vlib_main_t * vm,
@@ -598,13 +581,11 @@ cli_clear_trace_buffer (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_trace_cli,static) = {
.path = "clear trace",
.short_help = "Clear trace buffer and free memory",
.function = cli_clear_trace_buffer,
};
-/* *INDENT-ON* */
/* Placeholder function to get us linked in. */
void
@@ -612,18 +593,6 @@ vlib_trace_cli_reference (void)
{
}
-int
-vnet_is_packet_traced (vlib_buffer_t * b,
- u32 classify_table_index, int func)
-__attribute__ ((weak));
-
-int
-vnet_is_packet_traced (vlib_buffer_t * b, u32 classify_table_index, int func)
-{
- clib_warning ("BUG: STUB called");
- return 1;
-}
-
void *
vlib_add_trace (vlib_main_t * vm,
vlib_node_runtime_t * r, vlib_buffer_t * b, u32 n_data_bytes)
@@ -631,8 +600,148 @@ vlib_add_trace (vlib_main_t * vm,
return vlib_add_trace_inline (vm, r, b, n_data_bytes);
}
+vlib_is_packet_traced_fn_t *
+vlib_is_packet_traced_function_from_name (const char *name)
+{
+ vlib_trace_filter_function_registration_t *reg =
+ vlib_trace_filter_main.trace_filter_registration;
+ while (reg)
+ {
+ if (clib_strcmp (reg->name, name) == 0)
+ break;
+ reg = reg->next;
+ }
+ if (!reg)
+ return 0;
+ return reg->function;
+}
+
+vlib_is_packet_traced_fn_t *
+vlib_is_packet_traced_default_function ()
+{
+ vlib_trace_filter_function_registration_t *reg =
+ vlib_trace_filter_main.trace_filter_registration;
+ vlib_trace_filter_function_registration_t *tmp_reg = reg;
+ while (reg)
+ {
+ if (reg->priority > tmp_reg->priority)
+ tmp_reg = reg;
+ reg = reg->next;
+ }
+ return tmp_reg->function;
+}
+
+static clib_error_t *
+vlib_trace_filter_function_init (vlib_main_t *vm)
+{
+ vlib_is_packet_traced_fn_t *default_fn =
+ vlib_is_packet_traced_default_function ();
+ foreach_vlib_main ()
+ {
+ vlib_trace_main_t *tm = &this_vlib_main->trace_main;
+ tm->current_trace_filter_function = default_fn;
+ }
+ return 0;
+}
+
+vlib_trace_filter_main_t vlib_trace_filter_main;
+
+VLIB_INIT_FUNCTION (vlib_trace_filter_function_init);
+
+static clib_error_t *
+show_trace_filter_function (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main;
+ vlib_trace_main_t *tm = &vm->trace_main;
+ vlib_is_packet_traced_fn_t *current_trace_filter_fn =
+ tm->current_trace_filter_function;
+ vlib_trace_filter_function_registration_t *reg =
+ tfm->trace_filter_registration;
+
+ while (reg)
+ {
+ vlib_cli_output (vm, "%sname:%s description: %s priority: %u",
+ reg->function == current_trace_filter_fn ? "(*) " : "",
+ reg->name, reg->description, reg->priority);
+ reg = reg->next;
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_trace_filter_function_cli, static) = {
+ .path = "show trace filter function",
+ .short_help = "show trace filter function",
+ .function = show_trace_filter_function,
+};
+
+uword
+unformat_vlib_trace_filter_function (unformat_input_t *input, va_list *args)
+{
+ vlib_is_packet_traced_fn_t **res =
+ va_arg (*args, vlib_is_packet_traced_fn_t **);
+ vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main;
+
+ vlib_trace_filter_function_registration_t *reg =
+ tfm->trace_filter_registration;
+ while (reg)
+ {
+ if (unformat (input, reg->name))
+ {
+ *res = reg->function;
+ return 1;
+ }
+ reg = reg->next;
+ }
+ return 0;
+}
+
+void
+vlib_set_trace_filter_function (vlib_is_packet_traced_fn_t *x)
+{
+ foreach_vlib_main ()
+ {
+ this_vlib_main->trace_main.current_trace_filter_function = x;
+ }
+}
+
+static clib_error_t *
+set_trace_filter_function (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_is_packet_traced_fn_t *res = 0;
+ clib_error_t *error = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != (uword) UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vlib_trace_filter_function,
+ &res))
+ ;
+ else
+ {
+ error = clib_error_create (
+ "expected valid trace filter function, got `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+ vlib_set_trace_filter_function (res);
+
+done:
+ unformat_free (line_input);
+ return error;
+}
+VLIB_CLI_COMMAND (set_trace_filter_function_cli, static) = {
+ .path = "set trace filter function",
+ .short_help = "set trace filter function <func_name>",
+ .function = set_trace_filter_function,
+};
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vlib/trace.h b/src/vlib/trace.h
index d045271f853..196c691ece6 100644
--- a/src/vlib/trace.h
+++ b/src/vlib/trace.h
@@ -80,6 +80,17 @@ typedef void *(vlib_add_trace_callback_t) (struct vlib_main_t *,
struct vlib_buffer_t * b,
u32 n_data_bytes);
+typedef int (vlib_is_packet_traced_fn_t) (vlib_buffer_t *b,
+ u32 classify_table_index, int func);
+typedef struct vlib_trace_filter_function_registration
+{
+ const char *name;
+ const char *description;
+ int priority;
+ vlib_is_packet_traced_fn_t *function;
+ struct vlib_trace_filter_function_registration *next;
+} vlib_trace_filter_function_registration_t;
+
typedef struct
{
/* Pool of trace buffers. */
@@ -109,10 +120,33 @@ typedef struct
/* a callback to enable customized addition of a new trace */
vlib_add_trace_callback_t *add_trace_callback;
+ vlib_is_packet_traced_fn_t *current_trace_filter_function;
+
} vlib_trace_main_t;
format_function_t format_vlib_trace;
-
+typedef struct
+{
+ vlib_trace_filter_function_registration_t *trace_filter_registration;
+} vlib_trace_filter_main_t;
+
+extern vlib_trace_filter_main_t vlib_trace_filter_main;
+#define VLIB_REGISTER_TRACE_FILTER_FUNCTION(x, ...) \
+ __VA_ARGS__ vlib_trace_filter_function_registration_t \
+ __vlib_trace_filter_function_##x; \
+ static void __clib_constructor \
+ __vlib_trace_filter_function_registration_##x (void) \
+ { \
+ vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main; \
+ __vlib_trace_filter_function_##x.next = tfm->trace_filter_registration; \
+ tfm->trace_filter_registration = &__vlib_trace_filter_function_##x; \
+ } \
+ __VA_ARGS__ vlib_trace_filter_function_registration_t \
+ __vlib_trace_filter_function_##x
+
+vlib_is_packet_traced_fn_t *
+vlib_is_packet_traced_function_from_name (const char *name);
+vlib_is_packet_traced_fn_t *vlib_is_packet_traced_default_function ();
void trace_apply_filter (struct vlib_main_t *vm);
int trace_time_cmp (void *a1, void *a2);
void vlib_trace_stop_and_clear (void);
@@ -121,6 +155,9 @@ void trace_update_capture_options (u32 add, u32 node_index,
u32 filter, u8 verbose);
void trace_filter_set (u32 node_index, u32 flag, u32 count);
void clear_trace_buffer (void);
+void vlib_set_trace_filter_function (vlib_is_packet_traced_fn_t *x);
+uword unformat_vlib_trace_filter_function (unformat_input_t *input,
+ va_list *args);
#endif /* included_vlib_trace_h */
diff --git a/src/vlib/trace_funcs.h b/src/vlib/trace_funcs.h
index 9313d41eb7d..9b45346b467 100644
--- a/src/vlib/trace_funcs.h
+++ b/src/vlib/trace_funcs.h
@@ -125,7 +125,7 @@ vlib_free_trace (vlib_main_t * vm, vlib_buffer_t * b)
vlib_trace_main_t *tm = &vm->trace_main;
u32 trace_index = vlib_buffer_get_trace_index (b);
vlib_validate_trace (tm, b);
- _vec_len (tm->trace_buffer_pool[trace_index]) = 0;
+ vec_set_len (tm->trace_buffer_pool[trace_index], 0);
pool_put_index (tm->trace_buffer_pool, trace_index);
}
@@ -138,10 +138,7 @@ vlib_trace_next_frame (vlib_main_t * vm,
nf->flags |= VLIB_FRAME_TRACE;
}
-void trace_apply_filter (vlib_main_t * vm);
-int vnet_is_packet_traced (vlib_buffer_t * b,
- u32 classify_table_index, int func);
-
+void trace_apply_filter (vlib_main_t *vm);
/*
* Mark buffer as traced and allocate trace buffer.
@@ -164,7 +161,7 @@ vlib_trace_buffer (vlib_main_t * vm,
if (PREDICT_FALSE (vlib_global_main.trace_filter.trace_filter_enable))
{
/* See if we're supposed to trace this packet... */
- if (vnet_is_packet_traced (
+ if (tm->current_trace_filter_function (
b, vlib_global_main.trace_filter.classify_table_index,
0 /* full classify */) != 1)
return 0;
diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c
index 44ec11fdb35..90cf61d811d 100644
--- a/src/vlib/unix/cli.c
+++ b/src/vlib/unix/cli.c
@@ -62,6 +62,7 @@
#include <netinet/tcp.h>
#include <math.h>
#include <vppinfra/macros.h>
+#include <vppinfra/format_table.h>
/** ANSI escape code. */
#define ESC "\x1b"
@@ -244,6 +245,9 @@ typedef struct
/** Macro tables for this session */
clib_macro_main_t macro_main;
+
+ /** Session name */
+ u8 *name;
} unix_cli_file_t;
/** Resets the pager buffer and other data.
@@ -275,6 +279,7 @@ unix_cli_file_free (unix_cli_file_t * f)
{
vec_free (f->output_vector);
vec_free (f->input_vector);
+ vec_free (f->name);
unix_cli_pager_reset (f);
}
@@ -1312,6 +1317,10 @@ unix_cli_new_session_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
/* Add an identifier to the new session list */
unix_cli_new_session_t ns;
+ /* Check the connection didn't close already */
+ if (pool_is_free_index (cm->cli_file_pool, event_data[0]))
+ break;
+
ns.cf_index = event_data[0];
ns.deadline = vlib_time_now (vm) + 1.0;
@@ -1606,7 +1615,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm,
/* Delete the desired text from the command */
memmove (cf->current_command, cf->current_command + j, delta);
- _vec_len (cf->current_command) = delta;
+ vec_set_len (cf->current_command, delta);
/* Print the new contents */
unix_vlib_cli_output_cooked (cf, uf, cf->current_command, delta);
@@ -1631,7 +1640,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm,
unix_vlib_cli_output_cursor_left (cf, uf);
/* Truncate the line at the cursor */
- _vec_len (cf->current_command) = cf->cursor;
+ vec_set_len (cf->current_command, cf->cursor);
cf->search_mode = 0;
break;
@@ -1673,7 +1682,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm,
unix_vlib_cli_output_cooked (cf, uf, (u8 *) " ", 1);
for (; (cf->current_command + cf->cursor) > save; cf->cursor--)
unix_vlib_cli_output_cursor_left (cf, uf);
- _vec_len (cf->current_command) -= delta;
+ vec_dec_len (cf->current_command, delta);
}
}
cf->search_mode = 0;
@@ -1730,13 +1739,13 @@ unix_cli_line_process_one (unix_cli_main_t * cm,
if (cf->excursion == vec_len (cf->command_history))
{
/* down-arrowed to last entry - want a blank line */
- _vec_len (cf->current_command) = 0;
+ vec_set_len (cf->current_command, 0);
}
else if (cf->excursion < 0)
{
/* up-arrowed over the start to the end, want a blank line */
cf->excursion = vec_len (cf->command_history);
- _vec_len (cf->current_command) = 0;
+ vec_set_len (cf->current_command, 0);
}
else
{
@@ -1749,7 +1758,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm,
vec_validate (cf->current_command, vec_len (prev) - 1);
clib_memcpy (cf->current_command, prev, vec_len (prev));
- _vec_len (cf->current_command) = vec_len (prev);
+ vec_set_len (cf->current_command, vec_len (prev));
unix_vlib_cli_output_cooked (cf, uf, cf->current_command,
vec_len (cf->current_command));
}
@@ -1836,7 +1845,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm,
cf->cursor++;
unix_vlib_cli_output_cursor_left (cf, uf);
cf->cursor--;
- _vec_len (cf->current_command)--;
+ vec_dec_len (cf->current_command, 1);
}
else if (cf->cursor > 0)
{
@@ -1844,7 +1853,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm,
j = vec_len (cf->current_command) - cf->cursor;
memmove (cf->current_command + cf->cursor - 1,
cf->current_command + cf->cursor, j);
- _vec_len (cf->current_command)--;
+ vec_dec_len (cf->current_command, 1);
/* redraw the rest of the line */
unix_vlib_cli_output_cursor_left (cf, uf);
@@ -1880,7 +1889,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm,
j = vec_len (cf->current_command) - cf->cursor - 1;
memmove (cf->current_command + cf->cursor,
cf->current_command + cf->cursor + 1, j);
- _vec_len (cf->current_command)--;
+ vec_dec_len (cf->current_command, 1);
/* redraw the rest of the line */
unix_vlib_cli_output_cooked (cf, uf,
cf->current_command + cf->cursor,
@@ -1952,7 +1961,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm,
vec_resize (save, vec_len (cf->current_command) - cf->cursor);
clib_memcpy (save, cf->current_command + cf->cursor,
vec_len (cf->current_command) - cf->cursor);
- _vec_len (cf->current_command) = cf->cursor;
+ vec_set_len (cf->current_command, cf->cursor);
}
else
{
@@ -1974,7 +1983,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm,
cf->cursor--;
j--;
}
- _vec_len (cf->current_command) = j;
+ vec_set_len (cf->current_command, j);
/* replace it with the newly expanded command */
vec_append (cf->current_command, completed);
@@ -2381,7 +2390,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm,
vec_validate (cf->current_command, vec_len (item) - 1);
clib_memcpy (cf->current_command, item, vec_len (item));
- _vec_len (cf->current_command) = vec_len (item);
+ vec_set_len (cf->current_command, vec_len (item));
unix_vlib_cli_output_cooked (cf, uf, cf->current_command,
vec_len (cf->current_command));
@@ -2572,9 +2581,8 @@ more:
{
static u8 *lv;
vec_reset_length (lv);
- lv = format (lv, "%U[%d]: %v",
- format_timeval, 0 /* current bat-time */ ,
- 0 /* current bat-format */ ,
+ lv = format (lv, "%U[%d]: %v", format_timeval,
+ NULL /* current bat-format */, 0 /* current bat-time */,
cli_file_index, cf->current_command);
if ((vec_len (cf->current_command) > 0) &&
(cf->current_command[vec_len (cf->current_command) - 1] != '\n'))
@@ -2595,7 +2603,7 @@ more:
0 /* level */ ,
8 /* max_level */ );
/* Macro processor NULL terminates the return */
- _vec_len (expanded) -= 1;
+ vec_dec_len (expanded, 1);
vec_reset_length (cf->current_command);
vec_append (cf->current_command, expanded);
vec_free (expanded);
@@ -2687,6 +2695,17 @@ unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index)
if (pool_is_free_index (cm->cli_file_pool, cli_file_index))
return;
+ vec_foreach_index (i, cm->new_sessions)
+ {
+ unix_cli_new_session_t *ns = vec_elt_at_index (cm->new_sessions, i);
+
+ if (ns->cf_index == cli_file_index)
+ {
+ ns->cf_index = ~0;
+ break;
+ }
+ }
+
cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
@@ -2739,7 +2758,7 @@ unix_cli_process (vlib_main_t * vm,
}
if (data)
- _vec_len (data) = 0;
+ vec_set_len (data, 0);
}
done:
@@ -2821,7 +2840,7 @@ unix_cli_read_ready (clib_file_t * uf)
return clib_error_return_unix (0, "read");
n_read = n < 0 ? 0 : n;
- _vec_len (cf->input_vector) = l + n_read;
+ vec_set_len (cf->input_vector, l + n_read);
}
if (!(n < 0))
@@ -2863,47 +2882,16 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd)
{
unix_main_t *um = &unix_main;
clib_file_main_t *fm = &file_main;
- vlib_node_main_t *nm = &vlib_get_main ()->node_main;
unix_cli_file_t *cf;
clib_file_t template = { 0 };
vlib_main_t *vm = um->vlib_main;
vlib_node_t *n = 0;
- u8 *file_desc = 0;
-
- file_desc = format (0, "%s", name);
-
- name = (char *) format (0, "unix-cli-%s", name);
if (vec_len (cm->unused_cli_process_node_indices) > 0)
{
- uword l = vec_len (cm->unused_cli_process_node_indices);
- int i;
- vlib_main_t *this_vlib_main;
- u8 *old_name = 0;
-
- /*
- * Nodes are bulk-copied, so node name pointers are shared.
- * Find the cli node in all graph replicas, and give all of them
- * the same new name.
- * Then, throw away the old shared name-vector.
- */
- for (i = 0; i < vlib_get_n_threads (); i++)
- {
- this_vlib_main = vlib_get_main_by_index (i);
- if (this_vlib_main == 0)
- continue;
- n = vlib_get_node (this_vlib_main,
- cm->unused_cli_process_node_indices[l - 1]);
- old_name = n->name;
- n->name = (u8 *) name;
- }
- ASSERT (old_name);
- hash_unset (nm->node_by_name, old_name);
- hash_set (nm->node_by_name, name, n->index);
- vec_free (old_name);
+ n = vlib_get_node (vm, vec_pop (cm->unused_cli_process_node_indices));
vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
- _vec_len (cm->unused_cli_process_node_indices) = l - 1;
}
else
{
@@ -2912,21 +2900,18 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd)
.type = VLIB_NODE_TYPE_PROCESS,
.process_log2_n_stack_bytes = 18,
};
-
- r.name = name;
+ static u32 count = 0;
vlib_worker_thread_barrier_sync (vm);
- vlib_register_node (vm, &r);
- vec_free (name);
+ vlib_register_node (vm, &r, "unix-cli-process-%u", count++);
n = vlib_get_node (vm, r.index);
vlib_worker_thread_node_runtime_update ();
vlib_worker_thread_barrier_release (vm);
}
- pool_get (cm->cli_file_pool, cf);
- clib_memset (cf, 0, sizeof (*cf));
+ pool_get_zero (cm->cli_file_pool, cf);
clib_macro_init (&cf->macro_main);
template.read_function = unix_cli_read_ready;
@@ -2934,14 +2919,15 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd)
template.error_function = unix_cli_error_detected;
template.file_descriptor = fd;
template.private_data = cf - cm->cli_file_pool;
- template.description = file_desc;
+ template.description = format (0, "%s", name);
+ cf->name = format (0, "unix-cli-%s", name);
cf->process_node_index = n->index;
cf->clib_file_index = clib_file_add (fm, &template);
cf->output_vector = 0;
cf->input_vector = 0;
vec_validate (cf->current_command, 0);
- _vec_len (cf->current_command) = 0;
+ vec_set_len (cf->current_command, 0);
vlib_start_process (vm, n->runtime_index);
@@ -3331,21 +3317,17 @@ unix_cli_quit (vlib_main_t * vm,
* If VPP is running in @em interactive mode and this is the console session
* (that is, the session on @c stdin) then this will also terminate VPP.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (unix_cli_quit_command, static) = {
.path = "quit",
.short_help = "Exit CLI",
.function = unix_cli_quit,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (unix_cli_q_command, static) = {
.path = "q",
.short_help = "Exit CLI",
.function = unix_cli_quit,
};
-/* *INDENT-ON* */
/** CLI command to execute a VPP command script. */
static clib_error_t *
@@ -3353,9 +3335,10 @@ unix_cli_exec (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
char *file_name;
- int fd;
- unformat_input_t sub_input;
+ int fd, rv = 0;
+ unformat_input_t sub_input, in;
clib_error_t *error;
+ clib_macro_main_t *mm = 0;
unix_cli_main_t *cm = &unix_cli_main;
unix_cli_file_t *cf;
u8 *file_data = 0;
@@ -3392,8 +3375,14 @@ unix_cli_exec (vlib_main_t * vm,
goto done;
}
+ if (s.st_size < 1)
+ {
+ error = clib_error_return (0, "empty file `%s'", file_name);
+ goto done;
+ }
+
/* Read the file */
- vec_validate (file_data, s.st_size);
+ vec_validate (file_data, s.st_size - 1);
if (read (fd, file_data, s.st_size) != s.st_size)
{
@@ -3403,42 +3392,43 @@ unix_cli_exec (vlib_main_t * vm,
goto done;
}
- /* The macro expander expects a c string... */
- vec_add1 (file_data, 0);
-
unformat_init_vector (&sub_input, file_data);
- /* Run the file contents through the macro processor */
- if (vec_len (sub_input.buffer) > 1)
+ /* Initial config process? Use the global macro table. */
+ if (pool_is_free_index (cm->cli_file_pool, cm->current_input_file_index))
+ mm = &cm->macro_main;
+ else
{
- u8 *expanded;
- clib_macro_main_t *mm = 0;
+ /* Otherwise, use the per-cli-process macro table */
+ cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index);
+ mm = &cf->macro_main;
+ }
- /* Initial config process? Use the global macro table. */
- if (pool_is_free_index
- (cm->cli_file_pool, cm->current_input_file_index))
- mm = &cm->macro_main;
- else
+ while (rv == 0 && unformat_user (&sub_input, unformat_vlib_cli_line, &in))
+ {
+ /* Run the file contents through the macro processor */
+ if (vec_len (in.buffer) > 1)
{
- /* Otherwise, use the per-cli-process macro table */
- cf = pool_elt_at_index (cm->cli_file_pool,
- cm->current_input_file_index);
- mm = &cf->macro_main;
+ u8 *expanded;
+
+ /* The macro expander expects a c string... */
+ vec_add1 (in.buffer, 0);
+
+ expanded =
+ (u8 *) clib_macro_eval (mm, (i8 *) in.buffer, 1 /* complain */,
+ 0 /* level */, 8 /* max_level */);
+ /* Macro processor NULL terminates the return */
+ vec_dec_len (expanded, 1);
+ vec_reset_length (in.buffer);
+ vec_append (in.buffer, expanded);
+ vec_free (expanded);
}
- expanded = (u8 *) clib_macro_eval (mm,
- (i8 *) sub_input.buffer,
- 1 /* complain */ ,
- 0 /* level */ ,
- 8 /* max_level */ );
- /* Macro processor NULL terminates the return */
- _vec_len (expanded) -= 1;
- vec_reset_length (sub_input.buffer);
- vec_append (sub_input.buffer, expanded);
- vec_free (expanded);
+ if ((rv = vlib_cli_input (vm, &in, 0, 0)) != 0)
+ error = clib_error_return (0, "CLI line error: %U",
+ format_unformat_error, &in);
+ unformat_free (&in);
}
-
- vlib_cli_input (vm, &sub_input, 0, 0);
unformat_free (&sub_input);
done:
@@ -3451,7 +3441,7 @@ done:
/*?
* Executes a sequence of CLI commands which are read from a file. If
- * a command is unrecognised or otherwise invalid then the usual CLI
+ * a command is unrecognized or otherwise invalid then the usual CLI
* feedback will be generated, however execution of subsequent commands
* from the file will continue.
*
@@ -3472,14 +3462,12 @@ done:
* Example of how to execute a set of CLI commands from a file:
* @cliexcmd{exec /usr/share/vpp/scripts/gigup.txt}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_exec, static) = {
.path = "exec",
.short_help = "exec <filename>",
.function = unix_cli_exec,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/** CLI command to show various unix error statistics. */
static clib_error_t *
@@ -3548,13 +3536,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_unix_show_errors, static) = {
.path = "show unix errors",
.short_help = "Show Unix system call error history",
.function = unix_show_errors,
};
-/* *INDENT-ON* */
/** CLI command to show various unix error statistics. */
static clib_error_t *
@@ -3570,7 +3556,6 @@ unix_show_files (vlib_main_t * vm,
vlib_cli_output (vm, "%3s %6s %12s %12s %12s %-32s %s", "FD", "Thread",
"Read", "Write", "Error", "File Name", "Description");
- /* *INDENT-OFF* */
pool_foreach (f, fm->file_pool)
{
int rv;
@@ -3585,19 +3570,16 @@ unix_show_files (vlib_main_t * vm,
path, f->description);
vec_reset_length (s);
}
- /* *INDENT-ON* */
vec_free (s);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_unix_show_files, static) = {
.path = "show unix files",
.short_help = "Show Unix files in use",
.function = unix_show_files,
};
-/* *INDENT-ON* */
/** CLI command to show session command history. */
static clib_error_t *
@@ -3628,13 +3610,11 @@ unix_cli_show_history (vlib_main_t * vm,
/*?
* Displays the command history for the current session, if any.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_unix_cli_show_history, static) = {
.path = "history",
.short_help = "Show current session command history",
.function = unix_cli_show_history,
};
-/* *INDENT-ON* */
/** CLI command to show terminal status. */
static clib_error_t *
@@ -3651,7 +3631,8 @@ unix_cli_show_terminal (vlib_main_t * vm,
n = vlib_get_node (vm, cf->process_node_index);
- vlib_cli_output (vm, "Terminal name: %v\n", n->name);
+ vlib_cli_output (vm, "Terminal name: %v\n", cf->name);
+ vlib_cli_output (vm, "Terminal node: %v\n", n->name);
vlib_cli_output (vm, "Terminal mode: %s\n", cf->line_mode ?
"line-by-line" : "char-by-char");
vlib_cli_output (vm, "Terminal width: %d\n", cf->width);
@@ -3700,13 +3681,11 @@ unix_cli_show_terminal (vlib_main_t * vm,
* CRLF mode: LF
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_unix_cli_show_terminal, static) = {
.path = "show terminal",
.short_help = "Show current session terminal settings",
.function = unix_cli_show_terminal,
};
-/* *INDENT-ON* */
/** CLI command to display a list of CLI sessions. */
static clib_error_t *
@@ -3716,31 +3695,34 @@ unix_cli_show_cli_sessions (vlib_main_t * vm,
{
unix_cli_main_t *cm = &unix_cli_main;
clib_file_main_t *fm = &file_main;
+ table_t table = {}, *t = &table;
unix_cli_file_t *cf;
clib_file_t *uf;
- vlib_node_t *n;
- vlib_cli_output (vm, "%-5s %-5s %-20s %s", "PNI", "FD", "Name", "Flags");
+ table_add_header_col (t, 4, "PNI ", "FD ", "Name", "Flags");
#define fl(x, y) ( (x) ? toupper((y)) : tolower((y)) )
- /* *INDENT-OFF* */
- pool_foreach (cf, cm->cli_file_pool) {
- uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
- n = vlib_get_node (vm, cf->process_node_index);
- vlib_cli_output (vm,
- "%-5d %-5d %-20v %c%c%c%c%c\n",
- cf->process_node_index,
- uf->file_descriptor,
- n->name,
- fl (cf->is_interactive, 'i'),
- fl (cf->is_socket, 's'),
- fl (cf->line_mode, 'l'),
- fl (cf->has_epipe, 'p'),
- fl (cf->ansi_capable, 'a'));
- }
- /* *INDENT-ON* */
+ int i = 0;
+ pool_foreach (cf, cm->cli_file_pool)
+ {
+ int j = 0;
+
+ uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index);
+ table_format_cell (t, i, j++, "%u", cf->process_node_index);
+ table_format_cell (t, i, j++, "%u", uf->file_descriptor);
+ table_format_cell (t, i, j++, "%v", cf->name);
+ table_format_cell (t, i++, j++, "%c%c%c%c%c",
+ fl (cf->is_interactive, 'i'), fl (cf->is_socket, 's'),
+ fl (cf->line_mode, 'l'), fl (cf->has_epipe, 'p'),
+ fl (cf->ansi_capable, 'a'));
+ }
#undef fl
+ t->default_body.align = TTAA_LEFT;
+ t->default_header_col.align = TTAA_LEFT;
+ vlib_cli_output (vm, "%U", format_table, t);
+ table_free (t);
+
return 0;
}
@@ -3780,13 +3762,11 @@ unix_cli_show_cli_sessions (vlib_main_t * vm,
* - @em P EPIPE detected on connection; it will close soon.
* - @em A ANSI-capable terminal.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_unix_cli_show_cli_sessions, static) = {
.path = "show cli-sessions",
.short_help = "Show current CLI sessions",
.function = unix_cli_show_cli_sessions,
};
-/* *INDENT-ON* */
/** CLI command to set terminal pager settings. */
static clib_error_t *
@@ -3837,13 +3817,11 @@ done:
* Additionally allows the pager buffer size to be set; though note that
* this value is set globally and not per session.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_pager, static) = {
.path = "set terminal pager",
.short_help = "set terminal pager [on|off] [limit <lines>]",
.function = unix_cli_set_terminal_pager,
};
-/* *INDENT-ON* */
/** CLI command to set terminal history settings. */
static clib_error_t *
@@ -3908,13 +3886,11 @@ done:
* This command also allows the maximum size of the history buffer for
* this session to be altered.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_history, static) = {
.path = "set terminal history",
.short_help = "set terminal history [on|off] [limit <lines>]",
.function = unix_cli_set_terminal_history,
};
-/* *INDENT-ON* */
/** CLI command to set terminal ANSI settings. */
static clib_error_t *
@@ -3947,13 +3923,11 @@ unix_cli_set_terminal_ansi (vlib_main_t * vm,
* ANSI control sequences are used in a small number of places to provide,
* for example, color text output and to control the cursor in the pager.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_ansi, static) = {
.path = "set terminal ansi",
.short_help = "set terminal ansi [on|off]",
.function = unix_cli_set_terminal_ansi,
};
-/* *INDENT-ON* */
#define MAX_CLI_WAIT 86400
@@ -3987,13 +3961,11 @@ unix_wait_cmd (vlib_main_t * vm,
unformat_free (line_input);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_unix_wait_cmd, static) = {
.path = "wait",
.short_help = "wait <sec>",
.function = unix_wait_cmd,
};
-/* *INDENT-ON* */
static clib_error_t *
echo_cmd (vlib_main_t * vm,
@@ -4014,13 +3986,11 @@ echo_cmd (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_unix_echo_cmd, static) = {
.path = "echo",
.short_help = "echo <rest-of-line>",
.function = echo_cmd,
};
-/* *INDENT-ON* */
static clib_error_t *
define_cmd_fn (vlib_main_t * vm,
@@ -4052,14 +4022,12 @@ define_cmd_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (define_cmd, static) = {
.path = "define",
.short_help = "define <variable-name> <value>",
.function = define_cmd_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
undefine_cmd_fn (vlib_main_t * vm,
@@ -4078,13 +4046,11 @@ undefine_cmd_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (undefine_cmd, static) = {
.path = "undefine",
.short_help = "undefine <variable-name>",
.function = undefine_cmd_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_macro_cmd_fn (vlib_main_t * vm,
@@ -4102,13 +4068,11 @@ show_macro_cmd_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_macro, static) = {
.path = "show macro",
.short_help = "show macro [noevaluate]",
.function = show_macro_cmd_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
unix_cli_init (vlib_main_t * vm)
diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c
index 9c7c54f6b1b..e96cd902466 100644
--- a/src/vlib/unix/input.c
+++ b/src/vlib/unix/input.c
@@ -250,7 +250,10 @@ linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
while (nanosleep (&ts, &tsrem) < 0)
ts = tsrem;
if (*vlib_worker_threads->wait_at_barrier ||
- *nm->pending_interrupts)
+ clib_interrupt_is_any_pending (
+ nm->input_node_interrupts) ||
+ clib_interrupt_is_any_pending (
+ nm->pre_input_node_interrupts))
goto done;
}
}
@@ -367,13 +370,11 @@ linux_epoll_input (vlib_main_t * vm,
return linux_epoll_input_inline (vm, node, frame, thread_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
.function = linux_epoll_input,
.type = VLIB_NODE_TYPE_PRE_INPUT,
.name = "unix-epoll-input",
};
-/* *INDENT-ON* */
clib_error_t *
linux_epoll_input_init (vlib_main_t * vm)
@@ -416,12 +417,10 @@ unix_input_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (unix_input_init) =
{
.runs_before = VLIB_INITS ("linux_epoll_input_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c
index 4ef96652470..ee28ca8f1aa 100644
--- a/src/vlib/unix/main.c
+++ b/src/vlib/unix/main.c
@@ -39,7 +39,9 @@
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/unix/plugin.h>
+#include <vppinfra/unix.h>
+#include <limits.h>
#include <signal.h>
#include <sys/ucontext.h>
#include <syslog.h>
@@ -70,12 +72,10 @@ unix_main_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (unix_main_init) =
{
.runs_before = VLIB_INITS ("unix_input_init"),
};
-/* *INDENT-ON* */
static int
unsetup_signal_handlers (int sig)
@@ -144,17 +144,6 @@ unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc)
break;
}
-#ifdef CLIB_GCOV
- /*
- * Test framework sends SIGTERM, so we need to flush the
- * code coverage stats here.
- */
- {
- void __gcov_flush (void);
- __gcov_flush ();
- }
-#endif
-
/* Null terminate. */
vec_add1 (syslog_msg, 0);
@@ -210,6 +199,7 @@ setup_signal_handlers (unix_main_t * um)
{
/* these signals take the default action */
case SIGKILL:
+ case SIGCONT:
case SIGSTOP:
case SIGUSR1:
case SIGUSR2:
@@ -246,14 +236,7 @@ unix_error_handler (void *arg, u8 * msg, int msg_len)
}
else
{
- char save = msg[msg_len - 1];
-
- /* Null Terminate. */
- msg[msg_len - 1] = 0;
-
- syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
-
- msg[msg_len - 1] = save;
+ syslog (LOG_ERR | LOG_DAEMON, "%.*s", msg_len, msg);
}
}
@@ -266,20 +249,10 @@ vlib_unix_error_report (vlib_main_t * vm, clib_error_t * error)
return;
{
- char save;
- u8 *msg;
- u32 msg_len;
-
- msg = error->what;
- msg_len = vec_len (msg);
-
- /* Null Terminate. */
- save = msg[msg_len - 1];
- msg[msg_len - 1] = 0;
-
- syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
-
- msg[msg_len - 1] = save;
+ u8 *msg = error->what;
+ u32 len = vec_len (msg);
+ int msg_len = (len > INT_MAX) ? INT_MAX : len;
+ syslog (LOG_ERR | LOG_DAEMON, "%.*s", msg_len, msg);
}
}
@@ -288,98 +261,34 @@ startup_config_process (vlib_main_t * vm,
vlib_node_runtime_t * rt, vlib_frame_t * f)
{
unix_main_t *um = &unix_main;
- u8 *buf = 0;
- uword l, n = 1;
+ unformat_input_t in;
vlib_process_suspend (vm, 2.0);
while (um->unix_config_complete == 0)
vlib_process_suspend (vm, 0.1);
- if (um->startup_config_filename)
+ if (!um->startup_config_filename)
{
- unformat_input_t sub_input;
- int fd;
- struct stat s;
- char *fn = (char *) um->startup_config_filename;
-
- fd = open (fn, O_RDONLY);
- if (fd < 0)
- {
- clib_warning ("failed to open `%s'", fn);
- return 0;
- }
+ return 0;
+ }
- if (fstat (fd, &s) < 0)
- {
- clib_warning ("failed to stat `%s'", fn);
- bail:
- close (fd);
- return 0;
- }
+ unformat_init_vector (&in,
+ format (0, "exec %s", um->startup_config_filename));
- if (!(S_ISREG (s.st_mode) || S_ISLNK (s.st_mode)))
- {
- clib_warning ("not a regular file: `%s'", fn);
- goto bail;
- }
+ vlib_cli_input (vm, &in, 0, 0);
- while (n > 0)
- {
- l = vec_len (buf);
- vec_resize (buf, 4096);
- n = read (fd, buf + l, 4096);
- if (n > 0)
- {
- _vec_len (buf) = l + n;
- if (n < 4096)
- break;
- }
- else
- break;
- }
- if (um->log_fd && vec_len (buf))
- {
- u8 *lv = 0;
- lv = format (lv, "%U: ***** Startup Config *****\n%v",
- format_timeval, 0 /* current bat-time */ ,
- 0 /* current bat-format */ ,
- buf);
- {
- int rv __attribute__ ((unused)) =
- write (um->log_fd, lv, vec_len (lv));
- }
- vec_reset_length (lv);
- lv = format (lv, "%U: ***** End Startup Config *****\n",
- format_timeval, 0 /* current bat-time */ ,
- 0 /* current bat-format */ );
- {
- int rv __attribute__ ((unused)) =
- write (um->log_fd, lv, vec_len (lv));
- }
- vec_free (lv);
- }
+ unformat_free (&in);
- if (vec_len (buf))
- {
- unformat_init_vector (&sub_input, buf);
- vlib_cli_input (vm, &sub_input, 0, 0);
- /* frees buf for us */
- unformat_free (&sub_input);
- }
- close (fd);
- }
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (startup_config_node,static) = {
.function = startup_config_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "startup-config-process",
.process_log2_n_stack_bytes = 18,
};
-/* *INDENT-ON* */
static clib_error_t *
unix_config (vlib_main_t * vm, unformat_input_t * input)
@@ -480,9 +389,8 @@ unix_config (vlib_main_t * vm, unformat_input_t * input)
{
u8 *lv = 0;
lv = format (0, "%U: ***** Start: PID %d *****\n",
- format_timeval, 0 /* current bat-time */ ,
- 0 /* current bat-format */ ,
- getpid ());
+ format_timeval, NULL /* current bat-format */,
+ 0 /* current bat-time */, getpid ());
{
int rv __attribute__ ((unused)) =
write (um->log_fd, lv, vec_len (lv));
@@ -518,6 +426,9 @@ unix_config (vlib_main_t * vm, unformat_input_t * input)
if (error)
return error;
+ if (chdir ((char *) um->runtime_dir) < 0)
+ return clib_error_return_unix (0, "chdir('%s')", um->runtime_dir);
+
error = setup_signal_handlers (um);
if (error)
return error;
@@ -662,12 +573,13 @@ static uword
thread0 (uword arg)
{
vlib_main_t *vm = (vlib_main_t *) arg;
+ vlib_global_main_t *vgm = vlib_get_global_main ();
unformat_input_t input;
int i;
vlib_process_finish_switch_stack (vm);
- unformat_init_command_line (&input, (char **) vm->argv);
+ unformat_init_command_line (&input, (char **) vgm->argv);
i = vlib_main (vm, &input);
unformat_free (&input);
@@ -690,6 +602,10 @@ vlib_thread_stack_init (uword thread_index)
return stack;
}
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
int
vlib_unix_main (int argc, char *argv[])
{
@@ -701,12 +617,24 @@ vlib_unix_main (int argc, char *argv[])
vec_validate_aligned (vgm->vlib_mains, 0, CLIB_CACHE_LINE_BYTES);
- vm->argv = (u8 **) argv;
- vgm->name = argv[0];
- vm->heap_base = clib_mem_get_heap ();
- vm->heap_aligned_base = (void *)
- (((uword) vm->heap_base) & ~(VLIB_FRAME_ALIGN - 1));
- ASSERT (vm->heap_base);
+ vgm->exec_path = (char *) os_get_exec_path ();
+
+ if (vgm->exec_path)
+ {
+ for (i = vec_len (vgm->exec_path) - 1; i > 0; i--)
+ if (vgm->exec_path[i - 1] == '/')
+ break;
+
+ vgm->name = 0;
+
+ vec_add (vgm->name, vgm->exec_path + i, vec_len (vgm->exec_path) - i);
+ vec_add1 (vgm->exec_path, 0);
+ vec_add1 (vgm->name, 0);
+ }
+ else
+ vgm->exec_path = vgm->name = argv[0];
+
+ vgm->argv = (u8 **) argv;
clib_time_init (&vm->clib_time);
@@ -715,7 +643,7 @@ vlib_unix_main (int argc, char *argv[])
elog_init (vlib_get_elog_main (), vgm->configured_elog_ring_size);
elog_enable_disable (vlib_get_elog_main (), 1);
- unformat_init_command_line (&input, (char **) vm->argv);
+ unformat_init_command_line (&input, (char **) vgm->argv);
if ((e = vlib_plugin_config (vm, &input)))
{
clib_error_report (e);
@@ -727,7 +655,7 @@ vlib_unix_main (int argc, char *argv[])
if (i)
return i;
- unformat_init_command_line (&input, (char **) vm->argv);
+ unformat_init_command_line (&input, (char **) vgm->argv);
if (vgm->init_functions_called == 0)
vgm->init_functions_called = hash_create (0, /* value bytes */ 0);
e = vlib_call_all_config_functions (vm, &input, 1 /* early */ );
@@ -739,7 +667,7 @@ vlib_unix_main (int argc, char *argv[])
unformat_free (&input);
/* always load symbols, for signal handler and mheap memory get/put backtrace */
- clib_elf_main_init (vgm->name);
+ clib_elf_main_init (vgm->exec_path);
vec_validate (vlib_thread_stacks, 0);
vlib_thread_stack_init (0);
diff --git a/src/vlib/unix/mc_socket.c b/src/vlib/unix/mc_socket.c
index 9800b1e744c..1f3b4e9a8f1 100644
--- a/src/vlib/unix/mc_socket.c
+++ b/src/vlib/unix/mc_socket.c
@@ -90,7 +90,7 @@ sendmsg_helper (mc_socket_main_t * msm,
h.msg_namelen = sizeof (tx_addr[0]);
if (msm->iovecs)
- _vec_len (msm->iovecs) = 0;
+ vec_set_len (msm->iovecs, 0);
n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs);
ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size);
@@ -177,7 +177,7 @@ recvmsg_helper (mc_socket_main_t * msm,
vec_validate (msm->rx_buffers, max_alloc - 1);
n_alloc =
vlib_buffer_alloc (vm, msm->rx_buffers + n_left, max_alloc - n_left);
- _vec_len (msm->rx_buffers) = n_left + n_alloc;
+ vec_set_len (msm->rx_buffers, n_left + n_alloc);
}
ASSERT (vec_len (msm->rx_buffers) >= n_mtu);
@@ -192,7 +192,7 @@ recvmsg_helper (mc_socket_main_t * msm,
msm->iovecs[i].iov_base = b->data;
msm->iovecs[i].iov_len = buffer_size;
}
- _vec_len (msm->iovecs) = n_mtu;
+ vec_set_len (msm->iovecs, n_mtu);
{
struct msghdr h;
@@ -237,7 +237,7 @@ recvmsg_helper (mc_socket_main_t * msm,
b->next_buffer = msm->rx_buffers[i_rx];
}
- _vec_len (msm->rx_buffers) = i_rx;
+ vec_set_len (msm->rx_buffers, i_rx);
return 0 /* no error */ ;
}
@@ -418,7 +418,7 @@ catchup_socket_read_ready (clib_file_t * uf, int is_server)
}
}
- _vec_len (c->input_vector) = l + n;
+ vec_set_len (c->input_vector, l + n);
if (is_eof && vec_len (c->input_vector) > 0)
{
@@ -426,7 +426,7 @@ catchup_socket_read_ready (clib_file_t * uf, int is_server)
{
mc_msg_catchup_request_handler (mcm, (void *) c->input_vector,
c - msm->catchups);
- _vec_len (c->input_vector) = 0;
+ vec_set_len (c->input_vector, 0);
}
else
{
diff --git a/src/vlib/unix/plugin.c b/src/vlib/unix/plugin.c
index a714c7c29ce..fd3a050b944 100644
--- a/src/vlib/unix/plugin.c
+++ b/src/vlib/unix/plugin.c
@@ -35,7 +35,7 @@ char *vlib_plugin_app_version __attribute__ ((weak));
char *vlib_plugin_app_version = "";
void *
-vlib_get_plugin_symbol (char *plugin_name, char *symbol_name)
+vlib_get_plugin_symbol (const char *plugin_name, const char *symbol_name)
{
plugin_main_t *pm = &vlib_plugin_main;
uword *p;
@@ -194,6 +194,8 @@ load_one_plugin (plugin_main_t * pm, plugin_info_t * pi, int from_early_init)
reread_reg = 0;
goto process_reg;
}
+ else
+ clib_error_free (error);
error = elf_get_section_by_name (&em, ".vlib_plugin_registration",
&section);
@@ -304,7 +306,8 @@ process_reg:
}
vec_free (version_required);
- handle = dlopen ((char *) pi->filename, RTLD_LAZY);
+ handle = dlopen ((char *) pi->filename,
+ RTLD_LAZY | (reg->deep_bind ? RTLD_DEEPBIND : 0));
if (handle == 0)
{
@@ -593,7 +596,12 @@ vlib_plugin_early_init (vlib_main_t * vm)
0x7FFFFFFF /* aka no rate limit */ );
if (pm->plugin_path == 0)
- pm->plugin_path = format (0, "%s%c", vlib_plugin_path, 0);
+ pm->plugin_path = format (0, "%s", vlib_plugin_path);
+
+ if (pm->plugin_path_add)
+ pm->plugin_path = format (pm->plugin_path, ":%s", pm->plugin_path_add);
+
+ pm->plugin_path = format (pm->plugin_path, "%c", 0);
PLUGIN_LOG_DBG ("plugin path %s", pm->plugin_path);
@@ -632,7 +640,6 @@ vlib_plugins_show_cmd_fn (vlib_main_t * vm,
s = format (s, " Plugin path is: %s\n\n", pm->plugin_path);
s = format (s, " %-41s%-33s%s\n", "Plugin", "Version", "Description");
- /* *INDENT-OFF* */
hash_foreach_mem (key, value, pm->plugin_by_name_hash,
{
if (key != 0)
@@ -644,21 +651,18 @@ vlib_plugins_show_cmd_fn (vlib_main_t * vm,
index++;
}
});
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v", s);
vec_free (s);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (plugins_show_cmd, static) =
{
.path = "show plugins",
.short_help = "show loaded plugins",
.function = vlib_plugins_show_cmd_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
config_one_plugin (vlib_main_t * vm, char *name, unformat_input_t * input)
@@ -755,6 +759,8 @@ done:
u8 *s = 0;
if (unformat (input, "path %s", &s))
pm->plugin_path = s;
+ else if (unformat (input, "add-path %s", &s))
+ pm->plugin_path_add = s;
else if (unformat (input, "name-filter %s", &s))
pm->plugin_name_filter = s;
else if (unformat (input, "vat-path %s", &s))
diff --git a/src/vlib/unix/plugin.h b/src/vlib/unix/plugin.h
index ae15e5db973..a7d9b9449a5 100644
--- a/src/vlib/unix/plugin.h
+++ b/src/vlib/unix/plugin.h
@@ -56,16 +56,17 @@
* vlib_load_new_plugins().
*/
-/* *INDENT-OFF* */
-typedef CLIB_PACKED(struct {
- u8 default_disabled;
- const char version[32];
- const char version_required[32];
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u8 default_disabled : 1;
+ u8 deep_bind : 1;
+ const char version[64];
+ const char version_required[64];
const char overrides[256];
const char *early_init;
const char *description;
-}) vlib_plugin_registration_t;
-/* *INDENT-ON* */
+} vlib_plugin_registration_t;
/*
* Plugins may also use this registration format, which is
@@ -123,6 +124,7 @@ typedef struct
/* paths and name filters */
u8 *plugin_path;
+ u8 *plugin_path_add;
u8 *plugin_name_filter;
u8 *vat_plugin_path;
u8 *vat_plugin_name_filter;
@@ -144,12 +146,12 @@ extern plugin_main_t vlib_plugin_main;
clib_error_t *vlib_plugin_config (vlib_main_t * vm, unformat_input_t * input);
int vlib_plugin_early_init (vlib_main_t * vm);
int vlib_load_new_plugins (plugin_main_t * pm, int from_early_init);
-void *vlib_get_plugin_symbol (char *plugin_name, char *symbol_name);
+void *vlib_get_plugin_symbol (const char *plugin_name,
+ const char *symbol_name);
u8 *vlib_get_vat_plugin_path (void);
#define VLIB_PLUGIN_REGISTER() \
vlib_plugin_registration_t vlib_plugin_registration \
- CLIB_NOSANITIZE_PLUGIN_REG_SECTION \
__clib_export __clib_section(".vlib_plugin_registration")
/* Call a plugin init function: used for init function dependencies. */
diff --git a/src/vlib/unix/util.c b/src/vlib/unix/util.c
index 03aef364357..04cd6f593ac 100644
--- a/src/vlib/unix/util.c
+++ b/src/vlib/unix/util.c
@@ -86,8 +86,8 @@ foreach_directory_file (char *dir_name,
s = format (s, "%s/%s", dir_name, e->d_name);
t = format (t, "%s", e->d_name);
error = f (arg, s, t);
- _vec_len (s) = 0;
- _vec_len (t) = 0;
+ vec_set_len (s, 0);
+ vec_set_len (t, 0);
if (error)
break;
@@ -116,7 +116,7 @@ vlib_unix_recursive_mkdir (char *path)
error = clib_error_return_unix (0, "mkdir '%s'", c);
goto done;
}
- _vec_len (c)--;
+ vec_dec_len (c, 1);
}
vec_add1 (c, path[i]);
i++;
diff --git a/src/vlibapi/CMakeLists.txt b/src/vlibapi/CMakeLists.txt
index dfd6b5b5045..6476b5a2f33 100644
--- a/src/vlibapi/CMakeLists.txt
+++ b/src/vlibapi/CMakeLists.txt
@@ -11,16 +11,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-install(
- FILES
- api_helper_macros.h
+add_vpp_library (vlibapi
+ SOURCES
+ api_shared.c
+ api_format.c
+ node_serialize.c
+ memory_shared.c
+
+ INSTALL_HEADERS
api.h
- vat_helper_macros.h
api_common.h
+ api_helper_macros.h
api_types.h
-
- DESTINATION
- include/vlibapi
-
- COMPONENT vpp-dev
+ vat_helper_macros.h
+ memory_shared.h
)
+
diff --git a/src/vlibapi/api.h b/src/vlibapi/api.h
index 431155c5e09..74957a6f0f6 100644
--- a/src/vlibapi/api.h
+++ b/src/vlibapi/api.h
@@ -28,16 +28,14 @@
#include <vlib/unix/unix.h>
#include <vlibapi/api_common.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED ( struct {
u32 nitems;
u32 msgtbl_size;
u8 wrapped;
}) vl_api_trace_file_header_t;
-/* *INDENT-ON* */
-int vl_msg_api_trace_save (api_main_t * am,
- vl_api_trace_which_t which, FILE * fp);
+int vl_msg_api_trace_save (api_main_t *am, vl_api_trace_which_t which,
+ FILE *fp, u8 is_json);
#define VLIB_API_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,api_init)
@@ -105,10 +103,6 @@ int vl_msg_api_trace_onoff (api_main_t * am, vl_api_trace_which_t which,
int vl_msg_api_trace_free (api_main_t * am, vl_api_trace_which_t which);
int vl_msg_api_trace_configure (api_main_t * am, vl_api_trace_which_t which,
u32 nitems);
-void vl_msg_api_handler_with_vm_node (api_main_t * am, svm_region_t * vlib_rp,
- void *the_msg, vlib_main_t * vm,
- vlib_node_runtime_t * node,
- u8 is_private);
u32 vl_msg_api_max_length (void *mp);
vl_api_trace_t *vl_msg_api_trace_get (api_main_t * am,
vl_api_trace_which_t which);
@@ -123,6 +117,38 @@ vlib_node_t ***vlib_node_unserialize (u8 * vector);
u32 vl_msg_api_get_msg_length (void *msg_arg);
+typedef int (*vl_msg_traverse_trace_fn) (u8 *, void *);
+
+int vl_msg_traverse_trace (vl_api_trace_t *tp, vl_msg_traverse_trace_fn fn,
+ void *ctx);
+
+always_inline void
+vl_api_increase_msg_trace_size (api_main_t *am, u32 msg_id, u32 inc)
+{
+ am->msg_data[msg_id].trace_size += inc;
+}
+
+always_inline void
+vl_api_set_msg_thread_safe (api_main_t *am, u32 msg_id, int v)
+{
+ am->msg_data[msg_id].is_mp_safe = v != 0;
+}
+
+always_inline void
+vl_api_set_msg_autoendian (api_main_t *am, u32 msg_id, int v)
+{
+ am->msg_data[msg_id].is_autoendian = v != 0;
+}
+
+always_inline void
+vl_api_allow_msg_replay (api_main_t *am, u32 msg_id, int v)
+{
+ am->msg_data[msg_id].replay_allowed = v != 0;
+}
+
+format_function_t format_vl_api_msg_text;
+format_function_t format_vl_api_msg_json;
+
#endif /* included_api_h */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vlibapi/api_common.h b/src/vlibapi/api_common.h
index 3fdc1bbdd36..62a8d4c62d8 100644
--- a/src/vlibapi/api_common.h
+++ b/src/vlibapi/api_common.h
@@ -27,6 +27,7 @@
#include <vppinfra/clib_error.h>
#include <vppinfra/elog.h>
+#include <vppinfra/cJSON.h>
#include <vlibapi/api_types.h>
#include <svm/svm_common.h>
#include <svm/queue.h>
@@ -74,18 +75,12 @@ typedef struct vl_api_registration_
/* socket client only */
u32 server_handle; /**< Socket client only: server handle */
u32 server_index; /**< Socket client only: server index */
+
+ bool keepalive; /**< Dead client scan */
} vl_api_registration_t;
#define VL_API_INVALID_FI ((u32)~0)
-/** Trace configuration for a single message */
-typedef struct
-{
- int size; /**< for sanity checking */
- int trace_enable; /**< trace this message */
- int replay_enable; /**< This message can be replayed */
-} trace_cfg_t;
-
/**
* API trace state
*/
@@ -127,13 +122,16 @@ typedef struct
void *handler; /**< the message handler */
void *cleanup; /**< non-default message cleanup handler */
void *endian; /**< message endian function */
- void *print; /**< message print function */
+ void *format_fn; /**< message format function */
+ void *tojson; /**< binary to JSON convert function */
+ void *fromjson; /**< JSON to binary convert function */
+ void *calc_size; /**< message size calculation */
int size; /**< message size */
- int traced; /**< is this message to be traced? */
- int replay; /**< is this message to be replayed? */
- int message_bounce; /**< do not free message after processing */
- int is_mp_safe; /**< worker thread barrier required? */
- int is_autoendian; /**< endian conversion required? */
+ u32 traced : 1; /**< is this message to be traced? */
+ u32 replay : 1; /**< is this message to be replayed? */
+ u32 message_bounce : 1; /**< do not free message after processing */
+ u32 is_mp_safe : 1; /**< worker thread barrier required? */
+ u32 is_autoendian : 1; /**< endian conversion required? */
} vl_msg_api_msg_config_t;
/** Message header structure */
@@ -145,39 +143,34 @@ typedef struct msgbuf_
u8 data[0]; /**< actual message begins here */
} msgbuf_t;
-CLIB_NOSANITIZE_ADDR static inline void
+__clib_nosanitize_addr static inline void
VL_MSG_API_UNPOISON (const void *a)
{
const msgbuf_t *m = &((const msgbuf_t *) a)[-1];
- CLIB_MEM_UNPOISON (m, sizeof (*m) + ntohl (m->data_len));
+ clib_mem_unpoison (m, sizeof (*m) + ntohl (m->data_len));
}
-CLIB_NOSANITIZE_ADDR static inline void
-VL_MSG_API_SVM_QUEUE_UNPOISON (const svm_queue_t * q)
+__clib_nosanitize_addr static inline void
+VL_MSG_API_SVM_QUEUE_UNPOISON (const svm_queue_t *q)
{
- CLIB_MEM_UNPOISON (q, sizeof (*q) + q->elsize * q->maxsize);
+ clib_mem_unpoison (q, sizeof (*q) + q->elsize * q->maxsize);
}
static inline void
VL_MSG_API_POISON (const void *a)
{
const msgbuf_t *m = &((const msgbuf_t *) a)[-1];
- CLIB_MEM_POISON (m, sizeof (*m) + ntohl (m->data_len));
+ clib_mem_poison (m, sizeof (*m) + ntohl (m->data_len));
}
/* api_shared.c prototypes */
-void vl_msg_api_handler (void *the_msg);
-void vl_msg_api_handler_no_free (void *the_msg);
-void vl_msg_api_handler_no_trace_no_free (void *the_msg);
-void vl_msg_api_trace_only (void *the_msg);
+void vl_msg_api_handler (void *the_msg, uword msg_len);
+void vl_msg_api_handler_no_free (void *the_msg, uword msg_len);
+void vl_msg_api_handler_no_trace_no_free (void *the_msg, uword msg_len);
+void vl_msg_api_trace_only (void *the_msg, uword msg_len);
void vl_msg_api_cleanup_handler (void *the_msg);
void vl_msg_api_replay_handler (void *the_msg);
-void vl_msg_api_socket_handler (void *the_msg);
-void vl_msg_api_set_handlers (int msg_id, char *msg_name,
- void *handler,
- void *cleanup,
- void *endian,
- void *print, int msg_size, int traced);
+void vl_msg_api_socket_handler (void *the_msg, uword msg_len);
void vl_msg_api_clean_handlers (int msg_id);
void vl_msg_api_config (vl_msg_api_msg_config_t *);
void vl_msg_api_set_cleanup_handler (int msg_id, void *fp);
@@ -192,13 +185,11 @@ void vl_msg_api_barrier_trace_context (const char *context)
#define vl_msg_api_barrier_trace_context(X)
#endif
void vl_msg_api_free (void *);
-void vl_noop_handler (void *mp);
void vl_msg_api_increment_missing_client_counter (void);
void vl_msg_api_post_mortem_dump (void);
void vl_msg_api_post_mortem_dump_enable_disable (int enable);
void vl_msg_api_register_pd_handler (void *handler,
u16 msg_id_host_byte_order);
-int vl_msg_api_pd_handler (void *mp, int rv);
void vl_msg_api_set_first_available_msg_id (u16 first_avail);
u16 vl_msg_api_get_msg_ids (const char *name, int n);
@@ -224,34 +215,51 @@ typedef struct
char name[64];
} api_version_t;
-/** API main structure, used by both vpp and binary API clients */
-typedef struct api_main_t
+typedef struct
{
/** Message handler vector */
- void (**msg_handlers) (void *);
- /** Plaform-dependent (aka hardware) message handler vector */
- int (**pd_msg_handlers) (void *, int);
+ void (*handler) (void *);
/** non-default message cleanup handler vector */
- void (**msg_cleanup_handlers) (void *);
+ void (*cleanup_handler) (void *);
+
+ /** Message name vector */
+ const char *name;
+
+ /** Message format function */
+ format_function_t *format_fn;
+
+ /** Message convert function vector */
+ cJSON *(*tojson_handler) (void *);
+
+ /** Message convert function vector */
+ void *(*fromjson_handler) (cJSON *, int *);
/** Message endian handler vector */
- void (**msg_endian_handlers) (void *);
+ void (*endian_handler) (void *);
- /** Message print function vector */
- void (**msg_print_handlers) (void *, void *);
+ /** Message calc size function vector */
+ uword (*calc_size_func) (void *);
- /** Message name vector */
- const char **msg_names;
+ /** trace size for sanity checking */
+ int trace_size;
+
+ /** Flags */
+ u8 bounce : 1; /**> Don't automatically free message buffer vetor */
+ u8 is_mp_safe : 1; /**< Message is mp safe vector */
+ u8 is_autoendian : 1; /**< Message requires us to do endian conversion */
+ u8 trace_enable : 1; /**< trace this message */
+ u8 replay_allowed : 1; /**< This message can be replayed */
- /** Don't automatically free message buffer vetor */
- u8 *message_bounce;
+} vl_api_msg_data_t;
- /** Message is mp safe vector */
- u8 *is_mp_safe;
+/** API main structure, used by both vpp and binary API clients */
+typedef struct api_main_t
+{
+ vl_api_msg_data_t *msg_data;
- /** Message requires us to do endian conversion */
- u8 *is_autoendian;
+ /** API message ID by name hash table */
+ uword *msg_id_by_name;
/** Allocator ring vectors (in shared memory) */
struct ring_alloc_ *arings;
@@ -274,9 +282,6 @@ typedef struct api_main_t
/** Print every received message */
int msg_print_flag;
- /** Current trace configuration */
- trace_cfg_t *api_trace_cfg;
-
/** Current process PID */
int our_pid;
@@ -349,6 +354,8 @@ typedef struct api_main_t
/** client message index hash table */
uword *msg_index_by_name_and_crc;
+ /** plugin JSON representation vector table */
+ u8 **json_api_repr;
/** api version list */
api_version_t *api_version_list;
@@ -384,7 +391,6 @@ typedef struct api_main_t
} api_main_t;
extern __thread api_main_t *my_api_main;
-extern api_main_t api_global_main;
always_inline api_main_t *
vlibapi_get_main (void)
@@ -392,6 +398,14 @@ vlibapi_get_main (void)
return my_api_main;
}
+always_inline vl_api_msg_data_t *
+vl_api_get_msg_data (api_main_t *am, u32 msg_id)
+{
+ if (msg_id >= vec_len (am->msg_data))
+ return 0;
+ return am->msg_data + msg_id;
+}
+
always_inline void
vlibapi_set_main (api_main_t * am)
{
diff --git a/src/vlibapi/api_doc.md b/src/vlibapi/api_doc.md
deleted file mode 100644
index 2e7ae09a722..00000000000
--- a/src/vlibapi/api_doc.md
+++ /dev/null
@@ -1,352 +0,0 @@
-# Binary API support {#api_doc}
-
-VPP provides a binary API scheme to allow a wide variety of client codes to
-program data-plane tables. As of this writing, there are hundreds of binary
-APIs.
-
-Messages are defined in `*.api` files. Today, there are about 50 api files,
-with more arriving as folks add programmable features. The API file compiler
-sources reside in @ref src/tools/vppapigen.
-
-From @ref src/vnet/interface.api, here's a typical request/response message
-definition:
-
-```{.c}
- autoreply define sw_interface_set_flags
- {
- u32 client_index;
- u32 context;
- u32 sw_if_index;
- /* 1 = up, 0 = down */
- u8 admin_up_down;
- };
-```
-
-To a first approximation, the API compiler renders this definition into
-`build-root/.../vpp/include/vnet/interface.api.h` as follows:
-
-```{.c}
- /****** Message ID / handler enum ******/
- #ifdef vl_msg_id
- vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS, vl_api_sw_interface_set_flags_t_handler)
- vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, vl_api_sw_interface_set_flags_reply_t_handler)
- #endif
-
- /****** Message names ******/
- #ifdef vl_msg_name
- vl_msg_name(vl_api_sw_interface_set_flags_t, 1)
- vl_msg_name(vl_api_sw_interface_set_flags_reply_t, 1)
- #endif
-
- /****** Message name, crc list ******/
- #ifdef vl_msg_name_crc_list
- #define foreach_vl_msg_name_crc_interface \
- _(VL_API_SW_INTERFACE_SET_FLAGS, sw_interface_set_flags, f890584a) \
- _(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply, dfbf3afa) \
- #endif
-
- /****** Typedefs *****/
- #ifdef vl_typedefs
- typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags {
- u16 _vl_msg_id;
- u32 client_index;
- u32 context;
- u32 sw_if_index;
- u8 admin_up_down;
- }) vl_api_sw_interface_set_flags_t;
-
- typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags_reply {
- u16 _vl_msg_id;
- u32 context;
- i32 retval;
- }) vl_api_sw_interface_set_flags_reply_t;
-
- ...
- #endif /* vl_typedefs */
-```
-
-To change the admin state of an interface, a binary api client sends a
-@ref vl_api_sw_interface_set_flags_t to VPP, which will respond with a
-@ref vl_api_sw_interface_set_flags_reply_t message.
-
-Multiple layers of software, transport types, and shared libraries
-implement a variety of features:
-
-* API message allocation, tracing, pretty-printing, and replay.
-* Message transport via global shared memory, pairwise/private shared
- memory, and sockets.
-* Barrier synchronization of worker threads across thread-unsafe
- message handlers.
-
-Correctly-coded message handlers know nothing about the transport used to
-deliver messages to/from VPP. It's reasonably straighforward to use multiple
-API message transport types simultaneously.
-
-For historical reasons, binary api messages are (putatively) sent in network
-byte order. As of this writing, we're seriously considering whether that
-choice makes sense.
-
-
-## Message Allocation
-
-Since binary API messages are always processed in order, we allocate messages
-using a ring allocator whenever possible. This scheme is extremely fast when
-compared with a traditional memory allocator, and doesn't cause heap
-fragmentation. See
-@ref src/vlibmemory/memory_shared.c @ref vl_msg_api_alloc_internal().
-
-Regardless of transport, binary api messages always follow a @ref msgbuf_t
-header:
-
-```{.c}
- typedef struct msgbuf_
- {
- unix_shared_memory_queue_t *q;
- u32 data_len;
- u32 gc_mark_timestamp;
- u8 data[0];
- } msgbuf_t;
-```
-
-This structure makes it easy to trace messages without having to
-decode them - simply save data_len bytes - and allows
-@ref vl_msg_api_free() to rapidly dispose of message buffers:
-
-```{.c}
- void
- vl_msg_api_free (void *a)
- {
- msgbuf_t *rv;
- api_main_t *am = &api_main;
-
- rv = (msgbuf_t *) (((u8 *) a) - offsetof (msgbuf_t, data));
-
- /*
- * Here's the beauty of the scheme. Only one proc/thread has
- * control of a given message buffer. To free a buffer, we just
- * clear the queue field, and leave. No locks, no hits, no errors...
- */
- if (rv->q)
- {
- rv->q = 0;
- rv->gc_mark_timestamp = 0;
- return;
- }
- <snip>
- }
-```
-
-## Message Tracing and Replay
-
-It's extremely important that VPP can capture and replay sizeable binary API
-traces. System-level issues involving hundreds of thousands of API
-transactions can be re-run in a second or less. Partial replay allows one to
-binary-search for the point where the wheels fall off. One can add scaffolding
-to the data plane, to trigger when complex conditions obtain.
-
-With binary API trace, print, and replay, system-level bug reports of the form
-"after 300,000 API transactions, the VPP data-plane stopped forwarding
-traffic, FIX IT!" can be solved offline.
-
-More often than not, one discovers that a control-plane client
-misprograms the data plane after a long time or under complex
-circumstances. Without direct evidence, "it's a data-plane problem!"
-
-See @ref src/vlibmemory/memory_vlib.c @ref vl_msg_api_process_file(),
-and @ref src/vlibapi/api_shared.c. See also the debug CLI command "api trace"
-
-## Client connection details
-
-Establishing a binary API connection to VPP from a C-language client
-is easy:
-
-```{.c}
- int
- connect_to_vpe (char *client_name, int client_message_queue_length)
- {
- vat_main_t *vam = &vat_main;
- api_main_t *am = &api_main;
-
- if (vl_client_connect_to_vlib ("/vpe-api", client_name,
- client_message_queue_length) < 0)
- return -1;
-
- /* Memorize vpp's binary API message input queue address */
- vam->vl_input_queue = am->shmem_hdr->vl_input_queue;
- /* And our client index */
- vam->my_client_index = am->my_client_index;
- return 0;
- }
-```
-
-32 is a typical value for client_message_queue_length. VPP cannot
-block when it needs to send an API message to a binary API client, and
-the VPP-side binary API message handlers are very fast. When sending
-asynchronous messages, make sure to scrape the binary API rx ring with
-some enthusiasm.
-
-### binary API message RX pthread
-
-Calling @ref vl_client_connect_to_vlib spins up a binary API message RX
-pthread:
-
-```{.c}
- static void *
- rx_thread_fn (void *arg)
- {
- unix_shared_memory_queue_t *q;
- memory_client_main_t *mm = &memory_client_main;
- api_main_t *am = &api_main;
-
- q = am->vl_input_queue;
-
- /* So we can make the rx thread terminate cleanly */
- if (setjmp (mm->rx_thread_jmpbuf) == 0)
- {
- mm->rx_thread_jmpbuf_valid = 1;
- while (1)
- {
- vl_msg_api_queue_handler (q);
- }
- }
- pthread_exit (0);
- }
-```
-
-To handle the binary API message queue yourself, use
-@ref vl_client_connect_to_vlib_no_rx_pthread.
-
-In turn, vl_msg_api_queue_handler(...) uses mutex/condvar signalling
-to wake up, process VPP -> client traffic, then sleep. VPP supplies a
-condvar broadcast when the VPP -> client API message queue transitions
-from empty to nonempty.
-
-VPP checks its own binary API input queue at a very high rate. VPP
-invokes message handlers in "process" context [aka cooperative
-multitasking thread context] at a variable rate, depending on
-data-plane packet processing requirements.
-
-## Client disconnection details
-
-To disconnect from VPP, call @ref vl_client_disconnect_from_vlib.
-Please arrange to call this function if the client application
-terminates abnormally. VPP makes every effort to hold a decent funeral
-for dead clients, but VPP can't guarantee to free leaked memory in the
-shared binary API segment.
-
-## Sending binary API messages to VPP
-
-The point of the exercise is to send binary API messages to VPP, and
-to receive replies from VPP. Many VPP binary APIs comprise a client
-request message, and a simple status reply. For example, to
-set the admin status of an interface, one codes:
-
-```{.c}
- vl_api_sw_interface_set_flags_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_SW_INTERFACE_SET_FLAGS);
- mp->client_index = api_main.my_client_index;
- mp->sw_if_index = clib_host_to_net_u32 (<interface-sw-if-index>);
- vl_msg_api_send (api_main.shmem_hdr->vl_input_queue, (u8 *)mp);
-```
-
-Key points:
-
-* Use @ref vl_msg_api_alloc to allocate message buffers
-
-* Allocated message buffers are not initialized, and must be presumed
- to contain trash.
-
-* Don't forget to set the _vl_msg_id field!
-
-* As of this writing, binary API message IDs and data are sent in
- network byte order
-
-* The client-library global data structure @ref api_main keeps track
- of sufficient pointers and handles used to communicate with VPP
-
-## Receiving binary API messages from VPP
-
-Unless you've made other arrangements (see @ref
-vl_client_connect_to_vlib_no_rx_pthread), *messages are received on a
-separate rx pthread*. Synchronization with the client application main
-thread is the responsibility of the application!
-
-Set up message handlers about as follows:
-
-```{.c}
- #define vl_typedefs /* define message structures */
- #include <vpp/api/vpe_all_api_h.h>
- #undef vl_typedefs
-
- /* declare message handlers for each api */
-
- #define vl_endianfun /* define message structures */
- #include <vpp/api/vpe_all_api_h.h>
- #undef vl_endianfun
-
- /* instantiate all the print functions we know about */
- #define vl_print(handle, ...)
- #define vl_printfun
- #include <vpp/api/vpe_all_api_h.h>
- #undef vl_printfun
-
- /* Define a list of all message that the client handles */
- #define foreach_vpe_api_reply_msg \
- _(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply)
-
- static clib_error_t *
- my_api_hookup (vlib_main_t * vm)
- {
- api_main_t *am = &api_main;
-
- #define _(N,n) \
- vl_msg_api_set_handlers(VL_API_##N, #n, \
- vl_api_##n##_t_handler, \
- vl_noop_handler, \
- vl_api_##n##_t_endian, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 1);
- foreach_vpe_api_msg;
- #undef _
-
- return 0;
- }
-```
-
-The key API used to establish message handlers is @ref
-vl_msg_api_set_handlers , which sets values in multiple parallel
-vectors in the @ref api_main_t structure. As of this writing: not all
-vector element values can be set through the API. You'll see sporadic
-API message registrations followed by minor adjustments of this form:
-
-```{.c}
- /*
- * Thread-safe API messages
- */
- am->is_mp_safe[VL_API_IP_ADD_DEL_ROUTE] = 1;
- am->is_mp_safe[VL_API_GET_NODE_GRAPH] = 1;
-```
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/vlibapi/api_doc.rst b/src/vlibapi/api_doc.rst
new file mode 100644
index 00000000000..2131cc1919c
--- /dev/null
+++ b/src/vlibapi/api_doc.rst
@@ -0,0 +1,341 @@
+.. _api_doc:
+
+Writing API handlers
+====================
+
+VPP provides a binary API scheme to allow a wide variety of client codes
+to program data-plane tables. As of this writing, there are hundreds of
+binary APIs.
+
+Messages are defined in ``*.api`` files. Today, there are about 50 api
+files, with more arriving as folks add programmable features. The API
+file compiler sources reside in @ref src/tools/vppapigen.
+
+From @ref src/vnet/interface.api, here’s a typical request/response
+message definition:
+
+.. code:: c
+
+ autoreply define sw_interface_set_flags
+ {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* 1 = up, 0 = down */
+ u8 admin_up_down;
+ };
+
+To a first approximation, the API compiler renders this definition into
+``build-root/.../vpp/include/vnet/interface.api.h`` as follows:
+
+.. code:: c
+
+ /****** Message ID / handler enum ******/
+ #ifdef vl_msg_id
+ vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS, vl_api_sw_interface_set_flags_t_handler)
+ vl_msg_id(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, vl_api_sw_interface_set_flags_reply_t_handler)
+ #endif
+
+ /****** Message names ******/
+ #ifdef vl_msg_name
+ vl_msg_name(vl_api_sw_interface_set_flags_t, 1)
+ vl_msg_name(vl_api_sw_interface_set_flags_reply_t, 1)
+ #endif
+
+ /****** Message name, crc list ******/
+ #ifdef vl_msg_name_crc_list
+ #define foreach_vl_msg_name_crc_interface \
+ _(VL_API_SW_INTERFACE_SET_FLAGS, sw_interface_set_flags, f890584a) \
+ _(VL_API_SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply, dfbf3afa) \
+ #endif
+
+ /****** Typedefs *****/
+ #ifdef vl_typedefs
+ typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags {
+ u16 _vl_msg_id;
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u8 admin_up_down;
+ }) vl_api_sw_interface_set_flags_t;
+
+ typedef VL_API_PACKED(struct _vl_api_sw_interface_set_flags_reply {
+ u16 _vl_msg_id;
+ u32 context;
+ i32 retval;
+ }) vl_api_sw_interface_set_flags_reply_t;
+
+ ...
+ #endif /* vl_typedefs */
+
+To change the admin state of an interface, a binary api client sends a
+@ref vl_api_sw_interface_set_flags_t to VPP, which will respond with a
+@ref vl_api_sw_interface_set_flags_reply_t message.
+
+Multiple layers of software, transport types, and shared libraries
+implement a variety of features:
+
+- API message allocation, tracing, pretty-printing, and replay.
+- Message transport via global shared memory, pairwise/private shared
+ memory, and sockets.
+- Barrier synchronization of worker threads across thread-unsafe
+ message handlers.
+
+Correctly-coded message handlers know nothing about the transport used
+to deliver messages to/from VPP. It’s reasonably straightforward to use
+multiple API message transport types simultaneously.
+
+For historical reasons, binary api messages are (putatively) sent in
+network byte order. As of this writing, we’re seriously considering
+whether that choice makes sense.
+
+Message Allocation
+------------------
+
+Since binary API messages are always processed in order, we allocate
+messages using a ring allocator whenever possible. This scheme is
+extremely fast when compared with a traditional memory allocator, and
+doesn’t cause heap fragmentation. See @ref
+src/vlibmemory/memory_shared.c @ref vl_msg_api_alloc_internal().
+
+Regardless of transport, binary api messages always follow a @ref
+msgbuf_t header:
+
+.. code:: c
+
+ typedef struct msgbuf_
+ {
+ unix_shared_memory_queue_t *q;
+ u32 data_len;
+ u32 gc_mark_timestamp;
+ u8 data[0];
+ } msgbuf_t;
+
+This structure makes it easy to trace messages without having to decode
+them - simply save data_len bytes - and allows @ref vl_msg_api_free() to
+rapidly dispose of message buffers:
+
+.. code:: c
+
+ void
+ vl_msg_api_free (void *a)
+ {
+ msgbuf_t *rv;
+ api_main_t *am = &api_main;
+
+ rv = (msgbuf_t *) (((u8 *) a) - offsetof (msgbuf_t, data));
+
+ /*
+ * Here's the beauty of the scheme. Only one proc/thread has
+ * control of a given message buffer. To free a buffer, we just
+ * clear the queue field, and leave. No locks, no hits, no errors...
+ */
+ if (rv->q)
+ {
+ rv->q = 0;
+ rv->gc_mark_timestamp = 0;
+ return;
+ }
+ <snip>
+ }
+
+Message Tracing and Replay
+--------------------------
+
+It’s extremely important that VPP can capture and replay sizeable binary
+API traces. System-level issues involving hundreds of thousands of API
+transactions can be re-run in a second or less. Partial replay allows
+one to binary-search for the point where the wheels fall off. One can
+add scaffolding to the data plane, to trigger when complex conditions
+obtain.
+
+With binary API trace, print, and replay, system-level bug reports of
+the form “after 300,000 API transactions, the VPP data-plane stopped
+forwarding traffic, FIX IT!” can be solved offline.
+
+More often than not, one discovers that a control-plane client
+misprograms the data plane after a long time or under complex
+circumstances. Without direct evidence, “it’s a data-plane problem!”
+
+See @ref src/vlibmemory/memory_vlib.c @ref vl_msg_api_process_file(),
+and @ref src/vlibapi/api_shared.c. See also the debug CLI command “api
+trace”
+
+Client connection details
+-------------------------
+
+Establishing a binary API connection to VPP from a C-language client is
+easy:
+
+.. code:: c
+
+ int
+ connect_to_vpe (char *client_name, int client_message_queue_length)
+ {
+ vat_main_t *vam = &vat_main;
+ api_main_t *am = &api_main;
+
+ if (vl_client_connect_to_vlib ("/vpe-api", client_name,
+ client_message_queue_length) < 0)
+ return -1;
+
+ /* Memorize vpp's binary API message input queue address */
+ vam->vl_input_queue = am->shmem_hdr->vl_input_queue;
+ /* And our client index */
+ vam->my_client_index = am->my_client_index;
+ return 0;
+ }
+
+32 is a typical value for client_message_queue_length. VPP cannot block
+when it needs to send an API message to a binary API client, and the
+VPP-side binary API message handlers are very fast. When sending
+asynchronous messages, make sure to scrape the binary API rx ring with
+some enthusiasm.
+
+binary API message RX pthread
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Calling @ref vl_client_connect_to_vlib spins up a binary API message RX
+pthread:
+
+.. code:: c
+
+ static void *
+ rx_thread_fn (void *arg)
+ {
+ unix_shared_memory_queue_t *q;
+ memory_client_main_t *mm = &memory_client_main;
+ api_main_t *am = &api_main;
+
+ q = am->vl_input_queue;
+
+ /* So we can make the rx thread terminate cleanly */
+ if (setjmp (mm->rx_thread_jmpbuf) == 0)
+ {
+ mm->rx_thread_jmpbuf_valid = 1;
+ while (1)
+ {
+ vl_msg_api_queue_handler (q);
+ }
+ }
+ pthread_exit (0);
+ }
+
+To handle the binary API message queue yourself, use @ref
+vl_client_connect_to_vlib_no_rx_pthread.
+
+In turn, vl_msg_api_queue_handler(…) uses mutex/condvar signalling to
+wake up, process VPP -> client traffic, then sleep. VPP supplies a
+condvar broadcast when the VPP -> client API message queue transitions
+from empty to nonempty.
+
+VPP checks its own binary API input queue at a very high rate. VPP
+invokes message handlers in “process” context [aka cooperative
+multitasking thread context] at a variable rate, depending on data-plane
+packet processing requirements.
+
+Client disconnection details
+----------------------------
+
+To disconnect from VPP, call @ref vl_client_disconnect_from_vlib. Please
+arrange to call this function if the client application terminates
+abnormally. VPP makes every effort to hold a decent funeral for dead
+clients, but VPP can’t guarantee to free leaked memory in the shared
+binary API segment.
+
+Sending binary API messages to VPP
+----------------------------------
+
+The point of the exercise is to send binary API messages to VPP, and to
+receive replies from VPP. Many VPP binary APIs comprise a client request
+message, and a simple status reply. For example, to set the admin status
+of an interface, one codes:
+
+.. code:: c
+
+ vl_api_sw_interface_set_flags_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_SW_INTERFACE_SET_FLAGS);
+ mp->client_index = api_main.my_client_index;
+ mp->sw_if_index = clib_host_to_net_u32 (<interface-sw-if-index>);
+ vl_msg_api_send (api_main.shmem_hdr->vl_input_queue, (u8 *)mp);
+
+Key points:
+
+- Use @ref vl_msg_api_alloc to allocate message buffers
+
+- Allocated message buffers are not initialized, and must be presumed
+ to contain trash.
+
+- Don’t forget to set the \_vl_msg_id field!
+
+- As of this writing, binary API message IDs and data are sent in
+ network byte order
+
+- The client-library global data structure @ref api_main keeps track of
+ sufficient pointers and handles used to communicate with VPP
+
+Receiving binary API messages from VPP
+--------------------------------------
+
+Unless you’ve made other arrangements (see @ref
+vl_client_connect_to_vlib_no_rx_pthread), *messages are received on a
+separate rx pthread*. Synchronization with the client application main
+thread is the responsibility of the application!
+
+Set up message handlers about as follows:
+
+.. code:: c
+
+ #define vl_typedefs /* define message structures */
+ #include <vpp/api/vpe_all_api_h.h>
+ #undef vl_typedefs
+
+ /* declare message handlers for each api */
+
+ #define vl_endianfun /* define message structures */
+ #include <vpp/api/vpe_all_api_h.h>
+ #undef vl_endianfun
+
+ /* instantiate all the print functions we know about */
+ #define vl_printfun
+ #include <vpp/api/vpe_all_api_h.h>
+ #undef vl_printfun
+
+ /* Define a list of all message that the client handles */
+ #define foreach_vpe_api_reply_msg \
+ _(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply)
+
+ static clib_error_t *
+ my_api_hookup (vlib_main_t * vm)
+ {
+ api_main_t *am = &api_main;
+
+ #define _(N,n) \
+ vl_msg_api_set_handlers(VL_API_##N, #n, \
+ vl_api_##n##_t_handler, \
+ vl_noop_handler, \
+ vl_api_##n##_t_endian, \
+ vl_api_##n##_t_print, \
+ sizeof(vl_api_##n##_t), 1);
+ foreach_vpe_api_msg;
+ #undef _
+
+ return 0;
+ }
+
+The key API used to establish message handlers is @ref
+vl_msg_api_set_handlers , which sets values in multiple parallel vectors
+in the @ref api_main_t structure. As of this writing: not all vector
+element values can be set through the API. You’ll see sporadic API
+message registrations followed by minor adjustments of this form:
+
+.. code:: c
+
+ /*
+ * Thread-safe API messages
+ */
+ am->is_mp_safe[VL_API_IP_ADD_DEL_ROUTE] = 1;
+ am->is_mp_safe[VL_API_GET_NODE_GRAPH] = 1;
diff --git a/src/vlibapi/api_format.c b/src/vlibapi/api_format.c
new file mode 100644
index 00000000000..f7bb9d3970f
--- /dev/null
+++ b/src/vlibapi/api_format.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vlibapi/api.h>
+
+u8 *
+format_vl_api_msg_text (u8 *s, va_list *args)
+{
+ api_main_t *am = va_arg (*args, api_main_t *);
+ u32 msg_id = va_arg (*args, u32);
+ void *msg = va_arg (*args, void *);
+ vl_api_msg_data_t *m = vl_api_get_msg_data (am, msg_id);
+
+ if (m->format_fn)
+ s = format (s, "%U", m->format_fn, msg);
+ else
+ s = format (s, "[format handler missing for `%s`]", m->name);
+ return s;
+}
+
+u8 *
+format_vl_api_msg_json (u8 *s, va_list *args)
+{
+ api_main_t *am = va_arg (*args, api_main_t *);
+ u32 msg_id = va_arg (*args, u32);
+ void *msg = va_arg (*args, void *);
+ vl_api_msg_data_t *m = vl_api_get_msg_data (am, msg_id);
+
+ cJSON *o = m->tojson_handler (msg);
+ char *out = cJSON_Print (o);
+
+ s = format (s, "%s", out);
+
+ cJSON_Delete (o);
+ cJSON_free (out);
+ return s;
+}
diff --git a/src/vlibapi/api_helper_macros.h b/src/vlibapi/api_helper_macros.h
index d49282e9e65..9c93d33934b 100644
--- a/src/vlibapi/api_helper_macros.h
+++ b/src/vlibapi/api_helper_macros.h
@@ -27,81 +27,100 @@
#define REPLY_MSG_ID_BASE 0
#endif
-#define REPLY_MACRO(t) \
-do { \
- vl_api_registration_t *rp; \
- rv = vl_msg_api_pd_handler (mp, rv); \
- rp = vl_api_client_index_to_registration (mp->client_index); \
- if (rp == 0) \
- return; \
- \
- rmp = vl_msg_api_alloc (sizeof (*rmp)); \
- rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \
- rmp->context = mp->context; \
- rmp->retval = ntohl(rv); \
- \
- vl_api_send_msg (rp, (u8 *)rmp); \
-} while(0);
+#define _NATIVE_TO_NETWORK(t, rmp) \
+ api_main_t *am = vlibapi_get_main (); \
+ void (*endian_fp) (void *); \
+ endian_fp = am->msg_data[t + (REPLY_MSG_ID_BASE)].endian_handler; \
+ (*endian_fp) (rmp);
-#define REPLY_MACRO_END(t) \
-do { \
- vl_api_registration_t *rp; \
- rv = vl_msg_api_pd_handler (mp, rv); \
- rp = vl_api_client_index_to_registration (mp->client_index); \
- if (rp == 0) \
- return; \
- \
- rmp = vl_msg_api_alloc (sizeof (*rmp)); \
- rmp->_vl_msg_id = t+(REPLY_MSG_ID_BASE); \
- rmp->context = mp->context; \
- rmp->retval = rv; \
- api_main_t *am = vlibapi_get_main (); \
- void (*endian_fp) (void *); \
- endian_fp = am->msg_endian_handlers[t+(REPLY_MSG_ID_BASE)]; \
- (*endian_fp) (rmp); \
- vl_api_send_msg (rp, (u8 *)rmp); \
-} while(0);
+#define REPLY_MACRO(msg) \
+ do \
+ { \
+ STATIC_ASSERT ( \
+ msg##_IS_CONSTANT_SIZE, \
+ "REPLY_MACRO can only be used with constant size messages, " \
+ "use REPLY_MACRO[3|4]* instead"); \
+ vl_api_registration_t *rp; \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = htons (msg + (REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl (rv); \
+ \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
-#define REPLY_MACRO2(t, body) \
-do { \
- vl_api_registration_t *rp; \
- rv = vl_msg_api_pd_handler (mp, rv); \
- rp = vl_api_client_index_to_registration (mp->client_index); \
- if (rp == 0) \
- return; \
- \
- rmp = vl_msg_api_alloc (sizeof (*rmp)); \
- rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \
- rmp->context = mp->context; \
- rmp->retval = ntohl(rv); \
- do {body;} while (0); \
- vl_api_send_msg (rp, (u8 *)rmp); \
-} while(0);
+#define REPLY_MACRO_END(t) \
+ do \
+ { \
+ vl_api_registration_t *rp; \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = t + (REPLY_MSG_ID_BASE); \
+ rmp->context = mp->context; \
+ rmp->retval = rv; \
+ _NATIVE_TO_NETWORK (t, rmp); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
-#define REPLY_MACRO2_END(t, body) \
-do { \
- vl_api_registration_t *rp; \
- rv = vl_msg_api_pd_handler (mp, rv); \
- rp = vl_api_client_index_to_registration (mp->client_index); \
- if (rp == 0) \
- return; \
- \
- rmp = vl_msg_api_alloc (sizeof (*rmp)); \
- rmp->_vl_msg_id = t+(REPLY_MSG_ID_BASE); \
- rmp->context = mp->context; \
- rmp->retval = rv; \
- do {body;} while (0); \
- api_main_t *am = vlibapi_get_main (); \
- void (*endian_fp) (void *); \
- endian_fp = am->msg_endian_handlers[t+(REPLY_MSG_ID_BASE)]; \
- (*endian_fp) (rmp); \
- vl_api_send_msg (rp, (u8 *)rmp); \
-} while(0);
+#define REPLY_MACRO2(t, body) \
+ do \
+ { \
+ STATIC_ASSERT ( \
+ t##_IS_CONSTANT_SIZE, \
+ "REPLY_MACRO2 can only be used with constant size messages, " \
+ "use REPLY_MACRO[3|4]* instead"); \
+ vl_api_registration_t *rp; \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = htons ((t) + (REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl (rv); \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
+
+#define REPLY_MACRO2_END(t, body) \
+ do \
+ { \
+ vl_api_registration_t *rp; \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = t + (REPLY_MSG_ID_BASE); \
+ rmp->context = mp->context; \
+ rmp->retval = rv; \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ _NATIVE_TO_NETWORK (t, rmp); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
#define REPLY_MACRO2_ZERO(t, body) \
do { \
vl_api_registration_t *rp; \
- rv = vl_msg_api_pd_handler (mp, rv); \
rp = vl_api_client_index_to_registration (mp->client_index); \
if (rp == 0) \
return; \
@@ -114,10 +133,31 @@ do { \
vl_api_send_msg (rp, (u8 *)rmp); \
} while(0);
+#define REPLY_MACRO2_ZERO_END(t, body) \
+ do \
+ { \
+ vl_api_registration_t *rp; \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc_zero (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ((t) + (REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ rmp->retval = rv; \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ _NATIVE_TO_NETWORK (t, rmp); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
+
#define REPLY_MACRO_DETAILS2(t, body) \
do { \
vl_api_registration_t *rp; \
- rv = vl_msg_api_pd_handler (mp, rv); \
rp = vl_api_client_index_to_registration (mp->client_index); \
if (rp == 0) \
return; \
@@ -129,6 +169,27 @@ do { \
vl_api_send_msg (rp, (u8 *)rmp); \
} while(0);
+#define REPLY_MACRO_DETAILS2_END(t, body) \
+ do \
+ { \
+ vl_api_registration_t *rp; \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ((t) + (REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ _NATIVE_TO_NETWORK (t, rmp); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
+
#define REPLY_MACRO_DETAILS4(t, rp, context, body) \
do { \
rmp = vl_msg_api_alloc (sizeof (*rmp)); \
@@ -138,26 +199,99 @@ do { \
vl_api_send_msg (rp, (u8 *)rmp); \
} while(0);
-#define REPLY_MACRO3(t, n, body) \
-do { \
- vl_api_registration_t *rp; \
- rv = vl_msg_api_pd_handler (mp, rv); \
- rp = vl_api_client_index_to_registration (mp->client_index); \
- if (rp == 0) \
- return; \
- \
- rmp = vl_msg_api_alloc (sizeof (*rmp) + n); \
- rmp->_vl_msg_id = htons((t)+(REPLY_MSG_ID_BASE)); \
- rmp->context = mp->context; \
- rmp->retval = ntohl(rv); \
- do {body;} while (0); \
- vl_api_send_msg (rp, (u8 *)rmp); \
-} while(0);
+#define REPLY_MACRO_DETAILS4_END(t, rp, context, body) \
+ do \
+ { \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ rmp->_vl_msg_id = ((t) + (REPLY_MSG_ID_BASE)); \
+ rmp->context = context; \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ _NATIVE_TO_NETWORK (t, rmp); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
+
+#define REPLY_MACRO_DETAILS5(t, n, rp, context, body) \
+ do \
+ { \
+ rmp = vl_msg_api_alloc (sizeof (*rmp) + n); \
+ rmp->_vl_msg_id = htons ((t) + (REPLY_MSG_ID_BASE)); \
+ rmp->context = context; \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
+
+#define REPLY_MACRO_DETAILS5_END(t, n, rp, context, body) \
+ do \
+ { \
+ rmp = vl_msg_api_alloc (sizeof (*rmp) + n); \
+ rmp->_vl_msg_id = ((t) + (REPLY_MSG_ID_BASE)); \
+ rmp->context = context; \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ _NATIVE_TO_NETWORK (t, rmp); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
+
+#define REPLY_MACRO3(t, n, body) \
+ do \
+ { \
+ vl_api_registration_t *rp; \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp) + (n)); \
+ rmp->_vl_msg_id = htons ((t) + (REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ rmp->retval = ntohl (rv); \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
+
+#define REPLY_MACRO3_END(t, n, body) \
+ do \
+ { \
+ vl_api_registration_t *rp; \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp) + n); \
+ rmp->_vl_msg_id = ((t) + (REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ rmp->retval = rv; \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ _NATIVE_TO_NETWORK (t, rmp); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
#define REPLY_MACRO3_ZERO(t, n, body) \
do { \
vl_api_registration_t *rp; \
- rv = vl_msg_api_pd_handler (mp, rv); \
rp = vl_api_client_index_to_registration (mp->client_index); \
if (rp == 0) \
return; \
@@ -170,11 +304,32 @@ do { \
vl_api_send_msg (rp, (u8 *)rmp); \
} while(0);
+#define REPLY_MACRO3_ZERO_END(t, n, body) \
+ do \
+ { \
+ vl_api_registration_t *rp; \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc_zero (sizeof (*rmp) + n); \
+ rmp->_vl_msg_id = ((t) + (REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ rmp->retval = rv; \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ _NATIVE_TO_NETWORK (t, rmp); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
+
#define REPLY_MACRO4(t, n, body) \
do { \
vl_api_registration_t *rp; \
u8 is_error = 0; \
- rv = vl_msg_api_pd_handler (mp, rv); \
\
rp = vl_api_client_index_to_registration (mp->client_index); \
if (rp == 0) \
@@ -201,6 +356,43 @@ do { \
vl_api_send_msg (rp, (u8 *)rmp); \
} while(0);
+#define REPLY_MACRO4_END(t, n, body) \
+ do \
+ { \
+ vl_api_registration_t *rp; \
+ u8 is_error = 0; \
+ \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ \
+ rmp = vl_msg_api_alloc_or_null (sizeof (*rmp) + n); \
+ if (!rmp) \
+ { \
+ /* if there isn't enough memory, try to allocate */ \
+ /* some at least for returning an error */ \
+ rmp = vl_msg_api_alloc (sizeof (*rmp)); \
+ if (!rmp) \
+ return; \
+ \
+ clib_memset (rmp, 0, sizeof (*rmp)); \
+ rv = VNET_API_ERROR_TABLE_TOO_BIG; \
+ is_error = 1; \
+ } \
+ rmp->_vl_msg_id = ((t) + (REPLY_MSG_ID_BASE)); \
+ rmp->context = mp->context; \
+ rmp->retval = rv; \
+ if (!is_error) \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ _NATIVE_TO_NETWORK (t, rmp); \
+ vl_api_send_msg (rp, (u8 *) rmp); \
+ } \
+ while (0);
+
#define REPLY_AND_DETAILS_MACRO(t, p, body) \
do \
{ \
@@ -241,6 +433,46 @@ do { \
} \
while (0);
+#define REPLY_AND_DETAILS_MACRO_END(t, p, body) \
+ do \
+ { \
+ if (pool_elts (p) == 0) \
+ { \
+ REPLY_MACRO_END (t); \
+ break; \
+ } \
+ vl_api_registration_t *rp; \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ u32 cursor = mp->cursor; \
+ vlib_main_t *vm = vlib_get_main (); \
+ f64 start = vlib_time_now (vm); \
+ if (pool_is_free_index (p, cursor)) \
+ { \
+ cursor = pool_next_index (p, cursor); \
+ if (cursor == ~0) \
+ rv = VNET_API_ERROR_INVALID_VALUE; \
+ } \
+ while (cursor != ~0) \
+ { \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ cursor = pool_next_index (p, cursor); \
+ if (vl_api_process_may_suspend (vm, rp, start)) \
+ { \
+ if (cursor != ~0) \
+ rv = VNET_API_ERROR_EAGAIN; \
+ break; \
+ } \
+ } \
+ REPLY_MACRO2_END (t, ({ rmp->cursor = cursor; })); \
+ } \
+ while (0);
+
#define REPLY_AND_DETAILS_VEC_MACRO(t, v, mp, rmp, rv, body) \
do { \
vl_api_registration_t *rp; \
@@ -269,14 +501,45 @@ do { \
})); \
} while(0);
+#define REPLY_AND_DETAILS_VEC_MACRO_END(t, v, mp, rmp, rv, body) \
+ do \
+ { \
+ vl_api_registration_t *rp; \
+ rp = vl_api_client_index_to_registration (mp->client_index); \
+ if (rp == 0) \
+ return; \
+ u32 cursor = mp->cursor; \
+ vlib_main_t *vm = vlib_get_main (); \
+ f64 start = vlib_time_now (vm); \
+ if (!v || vec_len (v) == 0) \
+ { \
+ cursor = ~0; \
+ rv = VNET_API_ERROR_INVALID_VALUE; \
+ } \
+ else if (cursor == ~0) \
+ cursor = 0; \
+ while (cursor != ~0 && cursor < vec_len (v)) \
+ { \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ ++cursor; \
+ if (vl_api_process_may_suspend (vm, rp, start)) \
+ { \
+ if (cursor < vec_len (v)) \
+ rv = VNET_API_ERROR_EAGAIN; \
+ break; \
+ } \
+ } \
+ REPLY_MACRO2_END (t, ({ rmp->cursor = cursor; })); \
+ } \
+ while (0);
/* "trust, but verify" */
-
-static inline uword
-vnet_sw_if_index_is_api_valid (u32 sw_if_index)
-{
- return vnet_sw_interface_is_api_valid (vnet_get_main (), sw_if_index);
-}
+#define vnet_sw_if_index_is_api_valid(sw_if_index) \
+ vnet_sw_interface_is_api_valid (vnet_get_main (), sw_if_index)
#define VALIDATE_SW_IF_INDEX(mp) \
do { u32 __sw_if_index = ntohl((mp)->sw_if_index); \
@@ -286,6 +549,17 @@ vnet_sw_if_index_is_api_valid (u32 sw_if_index)
} \
} while(0);
+#define VALIDATE_SW_IF_INDEX_END(mp) \
+ do \
+ { \
+ if (!vnet_sw_if_index_is_api_valid ((mp)->sw_if_index)) \
+ { \
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \
+ goto bad_sw_if_index; \
+ } \
+ } \
+ while (0);
+
#define BAD_SW_IF_INDEX_LABEL \
do { \
bad_sw_if_index: \
@@ -300,6 +574,17 @@ bad_sw_if_index: \
} \
} while(0);
+#define VALIDATE_RX_SW_IF_INDEX_END(mp) \
+ do \
+ { \
+ if (!vnet_sw_if_index_is_api_valid ((mp)->rx_sw_if_index)) \
+ { \
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \
+ goto bad_rx_sw_if_index; \
+ } \
+ } \
+ while (0);
+
#define BAD_RX_SW_IF_INDEX_LABEL \
do { \
bad_rx_sw_if_index: \
@@ -314,6 +599,17 @@ bad_rx_sw_if_index: \
} \
} while(0);
+#define VALIDATE_TX_SW_IF_INDEX_END(mp) \
+ do \
+ { \
+ if (!vnet_sw_if_index_is_api_valid (mp->tx_sw_if_index)) \
+ { \
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX; \
+ goto bad_tx_sw_if_index; \
+ } \
+ } \
+ while (0);
+
#define BAD_TX_SW_IF_INDEX_LABEL \
do { \
bad_tx_sw_if_index: \
@@ -328,68 +624,82 @@ bad_tx_sw_if_index: \
} \
} while(0);
+#define VALIDATE_BD_ID_END(mp) \
+ do \
+ { \
+ if (mp->bd_id > L2_BD_ID_MAX) \
+ { \
+ rv = VNET_API_ERROR_BD_ID_EXCEED_MAX; \
+ goto bad_bd_id; \
+ } \
+ } \
+ while (0);
+
#define BAD_BD_ID_LABEL \
do { \
bad_bd_id: \
; \
} while (0);
-#define pub_sub_handler(lca,UCA) \
-static void vl_api_want_##lca##_t_handler ( \
- vl_api_want_##lca##_t *mp) \
-{ \
- vpe_api_main_t *vam = &vpe_api_main; \
- vpe_client_registration_t *rp; \
- vl_api_want_##lca##_reply_t *rmp; \
- uword *p; \
- i32 rv = 0; \
- \
- p = hash_get (vam->lca##_registration_hash, mp->client_index); \
- if (p) { \
- if (mp->enable_disable) { \
- clib_warning ("pid %d: already enabled...", ntohl(mp->pid)); \
- rv = VNET_API_ERROR_INVALID_REGISTRATION; \
- goto reply; \
- } else { \
- rp = pool_elt_at_index (vam->lca##_registrations, p[0]); \
- pool_put (vam->lca##_registrations, rp); \
- hash_unset (vam->lca##_registration_hash, \
- mp->client_index); \
- goto reply; \
- } \
- } \
- if (mp->enable_disable == 0) { \
- clib_warning ("pid %d: already disabled...", mp->pid); \
- rv = VNET_API_ERROR_INVALID_REGISTRATION; \
- goto reply; \
- } \
- pool_get (vam->lca##_registrations, rp); \
- rp->client_index = mp->client_index; \
- rp->client_pid = mp->pid; \
- hash_set (vam->lca##_registration_hash, rp->client_index, \
- rp - vam->lca##_registrations); \
- \
-reply: \
- REPLY_MACRO (VL_API_WANT_##UCA##_REPLY); \
-} \
- \
-static clib_error_t * vl_api_want_##lca##_t_reaper (u32 client_index) \
-{ \
- vpe_api_main_t *vam = &vpe_api_main; \
- vpe_client_registration_t *rp; \
- uword *p; \
- \
- p = hash_get (vam->lca##_registration_hash, client_index); \
- if (p) \
- { \
- rp = pool_elt_at_index (vam->lca##_registrations, p[0]); \
- pool_put (vam->lca##_registrations, rp); \
- hash_unset (vam->lca##_registration_hash, client_index); \
- } \
- return (NULL); \
-} \
- \
-VL_MSG_API_REAPER_FUNCTION (vl_api_want_##lca##_t_reaper); \
+#define pub_sub_handler(lca, UCA) \
+ static void vl_api_want_##lca##_t_handler (vl_api_want_##lca##_t *mp) \
+ { \
+ vpe_api_main_t *vam = &vpe_api_main; \
+ vpe_client_registration_t *rp; \
+ vl_api_want_##lca##_reply_t *rmp; \
+ uword *p; \
+ i32 rv = 0; \
+ \
+ p = hash_get (vam->lca##_registration_hash, mp->client_index); \
+ if (p) \
+ { \
+ if (mp->enable_disable) \
+ { \
+ clib_warning ("pid %d: already enabled...", ntohl (mp->pid)); \
+ rv = VNET_API_ERROR_INVALID_REGISTRATION; \
+ goto reply; \
+ } \
+ else \
+ { \
+ rp = pool_elt_at_index (vam->lca##_registrations, p[0]); \
+ pool_put (vam->lca##_registrations, rp); \
+ hash_unset (vam->lca##_registration_hash, mp->client_index); \
+ goto reply; \
+ } \
+ } \
+ if (mp->enable_disable == 0) \
+ { \
+ clib_warning ("pid %d: already disabled...", mp->pid); \
+ rv = VNET_API_ERROR_INVALID_REGISTRATION; \
+ goto reply; \
+ } \
+ pool_get (vam->lca##_registrations, rp); \
+ rp->client_index = mp->client_index; \
+ rp->client_pid = mp->pid; \
+ hash_set (vam->lca##_registration_hash, rp->client_index, \
+ rp - vam->lca##_registrations); \
+ \
+ reply: \
+ REPLY_MACRO (VL_API_WANT_##UCA##_REPLY); \
+ } \
+ \
+ static clib_error_t *vl_api_want_##lca##_t_reaper (u32 client_index) \
+ { \
+ vpe_api_main_t *vam = &vpe_api_main; \
+ vpe_client_registration_t *rp; \
+ uword *p; \
+ \
+ p = hash_get (vam->lca##_registration_hash, client_index); \
+ if (p) \
+ { \
+ rp = pool_elt_at_index (vam->lca##_registrations, p[0]); \
+ pool_put (vam->lca##_registrations, rp); \
+ hash_unset (vam->lca##_registration_hash, client_index); \
+ } \
+ return (NULL); \
+ } \
+ \
+ VL_MSG_API_REAPER_FUNCTION (vl_api_want_##lca##_t_reaper);
#define foreach_registration_hash \
_(interface_events) \
@@ -423,7 +733,7 @@ typedef struct
/* convenience */
vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
+ struct vnet_main_t *vnet_main;
} vpe_api_main_t;
extern vpe_api_main_t vpe_api_main;
diff --git a/src/vlibapi/api_shared.c b/src/vlibapi/api_shared.c
index 65288d89f67..7de1906f17a 100644
--- a/src/vlibapi/api_shared.c
+++ b/src/vlibapi/api_shared.c
@@ -32,14 +32,11 @@
#include <vppinfra/elog.h>
#include <vppinfra/callback.h>
-/* *INDENT-OFF* */
-api_main_t api_global_main =
- {
- .region_name = "/unset",
- .api_uid = -1,
- .api_gid = -1,
- };
-/* *INDENT-ON* */
+static api_main_t api_global_main = {
+ .region_name = "/unset",
+ .api_uid = -1,
+ .api_gid = -1,
+};
/* Please use vlibapi_get_main() to access my_api_main */
__thread api_main_t *my_api_main = &api_global_main;
@@ -80,13 +77,11 @@ vl_msg_api_trace (api_main_t * am, vl_api_trace_t * tp, void *msg)
u8 **old_trace;
u8 *msg_copy;
u32 length;
- trace_cfg_t *cfgp;
u16 msg_id = clib_net_to_host_u16 (*((u16 *) msg));
+ vl_api_msg_data_t *m = vl_api_get_msg_data (am, msg_id);
msgbuf_t *header = (msgbuf_t *) (((u8 *) msg) - offsetof (msgbuf_t, data));
- cfgp = am->api_trace_cfg + msg_id;
-
- if (!cfgp || !cfgp->trace_enable)
+ if (!m || !m->trace_enable)
return;
msg_copy = 0;
@@ -212,24 +207,169 @@ vl_api_serialize_message_table (api_main_t * am, u8 * vector)
/* serialize the count */
serialize_integer (sm, nmsg, sizeof (u32));
- /* *INDENT-OFF* */
hash_foreach_pair (hp, am->msg_index_by_name_and_crc,
({
serialize_likely_small_unsigned_integer (sm, hp->value[0]);
serialize_cstring (sm, (char *) hp->key);
}));
- /* *INDENT-ON* */
return serialize_close_vector (sm);
}
+static int
+vl_msg_api_trace_write_one (api_main_t *am, u8 *msg, FILE *fp)
+{
+ u8 *tmpmem = 0;
+ int tlen, slen;
+
+ u32 msg_length = vec_len (msg);
+ vec_validate (tmpmem, msg_length - 1);
+ clib_memcpy_fast (tmpmem, msg, msg_length);
+ u16 id = clib_net_to_host_u16 (*((u16 *) msg));
+ vl_api_msg_data_t *m = vl_api_get_msg_data (am, id);
+
+ if (m && m->endian_handler)
+ {
+ m->endian_handler (tmpmem);
+ }
+
+ if (m && m->tojson_handler)
+ {
+ cJSON *o = m->tojson_handler (tmpmem);
+ char *s = cJSON_Print (o);
+ slen = strlen (s);
+ tlen = fwrite (s, 1, slen, fp);
+ cJSON_free (s);
+ cJSON_Delete (o);
+ vec_free (tmpmem);
+ if (tlen != slen)
+ {
+ fformat (stderr, "writing to file error\n");
+ return -11;
+ }
+ }
+ else
+ fformat (stderr, " [no registered tojson fn]\n");
+
+ return 0;
+}
+
+#define vl_msg_fwrite(_s, _f) fwrite (_s, 1, sizeof (_s) - 1, _f)
+
+typedef struct
+{
+ FILE *fp;
+ u32 n_traces;
+ u32 i;
+} vl_msg_write_json_args_t;
+
+static int
+vl_msg_write_json_fn (u8 *msg, void *ctx)
+{
+ vl_msg_write_json_args_t *arg = ctx;
+ FILE *fp = arg->fp;
+ api_main_t *am = vlibapi_get_main ();
+ int rc = vl_msg_api_trace_write_one (am, msg, fp);
+ if (rc < 0)
+ return rc;
+
+ if (arg->i < arg->n_traces - 1)
+ vl_msg_fwrite (",\n", fp);
+ arg->i++;
+ return 0;
+}
+
+static int
+vl_msg_api_trace_write_json (api_main_t *am, vl_api_trace_t *tp, FILE *fp)
+{
+ vl_msg_write_json_args_t args;
+ clib_memset (&args, 0, sizeof (args));
+ args.fp = fp;
+ args.n_traces = vec_len (tp->traces);
+ vl_msg_fwrite ("[\n", fp);
+
+ int rv = vl_msg_traverse_trace (tp, vl_msg_write_json_fn, &args);
+ if (rv < 0)
+ return rv;
+
+ vl_msg_fwrite ("]", fp);
+ return 0;
+}
+
int
-vl_msg_api_trace_save (api_main_t * am, vl_api_trace_which_t which, FILE * fp)
+vl_msg_traverse_trace (vl_api_trace_t *tp, vl_msg_traverse_trace_fn fn,
+ void *ctx)
{
- vl_api_trace_t *tp;
- vl_api_trace_file_header_t fh;
int i;
u8 *msg;
+ int rv = 0;
+
+ /* No-wrap case */
+ if (tp->wrapped == 0)
+ {
+ for (i = 0; i < vec_len (tp->traces); i++)
+ {
+ /*sa_ignore NO_NULL_CHK */
+ msg = tp->traces[i];
+ if (!msg)
+ continue;
+
+ rv = fn (msg, ctx);
+ if (rv < 0)
+ return rv;
+ }
+ }
+ else
+ {
+ /* Wrap case: write oldest -> end of buffer */
+ for (i = tp->curindex; i < vec_len (tp->traces); i++)
+ {
+ msg = tp->traces[i];
+ if (!msg)
+ continue;
+
+ rv = fn (msg, ctx);
+ if (rv < 0)
+ return rv;
+ }
+ /* write beginning of buffer -> oldest-1 */
+ for (i = 0; i < tp->curindex; i++)
+ {
+ /*sa_ignore NO_NULL_CHK */
+ msg = tp->traces[i];
+ if (!msg)
+ continue;
+
+ rv = fn (msg, ctx);
+ if (rv < 0)
+ return rv;
+ }
+ }
+ return 0;
+}
+
+static int
+vl_api_msg_write_fn (u8 *msg, void *ctx)
+{
+ FILE *fp = ctx;
+ u32 msg_length = clib_host_to_net_u32 (vec_len (msg));
+ if (fwrite (&msg_length, 1, sizeof (msg_length), fp) != sizeof (msg_length))
+ {
+ return (-14);
+ }
+ if (fwrite (msg, 1, vec_len (msg), fp) != vec_len (msg))
+ {
+ return (-14);
+ }
+ return 0;
+}
+
+int
+vl_msg_api_trace_save (api_main_t *am, vl_api_trace_which_t which, FILE *fp,
+ u8 is_json)
+{
+ vl_api_trace_t *tp;
+ vl_api_trace_file_header_t fh;
switch (which)
{
@@ -256,9 +396,13 @@ vl_msg_api_trace_save (api_main_t * am, vl_api_trace_which_t which, FILE * fp)
return -2;
}
+ if (is_json)
+ return vl_msg_api_trace_write_json (am, tp, fp);
+
/* Write the file header */
fh.wrapped = tp->wrapped;
fh.nitems = clib_host_to_net_u32 (vec_len (tp->traces));
+
u8 *m = vl_api_serialize_message_table (am, 0);
fh.msgtbl_size = clib_host_to_net_u32 (vec_len (m));
@@ -274,92 +418,7 @@ vl_msg_api_trace_save (api_main_t * am, vl_api_trace_which_t which, FILE * fp)
}
vec_free (m);
- /* No-wrap case */
- if (tp->wrapped == 0)
- {
- /*
- * Note: vec_len return 0 when fed a NULL pointer.
- * Unfortunately, the static analysis tool doesn't
- * figure it out, hence the suppressed warnings.
- * What a great use of my time.
- */
- for (i = 0; i < vec_len (tp->traces); i++)
- {
- u32 msg_length;
- /*sa_ignore NO_NULL_CHK */
- msg = tp->traces[i];
- /*
- * This retarded check required to pass
- * [sic] SA-checking.
- */
- if (!msg)
- continue;
-
- msg_length = clib_host_to_net_u32 (vec_len (msg));
- if (fwrite (&msg_length, 1, sizeof (msg_length), fp)
- != sizeof (msg_length))
- {
- return (-14);
- }
- if (fwrite (msg, 1, vec_len (msg), fp) != vec_len (msg))
- {
- return (-11);
- }
- }
- }
- else
- {
- /* Wrap case: write oldest -> end of buffer */
- for (i = tp->curindex; i < vec_len (tp->traces); i++)
- {
- u32 msg_length;
- msg = tp->traces[i];
- /*
- * This retarded check required to pass
- * [sic] SA-checking
- */
- if (!msg)
- continue;
-
- msg_length = clib_host_to_net_u32 (vec_len (msg));
- if (fwrite (&msg_length, 1, sizeof (msg_length), fp)
- != sizeof (msg_length))
- {
- return (-14);
- }
-
- if (fwrite (msg, 1, vec_len (msg), fp) != vec_len (msg))
- {
- return (-12);
- }
- }
- /* write beginning of buffer -> oldest-1 */
- for (i = 0; i < tp->curindex; i++)
- {
- u32 msg_length;
- /*sa_ignore NO_NULL_CHK */
- msg = tp->traces[i];
- /*
- * This retarded check required to pass
- * [sic] SA-checking
- */
- if (!msg)
- continue;
-
- msg_length = clib_host_to_net_u32 (vec_len (msg));
- if (fwrite (&msg_length, 1, sizeof (msg_length), fp)
- != sizeof (msg_length))
- {
- return (-14);
- }
-
- if (fwrite (msg, 1, vec_len (msg), fp) != vec_len (msg))
- {
- return (-13);
- }
- }
- }
- return 0;
+ return vl_msg_traverse_trace (tp, vl_api_msg_write_fn, fp);
}
int
@@ -434,76 +493,87 @@ vl_msg_api_barrier_release (void)
}
always_inline void
-msg_handler_internal (api_main_t * am,
- void *the_msg, int trace_it, int do_it, int free_it)
+msg_handler_internal (api_main_t *am, void *the_msg, uword msg_len,
+ int trace_it, int do_it, int free_it)
{
u16 id = clib_net_to_host_u16 (*((u16 *) the_msg));
- u8 *(*print_fp) (void *, void *);
+ vl_api_msg_data_t *m = vl_api_get_msg_data (am, id);
if (PREDICT_FALSE (am->elog_trace_api_messages))
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "api-msg: %s",
.format_args = "T4",
};
- /* *INDENT-ON* */
struct
{
u32 c;
} *ed;
ed = ELOG_DATA (am->elog_main, e);
- if (id < vec_len (am->msg_names) && am->msg_names[id])
- ed->c = elog_string (am->elog_main, (char *) am->msg_names[id]);
+ if (m && m->name)
+ ed->c = elog_string (am->elog_main, (char *) m->name);
else
ed->c = elog_string (am->elog_main, "BOGUS");
}
- if (id < vec_len (am->msg_handlers) && am->msg_handlers[id])
+ if (m && m->handler)
{
if (trace_it)
vl_msg_api_trace (am, am->rx_trace, the_msg);
if (am->msg_print_flag)
{
- fformat (stdout, "[%d]: %s\n", id, am->msg_names[id]);
- print_fp = (void *) am->msg_print_handlers[id];
- if (print_fp == 0)
- {
- fformat (stdout, " [no registered print fn]\n");
- }
- else
+ fformat (stdout, "[%d]: %s\n", id, m->name);
+ fformat (stdout, "%U", format_vl_api_msg_text, am, id, the_msg);
+ }
+
+ uword calc_size = 0;
+ ASSERT (NULL != m->calc_size_func);
+ if (m->calc_size_func)
+ {
+ calc_size = m->calc_size_func (the_msg);
+ if (calc_size > msg_len)
{
- (*print_fp) (the_msg, stdout);
+ clib_warning (
+ "Truncated message '%s' (id %u) received, calculated size "
+ "%lu is bigger than actual size %llu, message dropped.",
+ m->name, id, calc_size, msg_len);
}
}
+ else
+ {
+ clib_warning ("Message '%s' (id %u) has NULL calc_size_func, cannot "
+ "verify message size is correct",
+ m->name, id);
+ }
- if (do_it)
+ /* don't process message if it's truncated, otherwise byte swaps
+ * and stuff could corrupt memory even beyond message if it's malicious
+ * e.g. VLA length field set to 1M elements, but VLA empty */
+ if (do_it && calc_size <= msg_len)
{
- if (!am->is_mp_safe[id])
+
+ if (!m->is_mp_safe)
{
vl_msg_api_barrier_trace_context (am->msg_names[id]);
vl_msg_api_barrier_sync ();
}
- if (am->is_autoendian[id])
- {
- void (*endian_fp) (void *);
- endian_fp = am->msg_endian_handlers[id];
- (*endian_fp) (the_msg);
- }
+ if (m->is_autoendian)
+ m->endian_handler (the_msg);
if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
clib_call_callbacks (am->perf_counter_cbs, am, id,
0 /* before */ );
- (*am->msg_handlers[id]) (the_msg);
+ m->handler (the_msg);
if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
clib_call_callbacks (am->perf_counter_cbs, am, id,
1 /* after */ );
- if (!am->is_mp_safe[id])
+
+ if (!m->is_mp_safe)
vl_msg_api_barrier_release ();
}
}
@@ -517,7 +587,6 @@ msg_handler_internal (api_main_t * am,
if (PREDICT_FALSE (am->elog_trace_api_messages))
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "api-msg-done(%s): %s",
@@ -529,7 +598,6 @@ msg_handler_internal (api_main_t * am,
"mp-safe",
}
};
- /* *INDENT-ON* */
struct
{
@@ -537,10 +605,10 @@ msg_handler_internal (api_main_t * am,
u32 c;
} *ed;
ed = ELOG_DATA (am->elog_main, e);
- if (id < vec_len (am->msg_names) && am->msg_names[id])
+ if (m && m->name)
{
- ed->c = elog_string (am->elog_main, (char *) am->msg_names[id]);
- ed->barrier = !am->is_mp_safe[id];
+ ed->c = elog_string (am->elog_main, (char *) m->name);
+ ed->barrier = !m->is_mp_safe;
}
else
{
@@ -550,183 +618,31 @@ msg_handler_internal (api_main_t * am,
}
}
-void (*vl_msg_api_fuzz_hook) (u16, void *);
-
-/* This is only to be called from a vlib/vnet app */
void
-vl_msg_api_handler_with_vm_node (api_main_t * am, svm_region_t * vlib_rp,
- void *the_msg, vlib_main_t * vm,
- vlib_node_runtime_t * node, u8 is_private)
-{
- u16 id = clib_net_to_host_u16 (*((u16 *) the_msg));
- u8 *(*handler) (void *, void *, void *);
- u8 *(*print_fp) (void *, void *);
- svm_region_t *old_vlib_rp;
- void *save_shmem_hdr;
- int is_mp_safe = 1;
-
- if (PREDICT_FALSE (am->elog_trace_api_messages))
- {
- /* *INDENT-OFF* */
- ELOG_TYPE_DECLARE (e) =
- {
- .format = "api-msg: %s",
- .format_args = "T4",
- };
- /* *INDENT-ON* */
- struct
- {
- u32 c;
- } *ed;
- ed = ELOG_DATA (am->elog_main, e);
- if (id < vec_len (am->msg_names) && am->msg_names[id])
- ed->c = elog_string (am->elog_main, (char *) am->msg_names[id]);
- else
- ed->c = elog_string (am->elog_main, "BOGUS");
- }
-
- if (id < vec_len (am->msg_handlers) && am->msg_handlers[id])
- {
- handler = (void *) am->msg_handlers[id];
-
- if (PREDICT_FALSE (am->rx_trace && am->rx_trace->enabled))
- vl_msg_api_trace (am, am->rx_trace, the_msg);
-
- if (PREDICT_FALSE (am->msg_print_flag))
- {
- fformat (stdout, "[%d]: %s\n", id, am->msg_names[id]);
- print_fp = (void *) am->msg_print_handlers[id];
- if (print_fp == 0)
- {
- fformat (stdout, " [no registered print fn for msg %d]\n", id);
- }
- else
- {
- (*print_fp) (the_msg, vm);
- }
- }
- is_mp_safe = am->is_mp_safe[id];
-
- if (!is_mp_safe)
- {
- vl_msg_api_barrier_trace_context (am->msg_names[id]);
- vl_msg_api_barrier_sync ();
- }
- if (is_private)
- {
- old_vlib_rp = am->vlib_rp;
- save_shmem_hdr = am->shmem_hdr;
- am->vlib_rp = vlib_rp;
- am->shmem_hdr = (void *) vlib_rp->user_ctx;
- }
-
- if (PREDICT_FALSE (vl_msg_api_fuzz_hook != 0))
- (*vl_msg_api_fuzz_hook) (id, the_msg);
-
- if (am->is_autoendian[id])
- {
- void (*endian_fp) (void *);
- endian_fp = am->msg_endian_handlers[id];
- (*endian_fp) (the_msg);
- }
- if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
- clib_call_callbacks (am->perf_counter_cbs, am, id, 0 /* before */ );
-
- (*handler) (the_msg, vm, node);
-
- if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
- clib_call_callbacks (am->perf_counter_cbs, am, id, 1 /* after */ );
- if (is_private)
- {
- am->vlib_rp = old_vlib_rp;
- am->shmem_hdr = save_shmem_hdr;
- }
- if (!is_mp_safe)
- vl_msg_api_barrier_release ();
- }
- else
- {
- clib_warning ("no handler for msg id %d", id);
- }
-
- /*
- * Special-case, so we can e.g. bounce messages off the vnet
- * main thread without copying them...
- */
- if (id >= vec_len (am->message_bounce) || !(am->message_bounce[id]))
- {
- if (is_private)
- {
- old_vlib_rp = am->vlib_rp;
- save_shmem_hdr = am->shmem_hdr;
- am->vlib_rp = vlib_rp;
- am->shmem_hdr = (void *) vlib_rp->user_ctx;
- }
- vl_msg_api_free (the_msg);
- if (is_private)
- {
- am->vlib_rp = old_vlib_rp;
- am->shmem_hdr = save_shmem_hdr;
- }
- }
-
- if (PREDICT_FALSE (am->elog_trace_api_messages))
- {
- /* *INDENT-OFF* */
- ELOG_TYPE_DECLARE (e) =
- {
- .format = "api-msg-done(%s): %s",
- .format_args = "t4T4",
- .n_enum_strings = 2,
- .enum_strings =
- {
- "barrier",
- "mp-safe",
- }
- };
- /* *INDENT-ON* */
-
- struct
- {
- u32 barrier;
- u32 c;
- } *ed;
- ed = ELOG_DATA (am->elog_main, e);
- if (id < vec_len (am->msg_names) && am->msg_names[id])
- ed->c = elog_string (am->elog_main, (char *) am->msg_names[id]);
- else
- ed->c = elog_string (am->elog_main, "BOGUS");
- ed->barrier = is_mp_safe;
- }
-}
-
-void
-vl_msg_api_handler (void *the_msg)
+vl_msg_api_handler (void *the_msg, uword msg_len)
{
api_main_t *am = vlibapi_get_main ();
- msg_handler_internal (am, the_msg,
- (am->rx_trace
- && am->rx_trace->enabled) /* trace_it */ ,
- 1 /* do_it */ , 1 /* free_it */ );
+ msg_handler_internal (am, the_msg, msg_len,
+ (am->rx_trace && am->rx_trace->enabled) /* trace_it */,
+ 1 /* do_it */, 1 /* free_it */);
}
void
-vl_msg_api_handler_no_free (void *the_msg)
+vl_msg_api_handler_no_free (void *the_msg, uword msg_len)
{
api_main_t *am = vlibapi_get_main ();
- msg_handler_internal (am, the_msg,
- (am->rx_trace
- && am->rx_trace->enabled) /* trace_it */ ,
- 1 /* do_it */ , 0 /* free_it */ );
+ msg_handler_internal (am, the_msg, msg_len,
+ (am->rx_trace && am->rx_trace->enabled) /* trace_it */,
+ 1 /* do_it */, 0 /* free_it */);
}
void
-vl_msg_api_handler_no_trace_no_free (void *the_msg)
+vl_msg_api_handler_no_trace_no_free (void *the_msg, uword msg_len)
{
api_main_t *am = vlibapi_get_main ();
- msg_handler_internal (am, the_msg, 0 /* trace_it */ , 1 /* do_it */ ,
- 0 /* free_it */ );
+ msg_handler_internal (am, the_msg, msg_len, 0 /* trace_it */, 1 /* do_it */,
+ 0 /* free_it */);
}
/*
@@ -739,14 +655,13 @@ vl_msg_api_handler_no_trace_no_free (void *the_msg)
*
*/
void
-vl_msg_api_trace_only (void *the_msg)
+vl_msg_api_trace_only (void *the_msg, uword msg_len)
{
api_main_t *am = vlibapi_get_main ();
- msg_handler_internal (am, the_msg,
- (am->rx_trace
- && am->rx_trace->enabled) /* trace_it */ ,
- 0 /* do_it */ , 0 /* free_it */ );
+ msg_handler_internal (am, the_msg, msg_len,
+ (am->rx_trace && am->rx_trace->enabled) /* trace_it */,
+ 0 /* do_it */, 0 /* free_it */);
}
void
@@ -754,14 +669,16 @@ vl_msg_api_cleanup_handler (void *the_msg)
{
api_main_t *am = vlibapi_get_main ();
u16 id = clib_net_to_host_u16 (*((u16 *) the_msg));
+ vl_api_msg_data_t *m = vl_api_get_msg_data (am, id);
- if (PREDICT_FALSE (id >= vec_len (am->msg_cleanup_handlers)))
+ if (PREDICT_FALSE (!m))
{
clib_warning ("_vl_msg_id too large: %d\n", id);
return;
}
- if (am->msg_cleanup_handlers[id])
- (*am->msg_cleanup_handlers[id]) (the_msg);
+
+ if (m->cleanup_handler)
+ m->cleanup_handler (the_msg);
vl_msg_api_free (the_msg);
}
@@ -773,17 +690,17 @@ void
vl_msg_api_replay_handler (void *the_msg)
{
api_main_t *am = vlibapi_get_main ();
-
u16 id = clib_net_to_host_u16 (*((u16 *) the_msg));
+ vl_api_msg_data_t *m = vl_api_get_msg_data (am, id);
- if (PREDICT_FALSE (id >= vec_len (am->msg_handlers)))
+ if (PREDICT_FALSE (!m))
{
clib_warning ("_vl_msg_id too large: %d\n", id);
return;
}
/* do NOT trace the message... */
- if (am->msg_handlers[id])
- (*am->msg_handlers[id]) (the_msg);
+ if (m->handler)
+ m->handler (the_msg);
/* do NOT free the message buffer... */
}
@@ -797,31 +714,20 @@ vl_msg_api_get_msg_length (void *msg_arg)
* vl_msg_api_socket_handler
*/
void
-vl_msg_api_socket_handler (void *the_msg)
+vl_msg_api_socket_handler (void *the_msg, uword msg_len)
{
api_main_t *am = vlibapi_get_main ();
- msg_handler_internal (am, the_msg,
- (am->rx_trace
- && am->rx_trace->enabled) /* trace_it */ ,
- 1 /* do_it */ , 0 /* free_it */ );
+ msg_handler_internal (am, the_msg, msg_len,
+ (am->rx_trace && am->rx_trace->enabled) /* trace_it */,
+ 1 /* do_it */, 0 /* free_it */);
}
-#define foreach_msg_api_vector \
-_(msg_names) \
-_(msg_handlers) \
-_(msg_cleanup_handlers) \
-_(msg_endian_handlers) \
-_(msg_print_handlers) \
-_(api_trace_cfg) \
-_(message_bounce) \
-_(is_mp_safe) \
-_(is_autoendian)
-
void
vl_msg_api_config (vl_msg_api_msg_config_t * c)
{
api_main_t *am = vlibapi_get_main ();
+ vl_api_msg_data_t *m;
/*
* This happens during the java core tests if the message
@@ -840,55 +746,34 @@ vl_msg_api_config (vl_msg_api_msg_config_t * c)
return;
}
-#define _(a) vec_validate (am->a, c->id);
- foreach_msg_api_vector;
-#undef _
-
- if (am->msg_handlers[c->id] && am->msg_handlers[c->id] != c->handler)
- clib_warning
- ("BUG: re-registering 'vl_api_%s_t_handler'."
- "Handler was %llx, replaced by %llx",
- c->name, am->msg_handlers[c->id], c->handler);
-
- am->msg_names[c->id] = c->name;
- am->msg_handlers[c->id] = c->handler;
- am->msg_cleanup_handlers[c->id] = c->cleanup;
- am->msg_endian_handlers[c->id] = c->endian;
- am->msg_print_handlers[c->id] = c->print;
- am->message_bounce[c->id] = c->message_bounce;
- am->is_mp_safe[c->id] = c->is_mp_safe;
- am->is_autoendian[c->id] = c->is_autoendian;
-
- am->api_trace_cfg[c->id].size = c->size;
- am->api_trace_cfg[c->id].trace_enable = c->traced;
- am->api_trace_cfg[c->id].replay_enable = c->replay;
-}
-
-/*
- * vl_msg_api_set_handlers
- * preserve the old API for a while
- */
-void
-vl_msg_api_set_handlers (int id, char *name, void *handler, void *cleanup,
- void *endian, void *print, int size, int traced)
-{
- vl_msg_api_msg_config_t cfg;
- vl_msg_api_msg_config_t *c = &cfg;
-
- clib_memset (c, 0, sizeof (*c));
-
- c->id = id;
- c->name = name;
- c->handler = handler;
- c->cleanup = cleanup;
- c->endian = endian;
- c->print = print;
- c->traced = traced;
- c->replay = 1;
- c->message_bounce = 0;
- c->is_mp_safe = 0;
- c->is_autoendian = 0;
- vl_msg_api_config (c);
+ vec_validate (am->msg_data, c->id);
+ m = vl_api_get_msg_data (am, c->id);
+
+ if (m->handler && m->handler != c->handler)
+ clib_warning ("BUG: re-registering 'vl_api_%s_t_handler'."
+ "Handler was %llx, replaced by %llx",
+ c->name, m->handler, c->handler);
+
+ m->name = c->name;
+ m->handler = c->handler;
+ m->cleanup_handler = c->cleanup;
+ m->endian_handler = c->endian;
+ m->format_fn = c->format_fn;
+ m->tojson_handler = c->tojson;
+ m->fromjson_handler = c->fromjson;
+ m->calc_size_func = c->calc_size;
+ m->bounce = c->message_bounce;
+ m->is_mp_safe = c->is_mp_safe;
+ m->is_autoendian = c->is_autoendian;
+
+ m->trace_size = c->size;
+ m->trace_enable = c->traced;
+ m->replay_allowed = c->replay;
+
+ if (!am->msg_id_by_name)
+ am->msg_id_by_name = hash_create_string (0, sizeof (uword));
+
+ hash_set_mem (am->msg_id_by_name, c->name, c->id);
}
void
@@ -907,10 +792,10 @@ void
vl_msg_api_set_cleanup_handler (int msg_id, void *fp)
{
api_main_t *am = vlibapi_get_main ();
+ vl_api_msg_data_t *m = vl_api_get_msg_data (am, msg_id);
ASSERT (msg_id > 0);
- vec_validate (am->msg_cleanup_handlers, msg_id);
- am->msg_cleanup_handlers[msg_id] = fp;
+ m->cleanup_handler = fp;
}
void
@@ -918,8 +803,12 @@ vl_msg_api_queue_handler (svm_queue_t * q)
{
uword msg;
- while (!svm_queue_sub (q, (u8 *) & msg, SVM_Q_WAIT, 0))
- vl_msg_api_handler ((void *) msg);
+ while (!svm_queue_sub (q, (u8 *) &msg, SVM_Q_WAIT, 0))
+ {
+ VL_MSG_API_UNPOISON ((u8 *) msg);
+ msgbuf_t *msgbuf = (msgbuf_t *) ((u8 *) msg - offsetof (msgbuf_t, data));
+ vl_msg_api_handler ((void *) msg, ntohl (msgbuf->data_len));
+ }
}
u32
@@ -951,12 +840,6 @@ vl_msg_api_trace_get (api_main_t * am, vl_api_trace_which_t which)
}
}
-void
-vl_noop_handler (void *mp)
-{
-}
-
-
static u8 post_mortem_dump_enabled;
void
@@ -987,7 +870,7 @@ vl_msg_api_post_mortem_dump (void)
rv = write (2, "\n", 1);
return;
}
- rv = vl_msg_api_trace_save (am, VL_API_TRACE_RX, fp);
+ rv = vl_msg_api_trace_save (am, VL_API_TRACE_RX, fp, 0);
fclose (fp);
if (rv < 0)
{
@@ -1001,41 +884,6 @@ vl_msg_api_post_mortem_dump (void)
/* Layered message handling support */
void
-vl_msg_api_register_pd_handler (void *fp, u16 msg_id_host_byte_order)
-{
- api_main_t *am = vlibapi_get_main ();
-
- /* Mild idiot proofing */
- if (msg_id_host_byte_order > 10000)
- clib_warning ("msg_id_host_byte_order endian issue? %d arg vs %d",
- msg_id_host_byte_order,
- clib_net_to_host_u16 (msg_id_host_byte_order));
- vec_validate (am->pd_msg_handlers, msg_id_host_byte_order);
- am->pd_msg_handlers[msg_id_host_byte_order] = fp;
-}
-
-int
-vl_msg_api_pd_handler (void *mp, int rv)
-{
- api_main_t *am = vlibapi_get_main ();
- int (*fp) (void *, int);
- u16 msg_id;
-
- if (clib_arch_is_little_endian)
- msg_id = clib_net_to_host_u16 (*((u16 *) mp));
- else
- msg_id = *((u16 *) mp);
-
- if (msg_id >= vec_len (am->pd_msg_handlers)
- || am->pd_msg_handlers[msg_id] == 0)
- return rv;
-
- fp = am->pd_msg_handlers[msg_id];
- rv = (*fp) (mp, rv);
- return rv;
-}
-
-void
vl_msg_api_set_first_available_msg_id (u16 first_avail)
{
api_main_t *am = vlibapi_get_main ();
@@ -1204,8 +1052,9 @@ vl_api_from_api_to_new_vec (void *mp, vl_api_string_t * astr)
u8 *v = 0;
if (vl_msg_api_max_length (mp) < clib_net_to_host_u32 (astr->length))
- return format (0, "insane astr->length %u%c",
- clib_net_to_host_u32 (astr->length), 0);
+ return format (0, "Invalid astr->length %u > max (%u)%c",
+ clib_net_to_host_u32 (astr->length),
+ vl_msg_api_max_length (mp), 0);
vec_add (v, astr->buf, clib_net_to_host_u32 (astr->length));
return v;
}
diff --git a/src/vlibmemory/memory_shared.c b/src/vlibapi/memory_shared.c
index 1716f271466..efa45e1f5b5 100644
--- a/src/vlibmemory/memory_shared.c
+++ b/src/vlibapi/memory_shared.c
@@ -43,8 +43,8 @@
#define DEBUG_MESSAGE_BUFFER_OVERRUN 0
-CLIB_NOSANITIZE_ADDR static inline void *
-vl_msg_api_alloc_internal (svm_region_t * vlib_rp, int nbytes, int pool,
+__clib_nosanitize_addr static inline void *
+vl_msg_api_alloc_internal (svm_region_t *vlib_rp, int nbytes, int pool,
int may_return_null)
{
int i;
@@ -117,14 +117,16 @@ vl_msg_api_alloc_internal (svm_region_t * vlib_rp, int nbytes, int pool,
if (CLIB_DEBUG > 0)
{
u16 *msg_idp, msg_id;
+ vl_api_msg_data_t *m;
clib_warning
("garbage collect pool %d ring %d index %d", pool, i,
q->head);
msg_idp = (u16 *) (rv->data);
msg_id = clib_net_to_host_u16 (*msg_idp);
- if (msg_id < vec_len (vlibapi_get_main ()->msg_names))
+ m = vl_api_get_msg_data (am, msg_id);
+ if (m)
clib_warning ("msg id %d name %s", (u32) msg_id,
- vlibapi_get_main ()->msg_names[msg_id]);
+ m->name);
}
shmem_hdr->garbage_collects++;
goto collected;
@@ -545,8 +547,8 @@ vl_map_shmem (const char *region_name, int is_vlib)
if (strstr (region_name, vpe_api_region_suffix))
{
u8 *root_path = format (0, "%s", region_name);
- _vec_len (root_path) = (vec_len (root_path) -
- strlen (vpe_api_region_suffix));
+ vec_set_len (root_path,
+ vec_len (root_path) - strlen (vpe_api_region_suffix));
vec_terminate_c_string (root_path);
a->root_path = (const char *) root_path;
am->root_path = (const char *) root_path;
@@ -731,10 +733,7 @@ vl_unmap_shmem_internal (u8 is_client)
is_client ? svm_region_exit_client () : svm_region_exit ();
- /* $$$ more careful cleanup, valgrind run... */
- vec_free (am->msg_handlers);
- vec_free (am->msg_endian_handlers);
- vec_free (am->msg_print_handlers);
+ vec_free (am->msg_data);
}
void
@@ -768,13 +767,11 @@ vl_msg_api_send_shmem (svm_queue_t * q, u8 * elem)
{
if (PREDICT_FALSE (am->elog_trace_api_messages))
{
- /* *INDENT-OFF* */
ELOG_TYPE_DECLARE (e) =
{
.format = "api-client-queue-stuffed: %x%x",
.format_args = "i4i4",
};
- /* *INDENT-ON* */
struct
{
u32 hi, low;
diff --git a/src/vlibmemory/memory_shared.h b/src/vlibapi/memory_shared.h
index 4c4773d060b..4c4773d060b 100644
--- a/src/vlibmemory/memory_shared.h
+++ b/src/vlibapi/memory_shared.h
diff --git a/src/vlibapi/node_serialize.c b/src/vlibapi/node_serialize.c
index b50d79e2922..09b59247eab 100644
--- a/src/vlibapi/node_serialize.c
+++ b/src/vlibapi/node_serialize.c
@@ -176,7 +176,7 @@ vlib_node_unserialize (u8 * vector)
nstat_vms = unserialize_likely_small_unsigned_integer (sm);
vec_validate (nodes_by_thread, nstat_vms - 1);
- _vec_len (nodes_by_thread) = 0;
+ vec_set_len (nodes_by_thread, 0);
for (i = 0; i < nstat_vms; i++)
{
@@ -326,13 +326,11 @@ test_node_serialize_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_node_serialize_node, static) = {
.path = "test node serialize",
.short_help = "test node serialize [max-threads NN] nexts stats",
.function = test_node_serialize_command_fn,
};
-/* *INDENT-ON* */
#endif
/*
diff --git a/src/vlibapi/vat_helper_macros.h b/src/vlibapi/vat_helper_macros.h
index 239e2091dc1..1dd597cce96 100644
--- a/src/vlibapi/vat_helper_macros.h
+++ b/src/vlibapi/vat_helper_macros.h
@@ -62,6 +62,22 @@ do { \
mp->client_index = vam->my_client_index; \
} while(0);
+#define PING(_tm, mp_ping) \
+ do \
+ { \
+ socket_client_main_t *scm = vam->socket_client_main; \
+ if (scm && scm->socket_enable) \
+ mp_ping = vl_socket_client_msg_alloc (sizeof (*mp_ping)); \
+ else \
+ mp_ping = vl_msg_api_alloc_as_if_client (sizeof (*mp_ping)); \
+ mp_ping->_vl_msg_id = htons (VL_API_CONTROL_PING + 1); \
+ mp_ping->client_index = vam->my_client_index; \
+ vam->result_ready = 0; \
+ if (scm) \
+ scm->control_pings_outstanding++; \
+ } \
+ while (0);
+
/* S: send a message */
#define S(mp) \
do { \
diff --git a/src/vlibmemory/CMakeLists.txt b/src/vlibmemory/CMakeLists.txt
index b48ff7b5766..2ee4dd08ba9 100644
--- a/src/vlibmemory/CMakeLists.txt
+++ b/src/vlibmemory/CMakeLists.txt
@@ -14,18 +14,13 @@
add_vpp_library (vlibmemory
SOURCES
memory_api.c
- memory_shared.c
- memory_client.c
- socket_client.c
socket_api.c
- vlib_api.c
+ memclnt_api.c
vlib_api_cli.c
- ../vlibapi/api_shared.c
- ../vlibapi/node_serialize.c
+ vlib_api.c
INSTALL_HEADERS
vl_memory_msg_enum.h
- memory_shared.h
vl_memory_api_h.h
socket_client.h
memory_api.h
@@ -35,19 +30,30 @@ add_vpp_library (vlibmemory
API_FILES
memclnt.api
+ vlib.api
- LINK_LIBRARIES vppinfra svm vlib
+ LINK_LIBRARIES vppinfra svm vlib vlibapi
)
add_dependencies(vlibmemory vlibmemory_api_headers)
add_vpp_library (vlibmemoryclient
SOURCES
- memory_shared.c
memory_client.c
socket_client.c
- ../vlibapi/api_shared.c
- ../vlibapi/node_serialize.c
- LINK_LIBRARIES vppinfra svm
+ LINK_LIBRARIES vppinfra svm vlibapi
)
add_dependencies(vlibmemoryclient vlibmemory_api_headers)
+
+add_vat_test_library(vlib
+ vlibapi_test.c
+)
+##############################################################################
+# VAT2 plugins
+##############################################################################
+add_vpp_test_library(vlibmemoryclient
+ memclnt.api
+)
+add_vpp_test_library(vlibmemory
+ vlib.api
+)
diff --git a/src/vlibmemory/api.h b/src/vlibmemory/api.h
index 662805373ba..273cd2cee10 100644
--- a/src/vlibmemory/api.h
+++ b/src/vlibmemory/api.h
@@ -28,7 +28,6 @@
void vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
void vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
u16 vl_client_get_first_plugin_msg_id (const char *plugin_name);
-void vl_api_send_pending_rpc_requests (vlib_main_t * vm);
u8 *vl_api_serialize_message_table (api_main_t * am, u8 * vector);
always_inline void
diff --git a/src/vlibmemory/memclnt.api b/src/vlibmemory/memclnt.api
index 07c6d47b9fc..dc0f4e1c8a7 100644
--- a/src/vlibmemory/memclnt.api
+++ b/src/vlibmemory/memclnt.api
@@ -27,10 +27,11 @@ service {
};
/*
- * Create a client registration
+ * Create a client registration
*/
-manual_print
define memclnt_create {
+ option deprecated;
+
u32 context; /* opaque value to be returned in the reply */
i32 ctx_quota; /* requested punt context quota */
u64 input_queue; /* client's queue */
@@ -39,6 +40,8 @@ define memclnt_create {
};
define memclnt_create_reply {
+ option deprecated;
+
u32 context; /* opaque value from the create request */
i32 response; /* Non-negative = success */
u64 handle; /* handle by which vlib knows this client */
@@ -47,9 +50,8 @@ define memclnt_create_reply {
};
/*
- * Delete a client registration
+ * Delete a client registration
*/
-manual_print
define memclnt_delete {
u32 index; /* index, used e.g. by API trace replay */
u64 handle; /* handle by which vlib knows this client */
@@ -137,7 +139,7 @@ define api_versions_reply {
* at api trace replay time
*/
-manual_print define trace_plugin_msg_ids
+define trace_plugin_msg_ids
{
u32 client_index;
u32 context;
@@ -153,7 +155,7 @@ typedef message_table_entry
};
/*
- * Create a socket client registration.
+ * Create a socket client registration.
*/
define sockclnt_create {
u32 context; /* opaque value to be returned in the reply */
@@ -170,7 +172,7 @@ define sockclnt_create_reply {
};
/*
- * Delete a client registration
+ * Delete a client registration
*/
define sockclnt_delete {
u32 client_index;
@@ -209,3 +211,55 @@ autoreply define memclnt_keepalive
u32 client_index;
u32 context;
};
+
+/** \brief Control ping from client to api server request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define control_ping
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Control ping from the client to the server response
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param vpe_pid - the pid of the vpe, returned by the server
+*/
+define control_ping_reply
+{
+ u32 context;
+ i32 retval;
+ u32 client_index;
+ u32 vpe_pid;
+};
+
+define memclnt_create_v2 {
+ u32 context; /* opaque value to be returned in the reply */
+ i32 ctx_quota; /* requested punt context quota */
+ u64 input_queue; /* client's queue */
+ string name[64]; /* for show, find by name, whatever */
+ u32 api_versions[8]; /* client-server pairs use as desired */
+ bool keepalive[default=true]; /* dead client scan keepalives */
+};
+
+define memclnt_create_v2_reply {
+ u32 context; /* opaque value from the create request */
+ i32 response; /* Non-negative = success */
+ u64 handle; /* handle by which vlib knows this client */
+ u32 index; /* index, used e.g. by API trace replay */
+ u64 message_table; /* serialized message table in shmem */
+};
+
+define get_api_json {
+ u32 client_index;
+ u32 context;
+};
+
+define get_api_json_reply {
+ u32 context;
+ i32 retval;
+ string json[];
+};
diff --git a/src/vlibmemory/memclnt_api.c b/src/vlibmemory/memclnt_api.c
new file mode 100644
index 00000000000..d4106b10559
--- /dev/null
+++ b/src/vlibmemory/memclnt_api.c
@@ -0,0 +1,780 @@
+/*
+ *------------------------------------------------------------------
+ * memclnt_api.c VLIB API implementation
+ *
+ * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/format.h>
+#include <vppinfra/byte_order.h>
+#include <vppinfra/elog.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibapi/api_helper_macros.h>
+
+/**
+ * @file
+ * @brief Binary API messaging via shared memory
+ * Low-level, primary provisioning interface
+ */
+/*? %%clicmd:group_label Binary API CLI %% ?*/
+/*? %%syscfg:group_label Binary API configuration %% ?*/
+
+#define TRACE_VLIB_MEMORY_QUEUE 0
+
+#include <vlibmemory/vl_memory_msg_enum.h> /* enumerate all vlib messages */
+
+#define vl_typedefs /* define message structures */
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_typedefs
+
+/* instantiate all the print functions we know about */
+#define vl_printfun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_printfun
+
+/* instantiate all the endian swap functions we know about */
+#define vl_endianfun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_endianfun
+
+#define vl_calcsizefun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_calcsizefun
+
+static void
+vl_api_get_first_msg_id_t_handler (vl_api_get_first_msg_id_t *mp)
+{
+ vl_api_get_first_msg_id_reply_t *rmp;
+ vl_api_registration_t *regp;
+ uword *p;
+ api_main_t *am = vlibapi_get_main ();
+ vl_api_msg_range_t *rp;
+ u8 name[64];
+ u16 first_msg_id = ~0;
+ int rv = -7; /* VNET_API_ERROR_INVALID_VALUE */
+
+ regp = vl_api_client_index_to_registration (mp->client_index);
+ if (!regp)
+ return;
+
+ if (am->msg_range_by_name == 0)
+ goto out;
+ strncpy ((char *) name, (char *) mp->name, ARRAY_LEN (name));
+ name[ARRAY_LEN (name) - 1] = '\0';
+ p = hash_get_mem (am->msg_range_by_name, name);
+ if (p == 0)
+ goto out;
+
+ rp = vec_elt_at_index (am->msg_ranges, p[0]);
+ first_msg_id = rp->first_msg_id;
+ rv = 0;
+
+out:
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_GET_FIRST_MSG_ID_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = ntohl (rv);
+ rmp->first_msg_id = ntohs (first_msg_id);
+ vl_api_send_msg (regp, (u8 *) rmp);
+}
+
+void
+vl_api_api_versions_t_handler (vl_api_api_versions_t *mp)
+{
+ api_main_t *am = vlibapi_get_main ();
+ vl_api_api_versions_reply_t *rmp;
+ vl_api_registration_t *reg;
+ u32 nmsg = vec_len (am->api_version_list);
+ int msg_size = sizeof (*rmp) + sizeof (rmp->api_versions[0]) * nmsg;
+ int i;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ rmp = vl_msg_api_alloc (msg_size);
+ clib_memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id = ntohs (VL_API_API_VERSIONS_REPLY);
+
+ /* fill in the message */
+ rmp->context = mp->context;
+ rmp->count = htonl (nmsg);
+
+ for (i = 0; i < nmsg; ++i)
+ {
+ api_version_t *vl = &am->api_version_list[i];
+ rmp->api_versions[i].major = htonl (vl->major);
+ rmp->api_versions[i].minor = htonl (vl->minor);
+ rmp->api_versions[i].patch = htonl (vl->patch);
+ strncpy ((char *) rmp->api_versions[i].name, vl->name,
+ ARRAY_LEN (rmp->api_versions[i].name));
+ rmp->api_versions[i].name[ARRAY_LEN (rmp->api_versions[i].name) - 1] =
+ '\0';
+ }
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_control_ping_t_handler (vl_api_control_ping_t *mp)
+{
+ vl_api_control_ping_reply_t *rmp;
+ int rv = 0;
+
+ REPLY_MACRO2 (VL_API_CONTROL_PING_REPLY,
+ ({ rmp->vpe_pid = ntohl (getpid ()); }));
+}
+
+static void
+vl_api_get_api_json_t_handler (vl_api_get_api_json_t *mp)
+{
+ vl_api_get_api_json_reply_t *rmp;
+ api_main_t *am = vlibapi_get_main ();
+ int rv = 0, n = 0;
+ u8 *s = 0;
+
+ vl_api_registration_t *rp =
+ vl_api_client_index_to_registration (mp->client_index);
+ if (rp == 0)
+ return;
+
+ s = format (s, "[\n");
+ u8 **ptr;
+ vec_foreach (ptr, am->json_api_repr)
+ {
+ s = format (s, "%s,", ptr[0]);
+ }
+ s[vec_len (s) - 1] = ']'; // Replace last comma with a bracket
+ vec_terminate_c_string (s);
+ n = vec_len (s);
+
+done:
+ REPLY_MACRO3 (VL_API_GET_API_JSON_REPLY, n, ({
+ if (rv == 0)
+ {
+ vl_api_c_string_to_api_string ((char *) s, &rmp->json);
+ }
+ }));
+ vec_free (s);
+}
+
+#define foreach_vlib_api_msg \
+ _ (GET_FIRST_MSG_ID, get_first_msg_id) \
+ _ (API_VERSIONS, api_versions) \
+ _ (CONTROL_PING, control_ping) \
+ _ (GET_API_JSON, get_api_json)
+
+/*
+ * vl_api_init
+ */
+static int
+vlib_api_init (void)
+{
+ api_main_t *am = vlibapi_get_main ();
+ vl_msg_api_msg_config_t cfg;
+ vl_msg_api_msg_config_t *c = &cfg;
+
+ cJSON_Hooks cjson_hooks = {
+ .malloc_fn = clib_mem_alloc,
+ .free_fn = clib_mem_free,
+ };
+ cJSON_InitHooks (&cjson_hooks);
+
+ clib_memset (c, 0, sizeof (*c));
+
+#define _(N, n) \
+ do \
+ { \
+ c->id = VL_API_##N; \
+ c->name = #n; \
+ c->handler = vl_api_##n##_t_handler; \
+ c->endian = vl_api_##n##_t_endian; \
+ c->format_fn = vl_api_##n##_t_format; \
+ c->tojson = vl_api_##n##_t_tojson; \
+ c->fromjson = vl_api_##n##_t_fromjson; \
+ c->calc_size = vl_api_##n##_t_calc_size; \
+ c->size = sizeof (vl_api_##n##_t); \
+ c->traced = 1; /* trace, so these msgs print */ \
+ c->replay = 0; /* don't replay client create/delete msgs */ \
+ c->message_bounce = 0; /* don't bounce this message */ \
+ vl_msg_api_config (c); \
+ } \
+ while (0);
+
+ foreach_vlib_api_msg;
+#undef _
+
+ /* Mark messages as mp safe */
+ vl_api_set_msg_thread_safe (am, VL_API_GET_FIRST_MSG_ID, 1);
+ vl_api_set_msg_thread_safe (am, VL_API_API_VERSIONS, 1);
+ vl_api_set_msg_thread_safe (am, VL_API_CONTROL_PING, 1);
+ vl_api_set_msg_thread_safe (am, VL_API_CONTROL_PING_REPLY, 1);
+
+ return 0;
+}
+
+u64 vector_rate_histogram[SLEEP_N_BUCKETS];
+
+/*
+ * Callback to send ourselves a plugin numbering-space trace msg
+ */
+static void
+send_one_plugin_msg_ids_msg (u8 *name, u16 first_msg_id, u16 last_msg_id)
+{
+ vl_api_trace_plugin_msg_ids_t *mp;
+ api_main_t *am = vlibapi_get_main ();
+ vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
+ svm_queue_t *q;
+
+ mp = vl_msg_api_alloc_as_if_client (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+
+ mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_TRACE_PLUGIN_MSG_IDS);
+ strncpy ((char *) mp->plugin_name, (char *) name,
+ sizeof (mp->plugin_name) - 1);
+ mp->first_msg_id = clib_host_to_net_u16 (first_msg_id);
+ mp->last_msg_id = clib_host_to_net_u16 (last_msg_id);
+
+ q = shmem_hdr->vl_input_queue;
+
+ vl_msg_api_send_shmem (q, (u8 *) &mp);
+}
+
+void
+vl_api_save_msg_table (void)
+{
+ u8 *serialized_message_table;
+ api_main_t *am = vlibapi_get_main ();
+ u8 *chroot_file;
+ int fd, rv;
+
+ /*
+ * Snapshoot the api message table.
+ */
+ if (strstr ((char *) am->save_msg_table_filename, "..") ||
+ index ((char *) am->save_msg_table_filename, '/'))
+ {
+ clib_warning ("illegal save-message-table filename '%s'",
+ am->save_msg_table_filename);
+ return;
+ }
+
+ chroot_file = format (0, "/tmp/%s%c", am->save_msg_table_filename, 0);
+
+ fd = creat ((char *) chroot_file, 0644);
+
+ if (fd < 0)
+ {
+ clib_unix_warning ("creat");
+ return;
+ }
+
+ serialized_message_table = vl_api_serialize_message_table (am, 0);
+
+ rv =
+ write (fd, serialized_message_table, vec_len (serialized_message_table));
+
+ if (rv != vec_len (serialized_message_table))
+ clib_unix_warning ("write");
+
+ rv = close (fd);
+ if (rv < 0)
+ clib_unix_warning ("close");
+
+ vec_free (chroot_file);
+ vec_free (serialized_message_table);
+}
+
+clib_error_t *vat_builtin_main_init (vlib_main_t *vm) __attribute__ ((weak));
+clib_error_t *
+vat_builtin_main_init (vlib_main_t *vm)
+{
+ return 0;
+}
+
+static uword
+vl_api_clnt_process (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *f)
+{
+ vlib_global_main_t *vgm = vlib_get_global_main ();
+ int private_segment_rotor = 0, i, rv;
+ vl_socket_args_for_process_t *a;
+ vl_shmem_hdr_t *shm;
+ svm_queue_t *q;
+ clib_error_t *e;
+ api_main_t *am = vlibapi_get_main ();
+ f64 dead_client_scan_time;
+ f64 sleep_time, start_time;
+ f64 vector_rate;
+ clib_error_t *error;
+ uword event_type;
+ uword *event_data = 0;
+ f64 now;
+
+ if ((error = vl_sock_api_init (vm)))
+ {
+ clib_error_report (error);
+ clib_warning ("socksvr_api_init failed, quitting...");
+ return 0;
+ }
+
+ if ((rv = vlib_api_init ()) < 0)
+ {
+ clib_warning ("vlib_api_init returned %d, quitting...", rv);
+ return 0;
+ }
+
+ shm = am->shmem_hdr;
+ q = shm->vl_input_queue;
+
+ e = vlib_call_init_exit_functions (vm, &vgm->api_init_function_registrations,
+ 1 /* call_once */, 1 /* is_global */);
+ if (e)
+ clib_error_report (e);
+
+ e = vat_builtin_main_init (vm);
+ if (e)
+ clib_error_report (e);
+
+ sleep_time = 10.0;
+ dead_client_scan_time = vlib_time_now (vm) + 10.0;
+
+ /*
+ * Send plugin message range messages for each plugin we loaded
+ */
+ for (i = 0; i < vec_len (am->msg_ranges); i++)
+ {
+ vl_api_msg_range_t *rp = am->msg_ranges + i;
+ send_one_plugin_msg_ids_msg (rp->name, rp->first_msg_id,
+ rp->last_msg_id);
+ }
+
+ /*
+ * Save the api message table snapshot, if configured
+ */
+ if (am->save_msg_table_filename)
+ vl_api_save_msg_table ();
+
+ /* $$$ pay attention to frame size, control CPU usage */
+ while (1)
+ {
+ /*
+ * There's a reason for checking the queue before
+ * sleeping. If the vlib application crashes, it's entirely
+ * possible for a client to enqueue a connect request
+ * during the process restart interval.
+ *
+ * Unless some force of physics causes the new incarnation
+ * of the application to process the request, the client will
+ * sit and wait for Godot...
+ */
+ vector_rate = (f64) vlib_last_vectors_per_main_loop (vm);
+ start_time = vlib_time_now (vm);
+ while (1)
+ {
+ if (vl_mem_api_handle_rpc (vm, node) ||
+ vl_mem_api_handle_msg_main (vm, node))
+ {
+ vm->api_queue_nonempty = 0;
+ VL_MEM_API_LOG_Q_LEN ("q-underflow: len %d", 0);
+ sleep_time = 20.0;
+ break;
+ }
+
+ /* Allow no more than 10us without a pause */
+ if (vlib_time_now (vm) > start_time + 10e-6)
+ {
+ int index = SLEEP_400_US;
+ if (vector_rate > 40.0)
+ sleep_time = 400e-6;
+ else if (vector_rate > 20.0)
+ {
+ index = SLEEP_200_US;
+ sleep_time = 200e-6;
+ }
+ else if (vector_rate >= 1.0)
+ {
+ index = SLEEP_100_US;
+ sleep_time = 100e-6;
+ }
+ else
+ {
+ index = SLEEP_10_US;
+ sleep_time = 10e-6;
+ }
+ vector_rate_histogram[index] += 1;
+ break;
+ }
+ }
+
+ /*
+ * see if we have any private api shared-memory segments
+ * If so, push required context variables, and process
+ * a message.
+ */
+ if (PREDICT_FALSE (vec_len (am->vlib_private_rps)))
+ {
+ if (private_segment_rotor >= vec_len (am->vlib_private_rps))
+ private_segment_rotor = 0;
+ vl_mem_api_handle_msg_private (vm, node, private_segment_rotor++);
+ }
+
+ vlib_process_wait_for_event_or_clock (vm, sleep_time);
+ vec_reset_length (event_data);
+ event_type = vlib_process_get_events (vm, &event_data);
+ now = vlib_time_now (vm);
+
+ switch (event_type)
+ {
+ case QUEUE_SIGNAL_EVENT:
+ vm->queue_signal_pending = 0;
+ VL_MEM_API_LOG_Q_LEN ("q-awake: len %d", q->cursize);
+
+ break;
+ case SOCKET_READ_EVENT:
+ for (i = 0; i < vec_len (event_data); i++)
+ {
+ vl_api_registration_t *regp;
+
+ a = pool_elt_at_index (socket_main.process_args, event_data[i]);
+ regp = vl_socket_get_registration (a->reg_index);
+ if (regp)
+ {
+ vl_socket_process_api_msg (regp, (i8 *) a->data);
+ a = pool_elt_at_index (socket_main.process_args,
+ event_data[i]);
+ }
+ vec_free (a->data);
+ pool_put (socket_main.process_args, a);
+ }
+ break;
+
+ /* Timeout... */
+ case -1:
+ break;
+
+ default:
+ clib_warning ("unknown event type %d", event_type);
+ break;
+ }
+
+ if (now > dead_client_scan_time)
+ {
+ vl_mem_api_dead_client_scan (am, shm, now);
+ dead_client_scan_time = vlib_time_now (vm) + 10.0;
+ }
+ }
+
+ return 0;
+}
+
+VLIB_REGISTER_NODE (vl_api_clnt_node) = {
+ .function = vl_api_clnt_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "api-rx-from-ring",
+ .state = VLIB_NODE_STATE_DISABLED,
+ .process_log2_n_stack_bytes = 18,
+};
+
+void
+vl_mem_api_enable_disable (vlib_main_t *vm, int enable)
+{
+ vlib_node_set_state (
+ vm, vl_api_clnt_node.index,
+ (enable ? VLIB_NODE_STATE_POLLING : VLIB_NODE_STATE_DISABLED));
+}
+
+static uword
+api_rx_from_node (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ uword n_packets = frame->n_vectors;
+ uword n_left_from;
+ u32 *from;
+ static u8 *long_msg;
+
+ vec_validate (long_msg, 4095);
+ n_left_from = frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+
+ while (n_left_from > 0)
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+ void *msg;
+ uword msg_len;
+
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
+ from += 1;
+ n_left_from -= 1;
+
+ msg = b0->data + b0->current_data;
+ msg_len = b0->current_length;
+ if (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ ASSERT (long_msg != 0);
+ vec_set_len (long_msg, 0);
+ vec_add (long_msg, msg, msg_len);
+ while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b0 = vlib_get_buffer (vm, b0->next_buffer);
+ msg = b0->data + b0->current_data;
+ msg_len = b0->current_length;
+ vec_add (long_msg, msg, msg_len);
+ }
+ msg = long_msg;
+ msg_len = vec_len (long_msg);
+ }
+ vl_msg_api_handler_no_trace_no_free (msg, msg_len);
+ }
+
+ /* Free what we've been given. */
+ vlib_buffer_free (vm, vlib_frame_vector_args (frame), n_packets);
+
+ return n_packets;
+}
+
+VLIB_REGISTER_NODE (api_rx_from_node_node, static) = {
+ .function = api_rx_from_node,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .vector_size = 4,
+ .name = "api-rx-from-node",
+};
+
+static void
+vl_api_rpc_call_t_handler (vl_api_rpc_call_t *mp)
+{
+ vl_api_rpc_call_reply_t *rmp;
+ int (*fp) (void *);
+ i32 rv = 0;
+ vlib_main_t *vm = vlib_get_main ();
+
+ if (mp->function == 0)
+ {
+ rv = -1;
+ clib_warning ("rpc NULL function pointer");
+ }
+
+ else
+ {
+ if (mp->need_barrier_sync)
+ vlib_worker_thread_barrier_sync (vm);
+
+ fp = uword_to_pointer (mp->function, int (*) (void *));
+ rv = fp (mp->data);
+
+ if (mp->need_barrier_sync)
+ vlib_worker_thread_barrier_release (vm);
+ }
+
+ if (mp->send_reply)
+ {
+ svm_queue_t *q = vl_api_client_index_to_input_queue (mp->client_index);
+ if (q)
+ {
+ rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (VL_API_RPC_CALL_REPLY);
+ rmp->context = mp->context;
+ rmp->retval = rv;
+ vl_msg_api_send_shmem (q, (u8 *) &rmp);
+ }
+ }
+ if (mp->multicast)
+ {
+ clib_warning ("multicast not yet implemented...");
+ }
+}
+
+static void
+vl_api_rpc_call_reply_t_handler (vl_api_rpc_call_reply_t *mp)
+{
+ clib_warning ("unimplemented");
+}
+
+always_inline void
+vl_api_rpc_call_main_thread_inline (void *fp, u8 *data, u32 data_length,
+ u8 force_rpc)
+{
+ vl_api_rpc_call_t *mp;
+ vlib_main_t *vm_global = vlib_get_first_main ();
+ vlib_main_t *vm = vlib_get_main ();
+
+ /* Main thread and not a forced RPC: call the function directly */
+ if ((force_rpc == 0) && (vlib_get_thread_index () == 0))
+ {
+ void (*call_fp) (void *);
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ call_fp = fp;
+ call_fp (data);
+
+ vlib_worker_thread_barrier_release (vm);
+ return;
+ }
+
+ /* Otherwise, actually do an RPC */
+ mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + data_length);
+
+ clib_memset (mp, 0, sizeof (*mp));
+ clib_memcpy_fast (mp->data, data, data_length);
+ mp->_vl_msg_id = ntohs (VL_API_RPC_CALL);
+ mp->function = pointer_to_uword (fp);
+ mp->need_barrier_sync = 1;
+
+ /* Add to the pending vector. Thread 0 requires locking. */
+ if (vm == vm_global)
+ clib_spinlock_lock_if_init (&vm_global->pending_rpc_lock);
+ vec_add1 (vm->pending_rpc_requests, (uword) mp);
+ if (vm == vm_global)
+ clib_spinlock_unlock_if_init (&vm_global->pending_rpc_lock);
+}
+
+/*
+ * Check if called from worker threads.
+ * If so, make rpc call of fp through shmem.
+ * Otherwise, call fp directly
+ */
+void
+vl_api_rpc_call_main_thread (void *fp, u8 *data, u32 data_length)
+{
+ vl_api_rpc_call_main_thread_inline (fp, data, data_length, /*force_rpc */
+ 0);
+}
+
+/*
+ * Always make rpc call of fp through shmem, useful for calling from threads
+ * not setup as worker threads, such as DPDK callback thread
+ */
+void
+vl_api_force_rpc_call_main_thread (void *fp, u8 *data, u32 data_length)
+{
+ vl_api_rpc_call_main_thread_inline (fp, data, data_length, /*force_rpc */
+ 1);
+}
+
+static void
+vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t *mp)
+{
+ api_main_t *am = vlibapi_get_main ();
+ vl_api_msg_range_t *rp;
+ uword *p;
+
+ /* Noop (except for tracing) during normal operation */
+ if (am->replay_in_progress == 0)
+ return;
+
+ p = hash_get_mem (am->msg_range_by_name, mp->plugin_name);
+ if (p == 0)
+ {
+ clib_warning ("WARNING: traced plugin '%s' not in current image",
+ mp->plugin_name);
+ return;
+ }
+
+ rp = vec_elt_at_index (am->msg_ranges, p[0]);
+ if (rp->first_msg_id != clib_net_to_host_u16 (mp->first_msg_id))
+ {
+ clib_warning ("WARNING: traced plugin '%s' first message id %d not %d",
+ mp->plugin_name, clib_net_to_host_u16 (mp->first_msg_id),
+ rp->first_msg_id);
+ }
+
+ if (rp->last_msg_id != clib_net_to_host_u16 (mp->last_msg_id))
+ {
+ clib_warning ("WARNING: traced plugin '%s' last message id %d not %d",
+ mp->plugin_name, clib_net_to_host_u16 (mp->last_msg_id),
+ rp->last_msg_id);
+ }
+}
+
+#define foreach_rpc_api_msg \
+ _ (RPC_CALL, rpc_call) \
+ _ (RPC_CALL_REPLY, rpc_call_reply)
+
+#define foreach_plugin_trace_msg _ (TRACE_PLUGIN_MSG_IDS, trace_plugin_msg_ids)
+
+/*
+ * Set the rpc callback at our earliest possible convenience.
+ * This avoids ordering issues between thread_init() -> start_workers and
+ * an init function which we could define here. If we ever intend to use
+ * vlib all by itself, we can't create a link-time dependency on
+ * an init function here and a typical "call foo_init first"
+ * guitar lick.
+ */
+
+extern void *rpc_call_main_thread_cb_fn;
+
+static clib_error_t *
+rpc_api_hookup (vlib_main_t *vm)
+{
+ api_main_t *am = vlibapi_get_main ();
+#define _(N, n) \
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = 0, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ });
+ foreach_rpc_api_msg;
+#undef _
+
+#define _(N, n) \
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = 1, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ });
+ foreach_plugin_trace_msg
+#undef _
+
+ vl_api_allow_msg_replay (am, VL_API_TRACE_PLUGIN_MSG_IDS, 0);
+
+ /* No reason to halt the parade to create a trace record... */
+ vl_api_set_msg_thread_safe (am, VL_API_TRACE_PLUGIN_MSG_IDS, 1);
+ rpc_call_main_thread_cb_fn = vl_api_rpc_call_main_thread;
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (rpc_api_hookup);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibmemory/memory_api.c b/src/vlibmemory/memory_api.c
index 4287bd36e5f..39c6b0fd15b 100644
--- a/src/vlibmemory/memory_api.c
+++ b/src/vlibmemory/memory_api.c
@@ -28,7 +28,6 @@
#undef vl_typedefs
/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_printfun
#include <vlibmemory/vl_memory_api_h.h>
#undef vl_printfun
@@ -38,26 +37,6 @@
#include <vlibmemory/vl_memory_api_h.h>
#undef vl_endianfun
-static inline void *
-vl_api_memclnt_create_t_print (vl_api_memclnt_create_t * a, void *handle)
-{
- vl_print (handle, "vl_api_memclnt_create_t:\n");
- vl_print (handle, "name: %s\n", a->name);
- vl_print (handle, "input_queue: 0x%wx\n", a->input_queue);
- vl_print (handle, "context: %u\n", (unsigned) a->context);
- vl_print (handle, "ctx_quota: %ld\n", (long) a->ctx_quota);
- return handle;
-}
-
-static inline void *
-vl_api_memclnt_delete_t_print (vl_api_memclnt_delete_t * a, void *handle)
-{
- vl_print (handle, "vl_api_memclnt_delete_t:\n");
- vl_print (handle, "index: %u\n", (unsigned) a->index);
- vl_print (handle, "handle: 0x%wx\n", a->handle);
- return handle;
-}
-
volatile int **vl_api_queue_cursizes;
static void
@@ -212,6 +191,7 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp)
regp->name = format (0, "%s", mp->name);
vec_add1 (regp->name, 0);
+ regp->keepalive = true;
if (am->serialized_message_table_in_shmem == 0)
am->serialized_message_table_in_shmem =
@@ -238,6 +218,87 @@ vl_api_memclnt_create_t_handler (vl_api_memclnt_create_t * mp)
}
void
+vl_api_memclnt_create_v2_t_handler (vl_api_memclnt_create_v2_t *mp)
+{
+ vl_api_registration_t **regpp;
+ vl_api_registration_t *regp;
+ vl_api_memclnt_create_v2_reply_t *rp;
+ svm_queue_t *q;
+ int rv = 0;
+ void *oldheap;
+ api_main_t *am = vlibapi_get_main ();
+ u8 *msg_table;
+
+ /*
+ * This is tortured. Maintain a vlib-address-space private
+ * pool of client registrations. We use the shared-memory virtual
+ * address of client structure as a handle, to allow direct
+ * manipulation of context quota vbls from the client library.
+ *
+ * This scheme causes trouble w/ API message trace replay, since
+ * some random VA from clib_mem_alloc() certainly won't
+ * occur in the Linux sim. The (very) few places
+ * that care need to use the pool index.
+ *
+ * Putting the registration object(s) into a pool in shared memory and
+ * using the pool index as a handle seems like a great idea.
+ * Unfortunately, each and every reference to that pool would need
+ * to be protected by a mutex:
+ *
+ * Client VLIB
+ * ------ ----
+ * convert pool index to
+ * pointer.
+ * <deschedule>
+ * expand pool
+ * <deschedule>
+ * kaboom!
+ */
+
+ pool_get (am->vl_clients, regpp);
+
+ oldheap = vl_msg_push_heap ();
+ *regpp = clib_mem_alloc (sizeof (vl_api_registration_t));
+
+ regp = *regpp;
+ clib_memset (regp, 0, sizeof (*regp));
+ regp->registration_type = REGISTRATION_TYPE_SHMEM;
+ regp->vl_api_registration_pool_index = regpp - am->vl_clients;
+ regp->vlib_rp = am->vlib_rp;
+ regp->shmem_hdr = am->shmem_hdr;
+ regp->clib_file_index = am->shmem_hdr->clib_file_index;
+
+ q = regp->vl_input_queue = (svm_queue_t *) (uword) mp->input_queue;
+ VL_MSG_API_SVM_QUEUE_UNPOISON (q);
+
+ regp->name = format (0, "%s", mp->name);
+ vec_add1 (regp->name, 0);
+ regp->keepalive = mp->keepalive;
+
+ if (am->serialized_message_table_in_shmem == 0)
+ am->serialized_message_table_in_shmem =
+ vl_api_serialize_message_table (am, 0);
+
+ if (am->vlib_rp != am->vlib_primary_rp)
+ msg_table = vl_api_serialize_message_table (am, 0);
+ else
+ msg_table = am->serialized_message_table_in_shmem;
+
+ vl_msg_pop_heap (oldheap);
+
+ rp = vl_msg_api_alloc (sizeof (*rp));
+ rp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE_V2_REPLY);
+ rp->handle = (uword) regp;
+ rp->index = vl_msg_api_handle_from_index_and_epoch (
+ regp->vl_api_registration_pool_index, am->shmem_hdr->application_restarts);
+ rp->context = mp->context;
+ rp->response = ntohl (rv);
+ rp->message_table = pointer_to_uword (msg_table);
+
+ vl_msg_api_send_shmem (q, (u8 *) &rp);
+}
+
+void
vl_api_call_reaper_functions (u32 client_index)
{
clib_error_t *error = 0;
@@ -417,11 +478,12 @@ vl_api_memclnt_keepalive_t_handler (vl_api_memclnt_keepalive_t * mp)
* don't trace memclnt_keepalive[_reply] msgs
*/
-#define foreach_vlib_api_msg \
-_(MEMCLNT_CREATE, memclnt_create, 1) \
-_(MEMCLNT_DELETE, memclnt_delete, 1) \
-_(MEMCLNT_KEEPALIVE, memclnt_keepalive, 0) \
-_(MEMCLNT_KEEPALIVE_REPLY, memclnt_keepalive_reply, 0)
+#define foreach_vlib_api_msg \
+ _ (MEMCLNT_CREATE, memclnt_create, 0) \
+ _ (MEMCLNT_CREATE_V2, memclnt_create_v2, 0) \
+ _ (MEMCLNT_DELETE, memclnt_delete, 0) \
+ _ (MEMCLNT_KEEPALIVE, memclnt_keepalive, 0) \
+ _ (MEMCLNT_KEEPALIVE_REPLY, memclnt_keepalive_reply, 0)
/*
* memory_api_init
@@ -441,29 +503,40 @@ vl_mem_api_init (const char *region_name)
if ((rv = vl_map_shmem (region_name, 1 /* is_vlib */ )) < 0)
return rv;
-#define _(N,n,t) do { \
- c->id = VL_API_##N; \
- c->name = #n; \
- c->handler = vl_api_##n##_t_handler; \
- c->cleanup = vl_noop_handler; \
- c->endian = vl_api_##n##_t_endian; \
- c->print = vl_api_##n##_t_print; \
- c->size = sizeof(vl_api_##n##_t); \
- c->traced = t; /* trace, so these msgs print */ \
- c->replay = 0; /* don't replay client create/delete msgs */ \
- c->message_bounce = 0; /* don't bounce this message */ \
- vl_msg_api_config(c);} while (0);
+#define _(N, n, t) \
+ do \
+ { \
+ c->id = VL_API_##N; \
+ c->name = #n; \
+ c->handler = vl_api_##n##_t_handler; \
+ c->endian = vl_api_##n##_t_endian; \
+ c->format_fn = vl_api_##n##_t_format; \
+ c->size = sizeof (vl_api_##n##_t); \
+ c->traced = t; /* trace, so these msgs print */ \
+ c->replay = 0; /* don't replay client create/delete msgs */ \
+ c->message_bounce = 0; /* don't bounce this message */ \
+ vl_msg_api_config (c); \
+ } \
+ while (0);
foreach_vlib_api_msg;
#undef _
+#define vl_msg_name_crc_list
+#include <vlibmemory/memclnt.api.h>
+#undef vl_msg_name_crc_list
+
+#define _(id, n, crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
+ foreach_vl_msg_name_crc_memclnt;
+#undef _
+
/*
* special-case freeing of memclnt_delete messages, so we can
* simply munmap pairwise / private API segments...
*/
- am->message_bounce[VL_API_MEMCLNT_DELETE] = 1;
- am->is_mp_safe[VL_API_MEMCLNT_KEEPALIVE_REPLY] = 1;
- am->is_mp_safe[VL_API_MEMCLNT_KEEPALIVE] = 1;
+ am->msg_data[VL_API_MEMCLNT_DELETE].bounce = 1;
+ vl_api_set_msg_thread_safe (am, VL_API_MEMCLNT_KEEPALIVE_REPLY, 1);
+ vl_api_set_msg_thread_safe (am, VL_API_MEMCLNT_KEEPALIVE, 1);
vlib_set_queue_signal_callback (vm, memclnt_queue_callback);
@@ -588,12 +661,12 @@ vl_mem_api_dead_client_scan (api_main_t * am, vl_shmem_hdr_t * shm, f64 now)
vec_reset_length (dead_indices);
vec_reset_length (confused_indices);
- /* *INDENT-OFF* */
pool_foreach (regpp, am->vl_clients) {
+ if (!(*regpp)->keepalive)
+ continue;
vl_mem_send_client_keepalive_w_reg (am, now, regpp, &dead_indices,
- &confused_indices);
+ &confused_indices);
}
- /* *INDENT-ON* */
/* This should "never happen," but if it does, fix it... */
if (PREDICT_FALSE (vec_len (confused_indices) > 0))
@@ -686,6 +759,139 @@ vl_mem_api_dead_client_scan (api_main_t * am, vl_shmem_hdr_t * shm, f64 now)
}
}
+void (*vl_mem_api_fuzz_hook) (u16, void *);
+
+/* This is only to be called from a vlib/vnet app */
+static void
+vl_mem_api_handler_with_vm_node (api_main_t *am, svm_region_t *vlib_rp,
+ void *the_msg, vlib_main_t *vm,
+ vlib_node_runtime_t *node, u8 is_private)
+{
+ u16 id = clib_net_to_host_u16 (*((u16 *) the_msg));
+ vl_api_msg_data_t *m = vl_api_get_msg_data (am, id);
+ u8 *(*handler) (void *, void *, void *);
+ svm_region_t *old_vlib_rp;
+ void *save_shmem_hdr;
+ int is_mp_safe = 1;
+
+ if (PREDICT_FALSE (am->elog_trace_api_messages))
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "api-msg: %s",
+ .format_args = "T4",
+ };
+ struct
+ {
+ u32 c;
+ } * ed;
+ ed = ELOG_DATA (am->elog_main, e);
+ if (m && m->name)
+ ed->c = elog_string (am->elog_main, (char *) m->name);
+ else
+ ed->c = elog_string (am->elog_main, "BOGUS");
+ }
+
+ if (m && m->handler)
+ {
+ handler = (void *) m->handler;
+
+ if (PREDICT_FALSE (am->rx_trace && am->rx_trace->enabled))
+ vl_msg_api_trace (am, am->rx_trace, the_msg);
+
+ if (PREDICT_FALSE (am->msg_print_flag))
+ {
+ fformat (stdout, "[%d]: %s\n", id, m->name);
+ fformat (stdout, "%U", format_vl_api_msg_text, am, id, the_msg);
+ }
+ is_mp_safe = am->msg_data[id].is_mp_safe;
+
+ if (!is_mp_safe)
+ {
+ vl_msg_api_barrier_trace_context (am->msg_data[id].name);
+ vl_msg_api_barrier_sync ();
+ }
+ if (is_private)
+ {
+ old_vlib_rp = am->vlib_rp;
+ save_shmem_hdr = am->shmem_hdr;
+ am->vlib_rp = vlib_rp;
+ am->shmem_hdr = (void *) vlib_rp->user_ctx;
+ }
+
+ if (PREDICT_FALSE (vl_mem_api_fuzz_hook != 0))
+ (*vl_mem_api_fuzz_hook) (id, the_msg);
+
+ if (m->is_autoendian)
+ {
+ void (*endian_fp) (void *);
+ endian_fp = am->msg_data[id].endian_handler;
+ (*endian_fp) (the_msg);
+ }
+ if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
+ clib_call_callbacks (am->perf_counter_cbs, am, id, 0 /* before */);
+
+ (*handler) (the_msg, vm, node);
+
+ if (PREDICT_FALSE (vec_len (am->perf_counter_cbs) != 0))
+ clib_call_callbacks (am->perf_counter_cbs, am, id, 1 /* after */);
+ if (is_private)
+ {
+ am->vlib_rp = old_vlib_rp;
+ am->shmem_hdr = save_shmem_hdr;
+ }
+ if (!is_mp_safe)
+ vl_msg_api_barrier_release ();
+ }
+ else
+ {
+ clib_warning ("no handler for msg id %d", id);
+ }
+
+ /*
+ * Special-case, so we can e.g. bounce messages off the vnet
+ * main thread without copying them...
+ */
+ if (!m || !m->bounce)
+ {
+ if (is_private)
+ {
+ old_vlib_rp = am->vlib_rp;
+ save_shmem_hdr = am->shmem_hdr;
+ am->vlib_rp = vlib_rp;
+ am->shmem_hdr = (void *) vlib_rp->user_ctx;
+ }
+ vl_msg_api_free (the_msg);
+ if (is_private)
+ {
+ am->vlib_rp = old_vlib_rp;
+ am->shmem_hdr = save_shmem_hdr;
+ }
+ }
+
+ if (PREDICT_FALSE (am->elog_trace_api_messages))
+ {
+ ELOG_TYPE_DECLARE (e) = { .format = "api-msg-done(%s): %s",
+ .format_args = "t4T4",
+ .n_enum_strings = 2,
+ .enum_strings = {
+ "barrier",
+ "mp-safe",
+ } };
+
+ struct
+ {
+ u32 barrier;
+ u32 c;
+ } * ed;
+ ed = ELOG_DATA (am->elog_main, e);
+ if (m && m->name)
+ ed->c = elog_string (am->elog_main, (char *) m->name);
+ else
+ ed->c = elog_string (am->elog_main, "BOGUS");
+ ed->barrier = is_mp_safe;
+ }
+}
+
static inline int
void_mem_api_handle_msg_i (api_main_t * am, svm_region_t * vlib_rp,
vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -699,7 +905,7 @@ void_mem_api_handle_msg_i (api_main_t * am, svm_region_t * vlib_rp,
if (!svm_queue_sub2 (q, (u8 *) & mp))
{
VL_MSG_API_UNPOISON ((void *) mp);
- vl_msg_api_handler_with_vm_node (am, vlib_rp, (void *) mp, vm, node,
+ vl_mem_api_handler_with_vm_node (am, vlib_rp, (void *) mp, vm, node,
is_private);
return 0;
}
@@ -749,8 +955,8 @@ vl_mem_api_handle_rpc (vlib_main_t * vm, vlib_node_runtime_t * node)
for (i = 0; i < vec_len (vm->processing_rpc_requests); i++)
{
mp = vm->processing_rpc_requests[i];
- vl_msg_api_handler_with_vm_node (am, am->vlib_rp, (void *) mp, vm,
- node, 0 /* is_private */ );
+ vl_mem_api_handler_with_vm_node (am, am->vlib_rp, (void *) mp, vm,
+ node, 0 /* is_private */);
}
vl_msg_api_barrier_release ();
}
@@ -898,7 +1104,6 @@ vl_api_ring_command (vlib_main_t * vm,
vl_api_registration_t *regp = 0;
/* For horizontal scaling, add a hash table... */
- /* *INDENT-OFF* */
pool_foreach (regpp, am->vl_clients)
{
regp = *regpp;
@@ -910,7 +1115,6 @@ vl_api_ring_command (vlib_main_t * vm,
}
vlib_cli_output (vm, "regp %llx not found?", regp);
continue;
- /* *INDENT-ON* */
found:
vlib_cli_output (vm, "%U", format_api_message_rings, am,
0 /* print header */ , 0 /* notused */ );
@@ -924,14 +1128,12 @@ vl_api_ring_command (vlib_main_t * vm,
/*?
* Display binary api message allocation ring statistics
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_show_api_ring_command, static) =
{
.path = "show api ring-stats",
.short_help = "Message ring statistics",
.function = vl_api_ring_command,
};
-/* *INDENT-ON* */
clib_error_t *
vlibmemory_init (vlib_main_t * vm)
diff --git a/src/vlibmemory/memory_api.h b/src/vlibmemory/memory_api.h
index 843bf8935b2..4a1573c6544 100644
--- a/src/vlibmemory/memory_api.h
+++ b/src/vlibmemory/memory_api.h
@@ -23,7 +23,7 @@
#include <svm/queue.h>
#include <vlib/vlib.h>
#include <vlibapi/api.h>
-#include <vlibmemory/memory_shared.h>
+#include <vlibapi/memory_shared.h>
svm_queue_t *vl_api_client_index_to_input_queue (u32 index);
int vl_mem_api_init (const char *region_name);
diff --git a/src/vlibmemory/memory_client.c b/src/vlibmemory/memory_client.c
index 64650b64eca..8bce069f97b 100644
--- a/src/vlibmemory/memory_client.c
+++ b/src/vlibmemory/memory_client.c
@@ -39,8 +39,11 @@
#include <vlibmemory/vl_memory_api_h.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_calcsizefun
+
/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) clib_warning (__VA_ARGS__)
#define vl_printfun
#include <vlibmemory/vl_memory_api_h.h>
#undef vl_printfun
@@ -98,23 +101,21 @@ vl_api_name_and_crc_free (void)
if (!am->msg_index_by_name_and_crc)
return;
- /* *INDENT-OFF* */
hash_foreach_pair (hp, am->msg_index_by_name_and_crc,
({
vec_add1 (keys, (u8 *) hp->key);
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (keys); i++)
vec_free (keys[i]);
vec_free (keys);
hash_free (am->msg_index_by_name_and_crc);
}
-CLIB_NOSANITIZE_ADDR static void
+__clib_nosanitize_addr static void
VL_API_VEC_UNPOISON (const void *v)
{
const vec_header_t *vh = &((vec_header_t *) v)[-1];
- CLIB_MEM_UNPOISON (vh, sizeof (*vh) + vec_len (v));
+ clib_mem_unpoison (vh, sizeof (*vh) + vec_len (v));
}
static void
@@ -151,11 +152,6 @@ vl_api_memclnt_create_reply_t_handler (vl_api_memclnt_create_reply_t * mp)
}
}
-static void
-noop_handler (void *notused)
-{
-}
-
void vl_msg_api_send_shmem (svm_queue_t * q, u8 * elem);
int
vl_client_connect (const char *name, int ctx_quota, int input_queue_size)
@@ -188,7 +184,7 @@ vl_client_connect (const char *name, int ctx_quota, int input_queue_size)
return -1;
}
- CLIB_MEM_UNPOISON (shmem_hdr, sizeof (*shmem_hdr));
+ clib_mem_unpoison (shmem_hdr, sizeof (*shmem_hdr));
VL_MSG_API_SVM_QUEUE_UNPOISON (shmem_hdr->vl_input_queue);
oldheap = vl_msg_push_heap ();
@@ -240,7 +236,8 @@ vl_client_connect (const char *name, int ctx_quota, int input_queue_size)
}
rv = clib_net_to_host_u32 (rp->response);
- vl_msg_api_handler ((void *) rp);
+ msgbuf_t *msgbuf = (msgbuf_t *) ((u8 *) rp - offsetof (msgbuf_t, data));
+ vl_msg_api_handler ((void *) rp, ntohl (msgbuf->data_len));
break;
}
return (rv);
@@ -289,6 +286,7 @@ vl_client_disconnect (void)
svm_queue_t *vl_input_queue;
api_main_t *am = vlibapi_get_main ();
time_t begin;
+ msgbuf_t *msgbuf;
vl_input_queue = am->vl_input_queue;
vl_client_send_disconnect (0 /* wait for reply */ );
@@ -321,10 +319,12 @@ vl_client_disconnect (void)
if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_DELETE_REPLY)
{
clib_warning ("queue drain: %d", ntohs (rp->_vl_msg_id));
- vl_msg_api_handler ((void *) rp);
+ msgbuf = (msgbuf_t *) ((u8 *) rp - offsetof (msgbuf_t, data));
+ vl_msg_api_handler ((void *) rp, ntohl (msgbuf->data_len));
continue;
}
- vl_msg_api_handler ((void *) rp);
+ msgbuf = (msgbuf_t *) ((u8 *) rp - offsetof (msgbuf_t, data));
+ vl_msg_api_handler ((void *) rp, ntohl (msgbuf->data_len));
break;
}
@@ -362,14 +362,21 @@ _(MEMCLNT_KEEPALIVE, memclnt_keepalive)
void
vl_client_install_client_message_handlers (void)
{
-
-#define _(N,n) \
- vl_msg_api_set_handlers(VL_API_##N, #n, \
- vl_api_##n##_t_handler, \
- noop_handler, \
- vl_api_##n##_t_endian, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 1);
+ api_main_t *am = vlibapi_get_main ();
+#define _(N, n) \
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = 0, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ }); \
+ am->msg_data[VL_API_##N].replay_allowed = 0;
foreach_api_msg;
#undef _
}
@@ -501,6 +508,14 @@ vl_client_connect_to_vlib_thread_fn (const char *svm_name,
thread_fn, arg, 1 /* do map */ );
}
+void
+vl_client_stop_rx_thread (svm_queue_t *vl_input_queue)
+{
+ vl_api_rx_thread_exit_t *ep;
+ ep = vl_msg_api_alloc (sizeof (*ep));
+ ep->_vl_msg_id = ntohs (VL_API_RX_THREAD_EXIT);
+ vl_msg_api_send_shmem (vl_input_queue, (u8 *) &ep);
+}
static void
disconnect_from_vlib_internal (u8 do_unmap)
@@ -511,10 +526,7 @@ disconnect_from_vlib_internal (u8 do_unmap)
if (mm->rx_thread_jmpbuf_valid)
{
- vl_api_rx_thread_exit_t *ep;
- ep = vl_msg_api_alloc (sizeof (*ep));
- ep->_vl_msg_id = ntohs (VL_API_RX_THREAD_EXIT);
- vl_msg_api_send_shmem (am->vl_input_queue, (u8 *) & ep);
+ vl_client_stop_rx_thread (am->vl_input_queue);
pthread_join (mm->rx_thread_handle, (void **) &junk);
}
if (mm->connected_to_vlib)
@@ -554,6 +566,8 @@ vl_client_get_first_plugin_msg_id (const char *plugin_name)
vl_api_get_first_msg_id_t *mp;
api_main_t *am = vlibapi_get_main ();
memory_client_main_t *mm = vlibapi_get_memory_client_main ();
+ vl_api_msg_data_t *m =
+ vl_api_get_msg_data (am, VL_API_GET_FIRST_MSG_ID_REPLY);
f64 timeout;
void *old_handler;
clib_time_t clib_time;
@@ -566,9 +580,13 @@ vl_client_get_first_plugin_msg_id (const char *plugin_name)
clib_time_init (&clib_time);
/* Push this plugin's first_msg_id_reply handler */
- old_handler = am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY];
- am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = (void *)
- vl_api_get_first_msg_id_reply_t_handler;
+ old_handler = m->handler;
+ m->handler = (void *) vl_api_get_first_msg_id_reply_t_handler;
+ if (!m->calc_size_func)
+ {
+ m->calc_size_func =
+ (uword (*) (void *)) vl_api_get_first_msg_id_reply_t_calc_size;
+ }
/* Ask the data-plane for the message-ID base of the indicated plugin */
mm->first_msg_id_reply_ready = 0;
@@ -595,7 +613,7 @@ vl_client_get_first_plugin_msg_id (const char *plugin_name)
sock_err:
/* Restore old handler */
- am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = old_handler;
+ m->handler = old_handler;
return -1;
}
@@ -620,7 +638,7 @@ vl_client_get_first_plugin_msg_id (const char *plugin_name)
}
}
/* Restore old handler */
- am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = old_handler;
+ m->handler = old_handler;
return rv;
}
@@ -628,7 +646,7 @@ vl_client_get_first_plugin_msg_id (const char *plugin_name)
result:
/* Restore the old handler */
- am->msg_handlers[VL_API_GET_FIRST_MSG_ID_REPLY] = old_handler;
+ m->handler = old_handler;
if (rv == (u16) ~ 0)
clib_warning ("plugin '%s' not registered", plugin_name);
diff --git a/src/vlibmemory/memory_client.h b/src/vlibmemory/memory_client.h
index a0168693a4b..3b9b2d8d959 100644
--- a/src/vlibmemory/memory_client.h
+++ b/src/vlibmemory/memory_client.h
@@ -18,7 +18,7 @@
#ifndef SRC_VLIBMEMORY_MEMORY_CLIENT_H_
#define SRC_VLIBMEMORY_MEMORY_CLIENT_H_
-#include <vlibmemory/memory_shared.h>
+#include <vlibapi/memory_shared.h>
#include <setjmp.h>
/*
@@ -62,6 +62,7 @@ int vl_client_connect_to_vlib_no_rx_pthread_no_map (const char *svm_name,
int rx_queue_size);
void vl_client_install_client_message_handlers (void);
u8 vl_mem_client_is_connected (void);
+void vl_client_stop_rx_thread (svm_queue_t *vl_input_queue);
always_inline memory_client_main_t *
vlibapi_get_memory_client_main (void)
diff --git a/src/vlibmemory/socket_api.c b/src/vlibmemory/socket_api.c
index 60ca650d92f..26be8d09522 100644
--- a/src/vlibmemory/socket_api.c
+++ b/src/vlibmemory/socket_api.c
@@ -35,7 +35,6 @@
#undef vl_typedefs
/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_printfun
#include <vlibmemory/vl_memory_api_h.h>
#undef vl_printfun
@@ -45,6 +44,10 @@
#include <vlibmemory/vl_memory_api_h.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_calcsizefun
+
socket_main_t socket_main;
#define SOCK_API_REG_HANDLE_BIT (1<<31)
@@ -84,7 +87,6 @@ vl_sock_api_dump_clients (vlib_main_t * vm, api_main_t * am)
vlib_cli_output (vm, "Socket clients");
vlib_cli_output (vm, "%20s %8s", "Name", "Fildesc");
- /* *INDENT-OFF* */
pool_foreach (reg, sm->registration_pool)
{
if (reg->registration_type == REGISTRATION_TYPE_SOCKET_SERVER) {
@@ -92,7 +94,6 @@ vl_sock_api_dump_clients (vlib_main_t * vm, api_main_t * am)
vlib_cli_output (vm, "%20s %8d", reg->name, f->file_descriptor);
}
}
-/* *INDENT-ON* */
}
vl_api_registration_t *
@@ -128,7 +129,7 @@ vl_socket_api_send (vl_api_registration_t * rp, u8 * elem)
cf = vl_api_registration_file (rp);
ASSERT (rp->registration_type > REGISTRATION_TYPE_SHMEM);
- if (msg_id >= vec_len (am->api_trace_cfg))
+ if (msg_id >= vec_len (am->msg_data))
{
clib_warning ("id out of range: %d", msg_id);
vl_msg_api_free ((void *) elem);
@@ -179,7 +180,8 @@ vl_socket_free_registration_index (u32 pool_index)
}
rp = pool_elt_at_index (socket_main.registration_pool, pool_index);
- vl_api_call_reaper_functions (pool_index);
+ vl_api_call_reaper_functions (
+ clib_host_to_net_u32 (sock_api_registration_handle (rp)));
ASSERT (rp->registration_type != REGISTRATION_TYPE_FREE);
for (i = 0; i < vec_len (rp->additional_fds_to_close); i++)
@@ -200,7 +202,7 @@ vl_socket_process_api_msg (vl_api_registration_t * rp, i8 * input_v)
u8 *the_msg = (u8 *) (mbp->data);
socket_main.current_rp = rp;
- vl_msg_api_socket_handler (the_msg);
+ vl_msg_api_socket_handler (the_msg, ntohl (mbp->data_len));
socket_main.current_rp = 0;
}
@@ -216,6 +218,12 @@ static void
socket_cleanup_pending_remove_registration_cb (u32 *preg_index)
{
vl_api_registration_t *rp = vl_socket_get_registration (*preg_index);
+ if (!rp)
+ {
+ /* Might already have gone */
+ return;
+ }
+
clib_file_main_t *fm = &file_main;
u32 pending_remove_file_index = vl_api_registration_file_index (rp);
@@ -278,6 +286,10 @@ vl_socket_read_ready (clib_file_t * uf)
}
rp = vl_socket_get_registration (reg_index);
+ if (!rp)
+ {
+ return 0;
+ }
/* Ignore unprocessed_input for now, n describes input_buffer for now. */
n = read (uf->file_descriptor, socket_main.input_buffer,
@@ -398,7 +410,7 @@ vl_socket_write_ready (clib_file_t * uf)
while (remaining_bytes > 0)
{
bytes_to_send = remaining_bytes > 4096 ? 4096 : remaining_bytes;
- n = write (uf->file_descriptor, p, bytes_to_send);
+ n = send (uf->file_descriptor, p, bytes_to_send, MSG_NOSIGNAL);
if (n < 0)
{
if (errno == EAGAIN)
@@ -495,7 +507,13 @@ vl_api_sockclnt_create_t_handler (vl_api_sockclnt_create_t * mp)
regp = socket_main.current_rp;
- ASSERT (regp->registration_type == REGISTRATION_TYPE_SOCKET_SERVER);
+ /* client already connected through shared memory? */
+ if (!regp || regp->registration_type != REGISTRATION_TYPE_SOCKET_SERVER)
+ {
+ clib_warning (
+ "unsupported API call: already connected though shared memory?");
+ return;
+ }
regp->name = format (0, "%s%c", mp->name, 0);
@@ -507,7 +525,6 @@ vl_api_sockclnt_create_t_handler (vl_api_sockclnt_create_t * mp)
rp->response = htonl (rv);
rp->count = htons (nmsg);
- /* *INDENT-OFF* */
hash_foreach_pair (hp, am->msg_index_by_name_and_crc,
({
rp->message_table[i].index = htons(hp->value[0]);
@@ -517,7 +534,6 @@ vl_api_sockclnt_create_t_handler (vl_api_sockclnt_create_t * mp)
64-1 /* chars to copy, without zero byte. */);
i++;
}));
- /* *INDENT-ON* */
vl_api_send_msg (regp, (u8 *) rp);
}
@@ -686,7 +702,7 @@ vl_api_sock_init_shm_t_handler (vl_api_sock_init_shm_t * mp)
/* delete the unused heap created in ssvm_server_init_memfd and mark it
* accessible again for ASAN */
clib_mem_destroy_heap (memfd->sh->heap);
- CLIB_MEM_UNPOISON ((void *) memfd->sh->ssvm_va, memfd->ssvm_size);
+ clib_mem_unpoison ((void *) memfd->sh->ssvm_va, memfd->ssvm_size);
/* Remember to close this fd when the socket connection goes away */
vec_add1 (regp->additional_fds_to_close, memfd->fd);
@@ -765,14 +781,15 @@ reply:
vl_sock_api_send_fd_msg (cf->file_descriptor, &memfd->fd, 1);
}
-#define foreach_vlib_api_msg \
- _(SOCKCLNT_CREATE, sockclnt_create, 1) \
- _(SOCKCLNT_DELETE, sockclnt_delete, 1) \
- _(SOCK_INIT_SHM, sock_init_shm, 1)
+#define foreach_vlib_api_msg \
+ _ (SOCKCLNT_CREATE, sockclnt_create, 0) \
+ _ (SOCKCLNT_DELETE, sockclnt_delete, 0) \
+ _ (SOCK_INIT_SHM, sock_init_shm, 0)
clib_error_t *
vl_sock_api_init (vlib_main_t * vm)
{
+ api_main_t *am = vlibapi_get_main ();
clib_file_main_t *fm = &file_main;
clib_file_t template = { 0 };
vl_api_registration_t *rp;
@@ -784,13 +801,20 @@ vl_sock_api_init (vlib_main_t * vm)
if (sm->socket_name == 0)
return 0;
-#define _(N,n,t) \
- vl_msg_api_set_handlers(VL_API_##N, #n, \
- vl_api_##n##_t_handler, \
- vl_noop_handler, \
- vl_api_##n##_t_endian, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), t);
+#define _(N, n, t) \
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = t, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ }); \
+ am->msg_data[VL_API_##N].replay_allowed = 0;
foreach_vlib_api_msg;
#undef _
@@ -827,13 +851,11 @@ socket_exit (vlib_main_t * vm)
if (sm->registration_pool)
{
u32 index;
- /* *INDENT-OFF* */
pool_foreach (rp, sm->registration_pool) {
vl_api_registration_del_file (rp);
index = rp->vl_api_registration_pool_index;
vl_socket_free_registration_index (index);
}
-/* *INDENT-ON* */
}
return 0;
diff --git a/src/vlibmemory/socket_client.c b/src/vlibmemory/socket_client.c
index 69126f88963..ad28136dc07 100644
--- a/src/vlibmemory/socket_client.c
+++ b/src/vlibmemory/socket_client.c
@@ -22,6 +22,14 @@
#define _GNU_SOURCE
#include <sys/socket.h>
+#ifdef __FreeBSD__
+#define _WANT_UCRED
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/ucred.h>
+#include <sys/un.h>
+#endif /* __FreeBSD__ */
+
#include <svm/ssvm.h>
#include <vlibmemory/socket_client.h>
#include <vlibmemory/memory_client.h>
@@ -36,8 +44,11 @@
#include <vlibmemory/vl_memory_api_h.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_calcsizefun
+
/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) clib_warning (__VA_ARGS__)
#define vl_printfun
#include <vlibmemory/vl_memory_api_h.h>
#undef vl_printfun
@@ -84,12 +95,11 @@ vl_socket_client_read_internal (socket_client_main_t * scm, int wait)
while (1)
{
- while (vec_len (scm->socket_rx_buffer) < sizeof (*mbp))
+ current_rx_index = vec_len (scm->socket_rx_buffer);
+ while (current_rx_index < sizeof (*mbp))
{
- current_rx_index = vec_len (scm->socket_rx_buffer);
vec_validate (scm->socket_rx_buffer, current_rx_index
+ scm->socket_buffer_size - 1);
- _vec_len (scm->socket_rx_buffer) = current_rx_index;
n = read (scm->socket_fd, scm->socket_rx_buffer + current_rx_index,
scm->socket_buffer_size);
if (n < 0)
@@ -98,10 +108,12 @@ vl_socket_client_read_internal (socket_client_main_t * scm, int wait)
continue;
clib_unix_warning ("socket_read");
+ vec_set_len (scm->socket_rx_buffer, current_rx_index);
return -1;
}
- _vec_len (scm->socket_rx_buffer) += n;
+ current_rx_index += n;
}
+ vec_set_len (scm->socket_rx_buffer, current_rx_index);
#if CLIB_DEBUG > 1
if (n > 0)
@@ -112,32 +124,32 @@ vl_socket_client_read_internal (socket_client_main_t * scm, int wait)
data_len = ntohl (mbp->data_len);
current_rx_index = vec_len (scm->socket_rx_buffer);
vec_validate (scm->socket_rx_buffer, current_rx_index + data_len);
- _vec_len (scm->socket_rx_buffer) = current_rx_index;
mbp = (msgbuf_t *) (scm->socket_rx_buffer);
msg_size = data_len + sizeof (*mbp);
- while (vec_len (scm->socket_rx_buffer) < msg_size)
+ while (current_rx_index < msg_size)
{
- n = read (scm->socket_fd,
- scm->socket_rx_buffer + vec_len (scm->socket_rx_buffer),
- msg_size - vec_len (scm->socket_rx_buffer));
+ n = read (scm->socket_fd, scm->socket_rx_buffer + current_rx_index,
+ msg_size - current_rx_index);
if (n < 0)
{
if (errno == EAGAIN)
continue;
clib_unix_warning ("socket_read");
+ vec_set_len (scm->socket_rx_buffer, current_rx_index);
return -1;
}
- _vec_len (scm->socket_rx_buffer) += n;
+ current_rx_index += n;
}
+ vec_set_len (scm->socket_rx_buffer, current_rx_index);
if (vec_len (scm->socket_rx_buffer) >= data_len + sizeof (*mbp))
{
- vl_msg_api_socket_handler ((void *) (mbp->data));
+ vl_msg_api_socket_handler ((void *) (mbp->data), data_len);
if (vec_len (scm->socket_rx_buffer) == data_len + sizeof (*mbp))
- _vec_len (scm->socket_rx_buffer) = 0;
+ vec_set_len (scm->socket_rx_buffer, 0);
else
vec_delete (scm->socket_rx_buffer, data_len + sizeof (*mbp), 0);
mbp = 0;
@@ -175,11 +187,11 @@ static int
vl_socket_client_write_internal (socket_client_main_t * scm)
{
int n;
-
+ int len = vec_len (scm->socket_tx_buffer);
msgbuf_t msgbuf = {
.q = 0,
.gc_mark_timestamp = 0,
- .data_len = htonl (scm->socket_tx_nbytes),
+ .data_len = htonl (len),
};
n = write (scm->socket_fd, &msgbuf, sizeof (msgbuf));
@@ -189,8 +201,11 @@ vl_socket_client_write_internal (socket_client_main_t * scm)
return -1;
}
- n = write (scm->socket_fd, scm->socket_tx_buffer, scm->socket_tx_nbytes);
- if (n < scm->socket_tx_nbytes)
+ n = write (scm->socket_fd, scm->socket_tx_buffer, len);
+
+ vec_set_len (scm->socket_tx_buffer, 0);
+
+ if (n < len)
{
clib_unix_warning ("socket write (msg)");
return -1;
@@ -220,7 +235,7 @@ vl_socket_client_write2 (socket_client_main_t * scm)
void *
vl_socket_client_msg_alloc2 (socket_client_main_t * scm, int nbytes)
{
- scm->socket_tx_nbytes = nbytes;
+ vec_set_len (scm->socket_tx_buffer, nbytes);
return ((void *) scm->socket_tx_buffer);
}
@@ -271,7 +286,11 @@ vl_sock_api_recv_fd_msg_internal (socket_client_main_t * scm, int fds[],
struct msghdr mh = { 0 };
struct iovec iov[1];
ssize_t size = 0;
+#ifdef __linux__
struct ucred *cr = 0;
+#elif __FreeBSD__
+ struct cmsgcred *cr = 0;
+#endif /* __linux__ */
struct cmsghdr *cmsg;
pid_t pid __attribute__ ((unused));
uid_t uid __attribute__ ((unused));
@@ -311,6 +330,7 @@ vl_sock_api_recv_fd_msg_internal (socket_client_main_t * scm, int fds[],
{
if (cmsg->cmsg_level == SOL_SOCKET)
{
+#ifdef __linux__
if (cmsg->cmsg_type == SCM_CREDENTIALS)
{
cr = (struct ucred *) CMSG_DATA (cmsg);
@@ -318,6 +338,15 @@ vl_sock_api_recv_fd_msg_internal (socket_client_main_t * scm, int fds[],
gid = cr->gid;
pid = cr->pid;
}
+#elif __FreeBSD__
+ if (cmsg->cmsg_type == SCM_CREDS)
+ {
+ cr = (struct cmsgcred *) CMSG_DATA (cmsg);
+ uid = cr->cmcred_uid;
+ gid = cr->cmcred_gid;
+ pid = cr->cmcred_pid;
+ }
+#endif /* __linux__ */
else if (cmsg->cmsg_type == SCM_RIGHTS)
{
clib_memcpy_fast (fds, CMSG_DATA (cmsg), sizeof (int) * n_fds);
@@ -423,22 +452,23 @@ vl_api_sockclnt_create_reply_t_handler (vl_api_sockclnt_create_reply_t * mp)
_(SOCKCLNT_CREATE_REPLY, sockclnt_create_reply) \
_(SOCK_INIT_SHM_REPLY, sock_init_shm_reply) \
-static void
-noop_handler (void *notused)
-{
-}
-
void
vl_sock_client_install_message_handlers (void)
{
-#define _(N,n) \
- vl_msg_api_set_handlers(VL_API_##N, #n, \
- vl_api_##n##_t_handler, \
- noop_handler, \
- vl_api_##n##_t_endian, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 1);
+#define _(N, n) \
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = 0, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ });
foreach_sock_client_api_msg;
#undef _
}
@@ -477,8 +507,8 @@ vl_socket_client_connect_internal (socket_client_main_t * scm,
SOCKET_CLIENT_DEFAULT_BUFFER_SIZE;
vec_validate (scm->socket_tx_buffer, scm->socket_buffer_size - 1);
vec_validate (scm->socket_rx_buffer, scm->socket_buffer_size - 1);
- _vec_len (scm->socket_rx_buffer) = 0;
- _vec_len (scm->socket_tx_buffer) = 0;
+ vec_set_len (scm->socket_rx_buffer, 0);
+ vec_set_len (scm->socket_tx_buffer, 0);
scm->name = format (0, "%s", client_name);
mp = vl_socket_client_msg_alloc2 (scm, sizeof (*mp));
diff --git a/src/vlibmemory/socket_client.h b/src/vlibmemory/socket_client.h
index 231290088b3..a4b8bc7d6be 100644
--- a/src/vlibmemory/socket_client.h
+++ b/src/vlibmemory/socket_client.h
@@ -20,7 +20,7 @@
#include <vppinfra/file.h>
#include <vppinfra/time.h>
-#include <vlibmemory/memory_shared.h>
+#include <vlibapi/memory_shared.h>
typedef struct
{
@@ -34,7 +34,6 @@ typedef struct
u32 socket_buffer_size;
u8 *socket_tx_buffer;
u8 *socket_rx_buffer;
- u32 socket_tx_nbytes;
int control_pings_outstanding;
u8 *name;
diff --git a/src/vlibmemory/vlib.api b/src/vlibmemory/vlib.api
new file mode 100644
index 00000000000..ce1236826aa
--- /dev/null
+++ b/src/vlibmemory/vlib.api
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+option version = "1.0.0";
+
+/** \brief Process a vpe parser cli string request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param cmd_in_shmem - pointer to cli command string
+*/
+define cli
+{
+ u32 client_index;
+ u32 context;
+ u64 cmd_in_shmem;
+};
+define cli_inband
+{
+ u32 client_index;
+ u32 context;
+ string cmd[];
+};
+
+/** \brief vpe parser cli string response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for request
+ @param reply_in_shmem - Reply string from cli processing if any
+*/
+define cli_reply
+{
+ u32 context;
+ i32 retval;
+ u64 reply_in_shmem;
+};
+define cli_inband_reply
+{
+ u32 context;
+ i32 retval;
+ string reply[];
+};
+
+/** \brief Get node index using name request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param node_name[] - name of the node
+*/
+define get_node_index
+{
+ u32 client_index;
+ u32 context;
+ string node_name[64];
+};
+
+/** \brief Get node index using name request
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the request
+ @param node_index - index of the desired node if found, else ~0
+*/
+define get_node_index_reply
+{
+ u32 context;
+ i32 retval;
+ u32 node_index;
+};
+
+/** \brief Set the next node for a given node request
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param node_name[] - node to add the next node to
+ @param next_name[] - node to add as the next node
+*/
+define add_node_next
+{
+ u32 client_index;
+ u32 context;
+ string node_name[64];
+ string next_name[64];
+};
+
+/** \brief IP Set the next node for a given node response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the add next node request
+ @param next_index - the index of the next node if success, else ~0
+*/
+define add_node_next_reply
+{
+ u32 context;
+ i32 retval;
+ u32 next_index;
+};
+
+/** \brief show_threads display the information about vpp
+ threads running on system along with their process id,
+ cpu id, physical core and cpu socket.
+*/
+define show_threads
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief thread data
+ @param id - thread index
+ @param name - thread name i.e. vpp_main or vpp_wk_0
+ @param type - thread type i.e. workers or stats
+ @param pid - thread Process Id
+ @param cpu_id - thread pinned to cpu.
+ "CPUs or Logical cores are the number of physical cores times
+ the number of threads that can run on each core through
+ the use of hyperthreading." (from unix.stackexchange.com)
+ @param core - thread pinned to actual physical core.
+ @param cpu_socket - thread is running on which cpu socket.
+*/
+typedef thread_data
+{
+ u32 id;
+ string name[64];
+ string type[64];
+ u32 pid;
+ u32 cpu_id;
+ u32 core;
+ u32 cpu_socket;
+};
+
+/** \brief show_threads_reply
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param count - number of threads in thread_data array
+ @param thread_data - array of thread data
+*/
+define show_threads_reply
+{
+ u32 context;
+ i32 retval;
+ u32 count;
+ vl_api_thread_data_t thread_data[count];
+};
+
+define get_node_graph
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief get_node_graph_reply
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param reply_in_shmem - result from vlib_node_serialize, in shared
+ memory. Process with vlib_node_unserialize, remember to switch
+ heaps and free the result.
+*/
+
+define get_node_graph_reply
+{
+ u32 context;
+ i32 retval;
+ u64 reply_in_shmem;
+};
+
+/** \brief Query relative index via node names
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param node_name - name of node to find relative index from
+ @param next_name - next node from node_name to find relative index of
+*/
+define get_next_index
+{
+ u32 client_index;
+ u32 context;
+ string node_name[64];
+ string next_name[64];
+};
+
+/** \brief Reply for get next node index
+ @param context - sender context which was passed in the request
+ @param retval - return value
+ @param next_index - index of the next_node
+*/
+define get_next_index_reply
+{
+ u32 context;
+ i32 retval;
+ u32 next_index;
+};
+
+/** \brief f64 types are not standardized across the wire. Sense wire format in each direction by sending the f64 value 1.0.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param f64_one - The constant of 1.0. If you send a different value, expect an rv=VNET_API_ERROR_API_ENDIAN_FAILED.
+*/
+define get_f64_endian_value
+{
+ u32 client_index;
+ u32 context;
+ f64 f64_one [default=1.0];
+};
+
+/** \brief get_f64_endian_value reply message
+ @param context - sender context which was passed in the request
+ @param retval - return value - VNET_API_ERROR_API_ENDIAN_FAILED if f64_one != 1.0
+ @param f64_one_result - The value of 'f64 1.0'
+*/
+define get_f64_endian_value_reply
+{
+ u32 context;
+ u32 retval;
+ f64 f64_one_result;
+};
+
+/** \brief Verify f64 wire format by sending a value and receiving the value + 1.0
+ @param client_index - opaque cookie to identify the sender.
+ @param context - sender context, to match reply w/ request.
+ @param f64_value - The value you want to test. Default: 1.0.
+*/
+define get_f64_increment_by_one
+{
+ u32 client_index;
+ u32 context;
+ f64 f64_value [default=1.0];
+};
+
+/** \brief get_f64_increment_by_one reply
+ @param client_index - opaque cookie to identify the sender.
+ @param context - sender context, to match reply w/ request.
+ @param f64_value - The input f64_value incremented by 1.0.
+*/
+define get_f64_increment_by_one_reply
+{
+ u32 context;
+ u32 retval;
+ f64 f64_value;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlibmemory/vlib_api.c b/src/vlibmemory/vlib_api.c
index f9ed891f90a..705e9c241a6 100644
--- a/src/vlibmemory/vlib_api.c
+++ b/src/vlibmemory/vlib_api.c
@@ -1,8 +1,7 @@
/*
- *------------------------------------------------------------------
* vlib_api.c VLIB API implementation
*
- * Copyright (c) 2009 Cisco and/or its affiliates.
+ * Copyright (c) 2021 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -14,710 +13,341 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- *------------------------------------------------------------------
*/
-#include <fcntl.h>
-#include <pthread.h>
-#include <vppinfra/vec.h>
-#include <vppinfra/hash.h>
-#include <vppinfra/pool.h>
-#include <vppinfra/format.h>
-#include <vppinfra/byte_order.h>
-#include <vppinfra/elog.h>
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
+#include <vnet/api_errno.h>
-/**
- * @file
- * @brief Binary API messaging via shared memory
- * Low-level, primary provisioning interface
- */
-/*? %%clicmd:group_label Binary API CLI %% ?*/
-/*? %%syscfg:group_label Binary API configuration %% ?*/
-
-#define TRACE_VLIB_MEMORY_QUEUE 0
-
-#include <vlibmemory/vl_memory_msg_enum.h> /* enumerate all vlib messages */
+#include <vlibmemory/vlib.api_enum.h>
+#include <vlibmemory/vlib.api_types.h>
-#define vl_typedefs /* define message structures */
-#include <vlibmemory/vl_memory_api_h.h>
-#undef vl_typedefs
+static u16 msg_id_base;
+#define REPLY_MSG_ID_BASE msg_id_base
+#include <vlibapi/api_helper_macros.h>
-/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define vl_printfun
-#include <vlibmemory/vl_memory_api_h.h>
-#undef vl_printfun
-
-static inline void *
-vl_api_trace_plugin_msg_ids_t_print (vl_api_trace_plugin_msg_ids_t * a,
- void *handle)
+static void
+shmem_cli_output (uword arg, u8 *buffer, uword buffer_bytes)
{
- vl_print (handle, "vl_api_trace_plugin_msg_ids: %s first %u last %u\n",
- a->plugin_name,
- clib_host_to_net_u16 (a->first_msg_id),
- clib_host_to_net_u16 (a->last_msg_id));
- return handle;
-}
+ u8 **shmem_vecp = (u8 **) arg;
+ u8 *shmem_vec;
+ void *oldheap;
+ u32 offset;
-/* instantiate all the endian swap functions we know about */
-#define vl_endianfun
-#include <vlibmemory/vl_memory_api_h.h>
-#undef vl_endianfun
+ shmem_vec = *shmem_vecp;
-static void
-vl_api_get_first_msg_id_t_handler (vl_api_get_first_msg_id_t * mp)
-{
- vl_api_get_first_msg_id_reply_t *rmp;
- vl_api_registration_t *regp;
- uword *p;
- api_main_t *am = vlibapi_get_main ();
- vl_api_msg_range_t *rp;
- u8 name[64];
- u16 first_msg_id = ~0;
- int rv = -7; /* VNET_API_ERROR_INVALID_VALUE */
+ offset = vec_len (shmem_vec);
- regp = vl_api_client_index_to_registration (mp->client_index);
- if (!regp)
- return;
+ oldheap = vl_msg_push_heap ();
- if (am->msg_range_by_name == 0)
- goto out;
- strncpy ((char *) name, (char *) mp->name, ARRAY_LEN (name));
- name[ARRAY_LEN (name) - 1] = '\0';
- p = hash_get_mem (am->msg_range_by_name, name);
- if (p == 0)
- goto out;
+ vec_validate (shmem_vec, offset + buffer_bytes - 1);
- rp = vec_elt_at_index (am->msg_ranges, p[0]);
- first_msg_id = rp->first_msg_id;
- rv = 0;
+ clib_memcpy (shmem_vec + offset, buffer, buffer_bytes);
-out:
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (VL_API_GET_FIRST_MSG_ID_REPLY);
- rmp->context = mp->context;
- rmp->retval = ntohl (rv);
- rmp->first_msg_id = ntohs (first_msg_id);
- vl_api_send_msg (regp, (u8 *) rmp);
+ vl_msg_pop_heap (oldheap);
+
+ *shmem_vecp = shmem_vec;
}
-void
-vl_api_api_versions_t_handler (vl_api_api_versions_t * mp)
+static void
+vl_api_cli_t_handler (vl_api_cli_t *mp)
{
- api_main_t *am = vlibapi_get_main ();
- vl_api_api_versions_reply_t *rmp;
+ vl_api_cli_reply_t *rp;
vl_api_registration_t *reg;
- u32 nmsg = vec_len (am->api_version_list);
- int msg_size = sizeof (*rmp) + sizeof (rmp->api_versions[0]) * nmsg;
- int i;
+ vlib_main_t *vm = vlib_get_main ();
+ unformat_input_t input;
+ u8 *shmem_vec = 0;
+ void *oldheap;
reg = vl_api_client_index_to_registration (mp->client_index);
if (!reg)
return;
+ ;
- rmp = vl_msg_api_alloc (msg_size);
- clib_memset (rmp, 0, msg_size);
- rmp->_vl_msg_id = ntohs (VL_API_API_VERSIONS_REPLY);
-
- /* fill in the message */
- rmp->context = mp->context;
- rmp->count = htonl (nmsg);
+ rp = vl_msg_api_alloc (sizeof (*rp));
+ rp->_vl_msg_id = ntohs (VL_API_CLI_REPLY + REPLY_MSG_ID_BASE);
+ rp->context = mp->context;
- for (i = 0; i < nmsg; ++i)
- {
- api_version_t *vl = &am->api_version_list[i];
- rmp->api_versions[i].major = htonl (vl->major);
- rmp->api_versions[i].minor = htonl (vl->minor);
- rmp->api_versions[i].patch = htonl (vl->patch);
- strncpy ((char *) rmp->api_versions[i].name, vl->name,
- ARRAY_LEN (rmp->api_versions[i].name));
- rmp->api_versions[i].name[ARRAY_LEN (rmp->api_versions[i].name) - 1] =
- '\0';
- }
+ unformat_init_vector (&input, (u8 *) (uword) mp->cmd_in_shmem);
- vl_api_send_msg (reg, (u8 *) rmp);
-}
+ vlib_cli_input (vm, &input, shmem_cli_output, (uword) &shmem_vec);
-#define foreach_vlib_api_msg \
-_(GET_FIRST_MSG_ID, get_first_msg_id) \
-_(API_VERSIONS, api_versions)
+ oldheap = vl_msg_push_heap ();
+ vec_add1 (shmem_vec, 0);
+ vl_msg_pop_heap (oldheap);
-/*
- * vl_api_init
- */
-static int
-vlib_api_init (void)
-{
- vl_msg_api_msg_config_t cfg;
- vl_msg_api_msg_config_t *c = &cfg;
-
- clib_memset (c, 0, sizeof (*c));
-
-#define _(N,n) do { \
- c->id = VL_API_##N; \
- c->name = #n; \
- c->handler = vl_api_##n##_t_handler; \
- c->cleanup = vl_noop_handler; \
- c->endian = vl_api_##n##_t_endian; \
- c->print = vl_api_##n##_t_print; \
- c->size = sizeof(vl_api_##n##_t); \
- c->traced = 1; /* trace, so these msgs print */ \
- c->replay = 0; /* don't replay client create/delete msgs */ \
- c->message_bounce = 0; /* don't bounce this message */ \
- vl_msg_api_config(c);} while (0);
-
- foreach_vlib_api_msg;
-#undef _
+ rp->reply_in_shmem = (uword) shmem_vec;
- return 0;
+ vl_api_send_msg (reg, (u8 *) rp);
}
-u64 vector_rate_histogram[SLEEP_N_BUCKETS];
-
-/*
- * Callback to send ourselves a plugin numbering-space trace msg
- */
static void
-send_one_plugin_msg_ids_msg (u8 * name, u16 first_msg_id, u16 last_msg_id)
+inband_cli_output (uword arg, u8 *buffer, uword buffer_bytes)
{
- vl_api_trace_plugin_msg_ids_t *mp;
- api_main_t *am = vlibapi_get_main ();
- vl_shmem_hdr_t *shmem_hdr = am->shmem_hdr;
- svm_queue_t *q;
+ u8 **mem_vecp = (u8 **) arg;
+ u8 *mem_vec = *mem_vecp;
+ u32 offset = vec_len (mem_vec);
- mp = vl_msg_api_alloc_as_if_client (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
-
- mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_TRACE_PLUGIN_MSG_IDS);
- strncpy ((char *) mp->plugin_name, (char *) name,
- sizeof (mp->plugin_name) - 1);
- mp->first_msg_id = clib_host_to_net_u16 (first_msg_id);
- mp->last_msg_id = clib_host_to_net_u16 (last_msg_id);
-
- q = shmem_hdr->vl_input_queue;
-
- vl_msg_api_send_shmem (q, (u8 *) & mp);
+ vec_validate (mem_vec, offset + buffer_bytes - 1);
+ clib_memcpy (mem_vec + offset, buffer, buffer_bytes);
+ *mem_vecp = mem_vec;
}
-void
-vl_api_save_msg_table (void)
+static void
+vl_api_cli_inband_t_handler (vl_api_cli_inband_t *mp)
{
- u8 *serialized_message_table;
- api_main_t *am = vlibapi_get_main ();
- u8 *chroot_file;
- int fd, rv;
-
- /*
- * Snapshoot the api message table.
- */
- if (strstr ((char *) am->save_msg_table_filename, "..")
- || index ((char *) am->save_msg_table_filename, '/'))
- {
- clib_warning ("illegal save-message-table filename '%s'",
- am->save_msg_table_filename);
- return;
- }
-
- chroot_file = format (0, "/tmp/%s%c", am->save_msg_table_filename, 0);
-
- fd = creat ((char *) chroot_file, 0644);
+ vl_api_cli_inband_reply_t *rmp;
+ int rv = 0;
+ vlib_main_t *vm = vlib_get_main ();
+ unformat_input_t input;
+ u8 *out_vec = 0;
+ u8 *cmd_vec = 0;
- if (fd < 0)
+ if (vl_msg_api_get_msg_length (mp) <
+ vl_api_string_len (&mp->cmd) + sizeof (*mp))
{
- clib_unix_warning ("creat");
- return;
+ rv = -1;
+ goto error;
}
- serialized_message_table = vl_api_serialize_message_table (am, 0);
+ cmd_vec = vl_api_from_api_to_new_vec (mp, &mp->cmd);
- rv = write (fd, serialized_message_table,
- vec_len (serialized_message_table));
+ unformat_init_string (&input, (char *) cmd_vec,
+ vl_api_string_len (&mp->cmd));
+ rv = vlib_cli_input (vm, &input, inband_cli_output, (uword) &out_vec);
+ unformat_free (&input);
- if (rv != vec_len (serialized_message_table))
- clib_unix_warning ("write");
-
- rv = close (fd);
- if (rv < 0)
- clib_unix_warning ("close");
-
- vec_free (chroot_file);
- vec_free (serialized_message_table);
+error:
+ REPLY_MACRO3 (VL_API_CLI_INBAND_REPLY, vec_len (out_vec),
+ ({ vl_api_vec_to_api_string (out_vec, &rmp->reply); }));
+ vec_free (out_vec);
+ vec_free (cmd_vec);
}
-clib_error_t *vat_builtin_main_init (vlib_main_t * vm) __attribute__ ((weak));
-clib_error_t *
-vat_builtin_main_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-static uword
-vl_api_clnt_process (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * f)
+static void
+vl_api_get_node_index_t_handler (vl_api_get_node_index_t *mp)
{
- vlib_global_main_t *vgm = vlib_get_global_main ();
- int private_segment_rotor = 0, i, rv;
- vl_socket_args_for_process_t *a;
- vl_shmem_hdr_t *shm;
- svm_queue_t *q;
- clib_error_t *e;
- api_main_t *am = vlibapi_get_main ();
- f64 dead_client_scan_time;
- f64 sleep_time, start_time;
- f64 vector_rate;
- clib_error_t *error;
- uword event_type;
- uword *event_data = 0;
- f64 now;
-
- if ((error = vl_sock_api_init (vm)))
- {
- clib_error_report (error);
- clib_warning ("socksvr_api_init failed, quitting...");
- return 0;
- }
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_get_node_index_reply_t *rmp;
+ vlib_node_t *n;
+ int rv = 0;
+ u32 node_index = ~0;
- if ((rv = vlib_api_init ()) < 0)
- {
- clib_warning ("vlib_api_init returned %d, quitting...", rv);
- return 0;
- }
+ n = vlib_get_node_by_name (vm, mp->node_name);
- shm = am->shmem_hdr;
- q = shm->vl_input_queue;
+ if (n == 0)
+ rv = VNET_API_ERROR_NO_SUCH_NODE;
+ else
+ node_index = n->index;
- e = vlib_call_init_exit_functions (vm, &vgm->api_init_function_registrations,
- 1 /* call_once */, 1 /* is_global */);
- if (e)
- clib_error_report (e);
+ REPLY_MACRO2 (VL_API_GET_NODE_INDEX_REPLY,
+ ({ rmp->node_index = htonl (node_index); }));
+}
- e = vat_builtin_main_init (vm);
- if (e)
- clib_error_report (e);
+static void
+vl_api_add_node_next_t_handler (vl_api_add_node_next_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_add_node_next_reply_t *rmp;
+ vlib_node_t *n, *next;
+ int rv = 0;
+ u32 next_index = ~0;
- sleep_time = 10.0;
- dead_client_scan_time = vlib_time_now (vm) + 10.0;
+ n = vlib_get_node_by_name (vm, mp->node_name);
- /*
- * Send plugin message range messages for each plugin we loaded
- */
- for (i = 0; i < vec_len (am->msg_ranges); i++)
+ if (n == 0)
{
- vl_api_msg_range_t *rp = am->msg_ranges + i;
- send_one_plugin_msg_ids_msg (rp->name, rp->first_msg_id,
- rp->last_msg_id);
+ rv = VNET_API_ERROR_NO_SUCH_NODE;
+ goto out;
}
- /*
- * Save the api message table snapshot, if configured
- */
- if (am->save_msg_table_filename)
- vl_api_save_msg_table ();
+ next = vlib_get_node_by_name (vm, mp->next_name);
- /* $$$ pay attention to frame size, control CPU usage */
- while (1)
- {
- /*
- * There's a reason for checking the queue before
- * sleeping. If the vlib application crashes, it's entirely
- * possible for a client to enqueue a connect request
- * during the process restart interval.
- *
- * Unless some force of physics causes the new incarnation
- * of the application to process the request, the client will
- * sit and wait for Godot...
- */
- vector_rate = (f64) vlib_last_vectors_per_main_loop (vm);
- start_time = vlib_time_now (vm);
- while (1)
- {
- if (vl_mem_api_handle_rpc (vm, node)
- || vl_mem_api_handle_msg_main (vm, node))
- {
- vm->api_queue_nonempty = 0;
- VL_MEM_API_LOG_Q_LEN ("q-underflow: len %d", 0);
- sleep_time = 20.0;
- break;
- }
-
- /* Allow no more than 10us without a pause */
- if (vlib_time_now (vm) > start_time + 10e-6)
- {
- int index = SLEEP_400_US;
- if (vector_rate > 40.0)
- sleep_time = 400e-6;
- else if (vector_rate > 20.0)
- {
- index = SLEEP_200_US;
- sleep_time = 200e-6;
- }
- else if (vector_rate >= 1.0)
- {
- index = SLEEP_100_US;
- sleep_time = 100e-6;
- }
- else
- {
- index = SLEEP_10_US;
- sleep_time = 10e-6;
- }
- vector_rate_histogram[index] += 1;
- break;
- }
- }
-
- /*
- * see if we have any private api shared-memory segments
- * If so, push required context variables, and process
- * a message.
- */
- if (PREDICT_FALSE (vec_len (am->vlib_private_rps)))
- {
- if (private_segment_rotor >= vec_len (am->vlib_private_rps))
- private_segment_rotor = 0;
- vl_mem_api_handle_msg_private (vm, node, private_segment_rotor++);
- }
-
- vlib_process_wait_for_event_or_clock (vm, sleep_time);
- vec_reset_length (event_data);
- event_type = vlib_process_get_events (vm, &event_data);
- now = vlib_time_now (vm);
-
- switch (event_type)
- {
- case QUEUE_SIGNAL_EVENT:
- vm->queue_signal_pending = 0;
- VL_MEM_API_LOG_Q_LEN ("q-awake: len %d", q->cursize);
-
- break;
- case SOCKET_READ_EVENT:
- for (i = 0; i < vec_len (event_data); i++)
- {
- vl_api_registration_t *regp;
-
- a = pool_elt_at_index (socket_main.process_args, event_data[i]);
- regp = vl_socket_get_registration (a->reg_index);
- if (regp)
- {
- vl_socket_process_api_msg (regp, (i8 *) a->data);
- a = pool_elt_at_index (socket_main.process_args,
- event_data[i]);
- }
- vec_free (a->data);
- pool_put (socket_main.process_args, a);
- }
- break;
-
- /* Timeout... */
- case -1:
- break;
-
- default:
- clib_warning ("unknown event type %d", event_type);
- break;
- }
-
- if (now > dead_client_scan_time)
- {
- vl_mem_api_dead_client_scan (am, shm, now);
- dead_client_scan_time = vlib_time_now (vm) + 10.0;
- }
- }
+ if (next == 0)
+ rv = VNET_API_ERROR_NO_SUCH_NODE2;
+ else
+ next_index = vlib_node_add_next (vm, n->index, next->index);
- return 0;
-}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vl_api_clnt_node) =
-{
- .function = vl_api_clnt_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "api-rx-from-ring",
- .state = VLIB_NODE_STATE_DISABLED,
- .process_log2_n_stack_bytes = 18,
-};
-/* *INDENT-ON* */
-
-void
-vl_mem_api_enable_disable (vlib_main_t * vm, int enable)
-{
- vlib_node_set_state (vm, vl_api_clnt_node.index,
- (enable
- ? VLIB_NODE_STATE_POLLING
- : VLIB_NODE_STATE_DISABLED));
+out:
+ REPLY_MACRO2 (VL_API_ADD_NODE_NEXT_REPLY,
+ ({ rmp->next_index = htonl (next_index); }));
}
-static uword
-api_rx_from_node (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
+static void
+get_thread_data (vl_api_thread_data_t *td, int index)
{
- uword n_packets = frame->n_vectors;
- uword n_left_from;
- u32 *from;
- static u8 *long_msg;
-
- vec_validate (long_msg, 4095);
- n_left_from = frame->n_vectors;
- from = vlib_frame_vector_args (frame);
-
- while (n_left_from > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- void *msg;
- uword msg_len;
-
- bi0 = from[0];
- b0 = vlib_get_buffer (vm, bi0);
- from += 1;
- n_left_from -= 1;
-
- msg = b0->data + b0->current_data;
- msg_len = b0->current_length;
- if (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- ASSERT (long_msg != 0);
- _vec_len (long_msg) = 0;
- vec_add (long_msg, msg, msg_len);
- while (b0->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- b0 = vlib_get_buffer (vm, b0->next_buffer);
- msg = b0->data + b0->current_data;
- msg_len = b0->current_length;
- vec_add (long_msg, msg, msg_len);
- }
- msg = long_msg;
- }
- vl_msg_api_handler_no_trace_no_free (msg);
- }
-
- /* Free what we've been given. */
- vlib_buffer_free (vm, vlib_frame_vector_args (frame), n_packets);
-
- return n_packets;
+ vlib_worker_thread_t *w = vlib_worker_threads + index;
+ td->id = htonl (index);
+ if (w->name)
+ strncpy ((char *) td->name, (char *) w->name, ARRAY_LEN (td->name) - 1);
+ if (w->registration)
+ strncpy ((char *) td->type, (char *) w->registration->name,
+ ARRAY_LEN (td->type) - 1);
+ td->pid = htonl (w->lwp);
+ td->cpu_id = htonl (w->cpu_id);
+ td->core = htonl (w->core_id);
+ td->cpu_socket = htonl (w->numa_id);
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (api_rx_from_node_node,static) = {
- .function = api_rx_from_node,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .vector_size = 4,
- .name = "api-rx-from-node",
-};
-/* *INDENT-ON* */
-
static void
-vl_api_rpc_call_t_handler (vl_api_rpc_call_t * mp)
+vl_api_show_threads_t_handler (vl_api_show_threads_t *mp)
{
- vl_api_rpc_call_reply_t *rmp;
- int (*fp) (void *);
- i32 rv = 0;
- vlib_main_t *vm = vlib_get_main ();
-
- if (mp->function == 0)
- {
- rv = -1;
- clib_warning ("rpc NULL function pointer");
- }
+ int count = 0;
- else
- {
- if (mp->need_barrier_sync)
- vlib_worker_thread_barrier_sync (vm);
+#if !defined(__powerpc64__)
+ vl_api_registration_t *reg;
+ vl_api_show_threads_reply_t *rmp;
+ vl_api_thread_data_t *td;
+ int i, msg_size = 0;
+ count = vec_len (vlib_worker_threads);
+ if (!count)
+ return;
- fp = uword_to_pointer (mp->function, int (*)(void *));
- rv = fp (mp->data);
+ msg_size = sizeof (*rmp) + sizeof (rmp->thread_data[0]) * count;
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
- if (mp->need_barrier_sync)
- vlib_worker_thread_barrier_release (vm);
- }
+ rmp = vl_msg_api_alloc (msg_size);
+ clib_memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id = htons (VL_API_SHOW_THREADS_REPLY + REPLY_MSG_ID_BASE);
+ rmp->context = mp->context;
+ rmp->count = htonl (count);
+ td = rmp->thread_data;
- if (mp->send_reply)
+ for (i = 0; i < count; i++)
{
- svm_queue_t *q = vl_api_client_index_to_input_queue (mp->client_index);
- if (q)
- {
- rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (VL_API_RPC_CALL_REPLY);
- rmp->context = mp->context;
- rmp->retval = rv;
- vl_msg_api_send_shmem (q, (u8 *) & rmp);
- }
+ get_thread_data (&td[i], i);
}
- if (mp->multicast)
- {
- clib_warning ("multicast not yet implemented...");
- }
-}
-static void
-vl_api_rpc_call_reply_t_handler (vl_api_rpc_call_reply_t * mp)
-{
- clib_warning ("unimplemented");
-}
-
-void
-vl_api_send_pending_rpc_requests (vlib_main_t * vm)
-{
- vlib_main_t *vm_global = vlib_get_first_main ();
-
- ASSERT (vm != vm_global);
+ vl_api_send_msg (reg, (u8 *) rmp);
+#else
- clib_spinlock_lock_if_init (&vm_global->pending_rpc_lock);
- vec_append (vm_global->pending_rpc_requests, vm->pending_rpc_requests);
- vec_reset_length (vm->pending_rpc_requests);
- clib_spinlock_unlock_if_init (&vm_global->pending_rpc_lock);
+ /* unimplemented support */
+ rv = -9;
+ clib_warning ("power pc does not support show threads api");
+ REPLY_MACRO2 (VL_API_SHOW_THREADS_REPLY, ({ rmp->count = htonl (count); }));
+#endif
}
-always_inline void
-vl_api_rpc_call_main_thread_inline (void *fp, u8 * data, u32 data_length,
- u8 force_rpc)
+static void
+vl_api_get_node_graph_t_handler (vl_api_get_node_graph_t *mp)
{
- vl_api_rpc_call_t *mp;
- vlib_main_t *vm_global = vlib_get_first_main ();
+ int rv = 0;
+ u8 *vector = 0;
vlib_main_t *vm = vlib_get_main ();
+ void *oldheap;
+ vl_api_get_node_graph_reply_t *rmp;
+ static vlib_node_t ***node_dups;
+ static vlib_main_t **stat_vms;
- /* Main thread and not a forced RPC: call the function directly */
- if ((force_rpc == 0) && (vlib_get_thread_index () == 0))
- {
- void (*call_fp) (void *);
-
- vlib_worker_thread_barrier_sync (vm);
+ oldheap = vl_msg_push_heap ();
- call_fp = fp;
- call_fp (data);
+ /*
+ * Keep the number of memcpy ops to a minimum (e.g. 1).
+ */
+ vec_validate (vector, 16384);
+ vec_reset_length (vector);
- vlib_worker_thread_barrier_release (vm);
- return;
- }
+ vlib_node_get_nodes (vm, 0 /* main threads */, 0 /* include stats */,
+ 1 /* barrier sync */, &node_dups, &stat_vms);
+ vector = vlib_node_serialize (vm, node_dups, vector, 1 /* include nexts */,
+ 1 /* include stats */);
- /* Otherwise, actually do an RPC */
- mp = vl_msg_api_alloc_as_if_client (sizeof (*mp) + data_length);
-
- clib_memset (mp, 0, sizeof (*mp));
- clib_memcpy_fast (mp->data, data, data_length);
- mp->_vl_msg_id = ntohs (VL_API_RPC_CALL);
- mp->function = pointer_to_uword (fp);
- mp->need_barrier_sync = 1;
-
- /* Add to the pending vector. Thread 0 requires locking. */
- if (vm == vm_global)
- clib_spinlock_lock_if_init (&vm_global->pending_rpc_lock);
- vec_add1 (vm->pending_rpc_requests, (uword) mp);
- if (vm == vm_global)
- clib_spinlock_unlock_if_init (&vm_global->pending_rpc_lock);
-}
+ vl_msg_pop_heap (oldheap);
-/*
- * Check if called from worker threads.
- * If so, make rpc call of fp through shmem.
- * Otherwise, call fp directly
- */
-void
-vl_api_rpc_call_main_thread (void *fp, u8 * data, u32 data_length)
-{
- vl_api_rpc_call_main_thread_inline (fp, data, data_length, /*force_rpc */
- 0);
-}
-
-/*
- * Always make rpc call of fp through shmem, useful for calling from threads
- * not setup as worker threads, such as DPDK callback thread
- */
-void
-vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length)
-{
- vl_api_rpc_call_main_thread_inline (fp, data, data_length, /*force_rpc */
- 1);
+ REPLY_MACRO2 (VL_API_GET_NODE_GRAPH_REPLY,
+ ({ rmp->reply_in_shmem = (uword) vector; }));
}
static void
-vl_api_trace_plugin_msg_ids_t_handler (vl_api_trace_plugin_msg_ids_t * mp)
+vl_api_get_next_index_t_handler (vl_api_get_next_index_t *mp)
{
- api_main_t *am = vlibapi_get_main ();
- vl_api_msg_range_t *rp;
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_get_next_index_reply_t *rmp;
+ vlib_node_t *node, *next_node;
+ int rv = 0;
+ u32 next_node_index = ~0, next_index = ~0;
uword *p;
- /* Noop (except for tracing) during normal operation */
- if (am->replay_in_progress == 0)
- return;
+ node = vlib_get_node_by_name (vm, mp->node_name);
- p = hash_get_mem (am->msg_range_by_name, mp->plugin_name);
- if (p == 0)
+ if (node == 0)
{
- clib_warning ("WARNING: traced plugin '%s' not in current image",
- mp->plugin_name);
- return;
+ rv = VNET_API_ERROR_NO_SUCH_NODE;
+ goto out;
}
- rp = vec_elt_at_index (am->msg_ranges, p[0]);
- if (rp->first_msg_id != clib_net_to_host_u16 (mp->first_msg_id))
+ next_node = vlib_get_node_by_name (vm, mp->next_name);
+
+ if (next_node == 0)
{
- clib_warning ("WARNING: traced plugin '%s' first message id %d not %d",
- mp->plugin_name, clib_net_to_host_u16 (mp->first_msg_id),
- rp->first_msg_id);
+ rv = VNET_API_ERROR_NO_SUCH_NODE2;
+ goto out;
}
+ else
+ next_node_index = next_node->index;
- if (rp->last_msg_id != clib_net_to_host_u16 (mp->last_msg_id))
+ p = hash_get (node->next_slot_by_node, next_node_index);
+
+ if (p == 0)
{
- clib_warning ("WARNING: traced plugin '%s' last message id %d not %d",
- mp->plugin_name, clib_net_to_host_u16 (mp->last_msg_id),
- rp->last_msg_id);
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto out;
}
-}
+ else
+ next_index = p[0];
-#define foreach_rpc_api_msg \
-_(RPC_CALL,rpc_call) \
-_(RPC_CALL_REPLY,rpc_call_reply)
+out:
+ REPLY_MACRO2 (VL_API_GET_NEXT_INDEX_REPLY,
+ ({ rmp->next_index = htonl (next_index); }));
+}
-#define foreach_plugin_trace_msg \
-_(TRACE_PLUGIN_MSG_IDS,trace_plugin_msg_ids)
+static void
+vl_api_get_f64_endian_value_t_handler (vl_api_get_f64_endian_value_t *mp)
+{
+ int rv = 0;
+ f64 one = 1.0;
+ vl_api_get_f64_endian_value_reply_t *rmp;
+ if (1.0 != clib_net_to_host_f64 (mp->f64_one))
+ rv = VNET_API_ERROR_API_ENDIAN_FAILED;
+
+ REPLY_MACRO2 (VL_API_GET_F64_ENDIAN_VALUE_REPLY,
+ ({ rmp->f64_one_result = clib_host_to_net_f64 (one); }));
+}
-/*
- * Set the rpc callback at our earliest possible convenience.
- * This avoids ordering issues between thread_init() -> start_workers and
- * an init function which we could define here. If we ever intend to use
- * vlib all by itself, we can't create a link-time dependency on
- * an init function here and a typical "call foo_init first"
- * guitar lick.
- */
+static void
+vl_api_get_f64_increment_by_one_t_handler (
+ vl_api_get_f64_increment_by_one_t *mp)
+{
+ int rv = 0;
+ vl_api_get_f64_increment_by_one_reply_t *rmp;
-extern void *rpc_call_main_thread_cb_fn;
+ REPLY_MACRO2 (VL_API_GET_F64_INCREMENT_BY_ONE_REPLY, ({
+ rmp->f64_value = clib_host_to_net_f64 (
+ clib_net_to_host_f64 (mp->f64_value) + 1.0);
+ }));
+}
+#include <vlibmemory/vlib.api.c>
static clib_error_t *
-rpc_api_hookup (vlib_main_t * vm)
+vlib_apis_hookup (vlib_main_t *vm)
{
api_main_t *am = vlibapi_get_main ();
-#define _(N,n) \
- vl_msg_api_set_handlers(VL_API_##N, #n, \
- vl_api_##n##_t_handler, \
- vl_noop_handler, \
- vl_noop_handler, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 0 /* do not trace */);
- foreach_rpc_api_msg;
-#undef _
-
-#define _(N,n) \
- vl_msg_api_set_handlers(VL_API_##N, #n, \
- vl_api_##n##_t_handler, \
- vl_noop_handler, \
- vl_noop_handler, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 1 /* do trace */);
- foreach_plugin_trace_msg;
-#undef _
-
- /* No reason to halt the parade to create a trace record... */
- am->is_mp_safe[VL_API_TRACE_PLUGIN_MSG_IDS] = 1;
- rpc_call_main_thread_cb_fn = vl_api_rpc_call_main_thread;
+
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ msg_id_base = setup_message_id_table ();
+
+ /* Mark messages as mp safe */
+ vl_api_set_msg_thread_safe (am, msg_id_base + VL_API_GET_NODE_GRAPH, 1);
+ vl_api_set_msg_thread_safe (am, msg_id_base + VL_API_SHOW_THREADS, 1);
+
return 0;
}
-VLIB_API_INIT_FUNCTION (rpc_api_hookup);
+VLIB_API_INIT_FUNCTION (vlib_apis_hookup);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vlibmemory/vlib_api_cli.c b/src/vlibmemory/vlib_api_cli.c
index 0057c85adcf..4492f5af980 100644
--- a/src/vlibmemory/vlib_api_cli.c
+++ b/src/vlibmemory/vlib_api_cli.c
@@ -61,14 +61,12 @@ vl_api_show_histogram_command (vlib_main_t * vm,
/*?
* Display the binary api sleep-time histogram
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_show_api_histogram_command, static) =
{
.path = "show api histogram",
.short_help = "show api histogram",
.function = vl_api_show_histogram_command,
};
-/* *INDENT-ON* */
static clib_error_t *
vl_api_clear_histogram_command (vlib_main_t * vm,
@@ -85,14 +83,12 @@ vl_api_clear_histogram_command (vlib_main_t * vm,
/*?
* Clear the binary api sleep-time histogram
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_clear_api_histogram_command, static) =
{
.path = "clear api histogram",
.short_help = "clear api histogram",
.function = vl_api_clear_histogram_command,
};
-/* *INDENT-ON* */
static clib_error_t *
vl_api_client_command (vlib_main_t * vm,
@@ -110,7 +106,6 @@ vl_api_client_command (vlib_main_t * vm,
vlib_cli_output (vm, "%20s %8s %14s %18s %s",
"Name", "PID", "Queue Length", "Queue VA", "Health");
- /* *INDENT-OFF* */
pool_foreach (regpp, am->vl_clients)
{
regp = *regpp;
@@ -135,7 +130,6 @@ vl_api_client_command (vlib_main_t * vm,
vec_add1 (confused_indices, regpp - am->vl_clients);
}
}
- /* *INDENT-ON* */
/* This should "never happen," but if it does, fix it... */
if (PREDICT_FALSE (vec_len (confused_indices) > 0))
@@ -191,37 +185,31 @@ vl_api_status_command (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_show_api_command, static) =
{
.path = "show api",
.short_help = "Show API information",
};
-/* *INDENT-ON* */
/*?
* Display current api client connections
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_show_api_clients_command, static) =
{
.path = "show api clients",
.short_help = "Client information",
.function = vl_api_client_command,
};
-/* *INDENT-ON* */
/*?
* Display the current api message tracing status
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_show_api_status_command, static) =
{
.path = "show api trace-status",
.short_help = "Display API trace status",
.function = vl_api_status_command,
};
-/* *INDENT-ON* */
static clib_error_t *
vl_api_message_table_command (vlib_main_t * vm,
@@ -242,20 +230,19 @@ vl_api_message_table_command (vlib_main_t * vm,
vlib_cli_output (vm, "%-4s %-40s %6s %7s", "ID", "Name", "Bounce",
"MP-safe");
- for (i = 1; i < vec_len (am->msg_names); i++)
+ for (i = 1; i < vec_len (am->msg_data); i++)
{
+ vl_api_msg_data_t *m = vl_api_get_msg_data (am, i);
if (verbose == 0)
{
vlib_cli_output (vm, "%-4d %s", i,
- am->msg_names[i] ? am->msg_names[i] :
- " [no handler]");
+ m->name ? m->name : " [no handler]");
}
else
{
vlib_cli_output (vm, "%-4d %-40s %6d %7d", i,
- am->msg_names[i] ? am->msg_names[i] :
- " [no handler]", am->message_bounce[i],
- am->is_mp_safe[i]);
+ m->name ? m->name : " [no handler]", m->bounce,
+ m->is_mp_safe);
}
}
@@ -265,14 +252,12 @@ vl_api_message_table_command (vlib_main_t * vm,
/*?
* Display the current api message decode tables
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_show_api_message_table_command, static) =
{
.path = "show api message-table",
.short_help = "Message Table",
.function = vl_api_message_table_command,
};
-/* *INDENT-ON* */
static int
range_compare (vl_api_msg_range_t * a0, vl_api_msg_range_t * a1)
@@ -332,18 +317,17 @@ vl_api_show_plugin_command (vlib_main_t * vm,
/*?
* Display the plugin binary API message range table
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cli_show_api_plugin_command, static) =
{
.path = "show api plugin",
.short_help = "show api plugin",
.function = vl_api_show_plugin_command,
};
-/* *INDENT-ON* */
typedef enum
{
DUMP,
+ DUMP_JSON,
REPLAY,
INITIALIZERS,
} vl_api_replay_t;
@@ -391,6 +375,7 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
{
vl_api_trace_file_header_t *hp;
int i, fd;
+ u16 *msgid_vec = 0;
struct stat statb;
size_t file_size;
u8 *msg;
@@ -433,7 +418,7 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
}
close (fd);
- CLIB_MEM_UNPOISON (hp, file_size);
+ clib_mem_unpoison (hp, file_size);
nitems = ntohl (hp->nitems);
@@ -453,13 +438,31 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
vlib_cli_output (vm,
"Note: wrapped/incomplete trace, results may vary\n");
+ size_t file_size_left = file_size;
+
+#define assert_size(size_left, s) \
+ do \
+ { \
+ if ((s) >= size_left) \
+ { \
+ vlib_cli_output (vm, "corrupted file"); \
+ munmap (hp, file_size); \
+ vec_free (msgid_vec); \
+ return; \
+ } \
+ size_left -= s; \
+ } \
+ while (0);
+
+ assert_size (file_size_left, sizeof (hp[0]));
msg = (u8 *) (hp + 1);
- u16 *msgid_vec = 0;
serialize_main_t _sm, *sm = &_sm;
u32 msgtbl_size = ntohl (hp->msgtbl_size);
u8 *name_and_crc;
+ assert_size (file_size_left, msgtbl_size);
+
unserialize_open_data (sm, msg, msgtbl_size);
unserialize_integer (sm, &nitems_msgtbl, sizeof (u32));
@@ -467,7 +470,11 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
{
u16 msg_index = unserialize_likely_small_unsigned_integer (sm);
unserialize_cstring (sm, (char **) &name_and_crc);
- u16 msg_index2 = vl_msg_api_get_msg_index (name_and_crc);
+ u32 msg_index2 = vl_msg_api_get_msg_index (name_and_crc);
+ ASSERT (~0 == msg_index2 || msg_index2 <= 65535);
+ if (~0 == msg_index2)
+ vlib_cli_output (vm, "warning: can't find msg index for id %d\n",
+ msg_index);
vec_validate (msgid_vec, msg_index);
msgid_vec[msg_index] = msg_index2;
}
@@ -476,23 +483,20 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
for (i = 0; i < first_index; i++)
{
- trace_cfg_t *cfgp;
int size;
u16 msg_id;
+ assert_size (file_size_left, sizeof (u32));
size = clib_host_to_net_u32 (*(u32 *) msg);
msg += sizeof (u32);
+ assert_size (file_size_left, clib_max (size, sizeof (u16)));
msg_id = ntohs (*((u16 *) msg));
- if (msg_id < vec_len (msgid_vec))
- msg_id = msgid_vec[msg_id];
- cfgp = am->api_trace_cfg + msg_id;
- if (!cfgp)
- {
- vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id);
- munmap (hp, file_size);
- return;
- }
+ if (msg_id >= vec_len (msgid_vec) ||
+ msgid_vec[msg_id] >= vec_len (am->msg_data))
+ vlib_cli_output (vm, "warning: unknown msg id %d for msg number %d\n",
+ msg_id, i);
+
msg += size;
}
@@ -501,32 +505,33 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
for (; i <= last_index; i++)
{
- trace_cfg_t *cfgp;
+ vl_api_msg_data_t *m;
u16 msg_id;
int size;
if (which == DUMP)
vlib_cli_output (vm, "---------- trace %d -----------\n", i);
+ assert_size (file_size_left, sizeof (u32));
size = clib_host_to_net_u32 (*(u32 *) msg);
msg += sizeof (u32);
+ assert_size (file_size_left, clib_max (size, sizeof (u16)));
msg_id = ntohs (*((u16 *) msg));
- if (msg_id < vec_len (msgid_vec))
- {
- msg_id = msgid_vec[msg_id];
- }
- cfgp = am->api_trace_cfg + msg_id;
- if (!cfgp)
+ if (msg_id >= vec_len (msgid_vec) ||
+ msgid_vec[msg_id] >= vec_len (am->msg_data))
{
- vlib_cli_output (vm, "Ugh: msg id %d no trace config\n", msg_id);
- munmap (hp, file_size);
- vec_free (tmpbuf);
- am->replay_in_progress = 0;
- return;
+ vlib_cli_output (
+ vm, "warning: unknown msg id %d for msg number %d, skipping\n",
+ msg_id, i);
+ msg += size;
+ continue;
}
+ msg_id = msgid_vec[msg_id];
+ m = vl_api_get_msg_data (am, msg_id);
+
/* Copy the buffer (from the read-only mmap'ed file) */
vec_validate (tmpbuf, size - 1 + sizeof (uword));
clib_memcpy (tmpbuf + sizeof (uword), msg, size);
@@ -536,11 +541,10 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
* Endian swap if needed. All msg data is supposed to be in
* network byte order.
*/
- if (((which == DUMP) && clib_arch_is_little_endian))
+ if (((which == DUMP || which == DUMP_JSON) &&
+ clib_arch_is_little_endian))
{
- void (*endian_fp) (void *);
- if (msg_id >= vec_len (am->msg_endian_handlers)
- || (am->msg_endian_handlers[msg_id] == 0))
+ if (m && m->endian_handler == 0)
{
vlib_cli_output (vm, "Ugh: msg id %d no endian swap\n", msg_id);
munmap (hp, file_size);
@@ -548,8 +552,10 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
am->replay_in_progress = 0;
return;
}
- endian_fp = am->msg_endian_handlers[msg_id];
- (*endian_fp) (tmpbuf + sizeof (uword));
+ if (m)
+ {
+ m->endian_handler (tmpbuf + sizeof (uword));
+ }
}
/* msg_id always in network byte order */
@@ -561,43 +567,31 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
switch (which)
{
- case DUMP:
- if (msg_id < vec_len (am->msg_print_handlers) &&
- am->msg_print_handlers[msg_id])
- {
- u8 *(*print_fp) (void *, void *);
+ case DUMP_JSON:
+ vlib_cli_output (vm, "%U", format_vl_api_msg_json, am, msg_id,
+ tmpbuf + sizeof (uword));
+ break;
- print_fp = (void *) am->msg_print_handlers[msg_id];
- (*print_fp) (tmpbuf + sizeof (uword), vm);
- }
- else
- {
- vlib_cli_output (vm, "Skipping msg id %d: no print fcn\n",
- msg_id);
- break;
- }
+ case DUMP:
+ vlib_cli_output (vm, "%U", format_vl_api_msg_text, am, msg_id,
+ tmpbuf + sizeof (uword));
break;
case INITIALIZERS:
- if (msg_id < vec_len (am->msg_print_handlers) &&
- am->msg_print_handlers[msg_id])
+ if (m)
{
u8 *s;
int j;
- u8 *(*print_fp) (void *, void *);
- print_fp = (void *) am->msg_print_handlers[msg_id];
+ vlib_cli_output (vm, "/*%U*/", format_vl_api_msg_text, am,
+ msg_id, tmpbuf + sizeof (uword));
- vlib_cli_output (vm, "/*");
-
- (*print_fp) (tmpbuf + sizeof (uword), vm);
vlib_cli_output (vm, "*/\n");
- s = format (0, "static u8 * vl_api_%s_%d[%d] = {",
- am->msg_names[msg_id], i,
- am->api_trace_cfg[msg_id].size);
+ s = format (0, "static u8 * vl_api_%s_%d[%d] = {", m->name, i,
+ m->trace_size);
- for (j = 0; j < am->api_trace_cfg[msg_id].size; j++)
+ for (j = 0; j < m->trace_size; j++)
{
if ((j & 7) == 0)
s = format (s, "\n ");
@@ -610,22 +604,17 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
break;
case REPLAY:
- if (msg_id < vec_len (am->msg_print_handlers) &&
- am->msg_print_handlers[msg_id] && cfgp->replay_enable)
+ if (m && m->handler && m->replay_allowed)
{
- void (*handler) (void *, vlib_main_t *);
-
- handler = (void *) am->msg_handlers[msg_id];
-
- if (!am->is_mp_safe[msg_id])
+ if (!m->is_mp_safe)
vl_msg_api_barrier_sync ();
- (*handler) (tmpbuf + sizeof (uword), vm);
- if (!am->is_mp_safe[msg_id])
+ m->handler (tmpbuf + sizeof (uword));
+ if (!m->is_mp_safe)
vl_msg_api_barrier_release ();
}
else
{
- if (cfgp->replay_enable)
+ if (m && m->replay_allowed)
vlib_cli_output (vm, "Skipping msg id %d: no handler\n",
msg_id);
break;
@@ -633,15 +622,309 @@ vl_msg_api_process_file (vlib_main_t * vm, u8 * filename,
break;
}
- _vec_len (tmpbuf) = 0;
+ vec_set_len (tmpbuf, 0);
msg += size;
}
munmap (hp, file_size);
vec_free (tmpbuf);
+ vec_free (msgid_vec);
am->replay_in_progress = 0;
}
+static int
+file_exists (u8 *fname)
+{
+ FILE *fp = 0;
+ fp = fopen ((char *) fname, "r");
+ if (fp)
+ {
+ fclose (fp);
+ return 1;
+ }
+ return 0;
+}
+
+typedef struct
+{
+ vlib_main_t *vm;
+ u8 is_json;
+} vl_msg_print_args;
+
+static int
+vl_msg_print_trace (u8 *msg, void *ctx)
+{
+ vl_msg_print_args *a = ctx;
+ api_main_t *am = vlibapi_get_main ();
+ u16 msg_id = ntohs (*((u16 *) msg));
+ vl_api_msg_data_t *m = vl_api_get_msg_data (am, msg_id);
+ u8 is_json = a->is_json;
+ u8 *tmpbuf = 0;
+
+ if (!m)
+ {
+ vlib_cli_output (a->vm, "Unknown msg id %d\n", msg_id);
+ return 0;
+ }
+
+ if (clib_arch_is_little_endian && (m->endian_handler != NULL))
+ {
+ u32 msg_length = vec_len (msg);
+ vec_validate (tmpbuf, msg_length - 1);
+ clib_memcpy_fast (tmpbuf, msg, msg_length);
+ msg = tmpbuf;
+
+ m->endian_handler (tmpbuf);
+ }
+
+ vlib_cli_output (a->vm, "%U\n",
+ is_json ? format_vl_api_msg_json : format_vl_api_msg_text,
+ am, msg_id, msg);
+
+ vec_free (tmpbuf);
+ return 0;
+}
+
+static int
+vl_msg_api_dump_trace (vlib_main_t *vm, vl_api_trace_which_t which, u8 is_json)
+{
+ api_main_t *am = vlibapi_get_main ();
+ vl_api_trace_t *tp;
+
+ switch (which)
+ {
+ case VL_API_TRACE_TX:
+ tp = am->tx_trace;
+ break;
+ case VL_API_TRACE_RX:
+ tp = am->rx_trace;
+ break;
+ default:
+ return -1;
+ }
+
+ if (tp == 0 || tp->nitems == 0 || vec_len (tp->traces) == 0)
+ return -1;
+
+ vl_msg_print_args args;
+ clib_memset (&args, 0, sizeof (args));
+ args.is_json = is_json;
+ args.vm = vm;
+ vl_msg_traverse_trace (tp, vl_msg_print_trace, &args);
+
+ return 0;
+}
+
+static char *
+vl_msg_read_file (FILE *f)
+{
+ const size_t bufsize = 1024;
+ char *buf[bufsize], *v = 0;
+ size_t n;
+
+ while ((n = fread (buf, 1, bufsize, f)))
+ vec_add (v, buf, n);
+
+ /* most callers expect a NULL-terminated C-string */
+ if (v)
+ vec_add1 (v, 0);
+
+ return v;
+}
+
+static u16
+vl_msg_find_id_by_name_and_crc (vlib_main_t *vm, api_main_t *am, char *name)
+{
+ uword *p;
+ p = hash_get_mem (am->msg_index_by_name_and_crc, name);
+ if (!p)
+ return (u16) ~0;
+
+ return p[0];
+}
+
+static u16
+vl_msg_find_id_by_name (vlib_main_t *vm, api_main_t *am, char *name)
+{
+ uword *p;
+
+ if (!am->msg_id_by_name)
+ {
+ vlib_cli_output (vm, "message id table not yet initialized!\n");
+ return (u16) ~0;
+ }
+
+ p = hash_get_mem (am->msg_id_by_name, name);
+ if (!p)
+ return (u16) ~0;
+
+ return p[0];
+}
+
+static int
+vl_msg_exec_json_command (vlib_main_t *vm, cJSON *o)
+{
+ api_main_t *am = vlibapi_get_main ();
+ u16 msg_id;
+ int len = 0, rv = -1;
+ vl_api_msg_data_t *m;
+ u8 *msg = 0;
+
+ cJSON *msg_id_obj = cJSON_GetObjectItem (o, "_msgname");
+ if (!msg_id_obj)
+ {
+ vlib_cli_output (vm, "Missing '_msgname' element!\n");
+ return rv;
+ }
+ char *name = cJSON_GetStringValue (msg_id_obj);
+
+ cJSON *crc_obj = cJSON_GetObjectItem (o, "_crc");
+ if (!crc_obj)
+ {
+ vlib_cli_output (vm, "Missing '_crc' element!\n");
+ return rv;
+ }
+ char *crc = cJSON_GetStringValue (crc_obj);
+ u8 proc_warning = 0;
+
+ u8 *name_crc = format (0, "%s_%s%c", name, crc, 0);
+ msg_id = vl_msg_find_id_by_name_and_crc (vm, am, (char *) name_crc);
+ m = vl_api_get_msg_data (am, msg_id);
+ if (msg_id == (u16) ~0)
+ {
+ msg_id = vl_msg_find_id_by_name (vm, am, name);
+ if (msg_id == (u16) ~0)
+ {
+ vlib_cli_output (vm, "unknown msg id %d!\n", msg_id);
+ vec_free (name_crc);
+ return rv;
+ }
+ proc_warning = 1;
+ }
+ vec_free (name_crc);
+
+ if (m->replay_allowed)
+ {
+ if (proc_warning)
+ vlib_cli_output (vm, "warning: msg %d has different signature\n");
+
+ if (!m->fromjson_handler)
+ {
+ vlib_cli_output (vm, "missing fromjson convert function! id %d\n",
+ msg_id);
+ return rv;
+ }
+
+ msg = (u8 *) m->fromjson_handler (o, &len);
+ if (!msg)
+ {
+ vlib_cli_output (vm, "failed to convert JSON (msg id %d)!\n",
+ msg_id);
+ return rv;
+ }
+
+ if (clib_arch_is_little_endian)
+ m->endian_handler (msg);
+
+ if (!m->handler)
+ {
+ vlib_cli_output (vm, "no handler for msg id %d!\n", msg_id);
+ goto end;
+ }
+
+ if (m->handler)
+ {
+ if (!m->is_mp_safe)
+ vl_msg_api_barrier_sync ();
+ m->handler (msg);
+ if (!m->is_mp_safe)
+ vl_msg_api_barrier_release ();
+ }
+ }
+
+ rv = 0;
+end:
+ if (msg)
+ cJSON_free (msg);
+ return rv;
+}
+
+static void
+vl_msg_replay_json (vlib_main_t *vm, u8 *filename)
+{
+ api_main_t *am = vlibapi_get_main ();
+ cJSON *o = 0;
+ int rv = 0;
+ FILE *f = fopen ((char *) filename, "r");
+
+ if (!f)
+ {
+ vlib_cli_output (vm, "failed to open %s!\n", filename);
+ return;
+ }
+
+ char *buf = vl_msg_read_file (f);
+ fclose (f);
+
+ o = cJSON_Parse (buf);
+ vec_free (buf);
+ if (!o)
+ {
+ vlib_cli_output (vm, "%s: Failed parsing JSON input: %s\n", filename,
+ cJSON_GetErrorPtr ());
+ return;
+ }
+
+ if (cJSON_IsArray (o))
+ {
+ am->replay_in_progress = 1;
+ size_t size = cJSON_GetArraySize (o);
+ for (int i = 0; i < size; i++)
+ {
+ rv = vl_msg_exec_json_command (vm, cJSON_GetArrayItem (o, i));
+ if (rv < 0)
+ {
+ am->replay_in_progress = 0;
+ break;
+ }
+ }
+ }
+ else
+ {
+ rv = vl_msg_exec_json_command (vm, o);
+ }
+
+ if (rv < 0)
+ vlib_cli_output (vm, "error during replaying API trace");
+
+ cJSON_Delete (o);
+}
+
+static void
+vl_msg_dump_file_json (vlib_main_t *vm, u8 *filename)
+{
+ FILE *f = fopen ((char *) filename, "r");
+ char *buf;
+
+ if (!f)
+ {
+ vlib_cli_output (vm, "failed to open %s!\n", filename);
+ return;
+ }
+
+ buf = vl_msg_read_file (f);
+ fclose (f);
+
+ if (!buf)
+ {
+ vlib_cli_output (vm, "no content in %s!\n", filename);
+ return;
+ }
+
+ vlib_cli_output (vm, buf);
+ vec_free (buf);
+}
+
/** api_trace_command_fn - control the binary API trace / replay feature
Note: this command MUST be marked thread-safe. Replay with
@@ -688,6 +971,43 @@ api_trace_command_fn (vlib_main_t * vm,
vl_msg_api_trace_onoff (am, which, 0);
vlib_worker_thread_barrier_release (vm);
}
+ else if (unformat (line_input, "save-json %s", &filename))
+ {
+ if (strstr ((char *) filename, "..") ||
+ index ((char *) filename, '/'))
+ {
+ vlib_cli_output (vm, "illegal characters in filename '%s'",
+ filename);
+ goto out;
+ }
+
+ chroot_filename = format (0, "/tmp/%s%c", filename, 0);
+
+ vec_free (filename);
+
+ if (file_exists (chroot_filename))
+ {
+ vlib_cli_output (vm, "file exists: %s\n", chroot_filename);
+ goto out;
+ }
+
+ fp = fopen ((char *) chroot_filename, "w");
+ if (fp == NULL)
+ {
+ vlib_cli_output (vm, "Couldn't create %s\n", chroot_filename);
+ goto out;
+ }
+ vlib_worker_thread_barrier_sync (vm);
+ rv = vl_msg_api_trace_save (am, which, fp, 1);
+ if (rv == -1)
+ vlib_cli_output (vm, "API Trace data not present\n");
+ else if (rv < 0)
+ vlib_cli_output (vm, "failed to save api trace\n");
+ else
+ vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename);
+ vlib_worker_thread_barrier_release (vm);
+ fclose (fp);
+ }
else if (unformat (line_input, "save %s", &filename))
{
if (strstr ((char *) filename, "..")
@@ -702,6 +1022,12 @@ api_trace_command_fn (vlib_main_t * vm,
vec_free (filename);
+ if (file_exists (chroot_filename))
+ {
+ vlib_cli_output (vm, "file exists: %s\n", chroot_filename);
+ goto out;
+ }
+
fp = fopen ((char *) chroot_filename, "w");
if (fp == NULL)
{
@@ -709,7 +1035,7 @@ api_trace_command_fn (vlib_main_t * vm,
goto out;
}
vlib_worker_thread_barrier_sync (vm);
- rv = vl_msg_api_trace_save (am, which, fp);
+ rv = vl_msg_api_trace_save (am, which, fp, 0);
vlib_worker_thread_barrier_release (vm);
fclose (fp);
if (rv == -1)
@@ -732,10 +1058,30 @@ api_trace_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "API trace saved to %s\n", chroot_filename);
goto out;
}
- else if (unformat (line_input, "dump %s", &filename))
+ else if (unformat (line_input, "tojson %s", &filename))
+ {
+ vl_msg_api_process_file (vm, filename, first, last, DUMP_JSON);
+ }
+ else if (unformat (line_input, "dump-file-json %s", &filename))
+ {
+ vl_msg_dump_file_json (vm, filename);
+ }
+ else if (unformat (line_input, "dump-file %s", &filename))
{
vl_msg_api_process_file (vm, filename, first, last, DUMP);
}
+ else if (unformat (line_input, "dump-json"))
+ {
+ vl_msg_api_dump_trace (vm, which, 1);
+ }
+ else if (unformat (line_input, "dump"))
+ {
+ vl_msg_api_dump_trace (vm, which, 0);
+ }
+ else if (unformat (line_input, "replay-json %s", &filename))
+ {
+ vl_msg_replay_json (vm, filename);
+ }
else if (unformat (line_input, "replay %s", &filename))
{
vl_msg_api_process_file (vm, filename, first, last, REPLAY);
@@ -787,93 +1133,15 @@ out:
* Display, replay, or save a binary API trace
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (api_trace_command, static) = {
.path = "api trace",
- .short_help = "api trace [on|off][first <n>][last <n>][status][free]"
- "[post-mortem-on][dump|save|replay <file>]",
+ .short_help = "api trace [tx][on|off][first <n>][last <n>][status][free]"
+ "[post-mortem-on][dump|dump-file|dump-json|save|tojson|save-"
+ "json|replay <file>|replay-json <file>][nitems <n>]"
+ "[initializers <file>]",
.function = api_trace_command_fn,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
-
-static clib_error_t *
-vl_api_trace_command (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cli_cmd)
-{
- u32 nitems = 1024;
- vl_api_trace_which_t which = VL_API_TRACE_RX;
- api_main_t *am = vlibapi_get_main ();
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "rx nitems %u", &nitems) || unformat (input, "rx"))
- goto configure;
- else if (unformat (input, "tx nitems %u", &nitems)
- || unformat (input, "tx"))
- {
- which = VL_API_TRACE_RX;
- goto configure;
- }
- else if (unformat (input, "on rx"))
- {
- vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 1);
- }
- else if (unformat (input, "on tx"))
- {
- vl_msg_api_trace_onoff (am, VL_API_TRACE_TX, 1);
- }
- else if (unformat (input, "on"))
- {
- vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 1);
- }
- else if (unformat (input, "off"))
- {
- vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 0);
- vl_msg_api_trace_onoff (am, VL_API_TRACE_TX, 0);
- }
- else if (unformat (input, "free"))
- {
- vl_msg_api_trace_onoff (am, VL_API_TRACE_RX, 0);
- vl_msg_api_trace_onoff (am, VL_API_TRACE_TX, 0);
- vl_msg_api_trace_free (am, VL_API_TRACE_RX);
- vl_msg_api_trace_free (am, VL_API_TRACE_TX);
- }
- else if (unformat (input, "debug on"))
- {
- am->msg_print_flag = 1;
- }
- else if (unformat (input, "debug off"))
- {
- am->msg_print_flag = 0;
- }
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
- return 0;
-
-configure:
- if (vl_msg_api_trace_configure (am, which, nitems))
- {
- vlib_cli_output (vm, "warning: trace configure error (%d, %d)",
- which, nitems);
- }
-
- return 0;
-}
-
-/*?
- * Control the binary API trace mechanism
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (trace, static) =
-{
- .path = "set api-trace",
- .short_help = "API trace [on][on tx][on rx][off][free][debug on][debug off]",
- .function = vl_api_trace_command,
-};
-/* *INDENT-ON* */
static clib_error_t *
api_trace_config_fn (vlib_main_t * vm, unformat_input_t * input)
@@ -944,7 +1212,7 @@ extract_name (u8 * s)
rv = vec_dup (s);
while (vec_len (rv) && rv[vec_len (rv)] != '_')
- _vec_len (rv)--;
+ vec_dec_len (rv, 1);
rv[vec_len (rv)] = 0;
@@ -1203,7 +1471,6 @@ cleanup:
* decode table with the current image, to establish API differences.
*
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (dump_api_table_file, static) =
{
.path = "show api dump",
@@ -1211,7 +1478,6 @@ VLIB_CLI_COMMAND (dump_api_table_file, static) =
.function = dump_api_table_file_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vlibmemory/vlibapi_test.c b/src/vlibmemory/vlibapi_test.c
new file mode 100644
index 00000000000..bed4879030e
--- /dev/null
+++ b/src/vlibmemory/vlibapi_test.c
@@ -0,0 +1,470 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+
+#include <vpp/api/types.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/ip/ip_types_api.h>
+
+typedef struct
+{
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} vlib_test_main_t;
+vlib_test_main_t vlib_test_main;
+
+#define __plugin_msg_base vlib_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <vlibmemory/vlib.api_enum.h>
+#include <vlibmemory/vlib.api_types.h>
+
+static void
+vl_api_cli_reply_t_handler (vl_api_cli_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ vam->retval = retval;
+ vam->shmem_result = uword_to_pointer (mp->reply_in_shmem, u8 *);
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_cli_inband_reply_t_handler (vl_api_cli_inband_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ vec_reset_length (vam->cmd_reply);
+
+ vam->retval = retval;
+ if (retval == 0)
+ vam->cmd_reply = vl_api_from_api_to_new_vec (mp, &mp->reply);
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_get_node_index_reply_t_handler (vl_api_get_node_index_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ if (retval == 0)
+ errmsg ("node index %d", ntohl (mp->node_index));
+ vam->result_ready = 1;
+ }
+}
+
+static void
+vl_api_get_next_index_reply_t_handler (vl_api_get_next_index_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ if (retval == 0)
+ errmsg ("next node index %d", ntohl (mp->next_index));
+ vam->result_ready = 1;
+ }
+}
+
+static void
+vl_api_add_node_next_reply_t_handler (vl_api_add_node_next_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ if (retval == 0)
+ errmsg ("next index %d", ntohl (mp->next_index));
+ vam->result_ready = 1;
+ }
+}
+
+static void
+vl_api_get_f64_endian_value_reply_t_handler (
+ vl_api_get_f64_endian_value_reply_t *mp)
+{
+ // not yet implemented
+}
+
+static void
+vl_api_get_f64_increment_by_one_reply_t_handler (
+ vl_api_get_f64_increment_by_one_reply_t *mp)
+{
+ // not yet implemented
+}
+
+static int
+api_get_f64_endian_value (vat_main_t *vam)
+{
+ // not yet implemented
+ return -1;
+}
+
+static int
+api_get_f64_increment_by_one (vat_main_t *vam)
+{
+ // not yet implemented
+ return -1;
+}
+
+/*
+ * Pass CLI buffers directly in the CLI_INBAND API message,
+ * instead of an additional shared memory area.
+ */
+static int
+exec_inband (vat_main_t *vam)
+{
+ vl_api_cli_inband_t *mp;
+ unformat_input_t *i = vam->input;
+ int ret;
+
+ if (vec_len (i->buffer) == 0)
+ return -1;
+
+ if (vam->exec_mode == 0 && unformat (i, "mode"))
+ {
+ vam->exec_mode = 1;
+ return 0;
+ }
+ if (vam->exec_mode == 1 && (unformat (i, "exit") || unformat (i, "quit")))
+ {
+ vam->exec_mode = 0;
+ return 0;
+ }
+
+ /*
+ * In order for the CLI command to work, it
+ * must be a vector ending in \n, not a C-string ending
+ * in \n\0.
+ */
+ M2 (CLI_INBAND, mp, vec_len (vam->input->buffer));
+ vl_api_vec_to_api_string (vam->input->buffer, &mp->cmd);
+
+ S (mp);
+ W (ret);
+ /* json responses may or may not include a useful reply... */
+ if (vec_len (vam->cmd_reply))
+ print (vam->ofp, "%v", (char *) (vam->cmd_reply));
+ return ret;
+}
+static int
+api_cli_inband (vat_main_t *vam)
+{
+ return exec_inband (vam);
+}
+
+int
+exec (vat_main_t *vam)
+{
+ return exec_inband (vam);
+}
+
+static int
+api_cli (vat_main_t *vam)
+{
+ return exec_inband (vam);
+}
+
+static int
+api_get_node_index (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_get_node_index_t *mp;
+ u8 *name = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "node %s", &name))
+ ;
+ else
+ break;
+ }
+ if (name == 0)
+ {
+ errmsg ("node name required");
+ return -99;
+ }
+ if (vec_len (name) >= ARRAY_LEN (mp->node_name))
+ {
+ errmsg ("node name too long, max %d", ARRAY_LEN (mp->node_name));
+ return -99;
+ }
+
+ M (GET_NODE_INDEX, mp);
+ clib_memcpy (mp->node_name, name, vec_len (name));
+ vec_free (name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_get_next_index (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_get_next_index_t *mp;
+ u8 *node_name = 0, *next_node_name = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "node-name %s", &node_name))
+ ;
+ else if (unformat (i, "next-node-name %s", &next_node_name))
+ break;
+ }
+
+ if (node_name == 0)
+ {
+ errmsg ("node name required");
+ return -99;
+ }
+ if (vec_len (node_name) >= ARRAY_LEN (mp->node_name))
+ {
+ errmsg ("node name too long, max %d", ARRAY_LEN (mp->node_name));
+ return -99;
+ }
+
+ if (next_node_name == 0)
+ {
+ errmsg ("next node name required");
+ return -99;
+ }
+ if (vec_len (next_node_name) >= ARRAY_LEN (mp->next_name))
+ {
+ errmsg ("next node name too long, max %d", ARRAY_LEN (mp->next_name));
+ return -99;
+ }
+
+ M (GET_NEXT_INDEX, mp);
+ clib_memcpy (mp->node_name, node_name, vec_len (node_name));
+ clib_memcpy (mp->next_name, next_node_name, vec_len (next_node_name));
+ vec_free (node_name);
+ vec_free (next_node_name);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_add_node_next (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_add_node_next_t *mp;
+ u8 *name = 0;
+ u8 *next = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "node %s", &name))
+ ;
+ else if (unformat (i, "next %s", &next))
+ ;
+ else
+ break;
+ }
+ if (name == 0)
+ {
+ errmsg ("node name required");
+ return -99;
+ }
+ if (vec_len (name) >= ARRAY_LEN (mp->node_name))
+ {
+ errmsg ("node name too long, max %d", ARRAY_LEN (mp->node_name));
+ return -99;
+ }
+ if (next == 0)
+ {
+ errmsg ("next node required");
+ return -99;
+ }
+ if (vec_len (next) >= ARRAY_LEN (mp->next_name))
+ {
+ errmsg ("next name too long, max %d", ARRAY_LEN (mp->next_name));
+ return -99;
+ }
+
+ M (ADD_NODE_NEXT, mp);
+ clib_memcpy (mp->node_name, name, vec_len (name));
+ clib_memcpy (mp->next_name, next, vec_len (next));
+ vec_free (name);
+ vec_free (next);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_show_threads_reply_t_handler (vl_api_show_threads_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ int i, count = 0;
+
+ if (retval >= 0)
+ count = ntohl (mp->count);
+
+ for (i = 0; i < count; i++)
+ print (vam->ofp, "\n%-2d %-11s %-11s %-5d %-6d %-4d %-6d",
+ ntohl (mp->thread_data[i].id), mp->thread_data[i].name,
+ mp->thread_data[i].type, ntohl (mp->thread_data[i].pid),
+ ntohl (mp->thread_data[i].cpu_id), ntohl (mp->thread_data[i].core),
+ ntohl (mp->thread_data[i].cpu_socket));
+
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static int
+api_show_threads (vat_main_t *vam)
+{
+ vl_api_show_threads_t *mp;
+ int ret;
+
+ print (vam->ofp, "\n%-2s %-11s %-11s %-5s %-6s %-4s %-6s", "ID", "Name",
+ "Type", "LWP", "cpu_id", "Core", "Socket");
+
+ M (SHOW_THREADS, mp);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_get_node_graph_reply_t_handler (vl_api_get_node_graph_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ u8 *pvt_copy, *reply;
+ void *oldheap;
+ vlib_node_t *node;
+ int i;
+
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+
+ /* "Should never happen..." */
+ if (retval != 0)
+ return;
+
+ reply = uword_to_pointer (mp->reply_in_shmem, u8 *);
+ pvt_copy = vec_dup (reply);
+
+ /* Toss the shared-memory original... */
+ oldheap = vl_msg_push_heap ();
+
+ vec_free (reply);
+
+ vl_msg_pop_heap (oldheap);
+
+ if (vam->graph_nodes)
+ {
+ hash_free (vam->graph_node_index_by_name);
+
+ for (i = 0; i < vec_len (vam->graph_nodes[0]); i++)
+ {
+ node = vam->graph_nodes[0][i];
+ vec_free (node->name);
+ vec_free (node->next_nodes);
+ vec_free (node);
+ }
+ vec_free (vam->graph_nodes[0]);
+ vec_free (vam->graph_nodes);
+ }
+
+ vam->graph_node_index_by_name = hash_create_string (0, sizeof (uword));
+ vam->graph_nodes = vlib_node_unserialize (pvt_copy);
+ vec_free (pvt_copy);
+
+ for (i = 0; i < vec_len (vam->graph_nodes[0]); i++)
+ {
+ node = vam->graph_nodes[0][i];
+ hash_set_mem (vam->graph_node_index_by_name, node->name, i);
+ }
+}
+
+static int
+api_get_node_graph (vat_main_t *vam)
+{
+ vl_api_get_node_graph_t *mp;
+ int ret;
+
+ M (GET_NODE_GRAPH, mp);
+
+ /* send it... */
+ S (mp);
+ /* Wait for the reply */
+ W (ret);
+ return ret;
+}
+
+#define VL_API_LOCAL_SETUP_MESSAGE_ID_TABLE local_setup_message_id_table
+static void
+local_setup_message_id_table (vat_main_t *vam)
+{
+ /* Add exec as an alias for cli_inband */
+ hash_set_mem (vam->function_by_name, "exec", api_cli_inband);
+ hash_set_mem (vam->help_by_name, "exec",
+ "usage: exec <vpe-debug-CLI-command>");
+}
+
+#include <vlibmemory/vlib.api_test.c>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt
index a800230c9d5..fb8d294009d 100644
--- a/src/vnet/CMakeLists.txt
+++ b/src/vnet/CMakeLists.txt
@@ -16,6 +16,8 @@ unset(VNET_HEADERS)
unset(VNET_API_FILES)
unset(VNET_MULTIARCH_SOURCES)
+option(VPP_IP_FIB_MTRIE_16 "IP FIB's MTRIE Stride is 16-8-8 (if not set it's 8-8-8-8)" ON)
+
##############################################################################
# Generic stuff
##############################################################################
@@ -24,6 +26,22 @@ list(APPEND VNET_SOURCES
config.c
devices/devices.c
devices/netlink.c
+ dev/api.c
+ dev/args.c
+ dev/cli.c
+ dev/config.c
+ dev/counters.c
+ dev/dev.c
+ dev/dev_api.c
+ dev/error.c
+ dev/format.c
+ dev/handlers.c
+ dev/pci.c
+ dev/port.c
+ dev/process.c
+ dev/queue.c
+ dev/runtime.c
+ error.c
flow/flow.c
flow/flow_cli.c
flow/flow_api.c
@@ -33,9 +51,12 @@ list(APPEND VNET_SOURCES
interface_cli.c
interface_format.c
interface_output.c
+ interface/caps.c
interface/rx_queue.c
interface/tx_queue.c
interface/runtime.c
+ interface/monitor.c
+ interface/stats.c
interface_stats.c
misc.c
)
@@ -48,13 +69,14 @@ list(APPEND VNET_MULTIARCH_SOURCES
list(APPEND VNET_HEADERS
api_errno.h
+ error.h
buffer.h
config.h
devices/devices.h
devices/netlink.h
+ dev/dev.h
flow/flow.h
global_funcs.h
- handoff.h
interface/rx_queue_funcs.h
interface/tx_queue_funcs.h
interface.h
@@ -71,10 +93,13 @@ list(APPEND VNET_HEADERS
util/refcount.h
format_fns.h
ip/ip_format_fns.h
+ ip/ip_sas.h
ethernet/ethernet_format_fns.h
+ ethernet/ethernet_types_api.h
)
list(APPEND VNET_API_FILES
+ dev/dev.api
interface.api
interface_types.api
ip/ip_types.api
@@ -289,30 +314,6 @@ list(APPEND VNET_HEADERS
)
##############################################################################
-# Layer 2 / vxlan
-##############################################################################
-list(APPEND VNET_SOURCES
- vxlan/vxlan.c
- vxlan/encap.c
- vxlan/decap.c
- vxlan/vxlan_api.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- vxlan/encap.c
-)
-
-list(APPEND VNET_HEADERS
- vxlan/vxlan.h
- vxlan/vxlan_packet.h
- vxlan/vxlan_error.def
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES vxlan/decap.c)
-
-list(APPEND VNET_API_FILES vxlan/vxlan.api)
-
-##############################################################################
# Layer 2 / Bonding
##############################################################################
list(APPEND VNET_SOURCES
@@ -411,6 +412,7 @@ list(APPEND VNET_SOURCES
ip/punt.c
ip/punt_node.c
ip/vtep.c
+ ip/ip_sas.c
)
list(APPEND VNET_MULTIARCH_SOURCES
@@ -435,13 +437,11 @@ list(APPEND VNET_HEADERS
ip/icmp4.h
ip/icmp6.h
ip/igmp_packet.h
- ip/ip4_error.h
ip/ip4.h
ip/ip4_mtrie.h
ip/ip4_inlines.h
ip/ip4_packet.h
ip/ip46_address.h
- ip/ip6_error.h
ip/ip6.h
ip/ip6_hop_by_hop.h
ip/ip6_hop_by_hop_packet.h
@@ -453,6 +453,7 @@ list(APPEND VNET_HEADERS
ip/ip_table.h
ip/ip_interface.h
ip/ip_packet.h
+ ip/ip_psh_cksum.h
ip/ip_source_and_port_range_check.h
ip/ip_types.h
ip/lookup.h
@@ -460,6 +461,8 @@ list(APPEND VNET_HEADERS
ip/protocols.def
ip/punt_error.def
ip/punt.h
+ ip/reass/ip4_sv_reass.h
+ ip/reass/ip6_sv_reass.h
)
list(APPEND VNET_API_FILES
@@ -660,6 +663,7 @@ list(APPEND VNET_SOURCES
udp/udp_encap.c
udp/udp_decap.c
udp/udp_api.c
+ udp/udp_output.c
)
list(APPEND VNET_MULTIARCH_SOURCES
@@ -670,6 +674,7 @@ list(APPEND VNET_MULTIARCH_SOURCES
list(APPEND VNET_HEADERS
udp/udp_error.def
udp/udp.h
+ udp/udp_encap.h
udp/udp_packet.h
udp/udp_inlines.h
udp/udp_local.h
@@ -680,27 +685,10 @@ list(APPEND VNET_API_FILES udp/udp.api)
##############################################################################
# Tunnel protocol: gre
##############################################################################
-list(APPEND VNET_SOURCES
- gre/gre.c
- gre/node.c
- gre/interface.c
- gre/pg.c
- gre/gre_api.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- gre/node.c
- gre/gre.c
-)
-
list(APPEND VNET_HEADERS
- gre/gre.h
gre/packet.h
- gre/error.def
)
-list(APPEND VNET_API_FILES gre/gre.api)
-
##############################################################################
# Tunnel protocol: ipip
##############################################################################
@@ -739,6 +727,7 @@ list(APPEND VNET_API_FILES
list(APPEND VNET_HEADERS
tunnel/tunnel.h
tunnel/tunnel_dp.h
+ tunnel/tunnel_types_api.h
)
##############################################################################
@@ -765,39 +754,15 @@ list(APPEND VNET_MULTIARCH_SOURCES
list(APPEND VNET_HEADERS
mpls/mpls.h
+ mpls/mpls_lookup.h
mpls/mpls_types.h
mpls/mpls_tunnel.h
mpls/packet.h
- mpls/error.def
)
list(APPEND VNET_API_FILES mpls/mpls.api)
##############################################################################
-# Tunnel protocol: vxlan-gbp
-##############################################################################
-list(APPEND VNET_SOURCES
- vxlan-gbp/decap.c
- vxlan-gbp/encap.c
- vxlan-gbp/vxlan_gbp_api.c
- vxlan-gbp/vxlan_gbp.c
- vxlan-gbp/vxlan_gbp_packet.c
-)
-
-list (APPEND VNET_MULTIARCH_SOURCES
- vxlan-gbp/decap.c
- vxlan-gbp/encap.c
-)
-
-list(APPEND VNET_HEADERS
- vxlan-gbp/vxlan_gbp.h
- vxlan-gbp/vxlan_gbp_packet.h
- vxlan-gbp/vxlan_gbp_error.def
-)
-
-list(APPEND VNET_API_FILES vxlan-gbp/vxlan_gbp.api)
-
-##############################################################################
# Tunnel protocol: vxlan-gpe
##############################################################################
@@ -830,16 +795,21 @@ list(APPEND VNET_SOURCES
srv6/sr_policy_rewrite.c
srv6/sr_steering.c
srv6/sr_api.c
+ srv6/sr_pt.c
+ srv6/sr_pt_node.c
+ srv6/sr_pt_api.c
)
list(APPEND VNET_HEADERS
srv6/sr_packet.h
srv6/sr.h
+ srv6/sr_pt.h
)
list(APPEND VNET_API_FILES
srv6/sr.api
srv6/sr_types.api
+ srv6/sr_pt.api
)
##############################################################################
@@ -877,6 +847,21 @@ list(APPEND VNET_HEADERS
list(APPEND VNET_API_FILES ipfix-export/ipfix_export.api)
##############################################################################
+# HASH
+##############################################################################
+list(APPEND VNET_SOURCES
+ hash/hash.c
+ hash/cli.c
+ hash/crc32_5tuple.c
+ hash/handoff_eth.c
+ hash/hash_eth.c
+)
+
+list(APPEND VNET_HEADERS
+ hash/hash.h
+)
+
+##############################################################################
# GSO
##############################################################################
list(APPEND VNET_SOURCES
@@ -910,23 +895,6 @@ list(APPEND VNET_HEADERS
)
##############################################################################
-# lawful intercept
-##############################################################################
-
-list(APPEND VNET_SOURCES
- lawful-intercept/lawful_intercept.c
- lawful-intercept/node.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- lawful-intercept/node.c
-)
-
-list(APPEND VNET_HEADERS
- lawful-intercept/lawful_intercept.h
-)
-
-##############################################################################
# SPAN (port mirroring)
##############################################################################
@@ -977,15 +945,11 @@ list(APPEND VNET_SOURCES
devices/virtio/format.c
devices/virtio/node.c
devices/virtio/pci.c
- devices/virtio/vhost_user.c
- devices/virtio/vhost_user_input.c
- devices/virtio/vhost_user_output.c
- devices/virtio/vhost_user_api.c
devices/virtio/virtio.c
devices/virtio/virtio_api.c
devices/virtio/virtio_pci_legacy.c
devices/virtio/virtio_pci_modern.c
- devices/virtio/virtio_process.c
+ devices/virtio/virtio_pre_input.c
devices/virtio/virtio_types_api.c
)
@@ -997,20 +961,15 @@ list(APPEND VNET_HEADERS
devices/virtio/virtio_pci_legacy.h
devices/virtio/virtio_pci_modern.h
devices/virtio/vhost_std.h
- devices/virtio/vhost_user.h
devices/virtio/virtio_types_api.h
)
list(APPEND VNET_MULTIARCH_SOURCES
- devices/virtio/vhost_user_input.c
- devices/virtio/vhost_user_output.c
devices/virtio/node.c
- devices/af_packet/node.c
devices/virtio/device.c
)
list(APPEND VNET_API_FILES
- devices/virtio/vhost_user.api
devices/virtio/virtio.api
devices/virtio/virtio_types.api
)
@@ -1019,6 +978,7 @@ list(APPEND VNET_API_FILES
# tap interface (with virtio backend)
##############################################################################
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
list(APPEND VNET_SOURCES
devices/tap/cli.c
devices/tap/tap.c
@@ -1032,6 +992,7 @@ list(APPEND VNET_HEADERS
list(APPEND VNET_API_FILES
devices/tap/tapv2.api
)
+endif()
##############################################################################
# tap interface (with virtio backend)
@@ -1059,6 +1020,7 @@ list(APPEND VNET_SOURCES
session/session_rules_table.c
session/session_lookup.c
session/session_node.c
+ session/session_input.c
session/transport.c
session/application.c
session/application_worker.c
@@ -1105,27 +1067,6 @@ list(APPEND VNET_HEADERS
tls/tls_test.h
)
-##############################################################################
-# Linux packet interface
-##############################################################################
-
-list(APPEND VNET_SOURCES
- devices/af_packet/af_packet.c
- devices/af_packet/device.c
- devices/af_packet/node.c
- devices/af_packet/cli.c
- devices/af_packet/af_packet_api.c
-)
-
-list(APPEND VNET_MULTIARCH_SOURCES
- devices/af_packet/device.c
-)
-
-list(APPEND VNET_HEADERS
- devices/af_packet/af_packet.h
-)
-
-list(APPEND VNET_API_FILES devices/af_packet/af_packet.api)
##############################################################################
# Driver feature graph arc support
@@ -1149,6 +1090,7 @@ list(APPEND VNET_API_FILES feature/feature.api)
# FIXME: unix/hgshm.c
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
list(APPEND VNET_SOURCES
unix/gdb_funcs.c
unix/tuntap.c
@@ -1157,6 +1099,7 @@ list(APPEND VNET_SOURCES
list(APPEND VNET_HEADERS
unix/tuntap.h
)
+endif()
##############################################################################
# FIB
@@ -1164,7 +1107,10 @@ list(APPEND VNET_HEADERS
list(APPEND VNET_SOURCES
fib/fib.c
+ fib/ip4_fib_hash.c
fib/ip4_fib.c
+ fib/ip4_fib_16.c
+ fib/ip4_fib_8.c
fib/ip6_fib.c
fib/mpls_fib.c
fib/fib_table.c
@@ -1200,7 +1146,11 @@ list(APPEND VNET_SOURCES
list(APPEND VNET_HEADERS
fib/fib.h
fib/fib_api.h
+ fib/fib_entry_track.h
fib/ip4_fib.h
+ fib/ip4_fib_8.h
+ fib/ip4_fib_16.h
+ fib/ip4_fib_hash.h
fib/ip6_fib.h
fib/fib_types.h
fib/fib_table.h
@@ -1208,6 +1158,8 @@ list(APPEND VNET_HEADERS
fib/fib_node_list.h
fib/fib_entry.h
fib/fib_entry_delegate.h
+ fib/fib_path.h
+ fib/fib_path_list.h
fib/fib_sas.h
fib/fib_source.h
)
@@ -1287,6 +1239,7 @@ list(APPEND VNET_MULTIARCH_SOURCES
list(APPEND VNET_HEADERS
dpo/load_balance.h
+ dpo/load_balance_map.h
dpo/drop_dpo.h
dpo/lookup_dpo.h
dpo/punt_dpo.h
@@ -1400,10 +1353,13 @@ list(APPEND VNET_MULTIARCH_SOURCES
)
list(APPEND VNET_HEADERS
- bier/bier_types.h
+ bier/bier_bit_string.h
bier/bier_entry.h
+ bier/bier_fwd.h
+ bier/bier_hdr_inlines.h
bier/bier_update.h
bier/bier_table.h
+ bier/bier_types.h
)
list(APPEND VNET_API_FILES bier/bier.api)
@@ -1462,6 +1418,7 @@ list (APPEND VNET_SOURCES
ip6-nd/ip6_nd.c
ip6-nd/ip6_nd_api.c
ip6-nd/ip6_nd_proxy.c
+ ip6-nd/ip6_nd_mirror_proxy.c
ip6-nd/ip6_ra.c
ip6-nd/rd_cp.c
ip6-nd/rd_cp_api.c
@@ -1498,6 +1455,17 @@ add_vpp_library (vatclient
DEPENDS api_headers
)
+add_vat_test_library(vnet
+ interface_test.c
+ ip/ip_test.c
+ arp/arp_test.c
+ ip6-nd/ip6_nd_test.c
+ srmpls/sr_mpls_test.c
+ session/session_test.c
+ l2/l2_test.c
+ ipsec/ipsec_test.c
+)
+
##############################################################################
# VAT2 plugins
##############################################################################
diff --git a/src/vnet/MTU.md b/src/vnet/MTU.md
deleted file mode 100644
index a0a8ba87490..00000000000
--- a/src/vnet/MTU.md
+++ /dev/null
@@ -1,72 +0,0 @@
-# MTU Introduction {#mtu_doc}
-Maximum Transmission Unit is a term used to describe the maximum sized "thingy" that can be sent out an interface. It can refer to the maximum frame size that a NIC can send. On Ethernet that would include the Ethernet header but typically not the IGF. It can refer to the maximum packet size, that is, on Ethernet an MTU of 1500, would allow an IPv4 packet of 1500 bytes, that would result in an Ethernet frame of 1518 bytes.
-
-# MTU in VPP
-VPP allows setting of the physical payload MTU. I.e. not including L2 overhead. Setting the hardware MTU will program the NIC.
-This MTU will be inherited by all software interfaces.
-
-VPP also allows setting of the payload MTU for software interfaces. Independently of the MTU set on the hardware. If the software payload MTU is set higher than the capability of the NIC, the packet will be dropped.
-
-In addition VPP supports setting the MTU of individual network layer protocols. IPv4, IPv6 or MPLS. For example an IPv4 MTU of 1500 (includes the IPv4 header) will fit in a hardware payload MTU of 1500.
-
-_Note we might consider changing the hardware payload MTU to hardware MTU_. That is, the MTU includes all L2 framing. Then the payload MTU can be calculated based on the interface's configuration. E.g. 802.1q tags etc.
-
-There are currently no checks or warnings if e.g. the user configures a per-protocol MTU larger than the underlying payload MTU. If that happens packets will be fragmented or dropped.
-
-## Data structures
-The hardware payload MTU is stored in the max_packet_bytes variable in the vnet_hw_interface_t structure.
-
-The software MTU (previously max_l3_packet_bytes) is in vnet_sw_interface_t->in mtu[VNET_N_MTU].
-
-# API
-
-## Set physical MTU
-
-This API message is used to set the physical MTU. It is currently limited to Ethernet interfaces. Note, this programs the NIC.
-
-```
-autoreply define hw_interface_set_mtu
-{
- u32 client_index;
- u32 context;
- u32 sw_if_index;
- u16 mtu;
-};
-```
-
-## Set the L2 payload MTU (not including the L2 header) and per-protocol MTUs
-
-This API message sets the L3 payload MTU. E.g. on Ethernet it is the maximum size of the Ethernet payload. If a value is left as 0, then the default is picked from VNET_MTU_L3.
-
-```
-autoreply define sw_interface_set_mtu
-{
- u32 client_index;
- u32 context;
- u32 sw_if_index;
- /* $$$$ Replace with enum */
- u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
-};
-
-```
-
-## Get interface MTU
-
-The various MTUs on an interface can be queried with the sw_interface_dump/sw_interface_details calls.
-
-```
-define sw_interface_details
-{
- /* MTU */
- u16 link_mtu;
-
- /* Per protocol MTUs */
- u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
-};
-```
-
-# CLI
-
-```
-set interface mtu [packet|ip4|ip6|mpls] <value> <interface>
-```
diff --git a/src/vnet/adj/adj.c b/src/vnet/adj/adj.c
index 8808294f7a6..201561fe485 100644
--- a/src/vnet/adj/adj.c
+++ b/src/vnet/adj/adj.c
@@ -34,6 +34,11 @@ vlib_combined_counter_main_t adjacency_counters = {
ip_adjacency_t *adj_pool;
/**
+ * The adjacency logger
+ */
+vlib_log_class_t adj_logger;
+
+/**
* @brief Global Config for enabling per-adjacency counters.
* By default these are disabled.
*/
@@ -64,14 +69,12 @@ ip_adjacency_t *
adj_alloc (fib_protocol_t proto)
{
ip_adjacency_t *adj;
- u8 need_barrier_sync = 0;
+ u8 need_barrier_sync = pool_get_will_expand (adj_pool);
vlib_main_t *vm;
vm = vlib_get_main();
ASSERT (vm->thread_index == 0);
- pool_get_aligned_will_expand (adj_pool, need_barrier_sync,
- CLIB_CACHE_LINE_BYTES);
/* If the adj_pool will expand, stop the parade. */
if (need_barrier_sync)
vlib_worker_thread_barrier_sync (vm);
@@ -309,12 +312,12 @@ adj_last_lock_gone (ip_adjacency_t *adj)
break;
}
- vlib_worker_thread_barrier_release(vm);
fib_node_deinit(&adj->ia_node);
ASSERT(0 == vec_len(adj->ia_delegates));
vec_free(adj->ia_delegates);
pool_put(adj_pool, adj);
+ vlib_worker_thread_barrier_release(vm);
}
u32
@@ -350,7 +353,6 @@ adj_lock (adj_index_t adj_index)
adj = adj_get(adj_index);
ASSERT(adj);
- ADJ_DBG(adj, "lock");
fib_node_lock(&adj->ia_node);
}
@@ -367,9 +369,6 @@ adj_unlock (adj_index_t adj_index)
adj = adj_get(adj_index);
ASSERT(adj);
- ADJ_DBG(adj, "unlock");
- ASSERT(adj);
-
fib_node_unlock(&adj->ia_node);
}
@@ -650,6 +649,8 @@ adj_module_init (vlib_main_t * vm)
vnet_feature_register(adj_feature_update, NULL);
+ adj_logger = vlib_log_register_class("adj", "adj");
+
return (NULL);
}
@@ -703,7 +704,6 @@ adj_show (vlib_main_t * vm,
}
else
{
- /* *INDENT-OFF* */
pool_foreach_index (ai, adj_pool)
{
if (~0 != sw_if_index &&
@@ -718,7 +718,6 @@ adj_show (vlib_main_t * vm,
FORMAT_IP_ADJACENCY_NONE);
}
}
- /* *INDENT-ON* */
}
}
return 0;
diff --git a/src/vnet/adj/adj.h b/src/vnet/adj/adj.h
index c1922c755ec..860193c04ad 100644
--- a/src/vnet/adj/adj.h
+++ b/src/vnet/adj/adj.h
@@ -165,14 +165,6 @@ typedef enum adj_attr_t_
ADJ_ATTR_SYNC_WALK_ACTIVE = 0,
/**
- * Packets TX through the midchain do not increment the interface
- * counters. This should be used when the adj is associated with an L2
- * interface and that L2 interface is in a bridge domain. In that case
- * the packet will have traversed the interface's TX node, and hence have
- * been counted, before it traverses ths midchain
- */
- ADJ_ATTR_MIDCHAIN_NO_COUNT,
- /**
* When stacking midchains on a fib-entry extract the choice from the
* load-balance returned based on an IP hash of the adj's rewrite
*/
@@ -195,7 +187,6 @@ typedef enum adj_attr_t_
#define ADJ_ATTR_NAMES { \
[ADJ_ATTR_SYNC_WALK_ACTIVE] = "walk-active", \
- [ADJ_ATTR_MIDCHAIN_NO_COUNT] = "midchain-no-count", \
[ADJ_ATTR_MIDCHAIN_IP_STACK] = "midchain-ip-stack", \
[ADJ_ATTR_MIDCHAIN_LOOPED] = "midchain-looped", \
[ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR] = "midchain-ip4o4-hdr-fixup", \
@@ -214,7 +205,6 @@ typedef enum adj_flags_t_
{
ADJ_FLAG_NONE = 0,
ADJ_FLAG_SYNC_WALK_ACTIVE = (1 << ADJ_ATTR_SYNC_WALK_ACTIVE),
- ADJ_FLAG_MIDCHAIN_NO_COUNT = (1 << ADJ_ATTR_MIDCHAIN_NO_COUNT),
ADJ_FLAG_MIDCHAIN_IP_STACK = (1 << ADJ_ATTR_MIDCHAIN_IP_STACK),
ADJ_FLAG_MIDCHAIN_LOOPED = (1 << ADJ_ATTR_MIDCHAIN_LOOPED),
ADJ_FLAG_MIDCHAIN_FIXUP_IP4O4_HDR = (1 << ADJ_ATTR_MIDCHAIN_FIXUP_IP4O4_HDR),
diff --git a/src/vnet/adj/adj_bfd.c b/src/vnet/adj/adj_bfd.c
index 2d787d41ab6..e54ba6d74ae 100644
--- a/src/vnet/adj/adj_bfd.c
+++ b/src/vnet/adj/adj_bfd.c
@@ -114,9 +114,7 @@ void
adj_bfd_notify (bfd_listen_event_e event,
const bfd_session_t *session)
{
- const bfd_udp_key_t *key;
adj_bfd_delegate_t *abd;
- fib_protocol_t fproto;
adj_delegate_t *aed;
adj_index_t ai;
@@ -129,19 +127,28 @@ adj_bfd_notify (bfd_listen_event_e event,
return;
}
- key = &session->udp.key;
-
- fproto = (ip46_address_is_ip4 (&key->peer_addr) ?
- FIB_PROTOCOL_IP4:
- FIB_PROTOCOL_IP6);
+ switch (session->transport)
+ {
+ case BFD_TRANSPORT_UDP4:
+ case BFD_TRANSPORT_UDP6:
+ /*
+ * pick up the same adjacency that the BFD session is using
+ * to send. The BFD session is holding a lock on this adj.
+ */
+ ai = session->udp.adj_index;
+ break;
+ default:
+ /*
+ * Don't know what adj this session uses
+ */
+ return;
+ }
- /*
- * find the adj that corresponds to the BFD session.
- */
- ai = adj_nbr_add_or_lock(fproto,
- fib_proto_to_link(fproto),
- &key->peer_addr,
- key->sw_if_index);
+ if (INDEX_INVALID == ai)
+ {
+ /* No associated Adjacency with the session */
+ return;
+ }
switch (event)
{
@@ -160,13 +167,6 @@ adj_bfd_notify (bfd_listen_event_e event,
else
{
/*
- * lock the adj. add the delegate.
- * Locking the adj prevents it being removed and thus maintains
- * the BFD derived states
- */
- adj_lock(ai);
-
- /*
* allocate and init a new delegate struct
*/
pool_get(abd_pool, abd);
@@ -213,14 +213,12 @@ adj_bfd_notify (bfd_listen_event_e event,
{
/*
* has an associated BFD tracking delegate
- * remove the BFD tracking delegate, update children, then
- * unlock the adj
+ * remove the BFD tracking delegate, update children
*/
adj_delegate_remove(ai, ADJ_DELEGATE_BFD);
pool_put(abd_pool, abd);
adj_bfd_update_walk(ai);
- adj_unlock(ai);
}
/*
* else
@@ -228,11 +226,6 @@ adj_bfd_notify (bfd_listen_event_e event,
*/
break;
}
-
- /*
- * unlock match of the add-or-lock at the start
- */
- adj_unlock(ai);
}
int
@@ -287,9 +280,7 @@ adj_bfd_main_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (adj_bfd_main_init)=
{
.runs_after = VLIB_INITS("bfd_main_init"),
};
-/* *INDENT-ON* */
diff --git a/src/vnet/adj/adj_dp.h b/src/vnet/adj/adj_dp.h
index aff1a2b1f43..186044b90ad 100644
--- a/src/vnet/adj/adj_dp.h
+++ b/src/vnet/adj/adj_dp.h
@@ -36,22 +36,36 @@ adj_midchain_ipip44_fixup (vlib_main_t * vm,
ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
if (PREDICT_TRUE(TUNNEL_ENCAP_DECAP_FLAG_NONE == flags))
- {
- ip_csum_t sum;
- u16 old,new;
-
- old = 0;
- new = ip4->length;
-
- sum = ip4->checksum;
- sum = ip_csum_update (sum, old, new, ip4_header_t, length);
- ip4->checksum = ip_csum_fold (sum);
- }
+ {
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP |
+ VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM);
+ }
+ else
+ {
+ ip_csum_t sum;
+ u16 old,new;
+ old = 0;
+ new = ip4->length;
+ sum = ip4->checksum;
+ sum = ip_csum_update (sum, old, new, ip4_header_t, length);
+ ip4->checksum = ip_csum_fold (sum);
+ }
+ }
else
- {
+ {
tunnel_encap_fixup_4o4 (flags, ip4 + 1, ip4);
- ip4->checksum = ip4_header_checksum (ip4);
- }
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP |
+ VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
+ }
}
static_always_inline void
diff --git a/src/vnet/adj/adj_glean.c b/src/vnet/adj/adj_glean.c
index 8df104bd19d..ceece0d74ed 100644
--- a/src/vnet/adj/adj_glean.c
+++ b/src/vnet/adj/adj_glean.c
@@ -45,7 +45,7 @@ adj_glean_db_lookup (fib_protocol_t proto,
{
uword *p;
- if (vec_len(adj_gleans[proto]) <= sw_if_index)
+ if ((proto >= FIB_PROTOCOL_IP_MAX) || vec_len(adj_gleans[proto]) <= sw_if_index)
return (ADJ_INDEX_INVALID);
p = hash_get_mem (adj_gleans[proto][sw_if_index], nh_addr);
@@ -66,6 +66,7 @@ adj_glean_db_insert (fib_protocol_t proto,
vlib_worker_thread_barrier_sync(vm);
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
vec_validate(adj_gleans[proto], sw_if_index);
if (NULL == adj_gleans[proto][sw_if_index])
@@ -186,10 +187,36 @@ adj_glean_update_rewrite_walk (adj_index_t ai,
return (ADJ_WALK_RC_CONTINUE);
}
-void
-adj_glean_update_rewrite_itf (u32 sw_if_index)
+static void
+adj_glean_walk_proto (fib_protocol_t proto,
+ u32 sw_if_index,
+ adj_walk_cb_t cb,
+ void *data)
{
- adj_glean_walk (sw_if_index, adj_glean_update_rewrite_walk, NULL);
+ adj_index_t ai, *aip, *ais = NULL;
+ ip46_address_t *conn;
+
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
+ if (vec_len(adj_gleans[proto]) <= sw_if_index ||
+ NULL == adj_gleans[proto][sw_if_index])
+ return;
+
+ /*
+ * Walk first to collect the indices
+ * then walk the collection. This is safe
+ * to modifications of the hash table
+ */
+ hash_foreach_mem(conn, ai, adj_gleans[proto][sw_if_index],
+ ({
+ vec_add1(ais, ai);
+ }));
+
+ vec_foreach(aip, ais)
+ {
+ if (ADJ_WALK_RC_STOP == cb(*aip, data))
+ break;
+ }
+ vec_free(ais);
}
void
@@ -201,29 +228,7 @@ adj_glean_walk (u32 sw_if_index,
FOR_EACH_FIB_IP_PROTOCOL(proto)
{
- adj_index_t ai, *aip, *ais = NULL;
- ip46_address_t *conn;
-
- if (vec_len(adj_gleans[proto]) <= sw_if_index ||
- NULL == adj_gleans[proto][sw_if_index])
- continue;
-
- /*
- * Walk first to collect the indices
- * then walk the collection. This is safe
- * to modifications of the hash table
- */
- hash_foreach_mem(conn, ai, adj_gleans[proto][sw_if_index],
- ({
- vec_add1(ais, ai);
- }));
-
- vec_foreach(aip, ais)
- {
- if (ADJ_WALK_RC_STOP == cb(*aip, data))
- break;
- }
- vec_free(ais);
+ adj_glean_walk_proto (proto, sw_if_index, cb, data);
}
}
@@ -241,6 +246,7 @@ adj_glean_get (fib_protocol_t proto,
ip46_address_t *conn;
adj_index_t ai;
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
if (vec_len(adj_gleans[proto]) <= sw_if_index ||
NULL == adj_gleans[proto][sw_if_index])
return (ADJ_INDEX_INVALID);
@@ -262,6 +268,7 @@ adj_glean_get_src (fib_protocol_t proto,
const ip_adjacency_t *adj;
adj_index_t ai;
+ ASSERT(proto < FIB_PROTOCOL_IP_MAX);
if (vec_len(adj_gleans[proto]) <= sw_if_index ||
NULL == adj_gleans[proto][sw_if_index])
return (NULL);
@@ -425,6 +432,64 @@ adj_glean_interface_delete (vnet_main_t * vnm,
VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_glean_interface_delete);
+/**
+ * Callback function invoked when an interface's MAC Address changes
+ */
+static void
+adj_glean_ethernet_change_mac (ethernet_main_t * em,
+ u32 sw_if_index,
+ uword opaque)
+{
+ adj_glean_walk (sw_if_index, adj_glean_update_rewrite_walk, NULL);
+}
+
+static void
+adj_glean_table_bind (fib_protocol_t fproto,
+ u32 sw_if_index,
+ u32 itf_fib_index)
+{
+ /*
+ * for each glean on the interface trigger a walk back to the children
+ */
+ fib_node_back_walk_ctx_t bw_ctx = {
+ .fnbw_reason = FIB_NODE_BW_REASON_FLAG_INTERFACE_BIND,
+ .interface_bind = {
+ .fnbw_to_fib_index = itf_fib_index,
+ },
+ };
+
+ adj_glean_walk_proto (fproto, sw_if_index, adj_glean_start_backwalk, &bw_ctx);
+}
+
+
+/**
+ * Callback function invoked when an interface's IPv6 Table
+ * binding changes
+ */
+static void
+adj_glean_ip6_table_bind (ip6_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ u32 new_fib_index,
+ u32 old_fib_index)
+{
+ adj_glean_table_bind (FIB_PROTOCOL_IP6, sw_if_index, new_fib_index);
+}
+
+/**
+ * Callback function invoked when an interface's IPv4 Table
+ * binding changes
+ */
+static void
+adj_glean_ip4_table_bind (ip4_main_t * im,
+ uword opaque,
+ u32 sw_if_index,
+ u32 new_fib_index,
+ u32 old_fib_index)
+{
+ adj_glean_table_bind (FIB_PROTOCOL_IP4, sw_if_index, new_fib_index);
+}
+
u8*
format_adj_glean (u8* s, va_list *ap)
{
@@ -509,4 +574,20 @@ void
adj_glean_module_init (void)
{
dpo_register(DPO_ADJACENCY_GLEAN, &adj_glean_dpo_vft, glean_nodes);
+
+ ethernet_address_change_ctx_t ctx = {
+ .function = adj_glean_ethernet_change_mac,
+ .function_opaque = 0,
+ };
+ vec_add1 (ethernet_main.address_change_callbacks, ctx);
+
+ ip6_table_bind_callback_t cbt6 = {
+ .function = adj_glean_ip6_table_bind,
+ };
+ vec_add1 (ip6_main.table_bind_callbacks, cbt6);
+
+ ip4_table_bind_callback_t cbt4 = {
+ .function = adj_glean_ip4_table_bind,
+ };
+ vec_add1 (ip4_main.table_bind_callbacks, cbt4);
}
diff --git a/src/vnet/adj/adj_glean.h b/src/vnet/adj/adj_glean.h
index a06b9e81616..9e25fd9c919 100644
--- a/src/vnet/adj/adj_glean.h
+++ b/src/vnet/adj/adj_glean.h
@@ -67,7 +67,6 @@ extern adj_index_t adj_glean_get(fib_protocol_t proto,
* glean behaviour on an adjacency liked to a connected prefix.
*/
extern void adj_glean_update_rewrite(adj_index_t adj_index);
-extern void adj_glean_update_rewrite_itf(u32 sw_if_index);
/**
* Return the source address from the glean
diff --git a/src/vnet/adj/adj_internal.h b/src/vnet/adj/adj_internal.h
index 3dbf7e2a371..380af46e22a 100644
--- a/src/vnet/adj/adj_internal.h
+++ b/src/vnet/adj/adj_internal.h
@@ -31,24 +31,20 @@
/*
* Debug macro
*/
-#ifdef ADJ_DEBUG
-#define ADJ_DBG(_adj, _fmt, _args...) \
-{ \
- clib_warning("adj:[%d:%p]:" _fmt, \
- _adj - adj_pool, _adj, \
- ##_args); \
+extern vlib_log_class_t adj_logger;
+#define ADJ_DBG(_adj, _fmt, _args...) \
+{ \
+ vlib_log_debug(adj_logger, "adj:[%d:%p]:" _fmt, \
+ _adj - adj_pool, _adj, \
+ ##_args); \
}
-#else
-#define ADJ_DBG(_e, _fmt, _args...)
-#endif
/*
* Vlib nodes
*/
extern vlib_node_registration_t adj_nsh_midchain_node;
extern vlib_node_registration_t adj_nsh_rewrite_node;
-extern vlib_node_registration_t adj_midchain_tx_no_count_node;
-extern vlib_node_registration_t adj_midchain_tx_node;
+extern vlib_node_registration_t adj_midchain_tx;
static inline u32
adj_get_rewrite_node (vnet_link_t linkt)
@@ -128,6 +124,7 @@ extern void adj_nbr_remove(adj_index_t ai,
vnet_link_t link_type,
const ip46_address_t *nh_addr,
u32 sw_if_index);
+extern u32 adj_nbr_get_n_adjs(vnet_link_t link_type, u32 sw_if_index);
extern void adj_glean_remove(ip_adjacency_t *adj);
extern void adj_mcast_remove(fib_protocol_t proto,
u32 sw_if_index);
diff --git a/src/vnet/adj/adj_mcast.c b/src/vnet/adj/adj_mcast.c
index a20f61f6f6b..573105b7228 100644
--- a/src/vnet/adj/adj_mcast.c
+++ b/src/vnet/adj/adj_mcast.c
@@ -82,6 +82,8 @@ adj_mcast_add_or_lock (fib_protocol_t proto,
*/
vnet_update_adjacency_for_sw_interface(vnm, sw_if_index,
adj_get_index(adj));
+
+ adj_delegate_adj_created(adj);
}
else
{
@@ -89,8 +91,6 @@ adj_mcast_add_or_lock (fib_protocol_t proto,
adj_lock(adj_get_index(adj));
}
- adj_delegate_adj_created(adj);
-
return (adj_get_index(adj));
}
diff --git a/src/vnet/adj/adj_midchain.c b/src/vnet/adj/adj_midchain.c
index 9f709ad13be..8e6a940befa 100644
--- a/src/vnet/adj/adj_midchain.c
+++ b/src/vnet/adj/adj_midchain.c
@@ -75,52 +75,37 @@ adj_get_midchain_node (vnet_link_t link)
}
static u8
-adj_midchain_get_feature_arc_index_for_link_type (const ip_adjacency_t *adj)
+adj_midchain_get_feature_arc_index (const ip_adjacency_t *adj)
{
- u8 arc = (u8) ~0;
switch (adj->ia_link)
{
case VNET_LINK_IP4:
- {
- arc = ip4_main.lookup_main.output_feature_arc_index;
- break;
- }
+ return ip4_main.lookup_main.output_feature_arc_index;
case VNET_LINK_IP6:
- {
- arc = ip6_main.lookup_main.output_feature_arc_index;
- break;
- }
+ return ip6_main.lookup_main.output_feature_arc_index;
case VNET_LINK_MPLS:
- {
- arc = mpls_main.output_feature_arc_index;
- break;
- }
+ return mpls_main.output_feature_arc_index;
case VNET_LINK_ETHERNET:
- {
- arc = ethernet_main.output_feature_arc_index;
- break;
- }
+ return ethernet_main.output_feature_arc_index;
case VNET_LINK_NSH:
- {
- arc = nsh_main_placeholder.output_feature_arc_index;
- break;
- }
case VNET_LINK_ARP:
- ASSERT(0);
break;
}
-
- ASSERT (arc != (u8) ~0);
-
- return (arc);
+ ASSERT (0);
+ return (0);
}
static u32
adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
{
- return ((adj->ia_flags & ADJ_FLAG_MIDCHAIN_NO_COUNT) ?
- adj_midchain_tx_no_count_node.index :
- adj_midchain_tx_node.index);
+ return (adj_midchain_tx.index);
+}
+
+static u32
+adj_nbr_midchain_get_next_node (ip_adjacency_t *adj)
+{
+ return (vnet_feature_get_end_node(adj_midchain_get_feature_arc_index(adj),
+ adj->rewrite_header.sw_if_index));
}
/**
@@ -131,17 +116,7 @@ adj_nbr_midchain_get_tx_node (ip_adjacency_t *adj)
void
adj_midchain_teardown (ip_adjacency_t *adj)
{
- vlib_main_t *vm = vlib_get_main();
-
dpo_reset(&adj->sub_type.midchain.next_dpo);
-
- vlib_worker_thread_barrier_sync(vm);
- adj->ia_cfg_index = vnet_feature_modify_end_node(
- adj_midchain_get_feature_arc_index_for_link_type (adj),
- adj->rewrite_header.sw_if_index,
- vlib_get_node_by_name (vlib_get_main(),
- (u8*) "interface-output")->index);
- vlib_worker_thread_barrier_release(vm);
}
/**
@@ -155,9 +130,7 @@ adj_midchain_setup (adj_index_t adj_index,
const void *data,
adj_flags_t flags)
{
- vlib_main_t *vm = vlib_get_main();
ip_adjacency_t *adj;
- u32 tx_node;
ASSERT(ADJ_INDEX_INVALID != adj_index);
@@ -181,15 +154,6 @@ adj_midchain_setup (adj_index_t adj_index,
adj->rewrite_header.flags &= ~VNET_REWRITE_FIXUP_FLOW_HASH;
}
- tx_node = adj_nbr_midchain_get_tx_node(adj);
-
- vlib_worker_thread_barrier_sync(vm);
- adj->ia_cfg_index = vnet_feature_modify_end_node(
- adj_midchain_get_feature_arc_index_for_link_type (adj),
- adj->rewrite_header.sw_if_index,
- tx_node);
- vlib_worker_thread_barrier_release(vm);
-
/*
* stack the midchain on the drop so it's ready to forward in the adj-midchain-tx.
* The graph arc used/created here is from the midchain-tx node to the
@@ -197,7 +161,7 @@ adj_midchain_setup (adj_index_t adj_index,
* node are any output features, then the midchain-tx. from there we
* need to get to the stacked child's node.
*/
- dpo_stack_from_node(tx_node,
+ dpo_stack_from_node(adj_nbr_midchain_get_tx_node(adj),
&adj->sub_type.midchain.next_dpo,
drop_dpo_get(vnet_link_to_dpo_proto(adj->ia_link)));
}
@@ -238,7 +202,7 @@ adj_nbr_midchain_update_rewrite (adj_index_t adj_index,
adj_nbr_update_rewrite_internal(adj,
IP_LOOKUP_NEXT_MIDCHAIN,
adj_get_midchain_node(adj->ia_link),
- adj_nbr_midchain_get_tx_node(adj),
+ adj_nbr_midchain_get_next_node(adj),
rewrite);
}
@@ -260,11 +224,6 @@ adj_nbr_midchain_update_next_node (adj_index_t adj_index,
adj->ia_node_index,
next_node);
- adj->ia_cfg_index = vnet_feature_modify_end_node(
- adj_midchain_get_feature_arc_index_for_link_type (adj),
- adj->rewrite_header.sw_if_index,
- next_node);
-
vlib_worker_thread_barrier_release(vm);
}
@@ -284,12 +243,7 @@ adj_nbr_midchain_reset_next_node (adj_index_t adj_index)
adj->rewrite_header.next_index =
vlib_node_add_next(vlib_get_main(),
adj->ia_node_index,
- adj_nbr_midchain_get_tx_node(adj));
-
- adj->ia_cfg_index = vnet_feature_modify_end_node(
- adj_midchain_get_feature_arc_index_for_link_type (adj),
- adj->rewrite_header.sw_if_index,
- adj_nbr_midchain_get_tx_node(adj));
+ adj_nbr_midchain_get_next_node(adj));
vlib_worker_thread_barrier_release(vm);
}
diff --git a/src/vnet/adj/adj_midchain.h b/src/vnet/adj/adj_midchain.h
index 5fb0ee8efb3..eee8c99ae40 100644
--- a/src/vnet/adj/adj_midchain.h
+++ b/src/vnet/adj/adj_midchain.h
@@ -99,7 +99,7 @@ extern void adj_nbr_midchain_stack(adj_index_t adj_index,
* The FIB entry to stack on
*
* @param fct
- * The chain type to use from the fib entry fowarding
+ * The chain type to use from the fib entry forwarding
*/
extern void adj_nbr_midchain_stack_on_fib_entry(adj_index_t adj_index,
fib_node_index_t fei,
@@ -160,6 +160,11 @@ extern void adj_midchain_delegate_restack(adj_index_t ai);
*/
extern void adj_midchain_delegate_unstack(adj_index_t ai);
+/**
+ * @brief remove a midchain delegate (this stacks it on a drop)
+ */
+extern void adj_midchain_delegate_remove (adj_index_t ai);
+
extern u8 adj_is_midchain (adj_index_t ai);
#endif
diff --git a/src/vnet/adj/adj_midchain_delegate.c b/src/vnet/adj/adj_midchain_delegate.c
index 9e788432640..16129ff86ac 100644
--- a/src/vnet/adj/adj_midchain_delegate.c
+++ b/src/vnet/adj/adj_midchain_delegate.c
@@ -132,6 +132,31 @@ adj_midchain_delegate_stack (adj_index_t ai,
}
void
+adj_midchain_delegate_remove (adj_index_t ai)
+{
+ adj_midchain_delegate_t *amd;
+ ip_adjacency_t *adj;
+ adj_delegate_t *ad;
+
+ /*
+ * if there's a delegate, it can be removed
+ */
+ adj = adj_get(ai);
+ ad = adj_delegate_get(adj, ADJ_DELEGATE_MIDCHAIN);
+
+ if (NULL != ad)
+ {
+ adj_nbr_midchain_unstack(ai);
+
+ amd = pool_elt_at_index(amd_pool, ad->ad_index);
+ fib_entry_untrack(amd->amd_fei, amd->amd_sibling);
+ pool_put(amd_pool, amd);
+
+ adj_delegate_remove (ai, ADJ_DELEGATE_MIDCHAIN);
+ }
+}
+
+void
adj_midchain_delegate_unstack (adj_index_t ai)
{
adj_nbr_midchain_unstack(ai);
diff --git a/src/vnet/adj/adj_midchain_node.c b/src/vnet/adj/adj_midchain_node.c
index 170ed19855e..fcc2c6c7647 100644
--- a/src/vnet/adj/adj_midchain_node.c
+++ b/src/vnet/adj/adj_midchain_node.c
@@ -202,16 +202,20 @@ format_adj_midchain_tx_trace (u8 * s, va_list * args)
return (s);
}
-static uword
-adj_midchain_tx (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (adj_midchain_tx) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return (adj_midchain_tx_inline(vm, node, frame, 1));
+}
+VLIB_NODE_FN (tunnel_output) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
return (adj_midchain_tx_inline(vm, node, frame, 1));
}
-VLIB_REGISTER_NODE (adj_midchain_tx_node) = {
- .function = adj_midchain_tx,
+VLIB_REGISTER_NODE (adj_midchain_tx) = {
.name = "adj-midchain-tx",
.vector_size = sizeof (u32),
@@ -222,20 +226,23 @@ VLIB_REGISTER_NODE (adj_midchain_tx_node) = {
[0] = "error-drop",
},
};
+VLIB_REGISTER_NODE (tunnel_output) = {
+ .name = "tunnel-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_adj_midchain_tx_trace,
+ .sibling_of = "adj-midchain-tx",
+};
-static uword
-adj_midchain_tx_no_count (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (tunnel_output_no_count) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
return (adj_midchain_tx_inline(vm, node, frame, 0));
}
-VLIB_REGISTER_NODE (adj_midchain_tx_no_count_node) = {
- .function = adj_midchain_tx_no_count,
- .name = "adj-midchain-tx-no-count",
+VLIB_REGISTER_NODE (tunnel_output_no_count) = {
+ .name = "tunnel-output-no-count",
.vector_size = sizeof (u32),
-
.format_trace = format_adj_midchain_tx_trace,
.sibling_of = "adj-midchain-tx",
};
diff --git a/src/vnet/adj/adj_nbr.c b/src/vnet/adj/adj_nbr.c
index 3344d6e47cc..b3a027b7af4 100644
--- a/src/vnet/adj/adj_nbr.c
+++ b/src/vnet/adj/adj_nbr.c
@@ -105,6 +105,46 @@ adj_nbr_remove (adj_index_t ai,
}
}
+typedef struct adj_nbr_get_n_adjs_walk_ctx_t_
+{
+ vnet_link_t linkt;
+ u32 count;
+} adj_nbr_get_n_adjs_walk_ctx_t;
+
+static adj_walk_rc_t
+adj_nbr_get_n_adjs_walk (adj_index_t ai,
+ void *data)
+{
+ adj_nbr_get_n_adjs_walk_ctx_t *ctx = data;
+ const ip_adjacency_t *adj;
+
+ adj = adj_get(ai);
+
+ if (ctx->linkt == adj->ia_link)
+ ctx->count++;
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+u32
+adj_nbr_get_n_adjs (vnet_link_t link_type, u32 sw_if_index)
+{
+ adj_nbr_get_n_adjs_walk_ctx_t ctx = {
+ .linkt = link_type,
+ };
+ fib_protocol_t fproto;
+
+ FOR_EACH_FIB_IP_PROTOCOL(fproto)
+ {
+ adj_nbr_walk (sw_if_index,
+ fproto,
+ adj_nbr_get_n_adjs_walk,
+ &ctx);
+ }
+
+ return (ctx.count);
+}
+
adj_index_t
adj_nbr_find (fib_protocol_t nh_proto,
vnet_link_t link_type,
@@ -492,7 +532,7 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
fib_walk_sync(FIB_NODE_TYPE_ADJ, walk_ai, &bw_ctx);
/*
- * fib_walk_sync may allocate a new adjacency and potentially cuase a
+ * fib_walk_sync may allocate a new adjacency and potentially cause a
* realloc for adj_pool. When that happens, adj pointer is no longer
* valid here. We refresh the adj pointer accordingly.
*/
@@ -560,7 +600,7 @@ adj_nbr_update_rewrite_internal (ip_adjacency_t *adj,
walk_adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
}
- adj_delegate_adj_modified(adj);
+ adj_delegate_adj_modified(adj_get(ai));
adj_unlock(ai);
adj_unlock(walk_ai);
}
@@ -753,9 +793,15 @@ adj_nbr_interface_state_change_one (adj_index_t ai,
adj_lock (ai);
adj = adj_get(ai);
-
adj->ia_flags |= ADJ_FLAG_SYNC_WALK_ACTIVE;
fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ /*
+ * fib_walk_sync may allocate a new adjacency and potentially cause a
+ * realloc for adj_pool. When that happens, adj pointer is no longer
+ * valid here. We refresh the adj pointer accordingly.
+ */
+ adj = adj_get(ai);
adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
adj_unlock (ai);
@@ -863,9 +909,15 @@ adj_nbr_interface_delete_one (adj_index_t ai,
adj_lock(ai);
adj = adj_get(ai);
-
adj->ia_flags |= ADJ_FLAG_SYNC_WALK_ACTIVE;
fib_walk_sync(FIB_NODE_TYPE_ADJ, ai, &bw_ctx);
+
+ /*
+ * fib_walk_sync may allocate a new adjacency and potentially cause a
+ * realloc for adj_pool. When that happens, adj pointer is no longer
+ * valid here. We refresh the adj pointer accordingly.
+ */
+ adj = adj_get(ai);
adj->ia_flags &= ~ADJ_FLAG_SYNC_WALK_ACTIVE;
adj_unlock(ai);
@@ -910,13 +962,40 @@ adj_nbr_interface_add_del (vnet_main_t * vnm,
}
return (NULL);
-
}
VNET_SW_INTERFACE_ADD_DEL_FUNCTION(adj_nbr_interface_add_del);
static adj_walk_rc_t
+adj_nbr_ethernet_mac_change_one (adj_index_t ai,
+ void *arg)
+{
+ vnet_update_adjacency_for_sw_interface(vnet_get_main(),
+ adj_get_sw_if_index(ai),
+ ai);
+
+ return (ADJ_WALK_RC_CONTINUE);
+}
+
+/**
+ * Callback function invoked when an interface's MAC Address changes
+ */
+static void
+adj_nbr_ethernet_change_mac (ethernet_main_t * em,
+ u32 sw_if_index, uword opaque)
+{
+ fib_protocol_t proto;
+
+ FOR_EACH_FIB_IP_PROTOCOL(proto)
+ {
+ adj_nbr_walk(sw_if_index, proto,
+ adj_nbr_ethernet_mac_change_one,
+ NULL);
+ }
+}
+
+static adj_walk_rc_t
adj_nbr_show_one (adj_index_t ai,
void *arg)
{
@@ -1156,4 +1235,10 @@ adj_nbr_module_init (void)
dpo_register(DPO_ADJACENCY_INCOMPLETE,
&adj_nbr_incompl_dpo_vft,
nbr_incomplete_nodes);
+
+ ethernet_address_change_ctx_t ctx = {
+ .function = adj_nbr_ethernet_change_mac,
+ .function_opaque = 0,
+ };
+ vec_add1 (ethernet_main.address_change_callbacks, ctx);
}
diff --git a/src/vnet/adj/adj_nsh.c b/src/vnet/adj/adj_nsh.c
index 00d945729d8..1b4fa6c15b9 100644
--- a/src/vnet/adj/adj_nsh.c
+++ b/src/vnet/adj/adj_nsh.c
@@ -190,7 +190,6 @@ VLIB_REGISTER_NODE (adj_nsh_midchain_node) = {
};
/* Built-in ip4 tx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (nsh_output, static) =
{
.arc_name = "nsh-output",
@@ -204,4 +203,3 @@ VNET_FEATURE_INIT (nsh_tx_drop, static) =
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
diff --git a/src/vnet/adj/rewrite.h b/src/vnet/adj/rewrite.h
index 4234986dc37..06b1b00882e 100644
--- a/src/vnet/adj/rewrite.h
+++ b/src/vnet/adj/rewrite.h
@@ -136,7 +136,7 @@ always_inline void
vnet_rewrite_clear_data_internal (vnet_rewrite_header_t * rw, int max_size)
{
/* Sanity check values carefully for this clib_memset operation */
- ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE));
+ ASSERT ((max_size > 0) && (max_size < VNET_REWRITE_TOTAL_BYTES));
rw->data_bytes = 0;
clib_memset (rw->data, 0xfe, max_size);
@@ -147,8 +147,8 @@ vnet_rewrite_set_data_internal (vnet_rewrite_header_t * rw,
int max_size, void *data, int data_bytes)
{
/* Sanity check values carefully for this clib_memset operation */
- ASSERT ((max_size > 0) && (max_size < VLIB_BUFFER_PRE_DATA_SIZE));
- ASSERT ((data_bytes >= 0) && (data_bytes < max_size));
+ ASSERT ((max_size > 0) && (max_size <= VNET_REWRITE_TOTAL_BYTES));
+ ASSERT ((data_bytes >= 0) && (data_bytes <= max_size));
rw->data_bytes = data_bytes;
clib_memcpy_fast (rw->data, data, data_bytes);
diff --git a/src/vnet/api_errno.h b/src/vnet/api_errno.h
index df3806a7630..52f201c081b 100644
--- a/src/vnet/api_errno.h
+++ b/src/vnet/api_errno.h
@@ -18,146 +18,9 @@
#include <stdarg.h>
#include <vppinfra/types.h>
#include <vppinfra/format.h>
+#include <vnet/error.h>
-#define foreach_vnet_api_error \
-_(UNSPECIFIED, -1, "Unspecified Error") \
-_(INVALID_SW_IF_INDEX, -2, "Invalid sw_if_index") \
-_(NO_SUCH_FIB, -3, "No such FIB / VRF") \
-_(NO_SUCH_INNER_FIB, -4, "No such inner FIB / VRF") \
-_(NO_SUCH_LABEL, -5, "No such label") \
-_(NO_SUCH_ENTRY, -6, "No such entry") \
-_(INVALID_VALUE, -7, "Invalid value") \
-_(INVALID_VALUE_2, -8, "Invalid value #2") \
-_(UNIMPLEMENTED, -9, "Unimplemented") \
-_(INVALID_SW_IF_INDEX_2, -10, "Invalid sw_if_index #2") \
-_(SYSCALL_ERROR_1, -11, "System call error #1") \
-_(SYSCALL_ERROR_2, -12, "System call error #2") \
-_(SYSCALL_ERROR_3, -13, "System call error #3") \
-_(SYSCALL_ERROR_4, -14, "System call error #4") \
-_(SYSCALL_ERROR_5, -15, "System call error #5") \
-_(SYSCALL_ERROR_6, -16, "System call error #6") \
-_(SYSCALL_ERROR_7, -17, "System call error #7") \
-_(SYSCALL_ERROR_8, -18, "System call error #8") \
-_(SYSCALL_ERROR_9, -19, "System call error #9") \
-_(SYSCALL_ERROR_10, -20, "System call error #10") \
-_(FEATURE_DISABLED, -30, "Feature disabled by configuration") \
-_(INVALID_REGISTRATION, -31, "Invalid registration") \
-_(NEXT_HOP_NOT_IN_FIB, -50, "Next hop not in FIB") \
-_(UNKNOWN_DESTINATION, -51, "Unknown destination") \
-_(NO_PATHS_IN_ROUTE, -52, "No paths specified in route") \
-_(NEXT_HOP_NOT_FOUND_MP, -53, "Next hop not found (multipath)") \
-_(NO_MATCHING_INTERFACE, -54, "No matching interface for probe") \
-_(INVALID_VLAN, -55, "Invalid VLAN") \
-_(VLAN_ALREADY_EXISTS, -56, "VLAN subif already exists") \
-_(INVALID_SRC_ADDRESS, -57, "Invalid src address") \
-_(INVALID_DST_ADDRESS, -58, "Invalid dst address") \
-_(ADDRESS_LENGTH_MISMATCH, -59, "Address length mismatch") \
-_(ADDRESS_NOT_FOUND_FOR_INTERFACE, -60, "Address not found for interface") \
-_(ADDRESS_NOT_DELETABLE, -61, "Address not deletable") \
-_(IP6_NOT_ENABLED, -62, "ip6 not enabled") \
-_(NO_SUCH_NODE, -63, "No such graph node") \
-_(NO_SUCH_NODE2, -64, "No such graph node #2") \
-_(NO_SUCH_TABLE, -65, "No such table") \
-_(NO_SUCH_TABLE2, -66, "No such table #2") \
-_(NO_SUCH_TABLE3, -67, "No such table #3") \
-_(SUBIF_ALREADY_EXISTS, -68, "Subinterface already exists") \
-_(SUBIF_CREATE_FAILED, -69, "Subinterface creation failed") \
-_(INVALID_MEMORY_SIZE, -70, "Invalid memory size requested") \
-_(INVALID_INTERFACE, -71, "Invalid interface") \
-_(INVALID_VLAN_TAG_COUNT, -72, "Invalid number of tags for requested operation") \
-_(INVALID_ARGUMENT, -73, "Invalid argument") \
-_(UNEXPECTED_INTF_STATE, -74, "Unexpected interface state") \
-_(TUNNEL_EXIST, -75, "Tunnel already exists") \
-_(INVALID_DECAP_NEXT, -76, "Invalid decap-next") \
-_(RESPONSE_NOT_READY, -77, "Response not ready") \
-_(NOT_CONNECTED, -78, "Not connected to the data plane") \
-_(IF_ALREADY_EXISTS, -79, "Interface already exists") \
-_(BOND_SLAVE_NOT_ALLOWED, -80, "Operation not allowed on slave of BondEthernet") \
-_(VALUE_EXIST, -81, "Value already exists") \
-_(SAME_SRC_DST, -82, "Source and destination are the same") \
-_(IP6_MULTICAST_ADDRESS_NOT_PRESENT, -83, "IP6 multicast address required") \
-_(SR_POLICY_NAME_NOT_PRESENT, -84, "Segment routing policy name required") \
-_(NOT_RUNNING_AS_ROOT, -85, "Not running as root") \
-_(ALREADY_CONNECTED, -86, "Connection to the data plane already exists") \
-_(UNSUPPORTED_JNI_VERSION, -87, "Unsupported JNI version") \
-_(IP_PREFIX_INVALID, -88, "IP prefix invalid (masked bits set in address") \
-_(INVALID_WORKER, -89, "Invalid worker thread") \
-_(LISP_DISABLED, -90, "LISP is disabled") \
-_(CLASSIFY_TABLE_NOT_FOUND, -91, "Classify table not found") \
-_(INVALID_EID_TYPE, -92, "Unsupported LISP EID type") \
-_(CANNOT_CREATE_PCAP_FILE, -93, "Cannot create pcap file") \
-_(INCORRECT_ADJACENCY_TYPE, -94, "Invalid adjacency type for this operation") \
-_(EXCEEDED_NUMBER_OF_RANGES_CAPACITY, -95, "Operation would exceed configured capacity of ranges") \
-_(EXCEEDED_NUMBER_OF_PORTS_CAPACITY, -96, "Operation would exceed capacity of number of ports") \
-_(INVALID_ADDRESS_FAMILY, -97, "Invalid address family") \
-_(INVALID_SUB_SW_IF_INDEX, -98, "Invalid sub-interface sw_if_index") \
-_(TABLE_TOO_BIG, -99, "Table too big") \
-_(CANNOT_ENABLE_DISABLE_FEATURE, -100, "Cannot enable/disable feature") \
-_(BFD_EEXIST, -101, "Duplicate BFD object") \
-_(BFD_ENOENT, -102, "No such BFD object") \
-_(BFD_EINUSE, -103, "BFD object in use") \
-_(BFD_NOTSUPP, -104, "BFD feature not supported") \
-_(ADDRESS_IN_USE, -105, "Address in use") \
-_(ADDRESS_NOT_IN_USE, -106, "Address not in use") \
-_(QUEUE_FULL, -107, "Queue full") \
-_(APP_UNSUPPORTED_CFG, -108, "Unsupported application config") \
-_(URI_FIFO_CREATE_FAILED, -109, "URI FIFO segment create failed") \
-_(LISP_RLOC_LOCAL, -110, "RLOC address is local") \
-_(BFD_EAGAIN, -111, "BFD object cannot be manipulated at this time") \
-_(INVALID_GPE_MODE, -112, "Invalid GPE mode") \
-_(LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present") \
-_(ADDRESS_FOUND_FOR_INTERFACE, -114, "Address found for interface") \
-_(SESSION_CONNECT, -115, "Session failed to connect") \
-_(ENTRY_ALREADY_EXISTS, -116, "Entry already exists") \
-_(SVM_SEGMENT_CREATE_FAIL, -117, "Svm segment create fail") \
-_(APPLICATION_NOT_ATTACHED, -118, "Application not attached") \
-_(BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \
-_(BD_IN_USE, -120, "Bridge domain has member interfaces") \
-_(BD_NOT_MODIFIABLE, -121, "Bridge domain 0 can't be deleted/modified") \
-_(BD_ID_EXCEED_MAX, -122, "Bridge domain ID exceeds 16M limit") \
-_(SUBIF_DOESNT_EXIST, -123, "Subinterface doesn't exist") \
-_(L2_MACS_EVENT_CLINET_PRESENT, -124, "Client already exist for L2 MACs events") \
-_(INVALID_QUEUE, -125, "Invalid queue") \
-_(UNSUPPORTED, -126, "Unsupported") \
-_(DUPLICATE_IF_ADDRESS, -127, "Address already present on another interface") \
-_(APP_INVALID_NS, -128, "Invalid application namespace") \
-_(APP_WRONG_NS_SECRET, -129, "Wrong app namespace secret") \
-_(APP_CONNECT_SCOPE, -130, "Connect scope") \
-_(APP_ALREADY_ATTACHED, -131, "App already attached") \
-_(SESSION_REDIRECT, -132, "Redirect failed") \
-_(ILLEGAL_NAME, -133, "Illegal name") \
-_(NO_NAME_SERVERS, -134, "No name servers configured") \
-_(NAME_SERVER_NOT_FOUND, -135, "Name server not found") \
-_(NAME_RESOLUTION_NOT_ENABLED, -136, "Name resolution not enabled") \
-_(NAME_SERVER_FORMAT_ERROR, -137, "Server format error (bug!)") \
-_(NAME_SERVER_NO_SUCH_NAME, -138, "No such name") \
-_(NAME_SERVER_NO_ADDRESSES, -139, "No addresses available") \
-_(NAME_SERVER_NEXT_SERVER, -140, "Retry with new server") \
-_(APP_CONNECT_FILTERED, -141, "Connect was filtered") \
-_(ACL_IN_USE_INBOUND, -142, "Inbound ACL in use") \
-_(ACL_IN_USE_OUTBOUND, -143, "Outbound ACL in use") \
-_(INIT_FAILED, -144, "Initialization Failed") \
-_(NETLINK_ERROR, -145, "Netlink error") \
-_(BIER_BSL_UNSUP, -146, "BIER bit-string-length unsupported") \
-_(INSTANCE_IN_USE, -147, "Instance in use") \
-_(INVALID_SESSION_ID, -148, "Session ID out of range") \
-_(ACL_IN_USE_BY_LOOKUP_CONTEXT, -149, "ACL in use by a lookup context") \
-_(INVALID_VALUE_3, -150, "Invalid value #3") \
-_(NON_ETHERNET, -151, "Interface is not an Ethernet interface") \
-_(BD_ALREADY_HAS_BVI, -152, "Bridge domain already has a BVI interface") \
-_(INVALID_PROTOCOL, -153, "Invalid Protocol") \
-_(INVALID_ALGORITHM, -154, "Invalid Algorithm") \
-_(RSRC_IN_USE, -155, "Resource In Use") \
-_(KEY_LENGTH, -156, "invalid Key Length") \
-_(FIB_PATH_UNSUPPORTED_NH_PROTO, -157, "Unsupported FIB Path protocol") \
-_(API_ENDIAN_FAILED, -159, "Endian mismatch detected") \
-_(NO_CHANGE, -160, "No change in table") \
-_(MISSING_CERT_KEY, -161, "Missing certifcate or key") \
-_(LIMIT_EXCEEDED, -162, "limit exceeded") \
-_(IKE_NO_PORT, -163, "port not managed by IKE") \
-_(UDP_PORT_TAKEN, -164, "UDP port already taken") \
-_(EAGAIN, -165, "Retry stream call with cursor") \
-_(INVALID_VALUE_4, -166, "Invalid value #4") \
+#define foreach_vnet_api_error foreach_vnet_error
typedef enum
{
@@ -167,29 +30,25 @@ typedef enum
VNET_API_N_ERROR,
} vnet_api_error_t;
-/* *INDENT-OFF* */
-static inline u8 *
-format_vnet_api_errno (u8 * s, va_list * args)
+format_function_t format_vnet_api_errno;
+
+static_always_inline vnet_api_error_t
+vnet_api_error (clib_error_t *err)
{
- vnet_api_error_t api_error = va_arg (*args, vnet_api_error_t);
-#ifdef _
-#undef _
-#endif
-#define _(a, b, c) \
- case b: \
- s = format (s, "%s", c); \
- break;
- switch (api_error)
- {
- foreach_vnet_api_error
- default:
- s = format (s, "UNKNOWN");
- break;
- }
- return s;
-#undef _
+ if (err == 0)
+ return 0;
+ if (err->code >= 0)
+ return VNET_API_ERROR_BUG;
+ return err->code;
+}
+
+static_always_inline vnet_api_error_t
+vnet_get_api_error_and_free (clib_error_t *err)
+{
+ vnet_api_error_t rv = vnet_api_error (err);
+ clib_error_free (err);
+ return rv;
}
-/* *INDENT-ON* */
#endif /* included_vnet_api_errno_h */
diff --git a/src/vnet/arp/arp.api b/src/vnet/arp/arp.api
index 27bfa3b65c6..7de06f7f7e1 100644
--- a/src/vnet/arp/arp.api
+++ b/src/vnet/arp/arp.api
@@ -98,3 +98,121 @@ define proxy_arp_intfc_details
u32 context;
u32 sw_if_index;
};
+
+counters arp {
+ replies_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP replies sent";
+ };
+ disabled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARP Disabled";
+ };
+ l2_type_not_ethernet {
+ severity error;
+ type counter64;
+ units "packets";
+ description "L2 type not ethernet";
+ };
+ l3_type_not_ip4 {
+ severity error;
+ type counter64;
+ units "packets";
+ description "L3 type not IP4";
+ };
+ l3_src_address_not_local {
+ severity error;
+ type counter64;
+ units "packets";
+ description "IP4 source address not local to subnet";
+ };
+ l3_dst_address_not_local {
+ severity error;
+ type counter64;
+ units "packets";
+ description "IP4 destination address not local to subnet";
+ };
+ l3_dst_address_unset {
+ severity error;
+ type counter64;
+ units "packets";
+ description "IP4 destination address is unset";
+ };
+ l3_src_address_is_local {
+ severity error;
+ type counter64;
+ units "packets";
+ description "IP4 source address matches local interface";
+ };
+ l3_src_address_learned {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP request IP4 source address learned";
+ };
+ replies_received {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP replies received";
+ };
+ opcode_not_request {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARP opcode not request";
+ };
+ proxy_arp_replies_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "Proxy ARP replies sent";
+ };
+ l2_address_mismatch {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARP hw addr does not match L2 frame src addr";
+ };
+ gratuitous_arp {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARP probe or announcement dropped";
+ };
+ interface_no_table {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Interface is not mapped to an IP table";
+ };
+ interface_not_ip_enabled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Interface is not IP enabled";
+ };
+ unnumbered_mismatch {
+ severity error;
+ type counter64;
+ units "packets";
+ description "RX interface is unnumbered to different subnet";
+ };
+};
+
+paths {
+ "/err/arp-reply" "arp";
+ "/err/arp-disabled" "arp";
+ "/err/arp-input" "arp";
+ "/err/arp-proxy" "arp";
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/arp/arp.c b/src/vnet/arp/arp.c
index ced3c1cb7a7..43b2a93a7b3 100644
--- a/src/vnet/arp/arp.c
+++ b/src/vnet/arp/arp.c
@@ -25,6 +25,7 @@
#include <vnet/pg/pg.h>
#include <vnet/ip-neighbor/ip_neighbor.h>
+#include <vnet/ip-neighbor/ip4_neighbor.h>
#include <vnet/ip-neighbor/ip_neighbor_dp.h>
#include <vlibmemory/api.h>
@@ -190,7 +191,6 @@ always_inline u32
arp_learn (u32 sw_if_index,
const ethernet_arp_ip4_over_ethernet_address_t * addr)
{
- /* *INDENT-OFF* */
ip_neighbor_learn_t l = {
.ip = {
.ip.ip4 = addr->ip4,
@@ -199,11 +199,10 @@ arp_learn (u32 sw_if_index,
.mac = addr->mac,
.sw_if_index = sw_if_index,
};
- /* *INDENT-ON* */
ip_neighbor_learn_dp (&l);
- return (ETHERNET_ARP_ERROR_l3_src_address_learned);
+ return (ARP_ERROR_L3_SRC_ADDRESS_LEARNED);
}
typedef enum arp_input_next_t_
@@ -248,22 +247,21 @@ arp_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
p0 = vlib_get_buffer (vm, pi0);
arp0 = vlib_buffer_get_current (p0);
- error0 = ETHERNET_ARP_ERROR_replies_sent;
+ error0 = ARP_ERROR_REPLIES_SENT;
next0 = ARP_INPUT_NEXT_DROP;
- error0 =
- (arp0->l2_type !=
- clib_net_to_host_u16 (ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
- ETHERNET_ARP_ERROR_l2_type_not_ethernet : error0);
- error0 =
- (arp0->l3_type !=
- clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
- ETHERNET_ARP_ERROR_l3_type_not_ip4 : error0);
- error0 =
- (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ?
- ETHERNET_ARP_ERROR_l3_dst_address_unset : error0);
-
- if (ETHERNET_ARP_ERROR_replies_sent == error0)
+ error0 = (arp0->l2_type != clib_net_to_host_u16 (
+ ETHERNET_ARP_HARDWARE_TYPE_ethernet) ?
+ ARP_ERROR_L2_TYPE_NOT_ETHERNET :
+ error0);
+ error0 = (arp0->l3_type != clib_net_to_host_u16 (ETHERNET_TYPE_IP4) ?
+ ARP_ERROR_L3_TYPE_NOT_IP4 :
+ error0);
+ error0 = (0 == arp0->ip4_over_ethernet[0].ip4.as_u32 ?
+ ARP_ERROR_L3_DST_ADDRESS_UNSET :
+ error0);
+
+ if (ARP_ERROR_REPLIES_SENT == error0)
{
next0 = ARP_INPUT_NEXT_DISABLED;
vnet_feature_arc_start (am->feature_arc_index,
@@ -289,23 +287,6 @@ typedef enum arp_disabled_next_t_
ARP_DISABLED_N_NEXT,
} arp_disabled_next_t;
-#define foreach_arp_disabled_error \
- _ (DISABLED, "ARP Disabled on this interface") \
-
-typedef enum
-{
-#define _(sym,string) ARP_DISABLED_ERROR_##sym,
- foreach_arp_disabled_error
-#undef _
- ARP_DISABLED_N_ERROR,
-} arp_disabled_error_t;
-
-static char *arp_disabled_error_strings[] = {
-#define _(sym,string) string,
- foreach_arp_disabled_error
-#undef _
-};
-
static uword
arp_disabled (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
@@ -332,7 +313,7 @@ arp_disabled (vlib_main_t * vm,
u32 pi0, error0;
next0 = ARP_DISABLED_NEXT_DROP;
- error0 = ARP_DISABLED_ERROR_DISABLED;
+ error0 = ARP_ERROR_DISABLED;
pi0 = to_next[0] = from[0];
from += 1;
@@ -371,7 +352,6 @@ arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags)
const fib_entry_t *entry = fib_entry_get (fei);
const fib_entry_src_t *entry_src;
fib_source_t src;
- /* *INDENT-OFF* */
FOR_EACH_SRC_ADDED(entry, entry_src, src,
({
*flags = fib_entry_get_flags_for_source (fei, src);
@@ -380,7 +360,6 @@ arp_dst_fib_check (const fib_node_index_t fei, fib_entry_flag_t * flags)
else if (FIB_ENTRY_FLAG_CONNECTED & *flags)
return ARP_DST_FIB_CONN;
}))
- /* *INDENT-ON* */
return ARP_DST_FIB_NONE;
}
@@ -432,18 +411,22 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
eth_rx = ethernet_buffer_get_header (p0);
next0 = ARP_REPLY_NEXT_DROP;
- error0 = ETHERNET_ARP_ERROR_replies_sent;
+ error0 = ARP_ERROR_REPLIES_SENT;
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
/* Check that IP address is local and matches incoming interface. */
fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
if (~0 == fib_index0)
{
- error0 = ETHERNET_ARP_ERROR_interface_no_table;
+ error0 = ARP_ERROR_INTERFACE_NO_TABLE;
goto drop;
}
+ dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
+ &arp0->ip4_over_ethernet[1].ip4, 32);
+ conn_sw_if_index0 = fib_entry_get_any_resolving_interface (dst_fei);
+
{
/*
* we're looking for FIB entries that indicate the source
@@ -476,7 +459,6 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
* flags we need, or the flags we must not have,
* is not the best source, so check then all.
*/
- /* *INDENT-OFF* */
FOR_EACH_SRC_ADDED(src_fib_entry, src, source,
({
src_flags = fib_entry_get_flags_for_source (src_fei, source);
@@ -485,36 +467,35 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
address. */
if (FIB_ENTRY_FLAG_LOCAL & src_flags)
{
- error0 = ETHERNET_ARP_ERROR_l3_src_address_is_local;
- /*
- * When VPP has an interface whose address is also
- * applied to a TAP interface on the host, then VPP's
- * TAP interface will be unnumbered to the 'real'
- * interface and do proxy ARP from the host.
- * The curious aspect of this setup is that ARP requests
- * from the host will come from the VPP's own address.
- * So don't drop immediately here, instead go see if this
- * is a proxy ARP case.
- */
- goto next_feature;
- }
- /* A Source must also be local to subnet of matching
- * interface address. */
- if ((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
- (FIB_ENTRY_FLAG_CONNECTED & src_flags))
- {
- attached = 1;
- break;
- }
- /*
- * else
- * The packet was sent from an address that is not
- * connected nor attached i.e. it is not from an
- * address that is covered by a link's sub-net,
- * nor is it a already learned host resp.
- */
+ error0 = ARP_ERROR_L3_SRC_ADDRESS_IS_LOCAL;
+ /*
+ * When VPP has an interface whose address is also
+ * applied to a TAP interface on the host, then VPP's
+ * TAP interface will be unnumbered to the 'real'
+ * interface and do proxy ARP from the host.
+ * The curious aspect of this setup is that ARP requests
+ * from the host will come from the VPP's own address.
+ * So don't drop immediately here, instead go see if this
+ * is a proxy ARP case.
+ */
+ goto next_feature;
+ }
+ /* A Source must also be local to subnet of matching
+ * interface address. */
+ if ((FIB_ENTRY_FLAG_ATTACHED & src_flags) ||
+ (FIB_ENTRY_FLAG_CONNECTED & src_flags))
+ {
+ attached = 1;
+ break;
+ }
+ /*
+ * else
+ * The packet was sent from an address that is not
+ * connected nor attached i.e. it is not from an
+ * address that is covered by a link's sub-net,
+ * nor is it a already learned host resp.
+ */
}));
- /* *INDENT-ON* */
/*
* shorter mask lookup for the next iteration.
@@ -532,24 +513,20 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
while (!attached &&
!fib_entry_is_sourced (src_fei, FIB_SOURCE_DEFAULT_ROUTE));
- if (!attached)
+ if (!attached &&
+ !arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
{
/*
- * the matching route is a not attached, i.e. it was
- * added as a result of routing, rather than interface/ARP
- * configuration. If the matching route is not a host route
- * (i.e. a /32)
+ * the matching route is a not attached and not unnumbered,
+ * i.e. it was added as a result of routing, rather than
+ * interface/ARP configuration. If the matching route is not
+ * a host route (i.e. a /32)
*/
- error0 = ETHERNET_ARP_ERROR_l3_src_address_not_local;
+ error0 = ARP_ERROR_L3_SRC_ADDRESS_NOT_LOCAL;
goto drop;
}
}
- dst_fei = ip4_fib_table_lookup (ip4_fib_get (fib_index0),
- &arp0->ip4_over_ethernet[1].ip4,
- 32);
- conn_sw_if_index0 = fib_entry_get_any_resolving_interface (dst_fei);
-
switch (arp_dst_fib_check (dst_fei, &dst_flags))
{
case ARP_DST_FIB_ADJ:
@@ -562,18 +539,24 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
* blow our ARP cache
*/
if (conn_sw_if_index0 != sw_if_index0)
- error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
+ error0 = ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL;
else if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
arp0->ip4_over_ethernet[1].ip4.as_u32)
- error0 = arp_learn (sw_if_index0,
- &arp0->ip4_over_ethernet[0]);
- goto drop;
+ {
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP4]
+ .ipnc[VLIB_RX][IP_NEIGHBOR_CTR_GRAT],
+ vm->thread_index, sw_if_index0, 1);
+ error0 =
+ arp_learn (sw_if_index0, &arp0->ip4_over_ethernet[0]);
+ }
+ goto next_feature;
case ARP_DST_FIB_CONN:
/* destination is connected, continue to process */
break;
case ARP_DST_FIB_NONE:
/* destination is not connected, stop here */
- error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
+ error0 = ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL;
goto next_feature;
}
@@ -596,10 +579,18 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
(eth_rx->src_address,
arp0->ip4_over_ethernet[0].mac.bytes) && !is_vrrp_reply0)
{
- error0 = ETHERNET_ARP_ERROR_l2_address_mismatch;
+ error0 = ARP_ERROR_L2_ADDRESS_MISMATCH;
goto drop;
}
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP4]
+ .ipnc[VLIB_RX][arp0->opcode == clib_host_to_net_u16 (
+ ETHERNET_ARP_OPCODE_reply) ?
+ IP_NEIGHBOR_CTR_REPLY :
+ IP_NEIGHBOR_CTR_REQUEST],
+ vm->thread_index, sw_if_index0, 1);
+
/* Learn or update sender's mapping only for replies to addresses
* that are local to the subnet */
if (arp0->opcode ==
@@ -612,7 +603,7 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
/* a reply for a non-local destination could be a GARP.
* GARPs for hosts we know were handled above, so this one
* we drop */
- error0 = ETHERNET_ARP_ERROR_l3_dst_address_not_local;
+ error0 = ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL;
goto next_feature;
}
@@ -628,37 +619,38 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
sw_if_index0 != fib_entry_get_resolving_interface (src_fei))
{
/*
- * The interface the ARP is sent to or was received on is not the
- * interface on which the covering prefix is configured.
- * Maybe this is a case for unnumbered.
+ * The interface the ARP is sent to or was received on is
+ * not the interface on which the covering prefix is
+ * configured. Maybe this is a case for unnumbered.
*/
if (!arp_unnumbered (p0, sw_if_index0, conn_sw_if_index0))
{
- error0 = ETHERNET_ARP_ERROR_unnumbered_mismatch;
+ error0 = ARP_ERROR_UNNUMBERED_MISMATCH;
goto drop;
}
}
if (arp0->ip4_over_ethernet[0].ip4.as_u32 ==
arp0->ip4_over_ethernet[1].ip4.as_u32)
{
- error0 = ETHERNET_ARP_ERROR_gratuitous_arp;
+ error0 = ARP_ERROR_GRATUITOUS_ARP;
goto drop;
}
- next0 = arp_mk_reply (vnm, p0, sw_if_index0,
- if_addr0, arp0, eth_rx);
+ next0 = arp_mk_reply (vnm, p0, sw_if_index0, if_addr0, arp0, eth_rx);
/* We are going to reply to this request, so, in the absence of
errors, learn the sender */
if (!error0)
error0 = arp_learn (sw_if_index0, &arp0->ip4_over_ethernet[1]);
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP4].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_REPLY],
+ vm->thread_index, sw_if_index0, 1);
n_replies_sent += 1;
goto enqueue;
next_feature:
vnet_feature_next (&next0, p0);
- goto enqueue;
drop:
p0->error = node->errors[error0];
@@ -671,28 +663,21 @@ arp_reply (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
- vlib_error_count (vm, node->node_index,
- ETHERNET_ARP_ERROR_replies_sent, n_replies_sent);
+ vlib_error_count (vm, node->node_index, ARP_ERROR_REPLIES_SENT,
+ n_replies_sent);
return frame->n_vectors;
}
-static char *ethernet_arp_error_strings[] = {
-#define _(sym,string) string,
- foreach_ethernet_arp_error
-#undef _
-};
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (arp_input_node, static) =
{
.function = arp_input,
.name = "arp-input",
.vector_size = sizeof (u32),
- .n_errors = ETHERNET_ARP_N_ERROR,
- .error_strings = ethernet_arp_error_strings,
+ .n_errors = ARP_N_ERROR,
+ .error_counters = arp_error_counters,
.n_next_nodes = ARP_INPUT_N_NEXT,
.next_nodes = {
[ARP_INPUT_NEXT_DROP] = "error-drop",
@@ -707,8 +692,8 @@ VLIB_REGISTER_NODE (arp_disabled_node, static) =
.function = arp_disabled,
.name = "arp-disabled",
.vector_size = sizeof (u32),
- .n_errors = ARP_DISABLED_N_ERROR,
- .error_strings = arp_disabled_error_strings,
+ .n_errors = ARP_N_ERROR,
+ .error_counters = arp_error_counters,
.n_next_nodes = ARP_DISABLED_N_NEXT,
.next_nodes = {
[ARP_INPUT_NEXT_DROP] = "error-drop",
@@ -722,8 +707,8 @@ VLIB_REGISTER_NODE (arp_reply_node, static) =
.function = arp_reply,
.name = "arp-reply",
.vector_size = sizeof (u32),
- .n_errors = ETHERNET_ARP_N_ERROR,
- .error_strings = ethernet_arp_error_strings,
+ .n_errors = ARP_N_ERROR,
+ .error_counters = arp_error_counters,
.n_next_nodes = ARP_REPLY_N_NEXT,
.next_nodes = {
[ARP_REPLY_NEXT_DROP] = "error-drop",
@@ -771,7 +756,6 @@ VNET_FEATURE_INIT (arp_drop_feat_node, static) =
.runs_before = 0, /* last feature */
};
-/* *INDENT-ON* */
typedef struct
{
@@ -870,7 +854,7 @@ VNET_SW_INTERFACE_ADD_DEL_FUNCTION (vnet_arp_add_del_sw_interface);
const static ip_neighbor_vft_t arp_vft = {
.inv_proxy4_add = arp_proxy_add,
.inv_proxy4_del = arp_proxy_del,
- .inv_proxy4_enable = arp_proxy_disable,
+ .inv_proxy4_enable = arp_proxy_enable,
.inv_proxy4_disable = arp_proxy_disable,
};
@@ -896,12 +880,39 @@ ethernet_arp_init (vlib_main_t * vm)
vlib_node_runtime_t *rt =
vlib_node_get_runtime (vm, arp_input_node.index);
-#define _(a,b) \
- vnet_pcap_drop_trace_filter_add_del \
- (rt->errors[ETHERNET_ARP_ERROR_##a], \
- 1 /* is_add */);
- foreach_ethernet_arp_error
-#undef _
+ vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_REPLIES_SENT],
+ 1);
+ vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_DISABLED], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L2_TYPE_NOT_ETHERNET], 1);
+ vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_L3_TYPE_NOT_IP4],
+ 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L3_SRC_ADDRESS_NOT_LOCAL], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L3_DST_ADDRESS_NOT_LOCAL], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L3_DST_ADDRESS_UNSET], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L3_SRC_ADDRESS_IS_LOCAL], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L3_SRC_ADDRESS_LEARNED], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_REPLIES_RECEIVED], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_OPCODE_NOT_REQUEST], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_PROXY_ARP_REPLIES_SENT], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_L2_ADDRESS_MISMATCH], 1);
+ vnet_pcap_drop_trace_filter_add_del (rt->errors[ARP_ERROR_GRATUITOUS_ARP],
+ 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_INTERFACE_NO_TABLE], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_INTERFACE_NOT_IP_ENABLED], 1);
+ vnet_pcap_drop_trace_filter_add_del (
+ rt->errors[ARP_ERROR_UNNUMBERED_MISMATCH], 1);
}
{
@@ -916,13 +927,11 @@ ethernet_arp_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ethernet_arp_init) =
{
.runs_after = VLIB_INITS("ethernet_init",
"ip_neighbor_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/arp/arp.h b/src/vnet/arp/arp.h
index 7446564b0cf..f8cab8ae78d 100644
--- a/src/vnet/arp/arp.h
+++ b/src/vnet/arp/arp.h
@@ -19,32 +19,7 @@
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/arp_packet.h>
-
-#define foreach_ethernet_arp_error \
- _ (replies_sent, "ARP replies sent") \
- _ (l2_type_not_ethernet, "L2 type not ethernet") \
- _ (l3_type_not_ip4, "L3 type not IP4") \
- _ (l3_src_address_not_local, "IP4 source address not local to subnet") \
- _ (l3_dst_address_not_local, "IP4 destination address not local to subnet") \
- _ (l3_dst_address_unset, "IP4 destination address is unset") \
- _ (l3_src_address_is_local, "IP4 source address matches local interface") \
- _ (l3_src_address_learned, "ARP request IP4 source address learned") \
- _ (replies_received, "ARP replies received") \
- _ (opcode_not_request, "ARP opcode not request") \
- _ (proxy_arp_replies_sent, "Proxy ARP replies sent") \
- _ (l2_address_mismatch, "ARP hw addr does not match L2 frame src addr") \
- _ (gratuitous_arp, "ARP probe or announcement dropped") \
- _ (interface_no_table, "Interface is not mapped to an IP table") \
- _ (interface_not_ip_enabled, "Interface is not IP enabled") \
- _ (unnumbered_mismatch, "RX interface is unnumbered to different subnet") \
-
-typedef enum
-{
-#define _(sym,string) ETHERNET_ARP_ERROR_##sym,
- foreach_ethernet_arp_error
-#undef _
- ETHERNET_ARP_N_ERROR,
-} ethernet_arp_reply_error_t;
+#include <vnet/arp/arp.api_enum.h>
extern int arp_proxy_add (u32 fib_index,
const ip4_address_t * lo_addr,
diff --git a/src/vnet/arp/arp_packet.h b/src/vnet/arp/arp_packet.h
index a860c258f75..66ab384a33e 100644
--- a/src/vnet/arp/arp_packet.h
+++ b/src/vnet/arp/arp_packet.h
@@ -68,6 +68,8 @@ arp_mk_reply (vnet_main_t * vnm,
clib_mem_unaligned (&arp0->ip4_over_ethernet[0].ip4.data_u32, u32) =
if_addr0->data_u32;
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
/* Hardware must be ethernet-like. */
ASSERT (vec_len (hw_if0->hw_address) == 6);
diff --git a/src/vnet/arp/arp_proxy.c b/src/vnet/arp/arp_proxy.c
index e3f5b4ae67b..39f624d5a1d 100644
--- a/src/vnet/arp/arp_proxy.c
+++ b/src/vnet/arp/arp_proxy.c
@@ -223,7 +223,6 @@ set_arp_proxy (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* Enable proxy-arp on an interface. The vpp stack will answer ARP
* requests for the indicated address range. Multiple proxy-arp
@@ -249,15 +248,12 @@ VLIB_CLI_COMMAND (set_int_proxy_enable_command, static) = {
"set interface proxy-arp <intfc> [enable|disable]",
.function = set_int_proxy_arp_command_fn,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_arp_proxy_command, static) = {
.path = "set arp proxy",
.short_help = "set arp proxy [del] table-ID <table-ID> start <start-address> end <end-addres>",
.function = set_arp_proxy,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -326,14 +322,14 @@ arp_proxy (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
is_request0 = arp0->opcode
== clib_host_to_net_u16 (ETHERNET_ARP_OPCODE_request);
- error0 = ETHERNET_ARP_ERROR_replies_sent;
+ error0 = ARP_ERROR_REPLIES_SENT;
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
next0 = ARP_REPLY_NEXT_DROP;
fib_index0 = ip4_fib_table_get_index_for_sw_if_index (sw_if_index0);
if (~0 == fib_index0)
{
- error0 = ETHERNET_ARP_ERROR_interface_no_table;
+ error0 = ARP_ERROR_INTERFACE_NO_TABLE;
}
if (0 == error0 && is_request0)
@@ -376,28 +372,28 @@ arp_proxy (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
- vlib_error_count (vm, node->node_index,
- ETHERNET_ARP_ERROR_replies_sent, n_arp_replies_sent);
+ vlib_error_count (vm, node->node_index, ARP_ERROR_REPLIES_SENT,
+ n_arp_replies_sent);
return frame->n_vectors;
}
-static char *ethernet_arp_error_strings[] = {
-#define _(sym,string) string,
- foreach_ethernet_arp_error
-#undef _
-};
-
VLIB_REGISTER_NODE (arp_proxy_node, static) =
{
- .function = arp_proxy,.name = "arp-proxy",.vector_size =
- sizeof (u32),.n_errors = ETHERNET_ARP_N_ERROR,.error_strings =
- ethernet_arp_error_strings,.n_next_nodes = ARP_REPLY_N_NEXT,.next_nodes =
+ .function = arp_proxy,
+ .name = "arp-proxy",
+ .vector_size = sizeof (u32),
+ .n_errors = ARP_N_ERROR,
+ .error_counters = arp_error_counters,
+ .n_next_nodes = ARP_REPLY_N_NEXT,
+ .next_nodes =
{
- [ARP_REPLY_NEXT_DROP] = "error-drop",
- [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",}
-,.format_buffer = format_ethernet_arp_header,.format_trace =
- format_ethernet_arp_input_trace,};
+ [ARP_REPLY_NEXT_DROP] = "error-drop",
+ [ARP_REPLY_NEXT_REPLY_TX] = "interface-output",
+ },
+ .format_buffer = format_ethernet_arp_header,
+ .format_trace = format_ethernet_arp_input_trace,
+};
static clib_error_t *
show_ip4_arp (vlib_main_t * vm,
@@ -435,13 +431,11 @@ show_ip4_arp (vlib_main_t * vm,
* Fib_index 0 6.0.0.1 - 6.0.0.11
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_arp_command, static) = {
.path = "show arp proxy",
.function = show_ip4_arp,
.short_help = "show ip arp",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/arp/arp_test.c b/src/vnet/arp/arp_test.c
index 29eeeb59d0f..9eaea91c709 100644
--- a/src/vnet/arp/arp_test.c
+++ b/src/vnet/arp/arp_test.c
@@ -43,7 +43,7 @@ uword unformat_sw_if_index (unformat_input_t * input, va_list * args);
/* Declare message IDs */
#include <vnet/arp/arp.api_enum.h>
#include <vnet/arp/arp.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
static int
api_proxy_arp_dump (vat_main_t * vam)
@@ -158,8 +158,6 @@ api_proxy_arp_intfc_enable_disable (vat_main_t * vam)
#include <vnet/arp/arp.api_test.c>
-VAT_REGISTER_FEATURE_FUNCTION (vat_arp_plugin_register);
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/bfd/bfd.api b/src/vnet/bfd/bfd.api
index f53cc7630fd..d3b3ed21a26 100644
--- a/src/vnet/bfd/bfd.api
+++ b/src/vnet/bfd/bfd.api
@@ -107,6 +107,26 @@ autoreply define bfd_udp_add
u8 bfd_key_id;
u32 conf_key_id;
};
+define bfd_udp_upd
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u32 desired_min_tx;
+ u32 required_min_rx;
+ vl_api_address_t local_addr;
+ vl_api_address_t peer_addr;
+ u8 detect_mult;
+ bool is_authenticated;
+ u8 bfd_key_id;
+ u32 conf_key_id;
+};
+define bfd_udp_upd_reply
+{
+ u32 context;
+ i32 retval;
+ u32 stats_index;
+};
/** \brief Modify UDP BFD session on interface
@param client_index - opaque cookie to identify the sender
@@ -339,6 +359,107 @@ autoreply define bfd_udp_auth_deactivate
bool is_delayed;
};
+/* must be compatible with bfd_error_t */
+counters bfd_udp {
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "OK";
+ };
+ bad {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad packet";
+ };
+ disabled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bfd packets received on disabled interfaces";
+ };
+ version {
+ severity error;
+ type counter64;
+ units "packets";
+ description "version";
+ };
+ length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "too short";
+ };
+ detect_multi {
+ severity error;
+ type counter64;
+ units "packets";
+ description "detect-multi";
+ };
+ multi_point {
+ severity error;
+ type counter64;
+ units "packets";
+ description "multi-point";
+ };
+ my_disc {
+ severity error;
+ type counter64;
+ units "packets";
+ description "my-disc";
+ };
+ your_disc {
+ severity error;
+ type counter64;
+ units "packets";
+ description "your-disc";
+ };
+ admin_down {
+ severity error;
+ type counter64;
+ units "packets";
+ description "session admin-down";
+ };
+ no_session {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no-session";
+ };
+ failed_verification {
+ severity error;
+ type counter64;
+ units "packets";
+ description "failed-verification";
+ };
+ src_mismatch {
+ severity error;
+ type counter64;
+ units "packets";
+ description "src-mismatch";
+ };
+ dst_mismatch {
+ severity error;
+ type counter64;
+ units "packets";
+ description "dst-mismatch";
+ };
+ ttl {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ttl";
+ };
+};
+
+paths {
+ "/err/bfd-udp4-input" "bfd";
+ "/err/bfd-udp6-input" "bfd";
+ "/err/bfd-udp4-echo-input" "bfd";
+ "/err/bfd-udp6-echo-input" "bfd";
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/bfd/bfd_api.c b/src/vnet/bfd/bfd_api.c
index 0ae8508f865..816e71081ff 100644
--- a/src/vnet/bfd/bfd_api.c
+++ b/src/vnet/bfd/bfd_api.c
@@ -71,6 +71,27 @@ vl_api_bfd_udp_add_t_handler (vl_api_bfd_udp_add_t * mp)
}
static void
+vl_api_bfd_udp_upd_t_handler (vl_api_bfd_udp_add_t *mp)
+{
+ vl_api_bfd_udp_upd_reply_t *rmp;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ BFD_UDP_API_PARAM_COMMON_CODE;
+
+ rv = bfd_udp_upd_session (
+ BFD_UDP_API_PARAM_FROM_MP (mp), clib_net_to_host_u32 (mp->desired_min_tx),
+ clib_net_to_host_u32 (mp->required_min_rx), mp->detect_mult,
+ mp->is_authenticated, clib_net_to_host_u32 (mp->conf_key_id),
+ mp->bfd_key_id);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO2 (VL_API_BFD_UDP_UPD_REPLY,
+ ({ rmp->stats_index = clib_host_to_net_u32 (0); }));
+}
+
+static void
vl_api_bfd_udp_mod_t_handler (vl_api_bfd_udp_mod_t * mp)
{
vl_api_bfd_udp_mod_reply_t *rmp;
@@ -196,7 +217,6 @@ bfd_event (bfd_main_t * bm, bfd_session_t * bs)
vpe_api_main_t *vam = &vpe_api_main;
vpe_client_registration_t *reg;
vl_api_registration_t *vl_reg;
- /* *INDENT-OFF* */
pool_foreach (reg, vam->bfd_events_registrations) {
vl_reg = vl_api_client_index_to_registration (reg->client_index);
if (vl_reg)
@@ -210,7 +230,6 @@ bfd_event (bfd_main_t * bm, bfd_session_t * bs)
}
}
}
- /* *INDENT-ON* */
}
static void
@@ -223,13 +242,11 @@ vl_api_bfd_udp_session_dump_t_handler (vl_api_bfd_udp_session_dump_t * mp)
return;
bfd_session_t *bs = NULL;
- /* *INDENT-OFF* */
pool_foreach (bs, bfd_main.sessions) {
if (bs->transport == BFD_TRANSPORT_UDP4 ||
bs->transport == BFD_TRANSPORT_UDP6)
send_bfd_udp_session_details (reg, mp->context, bs);
}
- /* *INDENT-ON* */
}
static void
@@ -280,7 +297,6 @@ vl_api_bfd_auth_keys_dump_t_handler (vl_api_bfd_auth_keys_dump_t * mp)
bfd_auth_key_t *key = NULL;
vl_api_bfd_auth_keys_details_t *rmp = NULL;
- /* *INDENT-OFF* */
pool_foreach (key, bfd_main.auth_keys) {
rmp = vl_msg_api_alloc (sizeof (*rmp));
clib_memset (rmp, 0, sizeof (*rmp));
@@ -291,7 +307,6 @@ vl_api_bfd_auth_keys_dump_t_handler (vl_api_bfd_auth_keys_dump_t * mp)
rmp->use_count = clib_host_to_net_u32 (key->use_count);
vl_api_send_msg (reg, (u8 *)rmp);
}
- /* *INDENT-ON* */
}
static void
@@ -373,7 +388,6 @@ vl_api_bfd_udp_get_echo_source_t_handler (vl_api_bfd_udp_get_echo_source_t *
bfd_udp_get_echo_source (&is_set, &sw_if_index, &have_usable_ip4, &ip4,
&have_usable_ip6, &ip6);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_BFD_UDP_GET_ECHO_SOURCE_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
@@ -407,7 +421,6 @@ vl_api_bfd_udp_get_echo_source_t_handler (vl_api_bfd_udp_get_echo_source_t *
rmp->have_usable_ip6 = false;
}
}))
- /* *INDENT-ON* */
}
#include <vnet/bfd/bfd.api.c>
diff --git a/src/vnet/bfd/bfd_api.h b/src/vnet/bfd/bfd_api.h
index 2a6c69b78b6..f051e6b679c 100644
--- a/src/vnet/bfd/bfd_api.h
+++ b/src/vnet/bfd/bfd_api.h
@@ -45,6 +45,15 @@ bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr,
u8 bfd_key_id);
/**
+ * @brief create a new or modify and existing bfd session
+ */
+vnet_api_error_t
+bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr,
+ const ip46_address_t *peer_addr, u32 desired_min_tx_usec,
+ u32 required_min_rx_usec, u8 detect_mult,
+ u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id);
+
+/**
* @brief modify existing session
*/
vnet_api_error_t
diff --git a/src/vnet/bfd/bfd_cli.c b/src/vnet/bfd/bfd_cli.c
index 1d100b077eb..33942bb89e6 100644
--- a/src/vnet/bfd/bfd_cli.c
+++ b/src/vnet/bfd/bfd_cli.c
@@ -134,12 +134,10 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input,
bfd_auth_key_t *key = NULL;
u8 *s = format (NULL, "%=10s %=25s %=10s\n", "Configuration Key ID",
"Type", "Use Count");
- /* *INDENT-OFF* */
pool_foreach (key, bm->auth_keys) {
s = format (s, "%10u %-25s %10u\n", key->conf_key_id,
bfd_auth_type_str (key->auth_type), key->use_count);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v\n", s);
vec_free (s);
vlib_cli_output (vm, "Number of configured BFD keys: %lu\n",
@@ -149,11 +147,9 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input,
{
u8 *s = format (NULL, "%=10s %=32s %=20s %=20s\n", "Index", "Property",
"Local value", "Remote value");
- /* *INDENT-OFF* */
pool_foreach (bs, bm->sessions) {
s = format (s, "%U", format_bfd_session_cli, vm, bs);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v", s);
vec_free (s);
vlib_cli_output (vm, "Number of configured BFD sessions: %lu\n",
@@ -212,13 +208,11 @@ show_bfd (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_bfd_command, static) = {
.path = "show bfd",
.short_help = "show bfd [keys|sessions|echo-source]",
.function = show_bfd,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_key_add (vlib_main_t * vm, unformat_input_t * input,
@@ -310,7 +304,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_key_add_command, static) = {
.path = "bfd key set",
.short_help = "bfd key set"
@@ -319,7 +312,6 @@ VLIB_CLI_COMMAND (bfd_cli_key_add_command, static) = {
" secret <secret>",
.function = bfd_cli_key_add,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_key_del (vlib_main_t * vm, unformat_input_t * input,
@@ -355,13 +347,11 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_key_del_command, static) = {
.path = "bfd key del",
.short_help = "bfd key del conf-key-id <id>",
.function = bfd_cli_key_del,
};
-/* *INDENT-ON* */
#define INTERFACE_STR "interface"
#define LOCAL_ADDR_STR "local-addr"
@@ -397,23 +387,30 @@ WARN_OFF(tautological-compare) \
goto out; \
}
+static uword
+bfd_cli_unformat_ip46_address (unformat_input_t *input, va_list *args)
+{
+ ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
+ return unformat_user (input, unformat_ip46_address, ip46, IP46_TYPE_ANY);
+}
+
static clib_error_t *
bfd_cli_udp_session_add (vlib_main_t * vm, unformat_input_t * input,
CLIB_UNUSED (vlib_cli_command_t * lmd))
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_add_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
- F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
- F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") \
- F (u32, conf_key_id, CONF_KEY_ID_STR, optional, "%u") \
+#define foreach_bfd_cli_udp_session_add_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
+ F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
+ F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u") \
+ F (u32, conf_key_id, CONF_KEY_ID_STR, optional, "%u") \
F (u32, bfd_key_id, BFD_KEY_ID_STR, optional, "%u")
foreach_bfd_cli_udp_session_add_cli_param (DECLARE);
@@ -477,7 +474,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = {
.path = "bfd udp session add",
.short_help = "bfd udp session add"
@@ -493,7 +489,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_add_command, static) = {
"]",
.function = bfd_cli_udp_session_add,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input,
@@ -501,15 +496,15 @@ bfd_cli_udp_session_mod (vlib_main_t * vm, unformat_input_t * input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_mod_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
- F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
+#define foreach_bfd_cli_udp_session_mod_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u32, desired_min_tx, DESIRED_MIN_TX_STR, mandatory, "%u") \
+ F (u32, required_min_rx, REQUIRED_MIN_RX_STR, mandatory, "%u") \
F (u32, detect_mult, DETECT_MULT_STR, mandatory, "%u")
foreach_bfd_cli_udp_session_mod_cli_param (DECLARE);
@@ -556,7 +551,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = {
.path = "bfd udp session mod",
.short_help = "bfd udp session mod interface"
@@ -568,7 +562,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_mod_command, static) = {
" <detect multiplier> ",
.function = bfd_cli_udp_session_mod,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input,
@@ -576,13 +569,13 @@ bfd_cli_udp_session_del (vlib_main_t * vm, unformat_input_t * input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_del_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address)
+#define foreach_bfd_cli_udp_session_del_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address)
foreach_bfd_cli_udp_session_del_cli_param (DECLARE);
@@ -620,7 +613,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = {
.path = "bfd udp session del",
.short_help = "bfd udp session del interface"
@@ -629,7 +621,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_del_command, static) = {
"<peer-address> ",
.function = bfd_cli_udp_session_del,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input,
@@ -637,14 +628,14 @@ bfd_cli_udp_session_set_flags (vlib_main_t * vm, unformat_input_t * input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u8 *, admin_up_down_token, ADMIN_STR, mandatory, "%v", \
+#define foreach_bfd_cli_udp_session_set_flags_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u8 *, admin_up_down_token, ADMIN_STR, mandatory, "%v", \
&admin_up_down_token)
foreach_bfd_cli_udp_session_set_flags_cli_param (DECLARE);
@@ -702,7 +693,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = {
.path = "bfd udp session set-flags",
.short_help = "bfd udp session set-flags"
@@ -712,7 +702,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_set_flags_command, static) = {
" admin <up|down>",
.function = bfd_cli_udp_session_set_flags,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_session_auth_activate (vlib_main_t * vm,
@@ -721,15 +710,15 @@ bfd_cli_udp_session_auth_activate (vlib_main_t * vm,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (u8 *, delayed_token, DELAYED_STR, optional, "%v") \
- F (u32, conf_key_id, CONF_KEY_ID_STR, mandatory, "%u") \
+#define foreach_bfd_cli_udp_session_auth_activate_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (u8 *, delayed_token, DELAYED_STR, optional, "%v") \
+ F (u32, conf_key_id, CONF_KEY_ID_STR, mandatory, "%u") \
F (u32, bfd_key_id, BFD_KEY_ID_STR, mandatory, "%u")
foreach_bfd_cli_udp_session_auth_activate_cli_param (DECLARE);
@@ -799,7 +788,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_activate_command, static) = {
.path = "bfd udp session auth activate",
.short_help = "bfd udp session auth activate"
@@ -818,13 +806,13 @@ bfd_cli_udp_session_auth_deactivate (vlib_main_t *vm, unformat_input_t *input,
{
clib_error_t *ret = NULL;
unformat_input_t _line_input, *line_input = &_line_input;
-#define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \
- F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
- unformat_vnet_sw_interface, &vnet_main) \
- F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
- F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
- unformat_ip46_address) \
+#define foreach_bfd_cli_udp_session_auth_deactivate_cli_param(F) \
+ F (u32, sw_if_index, INTERFACE_STR, mandatory, "%U", \
+ unformat_vnet_sw_interface, &vnet_main) \
+ F (ip46_address_t, local_addr, LOCAL_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
+ F (ip46_address_t, peer_addr, PEER_ADDR_STR, mandatory, "%U", \
+ bfd_cli_unformat_ip46_address) \
F (u8 *, delayed_token, DELAYED_STR, optional, "%v")
foreach_bfd_cli_udp_session_auth_deactivate_cli_param (DECLARE);
@@ -884,7 +872,6 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = {
.path = "bfd udp session auth deactivate",
.short_help = "bfd udp session auth deactivate"
@@ -894,7 +881,6 @@ VLIB_CLI_COMMAND (bfd_cli_udp_session_auth_deactivate_command, static) = {
"[ delayed <yes|no> ]",
.function = bfd_cli_udp_session_auth_deactivate,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_set_echo_source (vlib_main_t * vm, unformat_input_t * input,
@@ -941,13 +927,11 @@ out:
return ret;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_set_echo_source_cmd, static) = {
.path = "bfd udp echo-source set",
.short_help = "bfd udp echo-source set interface <interface>",
.function = bfd_cli_udp_set_echo_source,
};
-/* *INDENT-ON* */
static clib_error_t *
bfd_cli_udp_del_echo_source (vlib_main_t * vm, unformat_input_t * input,
@@ -964,13 +948,11 @@ bfd_cli_udp_del_echo_source (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bfd_cli_udp_del_echo_source_cmd, static) = {
.path = "bfd udp echo-source del",
.short_help = "bfd udp echo-source del",
.function = bfd_cli_udp_del_echo_source,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/bfd/bfd_doc.md b/src/vnet/bfd/bfd_doc.md
deleted file mode 100644
index 7d7606e4dd1..00000000000
--- a/src/vnet/bfd/bfd_doc.md
+++ /dev/null
@@ -1,374 +0,0 @@
-# BFD module {#bfd_doc}
-
-## Overview
-
-Bidirectional Forwarding Detection in VPP currently supports single-hop UDP
-transport based on RFC 5880 and RFC 5881.
-
-## Usage
-
-### General usage
-
-BFD sessions are created using APIs only. The following CLIs are implemented,
-which call the APIs to manipulate the BFD:
-
-#### Show commands:
-
-> show bfd [keys|sessions|echo-source]
-
-Show the existing keys, sessions or echo-source.
-
-#### Key manipulation
-
-##### Create a new key or modify an existing key
-
-> bfd key set conf-key-id <id> type <keyed-sha1|meticulous-keyed-sha1> secret <secret>
-
-Parameters:
-
-* conf-key-id - local configuration key ID, used to uniquely identify this key
-* type - type of the key
-* secret - shared secret (hex data)
-
-Example:
-
-> bfd key set conf-key-id 2368880803 type meticulous-keyed-sha1 secret 69d685b0d990cdba46872706dc
-
-Notes:
-
-* in-use key cannot be modified
-
-##### Delete an existing key
-
-> bfd key del conf-key-id <id>
-
-Parameters:
-
-* conf-key-id - local configuration key ID, used to uniquely identify this key
-
-Example:
-
-> bfd key del conf-key-id 2368880803
-
-Notes:
-
-* in-use key cannot be deleted
-
-##### Create a new (plain or authenticated) BFD session
-
-> bfd udp session add interface <interface> local-addr <address> peer-addr <address> desired-min-tx <interval> required-min-rx <interval> detect-mult <multiplier> [ conf-key-id <ID> bfd-key-id <ID> ]
-
-Parameters:
-
-* interface - interface to which this session is tied to
-* local-addr - local address (ipv4 or ipv6)
-* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
-* desired-min-tx - desired minimum tx interval (microseconds)
-* required-min-rx - required minimum rx interval (microseconds)
-* detect-mult - detect multiplier (must be non-zero)
-* conf-key-id - local configuration key ID
-* bfd-key-id - BFD key ID, as carried in BFD control frames
-
-Example:
-
-> bfd udp session add interface pg0 local-addr fd01:1::1 peer-addr fd01:1::2 desired-min-tx 100000 required-min-rx 100000 detect-mult 3 conf-key-id 1029559112 bfd-key-id 13
-
-Notes:
-
-* if conf-key-id and bfd-key-id are not specified, session is non-authenticated
-* desired-min-tx controls desired transmission rate of both control frames and echo packets
-
-##### Modify BFD session
-
-> bfd udp session mod interface <interface> local-addr <address> peer-addr <address> desired-min-tx <interval> required-min-rx <interval> detect-mult <multiplier>
-
-Parameters:
-
-* interface - interface to which this session is tied to
-* local-addr - local address (ipv4 or ipv6)
-* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
-* desired-min-tx - desired minimum tx interval (microseconds)
-* required-min-rx - required minimum rx interval (microseconds)
-* detect-mult - detect multiplier (must be non-zero)
-
-Example:
-
-> bfd udp session mod interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2 desired-min-tx 300000 required-min-rx 200000 detect-mult 12
-
-Notes:
-
-* desired-min-tx controls desired transmission rate of both control frames and echo packets
-
-##### Delete an existing BFD session
-
-> bfd udp session del interface <interface> local-addr <address> peer-addr<address>
-
-Parameters:
-
-* interface - interface to which this session is tied to
-* local-addr - local address (ipv4 or ipv6)
-* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
-
-Example:
-
-> bfd udp session del interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2
-
-##### Set session admin-up or admin-down
-
-> bfd udp session set-flags interface <interface> local-addr <address> peer-addr <address> admin <up|down>
-
-Parameters:
-
-* interface - interface to which this session is tied to
-* local-addr - local address (ipv4 or ipv6)
-* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
-* admin - up/down based on desired action
-
-Example:
-
-> bfd udp session set-flags admin down interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2
-
-##### Activate/change authentication for existing session
-
-> bfd udp session auth activate interface <interface> local-addr <address> peer-addr <address> conf-key-id <ID> bfd-key-id <ID> [ delayed <yes|no> ]
-
-Parameters:
-
-* interface - interface to which this session is tied to
-* local-addr - local address (ipv4 or ipv6)
-* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
-* conf-key-id - local configuration key ID
-* bfd-key-id - BFD key ID, as carried in BFD control frames
-* delayed - is yes then this action is delayed until the peer performs the same action
-
-Example:
-
-> bfd udp session auth activate interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2 conf-key-id 540928695 bfd-key-id 239 delayed yes
-
-Notes:
-
-* see [Delayed option] for more information
-
-##### Deactivate authentication for existing session
-
-> bfd udp session auth deactivate interface <interface> local-addr <address> peer-addr <address> [ delayed <yes|no> ]
-
-Parameters:
-
-* interface - interface to which this session is tied to
-* local-addr - local address (ipv4 or ipv6)
-* peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
-* delayed - is yes then this action is delayed until the peer performs the same action
-
-Example:
-
-> bfd udp session auth deactivate interface pg0 local-addr 172.16.1.1 peer-addr 172.16.1.2
-
-Notes:
-
-* see [Delayed option] for more information
-
-##### Set echo-source interface
-
-> bfd udp echo-source set interface <interface>
-
-Parameters:
-
-* interface - interface used for getting source address for echo packets
-
-Example:
-
-> bfd udp echo-source set interface loop0
-
-##### Delete echo-source interface
-
-> bfd udp echo-source del
-
-Example:
-
-> bfd udp echo-source del
-
-### Authentication
-
-BFD sessions should be authenticated for security purposes. SHA1 and meticulous
-SHA1 authentication is supported by VPP. First, authentication keys are
-configured in VPP and afterwards they can be used by sessions.
-
-There are two key IDs in the scope of BFD session:
-
-* configuration key ID is the internal unique key ID inside VPP and is never
- communicated to any peer, it serves only the purpose of identifying the key
-* BFD key ID is the key ID carried in BFD control frames and is used for
- verifying authentication
-
-#### Turning auth on/off
-
-Authentication can be turned on or off at any time. Care must be taken however,
-to either synchronize the authentication manipulation with peer's actions
-to avoid the session going down.
-
-##### Delayed option
-
-Delayed option is useful for synchronizing authentication changes with a peer.
-If it's specified, then authentication change is not performed immediately.
-In this case, VPP continues to transmit packets using the old authentication
-method (unauthenticated or using old sha1 key). If a packet is received, which
-does not pass the current authentication, then VPP tries to authenticate it
-using the new method (which might be none, if deactivating authentication)
-and if it passes, then the new authentication method is put in use.
-
-The recommended procedure for enabling/changing/disabling session
-authentication is:
-
-1. perform authentication change on vpp's side with delayed option set to yes
-2. perform authentication change on peer's side (without delayed option)
-
-Notes:
-
-* if both peers use delayed option at the same time, the change will never
- be carried out, since none of the peers will see any packet with the new
- authentication which could trigger the change
-* remote peer does not need to support or even be aware of this mechanism
- for it to work properly
-
-
-### Echo function
-
-Echo function is used by VPP whenever a peer declares the willingness
-to support it, echo-source is set and it contains a usable subnet (see below).
-When echo function is switched on, the required min rx interval advertised
-to peer is set to 1 second (or the configured value, if its higher).
-
-#### Echo source address
-
-Because echo packets are only looped back (and not processed in any way)
-by a peer, it's necessary to set the source address in a way which avoids
-packet drop due to spoofing protection by VPP. Per RFC, the source address
-should not be in the subnet set on the interface over which the echo packets
-are sent. Also, it must not be any VPP-local address, otherwise the packet
-gets dropped on receipt by VPP. The solution is to create a loopback interface
-with a (private) IPv4/IPv6 subnet assigned as echo-source. The BFD then picks
-an unused address from the subnet by flipping the last bit and uses that as
-source address in the echo packets, thus meeting RFC recommendation while
-avoiding spoofing protection.
-
-Example: if 10.10.10.3/31 is the subnet, then 10.10.10.2 will be used as
- source address in (IPv4) echo packets
-
-### Demand mode
-
-Demand mode is respected by VPP, but not used locally. The only scenario when
-demand mode could make sense currently is when echo is active. Because echo
-packets are inherently insecure against an adversary looping them back a poll
-sequence would be required for slow periodic connectivity verification anyway.
-It's more efficient to just ask the remote peer to send slow periodic control
-frames without VPP initiating periodic poll sequences.
-
-### Admin-down
-
-Session may be put admin-down at any time. This immediately causes the state
-to be changed to AdminDown and remain so unless the session is put admin-up.
-
-## BFD implementation notes
-
-Because BFD can work over different transport layers, the BFD code is separated
-into core BFD functionality - main module implemented in bfd_main.c
-and transport-specific code implemented in bfd_udp.c.
-
-### Main module
-
-Main module is responsible for handling all the BFD functionality defined
-in RFC 5880.
-
-#### Internal API
-
-Internal APIs defined in bfd_main.h are called from transport-specific code
-to create/modify/delete
-
-#### Packet receipt
-
-When a packet is received by the transport layer, it is forwarded to main
-module (to main thread) via an RPC call. At this point, the authentication has
-been verified, so the packet is consumed, session parameters are updated
-accordingly and state change (if applicable). Based on these, the timeouts
-are adjusted if required and an event is sent to the process node to wake up
-and recalculate sleep time.
-
-#### Packet transmit
-
-Main module allocates a vlib_buffer_t, creates the required BFD frame (control
-or echo in it), then calls the transport layer to add the transport layer.
-Then a frame containing the buffer to the aprropriate node is created
-and enqueued.
-
-#### Process node
-
-Main module implements one process node which is a simple loop. The process
-node gets next timeout from the timer wheel, sleeps until the timeout expires
-and then calls a timeout routine which drives the state machine for each
-session which timed out. The sleep is interrupted externally via vlib event,
-when a session is added or modified in a way which might require timer wheel
-manipulation. In this case the caller inserts the necessary timeout to timer
-wheel and then signals the process node to wake up early, handle possible
-timeouts and recalculate the sleep time again.
-
-#### State machine
-
-Default state of BFD session when created is Down, per RFC 5880. State changes
-to Init, Up or Down based on events like received state from peer and timeouts.
-The session state can be set AdminDown using a binary API, which prevents it
-from going to any other state, until this limitation is removed. This state
-is advertised to peers in slow periodic control frames.
-
-For each session, the following timeouts are maintained:
-
-1. tx timeout - used for sending out control frames
-2. rx timeout - used for detecting session timeout
-3. echo tx timeout - used for sending out echo frames
-3. echo rx timeout - used for detecting session timeout based on echo
-
-These timeouts are maintained in cpu clocks and recalculated when appropriate
-(e.g. rx timeout is bumped when a packet is received, keeping the session
-alive). Only the earliest timeout is inserted into the timer wheel at a time
-and timer wheel events are never deleted, rather spurious events are ignored.
-This allows efficient operation, like not inserting events into timing wheel
-for each packet received or ignoring left-over events in case a bfd session
-gets removed and a new one is recreated with the same session index.
-
-#### Authentication keys management
-
-Authentication keys are managed internally in a pool, with each key tracking
-it's use count. The removal/modification is only allowed if the key is not in
-use.
-
-### UDP module
-
-UDP module is responsible for:
-
-1. public APIs/CLIs to configure BFD over UDP.
-2. support code called by main module to encapsulate/decapsulate BFD packets
-
-This module implements two graph nodes - for consuming ipv4 and ipv6 packets
-target at BFD ports 3874 and 3875.
-
-#### Packet receipt
-
-BFD packet receipt receipt starts in the bfd udp graph nodes. Since the code
-needs to verify IP/UDP header data, it relies on ip4-local (and ip6-local)
-nodes to store pointers to the appropriate headers. First, your discriminator
-is extracted from BFD packet and used to lookup the existing session. In case
-it's zero, the pair of IP addresses and sw_if_index is used to lookup session.
-Then, main module is called to verify the authentication, if present.
-Afterwards a check is made if the IP/UDP headers are correct. If yes, then
-an RPC call is made to the main thread to consume the packet and take action
-upon it.
-
-#### Packet transmission
-
-When process node decides that there is a need to transmit the packet, it
-creates a buffer, fills the BFD frame data in and calls the UDP module to
-add the transport layer. This is a simple operation for the control frames
-consisting of just adding UDP/IP headers based on session data. For echo
-frames, an additional step, looking at the echo-source interface and picking
-and address is performed and if this fails, then the packet cannot be
-transmitted and an error is returned to main thread.
diff --git a/src/vnet/bfd/bfd_doc.rst b/src/vnet/bfd/bfd_doc.rst
new file mode 100644
index 00000000000..54a53c6fe92
--- /dev/null
+++ b/src/vnet/bfd/bfd_doc.rst
@@ -0,0 +1,512 @@
+.. _bfd_doc:
+
+BFD module
+==========
+
+Overview
+--------
+
+Bidirectional Forwarding Detection in VPP currently supports single-hop
+UDP transport based on RFC 5880 and RFC 5881.
+
+Usage
+-----
+
+General usage
+~~~~~~~~~~~~~
+
+BFD sessions are created using APIs only. The following CLIs are
+implemented, which call the APIs to manipulate the BFD:
+
+Show commands:
+^^^^^^^^^^^^^^
+
+ show bfd [keys|sessions|echo-source]
+
+Show the existing keys, sessions or echo-source.
+
+Key manipulation
+^^^^^^^^^^^^^^^^
+
+Create a new key or modify an existing key
+''''''''''''''''''''''''''''''''''''''''''
+
+ bfd key set conf-key-id type <keyed-sha1|meticulous-keyed-sha1>
+ secret
+
+Parameters:
+
+- conf-key-id - local configuration key ID, used to uniquely identify
+ this key
+- type - type of the key
+- secret - shared secret (hex data)
+
+Example:
+
+ bfd key set conf-key-id 2368880803 type meticulous-keyed-sha1 secret
+ 69d685b0d990cdba46872706dc
+
+Notes:
+
+- in-use key cannot be modified
+
+Delete an existing key
+''''''''''''''''''''''
+
+ bfd key del conf-key-id
+
+Parameters:
+
+- conf-key-id - local configuration key ID, used to uniquely identify
+ this key
+
+Example:
+
+ bfd key del conf-key-id 2368880803
+
+Notes:
+
+- in-use key cannot be deleted
+
+Create a new (plain or authenticated) BFD session
+'''''''''''''''''''''''''''''''''''''''''''''''''
+
+ bfd udp session add interface local-addr
+
+ .. raw:: html
+
+ <address>
+
+ peer-addr
+
+ .. raw:: html
+
+ <address>
+
+ desired-min-tx required-min-rx detect-mult [ conf-key-id bfd-key-id ]
+
+Parameters:
+
+- interface - interface to which this session is tied to
+- local-addr - local address (ipv4 or ipv6)
+- peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+- desired-min-tx - desired minimum tx interval (microseconds)
+- required-min-rx - required minimum rx interval (microseconds)
+- detect-mult - detect multiplier (must be non-zero)
+- conf-key-id - local configuration key ID
+- bfd-key-id - BFD key ID, as carried in BFD control frames
+
+Example:
+
+ bfd udp session add interface pg0 local-addr fd01:1::1 peer-addr
+ fd01:1::2 desired-min-tx 100000 required-min-rx 100000 detect-mult 3
+ conf-key-id 1029559112 bfd-key-id 13
+
+Notes:
+
+- if conf-key-id and bfd-key-id are not specified, session is
+ non-authenticated
+- desired-min-tx controls desired transmission rate of both control
+ frames and echo packets
+
+Modify BFD session
+''''''''''''''''''
+
+ bfd udp session mod interface local-addr
+
+ .. raw:: html
+
+ <address>
+
+ peer-addr
+
+ .. raw:: html
+
+ <address>
+
+ desired-min-tx required-min-rx detect-mult
+
+Parameters:
+
+- interface - interface to which this session is tied to
+- local-addr - local address (ipv4 or ipv6)
+- peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+- desired-min-tx - desired minimum tx interval (microseconds)
+- required-min-rx - required minimum rx interval (microseconds)
+- detect-mult - detect multiplier (must be non-zero)
+
+Example:
+
+ bfd udp session mod interface pg0 local-addr 172.16.1.1 peer-addr
+ 172.16.1.2 desired-min-tx 300000 required-min-rx 200000 detect-mult
+ 12
+
+Notes:
+
+- desired-min-tx controls desired transmission rate of both control
+ frames and echo packets
+
+Delete an existing BFD session
+''''''''''''''''''''''''''''''
+
+ bfd udp session del interface local-addr
+
+ .. raw:: html
+
+ <address>
+
+ peer-addr
+
+ .. raw:: html
+
+ <address>
+
+Parameters:
+
+- interface - interface to which this session is tied to
+- local-addr - local address (ipv4 or ipv6)
+- peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+
+Example:
+
+ bfd udp session del interface pg0 local-addr 172.16.1.1 peer-addr
+ 172.16.1.2
+
+Set session admin-up or admin-down
+''''''''''''''''''''''''''''''''''
+
+ bfd udp session set-flags interface local-addr
+
+ .. raw:: html
+
+ <address>
+
+ peer-addr
+
+ .. raw:: html
+
+ <address>
+
+ admin <up|down>
+
+Parameters:
+
+- interface - interface to which this session is tied to
+- local-addr - local address (ipv4 or ipv6)
+- peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+- admin - up/down based on desired action
+
+Example:
+
+ bfd udp session set-flags admin down interface pg0 local-addr
+ 172.16.1.1 peer-addr 172.16.1.2
+
+Activate/change authentication for existing session
+'''''''''''''''''''''''''''''''''''''''''''''''''''
+
+ bfd udp session auth activate interface local-addr
+
+ .. raw:: html
+
+ <address>
+
+ peer-addr
+
+ .. raw:: html
+
+ <address>
+
+ conf-key-id bfd-key-id [ delayed <yes|no> ]
+
+Parameters:
+
+- interface - interface to which this session is tied to
+- local-addr - local address (ipv4 or ipv6)
+- peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+- conf-key-id - local configuration key ID
+- bfd-key-id - BFD key ID, as carried in BFD control frames
+- delayed - is yes then this action is delayed until the peer performs
+ the same action
+
+Example:
+
+ bfd udp session auth activate interface pg0 local-addr 172.16.1.1
+ peer-addr 172.16.1.2 conf-key-id 540928695 bfd-key-id 239 delayed yes
+
+Notes:
+
+- see `Delayed option <#delayed-option>`__ for more information
+
+Deactivate authentication for existing session
+''''''''''''''''''''''''''''''''''''''''''''''
+
+ bfd udp session auth deactivate interface local-addr
+
+ .. raw:: html
+
+ <address>
+
+ peer-addr
+
+ .. raw:: html
+
+ <address>
+
+ [ delayed <yes|no> ]
+
+Parameters:
+
+- interface - interface to which this session is tied to
+- local-addr - local address (ipv4 or ipv6)
+- peer-addr - peer address (ipv4 or ipv6, must match local-addr family)
+- delayed - is yes then this action is delayed until the peer performs
+ the same action
+
+Example:
+
+ bfd udp session auth deactivate interface pg0 local-addr 172.16.1.1
+ peer-addr 172.16.1.2
+
+Notes:
+
+- see `Delayed option <#delayed-option>`__ for more information
+
+Set echo-source interface
+'''''''''''''''''''''''''
+
+ bfd udp echo-source set interface
+
+Parameters:
+
+- interface - interface used for getting source address for echo
+ packets
+
+Example:
+
+ bfd udp echo-source set interface loop0
+
+Delete echo-source interface
+''''''''''''''''''''''''''''
+
+ bfd udp echo-source del
+
+Example:
+
+ bfd udp echo-source del
+
+Authentication
+~~~~~~~~~~~~~~
+
+BFD sessions should be authenticated for security purposes. SHA1 and
+meticulous SHA1 authentication is supported by VPP. First,
+authentication keys are configured in VPP and afterwards they can be
+used by sessions.
+
+There are two key IDs in the scope of BFD session:
+
+- configuration key ID is the internal unique key ID inside VPP and is
+ never communicated to any peer, it serves only the purpose of
+ identifying the key
+- BFD key ID is the key ID carried in BFD control frames and is used
+ for verifying authentication
+
+Turning auth on/off
+^^^^^^^^^^^^^^^^^^^
+
+Authentication can be turned on or off at any time. Care must be taken
+however, to either synchronize the authentication manipulation with
+peer’s actions to avoid the session going down.
+
+Delayed option
+''''''''''''''
+
+Delayed option is useful for synchronizing authentication changes with a
+peer. If it’s specified, then authentication change is not performed
+immediately. In this case, VPP continues to transmit packets using the
+old authentication method (unauthenticated or using old sha1 key). If a
+packet is received, which does not pass the current authentication, then
+VPP tries to authenticate it using the new method (which might be none,
+if deactivating authentication) and if it passes, then the new
+authentication method is put in use.
+
+The recommended procedure for enabling/changing/disabling session
+authentication is:
+
+1. perform authentication change on vpp’s side with delayed option set
+ to yes
+2. perform authentication change on peer’s side (without delayed option)
+
+Notes:
+
+- if both peers use delayed option at the same time, the change will
+ never be carried out, since none of the peers will see any packet
+ with the new authentication which could trigger the change
+- remote peer does not need to support or even be aware of this
+ mechanism for it to work properly
+
+Echo function
+~~~~~~~~~~~~~
+
+Echo function is used by VPP whenever a peer declares the willingness to
+support it, echo-source is set and it contains a usable subnet (see
+below). When echo function is switched on, the required min rx interval
+advertised to peer is set to 1 second (or the configured value, if its
+higher).
+
+Echo source address
+^^^^^^^^^^^^^^^^^^^
+
+Because echo packets are only looped back (and not processed in any way)
+by a peer, it’s necessary to set the source address in a way which
+avoids packet drop due to spoofing protection by VPP. Per RFC, the
+source address should not be in the subnet set on the interface over
+which the echo packets are sent. Also, it must not be any VPP-local
+address, otherwise the packet gets dropped on receipt by VPP. The
+solution is to create a loopback interface with a (private) IPv4/IPv6
+subnet assigned as echo-source. The BFD then picks an unused address
+from the subnet by flipping the last bit and uses that as source address
+in the echo packets, thus meeting RFC recommendation while avoiding
+spoofing protection.
+
+Example: if 10.10.10.3/31 is the subnet, then 10.10.10.2 will be used as
+source address in (IPv4) echo packets
+
+Demand mode
+~~~~~~~~~~~
+
+Demand mode is respected by VPP, but not used locally. The only scenario
+when demand mode could make sense currently is when echo is active.
+Because echo packets are inherently insecure against an adversary
+looping them back a poll sequence would be required for slow periodic
+connectivity verification anyway. It’s more efficient to just ask the
+remote peer to send slow periodic control frames without VPP initiating
+periodic poll sequences.
+
+Admin-down
+~~~~~~~~~~
+
+Session may be put admin-down at any time. This immediately causes the
+state to be changed to AdminDown and remain so unless the session is put
+admin-up.
+
+BFD implementation notes
+------------------------
+
+Because BFD can work over different transport layers, the BFD code is
+separated into core BFD functionality - main module implemented in
+bfd_main.c and transport-specific code implemented in bfd_udp.c.
+
+Main module
+~~~~~~~~~~~
+
+Main module is responsible for handling all the BFD functionality
+defined in RFC 5880.
+
+Internal API
+^^^^^^^^^^^^
+
+Internal APIs defined in bfd_main.h are called from transport-specific
+code to create/modify/delete
+
+Packet receipt
+^^^^^^^^^^^^^^
+
+When a packet is received by the transport layer, it is forwarded to
+main module (to main thread) via an RPC call. At this point, the
+authentication has been verified, so the packet is consumed, session
+parameters are updated accordingly and state change (if applicable).
+Based on these, the timeouts are adjusted if required and an event is
+sent to the process node to wake up and recalculate sleep time.
+
+Packet transmit
+^^^^^^^^^^^^^^^
+
+Main module allocates a vlib_buffer_t, creates the required BFD frame
+(control or echo in it), then calls the transport layer to add the
+transport layer. Then a frame containing the buffer to the appropriate
+node is created and enqueued.
+
+Process node
+^^^^^^^^^^^^
+
+Main module implements one process node which is a simple loop. The
+process node gets next timeout from the timer wheel, sleeps until the
+timeout expires and then calls a timeout routine which drives the state
+machine for each session which timed out. The sleep is interrupted
+externally via vlib event, when a session is added or modified in a way
+which might require timer wheel manipulation. In this case the caller
+inserts the necessary timeout to timer wheel and then signals the
+process node to wake up early, handle possible timeouts and recalculate
+the sleep time again.
+
+State machine
+^^^^^^^^^^^^^
+
+Default state of BFD session when created is Down, per RFC 5880. State
+changes to Init, Up or Down based on events like received state from
+peer and timeouts. The session state can be set AdminDown using a binary
+API, which prevents it from going to any other state, until this
+limitation is removed. This state is advertised to peers in slow
+periodic control frames.
+
+For each session, the following timeouts are maintained:
+
+1. tx timeout - used for sending out control frames
+2. rx timeout - used for detecting session timeout
+3. echo tx timeout - used for sending out echo frames
+4. echo rx timeout - used for detecting session timeout based on echo
+
+These timeouts are maintained in cpu clocks and recalculated when
+appropriate (e.g. rx timeout is bumped when a packet is received,
+keeping the session alive). Only the earliest timeout is inserted into
+the timer wheel at a time and timer wheel events are never deleted,
+rather spurious events are ignored. This allows efficient operation,
+like not inserting events into timing wheel for each packet received or
+ignoring left-over events in case a bfd session gets removed and a new
+one is recreated with the same session index.
+
+Authentication keys management
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Authentication keys are managed internally in a pool, with each key
+tracking it’s use count. The removal/modification is only allowed if the
+key is not in use.
+
+UDP module
+~~~~~~~~~~
+
+UDP module is responsible for:
+
+1. public APIs/CLIs to configure BFD over UDP.
+2. support code called by main module to encapsulate/decapsulate BFD
+ packets
+
+This module implements two graph nodes - for consuming ipv4 and ipv6
+packets target at BFD ports 3874 and 3875.
+
+.. _packet-receipt-1:
+
+Packet receipt
+^^^^^^^^^^^^^^
+
+BFD packet receipt receipt starts in the bfd udp graph nodes. Since the
+code needs to verify IP/UDP header data, it relies on ip4-local (and
+ip6-local) nodes to store pointers to the appropriate headers. First,
+your discriminator is extracted from BFD packet and used to lookup the
+existing session. In case it’s zero, the pair of IP addresses and
+sw_if_index is used to lookup session. Then, main module is called to
+verify the authentication, if present. Afterwards a check is made if the
+IP/UDP headers are correct. If yes, then an RPC call is made to the main
+thread to consume the packet and take action upon it.
+
+Packet transmission
+^^^^^^^^^^^^^^^^^^^
+
+When process node decides that there is a need to transmit the packet,
+it creates a buffer, fills the BFD frame data in and calls the UDP
+module to add the transport layer. This is a simple operation for the
+control frames consisting of just adding UDP/IP headers based on session
+data. For echo frames, an additional step, looking at the echo-source
+interface and picking and address is performed and if this fails, then
+the packet cannot be transmitted and an error is returned to main
+thread.
diff --git a/src/vnet/bfd/bfd_main.c b/src/vnet/bfd/bfd_main.c
index c67317f5789..1423da91158 100644
--- a/src/vnet/bfd/bfd_main.c
+++ b/src/vnet/bfd/bfd_main.c
@@ -30,14 +30,25 @@
#include <vlib/log.h>
#include <vnet/crypto/crypto.h>
+static void
+bfd_validate_counters (bfd_main_t *bm)
+{
+ vlib_validate_combined_counter (&bm->rx_counter, pool_elts (bm->sessions));
+ vlib_validate_combined_counter (&bm->rx_echo_counter,
+ pool_elts (bm->sessions));
+ vlib_validate_combined_counter (&bm->tx_counter, pool_elts (bm->sessions));
+ vlib_validate_combined_counter (&bm->tx_echo_counter,
+ pool_elts (bm->sessions));
+}
+
static u64
bfd_calc_echo_checksum (u32 discriminator, u64 expire_time, u32 secret)
{
u64 checksum = 0;
#if defined(clib_crc32c_uses_intrinsics) && !defined (__i386__)
- checksum = crc32_u64 (0, discriminator);
- checksum = crc32_u64 (checksum, expire_time);
- checksum = crc32_u64 (checksum, secret);
+ checksum = clib_crc32c_u64 (0, discriminator);
+ checksum = clib_crc32c_u64 (checksum, expire_time);
+ checksum = clib_crc32c_u64 (checksum, secret);
#else
checksum = clib_xxhash (discriminator ^ expire_time ^ secret);
#endif
@@ -172,7 +183,7 @@ bfd_set_poll_state (bfd_session_t * bs, bfd_poll_state_e state)
}
static void
-bfd_recalc_tx_interval (bfd_main_t * bm, bfd_session_t * bs)
+bfd_recalc_tx_interval (bfd_session_t *bs)
{
bs->transmit_interval_nsec =
clib_max (bs->effective_desired_min_tx_nsec, bs->remote_min_rx_nsec);
@@ -181,7 +192,7 @@ bfd_recalc_tx_interval (bfd_main_t * bm, bfd_session_t * bs)
}
static void
-bfd_recalc_echo_tx_interval (bfd_main_t * bm, bfd_session_t * bs)
+bfd_recalc_echo_tx_interval (bfd_session_t *bs)
{
bs->echo_transmit_interval_nsec =
clib_max (bs->effective_desired_min_tx_nsec, bs->remote_min_echo_rx_nsec);
@@ -240,7 +251,7 @@ bfd_calc_next_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now)
}
static void
-bfd_calc_next_echo_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now)
+bfd_calc_next_echo_tx (bfd_session_t *bs, u64 now)
{
bs->echo_tx_timeout_nsec =
bs->echo_last_tx_nsec + bs->echo_transmit_interval_nsec;
@@ -261,7 +272,7 @@ bfd_calc_next_echo_tx (bfd_main_t * bm, bfd_session_t * bs, u64 now)
}
static void
-bfd_recalc_detection_time (bfd_main_t * bm, bfd_session_t * bs)
+bfd_recalc_detection_time (bfd_session_t *bs)
{
if (bs->local_state == BFD_STATE_init || bs->local_state == BFD_STATE_up)
{
@@ -385,26 +396,24 @@ bfd_set_effective_desired_min_tx (bfd_main_t * bm,
bs->effective_desired_min_tx_nsec = desired_min_tx_nsec;
BFD_DBG ("Set effective desired min tx to " BFD_CLK_FMT,
BFD_CLK_PRN (bs->effective_desired_min_tx_nsec));
- bfd_recalc_detection_time (bm, bs);
- bfd_recalc_tx_interval (bm, bs);
- bfd_recalc_echo_tx_interval (bm, bs);
+ bfd_recalc_detection_time (bs);
+ bfd_recalc_tx_interval (bs);
+ bfd_recalc_echo_tx_interval (bs);
bfd_calc_next_tx (bm, bs, now);
}
static void
-bfd_set_effective_required_min_rx (bfd_main_t * bm,
- bfd_session_t * bs,
- u64 required_min_rx_nsec)
+bfd_set_effective_required_min_rx (bfd_session_t *bs, u64 required_min_rx_nsec)
{
bs->effective_required_min_rx_nsec = required_min_rx_nsec;
BFD_DBG ("Set effective required min rx to " BFD_CLK_FMT,
BFD_CLK_PRN (bs->effective_required_min_rx_nsec));
- bfd_recalc_detection_time (bm, bs);
+ bfd_recalc_detection_time (bs);
}
static void
-bfd_set_remote_required_min_rx (bfd_main_t * bm, bfd_session_t * bs,
- u64 now, u32 remote_required_min_rx_usec)
+bfd_set_remote_required_min_rx (bfd_session_t *bs,
+ u32 remote_required_min_rx_usec)
{
if (bs->remote_min_rx_usec != remote_required_min_rx_usec)
{
@@ -412,14 +421,13 @@ bfd_set_remote_required_min_rx (bfd_main_t * bm, bfd_session_t * bs,
bs->remote_min_rx_nsec = bfd_usec_to_nsec (remote_required_min_rx_usec);
BFD_DBG ("Set remote min rx to " BFD_CLK_FMT,
BFD_CLK_PRN (bs->remote_min_rx_nsec));
- bfd_recalc_detection_time (bm, bs);
- bfd_recalc_tx_interval (bm, bs);
+ bfd_recalc_detection_time (bs);
+ bfd_recalc_tx_interval (bs);
}
}
static void
-bfd_set_remote_required_min_echo_rx (bfd_main_t * bm, bfd_session_t * bs,
- u64 now,
+bfd_set_remote_required_min_echo_rx (bfd_session_t *bs,
u32 remote_required_min_echo_rx_usec)
{
if (bs->remote_min_echo_rx_usec != remote_required_min_echo_rx_usec)
@@ -429,7 +437,7 @@ bfd_set_remote_required_min_echo_rx (bfd_main_t * bm, bfd_session_t * bs,
bfd_usec_to_nsec (bs->remote_min_echo_rx_usec);
BFD_DBG ("Set remote min echo rx to " BFD_CLK_FMT,
BFD_CLK_PRN (bs->remote_min_echo_rx_nsec));
- bfd_recalc_echo_tx_interval (bm, bs);
+ bfd_recalc_echo_tx_interval (bs);
}
}
@@ -450,14 +458,21 @@ bfd_session_start (bfd_main_t * bm, bfd_session_t * bs)
BFD_DBG ("\nStarting session: %U", format_bfd_session, bs);
vlib_log_info (bm->log_class, "start BFD session: %U",
format_bfd_session_brief, bs);
- bfd_set_effective_required_min_rx (bm, bs, bs->config_required_min_rx_nsec);
- bfd_recalc_tx_interval (bm, bs);
+ bfd_set_effective_required_min_rx (bs, bs->config_required_min_rx_nsec);
+ bfd_recalc_tx_interval (bs);
vlib_process_signal_event (bm->vlib_main, bm->bfd_process_node_index,
BFD_EVENT_NEW_SESSION, bs->bs_idx);
bfd_notify_listeners (bm, BFD_LISTEN_EVENT_CREATE, bs);
}
void
+bfd_session_stop (bfd_main_t *bm, bfd_session_t *bs)
+{
+ BFD_DBG ("\nStopping session: %U", format_bfd_session, bs);
+ bfd_notify_listeners (bm, BFD_LISTEN_EVENT_DELETE, bs);
+}
+
+void
bfd_session_set_flags (vlib_main_t * vm, bfd_session_t * bs, u8 admin_up_down)
{
bfd_main_t *bm = &bfd_main;
@@ -485,30 +500,29 @@ bfd_session_set_flags (vlib_main_t * vm, bfd_session_t * bs, u8 admin_up_down)
}
u8 *
-bfd_input_format_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- const bfd_input_trace_t *t = va_arg (*args, bfd_input_trace_t *);
- const bfd_pkt_t *pkt = (bfd_pkt_t *) t->data;
- if (t->len > STRUCT_SIZE_OF (bfd_pkt_t, head))
- {
- s = format (s, "BFD v%u, diag=%u(%s), state=%u(%s),\n"
- " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), "
- "detect_mult=%u, length=%u\n",
- bfd_pkt_get_version (pkt), bfd_pkt_get_diag_code (pkt),
- bfd_diag_code_string (bfd_pkt_get_diag_code (pkt)),
- bfd_pkt_get_state (pkt),
- bfd_state_string (bfd_pkt_get_state (pkt)),
- bfd_pkt_get_poll (pkt), bfd_pkt_get_final (pkt),
- bfd_pkt_get_control_plane_independent (pkt),
- bfd_pkt_get_auth_present (pkt), bfd_pkt_get_demand (pkt),
- bfd_pkt_get_multipoint (pkt), pkt->head.detect_mult,
- pkt->head.length);
- if (t->len >= sizeof (bfd_pkt_t) &&
- pkt->head.length >= sizeof (bfd_pkt_t))
+format_bfd_pkt (u8 *s, va_list *args)
+{
+ u32 len = va_arg (*args, u32);
+ u8 *data = va_arg (*args, u8 *);
+
+ const bfd_pkt_t *pkt = (bfd_pkt_t *) data;
+ if (len > STRUCT_SIZE_OF (bfd_pkt_t, head))
+ {
+ s = format (
+ s,
+ "BFD v%u, diag=%u(%s), state=%u(%s),\n"
+ " flags=(P:%u, F:%u, C:%u, A:%u, D:%u, M:%u), "
+ "detect_mult=%u, length=%u",
+ bfd_pkt_get_version (pkt), bfd_pkt_get_diag_code (pkt),
+ bfd_diag_code_string (bfd_pkt_get_diag_code (pkt)),
+ bfd_pkt_get_state (pkt), bfd_state_string (bfd_pkt_get_state (pkt)),
+ bfd_pkt_get_poll (pkt), bfd_pkt_get_final (pkt),
+ bfd_pkt_get_control_plane_independent (pkt),
+ bfd_pkt_get_auth_present (pkt), bfd_pkt_get_demand (pkt),
+ bfd_pkt_get_multipoint (pkt), pkt->head.detect_mult, pkt->head.length);
+ if (len >= sizeof (bfd_pkt_t) && pkt->head.length >= sizeof (bfd_pkt_t))
{
- s = format (s, " my discriminator: %u\n",
+ s = format (s, "\n my discriminator: %u\n",
clib_net_to_host_u32 (pkt->my_disc));
s = format (s, " your discriminator: %u\n",
clib_net_to_host_u32 (pkt->your_disc));
@@ -519,16 +533,16 @@ bfd_input_format_trace (u8 * s, va_list * args)
s = format (s, " required min echo rx interval: %u",
clib_net_to_host_u32 (pkt->req_min_echo_rx));
}
- if (t->len >= sizeof (bfd_pkt_with_common_auth_t) &&
+ if (len >= sizeof (bfd_pkt_with_common_auth_t) &&
pkt->head.length >= sizeof (bfd_pkt_with_common_auth_t) &&
bfd_pkt_get_auth_present (pkt))
{
const bfd_pkt_with_common_auth_t *with_auth = (void *) pkt;
const bfd_auth_common_t *common = &with_auth->common_auth;
s = format (s, "\n auth len: %u\n", common->len);
- s = format (s, " auth type: %u:%s\n", common->type,
+ s = format (s, " auth type: %u:%s", common->type,
bfd_auth_type_str (common->type));
- if (t->len >= sizeof (bfd_pkt_with_sha1_auth_t) &&
+ if (len >= sizeof (bfd_pkt_with_sha1_auth_t) &&
pkt->head.length >= sizeof (bfd_pkt_with_sha1_auth_t) &&
(BFD_AUTH_TYPE_keyed_sha1 == common->type ||
BFD_AUTH_TYPE_meticulous_keyed_sha1 == common->type))
@@ -542,15 +556,23 @@ bfd_input_format_trace (u8 * s, va_list * args)
sizeof (sha1->hash));
}
}
- else
- {
- s = format (s, "\n");
- }
}
return s;
}
+u8 *
+bfd_input_format_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ const bfd_input_trace_t *t = va_arg (*args, bfd_input_trace_t *);
+
+ s = format (s, "%U", format_bfd_pkt, t->len, t->data);
+
+ return s;
+}
+
typedef struct
{
u32 bs_idx;
@@ -651,8 +673,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now,
clib_max
(bs->config_desired_min_tx_nsec,
bm->default_desired_min_tx_nsec));
- bfd_set_effective_required_min_rx (bm, bs,
- bs->config_required_min_rx_nsec);
+ bfd_set_effective_required_min_rx (bs, bs->config_required_min_rx_nsec);
bfd_set_timer (bm, bs, now, handling_wakeup);
break;
case BFD_STATE_down:
@@ -661,8 +682,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now,
clib_max
(bs->config_desired_min_tx_nsec,
bm->default_desired_min_tx_nsec));
- bfd_set_effective_required_min_rx (bm, bs,
- bs->config_required_min_rx_nsec);
+ bfd_set_effective_required_min_rx (bs, bs->config_required_min_rx_nsec);
bfd_set_timer (bm, bs, now, handling_wakeup);
break;
case BFD_STATE_init:
@@ -676,7 +696,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now,
bs->config_desired_min_tx_nsec);
if (BFD_POLL_NOT_NEEDED == bs->poll_state)
{
- bfd_set_effective_required_min_rx (bm, bs,
+ bfd_set_effective_required_min_rx (bs,
bs->config_required_min_rx_nsec);
}
bfd_set_timer (bm, bs, now, handling_wakeup);
@@ -694,8 +714,7 @@ bfd_on_state_change (bfd_main_t * bm, bfd_session_t * bs, u64 now,
}
static void
-bfd_on_config_change (vlib_main_t * vm, vlib_node_runtime_t * rt,
- bfd_main_t * bm, bfd_session_t * bs, u64 now)
+bfd_on_config_change (bfd_main_t *bm, bfd_session_t *bs, u64 now)
{
/*
* if remote demand mode is set and we need to do a poll, set the next
@@ -706,7 +725,7 @@ bfd_on_config_change (vlib_main_t * vm, vlib_node_runtime_t * rt,
{
bs->tx_timeout_nsec = now;
}
- bfd_recalc_detection_time (bm, bs);
+ bfd_recalc_detection_time (bs);
bfd_set_timer (bm, bs, now, 0);
}
@@ -727,17 +746,18 @@ bfd_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
}
static int
-bfd_transport_control_frame (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
+bfd_transport_control_frame (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ bfd_session_t *bs)
{
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
BFD_DBG ("Transport bfd via udp4, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp4 (vm, bi, bs);
+ return bfd_transport_udp4 (vm, rt, bi, bs, 0 /* is_echo */);
break;
case BFD_TRANSPORT_UDP6:
BFD_DBG ("Transport bfd via udp6, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp6 (vm, bi, bs);
+ return bfd_transport_udp6 (vm, rt, bi, bs, 0 /* is_echo */);
break;
}
return 0;
@@ -761,17 +781,18 @@ bfd_echo_add_transport_layer (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
}
static int
-bfd_transport_echo (vlib_main_t * vm, u32 bi, bfd_session_t * bs)
+bfd_transport_echo (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ bfd_session_t *bs)
{
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
BFD_DBG ("Transport bfd echo via udp4, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp4 (vm, bi, bs);
+ return bfd_transport_udp4 (vm, rt, bi, bs, 1 /* is_echo */);
break;
case BFD_TRANSPORT_UDP6:
BFD_DBG ("Transport bfd echo via udp6, bs_idx=%u", bs->bs_idx);
- return bfd_transport_udp6 (vm, bi, bs);
+ return bfd_transport_udp6 (vm, rt, bi, bs, 1 /* is_echo */);
break;
}
return 0;
@@ -861,8 +882,7 @@ bfd_is_echo_possible (bfd_session_t * bs)
}
static void
-bfd_init_control_frame (bfd_main_t * bm, bfd_session_t * bs,
- vlib_buffer_t * b)
+bfd_init_control_frame (bfd_session_t *bs, vlib_buffer_t *b)
{
bfd_pkt_t *pkt = vlib_buffer_get_current (b);
u32 bfd_length = 0;
@@ -891,9 +911,39 @@ bfd_init_control_frame (bfd_main_t * bm, bfd_session_t * bs,
b->current_length = bfd_length;
}
+typedef struct
+{
+ u32 bs_idx;
+ u32 len;
+ u8 data[400];
+} bfd_process_trace_t;
+
+static void
+bfd_process_trace_buf (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ vlib_buffer_t *b, bfd_session_t *bs)
+{
+ u32 n_trace = vlib_get_trace_count (vm, rt);
+ if (n_trace > 0)
+ {
+ bfd_process_trace_t *tr;
+ if (vlib_trace_buffer (vm, rt, 0, b, 0))
+ {
+ tr = vlib_add_trace (vm, rt, b, sizeof (*tr));
+ tr->bs_idx = bs->bs_idx;
+ u64 len = (b->current_length < sizeof (tr->data)) ?
+ b->current_length :
+ sizeof (tr->data);
+ tr->len = len;
+ clib_memcpy_fast (tr->data, vlib_buffer_get_current (b), len);
+ --n_trace;
+ vlib_set_trace_count (vm, rt, n_trace);
+ }
+ }
+}
+
static void
-bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt,
- bfd_main_t * bm, bfd_session_t * bs, u64 now)
+bfd_send_echo (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm,
+ bfd_session_t *bs, u64 now)
{
if (!bfd_is_echo_possible (bs))
{
@@ -921,6 +971,7 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt,
bfd_calc_echo_checksum (bs->local_discr, pkt->expire_time_nsec,
bs->echo_secret);
b->current_length = sizeof (*pkt);
+ bfd_process_trace_buf (vm, rt, b, bs);
if (!bfd_echo_add_transport_layer (vm, bi, bs))
{
BFD_ERR ("cannot send echo packet out, turning echo off");
@@ -928,7 +979,7 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt,
vlib_buffer_free_one (vm, bi);
return;
}
- if (!bfd_transport_echo (vm, bi, bs))
+ if (!bfd_transport_echo (vm, rt, bi, bs))
{
BFD_ERR ("cannot send echo packet out, turning echo off");
bs->echo = 0;
@@ -936,7 +987,7 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt,
return;
}
bs->echo_last_tx_nsec = now;
- bfd_calc_next_echo_tx (bm, bs, now);
+ bfd_calc_next_echo_tx (bs, now);
}
else
{
@@ -947,8 +998,8 @@ bfd_send_echo (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
static void
-bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt,
- bfd_main_t * bm, bfd_session_t * bs, u64 now)
+bfd_send_periodic (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm,
+ bfd_session_t *bs, u64 now)
{
if (!bs->remote_min_rx_usec && BFD_POLL_NOT_NEEDED == bs->poll_state)
{
@@ -980,7 +1031,7 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
ASSERT (b->current_data == 0);
- bfd_init_control_frame (bm, bs, b);
+ bfd_init_control_frame (bs, b);
switch (bs->poll_state)
{
case BFD_POLL_NEEDED:
@@ -1005,8 +1056,9 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt,
break;
}
bfd_add_auth_section (vm, b, bs);
+ bfd_process_trace_buf (vm, rt, b, bs);
bfd_add_transport_layer (vm, bi, bs);
- if (!bfd_transport_control_frame (vm, bi, bs))
+ if (!bfd_transport_control_frame (vm, rt, bi, bs))
{
vlib_buffer_free_one (vm, bi);
}
@@ -1022,12 +1074,11 @@ bfd_send_periodic (vlib_main_t * vm, vlib_node_runtime_t * rt,
}
void
-bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b,
- bfd_main_t * bm, bfd_session_t * bs,
- int is_local)
+bfd_init_final_control_frame (vlib_main_t *vm, vlib_buffer_t *b,
+ bfd_session_t *bs)
{
BFD_DBG ("Send final control frame for bs_idx=%lu", bs->bs_idx);
- bfd_init_control_frame (bm, bs, b);
+ bfd_init_control_frame (bs, b);
bfd_pkt_set_final (vlib_buffer_get_current (b));
bfd_add_auth_section (vm, b, bs);
u32 bi = vlib_get_buffer_index (vm, b);
@@ -1069,7 +1120,7 @@ bfd_check_rx_timeout (vlib_main_t * vm, bfd_main_t * bm, bfd_session_t * bs,
* since it is no longer required to maintain previous session state)
* and then can transmit at its own rate.
*/
- bfd_set_remote_required_min_rx (bm, bs, now, 1);
+ bfd_set_remote_required_min_rx (bs, 1);
}
else if (bs->echo
&& bs->echo_last_rx_nsec +
@@ -1082,15 +1133,14 @@ bfd_check_rx_timeout (vlib_main_t * vm, bfd_main_t * bm, bfd_session_t * bs,
}
void
-bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm,
- bfd_session_t * bs, u64 now)
+bfd_on_timeout (vlib_main_t *vm, vlib_node_runtime_t *rt, bfd_main_t *bm,
+ bfd_session_t *bs, u64 now)
{
BFD_DBG ("Timeout for bs_idx=%lu", bs->bs_idx);
switch (bs->local_state)
{
case BFD_STATE_admin_down:
- bfd_send_periodic (vm, rt, bm, bs, now);
- break;
+ /* fallthrough */
case BFD_STATE_down:
bfd_send_periodic (vm, rt, bm, bs, now);
break;
@@ -1108,10 +1158,9 @@ bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm,
bs->echo = 1;
bs->echo_last_rx_nsec = now;
bs->echo_tx_timeout_nsec = now;
- bfd_set_effective_required_min_rx (bm, bs,
- clib_max
- (bm->min_required_min_rx_while_echo_nsec,
- bs->config_required_min_rx_nsec));
+ bfd_set_effective_required_min_rx (
+ bs, clib_max (bm->min_required_min_rx_while_echo_nsec,
+ bs->config_required_min_rx_nsec));
bfd_set_poll_state (bs, BFD_POLL_NEEDED);
}
bfd_send_periodic (vm, rt, bm, bs, now);
@@ -1123,11 +1172,25 @@ bfd_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * rt, bfd_main_t * bm,
}
}
+u8 *
+format_bfd_process_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ bfd_process_trace_t *t = va_arg (*args, bfd_process_trace_t *);
+
+ s =
+ format (s, "bs_idx=%u => %U", t->bs_idx, format_bfd_pkt, t->len, t->data);
+
+ return s;
+}
+
/*
* bfd process node function
*/
static uword
-bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
+bfd_process (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ CLIB_UNUSED (vlib_frame_t *f))
{
bfd_main_t *bm = &bfd_main;
u32 *expired = 0;
@@ -1168,7 +1231,9 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
now + first_expires_in_ticks * bm->nsec_per_tw_tick;
bm->bfd_process_next_wakeup_nsec = next_expire_nsec;
bfd_unlock (bm);
- timeout = (next_expire_nsec - now) * SEC_PER_NSEC;
+ ASSERT (next_expire_nsec - now <= UINT32_MAX);
+ // cast to u32 to avoid warning
+ timeout = (u32) (next_expire_nsec - now) * SEC_PER_NSEC;
}
BFD_DBG ("vlib_process_wait_for_event_or_clock(vm, %.09f)",
timeout);
@@ -1224,7 +1289,7 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
{
bfd_session_t *bs =
pool_elt_at_index (bm->sessions, *session_index);
- bfd_on_config_change (vm, rt, bm, bs, now);
+ bfd_on_config_change (bm, bs, now);
}
else
{
@@ -1258,11 +1323,11 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
bfd_unlock (bm);
if (expired)
{
- _vec_len (expired) = 0;
+ vec_set_len (expired, 0);
}
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
@@ -1272,18 +1337,29 @@ bfd_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
/*
* bfd process node declaration
*/
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (bfd_process_node, static) = {
+// clang-format off
+VLIB_REGISTER_NODE (bfd_process_node, static) =
+{
.function = bfd_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "bfd-process",
- .n_next_nodes = 0,
- .next_nodes = {},
+ .flags = (VLIB_NODE_FLAG_TRACE_SUPPORTED),
+ .format_trace = format_bfd_process_trace,
+ .n_next_nodes = BFD_TX_N_NEXT,
+ .next_nodes = {
+ [BFD_TX_IP4_ARP] = "ip4-arp",
+ [BFD_TX_IP6_NDP] = "ip6-discover-neighbor",
+ [BFD_TX_IP4_REWRITE] = "ip4-rewrite",
+ [BFD_TX_IP6_REWRITE] = "ip6-rewrite",
+ [BFD_TX_IP4_MIDCHAIN] = "ip4-midchain",
+ [BFD_TX_IP6_MIDCHAIN] = "ip6-midchain",
+ }
};
-/* *INDENT-ON* */
+// clang-format on
static clib_error_t *
-bfd_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
+bfd_sw_interface_up_down (CLIB_UNUSED (vnet_main_t *vnm),
+ CLIB_UNUSED (u32 sw_if_index), u32 flags)
{
// bfd_main_t *bm = &bfd_main;
// vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
@@ -1297,7 +1373,8 @@ bfd_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (bfd_sw_interface_up_down);
static clib_error_t *
-bfd_hw_interface_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
+bfd_hw_interface_up_down (CLIB_UNUSED (vnet_main_t *vnm),
+ CLIB_UNUSED (u32 hw_if_index), u32 flags)
{
// bfd_main_t *bm = &bfd_main;
if (flags & VNET_HW_INTERFACE_FLAG_LINK_UP)
@@ -1346,6 +1423,14 @@ bfd_main_init (vlib_main_t * vm)
bm->owner_thread_index = ~0;
if (n_vlib_mains > 1)
clib_spinlock_init (&bm->lock);
+ bm->rx_counter.name = "bfd rx session counters";
+ bm->rx_counter.stat_segment_name = "/bfd/rx-session-counters";
+ bm->rx_echo_counter.name = "bfd rx session echo counters";
+ bm->rx_echo_counter.stat_segment_name = "/bfd/rx-session-echo-counters";
+ bm->tx_counter.name = "bfd tx session counters";
+ bm->tx_counter.stat_segment_name = "/bfd/tx-session-counters";
+ bm->tx_echo_counter.name = "bfd tx session echo counters";
+ bm->tx_echo_counter.stat_segment_name = "/bfd/tx-session-echo-counters";
return 0;
}
@@ -1381,6 +1466,11 @@ bfd_get_session (bfd_main_t * bm, bfd_transport_e t)
while (hash_get (bm->session_by_disc, result->local_discr));
bfd_set_defaults (bm, result);
hash_set (bm->session_by_disc, result->local_discr, result->bs_idx);
+ bfd_validate_counters (bm);
+ vlib_zero_combined_counter (&bm->rx_counter, result->bs_idx);
+ vlib_zero_combined_counter (&bm->rx_echo_counter, result->bs_idx);
+ vlib_zero_combined_counter (&bm->tx_counter, result->bs_idx);
+ vlib_zero_combined_counter (&bm->tx_echo_counter, result->bs_idx);
bfd_unlock (bm);
return result;
}
@@ -1392,7 +1482,6 @@ bfd_put_session (bfd_main_t * bm, bfd_session_t * bs)
vlib_log_info (bm->log_class, "delete session: %U",
format_bfd_session_brief, bs);
- bfd_notify_listeners (bm, BFD_LISTEN_EVENT_DELETE, bs);
if (bs->auth.curr_key)
{
--bs->auth.curr_key->use_count;
@@ -1402,6 +1491,10 @@ bfd_put_session (bfd_main_t * bm, bfd_session_t * bs)
--bs->auth.next_key->use_count;
}
hash_unset (bm->session_by_disc, bs->local_discr);
+ vlib_zero_combined_counter (&bm->rx_counter, bs->bs_idx);
+ vlib_zero_combined_counter (&bm->rx_echo_counter, bs->bs_idx);
+ vlib_zero_combined_counter (&bm->tx_counter, bs->bs_idx);
+ vlib_zero_combined_counter (&bm->tx_echo_counter, bs->bs_idx);
pool_put (bm->sessions, bs);
bfd_unlock (bm);
}
@@ -1436,14 +1529,14 @@ bfd_find_session_by_disc (bfd_main_t * bm, u32 disc)
*
* @return 1 if bfd packet is valid
*/
-int
-bfd_verify_pkt_common (const bfd_pkt_t * pkt)
+bfd_error_t
+bfd_verify_pkt_common (const bfd_pkt_t *pkt)
{
if (1 != bfd_pkt_get_version (pkt))
{
BFD_ERR ("BFD verification failed - unexpected version: '%d'",
bfd_pkt_get_version (pkt));
- return 0;
+ return BFD_ERROR_VERSION;
}
if (pkt->head.length < sizeof (bfd_pkt_t) ||
(bfd_pkt_get_auth_present (pkt) &&
@@ -1452,25 +1545,25 @@ bfd_verify_pkt_common (const bfd_pkt_t * pkt)
BFD_ERR ("BFD verification failed - unexpected length: '%d' (auth "
"present: %d)",
pkt->head.length, bfd_pkt_get_auth_present (pkt));
- return 0;
+ return BFD_ERROR_LENGTH;
}
if (!pkt->head.detect_mult)
{
BFD_ERR ("BFD verification failed - unexpected detect-mult: '%d'",
pkt->head.detect_mult);
- return 0;
+ return BFD_ERROR_DETECT_MULTI;
}
if (bfd_pkt_get_multipoint (pkt))
{
BFD_ERR ("BFD verification failed - unexpected multipoint: '%d'",
bfd_pkt_get_multipoint (pkt));
- return 0;
+ return BFD_ERROR_MULTI_POINT;
}
if (!pkt->my_disc)
{
BFD_ERR ("BFD verification failed - unexpected my-disc: '%d'",
pkt->my_disc);
- return 0;
+ return BFD_ERROR_MY_DISC;
}
if (!pkt->your_disc)
{
@@ -1479,10 +1572,10 @@ bfd_verify_pkt_common (const bfd_pkt_t * pkt)
{
BFD_ERR ("BFD verification failed - unexpected state: '%s' "
"(your-disc is zero)", bfd_state_string (pkt_state));
- return 0;
+ return BFD_ERROR_YOUR_DISC;
}
}
- return 1;
+ return BFD_ERROR_NONE;
}
static void
@@ -1581,8 +1674,8 @@ bfd_verify_pkt_auth_seq_num (vlib_main_t * vm, bfd_session_t * bs,
static int
bfd_verify_pkt_auth_key_sha1 (vlib_main_t *vm, const bfd_pkt_t *pkt,
- u32 pkt_size, bfd_session_t *bs, u8 bfd_key_id,
- bfd_auth_key_t *auth_key)
+ u32 pkt_size, CLIB_UNUSED (bfd_session_t *bs),
+ u8 bfd_key_id, bfd_auth_key_t *auth_key)
{
ASSERT (auth_key->auth_type == BFD_AUTH_TYPE_keyed_sha1 ||
auth_key->auth_type == BFD_AUTH_TYPE_meticulous_keyed_sha1);
@@ -1634,6 +1727,11 @@ bfd_verify_pkt_auth_key_sha1 (vlib_main_t *vm, const bfd_pkt_t *pkt,
op.len = sizeof (*with_sha1);
op.digest = calculated_hash;
vnet_crypto_process_ops (vm, &op, 1);
+
+ /* Restore the modified data within the packet */
+ clib_memcpy (with_sha1->sha1_auth.hash, hash_from_packet,
+ sizeof (with_sha1->sha1_auth.hash));
+
if (0 ==
memcmp (calculated_hash, hash_from_packet, sizeof (calculated_hash)))
{
@@ -1662,18 +1760,14 @@ bfd_verify_pkt_auth_key (vlib_main_t * vm, const bfd_pkt_t * pkt,
bfd_auth_type_str (auth_key->auth_type));
return 0;
case BFD_AUTH_TYPE_simple_password:
- vlib_log_err (bm->log_class,
- "internal error, not implemented, unexpected auth_type=%d:%s",
- auth_key->auth_type,
- bfd_auth_type_str (auth_key->auth_type));
- return 0;
+ /* fallthrough */
case BFD_AUTH_TYPE_keyed_md5:
/* fallthrough */
case BFD_AUTH_TYPE_meticulous_keyed_md5:
- vlib_log_err
- (bm->log_class,
- "internal error, not implemented, unexpected auth_type=%d:%s",
- auth_key->auth_type, bfd_auth_type_str (auth_key->auth_type));
+ vlib_log_err (
+ bm->log_class,
+ "internal error, not implemented, unexpected auth_type=%d:%s",
+ auth_key->auth_type, bfd_auth_type_str (auth_key->auth_type));
return 0;
case BFD_AUTH_TYPE_keyed_sha1:
/* fallthrough */
@@ -1780,8 +1874,8 @@ bfd_verify_pkt_auth (vlib_main_t * vm, const bfd_pkt_t * pkt, u16 pkt_size,
return 0;
}
-void
-bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
+bfd_error_t
+bfd_consume_pkt (vlib_main_t *vm, bfd_main_t *bm, const bfd_pkt_t *pkt,
u32 bs_idx)
{
bfd_lock_check (bm);
@@ -1789,7 +1883,7 @@ bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
bfd_session_t *bs = bfd_find_session_by_idx (bm, bs_idx);
if (!bs || (pkt->your_disc && pkt->your_disc != bs->local_discr))
{
- return;
+ return BFD_ERROR_YOUR_DISC;
}
BFD_DBG ("Scanning bfd packet, bs_idx=%d", bs->bs_idx);
bs->remote_discr = pkt->my_disc;
@@ -1834,11 +1928,9 @@ bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
bs->remote_desired_min_tx_nsec =
bfd_usec_to_nsec (clib_net_to_host_u32 (pkt->des_min_tx));
bs->remote_detect_mult = pkt->head.detect_mult;
- bfd_set_remote_required_min_rx (bm, bs, now,
- clib_net_to_host_u32 (pkt->req_min_rx));
- bfd_set_remote_required_min_echo_rx (bm, bs, now,
- clib_net_to_host_u32
- (pkt->req_min_echo_rx));
+ bfd_set_remote_required_min_rx (bs, clib_net_to_host_u32 (pkt->req_min_rx));
+ bfd_set_remote_required_min_echo_rx (
+ bs, clib_net_to_host_u32 (pkt->req_min_echo_rx));
if (bfd_pkt_get_final (pkt))
{
if (BFD_POLL_IN_PROGRESS == bs->poll_state)
@@ -1847,10 +1939,12 @@ bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
bfd_set_poll_state (bs, BFD_POLL_NOT_NEEDED);
if (BFD_STATE_up == bs->local_state)
{
- bfd_set_effective_required_min_rx (bm, bs,
- clib_max (bs->echo *
- bm->min_required_min_rx_while_echo_nsec,
- bs->config_required_min_rx_nsec));
+ bfd_set_effective_desired_min_tx (
+ bm, bs, now, bs->config_desired_min_tx_nsec);
+ bfd_set_effective_required_min_rx (
+ bs,
+ clib_max (bs->echo * bm->min_required_min_rx_while_echo_nsec,
+ bs->config_required_min_rx_nsec));
}
}
else if (BFD_POLL_IN_PROGRESS_AND_QUEUED == bs->poll_state)
@@ -1875,7 +1969,7 @@ bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
{
BFD_DBG ("Session is admin-down, ignoring packet, bs_idx=%u",
bs->bs_idx);
- return;
+ return BFD_ERROR_ADMIN_DOWN;
}
if (BFD_STATE_admin_down == bs->remote_state)
{
@@ -1912,10 +2006,11 @@ bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm, const bfd_pkt_t * pkt,
bfd_set_state (vm, bm, bs, BFD_STATE_down, 0);
}
}
+ return BFD_ERROR_NONE;
}
-int
-bfd_consume_echo_pkt (vlib_main_t * vm, bfd_main_t * bm, vlib_buffer_t * b)
+bfd_session_t *
+bfd_consume_echo_pkt (vlib_main_t *vm, bfd_main_t *bm, vlib_buffer_t *b)
{
bfd_echo_pkt_t *pkt = NULL;
if (b->current_length != sizeof (*pkt))
@@ -1935,7 +2030,7 @@ bfd_consume_echo_pkt (vlib_main_t * vm, bfd_main_t * bm, vlib_buffer_t * b)
if (checksum != pkt->checksum)
{
BFD_DBG ("Invalid echo packet, checksum mismatch");
- return 1;
+ return 0;
}
u64 now = bfd_time_now_nsec (vm, NULL);
if (pkt->expire_time_nsec < now)
@@ -1947,7 +2042,7 @@ bfd_consume_echo_pkt (vlib_main_t * vm, bfd_main_t * bm, vlib_buffer_t * b)
{
bs->echo_last_rx_nsec = now;
}
- return 1;
+ return bs;
}
u8 *
@@ -2020,22 +2115,27 @@ bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id,
bfd_auth_key_t *key = pool_elt_at_index (bm->auth_keys, key_idx);
if (is_delayed)
{
- if (bs->auth.next_key == key)
+ if (bs->auth.next_key == key && bs->auth.next_bfd_key_id == bfd_key_id)
{
/* already using this key, no changes required */
return 0;
}
- bs->auth.next_key = key;
+ if (bs->auth.next_key != key)
+ {
+ ++key->use_count;
+ bs->auth.next_key = key;
+ }
bs->auth.next_bfd_key_id = bfd_key_id;
bs->auth.is_delayed = 1;
}
else
{
- if (bs->auth.curr_key == key)
+ if (bs->auth.curr_key == key && bs->auth.curr_bfd_key_id == bfd_key_id)
{
/* already using this key, no changes required */
return 0;
}
+ ++key->use_count;
if (bs->auth.curr_key)
{
--bs->auth.curr_key->use_count;
@@ -2044,7 +2144,6 @@ bfd_auth_activate (bfd_session_t * bs, u32 conf_key_id,
bs->auth.curr_bfd_key_id = bfd_key_id;
bs->auth.is_delayed = 0;
}
- ++key->use_count;
BFD_DBG ("\nSession auth modified: %U", format_bfd_session, bs);
vlib_log_info (bm->log_class, "session auth modified: %U",
format_bfd_session_brief, bs);
diff --git a/src/vnet/bfd/bfd_main.h b/src/vnet/bfd/bfd_main.h
index 0bdcfb87622..1d4617e1d7c 100644
--- a/src/vnet/bfd/bfd_main.h
+++ b/src/vnet/bfd/bfd_main.h
@@ -258,7 +258,7 @@ typedef enum
} bfd_listen_event_e;
/**
- * session nitification call back function type
+ * session notification call back function type
*/
typedef void (*bfd_notify_fn_t) (bfd_listen_event_e, const bfd_session_t *);
@@ -322,15 +322,27 @@ typedef struct
vlib_log_class_t log_class;
u16 msg_id_base;
+
+ vlib_combined_counter_main_t rx_counter;
+ vlib_combined_counter_main_t rx_echo_counter;
+ vlib_combined_counter_main_t tx_counter;
+ vlib_combined_counter_main_t tx_echo_counter;
} bfd_main_t;
extern bfd_main_t bfd_main;
/** Packet counters */
-#define foreach_bfd_error(F) \
- F (NONE, "good bfd packets (processed)") \
- F (BAD, "invalid bfd packets") \
- F (DISABLED, "bfd packets received on disabled interfaces")
+#define foreach_bfd_error(F) \
+ F (NONE, "good bfd packets (processed)") \
+ F (BAD, "invalid bfd packets") \
+ F (DISABLED, "bfd packets received on disabled interfaces") \
+ F (VERSION, "version") \
+ F (LENGTH, "length") \
+ F (DETECT_MULTI, "detect-multi") \
+ F (MULTI_POINT, "multi-point") \
+ F (MY_DISC, "my-disc") \
+ F (YOUR_DISC, "your-disc") \
+ F (ADMIN_DOWN, "session admin-down")
typedef enum
{
@@ -354,7 +366,6 @@ typedef enum
BFD_EVENT_CONFIG_CHANGED,
} bfd_process_event_e;
-/* *INDENT-OFF* */
/** echo packet structure */
typedef CLIB_PACKED (struct {
/** local discriminator */
@@ -364,7 +375,6 @@ typedef CLIB_PACKED (struct {
/** checksum - based on discriminator, local secret and expire time */
u64 checksum;
}) bfd_echo_pkt_t;
-/* *INDENT-ON* */
static inline void
bfd_lock (bfd_main_t * bm)
@@ -412,17 +422,17 @@ void bfd_put_session (bfd_main_t * bm, bfd_session_t * bs);
bfd_session_t *bfd_find_session_by_idx (bfd_main_t * bm, uword bs_idx);
bfd_session_t *bfd_find_session_by_disc (bfd_main_t * bm, u32 disc);
void bfd_session_start (bfd_main_t * bm, bfd_session_t * bs);
-void bfd_consume_pkt (vlib_main_t * vm, bfd_main_t * bm,
- const bfd_pkt_t * bfd, u32 bs_idx);
-int bfd_consume_echo_pkt (vlib_main_t * vm, bfd_main_t * bm,
- vlib_buffer_t * b);
-int bfd_verify_pkt_common (const bfd_pkt_t * pkt);
+void bfd_session_stop (bfd_main_t *bm, bfd_session_t *bs);
+bfd_error_t bfd_consume_pkt (vlib_main_t *vm, bfd_main_t *bm,
+ const bfd_pkt_t *bfd, u32 bs_idx);
+bfd_session_t *bfd_consume_echo_pkt (vlib_main_t *vm, bfd_main_t *bm,
+ vlib_buffer_t *b);
+bfd_error_t bfd_verify_pkt_common (const bfd_pkt_t *pkt);
int bfd_verify_pkt_auth (vlib_main_t * vm, const bfd_pkt_t * pkt,
u16 pkt_size, bfd_session_t * bs);
void bfd_event (bfd_main_t * bm, bfd_session_t * bs);
-void bfd_init_final_control_frame (vlib_main_t * vm, vlib_buffer_t * b,
- bfd_main_t * bm, bfd_session_t * bs,
- int is_local);
+void bfd_init_final_control_frame (vlib_main_t *vm, vlib_buffer_t *b,
+ bfd_session_t *bs);
u8 *format_bfd_session (u8 * s, va_list * args);
u8 *format_bfd_session_brief (u8 * s, va_list * args);
u8 *format_bfd_auth_key (u8 * s, va_list * args);
@@ -464,6 +474,17 @@ const char *bfd_poll_state_string (bfd_poll_state_e state);
*/
void bfd_register_listener (bfd_notify_fn_t fn);
+typedef enum
+{
+ BFD_TX_IP4_ARP,
+ BFD_TX_IP6_NDP,
+ BFD_TX_IP4_REWRITE,
+ BFD_TX_IP6_REWRITE,
+ BFD_TX_IP4_MIDCHAIN,
+ BFD_TX_IP6_MIDCHAIN,
+ BFD_TX_N_NEXT,
+} bfd_tx_next_t;
+
#endif /* __included_bfd_main_h__ */
/*
diff --git a/src/vnet/bfd/bfd_protocol.h b/src/vnet/bfd/bfd_protocol.h
index 210c561b430..16ee3231ef0 100644
--- a/src/vnet/bfd/bfd_protocol.h
+++ b/src/vnet/bfd/bfd_protocol.h
@@ -46,14 +46,11 @@ typedef enum
u32 bfd_max_key_len_for_auth_type (bfd_auth_type_e auth_type);
const char *bfd_auth_type_str (bfd_auth_type_e auth_type);
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 type;
u8 len;
}) bfd_auth_common_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/*
* 4.4. Keyed SHA1 and Meticulous Keyed SHA1 Authentication Section Format
@@ -88,9 +85,7 @@ typedef CLIB_PACKED (struct {
*/
u8 hash[20];
}) bfd_auth_sha1_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/*
* The Mandatory Section of a BFD Control packet has the following
@@ -125,21 +120,16 @@ typedef CLIB_PACKED (struct {
u32 req_min_rx;
u32 req_min_echo_rx;
}) bfd_pkt_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
bfd_pkt_t pkt;
bfd_auth_common_t common_auth;
}) bfd_pkt_with_common_auth_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
bfd_pkt_t pkt;
bfd_auth_sha1_t sha1_auth;
}) bfd_pkt_with_sha1_auth_t;
-/* *INDENT-ON* */
u8 bfd_pkt_get_version (const bfd_pkt_t * pkt);
void bfd_pkt_set_version (bfd_pkt_t * pkt, int version);
diff --git a/src/vnet/bfd/bfd_udp.c b/src/vnet/bfd/bfd_udp.c
index 4ad5660fdf6..ec42cda1bc4 100644
--- a/src/vnet/bfd/bfd_udp.c
+++ b/src/vnet/bfd/bfd_udp.c
@@ -35,10 +35,20 @@
#include <vnet/dpo/receive_dpo.h>
#include <vnet/fib/fib_entry.h>
#include <vnet/fib/fib_table.h>
+#include <vlib/stats/stats.h>
#include <vnet/bfd/bfd_debug.h>
#include <vnet/bfd/bfd_udp.h>
#include <vnet/bfd/bfd_main.h>
#include <vnet/bfd/bfd_api.h>
+#include <vnet/bfd/bfd.api_enum.h>
+
+#define F(sym, str) \
+ STATIC_ASSERT ((int) BFD_ERROR_##sym == (int) BFD_UDP_ERROR_##sym, \
+ "BFD error enums mismatch");
+foreach_bfd_error (F)
+#undef F
+ STATIC_ASSERT ((int) BFD_N_ERROR <= (int) BFD_UDP_N_ERROR,
+ "BFD error enum sizes mismatch");
typedef struct
{
@@ -52,24 +62,14 @@ typedef struct
int echo_source_is_set;
/* loopback interface used to get echo source ip */
u32 echo_source_sw_if_index;
- /* node index of "ip4-arp" node */
- u32 ip4_arp_idx;
- /* node index of "ip6-discover-neighbor" node */
- u32 ip6_ndp_idx;
- /* node index of "ip4-rewrite" node */
- u32 ip4_rewrite_idx;
- /* node index of "ip6-rewrite" node */
- u32 ip6_rewrite_idx;
- /* node index of "ip4-midchain" node */
- u32 ip4_midchain_idx;
- /* node index of "ip6-midchain" node */
- u32 ip6_midchain_idx;
/* log class */
vlib_log_class_t log_class;
/* number of active udp4 sessions */
u32 udp4_sessions_count;
+ u32 udp4_sessions_count_stat_seg_entry;
/* number of active udp6 sessions */
u32 udp6_sessions_count;
+ u32 udp6_sessions_count_stat_seg_entry;
} bfd_udp_main_t;
static vlib_node_registration_t bfd_udp4_input_node;
@@ -79,6 +79,14 @@ static vlib_node_registration_t bfd_udp_echo6_input_node;
bfd_udp_main_t bfd_udp_main;
+void
+bfd_udp_update_stat_segment_entry (u32 entry, u64 value)
+{
+ vlib_stats_segment_lock ();
+ vlib_stats_set_gauge (entry, value);
+ vlib_stats_segment_unlock ();
+}
+
vnet_api_error_t
bfd_udp_set_echo_source (u32 sw_if_index)
{
@@ -94,7 +102,7 @@ bfd_udp_set_echo_source (u32 sw_if_index)
}
vnet_api_error_t
-bfd_udp_del_echo_source (u32 sw_if_index)
+bfd_udp_del_echo_source ()
{
bfd_udp_main.echo_source_sw_if_index = ~0;
bfd_udp_main.echo_source_is_set = 0;
@@ -123,7 +131,6 @@ bfd_udp_is_echo_available (bfd_transport_e transport)
{
ip4_main_t *im = &ip4_main;
ip_interface_address_t *ia = NULL;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia,
bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -132,13 +139,11 @@ bfd_udp_is_echo_available (bfd_transport_e transport)
return 1;
}
}));
- /* *INDENT-ON* */
}
else if (BFD_TRANSPORT_UDP6 == transport)
{
ip6_main_t *im = &ip6_main;
ip_interface_address_t *ia = NULL;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia,
bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -147,7 +152,6 @@ bfd_udp_is_echo_available (bfd_transport_e transport)
return 1;
}
}));
- /* *INDENT-ON* */
}
}
BFD_DBG ("No usable IP address for UDP echo - echo not available");
@@ -179,7 +183,6 @@ bfd_udp_get_echo_src_ip4 (ip4_address_t * addr)
ip_interface_address_t *ia = NULL;
ip4_main_t *im = &ip4_main;
- /* *INDENT-OFF* */
foreach_ip_interface_address (
&im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -197,7 +200,6 @@ bfd_udp_get_echo_src_ip4 (ip4_address_t * addr)
return 1;
}
}));
- /* *INDENT-ON* */
BFD_ERR ("cannot find ip4 address, no usable address found");
return 0;
}
@@ -213,7 +215,6 @@ bfd_udp_get_echo_src_ip6 (ip6_address_t * addr)
ip_interface_address_t *ia = NULL;
ip6_main_t *im = &ip6_main;
- /* *INDENT-OFF* */
foreach_ip_interface_address (
&im->lookup_main, ia, bfd_udp_main.echo_source_sw_if_index,
0 /* honor unnumbered */, ({
@@ -226,7 +227,6 @@ bfd_udp_get_echo_src_ip6 (ip6_address_t * addr)
return 1;
}
}));
- /* *INDENT-ON* */
BFD_ERR ("cannot find ip6 address, no usable address found");
return 0;
}
@@ -372,13 +372,25 @@ bfd_add_udp6_transport (vlib_main_t * vm, u32 bi, const bfd_session_t * bs,
}
static void
-bfd_create_frame_to_next_node (vlib_main_t * vm, u32 bi, u32 next_node)
+bfd_create_frame_to_next_node (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ u32 bi, const bfd_session_t *bs, u32 next,
+ vlib_combined_counter_main_t *tx_counter)
{
- vlib_frame_t *f = vlib_get_frame_to_node (vm, next_node);
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ vlib_node_t *from_node = vlib_get_node (vm, rt->node_index);
+ ASSERT (next < vec_len (from_node->next_nodes));
+ u32 to_node_index = from_node->next_nodes[next];
+ vlib_frame_t *f = vlib_get_frame_to_node (vm, to_node_index);
u32 *to_next = vlib_frame_vector_args (f);
to_next[0] = bi;
f->n_vectors = 1;
- vlib_put_frame_to_node (vm, next_node, f);
+ if (b->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ f->frame_flags |= VLIB_NODE_FLAG_TRACE;
+ }
+ vlib_put_frame_to_node (vm, to_node_index, f);
+ vlib_increment_combined_counter (tx_counter, vm->thread_index, bs->bs_idx, 1,
+ vlib_buffer_length_in_chain (vm, b));
}
int
@@ -398,10 +410,10 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
- *next_node = bfd_udp_main.ip4_arp_idx;
+ *next_node = BFD_TX_IP4_ARP;
return 1;
case BFD_TRANSPORT_UDP6:
- *next_node = bfd_udp_main.ip6_ndp_idx;
+ *next_node = BFD_TX_IP6_NDP;
return 1;
}
break;
@@ -409,10 +421,10 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
- *next_node = bfd_udp_main.ip4_rewrite_idx;
+ *next_node = BFD_TX_IP4_REWRITE;
return 1;
case BFD_TRANSPORT_UDP6:
- *next_node = bfd_udp_main.ip6_rewrite_idx;
+ *next_node = BFD_TX_IP6_REWRITE;
return 1;
}
break;
@@ -420,10 +432,10 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
- *next_node = bfd_udp_main.ip4_midchain_idx;
+ *next_node = BFD_TX_IP4_MIDCHAIN;
return 1;
case BFD_TRANSPORT_UDP6:
- *next_node = bfd_udp_main.ip6_midchain_idx;
+ *next_node = BFD_TX_IP6_MIDCHAIN;
return 1;
}
break;
@@ -435,27 +447,35 @@ bfd_udp_calc_next_node (const struct bfd_session_s *bs, u32 * next_node)
}
int
-bfd_transport_udp4 (vlib_main_t * vm, u32 bi, const struct bfd_session_s *bs)
+bfd_transport_udp4 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ const struct bfd_session_s *bs, int is_echo)
{
u32 next_node;
int rv = bfd_udp_calc_next_node (bs, &next_node);
+ bfd_main_t *bm = bfd_udp_main.bfd_main;
if (rv)
{
- bfd_create_frame_to_next_node (vm, bi, next_node);
+ bfd_create_frame_to_next_node (vm, rt, bi, bs, next_node,
+ is_echo ? &bm->tx_echo_counter :
+ &bm->tx_counter);
}
return rv;
}
int
-bfd_transport_udp6 (vlib_main_t * vm, u32 bi, const struct bfd_session_s *bs)
+bfd_transport_udp6 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ const struct bfd_session_s *bs, int is_echo)
{
u32 next_node;
int rv = bfd_udp_calc_next_node (bs, &next_node);
+ bfd_main_t *bm = bfd_udp_main.bfd_main;
if (rv)
{
- bfd_create_frame_to_next_node (vm, bi, next_node);
+ bfd_create_frame_to_next_node (vm, rt, bi, bs, next_node,
+ is_echo ? &bm->tx_echo_counter :
+ &bm->tx_counter);
}
- return 1;
+ return rv;
}
static bfd_session_t *
@@ -503,6 +523,7 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum,
}
bfd_udp_session_t *bus = &bs->udp;
clib_memset (bus, 0, sizeof (*bus));
+ bus->adj_index = ADJ_INDEX_INVALID;
bfd_udp_key_t *key = &bus->key;
bfd_udp_key_init (key, sw_if_index, local_addr, peer_addr);
const bfd_session_t *tmp = bfd_lookup_session (bum, key);
@@ -521,15 +542,21 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum,
&key->peer_addr, IP46_TYPE_ANY);
vlib_log_info (bum->log_class, "create BFD session: %U",
format_bfd_session, bs);
+ const ip46_address_t *peer =
+ (vnet_sw_interface_is_p2p (vnet_get_main (), key->sw_if_index) ?
+ &zero_addr :
+ &key->peer_addr);
if (BFD_TRANSPORT_UDP4 == t)
{
bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP4, VNET_LINK_IP4,
- &key->peer_addr,
- key->sw_if_index);
+ peer, key->sw_if_index);
BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP4, VNET_LINK_IP4, %U, %d) "
- "returns %d", format_ip46_address, &key->peer_addr,
- IP46_TYPE_ANY, key->sw_if_index, bus->adj_index);
+ "returns %d",
+ format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index,
+ bus->adj_index);
++bum->udp4_sessions_count;
+ bfd_udp_update_stat_segment_entry (
+ bum->udp4_sessions_count_stat_seg_entry, bum->udp4_sessions_count);
if (1 == bum->udp4_sessions_count)
{
udp_register_dst_port (vm, UDP_DST_PORT_bfd4,
@@ -541,12 +568,14 @@ bfd_udp_add_session_internal (vlib_main_t * vm, bfd_udp_main_t * bum,
else
{
bus->adj_index = adj_nbr_add_or_lock (FIB_PROTOCOL_IP6, VNET_LINK_IP6,
- &key->peer_addr,
- key->sw_if_index);
+ peer, key->sw_if_index);
BFD_DBG ("adj_nbr_add_or_lock(FIB_PROTOCOL_IP6, VNET_LINK_IP6, %U, %d) "
- "returns %d", format_ip46_address, &key->peer_addr,
- IP46_TYPE_ANY, key->sw_if_index, bus->adj_index);
+ "returns %d",
+ format_ip46_address, peer, IP46_TYPE_ANY, key->sw_if_index,
+ bus->adj_index);
++bum->udp6_sessions_count;
+ bfd_udp_update_stat_segment_entry (
+ bum->udp6_sessions_count_stat_seg_entry, bum->udp6_sessions_count);
if (1 == bum->udp6_sessions_count)
{
udp_register_dst_port (vm, UDP_DST_PORT_bfd6,
@@ -568,8 +597,6 @@ bfd_udp_validate_api_input (u32 sw_if_index,
bfd_udp_main_t *bum = &bfd_udp_main;
vnet_sw_interface_t *sw_if =
vnet_get_sw_interface_or_null (bfd_udp_main.vnet_main, sw_if_index);
- u8 local_ip_valid = 0;
- ip_interface_address_t *ia = NULL;
if (!sw_if)
{
vlib_log_err (bum->log_class,
@@ -585,21 +612,6 @@ bfd_udp_validate_api_input (u32 sw_if_index,
"IP family mismatch (local is ipv4, peer is ipv6)");
return VNET_API_ERROR_INVALID_ARGUMENT;
}
- ip4_main_t *im = &ip4_main;
-
- /* *INDENT-OFF* */
- foreach_ip_interface_address (
- &im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({
- ip4_address_t *x =
- ip_interface_address_get_address (&im->lookup_main, ia);
- if (x->as_u32 == local_addr->ip4.as_u32)
- {
- /* valid address for this interface */
- local_ip_valid = 1;
- break;
- }
- }));
- /* *INDENT-ON* */
}
else
{
@@ -609,44 +621,6 @@ bfd_udp_validate_api_input (u32 sw_if_index,
"IP family mismatch (local is ipv6, peer is ipv4)");
return VNET_API_ERROR_INVALID_ARGUMENT;
}
-
- if (ip6_address_is_link_local_unicast (&local_addr->ip6))
- {
- const ip6_address_t *ll_addr;
- ll_addr = ip6_get_link_local_address (sw_if_index);
- if (ip6_address_is_equal (ll_addr, &local_addr->ip6))
- {
- /* valid address for this interface */
- local_ip_valid = 1;
- }
- }
- else
- {
- ip6_main_t *im = &ip6_main;
- /* *INDENT-OFF* */
- foreach_ip_interface_address (
- &im->lookup_main, ia, sw_if_index, 0 /* honor unnumbered */, ({
- ip6_address_t *x =
- ip_interface_address_get_address (&im->lookup_main, ia);
- if (local_addr->ip6.as_u64[0] == x->as_u64[0] &&
- local_addr->ip6.as_u64[1] == x->as_u64[1])
- {
- /* valid address for this interface */
- local_ip_valid = 1;
- break;
- }
- }));
- /* *INDENT-ON* */
- }
- }
-
- if (!local_ip_valid)
- {
- vlib_log_err (bum->log_class,
- "local address %U not found on interface with index %u",
- format_ip46_address, local_addr, IP46_TYPE_ANY,
- sw_if_index);
- return VNET_API_ERROR_ADDRESS_NOT_FOUND_FOR_INTERFACE;
}
return 0;
@@ -685,9 +659,8 @@ bfd_udp_find_session_by_api_input (u32 sw_if_index,
static vnet_api_error_t
bfd_api_verify_common (u32 sw_if_index, u32 desired_min_tx_usec,
- u32 required_min_rx_usec, u8 detect_mult,
- const ip46_address_t * local_addr,
- const ip46_address_t * peer_addr)
+ u8 detect_mult, const ip46_address_t *local_addr,
+ const ip46_address_t *peer_addr)
{
bfd_udp_main_t *bum = &bfd_udp_main;
vnet_api_error_t rv =
@@ -714,12 +687,15 @@ bfd_udp_del_session_internal (vlib_main_t * vm, bfd_session_t * bs)
{
bfd_udp_main_t *bum = &bfd_udp_main;
BFD_DBG ("free bfd-udp session, bs_idx=%d", bs->bs_idx);
+ bfd_session_stop (bum->bfd_main, bs);
mhash_unset (&bum->bfd_session_idx_by_bfd_key, &bs->udp.key, NULL);
adj_unlock (bs->udp.adj_index);
switch (bs->transport)
{
case BFD_TRANSPORT_UDP4:
--bum->udp4_sessions_count;
+ bfd_udp_update_stat_segment_entry (
+ bum->udp4_sessions_count_stat_seg_entry, bum->udp4_sessions_count);
if (!bum->udp4_sessions_count)
{
udp_unregister_dst_port (vm, UDP_DST_PORT_bfd4, 1);
@@ -728,6 +704,8 @@ bfd_udp_del_session_internal (vlib_main_t * vm, bfd_session_t * bs)
break;
case BFD_TRANSPORT_UDP6:
--bum->udp6_sessions_count;
+ bfd_udp_update_stat_segment_entry (
+ bum->udp6_sessions_count_stat_seg_entry, bum->udp6_sessions_count);
if (!bum->udp6_sessions_count)
{
udp_unregister_dst_port (vm, UDP_DST_PORT_bfd6, 0);
@@ -738,33 +716,26 @@ bfd_udp_del_session_internal (vlib_main_t * vm, bfd_session_t * bs)
bfd_put_session (bum->bfd_main, bs);
}
-vnet_api_error_t
-bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr,
- const ip46_address_t * peer_addr,
- u32 desired_min_tx_usec, u32 required_min_rx_usec,
- u8 detect_mult, u8 is_authenticated, u32 conf_key_id,
- u8 bfd_key_id)
+static vnet_api_error_t
+bfd_udp_add_and_start_session (u32 sw_if_index,
+ const ip46_address_t *local_addr,
+ const ip46_address_t *peer_addr,
+ u32 desired_min_tx_usec,
+ u32 required_min_rx_usec, u8 detect_mult,
+ u8 is_authenticated, u32 conf_key_id,
+ u8 bfd_key_id)
{
- bfd_main_t *bm = &bfd_main;
- bfd_lock (bm);
-
- vnet_api_error_t rv =
- bfd_api_verify_common (sw_if_index, desired_min_tx_usec,
- required_min_rx_usec, detect_mult,
- local_addr, peer_addr);
bfd_session_t *bs = NULL;
- if (!rv)
- {
- rv =
- bfd_udp_add_session_internal (vlib_get_main (), &bfd_udp_main,
- sw_if_index, desired_min_tx_usec,
- required_min_rx_usec, detect_mult,
- local_addr, peer_addr, &bs);
- }
+ vnet_api_error_t rv;
+
+ rv = bfd_udp_add_session_internal (
+ vlib_get_main (), &bfd_udp_main, sw_if_index, desired_min_tx_usec,
+ required_min_rx_usec, detect_mult, local_addr, peer_addr, &bs);
+
if (!rv && is_authenticated)
{
rv = bfd_auth_activate (bs, conf_key_id, bfd_key_id,
- 0 /* is not delayed */ );
+ 0 /* is not delayed */);
if (rv)
{
bfd_udp_del_session_internal (vlib_get_main (), bs);
@@ -775,15 +746,67 @@ bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr,
bfd_session_start (bfd_udp_main.bfd_main, bs);
}
- bfd_unlock (bm);
return rv;
}
vnet_api_error_t
-bfd_udp_mod_session (u32 sw_if_index,
- const ip46_address_t * local_addr,
+bfd_udp_add_session (u32 sw_if_index, const ip46_address_t * local_addr,
const ip46_address_t * peer_addr,
- u32 desired_min_tx_usec,
+ u32 desired_min_tx_usec, u32 required_min_rx_usec,
+ u8 detect_mult, u8 is_authenticated, u32 conf_key_id,
+ u8 bfd_key_id)
+{
+ bfd_main_t *bm = &bfd_main;
+ bfd_lock (bm);
+
+ vnet_api_error_t rv = bfd_api_verify_common (
+ sw_if_index, desired_min_tx_usec, detect_mult, local_addr, peer_addr);
+
+ if (!rv)
+ rv = bfd_udp_add_and_start_session (
+ sw_if_index, local_addr, peer_addr, desired_min_tx_usec,
+ required_min_rx_usec, detect_mult, is_authenticated, conf_key_id,
+ bfd_key_id);
+
+ bfd_unlock (bm);
+ return rv;
+}
+
+vnet_api_error_t
+bfd_udp_upd_session (u32 sw_if_index, const ip46_address_t *local_addr,
+ const ip46_address_t *peer_addr, u32 desired_min_tx_usec,
+ u32 required_min_rx_usec, u8 detect_mult,
+ u8 is_authenticated, u32 conf_key_id, u8 bfd_key_id)
+{
+ bfd_main_t *bm = &bfd_main;
+ bfd_lock (bm);
+
+ vnet_api_error_t rv = bfd_api_verify_common (
+ sw_if_index, desired_min_tx_usec, detect_mult, local_addr, peer_addr);
+ if (!rv)
+ {
+ bfd_session_t *bs = NULL;
+
+ rv = bfd_udp_find_session_by_api_input (sw_if_index, local_addr,
+ peer_addr, &bs);
+ if (VNET_API_ERROR_BFD_ENOENT == rv)
+ rv = bfd_udp_add_and_start_session (
+ sw_if_index, local_addr, peer_addr, desired_min_tx_usec,
+ required_min_rx_usec, detect_mult, is_authenticated, conf_key_id,
+ bfd_key_id);
+ else
+ rv = bfd_session_set_params (bfd_udp_main.bfd_main, bs,
+ desired_min_tx_usec, required_min_rx_usec,
+ detect_mult);
+ }
+
+ bfd_unlock (bm);
+ return rv;
+}
+
+vnet_api_error_t
+bfd_udp_mod_session (u32 sw_if_index, const ip46_address_t *local_addr,
+ const ip46_address_t *peer_addr, u32 desired_min_tx_usec,
u32 required_min_rx_usec, u8 detect_mult)
{
bfd_session_t *bs = NULL;
@@ -903,29 +926,6 @@ typedef enum
BFD_UDP_INPUT_N_NEXT,
} bfd_udp_input_next_t;
-/* Packet counters - BFD control frames */
-#define foreach_bfd_udp_error(F) \
- F (NONE, "good bfd packets (processed)") \
- F (BAD, "invalid bfd packets")
-
-#define F(sym, string) static char BFD_UDP_ERR_##sym##_STR[] = string;
-foreach_bfd_udp_error (F);
-#undef F
-
-static char *bfd_udp_error_strings[] = {
-#define F(sym, string) BFD_UDP_ERR_##sym##_STR,
- foreach_bfd_udp_error (F)
-#undef F
-};
-
-typedef enum
-{
-#define F(sym, str) BFD_UDP_ERROR_##sym,
- foreach_bfd_udp_error (F)
-#undef F
- BFD_UDP_N_ERROR,
-} bfd_udp_error_t;
-
typedef enum
{
BFD_UDP_ECHO_INPUT_NEXT_NORMAL,
@@ -934,28 +934,12 @@ typedef enum
BFD_UDP_ECHO_INPUT_N_NEXT,
} bfd_udp_echo_input_next_t;
-/* Packet counters - BFD ECHO packets */
-#define foreach_bfd_udp_echo_error(F) \
- F (NONE, "good bfd echo packets (processed)") \
- F (BAD, "invalid bfd echo packets")
-
-#define F(sym, string) static char BFD_UDP_ECHO_ERR_##sym##_STR[] = string;
-foreach_bfd_udp_echo_error (F);
-#undef F
-
-static char *bfd_udp_echo_error_strings[] = {
-#define F(sym, string) BFD_UDP_ECHO_ERR_##sym##_STR,
- foreach_bfd_udp_echo_error (F)
-#undef F
-};
-
-typedef enum
+static_always_inline vl_counter_bfd_udp_enum_t
+bfd_error_to_udp (bfd_error_t e)
{
-#define F(sym, str) BFD_UDP_ECHO_ERROR_##sym,
- foreach_bfd_udp_echo_error (F)
-#undef F
- BFD_UDP_ECHO_N_ERROR,
-} bfd_udp_echo_error_t;
+ /* The UDP error is a super set of the proto independent errors */
+ return ((vl_counter_bfd_udp_enum_t) e);
+}
static void
bfd_udp4_find_headers (vlib_buffer_t * b, ip4_header_t ** ip4,
@@ -963,7 +947,7 @@ bfd_udp4_find_headers (vlib_buffer_t * b, ip4_header_t ** ip4,
{
/* sanity check first */
const i32 start = vnet_buffer (b)->l3_hdr_offset;
- if (start < 0 && start < sizeof (b->pre_data))
+ if (start < -(signed) sizeof (b->pre_data))
{
BFD_ERR ("Start of ip header is before pre_data, ignoring");
*ip4 = NULL;
@@ -981,9 +965,9 @@ bfd_udp4_find_headers (vlib_buffer_t * b, ip4_header_t ** ip4,
*udp = (udp_header_t *) ((*ip4) + 1);
}
-static bfd_udp_error_t
-bfd_udp4_verify_transport (const ip4_header_t * ip4,
- const udp_header_t * udp, const bfd_session_t * bs)
+static vl_counter_bfd_udp_enum_t
+bfd_udp4_verify_transport (const ip4_header_t *ip4, const udp_header_t *udp,
+ const bfd_session_t *bs)
{
const bfd_udp_session_t *bus = &bs->udp;
const bfd_udp_key_t *key = &bus->key;
@@ -992,21 +976,21 @@ bfd_udp4_verify_transport (const ip4_header_t * ip4,
BFD_ERR ("IPv4 src addr mismatch, got %U, expected %U",
format_ip4_address, ip4->src_address.as_u8, format_ip4_address,
key->peer_addr.ip4.as_u8);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_SRC_MISMATCH;
}
if (ip4->dst_address.as_u32 != key->local_addr.ip4.as_u32)
{
BFD_ERR ("IPv4 dst addr mismatch, got %U, expected %U",
format_ip4_address, ip4->dst_address.as_u8, format_ip4_address,
key->local_addr.ip4.as_u8);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_DST_MISMATCH;
}
const u8 expected_ttl = 255;
if (ip4->ttl != expected_ttl)
{
BFD_ERR ("IPv4 unexpected TTL value %u, expected %u", ip4->ttl,
expected_ttl);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_TTL;
}
if (clib_net_to_host_u16 (udp->src_port) < 49152)
{
@@ -1022,18 +1006,20 @@ typedef struct
bfd_pkt_t pkt;
} bfd_rpc_update_t;
-static void
-bfd_rpc_update_session (vlib_main_t * vm, u32 bs_idx, const bfd_pkt_t * pkt)
+static bfd_error_t
+bfd_rpc_update_session (vlib_main_t *vm, u32 bs_idx, const bfd_pkt_t *pkt)
{
bfd_main_t *bm = &bfd_main;
+ bfd_error_t err;
bfd_lock (bm);
- bfd_consume_pkt (vm, bm, pkt, bs_idx);
+ err = bfd_consume_pkt (vm, bm, pkt, bs_idx);
bfd_unlock (bm);
+
+ return err;
}
-static bfd_udp_error_t
-bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
- vlib_buffer_t * b, bfd_session_t ** bs_out)
+static vl_counter_bfd_udp_enum_t
+bfd_udp4_scan (vlib_main_t *vm, vlib_buffer_t *b, bfd_session_t **bs_out)
{
const bfd_pkt_t *pkt = vlib_buffer_get_current (b);
if (sizeof (*pkt) > b->current_length)
@@ -1057,11 +1043,13 @@ bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
BFD_ERR
("BFD packet length is larger than udp payload length (%u > %u)",
pkt->head.length, udp_payload_length);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_LENGTH;
}
- if (!bfd_verify_pkt_common (pkt))
+ vl_counter_bfd_udp_enum_t err;
+ if (BFD_UDP_ERROR_NONE !=
+ (err = bfd_error_to_udp (bfd_verify_pkt_common (pkt))))
{
- return BFD_UDP_ERROR_BAD;
+ return err;
}
bfd_session_t *bs = NULL;
if (pkt->your_disc)
@@ -1086,22 +1074,21 @@ bfd_udp4_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
if (!bs)
{
BFD_ERR ("BFD session lookup failed - no session matches BFD pkt");
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_NO_SESSION;
}
BFD_DBG ("BFD session found, bs_idx=%u", bs->bs_idx);
if (!bfd_verify_pkt_auth (vm, pkt, b->current_length, bs))
{
BFD_ERR ("Packet verification failed, dropping packet");
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_FAILED_VERIFICATION;
}
- bfd_udp_error_t err;
if (BFD_UDP_ERROR_NONE != (err = bfd_udp4_verify_transport (ip4, udp, bs)))
{
return err;
}
- bfd_rpc_update_session (vm, bs->bs_idx, pkt);
+ err = bfd_error_to_udp (bfd_rpc_update_session (vm, bs->bs_idx, pkt));
*bs_out = bs;
- return BFD_UDP_ERROR_NONE;
+ return err;
}
static void
@@ -1110,7 +1097,7 @@ bfd_udp6_find_headers (vlib_buffer_t * b, ip6_header_t ** ip6,
{
/* sanity check first */
const i32 start = vnet_buffer (b)->l3_hdr_offset;
- if (start < 0 && start < sizeof (b->pre_data))
+ if (start < -(signed) sizeof (b->pre_data))
{
BFD_ERR ("Start of ip header is before pre_data, ignoring");
*ip6 = NULL;
@@ -1136,9 +1123,9 @@ bfd_udp6_find_headers (vlib_buffer_t * b, ip6_header_t ** ip6,
*udp = (udp_header_t *) ((*ip6) + 1);
}
-static bfd_udp_error_t
-bfd_udp6_verify_transport (const ip6_header_t * ip6,
- const udp_header_t * udp, const bfd_session_t * bs)
+static vl_counter_bfd_udp_enum_t
+bfd_udp6_verify_transport (const ip6_header_t *ip6, const udp_header_t *udp,
+ const bfd_session_t *bs)
{
const bfd_udp_session_t *bus = &bs->udp;
const bfd_udp_key_t *key = &bus->key;
@@ -1148,7 +1135,7 @@ bfd_udp6_verify_transport (const ip6_header_t * ip6,
BFD_ERR ("IP src addr mismatch, got %U, expected %U",
format_ip6_address, ip6, format_ip6_address,
&key->peer_addr.ip6);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_SRC_MISMATCH;
}
if (ip6->dst_address.as_u64[0] != key->local_addr.ip6.as_u64[0] &&
ip6->dst_address.as_u64[1] != key->local_addr.ip6.as_u64[1])
@@ -1156,14 +1143,14 @@ bfd_udp6_verify_transport (const ip6_header_t * ip6,
BFD_ERR ("IP dst addr mismatch, got %U, expected %U",
format_ip6_address, ip6, format_ip6_address,
&key->local_addr.ip6);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_DST_MISMATCH;
}
const u8 expected_hop_limit = 255;
if (ip6->hop_limit != expected_hop_limit)
{
BFD_ERR ("IPv6 unexpected hop-limit value %u, expected %u",
ip6->hop_limit, expected_hop_limit);
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_TTL;
}
if (clib_net_to_host_u16 (udp->src_port) < 49152)
{
@@ -1173,9 +1160,8 @@ bfd_udp6_verify_transport (const ip6_header_t * ip6,
return BFD_UDP_ERROR_NONE;
}
-static bfd_udp_error_t
-bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
- vlib_buffer_t * b, bfd_session_t ** bs_out)
+static vl_counter_bfd_udp_enum_t
+bfd_udp6_scan (vlib_main_t *vm, vlib_buffer_t *b, bfd_session_t **bs_out)
{
const bfd_pkt_t *pkt = vlib_buffer_get_current (b);
if (sizeof (*pkt) > b->current_length)
@@ -1201,9 +1187,11 @@ bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
pkt->head.length, udp_payload_length);
return BFD_UDP_ERROR_BAD;
}
- if (!bfd_verify_pkt_common (pkt))
+ vl_counter_bfd_udp_enum_t err;
+ if (BFD_UDP_ERROR_NONE !=
+ (err = bfd_error_to_udp (bfd_verify_pkt_common (pkt))))
{
- return BFD_UDP_ERROR_BAD;
+ return err;
}
bfd_session_t *bs = NULL;
if (pkt->your_disc)
@@ -1230,22 +1218,21 @@ bfd_udp6_scan (vlib_main_t * vm, vlib_node_runtime_t * rt,
if (!bs)
{
BFD_ERR ("BFD session lookup failed - no session matches BFD pkt");
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_NO_SESSION;
}
BFD_DBG ("BFD session found, bs_idx=%u", bs->bs_idx);
if (!bfd_verify_pkt_auth (vm, pkt, b->current_length, bs))
{
BFD_ERR ("Packet verification failed, dropping packet");
- return BFD_UDP_ERROR_BAD;
+ return BFD_UDP_ERROR_FAILED_VERIFICATION;
}
- bfd_udp_error_t err;
if (BFD_UDP_ERROR_NONE != (err = bfd_udp6_verify_transport (ip6, udp, bs)))
{
return err;
}
- bfd_rpc_update_session (vm, bs->bs_idx, pkt);
+ err = bfd_error_to_udp (bfd_rpc_update_session (vm, bs->bs_idx, pkt));
*bs_out = bs;
- return BFD_UDP_ERROR_NONE;
+ return err;
}
/*
@@ -1277,7 +1264,7 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
/* If this pkt is traced, snapshot the data */
if (b0->flags & VLIB_BUFFER_IS_TRACED)
{
- int len;
+ u64 len;
t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0));
len = (b0->current_length < sizeof (t0->data)) ? b0->current_length
: sizeof (t0->data);
@@ -1289,17 +1276,20 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
bfd_lock (bm);
if (is_ipv6)
{
- error0 = bfd_udp6_scan (vm, rt, b0, &bs);
+ error0 = bfd_udp6_scan (vm, b0, &bs);
}
else
{
- error0 = bfd_udp4_scan (vm, rt, b0, &bs);
+ error0 = bfd_udp4_scan (vm, b0, &bs);
}
b0->error = rt->errors[error0];
next0 = BFD_UDP_INPUT_NEXT_NORMAL;
if (BFD_UDP_ERROR_NONE == error0)
{
+ vlib_increment_combined_counter (
+ &bm->rx_counter, vm->thread_index, bs->bs_idx, 1,
+ vlib_buffer_length_in_chain (vm, b0));
/*
* if everything went fine, check for poll bit, if present, re-use
* the buffer and based on (now updated) session parameters, send
@@ -1310,17 +1300,16 @@ bfd_udp_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
{
b0->current_data = 0;
b0->current_length = 0;
- bfd_init_final_control_frame (vm, b0, bfd_udp_main.bfd_main, bs,
- 0);
+ bfd_init_final_control_frame (vm, b0, bs);
if (is_ipv6)
{
vlib_node_increment_counter (vm, bfd_udp6_input_node.index,
- b0->error, 1);
+ error0, 1);
}
else
{
vlib_node_increment_counter (vm, bfd_udp4_input_node.index,
- b0->error, 1);
+ error0, 1);
}
const bfd_udp_session_t *bus = &bs->udp;
ip_adjacency_t *adj = adj_get (bus->adj_index);
@@ -1360,7 +1349,6 @@ bfd_udp4_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
/*
* bfd input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = {
.function = bfd_udp4_input,
.name = "bfd-udp4-input",
@@ -1368,7 +1356,7 @@ VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = BFD_UDP_N_ERROR,
- .error_strings = bfd_udp_error_strings,
+ .error_counters = bfd_udp_error_counters,
.format_trace = bfd_input_format_trace,
@@ -1381,7 +1369,6 @@ VLIB_REGISTER_NODE (bfd_udp4_input_node, static) = {
[BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN] = "ip4-midchain",
},
};
-/* *INDENT-ON* */
static uword
bfd_udp6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
@@ -1389,7 +1376,6 @@ bfd_udp6_input (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return bfd_udp_input (vm, rt, f, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = {
.function = bfd_udp6_input,
.name = "bfd-udp6-input",
@@ -1397,7 +1383,7 @@ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = BFD_UDP_N_ERROR,
- .error_strings = bfd_udp_error_strings,
+ .error_counters = bfd_udp_error_counters,
.format_trace = bfd_input_format_trace,
@@ -1410,7 +1396,6 @@ VLIB_REGISTER_NODE (bfd_udp6_input_node, static) = {
[BFD_UDP_INPUT_NEXT_REPLY_MIDCHAIN] = "ip6-midchain",
},
};
-/* *INDENT-ON* */
/*
* Process a frame of bfd echo packets
@@ -1439,7 +1424,7 @@ bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
/* If this pkt is traced, snapshot the data */
if (b0->flags & VLIB_BUFFER_IS_TRACED)
{
- int len;
+ u64 len;
t0 = vlib_add_trace (vm, rt, b0, sizeof (*t0));
len = (b0->current_length < sizeof (t0->data)) ? b0->current_length
: sizeof (t0->data);
@@ -1447,8 +1432,9 @@ bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
clib_memcpy_fast (t0->data, vlib_buffer_get_current (b0), len);
}
+ bfd_session_t *bs = NULL;
bfd_lock (bm);
- if (bfd_consume_echo_pkt (vm, bfd_udp_main.bfd_main, b0))
+ if ((bs = bfd_consume_echo_pkt (vm, bfd_udp_main.bfd_main, b0)))
{
b0->error = rt->errors[BFD_UDP_ERROR_NONE];
next0 = BFD_UDP_ECHO_INPUT_NEXT_NORMAL;
@@ -1460,17 +1446,25 @@ bfd_udp_echo_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
if (is_ipv6)
{
vlib_node_increment_counter (vm, bfd_udp_echo6_input_node.index,
- b0->error, 1);
+ BFD_UDP_ERROR_NONE, 1);
}
else
{
vlib_node_increment_counter (vm, bfd_udp_echo4_input_node.index,
- b0->error, 1);
+ BFD_UDP_ERROR_NONE, 1);
}
next0 = BFD_UDP_ECHO_INPUT_NEXT_REPLY_REWRITE;
}
bfd_unlock (bm);
+
+ if (bs)
+ {
+ vlib_increment_combined_counter (
+ &bm->rx_echo_counter, vm->thread_index, bs->bs_idx, 1,
+ vlib_buffer_length_in_chain (vm, b0));
+ }
+
vlib_set_next_frame_buffer (vm, rt, next0, bi0);
from += 1;
@@ -1506,15 +1500,14 @@ bfd_echo_input_format_trace (u8 * s, va_list * args)
/*
* bfd input graph node declaration
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp_echo4_input_node, static) = {
.function = bfd_udp_echo4_input,
.name = "bfd-udp-echo4-input",
.vector_size = sizeof (u32),
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = BFD_UDP_ECHO_N_ERROR,
- .error_strings = bfd_udp_error_strings,
+ .n_errors = BFD_UDP_N_ERROR,
+ .error_counters = bfd_udp_error_counters,
.format_trace = bfd_echo_input_format_trace,
@@ -1526,7 +1519,6 @@ VLIB_REGISTER_NODE (bfd_udp_echo4_input_node, static) = {
[BFD_UDP_ECHO_INPUT_NEXT_REPLY_REWRITE] = "ip4-lookup",
},
};
-/* *INDENT-ON* */
static uword
bfd_udp_echo6_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
@@ -1535,15 +1527,14 @@ bfd_udp_echo6_input (vlib_main_t * vm, vlib_node_runtime_t * rt,
return bfd_udp_echo_input (vm, rt, f, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = {
.function = bfd_udp_echo6_input,
.name = "bfd-udp-echo6-input",
.vector_size = sizeof (u32),
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = BFD_UDP_ECHO_N_ERROR,
- .error_strings = bfd_udp_echo_error_strings,
+ .n_errors = BFD_UDP_N_ERROR,
+ .error_counters = bfd_udp_error_counters,
.format_trace = bfd_echo_input_format_trace,
@@ -1556,46 +1547,73 @@ VLIB_REGISTER_NODE (bfd_udp_echo6_input_node, static) = {
},
};
-/* *INDENT-ON* */
static clib_error_t *
-bfd_udp_sw_if_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_create)
+bfd_udp_sw_if_add_del (CLIB_UNUSED (vnet_main_t *vnm), u32 sw_if_index,
+ u32 is_create)
{
- bfd_session_t **to_be_freed = NULL;
+ u32 *to_be_freed = NULL;
bfd_udp_main_t *bum = &bfd_udp_main;
BFD_DBG ("sw_if_add_del called, sw_if_index=%u, is_create=%u", sw_if_index,
is_create);
if (!is_create)
{
bfd_session_t *bs;
- pool_foreach (bs, bfd_udp_main.bfd_main->sessions)
- {
- if (bs->transport != BFD_TRANSPORT_UDP4 &&
- bs->transport != BFD_TRANSPORT_UDP6)
- {
- continue;
- }
- if (bs->udp.key.sw_if_index != sw_if_index)
- {
- continue;
- }
- vec_add1 (to_be_freed, bs);
- }
- }
- bfd_session_t **bs;
- vec_foreach (bs, to_be_freed)
- {
- vlib_log_notice (bum->log_class,
- "removal of sw_if_index=%u forces removal of bfd session "
- "with bs_idx=%u", sw_if_index, (*bs)->bs_idx);
- bfd_session_set_flags (vlib_get_main (), *bs, 0);
- bfd_udp_del_session_internal (vlib_get_main (), *bs);
- }
+ pool_foreach (bs, bum->bfd_main->sessions)
+ {
+ if (bs->transport != BFD_TRANSPORT_UDP4 &&
+ bs->transport != BFD_TRANSPORT_UDP6)
+ {
+ continue;
+ }
+ if (bs->udp.key.sw_if_index != sw_if_index)
+ {
+ continue;
+ }
+ vec_add1 (to_be_freed, bs->bs_idx);
+ }
+ }
+ u32 *bs_idx;
+ vec_foreach (bs_idx, to_be_freed)
+ {
+ bfd_session_t *bs = pool_elt_at_index (bum->bfd_main->sessions, *bs_idx);
+ vlib_log_notice (bum->log_class,
+ "removal of sw_if_index=%u forces removal of bfd "
+ "session with bs_idx=%u",
+ sw_if_index, bs->bs_idx);
+ bfd_session_set_flags (vlib_get_main (), bs, 0);
+ bfd_udp_del_session_internal (vlib_get_main (), bs);
+ }
return 0;
}
VNET_SW_INTERFACE_ADD_DEL_FUNCTION (bfd_udp_sw_if_add_del);
+clib_error_t *
+bfd_udp_stats_init (bfd_udp_main_t *bum)
+{
+ const char *name4 = "/bfd/udp4/sessions";
+ bum->udp4_sessions_count_stat_seg_entry = vlib_stats_add_gauge ("%s", name4);
+
+ vlib_stats_set_gauge (bum->udp4_sessions_count_stat_seg_entry, 0);
+ if (~0 == bum->udp4_sessions_count_stat_seg_entry)
+ {
+ return clib_error_return (
+ 0, "Could not create stat segment entry for %s", name4);
+ }
+ const char *name6 = "/bfd/udp6/sessions";
+ bum->udp6_sessions_count_stat_seg_entry = vlib_stats_add_gauge ("%s", name6);
+
+ vlib_stats_set_gauge (bum->udp6_sessions_count_stat_seg_entry, 0);
+ if (~0 == bum->udp6_sessions_count_stat_seg_entry)
+ {
+ return clib_error_return (
+ 0, "Could not create stat segment entry for %s", name6);
+ }
+
+ return 0;
+}
+
/*
* setup function
*/
@@ -1608,24 +1626,7 @@ bfd_udp_init (vlib_main_t * vm)
sizeof (bfd_udp_key_t));
bfd_udp_main.bfd_main = &bfd_main;
bfd_udp_main.vnet_main = vnet_get_main ();
- vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "ip4-arp");
- ASSERT (node);
- bfd_udp_main.ip4_arp_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-discover-neighbor");
- ASSERT (node);
- bfd_udp_main.ip6_ndp_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip4-rewrite");
- ASSERT (node);
- bfd_udp_main.ip4_rewrite_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-rewrite");
- ASSERT (node);
- bfd_udp_main.ip6_rewrite_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip4-midchain");
- ASSERT (node);
- bfd_udp_main.ip4_midchain_idx = node->index;
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-midchain");
- ASSERT (node);
- bfd_udp_main.ip6_midchain_idx = node->index;
+ bfd_udp_stats_init (&bfd_udp_main);
bfd_udp_main.log_class = vlib_log_register_class ("bfd", "udp");
vlib_log_debug (bfd_udp_main.log_class, "initialized");
diff --git a/src/vnet/bfd/bfd_udp.h b/src/vnet/bfd/bfd_udp.h
index 87868104f98..8f4bfee2bd7 100644
--- a/src/vnet/bfd/bfd_udp.h
+++ b/src/vnet/bfd/bfd_udp.h
@@ -24,7 +24,6 @@
#include <vnet/ip/ip6_packet.h>
#include <vnet/bfd/bfd_api.h>
-/* *INDENT-OFF* */
/** identifier of BFD session based on UDP transport only */
typedef CLIB_PACKED (struct {
union {
@@ -38,7 +37,6 @@ typedef CLIB_PACKED (struct {
/** peer address */
ip46_address_t peer_addr;
}) bfd_udp_key_t;
-/* *INDENT-ON* */
/** UDP transport specific data embedded in bfd_session's union */
typedef struct
@@ -82,22 +80,18 @@ int bfd_add_udp6_transport (vlib_main_t * vm, u32 bi,
/**
* @brief transport packet over udpv4
*
- * @param is_echo 1 if this is echo packet, 0 if control frame
- *
* @return 1 on success, 0 on failure
*/
-int bfd_transport_udp4 (vlib_main_t * vm, u32 bi,
- const struct bfd_session_s *bs);
+int bfd_transport_udp4 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ const struct bfd_session_s *bs, int is_echo);
/**
* @brief transport packet over udpv6
*
- * @param is_echo 1 if this is echo packet, 0 if control frame
- *
* @return 1 on success, 0 on failure
*/
-int bfd_transport_udp6 (vlib_main_t * vm, u32 bi,
- const struct bfd_session_s *bs);
+int bfd_transport_udp6 (vlib_main_t *vm, vlib_node_runtime_t *rt, u32 bi,
+ const struct bfd_session_s *bs, int is_echo);
/**
* @brief check if the bfd udp layer is echo-capable at this time
diff --git a/src/vnet/bier/bier_update.c b/src/vnet/bier/bier_update.c
index 4108d09f51e..fdb7c5c0865 100644
--- a/src/vnet/bier/bier_update.c
+++ b/src/vnet/bier/bier_update.c
@@ -129,7 +129,14 @@ done:
VLIB_CLI_COMMAND (bier_route_command) = {
.path = "bier route",
- .short_help = "bier route [add|del] sd <sud-domain> set <set> bsl <bit-string-length> bp <bit-position> via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]",
+ .short_help =
+ "bier route [add|del] sd <sud-domain> set <set> bsl <bit-string-length> "
+ "bp <bit-position> via [next-hop-address] [next-hop-interface] "
+ "[next-hop-table <value>] [weight <value>] [preference <value>] "
+ "[udp-encap-id <value>] [ip4-lookup-in-table <value>] "
+ "[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] "
+ "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] "
+ "[out-labels <value value value>]",
.function = vnet_bier_route_cmd,
};
diff --git a/src/vnet/bonding/bond_api.c b/src/vnet/bonding/bond_api.c
index 3fd73d7995f..d9287a8e23d 100644
--- a/src/vnet/bonding/bond_api.c
+++ b/src/vnet/bonding/bond_api.c
@@ -43,8 +43,11 @@ vl_api_bond_delete_t_handler (vl_api_bond_delete_t * mp)
vl_api_bond_delete_reply_t *rmp;
u32 sw_if_index = ntohl (mp->sw_if_index);
+ VALIDATE_SW_IF_INDEX (mp);
+
rv = bond_delete_if (vm, sw_if_index);
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_BOND_DELETE_REPLY);
}
@@ -72,12 +75,10 @@ vl_api_bond_create_t_handler (vl_api_bond_create_t * mp)
int rv = ap->rv;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BOND_CREATE_REPLY,
({
rmp->sw_if_index = ntohl (ap->sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -105,12 +106,10 @@ vl_api_bond_create2_t_handler (vl_api_bond_create2_t * mp)
int rv = ap->rv;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BOND_CREATE2_REPLY,
({
rmp->sw_if_index = ntohl (ap->sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -168,6 +167,8 @@ static void
vl_api_sw_interface_set_bond_weight_reply_t *rmp;
int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
clib_memset (ap, 0, sizeof (*ap));
ap->sw_if_index = ntohl (mp->sw_if_index);
@@ -176,6 +177,7 @@ static void
bond_set_intf_weight (vm, ap);
rv = ap->rv;
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_SW_INTERFACE_SET_BOND_WEIGHT_REPLY);
}
@@ -187,12 +189,15 @@ vl_api_bond_detach_slave_t_handler (vl_api_bond_detach_slave_t * mp)
bond_detach_member_args_t _a, *ap = &_a;
int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
clib_memset (ap, 0, sizeof (*ap));
ap->member = ntohl (mp->sw_if_index);
bond_detach_member (vm, ap);
rv = ap->rv;
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_BOND_DETACH_SLAVE_REPLY);
}
@@ -204,12 +209,15 @@ vl_api_bond_detach_member_t_handler (vl_api_bond_detach_member_t * mp)
bond_detach_member_args_t _a, *ap = &_a;
int rv = 0;
+ VALIDATE_SW_IF_INDEX (mp);
+
clib_memset (ap, 0, sizeof (*ap));
ap->member = ntohl (mp->sw_if_index);
bond_detach_member (vm, ap);
rv = ap->rv;
+ BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_BOND_DETACH_MEMBER_REPLY);
}
diff --git a/src/vnet/bonding/cli.c b/src/vnet/bonding/cli.c
index a24d1104486..cdc935ff10f 100644
--- a/src/vnet/bonding/cli.c
+++ b/src/vnet/bonding/cli.c
@@ -20,7 +20,7 @@
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/bonding/node.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/stats/stats.h>
void
bond_disable_collecting_distributing (vlib_main_t * vm, member_if_t * mif)
@@ -183,7 +183,6 @@ bond_dump_ifs (bond_interface_details_t ** out_bondifs)
bond_interface_details_t *r_bondifs = NULL;
bond_interface_details_t *bondif = NULL;
- /* *INDENT-OFF* */
pool_foreach (bif, bm->interfaces) {
vec_add2(r_bondifs, bondif, 1);
clib_memset (bondif, 0, sizeof (*bondif));
@@ -201,7 +200,6 @@ bond_dump_ifs (bond_interface_details_t ** out_bondifs)
bondif->active_members = vec_len (bif->active_members);
bondif->members = vec_len (bif->members);
}
- /* *INDENT-ON* */
*out_bondifs = r_bondifs;
@@ -323,10 +321,10 @@ bond_delete_neighbor (vlib_main_t * vm, bond_if_t * bif, member_if_t * mif)
if (bif->mode == BOND_MODE_LACP)
{
- stat_segment_deregister_state_counter
- (bm->stats[bif->sw_if_index][mif->sw_if_index].actor_state);
- stat_segment_deregister_state_counter
- (bm->stats[bif->sw_if_index][mif->sw_if_index].partner_state);
+ vlib_stats_remove_entry (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].actor_state);
+ vlib_stats_remove_entry (
+ bm->stats[bif->sw_if_index][mif->sw_if_index].partner_state);
}
pool_put (bm->neighbors, mif);
@@ -376,11 +374,11 @@ bond_delete_if (vlib_main_t * vm, u32 sw_if_index)
void
bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args)
{
+ vnet_eth_interface_registration_t eir = {};
bond_main_t *bm = &bond_main;
vnet_main_t *vnm = vnet_get_main ();
vnet_sw_interface_t *sw;
bond_if_t *bif;
- vnet_hw_interface_t *hw;
if ((args->mode == BOND_MODE_LACP) && bm->lacp_plugin_loaded == 0)
{
@@ -408,6 +406,16 @@ bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args)
bif->mode = args->mode;
bif->gso = args->gso;
+ if (bif->lb == BOND_LB_L2)
+ bif->hash_func =
+ vnet_hash_function_from_name ("hash-eth-l2", VNET_HASH_FN_TYPE_ETHERNET);
+ else if (bif->lb == BOND_LB_L34)
+ bif->hash_func = vnet_hash_function_from_name ("hash-eth-l34",
+ VNET_HASH_FN_TYPE_ETHERNET);
+ else if (bif->lb == BOND_LB_L23)
+ bif->hash_func = vnet_hash_function_from_name ("hash-eth-l23",
+ VNET_HASH_FN_TYPE_ETHERNET);
+
// Adjust requested interface id
if (bif->id == ~0)
bif->id = bif->dev_instance;
@@ -440,33 +448,26 @@ bond_create_if (vlib_main_t * vm, bond_create_if_args_t * args)
args->hw_addr[1] = 0xfe;
}
memcpy (bif->hw_address, args->hw_addr, 6);
- args->error = ethernet_register_interface
- (vnm, bond_dev_class.index, bif->dev_instance /* device instance */ ,
- bif->hw_address /* ethernet address */ ,
- &bif->hw_if_index, 0 /* flag change */ );
- if (args->error)
- {
- args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
- hash_unset (bm->id_used, bif->id);
- pool_put (bm->interfaces, bif);
- return;
- }
+ eir.dev_class_index = bond_dev_class.index;
+ eir.dev_instance = bif->dev_instance;
+ eir.address = bif->hw_address;
+ bif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
sw = vnet_get_hw_sw_interface (vnm, bif->hw_if_index);
bif->sw_if_index = sw->sw_if_index;
bif->group = bif->sw_if_index;
bif->numa_only = args->numa_only;
- hw = vnet_get_hw_interface (vnm, bif->hw_if_index);
/*
* Add GSO and Checksum offload flags if GSO is enabled on Bond
*/
if (args->gso)
{
- hw->caps |= (VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM);
+ vnet_hw_if_set_caps (vnm, bif->hw_if_index,
+ VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM);
}
if (vlib_get_thread_main ()->n_vlib_mains > 1)
clib_spinlock_init (&bif->lockp);
@@ -517,12 +518,18 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (args.mode == BOND_MODE_LACP)
args.numa_only = 1;
else
- return clib_error_return (0,
- "Only lacp mode supports numa-only so far!");
+ {
+ unformat_free (line_input);
+ return clib_error_return (
+ 0, "Only lacp mode supports numa-only so far!");
+ }
}
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ unformat_free (line_input);
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
}
unformat_free (line_input);
@@ -538,7 +545,6 @@ bond_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bond_create_command, static) = {
.path = "create bond",
.short_help = "create bond mode {round-robin | active-backup | broadcast | "
@@ -546,7 +552,6 @@ VLIB_CLI_COMMAND (bond_create_command, static) = {
"[hw-addr <mac-address>] [id <if-id>] [gso]",
.function = bond_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
bond_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -587,14 +592,12 @@ bond_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bond_delete__command, static) =
{
.path = "delete bond",
.short_help = "delete bond {<interface> | sw_if_index <sw_idx>}",
.function = bond_delete_command_fn,
};
-/* *INDENT-ON* */
void
bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args)
@@ -632,7 +635,7 @@ bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args)
clib_error_return (0, "bond interface cannot be added as member");
return;
}
- if (bif->gso && !(mif_hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO))
+ if (bif->gso && !(mif_hw->caps & VNET_HW_IF_CAP_TCP_GSO))
{
args->rv = VNET_API_ERROR_INVALID_INTERFACE;
args->error =
@@ -641,32 +644,29 @@ bond_add_member (vlib_main_t * vm, bond_add_member_args_t * args)
}
if (bif->mode == BOND_MODE_LACP)
{
- u8 *name = format (0, "/if/lacp/%u/%u/state%c", bif->sw_if_index,
- args->member, 0);
+ u32 actor_idx, partner_idx;
- vec_validate (bm->stats, bif->sw_if_index);
- vec_validate (bm->stats[bif->sw_if_index], args->member);
-
- args->error = stat_segment_register_state_counter
- (name, &bm->stats[bif->sw_if_index][args->member].actor_state);
- if (args->error != 0)
+ actor_idx = vlib_stats_add_gauge ("/if/lacp/%u/%u/state",
+ bif->sw_if_index, args->member);
+ if (actor_idx == ~0)
{
args->rv = VNET_API_ERROR_INVALID_INTERFACE;
- vec_free (name);
return;
}
- vec_reset_length (name);
- name = format (0, "/if/lacp/%u/%u/partner-state%c", bif->sw_if_index,
- args->member, 0);
- args->error = stat_segment_register_state_counter
- (name, &bm->stats[bif->sw_if_index][args->member].partner_state);
- vec_free (name);
- if (args->error != 0)
+ partner_idx = vlib_stats_add_gauge ("/if/lacp/%u/%u/partner-state",
+ bif->sw_if_index, args->member);
+ if (partner_idx == ~0)
{
+ vlib_stats_remove_entry (actor_idx);
args->rv = VNET_API_ERROR_INVALID_INTERFACE;
return;
}
+
+ vec_validate (bm->stats, bif->sw_if_index);
+ vec_validate (bm->stats[bif->sw_if_index], args->member);
+ bm->stats[bif->sw_if_index][args->member].actor_state = actor_idx;
+ bm->stats[bif->sw_if_index][args->member].partner_state = partner_idx;
}
pool_get (bm->neighbors, mif);
@@ -817,14 +817,12 @@ add_member_interface_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (add_member_interface_command, static) = {
.path = "bond add",
.short_help = "bond add <BondEthernetx> <member-interface> "
"[passive] [long-timeout]",
.function = add_member_interface_command_fn,
};
-/* *INDENT-ON* */
void
bond_detach_member (vlib_main_t * vm, bond_detach_member_args_t * args)
@@ -881,13 +879,11 @@ detach_interface_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (detach_interface_command, static) = {
.path = "bond del",
.short_help = "bond del <member-interface>",
.function = detach_interface_command_fn,
};
-/* *INDENT-ON* */
static void
show_bond (vlib_main_t * vm)
@@ -899,7 +895,6 @@ show_bond (vlib_main_t * vm)
"interface name", "sw_if_index", "mode",
"load balance", "active members", "members");
- /* *INDENT-OFF* */
pool_foreach (bif, bm->interfaces)
{
vlib_cli_output (vm, "%-16U %-12d %-13U %-13U %-14u %u",
@@ -908,7 +903,6 @@ show_bond (vlib_main_t * vm)
format_bond_load_balance, bif->lb,
vec_len (bif->active_members), vec_len (bif->members));
}
- /* *INDENT-ON* */
}
static void
@@ -918,7 +912,6 @@ show_bond_details (vlib_main_t * vm)
bond_if_t *bif;
u32 *sw_if_index;
- /* *INDENT-OFF* */
pool_foreach (bif, bm->interfaces)
{
vlib_cli_output (vm, "%U", format_bond_interface_name, bif->dev_instance);
@@ -957,7 +950,6 @@ show_bond_details (vlib_main_t * vm)
vlib_cli_output (vm, " sw_if_index: %d", bif->sw_if_index);
vlib_cli_output (vm, " hw_if_index: %d", bif->hw_if_index);
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -985,13 +977,11 @@ show_bond_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_bond_command, static) = {
.path = "show bond",
.short_help = "show bond [details]",
.function = show_bond_fn,
};
-/* *INDENT-ON* */
void
bond_set_intf_weight (vlib_main_t * vm, bond_set_intf_weight_args_t * args)
@@ -1091,14 +1081,12 @@ bond_set_intf_cmd (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(set_interface_bond_cmd, static) = {
.path = "set interface bond",
.short_help = "set interface bond <interface> | sw_if_index <idx>"
" weight <value>",
.function = bond_set_intf_cmd,
};
-/* *INDENT-ON* */
clib_error_t *
bond_cli_init (vlib_main_t * vm)
diff --git a/src/vnet/bonding/device.c b/src/vnet/bonding/device.c
index 9e949b87214..a0b93fccde1 100644
--- a/src/vnet/bonding/device.c
+++ b/src/vnet/bonding/device.c
@@ -17,16 +17,9 @@
#define _GNU_SOURCE
#include <stdint.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/ip/ip6_hop_by_hop_packet.h>
-#include <vnet/bonding/node.h>
-#include <vppinfra/lb_hash_hash.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ip-neighbor/ip_neighbor.h>
#include <vnet/ip-neighbor/ip4_neighbor.h>
#include <vnet/ip-neighbor/ip6_neighbor.h>
+#include <vnet/bonding/node.h>
#define foreach_bond_tx_error \
_ (NONE, "no error") \
@@ -118,14 +111,6 @@ bond_set_l2_mode_function (vnet_main_t * vnm,
return 0;
}
-static __clib_unused clib_error_t *
-bond_subif_add_del_function (vnet_main_t * vnm, u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- /* Nothing for now */
- return 0;
-}
-
static clib_error_t *
bond_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
{
@@ -194,8 +179,8 @@ bond_tx_add_to_queue (bond_per_thread_data_t * ptd, u32 port, u32 bi)
}
static_always_inline u32
-bond_lb_broadcast (vlib_main_t * vm,
- bond_if_t * bif, vlib_buffer_t * b0, uword n_members)
+bond_lb_broadcast (vlib_main_t *vm, bond_if_t *bif, vlib_buffer_t *b0,
+ uword n_members)
{
bond_main_t *bm = &bond_main;
vlib_buffer_t *c0;
@@ -220,230 +205,75 @@ bond_lb_broadcast (vlib_main_t * vm,
}
static_always_inline u32
-bond_lb_l2 (vlib_buffer_t * b0)
-{
- ethernet_header_t *eth = vlib_buffer_get_current (b0);
- u64 *dst = (u64 *) & eth->dst_address[0];
- u64 a = clib_mem_unaligned (dst, u64);
- u32 *src = (u32 *) & eth->src_address[2];
- u32 b = clib_mem_unaligned (src, u32);
-
- return lb_hash_hash_2_tuples (a, b);
-}
-
-static_always_inline u16 *
-bond_locate_ethertype (ethernet_header_t * eth)
+bond_lb_round_robin (bond_if_t *bif, vlib_buffer_t *b0, uword n_members)
{
- u16 *ethertype_p;
- ethernet_vlan_header_t *vlan;
+ bif->lb_rr_last_index++;
+ if (bif->lb_rr_last_index >= n_members)
+ bif->lb_rr_last_index = 0;
- if (!ethernet_frame_is_tagged (clib_net_to_host_u16 (eth->type)))
- {
- ethertype_p = &eth->type;
- }
- else
- {
- vlan = (void *) (eth + 1);
- ethertype_p = &vlan->type;
- if (*ethertype_p == ntohs (ETHERNET_TYPE_VLAN))
- {
- vlan++;
- ethertype_p = &vlan->type;
- }
- }
- return ethertype_p;
+ return bif->lb_rr_last_index;
}
-static_always_inline u32
-bond_lb_l23 (vlib_buffer_t * b0)
+static_always_inline void
+bond_tx_hash (vlib_main_t *vm, bond_per_thread_data_t *ptd, bond_if_t *bif,
+ vlib_buffer_t **b, u32 *h, u32 n_left)
{
- ethernet_header_t *eth = vlib_buffer_get_current (b0);
- u8 ip_version;
- ip4_header_t *ip4;
- u16 ethertype, *ethertype_p;
- u32 *mac1, *mac2, *mac3;
+ u32 n_left_from = n_left;
+ void **data;
- ethertype_p = bond_locate_ethertype (eth);
- ethertype = clib_mem_unaligned (ethertype_p, u16);
+ ASSERT (bif->hash_func != 0);
- if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
- (ethertype != htons (ETHERNET_TYPE_IP6)))
- return bond_lb_l2 (b0);
-
- ip4 = (ip4_header_t *) (ethertype_p + 1);
- ip_version = (ip4->ip_version_and_header_length >> 4);
-
- if (ip_version == 0x4)
- {
- u32 a, c;
-
- mac1 = (u32 *) & eth->dst_address[0];
- mac2 = (u32 *) & eth->dst_address[4];
- mac3 = (u32 *) & eth->src_address[2];
-
- a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^
- clib_mem_unaligned (mac3, u32);
- c =
- lb_hash_hash_2_tuples (clib_mem_unaligned (&ip4->address_pair, u64),
- a);
- return c;
- }
- else if (ip_version == 0x6)
+ vec_validate_aligned (ptd->data, n_left - 1, CLIB_CACHE_LINE_BYTES);
+ data = ptd->data;
+ while (n_left >= 8)
{
- u64 a;
- u32 c;
- ip6_header_t *ip6 = (ip6_header_t *) (eth + 1);
-
- mac1 = (u32 *) & eth->dst_address[0];
- mac2 = (u32 *) & eth->dst_address[4];
- mac3 = (u32 *) & eth->src_address[2];
-
- a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^
- clib_mem_unaligned (mac3, u32);
- c =
- lb_hash_hash (clib_mem_unaligned
- (&ip6->src_address.as_uword[0], uword),
- clib_mem_unaligned (&ip6->src_address.as_uword[1],
- uword),
- clib_mem_unaligned (&ip6->dst_address.as_uword[0],
- uword),
- clib_mem_unaligned (&ip6->dst_address.as_uword[1],
- uword), a);
- return c;
- }
- return bond_lb_l2 (b0);
-}
-
-static_always_inline u32
-bond_lb_l34 (vlib_buffer_t * b0)
-{
- ethernet_header_t *eth = vlib_buffer_get_current (b0);
- u8 ip_version;
- uword is_tcp_udp;
- ip4_header_t *ip4;
- u16 ethertype, *ethertype_p;
-
- ethertype_p = bond_locate_ethertype (eth);
- ethertype = clib_mem_unaligned (ethertype_p, u16);
-
- if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
- (ethertype != htons (ETHERNET_TYPE_IP6)))
- return (bond_lb_l2 (b0));
+ // Prefetch next iteration
+ vlib_prefetch_buffer_header (b[4], LOAD);
+ vlib_prefetch_buffer_header (b[5], LOAD);
+ vlib_prefetch_buffer_header (b[6], LOAD);
+ vlib_prefetch_buffer_header (b[7], LOAD);
- ip4 = (ip4_header_t *) (ethertype_p + 1);
- ip_version = (ip4->ip_version_and_header_length >> 4);
+ data[0] = vlib_buffer_get_current (b[0]);
+ data[1] = vlib_buffer_get_current (b[1]);
+ data[2] = vlib_buffer_get_current (b[2]);
+ data[3] = vlib_buffer_get_current (b[3]);
- if (ip_version == 0x4)
- {
- u32 a, t1, t2;
- tcp_header_t *tcp = (void *) (ip4 + 1);
-
- is_tcp_udp = (ip4->protocol == IP_PROTOCOL_TCP) ||
- (ip4->protocol == IP_PROTOCOL_UDP);
- t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0;
- t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0;
- a = t1 ^ t2;
- return
- lb_hash_hash_2_tuples (clib_mem_unaligned (&ip4->address_pair, u64),
- a);
- }
- else if (ip_version == 0x6)
- {
- u64 a;
- u32 c, t1, t2;
- ip6_header_t *ip6 = (ip6_header_t *) (eth + 1);
- tcp_header_t *tcp = (void *) (ip6 + 1);
-
- is_tcp_udp = 0;
- if (PREDICT_TRUE ((ip6->protocol == IP_PROTOCOL_TCP) ||
- (ip6->protocol == IP_PROTOCOL_UDP)))
- {
- is_tcp_udp = 1;
- tcp = (void *) (ip6 + 1);
- }
- else if (ip6->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
- {
- ip6_hop_by_hop_header_t *hbh =
- (ip6_hop_by_hop_header_t *) (ip6 + 1);
- if ((hbh->protocol == IP_PROTOCOL_TCP)
- || (hbh->protocol == IP_PROTOCOL_UDP))
- {
- is_tcp_udp = 1;
- tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
- }
- }
- t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0;
- t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0;
- a = t1 ^ t2;
- c =
- lb_hash_hash (clib_mem_unaligned
- (&ip6->src_address.as_uword[0], uword),
- clib_mem_unaligned (&ip6->src_address.as_uword[1],
- uword),
- clib_mem_unaligned (&ip6->dst_address.as_uword[0],
- uword),
- clib_mem_unaligned (&ip6->dst_address.as_uword[1],
- uword), a);
- return c;
+ n_left -= 4;
+ b += 4;
+ data += 4;
}
- return bond_lb_l2 (b0);
-}
+ while (n_left > 0)
+ {
+ data[0] = vlib_buffer_get_current (b[0]);
-static_always_inline u32
-bond_lb_round_robin (bond_if_t * bif, vlib_buffer_t * b0, uword n_members)
-{
- bif->lb_rr_last_index++;
- if (bif->lb_rr_last_index >= n_members)
- bif->lb_rr_last_index = 0;
+ n_left -= 1;
+ b += 1;
+ data += 1;
+ }
- return bif->lb_rr_last_index;
+ bif->hash_func (ptd->data, h, n_left_from);
+ vec_reset_length (ptd->data);
}
static_always_inline void
-bond_tx_inline (vlib_main_t * vm, bond_if_t * bif, vlib_buffer_t ** b,
- u32 * h, u32 n_left, uword n_members, u32 lb_alg)
+bond_tx_no_hash (vlib_main_t *vm, bond_if_t *bif, vlib_buffer_t **b, u32 *h,
+ u32 n_left, uword n_members, u32 lb_alg)
{
- while (n_left >= 4)
+ while (n_left >= 8)
{
// Prefetch next iteration
- if (n_left >= 8)
- {
- vlib_buffer_t **pb = b + 4;
-
- vlib_prefetch_buffer_header (pb[0], LOAD);
- vlib_prefetch_buffer_header (pb[1], LOAD);
- vlib_prefetch_buffer_header (pb[2], LOAD);
- vlib_prefetch_buffer_header (pb[3], LOAD);
+ vlib_prefetch_buffer_header (b[4], LOAD);
+ vlib_prefetch_buffer_header (b[5], LOAD);
+ vlib_prefetch_buffer_header (b[6], LOAD);
+ vlib_prefetch_buffer_header (b[7], LOAD);
- clib_prefetch_load (pb[0]->data);
- clib_prefetch_load (pb[1]->data);
- clib_prefetch_load (pb[2]->data);
- clib_prefetch_load (pb[3]->data);
- }
+ clib_prefetch_load (b[4]->data);
+ clib_prefetch_load (b[5]->data);
+ clib_prefetch_load (b[6]->data);
+ clib_prefetch_load (b[7]->data);
- if (lb_alg == BOND_LB_L2)
- {
- h[0] = bond_lb_l2 (b[0]);
- h[1] = bond_lb_l2 (b[1]);
- h[2] = bond_lb_l2 (b[2]);
- h[3] = bond_lb_l2 (b[3]);
- }
- else if (lb_alg == BOND_LB_L34)
- {
- h[0] = bond_lb_l34 (b[0]);
- h[1] = bond_lb_l34 (b[1]);
- h[2] = bond_lb_l34 (b[2]);
- h[3] = bond_lb_l34 (b[3]);
- }
- else if (lb_alg == BOND_LB_L23)
- {
- h[0] = bond_lb_l23 (b[0]);
- h[1] = bond_lb_l23 (b[1]);
- h[2] = bond_lb_l23 (b[2]);
- h[3] = bond_lb_l23 (b[3]);
- }
- else if (lb_alg == BOND_LB_RR)
+ if (lb_alg == BOND_LB_RR)
{
h[0] = bond_lb_round_robin (bif, b[0], n_members);
h[1] = bond_lb_round_robin (bif, b[1], n_members);
@@ -469,13 +299,7 @@ bond_tx_inline (vlib_main_t * vm, bond_if_t * bif, vlib_buffer_t ** b,
while (n_left > 0)
{
- if (bif->lb == BOND_LB_L2)
- h[0] = bond_lb_l2 (b[0]);
- else if (bif->lb == BOND_LB_L34)
- h[0] = bond_lb_l34 (b[0]);
- else if (bif->lb == BOND_LB_L23)
- h[0] = bond_lb_l23 (b[0]);
- else if (bif->lb == BOND_LB_RR)
+ if (bif->lb == BOND_LB_RR)
h[0] = bond_lb_round_robin (bif, b[0], n_members);
else if (bif->lb == BOND_LB_BC)
h[0] = bond_lb_broadcast (vm, bif, b[0], n_members);
@@ -496,40 +320,6 @@ bond_hash_to_port (u32 * h, u32 n_left, u32 n_members,
{
u32 mask = n_members - 1;
-#ifdef CLIB_HAVE_VEC256
- /* only lower 16 bits of hash due to single precision fp arithmetic */
- u32x8 mask8, sc8u, h8a, h8b;
- f32x8 sc8f;
-
- if (use_modulo_shortcut)
- {
- mask8 = u32x8_splat (mask);
- }
- else
- {
- mask8 = u32x8_splat (0xffff);
- sc8u = u32x8_splat (n_members);
- sc8f = f32x8_from_u32x8 (sc8u);
- }
-
- while (n_left > 16)
- {
- h8a = u32x8_load_unaligned (h) & mask8;
- h8b = u32x8_load_unaligned (h + 8) & mask8;
-
- if (use_modulo_shortcut == 0)
- {
- h8a -= sc8u * u32x8_from_f32x8 (f32x8_from_u32x8 (h8a) / sc8f);
- h8b -= sc8u * u32x8_from_f32x8 (f32x8_from_u32x8 (h8b) / sc8f);
- }
-
- u32x8_store_unaligned (h8a, h);
- u32x8_store_unaligned (h8b, h + 8);
- n_left -= 16;
- h += 16;
- }
-#endif
-
while (n_left > 4)
{
if (use_modulo_shortcut)
@@ -568,17 +358,13 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif,
u32 sw_if_index = data[0];
u32 *h = data;
- while (n_left >= 4)
+ while (n_left >= 8)
{
// Prefetch next iteration
- if (n_left >= 8)
- {
- vlib_buffer_t **pb = b + 4;
- vlib_prefetch_buffer_header (pb[0], LOAD);
- vlib_prefetch_buffer_header (pb[1], LOAD);
- vlib_prefetch_buffer_header (pb[2], LOAD);
- vlib_prefetch_buffer_header (pb[3], LOAD);
- }
+ vlib_prefetch_buffer_header (b[4], LOAD);
+ vlib_prefetch_buffer_header (b[5], LOAD);
+ vlib_prefetch_buffer_header (b[6], LOAD);
+ vlib_prefetch_buffer_header (b[7], LOAD);
if (PREDICT_FALSE (single_sw_if_index))
{
@@ -594,17 +380,14 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif,
}
else
{
- u32 sw_if_index[4];
-
- sw_if_index[0] = *vec_elt_at_index (bif->active_members, h[0]);
- sw_if_index[1] = *vec_elt_at_index (bif->active_members, h[1]);
- sw_if_index[2] = *vec_elt_at_index (bif->active_members, h[2]);
- sw_if_index[3] = *vec_elt_at_index (bif->active_members, h[3]);
-
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sw_if_index[0];
- vnet_buffer (b[1])->sw_if_index[VLIB_TX] = sw_if_index[1];
- vnet_buffer (b[2])->sw_if_index[VLIB_TX] = sw_if_index[2];
- vnet_buffer (b[3])->sw_if_index[VLIB_TX] = sw_if_index[3];
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
+ *vec_elt_at_index (bif->active_members, h[0]);
+ vnet_buffer (b[1])->sw_if_index[VLIB_TX] =
+ *vec_elt_at_index (bif->active_members, h[1]);
+ vnet_buffer (b[2])->sw_if_index[VLIB_TX] =
+ *vec_elt_at_index (bif->active_members, h[2]);
+ vnet_buffer (b[3])->sw_if_index[VLIB_TX] =
+ *vec_elt_at_index (bif->active_members, h[3]);
bond_tx_add_to_queue (ptd, h[0], bi[0]);
bond_tx_add_to_queue (ptd, h[1], bi[1]);
@@ -626,9 +409,8 @@ bond_update_sw_if_index (bond_per_thread_data_t * ptd, bond_if_t * bif,
}
else
{
- u32 sw_if_index0 = *vec_elt_at_index (bif->active_members, h[0]);
-
- vnet_buffer (b[0])->sw_if_index[VLIB_TX] = sw_if_index0;
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
+ *vec_elt_at_index (bif->active_members, h[0]);
bond_tx_add_to_queue (ptd, h[0], bi[0]);
}
@@ -735,7 +517,7 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
{
sw_if_index = *vec_elt_at_index (bif->active_members, 0);
- bond_tx_inline (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_BC);
+ bond_tx_no_hash (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_BC);
bond_tx_trace (vm, node, bif, bufs, frame->n_vectors, 0);
bond_update_sw_if_index (ptd, bif, from, bufs, &sw_if_index, n_left,
/* single_sw_if_index */ 1);
@@ -747,24 +529,10 @@ VNET_DEVICE_CLASS_TX_FN (bond_dev_class) (vlib_main_t * vm,
if (bif->n_numa_members >= 1)
n_members = bif->n_numa_members;
- if (bif->lb == BOND_LB_L2)
- bond_tx_inline (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_L2);
- else if (bif->lb == BOND_LB_L34)
- bond_tx_inline (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_L34);
- else if (bif->lb == BOND_LB_L23)
- bond_tx_inline (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_L23);
- else if (bif->lb == BOND_LB_RR)
- bond_tx_inline (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_RR);
+ if (bif->lb == BOND_LB_RR)
+ bond_tx_no_hash (vm, bif, bufs, hashes, n_left, n_members, BOND_LB_RR);
else
- {
- vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
- vlib_increment_simple_counter (
- vnet_main.interface_main.sw_if_counters + VNET_INTERFACE_COUNTER_DROP,
- thread_index, bif->sw_if_index, frame->n_vectors);
- vlib_error_count (vm, node->node_index, BOND_TX_ERROR_BAD_LB_MODE,
- frame->n_vectors);
- return frame->n_vectors;
- }
+ bond_tx_hash (vm, ptd, bif, bufs, hashes, n_left);
/* calculate port out of hash */
h = hashes;
@@ -805,8 +573,10 @@ bond_active_interface_switch_cb (vnet_main_t * vnm, u32 sw_if_index,
{
bond_main_t *bm = &bond_main;
- ip4_neighbor_advertise (bm->vlib_main, bm->vnet_main, sw_if_index, NULL);
- ip6_neighbor_advertise (bm->vlib_main, bm->vnet_main, sw_if_index, NULL);
+ ip4_neighbor_advertise (bm->vlib_main, bm->vnet_main, sw_if_index,
+ vlib_get_thread_index (), NULL);
+ ip6_neighbor_advertise (bm->vlib_main, bm->vnet_main, sw_if_index,
+ vlib_get_thread_index (), NULL);
return (WALK_CONTINUE);
}
@@ -838,16 +608,13 @@ bond_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bond_process_node) = {
.function = bond_process,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "bond-process",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (bond_dev_class) = {
.name = "bond",
.tx_function_n_errors = BOND_TX_N_ERROR,
@@ -855,12 +622,10 @@ VNET_DEVICE_CLASS (bond_dev_class) = {
.format_device_name = format_bond_interface_name,
.set_l2_mode_function = bond_set_l2_mode_function,
.admin_up_down_function = bond_interface_admin_up_down,
- .subif_add_del_function = bond_subif_add_del_function,
.format_tx_trace = format_bond_tx_trace,
.mac_addr_add_del_function = bond_add_del_mac_address,
};
-/* *INDENT-ON* */
static clib_error_t *
bond_member_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
diff --git a/src/vnet/bonding/node.c b/src/vnet/bonding/node.c
index 21a968177fe..66de1e4dd80 100644
--- a/src/vnet/bonding/node.c
+++ b/src/vnet/bonding/node.c
@@ -397,7 +397,6 @@ bond_input_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (bond_input_node) = {
.name = "bond-input",
.vector_size = sizeof (u32),
@@ -421,7 +420,6 @@ VNET_FEATURE_INIT (bond_input, static) =
.node_name = "bond-input",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON* */
static clib_error_t *
bond_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
diff --git a/src/vnet/bonding/node.h b/src/vnet/bonding/node.h
index 843c236f123..c6602ef01b9 100644
--- a/src/vnet/bonding/node.h
+++ b/src/vnet/bonding/node.h
@@ -21,6 +21,7 @@
#include <vppinfra/hash.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/interface.h>
+#include <vnet/hash/hash.h>
#define LACP_FAST_PERIODIC_TIMER 1.0
#define LACP_SHORT_TIMOUT_TIME (LACP_FAST_PERIODIC_TIMER * 3)
@@ -163,6 +164,7 @@ typedef struct
typedef struct
{
bond_per_port_queue_t *per_port_queue;
+ void **data;
} bond_per_thread_data_t;
typedef struct
@@ -208,6 +210,7 @@ typedef struct
u8 hw_address[6];
clib_spinlock_t lockp;
+ vnet_hash_fn_t hash_func;
} bond_if_t;
typedef struct
diff --git a/src/vnet/buffer.c b/src/vnet/buffer.c
index ef93185b2c1..721f856ddda 100644
--- a/src/vnet/buffer.c
+++ b/src/vnet/buffer.c
@@ -37,10 +37,9 @@ format_vnet_buffer_offload (u8 *s, va_list *args)
return s;
}
-u8 *
-format_vnet_buffer (u8 * s, va_list * args)
+static u8 *
+format_vnet_buffer_internal (u8 *s, vlib_buffer_t *b, int no_chain)
{
- vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
u32 indent = format_get_indent (s);
u8 *a = 0;
@@ -72,7 +71,8 @@ format_vnet_buffer (u8 * s, va_list * args)
if (b->flags & VNET_BUFFER_F_LOOP_COUNTER_VALID)
a = format (a, "loop-counter %d ", vnet_buffer2 (b)->loop_counter);
- s = format (s, "%U", format_vlib_buffer_no_chain, b);
+ s = format (s, "%U",
+ no_chain ? format_vlib_buffer_no_chain : format_vlib_buffer, b);
if (a)
s = format (s, "\n%U%v", format_white_space, indent, a);
vec_free (a);
@@ -80,6 +80,19 @@ format_vnet_buffer (u8 * s, va_list * args)
return s;
}
+u8 *
+format_vnet_buffer_no_chain (u8 *s, va_list *args)
+{
+ vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
+ return format_vnet_buffer_internal (s, b, 1 /* no_chain */);
+}
+
+u8 *
+format_vnet_buffer (u8 *s, va_list *args)
+{
+ vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
+ return format_vnet_buffer_internal (s, b, 0 /* no_chain */);
+}
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h
index 600782c3864..2f34aa4b5fc 100644
--- a/src/vnet/buffer.h
+++ b/src/vnet/buffer.h
@@ -190,8 +190,17 @@ typedef struct
/* Rewrite length */
u8 save_rewrite_length;
- /* MFIB RPF ID */
- u32 rpf_id;
+ union
+ {
+ /* sw_if_index of the local interface the buffer was received on
+ * - if hitting a DPO_RECEIVE - it is set in ip[46]-receive.
+ * This is ~0 if the dpo is not a receive dpo, or if the
+ * interface is not specified (e.g. route add via local) */
+ u32 rx_sw_if_index;
+
+ /* MFIB RPF ID */
+ u32 rpf_id;
+ };
};
/* ICMP */
@@ -235,7 +244,8 @@ typedef struct
u8 save_rewrite_length;
u8 ip_proto; /* protocol in ip header */
u8 icmp_type_or_tcp_flags;
- u8 is_non_first_fragment;
+ u8 is_non_first_fragment : 1;
+ u8 l4_layer_truncated : 7;
u32 tcp_seq_number;
};
/* full reassembly output variables */
@@ -310,13 +320,13 @@ typedef struct
/* L2 classify */
struct
{
- struct opaque_l2 pad;
+ u32 pad[4]; /* do not overlay w/ ip.fib_index nor l2 */
union
{
u32 table_index;
u32 opaque_index;
};
- u64 hash;
+ u32 hash;
} l2_classify;
/* vnet policer */
@@ -408,7 +418,9 @@ typedef struct
};
} vnet_buffer_opaque_t;
-#define VNET_REWRITE_TOTAL_BYTES (VLIB_BUFFER_PRE_DATA_SIZE)
+#define VNET_REWRITE_TOTAL_BYTES 128
+STATIC_ASSERT (VNET_REWRITE_TOTAL_BYTES <= VLIB_BUFFER_PRE_DATA_SIZE,
+ "VNET_REWRITE_TOTAL_BYTES too big");
STATIC_ASSERT (STRUCT_SIZE_OF (vnet_buffer_opaque_t, ip.save_rewrite_length)
== STRUCT_SIZE_OF (vnet_buffer_opaque_t,
@@ -455,15 +467,7 @@ typedef struct
} qos;
u8 loop_counter;
- u8 __unused[1];
-
- /* Group Based Policy */
- struct
- {
- u8 __unused;
- u8 flags;
- u16 sclass;
- } gbp;
+ u8 pad[5]; /* unused */
/**
* The L4 payload size set on input on GSO enabled interfaces
@@ -491,15 +495,7 @@ typedef struct
};
} nat;
- union
- {
- struct
- {
- u64 pad[1];
- u64 pg_replay_timestamp;
- };
- u32 unused[8];
- };
+ u32 unused[8];
} vnet_buffer_opaque2_t;
#define vnet_buffer2(b) ((vnet_buffer_opaque2_t *) (b)->opaque2)
@@ -508,8 +504,8 @@ typedef struct
* The opaque2 field of the vlib_buffer_t is interpreted as a
* vnet_buffer_opaque2_t. Hence it should be big enough to accommodate one.
*/
-STATIC_ASSERT (sizeof (vnet_buffer_opaque2_t) <=
- STRUCT_SIZE_OF (vlib_buffer_t, opaque2),
+STATIC_ASSERT (sizeof (vnet_buffer_opaque2_t) ==
+ STRUCT_SIZE_OF (vlib_buffer_t, opaque2),
"VNET buffer opaque2 meta-data too large for vlib_buffer");
#define gso_mtu_sz(b) (vnet_buffer2(b)->gso_size + \
@@ -517,7 +513,7 @@ STATIC_ASSERT (sizeof (vnet_buffer_opaque2_t) <=
vnet_buffer(b)->l4_hdr_offset - \
vnet_buffer (b)->l3_hdr_offset)
-
+format_function_t format_vnet_buffer_no_chain;
format_function_t format_vnet_buffer;
format_function_t format_vnet_buffer_offload;
format_function_t format_vnet_buffer_flags;
diff --git a/src/vnet/classify/classify.api b/src/vnet/classify/classify.api
index c569fe6a599..00963f6fb6a 100644
--- a/src/vnet/classify/classify.api
+++ b/src/vnet/classify/classify.api
@@ -420,6 +420,46 @@ autoreply define input_acl_set_interface
bool is_add;
};
+/** \brief Add/del punt ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param ip4_table_index - ip4 punt classify table index (~0 for skip)
+ @param ip6_table_index - ip6 punt classify table index (~0 for skip)
+ @param is_add - add punt ACL if non-zero, else delete
+*/
+autoreply define punt_acl_add_del
+{
+ u32 client_index;
+ u32 context;
+ u32 ip4_table_index [default=0xffffffff];
+ u32 ip6_table_index [default=0xffffffff];
+ bool is_add [default=true];
+};
+
+/** \brief Get classify table ids configured for punt ACL
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define punt_acl_get
+{
+ u32 client_index;
+ u32 context;
+};
+
+/** \brief Reply for punt_acl_get
+ @param context - sender context which was passed in the request
+ @param retval - return value (0 for success)
+ @param ip4_table_index - ip4 punt classify table index (~0 for none)
+ @param ip6_table_index - ip6 punt classify table index (~0 for none)
+*/
+define punt_acl_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 ip4_table_index;
+ u32 ip6_table_index;
+};
+
/** \brief Set/unset output ACL interface
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/classify/classify_api.c b/src/vnet/classify/classify_api.c
index 39f7b98007d..fc57b006d37 100644
--- a/src/vnet/classify/classify_api.c
+++ b/src/vnet/classify/classify_api.c
@@ -91,7 +91,8 @@ static void vl_api_classify_pcap_lookup_table_t_handler
out:
rmp = vl_msg_api_alloc (sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_PCAP_LOOKUP_TABLE_REPLY);
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_CLASSIFY_PCAP_LOOKUP_TABLE_REPLY);
rmp->context = mp->context;
rmp->retval = ntohl (rv);
rmp->table_index = htonl (table_index);
@@ -114,9 +115,8 @@ static void vl_api_classify_pcap_set_table_t_handler
u32 table_index = ntohl (mp->table_index);
u32 sw_if_index = ntohl (mp->sw_if_index);
- if (sw_if_index == ~0
- || sw_if_index >= vec_len (cm->classify_table_index_by_sw_if_index)
- || (table_index != ~0 && pool_is_free_index (cm->tables, table_index)))
+ if (sw_if_index == ~0 ||
+ (table_index != ~0 && pool_is_free_index (cm->tables, table_index)))
{
rv = VNET_API_ERROR_INVALID_VALUE;
goto out;
@@ -132,7 +132,8 @@ static void vl_api_classify_pcap_set_table_t_handler
out:
rmp = vl_msg_api_alloc (sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_PCAP_SET_TABLE_REPLY);
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_CLASSIFY_PCAP_SET_TABLE_REPLY);
rmp->context = mp->context;
rmp->retval = ntohl (rv);
rmp->table_index = htonl (table_index);
@@ -181,7 +182,8 @@ static void vl_api_classify_pcap_get_tables_t_handler
out:
count = vec_len (tables);
rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp) + count * sizeof (u32));
- rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_PCAP_GET_TABLES_REPLY);
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_CLASSIFY_PCAP_GET_TABLES_REPLY);
rmp->context = mp->context;
rmp->retval = ntohl (rv);
rmp->count = htonl (count);
@@ -233,7 +235,8 @@ static void vl_api_classify_trace_lookup_table_t_handler
out:
rmp = vl_msg_api_alloc (sizeof (*rmp));
- rmp->_vl_msg_id = ntohs ((VL_API_CLASSIFY_TRACE_LOOKUP_TABLE_REPLY));
+ rmp->_vl_msg_id =
+ ntohs ((REPLY_MSG_ID_BASE + VL_API_CLASSIFY_TRACE_LOOKUP_TABLE_REPLY));
rmp->context = mp->context;
rmp->retval = ntohl (rv);
rmp->table_index = htonl (table_index);
@@ -270,7 +273,8 @@ static void vl_api_classify_trace_set_table_t_handler
out:
rmp = vl_msg_api_alloc (sizeof (*rmp));
- rmp->_vl_msg_id = ntohs ((VL_API_CLASSIFY_TRACE_SET_TABLE_REPLY));
+ rmp->_vl_msg_id =
+ ntohs ((REPLY_MSG_ID_BASE + VL_API_CLASSIFY_TRACE_SET_TABLE_REPLY));
rmp->context = mp->context;
rmp->retval = ntohl (rv);
rmp->table_index = htonl (table_index);
@@ -311,7 +315,8 @@ static void vl_api_classify_trace_get_tables_t_handler
out:
count = vec_len (tables);
rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp) + count * sizeof (u32));
- rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TRACE_GET_TABLES_REPLY);
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_CLASSIFY_TRACE_GET_TABLES_REPLY);
rmp->context = mp->context;
rmp->retval = ntohl (rv);
rmp->count = htonl (count);
@@ -374,7 +379,6 @@ static void vl_api_classify_add_del_table_t_handler
current_data_flag, current_data_offset, mp->is_add, mp->del_chain);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CLASSIFY_ADD_DEL_TABLE_REPLY,
({
if (rv == 0 && mp->is_add)
@@ -391,7 +395,6 @@ out:
rmp->new_table_index = ~0;
}
}));
- /* *INDENT-ON* */
}
static void vl_api_classify_add_del_session_t_handler
@@ -469,7 +472,7 @@ send_policer_classify_details (u32 sw_if_index,
mp = vl_msg_api_alloc (sizeof (*mp));
clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_POLICER_CLASSIFY_DETAILS);
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_POLICER_CLASSIFY_DETAILS);
mp->context = context;
mp->sw_if_index = htonl (sw_if_index);
mp->table_index = htonl (table_index);
@@ -528,17 +531,16 @@ vl_api_classify_table_ids_t_handler (vl_api_classify_table_ids_t * mp)
u32 *table_ids = 0;
u32 count;
- /* *INDENT-OFF* */
pool_foreach (t, cm->tables)
{
vec_add1 (table_ids, ntohl(t - cm->tables));
}
- /* *INDENT-ON* */
count = vec_len (table_ids);
vl_api_classify_table_ids_reply_t *rmp;
rmp = vl_msg_api_alloc_as_if_client (sizeof (*rmp) + count * sizeof (u32));
- rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_IDS_REPLY);
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_CLASSIFY_TABLE_IDS_REPLY);
rmp->context = mp->context;
rmp->count = ntohl (count);
clib_memcpy (rmp->ids, table_ids, count * sizeof (u32));
@@ -589,7 +591,6 @@ static void
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CLASSIFY_TABLE_BY_INTERFACE_REPLY,
({
rmp->sw_if_index = ntohl(sw_if_index);
@@ -597,7 +598,6 @@ static void
rmp->ip4_table_id = ntohl(acl[IN_OUT_ACL_TABLE_IP4]);
rmp->ip6_table_id = ntohl(acl[IN_OUT_ACL_TABLE_IP6]);
}));
- /* *INDENT-ON* */
vec_free (acl);
}
@@ -616,34 +616,35 @@ vl_api_classify_table_info_t_handler (vl_api_classify_table_info_t * mp)
u32 table_id = ntohl (mp->table_id);
vnet_classify_table_t *t;
- /* *INDENT-OFF* */
- pool_foreach (t, cm->tables)
+ pool_foreach (t, cm->tables)
{
- if (table_id == t - cm->tables)
- {
- rmp = vl_msg_api_alloc_as_if_client
- (sizeof (*rmp) + t->match_n_vectors * sizeof (u32x4));
- rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_INFO_REPLY);
- rmp->context = mp->context;
- rmp->table_id = ntohl(table_id);
- rmp->nbuckets = ntohl(t->nbuckets);
- rmp->match_n_vectors = ntohl(t->match_n_vectors);
- rmp->skip_n_vectors = ntohl(t->skip_n_vectors);
- rmp->active_sessions = ntohl(t->active_elements);
- rmp->next_table_index = ntohl(t->next_table_index);
- rmp->miss_next_index = ntohl(t->miss_next_index);
- rmp->mask_length = ntohl(t->match_n_vectors * sizeof (u32x4));
- clib_memcpy(rmp->mask, t->mask, t->match_n_vectors * sizeof(u32x4));
- rmp->retval = 0;
- break;
- }
- }
- /* *INDENT-ON* */
+ if (table_id == t - cm->tables)
+ {
+ rmp = vl_msg_api_alloc_as_if_client (
+ sizeof (*rmp) + t->match_n_vectors * sizeof (u32x4));
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_CLASSIFY_TABLE_INFO_REPLY);
+ rmp->context = mp->context;
+ rmp->table_id = ntohl (table_id);
+ rmp->nbuckets = ntohl (t->nbuckets);
+ rmp->match_n_vectors = ntohl (t->match_n_vectors);
+ rmp->skip_n_vectors = ntohl (t->skip_n_vectors);
+ rmp->active_sessions = ntohl (t->active_elements);
+ rmp->next_table_index = ntohl (t->next_table_index);
+ rmp->miss_next_index = ntohl (t->miss_next_index);
+ rmp->mask_length = ntohl (t->match_n_vectors * sizeof (u32x4));
+ clib_memcpy (rmp->mask, t->mask,
+ t->match_n_vectors * sizeof (u32x4));
+ rmp->retval = 0;
+ break;
+ }
+ }
if (rmp == 0)
{
rmp = vl_msg_api_alloc (sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_TABLE_INFO_REPLY);
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_CLASSIFY_TABLE_INFO_REPLY);
rmp->context = mp->context;
rmp->retval = ntohl (VNET_API_ERROR_CLASSIFY_TABLE_NOT_FOUND);
}
@@ -659,9 +660,10 @@ send_classify_session_details (vl_api_registration_t * reg,
{
vl_api_classify_session_details_t *rmp;
- rmp = vl_msg_api_alloc (sizeof (*rmp));
+ rmp = vl_msg_api_alloc (sizeof (*rmp) + match_length);
clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (VL_API_CLASSIFY_SESSION_DETAILS);
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_CLASSIFY_SESSION_DETAILS);
rmp->context = context;
rmp->table_id = ntohl (table_id);
rmp->hit_next_index = ntohl (e->next_index);
@@ -686,7 +688,6 @@ vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, cm->tables)
{
if (table_id == t - cm->tables)
@@ -720,7 +721,6 @@ vl_api_classify_session_dump_t_handler (vl_api_classify_session_dump_t * mp)
break;
}
}
- /* *INDENT-ON* */
}
static void
@@ -755,7 +755,7 @@ send_flow_classify_details (u32 sw_if_index,
mp = vl_msg_api_alloc (sizeof (*mp));
clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_FLOW_CLASSIFY_DETAILS);
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_FLOW_CLASSIFY_DETAILS);
mp->context = context;
mp->sw_if_index = htonl (sw_if_index);
mp->table_index = htonl (table_index);
@@ -887,6 +887,43 @@ static void vl_api_input_acl_set_interface_t_handler
REPLY_MACRO (VL_API_INPUT_ACL_SET_INTERFACE_REPLY);
}
+static void
+vl_api_punt_acl_add_del_t_handler (vl_api_punt_acl_add_del_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_punt_acl_add_del_reply_t *rmp;
+ int rv;
+
+ rv = vnet_set_in_out_acl_intfc (
+ vm, 0 /* sw_if_index */, ~0 /* ip4_table_index */,
+ ~0 /* ip6_table_index */, ~0 /* l2_table_index */,
+ ntohl (mp->ip4_table_index), ntohl (mp->ip6_table_index), mp->is_add,
+ 0 /* is_output */);
+
+ REPLY_MACRO (VL_API_PUNT_ACL_ADD_DEL_REPLY);
+}
+
+static void
+vl_api_punt_acl_get_t_handler (vl_api_punt_acl_get_t *mp)
+{
+ vl_api_punt_acl_get_reply_t *rmp;
+ int rv = 0;
+
+ const in_out_acl_main_t *am = &in_out_acl_main;
+
+ u32 *const *tables =
+ am->classify_table_index_by_sw_if_index[IN_OUT_ACL_INPUT_TABLE_GROUP];
+ const u32 *ip4_table = tables[IN_OUT_ACL_TABLE_IP4_PUNT];
+ const u32 *ip6_table = tables[IN_OUT_ACL_TABLE_IP6_PUNT];
+ const u32 ip4_table_index = vec_len (ip4_table) ? ip4_table[0] : ~0;
+ const u32 ip6_table_index = vec_len (ip6_table) ? ip6_table[0] : ~0;
+
+ REPLY_MACRO2 (VL_API_PUNT_ACL_GET_REPLY, ({
+ rmp->ip4_table_index = ntohl (ip4_table_index);
+ rmp->ip6_table_index = ntohl (ip6_table_index);
+ }));
+}
+
static void vl_api_output_acl_set_interface_t_handler
(vl_api_output_acl_set_interface_t * mp)
{
@@ -915,6 +952,16 @@ static void vl_api_output_acl_set_interface_t_handler
static clib_error_t *
classify_api_hookup (vlib_main_t * vm)
{
+ api_main_t *am = vlibapi_get_main ();
+
+ /*
+ * Trace space for classifier mask+match
+ */
+ vl_api_increase_msg_trace_size (am, VL_API_CLASSIFY_ADD_DEL_TABLE,
+ 5 * sizeof (u32x4));
+ vl_api_increase_msg_trace_size (am, VL_API_CLASSIFY_ADD_DEL_SESSION,
+ 5 * sizeof (u32x4));
+
/*
* Set up the (msg_name, crc, message-id) table
*/
diff --git a/src/vnet/classify/flow_classify.c b/src/vnet/classify/flow_classify.c
index afdadc66235..7197558a77a 100644
--- a/src/vnet/classify/flow_classify.c
+++ b/src/vnet/classify/flow_classify.c
@@ -150,7 +150,6 @@ set_flow_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_input_acl_command, static) = {
.path = "set flow classify",
.short_help =
@@ -158,7 +157,6 @@ VLIB_CLI_COMMAND (set_input_acl_command, static) = {
" [ip6-table <index>] [del]",
.function = set_flow_classify_command_fn,
};
-/* *INDENT-ON* */
static uword
unformat_table_type (unformat_input_t * input, va_list * va)
@@ -215,13 +213,11 @@ show_flow_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_classify_command, static) = {
.path = "show classify flow",
.short_help = "show classify flow type [ip4|ip6]",
.function = show_flow_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/classify/flow_classify_node.c b/src/vnet/classify/flow_classify_node.c
index 4989bf0a012..a34bab6190b 100644
--- a/src/vnet/classify/flow_classify_node.c
+++ b/src/vnet/classify/flow_classify_node.c
@@ -184,7 +184,7 @@ flow_classify_inline (vlib_main_t * vm,
u32 table_index0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
- u64 hash0;
+ u32 hash0;
u8 *h0;
/* Stride 3 seems to work best */
@@ -193,7 +193,7 @@ flow_classify_inline (vlib_main_t * vm,
vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
vnet_classify_table_t *tp1;
u32 table_index1;
- u64 phash1;
+ u32 phash1;
table_index1 = vnet_buffer (p1)->l2_classify.table_index;
@@ -279,7 +279,6 @@ VLIB_NODE_FN (ip4_flow_classify_node) (vlib_main_t * vm,
return flow_classify_inline (vm, node, frame, FLOW_CLASSIFY_TABLE_IP4);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_flow_classify_node) = {
.name = "ip4-flow-classify",
.vector_size = sizeof (u32),
@@ -291,7 +290,6 @@ VLIB_REGISTER_NODE (ip4_flow_classify_node) = {
[FLOW_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_flow_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -300,7 +298,6 @@ VLIB_NODE_FN (ip6_flow_classify_node) (vlib_main_t * vm,
return flow_classify_inline (vm, node, frame, FLOW_CLASSIFY_TABLE_IP6);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_flow_classify_node) = {
.name = "ip6-flow-classify",
.vector_size = sizeof (u32),
@@ -313,7 +310,6 @@ VLIB_REGISTER_NODE (ip6_flow_classify_node) = {
},
};
-/* *INDENT-ON* */
static clib_error_t *
diff --git a/src/vnet/classify/in_out_acl.c b/src/vnet/classify/in_out_acl.c
index 7f5a926212c..af765139332 100644
--- a/src/vnet/classify/in_out_acl.c
+++ b/src/vnet/classify/in_out_acl.c
@@ -21,63 +21,75 @@
in_out_acl_main_t in_out_acl_main;
static int
-vnet_in_out_acl_ip_feature_enable (vlib_main_t * vnm,
- in_out_acl_main_t * am,
- u32 sw_if_index,
- in_out_acl_table_id_t tid,
- int feature_enable, int is_output)
+vnet_in_out_acl_feature_enable (in_out_acl_main_t *am, u32 sw_if_index,
+ in_out_acl_table_id_t tid, int feature_enable,
+ int is_output)
{
+ const char *arc_name, *feature_name;
+ vnet_feature_config_main_t *fcm;
+ u8 arc;
+ int rv;
- if (tid == IN_OUT_ACL_TABLE_L2)
+ switch (tid)
{
+ case IN_OUT_ACL_N_TABLES:
+ return VNET_API_ERROR_NO_SUCH_TABLE;
+ case IN_OUT_ACL_TABLE_L2:
if (is_output)
l2output_intf_bitmap_enable (sw_if_index, L2OUTPUT_FEAT_ACL,
feature_enable);
else
l2input_intf_bitmap_enable (sw_if_index, L2INPUT_FEAT_ACL,
feature_enable);
+ return 0;
+ case IN_OUT_ACL_TABLE_IP4:
+ arc_name = is_output ? "ip4-output" : "ip4-unicast";
+ feature_name = is_output ? "ip4-outacl" : "ip4-inacl";
+ break;
+ case IN_OUT_ACL_TABLE_IP6:
+ arc_name = is_output ? "ip6-output" : "ip6-unicast";
+ feature_name = is_output ? "ip6-outacl" : "ip6-inacl";
+ break;
+ case IN_OUT_ACL_TABLE_IP4_PUNT:
+ if (sw_if_index != 0)
+ return VNET_API_ERROR_INVALID_INTERFACE;
+ arc_name = "ip4-punt";
+ feature_name = "ip4-punt-acl";
+ break;
+ case IN_OUT_ACL_TABLE_IP6_PUNT:
+ if (sw_if_index != 0)
+ return VNET_API_ERROR_INVALID_INTERFACE;
+ arc_name = "ip6-punt";
+ feature_name = "ip6-punt-acl";
+ break;
}
- else
- { /* IP[46] */
- vnet_feature_config_main_t *fcm;
- u8 arc;
- if (tid == IN_OUT_ACL_TABLE_IP4)
- {
- char *arc_name = is_output ? "ip4-output" : "ip4-unicast";
- vnet_feature_enable_disable (arc_name,
- is_output ? "ip4-outacl" : "ip4-inacl",
- sw_if_index, feature_enable, 0, 0);
- arc = vnet_get_feature_arc_index (arc_name);
- }
- else
- {
- char *arc_name = is_output ? "ip6-output" : "ip6-unicast";
- vnet_feature_enable_disable (arc_name,
- is_output ? "ip6-outacl" : "ip6-inacl",
- sw_if_index, feature_enable, 0, 0);
- arc = vnet_get_feature_arc_index (arc_name);
- }
+ rv = vnet_feature_enable_disable (arc_name, feature_name, sw_if_index,
+ feature_enable, 0, 0);
+ if (rv)
+ return rv;
- fcm = vnet_get_feature_arc_config_main (arc);
- am->vnet_config_main[is_output][tid] = &fcm->config_main;
- }
+ arc = vnet_get_feature_arc_index (arc_name);
+ fcm = vnet_get_feature_arc_config_main (arc);
+ am->vnet_config_main[is_output][tid] = &fcm->config_main;
return 0;
}
int
-vnet_set_in_out_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
- u32 ip4_table_index,
- u32 ip6_table_index, u32 l2_table_index,
- u32 is_add, u32 is_output)
+vnet_set_in_out_acl_intfc (vlib_main_t *vm, u32 sw_if_index,
+ u32 ip4_table_index, u32 ip6_table_index,
+ u32 l2_table_index, u32 ip4_punt_table_index,
+ u32 ip6_punt_table_index, u32 is_add, u32 is_output)
{
in_out_acl_main_t *am = &in_out_acl_main;
vnet_classify_main_t *vcm = am->vnet_classify_main;
- u32 acl[IN_OUT_ACL_N_TABLES] = { ip4_table_index, ip6_table_index,
- l2_table_index
+ u32 acl[IN_OUT_ACL_N_TABLES] = {
+ ip4_table_index, ip6_table_index, l2_table_index,
+ ip4_punt_table_index, ip6_punt_table_index,
};
u32 ti;
+ int rv;
/* Assume that we've validated sw_if_index in the API layer */
@@ -111,8 +123,10 @@ vnet_set_in_out_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
!= ~0)
return 0;
- vnet_in_out_acl_ip_feature_enable (vm, am, sw_if_index, ti, is_add,
- is_output);
+ rv = vnet_in_out_acl_feature_enable (am, sw_if_index, ti, is_add,
+ is_output);
+ if (rv)
+ return rv;
if (is_add)
am->classify_table_index_by_sw_if_index[is_output][ti][sw_if_index] =
@@ -130,9 +144,10 @@ vnet_set_input_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
u32 ip4_table_index,
u32 ip6_table_index, u32 l2_table_index, u32 is_add)
{
- return vnet_set_in_out_acl_intfc (vm, sw_if_index, ip4_table_index,
- ip6_table_index, l2_table_index, is_add,
- IN_OUT_ACL_INPUT_TABLE_GROUP);
+ return vnet_set_in_out_acl_intfc (
+ vm, sw_if_index, ip4_table_index, ip6_table_index, l2_table_index,
+ ~0 /* ip4_punt_table_index */, ~0 /* ip6_punt_table_index */, is_add,
+ IN_OUT_ACL_INPUT_TABLE_GROUP);
}
int
@@ -141,9 +156,10 @@ vnet_set_output_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
u32 ip6_table_index, u32 l2_table_index,
u32 is_add)
{
- return vnet_set_in_out_acl_intfc (vm, sw_if_index, ip4_table_index,
- ip6_table_index, l2_table_index, is_add,
- IN_OUT_ACL_OUTPUT_TABLE_GROUP);
+ return vnet_set_in_out_acl_intfc (
+ vm, sw_if_index, ip4_table_index, ip6_table_index, l2_table_index,
+ ~0 /* ip4_punt_table_index */, ~0 /* ip6_punt_table_index */, is_add,
+ IN_OUT_ACL_OUTPUT_TABLE_GROUP);
}
static clib_error_t *
@@ -155,6 +171,8 @@ set_in_out_acl_command_fn (vlib_main_t * vm,
u32 sw_if_index = ~0;
u32 ip4_table_index = ~0;
u32 ip6_table_index = ~0;
+ u32 ip4_punt_table_index = ~0;
+ u32 ip6_punt_table_index = ~0;
u32 l2_table_index = ~0;
u32 is_add = 1;
u32 idx_cnt = 0;
@@ -169,6 +187,10 @@ set_in_out_acl_command_fn (vlib_main_t * vm,
idx_cnt++;
else if (unformat (input, "ip6-table %d", &ip6_table_index))
idx_cnt++;
+ else if (unformat (input, "ip4-punt-table %d", &ip4_punt_table_index))
+ idx_cnt++;
+ else if (unformat (input, "ip6-punt-table %d", &ip6_punt_table_index))
+ idx_cnt++;
else if (unformat (input, "l2-table %d", &l2_table_index))
idx_cnt++;
else if (unformat (input, "del"))
@@ -186,9 +208,9 @@ set_in_out_acl_command_fn (vlib_main_t * vm,
if (idx_cnt > 1)
return clib_error_return (0, "Only one table index per API is allowed.");
- rv = vnet_set_in_out_acl_intfc (vm, sw_if_index, ip4_table_index,
- ip6_table_index, l2_table_index, is_add,
- is_output);
+ rv = vnet_set_in_out_acl_intfc (
+ vm, sw_if_index, ip4_table_index, ip6_table_index, l2_table_index,
+ ip4_punt_table_index, ip6_punt_table_index, is_add, is_output);
switch (rv)
{
@@ -200,6 +222,9 @@ set_in_out_acl_command_fn (vlib_main_t * vm,
case VNET_API_ERROR_NO_SUCH_ENTRY:
return clib_error_return (0, "No such classifier table");
+
+ default:
+ return clib_error_return (0, "Error: %d", rv);
}
return 0;
}
@@ -230,13 +255,13 @@ set_output_acl_command_fn (vlib_main_t * vm,
* Note: Only one table index per API call is allowed.
*
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_input_acl_command, static) = {
- .path = "set interface input acl",
- .short_help =
+ .path = "set interface input acl",
+ .short_help =
"set interface input acl intfc <int> [ip4-table <index>]\n"
- " [ip6-table <index>] [l2-table <index>] [del]",
- .function = set_input_acl_command_fn,
+ " [ip6-table <index>] [l2-table <index>] [ip4-punt-table <index>]\n"
+ " [ip6-punt-table <index> [del]",
+ .function = set_input_acl_command_fn,
};
VLIB_CLI_COMMAND (set_output_acl_command, static) = {
.path = "set interface output acl",
@@ -245,7 +270,6 @@ VLIB_CLI_COMMAND (set_output_acl_command, static) = {
" [ip6-table <index>] [l2-table <index>] [del]",
.function = set_output_acl_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
in_out_acl_init (vlib_main_t * vm)
@@ -258,12 +282,10 @@ in_out_acl_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (in_out_acl_init) =
{
.runs_after = VLIB_INITS("ip_in_out_acl_init"),
};
-/* *INDENT-ON* */
uword
unformat_acl_type (unformat_input_t * input, va_list * args)
@@ -366,7 +388,6 @@ show_outacl_command_fn (vlib_main_t * vm,
IN_OUT_ACL_OUTPUT_TABLE_GROUP);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_inacl_command, static) = {
.path = "show inacl",
.short_help = "show inacl type [ip4|ip6|l2]",
@@ -377,7 +398,6 @@ VLIB_CLI_COMMAND (show_outacl_command, static) = {
.short_help = "show outacl type [ip4|ip6|l2]",
.function = show_outacl_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/classify/in_out_acl.h b/src/vnet/classify/in_out_acl.h
index be0323055d8..331c64f531f 100644
--- a/src/vnet/classify/in_out_acl.h
+++ b/src/vnet/classify/in_out_acl.h
@@ -31,6 +31,8 @@ typedef enum
IN_OUT_ACL_TABLE_IP4,
IN_OUT_ACL_TABLE_IP6,
IN_OUT_ACL_TABLE_L2,
+ IN_OUT_ACL_TABLE_IP4_PUNT,
+ IN_OUT_ACL_TABLE_IP6_PUNT,
IN_OUT_ACL_N_TABLES,
} in_out_acl_table_id_t;
@@ -59,14 +61,14 @@ typedef struct
extern in_out_acl_main_t in_out_acl_main;
-int vnet_set_in_out_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
- u32 ip4_table_index,
- u32 ip6_table_index,
- u32 l2_table_index, u32 is_add, u32 is_output);
+int vnet_set_in_out_acl_intfc (vlib_main_t *vm, u32 sw_if_index,
+ u32 ip4_table_index, u32 ip6_table_index,
+ u32 l2_table_index, u32 ip4_punt_table_index,
+ u32 ip6_punt_table_index, u32 is_add,
+ u32 is_output);
-int vnet_set_input_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
- u32 ip4_table_index,
- u32 ip6_table_index,
+int vnet_set_input_acl_intfc (vlib_main_t *vm, u32 sw_if_index,
+ u32 ip4_table_index, u32 ip6_table_index,
u32 l2_table_index, u32 is_add);
int vnet_set_output_acl_intfc (vlib_main_t * vm, u32 sw_if_index,
diff --git a/src/vnet/classify/ip_classify.c b/src/vnet/classify/ip_classify.c
index a5c044521bf..e8562c6912c 100644
--- a/src/vnet/classify/ip_classify.c
+++ b/src/vnet/classify/ip_classify.c
@@ -190,7 +190,7 @@ ip_classify_inline (vlib_main_t * vm,
u32 table_index0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
- u64 hash0;
+ u32 hash0;
u8 *h0;
/* Stride 3 seems to work best */
@@ -199,7 +199,7 @@ ip_classify_inline (vlib_main_t * vm,
vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
vnet_classify_table_t *tp1;
u32 table_index1;
- u64 phash1;
+ u32 phash1;
table_index1 = vnet_buffer (p1)->l2_classify.table_index;
@@ -309,7 +309,6 @@ VLIB_NODE_FN (ip4_classify_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_classify_node) = {
.name = "ip4-classify",
.vector_size = sizeof (u32),
@@ -320,7 +319,6 @@ VLIB_REGISTER_NODE (ip4_classify_node) = {
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -330,7 +328,6 @@ VLIB_NODE_FN (ip6_classify_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_classify_node) = {
.name = "ip6-classify",
.vector_size = sizeof (u32),
@@ -341,7 +338,6 @@ VLIB_REGISTER_NODE (ip6_classify_node) = {
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
static clib_error_t *
diff --git a/src/vnet/classify/pcap_classify.h b/src/vnet/classify/pcap_classify.h
index e079816f62c..a4ebcd1241c 100644
--- a/src/vnet/classify/pcap_classify.h
+++ b/src/vnet/classify/pcap_classify.h
@@ -47,11 +47,11 @@ vnet_is_packet_pcaped (vnet_pcap_t *pp, vlib_buffer_t *b, u32 sw_if_index)
return 0; /* wrong error */
if (filter_classify_table_index != ~0 &&
- vnet_is_packet_traced_inline (b, filter_classify_table_index,
- 0 /* full classify */) != 1)
+ pp->current_filter_function (b, filter_classify_table_index,
+ 0 /* full classify */) != 1)
return 0; /* not matching the filter, skip */
- return 1; /* success */
+ return 1;
}
/*
diff --git a/src/vnet/classify/policer_classify.c b/src/vnet/classify/policer_classify.c
index 4cf12a24e9e..814adefc987 100644
--- a/src/vnet/classify/policer_classify.c
+++ b/src/vnet/classify/policer_classify.c
@@ -164,7 +164,6 @@ set_policer_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_policer_classify_command, static) = {
.path = "set policer classify",
.short_help =
@@ -172,7 +171,6 @@ VLIB_CLI_COMMAND (set_policer_classify_command, static) = {
" [ip6-table <index>] [l2-table <index>] [del]",
.function = set_policer_classify_command_fn,
};
-/* *INDENT-ON* */
static uword
unformat_table_type (unformat_input_t * input, va_list * va)
@@ -231,13 +229,11 @@ show_policer_classify_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_policer_classify_command, static) = {
.path = "show classify policer",
.short_help = "show classify policer type [ip4|ip6|l2]",
.function = show_policer_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/classify/trace_classify.h b/src/vnet/classify/trace_classify.h
index bc25ecd0ff7..03421210d03 100644
--- a/src/vnet/classify/trace_classify.h
+++ b/src/vnet/classify/trace_classify.h
@@ -29,6 +29,8 @@
* @param u32 classify_table_index - classifier table index
* @return 0 => no trace, 1 => trace, -1 => error
*/
+int vnet_is_packet_traced (vlib_buffer_t *b, u32 classify_table_index,
+ int func);
static inline int
vnet_is_packet_traced_inline (vlib_buffer_t * b,
@@ -43,6 +45,9 @@ vnet_is_packet_traced_inline (vlib_buffer_t * b,
if (func != 0)
return -1;
+ if (classify_table_index == ~0)
+ return -1;
+
/* This will happen... */
if (pool_is_free_index (vcm->tables, classify_table_index))
return -1;
diff --git a/src/vnet/classify/vnet_classify.c b/src/vnet/classify/vnet_classify.c
index d36d93b5f31..77c1c81f9c4 100644
--- a/src/vnet/classify/vnet_classify.c
+++ b/src/vnet/classify/vnet_classify.c
@@ -139,7 +139,7 @@ vnet_classify_new_table (vnet_classify_main_t *cm, const u8 *mask,
pool_get_aligned_zero (cm->tables, t, CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (t->mask, match_n_vectors - 1, sizeof (u32x4));
+ clib_memset_u32 (t->mask, 0, 4 * ARRAY_LEN (t->mask));
clib_memcpy_fast (t->mask, mask, match_n_vectors * sizeof (u32x4));
t->next_table_index = ~0;
@@ -148,6 +148,7 @@ vnet_classify_new_table (vnet_classify_main_t *cm, const u8 *mask,
t->match_n_vectors = match_n_vectors;
t->skip_n_vectors = skip_n_vectors;
t->entries_per_page = 2;
+ t->load_mask = pow2_mask (match_n_vectors * 2);
t->mheap = clib_mem_create_heap (0, memory_size, 1 /* locked */ ,
"classify");
@@ -175,7 +176,6 @@ vnet_classify_delete_table_index (vnet_classify_main_t * cm,
/* Recursively delete the entire chain */
vnet_classify_delete_table_index (cm, t->next_table_index, del_chain);
- vec_free (t->mask);
vec_free (t->buckets);
clib_mem_destroy_heap (t->mheap);
pool_put (cm->tables, t);
@@ -293,7 +293,7 @@ split_and_rehash (vnet_classify_table_t * t,
for (i = 0; i < length_in_entries; i++)
{
- u64 new_hash;
+ u32 new_hash;
v = vnet_classify_entry_at_index (t, old_values, i);
@@ -424,7 +424,7 @@ vnet_classify_add_del (vnet_classify_table_t *t, vnet_classify_entry_t *add_v,
u32 value_index;
int rv = 0;
int i;
- u64 hash, new_hash;
+ u32 hash, new_hash;
u32 limit;
u32 old_log2_pages, new_log2_pages;
u32 thread_index = vlib_get_thread_index ();
@@ -640,28 +640,26 @@ unlock:
return rv;
}
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
ethernet_header_t eh;
ip4_header_t ip;
}) classify_data_or_mask_t;
-/* *INDENT-ON* */
-u64
-vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h)
+u32
+vnet_classify_hash_packet (const vnet_classify_table_t *t, u8 *h)
{
return vnet_classify_hash_packet_inline (t, h);
}
vnet_classify_entry_t *
-vnet_classify_find_entry (vnet_classify_table_t * t,
- u8 * h, u64 hash, f64 now)
+vnet_classify_find_entry (const vnet_classify_table_t *t, u8 *h, u32 hash,
+ f64 now)
{
return vnet_classify_find_entry_inline (t, h, hash, now);
}
-static u8 *
-format_classify_entry (u8 * s, va_list * args)
+u8 *
+format_classify_entry (u8 *s, va_list *args)
{
vnet_classify_table_t *t = va_arg (*args, vnet_classify_table_t *);
vnet_classify_entry_t *e = va_arg (*args, vnet_classify_entry_t *);
@@ -777,8 +775,10 @@ vnet_classify_add_del_table (vnet_classify_main_t *cm, const u8 *mask,
else /* update */
{
vnet_classify_main_t *cm = &vnet_classify_main;
- t = pool_elt_at_index (cm->tables, *table_index);
+ if (pool_is_free_index (cm->tables, *table_index))
+ return VNET_API_ERROR_CLASSIFY_TABLE_NOT_FOUND;
+ t = pool_elt_at_index (cm->tables, *table_index);
t->next_table_index = next_table_index;
}
return 0;
@@ -1233,12 +1233,16 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
u8 *l2 = 0;
u8 *l3 = 0;
u8 *l4 = 0;
+ u8 add_l2 = 1;
int i;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "hex %U", unformat_hex_string, &mask))
;
+ else if (unformat (input, "l2 none"))
+ /* Don't add the l2 header in the mask */
+ add_l2 = 0;
else if (unformat (input, "l2 %U", unformat_l2_mask, &l2))
;
else if (unformat (input, "l3 %U", unformat_l3_mask, &l3))
@@ -1249,6 +1253,15 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
break;
}
+ if (l2 && !add_l2)
+ {
+ vec_free (mask);
+ vec_free (l2);
+ vec_free (l3);
+ vec_free (l4);
+ return 0;
+ }
+
if (l4 && !l3)
{
vec_free (mask);
@@ -1261,15 +1274,20 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
{
if (l2 || l3 || l4)
{
- /* "With a free Ethernet header in every package" */
- if (l2 == 0)
- vec_validate (l2, 13);
- mask = l2;
- if (l3)
+ if (add_l2)
{
- vec_append (mask, l3);
- vec_free (l3);
+ /* "With a free Ethernet header in every package" */
+ if (l2 == 0)
+ vec_validate (l2, 13);
+ mask = l2;
+ if (l3)
+ {
+ vec_append (mask, l3);
+ vec_free (l3);
+ }
}
+ else
+ mask = l3;
if (l4)
{
vec_append (mask, l4);
@@ -1302,7 +1320,7 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
if (match == 0)
clib_warning ("BUG: match 0");
- _vec_len (mask) = match * sizeof (u32x4);
+ vec_set_len (mask, match * sizeof (u32x4));
*matchp = match;
*maskp = mask;
@@ -1313,12 +1331,11 @@ unformat_classify_mask (unformat_input_t * input, va_list * args)
return 0;
}
-#define foreach_l2_input_next \
-_(drop, DROP) \
-_(ethernet, ETHERNET_INPUT) \
-_(ip4, IP4_INPUT) \
-_(ip6, IP6_INPUT) \
-_(li, LI)
+#define foreach_l2_input_next \
+ _ (drop, DROP) \
+ _ (ethernet, ETHERNET_INPUT) \
+ _ (ip4, IP4_INPUT) \
+ _ (ip6, IP6_INPUT)
uword
unformat_l2_input_next_index (unformat_input_t * input, va_list * args)
@@ -1618,7 +1635,6 @@ classify_table_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (classify_table, static) =
{
.path = "classify table",
@@ -1630,7 +1646,6 @@ VLIB_CLI_COMMAND (classify_table, static) =
"\n [del] [del-chain]",
.function = classify_table_command_fn,
};
-/* *INDENT-ON* */
static int
filter_table_mask_compare (void *a1, void *a2)
@@ -1649,13 +1664,13 @@ filter_table_mask_compare (void *a1, void *a2)
m1 = (u8 *) (t1->mask);
m2 = (u8 *) (t2->mask);
- for (i = 0; i < vec_len (t1->mask) * sizeof (u32x4); i++)
+ for (i = 0; i < t1->match_n_vectors * sizeof (u32x4); i++)
{
n1 += count_set_bits (m1[0]);
m1++;
}
- for (i = 0; i < vec_len (t2->mask) * sizeof (u32x4); i++)
+ for (i = 0; i < t2->match_n_vectors * sizeof (u32x4); i++)
{
n2 += count_set_bits (m2[0]);
m2++;
@@ -1815,11 +1830,11 @@ classify_lookup_chain (u32 table_index, u8 * mask, u32 n_skip, u32 n_match)
continue;
/* Masks aren't congruent, can't use this table. */
- if (vec_len (t->mask) * sizeof (u32x4) != vec_len (mask))
+ if (t->match_n_vectors * sizeof (u32x4) != vec_len (mask))
continue;
/* Masks aren't bit-for-bit identical, can't use this table. */
- if (memcmp (t->mask, mask, vec_len (mask)))
+ if (memcmp (t->mask, mask, t->match_n_vectors * sizeof (u32x4)))
continue;
/* Winner... */
@@ -2034,7 +2049,7 @@ vlib_enable_disable_pkt_trace_filter (int enable)
/*?
* Construct an arbitrary set of packet classifier tables for use with
- * "pcap rx | tx trace," and with the vpp packet tracer
+ * "pcap trace rx | tx," and with the vpp packet tracer
*
* Packets which match a rule in the classifier table chain
* will be traced. The tables are automatically ordered so that
@@ -2043,7 +2058,7 @@ vlib_enable_disable_pkt_trace_filter (int enable)
* It's reasonably likely that folks will configure a single
* table with one or two matches. As a result, we configure
* 8 hash buckets and 128K of match rule space. One can override
- * the defaults by specifiying "buckets <nnn>" and "memory-size <xxx>"
+ * the defaults by specifying "buckets <nnn>" and "memory-size <xxx>"
* as desired.
*
* To build up complex filter chains, repeatedly issue the
@@ -2077,18 +2092,20 @@ vlib_enable_disable_pkt_trace_filter (int enable)
* @cliexpar
* Configuring the classify filter
*
- * Configure a simple classify filter, and configure pcap rx trace to use it:
+ * Configure a simple classify filter, and configure pcap trace rx to use it:
*
- * <b><em>classify filter rx mask l3 ip4 src match l3 ip4 src 192.168.1.11"</em></b><br>
- * <b><em>pcap rx trace on max 100 filter</em></b>
+ * @cliexcmd{classify filter rx mask l3 ip4 src match l3 ip4 src 192.168.1.11}
+ * <b><em>pcap trace rx max 100 filter</em></b>
*
* Configure another fairly simple filter
*
- * <b><em>classify filter mask l3 ip4 src dst match l3 ip4 src 192.168.1.10 dst 192.168.2.10"</em></b>
+ * @cliexcmd{classify filter mask l3 ip4 src dst match l3 ip4 src 192.168.1.10
+ * dst 192.168.2.10}
*
*
* Configure a filter for use with the vpp packet tracer:
- * <b><em>classify filter trace mask l3 ip4 src dst match l3 ip4 src 192.168.1.10 dst 192.168.2.10"</em></b>
+ * @cliexcmd{classify filter trace mask l3 ip4 src dst match l3 ip4 src
+ * 192.168.1.10 dst 192.168.2.10}
* <b><em>trace add dpdk-input 100 filter</em></b>
*
* Clear classifier filters
@@ -2096,7 +2113,7 @@ vlib_enable_disable_pkt_trace_filter (int enable)
* <b><em>classify filter [trace | rx | tx | <intfc>] del</em></b>
*
* To display the top-level classifier tables for each use case:
- * <b><em>show classify filter</em/></b>
+ * <b><em>show classify filter</em></b>
*
* To inspect the classifier tables, use
*
@@ -2104,7 +2121,6 @@ vlib_enable_disable_pkt_trace_filter (int enable)
* The verbose form displays all of the match rules, with hit-counters
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (classify_filter, static) =
{
.path = "classify filter",
@@ -2114,7 +2130,6 @@ VLIB_CLI_COMMAND (classify_filter, static) =
" [buckets <nn>] [memory-size <n>]",
.function = classify_filter_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_classify_filter_command_fn (vlib_main_t * vm,
@@ -2194,14 +2209,12 @@ show_classify_filter_command_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_classify_filter, static) =
{
.path = "show classify filter",
.short_help = "show classify filter [verbose [nn]]",
.function = show_classify_filter_command_fn,
};
-/* *INDENT-ON* */
u8 *
format_vnet_classify_table (u8 *s, va_list *args)
@@ -2213,7 +2226,7 @@ format_vnet_classify_table (u8 *s, va_list *args)
if (index == ~0)
{
- s = format (s, "%10s%10s%10s%10s", "TableIdx", "Sessions", "NextTbl",
+ s = format (s, "\n%10s%10s%10s%10s", "TableIdx", "Sessions", "NextTbl",
"NextNode", verbose ? "Details" : "");
return s;
}
@@ -2264,21 +2277,21 @@ show_classify_tables_command_fn (vlib_main_t * vm,
break;
}
- /* *INDENT-OFF* */
pool_foreach (t, cm->tables)
{
if (match_index == ~0 || (match_index == t - cm->tables))
vec_add1 (indices, t - cm->tables);
}
- /* *INDENT-ON* */
if (vec_len (indices))
{
- vlib_cli_output (vm, "%U", format_vnet_classify_table, cm, verbose,
- ~0 /* hdr */ );
for (i = 0; i < vec_len (indices); i++)
- vlib_cli_output (vm, "%U", format_vnet_classify_table, cm,
- verbose, indices[i]);
+ {
+ vlib_cli_output (vm, "%U", format_vnet_classify_table, cm, verbose,
+ ~0 /* hdr */);
+ vlib_cli_output (vm, "%U", format_vnet_classify_table, cm, verbose,
+ indices[i]);
+ }
}
else
vlib_cli_output (vm, "No classifier tables configured");
@@ -2288,13 +2301,11 @@ show_classify_tables_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_classify_table_command, static) = {
.path = "show classify tables",
.short_help = "show classify tables [index <nn>]",
.function = show_classify_tables_command_fn,
};
-/* *INDENT-ON* */
uword
unformat_l4_match (unformat_input_t * input, va_list * args)
@@ -2314,7 +2325,7 @@ unformat_l4_match (unformat_input_t * input, va_list * args)
else if (unformat (input, "dst_port %d", &dst_port))
;
else
- return 0;
+ break;
}
h.src_port = clib_host_to_net_u16 (src_port);
@@ -2675,6 +2686,7 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
u8 *l2 = 0;
u8 *l3 = 0;
u8 *l4 = 0;
+ u8 add_l2 = 1;
if (pool_is_free_index (cm->tables, table_index))
return 0;
@@ -2685,6 +2697,9 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
{
if (unformat (input, "hex %U", unformat_hex_string, &match))
;
+ else if (unformat (input, "l2 none"))
+ /* Don't add the l2 header in the mask */
+ add_l2 = 0;
else if (unformat (input, "l2 %U", unformat_l2_match, &l2))
;
else if (unformat (input, "l3 %U", unformat_l3_match, &l3))
@@ -2695,6 +2710,15 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
break;
}
+ if (l2 && !add_l2)
+ {
+ vec_free (match);
+ vec_free (l2);
+ vec_free (l3);
+ vec_free (l4);
+ return 0;
+ }
+
if (l4 && !l3)
{
vec_free (match);
@@ -2707,15 +2731,20 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
{
if (l2 || l3 || l4)
{
- /* "Win a free Ethernet header in every packet" */
- if (l2 == 0)
- vec_validate_aligned (l2, 13, sizeof (u32x4));
- match = l2;
- if (l3)
+ if (add_l2)
{
- vec_append_aligned (match, l3, sizeof (u32x4));
- vec_free (l3);
+ /* "Win a free Ethernet header in every packet" */
+ if (l2 == 0)
+ vec_validate_aligned (l2, 13, sizeof (u32x4));
+ match = l2;
+ if (l3)
+ {
+ vec_append_aligned (match, l3, sizeof (u32x4));
+ vec_free (l3);
+ }
}
+ else
+ match = l3;
if (l4)
{
vec_append_aligned (match, l4, sizeof (u32x4));
@@ -2730,8 +2759,8 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
sizeof (u32x4));
/* Set size, include skipped vectors */
- _vec_len (match) =
- (t->match_n_vectors + t->skip_n_vectors) * sizeof (u32x4);
+ vec_set_len (match,
+ (t->match_n_vectors + t->skip_n_vectors) * sizeof (u32x4));
*matchp = match;
@@ -2743,9 +2772,9 @@ unformat_classify_match (unformat_input_t * input, va_list * args)
int
vnet_classify_add_del_session (vnet_classify_main_t *cm, u32 table_index,
- const u8 *match, u32 hit_next_index,
+ const u8 *match, u16 hit_next_index,
u32 opaque_index, i32 advance, u8 action,
- u16 metadata, int is_add)
+ u32 metadata, int is_add)
{
vnet_classify_table_t *t;
vnet_classify_entry_5_t _max_e __attribute__ ((aligned (16)));
@@ -2889,7 +2918,6 @@ classify_session_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (classify_session_command, static) = {
.path = "classify session",
.short_help =
@@ -2899,7 +2927,6 @@ VLIB_CLI_COMMAND (classify_session_command, static) = {
"\n [action set-ip4-fib-id|set-ip6-fib-id|set-sr-policy-index <n>] [del]",
.function = classify_session_command_fn,
};
-/* *INDENT-ON* */
static uword
unformat_opaque_sw_if_index (unformat_input_t * input, va_list * args)
@@ -3043,7 +3070,12 @@ vnet_is_packet_traced (vlib_buffer_t * b, u32 classify_table_index, int func)
{
return vnet_is_packet_traced_inline (b, classify_table_index, func);
}
-
+VLIB_REGISTER_TRACE_FILTER_FUNCTION (vnet_is_packet_traced_fn, static) = {
+ .name = "vnet_is_packet_traced",
+ .description = "classifier based filter",
+ .priority = 50,
+ .function = vnet_is_packet_traced
+};
#define TEST_CODE 0
@@ -3195,7 +3227,7 @@ test_classify_churn (test_classify_main_t * tm)
for (i = 0; i < tm->sessions; i++)
{
u8 *key_minus_skip;
- u64 hash;
+ u32 hash;
vnet_classify_entry_t *e;
ep = tm->entries + i;
@@ -3312,7 +3344,6 @@ test_classify_command_fn (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_classify_command, static) = {
.path = "test classify",
.short_help =
@@ -3321,7 +3352,6 @@ VLIB_CLI_COMMAND (test_classify_command, static) = {
" [churn-test]",
.function = test_classify_command_fn,
};
-/* *INDENT-ON* */
#endif /* TEST_CODE */
/*
diff --git a/src/vnet/classify/vnet_classify.h b/src/vnet/classify/vnet_classify.h
index 06784e0541e..768593c45af 100644
--- a/src/vnet/classify/vnet_classify.h
+++ b/src/vnet/classify/vnet_classify.h
@@ -89,15 +89,17 @@ typedef struct _vnet_classify_entry
/* last heard time */
f64 last_heard;
+ u32 metadata;
+
+ /* Graph node next index */
+ u16 next_index;
+
+ vnet_classify_action_t action;
+
/* Really only need 1 bit */
u8 flags;
#define VNET_CLASSIFY_ENTRY_FREE (1<<0)
- vnet_classify_action_t action;
- u16 metadata;
- /* Graph node next index */
- u32 next_index;
-
/* Must be aligned to a 16-octet boundary */
u32x4 key[0];
} vnet_classify_entry_t;
@@ -147,9 +149,6 @@ typedef struct
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- /* Mask to apply after skipping N vectors */
- u32x4 *mask;
-
/* hash Buckets */
vnet_classify_bucket_t *buckets;
@@ -165,6 +164,7 @@ typedef struct
u32 entries_per_page;
u32 skip_n_vectors;
u32 match_n_vectors;
+ u16 load_mask;
/* Index of next table to try */
u32 next_table_index;
@@ -196,6 +196,14 @@ typedef struct
/* Writer (only) lock for this table */
clib_spinlock_t writer_lock;
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
+ /* Mask to apply after skipping N vectors */
+ union
+ {
+ u32x4 mask[8];
+ u32 mask_u32[32];
+ };
+
} vnet_classify_table_t;
/**
@@ -233,10 +241,11 @@ struct _vnet_classify_main
extern vnet_classify_main_t vnet_classify_main;
+u8 *format_classify_entry (u8 *s, va_list *args);
u8 *format_classify_table (u8 * s, va_list * args);
u8 *format_vnet_classify_table (u8 *s, va_list *args);
-u64 vnet_classify_hash_packet (vnet_classify_table_t * t, u8 * h);
+u32 vnet_classify_hash_packet (const vnet_classify_table_t *t, u8 *h);
static_always_inline vnet_classify_table_t *
vnet_classify_table_get (u32 table_index)
@@ -246,63 +255,85 @@ vnet_classify_table_get (u32 table_index)
return (pool_elt_at_index (vcm->tables, table_index));
}
-static inline u64
-vnet_classify_hash_packet_inline (vnet_classify_table_t *t, const u8 *h)
+static inline u32
+vnet_classify_hash_packet_inline (const vnet_classify_table_t *t, const u8 *h)
{
- u32x4 *mask;
+ u64 xor_sum;
+ ASSERT (t);
+ h += t->skip_n_vectors * 16;
- union
- {
- u32x4 as_u32x4;
- u64 as_u64[2];
- } xor_sum __attribute__ ((aligned (sizeof (u32x4))));
+#if defined(CLIB_HAVE_VEC512) && defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u64x8 xor_sum_x8, *mask = (u64x8 *) t->mask;
+ u16 load_mask = t->load_mask;
+ u64x8u *data = (u64x8u *) h;
+
+ xor_sum_x8 = u64x8_mask_load_zero (data, load_mask) & mask[0];
+
+ if (PREDICT_FALSE (load_mask >> 8))
+ xor_sum_x8 ^= u64x8_mask_load_zero (data + 1, load_mask >> 8) & mask[1];
+
+ xor_sum_x8 ^= u64x8_align_right (xor_sum_x8, xor_sum_x8, 4);
+ xor_sum_x8 ^= u64x8_align_right (xor_sum_x8, xor_sum_x8, 2);
+ xor_sum = xor_sum_x8[0] ^ xor_sum_x8[1];
+#elif defined(CLIB_HAVE_VEC256) && defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u64x4 xor_sum_x4, *mask = (u64x4 *) t->mask;
+ u16 load_mask = t->load_mask;
+ u64x4u *data = (u64x4u *) h;
+
+ xor_sum_x4 = u64x4_mask_load_zero (data, load_mask) & mask[0];
+ xor_sum_x4 ^= u64x4_mask_load_zero (data + 1, load_mask >> 4) & mask[1];
+
+ if (PREDICT_FALSE (load_mask >> 8))
+ xor_sum_x4 ^= u64x4_mask_load_zero (data + 2, load_mask >> 8) & mask[2];
+
+ xor_sum_x4 ^= u64x4_align_right (xor_sum_x4, xor_sum_x4, 2);
+ xor_sum = xor_sum_x4[0] ^ xor_sum_x4[1];
+#elif defined(CLIB_HAVE_VEC128)
+ u64x2 *mask = (u64x2 *) t->mask;
+ u64x2u *data = (u64x2u *) h;
+ u64x2 xor_sum_x2;
+
+ xor_sum_x2 = data[0] & mask[0];
- ASSERT (t);
- mask = t->mask;
-#ifdef CLIB_HAVE_VEC128
- u32x4u *data = (u32x4u *) h;
- xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0];
switch (t->match_n_vectors)
{
case 5:
- xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4];
+ xor_sum_x2 ^= data[4] & mask[4];
/* FALLTHROUGH */
case 4:
- xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3];
+ xor_sum_x2 ^= data[3] & mask[3];
/* FALLTHROUGH */
case 3:
- xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2];
+ xor_sum_x2 ^= data[2] & mask[2];
/* FALLTHROUGH */
case 2:
- xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1];
+ xor_sum_x2 ^= data[1] & mask[1];
/* FALLTHROUGH */
case 1:
break;
default:
abort ();
}
+ xor_sum = xor_sum_x2[0] ^ xor_sum_x2[1];
#else
- u32 skip_u64 = t->skip_n_vectors * 2;
- u64 *data64 = (u64 *) h;
- xor_sum.as_u64[0] = data64[0 + skip_u64] & ((u64 *) mask)[0];
- xor_sum.as_u64[1] = data64[1 + skip_u64] & ((u64 *) mask)[1];
+ u64 *data = (u64 *) h;
+ u64 *mask = (u64 *) t->mask;
+
+ xor_sum = (data[0] & mask[0]) ^ (data[1] & mask[1]);
+
switch (t->match_n_vectors)
{
case 5:
- xor_sum.as_u64[0] ^= data64[8 + skip_u64] & ((u64 *) mask)[8];
- xor_sum.as_u64[1] ^= data64[9 + skip_u64] & ((u64 *) mask)[9];
+ xor_sum ^= (data[8] & mask[8]) ^ (data[9] & mask[9]);
/* FALLTHROUGH */
case 4:
- xor_sum.as_u64[0] ^= data64[6 + skip_u64] & ((u64 *) mask)[6];
- xor_sum.as_u64[1] ^= data64[7 + skip_u64] & ((u64 *) mask)[7];
+ xor_sum ^= (data[6] & mask[6]) ^ (data[7] & mask[7]);
/* FALLTHROUGH */
case 3:
- xor_sum.as_u64[0] ^= data64[4 + skip_u64] & ((u64 *) mask)[4];
- xor_sum.as_u64[1] ^= data64[5 + skip_u64] & ((u64 *) mask)[5];
+ xor_sum ^= (data[4] & mask[4]) ^ (data[5] & mask[5]);
/* FALLTHROUGH */
case 2:
- xor_sum.as_u64[0] ^= data64[2 + skip_u64] & ((u64 *) mask)[2];
- xor_sum.as_u64[1] ^= data64[3 + skip_u64] & ((u64 *) mask)[3];
+ xor_sum ^= (data[2] & mask[2]) ^ (data[3] & mask[3]);
/* FALLTHROUGH */
case 1:
break;
@@ -315,7 +346,7 @@ vnet_classify_hash_packet_inline (vnet_classify_table_t *t, const u8 *h)
#ifdef clib_crc32c_uses_intrinsics
return clib_crc32c ((u8 *) & xor_sum, sizeof (xor_sum));
#else
- return clib_xxhash (xor_sum.as_u64[0] ^ xor_sum.as_u64[1]);
+ return clib_xxhash (xor_sum);
#endif
}
@@ -332,7 +363,7 @@ vnet_classify_prefetch_bucket (vnet_classify_table_t * t, u64 hash)
}
static inline vnet_classify_entry_t *
-vnet_classify_get_entry (vnet_classify_table_t * t, uword offset)
+vnet_classify_get_entry (const vnet_classify_table_t *t, uword offset)
{
u8 *hp = clib_mem_get_heap_base (t->mheap);
u8 *vp = hp + offset;
@@ -354,8 +385,8 @@ vnet_classify_get_offset (vnet_classify_table_t * t,
}
static inline vnet_classify_entry_t *
-vnet_classify_entry_at_index (vnet_classify_table_t * t,
- vnet_classify_entry_t * e, u32 index)
+vnet_classify_entry_at_index (const vnet_classify_table_t *t,
+ vnet_classify_entry_t *e, u32 index)
{
u8 *eu8;
@@ -392,127 +423,151 @@ vnet_classify_prefetch_entry (vnet_classify_table_t * t, u64 hash)
clib_prefetch_load (e);
}
-vnet_classify_entry_t *vnet_classify_find_entry (vnet_classify_table_t * t,
- u8 * h, u64 hash, f64 now);
+vnet_classify_entry_t *
+vnet_classify_find_entry (const vnet_classify_table_t *t, u8 *h, u32 hash,
+ f64 now);
+
+static_always_inline int
+vnet_classify_entry_is_equal (vnet_classify_entry_t *v, const u8 *d, u8 *m,
+ u32 match_n_vectors, u16 load_mask)
+{
+#if defined(CLIB_HAVE_VEC512) && defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u64x8 r, *mask = (u64x8 *) m;
+ u64x8u *data = (u64x8u *) d;
+ u64x8 *key = (u64x8 *) v->key;
+
+ r = (u64x8_mask_load_zero (data, load_mask) & mask[0]) ^
+ u64x8_mask_load_zero (key, load_mask);
+ load_mask >>= 8;
+
+ if (PREDICT_FALSE (load_mask))
+ r |= (u64x8_mask_load_zero (data + 1, load_mask) & mask[1]) ^
+ u64x8_mask_load_zero (key + 1, load_mask);
+
+ if (u64x8_is_all_zero (r))
+ return 1;
+
+#elif defined(CLIB_HAVE_VEC256) && defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u64x4 r, *mask = (u64x4 *) m;
+ u64x4u *data = (u64x4u *) d;
+ u64x4 *key = (u64x4 *) v->key;
+
+ r = (u64x4_mask_load_zero (data, load_mask) & mask[0]) ^
+ u64x4_mask_load_zero (key, load_mask);
+ load_mask >>= 4;
+
+ r |= (u64x4_mask_load_zero (data + 1, load_mask) & mask[1]) ^
+ u64x4_mask_load_zero (key + 1, load_mask);
+ load_mask >>= 4;
+
+ if (PREDICT_FALSE (load_mask))
+ r |= (u64x4_mask_load_zero (data + 2, load_mask) & mask[2]) ^
+ u64x4_mask_load_zero (key + 2, load_mask);
+
+ if (u64x4_is_all_zero (r))
+ return 1;
+
+#elif defined(CLIB_HAVE_VEC128)
+ u64x2u *data = (u64x2 *) d;
+ u64x2 *key = (u64x2 *) v->key;
+ u64x2 *mask = (u64x2 *) m;
+ u64x2 r;
+
+ r = (data[0] & mask[0]) ^ key[0];
+ switch (match_n_vectors)
+ {
+ case 5:
+ r |= (data[4] & mask[4]) ^ key[4];
+ /* fall through */
+ case 4:
+ r |= (data[3] & mask[3]) ^ key[3];
+ /* fall through */
+ case 3:
+ r |= (data[2] & mask[2]) ^ key[2];
+ /* fall through */
+ case 2:
+ r |= (data[1] & mask[1]) ^ key[1];
+ /* fall through */
+ case 1:
+ break;
+ default:
+ abort ();
+ }
+
+ if (u64x2_is_all_zero (r))
+ return 1;
+
+#else
+ u64 *data = (u64 *) d;
+ u64 *key = (u64 *) v->key;
+ u64 *mask = (u64 *) m;
+ u64 r;
+
+ r = ((data[0] & mask[0]) ^ key[0]) | ((data[1] & mask[1]) ^ key[1]);
+ switch (match_n_vectors)
+ {
+ case 5:
+ r |= ((data[8] & mask[8]) ^ key[8]) | ((data[9] & mask[9]) ^ key[9]);
+ /* fall through */
+ case 4:
+ r |= ((data[6] & mask[6]) ^ key[6]) | ((data[7] & mask[7]) ^ key[7]);
+ /* fall through */
+ case 3:
+ r |= ((data[4] & mask[4]) ^ key[4]) | ((data[5] & mask[5]) ^ key[5]);
+ /* fall through */
+ case 2:
+ r |= ((data[2] & mask[2]) ^ key[2]) | ((data[3] & mask[3]) ^ key[3]);
+ /* fall through */
+ case 1:
+ break;
+ default:
+ abort ();
+ }
+
+ if (r == 0)
+ return 1;
+
+#endif /* CLIB_HAVE_VEC128 */
+ return 0;
+}
static inline vnet_classify_entry_t *
-vnet_classify_find_entry_inline (vnet_classify_table_t *t, const u8 *h,
- u64 hash, f64 now)
+vnet_classify_find_entry_inline (const vnet_classify_table_t *t, const u8 *h,
+ u32 hash, f64 now)
{
vnet_classify_entry_t *v;
- u32x4 *mask, *key;
- union
- {
- u32x4 as_u32x4;
- u64 as_u64[2];
- } result __attribute__ ((aligned (sizeof (u32x4))));
vnet_classify_bucket_t *b;
- u32 value_index;
- u32 bucket_index;
- u32 limit;
+ u32 bucket_index, limit, pages, match_n_vectors = t->match_n_vectors;
+ u16 load_mask = t->load_mask;
+ u8 *mask = (u8 *) t->mask;
int i;
bucket_index = hash & (t->nbuckets - 1);
b = &t->buckets[bucket_index];
- mask = t->mask;
if (b->offset == 0)
return 0;
- hash >>= t->log2_nbuckets;
-
+ pages = 1 << b->log2_pages;
v = vnet_classify_get_entry (t, b->offset);
- value_index = hash & ((1 << b->log2_pages) - 1);
limit = t->entries_per_page;
if (PREDICT_FALSE (b->linear_search))
{
- value_index = 0;
- limit *= (1 << b->log2_pages);
+ limit *= pages;
+ v = vnet_classify_entry_at_index (t, v, 0);
}
-
- v = vnet_classify_entry_at_index (t, v, value_index);
-
-#ifdef CLIB_HAVE_VEC128
- const u32x4u *data = (const u32x4u *) h;
- for (i = 0; i < limit; i++)
+ else
{
- key = v->key;
- result.as_u32x4 = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
- switch (t->match_n_vectors)
- {
- case 5:
- result.as_u32x4 |= (data[4 + t->skip_n_vectors] & mask[4]) ^ key[4];
- /* FALLTHROUGH */
- case 4:
- result.as_u32x4 |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3];
- /* FALLTHROUGH */
- case 3:
- result.as_u32x4 |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2];
- /* FALLTHROUGH */
- case 2:
- result.as_u32x4 |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
- /* FALLTHROUGH */
- case 1:
- break;
- default:
- abort ();
- }
-
- if (u32x4_is_all_zero (result.as_u32x4))
- {
- if (PREDICT_TRUE (now))
- {
- v->hits++;
- v->last_heard = now;
- }
- return (v);
- }
- v = vnet_classify_entry_at_index (t, v, 1);
+ hash >>= t->log2_nbuckets;
+ v = vnet_classify_entry_at_index (t, v, hash & (pages - 1));
}
-#else
- u32 skip_u64 = t->skip_n_vectors * 2;
- const u64 *data64 = (const u64 *) h;
- for (i = 0; i < limit; i++)
- {
- key = v->key;
- result.as_u64[0] =
- (data64[0 + skip_u64] & ((u64 *) mask)[0]) ^ ((u64 *) key)[0];
- result.as_u64[1] =
- (data64[1 + skip_u64] & ((u64 *) mask)[1]) ^ ((u64 *) key)[1];
- switch (t->match_n_vectors)
- {
- case 5:
- result.as_u64[0] |=
- (data64[8 + skip_u64] & ((u64 *) mask)[8]) ^ ((u64 *) key)[8];
- result.as_u64[1] |=
- (data64[9 + skip_u64] & ((u64 *) mask)[9]) ^ ((u64 *) key)[9];
- /* FALLTHROUGH */
- case 4:
- result.as_u64[0] |=
- (data64[6 + skip_u64] & ((u64 *) mask)[6]) ^ ((u64 *) key)[6];
- result.as_u64[1] |=
- (data64[7 + skip_u64] & ((u64 *) mask)[7]) ^ ((u64 *) key)[7];
- /* FALLTHROUGH */
- case 3:
- result.as_u64[0] |=
- (data64[4 + skip_u64] & ((u64 *) mask)[4]) ^ ((u64 *) key)[4];
- result.as_u64[1] |=
- (data64[5 + skip_u64] & ((u64 *) mask)[5]) ^ ((u64 *) key)[5];
- /* FALLTHROUGH */
- case 2:
- result.as_u64[0] |=
- (data64[2 + skip_u64] & ((u64 *) mask)[2]) ^ ((u64 *) key)[2];
- result.as_u64[1] |=
- (data64[3 + skip_u64] & ((u64 *) mask)[3]) ^ ((u64 *) key)[3];
- /* FALLTHROUGH */
- case 1:
- break;
- default:
- abort ();
- }
+ h += t->skip_n_vectors * 16;
- if (result.as_u64[0] == 0 && result.as_u64[1] == 0)
+ for (i = 0; i < limit; i++)
+ {
+ if (vnet_classify_entry_is_equal (v, h, mask, match_n_vectors,
+ load_mask))
{
if (PREDICT_TRUE (now))
{
@@ -521,10 +576,8 @@ vnet_classify_find_entry_inline (vnet_classify_table_t *t, const u8 *h,
}
return (v);
}
-
v = vnet_classify_entry_at_index (t, v, 1);
}
-#endif /* CLIB_HAVE_VEC128 */
return 0;
}
@@ -535,9 +588,9 @@ vnet_classify_table_t *vnet_classify_new_table (vnet_classify_main_t *cm,
u32 match_n_vectors);
int vnet_classify_add_del_session (vnet_classify_main_t *cm, u32 table_index,
- const u8 *match, u32 hit_next_index,
+ const u8 *match, u16 hit_next_index,
u32 opaque_index, i32 advance, u8 action,
- u16 metadata, int is_add);
+ u32 metadata, int is_add);
int vnet_classify_add_del_table (vnet_classify_main_t *cm, const u8 *mask,
u32 nbuckets, u32 memory_size, u32 skip,
diff --git a/src/vnet/config.c b/src/vnet/config.c
index c9d4909cdeb..c05da663fb7 100644
--- a/src/vnet/config.c
+++ b/src/vnet/config.c
@@ -97,7 +97,7 @@ find_config_with_features (vlib_main_t * vm,
config_string = cm->config_string_temp;
cm->config_string_temp = 0;
if (config_string)
- _vec_len (config_string) = 0;
+ vec_set_len (config_string, 0);
vec_foreach (f, feature_vector)
{
@@ -119,6 +119,12 @@ find_config_with_features (vlib_main_t * vm,
vec_add1 (config_string, next_index);
}
+ /* Add the end node index to the config string so that it is part of
+ * the key used to detect string sharing. If this is not included then
+ * a modification of the end node would affect all the user of a shared
+ * string. */
+ vec_add1 (config_string, end_node_index);
+
/* See if config string is unique. */
p = hash_get_mem (cm->config_string_hash, config_string);
if (p)
@@ -250,6 +256,15 @@ vnet_config_del (vnet_config_main_t * cm, u32 config_id)
}
u32
+vnet_config_reset_end_node (vlib_main_t *vm, vnet_config_main_t *cm, u32 ci)
+{
+ cm->end_node_indices_by_user_index[ci] = cm->default_end_node_index;
+
+ return (
+ vnet_config_modify_end_node (vm, cm, ci, cm->default_end_node_index));
+}
+
+u32
vnet_config_modify_end_node (vlib_main_t * vm,
vnet_config_main_t * cm,
u32 config_string_heap_index, u32 end_node_index)
@@ -281,7 +296,7 @@ vnet_config_modify_end_node (vlib_main_t * vm,
if (new_features[last].node_index == cm->default_end_node_index)
{
vec_free (new_features->feature_config);
- _vec_len (new_features) = last;
+ vec_set_len (new_features, last);
}
}
@@ -304,6 +319,18 @@ vnet_config_modify_end_node (vlib_main_t * vm,
}
u32
+vnet_config_get_end_node (vlib_main_t *vm, vnet_config_main_t *cm,
+ u32 config_string_heap_index)
+{
+ if (config_string_heap_index >= vec_len (cm->end_node_indices_by_user_index))
+ return cm->default_end_node_index;
+ if (~0 == cm->end_node_indices_by_user_index[config_string_heap_index])
+ return cm->default_end_node_index;
+
+ return (cm->end_node_indices_by_user_index[config_string_heap_index]);
+}
+
+u32
vnet_config_add_feature (vlib_main_t * vm,
vnet_config_main_t * cm,
u32 config_string_heap_index,
diff --git a/src/vnet/config.h b/src/vnet/config.h
index ccbbbf433e2..9b01b4a433e 100644
--- a/src/vnet/config.h
+++ b/src/vnet/config.h
@@ -169,6 +169,12 @@ u32 vnet_config_modify_end_node (vlib_main_t * vm,
u32 config_string_heap_index,
u32 end_node_index);
+u32 vnet_config_reset_end_node (vlib_main_t *vm, vnet_config_main_t *cm,
+ u32 config_string_heap_index);
+
+u32 vnet_config_get_end_node (vlib_main_t *vm, vnet_config_main_t *cm,
+ u32 config_string_heap_index);
+
u8 *vnet_config_format_features (vlib_main_t * vm,
vnet_config_main_t * cm,
u32 config_index, u8 * s);
diff --git a/src/vnet/crypto/cli.c b/src/vnet/crypto/cli.c
index a6098a18e11..2ca66f228c3 100644
--- a/src/vnet/crypto/cli.c
+++ b/src/vnet/crypto/cli.c
@@ -36,16 +36,13 @@ show_crypto_engines_command_fn (vlib_main_t * vm,
}
vlib_cli_output (vm, "%-20s%-8s%s", "Name", "Prio", "Description");
- /* *INDENT-OFF* */
vec_foreach (p, cm->engines)
{
vlib_cli_output (vm, "%-20s%-8u%s", p->name, p->priority, p->desc);
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_engines_command, static) =
{
.path = "show crypto engines",
@@ -67,7 +64,7 @@ format_vnet_crypto_engine_candidates (u8 * s, va_list * args)
{
vec_foreach (e, cm->engines)
{
- if (e->enqueue_handlers[id] && e->dequeue_handlers[id])
+ if (e->enqueue_handlers[id] && e->dequeue_handler)
{
s = format (s, "%U", format_vnet_crypto_engine, e - cm->engines);
if (ei == e - cm->engines)
@@ -145,20 +142,18 @@ show_crypto_handlers_command_fn (vlib_main_t * vm,
"Chained");
for (i = 0; i < VNET_CRYPTO_N_ALGS; i++)
- vlib_cli_output (vm, "%-16U%U", format_vnet_crypto_alg, i,
+ vlib_cli_output (vm, "%-20U%U", format_vnet_crypto_alg, i,
format_vnet_crypto_handlers, i);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_handlers_command, static) =
{
.path = "show crypto handlers",
.short_help = "show crypto handlers",
.function = show_crypto_handlers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_crypto_handler_command_fn (vlib_main_t * vm,
@@ -209,13 +204,11 @@ set_crypto_handler_command_fn (vlib_main_t * vm,
char *key;
u8 *value;
- /* *INDENT-OFF* */
hash_foreach_mem (key, value, cm->alg_index_by_name,
({
(void) value;
rc += vnet_crypto_set_handler2 (key, engine, oct);
}));
- /* *INDENT-ON* */
if (rc)
vlib_cli_output (vm, "failed to set crypto engine!");
@@ -241,7 +234,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_crypto_handler_command, static) =
{
.path = "set crypto handler",
@@ -249,7 +241,6 @@ VLIB_CLI_COMMAND (set_crypto_handler_command, static) =
" [simple|chained]",
.function = set_crypto_handler_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_vnet_crypto_async_handlers (u8 * s, va_list * args)
@@ -300,14 +291,12 @@ show_crypto_async_handlers_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_async_handlers_command, static) =
{
.path = "show crypto async handlers",
.short_help = "show crypto async handlers",
.function = show_crypto_async_handlers_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -316,7 +305,6 @@ show_crypto_async_status_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
vnet_crypto_main_t *cm = &crypto_main;
- u32 skip_master = vlib_num_workers () > 0;
vlib_thread_main_t *tm = vlib_get_thread_main ();
unformat_input_t _line_input, *line_input = &_line_input;
int i;
@@ -324,12 +312,7 @@ show_crypto_async_status_command_fn (vlib_main_t * vm,
if (unformat_user (input, unformat_line_input, line_input))
unformat_free (line_input);
- vlib_cli_output (vm, "Crypto async dispatch mode: %s",
- cm->dispatch_mode ==
- VNET_CRYPTO_ASYNC_DISPATCH_POLLING ? "POLLING" :
- "INTERRUPT");
-
- for (i = skip_master; i < tm->n_vlib_mains; i++)
+ for (i = 0; i < tm->n_vlib_mains; i++)
{
vlib_node_state_t state = vlib_node_get_state (
vlib_get_main_by_index (i), cm->crypto_node_index);
@@ -343,14 +326,12 @@ show_crypto_async_status_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_crypto_async_status_command, static) =
{
.path = "show crypto async status",
.short_help = "show crypto async status",
.function = show_crypto_async_status_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_crypto_async_handler_command_fn (vlib_main_t * vm,
@@ -394,13 +375,11 @@ set_crypto_async_handler_command_fn (vlib_main_t * vm,
char *key;
u8 *value;
- /* *INDENT-OFF* */
hash_foreach_mem (key, value, cm->async_alg_index_by_name,
({
(void) value;
rc += vnet_crypto_set_async_handler2 (key, engine);
}));
- /* *INDENT-ON* */
if (rc)
vlib_cli_output (vm, "failed to set crypto engine!");
@@ -426,57 +405,52 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_crypto_async_handler_command, static) =
{
.path = "set crypto async handler",
.short_help = "set crypto async handler type [type2 type3 ...] engine",
.function = set_crypto_async_handler_command_fn,
};
-/* *INDENT-ON* */
-
-static inline void
-print_crypto_async_dispatch_warning ()
-{
- clib_warning ("Switching dispatch mode might not work is some situations.");
- clib_warning
- ("Use 'show crypto async status' to verify that the nodes' states were set");
- clib_warning ("and if not, set 'crypto async dispatch' mode again.");
-}
static clib_error_t *
-set_crypto_async_dispatch_polling_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+set_crypto_async_dispatch_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
{
- print_crypto_async_dispatch_warning ();
- vnet_crypto_set_async_dispatch_mode (VNET_CRYPTO_ASYNC_DISPATCH_POLLING);
- return 0;
-}
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = 0;
+ u8 adaptive = 0;
+ u8 mode = VLIB_NODE_STATE_INTERRUPT;
-static clib_error_t *
-set_crypto_async_dispatch_interrupt_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- print_crypto_async_dispatch_warning ();
- vnet_crypto_set_async_dispatch_mode (VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT);
- return 0;
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "polling"))
+ mode = VLIB_NODE_STATE_POLLING;
+ else if (unformat (line_input, "interrupt"))
+ mode = VLIB_NODE_STATE_INTERRUPT;
+ else if (unformat (line_input, "adaptive"))
+ adaptive = 1;
+ else
+ {
+ error = clib_error_return (0, "invalid params");
+ goto done;
+ }
+ }
+
+ vnet_crypto_set_async_dispatch (mode, adaptive);
+done:
+ unformat_free (line_input);
+ return error;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_crypto_async_dispatch_polling_command, static) =
-{
- .path = "set crypto async dispatch polling",
- .short_help = "set crypto async dispatch polling|interrupt",
- .function = set_crypto_async_dispatch_polling_command_fn,
-};
-VLIB_CLI_COMMAND (set_crypto_async_dispatch_interrupt_command, static) =
-{
- .path = "set crypto async dispatch interrupt",
- .short_help = "set crypto async dispatch polling|interrupt",
- .function = set_crypto_async_dispatch_interrupt_command_fn,
+VLIB_CLI_COMMAND (set_crypto_async_dispatch_mode_command, static) = {
+ .path = "set crypto async dispatch mode",
+ .short_help = "set crypto async dispatch mode <polling|interrupt|adaptive>",
+ .function = set_crypto_async_dispatch_command_fn,
};
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/crypto/crypto.api b/src/vnet/crypto/crypto.api
index 6eccd8524ba..8fec805dcfc 100644
--- a/src/vnet/crypto/crypto.api
+++ b/src/vnet/crypto/crypto.api
@@ -28,7 +28,8 @@ enum crypto_op_class_type:u8
CRYPTO_API_OP_BOTH,
};
- /** \brief crypto: use polling or interrupt dispatch
+ /** \brief crypto: Use polling or interrupt dispatch.
+ Always unset the adaptive flag (that is why it is deprecated).
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param mode - dispatch mode
@@ -36,11 +37,29 @@ enum crypto_op_class_type:u8
autoreply define crypto_set_async_dispatch
{
+ option deprecated;
+ option replaced_by="crypto_set_async_dispatch_v2";
u32 client_index;
u32 context;
vl_api_crypto_dispatch_mode_t mode;
};
+ /** \brief crypto: Change the way crypto operations are dispatched.
+ Use adaptive (or not) mode, starting in polling or interrupt state.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param mode - dispatch initial state
+ @param adaptive - whether on not the state shall change depending on load
+*/
+
+autoreply define crypto_set_async_dispatch_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_crypto_dispatch_mode_t mode;
+ bool adaptive;
+};
+
/** \brief crypto: set crypto handler
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/crypto/crypto.c b/src/vnet/crypto/crypto.c
index 3b1505ad448..c8e7ca90c9d 100644
--- a/src/vnet/crypto/crypto.c
+++ b/src/vnet/crypto/crypto.c
@@ -192,13 +192,16 @@ vnet_crypto_is_set_handler (vnet_crypto_alg_t alg)
vnet_crypto_op_id_t opt = 0;
int i;
- if (alg > vec_len (cm->algs))
+ if (alg >= vec_len (cm->algs))
return 0;
for (i = 0; i < VNET_CRYPTO_OP_N_TYPES; i++)
if ((opt = cm->algs[alg].op_by_type[i]) != 0)
break;
+ if (opt >= vec_len (cm->ops_handlers))
+ return 0;
+
return NULL != cm->ops_handlers[opt];
}
@@ -275,30 +278,24 @@ vnet_crypto_register_ops_handlers (vlib_main_t * vm, u32 engine_index,
}
void
-vnet_crypto_register_async_handler (vlib_main_t * vm, u32 engine_index,
- vnet_crypto_async_op_id_t opt,
- vnet_crypto_frame_enqueue_t * enqueue_hdl,
- vnet_crypto_frame_dequeue_t * dequeue_hdl)
+vnet_crypto_register_enqueue_handler (vlib_main_t *vm, u32 engine_index,
+ vnet_crypto_async_op_id_t opt,
+ vnet_crypto_frame_enqueue_t *enqueue_hdl)
{
vnet_crypto_main_t *cm = &crypto_main;
vnet_crypto_engine_t *ae, *e = vec_elt_at_index (cm->engines, engine_index);
vnet_crypto_async_op_data_t *otd = cm->async_opt_data + opt;
vec_validate_aligned (cm->enqueue_handlers, VNET_CRYPTO_ASYNC_OP_N_IDS,
CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (cm->dequeue_handlers, VNET_CRYPTO_ASYNC_OP_N_IDS,
- CLIB_CACHE_LINE_BYTES);
- /* both enqueue hdl and dequeue hdl should present */
- if (!enqueue_hdl && !dequeue_hdl)
+ if (!enqueue_hdl)
return;
e->enqueue_handlers[opt] = enqueue_hdl;
- e->dequeue_handlers[opt] = dequeue_hdl;
if (otd->active_engine_index_async == ~0)
{
otd->active_engine_index_async = engine_index;
cm->enqueue_handlers[opt] = enqueue_hdl;
- cm->dequeue_handlers[opt] = dequeue_hdl;
}
ae = vec_elt_at_index (cm->engines, otd->active_engine_index_async);
@@ -306,12 +303,79 @@ vnet_crypto_register_async_handler (vlib_main_t * vm, u32 engine_index,
{
otd->active_engine_index_async = engine_index;
cm->enqueue_handlers[opt] = enqueue_hdl;
- cm->dequeue_handlers[opt] = dequeue_hdl;
}
return;
}
+static int
+engine_index_cmp (void *v1, void *v2)
+{
+ u32 *a1 = v1;
+ u32 *a2 = v2;
+
+ if (*a1 > *a2)
+ return 1;
+ if (*a1 < *a2)
+ return -1;
+ return 0;
+}
+
+static void
+vnet_crypto_update_cm_dequeue_handlers (void)
+{
+ vnet_crypto_main_t *cm = &crypto_main;
+ vnet_crypto_async_op_data_t *otd;
+ vnet_crypto_engine_t *e;
+ u32 *active_engines = 0, *ei, last_ei = ~0, i;
+
+ vec_reset_length (cm->dequeue_handlers);
+
+ for (i = 0; i < VNET_CRYPTO_ASYNC_OP_N_IDS; i++)
+ {
+ otd = cm->async_opt_data + i;
+ if (otd->active_engine_index_async == ~0)
+ continue;
+ e = cm->engines + otd->active_engine_index_async;
+ if (!e->dequeue_handler)
+ continue;
+ vec_add1 (active_engines, otd->active_engine_index_async);
+ }
+
+ vec_sort_with_function (active_engines, engine_index_cmp);
+
+ vec_foreach (ei, active_engines)
+ {
+ if (ei[0] == last_ei)
+ continue;
+ if (ei[0] == ~0)
+ continue;
+
+ e = cm->engines + ei[0];
+ vec_add1 (cm->dequeue_handlers, e->dequeue_handler);
+ last_ei = ei[0];
+ }
+
+ vec_free (active_engines);
+}
+
+void
+vnet_crypto_register_dequeue_handler (vlib_main_t *vm, u32 engine_index,
+ vnet_crypto_frame_dequeue_t *deq_fn)
+{
+ vnet_crypto_main_t *cm = &crypto_main;
+ vnet_crypto_engine_t *e = vec_elt_at_index (cm->engines, engine_index);
+
+ if (!deq_fn)
+ return;
+
+ e->dequeue_handler = deq_fn;
+
+ vnet_crypto_update_cm_dequeue_handlers ();
+
+ return;
+}
+
void
vnet_crypto_register_key_handler (vlib_main_t * vm, u32 engine_index,
vnet_crypto_key_handler_t * key_handler)
@@ -365,20 +429,29 @@ vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg, u8 * data,
vnet_crypto_engine_t *engine;
vnet_crypto_key_t *key;
+ u8 need_barrier_sync = 0;
+
if (!vnet_crypto_key_len_check (alg, length))
return ~0;
+ need_barrier_sync = pool_get_will_expand (cm->keys);
+ /* If the cm->keys will expand, stop the parade. */
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_sync (vm);
+
pool_get_zero (cm->keys, key);
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_release (vm);
+
index = key - cm->keys;
key->type = VNET_CRYPTO_KEY_TYPE_DATA;
key->alg = alg;
vec_validate_aligned (key->data, length - 1, CLIB_CACHE_LINE_BYTES);
clib_memcpy (key->data, data, length);
- /* *INDENT-OFF* */
vec_foreach (engine, cm->engines)
if (engine->key_op_handler)
engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_ADD, index);
- /* *INDENT-ON* */
return index;
}
@@ -389,25 +462,34 @@ vnet_crypto_key_del (vlib_main_t * vm, vnet_crypto_key_index_t index)
vnet_crypto_engine_t *engine;
vnet_crypto_key_t *key = pool_elt_at_index (cm->keys, index);
- /* *INDENT-OFF* */
vec_foreach (engine, cm->engines)
if (engine->key_op_handler)
engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_DEL, index);
- /* *INDENT-ON* */
if (key->type == VNET_CRYPTO_KEY_TYPE_DATA)
{
- clib_memset (key->data, 0, vec_len (key->data));
+ clib_memset (key->data, 0xfe, vec_len (key->data));
vec_free (key->data);
}
else if (key->type == VNET_CRYPTO_KEY_TYPE_LINK)
{
- key->index_crypto = key->index_integ = 0;
+ key->index_crypto = key->index_integ = ~0;
}
pool_put (cm->keys, key);
}
+void
+vnet_crypto_key_update (vlib_main_t *vm, vnet_crypto_key_index_t index)
+{
+ vnet_crypto_main_t *cm = &crypto_main;
+ vnet_crypto_engine_t *engine;
+
+ vec_foreach (engine, cm->engines)
+ if (engine->key_op_handler)
+ engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_MODIFY, index);
+}
+
vnet_crypto_async_alg_t
vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg,
vnet_crypto_alg_t integ_alg)
@@ -446,50 +528,13 @@ vnet_crypto_key_add_linked (vlib_main_t * vm,
key->index_integ = index_integ;
key->async_alg = linked_alg;
- /* *INDENT-OFF* */
vec_foreach (engine, cm->engines)
if (engine->key_op_handler)
engine->key_op_handler (vm, VNET_CRYPTO_KEY_OP_ADD, index);
- /* *INDENT-ON* */
return index;
}
-clib_error_t *
-crypto_dispatch_enable_disable (int is_enable)
-{
- vnet_crypto_main_t *cm = &crypto_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- u32 skip_master = vlib_num_workers () > 0, i;
- vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
- u8 state_change = 0;
-
- CLIB_MEMORY_STORE_BARRIER ();
- if (is_enable && cm->async_refcnt > 0)
- {
- state_change = 1;
- state =
- cm->dispatch_mode ==
- VNET_CRYPTO_ASYNC_DISPATCH_POLLING ? VLIB_NODE_STATE_POLLING :
- VLIB_NODE_STATE_INTERRUPT;
- }
-
- if (!is_enable && cm->async_refcnt == 0)
- {
- state_change = 1;
- state = VLIB_NODE_STATE_DISABLED;
- }
-
- if (state_change)
- for (i = skip_master; i < tm->n_vlib_mains; i++)
- {
- vlib_main_t *ovm = vlib_get_main_by_index (i);
- if (state != vlib_node_get_state (ovm, cm->crypto_node_index))
- vlib_node_set_state (ovm, cm->crypto_node_index, state);
- }
- return 0;
-}
-
static_always_inline void
crypto_set_active_async_engine (vnet_crypto_async_op_data_t * od,
vnet_crypto_async_op_id_t id, u32 ei)
@@ -497,11 +542,10 @@ crypto_set_active_async_engine (vnet_crypto_async_op_data_t * od,
vnet_crypto_main_t *cm = &crypto_main;
vnet_crypto_engine_t *ce = vec_elt_at_index (cm->engines, ei);
- if (ce->enqueue_handlers[id] && ce->dequeue_handlers[id])
+ if (ce->enqueue_handlers[id] && ce->dequeue_handler)
{
od->active_engine_index_async = ei;
cm->enqueue_handlers[id] = ce->enqueue_handlers[id];
- cm->dequeue_handlers[id] = ce->dequeue_handlers[id];
}
}
@@ -534,6 +578,8 @@ vnet_crypto_set_async_handler2 (char *alg_name, char *engine)
crypto_set_active_async_engine (od, id, p[0]);
}
+ vnet_crypto_update_cm_dequeue_handlers ();
+
return 0;
}
@@ -549,13 +595,11 @@ vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name)
if (!pn)
return ~0;
- /* *INDENT-OFF* */
- vec_foreach (cm->next_nodes, nn)
- {
- if (nn->node_idx == pn->index)
- return nn->next_idx;
- }
- /* *INDENT-ON* */
+ vec_foreach (nn, cm->next_nodes)
+ {
+ if (nn->node_idx == pn->index)
+ return nn->next_idx;
+ }
vec_validate (cm->next_nodes, index);
nn = vec_elt_at_index (cm->next_nodes, index);
@@ -568,70 +612,19 @@ vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name)
}
void
-vnet_crypto_request_async_mode (int is_enable)
+vnet_crypto_set_async_dispatch (u8 mode, u8 adaptive)
{
- vnet_crypto_main_t *cm = &crypto_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- u32 skip_master = vlib_num_workers () > 0, i;
- vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
- u8 state_change = 0;
+ u32 i, node_index = crypto_main.crypto_node_index;
+ vlib_node_state_t state =
+ mode ? VLIB_NODE_STATE_INTERRUPT : VLIB_NODE_STATE_POLLING;
- CLIB_MEMORY_STORE_BARRIER ();
- if (is_enable && cm->async_refcnt == 0)
- {
- state_change = 1;
- state =
- cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_POLLING ?
- VLIB_NODE_STATE_POLLING : VLIB_NODE_STATE_INTERRUPT;
- }
- if (!is_enable && cm->async_refcnt == 1)
- {
- state_change = 1;
- state = VLIB_NODE_STATE_DISABLED;
- }
-
- if (state_change)
- for (i = skip_master; i < tm->n_vlib_mains; i++)
- {
- vlib_main_t *ovm = vlib_get_main_by_index (i);
- if (state != vlib_node_get_state (ovm, cm->crypto_node_index))
- vlib_node_set_state (ovm, cm->crypto_node_index, state);
- }
-
- if (is_enable)
- cm->async_refcnt += 1;
- else if (cm->async_refcnt > 0)
- cm->async_refcnt -= 1;
-}
-
-void
-vnet_crypto_set_async_dispatch_mode (u8 mode)
-{
- vnet_crypto_main_t *cm = &crypto_main;
- u32 skip_master = vlib_num_workers () > 0, i;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
-
- CLIB_MEMORY_STORE_BARRIER ();
- cm->dispatch_mode = mode;
- if (mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT)
- {
- state =
- cm->async_refcnt == 0 ?
- VLIB_NODE_STATE_DISABLED : VLIB_NODE_STATE_INTERRUPT;
- }
- else if (mode == VNET_CRYPTO_ASYNC_DISPATCH_POLLING)
- {
- state =
- cm->async_refcnt == 0 ?
- VLIB_NODE_STATE_DISABLED : VLIB_NODE_STATE_POLLING;
- }
-
- for (i = skip_master; i < tm->n_vlib_mains; i++)
+ for (i = vlib_num_workers () > 0; i < tm->n_vlib_mains; i++)
{
vlib_main_t *ovm = vlib_get_main_by_index (i);
- if (state != vlib_node_get_state (ovm, cm->crypto_node_index))
- vlib_node_set_state (ovm, cm->crypto_node_index, state);
+ vlib_node_set_state (ovm, node_index, state);
+ vlib_node_set_flag (ovm, node_index, VLIB_NODE_FLAG_ADAPTIVE_MODE,
+ adaptive);
}
}
@@ -730,18 +723,15 @@ vnet_crypto_init (vlib_main_t * vm)
vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_crypto_thread_t *ct = 0;
- cm->dispatch_mode = VNET_CRYPTO_ASYNC_DISPATCH_POLLING;
cm->engine_index_by_name = hash_create_string ( /* size */ 0,
sizeof (uword));
cm->alg_index_by_name = hash_create_string (0, sizeof (uword));
cm->async_alg_index_by_name = hash_create_string (0, sizeof (uword));
vec_validate_aligned (cm->threads, tm->n_vlib_mains, CLIB_CACHE_LINE_BYTES);
vec_foreach (ct, cm->threads)
- pool_alloc_aligned (ct->frame_pool, VNET_CRYPTO_FRAME_POOL_SIZE,
- CLIB_CACHE_LINE_BYTES);
+ pool_init_fixed (ct->frame_pool, VNET_CRYPTO_FRAME_POOL_SIZE);
vec_validate (cm->algs, VNET_CRYPTO_N_ALGS);
vec_validate (cm->async_algs, VNET_CRYPTO_N_ASYNC_ALGS);
- clib_bitmap_validate (cm->async_active_ids, VNET_CRYPTO_ASYNC_OP_N_IDS);
#define _(n, s, l) \
vnet_crypto_init_cipher_data (VNET_CRYPTO_ALG_##n, \
diff --git a/src/vnet/crypto/crypto.h b/src/vnet/crypto/crypto.h
index 71978b64835..89cf70d19e3 100644
--- a/src/vnet/crypto/crypto.h
+++ b/src/vnet/crypto/crypto.h
@@ -33,11 +33,14 @@
_(AES_256_CTR, "aes-256-ctr", 32)
/* CRYPTO_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES */
-#define foreach_crypto_aead_alg \
- _(AES_128_GCM, "aes-128-gcm", 16) \
- _(AES_192_GCM, "aes-192-gcm", 24) \
- _(AES_256_GCM, "aes-256-gcm", 32) \
- _(CHACHA20_POLY1305, "chacha20-poly1305", 32)
+#define foreach_crypto_aead_alg \
+ _ (AES_128_GCM, "aes-128-gcm", 16) \
+ _ (AES_192_GCM, "aes-192-gcm", 24) \
+ _ (AES_256_GCM, "aes-256-gcm", 32) \
+ _ (AES_128_NULL_GMAC, "aes-128-null-gmac", 16) \
+ _ (AES_192_NULL_GMAC, "aes-192-null-gmac", 24) \
+ _ (AES_256_NULL_GMAC, "aes-256-null-gmac", 32) \
+ _ (CHACHA20_POLY1305, "chacha20-poly1305", 32)
#define foreach_crypto_hash_alg \
_ (SHA1, "sha-1") \
@@ -82,15 +85,22 @@ typedef enum
/** async crypto **/
/* CRYPTO_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES, TAG_LEN, AAD_LEN */
-#define foreach_crypto_aead_async_alg \
- _(AES_128_GCM, "aes-128-gcm-aad8", 16, 16, 8) \
- _(AES_128_GCM, "aes-128-gcm-aad12", 16, 16, 12) \
- _(AES_192_GCM, "aes-192-gcm-aad8", 24, 16, 8) \
- _(AES_192_GCM, "aes-192-gcm-aad12", 24, 16, 12) \
- _(AES_256_GCM, "aes-256-gcm-aad8", 32, 16, 8) \
- _(AES_256_GCM, "aes-256-gcm-aad12", 32, 16, 12) \
- _(CHACHA20_POLY1305, "chacha20-poly1305-aad8", 32, 16, 8) \
- _(CHACHA20_POLY1305, "chacha20-poly1305-aad12", 32, 16, 12)
+#define foreach_crypto_aead_async_alg \
+ _ (AES_128_GCM, "aes-128-gcm-aad8", 16, 16, 8) \
+ _ (AES_128_GCM, "aes-128-gcm-aad12", 16, 16, 12) \
+ _ (AES_192_GCM, "aes-192-gcm-aad8", 24, 16, 8) \
+ _ (AES_192_GCM, "aes-192-gcm-aad12", 24, 16, 12) \
+ _ (AES_256_GCM, "aes-256-gcm-aad8", 32, 16, 8) \
+ _ (AES_256_GCM, "aes-256-gcm-aad12", 32, 16, 12) \
+ _ (AES_128_NULL_GMAC, "aes-128-null-gmac-aad8", 16, 16, 8) \
+ _ (AES_128_NULL_GMAC, "aes-128-null-gmac-aad12", 16, 16, 12) \
+ _ (AES_192_NULL_GMAC, "aes-192-null-gmac-aad8", 24, 16, 8) \
+ _ (AES_192_NULL_GMAC, "aes-192-null-gmac-aad12", 24, 16, 12) \
+ _ (AES_256_NULL_GMAC, "aes-256-null-gmac-aad8", 32, 16, 8) \
+ _ (AES_256_NULL_GMAC, "aes-256-null-gmac-aad12", 32, 16, 12) \
+ _ (CHACHA20_POLY1305, "chacha20-poly1305-aad8", 32, 16, 8) \
+ _ (CHACHA20_POLY1305, "chacha20-poly1305-aad12", 32, 16, 12) \
+ _ (CHACHA20_POLY1305, "chacha20-poly1305", 32, 16, 0)
/* CRYPTO_ID, INTEG_ID, PRETTY_NAME, KEY_LENGTH_IN_BYTES, DIGEST_LEN */
#define foreach_crypto_link_async_alg \
@@ -141,7 +151,6 @@ typedef enum
VNET_CRYPTO_OP_N_STATUS,
} vnet_crypto_op_status_t;
-/* *INDENT-OFF* */
typedef enum
{
VNET_CRYPTO_ALG_NONE = 0,
@@ -230,7 +239,6 @@ typedef enum
#undef _
VNET_CRYPTO_N_OP_IDS,
} vnet_crypto_op_id_t;
-/* *INDENT-ON* */
typedef enum
{
@@ -259,9 +267,8 @@ typedef struct
vnet_crypto_op_id_t op:16;
vnet_crypto_op_status_t status:8;
u8 flags;
-#define VNET_CRYPTO_OP_FLAG_INIT_IV (1 << 0)
-#define VNET_CRYPTO_OP_FLAG_HMAC_CHECK (1 << 1)
-#define VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS (1 << 2)
+#define VNET_CRYPTO_OP_FLAG_HMAC_CHECK (1 << 0)
+#define VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS (1 << 1)
union
{
@@ -337,7 +344,7 @@ typedef struct
i16 crypto_start_offset; /* first buffer offset */
i16 integ_start_offset;
/* adj total_length for integ, e.g.4 bytes for IPSec ESN */
- u16 integ_length_adj;
+ i16 integ_length_adj;
vnet_crypto_op_status_t status : 8;
u8 flags; /**< share same VNET_CRYPTO_OP_FLAG_* values */
} vnet_crypto_async_frame_elt_t;
@@ -422,12 +429,15 @@ void vnet_crypto_register_key_handler (vlib_main_t * vm, u32 engine_index,
/** async crypto register functions */
u32 vnet_crypto_register_post_node (vlib_main_t * vm, char *post_node_name);
-void vnet_crypto_register_async_handler (vlib_main_t * vm,
- u32 engine_index,
- vnet_crypto_async_op_id_t opt,
- vnet_crypto_frame_enqueue_t * enq_fn,
- vnet_crypto_frame_dequeue_t *
- deq_fn);
+
+void
+vnet_crypto_register_enqueue_handler (vlib_main_t *vm, u32 engine_index,
+ vnet_crypto_async_op_id_t opt,
+ vnet_crypto_frame_enqueue_t *enq_fn);
+
+void
+vnet_crypto_register_dequeue_handler (vlib_main_t *vm, u32 engine_index,
+ vnet_crypto_frame_dequeue_t *deq_fn);
typedef struct
{
@@ -439,7 +449,7 @@ typedef struct
vnet_crypto_chained_ops_handler_t
* chained_ops_handlers[VNET_CRYPTO_N_OP_IDS];
vnet_crypto_frame_enqueue_t *enqueue_handlers[VNET_CRYPTO_ASYNC_OP_N_IDS];
- vnet_crypto_frame_dequeue_t *dequeue_handlers[VNET_CRYPTO_ASYNC_OP_N_IDS];
+ vnet_crypto_frame_dequeue_t *dequeue_handler;
} vnet_crypto_engine_t;
typedef struct
@@ -456,7 +466,6 @@ typedef struct
vnet_crypto_chained_ops_handler_t **chained_ops_handlers;
vnet_crypto_frame_enqueue_t **enqueue_handlers;
vnet_crypto_frame_dequeue_t **dequeue_handlers;
- clib_bitmap_t *async_active_ids;
vnet_crypto_op_data_t opt_data[VNET_CRYPTO_N_OP_IDS];
vnet_crypto_async_op_data_t async_opt_data[VNET_CRYPTO_ASYNC_OP_N_IDS];
vnet_crypto_engine_t *engines;
@@ -465,12 +474,8 @@ typedef struct
uword *alg_index_by_name;
uword *async_alg_index_by_name;
vnet_crypto_async_alg_data_t *async_algs;
- u32 async_refcnt;
vnet_crypto_async_next_node_t *next_nodes;
u32 crypto_node_index;
-#define VNET_CRYPTO_ASYNC_DISPATCH_POLLING 0
-#define VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT 1
- u8 dispatch_mode;
} vnet_crypto_main_t;
extern vnet_crypto_main_t crypto_main;
@@ -481,7 +486,7 @@ u32 vnet_crypto_process_chained_ops (vlib_main_t * vm, vnet_crypto_op_t ops[],
u32 vnet_crypto_process_ops (vlib_main_t * vm, vnet_crypto_op_t ops[],
u32 n_ops);
-
+void vnet_crypto_set_async_dispatch (u8 mode, u8 adaptive);
int vnet_crypto_set_handler2 (char *ops_handler_name, char *engine,
crypto_op_class_type_t oct);
int vnet_crypto_is_set_handler (vnet_crypto_alg_t alg);
@@ -489,6 +494,7 @@ int vnet_crypto_is_set_handler (vnet_crypto_alg_t alg);
u32 vnet_crypto_key_add (vlib_main_t * vm, vnet_crypto_alg_t alg,
u8 * data, u16 length);
void vnet_crypto_key_del (vlib_main_t * vm, vnet_crypto_key_index_t index);
+void vnet_crypto_key_update (vlib_main_t *vm, vnet_crypto_key_index_t index);
/**
* Use 2 created keys to generate new key for linked algs (cipher + integ)
@@ -498,21 +504,13 @@ u32 vnet_crypto_key_add_linked (vlib_main_t * vm,
vnet_crypto_key_index_t index_crypto,
vnet_crypto_key_index_t index_integ);
-clib_error_t *crypto_dispatch_enable_disable (int is_enable);
-
int vnet_crypto_set_async_handler2 (char *alg_name, char *engine);
int vnet_crypto_is_set_async_handler (vnet_crypto_async_op_id_t opt);
-void vnet_crypto_request_async_mode (int is_enable);
-
-void vnet_crypto_set_async_dispatch_mode (u8 mode);
-
vnet_crypto_async_alg_t vnet_crypto_link_algs (vnet_crypto_alg_t crypto_alg,
vnet_crypto_alg_t integ_alg);
-clib_error_t *crypto_dispatch_enable_disable (int is_enable);
-
format_function_t format_vnet_crypto_alg;
format_function_t format_vnet_crypto_engine;
format_function_t format_vnet_crypto_op;
@@ -566,12 +564,16 @@ vnet_crypto_async_get_frame (vlib_main_t * vm, vnet_crypto_async_op_id_t opt)
vnet_crypto_thread_t *ct = cm->threads + vm->thread_index;
vnet_crypto_async_frame_t *f = NULL;
- pool_get_aligned (ct->frame_pool, f, CLIB_CACHE_LINE_BYTES);
- if (CLIB_DEBUG > 0)
- clib_memset (f, 0xfe, sizeof (*f));
- f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
- f->op = opt;
- f->n_elts = 0;
+ if (PREDICT_TRUE (pool_free_elts (ct->frame_pool)))
+ {
+ pool_get_aligned (ct->frame_pool, f, CLIB_CACHE_LINE_BYTES);
+#if CLIB_DEBUG > 0
+ clib_memset (f, 0xfe, sizeof (*f));
+#endif
+ f->state = VNET_CRYPTO_FRAME_STATE_NOT_PROCESSED;
+ f->op = opt;
+ f->n_elts = 0;
+ }
return f;
}
@@ -591,20 +593,26 @@ vnet_crypto_async_submit_open_frame (vlib_main_t * vm,
{
vnet_crypto_main_t *cm = &crypto_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
- vnet_crypto_async_op_id_t opt = frame->op;
- u32 i = vlib_num_workers () > 0;
+ u32 i;
+ vlib_node_t *n;
frame->state = VNET_CRYPTO_FRAME_STATE_PENDING;
frame->enqueue_thread_index = vm->thread_index;
+ if (PREDICT_FALSE (cm->enqueue_handlers == NULL))
+ {
+ frame->state = VNET_CRYPTO_FRAME_STATE_ELT_ERROR;
+ return -1;
+ }
+
int ret = (cm->enqueue_handlers[frame->op]) (vm, frame);
- clib_bitmap_set_no_check (cm->async_active_ids, opt, 1);
if (PREDICT_TRUE (ret == 0))
{
- if (cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT)
+ n = vlib_get_node (vm, cm->crypto_node_index);
+ if (n->state == VLIB_NODE_STATE_INTERRUPT)
{
- for (; i < tm->n_vlib_mains; i++)
+ for (i = 0; i < tm->n_vlib_mains; i++)
vlib_node_set_interrupt_pending (vlib_get_main_by_index (i),
cm->crypto_node_index);
}
@@ -621,7 +629,7 @@ static_always_inline void
vnet_crypto_async_add_to_frame (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
u32 key_index, u32 crypto_len,
i16 integ_len_adj, i16 crypto_start_offset,
- u16 integ_start_offset, u32 buffer_index,
+ i16 integ_start_offset, u32 buffer_index,
u16 next_node, u8 *iv, u8 *tag, u8 *aad,
u8 flags)
{
diff --git a/src/vnet/crypto/crypto_api.c b/src/vnet/crypto/crypto_api.c
index 49b12a3d377..e701864a5ba 100644
--- a/src/vnet/crypto/crypto_api.c
+++ b/src/vnet/crypto/crypto_api.c
@@ -46,12 +46,24 @@ vl_api_crypto_set_async_dispatch_t_handler (vl_api_crypto_set_async_dispatch_t
vl_api_crypto_set_async_dispatch_reply_t *rmp;
int rv = 0;
- vnet_crypto_set_async_dispatch_mode ((u8) mp->mode);
+ vnet_crypto_set_async_dispatch ((u8) mp->mode, 0);
REPLY_MACRO (VL_API_CRYPTO_SET_ASYNC_DISPATCH_REPLY);
}
static void
+vl_api_crypto_set_async_dispatch_v2_t_handler (
+ vl_api_crypto_set_async_dispatch_v2_t *mp)
+{
+ vl_api_crypto_set_async_dispatch_v2_reply_t *rmp;
+ int rv = 0;
+
+ vnet_crypto_set_async_dispatch ((u8) mp->mode, mp->adaptive ? 1 : 0);
+
+ REPLY_MACRO (VL_API_CRYPTO_SET_ASYNC_DISPATCH_V2_REPLY);
+}
+
+static void
vl_api_crypto_set_handler_t_handler (vl_api_crypto_set_handler_t * mp)
{
vl_api_crypto_set_handler_reply_t *rmp;
diff --git a/src/vnet/crypto/node.c b/src/vnet/crypto/node.c
index 7f34ec10fff..ee7f344ce68 100644
--- a/src/vnet/crypto/node.c
+++ b/src/vnet/crypto/node.c
@@ -114,8 +114,8 @@ crypto_dequeue_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
n_cache += cf->n_elts;
if (n_cache >= VLIB_FRAME_SIZE)
{
- vlib_buffer_enqueue_to_next (vm, node, ct->buffer_indices,
- ct->nexts, n_cache);
+ vlib_buffer_enqueue_to_next_vec (vm, node, &ct->buffer_indices,
+ &ct->nexts, n_cache);
n_cache = 0;
}
@@ -135,8 +135,11 @@ crypto_dequeue_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_crypto_async_free_frame (vm, cf);
}
/* signal enqueue-thread to dequeue the processed frame (n_elts>0) */
- if (cm->dispatch_mode == VNET_CRYPTO_ASYNC_DISPATCH_INTERRUPT
- && n_elts > 0)
+ if (n_elts > 0 &&
+ ((node->state == VLIB_NODE_STATE_POLLING &&
+ (node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)) ||
+ node->state == VLIB_NODE_STATE_INTERRUPT))
{
vlib_node_set_interrupt_pending (
vlib_get_main_by_index (enqueue_thread_idx),
@@ -158,27 +161,35 @@ VLIB_NODE_FN (crypto_dispatch_node) (vlib_main_t * vm,
{
vnet_crypto_main_t *cm = &crypto_main;
vnet_crypto_thread_t *ct = cm->threads + vm->thread_index;
- u32 n_dispatched = 0, n_cache = 0;
- u32 index;
-
- /* *INDENT-OFF* */
- clib_bitmap_foreach (index, cm->async_active_ids) {
- n_cache = crypto_dequeue_frame (vm, node, ct, cm->dequeue_handlers[index],
- n_cache, &n_dispatched);
- }
- /* *INDENT-ON* */
+ u32 n_dispatched = 0, n_cache = 0, index;
+ vec_foreach_index (index, cm->dequeue_handlers)
+ {
+ n_cache = crypto_dequeue_frame (
+ vm, node, ct, cm->dequeue_handlers[index], n_cache, &n_dispatched);
+ }
if (n_cache)
- vlib_buffer_enqueue_to_next (vm, node, ct->buffer_indices, ct->nexts,
- n_cache);
+ vlib_buffer_enqueue_to_next_vec (vm, node, &ct->buffer_indices, &ct->nexts,
+ n_cache);
+
+ /* if there are still pending tasks and node in interrupt mode,
+ sending current thread signal to dequeue next loop */
+ if (pool_elts (ct->frame_pool) > 0 &&
+ ((node->state == VLIB_NODE_STATE_POLLING &&
+ (node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)) ||
+ node->state == VLIB_NODE_STATE_INTERRUPT))
+ {
+ vlib_node_set_interrupt_pending (vm, node->node_index);
+ }
return n_dispatched;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (crypto_dispatch_node) = {
.name = "crypto-dispatch",
.type = VLIB_NODE_TYPE_INPUT,
- .state = VLIB_NODE_STATE_DISABLED,
+ .flags = VLIB_NODE_FLAG_ADAPTIVE_MODE,
+ .state = VLIB_NODE_STATE_INTERRUPT,
.format_trace = format_crypto_dispatch_trace,
.n_errors = ARRAY_LEN(vnet_crypto_async_error_strings),
@@ -192,7 +203,6 @@ VLIB_REGISTER_NODE (crypto_dispatch_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/dev/api.c b/src/vnet/dev/api.c
new file mode 100644
index 00000000000..114b63d6662
--- /dev/null
+++ b/src/vnet/dev/api.c
@@ -0,0 +1,275 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/api.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "api",
+};
+
+static int
+_vnet_dev_queue_size_validate (u32 size, vnet_dev_queue_config_t c)
+{
+ if (size < c.min_size)
+ return 0;
+ if (size > c.max_size)
+ return 0;
+ if (c.size_is_power_of_two && count_set_bits (size) != 1)
+ return 0;
+ if (c.multiplier && size % c.multiplier)
+ return 0;
+
+ return 1;
+}
+
+vnet_dev_rv_t
+vnet_dev_api_attach (vlib_main_t *vm, vnet_dev_api_attach_args_t *args)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_t *dev = 0;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ vnet_dev_bus_t *bus;
+ vnet_dev_driver_t *driver;
+ void *bus_dev_info = 0;
+ u8 *dev_desc = 0;
+
+ log_debug (0, "%s driver %s flags '%U' args '%v'", args->device_id,
+ args->driver_name, format_vnet_dev_flags, &args->flags,
+ args->args);
+
+ if (vnet_dev_by_id (args->device_id))
+ return VNET_DEV_ERR_ALREADY_IN_USE;
+
+ bus = vnet_dev_find_device_bus (vm, args->device_id);
+ if (!bus)
+ {
+ log_err (dev, "unknown bus");
+ rv = VNET_DEV_ERR_INVALID_BUS;
+ goto done;
+ }
+
+ bus_dev_info = vnet_dev_get_device_info (vm, args->device_id);
+ if (!bus_dev_info)
+ {
+ log_err (dev, "invalid or unsupported device id");
+ rv = VNET_DEV_ERR_INVALID_DEVICE_ID;
+ goto done;
+ }
+
+ vec_foreach (driver, dm->drivers)
+ {
+ if (args->driver_name[0] &&
+ strcmp (args->driver_name, driver->registration->name))
+ continue;
+ if (driver->ops.probe &&
+ (dev_desc = driver->ops.probe (vm, bus->index, bus_dev_info)))
+ break;
+ }
+
+ if (!dev_desc)
+ {
+ log_err (dev, "driver not available for %s", args->device_id);
+ rv = VNET_DEV_ERR_DRIVER_NOT_AVAILABLE;
+ goto done;
+ }
+
+ dev = vnet_dev_alloc (vm, args->device_id, driver);
+ if (!dev)
+ {
+ log_err (dev, "dev alloc failed for %s", args->device_id);
+ rv = VNET_DEV_ERR_BUG;
+ goto done;
+ }
+ dev->description = dev_desc;
+
+ if (driver->registration->args)
+ for (vnet_dev_arg_t *a = driver->registration->args;
+ a->type != VNET_DEV_ARG_END; a++)
+ vec_add1 (dev->args, *a);
+
+ if (args->args)
+ {
+ if ((rv = vnet_dev_arg_parse (vm, dev, dev->args, args->args)) !=
+ VNET_DEV_OK)
+ goto done;
+ }
+
+ if ((args->flags.e & VNET_DEV_F_NO_STATS) == 0)
+ dev->poll_stats = 1;
+
+ log_debug (0, "found '%v'", dev->description);
+
+ rv = vnet_dev_process_call_op (vm, dev, vnet_dev_init);
+
+done:
+ if (bus_dev_info)
+ bus->ops.free_device_info (vm, bus_dev_info);
+
+ if (rv != VNET_DEV_OK && dev)
+ vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_free);
+ else if (dev)
+ args->dev_index = dev->index;
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_api_detach (vlib_main_t *vm, vnet_dev_api_detach_args_t *args)
+{
+ vnet_dev_t *dev = vnet_dev_by_index (args->dev_index);
+
+ log_debug (dev, "detach");
+
+ if (dev)
+ return vnet_dev_process_call_op_no_rv (vm, dev, vnet_dev_detach);
+
+ return VNET_DEV_ERR_NOT_FOUND;
+}
+
+vnet_dev_rv_t
+vnet_dev_api_reset (vlib_main_t *vm, vnet_dev_api_reset_args_t *args)
+{
+ vnet_dev_t *dev = vnet_dev_by_id (args->device_id);
+
+ log_debug (dev, "detach");
+
+ if (!dev)
+ return VNET_DEV_ERR_NOT_FOUND;
+
+ if (dev->ops.reset)
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ return vnet_dev_process_call_op (vm, dev, vnet_dev_reset);
+}
+
+vnet_dev_rv_t
+vnet_dev_api_create_port_if (vlib_main_t *vm,
+ vnet_dev_api_create_port_if_args_t *args)
+{
+ vnet_dev_t *dev = vnet_dev_by_index (args->dev_index);
+ vnet_dev_port_t *port = 0;
+ u16 n_threads = vlib_get_n_threads ();
+ int default_is_intr_mode;
+ vnet_dev_rv_t rv;
+
+ log_debug (dev,
+ "create_port_if: dev_index %u port %u intf_name '%s' num_rx_q %u "
+ "num_tx_q %u rx_q_sz %u tx_q_sz %u, flags '%U' args '%v'",
+ args->dev_index, args->port_id, args->intf_name,
+ args->num_rx_queues, args->num_tx_queues, args->rx_queue_size,
+ args->tx_queue_size, format_vnet_dev_port_flags, &args->flags,
+ args->args);
+
+ if (dev == 0)
+ return VNET_DEV_ERR_NOT_FOUND;
+
+ foreach_vnet_dev_port (p, dev)
+ if (p->port_id == args->port_id)
+ {
+ port = p;
+ break;
+ }
+
+ if (!port)
+ return VNET_DEV_ERR_INVALID_DEVICE_ID;
+
+ if (port->interface_created)
+ return VNET_DEV_ERR_ALREADY_EXISTS;
+
+ if (args->args)
+ {
+ rv = vnet_dev_arg_parse (vm, dev, port->args, args->args);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+
+ default_is_intr_mode = (args->flags.e & VNET_DEV_PORT_F_INTERRUPT_MODE) != 0;
+ if (default_is_intr_mode && port->attr.caps.interrupt_mode == 0)
+ {
+ log_err (dev, "interrupt mode requested and port doesn't support it");
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+ }
+
+ if (args->num_rx_queues)
+ {
+ if (args->num_rx_queues > port->attr.max_rx_queues)
+ return VNET_DEV_ERR_INVALID_NUM_RX_QUEUES;
+ port->intf.num_rx_queues = args->num_rx_queues;
+ }
+ else
+ port->intf.num_rx_queues = clib_min (port->attr.max_tx_queues, 1);
+
+ if (args->num_tx_queues)
+ {
+ if (args->num_tx_queues > port->attr.max_tx_queues)
+ return VNET_DEV_ERR_INVALID_NUM_TX_QUEUES;
+ port->intf.num_tx_queues = args->num_tx_queues;
+ }
+ else
+ port->intf.num_tx_queues = clib_min (port->attr.max_tx_queues, n_threads);
+
+ if (args->rx_queue_size)
+ {
+ if (!_vnet_dev_queue_size_validate (args->rx_queue_size,
+ port->rx_queue_config))
+ return VNET_DEV_ERR_INVALID_RX_QUEUE_SIZE;
+ port->intf.rxq_sz = args->rx_queue_size;
+ }
+ else
+ port->intf.rxq_sz = port->rx_queue_config.default_size;
+
+ if (args->tx_queue_size)
+ {
+ if (!_vnet_dev_queue_size_validate (args->tx_queue_size,
+ port->tx_queue_config))
+ return VNET_DEV_ERR_INVALID_TX_QUEUE_SIZE;
+ port->intf.txq_sz = args->tx_queue_size;
+ }
+ else
+ port->intf.txq_sz = port->tx_queue_config.default_size;
+
+ clib_memcpy (port->intf.name, args->intf_name, sizeof (port->intf.name));
+ port->intf.default_is_intr_mode = default_is_intr_mode;
+
+ rv = vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_create);
+ args->sw_if_index = (rv == VNET_DEV_OK) ? port->intf.sw_if_index : ~0;
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_api_remove_port_if (vlib_main_t *vm,
+ vnet_dev_api_remove_port_if_args_t *args)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_sw_interface_t *si;
+ vnet_hw_interface_t *hi;
+ vnet_dev_port_t *port;
+
+ si = vnet_get_sw_interface_or_null (vnm, args->sw_if_index);
+ if (!si)
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ hi = vnet_get_hw_interface_or_null (vnm, si->hw_if_index);
+ if (!hi)
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ if (pool_is_free_index (dm->ports_by_dev_instance, hi->dev_instance))
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ port = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+
+ if (port->intf.hw_if_index != si->hw_if_index)
+ return VNET_DEV_ERR_UNKNOWN_INTERFACE;
+
+ return vnet_dev_process_call_port_op (vm, port, vnet_dev_port_if_remove);
+}
diff --git a/src/vnet/dev/api.h b/src/vnet/dev/api.h
new file mode 100644
index 00000000000..1b7bf27d62a
--- /dev/null
+++ b/src/vnet/dev/api.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_API_H_
+#define _VNET_DEV_API_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+
+typedef struct
+{
+ vnet_dev_device_id_t device_id;
+ vnet_dev_driver_name_t driver_name;
+ vnet_dev_flags_t flags;
+ u8 *args;
+
+ /* return */
+ u32 dev_index;
+} vnet_dev_api_attach_args_t;
+
+vnet_dev_rv_t vnet_dev_api_attach (vlib_main_t *,
+ vnet_dev_api_attach_args_t *);
+
+typedef struct
+{
+ u32 dev_index;
+} vnet_dev_api_detach_args_t;
+vnet_dev_rv_t vnet_dev_api_detach (vlib_main_t *,
+ vnet_dev_api_detach_args_t *);
+
+typedef struct
+{
+ vnet_dev_device_id_t device_id;
+} vnet_dev_api_reset_args_t;
+vnet_dev_rv_t vnet_dev_api_reset (vlib_main_t *, vnet_dev_api_reset_args_t *);
+
+typedef struct
+{
+ u32 dev_index;
+ vnet_dev_if_name_t intf_name;
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u16 rx_queue_size;
+ u16 tx_queue_size;
+ vnet_dev_port_id_t port_id;
+ vnet_dev_port_flags_t flags;
+ u8 *args;
+
+ /* return */
+ u32 sw_if_index;
+} vnet_dev_api_create_port_if_args_t;
+
+vnet_dev_rv_t
+vnet_dev_api_create_port_if (vlib_main_t *,
+ vnet_dev_api_create_port_if_args_t *);
+
+typedef struct
+{
+ u32 sw_if_index;
+} vnet_dev_api_remove_port_if_args_t;
+
+vnet_dev_rv_t
+vnet_dev_api_remove_port_if (vlib_main_t *,
+ vnet_dev_api_remove_port_if_args_t *);
+
+#endif /* _VNET_DEV_API_H_ */
diff --git a/src/vnet/dev/args.c b/src/vnet/dev/args.c
new file mode 100644
index 00000000000..e302517cc61
--- /dev/null
+++ b/src/vnet/dev/args.c
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/types.h>
+#include <vppinfra/format_table.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "args",
+};
+
+void
+vnet_dev_arg_clear_value (vnet_dev_arg_t *a)
+{
+ if (a->type == VNET_DEV_ARG_TYPE_STRING)
+ vec_free (a->val.string);
+ a->val = (typeof (a->val)){};
+ a->val_set = 0;
+}
+
+void
+vnet_dev_arg_free (vnet_dev_arg_t **vp)
+{
+ vnet_dev_arg_t *v;
+ vec_foreach (v, *vp)
+ vnet_dev_arg_clear_value (v);
+ vec_free (*vp);
+}
+
+vnet_dev_rv_t
+vnet_dev_arg_parse (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_arg_t *args,
+ u8 *str)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ unformat_input_t in;
+ u8 *name = 0;
+ u8 *err = 0;
+
+ log_debug (dev, "input '%v'", str);
+ if (args == 0)
+ return rv;
+
+ unformat_init_string (&in, (char *) str, vec_len (str));
+
+ while (unformat (&in, "%U=", unformat_token, "a-zA-Z0-9_", &name))
+ {
+ vnet_dev_arg_t *a = args;
+ vec_add1 (name, 0);
+ while (a < vec_end (args))
+ if (strcmp (a->name, (char *) name) == 0)
+ break;
+ else
+ a++;
+
+ if (a->type == VNET_DEV_ARG_TYPE_BOOL)
+ {
+
+ if (unformat (&in, "true") || unformat (&in, "1") ||
+ unformat (&in, "on") || unformat (&in, "yes"))
+ a->val.boolean = 1;
+ else if (unformat (&in, "false") || unformat (&in, "0") ||
+ unformat (&in, "off") || unformat (&in, "no"))
+ a->val.boolean = 0;
+ else
+ {
+ log_err (dev, "unable to parse args: %U", format_unformat_error,
+ &in);
+ err = format (
+ 0,
+ "boolean value expected ('yes', 'no', '0', '1', 'on', "
+ "'off', 'true' or 'false') for argument '%s', found '%U'",
+ a->name, format_unformat_error, &in);
+ goto done;
+ }
+ }
+ else if (a->type == VNET_DEV_ARG_TYPE_UINT32)
+ {
+ u32 val, min = 0, max = CLIB_U32_MAX;
+ if (!unformat (&in, "%u", &val))
+ {
+ err = format (0,
+ "unsigned integer in range %u - %u expected for "
+ "argument '%s', found '%U'",
+ min, max, a->name, format_unformat_error, &in);
+ goto done;
+ }
+
+ if (a->min || a->max)
+ {
+ min = a->min;
+ max = a->max;
+ }
+
+ if (val < min || val > max)
+ {
+ err = format (0,
+ "unsigned integer in range %u - %u expected for "
+ "argument '%s', found '%u'",
+ min, max, a->name, val);
+ goto done;
+ }
+ a->val.uint32 = val;
+ }
+ else if (a->type == VNET_DEV_ARG_TYPE_STRING)
+ {
+ if (!unformat (&in, "%U", unformat_double_quoted_string,
+ &a->val.string))
+ {
+ err = format (
+ 0,
+ "double quoted string expected for argument '%s', found '%U'",
+ a->name, format_unformat_error, &in);
+ goto done;
+ }
+
+ if (a->min && vec_len (a->val.string) < a->min)
+ {
+ err =
+ format (0, "string '%v' too short, must be at least %u chars",
+ a->val.string, a->min);
+ goto done;
+ }
+ if (a->max && vec_len (a->val.string) > a->max)
+ {
+ err = format (
+ 0, "string '%v' too long, must be no longer than %u chars",
+ a->val.string, a->max);
+ goto done;
+ }
+ }
+ else
+ {
+ err = format (0, "unknown argument '%s'", name);
+ goto done;
+ }
+
+ a->val_set = 1;
+ log_debug (dev, "name '%s' type %U value %U", name,
+ format_vnet_dev_arg_type, a->type, format_vnet_dev_arg_value,
+ a->type, &a->val);
+ vec_free (name);
+ unformat (&in, ",");
+ }
+
+ if (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT)
+ err = format (0, "unable to parse argument name '%U'",
+ format_unformat_error, &in);
+
+done:
+ if (err)
+ {
+ vnet_dev_arg_t *a = 0;
+ log_err (dev, "%v", err);
+ vec_free (err);
+ vec_foreach (a, args)
+ vnet_dev_arg_clear_value (a);
+ rv = VNET_DEV_ERR_INVALID_ARG;
+ }
+
+ vec_free (name);
+ unformat_free (&in);
+ return rv;
+}
+
+u8 *
+format_vnet_dev_arg_type (u8 *s, va_list *args)
+{
+ vnet_dev_arg_type_t t = va_arg (*args, u32);
+ switch (t)
+ {
+#define _(n, f, val) \
+ case VNET_DEV_ARG_TYPE_##n: \
+ return format (s, #n);
+ foreach_vnet_dev_arg_type
+#undef _
+ default : ASSERT (0);
+ break;
+ }
+ return s;
+}
+
+u8 *
+format_vnet_dev_arg_value (u8 *s, va_list *args)
+{
+ vnet_dev_arg_type_t t = va_arg (*args, u32);
+ vnet_dev_arg_value_t *v = va_arg (*args, vnet_dev_arg_value_t *);
+
+ switch (t)
+ {
+#define _(n, f, value) \
+ case VNET_DEV_ARG_TYPE_##n: \
+ s = format (s, f, v->value); \
+ break;
+ foreach_vnet_dev_arg_type
+#undef _
+ default : break;
+ }
+ return s;
+}
+
+u8 *
+format_vnet_dev_args (u8 *s, va_list *va)
+{
+ vnet_dev_arg_t *a, *args = va_arg (*va, vnet_dev_arg_t *);
+ table_t t = { .no_ansi = 1 };
+
+ table_add_header_col (&t, 4, "Name", "Value", "Default", "Description");
+ table_set_cell_align (&t, -1, 0, TTAA_LEFT);
+ table_set_cell_align (&t, -1, 3, TTAA_LEFT);
+ vec_foreach (a, args)
+ {
+ int r = a - args;
+ table_format_cell (&t, r, 0, "%s", a->name);
+ if (a->val_set)
+ table_format_cell (&t, r, 1, "%U", format_vnet_dev_arg_value, a->type,
+ &a->val);
+ else
+ table_format_cell (&t, r, 1, "<not set>");
+
+ table_format_cell (&t, r, 2, "%U", format_vnet_dev_arg_value, a->type,
+ &a->default_val);
+ table_format_cell (&t, r, 3, "%s", a->desc);
+ table_set_cell_align (&t, r, 0, TTAA_LEFT);
+ table_set_cell_align (&t, r, 3, TTAA_LEFT);
+ }
+
+ s = format (s, "%U", format_table, &t);
+
+ table_free (&t);
+ return s;
+}
diff --git a/src/vnet/dev/args.h b/src/vnet/dev/args.h
new file mode 100644
index 00000000000..a256cfe8e0e
--- /dev/null
+++ b/src/vnet/dev/args.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_ARGS_H_
+#define _VNET_DEV_ARGS_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/dev/errors.h>
+
+#define foreach_vnet_dev_arg_type \
+ _ (BOOL, "%u", boolean) \
+ _ (UINT32, "%u", uint32) \
+ _ (STRING, "\'%v\'", string)
+
+typedef enum
+{
+ VNET_DEV_ARG_END,
+#define _(n, f, v) VNET_DEV_ARG_TYPE_##n,
+ foreach_vnet_dev_arg_type
+#undef _
+} __clib_packed vnet_dev_arg_type_t;
+
+typedef union
+{
+ u8 boolean;
+ u32 uint32;
+ u8 *string;
+} vnet_dev_arg_value_t;
+
+typedef struct
+{
+ char *name;
+ char *desc;
+ vnet_dev_arg_type_t type;
+ u8 val_set;
+ u32 min;
+ u32 max;
+ u64 id;
+ vnet_dev_arg_value_t val;
+ vnet_dev_arg_value_t default_val;
+} vnet_dev_arg_t;
+
+#define VNET_DEV_ARG_BOOL(ud, n, d, ...) \
+ { \
+ .type = VNET_DEV_ARG_TYPE_BOOL, .id = ud, .name = n, .desc = d, \
+ __VA_ARGS__ \
+ }
+#define VNET_DEV_ARG_UINT32(ud, n, d, ...) \
+ { \
+ .type = VNET_DEV_ARG_TYPE_UINT32, .id = ud, .name = n, .desc = d, \
+ __VA_ARGS__ \
+ }
+#define VNET_DEV_ARG_STRING(ud, n, d, ...) \
+ { \
+ .type = VNET_DEV_ARG_TYPE_STRING, .id = ud, .name = n, .desc = d, \
+ __VA_ARGS__ \
+ }
+#define VNET_DEV_ARG_END() \
+ { \
+ .type = VNET_DEV_ARG_END \
+ }
+
+#define VNET_DEV_ARGS(...) \
+ (vnet_dev_arg_t[]) { __VA_ARGS__, VNET_DEV_ARG_END () }
+
+#define foreach_vnet_dev_args(a, d) \
+ for (typeof ((d)->args[0]) *(a) = (d)->args; (a) < vec_end ((d)->args); \
+ (a)++)
+#define foreach_vnet_dev_port_args(a, p) \
+ for (typeof ((p)->args[0]) *(a) = (p)->args; (a) < vec_end ((p)->args); \
+ (a)++)
+
+#endif /* _VNET_DEV_ARGS_H_ */
diff --git a/src/vnet/dev/cli.c b/src/vnet/dev/cli.c
new file mode 100644
index 00000000000..53be4483183
--- /dev/null
+++ b/src/vnet/dev/cli.c
@@ -0,0 +1,331 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/api.h>
+
+static clib_error_t *
+device_attach_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_api_attach_args_t a = {};
+ vnet_dev_rv_t rv;
+
+ if (!unformat_user (input, unformat_c_string_array, a.device_id,
+ sizeof (a.device_id)))
+ return clib_error_return (0, "please specify valid device id");
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!a.driver_name[0] &&
+ unformat (input, "driver %U", unformat_c_string_array, a.driver_name,
+ sizeof (a.driver_name)))
+ ;
+ else if (!a.flags.n &&
+ unformat (input, "flags %U", unformat_vnet_dev_flags, &a.flags))
+ ;
+ else if (!a.args && unformat (input, "args %v", &a.args))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ rv = vnet_dev_api_attach (vm, &a);
+
+ vec_free (a.args);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to attach '%s': %U", a.device_id,
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_attach_cmd, static) = {
+ .path = "device attach",
+ .short_help = "device attach <device-id> [driver <name>] "
+ "[args <dev-args>]",
+ .function = device_attach_cmd_fn,
+};
+
+static clib_error_t *
+device_detach_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_rv_t rv;
+ vnet_dev_device_id_t device_id = {};
+ vnet_dev_t *dev;
+
+ if (!unformat_user (input, unformat_c_string_array, device_id,
+ sizeof (device_id)))
+ return clib_error_return (0, "please specify valid device id");
+
+ dev = vnet_dev_by_id (device_id);
+
+ if (dev)
+ {
+ vnet_dev_api_detach_args_t a = { .dev_index = dev->index };
+ rv = vnet_dev_api_detach (vm, &a);
+ }
+ else
+ rv = VNET_DEV_ERR_UNKNOWN_DEVICE;
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to detach '%s': %U", device_id,
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_detach_cmd, static) = {
+ .path = "device detach",
+ .short_help = "device detach <device-id>",
+ .function = device_detach_cmd_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+device_reset_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_api_reset_args_t a = {};
+ vnet_dev_rv_t rv;
+
+ if (!unformat_user (input, unformat_c_string_array, a.device_id,
+ sizeof (a.device_id)))
+ return clib_error_return (0, "please specify valid device id");
+
+ rv = vnet_dev_api_reset (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to reset '%s': %U", a.device_id,
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_reset_cmd, static) = {
+ .path = "device reset",
+ .short_help = "device reset <device-id>",
+ .function = device_reset_cmd_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+device_create_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_api_create_port_if_args_t a = {};
+ vnet_dev_rv_t rv;
+ vnet_dev_device_id_t device_id = {};
+ vnet_dev_t *dev = 0;
+ u32 n;
+
+ if (unformat_user (input, unformat_c_string_array, device_id,
+ sizeof (device_id)))
+ dev = vnet_dev_by_id (device_id);
+
+ if (!dev)
+ return clib_error_return (0, "please specify valid device id");
+
+ a.dev_index = dev->index;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!a.intf_name[0] &&
+ unformat (input, "if-name %U", unformat_c_string_array, a.intf_name,
+ sizeof (a.intf_name)))
+ ;
+ else if (!a.port_id && unformat (input, "port %u", &n))
+ a.port_id = n;
+ else if (!a.flags.n && unformat (input, "flags %U",
+ unformat_vnet_dev_port_flags, &a.flags))
+ ;
+ else if (!a.num_rx_queues && unformat (input, "num-rx-queues %u", &n))
+ a.num_rx_queues = n;
+ else if (!a.num_tx_queues && unformat (input, "num-tx-queues %u", &n))
+ a.num_tx_queues = n;
+ else if (!a.rx_queue_size && unformat (input, "rx-queues-size %u", &n))
+ a.rx_queue_size = n;
+ else if (!a.tx_queue_size && unformat (input, "tx-queues-size %u", &n))
+ a.tx_queue_size = n;
+ else if (!a.intf_name[0] &&
+ unformat (input, "name %U", unformat_c_string_array,
+ &a.intf_name, sizeof (a.intf_name)))
+ ;
+ else if (!a.args && unformat (input, "args %v", &a.args))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ rv = vnet_dev_api_create_port_if (vm, &a);
+
+ vec_free (a.args);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to create_if '%s': %U", device_id,
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_create_if_cmd, static) = {
+ .path = "device create-interface",
+ .short_help = "device create-interface <device-id> [port <port-id>] "
+ "[args <iface-args>]",
+ .function = device_create_if_cmd_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+device_remove_if_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_api_remove_port_if_args_t a = { .sw_if_index = ~0 };
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_dev_rv_t rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &a.sw_if_index))
+ ;
+ else if (unformat (input, "sw-if-index %u", &a.sw_if_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (a.sw_if_index == ~0)
+ return clib_error_return (0, "please specify existing interface name");
+
+ rv = vnet_dev_api_remove_port_if (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "unable to remove interface: %U",
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (device_remove_if_cmd, static) = {
+ .path = "device remove-interface",
+ .short_help = "device remove-interface [<interface-name> | sw-if-index <n>]",
+ .function = device_remove_if_cmd_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+show_devices_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_format_args_t fa = {}, *a = &fa;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "counters"))
+ fa.counters = 1;
+ else if (unformat (input, "all"))
+ fa.show_zero_counters = 1;
+ else if (unformat (input, "debug"))
+ fa.debug = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ pool_foreach_pointer (dev, dm->devices)
+ {
+ vlib_cli_output (vm, "device '%s':", dev->device_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_info, a, dev);
+ foreach_vnet_dev_port (p, dev)
+ {
+ vlib_cli_output (vm, " Port %u:", p->port_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_port_info, a, p);
+ if (fa.counters)
+ vlib_cli_output (vm, " %U", format_vnet_dev_counters, a,
+ p->counter_main);
+
+ foreach_vnet_dev_port_rx_queue (q, p)
+ {
+ vlib_cli_output (vm, " RX queue %u:", q->queue_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_rx_queue_info,
+ a, q);
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, p)
+ {
+ vlib_cli_output (vm, " TX queue %u:", q->queue_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_tx_queue_info,
+ a, q);
+ }
+ }
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_devices_cmd, static) = {
+ .path = "show device",
+ .short_help = "show device [counters]",
+ .function = show_devices_cmd_fn,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+show_device_counters_cmd_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_format_args_t fa = { .counters = 1 };
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "all"))
+ fa.show_zero_counters = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ pool_foreach_pointer (dev, dm->devices)
+ {
+ vlib_cli_output (vm, "device '%s':", dev->device_id);
+ foreach_vnet_dev_port (p, dev)
+ {
+ vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa,
+ p->counter_main);
+
+ foreach_vnet_dev_port_rx_queue (q, p)
+ if (q->counter_main)
+ {
+ vlib_cli_output (vm, " RX queue %u:", q->queue_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa,
+ q->counter_main);
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, p)
+ if (q->counter_main)
+ {
+ vlib_cli_output (vm, " TX queue %u:", q->queue_id);
+ vlib_cli_output (vm, " %U", format_vnet_dev_counters, &fa,
+ q->counter_main);
+ }
+ }
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_device_counters_cmd, static) = {
+ .path = "show device counters",
+ .short_help = "show device counters [all]",
+ .function = show_device_counters_cmd_fn,
+ .is_mp_safe = 1,
+};
diff --git a/src/vnet/dev/config.c b/src/vnet/dev/config.c
new file mode 100644
index 00000000000..8883e727ac2
--- /dev/null
+++ b/src/vnet/dev/config.c
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/error.h"
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/api.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "config",
+};
+
+static clib_error_t *
+vnet_dev_config_one_interface (vlib_main_t *vm, unformat_input_t *input,
+ vnet_dev_api_create_port_if_args_t *args)
+{
+ clib_error_t *err = 0;
+
+ log_debug (0, "port %u %U", args->port_id, format_unformat_input, input);
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ u32 n;
+
+ if (unformat (input, "name %U", unformat_c_string_array, args->intf_name,
+ sizeof (args->intf_name)))
+ ;
+ else if (unformat (input, "num-rx-queues %u", &n))
+ args->num_rx_queues = n;
+ else if (unformat (input, "num-tx-queues %u", &n))
+ args->num_tx_queues = n;
+ else if (unformat (input, "rx-queue-size %u", &n))
+ args->rx_queue_size = n;
+ else if (unformat (input, "tx-queue-size %u", &n))
+ args->tx_queue_size = n;
+ else if (unformat (input, "flags %U", unformat_vnet_dev_port_flags,
+ &args->flags))
+ ;
+ else if (unformat (input, "args %U", unformat_single_quoted_string,
+ &args->args))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+ return err;
+}
+static clib_error_t *
+vnet_dev_config_one_device (vlib_main_t *vm, unformat_input_t *input,
+ char *device_id)
+{
+ log_debug (0, "device %s %U", device_id, format_unformat_input, input);
+ clib_error_t *err = 0;
+ vnet_dev_api_attach_args_t args = {};
+ vnet_dev_api_create_port_if_args_t *if_args_vec = 0, *if_args;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ unformat_input_t sub_input;
+ u32 n;
+
+ if (unformat (input, "driver %U", unformat_c_string_array,
+ args.driver_name, sizeof (args.driver_name)))
+ ;
+ else if (unformat (input, "flags %U", unformat_vnet_dev_flags,
+ &args.flags))
+ ;
+ else if (unformat (input, "args %U", unformat_single_quoted_string,
+ &args.args))
+ ;
+ else if (unformat (input, "port %u %U", &n, unformat_vlib_cli_sub_input,
+ &sub_input))
+ {
+ vnet_dev_api_create_port_if_args_t *if_args;
+ vec_add2 (if_args_vec, if_args, 1);
+ if_args->port_id = n;
+ err = vnet_dev_config_one_interface (vm, &sub_input, if_args);
+ unformat_free (&sub_input);
+ if (err)
+ break;
+ }
+ else
+ {
+ err = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ break;
+ }
+ }
+
+ if (err == 0)
+ {
+ vnet_dev_rv_t rv;
+
+ clib_memcpy (args.device_id, device_id, sizeof (args.device_id));
+ rv = vnet_dev_api_attach (vm, &args);
+ vec_free (args.args);
+
+ if (rv == VNET_DEV_OK)
+ {
+ vec_foreach (if_args, if_args_vec)
+ {
+ if_args->dev_index = args.dev_index;
+ rv = vnet_dev_api_create_port_if (vm, if_args);
+ if (rv != VNET_DEV_OK)
+ break;
+ }
+ }
+
+ if (rv != VNET_DEV_OK)
+ err = clib_error_return (0, "error: %U for device '%s'",
+ format_vnet_dev_rv, rv, device_id);
+ }
+
+ vec_free (if_args_vec);
+ return err;
+}
+
+uword
+dev_config_process_node_fn (vlib_main_t *vm, vlib_node_runtime_t *rt,
+ vlib_frame_t *f)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ unformat_input_t input;
+ clib_error_t *err = 0;
+
+ if (dm->startup_config == 0)
+ return 0;
+
+ unformat_init_vector (&input, dm->startup_config);
+ dm->startup_config = 0;
+
+ while (!err && unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ unformat_input_t sub_input;
+ vnet_dev_device_id_t device_id;
+ if (unformat (&input, "dev %U %U", unformat_c_string_array, device_id,
+ sizeof (device_id), unformat_vlib_cli_sub_input,
+ &sub_input))
+ {
+ err = vnet_dev_config_one_device (vm, &sub_input, device_id);
+ unformat_free (&sub_input);
+ }
+ else if (unformat (&input, "dev %U", unformat_c_string_array, device_id,
+ sizeof (device_id)))
+ {
+ unformat_input_t no_input = {};
+ unformat_init_vector (&no_input, 0);
+ err = vnet_dev_config_one_device (vm, &no_input, device_id);
+ unformat_free (&no_input);
+ }
+ else
+ err = clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, &input);
+ }
+
+ unformat_free (&input);
+
+ if (err)
+ {
+ log_err (0, "%U", format_clib_error, err);
+ clib_error_free (err);
+ }
+
+ vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+ vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index);
+ vec_add1 (dm->free_process_node_indices, rt->node_index);
+ return 0;
+}
+
+VLIB_REGISTER_NODE (dev_config_process_node) = {
+ .function = dev_config_process_node_fn,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "dev-config",
+};
+
+static clib_error_t *
+devices_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ uword c;
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ vec_add1 (dm->startup_config, c);
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (devices_config, "devices");
diff --git a/src/vnet/dev/counters.c b/src/vnet/dev/counters.c
new file mode 100644
index 00000000000..0a1e0a7419d
--- /dev/null
+++ b/src/vnet/dev/counters.c
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/interface/rx_queue_funcs.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "counters",
+};
+
+vnet_dev_counter_main_t *
+vnet_dev_counters_alloc (vlib_main_t *vm, vnet_dev_counter_t *counters,
+ u16 n_counters, char *fmt, ...)
+{
+ vnet_dev_counter_t *c;
+ vnet_dev_counter_main_t *cm;
+ u32 alloc_sz;
+
+ alloc_sz = sizeof (*cm) + n_counters * sizeof (*c);
+ cm = clib_mem_alloc_aligned (alloc_sz, CLIB_CACHE_LINE_BYTES);
+ clib_memset (cm, 0, sizeof (*cm));
+ cm->n_counters = n_counters;
+
+ if (fmt && strlen (fmt))
+ {
+ va_list va;
+ va_start (va, fmt);
+ cm->desc = va_format (0, fmt, &va);
+ va_end (va);
+ }
+
+ for (u32 i = 0; i < n_counters; i++)
+ {
+ cm->counters[i] = counters[i];
+ cm->counters[i].index = i;
+ }
+
+ vec_validate_aligned (cm->counter_data, n_counters - 1,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (cm->counter_start, n_counters - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ return cm;
+}
+
+void
+vnet_dev_counters_clear (vlib_main_t *vm, vnet_dev_counter_main_t *cm)
+{
+ for (int i = 0; i < cm->n_counters; i++)
+ {
+ cm->counter_start[i] = cm->counter_data[i];
+ cm->counter_data[i] = 0;
+ }
+}
+
+void
+vnet_dev_counters_free (vlib_main_t *vm, vnet_dev_counter_main_t *cm)
+{
+ vec_free (cm->desc);
+ vec_free (cm->counter_data);
+ vec_free (cm->counter_start);
+ clib_mem_free (cm);
+}
+
+u8 *
+format_vnet_dev_counter_name (u8 *s, va_list *va)
+{
+ vnet_dev_counter_t *c = va_arg (*va, vnet_dev_counter_t *);
+
+ char *std_counters[] = {
+ [VNET_DEV_CTR_TYPE_RX_BYTES] = "total bytes received",
+ [VNET_DEV_CTR_TYPE_TX_BYTES] = "total bytes transmitted",
+ [VNET_DEV_CTR_TYPE_RX_PACKETS] = "total packets received",
+ [VNET_DEV_CTR_TYPE_TX_PACKETS] = "total packets transmitted",
+ [VNET_DEV_CTR_TYPE_RX_DROPS] = "total drops received",
+ [VNET_DEV_CTR_TYPE_TX_DROPS] = "total drops transmitted",
+ };
+
+ char *directions[] = {
+ [VNET_DEV_CTR_DIR_RX] = "received",
+ [VNET_DEV_CTR_DIR_TX] = "sent",
+ };
+ char *units[] = {
+ [VNET_DEV_CTR_UNIT_BYTES] = "bytes",
+ [VNET_DEV_CTR_UNIT_PACKETS] = "packets",
+ };
+
+ if (c->type == VNET_DEV_CTR_TYPE_VENDOR)
+ {
+ s = format (s, "%s", c->name);
+
+ if (c->unit < ARRAY_LEN (units) && units[c->unit])
+ s = format (s, " %s", units[c->unit]);
+
+ if (c->dir < ARRAY_LEN (directions) && directions[c->dir])
+ s = format (s, " %s", directions[c->dir]);
+ }
+ else if (c->type < ARRAY_LEN (std_counters) && std_counters[c->type])
+ s = format (s, "%s", std_counters[c->type]);
+ else
+ ASSERT (0);
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_counters (u8 *s, va_list *va)
+{
+ vnet_dev_format_args_t *a = va_arg (*va, vnet_dev_format_args_t *);
+ vnet_dev_counter_main_t *cm = va_arg (*va, vnet_dev_counter_main_t *);
+ u32 line = 0, indent = format_get_indent (s);
+
+ foreach_vnet_dev_counter (c, cm)
+ {
+ if (a->show_zero_counters == 0 && cm->counter_data[c->index] == 0)
+ continue;
+
+ if (line++)
+ s = format (s, "\n%U", format_white_space, indent);
+
+ s = format (s, "%-45U%lu", format_vnet_dev_counter_name, c,
+ cm->counter_data[c->index]);
+ }
+
+ return s;
+}
diff --git a/src/vnet/dev/counters.h b/src/vnet/dev/counters.h
new file mode 100644
index 00000000000..33d08ffbecd
--- /dev/null
+++ b/src/vnet/dev/counters.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_COUNTERS_H_
+#define _VNET_DEV_COUNTERS_H_
+
+#include <vnet/dev/dev.h>
+
+typedef enum
+{
+ VNET_DEV_CTR_DIR_NA,
+ VNET_DEV_CTR_DIR_RX,
+ VNET_DEV_CTR_DIR_TX,
+} __clib_packed vnet_dev_counter_direction_t;
+
+typedef enum
+{
+ VNET_DEV_CTR_TYPE_RX_BYTES,
+ VNET_DEV_CTR_TYPE_RX_PACKETS,
+ VNET_DEV_CTR_TYPE_RX_DROPS,
+ VNET_DEV_CTR_TYPE_TX_BYTES,
+ VNET_DEV_CTR_TYPE_TX_PACKETS,
+ VNET_DEV_CTR_TYPE_TX_DROPS,
+ VNET_DEV_CTR_TYPE_VENDOR,
+} __clib_packed vnet_dev_counter_type_t;
+
+typedef enum
+{
+ VNET_DEV_CTR_UNIT_NA,
+ VNET_DEV_CTR_UNIT_BYTES,
+ VNET_DEV_CTR_UNIT_PACKETS,
+} __clib_packed vnet_dev_counter_unit_t;
+
+typedef struct vnet_dev_counter
+{
+ char name[24];
+ uword user_data;
+ vnet_dev_counter_type_t type;
+ vnet_dev_counter_direction_t dir;
+ vnet_dev_counter_unit_t unit;
+ u16 index;
+} vnet_dev_counter_t;
+
+typedef struct vnet_dev_counter_main
+{
+ u8 *desc;
+ u64 *counter_data;
+ u64 *counter_start;
+ u16 n_counters;
+ vnet_dev_counter_t counters[];
+} vnet_dev_counter_main_t;
+
+#define VNET_DEV_CTR_RX_BYTES(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_BYTES, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_BYTES, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_BYTES(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_BYTES, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_BYTES, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_RX_PACKETS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_PACKETS, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_PACKETS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_PACKETS, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_RX_DROPS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_RX_DROPS, .dir = VNET_DEV_CTR_DIR_RX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_TX_DROPS(p, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_TX_DROPS, .dir = VNET_DEV_CTR_DIR_TX, \
+ .unit = VNET_DEV_CTR_UNIT_PACKETS, .user_data = (p), __VA_ARGS__ \
+ }
+#define VNET_DEV_CTR_VENDOR(p, d, u, n, ...) \
+ { \
+ .type = VNET_DEV_CTR_TYPE_VENDOR, .user_data = (p), .name = n, \
+ .dir = VNET_DEV_CTR_DIR_##d, .unit = VNET_DEV_CTR_UNIT_##u, __VA_ARGS__ \
+ }
+
+vnet_dev_counter_main_t *vnet_dev_counters_alloc (vlib_main_t *,
+ vnet_dev_counter_t *, u16,
+ char *, ...);
+void vnet_dev_counters_clear (vlib_main_t *, vnet_dev_counter_main_t *);
+void vnet_dev_counters_free (vlib_main_t *, vnet_dev_counter_main_t *);
+
+format_function_t format_vnet_dev_counters;
+format_function_t format_vnet_dev_counters_all;
+
+static_always_inline vnet_dev_counter_main_t *
+vnet_dev_counter_get_main (vnet_dev_counter_t *counter)
+{
+ return (vnet_dev_counter_main_t *) ((u8 *) (counter - counter->index) -
+ STRUCT_OFFSET_OF (
+ vnet_dev_counter_main_t, counters));
+}
+
+static_always_inline void
+vnet_dev_counter_value_add (vlib_main_t *vm, vnet_dev_counter_t *counter,
+ u64 val)
+{
+ vnet_dev_counter_main_t *cm = vnet_dev_counter_get_main (counter);
+ cm->counter_data[counter->index] += val;
+}
+
+static_always_inline void
+vnet_dev_counter_value_update (vlib_main_t *vm, vnet_dev_counter_t *counter,
+ u64 val)
+{
+ vnet_dev_counter_main_t *cm = vnet_dev_counter_get_main (counter);
+ cm->counter_data[counter->index] = val - cm->counter_start[counter->index];
+}
+
+#define foreach_vnet_dev_counter(c, cm) \
+ if (cm) \
+ for (typeof (*(cm)->counters) *(c) = (cm)->counters; \
+ (c) < (cm)->counters + (cm)->n_counters; (c)++)
+
+#endif /* _VNET_DEV_COUNTERS_H_ */
diff --git a/src/vnet/dev/dev.api b/src/vnet/dev/dev.api
new file mode 100644
index 00000000000..552b778949b
--- /dev/null
+++ b/src/vnet/dev/dev.api
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+option version = "0.0.1";
+
+enumflag dev_flags : u32
+{
+ VL_API_DEV_FLAG_NO_STATS = 0x1,
+};
+
+enumflag dev_port_flags : u32
+{
+ VL_API_DEV_PORT_FLAG_INTERRUPT_MODE = 0x1,
+};
+
+autoendian define dev_attach
+{
+ u32 client_index;
+ u32 context;
+ string device_id[48];
+ string driver_name[16];
+ vl_api_dev_flags_t flags;
+ string args[];
+};
+
+autoendian define dev_attach_reply
+{
+ u32 context;
+ u32 dev_index;
+ i32 retval;
+ string error_string[];
+};
+
+autoendian define dev_detach
+{
+ u32 client_index;
+ u32 context;
+ u32 dev_index;
+};
+
+autoendian define dev_detach_reply
+{
+ u32 context;
+ i32 retval;
+ string error_string[];
+};
+
+autoendian define dev_create_port_if
+{
+ u32 client_index;
+ u32 context;
+ u32 dev_index;
+ string intf_name[32];
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u16 rx_queue_size;
+ u16 tx_queue_size;
+ u16 port_id;
+ vl_api_dev_port_flags_t flags;
+ string args[];
+};
+
+autoendian define dev_create_port_if_reply
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ i32 retval;
+ string error_string[];
+};
+
+autoendian define dev_remove_port_if
+{
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+};
+
+autoendian define dev_remove_port_if_reply
+{
+ u32 context;
+ i32 retval;
+ string error_string[];
+};
+
diff --git a/src/vnet/dev/dev.c b/src/vnet/dev/dev.c
new file mode 100644
index 00000000000..e04fa161ce2
--- /dev/null
+++ b/src/vnet/dev/dev.c
@@ -0,0 +1,461 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/pool.h"
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+#include <vnet/dev/counters.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+};
+
+vnet_dev_main_t vnet_dev_main = { .next_rx_queue_thread = 1 };
+
+vnet_dev_bus_t *
+vnet_dev_find_device_bus (vlib_main_t *vm, vnet_dev_device_id_t id)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_bus_t *bus;
+
+ pool_foreach (bus, dm->buses)
+ {
+ int n = strlen (bus->registration->name);
+ int l = strlen (id);
+ int dl = strlen (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER);
+
+ if (l <= n + dl)
+ continue;
+
+ if (strncmp (bus->registration->name, id, n))
+ continue;
+
+ if (strncmp (VNET_DEV_DEVICE_ID_PREFIX_DELIMITER, id + n, dl))
+ continue;
+
+ return bus;
+ }
+
+ return 0;
+}
+
+void *
+vnet_dev_get_device_info (vlib_main_t *vm, vnet_dev_device_id_t id)
+{
+ vnet_dev_bus_t *bus;
+
+ bus = vnet_dev_find_device_bus (vm, id);
+ if (bus == 0)
+ return 0;
+
+ return bus->ops.get_device_info (vm, id);
+}
+
+vnet_dev_t *
+vnet_dev_alloc (vlib_main_t *vm, vnet_dev_device_id_t id,
+ vnet_dev_driver_t *driver)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_t *dev = 0, **devp = 0;
+
+ dev = vnet_dev_alloc_with_data (sizeof (vnet_dev_t),
+ driver->registration->device_data_sz);
+
+ pool_get (dm->devices, devp);
+ devp[0] = dev;
+ dev->index = devp - dm->devices;
+ dev->driver_index = driver->index;
+ dev->ops = driver->registration->ops;
+ dev->bus_index = driver->bus_index;
+ clib_memcpy (dev->device_id, id, sizeof (dev->device_id));
+ hash_set (dm->device_index_by_id, dev->device_id, dev->index);
+
+ if ((vnet_dev_process_create (vm, dev)) == VNET_DEV_OK)
+ return dev;
+
+ vnet_dev_free (vm, dev);
+ return 0;
+}
+
+vnet_dev_rv_t
+vnet_dev_init (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_validate (vm, dev);
+
+ if ((rv = bus->ops.device_open (vm, dev)) != VNET_DEV_OK)
+ return rv;
+
+ if (dev->ops.alloc)
+ {
+ rv = dev->ops.alloc (vm, dev);
+ if (rv != VNET_DEV_OK)
+ {
+ log_err (dev, "device init failed [rv %d]", rv);
+ if (dev->ops.deinit)
+ dev->ops.deinit (vm, dev);
+ if (dev->ops.free)
+ dev->ops.free (vm, dev);
+ return rv;
+ }
+ }
+
+ if ((rv = dev->ops.init (vm, dev)) != VNET_DEV_OK)
+ {
+ log_err (dev, "device init failed [rv %d]", rv);
+ if (dev->ops.deinit)
+ dev->ops.deinit (vm, dev);
+ if (dev->ops.free)
+ dev->ops.free (vm, dev);
+ return rv;
+ }
+
+ dev->initialized = 1;
+ dev->not_first_init = 1;
+ return VNET_DEV_OK;
+}
+
+void
+vnet_dev_deinit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ASSERT (dev->initialized == 1);
+ vnet_dev_bus_t *bus;
+
+ vnet_dev_validate (vm, dev);
+
+ foreach_vnet_dev_port (p, dev)
+ ASSERT (p->interface_created == 0);
+
+ if (dev->ops.deinit)
+ dev->ops.deinit (vm, dev);
+
+ bus = vnet_dev_get_bus (dev);
+ if (bus->ops.device_close)
+ bus->ops.device_close (vm, dev);
+
+ vnet_dev_process_quit (vm, dev);
+
+ dev->initialized = 0;
+}
+
+void
+vnet_dev_free (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+
+ vnet_dev_validate (vm, dev);
+
+ ASSERT (dev->initialized == 0);
+
+ foreach_vnet_dev_port (p, dev)
+ vnet_dev_port_free (vm, p);
+
+ vec_free (dev->description);
+ pool_free (dev->ports);
+ pool_free (dev->periodic_ops);
+ hash_unset (dm->device_index_by_id, dev->device_id);
+ vnet_dev_arg_free (&dev->args);
+ pool_put_index (dm->devices, dev->index);
+}
+
+vnet_dev_rv_t
+vnet_dev_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_rv_t rv;
+
+ ASSERT (dev->initialized == 1);
+ vnet_dev_validate (vm, dev);
+
+ if (dev->ops.reset == 0)
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ if ((rv = dev->ops.reset (vm, dev)) != VNET_DEV_OK)
+ {
+ log_err (dev, "device reset failed [rv %d]", rv);
+ return rv;
+ }
+
+ return VNET_DEV_OK;
+}
+
+void
+vnet_dev_detach (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ foreach_vnet_dev_port (p, dev)
+ if (p->interface_created)
+ vnet_dev_port_if_remove (vm, p);
+ vnet_dev_deinit (vm, dev);
+ vnet_dev_free (vm, dev);
+}
+
+vnet_dev_rv_t
+vnet_dev_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size, u32 align,
+ void **pp)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_validate (vm, dev);
+
+ if (!bus->ops.dma_mem_alloc_fn)
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ rv = bus->ops.dma_mem_alloc_fn (vm, dev, size, align, pp);
+ if (rv == VNET_DEV_OK)
+ log_debug (dev, "%u bytes va %p dma-addr 0x%lx numa %u align %u", size,
+ *pp, vnet_dev_get_dma_addr (vm, dev, *pp), dev->numa_node,
+ align);
+ return rv;
+}
+
+void
+vnet_dev_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+
+ vnet_dev_validate (vm, dev);
+
+ if (p == 0 || !bus->ops.dma_mem_free_fn)
+ return;
+
+ return bus->ops.dma_mem_free_fn (vm, dev, p);
+}
+
+clib_error_t *
+vnet_dev_admin_up_down_fn (vnet_main_t *vnm, u32 hw_if_index, u32 flags)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ u32 is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
+
+ if (is_up && p->started == 0)
+ rv = vnet_dev_process_call_port_op (vm, p, vnet_dev_port_start);
+ else if (!is_up && p->started)
+ rv = vnet_dev_process_call_port_op_no_rv (vm, p, vnet_dev_port_stop);
+
+ if (rv != VNET_DEV_OK)
+ return clib_error_return (0, "failed to change port admin state: %U",
+ format_vnet_dev_rv, rv);
+
+ return 0;
+}
+
+static void
+vnet_dev_feature_update_cb (u32 sw_if_index, u8 arc_index, u8 is_enable,
+ void *cb)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ vnet_dev_main_t *vdm = &vnet_dev_main;
+ vnet_dev_port_t *port;
+ vnet_hw_interface_t *hw;
+ u32 current_config_index = ~0;
+ u32 next_index = ~0;
+ int update_runtime = 0;
+
+ if (arc_index != vdm->eth_port_rx_feature_arc_index)
+ return;
+
+ hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
+ port = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+
+ if (port == 0 || port->intf.sw_if_index != sw_if_index)
+ return;
+
+ if (vnet_have_features (arc_index, sw_if_index))
+ {
+ cm = &fm->feature_config_mains[arc_index];
+ current_config_index =
+ vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
+ vnet_get_config_data (&cm->config_main, &current_config_index,
+ &next_index, 0);
+ if (port->intf.feature_arc == 0 ||
+ port->intf.rx_next_index != next_index ||
+ port->intf.current_config_index != current_config_index)
+ {
+ port->intf.current_config_index = current_config_index;
+ port->intf.rx_next_index = next_index;
+ port->intf.feature_arc_index = arc_index;
+ port->intf.feature_arc = 1;
+ update_runtime = 1;
+ }
+ }
+ else
+ {
+ if (port->intf.feature_arc)
+ {
+ port->intf.current_config_index = 0;
+ port->intf.rx_next_index =
+ port->intf.redirect_to_node ?
+ port->intf.redirect_to_node_next_index :
+ vnet_dev_default_next_index_by_port_type[port->attr.type];
+ port->intf.feature_arc_index = 0;
+ port->intf.feature_arc = 0;
+ update_runtime = 1;
+ }
+ }
+
+ if (update_runtime)
+ {
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ vnet_dev_rx_queue_rt_request (
+ vm, rxq,
+ (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1,
+ .update_feature_arc = 1 });
+ log_debug (port->dev, "runtime update requested due to chgange in "
+ "feature arc configuration");
+ }
+}
+
+static int
+sort_driver_registrations (void *a0, void *a1)
+{
+ vnet_dev_driver_registration_t **r0 = a0;
+ vnet_dev_driver_registration_t **r1 = a1;
+
+ if (r0[0]->priority > r1[0]->priority)
+ return -1;
+ else if (r0[0]->priority < r1[0]->priority)
+ return 1;
+
+ return 0;
+}
+
+static clib_error_t *
+vnet_dev_main_init (vlib_main_t *vm)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_driver_registration_t **drv = 0;
+ u32 temp_space_sz = 0;
+
+ dm->device_index_by_id = hash_create_string (0, sizeof (uword));
+
+ for (vnet_dev_bus_registration_t *r = dm->bus_registrations; r;
+ r = r->next_registration)
+ {
+ vnet_dev_bus_t *bus;
+ pool_get_zero (dm->buses, bus);
+ bus->registration = r;
+ bus->index = bus - dm->buses;
+ bus->ops = r->ops;
+ if (!r->device_data_size ||
+ r->device_data_size > STRUCT_SIZE_OF (vnet_dev_t, bus_data))
+ return clib_error_return (
+ 0, "bus device data for bus '%s' is too big not specified", r->name);
+
+ log_debug (0, "bus '%s' registered", r->name);
+ }
+
+ for (vnet_dev_driver_registration_t *r = dm->driver_registrations; r;
+ r = r->next_registration)
+ vec_add1 (drv, r);
+
+ vec_sort_with_function (drv, sort_driver_registrations);
+
+ vec_foreach_pointer (r, drv)
+ {
+ vnet_dev_driver_t *driver;
+ vnet_dev_bus_t *bus;
+ vnet_device_class_t *dev_class;
+ int bus_index = -1;
+
+ pool_foreach (bus, dm->buses)
+ {
+ if (strcmp (bus->registration->name, r->bus) == 0)
+ {
+ bus_index = bus->index;
+ break;
+ }
+ }
+
+ if (bus_index < 0)
+ return clib_error_return (0, "unknown bus '%s'", r->bus);
+
+ pool_get_zero (dm->drivers, driver);
+ driver->registration = r;
+ driver->index = driver - dm->drivers;
+ driver->bus_index = bus_index;
+ driver->ops = r->ops;
+ dev_class = clib_mem_alloc (sizeof (vnet_device_class_t));
+ *dev_class = (vnet_device_class_t){
+ .name = r->name,
+ .format_device_name = format_vnet_dev_interface_name,
+ .format_device = format_vnet_dev_interface_info,
+ .admin_up_down_function = vnet_dev_admin_up_down_fn,
+ .rx_redirect_to_node = vnet_dev_set_interface_next_node,
+ .clear_counters = vnet_dev_clear_hw_interface_counters,
+ .mac_addr_change_function = vnet_dev_port_mac_change,
+ .mac_addr_add_del_function = vnet_dev_add_del_mac_address,
+ .flow_ops_function = vnet_dev_flow_ops_fn,
+ .format_flow = format_vnet_dev_flow,
+ .set_rss_queues_function = vnet_dev_interface_set_rss_queues,
+ };
+ driver->dev_class_index = vnet_register_device_class (vm, dev_class);
+ log_debug (0, "driver '%s' registered on bus '%s'", r->name,
+ bus->registration->name);
+
+ if (temp_space_sz < r->runtime_temp_space_sz)
+ temp_space_sz = r->runtime_temp_space_sz;
+ }
+
+ if (dm->startup_config)
+ log_debug (0, "startup config: %v", dm->startup_config);
+
+ vec_free (drv);
+
+ if (temp_space_sz > 0)
+ {
+ const u32 align = CLIB_CACHE_LINE_BYTES;
+ u32 sz = round_pow2 (temp_space_sz, align);
+ dm->log2_runtime_temp_space_sz =
+ get_lowest_set_bit_index (max_pow2 (sz));
+ sz = 1 << dm->log2_runtime_temp_space_sz;
+ sz *= vlib_get_n_threads ();
+ dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align);
+ clib_memset (dm->runtime_temp_spaces, 0, sz);
+ log_debug (0,
+ "requested %u bytes for runtime temp storage, allocated %u "
+ "per thread (total %u)",
+ temp_space_sz, 1 << dm->log2_runtime_temp_space_sz, sz);
+ }
+
+ vnet_feature_register (vnet_dev_feature_update_cb, 0);
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (vnet_dev_main_init);
+
+clib_error_t *
+vnet_dev_num_workers_change (vlib_main_t *vm)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+
+ if (dm->log2_runtime_temp_space_sz > 0)
+ {
+ const u32 align = CLIB_CACHE_LINE_BYTES;
+ uword sz =
+ (1ULL << dm->log2_runtime_temp_space_sz) * vlib_get_n_threads ();
+ if (dm->runtime_temp_spaces)
+ clib_mem_free (dm->runtime_temp_spaces);
+ dm->runtime_temp_spaces = clib_mem_alloc_aligned (sz, align);
+ clib_memset (dm->runtime_temp_spaces, 0, sz);
+ log_debug (0, "runtime temp storage resized to %u", sz);
+ }
+
+ return 0;
+}
+
+VLIB_NUM_WORKERS_CHANGE_FN (vnet_dev_num_workers_change);
diff --git a/src/vnet/dev/dev.h b/src/vnet/dev/dev.h
new file mode 100644
index 00000000000..bbf2f9dff21
--- /dev/null
+++ b/src/vnet/dev/dev.h
@@ -0,0 +1,753 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_H_
+#define _VNET_DEV_H_
+
+#include <vppinfra/clib.h>
+#include <vppinfra/error_bootstrap.h>
+#include <vppinfra/format.h>
+#include <vnet/vnet.h>
+#include <vnet/dev/types.h>
+#include <vnet/dev/args.h>
+
+#define VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "/"
+
+#define foreach_vnet_dev_port_type \
+ _ (0, UNKNOWN) \
+ _ (1, ETHERNET)
+
+typedef enum
+{
+#define _(b, n) VNET_DEV_PORT_TYPE_##n = (1U << (b)),
+ foreach_vnet_dev_port_type
+#undef _
+} vnet_dev_port_type_t;
+
+#define foreach_vnet_dev_port_caps \
+ _ (interrupt_mode) \
+ _ (rss) \
+ _ (change_max_rx_frame_size) \
+ _ (mac_filter)
+
+#define foreach_vnet_dev_port_rx_offloads _ (ip4_cksum)
+
+#define foreach_vnet_dev_port_tx_offloads \
+ _ (ip4_cksum) \
+ _ (tcp_gso) \
+ _ (udp_gso)
+
+typedef union
+{
+ struct
+ {
+#define _(n) u8 n : 1;
+ foreach_vnet_dev_port_caps
+#undef _
+ };
+ u8 as_number;
+} vnet_dev_port_caps_t;
+
+typedef union
+{
+ struct
+ {
+#define _(n) u8 n : 1;
+ foreach_vnet_dev_port_rx_offloads
+#undef _
+ };
+ u8 as_number;
+} vnet_dev_port_rx_offloads_t;
+
+typedef union
+{
+ struct
+ {
+#define _(n) u8 n : 1;
+ foreach_vnet_dev_port_tx_offloads
+#undef _
+ };
+ u8 as_number;
+} vnet_dev_port_tx_offloads_t;
+
+typedef union
+{
+ u8 eth_mac[6];
+ u8 raw[8];
+} vnet_dev_hw_addr_t;
+
+typedef struct vnet_dev_bus_registration vnet_dev_bus_registration_t;
+typedef struct vnet_dev_driver_registration vnet_dev_driver_registration_t;
+
+typedef struct vnet_dev vnet_dev_t;
+typedef struct vnet_dev_port vnet_dev_port_t;
+typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t;
+typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t;
+typedef struct vnet_dev_bus_registration vnet_dev_bus_registration_t;
+typedef struct vnet_dev_driver_registration vnet_dev_driver_registration_t;
+typedef struct vnet_dev_counter vnet_dev_counter_t;
+typedef struct vnet_dev_counter_main vnet_dev_counter_main_t;
+typedef struct vnet_dev_port_cfg_change_req vnet_dev_port_cfg_change_req_t;
+
+typedef vnet_dev_rv_t (vnet_dev_op_t) (vlib_main_t *, vnet_dev_t *);
+typedef vnet_dev_rv_t (vnet_dev_port_op_t) (vlib_main_t *, vnet_dev_port_t *);
+typedef vnet_dev_rv_t (vnet_dev_port_cfg_change_op_t) (
+ vlib_main_t *, vnet_dev_port_t *, vnet_dev_port_cfg_change_req_t *);
+typedef vnet_dev_rv_t (vnet_dev_rx_queue_op_t) (vlib_main_t *,
+ vnet_dev_rx_queue_t *);
+typedef vnet_dev_rv_t (vnet_dev_tx_queue_op_t) (vlib_main_t *,
+ vnet_dev_tx_queue_t *);
+typedef void (vnet_dev_op_no_rv_t) (vlib_main_t *, vnet_dev_t *);
+typedef void (vnet_dev_port_op_no_rv_t) (vlib_main_t *, vnet_dev_port_t *);
+typedef void (vnet_dev_rx_queue_op_no_rv_t) (vlib_main_t *,
+ vnet_dev_rx_queue_t *);
+typedef void (vnet_dev_tx_queue_op_no_rv_t) (vlib_main_t *,
+ vnet_dev_tx_queue_t *);
+
+typedef u16 vnet_dev_queue_id_t;
+typedef u16 vnet_dev_bus_index_t;
+typedef u16 vnet_dev_driver_index_t;
+
+typedef struct
+{
+ vnet_dev_rx_queue_op_t *alloc;
+ vnet_dev_rx_queue_op_t *start;
+ vnet_dev_rx_queue_op_no_rv_t *stop;
+ vnet_dev_rx_queue_op_no_rv_t *free;
+ format_function_t *format_info;
+} vnet_dev_rx_queue_ops_t;
+
+typedef struct
+{
+ vnet_dev_tx_queue_op_t *alloc;
+ vnet_dev_tx_queue_op_t *start;
+ vnet_dev_tx_queue_op_no_rv_t *stop;
+ vnet_dev_tx_queue_op_no_rv_t *free;
+ format_function_t *format_info;
+} vnet_dev_tx_queue_ops_t;
+
+typedef struct
+{
+ u16 data_size;
+ u16 min_size;
+ u16 max_size;
+ u16 default_size;
+ u8 multiplier;
+ u8 size_is_power_of_two : 1;
+} vnet_dev_queue_config_t;
+
+#define foreach_vnet_dev_port_cfg_type \
+ _ (PROMISC_MODE) \
+ _ (MAX_RX_FRAME_SIZE) \
+ _ (CHANGE_PRIMARY_HW_ADDR) \
+ _ (ADD_SECONDARY_HW_ADDR) \
+ _ (REMOVE_SECONDARY_HW_ADDR) \
+ _ (RXQ_INTR_MODE_ENABLE) \
+ _ (RXQ_INTR_MODE_DISABLE) \
+ _ (ADD_RX_FLOW) \
+ _ (DEL_RX_FLOW) \
+ _ (GET_RX_FLOW_COUNTER) \
+ _ (RESET_RX_FLOW_COUNTER)
+
+typedef enum
+{
+ VNET_DEV_PORT_CFG_UNKNOWN,
+#define _(n) VNET_DEV_PORT_CFG_##n,
+ foreach_vnet_dev_port_cfg_type
+#undef _
+} __clib_packed vnet_dev_port_cfg_type_t;
+
+typedef struct vnet_dev_port_cfg_change_req
+{
+ vnet_dev_port_cfg_type_t type;
+ u8 validated : 1;
+ u8 all_queues : 1;
+
+ union
+ {
+ u8 promisc : 1;
+ vnet_dev_hw_addr_t addr;
+ u16 max_rx_frame_size;
+ vnet_dev_queue_id_t queue_id;
+ struct
+ {
+ u32 flow_index;
+ uword *private_data;
+ };
+ };
+
+} vnet_dev_port_cfg_change_req_t;
+
+typedef struct
+{
+ vnet_dev_hw_addr_t hw_addr;
+ u16 max_rx_queues;
+ u16 max_tx_queues;
+ u16 max_supported_rx_frame_size;
+ vnet_dev_port_type_t type;
+ vnet_dev_port_caps_t caps;
+ vnet_dev_port_rx_offloads_t rx_offloads;
+ vnet_dev_port_tx_offloads_t tx_offloads;
+} vnet_dev_port_attr_t;
+
+typedef enum
+{
+ VNET_DEV_PERIODIC_OP_TYPE_DEV = 1,
+ VNET_DEV_PERIODIC_OP_TYPE_PORT = 2,
+} __clib_packed vnet_dev_periodic_op_type_t;
+
+typedef struct
+{
+ f64 interval;
+ f64 last_run;
+ vnet_dev_periodic_op_type_t type;
+ union
+ {
+ vnet_dev_t *dev;
+ vnet_dev_port_t *port;
+ void *arg;
+ };
+ union
+ {
+ vnet_dev_op_no_rv_t *dev_op;
+ vnet_dev_port_op_no_rv_t *port_op;
+ void *op;
+ };
+} vnet_dev_periodic_op_t;
+
+typedef struct
+{
+ struct _vlib_node_fn_registration *registrations;
+ format_function_t *format_trace;
+ vlib_error_desc_t *error_counters;
+ u16 n_error_counters;
+} vnet_dev_node_t;
+
+typedef struct
+{
+ vnet_dev_op_t *alloc;
+ vnet_dev_op_t *init;
+ vnet_dev_op_no_rv_t *deinit;
+ vnet_dev_op_t *reset;
+ vnet_dev_op_no_rv_t *free;
+ u8 *(*probe) (vlib_main_t *, vnet_dev_bus_index_t, void *);
+ format_function_t *format_info;
+} vnet_dev_ops_t;
+
+typedef struct
+{
+ vnet_dev_port_op_t *alloc;
+ vnet_dev_port_op_t *init;
+ vnet_dev_port_cfg_change_op_t *config_change;
+ vnet_dev_port_cfg_change_op_t *config_change_validate;
+ vnet_dev_port_op_t *start;
+ vnet_dev_port_op_no_rv_t *stop;
+ vnet_dev_port_op_no_rv_t *deinit;
+ vnet_dev_port_op_no_rv_t *free;
+ format_function_t *format_status;
+ format_function_t *format_flow;
+} vnet_dev_port_ops_t;
+
+typedef union
+{
+ struct
+ {
+ u8 update_next_index : 1;
+ u8 update_feature_arc : 1;
+ u8 suspend_off : 1;
+ u8 suspend_on : 1;
+ };
+ u8 as_number;
+} vnet_dev_rx_queue_rt_req_t;
+
+typedef struct vnet_dev_rx_queue
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_port_t *port;
+ u16 rx_thread_index;
+ u16 index;
+ vnet_dev_counter_main_t *counter_main;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime0);
+ vnet_dev_rx_queue_t *next_on_thread;
+ u8 interrupt_mode : 1;
+ u8 enabled : 1;
+ u8 started : 1;
+ u8 suspended : 1;
+ vnet_dev_queue_id_t queue_id;
+ u16 size;
+ u16 next_index;
+ vnet_dev_rx_queue_rt_req_t runtime_request;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime1);
+ vlib_buffer_template_t buffer_template;
+ CLIB_CACHE_LINE_ALIGN_MARK (driver_data);
+ u8 data[];
+} vnet_dev_rx_queue_t;
+
+STATIC_ASSERT_SIZEOF (vnet_dev_rx_queue_t, 3 * CLIB_CACHE_LINE_BYTES);
+
+typedef struct vnet_dev_tx_queue
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_port_t *port;
+ clib_bitmap_t *assigned_threads;
+ u16 index;
+ vnet_dev_counter_main_t *counter_main;
+ CLIB_CACHE_LINE_ALIGN_MARK (runtime0);
+ vnet_dev_queue_id_t queue_id;
+ u8 started : 1;
+ u8 enabled : 1;
+ u8 lock_needed : 1;
+ u8 lock;
+ u16 size;
+ CLIB_ALIGN_MARK (private_data, 16);
+ u8 data[];
+} vnet_dev_tx_queue_t;
+
+STATIC_ASSERT_SIZEOF (vnet_dev_tx_queue_t, 2 * CLIB_CACHE_LINE_BYTES);
+
+typedef struct vnet_dev_port
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_dev_t *dev;
+ vnet_dev_port_id_t port_id;
+ vnet_dev_driver_index_t driver_index;
+ u8 initialized : 1;
+ u8 started : 1;
+ u8 link_up : 1;
+ u8 promisc : 1;
+ u8 interface_created : 1;
+ u8 rx_node_assigned : 1;
+ vnet_dev_counter_main_t *counter_main;
+ vnet_dev_queue_config_t rx_queue_config;
+ vnet_dev_queue_config_t tx_queue_config;
+ vnet_dev_port_attr_t attr;
+ u32 max_rx_frame_size;
+ vnet_dev_hw_addr_t primary_hw_addr;
+ vnet_dev_hw_addr_t *secondary_hw_addr;
+ u32 index;
+ u32 speed;
+ vnet_dev_rx_queue_t **rx_queues;
+ vnet_dev_tx_queue_t **tx_queues;
+ vnet_dev_port_ops_t port_ops;
+ vnet_dev_arg_t *args;
+ vnet_dev_rx_queue_ops_t rx_queue_ops;
+ vnet_dev_tx_queue_ops_t tx_queue_ops;
+ vnet_dev_node_t rx_node;
+ vnet_dev_node_t tx_node;
+
+ struct
+ {
+ vnet_dev_if_name_t name;
+ u32 dev_instance;
+ u32 rx_node_index;
+ u32 current_config_index;
+ u16 rx_next_index;
+ u16 redirect_to_node_next_index;
+ u8 feature_arc_index;
+ u8 feature_arc : 1;
+ u8 redirect_to_node : 1;
+ u8 default_is_intr_mode : 1;
+ u32 tx_node_index;
+ u32 hw_if_index;
+ u32 sw_if_index;
+ u16 num_rx_queues;
+ u16 num_tx_queues;
+ u16 txq_sz;
+ u16 rxq_sz;
+ } intf;
+
+ CLIB_CACHE_LINE_ALIGN_MARK (data0);
+ u8 data[];
+} vnet_dev_port_t;
+
+typedef struct vnet_dev
+{
+ vnet_dev_device_id_t device_id;
+ u16 initialized : 1;
+ u16 not_first_init : 1;
+ u16 va_dma : 1;
+ u16 process_node_quit : 1;
+ u16 process_node_periodic : 1;
+ u16 poll_stats : 1;
+ u16 bus_index;
+ u8 numa_node;
+ u16 max_rx_queues;
+ u16 max_tx_queues;
+ vnet_dev_driver_index_t driver_index;
+ u32 index;
+ u32 process_node_index;
+ u8 bus_data[32] __clib_aligned (16);
+ vnet_dev_ops_t ops;
+ vnet_dev_port_t **ports;
+ vnet_dev_periodic_op_t *periodic_ops;
+ u8 *description;
+ vnet_dev_arg_t *args;
+ u8 __clib_aligned (16)
+ data[];
+} vnet_dev_t;
+
+typedef struct
+{
+ u16 vendor_id, device_id;
+ char *description;
+} vnet_dev_match_t;
+
+#define VNET_DEV_MATCH(...) \
+ (vnet_dev_match_t[]) \
+ { \
+ __VA_ARGS__, {} \
+ }
+
+typedef struct
+{
+ vnet_dev_op_t *device_open;
+ vnet_dev_op_no_rv_t *device_close;
+ vnet_dev_rv_t (*dma_mem_alloc_fn) (vlib_main_t *, vnet_dev_t *, u32, u32,
+ void **);
+ void (*dma_mem_free_fn) (vlib_main_t *, vnet_dev_t *, void *);
+ void *(*get_device_info) (vlib_main_t *, char *);
+ void (*free_device_info) (vlib_main_t *, void *);
+ format_function_t *format_device_info;
+ format_function_t *format_device_addr;
+} vnet_dev_bus_ops_t;
+
+struct vnet_dev_bus_registration
+{
+ vnet_dev_bus_registration_t *next_registration;
+ vnet_dev_driver_name_t name;
+ u16 device_data_size;
+ vnet_dev_bus_ops_t ops;
+};
+
+struct vnet_dev_driver_registration
+{
+ vnet_dev_driver_registration_t *next_registration;
+ u8 bus_master_enable : 1;
+ vnet_dev_driver_name_t name;
+ vnet_dev_bus_name_t bus;
+ u16 device_data_sz;
+ u16 runtime_temp_space_sz;
+ vnet_dev_match_t *match;
+ int priority;
+ vnet_dev_ops_t ops;
+ vnet_dev_arg_t *args;
+};
+
+typedef struct
+{
+ u32 index;
+ vnet_dev_bus_registration_t *registration;
+ vnet_dev_bus_ops_t ops;
+} vnet_dev_bus_t;
+
+typedef struct
+{
+ u32 index;
+ void *dev_data;
+ vnet_dev_driver_registration_t *registration;
+ u32 dev_class_index;
+ vnet_dev_bus_index_t bus_index;
+ vnet_dev_ops_t ops;
+} vnet_dev_driver_t;
+
+typedef struct
+{
+ vnet_dev_bus_t *buses;
+ vnet_dev_driver_t *drivers;
+ vnet_dev_t **devices;
+ vnet_dev_port_t **ports_by_dev_instance;
+ vnet_dev_bus_registration_t *bus_registrations;
+ vnet_dev_driver_registration_t *driver_registrations;
+ void *runtime_temp_spaces;
+ u32 log2_runtime_temp_space_sz;
+ u32 *free_process_node_indices;
+ u32 *free_rx_node_indices;
+ uword *device_index_by_id;
+
+ u8 *startup_config;
+ u16 next_rx_queue_thread;
+ u8 eth_port_rx_feature_arc_index;
+} vnet_dev_main_t;
+
+extern vnet_dev_main_t vnet_dev_main;
+
+typedef struct
+{
+ struct
+ {
+ vnet_dev_port_attr_t attr;
+ vnet_dev_port_ops_t ops;
+ vnet_dev_arg_t *args;
+ u16 data_size;
+ void *initial_data;
+ } port;
+
+ vnet_dev_node_t *rx_node;
+ vnet_dev_node_t *tx_node;
+
+ struct
+ {
+ vnet_dev_queue_config_t config;
+ vnet_dev_rx_queue_ops_t ops;
+ } rx_queue;
+
+ struct
+ {
+ vnet_dev_queue_config_t config;
+ vnet_dev_tx_queue_ops_t ops;
+ } tx_queue;
+} vnet_dev_port_add_args_t;
+
+typedef struct
+{
+ union
+ {
+ struct
+ {
+ u8 link_speed : 1;
+ u8 link_state : 1;
+ u8 link_duplex : 1;
+ };
+ u8 any;
+ } change;
+ u8 link_state : 1;
+ u8 full_duplex : 1;
+ u32 link_speed;
+} vnet_dev_port_state_changes_t;
+
+/* args.c */
+vnet_dev_rv_t vnet_dev_arg_parse (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_arg_t *, u8 *);
+void vnet_dev_arg_free (vnet_dev_arg_t **);
+void vnet_dev_arg_clear_value (vnet_dev_arg_t *);
+format_function_t format_vnet_dev_arg_type;
+format_function_t format_vnet_dev_arg_value;
+format_function_t format_vnet_dev_args;
+
+/* dev.c */
+vnet_dev_t *vnet_dev_alloc (vlib_main_t *, vnet_dev_device_id_t,
+ vnet_dev_driver_t *);
+void vnet_dev_free (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_init (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_deinit (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_reset (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_detach (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_port_add (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_port_id_t,
+ vnet_dev_port_add_args_t *);
+vnet_dev_rv_t vnet_dev_dma_mem_alloc (vlib_main_t *, vnet_dev_t *, u32, u32,
+ void **);
+void vnet_dev_dma_mem_free (vlib_main_t *, vnet_dev_t *, void *);
+vnet_dev_bus_t *vnet_dev_find_device_bus (vlib_main_t *, vnet_dev_device_id_t);
+void *vnet_dev_get_device_info (vlib_main_t *, vnet_dev_device_id_t);
+
+/* error.c */
+clib_error_t *vnet_dev_port_err (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_rv_t, char *, ...);
+int vnet_dev_flow_err (vlib_main_t *, vnet_dev_rv_t);
+
+/* handlers.c */
+clib_error_t *vnet_dev_port_set_max_frame_size (vnet_main_t *,
+ vnet_hw_interface_t *, u32);
+u32 vnet_dev_port_eth_flag_change (vnet_main_t *, vnet_hw_interface_t *, u32);
+clib_error_t *vnet_dev_port_mac_change (vnet_hw_interface_t *, const u8 *,
+ const u8 *);
+clib_error_t *vnet_dev_add_del_mac_address (vnet_hw_interface_t *, const u8 *,
+ u8);
+int vnet_dev_flow_ops_fn (vnet_main_t *, vnet_flow_dev_op_t, u32, u32,
+ uword *);
+clib_error_t *vnet_dev_interface_set_rss_queues (vnet_main_t *,
+ vnet_hw_interface_t *,
+ clib_bitmap_t *);
+void vnet_dev_clear_hw_interface_counters (u32);
+void vnet_dev_set_interface_next_node (vnet_main_t *, u32, u32);
+
+/* port.c */
+vnet_dev_rv_t vnet_dev_port_start (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_start_all_rx_queues (vlib_main_t *,
+ vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_start_all_tx_queues (vlib_main_t *,
+ vnet_dev_port_t *);
+void vnet_dev_port_stop (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_deinit (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_free (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_add_counters (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_port_free_counters (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_update_tx_node_runtime (vlib_main_t *, vnet_dev_port_t *);
+void vnet_dev_port_state_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_state_changes_t);
+void vnet_dev_port_clear_counters (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t
+vnet_dev_port_cfg_change_req_validate (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t vnet_dev_port_cfg_change (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+vnet_dev_rv_t vnet_dev_port_if_create (vlib_main_t *, vnet_dev_port_t *);
+vnet_dev_rv_t vnet_dev_port_if_remove (vlib_main_t *, vnet_dev_port_t *);
+
+/* queue.c */
+vnet_dev_rv_t vnet_dev_rx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16);
+vnet_dev_rv_t vnet_dev_tx_queue_alloc (vlib_main_t *, vnet_dev_port_t *, u16);
+void vnet_dev_rx_queue_free (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_free (vlib_main_t *, vnet_dev_tx_queue_t *);
+void vnet_dev_rx_queue_add_counters (vlib_main_t *, vnet_dev_rx_queue_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_rx_queue_free_counters (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_add_counters (vlib_main_t *, vnet_dev_tx_queue_t *,
+ vnet_dev_counter_t *, u16);
+void vnet_dev_tx_queue_free_counters (vlib_main_t *, vnet_dev_tx_queue_t *);
+vnet_dev_rv_t vnet_dev_rx_queue_start (vlib_main_t *, vnet_dev_rx_queue_t *);
+vnet_dev_rv_t vnet_dev_tx_queue_start (vlib_main_t *, vnet_dev_tx_queue_t *);
+void vnet_dev_rx_queue_stop (vlib_main_t *, vnet_dev_rx_queue_t *);
+void vnet_dev_tx_queue_stop (vlib_main_t *, vnet_dev_tx_queue_t *);
+
+/* process.c */
+vnet_dev_rv_t vnet_dev_process_create (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_process_call_op (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_t *);
+vnet_dev_rv_t vnet_dev_process_call_op_no_rv (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_process_call_op_no_wait (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+vnet_dev_rv_t vnet_dev_process_call_port_op (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_t *);
+vnet_dev_rv_t vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm,
+ vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+void vnet_dev_process_call_port_op_no_wait (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+vnet_dev_rv_t
+vnet_dev_process_port_cfg_change_req (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_cfg_change_req_t *);
+void vnet_dev_process_quit (vlib_main_t *, vnet_dev_t *);
+void vnet_dev_poll_dev_add (vlib_main_t *, vnet_dev_t *, f64,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_poll_dev_remove (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_op_no_rv_t *);
+void vnet_dev_poll_port_add (vlib_main_t *, vnet_dev_port_t *, f64,
+ vnet_dev_port_op_no_rv_t *);
+void vnet_dev_poll_port_remove (vlib_main_t *, vnet_dev_port_t *,
+ vnet_dev_port_op_no_rv_t *);
+
+typedef struct
+{
+ u16 thread_index;
+ u8 completed;
+ u8 in_order;
+ vnet_dev_port_t *port;
+} vnet_dev_rt_op_t;
+
+vnet_dev_rv_t vnet_dev_rt_exec_ops (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_rt_op_t *, u32);
+
+/* format.c */
+typedef struct
+{
+ u8 counters : 1;
+ u8 show_zero_counters : 1;
+ u8 debug : 1;
+} vnet_dev_format_args_t;
+
+format_function_t format_vnet_dev_addr;
+format_function_t format_vnet_dev_flags;
+format_function_t format_vnet_dev_hw_addr;
+format_function_t format_vnet_dev_info;
+format_function_t format_vnet_dev_interface_info;
+format_function_t format_vnet_dev_interface_name;
+format_function_t format_vnet_dev_log;
+format_function_t format_vnet_dev_port_caps;
+format_function_t format_vnet_dev_port_flags;
+format_function_t format_vnet_dev_port_info;
+format_function_t format_vnet_dev_port_rx_offloads;
+format_function_t format_vnet_dev_port_tx_offloads;
+format_function_t format_vnet_dev_rv;
+format_function_t format_vnet_dev_rx_queue_info;
+format_function_t format_vnet_dev_tx_queue_info;
+format_function_t format_vnet_dev_flow;
+unformat_function_t unformat_vnet_dev_flags;
+unformat_function_t unformat_vnet_dev_port_flags;
+
+typedef struct
+{
+ vnet_dev_rx_queue_t *first_rx_queue;
+} vnet_dev_rx_node_runtime_t;
+
+STATIC_ASSERT (sizeof (vnet_dev_rx_node_runtime_t) <=
+ VLIB_NODE_RUNTIME_DATA_SIZE,
+ "must fit into runtime data");
+
+#define foreach_vnet_dev_port_rx_next \
+ _ (ETH_INPUT, "ethernet-input") \
+ _ (DROP, "error-drop")
+
+typedef enum
+{
+#define _(n, s) VNET_DEV_ETH_RX_PORT_NEXT_##n,
+ foreach_vnet_dev_port_rx_next
+#undef _
+ VNET_DEV_ETH_RX_PORT_N_NEXTS
+} vnet_dev_eth_port_rx_next_t;
+
+extern u16 vnet_dev_default_next_index_by_port_type[];
+extern vlib_node_registration_t port_rx_eth_node;
+
+typedef vnet_interface_output_runtime_t vnet_dev_tx_node_runtime_t;
+
+STATIC_ASSERT (sizeof (vnet_dev_tx_node_runtime_t) <=
+ VLIB_NODE_RUNTIME_DATA_SIZE,
+ "must fit into runtime data");
+
+#define VNET_DEV_REGISTER_BUS(x, ...) \
+ __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x; \
+ static void __clib_constructor __vnet_dev_bus_registration_fn_##x (void) \
+ { \
+ vnet_dev_main_t *dm = &vnet_dev_main; \
+ __vnet_dev_bus_registration_##x.next_registration = \
+ dm->bus_registrations; \
+ dm->bus_registrations = &__vnet_dev_bus_registration_##x; \
+ } \
+ __VA_ARGS__ vnet_dev_bus_registration_t __vnet_dev_bus_registration_##x
+
+#define VNET_DEV_REGISTER_DRIVER(x, ...) \
+ __VA_ARGS__ vnet_dev_driver_registration_t \
+ __vnet_dev_driver_registration_##x; \
+ static void __clib_constructor __vnet_dev_driver_registration_fn_##x (void) \
+ { \
+ vnet_dev_main_t *dm = &vnet_dev_main; \
+ __vnet_dev_driver_registration_##x.next_registration = \
+ dm->driver_registrations; \
+ dm->driver_registrations = &__vnet_dev_driver_registration_##x; \
+ } \
+ __VA_ARGS__ vnet_dev_driver_registration_t __vnet_dev_driver_registration_##x
+
+#define VNET_DEV_NODE_FN(node) \
+ uword CLIB_MARCH_SFX (node##_fn) (vlib_main_t *, vlib_node_runtime_t *, \
+ vlib_frame_t *); \
+ static vlib_node_fn_registration_t CLIB_MARCH_SFX ( \
+ node##_fn_registration) = { \
+ .function = &CLIB_MARCH_SFX (node##_fn), \
+ }; \
+ \
+ static void __clib_constructor CLIB_MARCH_SFX ( \
+ node##_fn_multiarch_register) (void) \
+ { \
+ extern vnet_dev_node_t node; \
+ vlib_node_fn_registration_t *r; \
+ r = &CLIB_MARCH_SFX (node##_fn_registration); \
+ r->march_variant = CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE); \
+ r->next_registration = (node).registrations; \
+ (node).registrations = r; \
+ } \
+ uword CLIB_MARCH_SFX (node##_fn)
+
+#define foreach_vnet_dev_port(p, d) pool_foreach_pointer (p, d->ports)
+#define foreach_vnet_dev_port_rx_queue(q, p) \
+ pool_foreach_pointer (q, p->rx_queues)
+#define foreach_vnet_dev_port_tx_queue(q, p) \
+ pool_foreach_pointer (q, p->tx_queues)
+
+#include <vnet/dev/dev_funcs.h>
+
+#endif /* _VNET_DEV_H_ */
diff --git a/src/vnet/dev/dev_api.c b/src/vnet/dev/dev_api.c
new file mode 100644
index 00000000000..5e9ac502b5d
--- /dev/null
+++ b/src/vnet/dev/dev_api.c
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/api.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+
+/* define message IDs */
+#include <dev/dev.api_enum.h>
+#include <dev/dev.api_types.h>
+
+static u16 vnet_dev_api_msg_id_base;
+
+#define REPLY_MSG_ID_BASE (vnet_dev_api_msg_id_base)
+#include <vlibapi/api_helper_macros.h>
+
+#define _(b, n, d) \
+ STATIC_ASSERT ((int) VL_API_DEV_FLAG_##n == (int) VNET_DEV_F_##n, "");
+foreach_vnet_dev_flag;
+#undef _
+
+#define _(b, n, d) \
+ STATIC_ASSERT ((int) VL_API_DEV_PORT_FLAG_##n == (int) VNET_DEV_PORT_F_##n, \
+ "");
+foreach_vnet_dev_port_flag;
+#undef _
+
+static void
+vl_api_dev_attach_t_handler (vl_api_dev_attach_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_attach_reply_t *rmp;
+ vnet_dev_api_attach_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ STATIC_ASSERT (sizeof (mp->device_id) == sizeof (a.device_id), "");
+ STATIC_ASSERT (sizeof (mp->driver_name) == sizeof (a.driver_name), "");
+ STATIC_ASSERT (sizeof (mp->flags) == sizeof (a.flags), "");
+
+ a.flags.n = mp->flags;
+ strncpy (a.device_id, (char *) mp->device_id, sizeof (a.device_id));
+ strncpy (a.driver_name, (char *) mp->driver_name, sizeof (a.driver_name));
+ vec_add (a.args, mp->args.buf, mp->args.length);
+
+ rv = vnet_dev_api_attach (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ vec_free (a.args);
+
+ REPLY_MACRO3_END (VL_API_DEV_ATTACH_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ {
+ rmp->dev_index = ~0;
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }
+ else
+ rmp->dev_index = a.dev_index;
+ }));
+
+ vec_free (a.args);
+ vec_free (error_string);
+}
+
+static void
+vl_api_dev_detach_t_handler (vl_api_dev_detach_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_detach_reply_t *rmp;
+ vnet_dev_api_detach_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ a.dev_index = mp->dev_index;
+
+ rv = vnet_dev_api_detach (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ REPLY_MACRO3_END (VL_API_DEV_DETACH_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }));
+
+ vec_free (error_string);
+}
+
+static void
+vl_api_dev_create_port_if_t_handler (vl_api_dev_create_port_if_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_create_port_if_reply_t *rmp;
+ vnet_dev_api_create_port_if_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ STATIC_ASSERT (sizeof (mp->intf_name) == sizeof (a.intf_name), "");
+ STATIC_ASSERT (sizeof (mp->flags) == sizeof (a.flags), "");
+
+ a.flags.n = mp->flags;
+#define _(n) a.n = mp->n;
+ _ (dev_index)
+ _ (port_id)
+ _ (num_rx_queues)
+ _ (num_tx_queues)
+ _ (rx_queue_size)
+ _ (tx_queue_size)
+#undef _
+
+ strncpy (a.intf_name, (char *) mp->intf_name, sizeof (a.intf_name));
+ vec_add (a.args, mp->args.buf, mp->args.length);
+
+ rv = vnet_dev_api_create_port_if (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ vec_free (a.args);
+
+ REPLY_MACRO3_END (VL_API_DEV_CREATE_PORT_IF_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ {
+ rmp->sw_if_index = ~0;
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }
+ else
+ rmp->sw_if_index = a.sw_if_index;
+ }));
+
+ vec_free (a.args);
+ vec_free (error_string);
+}
+
+static void
+vl_api_dev_remove_port_if_t_handler (vl_api_dev_remove_port_if_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_dev_remove_port_if_reply_t *rmp;
+ vnet_dev_api_remove_port_if_args_t a = {};
+ vnet_dev_rv_t rv;
+ u8 *error_string = 0;
+
+ a.sw_if_index = mp->sw_if_index;
+
+ rv = vnet_dev_api_remove_port_if (vm, &a);
+
+ if (rv != VNET_DEV_OK)
+ error_string = format (0, "%U", format_vnet_dev_rv, rv);
+
+ REPLY_MACRO3_END (VL_API_DEV_REMOVE_PORT_IF_REPLY, vec_len (error_string), ({
+ rmp->retval = rv;
+ if (error_string)
+ vl_api_vec_to_api_string (error_string,
+ &rmp->error_string);
+ }));
+
+ vec_free (error_string);
+}
+
+/* set tup the API message handling tables */
+
+#include <dev/dev.api.c>
+
+static clib_error_t *
+vnet_dev_api_hookup (vlib_main_t *vm)
+{
+ api_main_t *am = vlibapi_get_main ();
+
+ /* ask for a correctly-sized block of API message decode slots */
+ vnet_dev_api_msg_id_base = setup_message_id_table ();
+
+ foreach_int (i, VL_API_DEV_ATTACH, VL_API_DEV_DETACH,
+ VL_API_DEV_CREATE_PORT_IF, VL_API_DEV_REMOVE_PORT_IF)
+ vl_api_set_msg_thread_safe (am, vnet_dev_api_msg_id_base + i, 1);
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (vnet_dev_api_hookup);
diff --git a/src/vnet/dev/dev_funcs.h b/src/vnet/dev/dev_funcs.h
new file mode 100644
index 00000000000..521157abbec
--- /dev/null
+++ b/src/vnet/dev/dev_funcs.h
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_FUNCS_H_
+#define _VNET_DEV_FUNCS_H_
+
+#include <vppinfra/clib.h>
+#include <vnet/dev/dev.h>
+
+static_always_inline void *
+vnet_dev_get_data (vnet_dev_t *dev)
+{
+ return dev->data;
+}
+
+static_always_inline vnet_dev_t *
+vnet_dev_from_data (void *p)
+{
+ return (void *) ((u8 *) p - STRUCT_OFFSET_OF (vnet_dev_t, data));
+}
+
+static_always_inline void *
+vnet_dev_get_port_data (vnet_dev_port_t *port)
+{
+ return port->data;
+}
+
+static_always_inline void *
+vnet_dev_get_rx_queue_data (vnet_dev_rx_queue_t *rxq)
+{
+ return rxq->data;
+}
+
+static_always_inline void *
+vnet_dev_get_tx_queue_data (vnet_dev_tx_queue_t *txq)
+{
+ return txq->data;
+}
+
+static_always_inline vnet_dev_t *
+vnet_dev_get_by_index (u32 index)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ return pool_elt_at_index (dm->devices, index)[0];
+}
+
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_by_index (vnet_dev_t *dev, u32 index)
+{
+ return pool_elt_at_index (dev->ports, index)[0];
+}
+
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_from_dev_instance (u32 dev_instance)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ if (pool_is_free_index (dm->ports_by_dev_instance, dev_instance))
+ return 0;
+ return pool_elt_at_index (dm->ports_by_dev_instance, dev_instance)[0];
+}
+
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_from_hw_if_index (u32 hw_if_index)
+{
+ vnet_hw_interface_t *hw;
+ vnet_dev_port_t *port;
+ hw = vnet_get_hw_interface (vnet_get_main (), hw_if_index);
+ port = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+
+ if (!port || port->intf.hw_if_index != hw_if_index)
+ return 0;
+
+ return port;
+}
+
+static_always_inline vnet_dev_t *
+vnet_dev_by_index (u32 index)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ if (pool_is_free_index (dm->devices, index))
+ return 0;
+
+ return *pool_elt_at_index (dm->devices, index);
+}
+
+static_always_inline vnet_dev_t *
+vnet_dev_by_id (char *id)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ uword *p = hash_get (dm->device_index_by_id, id);
+ if (p)
+ return *pool_elt_at_index (dm->devices, p[0]);
+ return 0;
+}
+
+static_always_inline uword
+vnet_dev_get_dma_addr (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+ return dev->va_dma ? pointer_to_uword (p) : vlib_physmem_get_pa (vm, p);
+}
+
+static_always_inline void *
+vnet_dev_get_bus_data (vnet_dev_t *dev)
+{
+ return (void *) dev->bus_data;
+}
+
+static_always_inline vnet_dev_bus_t *
+vnet_dev_get_bus (vnet_dev_t *dev)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ return pool_elt_at_index (dm->buses, dev->bus_index);
+}
+
+static_always_inline void
+vnet_dev_validate (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ ASSERT (dev->process_node_index == vlib_get_current_process_node_index (vm));
+ ASSERT (vm->thread_index == 0);
+}
+
+static_always_inline void
+vnet_dev_port_validate (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ ASSERT (port->dev->process_node_index ==
+ vlib_get_current_process_node_index (vm));
+ ASSERT (vm->thread_index == 0);
+}
+
+static_always_inline u32
+vnet_dev_port_get_sw_if_index (vnet_dev_port_t *port)
+{
+ return port->intf.sw_if_index;
+}
+
+static_always_inline vnet_dev_port_t *
+vnet_dev_get_port_by_id (vnet_dev_t *dev, vnet_dev_port_id_t port_id)
+{
+ foreach_vnet_dev_port (p, dev)
+ if (p->port_id == port_id)
+ return p;
+ return 0;
+}
+
+static_always_inline vnet_dev_rx_queue_t *
+vnet_dev_port_get_rx_queue_by_id (vnet_dev_port_t *port,
+ vnet_dev_queue_id_t queue_id)
+{
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->queue_id == queue_id)
+ return q;
+ return 0;
+}
+
+static_always_inline vnet_dev_tx_queue_t *
+vnet_dev_port_get_tx_queue_by_id (vnet_dev_port_t *port,
+ vnet_dev_queue_id_t queue_id)
+{
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if (q->queue_id == queue_id)
+ return q;
+ return 0;
+}
+
+static_always_inline void *
+vnet_dev_alloc_with_data (u32 sz, u32 data_sz)
+{
+ void *p;
+ sz += data_sz;
+ sz = round_pow2 (sz, CLIB_CACHE_LINE_BYTES);
+ p = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES);
+ clib_memset (p, 0, sz);
+ return p;
+}
+
+static_always_inline void
+vnet_dev_tx_queue_lock_if_needed (vnet_dev_tx_queue_t *txq)
+{
+ u8 free = 0;
+
+ if (!txq->lock_needed)
+ return;
+
+ while (!__atomic_compare_exchange_n (&txq->lock, &free, 1, 0,
+ __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
+ {
+ while (__atomic_load_n (&txq->lock, __ATOMIC_RELAXED))
+ CLIB_PAUSE ();
+ free = 0;
+ }
+}
+
+static_always_inline void
+vnet_dev_tx_queue_unlock_if_needed (vnet_dev_tx_queue_t *txq)
+{
+ if (!txq->lock_needed)
+ return;
+ __atomic_store_n (&txq->lock, 0, __ATOMIC_RELEASE);
+}
+
+static_always_inline u8
+vnet_dev_get_rx_queue_buffer_pool_index (vnet_dev_rx_queue_t *rxq)
+{
+ return rxq->buffer_template.buffer_pool_index;
+}
+
+static_always_inline u32
+vnet_dev_get_rx_queue_buffer_data_size (vlib_main_t *vm,
+ vnet_dev_rx_queue_t *rxq)
+{
+ u8 bpi = vnet_dev_get_rx_queue_buffer_pool_index (rxq);
+ return vlib_get_buffer_pool (vm, bpi)->data_size;
+}
+
+static_always_inline void
+vnet_dev_rx_queue_rt_request (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq,
+ vnet_dev_rx_queue_rt_req_t req)
+{
+ __atomic_fetch_or (&rxq->runtime_request.as_number, req.as_number,
+ __ATOMIC_RELEASE);
+}
+
+static_always_inline vnet_dev_rx_node_runtime_t *
+vnet_dev_get_rx_node_runtime (vlib_node_runtime_t *node)
+{
+ return (void *) node->runtime_data;
+}
+
+static_always_inline vnet_dev_tx_node_runtime_t *
+vnet_dev_get_tx_node_runtime (vlib_node_runtime_t *node)
+{
+ return (void *) node->runtime_data;
+}
+
+static_always_inline vnet_dev_rx_queue_t *
+foreach_vnet_dev_rx_queue_runtime_helper (vlib_node_runtime_t *node,
+ vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port;
+ vnet_dev_rx_queue_rt_req_t req;
+
+ if (rxq == 0)
+ rxq = vnet_dev_get_rx_node_runtime (node)->first_rx_queue;
+ else
+ next:
+ rxq = rxq->next_on_thread;
+
+ if (PREDICT_FALSE (rxq == 0))
+ return 0;
+
+ if (PREDICT_TRUE (rxq->runtime_request.as_number == 0))
+ return rxq;
+
+ req.as_number =
+ __atomic_exchange_n (&rxq->runtime_request.as_number, 0, __ATOMIC_ACQUIRE);
+
+ port = rxq->port;
+ if (req.update_next_index)
+ rxq->next_index = port->intf.rx_next_index;
+
+ if (req.update_feature_arc)
+ {
+ vlib_buffer_template_t *bt = &rxq->buffer_template;
+ bt->current_config_index = port->intf.current_config_index;
+ vnet_buffer (bt)->feature_arc_index = port->intf.feature_arc_index;
+ }
+
+ if (req.suspend_on)
+ {
+ rxq->suspended = 1;
+ goto next;
+ }
+
+ if (req.suspend_off)
+ rxq->suspended = 0;
+
+ return rxq;
+}
+
+#define foreach_vnet_dev_rx_queue_runtime(q, node) \
+ for (vnet_dev_rx_queue_t * (q) = \
+ foreach_vnet_dev_rx_queue_runtime_helper (node, 0); \
+ q; (q) = foreach_vnet_dev_rx_queue_runtime_helper (node, q))
+
+static_always_inline void *
+vnet_dev_get_rt_temp_space (vlib_main_t *vm)
+{
+ return vnet_dev_main.runtime_temp_spaces +
+ ((uword) vm->thread_index
+ << vnet_dev_main.log2_runtime_temp_space_sz);
+}
+
+static_always_inline void
+vnet_dev_set_hw_addr_eth_mac (vnet_dev_hw_addr_t *addr, const u8 *eth_mac_addr)
+{
+ vnet_dev_hw_addr_t ha = {};
+ clib_memcpy_fast (&ha.eth_mac, eth_mac_addr, sizeof (ha.eth_mac));
+ *addr = ha;
+}
+
+static_always_inline vnet_dev_arg_t *
+vnet_dev_get_port_arg_by_id (vnet_dev_port_t *port, u32 id)
+{
+ foreach_vnet_dev_port_args (a, port)
+ if (a->id == id)
+ return a;
+ return 0;
+}
+
+static_always_inline int
+vnet_dev_arg_get_bool (vnet_dev_arg_t *arg)
+{
+ ASSERT (arg->type == VNET_DEV_ARG_TYPE_BOOL);
+ return arg->val_set ? arg->val.boolean : arg->default_val.boolean;
+}
+
+static_always_inline u32
+vnet_dev_arg_get_uint32 (vnet_dev_arg_t *arg)
+{
+ ASSERT (arg->type == VNET_DEV_ARG_TYPE_UINT32);
+ return arg->val_set ? arg->val.uint32 : arg->default_val.uint32;
+}
+
+static_always_inline u8 *
+vnet_dev_arg_get_string (vnet_dev_arg_t *arg)
+{
+ ASSERT (arg->type == VNET_DEV_ARG_TYPE_STRING);
+ return arg->val_set ? arg->val.string : arg->default_val.string;
+}
+
+#endif /* _VNET_DEV_FUNCS_H_ */
diff --git a/src/vnet/dev/error.c b/src/vnet/dev/error.c
new file mode 100644
index 00000000000..4e057010af0
--- /dev/null
+++ b/src/vnet/dev/error.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/flow/flow.h>
+
+clib_error_t *
+vnet_dev_port_err (vlib_main_t *vm, vnet_dev_port_t *port, vnet_dev_rv_t rv,
+ char *fmt, ...)
+{
+ clib_error_t *err;
+ va_list va;
+ u8 *s;
+
+ if (rv == VNET_DEV_OK)
+ return 0;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ err = clib_error_return (0, "%s port %u: %U (%v)", port->dev->device_id,
+ port->port_id, format_vnet_dev_rv, rv, s);
+ vec_free (s);
+ return err;
+}
+
+int
+vnet_dev_flow_err (vlib_main_t *vm, vnet_dev_rv_t rv)
+{
+ if (rv == VNET_DEV_OK)
+ return 0;
+
+ switch (rv)
+ {
+ /* clang-format off */
+#define _(n, e, s) \
+ case VNET_DEV_ERR_##e: \
+ return VNET_FLOW_ERROR_##e;
+ foreach_flow_error;
+#undef _
+ /* clang-format on */
+ default:
+ ASSERT (0);
+ }
+
+ ASSERT (0);
+
+ return 0;
+}
diff --git a/src/vnet/dev/errors.h b/src/vnet/dev/errors.h
new file mode 100644
index 00000000000..430a6aef282
--- /dev/null
+++ b/src/vnet/dev/errors.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_ERRORS_H_
+#define _VNET_DEV_ERRORS_H_
+
+#define foreach_vnet_dev_rv_type \
+ _ (ALREADY_EXISTS, "already exists") \
+ _ (ALREADY_IN_USE, "already in use") \
+ _ (BUFFER_ALLOC_FAIL, "packet buffer allocation failure") \
+ _ (BUG, "bug") \
+ _ (BUS, "bus error") \
+ _ (DEVICE_NO_REPLY, "no reply from device") \
+ _ (DMA_MEM_ALLOC_FAIL, "DMA memory allocation error") \
+ _ (DRIVER_NOT_AVAILABLE, "driver not available") \
+ _ (INVALID_ARG, "invalid argument") \
+ _ (INVALID_BUS, "invalid bus") \
+ _ (INVALID_DATA, "invalid data") \
+ _ (INVALID_DEVICE_ID, "invalid device id") \
+ _ (INVALID_NUM_RX_QUEUES, "invalid number of rx queues") \
+ _ (INVALID_NUM_TX_QUEUES, "invalid number of tx queues") \
+ _ (INVALID_PORT_ID, "invalid port id") \
+ _ (INVALID_RX_QUEUE_SIZE, "invalid rx queue size") \
+ _ (INVALID_TX_QUEUE_SIZE, "invalid tx queue size") \
+ _ (INVALID_VALUE, "invalid value") \
+ _ (INTERNAL, "internal error") \
+ _ (NOT_FOUND, "not found") \
+ _ (NOT_READY, "not ready") \
+ _ (NOT_SUPPORTED, "not supported") \
+ _ (NO_CHANGE, "no change") \
+ _ (NO_AVAIL_QUEUES, "no queues available") \
+ _ (NO_SUCH_ENTRY, "no such enty") \
+ _ (PORT_STARTED, "port started") \
+ _ (PROCESS_REPLY, "dev process reply error") \
+ _ (RESOURCE_NOT_AVAILABLE, "resource not available") \
+ _ (TIMEOUT, "timeout") \
+ _ (UNKNOWN_DEVICE, "unknown device") \
+ _ (UNKNOWN_INTERFACE, "unknown interface") \
+ _ (UNSUPPORTED_CONFIG, "unsupported config") \
+ _ (UNSUPPORTED_DEVICE, "unsupported device") \
+ _ (UNSUPPORTED_DEVICE_VER, "unsupported device version") \
+ _ (ALREADY_DONE, "already done") \
+ _ (NO_SUCH_INTERFACE, "no such interface")
+
+#endif /* _VNET_DEV_ERRORS_H_ */
diff --git a/src/vnet/dev/format.c b/src/vnet/dev/format.c
new file mode 100644
index 00000000000..ed83a0eba95
--- /dev/null
+++ b/src/vnet/dev/format.c
@@ -0,0 +1,507 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/ethernet/ethernet.h>
+
+u8 *
+format_vnet_dev_rv (u8 *s, va_list *args)
+{
+ vnet_dev_rv_t rv = va_arg (*args, vnet_dev_rv_t);
+ u32 index = -rv;
+
+ char *strings[] = { [0] = "OK",
+#define _(n, d) [-VNET_DEV_ERR_##n] = d,
+ foreach_vnet_dev_rv_type
+#undef _
+ };
+
+ if (index >= ARRAY_LEN (strings))
+ return format (s, "unknown return value (%d)", rv);
+ return format (s, "%s", strings[index]);
+}
+
+u8 *
+format_vnet_dev_addr (u8 *s, va_list *args)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ vnet_dev_bus_t *bus;
+
+ if (dev == 0)
+ return 0;
+
+ bus = pool_elt_at_index (dm->buses, dev->bus_index);
+ s = format (s, "%U", bus->ops.format_device_addr, dev);
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_interface_name (u8 *s, va_list *args)
+{
+ u32 i = va_arg (*args, u32);
+ vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i);
+
+ return format (s, "%s", port->intf.name);
+}
+
+u8 *
+format_vnet_dev_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *);
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ vnet_dev_driver_t *dr = pool_elt_at_index (dm->drivers, dev->driver_index);
+ vnet_dev_bus_t *bus = pool_elt_at_index (dm->buses, dev->bus_index);
+
+ u32 indent = format_get_indent (s);
+ s = format (s, "Driver is '%s', bus is '%s'", dr->registration->name,
+ bus->registration->name);
+
+ if (dev->description)
+ s = format (s, ", description is '%v'", dev->description);
+
+ if (bus->ops.format_device_info)
+ s = format (s, "\n%U%U", format_white_space, indent,
+ bus->ops.format_device_info, a, dev);
+
+ s = format (s, "\n%UAssigned process node is '%U'", format_white_space,
+ indent, format_vlib_node_name, vm, dev->process_node_index);
+ if (dev->args)
+ s = format (s, "\n%UDevice Specific Arguments:\n%U%U", format_white_space,
+ indent, format_white_space, indent + 2, format_vnet_dev_args,
+ dev->args);
+ if (dev->ops.format_info)
+ s =
+ format (s, "\n%UDevice Specific Info:\n%U%U", format_white_space, indent,
+ format_white_space, indent + 2, dev->ops.format_info, a, dev);
+ return s;
+}
+
+u8 *
+format_vnet_dev_hw_addr (u8 *s, va_list *args)
+{
+ vnet_dev_hw_addr_t *addr = va_arg (*args, vnet_dev_hw_addr_t *);
+ return format (s, "%U", format_ethernet_address, addr->eth_mac);
+}
+
+u8 *
+format_vnet_dev_port_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t *a = va_arg (*args, vnet_dev_format_args_t *);
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_dev_port_t *port = va_arg (*args, vnet_dev_port_t *);
+
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "Hardware Address is %U", format_vnet_dev_hw_addr,
+ &port->attr.hw_addr);
+ s = format (s, ", %u RX queues (max %u), %u TX queues (max %u)",
+ pool_elts (port->rx_queues), port->attr.max_rx_queues,
+ pool_elts (port->tx_queues), port->attr.max_tx_queues);
+ if (pool_elts (port->secondary_hw_addr))
+ {
+ u32 i = 0;
+ vnet_dev_hw_addr_t *a;
+ s = format (s, "\n%USecondary Hardware Address%s:", format_white_space,
+ indent,
+ pool_elts (port->secondary_hw_addr) > 1 ? "es are" : " is");
+ pool_foreach (a, port->secondary_hw_addr)
+ {
+ if (i++ % 6 == 0)
+ s = format (s, "\n%U", format_white_space, indent + 1);
+ s = format (s, " %U", format_vnet_dev_hw_addr, a);
+ }
+ }
+ s = format (s, "\n%UMax RX frame size is %u (max supported %u)",
+ format_white_space, indent, port->max_rx_frame_size,
+ port->attr.max_supported_rx_frame_size);
+ s = format (s, "\n%UCaps: %U", format_white_space, indent,
+ format_vnet_dev_port_caps, &port->attr.caps);
+ s = format (s, "\n%URX Offloads: %U", format_white_space, indent,
+ format_vnet_dev_port_rx_offloads, &port->attr.rx_offloads);
+ s = format (s, "\n%UTX Offloads: %U", format_white_space, indent,
+ format_vnet_dev_port_tx_offloads, &port->attr.tx_offloads);
+ if (port->port_ops.format_status)
+ s = format (s, "\n%UDevice Specific Port Status:\n%U%U",
+ format_white_space, indent, format_white_space, indent + 2,
+ port->port_ops.format_status, a, port);
+ if (port->args)
+ s = format (s, "\n%UDevice Specific Port Arguments:\n%U%U",
+ format_white_space, indent, format_white_space, indent + 2,
+ format_vnet_dev_args, port->args);
+
+ s = format (s, "\n%UInterface ", format_white_space, indent);
+ if (port->interface_created)
+ {
+ s = format (s, "assigned, interface name is '%U', RX node is '%U'",
+ format_vnet_sw_if_index_name, vnm, port->intf.sw_if_index,
+ format_vlib_node_name, vm, port->intf.rx_node_index);
+ }
+ else
+ s = format (s, "not assigned");
+ return s;
+}
+
+u8 *
+format_vnet_dev_rx_queue_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_rx_queue_t *rxq = va_arg (*args, vnet_dev_rx_queue_t *);
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "Size is %u, buffer pool index is %u", rxq->size,
+ vnet_dev_get_rx_queue_buffer_pool_index (rxq));
+ s = format (s, "\n%UPolling thread is %u, %sabled, %sstarted, %s mode",
+ format_white_space, indent, rxq->rx_thread_index,
+ rxq->enabled ? "en" : "dis", rxq->started ? "" : "not-",
+ rxq->interrupt_mode ? "interrupt" : "polling");
+ if (rxq->port->rx_queue_ops.format_info)
+ s = format (s, "\n%U%U", format_white_space, indent,
+ rxq->port->rx_queue_ops.format_info, a, rxq);
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_tx_queue_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_tx_queue_t *txq = va_arg (*args, vnet_dev_tx_queue_t *);
+ u32 indent = format_get_indent (s);
+ u32 n;
+
+ s = format (s, "Size is %u", txq->size);
+ s = format (s, "\n%U", format_white_space, indent);
+ n = clib_bitmap_count_set_bits (txq->assigned_threads);
+ if (n == 0)
+ s = format (s, "Not used by any thread");
+ else
+ s = format (s, "Used by thread%s %U", n > 1 ? "s" : "", format_bitmap_list,
+ txq->assigned_threads);
+ if (txq->port->tx_queue_ops.format_info)
+ s = format (s, "\n%U%U", format_white_space, indent,
+ txq->port->tx_queue_ops.format_info, a, txq);
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_interface_info (u8 *s, va_list *args)
+{
+ u32 i = va_arg (*args, u32);
+ vnet_dev_format_args_t fa = {}, *a = &fa;
+ vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (i);
+ vnet_dev_t *dev = port->dev;
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "Device:");
+ s = format (s, "\n%U%U", format_white_space, indent + 2,
+ format_vnet_dev_info, a, dev);
+
+ s = format (s, "\n%UPort %u:", format_white_space, indent, port->port_id);
+ s = format (s, "\n%U%U", format_white_space, indent + 2,
+ format_vnet_dev_port_info, a, port);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ s = format (s, "\n%URX queue %u:", format_white_space, indent + 2,
+ q->queue_id);
+ s = format (s, "\n%U%U", format_white_space, indent + 4,
+ format_vnet_dev_rx_queue_info, a, q);
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ s = format (s, "\n%UTX queue %u:", format_white_space, indent + 2,
+ q->queue_id);
+ s = format (s, "\n%U%U", format_white_space, indent + 4,
+ format_vnet_dev_tx_queue_info, a, q);
+ }
+ return s;
+}
+
+static u64
+unformat_flags (unformat_input_t *input, char *names[], u64 val[], u32 n_flags)
+{
+ u64 rv = 0;
+ uword c = 0;
+ u8 *s = 0;
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ switch (c)
+ {
+ case 'a' ... 'z':
+ c -= 'a' - 'A';
+ case '0' ... '9':
+ case 'A' ... 'Z':
+ vec_add1 (s, c);
+ break;
+ case '-':
+ vec_add1 (s, '_');
+ break;
+ case ',':
+ vec_add1 (s, 0);
+ break;
+ default:
+ goto end_of_string;
+ }
+ }
+end_of_string:
+
+ if (s == 0)
+ return 0;
+
+ vec_add1 (s, 0);
+
+ for (u8 *p = s, *end = vec_end (s); p < end; p += strlen ((char *) p) + 1)
+ {
+ for (c = 0; c < n_flags; c++)
+ if (strcmp (names[c], (char *) p) == 0)
+ {
+ rv |= val[c];
+ break;
+ }
+ if (c == n_flags)
+ goto done;
+ }
+
+done:
+ vec_free (s);
+ return rv;
+}
+
+uword
+unformat_vnet_dev_flags (unformat_input_t *input, va_list *args)
+{
+ vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *);
+ u64 val;
+
+ char *names[] = {
+#define _(b, n, d) #n,
+ foreach_vnet_dev_flag
+#undef _
+ };
+ u64 vals[] = {
+#define _(b, n, d) 1ull << (b)
+ foreach_vnet_dev_flag
+#undef _
+ };
+
+ val = unformat_flags (input, names, vals, ARRAY_LEN (names));
+
+ if (!val)
+ return 0;
+
+ fp->n = val;
+ return 1;
+}
+
+uword
+unformat_vnet_dev_port_flags (unformat_input_t *input, va_list *args)
+{
+ vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *);
+ u64 val;
+
+ char *flag_names[] = {
+#define _(b, n, d) #n,
+ foreach_vnet_dev_port_flag
+#undef _
+ };
+ u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b)
+ foreach_vnet_dev_port_flag
+#undef _
+ };
+
+ val =
+ unformat_flags (input, flag_names, flag_values, ARRAY_LEN (flag_names));
+
+ if (!val)
+ return 0;
+
+ fp->n = val;
+ return 1;
+}
+
+static u8 *
+format_flags (u8 *s, u64 val, char *flag_names[], u64 flag_values[],
+ u32 n_flags)
+{
+ u32 n = 0;
+ for (int i = 0; i < n_flags; i++)
+ {
+ if ((val & flag_values[i]) == 0)
+ continue;
+
+ if (n++)
+ vec_add1 (s, ' ');
+
+ for (char *c = flag_names[i]; c[0] != 0; c++)
+ {
+ switch (c[0])
+ {
+ case 'A' ... 'Z':
+ vec_add1 (s, c[0] + 'a' - 'A');
+ break;
+ case '_':
+ vec_add1 (s, '-');
+ break;
+ default:
+ vec_add1 (s, c[0]);
+ }
+ }
+ }
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_flags (u8 *s, va_list *args)
+{
+ vnet_dev_flags_t *fp = va_arg (*args, vnet_dev_flags_t *);
+ char *flag_names[] = {
+#define _(b, n, d) #n,
+ foreach_vnet_dev_flag
+#undef _
+ };
+ u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b)
+ foreach_vnet_dev_flag
+#undef _
+ };
+
+ return format_flags (s, fp->n, flag_names, flag_values,
+ ARRAY_LEN (flag_names));
+}
+
+u8 *
+format_vnet_dev_port_flags (u8 *s, va_list *args)
+{
+ vnet_dev_port_flags_t *fp = va_arg (*args, vnet_dev_port_flags_t *);
+ char *flag_names[] = {
+#define _(b, n, d) #n,
+ foreach_vnet_dev_port_flag
+#undef _
+ };
+ u64 flag_values[] = {
+#define _(b, n, d) 1ull << (b)
+ foreach_vnet_dev_port_flag
+#undef _
+ };
+
+ return format_flags (s, fp->n, flag_names, flag_values,
+ ARRAY_LEN (flag_names));
+}
+
+u8 *
+format_vnet_dev_log (u8 *s, va_list *args)
+{
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ char *func = va_arg (*args, char *);
+
+ if (dev)
+ s = format (s, "%U", format_vnet_dev_addr, dev);
+ if (dev && func)
+ vec_add1 (s, ' ');
+ if (func)
+ s = format (s, "%s", func);
+ vec_add1 (s, ':');
+ vec_add1 (s, ' ');
+ return s;
+}
+
+u8 *
+format_vnet_dev_port_caps (u8 *s, va_list *args)
+{
+ vnet_dev_port_caps_t *c = va_arg (*args, vnet_dev_port_caps_t *);
+ u32 line = 0;
+
+ if (c->as_number == 0)
+ return s;
+
+#define _(n) \
+ if (c->n) \
+ { \
+ if (line++) \
+ vec_add1 (s, ' '); \
+ for (char *str = #n; *str; str++) \
+ vec_add1 (s, *str == '_' ? '-' : *str); \
+ }
+ foreach_vnet_dev_port_caps;
+#undef _
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_port_rx_offloads (u8 *s, va_list *args)
+{
+ vnet_dev_port_rx_offloads_t *c =
+ va_arg (*args, vnet_dev_port_rx_offloads_t *);
+ u32 line = 0;
+
+ if (c->as_number == 0)
+ return s;
+
+#define _(n) \
+ if (c->n) \
+ { \
+ if (line++) \
+ vec_add1 (s, ' '); \
+ for (char *str = #n; *str; str++) \
+ vec_add1 (s, *str == '_' ? '-' : *str); \
+ }
+ foreach_vnet_dev_port_rx_offloads;
+#undef _
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_port_tx_offloads (u8 *s, va_list *args)
+{
+ vnet_dev_port_tx_offloads_t *c =
+ va_arg (*args, vnet_dev_port_tx_offloads_t *);
+ u32 line = 0;
+
+ if (c->as_number == 0)
+ return s;
+
+#define _(n) \
+ if (c->n) \
+ { \
+ if (line++) \
+ vec_add1 (s, ' '); \
+ for (char *str = #n; *str; str++) \
+ vec_add1 (s, *str == '_' ? '-' : *str); \
+ }
+ foreach_vnet_dev_port_tx_offloads;
+#undef _
+
+ return s;
+}
+
+u8 *
+format_vnet_dev_flow (u8 *s, va_list *args)
+{
+ u32 dev_instance = va_arg (*args, u32);
+ u32 flow_index = va_arg (*args, u32);
+ uword private_data = va_arg (*args, uword);
+ vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (dev_instance);
+
+ if (port->port_ops.format_flow)
+ s = format (s, "%U", port->port_ops.format_flow, port, flow_index,
+ private_data);
+
+ return s;
+}
diff --git a/src/vnet/dev/handlers.c b/src/vnet/dev/handlers.c
new file mode 100644
index 00000000000..2a55affe3e3
--- /dev/null
+++ b/src/vnet/dev/handlers.c
@@ -0,0 +1,256 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+#include <vnet/flow/flow.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "handler",
+};
+
+clib_error_t *
+vnet_dev_port_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+ u32 frame_size)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_cfg_change_req_t req = {
+ .type = VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE,
+ .max_rx_frame_size = frame_size,
+ };
+
+ log_debug (p->dev, "size %u", frame_size);
+
+ rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+ if (rv == VNET_DEV_ERR_NO_CHANGE)
+ return 0;
+
+ if (rv != VNET_DEV_OK)
+ return vnet_dev_port_err (vm, p, rv,
+ "new max frame size is not valid for port");
+
+ if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+ return vnet_dev_port_err (vm, p, rv,
+ "device failed to change max frame size");
+
+ return 0;
+}
+
+u32
+vnet_dev_port_eth_flag_change (vnet_main_t *vnm, vnet_hw_interface_t *hw,
+ u32 flags)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_cfg_change_req_t req = {
+ .type = VNET_DEV_PORT_CFG_PROMISC_MODE,
+ };
+
+ switch (flags)
+ {
+ case ETHERNET_INTERFACE_FLAG_DEFAULT_L3:
+ log_debug (p->dev, "promisc off");
+ break;
+ case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
+ log_debug (p->dev, "promisc on");
+ req.promisc = 1;
+ break;
+ default:
+ return ~0;
+ }
+
+ rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+ if (rv == VNET_DEV_ERR_NO_CHANGE)
+ return 0;
+
+ if (rv != VNET_DEV_OK)
+ return ~0;
+
+ rv = vnet_dev_process_port_cfg_change_req (vm, p, &req);
+ if (rv == VNET_DEV_OK || rv == VNET_DEV_ERR_NO_CHANGE)
+ return 0;
+ return ~0;
+}
+
+clib_error_t *
+vnet_dev_port_mac_change (vnet_hw_interface_t *hi, const u8 *old,
+ const u8 *new)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_cfg_change_req_t req = {
+ .type = VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR,
+ };
+
+ vnet_dev_set_hw_addr_eth_mac (&req.addr, new);
+
+ log_debug (p->dev, "new mac %U", format_vnet_dev_hw_addr, &req.addr);
+
+ rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+ if (rv == VNET_DEV_ERR_NO_CHANGE)
+ return 0;
+
+ if (rv != VNET_DEV_OK)
+ return vnet_dev_port_err (vm, p, rv, "hw address is not valid for port");
+
+ if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+ return vnet_dev_port_err (vm, p, rv, "device failed to change hw address");
+
+ return 0;
+}
+
+clib_error_t *
+vnet_dev_add_del_mac_address (vnet_hw_interface_t *hi, const u8 *address,
+ u8 is_add)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_cfg_change_req_t req = {
+ .type = is_add ? VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR :
+ VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR,
+ };
+
+ vnet_dev_set_hw_addr_eth_mac (&req.addr, address);
+
+ log_debug (p->dev, "received (addr %U is_add %u", format_vnet_dev_hw_addr,
+ &req.addr, is_add);
+
+ rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+ if (rv != VNET_DEV_OK)
+ return vnet_dev_port_err (vm, p, rv,
+ "provided secondary hw addresses cannot "
+ "be added/removed");
+
+ if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+ return vnet_dev_port_err (
+ vm, p, rv, "device failed to add/remove secondary hw address");
+
+ return 0;
+}
+
+int
+vnet_dev_flow_ops_fn (vnet_main_t *vnm, vnet_flow_dev_op_t op,
+ u32 dev_instance, u32 flow_index, uword *private_data)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (dev_instance);
+ vnet_dev_port_cfg_change_req_t req;
+ vnet_dev_rv_t rv;
+
+ switch (op)
+ {
+ case VNET_FLOW_DEV_OP_ADD_FLOW:
+ req.type = VNET_DEV_PORT_CFG_ADD_RX_FLOW;
+ break;
+ case VNET_FLOW_DEV_OP_DEL_FLOW:
+ req.type = VNET_DEV_PORT_CFG_DEL_RX_FLOW;
+ break;
+ case VNET_FLOW_DEV_OP_GET_COUNTER:
+ req.type = VNET_DEV_PORT_CFG_GET_RX_FLOW_COUNTER;
+ break;
+ case VNET_FLOW_DEV_OP_RESET_COUNTER:
+ req.type = VNET_DEV_PORT_CFG_RESET_RX_FLOW_COUNTER;
+ break;
+ default:
+ log_warn (p->dev, "unsupported request for flow_ops received");
+ return VNET_FLOW_ERROR_NOT_SUPPORTED;
+ }
+
+ req.flow_index = flow_index;
+ req.private_data = private_data;
+
+ rv = vnet_dev_port_cfg_change_req_validate (vm, p, &req);
+ if (rv != VNET_DEV_OK)
+ {
+ log_err (p->dev, "validation failed for flow_ops");
+ return VNET_FLOW_ERROR_NOT_SUPPORTED;
+ }
+
+ if ((rv = vnet_dev_process_port_cfg_change_req (vm, p, &req)) != VNET_DEV_OK)
+ {
+ log_err (p->dev, "request for flow_ops failed");
+ return vnet_dev_flow_err (vm, rv);
+ }
+
+ return 0;
+}
+
+clib_error_t *
+vnet_dev_interface_set_rss_queues (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ clib_bitmap_t *bitmap)
+{
+ vnet_dev_port_t *p = vnet_dev_get_port_from_dev_instance (hi->dev_instance);
+ log_warn (p->dev, "unsupported request for flow_ops received");
+ return vnet_error (VNET_ERR_UNSUPPORTED, "not implemented");
+}
+
+void
+vnet_dev_clear_hw_interface_counters (u32 instance)
+{
+ vnet_dev_port_t *port = vnet_dev_get_port_from_dev_instance (instance);
+ vlib_main_t *vm = vlib_get_main ();
+
+ vnet_dev_process_call_port_op_no_rv (vm, port, vnet_dev_port_clear_counters);
+}
+
+void
+vnet_dev_set_interface_next_node (vnet_main_t *vnm, u32 hw_if_index,
+ u32 node_index)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_dev_port_t *port =
+ vnet_dev_get_port_from_dev_instance (hw->dev_instance);
+ int runtime_update = 0;
+
+ if (node_index == ~0)
+ {
+ port->intf.redirect_to_node_next_index = 0;
+ if (port->intf.feature_arc == 0)
+ {
+ port->intf.rx_next_index =
+ vnet_dev_default_next_index_by_port_type[port->attr.type];
+ runtime_update = 1;
+ }
+ port->intf.redirect_to_node = 0;
+ }
+ else
+ {
+ u16 next_index = vlib_node_add_next (vlib_get_main (),
+ port_rx_eth_node.index, node_index);
+ port->intf.redirect_to_node_next_index = next_index;
+ if (port->intf.feature_arc == 0)
+ {
+ port->intf.rx_next_index = next_index;
+ runtime_update = 1;
+ }
+ port->intf.redirect_to_node = 1;
+ }
+ port->intf.rx_next_index =
+ node_index == ~0 ?
+ vnet_dev_default_next_index_by_port_type[port->attr.type] :
+ node_index;
+
+ if (runtime_update)
+ {
+ foreach_vnet_dev_port_rx_queue (rxq, port)
+ vnet_dev_rx_queue_rt_request (
+ vm, rxq, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 });
+ log_debug (port->dev, "runtime update requested due to chgange in "
+ "reditect-to-next configuration");
+ }
+}
diff --git a/src/vnet/dev/log.h b/src/vnet/dev/log.h
new file mode 100644
index 00000000000..5ca7b6620e9
--- /dev/null
+++ b/src/vnet/dev/log.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_LOG_H_
+#define _VNET_DEV_LOG_H_
+
+#define log_debug(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_DEBUG, dev_log.class, "%U" f, format_vnet_dev_log, \
+ dev, clib_string_skip_prefix (__func__, "vnet_dev_"), \
+ ##__VA_ARGS__)
+#define log_notice(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_NOTICE, dev_log.class, "%U" f, \
+ format_vnet_dev_log, dev, 0, ##__VA_ARGS__)
+#define log_warn(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_WARNING, dev_log.class, "%U" f, \
+ format_vnet_dev_log, dev, 0, ##__VA_ARGS__)
+#define log_err(dev, f, ...) \
+ vlib_log (VLIB_LOG_LEVEL_ERR, dev_log.class, "%U" f, format_vnet_dev_log, \
+ dev, 0, ##__VA_ARGS__)
+
+#endif /* _VNET_DEV_LOG_H_ */
diff --git a/src/vnet/dev/mgmt.h b/src/vnet/dev/mgmt.h
new file mode 100644
index 00000000000..f13f4075255
--- /dev/null
+++ b/src/vnet/dev/mgmt.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_MGMT_H_
+#define _VNET_DEV_MGMT_H_
+
+#include <vppinfra/clib.h>
+
+#endif /* _VNET_DEV_MGMT_H_ */
diff --git a/src/vnet/dev/pci.c b/src/vnet/dev/pci.c
new file mode 100644
index 00000000000..3cc0cba5003
--- /dev/null
+++ b/src/vnet/dev/pci.c
@@ -0,0 +1,458 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/pci.h>
+#include <vnet/dev/log.h>
+#include <vlib/unix/unix.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "pci",
+};
+
+static int
+vnet_dev_bus_pci_device_id_to_pci_addr (vlib_pci_addr_t *addr, char *str)
+{
+ unformat_input_t input;
+ uword rv;
+ unformat_init_string (&input, str, strlen (str));
+ rv = unformat (&input, "pci" VNET_DEV_DEVICE_ID_PREFIX_DELIMITER "%U",
+ unformat_vlib_pci_addr, addr);
+ unformat_free (&input);
+ return rv;
+}
+
+static void *
+vnet_dev_bus_pci_get_device_info (vlib_main_t *vm, char *device_id)
+{
+ vnet_dev_bus_pci_device_info_t *info;
+ vlib_pci_addr_t addr = {};
+ clib_error_t *err = 0;
+ vlib_pci_device_info_t *di = 0;
+
+ vlib_log_debug (dev_log.class, "device %s", device_id);
+
+ if (vnet_dev_bus_pci_device_id_to_pci_addr (&addr, device_id) == 0)
+ return 0;
+
+ di = vlib_pci_get_device_info (vm, &addr, &err);
+ if (err)
+ {
+ vlib_log_err (dev_log.class, "get_device_info: %U", format_clib_error,
+ err);
+ clib_error_free (err);
+ return 0;
+ }
+
+ info = clib_mem_alloc (sizeof (vnet_dev_bus_pci_device_info_t));
+ info->addr = addr;
+ info->vendor_id = di->vendor_id;
+ info->device_id = di->device_id;
+ info->revision = di->revision;
+
+ vlib_pci_free_device_info (di);
+ return info;
+}
+
+static void
+vnet_dev_bus_pci_free_device_info (vlib_main_t *vm, void *dev_info)
+{
+ clib_mem_free (dev_info);
+}
+
+static vnet_dev_rv_t
+vnet_dev_bus_pci_open (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ clib_error_t *err = 0;
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+ if (vnet_dev_bus_pci_device_id_to_pci_addr (&pdd->addr, dev->device_id) == 0)
+ return VNET_DEV_ERR_INVALID_DEVICE_ID;
+
+ if ((err = vlib_pci_device_open (vm, &pdd->addr, 0, &pdd->handle)))
+ {
+ log_err (dev, "device_open: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ dev->numa_node = vlib_pci_get_numa_node (vm, pdd->handle);
+
+ if (vlib_pci_supports_virtual_addr_dma (vm, pdd->handle))
+ {
+ dev->va_dma = 1;
+ log_debug (dev, "device supports VA DMA");
+ }
+
+ vlib_pci_set_private_data (vm, pdd->handle, (uword) dev);
+
+ pdd->n_msix_int = vlib_pci_get_num_msix_interrupts (vm, pdd->handle);
+ if (pdd->n_msix_int)
+ {
+ u32 sz = sizeof (pdd->msix_handlers[0]) * pdd->n_msix_int;
+ sz = round_pow2 (sz, CLIB_CACHE_LINE_BYTES);
+ pdd->msix_handlers = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES);
+ clib_memset (pdd->msix_handlers, 0, sz);
+ }
+
+ return VNET_DEV_OK;
+}
+
+static void
+vnet_dev_bus_pci_close (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+ if (pdd->intx_handler)
+ vnet_dev_pci_intx_remove_handler (vm, dev);
+
+ if (pdd->msix_handlers)
+ {
+ for (u16 i = 0; i < pdd->n_msix_int; i++)
+ if (pdd->msix_handlers[i])
+ vnet_dev_pci_msix_remove_handler (vm, dev, i, 1);
+ clib_mem_free (pdd->msix_handlers);
+ pdd->msix_handlers = 0;
+ }
+
+ if (pdd->pci_handle_valid)
+ vlib_pci_device_close (vm, pdd->handle);
+}
+
+static vnet_dev_rv_t
+vnet_dev_bus_pci_dma_mem_alloc (vlib_main_t *vm, vnet_dev_t *dev, u32 size,
+ u32 align, void **pp)
+{
+ clib_error_t *err;
+ void *p;
+
+ align = align ? align : CLIB_CACHE_LINE_BYTES;
+ size = round_pow2 (size, align);
+
+ p = vlib_physmem_alloc_aligned_on_numa (vm, size, align, dev->numa_node);
+
+ if (p == 0)
+ {
+ err = vlib_physmem_last_error (vm);
+ log_err (dev, "dev_dma_mem_alloc: physmem_alloc_aligned error %U",
+ format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL;
+ }
+
+ if ((err = vlib_pci_map_dma (vm, vnet_dev_get_pci_handle (dev), p)))
+ {
+ log_err (dev, "dev_dma_mem_alloc: pci_map_dma: %U", format_clib_error,
+ err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_DMA_MEM_ALLOC_FAIL;
+ }
+
+ clib_memset (p, 0, size);
+ pp[0] = p;
+ return VNET_DEV_OK;
+}
+
+static void
+vnet_dev_bus_pci_dma_mem_free (vlib_main_t *vm, vnet_dev_t *dev, void *p)
+{
+ if (p)
+ vlib_physmem_free (vm, p);
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_read_config_header (vlib_main_t *vm, vnet_dev_t *dev,
+ vlib_pci_config_hdr_t *hdr)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_read_write_config (vm, h, VLIB_READ, 0, hdr, sizeof (*hdr));
+ if (err)
+ {
+ log_err (dev, "pci_read_config_header: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_map_region (vlib_main_t *vm, vnet_dev_t *dev, u8 region,
+ void **pp)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ if ((err = vlib_pci_map_region (vm, h, region, pp)))
+ {
+ log_err (dev, "pci_map_region: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_function_level_reset (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ if ((err = vlib_pci_function_level_reset (vm, h)))
+ {
+ log_err (dev, "pci_function_level_reset: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_bus_master_enable (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ if ((err = vlib_pci_bus_master_enable (vm, h)))
+ {
+ log_err (dev, "pci_bus_master_enable: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+ return VNET_DEV_OK;
+}
+
+static void
+vnet_dev_pci_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h)
+{
+ vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+ if (pdd->intx_handler)
+ pdd->intx_handler (vm, dev);
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_intx_add_handler (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_pci_intx_handler_fn_t *fn)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_register_intx_handler (vm, h, vnet_dev_pci_intx_handler);
+
+ if (err)
+ {
+ log_err (dev, "pci_register_intx_handler: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_intx_remove_handler (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_unregister_intx_handler (vm, h);
+
+ if (err)
+ {
+ log_err (dev, "pci_unregister_intx_handler: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ pdd->intx_handler = 0;
+
+ return VNET_DEV_OK;
+}
+
+static void
+vnet_dev_pci_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 line)
+{
+ vnet_dev_t *dev = (vnet_dev_t *) vlib_pci_get_private_data (vm, h);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+
+ if (line < pdd->n_msix_int && pdd->msix_handlers[line])
+ pdd->msix_handlers[line](vm, dev, line);
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_msix_add_handler (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_pci_msix_handler_fn_t *fn, u16 first,
+ u16 count)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_register_msix_handler (vm, h, first, count,
+ vnet_dev_pci_msix_handler);
+
+ if (err)
+ {
+ log_err (dev, "pci_register_msix_handler: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ for (u16 i = first; i < first + count; i++)
+ {
+ ASSERT (pdd->msix_handlers[i] == 0);
+ pdd->msix_handlers[i] = fn;
+ }
+
+ return VNET_DEV_OK;
+}
+
+void
+vnet_dev_pci_msix_set_polling_thread (vlib_main_t *vm, vnet_dev_t *dev,
+ u16 line, u16 thread_index)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ u32 index;
+
+ index = vlib_pci_get_msix_file_index (vm, h, line);
+
+ clib_file_set_polling_thread (&file_main, index, thread_index);
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_msix_remove_handler (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+ u16 count)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_unregister_msix_handler (vm, h, first, count);
+
+ if (err)
+ {
+ log_err (dev, "pci_unregister_msix_handler: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ for (u16 i = first; i < first + count; i++)
+ {
+ ASSERT (pdd->msix_handlers[i] != 0);
+ pdd->msix_handlers[i] = 0;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_msix_enable (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+ u16 count)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_enable_msix_irq (vm, h, first, count);
+
+ if (err)
+ {
+ log_err (dev, "pci_enable_msix_irq: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_msix_disable (vlib_main_t *vm, vnet_dev_t *dev, u16 first,
+ u16 count)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ err = vlib_pci_disable_msix_irq (vm, h, first, count);
+
+ if (err)
+ {
+ log_err (dev, "pci_disble_msix_irq: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_pci_bus_master_disable (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vlib_pci_dev_handle_t h = vnet_dev_get_pci_handle (dev);
+ clib_error_t *err;
+
+ if ((err = vlib_pci_bus_master_disable (vm, h)))
+ {
+ log_err (dev, "pci_bus_master_disable: %U", format_clib_error, err);
+ clib_error_free (err);
+ return VNET_DEV_ERR_BUS;
+ }
+ return VNET_DEV_OK;
+}
+
+static u8 *
+format_dev_pci_device_info (u8 *s, va_list *args)
+{
+ vnet_dev_format_args_t __clib_unused *a =
+ va_arg (*args, vnet_dev_format_args_t *);
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+ vlib_main_t *vm = vlib_get_main ();
+ vlib_pci_config_t cfg = {};
+ clib_error_t *err;
+
+ s = format (s, "PCIe address is %U", format_vlib_pci_addr, &pdd->addr);
+
+ err = vlib_pci_read_write_config (vm, pdd->handle, VLIB_READ, 0, &cfg,
+ sizeof (cfg));
+ if (!err)
+ {
+ s = format (s, ", port is %U, speed is %U (max %U)",
+ format_vlib_pci_link_port, &cfg, format_vlib_pci_link_speed,
+ &cfg, format_vlib_pci_link_speed_cap, &cfg);
+ }
+ else
+ clib_error_free (err);
+
+ return s;
+}
+
+static u8 *
+format_dev_pci_device_addr (u8 *s, va_list *args)
+{
+ vnet_dev_t *dev = va_arg (*args, vnet_dev_t *);
+ vnet_dev_bus_pci_device_data_t *pdd = vnet_dev_get_bus_pci_device_data (dev);
+ return format (s, "%U", format_vlib_pci_addr, &pdd->addr);
+}
+
+VNET_DEV_REGISTER_BUS (pci) = {
+ .name = "pci",
+ .device_data_size = sizeof (vnet_dev_bus_pci_device_info_t),
+ .ops = {
+ .device_open = vnet_dev_bus_pci_open,
+ .device_close = vnet_dev_bus_pci_close,
+ .get_device_info = vnet_dev_bus_pci_get_device_info,
+ .free_device_info = vnet_dev_bus_pci_free_device_info,
+ .dma_mem_alloc_fn = vnet_dev_bus_pci_dma_mem_alloc,
+ .dma_mem_free_fn = vnet_dev_bus_pci_dma_mem_free,
+ .format_device_info = format_dev_pci_device_info,
+ .format_device_addr = format_dev_pci_device_addr,
+ },
+};
diff --git a/src/vnet/dev/pci.h b/src/vnet/dev/pci.h
new file mode 100644
index 00000000000..ce9a53aa273
--- /dev/null
+++ b/src/vnet/dev/pci.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_PCI_H_
+#define _VNET_DEV_PCI_H_
+
+#include <vppinfra/clib.h>
+#include <vlib/pci/pci.h>
+#include <vnet/dev/dev.h>
+
+typedef void (vnet_dev_pci_intx_handler_fn_t) (vlib_main_t *vm,
+ vnet_dev_t *dev);
+typedef void (vnet_dev_pci_msix_handler_fn_t) (vlib_main_t *vm,
+ vnet_dev_t *dev, u16 line);
+
+typedef struct
+{
+ vlib_pci_addr_t addr;
+ u16 vendor_id;
+ u16 device_id;
+ u8 revision;
+} vnet_dev_bus_pci_device_info_t;
+
+typedef struct
+{
+ u8 pci_handle_valid : 1;
+ u16 n_msix_int;
+ vlib_pci_addr_t addr;
+ vlib_pci_dev_handle_t handle;
+ vnet_dev_pci_intx_handler_fn_t *intx_handler;
+ vnet_dev_pci_msix_handler_fn_t **msix_handlers;
+} vnet_dev_bus_pci_device_data_t;
+
+static_always_inline vnet_dev_bus_pci_device_data_t *
+vnet_dev_get_bus_pci_device_data (vnet_dev_t *dev)
+{
+ return (void *) dev->bus_data;
+}
+static_always_inline vlib_pci_dev_handle_t
+vnet_dev_get_pci_handle (vnet_dev_t *dev)
+{
+ return ((vnet_dev_bus_pci_device_data_t *) (dev->bus_data))->handle;
+}
+
+static_always_inline vlib_pci_addr_t
+vnet_dev_get_pci_addr (vnet_dev_t *dev)
+{
+ return ((vnet_dev_bus_pci_device_data_t *) (dev->bus_data))->addr;
+}
+
+static_always_inline vlib_pci_dev_handle_t
+vnet_dev_get_pci_n_msix_interrupts (vnet_dev_t *dev)
+{
+ return vnet_dev_get_bus_pci_device_data (dev)->n_msix_int;
+}
+
+vnet_dev_rv_t vnet_dev_pci_read_config_header (vlib_main_t *, vnet_dev_t *,
+ vlib_pci_config_hdr_t *);
+
+vnet_dev_rv_t vnet_dev_pci_map_region (vlib_main_t *, vnet_dev_t *, u8,
+ void **);
+vnet_dev_rv_t vnet_dev_pci_function_level_reset (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_bus_master_enable (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_bus_master_disable (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_intx_add_handler (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_pci_intx_handler_fn_t *);
+vnet_dev_rv_t vnet_dev_pci_intx_remove_handler (vlib_main_t *, vnet_dev_t *);
+vnet_dev_rv_t vnet_dev_pci_msix_add_handler (vlib_main_t *, vnet_dev_t *,
+ vnet_dev_pci_msix_handler_fn_t *,
+ u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_remove_handler (vlib_main_t *, vnet_dev_t *,
+ u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_enable (vlib_main_t *, vnet_dev_t *, u16, u16);
+vnet_dev_rv_t vnet_dev_pci_msix_disable (vlib_main_t *, vnet_dev_t *, u16,
+ u16);
+void vnet_dev_pci_msix_set_polling_thread (vlib_main_t *, vnet_dev_t *, u16,
+ u16);
+
+#endif /* _VNET_DEV_PCI_H_ */
diff --git a/src/vnet/dev/port.c b/src/vnet/dev/port.c
new file mode 100644
index 00000000000..8a6df54cbc8
--- /dev/null
+++ b/src/vnet/dev/port.c
@@ -0,0 +1,748 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "port",
+};
+
+static uword
+dummy_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ ASSERT (0);
+ return 0;
+}
+
+VLIB_REGISTER_NODE (port_rx_eth_node) = {
+ .function = dummy_input_fn,
+ .name = "port-rx-eth",
+ .runtime_data_bytes = sizeof (vnet_dev_rx_node_runtime_t),
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+ .n_next_nodes = VNET_DEV_ETH_RX_PORT_N_NEXTS,
+ .next_nodes = {
+#define _(n, s) [VNET_DEV_ETH_RX_PORT_NEXT_##n] = s,
+ foreach_vnet_dev_port_rx_next
+#undef _
+ },
+};
+
+u16 vnet_dev_default_next_index_by_port_type[] = {
+ [VNET_DEV_PORT_TYPE_ETHERNET] = VNET_DEV_ETH_RX_PORT_NEXT_ETH_INPUT,
+};
+
+VNET_FEATURE_ARC_INIT (eth_port_rx, static) = {
+ .arc_name = "port-rx-eth",
+ .start_nodes = VNET_FEATURES ("port-rx-eth"),
+ .last_in_arc = "ethernet-input",
+ .arc_index_ptr = &vnet_dev_main.eth_port_rx_feature_arc_index,
+};
+
+VNET_FEATURE_INIT (l2_patch, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "l2-patch",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (worker_handoff, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "worker-handoff",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (span_input, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "span-input",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (p2p_ethernet_node, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "p2p-ethernet-input",
+ .runs_before = VNET_FEATURES ("ethernet-input"),
+};
+
+VNET_FEATURE_INIT (ethernet_input, static) = {
+ .arc_name = "port-rx-eth",
+ .node_name = "ethernet-input",
+ .runs_before = 0, /* not before any other features */
+};
+
+void
+vnet_dev_port_free (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+
+ vnet_dev_port_validate (vm, port);
+
+ ASSERT (port->started == 0);
+
+ log_debug (dev, "port %u", port->port_id);
+
+ if (port->port_ops.free)
+ port->port_ops.free (vm, port);
+
+ pool_free (port->secondary_hw_addr);
+ pool_free (port->rx_queues);
+ pool_free (port->tx_queues);
+ vnet_dev_arg_free (&port->args);
+ pool_put_index (dev->ports, port->index);
+ clib_mem_free (port);
+}
+
+void
+vnet_dev_port_update_tx_node_runtime (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_port_validate (vm, port);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ u32 ti;
+ clib_bitmap_foreach (ti, q->assigned_threads)
+ {
+ vlib_main_t *tvm = vlib_get_main_by_index (ti);
+ vlib_node_runtime_t *nr =
+ vlib_node_get_runtime (tvm, port->intf.tx_node_index);
+ vnet_dev_tx_node_runtime_t *tnr = vnet_dev_get_tx_node_runtime (nr);
+ tnr->hw_if_index = port->intf.hw_if_index;
+ tnr->tx_queue = q;
+ }
+ }
+}
+
+void
+vnet_dev_port_stop (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rt_op_t *ops = 0;
+ u16 n_threads = vlib_get_n_threads ();
+
+ log_debug (dev, "stopping port %u", port->port_id);
+
+ for (u16 i = 0; i < n_threads; i++)
+ {
+ vnet_dev_rt_op_t op = { .thread_index = i, .port = port };
+ vec_add1 (ops, op);
+ }
+
+ vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops));
+ vec_free (ops);
+
+ port->port_ops.stop (vm, port);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ q->started = 0;
+ log_debug (dev, "port %u rx queue %u stopped", port->port_id,
+ q->queue_id);
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ q->started = 0;
+ log_debug (dev, "port %u tx queue %u stopped", port->port_id,
+ q->queue_id);
+ }
+
+ log_debug (dev, "port %u stopped", port->port_id);
+ port->started = 0;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_start_all_rx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ vnet_dev_port_validate (vm, port);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ rv = vnet_dev_rx_queue_start (vm, q);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_start_all_tx_queues (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ vnet_dev_port_validate (vm, port);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ rv = vnet_dev_tx_queue_start (vm, q);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_start (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ u16 n_threads = vlib_get_n_threads ();
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rt_op_t *ops = 0;
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_validate (vm, port);
+
+ log_debug (dev, "starting port %u", port->port_id);
+
+ vnet_dev_port_update_tx_node_runtime (vm, port);
+
+ if ((rv = port->port_ops.start (vm, port)) != VNET_DEV_OK)
+ {
+ vnet_dev_port_stop (vm, port);
+ return rv;
+ }
+
+ for (u16 i = 0; i < n_threads; i++)
+ {
+ vnet_dev_rt_op_t op = { .thread_index = i, .port = port };
+ vec_add1 (ops, op);
+ }
+
+ vnet_dev_rt_exec_ops (vm, dev, ops, vec_len (ops));
+ vec_free (ops);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->enabled)
+ {
+ log_debug (dev, "port %u rx queue %u started", port->port_id,
+ q->queue_id);
+ q->started = 1;
+ }
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if (q->enabled)
+ {
+ log_debug (dev, "port %u tx queue %u started", port->port_id,
+ q->queue_id);
+ q->started = 1;
+ }
+
+ port->started = 1;
+ log_debug (dev, "port %u started", port->port_id);
+
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_add (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_port_id_t id,
+ vnet_dev_port_add_args_t *args)
+{
+ vnet_dev_port_t **pp, *port;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ ASSERT (args->port.attr.type != VNET_DEV_PORT_TYPE_UNKNOWN);
+ ASSERT (args->port.attr.max_supported_rx_frame_size);
+
+ port =
+ vnet_dev_alloc_with_data (sizeof (vnet_dev_port_t), args->port.data_size);
+ pool_get (dev->ports, pp);
+ pp[0] = port;
+ clib_memcpy (vnet_dev_get_port_data (port), args->port.initial_data,
+ args->port.data_size);
+ port->port_id = id;
+ port->index = pp - dev->ports;
+ port->dev = dev;
+ port->attr = args->port.attr;
+ port->rx_queue_config = args->rx_queue.config;
+ port->tx_queue_config = args->tx_queue.config;
+ port->rx_queue_ops = args->rx_queue.ops;
+ port->tx_queue_ops = args->tx_queue.ops;
+ port->port_ops = args->port.ops;
+ port->rx_node = *args->rx_node;
+ port->tx_node = *args->tx_node;
+
+ if (args->port.args)
+ for (vnet_dev_arg_t *a = args->port.args; a->type != VNET_DEV_ARG_END; a++)
+ vec_add1 (port->args, *a);
+
+ /* defaults out of port attributes */
+ port->max_rx_frame_size = args->port.attr.max_supported_rx_frame_size;
+ port->primary_hw_addr = args->port.attr.hw_addr;
+
+ if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET)
+ {
+ if (port->max_rx_frame_size > 1514 &&
+ port->attr.caps.change_max_rx_frame_size)
+ port->max_rx_frame_size = 1514;
+ }
+
+ if (port->port_ops.alloc)
+ rv = port->port_ops.alloc (vm, port);
+
+ if (rv == VNET_DEV_OK)
+ port->initialized = 1;
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_cfg_change_req_validate (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv;
+ vnet_dev_hw_addr_t *addr;
+ int found;
+
+ if (req->validated)
+ return VNET_DEV_OK;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ if (req->max_rx_frame_size > port->attr.max_supported_rx_frame_size)
+ return VNET_DEV_ERR_INVALID_VALUE;
+ if (req->max_rx_frame_size == port->max_rx_frame_size)
+ return VNET_DEV_ERR_NO_CHANGE;
+ break;
+
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ if (req->promisc == port->promisc)
+ return VNET_DEV_ERR_NO_CHANGE;
+ break;
+
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ if (clib_memcmp (&req->addr, &port->primary_hw_addr,
+ sizeof (vnet_dev_hw_addr_t)) == 0)
+ return VNET_DEV_ERR_NO_CHANGE;
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ pool_foreach (addr, port->secondary_hw_addr)
+ if (clib_memcmp (addr, &req->addr, sizeof (*addr)) == 0)
+ return VNET_DEV_ERR_ALREADY_EXISTS;
+ break;
+
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ found = 0;
+ pool_foreach (addr, port->secondary_hw_addr)
+ if (clib_memcmp (addr, &req->addr, sizeof (*addr)) == 0)
+ found = 1;
+ if (!found)
+ return VNET_DEV_ERR_NO_SUCH_ENTRY;
+ break;
+
+ default:
+ break;
+ }
+
+ if (port->port_ops.config_change_validate)
+ {
+ rv = port->port_ops.config_change_validate (vm, port, req);
+ if (rv != VNET_DEV_OK)
+ return rv;
+ }
+ else
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ req->validated = 1;
+ return VNET_DEV_OK;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_cfg_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *req)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ vnet_dev_hw_addr_t *a;
+ vnet_dev_rx_queue_t *rxq = 0;
+ u8 enable = 0;
+
+ vnet_dev_port_validate (vm, port);
+
+ if (req->type == VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE ||
+ req->type == VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE)
+ {
+ if (req->all_queues == 0)
+ {
+ rxq = vnet_dev_port_get_rx_queue_by_id (port, req->queue_id);
+ if (rxq == 0)
+ return VNET_DEV_ERR_BUG;
+ }
+ }
+
+ if ((rv = vnet_dev_port_cfg_change_req_validate (vm, port, req)))
+ return rv;
+
+ if (port->port_ops.config_change)
+ rv = port->port_ops.config_change (vm, port, req);
+ else
+ return VNET_DEV_ERR_NOT_SUPPORTED;
+
+ if (rv != VNET_DEV_OK)
+ return rv;
+
+ switch (req->type)
+ {
+ case VNET_DEV_PORT_CFG_MAX_RX_FRAME_SIZE:
+ port->max_rx_frame_size = req->max_rx_frame_size;
+ break;
+
+ case VNET_DEV_PORT_CFG_PROMISC_MODE:
+ port->promisc = req->promisc;
+ break;
+
+ case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE:
+ enable = 1;
+ case VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE:
+ if (req->all_queues)
+ {
+ clib_bitmap_t *bmp = 0;
+ vnet_dev_rt_op_t *ops = 0;
+ u32 i;
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ q->interrupt_mode = enable;
+ bmp = clib_bitmap_set (bmp, q->rx_thread_index, 1);
+ }
+
+ clib_bitmap_foreach (i, bmp)
+ {
+ vnet_dev_rt_op_t op = { .port = port, .thread_index = i };
+ vec_add1 (ops, op);
+ }
+
+ vnet_dev_rt_exec_ops (vm, port->dev, ops, vec_len (ops));
+ clib_bitmap_free (bmp);
+ vec_free (ops);
+ }
+ else
+ {
+ rxq->interrupt_mode = enable;
+ vnet_dev_rt_exec_ops (vm, port->dev,
+ &(vnet_dev_rt_op_t){
+ .port = port,
+ .thread_index = rxq->rx_thread_index,
+ },
+ 1);
+ }
+ break;
+
+ case VNET_DEV_PORT_CFG_CHANGE_PRIMARY_HW_ADDR:
+ clib_memcpy (&port->primary_hw_addr, &req->addr,
+ sizeof (vnet_dev_hw_addr_t));
+ break;
+
+ case VNET_DEV_PORT_CFG_ADD_SECONDARY_HW_ADDR:
+ pool_get (port->secondary_hw_addr, a);
+ clib_memcpy (a, &req->addr, sizeof (vnet_dev_hw_addr_t));
+ break;
+
+ case VNET_DEV_PORT_CFG_REMOVE_SECONDARY_HW_ADDR:
+ pool_foreach (a, port->secondary_hw_addr)
+ if (clib_memcmp (a, &req->addr, sizeof (vnet_dev_hw_addr_t)) == 0)
+ {
+ pool_put (port->secondary_hw_addr, a);
+ break;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return VNET_DEV_OK;
+}
+
+void
+vnet_dev_port_state_change (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_state_changes_t changes)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vnet_dev_port_validate (vm, port);
+
+ if (changes.change.link_speed)
+ {
+ port->speed = changes.link_speed;
+ if (port->interface_created)
+ vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
+ changes.link_speed);
+ log_debug (port->dev, "port speed changed to %u", changes.link_speed);
+ }
+
+ if (changes.change.link_state)
+ {
+ port->link_up = changes.link_state;
+ if (port->interface_created)
+ vnet_hw_interface_set_flags (
+ vnm, port->intf.hw_if_index,
+ changes.link_state ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
+ log_debug (port->dev, "port link state changed to %s",
+ changes.link_state ? "up" : "down");
+ }
+}
+
+void
+vnet_dev_port_add_counters (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_counter_t *counters, u16 n_counters)
+{
+ vnet_dev_port_validate (vm, port);
+
+ port->counter_main =
+ vnet_dev_counters_alloc (vm, counters, n_counters, "%s port %u counters",
+ port->dev->device_id, port->port_id);
+}
+
+void
+vnet_dev_port_free_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_port_validate (vm, port);
+
+ if (port->counter_main)
+ vnet_dev_counters_free (vm, port->counter_main);
+}
+
+vnet_dev_rv_t
+vnet_dev_port_if_create (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u16 n_threads = vlib_get_n_threads ();
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_port_t **pp;
+ vnet_dev_rv_t rv;
+ u16 ti = 0;
+
+ if (port->intf.name[0] == 0)
+ {
+ u8 *s;
+ s = format (0, "%s%u/%u",
+ dm->drivers[port->dev->driver_index].registration->name,
+ port->dev->index, port->index);
+ u32 n = vec_len (s);
+
+ if (n >= sizeof (port->intf.name))
+ {
+ vec_free (s);
+ return VNET_DEV_ERR_BUG;
+ }
+ clib_memcpy (port->intf.name, s, n);
+ port->intf.name[n] = 0;
+ vec_free (s);
+ }
+
+ log_debug (
+ dev, "allocating %u rx queues with size %u and %u tx queues with size %u",
+ port->intf.num_rx_queues, port->intf.rxq_sz, port->intf.num_tx_queues,
+ port->intf.txq_sz);
+
+ for (int i = 0; i < port->intf.num_rx_queues; i++)
+ if ((rv = vnet_dev_rx_queue_alloc (vm, port, port->intf.rxq_sz)) !=
+ VNET_DEV_OK)
+ goto error;
+
+ for (u32 i = 0; i < port->intf.num_tx_queues; i++)
+ if ((rv = vnet_dev_tx_queue_alloc (vm, port, port->intf.txq_sz)) !=
+ VNET_DEV_OK)
+ goto error;
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ {
+ q->assigned_threads = clib_bitmap_set (q->assigned_threads, ti, 1);
+ log_debug (dev, "port %u tx queue %u assigned to thread %u",
+ port->port_id, q->queue_id, ti);
+ if (++ti >= n_threads)
+ break;
+ }
+
+ /* pool of port pointers helps us to assign unique dev_instance */
+ pool_get (dm->ports_by_dev_instance, pp);
+ port->intf.dev_instance = pp - dm->ports_by_dev_instance;
+ pp[0] = port;
+
+ if (port->attr.type == VNET_DEV_PORT_TYPE_ETHERNET)
+ {
+ vnet_device_class_t *dev_class;
+ vnet_dev_driver_t *driver;
+ vnet_sw_interface_t *sw;
+ vnet_hw_interface_t *hw;
+ vnet_hw_if_caps_t caps = 0;
+ u32 rx_node_index;
+
+ driver = pool_elt_at_index (dm->drivers, dev->driver_index);
+
+ /* hack to provide per-port tx node function */
+ dev_class = vnet_get_device_class (vnm, driver->dev_class_index);
+ dev_class->tx_fn_registrations = port->tx_node.registrations;
+ dev_class->format_tx_trace = port->tx_node.format_trace;
+ dev_class->tx_function_error_counters = port->tx_node.error_counters;
+ dev_class->tx_function_n_errors = port->tx_node.n_error_counters;
+
+ /* create new interface including tx and output nodes */
+ port->intf.hw_if_index = vnet_eth_register_interface (
+ vnm, &(vnet_eth_interface_registration_t){
+ .address = port->primary_hw_addr.eth_mac,
+ .max_frame_size = port->max_rx_frame_size,
+ .dev_class_index = driver->dev_class_index,
+ .dev_instance = port->intf.dev_instance,
+ .cb.set_max_frame_size = vnet_dev_port_set_max_frame_size,
+ .cb.flag_change = vnet_dev_port_eth_flag_change,
+ });
+
+ sw = vnet_get_hw_sw_interface (vnm, port->intf.hw_if_index);
+ hw = vnet_get_hw_interface (vnm, port->intf.hw_if_index);
+ port->intf.sw_if_index = sw->sw_if_index;
+ vnet_hw_interface_set_flags (
+ vnm, port->intf.hw_if_index,
+ port->link_up ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
+ if (port->speed)
+ vnet_hw_interface_set_link_speed (vnm, port->intf.hw_if_index,
+ port->speed);
+
+ port->intf.tx_node_index = hw->tx_node_index;
+
+ caps |= port->attr.caps.interrupt_mode ? VNET_HW_IF_CAP_INT_MODE : 0;
+ caps |= port->attr.caps.mac_filter ? VNET_HW_IF_CAP_MAC_FILTER : 0;
+ caps |= port->attr.tx_offloads.tcp_gso ? VNET_HW_IF_CAP_TCP_GSO : 0;
+ caps |= port->attr.tx_offloads.ip4_cksum ? VNET_HW_IF_CAP_TX_CKSUM : 0;
+
+ if (caps)
+ vnet_hw_if_set_caps (vnm, port->intf.hw_if_index, caps);
+
+ /* create / reuse rx node */
+ if (vec_len (dm->free_rx_node_indices))
+ {
+ vlib_node_t *n;
+ rx_node_index = vec_pop (dm->free_rx_node_indices);
+ vlib_node_rename (vm, rx_node_index, "%s-rx", port->intf.name);
+ n = vlib_get_node (vm, rx_node_index);
+ n->function = vlib_node_get_preferred_node_fn_variant (
+ vm, port->rx_node.registrations);
+ n->format_trace = port->rx_node.format_trace;
+ vlib_register_errors (vm, rx_node_index,
+ port->rx_node.n_error_counters, 0,
+ port->rx_node.error_counters);
+ }
+ else
+ {
+ dev_class->format_tx_trace = port->tx_node.format_trace;
+ dev_class->tx_function_error_counters = port->tx_node.error_counters;
+ dev_class->tx_function_n_errors = port->tx_node.n_error_counters;
+ vlib_node_registration_t rx_node_reg = {
+ .sibling_of = "port-rx-eth",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+ .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
+ .node_fn_registrations = port->rx_node.registrations,
+ .format_trace = port->rx_node.format_trace,
+ .error_counters = port->rx_node.error_counters,
+ .n_errors = port->rx_node.n_error_counters,
+ };
+ rx_node_index =
+ vlib_register_node (vm, &rx_node_reg, "%s-rx", port->intf.name);
+ }
+ port->rx_node_assigned = 1;
+ port->intf.rx_node_index = rx_node_index;
+ port->intf.rx_next_index =
+ vnet_dev_default_next_index_by_port_type[port->attr.type];
+
+ vlib_worker_thread_node_runtime_update ();
+ log_debug (dev,
+ "ethernet interface created, hw_if_index %u sw_if_index %u "
+ "rx_node_index %u tx_node_index %u",
+ port->intf.hw_if_index, port->intf.sw_if_index,
+ port->intf.rx_node_index, port->intf.tx_node_index);
+ }
+
+ port->interface_created = 1;
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ vnet_buffer (&q->buffer_template)->sw_if_index[VLIB_RX] =
+ port->intf.sw_if_index;
+ /* poison to catch node not calling runtime update function */
+ q->next_index = ~0;
+ q->interrupt_mode = port->intf.default_is_intr_mode;
+ vnet_dev_rx_queue_rt_request (
+ vm, q, (vnet_dev_rx_queue_rt_req_t){ .update_next_index = 1 });
+ }
+
+ vnet_dev_port_update_tx_node_runtime (vm, port);
+
+ if (port->port_ops.init)
+ rv = port->port_ops.init (vm, port);
+
+error:
+ if (rv != VNET_DEV_OK)
+ vnet_dev_port_if_remove (vm, port);
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_port_if_remove (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_main_t *vnm = vnet_get_main ();
+
+ vnet_dev_port_validate (vm, port);
+
+ if (port->started)
+ vnet_dev_port_stop (vm, port);
+
+ if (port->rx_node_assigned)
+ {
+ vlib_node_rename (vm, port->intf.rx_node_index, "deleted-%u",
+ port->intf.rx_node_index);
+ vec_add1 (dm->free_rx_node_indices, port->intf.rx_node_index);
+ port->rx_node_assigned = 0;
+ }
+
+ if (port->interface_created)
+ {
+ vlib_worker_thread_barrier_sync (vm);
+ vnet_delete_hw_interface (vnm, port->intf.hw_if_index);
+ vlib_worker_thread_barrier_release (vm);
+ pool_put_index (dm->ports_by_dev_instance, port->intf.dev_instance);
+ port->interface_created = 0;
+ }
+
+ port->intf = (typeof (port->intf)){};
+
+ if (port->port_ops.deinit)
+ port->port_ops.deinit (vm, port);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ vnet_dev_tx_queue_free (vm, q);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ vnet_dev_rx_queue_free (vm, q);
+
+ vnet_dev_port_free_counters (vm, port);
+
+ foreach_vnet_dev_port_args (v, port)
+ vnet_dev_arg_clear_value (v);
+
+ return VNET_DEV_OK;
+}
+void
+vnet_dev_port_clear_counters (vlib_main_t *vm, vnet_dev_port_t *port)
+{
+ if (port->counter_main)
+ vnet_dev_counters_clear (vm, port->counter_main);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ if (q->counter_main)
+ vnet_dev_counters_clear (vm, q->counter_main);
+
+ foreach_vnet_dev_port_tx_queue (q, port)
+ if (q->counter_main)
+ vnet_dev_counters_clear (vm, q->counter_main);
+
+ log_notice (port->dev, "counters cleared on port %u", port->port_id);
+}
diff --git a/src/vnet/dev/process.c b/src/vnet/dev/process.c
new file mode 100644
index 00000000000..3c1f0b8d2d8
--- /dev/null
+++ b/src/vnet/dev/process.c
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/error.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "process",
+};
+
+typedef enum
+{
+ VNET_DEV_EVENT_PERIODIC_STOP,
+ VNET_DEV_EVENT_PERIODIC_START,
+ VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ,
+ VNET_DEV_EVENT_PROCESS_QUIT,
+ VNET_DEV_EVENT_CALL_OP,
+ VNET_DEV_EVENT_CALL_OP_NO_RV,
+ VNET_DEV_EVENT_CALL_OP_NO_WAIT,
+ VNET_DEV_EVENT_CALL_PORT_OP,
+ VNET_DEV_EVENT_CALL_PORT_OP_NO_RV,
+ VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT,
+ VNET_DEV_EVENT_CLOCK = ~0
+} __clib_packed vnet_dev_event_t;
+
+typedef struct
+{
+ vnet_dev_event_t event;
+ u8 reply_needed : 1;
+ u32 calling_process_index;
+ union
+ {
+ struct
+ {
+ vnet_dev_port_t *port;
+ vnet_dev_port_cfg_change_req_t *change_req;
+ } port_cfg_change;
+ struct
+ {
+ vnet_dev_op_t *op;
+ } call_op;
+ struct
+ {
+ vnet_dev_op_no_rv_t *op;
+ } call_op_no_rv;
+ struct
+ {
+ vnet_dev_op_no_rv_t *op;
+ } call_op_no_wait;
+ struct
+ {
+ vnet_dev_port_op_t *op;
+ vnet_dev_port_t *port;
+ } call_port_op;
+ struct
+ {
+ vnet_dev_port_op_no_rv_t *op;
+ vnet_dev_port_t *port;
+ } call_port_op_no_rv;
+ struct
+ {
+ vnet_dev_port_op_no_rv_t *op;
+ vnet_dev_port_t *port;
+ } call_port_op_no_wait;
+ };
+} vnet_dev_event_data_t;
+
+static vnet_dev_rv_t
+vnet_dev_process_one_event (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_event_data_t *ed)
+{
+ vnet_dev_port_t *p;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ switch (ed->event)
+ {
+ case VNET_DEV_EVENT_CLOCK:
+ break;
+ case VNET_DEV_EVENT_PROCESS_QUIT:
+ log_debug (dev, "quit requested");
+ dev->process_node_quit = 1;
+ break;
+ case VNET_DEV_EVENT_PERIODIC_START:
+ log_debug (dev, "periodic start");
+ dev->process_node_periodic = 1;
+ break;
+ case VNET_DEV_EVENT_PERIODIC_STOP:
+ log_debug (dev, "periodic stop");
+ dev->process_node_periodic = 0;
+ break;
+ case VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ:
+ log_debug (dev, "port config change");
+ p = ed->port_cfg_change.port;
+ rv = vnet_dev_port_cfg_change (vm, p, ed->port_cfg_change.change_req);
+ break;
+ case VNET_DEV_EVENT_CALL_OP:
+ log_debug (dev, "call op");
+ rv = ed->call_op.op (vm, dev);
+ break;
+ case VNET_DEV_EVENT_CALL_OP_NO_RV:
+ log_debug (dev, "call op no rv");
+ ed->call_op_no_rv.op (vm, dev);
+ break;
+ case VNET_DEV_EVENT_CALL_OP_NO_WAIT:
+ log_debug (dev, "call op no wait");
+ ed->call_op_no_wait.op (vm, dev);
+ break;
+ case VNET_DEV_EVENT_CALL_PORT_OP:
+ log_debug (dev, "call port op");
+ rv = ed->call_port_op.op (vm, ed->call_port_op.port);
+ break;
+ case VNET_DEV_EVENT_CALL_PORT_OP_NO_RV:
+ log_debug (dev, "call port op no rv");
+ ed->call_port_op_no_rv.op (vm, ed->call_port_op_no_rv.port);
+ break;
+ case VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT:
+ log_debug (dev, "call port op no wait");
+ ed->call_port_op_no_wait.op (vm, ed->call_port_op_no_wait.port);
+ break;
+ default:
+ ASSERT (0);
+ }
+ return rv;
+}
+
+static uword
+vnet_dev_process (vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_periodic_op_t *pop, *pops = 0;
+ f64 next = CLIB_F64_MAX;
+ vnet_dev_event_data_t *event_data = 0, *new_event_data, *ed;
+
+ vnet_dev_t *dev =
+ *((vnet_dev_t **) vlib_node_get_runtime_data (vm, rt->node_index));
+
+ log_debug (dev, "process '%U' started", format_vlib_node_name, vm,
+ rt->node_index);
+
+ while (dev->process_node_quit == 0)
+ {
+ uword event_type;
+ f64 now = vlib_time_now (vm);
+
+ if (dev->process_node_periodic)
+ vlib_process_wait_for_event_or_clock (vm, next > now ? next - now : 0);
+ else
+ vlib_process_wait_for_event (vm);
+
+ new_event_data = vlib_process_get_event_data (vm, &event_type);
+
+ if (new_event_data)
+ {
+ vec_append (event_data, new_event_data);
+ vlib_process_put_event_data (vm, new_event_data);
+
+ ASSERT (event_type == 0);
+
+ vec_foreach (ed, event_data)
+ {
+ vnet_dev_rv_t rv;
+ rv = vnet_dev_process_one_event (vm, dev, ed);
+ if (ed->reply_needed)
+ vlib_process_signal_event (vm, ed->calling_process_index,
+ ed->event, rv);
+ }
+ vec_reset_length (event_data);
+ }
+
+ next = CLIB_F64_MAX;
+ pool_foreach (pop, dev->periodic_ops)
+ {
+ if (pop->last_run + pop->interval < now)
+ {
+ vec_add1 (pops, *pop);
+ pop->last_run = now;
+ }
+ if (pop->last_run + pop->interval < next)
+ next = pop->last_run + pop->interval;
+ }
+
+ vec_foreach (pop, pops)
+ {
+ switch (pop->type)
+ {
+ case VNET_DEV_PERIODIC_OP_TYPE_DEV:
+ pop->dev_op (vm, pop->dev);
+ break;
+ case VNET_DEV_PERIODIC_OP_TYPE_PORT:
+ pop->port_op (vm, pop->port);
+ break;
+ default:
+ ASSERT (0);
+ }
+ }
+ vec_reset_length (pops);
+ }
+
+ log_debug (dev, "process '%U' quit", format_vlib_node_name, vm,
+ rt->node_index);
+ vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+ vlib_node_rename (vm, rt->node_index, "deleted-%u", rt->node_index);
+
+ /* add node index to the freelist */
+ vec_add1 (dm->free_process_node_indices, rt->node_index);
+ vec_free (pops);
+ vec_free (event_data);
+ return 0;
+}
+
+vnet_dev_rv_t
+vnet_dev_process_create (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vlib_node_t *n;
+ uword l;
+
+ l = vec_len (dm->free_process_node_indices);
+ if (l > 0)
+ {
+ n = vlib_get_node (vm, dm->free_process_node_indices[l - 1]);
+ if (n->function != vnet_dev_process)
+ {
+ vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, n->index);
+ n->function = vnet_dev_process;
+ rt->function = vnet_dev_process;
+ }
+ vlib_node_rename (vm, n->index, "%s-process", dev->device_id);
+ vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
+ vec_set_len (dm->free_process_node_indices, l - 1);
+ log_debug (dev, "process node '%U' (%u) reused", format_vlib_node_name,
+ vm, n->index, n->index);
+ }
+ else
+ {
+ vlib_node_registration_t r = {
+ .function = vnet_dev_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 16,
+ .runtime_data_bytes = sizeof (void *),
+ };
+
+ vlib_register_node (vm, &r, "%s-process", dev->device_id);
+
+ n = vlib_get_node (vm, r.index);
+ log_debug (dev, "process node '%U' (%u) created", format_vlib_node_name,
+ vm, r.index, r.index);
+ }
+
+ dev->process_node_index = n->index;
+ *(vnet_dev_t **) vlib_node_get_runtime_data (vm, n->index) = dev;
+ vlib_start_process (vm, n->runtime_index);
+
+ return VNET_DEV_OK;
+}
+
+static void
+vnet_dev_process_event_send (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_event_data_t ed)
+{
+ vnet_dev_event_data_t *edp = vlib_process_signal_event_data (
+ vm, dev->process_node_index, 0, 1, sizeof (ed));
+ *edp = ed;
+}
+
+static vnet_dev_rv_t
+vnet_dev_process_event_send_and_wait (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_event_data_t ed)
+{
+ uword event, *event_data = 0;
+ vnet_dev_rv_t rv;
+
+ ed.calling_process_index = vlib_get_current_process_node_index (vm);
+
+ if (ed.calling_process_index == dev->process_node_index)
+ return vnet_dev_process_one_event (vm, dev, &ed);
+
+ ed.reply_needed = 1;
+ vnet_dev_process_event_send (vm, dev, ed);
+ vlib_process_wait_for_event_or_clock (vm, 5.0);
+ event = vlib_process_get_events (vm, &event_data);
+ if (event != ed.event)
+ {
+ log_err (dev, "%s",
+ event == VNET_DEV_EVENT_CLOCK ?
+ "timeout waiting for process node to respond" :
+ "unexpected event received");
+ rv = VNET_DEV_ERR_PROCESS_REPLY;
+ }
+ else
+ rv = event_data[0];
+ vec_free (event_data);
+ return rv;
+}
+
+void
+vnet_dev_process_quit (vlib_main_t *vm, vnet_dev_t *dev)
+{
+ vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_PROCESS_QUIT };
+ vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+static int
+_vnet_dev_poll_add (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_periodic_op_t pop)
+{
+ const vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_PERIODIC_START };
+ vnet_dev_periodic_op_t *p;
+
+ pool_foreach (p, dev->periodic_ops)
+ if (p->op == pop.op && p->arg == pop.arg)
+ return 0;
+
+ pool_get_zero (dev->periodic_ops, p);
+ *p = pop;
+ if (pool_elts (dev->periodic_ops) == 1)
+ vnet_dev_process_event_send (vm, dev, ed);
+ return 1;
+}
+
+static int
+_vnet_dev_poll_remove (vlib_main_t *vm, vnet_dev_t *dev, void *op, void *arg)
+{
+ const vnet_dev_event_data_t ed = { .event = VNET_DEV_EVENT_PERIODIC_STOP };
+ vnet_dev_periodic_op_t *pop;
+
+ pool_foreach (pop, dev->periodic_ops)
+ if (pop->op == op && pop->arg == arg)
+ {
+ pool_put (dev->periodic_ops, pop);
+ if (pool_elts (dev->periodic_ops) == 0)
+ vnet_dev_process_event_send (vm, dev, ed);
+ return 1;
+ }
+ return 0;
+}
+
+void
+vnet_dev_poll_dev_add (vlib_main_t *vm, vnet_dev_t *dev, f64 interval,
+ vnet_dev_op_no_rv_t *dev_op)
+{
+ vnet_dev_periodic_op_t pop = {
+ .interval = interval,
+ .type = VNET_DEV_PERIODIC_OP_TYPE_DEV,
+ .dev_op = dev_op,
+ .dev = dev,
+ };
+
+ if (_vnet_dev_poll_add (vm, dev, pop) == 0)
+ log_warn (dev, "poll_dev_add: op already exists, not added");
+}
+
+void
+vnet_dev_poll_dev_remove (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_op_no_rv_t *dev_op)
+{
+ if (_vnet_dev_poll_remove (vm, dev, (void *) dev_op, (void *) dev) == 0)
+ log_warn (dev, "poll_dev_remove: op not found, not removed");
+}
+
+void
+vnet_dev_poll_port_add (vlib_main_t *vm, vnet_dev_port_t *port, f64 interval,
+ vnet_dev_port_op_no_rv_t *port_op)
+{
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_periodic_op_t pop = {
+ .interval = interval,
+ .type = VNET_DEV_PERIODIC_OP_TYPE_PORT,
+ .port_op = port_op,
+ .port = port,
+ };
+
+ if (_vnet_dev_poll_add (vm, dev, pop) == 0)
+ log_warn (dev, "poll_port_add: op already exists, not added");
+}
+
+void
+vnet_dev_poll_port_remove (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_no_rv_t *port_op)
+{
+ vnet_dev_t *dev = port->dev;
+ if (_vnet_dev_poll_remove (vm, dev, (void *) port_op, (void *) port) == 0)
+ log_warn (dev, "poll_port_remove: op not found, not removed");
+}
+
+vnet_dev_rv_t
+vnet_dev_process_port_cfg_change_req (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_cfg_change_req_t *pccr)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_PORT_CONFIG_CHANGE_REQ,
+ .port_cfg_change = {
+ .port = port,
+ .change_req = pccr,
+ },
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
+
+vnet_dev_rv_t
+vnet_dev_process_call_op (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_op_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_OP,
+ .call_op.op = op,
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+vnet_dev_rv_t
+vnet_dev_process_call_op_no_rv (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_OP_NO_RV,
+ .call_op_no_rv.op = op,
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, dev, ed);
+}
+
+void
+vnet_dev_process_call_op_no_wait (vlib_main_t *vm, vnet_dev_t *dev,
+ vnet_dev_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_OP_NO_WAIT,
+ .call_op_no_rv.op = op,
+ };
+
+ vnet_dev_process_event_send (vm, dev, ed);
+}
+
+vnet_dev_rv_t
+vnet_dev_process_call_port_op (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_PORT_OP,
+ .call_port_op = { .op = op, .port = port },
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
+
+vnet_dev_rv_t
+vnet_dev_process_call_port_op_no_rv (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_RV,
+ .call_port_op_no_rv = { .op = op, .port = port },
+ };
+
+ return vnet_dev_process_event_send_and_wait (vm, port->dev, ed);
+}
+
+void
+vnet_dev_process_call_port_op_no_wait (vlib_main_t *vm, vnet_dev_port_t *port,
+ vnet_dev_port_op_no_rv_t *op)
+{
+ const vnet_dev_event_data_t ed = {
+ .event = VNET_DEV_EVENT_CALL_PORT_OP_NO_WAIT,
+ .call_port_op_no_wait = { .op = op, .port = port },
+ };
+
+ vnet_dev_process_event_send (vm, port->dev, ed);
+}
diff --git a/src/vnet/dev/process.h b/src/vnet/dev/process.h
new file mode 100644
index 00000000000..9223973dffc
--- /dev/null
+++ b/src/vnet/dev/process.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_PROCESS_H_
+#define _VNET_DEV_PROCESS_H_
+
+#include <vppinfra/clib.h>
+
+#endif /* _VNET_DEV_PROCESS_H_ */
diff --git a/src/vnet/dev/queue.c b/src/vnet/dev/queue.c
new file mode 100644
index 00000000000..9a016a626fb
--- /dev/null
+++ b/src/vnet/dev/queue.c
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/counters.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "error",
+};
+
+void
+vnet_dev_rx_queue_free (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_port_t *port = rxq->port;
+ vnet_dev_t *dev = port->dev;
+ log_debug (dev, "queue %u", rxq->queue_id);
+ if (port->rx_queue_ops.free)
+ port->rx_queue_ops.free (vm, rxq);
+
+ vnet_dev_rx_queue_free_counters (vm, rxq);
+ pool_put_index (port->rx_queues, rxq->index);
+ clib_mem_free (rxq);
+}
+
+vnet_dev_rv_t
+vnet_dev_rx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+ u16 queue_size)
+{
+ vnet_dev_main_t *dm = &vnet_dev_main;
+ vnet_dev_rx_queue_t *rxq, **qp;
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ u16 n_threads = vlib_get_n_threads ();
+ u8 buffer_pool_index;
+
+ vnet_dev_port_validate (vm, port);
+
+ log_debug (dev, "port %u queue_size %u", port->port_id, queue_size);
+
+ if (pool_elts (port->rx_queues) == port->attr.max_rx_queues)
+ return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+ rxq = vnet_dev_alloc_with_data (sizeof (vnet_dev_port_t),
+ port->rx_queue_config.data_size);
+ pool_get (port->rx_queues, qp);
+ qp[0] = rxq;
+ rxq->enabled = 1;
+ rxq->port = port;
+ rxq->size = queue_size;
+ rxq->index = qp - port->rx_queues;
+
+ /* default queue id - can be changed by driver */
+ rxq->queue_id = qp - port->rx_queues;
+ ASSERT (rxq->queue_id < port->attr.max_rx_queues);
+
+ if (n_threads > 1)
+ {
+ rxq->rx_thread_index = dm->next_rx_queue_thread++;
+ if (dm->next_rx_queue_thread >= n_threads)
+ dm->next_rx_queue_thread = 1;
+ }
+
+ buffer_pool_index =
+ vlib_buffer_pool_get_default_for_numa (vm, dev->numa_node);
+ vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, buffer_pool_index);
+
+ rxq->buffer_template = bp->buffer_template;
+ vnet_buffer (&rxq->buffer_template)->sw_if_index[VLIB_TX] = ~0;
+
+ rxq->next_index = vnet_dev_default_next_index_by_port_type[port->attr.type];
+
+ if (port->rx_queue_ops.alloc)
+ rv = port->rx_queue_ops.alloc (vm, rxq);
+
+ if (rv != VNET_DEV_OK)
+ {
+ log_err (dev, "driver rejected rx queue add with rv %d", rv);
+ vnet_dev_rx_queue_free (vm, rxq);
+ }
+ else
+ log_debug (dev, "queue %u added, assigned to thread %u", rxq->queue_id,
+ rxq->rx_thread_index);
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_rx_queue_start (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ if (rxq->port->rx_queue_ops.start)
+ rv = rxq->port->rx_queue_ops.start (vm, rxq);
+
+ if (rv == VNET_DEV_OK)
+ rxq->started = 1;
+
+ return rv;
+}
+
+void
+vnet_dev_rx_queue_stop (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ if (rxq->port->rx_queue_ops.stop)
+ rxq->port->rx_queue_ops.stop (vm, rxq);
+ vlib_node_set_state (vm, rxq->port->intf.rx_node_index,
+ VLIB_NODE_STATE_DISABLED);
+ rxq->started = 0;
+}
+
+void
+vnet_dev_tx_queue_free (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_port_t *port = txq->port;
+ vnet_dev_t *dev = port->dev;
+
+ vnet_dev_port_validate (vm, port);
+
+ log_debug (dev, "queue %u", txq->queue_id);
+ if (port->tx_queue_ops.free)
+ port->tx_queue_ops.free (vm, txq);
+
+ clib_bitmap_free (txq->assigned_threads);
+ vnet_dev_tx_queue_free_counters (vm, txq);
+ pool_put_index (port->tx_queues, txq->index);
+ clib_mem_free (txq);
+}
+
+vnet_dev_rv_t
+vnet_dev_tx_queue_alloc (vlib_main_t *vm, vnet_dev_port_t *port,
+ u16 queue_size)
+{
+ vnet_dev_tx_queue_t *txq, **qp;
+ vnet_dev_t *dev = port->dev;
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+
+ log_debug (dev, "port %u size %u", port->port_id, queue_size);
+
+ if (pool_elts (port->tx_queues) == port->attr.max_tx_queues)
+ return VNET_DEV_ERR_NO_AVAIL_QUEUES;
+
+ txq = vnet_dev_alloc_with_data (sizeof (vnet_dev_port_t),
+ port->tx_queue_config.data_size);
+ pool_get (port->tx_queues, qp);
+ qp[0] = txq;
+ txq->enabled = 1;
+ txq->port = port;
+ txq->size = queue_size;
+ txq->index = qp - port->tx_queues;
+
+ /* default queue id - can be changed by driver */
+ txq->queue_id = qp - port->tx_queues;
+ ASSERT (txq->queue_id < port->attr.max_tx_queues);
+
+ if (port->tx_queue_ops.alloc)
+ rv = port->tx_queue_ops.alloc (vm, txq);
+
+ if (rv != VNET_DEV_OK)
+ {
+ log_err (dev, "driver rejected tx queue alloc with rv %d", rv);
+ vnet_dev_tx_queue_free (vm, txq);
+ }
+ else
+ log_debug (dev, "queue %u added", txq->queue_id);
+
+ return rv;
+}
+
+vnet_dev_rv_t
+vnet_dev_tx_queue_start (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ vnet_dev_rv_t rv = VNET_DEV_OK;
+ if (txq->port->tx_queue_ops.start)
+ rv = txq->port->tx_queue_ops.start (vm, txq);
+
+ if (rv == VNET_DEV_OK)
+ txq->started = 1;
+
+ return rv;
+}
+
+void
+vnet_dev_tx_queue_stop (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ if (txq->port->tx_queue_ops.stop)
+ txq->port->tx_queue_ops.stop (vm, txq);
+ txq->started = 0;
+}
+
+void
+vnet_dev_rx_queue_add_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq,
+ vnet_dev_counter_t *counters, u16 n_counters)
+{
+ rxq->counter_main = vnet_dev_counters_alloc (
+ vm, counters, n_counters, "%s port %u rx-queue %u counters",
+ rxq->port->dev->device_id, rxq->port->port_id, rxq->queue_id);
+}
+
+void
+vnet_dev_rx_queue_free_counters (vlib_main_t *vm, vnet_dev_rx_queue_t *rxq)
+{
+ if (rxq->counter_main)
+ vnet_dev_counters_free (vm, rxq->counter_main);
+}
+
+void
+vnet_dev_tx_queue_add_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq,
+ vnet_dev_counter_t *counters, u16 n_counters)
+{
+ txq->counter_main = vnet_dev_counters_alloc (
+ vm, counters, n_counters, "%s port %u tx-queue %u counters",
+ txq->port->dev->device_id, txq->port->port_id, txq->queue_id);
+}
+
+void
+vnet_dev_tx_queue_free_counters (vlib_main_t *vm, vnet_dev_tx_queue_t *txq)
+{
+ if (!txq->counter_main)
+ return;
+
+ log_debug (txq->port->dev, "free");
+ vnet_dev_counters_free (vm, txq->counter_main);
+}
diff --git a/src/vnet/dev/runtime.c b/src/vnet/dev/runtime.c
new file mode 100644
index 00000000000..79c55cfbd53
--- /dev/null
+++ b/src/vnet/dev/runtime.c
@@ -0,0 +1,180 @@
+
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#include "vppinfra/bitmap.h"
+#include "vppinfra/lock.h"
+#include <vnet/vnet.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/log.h>
+
+VLIB_REGISTER_LOG_CLASS (dev_log, static) = {
+ .class_name = "dev",
+ .subclass_name = "runtime",
+};
+
+static vnet_dev_rt_op_t *rt_ops;
+
+static void
+_vnet_dev_rt_exec_op (vlib_main_t *vm, vnet_dev_rt_op_t *op)
+{
+ vnet_dev_port_t *port = op->port;
+ vnet_dev_rx_queue_t *previous = 0, *first = 0;
+ vnet_dev_rx_node_runtime_t *rtd;
+ vlib_node_state_t state = VLIB_NODE_STATE_DISABLED;
+ u32 node_index = port->intf.rx_node_index;
+
+ rtd = vlib_node_get_runtime_data (vm, node_index);
+
+ foreach_vnet_dev_port_rx_queue (q, port)
+ {
+ if (q->rx_thread_index != vm->thread_index)
+ continue;
+
+ if (q->interrupt_mode == 0)
+ state = VLIB_NODE_STATE_POLLING;
+ else if (state != VLIB_NODE_STATE_POLLING)
+ state = VLIB_NODE_STATE_INTERRUPT;
+
+ q->next_on_thread = 0;
+ if (previous == 0)
+ first = q;
+ else
+ previous->next_on_thread = q;
+
+ previous = q;
+ }
+
+ rtd->first_rx_queue = first;
+ vlib_node_set_state (vm, port->intf.rx_node_index, state);
+ __atomic_store_n (&op->completed, 1, __ATOMIC_RELEASE);
+}
+
+static uword
+vnet_dev_rt_mgmt_node_fn (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ u16 thread_index = vm->thread_index;
+ vnet_dev_rt_op_t *op, *ops = __atomic_load_n (&rt_ops, __ATOMIC_ACQUIRE);
+ u32 n_pending = 0;
+ uword rv = 0;
+
+ vec_foreach (op, ops)
+ {
+ if (!op->completed && op->thread_index == thread_index)
+ {
+ if (op->in_order == 1 && n_pending)
+ {
+ vlib_node_set_interrupt_pending (vm, node->node_index);
+ return rv;
+ }
+ _vnet_dev_rt_exec_op (vm, op);
+ rv++;
+ }
+
+ if (op->completed == 0)
+ n_pending++;
+ }
+
+ return rv;
+}
+
+VLIB_REGISTER_NODE (vnet_dev_rt_mgmt_node, static) = {
+ .function = vnet_dev_rt_mgmt_node_fn,
+ .name = "dev-rt-mgmt",
+ .type = VLIB_NODE_TYPE_PRE_INPUT,
+ .state = VLIB_NODE_STATE_INTERRUPT,
+};
+
+vnet_dev_rv_t
+vnet_dev_rt_exec_ops (vlib_main_t *vm, vnet_dev_t *dev, vnet_dev_rt_op_t *ops,
+ u32 n_ops)
+{
+ vnet_dev_rt_op_t *op = ops;
+ vnet_dev_rt_op_t *remote_ops = 0;
+ clib_bitmap_t *remote_bmp = 0;
+ u32 i;
+
+ ASSERT (rt_ops == 0);
+
+ if (vlib_worker_thread_barrier_held ())
+ {
+ for (op = ops; op < (ops + n_ops); op++)
+ {
+ vlib_main_t *tvm = vlib_get_main_by_index (op->thread_index);
+ _vnet_dev_rt_exec_op (tvm, op);
+ log_debug (
+ dev,
+ "port %u rx node runtime update on thread %u executed locally",
+ op->port->port_id, op->thread_index);
+ }
+ return VNET_DEV_OK;
+ }
+
+ while (n_ops)
+ {
+ if (op->thread_index != vm->thread_index)
+ break;
+
+ _vnet_dev_rt_exec_op (vm, op);
+ log_debug (
+ dev, "port %u rx node runtime update on thread %u executed locally",
+ op->port->port_id, op->thread_index);
+ op++;
+ n_ops--;
+ }
+
+ if (n_ops == 0)
+ return VNET_DEV_OK;
+
+ for (op = ops; op < (ops + n_ops); op++)
+ {
+ if (op->thread_index == vm->thread_index &&
+ (op->in_order == 0 || vec_len (remote_ops) == 0))
+ {
+ _vnet_dev_rt_exec_op (vm, op);
+ log_debug (dev,
+ "port %u rx node runtime update on thread "
+ "%u executed locally",
+ op->port->port_id, op->thread_index);
+ }
+ else
+ {
+ vec_add1 (remote_ops, *op);
+ log_debug (dev,
+ "port %u rx node runtime update on thread %u "
+ "enqueued for remote execution",
+ op->port->port_id, op->thread_index);
+ remote_bmp = clib_bitmap_set (remote_bmp, op->thread_index, 1);
+ }
+ }
+
+ if (remote_ops == 0)
+ return VNET_DEV_OK;
+
+ __atomic_store_n (&rt_ops, remote_ops, __ATOMIC_RELEASE);
+
+ clib_bitmap_foreach (i, remote_bmp)
+ {
+ vlib_node_set_interrupt_pending (vlib_get_main_by_index (i),
+ vnet_dev_rt_mgmt_node.index);
+ log_debug (dev, "interrupt sent to %s node on thread %u",
+ vnet_dev_rt_mgmt_node.name, i);
+ }
+
+ vec_foreach (op, remote_ops)
+ {
+ while (op->completed == 0)
+ vlib_process_suspend (vm, 5e-5);
+
+ log_debug (
+ dev, "port %u rx node runtime update on thread %u executed locally",
+ op->port->port_id, op->thread_index);
+ }
+
+ __atomic_store_n (&rt_ops, 0, __ATOMIC_RELAXED);
+ vec_free (remote_ops);
+ clib_bitmap_free (remote_bmp);
+ return VNET_DEV_OK;
+}
diff --git a/src/vnet/dev/types.h b/src/vnet/dev/types.h
new file mode 100644
index 00000000000..006d18e5bc5
--- /dev/null
+++ b/src/vnet/dev/types.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef _VNET_DEV_TYPES_H_
+#define _VNET_DEV_TYPES_H_
+
+#include <vppinfra/types.h>
+#include <vnet/dev/errors.h>
+
+typedef char vnet_dev_device_id_t[48];
+typedef char vnet_dev_if_name_t[32];
+typedef char vnet_dev_driver_name_t[16];
+typedef char vnet_dev_bus_name_t[8];
+typedef u16 vnet_dev_port_id_t;
+typedef struct vnet_dev vnet_dev_t;
+typedef struct vnet_dev_port vnet_dev_port_t;
+typedef struct vnet_dev_rx_queue vnet_dev_rx_queue_t;
+typedef struct vnet_dev_tx_queue vnet_dev_tx_queue_t;
+
+typedef enum
+{
+ VNET_DEV_MINUS_OK = 0,
+#define _(n, d) VNET_DEV_ERR_MINUS_##n,
+ foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_minus_rv_t;
+
+typedef enum
+{
+ VNET_DEV_OK = 0,
+#define _(n, d) VNET_DEV_ERR_##n = -(VNET_DEV_ERR_MINUS_##n),
+ foreach_vnet_dev_rv_type
+#undef _
+} vnet_dev_rv_t;
+
+/* do not change bit assignments - API dependency */
+#define foreach_vnet_dev_flag _ (0, NO_STATS, "don't poll device stats")
+
+typedef union
+{
+ enum
+ {
+#define _(b, n, d) VNET_DEV_F_##n = 1ull << (b),
+ foreach_vnet_dev_flag
+#undef _
+ } e;
+ u32 n;
+} vnet_dev_flags_t;
+
+/* do not change bit assignments - API dependency */
+#define foreach_vnet_dev_port_flag \
+ _ (0, INTERRUPT_MODE, "enable interrupt mode")
+
+typedef union
+{
+ enum
+ {
+#define _(b, n, d) VNET_DEV_PORT_F_##n = 1ull << (b),
+ foreach_vnet_dev_port_flag
+#undef _
+ } e;
+ u32 n;
+} vnet_dev_port_flags_t;
+
+#endif /* _VNET_DEV_TYPES_H_ */
diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api
deleted file mode 100644
index 035096e17a8..00000000000
--- a/src/vnet/devices/af_packet/af_packet.api
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2015-2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "2.0.0";
-
-import "vnet/interface_types.api";
-import "vnet/ethernet/ethernet_types.api";
-
-/** \brief Create host-interface
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param hw_addr - interface MAC
- @param use_random_hw_addr - use random generated MAC
- @param host_if_name - interface name
-*/
-define af_packet_create
-{
- u32 client_index;
- u32 context;
-
- vl_api_mac_address_t hw_addr;
- bool use_random_hw_addr;
- string host_if_name[64];
-};
-
-/** \brief Create host-interface response
- @param context - sender context, to match reply w/ request
- @param retval - return value for request
-*/
-define af_packet_create_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
-};
-
-/** \brief Delete host-interface
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param host_if_name - interface name
-*/
-autoreply define af_packet_delete
-{
- u32 client_index;
- u32 context;
-
- string host_if_name[64];
-};
-
-/** \brief Set l4 offload checksum calculation
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-autoreply define af_packet_set_l4_cksum_offload
-{
- u32 client_index;
- u32 context;
-
- vl_api_interface_index_t sw_if_index;
- bool set;
-};
-
-/** \brief Dump af_packet interfaces request */
-define af_packet_dump
-{
- u32 client_index;
- u32 context;
-};
-
-/** \brief Reply for af_packet dump request
- @param sw_if_index - software index of af_packet interface
- @param host_if_name - interface name
-*/
-define af_packet_details
-{
- u32 context;
- vl_api_interface_index_t sw_if_index;
- string host_if_name[64];
-};
-
-/*
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c
deleted file mode 100644
index 76677a43092..00000000000
--- a/src/vnet/devices/af_packet/af_packet.c
+++ /dev/null
@@ -1,578 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.c - linux kernel packet interface
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <sys/ioctl.h>
-#include <net/if.h>
-#include <dirent.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <fcntl.h>
-
-#include <vppinfra/linux/sysfs.h>
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ip/ip.h>
-#include <vnet/devices/netlink.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/interface/rx_queue_funcs.h>
-
-#include <vnet/devices/af_packet/af_packet.h>
-
-af_packet_main_t af_packet_main;
-
-#define AF_PACKET_TX_FRAMES_PER_BLOCK 1024
-#define AF_PACKET_TX_FRAME_SIZE (2048 * 5)
-#define AF_PACKET_TX_BLOCK_NR 1
-#define AF_PACKET_TX_FRAME_NR (AF_PACKET_TX_BLOCK_NR * \
- AF_PACKET_TX_FRAMES_PER_BLOCK)
-#define AF_PACKET_TX_BLOCK_SIZE (AF_PACKET_TX_FRAME_SIZE * \
- AF_PACKET_TX_FRAMES_PER_BLOCK)
-
-#define AF_PACKET_RX_FRAMES_PER_BLOCK 1024
-#define AF_PACKET_RX_FRAME_SIZE (2048 * 5)
-#define AF_PACKET_RX_BLOCK_NR 1
-#define AF_PACKET_RX_FRAME_NR (AF_PACKET_RX_BLOCK_NR * \
- AF_PACKET_RX_FRAMES_PER_BLOCK)
-#define AF_PACKET_RX_BLOCK_SIZE (AF_PACKET_RX_FRAME_SIZE * \
- AF_PACKET_RX_FRAMES_PER_BLOCK)
-
-/*defined in net/if.h but clashes with dpdk headers */
-unsigned int if_nametoindex (const char *ifname);
-
-typedef struct tpacket_req tpacket_req_t;
-
-static u32
-af_packet_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi,
- u32 flags)
-{
- clib_error_t *error;
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, hi->dev_instance);
-
- if (flags == ETHERNET_INTERFACE_FLAG_MTU)
- {
- error =
- vnet_netlink_set_link_mtu (apif->host_if_index, hi->max_packet_bytes);
-
- if (error)
- {
- vlib_log_err (apm->log_class, "netlink failed to change MTU: %U",
- format_clib_error, error);
- clib_error_free (error);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
- }
- else
- apif->host_mtu = hi->max_packet_bytes;
- }
-
- return 0;
-}
-
-static int
-af_packet_read_mtu (af_packet_if_t *apif)
-{
- af_packet_main_t *apm = &af_packet_main;
- clib_error_t *error;
- error = vnet_netlink_get_link_mtu (apif->host_if_index, &apif->host_mtu);
- if (error)
- {
- vlib_log_err (apm->log_class, "netlink failed to get MTU: %U",
- format_clib_error, error);
- clib_error_free (error);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
- }
- return 0;
-}
-
-static clib_error_t *
-af_packet_fd_read_ready (clib_file_t * uf)
-{
- af_packet_main_t *apm = &af_packet_main;
- vnet_main_t *vnm = vnet_get_main ();
- u32 idx = uf->private_data;
- af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, idx);
-
- apm->pending_input_bitmap =
- clib_bitmap_set (apm->pending_input_bitmap, idx, 1);
-
- /* Schedule the rx node */
- vnet_hw_if_rx_queue_set_int_pending (vnm, apif->queue_index);
- return 0;
-}
-
-static int
-is_bridge (const u8 * host_if_name)
-{
- u8 *s;
- DIR *dir = NULL;
-
- s = format (0, "/sys/class/net/%s/bridge%c", host_if_name, 0);
- dir = opendir ((char *) s);
- vec_free (s);
-
- if (dir)
- {
- closedir (dir);
- return 0;
- }
-
- return -1;
-}
-
-static int
-create_packet_v2_sock (int host_if_index, tpacket_req_t * rx_req,
- tpacket_req_t * tx_req, int *fd, u8 ** ring)
-{
- af_packet_main_t *apm = &af_packet_main;
- int ret;
- struct sockaddr_ll sll;
- int ver = TPACKET_V2;
- socklen_t req_sz = sizeof (struct tpacket_req);
- u32 ring_sz = rx_req->tp_block_size * rx_req->tp_block_nr +
- tx_req->tp_block_size * tx_req->tp_block_nr;
-
- if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to create AF_PACKET socket: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- /* bind before rx ring is cfged so we don't receive packets from other interfaces */
- clib_memset (&sll, 0, sizeof (sll));
- sll.sll_family = PF_PACKET;
- sll.sll_protocol = htons (ETH_P_ALL);
- sll.sll_ifindex = host_if_index;
- if (bind (*fd, (struct sockaddr *) &sll, sizeof (sll)) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to bind rx packet socket: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- if (setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver)) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set rx packet interface version: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- int opt = 1;
- if (setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt)) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set packet tx ring error handling option: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set packet rx ring options: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set packet tx ring options: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- *ring =
- mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, *fd,
- 0);
- if (*ring == MAP_FAILED)
- {
- vlib_log_debug (apm->log_class, "mmap failure: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- return 0;
-error:
- if (*fd >= 0)
- {
- close (*fd);
- *fd = -1;
- }
- return ret;
-}
-
-int
-af_packet_create_if (vlib_main_t * vm, u8 * host_if_name, u8 * hw_addr_set,
- u32 * sw_if_index)
-{
- af_packet_main_t *apm = &af_packet_main;
- int ret, fd = -1, fd2 = -1;
- struct tpacket_req *rx_req = 0;
- struct tpacket_req *tx_req = 0;
- struct ifreq ifr;
- u8 *ring = 0;
- af_packet_if_t *apif = 0;
- u8 hw_addr[6];
- clib_error_t *error;
- vnet_sw_interface_t *sw;
- vnet_hw_interface_t *hw;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- vnet_main_t *vnm = vnet_get_main ();
- uword *p;
- uword if_index;
- u8 *host_if_name_dup = 0;
- int host_if_index = -1;
-
- p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
- if (p)
- {
- apif = vec_elt_at_index (apm->interfaces, p[0]);
- *sw_if_index = apif->sw_if_index;
- return VNET_API_ERROR_IF_ALREADY_EXISTS;
- }
-
- host_if_name_dup = vec_dup (host_if_name);
-
- vec_validate (rx_req, 0);
- rx_req->tp_block_size = AF_PACKET_RX_BLOCK_SIZE;
- rx_req->tp_frame_size = AF_PACKET_RX_FRAME_SIZE;
- rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR;
- rx_req->tp_frame_nr = AF_PACKET_RX_FRAME_NR;
-
- vec_validate (tx_req, 0);
- tx_req->tp_block_size = AF_PACKET_TX_BLOCK_SIZE;
- tx_req->tp_frame_size = AF_PACKET_TX_FRAME_SIZE;
- tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR;
- tx_req->tp_frame_nr = AF_PACKET_TX_FRAME_NR;
-
- /*
- * make sure host side of interface is 'UP' before binding AF_PACKET
- * socket on it.
- */
- if ((fd2 = socket (AF_UNIX, SOCK_DGRAM, 0)) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to create AF_UNIX socket: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- clib_memcpy (ifr.ifr_name, (const char *) host_if_name,
- vec_len (host_if_name));
- if (ioctl (fd2, SIOCGIFINDEX, &ifr) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to retrieve the interface (%s) index: %s (errno %d)",
- host_if_name, strerror (errno), errno);
- ret = VNET_API_ERROR_INVALID_INTERFACE;
- goto error;
- }
-
- host_if_index = ifr.ifr_ifindex;
- if (ioctl (fd2, SIOCGIFFLAGS, &ifr) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to get the active flag: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- if (!(ifr.ifr_flags & IFF_UP))
- {
- ifr.ifr_flags |= IFF_UP;
- if (ioctl (fd2, SIOCSIFFLAGS, &ifr) < 0)
- {
- vlib_log_debug (apm->log_class,
- "Failed to set the active flag: %s (errno %d)",
- strerror (errno), errno);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
- }
-
- if (fd2 > -1)
- {
- close (fd2);
- fd2 = -1;
- }
-
- ret = create_packet_v2_sock (host_if_index, rx_req, tx_req, &fd, &ring);
-
- if (ret != 0)
- goto error;
-
- ret = is_bridge (host_if_name);
-
- if (ret == 0) /* is a bridge, ignore state */
- host_if_index = -1;
-
- /* So far everything looks good, let's create interface */
- pool_get (apm->interfaces, apif);
- if_index = apif - apm->interfaces;
-
- apif->host_if_index = host_if_index;
- apif->fd = fd;
- apif->rx_ring = ring;
- apif->tx_ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr;
- apif->rx_req = rx_req;
- apif->tx_req = tx_req;
- apif->host_if_name = host_if_name_dup;
- apif->per_interface_next_index = ~0;
- apif->next_tx_frame = 0;
- apif->next_rx_frame = 0;
-
- ret = af_packet_read_mtu (apif);
- if (ret != 0)
- goto error;
-
- if (tm->n_vlib_mains > 1)
- clib_spinlock_init (&apif->lockp);
-
- /*use configured or generate random MAC address */
- if (hw_addr_set)
- clib_memcpy (hw_addr, hw_addr_set, 6);
- else
- {
- f64 now = vlib_time_now (vm);
- u32 rnd;
- rnd = (u32) (now * 1e6);
- rnd = random_u32 (&rnd);
-
- clib_memcpy (hw_addr + 2, &rnd, sizeof (rnd));
- hw_addr[0] = 2;
- hw_addr[1] = 0xfe;
- }
-
- error = ethernet_register_interface (vnm, af_packet_device_class.index,
- if_index, hw_addr, &apif->hw_if_index,
- af_packet_eth_flag_change);
-
- if (error)
- {
- clib_memset (apif, 0, sizeof (*apif));
- pool_put (apm->interfaces, apif);
- vlib_log_err (apm->log_class, "Unable to register interface: %U",
- format_clib_error, error);
- clib_error_free (error);
- ret = VNET_API_ERROR_SYSCALL_ERROR_1;
- goto error;
- }
-
- sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index);
- hw = vnet_get_hw_interface (vnm, apif->hw_if_index);
- apif->sw_if_index = sw->sw_if_index;
- vnet_hw_if_set_input_node (vnm, apif->hw_if_index,
- af_packet_input_node.index);
- apif->queue_index = vnet_hw_if_register_rx_queue (vnm, apif->hw_if_index, 0,
- VNET_HW_IF_RXQ_THREAD_ANY);
-
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
- vnet_hw_interface_set_flags (vnm, apif->hw_if_index,
- VNET_HW_INTERFACE_FLAG_LINK_UP);
-
- vnet_hw_if_set_rx_queue_mode (vnm, apif->queue_index,
- VNET_HW_IF_RX_MODE_INTERRUPT);
- vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index);
- {
- clib_file_t template = { 0 };
- template.read_function = af_packet_fd_read_ready;
- template.file_descriptor = fd;
- template.private_data = if_index;
- template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
- template.description =
- format (0, "%U", format_af_packet_device_name, if_index);
- apif->clib_file_index = clib_file_add (&file_main, &template);
- }
- vnet_hw_if_set_rx_queue_file_index (vnm, apif->queue_index,
- apif->clib_file_index);
-
- mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index,
- 0);
- if (sw_if_index)
- *sw_if_index = apif->sw_if_index;
-
- return 0;
-
-error:
- if (fd2 > -1)
- {
- close (fd2);
- fd2 = -1;
- }
- vec_free (host_if_name_dup);
- vec_free (rx_req);
- vec_free (tx_req);
- return ret;
-}
-
-int
-af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name)
-{
- vnet_main_t *vnm = vnet_get_main ();
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif;
- uword *p;
- uword if_index;
- u32 ring_sz;
-
- p = mhash_get (&apm->if_index_by_host_if_name, host_if_name);
- if (p == NULL)
- {
- vlib_log_warn (apm->log_class, "Host interface %s does not exist",
- host_if_name);
- return VNET_API_ERROR_SYSCALL_ERROR_1;
- }
- apif = pool_elt_at_index (apm->interfaces, p[0]);
- if_index = apif - apm->interfaces;
-
- /* bring down the interface */
- vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0);
-
- /* clean up */
- if (apif->clib_file_index != ~0)
- {
- clib_file_del (&file_main, file_main.file_pool + apif->clib_file_index);
- apif->clib_file_index = ~0;
- }
- else
- close (apif->fd);
-
- ring_sz = apif->rx_req->tp_block_size * apif->rx_req->tp_block_nr +
- apif->tx_req->tp_block_size * apif->tx_req->tp_block_nr;
- if (munmap (apif->rx_ring, ring_sz))
- vlib_log_warn (apm->log_class,
- "Host interface %s could not free rx/tx ring",
- host_if_name);
- apif->rx_ring = NULL;
- apif->tx_ring = NULL;
- apif->fd = -1;
-
- vec_free (apif->rx_req);
- apif->rx_req = NULL;
- vec_free (apif->tx_req);
- apif->tx_req = NULL;
-
- vec_free (apif->host_if_name);
- apif->host_if_name = NULL;
- apif->host_if_index = -1;
-
- mhash_unset (&apm->if_index_by_host_if_name, host_if_name, &if_index);
-
- ethernet_delete_interface (vnm, apif->hw_if_index);
-
- pool_put (apm->interfaces, apif);
-
- return 0;
-}
-
-int
-af_packet_set_l4_cksum_offload (vlib_main_t * vm, u32 sw_if_index, u8 set)
-{
- vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hw;
-
- hw = vnet_get_sup_hw_interface (vnm, sw_if_index);
-
- if (hw->dev_class_index != af_packet_device_class.index)
- return VNET_API_ERROR_INVALID_INTERFACE;
-
- if (set)
- {
- hw->caps &= ~(VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM);
- }
- else
- {
- hw->caps |= (VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM);
- }
- return 0;
-}
-
-int
-af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs)
-{
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif;
- af_packet_if_detail_t *r_af_packet_ifs = NULL;
- af_packet_if_detail_t *af_packet_if = NULL;
-
- /* *INDENT-OFF* */
- pool_foreach (apif, apm->interfaces)
- {
- vec_add2 (r_af_packet_ifs, af_packet_if, 1);
- af_packet_if->sw_if_index = apif->sw_if_index;
- if (apif->host_if_name)
- {
- clib_memcpy (af_packet_if->host_if_name, apif->host_if_name,
- MIN (ARRAY_LEN (af_packet_if->host_if_name) - 1,
- strlen ((const char *) apif->host_if_name)));
- }
- }
- /* *INDENT-ON* */
-
- *out_af_packet_ifs = r_af_packet_ifs;
-
- return 0;
-}
-
-static clib_error_t *
-af_packet_init (vlib_main_t * vm)
-{
- af_packet_main_t *apm = &af_packet_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
-
- clib_memset (apm, 0, sizeof (af_packet_main_t));
-
- mhash_init_vec_string (&apm->if_index_by_host_if_name, sizeof (uword));
-
- vec_validate_aligned (apm->rx_buffers, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
-
- apm->log_class = vlib_log_register_class ("af_packet", 0);
- vlib_log_debug (apm->log_class, "initialized");
-
- return 0;
-}
-
-VLIB_INIT_FUNCTION (af_packet_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h
deleted file mode 100644
index fc35b48b85e..00000000000
--- a/src/vnet/devices/af_packet/af_packet.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.h - linux kernel packet interface header file
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vppinfra/lock.h>
-
-#include <vlib/log.h>
-
-typedef struct
-{
- u32 sw_if_index;
- u8 host_if_name[64];
-} af_packet_if_detail_t;
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- clib_spinlock_t lockp;
- u8 *host_if_name;
- int host_if_index;
- int fd;
- struct tpacket_req *rx_req;
- struct tpacket_req *tx_req;
- u8 *rx_ring;
- u8 *tx_ring;
- u32 hw_if_index;
- u32 sw_if_index;
- u32 clib_file_index;
-
- u32 next_rx_frame;
- u32 next_tx_frame;
-
- u32 per_interface_next_index;
- u8 is_admin_up;
- u32 queue_index;
- u32 host_mtu;
-} af_packet_if_t;
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- af_packet_if_t *interfaces;
-
- /* bitmap of pending rx interfaces */
- uword *pending_input_bitmap;
-
- /* rx buffer cache */
- u32 **rx_buffers;
-
- /* hash of host interface names */
- mhash_t if_index_by_host_if_name;
-
- /** log class */
- vlib_log_class_t log_class;
-} af_packet_main_t;
-
-extern af_packet_main_t af_packet_main;
-extern vnet_device_class_t af_packet_device_class;
-extern vlib_node_registration_t af_packet_input_node;
-
-int af_packet_create_if (vlib_main_t * vm, u8 * host_if_name,
- u8 * hw_addr_set, u32 * sw_if_index);
-int af_packet_delete_if (vlib_main_t * vm, u8 * host_if_name);
-int af_packet_set_l4_cksum_offload (vlib_main_t * vm, u32 sw_if_index,
- u8 set);
-int af_packet_dump_ifs (af_packet_if_detail_t ** out_af_packet_ifs);
-
-format_function_t format_af_packet_device_name;
-
-#define MIN(x,y) (((x)<(y))?(x):(y))
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/af_packet_api.c b/src/vnet/devices/af_packet/af_packet_api.c
deleted file mode 100644
index d4d041ffb3c..00000000000
--- a/src/vnet/devices/af_packet/af_packet_api.c
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet_api.c - af-packet api
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/devices/af_packet/af_packet.h>
-
-#include <vnet/format_fns.h>
-#include <vnet/devices/af_packet/af_packet.api_enum.h>
-#include <vnet/devices/af_packet/af_packet.api_types.h>
-
-#define REPLY_MSG_ID_BASE msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static u16 msg_id_base;
-
-static void
-vl_api_af_packet_create_t_handler (vl_api_af_packet_create_t * mp)
-{
- vlib_main_t *vm = vlib_get_main ();
- vl_api_af_packet_create_reply_t *rmp;
- int rv = 0;
- u8 *host_if_name = NULL;
- u32 sw_if_index;
-
- host_if_name = format (0, "%s", mp->host_if_name);
- vec_add1 (host_if_name, 0);
-
- rv = af_packet_create_if (vm, host_if_name,
- mp->use_random_hw_addr ? 0 : mp->hw_addr,
- &sw_if_index);
-
- vec_free (host_if_name);
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_AF_PACKET_CREATE_REPLY,
- ({
- rmp->sw_if_index = clib_host_to_net_u32(sw_if_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_af_packet_delete_t_handler (vl_api_af_packet_delete_t * mp)
-{
- vlib_main_t *vm = vlib_get_main ();
- vl_api_af_packet_delete_reply_t *rmp;
- int rv = 0;
- u8 *host_if_name = NULL;
-
- host_if_name = format (0, "%s", mp->host_if_name);
- vec_add1 (host_if_name, 0);
-
- rv = af_packet_delete_if (vm, host_if_name);
-
- vec_free (host_if_name);
-
- REPLY_MACRO (VL_API_AF_PACKET_DELETE_REPLY);
-}
-
-static void
- vl_api_af_packet_set_l4_cksum_offload_t_handler
- (vl_api_af_packet_set_l4_cksum_offload_t * mp)
-{
- vlib_main_t *vm = vlib_get_main ();
- vl_api_af_packet_delete_reply_t *rmp;
- int rv = 0;
-
- rv = af_packet_set_l4_cksum_offload (vm, ntohl (mp->sw_if_index), mp->set);
- REPLY_MACRO (VL_API_AF_PACKET_SET_L4_CKSUM_OFFLOAD_REPLY);
-}
-
-static void
-af_packet_send_details (vpe_api_main_t * am,
- vl_api_registration_t * reg,
- af_packet_if_detail_t * af_packet_if, u32 context)
-{
- vl_api_af_packet_details_t *mp;
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = htons (REPLY_MSG_ID_BASE + VL_API_AF_PACKET_DETAILS);
- mp->sw_if_index = htonl (af_packet_if->sw_if_index);
- clib_memcpy (mp->host_if_name, af_packet_if->host_if_name,
- MIN (ARRAY_LEN (mp->host_if_name) - 1,
- strlen ((const char *) af_packet_if->host_if_name)));
-
- mp->context = context;
- vl_api_send_msg (reg, (u8 *) mp);
-}
-
-
-static void
-vl_api_af_packet_dump_t_handler (vl_api_af_packet_dump_t * mp)
-{
- int rv;
- vpe_api_main_t *am = &vpe_api_main;
- vl_api_registration_t *reg;
- af_packet_if_detail_t *out_af_packet_ifs = NULL;
- af_packet_if_detail_t *af_packet_if = NULL;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- rv = af_packet_dump_ifs (&out_af_packet_ifs);
- if (rv)
- return;
-
- vec_foreach (af_packet_if, out_af_packet_ifs)
- {
- af_packet_send_details (am, reg, af_packet_if, mp->context);
- }
-
- vec_free (out_af_packet_ifs);
-}
-
-#include <vnet/devices/af_packet/af_packet.api.c>
-static clib_error_t *
-af_packet_api_hookup (vlib_main_t * vm)
-{
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- REPLY_MSG_ID_BASE = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (af_packet_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c
deleted file mode 100644
index b6b99a0465c..00000000000
--- a/src/vnet/devices/af_packet/device.c
+++ /dev/null
@@ -1,398 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.c - linux kernel packet interface
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <linux/if_packet.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-#include <net/if.h>
-#include <net/if_arp.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-
-#include <vnet/devices/af_packet/af_packet.h>
-
-#define foreach_af_packet_tx_func_error \
-_(FRAME_NOT_READY, "tx frame not ready") \
-_(TXRING_EAGAIN, "tx sendto temporary failure") \
-_(TXRING_FATAL, "tx sendto fatal failure") \
-_(TXRING_OVERRUN, "tx ring overrun")
-
-typedef enum
-{
-#define _(f,s) AF_PACKET_TX_ERROR_##f,
- foreach_af_packet_tx_func_error
-#undef _
- AF_PACKET_TX_N_ERROR,
-} af_packet_tx_func_error_t;
-
-static char *af_packet_tx_func_error_strings[] = {
-#define _(n,s) s,
- foreach_af_packet_tx_func_error
-#undef _
-};
-
-
-#ifndef CLIB_MARCH_VARIANT
-u8 *
-format_af_packet_device_name (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, i);
-
- s = format (s, "host-%s", apif->host_if_name);
- return s;
-}
-#endif /* CLIB_MARCH_VARIANT */
-
-static u8 *
-format_af_packet_device (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- u32 indent = format_get_indent (s);
- int __clib_unused verbose = va_arg (*args, int);
-
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, dev_instance);
- clib_spinlock_lock_if_init (&apif->lockp);
- u32 block_size = apif->tx_req->tp_block_size;
- u32 frame_size = apif->tx_req->tp_frame_size;
- u32 frame_num = apif->tx_req->tp_frame_nr;
- int block = 0;
- u8 *block_start = apif->tx_ring + block * block_size;
- u32 tx_frame = apif->next_tx_frame;
- struct tpacket2_hdr *tph;
-
- s = format (s, "Linux PACKET socket interface\n");
- s = format (s, "%Ublock:%d frame:%d\n", format_white_space, indent,
- block_size, frame_size);
- s = format (s, "%Unext frame:%d\n", format_white_space, indent,
- apif->next_tx_frame);
-
- int n_send_req = 0, n_avail = 0, n_sending = 0, n_tot = 0, n_wrong = 0;
- do
- {
- tph = (struct tpacket2_hdr *) (block_start + tx_frame * frame_size);
- tx_frame = (tx_frame + 1) % frame_num;
- if (tph->tp_status == 0)
- n_avail++;
- else if (tph->tp_status & TP_STATUS_SEND_REQUEST)
- n_send_req++;
- else if (tph->tp_status & TP_STATUS_SENDING)
- n_sending++;
- else
- n_wrong++;
- n_tot++;
- }
- while (tx_frame != apif->next_tx_frame);
- s = format (s, "%Uavailable:%d request:%d sending:%d wrong:%d total:%d\n",
- format_white_space, indent, n_avail, n_send_req, n_sending,
- n_wrong, n_tot);
-
- clib_spinlock_unlock_if_init (&apif->lockp);
- return s;
-}
-
-static u8 *
-format_af_packet_tx_trace (u8 * s, va_list * args)
-{
- s = format (s, "Unimplemented...");
- return s;
-}
-
-VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- af_packet_main_t *apm = &af_packet_main;
- u32 *buffers = vlib_frame_vector_args (frame);
- u32 n_left = frame->n_vectors;
- u32 n_sent = 0;
- vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, rd->dev_instance);
- clib_spinlock_lock_if_init (&apif->lockp);
- int block = 0;
- u32 block_size = apif->tx_req->tp_block_size;
- u32 frame_size = apif->tx_req->tp_frame_size;
- u32 frame_num = apif->tx_req->tp_frame_nr;
- u8 *block_start = apif->tx_ring + block * block_size;
- u32 tx_frame = apif->next_tx_frame;
- struct tpacket2_hdr *tph;
- u32 frame_not_ready = 0;
-
- while (n_left)
- {
- u32 len;
- u32 offset = 0;
- vlib_buffer_t *b0;
- n_left--;
- u32 bi = buffers[0];
- buffers++;
-
- nextframe:
- tph = (struct tpacket2_hdr *) (block_start + tx_frame * frame_size);
- if (PREDICT_FALSE (tph->tp_status &
- (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)))
- {
- tx_frame = (tx_frame + 1) % frame_num;
- frame_not_ready++;
- /* check if we've exhausted the ring */
- if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
- break;
- goto nextframe;
- }
-
- do
- {
- b0 = vlib_get_buffer (vm, bi);
- len = b0->current_length;
- clib_memcpy_fast ((u8 *) tph +
- TPACKET_ALIGN (sizeof (struct tpacket2_hdr)) +
- offset, vlib_buffer_get_current (b0), len);
- offset += len;
- }
- while ((bi =
- (b0->flags & VLIB_BUFFER_NEXT_PRESENT) ? b0->next_buffer : 0));
-
- tph->tp_len = tph->tp_snaplen = offset;
- tph->tp_status = TP_STATUS_SEND_REQUEST;
- n_sent++;
-
- tx_frame = (tx_frame + 1) % frame_num;
-
- /* check if we've exhausted the ring */
- if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
- break;
- }
-
- CLIB_MEMORY_BARRIER ();
-
- apif->next_tx_frame = tx_frame;
-
- if (PREDICT_TRUE (n_sent))
- if (PREDICT_FALSE (sendto (apif->fd, NULL, 0, MSG_DONTWAIT, NULL, 0) ==
- -1))
- {
- /* Uh-oh, drop & move on, but count whether it was fatal or not.
- * Note that we have no reliable way to properly determine the
- * disposition of the packets we just enqueued for delivery.
- */
- vlib_error_count (vm, node->node_index,
- unix_error_is_fatal (errno) ?
- AF_PACKET_TX_ERROR_TXRING_FATAL :
- AF_PACKET_TX_ERROR_TXRING_EAGAIN,
- n_sent);
- }
-
- clib_spinlock_unlock_if_init (&apif->lockp);
-
- if (PREDICT_FALSE (frame_not_ready))
- vlib_error_count (vm, node->node_index,
- AF_PACKET_TX_ERROR_FRAME_NOT_READY, frame_not_ready);
-
- if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num))
- vlib_error_count (vm, node->node_index, AF_PACKET_TX_ERROR_TXRING_OVERRUN,
- n_left);
-
- vlib_buffer_free (vm, vlib_frame_vector_args (frame), frame->n_vectors);
- return frame->n_vectors;
-}
-
-static void
-af_packet_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
- u32 node_index)
-{
- af_packet_main_t *apm = &af_packet_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, hw->dev_instance);
-
- /* Shut off redirection */
- if (node_index == ~0)
- {
- apif->per_interface_next_index = node_index;
- return;
- }
-
- apif->per_interface_next_index =
- vlib_node_add_next (vlib_get_main (), af_packet_input_node.index,
- node_index);
-}
-
-static void
-af_packet_clear_hw_interface_counters (u32 instance)
-{
- /* Nothing for now */
-}
-
-static clib_error_t *
-af_packet_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
- u32 flags)
-{
- af_packet_main_t *apm = &af_packet_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, hw->dev_instance);
- u32 hw_flags;
- int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0);
- struct ifreq ifr;
-
- if (0 > fd)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s could not open socket",
- apif->host_if_name);
- return 0;
- }
-
- /* if interface is a bridge ignore */
- if (apif->host_if_index < 0)
- goto error; /* no error */
-
- /* use host_if_index in case host name has changed */
- ifr.ifr_ifindex = apif->host_if_index;
- if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class,
- "af_packet_%s ioctl could not retrieve eth name",
- apif->host_if_name);
- goto error;
- }
-
- apif->is_admin_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
-
- if ((rv = ioctl (fd, SIOCGIFFLAGS, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s error: %d",
- apif->is_admin_up ? "up" : "down", rv);
- goto error;
- }
-
- if (apif->is_admin_up)
- {
- hw_flags = VNET_HW_INTERFACE_FLAG_LINK_UP;
- ifr.ifr_flags |= IFF_UP;
- }
- else
- {
- hw_flags = 0;
- ifr.ifr_flags &= ~IFF_UP;
- }
-
- if ((rv = ioctl (fd, SIOCSIFFLAGS, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s error: %d",
- apif->is_admin_up ? "up" : "down", rv);
- goto error;
- }
-
- vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
-
-error:
- if (0 <= fd)
- close (fd);
-
- return 0; /* no error */
-}
-
-static clib_error_t *
-af_packet_subif_add_del_function (vnet_main_t * vnm,
- u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- /* Nothing for now */
- return 0;
-}
-
-static clib_error_t *af_packet_set_mac_address_function
- (struct vnet_hw_interface_t *hi, const u8 * old_address, const u8 * address)
-{
- af_packet_main_t *apm = &af_packet_main;
- af_packet_if_t *apif =
- pool_elt_at_index (apm->interfaces, hi->dev_instance);
- int rv, fd = socket (AF_UNIX, SOCK_DGRAM, 0);
- struct ifreq ifr;
-
- if (0 > fd)
- {
- vlib_log_warn (apm->log_class, "af_packet_%s could not open socket",
- apif->host_if_name);
- return 0;
- }
-
- /* if interface is a bridge ignore */
- if (apif->host_if_index < 0)
- goto error; /* no error */
-
- /* use host_if_index in case host name has changed */
- ifr.ifr_ifindex = apif->host_if_index;
- if ((rv = ioctl (fd, SIOCGIFNAME, &ifr)) < 0)
- {
- vlib_log_warn
- (apm->log_class,
- "af_packet_%s ioctl could not retrieve eth name, error: %d",
- apif->host_if_name, rv);
- goto error;
- }
-
- clib_memcpy (ifr.ifr_hwaddr.sa_data, address, 6);
- ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
-
- if ((rv = ioctl (fd, SIOCSIFHWADDR, &ifr)) < 0)
- {
- vlib_log_warn (apm->log_class,
- "af_packet_%s ioctl could not set mac, error: %d",
- apif->host_if_name, rv);
- goto error;
- }
-
-error:
-
- if (0 <= fd)
- close (fd);
-
- return 0; /* no error */
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (af_packet_device_class) = {
- .name = "af-packet",
- .format_device_name = format_af_packet_device_name,
- .format_device = format_af_packet_device,
- .format_tx_trace = format_af_packet_tx_trace,
- .tx_function_n_errors = AF_PACKET_TX_N_ERROR,
- .tx_function_error_strings = af_packet_tx_func_error_strings,
- .rx_redirect_to_node = af_packet_set_interface_next_node,
- .clear_counters = af_packet_clear_hw_interface_counters,
- .admin_up_down_function = af_packet_interface_admin_up_down,
- .subif_add_del_function = af_packet_subif_add_del_function,
- .mac_addr_change_function = af_packet_set_mac_address_function,
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c
deleted file mode 100644
index 9fd115f9509..00000000000
--- a/src/vnet/devices/af_packet/node.c
+++ /dev/null
@@ -1,410 +0,0 @@
-/*
- *------------------------------------------------------------------
- * af_packet.c - linux kernel packet interface
- *
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <linux/if_packet.h>
-
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/interface/rx_queue_funcs.h>
-#include <vnet/feature/feature.h>
-#include <vnet/ethernet/packet.h>
-
-#include <vnet/devices/af_packet/af_packet.h>
-
-#define foreach_af_packet_input_error \
- _(PARTIAL_PKT, "partial packet")
-
-typedef enum
-{
-#define _(f,s) AF_PACKET_INPUT_ERROR_##f,
- foreach_af_packet_input_error
-#undef _
- AF_PACKET_INPUT_N_ERROR,
-} af_packet_input_error_t;
-
-static char *af_packet_input_error_strings[] = {
-#define _(n,s) s,
- foreach_af_packet_input_error
-#undef _
-};
-
-typedef struct
-{
- u32 next_index;
- u32 hw_if_index;
- int block;
- struct tpacket2_hdr tph;
-} af_packet_input_trace_t;
-
-static u8 *
-format_af_packet_input_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *);
- u32 indent = format_get_indent (s);
-
- s = format (s, "af_packet: hw_if_index %d next-index %d",
- t->hw_if_index, t->next_index);
-
- s =
- format (s,
- "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u"
- "\n%Usec 0x%x nsec 0x%x vlan %U"
-#ifdef TP_STATUS_VLAN_TPID_VALID
- " vlan_tpid %u"
-#endif
- ,
- format_white_space, indent + 2,
- format_white_space, indent + 4,
- t->tph.tp_status,
- t->tph.tp_len,
- t->tph.tp_snaplen,
- t->tph.tp_mac,
- t->tph.tp_net,
- format_white_space, indent + 4,
- t->tph.tp_sec,
- t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.tp_vlan_tci
-#ifdef TP_STATUS_VLAN_TPID_VALID
- , t->tph.tp_vlan_tpid
-#endif
- );
- return s;
-}
-
-always_inline void
-buffer_add_to_chain (vlib_main_t * vm, u32 bi, u32 first_bi, u32 prev_bi)
-{
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- vlib_buffer_t *first_b = vlib_get_buffer (vm, first_bi);
- vlib_buffer_t *prev_b = vlib_get_buffer (vm, prev_bi);
-
- /* update first buffer */
- first_b->total_length_not_including_first_buffer += b->current_length;
-
- /* update previous buffer */
- prev_b->next_buffer = bi;
- prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
-
- /* update current buffer */
- b->next_buffer = 0;
-}
-
-static_always_inline void
-fill_gso_buffer_flags (vlib_buffer_t *b, u32 gso_size, u8 l4_hdr_sz)
-{
- b->flags |= VNET_BUFFER_F_GSO;
- vnet_buffer2 (b)->gso_size = gso_size;
- vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_sz;
-}
-
-static_always_inline void
-mark_tcp_udp_cksum_calc (vlib_buffer_t *b, u8 *l4_hdr_sz)
-{
- ethernet_header_t *eth = vlib_buffer_get_current (b);
- vnet_buffer_oflags_t oflags = 0;
- if (clib_net_to_host_u16 (eth->type) == ETHERNET_TYPE_IP4)
- {
- ip4_header_t *ip4 =
- (vlib_buffer_get_current (b) + sizeof (ethernet_header_t));
- b->flags |= VNET_BUFFER_F_IS_IP4;
- if (ip4->protocol == IP_PROTOCOL_TCP)
- {
- oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
- tcp_header_t *tcp = (tcp_header_t *) (vlib_buffer_get_current (b) +
- sizeof (ethernet_header_t) +
- ip4_header_bytes (ip4));
- tcp->checksum = 0;
- *l4_hdr_sz = tcp_header_bytes (tcp);
- }
- else if (ip4->protocol == IP_PROTOCOL_UDP)
- {
- oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- udp_header_t *udp = (udp_header_t *) (vlib_buffer_get_current (b) +
- sizeof (ethernet_header_t) +
- ip4_header_bytes (ip4));
- udp->checksum = 0;
- *l4_hdr_sz = sizeof (*udp);
- }
- vnet_buffer (b)->l3_hdr_offset = sizeof (ethernet_header_t);
- vnet_buffer (b)->l4_hdr_offset =
- sizeof (ethernet_header_t) + ip4_header_bytes (ip4);
- if (oflags)
- vnet_buffer_offload_flags_set (b, oflags);
- }
- else if (clib_net_to_host_u16 (eth->type) == ETHERNET_TYPE_IP6)
- {
- ip6_header_t *ip6 =
- (vlib_buffer_get_current (b) + sizeof (ethernet_header_t));
- b->flags |= VNET_BUFFER_F_IS_IP6;
- u16 ip6_hdr_len = sizeof (ip6_header_t);
- if (ip6_ext_hdr (ip6->protocol))
- {
- ip6_ext_header_t *p = (void *) (ip6 + 1);
- ip6_hdr_len += ip6_ext_header_len (p);
- while (ip6_ext_hdr (p->next_hdr))
- {
- ip6_hdr_len += ip6_ext_header_len (p);
- p = ip6_ext_next_header (p);
- }
- }
- if (ip6->protocol == IP_PROTOCOL_TCP)
- {
- oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
- tcp_header_t *tcp =
- (tcp_header_t *) (vlib_buffer_get_current (b) +
- sizeof (ethernet_header_t) + ip6_hdr_len);
- tcp->checksum = 0;
- *l4_hdr_sz = tcp_header_bytes (tcp);
- }
- else if (ip6->protocol == IP_PROTOCOL_UDP)
- {
- oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- udp_header_t *udp =
- (udp_header_t *) (vlib_buffer_get_current (b) +
- sizeof (ethernet_header_t) + ip6_hdr_len);
- udp->checksum = 0;
- *l4_hdr_sz = sizeof (*udp);
- }
- vnet_buffer (b)->l3_hdr_offset = sizeof (ethernet_header_t);
- vnet_buffer (b)->l4_hdr_offset =
- sizeof (ethernet_header_t) + ip6_hdr_len;
- if (oflags)
- vnet_buffer_offload_flags_set (b, oflags);
- }
-}
-
-always_inline uword
-af_packet_device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, af_packet_if_t * apif)
-{
- af_packet_main_t *apm = &af_packet_main;
- struct tpacket2_hdr *tph;
- u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- u32 block = 0;
- u32 rx_frame;
- u32 n_free_bufs;
- u32 n_rx_packets = 0;
- u32 n_rx_bytes = 0;
- u32 *to_next = 0;
- u32 block_size = apif->rx_req->tp_block_size;
- u32 frame_size = apif->rx_req->tp_frame_size;
- u32 frame_num = apif->rx_req->tp_frame_nr;
- u8 *block_start = apif->rx_ring + block * block_size;
- uword n_trace = vlib_get_trace_count (vm, node);
- u32 thread_index = vm->thread_index;
- u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm);
- u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes;
-
- n_free_bufs = vec_len (apm->rx_buffers[thread_index]);
- if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE))
- {
- vec_validate (apm->rx_buffers[thread_index],
- VLIB_FRAME_SIZE + n_free_bufs - 1);
- n_free_bufs +=
- vlib_buffer_alloc (vm, &apm->rx_buffers[thread_index][n_free_bufs],
- VLIB_FRAME_SIZE);
- _vec_len (apm->rx_buffers[thread_index]) = n_free_bufs;
- }
-
- rx_frame = apif->next_rx_frame;
- tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
- while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs))
- {
- vlib_buffer_t *b0 = 0, *first_b0 = 0;
- u32 next0 = next_index;
-
- u32 n_left_to_next;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
- while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs) &&
- n_left_to_next)
- {
- u32 data_len = tph->tp_snaplen;
- u32 offset = 0;
- u32 bi0 = 0, first_bi0 = 0, prev_bi0;
- u8 l4_hdr_sz = 0;
-
- while (data_len)
- {
- /* grab free buffer */
- u32 last_empty_buffer =
- vec_len (apm->rx_buffers[thread_index]) - 1;
- prev_bi0 = bi0;
- bi0 = apm->rx_buffers[thread_index][last_empty_buffer];
- b0 = vlib_get_buffer (vm, bi0);
- _vec_len (apm->rx_buffers[thread_index]) = last_empty_buffer;
- n_free_bufs--;
-
- /* copy data */
- u32 bytes_to_copy =
- data_len > n_buffer_bytes ? n_buffer_bytes : data_len;
- u32 vlan_len = 0;
- u32 bytes_copied = 0;
- b0->current_data = 0;
- /* Kernel removes VLAN headers, so reconstruct VLAN */
- if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID))
- {
- if (PREDICT_TRUE (offset == 0))
- {
- clib_memcpy_fast (vlib_buffer_get_current (b0),
- (u8 *) tph + tph->tp_mac,
- sizeof (ethernet_header_t));
- ethernet_header_t *eth = vlib_buffer_get_current (b0);
- ethernet_vlan_header_t *vlan =
- (ethernet_vlan_header_t *) (eth + 1);
- vlan->priority_cfi_and_id =
- clib_host_to_net_u16 (tph->tp_vlan_tci);
- vlan->type = eth->type;
- eth->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN);
- vlan_len = sizeof (ethernet_vlan_header_t);
- bytes_copied = sizeof (ethernet_header_t);
- }
- }
- clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) +
- bytes_copied + vlan_len,
- (u8 *) tph + tph->tp_mac + offset +
- bytes_copied, (bytes_to_copy - bytes_copied));
-
- /* fill buffer header */
- b0->current_length = bytes_to_copy + vlan_len;
-
- if (offset == 0)
- {
- b0->total_length_not_including_first_buffer = 0;
- b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = apif->sw_if_index;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- first_bi0 = bi0;
- first_b0 = vlib_get_buffer (vm, first_bi0);
- if (tph->tp_status & TP_STATUS_CSUMNOTREADY)
- mark_tcp_udp_cksum_calc (first_b0, &l4_hdr_sz);
- if (tph->tp_snaplen > apif->host_mtu)
- fill_gso_buffer_flags (first_b0, apif->host_mtu,
- l4_hdr_sz);
- }
- else
- buffer_add_to_chain (vm, bi0, first_bi0, prev_bi0);
-
- offset += bytes_to_copy;
- data_len -= bytes_to_copy;
- }
- n_rx_packets++;
- n_rx_bytes += tph->tp_snaplen;
- to_next[0] = first_bi0;
- to_next += 1;
- n_left_to_next--;
-
- /* drop partial packets */
- if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen))
- {
- next0 = VNET_DEVICE_INPUT_NEXT_DROP;
- first_b0->error =
- node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT];
- }
- else
- {
- next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
-
- if (PREDICT_FALSE (apif->per_interface_next_index != ~0))
- next0 = apif->per_interface_next_index;
-
- /* redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (apif->sw_if_index, &next0,
- first_b0);
- }
-
- /* trace */
- if (PREDICT_FALSE
- (n_trace > 0 && vlib_trace_buffer (vm, node, next0, first_b0,
- /* follow_chain */ 0)))
- {
- af_packet_input_trace_t *tr;
- vlib_set_trace_count (vm, node, --n_trace);
- tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr));
- tr->next_index = next0;
- tr->hw_if_index = apif->hw_if_index;
- clib_memcpy_fast (&tr->tph, tph, sizeof (struct tpacket2_hdr));
- }
-
- /* enque and take next packet */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, first_bi0, next0);
-
- /* next packet */
- tph->tp_status = TP_STATUS_KERNEL;
- rx_frame = (rx_frame + 1) % frame_num;
- tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- apif->next_rx_frame = rx_frame;
-
- vlib_increment_combined_counter
- (vnet_get_main ()->interface_main.combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX,
- vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes);
-
- vnet_device_increment_rx_packets (thread_index, n_rx_packets);
- return n_rx_packets;
-}
-
-VLIB_NODE_FN (af_packet_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- u32 n_rx_packets = 0;
- af_packet_main_t *apm = &af_packet_main;
- vnet_hw_if_rxq_poll_vector_t *pv;
- pv = vnet_hw_if_get_rxq_poll_vector (vm, node);
- for (int i = 0; i < vec_len (pv); i++)
- {
- af_packet_if_t *apif;
- apif = vec_elt_at_index (apm->interfaces, pv[i].dev_instance);
- if (apif->is_admin_up)
- n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif);
- }
-
- return n_rx_packets;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (af_packet_input_node) = {
- .name = "af-packet-input",
- .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
- .sibling_of = "device-input",
- .format_trace = format_af_packet_input_trace,
- .type = VLIB_NODE_TYPE_INPUT,
- .state = VLIB_NODE_STATE_INTERRUPT,
- .n_errors = AF_PACKET_INPUT_N_ERROR,
- .error_strings = af_packet_input_error_strings,
-};
-/* *INDENT-ON* */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/devices/devices.c b/src/vnet/devices/devices.c
index 5c28cadc03c..ee380bebbde 100644
--- a/src/vnet/devices/devices.c
+++ b/src/vnet/devices/devices.c
@@ -18,6 +18,7 @@
#include <vnet/feature/feature.h>
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
+#include <vlib/stats/stats.h>
vnet_device_main_t vnet_device_main;
@@ -28,7 +29,6 @@ device_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (device_input_node) = {
.function = device_input_fn,
.name = "device-input",
@@ -39,29 +39,6 @@ VLIB_REGISTER_NODE (device_input_node) = {
.next_nodes = VNET_DEVICE_INPUT_NEXT_NODES,
};
-/* Table defines how much we need to advance current data pointer
- in the buffer if we shortcut to l3 nodes */
-
-const u32 __attribute__((aligned (CLIB_CACHE_LINE_BYTES)))
-device_input_next_node_advance[((VNET_DEVICE_INPUT_N_NEXT_NODES /
- CLIB_CACHE_LINE_BYTES) +1) * CLIB_CACHE_LINE_BYTES] =
-{
- [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = sizeof (ethernet_header_t),
- [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = sizeof (ethernet_header_t),
- [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = sizeof (ethernet_header_t),
- [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = sizeof (ethernet_header_t),
-};
-
-const u32 __attribute__((aligned (CLIB_CACHE_LINE_BYTES)))
-device_input_next_node_flags[((VNET_DEVICE_INPUT_N_NEXT_NODES /
- CLIB_CACHE_LINE_BYTES) +1) * CLIB_CACHE_LINE_BYTES] =
-{
- [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
- [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
- [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
- [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = VNET_BUFFER_F_L3_HDR_OFFSET_VALID,
-};
-
VNET_FEATURE_ARC_INIT (device_input, static) =
{
.arc_name = "device-input",
@@ -99,7 +76,23 @@ VNET_FEATURE_INIT (ethernet_input, static) = {
.node_name = "ethernet-input",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
+
+static void
+input_rate_collector_fn (vlib_stats_collector_data_t *d)
+{
+ vlib_stats_segment_t *sm = vlib_stats_get_segment ();
+ vlib_stats_entry_t *e2 = sm->directory_vector + d->private_data;
+ static u64 last_input_packets = 0;
+ f64 dt, now;
+
+ now = vlib_time_now (vlib_get_main ());
+ u64 input_packets = vnet_get_aggregate_rx_packets ();
+
+ dt = now - e2->value;
+ d->entry->value = (f64) (input_packets - last_input_packets) / dt;
+ last_input_packets = input_packets;
+ e2->value = now;
+}
static clib_error_t *
vnet_device_init (vlib_main_t * vm)
@@ -107,6 +100,7 @@ vnet_device_init (vlib_main_t * vm)
vnet_device_main_t *vdm = &vnet_device_main;
vlib_thread_main_t *tm = vlib_get_thread_main ();
vlib_thread_registration_t *tr;
+ vlib_stats_collector_reg_t reg = {};
uword *p;
vec_validate_aligned (vdm->workers, tm->n_vlib_mains - 1,
@@ -120,6 +114,12 @@ vnet_device_init (vlib_main_t * vm)
vdm->next_worker_thread_index = tr->first_index;
vdm->last_worker_thread_index = tr->first_index + tr->count - 1;
}
+
+ reg.private_data = vlib_stats_add_timestamp ("/sys/last_update");
+ reg.entry_index = vlib_stats_add_gauge ("/sys/input_rate");
+ reg.collect_fn = input_rate_collector_fn;
+ vlib_stats_register_collector_fn (&reg);
+
return 0;
}
diff --git a/src/vnet/devices/devices.h b/src/vnet/devices/devices.h
index e54c7a29130..cadf1f857a6 100644
--- a/src/vnet/devices/devices.h
+++ b/src/vnet/devices/devices.h
@@ -67,8 +67,6 @@ typedef struct
extern vnet_device_main_t vnet_device_main;
extern vlib_node_registration_t device_input_node;
-extern const u32 device_input_next_node_advance[];
-extern const u32 device_input_next_node_flags[];
static inline u64
vnet_get_aggregate_rx_packets (void)
diff --git a/src/vnet/devices/netlink.c b/src/vnet/devices/netlink.c
index 9aae205c54f..3fd3e13bf77 100644
--- a/src/vnet/devices/netlink.c
+++ b/src/vnet/devices/netlink.c
@@ -20,8 +20,13 @@
#include <fcntl.h>
#include <net/if.h>
+#ifdef __linux__
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
+#elif __FreeBSD__
+#include <netlink/netlink.h>
+#include <netlink/netlink_route.h>
+#endif
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
@@ -273,7 +278,6 @@ vnet_netlink_get_link_mtu (int ifindex, u32 *mtu)
*mtu = clib_net_to_host_u32 (msg_mtu);
else
*mtu = msg_mtu;
- clib_warning ("mtu: %d", *mtu);
goto done;
}
offset = NLA_ALIGN (attr->nla_len);
@@ -409,6 +413,50 @@ vnet_netlink_add_ip6_route (void *dst, u8 dst_len, void *gw)
return err;
}
+clib_error_t *
+vnet_netlink_del_ip4_addr (int ifindex, void *addr, int pfx_len)
+{
+ vnet_netlink_msg_t m;
+ struct ifaddrmsg ifa = { 0 };
+ clib_error_t *err = 0;
+
+ ifa.ifa_family = AF_INET;
+ ifa.ifa_prefixlen = pfx_len;
+ ifa.ifa_index = ifindex;
+
+ vnet_netlink_msg_init (&m, RTM_DELADDR, NLM_F_REQUEST, &ifa,
+ sizeof (struct ifaddrmsg));
+
+ vnet_netlink_msg_add_rtattr (&m, IFA_LOCAL, addr, 4);
+ vnet_netlink_msg_add_rtattr (&m, IFA_ADDRESS, addr, 4);
+ err = vnet_netlink_msg_send (&m, NULL);
+ if (err)
+ err = clib_error_return (0, "del ip4 addr %U", format_clib_error, err);
+ return err;
+}
+
+clib_error_t *
+vnet_netlink_del_ip6_addr (int ifindex, void *addr, int pfx_len)
+{
+ vnet_netlink_msg_t m;
+ struct ifaddrmsg ifa = { 0 };
+ clib_error_t *err = 0;
+
+ ifa.ifa_family = AF_INET6;
+ ifa.ifa_prefixlen = pfx_len;
+ ifa.ifa_index = ifindex;
+
+ vnet_netlink_msg_init (&m, RTM_DELADDR, NLM_F_REQUEST, &ifa,
+ sizeof (struct ifaddrmsg));
+
+ vnet_netlink_msg_add_rtattr (&m, IFA_LOCAL, addr, 16);
+ vnet_netlink_msg_add_rtattr (&m, IFA_ADDRESS, addr, 16);
+ err = vnet_netlink_msg_send (&m, NULL);
+ if (err)
+ err = clib_error_return (0, "del ip6 addr %U", format_clib_error, err);
+ return err;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/devices/netlink.h b/src/vnet/devices/netlink.h
index f1c42609cbf..086781fdbff 100644
--- a/src/vnet/devices/netlink.h
+++ b/src/vnet/devices/netlink.h
@@ -26,8 +26,10 @@ clib_error_t *vnet_netlink_get_link_mtu (int ifindex, u32 *mtu);
clib_error_t *vnet_netlink_set_link_mtu (int ifindex, int mtu);
clib_error_t *vnet_netlink_add_ip4_addr (int ifindex, void *addr,
int pfx_len);
+clib_error_t *vnet_netlink_del_ip4_addr (int ifindex, void *addr, int pfx_len);
clib_error_t *vnet_netlink_add_ip6_addr (int ifindex, void *addr,
int pfx_len);
+clib_error_t *vnet_netlink_del_ip6_addr (int ifindex, void *addr, int pfx_len);
clib_error_t *vnet_netlink_add_ip4_route (void *dst, u8 dst_len, void *gw);
clib_error_t *vnet_netlink_add_ip6_route (void *dst, u8 dst_len, void *gw);
diff --git a/src/vnet/devices/pipe/pipe.c b/src/vnet/devices/pipe/pipe.c
index eb92b3c788a..9caee2a55cb 100644
--- a/src/vnet/devices/pipe/pipe.c
+++ b/src/vnet/devices/pipe/pipe.c
@@ -83,13 +83,11 @@ pipe_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (pipe_hw_interface_class) = {
.name = "Pipe",
.build_rewrite = pipe_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
pipe_t *
pipe_get (u32 sw_if_index)
@@ -131,7 +129,7 @@ pipe_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
{
u32 n_left_from, n_left_to_next, n_copy, *from, *to_next;
u32 next_index = VNET_PIPE_TX_NEXT_ETHERNET_INPUT;
- u32 i, sw_if_index = 0, n_pkts = 0, n_bytes = 0;
+ u32 i, sw_if_index = 0;
vlib_buffer_t *b;
pipe_t *pipe;
@@ -159,8 +157,6 @@ pipe_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vnet_buffer (b)->sw_if_index[VLIB_TX] = ~0;
i++;
- n_pkts++;
- n_bytes += vlib_buffer_length_in_chain (vm, b);
}
from += n_copy;
@@ -187,25 +183,21 @@ pipe_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
- /* *INDENT-OFF* */
hi = vnet_get_hw_interface (vnm, hw_if_index);
hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
({
vnet_sw_interface_set_flags (vnm, sw_if_index, flags);
}));
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (pipe_device_class) = {
.name = "Pipe",
.format_device_name = format_pipe_name,
.tx_function = pipe_tx,
.admin_up_down_function = pipe_admin_up_down,
};
-/* *INDENT-ON* */
#define foreach_pipe_rx_next \
_ (DROP, "error-drop")
@@ -434,7 +426,6 @@ pipe_rx (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (pipe_rx_node) = {
.function = pipe_rx,
.name = "pipe-rx",
@@ -444,7 +435,6 @@ VLIB_REGISTER_NODE (pipe_rx_node) = {
.sibling_of = "ethernet-input",
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated pipe instance numbers.
@@ -534,6 +524,7 @@ vnet_create_pipe_interface (u8 is_specified,
{
vnet_main_t *vnm = vnet_get_main ();
vlib_main_t *vm = vlib_get_main ();
+ vnet_eth_interface_registration_t eir = {};
u8 address[6] = {
[0] = 0x22,
[1] = 0x22,
@@ -564,15 +555,10 @@ vnet_create_pipe_interface (u8 is_specified,
*/
address[5] = instance;
- error = ethernet_register_interface (vnm, pipe_device_class.index,
- instance, address, &hw_if_index,
- /* flag change */ 0);
-
- if (error)
- {
- rv = VNET_API_ERROR_INVALID_REGISTRATION;
- goto oops;
- }
+ eir.dev_class_index = pipe_device_class.index;
+ eir.dev_instance = instance;
+ eir.address = address;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
hi = vnet_get_hw_interface (vnm, hw_if_index);
*parent_sw_if_index = hi->sw_if_index;
@@ -631,13 +617,11 @@ pipe_hw_walk (vnet_main_t * vnm, u32 hw_if_index, void *args)
{
u32 pipe_sw_if_index[2], id, sw_if_index;
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
({
ASSERT(id < 2);
pipe_sw_if_index[id] = sw_if_index;
}));
- /* *INDENT-ON* */
ctx->cb (hi->sw_if_index, pipe_sw_if_index, hi->dev_instance, ctx->ctx);
}
@@ -696,13 +680,11 @@ create_pipe_interfaces (vlib_main_t * vm,
* Example of how to create a pipe interface:
* @cliexcmd{pipe create}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pipe_create_interface_command, static) = {
.path = "pipe create",
.short_help = "pipe create [instance <instance>]",
.function = create_pipe_interfaces,
};
-/* *INDENT-ON* */
int
vnet_delete_pipe_interface (u32 sw_if_index)
@@ -726,13 +708,11 @@ vnet_delete_pipe_interface (u32 sw_if_index)
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
}
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
({
vnet_delete_sub_interface(sw_if_index);
pipe_main.pipes[sw_if_index] = PIPE_INVALID;
}));
- /* *INDENT-ON* */
ethernet_delete_interface (vnm, hw_if_index);
@@ -776,13 +756,11 @@ delete_pipe_interfaces (vlib_main_t * vm,
* Example of how to delete a pipe interface:
* @cliexcmd{pipe delete-interface intfc loop0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pipe_delete_interface_command, static) = {
.path = "pipe delete",
.short_help = "pipe delete <interface>",
.function = delete_pipe_interfaces,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/devices/pipe/pipe_api.c b/src/vnet/devices/pipe/pipe_api.c
index 1f0faef7c1e..79a4377de83 100644
--- a/src/vnet/devices/pipe/pipe_api.c
+++ b/src/vnet/devices/pipe/pipe_api.c
@@ -42,14 +42,12 @@ vl_api_pipe_create_t_handler (vl_api_pipe_create_t * mp)
rv = vnet_create_pipe_interface (is_specified, user_instance,
&parent_sw_if_index, pipe_sw_if_index);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_PIPE_CREATE_REPLY,
({
rmp->sw_if_index = ntohl (parent_sw_if_index);
rmp->pipe_sw_if_index[0] = ntohl (pipe_sw_if_index[0]);
rmp->pipe_sw_if_index[1] = ntohl (pipe_sw_if_index[1]);
}));
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/devices/tap/FEATURE.yaml b/src/vnet/devices/tap/FEATURE.yaml
index 35ee4885b02..1a774fb0e74 100644
--- a/src/vnet/devices/tap/FEATURE.yaml
+++ b/src/vnet/devices/tap/FEATURE.yaml
@@ -1,6 +1,6 @@
---
name: Tap Device
-maintainer: damarion@cisco.com sluong@cisco.com sykazmi@cisco.com
+maintainer: damarion@cisco.com sluong@cisco.com mohsin.kazmi14@gmail.com
features:
- Virtio
- Persistence
diff --git a/src/vnet/devices/tap/cli.c b/src/vnet/devices/tap/cli.c
index 10f4bb0ee2e..5c676d32d60 100644
--- a/src/vnet/devices/tap/cli.c
+++ b/src/vnet/devices/tap/cli.c
@@ -41,6 +41,7 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
args.tap_flags = 0;
args.rv = -1;
args.num_rx_queues = 1;
+ args.num_tx_queues = 1;
/* Get a line of input. */
if (unformat_user (input, unformat_line_input, line_input))
@@ -76,6 +77,8 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
args.host_ip6_gw_set = 1;
else if (unformat (line_input, "num-rx-queues %d", &tmp))
args.num_rx_queues = tmp;
+ else if (unformat (line_input, "num-tx-queues %d", &tmp))
+ args.num_tx_queues = tmp;
else if (unformat (line_input, "rx-ring-size %d", &tmp))
args.rx_ring_sz = tmp;
else if (unformat (line_input, "tx-ring-size %d", &tmp))
@@ -133,12 +136,12 @@ tap_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_create_command, static) = {
.path = "create tap",
- .short_help = "create tap {id <if-id>} [hw-addr <mac-address>] "
- "[num-rx-queues <n>] [rx-ring-size <size>] [tx-ring-size <size>] "
- "[host-ns <netns>] [host-bridge <bridge-name>] "
+ .short_help =
+ "create tap {id <if-id>} [hw-addr <mac-address>] "
+ "[num-rx-queues <n>] [num-tx-queues <n>] [rx-ring-size <size>] "
+ "[tx-ring-size <size>] [host-ns <netns>] [host-bridge <bridge-name>] "
"[host-ip4-addr <ip4addr/mask>] [host-ip6-addr <ip6-addr>] "
"[host-ip4-gw <ip4-addr>] [host-ip6-gw <ip6-addr>] "
"[host-mac-addr <host-mac-address>] [host-if-name <name>] "
@@ -146,7 +149,6 @@ VLIB_CLI_COMMAND (tap_create_command, static) = {
"[persist] [attach] [tun] [packed] [in-order]",
.function = tap_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -187,14 +189,12 @@ tap_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_delete__command, static) =
{
.path = "delete tap",
.short_help = "delete tap {<interface> | sw_if_index <sw_idx>}",
.function = tap_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tap_offload_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -257,7 +257,6 @@ tap_offload_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_offload_command, static) =
{
.path = "set tap offload",
@@ -266,7 +265,6 @@ VLIB_CLI_COMMAND (tap_offload_command, static) =
"csum-offload-disable>",
.function = tap_offload_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -296,10 +294,8 @@ tap_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (vec_len (hw_if_indices) == 0)
{
- /* *INDENT-OFF* */
pool_foreach (vif, mm->interfaces)
vec_add1 (hw_if_indices, vif->hw_if_index);
- /* *INDENT-ON* */
}
virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_TAP);
@@ -309,13 +305,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tap_show_command, static) = {
.path = "show tap",
.short_help = "show tap {<interface>] [descriptors]",
.function = tap_show_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
tun_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -345,10 +339,8 @@ tun_show_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (vec_len (hw_if_indices) == 0)
{
- /* *INDENT-OFF* */
pool_foreach (vif, mm->interfaces)
vec_add1 (hw_if_indices, vif->hw_if_index);
- /* *INDENT-ON* */
}
virtio_show (vm, hw_if_indices, show_descr, VIRTIO_IF_TYPE_TUN);
@@ -358,13 +350,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tun_show_command, static) = {
.path = "show tun",
.short_help = "show tun {<interface>] [descriptors]",
.function = tun_show_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
tap_cli_init (vlib_main_t * vm)
diff --git a/src/vnet/devices/tap/tap.c b/src/vnet/devices/tap/tap.c
index 33d6e3bc84a..1e2ee87041d 100644
--- a/src/vnet/devices/tap/tap.c
+++ b/src/vnet/devices/tap/tap.c
@@ -58,13 +58,11 @@ tap_main_t tap_main;
goto error; \
}
- /* *INDENT-OFF* */
-VNET_HW_INTERFACE_CLASS (tun_device_hw_interface_class, static) =
-{
+VNET_HW_INTERFACE_CLASS (tun_device_hw_interface_class, static) = {
.name = "tun-device",
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+ .tx_hash_fn_type = VNET_HASH_FN_TYPE_IP,
};
- /* *INDENT-ON* */
#define TUN_MAX_PACKET_BYTES 65355
#define TUN_MIN_PACKET_BYTES 64
@@ -79,6 +77,14 @@ virtio_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi,
return 0;
}
+static clib_error_t *
+virtio_eth_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ u32 frame_size)
+{
+ /* nothing for now */
+ return 0;
+}
+
#define TAP_MAX_INSTANCE 1024
static void
@@ -89,14 +95,14 @@ tap_free (vlib_main_t * vm, virtio_if_t * vif)
clib_error_t *err = 0;
int i;
- /* *INDENT-OFF* */
+ virtio_pre_input_node_disable (vm, vif);
+
vec_foreach_index (i, vif->vhost_fds) if (vif->vhost_fds[i] != -1)
close (vif->vhost_fds[i]);
vec_foreach_index (i, vif->rxq_vrings)
virtio_vring_free_rx (vm, vif, RX_QUEUE (i));
vec_foreach_index (i, vif->txq_vrings)
virtio_vring_free_tx (vm, vif, TX_QUEUE (i));
- /* *INDENT-ON* */
if (vif->tap_fds)
{
@@ -106,6 +112,7 @@ tap_free (vlib_main_t * vm, virtio_if_t * vif)
error:
vec_foreach_index (i, vif->tap_fds) close (vif->tap_fds[i]);
+ vec_free (vif->tap_fds);
vec_free (vif->vhost_fds);
vec_free (vif->rxq_vrings);
vec_free (vif->txq_vrings);
@@ -129,6 +136,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
tap_main_t *tm = &tap_main;
vnet_sw_interface_t *sw;
vnet_hw_interface_t *hw;
+ vnet_hw_if_caps_change_t cc;
int i, num_vhost_queues;
int old_netns_fd = -1;
struct ifreq ifr = {.ifr_flags = IFF_NO_PI | IFF_VNET_HDR };
@@ -190,43 +198,47 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
vif->dev_instance = vif - vim->interfaces;
vif->id = args->id;
- vif->num_txqs = thm->n_vlib_mains;
+ vif->num_txqs = clib_max (args->num_tx_queues, thm->n_vlib_mains);
vif->num_rxqs = clib_max (args->num_rx_queues, 1);
if (args->tap_flags & TAP_FLAG_ATTACH)
{
- if (args->host_if_name != NULL)
- {
- host_if_name = (char *) args->host_if_name;
- clib_memcpy (ifr.ifr_name, host_if_name,
- clib_min (IFNAMSIZ, vec_len (host_if_name)));
- }
- else
+ if (args->host_if_name == NULL)
{
args->rv = VNET_API_ERROR_NO_MATCHING_INTERFACE;
err = clib_error_return (0, "host_if_name is not provided");
goto error;
}
- if (args->host_namespace)
+ }
+
+ /* if namespace is specified, all further netlink messages should be executed
+ * after we change our net namespace */
+ if (args->host_namespace)
+ {
+ old_netns_fd = clib_netns_open (NULL /* self */);
+ if ((nfd = clib_netns_open (args->host_namespace)) == -1)
{
- old_netns_fd = clib_netns_open (NULL /* self */);
- if ((nfd = clib_netns_open (args->host_namespace)) == -1)
- {
- args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
- args->error = clib_error_return_unix (0, "clib_netns_open '%s'",
- args->host_namespace);
- goto error;
- }
- if (clib_setns (nfd) == -1)
- {
- args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
- args->error = clib_error_return_unix (0, "setns '%s'",
- args->host_namespace);
- goto error;
- }
+ args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
+ args->error = clib_error_return_unix (0, "clib_netns_open '%s'",
+ args->host_namespace);
+ goto error;
+ }
+ if (clib_setns (nfd) == -1)
+ {
+ args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
+ args->error =
+ clib_error_return_unix (0, "setns '%s'", args->host_namespace);
+ goto error;
}
}
+ if (args->host_if_name != NULL)
+ {
+ host_if_name = (char *) args->host_if_name;
+ clib_memcpy (ifr.ifr_name, host_if_name,
+ clib_min (IFNAMSIZ, vec_len (host_if_name)));
+ }
+
if ((tfd = open ("/dev/net/tun", O_RDWR | O_NONBLOCK)) < 0)
{
args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
@@ -258,7 +270,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
else
ifr.ifr_flags |= IFF_MULTI_QUEUE;
- hdrsz = sizeof (virtio_net_hdr_v1_t);
+ hdrsz = sizeof (vnet_virtio_net_hdr_v1_t);
if (args->tap_flags & TAP_FLAG_GSO)
{
offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
@@ -320,10 +332,10 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
args->error = clib_error_return_unix (0, "open '/dev/net/tun'");
goto error;
}
+ vec_add1 (vif->tap_fds, qfd);
_IOCTL (qfd, TUNSETIFF, (void *) &ifr);
tap_log_dbg (vif, "TUNSETIFF fd %d name %s flags 0x%x", qfd,
ifr.ifr_ifrn.ifrn_name, ifr.ifr_flags);
- vec_add1 (vif->tap_fds, qfd);
}
for (i = 0; i < vif->num_rxqs; i++)
@@ -399,54 +411,6 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
virtio_set_net_hdr_size (vif);
- if (!(args->tap_flags & TAP_FLAG_ATTACH))
- {
- /* if namespace is specified, all further netlink messages should be executed
- after we change our net namespace */
- if (args->host_namespace)
- {
- old_netns_fd = clib_netns_open (NULL /* self */);
- if ((nfd = clib_netns_open (args->host_namespace)) == -1)
- {
- args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
- args->error = clib_error_return_unix (0, "clib_netns_open '%s'",
- args->host_namespace);
- goto error;
- }
- args->error = vnet_netlink_set_link_netns (vif->ifindex, nfd,
- host_if_name);
- if (args->error)
- {
- args->rv = VNET_API_ERROR_NETLINK_ERROR;
- goto error;
- }
- if (clib_setns (nfd) == -1)
- {
- args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
- args->error = clib_error_return_unix (0, "setns '%s'",
- args->host_namespace);
- goto error;
- }
- if ((vif->ifindex = if_nametoindex (host_if_name)) == 0)
- {
- args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
- args->error = clib_error_return_unix (0, "if_nametoindex '%s'",
- host_if_name);
- goto error;
- }
- }
- else if (host_if_name)
- {
- args->error =
- vnet_netlink_set_link_name (vif->ifindex, host_if_name);
- if (args->error)
- {
- args->rv = VNET_API_ERROR_NETLINK_ERROR;
- goto error;
- }
- }
- }
-
if (vif->type == VIRTIO_IF_TYPE_TAP)
{
if (ethernet_mac_address_is_zero (args->host_mac_addr.bytes))
@@ -611,7 +575,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
vhost_vring_addr_t addr = { 0 };
vhost_vring_state_t state = { 0 };
vhost_vring_file_t file = { 0 };
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
u16 qp = i >> 1;
int fd = vif->vhost_fds[qp];
@@ -629,7 +593,7 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
}
addr.index = state.index = file.index = vring->queue_id & 1;
- state.num = vring->size;
+ state.num = vring->queue_size;
virtio_log_debug (vif, "VHOST_SET_VRING_NUM fd %d index %u num %u", fd,
state.index, state.num);
_IOCTL (fd, VHOST_SET_VRING_NUM, &state);
@@ -668,10 +632,12 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
ethernet_mac_address_generate (args->mac_addr.bytes);
clib_memcpy (vif->mac_addr, args->mac_addr.bytes, 6);
- vif->host_bridge = format (0, "%s%c", args->host_bridge, 0);
+ if (args->host_bridge)
+ vif->host_bridge = format (0, "%s%c", args->host_bridge, 0);
}
vif->host_if_name = format (0, "%s%c", host_if_name, 0);
- vif->net_ns = format (0, "%s%c", args->host_namespace, 0);
+ if (args->host_namespace)
+ vif->net_ns = format (0, "%s%c", args->host_namespace, 0);
vif->host_mtu_size = args->host_mtu_size;
vif->tap_flags = args->tap_flags;
clib_memcpy (vif->host_mac_addr, args->host_mac_addr.bytes, 6);
@@ -684,17 +650,14 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
if (vif->type != VIRTIO_IF_TYPE_TUN)
{
- args->error =
- ethernet_register_interface (vnm, virtio_device_class.index,
- vif->dev_instance, vif->mac_addr,
- &vif->hw_if_index,
- virtio_eth_flag_change);
- if (args->error)
- {
- args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
- goto error;
- }
-
+ vnet_eth_interface_registration_t eir = {};
+
+ eir.dev_class_index = virtio_device_class.index;
+ eir.dev_instance = vif->dev_instance;
+ eir.address = vif->mac_addr;
+ eir.cb.flag_change = virtio_eth_flag_change;
+ eir.cb.set_max_frame_size = virtio_eth_set_max_frame_size;
+ vif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
}
else
{
@@ -710,18 +673,16 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
args->sw_if_index = vif->sw_if_index;
args->rv = 0;
hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ cc.mask = VNET_HW_IF_CAP_INT_MODE | VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
+ cc.val = VNET_HW_IF_CAP_INT_MODE;
+
if (args->tap_flags & TAP_FLAG_GSO)
- {
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM;
- }
+ cc.val |= VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM;
else if (args->tap_flags & TAP_FLAG_CSUM_OFFLOAD)
- {
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM;
- }
+ cc.val |= VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
+
if ((args->tap_flags & TAP_FLAG_GSO)
&& (args->tap_flags & TAP_FLAG_GRO_COALESCE))
{
@@ -729,18 +690,18 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
}
if (vif->type == VIRTIO_IF_TYPE_TUN)
{
- hw->max_supported_packet_bytes = TUN_MAX_PACKET_BYTES;
- hw->min_packet_bytes = hw->min_supported_packet_bytes =
- TUN_MIN_PACKET_BYTES;
- hw->max_packet_bytes =
- args->host_mtu_size ? args->host_mtu_size : TUN_DEFAULT_PACKET_BYTES;
- vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, hw->max_packet_bytes);
+ hw->min_frame_size = TUN_MIN_PACKET_BYTES;
+ vnet_hw_interface_set_mtu (
+ vnm, hw->hw_if_index,
+ args->host_mtu_size ? args->host_mtu_size : TUN_DEFAULT_PACKET_BYTES);
}
+ vnet_hw_if_change_caps (vnm, vif->hw_if_index, &cc);
+ virtio_pre_input_node_enable (vm, vif);
virtio_vring_set_rx_queues (vm, vif);
+ virtio_vring_set_tx_queues (vm, vif);
vif->per_interface_next_index = ~0;
- vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
/*
@@ -749,7 +710,6 @@ tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args)
* TUNSETCARRIER ioctl(). See tap_set_carrier().
*/
vif->host_carrier_up = 1;
- vif->cxq_vring = NULL;
goto done;
@@ -767,7 +727,11 @@ done:
if (vhost_mem)
clib_mem_free (vhost_mem);
if (old_netns_fd != -1)
- close (old_netns_fd);
+ {
+ /* in case we errored with a switched netns */
+ clib_setns (old_netns_fd);
+ close (old_netns_fd);
+ }
if (nfd != -1)
close (nfd);
}
@@ -812,6 +776,7 @@ tap_csum_offload_enable_disable (vlib_main_t * vm, u32 sw_if_index,
virtio_main_t *mm = &virtio_main;
virtio_if_t *vif;
vnet_hw_interface_t *hw;
+ vnet_hw_if_caps_change_t cc;
clib_error_t *err = 0;
int i = 0;
@@ -829,21 +794,19 @@ tap_csum_offload_enable_disable (vlib_main_t * vm, u32 sw_if_index,
_IOCTL (vif->tap_fds[i], TUNSETOFFLOAD, offload);
vif->gso_enabled = 0;
vif->packet_coalesce = 0;
- vif->csum_offload_enabled = enable_disable ? 1 : 0;
-
- if ((hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) != 0)
- {
- hw->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO;
- }
+ cc.mask = VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_L4_TX_CKSUM;
if (enable_disable)
{
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM;
+ cc.val = VNET_HW_IF_CAP_L4_TX_CKSUM;
+ vif->csum_offload_enabled = 1;
}
else
{
- hw->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM;
+ cc.val = 0;
+ vif->csum_offload_enabled = 0;
}
+ vnet_hw_if_change_caps (vnm, vif->hw_if_index, &cc);
error:
if (err)
@@ -863,6 +826,7 @@ tap_gso_enable_disable (vlib_main_t * vm, u32 sw_if_index, int enable_disable,
virtio_main_t *mm = &virtio_main;
virtio_if_t *vif;
vnet_hw_interface_t *hw;
+ vnet_hw_if_caps_change_t cc;
clib_error_t *err = 0;
int i = 0;
@@ -878,29 +842,25 @@ tap_gso_enable_disable (vlib_main_t * vm, u32 sw_if_index, int enable_disable,
unsigned int offload = enable_disable ? gso_on : gso_off;
vec_foreach_index (i, vif->tap_fds)
_IOCTL (vif->tap_fds[i], TUNSETOFFLOAD, offload);
- vif->gso_enabled = enable_disable ? 1 : 0;
- vif->csum_offload_enabled = 0;
+
+ cc.mask = VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_L4_TX_CKSUM;
+
if (enable_disable)
{
- if ((hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) == 0)
- {
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM;
- }
+ cc.val = cc.mask;
+ vif->gso_enabled = 1;
+ vif->csum_offload_enabled = 1;
if (is_packet_coalesce)
- {
- virtio_set_packet_coalesce (vif);
- }
+ virtio_set_packet_coalesce (vif);
}
else
{
- if ((hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) != 0)
- {
- hw->caps &= ~(VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM);
- }
+ cc.val = 0;
+ vif->gso_enabled = 0;
+ vif->csum_offload_enabled = 0;
vif->packet_coalesce = 0;
}
+ vnet_hw_if_change_caps (vnm, vif->hw_if_index, &cc);
error:
if (err)
@@ -918,12 +878,11 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids)
vnet_main_t *vnm = vnet_get_main ();
virtio_main_t *mm = &virtio_main;
virtio_if_t *vif;
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
vnet_hw_interface_t *hi;
tap_interface_details_t *r_tapids = NULL;
tap_interface_details_t *tapid = NULL;
- /* *INDENT-OFF* */
pool_foreach (vif, mm->interfaces) {
if ((vif->type != VIRTIO_IF_TYPE_TAP)
&& (vif->type != VIRTIO_IF_TYPE_TUN))
@@ -936,9 +895,9 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids)
clib_memcpy(tapid->dev_name, hi->name,
MIN (ARRAY_LEN (tapid->dev_name) - 1, vec_len (hi->name)));
vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS(0));
- tapid->rx_ring_sz = vring->size;
+ tapid->rx_ring_sz = vring->queue_size;
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS(0));
- tapid->tx_ring_sz = vring->size;
+ tapid->tx_ring_sz = vring->queue_size;
tapid->tap_flags = vif->tap_flags;
clib_memcpy(&tapid->host_mac_addr, vif->host_mac_addr, 6);
if (vif->host_if_name)
@@ -967,7 +926,6 @@ tap_dump_ifs (tap_interface_details_t ** out_tapids)
tapid->host_ip6_prefix_len = vif->host_ip6_prefix_len;
tapid->host_mtu_size = vif->host_mtu_size;
}
- /* *INDENT-ON* */
*out_tapids = r_tapids;
diff --git a/src/vnet/devices/tap/tap.h b/src/vnet/devices/tap/tap.h
index 2efaa511a49..6b88c34fe41 100644
--- a/src/vnet/devices/tap/tap.h
+++ b/src/vnet/devices/tap/tap.h
@@ -44,7 +44,8 @@ typedef struct
u32 id;
u8 mac_addr_set;
mac_address_t mac_addr;
- u8 num_rx_queues;
+ u16 num_rx_queues;
+ u16 num_tx_queues;
u16 rx_ring_sz;
u16 tx_ring_sz;
u32 tap_flags;
@@ -98,8 +99,10 @@ typedef struct
/* host mtu size, configurable through startup.conf */
int host_mtu_size;
+ u16 msg_id_base;
} tap_main_t;
+extern tap_main_t tap_main;
void tap_create_if (vlib_main_t * vm, tap_create_if_args_t * args);
int tap_delete_if (vlib_main_t * vm, u32 sw_if_index);
int tap_gso_enable_disable (vlib_main_t * vm, u32 sw_if_index,
diff --git a/src/vnet/devices/tap/tapv2.api b/src/vnet/devices/tap/tapv2.api
index 6b6618411a6..bf53d1bc6fe 100644
--- a/src/vnet/devices/tap/tapv2.api
+++ b/src/vnet/devices/tap/tapv2.api
@@ -43,6 +43,82 @@ enum tap_flags {
@param use_random_mac - let the system generate a unique mac address
@param mac_address - mac addr to assign to the interface if use_random not set
@param num_rx_queues - number of rx queues
+ @param num_tx_queues - number of tx queues
+ @param tx_ring_sz - the number of entries of TX ring, optional, default is 256 entries, must be power of 2
+ @param rx_ring_sz - the number of entries of RX ring, optional, default is 256 entries, must be power of 2
+ @param host_mtu_set - host MTU should be set
+ @param host_mtu_size - host MTU size
+ @param host_mac_addr_set - host side interface mac address should be set
+ @param host_mac_addr - host side interface mac address
+ @param host_ip4_prefix_set - host IPv4 ip address should be set
+ @param host_ip4_prefix - host IPv4 ip address
+ @param host_ip6_prefix_set - host IPv6 ip address should be set
+ @param host_ip6_prefix - host IPv6 ip address
+ @param host_ip4_gw_set - host IPv4 default gateway should be set
+ @param host_ip4_gw - host IPv4 default gateway
+ @param host_ip6_gw_set - host IPv6 default gateway should be set
+ @param host_ip6_gw - host IPv6 default gateway
+ @param tap_flags - flags for the TAP interface creation
+ @param host_if_name_set - host side interface name should be set
+ @param host_if_name - host side interface name
+ @param host_namespace_set - host namespace should be set
+ @param host_namespace - host namespace to attach interface to
+ @param host_bridge_set - host bridge should be set
+ @param host_bridge - host bridge to attach interface to
+ @param tag - tag
+*/
+autoendian define tap_create_v3
+{
+ u32 client_index;
+ u32 context;
+ u32 id [default=0xffffffff];
+ bool use_random_mac [default=true];
+ vl_api_mac_address_t mac_address;
+ u16 num_rx_queues [default=1];
+ u16 num_tx_queues [default=1];
+ u16 tx_ring_sz [default=256];
+ u16 rx_ring_sz [default=256];
+ bool host_mtu_set;
+ u32 host_mtu_size;
+ bool host_mac_addr_set;
+ vl_api_mac_address_t host_mac_addr;
+ bool host_ip4_prefix_set;
+ vl_api_ip4_address_with_prefix_t host_ip4_prefix;
+ bool host_ip6_prefix_set;
+ vl_api_ip6_address_with_prefix_t host_ip6_prefix;
+ bool host_ip4_gw_set;
+ vl_api_ip4_address_t host_ip4_gw;
+ bool host_ip6_gw_set;
+ vl_api_ip6_address_t host_ip6_gw;
+ vl_api_tap_flags_t tap_flags;
+ bool host_namespace_set;
+ string host_namespace[64];
+ bool host_if_name_set;
+ string host_if_name[64];
+ bool host_bridge_set;
+ string host_bridge[64];
+ string tag[];
+};
+
+/** \brief Reply for tap create reply
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param sw_if_index - software index allocated for the new tap interface
+*/
+autoendian define tap_create_v3_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
+
+/** \brief Initialize a new tap interface with the given parameters
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param id - interface id, 0xffffffff means auto
+ @param use_random_mac - let the system generate a unique mac address
+ @param mac_address - mac addr to assign to the interface if use_random not set
+ @param num_rx_queues - number of rx queues
@param tx_ring_sz - the number of entries of TX ring, optional, default is 256 entries, must be power of 2
@param rx_ring_sz - the number of entries of RX ring, optional, default is 256 entries, must be power of 2
@param host_mtu_set - host MTU should be set
@@ -68,6 +144,8 @@ enum tap_flags {
*/
define tap_create_v2
{
+ option deprecated;
+
u32 client_index;
u32 context;
u32 id [default=0xffffffff];
@@ -105,6 +183,8 @@ define tap_create_v2
*/
define tap_create_v2_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
diff --git a/src/vnet/devices/tap/tapv2_api.c b/src/vnet/devices/tap/tapv2_api.c
index 08dca0dc92b..ab4189ab607 100644
--- a/src/vnet/devices/tap/tapv2_api.c
+++ b/src/vnet/devices/tap/tapv2_api.c
@@ -32,10 +32,102 @@
#include <vnet/devices/tap/tapv2.api_enum.h>
#include <vnet/devices/tap/tapv2.api_types.h>
-#define REPLY_MSG_ID_BASE msg_id_base
+#define REPLY_MSG_ID_BASE tap_main.msg_id_base
#include <vlibapi/api_helper_macros.h>
-static u16 msg_id_base;
+static void
+vl_api_tap_create_v3_t_handler (vl_api_tap_create_v3_t *mp)
+{
+ vl_api_registration_t *reg;
+ int rv;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ vnet_main_t *vnm = vnet_get_main ();
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_tap_create_v3_reply_t *rmp;
+
+ tap_create_if_args_t _a, *ap = &_a;
+
+ clib_memset (ap, 0, sizeof (*ap));
+
+ ap->id = mp->id;
+ if (!mp->use_random_mac)
+ {
+ mac_address_decode (mp->mac_address, &ap->mac_addr);
+ ap->mac_addr_set = 1;
+ }
+ ap->rx_ring_sz = mp->rx_ring_sz;
+ ap->tx_ring_sz = mp->tx_ring_sz;
+ ap->sw_if_index = (u32) ~0;
+ ap->num_rx_queues = clib_max (1, mp->num_rx_queues);
+ ap->num_tx_queues = mp->num_tx_queues;
+
+ if (mp->host_if_name_set)
+ ap->host_if_name = format (0, "%s%c", mp->host_if_name, 0);
+
+ if (mp->host_mac_addr_set)
+ {
+ mac_address_decode (mp->host_mac_addr, &ap->host_mac_addr);
+ }
+
+ if (mp->host_namespace_set)
+ ap->host_namespace = format (0, "%s%c", mp->host_namespace, 0);
+
+ if (mp->host_bridge_set)
+ ap->host_bridge = format (0, "%s%c", mp->host_bridge, 0);
+
+ if (mp->host_ip4_prefix_set)
+ {
+ ip4_address_decode (mp->host_ip4_prefix.address, &ap->host_ip4_addr);
+ ap->host_ip4_prefix_len = mp->host_ip4_prefix.len;
+ }
+
+ if (mp->host_ip6_prefix_set)
+ {
+ ip6_address_decode (mp->host_ip6_prefix.address, &ap->host_ip6_addr);
+ ap->host_ip6_prefix_len = mp->host_ip6_prefix.len;
+ }
+
+ if (mp->host_ip4_gw_set)
+ {
+ ip4_address_decode (mp->host_ip4_gw, &ap->host_ip4_gw);
+ ap->host_ip4_gw_set = 1;
+ }
+
+ if (mp->host_ip6_gw_set)
+ {
+ ip6_address_decode (mp->host_ip6_gw, &ap->host_ip6_gw);
+ ap->host_ip6_gw_set = 1;
+ }
+
+ if (mp->host_mtu_set)
+ {
+ ap->host_mtu_size = mp->host_mtu_size;
+ ap->host_mtu_set = 1;
+ }
+
+ ap->tap_flags = mp->tap_flags;
+
+ tap_create_if (vm, ap);
+
+ /* If a tag was supplied... */
+ if (vl_api_string_len (&mp->tag))
+ {
+ u8 *tag = vl_api_from_api_to_new_vec (mp, &mp->tag);
+ vnet_set_sw_interface_tag (vnm, tag, ap->sw_if_index);
+ }
+
+ vec_free (ap->host_if_name);
+ vec_free (ap->host_namespace);
+ vec_free (ap->host_bridge);
+
+ rv = ap->rv;
+ REPLY_MACRO2_END (VL_API_TAP_CREATE_V3_REPLY,
+ ({ rmp->sw_if_index = ap->sw_if_index; }));
+}
static void
vl_api_tap_create_v2_t_handler (vl_api_tap_create_v2_t * mp)
@@ -63,6 +155,7 @@ vl_api_tap_create_v2_t_handler (vl_api_tap_create_v2_t * mp)
ap->tx_ring_sz = ntohs (mp->tx_ring_sz);
ap->sw_if_index = (u32) ~ 0;
ap->num_rx_queues = 1;
+ ap->num_tx_queues = 1;
if (mp->num_rx_queues > 1)
ap->num_rx_queues = mp->num_rx_queues;
diff --git a/src/vnet/devices/virtio/FEATURE.yaml b/src/vnet/devices/virtio/FEATURE.yaml
index 7b2fb59e1ad..446a45b61a3 100644
--- a/src/vnet/devices/virtio/FEATURE.yaml
+++ b/src/vnet/devices/virtio/FEATURE.yaml
@@ -1,6 +1,6 @@
---
name: Virtio PCI Device
-maintainer: sykazmi@cisco.com sluong@cisco.com
+maintainer: mohsin.kazmi14@gmail.com sluong@cisco.com
features:
- Driver mode to emulate PCI interface presented to VPP from
the host interface.
@@ -11,6 +11,8 @@ features:
- Support multi-queue, GSO, checksum offload, indirect descriptor,
jumbo frame, and packed ring.
- Support virtio 1.1 packed ring in vhost
+ - Support for tx queue size configuration (tested on host kernel 5.15
+ and qemu version 6.2.0)
description: "Virtio implementation"
missing:
- API dump filtering by sw_if_index
diff --git a/src/vnet/devices/virtio/cli.c b/src/vnet/devices/virtio/cli.c
index a78336997e2..c1b6c8be065 100644
--- a/src/vnet/devices/virtio/cli.c
+++ b/src/vnet/devices/virtio/cli.c
@@ -31,6 +31,7 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
virtio_pci_create_if_args_t args;
u64 feature_mask = (u64) ~ (0ULL);
u32 buffering_size = 0;
+ u32 txq_size = 0;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -43,6 +44,8 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
;
else if (unformat (line_input, "feature-mask 0x%llx", &feature_mask))
args.features = feature_mask;
+ else if (unformat (line_input, "tx-queue-size %u", &txq_size))
+ args.tx_queue_size = txq_size;
else if (unformat (line_input, "gso-enabled"))
args.gso_enabled = 1;
else if (unformat (line_input, "csum-enabled"))
@@ -55,6 +58,10 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
else if (unformat (line_input, "packed"))
args.virtio_flags |= VIRTIO_FLAG_PACKED;
+ else if (unformat (line_input, "bind force"))
+ args.bind = VIRTIO_BIND_FORCE;
+ else if (unformat (line_input, "bind"))
+ args.bind = VIRTIO_BIND_DEFAULT;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
@@ -66,15 +73,14 @@ virtio_pci_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
return args.error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (virtio_pci_create_command, static) = {
.path = "create interface virtio",
.short_help = "create interface virtio <pci-address> "
- "[feature-mask <hex-mask>] [gso-enabled] [csum-enabled] "
- "[buffering [size <buffering-szie>]] [packed]",
+ "[feature-mask <hex-mask>] [tx-queue-size <size>] "
+ "[gso-enabled] [csum-enabled] "
+ "[buffering [size <buffering-szie>]] [packed] [bind [force]]",
.function = virtio_pci_create_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
virtio_pci_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -120,14 +126,12 @@ virtio_pci_delete_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (virtio_pci_delete_command, static) = {
.path = "delete interface virtio",
.short_help = "delete interface virtio "
"{<interface> | sw_if_index <sw_idx>}",
.function = virtio_pci_delete_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
virtio_pci_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -182,14 +186,12 @@ virtio_pci_enable_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (virtio_pci_enable_command, static) = {
.path = "set virtio pci",
.short_help = "set virtio pci {<interface> | sw_if_index <sw_idx>}"
" [gso-enabled | csum-offload-enabled | offloads-disabled]",
.function = virtio_pci_enable_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_virtio_pci_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -248,13 +250,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_virtio_pci_command, static) = {
.path = "show virtio pci",
.short_help = "show virtio pci [<interface>] [descriptors | desc] [debug-device]",
.function = show_virtio_pci_fn,
};
-/* *INDENT-ON* */
clib_error_t *
virtio_pci_cli_init (vlib_main_t * vm)
diff --git a/src/vnet/devices/virtio/device.c b/src/vnet/devices/virtio/device.c
index aa816e2e17d..112f77e7065 100644
--- a/src/vnet/devices/virtio/device.c
+++ b/src/vnet/devices/virtio/device.c
@@ -27,6 +27,7 @@
#include <vnet/gso/hdr_offset_parser.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip_psh_cksum.h>
#include <vnet/tcp/tcp_packet.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/devices/virtio/virtio.h>
@@ -62,13 +63,31 @@ format_virtio_device (u8 * s, va_list * args)
u32 dev_instance = va_arg (*args, u32);
int verbose = va_arg (*args, int);
u32 indent = format_get_indent (s);
+ virtio_main_t *vim = &virtio_main;
+ virtio_if_t *vif = vec_elt_at_index (vim->interfaces, dev_instance);
+ vnet_virtio_vring_t *vring = 0;
s = format (s, "VIRTIO interface");
if (verbose)
{
s = format (s, "\n%U instance %u", format_white_space, indent + 2,
dev_instance);
+ s = format (s, "\n%U RX QUEUE : Total Packets", format_white_space,
+ indent + 4);
+ vec_foreach (vring, vif->rxq_vrings)
+ {
+ s = format (s, "\n%U %8u : %llu", format_white_space, indent + 4,
+ RX_QUEUE_ACCESS (vring->queue_id), vring->total_packets);
+ }
+ s = format (s, "\n%U TX QUEUE : Total Packets", format_white_space,
+ indent + 4);
+ vec_foreach (vring, vif->txq_vrings)
+ {
+ s = format (s, "\n%U %8u : %llu", format_white_space, indent + 4,
+ TX_QUEUE_ACCESS (vring->queue_id), vring->total_packets);
+ }
}
+
return s;
}
@@ -88,9 +107,8 @@ format_virtio_tx_trace (u8 * s, va_list * va)
virtio_tx_trace_t *t = va_arg (*va, virtio_tx_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%Ubuffer 0x%x: %U\n",
- format_white_space, indent,
- t->buffer_index, format_vnet_buffer, &t->buffer);
+ s = format (s, "%Ubuffer 0x%x: %U\n", format_white_space, indent,
+ t->buffer_index, format_vnet_buffer_no_chain, &t->buffer);
s =
format (s, "%U%U\n", format_white_space, indent,
format_generic_header_offset, &t->gho);
@@ -109,6 +127,7 @@ virtio_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b0,
t = vlib_add_trace (vm, node, b0, sizeof (t[0]));
t->sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
t->buffer_index = bi;
+ clib_memset (&t->gho, 0, sizeof (t->gho));
if (is_tun)
{
int is_ip4 = 0, is_ip6 = 0;
@@ -166,11 +185,12 @@ virtio_memset_ring_u32 (u32 *ring, u32 start, u32 ring_size, u32 n_buffers)
}
static void
-virtio_free_used_device_desc_split (vlib_main_t *vm, virtio_vring_t *vring,
+virtio_free_used_device_desc_split (vlib_main_t *vm,
+ vnet_virtio_vring_t *vring,
uword node_index)
{
u16 used = vring->desc_in_use;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
u16 mask = sz - 1;
u16 last = vring->last_used_idx;
u16 n_left = vring->used->idx - last;
@@ -181,7 +201,7 @@ virtio_free_used_device_desc_split (vlib_main_t *vm, virtio_vring_t *vring,
while (n_left)
{
- vring_used_elem_t *e = &vring->used->ring[last & mask];
+ vnet_virtio_vring_used_elem_t *e = &vring->used->ring[last & mask];
u16 slot, n_buffers;
slot = n_buffers = e->id;
@@ -190,7 +210,7 @@ virtio_free_used_device_desc_split (vlib_main_t *vm, virtio_vring_t *vring,
n_left--;
last++;
n_buffers++;
- vring_desc_t *d = &vring->desc[e->id];
+ vnet_virtio_vring_desc_t *d = &vring->desc[e->id];
u16 next;
while (d->flags & VRING_DESC_F_NEXT)
{
@@ -232,11 +252,12 @@ virtio_free_used_device_desc_split (vlib_main_t *vm, virtio_vring_t *vring,
}
static void
-virtio_free_used_device_desc_packed (vlib_main_t *vm, virtio_vring_t *vring,
+virtio_free_used_device_desc_packed (vlib_main_t *vm,
+ vnet_virtio_vring_t *vring,
uword node_index)
{
- vring_packed_desc_t *d;
- u16 sz = vring->size;
+ vnet_virtio_vring_packed_desc_t *d;
+ u16 sz = vring->queue_size;
u16 last = vring->last_used_idx;
u16 n_buffers = 0, start;
u16 flags;
@@ -273,7 +294,7 @@ virtio_free_used_device_desc_packed (vlib_main_t *vm, virtio_vring_t *vring,
}
static void
-virtio_free_used_device_desc (vlib_main_t *vm, virtio_vring_t *vring,
+virtio_free_used_device_desc (vlib_main_t *vm, vnet_virtio_vring_t *vring,
uword node_index, int packed)
{
if (packed)
@@ -284,74 +305,89 @@ virtio_free_used_device_desc (vlib_main_t *vm, virtio_vring_t *vring,
}
static void
-set_checksum_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr,
+set_checksum_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr,
const int is_l2)
{
vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
-
+ i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset - b->current_data;
if (b->flags & VNET_BUFFER_F_IS_IP4)
{
ip4_header_t *ip4;
- generic_header_offset_t gho = { 0 };
- vnet_generic_header_offset_parser (b, &gho, is_l2, 1 /* ip4 */ ,
- 0 /* ip6 */ );
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset; // 0x22;
+ hdr->csum_start = l4_hdr_offset; // 0x22;
+
+ /*
+ * virtio devices do not support IP4 checksum offload. So driver takes
+ * care of it while doing tx.
+ */
+ ip4 = (ip4_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset);
+ if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ ip4->checksum = ip4_header_checksum (ip4);
+
+ /*
+ * virtio devices assume the l4 header is set to the checksum of the
+ * l3 pseudo-header, so we compute it before tx-ing
+ */
if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
{
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
+ tcp->checksum = ip4_pseudo_header_cksum (ip4);
hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
}
else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
{
+ udp_header_t *udp =
+ (udp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
+ udp->checksum = ip4_pseudo_header_cksum (ip4);
hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
}
-
- /*
- * virtio devices do not support IP4 checksum offload. So driver takes care
- * of it while doing tx.
- */
- ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
- if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
- ip4->checksum = ip4_header_checksum (ip4);
}
else if (b->flags & VNET_BUFFER_F_IS_IP6)
{
- generic_header_offset_t gho = { 0 };
- vnet_generic_header_offset_parser (b, &gho, is_l2, 0 /* ip4 */ ,
- 1 /* ip6 */ );
+ ip6_header_t *ip6;
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset; // 0x36;
+ hdr->csum_start = l4_hdr_offset; // 0x36;
+ ip6 = (ip6_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset);
+
+ /*
+ * virtio devices assume the l4 header is set to the checksum of the
+ * l3 pseudo-header, so we compute it before tx-ing
+ */
if (oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
{
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
+ tcp->checksum = ip6_pseudo_header_cksum (ip6);
hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
}
else if (oflags & VNET_BUFFER_OFFLOAD_F_UDP_CKSUM)
{
+ udp_header_t *udp =
+ (udp_header_t *) (b->data + vnet_buffer (b)->l4_hdr_offset);
+ udp->checksum = ip6_pseudo_header_cksum (ip6);
hdr->csum_offset = STRUCT_OFFSET_OF (udp_header_t, checksum);
}
}
}
static void
-set_gso_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr, const int is_l2)
+set_gso_offsets (vlib_buffer_t *b, vnet_virtio_net_hdr_v1_t *hdr,
+ const int is_l2)
{
vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+ i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset - b->current_data;
if (b->flags & VNET_BUFFER_F_IS_IP4)
{
ip4_header_t *ip4;
- generic_header_offset_t gho = { 0 };
- vnet_generic_header_offset_parser (b, &gho, is_l2, 1 /* ip4 */ ,
- 0 /* ip6 */ );
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->hdr_len = gho.hdr_sz;
+ hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b)->gso_l4_hdr_sz;
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset; // 0x22;
+ hdr->csum_start = l4_hdr_offset; // 0x22;
hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
- ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b) + gho.l3_hdr_offset);
+ ip4 = (ip4_header_t *) (b->data + vnet_buffer (b)->l3_hdr_offset);
/*
* virtio devices do not support IP4 checksum offload. So driver takes care
* of it while doing tx.
@@ -361,35 +397,33 @@ set_gso_offsets (vlib_buffer_t *b, virtio_net_hdr_v1_t *hdr, const int is_l2)
}
else if (b->flags & VNET_BUFFER_F_IS_IP6)
{
- generic_header_offset_t gho = { 0 };
- vnet_generic_header_offset_parser (b, &gho, is_l2, 0 /* ip4 */ ,
- 1 /* ip6 */ );
hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
hdr->gso_size = vnet_buffer2 (b)->gso_size;
- hdr->hdr_len = gho.hdr_sz;
+ hdr->hdr_len = l4_hdr_offset + vnet_buffer2 (b)->gso_l4_hdr_sz;
hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
- hdr->csum_start = gho.l4_hdr_offset; // 0x36;
+ hdr->csum_start = l4_hdr_offset; // 0x36;
hdr->csum_offset = STRUCT_OFFSET_OF (tcp_header_t, checksum);
}
}
static u16
add_buffer_to_slot (vlib_main_t *vm, vlib_node_runtime_t *node,
- virtio_if_t *vif, virtio_vring_t *vring, u32 bi,
+ virtio_if_t *vif, vnet_virtio_vring_t *vring, u32 bi,
u16 free_desc_count, u16 avail, u16 next, u16 mask,
int hdr_sz, int do_gso, int csum_offload, int is_pci,
int is_tun, int is_indirect, int is_any_layout)
{
u16 n_added = 0;
- vring_desc_t *d;
+ vnet_virtio_vring_desc_t *d;
int is_l2 = !is_tun;
d = &vring->desc[next];
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- virtio_net_hdr_v1_t *hdr = vlib_buffer_get_current (b) - hdr_sz;
+ vnet_virtio_net_hdr_v1_t *hdr = vlib_buffer_get_current (b) - hdr_sz;
u32 drop_inline = ~0;
clib_memset_u8 (hdr, 0, hdr_sz);
+ vring->total_packets++;
if (b->flags & VNET_BUFFER_F_GSO)
{
if (do_gso)
@@ -446,8 +480,8 @@ add_buffer_to_slot (vlib_main_t *vm, vlib_node_runtime_t *node,
indirect_desc->next_buffer = bi;
bi = indirect_buffer;
- vring_desc_t *id =
- (vring_desc_t *) vlib_buffer_get_current (indirect_desc);
+ vnet_virtio_vring_desc_t *id =
+ (vnet_virtio_vring_desc_t *) vlib_buffer_get_current (indirect_desc);
u32 count = 1;
if (is_pci)
{
@@ -516,7 +550,7 @@ add_buffer_to_slot (vlib_main_t *vm, vlib_node_runtime_t *node,
}
id->flags = 0;
id->next = 0;
- d->len = count * sizeof (vring_desc_t);
+ d->len = count * sizeof (vnet_virtio_vring_desc_t);
d->flags = VRING_DESC_F_INDIRECT;
}
else if (is_pci)
@@ -582,20 +616,22 @@ done:
static u16
add_buffer_to_slot_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
- virtio_if_t *vif, virtio_vring_t *vring, u32 bi,
- u16 next, int hdr_sz, int do_gso, int csum_offload,
- int is_pci, int is_tun, int is_indirect,
- int is_any_layout)
+ virtio_if_t *vif, vnet_virtio_vring_t *vring,
+ u32 bi, u16 next, int hdr_sz, int do_gso,
+ int csum_offload, int is_pci, int is_tun,
+ int is_indirect, int is_any_layout)
{
u16 n_added = 0, flags = 0;
int is_l2 = !is_tun;
- vring_packed_desc_t *d = &vring->packed_desc[next];
+ vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[next];
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- virtio_net_hdr_v1_t *hdr = vlib_buffer_get_current (b) - hdr_sz;
+ vnet_virtio_net_hdr_v1_t *hdr = vlib_buffer_get_current (b) - hdr_sz;
u32 drop_inline = ~0;
clib_memset (hdr, 0, hdr_sz);
+ vring->total_packets++;
+
if (b->flags & VNET_BUFFER_F_GSO)
{
if (do_gso)
@@ -652,8 +688,9 @@ add_buffer_to_slot_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
indirect_desc->next_buffer = bi;
bi = indirect_buffer;
- vring_packed_desc_t *id =
- (vring_packed_desc_t *) vlib_buffer_get_current (indirect_desc);
+ vnet_virtio_vring_packed_desc_t *id =
+ (vnet_virtio_vring_packed_desc_t *) vlib_buffer_get_current (
+ indirect_desc);
u32 count = 1;
if (is_pci)
{
@@ -697,7 +734,7 @@ add_buffer_to_slot_packed (vlib_main_t *vm, vlib_node_runtime_t *node,
}
id->flags = 0;
id->id = 0;
- d->len = count * sizeof (vring_packed_desc_t);
+ d->len = count * sizeof (vnet_virtio_vring_packed_desc_t);
flags = VRING_DESC_F_INDIRECT;
}
else
@@ -729,12 +766,10 @@ done:
}
static uword
-virtio_interface_tx_packed_gso_inline (vlib_main_t *vm,
- vlib_node_runtime_t *node,
- virtio_if_t *vif, virtio_if_type_t type,
- virtio_vring_t *vring, u32 *buffers,
- u16 n_left, const int do_gso,
- const int csum_offload)
+virtio_interface_tx_packed_gso_inline (
+ vlib_main_t *vm, vlib_node_runtime_t *node, virtio_if_t *vif,
+ virtio_if_type_t type, vnet_virtio_vring_t *vring, u32 *buffers, u16 n_left,
+ const int do_gso, const int csum_offload)
{
int is_pci = (type == VIRTIO_IF_TYPE_PCI);
int is_tun = (type == VIRTIO_IF_TYPE_TUN);
@@ -743,7 +778,7 @@ virtio_interface_tx_packed_gso_inline (vlib_main_t *vm,
int is_any_layout =
((vif->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) != 0);
const int hdr_sz = vif->virtio_net_hdr_sz;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
u16 used, next, n_buffers = 0, n_buffers_left = 0;
u16 n_vectors = n_left;
@@ -777,6 +812,7 @@ virtio_interface_tx_packed_gso_inline (vlib_main_t *vm,
vring->avail_wrap_counter ^= 1;
}
}
+ virtio_txq_clear_scheduled (vring);
}
while (n_left && used < sz)
@@ -814,7 +850,7 @@ virtio_interface_tx_packed_gso_inline (vlib_main_t *vm,
}
static void
-virtio_find_free_desc (virtio_vring_t *vring, u16 size, u16 mask, u16 req,
+virtio_find_free_desc (vnet_virtio_vring_t *vring, u16 size, u16 mask, u16 req,
u16 next, u32 *first_free_desc_index,
u16 *free_desc_count)
{
@@ -853,7 +889,7 @@ static u16
virtio_interface_tx_split_gso_inline (vlib_main_t *vm,
vlib_node_runtime_t *node,
virtio_if_t *vif, virtio_if_type_t type,
- virtio_vring_t *vring, u32 *buffers,
+ vnet_virtio_vring_t *vring, u32 *buffers,
u16 n_left, int do_gso, int csum_offload)
{
u16 used, next, avail, n_buffers = 0, n_buffers_left = 0;
@@ -863,7 +899,7 @@ virtio_interface_tx_split_gso_inline (vlib_main_t *vm,
((vif->features & VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC)) != 0);
int is_any_layout =
((vif->features & VIRTIO_FEATURE (VIRTIO_F_ANY_LAYOUT)) != 0);
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
int hdr_sz = vif->virtio_net_hdr_sz;
u16 mask = sz - 1;
u16 n_vectors = n_left;
@@ -917,6 +953,7 @@ virtio_interface_tx_split_gso_inline (vlib_main_t *vm,
n_buffers_left--;
free_desc_count -= n_added;
}
+ virtio_txq_clear_scheduled (vring);
}
while (n_left && free_desc_count)
@@ -961,7 +998,7 @@ virtio_interface_tx_split_gso_inline (vlib_main_t *vm,
static u16
virtio_interface_tx_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
virtio_if_t *vif, virtio_if_type_t type,
- virtio_vring_t *vring, u32 *buffers,
+ vnet_virtio_vring_t *vring, u32 *buffers,
u16 n_left, int packed, int do_gso,
int csum_offload)
{
@@ -977,19 +1014,19 @@ virtio_interface_tx_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
static u16
virtio_interface_tx_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- virtio_if_t *vif, virtio_vring_t *vring,
+ virtio_if_t *vif, vnet_virtio_vring_t *vring,
virtio_if_type_t type, u32 *buffers, u16 n_left,
int packed)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
- if (hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO)
+ if (hw->caps & VNET_HW_IF_CAP_TCP_GSO)
return virtio_interface_tx_gso_inline (vm, node, vif, type, vring,
buffers, n_left, packed,
1 /* do_gso */ ,
1 /* checksum offload */ );
- else if (hw->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM)
+ else if (hw->caps & VNET_HW_IF_CAP_L4_TX_CKSUM)
return virtio_interface_tx_gso_inline (vm, node, vif, type, vring,
buffers, n_left, packed,
0 /* no do_gso */ ,
@@ -1008,21 +1045,24 @@ VNET_DEVICE_CLASS_TX_FN (virtio_device_class) (vlib_main_t * vm,
virtio_main_t *nm = &virtio_main;
vnet_interface_output_runtime_t *rund = (void *) node->runtime_data;
virtio_if_t *vif = pool_elt_at_index (nm->interfaces, rund->dev_instance);
- u16 qid = vm->thread_index % vif->num_txqs;
- virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, qid);
+ vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame);
+ u16 qid = tf->queue_id;
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, qid);
u16 n_left = frame->n_vectors;
u32 *buffers = vlib_frame_vector_args (frame);
u32 to[GRO_TO_VECTOR_SIZE (n_left)];
int packed = vif->is_packed;
u16 n_vectors = frame->n_vectors;
- clib_spinlock_lock_if_init (&vring->lockp);
+ if (tf->shared_queue)
+ clib_spinlock_lock (&vring->lockp);
if (vif->packet_coalesce)
{
n_vectors = n_left =
vnet_gro_inline (vm, vring->flow_table, buffers, n_left, to);
buffers = to;
+ virtio_txq_clear_scheduled (vring);
}
u16 retry_count = 2;
@@ -1066,7 +1106,8 @@ retry:
&buffers[n_vectors - n_left], n_left,
VIRTIO_TX_ERROR_NO_FREE_SLOTS);
- clib_spinlock_unlock_if_init (&vring->lockp);
+ if (tf->shared_queue)
+ clib_spinlock_unlock (&vring->lockp);
return frame->n_vectors - n_left;
}
@@ -1098,7 +1139,7 @@ virtio_clear_hw_interface_counters (u32 instance)
}
static void
-virtio_set_rx_interrupt (virtio_if_t *vif, virtio_vring_t *vring)
+virtio_set_rx_interrupt (virtio_if_t *vif, vnet_virtio_vring_t *vring)
{
if (vif->is_packed)
vring->driver_event->flags &= ~VRING_EVENT_F_DISABLE;
@@ -1107,7 +1148,7 @@ virtio_set_rx_interrupt (virtio_if_t *vif, virtio_vring_t *vring)
}
static void
-virtio_set_rx_polling (virtio_if_t *vif, virtio_vring_t *vring)
+virtio_set_rx_polling (virtio_if_t *vif, vnet_virtio_vring_t *vring)
{
if (vif->is_packed)
vring->driver_event->flags |= VRING_EVENT_F_DISABLE;
@@ -1119,11 +1160,10 @@ static clib_error_t *
virtio_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
vnet_hw_if_rx_mode mode)
{
- vlib_main_t *vm = vnm->vlib_main;
virtio_main_t *mm = &virtio_main;
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
virtio_if_t *vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
- virtio_vring_t *rx_vring = vec_elt_at_index (vif->rxq_vrings, qid);
+ vnet_virtio_vring_t *rx_vring = vec_elt_at_index (vif->rxq_vrings, qid);
if (vif->type == VIRTIO_IF_TYPE_PCI && !(vif->support_int_mode))
{
@@ -1132,30 +1172,9 @@ virtio_interface_rx_mode_change (vnet_main_t * vnm, u32 hw_if_index, u32 qid,
}
if (mode == VNET_HW_IF_RX_MODE_POLLING)
- {
- if (vif->packet_coalesce || vif->packet_buffering)
- {
- if (mm->interrupt_queues_count > 0)
- mm->interrupt_queues_count--;
- if (mm->interrupt_queues_count == 0)
- vlib_process_signal_event (vm,
- virtio_send_interrupt_node.index,
- VIRTIO_EVENT_STOP_TIMER, 0);
- }
virtio_set_rx_polling (vif, rx_vring);
- }
else
- {
- if (vif->packet_coalesce || vif->packet_buffering)
- {
- mm->interrupt_queues_count++;
- if (mm->interrupt_queues_count == 1)
- vlib_process_signal_event (vm,
- virtio_send_interrupt_node.index,
- VIRTIO_EVENT_START_TIMER, 0);
- }
virtio_set_rx_interrupt (vif, rx_vring);
- }
rx_vring->mode = mode;
@@ -1183,16 +1202,6 @@ virtio_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
return 0;
}
-static clib_error_t *
-virtio_subif_add_del_function (vnet_main_t * vnm,
- u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- /* Nothing for now */
- return 0;
-}
-
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (virtio_device_class) = {
.name = "virtio",
.format_device_name = format_virtio_device_name,
@@ -1203,11 +1212,9 @@ VNET_DEVICE_CLASS (virtio_device_class) = {
.rx_redirect_to_node = virtio_set_interface_next_node,
.clear_counters = virtio_clear_hw_interface_counters,
.admin_up_down_function = virtio_interface_admin_up_down,
- .subif_add_del_function = virtio_subif_add_del_function,
.rx_mode_change_function = virtio_interface_rx_mode_change,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/devices/virtio/node.c b/src/vnet/devices/virtio/node.c
index c36c0807de0..8c837575cf8 100644
--- a/src/vnet/devices/virtio/node.c
+++ b/src/vnet/devices/virtio/node.c
@@ -19,7 +19,11 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <net/if.h>
+#ifdef __linux__
#include <linux/if_tun.h>
+#elif __FreeBSD__
+#include <net/if_tun.h>
+#endif /* __linux */
#include <sys/ioctl.h>
#include <sys/eventfd.h>
@@ -27,11 +31,11 @@
#include <vlib/unix/unix.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/feature/feature.h>
-#include <vnet/gso/gro_func.h>
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
#include <vnet/devices/virtio/virtio.h>
#include <vnet/devices/virtio/virtio_inline.h>
@@ -47,7 +51,7 @@ typedef struct
u32 hw_if_index;
u16 ring;
u16 len;
- virtio_net_hdr_v1_t hdr;
+ vnet_virtio_net_hdr_v1_t hdr;
} virtio_input_trace_t;
static u8 *
@@ -69,8 +73,8 @@ format_virtio_input_trace (u8 * s, va_list * args)
}
static_always_inline void
-virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
- u8 * l4_proto, u8 * l4_hdr_sz, virtio_if_type_t type)
+virtio_needs_csum (vlib_buffer_t *b0, vnet_virtio_net_hdr_v1_t *hdr,
+ u8 *l4_proto, u8 *l4_hdr_sz, virtio_if_type_t type)
{
if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
{
@@ -91,8 +95,7 @@ virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
}
else
{
- ethernet_header_t *eh =
- (ethernet_header_t *) vlib_buffer_get_current (b0);
+ ethernet_header_t *eh = (ethernet_header_t *) b0->data;
ethertype = clib_net_to_host_u16 (eh->type);
l2hdr_sz = sizeof (ethernet_header_t);
@@ -117,8 +120,7 @@ virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP4))
{
- ip4_header_t *ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b0) + l2hdr_sz);
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l2hdr_sz);
vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + ip4_header_bytes (ip4);
*l4_proto = ip4->protocol;
oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
@@ -129,8 +131,7 @@ virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
}
else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
{
- ip6_header_t *ip6 =
- (ip6_header_t *) (vlib_buffer_get_current (b0) + l2hdr_sz);
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l2hdr_sz);
vnet_buffer (b0)->l4_hdr_offset = l2hdr_sz + sizeof (ip6_header_t);
/* FIXME IPv6 EH traversal */
*l4_proto = ip6->protocol;
@@ -142,18 +143,14 @@ virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
if (*l4_proto == IP_PROTOCOL_TCP)
{
oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
- tcp_header_t *tcp = (tcp_header_t *) (vlib_buffer_get_current (b0) +
- vnet_buffer
- (b0)->l4_hdr_offset);
+ tcp_header_t *tcp =
+ (tcp_header_t *) (b0->data + vnet_buffer (b0)->l4_hdr_offset);
*l4_hdr_sz = tcp_header_bytes (tcp);
}
else if (*l4_proto == IP_PROTOCOL_UDP)
{
oflags |= VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- udp_header_t *udp = (udp_header_t *) (vlib_buffer_get_current (b0) +
- vnet_buffer
- (b0)->l4_hdr_offset);
- *l4_hdr_sz = sizeof (*udp);
+ *l4_hdr_sz = sizeof (udp_header_t);
}
if (oflags)
vnet_buffer_offload_flags_set (b0, oflags);
@@ -161,7 +158,7 @@ virtio_needs_csum (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
}
static_always_inline void
-fill_gso_buffer_flags (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
+fill_gso_buffer_flags (vlib_buffer_t *b0, vnet_virtio_net_hdr_v1_t *hdr,
u8 l4_proto, u8 l4_hdr_sz)
{
if (hdr->gso_type == VIRTIO_NET_HDR_GSO_TCPV4)
@@ -181,7 +178,7 @@ fill_gso_buffer_flags (vlib_buffer_t * b0, virtio_net_hdr_v1_t * hdr,
}
static_always_inline u16
-virtio_n_left_to_process (virtio_vring_t * vring, const int packed)
+virtio_n_left_to_process (vnet_virtio_vring_t *vring, const int packed)
{
if (packed)
return vring->desc_in_use;
@@ -190,7 +187,7 @@ virtio_n_left_to_process (virtio_vring_t * vring, const int packed)
}
static_always_inline u16
-virtio_get_slot_id (virtio_vring_t * vring, const int packed, u16 last,
+virtio_get_slot_id (vnet_virtio_vring_t *vring, const int packed, u16 last,
u16 mask)
{
if (packed)
@@ -200,7 +197,7 @@ virtio_get_slot_id (virtio_vring_t * vring, const int packed, u16 last,
}
static_always_inline u16
-virtio_get_len (virtio_vring_t * vring, const int packed, const int hdr_sz,
+virtio_get_len (vnet_virtio_vring_t *vring, const int packed, const int hdr_sz,
u16 last, u16 mask)
{
if (packed)
@@ -209,22 +206,60 @@ virtio_get_len (virtio_vring_t * vring, const int packed, const int hdr_sz,
return vring->used->ring[last & mask].len - hdr_sz;
}
-#define increment_last(last, packed, vring) \
- do { \
- last++; \
- if (packed && last >= vring->size) \
- { \
- last = 0; \
- vring->used_wrap_counter ^= 1; \
- } \
- } while (0)
+#define virtio_packed_check_n_left(vring, last) \
+ do \
+ { \
+ vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[last]; \
+ u16 flags = d->flags; \
+ if ((flags & VRING_DESC_F_AVAIL) != (vring->used_wrap_counter << 7) || \
+ (flags & VRING_DESC_F_USED) != (vring->used_wrap_counter << 15)) \
+ { \
+ n_left = 0; \
+ } \
+ } \
+ while (0)
+
+#define increment_last(last, packed, vring) \
+ do \
+ { \
+ last++; \
+ if (packed && last >= vring->queue_size) \
+ { \
+ last = 0; \
+ vring->used_wrap_counter ^= 1; \
+ } \
+ } \
+ while (0)
+
+static_always_inline void
+virtio_device_input_ethernet (vlib_main_t *vm, vlib_node_runtime_t *node,
+ const u32 next_index, const u32 sw_if_index,
+ const u32 hw_if_index)
+{
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+
+ if (PREDICT_FALSE (VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT != next_index))
+ return;
+
+ nf = vlib_node_runtime_get_next_frame (
+ vm, node, VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = sw_if_index;
+ ef->hw_if_index = hw_if_index;
+ vlib_frame_no_append (f);
+}
static_always_inline uword
-virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, virtio_if_t * vif,
- virtio_vring_t * vring, virtio_if_type_t type,
- int gso_enabled, int checksum_offload_enabled,
- int packed)
+virtio_device_input_gso_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, virtio_if_t *vif,
+ vnet_virtio_vring_t *vring,
+ virtio_if_type_t type, int gso_enabled,
+ int checksum_offload_enabled, int packed)
{
vnet_main_t *vnm = vnet_get_main ();
u32 thread_index = vm->thread_index;
@@ -234,10 +269,15 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 *to_next = 0;
u32 n_rx_packets = 0;
u32 n_rx_bytes = 0;
- u16 mask = vring->size - 1;
+ u16 mask = vring->queue_size - 1;
u16 last = vring->last_used_idx;
u16 n_left = virtio_n_left_to_process (vring, packed);
- vlib_buffer_t bt;
+ vlib_buffer_t bt = {};
+
+ if (packed)
+ {
+ virtio_packed_check_n_left (vring, last);
+ }
if (n_left == 0)
return 0;
@@ -253,7 +293,7 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
next_index = vif->per_interface_next_index;
/* only for l2, redirect if feature path enabled */
- vnet_feature_start_device_input_x1 (vif->sw_if_index, &next_index, &bt);
+ vnet_feature_start_device_input (vif->sw_if_index, &next_index, &bt);
}
while (n_left)
@@ -261,13 +301,13 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 n_left_to_next;
u32 next0 = next_index;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left && n_left_to_next)
{
if (packed)
{
- vring_packed_desc_t *d = &vring->packed_desc[last];
+ vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[last];
u16 flags = d->flags;
if ((flags & VRING_DESC_F_AVAIL) !=
(vring->used_wrap_counter << 7)
@@ -280,13 +320,13 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
u8 l4_proto = 0, l4_hdr_sz = 0;
u16 num_buffers = 1;
- virtio_net_hdr_v1_t *hdr;
+ vnet_virtio_net_hdr_v1_t *hdr;
u16 slot = virtio_get_slot_id (vring, packed, last, mask);
u16 len = virtio_get_len (vring, packed, hdr_sz, last, mask);
u32 bi0 = vring->buffers[slot];
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
hdr = vlib_buffer_get_current (b0);
- if (hdr_sz == sizeof (virtio_net_hdr_v1_t))
+ if (hdr_sz == sizeof (vnet_virtio_net_hdr_v1_t))
num_buffers = hdr->num_buffers;
b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -371,7 +411,7 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tr->next_index = next0;
tr->hw_if_index = vif->hw_if_index;
tr->len = len;
- clib_memcpy_fast (&tr->hdr, hdr, hdr_sz);
+ clib_memcpy_fast (&tr->hdr, hdr, (hdr_sz == 12) ? 12 : 10);
}
/* enqueue buffer */
@@ -391,10 +431,13 @@ virtio_device_input_gso_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
n_rx_packets++;
n_rx_bytes += len;
}
+ virtio_device_input_ethernet (vm, node, next_index, vif->sw_if_index,
+ vif->hw_if_index);
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
vring->last_used_idx = last;
+ vring->total_packets += n_rx_packets;
vlib_increment_combined_counter (vnm->interface_main.combined_sw_if_counters
+ VNET_INTERFACE_COUNTER_RX, thread_index,
vif->sw_if_index, n_rx_packets,
@@ -408,23 +451,10 @@ virtio_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, virtio_if_t * vif, u16 qid,
virtio_if_type_t type)
{
- virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
const int hdr_sz = vif->virtio_net_hdr_sz;
- u16 txq_id = vm->thread_index % vif->num_txqs;
- virtio_vring_t *txq_vring = vec_elt_at_index (vif->txq_vrings, txq_id);
uword rv;
- if (clib_spinlock_trylock_if_init (&txq_vring->lockp))
- {
- if (vif->packet_coalesce)
- vnet_gro_flow_table_schedule_node_on_dispatcher
- (vm, txq_vring->flow_table);
- else if (vif->packet_buffering)
- virtio_vring_buffering_schedule_node_on_dispatcher
- (vm, txq_vring->buffering);
- clib_spinlock_unlock_if_init (&txq_vring->lockp);
- }
-
if (vif->is_packed)
{
if (vif->gso_enabled)
@@ -494,7 +524,6 @@ VLIB_NODE_FN (virtio_input_node) (vlib_main_t * vm,
return n_rx;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (virtio_input_node) = {
.name = "virtio-input",
.sibling_of = "device-input",
@@ -505,7 +534,6 @@ VLIB_REGISTER_NODE (virtio_input_node) = {
.n_errors = VIRTIO_INPUT_N_ERROR,
.error_strings = virtio_input_error_strings,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/devices/virtio/pci.c b/src/vnet/devices/virtio/pci.c
index 7ef4b2a52dd..6234f64fcfb 100644
--- a/src/vnet/devices/virtio/pci.c
+++ b/src/vnet/devices/virtio/pci.c
@@ -116,7 +116,7 @@ virtio_pci_irq_queue_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
line--;
u16 qid = line;
- virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, qid);
vnet_hw_if_rx_queue_set_int_pending (vnm, vring->queue_index);
}
@@ -131,13 +131,11 @@ virtio_pci_irq_config_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h,
if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP)
{
- vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
}
else
{
- vif->flags &= ~VIRTIO_IF_FLAG_ADMIN_UP;
vnet_hw_interface_set_flags (vnm, vif->hw_if_index, 0);
}
}
@@ -200,18 +198,18 @@ static int
virtio_pci_send_ctrl_msg_packed (vlib_main_t * vm, virtio_if_t * vif,
virtio_ctrl_msg_t * data, u32 len)
{
- virtio_vring_t *vring = vif->cxq_vring;
+ vnet_virtio_vring_t *vring = vif->cxq_vring;
virtio_net_ctrl_ack_t status = VIRTIO_NET_ERR;
virtio_ctrl_msg_t result;
u32 buffer_index;
vlib_buffer_t *b;
u16 used, next;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
u16 flags = 0, first_desc_flags = 0;
used = vring->desc_in_use;
next = vring->desc_next;
- vring_packed_desc_t *d = &vring->packed_desc[next];
+ vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[next];
if (vlib_buffer_alloc (vm, &buffer_index, 1))
b = vlib_get_buffer (vm, buffer_index);
@@ -319,9 +317,9 @@ virtio_pci_send_ctrl_msg_packed (vlib_main_t * vm, virtio_if_t * vif,
|| (flags & VRING_DESC_F_USED) != (vring->used_wrap_counter << 15));
last += 3;
- if (last >= vring->size)
+ if (last >= vring->queue_size)
{
- last = last - vring->size;
+ last = last - vring->queue_size;
vring->used_wrap_counter ^= 1;
}
vring->desc_in_use -= 3;
@@ -340,19 +338,19 @@ static int
virtio_pci_send_ctrl_msg_split (vlib_main_t * vm, virtio_if_t * vif,
virtio_ctrl_msg_t * data, u32 len)
{
- virtio_vring_t *vring = vif->cxq_vring;
+ vnet_virtio_vring_t *vring = vif->cxq_vring;
virtio_net_ctrl_ack_t status = VIRTIO_NET_ERR;
virtio_ctrl_msg_t result;
u32 buffer_index;
vlib_buffer_t *b;
u16 used, next, avail;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
u16 mask = sz - 1;
used = vring->desc_in_use;
next = vring->desc_next;
avail = vring->avail->idx;
- vring_desc_t *d = &vring->desc[next];
+ vnet_virtio_vring_desc_t *d = &vring->desc[next];
if (vlib_buffer_alloc (vm, &buffer_index, 1))
b = vlib_get_buffer (vm, buffer_index);
@@ -405,7 +403,7 @@ virtio_pci_send_ctrl_msg_split (vlib_main_t * vm, virtio_if_t * vif,
while (n_left)
{
- vring_used_elem_t *e = &vring->used->ring[last & mask];
+ vnet_virtio_vring_used_elem_t *e = &vring->used->ring[last & mask];
u16 slot = e->id;
d = &vring->desc[slot];
@@ -508,7 +506,7 @@ virtio_pci_offloads (vlib_main_t * vm, virtio_if_t * vif, int gso_enabled,
int csum_offload_enabled)
{
vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
+ vnet_hw_if_caps_change_t cc = {};
if ((vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ)) &&
(vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)))
@@ -524,10 +522,10 @@ virtio_pci_offloads (vlib_main_t * vm, virtio_if_t * vif, int gso_enabled,
else
{
vif->gso_enabled = 1;
- vif->csum_offload_enabled = 0;
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM;
+ vif->csum_offload_enabled = 1;
+ cc.val = cc.mask = VNET_HW_IF_CAP_TCP_GSO |
+ VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM;
}
}
else if (csum_offload_enabled
@@ -541,9 +539,10 @@ virtio_pci_offloads (vlib_main_t * vm, virtio_if_t * vif, int gso_enabled,
{
vif->csum_offload_enabled = 1;
vif->gso_enabled = 0;
- hw->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO;
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM;
+ cc.val =
+ VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM;
+ cc.mask = VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_TX_TCP_CKSUM |
+ VNET_HW_IF_CAP_TX_UDP_CKSUM;
}
}
else
@@ -556,12 +555,15 @@ virtio_pci_offloads (vlib_main_t * vm, virtio_if_t * vif, int gso_enabled,
{
vif->csum_offload_enabled = 0;
vif->gso_enabled = 0;
- hw->caps &= ~(VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM |
- VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO);
+ cc.val = 0;
+ cc.mask = VNET_HW_IF_CAP_L4_TX_CKSUM | VNET_HW_IF_CAP_TCP_GSO;
}
}
}
+ if (cc.mask)
+ vnet_hw_if_change_caps (vnm, vif->hw_if_index, &cc);
+
return 0;
}
@@ -598,7 +600,7 @@ virtio_pci_control_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
{
clib_error_t *error = 0;
u16 queue_size = 0;
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
u32 i = 0;
void *ptr = NULL;
@@ -613,34 +615,36 @@ virtio_pci_control_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
vec_validate_aligned (vif->cxq_vring, 0, CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->cxq_vring, 0);
- i =
- (((queue_size * sizeof (vring_packed_desc_t)) +
- sizeof (vring_desc_event_t) + VIRTIO_PCI_VRING_ALIGN -
- 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1)) + sizeof (vring_desc_event_t);
+ i = (((queue_size * sizeof (vnet_virtio_vring_packed_desc_t)) +
+ sizeof (vnet_virtio_vring_desc_event_t) + VNET_VIRTIO_PCI_VRING_ALIGN -
+ 1) &
+ ~(VNET_VIRTIO_PCI_VRING_ALIGN - 1)) +
+ sizeof (vnet_virtio_vring_desc_event_t);
- ptr =
- vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN,
- vif->numa_node);
+ ptr = vlib_physmem_alloc_aligned_on_numa (vm, i, VNET_VIRTIO_PCI_VRING_ALIGN,
+ vif->numa_node);
if (!ptr)
return vlib_physmem_last_error (vm);
clib_memset (ptr, 0, i);
vring->packed_desc = ptr;
- vring->driver_event = ptr + (queue_size * sizeof (vring_packed_desc_t));
+ vring->driver_event =
+ ptr + (queue_size * sizeof (vnet_virtio_vring_packed_desc_t));
vring->driver_event->off_wrap = 0;
vring->driver_event->flags = VRING_EVENT_F_DISABLE;
vring->device_event =
- ptr +
- (((queue_size * sizeof (vring_packed_desc_t)) +
- sizeof (vring_desc_event_t) + VIRTIO_PCI_VRING_ALIGN -
- 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1));
+ ptr + (((queue_size * sizeof (vnet_virtio_vring_packed_desc_t)) +
+ sizeof (vnet_virtio_vring_desc_event_t) +
+ VNET_VIRTIO_PCI_VRING_ALIGN - 1) &
+ ~(VNET_VIRTIO_PCI_VRING_ALIGN - 1));
vring->device_event->off_wrap = 0;
vring->device_event->flags = 0;
+ vring->total_packets = 0;
vring->queue_id = queue_num;
- vring->size = queue_size;
+ vring->queue_size = queue_size;
vring->avail_wrap_counter = 1;
vring->used_wrap_counter = 1;
@@ -648,7 +652,7 @@ virtio_pci_control_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
virtio_log_debug (vif, "control-queue: number %u, size %u", queue_num,
queue_size);
- vif->virtio_pci_func->setup_queue (vm, vif, queue_num, (void *) vring);
+ vif->virtio_pci_func->setup_queue (vm, vif, queue_num, vring);
vring->queue_notify_offset =
vif->notify_off_multiplier *
vif->virtio_pci_func->get_queue_notify_off (vm, vif, queue_num);
@@ -663,8 +667,7 @@ virtio_pci_control_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
{
clib_error_t *error = 0;
u16 queue_size = 0;
- virtio_vring_t *vring;
- vring_t vr;
+ vnet_virtio_vring_t *vring;
u32 i = 0;
void *ptr = NULL;
@@ -683,27 +686,21 @@ virtio_pci_control_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
vec_validate_aligned (vif->cxq_vring, 0, CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->cxq_vring, 0);
- i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN);
- i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN);
- ptr =
- vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN,
- vif->numa_node);
+ i = vnet_virtio_vring_size (queue_size, VNET_VIRTIO_PCI_VRING_ALIGN);
+ i = round_pow2 (i, VNET_VIRTIO_PCI_VRING_ALIGN);
+ ptr = vlib_physmem_alloc_aligned_on_numa (vm, i, VNET_VIRTIO_PCI_VRING_ALIGN,
+ vif->numa_node);
if (!ptr)
return vlib_physmem_last_error (vm);
clib_memset (ptr, 0, i);
- vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN);
- vring->desc = vr.desc;
- vring->avail = vr.avail;
- vring->used = vr.used;
+ vnet_virtio_vring_init (vring, queue_size, ptr, VNET_VIRTIO_PCI_VRING_ALIGN);
vring->queue_id = queue_num;
- vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT;
+ vring->total_packets = 0;
ASSERT (vring->buffers == 0);
-
- vring->size = queue_size;
virtio_log_debug (vif, "control-queue: number %u, size %u", queue_num,
queue_size);
- vif->virtio_pci_func->setup_queue (vm, vif, queue_num, ptr);
+ vif->virtio_pci_func->setup_queue (vm, vif, queue_num, vring);
vring->queue_notify_offset =
vif->notify_off_multiplier *
vif->virtio_pci_func->get_queue_notify_off (vm, vif, queue_num);
@@ -724,14 +721,12 @@ virtio_pci_control_vring_init (vlib_main_t * vm, virtio_if_t * vif,
}
clib_error_t *
-virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
- u16 queue_num)
+virtio_pci_vring_split_init (vlib_main_t *vm, virtio_if_t *vif, u16 queue_num,
+ u16 txq_size)
{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
clib_error_t *error = 0;
u16 queue_size = 0;
- virtio_vring_t *vring;
- vring_t vr;
+ vnet_virtio_vring_t *vring;
u32 i = 0;
void *ptr = NULL;
@@ -750,11 +745,20 @@ virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
if (queue_num % 2)
{
+ if (txq_size)
+ {
+ virtio_log_debug (vif, "tx-queue: number %u, default-size %u",
+ queue_num, queue_size);
+ vif->virtio_pci_func->set_queue_size (vm, vif, queue_num, txq_size);
+ queue_size =
+ vif->virtio_pci_func->get_queue_size (vm, vif, queue_num);
+ virtio_log_debug (vif, "tx-queue: number %u, new size %u", queue_num,
+ queue_size);
+ }
vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num),
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num));
- if (vif->max_queue_pairs < vtm->n_vlib_mains)
- clib_spinlock_init (&vring->lockp);
+ clib_spinlock_init (&vring->lockp);
}
else
{
@@ -762,21 +766,18 @@ virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num));
}
- i = vring_size (queue_size, VIRTIO_PCI_VRING_ALIGN);
- i = round_pow2 (i, VIRTIO_PCI_VRING_ALIGN);
- ptr =
- vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN,
- vif->numa_node);
+ i = vnet_virtio_vring_size (queue_size, VNET_VIRTIO_PCI_VRING_ALIGN);
+ i = round_pow2 (i, VNET_VIRTIO_PCI_VRING_ALIGN);
+ ptr = vlib_physmem_alloc_aligned_on_numa (vm, i, VNET_VIRTIO_PCI_VRING_ALIGN,
+ vif->numa_node);
if (!ptr)
return vlib_physmem_last_error (vm);
clib_memset (ptr, 0, i);
- vring_init (&vr, queue_size, ptr, VIRTIO_PCI_VRING_ALIGN);
- vring->desc = vr.desc;
- vring->avail = vr.avail;
- vring->used = vr.used;
+ vnet_virtio_vring_init (vring, queue_size, ptr, VNET_VIRTIO_PCI_VRING_ALIGN);
vring->queue_id = queue_num;
vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT;
vring->flow_table = 0;
+ vring->total_packets = 0;
ASSERT (vring->buffers == 0);
vec_validate_aligned (vring->buffers, queue_size, CLIB_CACHE_LINE_BYTES);
@@ -791,8 +792,8 @@ virtio_pci_vring_split_init (vlib_main_t * vm, virtio_if_t * vif,
virtio_log_debug (vif, "rx-queue: number %u, size %u", queue_num,
queue_size);
}
- vring->size = queue_size;
- if (vif->virtio_pci_func->setup_queue (vm, vif, queue_num, ptr))
+ vring->queue_size = queue_size;
+ if (vif->virtio_pci_func->setup_queue (vm, vif, queue_num, vring))
return clib_error_return (0, "error in queue address setup");
vring->queue_notify_offset =
@@ -807,10 +808,9 @@ clib_error_t *
virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
u16 queue_num)
{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
clib_error_t *error = 0;
u16 queue_size = 0;
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
u32 i = 0;
void *ptr = NULL;
@@ -827,8 +827,7 @@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num),
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (queue_num));
- if (vif->max_queue_pairs < vtm->n_vlib_mains)
- clib_spinlock_init (&vring->lockp);
+ clib_spinlock_init (&vring->lockp);
}
else
{
@@ -837,29 +836,30 @@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (queue_num));
}
- i =
- (((queue_size * sizeof (vring_packed_desc_t)) +
- sizeof (vring_desc_event_t) + VIRTIO_PCI_VRING_ALIGN -
- 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1)) + sizeof (vring_desc_event_t);
+ i = (((queue_size * sizeof (vnet_virtio_vring_packed_desc_t)) +
+ sizeof (vnet_virtio_vring_desc_event_t) + VNET_VIRTIO_PCI_VRING_ALIGN -
+ 1) &
+ ~(VNET_VIRTIO_PCI_VRING_ALIGN - 1)) +
+ sizeof (vnet_virtio_vring_desc_event_t);
- ptr =
- vlib_physmem_alloc_aligned_on_numa (vm, i, VIRTIO_PCI_VRING_ALIGN,
- vif->numa_node);
+ ptr = vlib_physmem_alloc_aligned_on_numa (vm, i, VNET_VIRTIO_PCI_VRING_ALIGN,
+ vif->numa_node);
if (!ptr)
return vlib_physmem_last_error (vm);
clib_memset (ptr, 0, i);
vring->packed_desc = ptr;
- vring->driver_event = ptr + (queue_size * sizeof (vring_packed_desc_t));
+ vring->driver_event =
+ ptr + (queue_size * sizeof (vnet_virtio_vring_packed_desc_t));
vring->driver_event->off_wrap = 0;
vring->driver_event->flags = VRING_EVENT_F_DISABLE;
vring->device_event =
- ptr +
- (((queue_size * sizeof (vring_packed_desc_t)) +
- sizeof (vring_desc_event_t) + VIRTIO_PCI_VRING_ALIGN -
- 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1));
+ ptr + (((queue_size * sizeof (vnet_virtio_vring_packed_desc_t)) +
+ sizeof (vnet_virtio_vring_desc_event_t) +
+ VNET_VIRTIO_PCI_VRING_ALIGN - 1) &
+ ~(VNET_VIRTIO_PCI_VRING_ALIGN - 1));
vring->device_event->off_wrap = 0;
vring->device_event->flags = 0;
@@ -867,6 +867,7 @@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
vring->avail_wrap_counter = 1;
vring->used_wrap_counter = 1;
+ vring->total_packets = 0;
ASSERT (vring->buffers == 0);
vec_validate_aligned (vring->buffers, queue_size, CLIB_CACHE_LINE_BYTES);
@@ -881,8 +882,8 @@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
virtio_log_debug (vif, "rx-queue: number %u, size %u", queue_num,
queue_size);
}
- vring->size = queue_size;
- if (vif->virtio_pci_func->setup_queue (vm, vif, queue_num, (void *) vring))
+ vring->queue_size = queue_size;
+ if (vif->virtio_pci_func->setup_queue (vm, vif, queue_num, vring))
return clib_error_return (0, "error in queue address setup");
vring->queue_notify_offset =
@@ -895,12 +896,13 @@ virtio_pci_vring_packed_init (vlib_main_t * vm, virtio_if_t * vif,
}
clib_error_t *
-virtio_pci_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 queue_num)
+virtio_pci_vring_init (vlib_main_t *vm, virtio_if_t *vif, u16 queue_num,
+ u16 txq_size)
{
if (vif->is_packed)
return virtio_pci_vring_packed_init (vm, vif, queue_num);
else
- return virtio_pci_vring_split_init (vm, vif, queue_num);
+ return virtio_pci_vring_split_init (vm, vif, queue_num, txq_size);
}
static void
@@ -1147,7 +1149,6 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
virtio_pci_create_if_args_t * args, void **bar)
{
clib_error_t *error = 0;
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
u8 status = 0;
if ((error = virtio_pci_read_caps (vm, vif, bar)))
@@ -1239,7 +1240,7 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
for (int i = 0; i < vif->max_queue_pairs; i++)
{
- if ((error = virtio_pci_vring_init (vm, vif, RX_QUEUE (i))))
+ if ((error = virtio_pci_vring_init (vm, vif, RX_QUEUE (i), 0)))
{
args->rv = VNET_API_ERROR_INIT_FAILED;
virtio_log_error (vif, "%s (%u) %s", "error in rxq-queue",
@@ -1254,23 +1255,8 @@ virtio_pci_device_init (vlib_main_t * vm, virtio_if_t * vif,
vif->num_rxqs++;
}
- if (i >= vtm->n_vlib_mains)
- {
- /*
- * There is 1:1 mapping between tx queue and vpp worker thread.
- * tx queue 0 is bind with thread index 0, tx queue 1 on thread
- * index 1 and so on.
- * Multiple worker threads can poll same tx queue when number of
- * workers are more than tx queues. In this case, 1:N mapping
- * between tx queue and vpp worker thread.
- */
- virtio_log_debug (vif, "%s %u, %s", "tx-queue: number",
- TX_QUEUE (i),
- "no VPP worker thread is available");
- continue;
- }
-
- if ((error = virtio_pci_vring_init (vm, vif, TX_QUEUE (i))))
+ if ((error = virtio_pci_vring_init (vm, vif, TX_QUEUE (i),
+ args->tx_queue_size)))
{
args->rv = VNET_API_ERROR_INIT_FAILED;
virtio_log_error (vif, "%s (%u) %s", "error in txq-queue",
@@ -1354,7 +1340,6 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
clib_error_t *error = 0;
u32 interrupt_count = 0;
- /* *INDENT-OFF* */
pool_foreach (vif, vim->interfaces) {
if (vif->pci_addr.as_u32 == args->addr)
{
@@ -1367,7 +1352,24 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
return;
}
}
- /* *INDENT-ON* */
+
+ if (args->bind)
+ {
+ vlib_pci_addr_t pci = { .as_u32 = args->addr };
+ error = vlib_pci_bind_to_uio (vm, &pci, (char *) "auto",
+ VIRTIO_BIND_FORCE == args->bind);
+ if (error)
+ {
+ args->rv = VNET_API_ERROR_INVALID_INTERFACE;
+ args->error =
+ clib_error_return (error, "%U: %s", format_vlib_pci_addr, &pci,
+ "error encountered on binding pci device");
+ vlib_log (VLIB_LOG_LEVEL_ERR, vim->log_default, "%U: %s",
+ format_vlib_pci_addr, &pci,
+ "error encountered on binding pci devicee");
+ return;
+ }
+ }
pool_get (vim->interfaces, vif);
vif->dev_instance = vif - vim->interfaces;
@@ -1483,25 +1485,18 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
}
/* create interface */
- error = ethernet_register_interface (vnm, virtio_device_class.index,
- vif->dev_instance, vif->mac_addr,
- &vif->hw_if_index,
- virtio_pci_flag_change);
-
- if (error)
- {
- args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
- virtio_log_error (vif,
- "error encountered on ethernet register interface");
- goto error;
- }
+ vnet_eth_interface_registration_t eir = {};
+ eir.dev_class_index = virtio_device_class.index;
+ eir.dev_instance = vif->dev_instance;
+ eir.address = vif->mac_addr;
+ eir.cb.flag_change = virtio_pci_flag_change;
+ vif->hw_if_index = vnet_eth_register_interface (vnm, &eir);
vnet_sw_interface_t *sw = vnet_get_hw_sw_interface (vnm, vif->hw_if_index);
vif->sw_if_index = sw->sw_if_index;
args->sw_if_index = sw->sw_if_index;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
- hw->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ vnet_hw_if_set_caps (vnm, vif->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
if (args->virtio_flags & VIRTIO_FLAG_BUFFERING)
{
@@ -1513,13 +1508,22 @@ virtio_pci_create_if (vlib_main_t * vm, virtio_pci_create_if_args_t * args)
"error encountered during packet buffering init");
goto error;
}
+ /*
+ * packet buffering flag needs to be set 1 before calling the
+ * virtio_pre_input_node_enable but after the successful initialization
+ * of buffering queues above.
+ * Packet buffering flag set to 0 if there will be any error during
+ * buffering initialization.
+ */
+ vif->packet_buffering = 1;
+ virtio_pre_input_node_enable (vm, vif);
}
virtio_vring_set_rx_queues (vm, vif);
+ virtio_vring_set_tx_queues (vm, vif);
if (virtio_pci_is_link_up (vm, vif) & VIRTIO_NET_S_LINK_UP)
{
- vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
vnet_hw_interface_set_flags (vnm, vif->hw_if_index,
VNET_HW_INTERFACE_FLAG_LINK_UP);
}
@@ -1556,17 +1560,19 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
vlib_pci_intr_disable (vm, vif->pci_dev_handle);
- for (i = 0; i < vif->max_queue_pairs; i++)
+ if (vif->virtio_pci_func)
{
- vif->virtio_pci_func->del_queue (vm, vif, RX_QUEUE (i));
- vif->virtio_pci_func->del_queue (vm, vif, TX_QUEUE (i));
- }
+ for (i = 0; i < vif->max_queue_pairs; i++)
+ {
+ vif->virtio_pci_func->del_queue (vm, vif, RX_QUEUE (i));
+ vif->virtio_pci_func->del_queue (vm, vif, TX_QUEUE (i));
+ }
- if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
- vif->virtio_pci_func->del_queue (vm, vif, vif->max_queue_pairs * 2);
+ if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
+ vif->virtio_pci_func->del_queue (vm, vif, vif->max_queue_pairs * 2);
- if (vif->virtio_pci_func)
- vif->virtio_pci_func->device_reset (vm, vif);
+ vif->virtio_pci_func->device_reset (vm, vif);
+ }
if (vif->hw_if_index)
{
@@ -1578,7 +1584,7 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
vec_foreach_index (i, vif->rxq_vrings)
{
- virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, i);
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, i);
if (vring->used)
{
virtio_free_buffers (vm, vring);
@@ -1587,9 +1593,12 @@ virtio_pci_delete_if (vlib_main_t * vm, virtio_if_t * vif)
vlib_physmem_free (vm, vring->desc);
}
+ if (vif->packet_buffering)
+ virtio_pre_input_node_disable (vm, vif);
+
vec_foreach_index (i, vif->txq_vrings)
{
- virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, i);
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->txq_vrings, i);
if (vring->used)
{
virtio_free_buffers (vm, vring);
diff --git a/src/vnet/devices/virtio/pci.h b/src/vnet/devices/virtio/pci.h
index 70aa9833c2d..5eb80f823be 100644
--- a/src/vnet/devices/virtio/pci.h
+++ b/src/vnet/devices/virtio/pci.h
@@ -87,7 +87,7 @@ typedef enum
#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12
-#define VIRTIO_PCI_VRING_ALIGN 4096
+#define VNET_VIRTIO_PCI_VRING_ALIGN 4096
typedef enum
{
@@ -154,13 +154,11 @@ typedef struct
* and an ack/status response in the last entry. Data for the
* command goes in between.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
u8 class;
u8 cmd;
}) virtio_net_ctrl_hdr_t;
-/* *INDENT-ON* */
typedef u8 virtio_net_ctrl_ack_t;
@@ -192,8 +190,8 @@ typedef struct _virtio_pci_func
u16 (*get_queue_size) (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id);
void (*set_queue_size) (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id,
u16 queue_size);
- u8 (*setup_queue) (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id,
- void *p);
+ u8 (*setup_queue) (vlib_main_t *vm, virtio_if_t *vif, u16 queue_id,
+ vnet_virtio_vring_t *vring);
void (*del_queue) (vlib_main_t * vm, virtio_if_t * vif, u16 queue_id);
u16 (*get_queue_notify_off) (vlib_main_t * vm, virtio_if_t * vif,
u16 queue_id);
@@ -227,6 +225,13 @@ typedef enum
#undef _
} virtio_flag_t;
+typedef enum
+{
+ VIRTIO_BIND_NONE = 0,
+ VIRTIO_BIND_DEFAULT = 1,
+ VIRTIO_BIND_FORCE = 2,
+} __clib_packed virtio_bind_t;
+
typedef struct
{
u32 addr;
@@ -238,6 +243,8 @@ typedef struct
u64 features;
u8 gso_enabled;
u8 checksum_offload_enabled;
+ u32 tx_queue_size;
+ virtio_bind_t bind;
u32 buffering_size;
u32 virtio_flags;
clib_error_t *error;
diff --git a/src/vnet/devices/virtio/virtio.api b/src/vnet/devices/virtio/virtio.api
index bbe2341a001..a11492ec258 100644
--- a/src/vnet/devices/virtio/virtio.api
+++ b/src/vnet/devices/virtio/virtio.api
@@ -56,7 +56,7 @@ define virtio_pci_create_reply
vl_api_interface_index_t sw_if_index;
};
-enum virtio_flags {
+enumflag virtio_flags {
VIRTIO_API_FLAG_GSO = 1, /* enable gso on the interface */
VIRTIO_API_FLAG_CSUM_OFFLOAD = 2, /* enable checksum offload without gso on the interface */
VIRTIO_API_FLAG_GRO_COALESCE = 4, /* enable packet coalescing on tx side, provided gso enabled */
diff --git a/src/vnet/devices/virtio/virtio.c b/src/vnet/devices/virtio/virtio.c
index b8054d13e5b..d2302fa1dc4 100644
--- a/src/vnet/devices/virtio/virtio.c
+++ b/src/vnet/devices/virtio/virtio.c
@@ -19,7 +19,11 @@
#include <sys/stat.h>
#include <fcntl.h>
#include <net/if.h>
+#ifdef __linux__
#include <linux/if_tun.h>
+#elif __FreeBSD__
+#include <net/if_tun.h>
+#endif /* __linux__ */
#include <sys/ioctl.h>
#include <sys/eventfd.h>
@@ -33,6 +37,7 @@
#include <vnet/devices/virtio/virtio_inline.h>
#include <vnet/devices/virtio/pci.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
virtio_main_t virtio_main;
@@ -59,7 +64,7 @@ call_read_ready (clib_file_t * uf)
clib_error_t *
virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
{
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
int i;
if (!is_pow2 (sz))
@@ -73,12 +78,10 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
if (idx % 2)
{
- vlib_thread_main_t *thm = vlib_get_thread_main ();
vec_validate_aligned (vif->txq_vrings, TX_QUEUE_ACCESS (idx),
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (idx));
- if (thm->n_vlib_mains > vif->num_txqs)
- clib_spinlock_init (&vring->lockp);
+ clib_spinlock_init (&vring->lockp);
}
else
{
@@ -86,19 +89,20 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
CLIB_CACHE_LINE_BYTES);
vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (idx));
}
- i = sizeof (vring_desc_t) * sz;
+ i = sizeof (vnet_virtio_vring_desc_t) * sz;
i = round_pow2 (i, CLIB_CACHE_LINE_BYTES);
vring->desc = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES);
clib_memset (vring->desc, 0, i);
- i = sizeof (vring_avail_t) + sz * sizeof (vring->avail->ring[0]);
+ i = sizeof (vnet_virtio_vring_avail_t) + sz * sizeof (vring->avail->ring[0]);
i = round_pow2 (i, CLIB_CACHE_LINE_BYTES);
vring->avail = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES);
clib_memset (vring->avail, 0, i);
// tell kernel that we don't need interrupt
vring->avail->flags = VRING_AVAIL_F_NO_INTERRUPT;
- i = sizeof (vring_used_t) + sz * sizeof (vring_used_elem_t);
+ i = sizeof (vnet_virtio_vring_used_t) +
+ sz * sizeof (vnet_virtio_vring_used_elem_t);
i = round_pow2 (i, CLIB_CACHE_LINE_BYTES);
vring->used = clib_mem_alloc_aligned (i, CLIB_CACHE_LINE_BYTES);
clib_memset (vring->used, 0, i);
@@ -116,20 +120,21 @@ virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx, u16 sz)
else
vring->call_fd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC);
- vring->size = sz;
+ vring->total_packets = 0;
+ vring->queue_size = sz;
vring->kick_fd = eventfd (0, EFD_NONBLOCK | EFD_CLOEXEC);
virtio_log_debug (vif, "vring %u size %u call_fd %d kick_fd %d", idx,
- vring->size, vring->call_fd, vring->kick_fd);
+ vring->queue_size, vring->call_fd, vring->kick_fd);
return 0;
}
inline void
-virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring)
+virtio_free_buffers (vlib_main_t *vm, vnet_virtio_vring_t *vring)
{
u16 used = vring->desc_in_use;
u16 last = vring->last_used_idx;
- u16 mask = vring->size - 1;
+ u16 mask = vring->queue_size - 1;
while (used)
{
@@ -142,7 +147,7 @@ virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring)
clib_error_t *
virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
{
- virtio_vring_t *vring =
+ vnet_virtio_vring_t *vring =
vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS (idx));
clib_file_del_by_index (&file_main, vring->call_file_index);
@@ -164,7 +169,7 @@ virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
clib_error_t *
virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif, u32 idx)
{
- virtio_vring_t *vring =
+ vnet_virtio_vring_t *vring =
vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS (idx));
close (vring->kick_fd);
@@ -189,7 +194,7 @@ virtio_set_packet_coalesce (virtio_if_t * vif)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
vif->packet_coalesce = 1;
vec_foreach (vring, vif->txq_vrings)
{
@@ -204,9 +209,8 @@ virtio_set_packet_buffering (virtio_if_t * vif, u16 buffering_size)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
clib_error_t *error = 0;
- vif->packet_buffering = 1;
vec_foreach (vring, vif->txq_vrings)
{
@@ -222,7 +226,8 @@ virtio_set_packet_buffering (virtio_if_t * vif, u16 buffering_size)
}
static void
-virtio_vring_fill (vlib_main_t *vm, virtio_if_t *vif, virtio_vring_t *vring)
+virtio_vring_fill (vlib_main_t *vm, virtio_if_t *vif,
+ vnet_virtio_vring_t *vring)
{
if (vif->is_packed)
virtio_refill_vring_packed (vm, vif, vif->type, vring,
@@ -238,7 +243,7 @@ void
virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif)
{
vnet_main_t *vnm = vnet_get_main ();
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
u32 i = 0;
vnet_hw_if_set_input_node (vnm, vif->hw_if_index, virtio_input_node.index);
@@ -284,24 +289,53 @@ virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif)
vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index);
}
+void
+virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_virtio_vring_t *vring;
+
+ vec_foreach (vring, vif->txq_vrings)
+ {
+ vring->queue_index = vnet_hw_if_register_tx_queue (
+ vnm, vif->hw_if_index, TX_QUEUE_ACCESS (vring->queue_id));
+ }
+
+ if (vif->num_txqs == 0)
+ {
+ virtio_log_error (vif, "Interface %U has 0 txq",
+ format_vnet_hw_if_index_name, vnm, vif->hw_if_index);
+ return;
+ }
+
+ for (u32 j = 0; j < vlib_get_n_threads (); j++)
+ {
+ u32 qi = vif->txq_vrings[j % vif->num_txqs].queue_index;
+ vnet_hw_if_tx_queue_assign_thread (vnm, qi, j);
+ }
+
+ vnet_hw_if_update_runtime_data (vnm, vif->hw_if_index);
+}
+
inline void
virtio_set_net_hdr_size (virtio_if_t * vif)
{
if (vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF) ||
vif->features & VIRTIO_FEATURE (VIRTIO_F_VERSION_1))
- vif->virtio_net_hdr_sz = sizeof (virtio_net_hdr_v1_t);
+ vif->virtio_net_hdr_sz = sizeof (vnet_virtio_net_hdr_v1_t);
else
- vif->virtio_net_hdr_sz = sizeof (virtio_net_hdr_t);
+ vif->virtio_net_hdr_sz = sizeof (vnet_virtio_net_hdr_t);
}
inline void
-virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
+virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
+ virtio_if_type_t type)
{
u32 i, j, hw_if_index;
virtio_if_t *vif;
vnet_main_t *vnm = &vnet_main;
virtio_main_t *mm = &virtio_main;
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
struct feat_struct
{
u8 bit;
@@ -407,17 +441,17 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
}
vlib_cli_output (vm, " Number of RX Virtqueue %u", vif->num_rxqs);
vlib_cli_output (vm, " Number of TX Virtqueue %u", vif->num_txqs);
- if (vif->cxq_vring != NULL
- && vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
+ if (type == VIRTIO_IF_TYPE_PCI && vif->cxq_vring != NULL &&
+ vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
vlib_cli_output (vm, " Number of CTRL Virtqueue 1");
vec_foreach_index (i, vif->rxq_vrings)
{
vring = vec_elt_at_index (vif->rxq_vrings, i);
vlib_cli_output (vm, " Virtqueue (RX) %d", vring->queue_id);
- vlib_cli_output (vm,
- " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
- vring->size, vring->last_used_idx, vring->desc_next,
- vring->desc_in_use);
+ vlib_cli_output (
+ vm, " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
+ vring->queue_size, vring->last_used_idx, vring->desc_next,
+ vring->desc_in_use);
if (vif->is_packed)
{
vlib_cli_output (vm,
@@ -448,11 +482,12 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
" id addr len flags next/id user_addr\n");
vlib_cli_output (vm,
" ===== ================== ===== ====== ======= ==================\n");
- for (j = 0; j < vring->size; j++)
+ for (j = 0; j < vring->queue_size; j++)
{
if (vif->is_packed)
{
- vring_packed_desc_t *desc = &vring->packed_desc[j];
+ vnet_virtio_vring_packed_desc_t *desc =
+ &vring->packed_desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
@@ -461,7 +496,7 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
}
else
{
- vring_desc_t *desc = &vring->desc[j];
+ vnet_virtio_vring_desc_t *desc = &vring->desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
@@ -475,10 +510,10 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
{
vring = vec_elt_at_index (vif->txq_vrings, i);
vlib_cli_output (vm, " Virtqueue (TX) %d", vring->queue_id);
- vlib_cli_output (vm,
- " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
- vring->size, vring->last_used_idx, vring->desc_next,
- vring->desc_in_use);
+ vlib_cli_output (
+ vm, " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
+ vring->queue_size, vring->last_used_idx, vring->desc_next,
+ vring->desc_in_use);
if (vif->is_packed)
{
vlib_cli_output (vm,
@@ -519,11 +554,12 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
" id addr len flags next/id user_addr\n");
vlib_cli_output (vm,
" ===== ================== ===== ====== ======== ==================\n");
- for (j = 0; j < vring->size; j++)
+ for (j = 0; j < vring->queue_size; j++)
{
if (vif->is_packed)
{
- vring_packed_desc_t *desc = &vring->packed_desc[j];
+ vnet_virtio_vring_packed_desc_t *desc =
+ &vring->packed_desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
@@ -532,7 +568,7 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
}
else
{
- vring_desc_t *desc = &vring->desc[j];
+ vnet_virtio_vring_desc_t *desc = &vring->desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
@@ -542,15 +578,15 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
}
}
}
- if (vif->cxq_vring != NULL
- && vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
+ if (type == VIRTIO_IF_TYPE_PCI && vif->cxq_vring != NULL &&
+ vif->features & VIRTIO_FEATURE (VIRTIO_NET_F_CTRL_VQ))
{
vring = vif->cxq_vring;
vlib_cli_output (vm, " Virtqueue (CTRL) %d", vring->queue_id);
- vlib_cli_output (vm,
- " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
- vring->size, vring->last_used_idx,
- vring->desc_next, vring->desc_in_use);
+ vlib_cli_output (
+ vm, " qsz %d, last_used_idx %d, desc_next %d, desc_in_use %d",
+ vring->queue_size, vring->last_used_idx, vring->desc_next,
+ vring->desc_in_use);
if (vif->is_packed)
{
vlib_cli_output (vm,
@@ -571,11 +607,6 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
vring->avail->flags, vring->avail->idx,
vring->used->flags, vring->used->idx);
}
- if (type & (VIRTIO_IF_TYPE_TAP | VIRTIO_IF_TYPE_TUN))
- {
- vlib_cli_output (vm, " kickfd %d, callfd %d", vring->kick_fd,
- vring->call_fd);
- }
if (show_descr)
{
vlib_cli_output (vm, "\n descriptor table:\n");
@@ -583,11 +614,12 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
" id addr len flags next/id user_addr\n");
vlib_cli_output (vm,
" ===== ================== ===== ====== ======== ==================\n");
- for (j = 0; j < vring->size; j++)
+ for (j = 0; j < vring->queue_size; j++)
{
if (vif->is_packed)
{
- vring_packed_desc_t *desc = &vring->packed_desc[j];
+ vnet_virtio_vring_packed_desc_t *desc =
+ &vring->packed_desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
@@ -596,7 +628,7 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
}
else
{
- vring_desc_t *desc = &vring->desc[j];
+ vnet_virtio_vring_desc_t *desc = &vring->desc[j];
vlib_cli_output (vm,
" %-5d 0x%016lx %-5d 0x%04x %-8d 0x%016lx\n",
j, desc->addr,
@@ -606,7 +638,6 @@ virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr, u32 type)
}
}
}
-
}
}
diff --git a/src/vnet/devices/virtio/virtio.h b/src/vnet/devices/virtio/virtio.h
index 48996052e2b..431b1d25c26 100644
--- a/src/vnet/devices/virtio/virtio.h
+++ b/src/vnet/devices/virtio/virtio.h
@@ -22,6 +22,7 @@
#include <vnet/devices/virtio/vhost_std.h>
#include <vnet/devices/virtio/virtio_buffering.h>
#include <vnet/gso/gro.h>
+#include <vnet/interface.h>
#define foreach_virtio_if_flag \
_(0, ADMIN_UP, "admin-up") \
@@ -68,19 +69,19 @@ typedef struct
{
struct
{
- vring_desc_t *desc;
- vring_used_t *used;
- vring_avail_t *avail;
+ vnet_virtio_vring_desc_t *desc;
+ vnet_virtio_vring_used_t *used;
+ vnet_virtio_vring_avail_t *avail;
};
struct
{
- vring_packed_desc_t *packed_desc;
- vring_desc_event_t *driver_event;
- vring_desc_event_t *device_event;
+ vnet_virtio_vring_packed_desc_t *packed_desc;
+ vnet_virtio_vring_desc_event_t *driver_event;
+ vnet_virtio_vring_desc_event_t *device_event;
};
};
u32 *buffers;
- u16 size;
+ u16 queue_size;
u16 queue_id;
u32 queue_index;
u16 desc_in_use;
@@ -103,12 +104,14 @@ typedef struct
};
};
#define VRING_TX_OUT_OF_ORDER 1
+#define VRING_TX_SCHEDULED 2
u16 flags;
u8 buffer_pool_index;
vnet_hw_if_rx_mode mode;
virtio_vring_buffering_t *buffering;
gro_flow_table_t *flow_table;
-} virtio_vring_t;
+ u64 total_packets;
+} vnet_virtio_vring_t;
typedef union
{
@@ -133,8 +136,8 @@ typedef struct
u32 per_interface_next_index;
u16 num_rxqs;
u16 num_txqs;
- virtio_vring_t *rxq_vrings;
- virtio_vring_t *txq_vrings;
+ vnet_virtio_vring_t *rxq_vrings;
+ vnet_virtio_vring_t *txq_vrings;
int gso_enabled;
int csum_offload_enabled;
union
@@ -192,7 +195,7 @@ typedef struct
struct /* native virtio */
{
void *bar;
- virtio_vring_t *cxq_vring;
+ vnet_virtio_vring_t *cxq_vring;
pci_addr_t pci_addr;
u32 bar_id;
u32 notify_off_multiplier;
@@ -213,7 +216,7 @@ typedef struct
typedef struct
{
- u32 interrupt_queues_count;
+ u32 gro_or_buffering_if_count;
/* logging */
vlib_log_class_t log_default;
@@ -224,7 +227,6 @@ typedef struct
extern virtio_main_t virtio_main;
extern vnet_device_class_t virtio_device_class;
extern vlib_node_registration_t virtio_input_node;
-extern vlib_node_registration_t virtio_send_interrupt_node;
clib_error_t *virtio_vring_init (vlib_main_t * vm, virtio_if_t * vif, u16 idx,
u16 sz);
@@ -233,10 +235,11 @@ clib_error_t *virtio_vring_free_rx (vlib_main_t * vm, virtio_if_t * vif,
clib_error_t *virtio_vring_free_tx (vlib_main_t * vm, virtio_if_t * vif,
u32 idx);
void virtio_vring_set_rx_queues (vlib_main_t *vm, virtio_if_t *vif);
-extern void virtio_free_buffers (vlib_main_t * vm, virtio_vring_t * vring);
+void virtio_vring_set_tx_queues (vlib_main_t *vm, virtio_if_t *vif);
+extern void virtio_free_buffers (vlib_main_t *vm, vnet_virtio_vring_t *vring);
extern void virtio_set_net_hdr_size (virtio_if_t * vif);
-extern void virtio_show (vlib_main_t * vm, u32 * hw_if_indices, u8 show_descr,
- u32 type);
+extern void virtio_show (vlib_main_t *vm, u32 *hw_if_indices, u8 show_descr,
+ virtio_if_type_t type);
extern void virtio_set_packet_coalesce (virtio_if_t * vif);
clib_error_t *virtio_set_packet_buffering (virtio_if_t * vif, u16 size);
extern void virtio_pci_legacy_notify_queue (vlib_main_t * vm,
@@ -245,11 +248,14 @@ extern void virtio_pci_legacy_notify_queue (vlib_main_t * vm,
extern void virtio_pci_modern_notify_queue (vlib_main_t * vm,
virtio_if_t * vif, u16 queue_id,
u16 queue_notify_offset);
+extern void virtio_pre_input_node_enable (vlib_main_t *vm, virtio_if_t *vif);
+extern void virtio_pre_input_node_disable (vlib_main_t *vm, virtio_if_t *vif);
+
format_function_t format_virtio_device_name;
format_function_t format_virtio_log_name;
static_always_inline void
-virtio_kick (vlib_main_t * vm, virtio_vring_t * vring, virtio_if_t * vif)
+virtio_kick (vlib_main_t *vm, vnet_virtio_vring_t *vring, virtio_if_t *vif)
{
if (vif->type == VIRTIO_IF_TYPE_PCI)
{
@@ -270,6 +276,56 @@ virtio_kick (vlib_main_t * vm, virtio_vring_t * vring, virtio_if_t * vif)
}
}
+static_always_inline u8
+virtio_txq_is_scheduled (vnet_virtio_vring_t *vring)
+{
+ if (vring)
+ return (vring->flags & VRING_TX_SCHEDULED);
+ return 1;
+}
+
+static_always_inline void
+virtio_txq_set_scheduled (vnet_virtio_vring_t *vring)
+{
+ if (vring)
+ vring->flags |= VRING_TX_SCHEDULED;
+}
+
+static_always_inline void
+virtio_txq_clear_scheduled (vnet_virtio_vring_t *vring)
+{
+ if (vring)
+ vring->flags &= ~VRING_TX_SCHEDULED;
+}
+
+static_always_inline void
+vnet_virtio_vring_init (vnet_virtio_vring_t *vring, u16 queue_size, void *p,
+ u32 align)
+{
+ vring->queue_size = queue_size;
+ vring->desc = p;
+ vring->avail =
+ (vnet_virtio_vring_avail_t *) ((char *) p +
+ queue_size *
+ sizeof (vnet_virtio_vring_desc_t));
+ vring->used =
+ (vnet_virtio_vring_used_t
+ *) ((char *) p + ((sizeof (vnet_virtio_vring_desc_t) * queue_size +
+ sizeof (u16) * (3 + queue_size) + align - 1) &
+ ~(align - 1)));
+ vring->avail->flags = VIRTIO_RING_FLAG_MASK_INT;
+}
+
+static_always_inline u16
+vnet_virtio_vring_size (u16 queue_size, u32 align)
+{
+ return ((sizeof (vnet_virtio_vring_desc_t) * queue_size +
+ sizeof (u16) * (3 + queue_size) + align - 1) &
+ ~(align - 1)) +
+ sizeof (u16) * 3 +
+ sizeof (vnet_virtio_vring_used_elem_t) * queue_size;
+}
+
#define virtio_log_debug(vif, f, ...) \
{ \
vlib_log(VLIB_LOG_LEVEL_DEBUG, virtio_main.log_default, \
diff --git a/src/vnet/devices/virtio/virtio_api.c b/src/vnet/devices/virtio/virtio_api.c
index 11514c75c59..3197a2fab6d 100644
--- a/src/vnet/devices/virtio/virtio_api.c
+++ b/src/vnet/devices/virtio/virtio_api.c
@@ -193,10 +193,10 @@ virtio_pci_send_sw_interface_details (vpe_api_main_t * am,
pci_address_encode ((vlib_pci_addr_t *) & vif->pci_addr.as_u32,
&mp->pci_addr);
mp->sw_if_index = htonl (vif->sw_if_index);
- virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, 0);
- mp->rx_ring_sz = htons (vring->size);
+ vnet_virtio_vring_t *vring = vec_elt_at_index (vif->rxq_vrings, 0);
+ mp->rx_ring_sz = htons (vring->queue_size);
vring = vec_elt_at_index (vif->txq_vrings, 0);
- mp->tx_ring_sz = htons (vring->size);
+ mp->tx_ring_sz = htons (vring->queue_size);
clib_memcpy (mp->mac_addr, vif->mac_addr, 6);
mp->features = clib_host_to_net_u64 (vif->features);
diff --git a/src/vnet/devices/virtio/virtio_buffering.h b/src/vnet/devices/virtio/virtio_buffering.h
index ef3d9d27652..6f13a1f5c36 100644
--- a/src/vnet/devices/virtio/virtio_buffering.h
+++ b/src/vnet/devices/virtio/virtio_buffering.h
@@ -18,6 +18,8 @@
#ifndef _VNET_DEVICES_VIRTIO_VIRTIO_BUFFERING_H_
#define _VNET_DEVICES_VIRTIO_VIRTIO_BUFFERING_H_
+#include <vnet/interface.h>
+
#define VIRTIO_BUFFERING_DEFAULT_SIZE 1024
#define VIRTIO_BUFFERING_TIMEOUT 1e-5
@@ -205,15 +207,18 @@ virtio_vring_buffering_read_from_back (virtio_vring_buffering_t * buffering)
}
static_always_inline void
-virtio_vring_buffering_schedule_node_on_dispatcher (vlib_main_t * vm,
- virtio_vring_buffering_t *
- buffering)
+virtio_vring_buffering_schedule_node_on_dispatcher (
+ vlib_main_t *vm, vnet_hw_if_tx_queue_t *txq,
+ virtio_vring_buffering_t *buffering)
{
if (buffering && virtio_vring_buffering_is_timeout (vm, buffering)
&& virtio_vring_n_buffers (buffering))
{
vlib_frame_t *f = vlib_get_frame_to_node (vm, buffering->node_index);
+ vnet_hw_if_tx_frame_t *ft = vlib_frame_scalar_args (f);
u32 *f_to = vlib_frame_vector_args (f);
+ ft->shared_queue = txq->shared_queue;
+ ft->queue_id = txq->queue_id;
f_to[f->n_vectors] = virtio_vring_buffering_read_from_back (buffering);
f->n_vectors++;
vlib_put_frame_to_node (vm, buffering->node_index, f);
diff --git a/src/vnet/devices/virtio/virtio_inline.h b/src/vnet/devices/virtio/virtio_inline.h
index 209817d48c7..179f319aa4c 100644
--- a/src/vnet/devices/virtio/virtio_inline.h
+++ b/src/vnet/devices/virtio/virtio_inline.h
@@ -29,11 +29,11 @@ typedef enum
static_always_inline void
virtio_refill_vring_split (vlib_main_t *vm, virtio_if_t *vif,
- virtio_if_type_t type, virtio_vring_t *vring,
+ virtio_if_type_t type, vnet_virtio_vring_t *vring,
const int hdr_sz, u32 node_index)
{
u16 used, next, avail, n_slots, n_refill;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
u16 mask = sz - 1;
more:
@@ -47,8 +47,9 @@ more:
next = vring->desc_next;
avail = vring->avail->idx;
- n_slots = vlib_buffer_alloc_to_ring_from_pool (
- vm, vring->buffers, next, vring->size, n_refill, vring->buffer_pool_index);
+ n_slots = vlib_buffer_alloc_to_ring_from_pool (vm, vring->buffers, next,
+ vring->queue_size, n_refill,
+ vring->buffer_pool_index);
if (PREDICT_FALSE (n_slots != n_refill))
{
@@ -60,7 +61,7 @@ more:
while (n_slots)
{
- vring_desc_t *d = &vring->desc[next];
+ vnet_virtio_vring_desc_t *d = &vring->desc[next];
;
vlib_buffer_t *b = vlib_get_buffer (vm, vring->buffers[next]);
/*
@@ -94,11 +95,11 @@ more:
static_always_inline void
virtio_refill_vring_packed (vlib_main_t *vm, virtio_if_t *vif,
- virtio_if_type_t type, virtio_vring_t *vring,
+ virtio_if_type_t type, vnet_virtio_vring_t *vring,
const int hdr_sz, u32 node_index)
{
u16 used, next, n_slots, n_refill, flags = 0, first_desc_flags;
- u16 sz = vring->size;
+ u16 sz = vring->queue_size;
more:
used = vring->desc_in_use;
@@ -124,7 +125,7 @@ more:
while (n_slots)
{
- vring_packed_desc_t *d = &vring->packed_desc[next];
+ vnet_virtio_vring_packed_desc_t *d = &vring->packed_desc[next];
vlib_buffer_t *b = vlib_get_buffer (vm, vring->buffers[next]);
/*
* current_data may not be initialized with 0 and may contain
diff --git a/src/vnet/devices/virtio/virtio_pci_legacy.c b/src/vnet/devices/virtio/virtio_pci_legacy.c
index 1426a7035a2..d7a1c982413 100644
--- a/src/vnet/devices/virtio/virtio_pci_legacy.c
+++ b/src/vnet/devices/virtio/virtio_pci_legacy.c
@@ -176,10 +176,11 @@ virtio_pci_legacy_set_queue_num (vlib_main_t * vm, virtio_if_t * vif,
}
static u8
-virtio_pci_legacy_setup_queue (vlib_main_t * vm, virtio_if_t * vif,
- u16 queue_id, void *p)
+virtio_pci_legacy_setup_queue (vlib_main_t *vm, virtio_if_t *vif, u16 queue_id,
+ vnet_virtio_vring_t *vring)
{
- u64 addr = vlib_physmem_get_pa (vm, p) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
+ u64 addr =
+ vlib_physmem_get_pa (vm, vring->desc) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT;
u32 addr2 = 0, a = (u32) addr;
vlib_pci_write_io_u16 (vm, vif->pci_dev_handle, VIRTIO_PCI_QUEUE_SEL,
&queue_id);
diff --git a/src/vnet/devices/virtio/virtio_pci_modern.c b/src/vnet/devices/virtio/virtio_pci_modern.c
index 8e090ffed3a..50a7b392367 100644
--- a/src/vnet/devices/virtio/virtio_pci_modern.c
+++ b/src/vnet/devices/virtio/virtio_pci_modern.c
@@ -164,9 +164,7 @@ virtio_pci_modern_set_queue_size (vlib_main_t * vm, virtio_if_t * vif,
return;
}
- if (virtio_pci_modern_get_queue_size (vm, vif, queue_id) > queue_size)
- virtio_pci_reg_write_u16 (vif, VIRTIO_QUEUE_SIZE_OFFSET (vif),
- queue_size);
+ virtio_pci_reg_write_u16 (vif, VIRTIO_QUEUE_SIZE_OFFSET (vif), queue_size);
}
static u16
@@ -265,32 +263,24 @@ virtio_pci_modern_set_queue_device (virtio_if_t * vif, u64 queue_device)
}
static u8
-virtio_pci_modern_setup_queue (vlib_main_t * vm, virtio_if_t * vif,
- u16 queue_id, void *p)
+virtio_pci_modern_setup_queue (vlib_main_t *vm, virtio_if_t *vif, u16 queue_id,
+ vnet_virtio_vring_t *vring)
{
u64 desc, avail, used;
- u16 queue_size = 0;
virtio_pci_modern_set_queue_select (vif, queue_id);
- queue_size = virtio_pci_modern_get_queue_size (vm, vif, queue_id);
if (vif->is_packed)
{
- virtio_vring_t *vring = (virtio_vring_t *) p;
-
desc = vlib_physmem_get_pa (vm, vring->packed_desc);
avail = vlib_physmem_get_pa (vm, vring->driver_event);
used = vlib_physmem_get_pa (vm, vring->device_event);
}
else
{
- vring_t vr;
-
- vring_init (&vr, queue_size, p, VIRTIO_PCI_VRING_ALIGN);
-
- desc = vlib_physmem_get_pa (vm, vr.desc);
- avail = vlib_physmem_get_pa (vm, vr.avail);
- used = vlib_physmem_get_pa (vm, vr.used);
+ desc = vlib_physmem_get_pa (vm, vring->desc);
+ avail = vlib_physmem_get_pa (vm, vring->avail);
+ used = vlib_physmem_get_pa (vm, vring->used);
}
virtio_pci_modern_set_queue_desc (vif, desc);
diff --git a/src/vnet/devices/virtio/virtio_pre_input.c b/src/vnet/devices/virtio/virtio_pre_input.c
new file mode 100644
index 00000000000..80cc8d6edb0
--- /dev/null
+++ b/src/vnet/devices/virtio/virtio_pre_input.c
@@ -0,0 +1,160 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/gso/gro_func.h>
+#include <vnet/interface/tx_queue_funcs.h>
+#include <vnet/devices/virtio/virtio.h>
+#include <vnet/devices/virtio/virtio_inline.h>
+
+static_always_inline uword
+virtio_pre_input_inline (vlib_main_t *vm, vnet_virtio_vring_t *txq_vring,
+ vnet_hw_if_tx_queue_t *txq, u8 packet_coalesce,
+ u8 packet_buffering)
+{
+ if (txq->shared_queue)
+ {
+ if (clib_spinlock_trylock (&txq_vring->lockp))
+ {
+ if (virtio_txq_is_scheduled (txq_vring))
+ goto unlock;
+ if (packet_coalesce)
+ vnet_gro_flow_table_schedule_node_on_dispatcher (
+ vm, txq, txq_vring->flow_table);
+ else if (packet_buffering)
+ virtio_vring_buffering_schedule_node_on_dispatcher (
+ vm, txq, txq_vring->buffering);
+ virtio_txq_set_scheduled (txq_vring);
+ unlock:
+ clib_spinlock_unlock (&txq_vring->lockp);
+ }
+ }
+ else
+ {
+ if (packet_coalesce)
+ vnet_gro_flow_table_schedule_node_on_dispatcher (
+ vm, txq, txq_vring->flow_table);
+ else if (packet_buffering)
+ virtio_vring_buffering_schedule_node_on_dispatcher (
+ vm, txq, txq_vring->buffering);
+ }
+ return 0;
+}
+
+static uword
+virtio_pre_input (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame)
+{
+ virtio_main_t *vim = &virtio_main;
+ vnet_main_t *vnm = vnet_get_main ();
+ virtio_if_t *vif;
+
+ pool_foreach (vif, vim->interfaces)
+ {
+ if (vif->packet_coalesce || vif->packet_buffering)
+ {
+ vnet_virtio_vring_t *txq_vring;
+ vec_foreach (txq_vring, vif->txq_vrings)
+ {
+ vnet_hw_if_tx_queue_t *txq =
+ vnet_hw_if_get_tx_queue (vnm, txq_vring->queue_index);
+ if (clib_bitmap_get (txq->threads, vm->thread_index) == 1)
+ virtio_pre_input_inline (vm, txq_vring, txq,
+ vif->packet_coalesce,
+ vif->packet_buffering);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * virtio interfaces support packet coalescing and buffering which
+ * depends on timer expiry to flush the stored packets periodically.
+ * Previously, virtio input node checked timer expiry and scheduled
+ * tx queue accordingly.
+ *
+ * In poll mode, timer expiry was handled naturally, as input node
+ * runs periodically. In interrupt mode, virtio input node was dependent
+ * on the interrupts send from backend. Stored packets could starve,
+ * if there would not be interrupts to input node.
+ *
+ * This problem had been solved through a dedicated process node which
+ * periodically sends interrupt to virtio input node given coalescing
+ * or buffering feature were enabled on an interface.
+ *
+ * But that approach worked with following limitations:
+ * 1) Each VPP thread should have (atleast) 1 rx queue of an interface
+ * (with buffering enabled). And rxqs and txqs should be placed on the
+ * same thread.
+ *
+ * New design provides solution to above problem(s) without any limitation
+ * through (dedicated) pre-input node running on each VPP thread when
+ * atleast 1 virtio interface is enabled with coalescing or buffering.
+ */
+VLIB_REGISTER_NODE (virtio_pre_input_node) = {
+ .function = virtio_pre_input,
+ .type = VLIB_NODE_TYPE_PRE_INPUT,
+ .name = "virtio-pre-input",
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+void
+virtio_pre_input_node_enable (vlib_main_t *vm, virtio_if_t *vif)
+{
+ virtio_main_t *vim = &virtio_main;
+ if (vif->packet_coalesce || vif->packet_buffering)
+ {
+ vim->gro_or_buffering_if_count++;
+ if (vim->gro_or_buffering_if_count == 1)
+ {
+ foreach_vlib_main ()
+ {
+ vlib_node_set_state (this_vlib_main, virtio_pre_input_node.index,
+ VLIB_NODE_STATE_POLLING);
+ }
+ }
+ }
+}
+
+void
+virtio_pre_input_node_disable (vlib_main_t *vm, virtio_if_t *vif)
+{
+ virtio_main_t *vim = &virtio_main;
+ if (vif->packet_coalesce || vif->packet_buffering)
+ {
+ if (vim->gro_or_buffering_if_count > 0)
+ vim->gro_or_buffering_if_count--;
+ if (vim->gro_or_buffering_if_count == 0)
+ {
+ foreach_vlib_main ()
+ {
+ vlib_node_set_state (this_vlib_main, virtio_pre_input_node.index,
+ VLIB_NODE_STATE_DISABLED);
+ }
+ }
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/devices/virtio/virtio_process.c b/src/vnet/devices/virtio/virtio_process.c
index f347ef2ab57..13ba590659c 100644
--- a/src/vnet/devices/virtio/virtio_process.c
+++ b/src/vnet/devices/virtio/virtio_process.c
@@ -50,7 +50,7 @@ virtio_send_interrupt_process (vlib_main_t * vm,
{
if (vif->packet_coalesce || vif->packet_buffering)
{
- virtio_vring_t *vring;
+ vnet_virtio_vring_t *vring;
vec_foreach (vring, vif->rxq_vrings)
{
if (vring->mode == VNET_HW_IF_RX_MODE_INTERRUPT ||
@@ -70,13 +70,11 @@ virtio_send_interrupt_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (virtio_send_interrupt_node) = {
.function = virtio_send_interrupt_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "virtio-send-interrupt-process",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/devices/virtio/virtio_std.h b/src/vnet/devices/virtio/virtio_std.h
index 619dd66d5ed..ec988c08dbb 100644
--- a/src/vnet/devices/virtio/virtio_std.h
+++ b/src/vnet/devices/virtio/virtio_std.h
@@ -64,7 +64,7 @@ typedef enum
#define _(f,n) f = n,
foreach_virtio_net_features
#undef _
-} virtio_net_feature_t;
+} vnet_virtio_net_feature_t;
#define VIRTIO_FEATURE(X) (1ULL << X)
@@ -87,7 +87,7 @@ typedef enum
#define _(f,n) f = n,
foreach_virtio_event_idx_flags
#undef _
-} virtio_event_idx_flags_t;
+} vnet_virtio_event_idx_flags_t;
#define VRING_USED_F_NO_NOTIFY 1
#define VRING_AVAIL_F_NO_INTERRUPT 1
@@ -98,7 +98,7 @@ typedef struct
u32 len;
u16 flags;
u16 next;
-} vring_desc_t;
+} vnet_virtio_vring_desc_t;
typedef struct
{
@@ -106,38 +106,35 @@ typedef struct
u16 idx;
u16 ring[0];
/* u16 used_event; */
-} vring_avail_t;
+} vnet_virtio_vring_avail_t;
typedef struct
{
u32 id;
u32 len;
-} vring_used_elem_t;
+} vnet_virtio_vring_used_elem_t;
typedef struct
{
u16 flags;
u16 idx;
- vring_used_elem_t ring[0];
+ vnet_virtio_vring_used_elem_t ring[0];
/* u16 avail_event; */
-} vring_used_t;
+} vnet_virtio_vring_used_t;
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct
-{
- u64 addr; // packet data buffer address
- u32 len; // packet data buffer size
- u16 id; // buffer id
- u16 flags; // flags
-}) vring_packed_desc_t;
+typedef CLIB_PACKED (struct {
+ u64 addr; // packet data buffer address
+ u32 len; // packet data buffer size
+ u16 id; // buffer id
+ u16 flags; // flags
+}) vnet_virtio_vring_packed_desc_t;
-STATIC_ASSERT_SIZEOF (vring_packed_desc_t, 16);
+STATIC_ASSERT_SIZEOF (vnet_virtio_vring_packed_desc_t, 16);
-typedef CLIB_PACKED (struct
-{
+typedef CLIB_PACKED (struct {
u16 off_wrap;
u16 flags;
-}) vring_desc_event_t;
+}) vnet_virtio_vring_desc_event_t;
#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
@@ -148,8 +145,7 @@ typedef CLIB_PACKED (struct
#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
-typedef CLIB_PACKED (struct
-{
+typedef CLIB_PACKED (struct {
u8 flags;
u8 gso_type;
u16 hdr_len; /* Ethernet + IP + tcp/udp hdrs */
@@ -157,54 +153,22 @@ typedef CLIB_PACKED (struct
u16 csum_start; /* Position to start checksumming from */
u16 csum_offset; /* Offset after that to place checksum */
u16 num_buffers; /* Number of merged rx buffers */
-}) virtio_net_hdr_v1_t;
+}) vnet_virtio_net_hdr_v1_t;
-typedef CLIB_PACKED (struct
-{
+typedef CLIB_PACKED (struct {
u8 flags;
u8 gso_type;
u16 hdr_len;
u16 gso_size;
u16 csum_start;
u16 csum_offset;
-}) virtio_net_hdr_t;
+}) vnet_virtio_net_hdr_t;
-typedef CLIB_PACKED (struct
-{
- virtio_net_hdr_t hdr;
+typedef CLIB_PACKED (struct {
+ vnet_virtio_net_hdr_t hdr;
u16 num_buffers;
-}) virtio_net_hdr_mrg_rxbuf_t;
+}) vnet_virtio_net_hdr_mrg_rxbuf_t;
-/* *INDENT-ON* */
-
-typedef struct
-{
- u16 num;
- vring_desc_t *desc;
- vring_avail_t *avail;
- vring_used_t *used;
-} vring_t;
-
-static_always_inline void
-vring_init (vring_t * vr, u32 num, void *p, u32 align)
-{
- vr->num = num;
- vr->desc = p;
- vr->avail = (vring_avail_t *) ((char *) p + num * sizeof (vring_desc_t));
- vr->used =
- (vring_used_t *) ((char *) p +
- ((sizeof (vring_desc_t) * num +
- sizeof (u16) * (3 + num) + align - 1) & ~(align -
- 1)));
-}
-
-static_always_inline u16
-vring_size (u32 num, u32 align)
-{
- return ((sizeof (vring_desc_t) * num + sizeof (u16) * (3 + num)
- + align - 1) & ~(align - 1))
- + sizeof (u16) * 3 + sizeof (vring_used_elem_t) * num;
-}
#endif
/*
diff --git a/src/vnet/dpo/dpo.c b/src/vnet/dpo/dpo.c
index d8342ff17ae..fc789ae0a7f 100644
--- a/src/vnet/dpo/dpo.c
+++ b/src/vnet/dpo/dpo.c
@@ -613,12 +613,10 @@ dpo_module_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION(dpo_module_init) =
{
.runs_before = VLIB_INITS ("ip_main_init"),
};
-/* *INDENT-ON* */
static clib_error_t *
dpo_memory_show (vlib_main_t * vm,
@@ -640,7 +638,6 @@ dpo_memory_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* The '<em>sh dpo memory </em>' command displays the memory usage for each
* data-plane object type.
@@ -662,6 +659,5 @@ VLIB_CLI_COMMAND (show_fib_memory, static) = {
.function = dpo_memory_show,
.short_help = "show dpo memory",
};
-/* *INDENT-ON* */
// clang-format on
diff --git a/src/vnet/dpo/dpo.h b/src/vnet/dpo/dpo.h
index e9976c2dd87..470359df95c 100644
--- a/src/vnet/dpo/dpo.h
+++ b/src/vnet/dpo/dpo.h
@@ -543,7 +543,7 @@ dpo_get_next_node_by_type_and_proto (dpo_type_t child_type,
#define dpo_pool_barrier_sync(VM,P,YESNO) \
do { \
- pool_get_aligned_will_expand ((P), YESNO, CLIB_CACHE_LINE_BYTES); \
+ YESNO = pool_get_will_expand (P); \
\
if (YESNO) \
{ \
diff --git a/src/vnet/dpo/dvr_dpo.c b/src/vnet/dpo/dvr_dpo.c
index 5db9c803145..2b66467837c 100644
--- a/src/vnet/dpo/dvr_dpo.c
+++ b/src/vnet/dpo/dvr_dpo.c
@@ -206,12 +206,9 @@ format_dvr_dpo (u8* s, va_list *ap)
vnet_main_t * vnm = vnet_get_main();
dvr_dpo_t *dd = dvr_dpo_get(index);
- return (format(s, "%U-dvr-%U-dpo %U",
- format_dpo_proto, dd->dd_proto,
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm, dd->dd_sw_if_index),
- format_dvr_reinject, dd->dd_reinject));
+ return format (s, "%U-dvr-%U-dpo %U", format_dpo_proto, dd->dd_proto,
+ format_vnet_sw_if_index_name, vnm, dd->dd_sw_if_index,
+ format_dvr_reinject, dd->dd_reinject);
}
static void
diff --git a/src/vnet/dpo/interface_rx_dpo.c b/src/vnet/dpo/interface_rx_dpo.c
index d3615d0ce76..5a519d344c1 100644
--- a/src/vnet/dpo/interface_rx_dpo.c
+++ b/src/vnet/dpo/interface_rx_dpo.c
@@ -160,11 +160,8 @@ format_interface_rx_dpo (u8* s, va_list *ap)
vnet_main_t * vnm = vnet_get_main();
interface_rx_dpo_t *ido = interface_rx_dpo_get(index);
- return (format(s, "%U-rx-dpo: %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm, ido->ido_sw_if_index),
- format_dpo_proto, ido->ido_proto));
+ return format (s, "%U-rx-dpo: %U", format_vnet_sw_if_index_name, vnm,
+ ido->ido_sw_if_index, format_dpo_proto, ido->ido_proto);
}
static void
diff --git a/src/vnet/dpo/interface_tx_dpo.c b/src/vnet/dpo/interface_tx_dpo.c
index 870579884a0..73f4e906268 100644
--- a/src/vnet/dpo/interface_tx_dpo.c
+++ b/src/vnet/dpo/interface_tx_dpo.c
@@ -50,10 +50,7 @@ format_interface_tx_dpo (u8* s, va_list *ap)
CLIB_UNUSED(u32 indent) = va_arg(*ap, u32);
vnet_main_t * vnm = vnet_get_main();
- return (format(s, "%U-tx-dpo:",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm, index)));
+ return format (s, "%U-tx-dpo:", format_vnet_sw_if_index_name, vnm, index);
}
static void
diff --git a/src/vnet/dpo/ip6_ll_dpo.c b/src/vnet/dpo/ip6_ll_dpo.c
index f86472c16c5..86908efbc04 100644
--- a/src/vnet/dpo/ip6_ll_dpo.c
+++ b/src/vnet/dpo/ip6_ll_dpo.c
@@ -97,6 +97,11 @@ typedef enum ip6_ll_next_t_
IP6_LL_NEXT_NUM,
} ip6_ll_next_t;
+typedef enum ip6_ll_error_t_
+{
+ IP6_LL_ERROR_NO_TABLE,
+} ip6_ll_error_t;
+
always_inline uword
ip6_ll_dpo_inline (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
@@ -131,10 +136,19 @@ ip6_ll_dpo_inline (vlib_main_t * vm,
/* use the packet's RX interface to pick the link-local FIB */
fib_index0 =
ip6_ll_fib_get (vnet_buffer (p0)->sw_if_index[VLIB_RX]);
+
+ if (~0 == fib_index0)
+ {
+ next0 = IP6_LL_NEXT_DROP;
+ p0->error = node->errors[IP6_LL_ERROR_NO_TABLE];
+ goto trace0;
+ }
+
/* write that fib index into the packet so it's used in the
* lookup node next */
vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
+ trace0:
if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
{
ip6_ll_dpo_trace_t *tr = vlib_add_trace (vm, node, p0,
@@ -170,23 +184,27 @@ ip6_ll_dpo_switch (vlib_main_t * vm,
return (ip6_ll_dpo_inline (vm, node, frame));
}
+static char *ip6_ll_dpo_error_strings[] = {
+ [IP6_LL_ERROR_NO_TABLE] = "Interface is not mapped to an IP6-LL table",
+};
+
/**
* @brief
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_ll_dpo_node) =
{
.function = ip6_ll_dpo_switch,
.name = "ip6-link-local",
.vector_size = sizeof (u32),
.format_trace = format_ip6_ll_dpo_trace,
+ .n_errors = ARRAY_LEN (ip6_ll_dpo_error_strings),
+ .error_strings = ip6_ll_dpo_error_strings,
.n_next_nodes = IP6_LL_NEXT_NUM,
.next_nodes = {
[IP6_LL_NEXT_DROP] = "ip6-drop",
[IP6_LL_NEXT_LOOKUP] = "ip6-lookup",
},
};
-/* *INDENT-ON* */
void
ip6_ll_dpo_module_init (void)
diff --git a/src/vnet/dpo/l3_proxy_dpo.c b/src/vnet/dpo/l3_proxy_dpo.c
index 41156301a0e..f89554d775f 100644
--- a/src/vnet/dpo/l3_proxy_dpo.c
+++ b/src/vnet/dpo/l3_proxy_dpo.c
@@ -116,9 +116,8 @@ format_l3_proxy_dpo (u8 *s, va_list *ap)
if (~0 != l3p->l3p_sw_if_index)
{
- return (format(s, "dpo-l3_proxy: %U",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface(vnm, l3p->l3p_sw_if_index)));
+ return (format (s, "dpo-l3_proxy: %U", format_vnet_sw_if_index_name, vnm,
+ l3p->l3p_sw_if_index));
}
else
{
diff --git a/src/vnet/dpo/load_balance.c b/src/vnet/dpo/load_balance.c
index a212532dffd..8f2a0de6ea8 100644
--- a/src/vnet/dpo/load_balance.c
+++ b/src/vnet/dpo/load_balance.c
@@ -100,8 +100,8 @@ load_balance_alloc_i (void)
vlib_main_t *vm = vlib_get_main();
ASSERT (vm->thread_index == 0);
- pool_get_aligned_will_expand (load_balance_pool, need_barrier_sync,
- CLIB_CACHE_LINE_BYTES);
+ need_barrier_sync = pool_get_will_expand (load_balance_pool);
+
if (need_barrier_sync)
vlib_worker_thread_barrier_sync (vm);
@@ -149,7 +149,13 @@ load_balance_format (index_t lbi,
dpo_id_t *buckets;
u32 i;
- lb = load_balance_get(lbi);
+ lb = load_balance_get_or_null(lbi);
+ if (lb == NULL)
+ {
+ s = format(s, "DELETED lb:%u", lbi);
+ return (s);
+ }
+
vlib_get_combined_counter(&(load_balance_main.lbm_to_counters), lbi, &to);
vlib_get_combined_counter(&(load_balance_main.lbm_via_counters), lbi, &via);
buckets = load_balance_get_buckets(lb);
@@ -244,6 +250,8 @@ load_balance_create_i (u32 num_buckets,
{
load_balance_t *lb;
+ ASSERT (num_buckets <= LB_MAX_BUCKETS);
+
lb = load_balance_alloc_i();
lb->lb_hash_config = fhc;
lb->lb_n_buckets = num_buckets;
@@ -408,7 +416,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
{
nhs[0] = raw_next_hops[0];
nhs[0].path_weight = 1;
- _vec_len (nhs) = 1;
+ vec_set_len (nhs, 1);
sum_weight = 1;
goto done;
}
@@ -425,7 +433,7 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
if (nhs[0].path_weight == nhs[1].path_weight)
{
nhs[0].path_weight = nhs[1].path_weight = 1;
- _vec_len (nhs) = 2;
+ vec_set_len (nhs, 2);
sum_weight = 2;
goto done;
}
@@ -455,8 +463,9 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
/* Try larger and larger power of 2 sized adjacency blocks until we
find one where traffic flows to within 1% of specified weights. */
- for (n_adj = max_pow2 (n_nhs); ; n_adj *= 2)
+ for (n_adj = clib_min(max_pow2 (n_nhs), LB_MAX_BUCKETS); ; n_adj *= 2)
{
+ ASSERT (n_adj <= LB_MAX_BUCKETS);
error = 0;
norm = n_adj / ((f64) sum_weight);
@@ -487,12 +496,22 @@ ip_multipath_normalize_next_hops (const load_balance_path_t * raw_next_hops,
nhs[0].path_weight += n_adj_left;
- /* Less than 5% average error per adjacency with this size adjacency block? */
- if (error <= multipath_next_hop_error_tolerance*n_adj)
+ /* Less than 1% average error per adjacency with this size adjacency block,
+ * or did we reached the maximum number of buckets we support? */
+ if (error <= multipath_next_hop_error_tolerance*n_adj ||
+ n_adj >= LB_MAX_BUCKETS)
{
- /* Truncate any next hops with zero weight. */
- _vec_len (nhs) = i;
- break;
+ if (i < n_nhs)
+ {
+ /* Truncate any next hops in excess */
+ vlib_log_err(load_balance_logger,
+ "Too many paths for load-balance, truncating %d -> %d",
+ n_nhs, i);
+ for (int j = i; j < n_nhs; j++)
+ dpo_reset (&vec_elt(nhs, j).path_dpo);
+ }
+ vec_set_len (nhs, i);
+ break;
}
}
@@ -592,6 +611,7 @@ load_balance_fill_buckets_sticky (load_balance_t *lb,
{
/* fill the bucks from the next up path */
load_balance_set_bucket_i(lb, bucket++, buckets, &fwding_paths[fpath].path_dpo);
+ ASSERT(vec_len(fwding_paths) > 0);
fpath = (fpath + 1) % vec_len(fwding_paths);
}
}
@@ -621,6 +641,7 @@ static inline void
load_balance_set_n_buckets (load_balance_t *lb,
u32 n_buckets)
{
+ ASSERT (n_buckets <= LB_MAX_BUCKETS);
lb->lb_n_buckets = n_buckets;
lb->lb_n_buckets_minus_1 = n_buckets-1;
}
@@ -650,8 +671,6 @@ load_balance_multipath_update (const dpo_id_t *dpo,
&sum_of_weights,
multipath_next_hop_error_tolerance);
- ASSERT (n_buckets >= vec_len (raw_nhs));
-
/*
* Save the old load-balance map used, and get a new one if required.
*/
diff --git a/src/vnet/dpo/load_balance.h b/src/vnet/dpo/load_balance.h
index 5428e20e981..eee073f5892 100644
--- a/src/vnet/dpo/load_balance.h
+++ b/src/vnet/dpo/load_balance.h
@@ -50,6 +50,12 @@ typedef struct load_balance_main_t_
extern load_balance_main_t load_balance_main;
/**
+ * The maximum number of buckets that a load-balance object can have
+ * This must not overflow the lb_n_buckets field
+ */
+#define LB_MAX_BUCKETS 8192
+
+/**
* The number of buckets that a load-balance object can have and still
* fit in one cache-line
*/
@@ -176,6 +182,10 @@ typedef struct load_balance_t_ {
STATIC_ASSERT(sizeof(load_balance_t) <= CLIB_CACHE_LINE_BYTES,
"A load_balance object size exceeds one cacheline");
+STATIC_ASSERT (LB_MAX_BUCKETS <= CLIB_U16_MAX,
+ "Too many buckets for load_balance object");
+STATIC_ASSERT (LB_MAX_BUCKETS && !(LB_MAX_BUCKETS & (LB_MAX_BUCKETS - 1)),
+ "LB_MAX_BUCKETS must be a power of 2");
/**
* Flags controlling load-balance formatting/display
@@ -222,6 +232,14 @@ load_balance_get (index_t lbi)
return (pool_elt_at_index(load_balance_pool, lbi));
}
+static inline load_balance_t *
+load_balance_get_or_null (index_t lbi)
+{
+ if (pool_is_free_index (load_balance_pool, lbi))
+ return 0;
+ return (pool_elt_at_index (load_balance_pool, lbi));
+}
+
#define LB_HAS_INLINE_BUCKETS(_lb) \
((_lb)->lb_n_buckets <= LB_NUM_INLINE_BUCKETS)
diff --git a/src/vnet/dpo/load_balance_map.c b/src/vnet/dpo/load_balance_map.c
index 55249747e5d..765cd856608 100644
--- a/src/vnet/dpo/load_balance_map.c
+++ b/src/vnet/dpo/load_balance_map.c
@@ -317,7 +317,7 @@ load_balance_map_fill (load_balance_map_t *lbm)
bucket += lbmp->lbmp_weight;
}
}
- _vec_len(tmp_buckets) = jj;
+ vec_set_len (tmp_buckets, jj);
/*
* If the number of temporaries written is as many as we need, implying
diff --git a/src/vnet/dpo/lookup_dpo.c b/src/vnet/dpo/lookup_dpo.c
index 3f34cefe0c4..9ce94eebe5c 100644
--- a/src/vnet/dpo/lookup_dpo.c
+++ b/src/vnet/dpo/lookup_dpo.c
@@ -268,50 +268,6 @@ lookup_dpo_unlock (dpo_id_t *dpo)
}
}
-always_inline void
-ip4_src_fib_lookup_one (u32 src_fib_index0,
- const ip4_address_t * addr0,
- u32 * src_adj_index0)
-{
- ip4_fib_mtrie_leaf_t leaf0;
- ip4_fib_mtrie_t * mtrie0;
-
- mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
-
- leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
-
- src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
-}
-
-always_inline void
-ip4_src_fib_lookup_two (u32 src_fib_index0,
- u32 src_fib_index1,
- const ip4_address_t * addr0,
- const ip4_address_t * addr1,
- u32 * src_adj_index0,
- u32 * src_adj_index1)
-{
- ip4_fib_mtrie_leaf_t leaf0, leaf1;
- ip4_fib_mtrie_t * mtrie0, * mtrie1;
-
- mtrie0 = &ip4_fib_get (src_fib_index0)->mtrie;
- mtrie1 = &ip4_fib_get (src_fib_index1)->mtrie;
-
- leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, addr0);
- leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, addr1);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 2);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 2);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, addr0, 3);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, addr1, 3);
-
- src_adj_index0[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- src_adj_index1[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
-}
-
/**
* @brief Lookup trace data
*/
@@ -435,9 +391,8 @@ lookup_dpo_ip4_inline (vlib_main_t * vm,
}
/* do lookup */
- ip4_src_fib_lookup_two (fib_index0, fib_index1,
- input_addr0, input_addr1,
- &lbi0, &lbi1);
+ ip4_fib_forwarding_lookup_x2 (fib_index0, fib_index1, input_addr0,
+ input_addr1, &lbi0, &lbi1);
lb0 = load_balance_get(lbi0);
lb1 = load_balance_get(lbi1);
@@ -573,7 +528,7 @@ lookup_dpo_ip4_inline (vlib_main_t * vm,
}
/* do lookup */
- ip4_src_fib_lookup_one (fib_index0, input_addr, &lbi0);
+ lbi0 = ip4_fib_forwarding_lookup (fib_index0, input_addr);
lb0 = load_balance_get(lbi0);
vnet_buffer(b0)->sw_if_index[VLIB_TX] = fib_index0;
diff --git a/src/vnet/dpo/mpls_disposition.c b/src/vnet/dpo/mpls_disposition.c
index 7bc2cb65f87..2f996727a2d 100644
--- a/src/vnet/dpo/mpls_disposition.c
+++ b/src/vnet/dpo/mpls_disposition.c
@@ -431,14 +431,14 @@ VLIB_NODE_FN (ip4_mpls_label_disposition_pipe_node) (vlib_main_t * vm,
FIB_MPLS_LSP_MODE_PIPE));
}
-VLIB_REGISTER_NODE(ip4_mpls_label_disposition_pipe_node) = {
- .name = "ip4-mpls-label-disposition-pipe",
- .vector_size = sizeof(u32),
-
- .format_trace = format_mpls_label_disposition_trace,
- .sibling_of = "ip4-input",
- .n_errors = IP4_N_ERROR,
- .error_strings = ip4_error_strings,
+VLIB_REGISTER_NODE (ip4_mpls_label_disposition_pipe_node) = {
+ .name = "ip4-mpls-label-disposition-pipe",
+ .vector_size = sizeof (u32),
+
+ .format_trace = format_mpls_label_disposition_trace,
+ .sibling_of = "ip4-input",
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
};
VLIB_NODE_FN (ip6_mpls_label_disposition_pipe_node) (vlib_main_t * vm,
@@ -449,14 +449,14 @@ VLIB_NODE_FN (ip6_mpls_label_disposition_pipe_node) (vlib_main_t * vm,
FIB_MPLS_LSP_MODE_PIPE));
}
-VLIB_REGISTER_NODE(ip6_mpls_label_disposition_pipe_node) = {
- .name = "ip6-mpls-label-disposition-pipe",
- .vector_size = sizeof(u32),
+VLIB_REGISTER_NODE (ip6_mpls_label_disposition_pipe_node) = {
+ .name = "ip6-mpls-label-disposition-pipe",
+ .vector_size = sizeof (u32),
- .format_trace = format_mpls_label_disposition_trace,
- .sibling_of = "ip6-input",
- .n_errors = IP6_N_ERROR,
- .error_strings = ip6_error_strings,
+ .format_trace = format_mpls_label_disposition_trace,
+ .sibling_of = "ip6-input",
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
};
VLIB_NODE_FN (ip4_mpls_label_disposition_uniform_node) (vlib_main_t * vm,
@@ -467,14 +467,14 @@ VLIB_NODE_FN (ip4_mpls_label_disposition_uniform_node) (vlib_main_t * vm,
FIB_MPLS_LSP_MODE_UNIFORM));
}
-VLIB_REGISTER_NODE(ip4_mpls_label_disposition_uniform_node) = {
- .name = "ip4-mpls-label-disposition-uniform",
- .vector_size = sizeof(u32),
+VLIB_REGISTER_NODE (ip4_mpls_label_disposition_uniform_node) = {
+ .name = "ip4-mpls-label-disposition-uniform",
+ .vector_size = sizeof (u32),
- .format_trace = format_mpls_label_disposition_trace,
- .sibling_of = "ip4-input",
- .n_errors = IP4_N_ERROR,
- .error_strings = ip4_error_strings,
+ .format_trace = format_mpls_label_disposition_trace,
+ .sibling_of = "ip4-input",
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
};
VLIB_NODE_FN (ip6_mpls_label_disposition_uniform_node) (vlib_main_t * vm,
@@ -485,14 +485,14 @@ VLIB_NODE_FN (ip6_mpls_label_disposition_uniform_node) (vlib_main_t * vm,
FIB_MPLS_LSP_MODE_UNIFORM));
}
-VLIB_REGISTER_NODE(ip6_mpls_label_disposition_uniform_node) = {
- .name = "ip6-mpls-label-disposition-uniform",
- .vector_size = sizeof(u32),
+VLIB_REGISTER_NODE (ip6_mpls_label_disposition_uniform_node) = {
+ .name = "ip6-mpls-label-disposition-uniform",
+ .vector_size = sizeof (u32),
- .format_trace = format_mpls_label_disposition_trace,
- .sibling_of = "ip6-input",
- .n_errors = IP6_N_ERROR,
- .error_strings = ip6_error_strings,
+ .format_trace = format_mpls_label_disposition_trace,
+ .sibling_of = "ip6-input",
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
};
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/dpo/mpls_label_dpo.c b/src/vnet/dpo/mpls_label_dpo.c
index b87cb1efcd2..872577dfbe1 100644
--- a/src/vnet/dpo/mpls_label_dpo.c
+++ b/src/vnet/dpo/mpls_label_dpo.c
@@ -84,6 +84,7 @@ mpls_label_dpo_create (fib_mpls_label_t *label_stack,
mld = mpls_label_dpo_alloc();
mld->mld_flags = flags;
+ mld->mld_payload_proto = payload_proto;
dtype = mpls_label_dpo_types[flags];
if (MPLS_LABEL_DPO_MAX_N_LABELS < vec_len(label_stack))
@@ -92,13 +93,12 @@ mpls_label_dpo_create (fib_mpls_label_t *label_stack,
dpo_stack(dtype,
mld->mld_payload_proto,
&mld->mld_dpo,
- drop_dpo_get(DPO_PROTO_MPLS));
+ drop_dpo_get(mld->mld_payload_proto));
}
else
{
mld->mld_n_labels = vec_len(label_stack);
mld->mld_n_hdr_bytes = mld->mld_n_labels * sizeof(mld->mld_hdr[0]);
- mld->mld_payload_proto = payload_proto;
/*
* construct label rewrite headers for each value passed.
@@ -398,22 +398,22 @@ mpls_label_imposition_inline (vlib_main_t * vm,
/* Prefetch next iteration. */
{
- vlib_buffer_t * p2, * p3, *p4, *p5;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
- p4 = vlib_get_buffer (vm, from[4]);
- p5 = vlib_get_buffer (vm, from[5]);
-
- vlib_prefetch_buffer_header (p2, STORE);
- vlib_prefetch_buffer_header (p3, STORE);
- vlib_prefetch_buffer_header (p4, STORE);
- vlib_prefetch_buffer_header (p5, STORE);
-
- CLIB_PREFETCH (p2->data, sizeof (hdr0[0]), STORE);
- CLIB_PREFETCH (p3->data, sizeof (hdr0[0]), STORE);
- CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE);
- CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE);
+ vlib_buffer_t *p4, *p5, *p6, *p7;
+
+ p4 = vlib_get_buffer (vm, from[4]);
+ p5 = vlib_get_buffer (vm, from[5]);
+ p6 = vlib_get_buffer (vm, from[6]);
+ p7 = vlib_get_buffer (vm, from[7]);
+
+ vlib_prefetch_buffer_header (p4, STORE);
+ vlib_prefetch_buffer_header (p5, STORE);
+ vlib_prefetch_buffer_header (p6, STORE);
+ vlib_prefetch_buffer_header (p7, STORE);
+
+ CLIB_PREFETCH (p4->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p5->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p6->data, sizeof (hdr0[0]), STORE);
+ CLIB_PREFETCH (p7->data, sizeof (hdr0[0]), STORE);
}
from += 4;
@@ -439,8 +439,14 @@ mpls_label_imposition_inline (vlib_main_t * vm,
if (DPO_PROTO_MPLS != dproto)
{
/*
- * These are the non-MPLS payload imposition cases
+ * These are the non-MPLS payload imposition cases.
+ * Save the l3 offset
*/
+ vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
+ vnet_buffer (b1)->l3_hdr_offset = b1->current_data;
+ vnet_buffer (b2)->l3_hdr_offset = b2->current_data;
+ vnet_buffer (b3)->l3_hdr_offset = b3->current_data;
+
if (DPO_PROTO_IP4 == dproto)
{
ip4_header_t * ip0 = vlib_buffer_get_current(b0);
@@ -785,6 +791,8 @@ mpls_label_imposition_inline (vlib_main_t * vm,
if (DPO_PROTO_MPLS != dproto)
{
+ vnet_buffer (b0)->l3_hdr_offset = b0->current_data;
+
if (DPO_PROTO_IP4 == dproto)
{
/*
diff --git a/src/vnet/dpo/receive_dpo.c b/src/vnet/dpo/receive_dpo.c
index b12b382ce64..413c3ae5b47 100644
--- a/src/vnet/dpo/receive_dpo.c
+++ b/src/vnet/dpo/receive_dpo.c
@@ -122,10 +122,9 @@ format_receive_dpo (u8 *s, va_list *ap)
if (~0 != rd->rd_sw_if_index)
{
- return (format(s, "dpo-receive: %U on %U",
- format_ip46_address, &rd->rd_addr, IP46_TYPE_ANY,
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface(vnm, rd->rd_sw_if_index)));
+ return (format (s, "dpo-receive: %U on %U", format_ip46_address,
+ &rd->rd_addr, IP46_TYPE_ANY,
+ format_vnet_sw_if_index_name, vnm, rd->rd_sw_if_index));
}
else
{
@@ -156,15 +155,13 @@ const static dpo_vft_t receive_vft = {
* this means that these graph nodes are ones from which a receive is the
* parent object in the DPO-graph.
*/
-const static char* const receive_ip4_nodes[] =
-{
- "ip4-local",
- NULL,
+const static char *const receive_ip4_nodes[] = {
+ "ip4-receive",
+ NULL,
};
-const static char* const receive_ip6_nodes[] =
-{
- "ip6-local",
- NULL,
+const static char *const receive_ip6_nodes[] = {
+ "ip6-receive",
+ NULL,
};
const static char* const * const receive_nodes[DPO_PROTO_NUM] =
diff --git a/src/vnet/dpo/replicate_dpo.c b/src/vnet/dpo/replicate_dpo.c
index 5f88f12b910..0474fd82984 100644
--- a/src/vnet/dpo/replicate_dpo.c
+++ b/src/vnet/dpo/replicate_dpo.c
@@ -172,6 +172,8 @@ replicate_create_i (u32 num_buckets,
{
replicate_t *rep;
+ ASSERT (num_buckets <= REP_MAX_BUCKETS);
+
rep = replicate_alloc_i();
rep->rep_n_buckets = num_buckets;
rep->rep_proto = rep_proto;
@@ -311,7 +313,8 @@ static inline void
replicate_set_n_buckets (replicate_t *rep,
u32 n_buckets)
{
- rep->rep_n_buckets = n_buckets;
+ ASSERT (n_buckets <= REP_MAX_BUCKETS);
+ rep->rep_n_buckets = n_buckets;
}
void
@@ -331,6 +334,17 @@ replicate_multipath_update (const dpo_id_t *dpo,
rep->rep_proto);
n_buckets = vec_len(nhs);
+ if (n_buckets > REP_MAX_BUCKETS)
+ {
+ vlib_log_err (replicate_logger,
+ "Too many paths for replicate, truncating %d -> %d",
+ n_buckets, REP_MAX_BUCKETS);
+ for (int i = REP_MAX_BUCKETS; i < n_buckets; i++)
+ dpo_reset (&vec_elt (nhs, i).path_dpo);
+ vec_set_len (nhs, REP_MAX_BUCKETS);
+ n_buckets = REP_MAX_BUCKETS;
+ }
+
if (0 == rep->rep_n_buckets)
{
/*
diff --git a/src/vnet/dpo/replicate_dpo.h b/src/vnet/dpo/replicate_dpo.h
index 908c20c1d56..d21f52a4833 100644
--- a/src/vnet/dpo/replicate_dpo.h
+++ b/src/vnet/dpo/replicate_dpo.h
@@ -41,6 +41,12 @@ typedef struct replicate_main_t_
extern replicate_main_t replicate_main;
/**
+ * The number of buckets that a replicate object can have
+ * This must not overflow the rep_n_buckets field
+ */
+#define REP_MAX_BUCKETS 1024
+
+/**
* The number of buckets that a load-balance object can have and still
* fit in one cache-line
*/
@@ -108,6 +114,8 @@ typedef struct replicate_t_ {
STATIC_ASSERT(sizeof(replicate_t) <= CLIB_CACHE_LINE_BYTES,
"A replicate object size exceeds one cacheline");
+STATIC_ASSERT (REP_MAX_BUCKETS <= CLIB_U16_MAX,
+ "Too many buckets for replicate object");
/**
* Flags controlling load-balance formatting/display
diff --git a/src/vnet/error.c b/src/vnet/error.c
new file mode 100644
index 00000000000..473d11135f1
--- /dev/null
+++ b/src/vnet/error.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/error.h>
+#include <vnet/api_errno.h>
+
+static char *error_strings[] = {
+#define _(a, b, c) [-(b)] = c,
+ foreach_vnet_error
+#undef _
+};
+
+clib_error_t *
+vnet_error (vnet_error_t rv, char *fmt, ...)
+{
+ clib_error_t *e, *err = 0;
+ va_list va;
+ vec_add2 (err, e, 1);
+ e->what = format (e->what, "%s", error_strings[-rv]);
+
+ if (fmt)
+ {
+ vec_add1 (e->what, ' ');
+ vec_add1 (e->what, '(');
+ va_start (va, fmt);
+ e->what = va_format (e->what, fmt, &va);
+ vec_add1 (e->what, ')');
+ va_end (va);
+ }
+
+ e->code = rv;
+ return err;
+}
+
+u8 *
+format_vnet_api_errno (u8 *s, va_list *args)
+{
+ vnet_api_error_t api_error = va_arg (*args, vnet_api_error_t);
+#ifdef _
+#undef _
+#endif
+#define _(a, b, c) \
+ case b: \
+ s = format (s, "%s", c); \
+ break;
+ switch (api_error)
+ {
+ foreach_vnet_error default : s = format (s, "UNKNOWN");
+ break;
+ }
+ return s;
+#undef _
+}
diff --git a/src/vnet/error.h b/src/vnet/error.h
new file mode 100644
index 00000000000..fa1337538c4
--- /dev/null
+++ b/src/vnet/error.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+#ifndef included_vnet_error_h
+#define included_vnet_error_h
+
+#include <stdarg.h>
+#include <vppinfra/types.h>
+#include <vppinfra/format.h>
+
+#define foreach_vnet_error \
+ _ (UNSPECIFIED, -1, "Unspecified Error") \
+ _ (INVALID_SW_IF_INDEX, -2, "Invalid sw_if_index") \
+ _ (NO_SUCH_FIB, -3, "No such FIB / VRF") \
+ _ (NO_SUCH_INNER_FIB, -4, "No such inner FIB / VRF") \
+ _ (NO_SUCH_LABEL, -5, "No such label") \
+ _ (NO_SUCH_ENTRY, -6, "No such entry") \
+ _ (INVALID_VALUE, -7, "Invalid value") \
+ _ (INVALID_VALUE_2, -8, "Invalid value #2") \
+ _ (UNIMPLEMENTED, -9, "Unimplemented") \
+ _ (INVALID_SW_IF_INDEX_2, -10, "Invalid sw_if_index #2") \
+ _ (SYSCALL_ERROR_1, -11, "System call error #1") \
+ _ (SYSCALL_ERROR_2, -12, "System call error #2") \
+ _ (SYSCALL_ERROR_3, -13, "System call error #3") \
+ _ (SYSCALL_ERROR_4, -14, "System call error #4") \
+ _ (SYSCALL_ERROR_5, -15, "System call error #5") \
+ _ (SYSCALL_ERROR_6, -16, "System call error #6") \
+ _ (SYSCALL_ERROR_7, -17, "System call error #7") \
+ _ (SYSCALL_ERROR_8, -18, "System call error #8") \
+ _ (SYSCALL_ERROR_9, -19, "System call error #9") \
+ _ (SYSCALL_ERROR_10, -20, "System call error #10") \
+ _ (FEATURE_DISABLED, -30, "Feature disabled by configuration") \
+ _ (INVALID_REGISTRATION, -31, "Invalid registration") \
+ _ (NEXT_HOP_NOT_IN_FIB, -50, "Next hop not in FIB") \
+ _ (UNKNOWN_DESTINATION, -51, "Unknown destination") \
+ _ (NO_PATHS_IN_ROUTE, -52, "No paths specified in route") \
+ _ (NEXT_HOP_NOT_FOUND_MP, -53, "Next hop not found (multipath)") \
+ _ (NO_MATCHING_INTERFACE, -54, "No matching interface for probe") \
+ _ (INVALID_VLAN, -55, "Invalid VLAN") \
+ _ (VLAN_ALREADY_EXISTS, -56, "VLAN subif already exists") \
+ _ (INVALID_SRC_ADDRESS, -57, "Invalid src address") \
+ _ (INVALID_DST_ADDRESS, -58, "Invalid dst address") \
+ _ (ADDRESS_LENGTH_MISMATCH, -59, "Address length mismatch") \
+ _ (ADDRESS_NOT_FOUND_FOR_INTERFACE, -60, "Address not found for interface") \
+ _ (ADDRESS_NOT_DELETABLE, -61, "Address not deletable") \
+ _ (IP6_NOT_ENABLED, -62, "ip6 not enabled") \
+ _ (NO_SUCH_NODE, -63, "No such graph node") \
+ _ (NO_SUCH_NODE2, -64, "No such graph node #2") \
+ _ (NO_SUCH_TABLE, -65, "No such table") \
+ _ (NO_SUCH_TABLE2, -66, "No such table #2") \
+ _ (NO_SUCH_TABLE3, -67, "No such table #3") \
+ _ (SUBIF_ALREADY_EXISTS, -68, "Subinterface already exists") \
+ _ (SUBIF_CREATE_FAILED, -69, "Subinterface creation failed") \
+ _ (INVALID_MEMORY_SIZE, -70, "Invalid memory size requested") \
+ _ (INVALID_INTERFACE, -71, "Invalid interface") \
+ _ (INVALID_VLAN_TAG_COUNT, -72, \
+ "Invalid number of tags for requested operation") \
+ _ (INVALID_ARGUMENT, -73, "Invalid argument") \
+ _ (UNEXPECTED_INTF_STATE, -74, "Unexpected interface state") \
+ _ (TUNNEL_EXIST, -75, "Tunnel already exists") \
+ _ (INVALID_DECAP_NEXT, -76, "Invalid decap-next") \
+ _ (RESPONSE_NOT_READY, -77, "Response not ready") \
+ _ (NOT_CONNECTED, -78, "Not connected to the data plane") \
+ _ (IF_ALREADY_EXISTS, -79, "Interface already exists") \
+ _ (BOND_SLAVE_NOT_ALLOWED, -80, \
+ "Operation not allowed on slave of BondEthernet") \
+ _ (VALUE_EXIST, -81, "Value already exists") \
+ _ (SAME_SRC_DST, -82, "Source and destination are the same") \
+ _ (IP6_MULTICAST_ADDRESS_NOT_PRESENT, -83, \
+ "IP6 multicast address required") \
+ _ (SR_POLICY_NAME_NOT_PRESENT, -84, "Segment routing policy name required") \
+ _ (NOT_RUNNING_AS_ROOT, -85, "Not running as root") \
+ _ (ALREADY_CONNECTED, -86, "Connection to the data plane already exists") \
+ _ (UNSUPPORTED_JNI_VERSION, -87, "Unsupported JNI version") \
+ _ (IP_PREFIX_INVALID, -88, "IP prefix invalid (masked bits set in address") \
+ _ (INVALID_WORKER, -89, "Invalid worker thread") \
+ _ (LISP_DISABLED, -90, "LISP is disabled") \
+ _ (CLASSIFY_TABLE_NOT_FOUND, -91, "Classify table not found") \
+ _ (INVALID_EID_TYPE, -92, "Unsupported LISP EID type") \
+ _ (CANNOT_CREATE_PCAP_FILE, -93, "Cannot create pcap file") \
+ _ (INCORRECT_ADJACENCY_TYPE, -94, \
+ "Invalid adjacency type for this operation") \
+ _ (EXCEEDED_NUMBER_OF_RANGES_CAPACITY, -95, \
+ "Operation would exceed configured capacity of ranges") \
+ _ (EXCEEDED_NUMBER_OF_PORTS_CAPACITY, -96, \
+ "Operation would exceed capacity of number of ports") \
+ _ (INVALID_ADDRESS_FAMILY, -97, "Invalid address family") \
+ _ (INVALID_SUB_SW_IF_INDEX, -98, "Invalid sub-interface sw_if_index") \
+ _ (TABLE_TOO_BIG, -99, "Table too big") \
+ _ (CANNOT_ENABLE_DISABLE_FEATURE, -100, "Cannot enable/disable feature") \
+ _ (BFD_EEXIST, -101, "Duplicate BFD object") \
+ _ (BFD_ENOENT, -102, "No such BFD object") \
+ _ (BFD_EINUSE, -103, "BFD object in use") \
+ _ (BFD_NOTSUPP, -104, "BFD feature not supported") \
+ _ (ADDRESS_IN_USE, -105, "Address in use") \
+ _ (ADDRESS_NOT_IN_USE, -106, "Address not in use") \
+ _ (QUEUE_FULL, -107, "Queue full") \
+ _ (APP_UNSUPPORTED_CFG, -108, "Unsupported application config") \
+ _ (URI_FIFO_CREATE_FAILED, -109, "URI FIFO segment create failed") \
+ _ (LISP_RLOC_LOCAL, -110, "RLOC address is local") \
+ _ (BFD_EAGAIN, -111, "BFD object cannot be manipulated at this time") \
+ _ (INVALID_GPE_MODE, -112, "Invalid GPE mode") \
+ _ (LISP_GPE_ENTRIES_PRESENT, -113, "LISP GPE entries are present") \
+ _ (ADDRESS_FOUND_FOR_INTERFACE, -114, "Address found for interface") \
+ _ (SESSION_CONNECT, -115, "Session failed to connect") \
+ _ (ENTRY_ALREADY_EXISTS, -116, "Entry already exists") \
+ _ (SVM_SEGMENT_CREATE_FAIL, -117, "Svm segment create fail") \
+ _ (APPLICATION_NOT_ATTACHED, -118, "Application not attached") \
+ _ (BD_ALREADY_EXISTS, -119, "Bridge domain already exists") \
+ _ (BD_IN_USE, -120, "Bridge domain has member interfaces") \
+ _ (BD_NOT_MODIFIABLE, -121, "Bridge domain 0 can't be deleted/modified") \
+ _ (BD_ID_EXCEED_MAX, -122, "Bridge domain ID exceeds 16M limit") \
+ _ (SUBIF_DOESNT_EXIST, -123, "Subinterface doesn't exist") \
+ _ (L2_MACS_EVENT_CLINET_PRESENT, -124, \
+ "Client already exist for L2 MACs events") \
+ _ (INVALID_QUEUE, -125, "Invalid queue") \
+ _ (UNSUPPORTED, -126, "Unsupported") \
+ _ (DUPLICATE_IF_ADDRESS, -127, \
+ "Address already present on another interface") \
+ _ (APP_INVALID_NS, -128, "Invalid application namespace") \
+ _ (APP_WRONG_NS_SECRET, -129, "Wrong app namespace secret") \
+ _ (APP_CONNECT_SCOPE, -130, "Connect scope") \
+ _ (APP_ALREADY_ATTACHED, -131, "App already attached") \
+ _ (SESSION_REDIRECT, -132, "Redirect failed") \
+ _ (ILLEGAL_NAME, -133, "Illegal name") \
+ _ (NO_NAME_SERVERS, -134, "No name servers configured") \
+ _ (NAME_SERVER_NOT_FOUND, -135, "Name server not found") \
+ _ (NAME_RESOLUTION_NOT_ENABLED, -136, "Name resolution not enabled") \
+ _ (NAME_SERVER_FORMAT_ERROR, -137, "Server format error (bug!)") \
+ _ (NAME_SERVER_NO_SUCH_NAME, -138, "No such name") \
+ _ (NAME_SERVER_NO_ADDRESSES, -139, "No addresses available") \
+ _ (NAME_SERVER_NEXT_SERVER, -140, "Retry with new server") \
+ _ (APP_CONNECT_FILTERED, -141, "Connect was filtered") \
+ _ (ACL_IN_USE_INBOUND, -142, "Inbound ACL in use") \
+ _ (ACL_IN_USE_OUTBOUND, -143, "Outbound ACL in use") \
+ _ (INIT_FAILED, -144, "Initialization Failed") \
+ _ (NETLINK_ERROR, -145, "Netlink error") \
+ _ (BIER_BSL_UNSUP, -146, "BIER bit-string-length unsupported") \
+ _ (INSTANCE_IN_USE, -147, "Instance in use") \
+ _ (INVALID_SESSION_ID, -148, "Session ID out of range") \
+ _ (ACL_IN_USE_BY_LOOKUP_CONTEXT, -149, "ACL in use by a lookup context") \
+ _ (INVALID_VALUE_3, -150, "Invalid value #3") \
+ _ (NON_ETHERNET, -151, "Interface is not an Ethernet interface") \
+ _ (BD_ALREADY_HAS_BVI, -152, "Bridge domain already has a BVI interface") \
+ _ (INVALID_PROTOCOL, -153, "Invalid Protocol") \
+ _ (INVALID_ALGORITHM, -154, "Invalid Algorithm") \
+ _ (RSRC_IN_USE, -155, "Resource In Use") \
+ _ (KEY_LENGTH, -156, "invalid Key Length") \
+ _ (FIB_PATH_UNSUPPORTED_NH_PROTO, -157, "Unsupported FIB Path protocol") \
+ _ (API_ENDIAN_FAILED, -159, "Endian mismatch detected") \
+ _ (NO_CHANGE, -160, "No change in table") \
+ _ (MISSING_CERT_KEY, -161, "Missing certifcate or key") \
+ _ (LIMIT_EXCEEDED, -162, "limit exceeded") \
+ _ (IKE_NO_PORT, -163, "port not managed by IKE") \
+ _ (UDP_PORT_TAKEN, -164, "UDP port already taken") \
+ _ (EAGAIN, -165, "Retry stream call with cursor") \
+ _ (INVALID_VALUE_4, -166, "Invalid value #4") \
+ _ (BUSY, -167, "Busy") \
+ _ (BUG, -168, "Bug") \
+ _ (FEATURE_ALREADY_DISABLED, -169, "Feature already disabled") \
+ _ (FEATURE_ALREADY_ENABLED, -170, "Feature already enabled") \
+ _ (INVALID_PREFIX_LENGTH, -171, "Invalid prefix length")
+
+typedef enum
+{
+#define _(a, b, c) VNET_ERR_##a = (b),
+ foreach_vnet_error
+#undef _
+ VNET_N_ERROR,
+} vnet_error_t;
+
+clib_error_t __clib_warn_unused_result *vnet_error (vnet_error_t code,
+ char *fmt, ...);
+
+format_function_t format_vnet_api_errno;
+
+#endif
diff --git a/src/vnet/ethernet/arp_packet.h b/src/vnet/ethernet/arp_packet.h
index 6b4dfa6ab3d..9a9df680853 100644
--- a/src/vnet/ethernet/arp_packet.h
+++ b/src/vnet/ethernet/arp_packet.h
@@ -110,22 +110,10 @@ typedef enum
IP4_ARP_N_NEXT,
} ip4_arp_next_t;
-typedef enum
-{
- IP4_ARP_ERROR_THROTTLED,
- IP4_ARP_ERROR_RESOLVED,
- IP4_ARP_ERROR_NO_BUFFERS,
- IP4_ARP_ERROR_REQUEST_SENT,
- IP4_ARP_ERROR_NON_ARP_ADJ,
- IP4_ARP_ERROR_NO_SOURCE_ADDRESS,
-} ip4_arp_error_t;
-
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
mac_address_t mac;
ip4_address_t ip4;
}) ethernet_arp_ip4_over_ethernet_address_t;
-/* *INDENT-ON* */
STATIC_ASSERT (sizeof (ethernet_arp_ip4_over_ethernet_address_t) == 10,
"Packet ethernet address and IP4 address too big");
diff --git a/src/vnet/ethernet/ethernet.h b/src/vnet/ethernet/ethernet.h
index f3dd1a24a4f..858400d08d8 100644
--- a/src/vnet/ethernet/ethernet.h
+++ b/src/vnet/ethernet/ethernet.h
@@ -128,6 +128,15 @@ struct vnet_hw_interface_t;
typedef u32 (ethernet_flag_change_function_t)
(vnet_main_t * vnm, struct vnet_hw_interface_t * hi, u32 flags);
+typedef struct
+{
+ /* ethernet interface flags change */
+ ethernet_flag_change_function_t *flag_change;
+
+ /* set Max Frame Size callback */
+ vnet_interface_set_max_frame_size_function_t *set_max_frame_size;
+} vnet_eth_if_callbacks_t;
+
#define ETHERNET_MIN_PACKET_BYTES 64
#define ETHERNET_MAX_PACKET_BYTES 9216
@@ -161,11 +170,8 @@ typedef struct ethernet_interface
/* Set interface to accept all packets (promiscuous mode). */
#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL 1
- /* Change MTU on interface from hw interface structure */
-#define ETHERNET_INTERFACE_FLAG_MTU 2
-
/* Callback, e.g. to turn on/off promiscuous mode */
- ethernet_flag_change_function_t *flag_change;
+ vnet_eth_if_callbacks_t cb;
u32 driver_instance;
@@ -353,14 +359,6 @@ mac_address_t *ethernet_interface_add_del_address (ethernet_main_t * em,
const u8 * address,
u8 is_add);
-clib_error_t *ethernet_register_interface (vnet_main_t * vnm,
- u32 dev_class_index,
- u32 dev_instance,
- const u8 * address,
- u32 * hw_if_index_return,
- ethernet_flag_change_function_t
- flag_change);
-
void ethernet_delete_interface (vnet_main_t * vnm, u32 hw_if_index);
/* Register given node index to take input for given ethernet type. */
@@ -574,6 +572,18 @@ vnet_get_ethernet_main (void)
return &ethernet_main;
}
+typedef struct
+{
+ u32 dev_class_index;
+ u32 dev_instance;
+ u16 max_frame_size;
+ u16 frame_overhead;
+ vnet_eth_if_callbacks_t cb;
+ const u8 *address;
+} vnet_eth_interface_registration_t;
+
+u32 vnet_eth_register_interface (vnet_main_t *vnm,
+ vnet_eth_interface_registration_t *r);
void ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai);
u8 *ethernet_build_rewrite (vnet_main_t * vnm,
u32 sw_if_index,
diff --git a/src/vnet/ethernet/init.c b/src/vnet/ethernet/init.c
index f78b65c7cc0..3921e1ec0e6 100644
--- a/src/vnet/ethernet/init.c
+++ b/src/vnet/ethernet/init.c
@@ -62,7 +62,6 @@ add_type (ethernet_main_t * em, ethernet_type_t type, char *type_name)
}
/* Built-in ip4 tx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ethernet_output, static) =
{
.arc_name = "ethernet-output",
@@ -77,7 +76,6 @@ VNET_FEATURE_INIT (ethernet_tx_drop, static) =
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static clib_error_t *
ethernet_init (vlib_main_t * vm)
@@ -107,7 +105,6 @@ ethernet_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ethernet_init) =
{
/*
@@ -119,7 +116,6 @@ VLIB_INIT_FUNCTION (ethernet_init) =
"llc_init",
"vnet_feature_init"),
};
-/* *INDENT-ON* */
ethernet_main_t *
ethernet_get_main (vlib_main_t * vm)
diff --git a/src/vnet/ethernet/interface.c b/src/vnet/ethernet/interface.c
index f72b4930049..f1bb6b81070 100644
--- a/src/vnet/ethernet/interface.c
+++ b/src/vnet/ethernet/interface.c
@@ -216,6 +216,7 @@ ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
adj_glean_update_rewrite (ai);
break;
case IP_LOOKUP_NEXT_ARP:
+ case IP_LOOKUP_NEXT_REWRITE:
ip_neighbor_update (vnm, ai);
break;
case IP_LOOKUP_NEXT_BCAST:
@@ -257,7 +258,6 @@ ethernet_update_adjacency (vnet_main_t * vnm, u32 sw_if_index, u32 ai)
case IP_LOOKUP_NEXT_DROP:
case IP_LOOKUP_NEXT_PUNT:
case IP_LOOKUP_NEXT_LOCAL:
- case IP_LOOKUP_NEXT_REWRITE:
case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
case IP_LOOKUP_NEXT_MIDCHAIN:
case IP_LOOKUP_NEXT_ICMP_ERROR:
@@ -303,16 +303,40 @@ ethernet_mac_change (vnet_hw_interface_t * hi,
{
ethernet_address_change_ctx_t *cb;
+ u32 id, sw_if_index;
vec_foreach (cb, em->address_change_callbacks)
- cb->function (em, hi->sw_if_index, cb->function_opaque);
+ {
+ cb->function (em, hi->sw_if_index, cb->function_opaque);
+ /* clang-format off */
+ hash_foreach (id, sw_if_index, hi->sub_interface_sw_if_index_by_id,
+ ({
+ cb->function (em, sw_if_index, cb->function_opaque);
+ }));
+ /* clang-format on */
+ }
}
return (NULL);
}
-/* *INDENT-OFF* */
+static clib_error_t *
+ethernet_set_max_frame_size (vnet_main_t *vnm, vnet_hw_interface_t *hi,
+ u32 frame_size)
+{
+ ethernet_interface_t *ei =
+ pool_elt_at_index (ethernet_main.interfaces, hi->hw_instance);
+
+ if (ei->cb.set_max_frame_size)
+ return ei->cb.set_max_frame_size (vnm, hi, frame_size);
+
+ return vnet_error (
+ VNET_ERR_UNSUPPORTED,
+ "underlying driver doesn't support changing Max Frame Size");
+}
+
VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = {
.name = "Ethernet",
+ .tx_hash_fn_type = VNET_HASH_FN_TYPE_ETHERNET,
.format_address = format_ethernet_address,
.format_header = format_ethernet_header_with_length,
.unformat_hw_address = unformat_ethernet_address,
@@ -320,8 +344,8 @@ VNET_HW_INTERFACE_CLASS (ethernet_hw_interface_class) = {
.build_rewrite = ethernet_build_rewrite,
.update_adjacency = ethernet_update_adjacency,
.mac_addr_change_function = ethernet_mac_change,
+ .set_max_frame_size = ethernet_set_max_frame_size,
};
-/* *INDENT-ON* */
uword
unformat_ethernet_interface (unformat_input_t * input, va_list * args)
@@ -344,49 +368,41 @@ unformat_ethernet_interface (unformat_input_t * input, va_list * args)
return 0;
}
-clib_error_t *
-ethernet_register_interface (vnet_main_t * vnm,
- u32 dev_class_index,
- u32 dev_instance,
- const u8 * address,
- u32 * hw_if_index_return,
- ethernet_flag_change_function_t flag_change)
+u32
+vnet_eth_register_interface (vnet_main_t *vnm,
+ vnet_eth_interface_registration_t *r)
{
ethernet_main_t *em = &ethernet_main;
ethernet_interface_t *ei;
vnet_hw_interface_t *hi;
- clib_error_t *error = 0;
u32 hw_if_index;
pool_get (em->interfaces, ei);
- ei->flag_change = flag_change;
+ clib_memcpy (&ei->cb, &r->cb, sizeof (vnet_eth_if_callbacks_t));
- hw_if_index = vnet_register_interface
- (vnm,
- dev_class_index, dev_instance,
- ethernet_hw_interface_class.index, ei - em->interfaces);
- *hw_if_index_return = hw_if_index;
+ hw_if_index = vnet_register_interface (
+ vnm, r->dev_class_index, r->dev_instance,
+ ethernet_hw_interface_class.index, ei - em->interfaces);
hi = vnet_get_hw_interface (vnm, hw_if_index);
ethernet_setup_node (vnm->vlib_main, hi->output_node_index);
- hi->min_packet_bytes = hi->min_supported_packet_bytes =
- ETHERNET_MIN_PACKET_BYTES;
- hi->max_packet_bytes = hi->max_supported_packet_bytes =
- ETHERNET_MAX_PACKET_BYTES;
+ hi->min_frame_size = ETHERNET_MIN_PACKET_BYTES;
+ hi->frame_overhead =
+ r->frame_overhead ?
+ r->frame_overhead :
+ sizeof (ethernet_header_t) + 2 * sizeof (ethernet_vlan_header_t);
+ hi->max_frame_size = r->max_frame_size ?
+ r->max_frame_size :
+ ethernet_main.default_mtu + hi->frame_overhead;
+ ;
/* Default ethernet MTU, 9000 unless set by ethernet_config see below */
vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, em->default_mtu);
- ethernet_set_mac (hi, ei, address);
-
- if (error)
- {
- pool_put (em->interfaces, ei);
- return error;
- }
- return error;
+ ethernet_set_mac (hi, ei, r->address);
+ return hw_if_index;
}
void
@@ -454,14 +470,14 @@ ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
/* preserve status bits and update last set operation bits */
ei->flags = (ei->flags & ETHERNET_INTERFACE_FLAGS_STATUS_MASK) | opn_flags;
- if (ei->flag_change)
+ if (ei->cb.flag_change)
{
switch (opn_flags)
{
case ETHERNET_INTERFACE_FLAG_DEFAULT_L3:
- if (hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER)
+ if (hi->caps & VNET_HW_IF_CAP_MAC_FILTER)
{
- if (ei->flag_change (vnm, hi, opn_flags) != ~0)
+ if (ei->cb.flag_change (vnm, hi, opn_flags) != ~0)
{
ei->flags |= ETHERNET_INTERFACE_FLAG_STATUS_L3;
return 0;
@@ -472,9 +488,7 @@ ethernet_set_flags (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
/* fall through */
case ETHERNET_INTERFACE_FLAG_ACCEPT_ALL:
ei->flags &= ~ETHERNET_INTERFACE_FLAG_STATUS_L3;
- /* fall through */
- case ETHERNET_INTERFACE_FLAG_MTU:
- return ei->flag_change (vnm, hi, opn_flags);
+ return ei->cb.flag_change (vnm, hi, opn_flags);
default:
return ~0;
}
@@ -520,7 +534,7 @@ simulated_ethernet_interface_tx (vlib_main_t * vm,
while (n_left_from >= 4)
{
u32 sw_if_index0, sw_if_index1, sw_if_index2, sw_if_index3;
- u32 not_all_match_config;
+ u32x4 xor_ifx4;
/* Prefetch next iteration. */
if (PREDICT_TRUE (n_left_from >= 8))
@@ -537,12 +551,11 @@ simulated_ethernet_interface_tx (vlib_main_t * vm,
sw_if_index2 = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
sw_if_index3 = vnet_buffer (b[3])->sw_if_index[VLIB_TX];
- not_all_match_config = (sw_if_index0 ^ sw_if_index1)
- ^ (sw_if_index2 ^ sw_if_index3);
- not_all_match_config += sw_if_index0 ^ new_rx_sw_if_index;
+ xor_ifx4 = u32x4_gather (&sw_if_index0, &sw_if_index1, &sw_if_index2,
+ &sw_if_index3);
/* Speed path / expected case: all pkts on the same intfc */
- if (PREDICT_TRUE (not_all_match_config == 0))
+ if (PREDICT_TRUE (u32x4_is_all_equal (xor_ifx4, new_rx_sw_if_index)))
{
next[0] = next_index;
next[1] = next_index;
@@ -745,7 +758,6 @@ simulated_ethernet_mac_change (vnet_hw_interface_t * hi,
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (ethernet_simulated_device_class) = {
.name = "Loopback",
.format_device_name = format_simulated_ethernet_name,
@@ -753,7 +765,6 @@ VNET_DEVICE_CLASS (ethernet_simulated_device_class) = {
.admin_up_down_function = simulated_ethernet_admin_up_down,
.mac_addr_change_function = simulated_ethernet_mac_change,
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated loopback instance numbers.
@@ -833,13 +844,11 @@ vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address,
{
vnet_main_t *vnm = vnet_get_main ();
vlib_main_t *vm = vlib_get_main ();
- clib_error_t *error;
u32 instance;
u8 address[6];
u32 hw_if_index;
vnet_hw_interface_t *hw_if;
u32 slot;
- int rv = 0;
ASSERT (sw_if_indexp);
@@ -871,18 +880,11 @@ vnet_create_loopback_interface (u32 * sw_if_indexp, u8 * mac_address,
address[5] = instance;
}
- error = ethernet_register_interface
- (vnm,
- ethernet_simulated_device_class.index, instance, address, &hw_if_index,
- /* flag change */ 0);
-
- if (error)
- {
- rv = VNET_API_ERROR_INVALID_REGISTRATION;
- clib_error_report (error);
- return rv;
- }
-
+ vnet_eth_interface_registration_t eir = {};
+ eir.dev_class_index = ethernet_simulated_device_class.index;
+ eir.dev_instance = instance;
+ eir.address = address;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
hw_if = vnet_get_hw_interface (vnm, hw_if_index);
slot = vlib_node_add_named_next_with_slot
(vm, hw_if->tx_node_index,
@@ -951,13 +953,11 @@ create_simulated_ethernet_interfaces (vlib_main_t * vm,
* Example of how to create a loopback interface:
* @cliexcmd{loopback create-interface}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = {
.path = "loopback create-interface",
.short_help = "loopback create-interface [mac <mac-addr>] [instance <instance>]",
.function = create_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
/*?
* Create a loopback interface. Optionally, a MAC Address can be
@@ -970,13 +970,11 @@ VLIB_CLI_COMMAND (create_simulated_ethernet_interface_command, static) = {
* Example of how to create a loopback interface:
* @cliexcmd{create loopback interface}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_loopback_interface_command, static) = {
.path = "create loopback interface",
.short_help = "create loopback interface [mac <mac-addr>] [instance <instance>]",
.function = create_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
ethernet_interface_t *
ethernet_get_interface (ethernet_main_t * em, u32 hw_if_index)
@@ -1187,13 +1185,11 @@ delete_sub_interface (vlib_main_t * vm,
* Example of how to delete a loopback interface:
* @cliexcmd{loopback delete-interface intfc loop0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = {
.path = "loopback delete-interface",
.short_help = "loopback delete-interface intfc <interface>",
.function = delete_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
/*?
* Delete a loopback interface.
@@ -1205,13 +1201,11 @@ VLIB_CLI_COMMAND (delete_simulated_ethernet_interface_command, static) = {
* Example of how to delete a loopback interface:
* @cliexcmd{delete loopback interface intfc loop0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (delete_loopback_interface_command, static) = {
.path = "delete loopback interface",
.short_help = "delete loopback interface intfc <interface>",
.function = delete_simulated_ethernet_interfaces,
};
-/* *INDENT-ON* */
/*?
* Delete a sub-interface.
@@ -1220,13 +1214,11 @@ VLIB_CLI_COMMAND (delete_loopback_interface_command, static) = {
* Example of how to delete a sub-interface:
* @cliexcmd{delete sub-interface GigabitEthernet0/8/0.200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (delete_sub_interface_command, static) = {
.path = "delete sub-interface",
.short_help = "delete sub-interface <interface>",
.function = delete_sub_interface,
};
-/* *INDENT-ON* */
/* ethernet { ... } configuration. */
/*?
diff --git a/src/vnet/ethernet/mac_address.c b/src/vnet/ethernet/mac_address.c
index b7981299700..098b3ce19c1 100644
--- a/src/vnet/ethernet/mac_address.c
+++ b/src/vnet/ethernet/mac_address.c
@@ -15,13 +15,11 @@
#include <vnet/ethernet/mac_address.h>
-/* *INDENT-OFF* */
const mac_address_t ZERO_MAC_ADDRESS = {
.bytes = {
0, 0, 0, 0, 0, 0,
},
};
-/* *INDENT-ON* */
u8 *
format_mac_address_t (u8 * s, va_list * args)
@@ -66,9 +64,9 @@ mac_address_increment (mac_address_t * mac)
{
u64 a;
- a = mac_address_as_u64 (mac);
+ a = ethernet_mac_address_u64 (mac->bytes);
a++;
- mac_address_from_u64 (mac, a);
+ ethernet_mac_address_from_u64 (a, mac->bytes);
}
/*
diff --git a/src/vnet/ethernet/node.c b/src/vnet/ethernet/node.c
index cc0a0c30b9c..e2558eeca41 100644
--- a/src/vnet/ethernet/node.c
+++ b/src/vnet/ethernet/node.c
@@ -225,25 +225,24 @@ identify_subint (ethernet_main_t * em,
// A unicast packet arriving on an L3 interface must have a dmac
// matching the interface mac. If interface has STATUS_L3 bit set
// mac filter is already done.
- if (!(*is_l2 || (ei->flags & ETHERNET_INTERFACE_FLAG_STATUS_L3)))
+ if ((!*is_l2) && ei &&
+ (!(ei->flags & ETHERNET_INTERFACE_FLAG_STATUS_L3)))
{
u64 dmacs[2];
u8 dmacs_bad[2];
ethernet_header_t *e0;
- ethernet_interface_t *ei0;
e0 = (void *) (b0->data + vnet_buffer (b0)->l2_hdr_offset);
dmacs[0] = *(u64 *) e0;
- ei0 = ethernet_get_interface (&ethernet_main, hi->hw_if_index);
- if (ei0 && vec_len (ei0->secondary_addrs))
+ if (vec_len (ei->secondary_addrs))
ethernet_input_inline_dmac_check (hi, dmacs, dmacs_bad,
- 1 /* n_packets */ , ei0,
- 1 /* have_sec_dmac */ );
+ 1 /* n_packets */, ei,
+ 1 /* have_sec_dmac */);
else
ethernet_input_inline_dmac_check (hi, dmacs, dmacs_bad,
- 1 /* n_packets */ , ei0,
- 0 /* have_sec_dmac */ );
+ 1 /* n_packets */, ei,
+ 0 /* have_sec_dmac */);
if (dmacs_bad[0])
*error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
}
@@ -1564,21 +1563,20 @@ ethernet_input_inline (vlib_main_t * vm,
dmacs[0] = *(u64 *) e0;
- if (ei && vec_len (ei->secondary_addrs))
- ethernet_input_inline_dmac_check (hi, dmacs,
- dmacs_bad,
- 1 /* n_packets */ ,
- ei,
- 1 /* have_sec_dmac */ );
- else
- ethernet_input_inline_dmac_check (hi, dmacs,
- dmacs_bad,
- 1 /* n_packets */ ,
- ei,
- 0 /* have_sec_dmac */ );
-
- if (dmacs_bad[0])
- error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+ if (ei)
+ {
+ if (vec_len (ei->secondary_addrs))
+ ethernet_input_inline_dmac_check (
+ hi, dmacs, dmacs_bad, 1 /* n_packets */, ei,
+ 1 /* have_sec_dmac */);
+ else
+ ethernet_input_inline_dmac_check (
+ hi, dmacs, dmacs_bad, 1 /* n_packets */, ei,
+ 0 /* have_sec_dmac */);
+
+ if (dmacs_bad[0])
+ error0 = ETHERNET_ERROR_L3_MAC_MISMATCH;
+ }
skip_dmac_check0:
vlib_buffer_advance (b0, sizeof (ethernet_header_t));
@@ -1897,7 +1895,7 @@ ethernet_sw_interface_get_config (vnet_main_t * vnm,
}
else
{
- // a specific outer + specifc innner vlan id, a common case
+ // a specific outer + specific innner vlan id, a common case
// get the qinq table
if (vlan_table->vlans[si->sub.eth.outer_vlan_id].qinqs == 0)
@@ -2100,7 +2098,6 @@ static char *ethernet_error_strings[] = {
#undef ethernet_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ethernet_input_node) = {
.name = "ethernet-input",
/* Takes a vector of packets. */
@@ -2142,7 +2139,6 @@ VLIB_REGISTER_NODE (ethernet_input_not_l2_node) = {
#undef _
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
diff --git a/src/vnet/ethernet/p2p_ethernet.c b/src/vnet/ethernet/p2p_ethernet.c
index ddf23901419..0ece84fd9cc 100644
--- a/src/vnet/ethernet/p2p_ethernet.c
+++ b/src/vnet/ethernet/p2p_ethernet.c
@@ -146,6 +146,8 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
vnet_feature_enable_disable ("device-input",
"p2p-ethernet-input",
parent_if_index, 1, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "p2p-ethernet-input",
+ parent_if_index, 1, 0, 0);
/* Set promiscuous mode on the l2 interface */
ethernet_set_flags (vnm, parent_if_index,
ETHERNET_INTERFACE_FLAG_ACCEPT_ALL);
@@ -153,7 +155,7 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
}
p2pm->p2p_ethernet_by_sw_if_index[parent_if_index]++;
/* set the interface mode */
- set_int_l2_mode (vm, vnm, MODE_L3, p2pe_subif_id, 0,
+ set_int_l2_mode (vm, vnm, MODE_L3, p2pe_sw_if_index, 0,
L2_BD_PORT_TYPE_NORMAL, 0, 0);
return 0;
}
@@ -176,6 +178,9 @@ p2p_ethernet_add_del (vlib_main_t * vm, u32 parent_if_index,
vnet_feature_enable_disable ("device-input",
"p2p-ethernet-input",
parent_if_index, 0, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth",
+ "p2p-ethernet-input",
+ parent_if_index, 0, 0, 0);
/* Disable promiscuous mode on the l2 interface */
ethernet_set_flags (vnm, parent_if_index, 0);
}
@@ -248,10 +253,11 @@ vnet_p2p_ethernet_add_del (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) =
-{
-.path = "p2p_ethernet ",.function = vnet_p2p_ethernet_add_del,.short_help =
- "p2p_ethernet <intfc> <mac-address> [sub-id <id> | del]",};
+VLIB_CLI_COMMAND (p2p_ethernet_add_del_command, static) = {
+ .path = "p2p_ethernet",
+ .function = vnet_p2p_ethernet_add_del,
+ .short_help = "p2p_ethernet <intfc> <mac-address> [sub-id <id>|del]",
+};
static clib_error_t *
p2p_ethernet_init (vlib_main_t * vm)
diff --git a/src/vnet/ethernet/p2p_ethernet_api.c b/src/vnet/ethernet/p2p_ethernet_api.c
index a9a8cc0a444..903678ce445 100644
--- a/src/vnet/ethernet/p2p_ethernet_api.c
+++ b/src/vnet/ethernet/p2p_ethernet_api.c
@@ -58,14 +58,12 @@ vl_api_p2p_ethernet_add_t_handler (vl_api_p2p_ethernet_add_t * mp)
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_P2P_ETHERNET_ADD_REPLY,
({
rmp->sw_if_index = htonl(p2pe_if_index);
}));
- /* *INDENT-ON* */
}
void
diff --git a/src/vnet/ethernet/p2p_ethernet_input.c b/src/vnet/ethernet/p2p_ethernet_input.c
index 3e9589e0e19..3d81e99cff2 100644
--- a/src/vnet/ethernet/p2p_ethernet_input.c
+++ b/src/vnet/ethernet/p2p_ethernet_input.c
@@ -235,7 +235,6 @@ VLIB_NODE_FN (p2p_ethernet_input_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (p2p_ethernet_input_node) = {
.name = "p2p-ethernet-input",
.vector_size = sizeof (u32),
@@ -253,7 +252,6 @@ VLIB_REGISTER_NODE (p2p_ethernet_input_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ethernet/packet.h b/src/vnet/ethernet/packet.h
index e1e42badd06..007f93596f3 100644
--- a/src/vnet/ethernet/packet.h
+++ b/src/vnet/ethernet/packet.h
@@ -184,7 +184,6 @@ typedef struct
#define ETHERNET_N_PBB (1 << 24)
} ethernet_pbb_header_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
/* Backbone source/destination address. */
@@ -201,7 +200,6 @@ typedef CLIB_PACKED (struct
/* 3 bit priority, 1 bit DEI, 1 bit UCA, 3 bit RES and 24 bit I_SID (service identifier) */
u32 priority_dei_uca_res_sid;
}) ethernet_pbb_header_packed_t;
-/* *INDENT-ON* */
#endif /* included_ethernet_packet_h */
diff --git a/src/vnet/feature/feature.c b/src/vnet/feature/feature.c
index c93f586c349..a7246fbb16a 100644
--- a/src/vnet/feature/feature.c
+++ b/src/vnet/feature/feature.c
@@ -293,9 +293,10 @@ vnet_feature_enable_disable_with_index (u8 arc_index, u32 feature_index,
fm->sw_if_index_has_features[arc_index] =
clib_bitmap_set (fm->sw_if_index_has_features[arc_index], sw_if_index,
(feature_count > 0));
+ fm->feature_count_by_sw_if_index[arc_index][sw_if_index] = feature_count;
+
vnet_feature_reg_invoke (sw_if_index, arc_index, (feature_count > 0));
- fm->feature_count_by_sw_if_index[arc_index][sw_if_index] = feature_count;
return 0;
}
@@ -375,6 +376,52 @@ vnet_feature_is_enabled (const char *arc_name, const char *feature_node_name,
return 0;
}
+u32
+vnet_feature_get_end_node (u8 arc_index, u32 sw_if_index)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ u32 ci;
+
+ if (arc_index == (u8) ~0)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ cm = &fm->feature_config_mains[arc_index];
+ vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
+ ci = cm->config_index_by_sw_if_index[sw_if_index];
+
+ return (vnet_config_get_end_node (vlib_get_main (), &cm->config_main, ci));
+}
+
+u32
+vnet_feature_reset_end_node (u8 arc_index, u32 sw_if_index)
+{
+ vnet_feature_main_t *fm = &feature_main;
+ vnet_feature_config_main_t *cm;
+ u32 ci;
+
+ cm = &fm->feature_config_mains[arc_index];
+ vec_validate_init_empty (cm->config_index_by_sw_if_index, sw_if_index, ~0);
+ ci = cm->config_index_by_sw_if_index[sw_if_index];
+
+ ci = vnet_config_reset_end_node (vlib_get_main (), &cm->config_main, ci);
+
+ if (ci != ~0)
+ cm->config_index_by_sw_if_index[sw_if_index] = ci;
+
+ i16 feature_count;
+
+ if (NULL == fm->feature_count_by_sw_if_index ||
+ vec_len (fm->feature_count_by_sw_if_index) <= arc_index ||
+ vec_len (fm->feature_count_by_sw_if_index[arc_index]) <= sw_if_index)
+ feature_count = 0;
+ else
+ feature_count = fm->feature_count_by_sw_if_index[arc_index][sw_if_index];
+
+ vnet_feature_reg_invoke (sw_if_index, arc_index, (feature_count > 0));
+
+ return ci;
+}
u32
vnet_feature_modify_end_node (u8 arc_index,
@@ -400,6 +447,17 @@ vnet_feature_modify_end_node (u8 arc_index,
if (ci != ~0)
cm->config_index_by_sw_if_index[sw_if_index] = ci;
+ i16 feature_count;
+
+ if (NULL == fm->feature_count_by_sw_if_index ||
+ vec_len (fm->feature_count_by_sw_if_index) <= arc_index ||
+ vec_len (fm->feature_count_by_sw_if_index[arc_index]) <= sw_if_index)
+ feature_count = 0;
+ else
+ feature_count = fm->feature_count_by_sw_if_index[arc_index][sw_if_index];
+
+ vnet_feature_reg_invoke (sw_if_index, arc_index, (feature_count > 0));
+
return ci;
}
@@ -475,13 +533,11 @@ show_features_command_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_features_command, static) = {
.path = "show features",
.short_help = "show features [verbose]",
.function = show_features_command_fn,
};
-/* *INDENT-ON* */
/** Display the set of driver features configured on a specific interface
* Called by "show interface" handler
@@ -642,14 +698,12 @@ done:
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_feature_command, static) = {
.path = "set interface feature",
.short_help = "set interface feature <intfc> <feature_name> arc <arc_name> "
"[disable]",
.function = set_interface_features_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
vnet_feature_add_del_sw_interface (vnet_main_t * vnm, u32 sw_if_index,
diff --git a/src/vnet/feature/feature.h b/src/vnet/feature/feature.h
index 9aa32182ef6..b1917e8df13 100644
--- a/src/vnet/feature/feature.h
+++ b/src/vnet/feature/feature.h
@@ -222,6 +222,10 @@ vnet_feature_enable_disable (const char *arc_name, const char *node_name,
u32
vnet_feature_modify_end_node (u8 arc_index, u32 sw_if_index, u32 node_index);
+u32 vnet_feature_get_end_node (u8 arc_index, u32 sw_if_index);
+
+u32 vnet_feature_reset_end_node (u8 arc_index, u32 sw_if_index);
+
static_always_inline u32
vnet_get_feature_count (u8 arc, u32 sw_if_index)
{
@@ -340,83 +344,8 @@ vnet_device_input_have_features (u32 sw_if_index)
}
static_always_inline void
-vnet_feature_start_device_input_x1 (u32 sw_if_index, u32 * next0,
- vlib_buffer_t * b0)
-{
- vnet_feature_main_t *fm = &feature_main;
- vnet_feature_config_main_t *cm;
- u8 feature_arc_index = fm->device_input_feature_arc_index;
- cm = &fm->feature_config_mains[feature_arc_index];
-
- if (PREDICT_FALSE
- (clib_bitmap_get
- (fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
- {
- /*
- * Save next0 so that the last feature in the chain
- * can skip ethernet-input if indicated...
- */
- u16 adv;
-
- adv = device_input_next_node_advance[*next0];
- vlib_buffer_advance (b0, -adv);
-
- vnet_buffer (b0)->feature_arc_index = feature_arc_index;
- b0->current_config_index =
- vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
- vnet_get_config_data (&cm->config_main, &b0->current_config_index,
- next0, /* # bytes of config data */ 0);
- }
-}
-
-static_always_inline void
-vnet_feature_start_device_input_x2 (u32 sw_if_index,
- u32 * next0,
- u32 * next1,
- vlib_buffer_t * b0, vlib_buffer_t * b1)
-{
- vnet_feature_main_t *fm = &feature_main;
- vnet_feature_config_main_t *cm;
- u8 feature_arc_index = fm->device_input_feature_arc_index;
- cm = &fm->feature_config_mains[feature_arc_index];
-
- if (PREDICT_FALSE
- (clib_bitmap_get
- (fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
- {
- /*
- * Save next0 so that the last feature in the chain
- * can skip ethernet-input if indicated...
- */
- u16 adv;
-
- adv = device_input_next_node_advance[*next0];
- vlib_buffer_advance (b0, -adv);
-
- adv = device_input_next_node_advance[*next1];
- vlib_buffer_advance (b1, -adv);
-
- vnet_buffer (b0)->feature_arc_index = feature_arc_index;
- vnet_buffer (b1)->feature_arc_index = feature_arc_index;
- b0->current_config_index =
- vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
- b1->current_config_index = b0->current_config_index;
- vnet_get_config_data (&cm->config_main, &b0->current_config_index,
- next0, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b1->current_config_index,
- next1, /* # bytes of config data */ 0);
- }
-}
-
-static_always_inline void
-vnet_feature_start_device_input_x4 (u32 sw_if_index,
- u32 * next0,
- u32 * next1,
- u32 * next2,
- u32 * next3,
- vlib_buffer_t * b0,
- vlib_buffer_t * b1,
- vlib_buffer_t * b2, vlib_buffer_t * b3)
+vnet_feature_start_device_input (u32 sw_if_index, u32 *next0,
+ vlib_buffer_t *b0)
{
vnet_feature_main_t *fm = &feature_main;
vnet_feature_config_main_t *cm;
@@ -427,43 +356,11 @@ vnet_feature_start_device_input_x4 (u32 sw_if_index,
(clib_bitmap_get
(fm->sw_if_index_has_features[feature_arc_index], sw_if_index)))
{
- /*
- * Save next0 so that the last feature in the chain
- * can skip ethernet-input if indicated...
- */
- u16 adv;
-
- adv = device_input_next_node_advance[*next0];
- vlib_buffer_advance (b0, -adv);
-
- adv = device_input_next_node_advance[*next1];
- vlib_buffer_advance (b1, -adv);
-
- adv = device_input_next_node_advance[*next2];
- vlib_buffer_advance (b2, -adv);
-
- adv = device_input_next_node_advance[*next3];
- vlib_buffer_advance (b3, -adv);
-
vnet_buffer (b0)->feature_arc_index = feature_arc_index;
- vnet_buffer (b1)->feature_arc_index = feature_arc_index;
- vnet_buffer (b2)->feature_arc_index = feature_arc_index;
- vnet_buffer (b3)->feature_arc_index = feature_arc_index;
-
b0->current_config_index =
vec_elt (cm->config_index_by_sw_if_index, sw_if_index);
- b1->current_config_index = b0->current_config_index;
- b2->current_config_index = b0->current_config_index;
- b3->current_config_index = b0->current_config_index;
-
vnet_get_config_data (&cm->config_main, &b0->current_config_index,
next0, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b1->current_config_index,
- next1, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b2->current_config_index,
- next2, /* # bytes of config data */ 0);
- vnet_get_config_data (&cm->config_main, &b3->current_config_index,
- next3, /* # bytes of config data */ 0);
}
}
diff --git a/src/vnet/feature/registration.c b/src/vnet/feature/registration.c
index 537a4ada6e4..bc20412b9cf 100644
--- a/src/vnet/feature/registration.c
+++ b/src/vnet/feature/registration.c
@@ -351,12 +351,10 @@ again:
*in_feature_nodes = feature_nodes;
/* Finally, clean up all the shit we allocated */
- /* *INDENT-OFF* */
hash_foreach_pair (hp, index_by_name,
({
vec_add1 (keys_to_delete, (u8 *)hp->key);
}));
- /* *INDENT-ON* */
hash_free (index_by_name);
for (i = 0; i < vec_len (keys_to_delete); i++)
vec_free (keys_to_delete[i]);
diff --git a/src/vnet/fib/fib.c b/src/vnet/fib/fib.c
index ddfa830bb0f..cce03b4b49c 100644
--- a/src/vnet/fib/fib.c
+++ b/src/vnet/fib/fib.c
@@ -32,9 +32,7 @@ fib_module_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (fib_module_init) =
{
.runs_after = VLIB_INITS("dpo_module_init", "adj_module_init"),
};
-/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_api.c b/src/vnet/fib/fib_api.c
index 0254c551411..07d6699d87a 100644
--- a/src/vnet/fib/fib_api.c
+++ b/src/vnet/fib/fib_api.c
@@ -65,13 +65,11 @@ static void
fib_api_next_hop_decode (const vl_api_fib_path_t *in,
ip46_address_t *out)
{
- if (in->proto == FIB_API_PATH_NH_PROTO_IP4)
- clib_memcpy (&out->ip4, &in->nh.address.ip4, sizeof (out->ip4));
- else if (in->proto == FIB_API_PATH_NH_PROTO_IP6)
- clib_memcpy (&out->ip6, &in->nh.address.ip6, sizeof (out->ip6));
+ ASSERT (FIB_API_PATH_NH_PROTO_IP4 == in->proto || FIB_API_PATH_NH_PROTO_IP6 == in->proto);
+ *out = to_ip46 (FIB_API_PATH_NH_PROTO_IP6 == in->proto, (void *)&in->nh.address);
}
-static vl_api_fib_path_nh_proto_t
+vl_api_fib_path_nh_proto_t
fib_api_path_dpo_proto_to_nh (dpo_proto_t dproto)
{
switch (dproto)
@@ -110,7 +108,7 @@ fib_api_next_hop_encode (const fib_route_path_t *rpath,
sizeof (rpath->frp_addr.ip6));
}
-static int
+int
fib_api_path_nh_proto_to_dpo (vl_api_fib_path_nh_proto_t pp,
dpo_proto_t *dproto)
{
@@ -296,22 +294,17 @@ fib_api_path_decode (vl_api_fib_path_t *in,
if (!(out->frp_flags & FIB_ROUTE_PATH_BIER_IMP))
{
- fib_api_next_hop_decode(in, &out->frp_addr);
+ index_t bdti;
+
+ bdti = bier_disp_table_find(ntohl(in->table_id));
- if (ip46_address_is_zero(&out->frp_addr))
+ if (INDEX_INVALID != bdti)
+ {
+ out->frp_fib_index = bdti;
+ }
+ else
{
- index_t bdti;
-
- bdti = bier_disp_table_find(ntohl(in->table_id));
-
- if (INDEX_INVALID != bdti)
- {
- out->frp_fib_index = bdti;
- }
- else
- {
- return (VNET_API_ERROR_NO_SUCH_FIB);
- }
+ return (VNET_API_ERROR_NO_SUCH_FIB);
}
}
break;
@@ -455,6 +448,9 @@ fib_api_route_add_del (u8 is_add,
fib_entry_flag_t entry_flags,
fib_route_path_t *rpaths)
{
+ if (!fib_prefix_validate(prefix)) {
+ return (VNET_API_ERROR_INVALID_PREFIX_LENGTH);
+ }
if (is_multipath)
{
if (vec_len(rpaths) == 0)
diff --git a/src/vnet/fib/fib_api.h b/src/vnet/fib/fib_api.h
index 7fd7d16cb33..0c59531b438 100644
--- a/src/vnet/fib/fib_api.h
+++ b/src/vnet/fib/fib_api.h
@@ -29,6 +29,8 @@ struct _vl_api_fib_prefix;
/**
* Encode and decode functions from the API types to internal types
*/
+extern vl_api_fib_path_nh_proto_t fib_api_path_dpo_proto_to_nh (dpo_proto_t dproto);
+extern int fib_api_path_nh_proto_to_dpo (vl_api_fib_path_nh_proto_t pp, dpo_proto_t *dproto);
extern void fib_api_path_encode(const fib_route_path_t * api_rpath,
vl_api_fib_path_t *out);
extern int fib_api_path_decode(vl_api_fib_path_t *in,
diff --git a/src/vnet/fib/fib_attached_export.c b/src/vnet/fib/fib_attached_export.c
index 5ea96fd0cf6..c6ba0575a04 100644
--- a/src/vnet/fib/fib_attached_export.c
+++ b/src/vnet/fib/fib_attached_export.c
@@ -106,8 +106,7 @@ fib_entry_ae_add_or_lock (fib_node_index_t connected)
{
fed = fib_entry_delegate_find_or_add(entry,
FIB_ENTRY_DELEGATE_ATTACHED_EXPORT);
- pool_get(fib_ae_export_pool, export);
- clib_memset(export, 0, sizeof(*export));
+ pool_get_zero(fib_ae_export_pool, export);
fed->fd_index = (export - fib_ae_export_pool);
export->faee_ei = connected;
@@ -249,13 +248,14 @@ fib_attached_export_import (fib_entry_t *fib_entry,
*/
fei = fib_entry_get_index(fib_entry);
- pool_get(fib_ae_import_pool, import);
+ pool_get_zero(fib_ae_import_pool, import);
import->faei_import_fib = fib_entry->fe_fib_index;
import->faei_export_fib = export_fib;
import->faei_prefix = fib_entry->fe_prefix;
import->faei_import_entry = fib_entry_get_index(fib_entry);
import->faei_export_sibling = ~0;
+ import->faei_exporter = FIB_NODE_INDEX_INVALID;
/*
* do an exact match in the export table
@@ -273,7 +273,6 @@ fib_attached_export_import (fib_entry_t *fib_entry,
import->faei_export_entry =
fib_table_lookup(import->faei_export_fib,
&import->faei_prefix);
- import->faei_exporter = FIB_NODE_INDEX_INVALID;
}
else
{
@@ -379,6 +378,7 @@ fib_attached_export_purge (fib_entry_t *fib_entry)
*/
if (0 == --export->faee_locks)
{
+ vec_free (export->faee_importers);
pool_put(fib_ae_export_pool, export);
fib_entry_delegate_remove(export_entry,
FIB_ENTRY_DELEGATE_ATTACHED_EXPORT);
diff --git a/src/vnet/fib/fib_bfd.c b/src/vnet/fib/fib_bfd.c
index b02fbc67a63..6bfd29ae2cc 100644
--- a/src/vnet/fib/fib_bfd.c
+++ b/src/vnet/fib/fib_bfd.c
@@ -188,9 +188,7 @@ fib_bfd_main_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (fib_bfd_main_init) =
{
.runs_after = VLIB_INITS("bfd_main_init"),
};
-/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_entry.c b/src/vnet/fib/fib_entry.c
index 119a7ac5e77..b78346ce45a 100644
--- a/src/vnet/fib/fib_entry.c
+++ b/src/vnet/fib/fib_entry.c
@@ -293,58 +293,6 @@ fib_entry_get_flags (fib_node_index_t fib_entry_index)
return (fib_entry_get_flags_i(fib_entry_get(fib_entry_index)));
}
-/*
- * fib_entry_back_walk_notify
- *
- * A back walk has reach this entry.
- */
-static fib_node_back_walk_rc_t
-fib_entry_back_walk_notify (fib_node_t *node,
- fib_node_back_walk_ctx_t *ctx)
-{
- fib_entry_t *fib_entry;
-
- fib_entry = fib_entry_from_fib_node(node);
-
- if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason ||
- FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason ||
- FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason ||
- FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason ||
- FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason ||
- FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
- {
- fib_entry_src_action_reactivate(fib_entry,
- fib_entry_get_best_source(
- fib_entry_get_index(fib_entry)));
- }
-
- /*
- * all other walk types can be reclassifed to a re-evaluate to
- * all recursive dependents.
- * By reclassifying we ensure that should any of these walk types meet
- * they can be merged.
- */
- ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
-
- /*
- * ... and nothing is forced sync from now on.
- */
- ctx->fnbw_flags &= ~FIB_NODE_BW_FLAG_FORCE_SYNC;
-
- FIB_ENTRY_DBG(fib_entry, "bw:%U",
- format_fib_node_bw_reason, ctx->fnbw_reason);
-
- /*
- * propagate the backwalk further if we haven't already reached the
- * maximum depth.
- */
- fib_walk_sync(FIB_NODE_TYPE_ENTRY,
- fib_entry_get_index(fib_entry),
- ctx);
-
- return (FIB_NODE_BACK_WALK_CONTINUE);
-}
-
static void
fib_entry_show_memory (void)
{
@@ -373,16 +321,6 @@ fib_entry_show_memory (void)
sizeof(fib_path_ext_t));
}
-/*
- * The FIB path-list's graph node virtual function table
- */
-static const fib_node_vft_t fib_entry_vft = {
- .fnv_get = fib_entry_get_node,
- .fnv_last_lock = fib_entry_last_lock_gone,
- .fnv_back_walk = fib_entry_back_walk_notify,
- .fnv_mem_show = fib_entry_show_memory,
-};
-
/**
* @brief Contribute the set of Adjacencies that this entry forwards with
* to build the uRPF list of its children
@@ -599,8 +537,18 @@ fib_entry_alloc (u32 fib_index,
{
fib_entry_t *fib_entry;
fib_prefix_t *fep;
+ u8 need_barrier_sync = pool_get_will_expand (fib_entry_pool);
+ vlib_main_t *vm = vlib_get_main();
+ ASSERT (vm->thread_index == 0);
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_sync (vm);
pool_get(fib_entry_pool, fib_entry);
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_release (vm);
+
clib_memset(fib_entry, 0, sizeof(*fib_entry));
fib_node_init(&fib_entry->fe_node,
@@ -634,7 +582,8 @@ fib_entry_alloc (u32 fib_index,
static fib_entry_t*
fib_entry_post_flag_update_actions (fib_entry_t *fib_entry,
- fib_entry_flag_t old_flags)
+ fib_entry_flag_t old_flags,
+ u32 new_fib_index)
{
fib_node_index_t fei;
@@ -659,12 +608,14 @@ fib_entry_post_flag_update_actions (fib_entry_t *fib_entry,
* there is an assumption here that the entry resolves via only
* one interface and that it is the cross VRF interface.
*/
- u32 sw_if_index = fib_path_list_get_resolving_interface(fib_entry->fe_parent);
-
- fib_attached_export_import(fib_entry,
- fib_table_get_index_for_sw_if_index(
- fib_entry_get_proto(fib_entry),
- sw_if_index));
+ if (~0 == new_fib_index)
+ {
+ u32 sw_if_index = fib_path_list_get_resolving_interface(fib_entry->fe_parent);
+ new_fib_index = fib_table_get_index_for_sw_if_index(
+ fib_entry_get_proto(fib_entry),
+ sw_if_index);
+ }
+ fib_attached_export_import(fib_entry, new_fib_index);
}
else if (was_import && !is_import)
{
@@ -673,6 +624,14 @@ fib_entry_post_flag_update_actions (fib_entry_t *fib_entry,
*/
fib_attached_export_purge(fib_entry);
}
+ else if (was_import && is_import && ~0 != new_fib_index)
+ {
+ /*
+ * transition from export from one table to another
+ */
+ fib_attached_export_purge(fib_entry);
+ fib_attached_export_import(fib_entry, new_fib_index);
+ }
/*
* else
* no change. nothing to do.
@@ -706,8 +665,7 @@ fib_entry_post_install_actions (fib_entry_t *fib_entry,
fib_source_t source,
fib_entry_flag_t old_flags)
{
- fib_entry = fib_entry_post_flag_update_actions(fib_entry,
- old_flags);
+ fib_entry = fib_entry_post_flag_update_actions(fib_entry, old_flags, ~0);
fib_entry = fib_entry_src_action_installed(fib_entry, source);
return (fib_entry);
@@ -979,7 +937,7 @@ fib_entry_source_removed (fib_entry_t *fib_entry,
/*
* no more sources left. this entry is toast.
*/
- fib_entry = fib_entry_post_flag_update_actions(fib_entry, old_flags);
+ fib_entry = fib_entry_post_flag_update_actions(fib_entry, old_flags, ~0);
fib_entry_src_action_uninstall(fib_entry);
return (FIB_ENTRY_SRC_FLAG_NONE);
@@ -1153,7 +1111,7 @@ fib_entry_special_remove (fib_node_index_t fib_entry_index,
/*
* no more sources left. this entry is toast.
*/
- fib_entry = fib_entry_post_flag_update_actions(fib_entry, bflags);
+ fib_entry = fib_entry_post_flag_update_actions(fib_entry, bflags, ~0);
fib_entry_src_action_uninstall(fib_entry);
return (FIB_ENTRY_SRC_FLAG_NONE);
}
@@ -1470,6 +1428,126 @@ fib_entry_recursive_loop_detect (fib_node_index_t entry_index,
return (is_looped);
}
+/*
+ * fib_entry_attached_cross_table
+ *
+ * Return true if the route is attached via an interface that
+ * is not in the same table as the route
+ */
+static int
+fib_entry_attached_cross_table (const fib_entry_t *fib_entry,
+ u32 fib_index)
+{
+ const fib_prefix_t *pfx = &fib_entry->fe_prefix;
+
+ switch (pfx->fp_proto)
+ {
+ case FIB_PROTOCOL_MPLS:
+ /* MPLS routes are never imported/exported */
+ return (0);
+ case FIB_PROTOCOL_IP6:
+ /* Ignore link local addresses these also can't be imported/exported */
+ if (ip6_address_is_link_local_unicast (&pfx->fp_addr.ip6))
+ {
+ return (0);
+ }
+ break;
+ case FIB_PROTOCOL_IP4:
+ break;
+ }
+
+ return (fib_entry->fe_fib_index != fib_index);
+}
+
+/*
+ * fib_entry_back_walk_notify
+ *
+ * A back walk has reach this entry.
+ */
+static fib_node_back_walk_rc_t
+fib_entry_back_walk_notify (fib_node_t *node,
+ fib_node_back_walk_ctx_t *ctx)
+{
+ fib_source_t best_source;
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *bsrc;
+
+ fib_entry = fib_entry_from_fib_node(node);
+ bsrc = fib_entry_get_best_src_i(fib_entry);
+ best_source = fib_entry_src_get_source(bsrc);
+
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_BIND & ctx->fnbw_reason)
+ {
+ fib_entry_flag_t bflags;
+
+ bflags = fib_entry_src_get_flags(bsrc);
+
+ fib_entry_src_action_reactivate(fib_entry, best_source);
+
+ /* re-evaluate whether the prefix is cross table */
+ if (fib_entry_attached_cross_table(
+ fib_entry, ctx->interface_bind.fnbw_to_fib_index) &&
+ !(bsrc->fes_entry_flags & FIB_ENTRY_FLAG_NO_ATTACHED_EXPORT))
+ {
+ bsrc->fes_entry_flags |= FIB_ENTRY_FLAG_IMPORT;
+ }
+ else
+ {
+ bsrc->fes_entry_flags &= ~FIB_ENTRY_FLAG_IMPORT;
+ }
+
+ fib_entry = fib_entry_post_flag_update_actions(
+ fib_entry, bflags,
+ ctx->interface_bind.fnbw_to_fib_index);
+ }
+ else if (FIB_NODE_BW_REASON_FLAG_EVALUATE & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_ADJ_DOWN & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_UP & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_BIND & ctx->fnbw_reason ||
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE & ctx->fnbw_reason)
+ {
+ fib_entry_src_action_reactivate(fib_entry, best_source);
+ }
+
+ /*
+ * all other walk types can be reclassifed to a re-evaluate to
+ * all recursive dependents.
+ * By reclassifying we ensure that should any of these walk types meet
+ * they can be merged.
+ */
+ ctx->fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE;
+
+ /*
+ * ... and nothing is forced sync from now on.
+ */
+ ctx->fnbw_flags &= ~FIB_NODE_BW_FLAG_FORCE_SYNC;
+
+ FIB_ENTRY_DBG(fib_entry, "bw:%U",
+ format_fib_node_bw_reason, ctx->fnbw_reason);
+
+ /*
+ * propagate the backwalk further if we haven't already reached the
+ * maximum depth.
+ */
+ fib_walk_sync(FIB_NODE_TYPE_ENTRY,
+ fib_entry_get_index(fib_entry),
+ ctx);
+
+ return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * The FIB path-list's graph node virtual function table
+ */
+static const fib_node_vft_t fib_entry_vft = {
+ .fnv_get = fib_entry_get_node,
+ .fnv_last_lock = fib_entry_last_lock_gone,
+ .fnv_back_walk = fib_entry_back_walk_notify,
+ .fnv_mem_show = fib_entry_show_memory,
+};
+
u32
fib_entry_get_resolving_interface (fib_node_index_t entry_index)
{
diff --git a/src/vnet/fib/fib_entry.h b/src/vnet/fib/fib_entry.h
index 4053ff65181..7331f803ec4 100644
--- a/src/vnet/fib/fib_entry.h
+++ b/src/vnet/fib/fib_entry.h
@@ -154,9 +154,13 @@ typedef enum fib_entry_src_attribute_t_ {
*/
FIB_ENTRY_SRC_ATTRIBUTE_INHERITED,
/**
+ * the source is currently used as glean src address
+ */
+ FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN,
+ /**
* Marker. add new entries before this one.
*/
- FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_INHERITED,
+ FIB_ENTRY_SRC_ATTRIBUTE_LAST = FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN,
} fib_entry_src_attribute_t;
@@ -166,6 +170,7 @@ typedef enum fib_entry_src_attribute_t_ {
[FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE] = "active", \
[FIB_ENTRY_SRC_ATTRIBUTE_STALE] = "stale", \
[FIB_ENTRY_SRC_ATTRIBUTE_INHERITED] = "inherited", \
+ [FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN] = "provides-glean", \
}
#define FOR_EACH_FIB_SRC_ATTRIBUTE(_item) \
@@ -180,6 +185,7 @@ typedef enum fib_entry_src_flag_t_ {
FIB_ENTRY_SRC_FLAG_ACTIVE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_ACTIVE),
FIB_ENTRY_SRC_FLAG_STALE = (1 << FIB_ENTRY_SRC_ATTRIBUTE_STALE),
FIB_ENTRY_SRC_FLAG_INHERITED = (1 << FIB_ENTRY_SRC_ATTRIBUTE_INHERITED),
+ FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN = (1 << FIB_ENTRY_SRC_ATTRIBUTE_PROVIDES_GLEAN),
} __attribute__ ((packed)) fib_entry_src_flag_t;
extern u8 * format_fib_entry_src_flags(u8 *s, va_list *args);
@@ -421,6 +427,9 @@ extern const int fib_entry_get_dpo_for_source (
fib_node_index_t fib_entry_index,
fib_source_t source,
dpo_id_t *dpo);
+extern fib_node_index_t fib_entry_get_path_list_for_source (
+ fib_node_index_t fib_entry_index,
+ fib_source_t source);
extern adj_index_t fib_entry_get_adj(fib_node_index_t fib_entry_index);
diff --git a/src/vnet/fib/fib_entry_src.c b/src/vnet/fib/fib_entry_src.c
index a4a4f1ae0b5..c79b745b5b5 100644
--- a/src/vnet/fib/fib_entry_src.c
+++ b/src/vnet/fib/fib_entry_src.c
@@ -46,6 +46,7 @@ fib_entry_src_get_vft (const fib_entry_src_t *esrc)
return (&fib_entry_src_bh_vft[FIB_SOURCE_BH_INTERPOSE]);
}
+ ASSERT(bh < FIB_SOURCE_BH_MAX);
return (&fib_entry_src_bh_vft[bh]);
}
@@ -257,6 +258,7 @@ typedef struct fib_entry_src_collect_forwarding_ctx_t_
fib_forward_chain_type_t fct;
int n_recursive_constrained;
u16 preference;
+ dpo_proto_t payload_proto;
} fib_entry_src_collect_forwarding_ctx_t;
/**
@@ -289,47 +291,6 @@ fib_entry_src_valid_out_label (mpls_label_t label)
MPLS_IETF_IMPLICIT_NULL_LABEL == label));
}
-/**
- * @brief Turn the chain type requested by the client into the one they
- * really wanted
- */
-fib_forward_chain_type_t
-fib_entry_chain_type_fixup (const fib_entry_t *entry,
- fib_forward_chain_type_t fct)
-{
- /*
- * The EOS chain is a tricky since one cannot know the adjacency
- * to link to without knowing what the packets payload protocol
- * will be once the label is popped.
- */
- fib_forward_chain_type_t dfct;
-
- if (FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct)
- {
- return (fct);
- }
-
- dfct = fib_entry_get_default_chain_type(entry);
-
- if (FIB_FORW_CHAIN_TYPE_MPLS_EOS == dfct)
- {
- /*
- * If the entry being asked is a eos-MPLS label entry,
- * then use the payload-protocol field, that we stashed there
- * for just this purpose
- */
- return (fib_forw_chain_type_from_dpo_proto(
- entry->fe_prefix.fp_payload_proto));
- }
- /*
- * else give them what this entry would be by default. i.e. if it's a v6
- * entry, then the label its local labelled should be carrying v6 traffic.
- * If it's a non-EOS label entry, then there are more labels and we want
- * a non-eos chain.
- */
- return (dfct);
-}
-
static dpo_proto_t
fib_prefix_get_payload_proto (const fib_prefix_t *pfx)
{
@@ -371,7 +332,8 @@ fib_entry_src_get_path_forwarding (fib_node_index_t path_index,
nh->path_index = path_index;
nh->path_weight = fib_path_get_weight(path_index);
- fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo);
+ fib_path_contribute_forwarding(path_index, ctx->fct,
+ ctx->payload_proto, &nh->path_dpo);
break;
case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
@@ -384,6 +346,7 @@ fib_entry_src_get_path_forwarding (fib_node_index_t path_index,
nh->path_weight = fib_path_get_weight(path_index);
fib_path_contribute_forwarding(path_index,
FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS,
+ ctx->payload_proto,
&nh->path_dpo);
}
break;
@@ -397,11 +360,11 @@ fib_entry_src_get_path_forwarding (fib_node_index_t path_index,
nh->path_index = path_index;
nh->path_weight = fib_path_get_weight(path_index);
fib_path_contribute_forwarding(path_index,
- fib_entry_chain_type_fixup(ctx->fib_entry,
- ctx->fct),
+ ctx->fct,
+ ctx->payload_proto,
&nh->path_dpo);
fib_path_stack_mpls_disp(path_index,
- fib_prefix_get_payload_proto(&ctx->fib_entry->fe_prefix),
+ ctx->payload_proto,
FIB_MPLS_LSP_MODE_PIPE,
&nh->path_dpo);
@@ -480,9 +443,8 @@ fib_entry_src_collect_forwarding (fib_node_index_t pl_index,
*/
ctx->next_hops =
fib_path_ext_stack(path_ext,
+ ctx->payload_proto,
ctx->fct,
- fib_entry_chain_type_fixup(ctx->fib_entry,
- ctx->fct),
ctx->next_hops);
}
else
@@ -609,6 +571,7 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
.preference = 0xffff,
.start_source_index = start,
.end_source_index = end,
+ .payload_proto = fib_prefix_get_payload_proto(&fib_entry->fe_prefix),
};
/*
@@ -794,6 +757,7 @@ fib_entry_src_action_uninstall (fib_entry_t *fib_entry)
&fib_entry->fe_prefix,
&fib_entry->fe_lb);
+ vlib_worker_wait_one_loop();
dpo_reset(&fib_entry->fe_lb);
}
}
@@ -1493,7 +1457,7 @@ fib_entry_src_action_remove (fib_entry_t *fib_entry,
* Return true the the route is attached via an interface that
* is not in the same table as the route
*/
-static inline int
+static int
fib_route_attached_cross_table (const fib_entry_t *fib_entry,
const fib_route_path_t *rpath)
{
@@ -1508,7 +1472,7 @@ fib_route_attached_cross_table (const fib_entry_t *fib_entry,
/* Ignore link local addresses these also can't be imported/exported */
if (ip6_address_is_link_local_unicast (&pfx->fp_addr.ip6))
{
- return (!0);
+ return (0);
}
break;
case FIB_PROTOCOL_IP4:
@@ -1834,6 +1798,25 @@ fib_entry_get_dpo_for_source (fib_node_index_t fib_entry_index,
return (0);
}
+fib_node_index_t
+fib_entry_get_path_list_for_source (fib_node_index_t fib_entry_index,
+ fib_source_t source)
+{
+ fib_entry_t *fib_entry;
+ fib_entry_src_t *esrc;
+
+ if (FIB_NODE_INDEX_INVALID == fib_entry_index)
+ return FIB_NODE_INDEX_INVALID;
+
+ fib_entry = fib_entry_get(fib_entry_index);
+ esrc = fib_entry_src_find(fib_entry, source);
+
+ if (esrc)
+ return esrc->fes_pl;
+
+ return FIB_NODE_INDEX_INVALID;
+}
+
u32
fib_entry_get_resolving_interface_for_source (fib_node_index_t entry_index,
fib_source_t source)
diff --git a/src/vnet/fib/fib_entry_src.h b/src/vnet/fib/fib_entry_src.h
index ced6b5c42fc..1f348baeacb 100644
--- a/src/vnet/fib/fib_entry_src.h
+++ b/src/vnet/fib/fib_entry_src.h
@@ -326,9 +326,6 @@ extern fib_entry_flag_t fib_entry_get_flags_i(const fib_entry_t *fib_entry);
extern fib_path_list_flags_t fib_entry_src_flags_2_path_list_flags(
fib_entry_flag_t eflags);
-extern fib_forward_chain_type_t fib_entry_chain_type_fixup(const fib_entry_t *entry,
- fib_forward_chain_type_t fct);
-
extern void fib_entry_src_mk_lb (fib_entry_t *fib_entry,
fib_source_t source,
fib_forward_chain_type_t fct,
diff --git a/src/vnet/fib/fib_entry_src_interface.c b/src/vnet/fib/fib_entry_src_interface.c
index 402369d1dfc..c5028dc8798 100644
--- a/src/vnet/fib/fib_entry_src_interface.c
+++ b/src/vnet/fib/fib_entry_src_interface.c
@@ -87,8 +87,16 @@ fib_entry_src_interface_update_glean (fib_entry_t *cover,
if (fib_prefix_is_cover(&adj->sub_type.glean.rx_pfx,
&local->fe_prefix))
{
- adj->sub_type.glean.rx_pfx.fp_addr = local->fe_prefix.fp_addr;
- return (1);
+ fib_entry_src_t *local_src;
+
+ local_src = fib_entry_src_find (local, FIB_SOURCE_INTERFACE);
+ if (local_src != NULL)
+ {
+ adj->sub_type.glean.rx_pfx.fp_addr =
+ local->fe_prefix.fp_addr;
+ local_src->fes_flags |= FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN;
+ return (1);
+ }
}
}
}
@@ -116,6 +124,52 @@ fib_entry_src_interface_path_swap (fib_entry_src_t *src,
src->fes_pl = fib_path_list_create(pl_flags, paths);
}
+typedef struct fesi_find_glean_ctx_t_ {
+ fib_node_index_t glean_node_index;
+} fesi_find_glean_ctx_t;
+
+static walk_rc_t
+fib_entry_src_interface_find_glean_walk (fib_entry_t *cover,
+ fib_node_index_t covered,
+ void *ctx)
+{
+ fesi_find_glean_ctx_t *find_glean_ctx = ctx;
+ fib_entry_t *covered_entry;
+ fib_entry_src_t *covered_src;
+
+ covered_entry = fib_entry_get (covered);
+ covered_src = fib_entry_src_find (covered_entry, FIB_SOURCE_INTERFACE);
+ if ((covered_src != NULL) &&
+ (covered_src->fes_flags & FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN))
+ {
+ find_glean_ctx->glean_node_index = covered;
+ return WALK_STOP;
+ }
+
+ return WALK_CONTINUE;
+}
+
+static fib_entry_t *
+fib_entry_src_interface_find_glean (fib_entry_t *cover)
+{
+ fib_entry_src_t *src;
+
+ src = fib_entry_src_find (cover, FIB_SOURCE_INTERFACE);
+ if (src == NULL)
+ /* the cover is not an interface source */
+ return NULL;
+
+ fesi_find_glean_ctx_t ctx = {
+ .glean_node_index = ~0,
+ };
+
+ fib_entry_cover_walk (cover, fib_entry_src_interface_find_glean_walk,
+ &ctx);
+
+ return (ctx.glean_node_index == ~0) ? NULL :
+ fib_entry_get (ctx.glean_node_index);
+}
+
/*
* Source activate.
* Called when the source is teh new longer best source on the entry
@@ -128,6 +182,8 @@ fib_entry_src_interface_activate (fib_entry_src_t *src,
if (FIB_ENTRY_FLAG_LOCAL & src->fes_entry_flags)
{
+ u8 update_glean;
+
/*
* Track the covering attached/connected cover. This is so that
* during an attached export of the cover, this local prefix is
@@ -141,10 +197,17 @@ fib_entry_src_interface_activate (fib_entry_src_t *src,
cover = fib_entry_get(src->u.interface.fesi_cover);
+ /*
+ * Before adding as a child of the cover, check whether an existing
+ * child has already been used to populate the glean adjacency. If so,
+ * we don't need to update the adjacency.
+ */
+ update_glean = (fib_entry_src_interface_find_glean (cover) == NULL);
src->u.interface.fesi_sibling =
fib_entry_cover_track(cover, fib_entry_get_index(fib_entry));
- fib_entry_src_interface_update_glean(cover, fib_entry);
+ if (update_glean)
+ fib_entry_src_interface_update_glean(cover, fib_entry);
}
return (!0);
@@ -167,15 +230,19 @@ fib_entry_src_interface_deactivate (fib_entry_src_t *src,
if (FIB_NODE_INDEX_INVALID != src->u.interface.fesi_cover)
{
cover = fib_entry_get(src->u.interface.fesi_cover);
-
fib_entry_cover_untrack(cover, src->u.interface.fesi_sibling);
src->u.interface.fesi_cover = FIB_NODE_INDEX_INVALID;
src->u.interface.fesi_sibling = ~0;
- fib_entry_cover_walk(cover,
- fib_entry_src_interface_update_glean_walk,
- NULL);
+ /* If this was the glean address, find a new one */
+ if (src->fes_flags & FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN)
+ {
+ fib_entry_cover_walk(cover,
+ fib_entry_src_interface_update_glean_walk,
+ NULL);
+ src->fes_flags &= ~FIB_ENTRY_SRC_FLAG_PROVIDES_GLEAN;
+ }
}
}
diff --git a/src/vnet/fib/fib_node.c b/src/vnet/fib/fib_node.c
index 1d3abd50a9d..e668c4fc51f 100644
--- a/src/vnet/fib/fib_node.c
+++ b/src/vnet/fib/fib_node.c
@@ -31,23 +31,20 @@ static fib_node_type_t last_new_type = FIB_NODE_TYPE_LAST;
/*
* the node type names
*/
-static const char *fn_type_names[] = FIB_NODE_TYPES;
+static const char *fn_type_builtin_names[] = FIB_NODE_TYPES;
+static const char **fn_type_names;
const char*
fib_node_type_get_name (fib_node_type_t type)
{
- if (type < FIB_NODE_TYPE_LAST)
- return (fn_type_names[type]);
+ if ((type < vec_len(fn_type_names)) &&
+ (NULL != fn_type_names[type]))
+ {
+ return (fn_type_names[type]);
+ }
else
{
- if (NULL != fn_vfts[type].fnv_format)
- {
- return ("fixme");
- }
- else
- {
- return ("unknown");
- }
+ return ("unknown");
}
}
@@ -56,9 +53,10 @@ fib_node_type_get_name (fib_node_type_t type)
*
* Register the function table for a given type
*/
-void
-fib_node_register_type (fib_node_type_t type,
- const fib_node_vft_t *vft)
+static void
+fib_node_register_type_i (fib_node_type_t type,
+ const char *name,
+ const fib_node_vft_t *vft)
{
/*
* assert that one only registration is made per-node type
@@ -74,16 +72,31 @@ fib_node_register_type (fib_node_type_t type,
vec_validate(fn_vfts, type);
fn_vfts[type] = *vft;
+ vec_validate(fn_type_names, type);
+ fn_type_names[type] = name;
+}
+
+/**
+ * fib_node_register_type
+ *
+ * Register the function table for a given type
+ */
+void
+fib_node_register_type (fib_node_type_t type,
+ const fib_node_vft_t *vft)
+{
+ fib_node_register_type_i(type, fn_type_builtin_names[type], vft);
}
fib_node_type_t
-fib_node_register_new_type (const fib_node_vft_t *vft)
+fib_node_register_new_type (const char *name,
+ const fib_node_vft_t *vft)
{
fib_node_type_t new_type;
new_type = ++last_new_type;
- fib_node_register_type(new_type, vft);
+ fib_node_register_type_i(new_type, name, vft);
return (new_type);
}
@@ -255,7 +268,6 @@ fib_memory_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* The '<em>sh fib memory </em>' command displays the memory usage for each
* FIB object type.
@@ -288,4 +300,3 @@ VLIB_CLI_COMMAND (show_fib_memory, static) = {
.function = fib_memory_show,
.short_help = "show fib memory",
};
-/* *INDENT-ON* */
diff --git a/src/vnet/fib/fib_node.h b/src/vnet/fib/fib_node.h
index 27e67b11c87..6639c39bcd2 100644
--- a/src/vnet/fib/fib_node.h
+++ b/src/vnet/fib/fib_node.h
@@ -53,8 +53,7 @@ typedef enum fib_node_type_t_ {
/**
* Marker. New types before this one. leave the test last.
*/
- FIB_NODE_TYPE_TEST,
- FIB_NODE_TYPE_LAST = FIB_NODE_TYPE_TEST,
+ FIB_NODE_TYPE_LAST = FIB_NODE_TYPE_ENTRY_TRACK,
} __attribute__ ((packed)) fib_node_type_t;
#define FIB_NODE_TYPE_MAX (FIB_NODE_TYPE_LAST + 1)
@@ -110,6 +109,10 @@ typedef enum fib_node_back_walk_reason_t_ {
*/
FIB_NODE_BW_REASON_INTERFACE_DOWN,
/**
+ * A resolving interface has been bound to another table
+ */
+ FIB_NODE_BW_REASON_INTERFACE_BIND,
+ /**
* A resolving interface has been deleted.
*/
FIB_NODE_BW_REASON_INTERFACE_DELETE,
@@ -138,6 +141,7 @@ typedef enum fib_node_back_walk_reason_t_ {
[FIB_NODE_BW_REASON_INTERFACE_UP] = "if-up", \
[FIB_NODE_BW_REASON_INTERFACE_DOWN] = "if-down", \
[FIB_NODE_BW_REASON_INTERFACE_DELETE] = "if-delete", \
+ [FIB_NODE_BW_REASON_INTERFACE_BIND] = "if-bind", \
[FIB_NODE_BW_REASON_ADJ_UPDATE] = "adj-update", \
[FIB_NODE_BW_REASON_ADJ_MTU] = "adj-mtu", \
[FIB_NODE_BW_REASON_ADJ_DOWN] = "adj-down", \
@@ -157,14 +161,15 @@ typedef enum fib_node_bw_reason_flag_t_ {
FIB_NODE_BW_REASON_FLAG_EVALUATE = (1 << FIB_NODE_BW_REASON_EVALUATE),
FIB_NODE_BW_REASON_FLAG_INTERFACE_UP = (1 << FIB_NODE_BW_REASON_INTERFACE_UP),
FIB_NODE_BW_REASON_FLAG_INTERFACE_DOWN = (1 << FIB_NODE_BW_REASON_INTERFACE_DOWN),
+ FIB_NODE_BW_REASON_FLAG_INTERFACE_BIND = (1 << FIB_NODE_BW_REASON_INTERFACE_BIND),
FIB_NODE_BW_REASON_FLAG_INTERFACE_DELETE = (1 << FIB_NODE_BW_REASON_INTERFACE_DELETE),
FIB_NODE_BW_REASON_FLAG_ADJ_UPDATE = (1 << FIB_NODE_BW_REASON_ADJ_UPDATE),
FIB_NODE_BW_REASON_FLAG_ADJ_MTU = (1 << FIB_NODE_BW_REASON_ADJ_MTU),
FIB_NODE_BW_REASON_FLAG_ADJ_DOWN = (1 << FIB_NODE_BW_REASON_ADJ_DOWN),
} __attribute__ ((packed)) fib_node_bw_reason_flag_t;
-STATIC_ASSERT(sizeof(fib_node_bw_reason_flag_t) < 2,
- "BW Reason enum < 2 byte. Consequences for cover_upd_res_t");
+STATIC_ASSERT(sizeof(fib_node_bw_reason_flag_t) < 3,
+ "BW Reason enum < 2 byte. Consequences for fib_entry_src_cover_res_t");
extern u8 *format_fib_node_bw_reason(u8 *s, va_list *args);
@@ -229,6 +234,17 @@ typedef struct fib_node_back_walk_ctx_t_ {
* in the graph.
*/
u32 fnbw_depth;
+
+ /**
+ * Additional data associated with the reason the walk is occuring
+ */
+ union
+ {
+ struct {
+ u32 fnbw_from_fib_index;
+ u32 fnbw_to_fib_index;
+ } interface_bind;
+ };
} fib_node_back_walk_ctx_t;
/**
@@ -289,7 +305,6 @@ typedef struct fib_node_vft_t_ {
fib_node_get_t fnv_get;
fib_node_last_lock_gone_t fnv_last_lock;
fib_node_back_walk_t fnv_back_walk;
- format_function_t *fnv_format;
fib_node_memory_show_t fnv_mem_show;
} fib_node_vft_t;
@@ -340,12 +355,13 @@ extern void fib_node_register_type (fib_node_type_t ft,
* @brief
* Create a new FIB node type and Register the function table for it.
*
- * @param vft
- * virtual function table
+ * @param name Name of the type (as display when printing children)
+ * @param vft virtual function table
*
* @return new FIB node type
*/
-extern fib_node_type_t fib_node_register_new_type (const fib_node_vft_t *vft);
+extern fib_node_type_t fib_node_register_new_type (const char *name,
+ const fib_node_vft_t *vft);
/**
* @brief Show the memory usage for a type
diff --git a/src/vnet/fib/fib_path.c b/src/vnet/fib/fib_path.c
index 209cf403c6e..95e7cb6ba7d 100644
--- a/src/vnet/fib/fib_path.c
+++ b/src/vnet/fib/fib_path.c
@@ -501,11 +501,9 @@ format_fib_path (u8 * s, va_list * args)
else
{
s = format (s, " %U",
- format_vnet_sw_interface_name,
+ format_vnet_sw_if_index_name,
vnm,
- vnet_get_sw_interface(
- vnm,
- path->attached_next_hop.fp_interface));
+ path->attached_next_hop.fp_interface);
if (vnet_sw_interface_is_p2p(vnet_get_main(),
path->attached_next_hop.fp_interface))
{
@@ -532,11 +530,8 @@ format_fib_path (u8 * s, va_list * args)
else
{
s = format (s, " %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(
- vnm,
- path->attached.fp_interface));
+ format_vnet_sw_if_index_name,
+ vnm, path->attached.fp_interface);
}
break;
case FIB_PATH_TYPE_RECURSIVE:
@@ -587,11 +582,8 @@ format_fib_path (u8 * s, va_list * args)
break;
case FIB_PATH_TYPE_DVR:
s = format (s, " %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(
- vnm,
- path->dvr.fp_interface));
+ format_vnet_sw_if_index_name,
+ vnm, path->dvr.fp_interface);
break;
case FIB_PATH_TYPE_DEAG:
s = format (s, " %sfib-index:%d",
@@ -1161,6 +1153,11 @@ FIXME comment
fib_path_unresolve(path);
path->fp_oper_flags |= FIB_PATH_OPER_FLAG_DROP;
}
+ if (FIB_NODE_BW_REASON_FLAG_INTERFACE_BIND & ctx->fnbw_reason)
+ {
+ /* bind walks should appear here and pass silently up to
+ * to the fib_entry */
+ }
break;
case FIB_PATH_TYPE_UDP_ENCAP:
{
@@ -1360,7 +1357,8 @@ fib_path_create (fib_node_index_t pl_index,
dpo_copy(&path->exclusive.fp_ex_dpo, &rpath->dpo);
}
else if ((path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_PROHIBIT) ||
- (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_UNREACH))
+ (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_ICMP_UNREACH) ||
+ (path->fp_cfg_flags & FIB_PATH_CFG_FLAG_DROP))
{
path->fp_type = FIB_PATH_TYPE_SPECIAL;
}
@@ -1502,6 +1500,12 @@ fib_path_copy (fib_node_index_t path_index,
clib_memset(&path->fp_dpo, 0, sizeof(path->fp_dpo));
dpo_reset(&path->fp_dpo);
+ if (path->fp_type == FIB_PATH_TYPE_EXCLUSIVE)
+ {
+ clib_memset(&path->exclusive.fp_ex_dpo, 0, sizeof(dpo_id_t));
+ dpo_copy(&path->exclusive.fp_ex_dpo, &orig_path->exclusive.fp_ex_dpo);
+ }
+
return (fib_path_get_index(path));
}
@@ -1987,7 +1991,11 @@ fib_path_resolve (fib_node_index_t path_index)
}
else
{
- fib_prefix_from_ip46_addr(&path->recursive.fp_nh.fp_ip, &pfx);
+ ASSERT(!ip46_address_is_zero(&path->recursive.fp_nh.fp_ip));
+
+ fib_protocol_t fp = (ip46_address_is_ip4(&path->recursive.fp_nh.fp_ip) ?
+ FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
+ fib_prefix_from_ip46_addr(fp, &path->recursive.fp_nh.fp_ip, &pfx);
}
fib_table_lock(path->recursive.fp_tbl_id,
@@ -2416,6 +2424,7 @@ fib_path_stack_mpls_disp (fib_node_index_t path_index,
void
fib_path_contribute_forwarding (fib_node_index_t path_index,
fib_forward_chain_type_t fct,
+ dpo_proto_t payload_proto,
dpo_id_t *dpo)
{
fib_path_t *path;
@@ -2423,7 +2432,6 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
path = fib_path_get(path_index);
ASSERT(path);
- ASSERT(FIB_FORW_CHAIN_TYPE_MPLS_EOS != fct);
/*
* The DPO stored in the path was created when the path was resolved.
@@ -2441,9 +2449,19 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
case FIB_PATH_TYPE_ATTACHED_NEXT_HOP:
switch (fct)
{
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS: {
+ dpo_id_t tmp = DPO_INVALID;
+ dpo_copy (&tmp, dpo);
+ path = fib_path_attached_next_hop_get_adj(
+ path,
+ dpo_proto_to_link(payload_proto),
+ &tmp);
+ dpo_copy (dpo, &tmp);
+ dpo_reset(&tmp);
+ break;
+ }
case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
- case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
case FIB_FORW_CHAIN_TYPE_ETHERNET:
case FIB_FORW_CHAIN_TYPE_NSH:
@@ -2555,10 +2573,25 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
case FIB_PATH_TYPE_ATTACHED:
switch (fct)
{
+ case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
+ /*
+ * End of stack traffic via an attacehd path (a glean)
+ * must forace an IP lookup so that the IP packet can
+ * match against any installed adj-fibs
+ */
+ lookup_dpo_add_or_lock_w_fib_index(
+ fib_table_get_index_for_sw_if_index(
+ dpo_proto_to_fib(payload_proto),
+ path->attached.fp_interface),
+ payload_proto,
+ LOOKUP_UNICAST,
+ LOOKUP_INPUT_DST_ADDR,
+ LOOKUP_TABLE_FROM_CONFIG,
+ dpo);
+ break;
case FIB_FORW_CHAIN_TYPE_MPLS_NON_EOS:
case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
case FIB_FORW_CHAIN_TYPE_UNICAST_IP6:
- case FIB_FORW_CHAIN_TYPE_MPLS_EOS:
case FIB_FORW_CHAIN_TYPE_ETHERNET:
case FIB_FORW_CHAIN_TYPE_NSH:
case FIB_FORW_CHAIN_TYPE_BIER:
@@ -2604,8 +2637,8 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
/*
* Create the adj needed for sending IP multicast traffic
*/
- interface_rx_dpo_add_or_lock(fib_forw_chain_type_to_dpo_proto(fct),
- path->attached.fp_interface,
+ interface_rx_dpo_add_or_lock(payload_proto,
+ path->intf_rx.fp_interface,
dpo);
break;
case FIB_PATH_TYPE_UDP_ENCAP:
@@ -2625,6 +2658,7 @@ fib_path_contribute_forwarding (fib_node_index_t path_index,
load_balance_path_t *
fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
fib_forward_chain_type_t fct,
+ dpo_proto_t payload_proto,
load_balance_path_t *hash_key)
{
load_balance_path_t *mnh;
@@ -2641,7 +2675,7 @@ fib_path_append_nh_for_multipath_hash (fib_node_index_t path_index,
if (fib_path_is_resolved(path_index))
{
- fib_path_contribute_forwarding(path_index, fct, &mnh->path_dpo);
+ fib_path_contribute_forwarding(path_index, fct, payload_proto, &mnh->path_dpo);
}
else
{
diff --git a/src/vnet/fib/fib_path.h b/src/vnet/fib/fib_path.h
index c0f76411390..f3442c23dd6 100644
--- a/src/vnet/fib/fib_path.h
+++ b/src/vnet/fib/fib_path.h
@@ -191,6 +191,7 @@ extern uword fib_path_hash(fib_node_index_t path_index);
extern load_balance_path_t * fib_path_append_nh_for_multipath_hash(
fib_node_index_t path_index,
fib_forward_chain_type_t fct,
+ dpo_proto_t payload_proto,
load_balance_path_t *hash_key);
extern void fib_path_stack_mpls_disp(fib_node_index_t path_index,
dpo_proto_t payload_proto,
@@ -198,6 +199,7 @@ extern void fib_path_stack_mpls_disp(fib_node_index_t path_index,
dpo_id_t *dpo);
extern void fib_path_contribute_forwarding(fib_node_index_t path_index,
fib_forward_chain_type_t type,
+ dpo_proto_t payload_proto,
dpo_id_t *dpo);
extern void fib_path_contribute_urpf(fib_node_index_t path_index,
index_t urpf);
diff --git a/src/vnet/fib/fib_path_ext.c b/src/vnet/fib/fib_path_ext.c
index 209b6273a85..f5611f92271 100644
--- a/src/vnet/fib/fib_path_ext.c
+++ b/src/vnet/fib/fib_path_ext.c
@@ -163,8 +163,8 @@ fib_path_ext_mpls_flags_to_mpls_label (fib_path_ext_mpls_flags_t fpe_flags)
load_balance_path_t *
fib_path_ext_stack (fib_path_ext_t *path_ext,
+ dpo_proto_t payload_proto,
fib_forward_chain_type_t child_fct,
- fib_forward_chain_type_t imp_null_fct,
load_balance_path_t *nhs)
{
fib_forward_chain_type_t parent_fct;
@@ -189,7 +189,7 @@ fib_path_ext_stack (fib_path_ext_t *path_ext,
*/
if (fib_path_ext_is_imp_null(path_ext))
{
- parent_fct = imp_null_fct;
+ parent_fct = fib_forw_chain_type_from_dpo_proto(payload_proto);
}
else
{
@@ -240,6 +240,7 @@ fib_path_ext_stack (fib_path_ext_t *path_ext,
*/
fib_path_contribute_forwarding(path_ext->fpe_path_index,
parent_fct,
+ payload_proto,
&via_dpo);
if (dpo_is_drop(&via_dpo) ||
diff --git a/src/vnet/fib/fib_path_ext.h b/src/vnet/fib/fib_path_ext.h
index b49fd977a20..2850a588608 100644
--- a/src/vnet/fib/fib_path_ext.h
+++ b/src/vnet/fib/fib_path_ext.h
@@ -141,8 +141,8 @@ extern void fib_path_ext_resolve(fib_path_ext_t *path_ext,
fib_node_index_t path_list_index);
extern load_balance_path_t *fib_path_ext_stack(fib_path_ext_t *path_ext,
+ dpo_proto_t payload_proto,
fib_forward_chain_type_t fct,
- fib_forward_chain_type_t imp_null_fct,
load_balance_path_t *nhs);
extern fib_path_ext_t * fib_path_ext_list_push_back (fib_path_ext_list_t *list,
diff --git a/src/vnet/fib/fib_path_list.c b/src/vnet/fib/fib_path_list.c
index 15d480cb43d..ebd2c0e9be1 100644
--- a/src/vnet/fib/fib_path_list.c
+++ b/src/vnet/fib/fib_path_list.c
@@ -378,8 +378,10 @@ fib_path_list_mk_lb (fib_path_list_t *path_list,
if ((flags & FIB_PATH_LIST_FWD_FLAG_STICKY) ||
fib_path_is_resolved(*path_index))
{
- nhs = fib_path_append_nh_for_multipath_hash(*path_index,
- fct, nhs);
+ nhs = fib_path_append_nh_for_multipath_hash(
+ *path_index, fct,
+ fib_forw_chain_type_to_dpo_proto(fct),
+ nhs);
}
}
@@ -962,8 +964,7 @@ fib_path_list_copy_and_path_add (fib_node_index_t orig_path_list_index,
}
if (duplicate)
{
- _vec_len(path_list->fpl_paths) =
- vec_len(path_list->fpl_paths) - 1;
+ vec_set_len(path_list->fpl_paths, vec_len(path_list->fpl_paths) - 1);
fib_path_destroy(new_path_index);
}
else
@@ -1297,15 +1298,8 @@ fib_path_list_child_add (fib_node_index_t path_list_index,
fib_node_type_t child_type,
fib_node_index_t child_index)
{
- u32 sibling;
-
- sibling = fib_node_child_add(FIB_NODE_TYPE_PATH_LIST,
- path_list_index,
- child_type,
- child_index);
-
- if (FIB_PATH_LIST_POPULAR == fib_node_get_n_children(FIB_NODE_TYPE_PATH_LIST,
- path_list_index))
+ if (FIB_PATH_LIST_POPULAR - 1 == fib_node_get_n_children(FIB_NODE_TYPE_PATH_LIST,
+ path_list_index))
{
/*
* Set the popular flag on the path-list once we pass the magic
@@ -1328,7 +1322,10 @@ fib_path_list_child_add (fib_node_index_t path_list_index,
fib_walk_sync(FIB_NODE_TYPE_PATH_LIST, path_list_index, &ctx);
}
- return (sibling);
+ return (fib_node_child_add(FIB_NODE_TYPE_PATH_LIST,
+ path_list_index,
+ child_type,
+ child_index));
}
void
diff --git a/src/vnet/fib/fib_sas.c b/src/vnet/fib/fib_sas.c
index b607a0b5be8..c9d469379f1 100644
--- a/src/vnet/fib/fib_sas.c
+++ b/src/vnet/fib/fib_sas.c
@@ -61,6 +61,18 @@ fib_sas4_get (u32 sw_if_index,
d_tmp.ip4 = *dst;
}
+ if (vnet_sw_interface_is_p2p(vnet_get_main(), sw_if_index))
+ {
+ ip4_address_t *ip4;
+ ip4 = ip_interface_get_first_ip (sw_if_index, 1);
+ if (ip4) {
+ src->as_u32 = ip4->as_u32;
+ return (true);
+ } else {
+ return (false);
+ }
+ }
+
/*
* If the interface is unnumbered then use the IP interface
*/
@@ -100,12 +112,29 @@ fib_sas6_get (u32 sw_if_index,
/*
* if the dst is v6 and link local, use the source link local
*/
- if (ip6_address_is_link_local_unicast (dst))
+ if (dst && ip6_address_is_link_local_unicast (dst))
{
- ip6_address_copy (src, ip6_get_link_local_address (sw_if_index));
+ const ip6_address_t *ll = ip6_get_link_local_address (sw_if_index);
+ if (NULL == ll)
+ {
+ return (false);
+ }
+ ip6_address_copy (src, ll);
return (true);
}
+ if (vnet_sw_interface_is_p2p(vnet_get_main(), sw_if_index))
+ {
+ ip6_address_t *ip6;
+ ip6 = ip_interface_get_first_ip (sw_if_index, 0);
+ if (ip6) {
+ ip6_address_copy(src, ip6);
+ return (true);
+ } else {
+ return (false);
+ }
+ }
+
/*
* get the source address from the glean adjacency
*/
diff --git a/src/vnet/fib/fib_table.c b/src/vnet/fib/fib_table.c
index 7cc989d6324..b2a32d0da56 100644
--- a/src/vnet/fib/fib_table.c
+++ b/src/vnet/fib/fib_table.c
@@ -25,6 +25,13 @@
const static char * fib_table_flags_strings[] = FIB_TABLE_ATTRIBUTES;
+/*
+ * Default names for IP4, IP6, and MPLS FIB table index 0.
+ * Nominally like "ipv6-VRF:0", but this will override that name if set
+ * in a config section of the startup.conf file.
+ */
+char *fib_table_default_names[FIB_PROTOCOL_MAX];
+
fib_table_t *
fib_table_get (fib_node_index_t index,
fib_protocol_t proto)
@@ -534,7 +541,11 @@ fib_table_route_path_fixup (const fib_prefix_t *prefix,
else if (fib_route_path_is_attached(path))
{
path->frp_flags |= FIB_ROUTE_PATH_GLEAN;
- fib_prefix_normalize(prefix, &path->frp_connected);
+ /*
+ * attached prefixes are not suitable as the source of ARP requests
+ * so don't save the prefix in the glean adj
+ */
+ clib_memset(&path->frp_connected, 0, sizeof(path->frp_connected));
}
if (*eflags & FIB_ENTRY_FLAG_DROP)
{
@@ -1149,21 +1160,29 @@ fib_table_find_or_create_and_lock_i (fib_protocol_t proto,
fib_table = fib_table_get(fi, proto);
- if (NULL == fib_table->ft_desc)
+ if (fib_table->ft_desc)
+ return fi;
+
+ if (name && name[0])
{
- if (name && name[0])
- {
- fib_table->ft_desc = format(NULL, "%s", name);
- }
- else
- {
- fib_table->ft_desc = format(NULL, "%U-VRF:%d",
- format_fib_protocol, proto,
- table_id);
- }
+ fib_table->ft_desc = format(NULL, "%s", name);
+ return fi;
}
- return (fi);
+ if (table_id == 0)
+ {
+ char *default_name = fib_table_default_names[proto];
+ if (default_name && default_name[0])
+ {
+ fib_table->ft_desc = format(NULL, "%s", default_name);
+ return fi;
+ }
+ }
+
+ fib_table->ft_desc = format(NULL, "%U-VRF:%d",
+ format_fib_protocol, proto,
+ table_id);
+ return fi;
}
u32
@@ -1322,6 +1341,7 @@ fib_table_lock_dec (fib_table_t *fib_table,
{
vec_validate(fib_table->ft_locks, source);
+ ASSERT(fib_table->ft_locks[source] > 0);
fib_table->ft_locks[source]--;
fib_table->ft_total_locks--;
}
@@ -1337,6 +1357,36 @@ fib_table_lock_inc (fib_table_t *fib_table,
fib_table->ft_total_locks++;
}
+
+static void
+fib_table_lock_clear (fib_table_t *fib_table,
+ fib_source_t source)
+{
+ vec_validate(fib_table->ft_locks, source);
+
+ ASSERT(fib_table->ft_locks[source] <= 1);
+ if (fib_table->ft_locks[source])
+ {
+ fib_table->ft_locks[source]--;
+ fib_table->ft_total_locks--;
+ }
+}
+
+static void
+fib_table_lock_set (fib_table_t *fib_table,
+ fib_source_t source)
+{
+ vec_validate(fib_table->ft_locks, source);
+
+ ASSERT(fib_table->ft_locks[source] <= 1);
+ ASSERT(fib_table->ft_total_locks < (0xffffffff - 1));
+ if (!fib_table->ft_locks[source])
+ {
+ fib_table->ft_locks[source]++;
+ fib_table->ft_total_locks++;
+ }
+}
+
void
fib_table_unlock (u32 fib_index,
fib_protocol_t proto,
@@ -1345,12 +1395,16 @@ fib_table_unlock (u32 fib_index,
fib_table_t *fib_table;
fib_table = fib_table_get(fib_index, proto);
- fib_table_lock_dec(fib_table, source);
+
+ if (source == FIB_SOURCE_API || source == FIB_SOURCE_CLI)
+ fib_table_lock_clear(fib_table, source);
+ else
+ fib_table_lock_dec(fib_table, source);
if (0 == fib_table->ft_total_locks)
{
/*
- * no more locak from any source - kill it
+ * no more lock from any source - kill it
*/
fib_table_destroy(fib_table);
}
@@ -1365,7 +1419,10 @@ fib_table_lock (u32 fib_index,
fib_table = fib_table_get(fib_index, proto);
- fib_table_lock_inc(fib_table, source);
+ if (source == FIB_SOURCE_API || source == FIB_SOURCE_CLI)
+ fib_table_lock_set(fib_table, source);
+ else
+ fib_table_lock_inc(fib_table, source);
}
u32
diff --git a/src/vnet/fib/fib_table.h b/src/vnet/fib/fib_table.h
index 11137e173cf..0eaaa67eea2 100644
--- a/src/vnet/fib/fib_table.h
+++ b/src/vnet/fib/fib_table.h
@@ -122,6 +122,15 @@ typedef struct fib_table_t_
u8* ft_desc;
} fib_table_t;
+
+/**
+ * @brief
+ * Default names for IP4, IP6, and MPLS FIB table index 0.
+ * Nominally like "ipv4-VRF:0", but this will override that name if set
+ * in a config section of the startup.conf file.
+ */
+extern char *fib_table_default_names[FIB_PROTOCOL_MAX];
+
/**
* @brief
* Format the description/name of the table
diff --git a/src/vnet/fib/fib_types.api b/src/vnet/fib/fib_types.api
index c5fbcf8fc29..c0e467f6fd6 100644
--- a/src/vnet/fib/fib_types.api
+++ b/src/vnet/fib/fib_types.api
@@ -102,7 +102,7 @@ enum fib_path_type
not given)
@param weight - The weight, for UCMP
@param preference - The preference of the path. lowest preference
- is prefered
+ is preferred
@param rpf-id - For paths that pop to multicast, this the the
RPF ID the packet will be given (0 and ~0 => unset)
@param type - the path type
diff --git a/src/vnet/fib/fib_types.c b/src/vnet/fib/fib_types.c
index eab5ca22571..c4472c7122d 100644
--- a/src/vnet/fib/fib_types.c
+++ b/src/vnet/fib/fib_types.c
@@ -78,16 +78,15 @@ format_fib_mpls_label (u8 *s, va_list *ap)
}
void
-fib_prefix_from_ip46_addr (const ip46_address_t *addr,
+fib_prefix_from_ip46_addr (fib_protocol_t fproto,
+ const ip46_address_t *addr,
fib_prefix_t *pfx)
{
- ASSERT(!ip46_address_is_zero(addr));
+ ASSERT(FIB_PROTOCOL_MPLS != fproto);
- pfx->fp_proto = ((ip46_address_is_ip4(addr) ?
- FIB_PROTOCOL_IP4 :
- FIB_PROTOCOL_IP6));
- pfx->fp_len = ((ip46_address_is_ip4(addr) ?
- 32 : 128));
+ pfx->fp_proto = fproto;
+ pfx->fp_len = ((FIB_PROTOCOL_IP4 == fproto) ?
+ 32 : 128);
pfx->fp_addr = *addr;
pfx->___fp___pad = 0;
}
@@ -541,6 +540,7 @@ unformat_fib_route_path (unformat_input_t * input, va_list * args)
{
fib_route_path_t *rpath = va_arg (*args, fib_route_path_t *);
dpo_proto_t *payload_proto = va_arg (*args, void*);
+ dpo_proto_t explicit_proto = DPO_PROTO_NONE;
u32 weight, preference, udp_encap_id, fi;
mpls_label_t out_label;
vnet_main_t *vnm;
@@ -708,6 +708,13 @@ unformat_fib_route_path (unformat_input_t * input, va_list * args)
rpath->frp_proto = DPO_PROTO_IP4;
rpath->frp_flags = FIB_ROUTE_PATH_INTF_RX;
}
+ else if (unformat (input, "rx-ip6 %U",
+ unformat_vnet_sw_interface, vnm,
+ &rpath->frp_sw_if_index))
+ {
+ rpath->frp_proto = DPO_PROTO_IP6;
+ rpath->frp_flags = FIB_ROUTE_PATH_INTF_RX;
+ }
else if (unformat (input, "local"))
{
clib_memset (&rpath->frp_addr, 0, sizeof (rpath->frp_addr));
@@ -726,6 +733,14 @@ unformat_fib_route_path (unformat_input_t * input, va_list * args)
vec_add1(rpath->frp_label_stack, fml);
}
}
+ else if (unformat (input, "ip4"))
+ {
+ explicit_proto = DPO_PROTO_IP4;
+ }
+ else if (unformat (input, "ip6"))
+ {
+ explicit_proto = DPO_PROTO_IP6;
+ }
else if (unformat (input, "%U",
unformat_vnet_sw_interface, vnm,
&rpath->frp_sw_if_index))
@@ -750,6 +765,9 @@ unformat_fib_route_path (unformat_input_t * input, va_list * args)
}
}
+ if (DPO_PROTO_NONE != explicit_proto)
+ *payload_proto = rpath->frp_proto = explicit_proto;
+
return (1);
}
@@ -764,6 +782,7 @@ fib_route_path_is_attached (const fib_route_path_t *rpath)
* L3 game with these
*/
if (rpath->frp_flags & (FIB_ROUTE_PATH_DVR |
+ FIB_ROUTE_PATH_INTF_RX |
FIB_ROUTE_PATH_UDP_ENCAP))
{
return (0);
@@ -785,3 +804,56 @@ fib_route_path_is_attached (const fib_route_path_t *rpath)
}
return (0);
}
+
+static void
+fib_prefix_ip4_addr_increment (fib_prefix_t *pfx)
+{
+ /* Calculate the addend based on the host length of address */
+ u32 incr = 1ULL << (32 - pfx->fp_len);
+ ip4_address_t dst = (pfx->fp_addr).ip4;
+ dst.as_u32 = clib_host_to_net_u32 (incr + clib_net_to_host_u32 (dst.as_u32));
+ pfx->fp_addr.ip4.as_u32 = dst.as_u32;
+}
+
+static void
+fib_prefix_ip6_addr_increment (fib_prefix_t *pfx)
+{
+ /*
+ * Calculate the addend based on the host length of address
+ * and which part(lower 64 bits or higher 64 bits) it lies
+ * in
+ */
+ u32 host_len = 128 - pfx->fp_len;
+ u64 incr = 1ULL << ((host_len > 64) ? (host_len - 64) : host_len);
+ i32 bucket = (host_len < 64 ? 1 : 0);
+ ip6_address_t dst = (pfx->fp_addr).ip6;
+ u64 tmp = incr + clib_net_to_host_u64 (dst.as_u64[bucket]);
+ /* Handle overflow */
+ if (bucket && (tmp < incr))
+ {
+ dst.as_u64[1] = clib_host_to_net_u64 (tmp);
+ dst.as_u64[0] = clib_host_to_net_u64 (1ULL + clib_net_to_host_u64 (dst.as_u64[0]));
+ }
+ else
+ dst.as_u64[bucket] = clib_host_to_net_u64 (tmp);
+
+ pfx->fp_addr.ip6.as_u128 = dst.as_u128;
+}
+
+/*
+ * Increase IPv4/IPv6 address according to the prefix length
+ */
+void fib_prefix_increment (fib_prefix_t *pfx)
+{
+ switch (pfx->fp_proto)
+ {
+ case FIB_PROTOCOL_IP4:
+ fib_prefix_ip4_addr_increment (pfx);
+ break;
+ case FIB_PROTOCOL_IP6:
+ fib_prefix_ip6_addr_increment (pfx);
+ break;
+ case FIB_PROTOCOL_MPLS:
+ break;
+ }
+}
diff --git a/src/vnet/fib/fib_types.h b/src/vnet/fib/fib_types.h
index 55b404b044e..b9346c75108 100644
--- a/src/vnet/fib/fib_types.h
+++ b/src/vnet/fib/fib_types.h
@@ -276,8 +276,9 @@ extern void fib_prefix_normalize(const fib_prefix_t *p,
/**
* \brief Host prefix from ip
*/
-extern void fib_prefix_from_ip46_addr (const ip46_address_t *addr,
- fib_prefix_t *pfx);
+extern void fib_prefix_from_ip46_addr (fib_protocol_t fproto,
+ const ip46_address_t *addr,
+ fib_prefix_t *pfx);
extern u8 * format_fib_prefix(u8 * s, va_list * args);
extern u8 * format_fib_forw_chain_type(u8 * s, va_list * args);
@@ -286,6 +287,11 @@ extern dpo_proto_t fib_proto_to_dpo(fib_protocol_t fib_proto);
extern fib_protocol_t dpo_proto_to_fib(dpo_proto_t dpo_proto);
/**
+ * \brief Increase IPv4/IPv6 address according to the prefix length
+ */
+extern void fib_prefix_increment (fib_prefix_t *pfx);
+
+/**
* Convert from BIER next-hop proto to FIB proto
*/
extern fib_protocol_t bier_hdr_proto_to_fib(bier_hdr_proto_id_t bproto);
@@ -627,7 +633,7 @@ extern int fib_route_path_is_attached (const fib_route_path_t *rpath);
/**
* A help string to list the FIB path options
*/
-#define FIB_ROUTE_PATH_HELP "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]"
+#define FIB_ROUTE_PATH_HELP "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] [out-labels <value value value>]"
/**
* return code to control pat-hlist walk
diff --git a/src/vnet/fib/fib_urpf_list.c b/src/vnet/fib/fib_urpf_list.c
index 4f751a63627..67be6699a0e 100644
--- a/src/vnet/fib/fib_urpf_list.c
+++ b/src/vnet/fib/fib_urpf_list.c
@@ -55,8 +55,18 @@ index_t
fib_urpf_list_alloc_and_lock (void)
{
fib_urpf_list_t *urpf;
+ u8 need_barrier_sync = pool_get_will_expand (fib_urpf_list_pool);
+ vlib_main_t *vm = vlib_get_main();
+ ASSERT (vm->thread_index == 0);
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_sync (vm);
pool_get(fib_urpf_list_pool, urpf);
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_release (vm);
+
clib_memset(urpf, 0, sizeof(*urpf));
urpf->furpf_locks++;
@@ -163,7 +173,7 @@ fib_urpf_list_bake (index_t ui)
if (urpf->furpf_itfs[i] != urpf->furpf_itfs[j])
urpf->furpf_itfs[++i] = urpf->furpf_itfs[j];
/* set the length of the vector to the number of unique itfs */
- _vec_len(urpf->furpf_itfs) = i+1;
+ vec_set_len (urpf->furpf_itfs, i+1);
}
urpf->furpf_flags |= FIB_URPF_LIST_BAKED;
@@ -218,7 +228,6 @@ show_fib_urpf_list_command (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
/*?
* The '<em>sh fib uRPF [index] </em>' command displays the uRPF lists
*
@@ -236,4 +245,3 @@ VLIB_CLI_COMMAND (show_fib_urpf_list, static) = {
.function = show_fib_urpf_list_command,
.short_help = "show fib uRPF",
};
-/* *INDENT-OFF* */
diff --git a/src/vnet/fib/fib_walk.c b/src/vnet/fib/fib_walk.c
index b3b2b1e7944..236607cb891 100644
--- a/src/vnet/fib/fib_walk.c
+++ b/src/vnet/fib/fib_walk.c
@@ -611,13 +611,11 @@ fib_walk_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (fib_walk_process_node,static) = {
.function = fib_walk_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "fib-walk",
};
-/* *INDENT-ON* */
/**
* @brief Allocate a new walk object
diff --git a/src/vnet/fib/ip4_fib.c b/src/vnet/fib/ip4_fib.c
index a3010149a4d..0eff8d0d485 100644
--- a/src/vnet/fib/ip4_fib.c
+++ b/src/vnet/fib/ip4_fib.c
@@ -99,6 +99,49 @@ static const ip4_fib_table_special_prefix_t ip4_specials[] = {
}
};
+void
+ip4_fib_hash_load_specials (u32 fib_index)
+{
+ /*
+ * add the special entries into the new FIB
+ */
+ int ii;
+
+ for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
+ {
+ fib_prefix_t prefix = ip4_specials[ii].ift_prefix;
+
+ prefix.fp_addr.ip4.data_u32 =
+ clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32);
+
+ fib_table_entry_special_add(fib_index,
+ &prefix,
+ ip4_specials[ii].ift_source,
+ ip4_specials[ii].ift_flag);
+ }
+}
+
+void
+ip4_fib_hash_flush_specials (u32 fib_index)
+{
+ int ii;
+
+ /*
+ * remove all the specials we added when the table was created.
+ * In reverse order so the default route is last.
+ */
+ for (ii = ARRAY_LEN(ip4_specials) - 1; ii >= 0; ii--)
+ {
+ fib_prefix_t prefix = ip4_specials[ii].ift_prefix;
+
+ prefix.fp_addr.ip4.data_u32 =
+ clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32);
+
+ fib_table_entry_special_remove(fib_index,
+ &prefix,
+ ip4_specials[ii].ift_source);
+ }
+}
static u32
ip4_create_fib_with_table_id (u32 table_id,
@@ -110,44 +153,34 @@ ip4_create_fib_with_table_id (u32 table_id,
pool_get(ip4_main.fibs, fib_table);
clib_memset(fib_table, 0, sizeof(*fib_table));
- pool_get_aligned(ip4_main.v4_fibs, v4_fib, CLIB_CACHE_LINE_BYTES);
-
- ASSERT((fib_table - ip4_main.fibs) ==
- (v4_fib - ip4_main.v4_fibs));
+ pool_get_aligned(ip4_fibs, v4_fib, CLIB_CACHE_LINE_BYTES);
fib_table->ft_proto = FIB_PROTOCOL_IP4;
- fib_table->ft_index =
- v4_fib->index =
- (fib_table - ip4_main.fibs);
+ fib_table->ft_index = (v4_fib - ip4_fibs);
+
+ /*
+ * It is required that the index of the fib_table_t in its pool
+ * is the same as the index of the ip4_fib_t in its pool, since the
+ * rest of the code usues the 'fib_index' to mean either of these
+ * objects, depending on the context.
+ */
+ ASSERT(fib_table->ft_index == fib_table - ip4_main.fibs);
hash_set (ip4_main.fib_index_by_table_id, table_id, fib_table->ft_index);
fib_table->ft_table_id =
- v4_fib->table_id =
+ v4_fib->hash.table_id =
table_id;
fib_table->ft_flow_hash_config = IP_FLOW_HASH_DEFAULT;
fib_table_lock(fib_table->ft_index, FIB_PROTOCOL_IP4, src);
- ip4_mtrie_init(&v4_fib->mtrie);
+ ip4_fib_table_init(v4_fib);
/*
* add the special entries into the new FIB
*/
- int ii;
-
- for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
- {
- fib_prefix_t prefix = ip4_specials[ii].ift_prefix;
-
- prefix.fp_addr.ip4.data_u32 =
- clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32);
-
- fib_table_entry_special_add(fib_table->ft_index,
- &prefix,
- ip4_specials[ii].ift_source,
- ip4_specials[ii].ift_flag);
- }
+ ip4_fib_hash_load_specials(fib_table - ip4_main.fibs);
return (fib_table->ft_index);
}
@@ -156,25 +189,14 @@ void
ip4_fib_table_destroy (u32 fib_index)
{
fib_table_t *fib_table = pool_elt_at_index(ip4_main.fibs, fib_index);
- ip4_fib_t *v4_fib = pool_elt_at_index(ip4_main.v4_fibs, fib_index);
+ ip4_fib_t *v4_fib = pool_elt_at_index(ip4_fibs, fib_table->ft_index);
u32 *n_locks;
- int ii;
/*
* remove all the specials we added when the table was created.
* In reverse order so the default route is last.
*/
- for (ii = ARRAY_LEN(ip4_specials) - 1; ii >= 0; ii--)
- {
- fib_prefix_t prefix = ip4_specials[ii].ift_prefix;
-
- prefix.fp_addr.ip4.data_u32 =
- clib_host_to_net_u32(prefix.fp_addr.ip4.data_u32);
-
- fib_table_entry_special_remove(fib_table->ft_index,
- &prefix,
- ip4_specials[ii].ift_source);
- }
+ ip4_fib_hash_flush_specials(fib_table - ip4_main.fibs);
/*
* validate no more routes.
@@ -194,10 +216,11 @@ ip4_fib_table_destroy (u32 fib_index)
hash_unset (ip4_main.fib_index_by_table_id, fib_table->ft_table_id);
}
+ vec_free (fib_table->ft_locks);
vec_free(fib_table->ft_src_route_counts);
- ip4_mtrie_free(&v4_fib->mtrie);
+ ip4_fib_table_free(v4_fib);
- pool_put(ip4_main.v4_fibs, v4_fib);
+ pool_put(ip4_fibs, v4_fib);
pool_put(ip4_main.fibs, fib_table);
}
@@ -237,268 +260,6 @@ ip4_fib_table_get_index_for_sw_if_index (u32 sw_if_index)
return (ip4_main.fib_index_by_sw_if_index[sw_if_index]);
}
-/*
- * ip4_fib_table_lookup_exact_match
- *
- * Exact match prefix lookup
- */
-fib_node_index_t
-ip4_fib_table_lookup_exact_match (const ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len)
-{
- uword * hash, * result;
- u32 key;
-
- hash = fib->fib_entry_by_dst_address[len];
- key = (addr->data_u32 & ip4_main.fib_masks[len]);
-
- result = hash_get(hash, key);
-
- if (NULL != result) {
- return (result[0]);
- }
- return (FIB_NODE_INDEX_INVALID);
-}
-
-/*
- * ip4_fib_table_lookup_adj
- *
- * Longest prefix match
- */
-index_t
-ip4_fib_table_lookup_lb (ip4_fib_t *fib,
- const ip4_address_t *addr)
-{
- fib_node_index_t fei;
-
- fei = ip4_fib_table_lookup(fib, addr, 32);
-
- if (FIB_NODE_INDEX_INVALID != fei)
- {
- const dpo_id_t *dpo;
-
- dpo = fib_entry_contribute_ip_forwarding(fei);
-
- return (dpo->dpoi_index);
- }
- return (INDEX_INVALID);
-}
-
-/*
- * ip4_fib_table_lookup
- *
- * Longest prefix match
- */
-fib_node_index_t
-ip4_fib_table_lookup (const ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len)
-{
- uword * hash, * result;
- i32 mask_len;
- u32 key;
-
- for (mask_len = len; mask_len >= 0; mask_len--)
- {
- hash = fib->fib_entry_by_dst_address[mask_len];
- key = (addr->data_u32 & ip4_main.fib_masks[mask_len]);
-
- result = hash_get (hash, key);
-
- if (NULL != result) {
- return (result[0]);
- }
- }
- return (FIB_NODE_INDEX_INVALID);
-}
-
-void
-ip4_fib_table_entry_insert (ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len,
- fib_node_index_t fib_entry_index)
-{
- uword * hash, * result;
- u32 key;
-
- key = (addr->data_u32 & ip4_main.fib_masks[len]);
- hash = fib->fib_entry_by_dst_address[len];
- result = hash_get (hash, key);
-
- if (NULL == result) {
- /*
- * adding a new entry
- */
-
- if (NULL == hash) {
- hash = hash_create (32 /* elts */, sizeof (uword));
- hash_set_flags (hash, HASH_FLAG_NO_AUTO_SHRINK);
-
- }
- hash = hash_set(hash, key, fib_entry_index);
- fib->fib_entry_by_dst_address[len] = hash;
- }
- else
- {
- ASSERT(0);
- }
-}
-
-void
-ip4_fib_table_entry_remove (ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len)
-{
- uword * hash, * result;
- u32 key;
-
- key = (addr->data_u32 & ip4_main.fib_masks[len]);
- hash = fib->fib_entry_by_dst_address[len];
- result = hash_get (hash, key);
-
- if (NULL == result)
- {
- /*
- * removing a non-existent entry. i'll allow it.
- */
- }
- else
- {
- hash_unset(hash, key);
- }
-
- fib->fib_entry_by_dst_address[len] = hash;
-}
-
-void
-ip4_fib_table_fwding_dpo_update (ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len,
- const dpo_id_t *dpo)
-{
- ip4_fib_mtrie_route_add(&fib->mtrie, addr, len, dpo->dpoi_index);
-}
-
-void
-ip4_fib_table_fwding_dpo_remove (ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len,
- const dpo_id_t *dpo,
- u32 cover_index)
-{
- const fib_prefix_t *cover_prefix;
- const dpo_id_t *cover_dpo;
-
- /*
- * We need to pass the MTRIE the LB index and address length of the
- * covering prefix, so it can fill the plys with the correct replacement
- * for the entry being removed
- */
- cover_prefix = fib_entry_get_prefix(cover_index);
- cover_dpo = fib_entry_contribute_ip_forwarding(cover_index);
-
- ip4_fib_mtrie_route_del(&fib->mtrie,
- addr, len, dpo->dpoi_index,
- cover_prefix->fp_len,
- cover_dpo->dpoi_index);
-}
-
-void
-ip4_fib_table_walk (ip4_fib_t *fib,
- fib_table_walk_fn_t fn,
- void *ctx)
-{
- fib_prefix_t root = {
- .fp_proto = FIB_PROTOCOL_IP4,
- // address and length default to all 0
- };
-
- /*
- * A full tree walk is the dengenerate case of a sub-tree from
- * the very root
- */
- return (ip4_fib_table_sub_tree_walk(fib, &root, fn, ctx));
-}
-
-void
-ip4_fib_table_sub_tree_walk (ip4_fib_t *fib,
- const fib_prefix_t *root,
- fib_table_walk_fn_t fn,
- void *ctx)
-{
- fib_prefix_t *sub_trees = NULL;
- int i;
-
- /*
- * There is no efficient way to walk this array of hash tables.
- * so we walk each table with a mask length greater than and equal to
- * the required root and check it is covered by the root.
- */
- for (i = root->fp_len;
- i < ARRAY_LEN (fib->fib_entry_by_dst_address);
- i++)
- {
- uword * hash = fib->fib_entry_by_dst_address[i];
-
- if (NULL != hash)
- {
- ip4_address_t key;
- hash_pair_t * p;
-
- hash_foreach_pair (p, hash,
- ({
- key.as_u32 = p->key;
- if (ip4_destination_matches_route(&ip4_main,
- &key,
- &root->fp_addr.ip4,
- root->fp_len))
- {
- const fib_prefix_t *sub_tree;
- int skip = 0;
-
- /*
- * exclude sub-trees the walk does not want to explore
- */
- vec_foreach(sub_tree, sub_trees)
- {
- if (ip4_destination_matches_route(&ip4_main,
- &key,
- &sub_tree->fp_addr.ip4,
- sub_tree->fp_len))
- {
- skip = 1;
- break;
- }
- }
-
- if (!skip)
- {
- switch (fn(p->value[0], ctx))
- {
- case FIB_TABLE_WALK_CONTINUE:
- break;
- case FIB_TABLE_WALK_SUB_TREE_STOP: {
- fib_prefix_t pfx = {
- .fp_proto = FIB_PROTOCOL_IP4,
- .fp_len = i,
- .fp_addr.ip4 = key,
- };
- vec_add1(sub_trees, pfx);
- break;
- }
- case FIB_TABLE_WALK_STOP:
- goto done;
- }
- }
- }
- }));
- }
- }
-done:
- vec_free(sub_trees);
- return;
-}
/**
* Walk show context
@@ -573,12 +334,11 @@ ip4_show_fib (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
ip4_main_t * im4 = &ip4_main;
- fib_table_t * fib_table;
u64 total_mtrie_memory, total_hash_memory;
int verbose, matching, mtrie, memory;
ip4_address_t matching_address;
- u32 matching_mask = 32;
- int i, table_id = -1, fib_index = ~0;
+ u32 fib_index, matching_mask = 32;
+ int i, table_id = -1, user_fib_index = ~0;
int detail = 0;
verbose = 1;
@@ -610,21 +370,22 @@ ip4_show_fib (vlib_main_t * vm,
else if (unformat (input, "table %d", &table_id))
;
- else if (unformat (input, "index %d", &fib_index))
+ else if (unformat (input, "index %d", &user_fib_index))
;
else
break;
}
- pool_foreach (fib_table, im4->fibs)
+ pool_foreach_index (fib_index, im4->fibs)
{
- ip4_fib_t *fib = pool_elt_at_index(im4->v4_fibs, fib_table->ft_index);
+ fib_table_t *fib_table = pool_elt_at_index(im4->fibs, fib_index);
+ ip4_fib_t *fib = pool_elt_at_index(ip4_fibs, fib_table->ft_index);
fib_source_t source;
u8 *s = NULL;
- if (table_id >= 0 && table_id != (int)fib->table_id)
+ if (table_id >= 0 && table_id != (int)fib->hash.table_id)
continue;
- if (fib_index != ~0 && fib_index != (int)fib->index)
+ if (user_fib_index != ~0 && user_fib_index != fib_index)
continue;
if (memory)
@@ -632,12 +393,12 @@ ip4_show_fib (vlib_main_t * vm,
uword mtrie_size, hash_size;
- mtrie_size = ip4_fib_mtrie_memory_usage(&fib->mtrie);
+ mtrie_size = ip4_mtrie_memory_usage(&fib->mtrie);
hash_size = 0;
- for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++)
+ for (i = 0; i < ARRAY_LEN (fib->hash.fib_entry_by_dst_address); i++)
{
- uword * hash = fib->fib_entry_by_dst_address[i];
+ uword * hash = fib->hash.fib_entry_by_dst_address[i];
if (NULL != hash)
{
hash_size += hash_bytes(hash);
@@ -646,7 +407,7 @@ ip4_show_fib (vlib_main_t * vm,
if (verbose)
vlib_cli_output (vm, "%U mtrie:%d hash:%d",
- format_fib_table_name, fib->index,
+ format_fib_table_name, fib_index,
FIB_PROTOCOL_IP4,
mtrie_size,
hash_size);
@@ -656,9 +417,9 @@ ip4_show_fib (vlib_main_t * vm,
}
s = format(s, "%U, fib_index:%d, flow hash:[%U] epoch:%d flags:%U locks:[",
- format_fib_table_name, fib->index,
+ format_fib_table_name, fib_index,
FIB_PROTOCOL_IP4,
- fib->index,
+ fib_index,
format_ip_flow_hash_config,
fib_table->ft_flow_hash_config,
fib_table->ft_epoch,
@@ -679,15 +440,15 @@ ip4_show_fib (vlib_main_t * vm,
/* Show summary? */
if (mtrie)
{
- vlib_cli_output (vm, "%U", format_ip4_fib_mtrie, &fib->mtrie, verbose);
+ vlib_cli_output (vm, "%U", format_ip4_mtrie, &fib->mtrie, verbose);
continue;
}
if (! verbose)
{
vlib_cli_output (vm, "%=20s%=16s", "Prefix length", "Count");
- for (i = 0; i < ARRAY_LEN (fib->fib_entry_by_dst_address); i++)
+ for (i = 0; i < ARRAY_LEN (fib->hash.fib_entry_by_dst_address); i++)
{
- uword * hash = fib->fib_entry_by_dst_address[i];
+ uword * hash = fib->hash.fib_entry_by_dst_address[i];
uword n_elts = hash_elts (hash);
if (n_elts > 0)
vlib_cli_output (vm, "%20d%16d", i, n_elts);
@@ -721,7 +482,7 @@ ip4_show_fib (vlib_main_t * vm,
* entries for each table.
*
* @note This command will run for a long time when the FIB tables are
- * comprised of millions of entries. For those senarios, consider displaying
+ * comprised of millions of entries. For those scenarios, consider displaying
* a single table or summary mode.
*
* @cliexpar
@@ -860,10 +621,29 @@ ip4_show_fib (vlib_main_t * vm,
* 32 4
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_show_fib_command, static) = {
.path = "show ip fib",
.short_help = "show ip fib [summary] [table <table-id>] [index <fib-id>] [<ip4-addr>[/<mask>]] [mtrie] [detail]",
.function = ip4_show_fib,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+ip_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ char *default_name = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "default-table-name %s", &default_name))
+ ;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ fib_table_default_names[FIB_PROTOCOL_IP4] = default_name;
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (ip_config, "ip");
diff --git a/src/vnet/fib/ip4_fib.h b/src/vnet/fib/ip4_fib.h
index 7d17baf2545..65fc7775cc4 100644
--- a/src/vnet/fib/ip4_fib.h
+++ b/src/vnet/fib/ip4_fib.h
@@ -34,77 +34,52 @@
#include <vnet/ip/ip.h>
#include <vnet/fib/fib_entry.h>
#include <vnet/fib/fib_table.h>
-#include <vnet/ip/ip4_mtrie.h>
+#include <vnet/fib/ip4_fib_8.h>
+#include <vnet/fib/ip4_fib_16.h>
-typedef struct ip4_fib_t_
-{
- /** Required for pool_get_aligned */
- CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
-
- /**
- * Mtrie for fast lookups. Hash is used to maintain overlapping prefixes.
- * First member so it's in the first cacheline.
- */
- ip4_fib_mtrie_t mtrie;
-
- /* Hash table for each prefix length mapping. */
- uword *fib_entry_by_dst_address[33];
-
- /* Table ID (hash key) for this FIB. */
- u32 table_id;
-
- /* Index into FIB vector. */
- u32 index;
-} ip4_fib_t;
-
-extern fib_node_index_t ip4_fib_table_lookup(const ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len);
-extern fib_node_index_t ip4_fib_table_lookup_exact_match(const ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len);
-
-extern void ip4_fib_table_entry_remove(ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len);
-
-extern void ip4_fib_table_entry_insert(ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len,
- fib_node_index_t fib_entry_index);
-extern void ip4_fib_table_destroy(u32 fib_index);
-
-extern void ip4_fib_table_fwding_dpo_update(ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len,
- const dpo_id_t *dpo);
-
-extern void ip4_fib_table_fwding_dpo_remove(ip4_fib_t *fib,
- const ip4_address_t *addr,
- u32 len,
- const dpo_id_t *dpo,
- fib_node_index_t cover_index);
-extern u32 ip4_fib_table_lookup_lb (ip4_fib_t *fib,
- const ip4_address_t * dst);
+// for the VPP_IP_FIB_MTRIE_16 definition
+#include <vpp/vnet/config.h>
/**
- * @brief Walk all entries in a FIB table
- * N.B: This is NOT safe to deletes. If you need to delete walk the whole
- * table and store elements in a vector, then delete the elements
+ * the FIB module uses the 16-8-8 stride trie
*/
-extern void ip4_fib_table_walk(ip4_fib_t *fib,
- fib_table_walk_fn_t fn,
- void *ctx);
+#ifdef VPP_IP_FIB_MTRIE_16
+typedef ip4_fib_16_t ip4_fib_t;
+
+#define ip4_fibs ip4_fib_16s
+#define ip4_fib_table_lookup ip4_fib_16_table_lookup
+#define ip4_fib_table_lookup_exact_match ip4_fib_16_table_lookup_exact_match
+#define ip4_fib_table_entry_remove ip4_fib_16_table_entry_remove
+#define ip4_fib_table_entry_insert ip4_fib_16_table_entry_insert
+#define ip4_fib_table_fwding_dpo_update ip4_fib_16_table_fwding_dpo_update
+#define ip4_fib_table_fwding_dpo_remove ip4_fib_16_table_fwding_dpo_remove
+#define ip4_fib_table_lookup_lb ip4_fib_16_table_lookup_lb
+#define ip4_fib_table_walk ip4_fib_16_table_walk
+#define ip4_fib_table_sub_tree_walk ip4_fib_16_table_sub_tree_walk
+#define ip4_fib_table_init ip4_fib_16_table_init
+#define ip4_fib_table_free ip4_fib_16_table_free
+#define ip4_mtrie_memory_usage ip4_mtrie_16_memory_usage
+#define format_ip4_mtrie format_ip4_mtrie_16
+
+#else
+typedef ip4_fib_8_t ip4_fib_t;
+
+#define ip4_fibs ip4_fib_8s
+#define ip4_fib_table_lookup ip4_fib_8_table_lookup
+#define ip4_fib_table_lookup_exact_match ip4_fib_8_table_lookup_exact_match
+#define ip4_fib_table_entry_remove ip4_fib_8_table_entry_remove
+#define ip4_fib_table_entry_insert ip4_fib_8_table_entry_insert
+#define ip4_fib_table_fwding_dpo_update ip4_fib_8_table_fwding_dpo_update
+#define ip4_fib_table_fwding_dpo_remove ip4_fib_8_table_fwding_dpo_remove
+#define ip4_fib_table_lookup_lb ip4_fib_8_table_lookup_lb
+#define ip4_fib_table_walk ip4_fib_8_table_walk
+#define ip4_fib_table_sub_tree_walk ip4_fib_8_table_sub_tree_walk
+#define ip4_fib_table_init ip4_fib_8_table_init
+#define ip4_fib_table_free ip4_fib_8_table_free
+#define ip4_mtrie_memory_usage ip4_mtrie_8_memory_usage
+#define format_ip4_mtrie format_ip4_mtrie_8
-/**
- * @brief Walk all entries in a sub-tree of the FIB table
- * N.B: This is NOT safe to deletes. If you need to delete walk the whole
- * table and store elements in a vector, then delete the elements
- */
-extern void ip4_fib_table_sub_tree_walk(ip4_fib_t *fib,
- const fib_prefix_t *root,
- fib_table_walk_fn_t fn,
- void *ctx);
+#endif
/**
* @brief Get the FIB at the given index
@@ -112,7 +87,7 @@ extern void ip4_fib_table_sub_tree_walk(ip4_fib_t *fib,
static inline ip4_fib_t *
ip4_fib_get (u32 index)
{
- return (pool_elt_at_index(ip4_main.v4_fibs, index));
+ return (pool_elt_at_index(ip4_fibs, index));
}
always_inline u32
@@ -138,6 +113,7 @@ ip4_fib_lookup (ip4_main_t * im, u32 sw_if_index, ip4_address_t * dst)
extern u32 ip4_fib_table_find_or_create_and_lock(u32 table_id,
fib_source_t src);
extern u32 ip4_fib_table_create_and_lock(fib_source_t src);
+extern void ip4_fib_table_destroy(u32 fib_index);
extern u8 *format_ip4_fib_table_memory(u8 * s, va_list * args);
@@ -156,20 +132,108 @@ u32 ip4_fib_index_from_table_id (u32 table_id)
extern u32 ip4_fib_table_get_index_for_sw_if_index(u32 sw_if_index);
+#ifdef VPP_IP_FIB_MTRIE_16
+always_inline index_t
+ip4_fib_forwarding_lookup (u32 fib_index,
+ const ip4_address_t * addr)
+{
+ ip4_mtrie_leaf_t leaf;
+ ip4_mtrie_16_t * mtrie;
+
+ mtrie = &ip4_fib_get(fib_index)->mtrie;
+
+ leaf = ip4_mtrie_16_lookup_step_one (mtrie, addr);
+ leaf = ip4_mtrie_16_lookup_step (leaf, addr, 2);
+ leaf = ip4_mtrie_16_lookup_step (leaf, addr, 3);
+
+ return (ip4_mtrie_leaf_get_adj_index(leaf));
+}
+
+static_always_inline void
+ip4_fib_forwarding_lookup_x2 (u32 fib_index0,
+ u32 fib_index1,
+ const ip4_address_t * addr0,
+ const ip4_address_t * addr1,
+ index_t *lb0,
+ index_t *lb1)
+{
+ ip4_mtrie_leaf_t leaf[2];
+ ip4_mtrie_16_t * mtrie[2];
+
+ mtrie[0] = &ip4_fib_get(fib_index0)->mtrie;
+ mtrie[1] = &ip4_fib_get(fib_index1)->mtrie;
+
+ leaf[0] = ip4_mtrie_16_lookup_step_one (mtrie[0], addr0);
+ leaf[1] = ip4_mtrie_16_lookup_step_one (mtrie[1], addr1);
+ leaf[0] = ip4_mtrie_16_lookup_step (leaf[0], addr0, 2);
+ leaf[1] = ip4_mtrie_16_lookup_step (leaf[1], addr1, 2);
+ leaf[0] = ip4_mtrie_16_lookup_step (leaf[0], addr0, 3);
+ leaf[1] = ip4_mtrie_16_lookup_step (leaf[1], addr1, 3);
+
+ *lb0 = ip4_mtrie_leaf_get_adj_index(leaf[0]);
+ *lb1 = ip4_mtrie_leaf_get_adj_index(leaf[1]);
+}
+
+static_always_inline void
+ip4_fib_forwarding_lookup_x4 (u32 fib_index0,
+ u32 fib_index1,
+ u32 fib_index2,
+ u32 fib_index3,
+ const ip4_address_t * addr0,
+ const ip4_address_t * addr1,
+ const ip4_address_t * addr2,
+ const ip4_address_t * addr3,
+ index_t *lb0,
+ index_t *lb1,
+ index_t *lb2,
+ index_t *lb3)
+{
+ ip4_mtrie_leaf_t leaf[4];
+ ip4_mtrie_16_t * mtrie[4];
+
+ mtrie[0] = &ip4_fib_get(fib_index0)->mtrie;
+ mtrie[1] = &ip4_fib_get(fib_index1)->mtrie;
+ mtrie[2] = &ip4_fib_get(fib_index2)->mtrie;
+ mtrie[3] = &ip4_fib_get(fib_index3)->mtrie;
+
+ leaf[0] = ip4_mtrie_16_lookup_step_one (mtrie[0], addr0);
+ leaf[1] = ip4_mtrie_16_lookup_step_one (mtrie[1], addr1);
+ leaf[2] = ip4_mtrie_16_lookup_step_one (mtrie[2], addr2);
+ leaf[3] = ip4_mtrie_16_lookup_step_one (mtrie[3], addr3);
+
+ leaf[0] = ip4_mtrie_16_lookup_step (leaf[0], addr0, 2);
+ leaf[1] = ip4_mtrie_16_lookup_step (leaf[1], addr1, 2);
+ leaf[2] = ip4_mtrie_16_lookup_step (leaf[2], addr2, 2);
+ leaf[3] = ip4_mtrie_16_lookup_step (leaf[3], addr3, 2);
+
+ leaf[0] = ip4_mtrie_16_lookup_step (leaf[0], addr0, 3);
+ leaf[1] = ip4_mtrie_16_lookup_step (leaf[1], addr1, 3);
+ leaf[2] = ip4_mtrie_16_lookup_step (leaf[2], addr2, 3);
+ leaf[3] = ip4_mtrie_16_lookup_step (leaf[3], addr3, 3);
+
+ *lb0 = ip4_mtrie_leaf_get_adj_index(leaf[0]);
+ *lb1 = ip4_mtrie_leaf_get_adj_index(leaf[1]);
+ *lb2 = ip4_mtrie_leaf_get_adj_index(leaf[2]);
+ *lb3 = ip4_mtrie_leaf_get_adj_index(leaf[3]);
+}
+
+#else
+
always_inline index_t
ip4_fib_forwarding_lookup (u32 fib_index,
const ip4_address_t * addr)
{
- ip4_fib_mtrie_leaf_t leaf;
- ip4_fib_mtrie_t * mtrie;
+ ip4_mtrie_leaf_t leaf;
+ ip4_mtrie_8_t * mtrie;
mtrie = &ip4_fib_get(fib_index)->mtrie;
- leaf = ip4_fib_mtrie_lookup_step_one (mtrie, addr);
- leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 2);
- leaf = ip4_fib_mtrie_lookup_step (mtrie, leaf, addr, 3);
+ leaf = ip4_mtrie_8_lookup_step_one (mtrie, addr);
+ leaf = ip4_mtrie_8_lookup_step (leaf, addr, 1);
+ leaf = ip4_mtrie_8_lookup_step (leaf, addr, 2);
+ leaf = ip4_mtrie_8_lookup_step (leaf, addr, 3);
- return (ip4_fib_mtrie_leaf_get_adj_index(leaf));
+ return (ip4_mtrie_leaf_get_adj_index(leaf));
}
static_always_inline void
@@ -180,22 +244,73 @@ ip4_fib_forwarding_lookup_x2 (u32 fib_index0,
index_t *lb0,
index_t *lb1)
{
- ip4_fib_mtrie_leaf_t leaf[2];
- ip4_fib_mtrie_t * mtrie[2];
+ ip4_mtrie_leaf_t leaf[2];
+ ip4_mtrie_8_t * mtrie[2];
mtrie[0] = &ip4_fib_get(fib_index0)->mtrie;
mtrie[1] = &ip4_fib_get(fib_index1)->mtrie;
- leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], addr0);
- leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], addr1);
- leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0], addr0, 2);
- leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1], addr1, 2);
- leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0], addr0, 3);
- leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1], addr1, 3);
+ leaf[0] = ip4_mtrie_8_lookup_step_one (mtrie[0], addr0);
+ leaf[1] = ip4_mtrie_8_lookup_step_one (mtrie[1], addr1);
+ leaf[0] = ip4_mtrie_8_lookup_step (leaf[0], addr0, 1);
+ leaf[1] = ip4_mtrie_8_lookup_step (leaf[1], addr1, 1);
+ leaf[0] = ip4_mtrie_8_lookup_step (leaf[0], addr0, 2);
+ leaf[1] = ip4_mtrie_8_lookup_step (leaf[1], addr1, 2);
+ leaf[0] = ip4_mtrie_8_lookup_step (leaf[0], addr0, 3);
+ leaf[1] = ip4_mtrie_8_lookup_step (leaf[1], addr1, 3);
+
+ *lb0 = ip4_mtrie_leaf_get_adj_index(leaf[0]);
+ *lb1 = ip4_mtrie_leaf_get_adj_index(leaf[1]);
+}
+
+static_always_inline void
+ip4_fib_forwarding_lookup_x4 (u32 fib_index0,
+ u32 fib_index1,
+ u32 fib_index2,
+ u32 fib_index3,
+ const ip4_address_t * addr0,
+ const ip4_address_t * addr1,
+ const ip4_address_t * addr2,
+ const ip4_address_t * addr3,
+ index_t *lb0,
+ index_t *lb1,
+ index_t *lb2,
+ index_t *lb3)
+{
+ ip4_mtrie_leaf_t leaf[4];
+ ip4_mtrie_8_t * mtrie[4];
- *lb0 = ip4_fib_mtrie_leaf_get_adj_index(leaf[0]);
- *lb1 = ip4_fib_mtrie_leaf_get_adj_index(leaf[1]);
+ mtrie[0] = &ip4_fib_get(fib_index0)->mtrie;
+ mtrie[1] = &ip4_fib_get(fib_index1)->mtrie;
+ mtrie[2] = &ip4_fib_get(fib_index2)->mtrie;
+ mtrie[3] = &ip4_fib_get(fib_index3)->mtrie;
+
+ leaf[0] = ip4_mtrie_8_lookup_step_one (mtrie[0], addr0);
+ leaf[1] = ip4_mtrie_8_lookup_step_one (mtrie[1], addr1);
+ leaf[2] = ip4_mtrie_8_lookup_step_one (mtrie[2], addr2);
+ leaf[3] = ip4_mtrie_8_lookup_step_one (mtrie[3], addr3);
+
+ leaf[0] = ip4_mtrie_8_lookup_step (leaf[0], addr0, 1);
+ leaf[1] = ip4_mtrie_8_lookup_step (leaf[1], addr1, 1);
+ leaf[2] = ip4_mtrie_8_lookup_step (leaf[2], addr2, 1);
+ leaf[3] = ip4_mtrie_8_lookup_step (leaf[3], addr3, 1);
+
+ leaf[0] = ip4_mtrie_8_lookup_step (leaf[0], addr0, 2);
+ leaf[1] = ip4_mtrie_8_lookup_step (leaf[1], addr1, 2);
+ leaf[2] = ip4_mtrie_8_lookup_step (leaf[2], addr2, 2);
+ leaf[3] = ip4_mtrie_8_lookup_step (leaf[3], addr3, 2);
+
+ leaf[0] = ip4_mtrie_8_lookup_step (leaf[0], addr0, 3);
+ leaf[1] = ip4_mtrie_8_lookup_step (leaf[1], addr1, 3);
+ leaf[2] = ip4_mtrie_8_lookup_step (leaf[2], addr2, 3);
+ leaf[3] = ip4_mtrie_8_lookup_step (leaf[3], addr3, 3);
+
+ *lb0 = ip4_mtrie_leaf_get_adj_index(leaf[0]);
+ *lb1 = ip4_mtrie_leaf_get_adj_index(leaf[1]);
+ *lb2 = ip4_mtrie_leaf_get_adj_index(leaf[2]);
+ *lb3 = ip4_mtrie_leaf_get_adj_index(leaf[3]);
}
#endif
+#endif
diff --git a/src/vnet/fib/ip4_fib_16.c b/src/vnet/fib/ip4_fib_16.c
new file mode 100644
index 00000000000..7699e8926f1
--- /dev/null
+++ b/src/vnet/fib/ip4_fib_16.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/ip4_fib.h>
+
+ip4_fib_16_t *ip4_fib_16s;
+
+void
+ip4_fib_16_table_init (ip4_fib_16_t *fib)
+{
+ ip4_mtrie_16_init(&fib->mtrie);
+}
+
+void
+ip4_fib_16_table_free (ip4_fib_16_t *fib)
+{
+ ip4_mtrie_16_free(&fib->mtrie);
+}
+
+/*
+ * ip4_fib_16_table_lookup_exact_match
+ *
+ * Exact match prefix lookup
+ */
+fib_node_index_t
+ip4_fib_16_table_lookup_exact_match (const ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ return (ip4_fib_hash_table_lookup_exact_match(&fib->hash, addr, len));
+}
+
+/*
+ * ip4_fib_16_table_lookup_adj
+ *
+ * Longest prefix match
+ */
+index_t
+ip4_fib_16_table_lookup_lb (ip4_fib_16_t *fib,
+ const ip4_address_t *addr)
+{
+ return (ip4_fib_hash_table_lookup_lb(&fib->hash, addr));
+}
+
+/*
+ * ip4_fib_16_table_lookup
+ *
+ * Longest prefix match
+ */
+fib_node_index_t
+ip4_fib_16_table_lookup (const ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ return (ip4_fib_hash_table_lookup(&fib->hash, addr, len));
+}
+
+void
+ip4_fib_16_table_entry_insert (ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index)
+{
+ return (ip4_fib_hash_table_entry_insert(&fib->hash, addr, len, fib_entry_index));
+}
+
+void
+ip4_fib_16_table_entry_remove (ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ return (ip4_fib_hash_table_entry_remove(&fib->hash, addr, len));
+}
+
+void
+ip4_fib_16_table_fwding_dpo_update (ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo)
+{
+ ip4_mtrie_16_route_add(&fib->mtrie, addr, len, dpo->dpoi_index);
+}
+
+void
+ip4_fib_16_table_fwding_dpo_remove (ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo,
+ u32 cover_index)
+{
+ const fib_prefix_t *cover_prefix;
+ const dpo_id_t *cover_dpo;
+
+ /*
+ * We need to pass the MTRIE the LB index and address length of the
+ * covering prefix, so it can fill the plys with the correct replacement
+ * for the entry being removed
+ */
+ cover_prefix = fib_entry_get_prefix(cover_index);
+ cover_dpo = fib_entry_contribute_ip_forwarding(cover_index);
+
+ ip4_mtrie_16_route_del(&fib->mtrie,
+ addr, len, dpo->dpoi_index,
+ cover_prefix->fp_len,
+ cover_dpo->dpoi_index);
+}
+
+void
+ip4_fib_16_table_walk (ip4_fib_16_t *fib,
+ fib_table_walk_fn_t fn,
+ void *ctx)
+{
+ ip4_fib_hash_table_walk(&fib->hash, fn, ctx);
+}
+
+void
+ip4_fib_16_table_sub_tree_walk (ip4_fib_16_t *fib,
+ const fib_prefix_t *root,
+ fib_table_walk_fn_t fn,
+ void *ctx)
+{
+ ip4_fib_hash_table_sub_tree_walk(&fib->hash, root, fn, ctx);
+}
diff --git a/src/vnet/fib/ip4_fib_16.h b/src/vnet/fib/ip4_fib_16.h
new file mode 100644
index 00000000000..b82ad57a04a
--- /dev/null
+++ b/src/vnet/fib/ip4_fib_16.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The IPv4 FIB
+ *
+ * FIBs are composed of two prefix data-bases (akak tables). The non-forwarding
+ * table contains all the routes that the control plane has programmed, the
+ * forwarding table contains the sub-set of those routes that can be used to
+ * forward packets.
+ * In the IPv4 FIB the non-forwarding table is an array of hash tables indexed
+ * by mask length, the forwarding table is an mtrie
+ *
+ * This IPv4 FIB is used by the protocol independent FIB. So directly using
+ * this APIs in client code is not encouraged. However, this IPv4 FIB can be
+ * used if all the client wants is an IPv4 prefix data-base
+ */
+
+#ifndef __IP4_FIB_16_H__
+#define __IP4_FIB_16_H__
+
+#include <vnet/fib/ip4_fib_hash.h>
+#include <vnet/ip/ip4_mtrie.h>
+
+typedef struct ip4_fib_16_t_
+{
+ /** Required for pool_get_aligned */
+ CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
+
+ /**
+ * Mtrie for fast lookups. Hash is used to maintain overlapping prefixes.
+ * First member so it's in the first cacheline.
+ */
+ ip4_mtrie_16_t mtrie;
+
+ /**
+ * The hash table DB
+ */
+ ip4_fib_hash_t hash;
+} ip4_fib_16_t;
+
+extern ip4_fib_16_t *ip4_fib_16s;
+
+extern fib_node_index_t ip4_fib_16_table_lookup(const ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+extern fib_node_index_t ip4_fib_16_table_lookup_exact_match(const ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+
+extern void ip4_fib_16_table_entry_remove(ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+
+extern void ip4_fib_16_table_entry_insert(ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index);
+extern void ip4_fib_16_table_free(ip4_fib_16_t *fib);
+extern void ip4_fib_16_table_init(ip4_fib_16_t *fib);
+
+extern void ip4_fib_16_table_fwding_dpo_update(ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo);
+
+extern void ip4_fib_16_table_fwding_dpo_remove(ip4_fib_16_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo,
+ fib_node_index_t cover_index);
+extern u32 ip4_fib_16_table_lookup_lb (ip4_fib_16_t *fib,
+ const ip4_address_t * dst);
+
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B: This is NOT safe to deletes. If you need to delete walk the whole
+ * table and store elements in a vector, then delete the elements
+ */
+extern void ip4_fib_16_table_walk(ip4_fib_16_t *fib,
+ fib_table_walk_fn_t fn,
+ void *ctx);
+
+/**
+ * @brief Walk all entries in a sub-tree of the FIB table
+ * N.B: This is NOT safe to deletes. If you need to delete walk the whole
+ * table and store elements in a vector, then delete the elements
+ */
+extern void ip4_fib_16_table_sub_tree_walk(ip4_fib_16_t *fib,
+ const fib_prefix_t *root,
+ fib_table_walk_fn_t fn,
+ void *ctx);
+
+#endif
+
diff --git a/src/vnet/fib/ip4_fib_8.c b/src/vnet/fib/ip4_fib_8.c
new file mode 100644
index 00000000000..587e28a8c9c
--- /dev/null
+++ b/src/vnet/fib/ip4_fib_8.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/ip4_fib.h>
+
+ip4_fib_8_t *ip4_fib_8s;
+
+void
+ip4_fib_8_table_init (ip4_fib_8_t *fib)
+{
+ ip4_mtrie_8_init(&fib->mtrie);
+}
+
+void
+ip4_fib_8_table_free (ip4_fib_8_t *fib)
+{
+ ip4_mtrie_8_free(&fib->mtrie);
+}
+
+/*
+ * ip4_fib_8_table_lookup_exact_match
+ *
+ * Exact match prefix lookup
+ */
+fib_node_index_t
+ip4_fib_8_table_lookup_exact_match (const ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ return (ip4_fib_hash_table_lookup_exact_match(&fib->hash, addr, len));
+}
+
+/*
+ * ip4_fib_8_table_lookup_adj
+ *
+ * Longest prefix match
+ */
+index_t
+ip4_fib_8_table_lookup_lb (ip4_fib_8_t *fib,
+ const ip4_address_t *addr)
+{
+ return (ip4_fib_hash_table_lookup_lb(&fib->hash, addr));
+}
+
+/*
+ * ip4_fib_8_table_lookup
+ *
+ * Longest prefix match
+ */
+fib_node_index_t
+ip4_fib_8_table_lookup (const ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ return (ip4_fib_hash_table_lookup(&fib->hash, addr, len));
+}
+
+void
+ip4_fib_8_table_entry_insert (ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index)
+{
+ return (ip4_fib_hash_table_entry_insert(&fib->hash, addr, len, fib_entry_index));
+}
+
+void
+ip4_fib_8_table_entry_remove (ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ return (ip4_fib_hash_table_entry_remove(&fib->hash, addr, len));
+}
+
+void
+ip4_fib_8_table_fwding_dpo_update (ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo)
+{
+ ip4_mtrie_8_route_add(&fib->mtrie, addr, len, dpo->dpoi_index);
+}
+
+void
+ip4_fib_8_table_fwding_dpo_remove (ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo,
+ u32 cover_index)
+{
+ const fib_prefix_t *cover_prefix;
+ const dpo_id_t *cover_dpo;
+
+ /*
+ * We need to pass the MTRIE the LB index and address length of the
+ * covering prefix, so it can fill the plys with the correct replacement
+ * for the entry being removed
+ */
+ cover_prefix = fib_entry_get_prefix(cover_index);
+ cover_dpo = fib_entry_contribute_ip_forwarding(cover_index);
+
+ ip4_mtrie_8_route_del(&fib->mtrie,
+ addr, len, dpo->dpoi_index,
+ cover_prefix->fp_len,
+ cover_dpo->dpoi_index);
+}
+
+void
+ip4_fib_8_table_walk (ip4_fib_8_t *fib,
+ fib_table_walk_fn_t fn,
+ void *ctx)
+{
+ ip4_fib_hash_table_walk(&fib->hash, fn, ctx);
+}
+
+void
+ip4_fib_8_table_sub_tree_walk (ip4_fib_8_t *fib,
+ const fib_prefix_t *root,
+ fib_table_walk_fn_t fn,
+ void *ctx)
+{
+ ip4_fib_hash_table_sub_tree_walk(&fib->hash, root, fn, ctx);
+}
diff --git a/src/vnet/fib/ip4_fib_8.h b/src/vnet/fib/ip4_fib_8.h
new file mode 100644
index 00000000000..0964f3ab133
--- /dev/null
+++ b/src/vnet/fib/ip4_fib_8.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The IPv4 FIB
+ *
+ * FIBs are composed of two prefix data-bases (akak tables). The non-forwarding
+ * table contains all the routes that the control plane has programmed, the
+ * forwarding table contains the sub-set of those routes that can be used to
+ * forward packets.
+ * In the IPv4 FIB the non-forwarding table is an array of hash tables indexed
+ * by mask length, the forwarding table is an mtrie
+ *
+ * This IPv4 FIB is used by the protocol independent FIB. So directly using
+ * this APIs in client code is not encouraged. However, this IPv4 FIB can be
+ * used if all the client wants is an IPv4 prefix data-base
+ */
+
+#ifndef __IP4_FIB_8_H__
+#define __IP4_FIB_8_H__
+
+#include <vnet/fib/ip4_fib_hash.h>
+#include <vnet/ip/ip4_mtrie.h>
+
+typedef struct ip4_fib_8_t_
+{
+ /** Required for pool_get_aligned */
+ CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
+
+ /**
+ * Mtrie for fast lookups. Hash is used to maintain overlapping prefixes.
+ * First member so it's in the first cacheline.
+ */
+ ip4_mtrie_8_t mtrie;
+
+ /**
+ * The hash table DB
+ */
+ ip4_fib_hash_t hash;
+} ip4_fib_8_t;
+
+extern ip4_fib_8_t *ip4_fib_8s;
+
+extern fib_node_index_t ip4_fib_8_table_lookup(const ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+extern fib_node_index_t ip4_fib_8_table_lookup_exact_match(const ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+
+extern void ip4_fib_8_table_entry_remove(ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+
+extern void ip4_fib_8_table_entry_insert(ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index);
+extern void ip4_fib_8_table_free(ip4_fib_8_t *fib);
+extern void ip4_fib_8_table_init(ip4_fib_8_t *fib);
+
+extern void ip4_fib_8_table_fwding_dpo_update(ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo);
+
+extern void ip4_fib_8_table_fwding_dpo_remove(ip4_fib_8_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ const dpo_id_t *dpo,
+ fib_node_index_t cover_index);
+extern u32 ip4_fib_8_table_lookup_lb (ip4_fib_8_t *fib,
+ const ip4_address_t * dst);
+
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B: This is NOT safe to deletes. If you need to delete walk the whole
+ * table and store elements in a vector, then delete the elements
+ */
+extern void ip4_fib_8_table_walk(ip4_fib_8_t *fib,
+ fib_table_walk_fn_t fn,
+ void *ctx);
+
+/**
+ * @brief Walk all entries in a sub-tree of the FIB table
+ * N.B: This is NOT safe to deletes. If you need to delete walk the whole
+ * table and store elements in a vector, then delete the elements
+ */
+extern void ip4_fib_8_table_sub_tree_walk(ip4_fib_8_t *fib,
+ const fib_prefix_t *root,
+ fib_table_walk_fn_t fn,
+ void *ctx);
+
+#endif
+
diff --git a/src/vnet/fib/ip4_fib_hash.c b/src/vnet/fib/ip4_fib_hash.c
new file mode 100644
index 00000000000..f42cf28f53e
--- /dev/null
+++ b/src/vnet/fib/ip4_fib_hash.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/fib/fib_table.h>
+#include <vnet/fib/fib_entry.h>
+#include <vnet/fib/ip4_fib.h>
+
+/*
+ * ip4_fib_hash_table_lookup_exact_match
+ *
+ * Exact match prefix lookup
+ */
+fib_node_index_t
+ip4_fib_hash_table_lookup_exact_match (const ip4_fib_hash_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ uword * hash, * result;
+ u32 key;
+
+ hash = fib->fib_entry_by_dst_address[len];
+ key = (addr->data_u32 & ip4_main.fib_masks[len]);
+
+ result = hash_get(hash, key);
+
+ if (NULL != result) {
+ return (result[0]);
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+/*
+ * ip4_fib_hash_table_lookup_adj
+ *
+ * Longest prefix match
+ */
+index_t
+ip4_fib_hash_table_lookup_lb (const ip4_fib_hash_t *fib,
+ const ip4_address_t *addr)
+{
+ fib_node_index_t fei;
+
+ fei = ip4_fib_hash_table_lookup(fib, addr, 32);
+
+ if (FIB_NODE_INDEX_INVALID != fei)
+ {
+ const dpo_id_t *dpo;
+
+ dpo = fib_entry_contribute_ip_forwarding(fei);
+
+ return (dpo->dpoi_index);
+ }
+ return (INDEX_INVALID);
+}
+
+/*
+ * ip4_fib_hash_table_lookup
+ *
+ * Longest prefix match
+ */
+fib_node_index_t
+ip4_fib_hash_table_lookup (const ip4_fib_hash_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ uword * hash, * result;
+ i32 mask_len;
+ u32 key;
+
+ for (mask_len = len; mask_len >= 0; mask_len--)
+ {
+ hash = fib->fib_entry_by_dst_address[mask_len];
+ key = (addr->data_u32 & ip4_main.fib_masks[mask_len]);
+
+ result = hash_get (hash, key);
+
+ if (NULL != result) {
+ return (result[0]);
+ }
+ }
+ return (FIB_NODE_INDEX_INVALID);
+}
+
+void
+ip4_fib_hash_table_entry_insert (ip4_fib_hash_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index)
+{
+ uword * hash, * result;
+ u32 key;
+
+ key = (addr->data_u32 & ip4_main.fib_masks[len]);
+ hash = fib->fib_entry_by_dst_address[len];
+ result = hash_get (hash, key);
+
+ if (NULL == result) {
+ /*
+ * adding a new entry
+ */
+
+ if (NULL == hash) {
+ hash = hash_create (32 /* elts */, sizeof (uword));
+ hash_set_flags (hash, HASH_FLAG_NO_AUTO_SHRINK);
+
+ }
+ hash = hash_set(hash, key, fib_entry_index);
+ fib->fib_entry_by_dst_address[len] = hash;
+ }
+ else
+ {
+ ASSERT(0);
+ }
+}
+
+void
+ip4_fib_hash_table_entry_remove (ip4_fib_hash_t *fib,
+ const ip4_address_t *addr,
+ u32 len)
+{
+ uword * hash, * result;
+ u32 key;
+
+ key = (addr->data_u32 & ip4_main.fib_masks[len]);
+ hash = fib->fib_entry_by_dst_address[len];
+ result = hash_get (hash, key);
+
+ if (NULL == result)
+ {
+ /*
+ * removing a non-existent entry. i'll allow it.
+ */
+ }
+ else
+ {
+ hash_unset(hash, key);
+ }
+
+ fib->fib_entry_by_dst_address[len] = hash;
+}
+
+void
+ip4_fib_hash_table_walk (ip4_fib_hash_t *fib,
+ fib_table_walk_fn_t fn,
+ void *ctx)
+{
+ fib_prefix_t root = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ // address and length default to all 0
+ };
+
+ /*
+ * A full tree walk is the dengenerate case of a sub-tree from
+ * the very root
+ */
+ return (ip4_fib_hash_table_sub_tree_walk(fib, &root, fn, ctx));
+}
+
+void
+ip4_fib_hash_table_sub_tree_walk (ip4_fib_hash_t *fib,
+ const fib_prefix_t *root,
+ fib_table_walk_fn_t fn,
+ void *ctx)
+{
+ fib_prefix_t *sub_trees = NULL;
+ int i;
+
+ /*
+ * There is no efficient way to walk this array of hash tables.
+ * so we walk each table with a mask length greater than and equal to
+ * the required root and check it is covered by the root.
+ */
+ for (i = root->fp_len;
+ i < ARRAY_LEN (fib->fib_entry_by_dst_address);
+ i++)
+ {
+ uword * hash = fib->fib_entry_by_dst_address[i];
+
+ if (NULL != hash)
+ {
+ ip4_address_t key;
+ hash_pair_t * p;
+
+ hash_foreach_pair (p, hash,
+ ({
+ key.as_u32 = p->key;
+ if (ip4_destination_matches_route(&ip4_main,
+ &key,
+ &root->fp_addr.ip4,
+ root->fp_len))
+ {
+ const fib_prefix_t *sub_tree;
+ int skip = 0;
+
+ /*
+ * exclude sub-trees the walk does not want to explore
+ */
+ vec_foreach(sub_tree, sub_trees)
+ {
+ if (ip4_destination_matches_route(&ip4_main,
+ &key,
+ &sub_tree->fp_addr.ip4,
+ sub_tree->fp_len))
+ {
+ skip = 1;
+ break;
+ }
+ }
+
+ if (!skip)
+ {
+ switch (fn(p->value[0], ctx))
+ {
+ case FIB_TABLE_WALK_CONTINUE:
+ break;
+ case FIB_TABLE_WALK_SUB_TREE_STOP: {
+ fib_prefix_t pfx = {
+ .fp_proto = FIB_PROTOCOL_IP4,
+ .fp_len = i,
+ .fp_addr.ip4 = key,
+ };
+ vec_add1(sub_trees, pfx);
+ break;
+ }
+ case FIB_TABLE_WALK_STOP:
+ goto done;
+ }
+ }
+ }
+ }));
+ }
+ }
+done:
+ vec_free(sub_trees);
+ return;
+}
+
diff --git a/src/vnet/fib/ip4_fib_hash.h b/src/vnet/fib/ip4_fib_hash.h
new file mode 100644
index 00000000000..84b3b9ae834
--- /dev/null
+++ b/src/vnet/fib/ip4_fib_hash.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * @brief The IPv4 FIB Hash table
+ */
+
+#ifndef __IP4_FIB_HASH_H__
+#define __IP4_FIB_HASH_H__
+
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+
+typedef struct ip4_fib_hash_t_
+{
+ /* Hash table for each prefix length mapping. */
+ uword *fib_entry_by_dst_address[33];
+
+ /* Table ID (hash key) for this FIB. */
+ u32 table_id;
+} ip4_fib_hash_t;
+
+extern fib_node_index_t ip4_fib_hash_table_lookup(const ip4_fib_hash_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+extern index_t ip4_fib_hash_table_lookup_lb(const ip4_fib_hash_t *fib,
+ const ip4_address_t *addr);
+extern fib_node_index_t ip4_fib_hash_table_lookup_exact_match(const ip4_fib_hash_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+
+extern void ip4_fib_hash_table_entry_remove(ip4_fib_hash_t *fib,
+ const ip4_address_t *addr,
+ u32 len);
+
+extern void ip4_fib_hash_table_entry_insert(ip4_fib_hash_t *fib,
+ const ip4_address_t *addr,
+ u32 len,
+ fib_node_index_t fib_entry_index);
+extern void ip4_fib_hash_table_init(ip4_fib_hash_t *fib);
+extern void ip4_fib_hash_table_destroy(ip4_fib_hash_t *fib);
+
+/**
+ * @brief Walk all entries in a FIB table
+ * N.B: This is NOT safe to deletes. If you need to delete walk the whole
+ * table and store elements in a vector, then delete the elements
+ */
+extern void ip4_fib_hash_table_walk(ip4_fib_hash_t *fib,
+ fib_table_walk_fn_t fn,
+ void *ctx);
+
+/**
+ * @brief Walk all entries in a sub-tree of the FIB table
+ * N.B: This is NOT safe to deletes. If you need to delete walk the whole
+ * table and store elements in a vector, then delete the elements
+ */
+extern void ip4_fib_hash_table_sub_tree_walk(ip4_fib_hash_t *fib,
+ const fib_prefix_t *root,
+ fib_table_walk_fn_t fn,
+ void *ctx);
+
+#endif
+
diff --git a/src/vnet/fib/ip6_fib.c b/src/vnet/fib/ip6_fib.c
index 62bf16f0efc..d37b77e08a4 100644
--- a/src/vnet/fib/ip6_fib.c
+++ b/src/vnet/fib/ip6_fib.c
@@ -174,6 +174,7 @@ ip6_fib_table_destroy (u32 fib_index)
{
hash_unset (ip6_main.fib_index_by_table_id, fib_table->ft_table_id);
}
+ vec_free (fib_table->ft_locks);
vec_free(fib_table->ft_src_route_counts);
pool_put_index(ip6_main.v6_fibs, fib_table->ft_index);
pool_put(ip6_main.fibs, fib_table);
@@ -772,7 +773,7 @@ ip6_show_fib (vlib_main_t * vm,
* entries for each table.
*
* @note This command will run for a long time when the FIB tables are
- * comprised of millions of entries. For those senarios, consider displaying
+ * comprised of millions of entries. For those scenarios, consider displaying
* in summary mode.
*
* @cliexpar
@@ -861,19 +862,18 @@ ip6_show_fib (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
.path = "show ip6 fib",
.short_help = "show ip6 fib [summary] [table <table-id>] [index <fib-id>] [<ip6-addr>[/<width>]] [detail]",
.function = ip6_show_fib,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_config (vlib_main_t * vm, unformat_input_t * input)
{
uword heapsize = 0;
u32 nbuckets = 0;
+ char *default_name = 0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -882,6 +882,8 @@ ip6_config (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "heap-size %U",
unformat_memory_size, &heapsize))
;
+ else if (unformat (input, "default-table-name %s", &default_name))
+ ;
else
return clib_error_return (0, "unknown input '%U'",
format_unformat_error, input);
@@ -889,6 +891,7 @@ ip6_config (vlib_main_t * vm, unformat_input_t * input)
ip6_fib_table_nbuckets = nbuckets;
ip6_fib_table_size = heapsize;
+ fib_table_default_names[FIB_PROTOCOL_IP6] = default_name;
return 0;
}
diff --git a/src/vnet/fib/mpls_fib.c b/src/vnet/fib/mpls_fib.c
index 494d0106bae..767fc84c8a8 100644
--- a/src/vnet/fib/mpls_fib.c
+++ b/src/vnet/fib/mpls_fib.c
@@ -22,7 +22,7 @@
* packets. Whether the two actions are the same more often than they are
* different, or vice versa, is a function of the deployment in which the router
* is used and thus not predictable.
- * The desgin choice to make with an MPLS_FIB table is:
+ * The design choice to make with an MPLS_FIB table is:
* 1 - 20 bit key: label only.
* When the EOS and non-EOS actions differ the result is a 'EOS-choice' object.
* 2 - 21 bit key: label and EOS-bit.
@@ -33,7 +33,7 @@
* - lower memory overhead, since there are few DB entries.
* Disadvantages:
* - slower DP performance in the case the chains differ, as more objects are
- * encounterd in the switch path
+ * encountered in the switch path
*
* 21 bit key:
* Advantages:
@@ -275,6 +275,7 @@ mpls_fib_table_destroy (u32 fib_index)
}
hash_free(mf->mf_entries);
+ vec_free (fib_table->ft_locks);
vec_free(fib_table->ft_src_route_counts);
pool_put(mpls_main.mpls_fibs, mf);
pool_put(mpls_main.fibs, fib_table);
@@ -450,7 +451,7 @@ mpls_fib_show (vlib_main_t * vm,
continue;
s = format (s, "%v, fib_index:%d locks:[",
- fib_table->ft_desc, mpls_main.fibs - fib_table);
+ fib_table->ft_desc, fib_table - mpls_main.fibs);
vec_foreach_index(source, fib_table->ft_locks)
{
if (0 != fib_table->ft_locks[source])
@@ -480,3 +481,24 @@ VLIB_CLI_COMMAND (mpls_fib_show_command, static) = {
.short_help = "show mpls fib [summary] [table <n>]",
.function = mpls_fib_show,
};
+
+static clib_error_t *
+mpls_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ char *default_name = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "default-table-name %s", &default_name))
+ ;
+ else
+ return clib_error_return (0, "unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ fib_table_default_names[FIB_PROTOCOL_MPLS] = default_name;
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (mpls_config, "mpls");
diff --git a/src/vnet/fib/mpls_fib.h b/src/vnet/fib/mpls_fib.h
index 33eaa88ca9d..e9fee9990ac 100644
--- a/src/vnet/fib/mpls_fib.h
+++ b/src/vnet/fib/mpls_fib.h
@@ -31,7 +31,7 @@
* Type exposure is to allow the DP fast/inlined access
*/
#define MPLS_FIB_KEY_SIZE 21
-#define MPLS_FIB_DB_SIZE (1 << (MPLS_FIB_KEY_SIZE-1))
+#define MPLS_FIB_DB_SIZE (1 << MPLS_FIB_KEY_SIZE)
/**
* There are no options for controlling the MPLS flow hash,
@@ -54,7 +54,7 @@ typedef struct mpls_fib_t_
/**
* The load-balance indices keyed by 21 bit label+eos bit.
- * A flat array for maximum lookup performace.
+ * A flat array for maximum lookup performance.
*/
index_t mf_lbs[MPLS_FIB_DB_SIZE];
} mpls_fib_t;
diff --git a/src/vnet/flow/FEATURE.yaml b/src/vnet/flow/FEATURE.yaml
index a26571c35e8..8633f4febdd 100644
--- a/src/vnet/flow/FEATURE.yaml
+++ b/src/vnet/flow/FEATURE.yaml
@@ -16,13 +16,15 @@ features:
- FLOW_TYPE_IP4_VXLAN,
- FLOW_TYPE_IP6_VXLAN,
- FLOW_TYPE_IP4_GTPC,
- - FLOW_TYPE_IP4_GTPU
+ - FLOW_TYPE_IP4_GTPU,
+ - FLOW_TYPE_GENERIC
- The below flow actions can be specified for the flows:
- FLOW_ACTION_COUNT,
- FLOW_ACTION_MARK,
- FLOW_ACTION_BUFFER_ADVANCE,
- FLOW_ACTION_REDIRECT_TO_NODE,
- FLOW_ACTION_REDIRECT_TO_QUEUE,
+ - FLOW_ACTION_RSS,
- FLOW_ACTION_DROP
description: "Flow infrastructure to provide hardware offload capabilities"
state: development
diff --git a/src/vnet/flow/flow.api b/src/vnet/flow/flow.api
index 7bb21cdcd72..1e807b539d5 100644
--- a/src/vnet/flow/flow.api
+++ b/src/vnet/flow/flow.api
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-option version = "0.0.2";
+option version = "1.0.3";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
@@ -26,12 +26,27 @@ import "vnet/flow/flow_types.api";
*/
define flow_add
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_flow_rule_t flow;
option vat_help = "test flow add [src-ip <ip-addr/mask>] [dst-ip <ip-addr/mask>] [src-port <port/mask>] [dst-port <port/mask>] [proto <ip-proto>]";
};
+/** \brief flow add request v2
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param flow - flow rule v2
+*/
+define flow_add_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_flow_rule_v2_t flow;
+ option vat_help = "test flow add [src-ip <ip-addr/mask>] [dst-ip <ip-addr/mask>] [src-port <port/mask>] [dst-port <port/mask>] [proto <ip-proto>] [spec <spec-string>] [mask <mask-string>]";
+};
+
/** \brief reply for adding flow
@param context - sender context, to match reply w/ request
@param retval - return code
@@ -39,6 +54,20 @@ define flow_add
*/
define flow_add_reply
{
+ option deprecated;
+
+ u32 context;
+ i32 retval;
+ u32 flow_index;
+};
+
+/** \brief reply for adding flow v2
+ @param context - sender context, to match reply w/ request
+ @param retval - return code
+ @param flow_index - flow index, can be used for flow del/enable/disable
+*/
+define flow_add_v2_reply
+{
u32 context;
i32 retval;
u32 flow_index;
diff --git a/src/vnet/flow/flow.c b/src/vnet/flow/flow.c
index 9b6a376af3e..eda15356958 100644
--- a/src/vnet/flow/flow.c
+++ b/src/vnet/flow/flow.c
@@ -74,12 +74,10 @@ vnet_flow_del (vnet_main_t * vnm, u32 flow_index)
if (f == 0)
return VNET_FLOW_ERROR_NO_SUCH_ENTRY;
- /* *INDENT-OFF* */
hash_foreach (hw_if_index, private_data, f->private_data,
({
vnet_flow_disable (vnm, flow_index, hw_if_index);
}));
- /* *INDENT-ON* */
hash_free (f->private_data);
clib_memset (f, 0, sizeof (*f));
diff --git a/src/vnet/flow/flow.h b/src/vnet/flow/flow.h
index ad61677a44b..ada822257e3 100644
--- a/src/vnet/flow/flow.h
+++ b/src/vnet/flow/flow.h
@@ -18,31 +18,43 @@
#include <vppinfra/clib.h>
#include <vppinfra/pcap.h>
+#include <vnet/vnet.h>
#include <vnet/l3_types.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/ethernet/packet.h>
-#define foreach_flow_type \
- /* l2 flow*/ \
- _(ETHERNET, ethernet, "ethernet") \
- /* l3 IP flow */ \
- _(IP4, ip4, "ipv4") \
- _(IP6, ip6, "ipv6") \
- /* IP tunnel flow */ \
- _(IP4_L2TPV3OIP, ip4_l2tpv3oip, "ipv4-l2tpv3oip") \
- _(IP4_IPSEC_ESP, ip4_ipsec_esp, "ipv4-ipsec-esp") \
- _(IP4_IPSEC_AH, ip4_ipsec_ah, "ipv4-ipsec-ah") \
- /* l4 flow*/ \
- _(IP4_N_TUPLE, ip4_n_tuple, "ipv4-n-tuple") \
- _(IP6_N_TUPLE, ip6_n_tuple, "ipv6-n-tuple") \
- _(IP4_N_TUPLE_TAGGED, ip4_n_tuple_tagged, "ipv4-n-tuple-tagged") \
- _(IP6_N_TUPLE_TAGGED, ip6_n_tuple_tagged, "ipv6-n-tuple-tagged") \
- /* L4 tunnel flow*/ \
- _(IP4_VXLAN, ip4_vxlan, "ipv4-vxlan") \
- _(IP6_VXLAN, ip6_vxlan, "ipv6-vxlan") \
- _(IP4_GTPC, ip4_gtpc, "ipv4-gtpc") \
- _(IP4_GTPU, ip4_gtpu, "ipv4-gtpu")
+#define foreach_flow_type \
+ /* l2 flow*/ \
+ _ (ETHERNET, ethernet, "ethernet") \
+ /* l3 IP flow */ \
+ _ (IP4, ip4, "ipv4") \
+ _ (IP6, ip6, "ipv6") \
+ /* IP tunnel flow */ \
+ _ (IP4_L2TPV3OIP, ip4_l2tpv3oip, "ipv4-l2tpv3oip") \
+ _ (IP4_IPSEC_ESP, ip4_ipsec_esp, "ipv4-ipsec-esp") \
+ _ (IP4_IPSEC_AH, ip4_ipsec_ah, "ipv4-ipsec-ah") \
+ /* l4 flow*/ \
+ _ (IP4_N_TUPLE, ip4_n_tuple, "ipv4-n-tuple") \
+ _ (IP6_N_TUPLE, ip6_n_tuple, "ipv6-n-tuple") \
+ _ (IP4_N_TUPLE_TAGGED, ip4_n_tuple_tagged, "ipv4-n-tuple-tagged") \
+ _ (IP6_N_TUPLE_TAGGED, ip6_n_tuple_tagged, "ipv6-n-tuple-tagged") \
+ /* L4 tunnel flow*/ \
+ _ (IP4_VXLAN, ip4_vxlan, "ipv4-vxlan") \
+ _ (IP6_VXLAN, ip6_vxlan, "ipv6-vxlan") \
+ _ (IP4_GTPC, ip4_gtpc, "ipv4-gtpc") \
+ _ (IP4_GTPU, ip4_gtpu, "ipv4-gtpu") \
+ /* generic flow */ \
+ _ (GENERIC, generic, "generic") \
+ /* IP in IP */ \
+ _ (IP6_IP6, ip6_ip6, "ipv6-ipv6") \
+ _ (IP6_IP4, ip6_ip4, "ipv6-ipv4") \
+ _ (IP4_IP6, ip4_ip6, "ipv4-ipv6") \
+ _ (IP4_IP4, ip4_ip4, "ipv4-ipv4") \
+ _ (IP6_IP6_N_TUPLE, ip6_ip6_n_tuple, "ipv6-ipv6-n-tuple") \
+ _ (IP6_IP4_N_TUPLE, ip6_ip4_n_tuple, "ipv6-ipv4-n-tuple") \
+ _ (IP4_IP6_N_TUPLE, ip4_ip6_n_tuple, "ipv4-ipv6-n-tuple") \
+ _ (IP4_IP4_N_TUPLE, ip4_ip4_n_tuple, "ipv4-ipv4-n-tuple")
#define foreach_flow_entry_ethernet \
_fe(ethernet_header_t, eth_hdr)
@@ -103,6 +115,44 @@
foreach_flow_entry_ip4_n_tuple \
_fe(u32, teid)
+#define foreach_flow_entry_ip6_ip6 \
+ foreach_flow_entry_ip6 _fe (ip6_address_and_mask_t, in_src_addr) \
+ _fe (ip6_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip6_ip6_n_tuple \
+ foreach_flow_entry_ip6_ip6 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
+#define foreach_flow_entry_ip6_ip4 \
+ foreach_flow_entry_ip6 _fe (ip4_address_and_mask_t, in_src_addr) \
+ _fe (ip4_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip6_ip4_n_tuple \
+ foreach_flow_entry_ip6_ip4 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
+#define foreach_flow_entry_ip4_ip6 \
+ foreach_flow_entry_ip4 _fe (ip6_address_and_mask_t, in_src_addr) \
+ _fe (ip6_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip4_ip6_n_tuple \
+ foreach_flow_entry_ip4_ip6 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
+#define foreach_flow_entry_ip4_ip4 \
+ foreach_flow_entry_ip4 _fe (ip4_address_and_mask_t, in_src_addr) \
+ _fe (ip4_address_and_mask_t, in_dst_addr) \
+ _fe (ip_prot_and_mask_t, in_protocol)
+
+#define foreach_flow_entry_ip4_ip4_n_tuple \
+ foreach_flow_entry_ip4_ip4 _fe (ip_port_and_mask_t, in_src_port) \
+ _fe (ip_port_and_mask_t, in_dst_port)
+
+#define foreach_flow_entry_generic _fe (generic_pattern_t, pattern)
+
#define foreach_flow_action \
_(0, COUNT, "count") \
_(1, MARK, "mark") \
@@ -127,32 +177,41 @@ typedef enum
_( -5, NO_SUCH_INTERFACE, "no such interface") \
_( -6, INTERNAL, "internal error")
-#define foreach_flow_rss_types \
- _(0, FRAG_IPV4, "ipv4-frag") \
- _(1, IPV4_TCP, "ipv4-tcp") \
- _(2, IPV4_UDP, "ipv4-udp") \
- _(3, IPV4_SCTP, "ipv4-sctp") \
- _(4, IPV4_OTHER, "ipv4-other") \
- _(5, IPV4, "ipv4") \
- _(6, IPV6_TCP_EX, "ipv6-tcp-ex") \
- _(7, IPV6_UDP_EX, "ipv6-udp-ex") \
- _(8, FRAG_IPV6, "ipv6-frag") \
- _(9, IPV6_TCP, "ipv6-tcp") \
- _(10, IPV6_UDP, "ipv6-udp") \
- _(11, IPV6_SCTP, "ipv6-sctp") \
- _(12, IPV6_OTHER, "ipv6-other") \
- _(13, IPV6_EX, "ipv6-ex") \
- _(14, IPV6, "ipv6") \
- _(15, L2_PAYLOAD, "l2-payload") \
- _(16, PORT, "port") \
- _(17, VXLAN, "vxlan") \
- _(18, GENEVE, "geneve") \
- _(19, NVGRE, "nvgre") \
- _(20, GTPU, "gtpu") \
- _(60, L4_DST_ONLY, "l4-dst-only") \
- _(61, L4_SRC_ONLY, "l4-src-only") \
- _(62, L3_DST_ONLY, "l3-dst-only") \
- _(63, L3_SRC_ONLY, "l3-src-only")
+#define foreach_flow_rss_types \
+ _ (0, FRAG_IPV4, "ipv4-frag") \
+ _ (1, IPV4_TCP, "ipv4-tcp") \
+ _ (2, IPV4_UDP, "ipv4-udp") \
+ _ (3, IPV4_SCTP, "ipv4-sctp") \
+ _ (4, IPV4_OTHER, "ipv4-other") \
+ _ (5, IPV4, "ipv4") \
+ _ (6, IPV6_TCP_EX, "ipv6-tcp-ex") \
+ _ (7, IPV6_UDP_EX, "ipv6-udp-ex") \
+ _ (8, FRAG_IPV6, "ipv6-frag") \
+ _ (9, IPV6_TCP, "ipv6-tcp") \
+ _ (10, IPV6_UDP, "ipv6-udp") \
+ _ (11, IPV6_SCTP, "ipv6-sctp") \
+ _ (12, IPV6_OTHER, "ipv6-other") \
+ _ (13, IPV6_EX, "ipv6-ex") \
+ _ (14, IPV6, "ipv6") \
+ _ (15, L2_PAYLOAD, "l2-payload") \
+ _ (16, PORT, "port") \
+ _ (17, VXLAN, "vxlan") \
+ _ (18, GENEVE, "geneve") \
+ _ (19, NVGRE, "nvgre") \
+ _ (20, GTPU, "gtpu") \
+ _ (21, ESP, "esp") \
+ _ (22, L2TPV3, "l2tpv3") \
+ _ (60, L4_DST_ONLY, "l4-dst-only") \
+ _ (61, L4_SRC_ONLY, "l4-src-only") \
+ _ (62, L3_DST_ONLY, "l3-dst-only") \
+ _ (63, L3_SRC_ONLY, "l3-src-only")
+
+typedef enum
+{
+#define _(v, n, s) VNET_FLOW_RSS_TYPES_##n = v,
+ foreach_flow_rss_types
+#undef _
+} vnet_flow_rss_types_t;
#define foreach_rss_function \
_(DEFAULT, "default") \
@@ -181,6 +240,12 @@ typedef struct
u8 mask;
} ip_prot_and_mask_t;
+typedef struct
+{
+ u8 spec[1024];
+ u8 mask[1024];
+} generic_pattern_t;
+
typedef enum
{
VNET_FLOW_TYPE_UNKNOWN,
@@ -233,6 +298,10 @@ typedef struct
/* queue for VNET_FLOW_ACTION_REDIRECT_TO_QUEUE */
u32 redirect_queue;
+ /* start queue index and queue numbers for RSS queue group */
+ u32 queue_index;
+ u32 queue_num;
+
/* buffer offset for VNET_FLOW_ACTION_BUFFER_ADVANCE */
i32 buffer_advance;
diff --git a/src/vnet/flow/flow_api.c b/src/vnet/flow/flow_api.c
index 6f08f0314a4..bfe97ec2978 100644
--- a/src/vnet/flow/flow_api.c
+++ b/src/vnet/flow/flow_api.c
@@ -215,6 +215,16 @@ ipv4_gtpc_flow_convert (vl_api_flow_ip4_gtpc_t * vl_api_flow,
f->teid = ntohl (vl_api_flow->teid);
}
+static inline void
+generic_flow_convert (vl_api_flow_generic_t *vl_api_flow,
+ vnet_flow_generic_t *f)
+{
+ clib_memcpy (f->pattern.spec, vl_api_flow->pattern.spec,
+ sizeof (vl_api_flow->pattern.spec));
+ clib_memcpy (f->pattern.mask, vl_api_flow->pattern.mask,
+ sizeof (vl_api_flow->pattern.mask));
+}
+
static void
vl_api_flow_add_t_handler (vl_api_flow_add_t * mp)
{
@@ -289,12 +299,95 @@ vl_api_flow_add_t_handler (vl_api_flow_add_t * mp)
rv = vnet_flow_add (vnm, &flow, &flow_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_FLOW_ADD_REPLY,
({
rmp->flow_index = ntohl (flow_index);
}));
- /* *INDENT-ON* */
+}
+
+static void
+vl_api_flow_add_v2_t_handler (vl_api_flow_add_v2_t *mp)
+{
+ vl_api_flow_add_v2_reply_t *rmp;
+ int rv = 0;
+ vnet_flow_t flow;
+ u32 flow_index = ~0;
+ vl_api_flow_rule_v2_t *f = &mp->flow;
+
+ vnet_main_t *vnm = vnet_get_main ();
+
+ flow.type = ntohl (f->type);
+ flow.actions = ntohl (f->actions);
+ flow.mark_flow_id = ntohl (f->mark_flow_id);
+ flow.redirect_node_index = ntohl (f->redirect_node_index);
+ flow.redirect_device_input_next_index =
+ ntohl (f->redirect_device_input_next_index);
+ flow.redirect_queue = ntohl (f->redirect_queue);
+ flow.buffer_advance = ntohl (f->buffer_advance);
+ flow.queue_index = ntohl (f->queue_index);
+ flow.queue_num = ntohl (f->queue_num);
+ flow.rss_types = clib_net_to_host_u64 (f->rss_types);
+ flow.rss_fun = ntohl (f->rss_fun);
+
+ switch (flow.type)
+ {
+ case VNET_FLOW_TYPE_IP4:
+ ipv4_flow_convert (&f->flow.ip4, &flow.ip4);
+ break;
+ case VNET_FLOW_TYPE_IP6:
+ ipv6_flow_convert (&f->flow.ip6, &flow.ip6);
+ break;
+ case VNET_FLOW_TYPE_IP4_N_TUPLE:
+ ipv4_n_tuple_flow_convert (&f->flow.ip4_n_tuple, &flow.ip4_n_tuple);
+ break;
+ case VNET_FLOW_TYPE_IP6_N_TUPLE:
+ ipv6_n_tuple_flow_convert (&f->flow.ip6_n_tuple, &flow.ip6_n_tuple);
+ break;
+ case VNET_FLOW_TYPE_IP4_N_TUPLE_TAGGED:
+ ipv4_n_tuple_tagged_flow_convert (&f->flow.ip4_n_tuple_tagged,
+ &flow.ip4_n_tuple_tagged);
+ break;
+ case VNET_FLOW_TYPE_IP6_N_TUPLE_TAGGED:
+ ipv6_n_tuple_tagged_flow_convert (&f->flow.ip6_n_tuple_tagged,
+ &flow.ip6_n_tuple_tagged);
+ break;
+ case VNET_FLOW_TYPE_IP4_L2TPV3OIP:
+ ipv4_l2tpv3oip_flow_convert (&f->flow.ip4_l2tpv3oip,
+ &flow.ip4_l2tpv3oip);
+ break;
+ case VNET_FLOW_TYPE_IP4_IPSEC_ESP:
+ ipv4_ipsec_esp_flow_convert (&f->flow.ip4_ipsec_esp,
+ &flow.ip4_ipsec_esp);
+ break;
+ case VNET_FLOW_TYPE_IP4_IPSEC_AH:
+ ipv4_ipsec_ah_flow_convert (&f->flow.ip4_ipsec_ah, &flow.ip4_ipsec_ah);
+ break;
+ case VNET_FLOW_TYPE_IP4_VXLAN:
+ ipv4_vxlan_flow_convert (&f->flow.ip4_vxlan, &flow.ip4_vxlan);
+ break;
+ case VNET_FLOW_TYPE_IP6_VXLAN:
+ ipv6_vxlan_flow_convert (&f->flow.ip6_vxlan, &flow.ip6_vxlan);
+ break;
+ case VNET_FLOW_TYPE_IP4_GTPU:
+ ipv4_gtpu_flow_convert (&f->flow.ip4_gtpu, &flow.ip4_gtpu);
+ break;
+ case VNET_FLOW_TYPE_IP4_GTPC:
+ ipv4_gtpc_flow_convert (&f->flow.ip4_gtpc, &flow.ip4_gtpc);
+ break;
+ case VNET_FLOW_TYPE_GENERIC:
+ generic_flow_convert (&f->flow.generic, &flow.generic);
+ break;
+ default:
+ rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+ goto out;
+ break;
+ }
+
+ rv = vnet_flow_add (vnm, &flow, &flow_index);
+
+out:
+ REPLY_MACRO2 (VL_API_FLOW_ADD_V2_REPLY,
+ ({ rmp->flow_index = ntohl (flow_index); }));
}
static void
diff --git a/src/vnet/flow/flow_cli.c b/src/vnet/flow/flow_cli.c
index e2a3141c551..e4b73717241 100644
--- a/src/vnet/flow/flow_cli.c
+++ b/src/vnet/flow/flow_cli.c
@@ -138,13 +138,11 @@ format_flow_enabled_hw (u8 * s, va_list * args)
u32 hw_if_index;
uword private_data;
vnet_main_t *vnm = vnet_get_main ();
- /* *INDENT-OFF* */
hash_foreach (hw_if_index, private_data, f->private_data,
({
t = format (t, "%s%U", t ? ", " : "",
format_vnet_hw_if_index_name, vnm, hw_if_index);
}));
- /* *INDENT-ON* */
s = format (s, "%v", t);
vec_free (t);
return s;
@@ -223,7 +221,11 @@ show_flow_entry (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%-10s: %u", "index", f->index);
vlib_cli_output (vm, "%-10s: %s", "type", flow_type_strings[f->type]);
vlib_cli_output (vm, "%-10s: %U", "match", format_flow, f);
- /* *INDENT-OFF* */
+ if (f->type == VNET_FLOW_TYPE_GENERIC)
+ {
+ vlib_cli_output (vm, "%s: %s", "spec", f->generic.pattern.spec);
+ vlib_cli_output (vm, "%s: %s", "mask", f->generic.pattern.mask);
+ }
hash_foreach (hw_if_index, private_data, f->private_data,
({
hi = vnet_get_hw_interface (vnm, hw_if_index);
@@ -234,28 +236,28 @@ show_flow_entry (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, " %U\n", dev_class->format_flow,
hi->dev_instance, f->index, private_data);
}));
- /* *INDENT-ON* */
return 0;
}
no_args:
- /* *INDENT-OFF* */
pool_foreach (f, fm->global_flow_pool)
{
vlib_cli_output (vm, "%U\n", format_flow, f);
+ if (f->type == VNET_FLOW_TYPE_GENERIC)
+ {
+ vlib_cli_output (vm, "%s: %s", "spec", f->generic.pattern.spec);
+ vlib_cli_output (vm, "%s: %s", "mask", f->generic.pattern.mask);
+ }
}
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_entry_command, static) = {
.path = "show flow entry",
.short_help = "show flow entry [index <index>]",
.function = show_flow_entry,
};
-/* *INDENT-ON* */
static clib_error_t *
show_flow_ranges (vlib_main_t * vm, unformat_input_t * input,
@@ -266,22 +268,18 @@ show_flow_ranges (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%8s %8s %s", "Start", "Count", "Owner");
- /* *INDENT-OFF* */
vec_foreach (r, fm->ranges)
{
vlib_cli_output (vm, "%8u %8u %s", r->start, r->count, r->owner);
};
- /* *INDENT-ON* */
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_ranges_command, static) = {
.path = "show flow ranges",
.short_help = "show flow ranges",
.function = show_flow_ranges,
};
-/* *INDENT-ON* */
static clib_error_t *
show_flow_interface (vlib_main_t * vm, unformat_input_t * input,
@@ -319,13 +317,11 @@ show_flow_interface (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_flow_interface_command, static) = {
.path = "show flow interface",
.short_help = "show flow interface <interface name>",
.function = show_flow_interface,
};
-/* *INDENT-ON* */
static clib_error_t *
test_flow (vlib_main_t * vm, unformat_input_t * input,
@@ -354,16 +350,18 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
int rv;
u32 teid = 0, session_id = 0, spi = 0;
u32 vni = 0;
+ u32 queue_start = 0, queue_end = 0;
vnet_flow_type_t type = VNET_FLOW_TYPE_UNKNOWN;
- ip4_address_and_mask_t ip4s = { };
- ip4_address_and_mask_t ip4d = { };
- ip6_address_and_mask_t ip6s = { };
- ip6_address_and_mask_t ip6d = { };
- ip_port_and_mask_t sport = { };
- ip_port_and_mask_t dport = { };
- ip_prot_and_mask_t protocol = { };
+ ip4_address_and_mask_t ip4s = {}, in_ip4s = {};
+ ip4_address_and_mask_t ip4d = {}, in_ip4d = {};
+ ip6_address_and_mask_t ip6s = {}, in_ip6s = {};
+ ip6_address_and_mask_t ip6d = {}, in_ip6d = {};
+ ip_port_and_mask_t sport = {}, in_sport = {};
+ ip_port_and_mask_t dport = {}, in_dport = {};
+ ip_prot_and_mask_t protocol = {}, in_proto = {};
u16 eth_type;
- bool tcp_udp_port_set = false;
+ bool inner_ip4_set = false, inner_ip6_set = false;
+ bool tcp_udp_port_set = false, inner_port_set = false;
bool gtpc_set = false;
bool gtpu_set = false;
bool vni_set = false;
@@ -371,6 +369,8 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
bool ipsec_esp_set = false, ipsec_ah_set = false;
u8 *rss_type[3] = { };
u8 *type_str = NULL;
+ u8 *spec = NULL;
+ u8 *mask = NULL;
clib_memset (&flow, 0, sizeof (vnet_flow_t));
flow.index = ~0;
@@ -389,6 +389,10 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
action = FLOW_ENABLE;
else if (unformat (line_input, "disable"))
action = FLOW_DISABLE;
+ else if (unformat (line_input, "spec %s", &spec))
+ ;
+ else if (unformat (line_input, "mask %s", &mask))
+ ;
else if (unformat (line_input, "eth-type %U",
unformat_ethernet_type_host_byte_order, &eth_type))
flow_class = FLOW_ETHERNET_CLASS;
@@ -398,12 +402,24 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (line_input, "dst-ip %U",
unformat_ip4_address_and_mask, &ip4d))
flow_class = FLOW_IPV4_CLASS;
+ else if (unformat (line_input, "in-src-ip %U",
+ unformat_ip4_address_and_mask, &in_ip4s))
+ inner_ip4_set = true;
+ else if (unformat (line_input, "in-dst-ip %U",
+ unformat_ip4_address_and_mask, &in_ip4d))
+ inner_ip4_set = true;
else if (unformat (line_input, "ip6-src-ip %U",
unformat_ip6_address_and_mask, &ip6s))
flow_class = FLOW_IPV6_CLASS;
else if (unformat (line_input, "ip6-dst-ip %U",
unformat_ip6_address_and_mask, &ip6d))
flow_class = FLOW_IPV6_CLASS;
+ else if (unformat (line_input, "in-ip6-src-ip %U",
+ unformat_ip6_address_and_mask, &in_ip6s))
+ inner_ip6_set = true;
+ else if (unformat (line_input, "in-ip6-dst-ip %U",
+ unformat_ip6_address_and_mask, &in_ip6d))
+ inner_ip6_set = true;
else if (unformat (line_input, "src-port %U", unformat_ip_port_and_mask,
&sport))
tcp_udp_port_set = true;
@@ -415,6 +431,15 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
(line_input, "proto %U", unformat_ip_protocol_and_mask,
&protocol))
;
+ else if (unformat (line_input, "in-src-port %U",
+ unformat_ip_port_and_mask, &in_sport))
+ inner_port_set = true;
+ else if (unformat (line_input, "in-dst-port %U",
+ unformat_ip_port_and_mask, &in_dport))
+ inner_port_set = true;
+ else if (unformat (line_input, "in-proto %U",
+ unformat_ip_protocol_and_mask, &in_proto))
+ ;
else if (unformat (line_input, "gtpc teid %u", &teid))
gtpc_set = true;
else if (unformat (line_input, "gtpu teid %u", &teid))
@@ -506,6 +531,21 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
#undef _
flow.actions |= VNET_FLOW_ACTION_RSS;
}
+ else if (unformat (line_input, "rss queues"))
+ {
+ if (unformat (line_input, "%d to %d", &queue_start, &queue_end))
+ ;
+ else
+ {
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ }
+
+ flow.queue_index = queue_start;
+ flow.queue_num = queue_end - queue_start + 1;
+
+ flow.actions |= VNET_FLOW_ACTION_RSS;
+ }
else if (unformat (line_input, "%U", unformat_vnet_hw_interface, vnm,
&hw_if_index))
;
@@ -560,6 +600,22 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
type = VNET_FLOW_TYPE_IP4_IPSEC_AH;
else if (tcp_udp_port_set)
type = VNET_FLOW_TYPE_IP4_N_TUPLE;
+ else if (inner_ip4_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP4_IP4_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP4_IP4;
+ protocol.prot = IP_PROTOCOL_IP_IN_IP;
+ }
+ else if (inner_ip6_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP4_IP6_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP4_IP6;
+ protocol.prot = IP_PROTOCOL_IPV6;
+ }
else
type = VNET_FLOW_TYPE_IP4;
break;
@@ -568,11 +624,32 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
type = VNET_FLOW_TYPE_IP6_N_TUPLE;
else if (vni_set)
type = VNET_FLOW_TYPE_IP6_VXLAN;
+ else if (inner_ip4_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP6_IP4_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP6_IP4;
+ protocol.prot = IP_PROTOCOL_IP_IN_IP;
+ }
+ else if (inner_ip6_set)
+ {
+ if (inner_port_set)
+ type = VNET_FLOW_TYPE_IP6_IP6_N_TUPLE;
+ else
+ type = VNET_FLOW_TYPE_IP6_IP6;
+ protocol.prot = IP_PROTOCOL_IPV6;
+ }
else
type = VNET_FLOW_TYPE_IP6;
break;
default:
+ if (spec && mask)
+ {
+ type = VNET_FLOW_TYPE_GENERIC;
+ break;
+ }
return clib_error_return (0,
"Please specify a supported flow type");
}
@@ -623,6 +700,30 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
case IP_PROTOCOL_IPSEC_AH:
flow.ip4_ipsec_esp.spi = spi;
break;
+ case IP_PROTOCOL_IP_IN_IP:
+ clib_memcpy (&flow.ip4_ip4.in_src_addr, &in_ip4s,
+ sizeof (ip4_address_and_mask_t));
+ clib_memcpy (&flow.ip4_ip4.in_dst_addr, &in_ip4d,
+ sizeof (ip4_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP4_IP4_N_TUPLE)
+ {
+ flow.ip4_ip4.in_protocol.prot = in_proto.prot;
+ flow.ip4_ip4_n_tuple.in_src_port = in_sport;
+ flow.ip4_ip4_n_tuple.in_dst_port = in_dport;
+ }
+ break;
+ case IP_PROTOCOL_IPV6:
+ clib_memcpy (&flow.ip4_ip6.in_src_addr, &in_ip6s,
+ sizeof (ip6_address_and_mask_t));
+ clib_memcpy (&flow.ip4_ip6.in_dst_addr, &in_ip6d,
+ sizeof (ip6_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP4_IP6_N_TUPLE)
+ {
+ flow.ip4_ip6.in_protocol.prot = in_proto.prot;
+ flow.ip4_ip6_n_tuple.in_src_port = in_sport;
+ flow.ip4_ip6_n_tuple.in_dst_port = in_dport;
+ }
+ break;
default:
break;
}
@@ -656,10 +757,41 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
if (type == VNET_FLOW_TYPE_IP6_VXLAN)
flow.ip6_vxlan.vni = vni;
break;
+ case IP_PROTOCOL_IP_IN_IP:
+ clib_memcpy (&flow.ip6_ip4.in_src_addr, &in_ip4s,
+ sizeof (ip4_address_and_mask_t));
+ clib_memcpy (&flow.ip6_ip4.in_dst_addr, &in_ip4d,
+ sizeof (ip4_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP6_IP4_N_TUPLE)
+ {
+ flow.ip6_ip4.in_protocol.prot = in_proto.prot;
+ flow.ip6_ip4_n_tuple.in_src_port = in_sport;
+ flow.ip6_ip4_n_tuple.in_dst_port = in_dport;
+ }
+ break;
+ case IP_PROTOCOL_IPV6:
+ clib_memcpy (&flow.ip6_ip6.in_src_addr, &in_ip6s,
+ sizeof (ip6_address_and_mask_t));
+ clib_memcpy (&flow.ip6_ip6.in_dst_addr, &in_ip6d,
+ sizeof (ip6_address_and_mask_t));
+ if (type == VNET_FLOW_TYPE_IP6_IP6_N_TUPLE)
+ {
+ flow.ip6_ip6.in_protocol.prot = in_proto.prot;
+ flow.ip6_ip6_n_tuple.in_src_port = in_sport;
+ flow.ip6_ip6_n_tuple.in_dst_port = in_dport;
+ }
+ break;
default:
break;
}
}
+ if (type == VNET_FLOW_TYPE_GENERIC)
+ {
+ clib_memcpy (flow.generic.pattern.spec, spec,
+ sizeof (flow.generic.pattern.spec));
+ clib_memcpy (flow.generic.pattern.mask, mask,
+ sizeof (flow.generic.pattern.mask));
+ }
flow.type = type;
rv = vnet_flow_add (vnm, &flow, &flow_index);
@@ -687,22 +819,22 @@ test_flow (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_flow_command, static) = {
- .path = "test flow",
- .short_help = "test flow [add|del|enable|disable] [index <id>] "
- "[src-ip <ip-addr/mask>] [dst-ip <ip-addr/mask>] "
- "[ip6-src-ip <ip-addr/mask>] [ip6-dst-ip <ip-addr/mask>] "
- "[src-port <port/mask>] [dst-port <port/mask>] "
- "[proto <ip-proto>] "
- "[gtpc teid <teid>] [gtpu teid <teid>] [vxlan <vni>] "
- "[session id <session>] [spi <spi>]"
- "[next-node <node>] [mark <id>] [buffer-advance <len>] "
- "[redirect-to-queue <queue>] [drop] "
- "[rss function <name>] [rss types <flow type>]",
- .function = test_flow,
+ .path = "test flow",
+ .short_help = "test flow [add|del|enable|disable] [index <id>] "
+ "[src-ip <ip-addr/mask>] [dst-ip <ip-addr/mask>] "
+ "[ip6-src-ip <ip-addr/mask>] [ip6-dst-ip <ip-addr/mask>] "
+ "[src-port <port/mask>] [dst-port <port/mask>] "
+ "[proto <ip-proto>] "
+ "[gtpc teid <teid>] [gtpu teid <teid>] [vxlan <vni>] "
+ "[session id <session>] [spi <spi>]"
+ "[spec <spec string>] [mask <mask string>]"
+ "[next-node <node>] [mark <id>] [buffer-advance <len>] "
+ "[redirect-to-queue <queue>] [drop] "
+ "[rss function <name>] [rss types <flow type>]"
+ "[rss queues <queue_start> to <queue_end>]",
+ .function = test_flow,
};
-/* *INDENT-ON* */
static u8 *
format_flow_match_element (u8 * s, va_list * args)
diff --git a/src/vnet/flow/flow_types.api b/src/vnet/flow/flow_types.api
index 86f7ce128cb..1696001d975 100644
--- a/src/vnet/flow/flow_types.api
+++ b/src/vnet/flow/flow_types.api
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-option version = "0.0.3";
+option version = "0.0.4";
import "vnet/ethernet/ethernet_types.api";
import "vnet/ip/ip_types.api";
@@ -36,6 +36,25 @@ enum flow_type
FLOW_TYPE_IP4_GTPU,
};
+enum flow_type_v2
+{
+ FLOW_TYPE_ETHERNET_V2 = 1,
+ FLOW_TYPE_IP4_V2,
+ FLOW_TYPE_IP6_V2,
+ FLOW_TYPE_IP4_L2TPV3OIP_V2,
+ FLOW_TYPE_IP4_IPSEC_ESP_V2,
+ FLOW_TYPE_IP4_IPSEC_AH_V2,
+ FLOW_TYPE_IP4_N_TUPLE_V2,
+ FLOW_TYPE_IP6_N_TUPLE_V2,
+ FLOW_TYPE_IP4_N_TUPLE_TAGGED_V2,
+ FLOW_TYPE_IP6_N_TUPLE_TAGGED_V2,
+ FLOW_TYPE_IP4_VXLAN_V2,
+ FLOW_TYPE_IP6_VXLAN_V2,
+ FLOW_TYPE_IP4_GTPC_V2,
+ FLOW_TYPE_IP4_GTPU_V2,
+ FLOW_TYPE_GENERIC_V2,
+};
+
enum flow_action
{
FLOW_ACTION_COUNT = 1,
@@ -46,6 +65,31 @@ enum flow_action
FLOW_ACTION_DROP = 64,
};
+enum flow_action_v2
+{
+ FLOW_ACTION_COUNT_V2 = 1,
+ FLOW_ACTION_MARK_V2 = 2,
+ FLOW_ACTION_BUFFER_ADVANCE_V2 = 4,
+ FLOW_ACTION_REDIRECT_TO_NODE_V2 = 8,
+ FLOW_ACTION_REDIRECT_TO_QUEUE_V2 = 16,
+ FLOW_ACTION_RSS_V2 = 32,
+ FLOW_ACTION_DROP_V2 = 64,
+};
+
+enum rss_function
+{
+ RSS_FUNC_DEFAULT,
+ RSS_FUNC_TOEPLITZ,
+ RSS_FUNC_SIMPLE_XOR,
+ RSS_FUNC_SYMMETRIC_TOEPLITZ,
+};
+
+typedef generic_pattern
+{
+ u8 spec[1024];
+ u8 mask[1024];
+};
+
typedef ip_port_and_mask
{
u16 port;
@@ -193,6 +237,12 @@ typedef flow_ip4_gtpu
u32 teid;
};
+typedef flow_generic
+{
+ i32 foo;
+ vl_api_generic_pattern_t pattern;
+};
+
union flow
{
vl_api_flow_ethernet_t ethernet;
@@ -211,6 +261,25 @@ union flow
vl_api_flow_ip4_gtpu_t ip4_gtpu;
};
+union flow_v2
+{
+ vl_api_flow_ethernet_t ethernet;
+ vl_api_flow_ip4_t ip4;
+ vl_api_flow_ip6_t ip6;
+ vl_api_flow_ip4_l2tpv3oip_t ip4_l2tpv3oip;
+ vl_api_flow_ip4_ipsec_esp_t ip4_ipsec_esp;
+ vl_api_flow_ip4_ipsec_ah_t ip4_ipsec_ah;
+ vl_api_flow_ip4_n_tuple_t ip4_n_tuple;
+ vl_api_flow_ip6_n_tuple_t ip6_n_tuple;
+ vl_api_flow_ip4_n_tuple_tagged_t ip4_n_tuple_tagged;
+ vl_api_flow_ip6_n_tuple_tagged_t ip6_n_tuple_tagged;
+ vl_api_flow_ip4_vxlan_t ip4_vxlan;
+ vl_api_flow_ip6_vxlan_t ip6_vxlan;
+ vl_api_flow_ip4_gtpc_t ip4_gtpc;
+ vl_api_flow_ip4_gtpu_t ip4_gtpu;
+ vl_api_flow_generic_t generic;
+};
+
/* main flow struct */
typedef flow_rule
{
@@ -240,3 +309,41 @@ typedef flow_rule
vl_api_flow_t flow;
};
+/* main flow struct */
+typedef flow_rule_v2
+{
+ /* flow type */
+ vl_api_flow_type_v2_t type;
+
+ /* flow index */
+ u32 index;
+
+ /* bitmap of flow actions (FLOW_ACTION_*) */
+ vl_api_flow_action_v2_t actions;
+
+ /* flow id for VNET_FLOW_ACTION_MARK */
+ u32 mark_flow_id;
+
+ /* node index and next index for FLOW_ACTION_REDIRECT_TO_NODE */
+ u32 redirect_node_index;
+ u32 redirect_device_input_next_index;
+
+ /* queue for FLOW_ACTION_REDIRECT_TO_QUEUE */
+ u32 redirect_queue;
+
+ /* start queue index and queue numbers for RSS queue group with FLOW_ACTION_RSS */
+ u32 queue_index;
+ u32 queue_num;
+
+ /* buffer offset for FLOW_ACTION_BUFFER_ADVANCE */
+ i32 buffer_advance;
+
+ /* RSS types, including IPv4/IPv6/TCP/UDP... */
+ u64 rss_types;
+
+ /* RSS functions, including IPv4/IPv6/TCP/UDP... */
+ vl_api_rss_function_t rss_fun;
+
+ /* flow enum */
+ vl_api_flow_v2_t flow;
+};
diff --git a/src/vnet/gre/packet.h b/src/vnet/gre/packet.h
index bbd67d565c5..bbda2df3f68 100644
--- a/src/vnet/gre/packet.h
+++ b/src/vnet/gre/packet.h
@@ -138,7 +138,6 @@ typedef struct
This field is platform dependent.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u32 seq_num;
union
@@ -158,7 +157,6 @@ typedef CLIB_PACKED (struct {
erspan_t2_t erspan;
}) erspan_t2_header_t;
-/* *INDENT-ON* */
/* u64 template for ERSPAN type 2 header with both EN bits set */
#define ERSPAN_HDR2 0x1000180000000000ul
diff --git a/src/vnet/gso/FEATURE.yaml b/src/vnet/gso/FEATURE.yaml
index d3db0cc23e3..5f6275caca2 100644
--- a/src/vnet/gso/FEATURE.yaml
+++ b/src/vnet/gso/FEATURE.yaml
@@ -1,6 +1,6 @@
---
name: VNET GSO
-maintainer: ayourtch@gmail.com sykazmi@cisco.com
+maintainer: ayourtch@gmail.com mohsin.kazmi14@gmail.com
features:
- Basic GSO support
- GSO for VLAN tagged packets
diff --git a/src/vnet/gso/cli.c b/src/vnet/gso/cli.c
index 060ce812fad..11dbaad728f 100644
--- a/src/vnet/gso/cli.c
+++ b/src/vnet/gso/cli.c
@@ -76,13 +76,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_feature_gso_command, static) = {
.path = "set interface feature gso",
.short_help = "set interface feature gso <intfc> [enable | disable]",
.function = set_interface_feature_gso_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/gso/gro_func.h b/src/vnet/gso/gro_func.h
index b29d4a5f944..e2e4e93850b 100644
--- a/src/vnet/gso/gro_func.h
+++ b/src/vnet/gso/gro_func.h
@@ -21,9 +21,14 @@
#include <vnet/gso/hdr_offset_parser.h>
#include <vnet/ip/ip4.h>
#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip6_inlines.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/tcp/tcp_packet.h>
#include <vnet/vnet.h>
+#include <vnet/interface.h>
+
+#define GRO_MIN_PACKET_SIZE 256
+#define GRO_PADDED_PACKET_SIZE 64
static_always_inline u8
gro_is_bad_packet (vlib_buffer_t * b, u8 flags, i16 l234_sz)
@@ -160,6 +165,34 @@ gro_validate_checksum (vlib_main_t * vm, vlib_buffer_t * b0,
}
static_always_inline u32
+gro_fix_padded_packet_len (vlib_buffer_t *b0, generic_header_offset_t *gho0,
+ ip4_header_t *ip4_0, ip6_header_t *ip6_0,
+ u32 pkt_len0, u16 l234_sz0)
+{
+ u32 tcp_payload_len0 = 0;
+ if (gho0->gho_flags & GHO_F_IP4)
+ {
+ tcp_payload_len0 = clib_net_to_host_u16 (ip4_0->length) -
+ ip4_header_bytes (ip4_0) - gho0->l4_hdr_sz;
+ }
+ else
+ {
+ tcp_payload_len0 =
+ clib_net_to_host_u16 (ip6_0->payload_length) - gho0->l4_hdr_sz;
+ }
+
+ ASSERT (l234_sz0 + tcp_payload_len0 <= pkt_len0);
+
+ if (PREDICT_FALSE (l234_sz0 + tcp_payload_len0 < pkt_len0))
+ {
+ /* small packet with padding at the end, remove padding */
+ b0->current_length = l234_sz0 + tcp_payload_len0;
+ pkt_len0 = b0->current_length;
+ }
+ return pkt_len0;
+}
+
+static_always_inline u32
gro_get_packet_data (vlib_main_t *vm, vlib_buffer_t *b0,
generic_header_offset_t *gho0, gro_flow_key_t *flow_key0,
u8 is_l2)
@@ -222,6 +255,11 @@ gro_get_packet_data (vlib_main_t *vm, vlib_buffer_t *b0,
if (PREDICT_FALSE (pkt_len0 >= TCP_MAX_GSO_SZ))
return 0;
+ if (PREDICT_FALSE (pkt_len0 <= GRO_PADDED_PACKET_SIZE))
+ {
+ pkt_len0 =
+ gro_fix_padded_packet_len (b0, gho0, ip4_0, ip6_0, pkt_len0, l234_sz0);
+ }
return pkt_len0;
}
@@ -264,8 +302,8 @@ gro_coalesce_buffers (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1,
pkt_len0 = vlib_buffer_length_in_chain (vm, b0);
pkt_len1 = vlib_buffer_length_in_chain (vm, b1);
- if (((gho0.gho_flags & GHO_F_TCP) == 0)
- || ((gho1.gho_flags & GHO_F_TCP) == 0))
+ if (((gho0.gho_flags & GHO_F_TCP) == 0 || pkt_len0 <= GRO_MIN_PACKET_SIZE) ||
+ ((gho1.gho_flags & GHO_F_TCP) == 0 || pkt_len1 <= GRO_MIN_PACKET_SIZE))
return 0;
ip4_0 =
@@ -346,6 +384,7 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2)
1 /* is_ip6 */ );
vnet_buffer2 (b0)->gso_size = b0->current_length - gho0.hdr_sz;
+ vnet_buffer (b0)->l2_hdr_offset = b0->current_data;
if (gho0.gho_flags & GHO_F_IP4)
{
@@ -354,6 +393,7 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2)
ip4->length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
gho0.l3_hdr_offset);
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip4 - b0->data;
b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP4);
vnet_buffer_offload_flags_set (b0, (VNET_BUFFER_OFFLOAD_F_TCP_CKSUM |
VNET_BUFFER_OFFLOAD_F_IP_CKSUM));
@@ -365,12 +405,15 @@ gro_fixup_header (vlib_main_t *vm, vlib_buffer_t *b0, u32 ack_number, u8 is_l2)
ip6->payload_length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
gho0.l4_hdr_offset);
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ip6 - b0->data;
b0->flags |= (VNET_BUFFER_F_GSO | VNET_BUFFER_F_IS_IP6);
vnet_buffer_offload_flags_set (b0, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
}
tcp_header_t *tcp0 =
(tcp_header_t *) (vlib_buffer_get_current (b0) + gho0.l4_hdr_offset);
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) tcp0 - b0->data;
+ vnet_buffer2 (b0)->gso_l4_hdr_sz = tcp_header_bytes (tcp0);
tcp0->ack_number = ack_number;
b0->flags &= ~VLIB_BUFFER_IS_TRACED;
}
@@ -407,9 +450,9 @@ vnet_gro_flow_table_flush (vlib_main_t * vm, gro_flow_table_t * flow_table,
}
static_always_inline void
-vnet_gro_flow_table_schedule_node_on_dispatcher (vlib_main_t * vm,
- gro_flow_table_t *
- flow_table)
+vnet_gro_flow_table_schedule_node_on_dispatcher (vlib_main_t *vm,
+ vnet_hw_if_tx_queue_t *txq,
+ gro_flow_table_t *flow_table)
{
if (gro_flow_table_is_timeout (vm, flow_table))
{
@@ -420,9 +463,13 @@ vnet_gro_flow_table_schedule_node_on_dispatcher (vlib_main_t * vm,
{
u32 node_index = flow_table->node_index;
vlib_frame_t *f = vlib_get_frame_to_node (vm, node_index);
+ vnet_hw_if_tx_frame_t *ft = vlib_frame_scalar_args (f);
u32 *f_to = vlib_frame_vector_args (f);
u32 i = 0;
+ ft->shared_queue = txq->shared_queue;
+ ft->queue_id = txq->queue_id;
+
while (i < n_to)
{
f_to[f->n_vectors] = to[i];
@@ -483,7 +530,8 @@ vnet_gro_flow_table_inline (vlib_main_t * vm, gro_flow_table_t * flow_table,
}
tcp0 = (tcp_header_t *) (vlib_buffer_get_current (b0) + gho0.l4_hdr_offset);
- if (PREDICT_TRUE ((tcp0->flags & TCP_FLAG_PSH) == 0))
+ if (PREDICT_TRUE (((tcp0->flags & TCP_FLAG_PSH) == 0) &&
+ (pkt_len0 > GRO_MIN_PACKET_SIZE)))
gro_flow = gro_flow_table_find_or_add_flow (flow_table, &flow_key0);
else
{
diff --git a/src/vnet/gso/gso.h b/src/vnet/gso/gso.h
index 8e174dfd1f6..dee5da5c70b 100644
--- a/src/vnet/gso/gso.h
+++ b/src/vnet/gso/gso.h
@@ -17,6 +17,8 @@
#define included_gso_h
#include <vnet/vnet.h>
+#include <vnet/gso/hdr_offset_parser.h>
+#include <vnet/ip/ip_psh_cksum.h>
typedef struct
{
@@ -28,6 +30,277 @@ typedef struct
extern gso_main_t gso_main;
int vnet_sw_interface_gso_enable_disable (u32 sw_if_index, u8 enable);
+u32 gso_segment_buffer (vlib_main_t *vm, vnet_interface_per_thread_data_t *ptd,
+ u32 bi, vlib_buffer_t *b, generic_header_offset_t *gho,
+ u32 n_bytes_b, u8 is_l2, u8 is_ip6);
+
+static_always_inline void
+gso_init_bufs_from_template_base (vlib_buffer_t **bufs, vlib_buffer_t *b0,
+ u32 flags, u16 n_bufs, u16 hdr_sz)
+{
+ u32 i = n_bufs;
+ while (i >= 6)
+ {
+ /* prefetches */
+ CLIB_PREFETCH (bufs[2], 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (bufs[3], 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ vlib_prefetch_buffer_data (bufs[4], LOAD);
+ vlib_prefetch_buffer_data (bufs[5], LOAD);
+
+ /* copying objects from cacheline 0 */
+ bufs[0]->current_data = 0;
+ bufs[1]->current_data = 0;
+
+ bufs[0]->current_length = hdr_sz;
+ bufs[1]->current_length = hdr_sz;
+
+ bufs[0]->flags = bufs[1]->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | flags;
+ bufs[0]->flow_id = bufs[1]->flow_id = b0->flow_id;
+ bufs[0]->error = bufs[1]->error = b0->error;
+ bufs[0]->current_config_index = bufs[1]->current_config_index =
+ b0->current_config_index;
+
+ clib_memcpy_fast (&bufs[0]->opaque, &b0->opaque, sizeof (b0->opaque));
+ clib_memcpy_fast (&bufs[1]->opaque, &b0->opaque, sizeof (b0->opaque));
+
+ /* copying objects from cacheline 1 */
+ bufs[0]->trace_handle = b0->trace_handle;
+ bufs[1]->trace_handle = b0->trace_handle;
+
+ bufs[0]->total_length_not_including_first_buffer = 0;
+ bufs[1]->total_length_not_including_first_buffer = 0;
+
+ clib_memcpy_fast (&bufs[0]->opaque2, &b0->opaque2, sizeof (b0->opaque2));
+ clib_memcpy_fast (&bufs[1]->opaque2, &b0->opaque2, sizeof (b0->opaque2));
+
+ /* copying data */
+ clib_memcpy_fast (bufs[0]->data, vlib_buffer_get_current (b0), hdr_sz);
+ clib_memcpy_fast (bufs[1]->data, vlib_buffer_get_current (b0), hdr_sz);
+
+ /* header offset fixup */
+ vnet_buffer (bufs[0])->l2_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l3_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l4_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l3_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l4_hdr_offset -= b0->current_data;
+
+ vnet_buffer (bufs[1])->l2_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[1])->l3_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[1])->l4_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[1])->outer_l3_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[1])->outer_l4_hdr_offset -= b0->current_data;
+
+ bufs += 2;
+ i -= 2;
+ }
+
+ while (i > 0)
+ {
+ /* copying objects from cacheline 0 */
+ bufs[0]->current_data = 0;
+ bufs[0]->current_length = hdr_sz;
+ bufs[0]->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID | flags;
+ bufs[0]->flow_id = b0->flow_id;
+ bufs[0]->error = b0->error;
+ bufs[0]->current_config_index = b0->current_config_index;
+ clib_memcpy_fast (&bufs[0]->opaque, &b0->opaque, sizeof (b0->opaque));
+
+ /* copying objects from cacheline 1 */
+ bufs[0]->trace_handle = b0->trace_handle;
+ bufs[0]->total_length_not_including_first_buffer = 0;
+ clib_memcpy_fast (&bufs[0]->opaque2, &b0->opaque2, sizeof (b0->opaque2));
+
+ /* copying data */
+ clib_memcpy_fast (bufs[0]->data, vlib_buffer_get_current (b0), hdr_sz);
+
+ /* header offset fixup */
+ vnet_buffer (bufs[0])->l2_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l3_hdr_offset -= b0->current_data;
+ vnet_buffer (bufs[0])->l4_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l3_hdr_offset -= b0->current_data;
+ vnet_buffer2 (bufs[0])->outer_l4_hdr_offset -= b0->current_data;
+
+ bufs++;
+ i--;
+ }
+}
+
+static_always_inline void
+gso_fixup_segmented_buf (vlib_main_t *vm, vlib_buffer_t *b0, u32 next_tcp_seq,
+ int is_l2, u8 oflags, u16 hdr_sz, u16 l4_hdr_sz,
+ clib_ip_csum_t *c, u8 tcp_flags, u8 is_prefetch,
+ vlib_buffer_t *b1)
+{
+
+ i16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset;
+ i16 l4_hdr_offset = vnet_buffer (b0)->l4_hdr_offset;
+
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + l3_hdr_offset);
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + l3_hdr_offset);
+ tcp_header_t *tcp = (tcp_header_t *) (b0->data + l4_hdr_offset);
+
+ tcp->flags = tcp_flags;
+ tcp->seq_number = clib_host_to_net_u32 (next_tcp_seq);
+ c->odd = 0;
+
+ if (oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ {
+ ip4->length =
+ clib_host_to_net_u16 (b0->current_length - hdr_sz +
+ (l4_hdr_offset - l3_hdr_offset) + l4_hdr_sz);
+ ip4->checksum = 0;
+ ip4->checksum = ip4_header_checksum (ip4);
+ vnet_buffer_offload_flags_clear (b0, (VNET_BUFFER_OFFLOAD_F_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TCP_CKSUM));
+ c->sum += clib_mem_unaligned (&ip4->src_address, u32);
+ c->sum += clib_mem_unaligned (&ip4->dst_address, u32);
+ c->sum += clib_host_to_net_u32 (
+ (clib_net_to_host_u16 (ip4->length) - ip4_header_bytes (ip4)) +
+ (ip4->protocol << 16));
+ }
+ else
+ {
+ ip6->payload_length =
+ clib_host_to_net_u16 (b0->current_length - hdr_sz + l4_hdr_sz);
+ vnet_buffer_offload_flags_clear (b0, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
+ ip6_psh_t psh = { 0 };
+ u32 *p = (u32 *) &psh;
+ psh.src = ip6->src_address;
+ psh.dst = ip6->dst_address;
+ psh.l4len = ip6->payload_length;
+ psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
+ for (int i = 0; i < 10; i++)
+ c->sum += p[i];
+ }
+
+ if (is_prefetch)
+ CLIB_PREFETCH (vlib_buffer_get_current (b1) + hdr_sz,
+ CLIB_CACHE_LINE_BYTES, LOAD);
+
+ clib_ip_csum_chunk (c, (u8 *) tcp, l4_hdr_sz);
+ tcp->checksum = clib_ip_csum_fold (c);
+
+ if (!is_l2 && ((oflags & VNET_BUFFER_OFFLOAD_F_TNL_MASK) == 0))
+ {
+ u32 adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+
+ ip_adjacency_t *adj0 = adj_get (adj_index0);
+
+ if (adj0->lookup_next_index == IP_LOOKUP_NEXT_MIDCHAIN &&
+ adj0->sub_type.midchain.fixup_func)
+ /* calls e.g. ipip44_fixup */
+ adj0->sub_type.midchain.fixup_func (
+ vm, adj0, b0, adj0->sub_type.midchain.fixup_data);
+ }
+}
+
+static_always_inline u32
+gso_segment_buffer_inline (vlib_main_t *vm,
+ vnet_interface_per_thread_data_t *ptd,
+ vlib_buffer_t *b, int is_l2)
+{
+ vlib_buffer_t **bufs = 0;
+ u32 n_tx_bytes = 0;
+
+ u8 oflags = vnet_buffer (b)->oflags;
+ i16 l4_hdr_offset = vnet_buffer (b)->l4_hdr_offset;
+ u16 gso_size = vnet_buffer2 (b)->gso_size;
+ u16 l4_hdr_sz = vnet_buffer2 (b)->gso_l4_hdr_sz;
+
+ u8 tcp_flags = 0, tcp_flags_no_fin_psh = 0;
+ u32 default_bflags =
+ b->flags & ~(VNET_BUFFER_F_GSO | VLIB_BUFFER_NEXT_PRESENT);
+ u16 hdr_sz = (l4_hdr_offset - b->current_data) + l4_hdr_sz;
+ u32 next_tcp_seq = 0, tcp_seq = 0;
+ u32 data_size = vlib_buffer_length_in_chain (vm, b) - hdr_sz;
+ u16 size =
+ clib_min (gso_size, vlib_buffer_get_default_data_size (vm) - hdr_sz);
+ u16 n_alloc = 0, n_bufs = ((data_size + size - 1) / size);
+ clib_ip_csum_t c = { .sum = 0, .odd = 0 };
+ u8 *src_ptr, *dst_ptr;
+ u16 src_left, dst_left, bytes_to_copy;
+ u32 i = 0;
+
+ vec_validate (ptd->split_buffers, n_bufs - 1);
+ n_alloc = vlib_buffer_alloc (vm, ptd->split_buffers, n_bufs);
+ if (n_alloc < n_bufs)
+ {
+ vlib_buffer_free (vm, ptd->split_buffers, n_alloc);
+ return 0;
+ }
+
+ vec_validate (bufs, n_bufs - 1);
+ vlib_get_buffers (vm, ptd->split_buffers, bufs, n_bufs);
+
+ tcp_header_t *tcp = (tcp_header_t *) (b->data + l4_hdr_offset);
+
+ tcp_seq = next_tcp_seq = clib_net_to_host_u32 (tcp->seq_number);
+ /* store original flags for last packet and reset FIN and PSH */
+ tcp_flags = tcp->flags;
+ tcp_flags_no_fin_psh = tcp->flags & ~(TCP_FLAG_FIN | TCP_FLAG_PSH);
+ tcp->checksum = 0;
+
+ gso_init_bufs_from_template_base (bufs, b, default_bflags, n_bufs, hdr_sz);
+
+ src_ptr = vlib_buffer_get_current (b) + hdr_sz;
+ src_left = b->current_length - hdr_sz;
+ dst_ptr = vlib_buffer_get_current (bufs[i]) + hdr_sz;
+ dst_left = size;
+
+ while (data_size)
+ {
+ bytes_to_copy = clib_min (src_left, dst_left);
+ clib_ip_csum_and_copy_chunk (&c, src_ptr, dst_ptr, bytes_to_copy);
+
+ src_left -= bytes_to_copy;
+ src_ptr += bytes_to_copy;
+ data_size -= bytes_to_copy;
+ dst_left -= bytes_to_copy;
+ dst_ptr += bytes_to_copy;
+ next_tcp_seq += bytes_to_copy;
+ bufs[i]->current_length += bytes_to_copy;
+
+ if (0 == src_left)
+ {
+ /* init src to the next buffer in chain */
+ if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ src_left = b->current_length;
+ src_ptr = vlib_buffer_get_current (b);
+ }
+ else
+ {
+ ASSERT (data_size == 0);
+ break;
+ }
+ }
+ if (0 == dst_left && data_size)
+ {
+ vlib_prefetch_buffer_header (bufs[i + 1], LOAD);
+
+ n_tx_bytes += bufs[i]->current_length;
+ gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, oflags, hdr_sz,
+ l4_hdr_sz, &c, tcp_flags_no_fin_psh, 1,
+ bufs[i + 1]);
+ i++;
+ dst_left = size;
+ dst_ptr = vlib_buffer_get_current (bufs[i]) + hdr_sz;
+ tcp_seq = next_tcp_seq;
+ // reset clib_ip_csum_t
+ c.odd = 0;
+ c.sum = 0;
+ }
+ }
+
+ ASSERT ((i + 1) == n_alloc);
+ n_tx_bytes += bufs[i]->current_length;
+ gso_fixup_segmented_buf (vm, bufs[i], tcp_seq, is_l2, oflags, hdr_sz,
+ l4_hdr_sz, &c, tcp_flags, 0, NULL);
+
+ vec_free (bufs);
+ return n_tx_bytes;
+}
#endif /* included_gso_h */
diff --git a/src/vnet/gso/gso.rst b/src/vnet/gso/gso.rst
new file mode 100644
index 00000000000..78788f82216
--- /dev/null
+++ b/src/vnet/gso/gso.rst
@@ -0,0 +1,154 @@
+.. _gso_doc:
+
+Generic Segmentation Offload
+============================
+
+Overview
+________
+
+Modern physical NICs provide offload capabilities to software based network
+stacks to transfer some type of the packet processing from CPU to physical
+NICs. TCP Segmentation Offload (TSO) is one among many which is provided by
+modern physical NICs. Software based network stack can offload big (up to 64KB)
+TCP packets to NIC and NIC will segment them into Maximum Segment Size packets.
+Hence network stack save CPU cycles by processing few big packets instead of
+processing many small packets.
+
+GSO is software based analogous to TSO which is used by virtual interfaces
+i.e. tap, virtio, af_packet, vhost-user etc. Typically, virtual interfaces
+provide capability to offload big packets (64KB size). But in reality, they
+just pass the packet as it is to the other end without segmenting it. Hence, it
+is necessary to validate the support of GSO offloading in whole setup otherwise
+packet will be dropped when it will be processed by virtual entity which does
+not support GSO.
+
+The GSO Infrastructure
+_______________________
+
+Software based network stacks implements GSO packet segmentation in software
+where egress interface (virtual or physical) does not support GSO or TSO
+offload. VPP implements GSO stack to provide support for software based packet
+chunking of GSO packets when egress interface does not support GSO or TSO
+offload.
+
+It is implemented as a feature node on interface-output feature arc. It
+implements support for basic GSO, GSO with VXLAN tunnel and GSO with IPIP
+tunnel. GSO with Geneve and GSO with NVGRE are not supported today. But one can
+enable GSO feature node on tunnel interfaces i.e. IPSEC etc to segment GSO
+packets before they will be tunneled.
+
+Virtual interfaces does not support GSO with tunnels. So, special care is
+needed when user configures tunnel(s) along with GSO in the setup. In such case,
+either enable GSO feature node on tunnel interface (mean chunk the GSO packets
+before they will be encapsulated in tunnel) or disable the GSO offload on the
+egress interface (only work for VXLAN tunnel and IPIP tunnel), if it is enabled,
+should work fine.
+
+Similarly, many physical interfaces does not support GSO with tunnels too. User
+can do the same configuration as it is mentioned previously for virtual
+interfaces.
+
+Data structures
+^^^^^^^^^^^^^^^
+
+VPP ``vlib_buffer_t`` uses ``VNET_BUFFER_F_GSO`` flags to mark the buffer carrying GSO
+packet and also contain metadata fields with respect to GSO:
+
+.. code:: c
+
+ i16 l2_hdr_offset;
+ i16 l3_hdr_offset;
+ i16 l4_hdr_offset;
+
+ u16 gso_size;
+ u16 gso_l4_hdr_sz;
+ i16 outer_l3_hdr_offset;
+ i16 outer_l4_hdr_offset;
+
+Packet header offsets are computed from the reference of ``vlib_buffer_t`` data
+pointer.
+
+``l2_hdr_offset``, ``l3_hdr_offset`` and ``l4_hdr_offset`` are set on input of checksum
+offload or GSO enabled interfaces or features i.e. host stack. Appropriate
+offload flags are also set to ``vnet_buffer_oflags_t`` to reflect the actual packet
+offloads which will be used later at egress interface tx node or
+interface-output node or GSO node to process the packet appropriately. These
+fields are present in 1st cache line and does not incur extra cycles as most of
+the VPP features fetch the ``vlib_buffer_t`` 1st cache line to access ``current_data``
+or ``current_length`` fields of the packet.
+
+Please note that ``gso_size``, ``gso_l4_hdr_sz``, ``outer_l3_hdr_offset`` and
+``outer_l4_hdr_offset`` are in second cache line of ``vlib_buffer_t``. Accessing them in
+data plane will incur some extra cycles but cost of these cycles will be
+amortized over (up to 64KB) packet.
+
+The ``gso_size`` and ``gso_l4_hdr_sz`` are set on input of GSO enabled interfaces (tap,
+virtio, af_packet etc) or features (vpp host stack), when we receive a GSO
+packet (a chain of buffers with the first one having ``VNET_BUFFER_F_GSO`` bit set),
+and needs to persist all the way to the interface-output, in case the egress
+interface is not GSO-enabled - then we need to perform the segmentation, and use
+these values to chunk the payload appropriately.
+
+``outer_l3_hdr_offset`` and ``outer_l4_hdr_offset`` are used in case of tunneled packet
+(i.e. VXLAN or IPIP). ``outer_l3_hdr_offset`` will point to outer l3 header of the
+tunnel headers and ``outer_l4_hdr_offset`` will point to outer l4 header of the
+tunnel headers, if any.
+
+Following are the helper functions used to set and clear the offload flags from
+``vlib_buffer_t`` metadata:
+
+.. code:: c
+
+ static_always_inline void
+ vnet_buffer_offload_flags_set (vlib_buffer_t *b, vnet_buffer_oflags_t oflags)
+ {
+ if (b->flags & VNET_BUFFER_F_OFFLOAD)
+ {
+ /* add a flag to existing offload */
+ vnet_buffer (b)->oflags |= oflags;
+ }
+ else
+ {
+ /* no offload yet: reset offload flags to new value */
+ vnet_buffer (b)->oflags = oflags;
+ b->flags |= VNET_BUFFER_F_OFFLOAD;
+ }
+ }
+
+ static_always_inline void
+ vnet_buffer_offload_flags_clear (vlib_buffer_t *b, vnet_buffer_oflags_t oflags)
+ {
+ vnet_buffer (b)->oflags &= ~oflags;
+ if (0 == vnet_buffer (b)->oflags)
+ b->flags &= ~VNET_BUFFER_F_OFFLOAD;
+ }
+
+
+ENABLE GSO FEATURE NODE
+-----------------------
+
+GSO feature node is not enabled by default when egress interface does not
+support GSO. User has to enable it explicitly using api or cli.
+
+GSO API
+^^^^^^^
+
+This API message is used to enable GSO feature node on an interface.
+
+.. code:: c
+
+ autoreply define feature_gso_enable_disable
+ {
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ bool enable_disable;
+ option vat_help = "<intfc> | sw_if_index <nn> [enable | disable]";
+ };
+
+GSO CLI
+^^^^^^^
+
+::
+
+ set interface feature gso <intfc> [enable | disable]
diff --git a/src/vnet/gso/hdr_offset_parser.h b/src/vnet/gso/hdr_offset_parser.h
index e846aaa6fd2..08037f57ea0 100644
--- a/src/vnet/gso/hdr_offset_parser.h
+++ b/src/vnet/gso/hdr_offset_parser.h
@@ -21,8 +21,10 @@
#include <vnet/ip/ip6_packet.h>
#include <vnet/udp/udp_local.h>
#include <vnet/udp/udp_packet.h>
+#include <vnet/tcp/tcp_packet.h>
#include <vnet/vnet.h>
-#include <vnet/vxlan/vxlan_packet.h>
+
+#define VXLAN_HEADER_SIZE 8
#define foreach_gho_flag \
_( 0, IP4) \
@@ -155,8 +157,6 @@ vnet_geneve_inner_header_parser_inline (vlib_buffer_t * b0,
/* not supported yet */
if ((gho->gho_flags & GHO_F_GENEVE_TUNNEL) == 0)
return;
-
- ASSERT (0);
}
static_always_inline void
@@ -166,8 +166,6 @@ vnet_gre_inner_header_parser_inline (vlib_buffer_t * b0,
/* not supported yet */
if ((gho->gho_flags & GHO_F_GRE_TUNNEL) == 0)
return;
-
- ASSERT (0);
}
static_always_inline void
@@ -440,7 +438,7 @@ vnet_generic_outer_header_parser_inline (vlib_buffer_t * b0,
if (UDP_DST_PORT_vxlan == clib_net_to_host_u16 (udp->dst_port))
{
gho->gho_flags |= GHO_F_VXLAN_TUNNEL;
- gho->hdr_sz += sizeof (vxlan_header_t);
+ gho->hdr_sz += VXLAN_HEADER_SIZE;
}
else if (UDP_DST_PORT_geneve == clib_net_to_host_u16 (udp->dst_port))
{
diff --git a/src/vnet/gso/node.c b/src/vnet/gso/node.c
index 5e793a5cffd..c1d4459476e 100644
--- a/src/vnet/gso/node.c
+++ b/src/vnet/gso/node.c
@@ -80,119 +80,108 @@ format_gso_trace (u8 * s, va_list * args)
return s;
}
-static_always_inline u16
-tso_segment_ipip_tunnel_fixup (vlib_main_t * vm,
- vnet_interface_per_thread_data_t * ptd,
- vlib_buffer_t * sb0,
- generic_header_offset_t * gho)
+static_always_inline void
+tso_segment_ipip_tunnel_fixup (vlib_main_t *vm,
+ vnet_interface_per_thread_data_t *ptd,
+ vlib_buffer_t *sb0)
{
u16 n_tx_bufs = vec_len (ptd->split_buffers);
- u16 i = 0, n_tx_bytes = 0;
+ u16 i = 0;
while (i < n_tx_bufs)
{
vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]);
- vnet_get_outer_header (b0, gho);
- clib_memcpy_fast (vlib_buffer_get_current (b0),
- vlib_buffer_get_current (sb0), gho->outer_hdr_sz);
-
- ip4_header_t *ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b0) +
- gho->outer_l3_hdr_offset);
- ip6_header_t *ip6 =
- (ip6_header_t *) (vlib_buffer_get_current (b0) +
- gho->outer_l3_hdr_offset);
-
- if (gho->gho_flags & GHO_F_OUTER_IP4)
+ i16 outer_l3_hdr_offset = vnet_buffer2 (b0)->outer_l3_hdr_offset;
+ i16 l3_hdr_offset = vnet_buffer (b0)->l3_hdr_offset;
+
+ ip4_header_t *ip4 = (ip4_header_t *) (b0->data + outer_l3_hdr_offset);
+ ip6_header_t *ip6 = (ip6_header_t *) (b0->data + outer_l3_hdr_offset);
+
+ if (vnet_buffer (b0)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM)
{
- ip4->length =
- clib_host_to_net_u16 (b0->current_length -
- gho->outer_l3_hdr_offset);
+ ip4->length = clib_host_to_net_u16 (
+ b0->current_length - (outer_l3_hdr_offset - b0->current_data));
ip4->checksum = ip4_header_checksum (ip4);
+ vnet_buffer_offload_flags_clear (
+ b0, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
}
- else if (gho->gho_flags & GHO_F_OUTER_IP6)
+ else
{
- ip6->payload_length =
- clib_host_to_net_u16 (b0->current_length -
- gho->outer_l4_hdr_offset);
+ ip6->payload_length = clib_host_to_net_u16 (
+ b0->current_length - (l3_hdr_offset - b0->current_data));
+ vnet_buffer_offload_flags_clear (b0, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
}
- n_tx_bytes += gho->outer_hdr_sz;
i++;
}
- return n_tx_bytes;
}
static_always_inline void
-tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t * vm, vlib_buffer_t * b,
- generic_header_offset_t * gho)
+tso_segment_vxlan_tunnel_headers_fixup (vlib_main_t *vm, vlib_buffer_t *b)
{
- u8 proto = 0;
ip4_header_t *ip4 = 0;
ip6_header_t *ip6 = 0;
udp_header_t *udp = 0;
+ i16 outer_l3_hdr_offset = vnet_buffer2 (b)->outer_l3_hdr_offset;
+ i16 outer_l4_hdr_offset = vnet_buffer2 (b)->outer_l4_hdr_offset;
- ip4 =
- (ip4_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
- ip6 =
- (ip6_header_t *) (vlib_buffer_get_current (b) + gho->outer_l3_hdr_offset);
- udp =
- (udp_header_t *) (vlib_buffer_get_current (b) + gho->outer_l4_hdr_offset);
+ ip4 = (ip4_header_t *) (b->data + outer_l3_hdr_offset);
+ ip6 = (ip6_header_t *) (b->data + outer_l3_hdr_offset);
+ udp = (udp_header_t *) (b->data + outer_l4_hdr_offset);
- if (gho->gho_flags & GHO_F_OUTER_IP4)
+ if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM)
{
- proto = ip4->protocol;
- ip4->length =
- clib_host_to_net_u16 (b->current_length - gho->outer_l3_hdr_offset);
+ ip4->length = clib_host_to_net_u16 (
+ b->current_length - (outer_l3_hdr_offset - b->current_data));
ip4->checksum = ip4_header_checksum (ip4);
+ if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM)
+ {
+ udp->length = clib_host_to_net_u16 (
+ b->current_length - (outer_l4_hdr_offset - b->current_data));
+ // udp checksum is 0, in udp tunnel
+ udp->checksum = 0;
+ }
+ vnet_buffer_offload_flags_clear (
+ b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN);
}
- else if (gho->gho_flags & GHO_F_OUTER_IP6)
- {
- proto = ip6->protocol;
- ip6->payload_length =
- clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
- }
- if (proto == IP_PROTOCOL_UDP)
+ else
{
- int bogus;
- udp->length =
- clib_host_to_net_u16 (b->current_length - gho->outer_l4_hdr_offset);
- udp->checksum = 0;
- if (gho->gho_flags & GHO_F_OUTER_IP6)
+ ip6->payload_length = clib_host_to_net_u16 (
+ b->current_length - (outer_l4_hdr_offset - b->current_data));
+
+ if (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM)
{
+ int bogus;
+ udp->length = ip6->payload_length;
+ // udp checksum is 0, in udp tunnel
+ udp->checksum = 0;
udp->checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ vnet_buffer_offload_flags_clear (
+ b, VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN);
}
- else if (gho->gho_flags & GHO_F_OUTER_IP4)
- {
- udp->checksum = ip4_tcp_udp_compute_checksum (vm, b, ip4);
- }
- /* FIXME: it should be OUTER_UDP_CKSUM */
- vnet_buffer_offload_flags_clear (b, VNET_BUFFER_OFFLOAD_F_UDP_CKSUM);
}
}
-static_always_inline u16
-tso_segment_vxlan_tunnel_fixup (vlib_main_t * vm,
- vnet_interface_per_thread_data_t * ptd,
- vlib_buffer_t * sb0,
- generic_header_offset_t * gho)
+static_always_inline void
+tso_segment_vxlan_tunnel_fixup (vlib_main_t *vm,
+ vnet_interface_per_thread_data_t *ptd,
+ vlib_buffer_t *sb0)
{
u16 n_tx_bufs = vec_len (ptd->split_buffers);
- u16 i = 0, n_tx_bytes = 0;
+ u16 i = 0;
while (i < n_tx_bufs)
{
vlib_buffer_t *b0 = vlib_get_buffer (vm, ptd->split_buffers[i]);
- vnet_get_outer_header (b0, gho);
- clib_memcpy_fast (vlib_buffer_get_current (b0),
- vlib_buffer_get_current (sb0), gho->outer_hdr_sz);
- tso_segment_vxlan_tunnel_headers_fixup (vm, b0, gho);
- n_tx_bytes += gho->outer_hdr_sz;
+ tso_segment_vxlan_tunnel_headers_fixup (vm, b0);
i++;
}
- return n_tx_bytes;
}
static_always_inline u16
@@ -468,6 +457,15 @@ tso_segment_buffer (vlib_main_t * vm, vnet_interface_per_thread_data_t * ptd,
return n_tx_bytes;
}
+__clib_unused u32
+gso_segment_buffer (vlib_main_t *vm, vnet_interface_per_thread_data_t *ptd,
+ u32 bi, vlib_buffer_t *b, generic_header_offset_t *gho,
+ u32 n_bytes_b, u8 is_l2, u8 is_ip6)
+{
+
+ return tso_segment_buffer (vm, ptd, bi, b, gho, n_bytes_b, is_l2, is_ip6);
+}
+
static_always_inline void
drop_one_buffer_and_count (vlib_main_t * vm, vnet_main_t * vnm,
vlib_node_runtime_t * node, u32 * pbi0,
@@ -546,30 +544,28 @@ vnet_gso_node_inline (vlib_main_t * vm,
if (PREDICT_FALSE (hi->sw_if_index != swif0))
{
hi0 = vnet_get_sup_hw_interface (vnm, swif0);
- if ((hi0->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) ==
- 0 &&
+ if ((hi0->caps & VNET_HW_IF_CAP_TCP_GSO) == 0 &&
(b[0]->flags & VNET_BUFFER_F_GSO))
break;
}
if (PREDICT_FALSE (hi->sw_if_index != swif1))
{
hi1 = vnet_get_sup_hw_interface (vnm, swif1);
- if (!(hi1->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) &&
+ if (!(hi1->caps & VNET_HW_IF_CAP_TCP_GSO) &&
(b[1]->flags & VNET_BUFFER_F_GSO))
break;
}
if (PREDICT_FALSE (hi->sw_if_index != swif2))
{
hi2 = vnet_get_sup_hw_interface (vnm, swif2);
- if ((hi2->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) ==
- 0 &&
+ if ((hi2->caps & VNET_HW_IF_CAP_TCP_GSO) == 0 &&
(b[2]->flags & VNET_BUFFER_F_GSO))
break;
}
if (PREDICT_FALSE (hi->sw_if_index != swif3))
{
hi3 = vnet_get_sup_hw_interface (vnm, swif3);
- if (!(hi3->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) &&
+ if (!(hi3->caps & VNET_HW_IF_CAP_TCP_GSO) &&
(b[3]->flags & VNET_BUFFER_F_GSO))
break;
}
@@ -580,6 +576,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
t0->flags = b[0]->flags & VNET_BUFFER_F_GSO;
t0->gso_size = vnet_buffer2 (b[0])->gso_size;
t0->gso_l4_hdr_sz = vnet_buffer2 (b[0])->gso_l4_hdr_sz;
+ clib_memset (&t0->gho, 0, sizeof (t0->gho));
vnet_generic_header_offset_parser (b[0], &t0->gho, is_l2,
is_ip4, is_ip6);
}
@@ -589,6 +586,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
t1->flags = b[1]->flags & VNET_BUFFER_F_GSO;
t1->gso_size = vnet_buffer2 (b[1])->gso_size;
t1->gso_l4_hdr_sz = vnet_buffer2 (b[1])->gso_l4_hdr_sz;
+ clib_memset (&t1->gho, 0, sizeof (t1->gho));
vnet_generic_header_offset_parser (b[1], &t1->gho, is_l2,
is_ip4, is_ip6);
}
@@ -598,6 +596,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
t2->flags = b[2]->flags & VNET_BUFFER_F_GSO;
t2->gso_size = vnet_buffer2 (b[2])->gso_size;
t2->gso_l4_hdr_sz = vnet_buffer2 (b[2])->gso_l4_hdr_sz;
+ clib_memset (&t2->gho, 0, sizeof (t2->gho));
vnet_generic_header_offset_parser (b[2], &t2->gho, is_l2,
is_ip4, is_ip6);
}
@@ -607,6 +606,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
t3->flags = b[3]->flags & VNET_BUFFER_F_GSO;
t3->gso_size = vnet_buffer2 (b[3])->gso_size;
t3->gso_l4_hdr_sz = vnet_buffer2 (b[3])->gso_l4_hdr_sz;
+ clib_memset (&t3->gho, 0, sizeof (t3->gho));
vnet_generic_header_offset_parser (b[3], &t3->gho, is_l2,
is_ip4, is_ip6);
}
@@ -640,7 +640,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
if (PREDICT_FALSE (hi->sw_if_index != swif0))
{
hi0 = vnet_get_sup_hw_interface (vnm, swif0);
- if ((hi0->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO) == 0 &&
+ if ((hi0->caps & VNET_HW_IF_CAP_TCP_GSO) == 0 &&
(b[0]->flags & VNET_BUFFER_F_GSO))
do_segmentation0 = 1;
}
@@ -660,6 +660,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
t0->flags = b[0]->flags & VNET_BUFFER_F_GSO;
t0->gso_size = vnet_buffer2 (b[0])->gso_size;
t0->gso_l4_hdr_sz = vnet_buffer2 (b[0])->gso_l4_hdr_sz;
+ clib_memset (&t0->gho, 0, sizeof (t0->gho));
vnet_generic_header_offset_parser (b[0], &t0->gho, is_l2,
is_ip4, is_ip6);
}
@@ -676,37 +677,10 @@ vnet_gso_node_inline (vlib_main_t * vm,
to_next -= 1;
n_left_to_next += 1;
/* undo the counting. */
- generic_header_offset_t gho = { 0 };
- u32 n_bytes_b0 = vlib_buffer_length_in_chain (vm, b[0]);
u32 n_tx_bytes = 0;
- u32 inner_is_ip6 = is_ip6;
-
- vnet_generic_header_offset_parser (b[0], &gho, is_l2,
- is_ip4, is_ip6);
-
- if (PREDICT_FALSE (gho.gho_flags & GHO_F_TUNNEL))
- {
- if (PREDICT_FALSE
- (gho.gho_flags & (GHO_F_GRE_TUNNEL |
- GHO_F_GENEVE_TUNNEL)))
- {
- /* not supported yet */
- drop_one_buffer_and_count (vm, vnm, node, from - 1,
- hi->sw_if_index,
- GSO_ERROR_UNHANDLED_TYPE);
- b += 1;
- continue;
- }
-
- vnet_get_inner_header (b[0], &gho);
-
- n_bytes_b0 -= gho.outer_hdr_sz;
- inner_is_ip6 = (gho.gho_flags & GHO_F_IP6) != 0;
- }
n_tx_bytes =
- tso_segment_buffer (vm, ptd, bi0, b[0], &gho, n_bytes_b0,
- is_l2, inner_is_ip6);
+ gso_segment_buffer_inline (vm, ptd, b[0], is_l2);
if (PREDICT_FALSE (n_tx_bytes == 0))
{
@@ -717,21 +691,15 @@ vnet_gso_node_inline (vlib_main_t * vm,
continue;
}
-
- if (PREDICT_FALSE (gho.gho_flags & GHO_F_VXLAN_TUNNEL))
+ if (PREDICT_FALSE (vnet_buffer (b[0])->oflags &
+ VNET_BUFFER_OFFLOAD_F_TNL_VXLAN))
{
- vnet_get_outer_header (b[0], &gho);
- n_tx_bytes +=
- tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0], &gho);
+ tso_segment_vxlan_tunnel_fixup (vm, ptd, b[0]);
}
- else
- if (PREDICT_FALSE
- (gho.gho_flags & (GHO_F_IPIP_TUNNEL |
- GHO_F_IPIP6_TUNNEL)))
+ else if (PREDICT_FALSE (vnet_buffer (b[0])->oflags &
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP))
{
- vnet_get_outer_header (b[0], &gho);
- n_tx_bytes +=
- tso_segment_ipip_tunnel_fixup (vm, ptd, b[0], &gho);
+ tso_segment_ipip_tunnel_fixup (vm, ptd, b[0]);
}
u16 n_tx_bufs = vec_len (ptd->split_buffers);
@@ -765,7 +733,7 @@ vnet_gso_node_inline (vlib_main_t * vm,
to_next, n_left_to_next);
}
/* The buffers were enqueued. Reset the length */
- _vec_len (ptd->split_buffers) = 0;
+ vec_set_len (ptd->split_buffers, 0);
/* Free the now segmented buffer */
vlib_buffer_free_one (vm, bi0);
b += 1;
@@ -799,7 +767,7 @@ vnet_gso_inline (vlib_main_t * vm,
hi = vnet_get_sup_hw_interface (vnm,
vnet_buffer (b)->sw_if_index[VLIB_TX]);
- if (hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO)
+ if (hi->caps & (VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_VXLAN_TNL_GSO))
return vnet_gso_node_inline (vm, node, frame, vnm, hi,
is_l2, is_ip4, is_ip6,
/* do_segmentation */ 0);
@@ -839,7 +807,6 @@ VLIB_NODE_FN (gso_ip6_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
1 /* ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (gso_l2_ip4_node) = {
.vector_size = sizeof (u32),
diff --git a/src/vnet/handoff.c b/src/vnet/handoff.c
index f64d5ad3a69..e9c3bb6de67 100644
--- a/src/vnet/handoff.c
+++ b/src/vnet/handoff.c
@@ -15,13 +15,13 @@
*/
#include <vnet/vnet.h>
-#include <vppinfra/xxhash.h>
+#include <vnet/hash/hash.h>
#include <vlib/threads.h>
-#include <vnet/handoff.h>
#include <vnet/feature/feature.h>
typedef struct
{
+ vnet_hash_fn_t hash_fn;
uword *workers_bitmap;
u32 *workers;
} per_inteface_handoff_data_t;
@@ -36,14 +36,14 @@ typedef struct
/* Worker handoff index */
u32 frame_queue_index;
-
- u64 (*hash_fn) (ethernet_header_t *);
} handoff_main_t;
extern handoff_main_t handoff_main;
#ifndef CLIB_MARCH_VARIANT
+
handoff_main_t handoff_main;
+
#endif /* CLIB_MARCH_VARIANT */
typedef struct
@@ -78,12 +78,35 @@ format_worker_handoff_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
worker_handoff_trace_t *t = va_arg (*args, worker_handoff_trace_t *);
- s =
- format (s, "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x",
- t->sw_if_index, t->next_worker_index, t->buffer_index);
+ s = format (s, "worker-handoff: sw_if_index %d, next_worker %d, buffer 0x%x",
+ t->sw_if_index, t->next_worker_index, t->buffer_index);
return s;
}
+static void
+worker_handoff_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t **bufs, u16 *threads, u32 n_vectors)
+{
+ worker_handoff_trace_t *t;
+ vlib_buffer_t **b;
+ u16 *ti;
+
+ b = bufs;
+ ti = threads;
+
+ while (n_vectors)
+ {
+ t = vlib_add_trace (vm, node, b[0], sizeof (*t));
+ t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ t->next_worker_index = ti[0];
+ t->buffer_index = vlib_get_buffer_index (vm, b[0]);
+
+ b += 1;
+ ti += 1;
+ n_vectors -= 1;
+ }
+}
+
VLIB_NODE_FN (worker_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -102,26 +125,16 @@ VLIB_NODE_FN (worker_handoff_node) (vlib_main_t * vm,
while (n_left_from > 0)
{
- u32 sw_if_index0;
- u32 hash;
- u64 hash_key;
per_inteface_handoff_data_t *ihd0;
- u32 index0;
-
+ u32 sw_if_index0, hash, index0;
+ void *data;
sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
- ASSERT (hm->if_data);
ihd0 = vec_elt_at_index (hm->if_data, sw_if_index0);
- /*
- * Force unknown traffic onto worker 0,
- * and into ethernet-input. $$$$ add more hashes.
- */
-
/* Compute ingress LB hash */
- hash_key = hm->hash_fn ((ethernet_header_t *)
- vlib_buffer_get_current (b[0]));
- hash = (u32) clib_xxhash (hash_key);
+ data = vlib_buffer_get_current (b[0]);
+ ihd0->hash_fn (&data, &hash, 1);
/* if input node did not specify next index, then packet
should go to ethernet-input */
@@ -133,22 +146,16 @@ VLIB_NODE_FN (worker_handoff_node) (vlib_main_t * vm,
ti[0] = hm->first_worker_index + ihd0->workers[index0];
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b[0]->flags & VLIB_BUFFER_IS_TRACED)))
- {
- worker_handoff_trace_t *t =
- vlib_add_trace (vm, node, b[0], sizeof (*t));
- t->sw_if_index = sw_if_index0;
- t->next_worker_index = ti[0];
- t->buffer_index = vlib_get_buffer_index (vm, b[0]);
- }
-
/* next */
n_left_from -= 1;
ti += 1;
b += 1;
}
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ worker_handoff_trace_frame (vm, node, bufs, thread_indices,
+ frame->n_vectors);
+
n_enq = vlib_buffer_enqueue_to_thread (vm, node, hm->frame_queue_index, from,
thread_indices, frame->n_vectors, 1);
@@ -159,7 +166,6 @@ VLIB_NODE_FN (worker_handoff_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (worker_handoff_node) = {
.name = "worker-handoff",
.vector_size = sizeof (u32),
@@ -174,12 +180,12 @@ VLIB_REGISTER_NODE (worker_handoff_node) = {
},
};
-/* *INDENT-ON* */
-
#ifndef CLIB_MARCH_VARIANT
+
int
-interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index,
- uword * bitmap, int enable_disable)
+interface_handoff_enable_disable (vlib_main_t *vm, u32 sw_if_index,
+ uword *bitmap, u8 is_sym, int is_l4,
+ int enable_disable)
{
handoff_main_t *hm = &handoff_main;
vnet_sw_interface_t *sw;
@@ -212,16 +218,34 @@ interface_handoff_enable_disable (vlib_main_t * vm, u32 sw_if_index,
if (enable_disable)
{
d->workers_bitmap = bitmap;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, bitmap)
- {
+ {
vec_add1(d->workers, i);
}
- /* *INDENT-ON* */
+
+ if (is_sym)
+ {
+ if (is_l4)
+ return VNET_API_ERROR_UNIMPLEMENTED;
+
+ d->hash_fn = vnet_hash_function_from_name (
+ "handoff-eth-sym", VNET_HASH_FN_TYPE_ETHERNET);
+ }
+ else
+ {
+ if (is_l4)
+ d->hash_fn =
+ vnet_hash_default_function (VNET_HASH_FN_TYPE_ETHERNET);
+ else
+ d->hash_fn = vnet_hash_function_from_name (
+ "handoff-eth", VNET_HASH_FN_TYPE_ETHERNET);
+ }
}
vnet_feature_enable_disable ("device-input", "worker-handoff",
sw_if_index, enable_disable, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "worker-handoff", sw_if_index,
+ enable_disable, 0, 0);
return rv;
}
@@ -230,12 +254,9 @@ set_interface_handoff_command_fn (vlib_main_t * vm,
unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- handoff_main_t *hm = &handoff_main;
- u32 sw_if_index = ~0;
+ u32 sw_if_index = ~0, is_sym = 0, is_l4 = 0;
int enable_disable = 1;
uword *bitmap = 0;
- u32 sym = ~0;
-
int rv = 0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
@@ -248,9 +269,11 @@ set_interface_handoff_command_fn (vlib_main_t * vm,
vnet_get_main (), &sw_if_index))
;
else if (unformat (input, "symmetrical"))
- sym = 1;
+ is_sym = 1;
else if (unformat (input, "asymmetrical"))
- sym = 0;
+ is_sym = 0;
+ else if (unformat (input, "l4"))
+ is_l4 = 1;
else
break;
}
@@ -261,9 +284,8 @@ set_interface_handoff_command_fn (vlib_main_t * vm,
if (bitmap == 0)
return clib_error_return (0, "Please specify list of workers...");
- rv =
- interface_handoff_enable_disable (vm, sw_if_index, bitmap,
- enable_disable);
+ rv = interface_handoff_enable_disable (vm, sw_if_index, bitmap, is_sym,
+ is_l4, enable_disable);
switch (rv)
{
@@ -287,22 +309,15 @@ set_interface_handoff_command_fn (vlib_main_t * vm,
return clib_error_return (0, "unknown return value %d", rv);
}
- if (sym == 1)
- hm->hash_fn = eth_get_sym_key;
- else if (sym == 0)
- hm->hash_fn = eth_get_key;
-
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_handoff_command, static) = {
.path = "set interface handoff",
- .short_help =
- "set interface handoff <interface-name> workers <workers-list> [symmetrical|asymmetrical]",
+ .short_help = "set interface handoff <interface-name> workers <workers-list>"
+ " [symmetrical|asymmetrical]",
.function = set_interface_handoff_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
handoff_init (vlib_main_t * vm)
@@ -328,7 +343,6 @@ handoff_init (vlib_main_t * vm)
}
}
- hm->hash_fn = eth_get_key;
hm->frame_queue_index = ~0;
return 0;
diff --git a/src/vnet/hash/FEATURE.yaml b/src/vnet/hash/FEATURE.yaml
new file mode 100644
index 00000000000..d5b9a069c27
--- /dev/null
+++ b/src/vnet/hash/FEATURE.yaml
@@ -0,0 +1,9 @@
+---
+name: Hash infrastructure
+maintainer: Mohsin Kazmi <mohsin.kazmi14@gmail.com>, Damjan Marion <damarion@cisco.com>
+features:
+ - Ethernet
+ - IP
+description: "Hash infrastructure"
+state: development
+properties: [CLI]
diff --git a/src/vnet/hash/cli.c b/src/vnet/hash/cli.c
new file mode 100644
index 00000000000..47d33b9872e
--- /dev/null
+++ b/src/vnet/hash/cli.c
@@ -0,0 +1,33 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/hash/hash.h>
+
+static clib_error_t *
+show_hash (vlib_main_t *vm, unformat_input_t *input, vlib_cli_command_t *cmd)
+{
+ clib_error_t *error = 0;
+ vnet_hash_main_t *hm = &vnet_hash_main;
+ vnet_hash_function_registration_t *hash;
+
+ hash = hm->hash_registrations;
+
+ vlib_cli_output (vm, "%-25s%-8s%s", "Name", "Prio", "Description");
+ while (hash)
+ {
+ vlib_cli_output (vm, "%-25s%-8u%s", hash->name, hash->priority,
+ hash->description);
+ hash = hash->next;
+ }
+
+ return (error);
+}
+
+VLIB_CLI_COMMAND (cmd_show_hash, static) = {
+ .path = "show hash",
+ .short_help = "show hash",
+ .function = show_hash,
+};
diff --git a/src/vnet/hash/crc32_5tuple.c b/src/vnet/hash/crc32_5tuple.c
new file mode 100644
index 00000000000..2cdb19440c6
--- /dev/null
+++ b/src/vnet/hash/crc32_5tuple.c
@@ -0,0 +1,168 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/hash/hash.h>
+#include <vppinfra/crc32.h>
+
+#ifdef clib_crc32c_uses_intrinsics
+
+static const u8 l4_mask_bits[256] = {
+ [IP_PROTOCOL_ICMP] = 16, [IP_PROTOCOL_IGMP] = 8,
+ [IP_PROTOCOL_TCP] = 32, [IP_PROTOCOL_UDP] = 32,
+ [IP_PROTOCOL_IPSEC_ESP] = 32, [IP_PROTOCOL_IPSEC_AH] = 32,
+ [IP_PROTOCOL_ICMP6] = 16,
+};
+
+static_always_inline u32
+compute_ip6_key (ip6_header_t *ip)
+{
+ u32 hash = 0, l4hdr;
+ u8 pr;
+ /* dst + src ip as u64 */
+ hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 8));
+ hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 16));
+ hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 24));
+ hash = clib_crc32c_u64 (hash, *(u64u *) ((u8 *) ip + 32));
+ pr = ip->protocol;
+ l4hdr = *(u32 *) ip6_next_header (ip) & pow2_mask (l4_mask_bits[pr]);
+ /* protocol + l4 hdr */
+ return clib_crc32c_u64 (hash, ((u64) pr << 32) | l4hdr);
+}
+
+static_always_inline u32
+compute_ip4_key (ip4_header_t *ip)
+{
+ u32 hash = 0, l4hdr;
+ u8 pr;
+ /* dst + src ip as u64 */
+ hash = clib_crc32c_u64 (0, *(u64 *) ((u8 *) ip + 12));
+ pr = ip->protocol;
+ l4hdr = *(u32 *) ip4_next_header (ip) & pow2_mask (l4_mask_bits[pr]);
+ /* protocol + l4 hdr */
+ return clib_crc32c_u64 (hash, ((u64) pr << 32) | l4hdr);
+}
+static_always_inline u32
+compute_ip_key (void *p)
+{
+ if ((((u8 *) p)[0] & 0xf0) == 0x40)
+ return compute_ip4_key (p);
+ else if ((((u8 *) p)[0] & 0xf0) == 0x60)
+ return compute_ip6_key (p);
+ return 0;
+}
+
+void
+vnet_crc32c_5tuple_ip_func (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ hash[0] = compute_ip_key (p[0]);
+ hash[1] = compute_ip_key (p[1]);
+ hash[2] = compute_ip_key (p[2]);
+ hash[3] = compute_ip_key (p[3]);
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ hash[0] = compute_ip_key (p[0]);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+static_always_inline u32
+compute_ethernet_key (void *p)
+{
+ u16 ethertype = 0, l2hdr_sz = 0;
+
+ ethernet_header_t *eh = (ethernet_header_t *) p;
+ ethertype = clib_net_to_host_u16 (eh->type);
+ l2hdr_sz = sizeof (ethernet_header_t);
+
+ if (ethernet_frame_is_tagged (ethertype))
+ {
+ ethernet_vlan_header_t *vlan = (ethernet_vlan_header_t *) (eh + 1);
+
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ while (ethernet_frame_is_tagged (ethertype))
+ {
+ vlan++;
+ ethertype = clib_net_to_host_u16 (vlan->type);
+ l2hdr_sz += sizeof (*vlan);
+ }
+ }
+
+ if (ethertype == ETHERNET_TYPE_IP4)
+ {
+ ip4_header_t *ip4 = (ip4_header_t *) (p + l2hdr_sz);
+ return compute_ip4_key (ip4);
+ }
+ else if (ethertype == ETHERNET_TYPE_IP6)
+ {
+ ip6_header_t *ip6 = (ip6_header_t *) (p + l2hdr_sz);
+ return compute_ip6_key (ip6);
+ }
+ return 0;
+}
+
+void
+vnet_crc32c_5tuple_ethernet_func (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ hash[0] = compute_ethernet_key (p[0]);
+ hash[1] = compute_ethernet_key (p[1]);
+ hash[2] = compute_ethernet_key (p[2]);
+ hash[3] = compute_ethernet_key (p[3]);
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ hash[0] = compute_ethernet_key (p[0]);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+VNET_REGISTER_HASH_FUNCTION (crc32c_5tuple, static) = {
+ .name = "crc32c-5tuple",
+ .description = "IPv4/IPv6 header and TCP/UDP ports",
+ .priority = 50,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = vnet_crc32c_5tuple_ethernet_func,
+ .function[VNET_HASH_FN_TYPE_IP] = vnet_crc32c_5tuple_ip_func,
+};
+
+#endif
diff --git a/src/vnet/handoff.h b/src/vnet/hash/handoff_eth.c
index f50b86d5c6d..dc8db2ac413 100644
--- a/src/vnet/handoff.h
+++ b/src/vnet/hash/handoff_eth.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2021 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -13,14 +13,24 @@
* limitations under the License.
*/
-#ifndef included_vnet_handoff_h
-#define included_vnet_handoff_h
-
#include <vlib/vlib.h>
#include <vnet/ethernet/ethernet.h>
+#include <vnet/hash/hash.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/mpls/packet.h>
+#include <vppinfra/crc32.h>
+#include <vppinfra/xxhash.h>
+
+always_inline u32
+ho_hash (u64 key)
+{
+#ifdef clib_crc32c_uses_intrinsics
+ return clib_crc32c ((u8 *) &key, sizeof (key));
+#else
+ return clib_xxhash (key);
+#endif
+}
static inline u64
ipv4_get_key (ip4_header_t * ip)
@@ -235,7 +245,103 @@ eth_get_key (ethernet_header_t * h0)
return hash_key;
}
-#endif /* included_vnet_handoff_h */
+void
+handoff_eth_func (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ u64 key[4] = {};
+
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ key[0] = eth_get_key ((ethernet_header_t *) p[0]);
+ key[1] = eth_get_key ((ethernet_header_t *) p[1]);
+ key[2] = eth_get_key ((ethernet_header_t *) p[2]);
+ key[3] = eth_get_key ((ethernet_header_t *) p[3]);
+
+ hash[0] = ho_hash (key[0]);
+ hash[1] = ho_hash (key[1]);
+ hash[2] = ho_hash (key[2]);
+ hash[3] = ho_hash (key[3]);
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ u64 key;
+
+ key = eth_get_key ((ethernet_header_t *) p[0]);
+ hash[0] = ho_hash (key);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+VNET_REGISTER_HASH_FUNCTION (handoff_eth, static) = {
+ .name = "handoff-eth",
+ .description = "Ethernet/IPv4/IPv6/MPLS headers",
+ .priority = 2,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = handoff_eth_func,
+};
+
+void
+handoff_eth_sym_func (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ u64 key[4] = {};
+
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ key[0] = eth_get_sym_key ((ethernet_header_t *) p[0]);
+ key[1] = eth_get_sym_key ((ethernet_header_t *) p[1]);
+ key[2] = eth_get_sym_key ((ethernet_header_t *) p[2]);
+ key[3] = eth_get_sym_key ((ethernet_header_t *) p[3]);
+
+ hash[0] = ho_hash (key[0]);
+ hash[1] = ho_hash (key[1]);
+ hash[2] = ho_hash (key[2]);
+ hash[3] = ho_hash (key[3]);
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ u64 key;
+
+ key = eth_get_sym_key ((ethernet_header_t *) p[0]);
+ hash[0] = ho_hash (key);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+VNET_REGISTER_HASH_FUNCTION (handoff_eth_sym, static) = {
+ .name = "handoff-eth-sym",
+ .description = "Ethernet/IPv4/IPv6/MPLS headers Symmetric",
+ .priority = 1,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = handoff_eth_sym_func,
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/hash/hash.c b/src/vnet/hash/hash.c
new file mode 100644
index 00000000000..31693c9889b
--- /dev/null
+++ b/src/vnet/hash/hash.c
@@ -0,0 +1,76 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/interface.h>
+#include <vnet/hash/hash.h>
+
+vnet_hash_main_t vnet_hash_main;
+
+u8 *
+format_vnet_hash (u8 *s, va_list *args)
+{
+ vnet_hash_function_registration_t *hash =
+ va_arg (*args, vnet_hash_function_registration_t *);
+
+ s = format (s, "[name: %s ", hash->name);
+ s = format (s, "priority: %u ", hash->priority);
+ s = format (s, "description: %s]", hash->description);
+ return s;
+}
+
+/**
+ * select hash func with highest priority
+ */
+vnet_hash_fn_t
+vnet_hash_default_function (vnet_hash_fn_type_t ftype)
+{
+ vnet_hash_function_registration_t *hash = vnet_hash_main.hash_registrations;
+ vnet_hash_function_registration_t *tmp_hash = hash;
+ while (hash)
+ {
+ if (hash->priority > tmp_hash->priority)
+ tmp_hash = hash;
+ hash = hash->next;
+ }
+ return tmp_hash->function[ftype];
+}
+
+vnet_hash_fn_t
+vnet_hash_function_from_name (const char *name, vnet_hash_fn_type_t ftype)
+{
+ vnet_hash_function_registration_t *hash = vnet_hash_main.hash_registrations;
+ while (hash)
+ {
+ if (strcmp (hash->name, name) == 0)
+ break;
+ hash = hash->next;
+ }
+ if (!hash)
+ return (0);
+ return hash->function[ftype];
+}
+
+vnet_hash_function_registration_t *
+vnet_hash_function_from_func (vnet_hash_fn_t fn, vnet_hash_fn_type_t ftype)
+{
+ vnet_hash_function_registration_t *hash = vnet_hash_main.hash_registrations;
+ while (hash)
+ {
+ if (hash->function[ftype] == fn)
+ break;
+ hash = hash->next;
+ }
+ return hash;
+}
+
+static clib_error_t *
+vnet_hash_init (vlib_main_t *vm)
+{
+ return (0);
+}
+
+VLIB_INIT_FUNCTION (vnet_hash_init);
diff --git a/src/vnet/hash/hash.h b/src/vnet/hash/hash.h
new file mode 100644
index 00000000000..c1eb9475e28
--- /dev/null
+++ b/src/vnet/hash/hash.h
@@ -0,0 +1,59 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef __VNET_HASH_H__
+#define __VNET_HASH_H__
+
+#include <vlib/vlib.h>
+
+#define foreach_vnet_hash_fn_types \
+ _ (ETHERNET, 0, "hash-fn-ethernet") \
+ _ (IP, 1, "hash-fn-ip")
+
+typedef enum
+{
+#define _(f, n, s) VNET_HASH_FN_TYPE_##f,
+ foreach_vnet_hash_fn_types
+#undef _
+ VNET_HASH_FN_TYPE_N,
+} vnet_hash_fn_type_t;
+
+typedef void (*vnet_hash_fn_t) (void **p, u32 *h, u32 n_packets);
+
+typedef struct vnet_hash_function_registration
+{
+ const char *name;
+ const char *description;
+ int priority;
+ vnet_hash_fn_t function[VNET_HASH_FN_TYPE_N];
+
+ struct vnet_hash_function_registration *next;
+} vnet_hash_function_registration_t;
+
+typedef struct
+{
+ vnet_hash_function_registration_t *hash_registrations;
+} vnet_hash_main_t;
+
+extern vnet_hash_main_t vnet_hash_main;
+
+#define VNET_REGISTER_HASH_FUNCTION(x, ...) \
+ __VA_ARGS__ vnet_hash_function_registration_t __vnet_hash_function_##x; \
+ static void __clib_constructor __vnet_hash_function_registration_##x (void) \
+ { \
+ vnet_hash_main_t *hm = &vnet_hash_main; \
+ __vnet_hash_function_##x.next = hm->hash_registrations; \
+ hm->hash_registrations = &__vnet_hash_function_##x; \
+ } \
+ __VA_ARGS__ vnet_hash_function_registration_t __vnet_hash_function_##x
+
+vnet_hash_fn_t vnet_hash_default_function (vnet_hash_fn_type_t ftype);
+vnet_hash_fn_t vnet_hash_function_from_name (const char *name,
+ vnet_hash_fn_type_t ftype);
+vnet_hash_function_registration_t *
+vnet_hash_function_from_func (vnet_hash_fn_t fn, vnet_hash_fn_type_t ftype);
+format_function_t format_vnet_hash;
+
+#endif
diff --git a/src/vnet/hash/hash.rst b/src/vnet/hash/hash.rst
new file mode 100644
index 00000000000..3db74e2f093
--- /dev/null
+++ b/src/vnet/hash/hash.rst
@@ -0,0 +1,90 @@
+.. _hash_doc:
+
+Hash Infra
+==========
+
+Overview
+________
+
+Modern physical NICs uses packet flow hash for different purposes, i.e. Receive
+Side Scaling, flow steering and interface bonding etc. NICs can also provide
+packet flow hash prepended to data packet as metadata which can be used by
+applications without recomputing the packet flow hash.
+
+As more and more services are deployed in virtualized environment, making use of
+virtual interfaces to interconnect those services.
+
+The Hash Infrastructure
+_______________________
+
+VPP implements software based hashing functionality which can be used for different
+purposes. It also provides users a centralized way to registry custom hash functions
+based on traffic profile to be used in different vpp features i.e. Multi-TXQ,
+software RSS or bonding driver.
+
+Data structures
+^^^^^^^^^^^^^^^
+
+Hashing infra provides two types of hashing functions:
+``VNET_HASH_FN_TYPE_ETHERNET`` and ``VNET_HASH_FN_TYPE_IP`` for ethernet traffic and
+IP traffic respectively.
+Hashing infra provides uniform signature to the functions to be implemented:
+
+.. code:: c
+
+ void (*vnet_hash_fn_t) (void **p, u32 *h, u32 n_packets);
+
+Here ``**p`` is the array of pointers pointing to the beginning of packet headers
+(either ethernet or ip).
+``*h`` is an empty array of size n_packets. On return, it will contain hashes.
+``n_packets`` is the number of packets pass to this function.
+
+Custom hashing functions can be registered through ``VNET_REGISTER_HASH_FUNCTION``.
+Users need to provide a name, description, priority and hashing functions for
+registration.
+
+Default hashing function is selected based on the highest priority among the registered
+hashing functions.
+
+.. code:: c
+
+ typedef struct vnet_hash_function_registration
+ {
+ const char *name;
+ const char *description;
+ int priority;
+ vnet_hash_fn_t function[VNET_HASH_FN_TYPE_N];
+
+ struct vnet_hash_function_registration *next;
+ } vnet_hash_function_registration_t;
+
+For example, ``crc32c_5tuple`` provides two hashing functions: for IP traffic and for
+ethernet traffic. It uses 5 tuples from the flow to compute the crc32 hash on it.
+
+.. code:: c
+
+ void vnet_crc32c_5tuple_ip_func (void **p, u32 *hash, u32 n_packets);
+ void vnet_crc32c_5tuple_ethernet_func (void **p, u32 *hash, u32 n_packets);
+
+ VNET_REGISTER_HASH_FUNCTION (crc32c_5tuple, static) = {
+ .name = "crc32c-5tuple",
+ .description = "IPv4/IPv6 header and TCP/UDP ports",
+ .priority = 50,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = vnet_crc32c_5tuple_ethernet_func,
+ .function[VNET_HASH_FN_TYPE_IP] = vnet_crc32c_5tuple_ip_func,
+ };
+
+
+Users can see all the registered hash functions along with priority and description.
+
+Hash API
+^^^^^^^^
+
+There is no Hash API at the moment.
+
+Hash CLI
+^^^^^^^^
+
+::
+
+ show hash
diff --git a/src/vnet/hash/hash_eth.c b/src/vnet/hash/hash_eth.c
new file mode 100644
index 00000000000..1ac8b66a1bc
--- /dev/null
+++ b/src/vnet/hash/hash_eth.c
@@ -0,0 +1,326 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+#include <vppinfra/lb_hash_hash.h>
+#include <vnet/hash/hash.h>
+
+static_always_inline u16 *
+locate_ethertype (ethernet_header_t *eth)
+{
+ u16 *ethertype_p;
+ ethernet_vlan_header_t *vlan;
+
+ if (!ethernet_frame_is_tagged (clib_net_to_host_u16 (eth->type)))
+ {
+ ethertype_p = &eth->type;
+ }
+ else
+ {
+ vlan = (void *) (eth + 1);
+ ethertype_p = &vlan->type;
+ if (*ethertype_p == ntohs (ETHERNET_TYPE_VLAN))
+ {
+ vlan++;
+ ethertype_p = &vlan->type;
+ }
+ }
+ return ethertype_p;
+}
+
+static void
+hash_eth_l2 (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ ethernet_header_t *eth = *p;
+ u64 *dst = (u64 *) &eth->dst_address[0];
+ u64 a = clib_mem_unaligned (dst, u64);
+ u32 *src = (u32 *) &eth->src_address[2];
+ u32 b = clib_mem_unaligned (src, u32);
+
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ hash[0] = lb_hash_hash_2_tuples (a, b);
+ hash[1] = lb_hash_hash_2_tuples (a, b);
+ hash[2] = lb_hash_hash_2_tuples (a, b);
+ hash[3] = lb_hash_hash_2_tuples (a, b);
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ ethernet_header_t *eth = *p;
+ u64 *dst = (u64 *) &eth->dst_address[0];
+ u64 a = clib_mem_unaligned (dst, u64);
+ u32 *src = (u32 *) &eth->src_address[2];
+ u32 b = clib_mem_unaligned (src, u32);
+
+ hash[0] = lb_hash_hash_2_tuples (a, b);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+static_always_inline u32
+hash_eth_l23_inline (void **p)
+{
+ ethernet_header_t *eth = *p;
+ u8 ip_version;
+ ip4_header_t *ip4;
+ u16 ethertype, *ethertype_p;
+ u32 *mac1, *mac2, *mac3;
+ u32 hash;
+
+ ethertype_p = locate_ethertype (eth);
+ ethertype = clib_mem_unaligned (ethertype_p, u16);
+
+ if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
+ (ethertype != htons (ETHERNET_TYPE_IP6)))
+ {
+ hash_eth_l2 (p, &hash, 1);
+ return hash;
+ }
+
+ ip4 = (ip4_header_t *) (ethertype_p + 1);
+ ip_version = (ip4->ip_version_and_header_length >> 4);
+
+ if (ip_version == 0x4)
+ {
+ u32 a;
+
+ mac1 = (u32 *) &eth->dst_address[0];
+ mac2 = (u32 *) &eth->dst_address[4];
+ mac3 = (u32 *) &eth->src_address[2];
+
+ a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^
+ clib_mem_unaligned (mac3, u32);
+ hash = lb_hash_hash_2_tuples (
+ clib_mem_unaligned (&ip4->address_pair, u64), a);
+ return hash;
+ }
+
+ if (ip_version == 0x6)
+ {
+ u64 a;
+ ip6_header_t *ip6 = (ip6_header_t *) (eth + 1);
+
+ mac1 = (u32 *) &eth->dst_address[0];
+ mac2 = (u32 *) &eth->dst_address[4];
+ mac3 = (u32 *) &eth->src_address[2];
+
+ a = clib_mem_unaligned (mac1, u32) ^ clib_mem_unaligned (mac2, u32) ^
+ clib_mem_unaligned (mac3, u32);
+ hash = lb_hash_hash (
+ clib_mem_unaligned (&ip6->src_address.as_uword[0], uword),
+ clib_mem_unaligned (&ip6->src_address.as_uword[1], uword),
+ clib_mem_unaligned (&ip6->dst_address.as_uword[0], uword),
+ clib_mem_unaligned (&ip6->dst_address.as_uword[1], uword), a);
+ return hash;
+ }
+
+ hash_eth_l2 (p, &hash, 1);
+ return hash;
+}
+
+static void
+hash_eth_l23 (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ hash[0] = hash_eth_l23_inline (&p[0]);
+ hash[1] = hash_eth_l23_inline (&p[1]);
+ hash[2] = hash_eth_l23_inline (&p[2]);
+ hash[3] = hash_eth_l23_inline (&p[3]);
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ hash[0] = hash_eth_l23_inline (&p[0]);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+static_always_inline u32
+hash_eth_l34_inline (void **p)
+{
+ ethernet_header_t *eth = *p;
+ u8 ip_version;
+ uword is_tcp_udp;
+ ip4_header_t *ip4;
+ u16 ethertype, *ethertype_p;
+ u32 hash;
+
+ ethertype_p = locate_ethertype (eth);
+ ethertype = clib_mem_unaligned (ethertype_p, u16);
+
+ if ((ethertype != htons (ETHERNET_TYPE_IP4)) &&
+ (ethertype != htons (ETHERNET_TYPE_IP6)))
+ {
+ hash_eth_l2 (p, &hash, 1);
+ return hash;
+ }
+
+ ip4 = (ip4_header_t *) (ethertype_p + 1);
+ ip_version = (ip4->ip_version_and_header_length >> 4);
+
+ if (ip_version == 0x4)
+ {
+ u32 a, t1, t2;
+ tcp_header_t *tcp = (void *) (ip4 + 1);
+
+ is_tcp_udp = (ip4->protocol == IP_PROTOCOL_TCP) ||
+ (ip4->protocol == IP_PROTOCOL_UDP);
+ t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0;
+ t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0;
+ a = t1 ^ t2;
+ hash = lb_hash_hash_2_tuples (
+ clib_mem_unaligned (&ip4->address_pair, u64), a);
+ return hash;
+ }
+
+ if (ip_version == 0x6)
+ {
+ u64 a;
+ u32 t1, t2;
+ ip6_header_t *ip6 = (ip6_header_t *) (eth + 1);
+ tcp_header_t *tcp = (void *) (ip6 + 1);
+
+ is_tcp_udp = 0;
+ if (PREDICT_TRUE ((ip6->protocol == IP_PROTOCOL_TCP) ||
+ (ip6->protocol == IP_PROTOCOL_UDP)))
+ {
+ is_tcp_udp = 1;
+ tcp = (void *) (ip6 + 1);
+ }
+ else if (ip6->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip6 + 1);
+ if ((hbh->protocol == IP_PROTOCOL_TCP) ||
+ (hbh->protocol == IP_PROTOCOL_UDP))
+ {
+ is_tcp_udp = 1;
+ tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+ }
+ }
+ t1 = is_tcp_udp ? clib_mem_unaligned (&tcp->src, u16) : 0;
+ t2 = is_tcp_udp ? clib_mem_unaligned (&tcp->dst, u16) : 0;
+ a = t1 ^ t2;
+ hash = lb_hash_hash (
+ clib_mem_unaligned (&ip6->src_address.as_uword[0], uword),
+ clib_mem_unaligned (&ip6->src_address.as_uword[1], uword),
+ clib_mem_unaligned (&ip6->dst_address.as_uword[0], uword),
+ clib_mem_unaligned (&ip6->dst_address.as_uword[1], uword), a);
+ return hash;
+ }
+
+ hash_eth_l2 (p, &hash, 1);
+ return hash;
+}
+
+static void
+hash_eth_l34 (void **p, u32 *hash, u32 n_packets)
+{
+ u32 n_left_from = n_packets;
+
+ while (n_left_from >= 8)
+ {
+ clib_prefetch_load (p[4]);
+ clib_prefetch_load (p[5]);
+ clib_prefetch_load (p[6]);
+ clib_prefetch_load (p[7]);
+
+ hash[0] = hash_eth_l34_inline (&p[0]);
+ hash[1] = hash_eth_l34_inline (&p[1]);
+ hash[2] = hash_eth_l34_inline (&p[2]);
+ hash[3] = hash_eth_l34_inline (&p[3]);
+
+ hash += 4;
+ n_left_from -= 4;
+ p += 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ hash[0] = hash_eth_l34_inline (&p[0]);
+
+ hash += 1;
+ n_left_from -= 1;
+ p += 1;
+ }
+}
+
+VNET_REGISTER_HASH_FUNCTION (hash_eth_l2, static) = {
+ .name = "hash-eth-l2",
+ .description = "Hash ethernet L2 headers",
+ .priority = 50,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = hash_eth_l2,
+};
+
+VNET_REGISTER_HASH_FUNCTION (hash_eth_l23, static) = {
+ .name = "hash-eth-l23",
+ .description = "Hash ethernet L23 headers",
+ .priority = 50,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = hash_eth_l23,
+};
+
+VNET_REGISTER_HASH_FUNCTION (hash_eth_l34, static) = {
+ .name = "hash-eth-l34",
+ .description = "Hash ethernet L34 headers",
+ .priority = 50,
+ .function[VNET_HASH_FN_TYPE_ETHERNET] = hash_eth_l34,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/hdlc/hdlc.c b/src/vnet/hdlc/hdlc.c
index fa1e7cd5eaf..443a0396e9e 100644
--- a/src/vnet/hdlc/hdlc.c
+++ b/src/vnet/hdlc/hdlc.c
@@ -197,7 +197,6 @@ hdlc_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = {
.name = "HDLC",
.format_header = format_hdlc_header_with_length,
@@ -205,7 +204,6 @@ VNET_HW_INTERFACE_CLASS (hdlc_hw_interface_class) = {
.build_rewrite = hdlc_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
static void
add_protocol (hdlc_main_t * pm, hdlc_protocol_t protocol, char *protocol_name)
diff --git a/src/vnet/hdlc/node.c b/src/vnet/hdlc/node.c
index 8bb621231c7..48269a3b8d3 100644
--- a/src/vnet/hdlc/node.c
+++ b/src/vnet/hdlc/node.c
@@ -279,7 +279,6 @@ static char *hdlc_error_strings[] = {
#undef hdlc_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (hdlc_input_node) = {
.function = hdlc_input,
.name = "hdlc-input",
@@ -302,7 +301,6 @@ VLIB_REGISTER_NODE (hdlc_input_node) = {
.format_trace = format_hdlc_input_trace,
.unformat_buffer = unformat_hdlc_header,
};
-/* *INDENT-ON* */
static clib_error_t *
hdlc_input_runtime_init (vlib_main_t * vm)
diff --git a/src/vnet/interface.api b/src/vnet/interface.api
index 38dc4320b8d..eea86aa1ac8 100644
--- a/src/vnet/interface.api
+++ b/src/vnet/interface.api
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-option version = "3.2.2";
+option version = "3.2.3";
import "vnet/interface_types.api";
import "vnet/ethernet/ethernet_types.api";
@@ -458,6 +458,45 @@ autoreply define sw_interface_set_rx_placement
bool is_main;
};
+/** \brief Set an interface's tx-placement
+ Tx-Queue placement on specific thread is operational for only hardware
+ interface. It will not set queue - thread placement for sub-interfaces,
+ p2p and pipe interfaces.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface whose tx-placement will be set
+ @param queue_id - the queue number whose tx-placement will be set.
+ @param array_size - the size of the thread indexes array
+ @param threads - the thread indexes of main and worker(s) threads
+ whom tx-placement will be at.
+*/
+autoendian autoreply define sw_interface_set_tx_placement
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u32 queue_id;
+ u32 array_size;
+ u32 threads[array_size];
+ option vat_help = "<interface | sw_if_index <index>> queue <n> [threads <list> | mask <hex>]";
+};
+
+/** \brief Set custom interface name
+ Set custom interface name for the interface.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface whose name will be set
+ @param name - the custom interface name to be set
+k
+*/
+autoreply define sw_interface_set_interface_name
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ string name[64];
+};
+
/** \brief dump the rx queue placement of interface(s)
@param sw_if_index - optional interface index for which queue placement to
be requested. sw_if_index = ~0 will dump placement information for all
@@ -496,6 +535,60 @@ define sw_interface_rx_placement_details
vl_api_rx_mode_t mode;
};
+service {
+ rpc sw_interface_tx_placement_get returns sw_interface_tx_placement_get_reply
+ stream sw_interface_tx_placement_details;
+};
+
+/** \brief get the tx queue placement of interface(s)
+ @param cursor - optional, it allows client to continue a dump
+ @param sw_if_index - optional interface index for which queue placement to
+ be requested. sw_if_index = ~0 will get the placement information for all
+ interfaces. It will not get information related to sub-interfaces, p2p
+ and pipe interfaces.
+*/
+autoendian define sw_interface_tx_placement_get
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+ vl_api_interface_index_t sw_if_index;
+ option vat_help = "[interface | sw_if_index <index>]";
+};
+
+autoendian define sw_interface_tx_placement_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+/** \brief show the interface's queue - thread placement
+ This api is used to display the interface and queue worker
+ thread placement. One message per tx-queue per interface will
+ be sent to client.
+ Each message will contain information about tx-queue id of an
+ interface, interface index, thread on which this tx-queue is
+ placed and mode of tx-queue.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - the interface whose tx-placement will be dumped
+ @param queue_id - the queue id
+ @param shared - the queue is shared on other threads
+ @param array_size - the size of the threads array
+ @param threads - the main and worker(s) thread index(es) whom tx-placement are at.
+*/
+autoendian define sw_interface_tx_placement_details
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u32 queue_id;
+ u8 shared;
+ u32 array_size;
+ u32 threads[array_size];
+};
+
/* Gross kludge, DGMS */
autoreply define interface_name_renumber
{
@@ -640,6 +733,61 @@ autoreply define collect_detailed_interface_stats
bool enable_disable;
};
+/** \brief pcap_set_filter_function
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param filter_function_name - the name of the filter function
+ to set for pcap capture
+*/
+autoreply define pcap_set_filter_function
+{
+ u32 client_index;
+ u32 context;
+
+ string filter_function_name[];
+};
+
+/** \brief pcap_trace_on
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param capture_rx - capture received packets
+ @param capture_tx - capture transmitted packets
+ @param capture_drop - capture dropped packets
+ @param filter - is a filter is being used on this capture
+ @param preallocate_data - preallocate the data buffer
+ @param free_data - free the data buffer
+ @param max_packets - depth of local buffer
+ @param max_bytes_per_packet - maximum number of bytes to capture
+ for each packet
+ @param sw_if_index - specify a given interface, or 0 for any
+ @param error - filter packets based on a specific error.
+ @param filename - output filename, will be placed in /tmp
+*/
+autoreply define pcap_trace_on
+{
+ u32 client_index;
+ u32 context;
+ bool capture_rx;
+ bool capture_tx;
+ bool capture_drop;
+ bool filter;
+ bool preallocate_data;
+ bool free_data;
+ u32 max_packets [default=1000];
+ u32 max_bytes_per_packet [default=512];
+ vl_api_interface_index_t sw_if_index;
+ string error[128];
+ string filename[64];
+
+ option vat_help = "pcap_trace_on [capture_rx] [capture_tx] [capture_drop] [max_packets <nn>] [sw_if_index <sw_if_index>|0 for any] [error <node>.<error>] [filename <name>] [max_bytes_per_packet <nnnn>] [filter] [preallocate_data] [free_data]";
+};
+
+autoreply define pcap_trace_off
+{
+ u32 client_index;
+ u32 context;
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/interface.c b/src/vnet/interface.c
index 44ea52a870d..5fb2ff65fa2 100644
--- a/src/vnet/interface.c
+++ b/src/vnet/interface.c
@@ -45,11 +45,9 @@
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/interface/tx_queue_funcs.h>
-/* *INDENT-OFF* */
VLIB_REGISTER_LOG_CLASS (if_default_log, static) = {
.class_name = "interface",
};
-/* *INDENT-ON* */
#define log_debug(fmt,...) vlib_log_debug(if_default_log.class, fmt, __VA_ARGS__)
#define log_err(fmt,...) vlib_log_err(if_default_log.class, fmt, __VA_ARGS__)
@@ -141,15 +139,12 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va)
/* Serialize hardware interface classes since they may have changed.
Must do this before sending up/down flags. */
- /* *INDENT-OFF* */
pool_foreach (hif, im->hw_interfaces) {
vnet_hw_interface_class_t * hw_class = vnet_get_hw_interface_class (vnm, hif->hw_class_index);
serialize_cstring (m, hw_class->name);
}
- /* *INDENT-ON* */
/* Send sw/hw interface state when non-zero. */
- /* *INDENT-OFF* */
pool_foreach (sif, im->sw_interfaces) {
if (sif->flags != 0)
{
@@ -158,14 +153,12 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va)
st->flags = sif->flags;
}
}
- /* *INDENT-ON* */
vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state);
if (sts)
- _vec_len (sts) = 0;
+ vec_set_len (sts, 0);
- /* *INDENT-OFF* */
pool_foreach (hif, im->hw_interfaces) {
if (hif->flags != 0)
{
@@ -174,7 +167,6 @@ serialize_vnet_interface_state (serialize_main_t * m, va_list * va)
st->flags = vnet_hw_interface_flags_to_sw(hif->flags);
}
}
- /* *INDENT-ON* */
vec_serialize (m, sts, serialize_vec_vnet_sw_hw_interface_state);
@@ -206,7 +198,6 @@ unserialize_vnet_interface_state (serialize_main_t * m, va_list * va)
uword *p;
clib_error_t *error;
- /* *INDENT-OFF* */
pool_foreach (hif, im->hw_interfaces) {
unserialize_cstring (m, &class_name);
p = hash_get_mem (im->hw_interface_class_by_name, class_name);
@@ -222,7 +213,6 @@ unserialize_vnet_interface_state (serialize_main_t * m, va_list * va)
clib_error_report (error);
vec_free (class_name);
}
- /* *INDENT-ON* */
}
vec_unserialize (m, &sts, unserialize_vec_vnet_sw_hw_interface_state);
@@ -462,9 +452,6 @@ vnet_sw_interface_set_flags_helper (vnet_main_t * vnm, u32 sw_if_index,
goto done;
}
- /* save the si admin up flag */
- old_flags = si->flags;
-
/* update si admin up flag in advance if we are going admin down */
if (!(flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP))
si->flags &= ~VNET_SW_INTERFACE_FLAG_ADMIN_UP;
@@ -658,6 +645,7 @@ vnet_create_sw_interface (vnet_main_t * vnm, vnet_sw_interface_t * template,
/* undo the work done by vnet_create_sw_interface_no_callbacks() */
log_err ("create_sw_interface: set flags failed\n %U",
format_clib_error, error);
+ call_sw_interface_add_del_callbacks (vnm, *sw_if_index, 0);
vnet_sw_interface_t *sw =
pool_elt_at_index (im->sw_interfaces, *sw_if_index);
pool_put (im->sw_interfaces, sw);
@@ -771,18 +759,40 @@ sw_interface_walk_callback (vnet_main_t * vnm, u32 sw_if_index, void *ctx)
return WALK_CONTINUE;
}
-void
-vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu)
+clib_error_t *
+vnet_hw_interface_set_max_frame_size (vnet_main_t *vnm, u32 hw_if_index,
+ u32 fs)
{
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_hw_interface_class_t *hw_if_class =
+ vnet_get_hw_interface_class (vnm, hi->hw_class_index);
+ clib_error_t *err = 0;
+ log_debug ("set_max_frame_size: interface %v, max_frame_size %u -> %u",
+ hi->name, hi->max_frame_size, fs);
+
+ if (hw_if_class->set_max_frame_size == 0)
+ return vnet_error (VNET_ERR_UNSUPPORTED,
+ "hw class doesn't support changing Max Frame Size");
- if (hi->max_packet_bytes != mtu)
+ if (hi->max_frame_size != fs)
{
- hi->max_packet_bytes = mtu;
- ethernet_set_flags (vnm, hw_if_index, ETHERNET_INTERFACE_FLAG_MTU);
+ u32 mtu;
+ if (hw_if_class->set_max_frame_size)
+ if ((err = hw_if_class->set_max_frame_size (vnm, hi, fs)))
+ return err;
+ hi->max_frame_size = fs;
+ mtu = fs - hi->frame_overhead;
vnet_hw_interface_walk_sw (vnm, hw_if_index, sw_interface_walk_callback,
&mtu);
}
+ return 0;
+}
+clib_error_t *
+vnet_hw_interface_set_mtu (vnet_main_t *vnm, u32 hw_if_index, u32 mtu)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ return vnet_hw_interface_set_max_frame_size (vnm, hw_if_index,
+ mtu + hi->frame_overhead);
}
static void
@@ -807,6 +817,36 @@ setup_output_node (vlib_main_t * vm,
n->unformat_buffer = hw_class->unformat_header;
}
+void
+vnet_reset_interface_l3_output_node (vlib_main_t *vm, u32 sw_if_index)
+{
+ vnet_set_interface_l3_output_node (vm, sw_if_index,
+ (u8 *) "interface-output");
+}
+
+void
+vnet_set_interface_l3_output_node (vlib_main_t *vm, u32 sw_if_index,
+ u8 *output_node)
+{
+ vlib_node_t *l3_node;
+
+ l3_node = vlib_get_node_by_name (vm, output_node);
+
+ static char *arcs[] = {
+ "ip4-output",
+ "ip6-output",
+ "mpls-output",
+ "ethernet-output",
+ };
+ u8 a;
+
+ for (a = 0; a < ARRAY_LEN (arcs); a++)
+ {
+ u8 arc = vnet_get_feature_arc_index (arcs[a]);
+ vnet_feature_modify_end_node (arc, sw_if_index, l3_node->index);
+ }
+}
+
/* Register an interface instance. */
u32
vnet_register_interface (vnet_main_t * vnm,
@@ -824,7 +864,6 @@ vnet_register_interface (vnet_main_t * vnm,
vnet_feature_config_main_t *fcm;
vnet_config_main_t *cm;
u32 hw_index, i;
- char *tx_node_name = NULL, *output_node_name = NULL;
vlib_node_t *if_out_node =
vlib_get_node (vm, vnet_interface_output_node.index);
@@ -836,6 +875,10 @@ vnet_register_interface (vnet_main_t * vnm,
hw->hw_if_index = hw_index;
hw->default_rx_mode = VNET_HW_IF_RX_MODE_POLLING;
+ if (hw_class->tx_hash_fn_type == VNET_HASH_FN_TYPE_ETHERNET ||
+ hw_class->tx_hash_fn_type == VNET_HASH_FN_TYPE_IP)
+ hw->hf = vnet_hash_default_function (hw_class->tx_hash_fn_type);
+
if (dev_class->format_device_name)
hw->name = format (0, "%U", dev_class->format_device_name, dev_instance);
else if (hw_class->format_interface_name)
@@ -867,15 +910,11 @@ vnet_register_interface (vnet_main_t * vnm,
hw->hw_instance = hw_instance;
hw->max_rate_bits_per_sec = 0;
- hw->min_packet_bytes = 0;
vnet_sw_interface_set_mtu (vnm, hw->sw_if_index, 0);
if (dev_class->tx_function == 0 && dev_class->tx_fn_registrations == 0)
goto no_output_nodes; /* No output/tx nodes to create */
- tx_node_name = (char *) format (0, "%v-tx", hw->name);
- output_node_name = (char *) format (0, "%v-output", hw->name);
-
/* If we have previously deleted interface nodes, re-use them. */
if (vec_len (im->deleted_hw_interface_nodes) > 0)
{
@@ -888,8 +927,8 @@ vnet_register_interface (vnet_main_t * vnm,
hw->tx_node_index = hn->tx_node_index;
hw->output_node_index = hn->output_node_index;
- vlib_node_rename (vm, hw->tx_node_index, "%v", tx_node_name);
- vlib_node_rename (vm, hw->output_node_index, "%v", output_node_name);
+ vlib_node_rename (vm, hw->tx_node_index, "%v-tx", hw->name);
+ vlib_node_rename (vm, hw->output_node_index, "%v-output", hw->name);
foreach_vlib_main ()
{
@@ -943,7 +982,7 @@ vnet_register_interface (vnet_main_t * vnm,
VLIB_NODE_RUNTIME_PERF_RESET);
}
- _vec_len (im->deleted_hw_interface_nodes) -= 1;
+ vec_dec_len (im->deleted_hw_interface_nodes, 1);
}
else
{
@@ -963,7 +1002,6 @@ vnet_register_interface (vnet_main_t * vnm,
r.vector_size = sizeof (u32);
r.flags = VLIB_NODE_FLAG_IS_OUTPUT;
- r.name = tx_node_name;
if (dev_class->tx_fn_registrations)
{
r.function = 0;
@@ -972,14 +1010,13 @@ vnet_register_interface (vnet_main_t * vnm,
else
r.function = dev_class->tx_function;
- hw->tx_node_index = vlib_register_node (vm, &r);
+ hw->tx_node_index = vlib_register_node (vm, &r, "%v-tx", hw->name);
vlib_node_add_named_next_with_slot (vm, hw->tx_node_index,
"error-drop",
VNET_INTERFACE_TX_NEXT_DROP);
r.flags = 0;
- r.name = output_node_name;
r.format_trace = format_vnet_interface_output_trace;
if (if_out_node->node_fn_registrations)
{
@@ -993,12 +1030,14 @@ vnet_register_interface (vnet_main_t * vnm,
static char *e[] = {
"interface is down",
"interface is deleted",
+ "no tx queue available",
};
r.n_errors = ARRAY_LEN (e);
r.error_strings = e;
}
- hw->output_node_index = vlib_register_node (vm, &r);
+ hw->output_node_index =
+ vlib_register_node (vm, &r, "%v-output", hw->name);
vlib_node_add_named_next_with_slot (vm, hw->output_node_index,
"error-drop",
@@ -1041,9 +1080,6 @@ no_output_nodes:
VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
vnet_hw_interface_set_flags_helper (vnm, hw_index, /* flags */ 0,
VNET_INTERFACE_SET_FLAGS_HELPER_IS_CREATE);
- vec_free (tx_node_name);
- vec_free (output_node_name);
-
return hw_index;
}
@@ -1070,7 +1106,6 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
/* Delete any sub-interfaces. */
{
u32 id, sw_if_index;
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index, hw->sub_interface_sw_if_index_by_id,
({
vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
@@ -1080,7 +1115,6 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
vnet_delete_sw_interface (vnm, sw_if_index);
}));
hash_free (hw->sub_interface_sw_if_index_by_id);
- /* *INDENT-ON* */
}
/* Delete software interface corresponding to hardware interface. */
@@ -1105,16 +1139,16 @@ vnet_delete_hw_interface (vnet_main_t * vnm, u32 hw_if_index)
"interface-%d-output-deleted", hw_if_index);
vlib_node_rename (vm, hw->tx_node_index, "interface-%d-tx-deleted",
hw_if_index);
+ vlib_unregister_errors (vm, hw->output_node_index);
+ vlib_unregister_errors (vm, hw->tx_node_index);
vec_add2 (im->deleted_hw_interface_nodes, dn, 1);
dn->tx_node_index = hw->tx_node_index;
dn->output_node_index = hw->output_node_index;
}
-
hash_unset_mem (im->hw_interface_by_name, hw->name);
vec_free (hw->name);
vec_free (hw->hw_address);
- vec_free (hw->input_node_thread_index_by_queue);
- vec_free (hw->rx_queue_indices);
+ vec_free (hw->output_node_thread_runtimes);
pool_put (im->hw_interfaces, hw);
}
@@ -1131,14 +1165,12 @@ vnet_hw_interface_walk_sw (vnet_main_t * vnm,
if (WALK_STOP == fn (vnm, hi->sw_if_index, ctx))
return;
- /* *INDENT-OFF* */
hash_foreach (id, sw_if_index,
hi->sub_interface_sw_if_index_by_id,
({
if (WALK_STOP == fn (vnm, sw_if_index, ctx))
break;
}));
- /* *INDENT-ON* */
}
void
@@ -1150,13 +1182,11 @@ vnet_hw_interface_walk (vnet_main_t * vnm,
im = &vnm->interface_main;
- /* *INDENT-OFF* */
pool_foreach (hi, im->hw_interfaces)
{
if (WALK_STOP == fn(vnm, hi->hw_if_index, ctx))
break;
}
- /* *INDENT-ON* */
}
void
@@ -1168,13 +1198,11 @@ vnet_sw_interface_walk (vnet_main_t * vnm,
im = &vnm->interface_main;
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
if (WALK_STOP == fn (vnm, si, ctx))
break;
}
- /* *INDENT-ON* */
}
void
@@ -1312,7 +1340,10 @@ vnet_hw_interface_compare (vnet_main_t * vnm,
int
vnet_sw_interface_is_p2p (vnet_main_t * vnm, u32 sw_if_index)
{
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+ vnet_sw_interface_t *si = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (si == NULL)
+ return -1;
+
if ((si->type == VNET_SW_INTERFACE_TYPE_P2P) ||
(si->type == VNET_SW_INTERFACE_TYPE_PIPE))
return 1;
@@ -1335,6 +1366,49 @@ vnet_sw_interface_is_nbma (vnet_main_t * vnm, u32 sw_if_index)
}
clib_error_t *
+vnet_sw_interface_supports_addressing (vnet_main_t *vnm, u32 sw_if_index)
+{
+ if (sw_if_index == 0)
+ {
+ return clib_error_create (
+ "local0 interface doesn't support IP addressing");
+ }
+
+ if (vnet_sw_interface_is_sub (vnm, sw_if_index))
+ {
+ vnet_sw_interface_t *si;
+ si = vnet_get_sw_interface_or_null (vnm, sw_if_index);
+ if (si && si->type == VNET_SW_INTERFACE_TYPE_SUB &&
+ si->sub.eth.flags.exact_match == 0)
+ {
+ return clib_error_create (
+ "sub-interface without exact-match doesn't support IP addressing");
+ }
+ }
+ return NULL;
+}
+
+u32
+vnet_register_device_class (vlib_main_t *vm, vnet_device_class_t *c)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_interface_main_t *im = &vnm->interface_main;
+ c->index = vec_len (im->device_classes);
+ hash_set_mem (im->device_class_by_name, c->name, c->index);
+
+ /* to avoid confusion, please remove ".tx_function" statement
+ from VNET_DEVICE_CLASS() if using function candidates */
+ ASSERT (c->tx_fn_registrations == 0 || c->tx_function == 0);
+
+ if (c->tx_fn_registrations)
+ c->tx_function =
+ vlib_node_get_preferred_node_fn_variant (vm, c->tx_fn_registrations);
+
+ vec_add1 (im->device_classes, c[0]);
+ return c->index;
+}
+
+clib_error_t *
vnet_interface_init (vlib_main_t * vm)
{
vnet_main_t *vnm = vnet_get_main ();
@@ -1380,28 +1454,10 @@ vnet_interface_init (vlib_main_t * vm)
im->device_class_by_name = hash_create_string ( /* size */ 0,
sizeof (uword));
- {
- vnet_device_class_t *c;
-
- c = vnm->device_class_registrations;
-
- while (c)
- {
- c->index = vec_len (im->device_classes);
- hash_set_mem (im->device_class_by_name, c->name, c->index);
- /* to avoid confusion, please remove ".tx_function" statement
- from VNET_DEVICE_CLASS() if using function candidates */
- ASSERT (c->tx_fn_registrations == 0 || c->tx_function == 0);
-
- if (c->tx_fn_registrations)
- c->tx_function = vlib_node_get_preferred_node_fn_variant (
- vm, c->tx_fn_registrations);
-
- vec_add1 (im->device_classes, c[0]);
- c = c->next_class_registration;
- }
- }
+ for (vnet_device_class_t *c = vnm->device_class_registrations; c;
+ c = c->next_class_registration)
+ vnet_register_device_class (vm, c);
im->hw_interface_class_by_name = hash_create_string ( /* size */ 0,
sizeof (uword));
@@ -1610,20 +1666,48 @@ vnet_hw_interface_change_mac_address (vnet_main_t * vnm, u32 hw_if_index,
(vnm, hw_if_index, mac_address);
}
+static int
+vnet_sw_interface_check_table_same (u32 unnumbered_sw_if_index,
+ u32 ip_sw_if_index)
+{
+ if (ip4_main.fib_index_by_sw_if_index[unnumbered_sw_if_index] !=
+ ip4_main.fib_index_by_sw_if_index[ip_sw_if_index])
+ return VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+
+ if (ip4_main.mfib_index_by_sw_if_index[unnumbered_sw_if_index] !=
+ ip4_main.mfib_index_by_sw_if_index[ip_sw_if_index])
+ return VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+
+ if (ip6_main.fib_index_by_sw_if_index[unnumbered_sw_if_index] !=
+ ip6_main.fib_index_by_sw_if_index[ip_sw_if_index])
+ return VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+
+ if (ip6_main.mfib_index_by_sw_if_index[unnumbered_sw_if_index] !=
+ ip6_main.mfib_index_by_sw_if_index[ip_sw_if_index])
+ return VNET_API_ERROR_UNEXPECTED_INTF_STATE;
+
+ return 0;
+}
+
/* update the unnumbered state of an interface*/
-void
+int
vnet_sw_interface_update_unnumbered (u32 unnumbered_sw_if_index,
u32 ip_sw_if_index, u8 enable)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_sw_interface_t *si;
u32 was_unnum;
+ int rv = 0;
si = vnet_get_sw_interface (vnm, unnumbered_sw_if_index);
was_unnum = (si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED);
if (enable)
{
+ rv = vnet_sw_interface_check_table_same (unnumbered_sw_if_index,
+ ip_sw_if_index);
+ if (rv != 0)
+ return rv;
si->flags |= VNET_SW_INTERFACE_FLAG_UNNUMBERED;
si->unnumbered_sw_if_index = ip_sw_if_index;
@@ -1660,6 +1744,8 @@ vnet_sw_interface_update_unnumbered (u32 unnumbered_sw_if_index,
ip4_sw_interface_enable_disable (unnumbered_sw_if_index, enable);
ip6_sw_interface_enable_disable (unnumbered_sw_if_index, enable);
}
+
+ return 0;
}
vnet_l3_packet_type_t
@@ -1841,13 +1927,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (collect_detailed_interface_stats_command, static) = {
.path = "interface collect detailed-stats",
.short_help = "interface collect detailed-stats <enable|disable>",
.function = collect_detailed_interface_stats_cli,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/interface.h b/src/vnet/interface.h
index a761b04aff8..f0cb540f979 100644
--- a/src/vnet/interface.h
+++ b/src/vnet/interface.h
@@ -44,6 +44,7 @@
#include <vppinfra/pcap.h>
#include <vnet/l3_types.h>
#include <vppinfra/lock.h>
+#include <vnet/hash/hash.h>
struct vnet_main_t;
struct vnet_hw_interface_t;
@@ -69,6 +70,10 @@ typedef clib_error_t *(vnet_subif_add_del_function_t)
(struct vnet_main_t * vnm, u32 if_index,
struct vnet_sw_interface_t * template, int is_add);
+/* Interface set mtu callback. */
+typedef clib_error_t *(vnet_interface_set_max_frame_size_function_t) (
+ struct vnet_main_t *vnm, struct vnet_hw_interface_t *hi, u32 mtu);
+
/* Interface set mac address callback. */
typedef clib_error_t *(vnet_interface_set_mac_address_function_t)
(struct vnet_hw_interface_t * hi,
@@ -287,6 +292,8 @@ typedef struct _vnet_device_class
} vnet_device_class_t;
+u32 vnet_register_device_class (vlib_main_t *, vnet_device_class_t *);
+
#ifndef CLIB_MARCH_VARIANT
#define VNET_DEVICE_CLASS(x,...) \
__VA_ARGS__ vnet_device_class_t x; \
@@ -315,7 +322,8 @@ static __clib_unused vnet_device_class_t __clib_unused_##x
#endif
#define VNET_DEVICE_CLASS_TX_FN(devclass) \
- uword CLIB_MARCH_SFX (devclass##_tx_fn) (); \
+ uword CLIB_MARCH_SFX (devclass##_tx_fn) ( \
+ vlib_main_t *, vlib_node_runtime_t *, vlib_frame_t *); \
static vlib_node_fn_registration_t CLIB_MARCH_SFX ( \
devclass##_tx_fn_registration) = { \
.function = &CLIB_MARCH_SFX (devclass##_tx_fn), \
@@ -410,6 +418,9 @@ typedef struct _vnet_hw_interface_class
/* Flags */
vnet_hw_interface_class_flags_t flags;
+ /* tx hash type for interfaces of this hw class */
+ vnet_hash_fn_type_t tx_hash_fn_type;
+
/* Function to call when hardware interface is added/deleted. */
vnet_interface_function_t *interface_add_del_function;
@@ -425,6 +436,9 @@ typedef struct _vnet_hw_interface_class
/* Function to add/delete additional MAC addresses */
vnet_interface_add_del_mac_address_function_t *mac_addr_add_del_function;
+ /* Function to set max frame size. */
+ vnet_interface_set_max_frame_size_function_t *set_max_frame_size;
+
/* Format function to display interface name. */
format_function_t *format_interface_name;
@@ -515,60 +529,60 @@ typedef enum vnet_hw_interface_flags_t_
VNET_HW_INTERFACE_FLAG_NBMA = (1 << 19),
} vnet_hw_interface_flags_t;
-typedef enum vnet_hw_interface_capabilities_t_
+#define foreach_vnet_hw_if_caps \
+ _ (0, TX_IP4_CKSUM, "ip4-csum-tx") \
+ _ (1, TX_TCP_CKSUM, "tcp-csum-tx") \
+ _ (2, TX_UDP_CKSUM, "udp-csum-tx") \
+ _ (3, TX_IP4_OUTER_CKSUM, "outer-ip4-csum-tx") \
+ _ (4, TX_UDP_OUTER_CKSUM, "outer-udp-csum-tx") \
+ _ (5, RX_IP4_CKSUM, "ip4-csum-rx") \
+ _ (6, RX_TCP_CKSUM, "tcp-csum-rx") \
+ _ (7, RX_UDP_CKSUM, "udp-csum-rx") \
+ _ (8, RX_IP4_OUTER_CKSUM, "outer-ip4-csum-rx") \
+ _ (9, RX_UDP_OUTER_CKSUM, "outer-udp-csum-rx") \
+ _ (10, TCP_GSO, "tcp-tso") \
+ _ (11, UDP_GSO, "udp-gso") \
+ _ (12, VXLAN_TNL_GSO, "vxlan-tnl-gso") \
+ _ (13, IPIP_TNL_GSO, "ipip-tnl-gso") \
+ _ (14, GENEVE_TNL_GSO, "geneve-tnl-gso") \
+ _ (15, GRE_TNL_GSO, "gre-tnl-gso") \
+ _ (16, UDP_TNL_GSO, "udp-tnl-gso") \
+ _ (17, IP_TNL_GSO, "ip-tnl-gso") \
+ _ (18, TCP_LRO, "tcp-lro") \
+ _ (30, INT_MODE, "int-mode") \
+ _ (31, MAC_FILTER, "mac-filter")
+
+typedef enum vnet_hw_if_caps_t_
{
VNET_HW_INTERFACE_CAP_NONE,
+#define _(bit, sfx, str) VNET_HW_IF_CAP_##sfx = (1 << (bit)),
+ foreach_vnet_hw_if_caps
+#undef _
+
+} vnet_hw_if_caps_t;
+
+#define VNET_HW_IF_CAP_L4_TX_CKSUM \
+ (VNET_HW_IF_CAP_TX_TCP_CKSUM | VNET_HW_IF_CAP_TX_UDP_CKSUM)
+
+#define VNET_HW_IF_CAP_TX_CKSUM \
+ (VNET_HW_IF_CAP_TX_IP4_CKSUM | VNET_HW_IF_CAP_TX_TCP_CKSUM | \
+ VNET_HW_IF_CAP_TX_UDP_CKSUM)
+
+#define VNET_HW_IF_CAP_TX_OUTER_CKSUM \
+ (VNET_HW_IF_CAP_TX_IP4_OUTER_CKSUM | VNET_HW_IF_CAP_TX_UDP_OUTER_CKSUM)
- /* tx checksum offload */
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_IP4_CKSUM = (1 << 0),
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM = (1 << 1),
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM = (1 << 2),
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_IP4_OUTER_CKSUM = (1 << 3),
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_OUTER_CKSUM = (1 << 4),
-
- /* rx checksum offload */
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_IP4_CKSUM = (1 << 5),
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_UDP_CKSUM = (1 << 6),
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_TCP_CKSUM = (1 << 7),
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_IP4_OUTER_CKSUM = (1 << 8),
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_UDP_OUTER_CKSUM = (1 << 9),
-
- /* gso */
- VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO = (1 << 10),
- VNET_HW_INTERFACE_CAP_SUPPORTS_UDP_GSO = (1 << 11),
- VNET_HW_INTERFACE_CAP_SUPPORTS_VXLAN_TNL_GSO = (1 << 12),
- VNET_HW_INTERFACE_CAP_SUPPORTS_IPIP_TNL_GSO = (1 << 13),
- VNET_HW_INTERFACE_CAP_SUPPORTS_GENEVE_TNL_GSO = (1 << 14),
- VNET_HW_INTERFACE_CAP_SUPPORTS_GRE_TNL_GSO = (1 << 15),
- VNET_HW_INTERFACE_CAP_SUPPORTS_UDP_TNL_GSO = (1 << 16),
- VNET_HW_INTERFACE_CAP_SUPPORTS_IP_TNL_GSO = (1 << 17),
-
- /* lro */
- VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_LRO = (1 << 18),
-
- /* rx mode */
- VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE = (1 << 30),
- /* hw/driver can switch between l2-promisc and l3-dmac-filter modes */
- VNET_HW_INTERFACE_CAP_SUPPORTS_MAC_FILTER = (1 << 31),
-} vnet_hw_interface_capabilities_t;
-
-#define VNET_HW_INTERFACE_CAP_SUPPORTS_L4_TX_CKSUM \
- (VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM)
-
-#define VNET_HW_INTERFACE_CAP_SUPPORTS_TX_CKSUM \
- (VNET_HW_INTERFACE_CAP_SUPPORTS_TX_IP4_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_TCP_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_TX_UDP_CKSUM)
-
-#define VNET_HW_INTERFACE_CAP_SUPPORTS_L4_RX_CKSUM \
- (VNET_HW_INTERFACE_CAP_SUPPORTS_RX_TCP_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_UDP_CKSUM)
-
-#define VNET_HW_INTERFACE_CAP_SUPPORTS_RX_CKSUM \
- (VNET_HW_INTERFACE_CAP_SUPPORTS_RX_IP4_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_TCP_CKSUM | \
- VNET_HW_INTERFACE_CAP_SUPPORTS_RX_UDP_CKSUM)
+#define VNET_HW_IF_CAP_TX_CKSUM_MASK \
+ (VNET_HW_IF_CAP_TX_CKSUM | VNET_HW_IF_CAP_TX_OUTER_CKSUM)
+
+#define VNET_HW_IF_CAP_L4_RX_CKSUM \
+ (VNET_HW_IF_CAP_RX_TCP_CKSUM | VNET_HW_IF_CAP_RX_UDP_CKSUM)
+
+#define VNET_HW_IF_CAP_RX_CKSUM \
+ (VNET_HW_IF_CAP_RX_IP4_CKSUM | VNET_HW_IF_CAP_RX_TCP_CKSUM | \
+ VNET_HW_IF_CAP_RX_UDP_CKSUM)
+
+#define VNET_HW_IF_CAP_TNL_GSO_MASK \
+ VNET_HW_IF_CAP_VXLAN_TNL_GSO | VNET_HW_IF_CAP_IPIP_TNL_GSO
#define VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT 1
#define VNET_HW_INTERFACE_FLAG_SPEED_SHIFT 3
@@ -629,8 +643,9 @@ typedef struct
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- vnet_hw_if_tx_frame_t frame;
- u32 n_threads;
+ vnet_hw_if_tx_frame_t *frame;
+ u32 *lookup_table;
+ u32 n_queues;
} vnet_hw_if_output_node_runtime_t;
/* Hardware-interface. This corresponds to a physical wire
@@ -642,7 +657,7 @@ typedef struct vnet_hw_interface_t
vnet_hw_interface_flags_t flags;
/* capabilities flags */
- vnet_hw_interface_capabilities_t caps;
+ vnet_hw_if_caps_t caps;
/* Hardware address as vector. Zero (e.g. zero-length vector) if no
address for this class (e.g. PPP). */
@@ -684,20 +699,20 @@ typedef struct vnet_hw_interface_t
used by node function vnet_per_buffer_interface_output() */
u32 output_node_next_index;
+ /* called when hw interface is using transmit side packet steering */
+ vnet_hash_fn_t hf;
+
/* Maximum transmit rate for this interface in bits/sec. */
f64 max_rate_bits_per_sec;
- /* Smallest packet size supported by this interface. */
- u32 min_supported_packet_bytes;
-
- /* Largest packet size supported by this interface. */
- u32 max_supported_packet_bytes;
-
/* Smallest packet size for this interface. */
- u32 min_packet_bytes;
+ u32 min_frame_size;
- /* Largest packet size for this interface. */
- u32 max_packet_bytes;
+ /* Largest frame size for this interface. */
+ u32 max_frame_size;
+
+ /* Layer 2 overhead */
+ u16 frame_overhead;
/* Hash table mapping sub interface id to sw_if_index. */
uword *sub_interface_sw_if_index_by_id;
@@ -717,9 +732,6 @@ typedef struct vnet_hw_interface_t
/* Input node */
u32 input_node_index;
- /* input node cpu index by queue */
- u32 *input_node_thread_index_by_queue;
-
vnet_hw_if_rx_mode default_rx_mode;
/* rx queues */
@@ -751,7 +763,8 @@ typedef struct
typedef struct
{
- vnet_hw_if_rxq_poll_vector_t *rxq_poll_vector;
+ vnet_hw_if_rxq_poll_vector_t *rxq_vector_int;
+ vnet_hw_if_rxq_poll_vector_t *rxq_vector_poll;
void *rxq_interrupts;
} vnet_hw_if_rx_node_runtime_t;
diff --git a/src/vnet/interface/caps.c b/src/vnet/interface/caps.c
new file mode 100644
index 00000000000..54e8d90c471
--- /dev/null
+++ b/src/vnet/interface/caps.c
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/interface.h>
+
+VLIB_REGISTER_LOG_CLASS (if_caps_log, static) = {
+ .class_name = "interface",
+ .subclass_name = "caps",
+};
+
+#define log_debug(fmt, ...) \
+ vlib_log_debug (if_caps_log.class, fmt, __VA_ARGS__)
+
+format_function_t format_vnet_hw_if_caps;
+
+void
+vnet_hw_if_change_caps (vnet_main_t *vnm, u32 hw_if_index,
+ vnet_hw_if_caps_change_t *caps)
+{
+ vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
+ vnet_hw_if_caps_t old = hi->caps;
+
+ hi->caps = (hi->caps & ~caps->mask) | caps->val;
+
+ log_debug ("change: interface %U, set: %U, cleared: %U",
+ format_vnet_hw_if_index_name, vnm, hw_if_index,
+ format_vnet_hw_if_caps, (old ^ hi->caps) & caps->val,
+ format_vnet_hw_if_caps, (old ^ hi->caps) & ~caps->val);
+}
+
+u8 *
+format_vnet_hw_if_caps (u8 *s, va_list *va)
+{
+ vnet_hw_if_caps_t caps = va_arg (*va, vnet_hw_if_caps_t);
+
+ const char *strings[sizeof (vnet_hw_if_caps_t) * 8] = {
+#define _(bit, sfx, str) [bit] = (str),
+ foreach_vnet_hw_if_caps
+#undef _
+ };
+
+ if (caps == 0)
+ return format (s, "none");
+
+ while (caps)
+ {
+ int bit = get_lowest_set_bit_index (caps);
+
+ if (strings[bit])
+ s = format (s, "%s", strings[bit]);
+ else
+ s = format (s, "unknown-%u", bit);
+
+ caps = clear_lowest_set_bit (caps);
+ if (caps)
+ vec_add1 (s, ' ');
+ }
+
+ return s;
+}
diff --git a/src/vnet/interface/monitor.c b/src/vnet/interface/monitor.c
new file mode 100644
index 00000000000..3ae1fd29156
--- /dev/null
+++ b/src/vnet/interface/monitor.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vlib/vlib.h>
+
+static clib_error_t *
+monitor_interface_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ const vnet_main_t *vnm = vnet_get_main ();
+ const vlib_combined_counter_main_t *counters =
+ vnm->interface_main.combined_sw_if_counters;
+ f64 refresh_interval = 1.0;
+ u32 refresh_count = ~0;
+ clib_error_t *error = 0;
+ vlib_counter_t vrx[2], vtx[2];
+ f64 ts[2];
+ u32 hw_if_index = ~0;
+ u8 spin = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
+ ;
+ else if (unformat (input, "interval %f", &refresh_interval))
+ ;
+ else if (unformat (input, "count %u", &refresh_count))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (hw_if_index == ~0)
+ {
+ error = clib_error_return (0, "no interface passed");
+ goto done;
+ }
+
+ vlib_get_combined_counter (counters + VNET_INTERFACE_COUNTER_RX, hw_if_index,
+ &vrx[spin]);
+ vlib_get_combined_counter (counters + VNET_INTERFACE_COUNTER_TX, hw_if_index,
+ &vtx[spin]);
+ ts[spin] = vlib_time_now (vm);
+
+ while (refresh_count--)
+ {
+ f64 sleep_interval, tsd;
+
+ while (((sleep_interval =
+ ts[spin] + refresh_interval - vlib_time_now (vm)) > 0.0))
+ {
+ uword event_type, *event_data = 0;
+ vlib_process_wait_for_event_or_clock (vm, sleep_interval);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0: /* no events => timeout */
+ break;
+ default:
+ /* someone pressed a key, abort */
+ vlib_cli_output (vm, "Aborted due to a keypress.");
+ goto done;
+ }
+ vec_free (event_data);
+ }
+ spin ^= 1;
+ vlib_get_combined_counter (counters + VNET_INTERFACE_COUNTER_RX,
+ hw_if_index, &vrx[spin]);
+ vlib_get_combined_counter (counters + VNET_INTERFACE_COUNTER_TX,
+ hw_if_index, &vtx[spin]);
+ ts[spin] = vlib_time_now (vm);
+
+ tsd = ts[spin] - ts[spin ^ 1];
+ vlib_cli_output (
+ vm, "rx: %Upps %Ubps tx: %Upps %Ubps", format_base10,
+ (u64) ((vrx[spin].packets - vrx[spin ^ 1].packets) / tsd),
+ format_base10,
+ (u64) (8 * (vrx[spin].bytes - vrx[spin ^ 1].bytes) / tsd),
+ format_base10,
+ (u64) ((vtx[spin].packets - vtx[spin ^ 1].packets) / tsd),
+ format_base10,
+ (u64) (8 * (vtx[spin].bytes - vtx[spin ^ 1].bytes) / tsd));
+ }
+
+done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (monitor_interface_command, static) = {
+ .path = "monitor interface",
+ .short_help =
+ "monitor interface <interface> [interval <intv>] [count <count>]",
+ .function = monitor_interface_command_fn,
+ .is_mp_safe = 1,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/interface/runtime.c b/src/vnet/interface/runtime.c
index 462f7bbfba7..a88a23bd4c9 100644
--- a/src/vnet/interface/runtime.c
+++ b/src/vnet/interface/runtime.c
@@ -62,7 +62,7 @@ vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
u32 node_index = hi->input_node_index;
vnet_hw_if_rx_queue_t *rxq;
- vnet_hw_if_rxq_poll_vector_t *pv, **d = 0;
+ vnet_hw_if_rxq_poll_vector_t *pv, **d = 0, **a = 0;
vnet_hw_if_output_node_runtime_t *new_out_runtimes = 0;
vlib_node_state_t *per_thread_node_state = 0;
u32 n_threads = vlib_get_n_threads ();
@@ -76,6 +76,7 @@ vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
format_vlib_node_name, vm, node_index, hi->name);
vec_validate (d, n_threads - 1);
+ vec_validate (a, n_threads - 1);
vec_validate_init_empty (per_thread_node_state, n_threads - 1,
VLIB_NODE_STATE_DISABLED);
vec_validate_init_empty (per_thread_node_adaptive, n_threads - 1, 0);
@@ -126,6 +127,13 @@ vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE)
last_int = clib_max (last_int, rxq - im->hw_if_rx_queues);
+ if (per_thread_node_adaptive[ti])
+ {
+ vec_add2_aligned (a[ti], pv, 1, CLIB_CACHE_LINE_BYTES);
+ pv->dev_instance = rxq->dev_instance;
+ pv->queue_id = rxq->queue_id;
+ }
+
if (per_thread_node_state[ti] != VLIB_NODE_STATE_POLLING)
continue;
@@ -157,49 +165,92 @@ vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
{
vnet_hw_if_rx_node_runtime_t *rt;
rt = vlib_node_get_runtime_data (ovm, node_index);
- if (vec_len (rt->rxq_poll_vector) != vec_len (d[i]))
+ if (vec_len (rt->rxq_vector_int) != vec_len (d[i]))
something_changed_on_rx = 1;
- else if (memcmp (d[i], rt->rxq_poll_vector,
+ else if (memcmp (d[i], rt->rxq_vector_int,
vec_len (d[i]) * sizeof (**d)))
something_changed_on_rx = 1;
if (clib_interrupt_get_n_int (rt->rxq_interrupts) != last_int + 1)
something_changed_on_rx = 1;
+
+ if (something_changed_on_rx == 0 && per_thread_node_adaptive[i])
+ {
+ if (vec_len (rt->rxq_vector_poll) != vec_len (a[i]))
+ something_changed_on_rx = 1;
+ else if (memcmp (a[i], rt->rxq_vector_poll,
+ vec_len (a[i]) * sizeof (**a)))
+ something_changed_on_rx = 1;
+ }
}
}
- new_out_runtimes =
- vec_dup_aligned (hi->output_node_thread_runtimes, CLIB_CACHE_LINE_BYTES);
- vec_validate_aligned (new_out_runtimes, n_threads - 1,
- CLIB_CACHE_LINE_BYTES);
-
- if (vec_len (hi->output_node_thread_runtimes) != vec_len (new_out_runtimes))
- something_changed_on_tx = 1;
-
- for (int i = 0; i < vec_len (hi->tx_queue_indices); i++)
+ if (vec_len (hi->tx_queue_indices) > 0)
{
- u32 thread_index;
- u32 queue_index = hi->tx_queue_indices[i];
- vnet_hw_if_tx_queue_t *txq = vnet_hw_if_get_tx_queue (vnm, queue_index);
- uword n_threads = clib_bitmap_count_set_bits (txq->threads);
+ new_out_runtimes = vec_dup_aligned (hi->output_node_thread_runtimes,
+ CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (new_out_runtimes, n_threads - 1,
+ CLIB_CACHE_LINE_BYTES);
- clib_bitmap_foreach (thread_index, txq->threads)
+ for (u32 i = 0; i < vec_len (new_out_runtimes); i++)
{
vnet_hw_if_output_node_runtime_t *rt;
- rt = vec_elt_at_index (new_out_runtimes, thread_index);
- if ((rt->frame.queue_id != txq->queue_id) ||
- (rt->n_threads != n_threads))
+ rt = vec_elt_at_index (new_out_runtimes, i);
+ u32 n_queues = 0, total_queues = vec_len (hi->tx_queue_indices);
+ rt->frame = 0;
+ rt->lookup_table = 0;
+
+ for (u32 j = 0; j < total_queues; j++)
{
+ u32 queue_index = hi->tx_queue_indices[j];
+ vnet_hw_if_tx_frame_t frame = { .shared_queue = 0,
+ .hints = 7,
+ .queue_id = ~0 };
+ vnet_hw_if_tx_queue_t *txq =
+ vnet_hw_if_get_tx_queue (vnm, queue_index);
+ if (!clib_bitmap_get (txq->threads, i))
+ continue;
+
log_debug ("tx queue data changed for interface %v, thread %u "
- "(queue_id %u -> %u, n_threads %u -> %u)",
- hi->name, thread_index, rt->frame.queue_id,
- txq->queue_id, rt->n_threads, n_threads);
+ "(queue_id %u)",
+ hi->name, i, txq->queue_id);
+ something_changed_on_tx = 1;
+
+ frame.queue_id = txq->queue_id;
+ frame.shared_queue = txq->shared_queue;
+ vec_add1 (rt->frame, frame);
+ n_queues++;
+ }
+
+ // don't initialize rt->n_queues above
+ if (rt->n_queues != n_queues)
+ {
something_changed_on_tx = 1;
- rt->frame.queue_id = txq->queue_id;
- rt->frame.shared_queue = txq->shared_queue;
- rt->n_threads = n_threads;
+ rt->n_queues = n_queues;
+ }
+ /*
+ * It is only used in case of multiple txq.
+ */
+ if (rt->n_queues > 0)
+ {
+ if (!is_pow2 (n_queues))
+ n_queues = max_pow2 (n_queues);
+
+ vec_validate_aligned (rt->lookup_table, n_queues - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ for (u32 k = 0; k < vec_len (rt->lookup_table); k++)
+ {
+ rt->lookup_table[k] = rt->frame[k % rt->n_queues].queue_id;
+ log_debug ("tx queue lookup table changed for interface %v, "
+ "(lookup table [%u]=%u)",
+ hi->name, k, rt->lookup_table[k]);
+ }
}
}
}
+ else
+ /* interface deleted */
+ something_changed_on_tx = 1;
if (something_changed_on_rx || something_changed_on_tx)
{
@@ -223,18 +274,24 @@ vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
vlib_main_t *vm = vlib_get_main_by_index (i);
vnet_hw_if_rx_node_runtime_t *rt;
rt = vlib_node_get_runtime_data (vm, node_index);
- pv = rt->rxq_poll_vector;
- rt->rxq_poll_vector = d[i];
+ pv = rt->rxq_vector_int;
+ rt->rxq_vector_int = d[i];
d[i] = pv;
+ if (per_thread_node_adaptive[i])
+ {
+ pv = rt->rxq_vector_poll;
+ rt->rxq_vector_poll = a[i];
+ a[i] = pv;
+ }
+
if (rt->rxq_interrupts)
{
void *in = rt->rxq_interrupts;
int int_num = -1;
- while ((int_num = clib_interrupt_get_next (in, int_num)) !=
- -1)
+ while ((int_num = clib_interrupt_get_next_and_clear (
+ in, int_num)) != -1)
{
- clib_interrupt_clear (in, int_num);
pending_int = clib_bitmap_set (pending_int, int_num, 1);
last_int = clib_max (last_int, int_num);
}
@@ -276,9 +333,18 @@ vnet_hw_if_update_runtime_data (vnet_main_t *vnm, u32 hw_if_index)
}
for (int i = 0; i < n_threads; i++)
- vec_free (d[i]);
+ {
+ vec_free (d[i]);
+ vec_free (a[i]);
+ if (new_out_runtimes)
+ {
+ vec_free (new_out_runtimes[i].frame);
+ vec_free (new_out_runtimes[i].lookup_table);
+ }
+ }
vec_free (d);
+ vec_free (a);
vec_free (per_thread_node_state);
vec_free (per_thread_node_adaptive);
vec_free (new_out_runtimes);
diff --git a/src/vnet/interface/rx_queue.c b/src/vnet/interface/rx_queue.c
index 1099a0ba0f9..b1fc82f38e9 100644
--- a/src/vnet/interface/rx_queue.c
+++ b/src/vnet/interface/rx_queue.c
@@ -124,7 +124,10 @@ vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index)
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
vnet_interface_main_t *im = &vnm->interface_main;
vnet_hw_if_rx_queue_t *rxq;
+ vlib_main_t *vm;
+ vnet_hw_if_rx_node_runtime_t *rt;
u64 key;
+ u32 queue_index;
log_debug ("unregister_all: interface %v", hi->name);
@@ -132,6 +135,15 @@ vnet_hw_if_unregister_all_rx_queues (vnet_main_t *vnm, u32 hw_if_index)
{
rxq = vnet_hw_if_get_rx_queue (vnm, hi->rx_queue_indices[i]);
key = rx_queue_key (rxq->hw_if_index, rxq->queue_id);
+ if (PREDICT_FALSE (rxq->mode == VNET_HW_IF_RX_MODE_INTERRUPT ||
+ rxq->mode == VNET_HW_IF_RX_MODE_ADAPTIVE))
+ {
+ vm = vlib_get_main_by_index (rxq->thread_index);
+ queue_index = vnet_hw_if_get_rx_queue_index_by_id (vnm, hw_if_index,
+ rxq->queue_id);
+ rt = vlib_node_get_runtime_data (vm, hi->input_node_index);
+ clib_interrupt_clear (rt->rxq_interrupts, queue_index);
+ }
hash_unset_mem_free (&im->rxq_index_by_hw_if_index_and_queue_id, &key);
pool_put_index (im->hw_if_rx_queues, hi->rx_queue_indices[i]);
@@ -228,7 +240,7 @@ vnet_hw_if_set_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index,
hi->name, rxq->queue_id, thread_index);
}
-void
+vnet_hw_if_rxq_poll_vector_t *
vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm,
vlib_node_runtime_t *node)
{
@@ -238,20 +250,19 @@ vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm,
ASSERT (node->state == VLIB_NODE_STATE_INTERRUPT);
- vec_reset_length (rt->rxq_poll_vector);
+ vec_reset_length (rt->rxq_vector_int);
- while ((int_num = clib_interrupt_get_next (rt->rxq_interrupts, int_num)) !=
- -1)
+ while ((int_num = clib_interrupt_get_next_and_clear (rt->rxq_interrupts,
+ int_num)) != -1)
{
vnet_hw_if_rx_queue_t *rxq = vnet_hw_if_get_rx_queue (vnm, int_num);
vnet_hw_if_rxq_poll_vector_t *pv;
- clib_interrupt_clear (rt->rxq_interrupts, int_num);
-
- vec_add2 (rt->rxq_poll_vector, pv, 1);
+ vec_add2 (rt->rxq_vector_int, pv, 1);
pv->dev_instance = rxq->dev_instance;
pv->queue_id = rxq->queue_id;
}
+ return rt->rxq_vector_int;
}
/*
diff --git a/src/vnet/interface/rx_queue_funcs.h b/src/vnet/interface/rx_queue_funcs.h
index 26dc1b8777f..906d7118296 100644
--- a/src/vnet/interface/rx_queue_funcs.h
+++ b/src/vnet/interface/rx_queue_funcs.h
@@ -33,8 +33,9 @@ vnet_hw_if_rx_mode vnet_hw_if_get_rx_queue_mode (vnet_main_t *vnm,
u32 queue_index);
void vnet_hw_if_set_rx_queue_thread_index (vnet_main_t *vnm, u32 queue_index,
u32 thread_index);
-void vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm,
- vlib_node_runtime_t *node);
+vnet_hw_if_rxq_poll_vector_t *
+vnet_hw_if_generate_rxq_int_poll_vector (vlib_main_t *vm,
+ vlib_node_runtime_t *node);
/* inline functions */
@@ -69,11 +70,14 @@ static_always_inline vnet_hw_if_rxq_poll_vector_t *
vnet_hw_if_get_rxq_poll_vector (vlib_main_t *vm, vlib_node_runtime_t *node)
{
vnet_hw_if_rx_node_runtime_t *rt = (void *) node->runtime_data;
+ vnet_hw_if_rxq_poll_vector_t *pv = rt->rxq_vector_int;
if (PREDICT_FALSE (node->state == VLIB_NODE_STATE_INTERRUPT))
- vnet_hw_if_generate_rxq_int_poll_vector (vm, node);
+ pv = vnet_hw_if_generate_rxq_int_poll_vector (vm, node);
+ else if (node->flags & VLIB_NODE_FLAG_ADAPTIVE_MODE)
+ pv = rt->rxq_vector_poll;
- return rt->rxq_poll_vector;
+ return pv;
}
static_always_inline u8
diff --git a/src/vnet/interface/stats.c b/src/vnet/interface/stats.c
new file mode 100644
index 00000000000..4f3213aafc3
--- /dev/null
+++ b/src/vnet/interface/stats.c
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/stats/stats.h>
+#include <vnet/vnet.h>
+#include <vnet/devices/devices.h> /* vnet_get_aggregate_rx_packets */
+#include <vnet/interface.h>
+
+vlib_stats_string_vector_t if_names = 0;
+static u32 **dir_entry_indices = 0;
+
+static struct
+{
+ char *prefix, *name;
+ u32 index;
+} if_counters[] = {
+#define _(e, n, p) { .prefix = #p, .name = #n },
+ foreach_simple_interface_counter_name foreach_combined_interface_counter_name
+#undef _
+};
+
+static clib_error_t *
+statseg_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ u8 *name = 0;
+
+ if (if_names == 0)
+ {
+ if_names = vlib_stats_add_string_vector ("/if/names");
+
+ for (int i = 0; i < ARRAY_LEN (if_counters); i++)
+ if_counters[i].index = vlib_stats_find_entry_index (
+ "/%s/%s", if_counters[i].prefix, if_counters[i].name);
+ }
+
+ vec_validate (dir_entry_indices, sw_if_index);
+
+ vlib_stats_segment_lock ();
+
+ if (is_add)
+ {
+ vnet_sw_interface_t *si, *si_sup;
+ vnet_hw_interface_t *hi_sup;
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+ si_sup = vnet_get_sup_sw_interface (vnm, si->sw_if_index);
+ ASSERT (si_sup->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
+ hi_sup = vnet_get_hw_interface (vnm, si_sup->hw_if_index);
+
+ name = format (0, "%v", hi_sup->name);
+ if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ name = format (name, ".%d", si->sub.id);
+
+ vlib_stats_set_string_vector (&if_names, sw_if_index, "%v", name);
+
+ for (u32 index, i = 0; i < ARRAY_LEN (if_counters); i++)
+ {
+ index = vlib_stats_add_symlink (
+ if_counters[i].index, sw_if_index, "/interfaces/%U/%s",
+ format_vlib_stats_symlink, name, if_counters[i].name);
+ ASSERT (index != ~0);
+ vec_add1 (dir_entry_indices[sw_if_index], index);
+ }
+ }
+ else
+ {
+ name = format (0, "%s", "deleted");
+ vlib_stats_set_string_vector (&if_names, sw_if_index, "%v", name);
+ for (u32 i = 0; i < vec_len (dir_entry_indices[sw_if_index]); i++)
+ vlib_stats_remove_entry (dir_entry_indices[sw_if_index][i]);
+ vec_free (dir_entry_indices[sw_if_index]);
+ }
+
+ vec_free (name);
+
+ vlib_stats_segment_unlock ();
+
+ return 0;
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (statseg_sw_interface_add_del);
diff --git a/src/vnet/interface/tx_queue.rst b/src/vnet/interface/tx_queue.rst
new file mode 100644
index 00000000000..e8f0e039b8e
--- /dev/null
+++ b/src/vnet/interface/tx_queue.rst
@@ -0,0 +1,159 @@
+.. _TX_Queue_doc:
+
+Transmit Queues
+===============
+
+Overview
+________
+
+VPP implements Transmit queues infra to access and manage them. It provides
+common registration functions to register or unregister interfaces’ transmit
+queues. It also provides functions for queues placement on given thread(s).
+
+The TXQ Infrastructure
+_______________________
+
+Infra registers each queue using a unique key which is formed by concatenating
+the hardware interface index ``hw_if_index`` and unique queue identifier for
+given interface ``queue_id``. As a result of registration of queue, infra
+returns back a unique global ``queue_index`` which can be used by driver to
+access that queue later.
+
+Interface output node uses pre-computed ``output_node_thread_runtime`` data
+which provides essential information related to queue placements on given
+thread of given interface. Transmit queue infra implements an algorithm to
+pre-compute this information. It also pre-computes scalar arguments of frame
+``vnet_hw_if_tx_frame_t``. It also pre-calculates a ``lookup_table`` for
+thread if there are multiple transmit queues are placed on that thread.
+Interface drivers call ``vnet_hw_if_update_runtime_data()`` to execute that
+algorithm after registering the transmit queues to TXQ infra.
+
+The algorithm makes the copy of existing runtime data and iterate through them
+for each vpp main and worker thread. In each iteration, algorithm loop through
+all the tx queues of given interface to fill the information in the frame data
+structure ``vnet_hw_if_tx_frame_t``. Algorithm also updates the information
+related to number of transmit queues of given interface on given vpp thread in
+data structure ``output_node_thread_runtime``. As a consequence of any update
+to the copy, triggers the function to update the actual working copy by taking
+the worker barrier and free the old copy of ``output_node_thread_runtime``.
+
+Multi-TXQ infra
+^^^^^^^^^^^^^^^
+
+Interface output node uses packet flow hash using hash infra in case of multi-txq
+on given thread. Each hardware interface class contains type of the hash required
+for interfaces from that hardware interface class i.e. ethernet interface hardware
+class contains type ``VNET_HASH_FN_TYPE_ETHERNET``. Though, the hash function
+itself is contained by hardware interface data structure of given interface. Default
+hashing function is selected upon interface creation based on priority. User can
+configure a different hash to an interface for multi-txq use case.
+
+Interface output node uses packet flow hash as an index to the pre-calculated lookup
+table to get the queue identifier for given transmit queue. Interface output node
+enqueues the packets to respective frame and also copies the ``vnet_hw_if_tx_frame_t``
+to frame scalar arguments. Drivers use scalar arguments ``vnet_hw_if_tx_frame_t``
+of the given frame to extract the information about the transmit queue to be used to
+transmit the packets. Drivers may need to acquire a lock on given queue before
+transmitting the packets based on the ``shared_queue`` bit status.
+
+Data structures
+^^^^^^^^^^^^^^^
+
+Queue information is stored in data structure ``vnet_hw_if_tx_queue_t``:
+
+.. code:: c
+
+ typedef struct
+ {
+ /* either this queue is shared among multiple threads */
+ u8 shared_queue : 1;
+ /* hw interface index */
+ u32 hw_if_index;
+
+ /* hardware queue identifier */
+ u32 queue_id;
+
+ /* bitmap of threads which use this queue */
+ clib_bitmap_t *threads;
+ } vnet_hw_if_tx_queue_t;
+
+
+Frame information is stored in data structure: ``vnet_hw_if_tx_frame_t``:
+
+.. code:: c
+
+ typedef enum
+ {
+ VNET_HW_IF_TX_FRAME_HINT_NOT_CHAINED = (1 << 0),
+ VNET_HW_IF_TX_FRAME_HINT_NO_GSO = (1 << 1),
+ VNET_HW_IF_TX_FRAME_HINT_NO_CKSUM_OFFLOAD = (1 << 2),
+ } vnet_hw_if_tx_frame_hint_t;
+
+ typedef struct
+ {
+ u8 shared_queue : 1;
+ vnet_hw_if_tx_frame_hint_t hints : 16;
+ u32 queue_id;
+ } vnet_hw_if_tx_frame_t;
+
+Output node runtime information is stored in data structure: ``output_node_thread_runtime``:
+
+.. code:: c
+
+ typedef struct
+ {
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ vnet_hw_if_tx_frame_t *frame;
+ u32 *lookup_table;
+ u32 n_queues;
+ } vnet_hw_if_output_node_runtime_t;
+
+
+MultiTXQ API
+^^^^^^^^^^^^
+
+This API message is used to place tx queue of an interface to vpp main or worker(s) thread(s).
+
+.. code:: c
+
+ autoendian autoreply define sw_interface_set_tx_placement
+ {
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u32 queue_id;
+ u32 array_size;
+ u32 threads[array_size];
+ option vat_help = "<interface | sw_if_index <index>> queue <n> [threads <list> | mask <hex>]";
+ };
+
+Multi-TXQ CLI
+^^^^^^^^^^^^^
+
+::
+
+ set interface tx-queue set interface tx-queue <interface> queue <n> [threads <list>]
+ set interface tx-hash set interface tx-hash <interface> hash-name <hash-name>
+
+::
+
+ show hardware-interfaces
+
+ Name Idx Link Hardware
+ tap0 1 up tap0
+ Link speed: unknown
+ RX Queues:
+ queue thread mode
+ 0 main (0) polling
+ TX Queues:
+ TX Hash: [name: crc32c-5tuple priority: 50 description: IPv4/IPv6 header and TCP/UDP ports]
+ queue shared thread(s)
+ 0 no 0
+ Ethernet address 02:fe:27:69:5a:b5
+ VIRTIO interface
+ instance 0
+ RX QUEUE : Total Packets
+ 0 : 0
+ TX QUEUE : Total Packets
+ 0 : 0
+
diff --git a/src/vnet/interface/tx_queue_funcs.h b/src/vnet/interface/tx_queue_funcs.h
index 22956a4eb9b..8fcf7c336a8 100644
--- a/src/vnet/interface/tx_queue_funcs.h
+++ b/src/vnet/interface/tx_queue_funcs.h
@@ -27,3 +27,20 @@ vnet_hw_if_get_tx_queue (vnet_main_t *vnm, u32 queue_index)
return 0;
return pool_elt_at_index (im->hw_if_tx_queues, queue_index);
}
+
+static_always_inline int
+vnet_hw_if_txq_cmp_cli_api (vnet_hw_if_tx_queue_t **a,
+ vnet_hw_if_tx_queue_t **b)
+{
+ if (*a == *b)
+ return 0;
+
+ if (a[0]->hw_if_index != b[0]->hw_if_index)
+ return 2 * (a[0]->hw_if_index > b[0]->hw_if_index) - 1;
+
+ if (a[0]->queue_id != b[0]->queue_id)
+ return 2 * (a[0]->queue_id > b[0]->queue_id) - 1;
+
+ ASSERT (0);
+ return ~0;
+}
diff --git a/src/vnet/interface_api.c b/src/vnet/interface_api.c
index a1450bd1906..c727e519138 100644
--- a/src/vnet/interface_api.c
+++ b/src/vnet/interface_api.c
@@ -17,11 +17,15 @@
*------------------------------------------------------------------
*/
+#define _GNU_SOURCE
+#include <string.h>
+
#include <vnet/vnet.h>
#include <vlibmemory/api.h>
#include <vnet/interface.h>
#include <vnet/interface/rx_queue_funcs.h>
+#include <vnet/interface/tx_queue_funcs.h>
#include <vnet/api_errno.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/ip.h>
@@ -56,7 +60,9 @@ vpe_api_main_t vpe_api_main;
_ (SW_INTERFACE_ADD_DEL_ADDRESS, sw_interface_add_del_address) \
_ (SW_INTERFACE_SET_RX_MODE, sw_interface_set_rx_mode) \
_ (SW_INTERFACE_RX_PLACEMENT_DUMP, sw_interface_rx_placement_dump) \
+ _ (SW_INTERFACE_TX_PLACEMENT_GET, sw_interface_tx_placement_get) \
_ (SW_INTERFACE_SET_RX_PLACEMENT, sw_interface_set_rx_placement) \
+ _ (SW_INTERFACE_SET_TX_PLACEMENT, sw_interface_set_tx_placement) \
_ (SW_INTERFACE_SET_TABLE, sw_interface_set_table) \
_ (SW_INTERFACE_GET_TABLE, sw_interface_get_table) \
_ (SW_INTERFACE_SET_UNNUMBERED, sw_interface_set_unnumbered) \
@@ -143,6 +149,7 @@ vl_api_hw_interface_set_mtu_t_handler (vl_api_hw_interface_set_mtu_t * mp)
u32 sw_if_index = ntohl (mp->sw_if_index);
u16 mtu = ntohs (mp->mtu);
ethernet_main_t *em = &ethernet_main;
+ clib_error_t *err;
int rv = 0;
VALIDATE_SW_IF_INDEX (mp);
@@ -154,7 +161,6 @@ vl_api_hw_interface_set_mtu_t_handler (vl_api_hw_interface_set_mtu_t * mp)
goto bad_sw_if_index;
}
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, si->hw_if_index);
ethernet_interface_t *eif = ethernet_get_interface (em, si->hw_if_index);
if (!eif)
@@ -163,20 +169,13 @@ vl_api_hw_interface_set_mtu_t_handler (vl_api_hw_interface_set_mtu_t * mp)
goto bad_sw_if_index;
}
- if (mtu < hi->min_supported_packet_bytes)
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto bad_sw_if_index;
- }
-
- if (mtu > hi->max_supported_packet_bytes)
+ if ((err = vnet_hw_interface_set_mtu (vnm, si->hw_if_index, mtu)))
{
- rv = VNET_API_ERROR_INVALID_VALUE;
+ rv = vnet_api_error (err);
+ clib_error_free (err);
goto bad_sw_if_index;
}
- vnet_hw_interface_set_mtu (vnm, si->hw_if_index, mtu);
-
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_HW_INTERFACE_SET_MTU_REPLY);
}
@@ -262,7 +261,7 @@ send_sw_interface_details (vpe_api_main_t * am,
mp->link_duplex = ntohl (((hi->flags & VNET_HW_INTERFACE_FLAG_DUPLEX_MASK) >>
VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT));
mp->link_speed = ntohl (hi->link_speed);
- mp->link_mtu = ntohs (hi->max_packet_bytes);
+ mp->link_mtu = ntohs (hi->max_frame_size - hi->frame_overhead);
mp->mtu[VNET_MTU_L3] = ntohl (swif->mtu[VNET_MTU_L3]);
mp->mtu[VNET_MTU_IP4] = ntohl (swif->mtu[VNET_MTU_IP4]);
mp->mtu[VNET_MTU_IP6] = ntohl (swif->mtu[VNET_MTU_IP6]);
@@ -388,8 +387,6 @@ vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp)
vec_add1 (filter, 0); /* Ensure it's a C string for strcasecmp() */
}
- char *strcasestr (char *, char *); /* lnx hdr file botch */
- /* *INDENT-OFF* */
pool_foreach (swif, im->sw_interfaces)
{
if (!vnet_swif_is_api_visible (swif))
@@ -403,7 +400,6 @@ vl_api_sw_interface_dump_t_handler (vl_api_sw_interface_dump_t * mp)
send_sw_interface_details (am, rp, swif, name, mp->context);
}
- /* *INDENT-ON* */
vec_free (name);
vec_free (filter);
@@ -461,67 +457,26 @@ vl_api_sw_interface_set_table_t_handler (vl_api_sw_interface_set_table_t * mp)
VALIDATE_SW_IF_INDEX (mp);
if (mp->is_ipv6)
- rv = ip_table_bind (FIB_PROTOCOL_IP6, sw_if_index, table_id, 1);
+ rv = ip_table_bind (FIB_PROTOCOL_IP6, sw_if_index, table_id);
else
- rv = ip_table_bind (FIB_PROTOCOL_IP4, sw_if_index, table_id, 1);
+ rv = ip_table_bind (FIB_PROTOCOL_IP4, sw_if_index, table_id);
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_SW_INTERFACE_SET_TABLE_REPLY);
}
-int
-ip_table_bind (fib_protocol_t fproto,
- u32 sw_if_index, u32 table_id, u8 is_api)
+void
+fib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 fib_index)
{
- CLIB_UNUSED (ip_interface_address_t * ia);
- u32 fib_index, mfib_index;
- fib_source_t src;
- mfib_source_t msrc;
-
- if (is_api)
- {
- src = FIB_SOURCE_API;
- msrc = MFIB_SOURCE_API;
- }
- else
- {
- src = FIB_SOURCE_CLI;
- msrc = MFIB_SOURCE_CLI;
- }
+ u32 table_id;
- /*
- * This if table does not exist = error is what we want in the end.
- */
- fib_index = fib_table_find (fproto, table_id);
- mfib_index = mfib_table_find (fproto, table_id);
-
- if (~0 == fib_index || ~0 == mfib_index)
- {
- return (VNET_API_ERROR_NO_SUCH_FIB);
- }
+ table_id = fib_table_get_table_id (fib_index, fproto);
+ ASSERT (table_id != ~0);
if (FIB_PROTOCOL_IP6 == fproto)
{
/*
- * If the interface already has in IP address, then a change int
- * VRF is not allowed. The IP address applied must first be removed.
- * We do not do that automatically here, since VPP has no knowledge
- * of whether those subnets are valid in the destination VRF.
- */
- /* *INDENT-OFF* */
- foreach_ip_interface_address (&ip6_main.lookup_main,
- ia, sw_if_index,
- 1 /* honor unnumbered */ ,
- ({
- return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE);
- }));
- /* *INDENT-ON* */
-
- vec_validate (ip6_main.fib_index_by_sw_if_index, sw_if_index);
- vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index);
-
- /*
* tell those that are interested that the binding is changing.
*/
ip6_table_bind_callback_t *cb;
@@ -534,42 +489,19 @@ ip_table_bind (fib_protocol_t fproto,
/* unlock currently assigned tables */
if (0 != ip6_main.fib_index_by_sw_if_index[sw_if_index])
fib_table_unlock (ip6_main.fib_index_by_sw_if_index[sw_if_index],
- FIB_PROTOCOL_IP6, src);
- if (0 != ip6_main.mfib_index_by_sw_if_index[sw_if_index])
- mfib_table_unlock (ip6_main.mfib_index_by_sw_if_index[sw_if_index],
- FIB_PROTOCOL_IP6, msrc);
+ FIB_PROTOCOL_IP6, FIB_SOURCE_INTERFACE);
if (0 != table_id)
{
/* we need to lock the table now it's inuse */
- fib_table_lock (fib_index, FIB_PROTOCOL_IP6, src);
- mfib_table_lock (mfib_index, FIB_PROTOCOL_IP6, msrc);
+ fib_table_lock (fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_INTERFACE);
}
ip6_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
- ip6_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index;
}
else
{
/*
- * If the interface already has in IP address, then a change int
- * VRF is not allowed. The IP address applied must first be removed.
- * We do not do that automatically here, since VPP has no knowledge
- * of whether those subnets are valid in the destination VRF.
- */
- /* *INDENT-OFF* */
- foreach_ip_interface_address (&ip4_main.lookup_main,
- ia, sw_if_index,
- 1 /* honor unnumbered */ ,
- ({
- return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE);
- }));
- /* *INDENT-ON* */
-
- vec_validate (ip4_main.fib_index_by_sw_if_index, sw_if_index);
- vec_validate (ip4_main.mfib_index_by_sw_if_index, sw_if_index);
-
- /*
* tell those that are interested that the binding is changing.
*/
ip4_table_bind_callback_t *cb;
@@ -582,24 +514,94 @@ ip_table_bind (fib_protocol_t fproto,
/* unlock currently assigned tables */
if (0 != ip4_main.fib_index_by_sw_if_index[sw_if_index])
fib_table_unlock (ip4_main.fib_index_by_sw_if_index[sw_if_index],
- FIB_PROTOCOL_IP4, src);
+ FIB_PROTOCOL_IP4, FIB_SOURCE_INTERFACE);
+
+ if (0 != table_id)
+ {
+ /* we need to lock the table now it's inuse */
+ fib_index = fib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, table_id, FIB_SOURCE_INTERFACE);
+ }
+
+ ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
+ }
+}
+
+void
+mfib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 mfib_index)
+{
+ u32 table_id;
+
+ table_id = mfib_table_get_table_id (mfib_index, fproto);
+ ASSERT (table_id != ~0);
+
+ if (FIB_PROTOCOL_IP6 == fproto)
+ {
+ if (0 != ip6_main.mfib_index_by_sw_if_index[sw_if_index])
+ mfib_table_unlock (ip6_main.mfib_index_by_sw_if_index[sw_if_index],
+ FIB_PROTOCOL_IP6, MFIB_SOURCE_INTERFACE);
+
+ if (0 != table_id)
+ {
+ /* we need to lock the table now it's inuse */
+ mfib_table_lock (mfib_index, FIB_PROTOCOL_IP6,
+ MFIB_SOURCE_INTERFACE);
+ }
+
+ ip6_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index;
+ }
+ else
+ {
if (0 != ip4_main.mfib_index_by_sw_if_index[sw_if_index])
mfib_table_unlock (ip4_main.mfib_index_by_sw_if_index[sw_if_index],
- FIB_PROTOCOL_IP4, msrc);
+ FIB_PROTOCOL_IP4, MFIB_SOURCE_INTERFACE);
if (0 != table_id)
{
/* we need to lock the table now it's inuse */
- fib_index = fib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
- table_id, src);
-
- mfib_index = mfib_table_find_or_create_and_lock (FIB_PROTOCOL_IP4,
- table_id, msrc);
+ mfib_index = mfib_table_find_or_create_and_lock (
+ FIB_PROTOCOL_IP4, table_id, MFIB_SOURCE_INTERFACE);
}
- ip4_main.fib_index_by_sw_if_index[sw_if_index] = fib_index;
ip4_main.mfib_index_by_sw_if_index[sw_if_index] = mfib_index;
}
+}
+
+int
+ip_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 table_id)
+{
+ CLIB_UNUSED (ip_interface_address_t * ia);
+ u32 fib_index, mfib_index;
+
+ /*
+ * This if table does not exist = error is what we want in the end.
+ */
+ fib_index = fib_table_find (fproto, table_id);
+ mfib_index = mfib_table_find (fproto, table_id);
+
+ if (~0 == fib_index || ~0 == mfib_index)
+ {
+ return (VNET_API_ERROR_NO_SUCH_FIB);
+ }
+
+ /*
+ * If the interface already has in IP address, then a change int
+ * VRF is not allowed. The IP address applied must first be removed.
+ * We do not do that automatically here, since VPP has no knowledge
+ * of whether those subnets are valid in the destination VRF.
+ */
+ /* clang-format off */
+ foreach_ip_interface_address (FIB_PROTOCOL_IP6 == fproto ?
+ &ip6_main.lookup_main : &ip4_main.lookup_main,
+ ia, sw_if_index,
+ 1 /* honor unnumbered */ ,
+ ({
+ return (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE);
+ }));
+ /* clang-format on */
+
+ fib_table_bind (fproto, sw_if_index, fib_index);
+ mfib_table_bind (fproto, sw_if_index, mfib_index);
return (0);
}
@@ -682,8 +684,8 @@ static void vl_api_sw_interface_set_unnumbered_t_handler
goto done;
}
- vnet_sw_interface_update_unnumbered (unnumbered_sw_if_index,
- sw_if_index, mp->is_add);
+ rv = vnet_sw_interface_update_unnumbered (unnumbered_sw_if_index,
+ sw_if_index, mp->is_add);
done:
REPLY_MACRO (VL_API_SW_INTERFACE_SET_UNNUMBERED_REPLY);
}
@@ -806,14 +808,12 @@ link_state_process (vlib_main_t * vm,
if (event_by_sw_if_index[i] == 0)
continue;
- /* *INDENT-OFF* */
pool_foreach (reg, vam->interface_events_registrations)
{
vl_reg = vl_api_client_index_to_registration (reg->client_index);
if (vl_reg)
send_sw_interface_event (vam, reg, vl_reg, i, event_by_sw_if_index[i]);
}
- /* *INDENT-ON* */
}
vec_reset_length (event_by_sw_if_index);
}
@@ -829,13 +829,11 @@ static clib_error_t *sw_interface_add_del_function (vnet_main_t * vm,
u32 sw_if_index,
u32 flags);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (link_state_process_node,static) = {
.function = link_state_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "vpe-link-state-process",
};
-/* *INDENT-ON* */
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (admin_up_down_function);
VNET_HW_INTERFACE_LINK_UP_DOWN_FUNCTION (link_up_down_function);
@@ -1016,6 +1014,38 @@ static void vl_api_sw_interface_get_mac_address_t_handler
vl_api_send_msg (reg, (u8 *) rmp);
}
+static void
+vl_api_sw_interface_set_interface_name_t_handler (
+ vl_api_sw_interface_set_interface_name_t *mp)
+{
+ vl_api_sw_interface_set_interface_name_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ clib_error_t *error;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ u32 sw_if_index = ntohl (mp->sw_if_index);
+ vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
+
+ if (mp->name[0] == 0)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto out;
+ }
+
+ error = vnet_rename_interface (vnm, si->hw_if_index, (char *) mp->name);
+ if (error)
+ {
+ clib_error_free (error);
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ }
+
+out:
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SW_INTERFACE_SET_INTERFACE_NAME_REPLY);
+}
+
static void vl_api_sw_interface_set_rx_mode_t_handler
(vl_api_sw_interface_set_rx_mode_t * mp)
{
@@ -1177,6 +1207,164 @@ out:
}
static void
+send_interface_tx_placement_details (vnet_hw_if_tx_queue_t **all_queues,
+ u32 index, vl_api_registration_t *rp,
+ u32 context)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vl_api_sw_interface_tx_placement_details_t *rmp;
+ u32 n_bits = 0, v = ~0;
+ vnet_hw_if_tx_queue_t **q = vec_elt_at_index (all_queues, index);
+ uword *bitmap = q[0]->threads;
+ u32 hw_if_index = q[0]->hw_if_index;
+ vnet_hw_interface_t *hw_if = vnet_get_hw_interface (vnm, hw_if_index);
+
+ n_bits = clib_bitmap_count_set_bits (bitmap);
+ u32 n = n_bits * sizeof (u32);
+
+ REPLY_MACRO_DETAILS5_END (VL_API_SW_INTERFACE_TX_PLACEMENT_DETAILS, n, rp,
+ context, ({
+ rmp->sw_if_index = hw_if->sw_if_index;
+ rmp->queue_id = q[0]->queue_id;
+ rmp->shared = q[0]->shared_queue;
+ rmp->array_size = n_bits;
+
+ v = clib_bitmap_first_set (bitmap);
+ for (u32 i = 0; i < n_bits; i++)
+ {
+ rmp->threads[i] = v;
+ v = clib_bitmap_next_set (bitmap, v + 1);
+ }
+ }));
+}
+
+static void
+vl_api_sw_interface_tx_placement_get_t_handler (
+ vl_api_sw_interface_tx_placement_get_t *mp)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vl_api_sw_interface_tx_placement_get_reply_t *rmp = 0;
+ vnet_hw_if_tx_queue_t **all_queues = 0;
+ vnet_hw_if_tx_queue_t *q;
+ u32 sw_if_index = mp->sw_if_index;
+ i32 rv = 0;
+
+ if (pool_elts (vnm->interface_main.hw_if_tx_queues) == 0)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ goto err;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ pool_foreach (q, vnm->interface_main.hw_if_tx_queues)
+ vec_add1 (all_queues, q);
+ vec_sort_with_function (all_queues, vnet_hw_if_txq_cmp_cli_api);
+ }
+ else
+ {
+ u32 qi = ~0;
+ vnet_sw_interface_t *si;
+
+ if (!vnet_sw_if_index_is_api_valid (sw_if_index))
+ {
+ clib_warning ("sw_if_index %u does not exist", sw_if_index);
+ rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ goto err;
+ }
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+ if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ {
+ clib_warning ("interface type is not HARDWARE! P2P, PIPE and SUB"
+ " interfaces are not supported");
+ rv = VNET_API_ERROR_INVALID_INTERFACE;
+ goto err;
+ }
+
+ vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, si->hw_if_index);
+ for (qi = 0; qi < vec_len (hw->tx_queue_indices); qi++)
+ {
+ q = vnet_hw_if_get_tx_queue (vnm, hw->tx_queue_indices[qi]);
+ vec_add1 (all_queues, q);
+ }
+ }
+
+ REPLY_AND_DETAILS_VEC_MACRO_END (VL_API_SW_INTERFACE_TX_PLACEMENT_GET_REPLY,
+ all_queues, mp, rmp, rv, ({
+ send_interface_tx_placement_details (
+ all_queues, cursor, rp, mp->context);
+ }));
+
+ vec_free (all_queues);
+ return;
+
+err:
+ REPLY_MACRO_END (VL_API_SW_INTERFACE_TX_PLACEMENT_GET_REPLY);
+}
+
+static void
+vl_api_sw_interface_set_tx_placement_t_handler (
+ vl_api_sw_interface_set_tx_placement_t *mp)
+{
+ vl_api_sw_interface_set_tx_placement_reply_t *rmp;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index = mp->sw_if_index;
+ vnet_sw_interface_t *si;
+ uword *bitmap = 0;
+ u32 queue_id = ~0;
+ u32 size = 0;
+ clib_error_t *error = 0;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX_END (mp);
+
+ si = vnet_get_sw_interface (vnm, sw_if_index);
+ if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
+ {
+ rv = VNET_API_ERROR_INVALID_VALUE;
+ goto bad_sw_if_index;
+ }
+
+ size = mp->array_size;
+ for (u32 i = 0; i < size; i++)
+ {
+ u32 thread_index = mp->threads[i];
+ bitmap = clib_bitmap_set (bitmap, thread_index, 1);
+ }
+
+ queue_id = mp->queue_id;
+ rv = set_hw_interface_tx_queue (si->hw_if_index, queue_id, bitmap);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (
+ 0, "please specify valid thread(s) - last thread index %u",
+ clib_bitmap_last_set (bitmap));
+ break;
+ case VNET_API_ERROR_INVALID_QUEUE:
+ error = clib_error_return (
+ 0, "unknown queue %u on interface %s", queue_id,
+ vnet_get_hw_interface (vnet_get_main (), si->hw_if_index)->name);
+ break;
+ default:
+ break;
+ }
+
+ if (error)
+ {
+ clib_error_report (error);
+ goto out;
+ }
+
+ BAD_SW_IF_INDEX_LABEL;
+out:
+ REPLY_MACRO_END (VL_API_SW_INTERFACE_SET_TX_PLACEMENT_REPLY);
+ clib_bitmap_free (bitmap);
+}
+
+static void
vl_api_create_vlan_subif_t_handler (vl_api_create_vlan_subif_t * mp)
{
vl_api_create_vlan_subif_reply_t *rmp;
@@ -1282,12 +1470,10 @@ vl_api_create_subif_t_handler (vl_api_create_subif_t * mp)
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CREATE_SUBIF_REPLY,
({
rmp->sw_if_index = ntohl(sub_sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1329,12 +1515,10 @@ vl_api_create_loopback_t_handler (vl_api_create_loopback_t * mp)
mac_address_decode (mp->mac_address, &mac);
rv = vnet_create_loopback_interface (&sw_if_index, (u8 *) & mac, 0, 0);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CREATE_LOOPBACK_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void vl_api_create_loopback_instance_t_handler
@@ -1351,12 +1535,10 @@ static void vl_api_create_loopback_instance_t_handler
rv = vnet_create_loopback_interface (&sw_if_index, (u8 *) & mac,
is_specified, user_instance);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_CREATE_LOOPBACK_INSTANCE_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1410,6 +1592,92 @@ static void
REPLY_MACRO (VL_API_SW_INTERFACE_ADDRESS_REPLACE_END_REPLY);
}
+static void
+vl_api_pcap_set_filter_function_t_handler (
+ vl_api_pcap_set_filter_function_t *mp)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_pcap_t *pp = &vnm->pcap;
+ vl_api_pcap_set_filter_function_reply_t *rmp;
+ unformat_input_t input = { 0 };
+ vlib_is_packet_traced_fn_t *f;
+ char *filter_name;
+ int rv = 0;
+ filter_name = vl_api_from_api_to_new_c_string (&mp->filter_function_name);
+ unformat_init_cstring (&input, filter_name);
+ if (unformat (&input, "%U", unformat_vlib_trace_filter_function, &f) == 0)
+ {
+ rv = -1;
+ goto done;
+ }
+
+ pp->current_filter_function = f;
+
+done:
+ unformat_free (&input);
+ vec_free (filter_name);
+ REPLY_MACRO (VL_API_PCAP_SET_FILTER_FUNCTION_REPLY);
+}
+
+static void
+vl_api_pcap_trace_on_t_handler (vl_api_pcap_trace_on_t *mp)
+{
+ vl_api_pcap_trace_on_reply_t *rmp;
+ unformat_input_t filename, drop_err_name;
+ vnet_pcap_dispatch_trace_args_t capture_args;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ unformat_init_cstring (&filename, (char *) mp->filename);
+ if (!unformat_user (&filename, unformat_vlib_tmpfile,
+ &capture_args.filename))
+ {
+ rv = VNET_API_ERROR_ILLEGAL_NAME;
+ goto out;
+ }
+
+ capture_args.rx_enable = mp->capture_rx;
+ capture_args.tx_enable = mp->capture_tx;
+ capture_args.preallocate_data = mp->preallocate_data;
+ capture_args.free_data = mp->free_data;
+ capture_args.drop_enable = mp->capture_drop;
+ capture_args.status = 0;
+ capture_args.packets_to_capture = ntohl (mp->max_packets);
+ capture_args.sw_if_index = ntohl (mp->sw_if_index);
+ capture_args.filter = mp->filter;
+ capture_args.max_bytes_per_pkt = ntohl (mp->max_bytes_per_packet);
+ capture_args.drop_err = ~0;
+
+ unformat_init_cstring (&drop_err_name, (char *) mp->error);
+ unformat_user (&drop_err_name, unformat_vlib_error, vlib_get_main (),
+ &capture_args.drop_err);
+
+ rv = vnet_pcap_dispatch_trace_configure (&capture_args);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+out:
+ unformat_free (&filename);
+ unformat_free (&drop_err_name);
+
+ REPLY_MACRO (VL_API_PCAP_TRACE_ON_REPLY);
+}
+
+static void
+vl_api_pcap_trace_off_t_handler (vl_api_pcap_trace_off_t *mp)
+{
+ vl_api_pcap_trace_off_reply_t *rmp;
+ vnet_pcap_dispatch_trace_args_t capture_args;
+ int rv = 0;
+
+ clib_memset (&capture_args, 0, sizeof (capture_args));
+
+ rv = vnet_pcap_dispatch_trace_configure (&capture_args);
+
+ REPLY_MACRO (VL_API_PCAP_TRACE_OFF_REPLY);
+}
+
/*
* vpe_api_hookup
* Add vpe's API message handlers to the table.
@@ -1426,19 +1694,31 @@ interface_api_hookup (vlib_main_t * vm)
{
api_main_t *am = vlibapi_get_main ();
- /* Mark these APIs as mp safe */
- am->is_mp_safe[VL_API_SW_INTERFACE_DUMP] = 1;
- am->is_mp_safe[VL_API_SW_INTERFACE_DETAILS] = 1;
- am->is_mp_safe[VL_API_SW_INTERFACE_TAG_ADD_DEL] = 1;
-
- /* Do not replay VL_API_SW_INTERFACE_DUMP messages */
- am->api_trace_cfg[VL_API_SW_INTERFACE_DUMP].replay_enable = 0;
-
/*
* Set up the (msg_name, crc, message-id) table
*/
REPLY_MSG_ID_BASE = setup_message_id_table ();
+ /* Mark these APIs as mp safe */
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_DUMP,
+ 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_DETAILS, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_TAG_ADD_DEL, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_SET_INTERFACE_NAME, 1);
+
+ /* Do not replay VL_API_SW_INTERFACE_DUMP messages */
+ vl_api_allow_msg_replay (am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_DUMP,
+ 0);
+
+ /* Mark these APIs as autoendian */
+ vl_api_set_msg_autoendian (
+ am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_SET_TX_PLACEMENT, 1);
+ vl_api_set_msg_autoendian (
+ am, REPLY_MSG_ID_BASE + VL_API_SW_INTERFACE_TX_PLACEMENT_GET, 1);
+
return 0;
}
diff --git a/src/vnet/interface_cli.c b/src/vnet/interface_cli.c
index 73b275785b8..c56eb9777cf 100644
--- a/src/vnet/interface_cli.c
+++ b/src/vnet/interface_cli.c
@@ -53,6 +53,10 @@
#include <vnet/classify/vnet_classify.h>
#include <vnet/interface/rx_queue_funcs.h>
#include <vnet/interface/tx_queue_funcs.h>
+#include <vnet/hash/hash.h>
+#include <vnet/dev/dev.h>
+#include <vnet/dev/dev_funcs.h>
+
static int
compare_interface_names (void *a1, void *a2)
{
@@ -68,33 +72,37 @@ show_or_clear_hw_interfaces (vlib_main_t * vm,
vlib_cli_command_t * cmd, int is_show)
{
clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
vnet_main_t *vnm = vnet_get_main ();
vnet_interface_main_t *im = &vnm->interface_main;
vnet_hw_interface_t *hi;
u32 hw_if_index, *hw_if_indices = 0;
int i, verbose = -1, show_bond = 0;
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ if (!unformat_user (input, unformat_line_input, line_input))
+ goto skip_unformat;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
/* See if user wants to show a specific interface. */
- if (unformat
- (input, "%U", unformat_vnet_hw_interface, vnm, &hw_if_index))
+ if (unformat (line_input, "%U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
vec_add1 (hw_if_indices, hw_if_index);
/* See if user wants to show an interface with a specific hw_if_index. */
- else if (unformat (input, "%u", &hw_if_index))
+ else if (unformat (line_input, "%u", &hw_if_index))
vec_add1 (hw_if_indices, hw_if_index);
- else if (unformat (input, "verbose"))
+ else if (unformat (line_input, "verbose"))
verbose = 1; /* this is also the default */
- else if (unformat (input, "detail"))
+ else if (unformat (line_input, "detail"))
verbose = 2;
- else if (unformat (input, "brief"))
+ else if (unformat (line_input, "brief"))
verbose = 0;
- else if (unformat (input, "bond"))
+ else if (unformat (line_input, "bond"))
{
show_bond = 1;
if (verbose < 0)
@@ -104,11 +112,15 @@ show_or_clear_hw_interfaces (vlib_main_t * vm,
else
{
error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ format_unformat_error, line_input);
+ unformat_free (line_input);
goto done;
}
}
+ unformat_free (line_input);
+
+skip_unformat:
/* Gather interfaces. */
if (vec_len (hw_if_indices) == 0)
pool_foreach (hi, im->hw_interfaces)
@@ -137,14 +149,12 @@ show_or_clear_hw_interfaces (vlib_main_t * vm,
vlib_cli_output (vm, "%U\n", format_vnet_hw_interface, vnm,
hi, verbose);
- /* *INDENT-OFF* */
clib_bitmap_foreach (hw_idx, hi->bond_info)
{
shi = vnet_get_hw_interface(vnm, hw_idx);
vlib_cli_output (vm, "%U\n",
format_vnet_hw_interface, vnm, shi, verbose);
}
- /* *INDENT-ON* */
}
}
}
@@ -238,14 +248,12 @@ clear_hw_interfaces (vlib_main_t * vm,
* cpu socket 0
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_hw_interfaces_command, static) = {
.path = "show hardware-interfaces",
.short_help = "show hardware-interfaces [brief|verbose|detail] [bond] "
"[<interface> [<interface> [..]]] [<sw_idx> [<sw_idx> [..]]]",
.function = show_hw_interfaces,
};
-/* *INDENT-ON* */
/*?
@@ -259,14 +267,12 @@ VLIB_CLI_COMMAND (show_hw_interfaces_command, static) = {
* name and software index (where 2 is the software index):
* @cliexcmd{clear hardware-interfaces GigabitEthernet7/0/0 2}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_hw_interface_counters_command, static) = {
.path = "clear hardware-interfaces",
.short_help = "clear hardware-interfaces "
"[<interface> [<interface> [..]]] [<sw_idx> [<sw_idx> [..]]]",
.function = clear_hw_interfaces,
};
-/* *INDENT-ON* */
static int
sw_interface_name_compare (void *a1, void *a2)
@@ -319,6 +325,21 @@ show_sw_interfaces (vlib_main_t * vm,
show_vtr = 1;
else if (unformat (linput, "verbose"))
verbose = 1;
+ else if (unformat (linput, "%d", &sw_if_index))
+ {
+ if (!pool_is_free_index (im->sw_interfaces, sw_if_index))
+ {
+ si = pool_elt_at_index (im->sw_interfaces, sw_if_index);
+ vec_add1 (sorted_sis, si[0]);
+ }
+ else
+ {
+ vec_free (sorted_sis);
+ error = clib_error_return (0, "unknown interface index `%d'",
+ sw_if_index);
+ goto done;
+ }
+ }
else
{
vec_free (sorted_sis);
@@ -392,15 +413,13 @@ show_sw_interfaces (vlib_main_t * vm,
/* Gather interfaces. */
sorted_sis =
vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
- _vec_len (sorted_sis) = 0;
- /* *INDENT-OFF* */
+ vec_set_len (sorted_sis, 0);
pool_foreach (si, im->sw_interfaces)
{
int visible = vnet_swif_is_api_visible (si);
if (visible)
vec_add1 (sorted_sis, si[0]);
}
- /* *INDENT-ON* */
/* Sort by name. */
vec_sort_with_function (sorted_sis, sw_interface_name_compare);
}
@@ -442,24 +461,21 @@ show_sw_interfaces (vlib_main_t * vm,
/* Display any L2 info */
vlib_cli_output (vm, "%U", format_l2_input, si->sw_if_index);
- /* *INDENT-OFF* */
/* Display any IP4 addressing info */
foreach_ip_interface_address (lm4, ia, si->sw_if_index,
1 /* honor unnumbered */,
({
ip4_address_t *r4 = ip_interface_address_get_address (lm4, ia);
- if (fib4->table_id)
- vlib_cli_output (vm, " L3 %U/%d ip4 table-id %d fib-idx %d",
- format_ip4_address, r4, ia->address_length,
- fib4->table_id,
- ip4_fib_index_from_table_id (fib4->table_id));
+ if (fib4->hash.table_id)
+ vlib_cli_output (
+ vm, " L3 %U/%d ip4 table-id %d fib-idx %d", format_ip4_address,
+ r4, ia->address_length, fib4->hash.table_id,
+ ip4_fib_index_from_table_id (fib4->hash.table_id));
else
vlib_cli_output (vm, " L3 %U/%d",
format_ip4_address, r4, ia->address_length);
}));
- /* *INDENT-ON* */
- /* *INDENT-OFF* */
/* Display any IP6 addressing info */
foreach_ip_interface_address (lm6, ia, si->sw_if_index,
1 /* honor unnumbered */,
@@ -474,7 +490,6 @@ show_sw_interfaces (vlib_main_t * vm,
vlib_cli_output (vm, " L3 %U/%d",
format_ip6_address, r6, ia->address_length);
}));
- /* *INDENT-ON* */
}
}
else
@@ -490,29 +505,24 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sw_interfaces_command, static) = {
.path = "show interface",
- .short_help = "show interface [address|addr|features|feat|vtr] [<interface> [<interface> [..]]] [verbose]",
+ .short_help = "show interface [address|addr|features|feat|vtr|tag] "
+ "[<interface> [<interface> [..]]] [verbose]",
.function = show_sw_interfaces,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/* Root of all interface commands. */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vnet_cli_interface_command, static) = {
.path = "interface",
.short_help = "Interface commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vnet_cli_set_interface_command, static) = {
.path = "set interface",
.short_help = "Interface commands",
};
-/* *INDENT-ON* */
static clib_error_t *
clear_interface_counters (vlib_main_t * vm,
@@ -553,13 +563,11 @@ clear_interface_counters (vlib_main_t * vm,
* Example of how to clear the statistics for all interfaces:
* @cliexcmd{clear interfaces}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_interface_counters_command, static) = {
.path = "clear interfaces",
.short_help = "clear interfaces",
.function = clear_interface_counters,
};
-/* *INDENT-ON* */
/**
* Parse subinterface names.
@@ -884,7 +892,6 @@ done:
* @cliexcmd{set interface GigabitEthernet2/0/0.7 up}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_sub_interfaces_command, static) = {
.path = "create sub-interfaces",
.short_help = "create sub-interfaces <interface> "
@@ -893,7 +900,6 @@ VLIB_CLI_COMMAND (create_sub_interfaces_command, static) = {
"{<subId> dot1q|dot1ad <vlanId>|any [inner-dot1q <vlanId>|any] [exact-match]}",
.function = create_sub_interfaces,
};
-/* *INDENT-ON* */
static clib_error_t *
set_state (vlib_main_t * vm,
@@ -926,7 +932,6 @@ done:
return error;
}
-
/*?
* This command is used to change the admin state (up/down) of an interface.
*
@@ -936,18 +941,18 @@ done:
* '<em>punt</em>' flag (interface is still down).
*
* @cliexpar
- * Example of how to configure the admin state of an interface to '<em>up</em?':
+ * Example of how to configure the admin state of an interface to
+ '<em>up</em>':
* @cliexcmd{set interface state GigabitEthernet2/0/0 up}
- * Example of how to configure the admin state of an interface to '<em>down</em?':
+ * Example of how to configure the admin state of an interface to
+ '<em>down</em>':
* @cliexcmd{set interface state GigabitEthernet2/0/0 down}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_state_command, static) = {
.path = "set interface state",
.short_help = "set interface state <interface> [up|down|punt|enable]",
.function = set_state,
};
-/* *INDENT-ON* */
static clib_error_t *
set_unnumbered (vlib_main_t * vm,
@@ -976,19 +981,32 @@ set_unnumbered (vlib_main_t * vm,
return clib_error_return (0, "When enabling unnumbered specify the"
" IP enabled interface that it uses");
- vnet_sw_interface_update_unnumbered (unnumbered_sw_if_index,
- inherit_from_sw_if_index, enable);
+ int rv = vnet_sw_interface_update_unnumbered (
+ unnumbered_sw_if_index, inherit_from_sw_if_index, enable);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+
+ case VNET_API_ERROR_UNEXPECTED_INTF_STATE:
+ return clib_error_return (
+ 0,
+ "When enabling unnumbered both interfaces must be in the same tables");
+
+ default:
+ return clib_error_return (
+ 0, "vnet_sw_interface_update_unnumbered returned %d", rv);
+ }
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_unnumbered_command, static) = {
.path = "set interface unnumbered",
.short_help = "set interface unnumbered [<interface> use <interface> | del <interface>]",
.function = set_unnumbered,
};
-/* *INDENT-ON* */
@@ -1025,13 +1043,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_hw_class_command, static) = {
.path = "set interface hw-class",
.short_help = "Set interface hardware class",
.function = set_hw_class,
};
-/* *INDENT-ON* */
static clib_error_t *
vnet_interface_cli_init (vlib_main_t * vm)
@@ -1075,13 +1091,11 @@ renumber_interface_command_fn (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (renumber_interface_command, static) = {
.path = "renumber interface",
.short_help = "renumber interface <interface> <new-dev-instance>",
.function = renumber_interface_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
promiscuous_cmd (vlib_main_t * vm,
@@ -1111,13 +1125,11 @@ promiscuous_cmd (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_promiscuous_cmd, static) = {
.path = "set interface promiscuous",
.short_help = "set interface promiscuous [on|off] <interface>",
.function = promiscuous_cmd,
};
-/* *INDENT-ON* */
static clib_error_t *
mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -1126,6 +1138,7 @@ mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
u32 hw_if_index, sw_if_index, mtu;
ethernet_main_t *em = &ethernet_main;
u32 mtus[VNET_N_MTU] = { 0, 0, 0, 0 };
+ clib_error_t *err;
if (unformat (input, "%d %U", &mtu,
unformat_vnet_hw_interface, vnm, &hw_if_index))
@@ -1134,22 +1147,14 @@ mtu_cmd (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
* Change physical MTU on interface. Only supported for Ethernet
* interfaces
*/
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
ethernet_interface_t *eif = ethernet_get_interface (em, hw_if_index);
if (!eif)
return clib_error_return (0, "not supported");
- if (mtu < hi->min_supported_packet_bytes)
- return clib_error_return (0, "Invalid mtu (%d): "
- "must be >= min pkt bytes (%d)", mtu,
- hi->min_supported_packet_bytes);
-
- if (mtu > hi->max_supported_packet_bytes)
- return clib_error_return (0, "Invalid mtu (%d): must be <= (%d)", mtu,
- hi->max_supported_packet_bytes);
-
- vnet_hw_interface_set_mtu (vnm, hw_if_index, mtu);
+ err = vnet_hw_interface_set_mtu (vnm, hw_if_index, mtu);
+ if (err)
+ return err;
goto done;
}
else if (unformat (input, "packet %d %U", &mtu,
@@ -1175,13 +1180,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_mtu_cmd, static) = {
.path = "set interface mtu",
.short_help = "set interface mtu [packet|ip4|ip6|mpls] <value> <interface>",
.function = mtu_cmd,
};
-/* *INDENT-ON* */
static clib_error_t *
show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -1204,15 +1207,13 @@ show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input,
{
sorted_sis =
vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
- _vec_len (sorted_sis) = 0;
- /* *INDENT-OFF* */
+ vec_set_len (sorted_sis, 0);
pool_foreach (si, im->sw_interfaces)
{
int visible = vnet_swif_is_api_visible (si);
if (visible)
vec_add1 (sorted_sis, si[0]);
}
- /* *INDENT-ON* */
/* Sort by name. */
vec_sort_with_function (sorted_sis, sw_interface_name_compare);
}
@@ -1253,13 +1254,11 @@ show_interface_sec_mac_addr_fn (vlib_main_t * vm, unformat_input_t * input,
* @cliexstart{show interface secondary-mac-address}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_interface_sec_mac_addr, static) = {
.path = "show interface secondary-mac-address",
.short_help = "show interface secondary-mac-address [<interface>]",
.function = show_interface_sec_mac_addr_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
interface_add_del_mac_address (vlib_main_t * vm, unformat_input_t * input,
@@ -1327,13 +1326,11 @@ done:
* @cliexcmd{set interface secondary-mac-address GigabitEthernet0/8/0 aa:bb:cc:dd:ee:01 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (interface_add_del_mac_address_cmd, static) = {
.path = "set interface secondary-mac-address",
.short_help = "set interface secondary-mac-address <interface> <mac-address> [(add|del)]",
.function = interface_add_del_mac_address,
};
-/* *INDENT-ON* */
static clib_error_t *
set_interface_mac_address (vlib_main_t * vm, unformat_input_t * input,
@@ -1377,13 +1374,11 @@ done:
* @cliexcmd{set interface mac address pg0 aa:bb:cc:dd:ee:04}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_mac_address_cmd, static) = {
.path = "set interface mac address",
.short_help = "set interface mac address <interface> <mac-address>",
.function = set_interface_mac_address,
};
-/* *INDENT-ON* */
static clib_error_t *
set_tag (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
@@ -1402,13 +1397,11 @@ set_tag (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_tag_command, static) = {
.path = "set interface tag",
.short_help = "set interface tag <interface> <tag>",
.function = set_tag,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_tag (vlib_main_t * vm, unformat_input_t * input,
@@ -1426,13 +1419,11 @@ clear_tag (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_tag_command, static) = {
.path = "clear interface tag",
.short_help = "clear interface tag <interface>",
.function = clear_tag,
};
-/* *INDENT-ON* */
static clib_error_t *
set_ip_directed_broadcast (vlib_main_t * vm,
@@ -1466,13 +1457,11 @@ set_ip_directed_broadcast (vlib_main_t * vm,
* subnet broadcast address will be sent L2 broadcast on the interface,
* otherwise it is dropped.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip_directed_broadcast_command, static) = {
.path = "set interface ip directed-broadcast",
.short_help = "set interface enable <interface> <enable|disable>",
.function = set_ip_directed_broadcast,
};
-/* *INDENT-ON* */
clib_error_t *
set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
@@ -1482,6 +1471,33 @@ set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
clib_error_t *error = 0;
vnet_hw_interface_t *hw;
u32 *queue_indices = 0;
+ vnet_dev_port_t *port;
+
+ port = vnet_dev_get_port_from_hw_if_index (hw_if_index);
+
+ if (port)
+ {
+ vlib_main_t *vm = vlib_get_main ();
+ vnet_dev_rv_t rv;
+
+ vnet_dev_port_cfg_change_req_t req = {
+ .type = mode == VNET_HW_IF_RX_MODE_POLLING ?
+ VNET_DEV_PORT_CFG_RXQ_INTR_MODE_DISABLE :
+ VNET_DEV_PORT_CFG_RXQ_INTR_MODE_ENABLE,
+ .queue_id = queue_id_valid ? queue_id : 0,
+ .all_queues = queue_id_valid ? 0 : 1,
+ };
+
+ if ((rv = vnet_dev_port_cfg_change_req_validate (vm, port, &req)))
+ return vnet_dev_port_err (
+ vm, port, rv, "rx queue interupt mode enable/disable not supported");
+
+ if ((rv = vnet_dev_process_port_cfg_change_req (vm, port, &req)))
+ return vnet_dev_port_err (
+ vm, port, rv,
+ "device failed to enable/disable queue interrupt mode");
+ return 0;
+ }
hw = vnet_get_hw_interface (vnm, hw_if_index);
@@ -1502,7 +1518,12 @@ set_hw_interface_change_rx_mode (vnet_main_t * vnm, u32 hw_if_index,
{
int rv = vnet_hw_if_set_rx_queue_mode (vnm, queue_indices[i], mode);
if (rv)
- goto done;
+ {
+ error = clib_error_return (
+ 0, "unable to set rx-mode on interface %v queue-id %u.\n",
+ hw->name, queue_id);
+ goto done;
+ }
}
done:
@@ -1596,13 +1617,11 @@ set_interface_rx_mode (vlib_main_t * vm, unformat_input_t * input,
* VirtualEthernet0/0/13 queue 3 (polling)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_if_rx_mode,static) = {
.path = "set interface rx-mode",
.short_help = "set interface rx-mode <interface> [queue <n>] [polling | interrupt | adaptive]",
.function = set_interface_rx_mode,
};
-/* *INDENT-ON* */
static clib_error_t *
show_interface_rx_placement_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -1668,13 +1687,11 @@ show_interface_rx_placement_fn (vlib_main_t * vm, unformat_input_t * input,
* VirtualEthernet0/0/13 queue 3 (polling)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_interface_rx_placement, static) = {
.path = "show interface rx-placement",
.short_help = "show interface rx-placement",
.function = show_interface_rx_placement_fn,
};
-/* *INDENT-ON* */
clib_error_t *
set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id,
u32 thread_index, u8 is_main)
@@ -1799,7 +1816,6 @@ set_interface_rx_placement (vlib_main_t *vm, unformat_input_t *input,
* VirtualEthernet0/0/13 queue 3 (polling)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_if_rx_placement,static) = {
.path = "set interface rx-placement",
.short_help = "set interface rx-placement <interface> [queue <n>] "
@@ -1807,30 +1823,25 @@ VLIB_CLI_COMMAND (cmd_set_if_rx_placement,static) = {
.function = set_interface_rx_placement,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
-clib_error_t *
+int
set_hw_interface_tx_queue (u32 hw_if_index, u32 queue_id, uword *bitmap)
{
vnet_main_t *vnm = vnet_get_main ();
- vnet_device_main_t *vdm = &vnet_device_main;
- vnet_hw_interface_t *hw;
+ vlib_thread_main_t *vtm = vlib_get_thread_main ();
vnet_hw_if_tx_queue_t *txq;
u32 queue_index;
u32 thread_index;
- hw = vnet_get_hw_interface (vnm, hw_if_index);
-
/* highest set bit in bitmap should not exceed last worker thread index */
thread_index = clib_bitmap_last_set (bitmap);
- if ((thread_index != ~0) && (thread_index > vdm->last_worker_thread_index))
- return clib_error_return (0, "please specify valid thread(s)");
+ if ((thread_index != ~0) && (thread_index >= vtm->n_vlib_mains))
+ return VNET_API_ERROR_INVALID_VALUE;
queue_index =
vnet_hw_if_get_tx_queue_index_by_id (vnm, hw_if_index, queue_id);
if (queue_index == ~0)
- return clib_error_return (0, "unknown queue %u on interface %s", queue_id,
- hw->name);
+ return VNET_API_ERROR_INVALID_QUEUE;
txq = vnet_hw_if_get_tx_queue (vnm, queue_index);
@@ -1858,6 +1869,7 @@ set_interface_tx_queue (vlib_main_t *vm, unformat_input_t *input,
u32 hw_if_index = (u32) ~0;
u32 queue_id = (u32) 0;
uword *bitmap = 0;
+ int rv = 0;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -1889,7 +1901,23 @@ set_interface_tx_queue (vlib_main_t *vm, unformat_input_t *input,
goto error;
}
- error = set_hw_interface_tx_queue (hw_if_index, queue_id, bitmap);
+ rv = set_hw_interface_tx_queue (hw_if_index, queue_id, bitmap);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (
+ 0, "please specify valid thread(s) - last thread index %u",
+ clib_bitmap_last_set (bitmap));
+ break;
+ case VNET_API_ERROR_INVALID_QUEUE:
+ error = clib_error_return (
+ 0, "unknown queue %u on interface %s", queue_id,
+ vnet_get_hw_interface (vnet_get_main (), hw_if_index)->name);
+ break;
+ default:
+ break;
+ }
error:
clib_bitmap_free (bitmap);
@@ -1979,13 +2007,11 @@ done:
* @cliexstart{set interface rss queues VirtualFunctionEthernet18/1/0 list 0,2-5,7}
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (cmd_set_interface_rss_queues,static) = {
.path = "set interface rss queues",
.short_help = "set interface rss queues <interface> <list <queue-list>>",
.function = set_interface_rss_queues_fn,
};
-/* *INDENT-ON* */
static u8 *
format_vnet_pcap (u8 * s, va_list * args)
@@ -2333,13 +2359,13 @@ pcap_trace_command_fn (vlib_main_t * vm,
* packet capture are preserved, so '<em>any</em>' can be used to reset
* the interface setting.
*
- * - <b>filter</b> - Use the pcap rx / tx / drop trace filter, which
+ * - <b>filter</b> - Use the pcap trace rx / tx / drop filter, which
* must be configured. Use <b>classify filter pcap...</b> to configure the
* filter. The filter will only be executed if the per-interface or
* any-interface tests fail.
*
* - <b>error <node>.<error></b> - filter packets based on a specific error.
- * For example: error {ip4-udp-lookup}.{No listener for dst port}
+ * For example: error {ip4-udp-lookup}.{no_listener}
*
* - <b>file <name></b> - Used to specify the output filename. The file will
* be placed in the '<em>/tmp</em>' directory, so only the filename is
@@ -2362,7 +2388,7 @@ pcap_trace_command_fn (vlib_main_t * vm,
* @cliexend
* Example of how to start a tx packet capture:
* @cliexstart{pcap trace tx max 35 intfc GigabitEthernet0/8/0 file
-vppTest.pcap}
+ * vppTest.pcap}
* @cliexend
* Example of how to display the status of a tx packet capture in progress:
* @cliexstart{pcap trace status}
@@ -2375,7 +2401,6 @@ vppTest.pcap}
* saved to /tmp/vppTest.pcap...
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pcap_tx_trace_command, static) = {
.path = "pcap trace",
@@ -2385,7 +2410,253 @@ VLIB_CLI_COMMAND (pcap_tx_trace_command, static) = {
" [preallocate-data][free-data]",
.function = pcap_trace_command_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+set_pcap_filter_function (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_pcap_t *pp = &vnet_get_main ()->pcap;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vlib_is_packet_traced_fn_t *res = 0;
+ clib_error_t *error = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != (uword) UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vlib_trace_filter_function,
+ &res))
+ ;
+ else
+ {
+ error = clib_error_create (
+ "expected valid trace filter function, got `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+ pp->current_filter_function = res;
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (set_pcap_filter_function_cli, static) = {
+ .path = "set pcap filter function",
+ .short_help = "set pcap filter function <func_name>",
+ .function = set_pcap_filter_function,
+};
+
+static clib_error_t *
+show_pcap_filter_function (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_pcap_t *pp = &vnet_get_main ()->pcap;
+ vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main;
+ vlib_is_packet_traced_fn_t *current_trace_filter_fn =
+ pp->current_filter_function;
+ vlib_trace_filter_function_registration_t *reg =
+ tfm->trace_filter_registration;
+
+ while (reg)
+ {
+ vlib_cli_output (vm, "%sname:%s description: %s priority: %u",
+ reg->function == current_trace_filter_fn ? "(*) " : "",
+ reg->name, reg->description, reg->priority);
+ reg = reg->next;
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_pcap_filter_function_cli, static) = {
+ .path = "show pcap filter function",
+ .short_help = "show pcap filter function",
+ .function = show_pcap_filter_function,
+};
+
+static clib_error_t *
+set_interface_name (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 hw_if_index = ~0;
+ char *name = 0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U %s", unformat_vnet_hw_interface, vnm,
+ &hw_if_index, &name))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ vec_free (name);
+ return error;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~0 || name == 0)
+ {
+ vec_free (name);
+ error = clib_error_return (0, "please specify valid interface name");
+ return error;
+ }
+
+ error = vnet_rename_interface (vnm, hw_if_index, name);
+ vec_free (name);
+
+ return (error);
+}
+
+VLIB_CLI_COMMAND (cmd_set_if_name, static) = {
+ .path = "set interface name",
+ .short_help = "set interface name <interface-name> <new-interface-name>",
+ .function = set_interface_name,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+set_interface_tx_hash_cmd (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi;
+ u8 *hash_name = 0;
+ u32 hw_if_index = (u32) ~0;
+ vnet_hash_fn_t hf;
+ vnet_hash_fn_type_t ftype;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
+ ;
+ else if (unformat (line_input, "hash-name %s", &hash_name))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ return error;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~0)
+ {
+ error = clib_error_return (0, "please specify valid interface name");
+ goto error;
+ }
+
+ if (hash_name == 0)
+ {
+ error = clib_error_return (0, "hash-name is required");
+ goto error;
+ }
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ ftype =
+ vnet_get_hw_interface_class (vnm, hi->hw_class_index)->tx_hash_fn_type;
+ hf = vnet_hash_function_from_name ((const char *) hash_name, ftype);
+
+ if (!hf)
+ {
+ error = clib_error_return (0, "please specify valid hash name");
+ goto error;
+ }
+
+ hi->hf = hf;
+error:
+ vec_free (hash_name);
+ return (error);
+}
+
+VLIB_CLI_COMMAND (cmd_set_if_tx_hash, static) = {
+ .path = "set interface tx-hash",
+ .short_help = "set interface tx-hash <interface> hash-name <hash-name>",
+ .function = set_interface_tx_hash_cmd,
+};
+
+static clib_error_t *
+show_tx_hash (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ clib_error_t *error = 0;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_hw_interface_t *hi;
+ vnet_hash_function_registration_t *hash;
+ u32 hw_if_index = (u32) ~0;
+ vnet_hash_fn_type_t ftype;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", unformat_vnet_hw_interface, vnm,
+ &hw_if_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ unformat_free (line_input);
+ goto error;
+ }
+ }
+
+ unformat_free (line_input);
+
+ if (hw_if_index == (u32) ~0)
+ {
+ error = clib_error_return (0, "please specify valid interface name");
+ goto error;
+ }
+
+ hi = vnet_get_hw_interface (vnm, hw_if_index);
+ ftype =
+ vnet_get_hw_interface_class (vnm, hi->hw_class_index)->tx_hash_fn_type;
+
+ if (hi->hf)
+ {
+ hash = vnet_hash_function_from_func (hi->hf, ftype);
+ if (hash)
+ vlib_cli_output (vm, "%U", format_vnet_hash, hash);
+ else
+ vlib_cli_output (vm, "no matching hash function found");
+ }
+ else
+ vlib_cli_output (vm, "no hashing function set");
+
+error:
+ return (error);
+}
+
+VLIB_CLI_COMMAND (cmd_show_tx_hash, static) = {
+ .path = "show interface tx-hash",
+ .short_help = "show interface tx-hash [interface]",
+ .function = show_tx_hash,
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/interface_format.c b/src/vnet/interface_format.c
index 4acd6ab63e6..0eff8c4597c 100644
--- a/src/vnet/interface_format.c
+++ b/src/vnet/interface_format.c
@@ -120,7 +120,7 @@ format_vnet_hw_interface_link_speed (u8 * s, va_list * args)
{
u32 link_speed = va_arg (*args, u32);
- if (link_speed == 0)
+ if (link_speed == 0 || link_speed == UINT32_MAX)
return format (s, "unknown");
if (link_speed >= 1000000)
@@ -143,11 +143,9 @@ format_vnet_hw_interface_rss_queues (u8 * s, va_list * args)
if (bitmap)
{
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, bitmap) {
s = format (s, "%u ", i);
}
- /* *INDENT-ON* */
}
return s;
@@ -212,6 +210,9 @@ format_vnet_hw_interface (u8 * s, va_list * args)
if (vec_len (hi->tx_queue_indices))
{
s = format (s, "\n%UTX Queues:", format_white_space, indent + 2);
+ s = format (
+ s, "\n%UTX Hash: %U", format_white_space, indent + 4, format_vnet_hash,
+ vnet_hash_function_from_func (hi->hf, hw_class->tx_hash_fn_type));
s = format (s, "\n%U%-6s%-7s%-15s", format_white_space, indent + 4,
"queue", "shared", "thread(s)");
for (int i = 0; i < vec_len (hi->tx_queue_indices); i++)
@@ -287,7 +288,7 @@ format_vnet_sw_if_index_name (u8 * s, va_list * args)
if (NULL == si)
{
- return format (s, "DELETED");
+ return format (s, "DELETED (%u)", sw_if_index);
}
return format (s, "%U", format_vnet_sw_interface_name, vnm, si);
}
@@ -302,7 +303,7 @@ format_vnet_hw_if_index_name (u8 * s, va_list * args)
hi = vnet_get_hw_interface (vnm, hw_if_index);
if (hi == 0)
- return format (s, "DELETED");
+ return format (s, "DELETED (%u)", hw_if_index);
return format (s, "%v", hi->name);
}
@@ -366,11 +367,11 @@ format_vnet_sw_interface_cntrs (u8 * s, vnet_interface_main_t * im,
n_printed += 2;
if (n)
- _vec_len (n) = 0;
+ vec_set_len (n, 0);
n = format (n, "%s packets", cm->name);
s = format (s, "%-16v%16Ld", n, vtotal.packets);
- _vec_len (n) = 0;
+ vec_set_len (n, 0);
n = format (n, "%s bytes", cm->name);
s = format (s, "\n%U%-16v%16Ld",
format_white_space, indent, n, vtotal.bytes);
@@ -599,9 +600,9 @@ format_vnet_buffer_opaque (u8 * s, va_list * args)
s = format (s,
"l2_classify.table_index: %d, l2_classify.opaque_index: %d, "
- "l2_classify.hash: 0x%llx",
- o->l2_classify.table_index,
- o->l2_classify.opaque_index, o->l2_classify.hash);
+ "l2_classify.hash: 0x%lx",
+ o->l2_classify.table_index, o->l2_classify.opaque_index,
+ o->l2_classify.hash);
vec_add1 (s, '\n');
s = format (s, "policer.index: %d", o->policer.index);
@@ -694,17 +695,10 @@ format_vnet_buffer_opaque2 (u8 * s, va_list * args)
s = format (s, "loop_counter: %d", o->loop_counter);
vec_add1 (s, '\n');
- s = format (s, "gbp.flags: %x, gbp.sclass: %d",
- (u32) (o->gbp.flags), (u32) (o->gbp.sclass));
- vec_add1 (s, '\n');
-
s = format (s, "gso_size: %d, gso_l4_hdr_sz: %d",
(u32) (o->gso_size), (u32) (o->gso_l4_hdr_sz));
vec_add1 (s, '\n');
- s = format (s, "pg_replay_timestamp: %llu", (u32) (o->pg_replay_timestamp));
- vec_add1 (s, '\n');
-
for (i = 0; i < vec_len (im->buffer_opaque2_format_helpers); i++)
{
helper_fp = im->buffer_opaque2_format_helpers[i];
diff --git a/src/vnet/interface_funcs.h b/src/vnet/interface_funcs.h
index 14168406377..511df4920e4 100644
--- a/src/vnet/interface_funcs.h
+++ b/src/vnet/interface_funcs.h
@@ -231,6 +231,10 @@ u32 vnet_register_interface (vnet_main_t * vnm,
void vnet_set_interface_output_node (vnet_main_t * vnm,
u32 hw_if_index, u32 node_index);
+void vnet_set_interface_l3_output_node (vlib_main_t *vm, u32 sw_if_index,
+ u8 *output_node);
+void vnet_reset_interface_l3_output_node (vlib_main_t *vm, u32 sw_if_index);
+
/* Creates a software interface given template. */
clib_error_t *vnet_create_sw_interface (vnet_main_t * vnm,
vnet_sw_interface_t * template,
@@ -306,7 +310,7 @@ always_inline u32
vnet_hw_interface_get_mtu (vnet_main_t * vnm, u32 hw_if_index)
{
vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- return hw->max_packet_bytes;
+ return hw->max_frame_size - hw->frame_overhead;
}
always_inline u32
@@ -350,6 +354,9 @@ vnet_sw_interface_is_sub (vnet_main_t *vnm, u32 sw_if_index)
return (sw->sw_if_index != sw->sup_sw_if_index);
}
+clib_error_t *vnet_sw_interface_supports_addressing (vnet_main_t *vnm,
+ u32 sw_if_index);
+
always_inline vlib_frame_t *
vnet_get_frame_to_sw_interface (vnet_main_t * vnm, u32 sw_if_index)
{
@@ -420,9 +427,16 @@ clib_error_t *set_hw_interface_change_rx_mode (vnet_main_t * vnm,
/* Set rx-placement on the interface */
clib_error_t *set_hw_interface_rx_placement (u32 hw_if_index, u32 queue_id,
u32 thread_index, u8 is_main);
+/* Set tx-queue placement on the interface */
+int set_hw_interface_tx_queue (u32 hw_if_index, u32 queue_id, uword *bitmap);
+/* Set the Max Frame Size on the HW interface */
+clib_error_t *vnet_hw_interface_set_max_frame_size (vnet_main_t *vnm,
+ u32 hw_if_index,
+ u32 max_frame_size);
/* Set the MTU on the HW interface */
-void vnet_hw_interface_set_mtu (vnet_main_t * vnm, u32 hw_if_index, u32 mtu);
+clib_error_t *vnet_hw_interface_set_mtu (vnet_main_t *vnm, u32 hw_if_index,
+ u32 mtu);
/* Set the MTU on the SW interface */
void vnet_sw_interface_set_mtu (vnet_main_t * vnm, u32 sw_if_index, u32 mtu);
@@ -430,8 +444,8 @@ void vnet_sw_interface_set_protocol_mtu (vnet_main_t * vnm, u32 sw_if_index,
u32 mtu[]);
/* update the unnumbered state of an interface */
-void vnet_sw_interface_update_unnumbered (u32 sw_if_index,
- u32 ip_sw_if_index, u8 enable);
+int vnet_sw_interface_update_unnumbered (u32 sw_if_index, u32 ip_sw_if_index,
+ u8 enable);
int vnet_sw_interface_stats_collect_enable_disable (u32 sw_if_index,
u8 enable);
@@ -469,12 +483,14 @@ unformat_function_t unformat_vnet_sw_interface_flags;
format_function_t format_vtr;
/* Node runtime for interface output function. */
+struct vnet_dev_tx_queue;
typedef struct
{
u32 hw_if_index;
u32 sw_if_index;
u32 dev_instance;
- u32 is_deleted;
+ u8 is_deleted;
+ struct vnet_dev_tx_queue *tx_queue;
} vnet_interface_output_runtime_t;
/* Interface output function. */
@@ -502,6 +518,7 @@ typedef enum
{
VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN,
VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED,
+ VNET_INTERFACE_OUTPUT_ERROR_NO_TX_QUEUE,
} vnet_interface_output_error_t;
/* Format for interface output traces. */
@@ -531,6 +548,7 @@ pcap_add_buffer (pcap_main_t *pm, struct vlib_main_t *vm, u32 buffer_index,
if (PREDICT_TRUE (pm->n_packets_captured < pm->n_packets_to_capture))
{
+ time_now += vm->clib_time.init_reference_time;
clib_spinlock_lock_if_init (&pm->lock);
d = pcap_add_packet (pm, time_now, n_left, n);
while (1)
@@ -547,6 +565,31 @@ pcap_add_buffer (pcap_main_t *pm, struct vlib_main_t *vm, u32 buffer_index,
clib_spinlock_unlock_if_init (&pm->lock);
}
}
+
+typedef struct
+{
+ vnet_hw_if_caps_t val;
+ vnet_hw_if_caps_t mask;
+} vnet_hw_if_caps_change_t;
+
+void vnet_hw_if_change_caps (vnet_main_t *vnm, u32 hw_if_index,
+ vnet_hw_if_caps_change_t *caps);
+
+static_always_inline void
+vnet_hw_if_set_caps (vnet_main_t *vnm, u32 hw_if_index, vnet_hw_if_caps_t caps)
+{
+ vnet_hw_if_caps_change_t cc = { .val = caps, .mask = caps };
+ vnet_hw_if_change_caps (vnm, hw_if_index, &cc);
+}
+
+static_always_inline void
+vnet_hw_if_unset_caps (vnet_main_t *vnm, u32 hw_if_index,
+ vnet_hw_if_caps_t caps)
+{
+ vnet_hw_if_caps_change_t cc = { .val = 0, .mask = caps };
+ vnet_hw_if_change_caps (vnm, hw_if_index, &cc);
+}
+
#endif /* included_vnet_interface_funcs_h */
/*
diff --git a/src/vnet/interface_output.c b/src/vnet/interface_output.c
index 0de2714ed61..47844dcd68a 100644
--- a/src/vnet/interface_output.c
+++ b/src/vnet/interface_output.c
@@ -46,9 +46,12 @@
#include <vnet/udp/udp_packet.h>
#include <vnet/feature/feature.h>
#include <vnet/classify/pcap_classify.h>
+#include <vnet/hash/hash.h>
#include <vnet/interface_output.h>
#include <vppinfra/vector/mask_compare.h>
#include <vppinfra/vector/compress.h>
+#include <vppinfra/vector/count_equal.h>
+#include <vppinfra/vector/array_mask.h>
typedef struct
{
@@ -82,9 +85,8 @@ format_vnet_interface_output_trace (u8 * s, va_list * va)
else
{
si = vnet_get_sw_interface (vnm, t->sw_if_index);
- s =
- format (s, "%U ", format_vnet_sw_interface_name, vnm, si,
- t->flags);
+ s = format (s, "%U flags 0x%08x", format_vnet_sw_interface_name, vnm,
+ si, t->flags);
}
s =
format (s, "\n%U%U", format_white_space, indent,
@@ -167,16 +169,19 @@ vnet_interface_output_trace (vlib_main_t * vm,
static_always_inline void
vnet_interface_output_handle_offload (vlib_main_t *vm, vlib_buffer_t *b)
{
+ if (b->flags & VNET_BUFFER_F_GSO)
+ return;
vnet_calc_checksums_inline (vm, b, b->flags & VNET_BUFFER_F_IS_IP4,
b->flags & VNET_BUFFER_F_IS_IP6);
+ vnet_calc_outer_checksums_inline (vm, b);
}
static_always_inline uword
vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
vlib_combined_counter_main_t *ccm,
- vlib_buffer_t **b, u32 config_index, u8 arc,
- u32 n_left, int do_tx_offloads,
- int arc_or_subif)
+ vlib_buffer_t **b, void **p,
+ u32 config_index, u8 arc, u32 n_left,
+ int processing_level)
{
u32 n_bytes = 0;
u32 n_bytes0, n_bytes1, n_bytes2, n_bytes3;
@@ -192,7 +197,7 @@ vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
vlib_prefetch_buffer_header (b[6], LOAD);
vlib_prefetch_buffer_header (b[7], LOAD);
- if (do_tx_offloads)
+ if (processing_level >= 1)
or_flags = b[0]->flags | b[1]->flags | b[2]->flags | b[3]->flags;
/* Be grumpy about zero length buffers for benefit of
@@ -207,7 +212,16 @@ vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
n_bytes += n_bytes2 = vlib_buffer_length_in_chain (vm, b[2]);
n_bytes += n_bytes3 = vlib_buffer_length_in_chain (vm, b[3]);
- if (arc_or_subif)
+ if (processing_level >= 3)
+ {
+ p[0] = vlib_buffer_get_current (b[0]);
+ p[1] = vlib_buffer_get_current (b[1]);
+ p[2] = vlib_buffer_get_current (b[2]);
+ p[3] = vlib_buffer_get_current (b[3]);
+ p += 4;
+ }
+
+ if (processing_level >= 2)
{
u32 tx_swif0, tx_swif1, tx_swif2, tx_swif3;
tx_swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
@@ -241,7 +255,7 @@ vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
}
}
- if (do_tx_offloads && (or_flags & VNET_BUFFER_F_OFFLOAD))
+ if (processing_level >= 1 && (or_flags & VNET_BUFFER_F_OFFLOAD))
{
vnet_interface_output_handle_offload (vm, b[0]);
vnet_interface_output_handle_offload (vm, b[1]);
@@ -261,7 +275,13 @@ vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
n_bytes += n_bytes0 = vlib_buffer_length_in_chain (vm, b[0]);
- if (arc_or_subif)
+ if (processing_level >= 3)
+ {
+ p[0] = vlib_buffer_get_current (b[0]);
+ p += 1;
+ }
+
+ if (processing_level >= 2)
{
u32 tx_swif0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
@@ -275,7 +295,7 @@ vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
vlib_increment_combined_counter (ccm, ti, tx_swif0, 1, n_bytes0);
}
- if (do_tx_offloads)
+ if (processing_level >= 1)
vnet_interface_output_handle_offload (vm, b[0]);
n_left -= 1;
@@ -285,25 +305,33 @@ vnet_interface_output_node_inline (vlib_main_t *vm, u32 sw_if_index,
return n_bytes;
}
-static_always_inline void vnet_interface_pcap_tx_trace
- (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame,
- int sw_if_index_from_buffer)
+static_always_inline void
+vnet_interface_pcap_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int in_interface_ouput)
{
vnet_main_t *vnm = vnet_get_main ();
u32 n_left_from, *from;
- u32 sw_if_index;
+ u32 sw_if_index = ~0, hw_if_index = ~0;
vnet_pcap_t *pp = &vnm->pcap;
if (PREDICT_TRUE (pp->pcap_tx_enable == 0))
return;
- if (sw_if_index_from_buffer == 0)
+ if (in_interface_ouput)
+ {
+ /* interface-output is called right before interface-output-template.
+ * We only want to capture packets here if there is a per-interface
+ * filter, in case it matches the sub-interface sw_if_index.
+ * If there is no per-interface filter configured, let the
+ * interface-output-template node deal with it */
+ if (pp->pcap_sw_if_index == 0)
+ return;
+ }
+ else
{
vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
sw_if_index = rt->sw_if_index;
}
- else
- sw_if_index = ~0;
n_left_from = frame->n_vectors;
from = vlib_frame_vector_args (frame);
@@ -315,8 +343,19 @@ static_always_inline void vnet_interface_pcap_tx_trace
from++;
n_left_from--;
- if (sw_if_index_from_buffer)
- sw_if_index = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ if (in_interface_ouput)
+ {
+ const u32 sii = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ if (PREDICT_FALSE (sii != sw_if_index))
+ {
+ const vnet_hw_interface_t *hi =
+ vnet_get_sup_hw_interface (vnm, sii);
+ hw_if_index = hi->sw_if_index;
+ sw_if_index = sii;
+ }
+ if (hw_if_index == sw_if_index)
+ continue; /* defer to interface-output-template */
+ }
if (vnet_is_packet_pcaped (pp, b0, sw_if_index))
pcap_add_buffer (&pp->pcap_main, vm, bi0, pp->max_bytes_per_pkt);
@@ -324,39 +363,71 @@ static_always_inline void vnet_interface_pcap_tx_trace
}
static_always_inline void
-store_tx_frame_scalar_data (vnet_hw_if_output_node_runtime_t *r,
- vnet_hw_if_tx_frame_t *tf)
+hash_func_with_mask (void **p, u32 *hash, u32 n_packets, u32 *lookup_table,
+ u32 mask, vnet_hash_fn_t hf)
{
- if (r)
- clib_memcpy_fast (tf, &r->frame, sizeof (vnet_hw_if_tx_frame_t));
+ u32 n_left_from = n_packets;
+
+ hf (p, hash, n_packets);
+
+ clib_array_mask_u32 (hash, mask, n_packets);
+
+ while (n_left_from >= 4)
+ {
+ hash[0] = lookup_table[hash[0]];
+ hash[1] = lookup_table[hash[1]];
+ hash[2] = lookup_table[hash[2]];
+ hash[3] = lookup_table[hash[3]];
+
+ hash += 4;
+ n_left_from -= 4;
+ }
+
+ while (n_left_from > 0)
+ {
+ hash[0] = lookup_table[hash[0]];
+
+ hash += 1;
+ n_left_from -= 1;
+ }
}
static_always_inline void
-enqueu_to_tx_node (vlib_main_t *vm, vlib_node_runtime_t *node,
- vnet_hw_interface_t *hi, u32 *from, u32 n_vectors)
+store_tx_frame_scalar_data (vnet_hw_if_tx_frame_t *copy_frame,
+ vnet_hw_if_tx_frame_t *tf)
{
- u32 next_index = VNET_INTERFACE_OUTPUT_NEXT_TX;
- vnet_hw_if_output_node_runtime_t *r = 0;
- u32 n_free, n_copy, *to;
- vnet_hw_if_tx_frame_t *tf;
- vlib_frame_t *f;
-
- ASSERT (n_vectors <= VLIB_FRAME_SIZE);
+ if (copy_frame)
+ clib_memcpy_fast (tf, copy_frame, sizeof (vnet_hw_if_tx_frame_t));
+}
- if (hi->output_node_thread_runtimes)
- r = vec_elt_at_index (hi->output_node_thread_runtimes, vm->thread_index);
+static_always_inline u32
+enqueue_one_to_tx_node (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *ppqi,
+ u32 *from, vnet_hw_if_tx_frame_t *copy_frame,
+ u32 n_vectors, u32 n_left, u32 next_index)
+{
+ u32 tmp[VLIB_FRAME_SIZE];
+ vlib_frame_bitmap_t mask = {};
+ vlib_frame_t *f;
+ vnet_hw_if_tx_frame_t *tf;
+ u32 *to;
+ u32 n_copy = 0, n_free = 0;
f = vlib_get_next_frame_internal (vm, node, next_index, 0);
tf = vlib_frame_scalar_args (f);
- if (f->n_vectors > 0 && (r == 0 || tf->queue_id == r->frame.queue_id))
+ if (f->n_vectors > 0 &&
+ (!copy_frame || (tf->queue_id == copy_frame->queue_id)))
{
/* append current next frame */
n_free = VLIB_FRAME_SIZE - f->n_vectors;
- n_copy = clib_min (n_vectors, n_free);
- n_vectors -= n_copy;
- to = vlib_frame_vector_args (f);
- to += f->n_vectors;
+ /*
+ * if frame contains enough space for worst case scenario,
+ * we can avoid use of tmp
+ */
+ if (n_free >= n_left)
+ to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
+ else
+ to = tmp;
}
else
{
@@ -368,25 +439,113 @@ enqueu_to_tx_node (vlib_main_t *vm, vlib_node_runtime_t *node,
}
/* empty frame - store scalar data */
- store_tx_frame_scalar_data (r, tf);
+ store_tx_frame_scalar_data (copy_frame, tf);
to = vlib_frame_vector_args (f);
n_free = VLIB_FRAME_SIZE;
- n_copy = n_vectors;
- n_vectors = 0;
}
- vlib_buffer_copy_indices (to, from, n_copy);
- vlib_put_next_frame (vm, node, next_index, n_free - n_copy);
+ /*
+ * per packet queue id array
+ * compare with given queue_id, if match, copy respective buffer index from
+ * -> to
+ */
+ if (ppqi)
+ {
+ clib_mask_compare_u32 (copy_frame->queue_id, ppqi, mask, n_vectors);
+ n_copy = clib_compress_u32 (to, from, mask, n_vectors);
- if (n_vectors == 0)
- return;
+ if (n_copy == 0)
+ return n_left;
+ }
+ else
+ {
+ /*
+ * no work required, just copy all buffer indices from -> to
+ */
+ n_copy = n_left;
+ vlib_buffer_copy_indices (to, from, n_copy);
+ }
- /* we have more indices to store, take empty frame */
- from += n_copy;
- f = vlib_get_next_frame_internal (vm, node, next_index, 1);
- store_tx_frame_scalar_data (r, vlib_frame_scalar_args (f));
- vlib_buffer_copy_indices (vlib_frame_vector_args (f), from, n_vectors);
- vlib_put_next_frame (vm, node, next_index, VLIB_FRAME_SIZE - n_vectors);
+ if (to != tmp)
+ {
+ /* indices already written to frame, just close it */
+ vlib_put_next_frame (vm, node, next_index, n_free - n_copy);
+ }
+ else if (n_free >= n_copy)
+ {
+ /* enough space in the existing frame */
+ to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
+ vlib_buffer_copy_indices (to, tmp, n_copy);
+ vlib_put_next_frame (vm, node, next_index, n_free - n_copy);
+ }
+ else
+ {
+ /* full frame */
+ to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
+ vlib_buffer_copy_indices (to, tmp, n_free);
+ vlib_put_next_frame (vm, node, next_index, 0);
+
+ /* second frame */
+ u32 n_2nd_frame = n_copy - n_free;
+ f = vlib_get_next_frame_internal (vm, node, next_index, 1);
+ tf = vlib_frame_scalar_args (f);
+ /* empty frame - store scalar data */
+ store_tx_frame_scalar_data (copy_frame, tf);
+ to = vlib_frame_vector_args (f);
+ vlib_buffer_copy_indices (to, tmp + n_free, n_2nd_frame);
+ vlib_put_next_frame (vm, node, next_index,
+ VLIB_FRAME_SIZE - n_2nd_frame);
+ }
+
+ return n_left - n_copy;
+}
+
+static_always_inline void
+enqueue_to_tx_node (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vnet_hw_interface_t *hi, u32 next_index,
+ vnet_hw_if_output_node_runtime_t *r, u32 *from, void **p,
+ u32 n_vectors)
+{
+ u32 n_left = n_vectors;
+
+ ASSERT (n_vectors <= VLIB_FRAME_SIZE);
+
+ /*
+ * backward compatible for drivers not integrated with new tx infra.
+ */
+ if (r == 0)
+ {
+ n_left = enqueue_one_to_tx_node (vm, node, NULL, from, NULL, n_vectors,
+ n_left, next_index);
+ }
+ /*
+ * only 1 tx queue of given interface is available on given thread
+ */
+ else if (r->n_queues == 1)
+ {
+ n_left = enqueue_one_to_tx_node (vm, node, NULL, from, r->frame,
+ n_vectors, n_left, next_index);
+ }
+ /*
+ * multi tx-queues use case
+ */
+ else if (r->n_queues > 1)
+ {
+ u32 qids[VLIB_FRAME_SIZE];
+
+ hash_func_with_mask (p, qids, n_vectors, r->lookup_table,
+ vec_len (r->lookup_table) - 1, hi->hf);
+
+ for (u32 i = 0; i < r->n_queues; i++)
+ {
+ n_left = enqueue_one_to_tx_node (vm, node, qids, from, &r->frame[i],
+ n_vectors, n_left, next_index);
+ if (n_left == 0)
+ break;
+ }
+ }
+ else
+ ASSERT (0);
}
VLIB_NODE_FN (vnet_interface_output_node)
@@ -398,6 +557,7 @@ VLIB_NODE_FN (vnet_interface_output_node)
vnet_hw_interface_t *hi;
vnet_sw_interface_t *si;
vnet_interface_output_runtime_t *rt = (void *) node->runtime_data;
+ vnet_hw_if_output_node_runtime_t *r = 0;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
u32 n_bytes, n_buffers = frame->n_vectors;
u32 config_index = ~0;
@@ -407,6 +567,8 @@ VLIB_NODE_FN (vnet_interface_output_node)
u8 arc = im->output_feature_arc_index;
int arc_or_subif = 0;
int do_tx_offloads = 0;
+ void *ptr[VLIB_FRAME_SIZE], **p = ptr;
+ u8 is_parr = 0;
u32 *from;
if (node->flags & VLIB_NODE_FLAG_TRACE)
@@ -420,8 +582,7 @@ VLIB_NODE_FN (vnet_interface_output_node)
/* buffer stride */ 1, n_buffers, VNET_INTERFACE_OUTPUT_NEXT_DROP,
node->node_index, VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DELETED);
- vnet_interface_pcap_tx_trace (vm, node, frame,
- 0 /* sw_if_index_from_buffer */ );
+ vnet_interface_pcap_tx_trace (vm, node, frame, 0 /* in_interface_ouput */);
vlib_get_buffers (vm, from, bufs, n_buffers);
@@ -443,6 +604,27 @@ VLIB_NODE_FN (vnet_interface_output_node)
node->node_index, VNET_INTERFACE_OUTPUT_ERROR_INTERFACE_DOWN);
}
+ if (hi->output_node_thread_runtimes)
+ r = vec_elt_at_index (hi->output_node_thread_runtimes, vm->thread_index);
+
+ if (r)
+ {
+ /*
+ * tx queue of given interface is not available on given thread
+ */
+ if (r->n_queues == 0)
+ return vlib_error_drop_buffers (
+ vm, node, from,
+ /* buffer stride */ 1, n_buffers, VNET_INTERFACE_OUTPUT_NEXT_DROP,
+ node->node_index, VNET_INTERFACE_OUTPUT_ERROR_NO_TX_QUEUE);
+ /*
+ * multiple tx queues available on given thread
+ */
+ else if (r->n_queues > 1)
+ /* construct array of pointer */
+ is_parr = 1;
+ }
+
/* interface-output feature arc handling */
if (PREDICT_FALSE (vnet_have_features (arc, sw_if_index)))
{
@@ -457,26 +639,33 @@ VLIB_NODE_FN (vnet_interface_output_node)
ccm = im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX;
- if ((hi->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TX_CKSUM) == 0)
+ /* if not all three flags IP4_,TCP_,UDP_CKSUM set, do compute them
+ * here before sending to the interface */
+ if ((hi->caps & VNET_HW_IF_CAP_TX_CKSUM) != VNET_HW_IF_CAP_TX_CKSUM)
do_tx_offloads = 1;
- if (do_tx_offloads == 0 && arc_or_subif == 0)
+ // basic processing
+ if (do_tx_offloads == 0 && arc_or_subif == 0 && is_parr == 0)
n_bytes = vnet_interface_output_node_inline (
- vm, sw_if_index, ccm, bufs, config_index, arc, n_buffers, 0, 0);
- else if (do_tx_offloads == 0 && arc_or_subif == 1)
+ vm, sw_if_index, ccm, bufs, NULL, config_index, arc, n_buffers, 0);
+ // basic processing + tx offloads
+ else if (do_tx_offloads == 1 && arc_or_subif == 0 && is_parr == 0)
n_bytes = vnet_interface_output_node_inline (
- vm, sw_if_index, ccm, bufs, config_index, arc, n_buffers, 0, 1);
- else if (do_tx_offloads == 1 && arc_or_subif == 0)
+ vm, sw_if_index, ccm, bufs, NULL, config_index, arc, n_buffers, 1);
+ // basic processing + tx offloads + vlans + arcs
+ else if (do_tx_offloads == 1 && arc_or_subif == 1 && is_parr == 0)
n_bytes = vnet_interface_output_node_inline (
- vm, sw_if_index, ccm, bufs, config_index, arc, n_buffers, 1, 0);
+ vm, sw_if_index, ccm, bufs, NULL, config_index, arc, n_buffers, 2);
+ // basic processing + tx offloads + vlans + arcs + multi-txqs
else
n_bytes = vnet_interface_output_node_inline (
- vm, sw_if_index, ccm, bufs, config_index, arc, n_buffers, 1, 1);
+ vm, sw_if_index, ccm, bufs, p, config_index, arc, n_buffers, 3);
from = vlib_frame_vector_args (frame);
if (PREDICT_TRUE (next_index == VNET_INTERFACE_OUTPUT_NEXT_TX))
{
- enqueu_to_tx_node (vm, node, hi, from, frame->n_vectors);
+ enqueue_to_tx_node (vm, node, hi, next_index, r, from, ptr,
+ frame->n_vectors);
}
else
{
@@ -504,8 +693,7 @@ VLIB_NODE_FN (vnet_per_buffer_interface_output_node) (vlib_main_t * vm,
u32 n_left_to_next, *from, *to_next;
u32 n_left_from, next_index;
- vnet_interface_pcap_tx_trace (vm, node, frame,
- 1 /* sw_if_index_from_buffer */ );
+ vnet_interface_pcap_tx_trace (vm, node, frame, 1 /* in_interface_ouput */);
n_left_from = frame->n_vectors;
@@ -1033,7 +1221,6 @@ VLIB_NODE_FN (interface_punt) (vlib_main_t * vm,
return interface_drop_punt (vm, node, frame, VNET_ERROR_DISPOSITION_PUNT);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (interface_drop) = {
.name = "error-drop",
.vector_size = sizeof (u32),
@@ -1044,9 +1231,7 @@ VLIB_REGISTER_NODE (interface_drop) = {
[0] = "drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (interface_punt) = {
.name = "error-punt",
.vector_size = sizeof (u32),
@@ -1057,7 +1242,6 @@ VLIB_REGISTER_NODE (interface_punt) = {
[0] = "punt",
},
};
-/* *INDENT-ON* */
VLIB_REGISTER_NODE (vnet_per_buffer_interface_output_node) = {
.name = "interface-output",
@@ -1069,16 +1253,13 @@ VLIB_NODE_FN (vnet_interface_output_arc_end_node)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_interface_main_t *im = &vnm->interface_main;
- vnet_hw_if_output_node_runtime_t *r = 0;
vnet_hw_interface_t *hi;
- vnet_hw_if_tx_frame_t *tf;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
u32 sw_if_indices[VLIB_FRAME_SIZE], *sw_if_index = sw_if_indices;
- u64 used_elts[VLIB_FRAME_SIZE / 64] = {};
- u64 mask[VLIB_FRAME_SIZE / 64] = {};
- u32 *tmp, *from, n_left, n_free, n_comp, *to, swif, off;
+ vlib_frame_bitmap_t used_elts = {}, mask = {};
+ u32 *tmp, *from, n_left, n_comp, n_p_comp, swif, off;
u16 next_index;
- vlib_frame_t *f;
+ void *ptr[VLIB_FRAME_SIZE], **p = ptr;
from = vlib_frame_vector_args (frame);
n_left = frame->n_vectors;
@@ -1090,11 +1271,17 @@ VLIB_NODE_FN (vnet_interface_output_arc_end_node)
vlib_prefetch_buffer_header (b[5], LOAD);
vlib_prefetch_buffer_header (b[6], LOAD);
vlib_prefetch_buffer_header (b[7], LOAD);
+
+ p[0] = vlib_buffer_get_current (b[0]);
+ p[1] = vlib_buffer_get_current (b[1]);
+ p[2] = vlib_buffer_get_current (b[2]);
+ p[3] = vlib_buffer_get_current (b[3]);
sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
sw_if_index[1] = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
sw_if_index[2] = vnet_buffer (b[2])->sw_if_index[VLIB_TX];
sw_if_index[3] = vnet_buffer (b[3])->sw_if_index[VLIB_TX];
+ p += 4;
b += 4;
sw_if_index += 4;
n_left -= 4;
@@ -1102,7 +1289,9 @@ VLIB_NODE_FN (vnet_interface_output_arc_end_node)
while (n_left)
{
+ p[0] = vlib_buffer_get_current (b[0]);
sw_if_index[0] = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
+ p++;
b++;
sw_if_index++;
n_left--;
@@ -1119,74 +1308,45 @@ VLIB_NODE_FN (vnet_interface_output_arc_end_node)
more:
next_index = vec_elt (im->if_out_arc_end_next_index_by_sw_if_index, swif);
hi = vnet_get_sup_hw_interface (vnm, swif);
+ vnet_hw_if_output_node_runtime_t *r = 0;
+ void *ptr_tmp[VLIB_FRAME_SIZE], **p_tmp = ptr_tmp;
+
if (hi->output_node_thread_runtimes)
r = vec_elt_at_index (hi->output_node_thread_runtimes, vm->thread_index);
- f = vlib_get_next_frame_internal (vm, node, next_index, 0);
- tf = vlib_frame_scalar_args (f);
-
- if (f->n_vectors > 0 && (r == 0 || r->frame.queue_id == tf->queue_id))
- {
- /* append frame */
- n_free = VLIB_FRAME_SIZE - f->n_vectors;
- if (n_free >= f->n_vectors)
- to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
- else
- to = tmp;
- }
- else
- {
- if (f->n_vectors > 0)
- {
- /* current frame doesn't fit - grab empty one */
- f = vlib_get_next_frame_internal (vm, node, next_index, 1);
- tf = vlib_frame_scalar_args (f);
- }
-
- /* empty frame - store scalar data */
- store_tx_frame_scalar_data (r, tf);
- n_free = VLIB_FRAME_SIZE;
- to = vlib_frame_vector_args (f);
- }
/* compare and compress based on comparison mask */
clib_mask_compare_u32 (swif, sw_if_indices, mask, frame->n_vectors);
- n_comp = clib_compress_u32 (to, from, mask, frame->n_vectors);
+ n_comp = clib_compress_u32 (tmp, from, mask, frame->n_vectors);
- if (tmp != to)
- {
- /* indices already written to frame, just close it */
- vlib_put_next_frame (vm, node, next_index, n_free - n_comp);
- }
- else if (n_free >= n_comp)
+ /*
+ * tx queue of given interface is not available on given thread
+ */
+ if (r)
{
- /* enough space in the existing frame */
- to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
- vlib_buffer_copy_indices (to, tmp, n_comp);
- vlib_put_next_frame (vm, node, next_index, n_free - n_comp);
+ if (r->n_queues == 0)
+ {
+ vlib_error_drop_buffers (
+ vm, node, tmp,
+ /* buffer stride */ 1, n_comp, VNET_INTERFACE_OUTPUT_NEXT_DROP,
+ node->node_index, VNET_INTERFACE_OUTPUT_ERROR_NO_TX_QUEUE);
+ goto drop;
+ }
+ else if (r->n_queues > 1)
+ {
+ n_p_comp = clib_compress_u64 ((u64 *) p_tmp, (u64 *) ptr, mask,
+ frame->n_vectors);
+ ASSERT (n_p_comp == n_comp);
+ }
}
- else
- {
- /* full frame */
- to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors;
- vlib_buffer_copy_indices (to, tmp, n_free);
- vlib_put_next_frame (vm, node, next_index, 0);
- /* second frame */
- u32 n_frame2 = n_comp - n_free;
- f = vlib_get_next_frame_internal (vm, node, next_index, 1);
- to = vlib_frame_vector_args (f);
- vlib_buffer_copy_indices (to, tmp + n_free, n_frame2);
- tf = vlib_frame_scalar_args (f);
- store_tx_frame_scalar_data (r, tf);
- vlib_put_next_frame (vm, node, next_index, VLIB_FRAME_SIZE - n_frame2);
- }
+ enqueue_to_tx_node (vm, node, hi, next_index, r, tmp, ptr_tmp, n_comp);
+drop:
n_left -= n_comp;
if (n_left)
{
/* store comparison mask so we can find next unused element */
- for (int i = 0; i < ARRAY_LEN (used_elts); i++)
- used_elts[i] |= mask[i];
+ vlib_frame_bitmap_or (used_elts, mask);
/* fine first unused sw_if_index by scanning trough used_elts bitmap */
while (PREDICT_FALSE (used_elts[off] == ~0))
diff --git a/src/vnet/interface_output.h b/src/vnet/interface_output.h
index 15b0a1d3ccc..b512d9a04a8 100644
--- a/src/vnet/interface_output.h
+++ b/src/vnet/interface_output.h
@@ -41,6 +41,7 @@
#define __INTERFACE_INLINES_H__
#include <vnet/vnet.h>
+#include <vnet/tcp/tcp_packet.h>
static_always_inline void
vnet_calc_ip4_checksums (vlib_main_t *vm, vlib_buffer_t *b, ip4_header_t *ip4,
@@ -114,6 +115,36 @@ vnet_calc_checksums_inline (vlib_main_t * vm, vlib_buffer_t * b,
VNET_BUFFER_OFFLOAD_F_TCP_CKSUM));
}
+static_always_inline void
+vnet_calc_outer_checksums_inline (vlib_main_t *vm, vlib_buffer_t *b)
+{
+
+ if (!(b->flags & VNET_BUFFER_F_OFFLOAD))
+ return;
+
+ vnet_buffer_oflags_t oflags = vnet_buffer (b)->oflags;
+ if (oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM)
+ {
+ ip4_header_t *ip4;
+ ip4 = (ip4_header_t *) (b->data + vnet_buffer2 (b)->outer_l3_hdr_offset);
+ ip4->checksum = ip4_header_checksum (ip4);
+ vnet_buffer_offload_flags_clear (b,
+ VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM);
+ }
+ else if (oflags & VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM)
+ {
+ int bogus;
+ ip6_header_t *ip6;
+ udp_header_t *uh;
+
+ ip6 = (ip6_header_t *) (b->data + vnet_buffer2 (b)->outer_l3_hdr_offset);
+ uh = (udp_header_t *) (b->data + vnet_buffer2 (b)->outer_l4_hdr_offset);
+ uh->checksum = 0;
+ uh->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6, &bogus);
+ vnet_buffer_offload_flags_clear (b,
+ VNET_BUFFER_OFFLOAD_F_OUTER_UDP_CKSUM);
+ }
+}
#endif
/*
diff --git a/src/vnet/interface_stats.c b/src/vnet/interface_stats.c
index 3afde0ea54f..ff1a2af9130 100644
--- a/src/vnet/interface_stats.c
+++ b/src/vnet/interface_stats.c
@@ -170,7 +170,6 @@ VLIB_NODE_FN (stats_collect_tx_node) (vlib_main_t * vm,
return stats_collect_inline (vm, node, frame, VLIB_TX);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (stats_collect_rx_node) = {
.vector_size = sizeof (u32),
.format_trace = format_stats_collect_trace,
@@ -201,7 +200,6 @@ VNET_FEATURE_INIT (stats_collect_tx_node, static) = {
.runs_before = VNET_FEATURES ("interface-output-arc-end"),
};
-/* *INDENT-ON* */
static clib_error_t *
stats_collect_init (vlib_main_t * vm)
diff --git a/src/vnet/interface_test.c b/src/vnet/interface_test.c
new file mode 100644
index 00000000000..2d0c0ee81d1
--- /dev/null
+++ b/src/vnet/interface_test.c
@@ -0,0 +1,1316 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vpp/api/types.h>
+
+#include <vnet/ip/ip_types_api.h>
+
+#define __plugin_msg_base interface_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <vnet/format_fns.h>
+#include <vnet/interface.api_enum.h>
+#include <vnet/interface.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
+#include <vlibmemory/memclnt.api_enum.h>
+
+#define vl_endianfun /* define message structures */
+#include <vnet/interface.api.h>
+#undef vl_endianfun
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} interface_test_main_t;
+
+static interface_test_main_t interface_test_main;
+
+static int
+api_sw_interface_set_flags (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_flags_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 admin_up = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "admin-up"))
+ admin_up = 1;
+ else if (unformat (i, "admin-down"))
+ admin_up = 0;
+ else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_FLAGS, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->flags = ntohl ((admin_up) ? IF_STATUS_API_FLAG_ADMIN_UP : 0);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return the good/bad news... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_hw_interface_set_mtu (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_hw_interface_set_mtu_t *mp;
+ u32 sw_if_index = ~0;
+ u32 mtu = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "mtu %d", &mtu))
+ ;
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (mtu == 0)
+ {
+ errmsg ("no mtu specified");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (HW_INTERFACE_SET_MTU, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->mtu = ntohs ((u16) mtu);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_tag_add_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_tag_add_del_t *mp;
+ u32 sw_if_index = ~0;
+ u8 *tag = 0;
+ u8 enable = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "tag %s", &tag))
+ ;
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "del"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (enable && (tag == 0))
+ {
+ errmsg ("no tag specified");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_TAG_ADD_DEL, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = enable;
+ if (enable)
+ strncpy ((char *) mp->tag, (char *) tag, ARRAY_LEN (mp->tag) - 1);
+ vec_free (tag);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_add_del_mac_address (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_mac_address_t mac = { 0 };
+ vl_api_sw_interface_add_del_mac_address_t *mp;
+ u32 sw_if_index = ~0;
+ u8 is_add = 1;
+ u8 mac_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "%U", unformat_vl_api_mac_address, &mac))
+ mac_set++;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (!mac_set)
+ {
+ errmsg ("missing MAC address");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_ADD_DEL_MAC_ADDRESS, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = is_add;
+ clib_memcpy (&mp->addr, &mac, sizeof (mac));
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_sw_interface_details_t_handler (vl_api_sw_interface_details_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ u8 *s = format (0, "%s%c", mp->interface_name, 0);
+
+ hash_set_mem (vam->sw_if_index_by_interface_name, s,
+ ntohl (mp->sw_if_index));
+
+ /* In sub interface case, fill the sub interface table entry */
+ if (mp->sw_if_index != mp->sup_sw_if_index)
+ {
+ sw_interface_subif_t *sub = NULL;
+
+ vec_add2 (vam->sw_if_subif_table, sub, 1);
+
+ vec_validate (sub->interface_name, strlen ((char *) s) + 1);
+ strncpy ((char *) sub->interface_name, (char *) s,
+ vec_len (sub->interface_name));
+ sub->sw_if_index = ntohl (mp->sw_if_index);
+ sub->sub_id = ntohl (mp->sub_id);
+
+ sub->raw_flags = ntohl (mp->sub_if_flags & SUB_IF_API_FLAG_MASK_VNET);
+
+ sub->sub_number_of_tags = mp->sub_number_of_tags;
+ sub->sub_outer_vlan_id = ntohs (mp->sub_outer_vlan_id);
+ sub->sub_inner_vlan_id = ntohs (mp->sub_inner_vlan_id);
+
+ /* vlan tag rewrite */
+ sub->vtr_op = ntohl (mp->vtr_op);
+ sub->vtr_push_dot1q = ntohl (mp->vtr_push_dot1q);
+ sub->vtr_tag1 = ntohl (mp->vtr_tag1);
+ sub->vtr_tag2 = ntohl (mp->vtr_tag2);
+ }
+}
+
+static int
+api_sw_interface_get_mac_address (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_sw_interface_get_mac_address_reply_t_handler (
+ vl_api_sw_interface_get_mac_address_reply_t *mp)
+{
+}
+
+static int
+api_sw_interface_add_del_address (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_add_del_address_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 is_add = 1, del_all = 0;
+ u32 address_length = 0;
+ u8 v4_address_set = 0;
+ u8 v6_address_set = 0;
+ ip4_address_t v4address;
+ ip6_address_t v6address;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del-all"))
+ del_all = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "%U/%d", unformat_ip4_address, &v4address,
+ &address_length))
+ v4_address_set = 1;
+ else if (unformat (i, "%U/%d", unformat_ip6_address, &v6address,
+ &address_length))
+ v6_address_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+ if (v4_address_set && v6_address_set)
+ {
+ errmsg ("both v4 and v6 addresses set");
+ return -99;
+ }
+ if (!v4_address_set && !v6_address_set && !del_all)
+ {
+ errmsg ("no addresses set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_ADD_DEL_ADDRESS, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = is_add;
+ mp->del_all = del_all;
+ if (v6_address_set)
+ {
+ mp->prefix.address.af = ADDRESS_IP6;
+ clib_memcpy (mp->prefix.address.un.ip6, &v6address, sizeof (v6address));
+ }
+ else
+ {
+ mp->prefix.address.af = ADDRESS_IP4;
+ clib_memcpy (mp->prefix.address.un.ip4, &v4address, sizeof (v4address));
+ }
+ mp->prefix.len = address_length;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return good/bad news */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_get_table (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_get_table_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 is_ipv6 = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (SW_INTERFACE_GET_TABLE, mp);
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->is_ipv6 = is_ipv6;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_rx_mode (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_rx_mode_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+ u8 queue_id_valid = 0;
+ u32 queue_id;
+ vnet_hw_if_rx_mode mode = VNET_HW_IF_RX_MODE_UNKNOWN;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "queue %d", &queue_id))
+ queue_id_valid = 1;
+ else if (unformat (i, "polling"))
+ mode = VNET_HW_IF_RX_MODE_POLLING;
+ else if (unformat (i, "interrupt"))
+ mode = VNET_HW_IF_RX_MODE_INTERRUPT;
+ else if (unformat (i, "adaptive"))
+ mode = VNET_HW_IF_RX_MODE_ADAPTIVE;
+ else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+ if (mode == VNET_HW_IF_RX_MODE_UNKNOWN)
+ {
+ errmsg ("missing rx-mode");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_RX_MODE, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->mode = (vl_api_rx_mode_t) mode;
+ mp->queue_id_valid = queue_id_valid;
+ mp->queue_id = queue_id_valid ? ntohl (queue_id) : ~0;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return the good/bad news... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_unnumbered (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_unnumbered_t *mp;
+ u32 sw_if_index;
+ u32 unnum_sw_index = ~0;
+ u8 is_add = 1;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "unnum_if_index %d", &unnum_sw_index))
+ ;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (SW_INTERFACE_SET_UNNUMBERED, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->unnumbered_sw_if_index = ntohl (unnum_sw_index);
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_sw_interface_get_table_reply_t_handler (
+ vl_api_sw_interface_get_table_reply_t *mp)
+{
+ vat_main_t *vam = interface_test_main.vat_main;
+
+ fformat (vam->ofp, "%d", ntohl (mp->vrf_id));
+
+ vam->retval = ntohl (mp->retval);
+ vam->result_ready = 1;
+}
+
+static int
+api_sw_interface_address_replace_begin (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sw_interface_set_mac_address (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sw_interface_set_rx_placement (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_rx_placement_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+ u8 is_main = 0;
+ u32 queue_id, thread_index;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "queue %d", &queue_id))
+ ;
+ else if (unformat (i, "main"))
+ is_main = 1;
+ else if (unformat (i, "worker %d", &thread_index))
+ ;
+ else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (is_main)
+ thread_index = 0;
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_RX_PLACEMENT, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->worker_id = ntohl (thread_index);
+ mp->queue_id = ntohl (queue_id);
+ mp->is_main = is_main;
+
+ /* send it... */
+ S (mp);
+ /* Wait for a reply, return the good/bad news... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_tx_placement (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_tx_placement_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+ uword *bitmap = 0;
+ u32 queue_id, n_bits = 0;
+ u32 v;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "queue %d", &queue_id))
+ ;
+ else if (unformat (i, "threads %U", unformat_bitmap_list, &bitmap))
+ ;
+ else if (unformat (i, "mask %U", unformat_bitmap_mask, &bitmap))
+ ;
+ else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ n_bits = clib_bitmap_count_set_bits (bitmap);
+ /* Construct the API message */
+ M2 (SW_INTERFACE_SET_TX_PLACEMENT, mp, sizeof (u32) * n_bits);
+ mp->sw_if_index = htonl (sw_if_index);
+ mp->queue_id = htonl (queue_id);
+ mp->array_size = htonl (n_bits);
+
+ v = clib_bitmap_first_set (bitmap);
+ for (u32 j = 0; j < n_bits; j++)
+ {
+ mp->threads[j] = htonl (v);
+ v = clib_bitmap_next_set (bitmap, v + 1);
+ }
+
+ /* send it... */
+ S (mp);
+ /* Wait for a reply, return the good/bad news... */
+ W (ret);
+ clib_bitmap_free (bitmap);
+ return ret;
+}
+
+static int
+api_interface_name_renumber (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_interface_name_renumber_t *mp;
+ u32 sw_if_index = ~0;
+ u32 new_show_dev_instance = ~0;
+ int ret;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%U", api_unformat_sw_if_index, vam,
+ &sw_if_index))
+ ;
+ else if (unformat (line_input, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (line_input, "new_show_dev_instance %d",
+ &new_show_dev_instance))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (new_show_dev_instance == ~0)
+ {
+ errmsg ("missing new_show_dev_instance");
+ return -99;
+ }
+
+ M (INTERFACE_NAME_RENUMBER, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->new_show_dev_instance = ntohl (new_show_dev_instance);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_delete_subif (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_delete_subif_t *mp;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (DELETE_SUBIF, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_delete_loopback (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_delete_loopback_t *mp;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (DELETE_LOOPBACK, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_create_loopback_instance (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_create_loopback (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_create_loopback_t *mp;
+ vl_api_create_loopback_instance_t *mp_lbi;
+ u8 mac_address[6];
+ u8 mac_set = 0;
+ u8 is_specified = 0;
+ u32 user_instance = 0;
+ int ret;
+
+ clib_memset (mac_address, 0, sizeof (mac_address));
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "mac %U", unformat_ethernet_address, mac_address))
+ mac_set = 1;
+ if (unformat (i, "instance %d", &user_instance))
+ is_specified = 1;
+ else
+ break;
+ }
+
+ if (is_specified)
+ {
+ M (CREATE_LOOPBACK_INSTANCE, mp_lbi);
+ mp_lbi->is_specified = is_specified;
+ if (is_specified)
+ mp_lbi->user_instance = htonl (user_instance);
+ if (mac_set)
+ clib_memcpy (mp_lbi->mac_address, mac_address, sizeof (mac_address));
+ S (mp_lbi);
+ }
+ else
+ {
+ /* Construct the API message */
+ M (CREATE_LOOPBACK, mp);
+ if (mac_set)
+ clib_memcpy (mp->mac_address, mac_address, sizeof (mac_address));
+ S (mp);
+ }
+
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_create_subif_reply_t_handler (vl_api_create_subif_reply_t *mp)
+{
+ vat_main_t *vam = interface_test_main.vat_main;
+ vam->result_ready = 1;
+}
+
+#define foreach_create_subif_bit \
+ _ (no_tags) \
+ _ (one_tag) \
+ _ (two_tags) \
+ _ (dot1ad) \
+ _ (exact_match) \
+ _ (default_sub) \
+ _ (outer_vlan_id_any) \
+ _ (inner_vlan_id_any)
+
+#define foreach_create_subif_flag \
+ _ (0, "no_tags") \
+ _ (1, "one_tag") \
+ _ (2, "two_tags") \
+ _ (3, "dot1ad") \
+ _ (4, "exact_match") \
+ _ (5, "default_sub") \
+ _ (6, "outer_vlan_id_any") \
+ _ (7, "inner_vlan_id_any")
+
+static int
+api_create_subif (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_create_subif_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u32 sub_id;
+ u8 sub_id_set = 0;
+ u32 __attribute__ ((unused)) no_tags = 0;
+ u32 __attribute__ ((unused)) one_tag = 0;
+ u32 __attribute__ ((unused)) two_tags = 0;
+ u32 __attribute__ ((unused)) dot1ad = 0;
+ u32 __attribute__ ((unused)) exact_match = 0;
+ u32 __attribute__ ((unused)) default_sub = 0;
+ u32 __attribute__ ((unused)) outer_vlan_id_any = 0;
+ u32 __attribute__ ((unused)) inner_vlan_id_any = 0;
+ u32 tmp;
+ u16 outer_vlan_id = 0;
+ u16 inner_vlan_id = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sub_id %d", &sub_id))
+ sub_id_set = 1;
+ else if (unformat (i, "outer_vlan_id %d", &tmp))
+ outer_vlan_id = tmp;
+ else if (unformat (i, "inner_vlan_id %d", &tmp))
+ inner_vlan_id = tmp;
+
+#define _(a) else if (unformat (i, #a)) a = 1;
+ foreach_create_subif_bit
+#undef _
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (sub_id_set == 0)
+ {
+ errmsg ("missing sub_id");
+ return -99;
+ }
+ M (CREATE_SUBIF, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->sub_id = ntohl (sub_id);
+
+#define _(a, b) mp->sub_if_flags |= (1 << a);
+ foreach_create_subif_flag;
+#undef _
+
+ mp->outer_vlan_id = ntohs (outer_vlan_id);
+ mp->inner_vlan_id = ntohs (inner_vlan_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_sw_interface_rx_placement_details_t_handler (
+ vl_api_sw_interface_rx_placement_details_t *mp)
+{
+ vat_main_t *vam = interface_test_main.vat_main;
+ u32 worker_id = ntohl (mp->worker_id);
+
+ print (vam->ofp, "\n%-11d %-11s %-6d %-5d %-9s", ntohl (mp->sw_if_index),
+ (worker_id == 0) ? "main" : "worker", worker_id, ntohl (mp->queue_id),
+ (mp->mode == 1) ? "polling" :
+ ((mp->mode == 2) ? "interrupt" : "adaptive"));
+}
+
+static __clib_unused void
+vl_api_sw_interface_tx_placement_details_t_handler (
+ vl_api_sw_interface_tx_placement_details_t *mp)
+{
+ vat_main_t *vam = interface_test_main.vat_main;
+ u32 size = ntohl (mp->array_size);
+ uword *bitmap = 0;
+
+ for (u32 i = 0; i < size; i++)
+ {
+ u32 thread_index = ntohl (mp->threads[i]);
+ bitmap = clib_bitmap_set (bitmap, thread_index, 1);
+ }
+
+ print (vam->ofp, "\n%-11d %-6d %-7s %U", ntohl (mp->sw_if_index),
+ ntohl (mp->queue_id), (mp->shared == 1) ? "yes" : "no",
+ format_bitmap_list, bitmap);
+}
+
+static void
+vl_api_create_vlan_subif_reply_t_handler (vl_api_create_vlan_subif_reply_t *mp)
+{
+ vat_main_t *vam = interface_test_main.vat_main;
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_create_loopback_reply_t_handler (vl_api_create_loopback_reply_t *mp)
+{
+ vat_main_t *vam = interface_test_main.vat_main;
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_create_loopback_instance_reply_t_handler (
+ vl_api_create_loopback_instance_reply_t *mp)
+{
+ vat_main_t *vam = interface_test_main.vat_main;
+ vam->result_ready = 1;
+}
+
+static int
+api_create_vlan_subif (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_create_vlan_subif_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u32 vlan_id;
+ u8 vlan_id_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "vlan %d", &vlan_id))
+ vlan_id_set = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (vlan_id_set == 0)
+ {
+ errmsg ("missing vlan_id");
+ return -99;
+ }
+ M (CREATE_VLAN_SUBIF, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->vlan_id = ntohl (vlan_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_collect_detailed_interface_stats (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sw_interface_rx_placement_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_rx_placement_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set++;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set++;
+ else
+ break;
+ }
+
+ fformat (vam->ofp, "\n%-11s %-11s %-6s %-5s %-4s", "sw_if_index",
+ "main/worker", "thread", "queue", "mode");
+
+ /* Dump Interface rx placement */
+ M (SW_INTERFACE_RX_PLACEMENT_DUMP, mp);
+
+ if (sw_if_index_set)
+ mp->sw_if_index = htonl (sw_if_index);
+ else
+ mp->sw_if_index = ~0;
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&interface_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_tx_placement_get (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_tx_placement_get_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set++;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set++;
+ else
+ break;
+ }
+
+ fformat (vam->ofp, "\n%-11s %-6s %-7s %-11s", "sw_if_index", "queue",
+ "shared", "threads");
+
+ /* Dump Interface tx placement */
+ M (SW_INTERFACE_TX_PLACEMENT_GET, mp);
+
+ if (sw_if_index_set)
+ mp->sw_if_index = htonl (sw_if_index);
+ else
+ mp->sw_if_index = ~0;
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&interface_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_sw_interface_tx_placement_get_reply_t_handler ()
+{
+}
+
+static int
+api_sw_interface_clear_stats (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_clear_stats_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ break;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_CLEAR_STATS, mp);
+
+ if (sw_if_index_set == 1)
+ mp->sw_if_index = ntohl (sw_if_index);
+ else
+ mp->sw_if_index = ~0;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply, return the good/bad news... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_table (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_table_t *mp;
+ u32 sw_if_index, vrf_id = 0;
+ u8 sw_if_index_set = 0;
+ u8 is_ipv6 = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_TABLE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_ipv6 = is_ipv6;
+ mp->vrf_id = ntohl (vrf_id);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_address_replace_end (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sw_interface_set_ip_directed_broadcast (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sw_interface_set_mtu (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sw_interface_set_promisc (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_want_interface_events (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_want_interface_events_t *mp;
+ int enable = -1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (enable == -1)
+ {
+ errmsg ("missing enable|disable");
+ return -99;
+ }
+
+ M (WANT_INTERFACE_EVENTS, mp);
+ mp->enable_disable = enable;
+
+ vam->interface_event_display = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+typedef struct
+{
+ u8 *name;
+ u32 value;
+} name_sort_t;
+
+int
+api_sw_interface_dump (vat_main_t *vam)
+{
+ vl_api_sw_interface_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ hash_pair_t *p;
+ name_sort_t *nses = 0, *ns;
+ sw_interface_subif_t *sub = NULL;
+ int ret;
+
+ /* Toss the old name table */
+ hash_foreach_pair (p, vam->sw_if_index_by_interface_name, ({
+ vec_add2 (nses, ns, 1);
+ ns->name = (u8 *) (p->key);
+ ns->value = (u32) p->value[0];
+ }));
+
+ hash_free (vam->sw_if_index_by_interface_name);
+
+ vec_foreach (ns, nses)
+ vec_free (ns->name);
+
+ vec_free (nses);
+
+ vec_foreach (sub, vam->sw_if_subif_table)
+ {
+ vec_free (sub->interface_name);
+ }
+ vec_free (vam->sw_if_subif_table);
+
+ /* recreate the interface name hash table */
+ vam->sw_if_index_by_interface_name = hash_create_string (0, sizeof (uword));
+
+ /*
+ * Ask for all interface names. Otherwise, the epic catalog of
+ * name filters becomes ridiculously long, and vat ends up needing
+ * to be taught about new interface types.
+ */
+ M (SW_INTERFACE_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&interface_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_interface_name (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_pcap_set_filter_function (vat_main_t *vam)
+{
+ vl_api_pcap_set_filter_function_t *mp;
+ int ret;
+
+ M (PCAP_SET_FILTER_FUNCTION, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_pcap_trace_on (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_pcap_trace_off (vat_main_t *vam)
+{
+ return -1;
+}
+
+#include <vnet/interface.api_test.c>
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip-neighbor/ip4_neighbor.c b/src/vnet/ip-neighbor/ip4_neighbor.c
index 9dda50ee911..61b9e768fe5 100644
--- a/src/vnet/ip-neighbor/ip4_neighbor.c
+++ b/src/vnet/ip-neighbor/ip4_neighbor.c
@@ -38,9 +38,11 @@
*/
#include <vnet/ip-neighbor/ip4_neighbor.h>
+#include <vnet/ip-neighbor/ip_neighbor.api_enum.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/util/throttle.h>
#include <vnet/fib/fib_sas.h>
+#include <vnet/ip/ip_sas.h>
/** ARP throttling */
static throttle_t arp_throttle;
@@ -54,7 +56,8 @@ VLIB_REGISTER_LOG_CLASS (ip4_neighbor_log, static) = {
vlib_log_debug (ip4_neighbor_log.class, fmt, __VA_ARGS__)
void
-ip4_neighbor_probe_dst (u32 sw_if_index, const ip4_address_t * dst)
+ip4_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ const ip4_address_t *dst)
{
ip4_address_t src;
adj_index_t ai;
@@ -62,15 +65,16 @@ ip4_neighbor_probe_dst (u32 sw_if_index, const ip4_address_t * dst)
/* any glean will do, it's just for the rewrite */
ai = adj_glean_get (FIB_PROTOCOL_IP4, sw_if_index, NULL);
- if (ADJ_INDEX_INVALID != ai && fib_sas4_get (sw_if_index, dst, &src))
+ if (ADJ_INDEX_INVALID != ai &&
+ (fib_sas4_get (sw_if_index, dst, &src) ||
+ ip4_sas_by_sw_if_index (sw_if_index, dst, &src)))
ip4_neighbor_probe (vlib_get_main (),
vnet_get_main (), adj_get (ai), &src, dst);
}
void
-ip4_neighbor_advertise (vlib_main_t * vm,
- vnet_main_t * vnm,
- u32 sw_if_index, const ip4_address_t * addr)
+ip4_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index,
+ u32 thread_index, const ip4_address_t *addr)
{
vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
ip4_main_t *i4m = &ip4_main;
@@ -79,7 +83,8 @@ ip4_neighbor_advertise (vlib_main_t * vm,
if (NULL == addr)
{
- if (fib_sas4_get (sw_if_index, NULL, &tmp))
+ if (fib_sas4_get (sw_if_index, NULL, &tmp) ||
+ ip4_sas_by_sw_if_index (sw_if_index, NULL, &tmp))
addr = &tmp;
}
@@ -122,6 +127,10 @@ ip4_neighbor_advertise (vlib_main_t * vm,
to_next[0] = bi;
f->n_vectors = 1;
vlib_put_frame_to_node (vm, hi->output_node_index, f);
+
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP4].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_GRAT],
+ thread_index, sw_if_index, 1);
}
}
@@ -178,17 +187,23 @@ ip4_arp_inline (vlib_main_t * vm,
/* resolve the packet's destination */
ip4_header_t *ip0 = vlib_buffer_get_current (p0);
resolve0 = ip0->dst_address;
- src0 = adj0->sub_type.glean.rx_pfx.fp_addr.ip4;
}
else
+ /* resolve the incomplete adj */
+ resolve0 = adj0->sub_type.nbr.next_hop.ip4;
+
+ if (is_glean && adj0->sub_type.glean.rx_pfx.fp_len)
+ /* the glean is for a connected, local prefix */
+ src0 = adj0->sub_type.glean.rx_pfx.fp_addr.ip4;
+ else
{
- /* resolve the incomplete adj */
- resolve0 = adj0->sub_type.nbr.next_hop.ip4;
/* Src IP address in ARP header. */
- if (!fib_sas4_get (sw_if_index0, &resolve0, &src0))
+ if (!fib_sas4_get (sw_if_index0, &resolve0, &src0) &&
+ !ip4_sas_by_sw_if_index (sw_if_index0, &resolve0, &src0))
{
/* No source address available */
- p0->error = node->errors[IP4_ARP_ERROR_NO_SOURCE_ADDRESS];
+ p0->error =
+ node->errors[IP4_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
continue;
}
}
@@ -199,7 +214,7 @@ ip4_arp_inline (vlib_main_t * vm,
if (throttle_check (&arp_throttle, thread_index, r0, seed))
{
- p0->error = node->errors[IP4_ARP_ERROR_THROTTLED];
+ p0->error = node->errors[IP4_NEIGHBOR_ERROR_THROTTLED];
continue;
}
@@ -209,7 +224,7 @@ ip4_arp_inline (vlib_main_t * vm,
*/
if (IP_LOOKUP_NEXT_REWRITE == adj0->lookup_next_index)
{
- p0->error = node->errors[IP4_ARP_ERROR_RESOLVED];
+ p0->error = node->errors[IP4_NEIGHBOR_ERROR_RESOLVED];
continue;
}
@@ -220,7 +235,7 @@ ip4_arp_inline (vlib_main_t * vm,
if ((is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_GLEAN)
|| (!is_glean && adj0->lookup_next_index != IP_LOOKUP_NEXT_ARP))
{
- p0->error = node->errors[IP4_ARP_ERROR_NON_ARP_ADJ];
+ p0->error = node->errors[IP4_NEIGHBOR_ERROR_NON_ARP_ADJ];
continue;
}
@@ -232,11 +247,11 @@ ip4_arp_inline (vlib_main_t * vm,
/* copy the persistent fields from the original */
clib_memcpy_fast (b0->opaque2, p0->opaque2,
sizeof (p0->opaque2));
- p0->error = node->errors[IP4_ARP_ERROR_REQUEST_SENT];
+ p0->error = node->errors[IP4_NEIGHBOR_ERROR_REQUEST_SENT];
}
else
{
- p0->error = node->errors[IP4_ARP_ERROR_NO_BUFFERS];
+ p0->error = node->errors[IP4_NEIGHBOR_ERROR_NO_BUFFERS];
continue;
}
}
@@ -259,23 +274,13 @@ VLIB_NODE_FN (ip4_glean_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return (ip4_arp_inline (vm, node, frame, 1));
}
-static char *ip4_arp_error_strings[] = {
- [IP4_ARP_ERROR_THROTTLED] = "ARP requests throttled",
- [IP4_ARP_ERROR_RESOLVED] = "ARP requests resolved",
- [IP4_ARP_ERROR_NO_BUFFERS] = "ARP requests out of buffer",
- [IP4_ARP_ERROR_REQUEST_SENT] = "ARP requests sent",
- [IP4_ARP_ERROR_NON_ARP_ADJ] = "ARPs to non-ARP adjacencies",
- [IP4_ARP_ERROR_NO_SOURCE_ADDRESS] = "no source address for ARP request",
-};
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_arp_node) =
{
.name = "ip4-arp",
.vector_size = sizeof (u32),
.format_trace = format_ip4_forward_next_trace,
- .n_errors = ARRAY_LEN (ip4_arp_error_strings),
- .error_strings = ip4_arp_error_strings,
+ .n_errors = IP4_NEIGHBOR_N_ERROR,
+ .error_counters = ip4_neighbor_error_counters,
.n_next_nodes = IP4_ARP_N_NEXT,
.next_nodes = {
[IP4_ARP_NEXT_DROP] = "ip4-drop",
@@ -287,14 +292,13 @@ VLIB_REGISTER_NODE (ip4_glean_node) =
.name = "ip4-glean",
.vector_size = sizeof (u32),
.format_trace = format_ip4_forward_next_trace,
- .n_errors = ARRAY_LEN (ip4_arp_error_strings),
- .error_strings = ip4_arp_error_strings,
+ .n_errors = IP4_NEIGHBOR_N_ERROR,
+ .error_counters = ip4_neighbor_error_counters,
.n_next_nodes = IP4_ARP_N_NEXT,
.next_nodes = {
[IP4_ARP_NEXT_DROP] = "ip4-drop",
},
};
-/* *INDENT-ON* */
#define foreach_notrace_ip4_arp_error \
_(THROTTLED) \
@@ -310,10 +314,9 @@ arp_notrace_init (vlib_main_t * vm)
vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, ip4_arp_node.index);
/* don't trace ARP request packets */
-#define _(a) \
- vnet_pcap_drop_trace_filter_add_del \
- (rt->errors[IP4_ARP_ERROR_##a], \
- 1 /* is_add */);
+#define _(a) \
+ vnet_pcap_drop_trace_filter_add_del (rt->errors[IP4_NEIGHBOR_ERROR_##a], \
+ 1 /* is_add */);
foreach_notrace_ip4_arp_error;
#undef _
return 0;
@@ -327,7 +330,7 @@ ip4_neighbor_main_loop_enter (vlib_main_t * vm)
vlib_thread_main_t *tm = &vlib_thread_main;
u32 n_vlib_mains = tm->n_vlib_mains;
- throttle_init (&arp_throttle, n_vlib_mains, 1e-3);
+ throttle_init (&arp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-3);
return (NULL);
}
diff --git a/src/vnet/ip-neighbor/ip4_neighbor.h b/src/vnet/ip-neighbor/ip4_neighbor.h
index c330dfa59e7..7941ebdbced 100644
--- a/src/vnet/ip-neighbor/ip4_neighbor.h
+++ b/src/vnet/ip-neighbor/ip4_neighbor.h
@@ -18,19 +18,18 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/arp_packet.h>
+#include <vnet/ip-neighbor/ip_neighbor_types.h>
-extern void ip4_neighbor_probe_dst (u32 sw_if_index,
- const ip4_address_t * dst);
-extern void ip4_neighbor_advertise (vlib_main_t * vm,
- vnet_main_t * vnm,
- u32 sw_if_index,
- const ip4_address_t * addr);
+extern void ip4_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ const ip4_address_t *dst);
+extern void ip4_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm,
+ u32 sw_if_index, u32 thread_index,
+ const ip4_address_t *addr);
always_inline vlib_buffer_t *
-ip4_neighbor_probe (vlib_main_t * vm,
- vnet_main_t * vnm,
- const ip_adjacency_t * adj0,
- const ip4_address_t * src, const ip4_address_t * dst)
+ip4_neighbor_probe (vlib_main_t *vm, vnet_main_t *vnm,
+ const ip_adjacency_t *adj0, const ip4_address_t *src,
+ const ip4_address_t *dst)
{
vnet_hw_interface_t *hw_if0;
ethernet_arp_header_t *h0;
@@ -62,6 +61,7 @@ ip4_neighbor_probe (vlib_main_t * vm,
h0->ip4_over_ethernet[1].ip4 = *dst;
vnet_buffer (b0)->sw_if_index[VLIB_TX] = adj0->rewrite_header.sw_if_index;
+ b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
vlib_buffer_advance (b0, -adj0->rewrite_header.data_bytes);
@@ -73,6 +73,10 @@ ip4_neighbor_probe (vlib_main_t * vm,
vlib_put_frame_to_node (vm, hw_if0->output_node_index, f);
}
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP4].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_REQUEST],
+ vm->thread_index, adj0->rewrite_header.sw_if_index, 1);
+
return b0;
}
diff --git a/src/vnet/ip-neighbor/ip6_neighbor.c b/src/vnet/ip-neighbor/ip6_neighbor.c
index ec323543e32..ca8aed3d4ca 100644
--- a/src/vnet/ip-neighbor/ip6_neighbor.c
+++ b/src/vnet/ip-neighbor/ip6_neighbor.c
@@ -16,8 +16,10 @@
*/
#include <vnet/ip-neighbor/ip6_neighbor.h>
+#include <vnet/ip-neighbor/ip_neighbor.api_enum.h>
#include <vnet/util/throttle.h>
#include <vnet/fib/fib_sas.h>
+#include <vnet/ip/ip_sas.h>
/** ND throttling */
static throttle_t nd_throttle;
@@ -30,19 +32,20 @@ VLIB_REGISTER_LOG_CLASS (ip6_neighbor_log, static) = {
#define log_debug(fmt, ...) \
vlib_log_debug (ip6_neighbor_log.class, fmt, __VA_ARGS__)
void
-ip6_neighbor_probe_dst (u32 sw_if_index, const ip6_address_t * dst)
+ip6_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ const ip6_address_t *dst)
{
ip6_address_t src;
- if (fib_sas6_get (sw_if_index, dst, &src))
- ip6_neighbor_probe (vlib_get_main (), vnet_get_main (),
- sw_if_index, &src, dst);
+ if (fib_sas6_get (sw_if_index, dst, &src) ||
+ ip6_sas_by_sw_if_index (sw_if_index, dst, &src))
+ ip6_neighbor_probe (vlib_get_main (), vnet_get_main (), sw_if_index,
+ thread_index, &src, dst);
}
void
-ip6_neighbor_advertise (vlib_main_t * vm,
- vnet_main_t * vnm,
- u32 sw_if_index, const ip6_address_t * addr)
+ip6_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index,
+ u32 thread_index, const ip6_address_t *addr)
{
vnet_hw_interface_t *hi = vnet_get_sup_hw_interface (vnm, sw_if_index);
ip6_main_t *i6m = &ip6_main;
@@ -103,6 +106,10 @@ ip6_neighbor_advertise (vlib_main_t * vm,
to_next[0] = bi;
f->n_vectors = 1;
vlib_put_frame_to_node (vm, hi->output_node_index, f);
+
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP6].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_GRAT],
+ thread_index, sw_if_index, 1);
}
}
@@ -113,14 +120,6 @@ typedef enum
IP6_NBR_N_NEXT,
} ip6_discover_neighbor_next_t;
-typedef enum
-{
- IP6_NBR_ERROR_DROP,
- IP6_NBR_ERROR_REQUEST_SENT,
- IP6_NBR_ERROR_NO_SOURCE_ADDRESS,
- IP6_NBR_ERROR_NO_BUFFERS,
-} ip6_discover_neighbor_error_t;
-
static uword
ip6_discover_neighbor_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -186,6 +185,12 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
to_next_drop += 1;
n_left_to_next_drop -= 1;
+ if (drop0)
+ {
+ p0->error = node->errors[IP6_NEIGHBOR_ERROR_THROTTLED];
+ continue;
+ }
+
hw_if0 = vnet_get_sup_hw_interface (vnm, sw_if_index0);
/* If the interface is link-down, drop the pkt */
@@ -204,7 +209,7 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
if (drop0)
{
- p0->error = node->errors[IP6_NBR_ERROR_DROP];
+ p0->error = node->errors[IP6_NEIGHBOR_ERROR_DROP];
continue;
}
@@ -212,15 +217,17 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
* Choose source address based on destination lookup
* adjacency.
*/
- if (!fib_sas6_get (sw_if_index0, &ip0->dst_address, &src))
+ const ip6_address_t *ll = ip6_get_link_local_address (sw_if_index0);
+ if (!ll)
{
/* There is no address on the interface */
- p0->error = node->errors[IP6_NBR_ERROR_NO_SOURCE_ADDRESS];
+ p0->error = node->errors[IP6_NEIGHBOR_ERROR_NO_SOURCE_ADDRESS];
continue;
}
+ ip6_address_copy (&src, ll);
- b0 = ip6_neighbor_probe (vm, vnm, sw_if_index0,
- &src, &ip0->dst_address);
+ b0 = ip6_neighbor_probe (vm, vnm, sw_if_index0, thread_index, &src,
+ &ip0->dst_address);
if (PREDICT_TRUE (NULL != b0))
{
@@ -228,12 +235,12 @@ ip6_discover_neighbor_inline (vlib_main_t * vm,
sizeof (p0->opaque2));
b0->flags |= p0->flags & VLIB_BUFFER_IS_TRACED;
b0->trace_handle = p0->trace_handle;
- p0->error = node->errors[IP6_NBR_ERROR_REQUEST_SENT];
+ p0->error = node->errors[IP6_NEIGHBOR_ERROR_REQUEST_SENT];
}
else
{
/* There is no address on the interface */
- p0->error = node->errors[IP6_NBR_ERROR_NO_BUFFERS];
+ p0->error = node->errors[IP6_NEIGHBOR_ERROR_NO_BUFFERS];
continue;
}
}
@@ -257,22 +264,14 @@ ip6_glean (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return (ip6_discover_neighbor_inline (vm, node, frame, 1));
}
-static char *ip6_discover_neighbor_error_strings[] = {
- [IP6_NBR_ERROR_DROP] = "address overflow drops",
- [IP6_NBR_ERROR_REQUEST_SENT] = "neighbor solicitations sent",
- [IP6_NBR_ERROR_NO_SOURCE_ADDRESS] = "no source address for ND solicitation",
- [IP6_NBR_ERROR_NO_BUFFERS] = "no buffers",
-};
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_glean_node) =
{
.function = ip6_glean,
.name = "ip6-glean",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
- .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
- .error_strings = ip6_discover_neighbor_error_strings,
+ .n_errors = IP6_NEIGHBOR_N_ERROR,
+ .error_counters = ip6_neighbor_error_counters,
.n_next_nodes = IP6_NBR_N_NEXT,
.next_nodes =
{
@@ -286,8 +285,8 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) =
.name = "ip6-discover-neighbor",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
- .n_errors = ARRAY_LEN (ip6_discover_neighbor_error_strings),
- .error_strings = ip6_discover_neighbor_error_strings,
+ .n_errors = IP6_NEIGHBOR_N_ERROR,
+ .error_counters = ip6_neighbor_error_counters,
.n_next_nodes = IP6_NBR_N_NEXT,
.next_nodes =
{
@@ -295,7 +294,6 @@ VLIB_REGISTER_NODE (ip6_discover_neighbor_node) =
[IP6_NBR_NEXT_REPLY_TX] = "ip6-rewrite-mcast",
},
};
-/* *INDENT-ON* */
/* Template used to generate IP6 neighbor solicitation packets. */
vlib_packet_template_t ip6_neighbor_packet_template;
@@ -339,7 +337,7 @@ ip6_nd_main_loop_enter (vlib_main_t * vm)
{
vlib_thread_main_t *tm = &vlib_thread_main;
- throttle_init (&nd_throttle, tm->n_vlib_mains, 1e-3);
+ throttle_init (&nd_throttle, tm->n_vlib_mains, THROTTLE_BITS, 1e-3);
return 0;
}
diff --git a/src/vnet/ip-neighbor/ip6_neighbor.h b/src/vnet/ip-neighbor/ip6_neighbor.h
index ad2ace21948..c6e718dc2ff 100644
--- a/src/vnet/ip-neighbor/ip6_neighbor.h
+++ b/src/vnet/ip-neighbor/ip6_neighbor.h
@@ -25,23 +25,22 @@
#include <vnet/ip/icmp46_packet.h>
#include <vnet/ethernet/ethernet.h>
#include <vnet/adj/adj_internal.h>
+#include <vnet/ip-neighbor/ip_neighbor_types.h>
/* Template used to generate IP6 neighbor solicitation packets. */
extern vlib_packet_template_t ip6_neighbor_packet_template;
-extern void ip6_neighbor_advertise (vlib_main_t * vm,
- vnet_main_t * vnm,
- u32 sw_if_index,
- const ip6_address_t * addr);
+extern void ip6_neighbor_advertise (vlib_main_t *vm, vnet_main_t *vnm,
+ u32 sw_if_index, u32 thread_index,
+ const ip6_address_t *addr);
-extern void ip6_neighbor_probe_dst (u32 sw_if_index,
- const ip6_address_t * dst);
+extern void ip6_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ const ip6_address_t *dst);
always_inline vlib_buffer_t *
-ip6_neighbor_probe (vlib_main_t * vm,
- vnet_main_t * vnm,
- u32 sw_if_index,
- const ip6_address_t * src, const ip6_address_t * dst)
+ip6_neighbor_probe (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index,
+ u32 thread_index, const ip6_address_t *src,
+ const ip6_address_t *dst)
{
icmp6_neighbor_solicitation_header_t *h0;
vnet_hw_interface_t *hw_if0;
@@ -104,6 +103,10 @@ ip6_neighbor_probe (vlib_main_t * vm,
vlib_put_frame_to_node (vm, adj->ia_node_index, f);
}
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP6].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_REQUEST],
+ thread_index, sw_if_index, 1);
+
return b0;
}
diff --git a/src/vnet/ip-neighbor/ip_neighbor.api b/src/vnet/ip-neighbor/ip_neighbor.api
index 62730e7c1e3..24cddd42fab 100644
--- a/src/vnet/ip-neighbor/ip_neighbor.api
+++ b/src/vnet/ip-neighbor/ip_neighbor.api
@@ -20,7 +20,7 @@
called through a shared memory interface.
*/
-option version = "1.0.0";
+option version = "1.0.1";
import "vnet/ip/ip_types.api";
import "vnet/ethernet/ethernet_types.api";
@@ -126,6 +126,40 @@ autoreply define ip_neighbor_config
bool recycle;
};
+/** \brief Get neighbor database configuration per AF
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param af - Address family (v4/v6)
+*/
+define ip_neighbor_config_get
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_address_family_t af;
+};
+
+/** \brief Neighbor database configuration reply
+ @param context - sender context, to match reply w/ request
+ @param retval - error (0 is "no error")
+ @param af - Address family (v4/v6)
+ @param max_number - The maximum number of neighbours that will be created
+ @param max_age - The maximum age (in seconds) before an inactive neighbour
+ is flushed
+ @param recycle - If max_number of neighbours is reached and new ones need
+ to be created, should the oldest neighbour be 'recycled'
+*/
+define ip_neighbor_config_get_reply
+{
+ option in_progress;
+ u32 context;
+ i32 retval;
+ vl_api_address_family_t af;
+ u32 max_number;
+ u32 max_age;
+ bool recycle;
+};
+
/** \brief IP neighbour replace begin
The use-case is that, for some unspecified reason, the control plane
@@ -264,6 +298,85 @@ service {
events ip_neighbor_event_v2;
};
+counters ip4_neighbor {
+ throttled {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP requests throttled";
+ };
+ resolved {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP requests resolved";
+ };
+ no_buffers {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARP requests out of buffer";
+ };
+ request_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ARP requests sent";
+ };
+ non_arp_adj {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ARPs to non-ARP adjacencies";
+ };
+ no_source_address {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no source address for ARP request";
+ };
+};
+
+counters ip6_neighbor {
+ throttled {
+ severity info;
+ type counter64;
+ units "packets";
+ description "throttled";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "address overflow drops";
+ };
+ request_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "neighbor solicitations sent";
+ };
+ no_source_address {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no source address for ND solicitation";
+ };
+ no_buffers {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no buffers";
+ };
+};
+
+paths {
+ "/err/ip4-arp" "ip4_neighbor";
+ "/err/ip4-glean" "ip4_neighbor";
+ "/err/ip6-arp" "ip6_neighbor";
+ "/err/ip6-glean" "ip6_neighbor";
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/ip-neighbor/ip_neighbor.c b/src/vnet/ip-neighbor/ip_neighbor.c
index 8637e16fd8e..d340037a15d 100644
--- a/src/vnet/ip-neighbor/ip_neighbor.c
+++ b/src/vnet/ip-neighbor/ip_neighbor.c
@@ -27,6 +27,74 @@
#include <vnet/fib/fib_table.h>
#include <vnet/adj/adj_mcast.h>
+ip_neighbor_counters_t ip_neighbor_counters[] =
+{
+ [AF_IP4] = {
+ .ipnc = {
+ [VLIB_RX] = {
+ [IP_NEIGHBOR_CTR_REPLY] = {
+ .name = "arp-rx-replies",
+ .stat_segment_name = "/net/arp/rx/replies",
+ },
+ [IP_NEIGHBOR_CTR_REQUEST] = {
+ .name = "arp-rx-requests",
+ .stat_segment_name = "/net/arp/rx/requests",
+ },
+ [IP_NEIGHBOR_CTR_GRAT] = {
+ .name = "arp-rx-gratuitous",
+ .stat_segment_name = "/net/arp/rx/gratuitous",
+ },
+ },
+ [VLIB_TX] = {
+ [IP_NEIGHBOR_CTR_REPLY] = {
+ .name = "arp-tx-replies",
+ .stat_segment_name = "/net/arp/tx/replies",
+ },
+ [IP_NEIGHBOR_CTR_REQUEST] = {
+ .name = "arp-tx-requests",
+ .stat_segment_name = "/net/arp/tx/requests",
+ },
+ [IP_NEIGHBOR_CTR_GRAT] = {
+ .name = "arp-tx-gratuitous",
+ .stat_segment_name = "/net/arp/tx/gratuitous",
+ },
+ },
+ },
+ },
+ [AF_IP6] = {
+ .ipnc = {
+ [VLIB_RX] = {
+ [IP_NEIGHBOR_CTR_REPLY] = {
+ .name = "ip6-nd-rx-replies",
+ .stat_segment_name = "/net/ip6-nd/rx/replies",
+ },
+ [IP_NEIGHBOR_CTR_REQUEST] = {
+ .name = "ip6-nd-rx-requests",
+ .stat_segment_name = "/net/ip6-nd/rx/requests",
+ },
+ [IP_NEIGHBOR_CTR_GRAT] = {
+ .name = "ip6-nd-rx-gratuitous",
+ .stat_segment_name = "/net/ip6-nd/rx/gratuitous",
+ },
+ },
+ [VLIB_TX] = {
+ [IP_NEIGHBOR_CTR_REPLY] = {
+ .name = "ip6-nd-tx-replies",
+ .stat_segment_name = "/net/ip6-nd/tx/replies",
+ },
+ [IP_NEIGHBOR_CTR_REQUEST] = {
+ .name = "ip6-nd-tx-requests",
+ .stat_segment_name = "/net/ip6-nd/tx/requests",
+ },
+ [IP_NEIGHBOR_CTR_GRAT] = {
+ .name = "ip6-nd-tx-gratuitous",
+ .stat_segment_name = "/net/ip6-nd/tx/gratuitous",
+ },
+ },
+ },
+ },
+};
+
/** Pool for All IP neighbors */
static ip_neighbor_t *ip_neighbor_pool;
@@ -62,7 +130,6 @@ typedef struct ip_neighbor_db_t_
static vlib_log_class_t ipn_logger;
/* DBs of neighbours one per AF */
-/* *INDENT-OFF* */
static ip_neighbor_db_t ip_neighbor_db[N_AF] = {
[AF_IP4] = {
.ipndb_limit = 50000,
@@ -77,7 +144,6 @@ static ip_neighbor_db_t ip_neighbor_db[N_AF] = {
.ipndb_recycle = false,
}
};
-/* *INDENT-ON* */
#define IP_NEIGHBOR_DBG(...) \
vlib_log_debug (ipn_logger, __VA_ARGS__);
@@ -692,13 +758,18 @@ ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai)
ip_neighbor_probe (adj);
}
break;
+ case IP_LOOKUP_NEXT_REWRITE:
+ /* Update of an existing rewrite adjacency happens e.g. when the
+ * interface's MAC address changes */
+ if (NULL != ipn)
+ ip_neighbor_mk_complete (ai, ipn);
+ break;
case IP_LOOKUP_NEXT_GLEAN:
case IP_LOOKUP_NEXT_BCAST:
case IP_LOOKUP_NEXT_MCAST:
case IP_LOOKUP_NEXT_DROP:
case IP_LOOKUP_NEXT_PUNT:
case IP_LOOKUP_NEXT_LOCAL:
- case IP_LOOKUP_NEXT_REWRITE:
case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
case IP_LOOKUP_NEXT_MIDCHAIN:
case IP_LOOKUP_NEXT_ICMP_ERROR:
@@ -724,7 +795,7 @@ ip_neighbor_cmd (vlib_main_t * vm,
vnet_main_t *vnm = vnet_get_main ();
ip_neighbor_flags_t flags;
u32 sw_if_index = ~0;
- int is_add = 1;
+ int is_add = 1, is_flush = 0;
int count = 1;
flags = IP_NEIGHBOR_FLAG_DYNAMIC;
@@ -738,6 +809,8 @@ ip_neighbor_cmd (vlib_main_t * vm,
;
else if (unformat (input, "delete") || unformat (input, "del"))
is_add = 0;
+ else if (unformat (input, "flush"))
+ is_flush = 1;
else if (unformat (input, "static"))
{
flags |= IP_NEIGHBOR_FLAG_STATIC;
@@ -751,6 +824,13 @@ ip_neighbor_cmd (vlib_main_t * vm,
break;
}
+ if (is_flush)
+ {
+ ip_neighbor_del_all (AF_IP4, sw_if_index);
+ ip_neighbor_del_all (AF_IP6, sw_if_index);
+ return NULL;
+ }
+
if (sw_if_index == ~0 ||
ip_address_is_zero (&ip) || mac_address_is_zero (&mac))
return clib_error_return (0,
@@ -773,11 +853,10 @@ ip_neighbor_cmd (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
/*?
* Add or delete IPv4 ARP cache entries.
*
- * @note 'set ip neighbor' options (e.g. delete, static, 'fib-id <id>',
+ * @note 'set ip neighbor' options (e.g. delete, static,
* 'count <number>', 'interface ip4_addr mac_addr') can be added in
* any order and combination.
*
@@ -786,35 +865,39 @@ ip_neighbor_cmd (vlib_main_t * vm,
* Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
* either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
* @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
- * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
+ * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3
+ * de:ad:be:ef:ba:be}
*
- * To add or delete an IPv4 ARP cache entry to or from a specific fib
+ * To add or delete an IPv4 ARP cache entry
* table:
- * @cliexcmd{set ip neighbor fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
- * @cliexcmd{set ip neighbor fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
*
* Add or delete IPv4 static ARP cache entries as follows:
- * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
- * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
+ * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
*
* For testing / debugging purposes, the 'set ip neighbor' command can add or
* delete multiple entries. Supply the 'count N' parameter:
- * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
+ * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3
+ * dead.beef.babe}
* @endparblock
?*/
VLIB_CLI_COMMAND (ip_neighbor_command, static) = {
.path = "set ip neighbor",
- .short_help =
- "set ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
+ .short_help = "set ip neighbor [del] <intfc> <ip-address> <mac-address> "
+ "[static] [no-fib-entry] [count <count>]",
.function = ip_neighbor_cmd,
};
VLIB_CLI_COMMAND (ip_neighbor_command2, static) = {
.path = "ip neighbor",
- .short_help =
- "ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
+ .short_help = "ip neighbor [del] [flush] <intfc> <ip-address> <mac-address> "
+ "[static] [no-fib-entry] [count <count>]",
.function = ip_neighbor_cmd,
};
-/* *INDENT-ON* */
static int
ip_neighbor_sort (void *a1, void *a2)
@@ -840,7 +923,6 @@ ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af)
index_t *ipnis = NULL;
ip_neighbor_t *ipn;
- /* *INDENT-OFF* */
pool_foreach (ipn, ip_neighbor_pool)
{
if ((sw_if_index == ~0 ||
@@ -850,7 +932,6 @@ ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af)
vec_add1 (ipnis, ip_neighbor_get_index(ipn));
}
- /* *INDENT-ON* */
if (ipnis)
vec_sort_with_function (ipnis, ip_neighbor_sort);
@@ -870,7 +951,6 @@ ip_neighbor_show_sorted_i (vlib_main_t * vm,
vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
"Flags", "Ethernet", "Interface");
- /* *INDENT-OFF*/
/* the list is time sorted, newest first, so start from the back
* and work forwards. Stop when we get to one that is alive */
clib_llist_foreach_reverse(ip_neighbor_elt_pool,
@@ -878,7 +958,6 @@ ip_neighbor_show_sorted_i (vlib_main_t * vm,
({
vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index);
}));
- /* *INDENT-ON*/
return (NULL);
}
@@ -960,7 +1039,6 @@ ip4_neighbor_show_sorted (vlib_main_t * vm,
* Fib_index 0 6.0.0.1 - 6.0.0.11
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_neighbors_cmd_node, static) = {
.path = "show ip neighbors",
.function = ip_neighbor_show,
@@ -1001,7 +1079,6 @@ VLIB_CLI_COMMAND (show_ip6_neighbor_sorted_cmd_node, static) = {
.function = ip6_neighbor_show_sorted,
.short_help = "show ip6 neighbor-sorted",
};
-/* *INDENT-ON* */
static ip_neighbor_vft_t ip_nbr_vfts[N_AF];
@@ -1012,8 +1089,8 @@ ip_neighbor_register (ip_address_family_t af, const ip_neighbor_vft_t * vft)
}
void
-ip_neighbor_probe_dst (u32 sw_if_index,
- ip_address_family_t af, const ip46_address_t * dst)
+ip_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
+ ip_address_family_t af, const ip46_address_t *dst)
{
if (!vnet_sw_interface_is_admin_up (vnet_get_main (), sw_if_index))
return;
@@ -1021,10 +1098,10 @@ ip_neighbor_probe_dst (u32 sw_if_index,
switch (af)
{
case AF_IP6:
- ip6_neighbor_probe_dst (sw_if_index, &dst->ip6);
+ ip6_neighbor_probe_dst (sw_if_index, thread_index, &dst->ip6);
break;
case AF_IP4:
- ip4_neighbor_probe_dst (sw_if_index, &dst->ip4);
+ ip4_neighbor_probe_dst (sw_if_index, thread_index, &dst->ip4);
break;
}
}
@@ -1033,6 +1110,7 @@ void
ip_neighbor_probe (const ip_adjacency_t * adj)
{
ip_neighbor_probe_dst (adj->rewrite_header.sw_if_index,
+ vlib_get_thread_index (),
ip_address_family_from_fib_proto (adj->ia_nh_proto),
&adj->sub_type.nbr.next_hop);
}
@@ -1050,13 +1128,11 @@ ip_neighbor_walk (ip_address_family_t af,
vec_foreach (hash, ip_neighbor_db[af].ipndb_hash)
{
- /* *INDENT-OFF* */
hash_foreach (key, ipni, *hash,
({
if (WALK_STOP == cb (ipni, ctx))
break;
}));
- /* *INDENT-ON* */
}
}
else
@@ -1067,13 +1143,11 @@ ip_neighbor_walk (ip_address_family_t af,
return;
hash = ip_neighbor_db[af].ipndb_hash[sw_if_index];
- /* *INDENT-OFF* */
hash_foreach (key, ipni, hash,
({
if (WALK_STOP == cb (ipni, ctx))
break;
}));
- /* *INDENT-ON* */
}
}
@@ -1142,31 +1216,6 @@ ip6_neighbor_proxy_del (u32 sw_if_index, const ip6_address_t * addr)
return -1;
}
-static void
-ip_neighbor_ethernet_change_mac (ethernet_main_t * em,
- u32 sw_if_index, uword opaque)
-{
- ip_neighbor_t *ipn;
-
- IP_NEIGHBOR_DBG ("mac-change: %U",
- format_vnet_sw_if_index_name, vnet_get_main (),
- sw_if_index);
-
- /* *INDENT-OFF* */
- pool_foreach (ipn, ip_neighbor_pool)
- {
- if (ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
- adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
- ip_address_family_to_fib_proto(ip_neighbor_get_af(ipn)),
- &ip_addr_46(&ipn->ipn_key->ipnk_ip),
- ip_neighbor_mk_complete_walk,
- ipn);
- }
- /* *INDENT-ON* */
-
- adj_glean_update_rewrite_itf (sw_if_index);
-}
-
void
ip_neighbor_populate (ip_address_family_t af, u32 sw_if_index)
{
@@ -1177,14 +1226,12 @@ ip_neighbor_populate (ip_address_family_t af, u32 sw_if_index)
format_vnet_sw_if_index_name, vnet_get_main (),
sw_if_index, format_ip_address_family, af);
- /* *INDENT-OFF* */
pool_foreach (ipn, ip_neighbor_pool)
{
if (ip_neighbor_get_af(ipn) == af &&
ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
vec_add1 (ipnis, ipn - ip_neighbor_pool);
}
- /* *INDENT-ON* */
vec_foreach (ipni, ipnis)
{
@@ -1210,7 +1257,6 @@ ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index)
format_vnet_sw_if_index_name, vnet_get_main (),
sw_if_index, format_ip_address_family, af);
- /* *INDENT-OFF* */
pool_foreach (ipn, ip_neighbor_pool)
{
if (ip_neighbor_get_af(ipn) == af &&
@@ -1218,13 +1264,12 @@ ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index)
ip_neighbor_is_dynamic (ipn))
vec_add1 (ipnis, ipn - ip_neighbor_pool);
}
- /* *INDENT-ON* */
vec_foreach (ipni, ipnis) ip_neighbor_destroy (ip_neighbor_get (*ipni));
vec_free (ipnis);
}
-static walk_rc_t
+walk_rc_t
ip_neighbor_mark_one (index_t ipni, void *ctx)
{
ip_neighbor_t *ipn;
@@ -1311,8 +1356,8 @@ VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip_neighbor_interface_admin_change);
* Remove any arp entries associated with the specified interface
*/
static clib_error_t *
-ip_neighbor_delete_sw_interface (vnet_main_t * vnm,
- u32 sw_if_index, u32 is_add)
+ip_neighbor_add_del_sw_interface (vnet_main_t *vnm, u32 sw_if_index,
+ u32 is_add)
{
IP_NEIGHBOR_DBG ("interface-change: %U %s",
format_vnet_sw_if_index_name, vnet_get_main (),
@@ -1325,10 +1370,16 @@ ip_neighbor_delete_sw_interface (vnet_main_t * vnm,
FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
}
+ if (is_add)
+ {
+ ip_neighbor_alloc_ctr (&ip_neighbor_counters[AF_IP4], sw_if_index);
+ ip_neighbor_alloc_ctr (&ip_neighbor_counters[AF_IP6], sw_if_index);
+ }
+
return (NULL);
}
-VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_neighbor_delete_sw_interface);
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_neighbor_add_del_sw_interface);
typedef struct ip_neighbor_walk_covered_ctx_t_
{
@@ -1386,14 +1437,12 @@ ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
* Flush the ARP cache of all entries covered by the address
* that is being removed.
*/
- IP_NEIGHBOR_DBG ("addr-%d: %U, %U/%d",
- (is_del ? "del" : "add"),
- format_vnet_sw_if_index_name, vnet_get_main (),
- sw_if_index, format_ip4_address, address, address_length);
+ IP_NEIGHBOR_DBG ("addr-%s: %U, %U/%d", (is_del ? "del" : "add"),
+ format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index,
+ format_ip4_address, address, address_length);
if (is_del)
{
- /* *INDENT-OFF* */
ip_neighbor_walk_covered_ctx_t ctx = {
.addr = {
.ip.ip4 = *address,
@@ -1401,7 +1450,6 @@ ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
},
.length = address_length,
};
- /* *INDENT-ON* */
index_t *ipni;
ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_covered, &ctx);
@@ -1435,7 +1483,6 @@ ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
if (is_del)
{
- /* *INDENT-OFF* */
ip_neighbor_walk_covered_ctx_t ctx = {
.addr = {
.ip.ip6 = *address,
@@ -1443,7 +1490,6 @@ ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
},
.length = address_length,
};
- /* *INDENT-ON* */
index_t *ipni;
ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_covered, &ctx);
@@ -1540,7 +1586,8 @@ ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait)
else
{
ip_neighbor_probe_dst (ip_neighbor_get_sw_if_index (ipn),
- af, &ip_addr_46 (&ipn->ipn_key->ipnk_ip));
+ vlib_get_thread_index (), af,
+ &ip_addr_46 (&ipn->ipn_key->ipnk_ip));
ipn->ipn_n_probes++;
*wait = 1;
@@ -1598,7 +1645,6 @@ ip_neighbor_age_loop (vlib_main_t * vm,
head = pool_elt_at_index (ip_neighbor_elt_pool,
ip_neighbor_list_head[af]);
- /* *INDENT-OFF*/
/* the list is time sorted, newest first, so start from the back
* and work forwards. Stop when we get to one that is alive */
restart:
@@ -1623,7 +1669,6 @@ ip_neighbor_age_loop (vlib_main_t * vm,
timeout = clib_min (wait, timeout);
}));
- /* *INDENT-ON* */
break;
}
case IP_NEIGHBOR_AGE_PROCESS_WAKEUP:
@@ -1670,7 +1715,6 @@ ip6_neighbor_age_process (vlib_main_t * vm,
return (ip_neighbor_age_loop (vm, rt, f, AF_IP6));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_neighbor_age_process_node,static) = {
.function = ip4_neighbor_age_process,
.type = VLIB_NODE_TYPE_PROCESS,
@@ -1681,7 +1725,6 @@ VLIB_REGISTER_NODE (ip6_neighbor_age_process_node,static) = {
.type = VLIB_NODE_TYPE_PROCESS,
.name = "ip6-neighbor-age-process",
};
-/* *INDENT-ON* */
int
ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle)
@@ -1699,13 +1742,23 @@ ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle)
return (0);
}
+int
+ip_neighbor_get_config (ip_address_family_t af, u32 *limit, u32 *age,
+ bool *recycle)
+{
+ *limit = ip_neighbor_db[af].ipndb_limit;
+ *age = ip_neighbor_db[af].ipndb_age;
+ *recycle = ip_neighbor_db[af].ipndb_recycle;
+
+ return (0);
+}
+
static clib_error_t *
ip_neighbor_config_show (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
ip_address_family_t af;
- /* *INDENT-OFF* */
FOR_EACH_IP_ADDRESS_FAMILY(af) {
vlib_cli_output (vm, "%U:", format_ip_address_family, af);
vlib_cli_output (vm, " limit:%d, age:%d, recycle:%d",
@@ -1714,7 +1767,6 @@ ip_neighbor_config_show (vlib_main_t * vm,
ip_neighbor_db[af].ipndb_recycle);
}
- /* *INDENT-ON* */
return (NULL);
}
@@ -1765,7 +1817,47 @@ done:
return error;
}
-/* *INDENT-OFF* */
+static void
+ip_neighbor_stats_show_one (vlib_main_t *vm, vnet_main_t *vnm, u32 sw_if_index)
+{
+ vlib_cli_output (vm, " %U", format_vnet_sw_if_index_name, vnm, sw_if_index);
+ vlib_cli_output (vm, " arp:%U", format_ip_neighbor_counters,
+ &ip_neighbor_counters[AF_IP4], sw_if_index);
+ vlib_cli_output (vm, " nd: %U", format_ip_neighbor_counters,
+ &ip_neighbor_counters[AF_IP6], sw_if_index);
+}
+
+static walk_rc_t
+ip_neighbor_stats_show_cb (vnet_main_t *vnm, vnet_sw_interface_t *si,
+ void *ctx)
+{
+ ip_neighbor_stats_show_one (ctx, vnm, si->sw_if_index);
+
+ return (WALK_CONTINUE);
+}
+
+static clib_error_t *
+ip_neighbor_stats_show (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_main_t *vnm;
+ u32 sw_if_index;
+
+ vnm = vnet_get_main ();
+ sw_if_index = ~0;
+ (void) unformat_user (input, unformat_vnet_sw_interface, vnm, &sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ vnet_sw_interface_walk (vnm, ip_neighbor_stats_show_cb, vm);
+ }
+ else
+ {
+ ip_neighbor_stats_show_one (vm, vnm, sw_if_index);
+ }
+ return (NULL);
+}
+
VLIB_CLI_COMMAND (show_ip_neighbor_cfg_cmd_node, static) = {
.path = "show ip neighbor-config",
.function = ip_neighbor_config_show,
@@ -1777,7 +1869,11 @@ VLIB_CLI_COMMAND (set_ip_neighbor_cfg_cmd_node, static) = {
.short_help = "set ip neighbor-config ip4|ip6 [limit <limit>] [age <age>] "
"[recycle|norecycle]",
};
-/* *INDENT-ON* */
+VLIB_CLI_COMMAND (show_ip_neighbor_stats_cmd_node, static) = {
+ .path = "show ip neighbor-stats",
+ .function = ip_neighbor_stats_show,
+ .short_help = "show ip neighbor-stats [interface]",
+};
static clib_error_t *
ip_neighbor_init (vlib_main_t * vm)
@@ -1806,14 +1902,6 @@ ip_neighbor_init (vlib_main_t * vm)
};
vec_add1 (ip6_main.table_bind_callbacks, cb);
}
- {
- ethernet_address_change_ctx_t ctx = {
- .function = ip_neighbor_ethernet_change_mac,
- .function_opaque = 0,
- };
- vec_add1 (ethernet_main.address_change_callbacks, ctx);
- }
-
ipn_logger = vlib_log_register_class ("ip", "neighbor");
ip_address_family_t af;
@@ -1825,12 +1913,10 @@ ip_neighbor_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip_neighbor_init) =
{
.runs_after = VLIB_INITS("ip_main_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip-neighbor/ip_neighbor.h b/src/vnet/ip-neighbor/ip_neighbor.h
index 064569b56ce..cc888ba2054 100644
--- a/src/vnet/ip-neighbor/ip_neighbor.h
+++ b/src/vnet/ip-neighbor/ip_neighbor.h
@@ -36,6 +36,8 @@ extern int ip_neighbor_del (const ip_address_t * ip, u32 sw_if_index);
extern int ip_neighbor_config (ip_address_family_t af,
u32 limit, u32 age, bool recycle);
+extern int ip_neighbor_get_config (ip_address_family_t af, u32 *limit,
+ u32 *age, bool *recycle);
extern void ip_neighbor_del_all (ip_address_family_t af, u32 sw_if_index);
@@ -54,12 +56,13 @@ extern void ip_neighbor_learn (const ip_neighbor_learn_t * l);
extern void ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai);
extern void ip_neighbor_probe (const ip_adjacency_t * adj);
-extern void ip_neighbor_probe_dst (u32 sw_if_index,
+extern void ip_neighbor_probe_dst (u32 sw_if_index, u32 thread_index,
ip_address_family_t af,
- const ip46_address_t * ip);
+ const ip46_address_t *ip);
extern void ip_neighbor_mark (ip_address_family_t af);
extern void ip_neighbor_sweep (ip_address_family_t af);
+extern walk_rc_t ip_neighbor_mark_one (index_t ipni, void *ctx);
/**
* From the watcher to the API to publish a new neighbor
@@ -111,7 +114,6 @@ typedef struct ip_neighbor_vft_t_
extern void ip_neighbor_register (ip_address_family_t af,
const ip_neighbor_vft_t * vft);
-
#endif /* __INCLUDE_IP_NEIGHBOR_H__ */
/*
diff --git a/src/vnet/ip-neighbor/ip_neighbor_api.c b/src/vnet/ip-neighbor/ip_neighbor_api.c
index 81af86211de..2297546f111 100644
--- a/src/vnet/ip-neighbor/ip_neighbor_api.c
+++ b/src/vnet/ip-neighbor/ip_neighbor_api.c
@@ -234,12 +234,10 @@ vl_api_ip_neighbor_add_del_t_handler (vl_api_ip_neighbor_add_del_t * mp,
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IP_NEIGHBOR_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -314,6 +312,32 @@ vl_api_ip_neighbor_config_t_handler (vl_api_ip_neighbor_config_t * mp)
}
static void
+vl_api_ip_neighbor_config_get_t_handler (vl_api_ip_neighbor_config_get_t *mp)
+{
+ vl_api_ip_neighbor_config_get_reply_t *rmp;
+ int rv;
+ ip_address_family_t af = AF_IP4;
+ u32 max_number = ~0;
+ u32 max_age = ~0;
+ bool recycle = false;
+
+ rv = ip_address_family_decode (mp->af, &af);
+
+ if (!rv)
+ rv = ip_neighbor_get_config (af, &max_number, &max_age, &recycle);
+
+ // clang-format off
+ REPLY_MACRO2 (VL_API_IP_NEIGHBOR_CONFIG_GET_REPLY,
+ ({
+ rmp->af = ip_address_family_encode (af);
+ rmp->max_number = htonl (max_number);
+ rmp->max_age = htonl (max_age);
+ rmp->recycle = recycle;
+ }));
+ // clang-format on
+}
+
+static void
vl_api_ip_neighbor_replace_begin_t_handler (vl_api_ip_neighbor_replace_begin_t
* mp)
{
diff --git a/src/vnet/ip-neighbor/ip_neighbor_types.c b/src/vnet/ip-neighbor/ip_neighbor_types.c
index 76fbc5ac8a9..39039a48249 100644
--- a/src/vnet/ip-neighbor/ip_neighbor_types.c
+++ b/src/vnet/ip-neighbor/ip_neighbor_types.c
@@ -83,6 +83,53 @@ format_ip_neighbor (u8 * s, va_list * va)
ipn->ipn_key->ipnk_sw_if_index));
}
+static void
+ip_neighbor_alloc_one_ctr (ip_neighbor_counters_t *ctr, vlib_dir_t dir,
+ ip_neighbor_counter_type_t type, u32 sw_if_index)
+{
+ vlib_validate_simple_counter (&(ctr->ipnc[dir][type]), sw_if_index);
+ vlib_zero_simple_counter (&(ctr->ipnc[dir][type]), sw_if_index);
+}
+
+void
+ip_neighbor_alloc_ctr (ip_neighbor_counters_t *ctr, u32 sw_if_index)
+{
+ ip_neighbor_counter_type_t type;
+ vlib_dir_t dir;
+
+ FOREACH_VLIB_DIR (dir)
+ {
+ FOREACH_IP_NEIGHBOR_CTR (type)
+ {
+ ip_neighbor_alloc_one_ctr (ctr, dir, type, sw_if_index);
+ }
+ }
+}
+
+u8 *
+format_ip_neighbor_counters (u8 *s, va_list *args)
+{
+ ip_neighbor_counters_t *ctr = va_arg (*args, ip_neighbor_counters_t *);
+ u32 sw_if_index = va_arg (*args, u32);
+ vlib_dir_t dir;
+
+ FOREACH_VLIB_DIR (dir)
+ {
+ s = format (s, " %U:[", format_vlib_rx_tx, dir);
+
+#define _(a, b) \
+ s = format (s, "%s:%lld ", b, \
+ vlib_get_simple_counter (&ctr->ipnc[dir][IP_NEIGHBOR_CTR_##a], \
+ sw_if_index));
+ foreach_ip_neighbor_counter_type
+#undef _
+
+ s = format (s, "]");
+ }
+
+ return (s);
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/ip-neighbor/ip_neighbor_types.h b/src/vnet/ip-neighbor/ip_neighbor_types.h
index 2eb8fd0841f..d7e818ba252 100644
--- a/src/vnet/ip-neighbor/ip_neighbor_types.h
+++ b/src/vnet/ip-neighbor/ip_neighbor_types.h
@@ -120,7 +120,37 @@ extern void ip_neighbor_clone (const ip_neighbor_t * ipn,
extern void ip_neighbor_free (ip_neighbor_t * ipn);
+/**
+ * Keep RX and TX counts per-AF
+ */
+#define foreach_ip_neighbor_counter_type \
+ _ (REPLY, "reply") \
+ _ (REQUEST, "request") \
+ _ (GRAT, "gratuitous")
+
+typedef enum ip_neighbor_counter_type_t_
+{
+#define _(a, b) IP_NEIGHBOR_CTR_##a,
+ foreach_ip_neighbor_counter_type
+#undef _
+} ip_neighbor_counter_type_t;
+
+#define N_IP_NEIGHBOR_CTRS (IP_NEIGHBOR_CTR_GRAT + 1)
+
+#define FOREACH_IP_NEIGHBOR_CTR(_type) \
+ for (_type = 0; _type < N_IP_NEIGHBOR_CTRS; _type++)
+
+typedef struct ip_neighbor_counters_t_
+{
+ vlib_simple_counter_main_t ipnc[VLIB_N_DIR][N_IP_NEIGHBOR_CTRS];
+} ip_neighbor_counters_t;
+
+extern u8 *format_ip_neighbor_counters (u8 *s, va_list *args);
+
+extern void ip_neighbor_alloc_ctr (ip_neighbor_counters_t *ctr,
+ u32 sw_if_index);
+extern ip_neighbor_counters_t ip_neighbor_counters[N_AF];
#endif /* __INCLUDE_IP_NEIGHBOR_H__ */
diff --git a/src/vnet/ip-neighbor/ip_neighbor_watch.c b/src/vnet/ip-neighbor/ip_neighbor_watch.c
index 72908f4e613..74f450114e1 100644
--- a/src/vnet/ip-neighbor/ip_neighbor_watch.c
+++ b/src/vnet/ip-neighbor/ip_neighbor_watch.c
@@ -66,13 +66,11 @@ ip_neighbor_event_process (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip_neighbor_event_process_node) = {
.function = ip_neighbor_event_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "ip-neighbor-event",
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -84,7 +82,6 @@ want_ip_neighbor_events_reaper (u32 client_index)
i32 pos;
/* walk the entire IP neighbour DB and removes the client's registrations */
- /* *INDENT-OFF* */
mhash_foreach(key, v, &ipnw_db.ipnwdb_hash,
({
watchers = (ip_neighbor_watcher_t*) *v;
@@ -97,7 +94,6 @@ want_ip_neighbor_events_reaper (u32 client_index)
if (vec_len(watchers) == 0)
vec_add1 (empty_keys, *key);
}));
- /* *INDENT-OFF* */
vec_foreach (key, empty_keys)
mhash_unset (&ipnw_db.ipnwdb_hash, key, NULL);
@@ -236,7 +232,6 @@ ip_neighbor_watchers_show (vlib_main_t * vm,
ip_neighbor_key_t *key;
uword *v;
- /* *INDENT-OFF* */
mhash_foreach(key, v, &ipnw_db.ipnwdb_hash,
({
watchers = (ip_neighbor_watcher_t*) *v;
@@ -247,17 +242,14 @@ ip_neighbor_watchers_show (vlib_main_t * vm,
vec_foreach (watcher, watchers)
vlib_cli_output (vm, " %U", format_ip_neighbor_watcher, watcher);
}));
- /* *INDENT-ON* */
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_neighbor_watchers_cmd_node, static) = {
.path = "show ip neighbor-watcher",
.function = ip_neighbor_watchers_show,
.short_help = "show ip neighbors-watcher",
};
-/* *INDENT-ON* */
static clib_error_t *
ip_neighbor_watch_init (vlib_main_t * vm)
@@ -267,12 +259,10 @@ ip_neighbor_watch_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip_neighbor_watch_init) =
{
.runs_after = VLIB_INITS("ip_neighbor_init"),
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/ip/icmp4.c b/src/vnet/ip/icmp4.c
index 0363092d5d5..fa4a0e12276 100644
--- a/src/vnet/ip/icmp4.c
+++ b/src/vnet/ip/icmp4.c
@@ -40,12 +40,11 @@
#include <vlib/vlib.h>
#include <vnet/ip/ip.h>
#include <vnet/pg/pg.h>
+#include <vnet/ip/ip_sas.h>
+#include <vnet/util/throttle.h>
-static char *icmp_error_strings[] = {
-#define _(f,s) s,
- foreach_icmp4_error
-#undef _
-};
+/** ICMP throttling */
+static throttle_t icmp_throttle;
static u8 *
format_ip4_icmp_type_and_code (u8 * s, va_list * args)
@@ -205,7 +204,6 @@ ip4_icmp_input (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_input_node) = {
.function = ip4_icmp_input,
.name = "ip4-icmp-input",
@@ -214,15 +212,14 @@ VLIB_REGISTER_NODE (ip4_icmp_input_node) = {
.format_trace = format_icmp_input_trace,
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP4_N_ERROR,
+ .error_counters = icmp4_error_counters,
.n_next_nodes = 1,
.next_nodes = {
[ICMP_INPUT_NEXT_ERROR] = "ip4-punt",
},
};
-/* *INDENT-ON* */
typedef enum
{
@@ -254,13 +251,14 @@ ip4_icmp_error (vlib_main_t * vm,
u32 *from, *to_next;
uword n_left_from, n_left_to_next;
ip4_icmp_error_next_t next_index;
- ip4_main_t *im = &ip4_main;
- ip_lookup_main_t *lm = &im->lookup_main;
+ u32 thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
+ u64 seed = throttle_seed (&icmp_throttle, thread_index, vlib_time_now (vm));
+
if (node->flags & VLIB_NODE_FLAG_TRACE)
vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
/* stride */ 1,
@@ -286,10 +284,25 @@ ip4_icmp_error (vlib_main_t * vm,
vlib_buffer_t *p0, *org_p0;
ip4_header_t *ip0, *out_ip0;
icmp46_header_t *icmp0;
- u32 sw_if_index0, if_add_index0;
+ u32 sw_if_index0;
ip_csum_t sum;
org_p0 = vlib_get_buffer (vm, org_pi0);
+ ip0 = vlib_buffer_get_current (org_p0);
+
+ /* Rate limit based on the src,dst addresses in the original packet
+ */
+ u64 r0 =
+ (u64) ip0->dst_address.as_u32 << 32 | ip0->src_address.as_u32;
+
+ if (throttle_check (&icmp_throttle, thread_index, r0, seed))
+ {
+ vlib_error_count (vm, node->node_index, ICMP4_ERROR_DROP, 1);
+ from += 1;
+ n_left_from -= 1;
+ continue;
+ }
+
p0 = vlib_buffer_copy_no_chain (vm, org_p0, &pi0);
if (!p0 || pi0 == ~0) /* Out of buffers */
continue;
@@ -301,14 +314,16 @@ ip4_icmp_error (vlib_main_t * vm,
n_left_from -= 1;
n_left_to_next -= 1;
- ip0 = vlib_buffer_get_current (p0);
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ vlib_buffer_copy_trace_flag (vm, org_p0, pi0);
+
/* Add IP header and ICMPv4 header including a 4 byte data field */
vlib_buffer_advance (p0,
-sizeof (ip4_header_t) -
sizeof (icmp46_header_t) - 4);
+ p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
p0->current_length =
p0->current_length > 576 ? 576 : p0->current_length;
out_ip0 = vlib_buffer_get_current (p0);
@@ -323,25 +338,14 @@ ip4_icmp_error (vlib_main_t * vm,
out_ip0->ttl = 0xff;
out_ip0->protocol = IP_PROTOCOL_ICMP;
out_ip0->dst_address = ip0->src_address;
- if_add_index0 = ~0;
- if (PREDICT_TRUE (vec_len (lm->if_address_pool_index_by_sw_if_index)
- > sw_if_index0))
- if_add_index0 =
- lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
- if (PREDICT_TRUE (if_add_index0 != ~0))
- {
- ip_interface_address_t *if_add =
- pool_elt_at_index (lm->if_address_pool, if_add_index0);
- ip4_address_t *if_ip =
- ip_interface_address_get_address (lm, if_add);
- out_ip0->src_address = *if_ip;
- }
- else
- {
- /* interface has no IP4 address - should not happen */
+ /* Prefer a source address from "offending interface" */
+ if (!ip4_sas_by_sw_if_index (sw_if_index0, &out_ip0->dst_address,
+ &out_ip0->src_address))
+ { /* interface has no IP4 address - should not happen */
next0 = IP4_ICMP_ERROR_NEXT_DROP;
error0 = ICMP4_ERROR_DROP;
}
+
out_ip0->checksum = ip4_header_checksum (out_ip0);
/* Fill icmp header fields */
@@ -382,14 +386,13 @@ ip4_icmp_error (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
.function = ip4_icmp_error,
.name = "ip4-icmp-error",
.vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP4_N_ERROR,
+ .error_counters = icmp4_error_counters,
.n_next_nodes = IP4_ICMP_ERROR_N_NEXT,
.next_nodes = {
@@ -399,7 +402,6 @@ VLIB_REGISTER_NODE (ip4_icmp_error_node) = {
.format_trace = format_icmp_input_trace,
};
-/* *INDENT-ON* */
static uword
@@ -582,6 +584,11 @@ icmp4_init (vlib_main_t * vm)
ICMP_INPUT_NEXT_ERROR,
sizeof (cm->ip4_input_next_index_by_type));
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ u32 n_vlib_mains = tm->n_vlib_mains;
+
+ throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-5);
+
return 0;
}
diff --git a/src/vnet/ip/icmp4.h b/src/vnet/ip/icmp4.h
index e2a95673fc7..22a4fc508e5 100644
--- a/src/vnet/ip/icmp4.h
+++ b/src/vnet/ip/icmp4.h
@@ -15,29 +15,6 @@
#ifndef included_vnet_icmp4_h
#define included_vnet_icmp4_h
-#define foreach_icmp4_error \
- _ (NONE, "valid packets") \
- _ (UNKNOWN_TYPE, "unknown type") \
- _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
- _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
- _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
- _ (OPTIONS_WITH_ODD_LENGTH, \
- "total option length not multiple of 8 bytes") \
- _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
- _ (ECHO_REPLIES_SENT, "echo replies sent") \
- _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \
- _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
- _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \
- _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \
- _ (DROP, "error message dropped")
-
-typedef enum
-{
-#define _(f,s) ICMP4_ERROR_##f,
- foreach_icmp4_error
-#undef _
-} icmp4_error_t;
-
typedef struct
{
u8 packet_data[64];
diff --git a/src/vnet/ip/icmp46_packet.h b/src/vnet/ip/icmp46_packet.h
index 0545046fe60..08e73f6cd7d 100644
--- a/src/vnet/ip/icmp46_packet.h
+++ b/src/vnet/ip/icmp46_packet.h
@@ -187,7 +187,6 @@ typedef enum
#undef _
} icmp6_code_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
u8 type;
@@ -195,7 +194,6 @@ typedef CLIB_PACKED (struct
/* IP checksum of icmp header plus data which follows. */
u16 checksum;
}) icmp46_header_t;
-/* *INDENT-ON* */
/* ip6 neighbor discovery */
#define foreach_icmp6_neighbor_discovery_option \
@@ -238,7 +236,6 @@ typedef enum icmp6_neighbor_discovery_option_type
#undef _
} icmp6_neighbor_discovery_option_type_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
/* Option type. */
@@ -357,6 +354,5 @@ typedef CLIB_PACKED (struct
icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
link_layer_option;
}) icmp6_neighbor_solicitation_header_t;
-/* *INDENT-ON* */
#endif /* included_vnet_icmp46_packet_h */
diff --git a/src/vnet/ip/icmp6.c b/src/vnet/ip/icmp6.c
index 4bba430fadc..b095f679cc8 100644
--- a/src/vnet/ip/icmp6.c
+++ b/src/vnet/ip/icmp6.c
@@ -40,6 +40,11 @@
#include <vlib/vlib.h>
#include <vnet/ip/ip.h>
#include <vnet/pg/pg.h>
+#include <vnet/ip/ip_sas.h>
+#include <vnet/util/throttle.h>
+
+/** ICMP throttling */
+static throttle_t icmp_throttle;
static u8 *
format_ip6_icmp_type_and_code (u8 * s, va_list * args)
@@ -122,12 +127,6 @@ format_icmp6_input_trace (u8 * s, va_list * va)
return s;
}
-static char *icmp_error_strings[] = {
-#define _(f,s) s,
- foreach_icmp6_error
-#undef _
-};
-
typedef enum
{
ICMP_INPUT_NEXT_PUNT,
@@ -236,7 +235,6 @@ ip6_icmp_input (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
.function = ip6_icmp_input,
.name = "ip6-icmp-input",
@@ -245,195 +243,14 @@ VLIB_REGISTER_NODE (ip6_icmp_input_node) = {
.format_trace = format_icmp6_input_trace,
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP6_N_ERROR,
+ .error_counters = icmp6_error_counters,
.n_next_nodes = 1,
.next_nodes = {
[ICMP_INPUT_NEXT_PUNT] = "ip6-punt",
},
};
-/* *INDENT-ON* */
-
-typedef enum
-{
- ICMP6_ECHO_REQUEST_NEXT_LOOKUP,
- ICMP6_ECHO_REQUEST_NEXT_OUTPUT,
- ICMP6_ECHO_REQUEST_N_NEXT,
-} icmp6_echo_request_next_t;
-
-static uword
-ip6_icmp_echo_request (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
- u32 *from, *to_next;
- u32 n_left_from, n_left_to_next, next_index;
- ip6_main_t *im = &ip6_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- if (node->flags & VLIB_NODE_FLAG_TRACE)
- vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
- /* stride */ 1,
- sizeof (icmp6_input_trace_t));
-
- while (n_left_from > 0)
- {
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 2 && n_left_to_next > 2)
- {
- vlib_buffer_t *p0, *p1;
- ip6_header_t *ip0, *ip1;
- icmp46_header_t *icmp0, *icmp1;
- ip6_address_t tmp0, tmp1;
- ip_csum_t sum0, sum1;
- u32 bi0, bi1;
- u32 fib_index0, fib_index1;
- u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
- u32 next1 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
-
- bi0 = to_next[0] = from[0];
- bi1 = to_next[1] = from[1];
-
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- p0 = vlib_get_buffer (vm, bi0);
- p1 = vlib_get_buffer (vm, bi1);
- ip0 = vlib_buffer_get_current (p0);
- ip1 = vlib_buffer_get_current (p1);
- icmp0 = ip6_next_header (ip0);
- icmp1 = ip6_next_header (ip1);
-
- /* Check icmp type to echo reply and update icmp checksum. */
- sum0 = icmp0->checksum;
- sum1 = icmp1->checksum;
-
- ASSERT (icmp0->type == ICMP6_echo_request);
- ASSERT (icmp1->type == ICMP6_echo_request);
- sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
- icmp46_header_t, type);
- sum1 = ip_csum_update (sum1, ICMP6_echo_request, ICMP6_echo_reply,
- icmp46_header_t, type);
-
- icmp0->checksum = ip_csum_fold (sum0);
- icmp1->checksum = ip_csum_fold (sum1);
-
- icmp0->type = ICMP6_echo_reply;
- icmp1->type = ICMP6_echo_reply;
-
- /* Swap source and destination address. */
- tmp0 = ip0->src_address;
- tmp1 = ip1->src_address;
-
- ip0->src_address = ip0->dst_address;
- ip1->src_address = ip1->dst_address;
-
- ip0->dst_address = tmp0;
- ip1->dst_address = tmp1;
-
- /* New hop count. */
- ip0->hop_limit = im->host_config.ttl;
- ip1->hop_limit = im->host_config.ttl;
-
- /* Determine the correct lookup fib indices... */
- fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (p0)->sw_if_index[VLIB_RX]);
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
- /* Determine the correct lookup fib indices... */
- fib_index1 = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (p1)->sw_if_index[VLIB_RX]);
- vnet_buffer (p1)->sw_if_index[VLIB_TX] = fib_index1;
-
- /* verify speculative enqueues, maybe switch current next frame */
- /* if next0==next1==next_index then nothing special needs to be done */
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *p0;
- ip6_header_t *ip0;
- icmp46_header_t *icmp0;
- u32 bi0;
- ip6_address_t tmp0;
- ip_csum_t sum0;
- u32 fib_index0;
- u32 next0 = ICMP6_ECHO_REQUEST_NEXT_LOOKUP;
-
- bi0 = to_next[0] = from[0];
-
- from += 1;
- n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- p0 = vlib_get_buffer (vm, bi0);
- ip0 = vlib_buffer_get_current (p0);
- icmp0 = ip6_next_header (ip0);
-
- /* Check icmp type to echo reply and update icmp checksum. */
- sum0 = icmp0->checksum;
-
- ASSERT (icmp0->type == ICMP6_echo_request);
- sum0 = ip_csum_update (sum0, ICMP6_echo_request, ICMP6_echo_reply,
- icmp46_header_t, type);
-
- icmp0->checksum = ip_csum_fold (sum0);
-
- icmp0->type = ICMP6_echo_reply;
-
- /* Swap source and destination address. */
- tmp0 = ip0->src_address;
- ip0->src_address = ip0->dst_address;
- ip0->dst_address = tmp0;
-
- ip0->hop_limit = im->host_config.ttl;
-
- /* if the packet is link local, we'll bounce through the link-local
- * table with the RX interface correctly set */
- fib_index0 = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (p0)->sw_if_index[VLIB_RX]);
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = fib_index0;
-
- /* Verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_error_count (vm, ip6_icmp_input_node.index,
- ICMP6_ERROR_ECHO_REPLIES_SENT, frame->n_vectors);
-
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_icmp_echo_request_node,static) = {
- .function = ip6_icmp_echo_request,
- .name = "ip6-icmp-echo-request",
-
- .vector_size = sizeof (u32),
-
- .format_trace = format_icmp6_input_trace,
-
- .n_next_nodes = ICMP6_ECHO_REQUEST_N_NEXT,
- .next_nodes = {
- [ICMP6_ECHO_REQUEST_NEXT_LOOKUP] = "ip6-lookup",
- [ICMP6_ECHO_REQUEST_NEXT_OUTPUT] = "interface-output",
- },
-};
-/* *INDENT-ON* */
typedef enum
{
@@ -475,13 +292,14 @@ ip6_icmp_error (vlib_main_t * vm,
u32 *from, *to_next;
uword n_left_from, n_left_to_next;
ip6_icmp_error_next_t next_index;
- ip6_main_t *im = &ip6_main;
- ip_lookup_main_t *lm = &im->lookup_main;
+ u32 thread_index = vm->thread_index;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
+ u64 seed = throttle_seed (&icmp_throttle, thread_index, vlib_time_now (vm));
+
if (node->flags & VLIB_NODE_FLAG_TRACE)
vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
/* stride */ 1,
@@ -507,10 +325,25 @@ ip6_icmp_error (vlib_main_t * vm,
vlib_buffer_t *p0, *org_p0;
ip6_header_t *ip0, *out_ip0;
icmp46_header_t *icmp0;
- u32 sw_if_index0, if_add_index0;
+ u32 sw_if_index0;
int bogus_length;
org_p0 = vlib_get_buffer (vm, org_pi0);
+ ip0 = vlib_buffer_get_current (org_p0);
+
+ /* Rate limit based on the src,dst addresses in the original packet
+ */
+ u64 r0 = (ip6_address_hash_to_u64 (&ip0->dst_address) ^
+ ip6_address_hash_to_u64 (&ip0->src_address));
+
+ if (throttle_check (&icmp_throttle, thread_index, r0, seed))
+ {
+ vlib_error_count (vm, node->node_index, ICMP4_ERROR_DROP, 1);
+ from += 1;
+ n_left_from -= 1;
+ continue;
+ }
+
p0 = vlib_buffer_copy_no_chain (vm, org_p0, &pi0);
if (!p0 || pi0 == ~0) /* Out of buffers */
continue;
@@ -522,15 +355,15 @@ ip6_icmp_error (vlib_main_t * vm,
n_left_from -= 1;
n_left_to_next -= 1;
- ip0 = vlib_buffer_get_current (p0);
sw_if_index0 = vnet_buffer (p0)->sw_if_index[VLIB_RX];
+ vlib_buffer_copy_trace_flag (vm, org_p0, pi0);
+
/* Add IP header and ICMPv6 header including a 4 byte data field */
vlib_buffer_advance (p0,
-(sizeof (ip6_header_t) +
sizeof (icmp46_header_t) + 4));
- vnet_buffer (p0)->sw_if_index[VLIB_TX] = ~0;
p0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
p0->current_length =
p0->current_length > 1280 ? 1280 : p0->current_length;
@@ -547,18 +380,10 @@ ip6_icmp_error (vlib_main_t * vm,
out_ip0->protocol = IP_PROTOCOL_ICMP6;
out_ip0->hop_limit = 0xff;
out_ip0->dst_address = ip0->src_address;
- if_add_index0 =
- lm->if_address_pool_index_by_sw_if_index[sw_if_index0];
- if (PREDICT_TRUE (if_add_index0 != ~0))
- {
- ip_interface_address_t *if_add =
- pool_elt_at_index (lm->if_address_pool, if_add_index0);
- ip6_address_t *if_ip =
- ip_interface_address_get_address (lm, if_add);
- out_ip0->src_address = *if_ip;
- }
- else /* interface has no IP6 address - should not happen */
- {
+ /* Prefer a source address from "offending interface" */
+ if (!ip6_sas_by_sw_if_index (sw_if_index0, &out_ip0->dst_address,
+ &out_ip0->src_address))
+ { /* interface has no IP6 address - should not happen */
next0 = IP6_ICMP_ERROR_NEXT_DROP;
error0 = ICMP6_ERROR_DROP;
}
@@ -599,14 +424,13 @@ ip6_icmp_error (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
.function = ip6_icmp_error,
.name = "ip6-icmp-error",
.vector_size = sizeof (u32),
- .n_errors = ARRAY_LEN (icmp_error_strings),
- .error_strings = icmp_error_strings,
+ .n_errors = ICMP6_N_ERROR,
+ .error_counters = icmp6_error_counters,
.n_next_nodes = IP6_ICMP_ERROR_N_NEXT,
.next_nodes = {
@@ -616,7 +440,6 @@ VLIB_REGISTER_NODE (ip6_icmp_error_node) = {
.format_trace = format_icmp6_input_trace,
};
-/* *INDENT-ON* */
static uword
@@ -813,8 +636,10 @@ icmp6_init (vlib_main_t * vm)
cm->min_valid_length_by_type[ICMP6_redirect] =
sizeof (icmp6_redirect_header_t);
- icmp6_register_type (vm, ICMP6_echo_request,
- ip6_icmp_echo_request_node.index);
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ u32 n_vlib_mains = tm->n_vlib_mains;
+
+ throttle_init (&icmp_throttle, n_vlib_mains, THROTTLE_BITS, 1e-3);
return (NULL);
}
diff --git a/src/vnet/ip/icmp6.h b/src/vnet/ip/icmp6.h
index 7a5eef5df18..119aaf0bae9 100644
--- a/src/vnet/ip/icmp6.h
+++ b/src/vnet/ip/icmp6.h
@@ -17,48 +17,6 @@
#include <vnet/ip/icmp46_packet.h>
-#define foreach_icmp6_error \
- _ (NONE, "valid packets") \
- _ (UNKNOWN_TYPE, "unknown type") \
- _ (INVALID_CODE_FOR_TYPE, "invalid code for type") \
- _ (INVALID_HOP_LIMIT_FOR_TYPE, "hop_limit != 255") \
- _ (LENGTH_TOO_SMALL_FOR_TYPE, "payload length too small for type") \
- _ (OPTIONS_WITH_ODD_LENGTH, \
- "total option length not multiple of 8 bytes") \
- _ (OPTION_WITH_ZERO_LENGTH, "option has zero length") \
- _ (ECHO_REPLIES_SENT, "echo replies sent") \
- _ (NEIGHBOR_SOLICITATION_SOURCE_NOT_ON_LINK, \
- "neighbor solicitations from source not on link") \
- _ (NEIGHBOR_SOLICITATION_SOURCE_UNKNOWN, \
- "neighbor solicitations for unknown targets") \
- _ (NEIGHBOR_ADVERTISEMENTS_TX, "neighbor advertisements sent") \
- _ (NEIGHBOR_ADVERTISEMENTS_RX, "neighbor advertisements received") \
- _ (ROUTER_SOLICITATION_SOURCE_NOT_ON_LINK, \
- "router solicitations from source not on link") \
- _ (ROUTER_SOLICITATION_UNSUPPORTED_INTF, \
- "neighbor discovery unsupported interface") \
- _ (ROUTER_SOLICITATION_RADV_NOT_CONFIG, \
- "neighbor discovery not configured") \
- _ (ROUTER_ADVERTISEMENT_SOURCE_NOT_LINK_LOCAL, \
- "router advertisement source not link local") \
- _ (ROUTER_ADVERTISEMENTS_TX, "router advertisements sent") \
- _ (ROUTER_ADVERTISEMENTS_RX, "router advertisements received") \
- _ (DST_LOOKUP_MISS, "icmp6 dst address lookup misses") \
- _ (DEST_UNREACH_SENT, "destination unreachable response sent") \
- _ (PACKET_TOO_BIG_SENT, "packet too big response sent") \
- _ (TTL_EXPIRE_SENT, "hop limit exceeded response sent") \
- _ (PARAM_PROBLEM_SENT, "parameter problem response sent") \
- _ (DROP, "error message dropped") \
- _ (ALLOC_FAILURE, "buffer allocation failure")
-
-
-typedef enum
-{
-#define _(f,s) ICMP6_ERROR_##f,
- foreach_icmp6_error
-#undef _
-} icmp6_error_t;
-
typedef struct
{
u8 packet_data[64];
diff --git a/src/vnet/ip/ip.api b/src/vnet/ip/ip.api
index c8d4c397182..967f56cf917 100644
--- a/src/vnet/ip/ip.api
+++ b/src/vnet/ip/ip.api
@@ -20,7 +20,7 @@
called through a shared memory interface.
*/
-option version = "3.1.0";
+option version = "3.2.0";
import "vnet/interface_types.api";
import "vnet/fib/fib_types.api";
@@ -57,6 +57,35 @@ autoreply define ip_table_add_del
vl_api_ip_table_t table;
};
+/** \brief Allocate an unused table
+ A table can be added multiple times.
+ If a large number of tables are in use (millions), this API might
+ fail to find a free ID with very low probability, and will return
+ EAGAIN. A subsequent attempt may be successful.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param table - if table.table_id == ~0, vpp allocates an unused table_id and
+ proceeds as in ip_table_add_del with is_add = true
+ if table.table_id != ~0, vpp uses the table.table_id and
+ proceeds as in ip_table_add_del with is_add = true
+ table.table_id should never be 0
+*/
+define ip_table_allocate
+{
+ u32 client_index;
+ u32 context;
+
+ vl_api_ip_table_t table;
+};
+
+define ip_table_allocate_reply
+{
+ u32 context;
+ i32 retval;
+
+ vl_api_ip_table_t table;
+};
+
/** \brief Dump IP all fib tables
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -337,6 +366,41 @@ autoreply define set_ip_flow_hash_v2
vl_api_ip_flow_hash_config_t flow_hash_config;
};
+/**
+ @brief flow hash settings for an IP table
+ @param src - include src in flow hash
+ @param dst - include dst in flow hash
+ @param sport - include sport in flow hash
+ @param dport - include dport in flow hash
+ @param proto - include proto in flow hash
+ @param reverse - include reverse in flow hash
+ @param symmetric - include symmetry in flow hash
+ @param flowlabel - include flowlabel in flow hash
+ @param gtpv1teid - include gtpv1teid in flow hash
+*/
+enumflag ip_flow_hash_config_v2
+{
+ IP_API_V2_FLOW_HASH_SRC_IP = 0x01,
+ IP_API_V2_FLOW_HASH_DST_IP = 0x02,
+ IP_API_V2_FLOW_HASH_SRC_PORT = 0x04,
+ IP_API_V2_FLOW_HASH_DST_PORT = 0x08,
+ IP_API_V2_FLOW_HASH_PROTO = 0x10,
+ IP_API_V2_FLOW_HASH_REVERSE = 0x20,
+ IP_API_V2_FLOW_HASH_SYMETRIC = 0x40,
+ IP_API_V2_FLOW_HASH_FLOW_LABEL = 0x80,
+ IP_API_V2_FLOW_HASH_GTPV1_TEID = 0x100,
+};
+
+autoreply define set_ip_flow_hash_v3
+{
+ u32 client_index;
+ u32 context;
+ u32 table_id;
+ vl_api_address_family_t af;
+ vl_api_ip_flow_hash_config_v2_t flow_hash_config;
+ option status="in_progress";
+};
+
/** \brief Set the ip flow hash router ID
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -557,6 +621,8 @@ typedef punt_redirect
*/
autoreply define ip_punt_redirect
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_punt_redirect_t punt;
@@ -565,6 +631,8 @@ autoreply define ip_punt_redirect
define ip_punt_redirect_dump
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_interface_index_t sw_if_index;
@@ -573,10 +641,54 @@ define ip_punt_redirect_dump
define ip_punt_redirect_details
{
+ option deprecated;
+
u32 context;
vl_api_punt_redirect_t punt;
};
+/** \brief Punt redirect type
+ @param rx_sw_if_index - specify the original RX interface of traffic
+ that should be redirected. ~0 means any interface.
+ @param af - Address family (ip4 or ip6)
+ @param paths - the TX paths to which traffic should be redirected.
+*/
+typedef punt_redirect_v2
+{
+ vl_api_interface_index_t rx_sw_if_index [default=0xffffffff];
+ vl_api_address_family_t af;
+ u32 n_paths;
+ vl_api_fib_path_t paths[n_paths];
+};
+
+/** \brief Add IP punt redirect rule
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param punt - punt definition
+ @param is_add - 1 to add punt_redirect rule, 0 to delete
+*/
+autoreply define add_del_ip_punt_redirect_v2
+{
+ u32 client_index;
+ u32 context;
+ bool is_add [default=true];
+ vl_api_punt_redirect_v2_t punt;
+};
+
+define ip_punt_redirect_v2_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_address_family_t af;
+};
+
+define ip_punt_redirect_v2_details
+{
+ u32 context;
+ vl_api_punt_redirect_v2_t punt;
+};
+
autoreply define ip_container_proxy_add_del
{
u32 client_index;
@@ -764,6 +876,30 @@ autoreply define ip_reassembly_enable_disable
vl_api_ip_reass_type_t type;
};
+/** enable/disable full reassembly of packets aimed at our addresses */
+autoreply define ip_local_reass_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ bool enable_ip4;
+ bool enable_ip6;
+};
+
+/** get status of local reassembly */
+define ip_local_reass_get
+{
+ u32 client_index;
+ u32 context;
+};
+
+define ip_local_reass_get_reply
+{
+ u32 context;
+ i32 retval;
+ bool ip4_is_enabled;
+ bool ip6_is_enabled;
+};
+
/**
@brief Set a Path MTU value. i.e. a MTU value for a given neighbour.
The neighbour can be described as attached (w/ interface and next-hop)
@@ -821,6 +957,816 @@ autoreply define ip_path_mtu_replace_end
u32 context;
};
+counters ip_frag {
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "packet fragmented";
+ };
+ small_packet {
+ severity error;
+ type counter64;
+ units "packets";
+ description "packet smaller than MTU";
+ };
+ fragment_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "number of sent fragments";
+ };
+ cant_fragment_header {
+ severity error;
+ type counter64;
+ units "packets";
+ description "can't fragment header";
+ };
+ dont_fragment_set {
+ severity error;
+ type counter64;
+ units "packets";
+ description "can't fragment this packet";
+ };
+ malformed {
+ severity error;
+ type counter64;
+ units "packets";
+ description "malformed packet";
+ };
+ memory {
+ severity error;
+ type counter64;
+ units "packets";
+ description "could not allocate buffer";
+ };
+ unknown {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown error";
+ };
+};
+
+counters ip4 {
+ /* Must be first. */
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid ip4 packets";
+ };
+
+ /* Errors signalled by ip4-input */
+ too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 length < 20 bytes";
+ };
+ bad_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 length > l2 length";
+ };
+ bad_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad ip4 checksum";
+ };
+ version {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 version != 4";
+ };
+ options {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ip4 options present";
+ };
+ fragment_offset_one {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 fragment offset == 1";
+ };
+ time_expired {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 ttl <= 1";
+ };
+ hdr_too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 IHL < 5";
+ };
+
+ /* Errors signalled by ip4-rewrite. */
+ mtu_exceeded {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 MTU exceeded and DF set";
+ };
+ dst_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 destination lookup miss";
+ };
+ src_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 source lookup miss";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 drop";
+ };
+ punt {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 punt";
+ };
+ same_interface {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 egress interface same as ingress";
+ };
+
+ /* errors signalled by ip4-local. */
+ unknown_protocol {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown ip protocol";
+ };
+ tcp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad tcp checksum";
+ };
+ udp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad udp checksum";
+ };
+ udp_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "inconsistent udp/ip lengths";
+ };
+
+ /* spoofed packets in ip4-rewrite-local */
+ spoofed_local_packets {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip4 spoofed local-address packet drops";
+ };
+
+ /* Errors signalled by ip4-inacl */
+ inacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL table-miss drops";
+ };
+ inacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL session deny drops";
+ };
+
+ /* Errors singalled by ip4-outacl */
+ outacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL table-miss drops";
+ };
+ outacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL session deny drops";
+ };
+
+ /* Errors from mfib-forward */
+ rpf_failure {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Multicast RPF check failed";
+ };
+
+ /* Errors signalled by ip4-reassembly */
+ reass_duplicate_fragment {
+ severity error;
+ type counter64;
+ units "packets";
+ description "duplicate/overlapping fragments";
+ };
+ reass_limit_reached {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to concurrent reassemblies limit";
+ };
+ reass_fragment_chain_too_long {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragment chain too long (drop)";
+ };
+ reass_no_buf {
+ severity error;
+ type counter64;
+ units "packets";
+ description "out of buffers (drop)";
+ };
+ reass_malformed_packet {
+ severity error;
+ type counter64;
+ units "packets";
+ description "malformed packets";
+ };
+ reass_internal_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to internal reassembly error";
+ };
+ reass_timeout {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragments dropped due to reassembly timeout";
+ };
+ reass_to_custom_app {
+ severity error;
+ type counter64;
+ units "packets";
+ description "send to custom drop app";
+ };
+ reass_success {
+ severity info;
+ type counter64;
+ units "packets";
+ description "successful reassemblies";
+ };
+ reass_fragments_reassembled {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments reassembled";
+ };
+ reass_fragments_rcvd {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments received";
+ };
+ reass_unsupp_ip_prot {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unsupported ip protocol";
+ };
+};
+
+/**
+ * IPv6 Error/info counters
+ */
+counters ip6 {
+ /* Must be first. */
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid ip6 packets";
+ };
+
+ /* Errors signalled by ip6-input */
+ too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 length < 40 bytes";
+ };
+ bad_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 length > l2 length";
+ };
+ version {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 version != 6";
+ };
+ time_expired {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 ttl <= 1";
+ };
+
+ /* Errors signalled by ip6-rewrite. */
+ mtu_exceeded {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 MTU exceeded";
+ };
+ dst_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 destination lookup miss";
+ };
+ src_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 source lookup miss";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 drop";
+ };
+ punt {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 punt";
+ };
+
+ /* errors signalled by ip6-local. */
+ unknown_protocol {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown ip protocol";
+ };
+ udp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad udp checksum";
+ };
+ icmp_checksum {
+ severity error;
+ type counter64;
+ units "packets";
+ description "bad icmp checksum";
+ };
+ udp_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "inconsistent udp/ip lengths";
+ };
+ /* Errors signalled by udp6-lookup. */
+ unknown_udp_port {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no listener for udp port";
+ };
+
+ /* spoofed packets in ip6-rewrite-local */
+ spoofed_local_packets {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ip6 spoofed local-address packet drops";
+ };
+
+ /* Errors signalled by ip6-inacl */
+ inacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL table-miss drops";
+ };
+ inacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "input ACL session deny drops";
+ };
+
+ /* Errors singalled by ip6-outacl */
+ outacl_table_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL table-miss drops";
+ };
+ outacl_session_deny {
+ severity error;
+ type counter64;
+ units "packets";
+ description "output ACL session deny drops";
+ };
+
+ /* Errors from mfib-forward */
+ rpf_failure {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Multicast RPF check failed";
+ };
+
+ /* Errors signalled by ip6-reassembly */
+ reass_missing_upper {
+ severity error;
+ type counter64;
+ units "packets";
+ description "missing-upper layer drops";
+ };
+ reass_duplicate_fragment {
+ severity error;
+ type counter64;
+ units "packets";
+ description "duplicate fragments";
+ };
+ reass_overlapping_fragment {
+ severity error;
+ type counter64;
+ units "packets";
+ description "overlapping fragments";
+ };
+ reass_limit_reached {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to concurrent reassemblies limit";
+ };
+ reass_fragment_chain_too_long {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragment chain too long (drop)";
+ };
+ reass_no_buf {
+ severity error;
+ type counter64;
+ units "packets";
+ description "out of buffers (drop)";
+ };
+ reass_timeout {
+ severity error;
+ type counter64;
+ units "packets";
+ description "fragments dropped due to reassembly timeout";
+ };
+ reass_internal_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drops due to internal reassembly error";
+ };
+ reass_invalid_frag_len {
+ severity error;
+ type counter64;
+ units "packets";
+ description "invalid fragment length";
+ };
+ reass_to_custom_app {
+ severity error;
+ type counter64;
+ units "packets";
+ description "send to custom drop app";
+ };
+ reass_no_frag_hdr {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no fragmentation header";
+ };
+ reass_invalid_frag_size {
+ severity error;
+ type counter64;
+ units "packets";
+ description "drop due to invalid fragment size";
+ };
+ reass_success {
+ severity info;
+ type counter64;
+ units "packets";
+ description "successful reassemblies";
+ };
+ reass_fragments_reassembled {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments reassembled";
+ };
+ reass_fragments_rcvd {
+ severity info;
+ type counter64;
+ units "packets";
+ description "fragments received";
+ };
+ reass_unsupp_ip_proto {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unsupported ip protocol";
+ };
+};
+
+counters icmp4 {
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid packets";
+ };
+ unknown_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown type";
+ };
+ invalid_code_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "invalid code for type";
+ };
+ invalid_hop_limit_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "hop_limit != 255";
+ };
+ length_too_small_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "payload length too small for type";
+ };
+ options_with_odd_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "total option length not multiple of 8 bytes";
+ };
+ option_with_zero_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "option has zero length";
+ };
+ echo_replies_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "echo replies sent";
+ };
+ dst_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "icmp6 dst address lookup misses";
+ };
+ dest_unreach_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "destination unreachable response sent";
+ };
+ ttl_expire_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "hop limit exceeded response sent";
+ };
+ param_problem_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "parameter problem response sent";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "error message dropped";
+ };
+};
+
+counters icmp6 {
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "valid packets";
+ };
+ unknown_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown type";
+ };
+ invalid_code_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "invalid code for type";
+ };
+ invalid_hop_limit_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "hop_limit != 255";
+ };
+ length_too_small_for_type {
+ severity error;
+ type counter64;
+ units "packets";
+ description "payload length too small for type";
+ };
+ options_with_odd_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "total option length not multiple of 8 bytes";
+ };
+ option_with_zero_length {
+ severity error;
+ type counter64;
+ units "packets";
+ description "option has zero length";
+ };
+ echo_replies_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "echo replies sent";
+ };
+ neighbor_solicitation_source_not_on_link {
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor solicitations from source not on link";
+ };
+ neighbor_solicitation_source_unknown {
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor solicitations for unknown targets";
+ };
+ neighbor_advertisements_tx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "neighbor advertisements sent";
+ };
+ neighbor_advertisements_rx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "neighbor advertisements received";
+ };
+ router_solicitation_source_not_on_link {
+ severity error;
+ type counter64;
+ units "packets";
+ description "router solicitations from source not on link";
+ };
+ router_solicitation_unsupported_intf {
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor discovery unsupported interface";
+ };
+ router_solicitation_radv_not_config {
+ severity error;
+ type counter64;
+ units "packets";
+ description "neighbor discovery not configured";
+ };
+ router_advertisement_source_not_link_local {
+ severity error;
+ type counter64;
+ units "packets";
+ description "router advertisement source not link local";
+ };
+ router_advertisements_tx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "router advertisements sent";
+ };
+ router_advertisements_rx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "router advertisements received";
+ };
+ dst_lookup_miss {
+ severity error;
+ type counter64;
+ units "packets";
+ description "icmp6 dst address lookup misses";
+ };
+ dest_unreach_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "destination unreachable response sent";
+ };
+ packet_too_big_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "packet too big response sent";
+ };
+ ttl_expire_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "hop limit exceeded response sent";
+ };
+ param_problem_sent {
+ severity info;
+ type counter64;
+ units "packets";
+ description "parameter problem response sent";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "error message dropped";
+ };
+ alloc_failure {
+ severity error;
+ type counter64;
+ units "packets";
+ description "buffer allocation failure";
+ };
+};
+
+paths {
+ "/err/ip-frag" "ip_frag";
+ "/err/mpls-frag" "ip_frag";
+ "/err/ip4-mpls-label-disposition-pipe" "ip4";
+ "/err/ip4-mpls-label-disposition-uniform" "ip4";
+ "/err/ip4-local" "ip4";
+ "/err/ip4-input" "ip4";
+ "/err/ip4-full-reassembly" "ip4";
+ "/err/ip4-local-full-reassembly" "ip4";
+ "/err/ip4-full-reassembly-feature" "ip4";
+ "/err/ip4-full-reassembly-custom" "ip4";
+ "/err/ip4-full-reassembly-expire-walk" "ip4";
+ "/err/ip4-sv-reassembly" "ip4";
+ "/err/ip4-sv-reassembly-feature" "ip4";
+ "/err/ip4-sv-reassembly-output-feature" "ip4";
+ "/err/ip4-sv-reassembly-custom-next" "ip4";
+ "/err/ip4-sv-reassembly-expire-walk" "ip4";
+ "/err/ip6-mpls-label-disposition-pipe" "ip6";
+ "/err/ip6-mpls-label-disposition-uniform" "ip6";
+ "/err/ip6-local" "ip6";
+ "/err/ip6-input" "ip6";
+ "/err/ip6-full-reassembly" "ip6";
+ "/err/ip6-local-full-reassembly" "ip6";
+ "/err/ip6-full-reassembly-feature" "ip6";
+ "/err/ip6-full-reassembly-custom" "ip6";
+ "/err/ip6-full-reassembly-expire-walk" "ip6";
+ "/err/ip6-sv-reassembly" "ip6";
+ "/err/ip6-sv-reassembly-feature" "ip6";
+ "/err/ip6-sv-reassembly-output-feature" "ip6";
+ "/err/ip6-sv-reassembly-custom-next" "ip6";
+ "/err/ip6-sv-reassembly-expire-walk" "ip6";
+ "/err/ip4-icmp-input" "icmp4";
+ "/err/ip4-icmp-error" "icmp4";
+ "/err/ip6-icmp-input" "icmp6";
+ "/err/ip6-icmp-error" "icmp6";
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/ip/ip.c b/src/vnet/ip/ip.c
index 5d0c7707dd3..586f7dfbc85 100644
--- a/src/vnet/ip/ip.c
+++ b/src/vnet/ip/ip.c
@@ -18,6 +18,20 @@
u32 ip_flow_hash_router_id;
+ethernet_type_t
+ip_address_family_to_ether_type (ip_address_family_t af)
+{
+ switch (af)
+ {
+ case AF_IP4:
+ return (ETHERNET_TYPE_IP4);
+ case AF_IP6:
+ return (ETHERNET_TYPE_IP6);
+ }
+ ASSERT (0);
+ return (ETHERNET_TYPE_IP4);
+}
+
u8
ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4)
{
@@ -104,7 +118,6 @@ ip_set (ip46_address_t * dst, void *src, u8 is_ip4)
sizeof (ip6_address_t));
}
-/* *INDENT-OFF* */
static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = {
[IP_FEATURE_INPUT] = {
[AF_IP4] = {
@@ -157,7 +170,6 @@ static const char *ip_arc_names[N_IP_FEATURE_LOCATIONS][N_AF][N_SAFI] = {
},
},
};
-/* *INDENT-ON* */
void
ip_feature_enable_disable (ip_address_family_t af,
@@ -189,7 +201,8 @@ ip_feature_enable_disable (ip_address_family_t af,
}
int
-ip_flow_hash_set (ip_address_family_t af, u32 table_id, u32 flow_hash_config)
+ip_flow_hash_set (ip_address_family_t af, u32 table_id,
+ flow_hash_config_t flow_hash_config)
{
fib_protocol_t fproto;
u32 fib_index;
diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h
index 6d822d29dbe..9ebefa0cf5d 100644
--- a/src/vnet/ip/ip.h
+++ b/src/vnet/ip/ip.h
@@ -51,19 +51,18 @@
#include <vnet/ip/ip_packet.h>
#include <vnet/ip/lookup.h>
#include <vnet/ip/ip_interface.h>
+#include <vnet/ip/ip.api_enum.h>
#include <vnet/tcp/tcp_packet.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/ip/icmp46_packet.h>
#include <vnet/ip/ip4.h>
-#include <vnet/ip/ip4_error.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/icmp4.h>
#include <vnet/ip/ip6.h>
#include <vnet/ip/ip6_packet.h>
-#include <vnet/ip/ip6_error.h>
#include <vnet/ip/icmp6.h>
/* Per protocol info. */
@@ -267,8 +266,11 @@ void ip_table_create (fib_protocol_t fproto, u32 table_id, u8 is_api,
void ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api);
-int ip_table_bind (fib_protocol_t fproto, u32 sw_if_index,
- u32 table_id, u8 is_api);
+void fib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 fib_index);
+void mfib_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 mfib_index);
+int ip_table_bind (fib_protocol_t fproto, u32 sw_if_index, u32 table_id);
+
+u32 ip_table_get_unused_id (fib_protocol_t fproto);
u8 ip_is_zero (ip46_address_t * ip46_address, u8 is_ip4);
u8 ip_is_local_host (ip46_address_t * ip46_address, u8 is_ip4);
@@ -286,6 +288,8 @@ void ip_feature_enable_disable (ip_address_family_t af,
void *feature_config,
u32 n_feature_config_bytes);
+ethernet_type_t ip_address_family_to_ether_type (ip_address_family_t af);
+
always_inline u32 vlib_buffer_get_ip4_fib_index (vlib_buffer_t * b);
always_inline u32 vlib_buffer_get_ip6_fib_index (vlib_buffer_t * b);
always_inline u32
diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
index bc971a2b7ca..45d07c2e0f6 100644
--- a/src/vnet/ip/ip4.h
+++ b/src/vnet/ip/ip4.h
@@ -111,9 +111,6 @@ typedef struct ip4_main_t
/** Vector of FIBs. */
struct fib_table_t_ *fibs;
- /** Vector of MTries. */
- struct ip4_fib_t_ *v4_fibs;
-
/** Vector of MFIBs. */
struct mfib_table_t_ *mfibs;
@@ -172,7 +169,6 @@ typedef struct ip4_main_t
/** Global ip4 main structure. */
extern ip4_main_t ip4_main;
-extern char *ip4_error_strings[];
/** Global ip4 input node. Errors get attached to ip4 input node. */
extern vlib_node_registration_t ip4_input_node;
@@ -215,7 +211,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im,
ip_interface_address_t *ia;
ip4_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -226,7 +221,6 @@ ip4_interface_address_matching_destination (ip4_main_t * im,
break;
}
}));
- /* *INDENT-ON* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
@@ -264,10 +258,8 @@ int vnet_set_ip4_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
void ip4_punt_policer_add_del (u8 is_add, u32 policer_index);
-void ip4_punt_redirect_add (u32 rx_sw_if_index,
- u32 tx_sw_if_index, ip46_address_t * nh);
void ip4_punt_redirect_add_paths (u32 rx_sw_if_index,
- fib_route_path_t * paths);
+ const fib_route_path_t *paths);
void ip4_punt_redirect_del (u32 rx_sw_if_index);
diff --git a/src/vnet/ip/ip46_address.h b/src/vnet/ip/ip46_address.h
index f726178ee63..90f766464f6 100644
--- a/src/vnet/ip/ip46_address.h
+++ b/src/vnet/ip/ip46_address.h
@@ -34,7 +34,6 @@ typedef enum
extern u8 *format_ip46_type (u8 * s, va_list * args);
-/* *INDENT-OFF* */
typedef CLIB_PACKED (union ip46_address_t_ {
struct {
u32 pad[3];
@@ -44,7 +43,6 @@ typedef CLIB_PACKED (union ip46_address_t_ {
u8 as_u8[16];
u64 as_u64[2];
}) ip46_address_t;
-/* *INDENT-ON* */
format_function_t format_ip46_address;
diff --git a/src/vnet/ip/ip46_cli.c b/src/vnet/ip/ip46_cli.c
index f58be898d9b..e3da27914bd 100644
--- a/src/vnet/ip/ip46_cli.c
+++ b/src/vnet/ip/ip46_cli.c
@@ -71,12 +71,10 @@ ip6_address_compare (ip6_address_t * a1, ip6_address_t * a2)
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_command, static) = {
.path = "set interface ip",
.short_help = "IP4/IP6 commands",
};
-/* *INDENT-ON* */
void
ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
@@ -90,7 +88,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
ip_interface_address_t *ia;
int i;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im4->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -99,9 +96,7 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
vec_add1 (ip4_addrs, x[0]);
vec_add1 (ip4_masks, ia->address_length);
}));
- /* *INDENT-ON* */
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im6->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -110,7 +105,6 @@ ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index)
vec_add1 (ip6_addrs, x[0]);
vec_add1 (ip6_masks, ia->address_length);
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (ip4_addrs); i++)
ip4_add_del_interface_address (vm, sw_if_index, &ip4_addrs[i],
@@ -212,13 +206,11 @@ done:
* @cliexcmd{set interface ip address del GigabitEthernet2/0/0 all}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_address_command, static) = {
.path = "set interface ip address",
.function = add_del_ip_address,
.short_help = "set interface ip address [del] <interface> <ip-addr>/<mask> | [all]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_reassembly_command_fn (vlib_main_t * vm,
@@ -294,13 +286,11 @@ set_reassembly_command_fn (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_reassembly_command, static) = {
.path = "set interface reassembly",
.short_help = "set interface reassembly <interface-name> [on|off|ip4|ip6]",
.function = set_reassembly_command_fn,
};
-/* *INDENT-ON* */
/* Dummy init function to get us linked in. */
static clib_error_t *
diff --git a/src/vnet/ip/ip4_error.h b/src/vnet/ip/ip4_error.h
deleted file mode 100644
index dce3dd4c1ab..00000000000
--- a/src/vnet/ip/ip4_error.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * ip/ip4_error.h: ip4 fast path errors
- *
- * Copyright (c) 2008 Eliot Dresselhaus
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef included_ip_ip4_error_h
-#define included_ip_ip4_error_h
-
-#define foreach_ip4_error \
- /* Must be first. */ \
- _ (NONE, "valid ip4 packets") \
- \
- /* Errors signalled by ip4-input */ \
- _ (TOO_SHORT, "ip4 length < 20 bytes") \
- _ (BAD_LENGTH, "ip4 length > l2 length") \
- _ (BAD_CHECKSUM, "bad ip4 checksum") \
- _ (VERSION, "ip4 version != 4") \
- _ (OPTIONS, "ip4 options present") \
- _ (FRAGMENT_OFFSET_ONE, "ip4 fragment offset == 1") \
- _ (TIME_EXPIRED, "ip4 ttl <= 1") \
- \
- /* Errors signalled by ip4-rewrite. */ \
- _ (MTU_EXCEEDED, "ip4 MTU exceeded and DF set") \
- _ (DST_LOOKUP_MISS, "ip4 destination lookup miss") \
- _ (SRC_LOOKUP_MISS, "ip4 source lookup miss") \
- _ (DROP, "ip4 drop") \
- _ (PUNT, "ip4 punt") \
- _ (SAME_INTERFACE, "ip4 egress interface same as ingress") \
- \
- /* Errors signalled by ip4-local. */ \
- _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
- _ (TCP_CHECKSUM, "bad tcp checksum") \
- _ (UDP_CHECKSUM, "bad udp checksum") \
- _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
- \
- /* Spoofed packets in ip4-rewrite-local */ \
- _ (SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \
- \
- /* Errors signalled by ip4-inacl */ \
- _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
- _ (INACL_SESSION_DENY, "input ACL session deny drops") \
- /* Errors singalled by ip4-outacl */ \
- _ (OUTACL_TABLE_MISS, "output ACL table-miss drops") \
- _ (OUTACL_SESSION_DENY, "output ACL session deny drops") \
- \
- /* Errors from mfib-forward */ \
- _ (RPF_FAILURE, "Multicast RPF check failed") \
- \
- /* Errors signalled by ip4-reassembly */ \
- _ (REASS_DUPLICATE_FRAGMENT, "duplicate/overlapping fragments") \
- _ (REASS_LIMIT_REACHED, "drops due to concurrent reassemblies limit") \
- _ (REASS_FRAGMENT_CHAIN_TOO_LONG, "fragment chain too long (drop)") \
- _ (REASS_NO_BUF, "out of buffers (drop)") \
- _ (REASS_MALFORMED_PACKET, "malformed packets") \
- _ (REASS_INTERNAL_ERROR, "drops due to internal reassembly error") \
- _ (REASS_UNSUPP_IP_PROT, "unsupported ip protocol")
-
-typedef enum
-{
-#define _(sym,str) IP4_ERROR_##sym,
- foreach_ip4_error
-#undef _
- IP4_N_ERROR,
-} ip4_error_t;
-
-#endif /* included_ip_ip4_error_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index 750b75ffcff..ff74b52eb18 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -52,6 +52,7 @@
#include <vnet/mfib/ip4_mfib.h>
#include <vnet/dpo/load_balance.h>
#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/receive_dpo.h>
#include <vnet/dpo/classify_dpo.h>
#include <vnet/mfib/mfib_table.h> /* for mFIB table and entry creation */
#include <vnet/adj/adj_dp.h>
@@ -60,6 +61,7 @@
#include <vnet/ip/ip4_forward.h>
#include <vnet/interface_output.h>
#include <vnet/classify/vnet_classify.h>
+#include <vnet/ip/reass/ip4_full_reass.h>
/** @brief IPv4 lookup node.
@node ip4-lookup
@@ -101,7 +103,6 @@ VLIB_NODE_FN (ip4_lookup_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
static u8 *format_ip4_lookup_trace (u8 * s, va_list * args);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_lookup_node) =
{
.name = "ip4-lookup",
@@ -110,7 +111,6 @@ VLIB_REGISTER_NODE (ip4_lookup_node) =
.n_next_nodes = IP_LOOKUP_N_NEXT,
.next_nodes = IP4_LOOKUP_NEXT_NODES,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -266,7 +266,6 @@ VLIB_NODE_FN (ip4_load_balance_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_load_balance_node) =
{
.name = "ip4-load-balance",
@@ -274,7 +273,6 @@ VLIB_REGISTER_NODE (ip4_load_balance_node) =
.sibling_of = "ip4-lookup",
.format_trace = format_ip4_lookup_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/* get first interface address */
@@ -286,7 +284,6 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
ip_interface_address_t *ia = 0;
ip4_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address
(lm, ia, sw_if_index,
1 /* honor unnumbered */ ,
@@ -296,7 +293,6 @@ ip4_interface_first_address (ip4_main_t * im, u32 sw_if_index,
result = a;
break;
}));
- /* *INDENT-OFF* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
@@ -653,14 +649,13 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
u32 if_address_index;
ip4_address_fib_t ip4_af, *addr_fib = 0;
- /* local0 interface doesn't support IP addressing */
- if (sw_if_index == 0)
+ error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
+ if (error)
{
- return
- clib_error_create ("local0 interface doesn't support IP addressing");
+ vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
+ return error;
}
- vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
ip4_addr_fib_init (&ip4_af, address,
vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
vec_add1 (addr_fib, ip4_af);
@@ -670,7 +665,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
* subnets on interfaces. Easy fix - disallow overlapping subnets, like
* most routers do.
*/
- /* *INDENT-OFF* */
if (!is_del)
{
/* When adding an address check that it does not conflict
@@ -731,7 +725,6 @@ ip4_add_del_interface_address_internal (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
@@ -852,7 +845,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable)
* when directed broadcast is enabled, the subnet braodcast route will forward
* packets using an adjacency with a broadcast MAC. otherwise it drops
*/
- /* *INDENT-OFF* */
foreach_ip_interface_address(&im->lookup_main, ia,
sw_if_index, 0,
({
@@ -876,7 +868,6 @@ ip4_directed_broadcast (u32 sw_if_index, u8 enable)
&pfx, sw_if_index);
}
}));
- /* *INDENT-ON* */
}
#endif
@@ -888,9 +879,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
ip4_address_t *a;
u32 is_admin_up, fib_index;
- /* Fill in lookup tables with default table (0). */
- vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
-
vec_validate_init_empty (im->
lookup_main.if_address_pool_index_by_sw_if_index,
sw_if_index, ~0);
@@ -899,7 +887,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -913,7 +900,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
im, fib_index,
a, ia->address_length);
}));
- /* *INDENT-ON* */
return 0;
}
@@ -921,7 +907,6 @@ ip4_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip4_sw_interface_admin_up_down);
/* Built-in ip4 unicast rx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_unicast, static) =
{
.arc_name = "ip4-unicast",
@@ -1060,18 +1045,22 @@ VNET_FEATURE_INIT (ip4_interface_output, static) =
.node_name = "interface-output",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
{
ip4_main_t *im = &ip4_main;
- /* Fill in lookup tables with default table (0). */
- vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
- vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
+ vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
+ vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
- if (!is_add)
+ if (is_add)
+ {
+ /* Fill in lookup tables with default table (0). */
+ im->fib_index_by_sw_if_index[sw_if_index] = 0;
+ im->mfib_index_by_sw_if_index[sw_if_index] = 0;
+ }
+ else
{
ip4_main_t *im4 = &ip4_main;
ip_lookup_main_t *lm4 = &im4->lookup_main;
@@ -1080,14 +1069,21 @@ ip4_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
vlib_main_t *vm = vlib_get_main ();
vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
({
address = ip_interface_address_get_address (lm4, ia);
ip4_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
}));
- /* *INDENT-ON* */
ip4_mfib_interface_enable_disable (sw_if_index, 0);
+
+ if (0 != im4->fib_index_by_sw_if_index[sw_if_index])
+ fib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
+ if (0 != im4->mfib_index_by_sw_if_index[sw_if_index])
+ mfib_table_bind (FIB_PROTOCOL_IP4, sw_if_index, 0);
+
+ /* Erase the lookup tables just in case */
+ im4->fib_index_by_sw_if_index[sw_if_index] = ~0;
+ im4->mfib_index_by_sw_if_index[sw_if_index] = ~0;
}
vnet_feature_enable_disable ("ip4-unicast", "ip4-not-enabled", sw_if_index,
@@ -1194,9 +1190,11 @@ format_ip4_forward_next_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip4_forward_next_trace_t *t = va_arg (*args, ip4_forward_next_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%U%U",
- format_white_space, indent,
- format_ip4_header, t->packet_data, sizeof (t->packet_data));
+
+ s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
+ t->fib_index, t->dpo_index, t->flow_hash);
+ s = format (s, "\n%U%U", format_white_space, indent, format_ip4_header,
+ t->packet_data, sizeof (t->packet_data));
return s;
}
#endif
@@ -1385,14 +1383,11 @@ ip4_tcp_udp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
}
#endif
-/* *INDENT-OFF* */
-VNET_FEATURE_ARC_INIT (ip4_local) =
-{
- .arc_name = "ip4-local",
- .start_nodes = VNET_FEATURES ("ip4-local"),
+VNET_FEATURE_ARC_INIT (ip4_local) = {
+ .arc_name = "ip4-local",
+ .start_nodes = VNET_FEATURES ("ip4-local", "ip4-receive"),
.last_in_arc = "ip4-local-end-of-arc",
};
-/* *INDENT-ON* */
static inline void
ip4_local_l4_csum_validate (vlib_main_t * vm, vlib_buffer_t * p,
@@ -1468,10 +1463,10 @@ ip4_local_check_l4_csum_x2 (vlib_main_t * vm, vlib_buffer_t ** b,
if (PREDICT_FALSE (ip4_local_need_csum_check (is_tcp_udp[0], b[0])
|| ip4_local_need_csum_check (is_tcp_udp[1], b[1])))
{
- if (is_tcp_udp[0])
+ if (is_tcp_udp[0] && !ip4_local_csum_is_offloaded (b[0]))
ip4_local_l4_csum_validate (vm, b[0], ih[0], is_udp[0], &error[0],
&good_tcp_udp[0]);
- if (is_tcp_udp[1])
+ if (is_tcp_udp[1] && !ip4_local_csum_is_offloaded (b[1]))
ip4_local_l4_csum_validate (vm, b[1], ih[1], is_udp[1], &error[1],
&good_tcp_udp[1]);
}
@@ -1497,9 +1492,8 @@ ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
next_index = *next;
if (PREDICT_TRUE (error == (u8) IP4_ERROR_UNKNOWN_PROTOCOL))
{
- vnet_feature_arc_start (arc_index,
- vnet_buffer (b)->sw_if_index[VLIB_RX],
- &next_index, b);
+ vnet_feature_arc_start (
+ arc_index, vnet_buffer (b)->ip.rx_sw_if_index, &next_index, b);
*next = next_index;
}
}
@@ -1507,18 +1501,19 @@ ip4_local_set_next_and_error (vlib_node_runtime_t * error_node,
typedef struct
{
+ /* The src and fib-index together determine if packet n is the same as n-1 */
ip4_address_t src;
+ u32 fib_index;
u32 lbi;
u8 error;
u8 first;
} ip4_local_last_check_t;
static inline void
-ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
- ip4_local_last_check_t * last_check, u8 * error0)
+ip4_local_check_src (vlib_buffer_t *b, ip4_header_t *ip0,
+ ip4_local_last_check_t *last_check, u8 *error0,
+ int is_receive_dpo)
{
- ip4_fib_mtrie_leaf_t leaf0;
- ip4_fib_mtrie_t *mtrie0;
const dpo_id_t *dpo0;
load_balance_t *lb0;
u32 lbi0;
@@ -1527,20 +1522,27 @@ ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0 ?
vnet_buffer (b)->sw_if_index[VLIB_TX] : vnet_buffer (b)->ip.fib_index;
+ vnet_buffer (b)->ip.rx_sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
+ if (is_receive_dpo)
+ {
+ receive_dpo_t *rd;
+ rd = receive_dpo_get (vnet_buffer (b)->ip.adj_index[VLIB_TX]);
+ if (rd->rd_sw_if_index != ~0)
+ vnet_buffer (b)->ip.rx_sw_if_index = rd->rd_sw_if_index;
+ }
+
/*
* vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
* adjacency for the destination address (the local interface address).
* vnet_buffer()->ip.adj_index[VLIB_TX] will be set to the index of the
* adjacency for the source address (the remote sender's address)
*/
- if (PREDICT_TRUE (last_check->src.as_u32 != ip0->src_address.as_u32) ||
+ if (PREDICT_TRUE ((last_check->src.as_u32 != ip0->src_address.as_u32)) ||
+ (last_check->fib_index != vnet_buffer (b)->ip.fib_index) ||
last_check->first)
{
- mtrie0 = &ip4_fib_get (vnet_buffer (b)->ip.fib_index)->mtrie;
- leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, &ip0->src_address);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 2);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, &ip0->src_address, 3);
- lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b)->ip.fib_index,
+ &ip0->src_address);
vnet_buffer (b)->ip.adj_index[VLIB_RX] =
vnet_buffer (b)->ip.adj_index[VLIB_TX];
@@ -1572,6 +1574,7 @@ ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
last_check->lbi = lbi0;
last_check->error = *error0;
last_check->first = 0;
+ last_check->fib_index = vnet_buffer (b)->ip.fib_index;
}
else
{
@@ -1583,11 +1586,10 @@ ip4_local_check_src (vlib_buffer_t * b, ip4_header_t * ip0,
}
static inline void
-ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
- ip4_local_last_check_t * last_check, u8 * error)
+ip4_local_check_src_x2 (vlib_buffer_t **b, ip4_header_t **ip,
+ ip4_local_last_check_t *last_check, u8 *error,
+ int is_receive_dpo)
{
- ip4_fib_mtrie_leaf_t leaf[2];
- ip4_fib_mtrie_t *mtrie[2];
const dpo_id_t *dpo[2];
load_balance_t *lb[2];
u32 not_last_hit;
@@ -1607,6 +1609,24 @@ ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
vnet_buffer (b[1])->ip.fib_index;
+ not_last_hit |= vnet_buffer (b[0])->ip.fib_index ^ last_check->fib_index;
+ not_last_hit |= vnet_buffer (b[1])->ip.fib_index ^ last_check->fib_index;
+
+ vnet_buffer (b[0])->ip.rx_sw_if_index =
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ vnet_buffer (b[1])->ip.rx_sw_if_index =
+ vnet_buffer (b[1])->sw_if_index[VLIB_RX];
+ if (is_receive_dpo)
+ {
+ const receive_dpo_t *rd0, *rd1;
+ rd0 = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+ rd1 = receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
+ if (rd0->rd_sw_if_index != ~0)
+ vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
+ if (rd1->rd_sw_if_index != ~0)
+ vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
+ }
+
/*
* vnet_buffer()->ip.adj_index[VLIB_RX] will be set to the index of the
* adjacency for the destination address (the local interface address).
@@ -1615,24 +1635,9 @@ ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
*/
if (PREDICT_TRUE (not_last_hit))
{
- mtrie[0] = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
- mtrie[1] = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
-
- leaf[0] = ip4_fib_mtrie_lookup_step_one (mtrie[0], &ip[0]->src_address);
- leaf[1] = ip4_fib_mtrie_lookup_step_one (mtrie[1], &ip[1]->src_address);
-
- leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
- &ip[0]->src_address, 2);
- leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
- &ip[1]->src_address, 2);
-
- leaf[0] = ip4_fib_mtrie_lookup_step (mtrie[0], leaf[0],
- &ip[0]->src_address, 3);
- leaf[1] = ip4_fib_mtrie_lookup_step (mtrie[1], leaf[1],
- &ip[1]->src_address, 3);
-
- lbi[0] = ip4_fib_mtrie_leaf_get_adj_index (leaf[0]);
- lbi[1] = ip4_fib_mtrie_leaf_get_adj_index (leaf[1]);
+ ip4_fib_forwarding_lookup_x2 (
+ vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
+ &ip[0]->src_address, &ip[1]->src_address, &lbi[0], &lbi[1]);
vnet_buffer (b[0])->ip.adj_index[VLIB_RX] =
vnet_buffer (b[0])->ip.adj_index[VLIB_TX];
@@ -1668,6 +1673,7 @@ ip4_local_check_src_x2 (vlib_buffer_t ** b, ip4_header_t ** ip,
last_check->lbi = lbi[1];
last_check->error = error[1];
last_check->first = 0;
+ last_check->fib_index = vnet_buffer (b[1])->ip.fib_index;
}
else
{
@@ -1718,9 +1724,9 @@ ip4_local_classify (vlib_buffer_t * b, ip4_header_t * ip, u16 * next)
}
static inline uword
-ip4_local_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, int head_of_feature_arc)
+ip4_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int head_of_feature_arc,
+ int is_receive_dpo)
{
u32 *from, n_left_from;
vlib_node_runtime_t *error_node =
@@ -1737,10 +1743,11 @@ ip4_local_inline (vlib_main_t * vm,
* member to make sure the .lbi is initialised for the first
* packet.
*/
- .src = {.as_u32 = 0},
+ .src = { .as_u32 = 0 },
.lbi = ~0,
.error = IP4_ERROR_UNKNOWN_PROTOCOL,
.first = 1,
+ .fib_index = 0,
};
from = vlib_frame_vector_args (frame);
@@ -1785,19 +1792,21 @@ ip4_local_inline (vlib_main_t * vm,
if (PREDICT_TRUE (not_batch == 0))
{
ip4_local_check_l4_csum_x2 (vm, b, ip, error);
- ip4_local_check_src_x2 (b, ip, &last_check, error);
+ ip4_local_check_src_x2 (b, ip, &last_check, error, is_receive_dpo);
}
else
{
if (!pt[0])
{
ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
- ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
+ ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
+ is_receive_dpo);
}
if (!pt[1])
{
ip4_local_check_l4_csum (vm, b[1], ip[1], &error[1]);
- ip4_local_check_src (b[1], ip[1], &last_check, &error[1]);
+ ip4_local_check_src (b[1], ip[1], &last_check, &error[1],
+ is_receive_dpo);
}
}
@@ -1825,7 +1834,8 @@ ip4_local_inline (vlib_main_t * vm,
goto skip_check;
ip4_local_check_l4_csum (vm, b[0], ip[0], &error[0]);
- ip4_local_check_src (b[0], ip[0], &last_check, &error[0]);
+ ip4_local_check_src (b[0], ip[0], &last_check, &error[0],
+ is_receive_dpo);
skip_check:
@@ -1844,17 +1854,17 @@ ip4_local_inline (vlib_main_t * vm,
VLIB_NODE_FN (ip4_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */ );
+ return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
+ 0 /* is_receive_dpo */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_local_node) =
{
.name = "ip4-local",
.vector_size = sizeof (u32),
.format_trace = format_ip4_forward_next_trace,
.n_errors = IP4_N_ERROR,
- .error_strings = ip4_error_strings,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP_LOCAL_N_NEXT,
.next_nodes =
{
@@ -1862,20 +1872,32 @@ VLIB_REGISTER_NODE (ip4_local_node) =
[IP_LOCAL_NEXT_PUNT] = "ip4-punt",
[IP_LOCAL_NEXT_UDP_LOOKUP] = "ip4-udp-lookup",
[IP_LOCAL_NEXT_ICMP] = "ip4-icmp-input",
- [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-full-reassembly",
+ [IP_LOCAL_NEXT_REASSEMBLY] = "ip4-local-full-reassembly",
},
};
-/* *INDENT-ON* */
+VLIB_NODE_FN (ip4_receive_local_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_local_inline (vm, node, frame, 1 /* head of feature arc */,
+ 1 /* is_receive_dpo */);
+}
+
+VLIB_REGISTER_NODE (ip4_receive_local_node) = {
+ .name = "ip4-receive",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_forward_next_trace,
+ .sibling_of = "ip4-local"
+};
VLIB_NODE_FN (ip4_local_end_of_arc_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */ );
+ return ip4_local_inline (vm, node, frame, 0 /* head of feature arc */,
+ 0 /* is_receive_dpo */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_local_end_of_arc_node) = {
.name = "ip4-local-end-of-arc",
.vector_size = sizeof (u32),
@@ -1889,7 +1911,6 @@ VNET_FEATURE_INIT (ip4_local_end_of_arc, static) = {
.node_name = "ip4-local-end-of-arc",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
@@ -1952,14 +1973,12 @@ show_ip_local_command_fn (vlib_main_t * vm,
* 47
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_local, static) =
{
.path = "show ip local",
.function = show_ip_local_command_fn,
.short_help = "show ip local",
};
-/* *INDENT-ON* */
typedef enum
{
@@ -2026,7 +2045,9 @@ ip4_ttl_inc (vlib_buffer_t * b, ip4_header_t * ip)
ttl += 1;
ip->ttl = ttl;
- ASSERT (ip4_header_checksum_is_valid (ip));
+ ASSERT (ip4_header_checksum_is_valid (ip) ||
+ (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
+ (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
}
/* Decrement TTL & update checksum.
@@ -2068,7 +2089,8 @@ ip4_ttl_and_checksum_check (vlib_buffer_t * b, ip4_header_t * ip, u16 * next,
/* Verify checksum. */
ASSERT (ip4_header_checksum_is_valid (ip) ||
- (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM));
+ (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM) ||
+ (vnet_buffer (b)->oflags & VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM));
}
always_inline uword
@@ -2203,9 +2225,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
adj0->ia_cfg_index);
next[0] = next_index;
- if (is_midchain)
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
}
else
{
@@ -2228,9 +2247,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
&next_index, b[1],
adj1->ia_cfg_index);
next[1] = next_index;
- if (is_midchain)
- vnet_calc_checksums_inline (vm, b[1], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
}
else
{
@@ -2380,9 +2396,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_midchain)
{
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
-
/* Guess we are only writing on ipv4 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
}
@@ -2486,10 +2499,6 @@ ip4_rewrite_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (is_midchain)
{
- /* this acts on the packet that is about to be encapped */
- vnet_calc_checksums_inline (vm, b[0], 1 /* is_ip4 */ ,
- 0 /* is_ip6 */ );
-
/* Guess we are only writing on ipv4 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip4_header_t));
}
@@ -2616,7 +2625,6 @@ VLIB_NODE_FN (ip4_mcast_midchain_node) (vlib_main_t * vm,
return ip4_rewrite_inline (vm, node, frame, 0, 1, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_rewrite_node) = {
.name = "ip4-rewrite",
.vector_size = sizeof (u32),
@@ -2661,104 +2669,6 @@ VLIB_REGISTER_NODE (ip4_midchain_node) = {
.format_trace = format_ip4_rewrite_trace,
.sibling_of = "ip4-rewrite",
};
-/* *INDENT-ON */
-
-static int
-ip4_lookup_validate (ip4_address_t * a, u32 fib_index0)
-{
- ip4_fib_mtrie_t *mtrie0;
- ip4_fib_mtrie_leaf_t leaf0;
- u32 lbi0;
-
- mtrie0 = &ip4_fib_get (fib_index0)->mtrie;
-
- leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, a);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 2);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, a, 3);
-
- lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
-
- return lbi0 == ip4_fib_table_lookup_lb (ip4_fib_get (fib_index0), a);
-}
-
-static clib_error_t *
-test_lookup_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- ip4_fib_t *fib;
- u32 table_id = 0;
- f64 count = 1;
- u32 n;
- int i;
- ip4_address_t ip4_base_address;
- u64 errors = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "table %d", &table_id))
- {
- /* Make sure the entry exists. */
- fib = ip4_fib_get (table_id);
- if ((fib) && (fib->index != table_id))
- return clib_error_return (0, "<fib-index> %d does not exist",
- table_id);
- }
- else if (unformat (input, "count %f", &count))
- ;
-
- else if (unformat (input, "%U",
- unformat_ip4_address, &ip4_base_address))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
-
- n = count;
-
- for (i = 0; i < n; i++)
- {
- if (!ip4_lookup_validate (&ip4_base_address, table_id))
- errors++;
-
- ip4_base_address.as_u32 =
- clib_host_to_net_u32 (1 +
- clib_net_to_host_u32 (ip4_base_address.as_u32));
- }
-
- if (errors)
- vlib_cli_output (vm, "%llu errors out of %d lookups\n", errors, n);
- else
- vlib_cli_output (vm, "No errors in %d lookups\n", n);
-
- return 0;
-}
-
-/*?
- * Perform a lookup of an IPv4 Address (or range of addresses) in the
- * given FIB table to determine if there is a conflict with the
- * adjacency table. The fib-id can be determined by using the
- * '<em>show ip fib</em>' command. If fib-id is not entered, default value
- * of 0 is used.
- *
- * @todo This command uses fib-id, other commands use table-id (not
- * just a name, they are different indexes). Would like to change this
- * to table-id for consistency.
- *
- * @cliexpar
- * Example of how to run the test lookup command:
- * @cliexstart{test lookup 172.16.1.1 table 1 count 2}
- * No errors in 2 lookups
- * @cliexend
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (lookup_test_command, static) =
-{
- .path = "test lookup",
- .short_help = "test lookup <ipv4-addr> [table <fib-id>] [count <nn>]",
- .function = test_lookup_command_fn,
-};
-/* *INDENT-ON* */
static clib_error_t *
set_ip_flow_hash_command_fn (vlib_main_t * vm,
@@ -2890,15 +2800,12 @@ set_ip_flow_hash_command_fn (vlib_main_t * vm,
* [0] [@0]: dpo-drop ip6
* @cliexend
?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) =
-{
+VLIB_CLI_COMMAND (set_ip_flow_hash_command, static) = {
.path = "set ip flow-hash",
- .short_help =
- "set ip flow-hash table <table-id> [src] [dst] [sport] [dport] [proto] [reverse]",
+ .short_help = "set ip flow-hash table <table-id> [src] [dst] [sport] "
+ "[dport] [proto] [reverse] [gtpv1teid]",
.function = set_ip_flow_hash_command_fn,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -3015,7 +2922,6 @@ set_ip_classify_command_fn (vlib_main_t * vm,
* Example of how to assign a classification table to an interface:
* @cliexcmd{set ip classify intfc GigabitEthernet2/0/0 table-index 1}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip_classify_command, static) =
{
.path = "set ip classify",
@@ -3023,7 +2929,6 @@ VLIB_CLI_COMMAND (set_ip_classify_command, static) =
"set ip classify intfc <interface> table-index <classify-idx>",
.function = set_ip_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_forward.h b/src/vnet/ip/ip4_forward.h
index 8779d2ded6b..54150d4dab4 100644
--- a/src/vnet/ip/ip4_forward.h
+++ b/src/vnet/ip/ip4_forward.h
@@ -74,8 +74,6 @@ ip4_lookup_inline (vlib_main_t * vm,
{
ip4_header_t *ip0, *ip1, *ip2, *ip3;
const load_balance_t *lb0, *lb1, *lb2, *lb3;
- ip4_fib_mtrie_t *mtrie0, *mtrie1, *mtrie2, *mtrie3;
- ip4_fib_mtrie_leaf_t leaf0, leaf1, leaf2, leaf3;
ip4_address_t *dst_addr0, *dst_addr1, *dst_addr2, *dst_addr3;
u32 lb_index0, lb_index1, lb_index2, lb_index3;
flow_hash_config_t flow_hash_config0, flow_hash_config1;
@@ -112,30 +110,11 @@ ip4_lookup_inline (vlib_main_t * vm,
ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, b[2]);
ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, b[3]);
- mtrie0 = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
- mtrie1 = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
- mtrie2 = &ip4_fib_get (vnet_buffer (b[2])->ip.fib_index)->mtrie;
- mtrie3 = &ip4_fib_get (vnet_buffer (b[3])->ip.fib_index)->mtrie;
-
- leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
- leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
- leaf2 = ip4_fib_mtrie_lookup_step_one (mtrie2, dst_addr2);
- leaf3 = ip4_fib_mtrie_lookup_step_one (mtrie3, dst_addr3);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
- leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 2);
- leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 2);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
- leaf2 = ip4_fib_mtrie_lookup_step (mtrie2, leaf2, dst_addr2, 3);
- leaf3 = ip4_fib_mtrie_lookup_step (mtrie3, leaf3, dst_addr3, 3);
-
- lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
- lb_index2 = ip4_fib_mtrie_leaf_get_adj_index (leaf2);
- lb_index3 = ip4_fib_mtrie_leaf_get_adj_index (leaf3);
+ ip4_fib_forwarding_lookup_x4 (
+ vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
+ vnet_buffer (b[2])->ip.fib_index, vnet_buffer (b[3])->ip.fib_index,
+ dst_addr0, dst_addr1, dst_addr2, dst_addr3, &lb_index0, &lb_index1,
+ &lb_index2, &lb_index3);
ASSERT (lb_index0 && lb_index1 && lb_index2 && lb_index3);
lb0 = load_balance_get (lb_index0);
@@ -245,8 +224,6 @@ ip4_lookup_inline (vlib_main_t * vm,
{
ip4_header_t *ip0, *ip1;
const load_balance_t *lb0, *lb1;
- ip4_fib_mtrie_t *mtrie0, *mtrie1;
- ip4_fib_mtrie_leaf_t leaf0, leaf1;
ip4_address_t *dst_addr0, *dst_addr1;
u32 lb_index0, lb_index1;
flow_hash_config_t flow_hash_config0, flow_hash_config1;
@@ -271,20 +248,9 @@ ip4_lookup_inline (vlib_main_t * vm,
ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, b[0]);
ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, b[1]);
- mtrie0 = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
- mtrie1 = &ip4_fib_get (vnet_buffer (b[1])->ip.fib_index)->mtrie;
-
- leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
- leaf1 = ip4_fib_mtrie_lookup_step_one (mtrie1, dst_addr1);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 2);
-
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
- leaf1 = ip4_fib_mtrie_lookup_step (mtrie1, leaf1, dst_addr1, 3);
-
- lb_index0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
- lb_index1 = ip4_fib_mtrie_leaf_get_adj_index (leaf1);
+ ip4_fib_forwarding_lookup_x2 (
+ vnet_buffer (b[0])->ip.fib_index, vnet_buffer (b[1])->ip.fib_index,
+ dst_addr0, dst_addr1, &lb_index0, &lb_index1);
ASSERT (lb_index0 && lb_index1);
lb0 = load_balance_get (lb_index0);
@@ -348,8 +314,6 @@ ip4_lookup_inline (vlib_main_t * vm,
{
ip4_header_t *ip0;
const load_balance_t *lb0;
- ip4_fib_mtrie_t *mtrie0;
- ip4_fib_mtrie_leaf_t leaf0;
ip4_address_t *dst_addr0;
u32 lbi0;
flow_hash_config_t flow_hash_config0;
@@ -360,11 +324,8 @@ ip4_lookup_inline (vlib_main_t * vm,
dst_addr0 = &ip0->dst_address;
ip_lookup_set_buffer_fib_index (im->fib_index_by_sw_if_index, b[0]);
- mtrie0 = &ip4_fib_get (vnet_buffer (b[0])->ip.fib_index)->mtrie;
- leaf0 = ip4_fib_mtrie_lookup_step_one (mtrie0, dst_addr0);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 2);
- leaf0 = ip4_fib_mtrie_lookup_step (mtrie0, leaf0, dst_addr0, 3);
- lbi0 = ip4_fib_mtrie_leaf_get_adj_index (leaf0);
+ lbi0 = ip4_fib_forwarding_lookup (vnet_buffer (b[0])->ip.fib_index,
+ dst_addr0);
ASSERT (lbi0);
lb0 = load_balance_get (lbi0);
diff --git a/src/vnet/ip/ip4_inlines.h b/src/vnet/ip/ip4_inlines.h
index 00a47125b8a..b4fcebc9896 100644
--- a/src/vnet/ip/ip4_inlines.h
+++ b/src/vnet/ip/ip4_inlines.h
@@ -42,6 +42,8 @@
#include <vnet/ip/ip_flow_hash.h>
#include <vnet/ip/ip4_packet.h>
+#include <vnet/tcp/tcp_packet.h>
+#include <vnet/udp/udp_packet.h>
#define IP_DF 0x4000 /* don't fragment */
@@ -52,9 +54,11 @@ ip4_compute_flow_hash (const ip4_header_t * ip,
flow_hash_config_t flow_hash_config)
{
tcp_header_t *tcp = (void *) (ip + 1);
+ udp_header_t *udp = (void *) (ip + 1);
+ gtpv1u_header_t *gtpu = (void *) (udp + 1);
u32 a, b, c, t1, t2;
- uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP
- || ip->protocol == IP_PROTOCOL_UDP);
+ uword is_udp = ip->protocol == IP_PROTOCOL_UDP;
+ uword is_tcp_udp = (ip->protocol == IP_PROTOCOL_TCP || is_udp);
t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR)
? ip->src_address.data_u32 : 0;
@@ -89,6 +93,13 @@ ip4_compute_flow_hash (const ip4_header_t * ip,
b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? ip->protocol : 0;
c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
(t1 << 16) | t2 : (t2 << 16) | t1;
+ if (PREDICT_TRUE (is_udp) &&
+ PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) &&
+ udp->dst_port == GTPV1_PORT_BE))
+ {
+ t1 = gtpu->teid;
+ c ^= t1;
+ }
a ^= ip_flow_hash_router_id;
hash_v3_mix32 (a, b, c);
@@ -98,9 +109,9 @@ ip4_compute_flow_hash (const ip4_header_t * ip,
}
always_inline void *
-vlib_buffer_push_ip4_custom (vlib_main_t * vm, vlib_buffer_t * b,
- ip4_address_t * src, ip4_address_t * dst,
- int proto, u8 csum_offload, u8 is_df)
+vlib_buffer_push_ip4_custom (vlib_main_t *vm, vlib_buffer_t *b,
+ ip4_address_t *src, ip4_address_t *dst, int proto,
+ u8 csum_offload, u8 is_df, u8 dscp)
{
ip4_header_t *ih;
@@ -108,7 +119,8 @@ vlib_buffer_push_ip4_custom (vlib_main_t * vm, vlib_buffer_t * b,
ih = vlib_buffer_push_uninit (b, sizeof (ip4_header_t));
ih->ip_version_and_header_length = 0x45;
- ih->tos = 0;
+ ip4_header_set_dscp (ih, dscp);
+ ip4_header_set_ecn (ih, 0);
ih->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
/* No fragments */
@@ -152,7 +164,7 @@ vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b,
u8 csum_offload)
{
return vlib_buffer_push_ip4_custom (vm, b, src, dst, proto, csum_offload,
- 1 /* is_df */ );
+ 1 /* is_df */, 0);
}
#endif /* included_ip_ip4_inlines_h */
diff --git a/src/vnet/ip/ip4_input.c b/src/vnet/ip/ip4_input.c
index 3b3edf9fca7..106d17da3cb 100644
--- a/src/vnet/ip/ip4_input.c
+++ b/src/vnet/ip/ip4_input.c
@@ -374,22 +374,13 @@ VLIB_NODE_FN (ip4_input_no_checksum_node) (vlib_main_t * vm,
return ip4_input_inline (vm, node, frame, /* verify_checksum */ 0);
}
-#ifndef CLIB_MARCH_VARIANT
-char *ip4_error_strings[] = {
-#define _(sym,string) string,
- foreach_ip4_error
-#undef _
-};
-#endif
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_input_node) = {
.name = "ip4-input",
.vector_size = sizeof (u32),
.protocol_hint = VLIB_NODE_PROTO_HINT_IP4,
.n_errors = IP4_N_ERROR,
- .error_strings = ip4_error_strings,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_INPUT_N_NEXT,
.next_nodes = {
@@ -399,7 +390,6 @@ VLIB_REGISTER_NODE (ip4_input_node) = {
[IP4_INPUT_NEXT_LOOKUP] = "ip4-lookup",
[IP4_INPUT_NEXT_LOOKUP_MULTICAST] = "ip4-mfib-forward-lookup",
[IP4_INPUT_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [IP4_INPUT_NEXT_REASSEMBLY] = "ip4-full-reassembly",
},
.format_buffer = format_ip4_header,
@@ -414,7 +404,6 @@ VLIB_REGISTER_NODE (ip4_input_no_checksum_node) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_input_trace,
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/ip4_input.h b/src/vnet/ip/ip4_input.h
index 383ef31758c..d2ed13fa35f 100644
--- a/src/vnet/ip/ip4_input.h
+++ b/src/vnet/ip/ip4_input.h
@@ -42,6 +42,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/vector/ip_csum.h>
typedef enum
{
@@ -51,7 +52,6 @@ typedef enum
IP4_INPUT_NEXT_LOOKUP,
IP4_INPUT_NEXT_LOOKUP_MULTICAST,
IP4_INPUT_NEXT_ICMP_ERROR,
- IP4_INPUT_NEXT_REASSEMBLY,
IP4_INPUT_N_NEXT,
} ip4_input_next_t;
@@ -60,18 +60,21 @@ check_ver_opt_csum (ip4_header_t * ip, u8 * error, int verify_checksum)
{
if (PREDICT_FALSE (ip->ip_version_and_header_length != 0x45))
{
- if ((ip->ip_version_and_header_length & 0xf) != 5)
+ if ((ip->ip_version_and_header_length & 0xf0) != 0x40)
+ *error = IP4_ERROR_VERSION;
+ else if ((ip->ip_version_and_header_length & 0x0f) < 5)
+ *error = IP4_ERROR_HDR_TOO_SHORT;
+ else
{
*error = IP4_ERROR_OPTIONS;
- if (verify_checksum && ip_csum (ip, ip4_header_bytes (ip)) != 0)
+ if (verify_checksum &&
+ clib_ip_csum ((u8 *) ip, ip4_header_bytes (ip)) != 0)
*error = IP4_ERROR_BAD_CHECKSUM;
}
- else
- *error = IP4_ERROR_VERSION;
}
- else
- if (PREDICT_FALSE (verify_checksum &&
- ip_csum (ip, sizeof (ip4_header_t)) != 0))
+ else if (PREDICT_FALSE (verify_checksum &&
+ clib_ip_csum ((u8 *) ip, sizeof (ip4_header_t)) !=
+ 0))
*error = IP4_ERROR_BAD_CHECKSUM;
}
diff --git a/src/vnet/ip/ip4_mtrie.c b/src/vnet/ip/ip4_mtrie.c
index 7bfcf986e6f..00855f7db43 100644
--- a/src/vnet/ip/ip4_mtrie.c
+++ b/src/vnet/ip/ip4_mtrie.c
@@ -45,10 +45,10 @@
/**
* Global pool of IPv4 8bit PLYs
*/
-ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
+ip4_mtrie_8_ply_t *ip4_ply_pool;
always_inline u32
-ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
+ip4_mtrie_leaf_is_non_empty (ip4_mtrie_8_ply_t *p, u8 dst_byte)
{
/*
* It's 'non-empty' if the length of the leaf stored is greater than the
@@ -60,140 +60,91 @@ ip4_fib_mtrie_leaf_is_non_empty (ip4_fib_mtrie_8_ply_t * p, u8 dst_byte)
return (0);
}
-always_inline ip4_fib_mtrie_leaf_t
-ip4_fib_mtrie_leaf_set_adj_index (u32 adj_index)
+always_inline ip4_mtrie_leaf_t
+ip4_mtrie_leaf_set_adj_index (u32 adj_index)
{
- ip4_fib_mtrie_leaf_t l;
+ ip4_mtrie_leaf_t l;
l = 1 + 2 * adj_index;
- ASSERT (ip4_fib_mtrie_leaf_get_adj_index (l) == adj_index);
+ ASSERT (ip4_mtrie_leaf_get_adj_index (l) == adj_index);
return l;
}
always_inline u32
-ip4_fib_mtrie_leaf_is_next_ply (ip4_fib_mtrie_leaf_t n)
+ip4_mtrie_leaf_is_next_ply (ip4_mtrie_leaf_t n)
{
return (n & 1) == 0;
}
always_inline u32
-ip4_fib_mtrie_leaf_get_next_ply_index (ip4_fib_mtrie_leaf_t n)
+ip4_mtrie_leaf_get_next_ply_index (ip4_mtrie_leaf_t n)
{
- ASSERT (ip4_fib_mtrie_leaf_is_next_ply (n));
+ ASSERT (ip4_mtrie_leaf_is_next_ply (n));
return n >> 1;
}
-always_inline ip4_fib_mtrie_leaf_t
-ip4_fib_mtrie_leaf_set_next_ply_index (u32 i)
+always_inline ip4_mtrie_leaf_t
+ip4_mtrie_leaf_set_next_ply_index (u32 i)
{
- ip4_fib_mtrie_leaf_t l;
+ ip4_mtrie_leaf_t l;
l = 0 + 2 * i;
- ASSERT (ip4_fib_mtrie_leaf_get_next_ply_index (l) == i);
+ ASSERT (ip4_mtrie_leaf_get_next_ply_index (l) == i);
return l;
}
-#ifndef __ALTIVEC__
-#define PLY_X4_SPLAT_INIT(init_x4, init) \
- init_x4 = u32x4_splat (init);
-#else
-#define PLY_X4_SPLAT_INIT(init_x4, init) \
-{ \
- u32x4_union_t y; \
- y.as_u32[0] = init; \
- y.as_u32[1] = init; \
- y.as_u32[2] = init; \
- y.as_u32[3] = init; \
- init_x4 = y.as_u32x4; \
-}
-#endif
-
-#ifdef CLIB_HAVE_VEC128
-#define PLY_INIT_LEAVES(p) \
-{ \
- u32x4 *l, init_x4; \
- \
- PLY_X4_SPLAT_INIT(init_x4, init); \
- for (l = p->leaves_as_u32x4; \
- l < p->leaves_as_u32x4 + ARRAY_LEN (p->leaves_as_u32x4); \
- l += 4) \
- { \
- l[0] = init_x4; \
- l[1] = init_x4; \
- l[2] = init_x4; \
- l[3] = init_x4; \
- } \
-}
-#else
-#define PLY_INIT_LEAVES(p) \
-{ \
- u32 *l; \
- \
- for (l = p->leaves; l < p->leaves + ARRAY_LEN (p->leaves); l += 4) \
- { \
- l[0] = init; \
- l[1] = init; \
- l[2] = init; \
- l[3] = init; \
- } \
-}
-#endif
-
-#define PLY_INIT(p, init, prefix_len, ply_base_len) \
-{ \
- /* \
- * A leaf is 'empty' if it represents a leaf from the covering PLY \
- * i.e. if the prefix length of the leaf is less than or equal to \
- * the prefix length of the PLY \
- */ \
- p->n_non_empty_leafs = (prefix_len > ply_base_len ? \
- ARRAY_LEN (p->leaves) : 0); \
- clib_memset (p->dst_address_bits_of_leaves, prefix_len, \
- sizeof (p->dst_address_bits_of_leaves)); \
- p->dst_address_bits_base = ply_base_len; \
- \
- /* Initialize leaves. */ \
- PLY_INIT_LEAVES(p); \
-}
-
static void
-ply_8_init (ip4_fib_mtrie_8_ply_t * p,
- ip4_fib_mtrie_leaf_t init, uword prefix_len, u32 ply_base_len)
+ply_8_init (ip4_mtrie_8_ply_t *p, ip4_mtrie_leaf_t init, uword prefix_len,
+ u32 ply_base_len)
{
- PLY_INIT (p, init, prefix_len, ply_base_len);
+ p->n_non_empty_leafs = prefix_len > ply_base_len ? ARRAY_LEN (p->leaves) : 0;
+ clib_memset_u8 (p->dst_address_bits_of_leaves, prefix_len,
+ sizeof (p->dst_address_bits_of_leaves));
+ p->dst_address_bits_base = ply_base_len;
+
+ clib_memset_u32 (p->leaves, init, ARRAY_LEN (p->leaves));
}
static void
-ply_16_init (ip4_fib_mtrie_16_ply_t * p,
- ip4_fib_mtrie_leaf_t init, uword prefix_len)
+ply_16_init (ip4_mtrie_16_ply_t *p, ip4_mtrie_leaf_t init, uword prefix_len)
{
- clib_memset (p->dst_address_bits_of_leaves, prefix_len,
- sizeof (p->dst_address_bits_of_leaves));
- PLY_INIT_LEAVES (p);
+ clib_memset_u8 (p->dst_address_bits_of_leaves, prefix_len,
+ sizeof (p->dst_address_bits_of_leaves));
+ clib_memset_u32 (p->leaves, init, ARRAY_LEN (p->leaves));
}
-static ip4_fib_mtrie_leaf_t
-ply_create (ip4_fib_mtrie_t * m,
- ip4_fib_mtrie_leaf_t init_leaf,
- u32 leaf_prefix_len, u32 ply_base_len)
+static ip4_mtrie_leaf_t
+ply_create (ip4_mtrie_leaf_t init_leaf, u32 leaf_prefix_len, u32 ply_base_len)
{
- ip4_fib_mtrie_8_ply_t *p;
- /* Get cache aligned ply. */
+ ip4_mtrie_8_ply_t *p;
+ ip4_mtrie_leaf_t l;
+ u8 need_barrier_sync = pool_get_will_expand (ip4_ply_pool);
+ vlib_main_t *vm = vlib_get_main ();
+ ASSERT (vm->thread_index == 0);
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_sync (vm);
+
+ /* Get cache aligned ply. */
pool_get_aligned (ip4_ply_pool, p, CLIB_CACHE_LINE_BYTES);
ply_8_init (p, init_leaf, leaf_prefix_len, ply_base_len);
- return ip4_fib_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
+ l = ip4_mtrie_leaf_set_next_ply_index (p - ip4_ply_pool);
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_release (vm);
+
+ return l;
}
-always_inline ip4_fib_mtrie_8_ply_t *
-get_next_ply_for_leaf (ip4_fib_mtrie_t * m, ip4_fib_mtrie_leaf_t l)
+always_inline ip4_mtrie_8_ply_t *
+get_next_ply_for_leaf (ip4_mtrie_leaf_t l)
{
- uword n = ip4_fib_mtrie_leaf_get_next_ply_index (l);
+ uword n = ip4_mtrie_leaf_get_next_ply_index (l);
return pool_elt_at_index (ip4_ply_pool, n);
}
void
-ip4_mtrie_free (ip4_fib_mtrie_t * m)
+ip4_mtrie_16_free (ip4_mtrie_16_t *m)
{
/* the root ply is embedded so there is nothing to do,
* the assumption being that the IP4 FIB table has emptied the trie
@@ -203,15 +154,46 @@ ip4_mtrie_free (ip4_fib_mtrie_t * m)
int i;
for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
{
- ASSERT (!ip4_fib_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
+ ASSERT (!ip4_mtrie_leaf_is_next_ply (m->root_ply.leaves[i]));
+ }
+#endif
+}
+
+void
+ip4_mtrie_16_init (ip4_mtrie_16_t *m)
+{
+ ply_16_init (&m->root_ply, IP4_MTRIE_LEAF_EMPTY, 0);
+}
+
+void
+ip4_mtrie_8_free (ip4_mtrie_8_t *m)
+{
+ /* the root ply is embedded so there is nothing to do,
+ * the assumption being that the IP4 FIB table has emptied the trie
+ * before deletion.
+ */
+ ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+
+#if CLIB_DEBUG > 0
+ int i;
+ for (i = 0; i < ARRAY_LEN (root->leaves); i++)
+ {
+ ASSERT (!ip4_mtrie_leaf_is_next_ply (root->leaves[i]));
}
#endif
+
+ pool_put (ip4_ply_pool, root);
}
void
-ip4_mtrie_init (ip4_fib_mtrie_t * m)
+ip4_mtrie_8_init (ip4_mtrie_8_t *m)
{
- ply_16_init (&m->root_ply, IP4_FIB_MTRIE_LEAF_EMPTY, 0);
+ ip4_mtrie_8_ply_t *root;
+
+ pool_get (ip4_ply_pool, root);
+ m->root_ply = root - ip4_ply_pool;
+
+ ply_8_init (root, IP4_MTRIE_LEAF_EMPTY, 0, 0);
}
typedef struct
@@ -221,29 +203,27 @@ typedef struct
u32 adj_index;
u32 cover_address_length;
u32 cover_adj_index;
-} ip4_fib_mtrie_set_unset_leaf_args_t;
+} ip4_mtrie_set_unset_leaf_args_t;
static void
-set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
- ip4_fib_mtrie_8_ply_t * ply,
- ip4_fib_mtrie_leaf_t new_leaf,
+set_ply_with_more_specific_leaf (ip4_mtrie_8_ply_t *ply,
+ ip4_mtrie_leaf_t new_leaf,
uword new_leaf_dst_address_bits)
{
- ip4_fib_mtrie_leaf_t old_leaf;
+ ip4_mtrie_leaf_t old_leaf;
uword i;
- ASSERT (ip4_fib_mtrie_leaf_is_terminal (new_leaf));
+ ASSERT (ip4_mtrie_leaf_is_terminal (new_leaf));
for (i = 0; i < ARRAY_LEN (ply->leaves); i++)
{
old_leaf = ply->leaves[i];
/* Recurse into sub plies. */
- if (!ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ if (!ip4_mtrie_leaf_is_terminal (old_leaf))
{
- ip4_fib_mtrie_8_ply_t *sub_ply =
- get_next_ply_for_leaf (m, old_leaf);
- set_ply_with_more_specific_leaf (m, sub_ply, new_leaf,
+ ip4_mtrie_8_ply_t *sub_ply = get_next_ply_for_leaf (old_leaf);
+ set_ply_with_more_specific_leaf (sub_ply, new_leaf,
new_leaf_dst_address_bits);
}
@@ -253,20 +233,19 @@ set_ply_with_more_specific_leaf (ip4_fib_mtrie_t * m,
{
clib_atomic_store_rel_n (&ply->leaves[i], new_leaf);
ply->dst_address_bits_of_leaves[i] = new_leaf_dst_address_bits;
- ply->n_non_empty_leafs += ip4_fib_mtrie_leaf_is_non_empty (ply, i);
+ ply->n_non_empty_leafs += ip4_mtrie_leaf_is_non_empty (ply, i);
}
}
}
static void
-set_leaf (ip4_fib_mtrie_t * m,
- const ip4_fib_mtrie_set_unset_leaf_args_t * a,
- u32 old_ply_index, u32 dst_address_byte_index)
+set_leaf (const ip4_mtrie_set_unset_leaf_args_t *a, u32 old_ply_index,
+ u32 dst_address_byte_index)
{
- ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
+ ip4_mtrie_leaf_t old_leaf, new_leaf;
i32 n_dst_bits_next_plies;
u8 dst_byte;
- ip4_fib_mtrie_8_ply_t *old_ply;
+ ip4_mtrie_8_ply_t *old_ply;
old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
@@ -295,30 +274,30 @@ set_leaf (ip4_fib_mtrie_t * m,
* fill the buckets/slots of the ply */
for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
{
- ip4_fib_mtrie_8_ply_t *new_ply;
+ ip4_mtrie_8_ply_t *new_ply;
old_leaf = old_ply->leaves[i];
- old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+ old_leaf_is_terminal = ip4_mtrie_leaf_is_terminal (old_leaf);
if (a->dst_address_length >= old_ply->dst_address_bits_of_leaves[i])
{
/* The new leaf is more or equally specific than the one currently
* occupying the slot */
- new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+ new_leaf = ip4_mtrie_leaf_set_adj_index (a->adj_index);
if (old_leaf_is_terminal)
{
/* The current leaf is terminal, we can replace it with
* the new one */
old_ply->n_non_empty_leafs -=
- ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+ ip4_mtrie_leaf_is_non_empty (old_ply, i);
old_ply->dst_address_bits_of_leaves[i] =
a->dst_address_length;
clib_atomic_store_rel_n (&old_ply->leaves[i], new_leaf);
old_ply->n_non_empty_leafs +=
- ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+ ip4_mtrie_leaf_is_non_empty (old_ply, i);
ASSERT (old_ply->n_non_empty_leafs <=
ARRAY_LEN (old_ply->leaves));
}
@@ -326,8 +305,8 @@ set_leaf (ip4_fib_mtrie_t * m,
{
/* Existing leaf points to another ply. We need to place
* new_leaf into all more specific slots. */
- new_ply = get_next_ply_for_leaf (m, old_leaf);
- set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+ new_ply = get_next_ply_for_leaf (old_leaf);
+ set_ply_with_more_specific_leaf (new_ply, new_leaf,
a->dst_address_length);
}
}
@@ -335,9 +314,8 @@ set_leaf (ip4_fib_mtrie_t * m,
{
/* The current leaf is less specific and not termial (i.e. a ply),
* recurse on down the trie */
- new_ply = get_next_ply_for_leaf (m, old_leaf);
- set_leaf (m, a, new_ply - ip4_ply_pool,
- dst_address_byte_index + 1);
+ new_ply = get_next_ply_for_leaf (old_leaf);
+ set_leaf (a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
}
/*
* else
@@ -350,24 +328,23 @@ set_leaf (ip4_fib_mtrie_t * m,
{
/* The address to insert requires us to move down at a lower level of
* the trie - recurse on down */
- ip4_fib_mtrie_8_ply_t *new_ply;
+ ip4_mtrie_8_ply_t *new_ply;
u8 ply_base_len;
ply_base_len = 8 * (dst_address_byte_index + 1);
old_leaf = old_ply->leaves[dst_byte];
- if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ if (ip4_mtrie_leaf_is_terminal (old_leaf))
{
/* There is a leaf occupying the slot. Replace it with a new ply */
old_ply->n_non_empty_leafs -=
- ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
+ ip4_mtrie_leaf_is_non_empty (old_ply, dst_byte);
- new_leaf =
- ply_create (m, old_leaf,
- old_ply->dst_address_bits_of_leaves[dst_byte],
- ply_base_len);
- new_ply = get_next_ply_for_leaf (m, new_leaf);
+ new_leaf = ply_create (old_leaf,
+ old_ply->dst_address_bits_of_leaves[dst_byte],
+ ply_base_len);
+ new_ply = get_next_ply_for_leaf (new_leaf);
/* Refetch since ply_create may move pool. */
old_ply = pool_elt_at_index (ip4_ply_pool, old_ply_index);
@@ -376,22 +353,21 @@ set_leaf (ip4_fib_mtrie_t * m,
old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
old_ply->n_non_empty_leafs +=
- ip4_fib_mtrie_leaf_is_non_empty (old_ply, dst_byte);
+ ip4_mtrie_leaf_is_non_empty (old_ply, dst_byte);
ASSERT (old_ply->n_non_empty_leafs >= 0);
}
else
- new_ply = get_next_ply_for_leaf (m, old_leaf);
+ new_ply = get_next_ply_for_leaf (old_leaf);
- set_leaf (m, a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
+ set_leaf (a, new_ply - ip4_ply_pool, dst_address_byte_index + 1);
}
}
static void
-set_root_leaf (ip4_fib_mtrie_t * m,
- const ip4_fib_mtrie_set_unset_leaf_args_t * a)
+set_root_leaf (ip4_mtrie_16_t *m, const ip4_mtrie_set_unset_leaf_args_t *a)
{
- ip4_fib_mtrie_leaf_t old_leaf, new_leaf;
- ip4_fib_mtrie_16_ply_t *old_ply;
+ ip4_mtrie_leaf_t old_leaf, new_leaf;
+ ip4_mtrie_16_ply_t *old_ply;
i32 n_dst_bits_next_plies;
u16 dst_byte;
@@ -420,7 +396,7 @@ set_root_leaf (ip4_fib_mtrie_t * m,
* fill the buckets/slots of the ply */
for (i = 0; i < (1 << n_dst_bits_this_ply); i++)
{
- ip4_fib_mtrie_8_ply_t *new_ply;
+ ip4_mtrie_8_ply_t *new_ply;
u16 slot;
slot = clib_net_to_host_u16 (dst_byte);
@@ -428,14 +404,14 @@ set_root_leaf (ip4_fib_mtrie_t * m,
slot = clib_host_to_net_u16 (slot);
old_leaf = old_ply->leaves[slot];
- old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+ old_leaf_is_terminal = ip4_mtrie_leaf_is_terminal (old_leaf);
if (a->dst_address_length >=
old_ply->dst_address_bits_of_leaves[slot])
{
/* The new leaf is more or equally specific than the one currently
* occupying the slot */
- new_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+ new_leaf = ip4_mtrie_leaf_set_adj_index (a->adj_index);
if (old_leaf_is_terminal)
{
@@ -449,8 +425,8 @@ set_root_leaf (ip4_fib_mtrie_t * m,
{
/* Existing leaf points to another ply. We need to place
* new_leaf into all more specific slots. */
- new_ply = get_next_ply_for_leaf (m, old_leaf);
- set_ply_with_more_specific_leaf (m, new_ply, new_leaf,
+ new_ply = get_next_ply_for_leaf (old_leaf);
+ set_ply_with_more_specific_leaf (new_ply, new_leaf,
a->dst_address_length);
}
}
@@ -458,8 +434,8 @@ set_root_leaf (ip4_fib_mtrie_t * m,
{
/* The current leaf is less specific and not termial (i.e. a ply),
* recurse on down the trie */
- new_ply = get_next_ply_for_leaf (m, old_leaf);
- set_leaf (m, a, new_ply - ip4_ply_pool, 2);
+ new_ply = get_next_ply_for_leaf (old_leaf);
+ set_leaf (a, new_ply - ip4_ply_pool, 2);
}
/*
* else
@@ -472,38 +448,36 @@ set_root_leaf (ip4_fib_mtrie_t * m,
{
/* The address to insert requires us to move down at a lower level of
* the trie - recurse on down */
- ip4_fib_mtrie_8_ply_t *new_ply;
+ ip4_mtrie_8_ply_t *new_ply;
u8 ply_base_len;
ply_base_len = 16;
old_leaf = old_ply->leaves[dst_byte];
- if (ip4_fib_mtrie_leaf_is_terminal (old_leaf))
+ if (ip4_mtrie_leaf_is_terminal (old_leaf))
{
/* There is a leaf occupying the slot. Replace it with a new ply */
- new_leaf =
- ply_create (m, old_leaf,
- old_ply->dst_address_bits_of_leaves[dst_byte],
- ply_base_len);
- new_ply = get_next_ply_for_leaf (m, new_leaf);
+ new_leaf = ply_create (old_leaf,
+ old_ply->dst_address_bits_of_leaves[dst_byte],
+ ply_base_len);
+ new_ply = get_next_ply_for_leaf (new_leaf);
clib_atomic_store_rel_n (&old_ply->leaves[dst_byte], new_leaf);
old_ply->dst_address_bits_of_leaves[dst_byte] = ply_base_len;
}
else
- new_ply = get_next_ply_for_leaf (m, old_leaf);
+ new_ply = get_next_ply_for_leaf (old_leaf);
- set_leaf (m, a, new_ply - ip4_ply_pool, 2);
+ set_leaf (a, new_ply - ip4_ply_pool, 2);
}
}
static uword
-unset_leaf (ip4_fib_mtrie_t * m,
- const ip4_fib_mtrie_set_unset_leaf_args_t * a,
- ip4_fib_mtrie_8_ply_t * old_ply, u32 dst_address_byte_index)
+unset_leaf (const ip4_mtrie_set_unset_leaf_args_t *a,
+ ip4_mtrie_8_ply_t *old_ply, u32 dst_address_byte_index)
{
- ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
+ ip4_mtrie_leaf_t old_leaf, del_leaf;
i32 n_dst_bits_next_plies;
i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
u8 dst_byte;
@@ -522,28 +496,28 @@ unset_leaf (ip4_fib_mtrie_t * m,
n_dst_bits_next_plies <= 0 ? -n_dst_bits_next_plies : 0;
n_dst_bits_this_ply = clib_min (8, n_dst_bits_this_ply);
- del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+ del_leaf = ip4_mtrie_leaf_set_adj_index (a->adj_index);
for (i = dst_byte; i < dst_byte + (1 << n_dst_bits_this_ply); i++)
{
old_leaf = old_ply->leaves[i];
- old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+ old_leaf_is_terminal = ip4_mtrie_leaf_is_terminal (old_leaf);
- if (old_leaf == del_leaf
- || (!old_leaf_is_terminal
- && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf),
- dst_address_byte_index + 1)))
+ if (old_leaf == del_leaf ||
+ (!old_leaf_is_terminal &&
+ unset_leaf (a, get_next_ply_for_leaf (old_leaf),
+ dst_address_byte_index + 1)))
{
old_ply->n_non_empty_leafs -=
- ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+ ip4_mtrie_leaf_is_non_empty (old_ply, i);
- clib_atomic_store_rel_n (&old_ply->leaves[i],
- ip4_fib_mtrie_leaf_set_adj_index
- (a->cover_adj_index));
+ clib_atomic_store_rel_n (
+ &old_ply->leaves[i],
+ ip4_mtrie_leaf_set_adj_index (a->cover_adj_index));
old_ply->dst_address_bits_of_leaves[i] = a->cover_address_length;
old_ply->n_non_empty_leafs +=
- ip4_fib_mtrie_leaf_is_non_empty (old_ply, i);
+ ip4_mtrie_leaf_is_non_empty (old_ply, i);
ASSERT (old_ply->n_non_empty_leafs >= 0);
if (old_ply->n_non_empty_leafs == 0 && dst_address_byte_index > 0)
@@ -558,7 +532,7 @@ unset_leaf (ip4_fib_mtrie_t * m,
int ii, count = 0;
for (ii = 0; ii < ARRAY_LEN (old_ply->leaves); ii++)
{
- count += ip4_fib_mtrie_leaf_is_non_empty (old_ply, ii);
+ count += ip4_mtrie_leaf_is_non_empty (old_ply, ii);
}
ASSERT (count);
}
@@ -571,14 +545,13 @@ unset_leaf (ip4_fib_mtrie_t * m,
}
static void
-unset_root_leaf (ip4_fib_mtrie_t * m,
- const ip4_fib_mtrie_set_unset_leaf_args_t * a)
+unset_root_leaf (ip4_mtrie_16_t *m, const ip4_mtrie_set_unset_leaf_args_t *a)
{
- ip4_fib_mtrie_leaf_t old_leaf, del_leaf;
+ ip4_mtrie_leaf_t old_leaf, del_leaf;
i32 n_dst_bits_next_plies;
i32 i, n_dst_bits_this_ply, old_leaf_is_terminal;
u16 dst_byte;
- ip4_fib_mtrie_16_ply_t *old_ply;
+ ip4_mtrie_16_ply_t *old_ply;
ASSERT (a->dst_address_length <= 32);
@@ -590,7 +563,7 @@ unset_root_leaf (ip4_fib_mtrie_t * m,
n_dst_bits_this_ply = (n_dst_bits_next_plies <= 0 ?
(16 - a->dst_address_length) : 0);
- del_leaf = ip4_fib_mtrie_leaf_set_adj_index (a->adj_index);
+ del_leaf = ip4_mtrie_leaf_set_adj_index (a->adj_index);
/* Starting at the value of the byte at this section of the v4 address
* fill the buckets/slots of the ply */
@@ -603,26 +576,25 @@ unset_root_leaf (ip4_fib_mtrie_t * m,
slot = clib_host_to_net_u16 (slot);
old_leaf = old_ply->leaves[slot];
- old_leaf_is_terminal = ip4_fib_mtrie_leaf_is_terminal (old_leaf);
+ old_leaf_is_terminal = ip4_mtrie_leaf_is_terminal (old_leaf);
- if (old_leaf == del_leaf
- || (!old_leaf_is_terminal
- && unset_leaf (m, a, get_next_ply_for_leaf (m, old_leaf), 2)))
+ if (old_leaf == del_leaf ||
+ (!old_leaf_is_terminal &&
+ unset_leaf (a, get_next_ply_for_leaf (old_leaf), 2)))
{
- clib_atomic_store_rel_n (&old_ply->leaves[slot],
- ip4_fib_mtrie_leaf_set_adj_index
- (a->cover_adj_index));
+ clib_atomic_store_rel_n (
+ &old_ply->leaves[slot],
+ ip4_mtrie_leaf_set_adj_index (a->cover_adj_index));
old_ply->dst_address_bits_of_leaves[slot] = a->cover_address_length;
}
}
}
void
-ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
- const ip4_address_t * dst_address,
- u32 dst_address_length, u32 adj_index)
+ip4_mtrie_16_route_add (ip4_mtrie_16_t *m, const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index)
{
- ip4_fib_mtrie_set_unset_leaf_args_t a;
+ ip4_mtrie_set_unset_leaf_args_t a;
ip4_main_t *im = &ip4_main;
/* Honor dst_address_length. Fib masks are in network byte order */
@@ -635,13 +607,29 @@ ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
}
void
-ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
- const ip4_address_t * dst_address,
- u32 dst_address_length,
- u32 adj_index,
- u32 cover_address_length, u32 cover_adj_index)
+ip4_mtrie_8_route_add (ip4_mtrie_8_t *m, const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index)
+{
+ ip4_mtrie_set_unset_leaf_args_t a;
+ ip4_main_t *im = &ip4_main;
+
+ /* Honor dst_address_length. Fib masks are in network byte order */
+ a.dst_address.as_u32 =
+ (dst_address->as_u32 & im->fib_masks[dst_address_length]);
+ a.dst_address_length = dst_address_length;
+ a.adj_index = adj_index;
+
+ ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+
+ set_leaf (&a, root - ip4_ply_pool, 0);
+}
+
+void
+ip4_mtrie_16_route_del (ip4_mtrie_16_t *m, const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index,
+ u32 cover_address_length, u32 cover_adj_index)
{
- ip4_fib_mtrie_set_unset_leaf_args_t a;
+ ip4_mtrie_set_unset_leaf_args_t a;
ip4_main_t *im = &ip4_main;
/* Honor dst_address_length. Fib masks are in network byte order */
@@ -656,18 +644,41 @@ ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
unset_root_leaf (m, &a);
}
+void
+ip4_mtrie_8_route_del (ip4_mtrie_8_t *m, const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index,
+ u32 cover_address_length, u32 cover_adj_index)
+{
+ ip4_main_t *im = &ip4_main;
+
+ /* Honor dst_address_length. Fib masks are in network byte order */
+ ip4_mtrie_set_unset_leaf_args_t a = {
+ .dst_address.as_u32 =
+ (dst_address->as_u32 & im->fib_masks[dst_address_length]),
+ .dst_address_length = dst_address_length,
+ .adj_index = adj_index,
+ .cover_adj_index = cover_adj_index,
+ .cover_address_length = cover_address_length,
+ };
+
+ /* the top level ply is never removed */
+ ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+
+ unset_leaf (&a, root, 0);
+}
+
/* Returns number of bytes of memory used by mtrie. */
static uword
-mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
+mtrie_ply_memory_usage (ip4_mtrie_8_ply_t *p)
{
uword bytes, i;
bytes = sizeof (p[0]);
for (i = 0; i < ARRAY_LEN (p->leaves); i++)
{
- ip4_fib_mtrie_leaf_t l = p->leaves[i];
- if (ip4_fib_mtrie_leaf_is_next_ply (l))
- bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
+ ip4_mtrie_leaf_t l = p->leaves[i];
+ if (ip4_mtrie_leaf_is_next_ply (l))
+ bytes += mtrie_ply_memory_usage (get_next_ply_for_leaf (l));
}
return bytes;
@@ -675,62 +686,76 @@ mtrie_ply_memory_usage (ip4_fib_mtrie_t * m, ip4_fib_mtrie_8_ply_t * p)
/* Returns number of bytes of memory used by mtrie. */
uword
-ip4_fib_mtrie_memory_usage (ip4_fib_mtrie_t * m)
+ip4_mtrie_16_memory_usage (ip4_mtrie_16_t *m)
{
uword bytes, i;
bytes = sizeof (*m);
for (i = 0; i < ARRAY_LEN (m->root_ply.leaves); i++)
{
- ip4_fib_mtrie_leaf_t l = m->root_ply.leaves[i];
- if (ip4_fib_mtrie_leaf_is_next_ply (l))
- bytes += mtrie_ply_memory_usage (m, get_next_ply_for_leaf (m, l));
+ ip4_mtrie_leaf_t l = m->root_ply.leaves[i];
+ if (ip4_mtrie_leaf_is_next_ply (l))
+ bytes += mtrie_ply_memory_usage (get_next_ply_for_leaf (l));
+ }
+
+ return bytes;
+}
+uword
+ip4_mtrie_8_memory_usage (ip4_mtrie_8_t *m)
+{
+ ip4_mtrie_8_ply_t *root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+ uword bytes, i;
+
+ bytes = sizeof (*m);
+ for (i = 0; i < ARRAY_LEN (root->leaves); i++)
+ {
+ ip4_mtrie_leaf_t l = root->leaves[i];
+ if (ip4_mtrie_leaf_is_next_ply (l))
+ bytes += mtrie_ply_memory_usage (get_next_ply_for_leaf (l));
}
return bytes;
}
static u8 *
-format_ip4_fib_mtrie_leaf (u8 * s, va_list * va)
+format_ip4_mtrie_leaf (u8 *s, va_list *va)
{
- ip4_fib_mtrie_leaf_t l = va_arg (*va, ip4_fib_mtrie_leaf_t);
+ ip4_mtrie_leaf_t l = va_arg (*va, ip4_mtrie_leaf_t);
- if (ip4_fib_mtrie_leaf_is_terminal (l))
- s = format (s, "lb-index %d", ip4_fib_mtrie_leaf_get_adj_index (l));
+ if (ip4_mtrie_leaf_is_terminal (l))
+ s = format (s, "lb-index %d", ip4_mtrie_leaf_get_adj_index (l));
else
- s = format (s, "next ply %d", ip4_fib_mtrie_leaf_get_next_ply_index (l));
+ s = format (s, "next ply %d", ip4_mtrie_leaf_get_next_ply_index (l));
return s;
}
-#define FORMAT_PLY(s, _p, _a, _i, _base_address, _ply_max_len, _indent) \
-({ \
- u32 a, ia_length; \
- ip4_address_t ia; \
- ip4_fib_mtrie_leaf_t _l = p->leaves[(_i)]; \
- \
- a = (_base_address) + ((_a) << (32 - (_ply_max_len))); \
- ia.as_u32 = clib_host_to_net_u32 (a); \
- ia_length = (_p)->dst_address_bits_of_leaves[(_i)]; \
- s = format (s, "\n%U%U %U", \
- format_white_space, (_indent) + 4, \
- format_ip4_address_and_length, &ia, ia_length, \
- format_ip4_fib_mtrie_leaf, _l); \
- \
- if (ip4_fib_mtrie_leaf_is_next_ply (_l)) \
- s = format (s, "\n%U", \
- format_ip4_fib_mtrie_ply, m, a, (_indent) + 8, \
- ip4_fib_mtrie_leaf_get_next_ply_index (_l)); \
- s; \
-})
+#define FORMAT_PLY(s, _p, _a, _i, _base_address, _ply_max_len, _indent) \
+ ({ \
+ u32 a, ia_length; \
+ ip4_address_t ia; \
+ ip4_mtrie_leaf_t _l = (_p)->leaves[(_i)]; \
+ \
+ a = (_base_address) + ((_a) << (32 - (_ply_max_len))); \
+ ia.as_u32 = clib_host_to_net_u32 (a); \
+ ia_length = (_p)->dst_address_bits_of_leaves[(_i)]; \
+ s = format (s, "\n%U%U %U", format_white_space, (_indent) + 4, \
+ format_ip4_address_and_length, &ia, ia_length, \
+ format_ip4_mtrie_leaf, _l); \
+ \
+ if (ip4_mtrie_leaf_is_next_ply (_l)) \
+ s = format (s, "\n%U", format_ip4_mtrie_ply, m, a, (_indent) + 8, \
+ ip4_mtrie_leaf_get_next_ply_index (_l)); \
+ s; \
+ })
static u8 *
-format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
+format_ip4_mtrie_ply (u8 *s, va_list *va)
{
- ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+ ip4_mtrie_16_t *m = va_arg (*va, ip4_mtrie_16_t *);
u32 base_address = va_arg (*va, u32);
u32 indent = va_arg (*va, u32);
u32 ply_index = va_arg (*va, u32);
- ip4_fib_mtrie_8_ply_t *p;
+ ip4_mtrie_8_ply_t *p;
int i;
p = pool_elt_at_index (ip4_ply_pool, ply_index);
@@ -739,7 +764,7 @@ format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
for (i = 0; i < ARRAY_LEN (p->leaves); i++)
{
- if (ip4_fib_mtrie_leaf_is_non_empty (p, i))
+ if (ip4_mtrie_leaf_is_non_empty (p, i))
{
s = FORMAT_PLY (s, p, i, i, base_address,
p->dst_address_bits_base + 8, indent);
@@ -750,18 +775,17 @@ format_ip4_fib_mtrie_ply (u8 * s, va_list * va)
}
u8 *
-format_ip4_fib_mtrie (u8 * s, va_list * va)
+format_ip4_mtrie_16 (u8 *s, va_list *va)
{
- ip4_fib_mtrie_t *m = va_arg (*va, ip4_fib_mtrie_t *);
+ ip4_mtrie_16_t *m = va_arg (*va, ip4_mtrie_16_t *);
int verbose = va_arg (*va, int);
- ip4_fib_mtrie_16_ply_t *p;
+ ip4_mtrie_16_ply_t *p;
u32 base_address = 0;
int i;
- s = format (s, "%d plies, memory usage %U\n",
- pool_elts (ip4_ply_pool),
- format_memory_size, ip4_fib_mtrie_memory_usage (m));
- s = format (s, "root-ply");
+ s =
+ format (s, "16-8-8: %d plies, memory usage %U\n", pool_elts (ip4_ply_pool),
+ format_memory_size, ip4_mtrie_16_memory_usage (m));
p = &m->root_ply;
if (verbose)
@@ -785,6 +809,37 @@ format_ip4_fib_mtrie (u8 * s, va_list * va)
return s;
}
+u8 *
+format_ip4_mtrie_8 (u8 *s, va_list *va)
+{
+ ip4_mtrie_8_t *m = va_arg (*va, ip4_mtrie_8_t *);
+ int verbose = va_arg (*va, int);
+ ip4_mtrie_8_ply_t *root;
+ u32 base_address = 0;
+ u16 slot;
+
+ root = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+
+ s = format (s, "8-8-8-8; %d plies, memory usage %U\n",
+ pool_elts (ip4_ply_pool), format_memory_size,
+ ip4_mtrie_8_memory_usage (m));
+
+ if (verbose)
+ {
+ s = format (s, "root-ply");
+
+ for (slot = 0; slot < ARRAY_LEN (root->leaves); slot++)
+ {
+ if (root->dst_address_bits_of_leaves[slot] > 0)
+ {
+ s = FORMAT_PLY (s, root, slot, slot, base_address, 8, 0);
+ }
+ }
+ }
+
+ return s;
+}
+
/** Default heap size for the IPv4 mtries */
#define IP4_FIB_DEFAULT_MTRIE_HEAP_SIZE (32<<20)
#ifndef MAP_HUGE_SHIFT
@@ -794,7 +849,7 @@ format_ip4_fib_mtrie (u8 * s, va_list * va)
static clib_error_t *
ip4_mtrie_module_init (vlib_main_t * vm)
{
- CLIB_UNUSED (ip4_fib_mtrie_8_ply_t * p);
+ CLIB_UNUSED (ip4_mtrie_8_ply_t * p);
clib_error_t *error = NULL;
/* Burn one ply so index 0 is taken */
diff --git a/src/vnet/ip/ip4_mtrie.h b/src/vnet/ip/ip4_mtrie.h
index 87e2b5892ba..16c524745be 100644
--- a/src/vnet/ip/ip4_mtrie.h
+++ b/src/vnet/ip/ip4_mtrie.h
@@ -49,9 +49,9 @@
1 + 2*adj_index for terminal leaves.
0 + 2*next_ply_index for non-terminals, i.e. PLYs
1 => empty (adjacency index of zero is special miss adjacency). */
-typedef u32 ip4_fib_mtrie_leaf_t;
+typedef u32 ip4_mtrie_leaf_t;
-#define IP4_FIB_MTRIE_LEAF_EMPTY (1 + 2*0)
+#define IP4_MTRIE_LEAF_EMPTY (1 + 2 * 0)
/**
* @brief the 16 way stride that is the top PLY of the mtrie
@@ -60,42 +60,29 @@ typedef u32 ip4_fib_mtrie_leaf_t;
* the FIB is destroyed.
*/
#define PLY_16_SIZE (1<<16)
-typedef struct ip4_fib_mtrie_16_ply_t_
+typedef struct ip4_mtrie_16_ply_t_
{
/**
* The leaves/slots/buckets to be filed with leafs
*/
- union
- {
- ip4_fib_mtrie_leaf_t leaves[PLY_16_SIZE];
-
-#ifdef CLIB_HAVE_VEC128
- u32x4 leaves_as_u32x4[PLY_16_SIZE / 4];
-#endif
- };
+ ip4_mtrie_leaf_t leaves[PLY_16_SIZE];
/**
* Prefix length for terminal leaves.
*/
u8 dst_address_bits_of_leaves[PLY_16_SIZE];
-} ip4_fib_mtrie_16_ply_t;
+} ip4_mtrie_16_ply_t;
/**
* @brief One ply of the 4 ply mtrie fib.
*/
-typedef struct ip4_fib_mtrie_8_ply_t_
+typedef struct ip4_mtrie_8_ply_t_
{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
/**
* The leaves/slots/buckets to be filed with leafs
*/
- union
- {
- ip4_fib_mtrie_leaf_t leaves[256];
-
-#ifdef CLIB_HAVE_VEC128
- u32x4 leaves_as_u32x4[256 / 4];
-#endif
- };
+ ip4_mtrie_leaf_t leaves[256];
/**
* Prefix length for leaves/ply.
@@ -113,17 +100,13 @@ typedef struct ip4_fib_mtrie_8_ply_t_
* 'non-empty'. Otherwise it is the value of the cover.
*/
i32 dst_address_bits_base;
+} ip4_mtrie_8_ply_t;
- /* Pad to cache line boundary. */
- u8 pad[CLIB_CACHE_LINE_BYTES - 2 * sizeof (i32)];
-}
-ip4_fib_mtrie_8_ply_t;
-
-STATIC_ASSERT (0 == sizeof (ip4_fib_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES,
+STATIC_ASSERT (0 == sizeof (ip4_mtrie_8_ply_t) % CLIB_CACHE_LINE_BYTES,
"IP4 Mtrie ply cache line");
/**
- * @brief The mutiway-TRIE.
+ * @brief The mutiway-TRIE with a 16-8-8 stride.
* There is no data associated with the mtrie apart from the top PLY
*/
typedef struct
@@ -133,54 +116,73 @@ typedef struct
* 'get me the mtrie' returns the first ply, and not an indirect 'pointer'
* to it. therefore no cacheline misses in the data-path.
*/
- ip4_fib_mtrie_16_ply_t root_ply;
-} ip4_fib_mtrie_t;
+ ip4_mtrie_16_ply_t root_ply;
+} ip4_mtrie_16_t;
+
+/**
+ * @brief The mutiway-TRIE with a 8-8-8-8 stride.
+ * There is no data associated with the mtrie apart from the top PLY
+ */
+typedef struct
+{
+ /* pool index of the root ply */
+ u32 root_ply;
+} ip4_mtrie_8_t;
/**
* @brief Initialise an mtrie
*/
-void ip4_mtrie_init (ip4_fib_mtrie_t * m);
+void ip4_mtrie_16_init (ip4_mtrie_16_t *m);
+void ip4_mtrie_8_init (ip4_mtrie_8_t *m);
/**
- * @brief Free an mtrie, It must be emty when free'd
+ * @brief Free an mtrie, It must be empty when free'd
*/
-void ip4_mtrie_free (ip4_fib_mtrie_t * m);
+void ip4_mtrie_16_free (ip4_mtrie_16_t *m);
+void ip4_mtrie_8_free (ip4_mtrie_8_t *m);
/**
* @brief Add a route/entry to the mtrie
*/
-void ip4_fib_mtrie_route_add (ip4_fib_mtrie_t * m,
- const ip4_address_t * dst_address,
- u32 dst_address_length, u32 adj_index);
+void ip4_mtrie_16_route_add (ip4_mtrie_16_t *m,
+ const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index);
+void ip4_mtrie_8_route_add (ip4_mtrie_8_t *m, const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index);
+
/**
* @brief remove a route/entry to the mtrie
*/
-void ip4_fib_mtrie_route_del (ip4_fib_mtrie_t * m,
- const ip4_address_t * dst_address,
- u32 dst_address_length,
- u32 adj_index,
- u32 cover_address_length, u32 cover_adj_index);
+void ip4_mtrie_16_route_del (ip4_mtrie_16_t *m,
+ const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index,
+ u32 cover_address_length, u32 cover_adj_index);
+void ip4_mtrie_8_route_del (ip4_mtrie_8_t *m, const ip4_address_t *dst_address,
+ u32 dst_address_length, u32 adj_index,
+ u32 cover_address_length, u32 cover_adj_index);
/**
* @brief return the memory used by the table
*/
-uword ip4_fib_mtrie_memory_usage (ip4_fib_mtrie_t * m);
+uword ip4_mtrie_16_memory_usage (ip4_mtrie_16_t *m);
+uword ip4_mtrie_8_memory_usage (ip4_mtrie_8_t *m);
/**
* @brief Format/display the contents of the mtrie
*/
-format_function_t format_ip4_fib_mtrie;
+format_function_t format_ip4_mtrie_16;
+format_function_t format_ip4_mtrie_8;
/**
* @brief A global pool of 8bit stride plys
*/
-extern ip4_fib_mtrie_8_ply_t *ip4_ply_pool;
+extern ip4_mtrie_8_ply_t *ip4_ply_pool;
/**
* Is the leaf terminal (i.e. an LB index) or non-terminal (i.e. a PLY index)
*/
always_inline u32
-ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
+ip4_mtrie_leaf_is_terminal (ip4_mtrie_leaf_t n)
{
return n & 1;
}
@@ -189,24 +191,23 @@ ip4_fib_mtrie_leaf_is_terminal (ip4_fib_mtrie_leaf_t n)
* From the stored slot value extract the LB index value
*/
always_inline u32
-ip4_fib_mtrie_leaf_get_adj_index (ip4_fib_mtrie_leaf_t n)
+ip4_mtrie_leaf_get_adj_index (ip4_mtrie_leaf_t n)
{
- ASSERT (ip4_fib_mtrie_leaf_is_terminal (n));
+ ASSERT (ip4_mtrie_leaf_is_terminal (n));
return n >> 1;
}
/**
* @brief Lookup step. Processes 1 byte of 4 byte ip4 address.
*/
-always_inline ip4_fib_mtrie_leaf_t
-ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m,
- ip4_fib_mtrie_leaf_t current_leaf,
- const ip4_address_t * dst_address,
- u32 dst_address_byte_index)
+always_inline ip4_mtrie_leaf_t
+ip4_mtrie_16_lookup_step (ip4_mtrie_leaf_t current_leaf,
+ const ip4_address_t *dst_address,
+ u32 dst_address_byte_index)
{
- ip4_fib_mtrie_8_ply_t *ply;
+ ip4_mtrie_8_ply_t *ply;
- uword current_is_terminal = ip4_fib_mtrie_leaf_is_terminal (current_leaf);
+ uword current_is_terminal = ip4_mtrie_leaf_is_terminal (current_leaf);
if (!current_is_terminal)
{
@@ -220,17 +221,48 @@ ip4_fib_mtrie_lookup_step (const ip4_fib_mtrie_t * m,
/**
* @brief Lookup step number 1. Processes 2 bytes of 4 byte ip4 address.
*/
-always_inline ip4_fib_mtrie_leaf_t
-ip4_fib_mtrie_lookup_step_one (const ip4_fib_mtrie_t * m,
- const ip4_address_t * dst_address)
+always_inline ip4_mtrie_leaf_t
+ip4_mtrie_16_lookup_step_one (const ip4_mtrie_16_t *m,
+ const ip4_address_t *dst_address)
{
- ip4_fib_mtrie_leaf_t next_leaf;
+ ip4_mtrie_leaf_t next_leaf;
next_leaf = m->root_ply.leaves[dst_address->as_u16[0]];
return next_leaf;
}
+always_inline ip4_mtrie_leaf_t
+ip4_mtrie_8_lookup_step (ip4_mtrie_leaf_t current_leaf,
+ const ip4_address_t *dst_address,
+ u32 dst_address_byte_index)
+{
+ ip4_mtrie_8_ply_t *ply;
+
+ uword current_is_terminal = ip4_mtrie_leaf_is_terminal (current_leaf);
+
+ if (!current_is_terminal)
+ {
+ ply = ip4_ply_pool + (current_leaf >> 1);
+ return (ply->leaves[dst_address->as_u8[dst_address_byte_index]]);
+ }
+
+ return current_leaf;
+}
+
+always_inline ip4_mtrie_leaf_t
+ip4_mtrie_8_lookup_step_one (const ip4_mtrie_8_t *m,
+ const ip4_address_t *dst_address)
+{
+ ip4_mtrie_leaf_t next_leaf;
+ ip4_mtrie_8_ply_t *ply;
+
+ ply = pool_elt_at_index (ip4_ply_pool, m->root_ply);
+ next_leaf = ply->leaves[dst_address->as_u8[0]];
+
+ return next_leaf;
+}
+
#endif /* included_ip_ip4_fib_h */
/*
diff --git a/src/vnet/ip/ip4_options.c b/src/vnet/ip/ip4_options.c
index 1b5a7878512..bbe311ffb20 100644
--- a/src/vnet/ip/ip4_options.c
+++ b/src/vnet/ip/ip4_options.c
@@ -78,10 +78,17 @@ VLIB_NODE_FN (ip4_options_node) (vlib_main_t * vm,
{
case IP4_ROUTER_ALERT_OPTION:
/*
+ * check the option length
+ */
+ if (options[1] != 4)
+ break;
+ /*
* if it's an IGMP packet, pass up the local stack
*/
if (IP_PROTOCOL_IGMP == ip4->protocol)
{
+ ip_lookup_set_buffer_fib_index (
+ ip4_main.fib_index_by_sw_if_index, b);
next = IP4_OPTIONS_NEXT_LOCAL;
}
break;
@@ -120,7 +127,6 @@ format_ip4_options_trace (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_options_node) = {
.name = "ip4-options",
.vector_size = sizeof (u32),
@@ -133,7 +139,6 @@ VLIB_REGISTER_NODE (ip4_options_node) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_options_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_packet.h b/src/vnet/ip/ip4_packet.h
index 513a7449b54..269049194e6 100644
--- a/src/vnet/ip/ip4_packet.h
+++ b/src/vnet/ip/ip4_packet.h
@@ -41,7 +41,6 @@
#define included_ip4_packet_h
#include <vnet/ip/ip_packet.h> /* for ip_csum_t */
-#include <vnet/tcp/tcp_packet.h> /* for tcp_header_t */
#include <vppinfra/byte_order.h> /* for clib_net_to_host_u16 */
#include <vppinfra/warnings.h> /* for WARN_OFF/WARN_ON macro */
@@ -130,19 +129,15 @@ typedef union
/* For checksumming we'll want to access IP header in word sized chunks. */
/* For 64 bit machines. */
- /* *INDENT-OFF* */
CLIB_PACKED (struct {
u64 checksum_data_64[2];
u32 checksum_data_64_32[1];
});
- /* *INDENT-ON* */
/* For 32 bit machines. */
- /* *INDENT-OFF* */
CLIB_PACKED (struct {
u32 checksum_data_32[5];
});
- /* *INDENT-ON* */
} ip4_header_t;
/* Value of ip_version_and_header_length for packets w/o options. */
@@ -201,9 +196,7 @@ ip4_next_header (ip4_header_t * i)
/* Turn off array bounds check due to ip4_header_t
option field operations. */
-/* *INDENT-OFF* */
WARN_OFF(array-bounds)
-/* *INDENT-ON* */
static_always_inline u16
ip4_header_checksum_inline (ip4_header_t * i, int with_checksum)
@@ -306,9 +299,7 @@ ip4_header_checksum_inline (ip4_header_t * i, int with_checksum)
return ~((u16) sum);
}
-/* *INDENT-OFF* */
WARN_ON(array-bounds)
-/* *INDENT-ON* */
always_inline u16
ip4_header_checksum (ip4_header_t * i)
@@ -476,47 +467,6 @@ ip4_multicast_ethernet_address (u8 * ethernet_address,
ethernet_address[5] = d[3];
}
-always_inline void
-ip4_tcp_reply_x1 (ip4_header_t * ip0, tcp_header_t * tcp0)
-{
- u32 src0, dst0;
-
- src0 = ip0->src_address.data_u32;
- dst0 = ip0->dst_address.data_u32;
- ip0->src_address.data_u32 = dst0;
- ip0->dst_address.data_u32 = src0;
-
- src0 = tcp0->src;
- dst0 = tcp0->dst;
- tcp0->src = dst0;
- tcp0->dst = src0;
-}
-
-always_inline void
-ip4_tcp_reply_x2 (ip4_header_t * ip0, ip4_header_t * ip1,
- tcp_header_t * tcp0, tcp_header_t * tcp1)
-{
- u32 src0, dst0, src1, dst1;
-
- src0 = ip0->src_address.data_u32;
- src1 = ip1->src_address.data_u32;
- dst0 = ip0->dst_address.data_u32;
- dst1 = ip1->dst_address.data_u32;
- ip0->src_address.data_u32 = dst0;
- ip1->src_address.data_u32 = dst1;
- ip0->dst_address.data_u32 = src0;
- ip1->dst_address.data_u32 = src1;
-
- src0 = tcp0->src;
- src1 = tcp1->src;
- dst0 = tcp0->dst;
- dst1 = tcp1->dst;
- tcp0->src = dst0;
- tcp1->src = dst1;
- tcp0->dst = src0;
- tcp1->dst = src1;
-}
-
#endif /* included_ip4_packet_h */
/*
diff --git a/src/vnet/ip/ip4_punt_drop.c b/src/vnet/ip/ip4_punt_drop.c
index 20430adb2f3..b8cc3304437 100644
--- a/src/vnet/ip/ip4_punt_drop.c
+++ b/src/vnet/ip/ip4_punt_drop.c
@@ -18,7 +18,6 @@
#include <vnet/policer/policer.h>
#include <vnet/policer/police_inlines.h>
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip4_punt) =
{
.arc_name = "ip4-punt",
@@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip4_drop) =
.arc_name = "ip4-drop",
.start_nodes = VNET_FEATURES ("ip4-drop", "ip4-not-enabled"),
};
-/* *INDENT-ON* */
extern ip_punt_policer_t ip4_punt_policer_cfg;
@@ -89,7 +87,6 @@ VLIB_NODE_FN (ip4_punt_policer_node) (vlib_main_t * vm,
ip4_punt_policer_cfg.policer_index));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_punt_policer_node) = {
.name = "ip4-punt-policer",
.vector_size = sizeof (u32),
@@ -109,7 +106,6 @@ VNET_FEATURE_INIT (ip4_punt_policer_node) = {
.node_name = "ip4-punt-policer",
.runs_before = VNET_FEATURES("ip4-punt-redirect"),
};
-/* *INDENT-ON* */
#define foreach_ip4_punt_redirect_error \
@@ -138,7 +134,6 @@ VLIB_NODE_FN (ip4_punt_redirect_node) (vlib_main_t * vm,
FIB_PROTOCOL_IP4));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_punt_redirect_node) = {
.name = "ip4-punt-redirect",
.vector_size = sizeof (u32),
@@ -160,7 +155,6 @@ VNET_FEATURE_INIT (ip4_punt_redirect_node, static) = {
.node_name = "ip4-punt-redirect",
.runs_before = VNET_FEATURES("error-punt"),
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -194,7 +188,6 @@ ip4_punt (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vnet_feat_arc_ip4_punt.feature_arc_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_drop_node) =
{
.name = "ip4-drop",
@@ -237,7 +230,6 @@ VNET_FEATURE_INIT (ip4_drop_end_of_arc, static) = {
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON */
#ifndef CLIB_MARCH_VARIANT
void
@@ -301,52 +293,35 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_punt_policer_command, static) =
{
.path = "ip punt policer",
.function = ip4_punt_police_cmd,
.short_help = "ip punt policer [add|del] <index>",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
-void
-ip4_punt_redirect_add (u32 rx_sw_if_index,
- u32 tx_sw_if_index, ip46_address_t * nh)
-{
- /* *INDENT-OFF* */
- fib_route_path_t *rpaths = NULL, rpath = {
- .frp_proto = DPO_PROTO_IP4,
- .frp_addr = *nh,
- .frp_sw_if_index = tx_sw_if_index,
- .frp_weight = 1,
- .frp_fib_index = ~0,
- };
- /* *INDENT-ON* */
-
- vec_add1 (rpaths, rpath);
-
- ip4_punt_redirect_add_paths (rx_sw_if_index, rpaths);
-
- vec_free (rpaths);
-}
+static u32 ip4_punt_redirect_enable_counts;
void
-ip4_punt_redirect_add_paths (u32 rx_sw_if_index, fib_route_path_t * rpaths)
+ip4_punt_redirect_add_paths (u32 rx_sw_if_index,
+ const fib_route_path_t *rpaths)
{
ip_punt_redirect_add (FIB_PROTOCOL_IP4,
rx_sw_if_index,
FIB_FORW_CHAIN_TYPE_UNICAST_IP4, rpaths);
- vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 1, 0, 0);
+ if (1 == ++ip4_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 1, 0, 0);
}
void
ip4_punt_redirect_del (u32 rx_sw_if_index)
{
- vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 0, 0, 0);
+ ASSERT (ip4_punt_redirect_enable_counts);
+ if (0 == --ip4_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip4-punt", "ip4-punt-redirect", 0, 0, 0, 0);
ip_punt_redirect_del (FIB_PROTOCOL_IP4, rx_sw_if_index);
}
@@ -358,10 +333,10 @@ ip4_punt_redirect_cmd (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- ip46_address_t nh = { 0 };
+ fib_route_path_t *rpaths = NULL, rpath;
+ dpo_proto_t payload_proto = DPO_PROTO_IP4;
clib_error_t *error = 0;
u32 rx_sw_if_index = ~0;
- u32 tx_sw_if_index = ~0;
vnet_main_t *vnm;
u8 is_add;
@@ -378,17 +353,13 @@ ip4_punt_redirect_cmd (vlib_main_t * vm,
else if (unformat (line_input, "add"))
is_add = 1;
else if (unformat (line_input, "rx all"))
- rx_sw_if_index = ~0;
+ rx_sw_if_index = 0;
else if (unformat (line_input, "rx %U",
unformat_vnet_sw_interface, vnm, &rx_sw_if_index))
;
- else if (unformat (line_input, "via %U %U",
- unformat_ip4_address, &nh.ip4,
- unformat_vnet_sw_interface, vnm, &tx_sw_if_index))
- ;
- else if (unformat (line_input, "via %U",
- unformat_vnet_sw_interface, vnm, &tx_sw_if_index))
- ;
+ else if (unformat (line_input, "via %U", unformat_fib_route_path, &rpath,
+ &payload_proto))
+ vec_add1 (rpaths, rpath);
else
{
error = unformat_parse_error (line_input);
@@ -404,7 +375,8 @@ ip4_punt_redirect_cmd (vlib_main_t * vm,
if (is_add)
{
- ip4_punt_redirect_add (rx_sw_if_index, tx_sw_if_index, &nh);
+ if (vec_len (rpaths))
+ ip4_punt_redirect_add_paths (rx_sw_if_index, rpaths);
}
else
{
@@ -412,6 +384,7 @@ ip4_punt_redirect_cmd (vlib_main_t * vm,
}
done:
+ vec_free (rpaths);
unformat_free (line_input);
return (error);
}
@@ -421,14 +394,12 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_punt_redirect_command, static) =
{
.path = "ip punt redirect",
.function = ip4_punt_redirect_cmd,
.short_help = "ip punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
ip4_punt_redirect_show_cmd (vlib_main_t * vm,
@@ -445,7 +416,6 @@ ip4_punt_redirect_show_cmd (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{set ip punt redierect}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) =
{
.path = "show ip punt redirect",
@@ -453,7 +423,6 @@ VLIB_CLI_COMMAND (show_ip4_punt_redirect_command, static) =
.short_help = "show ip punt redirect",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_source_and_port_range_check.c b/src/vnet/ip/ip4_source_and_port_range_check.c
index 00ab51e2440..27b2d549ea7 100644
--- a/src/vnet/ip/ip4_source_and_port_range_check.c
+++ b/src/vnet/ip/ip4_source_and_port_range_check.c
@@ -99,7 +99,9 @@ static inline u32
check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo,
u16 dst_port, u32 next)
{
+#ifdef CLIB_HAVE_VEC128
u16x8 key = u16x8_splat (dst_port);
+#endif
int i;
if (NULL == ppr_dpo || dst_port == 0)
@@ -107,9 +109,20 @@ check_adj_port_range_x1 (const protocol_port_range_dpo_t * ppr_dpo,
for (i = 0; i < ppr_dpo->n_used_blocks; i++)
+#ifdef CLIB_HAVE_VEC128
if (!u16x8_is_all_zero ((ppr_dpo->blocks[i].low.as_u16x8 <= key) &
(ppr_dpo->blocks[i].hi.as_u16x8 >= key)))
return next;
+#else
+ {
+ for (int j = 0; j < 8; j++)
+ {
+ if ((ppr_dpo->blocks[i].low.as_u16[j] <= dst_port) &&
+ (ppr_dpo->blocks[i].hi.as_u16[j] >= dst_port))
+ return next;
+ }
+ };
+#endif
return IP4_SOURCE_AND_PORT_RANGE_CHECK_NEXT_DROP;
}
@@ -550,7 +563,6 @@ ip4_source_and_port_range_check_tx (vlib_main_t * vm,
if this changes can easily make new function
*/
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = {
.function = ip4_source_and_port_range_check_rx,
.name = "ip4-source-and-port-range-check-rx",
@@ -567,9 +579,7 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_rx) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_source_and_port_range_check_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = {
.function = ip4_source_and_port_range_check_tx,
.name = "ip4-source-and-port-range-check-tx",
@@ -586,7 +596,6 @@ VLIB_REGISTER_NODE (ip4_source_port_and_range_check_tx) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_source_and_port_range_check_trace,
};
-/* *INDENT-ON* */
int
set_ip_source_and_port_range_check (vlib_main_t * vm,
@@ -749,7 +758,8 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
* @cliexend
*
* Example of how to enable range checking on TX:
- * @cliexcmd{set interface ip source-and-port-range-check GigabitEthernet2/0/0 udp-in-vrf 7}
+ * @cliexcmd{set interface ip source-and-port-range-check GigabitEthernet2/0/0
+ * udp-in-vrf 7}
*
* Example of graph node after range checking is enabled:
* @cliexstart{show vlib graph ip4-source-and-port-range-check-tx}
@@ -758,7 +768,7 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
* interface-output [1]
* @cliexend
*
- * Example of how to display the features enabed on an interface:
+ * Example of how to display the features enabled on an interface:
* @cliexstart{show ip interface features GigabitEthernet2/0/0}
* IP feature paths configured on GigabitEthernet2/0/0...
*
@@ -783,13 +793,11 @@ set_ip_source_and_port_range_check_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_source_and_port_range_check_command, static) = {
.path = "set interface ip source-and-port-range-check",
.function = set_ip_source_and_port_range_check_fn,
.short_help = "set interface ip source-and-port-range-check <interface> [tcp-out-vrf <table-id>] [udp-out-vrf <table-id>] [tcp-in-vrf <table-id>] [udp-in-vrf <table-id>] [del]",
};
-/* *INDENT-ON* */
static u8 *
format_ppr_dpo (u8 * s, va_list * args)
@@ -1250,14 +1258,12 @@ ip_source_and_port_range_check_command_fn (vlib_main_t * vm,
* Example of how to delete an IPv4 subnet and range of ports from an IPv4 FIB table:
* @cliexcmd{set ip source-and-port-range-check vrf 7 172.16.1.0/24 range 23 - 100 del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_source_and_port_range_check_command, static) = {
.path = "set ip source-and-port-range-check",
.function = ip_source_and_port_range_check_command_fn,
.short_help =
"set ip source-and-port-range-check vrf <table-id> <ip-addr>/<mask> {port nn | range <nn> - <nn>} [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -1367,7 +1373,7 @@ show_source_and_port_range_check_fn (vlib_main_t * vm,
* @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.0}
* 172.16.2.0: 23 - 101
* @cliexend
- * Example of how to test to determine of a given Pv4 address and port
+ * Example of how to test to determine of a given iPv4 address and port
* are being validated:
* @cliexstart{show ip source-and-port-range-check vrf 7 172.16.2.2 port 23}
* 172.16.2.2 port 23 PASS
@@ -1376,14 +1382,12 @@ show_source_and_port_range_check_fn (vlib_main_t * vm,
* 172.16.2.2 port 250 FAIL
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_source_and_port_range_check, static) = {
.path = "show ip source-and-port-range-check",
.function = show_source_and_port_range_check_fn,
.short_help =
"show ip source-and-port-range-check vrf <table-id> <ip-addr> [port <n>]",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip4_to_ip6.h b/src/vnet/ip/ip4_to_ip6.h
index a6d87f1f962..57c2b6ff78b 100644
--- a/src/vnet/ip/ip4_to_ip6.h
+++ b/src/vnet/ip/ip4_to_ip6.h
@@ -28,14 +28,12 @@
typedef int (*ip4_to_ip6_set_fn_t) (vlib_buffer_t * b, ip4_header_t * ip4,
ip6_header_t * ip6, void *ctx);
-/* *INDENT-OFF* */
static u8 icmp_to_icmp6_updater_pointer_table[] =
{ 0, 1, 4, 4, ~0,
~0, ~0, ~0, 7, 6,
~0, ~0, 8, 8, 8,
8, 24, 24, 24, 24
};
-/* *INDENT-ON* */
#define frag_id_4to6(id) (id)
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
index 4d94d8c1230..56eec523d5b 100644
--- a/src/vnet/ip/ip6.h
+++ b/src/vnet/ip/ip6.h
@@ -238,7 +238,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im,
ip_interface_address_t *ia;
ip6_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -249,7 +248,6 @@ ip6_interface_address_matching_destination (ip6_main_t * im,
break;
}
}));
- /* *INDENT-ON* */
if (result_ia)
*result_ia = result ? ia : 0;
return result;
@@ -287,10 +285,8 @@ u8 *format_ip6_forward_next_trace (u8 * s, va_list * args);
u32 ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0);
void ip6_punt_policer_add_del (u8 is_add, u32 policer_index);
-void ip6_punt_redirect_add (u32 rx_sw_if_index,
- u32 tx_sw_if_index, ip46_address_t * nh);
void ip6_punt_redirect_add_paths (u32 rx_sw_if_index,
- fib_route_path_t * paths);
+ const fib_route_path_t *paths);
void ip6_punt_redirect_del (u32 rx_sw_if_index);
int vnet_set_ip6_classify_intfc (vlib_main_t * vm, u32 sw_if_index,
diff --git a/src/vnet/ip/ip6_error.h b/src/vnet/ip/ip6_error.h
deleted file mode 100644
index a6fb16570b6..00000000000
--- a/src/vnet/ip/ip6_error.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- * ip/ip6_error.h: ip6 fast path errors
- *
- * Copyright (c) 2008 Eliot Dresselhaus
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef included_ip_ip6_error_h
-#define included_ip_ip6_error_h
-
-#define foreach_ip6_error \
- /* Must be first. */ \
- _ (NONE, "valid ip6 packets") \
- \
- /* Errors signalled by ip6-input */ \
- _ (TOO_SHORT, "ip6 length < 40 bytes") \
- _ (BAD_LENGTH, "ip6 length > l2 length") \
- _ (VERSION, "ip6 version != 6") \
- _ (TIME_EXPIRED, "ip6 ttl <= 1") \
- \
- /* Errors signalled by ip6-rewrite. */ \
- _ (MTU_EXCEEDED, "ip6 MTU exceeded") \
- _ (DST_LOOKUP_MISS, "ip6 destination lookup miss") \
- _ (SRC_LOOKUP_MISS, "ip6 source lookup miss") \
- _ (DROP, "ip6 drop") \
- _ (PUNT, "ip6 punt") \
- \
- /* Errors signalled by ip6-local. */ \
- _ (UNKNOWN_PROTOCOL, "unknown ip protocol") \
- _ (UDP_CHECKSUM, "bad udp checksum") \
- _ (ICMP_CHECKSUM, "bad icmp checksum") \
- _ (UDP_LENGTH, "inconsistent udp/ip lengths") \
- \
- /* Errors signalled by udp6-lookup. */ \
- _ (UNKNOWN_UDP_PORT, "no listener for udp port") \
- \
- /* Spoofed packets in ip6-rewrite-local */ \
- _(SPOOFED_LOCAL_PACKETS, "ip4 spoofed local-address packet drops") \
- \
- /* Erros singalled by ip6-inacl */ \
- _ (INACL_TABLE_MISS, "input ACL table-miss drops") \
- _ (INACL_SESSION_DENY, "input ACL session deny drops") \
- /* Erros singalled by ip6-outacl */ \
- _ (OUTACL_TABLE_MISS, "output ACL table-miss drops") \
- _ (OUTACL_SESSION_DENY, "output ACL session deny drops") \
- \
- /* Errors signalled by ip6-reassembly */ \
- _ (REASS_MISSING_UPPER, "missing-upper layer drops") \
- _ (REASS_DUPLICATE_FRAGMENT, "duplicate fragments") \
- _ (REASS_OVERLAPPING_FRAGMENT, "overlapping fragments") \
- _ (REASS_LIMIT_REACHED, "drops due to concurrent reassemblies limit") \
- _ (REASS_FRAGMENT_CHAIN_TOO_LONG, "fragment chain too long (drop)") \
- _ (REASS_NO_BUF, "out of buffers (drop)") \
- _ (REASS_TIMEOUT, "fragments dropped due to reassembly timeout") \
- _ (REASS_INTERNAL_ERROR, "drops due to internal reassembly error") \
- _ (REASS_UNSUPP_IP_PROTO, "unsupported ip protocol")
-
-typedef enum
-{
-#define _(sym,str) IP6_ERROR_##sym,
- foreach_ip6_error
-#undef _
- IP6_N_ERROR,
-} ip6_error_t;
-
-#endif /* included_ip_ip6_error_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/ip/ip6_format.c b/src/vnet/ip/ip6_format.c
index 1b8ff1e0ab0..1a1bef26aa6 100644
--- a/src/vnet/ip/ip6_format.c
+++ b/src/vnet/ip/ip6_format.c
@@ -288,7 +288,7 @@ format_ip6_header (u8 * s, va_list * args)
"\n%Utos 0x%02x, flow label 0x%x, hop limit %d, payload length %d",
format_white_space, indent, traffic_class, flow_label,
ip->hop_limit, clib_net_to_host_u16 (ip->payload_length));
-
+#if 0
/* Recurse into next protocol layer. */
if (max_header_bytes != 0 && sizeof (ip[0]) < max_header_bytes)
{
@@ -301,7 +301,7 @@ format_ip6_header (u8 * s, va_list * args)
/* next protocol header */ (void *) (ip + 1),
max_header_bytes - sizeof (ip[0]));
}
-
+#endif
return s;
}
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index ba616ebc7c7..48fb633fd32 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -48,6 +48,7 @@
#include <vnet/fib/ip6_fib.h>
#include <vnet/mfib/ip6_mfib.h>
#include <vnet/dpo/load_balance_map.h>
+#include <vnet/dpo/receive_dpo.h>
#include <vnet/dpo/classify_dpo.h>
#include <vnet/classify/vnet_classify.h>
#include <vnet/pg/pg.h>
@@ -70,7 +71,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im,
ip_lookup_main_t *lm = &im->lookup_main;
ip_interface_prefix_t *if_prefix;
- /* *INDENT-OFF* */
ip_interface_prefix_key_t key = {
.prefix = {
.fp_len = address_length,
@@ -84,7 +84,6 @@ ip6_add_interface_prefix_routes (ip6_main_t * im,
},
.sw_if_index = sw_if_index,
};
- /* *INDENT-ON* */
/* If prefix already set on interface, just increment ref count & return */
if_prefix = ip_get_interface_prefix (lm, &key);
@@ -177,7 +176,6 @@ ip6_del_interface_prefix_routes (ip6_main_t * im,
ip_lookup_main_t *lm = &im->lookup_main;
ip_interface_prefix_t *if_prefix;
- /* *INDENT-OFF* */
ip_interface_prefix_key_t key = {
.prefix = {
.fp_len = address_length,
@@ -191,13 +189,12 @@ ip6_del_interface_prefix_routes (ip6_main_t * im,
},
.sw_if_index = sw_if_index,
};
- /* *INDENT-ON* */
if_prefix = ip_get_interface_prefix (lm, &key);
if (!if_prefix)
{
clib_warning ("Prefix not found while deleting %U",
- format_ip4_address_and_length, address, address_length);
+ format_ip6_address_and_length, address, address_length);
return;
}
@@ -282,7 +279,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
ip_interface_address_t *ia = 0;
ip6_address_t *result = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm, ia, sw_if_index,
1 /* honor unnumbered */,
({
@@ -290,7 +286,6 @@ ip6_interface_first_address (ip6_main_t * im, u32 sw_if_index)
result = a;
break;
}));
- /* *INDENT-ON* */
return result;
}
@@ -308,11 +303,11 @@ ip6_add_del_interface_address (vlib_main_t * vm,
ip6_address_fib_t ip6_af, *addr_fib = 0;
const ip6_address_t *ll_addr;
- /* local0 interface doesn't support IP addressing */
- if (sw_if_index == 0)
+ error = vnet_sw_interface_supports_addressing (vnm, sw_if_index);
+ if (error)
{
- return
- clib_error_create ("local0 interface doesn't support IP addressing");
+ vnm->api_errno = VNET_API_ERROR_UNSUPPORTED;
+ return error;
}
if (ip6_address_is_link_local_unicast (address))
@@ -354,14 +349,10 @@ ip6_add_del_interface_address (vlib_main_t * vm,
return (NULL);
}
- vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
- vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
-
ip6_addr_fib_init (&ip6_af, address,
vec_elt (im->fib_index_by_sw_if_index, sw_if_index));
vec_add1 (addr_fib, ip6_af);
- /* *INDENT-OFF* */
if (!is_del)
{
/* When adding an address check that it does not conflict
@@ -419,7 +410,6 @@ ip6_add_del_interface_address (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
if_address_index = ip_interface_address_find (lm, addr_fib, address_length);
@@ -531,9 +521,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
ip6_address_t *a;
u32 is_admin_up, fib_index;
- /* Fill in lookup tables with default table (0). */
- vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
-
vec_validate_init_empty (im->
lookup_main.if_address_pool_index_by_sw_if_index,
sw_if_index, ~0);
@@ -542,7 +529,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
fib_index = vec_elt (im->fib_index_by_sw_if_index, sw_if_index);
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im->lookup_main, ia, sw_if_index,
0 /* honor unnumbered */,
({
@@ -555,7 +541,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
ip6_del_interface_routes (sw_if_index, im, fib_index,
a, ia->address_length);
}));
- /* *INDENT-ON* */
return 0;
}
@@ -563,7 +548,6 @@ ip6_sw_interface_admin_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip6_sw_interface_admin_up_down);
/* Built-in ip6 unicast rx feature path definition */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_unicast, static) =
{
.arc_name = "ip6-unicast",
@@ -688,17 +672,22 @@ VNET_FEATURE_INIT (ip6_interface_output, static) = {
.node_name = "interface-output",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
{
ip6_main_t *im = &ip6_main;
- vec_validate (im->fib_index_by_sw_if_index, sw_if_index);
- vec_validate (im->mfib_index_by_sw_if_index, sw_if_index);
+ vec_validate_init_empty (im->fib_index_by_sw_if_index, sw_if_index, ~0);
+ vec_validate_init_empty (im->mfib_index_by_sw_if_index, sw_if_index, ~0);
- if (!is_add)
+ if (is_add)
+ {
+ /* Fill in lookup tables with default table (0). */
+ im->fib_index_by_sw_if_index[sw_if_index] = 0;
+ im->mfib_index_by_sw_if_index[sw_if_index] = 0;
+ }
+ else
{
/* Ensure that IPv6 is disabled */
ip6_main_t *im6 = &ip6_main;
@@ -708,14 +697,21 @@ ip6_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
vlib_main_t *vm = vlib_get_main ();
vnet_sw_interface_update_unnumbered (sw_if_index, ~0, 0);
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 0,
({
address = ip_interface_address_get_address (lm6, ia);
ip6_add_del_interface_address(vm, sw_if_index, address, ia->address_length, 1);
}));
- /* *INDENT-ON* */
ip6_mfib_interface_enable_disable (sw_if_index, 0);
+
+ if (0 != im6->fib_index_by_sw_if_index[sw_if_index])
+ fib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
+ if (0 != im6->mfib_index_by_sw_if_index[sw_if_index])
+ mfib_table_bind (FIB_PROTOCOL_IP6, sw_if_index, 0);
+
+ /* Erase the lookup tables just in case */
+ im6->fib_index_by_sw_if_index[sw_if_index] = ~0;
+ im6->mfib_index_by_sw_if_index[sw_if_index] = ~0;
}
vnet_feature_enable_disable ("ip6-unicast", "ip6-not-enabled", sw_if_index,
@@ -738,7 +734,6 @@ VLIB_NODE_FN (ip6_lookup_node) (vlib_main_t * vm,
static u8 *format_ip6_lookup_trace (u8 * s, va_list * args);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_lookup_node) =
{
.name = "ip6-lookup",
@@ -747,7 +742,6 @@ VLIB_REGISTER_NODE (ip6_lookup_node) =
.n_next_nodes = IP6_LOOKUP_N_NEXT,
.next_nodes = IP6_LOOKUP_NEXT_NODES,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -925,7 +919,6 @@ VLIB_NODE_FN (ip6_load_balance_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_load_balance_node) =
{
.name = "ip6-load-balance",
@@ -933,7 +926,6 @@ VLIB_REGISTER_NODE (ip6_load_balance_node) =
.sibling_of = "ip6-lookup",
.format_trace = format_ip6_lookup_trace,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -956,8 +948,7 @@ format_ip6_forward_next_trace (u8 * s, va_list * args)
ip6_forward_next_trace_t *t = va_arg (*args, ip6_forward_next_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%Ufib:%d adj:%d flow:%d",
- format_white_space, indent,
+ s = format (s, "%Ufib:%d adj:%d flow:0x%08x", format_white_space, indent,
t->fib_index, t->adj_index, t->flow_hash);
s = format (s, "\n%U%U",
format_white_space, indent,
@@ -1217,23 +1208,17 @@ always_inline u8
ip6_next_proto_is_tcp_udp (vlib_buffer_t * p0, ip6_header_t * ip0,
u32 * udp_offset0)
{
- u32 proto0;
- proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_UDP, udp_offset0);
- if (proto0 != IP_PROTOCOL_UDP)
- {
- proto0 = ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, udp_offset0);
- proto0 = (proto0 == IP_PROTOCOL_TCP) ? proto0 : 0;
- }
- return proto0;
+ int nh = ip6_locate_header (p0, ip0, -1, udp_offset0);
+ if (nh > 0)
+ if (nh == IP_PROTOCOL_UDP || nh == IP_PROTOCOL_TCP)
+ return nh;
+ return 0;
}
-/* *INDENT-OFF* */
-VNET_FEATURE_ARC_INIT (ip6_local) =
-{
- .arc_name = "ip6-local",
- .start_nodes = VNET_FEATURES ("ip6-local"),
+VNET_FEATURE_ARC_INIT (ip6_local) = {
+ .arc_name = "ip6-local",
+ .start_nodes = VNET_FEATURES ("ip6-local", "ip6-receive"),
};
-/* *INDENT-ON* */
static_always_inline u8
ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
@@ -1270,7 +1255,7 @@ ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
}
n_bytes_left -= n_this_buffer;
- n_bytes_left -= p0->total_length_not_including_first_buffer;
+ n_bytes_left -= vlib_buffer_length_in_chain (vm, p0) - p0->current_length;
if (n_bytes_left == 0)
return 0;
@@ -1278,10 +1263,10 @@ ip6_tcp_udp_icmp_bad_length (vlib_main_t * vm, vlib_buffer_t * p0)
return 1;
}
-
always_inline uword
-ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, int head_of_feature_arc)
+ip6_local_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int head_of_feature_arc,
+ int is_receive_dpo)
{
ip6_main_t *im = &ip6_main;
ip_lookup_main_t *lm = &im->lookup_main;
@@ -1313,7 +1298,7 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_prefetch_buffer_data (b[3], LOAD);
}
- ip6_error_t error[2];
+ vl_counter_ip6_enum_t error[2];
error[0] = IP6_ERROR_UNKNOWN_PROTOCOL;
error[1] = IP6_ERROR_UNKNOWN_PROTOCOL;
@@ -1469,6 +1454,23 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_buffer (b[1])->sw_if_index[VLIB_TX] != ~0 ?
vnet_buffer (b[1])->sw_if_index[VLIB_TX] :
vnet_buffer (b[1])->ip.fib_index;
+
+ vnet_buffer (b[0])->ip.rx_sw_if_index =
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ vnet_buffer (b[1])->ip.rx_sw_if_index =
+ vnet_buffer (b[1])->sw_if_index[VLIB_RX];
+ if (is_receive_dpo)
+ {
+ const receive_dpo_t *rd0, *rd1;
+ rd0 =
+ receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+ rd1 =
+ receive_dpo_get (vnet_buffer (b[1])->ip.adj_index[VLIB_TX]);
+ if (rd0->rd_sw_if_index != ~0)
+ vnet_buffer (b[0])->ip.rx_sw_if_index = rd0->rd_sw_if_index;
+ if (rd1->rd_sw_if_index != ~0)
+ vnet_buffer (b[1])->ip.rx_sw_if_index = rd1->rd_sw_if_index;
+ }
} /* head_of_feature_arc */
next[0] = lm->local_next_by_ip_protocol[ip[0]->protocol];
@@ -1490,16 +1492,16 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u32 next32 = next[0];
vnet_feature_arc_start (arc_index,
- vnet_buffer (b[0])->sw_if_index
- [VLIB_RX], &next32, b[0]);
+ vnet_buffer (b[0])->ip.rx_sw_if_index,
+ &next32, b[0]);
next[0] = next32;
}
if (PREDICT_TRUE (ip6_unknown[1]))
{
u32 next32 = next[1];
vnet_feature_arc_start (arc_index,
- vnet_buffer (b[1])->sw_if_index
- [VLIB_RX], &next32, b[1]);
+ vnet_buffer (b[1])->ip.rx_sw_if_index,
+ &next32, b[1]);
next[1] = next32;
}
}
@@ -1596,6 +1598,16 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_buffer (b[0])->sw_if_index[VLIB_TX] != ~0 ?
vnet_buffer (b[0])->sw_if_index[VLIB_TX] :
vnet_buffer (b[0])->ip.fib_index;
+
+ vnet_buffer (b[0])->ip.rx_sw_if_index =
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+ if (is_receive_dpo)
+ {
+ receive_dpo_t *rd;
+ rd = receive_dpo_get (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+ if (rd->rd_sw_if_index != ~0)
+ vnet_buffer (b[0])->ip.rx_sw_if_index = rd->rd_sw_if_index;
+ }
} /* head_of_feature_arc */
next[0] = lm->local_next_by_ip_protocol[ip->protocol];
@@ -1610,8 +1622,8 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u32 next32 = next[0];
vnet_feature_arc_start (arc_index,
- vnet_buffer (b[0])->sw_if_index
- [VLIB_RX], &next32, b[0]);
+ vnet_buffer (b[0])->ip.rx_sw_if_index,
+ &next32, b[0]);
next[0] = next32;
}
}
@@ -1629,15 +1641,17 @@ ip6_local_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
VLIB_NODE_FN (ip6_local_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */ );
+ return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */,
+ 0 /* ip6_local_inline */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_local_node) =
{
.name = "ip6-local",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP_LOCAL_N_NEXT,
.next_nodes =
{
@@ -1645,19 +1659,32 @@ VLIB_REGISTER_NODE (ip6_local_node) =
[IP_LOCAL_NEXT_PUNT] = "ip6-punt",
[IP_LOCAL_NEXT_UDP_LOOKUP] = "ip6-udp-lookup",
[IP_LOCAL_NEXT_ICMP] = "ip6-icmp-input",
- [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-full-reassembly",
+ [IP_LOCAL_NEXT_REASSEMBLY] = "ip6-local-full-reassembly",
},
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_receive_local_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_local_inline (vm, node, frame, 1 /* head of feature arc */,
+ 1 /* is_receive_dpo */);
+}
+
+VLIB_REGISTER_NODE (ip6_receive_local_node) = {
+ .name = "ip6-receive",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_forward_next_trace,
+ .sibling_of = "ip6-local"
+};
VLIB_NODE_FN (ip6_local_end_of_arc_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */ );
+ return ip6_local_inline (vm, node, frame, 0 /* head of feature arc */,
+ 0 /* ip6_local_inline */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_local_end_of_arc_node) = {
.name = "ip6-local-end-of-arc",
.vector_size = sizeof (u32),
@@ -1671,7 +1698,6 @@ VNET_FEATURE_INIT (ip6_local_end_of_arc, static) = {
.node_name = "ip6-local-end-of-arc",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
#ifdef CLIB_MARCH_VARIANT
extern vlib_node_registration_t ip6_local_node;
@@ -1944,13 +1970,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
if (is_midchain)
{
- /* before we paint on the next header, update the L4
- * checksums if required, since there's no offload on a tunnel */
- vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
- vnet_calc_checksums_inline (vm, p1, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
-
/* Guess we are only writing on ipv6 header. */
vnet_rewrite_two_headers (adj0[0], adj1[0],
ip0, ip1, sizeof (ip6_header_t));
@@ -2044,9 +2063,6 @@ ip6_rewrite_inline_with_gso (vlib_main_t * vm,
if (is_midchain)
{
- vnet_calc_checksums_inline (vm, p0, 0 /* is_ip4 */ ,
- 1 /* is_ip6 */ );
-
/* Guess we are only writing on ip6 header. */
vnet_rewrite_one_header (adj0[0], ip0, sizeof (ip6_header_t));
}
@@ -2196,14 +2212,12 @@ VLIB_NODE_FN (ip6_mcast_midchain_node) (vlib_main_t * vm,
return ip6_rewrite_inline (vm, node, frame, 0, 1, 1);
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_midchain_node) =
-{
+VLIB_REGISTER_NODE (ip6_midchain_node) = {
.name = "ip6-midchain",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
.sibling_of = "ip6-rewrite",
- };
+};
VLIB_REGISTER_NODE (ip6_rewrite_node) =
{
@@ -2244,7 +2258,6 @@ VLIB_REGISTER_NODE (ip6_mcast_midchain_node) =
.sibling_of = "ip6-rewrite",
};
-/* *INDENT-ON* */
/*
* Hop-by-Hop handling
@@ -2258,7 +2271,6 @@ _(PROCESSED, "pkts with ip6 hop-by-hop options") \
_(FORMAT, "incorrectly formatted hop-by-hop options") \
_(UNKNOWN_OPTION, "unknown ip6 hop-by-hop options")
-/* *INDENT-OFF* */
typedef enum
{
#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
@@ -2266,7 +2278,6 @@ typedef enum
#undef _
IP6_HOP_BY_HOP_N_ERROR,
} ip6_hop_by_hop_error_t;
-/* *INDENT-ON* */
/*
* Primary h-b-h handler trace support
@@ -2693,7 +2704,6 @@ VLIB_NODE_FN (ip6_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
{
.name = "ip6-hop-by-hop",
@@ -2705,7 +2715,6 @@ VLIB_REGISTER_NODE (ip6_hop_by_hop_node) =
.error_strings = ip6_hop_by_hop_error_strings,
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_hop_by_hop_init (vlib_main_t * vm)
@@ -2957,14 +2966,12 @@ set_ip6_flow_hash_command_fn (vlib_main_t * vm,
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip6_flow_hash_command, static) = {
.path = "set ip6 flow-hash",
.short_help = "set ip6 flow-hash table <table-id> [src] [dst] [sport] "
"[dport] [proto] [reverse] [flowlabel]",
.function = set_ip6_flow_hash_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ip6_local_command_fn (vlib_main_t * vm,
@@ -3005,14 +3012,12 @@ show_ip6_local_command_fn (vlib_main_t * vm,
* 115
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_local, static) =
{
.path = "show ip6 local",
.function = show_ip6_local_command_fn,
.short_help = "show ip6 local",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -3124,7 +3129,6 @@ set_ip6_classify_command_fn (vlib_main_t * vm,
* Example of how to assign a classification table to an interface:
* @cliexcmd{set ip6 classify intfc GigabitEthernet2/0/0 table-index 1}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
{
.path = "set ip6 classify",
@@ -3132,7 +3136,6 @@ VLIB_CLI_COMMAND (set_ip6_classify_command, static) =
"set ip6 classify intfc <interface> table-index <classify-idx>",
.function = set_ip6_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_hop_by_hop.c b/src/vnet/ip/ip6_hop_by_hop.c
index e66084c2c4d..412741abcf8 100644
--- a/src/vnet/ip/ip6_hop_by_hop.c
+++ b/src/vnet/ip/ip6_hop_by_hop.c
@@ -438,8 +438,7 @@ VLIB_NODE_FN (ip6_add_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) =
{
.name = "ip6-add-hop-by-hop",
.vector_size = sizeof (u32),
@@ -455,7 +454,6 @@ VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = /* *INDENT-OFF* */
#undef _
},
};
-/* *INDENT-ON* */
/* The main h-b-h tracer was already invoked, no need to do much here */
typedef struct
@@ -778,7 +776,6 @@ VLIB_NODE_FN (ip6_pop_hop_by_hop_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) =
{
.name = "ip6-pop-hop-by-hop",
@@ -791,7 +788,6 @@ VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) =
/* See ip/lookup.h */
.n_next_nodes = 0,
};
-/* *INDENT-ON* */
typedef struct
{
@@ -1006,7 +1002,6 @@ VLIB_NODE_FN (ip6_local_hop_by_hop_node) (vlib_main_t * vm,
}
#ifndef CLIB_MARCH_VARIANT
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_local_hop_by_hop_node) =
{
.name = "ip6-local-hop-by-hop",
@@ -1025,7 +1020,6 @@ VLIB_REGISTER_NODE (ip6_local_hop_by_hop_node) =
[IP6_LOCAL_HOP_BY_HOP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
clib_error_t *
show_ip6_hbh_command_fn (vlib_main_t * vm,
@@ -1059,13 +1053,11 @@ show_ip6_hbh_command_fn (vlib_main_t * vm,
* Display ip6 local hop-by-hop next protocol handler nodes
* @cliexcmd{show ip6 hbh}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_hbh, static) = {
.path = "show ip6 hbh",
.short_help = "show ip6 hbh",
.function = show_ip6_hbh_command_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
@@ -1105,12 +1097,10 @@ ip6_hop_by_hop_ioam_init (vlib_main_t * vm)
return (0);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init) =
{
.runs_after = VLIB_INITS("ip_main_init", "ip6_lookup_init"),
};
-/* *INDENT-ON* */
void
ip6_local_hop_by_hop_register_protocol (u32 protocol, u32 node_index)
@@ -1264,13 +1254,11 @@ clear_ioam_rewrite_command_fn (vlib_main_t * vm,
* Example of how to clear iOAM features:
* @cliexcmd{clear ioam rewrite}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_clear_ioam_rewrite_cmd, static) = {
.path = "clear ioam rewrite",
.short_help = "clear ioam rewrite",
.function = clear_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
ip6_ioam_enable (int has_trace_option, int has_pot_option,
@@ -1371,13 +1359,11 @@ ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
* Example of how to enable trace and pot with ppc set to encap:
* @cliexcmd{set ioam rewrite trace pot ppc encap}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = {
.path = "set ioam rewrite",
.short_help = "set ioam [trace] [pot] [seqno] [analyse]",
.function = ip6_set_ioam_rewrite_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
@@ -1455,13 +1441,11 @@ ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
* EDGE TO EDGE - PPC OPTION - 1 (Encap)
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_ioam_run_cmd, static) = {
.path = "show ioam summary",
.short_help = "show ioam summary",
.function = ip6_show_ioam_summary_cmd_fn,
};
-/* *INDENT-ON* */
void
vnet_register_ioam_end_of_path_callback (void *cb)
diff --git a/src/vnet/ip/ip6_inlines.h b/src/vnet/ip/ip6_inlines.h
index 2a4bb70573b..9bd475224eb 100644
--- a/src/vnet/ip/ip6_inlines.h
+++ b/src/vnet/ip/ip6_inlines.h
@@ -49,29 +49,40 @@ always_inline u32
ip6_compute_flow_hash (const ip6_header_t * ip,
flow_hash_config_t flow_hash_config)
{
- tcp_header_t *tcp;
+ const tcp_header_t *tcp;
+ const udp_header_t *udp = (void *) (ip + 1);
+ const gtpv1u_header_t *gtpu = (void *) (udp + 1);
u64 a, b, c;
u64 t1, t2;
+ u32 t3;
uword is_tcp_udp = 0;
u8 protocol = ip->protocol;
+ uword is_udp = protocol == IP_PROTOCOL_UDP;
- if (PREDICT_TRUE
- ((ip->protocol == IP_PROTOCOL_TCP)
- || (ip->protocol == IP_PROTOCOL_UDP)))
+ if (PREDICT_TRUE ((protocol == IP_PROTOCOL_TCP) || is_udp))
{
is_tcp_udp = 1;
tcp = (void *) (ip + 1);
}
- else if (ip->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ else
{
- ip6_hop_by_hop_header_t *hbh = (ip6_hop_by_hop_header_t *) (ip + 1);
- if ((hbh->protocol == IP_PROTOCOL_TCP) ||
- (hbh->protocol == IP_PROTOCOL_UDP))
+ const void *cur = ip + 1;
+ if (protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ const ip6_hop_by_hop_header_t *hbh = cur;
+ protocol = hbh->protocol;
+ cur += (hbh->length + 1) * 8;
+ }
+ if (protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ const ip6_fragment_ext_header_t *frag = cur;
+ protocol = frag->protocol;
+ }
+ else if (protocol == IP_PROTOCOL_TCP || protocol == IP_PROTOCOL_UDP)
{
is_tcp_udp = 1;
- tcp = (tcp_header_t *) ((u8 *) hbh + ((hbh->length + 1) << 3));
+ tcp = cur;
}
- protocol = hbh->protocol;
}
t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]);
@@ -113,7 +124,13 @@ ip6_compute_flow_hash (const ip6_header_t * ip,
((flow_hash_config & IP_FLOW_HASH_FL) ? ip6_flow_label_network_order (ip) :
0);
c ^= t1;
-
+ if (PREDICT_TRUE (is_udp) &&
+ PREDICT_FALSE ((flow_hash_config & IP_FLOW_HASH_GTPV1_TEID) &&
+ udp->dst_port == GTPV1_PORT_BE))
+ {
+ t3 = gtpu->teid;
+ a ^= t3;
+ }
hash_mix64 (a, b, c);
return (u32) c;
}
@@ -134,65 +151,17 @@ ip6_compute_flow_hash (const ip6_header_t * ip,
* it is a non-first fragment -1 is returned.
*/
always_inline int
-ip6_locate_header (vlib_buffer_t * p0,
- ip6_header_t * ip0, int find_hdr_type, u32 * offset)
+ip6_locate_header (vlib_buffer_t *b, ip6_header_t *ip, int find_hdr_type,
+ u32 *offset)
{
- u8 next_proto = ip0->protocol;
- u8 *next_header;
- u8 done = 0;
- u32 cur_offset;
- u8 *temp_nxthdr = 0;
- u32 exthdr_len = 0;
-
- next_header = ip6_next_header (ip0);
- cur_offset = sizeof (ip6_header_t);
- while (1)
+ ip6_ext_hdr_chain_t hdr_chain;
+ int res = ip6_ext_header_walk (b, ip, find_hdr_type, &hdr_chain);
+ if (res >= 0)
{
- done = (next_proto == find_hdr_type);
- if (PREDICT_FALSE
- (next_header >=
- (u8 *) vlib_buffer_get_current (p0) + p0->current_length))
- {
- //A malicious packet could set an extension header with a too big size
- return (-1);
- }
- if (done)
- break;
- if ((!ip6_ext_hdr (next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT)
- {
- if (find_hdr_type < 0)
- break;
- return -1;
- }
- if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION)
- {
- ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) next_header;
- u16 frag_off = ip6_frag_hdr_offset (frag_hdr);
- /* Non first fragment return -1 */
- if (frag_off)
- return (-1);
- exthdr_len = sizeof (ip6_frag_hdr_t);
- temp_nxthdr = next_header + exthdr_len;
- }
- else if (next_proto == IP_PROTOCOL_IPSEC_AH)
- {
- exthdr_len =
- ip6_ext_authhdr_len (((ip6_ext_header_t *) next_header));
- temp_nxthdr = next_header + exthdr_len;
- }
- else
- {
- exthdr_len =
- ip6_ext_header_len (((ip6_ext_header_t *) next_header));
- temp_nxthdr = next_header + exthdr_len;
- }
- next_proto = ((ip6_ext_header_t *) next_header)->next_hdr;
- next_header = temp_nxthdr;
- cur_offset += exthdr_len;
+ *offset = hdr_chain.eh[res].offset;
+ return hdr_chain.eh[res].protocol;
}
-
- *offset = cur_offset;
- return (next_proto);
+ return -1;
}
diff --git a/src/vnet/ip/ip6_input.c b/src/vnet/ip/ip6_input.c
index 01b8f46b4d8..64c9d76ebaa 100644
--- a/src/vnet/ip/ip6_input.c
+++ b/src/vnet/ip/ip6_input.c
@@ -219,21 +219,12 @@ VLIB_NODE_FN (ip6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return frame->n_vectors;
}
-#ifndef CLIB_MARCH_VARIANT
-char *ip6_error_strings[] = {
-#define _(sym,string) string,
- foreach_ip6_error
-#undef _
-};
-#endif /* CLIB_MARCH_VARIANT */
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_input_node) = {
.name = "ip6-input",
.vector_size = sizeof (u32),
.n_errors = IP6_N_ERROR,
- .error_strings = ip6_error_strings,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_INPUT_N_NEXT,
.next_nodes = {
@@ -246,7 +237,6 @@ VLIB_REGISTER_NODE (ip6_input_node) = {
.format_buffer = format_ip6_header,
.format_trace = format_ip6_input_trace,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/ip6_input.h b/src/vnet/ip/ip6_input.h
index fe993caa889..49e37ec1808 100644
--- a/src/vnet/ip/ip6_input.h
+++ b/src/vnet/ip/ip6_input.h
@@ -43,8 +43,6 @@
#include <vnet/ip/ip.h>
#include <vnet/ip/icmp6.h>
-extern char *ip6_error_strings[];
-
typedef enum
{
IP6_INPUT_NEXT_DROP,
diff --git a/src/vnet/ip/ip6_link.c b/src/vnet/ip/ip6_link.c
index afa9d8e3ea9..c2a7ccacbc1 100644
--- a/src/vnet/ip/ip6_link.c
+++ b/src/vnet/ip/ip6_link.c
@@ -242,12 +242,10 @@ ip6_link_delegate_flush (ip6_link_t * il)
{
ip6_link_delegate_t *ild;
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
il_delegate_vfts[ild->ild_type].ildv_disable(ild->ild_index);
}));
- /* *INDENT-ON* */
vec_free (il->il_delegates);
il->il_delegates = NULL;
@@ -357,14 +355,12 @@ ip6_link_set_local_address (u32 sw_if_index, const ip6_address_t * address)
ip6_address_copy (&ilp.ilp_addr, address);
ip6_ll_table_entry_update (&ilp, FIB_ROUTE_PATH_LOCAL);
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
if (NULL != il_delegate_vfts[ild->ild_type].ildv_ll_change)
il_delegate_vfts[ild->ild_type].ildv_ll_change(ild->ild_index,
&il->il_ll_addr);
}));
- /* *INDENT-ON* */
return (0);
}
@@ -465,7 +461,6 @@ ip6_link_add_del_address (ip6_main_t * im,
if (NULL == il)
return;
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE (ild, il,
({
if (is_delete)
@@ -481,7 +476,6 @@ ip6_link_add_del_address (ip6_main_t * im,
address, address_length);
}
}));
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -555,14 +549,12 @@ test_ip6_link_command_fn (vlib_main_t * vm,
* Original MAC address: 16:d9:e0:91:79:86
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_link_command, static) =
{
.path = "test ip6 link",
.function = test_ip6_link_command_fn,
.short_help = "test ip6 link <mac-address>",
};
-/* *INDENT-ON* */
static u8 *
ip6_print_addrs (u8 * s, u32 * addrs)
@@ -594,11 +586,10 @@ format_ip6_link (u8 * s, va_list * arg)
if (!ip6_link_is_enabled_i (il))
return (s);
- s = format (s, "%U is admin %s\n",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, il->il_sw_if_index),
- (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ?
- "up" : "down"));
+ s = format (
+ s, "%U is admin %s\n", format_vnet_sw_if_index_name, vnm,
+ il->il_sw_if_index,
+ (vnet_sw_interface_is_admin_up (vnm, il->il_sw_if_index) ? "up" : "down"));
u32 ai;
u32 *link_scope = 0, *global_scope = 0;
@@ -660,13 +651,11 @@ format_ip6_link (u8 * s, va_list * arg)
s = format (s, "%U%U\n",
format_white_space, 4, format_ip6_address, &il->il_ll_addr);
- /* *INDENT-OFF* */
FOREACH_IP6_LINK_DELEGATE(ild, il,
({
s = format (s, "%U", il_delegate_vfts[ild->ild_type].ildv_format,
ild->ild_index, 2);
}));
- /* *INDENT-ON* */
return (s);
}
@@ -739,14 +728,12 @@ ip6_link_show (vlib_main_t * vm,
* show ip6 interface: IPv6 not enabled on interface
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_link_show_command, static) =
{
.path = "show ip6 interface",
.function = ip6_link_show,
.short_help = "show ip6 interface <interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
enable_ip6_interface_cmd (vlib_main_t * vm,
@@ -779,14 +766,12 @@ enable_ip6_interface_cmd (vlib_main_t * vm,
* Example of how enable IPv6 on a given interface:
* @cliexcmd{enable ip6 interface GigabitEthernet2/0/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_ip6_interface_command, static) =
{
.path = "enable ip6 interface",
.function = enable_ip6_interface_cmd,
.short_help = "enable ip6 interface <interface>",
};
-/* *INDENT-ON* */
static clib_error_t *
disable_ip6_interface_cmd (vlib_main_t * vm,
@@ -819,14 +804,12 @@ disable_ip6_interface_cmd (vlib_main_t * vm,
* Example of how disable IPv6 on a given interface:
* @cliexcmd{disable ip6 interface GigabitEthernet2/0/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (disable_ip6_interface_command, static) =
{
.path = "disable ip6 interface",
.function = disable_ip6_interface_cmd,
.short_help = "disable ip6 interface <interface>",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_ll_table.c b/src/vnet/ip/ip6_ll_table.c
index e4010bc43c4..f9172f6c50c 100644
--- a/src/vnet/ip/ip6_ll_table.c
+++ b/src/vnet/ip/ip6_ll_table.c
@@ -52,9 +52,8 @@ ip6_ll_fib_create (u32 sw_if_index)
vnet_main_t *vnm = vnet_get_main ();
u8 *desc;
- desc = format (NULL, "IP6-link-local:%U",
- format_vnet_sw_interface_name,
- vnm, vnet_get_sw_interface (vnm, sw_if_index));
+ desc = format (NULL, "IP6-link-local:%U", format_vnet_sw_if_index_name, vnm,
+ sw_if_index);
ip6_ll_table.ilt_fibs[sw_if_index] =
ip6_fib_table_create_and_lock (FIB_SOURCE_IP6_ND,
@@ -64,7 +63,6 @@ ip6_ll_fib_create (u32 sw_if_index)
* leave the default route as a drop, but fix fe::/10 to be a glean
* via the interface.
*/
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_proto = FIB_PROTOCOL_IP6,
.fp_len = 10,
@@ -90,7 +88,6 @@ ip6_ll_fib_create (u32 sw_if_index)
1,
NULL,
FIB_ROUTE_PATH_FLAG_NONE);
- /* *INDENT-ON* */
}
static void
@@ -111,12 +108,17 @@ ip6_ll_table_entry_update (const ip6_ll_prefix_t * ilp,
.frp_flags = flags,
.frp_sw_if_index = ilp->ilp_sw_if_index,
.frp_proto = DPO_PROTO_IP6,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
};
- fib_prefix_t fp;
+ fib_prefix_t fp = { 0 };
- vec_validate (ip6_ll_table.ilt_fibs, ilp->ilp_sw_if_index);
+ if (flags & FIB_ROUTE_PATH_LOCAL)
+ rpath.frp_addr.ip6 = ilp->ilp_addr;
- if (0 == ip6_ll_fib_get (ilp->ilp_sw_if_index))
+ vec_validate_init_empty (ip6_ll_table.ilt_fibs, ilp->ilp_sw_if_index, ~0);
+
+ if (~0 == ip6_ll_fib_get (ilp->ilp_sw_if_index))
{
ip6_ll_fib_create (ilp->ilp_sw_if_index);
}
@@ -151,11 +153,12 @@ ip6_ll_table_entry_delete (const ip6_ll_prefix_t * ilp)
* if there are no ND sourced prefixes left, then we can clean up this FIB
*/
fib_index = ip6_ll_fib_get (ilp->ilp_sw_if_index);
- if (0 == fib_table_get_num_entries (fib_index,
- FIB_PROTOCOL_IP6, FIB_SOURCE_IP6_ND))
+ if (~0 != fib_index &&
+ 0 == fib_table_get_num_entries (fib_index, FIB_PROTOCOL_IP6,
+ FIB_SOURCE_IP6_ND))
{
fib_table_unlock (fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_IP6_ND);
- ip6_ll_table.ilt_fibs[ilp->ilp_sw_if_index] = 0;
+ ip6_ll_table.ilt_fibs[ilp->ilp_sw_if_index] = ~0;
}
}
@@ -273,8 +276,7 @@ ip6_ll_show_fib (vlib_main_t * vm,
u8 *s = NULL;
fib_index = ip6_ll_table.ilt_fibs[sw_if_index];
-
- if (0 == fib_index)
+ if (~0 == fib_index)
continue;
fib_table = fib_table_get (fib_index, FIB_PROTOCOL_IP6);
@@ -345,13 +347,21 @@ ip6_ll_show_fib (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
.path = "show ip6-ll",
.short_help = "show ip6-ll [summary] [interface] [<ip6-addr>[/<width>]] [detail]",
.function = ip6_ll_show_fib,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+ip6_ll_sw_interface_add_del (vnet_main_t *vnm, u32 sw_if_index, u32 is_add)
+{
+ vec_validate_init_empty (ip6_ll_table.ilt_fibs, sw_if_index, ~0);
+
+ return (NULL);
+}
+
+VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip6_ll_sw_interface_add_del);
static clib_error_t *
ip6_ll_module_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/ip6_ll_types.c b/src/vnet/ip/ip6_ll_types.c
index a7ac164b05a..b074b6e991c 100644
--- a/src/vnet/ip/ip6_ll_types.c
+++ b/src/vnet/ip/ip6_ll_types.c
@@ -23,10 +23,8 @@ format_ip6_ll_prefix (u8 * s, va_list * args)
ip6_ll_prefix_t *ilp = va_arg (*args, ip6_ll_prefix_t *);
vnet_main_t *vnm = vnet_get_main ();
- s = format (s, "(%U, %U)",
- format_ip6_address, &ilp->ilp_addr,
- format_vnet_sw_interface_name,
- vnm, vnet_get_sw_interface (vnm, ilp->ilp_sw_if_index));
+ s = format (s, "(%U, %U)", format_ip6_address, &ilp->ilp_addr,
+ format_vnet_sw_if_index_name, vnm, ilp->ilp_sw_if_index);
return (s);
}
diff --git a/src/vnet/ip/ip6_packet.h b/src/vnet/ip/ip6_packet.h
index 7a8c31cee48..c506792ddcf 100644
--- a/src/vnet/ip/ip6_packet.h
+++ b/src/vnet/ip/ip6_packet.h
@@ -40,8 +40,9 @@
#ifndef included_ip6_packet_h
#define included_ip6_packet_h
-#include <vnet/tcp/tcp_packet.h>
+#include <vlib/vlib.h>
#include <vnet/ip/ip4_packet.h>
+#include <stdbool.h>
typedef union
{
@@ -62,13 +63,11 @@ typedef struct
} ip6_address_and_mask_t;
/* Packed so that the mhash key doesn't include uninitialized pad bytes */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/* IP address must be first for ip_interface_address_get_address() to work */
ip6_address_t ip6_addr;
u32 fib_index;
}) ip6_address_fib_t;
-/* *INDENT-ON* */
always_inline void
ip6_addr_fib_init (ip6_address_fib_t * addr_fib,
@@ -424,97 +423,39 @@ ip6_copy_header (ip6_header_t * dst, const ip6_header_t * src)
dst->dst_address.as_uword[1] = src->dst_address.as_uword[1];
}
-always_inline void
-ip6_tcp_reply_x1 (ip6_header_t * ip0, tcp_header_t * tcp0)
-{
- {
- ip6_address_t src0, dst0;
-
- src0 = ip0->src_address;
- dst0 = ip0->dst_address;
- ip0->src_address = dst0;
- ip0->dst_address = src0;
- }
-
- {
- u16 src0, dst0;
-
- src0 = tcp0->src;
- dst0 = tcp0->dst;
- tcp0->src = dst0;
- tcp0->dst = src0;
- }
-}
-
-always_inline void
-ip6_tcp_reply_x2 (ip6_header_t * ip0, ip6_header_t * ip1,
- tcp_header_t * tcp0, tcp_header_t * tcp1)
-{
- {
- ip6_address_t src0, dst0, src1, dst1;
-
- src0 = ip0->src_address;
- src1 = ip1->src_address;
- dst0 = ip0->dst_address;
- dst1 = ip1->dst_address;
- ip0->src_address = dst0;
- ip1->src_address = dst1;
- ip0->dst_address = src0;
- ip1->dst_address = src1;
- }
-
- {
- u16 src0, dst0, src1, dst1;
-
- src0 = tcp0->src;
- src1 = tcp1->src;
- dst0 = tcp0->dst;
- dst1 = tcp1->dst;
- tcp0->src = dst0;
- tcp1->src = dst1;
- tcp0->dst = src0;
- tcp1->dst = src1;
- }
-}
-
-
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 data;
}) ip6_pad1_option_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
u8 type;
u8 len;
u8 data[0];
}) ip6_padN_option_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
#define IP6_MLDP_ALERT_TYPE 0x5
u8 type;
u8 len;
u16 value;
}) ip6_router_alert_option_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct {
+ u8 protocol;
+ u8 reserved;
+ u16 fragoff;
+ u32 id;
+}) ip6_fragment_ext_header_t;
+
typedef CLIB_PACKED (struct {
u8 next_hdr;
/* Length of this header plus option data in 8 byte units. */
u8 n_data_u64s;
}) ip6_ext_header_t;
-/* *INDENT-ON* */
#define foreach_ext_hdr_type \
_(IP6_HOP_BY_HOP_OPTIONS) \
_(IPV6_ROUTE) \
- _(IPV6_FRAGMENTATION) \
- _(IPSEC_ESP) \
- _(IPSEC_AH) \
_(IP6_DESTINATION_OPTIONS) \
_(MOBILITY) \
_(HIP) \
@@ -542,15 +483,70 @@ ip6_ext_hdr (u8 nexthdr)
#endif
}
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ /* Length of this header plus option data in 8 byte units. */
+ u8 n_data_u64s;
+ u8 data[0];
+}) ip6_hop_by_hop_ext_t;
+
+typedef CLIB_PACKED (struct {
+ u8 next_hdr;
+ u8 rsv;
+ u16 fragment_offset_and_more;
+ u32 identification;
+}) ip6_frag_hdr_t;
+
+#define ip6_frag_hdr_offset(hdr) \
+ (clib_net_to_host_u16 ((hdr)->fragment_offset_and_more) >> 3)
+
+#define ip6_frag_hdr_offset_bytes(hdr) (8 * ip6_frag_hdr_offset (hdr))
+
+#define ip6_frag_hdr_more(hdr) \
+ (clib_net_to_host_u16 ((hdr)->fragment_offset_and_more) & 0x1)
+
+#define ip6_frag_hdr_offset_and_more(offset, more) \
+ clib_host_to_net_u16 (((offset) << 3) + !!(more))
+
#define ip6_ext_header_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+1) << 3)
#define ip6_ext_authhdr_len(p) ((((ip6_ext_header_t *)(p))->n_data_u64s+2) << 2)
+static inline int
+ip6_ext_header_len_s (ip_protocol_t nh, void *p)
+{
+ if (ip6_ext_hdr (nh))
+ return ip6_ext_header_len (p);
+ switch (nh)
+ {
+ case IP_PROTOCOL_IPSEC_AH:
+ return ip6_ext_authhdr_len (p);
+ case IP_PROTOCOL_IPV6_FRAGMENTATION:
+ return sizeof (ip6_frag_hdr_t);
+ case IP_PROTOCOL_ICMP6:
+ return 4;
+ case IP_PROTOCOL_UDP:
+ return 8;
+ case IP_PROTOCOL_TCP:
+ return 20;
+ default: /* Caller is responsible for validating the length of terminating
+ protocols */
+ ;
+ }
+ return 0;
+}
+
always_inline void *
ip6_ext_next_header (ip6_ext_header_t * ext_hdr)
{
return (void *) ((u8 *) ext_hdr + ip6_ext_header_len (ext_hdr));
}
+always_inline void *
+ip6_ext_next_header_offset (void *hdr, u16 offset)
+{
+ return (hdr + offset);
+}
+
always_inline int
vlib_object_within_buffer_data (vlib_main_t * vm, vlib_buffer_t * b,
void *obj, size_t len)
@@ -562,153 +558,144 @@ vlib_object_within_buffer_data (vlib_main_t * vm, vlib_buffer_t * b,
return 1;
}
-/*
- * find ipv6 extension header within ipv6 header within buffer b
- *
- * @param vm
- * @param b buffer to limit search to
- * @param ip6_header ipv6 header
- * @param header_type extension header type to search for
- * @param[out] prev_ext_header address of header preceding found header
- */
+/* Returns the number of bytes left in buffer from p. */
+static inline u32
+vlib_bytes_left_in_buffer (vlib_buffer_t *b, void *obj)
+{
+ return b->current_length - (((u8 *) obj - b->data) - b->current_data);
+}
+
always_inline void *
-ip6_ext_header_find (vlib_main_t * vm, vlib_buffer_t * b,
- ip6_header_t * ip6_header, u8 header_type,
- ip6_ext_header_t ** prev_ext_header)
+ip6_ext_next_header_s (ip_protocol_t cur_nh, void *hdr, u32 max_offset,
+ u32 *offset, int *res_nh, bool *last)
{
- ip6_ext_header_t *prev = NULL;
- ip6_ext_header_t *result = NULL;
- if ((ip6_header)->protocol == header_type)
+ u16 hdrlen = 0;
+ int new_nh = -1;
+ void *res = 0;
+ if (ip6_ext_hdr (cur_nh))
{
- result = (void *) (ip6_header + 1);
- if (!vlib_object_within_buffer_data (vm, b, result,
- ip6_ext_header_len (result)))
- {
- result = NULL;
- }
+ hdrlen = ip6_ext_header_len (hdr);
+ new_nh = ((ip6_ext_header_t *) hdr)->next_hdr;
+ res = hdr + hdrlen;
+ }
+ else if (cur_nh == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ {
+ ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) hdr;
+ if (ip6_frag_hdr_offset (frag_hdr) > 0)
+ *last = true;
+ new_nh = frag_hdr->next_hdr;
+ hdrlen = sizeof (ip6_frag_hdr_t);
+ res = hdr + hdrlen;
+ }
+ else if (cur_nh == IP_PROTOCOL_IPSEC_AH)
+ {
+ new_nh = ((ip6_ext_header_t *) hdr)->next_hdr;
+ hdrlen = ip6_ext_authhdr_len (hdr);
+ res = hdr + hdrlen;
}
else
{
- result = NULL;
- prev = (void *) (ip6_header + 1);
- while (ip6_ext_hdr (prev->next_hdr) && prev->next_hdr != header_type)
- {
- prev = ip6_ext_next_header (prev);
- if (!vlib_object_within_buffer_data (vm, b, prev,
- ip6_ext_header_len (prev)))
- {
- prev = NULL;
- break;
- }
- }
- if (prev && (prev->next_hdr == header_type))
- {
- result = ip6_ext_next_header (prev);
- if (!vlib_object_within_buffer_data (vm, b, result,
- ip6_ext_header_len (result)))
- {
- result = NULL;
- }
- }
+ ;
}
- if (prev_ext_header)
+
+ if (res && (*offset + hdrlen) >= max_offset)
{
- *prev_ext_header = prev;
+ return 0;
}
- return result;
+ *res_nh = new_nh;
+ *offset += hdrlen;
+ return res;
}
+#define IP6_EXT_HDR_MAX (4) /* Maximum number of headers */
+#define IP6_EXT_HDR_MAX_DEPTH (256) /* Maximum header depth */
+typedef struct
+{
+ int length;
+ struct
+ {
+ u16 protocol;
+ u16 offset;
+ } eh[IP6_EXT_HDR_MAX];
+} ip6_ext_hdr_chain_t;
+
/*
- * walk extension headers, looking for a specific extension header and last
- * extension header, calculating length of all extension headers
+ * Find ipv6 extension header within ipv6 header within
+ * whichever is smallest of buffer or IP6_EXT_HDR_MAX_DEPTH.
+ * The complete header chain must be in first buffer.
*
- * @param vm
- * @param b buffer to limit search to
- * @param ip6_header ipv6 header
- * @param find_hdr extension header to look for (ignored if ext_hdr is NULL)
- * @param length[out] length of all extension headers
- * @param ext_hdr[out] extension header of type find_hdr (may be NULL)
- * @param last_ext_hdr[out] last extension header (may be NULL)
- *
- * @return 0 on success, -1 on failure (ext headers crossing buffer boundary)
+ * The complete header chain (up to the terminating header) is
+ * returned in res.
+ * Returns the index of the find_hdr_type if > 0. Otherwise
+ * it returns the index of the last header.
*/
always_inline int
-ip6_walk_ext_hdr (vlib_main_t * vm, vlib_buffer_t * b,
- const ip6_header_t * ip6_header, u8 find_hdr, u32 * length,
- ip6_ext_header_t ** ext_hdr,
- ip6_ext_header_t ** last_ext_hdr)
-{
- if (!ip6_ext_hdr (ip6_header->protocol))
- {
- *length = 0;
- *ext_hdr = NULL;
- *last_ext_hdr = NULL;
- return 0;
- }
- *length = 0;
- ip6_ext_header_t *h = (void *) (ip6_header + 1);
- if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h)))
+ip6_ext_header_walk (vlib_buffer_t *b, ip6_header_t *ip, int find_hdr_type,
+ ip6_ext_hdr_chain_t *res)
+{
+ int i = 0;
+ int found = -1;
+ void *next_header = ip6_next_header (ip);
+ int next_proto = ip->protocol;
+ res->length = 0;
+ u32 n_bytes_this_buffer =
+ clib_min (vlib_bytes_left_in_buffer (b, ip), IP6_EXT_HDR_MAX_DEPTH);
+ u32 max_offset = clib_min (n_bytes_this_buffer,
+ sizeof (ip6_header_t) +
+ clib_net_to_host_u16 (ip->payload_length));
+ u32 offset = sizeof (ip6_header_t);
+ if ((ip6_ext_header_len_s (ip->protocol, next_header) + offset) > max_offset)
{
return -1;
}
- *length += ip6_ext_header_len (h);
- *last_ext_hdr = h;
- *ext_hdr = NULL;
- if (ip6_header->protocol == find_hdr)
+ bool last = false;
+ while (next_header)
{
- *ext_hdr = h;
+ /* Move on to next header */
+ res->eh[i].offset = offset;
+ res->eh[i].protocol = next_proto;
+ if (next_proto == find_hdr_type)
+ found = i;
+ i++;
+ if (last)
+ break;
+ if (i >= IP6_EXT_HDR_MAX)
+ break;
+ next_header = ip6_ext_next_header_s (next_proto, next_header, max_offset,
+ &offset, &next_proto, &last);
}
- while (ip6_ext_hdr (h->next_hdr))
+ res->length = i;
+ if (find_hdr_type < 0)
+ return i - 1;
+ return found != -1 ? found : i - 1;
+}
+
+always_inline void *
+ip6_ext_header_find (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip,
+ int find_hdr_type, ip6_ext_header_t **prev_ext_header)
+{
+ ip6_ext_hdr_chain_t hdr_chain;
+ int res = ip6_ext_header_walk (b, ip, find_hdr_type, &hdr_chain);
+ if (res < 0)
+ return 0;
+
+ if (prev_ext_header)
{
- if (h->next_hdr == find_hdr)
+ if (res > 0)
{
- h = ip6_ext_next_header (h);
- *ext_hdr = h;
+ *prev_ext_header =
+ ip6_ext_next_header_offset (ip, hdr_chain.eh[res - 1].offset);
}
else
{
- h = ip6_ext_next_header (h);
+ *prev_ext_header = 0;
}
- if (!vlib_object_within_buffer_data (vm, b, h, ip6_ext_header_len (h)))
- {
- return -1;
- }
- *length += ip6_ext_header_len (h);
- *last_ext_hdr = h;
}
+ if (find_hdr_type == hdr_chain.eh[res].protocol)
+ return ip6_ext_next_header_offset (ip, hdr_chain.eh[res].offset);
return 0;
}
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- u8 next_hdr;
- /* Length of this header plus option data in 8 byte units. */
- u8 n_data_u64s;
- u8 data[0];
-}) ip6_hop_by_hop_ext_t;
-/* *INDENT-ON* */
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- u8 next_hdr;
- u8 rsv;
- u16 fragment_offset_and_more;
- u32 identification;
-}) ip6_frag_hdr_t;
-/* *INDENT-ON* */
-
-#define ip6_frag_hdr_offset(hdr) \
- (clib_net_to_host_u16((hdr)->fragment_offset_and_more) >> 3)
-
-#define ip6_frag_hdr_offset_bytes(hdr) \
- (8 * ip6_frag_hdr_offset(hdr))
-
-#define ip6_frag_hdr_more(hdr) \
- (clib_net_to_host_u16((hdr)->fragment_offset_and_more) & 0x1)
-
-#define ip6_frag_hdr_offset_and_more(offset, more) \
- clib_host_to_net_u16(((offset) << 3) + !!(more))
-
#endif /* included_ip6_packet_h */
/*
diff --git a/src/vnet/ip/ip6_punt_drop.c b/src/vnet/ip/ip6_punt_drop.c
index 107703a7b6d..78ca9521f53 100644
--- a/src/vnet/ip/ip6_punt_drop.c
+++ b/src/vnet/ip/ip6_punt_drop.c
@@ -18,7 +18,6 @@
#include <vnet/policer/policer.h>
#include <vnet/policer/police_inlines.h>
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (ip6_punt) =
{
.arc_name = "ip6-punt",
@@ -30,7 +29,6 @@ VNET_FEATURE_ARC_INIT (ip6_drop) =
.arc_name = "ip6-drop",
.start_nodes = VNET_FEATURES ("ip6-drop", "ip6-not-enabled"),
};
-/* *INDENT-ON* */
extern ip_punt_policer_t ip6_punt_policer_cfg;
@@ -77,7 +75,6 @@ VLIB_NODE_FN (ip6_punt_policer_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_punt_policer_node) = {
.name = "ip6-punt-policer",
@@ -99,7 +96,6 @@ VNET_FEATURE_INIT (ip6_punt_policer_node, static) = {
.node_name = "ip6-punt-policer",
.runs_before = VNET_FEATURES("ip6-punt-redirect")
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_drop_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
@@ -134,7 +130,6 @@ VLIB_NODE_FN (ip6_punt_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_feat_arc_ip6_punt.feature_arc_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_drop_node) =
{
.name = "ip6-drop",
@@ -146,15 +141,11 @@ VLIB_REGISTER_NODE (ip6_drop_node) =
},
};
-VLIB_REGISTER_NODE (ip6_not_enabled_node) =
-{
+VLIB_REGISTER_NODE (ip6_not_enabled_node) = {
.name = "ip6-not-enabled",
.vector_size = sizeof (u32),
.format_trace = format_ip6_forward_next_trace,
- .n_next_nodes = 1,
- .next_nodes = {
- [0] = "error-drop",
- },
+ .sibling_of = "ip6-drop",
};
VLIB_REGISTER_NODE (ip6_punt_node) =
@@ -179,7 +170,6 @@ VNET_FEATURE_INIT (ip6_drop_end_of_arc, static) = {
.node_name = "error-drop",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON */
#ifndef CLIB_MARCH_VARIANT
void
@@ -243,7 +233,6 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_punt_policer_command, static) =
{
.path = "ip6 punt policer",
@@ -251,7 +240,6 @@ VLIB_CLI_COMMAND (ip6_punt_policer_command, static) =
.short_help = "ip6 punt policer [add|del] <index>",
};
-/* *INDENT-ON* */
#define foreach_ip6_punt_redirect_error \
_(DROP, "ip6 punt redirect drop")
@@ -279,7 +267,6 @@ VLIB_NODE_FN (ip6_punt_redirect_node) (vlib_main_t * vm,
FIB_PROTOCOL_IP6));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_punt_redirect_node) = {
.name = "ip6-punt-redirect",
.vector_size = sizeof (u32),
@@ -301,44 +288,29 @@ VNET_FEATURE_INIT (ip6_punt_redirect_node, static) = {
.node_name = "ip6-punt-redirect",
.runs_before = VNET_FEATURES("error-punt")
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
-void
-ip6_punt_redirect_add (u32 rx_sw_if_index,
- u32 tx_sw_if_index, ip46_address_t * nh)
-{
- /* *INDENT-OFF* */
- fib_route_path_t *rpaths = NULL, rpath = {
- .frp_proto = DPO_PROTO_IP6,
- .frp_addr = *nh,
- .frp_sw_if_index = tx_sw_if_index,
- .frp_weight = 1,
- .frp_fib_index = ~0,
- };
- /* *INDENT-ON* */
- vec_add1 (rpaths, rpath);
-
- ip6_punt_redirect_add_paths (rx_sw_if_index, rpaths);
-
- vec_free (rpaths);
-}
+static u32 ip6_punt_redirect_enable_counts;
void
-ip6_punt_redirect_add_paths (u32 rx_sw_if_index, fib_route_path_t * rpaths)
+ip6_punt_redirect_add_paths (u32 rx_sw_if_index,
+ const fib_route_path_t *rpaths)
{
ip_punt_redirect_add (FIB_PROTOCOL_IP6,
rx_sw_if_index,
FIB_FORW_CHAIN_TYPE_UNICAST_IP6, rpaths);
- vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 1, 0, 0);
+ if (1 == ++ip6_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 1, 0, 0);
}
void
ip6_punt_redirect_del (u32 rx_sw_if_index)
{
- vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 0, 0, 0);
+ ASSERT (ip6_punt_redirect_enable_counts);
+ if (0 == --ip6_punt_redirect_enable_counts)
+ vnet_feature_enable_disable ("ip6-punt", "ip6-punt-redirect", 0, 0, 0, 0);
ip_punt_redirect_del (FIB_PROTOCOL_IP6, rx_sw_if_index);
}
@@ -351,7 +323,7 @@ ip6_punt_redirect_cmd (vlib_main_t * vm,
{
unformat_input_t _line_input, *line_input = &_line_input;
fib_route_path_t *rpaths = NULL, rpath;
- dpo_proto_t payload_proto;
+ dpo_proto_t payload_proto = DPO_PROTO_IP6;
clib_error_t *error = 0;
u32 rx_sw_if_index = ~0;
vnet_main_t *vnm;
@@ -370,7 +342,7 @@ ip6_punt_redirect_cmd (vlib_main_t * vm,
else if (unformat (line_input, "add"))
is_add = 1;
else if (unformat (line_input, "rx all"))
- rx_sw_if_index = ~0;
+ rx_sw_if_index = 0;
else if (unformat (line_input, "rx %U",
unformat_vnet_sw_interface, vnm, &rx_sw_if_index))
;
@@ -401,6 +373,7 @@ ip6_punt_redirect_cmd (vlib_main_t * vm,
}
done:
+ vec_free (rpaths);
unformat_free (line_input);
return (error);
}
@@ -410,14 +383,12 @@ done:
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_punt_redirect_command, static) =
{
.path = "ip6 punt redirect",
.function = ip6_punt_redirect_cmd,
.short_help = "ip6 punt redirect [add|del] rx [<interface>|all] via [<nh>] <tx_interface>",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
@@ -438,7 +409,6 @@ ip6_punt_redirect_show_cmd (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{set ip punt policer <INDEX>}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) =
{
.path = "show ip6 punt redirect",
@@ -446,7 +416,6 @@ VLIB_CLI_COMMAND (show_ip6_punt_redirect_command, static) =
.short_help = "show ip6 punt redirect",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip6_to_ip4.h b/src/vnet/ip/ip6_to_ip4.h
index 6a533e3b54e..29d5718d4da 100644
--- a/src/vnet/ip/ip6_to_ip4.h
+++ b/src/vnet/ip/ip6_to_ip4.h
@@ -31,7 +31,6 @@ typedef int (*ip6_to_ip4_tcp_udp_set_fn_t) (vlib_buffer_t * b,
ip6_header_t * ip6,
ip4_header_t * ip4, void *ctx);
-/* *INDENT-OFF* */
static u8 icmp6_to_icmp_updater_pointer_table[] =
{ 0, 1, ~0, ~0,
2, 2, 9, 8,
@@ -44,7 +43,6 @@ static u8 icmp6_to_icmp_updater_pointer_table[] =
24, 24, 24, 24,
24, 24, 24, 24
};
-/* *INDENT-ON* */
#define frag_id_6to4(id) ((id) ^ ((id) >> 16))
@@ -62,41 +60,25 @@ static u8 icmp6_to_icmp_updater_pointer_table[] =
* @returns 0 on success, non-zero value otherwise.
*/
static_always_inline int
-ip6_parse (vlib_main_t * vm, vlib_buffer_t * b, const ip6_header_t * ip6,
- u32 buff_len, u8 * l4_protocol, u16 * l4_offset,
- u16 * frag_hdr_offset)
+ip6_parse (vlib_main_t *vm, vlib_buffer_t *b, ip6_header_t *ip6, u32 buff_len,
+ u8 *l4_protocol, u16 *l4_offset, u16 *frag_hdr_offset)
{
- ip6_ext_header_t *last_hdr, *frag_hdr;
- u32 length;
- if (ip6_walk_ext_hdr
- (vm, b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &length, &frag_hdr,
- &last_hdr))
+ ip6_ext_hdr_chain_t hdr_chain;
+ int res =
+ ip6_ext_header_walk (b, ip6, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
+ if (res < 0)
{
return -1;
}
-
- if (length > 0)
- {
- if (frag_hdr)
- {
- *frag_hdr_offset = (u8 *) frag_hdr - (u8 *) ip6;
- }
- else
- {
- *frag_hdr_offset = 0;
- }
- *l4_protocol = last_hdr->next_hdr;
- }
+ if (hdr_chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+ *frag_hdr_offset = hdr_chain.eh[res].offset;
else
- {
- *frag_hdr_offset = 0;
- *l4_protocol = ip6->protocol;
- }
- *l4_offset = sizeof (*ip6) + length;
+ *frag_hdr_offset = 0;
- return (buff_len < (*l4_offset + 4)) ||
- (clib_net_to_host_u16 (ip6->payload_length) <
- (*l4_offset + 4 - sizeof (*ip6)));
+ *l4_protocol = hdr_chain.eh[hdr_chain.length - 1].protocol;
+ *l4_offset = hdr_chain.eh[hdr_chain.length - 1].offset;
+
+ return 0;
}
/**
@@ -124,13 +106,13 @@ ip6_get_port (vlib_main_t * vm, vlib_buffer_t * b, ip6_header_t * ip6,
u16 frag_offset;
u8 *l4;
- if (ip6_parse
- (vm, b, ip6, buffer_len, &l4_protocol, &l4_offset, &frag_offset))
- return 0;
-
+ if (ip6_parse (vm, b, ip6, buffer_len, &l4_protocol, &l4_offset,
+ &frag_offset))
+ {
+ return 0;
+ }
if (frag_offset &&
- ip6_frag_hdr_offset (((ip6_frag_hdr_t *)
- u8_ptr_add (ip6, frag_offset))))
+ ip6_frag_hdr_offset (((ip6_frag_hdr_t *) u8_ptr_add (ip6, frag_offset))))
return 0; //Can't deal with non-first fragment for now
if (ip_protocol)
diff --git a/src/vnet/ip/ip_api.c b/src/vnet/ip/ip_api.c
index 6f06e382024..644b4988abc 100644
--- a/src/vnet/ip/ip_api.c
+++ b/src/vnet/ip/ip_api.c
@@ -106,7 +106,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (fib_table, ip4_main.fibs)
{
send_ip_table_details(am, reg, mp->context, fib_table);
@@ -118,7 +117,6 @@ vl_api_ip_table_dump_t_handler (vl_api_ip_table_dump_t * mp)
continue;
send_ip_table_details(am, reg, mp->context, fib_table);
}
- /* *INDENT-ON* */
}
typedef struct vl_api_ip_fib_dump_walk_ctx_t_
@@ -326,7 +324,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (mfib_table, ip4_main.mfibs)
{
send_ip_mtable_details (reg, mp->context, mfib_table);
@@ -335,7 +332,6 @@ vl_api_ip_mtable_dump_t_handler (vl_api_ip_mtable_dump_t * mp)
{
send_ip_mtable_details (reg, mp->context, mfib_table);
}
- /* *INDENT-ON* */
}
typedef struct vl_api_ip_mfib_dump_ctx_t_
@@ -443,46 +439,98 @@ vl_api_ip_punt_police_t_handler (vl_api_ip_punt_police_t * mp,
}
static void
-vl_api_ip_punt_redirect_t_handler (vl_api_ip_punt_redirect_t * mp,
- vlib_main_t * vm)
+ip_punt_redirect_t_handler_common (u8 is_add, u32 rx_sw_if_index,
+ ip_address_family_t af,
+ const fib_route_path_t *rpaths)
+{
+ if (is_add)
+ {
+ if (af == AF_IP6)
+ ip6_punt_redirect_add_paths (rx_sw_if_index, rpaths);
+ else if (af == AF_IP4)
+ ip4_punt_redirect_add_paths (rx_sw_if_index, rpaths);
+ }
+ else
+ {
+ if (af == AF_IP6)
+ ip6_punt_redirect_del (rx_sw_if_index);
+ else if (af == AF_IP4)
+ ip4_punt_redirect_del (rx_sw_if_index);
+ }
+}
+
+static void
+vl_api_ip_punt_redirect_t_handler (vl_api_ip_punt_redirect_t *mp,
+ vlib_main_t *vm)
{
vl_api_ip_punt_redirect_reply_t *rmp;
- int rv = 0;
+ fib_route_path_t *rpaths = NULL, rpath = {
+ .frp_weight = 1,
+ .frp_fib_index = ~0,
+ };
+ ip_address_family_t af;
ip46_type_t ipv;
- ip46_address_t nh;
+ u32 rx_sw_if_index;
+ int rv = 0;
if (!vnet_sw_if_index_is_api_valid (ntohl (mp->punt.tx_sw_if_index)))
goto bad_sw_if_index;
- ipv = ip_address_decode (&mp->punt.nh, &nh);
- if (mp->is_add)
+ ipv = ip_address_decode (&mp->punt.nh, &rpath.frp_addr);
+ af = (ipv == IP46_TYPE_IP6) ? AF_IP6 : AF_IP4;
+ rpath.frp_proto = (ipv == IP46_TYPE_IP6) ? DPO_PROTO_IP6 : DPO_PROTO_IP4;
+ rpath.frp_sw_if_index = ntohl (mp->punt.tx_sw_if_index);
+ rx_sw_if_index = ntohl (mp->punt.rx_sw_if_index);
+
+ vec_add1 (rpaths, rpath);
+ ip_punt_redirect_t_handler_common (mp->is_add, rx_sw_if_index, af, rpaths);
+ vec_free (rpaths);
+
+ BAD_SW_IF_INDEX_LABEL;
+
+ REPLY_MACRO (VL_API_IP_PUNT_REDIRECT_REPLY);
+}
+
+static void
+vl_api_add_del_ip_punt_redirect_v2_t_handler (
+ vl_api_add_del_ip_punt_redirect_v2_t *mp, vlib_main_t *vm)
+{
+ vl_api_add_del_ip_punt_redirect_v2_reply_t *rmp;
+ fib_route_path_t *rpaths = NULL, *rpath;
+ vl_api_fib_path_t *apath;
+ ip_address_family_t af;
+ u32 rx_sw_if_index, n_paths;
+ int rv = 0, ii;
+
+ rx_sw_if_index = ntohl (mp->punt.rx_sw_if_index);
+ n_paths = ntohl (mp->punt.n_paths);
+
+ rv = ip_address_family_decode (mp->punt.af, &af);
+ if (rv != 0)
+ goto out;
+
+ if (0 != n_paths)
{
- if (ipv == IP46_TYPE_IP6)
- {
- ip6_punt_redirect_add (ntohl (mp->punt.rx_sw_if_index),
- ntohl (mp->punt.tx_sw_if_index), &nh);
- }
- else if (ipv == IP46_TYPE_IP4)
- {
- ip4_punt_redirect_add (ntohl (mp->punt.rx_sw_if_index),
- ntohl (mp->punt.tx_sw_if_index), &nh);
- }
+ vec_validate (rpaths, n_paths - 1);
}
- else
+
+ for (ii = 0; ii < n_paths; ii++)
{
- if (ipv == IP46_TYPE_IP6)
- {
- ip6_punt_redirect_del (ntohl (mp->punt.rx_sw_if_index));
- }
- else if (ipv == IP46_TYPE_IP4)
- {
- ip4_punt_redirect_del (ntohl (mp->punt.rx_sw_if_index));
- }
+ apath = &mp->punt.paths[ii];
+ rpath = &rpaths[ii];
+
+ rv = fib_api_path_decode (apath, rpath);
+
+ if (rv != 0)
+ goto out;
}
- BAD_SW_IF_INDEX_LABEL;
+ ip_punt_redirect_t_handler_common (mp->is_add, rx_sw_if_index, af, rpaths);
- REPLY_MACRO (VL_API_IP_PUNT_REDIRECT_REPLY);
+out:
+ vec_free (rpaths);
+
+ REPLY_MACRO (VL_API_ADD_DEL_IP_PUNT_REDIRECT_V2_REPLY);
}
static clib_error_t *
@@ -551,6 +599,32 @@ ip_table_delete (fib_protocol_t fproto, u32 table_id, u8 is_api)
}
}
+/*
+ * Returns an unused table id, and ~0 if it can't find one.
+ */
+u32
+ip_table_get_unused_id (fib_protocol_t fproto)
+{
+ int i, j;
+ static u32 seed = 0;
+ /* limit to 1M tries */
+ for (j = 0; j < 1 << 10; j++)
+ {
+ seed = random_u32 (&seed);
+ for (i = 0; i < 1 << 10; i++)
+ {
+ /* look around randomly generated id */
+ seed += (2 * (i % 2) - 1) * i;
+ if (seed == ~0)
+ continue;
+ if (fib_table_find (fproto, seed) == ~0)
+ return seed;
+ }
+ }
+
+ return ~0;
+}
+
void
vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp)
{
@@ -572,6 +646,29 @@ vl_api_ip_table_add_del_t_handler (vl_api_ip_table_add_del_t * mp)
REPLY_MACRO (VL_API_IP_TABLE_ADD_DEL_REPLY);
}
+void
+vl_api_ip_table_allocate_t_handler (vl_api_ip_table_allocate_t *mp)
+{
+ vl_api_ip_table_allocate_reply_t *rmp;
+ fib_protocol_t fproto =
+ (mp->table.is_ip6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4);
+ u32 table_id = ntohl (mp->table.table_id);
+ int rv = 0;
+
+ if (~0 == table_id)
+ table_id = ip_table_get_unused_id (fproto);
+
+ if (~0 == table_id)
+ rv = VNET_API_ERROR_EAGAIN;
+ else
+ ip_table_create (fproto, table_id, 1, mp->table.name);
+
+ REPLY_MACRO2 (VL_API_IP_TABLE_ALLOCATE_REPLY, {
+ clib_memcpy_fast (&rmp->table, &mp->table, sizeof (mp->table));
+ rmp->table.table_id = htonl (table_id);
+ })
+}
+
static int
ip_route_add_del_t_handler (vl_api_ip_route_add_del_t * mp, u32 * stats_index)
{
@@ -681,12 +778,10 @@ vl_api_ip_route_add_del_t_handler (vl_api_ip_route_add_del_t * mp)
rv = ip_route_add_del_t_handler (mp, &stats_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IP_ROUTE_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}))
- /* *INDENT-ON* */
}
void
@@ -738,7 +833,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp)
}
}
- /* *INDENT-OFF* */
REPLY_MACRO3_ZERO(VL_API_IP_ROUTE_LOOKUP_REPLY,
npaths * sizeof (*fp),
({
@@ -758,7 +852,6 @@ vl_api_ip_route_lookup_t_handler (vl_api_ip_route_lookup_t * mp)
}
}
}));
- /* *INDENT-ON* */
vec_free (rpaths);
}
@@ -845,20 +938,14 @@ ip_table_create (fib_protocol_t fproto,
fib_index = fib_table_find (fproto, table_id);
mfib_index = mfib_table_find (fproto, table_id);
- if (~0 == fib_index)
- {
- fib_table_find_or_create_and_lock_w_name (fproto, table_id,
- (is_api ?
- FIB_SOURCE_API :
- FIB_SOURCE_CLI), name);
- }
- if (~0 == mfib_index)
- {
- mfib_table_find_or_create_and_lock_w_name (fproto, table_id,
- (is_api ?
- MFIB_SOURCE_API :
- MFIB_SOURCE_CLI), name);
- }
+ /*
+ * Always try to re-lock in case the fib was deleted by an API call
+ * but was not yet freed because some other locks were held
+ */
+ fib_table_find_or_create_and_lock_w_name (
+ fproto, table_id, (is_api ? FIB_SOURCE_API : FIB_SOURCE_CLI), name);
+ mfib_table_find_or_create_and_lock_w_name (
+ fproto, table_id, (is_api ? MFIB_SOURCE_API : MFIB_SOURCE_CLI), name);
if ((~0 == fib_index) || (~0 == mfib_index))
call_elf_section_ip_table_callbacks (vnm, table_id, 1 /* is_add */ ,
@@ -886,9 +973,8 @@ mroute_add_del_handler (u8 is_add,
{
if (is_add)
{
- mfib_entry_index =
- mfib_table_entry_paths_update (fib_index, prefix,
- MFIB_SOURCE_API, rpaths);
+ mfib_entry_index = mfib_table_entry_paths_update (
+ fib_index, prefix, MFIB_SOURCE_API, entry_flags, rpaths);
}
else
{
@@ -955,12 +1041,10 @@ vl_api_ip_mroute_add_del_t_handler (vl_api_ip_mroute_add_del_t * mp)
rv = api_mroute_add_del_t_handler (mp, &stats_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IP_MROUTE_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1023,7 +1107,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
if (mp->is_ipv6)
{
- /* *INDENT-OFF* */
/* Do not send subnet details of the IP-interface for
* unnumbered interfaces. otherwise listening clients
* will be confused that the subnet is applied on more
@@ -1037,11 +1120,9 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
};
send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context);
}));
- /* *INDENT-ON* */
}
else
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 0,
({
fib_prefix_t pfx = {
@@ -1052,7 +1133,6 @@ vl_api_ip_address_dump_t_handler (vl_api_ip_address_dump_t * mp)
send_ip_address_details(am, reg, &pfx, sw_if_index, mp->context);
}));
- /* *INDENT-ON* */
}
BAD_SW_IF_INDEX_LABEL;
@@ -1109,7 +1189,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp)
}
else
{
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
if ((si->flags & VNET_SW_INTERFACE_FLAG_UNNUMBERED))
@@ -1120,7 +1199,6 @@ vl_api_ip_unnumbered_dump_t_handler (vl_api_ip_unnumbered_dump_t * mp)
mp->context);
}
}
- /* *INDENT-ON* */
}
BAD_SW_IF_INDEX_LABEL;
@@ -1143,13 +1221,11 @@ vl_api_ip_dump_t_handler (vl_api_ip_dump_t * mp)
/* Gather interfaces. */
sorted_sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
- _vec_len (sorted_sis) = 0;
- /* *INDENT-OFF* */
+ vec_set_len (sorted_sis, 0);
pool_foreach (si, im->sw_interfaces)
{
vec_add1 (sorted_sis, si[0]);
}
- /* *INDENT-ON* */
vec_foreach (si, sorted_sis)
{
@@ -1204,6 +1280,22 @@ vl_api_set_ip_flow_hash_v2_t_handler (vl_api_set_ip_flow_hash_v2_t *mp)
}
static void
+vl_api_set_ip_flow_hash_v3_t_handler (vl_api_set_ip_flow_hash_v3_t *mp)
+{
+ vl_api_set_ip_flow_hash_v3_reply_t *rmp;
+ ip_address_family_t af;
+ int rv;
+
+ rv = ip_address_family_decode (mp->af, &af);
+
+ if (!rv)
+ rv = ip_flow_hash_set (af, htonl (mp->table_id),
+ htonl (mp->flow_hash_config));
+
+ REPLY_MACRO (VL_API_SET_IP_FLOW_HASH_V3_REPLY);
+}
+
+static void
vl_api_set_ip_flow_hash_router_id_t_handler (
vl_api_set_ip_flow_hash_router_id_t *mp)
{
@@ -1613,7 +1705,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp)
vnet_sw_interface_t *si;
/* Shut down interfaces in this FIB / clean out intfc routes */
- /* *INDENT-OFF* */
pool_foreach (si, im->sw_interfaces)
{
if (fib_index == fib_table_get_index_for_sw_if_index (fproto,
@@ -1624,7 +1715,6 @@ vl_api_ip_table_flush_t_handler (vl_api_ip_table_flush_t * mp)
vnet_sw_interface_set_flags (vnm, si->sw_if_index, flags);
}
}
- /* *INDENT-ON* */
fib_table_flush (fib_index, fproto, FIB_SOURCE_API);
mfib_table_flush (mfib_table_find (fproto, ntohl (mp->table.table_id)),
@@ -1781,6 +1871,30 @@ void
REPLY_MACRO (VL_API_IP_REASSEMBLY_ENABLE_DISABLE_REPLY);
}
+void
+vl_api_ip_local_reass_enable_disable_t_handler (
+ vl_api_ip_local_reass_enable_disable_t *mp)
+{
+ vl_api_ip_local_reass_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ ip4_local_full_reass_enable_disable (mp->enable_ip4);
+ ip6_local_full_reass_enable_disable (mp->enable_ip6);
+
+ REPLY_MACRO (VL_API_IP_LOCAL_REASS_ENABLE_DISABLE_REPLY);
+}
+
+void
+vl_api_ip_local_reass_get_t_handler (vl_api_ip_local_reass_get_t *mp)
+{
+ vl_api_ip_local_reass_get_reply_t *rmp;
+ int rv = 0;
+ REPLY_MACRO2 (VL_API_IP_LOCAL_REASS_GET, {
+ rmp->ip4_is_enabled = ip4_local_full_reass_enabled ();
+ rmp->ip6_is_enabled = ip6_local_full_reass_enabled ();
+ });
+}
+
static walk_rc_t
send_ip_punt_redirect_details (u32 rx_sw_if_index,
const ip_punt_redirect_rx_t * ipr, void *arg)
@@ -1814,40 +1928,114 @@ send_ip_punt_redirect_details (u32 rx_sw_if_index,
return (WALK_CONTINUE);
}
+static walk_rc_t
+send_ip_punt_redirect_v2_details (u32 rx_sw_if_index,
+ const ip_punt_redirect_rx_t *ipr, void *arg)
+{
+ vl_api_ip_punt_redirect_v2_details_t *mp;
+ fib_path_encode_ctx_t path_ctx = {
+ .rpaths = NULL,
+ };
+ fib_route_path_t *rpath;
+ ip_walk_ctx_t *ctx = arg;
+ vl_api_fib_path_t *fp;
+ int n_paths;
+
+ fib_path_list_walk_w_ext (ipr->pl, NULL, fib_path_encode, &path_ctx);
+
+ n_paths = vec_len (path_ctx.rpaths);
+ mp = vl_msg_api_alloc (sizeof (*mp) + n_paths * sizeof (*fp));
+ if (!mp)
+ return (WALK_STOP);
+
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_IP_PUNT_REDIRECT_V2_DETAILS);
+ mp->context = ctx->context;
+ mp->punt.rx_sw_if_index = htonl (rx_sw_if_index);
+ mp->punt.n_paths = htonl (n_paths);
+ fp = mp->punt.paths;
+ vec_foreach (rpath, path_ctx.rpaths)
+ {
+ fib_api_path_encode (rpath, fp);
+ fp++;
+ }
+ mp->punt.af = (ipr->fproto == FIB_PROTOCOL_IP6) ? ADDRESS_IP6 : ADDRESS_IP4;
+
+ vl_api_send_msg (ctx->reg, (u8 *) mp);
+
+ vec_free (path_ctx.rpaths);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_ip_punt_redirect_dump_common (ip_walk_ctx_t *ctx, fib_protocol_t fproto,
+ u32 rx_sw_if_index,
+ ip_punt_redirect_walk_cb_t cb)
+{
+
+ if ((u32) ~0 != rx_sw_if_index)
+ {
+ index_t pri;
+ pri = ip_punt_redirect_find (fproto, rx_sw_if_index);
+
+ if (INDEX_INVALID == pri)
+ return;
+
+ cb (rx_sw_if_index, ip_punt_redirect_get (pri), ctx);
+ }
+ else
+ ip_punt_redirect_walk (fproto, cb, ctx);
+}
+
static void
vl_api_ip_punt_redirect_dump_t_handler (vl_api_ip_punt_redirect_dump_t * mp)
{
vl_api_registration_t *reg;
- fib_protocol_t fproto = FIB_PROTOCOL_IP4;
+ fib_protocol_t fproto;
reg = vl_api_client_index_to_registration (mp->client_index);
if (!reg)
return;
- if (mp->is_ipv6 == 1)
- fproto = FIB_PROTOCOL_IP6;
+ fproto = (mp->is_ipv6 == 1) ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
ip_walk_ctx_t ctx = {
.reg = reg,
.context = mp->context,
};
- if (~0 != mp->sw_if_index)
- {
- u32 rx_sw_if_index;
- index_t pri;
+ vl_api_ip_punt_redirect_dump_common (&ctx, fproto, ntohl (mp->sw_if_index),
+ send_ip_punt_redirect_details);
+}
- rx_sw_if_index = ntohl (mp->sw_if_index);
- pri = ip_punt_redirect_find (fproto, rx_sw_if_index);
+static void
+vl_api_ip_punt_redirect_v2_dump_t_handler (
+ vl_api_ip_punt_redirect_v2_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ ip_address_family_t af;
+ fib_protocol_t fproto;
+ int rv = 0;
- if (INDEX_INVALID == pri)
- return;
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
- send_ip_punt_redirect_details (rx_sw_if_index,
- ip_punt_redirect_get (pri), &ctx);
- }
- else
- ip_punt_redirect_walk (fproto, send_ip_punt_redirect_details, &ctx);
+ rv = ip_address_family_decode (mp->af, &af);
+ if (rv != 0)
+ return;
+
+ fproto = (af == AF_IP6) ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
+
+ ip_walk_ctx_t ctx = {
+ .reg = reg,
+ .context = mp->context,
+ };
+
+ vl_api_ip_punt_redirect_dump_common (&ctx, fproto, ntohl (mp->sw_if_index),
+ send_ip_punt_redirect_v2_details);
}
void
@@ -1925,17 +2113,21 @@ ip_api_hookup (vlib_main_t * vm)
api_main_t *am = vlibapi_get_main ();
/*
- * Mark the route add/del API as MP safe
+ * Set up the (msg_name, crc, message-id) table
*/
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL] = 1;
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_REPLY] = 1;
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_V2] = 1;
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL_V2_REPLY] = 1;
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
/*
- * Set up the (msg_name, crc, message-id) table
+ * Mark the route add/del API as MP safe
*/
- REPLY_MSG_ID_BASE = setup_message_id_table ();
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL,
+ 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_REPLY, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2, 1);
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_IP_ROUTE_ADD_DEL_V2_REPLY, 1);
return 0;
}
diff --git a/src/vnet/ip/ip_checksum.c b/src/vnet/ip/ip_checksum.c
index 1ac7248ea05..4fbf1fb74fa 100644
--- a/src/vnet/ip/ip_checksum.c
+++ b/src/vnet/ip/ip_checksum.c
@@ -165,14 +165,12 @@ test_ip_checksum_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_checksum, static) =
{
.path = "test ip checksum",
.short_help = "test ip checksum",
.function = test_ip_checksum_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_DEBUG */
diff --git a/src/vnet/ip/ip_container_proxy.c b/src/vnet/ip/ip_container_proxy.c
index 2c94b70ec85..1618704e804 100644
--- a/src/vnet/ip/ip_container_proxy.c
+++ b/src/vnet/ip/ip_container_proxy.c
@@ -132,28 +132,22 @@ ip_container_proxy_fib_table_walk (fib_node_index_t fei, void *arg)
void
ip_container_proxy_walk (ip_container_proxy_cb_t cb, void *ctx)
{
- fib_table_t *fib_table;
ip_container_proxy_walk_ctx_t wctx = {
.cb = cb,
.ctx = ctx,
};
+ u32 fib_index;
- /* *INDENT-OFF* */
- pool_foreach (fib_table, ip4_main.fibs)
- {
- fib_table_walk(fib_table->ft_index,
- FIB_PROTOCOL_IP4,
- ip_container_proxy_fib_table_walk,
- &wctx);
- }
- pool_foreach (fib_table, ip6_main.fibs)
- {
- fib_table_walk(fib_table->ft_index,
- FIB_PROTOCOL_IP6,
- ip_container_proxy_fib_table_walk,
- &wctx);
- }
- /* *INDENT-ON* */
+ pool_foreach_index (fib_index, ip4_main.fibs)
+ {
+ fib_table_walk (fib_index, FIB_PROTOCOL_IP4,
+ ip_container_proxy_fib_table_walk, &wctx);
+ }
+ pool_foreach_index (fib_index, ip6_main.fibs)
+ {
+ fib_table_walk (fib_index, FIB_PROTOCOL_IP6,
+ ip_container_proxy_fib_table_walk, &wctx);
+ }
}
clib_error_t *
@@ -220,14 +214,12 @@ ip_container_cmd (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_container_command_node, static) = {
.path = "ip container",
.function = ip_container_cmd,
.short_help = "ip container <address> <interface>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input,
@@ -279,14 +271,12 @@ show_ip_container_cmd_fn (vlib_main_t * vm, unformat_input_t * main_input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_container_command, static) = {
.path = "show ip container",
.function = show_ip_container_cmd_fn,
.short_help = "show ip container <address> <interface>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_flow_hash.h b/src/vnet/ip/ip_flow_hash.h
index bd37ef7307b..30dfcd70a1b 100644
--- a/src/vnet/ip/ip_flow_hash.h
+++ b/src/vnet/ip/ip_flow_hash.h
@@ -38,7 +38,17 @@
_ (proto, 4, IP_FLOW_HASH_PROTO) \
_ (reverse, 5, IP_FLOW_HASH_REVERSE_SRC_DST) \
_ (symmetric, 6, IP_FLOW_HASH_SYMMETRIC) \
- _ (flowlabel, 7, IP_FLOW_HASH_FL)
+ _ (flowlabel, 7, IP_FLOW_HASH_FL) \
+ _ (gtpv1teid, 8, IP_FLOW_HASH_GTPV1_TEID)
+
+typedef struct
+{
+ u8 ver_flags;
+ u8 type;
+ u16 length;
+ u32 teid;
+} __attribute__ ((packed)) gtpv1u_header_t;
+#define GTPV1_PORT_BE 0x6808
/**
* A flow hash configuration is a mask of the flow hash options
diff --git a/src/vnet/ip/ip_frag.c b/src/vnet/ip/ip_frag.c
index cafa9a66d6b..934e40a5d18 100644
--- a/src/vnet/ip/ip_frag.c
+++ b/src/vnet/ip/ip_frag.c
@@ -25,10 +25,10 @@
typedef struct
{
- u8 ipv6;
u16 mtu;
u8 next;
u16 n_fragments;
+ u16 pkt_size;
} ip_frag_trace_t;
static u8 *
@@ -37,8 +37,8 @@ format_ip_frag_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip_frag_trace_t *t = va_arg (*args, ip_frag_trace_t *);
- s = format (s, "IPv%s mtu: %u fragments: %u next: %d",
- t->ipv6 ? "6" : "4", t->mtu, t->n_fragments, t->next);
+ s = format (s, "mtu: %u pkt-size: %u fragments: %u next: %d", t->mtu,
+ t->pkt_size, t->n_fragments, t->next);
return s;
}
@@ -95,7 +95,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
{
vlib_buffer_t *from_b;
ip4_header_t *ip4;
- u16 len, max, rem, ip_frag_id, ip_frag_offset;
+ u16 len, max, rem, ip_frag_id, ip_frag_offset, head_bytes;
u8 *org_from_packet, more;
from_b = vlib_get_buffer (vm, from_bi);
@@ -103,9 +103,9 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
ip4 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
rem = clib_net_to_host_u16 (ip4->length) - sizeof (ip4_header_t);
- max =
- (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) -
- sizeof (ip4_header_t)) & ~0x7;
+ head_bytes = sizeof (ip4_header_t) + l2unfragmentablesize;
+ max = (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - head_bytes) &
+ ~0x7;
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip4_header_t)))
@@ -142,8 +142,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
u8 *from_data = (void *) (ip4 + 1);
vlib_buffer_t *org_from_b = from_b;
u16 fo = 0;
- u16 left_in_from_buffer =
- from_b->current_length - (l2unfragmentablesize + sizeof (ip4_header_t));
+ u16 left_in_from_buffer = from_b->current_length - head_bytes;
u16 ptr = 0;
/* Do the actual fragmentation */
@@ -166,8 +165,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
/* Copy ip4 header */
to_data = vlib_buffer_get_current (to_b);
- clib_memcpy_fast (to_data, org_from_packet,
- l2unfragmentablesize + sizeof (ip4_header_t));
+ clib_memcpy_fast (to_data, org_from_packet, head_bytes);
to_ip4 = (ip4_header_t *) (to_data + l2unfragmentablesize);
to_data = (void *) (to_ip4 + 1);
vnet_buffer (to_b)->l3_hdr_offset = to_b->current_data;
@@ -213,8 +211,7 @@ ip4_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
}
to_b->flags |= VNET_BUFFER_F_IS_IP4;
- to_b->current_length =
- len + sizeof (ip4_header_t) + l2unfragmentablesize;
+ to_b->current_length = len + head_bytes;
to_ip4->fragment_id = ip_frag_id;
to_ip4->flags_and_fragment_offset =
@@ -286,7 +283,7 @@ frag_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
ip_frag_trace_t *tr =
vlib_add_trace (vm, node, p0, sizeof (*tr));
tr->mtu = mtu;
- tr->ipv6 = is_ip6 ? 1 : 0;
+ tr->pkt_size = vlib_buffer_length_in_chain (vm, p0);
tr->n_fragments = vec_len (buffer);
tr->next = vnet_buffer (p0)->ip_frag.next_index;
}
@@ -385,13 +382,17 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
ip6_header_t *ip6;
u16 len, max, rem, ip_frag_id;
u8 *org_from_packet;
+ u16 head_bytes;
from_b = vlib_get_buffer (vm, from_bi);
org_from_packet = vlib_buffer_get_current (from_b);
ip6 = vlib_buffer_get_current (from_b) + l2unfragmentablesize;
+ head_bytes =
+ (sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t) + l2unfragmentablesize);
rem = clib_net_to_host_u16 (ip6->payload_length);
- max = (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) & ~0x7; // TODO: Is max correct??
+ max = (clib_min (mtu, vlib_buffer_get_default_data_size (vm)) - head_bytes) &
+ ~0x7;
if (rem >
(vlib_buffer_length_in_chain (vm, from_b) - sizeof (ip6_header_t)))
@@ -423,9 +424,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
ip6_frag_hdr_t *to_frag_hdr;
u8 *to_data;
- len =
- (rem >
- (mtu - sizeof (ip6_header_t) - sizeof (ip6_frag_hdr_t)) ? max : rem);
+ len = (rem > max ? max : rem);
if (len != rem) /* Last fragment does not need to divisible by 8 */
len &= ~0x7;
if ((to_b = frag_buffer_alloc (org_from_b, &to_bi)) == 0)
@@ -438,7 +437,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
/* Copy ip6 header */
clib_memcpy_fast (to_b->data, org_from_packet,
l2unfragmentablesize + sizeof (ip6_header_t));
- to_ip6 = vlib_buffer_get_current (to_b);
+ to_ip6 = vlib_buffer_get_current (to_b) + l2unfragmentablesize;
to_frag_hdr = (ip6_frag_hdr_t *) (to_ip6 + 1);
to_data = (void *) (to_frag_hdr + 1);
@@ -484,8 +483,7 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
to_ptr += bytes_to_copy;
}
- to_b->current_length =
- len + sizeof (ip6_header_t) + sizeof (ip6_frag_hdr_t);
+ to_b->current_length = len + head_bytes;
to_ip6->payload_length =
clib_host_to_net_u16 (len + sizeof (ip6_frag_hdr_t));
to_ip6->protocol = IP_PROTOCOL_IPV6_FRAGMENTATION;
@@ -502,13 +500,6 @@ ip6_frag_do_fragment (vlib_main_t * vm, u32 from_bi, u16 mtu,
return IP_FRAG_ERROR_NONE;
}
-static char *ip4_frag_error_strings[] = {
-#define _(sym,string) string,
- foreach_ip_frag_error
-#undef _
-};
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_frag_node) = {
.function = ip4_frag,
.name = IP4_FRAG_NODE_NAME,
@@ -517,21 +508,17 @@ VLIB_REGISTER_NODE (ip4_frag_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = IP_FRAG_N_ERROR,
- .error_strings = ip4_frag_error_strings,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_FRAG_N_NEXT,
- .next_nodes = {
- [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite",
- [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain",
- [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [IP_FRAG_NEXT_DROP] = "ip4-drop"
- },
+ .next_nodes = { [IP_FRAG_NEXT_IP_REWRITE] = "ip4-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip4-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
+ [IP_FRAG_NEXT_DROP] = "ip4-drop" },
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_frag_node) = {
.function = ip6_frag,
.name = IP6_FRAG_NODE_NAME,
@@ -540,19 +527,16 @@ VLIB_REGISTER_NODE (ip6_frag_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.n_errors = IP_FRAG_N_ERROR,
- .error_strings = ip4_frag_error_strings,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_FRAG_N_NEXT,
- .next_nodes = {
- [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite",
- [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain",
- [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
- [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
- [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop",
- [IP_FRAG_NEXT_DROP] = "ip6-drop"
- },
+ .next_nodes = { [IP_FRAG_NEXT_IP_REWRITE] = "ip6-rewrite",
+ [IP_FRAG_NEXT_IP_REWRITE_MIDCHAIN] = "ip6-midchain",
+ [IP_FRAG_NEXT_IP4_LOOKUP] = "ip4-lookup",
+ [IP_FRAG_NEXT_IP6_LOOKUP] = "ip6-lookup",
+ [IP_FRAG_NEXT_ICMP_ERROR] = "error-drop",
+ [IP_FRAG_NEXT_DROP] = "ip6-drop" },
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_frag.h b/src/vnet/ip/ip_frag.h
index 86462e6c7d2..4ddd62b89e6 100644
--- a/src/vnet/ip/ip_frag.h
+++ b/src/vnet/ip/ip_frag.h
@@ -36,6 +36,7 @@
#define IP_FRAG_H
#include <vnet/vnet.h>
+#include <vnet/ip/ip.api_enum.h>
#define IP_FRAG_FLAG_IP4_HEADER 0x01 //Encapsulating IPv4 header
#define IP_FRAG_FLAG_IP6_HEADER 0x02 //Encapsulating IPv6 header
@@ -57,24 +58,7 @@ typedef enum
IP_FRAG_N_NEXT
} ip_frag_next_t;
-#define foreach_ip_frag_error \
- /* Must be first. */ \
- _(NONE, "packet fragmented") \
- _(SMALL_PACKET, "packet smaller than MTU") \
- _(FRAGMENT_SENT, "number of sent fragments") \
- _(CANT_FRAGMENT_HEADER, "can't fragment header") \
- _(DONT_FRAGMENT_SET, "can't fragment this packet") \
- _(MALFORMED, "malformed packet") \
- _(MEMORY, "could not allocate buffer") \
- _(UNKNOWN, "unknown error")
-
-typedef enum
-{
-#define _(sym,str) IP_FRAG_ERROR_##sym,
- foreach_ip_frag_error
-#undef _
- IP_FRAG_N_ERROR,
-} ip_frag_error_t;
+typedef vl_counter_ip_frag_enum_t ip_frag_error_t;
void ip_frag_set_vnet_buffer (vlib_buffer_t * b, u16 mtu,
u8 next_index, u8 flags);
diff --git a/src/vnet/ip/ip_in_out_acl.c b/src/vnet/ip/ip_in_out_acl.c
index a5e652e1ee8..eb3c94a188a 100644
--- a/src/vnet/ip/ip_in_out_acl.c
+++ b/src/vnet/ip/ip_in_out_acl.c
@@ -32,11 +32,26 @@ format_ip_in_out_acl_trace (u8 * s, u32 is_output, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ip_in_out_acl_trace_t *t = va_arg (*args, ip_in_out_acl_trace_t *);
-
- s = format (s, "%s: sw_if_index %d, next_index %d, table %d, offset %d",
- is_output ? "OUTACL" : "INACL",
- t->sw_if_index, t->next_index, t->table_index, t->offset);
- return s;
+ const vnet_classify_main_t *vcm = &vnet_classify_main;
+ const u32 indent = format_get_indent (s);
+ vnet_classify_table_t *table;
+ vnet_classify_entry_t *e;
+
+ s =
+ format (s, "%s: sw_if_index %d, next_index %d, table_index %d, offset %d",
+ is_output ? "OUTACL" : "INACL", t->sw_if_index, t->next_index,
+ t->table_index, t->offset);
+
+ if (pool_is_free_index (vcm->tables, t->table_index))
+ return format (s, "\n%Uno table", format_white_space, indent + 4);
+
+ if (~0 == t->offset)
+ return format (s, "\n%Uno match", format_white_space, indent + 4);
+
+ table = vnet_classify_table_get (t->table_index);
+ e = vnet_classify_get_entry (table, t->offset);
+ return format (s, "\n%U%U", format_white_space, indent + 4,
+ format_classify_entry, table, e);
}
static u8 *
@@ -97,57 +112,40 @@ static char *ip_outacl_error_strings[] = {
};
static_always_inline void
-ip_in_out_acl_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_buffer_t ** b,
- u16 * next, u32 n_left, int is_ip4, int is_output,
- int do_trace)
+ip_in_out_acl_inline_trace (
+ vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame,
+ vlib_buffer_t **b, u16 *next, u32 n_left, u32 *hits__, u32 *misses__,
+ u32 *chain_hits__, const vlib_error_t error_none,
+ const vlib_error_t error_deny, const vlib_error_t error_miss,
+ vnet_classify_table_t *tables, const u32 *table_index_by_sw_if_index,
+ u32 *fib_index_by_sw_if_index, vnet_config_main_t *cm,
+ const vlib_rx_or_tx_t way, const int is_output, const int do_trace)
{
- in_out_acl_main_t *am = &in_out_acl_main;
- vnet_classify_main_t *vcm = am->vnet_classify_main;
f64 now = vlib_time_now (vm);
u32 hits = 0;
u32 misses = 0;
u32 chain_hits = 0;
- in_out_acl_table_id_t tid;
- vlib_node_runtime_t *error_node;
- u32 n_next_nodes;
-
+ u32 n_next_nodes = node->n_next_nodes;
u8 *h[4];
u32 sw_if_index[4];
u32 table_index[4];
vnet_classify_table_t *t[4] = { 0, 0 };
- u64 hash[4];
-
- n_next_nodes = node->n_next_nodes;
-
- if (is_ip4)
- {
- tid = IN_OUT_ACL_TABLE_IP4;
- error_node = vlib_node_get_runtime (vm, ip4_input_node.index);
- }
- else
- {
- tid = IN_OUT_ACL_TABLE_IP6;
- error_node = vlib_node_get_runtime (vm, ip6_input_node.index);
- }
+ u32 hash[4];
/* calculate hashes for b[0] & b[1] */
if (n_left >= 2)
{
- sw_if_index[2] =
- vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
- sw_if_index[3] =
- vnet_buffer (b[1])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ /* ~0 is used as a wildcard to say 'always use sw_if_index 0'
+ * aka local0. It is used when we do not care about the sw_if_index, as
+ * when punting */
+ sw_if_index[2] = ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way];
+ sw_if_index[3] = ~0 == way ? 0 : vnet_buffer (b[1])->sw_if_index[way];
- table_index[2] =
- am->classify_table_index_by_sw_if_index[is_output][tid]
- [sw_if_index[2]];
- table_index[3] =
- am->classify_table_index_by_sw_if_index[is_output][tid]
- [sw_if_index[3]];
+ table_index[2] = table_index_by_sw_if_index[sw_if_index[2]];
+ table_index[3] = table_index_by_sw_if_index[sw_if_index[3]];
- t[2] = pool_elt_at_index (vcm->tables, table_index[2]);
- t[3] = pool_elt_at_index (vcm->tables, table_index[3]);
+ t[2] = pool_elt_at_index (tables, table_index[2]);
+ t[3] = pool_elt_at_index (tables, table_index[3]);
if (t[2]->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
h[2] =
@@ -164,16 +162,16 @@ ip_in_out_acl_inline (vlib_main_t * vm,
if (is_output)
{
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[0])->l2_classify.pad.l2_len =
+ vnet_buffer (b[0])->l2.l2_len =
vnet_buffer (b[0])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[2] += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h[2] += vnet_buffer (b[0])->l2.l2_len;
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[1])->l2_classify.pad.l2_len =
+ vnet_buffer (b[1])->l2.l2_len =
vnet_buffer (b[1])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[3] += vnet_buffer (b[1])->l2_classify.pad.l2_len;
+ h[3] += vnet_buffer (b[1])->l2.l2_len;
}
hash[2] = vnet_classify_hash_packet_inline (t[2], (u8 *) h[2]);
@@ -198,7 +196,6 @@ ip_in_out_acl_inline (vlib_main_t * vm,
{
vnet_classify_entry_t *e[2] = { 0, 0 };
u32 _next[2] = { ACL_NEXT_INDEX_DENY, ACL_NEXT_INDEX_DENY };
- u8 error[2];
h[0] = h[2];
h[1] = h[3];
@@ -228,19 +225,15 @@ ip_in_out_acl_inline (vlib_main_t * vm,
if (n_left >= 4)
{
sw_if_index[2] =
- vnet_buffer (b[2])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ ~0 == way ? 0 : vnet_buffer (b[2])->sw_if_index[way];
sw_if_index[3] =
- vnet_buffer (b[3])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ ~0 == way ? 0 : vnet_buffer (b[3])->sw_if_index[way];
- table_index[2] =
- am->classify_table_index_by_sw_if_index[is_output][tid]
- [sw_if_index[2]];
- table_index[3] =
- am->classify_table_index_by_sw_if_index[is_output][tid]
- [sw_if_index[3]];
+ table_index[2] = table_index_by_sw_if_index[sw_if_index[2]];
+ table_index[3] = table_index_by_sw_if_index[sw_if_index[3]];
- t[2] = pool_elt_at_index (vcm->tables, table_index[2]);
- t[3] = pool_elt_at_index (vcm->tables, table_index[3]);
+ t[2] = pool_elt_at_index (tables, table_index[2]);
+ t[3] = pool_elt_at_index (tables, table_index[3]);
if (t[2]->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
h[2] =
@@ -259,16 +252,16 @@ ip_in_out_acl_inline (vlib_main_t * vm,
if (is_output)
{
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[2])->l2_classify.pad.l2_len =
+ vnet_buffer (b[2])->l2.l2_len =
vnet_buffer (b[2])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[2] += vnet_buffer (b[2])->l2_classify.pad.l2_len;
+ h[2] += vnet_buffer (b[2])->l2.l2_len;
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[3])->l2_classify.pad.l2_len =
+ vnet_buffer (b[3])->l2.l2_len =
vnet_buffer (b[3])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h[3] += vnet_buffer (b[3])->l2_classify.pad.l2_len;
+ h[3] += vnet_buffer (b[3])->l2.l2_len;
}
hash[2] = vnet_classify_hash_packet_inline (t[2], (u8 *) h[2]);
@@ -292,11 +285,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
}
/* find entry for b[0] & b[1] */
- vnet_get_config_data (am->vnet_config_main[is_output][tid],
- &b[0]->current_config_index, &_next[0],
+ vnet_get_config_data (cm, &b[0]->current_config_index, &_next[0],
/* # bytes of config data */ 0);
- vnet_get_config_data (am->vnet_config_main[is_output][tid],
- &b[1]->current_config_index, &_next[1],
+ vnet_get_config_data (cm, &b[1]->current_config_index, &_next[1],
/* # bytes of config data */ 0);
if (PREDICT_TRUE (table_index[0] != ~0))
@@ -314,15 +305,8 @@ ip_in_out_acl_inline (vlib_main_t * vm,
hits++;
- if (is_ip4)
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error[0]];
+ b[0]->error =
+ (_next[0] == ACL_NEXT_INDEX_DENY) ? error_deny : error_none;
if (!is_output)
{
@@ -330,17 +314,22 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e[0]->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = e[0]->metadata;
else if (e[0]->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
- e[0]->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
+ e[0]->metadata;
+ /* For source check in case we skip the lookup node */
+ ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index,
+ b[0]);
+ }
}
}
else
{
while (1)
{
- if (PREDICT_TRUE (t[0]->next_table_index != ~0))
- t[0] = pool_elt_at_index (vcm->tables,
- t[0]->next_table_index);
+ table_index[0] = t[0]->next_table_index;
+ if (PREDICT_TRUE (table_index[0] != ~0))
+ t[0] = pool_elt_at_index (tables, table_index[0]);
else
{
_next[0] = (t[0]->miss_next_index < n_next_nodes) ?
@@ -348,15 +337,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
misses++;
- if (is_ip4)
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_TABLE_MISS :
- IP4_ERROR_INACL_TABLE_MISS) : IP4_ERROR_NONE;
- else
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_TABLE_MISS :
- IP6_ERROR_INACL_TABLE_MISS) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error[0]];
+ b[0]->error = (_next[0] == ACL_NEXT_INDEX_DENY) ?
+ error_miss :
+ error_none;
break;
}
@@ -369,7 +352,7 @@ ip_in_out_acl_inline (vlib_main_t * vm,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h[0] += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h[0] += vnet_buffer (b[0])->l2.l2_len;
hash[0] =
vnet_classify_hash_packet_inline (t[0], (u8 *) h[0]);
@@ -386,15 +369,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
hits++;
chain_hits++;
- if (is_ip4)
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error[0] = (_next[0] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error[0]];
+ b[0]->error = (_next[0] == ACL_NEXT_INDEX_DENY) ?
+ error_deny :
+ error_none;
if (!is_output)
{
@@ -406,8 +383,14 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e[0]->metadata;
else if (e[0]->action ==
CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
- e[0]->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
+ e[0]->metadata;
+ /* For source check in case we skip the lookup
+ * node */
+ ip_lookup_set_buffer_fib_index (
+ fib_index_by_sw_if_index, b[0]);
+ }
}
break;
}
@@ -430,15 +413,8 @@ ip_in_out_acl_inline (vlib_main_t * vm,
hits++;
- if (is_ip4)
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[1]->error = error_node->errors[error[1]];
+ b[1]->error =
+ (_next[1] == ACL_NEXT_INDEX_DENY) ? error_deny : error_none;
if (!is_output)
{
@@ -446,17 +422,22 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e[1]->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
vnet_buffer (b[1])->sw_if_index[VLIB_TX] = e[1]->metadata;
else if (e[1]->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
- e[1]->metadata;
+ {
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
+ e[1]->metadata;
+ /* For source check in case we skip the lookup node */
+ ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index,
+ b[1]);
+ }
}
}
else
{
while (1)
{
- if (PREDICT_TRUE (t[1]->next_table_index != ~0))
- t[1] = pool_elt_at_index (vcm->tables,
- t[1]->next_table_index);
+ table_index[1] = t[1]->next_table_index;
+ if (PREDICT_TRUE (table_index[1] != ~0))
+ t[1] = pool_elt_at_index (tables, table_index[1]);
else
{
_next[1] = (t[1]->miss_next_index < n_next_nodes) ?
@@ -464,15 +445,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
misses++;
- if (is_ip4)
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_TABLE_MISS :
- IP4_ERROR_INACL_TABLE_MISS) : IP4_ERROR_NONE;
- else
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_TABLE_MISS :
- IP6_ERROR_INACL_TABLE_MISS) : IP6_ERROR_NONE;
- b[1]->error = error_node->errors[error[1]];
+ b[1]->error = (_next[1] == ACL_NEXT_INDEX_DENY) ?
+ error_miss :
+ error_none;
break;
}
@@ -485,7 +460,7 @@ ip_in_out_acl_inline (vlib_main_t * vm,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h[1] += vnet_buffer (b[1])->l2_classify.pad.l2_len;
+ h[1] += vnet_buffer (b[1])->l2.l2_len;
hash[1] =
vnet_classify_hash_packet_inline (t[1], (u8 *) h[1]);
@@ -502,15 +477,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
hits++;
chain_hits++;
- if (is_ip4)
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error[1] = (_next[1] == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[1]->error = error_node->errors[error[1]];
+ b[1]->error = (_next[1] == ACL_NEXT_INDEX_DENY) ?
+ error_deny :
+ error_none;
if (!is_output)
{
@@ -522,8 +491,14 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e[1]->metadata;
else if (e[1]->action ==
CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
- e[1]->metadata;
+ {
+ vnet_buffer (b[1])->ip.adj_index[VLIB_TX] =
+ e[1]->metadata;
+ /* For source check in case we skip the lookup
+ * node */
+ ip_lookup_set_buffer_fib_index (
+ fib_index_by_sw_if_index, b[1]);
+ }
}
break;
}
@@ -536,9 +511,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
ip_in_out_acl_trace_t *_t =
vlib_add_trace (vm, node, b[0], sizeof (*_t));
_t->sw_if_index =
- vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way];
_t->next_index = _next[0];
- _t->table_index = t[0] ? t[0] - vcm->tables : ~0;
+ _t->table_index = table_index[0];
_t->offset = (e[0]
&& t[0]) ? vnet_classify_get_offset (t[0], e[0]) : ~0;
}
@@ -548,9 +523,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
ip_in_out_acl_trace_t *_t =
vlib_add_trace (vm, node, b[1], sizeof (*_t));
_t->sw_if_index =
- vnet_buffer (b[1])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ ~0 == way ? 0 : vnet_buffer (b[1])->sw_if_index[way];
_t->next_index = _next[1];
- _t->table_index = t[1] ? t[1] - vcm->tables : ~0;
+ _t->table_index = table_index[1];
_t->offset = (e[1]
&& t[1]) ? vnet_classify_get_offset (t[1], e[1]) : ~0;
}
@@ -584,15 +559,12 @@ ip_in_out_acl_inline (vlib_main_t * vm,
vnet_classify_table_t *t0 = 0;
vnet_classify_entry_t *e0 = 0;
u32 next0 = ACL_NEXT_INDEX_DENY;
- u64 hash0;
- u8 error0;
+ u32 hash0;
- sw_if_index0 =
- vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
- table_index0 =
- am->classify_table_index_by_sw_if_index[is_output][tid][sw_if_index0];
+ sw_if_index0 = ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way];
+ table_index0 = table_index_by_sw_if_index[sw_if_index0];
- t0 = pool_elt_at_index (vcm->tables, table_index0);
+ t0 = pool_elt_at_index (tables, table_index0);
if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
h0 =
@@ -603,10 +575,10 @@ ip_in_out_acl_inline (vlib_main_t * vm,
if (is_output)
{
/* Save the rewrite length, since we are using the l2_classify struct */
- vnet_buffer (b[0])->l2_classify.pad.l2_len =
+ vnet_buffer (b[0])->l2.l2_len =
vnet_buffer (b[0])->ip.save_rewrite_length;
/* advance the match pointer so the matching happens on IP header */
- h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h0 += vnet_buffer (b[0])->l2.l2_len;
}
vnet_buffer (b[0])->l2_classify.hash =
@@ -615,14 +587,13 @@ ip_in_out_acl_inline (vlib_main_t * vm,
vnet_buffer (b[0])->l2_classify.table_index = table_index0;
vnet_buffer (b[0])->l2_classify.opaque_index = ~0;
- vnet_get_config_data (am->vnet_config_main[is_output][tid],
- &b[0]->current_config_index, &next0,
+ vnet_get_config_data (cm, &b[0]->current_config_index, &next0,
/* # bytes of config data */ 0);
if (PREDICT_TRUE (table_index0 != ~0))
{
hash0 = vnet_buffer (b[0])->l2_classify.hash;
- t0 = pool_elt_at_index (vcm->tables, table_index0);
+ t0 = pool_elt_at_index (tables, table_index0);
if (t0->current_data_flag == CLASSIFY_FLAG_USE_CURR_DATA)
h0 =
@@ -633,7 +604,7 @@ ip_in_out_acl_inline (vlib_main_t * vm,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h0 += vnet_buffer (b[0])->l2.l2_len;
e0 = vnet_classify_find_entry_inline (t0, (u8 *) h0, hash0, now);
if (e0)
@@ -646,15 +617,8 @@ ip_in_out_acl_inline (vlib_main_t * vm,
hits++;
- if (is_ip4)
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error0];
+ b[0]->error =
+ (next0 == ACL_NEXT_INDEX_DENY) ? error_deny : error_none;
if (!is_output)
{
@@ -662,16 +626,21 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e0->action == CLASSIFY_ACTION_SET_IP6_FIB_INDEX)
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = e0->metadata;
else if (e0->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = e0->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = e0->metadata;
+ /* For source check in case we skip the lookup node */
+ ip_lookup_set_buffer_fib_index (fib_index_by_sw_if_index,
+ b[0]);
+ }
}
}
else
{
while (1)
{
- if (PREDICT_TRUE (t0->next_table_index != ~0))
- t0 =
- pool_elt_at_index (vcm->tables, t0->next_table_index);
+ table_index0 = t0->next_table_index;
+ if (PREDICT_TRUE (table_index0 != ~0))
+ t0 = pool_elt_at_index (tables, table_index0);
else
{
next0 = (t0->miss_next_index < n_next_nodes) ?
@@ -679,15 +648,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
misses++;
- if (is_ip4)
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_TABLE_MISS :
- IP4_ERROR_INACL_TABLE_MISS) : IP4_ERROR_NONE;
- else
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_TABLE_MISS :
- IP6_ERROR_INACL_TABLE_MISS) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error0];
+ b[0]->error = (next0 == ACL_NEXT_INDEX_DENY) ?
+ error_miss :
+ error_none;
break;
}
@@ -700,7 +663,7 @@ ip_in_out_acl_inline (vlib_main_t * vm,
/* advance the match pointer so the matching happens on IP header */
if (is_output)
- h0 += vnet_buffer (b[0])->l2_classify.pad.l2_len;
+ h0 += vnet_buffer (b[0])->l2.l2_len;
hash0 = vnet_classify_hash_packet_inline (t0, (u8 *) h0);
e0 = vnet_classify_find_entry_inline
@@ -714,15 +677,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
e0->next_index : next0;
hits++;
- if (is_ip4)
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP4_ERROR_OUTACL_SESSION_DENY :
- IP4_ERROR_INACL_SESSION_DENY) : IP4_ERROR_NONE;
- else
- error0 = (next0 == ACL_NEXT_INDEX_DENY) ?
- (is_output ? IP6_ERROR_OUTACL_SESSION_DENY :
- IP6_ERROR_INACL_SESSION_DENY) : IP6_ERROR_NONE;
- b[0]->error = error_node->errors[error0];
+ b[0]->error = (next0 == ACL_NEXT_INDEX_DENY) ?
+ error_deny :
+ error_none;
if (!is_output)
{
@@ -733,8 +690,14 @@ ip_in_out_acl_inline (vlib_main_t * vm,
vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
e0->metadata;
else if (e0->action == CLASSIFY_ACTION_SET_METADATA)
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
- e0->metadata;
+ {
+ vnet_buffer (b[0])->ip.adj_index[VLIB_TX] =
+ e0->metadata;
+ /* For source check in case we skip the lookup
+ * node */
+ ip_lookup_set_buffer_fib_index (
+ fib_index_by_sw_if_index, b[0]);
+ }
}
break;
}
@@ -747,9 +710,9 @@ ip_in_out_acl_inline (vlib_main_t * vm,
ip_in_out_acl_trace_t *t =
vlib_add_trace (vm, node, b[0], sizeof (*t));
t->sw_if_index =
- vnet_buffer (b[0])->sw_if_index[is_output ? VLIB_TX : VLIB_RX];
+ ~0 == way ? 0 : vnet_buffer (b[0])->sw_if_index[way];
t->next_index = next0;
- t->table_index = t0 ? t0 - vcm->tables : ~0;
+ t->table_index = table_index0;
t->offset = (e0 && t0) ? vnet_classify_get_offset (t0, e0) : ~0;
}
@@ -767,69 +730,92 @@ ip_in_out_acl_inline (vlib_main_t * vm,
n_left--;
}
- vlib_node_increment_counter (vm, node->node_index,
- is_output ? IP_OUTACL_ERROR_MISS :
- IP_INACL_ERROR_MISS, misses);
- vlib_node_increment_counter (vm, node->node_index,
- is_output ? IP_OUTACL_ERROR_HIT :
- IP_INACL_ERROR_HIT, hits);
- vlib_node_increment_counter (vm, node->node_index,
- is_output ? IP_OUTACL_ERROR_CHAIN_HIT :
- IP_INACL_ERROR_CHAIN_HIT, chain_hits);
+ *hits__ = hits;
+ *misses__ = misses;
+ *chain_hits__ = chain_hits;
}
-VLIB_NODE_FN (ip4_inacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+static_always_inline uword
+ip_in_out_acl_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, const in_out_acl_table_id_t tid,
+ u32 *fib_index_by_sw_if_index,
+ const vlib_node_registration_t *parent_error_node,
+ const u32 error_none_index, const u32 error_deny_index,
+ const u32 error_miss_index, const vlib_rx_or_tx_t way,
+ const int is_output)
{
-
- u32 *from;
+ const in_out_acl_main_t *am = &in_out_acl_main;
+ vnet_classify_table_t *tables = am->vnet_classify_main->tables;
+ u32 *from = vlib_frame_vector_args (frame);
+ const u32 *table_index_by_sw_if_index =
+ am->classify_table_index_by_sw_if_index[is_output][tid];
+ vnet_config_main_t *cm = am->vnet_config_main[is_output][tid];
+ const vlib_node_runtime_t *error_node =
+ vlib_node_get_runtime (vm, parent_error_node->index);
+ const vlib_error_t error_none = error_node->errors[error_none_index];
+ const vlib_error_t error_deny = error_node->errors[error_deny_index];
+ const vlib_error_t error_miss = error_node->errors[error_miss_index];
vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
u16 nexts[VLIB_FRAME_SIZE];
-
- from = vlib_frame_vector_args (frame);
+ u32 hits, misses, chain_hits;
vlib_get_buffers (vm, from, bufs, frame->n_vectors);
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 1 /* is_ip4 */ ,
- 0 /* is_output */ , 1 /* is_trace */ );
+#define ip_in_out_acl_inline_trace__(do_trace) \
+ ip_in_out_acl_inline_trace ( \
+ vm, node, frame, bufs, nexts, frame->n_vectors, &hits, &misses, \
+ &chain_hits, error_deny, error_miss, error_none, tables, \
+ table_index_by_sw_if_index, fib_index_by_sw_if_index, cm, way, is_output, \
+ do_trace)
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ ip_in_out_acl_inline_trace__ (1 /* do_trace */);
else
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 1 /* is_ip4 */ ,
- 0 /* is_output */ , 0 /* is_trace */ );
+ ip_in_out_acl_inline_trace__ (0 /* do_trace */);
+
+ vlib_node_increment_counter (
+ vm, node->node_index,
+ is_output ? IP_OUTACL_ERROR_MISS : IP_INACL_ERROR_MISS, misses);
+ vlib_node_increment_counter (
+ vm, node->node_index, is_output ? IP_OUTACL_ERROR_HIT : IP_INACL_ERROR_HIT,
+ hits);
+ vlib_node_increment_counter (vm, node->node_index,
+ is_output ? IP_OUTACL_ERROR_CHAIN_HIT :
+ IP_INACL_ERROR_CHAIN_HIT,
+ chain_hits);
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
-
return frame->n_vectors;
}
-VLIB_NODE_FN (ip4_outacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (ip4_inacl_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- u32 *from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
- u16 nexts[VLIB_FRAME_SIZE];
-
- from = vlib_frame_vector_args (frame);
-
- vlib_get_buffers (vm, from, bufs, frame->n_vectors);
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 1 /* is_ip4 */ ,
- 1 /* is_output */ , 1 /* is_trace */ );
- else
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 1 /* is_ip4 */ ,
- 1 /* is_output */ , 0 /* is_trace */ );
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP4, ip4_main.fib_index_by_sw_if_index,
+ &ip4_input_node, IP4_ERROR_NONE, IP4_ERROR_INACL_SESSION_DENY,
+ IP4_ERROR_INACL_TABLE_MISS, VLIB_RX, 0 /* is_output */);
+}
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+VLIB_NODE_FN (ip4_punt_acl_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP4_PUNT,
+ ip4_main.fib_index_by_sw_if_index, &ip4_input_node, IP4_ERROR_NONE,
+ IP4_ERROR_INACL_SESSION_DENY, IP4_ERROR_INACL_TABLE_MISS, ~0 /* way */,
+ 0 /* is_output */);
+}
- return frame->n_vectors;
+VLIB_NODE_FN (ip4_outacl_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP4, NULL, &ip4_input_node,
+ IP4_ERROR_NONE, IP4_ERROR_INACL_SESSION_DENY, IP4_ERROR_INACL_TABLE_MISS,
+ VLIB_TX, 1 /* is_output */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_inacl_node) = {
.name = "ip4-inacl",
.vector_size = sizeof (u32),
@@ -843,6 +829,19 @@ VLIB_REGISTER_NODE (ip4_inacl_node) = {
},
};
+VLIB_REGISTER_NODE (ip4_punt_acl_node) = {
+ .name = "ip4-punt-acl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_inacl_trace,
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "ip4-drop",
+ },
+};
+
VLIB_REGISTER_NODE (ip4_outacl_node) = {
.name = "ip4-outacl",
.vector_size = sizeof (u32),
@@ -855,59 +854,41 @@ VLIB_REGISTER_NODE (ip4_outacl_node) = {
[ACL_NEXT_INDEX_DENY] = "ip4-drop",
},
};
-/* *INDENT-ON* */
+
+VNET_FEATURE_INIT (ip4_punt_acl_feature) = {
+ .arc_name = "ip4-punt",
+ .node_name = "ip4-punt-acl",
+ .runs_after = VNET_FEATURES ("ip4-punt-policer"),
+};
VLIB_NODE_FN (ip6_inacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- u32 *from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
- u16 nexts[VLIB_FRAME_SIZE];
-
- from = vlib_frame_vector_args (frame);
-
- vlib_get_buffers (vm, from, bufs, frame->n_vectors);
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 0 /* is_ip4 */ ,
- 0 /* is_output */ , 1 /* is_trace */ );
- else
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 0 /* is_ip4 */ ,
- 0 /* is_output */ , 0 /* is_trace */ );
-
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP6, ip6_main.fib_index_by_sw_if_index,
+ &ip6_input_node, IP6_ERROR_NONE, IP6_ERROR_INACL_SESSION_DENY,
+ IP6_ERROR_INACL_TABLE_MISS, VLIB_RX, 0 /* is_output */);
+}
- return frame->n_vectors;
+VLIB_NODE_FN (ip6_punt_acl_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP6_PUNT,
+ ip4_main.fib_index_by_sw_if_index, &ip6_input_node, IP6_ERROR_NONE,
+ IP6_ERROR_INACL_SESSION_DENY, IP6_ERROR_INACL_TABLE_MISS, ~0 /* way */,
+ 0 /* is_output */);
}
VLIB_NODE_FN (ip6_outacl_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- u32 *from;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
- u16 nexts[VLIB_FRAME_SIZE];
-
- from = vlib_frame_vector_args (frame);
-
- vlib_get_buffers (vm, from, bufs, frame->n_vectors);
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 0 /* is_ip4 */ ,
- 1 /* is_output */ , 1 /* is_trace */ );
- else
- ip_in_out_acl_inline (vm, node, bufs, nexts, frame->n_vectors,
- 0 /* is_ip4 */ ,
- 1 /* is_output */ , 0 /* is_trace */ );
-
- vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
-
- return frame->n_vectors;
+ return ip_in_out_acl_inline (
+ vm, node, frame, IN_OUT_ACL_TABLE_IP6, NULL, &ip6_input_node,
+ IP6_ERROR_NONE, IP6_ERROR_INACL_SESSION_DENY, IP6_ERROR_INACL_TABLE_MISS,
+ VLIB_TX, 1 /* is_output */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_inacl_node) = {
.name = "ip6-inacl",
.vector_size = sizeof (u32),
@@ -921,6 +902,19 @@ VLIB_REGISTER_NODE (ip6_inacl_node) = {
},
};
+VLIB_REGISTER_NODE (ip6_punt_acl_node) = {
+ .name = "ip6-punt-acl",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip_inacl_trace,
+ .n_errors = ARRAY_LEN(ip_inacl_error_strings),
+ .error_strings = ip_inacl_error_strings,
+
+ .n_next_nodes = ACL_NEXT_INDEX_N_NEXT,
+ .next_nodes = {
+ [ACL_NEXT_INDEX_DENY] = "ip6-drop",
+ },
+};
+
VLIB_REGISTER_NODE (ip6_outacl_node) = {
.name = "ip6-outacl",
.vector_size = sizeof (u32),
@@ -933,7 +927,12 @@ VLIB_REGISTER_NODE (ip6_outacl_node) = {
[ACL_NEXT_INDEX_DENY] = "ip6-drop",
},
};
-/* *INDENT-ON* */
+
+VNET_FEATURE_INIT (ip6_punt_acl_feature) = {
+ .arc_name = "ip6-punt",
+ .node_name = "ip6-punt-acl",
+ .runs_after = VNET_FEATURES ("ip6-punt-policer"),
+};
#ifndef CLIB_MARCH_VARIANT
static clib_error_t *
diff --git a/src/vnet/ip/ip_init.c b/src/vnet/ip/ip_init.c
index 8894a878881..c2490f196ef 100644
--- a/src/vnet/ip/ip_init.c
+++ b/src/vnet/ip/ip_init.c
@@ -104,7 +104,6 @@ do { \
return error;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip_main_init) = {
.init_order = VLIB_INITS ("vnet_main_init", "ip4_init", "ip6_init",
"icmp4_init", "icmp6_init", "ip6_hop_by_hop_init",
@@ -112,7 +111,6 @@ VLIB_INIT_FUNCTION (ip_main_init) = {
"in_out_acl_init", "policer_classify_init",
"flow_classify_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/ip_interface.c b/src/vnet/ip/ip_interface.c
index 48c20a6cf34..ca1938f651a 100644
--- a/src/vnet/ip/ip_interface.c
+++ b/src/vnet/ip/ip_interface.c
@@ -145,27 +145,23 @@ ip_interface_has_address (u32 sw_if_index, ip46_address_t * ip, u8 is_ip4)
{
ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
ip4_address_t *ip4;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip4 = ip_interface_address_get_address (lm4, ia);
if (ip4_address_compare (ip4, &ip->ip4) == 0)
return 1;
}));
- /* *INDENT-ON* */
}
else
{
ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
ip6_address_t *ip6;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip6 = ip_interface_address_get_address (lm6, ia);
if (ip6_address_compare (ip6, &ip->ip6) == 0)
return 1;
}));
- /* *INDENT-ON* */
}
return 0;
}
@@ -179,16 +175,13 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4)
if (is_ip4)
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, sw_if_index, 1 /* unnumbered */ ,
({
return ip_interface_address_get_address (lm4, ia);
}));
- /* *INDENT-ON* */
}
else
{
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm6, ia, sw_if_index, 1 /* unnumbered */ ,
({
ip6_address_t *rv;
@@ -197,21 +190,19 @@ ip_interface_get_first_ip (u32 sw_if_index, u8 is_ip4)
if (!ip6_address_is_link_local_unicast (rv))
return rv;
}));
- /* *INDENT-ON* */
}
return 0;
}
-static walk_rc_t
-ip_interface_address_mark_one_interface (vnet_main_t * vnm,
- vnet_sw_interface_t * si, void *ctx)
+walk_rc_t
+ip_interface_address_mark_one_interface (vnet_main_t *vnm,
+ vnet_sw_interface_t *si, void *ctx)
{
ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
ip_interface_address_t *ia = 0;
- /* *INDENT-OFF* */
foreach_ip_interface_address (lm4, ia, si->sw_if_index, 1 /* unnumbered */ ,
({
ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE;
@@ -220,7 +211,6 @@ ip_interface_address_mark_one_interface (vnet_main_t * vnm,
({
ia->flags |= IP_INTERFACE_ADDRESS_FLAG_STALE;
}));
- /* *INDENT-ON* */
return (WALK_CONTINUE);
}
@@ -246,7 +236,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm,
u32 *ip4_masks = 0;
int i;
- /* *INDENT-OFF* */
foreach_ip_interface_address (&im4->lookup_main, ia, si->sw_if_index, 1,
({
if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
@@ -268,7 +257,6 @@ ip_interface_address_sweep_one_interface (vnet_main_t * vnm,
vec_add1 (ip6_masks, ia->address_length);
}
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (ip4_addrs); i++)
ip4_add_del_interface_address (vm, si->sw_if_index, &ip4_addrs[i],
diff --git a/src/vnet/ip/ip_interface.h b/src/vnet/ip/ip_interface.h
index b48eebdbc90..f0034ed0314 100644
--- a/src/vnet/ip/ip_interface.h
+++ b/src/vnet/ip/ip_interface.h
@@ -38,6 +38,9 @@ void ip_interface_address_sweep (void);
u32 ip_interface_address_find (ip_lookup_main_t * lm,
void *addr_fib, u32 address_length);
u8 ip_interface_has_address (u32 sw_if_index, ip46_address_t * ip, u8 is_ip4);
+walk_rc_t ip_interface_address_mark_one_interface (vnet_main_t *vnm,
+ vnet_sw_interface_t *si,
+ void *ctx);
always_inline void *
ip_interface_address_get_address (ip_lookup_main_t * lm,
@@ -53,7 +56,6 @@ ip_get_interface_prefix (ip_lookup_main_t * lm, ip_interface_prefix_key_t * k)
return p ? pool_elt_at_index (lm->if_prefix_pool, p[0]) : 0;
}
-/* *INDENT-OFF* */
#define foreach_ip_interface_address(lm,a,sw_if_index,loop,body) \
do { \
vnet_main_t *_vnm = vnet_get_main(); \
@@ -87,7 +89,6 @@ do { \
body; \
} \
} while (0)
-/* *INDENT-ON* */
#endif /* included_ip_interface_h */
diff --git a/src/vnet/ip/ip_packet.h b/src/vnet/ip/ip_packet.h
index b0b5f41260c..04cf9f11d70 100644..100755
--- a/src/vnet/ip/ip_packet.h
+++ b/src/vnet/ip/ip_packet.h
@@ -149,98 +149,6 @@ STATIC_ASSERT_SIZEOF (ip_ecn_t, 1);
extern u8 *format_ip_ecn (u8 * s, va_list * va);
-/* IP checksum support. */
-
-static_always_inline u16
-ip_csum (void *data, u16 n_left)
-{
- u32 sum;
-#ifdef CLIB_HAVE_VEC256
- u16x16 v1, v2;
- u32x8 zero = { 0 };
- u32x8 sum8 = { 0 };
- u32x4 sum4;
-#endif
-
- /* if there is odd number of bytes, pad by zero and store in sum */
- sum = (n_left & 1) ? ((u8 *) data)[n_left - 1] << 8 : 0;
-
- /* we deal with words */
- n_left >>= 1;
-
-#ifdef CLIB_HAVE_VEC256
- while (n_left >= 32)
- {
- v1 = u16x16_load_unaligned (data);
- v2 = u16x16_load_unaligned (data + 32);
-
-#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
- v1 = u16x16_byte_swap (v1);
- v2 = u16x16_byte_swap (v2);
-#endif
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v2));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v2));
- n_left -= 32;
- data += 64;
- }
-
- if (n_left >= 16)
- {
- v1 = u16x16_load_unaligned (data);
-#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
- v1 = u16x16_byte_swap (v1);
-#endif
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
- n_left -= 16;
- data += 32;
- }
-
- if (n_left)
- {
- v1 = u16x16_load_unaligned (data);
-#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
- v1 = u16x16_byte_swap (v1);
-#endif
- v1 = u16x16_mask_last (v1, 16 - n_left);
- sum8 += u32x8_from_u16x8 (u16x16_extract_lo (v1));
- sum8 += u32x8_from_u16x8 (u16x16_extract_hi (v1));
- }
-
- sum8 = u32x8_hadd (sum8, zero);
- sum4 = u32x8_extract_lo (sum8) + u32x8_extract_hi (sum8);
- sum += sum4[0] + sum4[1];
-
-#else
- /* scalar version */
- while (n_left >= 8)
- {
- sum += clib_net_to_host_u16 (*((u16 *) data + 0));
- sum += clib_net_to_host_u16 (*((u16 *) data + 1));
- sum += clib_net_to_host_u16 (*((u16 *) data + 2));
- sum += clib_net_to_host_u16 (*((u16 *) data + 3));
- sum += clib_net_to_host_u16 (*((u16 *) data + 4));
- sum += clib_net_to_host_u16 (*((u16 *) data + 5));
- sum += clib_net_to_host_u16 (*((u16 *) data + 6));
- sum += clib_net_to_host_u16 (*((u16 *) data + 7));
- n_left -= 8;
- data += 16;
- }
- while (n_left)
- {
- sum += clib_net_to_host_u16 (*(u16 *) data);
- n_left -= 1;
- data += 2;
- }
-#endif
-
- sum = (sum & 0xffff) + (sum >> 16);
- sum = (sum & 0xffff) + (sum >> 16);
- return ~((u16) sum);
-}
-
/* Incremental checksum update. */
typedef uword ip_csum_t;
@@ -301,6 +209,20 @@ always_inline u16
ip_csum_fold (ip_csum_t c)
{
/* Reduce to 16 bits. */
+#if defined(__x86_64__) && defined(__BMI2__)
+ u64 tmp;
+ asm volatile(
+ /* using ADC is much faster than mov, shift, add sequence
+ * compiler produces */
+ "mov %k[sum], %k[tmp] \n\t"
+ "shr $32, %[sum] \n\t"
+ "add %k[tmp], %k[sum] \n\t"
+ "mov $16, %k[tmp] \n\t"
+ "shrx %k[tmp], %k[sum], %k[tmp] \n\t"
+ "adc %w[tmp], %w[sum] \n\t"
+ "adc $0, %w[sum] \n\t"
+ : [ sum ] "+&r"(c), [ tmp ] "=&r"(tmp));
+#else
#if uword_bits == 64
c = (c & (ip_csum_t) 0xffffffff) + (c >> (ip_csum_t) 32);
c = (c & 0xffff) + (c >> 16);
@@ -308,7 +230,7 @@ ip_csum_fold (ip_csum_t c)
c = (c & 0xffff) + (c >> 16);
c = (c & 0xffff) + (c >> 16);
-
+#endif
return c;
}
diff --git a/src/vnet/ip/ip_path_mtu.c b/src/vnet/ip/ip_path_mtu.c
index 38adb44065b..ccb57e1e352 100644
--- a/src/vnet/ip/ip_path_mtu.c
+++ b/src/vnet/ip/ip_path_mtu.c
@@ -297,10 +297,19 @@ ip_ptmu_adj_walk_update (adj_index_t ai, void *ctx)
static ip_pmtu_dpo_t *
ip_pmtu_dpo_alloc (void)
{
+ vlib_main_t *vm = vlib_get_main ();
+ u8 need_barrier_sync = pool_get_will_expand (ip_pmtu_dpo_pool);
ip_pmtu_dpo_t *ipm;
+
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_sync (vm);
+
pool_get_aligned_zero (ip_pmtu_dpo_pool, ipm, sizeof (ip_pmtu_dpo_t));
+ if (need_barrier_sync)
+ vlib_worker_thread_barrier_release (vm);
+
return (ipm);
}
@@ -353,18 +362,16 @@ ip_pmtu_dpo_get_urpf (const dpo_id_t *dpo)
}
void
-ip_pmtu_dpo_add_or_lock (fib_protocol_t fproto, u16 pmtu, dpo_id_t *dpo)
+ip_pmtu_dpo_add_or_lock (u16 pmtu, const dpo_id_t *parent, dpo_id_t *dpo)
{
ip_pmtu_dpo_t *ipm;
- dpo_id_t parent = DPO_INVALID;
ipm = ip_pmtu_dpo_alloc ();
- ipm->ipm_proto = fib_proto_to_dpo (fproto);
+ ipm->ipm_proto = parent->dpoi_proto;
ipm->ipm_pmtu = pmtu;
- dpo_copy (&parent, drop_dpo_get (ipm->ipm_proto));
- dpo_stack (ip_pmtu_dpo_type, ipm->ipm_proto, &ipm->ipm_dpo, &parent);
+ dpo_stack (ip_pmtu_dpo_type, ipm->ipm_proto, &ipm->ipm_dpo, parent);
dpo_set (dpo, ip_pmtu_dpo_type, ipm->ipm_proto, ip_pmtu_dpo_get_index (ipm));
}
@@ -516,7 +523,9 @@ ip_pmtu_alloc (u32 fib_index, const fib_prefix_t *pfx,
/*
* interpose a policy DPO from the nh so that MTU is applied
*/
- ip_pmtu_dpo_add_or_lock (pfx->fp_proto, ipt->ipt_oper_pmtu, &ip_dpo);
+ ip_pmtu_dpo_add_or_lock (ipt->ipt_oper_pmtu,
+ drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)),
+ &ip_dpo);
fib_table_entry_special_dpo_add (fib_index, pfx, ip_pmtu_source,
FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
@@ -587,7 +596,9 @@ ip_pmtu_stack (ip_pmtu_t *ipt)
{
dpo_id_t ip_dpo = DPO_INVALID;
- ip_pmtu_dpo_add_or_lock (pfx->fp_proto, ipt->ipt_oper_pmtu, &ip_dpo);
+ ip_pmtu_dpo_add_or_lock (
+ ipt->ipt_oper_pmtu,
+ drop_dpo_get (fib_proto_to_dpo (pfx->fp_proto)), &ip_dpo);
fib_table_entry_special_dpo_update (
fib_index, pfx, ip_pmtu_source, FIB_ENTRY_FLAG_INTERPOSE, &ip_dpo);
@@ -826,7 +837,8 @@ ip_path_module_init (vlib_main_t *vm)
adj_delegate_register_new_type (&ip_path_adj_delegate_vft);
ip_pmtu_source = fib_source_allocate ("path-mtu", FIB_SOURCE_PRIORITY_HI,
FIB_SOURCE_BH_SIMPLE);
- ip_pmtu_fib_type = fib_node_register_new_type (&ip_ptmu_fib_node_vft);
+ ip_pmtu_fib_type =
+ fib_node_register_new_type ("ip-pmtu", &ip_ptmu_fib_node_vft);
ip_pmtu_db = hash_create_mem (0, sizeof (ip_pmtu_key_t), sizeof (index_t));
ip_pmtu_logger = vlib_log_register_class ("ip", "pmtu");
diff --git a/src/vnet/ip/ip_path_mtu.h b/src/vnet/ip/ip_path_mtu.h
index 2c54fcd7401..96a5227237a 100644
--- a/src/vnet/ip/ip_path_mtu.h
+++ b/src/vnet/ip/ip_path_mtu.h
@@ -100,6 +100,9 @@ extern int ip_path_mtu_replace_end (void);
extern u32 ip_pmtu_get_table_id (const ip_pmtu_t *ipt);
extern void ip_pmtu_get_ip (const ip_pmtu_t *ipt, ip_address_t *ip);
+extern void ip_pmtu_dpo_add_or_lock (u16 pmtu, const dpo_id_t *parent,
+ dpo_id_t *dpo);
+
/**
* Data-plane accessor functions
*/
diff --git a/src/vnet/ip/ip_path_mtu_node.c b/src/vnet/ip/ip_path_mtu_node.c
index b13f9de849c..cadf1cbe137 100644
--- a/src/vnet/ip/ip_path_mtu_node.c
+++ b/src/vnet/ip/ip_path_mtu_node.c
@@ -49,7 +49,6 @@ ip_pmtu_dpo_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, ip_address_family_t af)
{
u32 n_left_from, *from, next_index, *to_next, n_left_to_next;
- u32 frag_sent = 0, small_packets = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -114,8 +113,6 @@ ip_pmtu_dpo_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (error0 == IP_FRAG_ERROR_NONE)
{
/* Free original buffer chain */
- frag_sent += vec_len (buffer);
- small_packets += (vec_len (buffer) == 1);
vlib_buffer_free_one (vm, pi0); /* Free original packet */
}
else
@@ -176,7 +173,8 @@ VLIB_REGISTER_NODE (ip4_ip_pmtu_dpo_node) = {
.name = "ip4-pmtu-dpo",
.vector_size = sizeof (u32),
.format_trace = format_ip_pmtu_trace,
- .n_errors = 0,
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_PMTU_N_NEXT,
.next_nodes =
{
@@ -187,7 +185,8 @@ VLIB_REGISTER_NODE (ip6_ip_pmtu_dpo_node) = {
.name = "ip6-pmtu-dpo",
.vector_size = sizeof (u32),
.format_trace = format_ip_pmtu_trace,
- .n_errors = 0,
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_counters = ip_frag_error_counters,
.n_next_nodes = IP_PMTU_N_NEXT,
.next_nodes =
{
diff --git a/src/vnet/ip/ip_psh_cksum.h b/src/vnet/ip/ip_psh_cksum.h
new file mode 100644
index 00000000000..a80211561b7
--- /dev/null
+++ b/src/vnet/ip/ip_psh_cksum.h
@@ -0,0 +1,55 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_ip_psh_cksum_h
+#define included_ip_psh_cksum_h
+
+#include <vnet/ip/ip.h>
+#include <vppinfra/vector/ip_csum.h>
+
+typedef struct _ip4_psh
+{
+ ip4_address_t src;
+ ip4_address_t dst;
+ u8 zero;
+ u8 proto;
+ u16 l4len;
+} ip4_psh_t;
+
+typedef struct _ip6_psh
+{
+ ip6_address_t src;
+ ip6_address_t dst;
+ u32 l4len;
+ u32 proto;
+} ip6_psh_t;
+
+STATIC_ASSERT (sizeof (ip4_psh_t) == 12, "ipv4 pseudo header is 12B");
+STATIC_ASSERT (sizeof (ip6_psh_t) == 40, "ipv6 pseudo header is 40B");
+
+static_always_inline u16
+ip4_pseudo_header_cksum (ip4_header_t *ip4)
+{
+ ip4_psh_t psh = { 0 };
+ psh.src = ip4->src_address;
+ psh.dst = ip4->dst_address;
+ psh.proto = ip4->protocol;
+ psh.l4len = clib_host_to_net_u16 (clib_net_to_host_u16 (ip4->length) -
+ sizeof (ip4_header_t));
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip4_psh_t)));
+}
+
+static_always_inline u16
+ip6_pseudo_header_cksum (ip6_header_t *ip6)
+{
+ ip6_psh_t psh = { 0 };
+ psh.src = ip6->src_address;
+ psh.dst = ip6->dst_address;
+ psh.l4len = ip6->payload_length;
+ psh.proto = clib_host_to_net_u32 ((u32) ip6->protocol);
+ return ~(clib_ip_csum ((u8 *) &psh, sizeof (ip6_psh_t)));
+}
+
+#endif /* included_ip_psh_cksum_h */
diff --git a/src/vnet/ip/ip_punt_drop.c b/src/vnet/ip/ip_punt_drop.c
index f3388007b4a..dc113f51386 100644
--- a/src/vnet/ip/ip_punt_drop.c
+++ b/src/vnet/ip/ip_punt_drop.c
@@ -69,9 +69,9 @@ ip_punt_redirect_find (fib_protocol_t fproto, u32 rx_sw_if_index)
}
void
-ip_punt_redirect_add (fib_protocol_t fproto,
- u32 rx_sw_if_index,
- fib_forward_chain_type_t ct, fib_route_path_t * rpaths)
+ip_punt_redirect_add (fib_protocol_t fproto, u32 rx_sw_if_index,
+ fib_forward_chain_type_t ct,
+ const fib_route_path_t *rpaths)
{
ip_punt_redirect_rx_t *ipr;
index_t ipri;
@@ -143,9 +143,8 @@ format_ip_punt_redirect (u8 * s, va_list * args)
rx = ip_punt_redirect_get (rxs[rx_sw_if_index]);
- s = format (s, " rx %U via:\n",
- format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface (vnm, rx_sw_if_index));
+ s = format (s, " rx %U via:\n", format_vnet_sw_if_index_name, vnm,
+ rx_sw_if_index);
s = format (s, " %U", format_fib_path_list, rx->pl, 2);
s = format (s, " forwarding\n", format_dpo_id, &rx->dpo, 0);
s = format (s, " %U\n", format_dpo_id, &rx->dpo, 0);
diff --git a/src/vnet/ip/ip_punt_drop.h b/src/vnet/ip/ip_punt_drop.h
index a595e446adc..11b7ad6c895 100644
--- a/src/vnet/ip/ip_punt_drop.h
+++ b/src/vnet/ip/ip_punt_drop.h
@@ -267,10 +267,9 @@ typedef struct ip4_punt_redirect_trace_t_
/**
* Add a punt redirect entry
*/
-extern void ip_punt_redirect_add (fib_protocol_t fproto,
- u32 rx_sw_if_index,
+extern void ip_punt_redirect_add (fib_protocol_t fproto, u32 rx_sw_if_index,
fib_forward_chain_type_t ct,
- fib_route_path_t * rpaths);
+ const fib_route_path_t *rpaths);
extern void ip_punt_redirect_del (fib_protocol_t fproto, u32 rx_sw_if_index);
extern index_t ip_punt_redirect_find (fib_protocol_t fproto,
diff --git a/src/vnet/ip/ip_sas.c b/src/vnet/ip/ip_sas.c
new file mode 100644
index 00000000000..0fc261724f1
--- /dev/null
+++ b/src/vnet/ip/ip_sas.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ip_sas.h"
+#include <vppinfra/types.h>
+#include <vnet/ip/ip_interface.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/ip/ip6_link.h>
+#include <vppinfra/byte_order.h>
+
+/*
+ * This file implement source address selection for VPP applications
+ * (e.g. ping, DNS, ICMP)
+ * It does not yet implement full fledged RFC6724 SAS.
+ * SAS assumes every IP enabled interface has an address. The algorithm will
+ * not go and hunt for a suitable IP address on other interfaces than the
+ * output interface or the specified preferred sw_if_index.
+ * That means that an interface with just an IPv6 link-local address must also
+ * be configured with an unnumbered configuration pointing to a numbered
+ * interface.
+ */
+
+static int
+ip6_sas_commonlen (const ip6_address_t *a1, const ip6_address_t *a2)
+{
+ u64 fa = clib_net_to_host_u64 (a1->as_u64[0]) ^
+ clib_net_to_host_u64 (a2->as_u64[0]);
+ if (fa == 0)
+ {
+ u64 la = clib_net_to_host_u64 (a1->as_u64[1]) ^
+ clib_net_to_host_u64 (a2->as_u64[1]);
+ if (la == 0)
+ return 128;
+ return 64 + __builtin_clzll (la);
+ }
+ else
+ {
+ return __builtin_clzll (fa);
+ }
+}
+
+static int
+ip4_sas_commonlen (const ip4_address_t *a1, const ip4_address_t *a2)
+{
+ u64 a =
+ clib_net_to_host_u32 (a1->as_u32) ^ clib_net_to_host_u32 (a2->as_u32);
+ if (a == 0)
+ return 32;
+ return __builtin_clz (a);
+}
+
+/*
+ * walk all addresses on an interface:
+ * - prefer a source matching the scope of the destination address.
+ * - last resort pick the source address with the longest
+ * common prefix with destination
+ * NOTE: This should at some point implement RFC6724.
+ */
+bool
+ip6_sas_by_sw_if_index (u32 sw_if_index, const ip6_address_t *dst,
+ ip6_address_t *src)
+{
+ ip_interface_address_t *ia = 0;
+ ip_lookup_main_t *lm6 = &ip6_main.lookup_main;
+ ip6_address_t *tmp, *bestsrc = 0;
+ int bestlen = 0, l;
+
+ if (ip6_address_is_link_local_unicast (dst) ||
+ dst->as_u32[0] == clib_host_to_net_u32 (0xff020000))
+ {
+ const ip6_address_t *ll = ip6_get_link_local_address (sw_if_index);
+ if (NULL == ll)
+ {
+ return false;
+ }
+ ip6_address_copy (src, ll);
+ return true;
+ }
+
+ foreach_ip_interface_address (
+ lm6, ia, sw_if_index, 1, ({
+ if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
+ continue;
+ tmp = ip_interface_address_get_address (lm6, ia);
+ l = ip6_sas_commonlen (tmp, dst);
+ if (l > bestlen || bestsrc == 0)
+ {
+ bestsrc = tmp;
+ bestlen = l;
+ }
+ }));
+ if (bestsrc)
+ {
+ ip6_address_copy (src, bestsrc);
+ return true;
+ }
+ return false;
+}
+
+/*
+ * walk all addresses on an interface and pick the source address with the
+ * longest common prefix with destination.
+ */
+bool
+ip4_sas_by_sw_if_index (u32 sw_if_index, const ip4_address_t *dst,
+ ip4_address_t *src)
+{
+ ip_interface_address_t *ia = 0;
+ ip_lookup_main_t *lm4 = &ip4_main.lookup_main;
+ ip4_address_t *tmp, *bestsrc = 0;
+ int bestlen = 0, l;
+
+ foreach_ip_interface_address (
+ lm4, ia, sw_if_index, 1, ({
+ if (ia->flags & IP_INTERFACE_ADDRESS_FLAG_STALE)
+ continue;
+ tmp = ip_interface_address_get_address (lm4, ia);
+ l = ip4_sas_commonlen (tmp, dst);
+ if (l > bestlen || bestsrc == 0)
+ {
+ bestsrc = tmp;
+ bestlen = l;
+ }
+ }));
+ if (bestsrc)
+ {
+ src->as_u32 = bestsrc->as_u32;
+ return true;
+ }
+ return false;
+}
+
+/*
+ * table_id must be set. Default = 0.
+ * sw_if_index is the interface to pick SA from otherwise ~0 will pick from
+ * outbound interface.
+ *
+ * NOTE: What to do if multiple output interfaces?
+ *
+ */
+bool
+ip6_sas (u32 table_id, u32 sw_if_index, const ip6_address_t *dst,
+ ip6_address_t *src)
+{
+ fib_prefix_t prefix;
+ u32 if_index = sw_if_index;
+
+ /* If sw_if_index is not specified use the output interface. */
+ if (sw_if_index == ~0)
+ {
+ clib_memcpy (&prefix.fp_addr.ip6, dst, sizeof (*dst));
+ prefix.fp_proto = FIB_PROTOCOL_IP6;
+ prefix.fp_len = 128;
+
+ u32 fib_index = fib_table_find (prefix.fp_proto, table_id);
+ if (fib_index == (u32) ~0)
+ return false;
+
+ fib_node_index_t fei = fib_table_lookup (fib_index, &prefix);
+ if (fei == FIB_NODE_INDEX_INVALID)
+ return false;
+
+ u32 output_sw_if_index = fib_entry_get_resolving_interface (fei);
+ if (output_sw_if_index == ~0)
+ return false;
+ if_index = output_sw_if_index;
+ }
+ return ip6_sas_by_sw_if_index (if_index, dst, src);
+}
+
+/*
+ * table_id must be set. Default = 0.
+ * sw_if_index is the interface to pick SA from otherwise ~0 will pick from
+ * outbound interface.
+ *
+ * NOTE: What to do if multiple output interfaces?
+ *
+ */
+bool
+ip4_sas (u32 table_id, u32 sw_if_index, const ip4_address_t *dst,
+ ip4_address_t *src)
+{
+ fib_prefix_t prefix;
+ u32 if_index = sw_if_index;
+
+ /* If sw_if_index is not specified use the output interface. */
+ if (sw_if_index == ~0)
+ {
+ clib_memcpy (&prefix.fp_addr.ip4, dst, sizeof (*dst));
+ prefix.fp_proto = FIB_PROTOCOL_IP4;
+ prefix.fp_len = 32;
+
+ u32 fib_index = fib_table_find (prefix.fp_proto, table_id);
+ if (fib_index == (u32) ~0)
+ return false;
+
+ fib_node_index_t fei = fib_table_lookup (fib_index, &prefix);
+ if (fei == FIB_NODE_INDEX_INVALID)
+ return false;
+
+ u32 output_sw_if_index = fib_entry_get_resolving_interface (fei);
+ if (output_sw_if_index == ~0)
+ return false;
+ if_index = output_sw_if_index;
+ }
+ return ip4_sas_by_sw_if_index (if_index, dst, src);
+}
diff --git a/src/vnet/ip/ip_sas.h b/src/vnet/ip/ip_sas.h
new file mode 100644
index 00000000000..b1e9e732ed9
--- /dev/null
+++ b/src/vnet/ip/ip_sas.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_ip_sas_h
+#define included_ip_sas_h
+
+#include <stdbool.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip4_packet.h>
+
+bool ip6_sas_by_sw_if_index (u32 sw_if_index, const ip6_address_t *dst,
+ ip6_address_t *src);
+bool ip4_sas_by_sw_if_index (u32 sw_if_index, const ip4_address_t *dst,
+ ip4_address_t *src);
+bool ip6_sas (u32 table_id, u32 sw_if_index, const ip6_address_t *dst,
+ ip6_address_t *src);
+bool ip4_sas (u32 table_id, u32 sw_if_index, const ip4_address_t *dst,
+ ip4_address_t *src);
+
+#endif
diff --git a/src/vnet/ip/ip_test.c b/src/vnet/ip/ip_test.c
new file mode 100644
index 00000000000..727afba67f4
--- /dev/null
+++ b/src/vnet/ip/ip_test.c
@@ -0,0 +1,1578 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vpp/api/types.h>
+#include <vnet/mpls/packet.h>
+#include <vnet/ip/ip_types_api.h>
+
+#define __plugin_msg_base ip_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <vnet/format_fns.h>
+#include <vnet/ip/ip.api_enum.h>
+#include <vnet/ip/ip.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
+
+#define vl_endianfun /* define message structures */
+#include <vnet/ip/ip.api.h>
+#undef vl_endianfun
+
+#define vl_calcsizefun
+#include <vnet/ip/ip.api.h>
+#undef vl_calcsizefun
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} ip_test_main_t;
+
+static ip_test_main_t ip_test_main;
+
+static int
+api_ip_route_add_del_v2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static void
+set_ip4_address (vl_api_address_t *a, u32 v)
+{
+ if (a->af == ADDRESS_IP4)
+ {
+ ip4_address_t *i = (ip4_address_t *) &a->un.ip4;
+ i->as_u32 = v;
+ }
+}
+
+static void
+increment_v4_address (vl_api_ip4_address_t *i)
+{
+ ip4_address_t *a = (ip4_address_t *) i;
+ u32 v;
+
+ v = ntohl (a->as_u32) + 1;
+ a->as_u32 = ntohl (v);
+}
+
+static void
+increment_v6_address (vl_api_ip6_address_t *i)
+{
+ ip6_address_t *a = (ip6_address_t *) i;
+ u64 v0, v1;
+
+ v0 = clib_net_to_host_u64 (a->as_u64[0]);
+ v1 = clib_net_to_host_u64 (a->as_u64[1]);
+
+ v1 += 1;
+ if (v1 == 0)
+ v0 += 1;
+ a->as_u64[0] = clib_net_to_host_u64 (v0);
+ a->as_u64[1] = clib_net_to_host_u64 (v1);
+}
+
+static void
+increment_address (vl_api_address_t *a)
+{
+ if (a->af == ADDRESS_IP4)
+ increment_v4_address (&a->un.ip4);
+ else if (a->af == ADDRESS_IP6)
+ increment_v6_address (&a->un.ip6);
+}
+
+static uword
+unformat_fib_path (unformat_input_t *input, va_list *args)
+{
+ vat_main_t *vam = va_arg (*args, vat_main_t *);
+ vl_api_fib_path_t *path = va_arg (*args, vl_api_fib_path_t *);
+ u32 weight, preference;
+ mpls_label_t out_label;
+
+ clib_memset (path, 0, sizeof (*path));
+ path->weight = 1;
+ path->sw_if_index = ~0;
+ path->rpf_id = ~0;
+ path->n_labels = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U %U", unformat_vl_api_ip4_address,
+ &path->nh.address.ip4, api_unformat_sw_if_index, vam,
+ &path->sw_if_index))
+ {
+ path->proto = FIB_API_PATH_NH_PROTO_IP4;
+ }
+ else if (unformat (input, "%U %U", unformat_vl_api_ip6_address,
+ &path->nh.address.ip6, api_unformat_sw_if_index, vam,
+ &path->sw_if_index))
+ {
+ path->proto = FIB_API_PATH_NH_PROTO_IP6;
+ }
+ else if (unformat (input, "weight %u", &weight))
+ {
+ path->weight = weight;
+ }
+ else if (unformat (input, "preference %u", &preference))
+ {
+ path->preference = preference;
+ }
+ else if (unformat (input, "%U next-hop-table %d",
+ unformat_vl_api_ip4_address, &path->nh.address.ip4,
+ &path->table_id))
+ {
+ path->proto = FIB_API_PATH_NH_PROTO_IP4;
+ }
+ else if (unformat (input, "%U next-hop-table %d",
+ unformat_vl_api_ip6_address, &path->nh.address.ip6,
+ &path->table_id))
+ {
+ path->proto = FIB_API_PATH_NH_PROTO_IP6;
+ }
+ else if (unformat (input, "%U", unformat_vl_api_ip4_address,
+ &path->nh.address.ip4))
+ {
+ /*
+ * the recursive next-hops are by default in the default table
+ */
+ path->table_id = 0;
+ path->sw_if_index = ~0;
+ path->proto = FIB_API_PATH_NH_PROTO_IP4;
+ }
+ else if (unformat (input, "%U", unformat_vl_api_ip6_address,
+ &path->nh.address.ip6))
+ {
+ /*
+ * the recursive next-hops are by default in the default table
+ */
+ path->table_id = 0;
+ path->sw_if_index = ~0;
+ path->proto = FIB_API_PATH_NH_PROTO_IP6;
+ }
+ else if (unformat (input, "resolve-via-host"))
+ {
+ path->flags |= FIB_API_PATH_FLAG_RESOLVE_VIA_HOST;
+ }
+ else if (unformat (input, "resolve-via-attached"))
+ {
+ path->flags |= FIB_API_PATH_FLAG_RESOLVE_VIA_ATTACHED;
+ }
+ else if (unformat (input, "ip4-lookup-in-table %d", &path->table_id))
+ {
+ path->type = FIB_API_PATH_TYPE_LOCAL;
+ path->sw_if_index = ~0;
+ path->proto = FIB_API_PATH_NH_PROTO_IP4;
+ }
+ else if (unformat (input, "ip6-lookup-in-table %d", &path->table_id))
+ {
+ path->type = FIB_API_PATH_TYPE_LOCAL;
+ path->sw_if_index = ~0;
+ path->proto = FIB_API_PATH_NH_PROTO_IP6;
+ }
+ else if (unformat (input, "sw_if_index %d", &path->sw_if_index))
+ ;
+ else if (unformat (input, "via-label %d", &path->nh.via_label))
+ {
+ path->proto = FIB_API_PATH_NH_PROTO_MPLS;
+ path->sw_if_index = ~0;
+ }
+ else if (unformat (input, "l2-input-on %d", &path->sw_if_index))
+ {
+ path->proto = FIB_API_PATH_NH_PROTO_ETHERNET;
+ path->type = FIB_API_PATH_TYPE_INTERFACE_RX;
+ }
+ else if (unformat (input, "local"))
+ {
+ path->type = FIB_API_PATH_TYPE_LOCAL;
+ }
+ else if (unformat (input, "out-labels"))
+ {
+ while (unformat (input, "%d", &out_label))
+ {
+ path->label_stack[path->n_labels].label = out_label;
+ path->label_stack[path->n_labels].is_uniform = 0;
+ path->label_stack[path->n_labels].ttl = 64;
+ path->n_labels++;
+ }
+ }
+ else if (unformat (input, "via"))
+ {
+ /* new path, back up and return */
+ unformat_put_input (input);
+ unformat_put_input (input);
+ unformat_put_input (input);
+ unformat_put_input (input);
+ break;
+ }
+ else
+ {
+ return (0);
+ }
+ }
+
+ path->proto = ntohl (path->proto);
+ path->type = ntohl (path->type);
+ path->flags = ntohl (path->flags);
+ path->table_id = ntohl (path->table_id);
+ path->sw_if_index = ntohl (path->sw_if_index);
+
+ return (1);
+}
+
+static int
+api_ip_route_add_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_route_add_del_t *mp;
+ u32 vrf_id = 0;
+ u8 is_add = 1;
+ u8 is_multipath = 0;
+ u8 prefix_set = 0;
+ u8 path_count = 0;
+ vl_api_prefix_t pfx = {};
+ vl_api_fib_path_t paths[8];
+ int count = 1;
+ int j;
+ f64 before = 0;
+ u32 random_add_del = 0;
+ u32 *random_vector = 0;
+ u32 random_seed = 0xdeaddabe;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vl_api_prefix, &pfx))
+ prefix_set = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (i, "count %d", &count))
+ ;
+ else if (unformat (i, "random"))
+ random_add_del = 1;
+ else if (unformat (i, "multipath"))
+ is_multipath = 1;
+ else if (unformat (i, "seed %d", &random_seed))
+ ;
+ else if (unformat (i, "via %U", unformat_fib_path, vam,
+ &paths[path_count]))
+ {
+ path_count++;
+ if (8 == path_count)
+ {
+ errmsg ("max 8 paths");
+ return -99;
+ }
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (!path_count)
+ {
+ errmsg ("specify a path; via ...");
+ return -99;
+ }
+ if (prefix_set == 0)
+ {
+ errmsg ("missing prefix");
+ return -99;
+ }
+
+ /* Generate a pile of unique, random routes */
+ if (random_add_del)
+ {
+ ip4_address_t *i = (ip4_address_t *) &paths[0].nh.address.ip4;
+ u32 this_random_address;
+ uword *random_hash;
+
+ random_hash = hash_create (count, sizeof (uword));
+
+ hash_set (random_hash, i->as_u32, 1);
+ for (j = 0; j <= count; j++)
+ {
+ do
+ {
+ this_random_address = random_u32 (&random_seed);
+ this_random_address = clib_host_to_net_u32 (this_random_address);
+ }
+ while (hash_get (random_hash, this_random_address));
+ vec_add1 (random_vector, this_random_address);
+ hash_set (random_hash, this_random_address, 1);
+ }
+ hash_free (random_hash);
+ set_ip4_address (&pfx.address, random_vector[0]);
+ }
+
+ if (count > 1)
+ {
+ /* Turn on async mode */
+ vam->async_mode = 1;
+ vam->async_errors = 0;
+ before = vat_time_now (vam);
+ }
+
+ for (j = 0; j < count; j++)
+ {
+ /* Construct the API message */
+ M2 (IP_ROUTE_ADD_DEL, mp, sizeof (vl_api_fib_path_t) * path_count);
+
+ mp->is_add = is_add;
+ mp->is_multipath = is_multipath;
+
+ clib_memcpy (&mp->route.prefix, &pfx, sizeof (pfx));
+ mp->route.table_id = ntohl (vrf_id);
+ mp->route.n_paths = path_count;
+
+ clib_memcpy (&mp->route.paths, &paths, sizeof (paths[0]) * path_count);
+
+ if (random_add_del)
+ set_ip4_address (&pfx.address, random_vector[j + 1]);
+ else
+ increment_address (&pfx.address);
+ /* send it... */
+ S (mp);
+ /* If we receive SIGTERM, stop now... */
+ if (vam->do_exit)
+ break;
+ }
+
+ /* When testing multiple add/del ops, use a control-ping to sync */
+ if (count > 1)
+ {
+ vl_api_control_ping_t *mp_ping;
+ f64 after;
+ f64 timeout;
+
+ /* Shut off async mode */
+ vam->async_mode = 0;
+
+ PING (&ip_test_main, mp_ping);
+ S (mp_ping);
+
+ timeout = vat_time_now (vam) + 1.0;
+ while (vat_time_now (vam) < timeout)
+ if (vam->result_ready == 1)
+ goto out;
+ vam->retval = -99;
+
+ out:
+ if (vam->retval == -99)
+ errmsg ("timeout");
+
+ if (vam->async_errors > 0)
+ {
+ errmsg ("%d asynchronous errors", vam->async_errors);
+ vam->retval = -98;
+ }
+ vam->async_errors = 0;
+ after = vat_time_now (vam);
+
+ /* slim chance, but we might have eaten SIGTERM on the first iteration */
+ if (j > 0)
+ count = j;
+
+ print (vam->ofp, "%d routes in %.6f secs, %.2f routes/sec", count,
+ after - before, count / (after - before));
+ }
+ else
+ {
+ int ret;
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+ }
+
+ /* Return the good/bad news */
+ return (vam->retval);
+}
+
+static int
+api_ip_table_add_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_table_add_del_t *mp;
+ u32 table_id = ~0;
+ u8 is_ipv6 = 0;
+ u8 is_add = 1;
+ int ret = 0;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "table %d", &table_id))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (~0 == table_id)
+ {
+ errmsg ("missing table-ID");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (IP_TABLE_ADD_DEL, mp);
+
+ mp->table.table_id = ntohl (table_id);
+ mp->table.is_ip6 = is_ipv6;
+ mp->is_add = is_add;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+static int
+api_ip_table_replace_begin (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_table_replace_begin_t *mp;
+ u32 table_id = 0;
+ u8 is_ipv6 = 0;
+
+ int ret;
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "table %d", &table_id))
+ ;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IP_TABLE_REPLACE_BEGIN, mp);
+
+ mp->table.table_id = ntohl (table_id);
+ mp->table.is_ip6 = is_ipv6;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_table_flush (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_table_flush_t *mp;
+ u32 table_id = 0;
+ u8 is_ipv6 = 0;
+
+ int ret;
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "table %d", &table_id))
+ ;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IP_TABLE_FLUSH, mp);
+
+ mp->table.table_id = ntohl (table_id);
+ mp->table.is_ip6 = is_ipv6;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_table_allocate (vat_main_t *vam)
+{
+ return -1;
+}
+
+static void
+vl_api_ip_table_allocate_reply_t_handler (vl_api_ip_table_allocate_reply_t *mp)
+{
+}
+
+static void
+vl_api_ip_route_add_del_v2_reply_t_handler (
+ vl_api_ip_route_add_del_v2_reply_t *mp)
+{
+}
+
+static void
+vl_api_ip_route_details_t_handler (vl_api_ip_route_details_t *mp)
+{
+}
+
+static void
+vl_api_ip_route_v2_details_t_handler (vl_api_ip_route_v2_details_t *mp)
+{
+}
+
+static void
+vl_api_ip_route_add_del_reply_t_handler (vl_api_ip_route_add_del_reply_t *mp)
+{
+ vat_main_t *vam = ip_test_main.vat_main;
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_ip_route_lookup_reply_t_handler (vl_api_ip_route_lookup_reply_t *mp)
+{
+}
+
+static void
+vl_api_ip_route_lookup_v2_reply_t_handler (
+ vl_api_ip_route_lookup_v2_reply_t *mp)
+{
+}
+
+static int
+api_set_ip_flow_hash_router_id (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ip_route_lookup (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ip_route_lookup_v2 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_set_ip_flow_hash (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_set_ip_flow_hash_t *mp;
+ u32 vrf_id = 0;
+ u8 is_ipv6 = 0;
+ u8 vrf_id_set = 0;
+ u8 src = 0;
+ u8 dst = 0;
+ u8 sport = 0;
+ u8 dport = 0;
+ u8 proto = 0;
+ u8 reverse = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "vrf %d", &vrf_id))
+ vrf_id_set = 1;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else if (unformat (i, "src"))
+ src = 1;
+ else if (unformat (i, "dst"))
+ dst = 1;
+ else if (unformat (i, "sport"))
+ sport = 1;
+ else if (unformat (i, "dport"))
+ dport = 1;
+ else if (unformat (i, "proto"))
+ proto = 1;
+ else if (unformat (i, "reverse"))
+ reverse = 1;
+
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (vrf_id_set == 0)
+ {
+ errmsg ("missing vrf id");
+ return -99;
+ }
+
+ M (SET_IP_FLOW_HASH, mp);
+ mp->src = src;
+ mp->dst = dst;
+ mp->sport = sport;
+ mp->dport = dport;
+ mp->proto = proto;
+ mp->reverse = reverse;
+ mp->vrf_id = ntohl (vrf_id);
+ mp->is_ipv6 = is_ipv6;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_mfib_signal_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ip_punt_police (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ip_punt_redirect (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_add_del_ip_punt_redirect_v2 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ip_punt_redirect_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_ip_punt_redirect_details_t_handler (
+ vl_api_ip_punt_redirect_details_t *mp)
+{
+ /**/
+}
+
+static int
+api_ip_punt_redirect_v2_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_ip_punt_redirect_v2_details_t_handler (
+ vl_api_ip_punt_redirect_v2_details_t *mp)
+{
+ /**/
+}
+
+static int
+api_ip_address_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_address_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sw_if_index = ~0;
+ u8 sw_if_index_set = 0;
+ u8 ipv4_set = 0;
+ u8 ipv6_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "ipv4"))
+ ipv4_set = 1;
+ else if (unformat (i, "ipv6"))
+ ipv6_set = 1;
+ else
+ break;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("ipv4 and ipv6 flags cannot be both set");
+ return -99;
+ }
+
+ if ((!ipv4_set) && (!ipv6_set))
+ {
+ errmsg ("no ipv4 nor ipv6 flag set");
+ return -99;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ vam->current_sw_if_index = sw_if_index;
+ vam->is_ipv6 = ipv6_set;
+
+ M (IP_ADDRESS_DUMP, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_ipv6 = ipv6_set;
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&ip_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_sw_interface_ip6_get_link_local_address_reply_t_handler (
+ vl_api_sw_interface_ip6_get_link_local_address_reply_t *mp)
+{
+}
+
+static int
+api_sw_interface_ip6_set_link_local_address (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sw_interface_ip6_get_link_local_address (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_ip_path_mtu_replace_end (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_ioam_enable (vat_main_t *vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_ioam_enable_t *mp;
+ u32 id = 0;
+ int has_trace_option = 0;
+ int has_pot_option = 0;
+ int has_seqno_option = 0;
+ int has_analyse_option = 0;
+ int ret;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "trace"))
+ has_trace_option = 1;
+ else if (unformat (input, "pot"))
+ has_pot_option = 1;
+ else if (unformat (input, "seqno"))
+ has_seqno_option = 1;
+ else if (unformat (input, "analyse"))
+ has_analyse_option = 1;
+ else
+ break;
+ }
+ M (IOAM_ENABLE, mp);
+ mp->id = htons (id);
+ mp->seqno = has_seqno_option;
+ mp->analyse = has_analyse_option;
+ mp->pot_enable = has_pot_option;
+ mp->trace_enable = has_trace_option;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_reassembly_get (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_ip_path_mtu_replace_begin (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_ip_path_mtu_update (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_ioam_disable (vat_main_t *vam)
+{
+ vl_api_ioam_disable_t *mp;
+ int ret;
+
+ M (IOAM_DISABLE, mp);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_source_and_port_range_check_add_del (vat_main_t *vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_ip_source_and_port_range_check_add_del_t *mp;
+
+ u16 *low_ports = 0;
+ u16 *high_ports = 0;
+ u16 this_low;
+ u16 this_hi;
+ vl_api_prefix_t prefix;
+ u32 tmp, tmp2;
+ u8 prefix_set = 0;
+ u32 vrf_id = ~0;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vl_api_prefix, &prefix))
+ prefix_set = 1;
+ else if (unformat (input, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else if (unformat (input, "port %d", &tmp))
+ {
+ if (tmp == 0 || tmp > 65535)
+ {
+ errmsg ("port %d out of range", tmp);
+ return -99;
+ }
+ this_low = tmp;
+ this_hi = this_low + 1;
+ vec_add1 (low_ports, this_low);
+ vec_add1 (high_ports, this_hi);
+ }
+ else if (unformat (input, "range %d - %d", &tmp, &tmp2))
+ {
+ if ((tmp > tmp2) || (tmp == 0) || (tmp2 > 65535))
+ {
+ errmsg ("incorrect range parameters");
+ return -99;
+ }
+ this_low = tmp;
+ /* Note: in debug CLI +1 is added to high before
+ passing to real fn that does "the work"
+ (ip_source_and_port_range_check_add_del).
+ This fn is a wrapper around the binary API fn a
+ control plane will call, which expects this increment
+ to have occurred. Hence letting the binary API control
+ plane fn do the increment for consistency between VAT
+ and other control planes.
+ */
+ this_hi = tmp2;
+ vec_add1 (low_ports, this_low);
+ vec_add1 (high_ports, this_hi);
+ }
+ else
+ break;
+ }
+
+ if (prefix_set == 0)
+ {
+ errmsg ("<address>/<mask> not specified");
+ return -99;
+ }
+
+ if (vrf_id == ~0)
+ {
+ errmsg ("VRF ID required, not specified");
+ return -99;
+ }
+
+ if (vrf_id == 0)
+ {
+ errmsg ("VRF ID should not be default. Should be distinct VRF for this "
+ "purpose.");
+ return -99;
+ }
+
+ if (vec_len (low_ports) == 0)
+ {
+ errmsg ("At least one port or port range required");
+ return -99;
+ }
+
+ M (IP_SOURCE_AND_PORT_RANGE_CHECK_ADD_DEL, mp);
+
+ mp->is_add = is_add;
+
+ clib_memcpy (&mp->prefix, &prefix, sizeof (prefix));
+
+ mp->number_of_ranges = vec_len (low_ports);
+
+ clib_memcpy (mp->low_ports, low_ports, vec_len (low_ports));
+ vec_free (low_ports);
+
+ clib_memcpy (mp->high_ports, high_ports, vec_len (high_ports));
+ vec_free (high_ports);
+
+ mp->vrf_id = ntohl (vrf_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_reassembly_set (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ip_container_proxy_add_del (vat_main_t *vam)
+{
+ vl_api_ip_container_proxy_add_del_t *mp;
+ unformat_input_t *i = vam->input;
+ u32 sw_if_index = ~0;
+ vl_api_prefix_t pfx = {};
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ ;
+ if (unformat (i, "%U", unformat_vl_api_prefix, &pfx))
+ ;
+ else if (unformat (i, "sw_if_index %u", &sw_if_index))
+ ;
+ else
+ break;
+ }
+ if (sw_if_index == ~0 || pfx.len == 0)
+ {
+ errmsg ("address and sw_if_index must be set");
+ return -99;
+ }
+
+ M (IP_CONTAINER_PROXY_ADD_DEL, mp);
+
+ mp->sw_if_index = clib_host_to_net_u32 (sw_if_index);
+ mp->is_add = is_add;
+ clib_memcpy (&mp->pfx, &pfx, sizeof (pfx));
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_reassembly_enable_disable (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ip_local_reass_enable_disable (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ip_local_reass_get (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_ip_local_reass_get_reply_t_handler (
+ vl_api_ip_local_reass_get_reply_t *mp)
+{
+}
+
+static void
+vl_api_ip_reassembly_get_reply_t_handler (vl_api_ip_reassembly_get_reply_t *mp)
+{
+}
+
+int
+api_ip_source_and_port_range_check_interface_add_del (vat_main_t *vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_ip_source_and_port_range_check_interface_add_del_t *mp;
+ u32 sw_if_index = ~0;
+ int vrf_set = 0;
+ u32 tcp_out_vrf_id = ~0, udp_out_vrf_id = ~0;
+ u32 tcp_in_vrf_id = ~0, udp_in_vrf_id = ~0;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (input, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (input, "tcp-out-vrf %d", &tcp_out_vrf_id))
+ vrf_set = 1;
+ else if (unformat (input, "udp-out-vrf %d", &udp_out_vrf_id))
+ vrf_set = 1;
+ else if (unformat (input, "tcp-in-vrf %d", &tcp_in_vrf_id))
+ vrf_set = 1;
+ else if (unformat (input, "udp-in-vrf %d", &udp_in_vrf_id))
+ vrf_set = 1;
+ else if (unformat (input, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("Interface required but not specified");
+ return -99;
+ }
+
+ if (vrf_set == 0)
+ {
+ errmsg ("VRF ID required but not specified");
+ return -99;
+ }
+
+ if (tcp_out_vrf_id == 0 || udp_out_vrf_id == 0 || tcp_in_vrf_id == 0 ||
+ udp_in_vrf_id == 0)
+ {
+ errmsg ("VRF ID should not be default. Should be distinct VRF for this "
+ "purpose.");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (IP_SOURCE_AND_PORT_RANGE_CHECK_INTERFACE_ADD_DEL, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = is_add;
+ mp->tcp_out_vrf_id = ntohl (tcp_out_vrf_id);
+ mp->udp_out_vrf_id = ntohl (udp_out_vrf_id);
+ mp->tcp_in_vrf_id = ntohl (tcp_in_vrf_id);
+ mp->udp_in_vrf_id = ntohl (udp_in_vrf_id);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ip_container_proxy_details_t_handler (
+ vl_api_ip_container_proxy_details_t *mp)
+{
+}
+
+static int
+api_ip_container_proxy_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_ip_dump (vat_main_t *vam)
+{
+ vl_api_ip_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ unformat_input_t *in = vam->input;
+ int ipv4_set = 0;
+ int ipv6_set = 0;
+ int is_ipv6;
+ int i;
+ int ret;
+
+ while (unformat_check_input (in) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (in, "ipv4"))
+ ipv4_set = 1;
+ else if (unformat (in, "ipv6"))
+ ipv6_set = 1;
+ else
+ break;
+ }
+
+ if (ipv4_set && ipv6_set)
+ {
+ errmsg ("ipv4 and ipv6 flags cannot be both set");
+ return -99;
+ }
+
+ if ((!ipv4_set) && (!ipv6_set))
+ {
+ errmsg ("no ipv4 nor ipv6 flag set");
+ return -99;
+ }
+
+ is_ipv6 = ipv6_set;
+ vam->is_ipv6 = is_ipv6;
+
+ /* free old data */
+ for (i = 0; i < vec_len (vam->ip_details_by_sw_if_index[is_ipv6]); i++)
+ {
+ vec_free (vam->ip_details_by_sw_if_index[is_ipv6][i].addr);
+ }
+ vec_free (vam->ip_details_by_sw_if_index[is_ipv6]);
+
+ M (IP_DUMP, mp);
+ mp->is_ipv6 = ipv6_set;
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&ip_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_mfib_signal_details_t_handler (vl_api_mfib_signal_details_t *mp)
+{
+}
+
+static void
+vl_api_ip_mroute_details_t_handler (vl_api_ip_mroute_details_t *mp)
+{
+ vat_main_t *vam = ip_test_main.vat_main;
+ vam->result_ready = 1;
+}
+
+static int
+api_ip_mroute_dump (vat_main_t *vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_control_ping_t *mp_ping;
+ vl_api_ip_mroute_dump_t *mp;
+ int ret, is_ip6;
+ u32 table_id;
+
+ is_ip6 = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table_id %d", &table_id))
+ ;
+ else if (unformat (input, "ip6"))
+ is_ip6 = 1;
+ else if (unformat (input, "ip4"))
+ is_ip6 = 0;
+ else
+ break;
+ }
+ if (table_id == ~0)
+ {
+ errmsg ("missing table id");
+ return -99;
+ }
+
+ M (IP_MROUTE_DUMP, mp);
+ mp->table.table_id = table_id;
+ mp->table.is_ip6 = is_ip6;
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&ip_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_ip6_enable_disable (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_ip6_enable_disable_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 enable = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", api_unformat_sw_if_index, vam, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (SW_INTERFACE_IP6_ENABLE_DISABLE, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_set_ip_flow_hash_v2 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_set_ip_flow_hash_v3 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ip_mroute_add_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ u8 path_set = 0, prefix_set = 0, is_add = 1;
+ vl_api_ip_mroute_add_del_t *mp;
+ mfib_entry_flags_t eflags = 0;
+ vl_api_mfib_path_t path;
+ vl_api_mprefix_t pfx = {};
+ u32 vrf_id = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vl_api_mprefix, &pfx))
+ {
+ prefix_set = 1;
+ pfx.grp_address_length = htons (pfx.grp_address_length);
+ }
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ is_add = 1;
+ else if (unformat (i, "vrf %d", &vrf_id))
+ ;
+ else if (unformat (i, "%U", unformat_mfib_itf_flags, &path.itf_flags))
+ path.itf_flags = htonl (path.itf_flags);
+ else if (unformat (i, "%U", unformat_mfib_entry_flags, &eflags))
+ ;
+ else if (unformat (i, "via %U", unformat_fib_path, vam, &path.path))
+ path_set = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (prefix_set == 0)
+ {
+ errmsg ("missing addresses\n");
+ return -99;
+ }
+ if (path_set == 0)
+ {
+ errmsg ("missing path\n");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (IP_MROUTE_ADD_DEL, mp);
+
+ mp->is_add = is_add;
+ mp->is_multipath = 1;
+
+ clib_memcpy (&mp->route.prefix, &pfx, sizeof (pfx));
+ mp->route.table_id = htonl (vrf_id);
+ mp->route.n_paths = 1;
+ mp->route.entry_flags = htonl (eflags);
+
+ clib_memcpy (&mp->route.paths, &path, sizeof (path));
+
+ /* send it... */
+ S (mp);
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ip_mroute_add_del_reply_t_handler (vl_api_ip_mroute_add_del_reply_t *mp)
+{
+ vat_main_t *vam = ip_test_main.vat_main;
+ vam->result_ready = 1;
+}
+
+static int
+api_ip_mtable_dump (vat_main_t *vam)
+{
+ vl_api_ip_mtable_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ M (IP_MTABLE_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&ip_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ip_mtable_details_t_handler (vl_api_ip_mtable_details_t *mp)
+{
+ vat_main_t *vam = ip_test_main.vat_main;
+ vam->result_ready = 1;
+}
+
+static int
+api_ip_table_replace_end (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ip_table_replace_end_t *mp;
+ u32 table_id = 0;
+ u8 is_ipv6 = 0;
+
+ int ret;
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "table %d", &table_id))
+ ;
+ else if (unformat (i, "ipv6"))
+ is_ipv6 = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IP_TABLE_REPLACE_END, mp);
+
+ mp->table.table_id = ntohl (table_id);
+ mp->table.is_ip6 = is_ipv6;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ip_table_dump (vat_main_t *vam)
+{
+ vl_api_ip_table_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ M (IP_TABLE_DUMP, mp);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&ip_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ip_table_details_t_handler (vl_api_ip_table_details_t *mp)
+{
+ vat_main_t *vam = ip_test_main.vat_main;
+
+ fformat (vam->ofp, "%s; table-id %d, prefix %U/%d", mp->table.name,
+ ntohl (mp->table.table_id));
+ vam->result_ready = 1;
+}
+
+static int
+api_ip_path_mtu_get (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ip_route_v2_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_ip_path_mtu_get_reply_t_handler (vl_api_ip_path_mtu_get_reply_t *mp)
+{
+}
+
+static int
+api_ip_route_dump (vat_main_t *vam)
+{
+ unformat_input_t *input = vam->input;
+ vl_api_ip_route_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 table_id;
+ u8 is_ip6;
+ int ret;
+
+ is_ip6 = 0;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table_id %d", &table_id))
+ ;
+ else if (unformat (input, "ip6"))
+ is_ip6 = 1;
+ else if (unformat (input, "ip4"))
+ is_ip6 = 0;
+ else
+ break;
+ }
+ if (table_id == ~0)
+ {
+ errmsg ("missing table id");
+ return -99;
+ }
+
+ M (IP_ROUTE_DUMP, mp);
+
+ mp->table.table_id = table_id;
+ mp->table.is_ip6 = is_ip6;
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&ip_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ip_address_details_t_handler (vl_api_ip_address_details_t *mp)
+{
+ vat_main_t *vam = ip_test_main.vat_main;
+ static ip_address_details_t empty_ip_address_details = { { 0 } };
+ ip_address_details_t *address = NULL;
+ ip_details_t *current_ip_details = NULL;
+ ip_details_t *details = NULL;
+
+ details = vam->ip_details_by_sw_if_index[vam->is_ipv6];
+
+ if (!details || vam->current_sw_if_index >= vec_len (details) ||
+ !details[vam->current_sw_if_index].present)
+ {
+ errmsg ("ip address details arrived but not stored");
+ errmsg ("ip_dump should be called first");
+ return;
+ }
+
+ current_ip_details = vec_elt_at_index (details, vam->current_sw_if_index);
+
+#define addresses (current_ip_details->addr)
+
+ vec_validate_init_empty (addresses, vec_len (addresses),
+ empty_ip_address_details);
+
+ address = vec_elt_at_index (addresses, vec_len (addresses) - 1);
+
+ clib_memcpy (&address->ip, &mp->prefix.address.un, sizeof (address->ip));
+ address->prefix_length = mp->prefix.len;
+#undef addresses
+}
+
+static int
+api_ip_unnumbered_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static void
+vl_api_ip_unnumbered_details_t_handler (vl_api_ip_unnumbered_details_t *mp)
+{
+}
+
+static void
+vl_api_ip_details_t_handler (vl_api_ip_details_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ static ip_details_t empty_ip_details = { 0 };
+ ip_details_t *ip = NULL;
+ u32 sw_if_index = ~0;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ vec_validate_init_empty (vam->ip_details_by_sw_if_index[vam->is_ipv6],
+ sw_if_index, empty_ip_details);
+
+ ip = vec_elt_at_index (vam->ip_details_by_sw_if_index[vam->is_ipv6],
+ sw_if_index);
+
+ ip->present = 1;
+}
+
+#include <vnet/ip/ip.api_test.c>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip/ip_types.c b/src/vnet/ip/ip_types.c
index 3e5ecebf142..ec80a96f15c 100644
--- a/src/vnet/ip/ip_types.c
+++ b/src/vnet/ip/ip_types.c
@@ -41,14 +41,16 @@ uword
unformat_ip_address (unformat_input_t * input, va_list * args)
{
ip_address_t *a = va_arg (*args, ip_address_t *);
+ ip_address_t tmp, *p_tmp = &tmp;
- clib_memset (a, 0, sizeof (*a));
- if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (a)))
- ip_addr_version (a) = AF_IP4;
- else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (a)))
- ip_addr_version (a) = AF_IP6;
+ clib_memset (p_tmp, 0, sizeof (*p_tmp));
+ if (unformat (input, "%U", unformat_ip4_address, &ip_addr_v4 (p_tmp)))
+ ip_addr_version (p_tmp) = AF_IP4;
+ else if (unformat_user (input, unformat_ip6_address, &ip_addr_v6 (p_tmp)))
+ ip_addr_version (p_tmp) = AF_IP6;
else
return 0;
+ *a = *p_tmp;
return 1;
}
@@ -288,6 +290,13 @@ ip_address_to_fib_prefix (const ip_address_t * addr, fib_prefix_t * prefix)
}
void
+ip_address_to_prefix (const ip_address_t *addr, ip_prefix_t *prefix)
+{
+ prefix->len = (addr->version == AF_IP4 ? 32 : 128);
+ clib_memcpy (&prefix->addr, addr, sizeof (prefix->addr));
+}
+
+void
ip_address_increment (ip_address_t * ip)
{
ip46_address_increment ((ip_addr_version (ip) == AF_IP4 ?
@@ -380,23 +389,24 @@ ip_prefix_copy (void *dst, void *src)
}
int
-ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2)
+ip_prefix_cmp (const ip_prefix_t *ipp1, const ip_prefix_t *ipp2)
{
+ ip_prefix_t p1 = *ipp1, p2 = *ipp2;
int cmp = 0;
- ip_prefix_normalize (p1);
- ip_prefix_normalize (p2);
+ ip_prefix_normalize (&p1);
+ ip_prefix_normalize (&p2);
- cmp = ip_address_cmp (&ip_prefix_addr (p1), &ip_prefix_addr (p2));
+ cmp = ip_address_cmp (&ip_prefix_addr (&p1), &ip_prefix_addr (&p2));
if (cmp == 0)
{
- if (ip_prefix_len (p1) < ip_prefix_len (p2))
+ if (ip_prefix_len (&p1) < ip_prefix_len (&p2))
{
cmp = 1;
}
else
{
- if (ip_prefix_len (p1) > ip_prefix_len (p2))
+ if (ip_prefix_len (&p1) > ip_prefix_len (&p2))
cmp = 2;
}
}
diff --git a/src/vnet/ip/ip_types.h b/src/vnet/ip/ip_types.h
index 83a0f6adc72..f1b387df194 100644
--- a/src/vnet/ip/ip_types.h
+++ b/src/vnet/ip/ip_types.h
@@ -75,13 +75,11 @@ typedef enum ip_feature_location_t_
#define N_IP_FEATURE_LOCATIONS (IP_FEATURE_DROP+1)
-/* *INDENT-OFF* */
typedef struct ip_address
{
ip46_address_t ip;
ip_address_family_t version;
} __clib_packed ip_address_t;
-/* *INDENT-ON* */
#define IP_ADDRESS_V4_ALL_0S {.ip.ip4.as_u32 = 0, .version = AF_IP4}
#define IP_ADDRESS_V6_ALL_0S {.ip.ip6.as_u64 = {0, 0}, .version = AF_IP6}
@@ -112,13 +110,11 @@ extern void ip_address_from_46 (const ip46_address_t * a,
extern void ip_address_increment (ip_address_t * ip);
extern void ip_address_reset (ip_address_t * ip);
-/* *INDENT-OFF* */
typedef struct ip_prefix
{
ip_address_t addr;
u8 len;
} __clib_packed ip_prefix_t;
-/* *INDENT-ON* */
#define ip_prefix_addr(_a) (_a)->addr
#define ip_prefix_version(_a) ip_addr_version(&ip_prefix_addr(_a))
@@ -126,11 +122,13 @@ typedef struct ip_prefix
#define ip_prefix_v4(_a) ip_addr_v4(&ip_prefix_addr(_a))
#define ip_prefix_v6(_a) ip_addr_v6(&ip_prefix_addr(_a))
-extern int ip_prefix_cmp (ip_prefix_t * p1, ip_prefix_t * p2);
+extern int ip_prefix_cmp (const ip_prefix_t *p1, const ip_prefix_t *p2);
extern void ip_prefix_normalize (ip_prefix_t * a);
extern void ip_address_to_fib_prefix (const ip_address_t * addr,
fib_prefix_t * prefix);
+extern void ip_address_to_prefix (const ip_address_t *addr,
+ ip_prefix_t *prefix);
extern void ip_prefix_to_fib_prefix (const ip_prefix_t * ipp,
fib_prefix_t * fibp);
extern u8 *format_ip_prefix (u8 * s, va_list * args);
diff --git a/src/vnet/ip/lookup.c b/src/vnet/ip/lookup.c
index f674fec4823..c225c222a38 100644
--- a/src/vnet/ip/lookup.c
+++ b/src/vnet/ip/lookup.c
@@ -128,6 +128,42 @@ format_ip_flow_hash_config (u8 * s, va_list * args)
return s;
}
+uword
+unformat_ip_flow_hash_config (unformat_input_t *input, va_list *args)
+{
+ flow_hash_config_t *flow_hash_config = va_arg (*args, flow_hash_config_t *);
+ uword start_index = unformat_check_input (input);
+ int matched_once = 0;
+
+ if (unformat (input, "default"))
+ {
+ *flow_hash_config = IP_FLOW_HASH_DEFAULT;
+ return 1;
+ }
+ while (!unformat_is_eof (input) &&
+ !is_white_space (unformat_peek_input (input)))
+ {
+ if (unformat (input, "%_,"))
+ ;
+#define _(a, b, c) \
+ else if (unformat (input, "%_" #a)) \
+ { \
+ *flow_hash_config |= c; \
+ matched_once = 1; \
+ }
+ foreach_flow_hash_bit
+#undef _
+ else
+ {
+ /* Roll back to our start */
+ input->index = start_index;
+ return 0;
+ }
+ }
+
+ return matched_once;
+}
+
u8 *
format_ip_adjacency_packet_data (u8 * s, va_list * args)
{
@@ -184,6 +220,27 @@ const ip46_address_t zero_addr = {
0, 0},
};
+bool
+fib_prefix_validate (const fib_prefix_t *prefix)
+{
+ if (FIB_PROTOCOL_IP4 == prefix->fp_proto)
+ {
+ if (prefix->fp_len > 32)
+ {
+ return false;
+ }
+ }
+
+ if (FIB_PROTOCOL_IP6 == prefix->fp_proto)
+ {
+ if (prefix->fp_len > 128)
+ {
+ return false;
+ }
+ }
+ return true;
+}
+
static clib_error_t *
vnet_ip_route_cmd (vlib_main_t * vm,
unformat_input_t * main_input, vlib_cli_command_t * cmd)
@@ -304,22 +361,25 @@ vnet_ip_route_cmd (vlib_main_t * vm,
}
else if (0 < vec_len (rpaths))
{
- u32 k, n, incr;
- ip46_address_t dst = prefixs[i].fp_addr;
+ u32 k, n;
f64 t[2];
n = count;
t[0] = vlib_time_now (vm);
- incr = 1 << ((FIB_PROTOCOL_IP4 == prefixs[0].fp_proto ? 32 : 128) -
- prefixs[i].fp_len);
for (k = 0; k < n; k++)
{
fib_prefix_t rpfx = {
.fp_len = prefixs[i].fp_len,
.fp_proto = prefixs[i].fp_proto,
- .fp_addr = dst,
+ .fp_addr = prefixs[i].fp_addr,
};
+ if (!fib_prefix_validate (&rpfx))
+ {
+ vlib_cli_output (vm, "Invalid prefix len: %d", rpfx.fp_len);
+ continue;
+ }
+
if (is_del)
fib_table_entry_path_remove2 (fib_index,
&rpfx, FIB_SOURCE_CLI, rpaths);
@@ -329,21 +389,7 @@ vnet_ip_route_cmd (vlib_main_t * vm,
FIB_SOURCE_CLI,
FIB_ENTRY_FLAG_NONE, rpaths);
- if (FIB_PROTOCOL_IP4 == prefixs[0].fp_proto)
- {
- dst.ip4.as_u32 =
- clib_host_to_net_u32 (incr +
- clib_net_to_host_u32 (dst.
- ip4.as_u32));
- }
- else
- {
- int bucket = (incr < 64 ? 0 : 1);
- dst.ip6.as_u64[bucket] =
- clib_host_to_net_u64 (incr +
- clib_net_to_host_u64 (dst.ip6.as_u64
- [bucket]));
- }
+ fib_prefix_increment (&prefixs[i]);
}
t[1] = vlib_time_now (vm);
@@ -399,29 +445,35 @@ vnet_ip_table_cmd (vlib_main_t * vm,
}
}
- if (~0 == table_id)
- {
- error = clib_error_return (0, "No table id");
- goto done;
- }
- else if (0 == table_id)
+ if (0 == table_id)
{
error = clib_error_return (0, "Can't change the default table");
goto done;
}
else
- {
- if (is_add)
- {
- ip_table_create (fproto, table_id, 0, name);
- }
- else
{
- ip_table_delete (fproto, table_id, 0);
+ if (is_add)
+ {
+ if (~0 == table_id)
+ {
+ table_id = ip_table_get_unused_id (fproto);
+ vlib_cli_output (vm, "%u\n", table_id);
+ }
+ ip_table_create (fproto, table_id, 0, name);
+ }
+ else
+ {
+ if (~0 == table_id)
+ {
+ error = clib_error_return (0, "No table id");
+ goto done;
+ }
+ ip_table_delete (fproto, table_id, 0);
+ }
}
- }
done:
+ vec_free (name);
unformat_free (line_input);
return error;
}
@@ -440,33 +492,90 @@ vnet_ip6_table_cmd (vlib_main_t * vm,
return (vnet_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6));
}
-/* *INDENT-OFF* */
+clib_error_t *
+vnet_show_ip_table_cmd (vlib_main_t *vm, unformat_input_t *main_input,
+ vlib_cli_command_t *cmd, fib_protocol_t fproto)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ fib_table_t *fib, *fibs;
+ clib_error_t *error = NULL;
+ u32 table_id = ~0, fib_index;
+ /* Get a line of input. */
+ if (unformat_user (main_input, unformat_line_input, line_input))
+ {
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "%d", &table_id))
+ ;
+ else
+ {
+ error = unformat_parse_error (line_input);
+ goto done;
+ }
+ }
+ unformat_free (line_input);
+ }
+
+ fibs = (fproto == FIB_PROTOCOL_IP4) ? ip4_main.fibs : ip6_main.fibs;
+
+ if (table_id != (u32) ~0)
+ {
+ fib_index = fib_table_find (fproto, table_id);
+ if (fib_index == (u32) ~0)
+ {
+ error = clib_error_return (0, "Couldn't find table with table_id %u",
+ table_id);
+ goto done;
+ }
+
+ fib = fib_table_get (fib_index, fproto);
+ vlib_cli_output (vm, "[%u] table_id:%u %v", fib->ft_index,
+ fib->ft_table_id, fib->ft_desc);
+ }
+ else
+ {
+ pool_foreach (fib, fibs)
+ vlib_cli_output (vm, "[%u] table_id:%u %v", fib->ft_index,
+ fib->ft_table_id, fib->ft_desc);
+ }
+
+done:
+ return error;
+}
+
+clib_error_t *
+vnet_show_ip4_table_cmd (vlib_main_t *vm, unformat_input_t *main_input,
+ vlib_cli_command_t *cmd)
+{
+ return (vnet_show_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP4));
+}
+
+clib_error_t *
+vnet_show_ip6_table_cmd (vlib_main_t *vm, unformat_input_t *main_input,
+ vlib_cli_command_t *cmd)
+{
+ return (vnet_show_ip_table_cmd (vm, main_input, cmd, FIB_PROTOCOL_IP6));
+}
+
VLIB_CLI_COMMAND (vlib_cli_ip_command, static) = {
.path = "ip",
.short_help = "Internet protocol (IP) commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_ip6_command, static) = {
.path = "ip6",
.short_help = "Internet protocol version 6 (IPv6) commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_ip_command, static) = {
.path = "show ip",
.short_help = "Internet protocol (IP) show commands",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
.path = "show ip6",
.short_help = "Internet protocol version 6 (IPv6) show commands",
};
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv4 or IPv6 routes. All
@@ -495,43 +604,55 @@ VLIB_CLI_COMMAND (vlib_cli_show_ip6_command, static) = {
* To add a route to a particular FIB table (VRF), use:
* @cliexcmd{ip route add 172.16.24.0/24 table 7 via GigabitEthernet2/0/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_route_command, static) = {
.path = "ip route",
- .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table <table-id>] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]",
+ .short_help = "ip route [add|del] [count <n>] <dst-ip-addr>/<width> [table "
+ "<table-id>] via [next-hop-address] [next-hop-interface] "
+ "[next-hop-table <value>] [weight <value>] [preference "
+ "<value>] [udp-encap <value>] [ip4-lookup-in-table <value>] "
+ "[ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] "
+ "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 "
+ "<interface>] [out-labels <value value value>]",
.function = vnet_ip_route_cmd,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv4 Tables. All
* Tables must be explicitly added before that can be used. Creating a
* table will add both unicast and multicast FIBs
*
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip4_table_command, static) = {
.path = "ip table",
.short_help = "ip table [add|del] <table-id>",
.function = vnet_ip4_table_cmd,
};
-/* *INDENT-ON* */
-/* *INDENT-ON* */
/*?
* This command is used to add or delete IPv4 Tables. All
* Tables must be explicitly added before that can be used. Creating a
* table will add both unicast and multicast FIBs
*
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_table_command, static) = {
.path = "ip6 table",
.short_help = "ip6 table [add|del] <table-id>",
.function = vnet_ip6_table_cmd,
};
+VLIB_CLI_COMMAND (show_ip4_table_command, static) = {
+ .path = "show ip table",
+ .short_help = "show ip table <table-id>",
+ .function = vnet_show_ip4_table_cmd,
+};
+
+VLIB_CLI_COMMAND (show_ip6_table_command, static) = {
+ .path = "show ip6 table",
+ .short_help = "show ip6 table <table-id>",
+ .function = vnet_show_ip6_table_cmd,
+};
+
static clib_error_t *
ip_table_bind_cmd (vlib_main_t * vm,
unformat_input_t * input,
@@ -561,7 +682,7 @@ ip_table_bind_cmd (vlib_main_t * vm,
goto done;
}
- rv = ip_table_bind (fproto, sw_if_index, table_id, 0);
+ rv = ip_table_bind (fproto, sw_if_index, table_id);
if (VNET_API_ERROR_ADDRESS_FOUND_FOR_INTERFACE == rv)
{
@@ -618,14 +739,12 @@ ip6_table_bind_cmd (vlib_main_t * vm,
* Example of how to add an interface to an IPv4 FIB table (where 2 is the table-id):
* @cliexcmd{set interface ip table GigabitEthernet2/0/0 2}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
{
.path = "set interface ip table",
.function = ip4_table_bind_cmd,
.short_help = "set interface ip table <interface> <table-id>",
};
-/* *INDENT-ON* */
/*?
* Place the indicated interface into the supplied IPv6 FIB table (also known
@@ -646,14 +765,12 @@ VLIB_CLI_COMMAND (set_interface_ip_table_command, static) =
* Example of how to add an interface to an IPv6 FIB table (where 2 is the table-id):
* @cliexcmd{set interface ip6 table GigabitEthernet2/0/0 2}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_table_command, static) =
{
.path = "set interface ip6 table",
.function = ip6_table_bind_cmd,
.short_help = "set interface ip6 table <interface> <table-id>"
};
-/* *INDENT-ON* */
clib_error_t *
vnet_ip_mroute_cmd (vlib_main_t * vm,
@@ -817,8 +934,8 @@ vnet_ip_mroute_cmd (vlib_main_t * vm,
mfib_table_entry_path_remove (fib_index,
&pfx, MFIB_SOURCE_CLI, rpaths);
else
- mfib_table_entry_path_update (fib_index,
- &pfx, MFIB_SOURCE_CLI, rpaths);
+ mfib_table_entry_path_update (fib_index, &pfx, MFIB_SOURCE_CLI,
+ MFIB_ENTRY_FLAG_NONE, rpaths);
}
if (FIB_PROTOCOL_IP4 == pfx.fp_proto)
@@ -890,7 +1007,6 @@ done:
* @cliexcmd{ip mroute add 232.1.1.1 Signal}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip_mroute_command, static) =
{
.path = "ip mroute",
@@ -898,7 +1014,6 @@ VLIB_CLI_COMMAND (ip_mroute_command, static) =
.function = vnet_ip_mroute_cmd,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/lookup.h b/src/vnet/ip/lookup.h
index 48ba468d7c2..8083d974df6 100644
--- a/src/vnet/ip/lookup.h
+++ b/src/vnet/ip/lookup.h
@@ -162,23 +162,22 @@ typedef struct ip_lookup_main_t
} ip_lookup_main_t;
u8 *format_ip_flow_hash_config (u8 * s, va_list * args);
-
+uword unformat_ip_flow_hash_config (unformat_input_t *input, va_list *args);
always_inline void
ip_lookup_set_buffer_fib_index (u32 * fib_index_by_sw_if_index,
vlib_buffer_t * b)
{
- /* *INDENT-OFF* */
vnet_buffer (b)->ip.fib_index =
vec_elt (fib_index_by_sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_RX]);
vnet_buffer (b)->ip.fib_index =
((vnet_buffer (b)->sw_if_index[VLIB_TX] == (u32) ~ 0) ?
vnet_buffer (b)->ip.fib_index :
vnet_buffer (b)->sw_if_index[VLIB_TX]);
- /* *INDENT-ON* */
}
void ip_lookup_init (ip_lookup_main_t * lm, u32 ip_lookup_node_index);
+bool fib_prefix_validate (const fib_prefix_t *prefix);
#endif /* included_ip_lookup_h */
/*
diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c
index fb0cc221950..3c46549634a 100644
--- a/src/vnet/ip/punt.c
+++ b/src/vnet/ip/punt.c
@@ -148,14 +148,31 @@ punt_socket_register_l4 (vlib_main_t * vm,
punt_main_t *pm = &punt_main;
punt_client_t *c;
- /* For now we only support UDP punt */
- if (protocol != IP_PROTOCOL_UDP)
- return clib_error_return (0,
- "only UDP protocol (%d) is supported, got %d",
- IP_PROTOCOL_UDP, protocol);
-
if (port == (u16) ~ 0)
- return clib_error_return (0, "UDP port number required");
+ return clib_error_return (0, "Port number required");
+
+ u32 node_index;
+ switch (protocol)
+ {
+ case IP_PROTOCOL_UDP:
+ node_index = (af == AF_IP4 ? udp4_punt_socket_node.index :
+ udp6_punt_socket_node.index);
+ udp_register_dst_port (vm, port, node_index, af == AF_IP4);
+ break;
+ case IP_PROTOCOL_ICMP6:
+ if (af != AF_IP6)
+ return clib_error_return (
+ 0, "only UDP or ICMP6 protocol (%d, %d) is supported, got %d",
+ IP_PROTOCOL_UDP, IP_PROTOCOL_ICMP6, protocol);
+
+ node_index = icmp6_punt_socket_node.index;
+ icmp6_register_type (vm, port, node_index);
+ break;
+ default:
+ return clib_error_return (
+ 0, "only UDP or ICMP6 protocol (%d) is supported, got %d",
+ IP_PROTOCOL_UDP, protocol);
+ }
c = punt_client_l4_get (af, port);
@@ -165,19 +182,14 @@ punt_socket_register_l4 (vlib_main_t * vm,
punt_client_l4_db_add (af, port, c - pm->punt_client_pool);
}
- memcpy (c->caddr.sun_path, client_pathname, sizeof (c->caddr.sun_path));
+ snprintf (c->caddr.sun_path, sizeof (c->caddr.sun_path), "%s",
+ client_pathname);
c->caddr.sun_family = AF_UNIX;
c->reg.type = PUNT_TYPE_L4;
c->reg.punt.l4.port = port;
c->reg.punt.l4.protocol = protocol;
c->reg.punt.l4.af = af;
- u32 node_index = (af == AF_IP4 ?
- udp4_punt_socket_node.index :
- udp6_punt_socket_node.index);
-
- udp_register_dst_port (vm, port, node_index, af == AF_IP4);
-
return (NULL);
}
@@ -197,7 +209,8 @@ punt_socket_register_ip_proto (vlib_main_t * vm,
punt_client_ip_proto_db_add (af, proto, c - pm->punt_client_pool);
}
- memcpy (c->caddr.sun_path, client_pathname, sizeof (c->caddr.sun_path));
+ snprintf (c->caddr.sun_path, sizeof (c->caddr.sun_path), "%s",
+ client_pathname);
c->caddr.sun_family = AF_UNIX;
c->reg.type = PUNT_TYPE_IP_PROTO;
c->reg.punt.ip_proto.protocol = proto;
@@ -227,7 +240,8 @@ punt_socket_register_exception (vlib_main_t * vm,
punt_client_exception_db_add (reason, pc - pm->punt_client_pool);
}
- memcpy (pc->caddr.sun_path, client_pathname, sizeof (pc->caddr.sun_path));
+ snprintf (pc->caddr.sun_path, sizeof (pc->caddr.sun_path), "%s",
+ client_pathname);
pc->caddr.sun_family = AF_UNIX;
pc->reg.type = PUNT_TYPE_EXCEPTION;
pc->reg.punt.exception.reason = reason;
@@ -369,6 +383,8 @@ punt_l4_add_del (vlib_main_t * vm,
ip_address_family_t af,
ip_protocol_t protocol, u16 port, bool is_add)
{
+ int is_ip4 = af == AF_IP4;
+
/* For now we only support TCP and UDP punt */
if (protocol != IP_PROTOCOL_UDP && protocol != IP_PROTOCOL_TCP)
return clib_error_return (0,
@@ -378,19 +394,22 @@ punt_l4_add_del (vlib_main_t * vm,
if (port == (u16) ~ 0)
{
if (protocol == IP_PROTOCOL_UDP)
- udp_punt_unknown (vm, af == AF_IP4, is_add);
+ udp_punt_unknown (vm, is_ip4, is_add);
else if (protocol == IP_PROTOCOL_TCP)
- tcp_punt_unknown (vm, af == AF_IP4, is_add);
+ tcp_punt_unknown (vm, is_ip4, is_add);
return 0;
}
else if (is_add)
{
+ const vlib_node_registration_t *punt_node =
+ is_ip4 ? &udp4_punt_node : &udp6_punt_node;
+
if (protocol == IP_PROTOCOL_TCP)
return clib_error_return (0, "punt TCP ports is not supported yet");
- udp_register_dst_port (vm, port, udp4_punt_node.index, af == AF_IP4);
+ udp_register_dst_port (vm, port, punt_node->index, is_ip4);
return 0;
}
@@ -399,7 +418,7 @@ punt_l4_add_del (vlib_main_t * vm,
if (protocol == IP_PROTOCOL_TCP)
return clib_error_return (0, "punt TCP ports is not supported yet");
- udp_unregister_dst_port (vm, port, af == AF_IP4);
+ udp_unregister_dst_port (vm, port, is_ip4);
return 0;
}
@@ -455,7 +474,6 @@ punt_cli (vlib_main_t * vm,
unformat_input_t line_input, *input = &line_input;
clib_error_t *error = NULL;
bool is_add = true;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -467,7 +485,6 @@ punt_cli (vlib_main_t * vm,
.type = PUNT_TYPE_L4,
};
u32 port;
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -533,13 +550,11 @@ done:
* @cliexcmd{set punt udp del all}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_command, static) = {
.path = "set punt",
.short_help = "set punt [IPV4|ip6|ipv6] [UDP|tcp] [del] [ALL|<port-num>]",
.function = punt_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_socket_register_cmd (vlib_main_t * vm,
@@ -549,7 +564,6 @@ punt_socket_register_cmd (vlib_main_t * vm,
unformat_input_t line_input, *input = &line_input;
u8 *socket_name = 0;
clib_error_t *error = NULL;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -560,7 +574,6 @@ punt_socket_register_cmd (vlib_main_t * vm,
},
.type = PUNT_TYPE_L4,
};
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -608,7 +621,6 @@ done:
* @cliexcmd{punt socket register socket punt_l4_foo.sock}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_socket_register_command, static) =
{
.path = "punt socket register",
@@ -616,7 +628,6 @@ VLIB_CLI_COMMAND (punt_socket_register_command, static) =
.short_help = "punt socket register [IPV4|ipv6] [UDP|tcp] [ALL|<port-num>] socket <socket>",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static clib_error_t *
punt_socket_deregister_cmd (vlib_main_t * vm,
@@ -625,7 +636,6 @@ punt_socket_deregister_cmd (vlib_main_t * vm,
{
unformat_input_t line_input, *input = &line_input;
clib_error_t *error = NULL;
- /* *INDENT-OFF* */
punt_reg_t pr = {
.punt = {
.l4 = {
@@ -636,7 +646,6 @@ punt_socket_deregister_cmd (vlib_main_t * vm,
},
.type = PUNT_TYPE_L4,
};
- /* *INDENT-ON* */
if (!unformat_user (input__, unformat_line_input, input))
return 0;
@@ -677,7 +686,6 @@ done:
* @cliexpar
* @cliexcmd{punt socket register}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (punt_socket_deregister_command, static) =
{
.path = "punt socket deregister",
@@ -685,7 +693,6 @@ VLIB_CLI_COMMAND (punt_socket_deregister_command, static) =
.short_help = "punt socket deregister [IPV4|ipv6] [UDP|tcp] [ALL|<port-num>]",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
void
punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx)
@@ -698,24 +705,20 @@ punt_client_walk (punt_type_t pt, punt_client_walk_cb_t cb, void *ctx)
{
u32 pci, key;
- /* *INDENT-OFF* */
hash_foreach(key, pci, pm->db.clients_by_l4_port,
({
cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx);
}));
- /* *INDENT-ON* */
break;
}
case PUNT_TYPE_IP_PROTO:
{
u32 pci, key;
- /* *INDENT-OFF* */
hash_foreach(key, pci, pm->db.clients_by_ip_proto,
({
cb (pool_elt_at_index(pm->punt_client_pool, pci), ctx);
}));
- /* *INDENT-ON* */
break;
}
case PUNT_TYPE_EXCEPTION:
@@ -813,7 +816,6 @@ done:
* @cliexpar
* @cliexcmd{show punt socket ipv4}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) =
{
.path = "show punt socket registrations",
@@ -821,7 +823,6 @@ VLIB_CLI_COMMAND (show_punt_socket_registration_command, static) =
.short_help = "show punt socket registrations [l4|exception]",
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
ip_punt_init (vlib_main_t * vm)
diff --git a/src/vnet/ip/punt.h b/src/vnet/ip/punt.h
index a2612d60f07..e8495caad61 100644
--- a/src/vnet/ip/punt.h
+++ b/src/vnet/ip/punt.h
@@ -20,7 +20,12 @@
#ifndef included_punt_h
#define included_punt_h
+#ifdef __linux__
#include <linux/un.h>
+#elif __FreeBSD__
+#include <sys/un.h>
+#define UNIX_PATH_MAX SUNPATHLEN
+#endif /* __linux__ */
#include <stdbool.h>
#include <vnet/ip/ip.h>
@@ -239,6 +244,7 @@ extern vlib_node_registration_t udp4_punt_node;
extern vlib_node_registration_t udp6_punt_node;
extern vlib_node_registration_t udp4_punt_socket_node;
extern vlib_node_registration_t udp6_punt_socket_node;
+extern vlib_node_registration_t icmp6_punt_socket_node;
extern vlib_node_registration_t ip4_proto_punt_socket_node;
extern vlib_node_registration_t ip6_proto_punt_socket_node;
extern vlib_node_registration_t punt_socket_rx_node;
diff --git a/src/vnet/ip/punt_api.c b/src/vnet/ip/punt_api.c
index bcbf939f69d..20297af2e75 100644
--- a/src/vnet/ip/punt_api.c
+++ b/src/vnet/ip/punt_api.c
@@ -224,12 +224,10 @@ vl_api_punt_socket_register_t_handler (vl_api_punt_socket_register_t * mp)
char *p = vnet_punt_get_server_pathname ();
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_PUNT_SOCKET_REGISTER_REPLY,
({
memcpy ((char *) rmp->pathname, p, sizeof (rmp->pathname));
}));
- /* *INDENT-ON* */
}
typedef struct punt_socket_send_ctx_t_
diff --git a/src/vnet/ip/punt_node.c b/src/vnet/ip/punt_node.c
index 7f9beef0ffe..6400e49c626 100644
--- a/src/vnet/ip/punt_node.c
+++ b/src/vnet/ip/punt_node.c
@@ -23,6 +23,7 @@
*/
#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
#include <vlib/vlib.h>
#include <vnet/ip/punt.h>
#include <vlib/unix/unix.h>
@@ -182,7 +183,6 @@ VLIB_NODE_FN (udp6_punt_node) (vlib_main_t * vm,
return udp46_punt_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_punt_node) = {
.name = "ip4-udp-punt",
/* Takes a vector of packets. */
@@ -214,7 +214,6 @@ VLIB_REGISTER_NODE (udp6_punt_node) = {
#undef _
},
};
-/* *INDENT-ON* */
typedef struct
{
@@ -243,10 +242,9 @@ format_udp_punt_trace (u8 * s, va_list * args)
}
always_inline uword
-punt_socket_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- punt_type_t pt, ip_address_family_t af)
+punt_socket_inline2 (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, punt_type_t pt,
+ ip_address_family_t af, ip_protocol_t protocol)
{
u32 *buffers = vlib_frame_vector_args (frame);
u32 thread_index = vm->thread_index;
@@ -266,33 +264,42 @@ punt_socket_inline (vlib_main_t * vm,
uword l;
punt_packetdesc_t packetdesc;
punt_client_t *c;
-
+ u16 port = 0;
b = vlib_get_buffer (vm, buffers[i]);
if (PUNT_TYPE_L4 == pt)
{
- /* Reverse UDP Punt advance */
- udp_header_t *udp;
- if (AF_IP4 == af)
+ if (protocol == IP_PROTOCOL_UDP)
{
- vlib_buffer_advance (b, -(sizeof (ip4_header_t) +
- sizeof (udp_header_t)));
- ip4_header_t *ip = vlib_buffer_get_current (b);
- udp = (udp_header_t *) (ip + 1);
+ /* Reverse UDP Punt advance */
+ udp_header_t *udp;
+ if (AF_IP4 == af)
+ {
+ vlib_buffer_advance (
+ b, -(sizeof (ip4_header_t) + sizeof (udp_header_t)));
+ ip4_header_t *ip = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (ip + 1);
+ }
+ else
+ {
+ vlib_buffer_advance (
+ b, -(sizeof (ip6_header_t) + sizeof (udp_header_t)));
+ ip6_header_t *ip = vlib_buffer_get_current (b);
+ udp = (udp_header_t *) (ip + 1);
+ }
+ port = clib_net_to_host_u16 (udp->dst_port);
}
- else
+ else if (protocol == IP_PROTOCOL_ICMP6)
{
- vlib_buffer_advance (b, -(sizeof (ip6_header_t) +
- sizeof (udp_header_t)));
ip6_header_t *ip = vlib_buffer_get_current (b);
- udp = (udp_header_t *) (ip + 1);
+ icmp46_header_t *icmp = ip6_next_header (ip);
+ port = icmp->type;
}
-
/*
* Find registerered client
* If no registered client, drop packet and count
*/
- c = punt_client_l4_get (af, clib_net_to_host_u16 (udp->dst_port));
+ c = punt_client_l4_get (af, port);
}
else if (PUNT_TYPE_IP_PROTO == pt)
{
@@ -339,7 +346,7 @@ punt_socket_inline (vlib_main_t * vm,
iov->iov_len = sizeof (packetdesc);
/** VLIB buffer chain -> Unix iovec(s). */
- vlib_buffer_advance (b, -(sizeof (ethernet_header_t)));
+ vlib_buffer_advance (b, -ethernet_buffer_header_size (b));
vec_add2 (ptd->iovecs, iov, 1);
iov->iov_base = b->data + b->current_data;
iov->iov_len = l = b->current_length;
@@ -396,6 +403,14 @@ error:
return n_packets;
}
+always_inline uword
+punt_socket_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, punt_type_t pt,
+ ip_address_family_t af)
+{
+ return punt_socket_inline2 (vm, node, frame, pt, af, IP_PROTOCOL_UDP);
+}
+
static uword
udp4_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
@@ -427,6 +442,14 @@ ip6_proto_punt_socket (vlib_main_t * vm,
}
static uword
+icmp6_punt_socket (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *from_frame)
+{
+ return punt_socket_inline2 (vm, node, from_frame, PUNT_TYPE_L4, AF_IP6,
+ IP_PROTOCOL_ICMP6);
+}
+
+static uword
exception_punt_socket (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * from_frame)
{
@@ -435,7 +458,6 @@ exception_punt_socket (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_punt_socket_node) = {
.function = udp4_punt_socket,
.name = "ip4-udp-punt-socket",
@@ -483,7 +505,16 @@ VLIB_REGISTER_NODE (exception_punt_socket_node) = {
.n_errors = PUNT_N_ERROR,
.error_strings = punt_error_strings,
};
-/* *INDENT-ON* */
+VLIB_REGISTER_NODE (icmp6_punt_socket_node) = {
+ .function = icmp6_punt_socket,
+ .name = "ip6-icmp-punt-socket",
+ .format_trace = format_udp_punt_trace,
+ .flags = VLIB_NODE_FLAG_IS_DROP,
+ .vector_size = sizeof (u32),
+ .n_errors = PUNT_N_ERROR,
+ .error_strings = punt_error_strings,
+};
+
typedef struct
{
@@ -614,7 +645,6 @@ punt_socket_rx (vlib_main_t * vm,
return total_count;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (punt_socket_rx_node) =
{
.function = punt_socket_rx,
@@ -633,7 +663,6 @@ VLIB_REGISTER_NODE (punt_socket_rx_node) =
},
.format_trace = format_punt_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip/reass/ip4_full_reass.c b/src/vnet/ip/reass/ip4_full_reass.c
index fda73a43a1b..bab7d479dcf 100644
--- a/src/vnet/ip/reass/ip4_full_reass.c
+++ b/src/vnet/ip/reass/ip4_full_reass.c
@@ -23,16 +23,21 @@
#include <vppinfra/vec.h>
#include <vnet/vnet.h>
#include <vnet/ip/ip.h>
+#include <vnet/ip/ip.api_enum.h>
#include <vppinfra/fifo.h>
#include <vppinfra/bihash_16_8.h>
#include <vnet/ip/reass/ip4_full_reass.h>
#include <stddef.h>
#define MSEC_PER_SEC 1000
-#define IP4_REASS_TIMEOUT_DEFAULT_MS 100
-#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
+#define IP4_REASS_TIMEOUT_DEFAULT_MS 200
+
+/* As there are only 1024 reass context per thread, either the DDOS attacks
+ * or fractions of real timeouts, would consume these contexts quickly and
+ * running out context space and unable to perform reassembly */
+#define IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 50 // 50 ms default
#define IP4_REASS_MAX_REASSEMBLIES_DEFAULT 1024
-#define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
+#define IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
#define IP4_REASS_HT_LOAD_FACTOR (0.75)
#define IP4_REASS_DEBUG_BUFFERS 0
@@ -68,21 +73,19 @@ typedef enum
typedef struct
{
- union
+ struct
{
- struct
- {
- u32 xx_id;
- ip4_address_t src;
- ip4_address_t dst;
- u16 frag_id;
- u8 proto;
- u8 unused;
- };
- u64 as_u64[2];
+ u16 frag_id;
+ u8 proto;
+ u8 unused;
+ u32 fib_index;
+ ip4_address_t src;
+ ip4_address_t dst;
};
} ip4_full_reass_key_t;
+STATIC_ASSERT_SIZEOF (ip4_full_reass_key_t, 16);
+
typedef union
{
struct
@@ -155,6 +158,8 @@ typedef struct
ip4_full_reass_t *pool;
u32 reass_n;
u32 id_counter;
+ // for pacing the main thread timeouts
+ u32 last_id;
clib_spinlock_t lock;
} ip4_full_reass_per_thread_t;
@@ -177,17 +182,19 @@ typedef struct
// convenience
vlib_main_t *vlib_main;
- // node index of ip4-drop node
- u32 ip4_drop_idx;
u32 ip4_full_reass_expire_node_idx;
/** Worker handoff */
u32 fq_index;
+ u32 fq_local_index;
u32 fq_feature_index;
u32 fq_custom_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
+
+ // whether local fragmented packets are reassembled or not
+ int is_local_reass_enabled;
} ip4_full_reass_main_t;
extern ip4_full_reass_main_t ip4_full_reass_main;
@@ -219,6 +226,7 @@ typedef enum
RANGE_OVERLAP,
FINALIZE,
HANDOFF,
+ PASSTHROUGH,
} ip4_full_reass_trace_operation_e;
typedef struct
@@ -329,13 +337,15 @@ format_ip4_full_reass_trace (u8 * s, va_list * args)
format (s, "handoff from thread #%u to thread #%u", t->thread_id,
t->thread_id_to);
break;
+ case PASSTHROUGH:
+ s = format (s, "passthrough - not a fragment");
+ break;
}
return s;
}
static void
ip4_full_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip4_full_reass_main_t * rm,
ip4_full_reass_t * reass, u32 bi,
ip4_full_reass_trace_operation_e action,
u32 size_diff, u32 thread_id_to)
@@ -405,73 +415,121 @@ ip4_full_reass_free (ip4_full_reass_main_t * rm,
ip4_full_reass_per_thread_t * rt,
ip4_full_reass_t * reass)
{
- clib_bihash_kv_16_8_t kv;
- kv.key[0] = reass->key.as_u64[0];
- kv.key[1] = reass->key.as_u64[1];
+ clib_bihash_kv_16_8_t kv = {};
+ clib_memcpy_fast (&kv, &reass->key, sizeof (kv.key));
clib_bihash_add_del_16_8 (&rm->hash, &kv, 0);
return ip4_full_reass_free_ctx (rt, reass);
}
+/* n_left_to_next, and to_next are taken as input params, as this function
+ * could be called from a graphnode, where its managing local copy of these
+ * variables, and ignoring those and still trying to enqueue the buffers
+ * with local variables would cause either buffer leak or corruption */
always_inline void
-ip4_full_reass_drop_all (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip4_full_reass_main_t * rm, ip4_full_reass_t * reass)
+ip4_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip4_full_reass_t *reass)
{
u32 range_bi = reass->first_bi;
vlib_buffer_t *range_b;
vnet_buffer_opaque_t *range_vnb;
u32 *to_free = NULL;
+
while (~0 != range_bi)
{
range_b = vlib_get_buffer (vm, range_bi);
range_vnb = vnet_buffer (range_b);
- u32 bi = range_bi;
- while (~0 != bi)
+
+ if (~0 != range_bi)
{
- vec_add1 (to_free, bi);
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- bi = b->next_buffer;
- b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
- }
- else
- {
- bi = ~0;
- }
+ vec_add1 (to_free, range_bi);
}
+
range_bi = range_vnb->ip.reass.next_range_bi;
}
+
/* send to next_error_index */
- if (~0 != reass->error_next_index)
+ if (~0 != reass->error_next_index &&
+ reass->error_next_index < node->n_next_nodes)
+ {
+ u32 n_free = vec_len (to_free);
+
+ /* record number of packets sent to custom app */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TO_CUSTOM_APP, n_free);
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ for (u32 i = 0; i < n_free; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, to_free[i]);
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ ip4_full_reass_add_trace (vm, node, reass, to_free[i],
+ RANGE_DISCARD, 0, ~0);
+ }
+
+ vlib_buffer_enqueue_to_single_next (vm, node, to_free,
+ reass->error_next_index, n_free);
+ }
+ else
{
- u32 n_left_to_next, *to_next, next_index;
+ vlib_buffer_free (vm, to_free, vec_len (to_free));
+ }
+ vec_free (to_free);
+}
- next_index = reass->error_next_index;
- u32 bi = ~0;
+always_inline void
+sanitize_reass_buffers_add_missing (vlib_main_t *vm, ip4_full_reass_t *reass,
+ u32 *bi0)
+{
+ u32 range_bi = reass->first_bi;
+ vlib_buffer_t *range_b;
+ vnet_buffer_opaque_t *range_vnb;
- while (vec_len (to_free) > 0)
+ while (~0 != range_bi)
+ {
+ range_b = vlib_get_buffer (vm, range_bi);
+ range_vnb = vnet_buffer (range_b);
+ u32 bi = range_bi;
+ if (~0 != bi)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (vec_len (to_free) > 0 && n_left_to_next > 0)
+ if (bi == *bi0)
+ *bi0 = ~0;
+ if (range_b->flags & VLIB_BUFFER_NEXT_PRESENT)
{
- bi = vec_pop (to_free);
-
- if (~0 != bi)
+ u32 _bi = bi;
+ vlib_buffer_t *_b = vlib_get_buffer (vm, _bi);
+ while (_b->flags & VLIB_BUFFER_NEXT_PRESENT)
{
- to_next[0] = bi;
- to_next += 1;
- n_left_to_next -= 1;
+ if (_b->next_buffer != range_vnb->ip.reass.next_range_bi)
+ {
+ _bi = _b->next_buffer;
+ _b = vlib_get_buffer (vm, _bi);
+ }
+ else
+ {
+ _b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ break;
+ }
}
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ range_bi = range_vnb->ip.reass.next_range_bi;
}
}
- else
+ if (*bi0 != ~0)
{
- vlib_buffer_free (vm, to_free, vec_len (to_free));
+ vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
+ vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
+ if (~0 != reass->first_bi)
+ {
+ fvnb->ip.reass.next_range_bi = reass->first_bi;
+ reass->first_bi = *bi0;
+ }
+ else
+ {
+ reass->first_bi = *bi0;
+ fvnb->ip.reass.next_range_bi = ~0;
+ }
+ *bi0 = ~0;
}
- vec_free (to_free);
}
always_inline void
@@ -485,10 +543,10 @@ ip4_full_reass_init (ip4_full_reass_t * reass)
}
always_inline ip4_full_reass_t *
-ip4_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip4_full_reass_main_t * rm,
- ip4_full_reass_per_thread_t * rt,
- ip4_full_reass_kv_t * kv, u8 * do_handoff)
+ip4_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip4_full_reass_main_t *rm,
+ ip4_full_reass_per_thread_t *rt,
+ ip4_full_reass_kv_t *kv, u8 *do_handoff)
{
ip4_full_reass_t *reass;
f64 now;
@@ -511,7 +569,9 @@ again:
if (now > reass->last_heard + rm->timeout)
{
- ip4_full_reass_drop_all (vm, node, rm, reass);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TIMEOUT, 1);
+ ip4_full_reass_drop_all (vm, node, reass);
ip4_full_reass_free (rm, rt, reass);
reass = NULL;
}
@@ -539,8 +599,7 @@ again:
++rt->reass_n;
}
- reass->key.as_u64[0] = kv->kv.key[0];
- reass->key.as_u64[1] = kv->kv.key[1];
+ clib_memcpy_fast (&reass->key, &kv->kv.key, sizeof (reass->key));
kv->v.reass_index = (reass - rt->pool);
kv->v.memory_owner_thread_index = vm->thread_index;
reass->last_heard = now;
@@ -569,7 +628,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_t *last_b = NULL;
u32 sub_chain_bi = reass->first_bi;
u32 total_length = 0;
- u32 buf_cnt = 0;
do
{
u32 tmp_bi = sub_chain_bi;
@@ -606,7 +664,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
while (1)
{
- ++buf_cnt;
if (trim_front)
{
if (trim_front > tmp->current_length)
@@ -717,8 +774,8 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
first_b->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_full_reass_add_trace (vm, node, rm, reass, reass->first_bi,
- FINALIZE, 0, ~0);
+ ip4_full_reass_add_trace (vm, node, reass, reass->first_bi, FINALIZE, 0,
+ ~0);
#if 0
// following code does a hexdump of packet fragments to stdout ...
do
@@ -756,6 +813,16 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
*next0 = reass->next_index;
}
vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
+
+ /* Keep track of number of successfully reassembled packets and number of
+ * fragments reassembled */
+ vlib_node_increment_counter (vm, node->node_index, IP4_ERROR_REASS_SUCCESS,
+ 1);
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_FRAGMENTS_REASSEMBLED,
+ reass->fragments_n);
+
*error0 = IP4_ERROR_NONE;
ip4_full_reass_free (rm, rt, reass);
reass = NULL;
@@ -764,8 +831,6 @@ ip4_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
always_inline ip4_full_reass_rc_t
ip4_full_reass_insert_range_in_chain (vlib_main_t * vm,
- ip4_full_reass_main_t * rm,
- ip4_full_reass_per_thread_t * rt,
ip4_full_reass_t * reass,
u32 prev_range_bi, u32 new_next_bi)
{
@@ -799,7 +864,6 @@ ip4_full_reass_insert_range_in_chain (vlib_main_t * vm,
always_inline ip4_full_reass_rc_t
ip4_full_reass_remove_range_from_chain (vlib_main_t * vm,
vlib_node_runtime_t * node,
- ip4_full_reass_main_t * rm,
ip4_full_reass_t * reass,
u32 prev_range_bi, u32 discard_bi)
{
@@ -831,8 +895,8 @@ ip4_full_reass_remove_range_from_chain (vlib_main_t * vm,
u32 to_be_freed_bi = discard_bi;
if (PREDICT_FALSE (discard_b->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_full_reass_add_trace (vm, node, rm, reass, discard_bi,
- RANGE_DISCARD, 0, ~0);
+ ip4_full_reass_add_trace (vm, node, reass, discard_bi, RANGE_DISCARD,
+ 0, ~0);
}
if (discard_b->flags & VLIB_BUFFER_NEXT_PRESENT)
{
@@ -890,16 +954,14 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
{
// starting a new reassembly
rc =
- ip4_full_reass_insert_range_in_chain (vm, rm, rt, reass,
- prev_range_bi, *bi0);
+ ip4_full_reass_insert_range_in_chain (vm, reass, prev_range_bi, *bi0);
if (IP4_REASS_RC_OK != rc)
{
return rc;
}
if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_full_reass_add_trace (vm, node, rm, reass, *bi0, RANGE_NEW, 0,
- ~0);
+ ip4_full_reass_add_trace (vm, node, reass, *bi0, RANGE_NEW, 0, ~0);
}
*bi0 = ~0;
reass->min_fragment_length = clib_net_to_host_u16 (fip->length);
@@ -922,9 +984,8 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
~0 == candidate_range_bi)
{
// special case - this fragment falls beyond all known ranges
- rc =
- ip4_full_reass_insert_range_in_chain (vm, rm, rt, reass,
- prev_range_bi, *bi0);
+ rc = ip4_full_reass_insert_range_in_chain (vm, reass,
+ prev_range_bi, *bi0);
if (IP4_REASS_RC_OK != rc)
{
return rc;
@@ -937,9 +998,8 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
if (fragment_last < candidate_vnb->ip.reass.range_first)
{
// this fragment ends before candidate range without any overlap
- rc =
- ip4_full_reass_insert_range_in_chain (vm, rm, rt, reass,
- prev_range_bi, *bi0);
+ rc = ip4_full_reass_insert_range_in_chain (vm, reass, prev_range_bi,
+ *bi0);
if (IP4_REASS_RC_OK != rc)
{
return rc;
@@ -954,7 +1014,7 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
// this fragment is a (sub)part of existing range, ignore it
if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_full_reass_add_trace (vm, node, rm, reass, *bi0,
+ ip4_full_reass_add_trace (vm, node, reass, *bi0,
RANGE_OVERLAP, 0, ~0);
}
break;
@@ -974,14 +1034,12 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
reass->data_len -= overlap;
if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_full_reass_add_trace (vm, node, rm, reass,
+ ip4_full_reass_add_trace (vm, node, reass,
candidate_range_bi,
RANGE_SHRINK, 0, ~0);
}
- rc =
- ip4_full_reass_insert_range_in_chain (vm, rm, rt, reass,
- prev_range_bi,
- *bi0);
+ rc = ip4_full_reass_insert_range_in_chain (
+ vm, reass, prev_range_bi, *bi0);
if (IP4_REASS_RC_OK != rc)
{
return rc;
@@ -1010,11 +1068,8 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
else
{
// special case - last range discarded
- rc =
- ip4_full_reass_insert_range_in_chain (vm, rm, rt,
- reass,
- candidate_range_bi,
- *bi0);
+ rc = ip4_full_reass_insert_range_in_chain (
+ vm, reass, candidate_range_bi, *bi0);
if (IP4_REASS_RC_OK != rc)
{
return rc;
@@ -1035,10 +1090,8 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u32 next_range_bi = candidate_vnb->ip.reass.next_range_bi;
// discard candidate range, probe next range
- rc =
- ip4_full_reass_remove_range_from_chain (vm, node, rm, reass,
- prev_range_bi,
- candidate_range_bi);
+ rc = ip4_full_reass_remove_range_from_chain (
+ vm, node, reass, prev_range_bi, candidate_range_bi);
if (IP4_REASS_RC_OK != rc)
{
return rc;
@@ -1051,10 +1104,8 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
else
{
// special case - last range discarded
- rc =
- ip4_full_reass_insert_range_in_chain (vm, rm, rt, reass,
- prev_range_bi,
- *bi0);
+ rc = ip4_full_reass_insert_range_in_chain (
+ vm, reass, prev_range_bi, *bi0);
if (IP4_REASS_RC_OK != rc)
{
return rc;
@@ -1070,8 +1121,7 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
{
if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_full_reass_add_trace (vm, node, rm, reass, *bi0, RANGE_NEW, 0,
- ~0);
+ ip4_full_reass_add_trace (vm, node, reass, *bi0, RANGE_NEW, 0, ~0);
}
}
if (~0 != reass->last_packet_octet &&
@@ -1108,201 +1158,216 @@ ip4_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
}
always_inline uword
-ip4_full_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, ip4_full_reass_node_type_t type)
+ip4_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, ip4_full_reass_node_type_t type,
+ bool is_local)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left, n_next = 0, to_next[VLIB_FRAME_SIZE];
ip4_full_reass_main_t *rm = &ip4_full_reass_main;
ip4_full_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u16 nexts[VLIB_FRAME_SIZE];
+
clib_spinlock_lock (&rt->lock);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
- while (n_left_from > 0)
+ n_left = frame->n_vectors;
+ while (n_left > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ u32 bi0;
+ vlib_buffer_t *b0;
+ u32 next0;
+ u32 error0 = IP4_ERROR_NONE;
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0;
- u32 error0 = IP4_ERROR_NONE;
+ bi0 = from[0];
+ b0 = vlib_get_buffer (vm, bi0);
- bi0 = from[0];
- b0 = vlib_get_buffer (vm, bi0);
-
- ip4_header_t *ip0 = vlib_buffer_get_current (b0);
- if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
+ ip4_header_t *ip0 = vlib_buffer_get_current (b0);
+ if (!ip4_get_fragment_more (ip0) && !ip4_get_fragment_offset (ip0))
+ {
+ // this is a whole packet - no fragmentation
+ if (CUSTOM != type)
{
- // this is a whole packet - no fragmentation
- if (CUSTOM != type)
- {
- next0 = IP4_FULL_REASS_NEXT_INPUT;
- }
- else
- {
- next0 = vnet_buffer (b0)->ip.reass.next_index;
- }
- goto packet_enqueue;
+ next0 = IP4_FULL_REASS_NEXT_INPUT;
}
- const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
- const u32 fragment_length =
- clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
- const u32 fragment_last = fragment_first + fragment_length - 1;
- if (fragment_first > fragment_last || fragment_first + fragment_length > UINT16_MAX - 20 || (fragment_length < 8 && ip4_get_fragment_more (ip0))) // 8 is minimum frag length per RFC 791
+ else
{
- next0 = IP4_FULL_REASS_NEXT_DROP;
- error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
- goto packet_enqueue;
+ next0 = vnet_buffer (b0)->ip.reass.next_index;
}
- ip4_full_reass_kv_t kv;
- u8 do_handoff = 0;
-
- kv.k.as_u64[0] =
- (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
- (u64) ip0->src_address.as_u32 << 32;
- kv.k.as_u64[1] =
- (u64) ip0->dst_address.
- as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
-
- ip4_full_reass_t *reass =
- ip4_full_reass_find_or_create (vm, node, rm, rt, &kv,
- &do_handoff);
-
- if (reass)
+ ip4_full_reass_add_trace (vm, node, NULL, bi0, PASSTHROUGH, 0, ~0);
+ goto packet_enqueue;
+ }
+
+ if (is_local && !rm->is_local_reass_enabled)
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ goto packet_enqueue;
+ }
+
+ const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
+ const u32 fragment_length =
+ clib_net_to_host_u16 (ip0->length) - ip4_header_bytes (ip0);
+ const u32 fragment_last = fragment_first + fragment_length - 1;
+
+ /* Keep track of received fragments */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_FRAGMENTS_RCVD, 1);
+
+ if (fragment_first > fragment_last ||
+ fragment_first + fragment_length > UINT16_MAX - 20 ||
+ (fragment_length < 8 && // 8 is minimum frag length per RFC 791
+ ip4_get_fragment_more (ip0)))
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ error0 = IP4_ERROR_REASS_MALFORMED_PACKET;
+ goto packet_enqueue;
+ }
+
+ u32 fib_index = (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ?
+ vec_elt (ip4_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) :
+ vnet_buffer (b0)->sw_if_index[VLIB_TX];
+
+ ip4_full_reass_kv_t kv = { .k.fib_index = fib_index,
+ .k.src.as_u32 = ip0->src_address.as_u32,
+ .k.dst.as_u32 = ip0->dst_address.as_u32,
+ .k.frag_id = ip0->fragment_id,
+ .k.proto = ip0->protocol
+
+ };
+ u8 do_handoff = 0;
+
+ ip4_full_reass_t *reass =
+ ip4_full_reass_find_or_create (vm, node, rm, rt, &kv, &do_handoff);
+
+ if (reass)
+ {
+ const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
+ if (0 == fragment_first)
{
- const u32 fragment_first = ip4_get_fragment_offset_bytes (ip0);
- if (0 == fragment_first)
- {
- reass->sendout_thread_index = vm->thread_index;
- }
+ reass->sendout_thread_index = vm->thread_index;
}
+ }
- if (PREDICT_FALSE (do_handoff))
+ if (PREDICT_FALSE (do_handoff))
+ {
+ next0 = IP4_FULL_REASS_NEXT_HANDOFF;
+ vnet_buffer (b0)->ip.reass.owner_thread_index =
+ kv.v.memory_owner_thread_index;
+ }
+ else if (reass)
+ {
+ u32 handoff_thread_idx;
+ u32 counter = ~0;
+ switch (ip4_full_reass_update (vm, node, rm, rt, reass, &bi0, &next0,
+ &error0, CUSTOM == type,
+ &handoff_thread_idx))
{
+ case IP4_REASS_RC_OK:
+ /* nothing to do here */
+ break;
+ case IP4_REASS_RC_HANDOFF:
next0 = IP4_FULL_REASS_NEXT_HANDOFF;
+ b0 = vlib_get_buffer (vm, bi0);
vnet_buffer (b0)->ip.reass.owner_thread_index =
- kv.v.memory_owner_thread_index;
- }
- else if (reass)
- {
- u32 handoff_thread_idx;
- switch (ip4_full_reass_update
- (vm, node, rm, rt, reass, &bi0, &next0,
- &error0, CUSTOM == type, &handoff_thread_idx))
- {
- case IP4_REASS_RC_OK:
- /* nothing to do here */
- break;
- case IP4_REASS_RC_HANDOFF:
- next0 = IP4_FULL_REASS_NEXT_HANDOFF;
- b0 = vlib_get_buffer (vm, bi0);
- vnet_buffer (b0)->ip.reass.owner_thread_index =
- handoff_thread_idx;
- break;
- case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip4_full_reass_drop_all (vm, node, rm, reass);
- ip4_full_reass_free (rm, rt, reass);
- goto next_packet;
- break;
- case IP4_REASS_RC_NO_BUF:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_NO_BUF, 1);
- ip4_full_reass_drop_all (vm, node, rm, reass);
- ip4_full_reass_free (rm, rt, reass);
- goto next_packet;
- break;
- case IP4_REASS_RC_INTERNAL_ERROR:
- /* drop everything and start with a clean slate */
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_INTERNAL_ERROR,
- 1);
- ip4_full_reass_drop_all (vm, node, rm, reass);
- ip4_full_reass_free (rm, rt, reass);
- goto next_packet;
- break;
- }
+ handoff_thread_idx;
+ break;
+ case IP4_REASS_RC_TOO_MANY_FRAGMENTS:
+ counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
+ break;
+ case IP4_REASS_RC_NO_BUF:
+ counter = IP4_ERROR_REASS_NO_BUF;
+ break;
+ case IP4_REASS_RC_INTERNAL_ERROR:
+ counter = IP4_ERROR_REASS_INTERNAL_ERROR;
+ /* Sanitization is needed in internal error cases only, as
+ * the incoming packet is already dropped in other cases,
+ * also adding bi0 back to the reassembly list, fixes the
+ * leaking of buffers during internal errors.
+ *
+ * Also it doesnt make sense to send these buffers custom
+ * app, these fragments are with internal errors */
+ sanitize_reass_buffers_add_missing (vm, reass, &bi0);
+ reass->error_next_index = ~0;
+ break;
}
- else
+
+ if (~0 != counter)
{
- next0 = IP4_FULL_REASS_NEXT_DROP;
- error0 = IP4_ERROR_REASS_LIMIT_REACHED;
+ vlib_node_increment_counter (vm, node->node_index, counter, 1);
+ ip4_full_reass_drop_all (vm, node, reass);
+ ip4_full_reass_free (rm, rt, reass);
+ goto next_packet;
}
+ }
+ else
+ {
+ next0 = IP4_FULL_REASS_NEXT_DROP;
+ error0 = IP4_ERROR_REASS_LIMIT_REACHED;
+ }
+ packet_enqueue:
- packet_enqueue:
-
- if (bi0 != ~0)
+ if (bi0 != ~0)
+ {
+ /* bi0 might have been updated by reass_finalize, reload */
+ b0 = vlib_get_buffer (vm, bi0);
+ if (IP4_ERROR_NONE != error0)
{
- to_next[0] = bi0;
- to_next += 1;
- n_left_to_next -= 1;
+ b0->error = node->errors[error0];
+ }
- /* bi0 might have been updated by reass_finalize, reload */
- b0 = vlib_get_buffer (vm, bi0);
- if (IP4_ERROR_NONE != error0)
+ if (next0 == IP4_FULL_REASS_NEXT_HANDOFF)
+ {
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- b0->error = node->errors[error0];
+ ip4_full_reass_add_trace (
+ vm, node, NULL, bi0, HANDOFF, 0,
+ vnet_buffer (b0)->ip.reass.owner_thread_index);
}
+ }
+ else if (FEATURE == type && IP4_ERROR_NONE == error0)
+ {
+ vnet_feature_next (&next0, b0);
+ }
- if (next0 == IP4_FULL_REASS_NEXT_HANDOFF)
- {
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- ip4_full_reass_add_trace (vm, node, rm, NULL, bi0,
- HANDOFF, 0,
- vnet_buffer (b0)->ip.
- reass.owner_thread_index);
- }
- }
- else if (FEATURE == type && IP4_ERROR_NONE == error0)
- {
- vnet_feature_next (&next0, b0);
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next);
+ /* Increment the counter to-custom-app also as this fragment is
+ * also going to application */
+ if (CUSTOM == type)
+ {
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TO_CUSTOM_APP, 1);
}
- next_packet:
- from += 1;
- n_left_from -= 1;
+ to_next[n_next] = bi0;
+ nexts[n_next] = next0;
+ n_next++;
+ IP4_REASS_DEBUG_BUFFER (bi0, enqueue_next);
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ next_packet:
+ from += 1;
+ n_left -= 1;
}
clib_spinlock_unlock (&rt->lock);
+
+ vlib_buffer_enqueue_to_next (vm, node, to_next, nexts, n_next);
return frame->n_vectors;
}
-static char *ip4_full_reass_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip4_error
-#undef _
-};
-
VLIB_NODE_FN (ip4_full_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_inline (vm, node, frame, NORMAL);
+ return ip4_full_reass_inline (vm, node, frame, NORMAL, false /* is_local */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_full_reass_node) = {
.name = "ip4-full-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_FULL_REASS_N_NEXT,
.next_nodes =
{
@@ -1312,22 +1377,43 @@ VLIB_REGISTER_NODE (ip4_full_reass_node) = {
},
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip4_local_full_reass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_full_reass_inline (vm, node, frame, NORMAL, true /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip4_local_full_reass_node) = {
+ .name = "ip4-local-full-reassembly",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip4_full_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
+ .n_next_nodes = IP4_FULL_REASS_N_NEXT,
+ .next_nodes =
+ {
+ [IP4_FULL_REASS_NEXT_INPUT] = "ip4-input",
+ [IP4_FULL_REASS_NEXT_DROP] = "ip4-drop",
+ [IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-local-full-reassembly-handoff",
+
+ },
+};
VLIB_NODE_FN (ip4_full_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_inline (vm, node, frame, FEATURE);
+ return ip4_full_reass_inline (vm, node, frame, FEATURE,
+ false /* is_local */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = {
.name = "ip4-full-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_FULL_REASS_N_NEXT,
.next_nodes =
{
@@ -1336,32 +1422,28 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_feature) = {
[IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-full-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_full_reass_feature, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "ip4-full-reassembly-feature",
- .runs_before = VNET_FEATURES ("ip4-lookup",
- "ipsec4-input-feature"),
- .runs_after = 0,
+ .arc_name = "ip4-unicast",
+ .node_name = "ip4-full-reassembly-feature",
+ .runs_before = VNET_FEATURES ("ip4-lookup", "ipsec4-input-feature",
+ "ip4-sv-reassembly-feature"),
+ .runs_after = 0,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_full_reass_node_custom) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_inline (vm, node, frame, CUSTOM);
+ return ip4_full_reass_inline (vm, node, frame, CUSTOM, false /* is_local */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = {
.name = "ip4-full-reassembly-custom",
.vector_size = sizeof (u32),
.format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_FULL_REASS_N_NEXT,
.next_nodes =
{
@@ -1370,18 +1452,6 @@ VLIB_REGISTER_NODE (ip4_full_reass_node_custom) = {
[IP4_FULL_REASS_NEXT_HANDOFF] = "ip4-full-reass-custom-hoff",
},
};
-/* *INDENT-ON* */
-
-/* *INDENT-OFF* */
-VNET_FEATURE_INIT (ip4_full_reass_custom, static) = {
- .arc_name = "ip4-unicast",
- .node_name = "ip4-full-reassembly-feature",
- .runs_before = VNET_FEATURES ("ip4-lookup",
- "ipsec4-input-feature"),
- .runs_after = 0,
-};
-
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
uword
@@ -1398,7 +1468,9 @@ ip4_full_reass_get_nbuckets ()
u32 nbuckets;
u8 i;
- nbuckets = (u32) (rm->max_reass_n / IP4_REASS_HT_LOAD_FACTOR);
+ /* need more mem with more workers */
+ nbuckets = (u32) (rm->max_reass_n * (vlib_num_workers () + 1) /
+ IP4_REASS_HT_LOAD_FACTOR);
for (i = 0; i < 31; i++)
if ((1 << i) >= nbuckets)
@@ -1524,17 +1596,17 @@ ip4_full_reass_init_function (vlib_main_t * vm)
nbuckets = ip4_full_reass_get_nbuckets ();
clib_bihash_init_16_8 (&rm->hash, "ip4-dr", nbuckets, nbuckets * 1024);
- node = vlib_get_node_by_name (vm, (u8 *) "ip4-drop");
- ASSERT (node);
- rm->ip4_drop_idx = node->index;
-
rm->fq_index = vlib_frame_queue_main_init (ip4_full_reass_node.index, 0);
+ rm->fq_local_index =
+ vlib_frame_queue_main_init (ip4_local_full_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip4_full_reass_node_feature.index, 0);
rm->fq_custom_index =
vlib_frame_queue_main_init (ip4_full_reass_node_custom.index, 0);
rm->feature_use_refcount_per_intf = NULL;
+ rm->is_local_reass_enabled = 1;
+
return error;
}
@@ -1542,8 +1614,8 @@ VLIB_INIT_FUNCTION (ip4_full_reass_init_function);
#endif /* CLIB_MARCH_VARIANT */
static uword
-ip4_full_reass_walk_expired (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * f)
+ip4_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
+ CLIB_UNUSED (vlib_frame_t *f))
{
ip4_full_reass_main_t *rm = &ip4_full_reass_main;
uword event_type, *event_data = 0;
@@ -1558,10 +1630,11 @@ ip4_full_reass_walk_expired (vlib_main_t * vm,
switch (event_type)
{
- case ~0: /* no events => timeout */
- /* nothing to do here */
- break;
+ case ~0:
+ /* no events => timeout */
+ /* fallthrough */
case IP4_EVENT_CONFIG_CHANGED:
+ /* nothing to do here */
break;
default:
clib_warning ("BUG: event type 0x%wx", event_type);
@@ -1575,6 +1648,7 @@ ip4_full_reass_walk_expired (vlib_main_t * vm,
uword thread_index = 0;
int index;
const uword nthreads = vlib_num_workers () + 1;
+
for (thread_index = 0; thread_index < nthreads; ++thread_index)
{
ip4_full_reass_per_thread_t *rt =
@@ -1582,24 +1656,46 @@ ip4_full_reass_walk_expired (vlib_main_t * vm,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- /* *INDENT-OFF* */
- pool_foreach_index (index, rt->pool) {
- reass = pool_elt_at_index (rt->pool, index);
- if (now > reass->last_heard + rm->timeout)
- {
- vec_add1 (pool_indexes_to_free, index);
- }
- }
- /* *INDENT-ON* */
+
+ /* Pace the number of timeouts handled per thread,to avoid barrier
+ * sync issues in real world scenarios */
+
+ u32 beg = rt->last_id;
+ /* to ensure we walk at least once per sec per context */
+ u32 end =
+ beg + (IP4_REASS_MAX_REASSEMBLIES_DEFAULT *
+ IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS / MSEC_PER_SEC +
+ 1);
+ if (end > vec_len (rt->pool))
+ {
+ end = vec_len (rt->pool);
+ rt->last_id = 0;
+ }
+ else
+ {
+ rt->last_id = end;
+ }
+
+ pool_foreach_stepping_index (index, beg, end, rt->pool)
+ {
+ reass = pool_elt_at_index (rt->pool, index);
+ if (now > reass->last_heard + rm->timeout)
+ {
+ vec_add1 (pool_indexes_to_free, index);
+ }
+ }
+
+ if (vec_len (pool_indexes_to_free))
+ vlib_node_increment_counter (vm, node->node_index,
+ IP4_ERROR_REASS_TIMEOUT,
+ vec_len (pool_indexes_to_free));
int *i;
- /* *INDENT-OFF* */
vec_foreach (i, pool_indexes_to_free)
{
ip4_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
- ip4_full_reass_drop_all (vm, node, rm, reass);
- ip4_full_reass_free (rm, rt, reass);
- }
- /* *INDENT-ON* */
+ ip4_full_reass_drop_all (vm, node, reass);
+ ip4_full_reass_free (rm, rt, reass);
+ }
clib_spinlock_unlock (&rt->lock);
}
@@ -1607,33 +1703,29 @@ ip4_full_reass_walk_expired (vlib_main_t * vm,
vec_free (pool_indexes_to_free);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_full_reass_expire_node) = {
- .function = ip4_full_reass_walk_expired,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip4-full-reassembly-expire-walk",
- .format_trace = format_ip4_full_reass_trace,
- .n_errors = ARRAY_LEN (ip4_full_reass_error_strings),
- .error_strings = ip4_full_reass_error_strings,
-
+ .function = ip4_full_reass_walk_expired,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip4-full-reassembly-expire-walk",
+ .format_trace = format_ip4_full_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
};
-/* *INDENT-ON* */
static u8 *
format_ip4_full_reass_key (u8 * s, va_list * args)
{
ip4_full_reass_key_t *key = va_arg (*args, ip4_full_reass_key_t *);
s =
- format (s,
- "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
- key->xx_id, format_ip4_address, &key->src, format_ip4_address,
+ format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
+ key->fib_index, format_ip4_address, &key->src, format_ip4_address,
&key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
return s;
}
@@ -1702,11 +1794,9 @@ show_ip4_reass (vlib_main_t * vm,
clib_spinlock_lock (&rt->lock);
if (details)
{
- /* *INDENT-OFF* */
pool_foreach (reass, rt->pool) {
vlib_cli_output (vm, "%U", format_ip4_reass, vm, reass);
}
- /* *INDENT-ON* */
}
sum_reass_n += rt->reass_n;
clib_spinlock_unlock (&rt->lock);
@@ -1730,13 +1820,11 @@ show_ip4_reass (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_full_reass_cmd, static) = {
.path = "show ip4-full-reassembly",
.short_help = "show ip4-full-reassembly [details]",
.function = show_ip4_reass,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
@@ -1788,10 +1876,10 @@ format_ip4_full_reass_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip4_full_reass_handoff_node_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame,
- ip4_full_reass_node_type_t type)
+ip4_full_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame,
+ ip4_full_reass_node_type_t type,
+ bool is_local)
{
ip4_full_reass_main_t *rm = &ip4_full_reass_main;
@@ -1810,7 +1898,14 @@ ip4_full_reass_handoff_node_inline (vlib_main_t * vm,
switch (type)
{
case NORMAL:
- fq_index = rm->fq_index;
+ if (is_local)
+ {
+ fq_index = rm->fq_local_index;
+ }
+ else
+ {
+ fq_index = rm->fq_index;
+ }
break;
case FEATURE:
fq_index = rm->fq_feature_index;
@@ -1820,7 +1915,6 @@ ip4_full_reass_handoff_node_inline (vlib_main_t * vm,
break;
default:
clib_warning ("Unexpected `type' (%d)!", type);
- ASSERT (0);
}
while (n_left_from > 0)
@@ -1854,11 +1948,11 @@ VLIB_NODE_FN (ip4_full_reass_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL);
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL,
+ false /* is_local */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_full_reass_handoff_node) = {
.name = "ip4-full-reassembly-handoff",
.vector_size = sizeof (u32),
@@ -1872,21 +1966,37 @@ VLIB_REGISTER_NODE (ip4_full_reass_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
+VLIB_NODE_FN (ip4_local_full_reass_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, NORMAL,
+ true /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip4_local_full_reass_handoff_node) = {
+ .name = "ip4-local-full-reassembly-handoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip4_full_reass_handoff_error_strings),
+ .error_strings = ip4_full_reass_handoff_error_strings,
+ .format_trace = format_ip4_full_reass_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
-/* *INDENT-OFF* */
VLIB_NODE_FN (ip4_full_reass_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t *
node,
vlib_frame_t * frame)
{
- return ip4_full_reass_handoff_node_inline (vm, node, frame, FEATURE);
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, FEATURE,
+ false /* is_local */);
}
-/* *INDENT-ON* */
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_full_reass_feature_handoff_node) = {
.name = "ip4-full-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1900,20 +2010,16 @@ VLIB_REGISTER_NODE (ip4_full_reass_feature_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_NODE_FN (ip4_full_reass_custom_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t *
node,
vlib_frame_t * frame)
{
- return ip4_full_reass_handoff_node_inline (vm, node, frame, CUSTOM);
+ return ip4_full_reass_handoff_node_inline (vm, node, frame, CUSTOM,
+ false /* is_local */);
}
-/* *INDENT-ON* */
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_full_reass_custom_handoff_node) = {
.name = "ip4-full-reass-custom-hoff",
.vector_size = sizeof (u32),
@@ -1927,7 +2033,6 @@ VLIB_REGISTER_NODE (ip4_full_reass_custom_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -1954,8 +2059,28 @@ ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
"ip4-full-reassembly-feature",
sw_if_index, 0, 0, 0);
}
- return -1;
+ return 0;
}
+
+void
+ip4_local_full_reass_enable_disable (int enable)
+{
+ if (enable)
+ {
+ ip4_full_reass_main.is_local_reass_enabled = 1;
+ }
+ else
+ {
+ ip4_full_reass_main.is_local_reass_enabled = 0;
+ }
+}
+
+int
+ip4_local_full_reass_enabled ()
+{
+ return ip4_full_reass_main.is_local_reass_enabled;
+}
+
#endif
/*
diff --git a/src/vnet/ip/reass/ip4_full_reass.h b/src/vnet/ip/reass/ip4_full_reass.h
index 000c80c5906..5df8107ca48 100644
--- a/src/vnet/ip/reass/ip4_full_reass.h
+++ b/src/vnet/ip/reass/ip4_full_reass.h
@@ -47,6 +47,9 @@ int ip4_full_reass_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable);
uword ip4_full_reass_custom_register_next_node (uword node_index);
+
+void ip4_local_full_reass_enable_disable (int enable);
+int ip4_local_full_reass_enabled ();
#endif /* __included_ip4_full_reass_h__ */
/*
diff --git a/src/vnet/ip/reass/ip4_sv_reass.c b/src/vnet/ip/reass/ip4_sv_reass.c
index 9b3f1b98558..7c3c2fff217 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.c
+++ b/src/vnet/ip/reass/ip4_sv_reass.c
@@ -48,7 +48,7 @@ typedef struct
{
struct
{
- u32 xx_id;
+ u32 fib_index;
ip4_address_t src;
ip4_address_t dst;
u16 frag_id;
@@ -150,6 +150,7 @@ typedef struct
/** Worker handoff */
u32 fq_index;
u32 fq_feature_index;
+ u32 fq_custom_context_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
@@ -189,6 +190,7 @@ typedef struct
u8 ip_proto;
u16 l4_src_port;
u16 l4_dst_port;
+ int l4_layer_truncated;
} ip4_sv_reass_trace_t;
extern vlib_node_registration_t ip4_sv_reass_node;
@@ -225,14 +227,19 @@ format_ip4_sv_reass_trace (u8 * s, va_list * args)
s = format (s, "[not-fragmented]");
break;
}
+ if (t->l4_layer_truncated)
+ {
+ s = format (s, " [l4-layer-truncated]");
+ }
return s;
}
static void
-ip4_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip4_sv_reass_main_t * rm, ip4_sv_reass_t * reass,
- u32 bi, ip4_sv_reass_trace_operation_e action,
- u32 ip_proto, u16 l4_src_port, u16 l4_dst_port)
+ip4_sv_reass_add_trace (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip4_sv_reass_t *reass, u32 bi,
+ ip4_sv_reass_trace_operation_e action, u32 ip_proto,
+ u16 l4_src_port, u16 l4_dst_port,
+ int l4_layer_truncated)
{
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
if (pool_is_free_index
@@ -253,6 +260,7 @@ ip4_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
t->ip_proto = ip_proto;
t->l4_src_port = l4_src_port;
t->l4_dst_port = l4_dst_port;
+ t->l4_layer_truncated = l4_layer_truncated;
#if 0
static u8 *s = NULL;
s = format (s, "%U", format_ip4_sv_reass_trace, NULL, NULL, t);
@@ -314,6 +322,8 @@ ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
ip4_sv_reass_t *reass = NULL;
f64 now = vlib_time_now (vm);
+again:
+
if (!clib_bihash_search_16_8 (&rm->hash, &kv->kv, &kv->kv))
{
if (vm->thread_index != kv->v.thread_index)
@@ -368,19 +378,23 @@ ip4_sv_reass_find_or_create (vlib_main_t * vm, ip4_sv_reass_main_t * rm,
kv->v.thread_index = vm->thread_index;
reass->last_heard = now;
- if (clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 1))
+ int rv = clib_bihash_add_del_16_8 (&rm->hash, &kv->kv, 2);
+ if (rv)
{
ip4_sv_reass_free (vm, rm, rt, reass);
reass = NULL;
+ // if other worker created a context already work with the other copy
+ if (-2 == rv)
+ goto again;
}
return reass;
}
always_inline ip4_sv_reass_rc_t
-ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip4_sv_reass_main_t * rm, ip4_sv_reass_per_thread_t * rt,
- ip4_header_t * ip0, ip4_sv_reass_t * reass, u32 bi0)
+ip4_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip4_sv_reass_main_t *rm, ip4_header_t *ip0,
+ ip4_sv_reass_t *reass, u32 bi0)
{
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
ip4_sv_reass_rc_t rc = IP4_SV_REASS_RC_OK;
@@ -407,9 +421,10 @@ ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, rm, reass, bi0, REASS_FINISH,
- reass->ip_proto, reass->l4_src_port,
- reass->l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FINISH, reass->ip_proto,
+ reass->l4_src_port, reass->l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
}
vec_add1 (reass->cached_buffers, bi0);
@@ -417,8 +432,9 @@ ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
{
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
- REASS_FRAGMENT_CACHE, ~0, ~0, ~0);
+ ip4_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FRAGMENT_CACHE, ~0, ~0, ~0,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
if (vec_len (reass->cached_buffers) > rm->max_reass_len)
{
@@ -428,15 +444,33 @@ ip4_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
return rc;
}
+always_inline int
+l4_layer_truncated (ip4_header_t *ip)
+{
+ static const int l4_layer_length[256] = {
+ [IP_PROTOCOL_TCP] = sizeof (tcp_header_t),
+ [IP_PROTOCOL_UDP] = sizeof (udp_header_t),
+ [IP_PROTOCOL_ICMP] = sizeof (icmp46_header_t),
+ };
+
+ return ((u8 *) ip + ip4_header_bytes (ip) + l4_layer_length[ip->protocol] >
+ (u8 *) ip + clib_net_to_host_u16 (ip->length));
+}
+
always_inline uword
-ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature,
- bool is_output_feature, bool is_custom)
+ip4_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool is_output_feature, bool is_custom,
+ bool with_custom_context)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index;
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
ip4_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u32 *context;
+ if (with_custom_context)
+ context = vlib_frame_aux_args (frame);
+
clib_spinlock_lock (&rt->lock);
n_left_from = frame->n_vectors;
@@ -482,6 +516,7 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
(is_output_feature ? 1 : 0) *
vnet_buffer (b1)->
ip.save_rewrite_length);
+
if (PREDICT_FALSE
(ip4_get_fragment_more (ip0) || ip4_get_fragment_offset (ip0))
|| (ip4_get_fragment_more (ip1) || ip4_get_fragment_offset (ip1)))
@@ -506,29 +541,40 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (IP_PROTOCOL_TCP == ip0->protocol)
+ if (l4_layer_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
- vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
- vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
+ vnet_buffer (b0)->ip.reass.l4_src_port = 0;
+ vnet_buffer (b0)->ip.reass.l4_dst_port = 0;
}
- else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ else
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip0 + 1))->flags;
+ vnet_buffer (b0)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip0 + 1))->ack_number;
+ vnet_buffer (b0)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip0 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip0 + 1))->type;
+ }
+ vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
+ vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
}
- vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
- vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, rm, NULL, from[(b - 2) - bufs],
- REASS_PASSTHROUGH,
- vnet_buffer (b0)->ip.reass.ip_proto,
- vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, NULL, from[(b - 2) - bufs], REASS_PASSTHROUGH,
+ vnet_buffer (b0)->ip.reass.ip_proto,
+ vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
if (is_feature)
{
@@ -541,35 +587,48 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vnet_buffer (b1)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b1)->ip.reass.ip_proto = ip1->protocol;
- if (IP_PROTOCOL_TCP == ip1->protocol)
+ if (l4_layer_truncated (ip1))
{
- vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip1 + 1))->flags;
- vnet_buffer (b1)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip1 + 1))->ack_number;
- vnet_buffer (b1)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip1 + 1))->seq_number;
+ vnet_buffer (b1)->ip.reass.l4_layer_truncated = 1;
+ vnet_buffer (b1)->ip.reass.l4_src_port = 0;
+ vnet_buffer (b1)->ip.reass.l4_dst_port = 0;
}
- else if (IP_PROTOCOL_ICMP == ip1->protocol)
+ else
{
- vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip1 + 1))->type;
+ vnet_buffer (b1)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip1->protocol)
+ {
+ vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip1 + 1))->flags;
+ vnet_buffer (b1)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip1 + 1))->ack_number;
+ vnet_buffer (b1)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip1 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip1->protocol)
+ {
+ vnet_buffer (b1)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip1 + 1))->type;
+ }
+ vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1);
+ vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0);
}
- vnet_buffer (b1)->ip.reass.l4_src_port = ip4_get_port (ip1, 1);
- vnet_buffer (b1)->ip.reass.l4_dst_port = ip4_get_port (ip1, 0);
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, rm, NULL, from[(b - 1) - bufs],
- REASS_PASSTHROUGH,
- vnet_buffer (b1)->ip.reass.ip_proto,
- vnet_buffer (b1)->ip.reass.l4_src_port,
- vnet_buffer (b1)->ip.reass.l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, NULL, from[(b - 1) - bufs], REASS_PASSTHROUGH,
+ vnet_buffer (b1)->ip.reass.ip_proto,
+ vnet_buffer (b1)->ip.reass.l4_src_port,
+ vnet_buffer (b1)->ip.reass.l4_dst_port,
+ vnet_buffer (b1)->ip.reass.l4_layer_truncated);
}
n_left_from -= 2;
next[0] = next0;
next[1] = next1;
next += 2;
+ if (with_custom_context)
+ context += 2;
}
while (n_left_from > 0)
@@ -608,34 +667,45 @@ ip4_sv_reass_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (IP_PROTOCOL_TCP == ip0->protocol)
+ if (l4_layer_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
- vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
- vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
}
- else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ else
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip0 + 1))->flags;
+ vnet_buffer (b0)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip0 + 1))->ack_number;
+ vnet_buffer (b0)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip0 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip0 + 1))->type;
+ }
+ vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
+ vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
}
- vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
- vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, rm, NULL, from[(b - 1) - bufs],
- REASS_PASSTHROUGH,
- vnet_buffer (b0)->ip.reass.ip_proto,
- vnet_buffer (b0)->ip.reass.l4_src_port,
- vnet_buffer (b0)->ip.reass.l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, NULL, from[(b - 1) - bufs], REASS_PASSTHROUGH,
+ vnet_buffer (b0)->ip.reass.ip_proto,
+ vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
n_left_from -= 1;
next[0] = next0;
next += 1;
+ if (with_custom_context)
+ context += 1;
}
vlib_buffer_enqueue_to_next (vm, node, from, (u16 *) nexts,
@@ -649,7 +719,11 @@ slow_path:
while (n_left_from > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ if (with_custom_context)
+ vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
+ to_next_aux, n_left_to_next);
+ else
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
@@ -657,6 +731,7 @@ slow_path:
vlib_buffer_t *b0;
u32 next0;
u32 error0 = IP4_ERROR_NONE;
+ u8 forward_context = 0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
@@ -679,31 +754,42 @@ slow_path:
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
vnet_buffer (b0)->ip.reass.ip_proto = ip0->protocol;
- if (IP_PROTOCOL_TCP == ip0->protocol)
+ if (l4_layer_truncated (ip0))
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((tcp_header_t *) (ip0 + 1))->flags;
- vnet_buffer (b0)->ip.reass.tcp_ack_number =
- ((tcp_header_t *) (ip0 + 1))->ack_number;
- vnet_buffer (b0)->ip.reass.tcp_seq_number =
- ((tcp_header_t *) (ip0 + 1))->seq_number;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 1;
+ vnet_buffer (b0)->ip.reass.l4_src_port = 0;
+ vnet_buffer (b0)->ip.reass.l4_dst_port = 0;
}
- else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ else
{
- vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
- ((icmp46_header_t *) (ip0 + 1))->type;
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated = 0;
+ if (IP_PROTOCOL_TCP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((tcp_header_t *) (ip0 + 1))->flags;
+ vnet_buffer (b0)->ip.reass.tcp_ack_number =
+ ((tcp_header_t *) (ip0 + 1))->ack_number;
+ vnet_buffer (b0)->ip.reass.tcp_seq_number =
+ ((tcp_header_t *) (ip0 + 1))->seq_number;
+ }
+ else if (IP_PROTOCOL_ICMP == ip0->protocol)
+ {
+ vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags =
+ ((icmp46_header_t *) (ip0 + 1))->type;
+ }
+ vnet_buffer (b0)->ip.reass.l4_src_port =
+ ip4_get_port (ip0, 1);
+ vnet_buffer (b0)->ip.reass.l4_dst_port =
+ ip4_get_port (ip0, 0);
}
- vnet_buffer (b0)->ip.reass.l4_src_port = ip4_get_port (ip0, 1);
- vnet_buffer (b0)->ip.reass.l4_dst_port = ip4_get_port (ip0, 0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, rm, NULL, bi0,
- REASS_PASSTHROUGH,
- vnet_buffer (b0)->ip.reass.ip_proto,
- vnet_buffer (b0)->ip.
- reass.l4_src_port,
- vnet_buffer (b0)->ip.
- reass.l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, NULL, bi0, REASS_PASSTHROUGH,
+ vnet_buffer (b0)->ip.reass.ip_proto,
+ vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
goto packet_enqueue;
}
@@ -721,13 +807,17 @@ slow_path:
ip4_sv_reass_kv_t kv;
u8 do_handoff = 0;
- kv.k.as_u64[0] =
- (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
- (u64) ip0->src_address.as_u32 << 32;
- kv.k.as_u64[1] =
- (u64) ip0->dst_address.
- as_u32 | (u64) ip0->fragment_id << 32 | (u64) ip0->protocol << 48;
+ if (with_custom_context)
+ kv.k.as_u64[0] = (u64) *context | (u64) ip0->src_address.as_u32
+ << 32;
+ else
+ kv.k.as_u64[0] =
+ (u64) vec_elt (ip4_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) |
+ (u64) ip0->src_address.as_u32 << 32;
+ kv.k.as_u64[1] = (u64) ip0->dst_address.as_u32 |
+ (u64) ip0->fragment_id << 32 |
+ (u64) ip0->protocol << 48;
ip4_sv_reass_t *reass =
ip4_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
@@ -737,6 +827,8 @@ slow_path:
next0 = IP4_SV_REASSEMBLY_NEXT_HANDOFF;
vnet_buffer (b0)->ip.reass.owner_thread_index =
kv.v.thread_index;
+ if (with_custom_context)
+ forward_context = 1;
goto packet_enqueue;
}
@@ -771,36 +863,34 @@ slow_path:
vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
- REASS_FRAGMENT_FORWARD,
- reass->ip_proto,
- reass->l4_src_port,
- reass->l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
+ reass->ip_proto, reass->l4_src_port, reass->l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
goto packet_enqueue;
}
ip4_sv_reass_rc_t rc =
- ip4_sv_reass_update (vm, node, rm, rt, ip0, reass, bi0);
+ ip4_sv_reass_update (vm, node, rm, ip0, reass, bi0);
+ u32 counter = ~0;
switch (rc)
{
case IP4_SV_REASS_RC_OK:
/* nothing to do here */
break;
case IP4_SV_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip4_sv_reass_free (vm, rm, rt, reass);
- goto next_packet;
+ counter = IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
break;
case IP4_SV_REASS_RC_UNSUPP_IP_PROTO:
- vlib_node_increment_counter (vm, node->node_index,
- IP4_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
+ counter = IP4_ERROR_REASS_UNSUPP_IP_PROT;
+ break;
+ }
+ if (~0 != counter)
+ {
+ vlib_node_increment_counter (vm, node->node_index, counter, 1);
ip4_sv_reass_free (vm, rm, rt, reass);
goto next_packet;
- break;
}
if (reass->is_complete)
{
@@ -846,17 +936,17 @@ slow_path:
vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip4_sv_reass_add_trace (vm, node, rm, reass, bi0,
- REASS_FRAGMENT_FORWARD,
- reass->ip_proto,
- reass->l4_src_port,
- reass->l4_dst_port);
+ ip4_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
+ reass->ip_proto, reass->l4_src_port, reass->l4_dst_port,
+ vnet_buffer (b0)->ip.reass.l4_layer_truncated);
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next, bi0,
next0);
}
- _vec_len (reass->cached_buffers) = 0; // buffers are owned by frame now
+ vec_set_len (reass->cached_buffers,
+ 0); // buffers are owned by frame now
}
goto next_packet;
@@ -869,13 +959,26 @@ slow_path:
b0 = vlib_get_buffer (vm, bi0);
vnet_feature_next (&next0, b0);
}
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
+ if (with_custom_context && forward_context)
+ {
+ if (to_next_aux)
+ {
+ to_next_aux[0] = *context;
+ to_next_aux += 1;
+ }
+ vlib_validate_buffer_enqueue_with_aux_x1 (
+ vm, node, next_index, to_next, to_next_aux, n_left_to_next,
+ bi0, *context, next0);
+ }
+ else
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
next_packet:
from += 1;
n_left_from -= 1;
+ if (with_custom_context)
+ context += 1;
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
@@ -886,28 +989,21 @@ done:
return frame->n_vectors;
}
-static char *ip4_sv_reass_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip4_error
-#undef _
-};
-
VLIB_NODE_FN (ip4_sv_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
- false /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
.name = "ip4-sv-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -917,24 +1013,22 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node) = {
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
- false /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, true /* is_feature */, false /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
.name = "ip4-sv-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -943,34 +1037,30 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_feature) = {
[IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_sv_reass_feature) = {
.arc_name = "ip4-unicast",
.node_name = "ip4-sv-reassembly-feature",
.runs_before = VNET_FEATURES ("ip4-lookup"),
.runs_after = 0,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_node_output_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, true /* is_feature */ ,
- true /* is_output_feature */ ,
- false /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, true /* is_feature */, true /* is_output_feature */,
+ false /* is_custom */, false /* with_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
.name = "ip4-sv-reassembly-output-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -979,24 +1069,20 @@ VLIB_REGISTER_NODE (ip4_sv_reass_node_output_feature) = {
[IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_sv_reass_output_feature) = {
.arc_name = "ip4-output",
.node_name = "ip4-sv-reassembly-output-feature",
.runs_before = 0,
.runs_after = 0,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
.name = "ip4-sv-reassembly-custom-next",
.vector_size = sizeof (u32),
.format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
.n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -1006,15 +1092,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_custom_node) = {
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip4_sv_reass_custom_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_inline (vm, node, frame, false /* is_feature */ ,
- false /* is_output_feature */ ,
- true /* is_custom */ );
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ true /* is_custom */, false /* with_custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_node) = {
+ .name = "ip4-sv-reassembly-custom-context",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof(u32),
+ .format_trace = format_ip4_sv_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
+ .n_next_nodes = IP4_SV_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP4_SV_REASSEMBLY_NEXT_INPUT] = "ip4-input",
+ [IP4_SV_REASSEMBLY_NEXT_DROP] = "ip4-drop",
+ [IP4_SV_REASSEMBLY_NEXT_HANDOFF] = "ip4-sv-reassembly-custom-context-handoff",
+
+ },
+};
+
+VLIB_NODE_FN (ip4_sv_reass_custom_context_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_sv_reass_inline (
+ vm, node, frame, false /* is_feature */, false /* is_output_feature */,
+ true /* is_custom */, true /* with_custom_context */);
}
#ifndef CLIB_MARCH_VARIANT
@@ -1159,6 +1269,8 @@ ip4_sv_reass_init_function (vlib_main_t * vm)
rm->fq_index = vlib_frame_queue_main_init (ip4_sv_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip4_sv_reass_node_feature.index, 0);
+ rm->fq_custom_context_index =
+ vlib_frame_queue_main_init (ip4_sv_reass_custom_context_node.index, 0);
rm->feature_use_refcount_per_intf = NULL;
rm->output_feature_use_refcount_per_intf = NULL;
@@ -1170,8 +1282,9 @@ VLIB_INIT_FUNCTION (ip4_sv_reass_init_function);
#endif /* CLIB_MARCH_VARIANT */
static uword
-ip4_sv_reass_walk_expired (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * f)
+ip4_sv_reass_walk_expired (vlib_main_t *vm,
+ CLIB_UNUSED (vlib_node_runtime_t *node),
+ CLIB_UNUSED (vlib_frame_t *f))
{
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
uword event_type, *event_data = 0;
@@ -1186,10 +1299,11 @@ ip4_sv_reass_walk_expired (vlib_main_t * vm,
switch (event_type)
{
- case ~0: /* no events => timeout */
- /* nothing to do here */
- break;
+ case ~0:
+ /* no events => timeout */
+ /* fallthrough */
case IP4_EVENT_CONFIG_CHANGED:
+ /* nothing to do here */
break;
default:
clib_warning ("BUG: event type 0x%wx", event_type);
@@ -1209,7 +1323,6 @@ ip4_sv_reass_walk_expired (vlib_main_t * vm,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- /* *INDENT-OFF* */
pool_foreach_index (index, rt->pool) {
reass = pool_elt_at_index (rt->pool, index);
if (now > reass->last_heard + rm->timeout)
@@ -1217,15 +1330,12 @@ ip4_sv_reass_walk_expired (vlib_main_t * vm,
vec_add1 (pool_indexes_to_free, index);
}
}
- /* *INDENT-ON* */
int *i;
- /* *INDENT-OFF* */
vec_foreach (i, pool_indexes_to_free)
{
ip4_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
ip4_sv_reass_free (vm, rm, rt, reass);
}
- /* *INDENT-ON* */
clib_spinlock_unlock (&rt->lock);
}
@@ -1233,33 +1343,29 @@ ip4_sv_reass_walk_expired (vlib_main_t * vm,
vec_free (pool_indexes_to_free);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_expire_node) = {
- .function = ip4_sv_reass_walk_expired,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip4-sv-reassembly-expire-walk",
- .format_trace = format_ip4_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip4_sv_reass_error_strings),
- .error_strings = ip4_sv_reass_error_strings,
-
+ .function = ip4_sv_reass_walk_expired,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip4-sv-reassembly-expire-walk",
+ .format_trace = format_ip4_sv_reass_trace,
+ .n_errors = IP4_N_ERROR,
+ .error_counters = ip4_error_counters,
};
-/* *INDENT-ON* */
static u8 *
format_ip4_sv_reass_key (u8 * s, va_list * args)
{
ip4_sv_reass_key_t *key = va_arg (*args, ip4_sv_reass_key_t *);
s =
- format (s,
- "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
- key->xx_id, format_ip4_address, &key->src, format_ip4_address,
+ format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
+ key->fib_index, format_ip4_address, &key->src, format_ip4_address,
&key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
return s;
}
@@ -1318,11 +1424,9 @@ show_ip4_reass (vlib_main_t * vm,
clib_spinlock_lock (&rt->lock);
if (details)
{
- /* *INDENT-OFF* */
pool_foreach (reass, rt->pool) {
vlib_cli_output (vm, "%U", format_ip4_sv_reass, vm, reass);
}
- /* *INDENT-ON* */
}
sum_reass_n += rt->reass_n;
clib_spinlock_unlock (&rt->lock);
@@ -1346,13 +1450,11 @@ show_ip4_reass (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip4_sv_reass_cmd, static) = {
.path = "show ip4-sv-reassembly",
.short_help = "show ip4-sv-reassembly [details]",
.function = show_ip4_reass,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
@@ -1403,25 +1505,30 @@ format_ip4_sv_reass_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip4_sv_reass_handoff_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool is_custom_context)
{
ip4_sv_reass_main_t *rm = &ip4_sv_reass_main;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 n_enq, n_left_from, *from;
+ u32 n_enq, n_left_from, *from, *context;
u16 thread_indices[VLIB_FRAME_SIZE], *ti;
u32 fq_index;
from = vlib_frame_vector_args (frame);
+ if (is_custom_context)
+ context = vlib_frame_aux_args (frame);
+
n_left_from = frame->n_vectors;
vlib_get_buffers (vm, from, bufs, n_left_from);
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
+ fq_index = (is_feature) ? rm->fq_feature_index :
+ (is_custom_context ? rm->fq_custom_context_index :
+ rm->fq_index);
while (n_left_from > 0)
{
@@ -1440,8 +1547,12 @@ ip4_sv_reass_handoff_node_inline (vlib_main_t * vm,
ti += 1;
b += 1;
}
- n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
- thread_indices, frame->n_vectors, 1);
+ if (is_custom_context)
+ n_enq = vlib_buffer_enqueue_to_thread_with_aux (
+ vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1);
+ else
+ n_enq = vlib_buffer_enqueue_to_thread (
+ vm, node, fq_index, from, thread_indices, frame->n_vectors, 1);
if (n_enq < frame->n_vectors)
vlib_node_increment_counter (vm, node->node_index,
@@ -1454,12 +1565,11 @@ VLIB_NODE_FN (ip4_sv_reass_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_handoff_node_inline (vm, node, frame,
- false /* is_feature */ );
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, false /* is_feature */, false /* is_custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
.name = "ip4-sv-reassembly-handoff",
.vector_size = sizeof (u32),
@@ -1473,22 +1583,39 @@ VLIB_REGISTER_NODE (ip4_sv_reass_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
+VLIB_NODE_FN (ip4_sv_reass_custom_context_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, false /* is_feature */, true /* is_custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip4_sv_reass_custom_context_handoff_node) = {
+ .name = "ip4-sv-reassembly-custom-context-handoff",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip4_sv_reass_handoff_error_strings),
+ .error_strings = ip4_sv_reass_handoff_error_strings,
+ .format_trace = format_ip4_sv_reass_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
-/* *INDENT-OFF* */
VLIB_NODE_FN (ip4_sv_reass_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t *
node,
vlib_frame_t * frame)
{
- return ip4_sv_reass_handoff_node_inline (vm, node, frame,
- true /* is_feature */ );
+ return ip4_sv_reass_handoff_node_inline (
+ vm, node, frame, true /* is_feature */, false /* is_custom_context */);
}
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
.name = "ip4-sv-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1502,7 +1629,6 @@ VLIB_REGISTER_NODE (ip4_sv_reass_feature_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -1540,6 +1666,13 @@ ip4_sv_reass_custom_register_next_node (uword node_index)
node_index);
}
+uword
+ip4_sv_reass_custom_context_register_next_node (uword node_index)
+{
+ return vlib_node_add_next (
+ vlib_get_main (), ip4_sv_reass_custom_context_node.index, node_index);
+}
+
int
ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable)
diff --git a/src/vnet/ip/reass/ip4_sv_reass.h b/src/vnet/ip/reass/ip4_sv_reass.h
index e926dbeebcc..3a684eb9809 100644
--- a/src/vnet/ip/reass/ip4_sv_reass.h
+++ b/src/vnet/ip/reass/ip4_sv_reass.h
@@ -49,6 +49,7 @@ int ip4_sv_reass_output_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable);
uword ip4_sv_reass_custom_register_next_node (uword node_index);
+uword ip4_sv_reass_custom_context_register_next_node (uword node_index);
#endif /* __included_ip4_sv_reass_h__ */
diff --git a/src/vnet/ip/reass/ip6_full_reass.c b/src/vnet/ip/reass/ip6_full_reass.c
index 67505689bca..27647985877 100644
--- a/src/vnet/ip/reass/ip6_full_reass.c
+++ b/src/vnet/ip/reass/ip6_full_reass.c
@@ -25,10 +25,14 @@
#include <vnet/ip/ip.h>
#include <vppinfra/bihash_48_8.h>
#include <vnet/ip/reass/ip6_full_reass.h>
+#include <vnet/ip/ip6_inlines.h>
#define MSEC_PER_SEC 1000
-#define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 100
-#define IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 10000 // 10 seconds default
+#define IP6_FULL_REASS_TIMEOUT_DEFAULT_MS 200
+/* As there are only 1024 reass context per thread, either the DDOS attacks
+ * or fractions of real timeouts, would consume these contexts quickly and
+ * running out context space and unable to perform reassembly */
+#define IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS 50 // 50 ms default
#define IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT 1024
#define IP6_FULL_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT 3
#define IP6_FULL_REASS_HT_LOAD_FACTOR (0.75)
@@ -40,6 +44,8 @@ typedef enum
IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS,
IP6_FULL_REASS_RC_NO_BUF,
IP6_FULL_REASS_RC_HANDOFF,
+ IP6_FULL_REASS_RC_INVALID_FRAG_LEN,
+ IP6_FULL_REASS_RC_OVERLAP,
} ip6_full_reass_rc_t;
typedef struct
@@ -132,6 +138,8 @@ typedef struct
ip6_full_reass_t *pool;
u32 reass_n;
u32 id_counter;
+ // for pacing the main thread timeouts
+ u32 last_id;
clib_spinlock_t lock;
} ip6_full_reass_per_thread_t;
@@ -155,17 +163,20 @@ typedef struct
// convenience
vlib_main_t *vlib_main;
- // node index of ip6-drop node
- u32 ip6_drop_idx;
u32 ip6_icmp_error_idx;
u32 ip6_full_reass_expire_node_idx;
/** Worker handoff */
u32 fq_index;
+ u32 fq_local_index;
u32 fq_feature_index;
+ u32 fq_custom_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
+
+ // whether local fragmented packets are reassembled or not
+ int is_local_reass_enabled;
} ip6_full_reass_main_t;
extern ip6_full_reass_main_t ip6_full_reass_main;
@@ -185,13 +196,22 @@ typedef enum
typedef enum
{
+ NORMAL,
+ FEATURE,
+ CUSTOM
+} ip6_full_reass_node_type_t;
+
+typedef enum
+{
RANGE_NEW,
+ RANGE_DISCARD,
RANGE_OVERLAP,
ICMP_ERROR_RT_EXCEEDED,
ICMP_ERROR_FL_TOO_BIG,
ICMP_ERROR_FL_NOT_MULT_8,
FINALIZE,
HANDOFF,
+ PASSTHROUGH,
} ip6_full_reass_trace_operation_e;
typedef struct
@@ -278,6 +298,10 @@ format_ip6_full_reass_trace (u8 * s, va_list * args)
s = format (s, "\n%Unew %U", format_white_space, indent,
format_ip6_full_reass_range_trace, &t->trace_range);
break;
+ case RANGE_DISCARD:
+ s = format (s, "\n%Udiscard %U", format_white_space, indent,
+ format_ip6_full_reass_range_trace, &t->trace_range);
+ break;
case RANGE_OVERLAP:
s = format (s, "\n%Uoverlap %U", format_white_space, indent,
format_ip6_full_reass_range_trace, &t->trace_range);
@@ -304,13 +328,15 @@ format_ip6_full_reass_trace (u8 * s, va_list * args)
format (s, "handoff from thread #%u to thread #%u", t->thread_id,
t->thread_id_to);
break;
+ case PASSTHROUGH:
+ s = format (s, "passthrough - not a fragment");
+ break;
}
return s;
}
static void
ip6_full_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_main_t * rm,
ip6_full_reass_t * reass, u32 bi,
ip6_frag_hdr_t * ip6_frag_header,
ip6_full_reass_trace_operation_e action,
@@ -397,59 +423,69 @@ ip6_full_reass_free (ip6_full_reass_main_t * rm,
ip6_full_reass_free_ctx (rt, reass);
}
+/* n_left_to_next, and to_next are taken as input params, as this function
+ * could be called from a graphnode, where its managing local copy of these
+ * variables, and ignoring those and still trying to enqueue the buffers
+ * with local variables would cause either buffer leak or corruption */
always_inline void
-ip6_full_reass_drop_all (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_main_t * rm, ip6_full_reass_t * reass)
+ip6_full_reass_drop_all (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_t *reass, u32 *n_left_to_next,
+ u32 **to_next)
{
u32 range_bi = reass->first_bi;
vlib_buffer_t *range_b;
vnet_buffer_opaque_t *range_vnb;
u32 *to_free = NULL;
+
while (~0 != range_bi)
{
range_b = vlib_get_buffer (vm, range_bi);
range_vnb = vnet_buffer (range_b);
- u32 bi = range_bi;
- while (~0 != bi)
+
+ if (~0 != range_bi)
{
- vec_add1 (to_free, bi);
- vlib_buffer_t *b = vlib_get_buffer (vm, bi);
- if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- bi = b->next_buffer;
- b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
- }
- else
- {
- bi = ~0;
- }
+ vec_add1 (to_free, range_bi);
}
range_bi = range_vnb->ip.reass.next_range_bi;
}
+
/* send to next_error_index */
- if (~0 != reass->error_next_index)
+ if (~0 != reass->error_next_index &&
+ reass->error_next_index < node->n_next_nodes)
{
- u32 n_left_to_next, *to_next, next_index;
+ u32 next_index;
next_index = reass->error_next_index;
u32 bi = ~0;
+ /* record number of packets sent to custom app */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TO_CUSTOM_APP,
+ vec_len (to_free));
+
while (vec_len (to_free) > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, *to_next,
+ (*n_left_to_next));
- while (vec_len (to_free) > 0 && n_left_to_next > 0)
+ while (vec_len (to_free) > 0 && (*n_left_to_next) > 0)
{
bi = vec_pop (to_free);
if (~0 != bi)
{
- to_next[0] = bi;
- to_next += 1;
- n_left_to_next -= 1;
+ vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+ if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ip6_full_reass_add_trace (vm, node, reass, bi, NULL,
+ RANGE_DISCARD, ~0);
+ }
+ *to_next[0] = bi;
+ (*to_next) += 1;
+ (*n_left_to_next) -= 1;
}
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_put_next_frame (vm, node, next_index, (*n_left_to_next));
}
}
else
@@ -460,9 +496,65 @@ ip6_full_reass_drop_all (vlib_main_t * vm, vlib_node_runtime_t * node,
}
always_inline void
-ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_main_t * rm,
- ip6_full_reass_t * reass, u32 * icmp_bi)
+sanitize_reass_buffers_add_missing (vlib_main_t *vm, ip6_full_reass_t *reass,
+ u32 *bi0)
+{
+ u32 range_bi = reass->first_bi;
+ vlib_buffer_t *range_b;
+ vnet_buffer_opaque_t *range_vnb;
+
+ while (~0 != range_bi)
+ {
+ range_b = vlib_get_buffer (vm, range_bi);
+ range_vnb = vnet_buffer (range_b);
+ u32 bi = range_bi;
+ if (~0 != bi)
+ {
+ if (bi == *bi0)
+ *bi0 = ~0;
+ if (range_b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ u32 _bi = bi;
+ vlib_buffer_t *_b = vlib_get_buffer (vm, _bi);
+ while (_b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ if (_b->next_buffer != range_vnb->ip.reass.next_range_bi)
+ {
+ _bi = _b->next_buffer;
+ _b = vlib_get_buffer (vm, _bi);
+ }
+ else
+ {
+ _b->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+ break;
+ }
+ }
+ }
+ range_bi = range_vnb->ip.reass.next_range_bi;
+ }
+ }
+ if (*bi0 != ~0)
+ {
+ vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
+ vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
+ if (~0 != reass->first_bi)
+ {
+ fvnb->ip.reass.next_range_bi = reass->first_bi;
+ reass->first_bi = *bi0;
+ }
+ else
+ {
+ reass->first_bi = *bi0;
+ fvnb->ip.reass.next_range_bi = ~0;
+ }
+ *bi0 = ~0;
+ }
+}
+
+always_inline void
+ip6_full_reass_on_timeout (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_t *reass, u32 *icmp_bi,
+ u32 *n_left_to_next, u32 **to_next)
{
if (~0 == reass->first_bi)
{
@@ -476,8 +568,8 @@ ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
*icmp_bi = reass->first_bi;
if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
{
- ip6_full_reass_add_trace (vm, node, rm, reass, reass->first_bi,
- NULL, ICMP_ERROR_RT_EXCEEDED, ~0);
+ ip6_full_reass_add_trace (vm, node, reass, reass->first_bi, NULL,
+ ICMP_ERROR_RT_EXCEEDED, ~0);
}
// fragment with offset zero received - send icmp message back
if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
@@ -495,15 +587,16 @@ ip6_full_reass_on_timeout (vlib_main_t * vm, vlib_node_runtime_t * node,
0);
}
}
- ip6_full_reass_drop_all (vm, node, rm, reass);
+ ip6_full_reass_drop_all (vm, node, reass, n_left_to_next, to_next);
}
always_inline ip6_full_reass_t *
-ip6_full_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_main_t * rm,
- ip6_full_reass_per_thread_t * rt,
- ip6_full_reass_kv_t * kv, u32 * icmp_bi,
- u8 * do_handoff)
+ip6_full_reass_find_or_create (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_main_t *rm,
+ ip6_full_reass_per_thread_t *rt,
+ ip6_full_reass_kv_t *kv, u32 *icmp_bi,
+ u8 *do_handoff, int skip_bihash,
+ u32 *n_left_to_next, u32 **to_next)
{
ip6_full_reass_t *reass;
f64 now;
@@ -513,7 +606,7 @@ again:
reass = NULL;
now = vlib_time_now (vm);
- if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
+ if (!skip_bihash && !clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
{
if (vm->thread_index != kv->v.memory_owner_thread_index)
{
@@ -528,7 +621,10 @@ again:
if (now > reass->last_heard + rm->timeout)
{
- ip6_full_reass_on_timeout (vm, node, rm, reass, icmp_bi);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TIMEOUT, 1);
+ ip6_full_reass_on_timeout (vm, node, reass, icmp_bi, n_left_to_next,
+ to_next);
ip6_full_reass_free (rm, rt, reass);
reass = NULL;
}
@@ -556,27 +652,41 @@ again:
reass->data_len = 0;
reass->next_index = ~0;
reass->error_next_index = ~0;
+ reass->memory_owner_thread_index = vm->thread_index;
++rt->reass_n;
}
- reass->key.as_u64[0] = kv->kv.key[0];
- reass->key.as_u64[1] = kv->kv.key[1];
- reass->key.as_u64[2] = kv->kv.key[2];
- reass->key.as_u64[3] = kv->kv.key[3];
- reass->key.as_u64[4] = kv->kv.key[4];
- reass->key.as_u64[5] = kv->kv.key[5];
kv->v.reass_index = (reass - rt->pool);
kv->v.memory_owner_thread_index = vm->thread_index;
reass->last_heard = now;
- int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
- if (rv)
+ if (!skip_bihash)
{
- ip6_full_reass_free (rm, rt, reass);
- reass = NULL;
- // if other worker created a context already work with the other copy
- if (-2 == rv)
- goto again;
+ reass->key.as_u64[0] = kv->kv.key[0];
+ reass->key.as_u64[1] = kv->kv.key[1];
+ reass->key.as_u64[2] = kv->kv.key[2];
+ reass->key.as_u64[3] = kv->kv.key[3];
+ reass->key.as_u64[4] = kv->kv.key[4];
+ reass->key.as_u64[5] = kv->kv.key[5];
+
+ int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
+ if (rv)
+ {
+ ip6_full_reass_free (rm, rt, reass);
+ reass = NULL;
+ // if other worker created a context already work with the other copy
+ if (-2 == rv)
+ goto again;
+ }
+ }
+ else
+ {
+ reass->key.as_u64[0] = ~0;
+ reass->key.as_u64[1] = ~0;
+ reass->key.as_u64[2] = ~0;
+ reass->key.as_u64[3] = ~0;
+ reass->key.as_u64[4] = ~0;
+ reass->key.as_u64[5] = ~0;
}
return reass;
@@ -595,8 +705,6 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_t *last_b = NULL;
u32 sub_chain_bi = reass->first_bi;
u32 total_length = 0;
- u32 buf_cnt = 0;
- u32 dropped_cnt = 0;
u32 *vec_drop_compress = NULL;
ip6_full_reass_rc_t rv = IP6_FULL_REASS_RC_OK;
do
@@ -638,19 +746,18 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_length_in_chain (vm, tmp) - trim_front - trim_end;
while (1)
{
- ++buf_cnt;
if (trim_front)
{
if (trim_front > tmp->current_length)
{
/* drop whole buffer */
- vec_add1 (vec_drop_compress, tmp_bi);
- trim_front -= tmp->current_length;
if (!(tmp->flags & VLIB_BUFFER_NEXT_PRESENT))
{
rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
goto free_buffers_and_return;
}
+ trim_front -= tmp->current_length;
+ vec_add1 (vec_drop_compress, tmp_bi);
tmp->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
tmp_bi = tmp->next_buffer;
tmp = vlib_get_buffer (vm, tmp_bi);
@@ -688,13 +795,12 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- vec_add1 (vec_drop_compress, tmp_bi);
if (reass->first_bi == tmp_bi)
{
rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
goto free_buffers_and_return;
}
- ++dropped_cnt;
+ vec_add1 (vec_drop_compress, tmp_bi);
}
if (tmp->flags & VLIB_BUFFER_NEXT_PRESENT)
{
@@ -731,19 +837,27 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_buffer_opaque_t *first_b_vnb = vnet_buffer (first_b);
ip6_header_t *ip = vlib_buffer_get_current (first_b);
u16 ip6_frag_hdr_offset = first_b_vnb->ip.reass.ip6_frag_hdr_offset;
- ip6_ext_header_t *prev_hdr;
- frag_hdr =
- ip6_ext_header_find (vm, first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION,
- &prev_hdr);
- if (prev_hdr)
+ ip6_ext_hdr_chain_t hdr_chain;
+ ip6_ext_header_t *prev_hdr = 0;
+ int res = ip6_ext_header_walk (first_b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION,
+ &hdr_chain);
+ if (res < 0 ||
+ (hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION))
{
+ rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
+ goto free_buffers_and_return;
+ }
+ frag_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res].offset);
+ if (res > 0)
+ {
+ prev_hdr = ip6_ext_next_header_offset (ip, hdr_chain.eh[res - 1].offset);
prev_hdr->next_hdr = frag_hdr->next_hdr;
}
else
{
ip->protocol = frag_hdr->next_hdr;
}
- if (!((u8 *) frag_hdr - (u8 *) ip == ip6_frag_hdr_offset))
+ if (hdr_chain.eh[res].offset != ip6_frag_hdr_offset)
{
rv = IP6_FULL_REASS_RC_INTERNAL_ERROR;
goto free_buffers_and_return;
@@ -763,7 +877,7 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
first_b->flags &= ~VLIB_BUFFER_EXT_HDR_VALID;
if (PREDICT_FALSE (first_b->flags & VLIB_BUFFER_IS_TRACED))
{
- ip6_full_reass_add_trace (vm, node, rm, reass, reass->first_bi, NULL,
+ ip6_full_reass_add_trace (vm, node, reass, reass->first_bi, NULL,
FINALIZE, ~0);
#if 0
// following code does a hexdump of packet fragments to stdout ...
@@ -801,6 +915,15 @@ ip6_full_reass_finalize (vlib_main_t * vm, vlib_node_runtime_t * node,
*next0 = reass->next_index;
}
vnet_buffer (first_b)->ip.reass.estimated_mtu = reass->min_fragment_length;
+ /* Keep track of number of successfully reassembled packets and number of
+ * fragments reassembled */
+ vlib_node_increment_counter (vm, node->node_index, IP6_ERROR_REASS_SUCCESS,
+ 1);
+
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_FRAGMENTS_REASSEMBLED,
+ reass->fragments_n);
+
ip6_full_reass_free (rm, rt, reass);
reass = NULL;
free_buffers_and_return:
@@ -811,8 +934,6 @@ free_buffers_and_return:
always_inline void
ip6_full_reass_insert_range_in_chain (vlib_main_t * vm,
- ip6_full_reass_main_t * rm,
- ip6_full_reass_per_thread_t * rt,
ip6_full_reass_t * reass,
u32 prev_range_bi, u32 new_next_bi)
{
@@ -838,12 +959,13 @@ ip6_full_reass_insert_range_in_chain (vlib_main_t * vm,
}
always_inline ip6_full_reass_rc_t
-ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_full_reass_main_t * rm,
- ip6_full_reass_per_thread_t * rt,
- ip6_full_reass_t * reass, u32 * bi0, u32 * next0,
- u32 * error0, ip6_frag_hdr_t * frag_hdr,
- bool is_custom_app, u32 * handoff_thread_idx)
+ip6_full_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_full_reass_main_t *rm,
+ ip6_full_reass_per_thread_t *rt,
+ ip6_full_reass_t *reass, u32 *bi0, u32 *next0,
+ u32 *error0, ip6_frag_hdr_t *frag_hdr,
+ bool is_custom_app, u32 *handoff_thread_idx,
+ int skip_bihash)
{
int consumed = 0;
vlib_buffer_t *fb = vlib_get_buffer (vm, *bi0);
@@ -869,6 +991,10 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 fragment_length =
vlib_buffer_length_in_chain (vm, fb) -
(fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
+ if (0 == fragment_length)
+ {
+ return IP6_FULL_REASS_RC_INVALID_FRAG_LEN;
+ }
u32 fragment_last = fvnb->ip.reass.fragment_last =
fragment_first + fragment_length - 1;
int more_fragments = ip6_frag_hdr_more (frag_hdr);
@@ -884,8 +1010,7 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
if (~0 == reass->first_bi)
{
// starting a new reassembly
- ip6_full_reass_insert_range_in_chain (vm, rm, rt, reass, prev_range_bi,
- *bi0);
+ ip6_full_reass_insert_range_in_chain (vm, reass, prev_range_bi, *bi0);
reass->min_fragment_length = clib_net_to_host_u16 (fip->payload_length);
consumed = 1;
reass->fragments_n = 1;
@@ -907,8 +1032,8 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
~0 == candidate_range_bi)
{
// special case - this fragment falls beyond all known ranges
- ip6_full_reass_insert_range_in_chain (vm, rm, rt, reass,
- prev_range_bi, *bi0);
+ ip6_full_reass_insert_range_in_chain (vm, reass, prev_range_bi,
+ *bi0);
consumed = 1;
break;
}
@@ -917,8 +1042,8 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
if (fragment_last < candidate_vnb->ip.reass.range_first)
{
// this fragment ends before candidate range without any overlap
- ip6_full_reass_insert_range_in_chain (vm, rm, rt, reass,
- prev_range_bi, *bi0);
+ ip6_full_reass_insert_range_in_chain (vm, reass, prev_range_bi,
+ *bi0);
consumed = 1;
}
else if (fragment_first == candidate_vnb->ip.reass.range_first &&
@@ -931,14 +1056,10 @@ ip6_full_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
// overlapping fragment - not allowed by RFC 8200
if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
{
- ip6_full_reass_add_trace (vm, node, rm, reass, *bi0, frag_hdr,
+ ip6_full_reass_add_trace (vm, node, reass, *bi0, frag_hdr,
RANGE_OVERLAP, ~0);
}
- ip6_full_reass_drop_all (vm, node, rm, reass);
- ip6_full_reass_free (rm, rt, reass);
- *next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
- *error0 = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT;
- return IP6_FULL_REASS_RC_OK;
+ return IP6_FULL_REASS_RC_OVERLAP;
}
break;
}
@@ -948,10 +1069,16 @@ check_if_done_maybe:
{
if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
{
- ip6_full_reass_add_trace (vm, node, rm, reass, *bi0, frag_hdr,
- RANGE_NEW, ~0);
+ ip6_full_reass_add_trace (vm, node, reass, *bi0, frag_hdr, RANGE_NEW,
+ ~0);
}
}
+ else if (skip_bihash)
+ {
+ // if this reassembly is not in bihash, then the packet must have been
+ // consumed
+ return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+ }
if (~0 != reass->last_packet_octet &&
reass->data_len == reass->last_packet_octet + 1)
{
@@ -969,6 +1096,12 @@ check_if_done_maybe:
}
else
{
+ if (skip_bihash)
+ {
+ // if this reassembly is not in bihash, it should've been an atomic
+ // fragment and thus finalized
+ return IP6_FULL_REASS_RC_INTERNAL_ERROR;
+ }
if (consumed)
{
*bi0 = ~0;
@@ -987,32 +1120,28 @@ check_if_done_maybe:
}
always_inline bool
-ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t * node,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_ext_hdr_chain_t *hc)
{
- ip6_ext_header_t *tmp = (ip6_ext_header_t *) frag_hdr;
- while (ip6_ext_hdr (tmp->next_hdr))
+ int nh = hc->eh[hc->length - 1].protocol;
+ /* Checking to see if it's a terminating header */
+ if (ip6_ext_hdr (nh))
{
- tmp = ip6_ext_next_header (tmp);
- }
- if (IP_PROTOCOL_IP6_NONXT == tmp->next_hdr)
- {
- icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
- ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain,
- 0);
+ icmp6_error_set_vnet_buffer (
+ b, ICMP6_parameter_problem,
+ ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0);
b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER];
-
return false;
}
return true;
}
always_inline bool
-ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_frag_hdr_t *frag_hdr)
{
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
ip6_header_t *ip = vlib_buffer_get_current (b);
@@ -1025,16 +1154,17 @@ ip6_full_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
ICMP6_parameter_problem_erroneous_header_field,
(u8 *) & ip->payload_length - (u8 *) ip);
+ b->error = node->errors[IP6_ERROR_REASS_INVALID_FRAG_SIZE];
return false;
}
return true;
}
always_inline bool
-ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t *vm,
+ vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_frag_hdr_t *frag_hdr)
{
vnet_buffer_opaque_t *vnb = vnet_buffer (b);
u32 fragment_first = ip6_frag_hdr_offset_bytes (frag_hdr);
@@ -1048,16 +1178,16 @@ ip6_full_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
ICMP6_parameter_problem_erroneous_header_field,
(u8 *) & frag_hdr->fragment_offset_and_more
- (u8 *) ip0);
+ b->error = node->errors[IP6_ERROR_REASS_INVALID_FRAG_SIZE];
return false;
}
return true;
}
always_inline uword
-ip6_full_reassembly_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature,
- bool is_custom_app)
+ip6_full_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool is_custom_app, bool is_local)
{
u32 *from = vlib_frame_vector_args (frame);
u32 n_left_from, n_left_to_next, *to_next, next_index;
@@ -1084,57 +1214,95 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
ip6_header_t *ip0 = vlib_buffer_get_current (b0);
ip6_frag_hdr_t *frag_hdr = NULL;
- ip6_ext_header_t *prev_hdr;
- if (ip6_ext_hdr (ip0->protocol))
+ ip6_ext_hdr_chain_t hdr_chain;
+ vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
+
+ int res = ip6_ext_header_walk (
+ b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
+ if (res < 0 ||
+ hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION)
{
- frag_hdr =
- ip6_ext_header_find (vm, b0, ip0,
- IP_PROTOCOL_IPV6_FRAGMENTATION,
- &prev_hdr);
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_NO_FRAG_HDR, 1);
+ // this is a mangled packet - no fragmentation
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_DROP;
+ ip6_full_reass_add_trace (vm, node, NULL, bi0, NULL, PASSTHROUGH,
+ ~0);
+ goto skip_reass;
}
- if (!frag_hdr)
+ if (is_local && !rm->is_local_reass_enabled)
{
- // this is a regular packet - no fragmentation
- next0 = IP6_FULL_REASSEMBLY_NEXT_INPUT;
+ next0 = IP6_FULL_REASSEMBLY_NEXT_DROP;
goto skip_reass;
}
+
+ /* Keep track of received fragments */
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_FRAGMENTS_RCVD, 1);
+ frag_hdr =
+ ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
- (u8 *) frag_hdr - (u8 *) ip0;
+ hdr_chain.eh[res].offset;
if (0 == ip6_frag_hdr_offset (frag_hdr))
{
// first fragment - verify upper-layer is present
- if (!ip6_full_reass_verify_upper_layer_present
- (node, b0, frag_hdr))
+ if (!ip6_full_reass_verify_upper_layer_present (node, b0,
+ &hdr_chain))
{
- next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
goto skip_reass;
}
}
- if (!ip6_full_reass_verify_fragment_multiple_8
- (vm, node, b0, frag_hdr)
- || !ip6_full_reass_verify_packet_size_lt_64k (vm, node, b0,
- frag_hdr))
+
+ if (!ip6_full_reass_verify_fragment_multiple_8 (vm, node, b0,
+ frag_hdr) ||
+ !ip6_full_reass_verify_packet_size_lt_64k (vm, node, b0,
+ frag_hdr))
{
- next0 = IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
+ next0 = is_custom_app ? fvnb->ip.reass.error_next_index :
+ IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR;
goto skip_reass;
}
+
+ int skip_bihash = 0;
ip6_full_reass_kv_t kv;
u8 do_handoff = 0;
- kv.k.as_u64[0] = ip0->src_address.as_u64[0];
- kv.k.as_u64[1] = ip0->src_address.as_u64[1];
- kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
- kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
- kv.k.as_u64[4] =
- ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 |
- (u64) frag_hdr->identification;
- kv.k.as_u64[5] = ip0->protocol;
+ if (0 == ip6_frag_hdr_offset (frag_hdr) &&
+ !ip6_frag_hdr_more (frag_hdr))
+ {
+ // this is atomic fragment and needs to be processed separately
+ skip_bihash = 1;
+ }
+ else
+ {
+ u32 fib_index =
+ (vnet_buffer (b0)->sw_if_index[VLIB_TX] == (u32) ~0) ?
+ vec_elt (ip6_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]) :
+ vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ kv.k.as_u64[0] = ip0->src_address.as_u64[0];
+ kv.k.as_u64[1] = ip0->src_address.as_u64[1];
+ kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
+ kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
+ kv.k.as_u64[4] =
+ ((u64) fib_index) << 32 | (u64) frag_hdr->identification;
+ /* RFC 8200: The Next Header values in the Fragment headers of
+ * different fragments of the same original packet may differ.
+ * Only the value from the Offset zero fragment packet is used
+ * for reassembly.
+ *
+ * Also, IPv6 Header doesnt contain the protocol value unlike
+ * IPv4.*/
+ kv.k.as_u64[5] = 0;
+ }
- ip6_full_reass_t *reass =
- ip6_full_reass_find_or_create (vm, node, rm, rt, &kv, &icmp_bi,
- &do_handoff);
+ ip6_full_reass_t *reass = ip6_full_reass_find_or_create (
+ vm, node, rm, rt, &kv, &icmp_bi, &do_handoff, skip_bihash,
+ &n_left_to_next, &to_next);
if (reass)
{
@@ -1153,9 +1321,10 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
else if (reass)
{
u32 handoff_thread_idx;
- switch (ip6_full_reass_update
- (vm, node, rm, rt, reass, &bi0, &next0, &error0,
- frag_hdr, is_custom_app, &handoff_thread_idx))
+ u32 counter = ~0;
+ switch (ip6_full_reass_update (
+ vm, node, rm, rt, reass, &bi0, &next0, &error0, frag_hdr,
+ is_custom_app, &handoff_thread_idx, skip_bihash))
{
case IP6_FULL_REASS_RC_OK:
/* nothing to do here */
@@ -1167,25 +1336,36 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
handoff_thread_idx;
break;
case IP6_FULL_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip6_full_reass_drop_all (vm, node, rm, reass);
- ip6_full_reass_free (rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
break;
case IP6_FULL_REASS_RC_NO_BUF:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_NO_BUF, 1);
- ip6_full_reass_drop_all (vm, node, rm, reass);
- ip6_full_reass_free (rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_NO_BUF;
+ break;
+ case IP6_FULL_REASS_RC_INVALID_FRAG_LEN:
+ counter = IP6_ERROR_REASS_INVALID_FRAG_LEN;
+ break;
+ case IP6_FULL_REASS_RC_OVERLAP:
+ counter = IP6_ERROR_REASS_OVERLAPPING_FRAGMENT;
break;
case IP6_FULL_REASS_RC_INTERNAL_ERROR:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_INTERNAL_ERROR,
+ counter = IP6_ERROR_REASS_INTERNAL_ERROR;
+ /* Sanitization is needed in internal error cases only, as
+ * the incoming packet is already dropped in other cases,
+ * also adding bi0 back to the reassembly list, fixes the
+ * leaking of buffers during internal errors.
+ *
+ * Also it doesnt make sense to send these buffers custom
+ * app, these fragments are with internal errors */
+ sanitize_reass_buffers_add_missing (vm, reass, &bi0);
+ reass->error_next_index = ~0;
+ break;
+ }
+ if (~0 != counter)
+ {
+ vlib_node_increment_counter (vm, node->node_index, counter,
1);
- ip6_full_reass_drop_all (vm, node, rm, reass);
+ ip6_full_reass_drop_all (vm, node, reass, &n_left_to_next,
+ &to_next);
ip6_full_reass_free (rm, rt, reass);
goto next_packet;
break;
@@ -1199,7 +1379,6 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
}
else
{
- vnet_buffer_opaque_t *fvnb = vnet_buffer (b0);
next0 = fvnb->ip.reass.error_next_index;
}
error0 = IP6_ERROR_REASS_LIMIT_REACHED;
@@ -1223,16 +1402,24 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
{
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip6_full_reass_add_trace (vm, node, rm, NULL, bi0,
- frag_hdr, HANDOFF,
- vnet_buffer (b0)->ip.
- reass.owner_thread_index);
+ ip6_full_reass_add_trace (
+ vm, node, NULL, bi0, frag_hdr, HANDOFF,
+ vnet_buffer (b0)->ip.reass.owner_thread_index);
}
}
else if (is_feature && IP6_ERROR_NONE == error0)
{
vnet_feature_next (&next0, b0);
}
+
+ /* Increment the counter to-custom-app also as this fragment is
+ * also going to application */
+ if (is_custom_app)
+ {
+ vlib_node_increment_counter (
+ vm, node->node_index, IP6_ERROR_REASS_TO_CUSTOM_APP, 1);
+ }
+
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
}
@@ -1259,27 +1446,21 @@ ip6_full_reassembly_inline (vlib_main_t * vm,
return frame->n_vectors;
}
-static char *ip6_full_reassembly_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip6_error
-#undef _
-};
-
VLIB_NODE_FN (ip6_full_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */ ,
- false /* is_custom_app */ );
+ return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
+ false /* is_custom_app */,
+ false /* is_local */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_full_reass_node) = {
.name = "ip6-full-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip6_full_reass_trace,
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -1289,23 +1470,46 @@ VLIB_REGISTER_NODE (ip6_full_reass_node) = {
[IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-full-reassembly-handoff",
},
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_local_full_reass_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
+ false /* is_custom_app */,
+ true /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip6_local_full_reass_node) = {
+ .name = "ip6-local-full-reassembly",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_full_reass_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
+ .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
+ [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
+ [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-local-full-reassembly-handoff",
+ },
+};
VLIB_NODE_FN (ip6_full_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_full_reassembly_inline (vm, node, frame, true /* is_feature */ ,
- false /* is_custom_app */ );
+ return ip6_full_reassembly_inline (vm, node, frame, true /* is_feature */,
+ false /* is_custom_app */,
+ false /* is_local */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_full_reass_node_feature) = {
.name = "ip6-full-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip6_full_reass_trace,
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -1315,9 +1519,7 @@ VLIB_REGISTER_NODE (ip6_full_reass_node_feature) = {
[IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-full-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip6_full_reassembly_feature, static) = {
.arc_name = "ip6-unicast",
.node_name = "ip6-full-reassembly-feature",
@@ -1325,7 +1527,30 @@ VNET_FEATURE_INIT (ip6_full_reassembly_feature, static) = {
"ipsec6-input-feature"),
.runs_after = 0,
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_full_reass_node_custom)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_inline (vm, node, frame, false /* is_feature */,
+ true /* is_custom_app */,
+ false /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip6_full_reass_node_custom) = {
+ .name = "ip6-full-reassembly-custom",
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_full_reass_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
+ .n_next_nodes = IP6_FULL_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_FULL_REASSEMBLY_NEXT_INPUT] = "ip6-input",
+ [IP6_FULL_REASSEMBLY_NEXT_DROP] = "ip6-drop",
+ [IP6_FULL_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_FULL_REASSEMBLY_NEXT_HANDOFF] = "ip6-full-reass-custom-hoff",
+ },
+};
#ifndef CLIB_MARCH_VARIANT
static u32
@@ -1335,7 +1560,9 @@ ip6_full_reass_get_nbuckets ()
u32 nbuckets;
u8 i;
- nbuckets = (u32) (rm->max_reass_n / IP6_FULL_REASS_HT_LOAD_FACTOR);
+ /* need more mem with more workers */
+ nbuckets = (u32) (rm->max_reass_n * (vlib_num_workers () + 1) /
+ IP6_FULL_REASS_HT_LOAD_FACTOR);
for (i = 0; i < 31; i++)
if ((1 << i) >= nbuckets)
@@ -1462,9 +1689,6 @@ ip6_full_reass_init_function (vlib_main_t * vm)
clib_bihash_init_48_8 (&rm->hash, "ip6-full-reass", nbuckets,
nbuckets * 1024);
- node = vlib_get_node_by_name (vm, (u8 *) "ip6-drop");
- ASSERT (node);
- rm->ip6_drop_idx = node->index;
node = vlib_get_node_by_name (vm, (u8 *) "ip6-icmp-error");
ASSERT (node);
rm->ip6_icmp_error_idx = node->index;
@@ -1472,11 +1696,16 @@ ip6_full_reass_init_function (vlib_main_t * vm)
if ((error = vlib_call_init_function (vm, ip_main_init)))
return error;
ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION,
- ip6_full_reass_node.index);
+ ip6_local_full_reass_node.index);
+ rm->is_local_reass_enabled = 1;
rm->fq_index = vlib_frame_queue_main_init (ip6_full_reass_node.index, 0);
+ rm->fq_local_index =
+ vlib_frame_queue_main_init (ip6_local_full_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip6_full_reass_node_feature.index, 0);
+ rm->fq_custom_index =
+ vlib_frame_queue_main_init (ip6_full_reass_node_custom.index, 0);
rm->feature_use_refcount_per_intf = NULL;
return error;
@@ -1486,8 +1715,8 @@ VLIB_INIT_FUNCTION (ip6_full_reass_init_function);
#endif /* CLIB_MARCH_VARIANT */
static uword
-ip6_full_reass_walk_expired (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * f)
+ip6_full_reass_walk_expired (vlib_main_t *vm, vlib_node_runtime_t *node,
+ CLIB_UNUSED (vlib_frame_t *f))
{
ip6_full_reass_main_t *rm = &ip6_full_reass_main;
uword event_type, *event_data = 0;
@@ -1501,10 +1730,11 @@ ip6_full_reass_walk_expired (vlib_main_t * vm,
switch (event_type)
{
- case ~0: /* no events => timeout */
- /* nothing to do here */
- break;
+ case ~0:
+ /* no events => timeout */
+ /* fallthrough */
case IP6_EVENT_CONFIG_CHANGED:
+ /* nothing to do here */
break;
default:
clib_warning ("BUG: event type 0x%wx", event_type);
@@ -1519,37 +1749,64 @@ ip6_full_reass_walk_expired (vlib_main_t * vm,
int index;
const uword nthreads = vlib_num_workers () + 1;
u32 *vec_icmp_bi = NULL;
+ u32 n_left_to_next, *to_next;
+
for (thread_index = 0; thread_index < nthreads; ++thread_index)
{
ip6_full_reass_per_thread_t *rt =
&rm->per_thread_data[thread_index];
+ u32 reass_timeout_cnt = 0;
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- /* *INDENT-OFF* */
- pool_foreach_index (index, rt->pool) {
- reass = pool_elt_at_index (rt->pool, index);
- if (now > reass->last_heard + rm->timeout)
- {
- vec_add1 (pool_indexes_to_free, index);
- }
- }
- /* *INDENT-ON* */
+ /* Pace the number of timeouts handled per thread,to avoid barrier
+ * sync issues in real world scenarios */
+
+ u32 beg = rt->last_id;
+ /* to ensure we walk at least once per sec per context */
+ u32 end = beg + (IP6_FULL_REASS_MAX_REASSEMBLIES_DEFAULT *
+ IP6_FULL_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS /
+ MSEC_PER_SEC +
+ 1);
+ if (end > vec_len (rt->pool))
+ {
+ end = vec_len (rt->pool);
+ rt->last_id = 0;
+ }
+ else
+ {
+ rt->last_id = end;
+ }
+
+ pool_foreach_stepping_index (index, beg, end, rt->pool)
+ {
+ reass = pool_elt_at_index (rt->pool, index);
+ if (now > reass->last_heard + rm->timeout)
+ {
+ vec_add1 (pool_indexes_to_free, index);
+ }
+ }
+
int *i;
- /* *INDENT-OFF* */
vec_foreach (i, pool_indexes_to_free)
{
ip6_full_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
u32 icmp_bi = ~0;
- ip6_full_reass_on_timeout (vm, node, rm, reass, &icmp_bi);
- if (~0 != icmp_bi)
- vec_add1 (vec_icmp_bi, icmp_bi);
- ip6_full_reass_free (rm, rt, reass);
- }
- /* *INDENT-ON* */
+ reass_timeout_cnt += reass->fragments_n;
+ ip6_full_reass_on_timeout (vm, node, reass, &icmp_bi,
+ &n_left_to_next, &to_next);
+ if (~0 != icmp_bi)
+ vec_add1 (vec_icmp_bi, icmp_bi);
+
+ ip6_full_reass_free (rm, rt, reass);
+ }
clib_spinlock_unlock (&rt->lock);
+ if (reass_timeout_cnt)
+ vlib_node_increment_counter (vm, node->node_index,
+ IP6_ERROR_REASS_TIMEOUT,
+ reass_timeout_cnt);
}
while (vec_len (vec_icmp_bi) > 0)
@@ -1565,7 +1822,6 @@ ip6_full_reass_walk_expired (vlib_main_t * vm,
vlib_buffer_t *b = vlib_get_buffer (vm, bi);
if (PREDICT_FALSE (b->flags & VLIB_BUFFER_IS_TRACED))
trace_frame = 1;
- b->error = node->errors[IP6_ERROR_REASS_TIMEOUT];
to_next[0] = bi;
++f->n_vectors;
to_next += 1;
@@ -1579,25 +1835,22 @@ ip6_full_reass_walk_expired (vlib_main_t * vm,
vec_free (vec_icmp_bi);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_full_reass_expire_node) = {
- .function = ip6_full_reass_walk_expired,
- .format_trace = format_ip6_full_reass_trace,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip6-full-reassembly-expire-walk",
-
- .n_errors = ARRAY_LEN (ip6_full_reassembly_error_strings),
- .error_strings = ip6_full_reassembly_error_strings,
+ .function = ip6_full_reass_walk_expired,
+ .format_trace = format_ip6_full_reass_trace,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip6-full-reassembly-expire-walk",
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
};
-/* *INDENT-ON* */
static u8 *
format_ip6_full_reass_key (u8 * s, va_list * args)
@@ -1671,11 +1924,9 @@ show_ip6_full_reass (vlib_main_t * vm, unformat_input_t * input,
clib_spinlock_lock (&rt->lock);
if (details)
{
- /* *INDENT-OFF* */
pool_foreach (reass, rt->pool) {
vlib_cli_output (vm, "%U", format_ip6_full_reass, vm, reass);
}
- /* *INDENT-ON* */
}
sum_reass_n += rt->reass_n;
clib_spinlock_unlock (&rt->lock);
@@ -1701,13 +1952,11 @@ show_ip6_full_reass (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_full_reassembly_cmd, static) = {
.path = "show ip6-full-reassembly",
.short_help = "show ip6-full-reassembly [details]",
.function = show_ip6_full_reass,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
@@ -1758,9 +2007,10 @@ format_ip6_full_reassembly_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip6_full_reassembly_handoff_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip6_full_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame,
+ ip6_full_reass_node_type_t type,
+ bool is_local)
{
ip6_full_reass_main_t *rm = &ip6_full_reass_main;
@@ -1776,8 +2026,28 @@ ip6_full_reassembly_handoff_inline (vlib_main_t * vm,
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
-
+ switch (type)
+ {
+ case NORMAL:
+ if (is_local)
+ {
+ fq_index = rm->fq_local_index;
+ }
+ else
+ {
+ fq_index = rm->fq_index;
+ }
+ break;
+ case FEATURE:
+ fq_index = rm->fq_feature_index;
+ break;
+ case CUSTOM:
+ fq_index = rm->fq_custom_index;
+ break;
+ default:
+ clib_warning ("Unexpected `type' (%d)!", type);
+ ASSERT (0);
+ }
while (n_left_from > 0)
{
ti[0] = vnet_buffer (b[0])->ip.reass.owner_thread_index;
@@ -1809,11 +2079,10 @@ VLIB_NODE_FN (ip6_full_reassembly_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_full_reassembly_handoff_inline (vm, node, frame,
- false /* is_feature */ );
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, NORMAL,
+ false /* is_local */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_full_reassembly_handoff_node) = {
.name = "ip6-full-reassembly-handoff",
.vector_size = sizeof (u32),
@@ -1828,15 +2097,34 @@ VLIB_REGISTER_NODE (ip6_full_reassembly_handoff_node) = {
},
};
+VLIB_NODE_FN (ip6_local_full_reassembly_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, NORMAL,
+ true /* is_feature */);
+}
+
+VLIB_REGISTER_NODE (ip6_local_full_reassembly_handoff_node) = {
+ .name = "ip6-local-full-reassembly-handoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
+ .error_strings = ip6_full_reassembly_handoff_error_strings,
+ .format_trace = format_ip6_full_reassembly_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
VLIB_NODE_FN (ip6_full_reassembly_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
- return ip6_full_reassembly_handoff_inline (vm, node, frame, true /* is_feature */ );
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, FEATURE,
+ false /* is_local */);
}
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_full_reassembly_feature_handoff_node) = {
.name = "ip6-full-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1850,7 +2138,27 @@ VLIB_REGISTER_NODE (ip6_full_reassembly_feature_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_full_reassembly_custom_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_full_reassembly_handoff_inline (vm, node, frame, CUSTOM,
+ false /* is_local */);
+}
+
+VLIB_REGISTER_NODE (ip6_full_reassembly_custom_handoff_node) = {
+ .name = "ip6-full-reass-custom-hoff",
+ .vector_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip6_full_reassembly_handoff_error_strings),
+ .error_strings = ip6_full_reassembly_handoff_error_strings,
+ .format_trace = format_ip6_full_reassembly_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
#ifndef CLIB_MARCH_VARIANT
int
@@ -1877,8 +2185,37 @@ ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
"ip6-full-reassembly-feature",
sw_if_index, 0, 0, 0);
}
- return -1;
+ return 0;
}
+
+void
+ip6_local_full_reass_enable_disable (int enable)
+{
+ if (enable)
+ {
+ if (!ip6_full_reass_main.is_local_reass_enabled)
+ {
+ ip6_full_reass_main.is_local_reass_enabled = 1;
+ ip6_register_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION,
+ ip6_local_full_reass_node.index);
+ }
+ }
+ else
+ {
+ if (ip6_full_reass_main.is_local_reass_enabled)
+ {
+ ip6_full_reass_main.is_local_reass_enabled = 0;
+ ip6_unregister_protocol (IP_PROTOCOL_IPV6_FRAGMENTATION);
+ }
+ }
+}
+
+int
+ip6_local_full_reass_enabled ()
+{
+ return ip6_full_reass_main.is_local_reass_enabled;
+}
+
#endif
/*
diff --git a/src/vnet/ip/reass/ip6_full_reass.h b/src/vnet/ip/reass/ip6_full_reass.h
index 546075b04b4..f66cb67d796 100644
--- a/src/vnet/ip/reass/ip6_full_reass.h
+++ b/src/vnet/ip/reass/ip6_full_reass.h
@@ -46,6 +46,8 @@ vnet_api_error_t ip6_full_reass_enable_disable (u32 sw_if_index,
int ip6_full_reass_enable_disable_with_refcnt (u32 sw_if_index,
int is_enable);
+void ip6_local_full_reass_enable_disable (int enable);
+int ip6_local_full_reass_enabled ();
#endif /* __included_ip6_full_reass_h */
/*
diff --git a/src/vnet/ip/reass/ip6_sv_reass.c b/src/vnet/ip/reass/ip6_sv_reass.c
index d5218a4fb1d..fe2ed05555c 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.c
+++ b/src/vnet/ip/reass/ip6_sv_reass.c
@@ -26,6 +26,7 @@
#include <vnet/ip/ip6_to_ip4.h>
#include <vppinfra/bihash_48_8.h>
#include <vnet/ip/reass/ip6_sv_reass.h>
+#include <vnet/ip/ip6_inlines.h>
#define MSEC_PER_SEC 1000
#define IP6_SV_REASS_TIMEOUT_DEFAULT_MS 100
@@ -40,6 +41,7 @@ typedef enum
IP6_SV_REASS_RC_TOO_MANY_FRAGMENTS,
IP6_SV_REASS_RC_INTERNAL_ERROR,
IP6_SV_REASS_RC_UNSUPP_IP_PROTO,
+ IP6_SV_REASS_RC_INVALID_FRAG_LEN,
} ip6_sv_reass_rc_t;
typedef struct
@@ -50,7 +52,7 @@ typedef struct
{
ip6_address_t src;
ip6_address_t dst;
- u32 xx_id;
+ u32 fib_index;
u32 frag_id;
u8 unused[7];
u8 proto;
@@ -148,6 +150,7 @@ typedef struct
/** Worker handoff */
u32 fq_index;
u32 fq_feature_index;
+ u32 fq_custom_context_index;
// reference count for enabling/disabling feature - per interface
u32 *feature_use_refcount_per_intf;
@@ -214,7 +217,7 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args)
clib_net_to_host_u16 (t->l4_dst_port));
break;
case REASS_PASSTHROUGH:
- s = format (s, "[not-fragmented]");
+ s = format (s, "[not fragmented or atomic fragment]");
break;
}
return s;
@@ -222,7 +225,6 @@ format_ip6_sv_reass_trace (u8 * s, va_list * args)
static void
ip6_sv_reass_add_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_sv_reass_main_t * rm,
ip6_sv_reass_t * reass, u32 bi,
ip6_sv_reass_trace_operation_e action,
u32 ip_proto, u16 l4_src_port, u16 l4_dst_port)
@@ -303,15 +305,15 @@ ip6_sv_reass_init (ip6_sv_reass_t * reass)
}
always_inline ip6_sv_reass_t *
-ip6_sv_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_sv_reass_main_t * rm,
- ip6_sv_reass_per_thread_t * rt,
- ip6_sv_reass_kv_t * kv, u32 * icmp_bi,
- u8 * do_handoff)
+ip6_sv_reass_find_or_create (vlib_main_t *vm, ip6_sv_reass_main_t *rm,
+ ip6_sv_reass_per_thread_t *rt,
+ ip6_sv_reass_kv_t *kv, u8 *do_handoff)
{
ip6_sv_reass_t *reass = NULL;
f64 now = vlib_time_now (vm);
+again:
+
if (!clib_bihash_search_48_8 (&rm->hash, &kv->kv, &kv->kv))
{
if (vm->thread_index != kv->v.thread_index)
@@ -371,20 +373,23 @@ ip6_sv_reass_find_or_create (vlib_main_t * vm, vlib_node_runtime_t * node,
kv->v.thread_index = vm->thread_index;
reass->last_heard = now;
- if (clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 1))
+ int rv = clib_bihash_add_del_48_8 (&rm->hash, &kv->kv, 2);
+ if (rv)
{
ip6_sv_reass_free (vm, rm, rt, reass);
reass = NULL;
+ // if other worker created a context already work with the other copy
+ if (-2 == rv)
+ goto again;
}
return reass;
}
always_inline ip6_sv_reass_rc_t
-ip6_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
- ip6_sv_reass_main_t * rm, ip6_sv_reass_per_thread_t * rt,
- ip6_sv_reass_t * reass, u32 bi0,
- ip6_frag_hdr_t * frag_hdr)
+ip6_sv_reass_update (vlib_main_t *vm, vlib_node_runtime_t *node,
+ ip6_sv_reass_main_t *rm, ip6_sv_reass_t *reass, u32 bi0,
+ ip6_frag_hdr_t *frag_hdr)
{
vlib_buffer_t *fb = vlib_get_buffer (vm, bi0);
vnet_buffer_opaque_t *fvnb = vnet_buffer (fb);
@@ -403,6 +408,10 @@ ip6_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 fragment_length =
vlib_buffer_length_in_chain (vm, fb) -
(fvnb->ip.reass.ip6_frag_hdr_offset + sizeof (*frag_hdr));
+ if (0 == fragment_length)
+ {
+ return IP6_SV_REASS_RC_INVALID_FRAG_LEN;
+ }
u32 fragment_last = fvnb->ip.reass.fragment_last =
fragment_first + fragment_length - 1;
fvnb->ip.reass.range_first = fragment_first;
@@ -421,7 +430,7 @@ ip6_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip6_sv_reass_add_trace (vm, node, rm, reass, bi0, REASS_FINISH,
+ ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_FINISH,
reass->ip_proto, reass->l4_src_port,
reass->l4_dst_port);
}
@@ -431,9 +440,9 @@ ip6_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
{
if (PREDICT_FALSE (fb->flags & VLIB_BUFFER_IS_TRACED))
{
- ip6_sv_reass_add_trace (vm, node, rm, reass, bi0,
- REASS_FRAGMENT_CACHE, reass->ip_proto,
- reass->l4_src_port, reass->l4_dst_port);
+ ip6_sv_reass_add_trace (vm, node, reass, bi0, REASS_FRAGMENT_CACHE,
+ reass->ip_proto, reass->l4_src_port,
+ reass->l4_dst_port);
}
if (vec_len (reass->cached_buffers) > rm->max_reass_len)
{
@@ -444,22 +453,18 @@ ip6_sv_reass_update (vlib_main_t * vm, vlib_node_runtime_t * node,
}
always_inline bool
-ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t * node,
- vlib_buffer_t * b,
- ip6_frag_hdr_t * frag_hdr)
+ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t *node,
+ vlib_buffer_t *b,
+ ip6_ext_hdr_chain_t *hc)
{
- ip6_ext_header_t *tmp = (ip6_ext_header_t *) frag_hdr;
- while (ip6_ext_hdr (tmp->next_hdr))
+ int nh = hc->eh[hc->length - 1].protocol;
+ /* Checking to see if it's a terminating header */
+ if (ip6_ext_hdr (nh))
{
- tmp = ip6_ext_next_header (tmp);
- }
- if (IP_PROTOCOL_IP6_NONXT == tmp->next_hdr)
- {
- icmp6_error_set_vnet_buffer (b, ICMP6_parameter_problem,
- ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain,
- 0);
+ icmp6_error_set_vnet_buffer (
+ b, ICMP6_parameter_problem,
+ ICMP6_parameter_problem_first_fragment_has_incomplete_header_chain, 0);
b->error = node->errors[IP6_ERROR_REASS_MISSING_UPPER];
-
return false;
}
return true;
@@ -467,7 +472,6 @@ ip6_sv_reass_verify_upper_layer_present (vlib_node_runtime_t * node,
always_inline bool
ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
- vlib_node_runtime_t * node,
vlib_buffer_t * b,
ip6_frag_hdr_t * frag_hdr)
{
@@ -489,7 +493,6 @@ ip6_sv_reass_verify_fragment_multiple_8 (vlib_main_t * vm,
always_inline bool
ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
- vlib_node_runtime_t * node,
vlib_buffer_t * b,
ip6_frag_hdr_t * frag_hdr)
{
@@ -511,14 +514,18 @@ ip6_sv_reass_verify_packet_size_lt_64k (vlib_main_t * vm,
}
always_inline uword
-ip6_sv_reassembly_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip6_sv_reassembly_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool custom_next, bool custom_context)
{
u32 *from = vlib_frame_vector_args (frame);
- u32 n_left_from, n_left_to_next, *to_next, next_index;
+ u32 n_left_from, n_left_to_next, *to_next, *to_next_aux, next_index;
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
ip6_sv_reass_per_thread_t *rt = &rm->per_thread_data[vm->thread_index];
+ u32 *context;
+ if (custom_context)
+ context = vlib_frame_aux_args (frame);
+
clib_spinlock_lock (&rt->lock);
n_left_from = frame->n_vectors;
@@ -526,7 +533,11 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
while (n_left_from > 0)
{
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ if (custom_context)
+ vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next,
+ to_next_aux, n_left_to_next);
+ else
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
while (n_left_from > 0 && n_left_to_next > 0)
{
@@ -534,24 +545,31 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
vlib_buffer_t *b0;
u32 next0 = IP6_SV_REASSEMBLY_NEXT_DROP;
u32 error0 = IP6_ERROR_NONE;
- u32 icmp_bi = ~0;
-
+ u8 forward_context = 0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
ip6_header_t *ip0 = vlib_buffer_get_current (b0);
- ip6_frag_hdr_t *frag_hdr = NULL;
- ip6_ext_header_t *prev_hdr;
- if (ip6_ext_hdr (ip0->protocol))
+ ip6_frag_hdr_t *frag_hdr;
+ ip6_ext_hdr_chain_t hdr_chain;
+ bool is_atomic_fragment = false;
+
+ int res = ip6_ext_header_walk (
+ b0, ip0, IP_PROTOCOL_IPV6_FRAGMENTATION, &hdr_chain);
+ if (res >= 0 &&
+ hdr_chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
{
frag_hdr =
- ip6_ext_header_find (vm, b0, ip0,
- IP_PROTOCOL_IPV6_FRAGMENTATION,
- &prev_hdr);
+ ip6_ext_next_header_offset (ip0, hdr_chain.eh[res].offset);
+ is_atomic_fragment = (0 == ip6_frag_hdr_offset (frag_hdr) &&
+ !ip6_frag_hdr_more (frag_hdr));
}
- if (!frag_hdr)
+
+ if (res < 0 ||
+ hdr_chain.eh[res].protocol != IP_PROTOCOL_IPV6_FRAGMENTATION ||
+ is_atomic_fragment)
{
- // this is a regular packet - no fragmentation
+ // this is a regular unfragmented packet or an atomic fragment
if (!ip6_get_port
(vm, b0, ip0, b0->current_length,
&(vnet_buffer (b0)->ip.reass.ip_proto),
@@ -567,35 +585,34 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
goto packet_enqueue;
}
vnet_buffer (b0)->ip.reass.is_non_first_fragment = 0;
- next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
+ next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index :
+ IP6_SV_REASSEMBLY_NEXT_INPUT;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip6_sv_reass_add_trace (vm, node, rm, NULL, bi0,
- REASS_PASSTHROUGH,
- vnet_buffer (b0)->ip.reass.ip_proto,
- vnet_buffer (b0)->ip.
- reass.l4_src_port,
- vnet_buffer (b0)->ip.
- reass.l4_dst_port);
+ ip6_sv_reass_add_trace (
+ vm, node, NULL, bi0, REASS_PASSTHROUGH,
+ vnet_buffer (b0)->ip.reass.ip_proto,
+ vnet_buffer (b0)->ip.reass.l4_src_port,
+ vnet_buffer (b0)->ip.reass.l4_dst_port);
}
goto packet_enqueue;
}
+
vnet_buffer (b0)->ip.reass.ip6_frag_hdr_offset =
- (u8 *) frag_hdr - (u8 *) ip0;
+ hdr_chain.eh[res].offset;
+
if (0 == ip6_frag_hdr_offset (frag_hdr))
{
// first fragment - verify upper-layer is present
- if (!ip6_sv_reass_verify_upper_layer_present
- (node, b0, frag_hdr))
+ if (!ip6_sv_reass_verify_upper_layer_present (node, b0,
+ &hdr_chain))
{
next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR;
goto packet_enqueue;
}
}
- if (!ip6_sv_reass_verify_fragment_multiple_8
- (vm, node, b0, frag_hdr)
- || !ip6_sv_reass_verify_packet_size_lt_64k (vm, node, b0,
- frag_hdr))
+ if (!ip6_sv_reass_verify_fragment_multiple_8 (vm, b0, frag_hdr) ||
+ !ip6_sv_reass_verify_packet_size_lt_64k (vm, b0, frag_hdr))
{
next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR;
goto packet_enqueue;
@@ -608,21 +625,27 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
kv.k.as_u64[1] = ip0->src_address.as_u64[1];
kv.k.as_u64[2] = ip0->dst_address.as_u64[0];
kv.k.as_u64[3] = ip0->dst_address.as_u64[1];
- kv.k.as_u64[4] =
- ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
- vnet_buffer (b0)->sw_if_index[VLIB_RX])) << 32 |
- (u64) frag_hdr->identification;
+ if (custom_context)
+ kv.k.as_u64[4] =
+ (u64) *context << 32 | (u64) frag_hdr->identification;
+ else
+ kv.k.as_u64[4] =
+ ((u64) vec_elt (ip6_main.fib_index_by_sw_if_index,
+ vnet_buffer (b0)->sw_if_index[VLIB_RX]))
+ << 32 |
+ (u64) frag_hdr->identification;
kv.k.as_u64[5] = ip0->protocol;
ip6_sv_reass_t *reass =
- ip6_sv_reass_find_or_create (vm, node, rm, rt, &kv, &icmp_bi,
- &do_handoff);
+ ip6_sv_reass_find_or_create (vm, rm, rt, &kv, &do_handoff);
if (PREDICT_FALSE (do_handoff))
{
next0 = IP6_SV_REASSEMBLY_NEXT_HANDOFF;
vnet_buffer (b0)->ip.reass.owner_thread_index =
kv.v.thread_index;
+ if (custom_context)
+ forward_context = 1;
goto packet_enqueue;
}
@@ -647,44 +670,41 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
reass->tcp_seq_number;
vnet_buffer (b0)->ip.reass.l4_src_port = reass->l4_src_port;
vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
- next0 = IP6_SV_REASSEMBLY_NEXT_INPUT;
+ next0 = custom_next ? vnet_buffer (b0)->ip.reass.next_index :
+ IP6_SV_REASSEMBLY_NEXT_INPUT;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip6_sv_reass_add_trace (vm, node, rm, reass, bi0,
- REASS_FRAGMENT_FORWARD,
- reass->ip_proto,
- reass->l4_src_port,
- reass->l4_dst_port);
+ ip6_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
+ reass->ip_proto, reass->l4_src_port, reass->l4_dst_port);
}
goto packet_enqueue;
}
- switch (ip6_sv_reass_update
- (vm, node, rm, rt, reass, bi0, frag_hdr))
+ u32 counter = ~0;
+ switch (ip6_sv_reass_update (vm, node, rm, reass, bi0, frag_hdr))
{
case IP6_SV_REASS_RC_OK:
/* nothing to do here */
break;
case IP6_SV_REASS_RC_TOO_MANY_FRAGMENTS:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG,
- 1);
- ip6_sv_reass_free (vm, rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_FRAGMENT_CHAIN_TOO_LONG;
break;
case IP6_SV_REASS_RC_UNSUPP_IP_PROTO:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_UNSUPP_IP_PROTO,
- 1);
- ip6_sv_reass_free (vm, rm, rt, reass);
- goto next_packet;
+ counter = IP6_ERROR_REASS_UNSUPP_IP_PROTO;
break;
case IP6_SV_REASS_RC_INTERNAL_ERROR:
- vlib_node_increment_counter (vm, node->node_index,
- IP6_ERROR_REASS_INTERNAL_ERROR, 1);
+ counter = IP6_ERROR_REASS_INTERNAL_ERROR;
+ break;
+ case IP6_SV_REASS_RC_INVALID_FRAG_LEN:
+ counter = IP6_ERROR_REASS_INVALID_FRAG_LEN;
+ break;
+ }
+ if (~0 != counter)
+ {
+ vlib_node_increment_counter (vm, node->node_index, counter, 1);
ip6_sv_reass_free (vm, rm, rt, reass);
goto next_packet;
- break;
}
if (reass->is_complete)
@@ -724,17 +744,16 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
vnet_buffer (b0)->ip.reass.l4_dst_port = reass->l4_dst_port;
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
- ip6_sv_reass_add_trace (vm, node, rm, reass, bi0,
- REASS_FRAGMENT_FORWARD,
- reass->ip_proto,
- reass->l4_src_port,
- reass->l4_dst_port);
+ ip6_sv_reass_add_trace (
+ vm, node, reass, bi0, REASS_FRAGMENT_FORWARD,
+ reass->ip_proto, reass->l4_src_port, reass->l4_dst_port);
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
to_next, n_left_to_next, bi0,
next0);
}
- _vec_len (reass->cached_buffers) = 0; // buffers are owned by frame now
+ vec_set_len (reass->cached_buffers,
+ 0); // buffers are owned by frame now
}
goto next_packet;
@@ -747,22 +766,25 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
b0 = vlib_get_buffer (vm, bi0);
vnet_feature_next (&next0, b0);
}
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
-
- if (~0 != icmp_bi)
+ if (custom_context && forward_context)
{
- next0 = IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR;
- to_next[0] = icmp_bi;
- to_next += 1;
- n_left_to_next -= 1;
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, icmp_bi,
- next0);
+ if (to_next_aux)
+ {
+ to_next_aux[0] = *context;
+ to_next_aux += 1;
+ }
+ vlib_validate_buffer_enqueue_with_aux_x1 (
+ vm, node, next_index, to_next, to_next_aux, n_left_to_next,
+ bi0, *context, next0);
}
+ else
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
next_packet:
from += 1;
+ if (custom_context)
+ context += 1;
n_left_from -= 1;
}
@@ -773,26 +795,21 @@ ip6_sv_reassembly_inline (vlib_main_t * vm,
return frame->n_vectors;
}
-static char *ip6_sv_reassembly_error_strings[] = {
-#define _(sym, string) string,
- foreach_ip6_error
-#undef _
-};
-
VLIB_NODE_FN (ip6_sv_reass_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */ );
+ return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */,
+ false /* custom next */,
+ false /* custom context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_node) = {
.name = "ip6-sv-reassembly",
.vector_size = sizeof (u32),
.format_trace = format_ip6_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
- .error_strings = ip6_sv_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -802,22 +819,22 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node) = {
[IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-handoff",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_sv_reass_node_feature) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */ );
+ return ip6_sv_reassembly_inline (vm, node, frame, true /* is_feature */,
+ false /* custom next */,
+ false /* custom context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = {
.name = "ip6-sv-reassembly-feature",
.vector_size = sizeof (u32),
.format_trace = format_ip6_sv_reass_trace,
- .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
- .error_strings = ip6_sv_reassembly_error_strings,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
.n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
.next_nodes =
{
@@ -827,16 +844,38 @@ VLIB_REGISTER_NODE (ip6_sv_reass_node_feature) = {
[IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reass-feature-hoff",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip6_sv_reassembly_feature) = {
.arc_name = "ip6-unicast",
.node_name = "ip6-sv-reassembly-feature",
.runs_before = VNET_FEATURES ("ip6-lookup"),
.runs_after = 0,
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_sv_reass_custom_context_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_sv_reassembly_inline (vm, node, frame, false /* is_feature */,
+ true /* custom next */,
+ true /* custom context */);
+}
+
+VLIB_REGISTER_NODE (ip6_sv_reass_custom_context_node) = {
+ .name = "ip6-sv-reassembly-custom-context",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .format_trace = format_ip6_sv_reass_trace,
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
+ .n_next_nodes = IP6_SV_REASSEMBLY_N_NEXT,
+ .next_nodes =
+ {
+ [IP6_SV_REASSEMBLY_NEXT_INPUT] = "ip6-input",
+ [IP6_SV_REASSEMBLY_NEXT_DROP] = "ip6-drop",
+ [IP6_SV_REASSEMBLY_NEXT_ICMP_ERROR] = "ip6-icmp-error",
+ [IP6_SV_REASSEMBLY_NEXT_HANDOFF] = "ip6-sv-reassembly-custom-context-handoff",
+ },
+};
#ifndef CLIB_MARCH_VARIANT
static u32
@@ -987,6 +1026,8 @@ ip6_sv_reass_init_function (vlib_main_t * vm)
rm->fq_index = vlib_frame_queue_main_init (ip6_sv_reass_node.index, 0);
rm->fq_feature_index =
vlib_frame_queue_main_init (ip6_sv_reass_node_feature.index, 0);
+ rm->fq_custom_context_index =
+ vlib_frame_queue_main_init (ip6_sv_reass_custom_context_node.index, 0);
rm->feature_use_refcount_per_intf = NULL;
@@ -997,8 +1038,9 @@ VLIB_INIT_FUNCTION (ip6_sv_reass_init_function);
#endif /* CLIB_MARCH_VARIANT */
static uword
-ip6_sv_reass_walk_expired (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * f)
+ip6_sv_reass_walk_expired (vlib_main_t *vm,
+ CLIB_UNUSED (vlib_node_runtime_t *node),
+ CLIB_UNUSED (vlib_frame_t *f))
{
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
uword event_type, *event_data = 0;
@@ -1012,10 +1054,11 @@ ip6_sv_reass_walk_expired (vlib_main_t * vm,
switch (event_type)
{
- case ~0: /* no events => timeout */
- /* nothing to do here */
- break;
+ case ~0:
+ /* no events => timeout */
+ /* fallthrough */
case IP6_EVENT_CONFIG_CHANGED:
+ /* nothing to do here */
break;
default:
clib_warning ("BUG: event type 0x%wx", event_type);
@@ -1035,7 +1078,6 @@ ip6_sv_reass_walk_expired (vlib_main_t * vm,
clib_spinlock_lock (&rt->lock);
vec_reset_length (pool_indexes_to_free);
- /* *INDENT-OFF* */
pool_foreach_index (index, rt->pool) {
reass = pool_elt_at_index (rt->pool, index);
if (now > reass->last_heard + rm->timeout)
@@ -1043,15 +1085,12 @@ ip6_sv_reass_walk_expired (vlib_main_t * vm,
vec_add1 (pool_indexes_to_free, index);
}
}
- /* *INDENT-ON* */
int *i;
- /* *INDENT-OFF* */
vec_foreach (i, pool_indexes_to_free)
{
ip6_sv_reass_t *reass = pool_elt_at_index (rt->pool, i[0]);
ip6_sv_reass_free (vm, rm, rt, reass);
}
- /* *INDENT-ON* */
clib_spinlock_unlock (&rt->lock);
}
@@ -1059,33 +1098,31 @@ ip6_sv_reass_walk_expired (vlib_main_t * vm,
vec_free (pool_indexes_to_free);
if (event_data)
{
- _vec_len (event_data) = 0;
+ vec_set_len (event_data, 0);
}
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reass_expire_node) = {
- .function = ip6_sv_reass_walk_expired,
- .format_trace = format_ip6_sv_reass_trace,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "ip6-sv-reassembly-expire-walk",
-
- .n_errors = ARRAY_LEN (ip6_sv_reassembly_error_strings),
- .error_strings = ip6_sv_reassembly_error_strings,
+ .function = ip6_sv_reass_walk_expired,
+ .format_trace = format_ip6_sv_reass_trace,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ip6-sv-reassembly-expire-walk",
+ .n_errors = IP6_N_ERROR,
+ .error_counters = ip6_error_counters,
};
-/* *INDENT-ON* */
static u8 *
format_ip6_sv_reass_key (u8 * s, va_list * args)
{
ip6_sv_reass_key_t *key = va_arg (*args, ip6_sv_reass_key_t *);
- s = format (s, "xx_id: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
- key->xx_id, format_ip6_address, &key->src, format_ip6_address,
- &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
+ s =
+ format (s, "fib_index: %u, src: %U, dst: %U, frag_id: %u, proto: %u",
+ key->fib_index, format_ip6_address, &key->src, format_ip6_address,
+ &key->dst, clib_net_to_host_u16 (key->frag_id), key->proto);
return s;
}
@@ -1142,11 +1179,9 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
clib_spinlock_lock (&rt->lock);
if (details)
{
- /* *INDENT-OFF* */
pool_foreach (reass, rt->pool) {
vlib_cli_output (vm, "%U", format_ip6_sv_reass, vm, reass);
}
- /* *INDENT-ON* */
}
sum_reass_n += rt->reass_n;
clib_spinlock_unlock (&rt->lock);
@@ -1172,13 +1207,11 @@ show_ip6_sv_reass (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip6_sv_reassembly_cmd, static) = {
.path = "show ip6-sv-reassembly",
.short_help = "show ip6-sv-reassembly [details]",
.function = show_ip6_sv_reass,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
vnet_api_error_t
@@ -1228,25 +1261,29 @@ format_ip6_sv_reassembly_handoff_trace (u8 * s, va_list * args)
}
always_inline uword
-ip6_sv_reassembly_handoff_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, bool is_feature)
+ip6_sv_reassembly_handoff_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, bool is_feature,
+ bool custom_context)
{
ip6_sv_reass_main_t *rm = &ip6_sv_reass_main;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
- u32 n_enq, n_left_from, *from;
+ u32 n_enq, n_left_from, *from, *context;
u16 thread_indices[VLIB_FRAME_SIZE], *ti;
u32 fq_index;
from = vlib_frame_vector_args (frame);
+ if (custom_context)
+ context = vlib_frame_aux_args (frame);
n_left_from = frame->n_vectors;
vlib_get_buffers (vm, from, bufs, n_left_from);
b = bufs;
ti = thread_indices;
- fq_index = (is_feature) ? rm->fq_feature_index : rm->fq_index;
+ fq_index = (is_feature) ?
+ rm->fq_feature_index :
+ (custom_context ? rm->fq_custom_context_index : rm->fq_index);
while (n_left_from > 0)
{
@@ -1265,8 +1302,12 @@ ip6_sv_reassembly_handoff_inline (vlib_main_t * vm,
ti += 1;
b += 1;
}
- n_enq = vlib_buffer_enqueue_to_thread (vm, node, fq_index, from,
- thread_indices, frame->n_vectors, 1);
+ if (custom_context)
+ n_enq = vlib_buffer_enqueue_to_thread_with_aux (
+ vm, node, fq_index, from, context, thread_indices, frame->n_vectors, 1);
+ else
+ n_enq = vlib_buffer_enqueue_to_thread (
+ vm, node, fq_index, from, thread_indices, frame->n_vectors, 1);
if (n_enq < frame->n_vectors)
vlib_node_increment_counter (vm, node->node_index,
@@ -1279,11 +1320,10 @@ VLIB_NODE_FN (ip6_sv_reassembly_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * frame)
{
- return ip6_sv_reassembly_handoff_inline (vm, node, frame,
- false /* is_feature */ );
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, false /* is_feature */, false /* custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = {
.name = "ip6-sv-reassembly-handoff",
.vector_size = sizeof (u32),
@@ -1302,11 +1342,11 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_handoff_node) = {
VLIB_NODE_FN (ip6_sv_reassembly_feature_handoff_node) (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
- return ip6_sv_reassembly_handoff_inline (vm, node, frame, true /* is_feature */ );
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, true /* is_feature */, false /* custom_context */);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = {
.name = "ip6-sv-reass-feature-hoff",
.vector_size = sizeof (u32),
@@ -1320,7 +1360,28 @@ VLIB_REGISTER_NODE (ip6_sv_reassembly_feature_handoff_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
+
+VLIB_NODE_FN (ip6_sv_reassembly_custom_context_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_sv_reassembly_handoff_inline (
+ vm, node, frame, false /* is_feature */, true /* custom_context */);
+}
+
+VLIB_REGISTER_NODE (ip6_sv_reassembly_custom_context_handoff_node) = {
+ .name = "ip6-sv-reassembly-custom-context-handoff",
+ .vector_size = sizeof (u32),
+ .aux_size = sizeof (u32),
+ .n_errors = ARRAY_LEN(ip6_sv_reassembly_handoff_error_strings),
+ .error_strings = ip6_sv_reassembly_handoff_error_strings,
+ .format_trace = format_ip6_sv_reassembly_handoff_trace,
+
+ .n_next_nodes = 1,
+
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
#ifndef CLIB_MARCH_VARIANT
int
@@ -1349,6 +1410,14 @@ ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable)
}
return 0;
}
+
+uword
+ip6_sv_reass_custom_context_register_next_node (uword node_index)
+{
+ return vlib_node_add_next (
+ vlib_get_main (), ip6_sv_reassembly_custom_context_handoff_node.index,
+ node_index);
+}
#endif
/*
diff --git a/src/vnet/ip/reass/ip6_sv_reass.h b/src/vnet/ip/reass/ip6_sv_reass.h
index 81ac2312bdf..7dc9df132dd 100644
--- a/src/vnet/ip/reass/ip6_sv_reass.h
+++ b/src/vnet/ip/reass/ip6_sv_reass.h
@@ -44,6 +44,7 @@ vnet_api_error_t ip6_sv_reass_enable_disable (u32 sw_if_index,
u8 enable_disable);
int ip6_sv_reass_enable_disable_with_refcnt (u32 sw_if_index, int is_enable);
+uword ip6_sv_reass_custom_context_register_next_node (uword node_index);
#endif /* __included_ip6_sv_reass_h */
diff --git a/src/vnet/ip/reass/reassembly.rst b/src/vnet/ip/reass/reassembly.rst
new file mode 100644
index 00000000000..49e0a8de6e6
--- /dev/null
+++ b/src/vnet/ip/reass/reassembly.rst
@@ -0,0 +1,221 @@
+.. _reassembly:
+
+IP Reassembly
+=============
+
+Some VPP functions need access to whole packet and/or stream
+classification based on L4 headers. Reassembly functionality allows
+both former and latter.
+
+Full reassembly vs shallow (virtual) reassembly
+-----------------------------------------------
+
+There are two kinds of reassembly available in VPP:
+
+1. Full reassembly changes a stream of packet fragments into one
+packet containing all data reassembled with fragment bits cleared
+and fragment header stripped (in case of ip6). Note that resulting
+packet may come out of reassembly as a buffer chain. Because it's
+impractical to parse headers which are split over multiple vnet
+buffers, vnet_buffer_chain_linearize() is called after reassembly so
+that L2/L3/L4 headers can be found in first buffer. Full reassembly
+is costly and shouldn't be used unless necessary. Full reassembly is by
+default enabled for both ipv4 and ipv6 "for us" traffic
+- that is packets aimed at VPP addresses. This can be disabled via API
+if desired, in which case "for us" fragments are dropped.
+
+2. Shallow (virtual) reassembly allows various classifying and/or
+translating features to work with fragments without having to
+understand fragmentation. It works by extracting L4 data and adding
+them to vnet_buffer for each packet/fragment passing through SVR
+nodes. This operation is performed for both fragments and regular
+packets, allowing consuming code to treat all packets in same way. SVR
+caches incoming packet fragments (buffers) until first fragment is
+seen. Then it extracts L4 data from that first fragment, fills it for
+any cached fragments and transmits them in the same order as they were
+received. From that point on, any other passing fragments get L4 data
+populated in vnet_buffer based on reassembly context.
+
+Multi-worker behaviour
+^^^^^^^^^^^^^^^^^^^^^^
+
+Both reassembly types deal with fragments arriving on different workers
+via handoff mechanism. All reassembly contexts are stored in pools.
+Bihash mapping 5-tuple key to a value containing pool index and thread
+index is used for lookups. When a lookup finds an existing reassembly on
+a different thread, it hands off the fragment to that thread. If lookup
+fails, a new reassembly context is created and current worker becomes
+owner of that context. Further fragments received on other worker
+threads are then handed off owner worker thread.
+
+Full reassembly also remembers thread index where first fragment (as in
+fragment with fragment offset 0) was seen and uses handoff mechanism to
+send the reassembled packet out on that thread even if pool owner is
+a different thread. This then requires an additional handoff to free
+reassembly context as only pool owner can do that in a thread-safe way.
+
+Limits
+^^^^^^
+
+Because reassembly could be an attack vector, there is a configurable
+limit on the number of concurrent reassemblies and also maximum
+fragments per packet.
+
+Custom applications
+^^^^^^^^^^^^^^^^^^^
+
+Both reassembly features allow to be used by custom application which
+are not part of VPP source tree. Be it patches or 3rd party plugins,
+they can build their own graph paths by using "-custom*" versions of
+nodes. Reassembly then reads next_index and error_next_index for each
+buffer from vnet_buffer, allowing custom application to steer
+both reassembled packets and any packets which are considered an error
+in a way the custom application requires.
+
+Full reassembly
+---------------
+
+Configuration
+^^^^^^^^^^^^^
+
+Configuration is via API (``ip_reassembly_enable_disable``) or CLI:
+
+``set interface reassembly <interface-name> [on|off|ip4|ip6]``
+
+here ``on`` means both ip4 and ip6.
+
+A show command is provided to see reassembly contexts:
+
+For ip4:
+
+``show ip4-full-reassembly [details]``
+
+For ip6:
+
+``show ip6-full-reassembly [details]``
+
+Global full reassembly parameters can be modified using API
+``ip_reassembly_set`` and retrieved using ``ip_reassembly_get``.
+
+Defaults
+""""""""
+
+For defaults values, see #defines in
+
+`ip4_full_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip4_full_reass.c>`_
+
+========================================= ==========================================
+#define description
+----------------------------------------- ------------------------------------------
+IP4_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds
+IP4_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP4_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies
+IP4_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly
+========================================= ==========================================
+
+and
+
+`ip6_full_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip6_full_reass.c>`_
+
+========================================= ==========================================
+#define description
+----------------------------------------- ------------------------------------------
+IP6_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds
+IP6_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP6_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies
+IP6_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly
+========================================= ==========================================
+
+Finished/expired contexts
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Reassembly contexts are freed either when reassembly is finished - when
+all data has been received or in case of timeout. There is a process
+walking all reassemblies, freeing any expired ones.
+
+Shallow (virtual) reassembly
+----------------------------
+
+Configuration
+^^^^^^^^^^^^^
+
+Configuration is via API (``ip_reassembly_enable_disable``) only as
+there is no value in turning SVR on by hand without a feature consuming
+buffer metadata. SVR is designed to be turned on by a feature requiring
+it in a programmatic way.
+
+A show command is provided to see reassembly contexts:
+
+For ip4:
+
+``show ip4-sv-reassembly [details]``
+
+For ip6:
+
+``show ip6-sv-reassembly [details]``
+
+Global shallow reassembly parameters can be modified using API
+``ip_reassembly_set`` and retrieved using ``ip_reassembly_get``.
+
+Defaults
+""""""""
+
+For defaults values, see #defines in
+
+`ip4_sv_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip4_sv_reass.c>`_
+
+============================================ ==========================================
+#define description
+-------------------------------------------- ------------------------------------------
+IP4_SV_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds
+IP4_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP4_SV_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies
+IP4_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly
+============================================ ==========================================
+
+and
+
+`ip6_sv_reass.c <__REPOSITORY_URL__/src/vnet/ip/reass/ip6_sv_reass.c>`_
+
+============================================ ==========================================
+#define description
+-------------------------------------------- ------------------------------------------
+IP6_SV_REASS_TIMEOUT_DEFAULT_MS timeout in milliseconds
+IP6_SV_REASS_EXPIRE_WALK_INTERVAL_DEFAULT_MS interval between reaping expired sessions
+IP6_SV_REASS_MAX_REASSEMBLIES_DEFAULT maximum number of concurrent reassemblies
+IP6_SV_REASS_MAX_REASSEMBLY_LENGTH_DEFAULT maximum number of fragments per reassembly
+============================================ ==========================================
+
+Expiring contexts
+^^^^^^^^^^^^^^^^^
+
+There is no way of knowing when a reassembly is finished without
+performing (an almost) full reassembly, so contexts in SVR cannot be
+freed in the same way as in full reassembly. Instead a different
+approach is taken. Least recently used (LRU) list is maintained where
+reassembly contexts are ordered based on last update. The oldest
+context is then freed whenever SVR hits limit on number of concurrent
+reassembly contexts. There is also a process reaping expired sessions
+similar as in full reassembly.
+
+Truncated packets
+^^^^^^^^^^^^^^^^^
+
+When SVR detects that a packet has been truncated in a way where L4
+headers are not available, it will mark it as such in vnet_buffer,
+allowing downstream features to handle such packets as they deem fit.
+
+Fast path/slow path
+^^^^^^^^^^^^^^^^^^^
+
+SVR runs is implemented fast path/slow path way. By default, it assumes
+that any passing traffic doesn't contain fragments, processing buffers
+in a dual-loop. If it sees a fragment, it then jumps to single-loop
+processing.
+
+Feature enabled by other features/reference counting
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+SVR feature is enabled by some other features, like NAT, when those
+features are enabled. For this to work, it implements a reference
+counted API for enabling/disabling SVR.
diff --git a/src/vnet/ip/vtep.h b/src/vnet/ip/vtep.h
index 92e8002e55a..97e74429e88 100644
--- a/src/vnet/ip/vtep.h
+++ b/src/vnet/ip/vtep.h
@@ -29,7 +29,6 @@
* processing and go directly to the tunnel protocol handler node.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
union {
@@ -40,7 +39,6 @@ typedef CLIB_PACKED
u64 as_u64;
};
}) vtep4_key_t;
-/* *INDENT-ON* */
/**
* @brief Tunnel endpoint key (IPv6)
@@ -51,13 +49,11 @@ typedef CLIB_PACKED
* processing and go directly to the tunnel protocol handler node.
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED
(struct {
ip6_address_t addr;
u32 fib_index;
}) vtep6_key_t;
-/* *INDENT-ON* */
typedef struct
{
@@ -111,13 +107,13 @@ vtep4_check (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40,
return VTEP_CHECK_PASS;
}
-#ifdef CLIB_HAVE_VEC512
typedef struct
{
vtep4_key_t vtep4_cache[8];
int idx;
} vtep4_cache_t;
+#ifdef CLIB_HAVE_VEC512
always_inline u8
vtep4_check_vector (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40,
vtep4_key_t * last_k4, vtep4_cache_t * vtep4_u512)
diff --git a/src/vnet/ip6-nd/FEATURE.yaml b/src/vnet/ip6-nd/FEATURE.yaml
index ce16fa24bd1..ccdbf10d2ef 100644
--- a/src/vnet/ip6-nd/FEATURE.yaml
+++ b/src/vnet/ip6-nd/FEATURE.yaml
@@ -1,12 +1,13 @@
---
-name: IPv6 Neighbor Discovery
+name: IPv6 Neighbor Discovery and Proxy
maintainer: Neale Ranns <nranns@cisco.com>
features:
- Neighbor discovery.
- ND Auto address configuration
- Multicast Listener Discovery - only as host role to send adverts
- Router Advertisements
+ - ND (mirror) proxy on given interface
-description: "An implementation of the IPv6 Neighbor discovery protocol as described in RFC4861 and RFC4862."
+description: "An implementation of the IPv6 Neighbor discovery protocol as described in RFC4861 and RFC4862. It also implements ND (mirror) proxy on given interface (some inspiration from RFC4389)."
state: production
properties: [API, CLI, MULTITHREAD]
diff --git a/src/vnet/ip6-nd/ip6_mld.c b/src/vnet/ip6-nd/ip6_mld.c
index ea70bcc5d19..74428ec93c3 100644
--- a/src/vnet/ip6-nd/ip6_mld.c
+++ b/src/vnet/ip6-nd/ip6_mld.c
@@ -33,7 +33,6 @@
* adjacency tables and neighbor discovery logic.
*/
-/* *INDENT-OFF*/
/* multicast listener report packet format for ethernet. */
typedef CLIB_PACKED (struct
{
@@ -51,7 +50,6 @@ typedef CLIB_PACKED (struct
ip6_header_t ip;
icmp6_multicast_listener_report_header_t report_hdr;
}) icmp6_multicast_listener_report_packet_t;
-/* *INDENT-ON*/
typedef struct
{
@@ -224,12 +222,10 @@ ip6_mld_delegate_disable (index_t imdi)
imd = pool_elt_at_index (ip6_mld_pool, imdi);
/* clean MLD pools */
- /* *INDENT-OFF* */
pool_flush (m, imd->mldp_group_pool,
({
mhash_unset (&imd->address_to_mldp_index, &m->mcast_address, 0);
}));
- /* *INDENT-ON* */
pool_free (imd->mldp_group_pool);
@@ -326,7 +322,6 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index)
rh0->icmp.checksum = 0;
- /* *INDENT-OFF* */
pool_foreach (m, imd->mldp_group_pool)
{
rr.type = m->type;
@@ -345,7 +340,6 @@ ip6_neighbor_send_mldpv2_report (u32 sw_if_index)
payload_length += sizeof( icmp6_multicast_address_record_t);
}
- /* *INDENT-ON* */
rh0->rsvd = 0;
rh0->num_addr_records = clib_host_to_net_u16 (num_addr_records);
@@ -388,7 +382,6 @@ ip6_mld_timer_event (vlib_main_t * vm,
ip6_mld_t *imd;
/* Interface ip6 radv info list */
- /* *INDENT-OFF* */
pool_foreach (imd, ip6_mld_pool)
{
if (!vnet_sw_interface_is_admin_up (vnm, imd->sw_if_index))
@@ -405,7 +398,6 @@ ip6_mld_timer_event (vlib_main_t * vm,
imd->all_routers_mcast = 1;
}
}
- /* *INDENT-ON* */
return 0;
}
@@ -433,13 +425,11 @@ ip6_mld_event_process (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_mld_event_process_node) = {
.function = ip6_mld_event_process,
.name = "ip6-mld-process",
.type = VLIB_NODE_TYPE_PROCESS,
};
-/* *INDENT-ON* */
static u8 *
format_ip6_mld (u8 * s, va_list * args)
@@ -453,7 +443,6 @@ format_ip6_mld (u8 * s, va_list * args)
s = format (s, "%UJoined group address(es):\n", format_white_space, indent);
- /* *INDENT-OFF* */
pool_foreach (m, imd->mldp_group_pool)
{
s = format (s, "%U%U\n",
@@ -461,7 +450,6 @@ format_ip6_mld (u8 * s, va_list * args)
format_ip6_address,
&m->mcast_address);
}
- /* *INDENT-ON* */
return (s);
}
@@ -526,12 +514,10 @@ ip6_mld_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_mld_init) =
{
.runs_after = VLIB_INITS("icmp6_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_nd.api b/src/vnet/ip6-nd/ip6_nd.api
index 91b5faf9bdf..3ddf25103c1 100644
--- a/src/vnet/ip6-nd/ip6_nd.api
+++ b/src/vnet/ip6-nd/ip6_nd.api
@@ -20,7 +20,7 @@
called through a shared memory interface.
*/
-option version = "1.0.0";
+option version = "1.1.0";
import "vnet/ip/ip_types.api";
import "vnet/interface_types.api";
@@ -106,6 +106,148 @@ autoreply define sw_interface_ip6nd_ra_prefix
u32 pref_lifetime;
};
+/** \brief IPv6 Router Advertisements prefix entry
+ @param prefix - prefix to advertise
+ @param onlink_flag - if true, the prefix can be used for on-link
+ determination
+ @param autonomous_flag - if true, the prefix can be used for stateless
+ address configuration
+ @param val_lifetime - valid lifetime in seconds (0xffffffff represents
+ infinity)
+ @param pref_lifetime - preferred lifetime in seconds (0xffffffff represents
+ infinity)
+ @param valid_lifetime_expires - number of seconds in which valid lifetime
+ expires (zero means never, negative value
+ means expired this number of seconds ago)
+ @param pref_lifetime_expires - number of seconds in which preferred
+ lifetime expires (zero means never, negative
+ value means expired this number of seconds
+ ago)
+ @param decrement_lifetime_flag - if true, decrement valid lifetime and
+ preferred lifetime
+ @param no_advertise - if true, the prefix will not be advertised
+*/
+typedef ip6nd_ra_prefix
+{
+ vl_api_prefix_t prefix;
+ bool onlink_flag;
+ bool autonomous_flag;
+ u32 val_lifetime;
+ u32 pref_lifetime;
+ f64 valid_lifetime_expires;
+ f64 pref_lifetime_expires;
+ bool decrement_lifetime_flag;
+ bool no_advertise;
+};
+
+/** \brief Dump IPv6 Router Advertisements details on a per-interface basis
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - interface index to use as a filter (0xffffffff
+ represents all interfaces)
+*/
+define sw_interface_ip6nd_ra_dump
+{
+ option in_progress;
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ option vat_help = "[(<if-name>|sw_if_index <if-idx>)]";
+};
+
+/** \brief Details on IPv6 Router Advertisements for a single interface
+ @param context - returned sender context, to match reply w/ request
+ @param sw_if_index - interface index the details are belong to
+ @param cur_hop_limit - current hop limit
+ @param adv_managed_flag - if true, enable DHCP for address
+ @param adv_other_flag - if true, Enable DHCP for other information
+ @param adv_router_lifetime - lifetime associated with the default router in
+ seconds (zero indicates that the router is not
+ a default router)
+ @param adv_neighbor_reachable_time - number of milliseconds within which a
+ neighbor is assumed to be reachable
+ (zero means unspecified)
+ @param adv_retransmit_interval - number of milliseconds between
+ retransmitted Neighbor Solicitation
+ messages (zero means unspecified)
+ @param adv_link_mtu - MTU that all the nodes on a link use
+ @param send_radv - if true, send periodic Router Advertisements
+ @param cease_radv - if true, cease to send periodic Router Advertisements
+ @param send_unicast - if true, destination address of a Router
+ Advertisement message will use the source address of
+ the Router Solicitation message (when available).
+ Otherwise, multicast address will be used
+ @param adv_link_layer_address - if true, add link layer address option
+ @param max_radv_interval - maximum time in seconds allowed between sending
+ unsolicited multicast Router Advertisements
+ @param min_radv_interval - minimum time in seconds allowed between sending
+ unsolicited multicast Router Advertisements
+ @param last_radv_time - number of seconds since the last time a solicited
+ Router Advertisement message was sent (zero means
+ never)
+ @param last_multicast_time - number of seconds since the last time a
+ multicast Router Advertisements message was
+ sent (zero means never)
+ @param next_multicast_time - number of seconds within which next time a
+ multicast Router Advertisement message will be
+ sent (zero means never)
+ @param initial_adverts_count - number of initial Router Advertisement
+ messages to send
+ @param initial_adverts_interval - number of seconds between initial Router
+ Advertisement messages
+ @param initial_adverts_sent - if true, all initial Router Advertisement
+ messages were sent
+ @param n_advertisements_sent - number of Router Advertisements sent
+ @param n_solicitations_rcvd - number of Router Solicitations received
+ @param n_solicitations_dropped - number of Router Solicitations dropped
+ @param n_prefixes - number of prefix entries
+ @param prefixes - array of prefix entries
+*/
+define sw_interface_ip6nd_ra_details
+{
+ option in_progress;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u8 cur_hop_limit;
+ bool adv_managed_flag;
+ bool adv_other_flag;
+ u16 adv_router_lifetime;
+ u32 adv_neighbor_reachable_time;
+ u32 adv_retransmit_interval;
+ u32 adv_link_mtu;
+ bool send_radv;
+ bool cease_radv;
+ bool send_unicast;
+ bool adv_link_layer_address;
+ f64 max_radv_interval;
+ f64 min_radv_interval;
+ f64 last_radv_time;
+ f64 last_multicast_time;
+ f64 next_multicast_time;
+ u32 initial_adverts_count;
+ f64 initial_adverts_interval;
+ bool initial_adverts_sent;
+ u32 n_advertisements_sent;
+ u32 n_solicitations_rcvd;
+ u32 n_solicitations_dropped;
+ u32 n_prefixes;
+ vl_api_ip6nd_ra_prefix_t prefixes[n_prefixes];
+};
+
+/** \brief IPv6 ND (mirror) proxy
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - The interface the host is on
+ @param is_enable - enable or disable
+*/
+autoreply define ip6nd_proxy_enable_disable
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ bool is_enable;
+};
+
/** \brief IPv6 ND proxy config
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/ip6-nd/ip6_nd.c b/src/vnet/ip6-nd/ip6_nd.c
index 772c811ae20..763aca290e6 100644
--- a/src/vnet/ip6-nd/ip6_nd.c
+++ b/src/vnet/ip6-nd/ip6_nd.c
@@ -90,6 +90,7 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
icmp6_neighbor_discovery_ethernet_link_layer_address_option_t *o0;
u32 bi0, options_len0, sw_if_index0, next0, error0;
u32 ip6_sadd_link_local, ip6_sadd_unspecified;
+ ip_neighbor_counter_type_t c_type;
int is_rewrite0;
u32 ni0;
@@ -148,7 +149,6 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
!ip6_sadd_unspecified))
{
- /* *INDENT-OFF* */
ip_neighbor_learn_t learn = {
.sw_if_index = sw_if_index0,
.ip = {
@@ -158,7 +158,6 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
h0->target_address),
}
};
- /* *INDENT-ON* */
memcpy (&learn.mac, o0->ethernet_address, sizeof (learn.mac));
ip_neighbor_learn_dp (&learn);
}
@@ -230,16 +229,24 @@ icmp6_neighbor_solicitation_or_advertisement (vlib_main_t * vm,
}
if (is_solicitation)
- next0 = (error0 != ICMP6_ERROR_NONE
- ? ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP
- : ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY);
+ {
+ next0 = (error0 != ICMP6_ERROR_NONE ?
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP :
+ ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY);
+ c_type = IP_NEIGHBOR_CTR_REQUEST;
+ }
else
{
next0 = 0;
error0 = error0 == ICMP6_ERROR_NONE ?
ICMP6_ERROR_NEIGHBOR_ADVERTISEMENTS_RX : error0;
+ c_type = IP_NEIGHBOR_CTR_REPLY;
}
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP6].ipnc[VLIB_RX][c_type],
+ vm->thread_index, sw_if_index0, 1);
+
if (is_solicitation && error0 == ICMP6_ERROR_NONE)
{
icmp6_send_neighbor_advertisement (vm, p0, ip0, h0, o0,
@@ -334,7 +341,6 @@ icmp6_neighbor_advertisement (vlib_main_t * vm,
0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_neighbor_solicitation_node,static) =
{
.function = icmp6_neighbor_solicitation,
@@ -365,7 +371,6 @@ VLIB_REGISTER_NODE (ip6_icmp_neighbor_advertisement_node,static) =
[0] = "ip6-punt",
},
};
-/* *INDENT-ON* */
static u8 *
format_ip6_nd (u8 * s, va_list * args)
@@ -418,12 +423,10 @@ ip6_nd_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_nd_init) =
{
.runs_after = VLIB_INITS("icmp6_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_nd.h b/src/vnet/ip6-nd/ip6_nd.h
index 4dab7440b4a..2aade3fb512 100644
--- a/src/vnet/ip6-nd/ip6_nd.h
+++ b/src/vnet/ip6-nd/ip6_nd.h
@@ -23,6 +23,7 @@
extern int ip6_nd_proxy_add (u32 sw_if_index, const ip6_address_t * addr);
extern int ip6_nd_proxy_del (u32 sw_if_index, const ip6_address_t * addr);
+extern int ip6_nd_proxy_enable_disable (u32 sw_if_index, u8 enable);
#endif /* included_ip6_neighbor_h */
diff --git a/src/vnet/ip6-nd/ip6_nd_api.c b/src/vnet/ip6-nd/ip6_nd_api.c
index 9e4024670e6..5555d8fea64 100644
--- a/src/vnet/ip6-nd/ip6_nd_api.c
+++ b/src/vnet/ip6-nd/ip6_nd_api.c
@@ -83,7 +83,7 @@ static void
vl_api_ip6nd_proxy_dump_t_handler (vl_api_ip6nd_proxy_dump_t * mp)
{
ip6_main_t *im6 = &ip6_main;
- fib_table_t *fib_table;
+ u32 fib_index;
api_ip6nd_proxy_fib_table_walk_ctx_t ctx = {
.indices = NULL,
};
@@ -95,15 +95,11 @@ vl_api_ip6nd_proxy_dump_t_handler (vl_api_ip6nd_proxy_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
- pool_foreach (fib_table, im6->fibs)
- {
- fib_table_walk(fib_table->ft_index,
- FIB_PROTOCOL_IP6,
- api_ip6nd_proxy_fib_table_walk,
- &ctx);
- }
- /* *INDENT-ON* */
+ pool_foreach_index (fib_index, im6->fibs)
+ {
+ fib_table_walk (fib_index, FIB_PROTOCOL_IP6,
+ api_ip6nd_proxy_fib_table_walk, &ctx);
+ }
vec_sort_with_function (ctx.indices, fib_entry_cmp_for_sort);
@@ -121,6 +117,24 @@ vl_api_ip6nd_proxy_dump_t_handler (vl_api_ip6nd_proxy_dump_t * mp)
}
static void
+vl_api_ip6nd_proxy_enable_disable_t_handler (
+ vl_api_ip6nd_proxy_enable_disable_t *mp)
+{
+ vl_api_ip6nd_proxy_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ if (mp->is_enable)
+ rv = ip6_nd_proxy_enable_disable (ntohl (mp->sw_if_index), 1);
+ else
+ rv = ip6_nd_proxy_enable_disable (ntohl (mp->sw_if_index), 0);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_IP6ND_PROXY_ENABLE_DISABLE_REPLY);
+}
+
+static void
vl_api_ip6nd_proxy_add_del_t_handler (vl_api_ip6nd_proxy_add_del_t * mp)
{
vl_api_ip6nd_proxy_add_del_reply_t *rmp;
@@ -206,6 +220,175 @@ static void
}
static void
+ip6_radv_prefix_encode (f64 now, const ip6_radv_prefix_t *in,
+ vl_api_ip6nd_ra_prefix_t *out)
+{
+ fib_prefix_t in_ip6_pfx = {
+ .fp_addr = {
+ .ip6 = in->prefix,
+ },
+ .fp_len = in->prefix_len,
+ .fp_proto = FIB_PROTOCOL_IP6,
+ };
+
+ ip_prefix_encode (&in_ip6_pfx, &out->prefix);
+
+ out->onlink_flag = in->adv_on_link_flag;
+ out->autonomous_flag = in->adv_autonomous_flag;
+ out->val_lifetime = htonl (in->adv_valid_lifetime_in_secs);
+ out->pref_lifetime = htonl (in->adv_pref_lifetime_in_secs);
+
+ if (in->adv_valid_lifetime_in_secs != ~0)
+ {
+ out->valid_lifetime_expires =
+ clib_host_to_net_f64 (in->valid_lifetime_expires - now);
+ }
+
+ if (in->adv_pref_lifetime_in_secs != ~0)
+ {
+ out->pref_lifetime_expires =
+ clib_host_to_net_f64 (in->pref_lifetime_expires - now);
+ }
+
+ out->decrement_lifetime_flag = in->decrement_lifetime_flag;
+ out->no_advertise = (in->enabled == 0);
+}
+
+static void
+send_sw_interface_ip6nd_ra_details (vl_api_registration_t *reg, u32 context,
+ ip6_ra_t *radv_info)
+{
+ vl_api_sw_interface_ip6nd_ra_details_t *rmp = 0;
+ vl_api_ip6nd_ra_prefix_t *api_radv_pfx;
+ u32 n_prefixes = pool_elts (radv_info->adv_prefixes_pool);
+ ip6_radv_prefix_t *radv_pfx;
+ u32 msg_size = sizeof (*rmp) + n_prefixes * sizeof (*api_radv_pfx);
+ vlib_main_t *vm = vlib_get_main ();
+ f64 now = vlib_time_now (vm);
+
+ rmp = vl_msg_api_alloc (msg_size);
+ if (!rmp)
+ return;
+ clib_memset (rmp, 0, msg_size);
+ rmp->_vl_msg_id =
+ ntohs (VL_API_SW_INTERFACE_IP6ND_RA_DETAILS + REPLY_MSG_ID_BASE);
+ rmp->context = context;
+
+ rmp->sw_if_index = htonl (radv_info->sw_if_index);
+ rmp->cur_hop_limit = radv_info->curr_hop_limit;
+ rmp->adv_managed_flag = radv_info->adv_managed_flag;
+ rmp->adv_other_flag = radv_info->adv_other_flag;
+ rmp->adv_router_lifetime = htons (radv_info->adv_router_lifetime_in_sec);
+ rmp->adv_neighbor_reachable_time =
+ htonl (radv_info->adv_neighbor_reachable_time_in_msec);
+ rmp->adv_retransmit_interval = htonl (
+ radv_info->adv_time_in_msec_between_retransmitted_neighbor_solicitations);
+ rmp->adv_link_mtu = htonl (radv_info->adv_link_mtu);
+ rmp->send_radv = radv_info->send_radv;
+ rmp->cease_radv = radv_info->cease_radv;
+ rmp->send_unicast = radv_info->send_unicast;
+ rmp->adv_link_layer_address = radv_info->adv_link_layer_address;
+ rmp->max_radv_interval = clib_host_to_net_f64 (radv_info->max_radv_interval);
+ rmp->min_radv_interval = clib_host_to_net_f64 (radv_info->min_radv_interval);
+
+ if (radv_info->last_radv_time > 0.0)
+ {
+ rmp->last_radv_time =
+ clib_host_to_net_f64 (now - radv_info->last_radv_time);
+ }
+
+ if ((radv_info->next_multicast_time - radv_info->last_multicast_time) > 0.0)
+ {
+ rmp->last_multicast_time =
+ clib_host_to_net_f64 (now - radv_info->last_multicast_time);
+ rmp->next_multicast_time =
+ clib_host_to_net_f64 (radv_info->next_multicast_time - now);
+ }
+
+ rmp->initial_adverts_count = htonl (radv_info->initial_adverts_count);
+ rmp->initial_adverts_interval =
+ clib_host_to_net_f64 (radv_info->initial_adverts_interval);
+ rmp->initial_adverts_sent = (radv_info->initial_adverts_sent == 0);
+ rmp->n_advertisements_sent = htonl (radv_info->n_advertisements_sent);
+ rmp->n_solicitations_rcvd = htonl (radv_info->n_solicitations_rcvd);
+ rmp->n_solicitations_dropped = htonl (radv_info->n_solicitations_dropped);
+ rmp->n_prefixes = htonl (n_prefixes);
+
+ api_radv_pfx = rmp->prefixes;
+ pool_foreach (radv_pfx, radv_info->adv_prefixes_pool)
+ {
+ ip6_radv_prefix_encode (now, radv_pfx, api_radv_pfx);
+
+ api_radv_pfx++;
+ }
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+typedef struct
+{
+ u32 *sw_if_indices;
+} api_dump_ip6_ra_itf_walk_ctx_t;
+
+static walk_rc_t
+api_dump_ip6_ra_itf_walk_fn (u32 sw_if_index, void *arg)
+{
+ api_dump_ip6_ra_itf_walk_ctx_t *ctx = arg;
+
+ vec_add1 (ctx->sw_if_indices, sw_if_index);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_sw_interface_ip6nd_ra_dump_t_handler (
+ vl_api_sw_interface_ip6nd_ra_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ u32 sw_if_index;
+ ip6_ra_t *radv_info;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (sw_if_index == INDEX_INVALID)
+ {
+ /* dump all interfaces */
+
+ api_dump_ip6_ra_itf_walk_ctx_t ctx = {
+ .sw_if_indices = NULL,
+ };
+ u32 *sw_if_i;
+
+ ip6_ra_itf_walk (api_dump_ip6_ra_itf_walk_fn, &ctx);
+
+ vec_foreach (sw_if_i, ctx.sw_if_indices)
+ {
+ radv_info = ip6_ra_get_itf (*sw_if_i);
+ if (radv_info != NULL)
+ {
+ send_sw_interface_ip6nd_ra_details (reg, mp->context, radv_info);
+ }
+ }
+
+ vec_free (ctx.sw_if_indices);
+ }
+ else
+ {
+ /* dump a single interface */
+
+ radv_info = ip6_ra_get_itf (sw_if_index);
+ if (radv_info != NULL)
+ {
+ send_sw_interface_ip6nd_ra_details (reg, mp->context, radv_info);
+ }
+ }
+}
+
+static void
vl_api_ip6nd_send_router_solicitation_t_handler
(vl_api_ip6nd_send_router_solicitation_t * mp)
{
@@ -234,7 +417,6 @@ static void
static void
ip6_ra_handle_report (const ip6_ra_report_t * rap)
{
- /* *INDENT-OFF* */
vpe_client_registration_t *rp;
pool_foreach (rp, vpe_api_main.ip6_ra_events_registrations)
@@ -288,7 +470,6 @@ ip6_ra_handle_report (const ip6_ra_report_t * rap)
vl_api_send_msg (vl_reg, (u8 *) event);
}
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/ip6-nd/ip6_nd_inline.h b/src/vnet/ip6-nd/ip6_nd_inline.h
index ad0c3a3a79b..c959c94ed1d 100644
--- a/src/vnet/ip6-nd/ip6_nd_inline.h
+++ b/src/vnet/ip6-nd/ip6_nd_inline.h
@@ -22,6 +22,8 @@
#include <vnet/ethernet/ethernet.h>
#include <vnet/ip/icmp46_packet.h>
#include <vnet/ip/ip6.h>
+#include <vnet/ip-neighbor/ip_neighbor_types.h>
+#include <vnet/ip6-nd/ip6_ra.h>
typedef enum
{
@@ -70,6 +72,13 @@ icmp6_send_neighbor_advertisement (
clib_host_to_net_u32 (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_SOLICITED |
ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_OVERRIDE);
+ /* if sending RAs is enabled, the "router" flag should be set,
+ * otherwise, neighbors may believe we have changed from a router
+ * to a host - RFC 4861 section 4.4 */
+ if (ip6_ra_adv_enabled (sw_if_index0))
+ icmp6_nsa->advertisement_flags |=
+ clib_host_to_net_u32 (ICMP6_NEIGHBOR_ADVERTISEMENT_FLAG_ROUTER);
+
icmp6_nsa->icmp.checksum = 0;
icmp6_nsa->icmp.checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b, ip6_h, &bogus_length);
@@ -88,6 +97,10 @@ icmp6_send_neighbor_advertisement (
vnet_buffer (b)->sw_if_index[VLIB_TX] = sw_if_index0;
vnet_buffer (b)->sw_if_index[VLIB_RX] =
vnet_main.local_interface_sw_if_index;
+
+ vlib_increment_simple_counter (
+ &ip_neighbor_counters[AF_IP6].ipnc[VLIB_TX][IP_NEIGHBOR_CTR_REPLY],
+ vm->thread_index, sw_if_index0, 1);
}
#endif /* included_ip6_nd_inline_h */
diff --git a/src/vnet/ip6-nd/ip6_nd_mirror_proxy.c b/src/vnet/ip6-nd/ip6_nd_mirror_proxy.c
new file mode 100644
index 00000000000..478bb05e1d3
--- /dev/null
+++ b/src/vnet/ip6-nd/ip6_nd_mirror_proxy.c
@@ -0,0 +1,414 @@
+/*
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/feature/feature.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip-neighbor/ip6_neighbor.h>
+#include <vnet/ip-neighbor/ip_neighbor.h>
+#include <vnet/ip-neighbor/ip_neighbor_dp.h>
+#include <vnet/ip6-nd/ip6_nd_inline.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/ip/ip6_ll_table.h>
+
+#include <vppinfra/error.h>
+
+int
+ip6_nd_proxy_enable_disable (u32 sw_if_index, u8 enable)
+{
+
+ if (enable)
+ {
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-unicast-nd-proxy",
+ sw_if_index, 1, NULL, 0);
+ vnet_feature_enable_disable ("ip6-multicast", "ip6-multicast-nd-proxy",
+ sw_if_index, 1, NULL, 0);
+ }
+ else
+ {
+ vnet_feature_enable_disable ("ip6-unicast", "ip6-unicast-nd-proxy",
+ sw_if_index, 0, NULL, 0);
+ vnet_feature_enable_disable ("ip6-multicast", "ip6-multicast-nd-proxy",
+ sw_if_index, 0, NULL, 0);
+ }
+ return 0;
+}
+
+static clib_error_t *
+set_int_ip6_nd_proxy_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 sw_if_index;
+ int enable = 0;
+
+ sw_if_index = ~0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else if (unformat (input, "enable"))
+ enable = 1;
+ else if (unformat (input, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (~0 == sw_if_index)
+ return clib_error_return (0, "unknown input '%U'", format_unformat_error,
+ input);
+
+ ip6_nd_proxy_enable_disable (sw_if_index, enable);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (set_int_ip6_nd_proxy_enable_command, static) = {
+ .path = "set interface ip6-nd proxy",
+ .short_help = "set interface ip6-nd proxy <intfc> [enable|disable]",
+ .function = set_int_ip6_nd_proxy_command_fn,
+};
+
+typedef struct
+{
+ u8 is_multicast;
+ u32 sw_if_index;
+} vnet_ip6_nd_proxy_trace_t;
+
+static u8 *
+format_ip6_nd_proxy_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+ vnet_ip6_nd_proxy_trace_t *t = va_arg (*args, vnet_ip6_nd_proxy_trace_t *);
+ u32 indent = format_get_indent (s);
+
+ if (t->is_multicast)
+ s = format (s, "%U %U multicast ", format_white_space, indent,
+ format_vnet_sw_if_index_name, vnm, t->sw_if_index);
+ else
+ s = format (s, "%U %U unicast ", format_white_space, indent,
+ format_vnet_sw_if_index_name, vnm, t->sw_if_index);
+
+ return s;
+}
+
+static_always_inline void
+ip6_nd_proxy_unicast (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b0, ip6_header_t *ip6, u32 *next0)
+{
+ if (PREDICT_FALSE (ip6->protocol == IP_PROTOCOL_ICMP6))
+ {
+ icmp46_header_t *icmp0;
+ icmp6_type_t type0;
+
+ icmp0 = ip6_next_header (ip6);
+ type0 = icmp0->type;
+ if (type0 == ICMP6_neighbor_solicitation ||
+ type0 == ICMP6_neighbor_advertisement)
+ {
+ icmp6_neighbor_solicitation_or_advertisement_header_t *icmp6_nsa;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ *icmp6_nd_ell_addr;
+ u32 sw_if_index0;
+
+ icmp6_nsa = (void *) icmp0;
+ icmp6_nd_ell_addr = (void *) (icmp6_nsa + 1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+
+ /* unicast neighbor solicitation */
+ fib_node_index_t fei;
+ u32 fib_index;
+
+ fib_index = ip6_fib_table_get_index_for_sw_if_index (sw_if_index0);
+
+ if (~0 == fib_index)
+ {
+ *next0 = ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP;
+ }
+ else
+ {
+ if (ip6_address_is_link_local_unicast (&ip6->dst_address))
+ {
+ fei = ip6_fib_table_lookup_exact_match (
+ ip6_ll_fib_get (sw_if_index0), &ip6->dst_address, 128);
+ }
+ else
+ {
+ fei = ip6_fib_table_lookup_exact_match (
+ fib_index, &ip6->dst_address, 128);
+ }
+
+ if (FIB_NODE_INDEX_INVALID != fei)
+ {
+ *next0 = ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY;
+ icmp6_send_neighbor_advertisement (
+ vm, b0, ip6, icmp6_nsa, icmp6_nd_ell_addr, sw_if_index0);
+ }
+ }
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ vnet_ip6_nd_proxy_trace_t *t;
+ t = vlib_add_trace (vm, node, b0, sizeof (t[0]));
+ t->sw_if_index = sw_if_index0;
+ t->is_multicast = 0;
+ }
+ }
+ }
+}
+
+static_always_inline void
+ip6_nd_proxy_multicast (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b0, ip6_header_t *ip6, u32 *next0)
+{
+ if (PREDICT_FALSE (ip6->protocol == IP_PROTOCOL_ICMP6))
+ {
+ icmp46_header_t *icmp0;
+ icmp6_type_t type0;
+
+ icmp0 = ip6_next_header (ip6);
+ type0 = icmp0->type;
+ if (type0 == ICMP6_neighbor_solicitation ||
+ type0 == ICMP6_neighbor_advertisement)
+ {
+ icmp6_neighbor_solicitation_or_advertisement_header_t *icmp6_nsa;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ *icmp6_nd_ell_addr;
+ u32 sw_if_index0;
+
+ icmp6_nsa = (void *) icmp0;
+ icmp6_nd_ell_addr = (void *) (icmp6_nsa + 1);
+
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
+ if (type0 == ICMP6_neighbor_solicitation)
+ {
+ if (
+ (icmp6_nd_ell_addr->header.type ==
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_source_link_layer_address) &&
+ (!ip6_address_is_unspecified (&ip6->src_address)) &&
+ (!ip6_address_is_link_local_unicast (&ip6->src_address)))
+ {
+ ip_neighbor_learn_t learn = { .sw_if_index = sw_if_index0,
+ .ip = {
+ .version = AF_IP6,
+ .ip.ip6 = ip6->src_address,
+ } };
+ clib_memcpy (&learn.mac, icmp6_nd_ell_addr->ethernet_address,
+ sizeof (learn.mac));
+ ip_neighbor_learn_dp (&learn);
+
+ *next0 = ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY;
+ icmp6_send_neighbor_advertisement (
+ vm, b0, ip6, icmp6_nsa, icmp6_nd_ell_addr, sw_if_index0);
+ }
+ }
+ else // type0 = ICMP6_neighbor_advertisement
+ {
+ icmp6_neighbor_solicitation_or_advertisement_header_t
+ *icmp6_nsa = (void *) icmp0;
+ icmp6_neighbor_discovery_ethernet_link_layer_address_option_t
+ *icmp6_nd_ell_addr = (void *) (icmp6_nsa + 1);
+ if (
+ (icmp6_nd_ell_addr->header.type ==
+ ICMP6_NEIGHBOR_DISCOVERY_OPTION_target_link_layer_address) &&
+ (!ip6_address_is_unspecified (&ip6->src_address)) &&
+ (!ip6_address_is_link_local_unicast (&ip6->src_address)))
+ {
+ ip_neighbor_learn_t learn = { .sw_if_index = sw_if_index0,
+ .ip = {
+ .version = AF_IP6,
+ .ip.ip6 =
+ icmp6_nsa->target_address,
+ } };
+ clib_memcpy (&learn.mac, icmp6_nd_ell_addr->ethernet_address,
+ sizeof (learn.mac));
+ ip_neighbor_learn_dp (&learn);
+
+ *next0 = ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP;
+ }
+ }
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ vnet_ip6_nd_proxy_trace_t *t;
+ t = vlib_add_trace (vm, node, b0, sizeof (t[0]));
+ t->sw_if_index = sw_if_index0;
+ t->is_multicast = 1;
+ }
+ }
+ }
+}
+
+static_always_inline uword
+ip6_nd_proxy_node_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u8 is_multicast)
+{
+ u32 n_left_from, *from, *to_next;
+ u32 next_index, n_left_to_next;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+
+ while (n_left_from > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ while (n_left_from > 4 && n_left_to_next > 2)
+ {
+ ip6_header_t *ip6_0, *ip6_1;
+ u32 next0, next1;
+ u32 bi0, bi1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ vlib_prefetch_buffer_header (b[3], LOAD);
+
+ vlib_prefetch_buffer_data (b[2], LOAD);
+ vlib_prefetch_buffer_data (b[3], LOAD);
+ }
+
+ /*
+ * speculatively enqueue b0 and b1 to the current next frame
+ */
+ to_next[0] = bi0 = from[0];
+ to_next[1] = bi1 = from[1];
+ to_next += 2;
+ n_left_to_next -= 2;
+
+ vnet_feature_next (&next0, b[0]);
+ vnet_feature_next (&next1, b[1]);
+
+ ip6_0 = vlib_buffer_get_current (b[0]);
+ ip6_1 = vlib_buffer_get_current (b[1]);
+
+ if (is_multicast)
+ {
+ ip6_nd_proxy_multicast (vm, node, b[0], ip6_0, &next0);
+ ip6_nd_proxy_multicast (vm, node, b[1], ip6_1, &next1);
+ }
+ else
+ {
+ ip6_nd_proxy_unicast (vm, node, b[0], ip6_0, &next0);
+ ip6_nd_proxy_unicast (vm, node, b[1], ip6_1, &next1);
+ }
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, bi1, next0,
+ next1);
+
+ b += 2;
+ from += 2;
+ n_left_from -= 2;
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ ip6_header_t *ip6_0;
+ u32 next0, bi0;
+
+ /* speculatively enqueue b0 to the current next frame */
+ to_next[0] = bi0 = from[0];
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vnet_feature_next (&next0, b[0]);
+ ip6_0 = vlib_buffer_get_current (b[0]);
+
+ if (is_multicast)
+ ip6_nd_proxy_multicast (vm, node, b[0], ip6_0, &next0);
+ else
+ ip6_nd_proxy_unicast (vm, node, b[0], ip6_0, &next0);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ b += 1;
+ from += 1;
+ n_left_from -= 1;
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (ip6_unicast_nd_proxy_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_nd_proxy_node_inline (vm, node, frame, 0 /* is_multicast */);
+}
+
+VLIB_REGISTER_NODE (ip6_unicast_nd_proxy_node) = {
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_nd_proxy_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = 0,
+ .n_next_nodes = ICMP6_NEIGHBOR_SOLICITATION_N_NEXT,
+ .next_nodes = {
+ [ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP] = "ip6-drop",
+ [ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY] = "interface-output",
+ },
+ .name = "ip6-unicast-nd-proxy",
+};
+
+VLIB_NODE_FN (ip6_multicast_nd_proxy_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return ip6_nd_proxy_node_inline (vm, node, frame, 1 /* is_multicast */);
+}
+
+VLIB_REGISTER_NODE (ip6_multicast_nd_proxy_node) = {
+ .vector_size = sizeof (u32),
+ .format_trace = format_ip6_nd_proxy_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = 0,
+ .n_next_nodes = ICMP6_NEIGHBOR_SOLICITATION_N_NEXT,
+ .next_nodes = {
+ [ICMP6_NEIGHBOR_SOLICITATION_NEXT_DROP] = "ip6-drop",
+ [ICMP6_NEIGHBOR_SOLICITATION_NEXT_REPLY] = "interface-output",
+ },
+ .name = "ip6-multicast-nd-proxy",
+};
+
+VNET_FEATURE_INIT (ip6_unicast_nd_proxy_node, static) = {
+ .arc_name = "ip6-unicast",
+ .node_name = "ip6-unicast-nd-proxy",
+ .runs_before = VNET_FEATURES ("ip6-lookup"),
+};
+
+VNET_FEATURE_INIT (ip6_multicast_nd_proxy_node, static) = {
+ .arc_name = "ip6-multicast",
+ .node_name = "ip6-multicast-nd-proxy",
+ .runs_before = VNET_FEATURES ("ip6-mfib-forward-lookup"),
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ip6-nd/ip6_nd_proxy.c b/src/vnet/ip6-nd/ip6_nd_proxy.c
index ea7ca568946..f7f07cb59f6 100644
--- a/src/vnet/ip6-nd/ip6_nd_proxy.c
+++ b/src/vnet/ip6-nd/ip6_nd_proxy.c
@@ -23,7 +23,6 @@
static int
ip6_nd_proxy_add_del (u32 sw_if_index, const ip6_address_t * addr, u8 is_del)
{
- /* *INDENT-OFF* */
u32 fib_index;
fib_prefix_t pfx = {
.fp_len = 128,
@@ -35,7 +34,6 @@ ip6_nd_proxy_add_del (u32 sw_if_index, const ip6_address_t * addr, u8 is_del)
ip46_address_t nh = {
.ip6 = *addr,
};
- /* *INDENT-ON* */
fib_index = ip6_fib_table_get_index_for_sw_if_index (sw_if_index);
@@ -107,20 +105,22 @@ set_ip6_nd_proxy_cmd (vlib_main_t * vm,
return (unformat_parse_error (input));
}
}
+ else
+ {
+ return error;
+ }
ip6_nd_proxy_add_del (sw_if_index, &addr, is_del);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ip6_nd_proxy_command, static) =
{
.path = "set ip6 nd proxy",
.short_help = "set ip6 nd proxy <interface> [del] <host-ip>",
.function = set_ip6_nd_proxy_cmd,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_nd_test.c b/src/vnet/ip6-nd/ip6_nd_test.c
index 5ca37029a76..488ca591ba0 100644
--- a/src/vnet/ip6-nd/ip6_nd_test.c
+++ b/src/vnet/ip6-nd/ip6_nd_test.c
@@ -25,7 +25,7 @@
/* define message IDs */
#include <ip6-nd/ip6_nd.api_enum.h>
#include <ip6-nd/ip6_nd.api_types.h>
-#include <vpp/api/vpe.api_types.h>
+#include <vlibmemory/vlib.api_types.h>
typedef struct
{
@@ -318,6 +318,69 @@ api_sw_interface_ip6nd_ra_config (vat_main_t * vam)
W (ret);
return ret;
}
+static int
+api_ip6nd_proxy_enable_disable (vat_main_t *vam)
+{
+ // not yet implemented
+ return -1;
+}
+
+static int
+api_sw_interface_ip6nd_ra_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_ip6nd_ra_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_sw_if_index, vam, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %u", &sw_if_index))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_IP6ND_RA_DUMP, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ /* Send it */
+ S (mp);
+
+ /* Use control ping for synchronization */
+ PING (&ip6_nd_test_main, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+
+ return ret;
+}
+
+static void
+vl_api_sw_interface_ip6nd_ra_details_t_handler (
+ vl_api_sw_interface_ip6nd_ra_details_t *mp)
+{
+ vat_main_t *vam = ip6_nd_test_main.vat_main;
+ u32 sw_if_index;
+ u8 send_radv;
+
+ /* Read the message */
+ sw_if_index = ntohl (mp->sw_if_index);
+ send_radv = mp->send_radv;
+
+ /* Print it */
+ print (vam->ofp, "sw_if_index: %u, send_radv: %s", sw_if_index,
+ (send_radv ? "on" : "off"));
+}
#include <ip6-nd/ip6_nd.api_test.c>
diff --git a/src/vnet/ip6-nd/ip6_ra.c b/src/vnet/ip6-nd/ip6_ra.c
index 270e428afad..ffc02e813e2 100644
--- a/src/vnet/ip6-nd/ip6_ra.c
+++ b/src/vnet/ip6-nd/ip6_ra.c
@@ -30,7 +30,6 @@
* The files contains the API and CLI code for managing IPv6 RAs
*/
-/* *INDENT-OFF* */
/* Router solicitation packet format for ethernet. */
typedef CLIB_PACKED (struct
{
@@ -51,7 +50,6 @@ typedef CLIB_PACKED (struct
icmp6_neighbor_discovery_prefix_information_option_t
prefix[0];
}) icmp6_router_advertisement_packet_t;
-/* *INDENT-ON* */
#define DEF_MAX_RADV_INTERVAL 200
#define DEF_MIN_RADV_INTERVAL .75 * DEF_MAX_RADV_INTERVAL
@@ -65,95 +63,6 @@ typedef CLIB_PACKED (struct
#define MAX_DELAY_BETWEEN_RAS 1800 /* seconds */
#define MAX_RA_DELAY_TIME .5 /* seconds */
-/* advertised prefix option */
-typedef struct
-{
- /* basic advertised information */
- ip6_address_t prefix;
- u8 prefix_len;
- int adv_on_link_flag;
- int adv_autonomous_flag;
- u32 adv_valid_lifetime_in_secs;
- u32 adv_pref_lifetime_in_secs;
-
- /* advertised values are computed from these times if decrementing */
- f64 valid_lifetime_expires;
- f64 pref_lifetime_expires;
-
- /* local information */
- int enabled;
- int deprecated_prefix_flag;
- int decrement_lifetime_flag;
-
-#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */
-#define DEF_ADV_VALID_LIFETIME 2592000
-#define DEF_ADV_PREF_LIFETIME 604800
-
- /* extensions are added here, mobile, DNS etc.. */
-} ip6_radv_prefix_t;
-
-typedef struct ip6_ra_t_
-{
- /* advertised config information, zero means unspecified */
- u8 curr_hop_limit;
- int adv_managed_flag;
- int adv_other_flag;
- u16 adv_router_lifetime_in_sec;
- u32 adv_neighbor_reachable_time_in_msec;
- u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations;
-
- /* mtu option */
- u32 adv_link_mtu;
-
- /* local information */
- u32 sw_if_index;
- int send_radv; /* radv on/off on this interface - set by config */
- int cease_radv; /* we are ceasing to send - set byf config */
- int send_unicast;
- int adv_link_layer_address;
- int prefix_option;
- int failed_device_check;
- int ref_count;
-
- /* prefix option */
- ip6_radv_prefix_t *adv_prefixes_pool;
-
- /* Hash table mapping address to index in interface advertised prefix pool. */
- mhash_t address_to_prefix_index;
-
- f64 max_radv_interval;
- f64 min_radv_interval;
- f64 min_delay_between_radv;
- f64 max_delay_between_radv;
- f64 max_rtr_default_lifetime;
-
- f64 last_radv_time;
- f64 last_multicast_time;
- f64 next_multicast_time;
-
-
- u32 initial_adverts_count;
- f64 initial_adverts_interval;
- u32 initial_adverts_sent;
-
- /* stats */
- u32 n_advertisements_sent;
- u32 n_solicitations_rcvd;
- u32 n_solicitations_dropped;
-
- /* router solicitations sending state */
- u8 keep_sending_rs; /* when true then next fields are valid */
- icmp6_send_router_solicitation_params_t params;
- f64 sleep_interval;
- f64 due_time;
- u32 n_left;
- f64 start_time;
- vlib_buffer_t *buffer;
-
- u32 seed;
-
-} ip6_ra_t;
-
static ip6_link_delegate_id_t ip6_ra_delegate_id;
static ip6_ra_t *ip6_ra_pool;
@@ -191,7 +100,7 @@ ip6_ra_report_unregister (ip6_ra_report_notify_t fn)
}
}
-static inline ip6_ra_t *
+ip6_ra_t *
ip6_ra_get_itf (u32 sw_if_index)
{
index_t rai;
@@ -204,6 +113,28 @@ ip6_ra_get_itf (u32 sw_if_index)
return (NULL);
}
+u8
+ip6_ra_adv_enabled (u32 sw_if_index)
+{
+ ip6_ra_t *ra;
+
+ ra = ip6_ra_get_itf (sw_if_index);
+
+ return ((ra != NULL) && (ra->send_radv != 0));
+}
+
+void
+ip6_ra_itf_walk (ip6_ra_itf_walk_fn_t fn, void *ctx)
+{
+ ip6_ra_t *radv_info;
+
+ pool_foreach (radv_info, ip6_ra_pool)
+ {
+ if (WALK_STOP == fn (radv_info->sw_if_index, ctx))
+ break;
+ }
+}
+
/* for "syslogging" - use elog for now */
#define foreach_log_level \
_ (DEBUG, "DEBUG") \
@@ -270,6 +201,9 @@ typedef enum
ICMP6_ROUTER_SOLICITATION_N_NEXT,
} icmp6_router_solicitation_or_advertisement_next_t;
+/*
+ * Note: Both periodic RAs and solicited RS come through here.
+ */
static_always_inline uword
icmp6_router_solicitation (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
@@ -369,7 +303,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
if (PREDICT_TRUE (error0 == ICMP6_ERROR_NONE && o0 != 0 &&
!is_unspecified && !is_link_local))
{
- /* *INDENT-OFF* */
ip_neighbor_learn_t learn = {
.sw_if_index = sw_if_index0,
.ip = {
@@ -377,7 +310,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
.version = AF_IP6,
},
};
- /* *INDENT-ON* */
memcpy (&learn.mac, o0->ethernet_address, sizeof (learn.mac));
ip_neighbor_learn_dp (&learn);
}
@@ -410,10 +342,9 @@ icmp6_router_solicitation (vlib_main_t * vm,
radv_info = ip6_ra_get_itf (sw_if_index0);
- error0 = ((!radv_info) ?
- ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG :
- error0);
-
+ error0 = ((!radv_info || 0 == radv_info->send_radv) ?
+ ICMP6_ERROR_ROUTER_SOLICITATION_RADV_NOT_CONFIG :
+ error0);
if (error0 == ICMP6_ERROR_NONE)
{
f64 now = vlib_time_now (vm);
@@ -525,7 +456,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
/* add advertised prefix options */
ip6_radv_prefix_t *pr_info;
- /* *INDENT-OFF* */
pool_foreach (pr_info, radv_info->adv_prefixes_pool)
{
if(pr_info->enabled &&
@@ -591,7 +521,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
}
}
- /* *INDENT-ON* */
/* add additional options before here */
@@ -635,6 +564,8 @@ icmp6_router_solicitation (vlib_main_t * vm,
/* Reuse current MAC header, copy SMAC to DMAC and
* interface MAC to SMAC */
vlib_buffer_reset (p0);
+ vlib_buffer_advance (
+ p0, vnet_buffer (p0)->l2_hdr_offset);
eth0 = vlib_buffer_get_current (p0);
clib_memcpy (eth0->dst_address, eth0->src_address,
6);
@@ -697,7 +628,6 @@ icmp6_router_solicitation (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) =
{
.function = icmp6_router_solicitation,
@@ -714,7 +644,6 @@ VLIB_REGISTER_NODE (ip6_icmp_router_solicitation_node,static) =
[ICMP6_ROUTER_SOLICITATION_NEXT_REPLY_TX] = "interface-output",
},
};
-/* *INDENT-ON* */
/* validate advertised info for consistancy (see RFC-4861 section 6.2.7) - log any inconsistencies, packet will always be dropped */
static_always_inline uword
@@ -1007,7 +936,6 @@ icmp6_router_advertisement (vlib_main_t * vm,
prefix->prefix.fp_proto = FIB_PROTOCOL_IP6;
/* look for matching prefix - if we our advertising it, it better be consistant */
- /* *INDENT-OFF* */
pool_foreach (pr_info, radv_info->adv_prefixes_pool)
{
@@ -1038,7 +966,6 @@ icmp6_router_advertisement (vlib_main_t * vm,
}
break;
}
- /* *INDENT-ON* */
break;
}
default:
@@ -1072,7 +999,6 @@ icmp6_router_advertisement (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) =
{
.function = icmp6_router_advertisement,
@@ -1087,7 +1013,6 @@ VLIB_REGISTER_NODE (ip6_icmp_router_advertisement_node,static) =
[0] = "ip6-drop",
},
};
-/* *INDENT-ON* */
static inline f64
random_f64_from_to (f64 from, f64 to)
@@ -1277,14 +1202,12 @@ send_rs_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
do
{
due_time = current_time + 1e9;
- /* *INDENT-OFF* */
pool_foreach (radv_info, ip6_ra_pool)
{
if (check_send_rs (vm, radv_info, current_time, &dt)
&& (dt < due_time))
due_time = dt;
}
- /* *INDENT-ON* */
current_time = vlib_time_now (vm);
}
while (due_time < current_time);
@@ -1295,13 +1218,11 @@ send_rs_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_rs_process_node) = {
.function = send_rs_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "ip6-rs-process",
};
-/* *INDENT-ON* */
void
icmp6_send_router_solicitation (vlib_main_t * vm, u32 sw_if_index, u8 stop,
@@ -1389,9 +1310,6 @@ ip6_ra_link_enable (u32 sw_if_index)
radv_info->initial_adverts_sent = radv_info->initial_adverts_count - 1;
radv_info->initial_adverts_interval = MAX_INITIAL_RTR_ADVERT_INTERVAL;
- /* deafult is to send */
- radv_info->send_radv = 1;
-
/* fill in delegate for this interface that will be needed later */
radv_info->adv_link_mtu =
vnet_sw_interface_get_mtu (vnet_get_main (), sw_if_index, VNET_MTU_IP6);
@@ -1412,12 +1330,10 @@ ip6_ra_delegate_disable (index_t rai)
radv_info = pool_elt_at_index (ip6_ra_pool, rai);
/* clean up prefix and MDP pools */
- /* *INDENT-OFF* */
pool_flush(p, radv_info->adv_prefixes_pool,
({
mhash_unset (&radv_info->address_to_prefix_index, &p->prefix, 0);
}));
- /* *INDENT-ON* */
pool_free (radv_info->adv_prefixes_pool);
@@ -1439,12 +1355,10 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len,
ip6_address_mask_from_width (&mask, prefix_len);
vec_reset_length (radv_indices);
- /* *INDENT-OFF* */
pool_foreach (radv_info, ip6_ra_pool)
{
vec_add1 (radv_indices, radv_info - ip6_ra_pool);
}
- /* *INDENT-ON* */
/*
* If we have another customer for this prefix,
@@ -1459,7 +1373,6 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len,
if (radv_info->sw_if_index == primary_sw_if_index)
continue;
- /* *INDENT-OFF* */
pool_foreach (this_prefix, radv_info->adv_prefixes_pool)
{
if (this_prefix->prefix_len == prefix_len
@@ -1482,7 +1395,6 @@ ip6_ra_update_secondary_radv_info (ip6_address_t * address, u8 prefix_len,
clib_warning ("ip6_neighbor_ra_prefix returned %d", rv);
}
}
- /* *INDENT-ON*/
}
}
@@ -1503,7 +1415,6 @@ ip6_ra_process_timer_event (vlib_main_t * vm,
f64 now = vlib_time_now (vm);
/* Interface ip6 radv info list */
- /* *INDENT-OFF* */
pool_foreach (radv_info, ip6_ra_pool)
{
if( !vnet_sw_interface_is_admin_up (vnm, radv_info->sw_if_index))
@@ -1593,7 +1504,6 @@ ip6_ra_process_timer_event (vlib_main_t * vm,
}
}
}
- /* *INDENT-ON* */
if (f)
{
@@ -1650,14 +1560,12 @@ ip6_ra_event_process (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_ra_process_node) =
{
.function = ip6_ra_event_process,
.name = "ip6-ra-process",
.type = VLIB_NODE_TYPE_PROCESS,
};
-/* *INDENT-ON* */
static void
ip6_ra_signal_report (ip6_ra_report_t * r)
@@ -1699,6 +1607,9 @@ ip6_ra_config (vlib_main_t * vm, u32 sw_if_index,
if (!radv_info)
return (VNET_API_ERROR_IP6_NOT_ENABLED);
+ /* Start off believing that we're going to send radv's */
+ radv_info->send_radv = 1;
+
if ((max_interval != 0) && (min_interval == 0))
min_interval = .75 * max_interval;
@@ -2024,8 +1935,7 @@ ip6_ra_cmd (vlib_main_t * vm,
}
else
{
- error = unformat_parse_error (line_input);
- goto done;
+ break;
}
}
@@ -2117,14 +2027,12 @@ format_ip6_ra (u8 * s, va_list * args)
indent += 2;
- /* *INDENT-OFF* */
pool_foreach (p, radv_info->adv_prefixes_pool)
{
s = format (s, "%Uprefix %U, length %d\n",
format_white_space, indent+2,
format_ip6_address, &p->prefix, p->prefix_len);
}
- /* *INDENT-ON* */
s = format (s, "%UMTU is %d\n",
format_white_space, indent, radv_info->adv_link_mtu);
@@ -2170,7 +2078,6 @@ format_ip6_ra (u8 * s, va_list * args)
return (s);
}
-
/*?
* This command is used to configure the neighbor discovery
* parameters on a given interface. Use the '<em>show ip6 interface</em>'
@@ -2178,9 +2085,16 @@ format_ip6_ra (u8 * s, va_list * args)
* on a given interface. This command has three formats:
*
*
- * <b>Format 1 - Router Advertisement Options:</b> (Only one can be entered in a single command)
+ * <b>Format 1 - Router Advertisement Options:</b> (Only one can be entered in
+ * a single command)
*
- * '<em><b>ip6 nd <interface> [no] [ra-managed-config-flag] | [ra-other-config-flag] | [ra-suppress] | [ra-suppress-link-layer] | [ra-send-unicast] | [ra-lifetime <lifetime>] | [ra-initial <cnt> <interval>] | [ra-interval <max-interval> [<min-interval>]] | [ra-cease]</b></em>'
+ * @clistart
+ * ip6 nd <interface> [no] [ra-managed-config-flag] |
+ * [ra-other-config-flag] | [ra-suppress] | [ra-suppress-link-layer] |
+ * [ra-send-unicast] | [ra-lifetime <lifetime>] |
+ * [ra-initial <cnt> <interval>] |
+ * [ra-interval <max-interval> [<min-interval>]] | [ra-cease]
+ * @cliend
*
* Where:
*
@@ -2206,7 +2120,7 @@ format_ip6_ra (u8 * s, va_list * args)
* and the '<em>no</em>' option returns it to this default state.
*
* <em>[no] ra-send-unicast</em> - Use the source address of the
- * router-solicitation message if availiable. The default is to use
+ * router-solicitation message if available. The default is to use
* multicast address of all nodes, and the '<em>no</em>' option returns
* it to this default state.
*
@@ -2237,63 +2151,69 @@ format_ip6_ra (u8 * s, va_list * args)
*
* <b>Format 2 - Prefix Options:</b>
*
- * '<em><b>ip6 nd <interface> [no] prefix <ip6-address>/<width> [<valid-lifetime> <pref-lifetime> | infinite] [no-advertise] [off-link] [no-autoconfig] [no-onlink]</b></em>'
+ * @clistart
+ * ip6 nd <interface> [no] prefix <ip6-address>/<width>
+ * [<valid-lifetime> <pref-lifetime> | infinite] [no-advertise] [off-link]
+ * [no-autoconfig] [no-onlink]
+ * @cliend
*
* Where:
*
* <em>no</em> - All additional flags are ignored and the prefix is deleted.
*
- * <em><valid-lifetime> <pref-lifetime></em> - '<em><valid-lifetime></em>' is the
- * length of time in seconds during what the prefix is valid for the purpose of
- * on-link determination. Range is 7203 to 2592000 seconds and default is 2592000
- * seconds (30 days). '<em><pref-lifetime></em>' is the prefered-lifetime and is the
- * length of time in seconds during what addresses generated from the prefix remain
- * preferred. Range is 0 to 604800 seconds and default is 604800 seconds (7 days).
+ * <em><valid-lifetime> <pref-lifetime></em> - '<em><valid-lifetime></em>' is
+ * the length of time in seconds during what the prefix is valid for the
+ * purpose of on-link determination. Range is 7203 to 2592000 seconds and
+ * default is 2592000 seconds (30 days). '<em><pref-lifetime></em>' is the
+ * preferred-lifetime and is the length of time in seconds during what
+ * addresses generated from the prefix remain preferred. Range is 0 to 604800
+ * seconds and default is 604800 seconds (7 days).
*
- * <em>infinite</em> - Both '<em><valid-lifetime></em>' and '<em><<pref-lifetime></em>'
- * are inifinte, no timeout.
+ * <em>infinite</em> - Both '<em><valid-lifetime></em>' and
+ * '<em><pref-lifetime></em>' are infinite, no timeout.
*
* <em>no-advertise</em> - Do not send full router address in prefix
* advertisement. Default is to advertise (i.e. - This flag is off by default).
*
- * <em>off-link</em> - Prefix is off-link, clear L-bit in packet. Default is on-link
- * (i.e. - This flag is off and L-bit in packet is set by default and this prefix can
- * be used for on-link determination). '<em>no-onlink</em>' also controls the L-bit.
+ * <em>off-link</em> - Prefix is off-link, clear L-bit in packet. Default is
+ * on-link (i.e. - This flag is off and L-bit in packet is set by default
+ * and this prefix can be used for on-link determination). '<em>no-onlink</em>'
+ * also controls the L-bit.
*
- * <em>no-autoconfig</em> - Do not use prefix for autoconfiguration, clear A-bit in packet.
- * Default is autoconfig (i.e. - This flag is off and A-bit in packet is set by default.
+ * <em>no-autoconfig</em> - Do not use prefix for autoconfiguration, clear
+ * A-bit in packet. Default is autoconfig (i.e. - This flag is off and A-bit
+ * in packet is set by default.
*
- * <em>no-onlink</em> - Do not use prefix for onlink determination, clear L-bit in packet.
- * Default is on-link (i.e. - This flag is off and L-bit in packet is set by default and
- * this prefix can be used for on-link determination). '<em>off-link</em>' also controls
- * the L-bit.
+ * <em>no-onlink</em> - Do not use prefix for onlink determination, clear L-bit
+ * in packet. Default is on-link (i.e. - This flag is off and L-bit in packet
+ * is set by default and this prefix can be used for on-link determination).
+ * '<em>off-link</em>' also controls the L-bit.
*
*
* <b>Format 3: - Default of Prefix:</b>
*
- * '<em><b>ip6 nd <interface> [no] prefix <ip6-address>/<width> default</b></em>'
+ * @cliexcmd{ip6 nd <interface> [no] prefix <ip6-address>/<width> default}
*
- * When a new prefix is added (or existing one is being overwritten) <em>default</em>
- * uses default values for the prefix. If <em>no</em> is used, the <em>default</em>
- * is ignored and the prefix is deleted.
+ * When a new prefix is added (or existing one is being overwritten)
+ * <em>default</em> uses default values for the prefix. If <em>no</em> is
+ * used, the <em>default</em> is ignored and the prefix is deleted.
*
*
* @cliexpar
* Example of how set a router advertisement option:
* @cliexcmd{ip6 nd GigabitEthernet2/0/0 ra-interval 100 20}
* Example of how to add a prefix:
- * @cliexcmd{ip6 nd GigabitEthernet2/0/0 prefix fe80::fe:28ff:fe9c:75b3/64 infinite no-advertise}
+ * @cliexcmd{ip6 nd GigabitEthernet2/0/0 prefix fe80::fe:28ff:fe9c:75b3/64
+ * infinite no-advertise}
* Example of how to delete a prefix:
* @cliexcmd{ip6 nd GigabitEthernet2/0/0 no prefix fe80::fe:28ff:fe9c:75b3/64}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_nd_command, static) =
{
.path = "ip6 nd",
.short_help = "ip6 nd <interface> ...",
.function = ip6_ra_cmd,
};
-/* *INDENT-ON* */
/**
* VFT for registering as a delegate to an IP6 link
@@ -2319,12 +2239,10 @@ ip6_ra_init (vlib_main_t * vm)
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip6_ra_init) =
{
.runs_after = VLIB_INITS("icmp6_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ip6-nd/ip6_ra.h b/src/vnet/ip6-nd/ip6_ra.h
index d09e8c0c975..958845b0a55 100644
--- a/src/vnet/ip6-nd/ip6_ra.h
+++ b/src/vnet/ip6-nd/ip6_ra.h
@@ -21,6 +21,105 @@
#include <vnet/fib/fib_types.h>
+/* advertised prefix option */
+typedef struct
+{
+ /* basic advertised information */
+ ip6_address_t prefix;
+ u8 prefix_len;
+ int adv_on_link_flag;
+ int adv_autonomous_flag;
+ u32 adv_valid_lifetime_in_secs;
+ u32 adv_pref_lifetime_in_secs;
+
+ /* advertised values are computed from these times if decrementing */
+ f64 valid_lifetime_expires;
+ f64 pref_lifetime_expires;
+
+ /* local information */
+ int enabled;
+ int deprecated_prefix_flag;
+ int decrement_lifetime_flag;
+
+#define MIN_ADV_VALID_LIFETIME 7203 /* seconds */
+#define DEF_ADV_VALID_LIFETIME 2592000
+#define DEF_ADV_PREF_LIFETIME 604800
+
+ /* extensions are added here, mobile, DNS etc.. */
+} ip6_radv_prefix_t;
+
+typedef struct
+{
+ u32 irt;
+ u32 mrt;
+ u32 mrc;
+ u32 mrd;
+} icmp6_send_router_solicitation_params_t;
+
+typedef struct ip6_ra_t_
+{
+ /* advertised config information, zero means unspecified */
+ u8 curr_hop_limit;
+ int adv_managed_flag;
+ int adv_other_flag;
+ u16 adv_router_lifetime_in_sec;
+ u32 adv_neighbor_reachable_time_in_msec;
+ u32 adv_time_in_msec_between_retransmitted_neighbor_solicitations;
+
+ /* mtu option */
+ u32 adv_link_mtu;
+
+ /* local information */
+ u32 sw_if_index;
+ int send_radv; /* radv on/off on this interface - set by config */
+ int cease_radv; /* we are ceasing to send - set byf config */
+ int send_unicast;
+ int adv_link_layer_address;
+ int prefix_option;
+ int failed_device_check;
+ int ref_count;
+
+ /* prefix option */
+ ip6_radv_prefix_t *adv_prefixes_pool;
+
+ /* Hash table mapping address to index in interface advertised prefix pool.
+ */
+ mhash_t address_to_prefix_index;
+
+ f64 max_radv_interval;
+ f64 min_radv_interval;
+ f64 min_delay_between_radv;
+ f64 max_delay_between_radv;
+ f64 max_rtr_default_lifetime;
+
+ f64 last_radv_time;
+ f64 last_multicast_time;
+ f64 next_multicast_time;
+
+ u32 initial_adverts_count;
+ f64 initial_adverts_interval;
+ u32 initial_adverts_sent;
+
+ /* stats */
+ u32 n_advertisements_sent;
+ u32 n_solicitations_rcvd;
+ u32 n_solicitations_dropped;
+
+ /* router solicitations sending state */
+ u8 keep_sending_rs; /* when true then next fields are valid */
+ icmp6_send_router_solicitation_params_t params;
+ f64 sleep_interval;
+ f64 due_time;
+ u32 n_left;
+ f64 start_time;
+ vlib_buffer_t *buffer;
+
+ u32 seed;
+
+} ip6_ra_t;
+
+extern ip6_ra_t *ip6_ra_get_itf (u32 sw_if_index);
+
extern int ip6_ra_config (vlib_main_t * vm, u32 sw_if_index,
u8 suppress, u8 managed, u8 other,
u8 ll_option, u8 send_unicast, u8 cease,
@@ -35,13 +134,9 @@ extern int ip6_ra_prefix (vlib_main_t * vm, u32 sw_if_index,
u8 off_link, u8 no_autoconfig,
u8 no_onlink, u8 is_no);
-typedef struct
-{
- u32 irt;
- u32 mrt;
- u32 mrc;
- u32 mrd;
-} icmp6_send_router_solicitation_params_t;
+typedef walk_rc_t (*ip6_ra_itf_walk_fn_t) (u32 sw_if_index, void *ctx);
+
+extern void ip6_ra_itf_walk (ip6_ra_itf_walk_fn_t fn, void *ctx);
extern void icmp6_send_router_solicitation (vlib_main_t * vm,
u32 sw_if_index,
@@ -82,7 +177,7 @@ extern void ip6_ra_update_secondary_radv_info (ip6_address_t * address,
u32 primary_sw_if_index,
u32 valid_time,
u32 preferred_time);
-
+extern u8 ip6_ra_adv_enabled (u32 sw_if_index);
#endif /* included_ip6_neighbor_h */
/*
diff --git a/src/vnet/ip6-nd/rd_cp.c b/src/vnet/ip6-nd/rd_cp.c
index 13fd90db288..5d419286051 100644
--- a/src/vnet/ip6-nd/rd_cp.c
+++ b/src/vnet/ip6-nd/rd_cp.c
@@ -72,8 +72,6 @@ enum
RD_CP_EVENT_INTERRUPT,
};
-#define vl_api_ip6_nd_address_autoconfig_t_print vl_noop_handler
-
static void
router_solicitation_start_stop (u32 sw_if_index, u8 start)
{
@@ -262,7 +260,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
{
router_lifetime_in_sec = r->router_lifetime_in_sec;
u8 route_already_present = 0;
- /* *INDENT-OFF* */
pool_foreach (default_route, rm->default_route_pool)
{
if (default_route->sw_if_index != sw_if_index)
@@ -276,7 +273,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
goto default_route_pool_foreach_out;
}
}
- /* *INDENT-ON* */
default_route_pool_foreach_out:
if (!route_already_present)
@@ -333,7 +329,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
continue;
u8 address_already_present = 0;
- /* *INDENT-OFF* */
pool_foreach (slaac_address, rm->slaac_address_pool)
{
if (slaac_address->sw_if_index != sw_if_index)
@@ -349,7 +344,6 @@ ip6_ra_report_handler (const ip6_ra_report_t * r)
goto slaac_address_pool_foreach_out;
}
}
- /* *INDENT-ON* */
slaac_address_pool_foreach_out:
if (address_already_present)
@@ -414,7 +408,6 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
* we do not use pool_foreach() to iterate over pool elements here
* as we are removing elements inside the loop body
*/
- /* *INDENT-OFF* */
pool_foreach_index (index, rm->slaac_address_pool)
{
slaac_address = pool_elt_at_index(rm->slaac_address_pool, index);
@@ -442,7 +435,6 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
else
remove_default_route (vm, default_route);
}
- /* *INDENT-ON* */
current_time = vlib_time_now (vm);
}
while (due_time < current_time);
@@ -453,13 +445,11 @@ rd_cp_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (rd_cp_process_node) = {
.function = rd_cp_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "rd-cp-process",
};
-/* *INDENT-ON* */
static void
interrupt_process (void)
@@ -514,21 +504,17 @@ rd_cp_set_address_autoconfig (u32 sw_if_index,
if (if_config->enabled && !enable)
{
- /* *INDENT-OFF* */
pool_foreach (slaac_address, rm->slaac_address_pool)
{
remove_slaac_address (vm, slaac_address);
}
- /* *INDENT-ON* */
}
if (if_config->install_default_routes && !install_default_routes)
{
- /* *INDENT-OFF* */
pool_foreach (default_route, rm->default_route_pool)
{
remove_default_route (vm, default_route);
}
- /* *INDENT-ON* */
}
if_config->enabled = enable;
@@ -588,13 +574,11 @@ ip6_nd_address_autoconfig (vlib_main_t * vm,
* @cliexcmd{ip6 nd address autoconfig GigabitEthernet2/0/0 disable}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ip6_nd_address_autoconfig_command, static) = {
.path = "ip6 nd address autoconfig",
.short_help = "ip6 nd address autoconfig <interface> [default-route|disable]",
.function = ip6_nd_address_autoconfig,
};
-/* *INDENT-ON* */
static clib_error_t *
rd_cp_init (vlib_main_t * vm)
diff --git a/src/vnet/ip6-nd/rd_cp_api.c b/src/vnet/ip6-nd/rd_cp_api.c
index 1f0d8587970..3cd55a702e1 100644
--- a/src/vnet/ip6-nd/rd_cp_api.c
+++ b/src/vnet/ip6-nd/rd_cp_api.c
@@ -13,6 +13,7 @@
* limitations under the License.
*/
+#include <vnet/vnet.h>
#include <vnet/ip6-nd/rd_cp.h>
#include <vlibapi/api.h>
diff --git a/src/vnet/ipfix-export/flow_api.c b/src/vnet/ipfix-export/flow_api.c
index 75a656468db..0b287335bbf 100644
--- a/src/vnet/ipfix-export/flow_api.c
+++ b/src/vnet/ipfix-export/flow_api.c
@@ -36,39 +36,96 @@
#define REPLY_MSG_ID_BASE frm->msg_id_base
#include <vlibapi/api_helper_macros.h>
-static void
-vl_api_set_ipfix_exporter_t_handler (vl_api_set_ipfix_exporter_t * mp)
+ipfix_exporter_t *
+vnet_ipfix_exporter_lookup (const ip_address_t *ipfix_collector)
+{
+ flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp;
+
+ pool_foreach (exp, frm->exporters)
+ {
+ if (ip_address_cmp (&exp->ipfix_collector, ipfix_collector) == 0)
+ return exp;
+ }
+
+ return NULL;
+}
+
+/*
+ * For backwards compatibility reasons index 0 in the set of exporters
+ * is alwyas used for the exporter created via the set_ipfix_exporter
+ * API.
+ */
+#define USE_INDEX_0 true
+#define USE_ANY_INDEX false
+
+static int
+vl_api_set_ipfix_exporter_t_internal (
+ u32 client_index, vl_api_address_t *mp_collector_address,
+ u16 mp_collector_port, vl_api_address_t *mp_src_address, u32 mp_vrf_id,
+ u32 mp_path_mtu, u32 mp_template_interval, bool mp_udp_checksum,
+ bool use_index_0, bool is_create)
{
vlib_main_t *vm = vlib_get_main ();
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp;
vl_api_registration_t *reg;
- vl_api_set_ipfix_exporter_reply_t *rmp;
- ip4_address_t collector, src;
+ ip_address_t collector, src;
u16 collector_port = UDP_DST_PORT_ipfix;
u32 path_mtu;
u32 template_interval;
u8 udp_checksum;
u32 fib_id;
u32 fib_index = ~0;
- int rv = 0;
+ u32 ip_header_size;
- reg = vl_api_client_index_to_registration (mp->client_index);
+ reg = vl_api_client_index_to_registration (client_index);
if (!reg)
- return;
+ return VNET_API_ERROR_UNIMPLEMENTED;
- if (mp->src_address.af == ADDRESS_IP6
- || mp->collector_address.af == ADDRESS_IP6)
+ if (use_index_0)
{
- rv = VNET_API_ERROR_UNIMPLEMENTED;
- goto out;
+ /*
+ * In this case we update the existing exporter. There is no delete
+ * for exp[0]
+ */
+ exp = &frm->exporters[0];
+
+ /* Collector address must be IPv4 for exp[0] */
+ collector.version = AF_IP4;
+ ip4_address_decode (mp_collector_address->un.ip4, &collector.ip.ip4);
+ }
+ else
+ {
+ ip_address_decode2 (mp_collector_address, &collector);
+ if (is_create)
+ {
+ exp = vnet_ipfix_exporter_lookup (&collector);
+ if (!exp)
+ {
+ /* Create a new exporter instead of updating an existing one */
+ if (pool_elts (frm->exporters) >= IPFIX_EXPORTERS_MAX)
+ return VNET_API_ERROR_INVALID_VALUE;
+ pool_get (frm->exporters, exp);
+ }
+ }
+ else
+ {
+ /* Delete the exporter */
+ exp = vnet_ipfix_exporter_lookup (&collector);
+ if (!exp)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ pool_put (frm->exporters, exp);
+ return 0;
+ }
}
- ip4_address_decode (mp->collector_address.un.ip4, &collector);
- collector_port = ntohs (mp->collector_port);
+ collector_port = ntohs (mp_collector_port);
if (collector_port == (u16) ~ 0)
collector_port = UDP_DST_PORT_ipfix;
- ip4_address_decode (mp->src_address.un.ip4, &src);
- fib_id = ntohl (mp->vrf_id);
+ ip_address_decode2 (mp_src_address, &src);
+ fib_id = ntohl (mp_vrf_id);
ip4_main_t *im = &ip4_main;
if (fib_id == ~0)
@@ -79,69 +136,97 @@ vl_api_set_ipfix_exporter_t_handler (vl_api_set_ipfix_exporter_t * mp)
{
uword *p = hash_get (im->fib_index_by_table_id, fib_id);
if (!p)
- {
- rv = VNET_API_ERROR_NO_SUCH_FIB;
- goto out;
- }
+ return VNET_API_ERROR_NO_SUCH_FIB;
fib_index = p[0];
}
- path_mtu = ntohl (mp->path_mtu);
+ path_mtu = ntohl (mp_path_mtu);
if (path_mtu == ~0)
path_mtu = 512; // RFC 7011 section 10.3.3.
- template_interval = ntohl (mp->template_interval);
+ template_interval = ntohl (mp_template_interval);
if (template_interval == ~0)
template_interval = 20;
- udp_checksum = mp->udp_checksum;
+ udp_checksum = mp_udp_checksum;
- if (collector.as_u32 != 0 && src.as_u32 == 0)
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto out;
- }
+ /*
+ * If the collector address is set then the src must be too.
+ * Collector address can be set to 0 to disable exporter
+ */
+ if (!ip_address_is_zero (&collector) && ip_address_is_zero (&src))
+ return VNET_API_ERROR_INVALID_VALUE;
+ if (collector.version != src.version)
+ return VNET_API_ERROR_INVALID_VALUE;
if (path_mtu > 1450 /* vpp does not support fragmentation */ )
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto out;
- }
+ return VNET_API_ERROR_INVALID_VALUE;
if (path_mtu < 68)
- {
- rv = VNET_API_ERROR_INVALID_VALUE;
- goto out;
- }
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ /* Calculate how much header data we need. */
+ if (collector.version == AF_IP4)
+ ip_header_size = sizeof (ip4_header_t);
+ else
+ ip_header_size = sizeof (ip6_header_t);
+ exp->all_headers_size = ip_header_size + sizeof (udp_header_t) +
+ sizeof (ipfix_message_header_t) +
+ sizeof (ipfix_set_header_t);
/* Reset report streams if we are reconfiguring IP addresses */
- if (frm->ipfix_collector.as_u32 != collector.as_u32 ||
- frm->src_address.as_u32 != src.as_u32 ||
- frm->collector_port != collector_port)
- vnet_flow_reports_reset (frm);
-
- frm->ipfix_collector.as_u32 = collector.as_u32;
- frm->collector_port = collector_port;
- frm->src_address.as_u32 = src.as_u32;
- frm->fib_index = fib_index;
- frm->path_mtu = path_mtu;
- frm->template_interval = template_interval;
- frm->udp_checksum = udp_checksum;
+ if (ip_address_cmp (&exp->ipfix_collector, &collector) ||
+ ip_address_cmp (&exp->src_address, &src) ||
+ exp->collector_port != collector_port)
+ vnet_flow_reports_reset (exp);
+
+ exp->ipfix_collector = collector;
+ exp->collector_port = collector_port;
+ exp->src_address = src;
+ exp->fib_index = fib_index;
+ exp->path_mtu = path_mtu;
+ exp->template_interval = template_interval;
+ exp->udp_checksum = udp_checksum;
/* Turn on the flow reporting process */
vlib_process_signal_event (vm, flow_report_process_node.index, 1, 0);
-out:
+ return 0;
+}
+
+static void
+vl_api_set_ipfix_exporter_t_handler (vl_api_set_ipfix_exporter_t *mp)
+{
+ vl_api_set_ipfix_exporter_reply_t *rmp;
+ flow_report_main_t *frm = &flow_report_main;
+ int rv = vl_api_set_ipfix_exporter_t_internal (
+ mp->client_index, &mp->collector_address, mp->collector_port,
+ &mp->src_address, mp->vrf_id, mp->path_mtu, mp->template_interval,
+ mp->udp_checksum, USE_INDEX_0, 0);
+
REPLY_MACRO (VL_API_SET_IPFIX_EXPORTER_REPLY);
}
static void
+vl_api_ipfix_exporter_create_delete_t_handler (
+ vl_api_ipfix_exporter_create_delete_t *mp)
+{
+ vl_api_ipfix_exporter_create_delete_reply_t *rmp;
+ flow_report_main_t *frm = &flow_report_main;
+ int rv = vl_api_set_ipfix_exporter_t_internal (
+ mp->client_index, &mp->collector_address, mp->collector_port,
+ &mp->src_address, mp->vrf_id, mp->path_mtu, mp->template_interval,
+ mp->udp_checksum, USE_ANY_INDEX, mp->is_create);
+
+ REPLY_MACRO (VL_API_IPFIX_EXPORTER_CREATE_DELETE_REPLY);
+}
+
+static void
vl_api_ipfix_exporter_dump_t_handler (vl_api_ipfix_exporter_dump_t * mp)
{
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = pool_elt_at_index (flow_report_main.exporters, 0);
vl_api_registration_t *reg;
vl_api_ipfix_exporter_details_t *rmp;
ip4_main_t *im = &ip4_main;
- ip46_address_t collector = {.as_u64[0] = 0,.as_u64[1] = 0 };
- ip46_address_t src = {.as_u64[0] = 0,.as_u64[1] = 0 };
u32 vrf_id;
reg = vl_api_client_index_to_registration (mp->client_index);
@@ -150,27 +235,69 @@ vl_api_ipfix_exporter_dump_t_handler (vl_api_ipfix_exporter_dump_t * mp)
rmp = vl_msg_api_alloc (sizeof (*rmp));
clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id = ntohs (VL_API_IPFIX_EXPORTER_DETAILS);
+ rmp->_vl_msg_id =
+ ntohs ((REPLY_MSG_ID_BASE) + VL_API_IPFIX_EXPORTER_DETAILS);
rmp->context = mp->context;
- memcpy (&collector.ip4, &frm->ipfix_collector, sizeof (ip4_address_t));
- ip_address_encode (&collector, IP46_TYPE_IP4, &rmp->collector_address);
+ ip_address_encode2 (&exp->ipfix_collector, &rmp->collector_address);
+ rmp->collector_port = htons (exp->collector_port);
+ ip_address_encode2 (&exp->src_address, &rmp->src_address);
- rmp->collector_port = htons (frm->collector_port);
+ if (exp->fib_index == ~0)
+ vrf_id = ~0;
+ else
+ vrf_id = im->fibs[exp->fib_index].ft_table_id;
+ rmp->vrf_id = htonl (vrf_id);
+ rmp->path_mtu = htonl (exp->path_mtu);
+ rmp->template_interval = htonl (exp->template_interval);
+ rmp->udp_checksum = (exp->udp_checksum != 0);
- memcpy (&src.ip4, &frm->src_address, sizeof (ip4_address_t));
- ip_address_encode (&src, IP46_TYPE_IP4, &rmp->src_address);
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
- if (frm->fib_index == ~0)
+static void
+ipfix_all_fill_details (vl_api_ipfix_all_exporter_details_t *rmp,
+ ipfix_exporter_t *exp)
+{
+ ip4_main_t *im = &ip4_main;
+ u32 vrf_id;
+
+ ip_address_encode2 (&exp->ipfix_collector, &rmp->collector_address);
+ rmp->collector_port = htons (exp->collector_port);
+ ip_address_encode2 (&exp->src_address, &rmp->src_address);
+
+ if (exp->fib_index == ~0)
vrf_id = ~0;
else
- vrf_id = im->fibs[frm->fib_index].ft_table_id;
+ vrf_id = im->fibs[exp->fib_index].ft_table_id;
rmp->vrf_id = htonl (vrf_id);
- rmp->path_mtu = htonl (frm->path_mtu);
- rmp->template_interval = htonl (frm->template_interval);
- rmp->udp_checksum = (frm->udp_checksum != 0);
+ rmp->path_mtu = htonl (exp->path_mtu);
+ rmp->template_interval = htonl (exp->template_interval);
+ rmp->udp_checksum = (exp->udp_checksum != 0);
+}
- vl_api_send_msg (reg, (u8 *) rmp);
+static void
+ipfix_all_exporter_details (flow_report_main_t *frm, u32 index,
+ vl_api_registration_t *rp, u32 context)
+{
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, index);
+
+ vl_api_ipfix_all_exporter_details_t *rmp;
+
+ REPLY_MACRO_DETAILS4 (VL_API_IPFIX_ALL_EXPORTER_DETAILS, rp, context,
+ ({ ipfix_all_fill_details (rmp, exp); }));
+}
+
+static void
+vl_api_ipfix_all_exporter_get_t_handler (vl_api_ipfix_all_exporter_get_t *mp)
+{
+ flow_report_main_t *frm = &flow_report_main;
+ vl_api_ipfix_all_exporter_get_reply_t *rmp;
+ int rv = 0;
+
+ REPLY_AND_DETAILS_MACRO (
+ VL_API_IPFIX_ALL_EXPORTER_GET_REPLY, frm->exporters,
+ ({ ipfix_all_exporter_details (frm, cursor, rp, mp->context); }));
}
static void
@@ -180,6 +307,7 @@ static void
vl_api_set_ipfix_classify_stream_reply_t *rmp;
flow_report_classify_main_t *fcm = &flow_report_classify_main;
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &frm->exporters[0];
u32 domain_id = 0;
u32 src_port = UDP_DST_PORT_ipfix;
int rv = 0;
@@ -190,7 +318,7 @@ static void
if (fcm->src_port != 0 &&
(fcm->domain_id != domain_id || fcm->src_port != (u16) src_port))
{
- int rv = vnet_stream_change (frm, fcm->domain_id, fcm->src_port,
+ int rv = vnet_stream_change (exp, fcm->domain_id, fcm->src_port,
domain_id, (u16) src_port);
ASSERT (rv == 0);
}
@@ -231,6 +359,7 @@ static void
vl_api_registration_t *reg;
flow_report_classify_main_t *fcm = &flow_report_classify_main;
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &frm->exporters[0];
vnet_flow_report_add_del_args_t args;
ipfix_classify_table_t *table;
int is_add;
@@ -296,7 +425,7 @@ static void
args.domain_id = fcm->domain_id;
args.src_port = fcm->src_port;
- rv = vnet_flow_report_add_del (frm, &args, NULL);
+ rv = vnet_flow_report_add_del (exp, &args, NULL);
/* If deleting, or add failed */
if (is_add == 0 || (rv && is_add))
diff --git a/src/vnet/ipfix-export/flow_report.c b/src/vnet/ipfix-export/flow_report.c
index 760de5f8c66..4eb93520ed8 100644
--- a/src/vnet/ipfix-export/flow_report.c
+++ b/src/vnet/ipfix-export/flow_report.c
@@ -15,6 +15,7 @@
/*
* flow_report.c
*/
+#include <vppinfra/atomics.h>
#include <vnet/ipfix-export/flow_report.h>
#include <vnet/api_errno.h>
#include <vnet/udp/udp.h>
@@ -22,45 +23,40 @@
flow_report_main_t flow_report_main;
static_always_inline u8
-stream_index_valid (u32 index)
+stream_index_valid (ipfix_exporter_t *exp, u32 index)
{
- flow_report_main_t *frm = &flow_report_main;
- return index < vec_len (frm->streams) &&
- frm->streams[index].domain_id != ~0;
+ return index < vec_len (exp->streams) && exp->streams[index].domain_id != ~0;
}
static_always_inline flow_report_stream_t *
-add_stream (void)
+add_stream (ipfix_exporter_t *exp)
{
- flow_report_main_t *frm = &flow_report_main;
u32 i;
- for (i = 0; i < vec_len (frm->streams); i++)
- if (!stream_index_valid (i))
- return &frm->streams[i];
- u32 index = vec_len (frm->streams);
- vec_validate (frm->streams, index);
- return &frm->streams[index];
+ for (i = 0; i < vec_len (exp->streams); i++)
+ if (!stream_index_valid (exp, i))
+ return &exp->streams[i];
+ u32 index = vec_len (exp->streams);
+ vec_validate (exp->streams, index);
+ return &exp->streams[index];
}
static_always_inline void
-delete_stream (u32 index)
+delete_stream (ipfix_exporter_t *exp, u32 index)
{
- flow_report_main_t *frm = &flow_report_main;
- ASSERT (index < vec_len (frm->streams));
- ASSERT (frm->streams[index].domain_id != ~0);
- frm->streams[index].domain_id = ~0;
+ ASSERT (index < vec_len (exp->streams));
+ ASSERT (exp->streams[index].domain_id != ~0);
+ exp->streams[index].domain_id = ~0;
}
static i32
-find_stream (u32 domain_id, u16 src_port)
+find_stream (ipfix_exporter_t *exp, u32 domain_id, u16 src_port)
{
- flow_report_main_t *frm = &flow_report_main;
flow_report_stream_t *stream;
u32 i;
- for (i = 0; i < vec_len (frm->streams); i++)
- if (stream_index_valid (i))
+ for (i = 0; i < vec_len (exp->streams); i++)
+ if (stream_index_valid (exp, i))
{
- stream = &frm->streams[i];
+ stream = &exp->streams[i];
if (domain_id == stream->domain_id)
{
if (src_port != stream->src_port)
@@ -76,14 +72,17 @@ find_stream (u32 domain_id, u16 src_port)
}
int
-send_template_packet (flow_report_main_t * frm,
- flow_report_t * fr, u32 * buffer_indexp)
+send_template_packet (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, u32 *buffer_indexp)
{
u32 bi0;
vlib_buffer_t *b0;
- ip4_ipfix_template_packet_t *tp;
+ ip4_ipfix_template_packet_t *tp4;
+ ip6_ipfix_template_packet_t *tp6;
ipfix_message_header_t *h;
- ip4_header_t *ip;
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+ void *ip;
udp_header_t *udp;
vlib_main_t *vm = frm->vlib_main;
flow_report_stream_t *stream;
@@ -92,7 +91,8 @@ send_template_packet (flow_report_main_t * frm,
if (fr->update_rewrite || fr->rewrite == 0)
{
- if (frm->ipfix_collector.as_u32 == 0 || frm->src_address.as_u32 == 0)
+ if (ip_address_is_zero (&exp->ipfix_collector) ||
+ ip_address_is_zero (&exp->src_address))
{
vlib_node_set_state (frm->vlib_main, flow_report_process_node.index,
VLIB_NODE_STATE_DISABLED);
@@ -104,13 +104,9 @@ send_template_packet (flow_report_main_t * frm,
if (fr->update_rewrite)
{
- fr->rewrite = fr->rewrite_callback (frm, fr,
- &frm->ipfix_collector,
- &frm->src_address,
- frm->collector_port,
- fr->report_elements,
- fr->n_report_elements,
- fr->stream_indexp);
+ fr->rewrite = fr->rewrite_callback (
+ exp, fr, exp->collector_port, fr->report_elements,
+ fr->n_report_elements, fr->stream_indexp);
fr->update_rewrite = 0;
}
@@ -126,11 +122,22 @@ send_template_packet (flow_report_main_t * frm,
b0->current_length = vec_len (fr->rewrite);
b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_F_FLOW_REPORT);
vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = exp->fib_index;
- tp = vlib_buffer_get_current (b0);
- ip = (ip4_header_t *) & tp->ip4;
- udp = (udp_header_t *) (ip + 1);
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ {
+ tp4 = vlib_buffer_get_current (b0);
+ ip4 = (ip4_header_t *) &tp4->ip4;
+ ip = ip4;
+ udp = (udp_header_t *) (ip4 + 1);
+ }
+ else
+ {
+ tp6 = vlib_buffer_get_current (b0);
+ ip6 = (ip6_header_t *) &tp6->ip6;
+ ip = ip6;
+ udp = (udp_header_t *) (ip6 + 1);
+ }
h = (ipfix_message_header_t *) (udp + 1);
/* FIXUP: message header export_time */
@@ -139,18 +146,30 @@ send_template_packet (flow_report_main_t * frm,
(vlib_time_now (frm->vlib_main) - frm->vlib_time_0));
h->export_time = clib_host_to_net_u32 (h->export_time);
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
/* FIXUP: message header sequence_number. Templates do not increase it */
h->sequence_number = clib_host_to_net_u32 (stream->sequence_number);
/* FIXUP: udp length */
- udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip4));
+ else
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip6));
- if (frm->udp_checksum)
+ if (exp->udp_checksum || ip_addr_version (&exp->ipfix_collector) == AF_IP6)
{
/* RFC 7011 section 10.3.2. */
- udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+ else
+ {
+ int bogus = 0;
+ udp->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip, &bogus);
+ }
+
if (udp->checksum == 0)
udp->checksum = 0xffff;
}
@@ -162,16 +181,58 @@ send_template_packet (flow_report_main_t * frm,
return 0;
}
+u32 always_inline
+ipfix_write_headers (ipfix_exporter_t *exp, void *data, void **ip,
+ udp_header_t **udp, u32 len)
+{
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ {
+ ip4_ipfix_template_packet_t *tp4;
+ ip4_header_t *ip4;
+
+ tp4 = (ip4_ipfix_template_packet_t *) data;
+ ip4 = (ip4_header_t *) &tp4->ip4;
+ ip4->ip_version_and_header_length = 0x45;
+ ip4->ttl = 254;
+ ip4->protocol = IP_PROTOCOL_UDP;
+ ip4->flags_and_fragment_offset = 0;
+ ip4->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip4->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
+ *ip = ip4;
+ *udp = (udp_header_t *) (ip4 + 1);
+
+ (*udp)->length = clib_host_to_net_u16 (len - sizeof (*ip4));
+ return sizeof (*ip4);
+ }
+ else
+ {
+ ip6_ipfix_template_packet_t *tp6;
+ ip6_header_t *ip6;
+
+ tp6 = (ip6_ipfix_template_packet_t *) data;
+ ip6 = (ip6_header_t *) &tp6->ip6;
+ ip6->ip_version_traffic_class_and_flow_label =
+ clib_host_to_net_u32 (6 << 28);
+ ip6->hop_limit = 254;
+ ip6->protocol = IP_PROTOCOL_UDP;
+ ip6->src_address = exp->src_address.ip.ip6;
+ ip6->dst_address = exp->ipfix_collector.ip.ip6;
+ *ip = ip6;
+ *udp = (udp_header_t *) (ip6 + 1);
+ (*udp)->length = clib_host_to_net_u16 (len - sizeof (*ip6));
+ return sizeof (*ip6);
+ }
+}
+
u8 *
-vnet_flow_rewrite_generic_callback (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+vnet_flow_rewrite_generic_callback (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * report_elts,
- u32 n_elts, u32 * stream_indexp)
+ ipfix_report_element_t *report_elts,
+ u32 n_elts, u32 *stream_indexp)
{
- ip4_header_t *ip;
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+ void *ip;
udp_header_t *udp;
ipfix_message_header_t *h;
ipfix_set_header_t *s;
@@ -179,41 +240,36 @@ vnet_flow_rewrite_generic_callback (flow_report_main_t * frm,
ipfix_field_specifier_t *f;
ipfix_field_specifier_t *first_field;
u8 *rewrite = 0;
- ip4_ipfix_template_packet_t *tp;
flow_report_stream_t *stream;
int i;
ipfix_report_element_t *ep;
+ u32 size;
ASSERT (stream_indexp);
ASSERT (n_elts);
ASSERT (report_elts);
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
*stream_indexp = fr->stream_index;
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ size = sizeof (ip4_ipfix_template_packet_t);
+ else
+ size = sizeof (ip6_ipfix_template_packet_t);
/* allocate rewrite space */
vec_validate_aligned (rewrite,
- sizeof (ip4_ipfix_template_packet_t)
- + n_elts * sizeof (ipfix_field_specifier_t) - 1,
+ size + n_elts * sizeof (ipfix_field_specifier_t) - 1,
CLIB_CACHE_LINE_BYTES);
/* create the packet rewrite string */
- tp = (ip4_ipfix_template_packet_t *) rewrite;
- ip = (ip4_header_t *) & tp->ip4;
- udp = (udp_header_t *) (ip + 1);
+ ipfix_write_headers (exp, rewrite, &ip, &udp, vec_len (rewrite));
+
h = (ipfix_message_header_t *) (udp + 1);
s = (ipfix_set_header_t *) (h + 1);
t = (ipfix_template_header_t *) (s + 1);
first_field = f = (ipfix_field_specifier_t *) (t + 1);
-
- ip->ip_version_and_header_length = 0x45;
- ip->ttl = 254;
- ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
- ip->dst_address.as_u32 = collector_address->as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
udp->dst_port = clib_host_to_net_u16 (collector_port);
- udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
/* FIXUP LATER: message header export_time */
h->domain_id = clib_host_to_net_u32 (stream->domain_id);
@@ -227,10 +283,6 @@ vnet_flow_rewrite_generic_callback (flow_report_main_t * frm,
ep++;
}
- /* Back to the template packet... */
- ip = (ip4_header_t *) & tp->ip4;
- udp = (udp_header_t *) (ip + 1);
-
ASSERT (f - first_field);
/* Field count in this template */
t->id_count = ipfix_id_count (fr->template_id, f - first_field);
@@ -242,12 +294,201 @@ vnet_flow_rewrite_generic_callback (flow_report_main_t * frm,
/* message length in octets */
h->version_length = version_length ((u8 *) f - (u8 *) h);
- ip->length = clib_host_to_net_u16 ((u8 *) f - (u8 *) ip);
- ip->checksum = ip4_header_checksum (ip);
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ {
+ ip4 = (ip4_header_t *) ip;
+ ip4->length = clib_host_to_net_u16 ((u8 *) f - (u8 *) ip4);
+ ip4->checksum = ip4_header_checksum (ip4);
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) ip;
+ /* IPv6 payload length does not include the IPv6 header */
+ ip6->payload_length = clib_host_to_net_u16 ((u8 *) f - (u8 *) udp);
+ }
return rewrite;
}
+vlib_buffer_t *
+vnet_ipfix_exp_get_buffer (vlib_main_t *vm, ipfix_exporter_t *exp,
+ flow_report_t *fr, u32 thread_index)
+{
+ u32 bi0;
+ vlib_buffer_t *b0;
+
+ if (fr->per_thread_data[thread_index].buffer)
+ return fr->per_thread_data[thread_index].buffer;
+
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ return NULL;
+
+ /* Initialize the buffer */
+ b0 = fr->per_thread_data[thread_index].buffer = vlib_get_buffer (vm, bi0);
+
+ b0->current_data = 0;
+ b0->current_length = exp->all_headers_size;
+ b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_F_FLOW_REPORT);
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = exp->fib_index;
+ fr->per_thread_data[thread_index].next_data_offset = b0->current_length;
+
+ return b0;
+}
+
+/*
+ * Send a buffer that is mostly populated. Has flow records but needs some
+ * header fields updated.
+ */
+void
+vnet_ipfix_exp_send_buffer (vlib_main_t *vm, ipfix_exporter_t *exp,
+ flow_report_t *fr, flow_report_stream_t *stream,
+ u32 thread_index, vlib_buffer_t *b0)
+{
+ flow_report_main_t *frm = &flow_report_main;
+ vlib_frame_t *f;
+ ipfix_set_header_t *s;
+ ipfix_message_header_t *h;
+ ip4_header_t *ip4 = 0;
+ ip6_header_t *ip6 = 0;
+ void *ip;
+ udp_header_t *udp;
+ int ip_len;
+
+ /* nothing to send */
+ if (fr->per_thread_data[thread_index].next_data_offset <=
+ exp->all_headers_size)
+ return;
+
+ ip_len = ipfix_write_headers (exp, (void *) vlib_buffer_get_current (b0),
+ &ip, &udp, b0->current_length);
+
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+
+ udp->src_port = clib_host_to_net_u16 (stream->src_port);
+ udp->dst_port = clib_host_to_net_u16 (exp->collector_port);
+ udp->checksum = 0;
+
+ /* FIXUP: message header export_time */
+ h->export_time =
+ (u32) (((f64) frm->unix_time_0) + (vlib_time_now (vm) - frm->vlib_time_0));
+ h->export_time = clib_host_to_net_u32 (h->export_time);
+ h->domain_id = clib_host_to_net_u32 (stream->domain_id);
+
+ /*
+ * RFC 7011: Section 3.2
+ *
+ * Incremental sequence counter modulo 2^32 of all IPFIX Data Records
+ * sent in the current stream from the current Observation Domain by
+ * the Exporting Process
+ */
+ h->sequence_number =
+ clib_atomic_fetch_add (&stream->sequence_number,
+ fr->per_thread_data[thread_index].n_data_records);
+ h->sequence_number = clib_host_to_net_u32 (h->sequence_number);
+
+ /*
+ * For data records we use the template ID as the set ID.
+ * RFC 7011: 3.4.3
+ */
+ s->set_id_length = ipfix_set_id_length (
+ fr->template_id,
+ b0->current_length - (ip_len + sizeof (*udp) + sizeof (*h)));
+ h->version_length =
+ version_length (b0->current_length - (ip_len + sizeof (*udp)));
+
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ {
+ ip4 = (ip4_header_t *) ip;
+ ip4->length = clib_host_to_net_u16 (b0->current_length);
+ ip4->checksum = ip4_header_checksum (ip4);
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip4));
+ ASSERT (ip4_header_checksum_is_valid (ip4));
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) ip;
+ /* Ipv6 payload length does not include the IPv6 header */
+ ip6->payload_length =
+ clib_host_to_net_u16 (b0->current_length - sizeof (*ip6));
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip6));
+ }
+
+ if (exp->udp_checksum || ip_addr_version (&exp->ipfix_collector) == AF_IP6)
+ {
+ /* RFC 7011 section 10.3.2. */
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip4);
+ else
+ {
+ int bogus = 0;
+ udp->checksum =
+ ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip6, &bogus);
+ }
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+ }
+
+ /* Find or allocate a frame */
+ f = fr->per_thread_data[thread_index].frame;
+ if (PREDICT_FALSE (f == 0))
+ {
+ u32 *to_next;
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ else
+ f = vlib_get_frame_to_node (vm, ip6_lookup_node.index);
+ fr->per_thread_data[thread_index].frame = f;
+ u32 bi0 = vlib_get_buffer_index (vm, b0);
+
+ /* Enqueue the buffer */
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+ }
+
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+ else
+ vlib_put_frame_to_node (vm, ip6_lookup_node.index, f);
+
+ fr->per_thread_data[thread_index].frame = NULL;
+ fr->per_thread_data[thread_index].buffer = NULL;
+ fr->per_thread_data[thread_index].next_data_offset = 0;
+}
+
+static void
+flow_report_process_send (vlib_main_t *vm, flow_report_main_t *frm,
+ ipfix_exporter_t *exp, flow_report_t *fr,
+ u32 next_node, u32 template_bi)
+{
+ vlib_frame_t *nf = 0;
+ u32 *to_next;
+
+ nf = vlib_get_frame_to_node (vm, next_node);
+ nf->n_vectors = 0;
+ to_next = vlib_frame_vector_args (nf);
+
+ if (template_bi != ~0)
+ {
+ to_next[0] = template_bi;
+ to_next++;
+ nf->n_vectors++;
+ }
+
+ nf = fr->flow_data_callback (frm, exp, fr, nf, to_next, next_node);
+ if (nf)
+ {
+ if (nf->n_vectors)
+ vlib_put_frame_to_node (vm, next_node, nf);
+ else
+ {
+ vlib_frame_free (vm, nf);
+ }
+ }
+}
+
static uword
flow_report_process (vlib_main_t * vm,
vlib_node_runtime_t * rt, vlib_frame_t * f)
@@ -256,9 +497,9 @@ flow_report_process (vlib_main_t * vm,
flow_report_t *fr;
u32 ip4_lookup_node_index;
vlib_node_t *ip4_lookup_node;
- vlib_frame_t *nf = 0;
+ u32 ip6_lookup_node_index;
+ vlib_node_t *ip6_lookup_node;
u32 template_bi;
- u32 *to_next;
int send_template;
f64 now, wait_time;
f64 def_wait_time = 5.0;
@@ -277,6 +518,10 @@ flow_report_process (vlib_main_t * vm,
ip4_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup");
ip4_lookup_node_index = ip4_lookup_node->index;
+ /* Enqueue pkts to ip6-lookup */
+ ip6_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip6-lookup");
+ ip6_lookup_node_index = ip6_lookup_node->index;
+
wait_time = def_wait_time;
while (1)
@@ -284,82 +529,85 @@ flow_report_process (vlib_main_t * vm,
vlib_process_wait_for_event_or_clock (vm, wait_time);
event_type = vlib_process_get_events (vm, &event_data);
vec_reset_length (event_data);
+ ipfix_exporter_t *exp;
+ pool_foreach (exp, frm->exporters)
+ {
- /* 5s delay by default, possibly reduced by template intervals */
- wait_time = def_wait_time;
-
- vec_foreach (fr, frm->reports)
- {
- f64 next_template;
- now = vlib_time_now (vm);
-
- /* Need to send a template packet? */
- send_template =
- now > (fr->last_template_sent + frm->template_interval);
- send_template += fr->last_template_sent == 0;
- template_bi = ~0;
- rv = 0;
-
- if (send_template)
- rv = send_template_packet (frm, fr, &template_bi);
-
- if (rv < 0)
- continue;
-
- /* decide if template should be sent sooner than current wait time */
- next_template =
- (fr->last_template_sent + frm->template_interval) - now;
- wait_time = clib_min (wait_time, next_template);
-
- nf = vlib_get_frame_to_node (vm, ip4_lookup_node_index);
- nf->n_vectors = 0;
- to_next = vlib_frame_vector_args (nf);
-
- if (template_bi != ~0)
- {
- to_next[0] = template_bi;
- to_next++;
- nf->n_vectors++;
- }
-
- nf = fr->flow_data_callback (frm, fr,
- nf, to_next, ip4_lookup_node_index);
- if (nf)
- vlib_put_frame_to_node (vm, ip4_lookup_node_index, nf);
- }
+ /* 5s delay by default, possibly reduced by template intervals */
+ wait_time = def_wait_time;
+
+ vec_foreach (fr, exp->reports)
+ {
+ f64 next_template;
+ now = vlib_time_now (vm);
+
+ /* Need to send a template packet? */
+ send_template =
+ now > (fr->last_template_sent + exp->template_interval);
+ send_template += fr->last_template_sent == 0;
+ template_bi = ~0;
+ rv = 0;
+
+ if (send_template)
+ rv = send_template_packet (frm, exp, fr, &template_bi);
+
+ if (rv < 0)
+ continue;
+
+ /*
+ * decide if template should be sent sooner than current wait
+ * time
+ */
+ next_template =
+ (fr->last_template_sent + exp->template_interval) - now;
+ wait_time = clib_min (wait_time, next_template);
+
+ if (ip_addr_version (&exp->ipfix_collector) == AF_IP4)
+ {
+ flow_report_process_send (
+ vm, frm, exp, fr, ip4_lookup_node_index, template_bi);
+ }
+ else
+ {
+ flow_report_process_send (
+ vm, frm, exp, fr, ip6_lookup_node_index, template_bi);
+ }
+ }
+ }
}
return 0; /* not so much */
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (flow_report_process_node) = {
.function = flow_report_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "flow-report-process",
};
-/* *INDENT-ON* */
int
-vnet_flow_report_add_del (flow_report_main_t * frm,
- vnet_flow_report_add_del_args_t * a,
- u16 * template_id)
+vnet_flow_report_add_del (ipfix_exporter_t *exp,
+ vnet_flow_report_add_del_args_t *a, u16 *template_id)
{
int i;
int found_index = ~0;
flow_report_t *fr;
flow_report_stream_t *stream;
u32 si;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vlib_main_t *vm = frm->vlib_main;
+ int size;
- si = find_stream (a->domain_id, a->src_port);
+ si = find_stream (exp, a->domain_id, a->src_port);
if (si == -2)
return VNET_API_ERROR_INVALID_VALUE;
if (si == -1 && a->is_add == 0)
return VNET_API_ERROR_NO_SUCH_ENTRY;
- for (i = 0; i < vec_len (frm->reports); i++)
+ for (i = 0; i < vec_len (exp->reports); i++)
{
- fr = vec_elt_at_index (frm->reports, i);
+ fr = vec_elt_at_index (exp->reports, i);
if (fr->opaque.as_uword == a->opaque.as_uword
&& fr->rewrite_callback == a->rewrite_callback
&& fr->flow_data_callback == a->flow_data_callback)
@@ -375,11 +623,24 @@ vnet_flow_report_add_del (flow_report_main_t * frm,
{
if (found_index != ~0)
{
- vec_delete (frm->reports, 1, found_index);
- stream = &frm->streams[si];
+ for (int i = 0;
+ i < vec_len (exp->reports[found_index].per_thread_data); i++)
+ {
+ u32 bi;
+ if (exp->reports[found_index].per_thread_data[i].buffer)
+ {
+ bi = vlib_get_buffer_index (
+ vm, exp->reports[found_index].per_thread_data[i].buffer);
+ vlib_buffer_free (vm, &bi, 1);
+ }
+ }
+ vec_free (exp->reports[found_index].per_thread_data);
+
+ vec_delete (exp->reports, 1, found_index);
+ stream = &exp->streams[si];
stream->n_reports--;
if (stream->n_reports == 0)
- delete_stream (si);
+ delete_stream (exp, si);
return 0;
}
return VNET_API_ERROR_NO_SUCH_ENTRY;
@@ -390,19 +651,19 @@ vnet_flow_report_add_del (flow_report_main_t * frm,
if (si == -1)
{
- stream = add_stream ();
+ stream = add_stream (exp);
stream->domain_id = a->domain_id;
stream->src_port = a->src_port;
stream->sequence_number = 0;
stream->n_reports = 0;
- si = stream - frm->streams;
+ si = stream - exp->streams;
}
else
- stream = &frm->streams[si];
+ stream = &exp->streams[si];
stream->n_reports++;
- vec_add2 (frm->reports, fr, 1);
+ vec_add2 (exp->reports, fr, 1);
fr->stream_index = si;
fr->template_id = 256 + stream->next_template_no;
@@ -414,6 +675,14 @@ vnet_flow_report_add_del (flow_report_main_t * frm,
fr->report_elements = a->report_elements;
fr->n_report_elements = a->n_report_elements;
fr->stream_indexp = a->stream_indexp;
+ vec_validate (fr->per_thread_data, tm->n_threads);
+ /* Store the flow_report index back in the args struct */
+ a->flow_report_index = fr - exp->reports;
+
+ size = 0;
+ for (int i = 0; i < fr->n_report_elements; i++)
+ size += fr->report_elements[i].size;
+ fr->data_record_size = size;
if (template_id)
*template_id = fr->template_id;
@@ -442,50 +711,50 @@ flow_report_add_del_error_to_clib_error (int error)
}
void
-vnet_flow_reports_reset (flow_report_main_t * frm)
+vnet_flow_reports_reset (ipfix_exporter_t *exp)
{
flow_report_t *fr;
u32 i;
- for (i = 0; i < vec_len (frm->streams); i++)
- if (stream_index_valid (i))
- frm->streams[i].sequence_number = 0;
+ for (i = 0; i < vec_len (exp->streams); i++)
+ if (stream_index_valid (exp, i))
+ exp->streams[i].sequence_number = 0;
- vec_foreach (fr, frm->reports)
- {
- fr->update_rewrite = 1;
- fr->last_template_sent = 0;
- }
+ vec_foreach (fr, exp->reports)
+ {
+ fr->update_rewrite = 1;
+ fr->last_template_sent = 0;
+ }
}
void
-vnet_stream_reset (flow_report_main_t * frm, u32 stream_index)
+vnet_stream_reset (ipfix_exporter_t *exp, u32 stream_index)
{
flow_report_t *fr;
- frm->streams[stream_index].sequence_number = 0;
+ exp->streams[stream_index].sequence_number = 0;
- vec_foreach (fr, frm->reports)
- if (frm->reports->stream_index == stream_index)
- {
- fr->update_rewrite = 1;
- fr->last_template_sent = 0;
- }
+ vec_foreach (fr, exp->reports)
+ if (exp->reports->stream_index == stream_index)
+ {
+ fr->update_rewrite = 1;
+ fr->last_template_sent = 0;
+ }
}
int
-vnet_stream_change (flow_report_main_t * frm,
- u32 old_domain_id, u16 old_src_port,
+vnet_stream_change (ipfix_exporter_t *exp, u32 old_domain_id, u16 old_src_port,
u32 new_domain_id, u16 new_src_port)
{
- i32 stream_index = find_stream (old_domain_id, old_src_port);
+ i32 stream_index = find_stream (exp, old_domain_id, old_src_port);
+
if (stream_index < 0)
return 1;
- flow_report_stream_t *stream = &frm->streams[stream_index];
+ flow_report_stream_t *stream = &exp->streams[stream_index];
stream->domain_id = new_domain_id;
stream->src_port = new_src_port;
if (old_domain_id != new_domain_id || old_src_port != new_src_port)
- vnet_stream_reset (frm, stream_index);
+ vnet_stream_reset (exp, stream_index);
return 0;
}
@@ -495,25 +764,26 @@ set_ipfix_exporter_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
flow_report_main_t *frm = &flow_report_main;
- ip4_address_t collector, src;
+ ip_address_t collector = IP_ADDRESS_V4_ALL_0S, src = IP_ADDRESS_V4_ALL_0S;
u16 collector_port = UDP_DST_PORT_ipfix;
u32 fib_id;
u32 fib_index = ~0;
- collector.as_u32 = 0;
- src.as_u32 = 0;
u32 path_mtu = 512; // RFC 7011 section 10.3.3.
u32 template_interval = 20;
u8 udp_checksum = 0;
+ ipfix_exporter_t *exp = pool_elt_at_index (frm->exporters, 0);
+ u32 ip_header_size;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "collector %U", unformat_ip4_address, &collector))
+ if (unformat (input, "collector %U", unformat_ip4_address,
+ &collector.ip.ip4))
;
else if (unformat (input, "port %U", unformat_udp_port,
&collector_port))
;
- else if (unformat (input, "src %U", unformat_ip4_address, &src))
+ else if (unformat (input, "src %U", unformat_ip4_address, &src.ip.ip4))
;
else if (unformat (input, "fib-id %u", &fib_id))
{
@@ -533,8 +803,15 @@ set_ipfix_exporter_command_fn (vlib_main_t * vm,
break;
}
- if (collector.as_u32 != 0 && src.as_u32 == 0)
+ /*
+ * If the collector address is set then the src must be too.
+ * Collector address can be set to 0 to disable exporter
+ */
+ if (!ip_address_is_zero (&collector) && ip_address_is_zero (&src))
return clib_error_return (0, "src address required");
+ if (collector.version != src.version)
+ return clib_error_return (
+ 0, "src address and dest address must use same IP version");
if (path_mtu > 1450 /* vpp does not support fragmentation */ )
return clib_error_return (0, "too big path-mtu value, maximum is 1450");
@@ -542,28 +819,38 @@ set_ipfix_exporter_command_fn (vlib_main_t * vm,
if (path_mtu < 68)
return clib_error_return (0, "too small path-mtu value, minimum is 68");
+ /* Calculate how much header data we need. */
+ if (collector.version == AF_IP4)
+ ip_header_size = sizeof (ip4_header_t);
+ else
+ ip_header_size = sizeof (ip6_header_t);
+ exp->all_headers_size = ip_header_size + sizeof (udp_header_t) +
+ sizeof (ipfix_message_header_t) +
+ sizeof (ipfix_set_header_t);
+
/* Reset report streams if we are reconfiguring IP addresses */
- if (frm->ipfix_collector.as_u32 != collector.as_u32 ||
- frm->src_address.as_u32 != src.as_u32 ||
- frm->collector_port != collector_port)
- vnet_flow_reports_reset (frm);
-
- frm->ipfix_collector.as_u32 = collector.as_u32;
- frm->collector_port = collector_port;
- frm->src_address.as_u32 = src.as_u32;
- frm->fib_index = fib_index;
- frm->path_mtu = path_mtu;
- frm->template_interval = template_interval;
- frm->udp_checksum = udp_checksum;
-
- if (collector.as_u32)
- vlib_cli_output (vm, "Collector %U, src address %U, "
+ if (ip_address_cmp (&exp->ipfix_collector, &collector) ||
+ ip_address_cmp (&exp->src_address, &src) ||
+ exp->collector_port != collector_port)
+ vnet_flow_reports_reset (exp);
+
+ exp->ipfix_collector = collector;
+ exp->collector_port = collector_port;
+ exp->src_address = src;
+ exp->fib_index = fib_index;
+ exp->path_mtu = path_mtu;
+ exp->template_interval = template_interval;
+ exp->udp_checksum = udp_checksum;
+
+ if (collector.ip.ip4.as_u32)
+ vlib_cli_output (vm,
+ "Collector %U, src address %U, "
"fib index %d, path MTU %u, "
"template resend interval %us, "
"udp checksum %s",
- format_ip4_address, &frm->ipfix_collector,
- format_ip4_address, &frm->src_address,
- fib_index, path_mtu, template_interval,
+ format_ip4_address, &exp->ipfix_collector.ip.ip4,
+ format_ip4_address, &exp->src_address.ip.ip4, fib_index,
+ path_mtu, template_interval,
udp_checksum ? "enabled" : "disabled");
else
vlib_cli_output (vm, "IPFIX Collector is disabled");
@@ -573,7 +860,6 @@ set_ipfix_exporter_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ipfix_exporter_command, static) = {
.path = "set ipfix exporter",
.short_help = "set ipfix exporter "
@@ -584,7 +870,6 @@ VLIB_CLI_COMMAND (set_ipfix_exporter_command, static) = {
"[udp-checksum]",
.function = set_ipfix_exporter_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -596,25 +881,31 @@ ipfix_flush_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipfix_flush_command, static) = {
.path = "ipfix flush",
.short_help = "flush the current ipfix data [for make test]",
.function = ipfix_flush_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
flow_report_init (vlib_main_t * vm)
{
flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp;
frm->vlib_main = vm;
frm->vnet_main = vnet_get_main ();
frm->unix_time_0 = time (0);
frm->vlib_time_0 = vlib_time_now (frm->vlib_main);
- frm->fib_index = ~0;
-
+ /*
+ * Make sure that we can always access the first exporter for
+ * backwards compatibility reasons.
+ */
+ pool_alloc (frm->exporters, IPFIX_EXPORTERS_MAX);
+ pool_get (frm->exporters, exp);
+ /* Verify that this is at index 0 */
+ ASSERT (frm->exporters == exp);
+ exp->fib_index = ~0;
return 0;
}
diff --git a/src/vnet/ipfix-export/flow_report.h b/src/vnet/ipfix-export/flow_report.h
index f40015879c4..cd0cafb6158 100644
--- a/src/vnet/ipfix-export/flow_report.h
+++ b/src/vnet/ipfix-export/flow_report.h
@@ -20,6 +20,7 @@
#include <vnet/ethernet/ethernet.h>
#include <vnet/ethernet/packet.h>
#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip_types.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/ip/ip6_packet.h>
#include <vnet/udp/udp_packet.h>
@@ -45,27 +46,31 @@ typedef struct
ipfix_template_packet_t ipfix;
} ip4_ipfix_template_packet_t;
+/* Used to build the rewrite */
+typedef struct
+{
+ ip6_header_t ip6;
+ udp_header_t udp;
+ ipfix_template_packet_t ipfix;
+} ip6_ipfix_template_packet_t;
+
struct flow_report_main;
struct flow_report;
+struct ipfix_exporter;
-typedef vlib_frame_t *(vnet_flow_data_callback_t) (struct flow_report_main *,
- struct flow_report *,
- vlib_frame_t *, u32 *,
- u32);
+typedef vlib_frame_t *(vnet_flow_data_callback_t) (
+ struct flow_report_main *frm, struct ipfix_exporter *exp,
+ struct flow_report *, vlib_frame_t *, u32 *, u32);
-typedef u8 *(vnet_flow_rewrite_callback_t) (struct flow_report_main *,
+typedef u8 *(vnet_flow_rewrite_callback_t) (struct ipfix_exporter *exp,
struct flow_report *,
- ip4_address_t *,
- ip4_address_t *, u16,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index);
-
-u8 *vnet_flow_rewrite_generic_callback (struct flow_report_main *,
- struct flow_report *,
- ip4_address_t *,
- ip4_address_t *, u16,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index);
+ u16, ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index);
+
+u8 *vnet_flow_rewrite_generic_callback (struct ipfix_exporter *exp,
+ struct flow_report *, u16,
+ ipfix_report_element_t *elts,
+ u32 n_elts, u32 *stream_index);
typedef union
{
@@ -73,6 +78,16 @@ typedef union
uword as_uword;
} opaque_t;
+/*
+ * A stream represents an IPFIX session to a destination. We can have
+ * multiple streams to the same destination, but each one has its own
+ * domain and source port. A stream has a sequence number for that
+ * session. A stream may contain multiple templates (i.e multiple for
+ * reports) and each stream also has its own template space.
+ *
+ * A stream has per thread state so that data packets can be built
+ * and send on multiple threads at the same time.
+ */
typedef struct
{
u32 domain_id;
@@ -82,11 +97,37 @@ typedef struct
u16 next_template_no;
} flow_report_stream_t;
+/*
+ * For each flow_report we want to be able to build buffers/frames per thread.
+ */
+typedef struct
+{
+ vlib_buffer_t *buffer;
+ vlib_frame_t *frame;
+ u16 next_data_offset;
+ /*
+ * We need this per stream as the IPFIX sequence number is the count of
+ * data record sent, not the count of packets with data records sent.
+ * See RFC 7011, Sec 3.1
+ */
+ u8 n_data_records;
+} flow_report_per_thread_t;
+
+/*
+ * A flow report represents a group of fields that are to be exported.
+ * Each flow_report has an associated template that is generated when
+ * the flow_report is added. Each flow_report is associated with a
+ * stream, and multiple flow_reports can use the same stream. When
+ * adding a flow_report the keys for the stream are the domain_id
+ * and the source_port.
+ */
typedef struct flow_report
{
/* ipfix rewrite, set by callback */
u8 *rewrite;
u16 template_id;
+ int data_record_size;
+ flow_report_per_thread_t *per_thread_data;
u32 stream_index;
f64 last_template_sent;
int update_rewrite;
@@ -107,15 +148,24 @@ typedef struct flow_report
vnet_flow_data_callback_t *flow_data_callback;
} flow_report_t;
-typedef struct flow_report_main
+/*
+ * The maximum number of ipfix exporters we can have at once
+ */
+#define IPFIX_EXPORTERS_MAX 5
+
+/*
+ * We support multiple exporters. Each one has its own configured
+ * destination, and its own set of reports and streams.
+ */
+typedef struct ipfix_exporter
{
flow_report_t *reports;
flow_report_stream_t *streams;
/* ipfix collector ip address, port, our ip address, fib index */
- ip4_address_t ipfix_collector;
+ ip_address_t ipfix_collector;
u16 collector_port;
- ip4_address_t src_address;
+ ip_address_t src_address;
u32 fib_index;
/* Path MTU */
@@ -127,6 +177,23 @@ typedef struct flow_report_main
/* UDP checksum calculation enable flag */
u8 udp_checksum;
+ /*
+ * The amount of data needed for all the headers, prior to the first
+ * flowset (template or data or ...) This is mostly dependent on the
+ * L3 and L4 protocols in use.
+ */
+ u32 all_headers_size;
+} ipfix_exporter_t;
+
+typedef struct flow_report_main
+{
+ /*
+ * A pool of the exporters. Entry 0 is always there for backwards
+ * compatability reasons. Entries 1 and above have to be created by
+ * the users.
+ */
+ ipfix_exporter_t *exporters;
+
/* time scale transform. Joy. */
u32 unix_time_0;
f64 vlib_time_0;
@@ -142,8 +209,6 @@ extern flow_report_main_t flow_report_main;
extern vlib_node_registration_t flow_report_process_node;
-int vnet_flow_report_enable_disable (u32 sw_if_index, u32 table_index,
- int enable_disable);
typedef struct
{
vnet_flow_data_callback_t *flow_data_callback;
@@ -155,21 +220,52 @@ typedef struct
u32 domain_id;
u16 src_port;
u32 *stream_indexp;
+ /*
+ * When adding a flow report, the index of the flow report is stored
+ * here on success.
+ */
+ u32 flow_report_index;
} vnet_flow_report_add_del_args_t;
-int vnet_flow_report_add_del (flow_report_main_t * frm,
- vnet_flow_report_add_del_args_t * a,
- u16 * template_id);
+int vnet_flow_report_add_del (ipfix_exporter_t *exp,
+ vnet_flow_report_add_del_args_t *a,
+ u16 *template_id);
clib_error_t *flow_report_add_del_error_to_clib_error (int error);
-void vnet_flow_reports_reset (flow_report_main_t * frm);
+void vnet_flow_reports_reset (ipfix_exporter_t *exp);
-void vnet_stream_reset (flow_report_main_t * frm, u32 stream_index);
+void vnet_stream_reset (ipfix_exporter_t *exp, u32 stream_index);
-int vnet_stream_change (flow_report_main_t * frm,
- u32 old_domain_id, u16 old_src_port,
- u32 new_domain_id, u16 new_src_port);
+int vnet_stream_change (ipfix_exporter_t *exp, u32 old_domain_id,
+ u16 old_src_port, u32 new_domain_id, u16 new_src_port);
+
+/*
+ * Search all the exporters for one that has a matching destination address.
+ */
+ipfix_exporter_t *
+vnet_ipfix_exporter_lookup (const ip_address_t *ipfix_collector);
+
+/*
+ * Get the currently in use buffer for the given stream on the given core.
+ * If there is no current buffer then allocate a new one and return that.
+ * This is the buffer that data records should be written into. The offset
+ * currently in use is stored in the per-thread data for the stream and
+ * should be updated as new records are written in.
+ */
+vlib_buffer_t *vnet_ipfix_exp_get_buffer (vlib_main_t *vm,
+ ipfix_exporter_t *exp,
+ flow_report_t *fr, u32 thread_index);
+
+/*
+ * Send the provided buffer. At this stage the buffer should be populated
+ * with data records, with the offset in use stored in the stream per thread
+ * data. This func will fix up all the headers and then send the buffer.
+ */
+void vnet_ipfix_exp_send_buffer (vlib_main_t *vm, ipfix_exporter_t *exp,
+ flow_report_t *fr,
+ flow_report_stream_t *stream,
+ u32 thread_index, vlib_buffer_t *b0);
#endif /* __included_vnet_flow_report_h__ */
diff --git a/src/vnet/ipfix-export/flow_report_classify.c b/src/vnet/ipfix-export/flow_report_classify.c
index 21b6411a292..9e1b99f252d 100644
--- a/src/vnet/ipfix-export/flow_report_classify.c
+++ b/src/vnet/ipfix-export/flow_report_classify.c
@@ -29,13 +29,10 @@ typedef struct
flow_report_classify_main_t flow_report_classify_main;
u8 *
-ipfix_classify_template_rewrite (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+ipfix_classify_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index)
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index)
{
flow_report_classify_main_t *fcm = &flow_report_classify_main;
vnet_classify_table_t *tblp;
@@ -61,7 +58,7 @@ ipfix_classify_template_rewrite (flow_report_main_t * frm,
u8 *virt_mask;
u8 *real_mask;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
ipfix_classify_table_t *table = &fcm->tables[flow_table_index];
@@ -109,8 +106,8 @@ ipfix_classify_template_rewrite (flow_report_main_t * frm,
ip->ip_version_and_header_length = 0x45;
ip->ttl = 254;
ip->protocol = IP_PROTOCOL_UDP;
- ip->src_address.as_u32 = src_address->as_u32;
- ip->dst_address.as_u32 = collector_address->as_u32;
+ ip->src_address.as_u32 = exp->src_address.ip.ip4.as_u32;
+ ip->dst_address.as_u32 = exp->ipfix_collector.ip.ip4.as_u32;
udp->src_port = clib_host_to_net_u16 (stream->src_port);
udp->dst_port = clib_host_to_net_u16 (collector_port);
udp->length = clib_host_to_net_u16 (vec_len (rewrite) - sizeof (*ip));
@@ -158,9 +155,9 @@ ipfix_classify_template_rewrite (flow_report_main_t * frm,
}
vlib_frame_t *
-ipfix_classify_send_flows (flow_report_main_t * frm,
- flow_report_t * fr,
- vlib_frame_t * f, u32 * to_next, u32 node_index)
+ipfix_classify_send_flows (flow_report_main_t *frm, ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f, u32 *to_next,
+ u32 node_index)
{
flow_report_classify_main_t *fcm = &flow_report_classify_main;
vnet_classify_main_t *vcm = &vnet_classify_main;
@@ -182,7 +179,6 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
tcpudp_header_t *tcpudp;
udp_header_t *udp;
int field_index;
- u32 records_this_buffer;
u16 new_l0, old_l0;
ip_csum_t sum0;
vlib_main_t *vm = frm->vlib_main;
@@ -191,7 +187,7 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
u8 transport_protocol;
u8 *virt_key;
- stream = &frm->streams[fr->stream_index];
+ stream = &exp->streams[fr->stream_index];
ipfix_classify_table_t *table = &fcm->tables[flow_table_index];
@@ -233,7 +229,7 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
b0->current_length = copy_len;
b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = exp->fib_index;
tp = vlib_buffer_get_current (b0);
ip = (ip4_header_t *) & tp->ip4;
@@ -254,7 +250,6 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
next_offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
record_offset = next_offset;
- records_this_buffer = 0;
}
field_index = 0;
@@ -278,14 +273,13 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
sizeof (packets));
next_offset += sizeof (packets);
}
- records_this_buffer++;
stream->sequence_number++;
/* Next record will have the same size as this record */
u32 next_record_size = next_offset - record_offset;
record_offset = next_offset;
- if (next_offset + next_record_size > frm->path_mtu)
+ if (next_offset + next_record_size > exp->path_mtu)
{
s->set_id_length = ipfix_set_id_length (fr->template_id,
next_offset -
@@ -314,7 +308,7 @@ ipfix_classify_send_flows (flow_report_main_t * frm,
udp->length =
clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
- if (frm->udp_checksum)
+ if (exp->udp_checksum)
{
/* RFC 7011 section 10.3.2. */
udp->checksum =
@@ -370,7 +364,7 @@ flush:
ip->length = new_l0;
udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
- if (frm->udp_checksum)
+ if (exp->udp_checksum)
{
/* RFC 7011 section 10.3.2. */
udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
@@ -397,7 +391,7 @@ ipfix_classify_table_add_del_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
flow_report_classify_main_t *fcm = &flow_report_classify_main;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
vnet_flow_report_add_del_args_t args;
ipfix_classify_table_t *table;
int rv;
@@ -475,7 +469,7 @@ ipfix_classify_table_add_del_command_fn (vlib_main_t * vm,
args.domain_id = fcm->domain_id;
args.src_port = fcm->src_port;
- rv = vnet_flow_report_add_del (frm, &args, NULL);
+ rv = vnet_flow_report_add_del (exp, &args, NULL);
error = flow_report_add_del_error_to_clib_error (rv);
@@ -486,13 +480,11 @@ ipfix_classify_table_add_del_command_fn (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipfix_classify_table_add_del_command, static) = {
.path = "ipfix classify table",
.short_help = "ipfix classify table add|del <table-index>",
.function = ipfix_classify_table_add_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
@@ -500,7 +492,7 @@ set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
vlib_cli_command_t * cmd)
{
flow_report_classify_main_t *fcm = &flow_report_classify_main;
- flow_report_main_t *frm = &flow_report_main;
+ ipfix_exporter_t *exp = &flow_report_main.exporters[0];
u32 domain_id = 1;
u32 src_port = UDP_DST_PORT_ipfix;
@@ -518,7 +510,7 @@ set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
if (fcm->src_port != 0 &&
(fcm->domain_id != domain_id || fcm->src_port != (u16) src_port))
{
- int rv = vnet_stream_change (frm, fcm->domain_id, fcm->src_port,
+ int rv = vnet_stream_change (exp, fcm->domain_id, fcm->src_port,
domain_id, (u16) src_port);
ASSERT (rv == 0);
}
@@ -529,14 +521,12 @@ set_ipfix_classify_stream_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_ipfix_classify_stream_command, static) = {
.path = "set ipfix classify stream",
.short_help = "set ipfix classify stream"
"[domain <domain-id>] [src-port <src-port>]",
.function = set_ipfix_classify_stream_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
flow_report_classify_init (vlib_main_t * vm)
diff --git a/src/vnet/ipfix-export/flow_report_classify.h b/src/vnet/ipfix-export/flow_report_classify.h
index a923f36714a..8ca40688599 100644
--- a/src/vnet/ipfix-export/flow_report_classify.h
+++ b/src/vnet/ipfix-export/flow_report_classify.h
@@ -112,18 +112,15 @@ ipfix_classify_delete_table (u32 index)
fcm->tables[index].classify_table_index = ~0;
}
-u8 *ipfix_classify_template_rewrite (flow_report_main_t * frm,
- flow_report_t * fr,
- ip4_address_t * collector_address,
- ip4_address_t * src_address,
+u8 *ipfix_classify_template_rewrite (ipfix_exporter_t *exp, flow_report_t *fr,
u16 collector_port,
- ipfix_report_element_t * elts,
- u32 n_elts, u32 * stream_index);
+ ipfix_report_element_t *elts, u32 n_elts,
+ u32 *stream_index);
-vlib_frame_t *ipfix_classify_send_flows (flow_report_main_t * frm,
- flow_report_t * fr,
- vlib_frame_t * f,
- u32 * to_next, u32 node_index);
+vlib_frame_t *ipfix_classify_send_flows (flow_report_main_t *frm,
+ ipfix_exporter_t *exp,
+ flow_report_t *fr, vlib_frame_t *f,
+ u32 *to_next, u32 node_index);
#endif /* __included_flow_report_classify_h__ */
diff --git a/src/vnet/ipfix-export/ipfix_doc.md b/src/vnet/ipfix-export/ipfix_doc.md
deleted file mode 100644
index edae3f73660..00000000000
--- a/src/vnet/ipfix-export/ipfix_doc.md
+++ /dev/null
@@ -1,355 +0,0 @@
-# IPFIX support {#ipfix_doc}
-
-VPP includes a high-performance IPFIX record exporter. This note
-explains how to use the internal APIs to export IPFIX data, and how to
-configure and send the required IPFIX templates.
-
-As you'll see, a bit of typing is required.
-
-## First: create an ipfix "report"
-
-Include the flow report header file, fill out a @ref
-vnet_flow_report_add_del_args_t structure, and call vnet_flow_report_add_del.
-
-```{.c}
- #include <vnet/ipfix-export/flow_report.h>
- /* Defined in flow_report.h, of interest when constructing reports */
-
- /* ipfix field definitions for a particular report */
- typedef struct
- {
- u32 info_element;
- u32 size;
- } ipfix_report_element_t;
-
- /* Report add/del argument structure */
- typedef struct
- {
- /* Callback to flush current ipfix packet / frame */
- vnet_flow_data_callback_t *flow_data_callback;
-
- /* Callback to build the template packet rewrite string */
- vnet_flow_rewrite_callback_t *rewrite_callback;
-
- /* List of ipfix elements in the report */
- ipfix_report_element_t *report_elements;
- u32 n_report_elements;
- /* Kept in flow report, used e.g. by flow classifier */
- opaque_t opaque;
- /* Add / delete a report */
- int is_add;
- /* Ipfix "domain-ID", see RFC, set as desired */
- u32 domain_id;
- /* ipfix packet source port, often set to UDP_DST_PORT_ipfix */
- u16 src_port;
- /* Set by ipfix infra, needed to send data packets */
- u32 *stream_indexp;
- } vnet_flow_report_add_del_args_t;
-
- /* Private header file contents */
-
- /* Report ipfix element definition */
- #define foreach_simple_report_ipfix_element \
- _(sourceIPv4Address, 4) \
- _(destinationIPv4Address, 4) \
- _(sourceTransportPort, 2) \
- _(destinationTransportPort, 2) \
- _(protocolIdentifier, 1) \
- _(flowStartMicroseconds, 8) \
- _(flowEndMicroseconds, 8)
-
- static ipfix_report_element_t simple_report_elements[] = {
- #define _(a,b) {a,b},
- foreach_simple_report_ipfix_element
- #undef _
- };
-
- typedef struct
- {
- /** Buffers and frames, per thread */
- vlib_buffer_t **buffers_by_thread;
- vlib_frame_t **frames_by_thread;
- u32 *next_record_offset_by_thread;
-
- /** Template ID's */
- u16 *template_ids;
-
- /** Time reference pair */
- u64 usec_time_0;
- f64 vlib_time_0;
-
- /** Stream index */
- u32 stream_index;
-
- /* Convenience */
- flow_report_main_t *flow_report_main;
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
- } my_logging_main_t;
-
- extern my_logging_main_t my_logging_main;
-
- ...
-
- /* Recitations */
- flow_report_main_t *frm = &flow_report_main;
- my_logging_main_t *mlm = &my_logging_main;
- vnet_flow_report_add_del_args_t a;
- int rv;
- u16 template_id;
-
- ...
-
- /* Init function: set up time reference pair */
- mlm->vlib_time_0 = vlib_time_now (vm);
- mlm->milisecond_time_0 = unix_time_now_nsec () * 1e-6;
-
- ...
-
- /* Create a report */
- memset (&a, 0, sizeof (a));
- a.is_add = 1 /* to enable the report */;
- a.domain_id = 1 /* pick a domain ID */;
- a.src_port = UDP_DST_PORT_ipfix /* src port for reports */;
-
- /* Use the generic template packet rewrite string generator */
- a.rewrite_callback = vnet_flow_rewrite_generic_callback;
-
- /* Supply a list of ipfix report elements */
- a.report_elements = simple_report_elements;
- a.n_report_elements = ARRAY_LEN (simple_report_elements);
-
- /* Pointer to the ipfix stream index, set by the report infra */
- a.stream_indexp = &mlm->stream_index;
- a.flow_data_callback = my_flow_data_callback;
-
- /* Create the report */
- rv = vnet_flow_report_add_del (frm, &a, &template_id);
- if (rv)
- oops...
-
- /* Save the template-ID for later use */
- mlm->template_id = template_id;
-
-```
-
-Several things are worth describing in more detail.
-
-### vnet_flow_rewrite_generic_callback programming
-
-This generic callback helps build ipfix template packets. When
-registering an ipfix report, pass an (array, count)
-of ipfix elements as shown above.
-
-### my_flow_data_callback
-
-The ipfix flow export infrastructure calls this callback to flush the
-current ipfix packet; to make sure that ipfix data is not retained for
-an unreasonably long period of time.
-
-We typically code it as shown below, to call an application-specific
-function with (uninteresting arguments), and "do_flush = 1":
-
-
-```{.c}
-
- vlib_frame_t *my_flow_data_callback
- (flow_report_main_t * frm,
- flow_report_t * fr,
- vlib_frame_t * f,
- u32 * to_next, u32 node_index)
- {
-
- my_buffer_flow_record (0, ... , 0, 1 /* do_flush */);
- return f;
- }
-```
-
-### my_flow_data_header
-
-This function creates the packet header for an ipfix data packet
-
-```{.c}
-
- static inline void
- my_flow_report_header (flow_report_main_t * frm,
- vlib_buffer_t * b0, u32 * offset)
- {
- my_logging_main_t *mlm = &my_logging_main;
- flow_report_stream_t *stream;
- ip4_ipfix_template_packet_t *tp;
- ipfix_message_header_t *h = 0;
-
-
- ipfix_set_header_t *s = 0;
- ip4_header_t *ip;
- udp_header_t *udp;
-
- stream = &frm->streams[mlm->stream_index];
-
- b0->current_data = 0;
- b0->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h) +
- sizeof (*s);
- b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_F_FLOW_REPORT);
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
- tp = vlib_buffer_get_current (b0);
- ip = (ip4_header_t *) & tp->ip4;
- udp = (udp_header_t *) (ip + 1);
- h = (ipfix_message_header_t *) (udp + 1);
- s = (ipfix_set_header_t *) (h + 1);
-
- ip->ip_version_and_header_length = 0x45;
- ip->ttl = 254;
- ip->protocol = IP_PROTOCOL_UDP;
- ip->flags_and_fragment_offset = 0;
- ip->src_address.as_u32 = frm->src_address.as_u32;
- ip->dst_address.as_u32 = frm->ipfix_collector.as_u32;
- udp->src_port = clib_host_to_net_u16 (stream->src_port);
- udp->dst_port = clib_host_to_net_u16 (frm->collector_port);
- udp->checksum = 0;
-
- h->export_time = clib_host_to_net_u32 ((u32)
- (((f64) frm->unix_time_0) +
- (vlib_time_now (frm->vlib_main) -
- frm->vlib_time_0)));
- h->sequence_number = clib_host_to_net_u32 (stream->sequence_number++);
- h->domain_id = clib_host_to_net_u32 (stream->domain_id);
-
- *offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
- }
- ```
-
- ### fixup and transmit a flow record
-
- ```{.c}
-
- static inline void
- my_send_ipfix_pkt (flow_report_main_t * frm,
- vlib_frame_t * f, vlib_buffer_t * b0, u16 template_id)
- {
- ip4_ipfix_template_packet_t *tp;
- ipfix_message_header_t *h = 0;
- ipfix_set_header_t *s = 0;
- ip4_header_t *ip;
- udp_header_t *udp;
- vlib_main_t *vm = frm->vlib_main;
-
- tp = vlib_buffer_get_current (b0);
- ip = (ip4_header_t *) & tp->ip4;
- udp = (udp_header_t *) (ip + 1);
- h = (ipfix_message_header_t *) (udp + 1);
- s = (ipfix_set_header_t *) (h + 1);
-
- s->set_id_length = ipfix_set_id_length (template_id,
- b0->current_length -
- (sizeof (*ip) + sizeof (*udp) +
- sizeof (*h)));
- h->version_length = version_length (b0->current_length -
- (sizeof (*ip) + sizeof (*udp)));
-
- ip->length = clib_host_to_net_u16 (b0->current_length);
- ip->checksum = ip4_header_checksum (ip);
- udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
-
- if (frm->udp_checksum)
- {
- udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
- if (udp->checksum == 0)
- udp->checksum = 0xffff;
- }
-
- ASSERT (ip4_header_checksum_is_valid (ip));
-
- vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
- }
- ```
-
- ### my_buffer_flow_record
-
- This is the key routine which paints individual flow records into
- an ipfix packet under construction. It's pretty straightforward
- (albeit stateful) vpp data-plane code. The code shown below is
- thread-safe by construction.
-
- ```{.c}
- static inline void
- my_buffer_flow_record_internal (my_flow_record_t * rp, int do_flush,
- u32 thread_index)
- {
- vlib_main_t *vm = vlib_mains[thread_index];
- my_logging_main_t *mlm = &jvp_ipfix_main;
- flow_report_main_t *frm = &flow_report_main;
- vlib_frame_t *f;
- vlib_buffer_t *b0 = 0;
- u32 bi0 = ~0;
- u32 offset;
-
- b0 = mlm->buffers_by_thread[thread_index];
-
- if (PREDICT_FALSE (b0 == 0))
- {
- if (do_flush)
- return;
-
- if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
- {
- clib_warning ("can't allocate ipfix data buffer");
- return;
- }
-
- b0 = vlib_get_buffer (vm, bi0);
- offset = 0;
- mlm->buffers_by_thread[thread_index] = b0;
- }
- else
- {
- bi0 = vlib_get_buffer_index (vm, b0);
- offset = mlm->next_record_offset_by_thread[thread_index];
- }
-
- f = mlm->frames_by_thread[thread_index];
- if (PREDICT_FALSE (f == 0))
- {
- u32 *to_next;
- f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
- mlm->frames_by_thread[thread_index] = f;
- to_next = vlib_frame_vector_args (f);
- to_next[0] = bi0;
- f->n_vectors = 1;
- mlm->frames_by_thread[thread_index] = f;
- }
-
- if (PREDICT_FALSE (offset == 0))
- my_flow_report_header (frm, b0, &offset);
-
- if (PREDICT_TRUE (do_flush == 0))
- {
- /* Paint the new ipfix data record into the buffer */
- clib_memcpy (b0->data + offset, rp, sizeof (*rp));
- offset += sizeof (*rp);
- b0->current_length += sizeof (*rp);
- }
-
- if (PREDICT_FALSE (do_flush || (offset + sizeof (*rp)) > frm->path_mtu))
- {
- /* Nothing to send? */
- if (offset == 0)
- return;
-
- send_ipfix_pkt (frm, f, b0, mlm->template_ids[0]);
- mlm->buffers_by_thread[thread_index] = 0;
- mlm->frames_by_thread[thread_index] = 0;
- offset = 0;
- }
- mlm->next_record_offset_by_thread[thread_index] = offset;
- }
-
- static void
- my_buffer_flow_record (my_flow_record_t * rp, int do_flush)
- {
- u32 thread_index = vlib_get_thread_index();
- my_buffer_flow_record_internal (rp, do_flush, thread_index);
- }
-
-```
diff --git a/src/vnet/ipfix-export/ipfix_doc.rst b/src/vnet/ipfix-export/ipfix_doc.rst
new file mode 100644
index 00000000000..ac660b4bc93
--- /dev/null
+++ b/src/vnet/ipfix-export/ipfix_doc.rst
@@ -0,0 +1,360 @@
+.. _ipfix_doc:
+
+IPFIX support
+=============
+
+VPP includes a high-performance IPFIX record exporter. This note
+explains how to use the internal APIs to export IPFIX data, and how to
+configure and send the required IPFIX templates.
+
+As you’ll see, a bit of typing is required.
+
+First: create an ipfix “report”
+-------------------------------
+
+Include the flow report header file, fill out a @ref
+vnet_flow_report_add_del_args_t structure, and call
+vnet_flow_report_add_del.
+
+.. code:: c
+
+ #include <vnet/ipfix-export/flow_report.h>
+ /* Defined in flow_report.h, of interest when constructing reports */
+
+ /* ipfix field definitions for a particular report */
+ typedef struct
+ {
+ u32 info_element;
+ u32 size;
+ } ipfix_report_element_t;
+
+ /* Report add/del argument structure */
+ typedef struct
+ {
+ /* Callback to flush current ipfix packet / frame */
+ vnet_flow_data_callback_t *flow_data_callback;
+
+ /* Callback to build the template packet rewrite string */
+ vnet_flow_rewrite_callback_t *rewrite_callback;
+
+ /* List of ipfix elements in the report */
+ ipfix_report_element_t *report_elements;
+ u32 n_report_elements;
+ /* Kept in flow report, used e.g. by flow classifier */
+ opaque_t opaque;
+ /* Add / delete a report */
+ int is_add;
+ /* Ipfix "domain-ID", see RFC, set as desired */
+ u32 domain_id;
+ /* ipfix packet source port, often set to UDP_DST_PORT_ipfix */
+ u16 src_port;
+ /* Set by ipfix infra, needed to send data packets */
+ u32 *stream_indexp;
+ } vnet_flow_report_add_del_args_t;
+
+ /* Private header file contents */
+
+ /* Report ipfix element definition */
+ #define foreach_simple_report_ipfix_element \
+ _(sourceIPv4Address, 4) \
+ _(destinationIPv4Address, 4) \
+ _(sourceTransportPort, 2) \
+ _(destinationTransportPort, 2) \
+ _(protocolIdentifier, 1) \
+ _(flowStartMicroseconds, 8) \
+ _(flowEndMicroseconds, 8)
+
+ static ipfix_report_element_t simple_report_elements[] = {
+ #define _(a,b) {a,b},
+ foreach_simple_report_ipfix_element
+ #undef _
+ };
+
+ typedef struct
+ {
+ /** Buffers and frames, per thread */
+ vlib_buffer_t **buffers_by_thread;
+ vlib_frame_t **frames_by_thread;
+ u32 *next_record_offset_by_thread;
+
+ /** Template ID's */
+ u16 *template_ids;
+
+ /** Time reference pair */
+ u64 usec_time_0;
+ f64 vlib_time_0;
+
+ /** Stream index */
+ u32 stream_index;
+
+ /* Convenience */
+ flow_report_main_t *flow_report_main;
+ vlib_main_t *vlib_main;
+ vnet_main_t *vnet_main;
+ } my_logging_main_t;
+
+ extern my_logging_main_t my_logging_main;
+
+ ...
+
+ /* Recitations */
+ flow_report_main_t *frm = &flow_report_main;
+ my_logging_main_t *mlm = &my_logging_main;
+ vnet_flow_report_add_del_args_t a;
+ int rv;
+ u16 template_id;
+
+ ...
+
+ /* Init function: set up time reference pair */
+ mlm->vlib_time_0 = vlib_time_now (vm);
+ mlm->milisecond_time_0 = unix_time_now_nsec () * 1e-6;
+
+ ...
+
+ /* Create a report */
+ memset (&a, 0, sizeof (a));
+ a.is_add = 1 /* to enable the report */;
+ a.domain_id = 1 /* pick a domain ID */;
+ a.src_port = UDP_DST_PORT_ipfix /* src port for reports */;
+
+ /* Use the generic template packet rewrite string generator */
+ a.rewrite_callback = vnet_flow_rewrite_generic_callback;
+
+ /* Supply a list of ipfix report elements */
+ a.report_elements = simple_report_elements;
+ a.n_report_elements = ARRAY_LEN (simple_report_elements);
+
+ /* Pointer to the ipfix stream index, set by the report infra */
+ a.stream_indexp = &mlm->stream_index;
+ a.flow_data_callback = my_flow_data_callback;
+
+ /* Create the report */
+ rv = vnet_flow_report_add_del (frm, &a, &template_id);
+ if (rv)
+ oops...
+
+ /* Save the template-ID for later use */
+ mlm->template_id = template_id;
+
+Several things are worth describing in more detail.
+
+vnet_flow_rewrite_generic_callback programming
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This generic callback helps build ipfix template packets. When
+registering an ipfix report, pass an (array, count) of ipfix elements as
+shown above.
+
+my_flow_data_callback
+~~~~~~~~~~~~~~~~~~~~~
+
+The ipfix flow export infrastructure calls this callback to flush the
+current ipfix packet; to make sure that ipfix data is not retained for
+an unreasonably long period of time.
+
+We typically code it as shown below, to call an application-specific
+function with (uninteresting arguments), and “do_flush = 1”:
+
+.. code:: c
+
+
+ vlib_frame_t *my_flow_data_callback
+ (flow_report_main_t * frm,
+ flow_report_t * fr,
+ vlib_frame_t * f,
+ u32 * to_next, u32 node_index)
+ {
+
+ my_buffer_flow_record (0, ... , 0, 1 /* do_flush */);
+ return f;
+ }
+
+my_flow_data_header
+~~~~~~~~~~~~~~~~~~~
+
+This function creates the packet header for an ipfix data packet
+
+.. code:: c
+
+
+ static inline void
+ my_flow_report_header (flow_report_main_t * frm,
+ vlib_buffer_t * b0, u32 * offset)
+ {
+ my_logging_main_t *mlm = &my_logging_main;
+ flow_report_stream_t *stream;
+ ip4_ipfix_template_packet_t *tp;
+ ipfix_message_header_t *h = 0;
+
+
+ ipfix_set_header_t *s = 0;
+ ip4_header_t *ip;
+ udp_header_t *udp;
+
+ stream = &frm->streams[mlm->stream_index];
+
+ b0->current_data = 0;
+ b0->current_length = sizeof (*ip) + sizeof (*udp) + sizeof (*h) +
+ sizeof (*s);
+ b0->flags |= (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_F_FLOW_REPORT);
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = frm->fib_index;
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+
+ ip->ip_version_and_header_length = 0x45;
+ ip->ttl = 254;
+ ip->protocol = IP_PROTOCOL_UDP;
+ ip->flags_and_fragment_offset = 0;
+ ip->src_address.as_u32 = frm->src_address.as_u32;
+ ip->dst_address.as_u32 = frm->ipfix_collector.as_u32;
+ udp->src_port = clib_host_to_net_u16 (stream->src_port);
+ udp->dst_port = clib_host_to_net_u16 (frm->collector_port);
+ udp->checksum = 0;
+
+ h->export_time = clib_host_to_net_u32 ((u32)
+ (((f64) frm->unix_time_0) +
+ (vlib_time_now (frm->vlib_main) -
+ frm->vlib_time_0)));
+ h->sequence_number = clib_host_to_net_u32 (stream->sequence_number++);
+ h->domain_id = clib_host_to_net_u32 (stream->domain_id);
+
+ *offset = (u32) (((u8 *) (s + 1)) - (u8 *) tp);
+ }
+
+### fixup and transmit a flow record
+
+.. code:: c
+
+
+ static inline void
+ my_send_ipfix_pkt (flow_report_main_t * frm,
+ vlib_frame_t * f, vlib_buffer_t * b0, u16 template_id)
+ {
+ ip4_ipfix_template_packet_t *tp;
+ ipfix_message_header_t *h = 0;
+ ipfix_set_header_t *s = 0;
+ ip4_header_t *ip;
+ udp_header_t *udp;
+ vlib_main_t *vm = frm->vlib_main;
+
+ tp = vlib_buffer_get_current (b0);
+ ip = (ip4_header_t *) & tp->ip4;
+ udp = (udp_header_t *) (ip + 1);
+ h = (ipfix_message_header_t *) (udp + 1);
+ s = (ipfix_set_header_t *) (h + 1);
+
+ s->set_id_length = ipfix_set_id_length (template_id,
+ b0->current_length -
+ (sizeof (*ip) + sizeof (*udp) +
+ sizeof (*h)));
+ h->version_length = version_length (b0->current_length -
+ (sizeof (*ip) + sizeof (*udp)));
+
+ ip->length = clib_host_to_net_u16 (b0->current_length);
+ ip->checksum = ip4_header_checksum (ip);
+ udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+
+ if (frm->udp_checksum)
+ {
+ udp->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ip);
+ if (udp->checksum == 0)
+ udp->checksum = 0xffff;
+ }
+
+ ASSERT (ip4_header_checksum_is_valid (ip));
+
+ vlib_put_frame_to_node (vm, ip4_lookup_node.index, f);
+ }
+
+### my_buffer_flow_record
+
+This is the key routine which paints individual flow records into an
+ipfix packet under construction. It’s pretty straightforward (albeit
+stateful) vpp data-plane code. The code shown below is thread-safe by
+construction.
+
+.. code:: c
+
+ static inline void
+ my_buffer_flow_record_internal (my_flow_record_t * rp, int do_flush,
+ u32 thread_index)
+ {
+ vlib_main_t *vm = vlib_mains[thread_index];
+ my_logging_main_t *mlm = &jvp_ipfix_main;
+ flow_report_main_t *frm = &flow_report_main;
+ vlib_frame_t *f;
+ vlib_buffer_t *b0 = 0;
+ u32 bi0 = ~0;
+ u32 offset;
+
+ b0 = mlm->buffers_by_thread[thread_index];
+
+ if (PREDICT_FALSE (b0 == 0))
+ {
+ if (do_flush)
+ return;
+
+ if (vlib_buffer_alloc (vm, &bi0, 1) != 1)
+ {
+ clib_warning ("can't allocate ipfix data buffer");
+ return;
+ }
+
+ b0 = vlib_get_buffer (vm, bi0);
+ offset = 0;
+ mlm->buffers_by_thread[thread_index] = b0;
+ }
+ else
+ {
+ bi0 = vlib_get_buffer_index (vm, b0);
+ offset = mlm->next_record_offset_by_thread[thread_index];
+ }
+
+ f = mlm->frames_by_thread[thread_index];
+ if (PREDICT_FALSE (f == 0))
+ {
+ u32 *to_next;
+ f = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
+ mlm->frames_by_thread[thread_index] = f;
+ to_next = vlib_frame_vector_args (f);
+ to_next[0] = bi0;
+ f->n_vectors = 1;
+ mlm->frames_by_thread[thread_index] = f;
+ }
+
+ if (PREDICT_FALSE (offset == 0))
+ my_flow_report_header (frm, b0, &offset);
+
+ if (PREDICT_TRUE (do_flush == 0))
+ {
+ /* Paint the new ipfix data record into the buffer */
+ clib_memcpy (b0->data + offset, rp, sizeof (*rp));
+ offset += sizeof (*rp);
+ b0->current_length += sizeof (*rp);
+ }
+
+ if (PREDICT_FALSE (do_flush || (offset + sizeof (*rp)) > frm->path_mtu))
+ {
+ /* Nothing to send? */
+ if (offset == 0)
+ return;
+
+ send_ipfix_pkt (frm, f, b0, mlm->template_ids[0]);
+ mlm->buffers_by_thread[thread_index] = 0;
+ mlm->frames_by_thread[thread_index] = 0;
+ offset = 0;
+ }
+ mlm->next_record_offset_by_thread[thread_index] = offset;
+ }
+
+ static void
+ my_buffer_flow_record (my_flow_record_t * rp, int do_flush)
+ {
+ u32 thread_index = vlib_get_thread_index();
+ my_buffer_flow_record_internal (rp, do_flush, thread_index);
+ }
diff --git a/src/vnet/ipfix-export/ipfix_export.api b/src/vnet/ipfix-export/ipfix_export.api
index a70b72bee39..8a9d5b13124 100644
--- a/src/vnet/ipfix-export/ipfix_export.api
+++ b/src/vnet/ipfix-export/ipfix_export.api
@@ -73,6 +73,80 @@ define ipfix_exporter_details
bool udp_checksum;
};
+/** Configure IPFIX exporter within the exporting process.
+ The exporting process can contain multiple independent exporters,
+ each of which have their own state. The collector_address is the key
+ field that identifies a unique exporter. The already existing API
+ 'set_ipfix_exporter' is used to modify a single exporter (which will
+ always have stat index 0). If more than one exporter is required then
+ they can be created and deleted using this API.
+
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_create - True for create, False for delete
+ @param collector_address - address of IPFIX collector
+ @param collector_port - port of IPFIX collector
+ @param src_address - address of IPFIX exporter
+ @param vrf_id - VRF / fib table ID
+ @param path_mtu - Path MTU between exporter and collector
+ @param template_interval - number of seconds after which to resend template
+ @param udp_checksum - UDP checksum calculation enable flag
+*/
+
+define ipfix_exporter_create_delete {
+ u32 client_index;
+ u32 context;
+ bool is_create;
+ vl_api_address_t collector_address;
+ u16 collector_port;
+ vl_api_address_t src_address;
+ u32 vrf_id;
+ u32 path_mtu;
+ u32 template_interval;
+ bool udp_checksum;
+};
+
+define ipfix_exporter_create_delete_reply {
+ u32 context;
+ i32 retval;
+ u32 stat_index;
+};
+
+service {
+ rpc ipfix_all_exporter_get returns ipfix_all_exporter_get_reply
+ stream ipfix_all_exporter_details;
+};
+
+define ipfix_all_exporter_get
+{
+ u32 client_index;
+ u32 context;
+ u32 cursor;
+};
+
+define ipfix_all_exporter_get_reply
+{
+ u32 context;
+ i32 retval;
+ u32 cursor;
+};
+
+/** \brief Ipfix meter details in response to the get_meters command
+ @param context - sender context, to match reply w/ request
+ @param name The name of the ipfix meter
+*/
+define ipfix_all_exporter_details
+{
+ u32 context;
+ vl_api_address_t collector_address;
+ u16 collector_port;
+ vl_api_address_t src_address;
+ u32 vrf_id;
+ u32 path_mtu;
+ u32 template_interval;
+ bool udp_checksum;
+};
+
/** \brief IPFIX classify stream configure request
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/ipip/ipip.c b/src/vnet/ipip/ipip.c
index e6ea3ebe79d..aaf21468d1e 100644
--- a/src/vnet/ipip/ipip.c
+++ b/src/vnet/ipip/ipip.c
@@ -148,7 +148,14 @@ ipip64_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b,
ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
tunnel_encap_fixup_6o4 (flags, ((ip6_header_t *) (ip4 + 1)), ip4);
- ip4->checksum = ip4_header_checksum (ip4);
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
}
static void
@@ -164,7 +171,14 @@ ipip44_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b,
ip4->length = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b));
tunnel_encap_fixup_4o4 (flags, ip4 + 1, ip4);
- ip4->checksum = ip4_header_checksum (ip4);
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
}
static void
@@ -185,6 +199,12 @@ ipip46_fixup (vlib_main_t * vm, const ip_adjacency_t * adj, vlib_buffer_t * b,
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
sizeof (*ip6));
tunnel_encap_fixup_4o6 (flags, b, ((ip4_header_t *) (ip6 + 1)), ip6);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
}
static void
@@ -205,6 +225,12 @@ ipip66_fixup (vlib_main_t * vm,
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) -
sizeof (*ip6));
tunnel_encap_fixup_6o6 (flags, ip6 + 1, ip6);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
}
static void
@@ -226,6 +252,12 @@ ipipm6_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b,
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip6));
tunnel_encap_fixup_mplso6 (flags, b, (mpls_unicast_header_t *) (ip6 + 1),
ip6);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip6 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
}
static void
@@ -245,7 +277,15 @@ ipipm4_fixup (vlib_main_t *vm, const ip_adjacency_t *adj, vlib_buffer_t *b,
ip4->length =
clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b) - sizeof (*ip4));
tunnel_encap_fixup_mplso4 (flags, (mpls_unicast_header_t *) (ip4 + 1), ip4);
- ip4->checksum = ip4_header_checksum (ip4);
+
+ if (PREDICT_FALSE (b->flags & VNET_BUFFER_F_GSO))
+ {
+ vnet_buffer2 (b)->outer_l3_hdr_offset = (u8 *) ip4 - b->data;
+ vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_OUTER_IP_CKSUM |
+ VNET_BUFFER_OFFLOAD_F_TNL_IPIP);
+ }
+ else
+ ip4->checksum = ip4_header_checksum (ip4);
}
static void
@@ -269,7 +309,6 @@ ipip_tunnel_stack (adj_index_t ai)
}
else
{
- /* *INDENT-OFF* */
fib_prefix_t dst = {
.fp_len = t->transport == IPIP_TRANSPORT_IP6 ? 128 : 32,
.fp_proto = (t->transport == IPIP_TRANSPORT_IP6 ?
@@ -277,7 +316,6 @@ ipip_tunnel_stack (adj_index_t ai)
FIB_PROTOCOL_IP4),
.fp_addr = t->tunnel_dst
};
- /* *INDENT-ON* */
adj_midchain_delegate_stack (ai, t->fib_index, &dst);
}
@@ -348,9 +386,6 @@ ipip_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
if (!(t->flags & TUNNEL_ENCAP_DECAP_FLAG_ENCAP_INNER_HASH))
af |= ADJ_FLAG_MIDCHAIN_IP_STACK;
- if (VNET_LINK_ETHERNET == adj_get_link_type (ai))
- af |= ADJ_FLAG_MIDCHAIN_NO_COUNT;
-
fixup = ipip_get_fixup (t, adj_get_link_type (ai), &af);
adj_nbr_midchain_update_rewrite
(ai, fixup,
@@ -515,7 +550,6 @@ ipip_tunnel_desc (u32 sw_if_index,
return (0);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS(ipip_device_class) = {
.name = "IPIP tunnel device",
.format_device_name = format_ipip_tunnel_name,
@@ -545,7 +579,6 @@ VNET_HW_INTERFACE_CLASS(mipip_hw_interface_class) = {
.update_adjacency = mipip_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
};
-/* *INDENT-ON* */
ipip_tunnel_t *
ipip_tunnel_db_find (const ipip_tunnel_key_t * key)
@@ -726,8 +759,6 @@ ipip_add_tunnel (ipip_transport_t transport,
{
ipip_main_t *gm = &ipip_main;
vnet_main_t *vnm = gm->vnet_main;
- ip4_main_t *im4 = &ip4_main;
- ip6_main_t *im6 = &ip6_main;
ipip_tunnel_t *t;
vnet_hw_interface_t *hi;
u32 hw_if_index, sw_if_index;
@@ -787,18 +818,16 @@ ipip_add_tunnel (ipip_transport_t transport,
gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx;
if (t->transport == IPIP_TRANSPORT_IP4)
- {
- vec_validate (im4->fib_index_by_sw_if_index, sw_if_index);
- hi->min_packet_bytes = 64 + sizeof (ip4_header_t);
- }
+ hi->frame_overhead = sizeof (ip4_header_t);
else
- {
- vec_validate (im6->fib_index_by_sw_if_index, sw_if_index);
- hi->min_packet_bytes = 64 + sizeof (ip6_header_t);
- }
+ hi->frame_overhead = sizeof (ip6_header_t);
+
+ hi->min_frame_size = hi->frame_overhead + 64;
/* Standard default ipip MTU. */
vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
+ vnet_set_interface_l3_output_node (gm->vlib_main, sw_if_index,
+ (u8 *) "tunnel-output");
t->tunnel_src = *src;
t->tunnel_dst = *dst;
@@ -844,6 +873,7 @@ ipip_del_tunnel (u32 sw_if_index)
teib_walk_itf (t->sw_if_index, ipip_tunnel_delete_teib_walk, t);
vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
+ vnet_reset_interface_l3_output_node (gm->vlib_main, t->sw_if_index);
gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
vnet_delete_hw_interface (vnm, t->hw_if_index);
hash_unset (gm->instance_used, t->user_instance);
diff --git a/src/vnet/ipip/ipip_api.c b/src/vnet/ipip/ipip_api.c
index f44c72c3b27..2cb7bdf8dae 100644
--- a/src/vnet/ipip/ipip_api.c
+++ b/src/vnet/ipip/ipip_api.c
@@ -86,12 +86,10 @@ vl_api_ipip_add_tunnel_t_handler (vl_api_ipip_add_tunnel_t * mp)
}
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_IPIP_ADD_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl(sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -105,30 +103,45 @@ vl_api_ipip_del_tunnel_t_handler (vl_api_ipip_del_tunnel_t * mp)
REPLY_MACRO (VL_API_IPIP_DEL_TUNNEL_REPLY);
}
+static vl_api_tunnel_mode_t
+ipip_tunnel_mode_encode (ipip_mode_t mode)
+{
+ switch (mode)
+ {
+ case IPIP_MODE_P2P:
+ return TUNNEL_API_MODE_P2P;
+ case IPIP_MODE_P2MP:
+ return TUNNEL_API_MODE_MP;
+ case IPIP_MODE_6RD:
+ return TUNNEL_API_MODE_P2P;
+ default:
+ return TUNNEL_API_MODE_P2P;
+ }
+}
+
static void
send_ipip_tunnel_details (ipip_tunnel_t * t, vl_api_ipip_tunnel_dump_t * mp)
{
ipip_main_t *im = &ipip_main;
vl_api_ipip_tunnel_details_t *rmp;
bool is_ipv6 = t->transport == IPIP_TRANSPORT_IP6 ? true : false;
+ ip46_type_t ip_type = is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4;
fib_table_t *ft;
- int rv = 0;
-
- ft = fib_table_get (t->fib_index, (is_ipv6 ? FIB_PROTOCOL_IP6 :
- FIB_PROTOCOL_IP4));
- /* *INDENT-OFF* */
- REPLY_MACRO_DETAILS2(VL_API_IPIP_TUNNEL_DETAILS,
- ({
- ip_address_encode (&t->tunnel_src, IP46_TYPE_ANY, &rmp->tunnel.src);
- ip_address_encode (&t->tunnel_dst, IP46_TYPE_ANY, &rmp->tunnel.dst);
- rmp->tunnel.table_id = htonl (ft->ft_table_id);
- rmp->tunnel.instance = htonl (t->user_instance);
- rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
- rmp->tunnel.dscp = ip_dscp_encode(t->dscp);
- rmp->tunnel.flags = tunnel_encap_decap_flags_encode(t->flags);
- }));
- /* *INDENT-ON* */
+ ft = fib_table_get (t->fib_index,
+ (is_ipv6 ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4));
+
+ REPLY_MACRO_DETAILS2 (
+ VL_API_IPIP_TUNNEL_DETAILS, ({
+ ip_address_encode (&t->tunnel_src, ip_type, &rmp->tunnel.src);
+ ip_address_encode (&t->tunnel_dst, ip_type, &rmp->tunnel.dst);
+ rmp->tunnel.table_id = htonl (ft->ft_table_id);
+ rmp->tunnel.instance = htonl (t->user_instance);
+ rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
+ rmp->tunnel.dscp = ip_dscp_encode (t->dscp);
+ rmp->tunnel.flags = tunnel_encap_decap_flags_encode (t->flags);
+ rmp->tunnel.mode = ipip_tunnel_mode_encode (t->mode);
+ }));
}
static void
@@ -142,12 +155,10 @@ vl_api_ipip_tunnel_dump_t_handler (vl_api_ipip_tunnel_dump_t * mp)
if (sw_if_index == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (t, im->tunnels)
{
send_ipip_tunnel_details(t, mp);
}
- /* *INDENT-ON* */
}
else
{
@@ -186,12 +197,10 @@ vl_api_ipip_6rd_add_tunnel_t_handler (vl_api_ipip_6rd_add_tunnel_t * mp)
&sixrd_tunnel_index);
}
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPIP_6RD_ADD_TUNNEL_REPLY,
({
rmp->sw_if_index = htonl (sixrd_tunnel_index);
}));
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/ipip/ipip_cli.c b/src/vnet/ipip/ipip_cli.c
index 1a8e8896965..606a1f53f9a 100644
--- a/src/vnet/ipip/ipip_cli.c
+++ b/src/vnet/ipip/ipip_cli.c
@@ -197,7 +197,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(create_ipip_tunnel_command, static) = {
.path = "create ipip tunnel",
.short_help = "create ipip tunnel src <addr> dst <addr> [instance <n>] "
@@ -209,7 +208,6 @@ VLIB_CLI_COMMAND(delete_ipip_tunnel_command, static) = {
.short_help = "delete ipip tunnel sw_if_index <sw_if_index>",
.function = delete_ipip_tunnel_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_ipip_tunnel (u8 * s, va_list * args)
@@ -274,10 +272,8 @@ show_ipip_tunnel_command_fn (vlib_main_t * vm,
if (ti == ~0)
{
- /* *INDENT-OFF* */
pool_foreach (t, gm->tunnels)
{vlib_cli_output(vm, "%U", format_ipip_tunnel, t); }
- /* *INDENT-ON* */
}
else
{
@@ -290,12 +286,10 @@ show_ipip_tunnel_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(show_ipip_tunnel_command, static) = {
.path = "show ipip tunnel",
.function = show_ipip_tunnel_command_fn,
};
-/* *INDENT-ON* */
static u8 *
format_ipip_tunnel_key (u8 * s, va_list * args)
@@ -318,12 +312,10 @@ ipip_tunnel_hash_show (vlib_main_t * vm,
ipip_tunnel_key_t *key;
u32 index;
- /* *INDENT-OFF* */
hash_foreach(key, index, im->tunnel_by_key,
({
vlib_cli_output (vm, " %U -> %d", format_ipip_tunnel_key, key, index);
}));
- /* *INDENT-ON* */
return NULL;
}
@@ -331,14 +323,12 @@ ipip_tunnel_hash_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection hash tables
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipip_tunnel_hash_show_node, static) =
{
.path = "show ipip tunnel-hash",
.function = ipip_tunnel_hash_show,
.short_help = "show ipip tunnel-hash",
};
-/* *INDENT-ON* */
static clib_error_t *
create_sixrd_tunnel_command_fn (vlib_main_t * vm,
@@ -464,7 +454,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(create_sixrd_tunnel_command, static) = {
.path = "create 6rd tunnel",
.short_help = "create 6rd tunnel ip6-pfx <ip6-pfx> ip4-pfx <ip4-pfx> "
@@ -477,7 +466,6 @@ VLIB_CLI_COMMAND(delete_sixrd_tunnel_command, static) = {
.short_help = "delete 6rd tunnel sw_if_index <sw_if_index>",
.function = delete_sixrd_tunnel_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipip/node.c b/src/vnet/ipip/node.c
index b008a21a20f..a289cc885df 100644
--- a/src/vnet/ipip/node.c
+++ b/src/vnet/ipip/node.c
@@ -260,7 +260,6 @@ static char *ipip_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE(ipip4_input_node) = {
.name = "ipip4-input",
/* Takes a vector of packets. */
@@ -293,7 +292,6 @@ VLIB_REGISTER_NODE(ipip6_input_node) = {
.format_trace = format_ipip_rx_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipip/sixrd.c b/src/vnet/ipip/sixrd.c
index 492b4f83260..6e0bfb042cc 100644
--- a/src/vnet/ipip/sixrd.c
+++ b/src/vnet/ipip/sixrd.c
@@ -250,7 +250,6 @@ sixrd_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
return /* no error */ 0;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS(sixrd_hw_interface_class) = {
.name = "ip6ip-6rd",
.build_rewrite = sixrd_build_rewrite,
@@ -265,7 +264,6 @@ VNET_DEVICE_CLASS(sixrd_device_class) = {
#endif
}
;
-/* *INDENT-ON* */
int
sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
@@ -325,6 +323,8 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
t->user_instance = t_idx;
vnet_sw_interface_set_mtu (vnet_get_main (), t->sw_if_index, 1480);
+ vnet_set_interface_l3_output_node (gm->vlib_main, hi->sw_if_index,
+ (u8 *) "tunnel-output");
ipip_tunnel_db_add (t, &key);
@@ -339,7 +339,6 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
ip6_sw_interface_enable_disable (t->sw_if_index, true);
/* Create IPv6 route/adjacency */
- /* *INDENT-OFF* */
fib_prefix_t pfx6 = {
.fp_proto = FIB_PROTOCOL_IP6,
.fp_len = t->sixrd.ip6_prefix_len,
@@ -347,7 +346,6 @@ sixrd_add_tunnel (ip6_address_t * ip6_prefix, u8 ip6_prefix_len,
.ip6 = t->sixrd.ip6_prefix,
},
};
- /* *INDENT-ON* */
fib_table_lock (ip6_fib_index, FIB_PROTOCOL_IP6, FIB_SOURCE_6RD);
fib_table_entry_update_one_path (ip6_fib_index, &pfx6, FIB_SOURCE_6RD,
@@ -384,7 +382,6 @@ sixrd_del_tunnel (u32 sw_if_index)
return -1;
}
- /* *INDENT-OFF* */
fib_prefix_t pfx6 = {
.fp_proto = FIB_PROTOCOL_IP6,
.fp_len = t->sixrd.ip6_prefix_len,
@@ -392,7 +389,6 @@ sixrd_del_tunnel (u32 sw_if_index)
.ip6 = t->sixrd.ip6_prefix,
},
};
- /* *INDENT-ON* */
fib_table_entry_path_remove (t->sixrd.ip6_fib_index, &pfx6,
FIB_SOURCE_6RD,
@@ -403,6 +399,7 @@ sixrd_del_tunnel (u32 sw_if_index)
vnet_sw_interface_set_flags (vnet_get_main (), t->sw_if_index,
0 /* down */ );
+ vnet_reset_interface_l3_output_node (gm->vlib_main, t->sw_if_index);
ip6_sw_interface_enable_disable (t->sw_if_index, false);
gm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
@@ -502,7 +499,8 @@ sixrd_init (vlib_main_t * vm)
sixrd_adj_delegate_type =
adj_delegate_register_new_type (&sixrd_adj_delegate_vft);
- sixrd_fib_node_type = fib_node_register_new_type (&sixrd_fib_node_vft);
+ sixrd_fib_node_type =
+ fib_node_register_new_type ("sixrd", &sixrd_fib_node_vft);
return error;
}
diff --git a/src/vnet/ipsec/FEATURE.yaml b/src/vnet/ipsec/FEATURE.yaml
index 1f2915a0128..99df89ab378 100644
--- a/src/vnet/ipsec/FEATURE.yaml
+++ b/src/vnet/ipsec/FEATURE.yaml
@@ -3,7 +3,7 @@ name: IP Security
maintainer: Neale Ranns <nranns@cisco.com>
features:
- IPSec (https://tools.ietf.org/html/rfc4301)
- - Authetication Header (https://tools.ietf.org/html/rfc4302)
+ - Authentication Header (https://tools.ietf.org/html/rfc4302)
- Encapsulating Security Payload (https://tools.ietf.org/html/rfc4303)
description: "An implementation of IPSec"
diff --git a/src/vnet/ipsec/ah.h b/src/vnet/ipsec/ah.h
index d0b4c21a4bc..450c9cfd6dc 100644
--- a/src/vnet/ipsec/ah.h
+++ b/src/vnet/ipsec/ah.h
@@ -17,6 +17,7 @@
#include <vnet/ip/ip.h>
#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
typedef struct
{
@@ -29,19 +30,67 @@ typedef struct
} ah_header_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4;
ah_header_t ah;
}) ip4_and_ah_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_header_t ip6;
ah_header_t ah;
}) ip6_and_ah_header_t;
-/* *INDENT-ON* */
+
+always_inline u32
+ah_encrypt_err_to_sa_err (u32 err)
+{
+ switch (err)
+ {
+ case AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR:
+ return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR;
+ case AH_ENCRYPT_ERROR_SEQ_CYCLED:
+ return IPSEC_SA_ERROR_SEQ_CYCLED;
+ }
+ return ~0;
+}
+
+always_inline u32
+ah_decrypt_err_to_sa_err (u32 err)
+{
+ switch (err)
+ {
+ case AH_DECRYPT_ERROR_DECRYPTION_FAILED:
+ return IPSEC_SA_ERROR_DECRYPTION_FAILED;
+ case AH_DECRYPT_ERROR_INTEG_ERROR:
+ return IPSEC_SA_ERROR_INTEG_ERROR;
+ case AH_DECRYPT_ERROR_NO_TAIL_SPACE:
+ return IPSEC_SA_ERROR_NO_TAIL_SPACE;
+ case AH_DECRYPT_ERROR_DROP_FRAGMENTS:
+ return IPSEC_SA_ERROR_DROP_FRAGMENTS;
+ case AH_DECRYPT_ERROR_REPLAY:
+ return IPSEC_SA_ERROR_REPLAY;
+ }
+ return ~0;
+}
+
+always_inline void
+ah_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ ah_encrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
+}
+
+always_inline void
+ah_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ ah_decrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
+}
always_inline u8
ah_calc_icv_padding_len (u8 icv_size, int is_ipv6)
diff --git a/src/vnet/ipsec/ah_decrypt.c b/src/vnet/ipsec/ah_decrypt.c
index 182ed3d231c..918ebf03f67 100644
--- a/src/vnet/ipsec/ah_decrypt.c
+++ b/src/vnet/ipsec/ah_decrypt.c
@@ -38,28 +38,6 @@ typedef enum
AH_DECRYPT_N_NEXT,
} ah_decrypt_next_t;
-#define foreach_ah_decrypt_error \
- _ (RX_PKTS, "AH pkts received") \
- _ (DECRYPTION_FAILED, "AH decryption failed") \
- _ (INTEG_ERROR, "Integrity check failed") \
- _ (NO_TAIL_SPACE, "not enough buffer tail space (dropped)") \
- _ (DROP_FRAGMENTS, "IP fragments drop") \
- _ (REPLAY, "SA replayed packet")
-
-typedef enum
-{
-#define _(sym,str) AH_DECRYPT_ERROR_##sym,
- foreach_ah_decrypt_error
-#undef _
- AH_DECRYPT_N_ERROR,
-} ah_decrypt_error_t;
-
-static char *ah_decrypt_error_strings[] = {
-#define _(sym,string) string,
- foreach_ah_decrypt_error
-#undef _
-};
-
typedef struct
{
ipsec_integ_alg_t integ_alg;
@@ -125,8 +103,9 @@ ah_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[AH_DECRYPT_ERROR_INTEG_ERROR];
- nexts[bi] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (
+ b[bi], node, vm->thread_index, AH_DECRYPT_ERROR_INTEG_ERROR, bi,
+ nexts, AH_DECRYPT_NEXT_DROP, vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -149,6 +128,7 @@ ah_decrypt_inline (vlib_main_t * vm,
from = vlib_frame_vector_args (from_frame);
n_left = from_frame->n_vectors;
ipsec_sa_t *sa0 = 0;
+ bool anti_replay_result;
u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0;
clib_memset (pkt_data, 0, VLIB_FRAME_SIZE * sizeof (pkt_data[0]));
@@ -166,8 +146,7 @@ ah_decrypt_inline (vlib_main_t * vm,
{
if (current_sa_index != ~0)
vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_pkts,
+ current_sa_index, current_sa_pkts,
current_sa_bytes);
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
sa0 = ipsec_sa_get (current_sa_index);
@@ -177,7 +156,7 @@ ah_decrypt_inline (vlib_main_t * vm,
thread_index, current_sa_index);
}
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -211,8 +190,9 @@ ah_decrypt_inline (vlib_main_t * vm,
{
if (ip4_is_fragment (ih4))
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_DROP_FRAGMENTS];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_DROP_FRAGMENTS,
+ 0, next, AH_DECRYPT_NEXT_DROP, current_sa_index);
goto next;
}
pd->ip_hdr_size = ip4_header_bytes (ih4);
@@ -222,11 +202,21 @@ ah_decrypt_inline (vlib_main_t * vm,
pd->seq = clib_host_to_net_u32 (ah0->seq_no);
/* anti-replay check */
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, ~0, false,
- &pd->seq_hi))
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
+ {
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, true);
+ }
+ else
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_REPLAY];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, false);
+ }
+ if (anti_replay_result)
+ {
+ ah_decrypt_set_next_index (b[0], node, vm->thread_index,
+ AH_DECRYPT_ERROR_REPLAY, 0, next,
+ AH_DECRYPT_NEXT_DROP, current_sa_index);
goto next;
}
@@ -241,8 +231,9 @@ ah_decrypt_inline (vlib_main_t * vm,
pd->current_data + b[0]->current_length
+ sizeof (u32) > buffer_data_size))
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_NO_TAIL_SPACE];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_NO_TAIL_SPACE,
+ 0, next, AH_DECRYPT_NEXT_DROP, current_sa_index);
goto next;
}
@@ -315,6 +306,7 @@ ah_decrypt_inline (vlib_main_t * vm,
{
ip4_header_t *oh4;
ip6_header_t *oh6;
+ u64 n_lost = 0;
if (next[0] < AH_DECRYPT_N_NEXT)
goto trace;
@@ -323,21 +315,44 @@ ah_decrypt_inline (vlib_main_t * vm,
if (PREDICT_TRUE (sa0->integ_alg != IPSEC_INTEG_ALG_NONE))
{
- /* redo the anit-reply check. see esp_decrypt for details */
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi,
- true, NULL))
+ /* redo the anti-reply check. see esp_decrypt for details */
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_REPLAY];
- next[0] = AH_DECRYPT_NEXT_DROP;
- goto trace;
+ if (ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, pd->seq_hi, true, NULL, true))
+ {
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0,
+ next, AH_DECRYPT_NEXT_DROP, pd->sa_index);
+ goto trace;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (
+ sa0, thread_index, pd->seq, pd->seq_hi, true);
}
- ipsec_sa_anti_replay_advance (sa0, pd->seq, pd->seq_hi);
+ else
+ {
+ if (ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, pd->seq_hi, true, NULL, false))
+ {
+ ah_decrypt_set_next_index (
+ b[0], node, vm->thread_index, AH_DECRYPT_ERROR_REPLAY, 0,
+ next, AH_DECRYPT_NEXT_DROP, pd->sa_index);
+ goto trace;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (
+ sa0, thread_index, pd->seq, pd->seq_hi, false);
+ }
+ vlib_prefetch_simple_counter (
+ &ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index,
+ pd->sa_index);
}
u16 ah_hdr_len = sizeof (ah_header_t) + pd->icv_size
+ pd->icv_padding_len;
vlib_buffer_advance (b[0], pd->ip_hdr_size + ah_hdr_len);
b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ b[0]->flags &= ~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED |
+ VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
if (PREDICT_TRUE (ipsec_sa_is_set_IS_TUNNEL (sa0)))
{ /* tunnel mode */
@@ -347,8 +362,10 @@ ah_decrypt_inline (vlib_main_t * vm,
next[0] = AH_DECRYPT_NEXT_IP6_INPUT;
else
{
- b[0]->error = node->errors[AH_DECRYPT_ERROR_DECRYPTION_FAILED];
- next[0] = AH_DECRYPT_NEXT_DROP;
+ ah_decrypt_set_next_index (b[0], node, vm->thread_index,
+ AH_DECRYPT_ERROR_DECRYPTION_FAILED, 0,
+ next, AH_DECRYPT_NEXT_DROP,
+ pd->sa_index);
goto trace;
}
}
@@ -398,6 +415,11 @@ ah_decrypt_inline (vlib_main_t * vm,
}
}
+ if (PREDICT_FALSE (n_lost))
+ vlib_increment_simple_counter (
+ &ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST], thread_index,
+ pd->sa_index, n_lost);
+
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = (u32) ~ 0;
trace:
if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
@@ -428,15 +450,14 @@ VLIB_NODE_FN (ah4_decrypt_node) (vlib_main_t * vm,
return ah_decrypt_inline (vm, node, from_frame, 0 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah4_decrypt_node) = {
.name = "ah4-decrypt",
.vector_size = sizeof (u32),
.format_trace = format_ah_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(ah_decrypt_error_strings),
- .error_strings = ah_decrypt_error_strings,
+ .n_errors = AH_DECRYPT_N_ERROR,
+ .error_counters = ah_decrypt_error_counters,
.n_next_nodes = AH_DECRYPT_N_NEXT,
.next_nodes = {
@@ -446,7 +467,6 @@ VLIB_REGISTER_NODE (ah4_decrypt_node) = {
[AH_DECRYPT_NEXT_HANDOFF] = "ah4-decrypt-handoff",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ah6_decrypt_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -455,15 +475,14 @@ VLIB_NODE_FN (ah6_decrypt_node) (vlib_main_t * vm,
return ah_decrypt_inline (vm, node, from_frame, 1 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah6_decrypt_node) = {
.name = "ah6-decrypt",
.vector_size = sizeof (u32),
.format_trace = format_ah_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(ah_decrypt_error_strings),
- .error_strings = ah_decrypt_error_strings,
+ .n_errors = AH_DECRYPT_N_ERROR,
+ .error_counters = ah_decrypt_error_counters,
.n_next_nodes = AH_DECRYPT_N_NEXT,
.next_nodes = {
@@ -473,7 +492,6 @@ VLIB_REGISTER_NODE (ah6_decrypt_node) = {
[AH_DECRYPT_NEXT_HANDOFF] = "ah6-decrypt-handoff",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/ipsec/ah_encrypt.c b/src/vnet/ipsec/ah_encrypt.c
index bb971e40811..960327f071d 100644
--- a/src/vnet/ipsec/ah_encrypt.c
+++ b/src/vnet/ipsec/ah_encrypt.c
@@ -22,6 +22,7 @@
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/esp.h>
#include <vnet/ipsec/ah.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
#include <vnet/tunnel/tunnel_dp.h>
#define foreach_ah_encrypt_next \
@@ -38,25 +39,6 @@ typedef enum
AH_ENCRYPT_N_NEXT,
} ah_encrypt_next_t;
-#define foreach_ah_encrypt_error \
- _ (RX_PKTS, "AH pkts received") \
- _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)") \
- _ (SEQ_CYCLED, "sequence number cycled (packet dropped)")
-
-typedef enum
-{
-#define _(sym,str) AH_ENCRYPT_ERROR_##sym,
- foreach_ah_encrypt_error
-#undef _
- AH_ENCRYPT_N_ERROR,
-} ah_encrypt_error_t;
-
-static char *ah_encrypt_error_strings[] = {
-#define _(sym,string) string,
- foreach_ah_encrypt_error
-#undef _
-};
-
typedef struct
{
u32 sa_index;
@@ -99,8 +81,10 @@ ah_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR];
- nexts[bi] = AH_ENCRYPT_NEXT_DROP;
+ ah_encrypt_set_next_index (b[bi], node, vm->thread_index,
+ AH_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR, bi,
+ nexts, AH_ENCRYPT_NEXT_DROP,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -171,19 +155,20 @@ ah_encrypt_inline (vlib_main_t * vm,
{
if (current_sa_index != ~0)
vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_pkts,
+ current_sa_index, current_sa_pkts,
current_sa_bytes);
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
sa0 = ipsec_sa_get (current_sa_index);
current_sa_bytes = current_sa_pkts = 0;
+ vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
+ current_sa_index);
}
pd->sa_index = current_sa_index;
next[0] = AH_ENCRYPT_NEXT_DROP;
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -201,7 +186,9 @@ ah_encrypt_inline (vlib_main_t * vm,
if (PREDICT_FALSE (esp_seq_advance (sa0)))
{
- b[0]->error = node->errors[AH_ENCRYPT_ERROR_SEQ_CYCLED];
+ ah_encrypt_set_next_index (b[0], node, vm->thread_index,
+ AH_ENCRYPT_ERROR_SEQ_CYCLED, 0, next,
+ AH_ENCRYPT_NEXT_DROP, current_sa_index);
pd->skip = 1;
goto next;
}
@@ -455,15 +442,14 @@ VLIB_NODE_FN (ah4_encrypt_node) (vlib_main_t * vm,
return ah_encrypt_inline (vm, node, from_frame, 0 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah4_encrypt_node) = {
.name = "ah4-encrypt",
.vector_size = sizeof (u32),
.format_trace = format_ah_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(ah_encrypt_error_strings),
- .error_strings = ah_encrypt_error_strings,
+ .n_errors = AH_ENCRYPT_N_ERROR,
+ .error_counters = ah_encrypt_error_counters,
.n_next_nodes = AH_ENCRYPT_N_NEXT,
.next_nodes = {
@@ -472,7 +458,6 @@ VLIB_REGISTER_NODE (ah4_encrypt_node) = {
[AH_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ah6_encrypt_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -481,15 +466,14 @@ VLIB_NODE_FN (ah6_encrypt_node) (vlib_main_t * vm,
return ah_encrypt_inline (vm, node, from_frame, 1 /* is_ip6 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ah6_encrypt_node) = {
.name = "ah6-encrypt",
.vector_size = sizeof (u32),
.format_trace = format_ah_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(ah_encrypt_error_strings),
- .error_strings = ah_encrypt_error_strings,
+ .n_errors = AH_ENCRYPT_N_ERROR,
+ .error_counters = ah_encrypt_error_counters,
.n_next_nodes = AH_ENCRYPT_N_NEXT,
.next_nodes = {
@@ -498,7 +482,6 @@ VLIB_REGISTER_NODE (ah6_encrypt_node) = {
[AH_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/ipsec/esp.h b/src/vnet/ipsec/esp.h
index d179233df49..1c3ce776ad2 100644
--- a/src/vnet/ipsec/esp.h
+++ b/src/vnet/ipsec/esp.h
@@ -18,6 +18,7 @@
#include <vnet/ip/ip.h>
#include <vnet/crypto/crypto.h>
#include <vnet/ipsec/ipsec.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
typedef struct
{
@@ -36,27 +37,21 @@ typedef struct
u8 next_header;
} esp_footer_t;
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4;
esp_header_t esp;
}) ip4_and_esp_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip4_header_t ip4;
udp_header_t udp;
esp_header_t esp;
}) ip4_and_udp_and_esp_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
ip6_header_t ip6;
esp_header_t esp;
}) ip6_and_esp_header_t;
-/* *INDENT-ON* */
/**
* AES counter mode nonce
@@ -85,9 +80,6 @@ typedef struct esp_aead_t_
} __clib_packed esp_aead_t;
#define ESP_SEQ_MAX (4294967295UL)
-#define ESP_MAX_BLOCK_SIZE (16)
-#define ESP_MAX_IV_SIZE (16)
-#define ESP_MAX_ICV_SIZE (32)
u8 *format_esp_header (u8 * s, va_list * args);
@@ -141,39 +133,76 @@ esp_aad_fill (u8 *data, const esp_header_t *esp, const ipsec_sa_t *sa,
}
}
-/* Special case to drop or hand off packets for sync/async modes.
- *
- * Different than sync mode, async mode only enqueue drop or hand-off packets
- * to next nodes.
- */
-always_inline void
-esp_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node, u32 err,
- u16 index, u16 *nexts, u16 drop_next)
+always_inline u32
+esp_encrypt_err_to_sa_err (u32 err)
{
- nexts[index] = drop_next;
- b->error = node->errors[err];
+ switch (err)
+ {
+ case ESP_ENCRYPT_ERROR_HANDOFF:
+ return IPSEC_SA_ERROR_HANDOFF;
+ case ESP_ENCRYPT_ERROR_SEQ_CYCLED:
+ return IPSEC_SA_ERROR_SEQ_CYCLED;
+ case ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR:
+ return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR;
+ case ESP_ENCRYPT_ERROR_CRYPTO_QUEUE_FULL:
+ return IPSEC_SA_ERROR_CRYPTO_QUEUE_FULL;
+ case ESP_ENCRYPT_ERROR_NO_BUFFERS:
+ return IPSEC_SA_ERROR_NO_BUFFERS;
+ case ESP_ENCRYPT_ERROR_NO_ENCRYPTION:
+ return IPSEC_SA_ERROR_NO_ENCRYPTION;
+ }
+ return ~0;
}
-/* when submitting a frame is failed, drop all buffers in the frame */
always_inline u32
-esp_async_recycle_failed_submit (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
- vlib_node_runtime_t *node, u32 err, u16 index,
- u32 *from, u16 *nexts, u16 drop_next_index)
+esp_decrypt_err_to_sa_err (u32 err)
{
- u32 n_drop = f->n_elts;
- u32 *bi = f->buffer_indices;
-
- while (n_drop--)
+ switch (err)
{
- from[index] = bi[0];
- esp_set_next_index (vlib_get_buffer (vm, bi[0]), node, err, index, nexts,
- drop_next_index);
- bi++;
- index++;
+ case ESP_DECRYPT_ERROR_HANDOFF:
+ return IPSEC_SA_ERROR_HANDOFF;
+ case ESP_DECRYPT_ERROR_DECRYPTION_FAILED:
+ return IPSEC_SA_ERROR_DECRYPTION_FAILED;
+ case ESP_DECRYPT_ERROR_INTEG_ERROR:
+ return IPSEC_SA_ERROR_INTEG_ERROR;
+ case ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR:
+ return IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR;
+ case ESP_DECRYPT_ERROR_REPLAY:
+ return IPSEC_SA_ERROR_REPLAY;
+ case ESP_DECRYPT_ERROR_RUNT:
+ return IPSEC_SA_ERROR_RUNT;
+ case ESP_DECRYPT_ERROR_NO_BUFFERS:
+ return IPSEC_SA_ERROR_NO_BUFFERS;
+ case ESP_DECRYPT_ERROR_OVERSIZED_HEADER:
+ return IPSEC_SA_ERROR_OVERSIZED_HEADER;
+ case ESP_DECRYPT_ERROR_NO_TAIL_SPACE:
+ return IPSEC_SA_ERROR_NO_TAIL_SPACE;
+ case ESP_DECRYPT_ERROR_TUN_NO_PROTO:
+ return IPSEC_SA_ERROR_TUN_NO_PROTO;
+ case ESP_DECRYPT_ERROR_UNSUP_PAYLOAD:
+ return IPSEC_SA_ERROR_UNSUP_PAYLOAD;
}
- vnet_crypto_async_reset_frame (f);
+ return ~0;
+}
- return (f->n_elts);
+always_inline void
+esp_encrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ esp_encrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
+}
+
+always_inline void
+esp_decrypt_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u16 index, u16 *nexts,
+ u16 drop_next, u32 sa_index)
+{
+ ipsec_set_next_index (b, node, thread_index, err,
+ esp_decrypt_err_to_sa_err (err), index, nexts,
+ drop_next, sa_index);
}
/**
@@ -250,6 +279,43 @@ typedef struct
extern esp_async_post_next_t esp_encrypt_async_next;
extern esp_async_post_next_t esp_decrypt_async_next;
+/* when submitting a frame is failed, drop all buffers in the frame */
+always_inline u32
+esp_async_recycle_failed_submit (vlib_main_t *vm, vnet_crypto_async_frame_t *f,
+ vlib_node_runtime_t *node, u32 err,
+ u32 ipsec_sa_err, u16 index, u32 *from,
+ u16 *nexts, u16 drop_next_index,
+ bool is_encrypt)
+{
+ vlib_buffer_t *b;
+ u32 n_drop = f->n_elts;
+ u32 *bi = f->buffer_indices;
+
+ while (n_drop--)
+ {
+ u32 sa_index;
+
+ from[index] = bi[0];
+ b = vlib_get_buffer (vm, bi[0]);
+
+ if (is_encrypt)
+ {
+ sa_index = vnet_buffer (b)->ipsec.sad_index;
+ }
+ else
+ {
+ sa_index = esp_post_data (b)->decrypt_data.sa_index;
+ }
+
+ ipsec_set_next_index (b, node, vm->thread_index, err, ipsec_sa_err,
+ index, nexts, drop_next_index, sa_index);
+ bi++;
+ index++;
+ }
+
+ return (f->n_elts);
+}
+
#endif /* __ESP_H__ */
/*
diff --git a/src/vnet/ipsec/esp_decrypt.c b/src/vnet/ipsec/esp_decrypt.c
index e30fc9effcb..26d8ca1deee 100644
--- a/src/vnet/ipsec/esp_decrypt.c
+++ b/src/vnet/ipsec/esp_decrypt.c
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
#include <vnet/vnet.h>
#include <vnet/api_errno.h>
#include <vnet/ip/ip.h>
@@ -58,35 +57,6 @@ typedef enum
ESP_DECRYPT_POST_N_NEXT,
} esp_decrypt_post_next_t;
-#define foreach_esp_decrypt_error \
- _ (RX_PKTS, "ESP pkts received") \
- _ (RX_POST_PKTS, "ESP-POST pkts received") \
- _ (HANDOFF, "hand-off") \
- _ (DECRYPTION_FAILED, "ESP decryption failed") \
- _ (INTEG_ERROR, "Integrity check failed") \
- _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)") \
- _ (REPLAY, "SA replayed packet") \
- _ (RUNT, "undersized packet") \
- _ (NO_BUFFERS, "no buffers (packet dropped)") \
- _ (OVERSIZED_HEADER, "buffer with oversized header (dropped)") \
- _ (NO_TAIL_SPACE, "no enough buffer tail space (dropped)") \
- _ (TUN_NO_PROTO, "no tunnel protocol") \
- _ (UNSUP_PAYLOAD, "unsupported payload")
-
-typedef enum
-{
-#define _(sym,str) ESP_DECRYPT_ERROR_##sym,
- foreach_esp_decrypt_error
-#undef _
- ESP_DECRYPT_N_ERROR,
-} esp_decrypt_error_t;
-
-static char *esp_decrypt_error_strings[] = {
-#define _(sym,string) string,
- foreach_esp_decrypt_error
-#undef _
-};
-
typedef struct
{
u32 seq;
@@ -97,6 +67,8 @@ typedef struct
ipsec_integ_alg_t integ_alg;
} esp_decrypt_trace_t;
+typedef vl_counter_esp_decrypt_enum_t esp_decrypt_error_t;
+
/* The number of byres in the hisequence number */
#define N_HI_ESN_BYTES 4
@@ -141,8 +113,9 @@ esp_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
err = e;
else
err = ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR;
- b[bi]->error = node->errors[err];
- nexts[bi] = ESP_DECRYPT_NEXT_DROP;
+ esp_decrypt_set_next_index (b[bi], node, vm->thread_index, err, bi,
+ nexts, ESP_DECRYPT_NEXT_DROP,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -173,8 +146,9 @@ esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
err = e;
else
err = ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR;
- b[bi]->error = node->errors[err];
- nexts[bi] = ESP_DECRYPT_NEXT_DROP;
+ esp_decrypt_set_next_index (b[bi], node, vm->thread_index, err, bi,
+ nexts, ESP_DECRYPT_NEXT_DROP,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -187,6 +161,9 @@ esp_remove_tail (vlib_main_t * vm, vlib_buffer_t * b, vlib_buffer_t * last,
{
vlib_buffer_t *before_last = b;
+ if (b != last)
+ b->total_length_not_including_first_buffer -= tail;
+
if (last->current_length > tail)
{
last->current_length -= tail;
@@ -204,6 +181,37 @@ esp_remove_tail (vlib_main_t * vm, vlib_buffer_t * b, vlib_buffer_t * last,
before_last->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
}
+always_inline void
+esp_remove_tail_and_tfc_padding (vlib_main_t *vm, vlib_node_runtime_t *node,
+ const esp_decrypt_packet_data_t *pd,
+ vlib_buffer_t *b, vlib_buffer_t *last,
+ u16 *next, u16 tail, int is_ip6)
+{
+ const u16 total_buffer_length = vlib_buffer_length_in_chain (vm, b);
+ u16 ip_packet_length;
+ if (is_ip6)
+ {
+ const ip6_header_t *ip6 = vlib_buffer_get_current (b);
+ ip_packet_length =
+ clib_net_to_host_u16 (ip6->payload_length) + sizeof (ip6_header_t);
+ }
+ else
+ {
+ const ip4_header_t *ip4 = vlib_buffer_get_current (b);
+ ip_packet_length = clib_net_to_host_u16 (ip4->length);
+ }
+ /* In case of TFC padding, the size of the buffer data needs to be adjusted
+ * to the ip packet length */
+ if (PREDICT_FALSE (total_buffer_length < ip_packet_length + tail))
+ {
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_NO_TAIL_SPACE, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
+ }
+ esp_remove_tail (vm, b, last, total_buffer_length - ip_packet_length);
+}
+
/* ICV is splitted in last two buffers so move it to the last buffer and
return pointer to it */
static_always_inline u8 *
@@ -229,9 +237,12 @@ esp_move_icv (vlib_main_t * vm, vlib_buffer_t * first,
before_last->current_length -= first_sz;
if (before_last == first)
pd->current_length -= first_sz;
+ else
+ first->total_length_not_including_first_buffer -= first_sz;
clib_memset (vlib_buffer_get_tail (before_last), 0, first_sz);
if (dif)
dif[0] = first_sz;
+ first->total_length_not_including_first_buffer -= last_sz;
pd2->lb = before_last;
pd2->icv_removed = 1;
pd2->free_buffer_index = before_last->next_buffer;
@@ -483,18 +494,16 @@ esp_decrypt_chain_crypto (vlib_main_t * vm, ipsec_per_thread_data_t * ptd,
return total_len;
}
-static_always_inline void
-esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
- ipsec_per_thread_data_t * ptd,
- vnet_crypto_op_t *** crypto_ops,
- vnet_crypto_op_t *** integ_ops,
- vnet_crypto_op_t * op,
- ipsec_sa_t * sa0, u8 * payload,
- u16 len, u8 icv_sz, u8 iv_sz,
- esp_decrypt_packet_data_t * pd,
- esp_decrypt_packet_data2_t * pd2,
- vlib_buffer_t * b, u16 * next, u32 index)
+static_always_inline esp_decrypt_error_t
+esp_decrypt_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
+ ipsec_sa_t *sa0, u8 *payload, u16 len, u8 icv_sz,
+ u8 iv_sz, esp_decrypt_packet_data_t *pd,
+ esp_decrypt_packet_data2_t *pd2, vlib_buffer_t *b,
+ u32 index)
{
+ vnet_crypto_op_t **crypto_ops;
+ vnet_crypto_op_t **integ_ops;
+ vnet_crypto_op_t _op, *op = &_op;
const u8 esp_sz = sizeof (esp_header_t);
if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE))
@@ -511,6 +520,8 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
if (pd->is_chain)
{
/* buffer is chained */
+ integ_ops = &ptd->chained_integ_ops;
+
op->len = pd->current_length;
/* special case when ICV is splitted and needs to be reassembled
@@ -536,8 +547,7 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
{
/* we now have a single buffer of crypto data, adjust
* the length (second buffer contains only ICV) */
- *integ_ops = &ptd->integ_ops;
- *crypto_ops = &ptd->crypto_ops;
+ integ_ops = &ptd->integ_ops;
len = b->current_length;
goto out;
}
@@ -551,17 +561,16 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
if (esp_decrypt_chain_integ (vm, ptd, pd, pd2, sa0, b, icv_sz,
payload, pd->current_length,
&op->digest, &op->n_chunks, 0) < 0)
- {
- b->error = node->errors[ESP_DECRYPT_ERROR_NO_BUFFERS];
- next[0] = ESP_DECRYPT_NEXT_DROP;
- return;
- }
+ return ESP_DECRYPT_ERROR_NO_BUFFERS;
}
else
- esp_insert_esn (vm, sa0, pd, pd2, &op->len, &op->digest, &len, b,
- payload);
+ {
+ integ_ops = &ptd->integ_ops;
+ esp_insert_esn (vm, sa0, pd, pd2, &op->len, &op->digest, &len, b,
+ payload);
+ }
out:
- vec_add_aligned (*(integ_ops[0]), op, 1, CLIB_CACHE_LINE_BYTES);
+ vec_add_aligned (*integ_ops, op, 1, CLIB_CACHE_LINE_BYTES);
}
payload += esp_sz;
@@ -587,6 +596,12 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
op->aad_len = esp_aad_fill (op->aad, esp0, sa0, pd->seq_hi);
op->tag = payload + len;
op->tag_len = 16;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ payload -= iv_sz;
+ len += iv_sz;
+ }
}
else
{
@@ -609,26 +624,32 @@ esp_decrypt_prepare_sync_op (vlib_main_t * vm, vlib_node_runtime_t * node,
esp_decrypt_chain_crypto (vm, ptd, pd, pd2, sa0, b, icv_sz,
payload, len - pd->iv_sz + pd->icv_sz,
&op->tag, &op->n_chunks);
+ crypto_ops = &ptd->chained_crypto_ops;
+ }
+ else
+ {
+ crypto_ops = &ptd->crypto_ops;
}
- vec_add_aligned (*(crypto_ops[0]), op, 1, CLIB_CACHE_LINE_BYTES);
+ vec_add_aligned (*crypto_ops, op, 1, CLIB_CACHE_LINE_BYTES);
}
+
+ return ESP_DECRYPT_ERROR_RX_PKTS;
}
static_always_inline esp_decrypt_error_t
-esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
- ipsec_per_thread_data_t *ptd,
+esp_decrypt_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
vnet_crypto_async_frame_t *f, ipsec_sa_t *sa0,
u8 *payload, u16 len, u8 icv_sz, u8 iv_sz,
esp_decrypt_packet_data_t *pd,
esp_decrypt_packet_data2_t *pd2, u32 bi,
- vlib_buffer_t *b, u16 *next, u16 async_next)
+ vlib_buffer_t *b, u16 async_next)
{
const u8 esp_sz = sizeof (esp_header_t);
esp_decrypt_packet_data_t *async_pd = &(esp_post_data (b))->decrypt_data;
esp_decrypt_packet_data2_t *async_pd2 = esp_post_data2 (b);
u8 *tag = payload + len, *iv = payload + esp_sz, *aad = 0;
- u32 key_index;
+ const u32 key_index = sa0->crypto_key_index;
u32 crypto_len, integ_len = 0;
i16 crypto_start_offset, integ_start_offset = 0;
u8 flags = 0;
@@ -636,7 +657,6 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
if (!ipsec_sa_is_set_IS_AEAD (sa0))
{
/* linked algs */
- key_index = sa0->linked_key_index;
integ_start_offset = payload - b->data;
integ_len = len;
if (PREDICT_TRUE (sa0->integ_op_id != VNET_CRYPTO_OP_NONE))
@@ -689,8 +709,6 @@ esp_decrypt_prepare_async_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
else
esp_insert_esn (vm, sa0, pd, pd2, &integ_len, &tag, &len, b, payload);
}
- else
- key_index = sa0->crypto_key_index;
out:
/* crypto */
@@ -710,6 +728,12 @@ out:
aad = (u8 *) nonce - sizeof (esp_aead_t);
esp_aad_fill (aad, esp0, sa0, pd->seq_hi);
tag = payload + len;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ payload -= iv_sz;
+ len += iv_sz;
+ }
}
else
{
@@ -748,10 +772,12 @@ out:
}
static_always_inline void
-esp_decrypt_post_crypto (vlib_main_t * vm, vlib_node_runtime_t * node,
- esp_decrypt_packet_data_t * pd,
- esp_decrypt_packet_data2_t * pd2, vlib_buffer_t * b,
- u16 * next, int is_ip6, int is_tun, int is_async)
+esp_decrypt_post_crypto (vlib_main_t *vm, vlib_node_runtime_t *node,
+ const u16 *next_by_next_header,
+ const esp_decrypt_packet_data_t *pd,
+ const esp_decrypt_packet_data2_t *pd2,
+ vlib_buffer_t *b, u16 *next, int is_ip6, int is_tun,
+ int is_async)
{
ipsec_sa_t *sa0 = ipsec_sa_get (pd->sa_index);
vlib_buffer_t *lb = b;
@@ -759,6 +785,7 @@ esp_decrypt_post_crypto (vlib_main_t * vm, vlib_node_runtime_t * node,
const u8 tun_flags = IPSEC_SA_FLAG_IS_TUNNEL | IPSEC_SA_FLAG_IS_TUNNEL_V6;
u8 pad_length = 0, next_header = 0;
u16 icv_sz;
+ u64 n_lost;
/*
* redo the anti-reply check
@@ -767,30 +794,50 @@ esp_decrypt_post_crypto (vlib_main_t * vm, vlib_node_runtime_t * node,
* check above we did so against the state of the window (W),
* after packet s-1. So each of the packets in the sequence will be
* accepted.
- * This time s will be cheked against Ws-1, s+1 chceked against Ws
- * (i.e. the window state is updated/advnaced)
- * so this time the successive s+! packet will be dropped.
+ * This time s will be cheked against Ws-1, s+1 checked against Ws
+ * (i.e. the window state is updated/advanced)
+ * so this time the successive s+1 packet will be dropped.
* This is a consequence of batching the decrypts. If the
- * check-dcrypt-advance process was done for each packet it would
+ * check-decrypt-advance process was done for each packet it would
* be fine. But we batch the decrypts because it's much more efficient
* to do so in SW and if we offload to HW and the process is async.
*
* You're probably thinking, but this means an attacker can send the
- * above sequence and cause VPP to perform decrpyts that will fail,
+ * above sequence and cause VPP to perform decrypts that will fail,
* and that's true. But if the attacker can determine s (a valid
* sequence number in the window) which is non-trivial, it can generate
* a sequence s, s+1, s+2, s+3, ... s+n and nothing will prevent any
* implementation, sequential or batching, from decrypting these.
*/
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true,
- NULL))
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
{
- b->error = node->errors[ESP_DECRYPT_ERROR_REPLAY];
- next[0] = ESP_DECRYPT_NEXT_DROP;
- return;
+ if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true,
+ NULL, true))
+ {
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_REPLAY, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq,
+ pd->seq_hi, true);
+ }
+ else
+ {
+ if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, pd->seq_hi, true,
+ NULL, false))
+ {
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_REPLAY, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
+ }
+ n_lost = ipsec_sa_anti_replay_advance (sa0, vm->thread_index, pd->seq,
+ pd->seq_hi, false);
}
- ipsec_sa_anti_replay_advance (sa0, pd->seq, pd->seq_hi);
+ vlib_prefetch_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST],
+ vm->thread_index, pd->sa_index);
if (pd->is_chain)
{
@@ -849,7 +896,8 @@ esp_decrypt_post_crypto (vlib_main_t * vm, vlib_node_runtime_t * node,
u16 adv = pd->iv_sz + esp_sz;
u16 tail = sizeof (esp_footer_t) + pad_length + icv_sz;
u16 tail_orig = sizeof (esp_footer_t) + pad_length + pd->icv_sz;
- b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ b->flags &=
+ ~(VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
if ((pd->flags & tun_flags) == 0 && !is_tun) /* transport mode */
{
@@ -899,14 +947,16 @@ esp_decrypt_post_crypto (vlib_main_t * vm, vlib_node_runtime_t * node,
next[0] = ESP_DECRYPT_NEXT_IP4_INPUT;
b->current_data = pd->current_data + adv;
b->current_length = pd->current_length - adv;
- esp_remove_tail (vm, b, lb, tail);
+ esp_remove_tail_and_tfc_padding (vm, node, pd, b, lb, next, tail,
+ false);
}
else if (next_header == IP_PROTOCOL_IPV6)
{
next[0] = ESP_DECRYPT_NEXT_IP6_INPUT;
b->current_data = pd->current_data + adv;
b->current_length = pd->current_length - adv;
- esp_remove_tail (vm, b, lb, tail);
+ esp_remove_tail_and_tfc_padding (vm, node, pd, b, lb, next, tail,
+ true);
}
else if (next_header == IP_PROTOCOL_MPLS_IN_IP)
{
@@ -915,44 +965,51 @@ esp_decrypt_post_crypto (vlib_main_t * vm, vlib_node_runtime_t * node,
b->current_length = pd->current_length - adv;
esp_remove_tail (vm, b, lb, tail);
}
- else
+ else if (is_tun && next_header == IP_PROTOCOL_GRE)
{
- if (is_tun && next_header == IP_PROTOCOL_GRE)
- {
- gre_header_t *gre;
+ gre_header_t *gre;
- b->current_data = pd->current_data + adv;
- b->current_length = pd->current_length - adv - tail;
+ b->current_data = pd->current_data + adv;
+ b->current_length = pd->current_length - adv - tail;
- gre = vlib_buffer_get_current (b);
+ gre = vlib_buffer_get_current (b);
- vlib_buffer_advance (b, sizeof (*gre));
+ vlib_buffer_advance (b, sizeof (*gre));
- switch (clib_net_to_host_u16 (gre->protocol))
- {
- case GRE_PROTOCOL_teb:
- vnet_update_l2_len (b);
- next[0] = ESP_DECRYPT_NEXT_L2_INPUT;
- break;
- case GRE_PROTOCOL_ip4:
- next[0] = ESP_DECRYPT_NEXT_IP4_INPUT;
- break;
- case GRE_PROTOCOL_ip6:
- next[0] = ESP_DECRYPT_NEXT_IP6_INPUT;
- break;
- default:
- b->error = node->errors[ESP_DECRYPT_ERROR_UNSUP_PAYLOAD];
- next[0] = ESP_DECRYPT_NEXT_DROP;
- break;
- }
- }
- else
+ switch (clib_net_to_host_u16 (gre->protocol))
{
- next[0] = ESP_DECRYPT_NEXT_DROP;
- b->error = node->errors[ESP_DECRYPT_ERROR_UNSUP_PAYLOAD];
- return;
+ case GRE_PROTOCOL_teb:
+ vnet_update_l2_len (b);
+ next[0] = ESP_DECRYPT_NEXT_L2_INPUT;
+ break;
+ case GRE_PROTOCOL_ip4:
+ next[0] = ESP_DECRYPT_NEXT_IP4_INPUT;
+ break;
+ case GRE_PROTOCOL_ip6:
+ next[0] = ESP_DECRYPT_NEXT_IP6_INPUT;
+ break;
+ default:
+ esp_decrypt_set_next_index (
+ b, node, vm->thread_index, ESP_DECRYPT_ERROR_UNSUP_PAYLOAD, 0,
+ next, ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ break;
}
}
+ else if ((next[0] = vec_elt (next_by_next_header, next_header)) !=
+ (u16) ~0)
+ {
+ b->current_data = pd->current_data + adv;
+ b->current_length = pd->current_length - adv;
+ esp_remove_tail (vm, b, lb, tail);
+ }
+ else
+ {
+ esp_decrypt_set_next_index (b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_UNSUP_PAYLOAD, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
+ return;
+ }
+
if (is_tun)
{
if (ipsec_sa_is_set_IS_PROTECT (sa0))
@@ -989,8 +1046,10 @@ esp_decrypt_post_crypto (vlib_main_t * vm, vlib_node_runtime_t * node,
!ip46_address_is_equal_v4 (&itp->itp_tun.dst,
&ip4->src_address))
{
- next[0] = ESP_DECRYPT_NEXT_DROP;
- b->error = node->errors[ESP_DECRYPT_ERROR_TUN_NO_PROTO];
+ esp_decrypt_set_next_index (
+ b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_TUN_NO_PROTO, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
}
}
else if (next_header == IP_PROTOCOL_IPV6)
@@ -1004,13 +1063,19 @@ esp_decrypt_post_crypto (vlib_main_t * vm, vlib_node_runtime_t * node,
!ip46_address_is_equal_v6 (&itp->itp_tun.dst,
&ip6->src_address))
{
- next[0] = ESP_DECRYPT_NEXT_DROP;
- b->error = node->errors[ESP_DECRYPT_ERROR_TUN_NO_PROTO];
+ esp_decrypt_set_next_index (
+ b, node, vm->thread_index,
+ ESP_DECRYPT_ERROR_TUN_NO_PROTO, 0, next,
+ ESP_DECRYPT_NEXT_DROP, pd->sa_index);
}
}
}
}
}
+
+ if (PREDICT_FALSE (n_lost))
+ vlib_increment_simple_counter (&ipsec_sa_err_counters[IPSEC_SA_ERROR_LOST],
+ vm->thread_index, pd->sa_index, n_lost);
}
always_inline uword
@@ -1019,6 +1084,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u16 async_next_node)
{
ipsec_main_t *im = &ipsec_main;
+ const u16 *next_by_next_header = im->next_header_registrations;
u32 thread_index = vm->thread_index;
u16 len;
ipsec_per_thread_data_t *ptd = vec_elt_at_index (im->ptd, thread_index);
@@ -1027,8 +1093,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE];
u16 sync_nexts[VLIB_FRAME_SIZE], *sync_next = sync_nexts, n_sync = 0;
- u16 async_nexts[VLIB_FRAME_SIZE], *async_next = async_nexts;
- u16 noop_nexts[VLIB_FRAME_SIZE], *noop_next = noop_nexts, n_noop = 0;
+ u16 noop_nexts[VLIB_FRAME_SIZE], n_noop = 0;
u32 sync_bi[VLIB_FRAME_SIZE];
u32 noop_bi[VLIB_FRAME_SIZE];
esp_decrypt_packet_data_t pkt_data[VLIB_FRAME_SIZE], *pd = pkt_data;
@@ -1037,9 +1102,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 current_sa_index = ~0, current_sa_bytes = 0, current_sa_pkts = 0;
const u8 esp_sz = sizeof (esp_header_t);
ipsec_sa_t *sa0 = 0;
- vnet_crypto_op_t _op, *op = &_op;
- vnet_crypto_op_t **crypto_ops;
- vnet_crypto_op_t **integ_ops;
+ bool anti_replay_result;
int is_async = im->async_mode;
vnet_crypto_async_op_id_t async_op = ~0;
vnet_crypto_async_frame_t *async_frames[VNET_CRYPTO_ASYNC_OP_N_IDS];
@@ -1077,8 +1140,9 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (n_bufs == 0)
{
err = ESP_DECRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_DROP,
+ vnet_buffer (b[0])->ipsec.sad_index);
goto next;
}
@@ -1086,12 +1150,13 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
if (current_sa_pkts)
vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_pkts,
+ current_sa_index, current_sa_pkts,
current_sa_bytes);
current_sa_bytes = current_sa_pkts = 0;
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
+ vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
+ current_sa_index);
sa0 = ipsec_sa_get (current_sa_index);
/* fetch the second cacheline ASAP */
@@ -1103,7 +1168,7 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0);
}
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -1116,8 +1181,9 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index;
err = ESP_DECRYPT_ERROR_HANDOFF;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_HANDOFF);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_HANDOFF,
+ current_sa_index);
goto next;
}
@@ -1138,33 +1204,37 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
/* find last buffer in the chain */
while (pd2->lb->flags & VLIB_BUFFER_NEXT_PRESENT)
pd2->lb = vlib_get_buffer (vm, pd2->lb->next_buffer);
+ }
- crypto_ops = &ptd->chained_crypto_ops;
- integ_ops = &ptd->chained_integ_ops;
+ pd->current_length = b[0]->current_length;
+
+ /* anti-reply check */
+ if (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa0)))
+ {
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, true);
}
else
{
- crypto_ops = &ptd->crypto_ops;
- integ_ops = &ptd->integ_ops;
+ anti_replay_result = ipsec_sa_anti_replay_and_sn_advance (
+ sa0, pd->seq, ~0, false, &pd->seq_hi, false);
}
- pd->current_length = b[0]->current_length;
-
- /* anti-reply check */
- if (ipsec_sa_anti_replay_and_sn_advance (sa0, pd->seq, ~0, false,
- &pd->seq_hi))
+ if (anti_replay_result)
{
err = ESP_DECRYPT_ERROR_REPLAY;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_DROP,
+ current_sa_index);
goto next;
}
if (pd->current_length < cpd.icv_sz + esp_sz + cpd.iv_sz)
{
err = ESP_DECRYPT_ERROR_RUNT;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, ESP_DECRYPT_NEXT_DROP,
+ current_sa_index);
goto next;
}
@@ -1183,31 +1253,47 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
async_frames[async_op] =
vnet_crypto_async_get_frame (vm, async_op);
+ if (PREDICT_FALSE (!async_frames[async_op]))
+ {
+ err = ESP_DECRYPT_ERROR_NO_AVAIL_FRAME;
+ esp_decrypt_set_next_index (
+ b[0], node, thread_index, err, n_noop, noop_nexts,
+ ESP_DECRYPT_NEXT_DROP, current_sa_index);
+ goto next;
+ }
+
/* Save the frame to the list we'll submit at the end */
vec_add1 (ptd->async_frames, async_frames[async_op]);
}
err = esp_decrypt_prepare_async_frame (
- vm, node, ptd, async_frames[async_op], sa0, payload, len,
- cpd.icv_sz, cpd.iv_sz, pd, pd2, from[b - bufs], b[0], async_next,
- async_next_node);
+ vm, ptd, async_frames[async_op], sa0, payload, len, cpd.icv_sz,
+ cpd.iv_sz, pd, pd2, from[b - bufs], b[0], async_next_node);
if (ESP_DECRYPT_ERROR_RX_PKTS != err)
{
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- ESP_DECRYPT_NEXT_DROP);
+ esp_decrypt_set_next_index (
+ b[0], node, thread_index, err, n_noop, noop_nexts,
+ ESP_DECRYPT_NEXT_DROP, current_sa_index);
}
}
else
- esp_decrypt_prepare_sync_op (
- vm, node, ptd, &crypto_ops, &integ_ops, op, sa0, payload, len,
- cpd.icv_sz, cpd.iv_sz, pd, pd2, b[0], sync_next, b - bufs);
+ {
+ err = esp_decrypt_prepare_sync_op (vm, ptd, sa0, payload, len,
+ cpd.icv_sz, cpd.iv_sz, pd, pd2,
+ b[0], n_sync);
+ if (err != ESP_DECRYPT_ERROR_RX_PKTS)
+ {
+ esp_decrypt_set_next_index (b[0], node, thread_index, err, 0,
+ sync_next, ESP_DECRYPT_NEXT_DROP,
+ current_sa_index);
+ }
+ }
/* next */
next:
if (ESP_DECRYPT_ERROR_RX_PKTS != err)
{
noop_bi[n_noop] = from[b - bufs];
n_noop++;
- noop_next++;
}
else if (!is_async)
{
@@ -1218,8 +1304,6 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
pd += 1;
pd2 += 1;
}
- else
- async_next++;
n_left -= 1;
b += 1;
@@ -1245,7 +1329,8 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
n_noop += esp_async_recycle_failed_submit (
vm, *async_frame, node, ESP_DECRYPT_ERROR_CRYPTO_ENGINE_ERROR,
- n_sync, noop_bi, noop_nexts, ESP_DECRYPT_NEXT_DROP);
+ IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR, n_noop, noop_bi, noop_nexts,
+ ESP_DECRYPT_NEXT_DROP, false);
vnet_crypto_async_reset_frame (*async_frame);
vnet_crypto_async_free_frame (vm, *async_frame);
}
@@ -1298,8 +1383,8 @@ esp_decrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
current_sa_index = vnet_buffer (b[0])->ipsec.sad_index;
if (sync_next[0] >= ESP_DECRYPT_N_NEXT)
- esp_decrypt_post_crypto (vm, node, pd, pd2, b[0], sync_next, is_ip6,
- is_tun, 0);
+ esp_decrypt_post_crypto (vm, node, next_by_next_header, pd, pd2, b[0],
+ sync_next, is_ip6, is_tun, 0);
/* trace: */
if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
@@ -1340,6 +1425,8 @@ esp_decrypt_post_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame, int is_ip6, int is_tun)
{
+ const ipsec_main_t *im = &ipsec_main;
+ const u16 *next_by_next_header = im->next_header_registrations;
u32 *from = vlib_frame_vector_args (from_frame);
u32 n_left = from_frame->n_vectors;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
@@ -1357,13 +1444,13 @@ esp_decrypt_post_inline (vlib_main_t * vm,
}
if (!pd->is_chain)
- esp_decrypt_post_crypto (vm, node, pd, 0, b[0], next, is_ip6, is_tun,
- 1);
+ esp_decrypt_post_crypto (vm, node, next_by_next_header, pd, 0, b[0],
+ next, is_ip6, is_tun, 1);
else
{
esp_decrypt_packet_data2_t *pd2 = esp_post_data2 (b[0]);
- esp_decrypt_post_crypto (vm, node, pd, pd2, b[0], next, is_ip6,
- is_tun, 1);
+ esp_decrypt_post_crypto (vm, node, next_by_next_header, pd, pd2,
+ b[0], next, is_ip6, is_tun, 1);
}
/*trace: */
@@ -1457,15 +1544,14 @@ VLIB_NODE_FN (esp6_decrypt_tun_post_node) (vlib_main_t * vm,
return esp_decrypt_post_inline (vm, node, from_frame, 1, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_decrypt_node) = {
.name = "esp4-decrypt",
.vector_size = sizeof (u32),
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.n_next_nodes = ESP_DECRYPT_N_NEXT,
.next_nodes = {
@@ -1484,8 +1570,8 @@ VLIB_REGISTER_NODE (esp4_decrypt_post_node) = {
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.sibling_of = "esp4-decrypt",
};
@@ -1496,8 +1582,8 @@ VLIB_REGISTER_NODE (esp6_decrypt_node) = {
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.n_next_nodes = ESP_DECRYPT_N_NEXT,
.next_nodes = {
@@ -1516,8 +1602,8 @@ VLIB_REGISTER_NODE (esp6_decrypt_post_node) = {
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.sibling_of = "esp6-decrypt",
};
@@ -1527,8 +1613,8 @@ VLIB_REGISTER_NODE (esp4_decrypt_tun_node) = {
.vector_size = sizeof (u32),
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.n_next_nodes = ESP_DECRYPT_N_NEXT,
.next_nodes = {
[ESP_DECRYPT_NEXT_DROP] = "ip4-drop",
@@ -1546,8 +1632,8 @@ VLIB_REGISTER_NODE (esp4_decrypt_tun_post_node) = {
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.sibling_of = "esp4-decrypt-tun",
};
@@ -1557,8 +1643,8 @@ VLIB_REGISTER_NODE (esp6_decrypt_tun_node) = {
.vector_size = sizeof (u32),
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.n_next_nodes = ESP_DECRYPT_N_NEXT,
.next_nodes = {
[ESP_DECRYPT_NEXT_DROP] = "ip6-drop",
@@ -1576,12 +1662,11 @@ VLIB_REGISTER_NODE (esp6_decrypt_tun_post_node) = {
.format_trace = format_esp_decrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
+ .n_errors = ESP_DECRYPT_N_ERROR,
+ .error_counters = esp_decrypt_error_counters,
.sibling_of = "esp6-decrypt-tun",
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/ipsec/esp_encrypt.c b/src/vnet/ipsec/esp_encrypt.c
index d102bd68f74..dd47053874c 100644
--- a/src/vnet/ipsec/esp_encrypt.c
+++ b/src/vnet/ipsec/esp_encrypt.c
@@ -18,11 +18,13 @@
#include <vnet/vnet.h>
#include <vnet/api_errno.h>
#include <vnet/ip/ip.h>
+#include <vnet/interface_output.h>
#include <vnet/crypto/crypto.h>
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/ipsec_tun.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
#include <vnet/ipsec/esp.h>
#include <vnet/tunnel/tunnel_dp.h>
@@ -43,29 +45,6 @@ typedef enum
ESP_ENCRYPT_N_NEXT,
} esp_encrypt_next_t;
-#define foreach_esp_encrypt_error \
- _ (RX_PKTS, "ESP pkts received") \
- _ (POST_RX_PKTS, "ESP-post pkts received") \
- _ (HANDOFF, "Hand-off") \
- _ (SEQ_CYCLED, "sequence number cycled (packet dropped)") \
- _ (CRYPTO_ENGINE_ERROR, "crypto engine error (packet dropped)") \
- _ (CRYPTO_QUEUE_FULL, "crypto queue full (packet dropped)") \
- _ (NO_BUFFERS, "no buffers (packet dropped)")
-
-typedef enum
-{
-#define _(sym,str) ESP_ENCRYPT_ERROR_##sym,
- foreach_esp_encrypt_error
-#undef _
- ESP_ENCRYPT_N_ERROR,
-} esp_encrypt_error_t;
-
-static char *esp_encrypt_error_strings[] = {
-#define _(sym,string) string,
- foreach_esp_encrypt_error
-#undef _
-};
-
typedef struct
{
u32 sa_index;
@@ -82,6 +61,8 @@ typedef struct
u32 next_index;
} esp_encrypt_post_trace_t;
+typedef vl_counter_esp_encrypt_enum_t esp_encrypt_error_t;
+
/* packet trace format function */
static u8 *
format_esp_encrypt_trace (u8 * s, va_list * args)
@@ -114,8 +95,7 @@ format_esp_post_encrypt_trace (u8 * s, va_list * args)
/* pad packet in input buffer */
static_always_inline u8 *
esp_add_footer_and_icv (vlib_main_t *vm, vlib_buffer_t **last, u8 esp_align,
- u8 icv_sz, vlib_node_runtime_t *node,
- u16 buffer_data_size, uword total_len)
+ u8 icv_sz, u16 buffer_data_size, uword total_len)
{
static const u8 pad_data[ESP_MAX_BLOCK_SIZE] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
@@ -169,11 +149,9 @@ esp_update_ip4_hdr (ip4_header_t * ip4, u16 len, int is_transport, int is_udp)
if (is_transport)
{
u8 prot = is_udp ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP;
-
- sum = ip_csum_update (ip4->checksum, ip4->protocol,
- prot, ip4_header_t, protocol);
+ sum = ip_csum_update (ip4->checksum, ip4->protocol, prot, ip4_header_t,
+ protocol);
ip4->protocol = prot;
-
sum = ip_csum_update (sum, old_len, len, ip4_header_t, length);
}
else
@@ -202,9 +180,9 @@ ext_hdr_is_pre_esp (u8 nexthdr)
return !u8x16_is_all_zero (ext_hdr_types == u8x16_splat (nexthdr));
#else
- return ((nexthdr ^ IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) |
- (nexthdr ^ IP_PROTOCOL_IPV6_ROUTE) |
- (nexthdr ^ IP_PROTOCOL_IPV6_FRAGMENTATION) != 0);
+ return (!(nexthdr ^ IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS) ||
+ !(nexthdr ^ IP_PROTOCOL_IPV6_ROUTE) ||
+ !(nexthdr ^ IP_PROTOCOL_IPV6_FRAGMENTATION));
#endif
}
@@ -223,9 +201,8 @@ esp_get_ip6_hdr_len (ip6_header_t * ip6, ip6_ext_header_t ** ext_hdr)
return len;
}
- p = (void *) (ip6 + 1);
+ p = ip6_next_header (ip6);
len += ip6_ext_header_len (p);
-
while (ext_hdr_is_pre_esp (p->next_hdr))
{
len += ip6_ext_header_len (p);
@@ -236,6 +213,25 @@ esp_get_ip6_hdr_len (ip6_header_t * ip6, ip6_ext_header_t ** ext_hdr)
return len;
}
+/* IPsec IV generation: IVs requirements differ depending of the
+ * encryption mode: IVs must be unpredictable for AES-CBC whereas it can
+ * be predictable but should never be reused with the same key material
+ * for CTR and GCM.
+ * To avoid reusing the same IVs between multiple VPP instances and between
+ * restarts, we use a properly chosen PRNG to generate IVs. To ensure the IV is
+ * unpredictable for CBC, it is then encrypted using the same key as the
+ * message. You can refer to NIST SP800-38a and NIST SP800-38d for more
+ * details. */
+static_always_inline void *
+esp_generate_iv (ipsec_sa_t *sa, void *payload, int iv_sz)
+{
+ ASSERT (iv_sz >= sizeof (u64));
+ u64 *iv = (u64 *) (payload - iv_sz);
+ clib_memset_u8 (iv, 0, iv_sz);
+ *iv = clib_pcg64i_random_r (&sa->iv_prng);
+ return iv;
+}
+
static_always_inline void
esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_crypto_op_t * ops, vlib_buffer_t * b[],
@@ -257,8 +253,10 @@ esp_process_chained_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR];
- nexts[bi] = drop_next;
+ esp_encrypt_set_next_index (b[bi], node, vm->thread_index,
+ ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR,
+ bi, nexts, drop_next,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -285,8 +283,10 @@ esp_process_ops (vlib_main_t * vm, vlib_node_runtime_t * node,
if (op->status != VNET_CRYPTO_OP_STATUS_COMPLETED)
{
u32 bi = op->user_data;
- b[bi]->error = node->errors[ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR];
- nexts[bi] = drop_next;
+ esp_encrypt_set_next_index (b[bi], node, vm->thread_index,
+ ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR,
+ bi, nexts, drop_next,
+ vnet_buffer (b[bi])->ipsec.sad_index);
n_fail--;
}
op++;
@@ -389,28 +389,36 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
vnet_crypto_op_t *op;
vec_add2_aligned (crypto_ops[0], op, 1, CLIB_CACHE_LINE_BYTES);
vnet_crypto_op_init (op, sa0->crypto_enc_op_id);
+ u8 *crypto_start = payload;
+ /* esp_add_footer_and_icv() in esp_encrypt_inline() makes sure we always
+ * have enough space for ESP header and footer which includes ICV */
+ ASSERT (payload_len > icv_sz);
+ u16 crypto_len = payload_len - icv_sz;
+
+ /* generate the IV in front of the payload */
+ void *pkt_iv = esp_generate_iv (sa0, payload, iv_sz);
- op->src = op->dst = payload;
op->key_index = sa0->crypto_key_index;
- op->len = payload_len - icv_sz;
op->user_data = bi;
if (ipsec_sa_is_set_IS_CTR (sa0))
{
- ASSERT (sizeof (u64) == iv_sz);
/* construct nonce in a scratch space in front of the IP header */
esp_ctr_nonce_t *nonce =
- (esp_ctr_nonce_t *) (payload - sizeof (u64) - hdr_len -
- sizeof (*nonce));
- u64 *pkt_iv = (u64 *) (payload - sizeof (u64));
-
+ (esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce));
if (ipsec_sa_is_set_IS_AEAD (sa0))
{
/* constuct aad in a scratch space in front of the nonce */
op->aad = (u8 *) nonce - sizeof (esp_aead_t);
op->aad_len = esp_aad_fill (op->aad, esp, sa0, seq_hi);
- op->tag = payload + op->len;
+ op->tag = payload + crypto_len;
op->tag_len = 16;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa0)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ crypto_start -= iv_sz;
+ crypto_len += iv_sz;
+ }
}
else
{
@@ -418,23 +426,34 @@ esp_prepare_sync_op (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
}
nonce->salt = sa0->salt;
- nonce->iv = *pkt_iv = clib_host_to_net_u64 (sa0->ctr_iv_counter++);
+ nonce->iv = *(u64 *) pkt_iv;
op->iv = (u8 *) nonce;
}
else
{
- op->iv = payload - iv_sz;
- op->flags = VNET_CRYPTO_OP_FLAG_INIT_IV;
+ /* construct zero iv in front of the IP header */
+ op->iv = pkt_iv - hdr_len - iv_sz;
+ clib_memset_u8 (op->iv, 0, iv_sz);
+ /* include iv field in crypto */
+ crypto_start -= iv_sz;
+ crypto_len += iv_sz;
}
- if (lb != b[0])
+ if (PREDICT_FALSE (lb != b[0]))
{
/* is chained */
op->flags |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
op->chunk_index = vec_len (ptd->chunks);
op->tag = vlib_buffer_get_tail (lb) - icv_sz;
- esp_encrypt_chain_crypto (vm, ptd, sa0, b[0], lb, icv_sz, payload,
- payload_len, &op->n_chunks);
+ esp_encrypt_chain_crypto (vm, ptd, sa0, b[0], lb, icv_sz,
+ crypto_start, crypto_len + icv_sz,
+ &op->n_chunks);
+ }
+ else
+ {
+ /* not chained */
+ op->src = op->dst = crypto_start;
+ op->len = crypto_len;
}
}
@@ -483,33 +502,36 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
esp_post_data_t *post = esp_post_data (b);
u8 *tag, *iv, *aad = 0;
u8 flag = 0;
- u32 key_index;
- i16 crypto_start_offset, integ_start_offset = 0;
+ const u32 key_index = sa->crypto_key_index;
+ i16 crypto_start_offset, integ_start_offset;
u16 crypto_total_len, integ_total_len;
post->next_index = next;
/* crypto */
- crypto_start_offset = payload - b->data;
+ crypto_start_offset = integ_start_offset = payload - b->data;
crypto_total_len = integ_total_len = payload_len - icv_sz;
tag = payload + crypto_total_len;
- key_index = sa->linked_key_index;
+ /* generate the IV in front of the payload */
+ void *pkt_iv = esp_generate_iv (sa, payload, iv_sz);
if (ipsec_sa_is_set_IS_CTR (sa))
{
- ASSERT (sizeof (u64) == iv_sz);
/* construct nonce in a scratch space in front of the IP header */
- esp_ctr_nonce_t *nonce = (esp_ctr_nonce_t *) (payload - sizeof (u64) -
- hdr_len - sizeof (*nonce));
- u64 *pkt_iv = (u64 *) (payload - sizeof (u64));
-
+ esp_ctr_nonce_t *nonce =
+ (esp_ctr_nonce_t *) (pkt_iv - hdr_len - sizeof (*nonce));
if (ipsec_sa_is_set_IS_AEAD (sa))
{
/* constuct aad in a scratch space in front of the nonce */
aad = (u8 *) nonce - sizeof (esp_aead_t);
esp_aad_fill (aad, esp, sa, sa->seq_hi);
- key_index = sa->crypto_key_index;
+ if (PREDICT_FALSE (ipsec_sa_is_set_IS_NULL_GMAC (sa)))
+ {
+ /* RFC-4543 ENCR_NULL_AUTH_AES_GMAC: IV is part of AAD */
+ crypto_start_offset -= iv_sz;
+ crypto_total_len += iv_sz;
+ }
}
else
{
@@ -517,13 +539,17 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
}
nonce->salt = sa->salt;
- nonce->iv = *pkt_iv = clib_host_to_net_u64 (sa->ctr_iv_counter++);
+ nonce->iv = *(u64 *) pkt_iv;
iv = (u8 *) nonce;
}
else
{
- iv = payload - iv_sz;
- flag |= VNET_CRYPTO_OP_FLAG_INIT_IV;
+ /* construct zero iv in front of the IP header */
+ iv = pkt_iv - hdr_len - iv_sz;
+ clib_memset_u8 (iv, 0, iv_sz);
+ /* include iv field in crypto */
+ crypto_start_offset -= iv_sz;
+ crypto_total_len += iv_sz;
}
if (lb != b)
@@ -531,13 +557,14 @@ esp_prepare_async_frame (vlib_main_t *vm, ipsec_per_thread_data_t *ptd,
/* chain */
flag |= VNET_CRYPTO_OP_FLAG_CHAINED_BUFFERS;
tag = vlib_buffer_get_tail (lb) - icv_sz;
- crypto_total_len = esp_encrypt_chain_crypto (vm, ptd, sa, b, lb, icv_sz,
- payload, payload_len, 0);
+ crypto_total_len = esp_encrypt_chain_crypto (
+ vm, ptd, sa, b, lb, icv_sz, b->data + crypto_start_offset,
+ crypto_total_len + icv_sz, 0);
}
if (sa->integ_op_id)
{
- integ_start_offset = crypto_start_offset - iv_sz - sizeof (esp_header_t);
+ integ_start_offset -= iv_sz + sizeof (esp_header_t);
integ_total_len += iv_sz + sizeof (esp_header_t);
if (b != lb)
@@ -578,6 +605,7 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 current_sa_bytes = 0, spi = 0;
u8 esp_align = 4, iv_sz = 0, icv_sz = 0;
ipsec_sa_t *sa0 = 0;
+ u8 sa_drop_no_crypto = 0;
vlib_buffer_t *lb;
vnet_crypto_op_t **crypto_ops = &ptd->crypto_ops;
vnet_crypto_op_t **integ_ops = &ptd->integ_ops;
@@ -594,8 +622,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
ESP_ENCRYPT_NEXT_HANDOFF_MPLS));
vlib_buffer_t *sync_bufs[VLIB_FRAME_SIZE];
u16 sync_nexts[VLIB_FRAME_SIZE], *sync_next = sync_nexts, n_sync = 0;
- u16 async_nexts[VLIB_FRAME_SIZE], *async_next = async_nexts, n_async = 0;
- u16 noop_nexts[VLIB_FRAME_SIZE], *noop_next = noop_nexts, n_noop = 0;
+ u16 n_async = 0;
+ u16 noop_nexts[VLIB_FRAME_SIZE], n_noop = 0;
u32 sync_bi[VLIB_FRAME_SIZE];
u32 noop_bi[VLIB_FRAME_SIZE];
esp_encrypt_error_t err;
@@ -634,12 +662,24 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
CLIB_CACHE_LINE_BYTES, LOAD);
}
+ vnet_calc_checksums_inline (vm, b[0], b[0]->flags & VNET_BUFFER_F_IS_IP4,
+ b[0]->flags & VNET_BUFFER_F_IS_IP6);
+ vnet_calc_outer_checksums_inline (vm, b[0]);
+
if (is_tun)
{
/* we are on a ipsec tunnel's feature arc */
vnet_buffer (b[0])->ipsec.sad_index =
sa_index0 = ipsec_tun_protect_get_sa_out
(vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
+
+ if (PREDICT_FALSE (INDEX_INVALID == sa_index0))
+ {
+ err = ESP_ENCRYPT_ERROR_NO_PROTECTION;
+ noop_nexts[n_noop] = drop_next;
+ b[0]->error = node->errors[err];
+ goto trace;
+ }
}
else
sa_index0 = vnet_buffer (b[0])->ipsec.sad_index;
@@ -647,18 +687,24 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (sa_index0 != current_sa_index)
{
if (current_sa_packets)
- vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index,
- current_sa_packets,
- current_sa_bytes);
+ vlib_increment_combined_counter (
+ &ipsec_sa_counters, thread_index, current_sa_index,
+ current_sa_packets, current_sa_bytes);
current_sa_packets = current_sa_bytes = 0;
sa0 = ipsec_sa_get (sa_index0);
+ current_sa_index = sa_index0;
+
+ sa_drop_no_crypto = ((sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE &&
+ sa0->integ_alg == IPSEC_INTEG_ALG_NONE) &&
+ !ipsec_sa_is_set_NO_ALGO_NO_DROP (sa0));
+
+ vlib_prefetch_combined_counter (&ipsec_sa_counters, thread_index,
+ current_sa_index);
/* fetch the second cacheline ASAP */
clib_prefetch_load (sa0->cacheline1);
- current_sa_index = sa_index0;
spi = clib_net_to_host_u32 (sa0->spi);
esp_align = sa0->esp_block_align;
icv_sz = sa0->integ_icv_size;
@@ -666,7 +712,15 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
is_async = im->async_mode | ipsec_sa_is_set_IS_ASYNC (sa0);
}
- if (PREDICT_FALSE (~0 == sa0->thread_index))
+ if (PREDICT_FALSE (sa_drop_no_crypto != 0))
+ {
+ err = ESP_ENCRYPT_ERROR_NO_ENCRYPTION;
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, drop_next, sa_index0);
+ goto trace;
+ }
+
+ if (PREDICT_FALSE ((u16) ~0 == sa0->thread_index))
{
/* this is the first packet to use this SA, claim the SA
* for this thread. this could happen simultaneously on
@@ -679,8 +733,9 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
vnet_buffer (b[0])->ipsec.thread_index = sa0->thread_index;
err = ESP_ENCRYPT_ERROR_HANDOFF;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- handoff_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, handoff_next,
+ current_sa_index);
goto trace;
}
@@ -689,7 +744,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (n_bufs == 0)
{
err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts, drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, drop_next, current_sa_index);
goto trace;
}
@@ -703,7 +759,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (PREDICT_FALSE (esp_seq_advance (sa0)))
{
err = ESP_ENCRYPT_ERROR_SEQ_CYCLED;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts, drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err, n_noop,
+ noop_nexts, drop_next, current_sa_index);
goto trace;
}
@@ -714,13 +771,14 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
payload = vlib_buffer_get_current (b[0]);
next_hdr_ptr = esp_add_footer_and_icv (
- vm, &lb, esp_align, icv_sz, node, buffer_data_size,
+ vm, &lb, esp_align, icv_sz, buffer_data_size,
vlib_buffer_length_in_chain (vm, b[0]));
if (!next_hdr_ptr)
{
err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
goto trace;
}
b[0]->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -823,27 +881,41 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
}
else /* transport mode */
{
- u8 *l2_hdr, l2_len, *ip_hdr, ip_len;
+ u8 *l2_hdr, l2_len, *ip_hdr;
+ u16 ip_len;
ip6_ext_header_t *ext_hdr;
udp_header_t *udp = 0;
u16 udp_len = 0;
u8 *old_ip_hdr = vlib_buffer_get_current (b[0]);
+ /*
+ * Get extension header chain length. It might be longer than the
+ * buffer's pre_data area.
+ */
ip_len =
(VNET_LINK_IP6 == lt ?
esp_get_ip6_hdr_len ((ip6_header_t *) old_ip_hdr, &ext_hdr) :
ip4_header_bytes ((ip4_header_t *) old_ip_hdr));
+ if ((old_ip_hdr - ip_len) < &b[0]->pre_data[0])
+ {
+ err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
+ goto trace;
+ }
vlib_buffer_advance (b[0], ip_len);
payload = vlib_buffer_get_current (b[0]);
next_hdr_ptr = esp_add_footer_and_icv (
- vm, &lb, esp_align, icv_sz, node, buffer_data_size,
+ vm, &lb, esp_align, icv_sz, buffer_data_size,
vlib_buffer_length_in_chain (vm, b[0]));
if (!next_hdr_ptr)
{
err = ESP_ENCRYPT_ERROR_NO_BUFFERS;
- esp_set_next_index (b[0], node, err, n_noop, noop_nexts,
- drop_next);
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
goto trace;
}
@@ -879,42 +951,40 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
l2_len = 0;
+ u16 len;
+ len = payload_len_total + hdr_len - l2_len;
+
if (VNET_LINK_IP6 == lt)
{
ip6_header_t *ip6 = (ip6_header_t *) (old_ip_hdr);
if (PREDICT_TRUE (NULL == ext_hdr))
{
*next_hdr_ptr = ip6->protocol;
- ip6->protocol = IP_PROTOCOL_IPSEC_ESP;
+ ip6->protocol =
+ (udp) ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP;
}
else
{
*next_hdr_ptr = ext_hdr->next_hdr;
- ext_hdr->next_hdr = IP_PROTOCOL_IPSEC_ESP;
+ ext_hdr->next_hdr =
+ (udp) ? IP_PROTOCOL_UDP : IP_PROTOCOL_IPSEC_ESP;
}
ip6->payload_length =
- clib_host_to_net_u16 (payload_len_total + hdr_len - l2_len -
- sizeof (ip6_header_t));
+ clib_host_to_net_u16 (len - sizeof (ip6_header_t));
}
else if (VNET_LINK_IP4 == lt)
{
- u16 len;
ip4_header_t *ip4 = (ip4_header_t *) (old_ip_hdr);
*next_hdr_ptr = ip4->protocol;
- len = payload_len_total + hdr_len - l2_len;
- if (udp)
- {
- esp_update_ip4_hdr (ip4, len, /* is_transport */ 1, 1);
- udp_len = len - ip_len;
- }
- else
- esp_update_ip4_hdr (ip4, len, /* is_transport */ 1, 0);
+ esp_update_ip4_hdr (ip4, len, /* is_transport */ 1,
+ (udp != NULL));
}
clib_memcpy_le64 (ip_hdr, old_ip_hdr, ip_len);
if (udp)
{
+ udp_len = len - ip_len;
esp_fill_udp_hdr (sa0, udp, udp_len);
}
@@ -946,6 +1016,16 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
async_frames[async_op] =
vnet_crypto_async_get_frame (vm, async_op);
+
+ if (PREDICT_FALSE (!async_frames[async_op]))
+ {
+ err = ESP_ENCRYPT_ERROR_NO_AVAIL_FRAME;
+ esp_encrypt_set_next_index (b[0], node, thread_index, err,
+ n_noop, noop_nexts, drop_next,
+ current_sa_index);
+ goto trace;
+ }
+
/* Save the frame to the list we'll submit at the end */
vec_add1 (ptd->async_frames, async_frames[async_op]);
}
@@ -970,13 +1050,18 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
esp_encrypt_trace_t *tr = vlib_add_trace (vm, node, b[0],
sizeof (*tr));
- tr->sa_index = sa_index0;
- tr->spi = sa0->spi;
- tr->seq = sa0->seq;
- tr->sa_seq_hi = sa0->seq_hi;
- tr->udp_encap = ipsec_sa_is_set_UDP_ENCAP (sa0);
- tr->crypto_alg = sa0->crypto_alg;
- tr->integ_alg = sa0->integ_alg;
+ if (INDEX_INVALID == sa_index0)
+ clib_memset_u8 (tr, 0xff, sizeof (*tr));
+ else
+ {
+ tr->sa_index = sa_index0;
+ tr->spi = sa0->spi;
+ tr->seq = sa0->seq;
+ tr->sa_seq_hi = sa0->seq_hi;
+ tr->udp_encap = ipsec_sa_is_set_UDP_ENCAP (sa0);
+ tr->crypto_alg = sa0->crypto_alg;
+ tr->integ_alg = sa0->integ_alg;
+ }
}
/* next */
@@ -984,7 +1069,6 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
noop_bi[n_noop] = from[b - bufs];
n_noop++;
- noop_next++;
}
else if (!is_async)
{
@@ -996,15 +1080,15 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
else
{
n_async++;
- async_next++;
}
n_left -= 1;
b += 1;
}
- vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
- current_sa_index, current_sa_packets,
- current_sa_bytes);
+ if (INDEX_INVALID != current_sa_index)
+ vlib_increment_combined_counter (&ipsec_sa_counters, thread_index,
+ current_sa_index, current_sa_packets,
+ current_sa_bytes);
if (n_sync)
{
esp_process_ops (vm, node, ptd->crypto_ops, sync_bufs, sync_nexts,
@@ -1030,7 +1114,8 @@ esp_encrypt_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
n_noop += esp_async_recycle_failed_submit (
vm, *async_frame, node, ESP_ENCRYPT_ERROR_CRYPTO_ENGINE_ERROR,
- n_sync, noop_bi, noop_nexts, drop_next);
+ IPSEC_SA_ERROR_CRYPTO_ENGINE_ERROR, n_noop, noop_bi,
+ noop_nexts, drop_next, true);
vnet_crypto_async_reset_frame (*async_frame);
vnet_crypto_async_free_frame (vm, *async_frame);
}
@@ -1139,15 +1224,14 @@ VLIB_NODE_FN (esp4_encrypt_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp4_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_node) = {
.name = "esp4-encrypt",
.vector_size = sizeof (u32),
.format_trace = format_esp_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
.n_next_nodes = ESP_ENCRYPT_N_NEXT,
.next_nodes = { [ESP_ENCRYPT_NEXT_DROP4] = "ip4-drop",
@@ -1158,7 +1242,6 @@ VLIB_REGISTER_NODE (esp4_encrypt_node) = {
[ESP_ENCRYPT_NEXT_HANDOFF_MPLS] = "error-drop",
[ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT] = "interface-output" },
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp4_encrypt_post_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1167,7 +1250,6 @@ VLIB_NODE_FN (esp4_encrypt_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_post_node) = {
.name = "esp4-encrypt-post",
.vector_size = sizeof (u32),
@@ -1175,10 +1257,9 @@ VLIB_REGISTER_NODE (esp4_encrypt_post_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp4-encrypt",
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1188,7 +1269,6 @@ VLIB_NODE_FN (esp6_encrypt_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp6_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_node) = {
.name = "esp6-encrypt",
.vector_size = sizeof (u32),
@@ -1196,10 +1276,9 @@ VLIB_REGISTER_NODE (esp6_encrypt_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp4-encrypt",
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_post_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1208,7 +1287,6 @@ VLIB_NODE_FN (esp6_encrypt_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_post_node) = {
.name = "esp6-encrypt-post",
.vector_size = sizeof (u32),
@@ -1216,10 +1294,9 @@ VLIB_REGISTER_NODE (esp6_encrypt_post_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp4-encrypt",
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp4_encrypt_tun_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1229,15 +1306,14 @@ VLIB_NODE_FN (esp4_encrypt_tun_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp4_tun_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_tun_node) = {
.name = "esp4-encrypt-tun",
.vector_size = sizeof (u32),
.format_trace = format_esp_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
.n_next_nodes = ESP_ENCRYPT_N_NEXT,
.next_nodes = {
@@ -1258,7 +1334,6 @@ VLIB_NODE_FN (esp4_encrypt_tun_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_tun_post_node) = {
.name = "esp4-encrypt-tun-post",
.vector_size = sizeof (u32),
@@ -1266,10 +1341,9 @@ VLIB_REGISTER_NODE (esp4_encrypt_tun_post_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp4-encrypt-tun",
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_tun_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1279,15 +1353,14 @@ VLIB_NODE_FN (esp6_encrypt_tun_node) (vlib_main_t * vm,
esp_encrypt_async_next.esp6_tun_post_next);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_tun_node) = {
.name = "esp6-encrypt-tun",
.vector_size = sizeof (u32),
.format_trace = format_esp_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
.n_next_nodes = ESP_ENCRYPT_N_NEXT,
.next_nodes = {
@@ -1301,7 +1374,6 @@ VLIB_REGISTER_NODE (esp6_encrypt_tun_node) = {
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp6_encrypt_tun_post_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1310,7 +1382,6 @@ VLIB_NODE_FN (esp6_encrypt_tun_post_node) (vlib_main_t * vm,
return esp_encrypt_post_inline (vm, node, from_frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp6_encrypt_tun_post_node) = {
.name = "esp6-encrypt-tun-post",
.vector_size = sizeof (u32),
@@ -1318,10 +1389,9 @@ VLIB_REGISTER_NODE (esp6_encrypt_tun_post_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp-mpls-encrypt-tun",
- .n_errors = ARRAY_LEN (esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (esp_mpls_encrypt_tun_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
@@ -1336,8 +1406,8 @@ VLIB_REGISTER_NODE (esp_mpls_encrypt_tun_node) = {
.format_trace = format_esp_encrypt_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN(esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
.n_next_nodes = ESP_ENCRYPT_N_NEXT,
.next_nodes = {
@@ -1364,123 +1434,9 @@ VLIB_REGISTER_NODE (esp_mpls_encrypt_tun_post_node) = {
.type = VLIB_NODE_TYPE_INTERNAL,
.sibling_of = "esp-mpls-encrypt-tun",
- .n_errors = ARRAY_LEN (esp_encrypt_error_strings),
- .error_strings = esp_encrypt_error_strings,
-};
-
-typedef struct
-{
- u32 sa_index;
-} esp_no_crypto_trace_t;
-
-static u8 *
-format_esp_no_crypto_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- esp_no_crypto_trace_t *t = va_arg (*args, esp_no_crypto_trace_t *);
-
- s = format (s, "esp-no-crypto: sa-index %u", t->sa_index);
-
- return s;
-}
-
-enum
-{
- ESP_NO_CRYPTO_NEXT_DROP,
- ESP_NO_CRYPTO_N_NEXT,
-};
-
-enum
-{
- ESP_NO_CRYPTO_ERROR_RX_PKTS,
-};
-
-static char *esp_no_crypto_error_strings[] = {
- "Outbound ESP packets received",
-};
-
-always_inline uword
-esp_no_crypto_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
- u32 *from = vlib_frame_vector_args (frame);
- u32 n_left = frame->n_vectors;
-
- vlib_get_buffers (vm, from, b, n_left);
-
- while (n_left > 0)
- {
- u32 sa_index0;
-
- /* packets are always going to be dropped, but get the sa_index */
- sa_index0 = ipsec_tun_protect_get_sa_out
- (vnet_buffer (b[0])->ip.adj_index[VLIB_TX]);
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- esp_no_crypto_trace_t *tr = vlib_add_trace (vm, node, b[0],
- sizeof (*tr));
- tr->sa_index = sa_index0;
- }
-
- n_left -= 1;
- b += 1;
- }
-
- vlib_node_increment_counter (vm, node->node_index,
- ESP_NO_CRYPTO_ERROR_RX_PKTS, frame->n_vectors);
-
- vlib_buffer_enqueue_to_single_next (vm, node, from,
- ESP_NO_CRYPTO_NEXT_DROP,
- frame->n_vectors);
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (esp4_no_crypto_tun_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return esp_no_crypto_inline (vm, node, from_frame);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (esp4_no_crypto_tun_node) =
-{
- .name = "esp4-no-crypto",
- .vector_size = sizeof (u32),
- .format_trace = format_esp_no_crypto_trace,
- .n_errors = ARRAY_LEN(esp_no_crypto_error_strings),
- .error_strings = esp_no_crypto_error_strings,
- .n_next_nodes = ESP_NO_CRYPTO_N_NEXT,
- .next_nodes = {
- [ESP_NO_CRYPTO_NEXT_DROP] = "ip4-drop",
- },
-};
-
-VLIB_NODE_FN (esp6_no_crypto_tun_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return esp_no_crypto_inline (vm, node, from_frame);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (esp6_no_crypto_tun_node) =
-{
- .name = "esp6-no-crypto",
- .vector_size = sizeof (u32),
- .format_trace = format_esp_no_crypto_trace,
- .n_errors = ARRAY_LEN(esp_no_crypto_error_strings),
- .error_strings = esp_no_crypto_error_strings,
- .n_next_nodes = ESP_NO_CRYPTO_N_NEXT,
- .next_nodes = {
- [ESP_NO_CRYPTO_NEXT_DROP] = "ip6-drop",
- },
+ .n_errors = ESP_ENCRYPT_N_ERROR,
+ .error_counters = esp_encrypt_error_counters,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/ipsec/ipsec.api b/src/vnet/ipsec/ipsec.api
index be45c3e2401..68efe8f50f7 100644
--- a/src/vnet/ipsec/ipsec.api
+++ b/src/vnet/ipsec/ipsec.api
@@ -57,74 +57,35 @@ autoreply define ipsec_interface_add_del_spd
u32 spd_id;
};
+/** \brief IPsec: Add/delete Security Policy Database entry
-enum ipsec_spd_action
-{
- /* bypass - no IPsec processing */
- IPSEC_API_SPD_ACTION_BYPASS = 0,
- /* discard - discard packet with ICMP processing */
- IPSEC_API_SPD_ACTION_DISCARD,
- /* resolve - send request to control plane for SA resolving */
- IPSEC_API_SPD_ACTION_RESOLVE,
- /* protect - apply IPsec policy using following parameters */
- IPSEC_API_SPD_ACTION_PROTECT,
-};
-
-/** \brief IPsec: Security Policy Database entry
-
- See RFC 4301, 4.4.1.1 on how to match packet to selectors
-
- @param spd_id - SPD instance id (control plane allocated)
- @param priority - priority of SPD entry (non-unique value). Used to order SPD matching - higher priorities match before lower
- @param is_outbound - entry applies to outbound traffic if non-zero, otherwise applies to inbound traffic
- @param remote_address_start - start of remote address range to match
- @param remote_address_stop - end of remote address range to match
- @param local_address_start - start of local address range to match
- @param local_address_stop - end of local address range to match
- @param protocol - protocol type to match [0 means any] otherwise IANA value
- @param remote_port_start - start of remote port range to match ...
- @param remote_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
- @param local_port_start - start of local port range to match ...
- @param local_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
- @param policy - action to perform on match
- @param sa_id - SAD instance id (control plane allocated)
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param is_add - add SPD if non-zero, else delete
+ @param entry - Description of the entry to add/dell
*/
-typedef ipsec_spd_entry
+define ipsec_spd_entry_add_del
{
- u32 spd_id;
- i32 priority;
- bool is_outbound;
-
- u32 sa_id;
- vl_api_ipsec_spd_action_t policy;
- /* Which protocol?? */
- u8 protocol;
-
- // Selector
- vl_api_address_t remote_address_start;
- vl_api_address_t remote_address_stop;
- vl_api_address_t local_address_start;
- vl_api_address_t local_address_stop;
-
- u16 remote_port_start;
- u16 remote_port_stop;
- u16 local_port_start;
- u16 local_port_stop;
+ option deprecated;
+ u32 client_index;
+ u32 context;
+ bool is_add;
+ vl_api_ipsec_spd_entry_t entry;
};
-/** \brief IPsec: Add/delete Security Policy Database entry
+/** \brief IPsec: Add/delete Security Policy Database entry v2
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param is_add - add SPD if non-zero, else delete
@param entry - Description of the entry to add/dell
*/
-define ipsec_spd_entry_add_del
+define ipsec_spd_entry_add_del_v2
{
u32 client_index;
u32 context;
bool is_add;
- vl_api_ipsec_spd_entry_t entry;
+ vl_api_ipsec_spd_entry_v2_t entry;
};
/** \brief IPsec: Reply Add/delete Security Policy Database entry
@@ -135,6 +96,20 @@ define ipsec_spd_entry_add_del
*/
define ipsec_spd_entry_add_del_reply
{
+ option deprecated;
+ u32 context;
+ i32 retval;
+ u32 stat_index;
+};
+
+/** \brief IPsec: Reply Add/delete Security Policy Database entry v2
+
+ @param context - sender context, to match reply w/ request
+ @param retval - success/fail rutrun code
+ @param stat_index - An index for the policy in the stats segment @ /net/ipec/policy
+*/
+define ipsec_spd_entry_add_del_v2_reply
+{
u32 context;
i32 retval;
u32 stat_index;
@@ -192,18 +167,23 @@ define ipsec_spd_details {
define ipsec_sad_entry_add_del
{
option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_ipsec_sad_entry_t entry;
};
+
define ipsec_sad_entry_add_del_v2
{
+ option deprecated;
+
u32 client_index;
u32 context;
bool is_add;
vl_api_ipsec_sad_entry_v2_t entry;
};
+
define ipsec_sad_entry_add_del_v3
{
u32 client_index;
@@ -211,12 +191,21 @@ define ipsec_sad_entry_add_del_v3
bool is_add;
vl_api_ipsec_sad_entry_v3_t entry;
};
+
define ipsec_sad_entry_add
{
u32 client_index;
u32 context;
vl_api_ipsec_sad_entry_v3_t entry;
};
+
+define ipsec_sad_entry_add_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ipsec_sad_entry_v4_t entry;
+};
+
autoreply define ipsec_sad_entry_del
{
u32 client_index;
@@ -224,25 +213,76 @@ autoreply define ipsec_sad_entry_del
u32 id;
};
+
+/** \brief An API to bind an SAD entry to a specific worker
+
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sa_id - the id of the SA to bind
+ @param worker - the worker's index to which the SA will be bound to
+ */
+autoreply define ipsec_sad_bind
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+ u32 worker;
+};
+
+autoreply define ipsec_sad_unbind
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+};
+
+/** \brief An API to update the tunnel parameters and the ports associated with an SA
+
+ Used in the NAT-T case when the NAT data changes
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param sa_id - the id of the SA to update
+ @param is_tun - update the tunnel if non-zero, else update only the ports
+ @param tunnel - sender context, to match reply w/ request
+ @param udp_src_port - new src port for NAT-T. Used if different from 0xffff
+ @param udp_dst_port - new dst port for NAT-T. Used if different from 0xffff
+ */
+autoreply define ipsec_sad_entry_update
+{
+ u32 client_index;
+ u32 context;
+ u32 sad_id;
+ bool is_tun;
+ vl_api_tunnel_t tunnel;
+ u16 udp_src_port [default=0xffff];
+ u16 udp_dst_port [default=0xffff];
+};
+
define ipsec_sad_entry_add_del_reply
{
option deprecated;
+
u32 context;
i32 retval;
u32 stat_index;
};
+
define ipsec_sad_entry_add_del_v2_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
u32 stat_index;
};
+
define ipsec_sad_entry_add_del_v3_reply
{
u32 context;
i32 retval;
u32 stat_index;
};
+
define ipsec_sad_entry_add_reply
{
u32 context;
@@ -250,6 +290,13 @@ define ipsec_sad_entry_add_reply
u32 stat_index;
};
+define ipsec_sad_entry_add_v2_reply
+{
+ u32 context;
+ i32 retval;
+ u32 stat_index;
+};
+
/** \brief Add or Update Protection for a tunnel with IPSEC
Tunnel protection directly associates an SA with all packets
@@ -413,12 +460,15 @@ define ipsec_itf_details
define ipsec_sa_dump
{
option deprecated;
+
u32 client_index;
u32 context;
u32 sa_id;
};
define ipsec_sa_v2_dump
{
+ option deprecated;
+
u32 client_index;
u32 context;
u32 sa_id;
@@ -429,6 +479,18 @@ define ipsec_sa_v3_dump
u32 context;
u32 sa_id;
};
+define ipsec_sa_v4_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+};
+define ipsec_sa_v5_dump
+{
+ u32 client_index;
+ u32 context;
+ u32 sa_id;
+};
/** \brief IPsec security association database response
@param context - sender context which was passed in the request
@@ -444,6 +506,7 @@ define ipsec_sa_v3_dump
*/
define ipsec_sa_details {
option deprecated;
+
u32 context;
vl_api_ipsec_sad_entry_t entry;
@@ -456,6 +519,8 @@ define ipsec_sa_details {
u32 stat_index;
};
define ipsec_sa_v2_details {
+ option deprecated;
+
u32 context;
vl_api_ipsec_sad_entry_v2_t entry;
@@ -478,6 +543,28 @@ define ipsec_sa_v3_details {
u32 stat_index;
};
+define ipsec_sa_v4_details {
+ u32 context;
+ vl_api_ipsec_sad_entry_v3_t entry;
+
+ vl_api_interface_index_t sw_if_index;
+ u64 seq_outbound;
+ u64 last_seq_inbound;
+ u64 replay_window;
+ u32 thread_index;
+ u32 stat_index;
+};
+define ipsec_sa_v5_details {
+ u32 context;
+ vl_api_ipsec_sad_entry_v4_t entry;
+
+ vl_api_interface_index_t sw_if_index;
+ u64 seq_outbound;
+ u64 last_seq_inbound;
+ u64 replay_window;
+ u32 thread_index;
+ u32 stat_index;
+};
/** \brief Dump IPsec backends
@param client_index - opaque cookie to identify the sender
@@ -527,6 +614,286 @@ autoreply define ipsec_set_async_mode {
bool async_enable;
};
+counters esp_decrypt {
+ rx_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ESP pkts received";
+ };
+ rx_post_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ESP-POST pkts received";
+ };
+ handoff {
+ severity info;
+ type counter64;
+ units "packets";
+ description "hand-off";
+ };
+ decryption_failed {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ESP decryption failed";
+ };
+ integ_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "integrity check failed";
+ };
+ crypto_engine_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "crypto engine error (packet dropped)";
+ };
+ replay {
+ severity error;
+ type counter64;
+ units "packets";
+ description "SA replayed packet";
+ };
+ runt {
+ severity error;
+ type counter64;
+ units "packets";
+ description "undersized packet";
+ };
+ no_buffers {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no buffers (packet dropped)";
+ };
+ oversized_header {
+ severity error;
+ type counter64;
+ units "packets";
+ description "buffer with oversized header (dropped)";
+ };
+ no_tail_space {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no enough buffer tail space (dropped)";
+ };
+ tun_no_proto {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no tunnel protocol";
+ };
+ unsup_payload {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unsupported payload";
+ };
+ no_avail_frame {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no available frame (packet dropped)";
+ };
+};
+
+counters esp_encrypt {
+ rx_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ESP pkts received";
+ };
+ post_rx_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "ESP-post pkts received";
+ };
+ handoff {
+ severity info;
+ type counter64;
+ units "packets";
+ description "Hand-off";
+ };
+ seq_cycled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "sequence number cycled (packet dropped)";
+ };
+ crypto_engine_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "crypto engine error (packet dropped)";
+ };
+ crypto_queue_full {
+ severity error;
+ type counter64;
+ units "packets";
+ description "crypto queue full (packet dropped)";
+ };
+ no_buffers {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no buffers (packet dropped)";
+ };
+ no_protection {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no protecting SA (packet dropped)";
+ };
+ no_encryption {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no Encrypting SA (packet dropped)";
+ };
+ no_avail_frame {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no available frame (packet dropped)";
+ };
+};
+
+counters ah_encrypt {
+ rx_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "AH pkts received";
+ };
+ crypto_engine_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "crypto engine error (packet dropped)";
+ };
+ seq_cycled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "sequence number cycled (packet dropped)";
+ };
+};
+
+counters ah_decrypt {
+ rx_pkts {
+ severity info;
+ type counter64;
+ units "packets";
+ description "AH pkts received";
+ };
+ decryption_failed {
+ severity error;
+ type counter64;
+ units "packets";
+ description "AH decryption failed";
+ };
+ integ_error {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Integrity check failed";
+ };
+ no_tail_space {
+ severity error;
+ type counter64;
+ units "packets";
+ description "not enough buffer tail space (dropped)";
+ };
+ drop_fragments {
+ severity error;
+ type counter64;
+ units "packets";
+ description "IP fragments drop";
+ };
+ replay {
+ severity error;
+ type counter64;
+ units "packets";
+ description "SA replayed packet";
+ };
+};
+
+counters ipsec_tun {
+ rx {
+ severity info;
+ type counter64;
+ units "packets";
+ description "good packets received";
+ };
+ disabled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "ipsec packets received on disabled interface";
+ };
+ no_tunnel {
+ severity error;
+ type counter64;
+ units "packets";
+ description "no matching tunnel";
+ };
+ tunnel_mismatch {
+ severity error;
+ type counter64;
+ units "packets";
+ description "SPI-tunnel mismatch";
+ };
+ nat_keepalive {
+ severity info;
+ type counter64;
+ units "packets";
+ description "NAT Keepalive";
+ };
+ too_short {
+ severity error;
+ type counter64;
+ units "packets";
+ description "Too Short";
+ };
+ spi_0 {
+ severity info;
+ type counter64;
+ units "packets";
+ description "SPI 0";
+ };
+};
+
+paths {
+ "/err/esp4-encrypt" "esp_encrypt";
+ "/err/esp4-encrypt-post" "esp_encrypt";
+ "/err/esp4-encrypt-tun" "esp_encrypt";
+ "/err/esp4-encrypt-tun-post" "esp_encrypt";
+ "/err/esp6-encrypt" "esp_encrypt";
+ "/err/esp6-encrypt-post" "esp_encrypt";
+ "/err/esp6-encrypt-tun" "esp_encrypt";
+ "/err/esp6-encrypt-tun-post" "esp_encrypt";
+ "/err/esp-mpls-encrypt-tun" "esp_encrypt";
+ "/err/esp-mpls-encrypt-tun-post" "esp_encrypt";
+ "/err/esp4-decrypt" "esp_decrypt";
+ "/err/esp4-decrypt-post" "esp_decrypt";
+ "/err/esp4-decrypt-tun" "esp_decrypt";
+ "/err/esp4-decrypt-tun-post" "esp_decrypt";
+ "/err/esp6-decrypt" "esp_decrypt";
+ "/err/esp6-decrypt-post" "esp_decrypt";
+ "/err/esp6-decrypt-tun" "esp_decrypt";
+ "/err/esp6-decrypt-tun-post" "esp_decrypt";
+ "/err/ah4-encrypt" "ah_encrypt";
+ "/err/ah6-encrypt" "ah_encrypt";
+ "/err/ipsec4-tun-input" "ipsec_tun";
+ "/err/ipsec6-tun-input" "ipsec_tun";
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/ipsec/ipsec.c b/src/vnet/ipsec/ipsec.c
index 74713458b14..f8c39c327ed 100644
--- a/src/vnet/ipsec/ipsec.c
+++ b/src/vnet/ipsec/ipsec.c
@@ -24,11 +24,69 @@
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/esp.h>
#include <vnet/ipsec/ah.h>
+#include <vnet/ipsec/ipsec_tun.h>
+#include <vnet/ipsec/ipsec_itf.h>
+#include <vnet/ipsec/ipsec_spd_fp_lookup.h>
+
+/* Flow cache is sized for 1 million flows with a load factor of .25.
+ */
+#define IPSEC4_OUT_SPD_DEFAULT_HASH_NUM_BUCKETS (1 << 22)
+
+/* Flow cache is sized for 1 million flows with a load factor of .25.
+ */
+#define IPSEC4_SPD_DEFAULT_HASH_NUM_BUCKETS (1 << 22)
ipsec_main_t ipsec_main;
+
esp_async_post_next_t esp_encrypt_async_next;
esp_async_post_next_t esp_decrypt_async_next;
+clib_error_t *
+ipsec_register_next_header (vlib_main_t *vm, u8 next_header,
+ const char *next_node)
+{
+ ipsec_main_t *im = &ipsec_main;
+ const vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) next_node);
+ /* -post nodes (eg. esp4-decrypt-post) are siblings of non-post nodes (eg.
+ * esp4-decrypt) and will therefore have the same next index */
+ const vlib_node_t *esp_decrypt_nodes[] = {
+ vlib_get_node (vm, im->esp4_decrypt_node_index),
+ vlib_get_node (vm, im->esp6_decrypt_node_index),
+ vlib_get_node (vm, im->esp4_decrypt_tun_node_index),
+ vlib_get_node (vm, im->esp6_decrypt_tun_node_index),
+ };
+ uword slot, max;
+ int i;
+
+ /* looks for a next_index value that we can use for all esp decrypt nodes to
+ * avoid maintaining different next index arrays... */
+
+ slot = vlib_node_get_next (vm, esp_decrypt_nodes[0]->index, node->index);
+ max = vec_len (esp_decrypt_nodes[0]->next_nodes);
+ for (i = 1; i < ARRAY_LEN (esp_decrypt_nodes); i++)
+ {
+ /* if next node already exists, check it shares the same next_index */
+ if (slot !=
+ vlib_node_get_next (vm, esp_decrypt_nodes[i]->index, node->index))
+ return clib_error_return (
+ 0, "next node already exists with different next index");
+ /* compute a suitable slot from the max of all nodes next index */
+ max = clib_max (max, vec_len (esp_decrypt_nodes[i]->next_nodes));
+ }
+
+ if (~0 == slot)
+ {
+ /* next node not there yet, add it using the computed max */
+ slot = max;
+ for (i = 0; i < ARRAY_LEN (esp_decrypt_nodes); i++)
+ vlib_node_add_next_with_slot (vm, esp_decrypt_nodes[i]->index,
+ node->index, slot);
+ }
+
+ im->next_header_registrations[next_header] = slot;
+ return 0;
+}
+
static clib_error_t *
ipsec_check_ah_support (ipsec_sa_t * sa)
{
@@ -124,14 +182,24 @@ ipsec_add_node (vlib_main_t * vm, const char *node_name,
*out_next_index = vlib_node_add_next (vm, prev_node->index, node->index);
}
+static inline uword
+ipsec_udp_registration_key (u16 port, u8 is_ip4)
+{
+ uword key = (is_ip4) ? AF_IP4 : AF_IP6;
+
+ key |= (uword) (port << 16);
+ return key;
+}
+
void
-ipsec_unregister_udp_port (u16 port)
+ipsec_unregister_udp_port (u16 port, u8 is_ip4)
{
ipsec_main_t *im = &ipsec_main;
u32 n_regs;
- uword *p;
+ uword *p, key;
- p = hash_get (im->udp_port_registrations, port);
+ key = ipsec_udp_registration_key (port, is_ip4);
+ p = hash_get (im->udp_port_registrations, key);
ASSERT (p);
@@ -139,33 +207,35 @@ ipsec_unregister_udp_port (u16 port)
if (0 == --n_regs)
{
- udp_unregister_dst_port (vlib_get_main (), port, 1);
- hash_unset (im->udp_port_registrations, port);
+ udp_unregister_dst_port (vlib_get_main (), port, is_ip4);
+ hash_unset (im->udp_port_registrations, key);
}
else
{
- hash_unset (im->udp_port_registrations, port);
- hash_set (im->udp_port_registrations, port, n_regs);
+ hash_unset (im->udp_port_registrations, key);
+ hash_set (im->udp_port_registrations, key, n_regs);
}
}
void
-ipsec_register_udp_port (u16 port)
+ipsec_register_udp_port (u16 port, u8 is_ip4)
{
ipsec_main_t *im = &ipsec_main;
- u32 n_regs;
- uword *p;
+ u32 n_regs, node_index;
+ uword *p, key;
- p = hash_get (im->udp_port_registrations, port);
+ key = ipsec_udp_registration_key (port, is_ip4);
+ node_index =
+ (is_ip4) ? ipsec4_tun_input_node.index : ipsec6_tun_input_node.index;
+ p = hash_get (im->udp_port_registrations, key);
n_regs = (p ? p[0] : 0);
if (0 == n_regs++)
- udp_register_dst_port (vlib_get_main (), port,
- ipsec4_tun_input_node.index, 1);
+ udp_register_dst_port (vlib_get_main (), port, node_index, is_ip4);
- hash_unset (im->udp_port_registrations, port);
- hash_set (im->udp_port_registrations, port, n_regs);
+ hash_unset (im->udp_port_registrations, key);
+ hash_set (im->udp_port_registrations, key, n_regs);
}
u32
@@ -205,8 +275,7 @@ ipsec_register_esp_backend (
const char *esp6_decrypt_node_name, const char *esp6_decrypt_tun_node_name,
const char *esp_mpls_encrypt_node_tun_name,
check_support_cb_t esp_check_support_cb,
- add_del_sa_sess_cb_t esp_add_del_sa_sess_cb,
- enable_disable_cb_t enable_disable_cb)
+ add_del_sa_sess_cb_t esp_add_del_sa_sess_cb)
{
ipsec_esp_backend_t *b;
@@ -237,7 +306,6 @@ ipsec_register_esp_backend (
b->check_support_cb = esp_check_support_cb;
b->add_del_sa_sess_cb = esp_add_del_sa_sess_cb;
- b->enable_disable_cb = enable_disable_cb;
return b - im->esp_backends;
}
@@ -249,6 +317,9 @@ ipsec_rsc_in_use (ipsec_main_t * im)
if (pool_elts (ipsec_sa_pool) > 0)
return clib_error_return (0, "%d SA entries configured",
pool_elts (ipsec_sa_pool));
+ if (ipsec_itf_count () > 0)
+ return clib_error_return (0, "%d IPSec interface configured",
+ ipsec_itf_count ());
return (NULL);
}
@@ -285,18 +356,6 @@ ipsec_select_esp_backend (ipsec_main_t * im, u32 backend_idx)
if (pool_is_free_index (im->esp_backends, backend_idx))
return VNET_API_ERROR_INVALID_VALUE;
- /* disable current backend */
- if (im->esp_current_backend != ~0)
- {
- ipsec_esp_backend_t *cb = pool_elt_at_index (im->esp_backends,
- im->esp_current_backend);
- if (cb->enable_disable_cb)
- {
- if ((cb->enable_disable_cb) (0) != 0)
- return -1;
- }
- }
-
ipsec_esp_backend_t *b = pool_elt_at_index (im->esp_backends, backend_idx);
im->esp_current_backend = backend_idx;
im->esp4_encrypt_node_index = b->esp4_encrypt_node_index;
@@ -315,11 +374,6 @@ ipsec_select_esp_backend (ipsec_main_t * im, u32 backend_idx)
im->esp6_encrypt_tun_node_index = b->esp6_encrypt_tun_node_index;
im->esp_mpls_encrypt_tun_node_index = b->esp_mpls_encrypt_tun_node_index;
- if (b->enable_disable_cb)
- {
- if ((b->enable_disable_cb) (1) != 0)
- return -1;
- }
return 0;
}
@@ -329,16 +383,11 @@ ipsec_set_async_mode (u32 is_enabled)
ipsec_main_t *im = &ipsec_main;
ipsec_sa_t *sa;
- vnet_crypto_request_async_mode (is_enabled);
-
im->async_mode = is_enabled;
/* change SA crypto op data */
pool_foreach (sa, ipsec_sa_pool)
- {
- sa->crypto_op_data =
- (is_enabled ? sa->async_op_data.data : sa->sync_op_data.data);
- }
+ ipsec_sa_set_async_mode (sa, is_enabled);
}
static void
@@ -412,7 +461,7 @@ ipsec_init (vlib_main_t * vm)
vm, im, "crypto engine backend", "esp4-encrypt", "esp4-encrypt-tun",
"esp4-decrypt", "esp4-decrypt-tun", "esp6-encrypt", "esp6-encrypt-tun",
"esp6-decrypt", "esp6-decrypt-tun", "esp-mpls-encrypt-tun",
- ipsec_check_esp_support, NULL, crypto_dispatch_enable_disable);
+ ipsec_check_esp_support, NULL);
im->esp_default_backend = idx;
rv = ipsec_select_esp_backend (im, idx);
@@ -506,6 +555,37 @@ ipsec_init (vlib_main_t * vm)
a->block_align = 1;
a->icv_size = 16;
+ a = im->crypto_algs + IPSEC_CRYPTO_ALG_CHACHA20_POLY1305;
+ a->enc_op_id = VNET_CRYPTO_OP_CHACHA20_POLY1305_ENC;
+ a->dec_op_id = VNET_CRYPTO_OP_CHACHA20_POLY1305_DEC;
+ a->alg = VNET_CRYPTO_ALG_CHACHA20_POLY1305;
+ a->iv_size = 8;
+ a->icv_size = 16;
+
+ a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128;
+ a->enc_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_ENC;
+ a->dec_op_id = VNET_CRYPTO_OP_AES_128_NULL_GMAC_DEC;
+ a->alg = VNET_CRYPTO_ALG_AES_128_GCM;
+ a->iv_size = 8;
+ a->block_align = 1;
+ a->icv_size = 16;
+
+ a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192;
+ a->enc_op_id = VNET_CRYPTO_OP_AES_192_NULL_GMAC_ENC;
+ a->dec_op_id = VNET_CRYPTO_OP_AES_192_NULL_GMAC_DEC;
+ a->alg = VNET_CRYPTO_ALG_AES_192_GCM;
+ a->iv_size = 8;
+ a->block_align = 1;
+ a->icv_size = 16;
+
+ a = im->crypto_algs + IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256;
+ a->enc_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_ENC;
+ a->dec_op_id = VNET_CRYPTO_OP_AES_256_NULL_GMAC_DEC;
+ a->alg = VNET_CRYPTO_ALG_AES_256_GCM;
+ a->iv_size = 8;
+ a->block_align = 1;
+ a->icv_size = 16;
+
vec_validate (im->integ_algs, IPSEC_INTEG_N_ALG - 1);
ipsec_main_integ_alg_t *i;
@@ -544,11 +624,168 @@ ipsec_init (vlib_main_t * vm)
im->async_mode = 0;
crypto_engine_backend_register_post_node (vm);
+ im->ipsec4_out_spd_hash_tbl = NULL;
+ im->output_flow_cache_flag = 0;
+ im->ipsec4_out_spd_flow_cache_entries = 0;
+ im->epoch_count = 0;
+ im->ipsec4_out_spd_hash_num_buckets =
+ IPSEC4_OUT_SPD_DEFAULT_HASH_NUM_BUCKETS;
+
+ im->ipsec4_in_spd_hash_tbl = NULL;
+ im->input_flow_cache_flag = 0;
+ im->ipsec4_in_spd_flow_cache_entries = 0;
+ im->input_epoch_count = 0;
+ im->ipsec4_in_spd_hash_num_buckets = IPSEC4_SPD_DEFAULT_HASH_NUM_BUCKETS;
+
+ vec_validate_init_empty_aligned (im->next_header_registrations, 255, ~0,
+ CLIB_CACHE_LINE_BYTES);
+
+ im->fp_spd_ipv4_out_is_enabled = 0;
+ im->fp_spd_ipv6_out_is_enabled = 0;
+ im->fp_spd_ipv4_in_is_enabled = 0;
+ im->fp_spd_ipv6_in_is_enabled = 0;
+
+ im->fp_lookup_hash_buckets = IPSEC_FP_HASH_LOOKUP_HASH_BUCKETS;
+
return 0;
}
VLIB_INIT_FUNCTION (ipsec_init);
+static clib_error_t *
+ipsec_config (vlib_main_t *vm, unformat_input_t *input)
+{
+ ipsec_main_t *im = &ipsec_main;
+ unformat_input_t sub_input;
+
+ u32 ipsec4_out_spd_hash_num_buckets;
+ u32 ipsec4_in_spd_hash_num_buckets;
+ u32 ipsec_spd_fp_num_buckets;
+ bool fp_spd_ip4_enabled = false;
+ bool fp_spd_ip6_enabled = false;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "ipv6-outbound-spd-fast-path on"))
+ {
+ im->fp_spd_ipv6_out_is_enabled = 1;
+ fp_spd_ip6_enabled = true;
+ }
+ else if (unformat (input, "ipv6-outbound-spd-fast-path off"))
+ im->fp_spd_ipv6_out_is_enabled = 0;
+ else if (unformat (input, "ipv4-outbound-spd-fast-path on"))
+ {
+ im->fp_spd_ipv4_out_is_enabled = 1;
+ im->output_flow_cache_flag = 0;
+ fp_spd_ip4_enabled = true;
+ }
+ else if (unformat (input, "ipv4-outbound-spd-fast-path off"))
+ im->fp_spd_ipv4_out_is_enabled = 0;
+ else if (unformat (input, "ipv6-inbound-spd-fast-path on"))
+ {
+ im->fp_spd_ipv6_in_is_enabled = 1;
+ fp_spd_ip6_enabled = true;
+ }
+ else if (unformat (input, "ipv6-inbound-spd-fast-path off"))
+ im->fp_spd_ipv6_in_is_enabled = 0;
+ else if (unformat (input, "ipv4-inbound-spd-fast-path on"))
+ {
+ im->fp_spd_ipv4_in_is_enabled = 1;
+ im->input_flow_cache_flag = 0;
+ fp_spd_ip4_enabled = true;
+ }
+ else if (unformat (input, "ipv4-inbound-spd-fast-path off"))
+ im->fp_spd_ipv4_in_is_enabled = 0;
+ else if (unformat (input, "spd-fast-path-num-buckets %d",
+ &ipsec_spd_fp_num_buckets))
+ {
+ /* Number of bihash buckets is power of 2 >= input */
+ im->fp_lookup_hash_buckets = 1ULL
+ << max_log2 (ipsec_spd_fp_num_buckets);
+ }
+ else if (unformat (input, "ipv4-outbound-spd-flow-cache on"))
+ im->output_flow_cache_flag = im->fp_spd_ipv4_out_is_enabled ? 0 : 1;
+ else if (unformat (input, "ipv4-outbound-spd-flow-cache off"))
+ im->output_flow_cache_flag = 0;
+ else if (unformat (input, "ipv4-outbound-spd-hash-buckets %d",
+ &ipsec4_out_spd_hash_num_buckets))
+ {
+ /* Size of hash is power of 2 >= number of buckets */
+ im->ipsec4_out_spd_hash_num_buckets =
+ 1ULL << max_log2 (ipsec4_out_spd_hash_num_buckets);
+ }
+ else if (unformat (input, "ipv4-inbound-spd-flow-cache on"))
+ im->input_flow_cache_flag = im->fp_spd_ipv4_in_is_enabled ? 0 : 1;
+ else if (unformat (input, "ipv4-inbound-spd-flow-cache off"))
+ im->input_flow_cache_flag = 0;
+ else if (unformat (input, "ipv4-inbound-spd-hash-buckets %d",
+ &ipsec4_in_spd_hash_num_buckets))
+ {
+ im->ipsec4_in_spd_hash_num_buckets =
+ 1ULL << max_log2 (ipsec4_in_spd_hash_num_buckets);
+ }
+ else if (unformat (input, "ip4 %U", unformat_vlib_cli_sub_input,
+ &sub_input))
+ {
+ uword table_size = ~0;
+ u32 n_buckets = ~0;
+
+ while (unformat_check_input (&sub_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&sub_input, "num-buckets %u", &n_buckets))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, &sub_input);
+ }
+
+ ipsec_tun_table_init (AF_IP4, table_size, n_buckets);
+ }
+ else if (unformat (input, "ip6 %U", unformat_vlib_cli_sub_input,
+ &sub_input))
+ {
+ uword table_size = ~0;
+ u32 n_buckets = ~0;
+
+ while (unformat_check_input (&sub_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&sub_input, "num-buckets %u", &n_buckets))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, &sub_input);
+ }
+
+ ipsec_tun_table_init (AF_IP6, table_size, n_buckets);
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+ if (im->output_flow_cache_flag)
+ {
+ vec_add2 (im->ipsec4_out_spd_hash_tbl, im->ipsec4_out_spd_hash_tbl,
+ im->ipsec4_out_spd_hash_num_buckets);
+ }
+ if (im->input_flow_cache_flag)
+ {
+ vec_add2 (im->ipsec4_in_spd_hash_tbl, im->ipsec4_in_spd_hash_tbl,
+ im->ipsec4_in_spd_hash_num_buckets);
+ }
+
+ if (fp_spd_ip4_enabled)
+ pool_alloc_aligned (im->fp_ip4_lookup_hashes_pool,
+ IPSEC_FP_IP4_HASHES_POOL_SIZE, CLIB_CACHE_LINE_BYTES);
+
+ if (fp_spd_ip6_enabled)
+ pool_alloc_aligned (im->fp_ip6_lookup_hashes_pool,
+ IPSEC_FP_IP6_HASHES_POOL_SIZE, CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (ipsec_config, "ipsec");
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/ipsec/ipsec.h b/src/vnet/ipsec/ipsec.h
index 0245c5575e4..4aa09d7560e 100644
--- a/src/vnet/ipsec/ipsec.h
+++ b/src/vnet/ipsec/ipsec.h
@@ -30,12 +30,47 @@
#include <vppinfra/bihash_24_16.h>
+#define IPSEC_FP_IP4_HASHES_POOL_SIZE 128
+#define IPSEC_FP_IP6_HASHES_POOL_SIZE 128
+
typedef clib_error_t *(*add_del_sa_sess_cb_t) (u32 sa_index, u8 is_add);
typedef clib_error_t *(*check_support_cb_t) (ipsec_sa_t * sa);
typedef clib_error_t *(*enable_disable_cb_t) (int is_enable);
typedef struct
{
+ u64 key[2]; // 16 bytes
+ u64 value;
+ i32 bucket_lock;
+ u32 un_used;
+} ipsec4_hash_kv_16_8_t;
+
+typedef union
+{
+ struct
+ {
+ ip4_address_t ip4_addr[2];
+ u16 port[2];
+ u8 proto;
+ u8 pad[3];
+ };
+ ipsec4_hash_kv_16_8_t kv_16_8;
+} ipsec4_spd_5tuple_t;
+
+typedef union
+{
+ struct
+ {
+ ip4_address_t ip4_src_addr;
+ ip4_address_t ip4_dest_addr;
+ ipsec_spd_policy_type_t policy_type;
+ u8 pad[4];
+ }; // 16 bytes total
+ ipsec4_hash_kv_16_8_t kv_16_8;
+} ipsec4_inbound_spd_tuple_t;
+
+typedef struct
+{
u8 *name;
/* add/del callback */
add_del_sa_sess_cb_t add_del_sa_sess_cb;
@@ -58,8 +93,6 @@ typedef struct
add_del_sa_sess_cb_t add_del_sa_sess_cb;
/* check support function */
check_support_cb_t check_support_cb;
- /* enable or disable function */
- enable_disable_cb_t enable_disable_cb;
u32 esp4_encrypt_node_index;
u32 esp4_decrypt_node_index;
u32 esp4_encrypt_next_index;
@@ -111,12 +144,27 @@ typedef struct
ipsec_spd_t *spds;
/* pool of policies */
ipsec_policy_t *policies;
+ /* pool of bihash tables for ipv4 ipsec rules */
+ clib_bihash_16_8_t *fp_ip4_lookup_hashes_pool;
+ /* pool of bihash tables for ipv6 ipsec rules */
+ clib_bihash_40_8_t *fp_ip6_lookup_hashes_pool;
+
+ u32 fp_spd_ipv4_out_is_enabled;
+ u32 fp_spd_ipv4_in_is_enabled;
+ u32 fp_spd_ipv6_out_is_enabled;
+ u32 fp_spd_ipv6_in_is_enabled;
+ /* pool of fast path mask types */
+ ipsec_fp_mask_type_entry_t *fp_mask_types;
+ u32 fp_lookup_hash_buckets; /* number of buckets should be power of two */
/* hash tables of UDP port registrations */
uword *udp_port_registrations;
uword *tunnel_index_by_key;
+ /* next_header protocol registration */
+ u16 *next_header_registrations;
+
/* convenience */
vlib_main_t *vlib_main;
vnet_main_t *vnet_main;
@@ -130,6 +178,8 @@ typedef struct
uword *ipsec_if_real_dev_by_show_dev;
uword *ipsec_if_by_sw_if_index;
+ ipsec4_hash_kv_16_8_t *ipsec4_out_spd_hash_tbl;
+ ipsec4_hash_kv_16_8_t *ipsec4_in_spd_hash_tbl;
clib_bihash_8_16_t tun4_protect_by_key;
clib_bihash_24_16_t tun6_protect_by_key;
@@ -160,14 +210,6 @@ typedef struct
u32 ah6_encrypt_next_index;
u32 ah6_decrypt_next_index;
- /* tun nodes to drop packets when no crypto alg set on outbound SA */
- u32 esp4_no_crypto_tun_node_index;
- u32 esp6_no_crypto_tun_node_index;
-
- /* tun nodes for encrypt on L2 interfaces */
- u32 esp4_encrypt_l2_tun_node_index;
- u32 esp6_encrypt_l2_tun_node_index;
-
/* pool of ah backends */
ipsec_ah_backend_t *ah_backends;
/* pool of esp backends */
@@ -206,6 +248,17 @@ typedef struct
u32 esp4_dec_tun_fq_index;
u32 esp6_dec_tun_fq_index;
+ /* Number of buckets for flow cache */
+ u32 ipsec4_out_spd_hash_num_buckets;
+ u32 ipsec4_out_spd_flow_cache_entries;
+ u32 epoch_count;
+ u8 output_flow_cache_flag;
+
+ u32 ipsec4_in_spd_hash_num_buckets;
+ u32 ipsec4_in_spd_flow_cache_entries;
+ u32 input_epoch_count;
+ u8 input_flow_cache_flag;
+
u8 async_mode;
u16 msg_id_base;
} ipsec_main_t;
@@ -247,6 +300,68 @@ get_next_output_feature_node_index (vlib_buffer_t * b,
return node->next_nodes[next];
}
+static_always_inline u64
+ipsec4_hash_16_8 (ipsec4_hash_kv_16_8_t *v)
+{
+#ifdef clib_crc32c_uses_intrinsics
+ return clib_crc32c ((u8 *) v->key, 16);
+#else
+ u64 tmp = v->key[0] ^ v->key[1];
+ return clib_xxhash (tmp);
+#endif
+}
+
+static_always_inline int
+ipsec4_hash_key_compare_16_8 (u64 *a, u64 *b)
+{
+#if defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
+ u64x2 v;
+ v = u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b);
+ return u64x2_is_all_zero (v);
+#else
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1])) == 0;
+#endif
+}
+
+/* clib_spinlock_lock is not used to save another memory indirection */
+static_always_inline void
+ipsec_spinlock_lock (i32 *lock)
+{
+ i32 free = 0;
+ while (!clib_atomic_cmp_and_swap_acq_relax_n (lock, &free, 1, 0))
+ {
+ /* atomic load limits number of compare_exchange executions */
+ while (clib_atomic_load_relax_n (lock))
+ CLIB_PAUSE ();
+ /* on failure, compare_exchange writes lock into free */
+ free = 0;
+ }
+}
+
+static_always_inline void
+ipsec_spinlock_unlock (i32 *lock)
+{
+ /* Make sure all reads/writes are complete before releasing the lock */
+ clib_atomic_release (lock);
+}
+
+/* Special case to drop or hand off packets for sync/async modes.
+ *
+ * Different than sync mode, async mode only enqueue drop or hand-off packets
+ * to next nodes.
+ */
+always_inline void
+ipsec_set_next_index (vlib_buffer_t *b, vlib_node_runtime_t *node,
+ u32 thread_index, u32 err, u32 ipsec_sa_err, u16 index,
+ u16 *nexts, u16 drop_next, u32 sa_index)
+{
+ nexts[index] = drop_next;
+ b->error = node->errors[err];
+ if (PREDICT_TRUE (ipsec_sa_err != ~0))
+ vlib_increment_simple_counter (&ipsec_sa_err_counters[ipsec_sa_err],
+ thread_index, sa_index, 1);
+}
+
u32 ipsec_register_ah_backend (vlib_main_t * vm, ipsec_main_t * im,
const char *name,
const char *ah4_encrypt_node_name,
@@ -264,8 +379,7 @@ u32 ipsec_register_esp_backend (
const char *esp6_decrypt_node_name, const char *esp6_decrypt_tun_node_name,
const char *esp_mpls_encrypt_tun_node_name,
check_support_cb_t esp_check_support_cb,
- add_del_sa_sess_cb_t esp_add_del_sa_sess_cb,
- enable_disable_cb_t enable_disable_cb);
+ add_del_sa_sess_cb_t esp_add_del_sa_sess_cb);
int ipsec_select_ah_backend (ipsec_main_t * im, u32 ah_backend_idx);
int ipsec_select_esp_backend (ipsec_main_t * im, u32 esp_backend_idx);
@@ -273,8 +387,12 @@ int ipsec_select_esp_backend (ipsec_main_t * im, u32 esp_backend_idx);
clib_error_t *ipsec_rsc_in_use (ipsec_main_t * im);
void ipsec_set_async_mode (u32 is_enabled);
-extern void ipsec_register_udp_port (u16 udp_port);
-extern void ipsec_unregister_udp_port (u16 udp_port);
+extern void ipsec_register_udp_port (u16 udp_port, u8 is_ip4);
+extern void ipsec_unregister_udp_port (u16 udp_port, u8 is_ip4);
+
+extern clib_error_t *ipsec_register_next_header (vlib_main_t *vm,
+ u8 next_header,
+ const char *next_node);
#endif /* __IPSEC_H__ */
diff --git a/src/vnet/ipsec/ipsec.rst b/src/vnet/ipsec/ipsec.rst
index d7e02740fc3..933d0852a07 100644
--- a/src/vnet/ipsec/ipsec.rst
+++ b/src/vnet/ipsec/ipsec.rst
@@ -2,8 +2,8 @@
.. toctree::
-IP Security
-===========
+IPSec (IP Security)
+===================
This is not a description on how IPSec works. Please read:
diff --git a/src/vnet/ipsec/ipsec_api.c b/src/vnet/ipsec/ipsec_api.c
index 11bfa41b4f1..21216b1a614 100644
--- a/src/vnet/ipsec/ipsec_api.c
+++ b/src/vnet/ipsec/ipsec_api.c
@@ -124,6 +124,7 @@ typedef struct ipsec_dump_walk_ctx_t_
{
vl_api_registration_t *reg;
u32 context;
+ u32 sw_if_index;
} ipsec_dump_walk_ctx_t;
static walk_rc_t
@@ -149,12 +150,10 @@ send_ipsec_tunnel_protect_details (index_t itpi, void *arg)
sa = ipsec_sa_get (itp->itp_out_sa);
mp->tun.sa_out = htonl (sa->id);
mp->tun.n_sa_in = itp->itp_n_sa_in;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
mp->tun.sa_in[ii++] = htonl (sa->id);
}));
- /* *INDENT-ON* */
vl_api_send_msg (ctx->reg, (u8 *) mp);
@@ -232,7 +231,8 @@ static void vl_api_ipsec_spd_entry_add_del_t_handler
p.is_ipv6 = (itype == IP46_TYPE_IP6);
- p.protocol = mp->entry.protocol;
+ p.protocol =
+ mp->entry.protocol ? mp->entry.protocol : IPSEC_POLICY_PROTOCOL_ANY;
p.rport.start = ntohs (mp->entry.remote_port_start);
p.rport.stop = ntohs (mp->entry.remote_port_stop);
p.lport.start = ntohs (mp->entry.local_port_start);
@@ -262,12 +262,69 @@ static void vl_api_ipsec_spd_entry_add_del_t_handler
goto out;
out:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_REPLY,
({
rmp->stat_index = ntohl(stat_index);
}));
- /* *INDENT-ON* */
+}
+
+static void
+vl_api_ipsec_spd_entry_add_del_v2_t_handler (
+ vl_api_ipsec_spd_entry_add_del_v2_t *mp)
+{
+ vlib_main_t *vm __attribute__ ((unused)) = vlib_get_main ();
+ vl_api_ipsec_spd_entry_add_del_reply_t *rmp;
+ ip46_type_t itype;
+ u32 stat_index;
+ int rv;
+
+ stat_index = ~0;
+
+ ipsec_policy_t p;
+
+ clib_memset (&p, 0, sizeof (p));
+
+ p.id = ntohl (mp->entry.spd_id);
+ p.priority = ntohl (mp->entry.priority);
+
+ itype = ip_address_decode (&mp->entry.remote_address_start, &p.raddr.start);
+ ip_address_decode (&mp->entry.remote_address_stop, &p.raddr.stop);
+ ip_address_decode (&mp->entry.local_address_start, &p.laddr.start);
+ ip_address_decode (&mp->entry.local_address_stop, &p.laddr.stop);
+
+ p.is_ipv6 = (itype == IP46_TYPE_IP6);
+
+ p.protocol = mp->entry.protocol;
+ p.rport.start = ntohs (mp->entry.remote_port_start);
+ p.rport.stop = ntohs (mp->entry.remote_port_stop);
+ p.lport.start = ntohs (mp->entry.local_port_start);
+ p.lport.stop = ntohs (mp->entry.local_port_stop);
+
+ rv = ipsec_spd_action_decode (mp->entry.policy, &p.policy);
+
+ if (rv)
+ goto out;
+
+ /* policy action resolve unsupported */
+ if (p.policy == IPSEC_POLICY_ACTION_RESOLVE)
+ {
+ clib_warning ("unsupported action: 'resolve'");
+ rv = VNET_API_ERROR_UNIMPLEMENTED;
+ goto out;
+ }
+ p.sa_id = ntohl (mp->entry.sa_id);
+ rv =
+ ipsec_policy_mk_type (mp->entry.is_outbound, p.is_ipv6, p.policy, &p.type);
+ if (rv)
+ goto out;
+
+ rv = ipsec_add_del_policy (vm, &p, mp->is_add, &stat_index);
+ if (rv)
+ goto out;
+
+out:
+ REPLY_MACRO2 (VL_API_IPSEC_SPD_ENTRY_ADD_DEL_V2_REPLY,
+ ({ rmp->stat_index = ntohl (stat_index); }));
}
static void vl_api_ipsec_sad_entry_add_del_t_handler
@@ -321,18 +378,16 @@ static void vl_api_ipsec_sad_entry_add_del_t_handler
ip_address_decode2 (&mp->entry.tunnel_src, &tun.t_src);
ip_address_decode2 (&mp->entry.tunnel_dst, &tun.t_dst);
- rv = ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &crypto_key,
- integ_alg, &integ_key, flags, mp->entry.salt,
- htons (mp->entry.udp_src_port),
- htons (mp->entry.udp_dst_port), &tun, &sa_index);
+ rv = ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ mp->entry.salt, htons (mp->entry.udp_src_port),
+ htons (mp->entry.udp_dst_port), 0, &tun, &sa_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_DEL_REPLY,
{
rmp->stat_index = htonl (sa_index);
});
- /* *INDENT-ON* */
}
static void vl_api_ipsec_sad_entry_add_del_v2_t_handler
@@ -395,18 +450,16 @@ static void vl_api_ipsec_sad_entry_add_del_v2_t_handler
ip_address_decode2 (&mp->entry.tunnel_src, &tun.t_src);
ip_address_decode2 (&mp->entry.tunnel_dst, &tun.t_dst);
- rv = ipsec_sa_add_and_lock (
- id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
- mp->entry.salt, htons (mp->entry.udp_src_port),
- htons (mp->entry.udp_dst_port), &tun, &sa_index);
+ rv = ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ mp->entry.salt, htons (mp->entry.udp_src_port),
+ htons (mp->entry.udp_dst_port), 0, &tun, &sa_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_DEL_V2_REPLY,
{
rmp->stat_index = htonl (sa_index);
});
- /* *INDENT-ON* */
}
static int
@@ -419,7 +472,7 @@ ipsec_sad_entry_add_v3 (const vl_api_ipsec_sad_entry_v3_t *entry,
ipsec_protocol_t proto;
ipsec_sa_flags_t flags;
u32 id, spi;
- tunnel_t tun;
+ tunnel_t tun = { 0 };
int rv;
id = ntohl (entry->sad_id);
@@ -453,10 +506,10 @@ ipsec_sad_entry_add_v3 (const vl_api_ipsec_sad_entry_v3_t *entry,
ipsec_key_decode (&entry->crypto_key, &crypto_key);
ipsec_key_decode (&entry->integrity_key, &integ_key);
- return ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &crypto_key,
- integ_alg, &integ_key, flags, entry->salt,
- htons (entry->udp_src_port),
- htons (entry->udp_dst_port), &tun, sa_index);
+ return ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ entry->salt, htons (entry->udp_src_port), htons (entry->udp_dst_port), 0,
+ &tun, sa_index);
}
static void
@@ -482,6 +535,56 @@ vl_api_ipsec_sad_entry_add_del_v3_t_handler (
{ rmp->stat_index = htonl (sa_index); });
}
+static int
+ipsec_sad_entry_add_v4 (const vl_api_ipsec_sad_entry_v4_t *entry,
+ u32 *sa_index)
+{
+ ipsec_key_t crypto_key, integ_key;
+ ipsec_crypto_alg_t crypto_alg;
+ ipsec_integ_alg_t integ_alg;
+ ipsec_protocol_t proto;
+ ipsec_sa_flags_t flags;
+ u32 id, spi;
+ tunnel_t tun = { 0 };
+ int rv;
+
+ id = ntohl (entry->sad_id);
+ spi = ntohl (entry->spi);
+
+ rv = ipsec_proto_decode (entry->protocol, &proto);
+
+ if (rv)
+ return rv;
+
+ rv = ipsec_crypto_algo_decode (entry->crypto_algorithm, &crypto_alg);
+
+ if (rv)
+ return rv;
+
+ rv = ipsec_integ_algo_decode (entry->integrity_algorithm, &integ_alg);
+
+ if (rv)
+ return rv;
+
+ flags = ipsec_sa_flags_decode (entry->flags);
+
+ if (flags & IPSEC_SA_FLAG_IS_TUNNEL)
+ {
+ rv = tunnel_decode (&entry->tunnel, &tun);
+
+ if (rv)
+ return rv;
+ }
+
+ ipsec_key_decode (&entry->crypto_key, &crypto_key);
+ ipsec_key_decode (&entry->integrity_key, &integ_key);
+
+ return ipsec_sa_add_and_lock (
+ id, spi, proto, crypto_alg, &crypto_key, integ_alg, &integ_key, flags,
+ entry->salt, htons (entry->udp_src_port), htons (entry->udp_dst_port),
+ ntohl (entry->anti_replay_window_size), &tun, sa_index);
+}
+
static void
vl_api_ipsec_sad_entry_del_t_handler (vl_api_ipsec_sad_entry_del_t *mp)
{
@@ -507,6 +610,74 @@ vl_api_ipsec_sad_entry_add_t_handler (vl_api_ipsec_sad_entry_add_t *mp)
}
static void
+vl_api_ipsec_sad_entry_add_v2_t_handler (vl_api_ipsec_sad_entry_add_v2_t *mp)
+{
+ vl_api_ipsec_sad_entry_add_reply_t *rmp;
+ u32 sa_index = ~0;
+ int rv;
+
+ rv = ipsec_sad_entry_add_v4 (&mp->entry, &sa_index);
+
+ REPLY_MACRO2 (VL_API_IPSEC_SAD_ENTRY_ADD_V2_REPLY,
+ { rmp->stat_index = htonl (sa_index); });
+}
+
+static void
+vl_api_ipsec_sad_entry_update_t_handler (vl_api_ipsec_sad_entry_update_t *mp)
+{
+ vl_api_ipsec_sad_entry_update_reply_t *rmp;
+ u32 id;
+ tunnel_t tun = { 0 };
+ int rv;
+
+ id = ntohl (mp->sad_id);
+
+ if (mp->is_tun)
+ {
+ rv = tunnel_decode (&mp->tunnel, &tun);
+
+ if (rv)
+ goto out;
+ }
+
+ rv = ipsec_sa_update (id, htons (mp->udp_src_port), htons (mp->udp_dst_port),
+ &tun, mp->is_tun);
+
+out:
+ REPLY_MACRO (VL_API_IPSEC_SAD_ENTRY_UPDATE_REPLY);
+}
+
+static void
+vl_api_ipsec_sad_bind_t_handler (vl_api_ipsec_sad_bind_t *mp)
+{
+ vl_api_ipsec_sad_bind_reply_t *rmp;
+ u32 sa_id;
+ u32 worker;
+ int rv;
+
+ sa_id = ntohl (mp->sa_id);
+ worker = ntohl (mp->worker);
+
+ rv = ipsec_sa_bind (sa_id, worker, true /* bind */);
+
+ REPLY_MACRO (VL_API_IPSEC_SAD_BIND_REPLY);
+}
+
+static void
+vl_api_ipsec_sad_unbind_t_handler (vl_api_ipsec_sad_unbind_t *mp)
+{
+ vl_api_ipsec_sad_unbind_reply_t *rmp;
+ u32 sa_id;
+ int rv;
+
+ sa_id = ntohl (mp->sa_id);
+
+ rv = ipsec_sa_bind (sa_id, ~0, false /* bind */);
+
+ REPLY_MACRO (VL_API_IPSEC_SAD_UNBIND_REPLY);
+}
+
+static void
send_ipsec_spds_details (ipsec_spd_t * spd, vl_api_registration_t * reg,
u32 context)
{
@@ -660,12 +831,10 @@ vl_api_ipsec_spd_interface_dump_t_handler (vl_api_ipsec_spd_interface_dump_t *
if (mp->spd_index_valid)
{
spd_index = ntohl (mp->spd_index);
- /* *INDENT-OFF* */
hash_foreach(k, v, im->spd_index_by_sw_if_index, ({
if (v == spd_index)
send_ipsec_spd_interface_details(reg, v, k, mp->context);
}));
- /* *INDENT-ON* */
}
else
{
@@ -688,12 +857,10 @@ vl_api_ipsec_itf_create_t_handler (vl_api_ipsec_itf_create_t * mp)
if (!rv)
rv = ipsec_itf_create (ntohl (mp->itf.user_instance), mode, &sw_if_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_IPSEC_ITF_CREATE_REPLY,
({
rmp->sw_if_index = htonl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -713,6 +880,9 @@ send_ipsec_itf_details (ipsec_itf_t *itf, void *arg)
ipsec_dump_walk_ctx_t *ctx = arg;
vl_api_ipsec_itf_details_t *mp;
+ if (~0 != ctx->sw_if_index && ctx->sw_if_index != itf->ii_sw_if_index)
+ return (WALK_CONTINUE);
+
mp = vl_msg_api_alloc (sizeof (*mp));
clib_memset (mp, 0, sizeof (*mp));
mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_IPSEC_ITF_DETAILS);
@@ -738,6 +908,7 @@ vl_api_ipsec_itf_dump_t_handler (vl_api_ipsec_itf_dump_t * mp)
ipsec_dump_walk_ctx_t ctx = {
.reg = reg,
.context = mp->context,
+ .sw_if_index = ntohl (mp->sw_if_index),
};
ipsec_itf_walk (send_ipsec_itf_details, &ctx);
@@ -833,7 +1004,10 @@ send_ipsec_sa_details (ipsec_sa_t * sa, void *arg)
mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
}
if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- mp->replay_window = clib_host_to_net_u64 (sa->replay_window);
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -920,7 +1094,10 @@ send_ipsec_sa_v2_details (ipsec_sa_t * sa, void *arg)
mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
}
if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- mp->replay_window = clib_host_to_net_u64 (sa->replay_window);
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -1000,7 +1177,10 @@ send_ipsec_sa_v3_details (ipsec_sa_t *sa, void *arg)
mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
}
if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
- mp->replay_window = clib_host_to_net_u64 (sa->replay_window);
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
@@ -1026,8 +1206,179 @@ vl_api_ipsec_sa_v3_dump_t_handler (vl_api_ipsec_sa_v3_dump_t *mp)
ipsec_sa_walk (send_ipsec_sa_v3_details, &ctx);
}
+static walk_rc_t
+send_ipsec_sa_v4_details (ipsec_sa_t *sa, void *arg)
+{
+ ipsec_dump_walk_ctx_t *ctx = arg;
+ vl_api_ipsec_sa_v4_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_IPSEC_SA_V4_DETAILS);
+ mp->context = ctx->context;
+
+ mp->entry.sad_id = htonl (sa->id);
+ mp->entry.spi = htonl (sa->spi);
+ mp->entry.protocol = ipsec_proto_encode (sa->protocol);
+
+ mp->entry.crypto_algorithm = ipsec_crypto_algo_encode (sa->crypto_alg);
+ ipsec_key_encode (&sa->crypto_key, &mp->entry.crypto_key);
+
+ mp->entry.integrity_algorithm = ipsec_integ_algo_encode (sa->integ_alg);
+ ipsec_key_encode (&sa->integ_key, &mp->entry.integrity_key);
+
+ mp->entry.flags = ipsec_sad_flags_encode (sa);
+ mp->entry.salt = clib_host_to_net_u32 (sa->salt);
+
+ if (ipsec_sa_is_set_IS_PROTECT (sa))
+ {
+ ipsec_sa_dump_match_ctx_t ctx = {
+ .sai = sa - ipsec_sa_pool,
+ .sw_if_index = ~0,
+ };
+ ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx);
+
+ mp->sw_if_index = htonl (ctx.sw_if_index);
+ }
+ else
+ mp->sw_if_index = ~0;
+
+ if (ipsec_sa_is_set_IS_TUNNEL (sa))
+ tunnel_encode (&sa->tunnel, &mp->entry.tunnel);
+
+ if (ipsec_sa_is_set_UDP_ENCAP (sa))
+ {
+ mp->entry.udp_src_port = sa->udp_hdr.src_port;
+ mp->entry.udp_dst_port = sa->udp_hdr.dst_port;
+ }
+
+ mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq));
+ mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq));
+ if (ipsec_sa_is_set_USE_ESN (sa))
+ {
+ mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ }
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+ }
+
+ mp->thread_index = clib_host_to_net_u32 (sa->thread_index);
+ mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
+
+ vl_api_send_msg (ctx->reg, (u8 *) mp);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_ipsec_sa_v4_dump_t_handler (vl_api_ipsec_sa_v4_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ ipsec_dump_walk_ctx_t ctx = {
+ .reg = reg,
+ .context = mp->context,
+ };
+
+ ipsec_sa_walk (send_ipsec_sa_v4_details, &ctx);
+}
+
+static walk_rc_t
+send_ipsec_sa_v5_details (ipsec_sa_t *sa, void *arg)
+{
+ ipsec_dump_walk_ctx_t *ctx = arg;
+ vl_api_ipsec_sa_v5_details_t *mp;
+
+ mp = vl_msg_api_alloc (sizeof (*mp));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_IPSEC_SA_V5_DETAILS);
+ mp->context = ctx->context;
+
+ mp->entry.sad_id = htonl (sa->id);
+ mp->entry.spi = htonl (sa->spi);
+ mp->entry.protocol = ipsec_proto_encode (sa->protocol);
+
+ mp->entry.crypto_algorithm = ipsec_crypto_algo_encode (sa->crypto_alg);
+ ipsec_key_encode (&sa->crypto_key, &mp->entry.crypto_key);
+
+ mp->entry.integrity_algorithm = ipsec_integ_algo_encode (sa->integ_alg);
+ ipsec_key_encode (&sa->integ_key, &mp->entry.integrity_key);
+
+ mp->entry.flags = ipsec_sad_flags_encode (sa);
+ mp->entry.salt = clib_host_to_net_u32 (sa->salt);
+
+ if (ipsec_sa_is_set_IS_PROTECT (sa))
+ {
+ ipsec_sa_dump_match_ctx_t ctx = {
+ .sai = sa - ipsec_sa_pool,
+ .sw_if_index = ~0,
+ };
+ ipsec_tun_protect_walk (ipsec_sa_dump_match_sa, &ctx);
+
+ mp->sw_if_index = htonl (ctx.sw_if_index);
+ }
+ else
+ mp->sw_if_index = ~0;
+
+ if (ipsec_sa_is_set_IS_TUNNEL (sa))
+ tunnel_encode (&sa->tunnel, &mp->entry.tunnel);
+
+ if (ipsec_sa_is_set_UDP_ENCAP (sa))
+ {
+ mp->entry.udp_src_port = sa->udp_hdr.src_port;
+ mp->entry.udp_dst_port = sa->udp_hdr.dst_port;
+ }
+
+ mp->seq_outbound = clib_host_to_net_u64 (((u64) sa->seq));
+ mp->last_seq_inbound = clib_host_to_net_u64 (((u64) sa->seq));
+ if (ipsec_sa_is_set_USE_ESN (sa))
+ {
+ mp->seq_outbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ mp->last_seq_inbound |= (u64) (clib_host_to_net_u32 (sa->seq_hi));
+ }
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
+ {
+ mp->replay_window =
+ clib_host_to_net_u64 (ipsec_sa_anti_replay_get_64b_window (sa));
+
+ mp->entry.anti_replay_window_size =
+ clib_host_to_net_u32 (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa));
+ }
+
+ mp->thread_index = clib_host_to_net_u32 (sa->thread_index);
+ mp->stat_index = clib_host_to_net_u32 (sa->stat_index);
+
+ vl_api_send_msg (ctx->reg, (u8 *) mp);
+
+ return (WALK_CONTINUE);
+}
+
+static void
+vl_api_ipsec_sa_v5_dump_t_handler (vl_api_ipsec_sa_v5_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ ipsec_dump_walk_ctx_t ctx = {
+ .reg = reg,
+ .context = mp->context,
+ };
+
+ ipsec_sa_walk (send_ipsec_sa_v5_details, &ctx);
+}
+
static void
-vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp)
+vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t *mp)
{
vl_api_registration_t *rp;
ipsec_main_t *im = &ipsec_main;
@@ -1043,7 +1394,6 @@ vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp)
ipsec_ah_backend_t *ab;
ipsec_esp_backend_t *eb;
- /* *INDENT-OFF* */
pool_foreach (ab, im->ah_backends) {
vl_api_ipsec_backend_details_t *mp = vl_msg_api_alloc (sizeof (*mp));
clib_memset (mp, 0, sizeof (*mp));
@@ -1068,7 +1418,6 @@ vl_api_ipsec_backend_dump_t_handler (vl_api_ipsec_backend_dump_t * mp)
mp->active = mp->index == im->esp_current_backend ? 1 : 0;
vl_api_send_msg (rp, (u8 *)mp);
}
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/ipsec/ipsec_cli.c b/src/vnet/ipsec/ipsec_cli.c
index 3a3e53b663e..07d9df8f204 100644
--- a/src/vnet/ipsec/ipsec_cli.c
+++ b/src/vnet/ipsec/ipsec_cli.c
@@ -71,14 +71,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_spd_command, static) = {
.path = "set interface ipsec spd",
.short_help =
"set interface ipsec spd <int> <id>",
.function = set_interface_spd_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_sa_add_del_command_fn (vlib_main_t * vm,
@@ -88,6 +86,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
unformat_input_t _line_input, *line_input = &_line_input;
ipsec_crypto_alg_t crypto_alg;
ipsec_integ_alg_t integ_alg;
+ u32 anti_replay_window_size;
ipsec_protocol_t proto;
ipsec_sa_flags_t flags;
clib_error_t *error;
@@ -105,6 +104,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
is_add = 0;
flags = IPSEC_SA_FLAG_NONE;
proto = IPSEC_PROTOCOL_ESP;
+ anti_replay_window_size = 0;
integ_alg = IPSEC_INTEG_ALG_NONE;
crypto_alg = IPSEC_CRYPTO_ALG_NONE;
udp_src = udp_dst = IPSEC_UDP_PORT_NONE;
@@ -143,7 +143,7 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
else if (unformat (line_input, "integ-alg %U",
unformat_ipsec_integ_alg, &integ_alg))
;
- else if (unformat (line_input, " %U", unformat_tunnel, &tun))
+ else if (unformat (line_input, "%U", unformat_tunnel, &tun))
{
flags |= IPSEC_SA_FLAG_IS_TUNNEL;
if (AF_IP6 == tunnel_get_af (&tun))
@@ -153,6 +153,9 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
udp_src = i;
else if (unformat (line_input, "udp-dst-port %d", &i))
udp_dst = i;
+ else if (unformat (line_input, "anti-replay-size %d",
+ &anti_replay_window_size))
+ flags |= IPSEC_SA_FLAG_USE_ANTI_REPLAY;
else if (unformat (line_input, "inbound"))
flags |= IPSEC_SA_FLAG_IS_INBOUND;
else if (unformat (line_input, "use-anti-replay"))
@@ -184,9 +187,10 @@ ipsec_sa_add_del_command_fn (vlib_main_t * vm,
error = clib_error_return (0, "missing spi");
goto done;
}
- rv = ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &ck, integ_alg,
- &ik, flags, clib_host_to_net_u32 (salt),
- udp_src, udp_dst, &tun, &sai);
+ rv =
+ ipsec_sa_add_and_lock (id, spi, proto, crypto_alg, &ck, integ_alg, &ik,
+ flags, clib_host_to_net_u32 (salt), udp_src,
+ udp_dst, anti_replay_window_size, &tun, &sai);
}
else
{
@@ -202,14 +206,77 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_sa_add_del_command, static) = {
.path = "ipsec sa",
.short_help =
"ipsec sa [add|del]",
.function = ipsec_sa_add_del_command_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+ipsec_sa_bind_cli (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ u32 id = ~0;
+ u32 worker = ~0;
+ bool bind = 1;
+ int rv;
+ clib_error_t *error = NULL;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "unbind"))
+ bind = 0;
+ else if (id == ~0 && unformat (line_input, "%u", &id))
+ ;
+ else if (unformat (line_input, "%u", &worker))
+ ;
+ else
+ {
+ error = clib_error_return (0, "parse error: '%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (id == ~0)
+ {
+ error = clib_error_return (0, "please specify SA ID");
+ goto done;
+ }
+
+ if (bind && ~0 == worker)
+ {
+ error = clib_error_return (0, "please specify worker to bind to");
+ goto done;
+ }
+
+ rv = ipsec_sa_bind (id, worker, bind);
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "please specify a valid SA ID");
+ break;
+ case VNET_API_ERROR_INVALID_WORKER:
+ error = clib_error_return (0, "please specify a valid worker index");
+ break;
+ }
+
+done:
+ unformat_free (line_input);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (ipsec_sa_bind_cmd, static) = {
+ .path = "ipsec sa bind",
+ .short_help = "ipsec sa [unbind] <sa-id> <worker>",
+ .function = ipsec_sa_bind_cli,
+};
static clib_error_t *
ipsec_spd_add_del_command_fn (vlib_main_t * vm,
@@ -254,14 +321,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_spd_add_del_command, static) = {
.path = "ipsec spd",
.short_help =
"ipsec spd [add|del] <id>",
.function = ipsec_spd_add_del_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -279,6 +344,7 @@ ipsec_policy_add_del_command_fn (vlib_main_t * vm,
clib_memset (&p, 0, sizeof (p));
p.lport.stop = p.rport.stop = ~0;
remote_range_set = local_range_set = is_outbound = 0;
+ p.protocol = IPSEC_POLICY_PROTOCOL_ANY;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -395,27 +461,23 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_policy_add_del_command, static) = {
.path = "ipsec policy",
.short_help =
"ipsec policy [add|del] spd <id> priority <n> ",
.function = ipsec_policy_add_del_command_fn,
};
-/* *INDENT-ON* */
static void
ipsec_sa_show_all (vlib_main_t * vm, ipsec_main_t * im, u8 detail)
{
u32 sai;
- /* *INDENT-OFF* */
pool_foreach_index (sai, ipsec_sa_pool)
{
vlib_cli_output (vm, "%U", format_ipsec_sa, sai,
(detail ? IPSEC_FORMAT_DETAIL : IPSEC_FORMAT_BRIEF));
}
- /* *INDENT-ON* */
}
static void
@@ -423,11 +485,18 @@ ipsec_spd_show_all (vlib_main_t * vm, ipsec_main_t * im)
{
u32 spdi;
- /* *INDENT-OFF* */
pool_foreach_index (spdi, im->spds) {
vlib_cli_output(vm, "%U", format_ipsec_spd, spdi);
}
- /* *INDENT-ON* */
+
+ if (im->output_flow_cache_flag)
+ {
+ vlib_cli_output (vm, "%U", format_ipsec_out_spd_flow_cache);
+ }
+ if (im->input_flow_cache_flag)
+ {
+ vlib_cli_output (vm, "%U", format_ipsec_in_spd_flow_cache);
+ }
}
static void
@@ -438,14 +507,12 @@ ipsec_spd_bindings_show_all (vlib_main_t * vm, ipsec_main_t * im)
vlib_cli_output (vm, "SPD Bindings:");
- /* *INDENT-OFF* */
hash_foreach(sw_if_index, spd_id, im->spd_index_by_sw_if_index, ({
spd = pool_elt_at_index (im->spds, spd_id);
vlib_cli_output (vm, " %d -> %U", spd->id,
format_vnet_sw_if_index_name, im->vnet_main,
sw_if_index);
}));
- /* *INDENT-ON* */
}
static walk_rc_t
@@ -479,13 +546,11 @@ show_ipsec_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_command, static) = {
.path = "show ipsec all",
.short_help = "show ipsec all",
.function = show_ipsec_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ipsec_sa_command_fn (vlib_main_t * vm,
@@ -530,12 +595,10 @@ clear_ipsec_sa_command_fn (vlib_main_t * vm,
if (~0 == sai)
{
- /* *INDENT-OFF* */
pool_foreach_index (sai, ipsec_sa_pool)
{
ipsec_sa_clear (sai);
}
- /* *INDENT-ON* */
}
else
{
@@ -548,7 +611,6 @@ clear_ipsec_sa_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_sa_command, static) = {
.path = "show ipsec sa",
.short_help = "show ipsec sa [index]",
@@ -560,7 +622,6 @@ VLIB_CLI_COMMAND (clear_ipsec_sa_command, static) = {
.short_help = "clear ipsec sa [index]",
.function = clear_ipsec_sa_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ipsec_spd_command_fn (vlib_main_t * vm,
@@ -590,13 +651,11 @@ show_ipsec_spd_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_spd_command, static) = {
.path = "show ipsec spd",
.short_help = "show ipsec spd [index]",
.function = show_ipsec_spd_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_ipsec_tunnel_command_fn (vlib_main_t * vm,
@@ -608,13 +667,11 @@ show_ipsec_tunnel_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ipsec_tunnel_command, static) = {
.path = "show ipsec tunnel",
.short_help = "show ipsec tunnel",
.function = show_ipsec_tunnel_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_show_backends_command_fn (vlib_main_t * vm,
@@ -629,7 +686,6 @@ ipsec_show_backends_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "IPsec AH backends available:");
u8 *s = format (NULL, "%=25s %=25s %=10s\n", "Name", "Index", "Active");
ipsec_ah_backend_t *ab;
- /* *INDENT-OFF* */
pool_foreach (ab, im->ah_backends) {
s = format (s, "%=25s %=25u %=10s\n", ab->name, ab - im->ah_backends,
ab - im->ah_backends == im->ah_current_backend ? "yes" : "no");
@@ -645,13 +701,11 @@ ipsec_show_backends_command_fn (vlib_main_t * vm,
s = format (s, " dec6 %s (next %d)\n", n->name, ab->ah6_decrypt_next_index);
}
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v", s);
- _vec_len (s) = 0;
+ vec_set_len (s, 0);
vlib_cli_output (vm, "IPsec ESP backends available:");
s = format (s, "%=25s %=25s %=10s\n", "Name", "Index", "Active");
ipsec_esp_backend_t *eb;
- /* *INDENT-OFF* */
pool_foreach (eb, im->esp_backends) {
s = format (s, "%=25s %=25u %=10s\n", eb->name, eb - im->esp_backends,
eb - im->esp_backends == im->esp_current_backend ? "yes"
@@ -668,20 +722,17 @@ ipsec_show_backends_command_fn (vlib_main_t * vm,
s = format (s, " dec6 %s (next %d)\n", n->name, eb->esp6_decrypt_next_index);
}
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "%v", s);
vec_free (s);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_show_backends_command, static) = {
.path = "show ipsec backends",
.short_help = "show ipsec backends",
.function = ipsec_show_backends_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_select_backend_command_fn (vlib_main_t * vm,
@@ -743,14 +794,12 @@ ipsec_select_backend_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_select_backend_command, static) = {
.path = "ipsec select backend",
.short_help = "ipsec select backend <ah|esp> <backend index>",
.function = ipsec_select_backend_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_ipsec_counters_command_fn (vlib_main_t * vm,
@@ -759,17 +808,17 @@ clear_ipsec_counters_command_fn (vlib_main_t * vm,
{
vlib_clear_combined_counters (&ipsec_spd_policy_counters);
vlib_clear_combined_counters (&ipsec_sa_counters);
+ for (int i = 0; i < IPSEC_SA_N_ERRORS; i++)
+ vlib_clear_simple_counters (&ipsec_sa_err_counters[i]);
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_ipsec_counters_command, static) = {
.path = "clear ipsec counters",
.short_help = "clear ipsec counters",
.function = clear_ipsec_counters_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_tun_protect_cmd (vlib_main_t * vm,
@@ -819,7 +868,6 @@ ipsec_tun_protect_cmd (vlib_main_t * vm,
/**
* Protect tunnel with IPSEC
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_tun_protect_cmd_node, static) =
{
.path = "ipsec tunnel protect",
@@ -827,7 +875,6 @@ VLIB_CLI_COMMAND (ipsec_tun_protect_cmd_node, static) =
.short_help = "ipsec tunnel protect <interface> input-sa <SA> output-sa <SA> [add|del]",
// this is not MP safe
};
-/* *INDENT-ON* */
static clib_error_t *
@@ -842,14 +889,12 @@ ipsec_tun_protect_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_tun_protect_show_node, static) =
{
.path = "show ipsec protect",
.function = ipsec_tun_protect_show,
.short_help = "show ipsec protect",
};
-/* *INDENT-ON* */
static int
ipsec_tun_protect4_hash_show_one (clib_bihash_kv_8_16_t * kv, void *arg)
@@ -898,14 +943,12 @@ ipsec_tun_protect_hash_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection hash tables
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_tun_protect_hash_show_node, static) =
{
.path = "show ipsec protect-hash",
.function = ipsec_tun_protect_hash_show,
.short_help = "show ipsec protect-hash",
};
-/* *INDENT-ON* */
clib_error_t *
ipsec_cli_init (vlib_main_t * vm)
@@ -942,13 +985,11 @@ set_async_mode_command_fn (vlib_main_t * vm, unformat_input_t * input,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_async_mode_command, static) = {
.path = "set ipsec async mode",
.short_help = "set ipsec async mode on|off",
.function = set_async_mode_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_format.c b/src/vnet/ipsec/ipsec_format.c
index 5f7caab44e4..e421a0d96b4 100644
--- a/src/vnet/ipsec/ipsec_format.c
+++ b/src/vnet/ipsec/ipsec_format.c
@@ -153,8 +153,8 @@ format_ipsec_replay_window (u8 * s, va_list * args)
return s;
}
-u8 *
-format_ipsec_policy (u8 * s, va_list * args)
+static u8 *
+format_ipsec_policy_with_suffix (u8 *s, va_list *args, u8 *suffix)
{
u32 pi = va_arg (*args, u32);
ip46_type_t ip_type = IP46_TYPE_IP4;
@@ -168,7 +168,7 @@ format_ipsec_policy (u8 * s, va_list * args)
pi, p->priority,
format_ipsec_policy_action, p->policy,
format_ipsec_policy_type, p->type);
- if (p->protocol)
+ if (p->protocol != IPSEC_POLICY_PROTOCOL_ANY)
{
s = format (s, "%U", format_ip_protocol, p->protocol);
}
@@ -180,6 +180,9 @@ format_ipsec_policy (u8 * s, va_list * args)
{
s = format (s, " sa %u", p->sa_id);
}
+ if (suffix)
+ s = format (s, " %s", suffix);
+
if (p->is_ipv6)
{
ip_type = IP46_TYPE_IP6;
@@ -201,6 +204,152 @@ format_ipsec_policy (u8 * s, va_list * args)
}
u8 *
+format_ipsec_policy (u8 *s, va_list *args)
+{
+ return format_ipsec_policy_with_suffix (s, args, 0);
+}
+
+u8 *
+format_ipsec_fp_policy (u8 *s, va_list *args)
+{
+ return format_ipsec_policy_with_suffix (s, args, (u8 *) "<fast-path>");
+}
+
+/**
+ * @brief Context when walking the fp bihash table. We need to filter
+ * only those policies that are of given type as we walk the table.
+ */
+typedef struct ipsec_spd_policy_ctx_t_
+{
+ u32 *policies;
+ ipsec_spd_policy_type_t t;
+} ipsec_fp_walk_ctx_t;
+
+static int
+ipsec_fp_table_walk_ip4_cb (clib_bihash_kv_16_8_t *kvp, void *arg)
+{
+ ipsec_fp_walk_ctx_t *ctx = (ipsec_fp_walk_ctx_t *) arg;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+
+ ipsec_fp_lookup_value_t *val = (ipsec_fp_lookup_value_t *) &kvp->value;
+
+ u32 *policy_id;
+
+ vec_foreach (policy_id, val->fp_policies_ids)
+ {
+ p = pool_elt_at_index (im->policies, *policy_id);
+ if (p->type == ctx->t)
+ vec_add1 (ctx->policies, *policy_id);
+ }
+
+ return BIHASH_WALK_CONTINUE;
+}
+
+static int
+ipsec_fp_table_walk_ip6_cb (clib_bihash_kv_40_8_t *kvp, void *arg)
+{
+ ipsec_fp_walk_ctx_t *ctx = (ipsec_fp_walk_ctx_t *) arg;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+
+ ipsec_fp_lookup_value_t *val = (ipsec_fp_lookup_value_t *) &kvp->value;
+
+ u32 *policy_id;
+
+ vec_foreach (policy_id, val->fp_policies_ids)
+ {
+ p = pool_elt_at_index (im->policies, *policy_id);
+ if (p->type == ctx->t)
+ vec_add1 (ctx->policies, *policy_id);
+ }
+
+ return BIHASH_WALK_CONTINUE;
+}
+
+u8 *
+format_ipsec_fp_policies (u8 *s, va_list *args)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_t *spd = va_arg (*args, ipsec_spd_t *);
+ ipsec_spd_policy_type_t t = va_arg (*args, ipsec_spd_policy_type_t);
+ u32 *i;
+ ipsec_fp_walk_ctx_t ctx = {
+ .policies = 0,
+ .t = t,
+ };
+
+ u32 ip4_in_lookup_hash_idx = spd->fp_spd.ip4_in_lookup_hash_idx;
+ u32 ip4_out_lookup_hash_idx = spd->fp_spd.ip4_out_lookup_hash_idx;
+ u32 ip6_in_lookup_hash_idx = spd->fp_spd.ip6_in_lookup_hash_idx;
+ u32 ip6_out_lookup_hash_idx = spd->fp_spd.ip6_out_lookup_hash_idx;
+
+ switch (t)
+ {
+ case IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT:
+ case IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS:
+ case IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD:
+ if (INDEX_INVALID != ip4_in_lookup_hash_idx)
+ {
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, ip4_in_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_16_8 (
+ bihash_table, ipsec_fp_table_walk_ip4_cb, &ctx);
+ }
+
+ break;
+
+ case IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT:
+ case IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS:
+ case IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD:
+ if (INDEX_INVALID != ip6_in_lookup_hash_idx)
+ {
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, ip6_in_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_40_8 (
+ bihash_table, ipsec_fp_table_walk_ip6_cb, &ctx);
+ }
+
+ break;
+ case IPSEC_SPD_POLICY_IP4_OUTBOUND:
+ if (INDEX_INVALID != ip4_out_lookup_hash_idx)
+ {
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, ip4_out_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_16_8 (
+ bihash_table, ipsec_fp_table_walk_ip4_cb, &ctx);
+ }
+
+ break;
+ case IPSEC_SPD_POLICY_IP6_OUTBOUND:
+ if (INDEX_INVALID != ip6_out_lookup_hash_idx)
+ {
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, ip6_out_lookup_hash_idx);
+
+ clib_bihash_foreach_key_value_pair_40_8 (
+ bihash_table, ipsec_fp_table_walk_ip6_cb, &ctx);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ vec_foreach (i, ctx.policies)
+ {
+ s = format (s, "\n %U", format_ipsec_fp_policy, *i);
+ }
+
+ vec_free (ctx.policies);
+
+ return s;
+}
+
+u8 *
format_ipsec_spd (u8 * s, va_list * args)
{
u32 si = va_arg (*args, u32);
@@ -218,12 +367,13 @@ format_ipsec_spd (u8 * s, va_list * args)
s = format (s, "spd %u", spd->id);
-#define _(v, n) \
- s = format (s, "\n %s:", n); \
- vec_foreach(i, spd->policies[IPSEC_SPD_POLICY_##v]) \
- { \
- s = format (s, "\n %U", format_ipsec_policy, *i); \
- }
+#define _(v, n) \
+ s = format (s, "\n %s:", n); \
+ vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_##v]) \
+ { \
+ s = format (s, "\n %U", format_ipsec_policy, *i); \
+ } \
+ s = format (s, "\n %U", format_ipsec_fp_policies, spd, IPSEC_SPD_POLICY_##v);
foreach_ipsec_spd_policy_type;
#undef _
@@ -232,6 +382,28 @@ done:
}
u8 *
+format_ipsec_out_spd_flow_cache (u8 *s, va_list *args)
+{
+ ipsec_main_t *im = &ipsec_main;
+
+ s = format (s, "\nipv4-outbound-spd-flow-cache-entries: %u",
+ im->ipsec4_out_spd_flow_cache_entries);
+
+ return (s);
+}
+
+u8 *
+format_ipsec_in_spd_flow_cache (u8 *s, va_list *args)
+{
+ ipsec_main_t *im = &ipsec_main;
+
+ s = format (s, "\nipv4-inbound-spd-flow-cache-entries: %u",
+ im->ipsec4_in_spd_flow_cache_entries);
+
+ return (s);
+}
+
+u8 *
format_ipsec_key (u8 * s, va_list * args)
{
ipsec_key_t *key = va_arg (*args, ipsec_key_t *);
@@ -272,6 +444,7 @@ format_ipsec_sa (u8 * s, va_list * args)
u32 sai = va_arg (*args, u32);
ipsec_format_flags_t flags = va_arg (*args, ipsec_format_flags_t);
vlib_counter_t counts;
+ counter_t errors;
ipsec_sa_t *sa;
if (pool_is_free_index (ipsec_sa_pool, sai))
@@ -293,16 +466,18 @@ format_ipsec_sa (u8 * s, va_list * args)
s = format (s, "\n salt 0x%x", clib_net_to_host_u32 (sa->salt));
s = format (s, "\n thread-index:%d", sa->thread_index);
s = format (s, "\n seq %u seq-hi %u", sa->seq, sa->seq_hi);
- s = format (s, "\n window %U", format_ipsec_replay_window,
- sa->replay_window);
- s = format (s, "\n crypto alg %U",
- format_ipsec_crypto_alg, sa->crypto_alg);
+ s = format (s, "\n window-size: %llu",
+ IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa));
+ s = format (s, "\n window: Bl <- %U Tl", format_ipsec_replay_window,
+ ipsec_sa_anti_replay_get_64b_window (sa));
+ s =
+ format (s, "\n crypto alg %U", format_ipsec_crypto_alg, sa->crypto_alg);
if (sa->crypto_alg && (flags & IPSEC_FORMAT_INSECURE))
s = format (s, " key %U", format_ipsec_key, &sa->crypto_key);
else
s = format (s, " key [redacted]");
- s = format (s, "\n integrity alg %U",
- format_ipsec_integ_alg, sa->integ_alg);
+ s =
+ format (s, "\n integrity alg %U", format_ipsec_integ_alg, sa->integ_alg);
if (sa->integ_alg && (flags & IPSEC_FORMAT_INSECURE))
s = format (s, " key %U", format_ipsec_key, &sa->integ_key);
else
@@ -312,10 +487,17 @@ format_ipsec_sa (u8 * s, va_list * args)
clib_host_to_net_u16 (sa->udp_hdr.dst_port));
vlib_get_combined_counter (&ipsec_sa_counters, sai, &counts);
- s = format (s, "\n packets %u bytes %u", counts.packets, counts.bytes);
+ s = format (s, "\n tx/rx:[packets:%Ld bytes:%Ld]", counts.packets,
+ counts.bytes);
+ s = format (s, "\n SA errors:");
+#define _(index, val, err, desc) \
+ errors = vlib_get_simple_counter (&ipsec_sa_err_counters[index], sai); \
+ s = format (s, "\n " #desc ":[packets:%Ld]", errors);
+ foreach_ipsec_sa_err
+#undef _
- if (ipsec_sa_is_set_IS_TUNNEL (sa))
- s = format (s, "\n%U", format_tunnel, &sa->tunnel, 3);
+ if (ipsec_sa_is_set_IS_TUNNEL (sa)) s =
+ format (s, "\n%U", format_tunnel, &sa->tunnel, 3);
done:
return (s);
@@ -367,12 +549,10 @@ format_ipsec_tun_protect (u8 * s, va_list * args)
IPSEC_FORMAT_BRIEF);
s = format (s, "\n input-sa:");
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
s = format (s, "\n %U", format_ipsec_sa, sai, IPSEC_FORMAT_BRIEF);
}));
- /* *INDENT-ON* */
return (s);
}
diff --git a/src/vnet/ipsec/ipsec_handoff.c b/src/vnet/ipsec/ipsec_handoff.c
index e8daa1a6a23..68a859cf732 100644
--- a/src/vnet/ipsec/ipsec_handoff.c
+++ b/src/vnet/ipsec/ipsec_handoff.c
@@ -259,7 +259,6 @@ VLIB_NODE_FN (ah6_decrypt_handoff) (vlib_main_t * vm,
return ipsec_handoff (vm, node, from_frame, im->ah6_dec_fq_index);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (esp4_encrypt_handoff) = {
.name = "esp4-encrypt-handoff",
.vector_size = sizeof (u32),
@@ -416,7 +415,6 @@ VLIB_REGISTER_NODE (ah6_decrypt_handoff) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_input.c b/src/vnet/ipsec/ipsec_input.c
index 96bad28c2b5..6ccc0be2622 100644
--- a/src/vnet/ipsec/ipsec_input.c
+++ b/src/vnet/ipsec/ipsec_input.c
@@ -19,6 +19,7 @@
#include <vnet/api_errno.h>
#include <vnet/ip/ip.h>
#include <vnet/feature/feature.h>
+#include <vnet/ipsec/ipsec_spd_fp_lookup.h>
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/esp.h>
@@ -51,6 +52,7 @@ typedef struct
ip_protocol_t proto;
u32 spd;
u32 policy_index;
+ u32 policy_type;
u32 sa_id;
u32 spi;
u32 seq;
@@ -64,15 +66,119 @@ format_ipsec_input_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
ipsec_input_trace_t *t = va_arg (*args, ipsec_input_trace_t *);
- s = format (s, "%U: sa_id %u spd %u policy %d spi %u (0x%08x) seq %u",
- format_ip_protocol, t->proto, t->sa_id,
- t->spd, t->policy_index, t->spi, t->spi, t->seq);
+ s =
+ format (s, "%U: sa_id %u type: %u spd %u policy %d spi %u (0x%08x) seq %u",
+ format_ip_protocol, t->proto, t->sa_id, t->policy_type, t->spd,
+ t->policy_index, t->spi, t->spi, t->seq);
return s;
}
+always_inline void
+ipsec4_input_spd_add_flow_cache_entry (ipsec_main_t *im, u32 sa, u32 da,
+ ipsec_spd_policy_type_t policy_type,
+ u32 pol_id)
+{
+ u64 hash;
+ u8 is_overwrite = 0, is_stale_overwrite = 0;
+ /* Store in network byte order to avoid conversion on lookup */
+ ipsec4_inbound_spd_tuple_t ip4_tuple = {
+ .ip4_src_addr = (ip4_address_t) clib_host_to_net_u32 (sa),
+ .ip4_dest_addr = (ip4_address_t) clib_host_to_net_u32 (da),
+ .policy_type = policy_type
+ };
+
+ ip4_tuple.kv_16_8.value =
+ (((u64) pol_id) << 32) | ((u64) im->input_epoch_count);
+
+ hash = ipsec4_hash_16_8 (&ip4_tuple.kv_16_8);
+ hash &= (im->ipsec4_in_spd_hash_num_buckets - 1);
+
+ ipsec_spinlock_lock (&im->ipsec4_in_spd_hash_tbl[hash].bucket_lock);
+ /* Check if we are overwriting an existing entry so we know
+ whether to increment the flow cache counter. Since flow
+ cache counter is reset on any policy add/remove, but
+ hash table values are not, we need to check if the entry
+ we are overwriting is stale or not. If it's a stale entry
+ overwrite, we still want to increment flow cache counter */
+ is_overwrite = (im->ipsec4_in_spd_hash_tbl[hash].value != 0);
+ /* Check if we are overwriting a stale entry by comparing
+ with current epoch count */
+ if (PREDICT_FALSE (is_overwrite))
+ is_stale_overwrite =
+ (im->input_epoch_count !=
+ ((u32) (im->ipsec4_in_spd_hash_tbl[hash].value & 0xFFFFFFFF)));
+ clib_memcpy_fast (&im->ipsec4_in_spd_hash_tbl[hash], &ip4_tuple.kv_16_8,
+ sizeof (ip4_tuple.kv_16_8));
+ ipsec_spinlock_unlock (&im->ipsec4_in_spd_hash_tbl[hash].bucket_lock);
+
+ /* Increment the counter to track active flow cache entries
+ when entering a fresh entry or overwriting a stale one */
+ if (!is_overwrite || is_stale_overwrite)
+ clib_atomic_fetch_add_relax (&im->ipsec4_in_spd_flow_cache_entries, 1);
+
+ return;
+}
+
always_inline ipsec_policy_t *
-ipsec_input_policy_match (ipsec_spd_t * spd, u32 sa, u32 da,
+ipsec4_input_spd_find_flow_cache_entry (ipsec_main_t *im, u32 sa, u32 da,
+ ipsec_spd_policy_type_t policy_type)
+{
+ ipsec_policy_t *p = NULL;
+ ipsec4_hash_kv_16_8_t kv_result;
+ u64 hash;
+ ipsec4_inbound_spd_tuple_t ip4_tuple = { .ip4_src_addr = (ip4_address_t) sa,
+ .ip4_dest_addr = (ip4_address_t) da,
+ .policy_type = policy_type };
+
+ hash = ipsec4_hash_16_8 (&ip4_tuple.kv_16_8);
+ hash &= (im->ipsec4_in_spd_hash_num_buckets - 1);
+
+ ipsec_spinlock_lock (&im->ipsec4_in_spd_hash_tbl[hash].bucket_lock);
+ kv_result = im->ipsec4_in_spd_hash_tbl[hash];
+ ipsec_spinlock_unlock (&im->ipsec4_in_spd_hash_tbl[hash].bucket_lock);
+
+ if (ipsec4_hash_key_compare_16_8 ((u64 *) &ip4_tuple.kv_16_8,
+ (u64 *) &kv_result))
+ {
+ if (im->input_epoch_count == ((u32) (kv_result.value & 0xFFFFFFFF)))
+ {
+ /* Get the policy based on the index */
+ p =
+ pool_elt_at_index (im->policies, ((u32) (kv_result.value >> 32)));
+ }
+ }
+
+ return p;
+}
+
+always_inline void
+ipsec_fp_in_5tuple_from_ip4_range (ipsec_fp_5tuple_t *tuple, u32 sa, u32 da,
+ u32 spi, u8 action)
+{
+ clib_memset (tuple->l3_zero_pad, 0, sizeof (tuple->l3_zero_pad));
+ tuple->laddr.as_u32 = da;
+ tuple->raddr.as_u32 = sa;
+ tuple->spi = spi;
+ tuple->action = action;
+ tuple->is_ipv6 = 0;
+}
+
+always_inline void
+ipsec_fp_in_5tuple_from_ip6_range (ipsec_fp_5tuple_t *tuple, ip6_address_t *sa,
+ ip6_address_t *da, u32 spi, u8 action)
+
+{
+ clib_memcpy (&tuple->ip6_laddr, da, sizeof (ip6_address_t));
+ clib_memcpy (&tuple->ip6_raddr, sa, sizeof (ip6_address_t));
+
+ tuple->spi = spi;
+ tuple->action = action;
+ tuple->is_ipv6 = 1;
+}
+
+always_inline ipsec_policy_t *
+ipsec_input_policy_match (ipsec_spd_t *spd, u32 sa, u32 da,
ipsec_spd_policy_type_t policy_type)
{
ipsec_main_t *im = &ipsec_main;
@@ -95,13 +201,18 @@ ipsec_input_policy_match (ipsec_spd_t * spd, u32 sa, u32 da,
if (sa > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
continue;
+ if (im->input_flow_cache_flag)
+ {
+ /* Add an Entry in Flow cache */
+ ipsec4_input_spd_add_flow_cache_entry (im, sa, da, policy_type, *i);
+ }
return p;
}
return 0;
}
always_inline ipsec_policy_t *
-ipsec_input_protect_policy_match (ipsec_spd_t * spd, u32 sa, u32 da, u32 spi)
+ipsec_input_protect_policy_match (ipsec_spd_t *spd, u32 sa, u32 da, u32 spi)
{
ipsec_main_t *im = &ipsec_main;
ipsec_policy_t *p;
@@ -124,7 +235,7 @@ ipsec_input_protect_policy_match (ipsec_spd_t * spd, u32 sa, u32 da, u32 spi)
if (sa != clib_net_to_host_u32 (s->tunnel.t_src.ip.ip4.as_u32))
continue;
- return p;
+ goto return_policy;
}
if (da < clib_net_to_host_u32 (p->laddr.start.ip4.as_u32))
@@ -139,6 +250,14 @@ ipsec_input_protect_policy_match (ipsec_spd_t * spd, u32 sa, u32 da, u32 spi)
if (sa > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
continue;
+ return_policy:
+ if (im->input_flow_cache_flag)
+ {
+ /* Add an Entry in Flow cache */
+ ipsec4_input_spd_add_flow_cache_entry (
+ im, sa, da, IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT, *i);
+ }
+
return p;
}
return 0;
@@ -154,6 +273,193 @@ ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la,
return 0;
}
+always_inline void
+ipsec_esp_packet_process (vlib_main_t *vm, ipsec_main_t *im, ip4_header_t *ip0,
+ esp_header_t *esp0, u32 thread_index,
+ ipsec_spd_t *spd0, vlib_buffer_t **b,
+ vlib_node_runtime_t *node, u64 *ipsec_bypassed,
+ u64 *ipsec_dropped, u64 *ipsec_matched,
+ u64 *ipsec_unprocessed, u16 *next)
+
+{
+ ipsec_policy_t *p0 = NULL;
+ u32 pi0;
+ u8 has_space0;
+ bool search_flow_cache = false;
+ ipsec_policy_t *policies[1];
+ ipsec_fp_5tuple_t tuples[1];
+ bool ip_v6 = true;
+
+ /* if flow cache is enabled, first search through flow cache for a
+ * policy match for either protect, bypass or discard rules, in that
+ * order. if no match is found search_flow_cache is set to false (1)
+ * and we revert back to linear search
+ */
+
+ search_flow_cache = im->input_flow_cache_flag;
+udp_or_esp:
+
+ /* SPI ID field in the ESP header MUST NOT be a zero value */
+ if (esp0->spi == 0)
+ {
+ /* Drop the packet if SPI ID is zero */
+ *ipsec_unprocessed += 1;
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+ return;
+ }
+
+ if (im->fp_spd_ipv4_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx))
+ {
+ ipsec_fp_in_5tuple_from_ip4_range (&tuples[0], ip0->src_address.as_u32,
+ ip0->dst_address.as_u32,
+ clib_net_to_host_u32 (esp0->spi),
+ IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT);
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else if (search_flow_cache) /* attempt to match policy in flow cache */
+ {
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT);
+ }
+
+ else /* linear search if flow cache is not enabled,
+ or flow cache search just failed */
+ {
+ p0 = ipsec_input_protect_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ clib_net_to_host_u32 (esp0->spi));
+ }
+ has_space0 = vlib_buffer_has_space (b[0], (clib_address_t) (esp0 + 1) -
+ (clib_address_t) ip0);
+
+ if (PREDICT_TRUE ((p0 != NULL) & (has_space0)))
+ {
+ *ipsec_matched += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (&ipsec_spd_policy_counters,
+ thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->length));
+
+ vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index;
+ next[0] = im->esp4_decrypt_next_index;
+ vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0));
+ goto trace0;
+ }
+ else
+ {
+ p0 = 0;
+ pi0 = ~0;
+ }
+ if (im->fp_spd_ipv4_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx))
+ {
+ tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS;
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else if (search_flow_cache)
+ {
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
+ }
+
+ else
+ {
+ p0 = ipsec_input_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
+ }
+
+ if (PREDICT_TRUE ((p0 != NULL)))
+ {
+ *ipsec_bypassed += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (&ipsec_spd_policy_counters,
+ thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->length));
+
+ goto trace0;
+ }
+ else
+ {
+ p0 = 0;
+ pi0 = ~0;
+ };
+ if (im->fp_spd_ipv4_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd0->fp_spd.ip4_in_lookup_hash_idx))
+ {
+ tuples->action = IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD;
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, !ip_v6, tuples, policies, 1);
+ p0 = policies[0];
+ }
+ else
+
+ if (search_flow_cache)
+ {
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
+ }
+
+ else
+ {
+ p0 = ipsec_input_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
+ }
+
+ if (PREDICT_TRUE ((p0 != NULL)))
+ {
+ *ipsec_dropped += 1;
+
+ pi0 = p0 - im->policies;
+ vlib_increment_combined_counter (&ipsec_spd_policy_counters,
+ thread_index, pi0, 1,
+ clib_net_to_host_u16 (ip0->length));
+
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+ goto trace0;
+ }
+ else
+ {
+ p0 = 0;
+ pi0 = ~0;
+ };
+ /* flow cache search failed, try again with linear search */
+ if (search_flow_cache && p0 == NULL)
+ {
+ search_flow_cache = false;
+ goto udp_or_esp;
+ }
+
+ /* Drop by default if no match on PROTECT, BYPASS or DISCARD */
+ *ipsec_unprocessed += 1;
+ next[0] = IPSEC_INPUT_NEXT_DROP;
+
+trace0:
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
+ PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ ipsec_input_trace_t *tr = vlib_add_trace (vm, node, b[0], sizeof (*tr));
+
+ tr->proto = ip0->protocol;
+ tr->sa_id = p0 ? p0->sa_id : ~0;
+ tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0;
+ tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0;
+ tr->spd = spd0->id;
+ tr->policy_index = pi0;
+ }
+}
+
always_inline ipsec_policy_t *
ipsec6_input_protect_policy_match (ipsec_spd_t * spd,
ip6_address_t * sa,
@@ -225,6 +531,7 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
ipsec_spd_t *spd0;
ipsec_policy_t *p0 = NULL;
u8 has_space0;
+ bool search_flow_cache = false;
if (n_left_from > 2)
{
@@ -240,29 +547,64 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
ip0 = vlib_buffer_get_current (b[0]);
- if (PREDICT_TRUE
- (ip0->protocol == IP_PROTOCOL_IPSEC_ESP
- || ip0->protocol == IP_PROTOCOL_UDP))
+ if (ip0->protocol == IP_PROTOCOL_UDP)
{
+ udp_header_t *udp0 = NULL;
+ udp0 = (udp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+ /* As per rfc3948 in UDP Encapsulated Header, UDP checksum must be
+ * Zero, and receivers must not depen upon UPD checksum.
+ * inside ESP header , SPI ID value MUST NOT be a zero value
+ * */
+
+ if (udp0->checksum == 0)
+ {
+ esp0 = (esp_header_t *) ((u8 *) udp0 + sizeof (udp_header_t));
+
+ ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0,
+ b, node, &ipsec_bypassed,
+ &ipsec_dropped, &ipsec_matched,
+ &ipsec_unprocessed, next);
+ if (ipsec_bypassed > 0)
+ goto ipsec_bypassed;
+ }
+ }
+ else if (PREDICT_TRUE (ip0->protocol == IP_PROTOCOL_IPSEC_ESP))
+ {
esp0 = (esp_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
- if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_UDP))
+ ipsec_esp_packet_process (vm, im, ip0, esp0, thread_index, spd0, b,
+ node, &ipsec_bypassed, &ipsec_dropped,
+ &ipsec_matched, &ipsec_unprocessed, next);
+ if (ipsec_bypassed > 0)
+ goto ipsec_bypassed;
+ }
+ else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH)
+ {
+ ah0 = (ah_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
+
+ // if flow cache is enabled, first search through flow cache for a
+ // policy match and revert back to linear search on failure
+ search_flow_cache = im->input_flow_cache_flag;
+
+ ah:
+ if (search_flow_cache)
{
- /* FIXME Skip, if not a UDP encapsulated packet */
- esp0 = (esp_header_t *) ((u8 *) esp0 + sizeof (udp_header_t));
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT);
}
- p0 = ipsec_input_protect_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- clib_net_to_host_u32
- (esp0->spi));
+ else
+ {
+ p0 = ipsec_input_protect_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ clib_net_to_host_u32 (ah0->spi));
+ }
has_space0 =
vlib_buffer_has_space (b[0],
- (clib_address_t) (esp0 + 1) -
+ (clib_address_t) (ah0 + 1) -
(clib_address_t) ip0);
if (PREDICT_TRUE ((p0 != NULL) & (has_space0)))
@@ -275,127 +617,72 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
thread_index, pi0, 1, clib_net_to_host_u16 (ip0->length));
vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index;
- next[0] = im->esp4_decrypt_next_index;
- vlib_buffer_advance (b[0], ((u8 *) esp0 - (u8 *) ip0));
- goto trace0;
+ next[0] = im->ah4_decrypt_next_index;
+ goto trace1;
}
else
{
p0 = 0;
pi0 = ~0;
- };
+ }
- p0 = ipsec_input_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
- if (PREDICT_TRUE ((p0 != NULL)))
+ if (search_flow_cache)
{
- ipsec_bypassed += 1;
-
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter (
- &ipsec_spd_policy_counters, thread_index, pi0, 1,
- clib_net_to_host_u16 (ip0->length));
-
- goto trace0;
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
}
+
else
{
- p0 = 0;
- pi0 = ~0;
- };
+ p0 = ipsec_input_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
+ }
- p0 = ipsec_input_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
if (PREDICT_TRUE ((p0 != NULL)))
{
- ipsec_dropped += 1;
+ ipsec_bypassed += 1;
pi0 = p0 - im->policies;
vlib_increment_combined_counter (
&ipsec_spd_policy_counters, thread_index, pi0, 1,
clib_net_to_host_u16 (ip0->length));
- next[0] = IPSEC_INPUT_NEXT_DROP;
- goto trace0;
+ goto trace1;
}
else
{
p0 = 0;
pi0 = ~0;
};
- trace0:
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
- PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- ipsec_input_trace_t *tr =
- vlib_add_trace (vm, node, b[0], sizeof (*tr));
-
- tr->proto = ip0->protocol;
- tr->sa_id = p0 ? p0->sa_id : ~0;
- tr->spi = has_space0 ? clib_net_to_host_u32 (esp0->spi) : ~0;
- tr->seq = has_space0 ? clib_net_to_host_u32 (esp0->seq) : ~0;
- tr->spd = spd0->id;
- tr->policy_index = pi0;
- }
- }
- else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH)
- {
- ah0 = (ah_header_t *) ((u8 *) ip0 + ip4_header_bytes (ip0));
- p0 = ipsec_input_protect_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- clib_net_to_host_u32
- (ah0->spi));
-
- has_space0 =
- vlib_buffer_has_space (b[0],
- (clib_address_t) (ah0 + 1) -
- (clib_address_t) ip0);
- if (PREDICT_TRUE ((p0 != NULL) & (has_space0)))
+ if (search_flow_cache)
{
- ipsec_matched += 1;
-
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter
- (&ipsec_spd_policy_counters,
- thread_index, pi0, 1, clib_net_to_host_u16 (ip0->length));
-
- vnet_buffer (b[0])->ipsec.sad_index = p0->sa_index;
- next[0] = im->ah4_decrypt_next_index;
- goto trace1;
+ p0 = ipsec4_input_spd_find_flow_cache_entry (
+ im, ip0->src_address.as_u32, ip0->dst_address.as_u32,
+ IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
}
+
else
{
- p0 = 0;
- pi0 = ~0;
+ p0 = ipsec_input_policy_match (
+ spd0, clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
}
- p0 = ipsec_input_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS);
if (PREDICT_TRUE ((p0 != NULL)))
{
- ipsec_bypassed += 1;
+ ipsec_dropped += 1;
pi0 = p0 - im->policies;
vlib_increment_combined_counter (
&ipsec_spd_policy_counters, thread_index, pi0, 1,
clib_net_to_host_u16 (ip0->length));
+ next[0] = IPSEC_INPUT_NEXT_DROP;
goto trace1;
}
else
@@ -404,29 +691,17 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
pi0 = ~0;
};
- p0 = ipsec_input_policy_match (spd0,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD);
- if (PREDICT_TRUE ((p0 != NULL)))
+ // flow cache search failed, retry with linear search
+ if (search_flow_cache && p0 == NULL)
{
- ipsec_dropped += 1;
+ search_flow_cache = false;
+ goto ah;
+ }
- pi0 = p0 - im->policies;
- vlib_increment_combined_counter (
- &ipsec_spd_policy_counters, thread_index, pi0, 1,
- clib_net_to_host_u16 (ip0->length));
+ /* Drop by default if no match on PROTECT, BYPASS or DISCARD */
+ ipsec_unprocessed += 1;
+ next[0] = IPSEC_INPUT_NEXT_DROP;
- next[0] = IPSEC_INPUT_NEXT_DROP;
- goto trace1;
- }
- else
- {
- p0 = 0;
- pi0 = ~0;
- };
trace1:
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
@@ -444,6 +719,7 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
}
else
{
+ ipsec_bypassed:
ipsec_unprocessed += 1;
}
n_left_from -= 1;
@@ -475,8 +751,6 @@ VLIB_NODE_FN (ipsec4_input_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec4_input_node) = {
.name = "ipsec4-input-feature",
.vector_size = sizeof (u32),
@@ -491,7 +765,6 @@ VLIB_REGISTER_NODE (ipsec4_input_node) = {
#undef _
},
};
-/* *INDENT-ON* */
extern vlib_node_registration_t ipsec6_input_node;
@@ -504,6 +777,9 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
ipsec_main_t *im = &ipsec_main;
u32 ipsec_unprocessed = 0;
u32 ipsec_matched = 0;
+ ipsec_policy_t *policies[1];
+ ipsec_fp_5tuple_t tuples[1];
+ bool ip_v6 = true;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -519,7 +795,7 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
while (n_left_from > 0 && n_left_to_next > 0)
{
- u32 bi0, next0, pi0;
+ u32 bi0, next0, pi0 = ~0;
vlib_buffer_t *b0;
ip6_header_t *ip0;
esp_header_t *esp0;
@@ -556,11 +832,22 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
clib_net_to_host_u16 (ip0->payload_length) + header_size,
spd0->id);
#endif
- p0 = ipsec6_input_protect_policy_match (spd0,
- &ip0->src_address,
- &ip0->dst_address,
- clib_net_to_host_u32
- (esp0->spi));
+ if (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID !=
+ spd0->fp_spd.ip6_in_lookup_hash_idx))
+ {
+ ipsec_fp_in_5tuple_from_ip6_range (
+ &tuples[0], &ip0->src_address, &ip0->dst_address,
+ clib_net_to_host_u32 (esp0->spi),
+ IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT);
+ ipsec_fp_in_policy_match_n (&spd0->fp_spd, ip_v6, tuples,
+ policies, 1);
+ p0 = policies[0];
+ }
+ else
+ p0 = ipsec6_input_protect_policy_match (
+ spd0, &ip0->src_address, &ip0->dst_address,
+ clib_net_to_host_u32 (esp0->spi));
if (PREDICT_TRUE (p0 != 0))
{
@@ -576,11 +863,15 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
vnet_buffer (b0)->ipsec.sad_index = p0->sa_index;
next0 = im->esp6_decrypt_next_index;
vlib_buffer_advance (b0, header_size);
+ /* TODO Add policy matching for bypass and discard policy
+ * type */
goto trace0;
}
else
{
pi0 = ~0;
+ ipsec_unprocessed += 1;
+ next0 = IPSEC_INPUT_NEXT_DROP;
}
}
else if (ip0->protocol == IP_PROTOCOL_IPSEC_AH)
@@ -608,6 +899,8 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
else
{
pi0 = ~0;
+ ipsec_unprocessed += 1;
+ next0 = IPSEC_INPUT_NEXT_DROP;
}
}
else
@@ -623,11 +916,16 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
if (p0)
- tr->sa_id = p0->sa_id;
+ {
+ tr->sa_id = p0->sa_id;
+ tr->policy_type = p0->type;
+ }
+
tr->proto = ip0->protocol;
tr->spi = clib_net_to_host_u32 (esp0->spi);
tr->seq = clib_net_to_host_u32 (esp0->seq);
tr->spd = spd0->id;
+ tr->policy_index = pi0;
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
@@ -648,7 +946,6 @@ VLIB_NODE_FN (ipsec6_input_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec6_input_node) = {
.name = "ipsec6-input-feature",
.vector_size = sizeof (u32),
@@ -663,7 +960,6 @@ VLIB_REGISTER_NODE (ipsec6_input_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_itf.c b/src/vnet/ipsec/ipsec_itf.c
index 532d5be4c07..b86bf6a110c 100644
--- a/src/vnet/ipsec/ipsec_itf.c
+++ b/src/vnet/ipsec/ipsec_itf.c
@@ -21,6 +21,7 @@
#include <vnet/ipsec/ipsec.h>
#include <vnet/adj/adj_midchain.h>
#include <vnet/ethernet/mac_address.h>
+#include <vnet/mpls/mpls.h>
/* bitmap of Allocated IPSEC_ITF instances */
static uword *ipsec_itf_instances;
@@ -36,6 +37,12 @@ ipsec_itf_get (index_t ii)
return (pool_elt_at_index (ipsec_itf_pool, ii));
}
+u32
+ipsec_itf_count (void)
+{
+ return (pool_elts (ipsec_itf_pool));
+}
+
static ipsec_itf_t *
ipsec_itf_find_by_sw_if_index (u32 sw_if_index)
{
@@ -181,7 +188,6 @@ ipsec_itf_update_adj (vnet_main_t * vnm, u32 sw_if_index, adj_index_t ai)
(ai, NULL, NULL, ADJ_FLAG_MIDCHAIN_IP_STACK, ipsec_itf_build_rewrite ());
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (ipsec_itf_device_class) = {
.name = "IPSEC Tunnel",
.format_device_name = format_ipsec_itf_name,
@@ -201,7 +207,6 @@ VNET_HW_INTERFACE_CLASS(ipsec_p2mp_hw_interface_class) = {
.update_adjacency = ipsec_itf_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_NBMA,
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated ipsec_itf instance numbers.
@@ -268,6 +273,20 @@ ipsec_itf_instance_free (u32 instance)
return 0;
}
+void
+ipsec_itf_reset_tx_nodes (u32 sw_if_index)
+{
+ vnet_feature_modify_end_node (
+ ip4_main.lookup_main.output_feature_arc_index, sw_if_index,
+ vlib_get_node_by_name (vlib_get_main (), (u8 *) "ip4-drop")->index);
+ vnet_feature_modify_end_node (
+ ip6_main.lookup_main.output_feature_arc_index, sw_if_index,
+ vlib_get_node_by_name (vlib_get_main (), (u8 *) "ip6-drop")->index);
+ vnet_feature_modify_end_node (
+ mpls_main.output_feature_arc_index, sw_if_index,
+ vlib_get_node_by_name (vlib_get_main (), (u8 *) "mpls-drop")->index);
+}
+
int
ipsec_itf_create (u32 user_instance, tunnel_mode_t mode, u32 * sw_if_indexp)
{
@@ -312,6 +331,7 @@ ipsec_itf_create (u32 user_instance, tunnel_mode_t mode, u32 * sw_if_indexp)
ipsec_itf_index_by_sw_if_index[hi->sw_if_index] = t_idx;
ipsec_itf->ii_sw_if_index = *sw_if_indexp = hi->sw_if_index;
+ ipsec_itf_reset_tx_nodes (hi->sw_if_index);
return 0;
}
@@ -336,6 +356,8 @@ ipsec_itf_delete (u32 sw_if_index)
if (ipsec_itf_instance_free (hw->dev_instance) < 0)
return VNET_API_ERROR_INVALID_SW_IF_INDEX;
+ vnet_reset_interface_l3_output_node (vnm->vlib_main, sw_if_index);
+
vnet_delete_hw_interface (vnm, hw->hw_if_index);
pool_put (ipsec_itf_pool, ipsec_itf);
@@ -359,6 +381,7 @@ ipsec_itf_create_cli (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
+ tunnel_mode_t mode = TUNNEL_MODE_P2P;
u32 instance, sw_if_index;
clib_error_t *error;
mac_address_t mac;
@@ -374,6 +397,8 @@ ipsec_itf_create_cli (vlib_main_t * vm,
{
if (unformat (line_input, "instance %d", &instance))
;
+ else if (unformat (line_input, "p2mp"))
+ mode = TUNNEL_MODE_MP;
else
{
error = clib_error_return (0, "unknown input: %U",
@@ -388,7 +413,7 @@ ipsec_itf_create_cli (vlib_main_t * vm,
return error;
}
- rv = ipsec_itf_create (instance, TUNNEL_MODE_P2P, &sw_if_index);
+ rv = ipsec_itf_create (instance, mode, &sw_if_index);
if (rv)
return clib_error_return (0, "iPSec interface create failed");
@@ -403,17 +428,15 @@ ipsec_itf_create_cli (vlib_main_t * vm,
*
* @cliexpar
* The following two command syntaxes are equivalent:
- * @cliexcmd{ipsec itf create [instance <instance>]}
+ * @cliexcmd{ipsec itf create [instance <instance>] [p2mp]}
* Example of how to create a ipsec interface:
* @cliexcmd{ipsec itf create}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_itf_create_command, static) = {
.path = "ipsec itf create",
- .short_help = "ipsec itf create [instance <instance>]",
+ .short_help = "ipsec itf create [instance <instance>] [p2mp]",
.function = ipsec_itf_create_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_itf_delete_cli (vlib_main_t * vm,
@@ -458,13 +481,11 @@ ipsec_itf_delete_cli (vlib_main_t * vm,
* Example of how to create a ipsec_itf interface:
* @cliexcmd{ipsec itf delete ipsec0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_itf_delete_command, static) = {
.path = "ipsec itf delete",
.short_help = "ipsec itf delete <interface>",
.function = ipsec_itf_delete_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
ipsec_interface_show (vlib_main_t * vm,
@@ -472,12 +493,10 @@ ipsec_interface_show (vlib_main_t * vm,
{
index_t ii;
- /* *INDENT-OFF* */
pool_foreach_index (ii, ipsec_itf_pool)
{
vlib_cli_output (vm, "%U", format_ipsec_itf, ii);
}
- /* *INDENT-ON* */
return NULL;
}
@@ -485,14 +504,12 @@ ipsec_interface_show (vlib_main_t * vm,
/**
* show IPSEC tunnel protection hash tables
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (ipsec_interface_show_node, static) =
{
.path = "show ipsec interface",
.function = ipsec_interface_show,
.short_help = "show ipsec interface",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_itf.h b/src/vnet/ipsec/ipsec_itf.h
index 4958d102b65..bf13096ed8f 100644
--- a/src/vnet/ipsec/ipsec_itf.h
+++ b/src/vnet/ipsec/ipsec_itf.h
@@ -102,6 +102,7 @@ typedef struct ipsec_itf_t_
extern int ipsec_itf_create (u32 user_instance,
tunnel_mode_t mode, u32 * sw_if_indexp);
extern int ipsec_itf_delete (u32 sw_if_index);
+extern void ipsec_itf_reset_tx_nodes (u32 sw_if_index);
extern void ipsec_itf_adj_stack (adj_index_t ai, u32 sai);
extern void ipsec_itf_adj_unstack (adj_index_t ai);
@@ -109,6 +110,7 @@ extern void ipsec_itf_adj_unstack (adj_index_t ai);
extern u8 *format_ipsec_itf (u8 * s, va_list * a);
extern ipsec_itf_t *ipsec_itf_get (index_t ii);
+extern u32 ipsec_itf_count (void);
typedef walk_rc_t (*ipsec_itf_walk_cb_t) (ipsec_itf_t *itf, void *ctx);
extern void ipsec_itf_walk (ipsec_itf_walk_cb_t cd, void *ctx);
diff --git a/src/vnet/ipsec/ipsec_output.c b/src/vnet/ipsec/ipsec_output.c
index 8fb9566fa38..787da9359e0 100644
--- a/src/vnet/ipsec/ipsec_output.c
+++ b/src/vnet/ipsec/ipsec_output.c
@@ -21,6 +21,7 @@
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/ipsec_io.h>
+#include <vnet/ipsec/ipsec_output.h>
#define foreach_ipsec_output_error \
_(RX_PKTS, "IPSec pkts received") \
@@ -63,114 +64,6 @@ format_ipsec_output_trace (u8 * s, va_list * args)
return s;
}
-always_inline ipsec_policy_t *
-ipsec_output_policy_match (ipsec_spd_t * spd, u8 pr, u32 la, u32 ra, u16 lp,
- u16 rp)
-{
- ipsec_main_t *im = &ipsec_main;
- ipsec_policy_t *p;
- u32 *i;
-
- if (!spd)
- return 0;
-
- vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP4_OUTBOUND])
- {
- p = pool_elt_at_index (im->policies, *i);
- if (PREDICT_FALSE (p->protocol && (p->protocol != pr)))
- continue;
-
- if (ra < clib_net_to_host_u32 (p->raddr.start.ip4.as_u32))
- continue;
-
- if (ra > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
- continue;
-
- if (la < clib_net_to_host_u32 (p->laddr.start.ip4.as_u32))
- continue;
-
- if (la > clib_net_to_host_u32 (p->laddr.stop.ip4.as_u32))
- continue;
-
- if (PREDICT_FALSE
- ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)
- && (pr != IP_PROTOCOL_SCTP)))
- return p;
-
- if (lp < p->lport.start)
- continue;
-
- if (lp > p->lport.stop)
- continue;
-
- if (rp < p->rport.start)
- continue;
-
- if (rp > p->rport.stop)
- continue;
-
- return p;
- }
- return 0;
-}
-
-always_inline uword
-ip6_addr_match_range (ip6_address_t * a, ip6_address_t * la,
- ip6_address_t * ua)
-{
- if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) &&
- (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0))
- return 1;
- return 0;
-}
-
-always_inline ipsec_policy_t *
-ipsec6_output_policy_match (ipsec_spd_t * spd,
- ip6_address_t * la,
- ip6_address_t * ra, u16 lp, u16 rp, u8 pr)
-{
- ipsec_main_t *im = &ipsec_main;
- ipsec_policy_t *p;
- u32 *i;
-
- if (!spd)
- return 0;
-
- vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP6_OUTBOUND])
- {
- p = pool_elt_at_index (im->policies, *i);
- if (PREDICT_FALSE (p->protocol && (p->protocol != pr)))
- continue;
-
- if (!ip6_addr_match_range (ra, &p->raddr.start.ip6, &p->raddr.stop.ip6))
- continue;
-
- if (!ip6_addr_match_range (la, &p->laddr.start.ip6, &p->laddr.stop.ip6))
- continue;
-
- if (PREDICT_FALSE
- ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)
- && (pr != IP_PROTOCOL_SCTP)))
- return p;
-
- if (lp < p->lport.start)
- continue;
-
- if (lp > p->lport.stop)
- continue;
-
- if (rp < p->rport.start)
- continue;
-
- if (rp > p->rport.stop)
- continue;
-
- return p;
- }
-
- return 0;
-}
-
static inline uword
ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame, int is_ipv6)
@@ -185,6 +78,7 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
ipsec_spd_t *spd0 = 0;
int bogus;
u64 nc_protect = 0, nc_bypass = 0, nc_discard = 0, nc_nomatch = 0;
+ u8 flow_cache_enabled = im->output_flow_cache_flag;
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -194,7 +88,7 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
u32 bi0, pi0, bi1;
vlib_buffer_t *b0, *b1;
- ipsec_policy_t *p0;
+ ipsec_policy_t *p0 = NULL;
ip4_header_t *ip0;
ip6_header_t *ip6_0 = 0;
udp_header_t *udp0;
@@ -262,15 +156,26 @@ ipsec_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
sw_if_index0, spd_index0, spd0->id);
#endif
- p0 = ipsec_output_policy_match (spd0, ip0->protocol,
- clib_net_to_host_u32
- (ip0->src_address.as_u32),
- clib_net_to_host_u32
- (ip0->dst_address.as_u32),
- clib_net_to_host_u16
- (udp0->src_port),
- clib_net_to_host_u16
- (udp0->dst_port));
+ /*
+ * Check whether flow cache is enabled.
+ */
+ if (flow_cache_enabled)
+ {
+ p0 = ipsec4_out_spd_find_flow_cache_entry (
+ im, ip0->protocol, ip0->src_address.as_u32,
+ ip0->dst_address.as_u32, udp0->src_port, udp0->dst_port);
+ }
+
+ /* Fall back to linear search if flow cache lookup fails */
+ if (p0 == NULL)
+ {
+ p0 = ipsec_output_policy_match (
+ spd0, ip0->protocol,
+ clib_net_to_host_u32 (ip0->src_address.as_u32),
+ clib_net_to_host_u32 (ip0->dst_address.as_u32),
+ clib_net_to_host_u16 (udp0->src_port),
+ clib_net_to_host_u16 (udp0->dst_port), flow_cache_enabled);
+ }
}
tcp0 = (void *) udp0;
@@ -430,7 +335,6 @@ VLIB_NODE_FN (ipsec4_output_node) (vlib_main_t * vm,
return ipsec_output_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec4_output_node) = {
.name = "ipsec4-output-feature",
.vector_size = sizeof (u32),
@@ -447,7 +351,6 @@ VLIB_REGISTER_NODE (ipsec4_output_node) = {
#undef _
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ipsec6_output_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
diff --git a/src/vnet/ipsec/ipsec_output.h b/src/vnet/ipsec/ipsec_output.h
new file mode 100644
index 00000000000..30f4ebedeb7
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_output.h
@@ -0,0 +1,489 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef IPSEC_OUTPUT_H
+#define IPSEC_OUTPUT_H
+
+#include <vppinfra/types.h>
+#include <vnet/ipsec/ipsec_spd.h>
+#include <vnet/ipsec/ipsec_spd_fp_lookup.h>
+
+always_inline void
+ipsec4_out_spd_add_flow_cache_entry (ipsec_main_t *im, u8 pr, u32 la, u32 ra,
+ u16 lp, u16 rp, u32 pol_id)
+{
+ u64 hash;
+ u8 overwrite = 0, stale_overwrite = 0;
+ ipsec4_spd_5tuple_t ip4_5tuple = { .ip4_addr = { (ip4_address_t) la,
+ (ip4_address_t) ra },
+ .port = { lp, rp },
+ .proto = pr };
+
+ ip4_5tuple.kv_16_8.value = (((u64) pol_id) << 32) | ((u64) im->epoch_count);
+
+ hash = ipsec4_hash_16_8 (&ip4_5tuple.kv_16_8);
+ hash &= (im->ipsec4_out_spd_hash_num_buckets - 1);
+
+ ipsec_spinlock_lock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+ /* Check if we are overwriting an existing entry so we know
+ whether to increment the flow cache counter. Since flow
+ cache counter is reset on any policy add/remove, but
+ hash table values are not, we also need to check if the entry
+ we are overwriting is stale or not. If it's a stale entry
+ overwrite, we still want to increment flow cache counter */
+ overwrite = (im->ipsec4_out_spd_hash_tbl[hash].value != 0);
+ /* Check for stale entry by comparing with current epoch count */
+ if (PREDICT_FALSE (overwrite))
+ stale_overwrite =
+ (im->epoch_count !=
+ ((u32) (im->ipsec4_out_spd_hash_tbl[hash].value & 0xFFFFFFFF)));
+ clib_memcpy_fast (&im->ipsec4_out_spd_hash_tbl[hash], &ip4_5tuple.kv_16_8,
+ sizeof (ip4_5tuple.kv_16_8));
+ ipsec_spinlock_unlock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+
+ /* Increment the counter to track active flow cache entries
+ when entering a fresh entry or overwriting a stale one */
+ if (!overwrite || stale_overwrite)
+ clib_atomic_fetch_add_relax (&im->ipsec4_out_spd_flow_cache_entries, 1);
+
+ return;
+}
+
+always_inline void
+ipsec4_out_spd_add_flow_cache_entry_n (ipsec_main_t *im,
+ ipsec4_spd_5tuple_t *ip4_5tuple,
+ u32 pol_id)
+{
+ u64 hash;
+ u8 overwrite = 0, stale_overwrite = 0;
+
+ ip4_5tuple->kv_16_8.value = (((u64) pol_id) << 32) | ((u64) im->epoch_count);
+
+ hash = ipsec4_hash_16_8 (&ip4_5tuple->kv_16_8);
+ hash &= (im->ipsec4_out_spd_hash_num_buckets - 1);
+
+ ipsec_spinlock_lock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+ /* Check if we are overwriting an existing entry so we know
+ whether to increment the flow cache counter. Since flow
+ cache counter is reset on any policy add/remove, but
+ hash table values are not, we also need to check if the entry
+ we are overwriting is stale or not. If it's a stale entry
+ overwrite, we still want to increment flow cache counter */
+ overwrite = (im->ipsec4_out_spd_hash_tbl[hash].value != 0);
+ /* Check for stale entry by comparing with current epoch count */
+ if (PREDICT_FALSE (overwrite))
+ stale_overwrite =
+ (im->epoch_count !=
+ ((u32) (im->ipsec4_out_spd_hash_tbl[hash].value & 0xFFFFFFFF)));
+ clib_memcpy_fast (&im->ipsec4_out_spd_hash_tbl[hash], &ip4_5tuple->kv_16_8,
+ sizeof (ip4_5tuple->kv_16_8));
+ ipsec_spinlock_unlock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+
+ /* Increment the counter to track active flow cache entries
+ when entering a fresh entry or overwriting a stale one */
+ if (!overwrite || stale_overwrite)
+ clib_atomic_fetch_add_relax (&im->ipsec4_out_spd_flow_cache_entries, 1);
+
+ return;
+}
+
+always_inline void
+ipsec_fp_5tuple_from_ip4_range (ipsec_fp_5tuple_t *tuple, u32 la, u32 ra,
+ u16 lp, u16 rp, u8 pr)
+{
+ clib_memset (tuple->l3_zero_pad, 0, sizeof (tuple->l3_zero_pad));
+ tuple->laddr.as_u32 = clib_host_to_net_u32 (la);
+ tuple->raddr.as_u32 = clib_host_to_net_u32 (ra);
+
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
+ (pr != IP_PROTOCOL_SCTP)))
+ {
+ tuple->lport = 0;
+ tuple->rport = 0;
+ }
+ else
+ {
+ tuple->lport = lp;
+ tuple->rport = rp;
+ }
+
+ tuple->protocol = pr;
+ tuple->is_ipv6 = 0;
+}
+
+always_inline void
+ipsec_fp_5tuple_from_ip4_range_n (ipsec_fp_5tuple_t *tuples,
+ ipsec4_spd_5tuple_t *ip4_5tuple, u32 n)
+{
+ u32 n_left = n;
+ ipsec_fp_5tuple_t *tuple = tuples;
+
+ while (n_left)
+ {
+ clib_memset (tuple->l3_zero_pad, 0, sizeof (tuple->l3_zero_pad));
+ tuple->laddr.as_u32 =
+ clib_host_to_net_u32 (ip4_5tuple->ip4_addr[0].as_u32);
+ tuple->raddr.as_u32 =
+ clib_host_to_net_u32 (ip4_5tuple->ip4_addr[1].as_u32);
+ if (PREDICT_FALSE ((ip4_5tuple->proto != IP_PROTOCOL_TCP) &&
+ (ip4_5tuple->proto != IP_PROTOCOL_UDP) &&
+ (ip4_5tuple->proto != IP_PROTOCOL_SCTP)))
+ {
+ tuple->lport = 0;
+ tuple->rport = 0;
+ }
+ else
+ {
+ tuple->lport = ip4_5tuple->port[0];
+ tuple->rport = ip4_5tuple->port[1];
+ }
+ tuple->protocol = ip4_5tuple->proto;
+ tuple->is_ipv6 = 0;
+ n_left--;
+ tuple++;
+ }
+}
+
+always_inline int
+ipsec_output_policy_match_n (ipsec_spd_t *spd,
+ ipsec4_spd_5tuple_t *ip4_5tuples,
+ ipsec_policy_t **policies, u32 n,
+ u8 flow_cache_enabled)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+ ipsec_policy_t **pp = policies;
+ u32 n_left = n;
+ ipsec4_spd_5tuple_t *ip4_5tuple = ip4_5tuples;
+ u32 policy_ids[n], *policy_id = policy_ids;
+ ipsec_fp_5tuple_t tuples[n];
+ u32 *i;
+ u32 counter = 0;
+
+ if (!spd)
+ return 0;
+
+ clib_memset (policies, 0, n * sizeof (ipsec_policy_t *));
+
+ if (im->fp_spd_ipv4_out_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip4_out_lookup_hash_idx))
+ {
+ ipsec_fp_5tuple_from_ip4_range_n (tuples, ip4_5tuples, n);
+ counter += ipsec_fp_out_policy_match_n (&spd->fp_spd, 0, tuples,
+ policies, policy_ids, n);
+ }
+
+ while (n_left)
+ {
+ if (*pp != 0)
+ goto next;
+
+ vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP4_OUTBOUND])
+ {
+ p = pool_elt_at_index (im->policies, *i);
+ if (PREDICT_FALSE (p->protocol &&
+ (p->protocol != ip4_5tuple->proto)))
+ continue;
+
+ if (ip4_5tuple->ip4_addr[0].as_u32 <
+ clib_net_to_host_u32 (p->raddr.start.ip4.as_u32))
+ continue;
+
+ if (ip4_5tuple->ip4_addr[1].as_u32 >
+ clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
+ continue;
+
+ if (ip4_5tuple->ip4_addr[0].as_u32 <
+ clib_net_to_host_u32 (p->laddr.start.ip4.as_u32))
+ continue;
+
+ if (ip4_5tuple->ip4_addr[1].as_u32 >
+ clib_net_to_host_u32 (p->laddr.stop.ip4.as_u32))
+ continue;
+
+ if (PREDICT_FALSE ((ip4_5tuple->proto != IP_PROTOCOL_TCP) &&
+ (ip4_5tuple->proto != IP_PROTOCOL_UDP) &&
+ (ip4_5tuple->proto != IP_PROTOCOL_SCTP)))
+ {
+ ip4_5tuple->port[0] = 0;
+ ip4_5tuple->port[1] = 0;
+ goto add_policy;
+ }
+
+ if (ip4_5tuple->port[0] < p->lport.start)
+ continue;
+
+ if (ip4_5tuple->port[0] > p->lport.stop)
+ continue;
+
+ if (ip4_5tuple->port[1] < p->rport.start)
+ continue;
+
+ if (ip4_5tuple->port[1] > p->rport.stop)
+ continue;
+
+ add_policy:
+ *pp = p;
+ *policy_id = *i;
+ counter++;
+ break;
+ }
+
+ next:
+ n_left--;
+ pp++;
+ ip4_5tuple++;
+ policy_id++;
+ }
+
+ if (flow_cache_enabled)
+ {
+ n_left = n;
+ policy_id = policy_ids;
+ ip4_5tuple = ip4_5tuples;
+ pp = policies;
+
+ while (n_left)
+ {
+ if (*pp != NULL)
+ {
+ /* Add an Entry in Flow cache */
+ ipsec4_out_spd_add_flow_cache_entry_n (im, ip4_5tuple,
+ *policy_id);
+ }
+
+ n_left--;
+ policy_id++;
+ ip4_5tuple++;
+ pp++;
+ }
+ }
+
+ return counter;
+}
+
+always_inline ipsec_policy_t *
+ipsec4_out_spd_find_flow_cache_entry (ipsec_main_t *im, u8 pr, u32 la, u32 ra,
+ u16 lp, u16 rp)
+{
+ ipsec_policy_t *p = NULL;
+ ipsec4_hash_kv_16_8_t kv_result;
+ u64 hash;
+
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
+ (pr != IP_PROTOCOL_SCTP)))
+ {
+ lp = 0;
+ rp = 0;
+ }
+ ipsec4_spd_5tuple_t ip4_5tuple = { .ip4_addr = { (ip4_address_t) la,
+ (ip4_address_t) ra },
+ .port = { lp, rp },
+ .proto = pr };
+
+ hash = ipsec4_hash_16_8 (&ip4_5tuple.kv_16_8);
+ hash &= (im->ipsec4_out_spd_hash_num_buckets - 1);
+
+ ipsec_spinlock_lock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+ kv_result = im->ipsec4_out_spd_hash_tbl[hash];
+ ipsec_spinlock_unlock (&im->ipsec4_out_spd_hash_tbl[hash].bucket_lock);
+
+ if (ipsec4_hash_key_compare_16_8 ((u64 *) &ip4_5tuple.kv_16_8,
+ (u64 *) &kv_result))
+ {
+ if (im->epoch_count == ((u32) (kv_result.value & 0xFFFFFFFF)))
+ {
+ /* Get the policy based on the index */
+ p =
+ pool_elt_at_index (im->policies, ((u32) (kv_result.value >> 32)));
+ }
+ }
+
+ return p;
+}
+
+always_inline ipsec_policy_t *
+ipsec_output_policy_match (ipsec_spd_t *spd, u8 pr, u32 la, u32 ra, u16 lp,
+ u16 rp, u8 flow_cache_enabled)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+ ipsec_policy_t *policies[1];
+ ipsec_fp_5tuple_t tuples[1];
+ u32 fp_policy_ids[1];
+
+ u32 *i;
+
+ if (!spd)
+ return 0;
+
+ if (im->fp_spd_ipv4_out_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip4_out_lookup_hash_idx))
+ {
+ ipsec_fp_5tuple_from_ip4_range (&tuples[0], la, ra, lp, rp, pr);
+ ipsec_fp_out_policy_match_n (&spd->fp_spd, 0, tuples, policies,
+ fp_policy_ids, 1);
+ p = policies[0];
+ i = fp_policy_ids;
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
+ (pr != IP_PROTOCOL_SCTP)))
+ {
+ lp = 0;
+ rp = 0;
+ }
+ goto add_flow_cache;
+ }
+
+ vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP4_OUTBOUND])
+ {
+ p = pool_elt_at_index (im->policies, *i);
+ if (PREDICT_FALSE ((p->protocol != IPSEC_POLICY_PROTOCOL_ANY) &&
+ (p->protocol != pr)))
+ continue;
+
+ if (ra < clib_net_to_host_u32 (p->raddr.start.ip4.as_u32))
+ continue;
+
+ if (ra > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
+ continue;
+
+ if (la < clib_net_to_host_u32 (p->laddr.start.ip4.as_u32))
+ continue;
+
+ if (la > clib_net_to_host_u32 (p->laddr.stop.ip4.as_u32))
+ continue;
+
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
+ (pr != IP_PROTOCOL_SCTP)))
+ {
+ lp = 0;
+ rp = 0;
+ goto add_flow_cache;
+ }
+
+ if (lp < p->lport.start)
+ continue;
+
+ if (lp > p->lport.stop)
+ continue;
+
+ if (rp < p->rport.start)
+ continue;
+
+ if (rp > p->rport.stop)
+ continue;
+
+ add_flow_cache:
+ if (flow_cache_enabled)
+ {
+ /* Add an Entry in Flow cache */
+ ipsec4_out_spd_add_flow_cache_entry (
+ im, pr, clib_host_to_net_u32 (la), clib_host_to_net_u32 (ra),
+ clib_host_to_net_u16 (lp), clib_host_to_net_u16 (rp), *i);
+ }
+
+ return p;
+ }
+ return 0;
+}
+
+always_inline uword
+ip6_addr_match_range (ip6_address_t *a, ip6_address_t *la, ip6_address_t *ua)
+{
+ if ((memcmp (a->as_u64, la->as_u64, 2 * sizeof (u64)) >= 0) &&
+ (memcmp (a->as_u64, ua->as_u64, 2 * sizeof (u64)) <= 0))
+ return 1;
+ return 0;
+}
+
+always_inline void
+ipsec_fp_5tuple_from_ip6_range (ipsec_fp_5tuple_t *tuple, ip6_address_t *la,
+ ip6_address_t *ra, u16 lp, u16 rp, u8 pr)
+
+{
+ clib_memcpy (&tuple->ip6_laddr, la, sizeof (ip6_address_t));
+ clib_memcpy (&tuple->ip6_raddr, ra, sizeof (ip6_address_t));
+
+ tuple->lport = lp;
+ tuple->rport = rp;
+ tuple->protocol = pr;
+ tuple->is_ipv6 = 1;
+}
+
+always_inline ipsec_policy_t *
+ipsec6_output_policy_match (ipsec_spd_t *spd, ip6_address_t *la,
+ ip6_address_t *ra, u16 lp, u16 rp, u8 pr)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_policy_t *p;
+ ipsec_policy_t *policies[1];
+ ipsec_fp_5tuple_t tuples[1];
+ u32 fp_policy_ids[1];
+
+ u32 *i;
+
+ if (!spd)
+ return 0;
+
+ if (im->fp_spd_ipv6_out_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip6_out_lookup_hash_idx))
+ {
+
+ ipsec_fp_5tuple_from_ip6_range (&tuples[0], la, ra, lp, rp, pr);
+ ipsec_fp_out_policy_match_n (&spd->fp_spd, 1, tuples, policies,
+ fp_policy_ids, 1);
+ p = policies[0];
+ i = fp_policy_ids;
+ return p;
+ }
+
+ vec_foreach (i, spd->policies[IPSEC_SPD_POLICY_IP6_OUTBOUND])
+ {
+ p = pool_elt_at_index (im->policies, *i);
+ if (PREDICT_FALSE ((p->protocol != IPSEC_POLICY_PROTOCOL_ANY) &&
+ (p->protocol != pr)))
+ continue;
+
+ if (!ip6_addr_match_range (ra, &p->raddr.start.ip6, &p->raddr.stop.ip6))
+ continue;
+
+ if (!ip6_addr_match_range (la, &p->laddr.start.ip6, &p->laddr.stop.ip6))
+ continue;
+
+ if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) &&
+ (pr != IP_PROTOCOL_SCTP)))
+ return p;
+
+ if (lp < p->lport.start)
+ continue;
+
+ if (lp > p->lport.stop)
+ continue;
+
+ if (rp < p->rport.start)
+ continue;
+
+ if (rp > p->rport.stop)
+ continue;
+
+ return p;
+ }
+
+ return 0;
+}
+
+#endif /* !IPSEC_OUTPUT_H */
diff --git a/src/vnet/ipsec/ipsec_punt.h b/src/vnet/ipsec/ipsec_punt.h
index afed908bffb..9b9fc803391 100644
--- a/src/vnet/ipsec/ipsec_punt.h
+++ b/src/vnet/ipsec/ipsec_punt.h
@@ -20,7 +20,8 @@
#define foreach_ipsec_punt_reason \
_ (IP4_SPI_UDP_0, "ipsec4-spi-o-udp-0", IP4_PACKET) \
_ (IP4_NO_SUCH_TUNNEL, "ipsec4-no-such-tunnel", IP4_PACKET) \
- _ (IP6_NO_SUCH_TUNNEL, "ipsec6-no-such-tunnel", IP6_PACKET)
+ _ (IP6_NO_SUCH_TUNNEL, "ipsec6-no-such-tunnel", IP6_PACKET) \
+ _ (IP6_SPI_UDP_0, "ipsec6-spi-o-udp-0", IP6_PACKET)
typedef enum ipsec_punt_reason_t_
{
diff --git a/src/vnet/ipsec/ipsec_sa.c b/src/vnet/ipsec/ipsec_sa.c
index b5d58d0c053..1d5195ec793 100644
--- a/src/vnet/ipsec/ipsec_sa.c
+++ b/src/vnet/ipsec/ipsec_sa.c
@@ -13,12 +13,14 @@
* limitations under the License.
*/
+#include <sys/random.h>
#include <vnet/ipsec/ipsec.h>
#include <vnet/ipsec/esp.h>
#include <vnet/udp/udp_local.h>
#include <vnet/fib/fib_table.h>
#include <vnet/fib/fib_entry_track.h>
#include <vnet/ipsec/ipsec_tun.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
/**
* @brief
@@ -28,6 +30,8 @@ vlib_combined_counter_main_t ipsec_sa_counters = {
.name = "SA",
.stat_segment_name = "/net/ipsec/sa",
};
+/* Per-SA error counters */
+vlib_simple_counter_main_t ipsec_sa_err_counters[IPSEC_SA_N_ERRORS];
ipsec_sa_t *ipsec_sa_pool;
@@ -89,18 +93,40 @@ ipsec_sa_stack (ipsec_sa_t * sa)
}
void
+ipsec_sa_set_async_mode (ipsec_sa_t *sa, int is_enabled)
+{
+ if (is_enabled)
+ {
+ sa->crypto_key_index = sa->crypto_async_key_index;
+ sa->crypto_enc_op_id = sa->crypto_async_enc_op_id;
+ sa->crypto_dec_op_id = sa->crypto_async_dec_op_id;
+ sa->integ_key_index = ~0;
+ sa->integ_op_id = ~0;
+ }
+ else
+ {
+ sa->crypto_key_index = sa->crypto_sync_key_index;
+ sa->crypto_enc_op_id = sa->crypto_sync_enc_op_id;
+ sa->crypto_dec_op_id = sa->crypto_sync_dec_op_id;
+ sa->integ_key_index = sa->integ_sync_key_index;
+ sa->integ_op_id = sa->integ_sync_op_id;
+ }
+}
+
+void
ipsec_sa_set_crypto_alg (ipsec_sa_t * sa, ipsec_crypto_alg_t crypto_alg)
{
ipsec_main_t *im = &ipsec_main;
sa->crypto_alg = crypto_alg;
sa->crypto_iv_size = im->crypto_algs[crypto_alg].iv_size;
sa->esp_block_align = clib_max (4, im->crypto_algs[crypto_alg].block_align);
- sa->sync_op_data.crypto_enc_op_id = im->crypto_algs[crypto_alg].enc_op_id;
- sa->sync_op_data.crypto_dec_op_id = im->crypto_algs[crypto_alg].dec_op_id;
+ sa->crypto_sync_enc_op_id = im->crypto_algs[crypto_alg].enc_op_id;
+ sa->crypto_sync_dec_op_id = im->crypto_algs[crypto_alg].dec_op_id;
sa->crypto_calg = im->crypto_algs[crypto_alg].alg;
ASSERT (sa->crypto_iv_size <= ESP_MAX_IV_SIZE);
ASSERT (sa->esp_block_align <= ESP_MAX_BLOCK_SIZE);
- if (IPSEC_CRYPTO_ALG_IS_GCM (crypto_alg))
+ if (IPSEC_CRYPTO_ALG_IS_GCM (crypto_alg) ||
+ IPSEC_CRYPTO_ALG_CTR_AEAD_OTHERS (crypto_alg))
{
sa->integ_icv_size = im->crypto_algs[crypto_alg].icv_size;
ipsec_sa_set_IS_CTR (sa);
@@ -110,6 +136,13 @@ ipsec_sa_set_crypto_alg (ipsec_sa_t * sa, ipsec_crypto_alg_t crypto_alg)
{
ipsec_sa_set_IS_CTR (sa);
}
+ else if (IPSEC_CRYPTO_ALG_IS_NULL_GMAC (crypto_alg))
+ {
+ sa->integ_icv_size = im->crypto_algs[crypto_alg].icv_size;
+ ipsec_sa_set_IS_CTR (sa);
+ ipsec_sa_set_IS_AEAD (sa);
+ ipsec_sa_set_IS_NULL_GMAC (sa);
+ }
}
void
@@ -118,7 +151,7 @@ ipsec_sa_set_integ_alg (ipsec_sa_t * sa, ipsec_integ_alg_t integ_alg)
ipsec_main_t *im = &ipsec_main;
sa->integ_alg = integ_alg;
sa->integ_icv_size = im->integ_algs[integ_alg].icv_size;
- sa->sync_op_data.integ_op_id = im->integ_algs[integ_alg].op_id;
+ sa->integ_sync_op_id = im->integ_algs[integ_alg].op_id;
sa->integ_calg = im->integ_algs[integ_alg].alg;
ASSERT (sa->integ_icv_size <= ESP_MAX_ICV_SIZE);
}
@@ -126,44 +159,167 @@ ipsec_sa_set_integ_alg (ipsec_sa_t * sa, ipsec_integ_alg_t integ_alg)
void
ipsec_sa_set_async_op_ids (ipsec_sa_t * sa)
{
- /* *INDENT-OFF* */
if (ipsec_sa_is_set_USE_ESN (sa))
{
-#define _(n, s, k) \
- if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##n##_ENC ) \
- sa->async_op_data.crypto_async_enc_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD12_ENC; \
- if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##n##_DEC ) \
- sa->async_op_data.crypto_async_dec_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD12_DEC;
- foreach_crypto_aead_alg
+#define _(n, s, k) \
+ if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##n##_ENC) \
+ sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD12_ENC; \
+ if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##n##_DEC) \
+ sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD12_DEC;
+ foreach_crypto_aead_alg
#undef _
}
else
{
-#define _(n, s, k) \
- if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##n##_ENC ) \
- sa->async_op_data.crypto_async_enc_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD8_ENC; \
- if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##n##_DEC ) \
- sa->async_op_data.crypto_async_dec_op_id = \
- VNET_CRYPTO_OP_##n##_TAG16_AAD8_DEC;
- foreach_crypto_aead_alg
+#define _(n, s, k) \
+ if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##n##_ENC) \
+ sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD8_ENC; \
+ if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##n##_DEC) \
+ sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##n##_TAG16_AAD8_DEC;
+ foreach_crypto_aead_alg
#undef _
}
-#define _(c, h, s, k ,d) \
- if( sa->sync_op_data.crypto_enc_op_id == VNET_CRYPTO_OP_##c##_ENC && \
- sa->sync_op_data.integ_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
- sa->async_op_data.crypto_async_enc_op_id = \
- VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC; \
- if( sa->sync_op_data.crypto_dec_op_id == VNET_CRYPTO_OP_##c##_DEC && \
- sa->sync_op_data.integ_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
- sa->async_op_data.crypto_async_dec_op_id = \
- VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC;
+#define _(c, h, s, k, d) \
+ if (sa->crypto_sync_enc_op_id == VNET_CRYPTO_OP_##c##_ENC && \
+ sa->integ_sync_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
+ sa->crypto_async_enc_op_id = VNET_CRYPTO_OP_##c##_##h##_TAG##d##_ENC; \
+ if (sa->crypto_sync_dec_op_id == VNET_CRYPTO_OP_##c##_DEC && \
+ sa->integ_sync_op_id == VNET_CRYPTO_OP_##h##_HMAC) \
+ sa->crypto_async_dec_op_id = VNET_CRYPTO_OP_##c##_##h##_TAG##d##_DEC;
foreach_crypto_link_async_alg
#undef _
- /* *INDENT-ON* */
+}
+
+int
+ipsec_sa_update (u32 id, u16 src_port, u16 dst_port, const tunnel_t *tun,
+ bool is_tun)
+{
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_sa_t *sa;
+ u32 sa_index;
+ uword *p;
+ int rv;
+
+ p = hash_get (im->sa_index_by_sa_id, id);
+ if (!p)
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ sa = ipsec_sa_get (p[0]);
+ sa_index = sa - ipsec_sa_pool;
+
+ if (is_tun && ipsec_sa_is_set_IS_TUNNEL (sa) &&
+ (ip_address_cmp (&tun->t_src, &sa->tunnel.t_src) != 0 ||
+ ip_address_cmp (&tun->t_dst, &sa->tunnel.t_dst) != 0))
+ {
+ /* if the source IP is updated for an inbound SA under a tunnel protect,
+ we need to update the tun_protect DB with the new src IP */
+ if (ipsec_sa_is_set_IS_INBOUND (sa) &&
+ ip_address_cmp (&tun->t_src, &sa->tunnel.t_src) != 0 &&
+ !ip46_address_is_zero (&tun->t_src.ip))
+ {
+ if (ip46_address_is_ip4 (&sa->tunnel.t_src.ip))
+ {
+ ipsec4_tunnel_kv_t old_key, new_key;
+ clib_bihash_kv_8_16_t res,
+ *bkey = (clib_bihash_kv_8_16_t *) &old_key;
+
+ ipsec4_tunnel_mk_key (&old_key, &sa->tunnel.t_src.ip.ip4,
+ clib_host_to_net_u32 (sa->spi));
+ ipsec4_tunnel_mk_key (&new_key, &tun->t_src.ip.ip4,
+ clib_host_to_net_u32 (sa->spi));
+
+ if (!clib_bihash_search_8_16 (&im->tun4_protect_by_key, bkey,
+ &res))
+ {
+ clib_bihash_add_del_8_16 (&im->tun4_protect_by_key, &res, 0);
+ res.key = new_key.key;
+ clib_bihash_add_del_8_16 (&im->tun4_protect_by_key, &res, 1);
+ }
+ }
+ else
+ {
+ ipsec6_tunnel_kv_t old_key = {
+ .key = {
+ .remote_ip = sa->tunnel.t_src.ip.ip6,
+ .spi = clib_host_to_net_u32 (sa->spi),
+ },
+ }, new_key = {
+ .key = {
+ .remote_ip = tun->t_src.ip.ip6,
+ .spi = clib_host_to_net_u32 (sa->spi),
+ }};
+ clib_bihash_kv_24_16_t res,
+ *bkey = (clib_bihash_kv_24_16_t *) &old_key;
+
+ if (!clib_bihash_search_24_16 (&im->tun6_protect_by_key, bkey,
+ &res))
+ {
+ clib_bihash_add_del_24_16 (&im->tun6_protect_by_key, &res,
+ 0);
+ clib_memcpy (&res.key, &new_key.key, 3);
+ clib_bihash_add_del_24_16 (&im->tun6_protect_by_key, &res,
+ 1);
+ }
+ }
+ }
+ tunnel_unresolve (&sa->tunnel);
+ tunnel_copy (tun, &sa->tunnel);
+ if (!ipsec_sa_is_set_IS_INBOUND (sa))
+ {
+ dpo_reset (&sa->dpo);
+
+ sa->tunnel_flags = sa->tunnel.t_encap_decap_flags;
+
+ rv = tunnel_resolve (&sa->tunnel, FIB_NODE_TYPE_IPSEC_SA, sa_index);
+
+ if (rv)
+ {
+ hash_unset (im->sa_index_by_sa_id, sa->id);
+ pool_put (ipsec_sa_pool, sa);
+ return rv;
+ }
+ ipsec_sa_stack (sa);
+ /* generate header templates */
+ if (ipsec_sa_is_set_IS_TUNNEL_V6 (sa))
+ {
+ tunnel_build_v6_hdr (&sa->tunnel,
+ (ipsec_sa_is_set_UDP_ENCAP (sa) ?
+ IP_PROTOCOL_UDP :
+ IP_PROTOCOL_IPSEC_ESP),
+ &sa->ip6_hdr);
+ }
+ else
+ {
+ tunnel_build_v4_hdr (&sa->tunnel,
+ (ipsec_sa_is_set_UDP_ENCAP (sa) ?
+ IP_PROTOCOL_UDP :
+ IP_PROTOCOL_IPSEC_ESP),
+ &sa->ip4_hdr);
+ }
+ }
+ }
+
+ if (ipsec_sa_is_set_UDP_ENCAP (sa))
+ {
+ if (dst_port != IPSEC_UDP_PORT_NONE &&
+ dst_port != clib_net_to_host_u16 (sa->udp_hdr.dst_port))
+ {
+ if (ipsec_sa_is_set_IS_INBOUND (sa))
+ {
+ ipsec_unregister_udp_port (
+ clib_net_to_host_u16 (sa->udp_hdr.dst_port),
+ !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
+ ipsec_register_udp_port (dst_port,
+ !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
+ }
+ sa->udp_hdr.dst_port = clib_host_to_net_u16 (dst_port);
+ }
+ if (src_port != IPSEC_UDP_PORT_NONE &&
+ src_port != clib_net_to_host_u16 (sa->udp_hdr.src_port))
+ sa->udp_hdr.src_port = clib_host_to_net_u16 (src_port);
+ }
+ return (0);
}
int
@@ -171,13 +327,15 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
ipsec_crypto_alg_t crypto_alg, const ipsec_key_t *ck,
ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik,
ipsec_sa_flags_t flags, u32 salt, u16 src_port,
- u16 dst_port, const tunnel_t *tun, u32 *sa_out_index)
+ u16 dst_port, u32 anti_replay_window_size,
+ const tunnel_t *tun, u32 *sa_out_index)
{
vlib_main_t *vm = vlib_get_main ();
ipsec_main_t *im = &ipsec_main;
clib_error_t *err;
ipsec_sa_t *sa;
u32 sa_index;
+ u64 rand[2];
uword *p;
int rv;
@@ -185,14 +343,24 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
if (p)
return VNET_API_ERROR_ENTRY_ALREADY_EXISTS;
+ if (getrandom (rand, sizeof (rand), 0) != sizeof (rand))
+ return VNET_API_ERROR_INIT_FAILED;
+
pool_get_aligned_zero (ipsec_sa_pool, sa, CLIB_CACHE_LINE_BYTES);
+ clib_pcg64i_srandom_r (&sa->iv_prng, rand[0], rand[1]);
+
fib_node_init (&sa->node, FIB_NODE_TYPE_IPSEC_SA);
fib_node_lock (&sa->node);
sa_index = sa - ipsec_sa_pool;
vlib_validate_combined_counter (&ipsec_sa_counters, sa_index);
vlib_zero_combined_counter (&ipsec_sa_counters, sa_index);
+ for (int i = 0; i < IPSEC_SA_N_ERRORS; i++)
+ {
+ vlib_validate_simple_counter (&ipsec_sa_err_counters[i], sa_index);
+ vlib_zero_simple_counter (&ipsec_sa_err_counters[i], sa_index);
+ }
tunnel_copy (tun, &sa->tunnel);
sa->id = id;
@@ -210,12 +378,14 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
ipsec_sa_set_crypto_alg (sa, crypto_alg);
ipsec_sa_set_async_op_ids (sa);
+ if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) && anti_replay_window_size > 64)
+ ipsec_sa_set_ANTI_REPLAY_HUGE (sa);
+
clib_memcpy (&sa->crypto_key, ck, sizeof (sa->crypto_key));
- sa->crypto_key_index = vnet_crypto_key_add (vm,
- im->crypto_algs[crypto_alg].alg,
- (u8 *) ck->data, ck->len);
- if (~0 == sa->crypto_key_index)
+ sa->crypto_sync_key_index = vnet_crypto_key_add (
+ vm, im->crypto_algs[crypto_alg].alg, (u8 *) ck->data, ck->len);
+ if (~0 == sa->crypto_sync_key_index)
{
pool_put (ipsec_sa_pool, sa);
return VNET_API_ERROR_KEY_LENGTH;
@@ -223,42 +393,39 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
if (integ_alg != IPSEC_INTEG_ALG_NONE)
{
- sa->integ_key_index = vnet_crypto_key_add (vm,
- im->
- integ_algs[integ_alg].alg,
- (u8 *) ik->data, ik->len);
- if (~0 == sa->integ_key_index)
+ sa->integ_sync_key_index = vnet_crypto_key_add (
+ vm, im->integ_algs[integ_alg].alg, (u8 *) ik->data, ik->len);
+ if (~0 == sa->integ_sync_key_index)
{
pool_put (ipsec_sa_pool, sa);
return VNET_API_ERROR_KEY_LENGTH;
}
}
- if (sa->async_op_data.crypto_async_enc_op_id &&
- !ipsec_sa_is_set_IS_AEAD (sa))
- { //AES-CBC & HMAC
- sa->async_op_data.linked_key_index =
- vnet_crypto_key_add_linked (vm, sa->crypto_key_index,
- sa->integ_key_index);
- }
+ if (sa->crypto_async_enc_op_id && !ipsec_sa_is_set_IS_AEAD (sa))
+ sa->crypto_async_key_index =
+ vnet_crypto_key_add_linked (vm, sa->crypto_sync_key_index,
+ sa->integ_sync_key_index); // AES-CBC & HMAC
+ else
+ sa->crypto_async_key_index = sa->crypto_sync_key_index;
if (im->async_mode)
- sa->crypto_op_data = sa->async_op_data.data;
+ {
+ ipsec_sa_set_async_mode (sa, 1);
+ }
+ else if (ipsec_sa_is_set_IS_ASYNC (sa))
+ {
+ ipsec_sa_set_async_mode (sa, 1 /* is_enabled */);
+ }
else
{
- if (ipsec_sa_is_set_IS_ASYNC (sa))
- {
- vnet_crypto_request_async_mode (1);
- sa->crypto_op_data = sa->async_op_data.data;
- }
- else
- sa->crypto_op_data = sa->sync_op_data.data;
+ ipsec_sa_set_async_mode (sa, 0 /* is_enabled */);
}
err = ipsec_check_support_cb (im, sa);
if (err)
{
- clib_warning ("%s", err->what);
+ clib_warning ("%v", err->what);
pool_put (ipsec_sa_pool, sa);
return VNET_API_ERROR_UNIMPLEMENTED;
}
@@ -319,7 +486,20 @@ ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
sa->udp_hdr.src_port = clib_host_to_net_u16 (src_port);
if (ipsec_sa_is_set_IS_INBOUND (sa))
- ipsec_register_udp_port (clib_host_to_net_u16 (sa->udp_hdr.dst_port));
+ ipsec_register_udp_port (clib_host_to_net_u16 (sa->udp_hdr.dst_port),
+ !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
+ }
+
+ /* window size rounded up to next power of 2 */
+ if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
+ {
+ anti_replay_window_size = 1 << max_log2 (anti_replay_window_size);
+ sa->replay_window_huge =
+ clib_bitmap_set_region (0, 0, 1, anti_replay_window_size);
+ }
+ else
+ {
+ sa->replay_window = ~0;
}
hash_set (im->sa_index_by_sa_id, sa->id, sa_index);
@@ -345,18 +525,51 @@ ipsec_sa_del (ipsec_sa_t * sa)
(void) ipsec_call_add_del_callbacks (im, sa, sa_index, 0);
if (ipsec_sa_is_set_IS_ASYNC (sa))
- vnet_crypto_request_async_mode (0);
+ {
+ if (!ipsec_sa_is_set_IS_AEAD (sa))
+ vnet_crypto_key_del (vm, sa->crypto_async_key_index);
+ }
+
if (ipsec_sa_is_set_UDP_ENCAP (sa) && ipsec_sa_is_set_IS_INBOUND (sa))
- ipsec_unregister_udp_port (clib_net_to_host_u16 (sa->udp_hdr.dst_port));
+ ipsec_unregister_udp_port (clib_net_to_host_u16 (sa->udp_hdr.dst_port),
+ !ipsec_sa_is_set_IS_TUNNEL_V6 (sa));
if (ipsec_sa_is_set_IS_TUNNEL (sa) && !ipsec_sa_is_set_IS_INBOUND (sa))
dpo_reset (&sa->dpo);
- vnet_crypto_key_del (vm, sa->crypto_key_index);
+ vnet_crypto_key_del (vm, sa->crypto_sync_key_index);
if (sa->integ_alg != IPSEC_INTEG_ALG_NONE)
- vnet_crypto_key_del (vm, sa->integ_key_index);
+ vnet_crypto_key_del (vm, sa->integ_sync_key_index);
+ if (ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
+ clib_bitmap_free (sa->replay_window_huge);
pool_put (ipsec_sa_pool, sa);
}
+int
+ipsec_sa_bind (u32 id, u32 worker, bool bind)
+{
+ ipsec_main_t *im = &ipsec_main;
+ uword *p;
+ ipsec_sa_t *sa;
+
+ p = hash_get (im->sa_index_by_sa_id, id);
+ if (!p)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ sa = ipsec_sa_get (p[0]);
+
+ if (!bind)
+ {
+ sa->thread_index = ~0;
+ return 0;
+ }
+
+ if (worker >= vlib_num_workers ())
+ return VNET_API_ERROR_INVALID_WORKER;
+
+ sa->thread_index = vlib_get_worker_thread_index (worker);
+ return 0;
+}
+
void
ipsec_sa_unlock (index_t sai)
{
@@ -422,6 +635,8 @@ void
ipsec_sa_clear (index_t sai)
{
vlib_zero_combined_counter (&ipsec_sa_counters, sai);
+ for (int i = 0; i < IPSEC_SA_N_ERRORS; i++)
+ vlib_zero_simple_counter (&ipsec_sa_err_counters[i], sai);
}
void
@@ -429,13 +644,11 @@ ipsec_sa_walk (ipsec_sa_walk_cb_t cb, void *ctx)
{
ipsec_sa_t *sa;
- /* *INDENT-OFF* */
pool_foreach (sa, ipsec_sa_pool)
{
if (WALK_CONTINUE != cb (sa, ctx))
break;
}
- /* *INDENT-ON* */
}
/**
@@ -452,19 +665,18 @@ ipsec_sa_fib_node_get (fib_node_index_t index)
}
static ipsec_sa_t *
-ipsec_sa_from_fib_node (fib_node_t * node)
+ipsec_sa_from_fib_node (fib_node_t *node)
{
ASSERT (FIB_NODE_TYPE_IPSEC_SA == node->fn_type);
- return ((ipsec_sa_t *) (((char *) node) -
- STRUCT_OFFSET_OF (ipsec_sa_t, node)));
-
+ return (
+ (ipsec_sa_t *) (((char *) node) - STRUCT_OFFSET_OF (ipsec_sa_t, node)));
}
/**
* Function definition to inform the FIB node that its last lock has gone.
*/
static void
-ipsec_sa_last_lock_gone (fib_node_t * node)
+ipsec_sa_last_lock_gone (fib_node_t *node)
{
/*
* The ipsec SA is a root of the graph. As such
@@ -477,7 +689,7 @@ ipsec_sa_last_lock_gone (fib_node_t * node)
* Function definition to backwalk a FIB node
*/
static fib_node_back_walk_rc_t
-ipsec_sa_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
+ipsec_sa_back_walk (fib_node_t *node, fib_node_back_walk_ctx_t *ctx)
{
ipsec_sa_stack (ipsec_sa_from_fib_node (node));
@@ -494,16 +706,24 @@ const static fib_node_vft_t ipsec_sa_vft = {
.fnv_back_walk = ipsec_sa_back_walk,
};
-/* force inclusion from application's main.c */
+/* Init per-SA error counters and node type */
clib_error_t *
-ipsec_sa_interface_init (vlib_main_t * vm)
+ipsec_sa_init (vlib_main_t *vm)
{
fib_node_register_type (FIB_NODE_TYPE_IPSEC_SA, &ipsec_sa_vft);
- return 0;
+#define _(index, val, err, desc) \
+ ipsec_sa_err_counters[index].name = \
+ (char *) format (0, "SA-" #err "%c", 0); \
+ ipsec_sa_err_counters[index].stat_segment_name = \
+ (char *) format (0, "/net/ipsec/sa/err/" #err "%c", 0); \
+ ipsec_sa_err_counters[index].counters = 0;
+ foreach_ipsec_sa_err
+#undef _
+ return 0;
}
-VLIB_INIT_FUNCTION (ipsec_sa_interface_init);
+VLIB_INIT_FUNCTION (ipsec_sa_init);
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_sa.h b/src/vnet/ipsec/ipsec_sa.h
index 14461ad2cdd..4f73f1eab0f 100644
--- a/src/vnet/ipsec/ipsec_sa.h
+++ b/src/vnet/ipsec/ipsec_sa.h
@@ -16,24 +16,33 @@
#define __IPSEC_SPD_SA_H__
#include <vlib/vlib.h>
+#include <vppinfra/pcg.h>
#include <vnet/crypto/crypto.h>
#include <vnet/ip/ip.h>
#include <vnet/fib/fib_node.h>
#include <vnet/tunnel/tunnel.h>
-#define foreach_ipsec_crypto_alg \
- _ (0, NONE, "none") \
- _ (1, AES_CBC_128, "aes-cbc-128") \
- _ (2, AES_CBC_192, "aes-cbc-192") \
- _ (3, AES_CBC_256, "aes-cbc-256") \
- _ (4, AES_CTR_128, "aes-ctr-128") \
- _ (5, AES_CTR_192, "aes-ctr-192") \
- _ (6, AES_CTR_256, "aes-ctr-256") \
- _ (7, AES_GCM_128, "aes-gcm-128") \
- _ (8, AES_GCM_192, "aes-gcm-192") \
- _ (9, AES_GCM_256, "aes-gcm-256") \
- _ (10, DES_CBC, "des-cbc") \
- _ (11, 3DES_CBC, "3des-cbc")
+#define ESP_MAX_ICV_SIZE (32)
+#define ESP_MAX_IV_SIZE (16)
+#define ESP_MAX_BLOCK_SIZE (16)
+
+#define foreach_ipsec_crypto_alg \
+ _ (0, NONE, "none") \
+ _ (1, AES_CBC_128, "aes-cbc-128") \
+ _ (2, AES_CBC_192, "aes-cbc-192") \
+ _ (3, AES_CBC_256, "aes-cbc-256") \
+ _ (4, AES_CTR_128, "aes-ctr-128") \
+ _ (5, AES_CTR_192, "aes-ctr-192") \
+ _ (6, AES_CTR_256, "aes-ctr-256") \
+ _ (7, AES_GCM_128, "aes-gcm-128") \
+ _ (8, AES_GCM_192, "aes-gcm-192") \
+ _ (9, AES_GCM_256, "aes-gcm-256") \
+ _ (10, DES_CBC, "des-cbc") \
+ _ (11, 3DES_CBC, "3des-cbc") \
+ _ (12, CHACHA20_POLY1305, "chacha20-poly1305") \
+ _ (13, AES_NULL_GMAC_128, "aes-null-gmac-128") \
+ _ (14, AES_NULL_GMAC_192, "aes-null-gmac-192") \
+ _ (15, AES_NULL_GMAC_256, "aes-null-gmac-256")
typedef enum
{
@@ -43,6 +52,11 @@ typedef enum
IPSEC_CRYPTO_N_ALG,
} __clib_packed ipsec_crypto_alg_t;
+#define IPSEC_CRYPTO_ALG_IS_NULL_GMAC(_alg) \
+ ((_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_128) || \
+ (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_192) || \
+ (_alg == IPSEC_CRYPTO_ALG_AES_NULL_GMAC_256))
+
#define IPSEC_CRYPTO_ALG_IS_GCM(_alg) \
(((_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) || \
(_alg == IPSEC_CRYPTO_ALG_AES_GCM_192) || \
@@ -53,6 +67,9 @@ typedef enum
(_alg == IPSEC_CRYPTO_ALG_AES_CTR_192) || \
(_alg == IPSEC_CRYPTO_ALG_AES_CTR_256)))
+#define IPSEC_CRYPTO_ALG_CTR_AEAD_OTHERS(_alg) \
+ (_alg == IPSEC_CRYPTO_ALG_CHACHA20_POLY1305)
+
#define foreach_ipsec_integ_alg \
_ (0, NONE, "none") \
_ (1, MD5_96, "md5-96") /* RFC2403 */ \
@@ -102,7 +119,10 @@ typedef struct ipsec_key_t_
_ (64, IS_INBOUND, "inbound") \
_ (128, IS_AEAD, "aead") \
_ (256, IS_CTR, "ctr") \
- _ (512, IS_ASYNC, "async")
+ _ (512, IS_ASYNC, "async") \
+ _ (1024, NO_ALGO_NO_DROP, "no-algo-no-drop") \
+ _ (2048, IS_NULL_GMAC, "null-gmac") \
+ _ (4096, ANTI_REPLAY_HUGE, "anti-replay-huge")
typedef enum ipsec_sad_flags_t_
{
@@ -113,51 +133,64 @@ typedef enum ipsec_sad_flags_t_
STATIC_ASSERT (sizeof (ipsec_sa_flags_t) == 2, "IPSEC SA flags != 2 byte");
+#define foreach_ipsec_sa_err \
+ _ (0, LOST, lost, "packets lost") \
+ _ (1, HANDOFF, handoff, "hand-off") \
+ _ (2, INTEG_ERROR, integ_error, "Integrity check failed") \
+ _ (3, DECRYPTION_FAILED, decryption_failed, "Decryption failed") \
+ _ (4, CRYPTO_ENGINE_ERROR, crypto_engine_error, \
+ "crypto engine error (dropped)") \
+ _ (5, REPLAY, replay, "SA replayed packet") \
+ _ (6, RUNT, runt, "undersized packet") \
+ _ (7, NO_BUFFERS, no_buffers, "no buffers (dropped)") \
+ _ (8, OVERSIZED_HEADER, oversized_header, \
+ "buffer with oversized header (dropped)") \
+ _ (9, NO_TAIL_SPACE, no_tail_space, \
+ "no enough buffer tail space (dropped)") \
+ _ (10, TUN_NO_PROTO, tun_no_proto, "no tunnel protocol") \
+ _ (11, UNSUP_PAYLOAD, unsup_payload, "unsupported payload") \
+ _ (12, SEQ_CYCLED, seq_cycled, "sequence number cycled (dropped)") \
+ _ (13, CRYPTO_QUEUE_FULL, crypto_queue_full, "crypto queue full (dropped)") \
+ _ (14, NO_ENCRYPTION, no_encryption, "no Encrypting SA (dropped)") \
+ _ (15, DROP_FRAGMENTS, drop_fragments, "IP fragments drop")
+
+typedef enum
+{
+#define _(v, f, s, d) IPSEC_SA_ERROR_##f = v,
+ foreach_ipsec_sa_err
+#undef _
+ IPSEC_SA_N_ERRORS,
+} __clib_packed ipsec_sa_err_t;
+
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- /* flags */
- ipsec_sa_flags_t flags;
-
- u8 crypto_iv_size;
- u8 esp_block_align;
- u8 integ_icv_size;
-
- u8 __pad1[3];
+ clib_pcg64i_random_t iv_prng;
- u32 thread_index;
-
- u32 spi;
- u32 seq;
- u32 seq_hi;
- u64 replay_window;
- u64 ctr_iv_counter;
+ union
+ {
+ u64 replay_window;
+ clib_bitmap_t *replay_window_huge;
+ };
dpo_id_t dpo;
vnet_crypto_key_index_t crypto_key_index;
vnet_crypto_key_index_t integ_key_index;
- /* Union data shared by sync and async ops, updated when mode is
- * changed. */
- union
- {
- struct
- {
- vnet_crypto_op_id_t crypto_enc_op_id:16;
- vnet_crypto_op_id_t crypto_dec_op_id:16;
- vnet_crypto_op_id_t integ_op_id:16;
- };
+ u32 spi;
+ u32 seq;
+ u32 seq_hi;
- struct
- {
- vnet_crypto_async_op_id_t crypto_async_enc_op_id:16;
- vnet_crypto_async_op_id_t crypto_async_dec_op_id:16;
- vnet_crypto_key_index_t linked_key_index;
- };
+ u16 crypto_enc_op_id;
+ u16 crypto_dec_op_id;
+ u16 integ_op_id;
+ ipsec_sa_flags_t flags;
+ u16 thread_index;
- u64 crypto_op_data;
- };
+ u16 integ_icv_size : 6;
+ u16 crypto_iv_size : 5;
+ u16 esp_block_align : 5;
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
@@ -179,30 +212,7 @@ typedef struct
CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
/* Elements with u64 size multiples */
- union
- {
- struct
- {
- vnet_crypto_op_id_t crypto_enc_op_id:16;
- vnet_crypto_op_id_t crypto_dec_op_id:16;
- vnet_crypto_op_id_t integ_op_id:16;
- };
- u64 data;
- } sync_op_data;
-
- union
- {
- struct
- {
- vnet_crypto_async_op_id_t crypto_async_enc_op_id:16;
- vnet_crypto_async_op_id_t crypto_async_dec_op_id:16;
- vnet_crypto_key_index_t linked_key_index;
- };
- u64 data;
- } async_op_data;
-
tunnel_t tunnel;
-
fib_node_t node;
/* elements with u32 size */
@@ -210,6 +220,16 @@ typedef struct
u32 stat_index;
vnet_crypto_alg_t integ_calg;
vnet_crypto_alg_t crypto_calg;
+ u32 crypto_sync_key_index;
+ u32 integ_sync_key_index;
+ u32 crypto_async_key_index;
+
+ /* elements with u16 size */
+ u16 crypto_sync_enc_op_id;
+ u16 crypto_sync_dec_op_id;
+ u16 integ_sync_op_id;
+ u16 crypto_async_enc_op_id;
+ u16 crypto_async_dec_op_id;
/* else u8 packed */
ipsec_crypto_alg_t crypto_alg;
@@ -219,6 +239,10 @@ typedef struct
ipsec_key_t crypto_key;
} ipsec_sa_t;
+STATIC_ASSERT (VNET_CRYPTO_N_OP_IDS < (1 << 16), "crypto ops overflow");
+STATIC_ASSERT (ESP_MAX_ICV_SIZE < (1 << 6), "integer icv overflow");
+STATIC_ASSERT (ESP_MAX_IV_SIZE < (1 << 5), "esp iv overflow");
+STATIC_ASSERT (ESP_MAX_BLOCK_SIZE < (1 << 5), "esp alignment overflow");
STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline1, CLIB_CACHE_LINE_BYTES);
STATIC_ASSERT_OFFSET_OF (ipsec_sa_t, cacheline2, 2 * CLIB_CACHE_LINE_BYTES);
@@ -235,89 +259,149 @@ STATIC_ASSERT (STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ipsec.sad_index) ==
STRUCT_OFFSET_OF (vnet_buffer_opaque_t, ip.save_protocol),
"IPSec data is overlapping with IP data");
-#define _(a,v,s) \
- always_inline int \
- ipsec_sa_is_set_##v (const ipsec_sa_t *sa) { \
- return (sa->flags & IPSEC_SA_FLAG_##v); \
+#define _(a, v, s) \
+ always_inline bool ipsec_sa_is_set_##v (const ipsec_sa_t *sa) \
+ { \
+ return (sa->flags & IPSEC_SA_FLAG_##v); \
}
foreach_ipsec_sa_flags
#undef _
-#define _(a,v,s) \
- always_inline int \
- ipsec_sa_set_##v (ipsec_sa_t *sa) { \
- return (sa->flags |= IPSEC_SA_FLAG_##v); \
+#define _(a, v, s) \
+ always_inline void ipsec_sa_set_##v (ipsec_sa_t *sa) \
+ { \
+ sa->flags |= IPSEC_SA_FLAG_##v; \
}
foreach_ipsec_sa_flags
#undef _
-#define _(a,v,s) \
- always_inline int \
- ipsec_sa_unset_##v (ipsec_sa_t *sa) { \
- return (sa->flags &= ~IPSEC_SA_FLAG_##v); \
+#define _(a, v, s) \
+ always_inline int ipsec_sa_unset_##v (ipsec_sa_t *sa) \
+ { \
+ return (sa->flags &= ~IPSEC_SA_FLAG_##v); \
}
- foreach_ipsec_sa_flags
+ foreach_ipsec_sa_flags
#undef _
-/**
- * @brief
- * SA packet & bytes counters
- */
-extern vlib_combined_counter_main_t ipsec_sa_counters;
-
-extern void ipsec_mk_key (ipsec_key_t * key, const u8 * data, u8 len);
-
-extern int
-ipsec_sa_add_and_lock (u32 id, u32 spi, ipsec_protocol_t proto,
- ipsec_crypto_alg_t crypto_alg, const ipsec_key_t *ck,
- ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik,
- ipsec_sa_flags_t flags, u32 salt, u16 src_port,
- u16 dst_port, const tunnel_t *tun, u32 *sa_out_index);
+ /**
+ * @brief
+ * SA packet & bytes counters
+ */
+ extern vlib_combined_counter_main_t ipsec_sa_counters;
+extern vlib_simple_counter_main_t ipsec_sa_err_counters[IPSEC_SA_N_ERRORS];
+
+extern void ipsec_mk_key (ipsec_key_t *key, const u8 *data, u8 len);
+
+extern int ipsec_sa_update (u32 id, u16 src_port, u16 dst_port,
+ const tunnel_t *tun, bool is_tun);
+extern int ipsec_sa_add_and_lock (
+ u32 id, u32 spi, ipsec_protocol_t proto, ipsec_crypto_alg_t crypto_alg,
+ const ipsec_key_t *ck, ipsec_integ_alg_t integ_alg, const ipsec_key_t *ik,
+ ipsec_sa_flags_t flags, u32 salt, u16 src_port, u16 dst_port,
+ u32 anti_replay_window_size, const tunnel_t *tun, u32 *sa_out_index);
+extern int ipsec_sa_bind (u32 id, u32 worker, bool bind);
extern index_t ipsec_sa_find_and_lock (u32 id);
extern int ipsec_sa_unlock_id (u32 id);
extern void ipsec_sa_unlock (index_t sai);
extern void ipsec_sa_lock (index_t sai);
extern void ipsec_sa_clear (index_t sai);
-extern void ipsec_sa_set_crypto_alg (ipsec_sa_t * sa,
+extern void ipsec_sa_set_crypto_alg (ipsec_sa_t *sa,
ipsec_crypto_alg_t crypto_alg);
-extern void ipsec_sa_set_integ_alg (ipsec_sa_t * sa,
+extern void ipsec_sa_set_integ_alg (ipsec_sa_t *sa,
ipsec_integ_alg_t integ_alg);
+extern void ipsec_sa_set_async_mode (ipsec_sa_t *sa, int is_enabled);
-typedef walk_rc_t (*ipsec_sa_walk_cb_t) (ipsec_sa_t * sa, void *ctx);
+typedef walk_rc_t (*ipsec_sa_walk_cb_t) (ipsec_sa_t *sa, void *ctx);
extern void ipsec_sa_walk (ipsec_sa_walk_cb_t cd, void *ctx);
extern u8 *format_ipsec_replay_window (u8 *s, va_list *args);
-extern u8 *format_ipsec_crypto_alg (u8 * s, va_list * args);
-extern u8 *format_ipsec_integ_alg (u8 * s, va_list * args);
-extern u8 *format_ipsec_sa (u8 * s, va_list * args);
-extern u8 *format_ipsec_key (u8 * s, va_list * args);
-extern uword unformat_ipsec_crypto_alg (unformat_input_t * input,
- va_list * args);
-extern uword unformat_ipsec_integ_alg (unformat_input_t * input,
- va_list * args);
-extern uword unformat_ipsec_key (unformat_input_t * input, va_list * args);
-
-#define IPSEC_UDP_PORT_NONE ((u16)~0)
+extern u8 *format_ipsec_crypto_alg (u8 *s, va_list *args);
+extern u8 *format_ipsec_integ_alg (u8 *s, va_list *args);
+extern u8 *format_ipsec_sa (u8 *s, va_list *args);
+extern u8 *format_ipsec_key (u8 *s, va_list *args);
+extern uword unformat_ipsec_crypto_alg (unformat_input_t *input,
+ va_list *args);
+extern uword unformat_ipsec_integ_alg (unformat_input_t *input, va_list *args);
+extern uword unformat_ipsec_key (unformat_input_t *input, va_list *args);
+
+#define IPSEC_UDP_PORT_NONE ((u16) ~0)
/*
* Anti Replay definitions
*/
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (64)
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE-1)
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE(_sa) \
+ (u32) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? \
+ clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \
+ BITS (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN(_sa, _is_huge) \
+ (u32) (_is_huge ? clib_bitmap_bytes (_sa->replay_window_huge) * 8 : \
+ BITS (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN(_sa) \
+ (u64) (PREDICT_FALSE (ipsec_sa_is_set_ANTI_REPLAY_HUGE (_sa)) ? \
+ clib_bitmap_count_set_bits (_sa->replay_window_huge) : \
+ count_set_bits (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN(_sa, _is_huge) \
+ (u64) (_is_huge ? clib_bitmap_count_set_bits (_sa->replay_window_huge) : \
+ count_set_bits (_sa->replay_window))
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX(_sa) \
+ (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) - 1)
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX_KNOWN_WIN(_sa, _is_huge) \
+ (u32) (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa, _is_huge) - 1)
/*
* sequence number less than the lower bound are outside of the window
* From RFC4303 Appendix A:
* Bl = Tl - W + 1
*/
-#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_tl) (_tl - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE + 1)
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND(_sa) \
+ (u32) (_sa->seq - IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (_sa) + 1)
+
+#define IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN(_sa, _is_huge) \
+ (u32) (_sa->seq - \
+ IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (_sa, _is_huge) + 1)
+
+always_inline u64
+ipsec_sa_anti_replay_get_64b_window (const ipsec_sa_t *sa)
+{
+ if (!ipsec_sa_is_set_ANTI_REPLAY_HUGE (sa))
+ return sa->replay_window;
+
+ u64 w;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE (sa);
+ u32 tl_win_index = sa->seq & (window_size - 1);
+
+ if (PREDICT_TRUE (tl_win_index >= 63))
+ return clib_bitmap_get_multiple (sa->replay_window_huge, tl_win_index - 63,
+ 64);
+
+ w = clib_bitmap_get_multiple_no_check (sa->replay_window_huge, 0,
+ tl_win_index + 1)
+ << (63 - tl_win_index);
+ w |= clib_bitmap_get_multiple_no_check (sa->replay_window_huge,
+ window_size - 63 + tl_win_index,
+ 63 - tl_win_index);
+
+ return w;
+}
always_inline int
-ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq)
+ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq, bool ar_huge)
{
- if (ipsec_sa_is_set_USE_ANTI_REPLAY (sa) &&
- sa->replay_window & (1ULL << (sa->seq - seq)))
- return 1;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
+
+ /* we assume that the packet is in the window.
+ * if the packet falls left (sa->seq - seq >= window size),
+ * the result is wrong */
+
+ if (ar_huge)
+ return clib_bitmap_get (sa->replay_window_huge, seq & (window_size - 1));
else
- return 0;
+ return (sa->replay_window >> (window_size + seq - sa->seq - 1)) & 1;
+
+ return 0;
}
/*
@@ -337,10 +421,14 @@ ipsec_sa_anti_replay_check (const ipsec_sa_t *sa, u32 seq)
always_inline int
ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
u32 hi_seq_used, bool post_decrypt,
- u32 *hi_seq_req)
+ u32 *hi_seq_req, bool ar_huge)
{
ASSERT ((post_decrypt == false) == (hi_seq_req != 0));
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
+ u32 window_lower_bound =
+ IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND_KNOWN_WIN (sa, ar_huge);
+
if (!ipsec_sa_is_set_USE_ESN (sa))
{
if (hi_seq_req)
@@ -353,14 +441,11 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
if (PREDICT_TRUE (seq > sa->seq))
return 0;
- u32 diff = sa->seq - seq;
-
- if (IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE > diff)
- return ((sa->replay_window & (1ULL << diff)) ? 1 : 0);
- else
+ /* does the packet fall out on the left of the window */
+ if (sa->seq >= seq + window_size)
return 1;
- return 0;
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
}
if (!ipsec_sa_is_set_USE_ANTI_REPLAY (sa))
@@ -400,14 +485,15 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
*/
return 0;
}
- if (PREDICT_TRUE (sa->seq >= (IPSEC_SA_ANTI_REPLAY_WINDOW_MAX_INDEX)))
+
+ if (PREDICT_TRUE (sa->seq >= window_size - 1))
{
/*
- * the last sequence number VPP recieved is more than one
+ * the last sequence number VPP received is more than one
* window size greater than zero.
* Case A from RFC4303 Appendix A.
*/
- if (seq < IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND (sa->seq))
+ if (seq < window_lower_bound)
{
/*
* the received sequence number is lower than the lower bound
@@ -419,7 +505,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
{
if (hi_seq_used == sa->seq_hi)
/* the high sequence number used to succesfully decrypt this
- * packet is the same as the last-sequnence number of the SA.
+ * packet is the same as the last-sequence number of the SA.
* that means this packet did not cause a wrap.
* this packet is thus out of window and should be dropped */
return 1;
@@ -431,8 +517,8 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
}
else
{
- /* pre-decrypt it might be the might that casues a wrap, we
- * need to decrpyt to find out */
+ /* pre-decrypt it might be the packet that causes a wrap, we
+ * need to decrypt it to find out */
if (hi_seq_req)
*hi_seq_req = sa->seq_hi + 1;
return 0;
@@ -441,17 +527,17 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
else
{
/*
- * the recieved sequence number greater than the low
+ * the received sequence number greater than the low
* end of the window.
*/
if (hi_seq_req)
*hi_seq_req = sa->seq_hi;
if (seq <= sa->seq)
/*
- * The recieved seq number is within bounds of the window
+ * The received seq number is within bounds of the window
* check if it's a duplicate
*/
- return (ipsec_sa_anti_replay_check (sa, seq));
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
else
/*
* The received sequence number is greater than the window
@@ -464,14 +550,14 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
else
{
/*
- * the last sequence number VPP recieved is within one window
+ * the last sequence number VPP received is within one window
* size of zero, i.e. 0 < TL < WINDOW_SIZE, the lower bound is thus a
* large sequence number.
- * Note that the check below uses unsiged integer arthimetic, so the
+ * Note that the check below uses unsigned integer arithmetic, so the
* RHS will be a larger number.
* Case B from RFC4303 Appendix A.
*/
- if (seq < IPSEC_SA_ANTI_REPLAY_WINDOW_LOWER_BOUND (sa->seq))
+ if (seq < window_lower_bound)
{
/*
* the sequence number is less than the lower bound.
@@ -484,7 +570,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
*/
if (hi_seq_req)
*hi_seq_req = sa->seq_hi;
- return (ipsec_sa_anti_replay_check (sa, seq));
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
}
else
{
@@ -492,7 +578,7 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
* the packet is less the window lower bound or greater than
* the higher bound, depending on how you look at it...
* We're assuming, given that the last sequence number received,
- * TL < WINDOW_SIZE, that a largeer seq num is more likely to be
+ * TL < WINDOW_SIZE, that a larger seq num is more likely to be
* a packet that moves the window forward, than a packet that has
* wrapped the high sequence again. If it were the latter then
* we've lost close to 2^32 packets.
@@ -505,15 +591,14 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
else
{
/*
- * the packet seq number is between the lower bound (a large nubmer)
- * and MAX_SEQ_NUM. This is in the window since the window upper bound
- * tl > 0.
- * However, since TL is the other side of 0 to the received
- * packet, the SA has moved on to a higher sequence number.
+ * the packet seq number is between the lower bound (a large number)
+ * and MAX_SEQ_NUM. This is in the window since the window upper
+ * bound tl > 0. However, since TL is the other side of 0 to the
+ * received packet, the SA has moved on to a higher sequence number.
*/
if (hi_seq_req)
*hi_seq_req = sa->seq_hi - 1;
- return (ipsec_sa_anti_replay_check (sa, seq));
+ return ipsec_sa_anti_replay_check (sa, seq, ar_huge);
}
}
@@ -522,6 +607,152 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
return 0;
}
+always_inline u32
+ipsec_sa_anti_replay_window_shift (ipsec_sa_t *sa, u32 inc, bool ar_huge)
+{
+ u32 n_lost = 0;
+ u32 seen = 0;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
+
+ if (inc < window_size)
+ {
+ if (ar_huge)
+ {
+ /* the number of packets we saw in this section of the window */
+ clib_bitmap_t *window = sa->replay_window_huge;
+ u32 window_lower_bound = (sa->seq + 1) & (window_size - 1);
+ u32 window_next_lower_bound =
+ (window_lower_bound + inc) & (window_size - 1);
+
+ uword i_block, i_word_start, i_word_end, full_words;
+ uword n_blocks = window_size >> log2_uword_bits;
+ uword mask;
+
+ i_block = window_lower_bound >> log2_uword_bits;
+
+ i_word_start = window_lower_bound & (uword_bits - 1);
+ i_word_end = window_next_lower_bound & (uword_bits - 1);
+
+ /* We stay in the same word */
+ if (i_word_start + inc <= uword_bits)
+ {
+ mask = pow2_mask (inc) << i_word_start;
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ }
+ else
+ {
+ full_words = (inc + i_word_start - uword_bits - i_word_end) >>
+ log2_uword_bits;
+
+ /* count set bits in the first word */
+ mask = (uword) ~0 << i_word_start;
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ i_block = (i_block + 1) & (n_blocks - 1);
+
+ /* count set bits in the next full words */
+ /* even if the last word need to be fully counted, we treat it
+ * apart */
+ while (full_words >= 8)
+ {
+ if (full_words >= 16)
+ {
+ /* prefect the next 8 blocks (64 bytes) */
+ clib_prefetch_store (
+ &window[(i_block + 8) & (n_blocks - 1)]);
+ }
+
+ seen += count_set_bits (window[i_block]);
+ seen +=
+ count_set_bits (window[(i_block + 1) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 2) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 3) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 4) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 5) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 6) & (n_blocks - 1)]);
+ seen +=
+ count_set_bits (window[(i_block + 7) & (n_blocks - 1)]);
+ window[i_block] = 0;
+ window[(i_block + 1) & (n_blocks - 1)] = 0;
+ window[(i_block + 2) & (n_blocks - 1)] = 0;
+ window[(i_block + 3) & (n_blocks - 1)] = 0;
+ window[(i_block + 4) & (n_blocks - 1)] = 0;
+ window[(i_block + 5) & (n_blocks - 1)] = 0;
+ window[(i_block + 6) & (n_blocks - 1)] = 0;
+ window[(i_block + 7) & (n_blocks - 1)] = 0;
+
+ i_block = (i_block + 8) & (n_blocks - 1);
+ full_words -= 8;
+ }
+ while (full_words > 0)
+ {
+ // last word is treated after the loop
+ seen += count_set_bits (window[i_block]);
+ window[i_block] = 0;
+ i_block = (i_block + 1) & (n_blocks - 1);
+ full_words--;
+ }
+
+ /* the last word */
+ mask = pow2_mask (i_word_end);
+ seen += count_set_bits (window[i_block] & mask);
+ window[i_block] &= ~mask;
+ }
+
+ clib_bitmap_set_no_check (window,
+ (sa->seq + inc) & (window_size - 1), 1);
+ }
+ else
+ {
+ /*
+ * count how many holes there are in the portion
+ * of the window that we will right shift of the end
+ * as a result of this increments
+ */
+ u64 old = sa->replay_window & pow2_mask (inc);
+ /* the number of packets we saw in this section of the window */
+ seen = count_set_bits (old);
+ sa->replay_window =
+ ((sa->replay_window) >> inc) | (1ULL << (window_size - 1));
+ }
+
+ /*
+ * the number we missed is the size of the window section
+ * minus the number we saw.
+ */
+ n_lost = inc - seen;
+ }
+ else
+ {
+ /* holes in the replay window are lost packets */
+ n_lost = window_size -
+ IPSEC_SA_ANTI_REPLAY_WINDOW_N_SEEN_KNOWN_WIN (sa, ar_huge);
+
+ /* any sequence numbers that now fall outside the window
+ * are forever lost */
+ n_lost += inc - window_size;
+
+ if (PREDICT_FALSE (ar_huge))
+ {
+ clib_bitmap_zero (sa->replay_window_huge);
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ (sa->seq + inc) & (window_size - 1), 1);
+ }
+ else
+ {
+ sa->replay_window = 1ULL << (window_size - 1);
+ }
+ }
+
+ return n_lost;
+}
+
/*
* Anti replay window advance
* inputs need to be in host byte order.
@@ -531,9 +762,12 @@ ipsec_sa_anti_replay_and_sn_advance (const ipsec_sa_t *sa, u32 seq,
* However, updating the window is trivial, so we do it anyway to save
* the branch cost.
*/
-always_inline void
-ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 seq, u32 hi_seq)
+always_inline u64
+ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 thread_index, u32 seq,
+ u32 hi_seq, bool ar_huge)
{
+ u64 n_lost = 0;
+ u32 window_size = IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE_KNOWN_WIN (sa, ar_huge);
u32 pos;
if (ipsec_sa_is_set_USE_ESN (sa))
@@ -543,31 +777,33 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 seq, u32 hi_seq)
if (wrap == 0 && seq > sa->seq)
{
pos = seq - sa->seq;
- if (pos < IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE)
- sa->replay_window = ((sa->replay_window) << pos) | 1;
- else
- sa->replay_window = 1;
+ n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
sa->seq = seq;
}
else if (wrap > 0)
{
- pos = ~seq + sa->seq + 1;
- if (pos < IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE)
- sa->replay_window = ((sa->replay_window) << pos) | 1;
- else
- sa->replay_window = 1;
+ pos = seq + ~sa->seq + 1;
+ n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
sa->seq = seq;
sa->seq_hi = hi_seq;
}
else if (wrap < 0)
{
pos = ~seq + sa->seq + 1;
- sa->replay_window |= (1ULL << pos);
+ if (ar_huge)
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ seq & (window_size - 1), 1);
+ else
+ sa->replay_window |= (1ULL << (window_size - 1 - pos));
}
else
{
pos = sa->seq - seq;
- sa->replay_window |= (1ULL << pos);
+ if (ar_huge)
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ seq & (window_size - 1), 1);
+ else
+ sa->replay_window |= (1ULL << (window_size - 1 - pos));
}
}
else
@@ -575,18 +811,21 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 seq, u32 hi_seq)
if (seq > sa->seq)
{
pos = seq - sa->seq;
- if (pos < IPSEC_SA_ANTI_REPLAY_WINDOW_SIZE)
- sa->replay_window = ((sa->replay_window) << pos) | 1;
- else
- sa->replay_window = 1;
+ n_lost = ipsec_sa_anti_replay_window_shift (sa, pos, ar_huge);
sa->seq = seq;
}
else
{
pos = sa->seq - seq;
- sa->replay_window |= (1ULL << pos);
+ if (ar_huge)
+ clib_bitmap_set_no_check (sa->replay_window_huge,
+ seq & (window_size - 1), 1);
+ else
+ sa->replay_window |= (1ULL << (window_size - 1 - pos));
}
}
+
+ return n_lost;
}
@@ -594,8 +833,8 @@ ipsec_sa_anti_replay_advance (ipsec_sa_t *sa, u32 seq, u32 hi_seq)
* Makes choice for thread_id should be assigned.
* if input ~0, gets random worker_id based on unix_time_now_nsec
*/
-always_inline u32
-ipsec_sa_assign_thread (u32 thread_id)
+always_inline u16
+ipsec_sa_assign_thread (u16 thread_id)
{
return ((thread_id) ? thread_id
: (unix_time_now_nsec () % vlib_num_workers ()) + 1);
diff --git a/src/vnet/ipsec/ipsec_spd.c b/src/vnet/ipsec/ipsec_spd.c
index 4e8017c35ff..7b9a0aea8ed 100644
--- a/src/vnet/ipsec/ipsec_spd.c
+++ b/src/vnet/ipsec/ipsec_spd.c
@@ -21,6 +21,7 @@ ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add)
{
ipsec_main_t *im = &ipsec_main;
ipsec_spd_t *spd = 0;
+ ipsec_spd_fp_t *fp_spd = 0;
uword *p;
u32 spd_index, k, v;
@@ -36,25 +37,160 @@ ipsec_add_del_spd (vlib_main_t * vm, u32 spd_id, int is_add)
spd = pool_elt_at_index (im->spds, spd_index);
if (!spd)
return VNET_API_ERROR_INVALID_VALUE;
- /* *INDENT-OFF* */
+
hash_foreach (k, v, im->spd_index_by_sw_if_index, ({
if (v == spd_index)
ipsec_set_interface_spd(vm, k, spd_id, 0);
}));
- /* *INDENT-ON* */
hash_unset (im->spd_index_by_spd_id, spd_id);
#define _(s,v) vec_free(spd->policies[IPSEC_SPD_POLICY_##s]);
foreach_ipsec_spd_policy_type
#undef _
- pool_put (im->spds, spd);
+
+ fp_spd = &spd->fp_spd;
+
+ if (im->fp_spd_ipv4_out_is_enabled)
+ {
+ if (fp_spd->ip4_out_lookup_hash_idx != INDEX_INVALID)
+ {
+ clib_bihash_16_8_t *bihash_table =
+ pool_elt_at_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_out_lookup_hash_idx);
+
+ clib_bihash_free_16_8 (bihash_table);
+ vec_free (fp_spd->name4_out);
+ pool_put_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_out_lookup_hash_idx);
+ }
+ }
+
+ if (im->fp_spd_ipv4_in_is_enabled)
+ {
+ if (fp_spd->ip4_in_lookup_hash_idx != INDEX_INVALID)
+ {
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, fp_spd->ip4_in_lookup_hash_idx);
+
+ clib_bihash_free_16_8 (bihash_table);
+ vec_free (fp_spd->name4_in);
+ pool_put_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_in_lookup_hash_idx);
+ }
+ }
+
+ if (im->fp_spd_ipv6_out_is_enabled)
+ {
+ if (fp_spd->ip6_out_lookup_hash_idx != INDEX_INVALID)
+ {
+ clib_bihash_40_8_t *bihash_table =
+ pool_elt_at_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_out_lookup_hash_idx);
+
+ clib_bihash_free_40_8 (bihash_table);
+ vec_free (fp_spd->name6_out);
+ pool_put_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_out_lookup_hash_idx);
+ }
+ }
+ if (im->fp_spd_ipv6_in_is_enabled)
+ {
+ if (fp_spd->ip6_in_lookup_hash_idx != INDEX_INVALID)
+ {
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, fp_spd->ip6_in_lookup_hash_idx);
+
+ clib_bihash_free_40_8 (bihash_table);
+ vec_free (fp_spd->name6_in);
+ pool_put_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_in_lookup_hash_idx);
+ }
+ }
+
+ pool_put (im->spds, spd);
}
- else /* create new SPD */
+ else /* create new SPD */
{
pool_get (im->spds, spd);
clib_memset (spd, 0, sizeof (*spd));
spd_index = spd - im->spds;
spd->id = spd_id;
hash_set (im->spd_index_by_spd_id, spd_id, spd_index);
+
+ fp_spd = &spd->fp_spd;
+ fp_spd->ip4_out_lookup_hash_idx = INDEX_INVALID;
+ fp_spd->ip4_in_lookup_hash_idx = INDEX_INVALID;
+ fp_spd->ip6_out_lookup_hash_idx = INDEX_INVALID;
+ fp_spd->ip6_in_lookup_hash_idx = INDEX_INVALID;
+
+ if (im->fp_spd_ipv4_out_is_enabled)
+ {
+ if (pool_elts (im->fp_ip4_lookup_hashes_pool) <
+ pool_max_len (im->fp_ip4_lookup_hashes_pool))
+ {
+ clib_bihash_16_8_t *bihash_table;
+ fp_spd->name4_out = format (0, "spd_%u_fp_ip4_out", spd_id);
+
+ pool_get (im->fp_ip4_lookup_hashes_pool, bihash_table);
+ fp_spd->ip4_out_lookup_hash_idx =
+ bihash_table - im->fp_ip4_lookup_hashes_pool;
+ clib_bihash_init_16_8 (bihash_table, (char *) fp_spd->name4_out,
+ im->fp_lookup_hash_buckets,
+ im->fp_lookup_hash_buckets *
+ IPSEC_FP_IP4_HASH_MEM_PER_BUCKET);
+ }
+ }
+
+ if (im->fp_spd_ipv4_in_is_enabled)
+ {
+ if (pool_elts (im->fp_ip4_lookup_hashes_pool) <
+ pool_max_len (im->fp_ip4_lookup_hashes_pool))
+ {
+ clib_bihash_16_8_t *bihash_table;
+ fp_spd->name4_in = format (0, "spd_%u_fp_ip4_in", spd_id);
+
+ pool_get (im->fp_ip4_lookup_hashes_pool, bihash_table);
+ fp_spd->ip4_in_lookup_hash_idx =
+ bihash_table - im->fp_ip4_lookup_hashes_pool;
+ clib_bihash_init_16_8 (bihash_table, (char *) fp_spd->name4_in,
+ im->fp_lookup_hash_buckets,
+ im->fp_lookup_hash_buckets *
+ IPSEC_FP_IP4_HASH_MEM_PER_BUCKET);
+ }
+ }
+ if (im->fp_spd_ipv6_out_is_enabled)
+ {
+ if (pool_elts (im->fp_ip6_lookup_hashes_pool) <
+ pool_max_len (im->fp_ip6_lookup_hashes_pool))
+ {
+ clib_bihash_40_8_t *bihash_table;
+
+ fp_spd->name6_out = format (0, "spd_%u_fp_ip6_out", spd_id);
+ pool_get (im->fp_ip6_lookup_hashes_pool, bihash_table);
+ fp_spd->ip6_out_lookup_hash_idx =
+ bihash_table - im->fp_ip6_lookup_hashes_pool;
+ clib_bihash_init_40_8 (bihash_table, (char *) fp_spd->name6_out,
+ im->fp_lookup_hash_buckets,
+ im->fp_lookup_hash_buckets *
+ IPSEC_FP_IP6_HASH_MEM_PER_BUCKET);
+ }
+ }
+ if (im->fp_spd_ipv6_in_is_enabled)
+ {
+ if (pool_elts (im->fp_ip6_lookup_hashes_pool) <
+ pool_max_len (im->fp_ip6_lookup_hashes_pool))
+ {
+ clib_bihash_40_8_t *bihash_table;
+
+ fp_spd->name6_in = format (0, "spd_%u_fp_ip6_in", spd_id);
+ pool_get (im->fp_ip6_lookup_hashes_pool, bihash_table);
+ fp_spd->ip6_in_lookup_hash_idx =
+ bihash_table - im->fp_ip6_lookup_hashes_pool;
+ clib_bihash_init_40_8 (bihash_table, (char *) fp_spd->name6_in,
+ im->fp_lookup_hash_buckets,
+ im->fp_lookup_hash_buckets *
+ IPSEC_FP_IP6_HASH_MEM_PER_BUCKET);
+ }
+ }
}
return 0;
}
diff --git a/src/vnet/ipsec/ipsec_spd.h b/src/vnet/ipsec/ipsec_spd.h
index 3637c27287d..3b1e4b40747 100644
--- a/src/vnet/ipsec/ipsec_spd.h
+++ b/src/vnet/ipsec/ipsec_spd.h
@@ -15,6 +15,8 @@
#ifndef __IPSEC_SPD_H__
#define __IPSEC_SPD_H__
+#include <vppinfra/bihash_40_8.h>
+#include <vppinfra/bihash_16_8.h>
#include <vlib/vlib.h>
#define foreach_ipsec_spd_policy_type \
@@ -40,8 +42,33 @@ typedef enum ipsec_spd_policy_t_
extern u8 *format_ipsec_policy_type (u8 * s, va_list * args);
+typedef struct
+{
+ /* index in the mask types pool */
+ u32 mask_type_idx;
+ /* counts references correspond to given mask type index */
+ u32 refcount;
+} ipsec_fp_mask_id_t;
+
/**
- * @brief A Secruity Policy Database
+ * @brief A fast path Security Policy Database
+ */
+typedef struct
+{
+ ipsec_fp_mask_id_t *fp_mask_ids[IPSEC_SPD_POLICY_N_TYPES];
+ /* names of bihash tables */
+ u8 *name4_out;
+ u8 *name4_in;
+ u8 *name6_out;
+ u8 *name6_in;
+ u32 ip6_out_lookup_hash_idx; /* fp ip6 lookup hash out index in the pool */
+ u32 ip4_out_lookup_hash_idx; /* fp ip4 lookup hash out index in the pool */
+ u32 ip6_in_lookup_hash_idx; /* fp ip6 lookup hash in index in the pool */
+ u32 ip4_in_lookup_hash_idx; /* fp ip4 lookup hash in index in the pool */
+} ipsec_spd_fp_t;
+
+/**
+ * @brief A Security Policy Database
*/
typedef struct
{
@@ -49,6 +76,7 @@ typedef struct
u32 id;
/** vectors for each of the policy types */
u32 *policies[IPSEC_SPD_POLICY_N_TYPES];
+ ipsec_spd_fp_t fp_spd;
} ipsec_spd_t;
/**
@@ -64,6 +92,9 @@ extern int ipsec_set_interface_spd (vlib_main_t * vm,
extern u8 *format_ipsec_spd (u8 * s, va_list * args);
+extern u8 *format_ipsec_out_spd_flow_cache (u8 *s, va_list *args);
+extern u8 *format_ipsec_in_spd_flow_cache (u8 *s, va_list *args);
+
#endif /* __IPSEC_SPD_H__ */
/*
diff --git a/src/vnet/ipsec/ipsec_spd_fp_lookup.h b/src/vnet/ipsec/ipsec_spd_fp_lookup.h
new file mode 100644
index 00000000000..2bbd7c664f9
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_spd_fp_lookup.h
@@ -0,0 +1,579 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2022 Intel and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef IPSEC_SPD_FP_LOOKUP_H
+#define IPSEC_SPD_FP_LOOKUP_H
+
+#include <vnet/ipsec/ipsec.h>
+
+static_always_inline int
+single_rule_out_match_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *match)
+{
+ if (PREDICT_FALSE (policy->is_ipv6 != match->is_ipv6))
+ return (0);
+
+ if (PREDICT_FALSE (policy->protocol != IPSEC_POLICY_PROTOCOL_ANY &&
+ (policy->protocol != match->protocol)))
+ return (0);
+
+ if (!policy->is_ipv6)
+ {
+ if (PREDICT_FALSE (
+ clib_net_to_host_u32 (match->laddr.as_u32) <
+ clib_net_to_host_u32 (policy->laddr.start.ip4.as_u32)))
+ return (0);
+
+ if (PREDICT_FALSE (clib_net_to_host_u32 (match->laddr.as_u32) >
+ clib_net_to_host_u32 (policy->laddr.stop.ip4.as_u32)))
+ return (0);
+
+ if (PREDICT_FALSE (
+ clib_net_to_host_u32 (match->raddr.as_u32) <
+ clib_net_to_host_u32 (policy->raddr.start.ip4.as_u32)))
+ return (0);
+
+ if (PREDICT_FALSE (clib_net_to_host_u32 (match->raddr.as_u32) >
+ clib_net_to_host_u32 (policy->raddr.stop.ip4.as_u32)))
+ return (0);
+ }
+ else
+ {
+
+ if (ip6_address_compare (&match->ip6_laddr, &policy->laddr.start.ip6) <
+ 0)
+ return (0);
+
+ if (ip6_address_compare (&policy->laddr.stop.ip6, &match->ip6_laddr) < 0)
+
+ return (0);
+
+ if (ip6_address_compare (&match->ip6_raddr, &policy->raddr.start.ip6) <
+ 0)
+
+ return (0);
+
+ if (ip6_address_compare (&policy->raddr.stop.ip6, &match->ip6_raddr) < 0)
+
+ return (0);
+ }
+
+ if (PREDICT_FALSE ((match->protocol != IP_PROTOCOL_TCP) &&
+ (match->protocol != IP_PROTOCOL_UDP) &&
+ (match->protocol != IP_PROTOCOL_SCTP)))
+ {
+ return (1);
+ }
+
+ if (match->lport < policy->lport.start)
+ return (0);
+
+ if (match->lport > policy->lport.stop)
+ return (0);
+
+ if (match->rport < policy->rport.start)
+ return (0);
+
+ if (match->rport > policy->rport.stop)
+ return (0);
+
+ return (1);
+}
+
+static_always_inline int
+single_rule_in_match_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *match)
+{
+
+ u32 da = clib_net_to_host_u32 (match->laddr.as_u32);
+ u32 sa = clib_net_to_host_u32 (match->raddr.as_u32);
+
+ if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
+ {
+ ipsec_sa_t *s = ipsec_sa_get (policy->sa_index);
+
+ if (match->spi != s->spi)
+ return (0);
+
+ if (ipsec_sa_is_set_IS_TUNNEL (s))
+ {
+ if (da != clib_net_to_host_u32 (s->tunnel.t_dst.ip.ip4.as_u32))
+ return (0);
+
+ if (sa != clib_net_to_host_u32 (s->tunnel.t_src.ip.ip4.as_u32))
+ return (0);
+ }
+ }
+ else
+ {
+ if (sa < clib_net_to_host_u32 (policy->raddr.start.ip4.as_u32))
+ return (0);
+
+ if (sa > clib_net_to_host_u32 (policy->raddr.stop.ip4.as_u32))
+ return (0);
+
+ if (da < clib_net_to_host_u32 (policy->laddr.start.ip4.as_u32))
+ return (0);
+
+ if (da > clib_net_to_host_u32 (policy->laddr.stop.ip4.as_u32))
+ return (0);
+ }
+ return (1);
+}
+
+static_always_inline u32
+ipsec_fp_in_ip6_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 n)
+{
+ u32 last_priority[n];
+ u32 i = 0;
+ u32 counter = 0;
+ ipsec_fp_mask_type_entry_t *mte;
+ ipsec_fp_mask_id_t *mti;
+ ipsec_fp_5tuple_t *match = tuples;
+ ipsec_policy_t *policy;
+ u32 n_left = n;
+ clib_bihash_kv_40_8_t kv;
+ /* result of the lookup */
+ clib_bihash_kv_40_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ u64 *pkey, *pmatch, *pmask;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_fp_t *pspd_fp = (ipsec_spd_fp_t *) spd_fp;
+ ipsec_fp_mask_id_t *mask_type_ids = pspd_fp->fp_mask_ids[match->action];
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, pspd_fp->ip6_in_lookup_hash_idx);
+
+ /* clear the list of matched policies pointers */
+ clib_memset (policies, 0, n * sizeof (*policies));
+ clib_memset (last_priority, 0, n * sizeof (u32));
+ n_left = n;
+ while (n_left)
+ {
+ vec_foreach (mti, mask_type_ids)
+ {
+ mte = im->fp_mask_types + mti->mask_type_idx;
+ if (mte->mask.action == 0)
+ continue;
+
+ pmatch = (u64 *) match->kv_40_8.key;
+ pmask = (u64 *) mte->mask.kv_40_8.key;
+ pkey = (u64 *) kv.key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ int res =
+ clib_bihash_search_inline_2_40_8 (bihash_table, &kv, &result);
+ /* lookup the hash by each packet in the burst for this mask. */
+
+ if (res == 0)
+ {
+ /* There is a hit in the hash table. */
+ /* Find the policy with highest priority. */
+ /* Store the lookup results in a dedicated array. */
+
+ if (vec_len (result_val->fp_policies_ids) > 1)
+ {
+ u32 *policy_id;
+ vec_foreach (policy_id, result_val->fp_policies_ids)
+ {
+ policy = im->policies + *policy_id;
+
+ if (single_rule_in_match_5tuple (policy, match))
+ {
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ }
+ break;
+ }
+ }
+ }
+ else
+ {
+ u32 *policy_id;
+ ASSERT (vec_len (result_val->fp_policies_ids) == 1);
+ policy_id = result_val->fp_policies_ids;
+ policy = im->policies + *policy_id;
+ if ((last_priority[i] < policy->priority) &&
+ (single_rule_in_match_5tuple (policy, match)))
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ }
+ }
+ }
+ }
+
+ i++;
+ n_left--;
+ match++;
+ }
+ return counter;
+}
+
+static_always_inline u32
+ipsec_fp_in_ip4_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 n)
+
+{
+ u32 last_priority[n];
+ u32 i = 0;
+ u32 counter = 0;
+ ipsec_fp_mask_type_entry_t *mte;
+ ipsec_fp_mask_id_t *mti;
+ ipsec_fp_5tuple_t *match = tuples;
+ ipsec_policy_t *policy;
+ u32 n_left = n;
+ clib_bihash_kv_16_8_t kv;
+ /* result of the lookup */
+ clib_bihash_kv_16_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ u64 *pkey, *pmatch, *pmask;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_fp_t *pspd_fp = (ipsec_spd_fp_t *) spd_fp;
+ ipsec_fp_mask_id_t *mask_type_ids = pspd_fp->fp_mask_ids[match->action];
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, pspd_fp->ip4_in_lookup_hash_idx);
+
+ /* clear the list of matched policies pointers */
+ clib_memset (policies, 0, n * sizeof (*policies));
+ clib_memset (last_priority, 0, n * sizeof (u32));
+ n_left = n;
+ while (n_left)
+ {
+ vec_foreach (mti, mask_type_ids)
+ {
+ mte = im->fp_mask_types + mti->mask_type_idx;
+ if (mte->mask.action == 0)
+ continue;
+ pmatch = (u64 *) match->kv_16_8.key;
+ pmask = (u64 *) mte->mask.kv_16_8.key;
+ pkey = (u64 *) kv.key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ int res =
+ clib_bihash_search_inline_2_16_8 (bihash_table, &kv, &result);
+ /* lookup the hash by each packet in the burst for this mask. */
+
+ if (res == 0)
+ {
+ /* There is a hit in the hash table. */
+ /* Find the policy with highest priority. */
+ /* Store the lookup results in a dedicated array. */
+
+ if (vec_len (result_val->fp_policies_ids) > 1)
+ {
+ u32 *policy_id;
+ vec_foreach (policy_id, result_val->fp_policies_ids)
+ {
+ policy = im->policies + *policy_id;
+
+ if (single_rule_in_match_5tuple (policy, match))
+ {
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ }
+ break;
+ }
+ }
+ }
+ else
+ {
+ u32 *policy_id;
+ ASSERT (vec_len (result_val->fp_policies_ids) == 1);
+ policy_id = result_val->fp_policies_ids;
+ policy = im->policies + *policy_id;
+ if ((last_priority[i] < policy->priority) &&
+ (single_rule_in_match_5tuple (policy, match)))
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ }
+ }
+ }
+ }
+
+ i++;
+ n_left--;
+ match++;
+ }
+ return counter;
+}
+
+/**
+ * @brief function handler to perform lookup in fastpath SPD
+ * for inbound traffic burst of n packets
+ **/
+
+static_always_inline u32
+ipsec_fp_in_policy_match_n (void *spd_fp, u8 is_ipv6,
+ ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 n)
+{
+ if (is_ipv6)
+ return ipsec_fp_in_ip6_policy_match_n (spd_fp, tuples, policies, n);
+ else
+ return ipsec_fp_in_ip4_policy_match_n (spd_fp, tuples, policies, n);
+}
+
+static_always_inline u32
+ipsec_fp_out_ip6_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 *ids, u32 n)
+
+{
+ u32 last_priority[n];
+ u32 i = 0;
+ u32 counter = 0;
+ ipsec_fp_mask_type_entry_t *mte;
+ ipsec_fp_mask_id_t *mti;
+ ipsec_fp_5tuple_t *match = tuples;
+ ipsec_policy_t *policy;
+
+ u32 n_left = n;
+ clib_bihash_kv_40_8_t kv;
+ /* result of the lookup */
+ clib_bihash_kv_40_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ u64 *pkey, *pmatch, *pmask;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_fp_t *pspd_fp = (ipsec_spd_fp_t *) spd_fp;
+ ipsec_fp_mask_id_t *mask_type_ids =
+ pspd_fp->fp_mask_ids[IPSEC_SPD_POLICY_IP6_OUTBOUND];
+ clib_bihash_40_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip6_lookup_hashes_pool, pspd_fp->ip6_out_lookup_hash_idx);
+
+ /*clear the list of matched policies pointers */
+ clib_memset (policies, 0, n * sizeof (*policies));
+ clib_memset (last_priority, 0, n * sizeof (u32));
+ n_left = n;
+ while (n_left)
+ {
+ vec_foreach (mti, mask_type_ids)
+ {
+ mte = im->fp_mask_types + mti->mask_type_idx;
+ if (mte->mask.action != 0)
+ continue;
+
+ pmatch = (u64 *) match->kv_40_8.key;
+ pmask = (u64 *) mte->mask.kv_40_8.key;
+ pkey = (u64 *) kv.key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ int res =
+ clib_bihash_search_inline_2_40_8 (bihash_table, &kv, &result);
+ /* lookup the hash by each packet in the burst for this mask. */
+
+ if (res == 0)
+ {
+ /* There is a hit in the hash table. */
+ /* Find the policy with highest priority. */
+ /* Store the lookup results in a dedicated array. */
+
+ if (vec_len (result_val->fp_policies_ids) > 1)
+ {
+ u32 *policy_id;
+ vec_foreach (policy_id, result_val->fp_policies_ids)
+ {
+ policy = im->policies + *policy_id;
+
+ if (single_rule_out_match_5tuple (policy, match))
+ {
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ ids[i] = *policy_id;
+ }
+ break;
+ }
+ }
+ }
+ else
+ {
+ u32 *policy_id;
+ ASSERT (vec_len (result_val->fp_policies_ids) == 1);
+ policy_id = result_val->fp_policies_ids;
+ policy = im->policies + *policy_id;
+ if (single_rule_out_match_5tuple (policy, match))
+ {
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ ids[i] = *policy_id;
+ }
+ }
+ }
+ }
+ }
+ n_left--;
+ match++;
+ i++;
+ }
+ return counter;
+}
+
+static_always_inline u32
+ipsec_fp_out_ip4_policy_match_n (void *spd_fp, ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 *ids, u32 n)
+
+{
+ u32 last_priority[n];
+ u32 i = 0;
+ u32 counter = 0;
+ ipsec_fp_mask_type_entry_t *mte;
+ ipsec_fp_mask_id_t *mti;
+ ipsec_fp_5tuple_t *match = tuples;
+ ipsec_policy_t *policy;
+
+ u32 n_left = n;
+ clib_bihash_kv_16_8_t kv;
+ /* result of the lookup */
+ clib_bihash_kv_16_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ u64 *pkey, *pmatch, *pmask;
+ ipsec_main_t *im = &ipsec_main;
+ ipsec_spd_fp_t *pspd_fp = (ipsec_spd_fp_t *) spd_fp;
+ ipsec_fp_mask_id_t *mask_type_ids =
+ pspd_fp->fp_mask_ids[IPSEC_SPD_POLICY_IP4_OUTBOUND];
+ clib_bihash_16_8_t *bihash_table = pool_elt_at_index (
+ im->fp_ip4_lookup_hashes_pool, pspd_fp->ip4_out_lookup_hash_idx);
+
+ /* clear the list of matched policies pointers */
+ clib_memset (policies, 0, n * sizeof (*policies));
+ clib_memset (last_priority, 0, n * sizeof (u32));
+ n_left = n;
+ while (n_left)
+ {
+ vec_foreach (mti, mask_type_ids)
+ {
+ mte = im->fp_mask_types + mti->mask_type_idx;
+ if (mte->mask.action != 0)
+ continue;
+
+ pmatch = (u64 *) match->kv_16_8.key;
+ pmask = (u64 *) mte->mask.kv_16_8.key;
+ pkey = (u64 *) kv.key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ int res =
+ clib_bihash_search_inline_2_16_8 (bihash_table, &kv, &result);
+ /* lookup the hash by each packet in the burst for this mask. */
+
+ if (res == 0)
+ {
+ /* There is a hit in the hash table. */
+ /* Find the policy with highest priority. */
+ /* Store the lookup results in a dedicated array. */
+
+ if (vec_len (result_val->fp_policies_ids) > 1)
+ {
+ u32 *policy_id;
+ vec_foreach (policy_id, result_val->fp_policies_ids)
+ {
+ policy = im->policies + *policy_id;
+
+ if (single_rule_out_match_5tuple (policy, match))
+ {
+ if (last_priority[i] < policy->priority)
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ ids[i] = *policy_id;
+ }
+ break;
+ }
+ }
+ }
+ else
+ {
+ u32 *policy_id;
+ ASSERT (vec_len (result_val->fp_policies_ids) == 1);
+ policy_id = result_val->fp_policies_ids;
+ policy = im->policies + *policy_id;
+ if ((last_priority[i] < policy->priority) &&
+ (single_rule_out_match_5tuple (policy, match)))
+ {
+ last_priority[i] = policy->priority;
+ if (policies[i] == 0)
+ counter++;
+ policies[i] = policy;
+ ids[i] = *policy_id;
+ }
+ }
+ }
+ }
+
+ i++;
+ n_left--;
+ match++;
+ }
+ return counter;
+}
+
+/**
+ * @brief function handler to perform lookup in fastpath SPD
+ * for outbound traffic burst of n packets
+ * returns number of successfully matched policies
+ **/
+
+static_always_inline u32
+ipsec_fp_out_policy_match_n (void *spd_fp, u8 is_ipv6,
+ ipsec_fp_5tuple_t *tuples,
+ ipsec_policy_t **policies, u32 *ids, u32 n)
+
+{
+ if (is_ipv6)
+ return ipsec_fp_out_ip6_policy_match_n (spd_fp, tuples, policies, ids, n);
+ else
+ return ipsec_fp_out_ip4_policy_match_n (spd_fp, tuples, policies, ids, n);
+}
+
+#endif /* !IPSEC_SPD_FP_LOOKUP_H */
diff --git a/src/vnet/ipsec/ipsec_spd_policy.c b/src/vnet/ipsec/ipsec_spd_policy.c
index 05cfdf0a671..af087689941 100644
--- a/src/vnet/ipsec/ipsec_spd_policy.c
+++ b/src/vnet/ipsec/ipsec_spd_policy.c
@@ -24,78 +24,6 @@ vlib_combined_counter_main_t ipsec_spd_policy_counters = {
.stat_segment_name = "/net/ipsec/policy",
};
-static int
-ipsec_policy_is_equal (ipsec_policy_t * p1, ipsec_policy_t * p2)
-{
- if (p1->priority != p2->priority)
- return 0;
- if (p1->type != p2->type)
- return (0);
- if (p1->policy != p2->policy)
- return (0);
- if (p1->sa_id != p2->sa_id)
- return (0);
- if (p1->protocol != p2->protocol)
- return (0);
- if (p1->lport.start != p2->lport.start)
- return (0);
- if (p1->lport.stop != p2->lport.stop)
- return (0);
- if (p1->rport.start != p2->rport.start)
- return (0);
- if (p1->rport.stop != p2->rport.stop)
- return (0);
- if (p1->is_ipv6 != p2->is_ipv6)
- return (0);
- if (p2->is_ipv6)
- {
- if (p1->laddr.start.ip6.as_u64[0] != p2->laddr.start.ip6.as_u64[0])
- return (0);
- if (p1->laddr.start.ip6.as_u64[1] != p2->laddr.start.ip6.as_u64[1])
- return (0);
- if (p1->laddr.stop.ip6.as_u64[0] != p2->laddr.stop.ip6.as_u64[0])
- return (0);
- if (p1->laddr.stop.ip6.as_u64[1] != p2->laddr.stop.ip6.as_u64[1])
- return (0);
- if (p1->raddr.start.ip6.as_u64[0] != p2->raddr.start.ip6.as_u64[0])
- return (0);
- if (p1->raddr.start.ip6.as_u64[1] != p2->raddr.start.ip6.as_u64[1])
- return (0);
- if (p1->raddr.stop.ip6.as_u64[0] != p2->raddr.stop.ip6.as_u64[0])
- return (0);
- if (p1->laddr.stop.ip6.as_u64[1] != p2->laddr.stop.ip6.as_u64[1])
- return (0);
- }
- else
- {
- if (p1->laddr.start.ip4.as_u32 != p2->laddr.start.ip4.as_u32)
- return (0);
- if (p1->laddr.stop.ip4.as_u32 != p2->laddr.stop.ip4.as_u32)
- return (0);
- if (p1->raddr.start.ip4.as_u32 != p2->raddr.start.ip4.as_u32)
- return (0);
- if (p1->raddr.stop.ip4.as_u32 != p2->raddr.stop.ip4.as_u32)
- return (0);
- }
- return (1);
-}
-
-static int
-ipsec_spd_entry_sort (void *a1, void *a2)
-{
- ipsec_main_t *im = &ipsec_main;
- u32 *id1 = a1;
- u32 *id2 = a2;
- ipsec_policy_t *p1, *p2;
-
- p1 = pool_elt_at_index (im->policies, *id1);
- p2 = pool_elt_at_index (im->policies, *id2);
- if (p1 && p2)
- return p2->priority - p1->priority;
-
- return 0;
-}
-
int
ipsec_policy_mk_type (bool is_outbound,
bool is_ipv6,
@@ -136,6 +64,44 @@ ipsec_policy_mk_type (bool is_outbound,
return (-1);
}
+static_always_inline int
+ipsec_is_policy_inbound (ipsec_policy_t *policy)
+{
+ if (policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD)
+ return 1;
+
+ return 0;
+}
+
+static_always_inline int
+ipsec_is_fp_enabled (ipsec_main_t *im, ipsec_spd_t *spd,
+ ipsec_policy_t *policy)
+{
+ if ((im->fp_spd_ipv4_out_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip4_out_lookup_hash_idx) &&
+ policy->type == IPSEC_SPD_POLICY_IP4_OUTBOUND) ||
+ (im->fp_spd_ipv4_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip4_in_lookup_hash_idx) &&
+ (policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD)) ||
+ (im->fp_spd_ipv6_in_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip6_in_lookup_hash_idx) &&
+ (policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_BYPASS ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_DISCARD)) ||
+ (im->fp_spd_ipv6_out_is_enabled &&
+ PREDICT_TRUE (INDEX_INVALID != spd->fp_spd.ip6_out_lookup_hash_idx) &&
+ policy->type == IPSEC_SPD_POLICY_IP6_OUTBOUND))
+ return 1;
+ return 0;
+}
+
int
ipsec_add_del_policy (vlib_main_t * vm,
ipsec_policy_t * policy, int is_add, u32 * stat_index)
@@ -156,9 +122,58 @@ ipsec_add_del_policy (vlib_main_t * vm,
if (!spd)
return VNET_API_ERROR_SYSCALL_ERROR_1;
+ if (im->output_flow_cache_flag && !policy->is_ipv6 &&
+ policy->type == IPSEC_SPD_POLICY_IP4_OUTBOUND)
+ {
+ /*
+ * Flow cache entry is valid only when epoch_count value in control
+ * plane and data plane match. Otherwise, flow cache entry is considered
+ * stale. To avoid the race condition of using old epoch_count value
+ * in data plane after the roll over of epoch_count in control plane,
+ * entire flow cache is reset.
+ */
+ if (im->epoch_count == 0xFFFFFFFF)
+ {
+ /* Reset all the entries in flow cache */
+ clib_memset_u8 (im->ipsec4_out_spd_hash_tbl, 0,
+ im->ipsec4_out_spd_hash_num_buckets *
+ (sizeof (*(im->ipsec4_out_spd_hash_tbl))));
+ }
+ /* Increment epoch counter by 1 */
+ clib_atomic_fetch_add_relax (&im->epoch_count, 1);
+ /* Reset spd flow cache counter since all old entries are stale */
+ clib_atomic_store_relax_n (&im->ipsec4_out_spd_flow_cache_entries, 0);
+ }
+
+ if ((policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_BYPASS ||
+ policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_DISCARD) &&
+ im->input_flow_cache_flag && !policy->is_ipv6)
+ {
+ /*
+ * Flow cache entry is valid only when input_epoch_count value in control
+ * plane and data plane match. Otherwise, flow cache entry is considered
+ * stale. To avoid the race condition of using old input_epoch_count
+ * value in data plane after the roll over of input_epoch_count in
+ * control plane, entire flow cache is reset.
+ */
+ if (im->input_epoch_count == 0xFFFFFFFF)
+ {
+ /* Reset all the entries in flow cache */
+ clib_memset_u8 (im->ipsec4_in_spd_hash_tbl, 0,
+ im->ipsec4_in_spd_hash_num_buckets *
+ (sizeof (*(im->ipsec4_in_spd_hash_tbl))));
+ }
+ /* Increment epoch counter by 1 */
+ clib_atomic_fetch_add_relax (&im->input_epoch_count, 1);
+ /* Reset spd flow cache counter since all old entries are stale */
+ im->ipsec4_in_spd_flow_cache_entries = 0;
+ }
+
if (is_add)
{
u32 policy_index;
+ u32 i;
if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
{
@@ -171,6 +186,14 @@ ipsec_add_del_policy (vlib_main_t * vm,
else
policy->sa_index = INDEX_INVALID;
+ /**
+ * Try adding the policy into fast path SPD first. Only adding to
+ * traditional SPD when failed.
+ **/
+ if (ipsec_is_fp_enabled (im, spd, policy))
+ return ipsec_fp_add_del_policy ((void *) &spd->fp_spd, policy, 1,
+ stat_index);
+
pool_get (im->policies, vp);
clib_memcpy (vp, policy, sizeof (*vp));
policy_index = vp - im->policies;
@@ -179,22 +202,56 @@ ipsec_add_del_policy (vlib_main_t * vm,
policy_index);
vlib_zero_combined_counter (&ipsec_spd_policy_counters, policy_index);
- vec_add1 (spd->policies[policy->type], policy_index);
- vec_sort_with_function (spd->policies[policy->type],
- ipsec_spd_entry_sort);
+ vec_foreach_index (i, spd->policies[policy->type])
+ {
+ ipsec_policy_t *p =
+ pool_elt_at_index (im->policies, spd->policies[policy->type][i]);
+
+ if (p->priority <= vp->priority)
+ {
+ break;
+ }
+ }
+
+ vec_insert_elts (spd->policies[policy->type], &policy_index, 1, i);
+
*stat_index = policy_index;
}
else
{
u32 ii;
+ /**
+ * Try to delete the policy from the fast path SPD first. Delete from
+ * traditional SPD when fp delete fails.
+ **/
+
+ if (ipsec_is_fp_enabled (im, spd, policy))
+
+ {
+ if (policy->policy == IPSEC_POLICY_ACTION_PROTECT)
+ {
+ index_t sa_index = ipsec_sa_find_and_lock (policy->sa_id);
+
+ if (INDEX_INVALID == sa_index)
+ return VNET_API_ERROR_SYSCALL_ERROR_1;
+ policy->sa_index = sa_index;
+ ipsec_sa_unlock_id (policy->sa_id);
+ }
+ else
+ policy->sa_index = INDEX_INVALID;
+
+ return ipsec_fp_add_del_policy ((void *) &spd->fp_spd, policy, 0,
+ stat_index);
+ }
+
vec_foreach_index (ii, (spd->policies[policy->type]))
{
vp = pool_elt_at_index (im->policies,
spd->policies[policy->type][ii]);
if (ipsec_policy_is_equal (vp, policy))
{
- vec_del1 (spd->policies[policy->type], ii);
+ vec_delete (spd->policies[policy->type], 1, ii);
ipsec_sa_unlock (vp->sa_index);
pool_put (im->policies, vp);
break;
@@ -205,6 +262,673 @@ ipsec_add_del_policy (vlib_main_t * vm,
return 0;
}
+static_always_inline void
+ipsec_fp_release_mask_type (ipsec_main_t *im, u32 mask_type_index)
+{
+ ipsec_fp_mask_type_entry_t *mte =
+ pool_elt_at_index (im->fp_mask_types, mask_type_index);
+ mte->refcount--;
+ if (mte->refcount == 0)
+ {
+ /* this entry is not in use anymore */
+ ASSERT (clib_memset (mte, 0xae, sizeof (*mte)) == EOK);
+ pool_put (im->fp_mask_types, mte);
+ }
+}
+
+static_always_inline u32
+find_mask_type_index (ipsec_main_t *im, ipsec_fp_5tuple_t *mask)
+{
+ ipsec_fp_mask_type_entry_t *mte;
+
+ pool_foreach (mte, im->fp_mask_types)
+ {
+ if (memcmp (&mte->mask, mask, sizeof (*mask)) == 0)
+ return (mte - im->fp_mask_types);
+ }
+
+ return ~0;
+}
+
+static_always_inline void
+fill_ip6_hash_policy_kv (ipsec_fp_5tuple_t *match, ipsec_fp_5tuple_t *mask,
+ clib_bihash_kv_40_8_t *kv)
+{
+ ipsec_fp_lookup_value_t *kv_val = (ipsec_fp_lookup_value_t *) &kv->value;
+ u64 *pmatch = (u64 *) match->kv_40_8.key;
+ u64 *pmask = (u64 *) mask->kv_40_8.key;
+ u64 *pkey = (u64 *) kv->key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ kv_val->as_u64 = 0;
+}
+
+static_always_inline void
+fill_ip4_hash_policy_kv (ipsec_fp_5tuple_t *match, ipsec_fp_5tuple_t *mask,
+ clib_bihash_kv_16_8_t *kv)
+{
+ ipsec_fp_lookup_value_t *kv_val = (ipsec_fp_lookup_value_t *) &kv->value;
+ u64 *pmatch = (u64 *) match->kv_16_8.key;
+ u64 *pmask = (u64 *) mask->kv_16_8.key;
+ u64 *pkey = (u64 *) kv->key;
+
+ *pkey++ = *pmatch++ & *pmask++;
+ *pkey = *pmatch & *pmask;
+
+ kv_val->as_u64 = 0;
+}
+
+static_always_inline u16
+mask_out_highest_set_bit_u16 (u16 x)
+{
+ x |= x >> 8;
+ x |= x >> 4;
+ x |= x >> 2;
+ x |= x >> 1;
+ return ~x;
+}
+
+static_always_inline u32
+mask_out_highest_set_bit_u32 (u32 x)
+{
+ x |= x >> 16;
+ x |= x >> 8;
+ x |= x >> 4;
+ x |= x >> 2;
+ x |= x >> 1;
+ return ~x;
+}
+
+static_always_inline u64
+mask_out_highest_set_bit_u64 (u64 x)
+{
+ x |= x >> 32;
+ x |= x >> 16;
+ x |= x >> 8;
+ x |= x >> 4;
+ x |= x >> 2;
+ x |= x >> 1;
+ return ~x;
+}
+
+static_always_inline void
+ipsec_fp_get_policy_ports_mask (ipsec_policy_t *policy,
+ ipsec_fp_5tuple_t *mask)
+{
+ if (PREDICT_TRUE ((policy->protocol == IP_PROTOCOL_TCP) ||
+ (policy->protocol == IP_PROTOCOL_UDP) ||
+ (policy->protocol == IP_PROTOCOL_SCTP)))
+ {
+ mask->lport = policy->lport.start ^ policy->lport.stop;
+ mask->rport = policy->rport.start ^ policy->rport.stop;
+
+ mask->lport = mask_out_highest_set_bit_u16 (mask->lport);
+
+ mask->rport = mask_out_highest_set_bit_u16 (mask->rport);
+ }
+ else
+ {
+ mask->lport = 0;
+ mask->rport = 0;
+ }
+
+ mask->protocol = (policy->protocol == IPSEC_POLICY_PROTOCOL_ANY) ? 0 : ~0;
+}
+
+static_always_inline void
+ipsec_fp_ip4_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask,
+ bool inbound)
+{
+ u32 *pladdr_start = (u32 *) &policy->laddr.start.ip4;
+ u32 *pladdr_stop = (u32 *) &policy->laddr.stop.ip4;
+ u32 *plmask = (u32 *) &mask->laddr;
+ u32 *praddr_start = (u32 *) &policy->raddr.start.ip4;
+ u32 *praddr_stop = (u32 *) &policy->raddr.stop.ip4;
+ u32 *prmask = (u32 *) &mask->raddr;
+
+ clib_memset_u8 (mask, 0xff, sizeof (ipsec_fp_5tuple_t));
+ clib_memset_u8 (&mask->l3_zero_pad, 0, sizeof (mask->l3_zero_pad));
+
+ if (inbound && (policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT &&
+ policy->sa_index != INDEX_INVALID))
+ {
+ ipsec_sa_t *s = ipsec_sa_get (policy->sa_index);
+
+ if (ipsec_sa_is_set_IS_TUNNEL (s))
+ goto set_spi_mask;
+ }
+
+ /* find bits where start != stop */
+ *plmask = *pladdr_start ^ *pladdr_stop;
+ *prmask = *praddr_start ^ *praddr_stop;
+ /* Find most significant bit set (that is the first position
+ * start differs from stop). Mask out everything after that bit and
+ * the bit itself. Remember that policy stores start and stop in the net
+ * order.
+ */
+ *plmask = clib_host_to_net_u32 (
+ mask_out_highest_set_bit_u32 (clib_net_to_host_u32 (*plmask)));
+
+ *prmask = clib_host_to_net_u32 (
+ mask_out_highest_set_bit_u32 (clib_net_to_host_u32 (*prmask)));
+
+set_spi_mask:
+ if (inbound)
+ {
+ if (policy->type != IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT)
+ mask->spi = 0;
+
+ mask->protocol = 0;
+ }
+ else
+ {
+ mask->action = 0;
+ ipsec_fp_get_policy_ports_mask (policy, mask);
+ }
+}
+
+static_always_inline void
+ipsec_fp_ip6_get_policy_mask (ipsec_policy_t *policy, ipsec_fp_5tuple_t *mask,
+ bool inbound)
+{
+ u64 *pladdr_start = (u64 *) &policy->laddr.start;
+ u64 *pladdr_stop = (u64 *) &policy->laddr.stop;
+ u64 *plmask = (u64 *) &mask->ip6_laddr;
+ u64 *praddr_start = (u64 *) &policy->raddr.start;
+ u64 *praddr_stop = (u64 *) &policy->raddr.stop;
+ u64 *prmask = (u64 *) &mask->ip6_raddr;
+
+ clib_memset_u8 (mask, 0xff, sizeof (ipsec_fp_5tuple_t));
+
+ if (inbound && (policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT &&
+ policy->sa_index != INDEX_INVALID))
+ {
+ ipsec_sa_t *s = ipsec_sa_get (policy->sa_index);
+
+ if (ipsec_sa_is_set_IS_TUNNEL (s))
+ goto set_spi_mask;
+ }
+
+ *plmask = (*pladdr_start++ ^ *pladdr_stop++);
+
+ *prmask = (*praddr_start++ ^ *praddr_stop++);
+
+ /* Find most significant bit set (that is the first position
+ * start differs from stop). Mask out everything after that bit and
+ * the bit itself. Remember that policy stores start and stop in the net
+ * order.
+ */
+ *plmask = clib_host_to_net_u64 (
+ mask_out_highest_set_bit_u64 (clib_net_to_host_u64 (*plmask)));
+
+ if (*plmask++ & clib_host_to_net_u64 (0x1))
+ {
+ *plmask = (*pladdr_start ^ *pladdr_stop);
+ *plmask = clib_host_to_net_u64 (
+ mask_out_highest_set_bit_u64 (clib_net_to_host_u64 (*plmask)));
+ }
+ else
+ *plmask = 0;
+
+ *prmask = clib_host_to_net_u64 (
+ mask_out_highest_set_bit_u64 (clib_net_to_host_u64 (*prmask)));
+
+ if (*prmask++ & clib_host_to_net_u64 (0x1))
+ {
+ *prmask = (*praddr_start ^ *praddr_stop);
+ *prmask = clib_host_to_net_u64 (
+ mask_out_highest_set_bit_u64 (clib_net_to_host_u64 (*prmask)));
+ }
+ else
+ *prmask = 0;
+set_spi_mask:
+ if (inbound)
+ {
+ if (policy->type != IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT)
+ mask->spi = 0;
+
+ mask->protocol = 0;
+ }
+ else
+ {
+ mask->action = 0;
+ ipsec_fp_get_policy_ports_mask (policy, mask);
+ }
+}
+
+static_always_inline void
+ipsec_fp_get_policy_5tuple (ipsec_policy_t *policy, ipsec_fp_5tuple_t *tuple,
+ bool inbound)
+{
+ memset (tuple, 0, sizeof (*tuple));
+ tuple->is_ipv6 = policy->is_ipv6;
+ if (tuple->is_ipv6)
+ {
+ tuple->ip6_laddr = policy->laddr.start.ip6;
+ tuple->ip6_raddr = policy->raddr.start.ip6;
+ }
+ else
+ {
+ tuple->laddr = policy->laddr.start.ip4;
+ tuple->raddr = policy->raddr.start.ip4;
+ }
+
+ if (inbound)
+ {
+
+ if ((policy->type == IPSEC_SPD_POLICY_IP4_INBOUND_PROTECT ||
+ policy->type == IPSEC_SPD_POLICY_IP6_INBOUND_PROTECT) &&
+ policy->sa_index != INDEX_INVALID)
+ {
+ ipsec_sa_t *s = ipsec_sa_get (policy->sa_index);
+
+ tuple->spi = s->spi;
+ if (ipsec_sa_is_set_IS_TUNNEL (s))
+ {
+ if (tuple->is_ipv6)
+ {
+ tuple->ip6_laddr = s->tunnel.t_dst.ip.ip6;
+ tuple->ip6_raddr = s->tunnel.t_src.ip.ip6;
+ }
+ else
+ {
+ tuple->laddr = s->tunnel.t_dst.ip.ip4;
+ tuple->raddr = s->tunnel.t_src.ip.ip4;
+ }
+ }
+ }
+ else
+ tuple->spi = INDEX_INVALID;
+ tuple->action = policy->type;
+ return;
+ }
+
+ tuple->protocol = policy->protocol;
+ tuple->lport = policy->lport.start;
+ tuple->rport = policy->rport.start;
+}
+
+static_always_inline int
+ipsec_fp_mask_type_idx_cmp (ipsec_fp_mask_id_t *mask_id, u32 *idx)
+{
+ return mask_id->mask_type_idx == *idx;
+}
+
+int
+ipsec_fp_ip4_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
+ ipsec_policy_t *policy, u32 *stat_index)
+{
+ u32 mask_index, searched_idx;
+ ipsec_policy_t *vp;
+ ipsec_fp_mask_type_entry_t *mte;
+ u32 policy_index;
+ clib_bihash_kv_16_8_t kv;
+ clib_bihash_kv_16_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ ipsec_fp_lookup_value_t *key_val = (ipsec_fp_lookup_value_t *) &kv.value;
+
+ ipsec_fp_5tuple_t mask, policy_5tuple;
+ int res;
+ bool inbound = ipsec_is_policy_inbound (policy);
+ clib_bihash_16_8_t *bihash_table =
+ inbound ? pool_elt_at_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_in_lookup_hash_idx) :
+ pool_elt_at_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_out_lookup_hash_idx);
+
+ ipsec_fp_ip4_get_policy_mask (policy, &mask, inbound);
+ pool_get (im->policies, vp);
+ policy_index = vp - im->policies;
+ vlib_validate_combined_counter (&ipsec_spd_policy_counters, policy_index);
+ vlib_zero_combined_counter (&ipsec_spd_policy_counters, policy_index);
+ *stat_index = policy_index;
+ mask_index = find_mask_type_index (im, &mask);
+
+ if (mask_index == ~0)
+ {
+ /* mask type not found, we need to create a new entry */
+ pool_get (im->fp_mask_types, mte);
+ mask_index = mte - im->fp_mask_types;
+ mte->refcount = 0;
+ }
+ else
+ mte = im->fp_mask_types + mask_index;
+
+ policy->fp_mask_type_id = mask_index;
+ ipsec_fp_get_policy_5tuple (policy, &policy_5tuple, inbound);
+
+ fill_ip4_hash_policy_kv (&policy_5tuple, &mask, &kv);
+
+ res = clib_bihash_search_inline_2_16_8 (bihash_table, &kv, &result);
+ if (res != 0)
+ {
+ /* key was not found crate a new entry */
+ vec_add1 (key_val->fp_policies_ids, policy_index);
+ res = clib_bihash_add_del_16_8 (bihash_table, &kv, 1);
+
+ if (res != 0)
+ goto error;
+ }
+ else
+ {
+ u32 i;
+ u32 *old_fp_policies_ids = result_val->fp_policies_ids;
+
+ vec_foreach_index (i, result_val->fp_policies_ids)
+ {
+ ipsec_policy_t *p =
+ pool_elt_at_index (im->policies, result_val->fp_policies_ids[i]);
+
+ if (p->priority <= policy->priority)
+ {
+ break;
+ }
+ }
+
+ vec_insert_elts (result_val->fp_policies_ids, &policy_index, 1, i);
+
+ if (result_val->fp_policies_ids != old_fp_policies_ids)
+ {
+ res = clib_bihash_add_del_16_8 (bihash_table, &result, 1);
+
+ if (res != 0)
+ goto error;
+ }
+ }
+
+ if (mte->refcount == 0)
+ {
+ clib_memcpy (&mte->mask, &mask, sizeof (mask));
+ mte->refcount = 0;
+ }
+
+ searched_idx =
+ vec_search_with_function (fp_spd->fp_mask_ids[policy->type], &mask_index,
+ ipsec_fp_mask_type_idx_cmp);
+ if (~0 == searched_idx)
+ {
+ ipsec_fp_mask_id_t mask_id = { mask_index, 1 };
+ vec_add1 (fp_spd->fp_mask_ids[policy->type], mask_id);
+ }
+ else
+ (fp_spd->fp_mask_ids[policy->type] + searched_idx)->refcount++;
+
+ mte->refcount++;
+ clib_memcpy (vp, policy, sizeof (*vp));
+
+ return 0;
+
+error:
+ pool_put (im->policies, vp);
+ ipsec_fp_release_mask_type (im, mask_index);
+ return -1;
+}
+
+int
+ipsec_fp_ip6_add_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
+ ipsec_policy_t *policy, u32 *stat_index)
+{
+
+ u32 mask_index, searched_idx;
+ ipsec_policy_t *vp;
+ ipsec_fp_mask_type_entry_t *mte;
+ u32 policy_index;
+ clib_bihash_kv_40_8_t kv;
+ clib_bihash_kv_40_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ ipsec_fp_lookup_value_t *key_val = (ipsec_fp_lookup_value_t *) &kv.value;
+
+ ipsec_fp_5tuple_t mask, policy_5tuple;
+ int res;
+ bool inbound = ipsec_is_policy_inbound (policy);
+
+ ipsec_fp_ip6_get_policy_mask (policy, &mask, inbound);
+ pool_get (im->policies, vp);
+ policy_index = vp - im->policies;
+ vlib_validate_combined_counter (&ipsec_spd_policy_counters, policy_index);
+ vlib_zero_combined_counter (&ipsec_spd_policy_counters, policy_index);
+ *stat_index = policy_index;
+ mask_index = find_mask_type_index (im, &mask);
+ clib_bihash_40_8_t *bihash_table =
+ inbound ? pool_elt_at_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_in_lookup_hash_idx) :
+ pool_elt_at_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_out_lookup_hash_idx);
+
+ if (mask_index == ~0)
+ {
+ /* mask type not found, we need to create a new entry */
+ pool_get (im->fp_mask_types, mte);
+ mask_index = mte - im->fp_mask_types;
+ mte->refcount = 0;
+ }
+ else
+ mte = im->fp_mask_types + mask_index;
+
+ policy->fp_mask_type_id = mask_index;
+ ipsec_fp_get_policy_5tuple (policy, &policy_5tuple, inbound);
+
+ fill_ip6_hash_policy_kv (&policy_5tuple, &mask, &kv);
+
+ res = clib_bihash_search_inline_2_40_8 (bihash_table, &kv, &result);
+ if (res != 0)
+ {
+ /* key was not found crate a new entry */
+ vec_add1 (key_val->fp_policies_ids, policy_index);
+ res = clib_bihash_add_del_40_8 (bihash_table, &kv, 1);
+ if (res != 0)
+ goto error;
+ }
+ else
+ {
+ u32 i;
+ u32 *old_fp_policies_ids = result_val->fp_policies_ids;
+
+ vec_foreach_index (i, result_val->fp_policies_ids)
+ {
+ ipsec_policy_t *p =
+ pool_elt_at_index (im->policies, result_val->fp_policies_ids[i]);
+
+ if (p->priority <= policy->priority)
+ {
+ break;
+ }
+ }
+
+ vec_insert_elts (result_val->fp_policies_ids, &policy_index, 1, i);
+
+ if (result_val->fp_policies_ids != old_fp_policies_ids)
+ {
+ res = clib_bihash_add_del_40_8 (bihash_table, &result, 1);
+
+ if (res != 0)
+ goto error;
+ }
+ }
+
+ if (mte->refcount == 0)
+ {
+ clib_memcpy (&mte->mask, &mask, sizeof (mask));
+ mte->refcount = 0;
+ }
+
+ searched_idx =
+ vec_search_with_function (fp_spd->fp_mask_ids[policy->type], &mask_index,
+ ipsec_fp_mask_type_idx_cmp);
+ if (~0 == searched_idx)
+ {
+ ipsec_fp_mask_id_t mask_id = { mask_index, 1 };
+ vec_add1 (fp_spd->fp_mask_ids[policy->type], mask_id);
+ }
+ else
+ (fp_spd->fp_mask_ids[policy->type] + searched_idx)->refcount++;
+
+ mte->refcount++;
+ clib_memcpy (vp, policy, sizeof (*vp));
+
+ return 0;
+
+error:
+ pool_put (im->policies, vp);
+ ipsec_fp_release_mask_type (im, mask_index);
+ return -1;
+}
+
+int
+ipsec_fp_ip6_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
+ ipsec_policy_t *policy)
+{
+ int res;
+ ipsec_fp_5tuple_t mask = { 0 }, policy_5tuple;
+ clib_bihash_kv_40_8_t kv;
+ clib_bihash_kv_40_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ bool inbound = ipsec_is_policy_inbound (policy);
+ clib_bihash_40_8_t *bihash_table =
+ inbound ? pool_elt_at_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_in_lookup_hash_idx) :
+ pool_elt_at_index (im->fp_ip6_lookup_hashes_pool,
+ fp_spd->ip6_out_lookup_hash_idx);
+
+ ipsec_policy_t *vp;
+ u32 ii, imt;
+
+ ipsec_fp_ip6_get_policy_mask (policy, &mask, inbound);
+ ipsec_fp_get_policy_5tuple (policy, &policy_5tuple, inbound);
+ fill_ip6_hash_policy_kv (&policy_5tuple, &mask, &kv);
+ res = clib_bihash_search_inline_2_40_8 (bihash_table, &kv, &result);
+ if (res != 0)
+ return -1;
+
+ vec_foreach_index (ii, result_val->fp_policies_ids)
+ {
+ vp =
+ pool_elt_at_index (im->policies, *(result_val->fp_policies_ids + ii));
+ if (ipsec_policy_is_equal (vp, policy))
+ {
+ if (vec_len (result_val->fp_policies_ids) == 1)
+ {
+ vec_free (result_val->fp_policies_ids);
+ clib_bihash_add_del_40_8 (bihash_table, &result, 0);
+ }
+ else
+ vec_delete (result_val->fp_policies_ids, 1, ii);
+
+ vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type])
+ {
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->mask_type_idx ==
+ vp->fp_mask_type_id)
+ {
+
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->refcount-- ==
+ 1)
+ vec_del1 (fp_spd->fp_mask_ids[policy->type], imt);
+
+ break;
+ }
+ }
+
+ ipsec_fp_release_mask_type (im, vp->fp_mask_type_id);
+ ipsec_sa_unlock (vp->sa_index);
+ pool_put (im->policies, vp);
+ return 0;
+ }
+ }
+ return -1;
+}
+
+int
+ipsec_fp_ip4_del_policy (ipsec_main_t *im, ipsec_spd_fp_t *fp_spd,
+ ipsec_policy_t *policy)
+{
+ int res;
+ ipsec_fp_5tuple_t mask = { 0 }, policy_5tuple;
+ clib_bihash_kv_16_8_t kv;
+ clib_bihash_kv_16_8_t result;
+ ipsec_fp_lookup_value_t *result_val =
+ (ipsec_fp_lookup_value_t *) &result.value;
+ bool inbound = ipsec_is_policy_inbound (policy);
+ ipsec_policy_t *vp;
+ u32 ii, imt;
+ clib_bihash_16_8_t *bihash_table =
+ inbound ? pool_elt_at_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_in_lookup_hash_idx) :
+ pool_elt_at_index (im->fp_ip4_lookup_hashes_pool,
+ fp_spd->ip4_out_lookup_hash_idx);
+
+ ipsec_fp_ip4_get_policy_mask (policy, &mask, inbound);
+ ipsec_fp_get_policy_5tuple (policy, &policy_5tuple, inbound);
+ fill_ip4_hash_policy_kv (&policy_5tuple, &mask, &kv);
+ res = clib_bihash_search_inline_2_16_8 (bihash_table, &kv, &result);
+
+ if (res != 0)
+ return -1;
+
+ vec_foreach_index (ii, result_val->fp_policies_ids)
+ {
+ vp =
+ pool_elt_at_index (im->policies, *(result_val->fp_policies_ids + ii));
+ if (ipsec_policy_is_equal (vp, policy))
+ {
+ if (vec_len (result_val->fp_policies_ids) == 1)
+ {
+ vec_free (result_val->fp_policies_ids);
+ clib_bihash_add_del_16_8 (bihash_table, &result, 0);
+ }
+ else
+ vec_delete (result_val->fp_policies_ids, 1, ii);
+
+ vec_foreach_index (imt, fp_spd->fp_mask_ids[policy->type])
+ {
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->mask_type_idx ==
+ vp->fp_mask_type_id)
+ {
+
+ if ((fp_spd->fp_mask_ids[policy->type] + imt)->refcount-- ==
+ 1)
+ vec_del1 (fp_spd->fp_mask_ids[policy->type], imt);
+
+ break;
+ }
+ }
+ ipsec_fp_release_mask_type (im, vp->fp_mask_type_id);
+ ipsec_sa_unlock (vp->sa_index);
+ pool_put (im->policies, vp);
+ return 0;
+ }
+ }
+ return -1;
+}
+
+int
+ipsec_fp_add_del_policy (void *fp_spd, ipsec_policy_t *policy, int is_add,
+ u32 *stat_index)
+{
+ ipsec_main_t *im = &ipsec_main;
+
+ if (is_add)
+ if (policy->is_ipv6)
+ return ipsec_fp_ip6_add_policy (im, (ipsec_spd_fp_t *) fp_spd, policy,
+ stat_index);
+ else
+ return ipsec_fp_ip4_add_policy (im, (ipsec_spd_fp_t *) fp_spd, policy,
+ stat_index);
+
+ else if (policy->is_ipv6)
+
+ return ipsec_fp_ip6_del_policy (im, (ipsec_spd_fp_t *) fp_spd, policy);
+ else
+ return ipsec_fp_ip4_del_policy (im, (ipsec_spd_fp_t *) fp_spd, policy);
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/ipsec/ipsec_spd_policy.h b/src/vnet/ipsec/ipsec_spd_policy.h
index 6d6b69592b0..34f444efb9c 100644
--- a/src/vnet/ipsec/ipsec_spd_policy.h
+++ b/src/vnet/ipsec/ipsec_spd_policy.h
@@ -15,7 +15,30 @@
#ifndef __IPSEC_SPD_POLICY_H__
#define __IPSEC_SPD_POLICY_H__
+#include <vppinfra/bihash_40_8.h>
+#include <vppinfra/bihash_16_8.h>
#include <vnet/ipsec/ipsec_spd.h>
+/**
+ * calculated as max number of flows (2^10) divided by KVP_PER_PAGE (4)
+ */
+#define IPSEC_FP_HASH_LOOKUP_HASH_BUCKETS (1 << 8)
+
+#define IPSEC_POLICY_PROTOCOL_ANY IP_PROTOCOL_RESERVED
+
+/**
+ * This number is calculated as ceil power of 2 for the number
+ * sizeof(clib_bihash_kv_16_8_t)=24 * BIHASH_KVP_PER_PAGE=4 * COLLISIONS_NO=8
+ *
+ */
+
+#define IPSEC_FP_IP4_HASH_MEM_PER_BUCKET 1024
+
+/**
+ * This number is calculated as ceil power of 2 for the number
+ * sizeof(clib_bihash_kv_40_8_t)=48 * BIHASH_KVP_PER_PAGE=4 * COLLISIONS_NO=8
+ *
+ */
+#define IPSEC_FP_IP6_HASH_MEM_PER_BUCKET 2048
#define foreach_ipsec_policy_action \
_ (0, BYPASS, "bypass") \
@@ -71,6 +94,7 @@ typedef struct ipsec_policy_t_
ipsec_policy_action_t policy;
u32 sa_id;
u32 sa_index;
+ u32 fp_mask_type_id;
} ipsec_policy_t;
/**
@@ -91,6 +115,135 @@ extern int ipsec_policy_mk_type (bool is_outbound,
ipsec_policy_action_t action,
ipsec_spd_policy_type_t * type);
+/* A 5-tuple used to calculate the bihash entry */
+typedef union
+{
+ struct
+ {
+ union
+ {
+ struct
+ {
+ u32 l3_zero_pad[6];
+ ip4_address_t laddr;
+ ip4_address_t raddr;
+ };
+ struct
+ {
+ ip6_address_t ip6_laddr;
+ ip6_address_t ip6_raddr;
+ };
+ };
+ union
+ {
+ struct
+ {
+ u16 lport;
+ u16 rport;
+ };
+ u32 spi;
+ };
+ u8 protocol;
+ u8 action;
+ u16 is_ipv6;
+ };
+ /* for ipv6 */
+ clib_bihash_kv_40_8_t kv_40_8;
+ /* for ipv4 */
+ struct
+ {
+ u64 padding_for_kv_16_8[3];
+ clib_bihash_kv_16_8_t kv_16_8;
+ };
+} ipsec_fp_5tuple_t;
+
+/*
+ * An element describing a particular policy mask,
+ * and refcount of policies with same mask.
+ */
+typedef struct
+{
+ /** Required for pool_get_aligned */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ ipsec_fp_5tuple_t mask;
+ u32 refcount; /* counts how many policies use this mask */
+} ipsec_fp_mask_type_entry_t;
+
+/*
+ * Bihash lookup value,
+ * contains an unordered vector of policies indices in policy pool.
+ */
+typedef union
+{
+ u64 as_u64;
+ struct
+ {
+ u32 *fp_policies_ids;
+ };
+} ipsec_fp_lookup_value_t;
+
+/**
+ * @brief add or delete a fast path policy
+ */
+int ipsec_fp_add_del_policy (void *fp_spd, ipsec_policy_t *policy, int is_add,
+ u32 *stat_index);
+
+static_always_inline int
+ipsec_policy_is_equal (ipsec_policy_t *p1, ipsec_policy_t *p2)
+{
+ if (p1->priority != p2->priority)
+ return 0;
+ if (p1->type != p2->type)
+ return (0);
+ if (p1->policy != p2->policy)
+ return (0);
+ if (p1->sa_id != p2->sa_id)
+ return (0);
+ if (p1->protocol != p2->protocol)
+ return (0);
+ if (p1->lport.start != p2->lport.start)
+ return (0);
+ if (p1->lport.stop != p2->lport.stop)
+ return (0);
+ if (p1->rport.start != p2->rport.start)
+ return (0);
+ if (p1->rport.stop != p2->rport.stop)
+ return (0);
+ if (p1->is_ipv6 != p2->is_ipv6)
+ return (0);
+ if (p2->is_ipv6)
+ {
+ if (p1->laddr.start.ip6.as_u64[0] != p2->laddr.start.ip6.as_u64[0])
+ return (0);
+ if (p1->laddr.start.ip6.as_u64[1] != p2->laddr.start.ip6.as_u64[1])
+ return (0);
+ if (p1->laddr.stop.ip6.as_u64[0] != p2->laddr.stop.ip6.as_u64[0])
+ return (0);
+ if (p1->laddr.stop.ip6.as_u64[1] != p2->laddr.stop.ip6.as_u64[1])
+ return (0);
+ if (p1->raddr.start.ip6.as_u64[0] != p2->raddr.start.ip6.as_u64[0])
+ return (0);
+ if (p1->raddr.start.ip6.as_u64[1] != p2->raddr.start.ip6.as_u64[1])
+ return (0);
+ if (p1->raddr.stop.ip6.as_u64[0] != p2->raddr.stop.ip6.as_u64[0])
+ return (0);
+ if (p1->laddr.stop.ip6.as_u64[1] != p2->laddr.stop.ip6.as_u64[1])
+ return (0);
+ }
+ else
+ {
+ if (p1->laddr.start.ip4.as_u32 != p2->laddr.start.ip4.as_u32)
+ return (0);
+ if (p1->laddr.stop.ip4.as_u32 != p2->laddr.stop.ip4.as_u32)
+ return (0);
+ if (p1->raddr.start.ip4.as_u32 != p2->raddr.start.ip4.as_u32)
+ return (0);
+ if (p1->raddr.stop.ip4.as_u32 != p2->raddr.stop.ip4.as_u32)
+ return (0);
+ }
+ return (1);
+}
+
#endif /* __IPSEC_SPD_POLICY_H__ */
/*
diff --git a/src/vnet/ipsec/ipsec_test.c b/src/vnet/ipsec/ipsec_test.c
new file mode 100644
index 00000000000..86d09f18a5c
--- /dev/null
+++ b/src/vnet/ipsec/ipsec_test.c
@@ -0,0 +1,755 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vpp/api/types.h>
+
+#include <vnet/ipsec/ipsec.h>
+#include <vnet/ip/ip_types_api.h>
+
+#define __plugin_msg_base ipsec_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+#include <vlibmemory/vlib.api_enum.h>
+#include <vlibmemory/vlib.api_types.h>
+
+/* Declare message IDs */
+#include <vnet/format_fns.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
+#include <vnet/ipsec/ipsec.api_types.h>
+
+#define vl_endianfun /* define message structures */
+#include <vnet/ipsec/ipsec.api.h>
+#undef vl_endianfun
+
+#define vl_calcsizefun
+#include <vnet/ipsec/ipsec.api.h>
+#undef vl_calcsizefun
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ u32 ping_id;
+ vat_main_t *vat_main;
+} ipsec_test_main_t;
+
+static ipsec_test_main_t ipsec_test_main;
+
+static void
+vl_api_ipsec_spds_details_t_handler (vl_api_ipsec_spds_details_t *mp)
+{
+}
+
+static void
+vl_api_ipsec_itf_details_t_handler (vl_api_ipsec_itf_details_t *mp)
+{
+}
+
+static int
+api_ipsec_itf_delete (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_itf_create (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_ipsec_itf_create_reply_t_handler (vl_api_ipsec_itf_create_reply_t *vat)
+{
+}
+
+static int
+api_ipsec_spd_entry_add_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_spd_entry_add_del_t *mp;
+ u8 is_add = 1, is_outbound = 0;
+ u32 spd_id = 0, sa_id = 0, protocol = IPSEC_POLICY_PROTOCOL_ANY, policy = 0;
+ i32 priority = 0;
+ u32 rport_start = 0, rport_stop = (u32) ~0;
+ u32 lport_start = 0, lport_stop = (u32) ~0;
+ vl_api_address_t laddr_start = {}, laddr_stop = {}, raddr_start = {},
+ raddr_stop = {};
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ if (unformat (i, "outbound"))
+ is_outbound = 1;
+ if (unformat (i, "inbound"))
+ is_outbound = 0;
+ else if (unformat (i, "spd_id %d", &spd_id))
+ ;
+ else if (unformat (i, "sa_id %d", &sa_id))
+ ;
+ else if (unformat (i, "priority %d", &priority))
+ ;
+ else if (unformat (i, "protocol %d", &protocol))
+ ;
+ else if (unformat (i, "lport_start %d", &lport_start))
+ ;
+ else if (unformat (i, "lport_stop %d", &lport_stop))
+ ;
+ else if (unformat (i, "rport_start %d", &rport_start))
+ ;
+ else if (unformat (i, "rport_stop %d", &rport_stop))
+ ;
+ else if (unformat (i, "laddr_start %U", unformat_vl_api_address,
+ &laddr_start))
+ ;
+ else if (unformat (i, "laddr_stop %U", unformat_vl_api_address,
+ &laddr_stop))
+ ;
+ else if (unformat (i, "raddr_start %U", unformat_vl_api_address,
+ &raddr_start))
+ ;
+ else if (unformat (i, "raddr_stop %U", unformat_vl_api_address,
+ &raddr_stop))
+ ;
+ else if (unformat (i, "action %U", unformat_ipsec_policy_action,
+ &policy))
+ {
+ if (policy == IPSEC_POLICY_ACTION_RESOLVE)
+ {
+ clib_warning ("unsupported action: 'resolve'");
+ return -99;
+ }
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IPSEC_SPD_ENTRY_ADD_DEL, mp);
+
+ mp->is_add = is_add;
+
+ mp->entry.spd_id = ntohl (spd_id);
+ mp->entry.priority = ntohl (priority);
+ mp->entry.is_outbound = is_outbound;
+
+ clib_memcpy (&mp->entry.remote_address_start, &raddr_start,
+ sizeof (vl_api_address_t));
+ clib_memcpy (&mp->entry.remote_address_stop, &raddr_stop,
+ sizeof (vl_api_address_t));
+ clib_memcpy (&mp->entry.local_address_start, &laddr_start,
+ sizeof (vl_api_address_t));
+ clib_memcpy (&mp->entry.local_address_stop, &laddr_stop,
+ sizeof (vl_api_address_t));
+
+ mp->entry.protocol = protocol ? (u8) protocol : IPSEC_POLICY_PROTOCOL_ANY;
+ mp->entry.local_port_start = ntohs ((u16) lport_start);
+ mp->entry.local_port_stop = ntohs ((u16) lport_stop);
+ mp->entry.remote_port_start = ntohs ((u16) rport_start);
+ mp->entry.remote_port_stop = ntohs ((u16) rport_stop);
+ mp->entry.policy = (u8) policy;
+ mp->entry.sa_id = ntohl (sa_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ipsec_spd_entry_add_del_v2 (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_spd_entry_add_del_t *mp;
+ u8 is_add = 1, is_outbound = 0;
+ u32 spd_id = 0, sa_id = 0, protocol = IPSEC_POLICY_PROTOCOL_ANY, policy = 0;
+ i32 priority = 0;
+ u32 rport_start = 0, rport_stop = (u32) ~0;
+ u32 lport_start = 0, lport_stop = (u32) ~0;
+ vl_api_address_t laddr_start = {}, laddr_stop = {}, raddr_start = {},
+ raddr_stop = {};
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ if (unformat (i, "outbound"))
+ is_outbound = 1;
+ if (unformat (i, "inbound"))
+ is_outbound = 0;
+ else if (unformat (i, "spd_id %d", &spd_id))
+ ;
+ else if (unformat (i, "sa_id %d", &sa_id))
+ ;
+ else if (unformat (i, "priority %d", &priority))
+ ;
+ else if (unformat (i, "protocol %d", &protocol))
+ ;
+ else if (unformat (i, "lport_start %d", &lport_start))
+ ;
+ else if (unformat (i, "lport_stop %d", &lport_stop))
+ ;
+ else if (unformat (i, "rport_start %d", &rport_start))
+ ;
+ else if (unformat (i, "rport_stop %d", &rport_stop))
+ ;
+ else if (unformat (i, "laddr_start %U", unformat_vl_api_address,
+ &laddr_start))
+ ;
+ else if (unformat (i, "laddr_stop %U", unformat_vl_api_address,
+ &laddr_stop))
+ ;
+ else if (unformat (i, "raddr_start %U", unformat_vl_api_address,
+ &raddr_start))
+ ;
+ else if (unformat (i, "raddr_stop %U", unformat_vl_api_address,
+ &raddr_stop))
+ ;
+ else if (unformat (i, "action %U", unformat_ipsec_policy_action,
+ &policy))
+ {
+ if (policy == IPSEC_POLICY_ACTION_RESOLVE)
+ {
+ clib_warning ("unsupported action: 'resolve'");
+ return -99;
+ }
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IPSEC_SPD_ENTRY_ADD_DEL, mp);
+
+ mp->is_add = is_add;
+
+ mp->entry.spd_id = ntohl (spd_id);
+ mp->entry.priority = ntohl (priority);
+ mp->entry.is_outbound = is_outbound;
+
+ clib_memcpy (&mp->entry.remote_address_start, &raddr_start,
+ sizeof (vl_api_address_t));
+ clib_memcpy (&mp->entry.remote_address_stop, &raddr_stop,
+ sizeof (vl_api_address_t));
+ clib_memcpy (&mp->entry.local_address_start, &laddr_start,
+ sizeof (vl_api_address_t));
+ clib_memcpy (&mp->entry.local_address_stop, &laddr_stop,
+ sizeof (vl_api_address_t));
+
+ mp->entry.protocol = (u8) protocol;
+ mp->entry.local_port_start = ntohs ((u16) lport_start);
+ mp->entry.local_port_stop = ntohs ((u16) lport_stop);
+ mp->entry.remote_port_start = ntohs ((u16) rport_start);
+ mp->entry.remote_port_stop = ntohs ((u16) rport_stop);
+ mp->entry.policy = (u8) policy;
+ mp->entry.sa_id = ntohl (sa_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ipsec_spd_details_t_handler (vl_api_ipsec_spd_details_t *mp)
+{
+}
+
+static void
+vl_api_ipsec_sad_entry_add_del_reply_t_handler (
+ vl_api_ipsec_sad_entry_add_del_reply_t *mp)
+{
+}
+
+static void
+vl_api_ipsec_sad_entry_add_del_v3_reply_t_handler (
+ vl_api_ipsec_sad_entry_add_del_v3_reply_t *mp)
+{
+}
+
+static void
+vl_api_ipsec_sad_entry_add_reply_t_handler (
+ vl_api_ipsec_sad_entry_add_reply_t *mp)
+{
+}
+
+static void
+vl_api_ipsec_sad_entry_add_v2_reply_t_handler (
+ vl_api_ipsec_sad_entry_add_reply_t *mp)
+{
+}
+
+static int
+api_ipsec_sad_entry_del (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_sad_bind (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_sad_unbind (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_ipsec_sad_entry_add_del_v2_reply_t_handler (
+ vl_api_ipsec_sad_entry_add_del_v2_reply_t *mp)
+{
+}
+
+static void
+vl_api_ipsec_spd_interface_details_t_handler (
+ vl_api_ipsec_spd_interface_details_t *vat)
+{
+}
+
+static int
+api_ipsec_sad_entry_add_del_v3 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_sad_entry_update (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_tunnel_protect_update (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_ipsec_backend_details_t_handler (vl_api_ipsec_backend_details_t *mp)
+{
+}
+
+static int
+api_ipsec_sa_v3_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_sa_v4_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_sa_v5_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_tunnel_protect_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_tunnel_protect_del (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_ipsec_tunnel_protect_details_t_handler (
+ vl_api_ipsec_tunnel_protect_details_t *mp)
+{
+}
+
+static int
+api_ipsec_sad_entry_add (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_ipsec_sad_entry_add_v2 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_ipsec_spd_entry_add_del_reply_t_handler (
+ vl_api_ipsec_spd_entry_add_del_reply_t *mp)
+{
+}
+
+static void
+vl_api_ipsec_spd_entry_add_del_v2_reply_t_handler (
+ vl_api_ipsec_spd_entry_add_del_v2_reply_t *mp)
+{
+}
+
+static int
+api_ipsec_spds_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_ipsec_itf_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static void
+vl_api_ipsec_sa_v3_details_t_handler (vl_api_ipsec_sa_v3_details_t *mp)
+{
+}
+
+static void
+vl_api_ipsec_sa_v4_details_t_handler (vl_api_ipsec_sa_v4_details_t *mp)
+{
+}
+
+static void
+vl_api_ipsec_sa_v5_details_t_handler (vl_api_ipsec_sa_v5_details_t *mp)
+{
+}
+
+static int
+api_ipsec_spd_interface_dump (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_ipsec_sa_v2_details_t_handler (vl_api_ipsec_sa_v2_details_t *mp)
+{
+}
+
+static int
+api_ipsec_sa_v2_dump (vat_main_t *mp)
+{
+ return -1;
+}
+
+static int
+api_ipsec_sa_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_sa_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 sa_id = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sa_id %d", &sa_id))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IPSEC_SA_DUMP, mp);
+
+ mp->sa_id = ntohl (sa_id);
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&ipsec_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_ipsec_sa_details_t_handler (vl_api_ipsec_sa_details_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp,
+ "sa_id %u sw_if_index %u spi %u proto %u crypto_alg %u "
+ "crypto_key %U integ_alg %u integ_key %U flags %x "
+ "tunnel_src_addr %U tunnel_dst_addr %U "
+ "salt %u seq_outbound %lu last_seq_inbound %lu "
+ "replay_window %lu stat_index %u\n",
+ ntohl (mp->entry.sad_id), ntohl (mp->sw_if_index),
+ ntohl (mp->entry.spi), ntohl (mp->entry.protocol),
+ ntohl (mp->entry.crypto_algorithm), format_hex_bytes,
+ mp->entry.crypto_key.data, mp->entry.crypto_key.length,
+ ntohl (mp->entry.integrity_algorithm), format_hex_bytes,
+ mp->entry.integrity_key.data, mp->entry.integrity_key.length,
+ ntohl (mp->entry.flags), format_vl_api_address, &mp->entry.tunnel_src,
+ format_vl_api_address, &mp->entry.tunnel_dst, ntohl (mp->salt),
+ clib_net_to_host_u64 (mp->seq_outbound),
+ clib_net_to_host_u64 (mp->last_seq_inbound),
+ clib_net_to_host_u64 (mp->replay_window), ntohl (mp->stat_index));
+}
+
+static int
+api_ipsec_spd_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+uword
+unformat_ipsec_api_crypto_alg (unformat_input_t *input, va_list *args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0)
+ ;
+#define _(v, f, s) else if (unformat (input, s)) *r = IPSEC_API_CRYPTO_ALG_##f;
+ foreach_ipsec_crypto_alg
+#undef _
+ else return 0;
+ return 1;
+}
+
+uword
+unformat_ipsec_api_integ_alg (unformat_input_t *input, va_list *args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ if (0)
+ ;
+#define _(v, f, s) else if (unformat (input, s)) *r = IPSEC_API_INTEG_ALG_##f;
+ foreach_ipsec_integ_alg
+#undef _
+ else return 0;
+ return 1;
+}
+
+static int
+api_ipsec_sad_entry_add_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_sad_entry_add_del_t *mp;
+ u32 sad_id = 0, spi = 0;
+ u8 *ck = 0, *ik = 0;
+ u8 is_add = 1;
+
+ vl_api_ipsec_crypto_alg_t crypto_alg = IPSEC_API_CRYPTO_ALG_NONE;
+ vl_api_ipsec_integ_alg_t integ_alg = IPSEC_API_INTEG_ALG_NONE;
+ vl_api_ipsec_sad_flags_t flags = IPSEC_API_SAD_FLAG_NONE;
+ vl_api_ipsec_proto_t protocol = IPSEC_API_PROTO_AH;
+ vl_api_address_t tun_src, tun_dst;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "sad_id %d", &sad_id))
+ ;
+ else if (unformat (i, "spi %d", &spi))
+ ;
+ else if (unformat (i, "esp"))
+ protocol = IPSEC_API_PROTO_ESP;
+ else if (unformat (i, "tunnel_src %U", unformat_vl_api_address,
+ &tun_src))
+ {
+ flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL;
+ if (ADDRESS_IP6 == tun_src.af)
+ flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL_V6;
+ }
+ else if (unformat (i, "tunnel_dst %U", unformat_vl_api_address,
+ &tun_dst))
+ {
+ flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL;
+ if (ADDRESS_IP6 == tun_src.af)
+ flags |= IPSEC_API_SAD_FLAG_IS_TUNNEL_V6;
+ }
+ else if (unformat (i, "crypto_alg %U", unformat_ipsec_api_crypto_alg,
+ &crypto_alg))
+ ;
+ else if (unformat (i, "crypto_key %U", unformat_hex_string, &ck))
+ ;
+ else if (unformat (i, "integ_alg %U", unformat_ipsec_api_integ_alg,
+ &integ_alg))
+ ;
+ else if (unformat (i, "integ_key %U", unformat_hex_string, &ik))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ M (IPSEC_SAD_ENTRY_ADD_DEL, mp);
+
+ mp->is_add = is_add;
+ mp->entry.sad_id = ntohl (sad_id);
+ mp->entry.protocol = protocol;
+ mp->entry.spi = ntohl (spi);
+ mp->entry.flags = flags;
+
+ mp->entry.crypto_algorithm = crypto_alg;
+ mp->entry.integrity_algorithm = integ_alg;
+ mp->entry.crypto_key.length = vec_len (ck);
+ mp->entry.integrity_key.length = vec_len (ik);
+
+ if (mp->entry.crypto_key.length > sizeof (mp->entry.crypto_key.data))
+ mp->entry.crypto_key.length = sizeof (mp->entry.crypto_key.data);
+
+ if (mp->entry.integrity_key.length > sizeof (mp->entry.integrity_key.data))
+ mp->entry.integrity_key.length = sizeof (mp->entry.integrity_key.data);
+
+ if (ck)
+ clib_memcpy (mp->entry.crypto_key.data, ck, mp->entry.crypto_key.length);
+ if (ik)
+ clib_memcpy (mp->entry.integrity_key.data, ik,
+ mp->entry.integrity_key.length);
+
+ if (flags & IPSEC_API_SAD_FLAG_IS_TUNNEL)
+ {
+ clib_memcpy (&mp->entry.tunnel_src, &tun_src,
+ sizeof (mp->entry.tunnel_src));
+ clib_memcpy (&mp->entry.tunnel_dst, &tun_dst,
+ sizeof (mp->entry.tunnel_dst));
+ }
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ipsec_sad_entry_add_del_v2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_ipsec_interface_add_del_spd (vat_main_t *vam)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_interface_add_del_spd_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u32 spd_id = (u32) ~0;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "spd_id %d", &spd_id))
+ ;
+ else if (unformat (i, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (spd_id == (u32) ~0)
+ {
+ errmsg ("spd_id must be set");
+ return -99;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (IPSEC_INTERFACE_ADD_DEL_SPD, mp);
+
+ mp->spd_id = ntohl (spd_id);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_ipsec_backend_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_ipsec_select_backend (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_ipsec_set_async_mode (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_ipsec_spd_add_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_ipsec_spd_add_del_t *mp;
+ u32 spd_id = ~0;
+ u8 is_add = 1;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "spd_id %d", &spd_id))
+ ;
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+ if (spd_id == ~0)
+ {
+ errmsg ("spd_id must be set");
+ return -99;
+ }
+
+ M (IPSEC_SPD_ADD_DEL, mp);
+
+ mp->spd_id = ntohl (spd_id);
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+#include <vnet/ipsec/ipsec.api_test.c>
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/ipsec/ipsec_tun.c b/src/vnet/ipsec/ipsec_tun.c
index 0b6ec0ea33e..ecda291e985 100644
--- a/src/vnet/ipsec/ipsec_tun.c
+++ b/src/vnet/ipsec/ipsec_tun.c
@@ -22,6 +22,7 @@
#include <vnet/adj/adj_delegate.h>
#include <vnet/adj/adj_midchain.h>
#include <vnet/teib/teib.h>
+#include <vnet/mpls/mpls.h>
/* instantiate the bihash functions */
#include <vppinfra/bihash_8_16.h>
@@ -100,14 +101,12 @@ ipsec_tun_register_nodes (ip_address_family_t af)
if (0 == ipsec_tun_node_regs[af]++)
{
if (AF_IP4 == af)
- {
- ipsec_register_udp_port (UDP_DST_PORT_ipsec);
- ip4_register_protocol (IP_PROTOCOL_IPSEC_ESP,
- ipsec4_tun_input_node.index);
- }
+ ip4_register_protocol (IP_PROTOCOL_IPSEC_ESP,
+ ipsec4_tun_input_node.index);
else
ip6_register_protocol (IP_PROTOCOL_IPSEC_ESP,
ipsec6_tun_input_node.index);
+ ipsec_register_udp_port (UDP_DST_PORT_ipsec, (AF_IP4 == af));
}
}
@@ -118,12 +117,10 @@ ipsec_tun_unregister_nodes (ip_address_family_t af)
if (0 == --ipsec_tun_node_regs[af])
{
if (AF_IP4 == af)
- {
- ipsec_unregister_udp_port (UDP_DST_PORT_ipsec);
- ip4_unregister_protocol (IP_PROTOCOL_IPSEC_ESP);
- }
+ ip4_unregister_protocol (IP_PROTOCOL_IPSEC_ESP);
else
ip6_unregister_protocol (IP_PROTOCOL_IPSEC_ESP);
+ ipsec_unregister_udp_port (UDP_DST_PORT_ipsec, (AF_IP4 == af));
}
}
@@ -137,12 +134,14 @@ ipsec_tun_protect_from_const_base (const adj_delegate_t * ad)
static u32
ipsec_tun_protect_get_adj_next (vnet_link_t linkt,
- const ipsec_tun_protect_t * itp)
+ const ipsec_tun_protect_t *itp)
{
ipsec_main_t *im;
- ipsec_sa_t *sa;
u32 next;
+ im = &ipsec_main;
+ next = 0;
+
if (!(itp->itp_flags & IPSEC_PROTECT_ITF))
{
if (ip46_address_is_ip4 (&itp->itp_tun.src))
@@ -151,42 +150,42 @@ ipsec_tun_protect_get_adj_next (vnet_link_t linkt,
linkt = VNET_LINK_IP6;
}
- sa = ipsec_sa_get (itp->itp_out_sa);
- im = &ipsec_main;
- next = 0;
-
- if ((sa->crypto_alg == IPSEC_CRYPTO_ALG_NONE &&
- sa->integ_alg == IPSEC_INTEG_ALG_NONE) &&
- !(itp->itp_flags & IPSEC_PROTECT_ITF))
- next = (VNET_LINK_IP4 == linkt ? im->esp4_no_crypto_tun_node_index :
- im->esp6_no_crypto_tun_node_index);
- else if (itp->itp_flags & IPSEC_PROTECT_L2)
- next = (VNET_LINK_IP4 == linkt ? im->esp4_encrypt_l2_tun_node_index :
- im->esp6_encrypt_l2_tun_node_index);
- else
+ switch (linkt)
{
- switch (linkt)
- {
- case VNET_LINK_IP4:
- next = im->esp4_encrypt_tun_node_index;
- break;
- case VNET_LINK_IP6:
- next = im->esp6_encrypt_tun_node_index;
- break;
- case VNET_LINK_MPLS:
- next = im->esp_mpls_encrypt_tun_node_index;
- break;
- case VNET_LINK_ARP:
- case VNET_LINK_NSH:
- case VNET_LINK_ETHERNET:
- ASSERT (0);
- break;
- }
+ case VNET_LINK_IP4:
+ next = im->esp4_encrypt_tun_node_index;
+ break;
+ case VNET_LINK_IP6:
+ next = im->esp6_encrypt_tun_node_index;
+ break;
+ case VNET_LINK_MPLS:
+ next = im->esp_mpls_encrypt_tun_node_index;
+ break;
+ case VNET_LINK_ARP:
+ case VNET_LINK_NSH:
+ case VNET_LINK_ETHERNET:
+ ASSERT (0);
+ break;
}
+
return (next);
}
static void
+ipsec_tun_setup_tx_nodes (u32 sw_if_index, const ipsec_tun_protect_t *itp)
+{
+ vnet_feature_modify_end_node (
+ ip4_main.lookup_main.output_feature_arc_index, sw_if_index,
+ ipsec_tun_protect_get_adj_next (VNET_LINK_IP4, itp));
+ vnet_feature_modify_end_node (
+ ip6_main.lookup_main.output_feature_arc_index, sw_if_index,
+ ipsec_tun_protect_get_adj_next (VNET_LINK_IP6, itp));
+ vnet_feature_modify_end_node (
+ mpls_main.output_feature_arc_index, sw_if_index,
+ ipsec_tun_protect_get_adj_next (VNET_LINK_MPLS, itp));
+}
+
+static void
ipsec_tun_protect_add_adj (adj_index_t ai, const ipsec_tun_protect_t * itp)
{
vec_validate_init_empty (ipsec_tun_protect_sa_by_adj_index, ai,
@@ -200,8 +199,8 @@ ipsec_tun_protect_add_adj (adj_index_t ai, const ipsec_tun_protect_t * itp)
else
{
ipsec_tun_protect_sa_by_adj_index[ai] = itp->itp_out_sa;
- adj_nbr_midchain_update_next_node
- (ai, ipsec_tun_protect_get_adj_next (adj_get_link_type (ai), itp));
+ adj_nbr_midchain_update_next_node (
+ ai, ipsec_tun_protect_get_adj_next (adj_get_link_type (ai), itp));
}
}
@@ -237,7 +236,6 @@ ipsec_tun_protect_rx_db_add (ipsec_main_t * im,
if (ip46_address_is_zero (&itp->itp_crypto.dst))
return;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
sa = ipsec_sa_get (sai);
@@ -292,7 +290,6 @@ ipsec_tun_protect_rx_db_add (ipsec_main_t * im,
ipsec_tun_register_nodes (AF_IP6);
}
}))
- /* *INDENT-ON* */
}
static adj_walk_rc_t
@@ -329,7 +326,7 @@ ipsec_tun_protect_tx_db_add (ipsec_tun_protect_t * itp)
{
if (INDEX_INVALID == idi->id_itp)
{
- // ipsec_tun_protect_feature_set (itp, 1);
+ ipsec_tun_setup_tx_nodes (itp->itp_sw_if_index, itp);
}
idi->id_itp = itp - ipsec_tun_protect_pool;
@@ -347,7 +344,7 @@ ipsec_tun_protect_tx_db_add (ipsec_tun_protect_t * itp)
* enable the encrypt feature for egress if this is the first addition
* on this interface
*/
- // ipsec_tun_protect_feature_set (itp, 1);
+ ipsec_tun_setup_tx_nodes (itp->itp_sw_if_index, itp);
}
hash_set_mem (idi->id_hash, itp->itp_key, itp - ipsec_tun_protect_pool);
@@ -372,7 +369,6 @@ ipsec_tun_protect_rx_db_remove (ipsec_main_t * im,
{
const ipsec_sa_t *sa;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
if (ip46_address_is_ip4 (&itp->itp_crypto.dst))
@@ -406,7 +402,6 @@ ipsec_tun_protect_rx_db_remove (ipsec_main_t * im,
}
}
}));
- /* *INDENT-ON* */
}
static adj_walk_rc_t
@@ -435,7 +430,7 @@ ipsec_tun_protect_tx_db_remove (ipsec_tun_protect_t * itp)
if (vnet_sw_interface_is_p2p (vnet_get_main (), itp->itp_sw_if_index))
{
- // ipsec_tun_protect_feature_set (itp, 0);
+ ipsec_itf_reset_tx_nodes (itp->itp_sw_if_index);
idi->id_itp = INDEX_INVALID;
FOR_EACH_FIB_IP_PROTOCOL (nh_proto)
@@ -451,7 +446,7 @@ ipsec_tun_protect_tx_db_remove (ipsec_tun_protect_t * itp)
if (0 == hash_elts (idi->id_hash))
{
- // ipsec_tun_protect_feature_set (itp, 0);
+ ipsec_itf_reset_tx_nodes (itp->itp_sw_if_index);
hash_free (idi->id_hash);
idi->id_hash = NULL;
}
@@ -465,7 +460,6 @@ ipsec_tun_protect_set_crypto_addr (ipsec_tun_protect_t * itp)
{
ipsec_sa_t *sa;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
if (ipsec_sa_is_set_IS_TUNNEL (sa))
@@ -485,7 +479,6 @@ ipsec_tun_protect_set_crypto_addr (ipsec_tun_protect_t * itp)
itp->itp_flags &= ~IPSEC_PROTECT_ENCAPED;
}
}));
- /* *INDENT-ON* */
}
static void
@@ -502,13 +495,14 @@ ipsec_tun_protect_config (ipsec_main_t * im,
ipsec_sa_lock (itp->itp_out_sa);
- /* *INDENT-OFF* */
+ if (itp->itp_flags & IPSEC_PROTECT_ITF)
+ ipsec_sa_set_NO_ALGO_NO_DROP (ipsec_sa_get (itp->itp_out_sa));
+
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
ipsec_sa_lock(sai);
}));
ipsec_tun_protect_set_crypto_addr(itp);
- /* *INDENT-ON* */
/*
* add to the DB against each SA
@@ -525,7 +519,6 @@ ipsec_tun_protect_unconfig (ipsec_main_t * im, ipsec_tun_protect_t * itp)
ipsec_sa_t *sa;
index_t sai;
- /* *INDENT-OFF* */
FOR_EACH_IPSEC_PROTECT_INPUT_SA(itp, sa,
({
ipsec_sa_unset_IS_PROTECT (sa);
@@ -534,13 +527,13 @@ ipsec_tun_protect_unconfig (ipsec_main_t * im, ipsec_tun_protect_t * itp)
ipsec_tun_protect_rx_db_remove (im, itp);
ipsec_tun_protect_tx_db_remove (itp);
+ ipsec_sa_unset_NO_ALGO_NO_DROP (ipsec_sa_get (itp->itp_out_sa));
ipsec_sa_unlock(itp->itp_out_sa);
FOR_EACH_IPSEC_PROTECT_INPUT_SAI(itp, sai,
({
ipsec_sa_unlock(sai);
}));
- /* *INDENT-ON* */
ITP_DBG (itp, "unconfigured");
}
@@ -569,6 +562,9 @@ ipsec_tun_protect_update (u32 sw_if_index,
ipsec_main_t *im;
int rv;
+ if (NULL == nh)
+ nh = &IP_ADDR_ALL_0;
+
ITP_DBG2 ("update: %U/%U",
format_vnet_sw_if_index_name, vnet_get_main (), sw_if_index,
format_ip_address, nh);
@@ -581,8 +577,6 @@ ipsec_tun_protect_update (u32 sw_if_index,
rv = 0;
im = &ipsec_main;
- if (NULL == nh)
- nh = &IP_ADDR_ALL_0;
itpi = ipsec_tun_protect_find (sw_if_index, nh);
vec_foreach_index (ii, sas_in)
@@ -747,12 +741,10 @@ ipsec_tun_protect_walk (ipsec_tun_protect_walk_cb_t fn, void *ctx)
{
index_t itpi;
- /* *INDENT-OFF* */
pool_foreach_index (itpi, ipsec_tun_protect_pool)
{
fn (itpi, ctx);
}
- /* *INDENT-ON* */
}
void
@@ -768,12 +760,10 @@ ipsec_tun_protect_walk_itf (u32 sw_if_index,
idi = &itp_db.id_itf[sw_if_index];
- /* *INDENT-OFF* */
hash_foreach(key, itpi, idi->id_hash,
({
fn (itpi, ctx);
}));
- /* *INDENT-ON* */
if (INDEX_INVALID != idi->id_itp)
fn (idi->id_itp, ctx);
}
@@ -801,19 +791,27 @@ ipsec_tun_feature_update (u32 sw_if_index, u8 arc_index, u8 is_enable,
ipsec_main.esp4_decrypt_tun_node_index :
ipsec_main.esp6_decrypt_tun_node_index;
- vnet_feature_modify_end_node (
- feature_main.device_input_feature_arc_index, sw_if_index, decrypt_tun);
- itp->itp_flags |= IPSEC_PROTECT_FEAT;
+ if (!(itp->itp_flags & IPSEC_PROTECT_FEAT))
+ {
+ itp->itp_flags |= IPSEC_PROTECT_FEAT;
+ vnet_feature_modify_end_node (
+ feature_main.device_input_feature_arc_index, sw_if_index,
+ decrypt_tun);
+ }
}
else
{
- u32 eth_in =
- vlib_get_node_by_name (vlib_get_main (), (u8 *) "ethernet-input")
- ->index;
+ if (itp->itp_flags & IPSEC_PROTECT_FEAT)
+ {
+ itp->itp_flags &= ~IPSEC_PROTECT_FEAT;
+
+ u32 eth_in =
+ vlib_get_node_by_name (vlib_get_main (), (u8 *) "ethernet-input")
+ ->index;
- vnet_feature_modify_end_node (
- feature_main.device_input_feature_arc_index, sw_if_index, eth_in);
- itp->itp_flags &= ~IPSEC_PROTECT_FEAT;
+ vnet_feature_modify_end_node (
+ feature_main.device_input_feature_arc_index, sw_if_index, eth_in);
+ }
}
/* Propagate flag change into lookup entries */
@@ -847,6 +845,9 @@ ipsec_tun_protect_adj_delegate_adj_created (adj_index_t ai)
if (!adj_is_midchain (ai))
return;
+ vec_validate_init_empty (ipsec_tun_protect_sa_by_adj_index, ai,
+ INDEX_INVALID);
+
adj = adj_get (ai);
ip_address_from_46 (&adj->sub_type.midchain.next_hop,
@@ -925,7 +926,7 @@ const static teib_vft_t ipsec_tun_teib_vft = {
.nv_deleted = ipsec_tun_teib_entry_deleted,
};
-static void
+void
ipsec_tun_table_init (ip_address_family_t af, uword table_size, u32 n_buckets)
{
ipsec_main_t *im;
@@ -955,16 +956,6 @@ ipsec_tunnel_protect_init (vlib_main_t *vm)
IPSEC_TUN_DEFAULT_HASH_NUM_BUCKETS,
IPSEC_TUN_DEFAULT_HASH_MEMORY_SIZE);
- /* set up feature nodes to drop outbound packets with no crypto alg set */
- im->esp4_no_crypto_tun_node_index =
- vlib_get_node_by_name (vm, (u8 *) "esp4-no-crypto")->index;
- im->esp6_no_crypto_tun_node_index =
- vlib_get_node_by_name (vm, (u8 *) "esp6-no-crypto")->index;
- im->esp6_encrypt_l2_tun_node_index =
- vlib_get_node_by_name (vm, (u8 *) "esp6-encrypt-tun")->index;
- im->esp4_encrypt_l2_tun_node_index =
- vlib_get_node_by_name (vm, (u8 *) "esp4-encrypt-tun")->index;
-
ipsec_tun_adj_delegate_type =
adj_delegate_register_new_type (&ipsec_tun_adj_delegate_vft);
@@ -979,56 +970,6 @@ ipsec_tunnel_protect_init (vlib_main_t *vm)
VLIB_INIT_FUNCTION (ipsec_tunnel_protect_init);
-static clib_error_t *
-ipsec_config (vlib_main_t * vm, unformat_input_t * input)
-{
- unformat_input_t sub_input;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "ip4 %U", unformat_vlib_cli_sub_input, &sub_input))
- {
- uword table_size = ~0;
- u32 n_buckets = ~0;
-
- while (unformat_check_input (&sub_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (&sub_input, "num-buckets %u", &n_buckets))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, &sub_input);
- }
-
- ipsec_tun_table_init (AF_IP4, table_size, n_buckets);
- }
- else if (unformat (input, "ip6 %U", unformat_vlib_cli_sub_input,
- &sub_input))
- {
- uword table_size = ~0;
- u32 n_buckets = ~0;
-
- while (unformat_check_input (&sub_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (&sub_input, "num-buckets %u", &n_buckets))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, &sub_input);
- }
-
- ipsec_tun_table_init (AF_IP6, table_size, n_buckets);
- }
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
-
- return 0;
-}
-
-VLIB_CONFIG_FUNCTION (ipsec_config, "ipsec");
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/ipsec/ipsec_tun.h b/src/vnet/ipsec/ipsec_tun.h
index c79fb902dec..9d8a124443d 100644
--- a/src/vnet/ipsec/ipsec_tun.h
+++ b/src/vnet/ipsec/ipsec_tun.h
@@ -163,6 +163,9 @@ extern u8 *format_ipsec_tun_protect_index (u8 * s, va_list * args);
extern void ipsec_tun_register_nodes (ip_address_family_t af);
extern void ipsec_tun_unregister_nodes (ip_address_family_t af);
+extern void ipsec_tun_table_init (ip_address_family_t af, uword table_size,
+ u32 n_buckets);
+
/*
* DP API
*/
@@ -179,7 +182,6 @@ always_inline index_t
ipsec_tun_protect_get_sa_out (adj_index_t ai)
{
ASSERT (vec_len (ipsec_tun_protect_sa_by_adj_index) > ai);
- ASSERT (INDEX_INVALID != ipsec_tun_protect_sa_by_adj_index[ai]);
return (ipsec_tun_protect_sa_by_adj_index[ai]);
}
diff --git a/src/vnet/ipsec/ipsec_tun_in.c b/src/vnet/ipsec/ipsec_tun_in.c
index 4f8af006d2b..c82de3ebaff 100644
--- a/src/vnet/ipsec/ipsec_tun_in.c
+++ b/src/vnet/ipsec/ipsec_tun_in.c
@@ -24,31 +24,10 @@
#include <vnet/ipsec/ipsec_io.h>
#include <vnet/ipsec/ipsec_punt.h>
#include <vnet/ipsec/ipsec_tun.h>
+#include <vnet/ipsec/ipsec.api_enum.h>
#include <vnet/ip/ip4_input.h>
-/* Statistics (not really errors) */
-#define foreach_ipsec_tun_protect_input_error \
- _(RX, "good packets received") \
- _(DISABLED, "ipsec packets received on disabled interface") \
- _(NO_TUNNEL, "no matching tunnel") \
- _(TUNNEL_MISMATCH, "SPI-tunnel mismatch") \
- _(NAT_KEEPALIVE, "NAT Keepalive") \
- _(TOO_SHORT, "Too Short") \
- _(SPI_0, "SPI 0")
-
-static char *ipsec_tun_protect_input_error_strings[] = {
-#define _(sym,string) string,
- foreach_ipsec_tun_protect_input_error
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) IPSEC_TUN_PROTECT_INPUT_ERROR_##sym,
- foreach_ipsec_tun_protect_input_error
-#undef _
- IPSEC_TUN_PROTECT_INPUT_N_ERROR,
-} ipsec_tun_protect_input_error_t;
+typedef vl_counter_ipsec_tun_enum_t ipsec_tun_protect_input_error_t;
typedef enum ipsec_tun_next_t_
{
@@ -93,25 +72,35 @@ ipsec_ip4_if_no_tunnel (vlib_node_runtime_t * node,
{
if (PREDICT_FALSE (0 == esp->spi))
{
- b->error = node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_SPI_0];
+ b->error = node->errors[IPSEC_TUN_ERROR_SPI_0];
b->punt_reason = ipsec_punt_reason[(ip4->protocol == IP_PROTOCOL_UDP ?
IPSEC_PUNT_IP4_SPI_UDP_0 :
IPSEC_PUNT_IP4_NO_SUCH_TUNNEL)];
}
else
{
- b->error = node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_NO_TUNNEL];
+ b->error = node->errors[IPSEC_TUN_ERROR_NO_TUNNEL];
b->punt_reason = ipsec_punt_reason[IPSEC_PUNT_IP4_NO_SUCH_TUNNEL];
}
return VNET_DEVICE_INPUT_NEXT_PUNT;
}
always_inline u16
-ipsec_ip6_if_no_tunnel (vlib_node_runtime_t * node,
- vlib_buffer_t * b, const esp_header_t * esp)
+ipsec_ip6_if_no_tunnel (vlib_node_runtime_t *node, vlib_buffer_t *b,
+ const esp_header_t *esp, const ip6_header_t *ip6)
{
- b->error = node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_NO_TUNNEL];
- b->punt_reason = ipsec_punt_reason[IPSEC_PUNT_IP6_NO_SUCH_TUNNEL];
+ if (PREDICT_FALSE (0 == esp->spi))
+ {
+ b->error = node->errors[IPSEC_TUN_ERROR_SPI_0];
+ b->punt_reason = ipsec_punt_reason[(ip6->protocol == IP_PROTOCOL_UDP ?
+ IPSEC_PUNT_IP6_SPI_UDP_0 :
+ IPSEC_PUNT_IP6_NO_SUCH_TUNNEL)];
+ }
+ else
+ {
+ b->error = node->errors[IPSEC_TUN_ERROR_NO_TUNNEL];
+ b->punt_reason = ipsec_punt_reason[IPSEC_PUNT_IP6_NO_SUCH_TUNNEL];
+ }
return VNET_DEVICE_INPUT_NEXT_PUNT;
}
@@ -167,8 +156,8 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
while (n_left_from > 0)
{
u32 sw_if_index0, len0, hdr_sz0;
- clib_bihash_kv_24_16_t bkey60;
- clib_bihash_kv_8_16_t bkey40;
+ clib_bihash_kv_24_16_t bkey60 = { 0 };
+ clib_bihash_kv_8_16_t bkey40 = { 0 };
ipsec4_tunnel_kv_t *key40;
ipsec6_tunnel_kv_t *key60;
ip4_header_t *ip40;
@@ -185,19 +174,62 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (is_ip6)
{
ip60 = (ip6_header_t *) ip40;
- esp0 = (esp_header_t *) (ip60 + 1);
- hdr_sz0 = sizeof (ip6_header_t);
+ if (ip60->protocol == IP_PROTOCOL_UDP)
+ {
+ /* NAT UDP port 4500 case, don't advance any more */
+ esp0 = (esp_header_t *) ((u8 *) ip60 + sizeof (ip6_header_t) +
+ sizeof (udp_header_t));
+ hdr_sz0 = 0;
+ buf_rewind0 = sizeof (ip6_header_t) + sizeof (udp_header_t);
+
+ const udp_header_t *udp0 =
+ (udp_header_t *) ((u8 *) ip60 + sizeof (ip6_header_t));
+
+ /* length 9 = sizeof(udp_header) + 1 byte of special SPI */
+ if (clib_net_to_host_u16 (udp0->length) == 9 &&
+ esp0->spi_bytes[0] == 0xff)
+ {
+ b[0]->error = node->errors[IPSEC_TUN_ERROR_NAT_KEEPALIVE];
+
+ next[0] = VNET_DEVICE_INPUT_NEXT_IP6_DROP;
+ len0 = 0;
+
+ vlib_buffer_advance (b[0], -buf_rewind0);
+ goto trace00;
+ }
+ }
+ else
+ {
+ esp0 = (esp_header_t *) (ip60 + 1);
+ buf_rewind0 = hdr_sz0 = sizeof (ip6_header_t);
+ }
}
else
{
- /* NAT UDP port 4500 case, don't advance any more */
if (ip40->protocol == IP_PROTOCOL_UDP)
{
+ /* NAT UDP port 4500 case, don't advance any more */
esp0 =
(esp_header_t *) ((u8 *) ip40 + ip4_header_bytes (ip40) +
sizeof (udp_header_t));
hdr_sz0 = 0;
buf_rewind0 = ip4_header_bytes (ip40) + sizeof (udp_header_t);
+
+ const udp_header_t *udp0 =
+ (udp_header_t *) ((u8 *) ip40 + ip4_header_bytes (ip40));
+
+ /* length 9 = sizeof(udp_header) + 1 byte of special SPI */
+ if (clib_net_to_host_u16 (udp0->length) == 9 &&
+ esp0->spi_bytes[0] == 0xff)
+ {
+ b[0]->error = node->errors[IPSEC_TUN_ERROR_NAT_KEEPALIVE];
+
+ next[0] = VNET_DEVICE_INPUT_NEXT_IP4_DROP;
+ len0 = 0;
+
+ vlib_buffer_advance (b[0], -buf_rewind0);
+ goto trace00;
+ }
}
else
{
@@ -213,15 +245,11 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (len0 < sizeof (esp_header_t))
{
- if (esp0->spi_bytes[0] == 0xff)
- b[0]->error =
- node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_NAT_KEEPALIVE];
- else
- b[0]->error =
- node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_TOO_SHORT];
+ b[0]->error = node->errors[IPSEC_TUN_ERROR_TOO_SHORT];
next[0] = is_ip6 ? VNET_DEVICE_INPUT_NEXT_IP6_DROP :
VNET_DEVICE_INPUT_NEXT_IP4_DROP;
+ vlib_buffer_advance (b[0], -buf_rewind0);
goto trace00;
}
@@ -249,7 +277,8 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- next[0] = ipsec_ip6_if_no_tunnel (node, b[0], esp0);
+ next[0] = ipsec_ip6_if_no_tunnel (node, b[0], esp0, ip60);
+ vlib_buffer_advance (b[0], -buf_rewind0);
n_no_tunnel++;
goto trace00;
}
@@ -296,7 +325,7 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_increment_combined_counter
(drop_counter, thread_index, sw_if_index0, 1, len0);
n_disabled++;
- b[0]->error = node->errors[IPSEC_TUN_PROTECT_INPUT_ERROR_DISABLED];
+ b[0]->error = node->errors[IPSEC_TUN_ERROR_DISABLED];
next[0] = is_ip6 ? VNET_DEVICE_INPUT_NEXT_IP6_DROP :
VNET_DEVICE_INPUT_NEXT_IP4_DROP;
goto trace00;
@@ -364,12 +393,10 @@ ipsec_tun_protect_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
thread_index,
last_sw_if_index, n_packets, n_bytes);
- vlib_node_increment_counter (vm, node->node_index,
- IPSEC_TUN_PROTECT_INPUT_ERROR_RX,
- from_frame->n_vectors - (n_disabled +
- n_no_tunnel));
- vlib_node_increment_counter (vm, node->node_index,
- IPSEC_TUN_PROTECT_INPUT_ERROR_NO_TUNNEL,
+ vlib_node_increment_counter (vm, node->node_index, IPSEC_TUN_ERROR_RX,
+ from_frame->n_vectors -
+ (n_disabled + n_no_tunnel));
+ vlib_node_increment_counter (vm, node->node_index, IPSEC_TUN_ERROR_NO_TUNNEL,
n_no_tunnel);
vlib_buffer_enqueue_to_next (vm, node, from, nexts, from_frame->n_vectors);
@@ -384,17 +411,15 @@ VLIB_NODE_FN (ipsec4_tun_input_node) (vlib_main_t * vm,
return ipsec_tun_protect_input_inline (vm, node, from_frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec4_tun_input_node) = {
.name = "ipsec4-tun-input",
.vector_size = sizeof (u32),
.format_trace = format_ipsec_tun_protect_input_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (ipsec_tun_protect_input_error_strings),
- .error_strings = ipsec_tun_protect_input_error_strings,
+ .n_errors = IPSEC_TUN_N_ERROR,
+ .error_counters = ipsec_tun_error_counters,
.sibling_of = "device-input",
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ipsec6_tun_input_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -403,17 +428,15 @@ VLIB_NODE_FN (ipsec6_tun_input_node) (vlib_main_t * vm,
return ipsec_tun_protect_input_inline (vm, node, from_frame, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ipsec6_tun_input_node) = {
.name = "ipsec6-tun-input",
.vector_size = sizeof (u32),
.format_trace = format_ipsec_tun_protect_input_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (ipsec_tun_protect_input_error_strings),
- .error_strings = ipsec_tun_protect_input_error_strings,
+ .n_errors = IPSEC_TUN_N_ERROR,
+ .error_counters = ipsec_tun_error_counters,
.sibling_of = "device-input",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/ipsec/ipsec_types.api b/src/vnet/ipsec/ipsec_types.api
index ed04f470fd2..37c1141ab46 100644
--- a/src/vnet/ipsec/ipsec_types.api
+++ b/src/vnet/ipsec/ipsec_types.api
@@ -36,6 +36,10 @@ enum ipsec_crypto_alg
IPSEC_API_CRYPTO_ALG_AES_GCM_256,
IPSEC_API_CRYPTO_ALG_DES_CBC,
IPSEC_API_CRYPTO_ALG_3DES_CBC,
+ IPSEC_API_CRYPTO_ALG_CHACHA20_POLY1305 [backwards_compatible],
+ IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_128 [backwards_compatible],
+ IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_192 [backwards_compatible],
+ IPSEC_API_CRYPTO_ALG_AES_NULL_GMAC_256 [backwards_compatible],
};
/*
@@ -95,10 +99,103 @@ typedef key
u8 data[128];
};
+enum ipsec_spd_action
+{
+ /* bypass - no IPsec processing */
+ IPSEC_API_SPD_ACTION_BYPASS = 0,
+ /* discard - discard packet with ICMP processing */
+ IPSEC_API_SPD_ACTION_DISCARD,
+ /* resolve - send request to control plane for SA resolving */
+ IPSEC_API_SPD_ACTION_RESOLVE,
+ /* protect - apply IPsec policy using following parameters */
+ IPSEC_API_SPD_ACTION_PROTECT,
+};
+
+/** \brief IPsec: Security Policy Database entry
+
+ See RFC 4301, 4.4.1.1 on how to match packet to selectors
+
+ @param spd_id - SPD instance id (control plane allocated)
+ @param priority - priority of SPD entry (non-unique value). Used to order SPD matching - higher priorities match before lower
+ @param is_outbound - entry applies to outbound traffic if non-zero, otherwise applies to inbound traffic
+ @param remote_address_start - start of remote address range to match
+ @param remote_address_stop - end of remote address range to match
+ @param local_address_start - start of local address range to match
+ @param local_address_stop - end of local address range to match
+ @param protocol - protocol type to match [0 means any] otherwise IANA value
+ @param remote_port_start - start of remote port range to match ...
+ @param remote_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
+ @param local_port_start - start of local port range to match ...
+ @param local_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
+ @param policy - action to perform on match
+ @param sa_id - SAD instance id (control plane allocated)
+*/
+typedef ipsec_spd_entry
+{
+ u32 spd_id;
+ i32 priority;
+ bool is_outbound;
+
+ u32 sa_id;
+ vl_api_ipsec_spd_action_t policy;
+ /* Which protocol?? */
+ u8 protocol;
+
+ // Selector
+ vl_api_address_t remote_address_start;
+ vl_api_address_t remote_address_stop;
+ vl_api_address_t local_address_start;
+ vl_api_address_t local_address_stop;
+
+ u16 remote_port_start;
+ u16 remote_port_stop;
+ u16 local_port_start;
+ u16 local_port_stop;
+};
+
+/** \brief IPsec: Security Policy Database entry v2
+
+ See RFC 4301, 4.4.1.1 on how to match packet to selectors
+
+ @param spd_id - SPD instance id (control plane allocated)
+ @param priority - priority of SPD entry (non-unique value). Used to order SPD matching - higher priorities match before lower
+ @param is_outbound - entry applies to outbound traffic if non-zero, otherwise applies to inbound traffic
+ @param remote_address_start - start of remote address range to match
+ @param remote_address_stop - end of remote address range to match
+ @param local_address_start - start of local address range to match
+ @param local_address_stop - end of local address range to match
+ @param protocol - protocol type to match [255 means any] otherwise IANA value
+ @param remote_port_start - start of remote port range to match ...
+ @param remote_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
+ @param local_port_start - start of local port range to match ...
+ @param local_port_stop - end of remote port range to match [0 to 65535 means ANY, 65535 to 0 means OPAQUE]
+ @param policy - action to perform on match
+ @param sa_id - SAD instance id (control plane allocated)
+*/
+typedef ipsec_spd_entry_v2
+{
+ u32 spd_id;
+ i32 priority;
+ bool is_outbound;
+
+ u32 sa_id;
+ vl_api_ipsec_spd_action_t policy;
+ u8 protocol;
+
+ // Selector
+ vl_api_address_t remote_address_start;
+ vl_api_address_t remote_address_stop;
+ vl_api_address_t local_address_start;
+ vl_api_address_t local_address_stop;
+
+ u16 remote_port_start;
+ u16 remote_port_stop;
+ u16 local_port_start;
+ u16 local_port_stop;
+};
+
+
/** \brief IPsec: Security Association Database entry
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - add SAD entry if non-zero, else delete
@param sad_id - sad id
@param spi - security parameter index
@param protocol - 0 = AH, 1 = ESP
@@ -106,6 +203,7 @@ typedef key
@param crypto_key - crypto keying material
@param integrity_algorithm - one of the supported algorithms
@param integrity_key - integrity keying material
+ @param flags - SA flags (see ipsec_sad_flags above)
@param tunnel_src_address - IPsec tunnel source address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero
@param tunnel_dst_address - IPsec tunnel destination address IPv6 if is_tunnel_ipv6 is non-zero, else IPv4. Only valid if is_tunnel is non-zero
@param tx_table_id - the FIB id used for encapsulated packets
@@ -117,6 +215,7 @@ typedef key
@param tunnel_flags - Flags controlling the copying of encap/decap value
@param dscp - Fixed DSCP vaule for tunnel encap
*/
+
typedef ipsec_sad_entry
{
u32 sad_id;
@@ -189,6 +288,46 @@ typedef ipsec_sad_entry_v3
u16 udp_dst_port [default=4500];
};
+/** \brief IPsec: Security Association Database entry
+ @param sad_id - sad id
+ @param spi - security parameter index
+ @param protocol - 0 = AH, 1 = ESP
+ @param crypto_algorithm - a supported crypto algorithm
+ @param crypto_key - crypto keying material
+ @param integrity_algorithm - one of the supported algorithms
+ @param integrity_key - integrity keying material
+ @param flags - SA flags (see ipsec_sad_flags above)
+ @param tunnel - tunnel description (see vnet/tunnel/tunnel_types.api)
+ @param salt - for use with counter mode ciphers
+ @param udp_src_port - If using UDP Encapsulation, use this source port for
+ TX. It is ignored for RX.
+ @param udp_dst_port - If using UDP Encapsulation, use this destination port
+ for TX. Expect traffic on this port for RX.
+ @param anti_replay_window_size - AR window size to use. The supplied value is round up to the nearest power of 2.
+ */
+typedef ipsec_sad_entry_v4
+{
+ u32 sad_id;
+ u32 spi;
+
+ vl_api_ipsec_proto_t protocol;
+
+ vl_api_ipsec_crypto_alg_t crypto_algorithm;
+ vl_api_key_t crypto_key;
+
+ vl_api_ipsec_integ_alg_t integrity_algorithm;
+ vl_api_key_t integrity_key;
+
+ vl_api_ipsec_sad_flags_t flags;
+
+ vl_api_tunnel_t tunnel;
+
+ u32 salt;
+ u16 udp_src_port [default=4500];
+ u16 udp_dst_port [default=4500];
+
+ u32 anti_replay_window_size [default=64];
+};
/*
* Local Variables:
diff --git a/src/vnet/l2/feat_bitmap.c b/src/vnet/l2/feat_bitmap.c
index 349ec67462b..507fe365f07 100644
--- a/src/vnet/l2/feat_bitmap.c
+++ b/src/vnet/l2/feat_bitmap.c
@@ -155,7 +155,6 @@ feat_bitmap_drop_init (vlib_main_t * vm)
VLIB_INIT_FUNCTION (feat_bitmap_drop_init);
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = {
.function = feat_bitmap_drop_node_fn,
.name = "feature-bitmap-drop",
@@ -173,7 +172,6 @@ VLIB_REGISTER_NODE (feat_bitmap_drop_node,static) = {
[FEAT_BITMAP_DROP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2.api b/src/vnet/l2/l2.api
index b0ac23f705a..ccba9aa3df1 100644
--- a/src/vnet/l2/l2.api
+++ b/src/vnet/l2/l2.api
@@ -1,6 +1,7 @@
/* Hey Emacs use -*- mode: C -*- */
/*
* Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2022 Nordix Foundation.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -14,7 +15,7 @@
* limitations under the License.
*/
-option version = "3.1.0";
+option version = "3.2.0";
import "vnet/ip/ip_types.api";
import "vnet/ethernet/ethernet_types.api";
@@ -304,7 +305,7 @@ autoreply define bridge_domain_set_learn_limit
u32 learn_limit;
};
-/** \brief L2 bridge domain add or delete request
+/** \brief L2 bridge domain add or delete request - will be deprecated
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@param bd_id - the bridge domain to create
@@ -319,6 +320,7 @@ autoreply define bridge_domain_set_learn_limit
*/
autoreply define bridge_domain_add_del
{
+ option deprecated;
u32 client_index;
u32 context;
u32 bd_id;
@@ -333,6 +335,49 @@ autoreply define bridge_domain_add_del
bool is_add [default=true];
};
+/** \brief L2 bridge domain add delete request version 2
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bd_id - if the id == ~0 creates a bridge domain with an unused id
+ if the id != ~0 the id of the bridge domain to create/delete
+ @param flood - enable/disable bcast/mcast flooding in the bd
+ @param uu_flood - enable/disable unknown unicast flood in the bd
+ @param forward - enable/disable forwarding on all interfaces in the bd
+ @param learn - enable/disable learning on all interfaces in the bd
+ @param arp_term - enable/disable arp termination in the bd
+ @param arp_ufwd - enable/disable arp unicast forwarding in the bd
+ @param mac_age - mac aging time in min, 0 for disabled
+ @param is_add - add or delete flag
+*/
+define bridge_domain_add_del_v2
+{
+ u32 client_index;
+ u32 context;
+ u32 bd_id;
+ bool flood;
+ bool uu_flood;
+ bool forward;
+ bool learn;
+ bool arp_term;
+ bool arp_ufwd;
+ u8 mac_age;
+ string bd_tag[64];
+ bool is_add [default=true];
+};
+
+/** \brief L2 bridge domain add delete version 2 response
+ @param context - sender context, to match reply w/ request
+ @param retval - return code for the set bridge flags request
+ @param resulting_id - the id for the new bridge domain
+*/
+define bridge_domain_add_del_v2_reply
+{
+ u32 context;
+ i32 retval;
+ u32 bd_id;
+};
+
+
/** \brief L2 bridge domain request operational state details
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/l2/l2_api.c b/src/vnet/l2/l2_api.c
index 5a0432de43d..035542d298d 100644
--- a/src/vnet/l2/l2_api.c
+++ b/src/vnet/l2/l2_api.c
@@ -3,6 +3,7 @@
* l2_api.c - layer 2 forwarding api
*
* Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2022 Nordix Foundation.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -67,7 +68,6 @@ vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
vec_foreach_index (sw_if_index, l2im->configs)
{
config = vec_elt_at_index (l2im->configs, sw_if_index);
@@ -75,7 +75,6 @@ vl_api_l2_xconnect_dump_t_handler (vl_api_l2_xconnect_dump_t * mp)
send_l2_xconnect_details (reg, mp->context, sw_if_index,
config->output_sw_if_index);
}
- /* *INDENT-ON* */
}
static void
@@ -413,12 +412,10 @@ vl_api_l2_flags_t_handler (vl_api_l2_flags_t * mp)
BAD_SW_IF_INDEX_LABEL;
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_L2_FLAGS_REPLY,
({
rmp->resulting_feature_bitmap = ntohl(rbm);
}));
- /* *INDENT-ON* */
}
static void
@@ -511,6 +508,37 @@ vl_api_bridge_domain_add_del_t_handler (vl_api_bridge_domain_add_del_t * mp)
}
static void
+vl_api_bridge_domain_add_del_v2_t_handler (
+ vl_api_bridge_domain_add_del_v2_t *mp)
+{
+ vl_api_bridge_domain_add_del_v2_reply_t *rmp;
+ u32 bd_id = ntohl (mp->bd_id);
+ int rv = 0;
+
+ if ((~0 == bd_id) && (mp->is_add))
+ bd_id = bd_get_unused_id ();
+
+ if ((~0 == bd_id) && (mp->is_add))
+ rv = VNET_API_ERROR_EAGAIN;
+ else
+ {
+ l2_bridge_domain_add_del_args_t a = { .is_add = mp->is_add,
+ .flood = mp->flood,
+ .uu_flood = mp->uu_flood,
+ .forward = mp->forward,
+ .learn = mp->learn,
+ .arp_term = mp->arp_term,
+ .arp_ufwd = mp->arp_ufwd,
+ .mac_age = mp->mac_age,
+ .bd_id = bd_id,
+ .bd_tag = mp->bd_tag };
+ rv = bd_add_del (&a);
+ }
+ REPLY_MACRO2 (VL_API_BRIDGE_DOMAIN_ADD_DEL_V2_REPLY,
+ ({ rmp->bd_id = htonl (bd_id); }));
+}
+
+static void
send_bridge_domain_details (l2input_main_t * l2im,
vl_api_registration_t * reg,
l2_bridge_domain_t * bd_config,
@@ -651,12 +679,10 @@ vl_api_bridge_flags_t_handler (vl_api_bridge_flags_t * mp)
bitmap = bd_set_flags (vm, bd_index, flags, mp->is_set);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BRIDGE_FLAGS_REPLY,
({
rmp->resulting_feature_bitmap = ntohl(bitmap);
}));
- /* *INDENT-ON* */
}
static void
@@ -918,7 +944,6 @@ vl_api_bd_ip_mac_dump_t_handler (vl_api_bd_ip_mac_dump_t * mp)
u64 mac64;
bd_id = bd_config->bd_id;
- /* *INDENT-OFF* */
hash_foreach (ip4_addr.as_u32, mac64, bd_config->mac_by_ip4,
({
ip46_address_t ip = {
@@ -940,7 +965,6 @@ vl_api_bd_ip_mac_dump_t_handler (vl_api_bd_ip_mac_dump_t * mp)
send_bd_ip_mac_entry (am, reg, bd_id, &ip, IP46_TYPE_IP6,
&mac, mp->context);
}));
- /* *INDENT-ON* */
}
}
}
@@ -1094,12 +1118,10 @@ vl_api_bvi_create_t_handler (vl_api_bvi_create_t * mp)
rv = l2_bvi_create (ntohl (mp->user_instance), &mac, &sw_if_index);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_BVI_CREATE_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1193,13 +1215,11 @@ l2_arp_term_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_arp_term_process_node) = {
.function = l2_arp_term_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "l2-arp-term-publisher",
};
-/* *INDENT-ON* */
static void
vl_api_want_l2_arp_term_events_t_handler (vl_api_want_l2_arp_term_events_t *
@@ -1280,14 +1300,15 @@ l2_api_hookup (vlib_main_t * vm)
{
api_main_t *am = vlibapi_get_main ();
- /* Mark VL_API_BRIDGE_DOMAIN_DUMP as mp safe */
- am->is_mp_safe[VL_API_BRIDGE_DOMAIN_DUMP] = 1;
-
/*
* Set up the (msg_name, crc, message-id) table
*/
REPLY_MSG_ID_BASE = setup_message_id_table ();
+ /* Mark VL_API_BRIDGE_DOMAIN_DUMP as mp safe */
+ vl_api_set_msg_thread_safe (
+ am, REPLY_MSG_ID_BASE + VL_API_BRIDGE_DOMAIN_DUMP, 1);
+
return 0;
}
diff --git a/src/vnet/l2/l2_arp_term.c b/src/vnet/l2/l2_arp_term.c
index 17c8b1d84d0..eed9b7af7c3 100644
--- a/src/vnet/l2/l2_arp_term.c
+++ b/src/vnet/l2/l2_arp_term.c
@@ -25,6 +25,7 @@
#include <vnet/ip/ip6_packet.h>
#include <vnet/ip/icmp6.h>
#include <vnet/ip/ip6.h>
+#include <vnet/ip/ip.api_enum.h>
#include <vnet/ip/format.h>
#include <vnet/ethernet/arp_packet.h>
@@ -289,6 +290,9 @@ arp_term_l2bd (vlib_main_t * vm,
ethertype0 = clib_net_to_host_u16 (*(u16 *) (l3h0 - 2));
arp0 = (ethernet_arp_header_t *) l3h0;
+ if (p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED)
+ goto next_l2_feature;
+
if (ethertype0 != ETHERNET_TYPE_ARP)
goto check_ip6_nd;
@@ -445,7 +449,6 @@ arp_term_l2bd (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
.function = arp_term_l2bd,
.name = "arp-term-l2bd",
@@ -460,7 +463,6 @@ VLIB_REGISTER_NODE (arp_term_l2bd_node, static) = {
.format_buffer = format_ethernet_arp_header,
.format_trace = format_arp_term_input_trace,
};
-/* *INDENT-ON* */
clib_error_t *
arp_term_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_bd.c b/src/vnet/l2/l2_bd.c
index 7e6ea60b440..c7392c03b58 100644
--- a/src/vnet/l2/l2_bd.c
+++ b/src/vnet/l2/l2_bd.c
@@ -102,12 +102,10 @@ bd_free_ip_mac_tables (l2_bridge_domain_t * bd)
ip6_address_t *ip6_addr_key;
hash_free (bd->mac_by_ip4);
- /* *INDENT-OFF* */
hash_foreach_mem (ip6_addr_key, mac_addr, bd->mac_by_ip6,
({
clib_mem_free (ip6_addr_key); /* free memory used for ip6 addr key */
}));
- /* *INDENT-ON* */
hash_free (bd->mac_by_ip6);
}
@@ -454,13 +452,11 @@ done:
* Example of how to disable learning (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain learn 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_learn_cli, static) = {
.path = "set bridge-domain learn",
.short_help = "set bridge-domain learn <bridge-domain-id> [disable]",
.function = bd_learn,
};
-/* *INDENT-ON* */
static clib_error_t *
bd_default_learn_limit (vlib_main_t *vm, unformat_input_t *input,
@@ -547,13 +543,11 @@ done:
* Example of how to disable forwarding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain forward 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_fwd_cli, static) = {
.path = "set bridge-domain forward",
.short_help = "set bridge-domain forward <bridge-domain-id> [disable]",
.function = bd_fwd,
};
-/* *INDENT-ON* */
/**
Set bridge-domain flood enable/disable.
@@ -612,13 +606,11 @@ done:
* Example of how to disable flooding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain flood 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_flood_cli, static) = {
.path = "set bridge-domain flood",
.short_help = "set bridge-domain flood <bridge-domain-id> [disable]",
.function = bd_flood,
};
-/* *INDENT-ON* */
/**
Set bridge-domain unknown-unicast flood enable/disable.
@@ -677,13 +669,11 @@ done:
* Example of how to disable unknown-unicast flooding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain uu-flood 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_uu_flood_cli, static) = {
.path = "set bridge-domain uu-flood",
.short_help = "set bridge-domain uu-flood <bridge-domain-id> [disable]",
.function = bd_uu_flood,
};
-/* *INDENT-ON* */
/**
Set bridge-domain arp-unicast forward enable/disable.
@@ -742,13 +732,11 @@ done:
* Example of how to disable arp-unicast forwarding (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain arp-ufwd 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_arp_ufwd_cli, static) = {
.path = "set bridge-domain arp-ufwd",
.short_help = "set bridge-domain arp-ufwd <bridge-domain-id> [disable]",
.function = bd_arp_ufwd,
};
-/* *INDENT-ON* */
/**
Set bridge-domain arp term enable/disable.
@@ -854,13 +842,11 @@ done:
* Example of how to disable mac aging (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain flood 200 0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_mac_age_cli, static) = {
.path = "set bridge-domain mac-age",
.short_help = "set bridge-domain mac-age <bridge-domain-id> <mins>",
.function = bd_mac_age,
};
-/* *INDENT-ON* */
static clib_error_t *
bd_learn_limit (vlib_main_t *vm, unformat_input_t *input,
@@ -921,13 +907,11 @@ VLIB_CLI_COMMAND (bd_learn_limit_cli, static) = {
* Example of how to disable ARP termination (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain arp term 200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_arp_term_cli, static) = {
.path = "set bridge-domain arp term",
.short_help = "set bridge-domain arp term <bridge-domain-id> [disable]",
.function = bd_arp_term,
};
-/* *INDENT-ON* */
/**
@@ -1119,13 +1103,11 @@ done:
* Example of how to delete an ARP entry (where 200 is the bridge-domain-id):
* @cliexcmd{set bridge-domain arp entry 200 192.168.72.45 52:54:00:3b:83:1a del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_arp_entry_cli, static) = {
.path = "set bridge-domain arp entry",
.short_help = "set bridge-domain arp entry <bridge-domain-id> [<ip-addr> <mac-addr> [del] | del-all]",
.function = bd_arp_entry,
};
-/* *INDENT-ON* */
static u8 *
format_uu_cfg (u8 * s, va_list * args)
@@ -1289,7 +1271,6 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
vlib_cli_output (vm,
"\n IP4/IP6 to MAC table for ARP Termination");
- /* *INDENT-OFF* */
hash_foreach (ip4_addr, mac_addr, bd_config->mac_by_ip4,
({
vlib_cli_output (vm, "%=40U => %=20U",
@@ -1303,7 +1284,6 @@ bd_show (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
format_ip6_address, ip6_addr,
format_ethernet_address, &mac_addr);
}));
- /* *INDENT-ON* */
}
if ((detail || bd_tag) && (bd_config->bd_tag))
@@ -1349,13 +1329,11 @@ done:
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_show_cli, static) = {
.path = "show bridge-domain",
.short_help = "show bridge-domain [bridge-domain-id [detail|int|arp|bd-tag]]",
.function = bd_show,
};
-/* *INDENT-ON* */
int
bd_add_del (l2_bridge_domain_add_del_args_t * a)
@@ -1493,8 +1471,15 @@ bd_add_del_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (bd_id == ~0)
{
- error = clib_error_return (0, "bridge-domain-id not specified");
- goto done;
+ if (is_add)
+ {
+ bd_id = bd_get_unused_id ();
+ }
+ else
+ {
+ error = clib_error_return (0, "bridge-domain-id not specified");
+ goto done;
+ }
}
if (bd_id == 0)
@@ -1587,7 +1572,6 @@ done:
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (bd_create_cli, static) = {
.path = "create bridge-domain",
.short_help = "create bridge-domain <bridge-domain-id>"
@@ -1595,9 +1579,38 @@ VLIB_CLI_COMMAND (bd_create_cli, static) = {
" [arp-ufwd <0|1>] [mac-age <nn>] [bd-tag <tag>] [del]",
.function = bd_add_del_command_fn,
};
-/* *INDENT-ON* */
+/*
+ * Returns an unused bridge domain id, and ~0 if it can't find one.
+ */
+u32
+bd_get_unused_id (void)
+{
+ bd_main_t *bdm = &bd_main;
+ int i, j;
+ static u32 seed = 0;
+ /* limit to 1M tries */
+ for (j = 0; j < 1 << 10; j++)
+ {
+ seed = random_u32 (&seed);
+ for (i = 0; i < 1 << 10; i++)
+ {
+ /*
+ * iterate seed+0, seed+1, seed-1, seed+2, seed-2, ... to generate id
+ */
+ seed += (2 * (i % 2) - 1) * i;
+ /* bd_id must be (1 <= bd_id <= L2_BD_ID_MAX) */
+ seed &= L2_BD_ID_MAX;
+ if (seed == 0)
+ continue;
+ if (bd_find_index (bdm, seed) == ~0)
+ return seed;
+ }
+ }
+
+ return ~0;
+}
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_bd.h b/src/vnet/l2/l2_bd.h
index 0d77292519d..082d210b972 100644
--- a/src/vnet/l2/l2_bd.h
+++ b/src/vnet/l2/l2_bd.h
@@ -2,6 +2,7 @@
* l2_bd.h : layer 2 bridge domain
*
* Copyright (c) 2013 Cisco and/or its affiliates.
+ * Copyright (c) 2022 Nordix Foundation.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
@@ -166,7 +167,7 @@ u32 bd_set_flags (vlib_main_t * vm, u32 bd_index, bd_flags_t flags,
void bd_set_mac_age (vlib_main_t * vm, u32 bd_index, u8 age);
void bd_set_learn_limit (vlib_main_t *vm, u32 bd_index, u32 learn_limit);
int bd_add_del (l2_bridge_domain_add_del_args_t * args);
-
+u32 bd_get_unused_id (void);
/**
* \brief Get a bridge domain.
*
diff --git a/src/vnet/l2/l2_bvi.c b/src/vnet/l2/l2_bvi.c
index e5623682657..e39c4aae39d 100644
--- a/src/vnet/l2/l2_bvi.c
+++ b/src/vnet/l2/l2_bvi.c
@@ -58,14 +58,12 @@ bvi_mac_change (vnet_hw_interface_t * hi,
return (NULL);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (bvi_device_class) = {
.name = "BVI",
.format_device_name = format_bvi_name,
.admin_up_down_function = bvi_admin_up_down,
.mac_addr_change_function = bvi_mac_change,
};
-/* *INDENT-ON* */
/*
* Maintain a bitmap of allocated bvi instance numbers.
@@ -138,13 +136,11 @@ l2_bvi_create (u32 user_instance,
{
vnet_main_t *vnm = vnet_get_main ();
vlib_main_t *vm = vlib_get_main ();
+ vnet_eth_interface_registration_t eir = {};
u32 instance, hw_if_index, slot;
vnet_hw_interface_t *hw_if;
- clib_error_t *error;
mac_address_t mac;
- int rv = 0;
-
ASSERT (sw_if_indexp);
*sw_if_indexp = (u32) ~ 0;
@@ -178,17 +174,10 @@ l2_bvi_create (u32 user_instance,
mac_address_copy (&mac, mac_in);
}
- error = ethernet_register_interface (vnm,
- bvi_device_class.index,
- instance, mac.bytes, &hw_if_index,
- /* flag change */ 0);
-
- if (error)
- {
- rv = VNET_API_ERROR_INVALID_REGISTRATION;
- clib_error_report (error);
- return rv;
- }
+ eir.dev_class_index = bvi_device_class.index;
+ eir.dev_instance = instance;
+ eir.address = mac.bytes;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
hw_if = vnet_get_hw_interface (vnm, hw_if_index);
@@ -282,13 +271,11 @@ l2_bvi_create_cli (vlib_main_t * vm,
* Example of how to create a bvi interface:
* @cliexcmd{bvi create}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_bvi_create_command, static) = {
.path = "bvi create",
.short_help = "bvi create [mac <mac-addr>] [instance <instance>]",
.function = l2_bvi_create_cli,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_bvi_delete_cli (vlib_main_t * vm,
@@ -333,13 +320,11 @@ l2_bvi_delete_cli (vlib_main_t * vm,
* Example of how to create a bvi interface:
* @cliexcmd{bvi delete bvi0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_bvi_delete_command, static) = {
.path = "bvi delete",
.short_help = "bvi delete <interface>",
.function = l2_bvi_delete_cli,
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/l2/l2_classify.h b/src/vnet/l2/l2_classify.h
index 68a2bb98e64..3c86fb5ca86 100644
--- a/src/vnet/l2/l2_classify.h
+++ b/src/vnet/l2/l2_classify.h
@@ -39,7 +39,6 @@ typedef enum
L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT,
L2_INPUT_CLASSIFY_NEXT_IP4_INPUT,
L2_INPUT_CLASSIFY_NEXT_IP6_INPUT,
- L2_INPUT_CLASSIFY_NEXT_LI,
L2_INPUT_CLASSIFY_N_NEXT,
} l2_input_classify_next_t;
diff --git a/src/vnet/l2/l2_efp_filter.c b/src/vnet/l2/l2_efp_filter.c
index ad325b83df2..47256ffa5d3 100644
--- a/src/vnet/l2/l2_efp_filter.c
+++ b/src/vnet/l2/l2_efp_filter.c
@@ -461,7 +461,6 @@ VLIB_NODE_FN (l2_efp_filter_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_efp_filter_node) = {
.name = "l2-efp-filter",
.vector_size = sizeof (u32),
@@ -478,7 +477,6 @@ VLIB_REGISTER_NODE (l2_efp_filter_node) = {
[L2_EFP_FILTER_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -559,13 +557,11 @@ done:
* Example of how to disable a Layer 2 efp-filter on a sub-interface:
* @cliexcmd{set interface l2 efp-filter GigabitEthernet0/8/0.200 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_efp_filter_cli, static) = {
.path = "set interface l2 efp-filter",
.short_help = "set interface l2 efp-filter <interface> [disable]",
.function = int_l2_efp_filter,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
diff --git a/src/vnet/l2/l2_fib.c b/src/vnet/l2/l2_fib.c
index d1ee82273b1..3dcd1e7ae26 100644
--- a/src/vnet/l2/l2_fib.c
+++ b/src/vnet/l2/l2_fib.c
@@ -95,8 +95,7 @@ format_vnet_sw_if_index_name_with_NA (u8 * s, va_list * args)
if (!swif)
return format (s, "Stale");
- return format (s, "%U", format_vnet_sw_interface_name, vnm,
- vnet_get_sw_interface_or_null (vnm, sw_if_index));
+ return format (s, "%U", format_vnet_sw_if_index_name, vnm, sw_if_index);
}
typedef struct l2fib_dump_walk_ctx_t_
@@ -353,13 +352,11 @@ show_l2fib (vlib_main_t * vm,
* 3 l2fib entries
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2fib_cli, static) = {
.path = "show l2fib",
.short_help = "show l2fib [all] | [bd_id <nn> | bd_index <nn>] [learn | add] | [raw]",
.function = show_l2fib,
};
-/* *INDENT-ON* */
void
l2fib_table_init (void)
@@ -416,13 +413,11 @@ clear_l2fib (vlib_main_t * vm,
* no l2fib entries
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_l2fib_cli, static) = {
.path = "clear l2fib",
.short_help = "clear l2fib",
.function = clear_l2fib,
};
-/* *INDENT-ON* */
static l2fib_seq_num_t
l2fib_cur_seq_num (u32 bd_index, u32 sw_if_index)
@@ -593,20 +588,18 @@ done:
* 3 l2fib entries
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_add_cli, static) = {
.path = "l2fib add",
.short_help = "l2fib add <mac> <bridge-domain-id> filter | <intf> [static | bvi]",
.function = l2fib_add,
};
-/* *INDENT-ON* */
static clib_error_t *
l2fib_test_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
- u8 mac[6], save_mac[6];
+ u8 mac[8], save_mac[6];
u32 bd_index = 0;
u32 sw_if_index = 8;
u32 is_add = 0;
@@ -724,13 +717,11 @@ l2fib_test_command_fn (vlib_main_t * vm,
* @cliexcmd{test l2fib del mac 52:54:00:53:00:00 count 4}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_test_command, static) = {
.path = "test l2fib",
.short_help = "test l2fib [add|del|check] mac <base-addr> count <nn>",
.function = l2fib_test_command_fn,
};
-/* *INDENT-ON* */
/**
@@ -833,13 +824,11 @@ done:
* Example of how to delete a MAC Address entry from the L2 FIB table of a bridge-domain (where 200 is the bridge-domain-id):
* @cliexcmd{l2fib del 52:54:00:53:18:33 200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_del_cli, static) = {
.path = "l2fib del",
.short_help = "l2fib del <mac> <bridge-domain-id> []",
.function = l2fib_del,
};
-/* *INDENT-ON* */
static clib_error_t *
l2fib_set_scan_delay (vlib_main_t *vm, unformat_input_t *input,
@@ -977,13 +966,11 @@ l2fib_flush_mac_all (vlib_main_t * vm,
* Example of how to flush MAC Address entries learned on an interface from the L2 FIB table:
* @cliexcmd{l2fib flush-mac interface GigabitEthernet2/1/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = {
.path = "l2fib flush-mac all",
.short_help = "l2fib flush-mac all",
.function = l2fib_flush_mac_all,
};
-/* *INDENT-ON* */
/*?
* This command kick off ager to delete all existing MAC Address entries,
@@ -993,13 +980,11 @@ VLIB_CLI_COMMAND (l2fib_flush_mac_all_cli, static) = {
* Example of how to flush MAC Address entries learned on an interface from the L2 FIB table:
* @cliexcmd{l2fib flush-mac interface GigabitEthernet2/1/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_flush_mac_int_cli, static) = {
.path = "l2fib flush-mac interface",
.short_help = "l2fib flush-mac interface <if-name>",
.function = l2fib_flush_mac_int,
};
-/* *INDENT-ON* */
/**
Flush bridge-domain MACs except static ones.
@@ -1042,13 +1027,11 @@ done:
* Example of how to flush MAC Address entries learned in a bridge domain from the L2 FIB table:
* @cliexcmd{l2fib flush-mac bridge-domain 1000}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2fib_flush_mac_bd_cli, static) = {
.path = "l2fib flush-mac bridge-domain",
.short_help = "l2fib flush-mac bridge-domain <bd-id>",
.function = l2fib_flush_mac_bd,
};
-/* *INDENT-ON* */
clib_error_t *
l2fib_sw_interface_up_down (vnet_main_t * vnm, u32 sw_if_index, u32 flags)
@@ -1149,7 +1132,7 @@ l2fib_scan (vlib_main_t * vm, f64 start_time, u8 event_only)
{
for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
{
- if (v->kvp[k].key == ~0ULL && v->kvp[k].value == ~0ULL)
+ if (BV (clib_bihash_is_free) (&v->kvp[k]))
continue;
l2fib_entry_key_t key = {.raw = v->kvp[k].key };
@@ -1366,13 +1349,11 @@ l2fib_mac_age_scanner_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2fib_mac_age_scanner_process_node) = {
.function = l2fib_mac_age_scanner_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "l2fib-mac-age-scanner-process",
};
-/* *INDENT-ON* */
clib_error_t *
l2fib_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_fib.h b/src/vnet/l2/l2_fib.h
index 7f7cd761e20..e24d427b4e2 100644
--- a/src/vnet/l2/l2_fib.h
+++ b/src/vnet/l2/l2_fib.h
@@ -240,29 +240,9 @@ l2fib_compute_hash_bucket (l2fib_entry_key_t * key)
always_inline u64
l2fib_make_key (const u8 * mac_address, u16 bd_index)
{
- u64 temp;
-
- /*
- * The mac address in memory is A:B:C:D:E:F
- * The bd id in register is H:L
- */
-#if CLIB_ARCH_IS_LITTLE_ENDIAN
- /*
- * Create the in-register key as F:E:D:C:B:A:H:L
- * In memory the key is L:H:A:B:C:D:E:F
- */
- temp = CLIB_MEM_OVERFLOW_LOAD (*, (u64 *) mac_address) << 16;
- temp = (temp & ~0xffff) | (u64) (bd_index);
-#else
- /*
- * Create the in-register key as H:L:A:B:C:D:E:F
- * In memory the key is H:L:A:B:C:D:E:F
- */
- temp = CLIB_MEM_OVERFLOW_LOAD (*, (u64 *) mac_address) >> 16;
- temp = temp | (((u64) bd_index) << 48);
-#endif
-
- return temp;
+ l2fib_entry_key_t key = { .fields.bd_index = bd_index };
+ clib_memcpy_fast (&key.fields.mac, mac_address, sizeof (key.fields.mac));
+ return key.raw;
}
diff --git a/src/vnet/l2/l2_flood.c b/src/vnet/l2/l2_flood.c
index c0d7bf8dfab..f8cb3cb5687 100644
--- a/src/vnet/l2/l2_flood.c
+++ b/src/vnet/l2/l2_flood.c
@@ -362,7 +362,6 @@ VLIB_NODE_FN (l2flood_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2flood_node) = {
.name = "l2-flood",
.vector_size = sizeof (u32),
@@ -380,7 +379,6 @@ VLIB_REGISTER_NODE (l2flood_node) = {
[L2FLOOD_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -468,13 +466,11 @@ done:
* Example of how to disable flooding:
* @cliexcmd{set interface l2 flood GigabitEthernet0/8/0 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_flood_cli, static) = {
.path = "set interface l2 flood",
.short_help = "set interface l2 flood <interface> [disable]",
.function = int_flood,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_fwd.c b/src/vnet/l2/l2_fwd.c
index 3414f6c490e..503dfc27957 100644
--- a/src/vnet/l2/l2_fwd.c
+++ b/src/vnet/l2/l2_fwd.c
@@ -215,8 +215,7 @@ l2fwd_process (vlib_main_t * vm,
* unless some other feature is inserted before uu_flood
*/
if (vnet_buffer (b0)->l2.feature_bitmap &
- (L2INPUT_FEAT_UU_FLOOD |
- L2INPUT_FEAT_UU_FWD | L2INPUT_FEAT_GBP_FWD))
+ (L2INPUT_FEAT_UU_FLOOD | L2INPUT_FEAT_UU_FWD))
{
*next0 = vnet_l2_feature_next (b0, msm->feat_next_node_index,
L2INPUT_FEAT_FWD);
@@ -289,7 +288,6 @@ l2fwd_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
#ifdef COUNTERS
em->counters[node_counter_base_index + L2FWD_ERROR_L2FWD] += 4;
#endif
- /* *INDENT-OFF* */
l2fib_lookup_4 (msm->mac_table, &cached_key, &cached_result,
h0->dst_address, h1->dst_address,
h2->dst_address, h3->dst_address,
@@ -305,7 +303,6 @@ l2fwd_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
&result1,
&result2,
&result3);
- /* *INDENT-ON* */
l2fwd_process (vm, node, msm, em, b[0], sw_if_index0, &result0, next);
l2fwd_process (vm, node, msm, em, b[1], sw_if_index1, &result1,
next + 1);
@@ -415,7 +412,6 @@ VLIB_NODE_FN (l2fwd_node) (vlib_main_t * vm,
return l2fwd_node_inline (vm, node, frame, 0 /* do_trace */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2fwd_node) = {
.name = "l2-fwd",
.vector_size = sizeof (u32),
@@ -433,7 +429,6 @@ VLIB_REGISTER_NODE (l2fwd_node) = {
[L2FWD_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -528,13 +523,11 @@ done:
* Example of how to disable forwarding:
* @cliexcmd{set interface l2 forward GigabitEthernet0/8/0 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_fwd_cli, static) = {
.path = "set interface l2 forward",
.short_help = "set interface l2 forward <interface> [disable]",
.function = int_fwd,
};
-/* *INDENT-ON* */
#endif
diff --git a/src/vnet/l2/l2_in_out_acl.c b/src/vnet/l2/l2_in_out_acl.c
index f8293c1feee..2e2cb1e7f36 100644
--- a/src/vnet/l2/l2_in_out_acl.c
+++ b/src/vnet/l2/l2_in_out_acl.c
@@ -278,7 +278,7 @@ l2_in_out_acl_node_fn (vlib_main_t * vm,
u32 table_index0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
- u64 hash0;
+ u32 hash0;
u8 *h0;
u8 error0;
@@ -288,7 +288,7 @@ l2_in_out_acl_node_fn (vlib_main_t * vm,
vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
vnet_classify_table_t *tp1;
u32 table_index1;
- u64 phash1;
+ u32 phash1;
table_index1 = vnet_buffer (p1)->l2_classify.table_index;
@@ -464,7 +464,6 @@ VLIB_NODE_FN (l2_outacl_node) (vlib_main_t * vm,
IN_OUT_ACL_OUTPUT_TABLE_GROUP);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_inacl_node) = {
.name = "l2-input-acl",
.vector_size = sizeof (u32),
@@ -498,7 +497,6 @@ VLIB_REGISTER_NODE (l2_outacl_node) = {
[ACL_NEXT_INDEX_DENY] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/l2/l2_in_out_feat_arc.c b/src/vnet/l2/l2_in_out_feat_arc.c
index b3b4a8cbb73..26fbd3eb776 100644
--- a/src/vnet/l2/l2_in_out_feat_arc.c
+++ b/src/vnet/l2/l2_in_out_feat_arc.c
@@ -257,7 +257,8 @@ l2_in_out_feat_arc_node_fn (vlib_main_t * vm,
sw_if_index = sw_if_indices;
n_left = frame->n_vectors;
- CLIB_PREFETCH (next_node_indices, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (next_node_indices,
+ sizeof (fam->feat_next_node_index[is_output]), LOAD);
while (n_left > 3 * L2_FEAT_ARC_VEC_SIZE)
{
@@ -395,7 +396,6 @@ vnet_l2_in_out_feat_arc_enable_disable (u32 sw_if_index, int is_output,
}
#endif /* CLIB_MARCH_VARIANT */
-/* *INDENT-OFF* */
VNET_FEATURE_ARC_INIT (l2_in_ip4_arc, static) =
{
.arc_name = "l2-input-ip4",
@@ -437,10 +437,8 @@ VNET_FEATURE_ARC_INIT (l2_in_nonip_arc, static) =
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_in_feat_arc_node) = {
.name = "l2-input-feat-arc",
.vector_size = sizeof (u32),
@@ -520,7 +518,6 @@ VNET_FEATURE_INIT (l2_out_nonip_arc_end, static) =
.node_name = "l2-output-feat-arc-end",
.runs_before = 0, /* not before any other features */
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
diff --git a/src/vnet/l2/l2_input.c b/src/vnet/l2/l2_input.c
index de22cef600e..23bd5cc9958 100644
--- a/src/vnet/l2/l2_input.c
+++ b/src/vnet/l2/l2_input.c
@@ -646,13 +646,11 @@ done:
* Example of how to remove an interface from a Layer2 bridge-domain:
* @cliexcmd{set interface l3 GigabitEthernet0/a/0.200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_bridge_cli, static) = {
.path = "set interface l2 bridge",
.short_help = "set interface l2 bridge <interface> <bridge-domain-id> [bvi|uu-fwd] [shg]",
.function = int_l2_bridge,
};
-/* *INDENT-ON* */
/**
* Set subinterface in xconnect mode with another interface.
@@ -712,13 +710,11 @@ done:
* @cliexcmd{set interface l3 GigabitEthernet0/8/0.300}
* @cliexcmd{set interface l3 GigabitEthernet0/9/0.300}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_xc_cli, static) = {
.path = "set interface l2 xconnect",
.short_help = "set interface l2 xconnect <interface> <peer interface>",
.function = int_l2_xc,
};
-/* *INDENT-ON* */
/**
* Set subinterface in L3 mode.
@@ -762,13 +758,11 @@ done:
* Example of how to set the mode of an interface to Layer 3:
* @cliexcmd{set interface l3 GigabitEthernet0/8/0.200}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l3_cli, static) = {
.path = "set interface l3",
.short_help = "set interface l3 <interface>",
.function = int_l3,
};
-/* *INDENT-ON* */
/**
* Show interface mode.
@@ -809,10 +803,8 @@ show_int_mode (vlib_main_t * vm,
{
/* Gather interfaces. */
sis = vec_new (vnet_sw_interface_t, pool_elts (im->sw_interfaces));
- _vec_len (sis) = 0;
- /* *INDENT-OFF* */
+ vec_set_len (sis, 0);
pool_foreach (si, im->sw_interfaces) { vec_add1 (sis, si[0]); }
- /* *INDENT-ON* */
}
vec_foreach (si, sis)
@@ -878,13 +870,11 @@ done:
* l2 bridge GigabitEthernet0/8/0.200 bd_id 200 shg 0
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2_mode, static) = {
.path = "show mode",
.short_help = "show mode [<if-name1> <if-name2> ...]",
.function = show_int_mode,
};
-/* *INDENT-ON* */
#define foreach_l2_init_function \
_(feat_bitmap_drop_init) \
diff --git a/src/vnet/l2/l2_input.h b/src/vnet/l2/l2_input.h
index ba4c4b6ed31..3de1537b45e 100644
--- a/src/vnet/l2/l2_input.h
+++ b/src/vnet/l2/l2_input.h
@@ -27,6 +27,7 @@
#include <vnet/ethernet/packet.h>
#include <vnet/ip/ip4_inlines.h>
#include <vnet/ip/ip6_inlines.h>
+#include <vnet/mpls/mpls_lookup.h>
/* l2 connection type */
typedef enum l2_input_flags_t_
@@ -136,17 +137,10 @@ l2input_bd_config (u32 bd_index)
_(ARP_UFWD, "l2-uu-fwd") \
_(ARP_TERM, "arp-term-l2bd") \
_(UU_FLOOD, "l2-flood") \
- _(GBP_FWD, "gbp-fwd") \
_(UU_FWD, "l2-uu-fwd") \
_(FWD, "l2-fwd") \
_(RW, "l2-rw") \
_(LEARN, "l2-learn") \
- _(L2_EMULATION, "l2-emulation") \
- _(GBP_LEARN, "gbp-learn-l2") \
- _(GBP_LPM_ANON_CLASSIFY, "l2-gbp-lpm-anon-classify") \
- _(GBP_NULL_CLASSIFY, "gbp-null-classify") \
- _(GBP_SRC_CLASSIFY, "gbp-src-classify") \
- _(GBP_LPM_CLASSIFY, "l2-gbp-lpm-classify") \
_(VTR, "l2-input-vtr") \
_(L2_IP_QOS_RECORD, "l2-ip-qos-record") \
_(VPATH, "vpath-input-l2") \
@@ -334,7 +328,7 @@ vnet_update_l2_len (vlib_buffer_t *b)
/*
* Compute flow hash of an ethernet packet, use 5-tuple hash if L3 packet
- * is ip4 or ip6. Otherwise hash on smac/dmac/etype.
+ * is ip4, ip6, or mpls. Otherwise hash on smac/dmac/etype.
* The vlib buffer current pointer is expected to be at ethernet header
* and vnet l2.l2_len is expected to be setup already.
*/
@@ -349,6 +343,9 @@ vnet_l2_compute_flow_hash (vlib_buffer_t * b)
return ip4_compute_flow_hash ((ip4_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
else if (ethertype == ETHERNET_TYPE_IP6)
return ip6_compute_flow_hash ((ip6_header_t *) l3h, IP_FLOW_HASH_DEFAULT);
+ else if (ethertype == ETHERNET_TYPE_MPLS)
+ return mpls_compute_flow_hash ((mpls_unicast_header_t *) l3h,
+ IP_FLOW_HASH_DEFAULT);
else
{
u32 a, b, c;
diff --git a/src/vnet/l2/l2_input_classify.c b/src/vnet/l2/l2_input_classify.c
index 53d46399daf..cc031bd46a5 100644
--- a/src/vnet/l2/l2_input_classify.c
+++ b/src/vnet/l2/l2_input_classify.c
@@ -179,8 +179,7 @@ VLIB_NODE_FN (l2_input_classify_node) (vlib_main_t * vm,
int type_index0, type_index1;
vnet_classify_table_t *t0, *t1;
u32 table_index0, table_index1;
- u64 hash0, hash1;
-
+ u32 hash0, hash1;
/* prefetch next iteration */
{
@@ -265,7 +264,7 @@ VLIB_NODE_FN (l2_input_classify_node) (vlib_main_t * vm,
u32 type_index0;
vnet_classify_table_t *t0;
u32 table_index0;
- u64 hash0;
+ u32 hash0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
@@ -316,14 +315,14 @@ VLIB_NODE_FN (l2_input_classify_node) (vlib_main_t * vm,
u32 next0 = ~0; /* next l2 input feature, please... */
ethernet_header_t *h0;
u32 table_index0;
- u64 hash0;
+ u32 hash0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
if (PREDICT_TRUE (n_left_from > 2))
{
vlib_buffer_t *p2 = vlib_get_buffer (vm, from[2]);
- u64 phash2;
+ u32 phash2;
u32 table_index2;
vnet_classify_table_t *tp2;
@@ -443,7 +442,6 @@ VLIB_NODE_FN (l2_input_classify_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_input_classify_node) = {
.name = "l2-input-classify",
.vector_size = sizeof (u32),
@@ -463,10 +461,8 @@ VLIB_REGISTER_NODE (l2_input_classify_node) = {
[L2_INPUT_CLASSIFY_NEXT_ETHERNET_INPUT] = "ethernet-input-not-l2",
[L2_INPUT_CLASSIFY_NEXT_IP4_INPUT] = "ip4-input",
[L2_INPUT_CLASSIFY_NEXT_IP6_INPUT] = "ip6-input",
- [L2_INPUT_CLASSIFY_NEXT_LI] = "li-hit",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/** l2 input classsifier feature initialization. */
@@ -643,7 +639,6 @@ int_l2_input_classify_command_fn (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_input_classify_cli, static) = {
.path = "set interface l2 input classify",
.short_help =
@@ -651,7 +646,6 @@ VLIB_CLI_COMMAND (int_l2_input_classify_cli, static) = {
" [ip6-table <n>] [other-table <n>]",
.function = int_l2_input_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_input_node.c b/src/vnet/l2/l2_input_node.c
index 3638a8aa00d..76b94809eb3 100644
--- a/src/vnet/l2/l2_input_node.c
+++ b/src/vnet/l2/l2_input_node.c
@@ -141,9 +141,8 @@ classify_and_dispatch (l2input_main_t * msm, vlib_buffer_t * b0, u16 * next0)
u8 protocol = ((ip6_header_t *) l3h0)->protocol;
/* Disable bridge forwarding (flooding will execute instead if not xconnect) */
- feat_mask &= ~(L2INPUT_FEAT_FWD |
- L2INPUT_FEAT_UU_FLOOD |
- L2INPUT_FEAT_UU_FWD | L2INPUT_FEAT_GBP_FWD);
+ feat_mask &=
+ ~(L2INPUT_FEAT_FWD | L2INPUT_FEAT_UU_FLOOD | L2INPUT_FEAT_UU_FWD);
if (ethertype != ETHERNET_TYPE_ARP)
feat_mask &= ~(L2INPUT_FEAT_ARP_UFWD);
@@ -252,11 +251,11 @@ l2input_node_inline (vlib_main_t * vm,
/* Prefetch next iteration. */
{
- /* Prefetch the buffer header and packet for the N+2 loop iteration */
- vlib_prefetch_buffer_header (b[4], LOAD);
- vlib_prefetch_buffer_header (b[5], LOAD);
- vlib_prefetch_buffer_header (b[6], LOAD);
- vlib_prefetch_buffer_header (b[7], LOAD);
+ /* Prefetch the buffer header for the N+2 loop iteration */
+ clib_prefetch_store (b[4]);
+ clib_prefetch_store (b[5]);
+ clib_prefetch_store (b[6]);
+ clib_prefetch_store (b[7]);
clib_prefetch_store (b[4]->data);
clib_prefetch_store (b[5]->data);
@@ -366,7 +365,6 @@ VLIB_NODE_FN (l2input_node) (vlib_main_t * vm,
return l2input_node_inline (vm, node, frame, 0 /* do_trace */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2input_node) = {
.name = "l2-input",
.vector_size = sizeof (u32),
@@ -386,7 +384,6 @@ VLIB_REGISTER_NODE (l2input_node) = {
[L2INPUT_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_input_vtr.c b/src/vnet/l2/l2_input_vtr.c
index 3c1235bfa32..ccf3efa2390 100644
--- a/src/vnet/l2/l2_input_vtr.c
+++ b/src/vnet/l2/l2_input_vtr.c
@@ -319,7 +319,6 @@ VLIB_NODE_FN (l2_invtr_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_invtr_node) = {
.name = "l2-input-vtr",
.vector_size = sizeof (u32),
@@ -336,7 +335,6 @@ VLIB_REGISTER_NODE (l2_invtr_node) = {
[L2_INVTR_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
diff --git a/src/vnet/l2/l2_learn.c b/src/vnet/l2/l2_learn.c
index 6d90cee62a7..24b5389e55a 100644
--- a/src/vnet/l2/l2_learn.c
+++ b/src/vnet/l2/l2_learn.c
@@ -439,7 +439,6 @@ VLIB_NODE_FN (l2learn_node) (vlib_main_t * vm,
return l2learn_node_inline (vm, node, frame, 0 /* do_trace */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2learn_node) = {
.name = "l2-learn",
.vector_size = sizeof (u32),
@@ -457,7 +456,6 @@ VLIB_REGISTER_NODE (l2learn_node) = {
[L2LEARN_NEXT_L2FWD] = "l2-fwd",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -540,13 +538,11 @@ done:
* Example of how to disable learning:
* @cliexcmd{set interface l2 learn GigabitEthernet0/8/0 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_learn_cli, static) = {
.path = "set interface l2 learn",
.short_help = "set interface l2 learn <interface> [disable]",
.function = int_learn,
};
-/* *INDENT-ON* */
static clib_error_t *
diff --git a/src/vnet/l2/l2_output.c b/src/vnet/l2/l2_output.c
index ba40de316d1..7c70cf9f4c7 100644
--- a/src/vnet/l2/l2_output.c
+++ b/src/vnet/l2/l2_output.c
@@ -22,6 +22,7 @@
#include <vppinfra/error.h>
#include <vppinfra/hash.h>
+#include <vppinfra/vector/count_equal.h>
#include <vnet/l2/feat_bitmap.h>
#include <vnet/l2/l2_output.h>
@@ -442,7 +443,6 @@ VLIB_NODE_FN (l2output_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2output_node) = {
.name = "l2-output",
.vector_size = sizeof (u32),
@@ -460,7 +460,6 @@ VLIB_REGISTER_NODE (l2output_node) = {
[L2OUTPUT_NEXT_BAD_INTF] = "l2-output-bad-intf",
},
};
-/* *INDENT-ON* */
#define foreach_l2output_bad_intf_error \
@@ -548,7 +547,6 @@ VLIB_NODE_FN (l2output_bad_intf_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2output_bad_intf_node) = {
.name = "l2-output-bad-intf",
.vector_size = sizeof (u32),
@@ -564,7 +562,6 @@ VLIB_REGISTER_NODE (l2output_bad_intf_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
static clib_error_t *
l2output_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_output.h b/src/vnet/l2/l2_output.h
index 1cc1e738841..201f5e195a4 100644
--- a/src/vnet/l2/l2_output.h
+++ b/src/vnet/l2/l2_output.h
@@ -81,9 +81,6 @@ extern vlib_node_registration_t l2output_node;
#define foreach_l2output_feat \
_(OUTPUT, "interface-output") \
_(SPAN, "span-l2-output") \
- _(GBP_POLICY_LPM, "gbp-policy-lpm") \
- _(GBP_POLICY_PORT, "gbp-policy-port") \
- _(GBP_POLICY_MAC, "gbp-policy-mac") \
_(CFM, "feature-bitmap-drop") \
_(QOS, "feature-bitmap-drop") \
_(ACL, "l2-output-acl") \
diff --git a/src/vnet/l2/l2_output_classify.c b/src/vnet/l2/l2_output_classify.c
index 96d0b14753a..33a7c927386 100644
--- a/src/vnet/l2/l2_output_classify.c
+++ b/src/vnet/l2/l2_output_classify.c
@@ -172,8 +172,7 @@ VLIB_NODE_FN (l2_output_classify_node) (vlib_main_t * vm,
int type_index0, type_index1;
vnet_classify_table_t *t0, *t1;
u32 table_index0, table_index1;
- u64 hash0, hash1;
-
+ u32 hash0, hash1;
/* prefetch next iteration */
{
@@ -257,7 +256,7 @@ VLIB_NODE_FN (l2_output_classify_node) (vlib_main_t * vm,
u32 type_index0;
vnet_classify_table_t *t0;
u32 table_index0;
- u64 hash0;
+ u32 hash0;
bi0 = from[0];
b0 = vlib_get_buffer (vm, bi0);
@@ -308,14 +307,14 @@ VLIB_NODE_FN (l2_output_classify_node) (vlib_main_t * vm,
u32 next0 = ~0;
ethernet_header_t *h0;
u32 table_index0;
- u64 hash0;
+ u32 hash0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
if (PREDICT_TRUE (n_left_from > 2))
{
vlib_buffer_t *p2 = vlib_get_buffer (vm, from[2]);
- u64 phash2;
+ u32 phash2;
u32 table_index2;
vnet_classify_table_t *tp2;
@@ -436,7 +435,6 @@ VLIB_NODE_FN (l2_output_classify_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_output_classify_node) = {
.name = "l2-output-classify",
.vector_size = sizeof (u32),
@@ -455,7 +453,6 @@ VLIB_REGISTER_NODE (l2_output_classify_node) = {
[L2_OUTPUT_CLASSIFY_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/** l2 output classsifier feature initialization. */
@@ -635,7 +632,6 @@ int_l2_output_classify_command_fn (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_output_classify_cli, static) = {
.path = "set interface l2 output classify",
.short_help =
@@ -643,7 +639,6 @@ VLIB_CLI_COMMAND (int_l2_output_classify_cli, static) = {
" [ip6-table <n>] [other-table <n>]",
.function = int_l2_output_classify_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_patch.c b/src/vnet/l2/l2_patch.c
index 6de4e50a298..f85938ed799 100644
--- a/src/vnet/l2/l2_patch.c
+++ b/src/vnet/l2/l2_patch.c
@@ -206,7 +206,6 @@ VLIB_NODE_FN (l2_patch_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_patch_node) = {
.name = "l2-patch",
.vector_size = sizeof (u32),
@@ -223,7 +222,6 @@ VLIB_REGISTER_NODE (l2_patch_node) = {
[L2_PATCH_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
extern int
vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add);
@@ -270,6 +268,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
vnet_feature_enable_disable ("device-input", "l2-patch",
rxhi->sw_if_index, 1, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "l2-patch",
+ rxhi->sw_if_index, 1, 0, 0);
}
else
{
@@ -278,6 +278,8 @@ vnet_l2_patch_add_del (u32 rx_sw_if_index, u32 tx_sw_if_index, int is_add)
vnet_feature_enable_disable ("device-input", "l2-patch",
rxhi->sw_if_index, 0, 0, 0);
+ vnet_feature_enable_disable ("port-rx-eth", "l2-patch",
+ rxhi->sw_if_index, 0, 0, 0);
if (vec_len (l2pm->tx_next_by_rx_sw_if_index) > rx_sw_if_index)
{
l2pm->tx_next_by_rx_sw_if_index[rx_sw_if_index] = ~0;
@@ -369,13 +371,11 @@ done:
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (test_patch_command, static) = {
.path = "test l2patch",
.short_help = "test l2patch rx <intfc> tx <intfc> [del]",
.function = test_patch_command_fn,
};
-/* *INDENT-ON* */
/** Display the contents of the l2patch table. */
static clib_error_t *
@@ -421,13 +421,11 @@ show_l2patch (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2patch_cli, static) = {
.path = "show l2patch",
.short_help = "Show l2 interface cross-connect entries",
.function = show_l2patch,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_patch_init (vlib_main_t * vm)
diff --git a/src/vnet/l2/l2_rw.c b/src/vnet/l2/l2_rw.c
index b6de2faffc5..c0e8ec489fc 100644
--- a/src/vnet/l2/l2_rw.c
+++ b/src/vnet/l2/l2_rw.c
@@ -109,6 +109,7 @@ l2_rw_rewrite (l2_rw_entry_t * rwe, u8 * h)
/* FALLTHROUGH */
case 1:
d[0] = (d[0] & ~rwe->mask[0]) | rwe->value[0];
+ rwe->hit_count++;
break;
default:
abort ();
@@ -332,6 +333,7 @@ l2_rw_mod_entry (u32 * index,
return 0;
}
+ e->hit_count = 0;
e->skip_n_vectors = skip / sizeof (u32x4);
skip -= e->skip_n_vectors * sizeof (u32x4);
e->rewrite_n_vectors = (skip + len - 1) / sizeof (u32x4) + 1;
@@ -398,17 +400,19 @@ l2_rw_entry_cli_fn (vlib_main_t * vm,
* the provisioned mask and value, modifies the packet header.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to add an l2 rewrite entry to change the destination mac of
+ * the packet to 00:8a:00:0d:0e:02 (where parameter mask is Ethernet header's
+mask,
+ * parameter value is Ethernet header's value):
+ * @cliexcmd{l2 rewrite entry mask ffffffffffff00000000000000000000 value
+008a000d0e0200000000000000000000}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_entry_cli, static) = {
.path = "l2 rewrite entry",
.short_help =
"l2 rewrite entry [index <index>] [mask <hex-mask>] [value <hex-value>] [skip <n_bytes>] [del]",
.function = l2_rw_entry_cli_fn,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -468,21 +472,36 @@ l2_rw_interface_cli_fn (vlib_main_t * vm,
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modifies the packet header.
+ * Apply the rule to the interface. The following example shows how to use
+classify
+ * entry and Layer 2-Rewrite entry to modify the packet ethernet header on the
+ * interface.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example use the classify to filter packets that do not need to be modified
+(where
+ * 192.168.68.34 is the destination ip of the data packet, 8080 is the
+destination port
+ * of the packet):
+ * @cliexcmd{classify table mask l3 ip4 dst l4 dst_port}
+ * @cliexcmd{classify session acl-hit-next permit table-index 0 match l3 ip4
+dst 192.168.68.34 l4 dst_port 8080}
+ *
+ * @cliexpar
+ * Example apply classify and l2 rewrite rules to the interface (where
+YusurK2Eth6/0/1/3
+ * is interface, \"table 0\" means Table Id is 0, \"miss 0\" means the packet
+that matches
+ * the classify. miss will be modified according to the l2 rewrite entry with
+index 0):
+ * @cliexcmd{set interface l2 rewrite YusurK2Eth6/0/1/3 table 0 miss-index 0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_interface_cli, static) = {
.path = "set interface l2 rewrite",
.short_help =
"set interface l2 rewrite <interface> [table <table index>] [miss-index <entry-index>]",
.function = l2_rw_interface_cli_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_rw_show_interfaces_cli_fn (vlib_main_t * vm,
@@ -494,30 +513,27 @@ l2_rw_show_interfaces_cli_fn (vlib_main_t * vm,
vlib_cli_output (vm, "No interface is currently using l2 rewrite\n");
uword i;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, rw->configs_bitmap) {
vlib_cli_output (vm, "sw_if_index:%d %U\n", i, format_l2_rw_config, &rw->configs[i]);
}
- /* *INDENT-ON* */
return 0;
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modifies the packet header.
+ * This command displays the l2 rewrite entries of the interfaces.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to display the l2 rewrite rules on the interface:
+ * @cliexstart{show l2 rewrite interfaces}
+ * sw_if_index:4 table-index:0 miss-index:0
+ * @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_show_interfaces_cli, static) = {
.path = "show l2 rewrite interfaces",
.short_help =
"show l2 rewrite interfaces",
.function = l2_rw_show_interfaces_cli_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_rw_show_entries_cli_fn (vlib_main_t * vm,
@@ -528,30 +544,29 @@ l2_rw_show_entries_cli_fn (vlib_main_t * vm,
if (pool_elts (rw->entries) == 0)
vlib_cli_output (vm, "No entries\n");
- /* *INDENT-OFF* */
pool_foreach (e, rw->entries) {
vlib_cli_output (vm, "%U\n", format_l2_rw_entry, e);
}
- /* *INDENT-ON* */
return 0;
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modifies the packet header.
+ * This command displays all l2 rewrite entries.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to display all l2 rewrite entries:
+ * @cliexstart{show l2 rewrite entries}
+ * 0 - mask:ffffffffffff00000000000000000000
+value:aabbccddeeff00000000000000000000
+ * hits:0 skip_bytes:0
+ * @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_show_entries_cli, static) = {
.path = "show l2 rewrite entries",
.short_help =
"show l2 rewrite entries",
.function = l2_rw_show_entries_cli_fn,
};
-/* *INDENT-ON* */
static int
l2_rw_enable_disable (u32 bridge_domain, u8 disable)
@@ -587,21 +602,22 @@ l2_rw_set_cli_fn (vlib_main_t * vm,
}
/*?
- * Layer 2-Rewrite node uses classify tables to match packets. Then, using
- * the provisioned mask and value, modfies the packet header.
+ * Layer 2 rewrite can be enabled and disabled on each interface and on each
+bridge-domain.
+ * Use this command to manage l2 rewrite on bridge-domain.
*
* @cliexpar
- * @todo This is incomplete. This needs a detailed description and a
- * practical example.
+ * Example of how to enable rewrite (where 100 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain rewrite 100}
+ * Example of how to disable rewrite (where 100 is the bridge-domain-id):
+ * @cliexcmd{set bridge-domain rewrite 100 disable}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (l2_rw_set_cli, static) = {
.path = "set bridge-domain rewrite",
.short_help =
"set bridge-domain rewrite <bridge-domain> [disable]",
.function = l2_rw_set_cli_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
l2_rw_init (vlib_main_t * vm)
@@ -643,7 +659,6 @@ static char *l2_rw_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_rw_node) = {
.name = "l2-rw",
.vector_size = sizeof (u32),
@@ -655,7 +670,6 @@ VLIB_REGISTER_NODE (l2_rw_node) = {
.n_next_nodes = L2_RW_N_NEXT,
.next_nodes = { [L2_RW_NEXT_DROP] = "error-drop"},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_rw.h b/src/vnet/l2/l2_rw.h
index f9b10333f43..6d12a21fe55 100644
--- a/src/vnet/l2/l2_rw.h
+++ b/src/vnet/l2/l2_rw.h
@@ -27,7 +27,6 @@
#include <vnet/l2/l2_input.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct _l2_rw_entry {
u16 skip_n_vectors;
u16 rewrite_n_vectors;
@@ -35,15 +34,12 @@ typedef CLIB_PACKED(struct _l2_rw_entry {
u32x4 *mask;
u32x4 *value;
}) l2_rw_entry_t;
-/* *INDENT-ON* */
/* l2_rw configuration for one interface */
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct _l2_rw_config {
u32 table_index; /* Which classify table to use */
u32 miss_index; /* Rewrite entry to use if table does not match */
}) l2_rw_config_t;
-/* *INDENT-ON* */
typedef struct
{
diff --git a/src/vnet/l2/l2_test.c b/src/vnet/l2/l2_test.c
new file mode 100644
index 00000000000..b78e388a9f1
--- /dev/null
+++ b/src/vnet/l2/l2_test.c
@@ -0,0 +1,1435 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ * Copyright(c) 2022 Nordix Foundation.
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vpp/api/types.h>
+#include <inttypes.h>
+
+#include <vnet/l2/l2_classify.h>
+#include <vnet/l2/l2_vtr.h>
+#include <vnet/ip/ip_types_api.h>
+
+#define __plugin_msg_base l2_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+#include <vlibmemory/vlib.api_enum.h>
+#include <vlibmemory/vlib.api_types.h>
+
+/* Declare message IDs */
+#include <vnet/format_fns.h>
+#include <vnet/l2/l2.api_enum.h>
+#include <vnet/l2/l2.api_types.h>
+
+#define vl_endianfun /* define message structures */
+#include <vnet/l2/l2.api.h>
+#undef vl_endianfun
+
+#define vl_calcsizefun
+#include <vnet/l2/l2.api.h>
+#undef vl_calcsizefun
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ u32 ping_id;
+ vat_main_t *vat_main;
+} l2_test_main_t;
+
+static l2_test_main_t l2_test_main;
+
+static void
+vl_api_l2_fib_table_details_t_handler (vl_api_l2_fib_table_details_t *mp)
+{
+ vat_main_t *vam = l2_test_main.vat_main;
+
+ fformat (
+ vam->ofp, "%3" PRIu32 " %U %3" PRIu32 " %d %d %d",
+ ntohl (mp->bd_id), format_ethernet_address, mp->mac,
+ ntohl (mp->sw_if_index), mp->static_mac, mp->filter_mac, mp->bvi_mac);
+}
+
+static int
+api_l2_fib_table_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2_fib_table_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 bd_id;
+ u8 bd_id_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ bd_id_set = 1;
+ else
+ break;
+ }
+
+ if (bd_id_set == 0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ fformat (vam->ofp, "BD-ID Mac Address sw-ndx Static Filter BVI");
+
+ /* Get list of l2 fib entries */
+ M (L2_FIB_TABLE_DUMP, mp);
+
+ mp->bd_id = ntohl (bd_id);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&l2_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_l2_xconnect_details_t_handler (vl_api_l2_xconnect_details_t *mp)
+{
+ vat_main_t *vam = l2_test_main.vat_main;
+ fformat (vam->ofp, "%15d%15d", ntohl (mp->rx_sw_if_index),
+ ntohl (mp->tx_sw_if_index));
+}
+
+static int
+api_l2_xconnect_dump (vat_main_t *vam)
+{
+ vl_api_l2_xconnect_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%15s%15s", "rx_sw_if_index", "tx_sw_if_index");
+ }
+
+ M (L2_XCONNECT_DUMP, mp);
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&l2_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_want_l2_arp_term_events (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_want_l2_macs_events (vat_main_t *vam)
+{
+ unformat_input_t *line_input = vam->input;
+ vl_api_want_l2_macs_events_t *mp;
+ u8 enable_disable = 1;
+ u32 scan_delay = 0;
+ u32 max_macs_in_event = 0;
+ u32 learn_limit = 0;
+ int ret;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "learn-limit %d", &learn_limit))
+ ;
+ else if (unformat (line_input, "scan-delay %d", &scan_delay))
+ ;
+ else if (unformat (line_input, "max-entries %d", &max_macs_in_event))
+ ;
+ else if (unformat (line_input, "disable"))
+ enable_disable = 0;
+ else
+ break;
+ }
+
+ M (WANT_L2_MACS_EVENTS, mp);
+ mp->enable_disable = enable_disable;
+ mp->pid = htonl (getpid ());
+ mp->learn_limit = htonl (learn_limit);
+ mp->scan_delay = (u8) scan_delay;
+ mp->max_macs_in_event = (u8) (max_macs_in_event / 10);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2fib_flush_all (vat_main_t *vam)
+{
+ return -1;
+}
+
+static void
+increment_mac_address (u8 *mac)
+{
+ u64 tmp = *((u64 *) mac);
+ tmp = clib_net_to_host_u64 (tmp);
+ tmp += 1 << 16; /* skip unused (least significant) octets */
+ tmp = clib_host_to_net_u64 (tmp);
+
+ clib_memcpy (mac, &tmp, 6);
+}
+
+static int
+api_l2fib_add_del (vat_main_t *vam)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t *i = vam->input;
+ vl_api_l2fib_add_del_t *mp;
+ f64 timeout;
+ u8 mac[8] = { 0 };
+ u8 mac_set = 0;
+ u32 bd_id;
+ u8 bd_id_set = 0;
+ u32 sw_if_index = 0;
+ u8 sw_if_index_set = 0;
+ u8 is_add = 1;
+ u8 static_mac = 0;
+ u8 filter_mac = 0;
+ u8 bvi_mac = 0;
+ int count = 1;
+ f64 before = 0;
+ int j;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "mac %U", unformat_ethernet_address, mac))
+ mac_set = 1;
+ else if (unformat (i, "bd_id %d", &bd_id))
+ bd_id_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "static"))
+ static_mac = 1;
+ else if (unformat (i, "filter"))
+ {
+ filter_mac = 1;
+ static_mac = 1;
+ }
+ else if (unformat (i, "bvi"))
+ {
+ bvi_mac = 1;
+ static_mac = 1;
+ }
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "count %d", &count))
+ ;
+ else
+ break;
+ }
+
+ if (mac_set == 0)
+ {
+ errmsg ("missing mac address");
+ return -99;
+ }
+
+ if (bd_id_set == 0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ if (is_add && sw_if_index_set == 0 && filter_mac == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ if (count > 1)
+ {
+ /* Turn on async mode */
+ vam->async_mode = 1;
+ vam->async_errors = 0;
+ before = vat_time_now (vam);
+ }
+
+ for (j = 0; j < count; j++)
+ {
+ M (L2FIB_ADD_DEL, mp);
+
+ clib_memcpy (mp->mac, mac, 6);
+ mp->bd_id = ntohl (bd_id);
+ mp->is_add = is_add;
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ if (is_add)
+ {
+ mp->static_mac = static_mac;
+ mp->filter_mac = filter_mac;
+ mp->bvi_mac = bvi_mac;
+ }
+ increment_mac_address (mac);
+ /* send it... */
+ S (mp);
+ }
+
+ if (count > 1)
+ {
+ vl_api_control_ping_t *mp_ping;
+ f64 after;
+
+ /* Shut off async mode */
+ vam->async_mode = 0;
+
+ PING (&l2_test_main, mp_ping);
+ S (mp_ping);
+
+ timeout = vat_time_now (vam) + 1.0;
+ while (vat_time_now (vam) < timeout)
+ if (vam->result_ready == 1)
+ goto out;
+ vam->retval = -99;
+
+ out:
+ if (vam->retval == -99)
+ errmsg ("timeout");
+
+ if (vam->async_errors > 0)
+ {
+ errmsg ("%d asynchronous errors", vam->async_errors);
+ vam->retval = -98;
+ }
+ vam->async_errors = 0;
+ after = vat_time_now (vam);
+
+ print (vam->ofp, "%d routes in %.6f secs, %.2f routes/sec", count,
+ after - before, count / (after - before));
+ }
+ else
+ {
+ int ret;
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+ }
+ /* Return the good/bad news */
+ return (vam->retval);
+}
+
+static int
+api_l2fib_flush_int (vat_main_t *vam)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t *i = vam->input;
+ vl_api_l2fib_flush_int_t *mp;
+ u32 sw_if_index = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ ;
+ else
+ break;
+ }
+
+ if (sw_if_index == ~0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (L2FIB_FLUSH_INT, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2_fib_clear_table (vat_main_t *vam)
+{
+ vl_api_l2_fib_clear_table_t *mp;
+ int ret;
+
+ M (L2_FIB_CLEAR_TABLE, mp);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_bridge_domain_set_mac_age (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_bridge_domain_set_mac_age_t *mp;
+ u32 bd_id = ~0;
+ u32 mac_age = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ ;
+ else if (unformat (i, "mac-age %d", &mac_age))
+ ;
+ else
+ break;
+ }
+
+ if (bd_id == ~0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ if (mac_age > 255)
+ {
+ errmsg ("mac age must be less than 256 ");
+ return -99;
+ }
+
+ M (BRIDGE_DOMAIN_SET_MAC_AGE, mp);
+
+ mp->bd_id = htonl (bd_id);
+ mp->mac_age = (u8) mac_age;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2fib_set_scan_delay (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_want_l2_macs_events2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_l2_flags (vat_main_t *vam)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t *i = vam->input;
+ vl_api_l2_flags_t *mp;
+ u32 sw_if_index;
+ u32 flags = 0;
+ u8 sw_if_index_set = 0;
+ u8 is_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "learn"))
+ flags |= L2_LEARN;
+ else if (unformat (i, "forward"))
+ flags |= L2_FWD;
+ else if (unformat (i, "flood"))
+ flags |= L2_FLOOD;
+ else if (unformat (i, "uu-flood"))
+ flags |= L2_UU_FLOOD;
+ else if (unformat (i, "arp-term"))
+ flags |= L2_ARP_TERM;
+ else if (unformat (i, "off"))
+ is_set = 0;
+ else if (unformat (i, "disable"))
+ is_set = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ M (L2_FLAGS, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->feature_bitmap = ntohl (flags);
+ mp->is_set = is_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_l2_flags_reply_t_handler (vl_api_l2_flags_reply_t *mp)
+{
+ vat_main_t *vam = l2_test_main.vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+static int
+api_l2fib_flush_bd (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_l2fib_flush_bd_t *mp;
+ u32 bd_id = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ ;
+ else
+ break;
+ }
+
+ if (bd_id == ~0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ M (L2FIB_FLUSH_BD, mp);
+
+ mp->bd_id = htonl (bd_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_bridge_domain_add_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_bridge_domain_add_del_t *mp;
+ u32 bd_id = ~0;
+ u8 is_add = 1;
+ u32 flood = 1, forward = 1, learn = 1, uu_flood = 1, arp_term = 0;
+ u8 *bd_tag = NULL;
+ u32 mac_age = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ ;
+ else if (unformat (i, "flood %d", &flood))
+ ;
+ else if (unformat (i, "uu-flood %d", &uu_flood))
+ ;
+ else if (unformat (i, "forward %d", &forward))
+ ;
+ else if (unformat (i, "learn %d", &learn))
+ ;
+ else if (unformat (i, "arp-term %d", &arp_term))
+ ;
+ else if (unformat (i, "mac-age %d", &mac_age))
+ ;
+ else if (unformat (i, "bd-tag %s", &bd_tag))
+ ;
+ else if (unformat (i, "del"))
+ {
+ is_add = 0;
+ flood = uu_flood = forward = learn = 0;
+ }
+ else
+ break;
+ }
+
+ if (bd_id == ~0)
+ {
+ errmsg ("missing bridge domain");
+ ret = -99;
+ goto done;
+ }
+
+ if (mac_age > 255)
+ {
+ errmsg ("mac age must be less than 256 ");
+ ret = -99;
+ goto done;
+ }
+
+ if ((bd_tag) && (vec_len (bd_tag) > 63))
+ {
+ errmsg ("bd-tag cannot be longer than 63");
+ ret = -99;
+ goto done;
+ }
+
+ M (BRIDGE_DOMAIN_ADD_DEL, mp);
+
+ mp->bd_id = ntohl (bd_id);
+ mp->flood = flood;
+ mp->uu_flood = uu_flood;
+ mp->forward = forward;
+ mp->learn = learn;
+ mp->arp_term = arp_term;
+ mp->is_add = is_add;
+ mp->mac_age = (u8) mac_age;
+ if (bd_tag)
+ {
+ clib_memcpy (mp->bd_tag, bd_tag, vec_len (bd_tag));
+ mp->bd_tag[vec_len (bd_tag)] = 0;
+ }
+ S (mp);
+ W (ret);
+
+done:
+ vec_free (bd_tag);
+ return ret;
+}
+
+static int
+api_bridge_domain_add_del_v2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static void
+vl_api_bridge_domain_add_del_v2_reply_t_handler (
+ vl_api_bridge_domain_add_del_v2_reply_t *mp)
+{
+}
+
+#define foreach_pbb_vtr_op \
+ _ ("disable", L2_VTR_DISABLED) \
+ _ ("pop", L2_VTR_POP_2) \
+ _ ("push", L2_VTR_PUSH_2)
+
+static int
+api_l2_interface_pbb_tag_rewrite (vat_main_t *vam)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t *i = vam->input;
+ vl_api_l2_interface_pbb_tag_rewrite_t *mp;
+ u32 sw_if_index = ~0, vtr_op = ~0;
+ u16 outer_tag = ~0;
+ u8 dmac[6], smac[6];
+ u8 dmac_set = 0, smac_set = 0;
+ u16 vlanid = 0;
+ u32 sid = ~0;
+ u32 tmp;
+ int ret;
+
+ /* Shut up coverity */
+ clib_memset (dmac, 0, sizeof (dmac));
+ clib_memset (smac, 0, sizeof (smac));
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ ;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ ;
+ else if (unformat (i, "vtr_op %d", &vtr_op))
+ ;
+#define _(n, v) \
+ else if (unformat (i, n)) { vtr_op = v; }
+ foreach_pbb_vtr_op
+#undef _
+ else if (unformat (i, "translate_pbb_stag"))
+ {
+ if (unformat (i, "%d", &tmp))
+ {
+ vtr_op = L2_VTR_TRANSLATE_2_1;
+ outer_tag = tmp;
+ }
+ else
+ {
+ errmsg (
+ "translate_pbb_stag operation requires outer tag definition");
+ return -99;
+ }
+ }
+ else if (unformat (i, "dmac %U", unformat_ethernet_address, dmac))
+ dmac_set++;
+ else if (unformat (i, "smac %U", unformat_ethernet_address, smac))
+ smac_set++;
+ else if (unformat (i, "sid %d", &sid));
+ else if (unformat (i, "vlanid %d", &tmp)) vlanid = tmp;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if ((sw_if_index == ~0) || (vtr_op == ~0))
+ {
+ errmsg ("missing sw_if_index or vtr operation");
+ return -99;
+ }
+ if (((vtr_op == L2_VTR_PUSH_2) || (vtr_op == L2_VTR_TRANSLATE_2_2)) &&
+ ((dmac_set == 0) || (smac_set == 0) || (sid == ~0)))
+ {
+ errmsg ("push and translate_qinq operations require dmac, smac, sid and "
+ "optionally vlanid");
+ return -99;
+ }
+
+ M (L2_INTERFACE_PBB_TAG_REWRITE, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->vtr_op = ntohl (vtr_op);
+ mp->outer_tag = ntohs (outer_tag);
+ clib_memcpy (mp->b_dmac, dmac, sizeof (dmac));
+ clib_memcpy (mp->b_smac, smac, sizeof (smac));
+ mp->b_vlanid = ntohs (vlanid);
+ mp->i_sid = ntohl (sid);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_l2_xconnect (vat_main_t *vam)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_l2_xconnect_t *mp;
+ u32 rx_sw_if_index;
+ u8 rx_sw_if_index_set = 0;
+ u32 tx_sw_if_index;
+ u8 tx_sw_if_index_set = 0;
+ u8 enable = 1;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "rx_sw_if_index %d", &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ else if (unformat (i, "tx_sw_if_index %d", &tx_sw_if_index))
+ tx_sw_if_index_set = 1;
+ else if (unformat (i, "rx"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vnet_sw_interface, vnm,
+ &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "tx"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vnet_sw_interface, vnm,
+ &tx_sw_if_index))
+ tx_sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (rx_sw_if_index_set == 0)
+ {
+ errmsg ("missing rx interface name or rx_sw_if_index");
+ return -99;
+ }
+
+ if (enable && (tx_sw_if_index_set == 0))
+ {
+ errmsg ("missing tx interface name or tx_sw_if_index");
+ return -99;
+ }
+
+ M (SW_INTERFACE_SET_L2_XCONNECT, mp);
+
+ mp->rx_sw_if_index = ntohl (rx_sw_if_index);
+ mp->tx_sw_if_index = ntohl (tx_sw_if_index);
+ mp->enable = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2_interface_efp_filter (vat_main_t *vam)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t *i = vam->input;
+ vl_api_l2_interface_efp_filter_t *mp;
+ u32 sw_if_index;
+ u8 enable = 1;
+ u8 sw_if_index_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing sw_if_index");
+ return -99;
+ }
+
+ M (L2_INTERFACE_EFP_FILTER, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable_disable = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_bd_ip_mac_details_t_handler (vl_api_bd_ip_mac_details_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+
+ print (vam->ofp, "\n%-5d %U %U", ntohl (mp->entry.bd_id),
+ format_vl_api_mac_address, mp->entry.mac, format_vl_api_address,
+ &mp->entry.ip);
+}
+
+static void
+vl_api_bvi_create_reply_t_handler (vl_api_bvi_create_reply_t *mp)
+{
+}
+
+static int
+api_sw_interface_set_l2_bridge (vat_main_t *vam)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_l2_bridge_t *mp;
+ vl_api_l2_port_type_t port_type;
+ u32 rx_sw_if_index;
+ u8 rx_sw_if_index_set = 0;
+ u32 bd_id;
+ u8 bd_id_set = 0;
+ u32 shg = 0;
+ u8 enable = 1;
+ int ret;
+
+ port_type = L2_API_PORT_TYPE_NORMAL;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "sw_if_index %d", &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ else if (unformat (i, "bd_id %d", &bd_id))
+ bd_id_set = 1;
+ else if (unformat (i, "%U", unformat_vnet_sw_interface, vnm,
+ &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ else if (unformat (i, "shg %d", &shg))
+ ;
+ else if (unformat (i, "bvi"))
+ port_type = L2_API_PORT_TYPE_BVI;
+ else if (unformat (i, "uu-fwd"))
+ port_type = L2_API_PORT_TYPE_UU_FWD;
+ else if (unformat (i, "enable"))
+ enable = 1;
+ else if (unformat (i, "disable"))
+ enable = 0;
+ else
+ break;
+ }
+
+ if (rx_sw_if_index_set == 0)
+ {
+ errmsg ("missing rx interface name or sw_if_index");
+ return -99;
+ }
+
+ if (enable && (bd_id_set == 0))
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ M (SW_INTERFACE_SET_L2_BRIDGE, mp);
+
+ mp->rx_sw_if_index = ntohl (rx_sw_if_index);
+ mp->bd_id = ntohl (bd_id);
+ mp->shg = (u8) shg;
+ mp->port_type = ntohl (port_type);
+ mp->enable = enable;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_sw_interface_set_vpath (vat_main_t *vam)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t *i = vam->input;
+ vl_api_sw_interface_set_vpath_t *mp;
+ u32 sw_if_index = 0;
+ u8 sw_if_index_set = 0;
+ u8 is_enable = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "enable"))
+ is_enable = 1;
+ else if (unformat (i, "disable"))
+ is_enable = 0;
+ else
+ break;
+ }
+
+ if (sw_if_index_set == 0)
+ {
+ errmsg ("missing interface name or sw_if_index");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SW_INTERFACE_SET_VPATH, mp);
+
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->enable = is_enable;
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_l2_patch_add_del (vat_main_t *vam)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t *i = vam->input;
+ vl_api_l2_patch_add_del_t *mp;
+ u32 rx_sw_if_index;
+ u8 rx_sw_if_index_set = 0;
+ u32 tx_sw_if_index;
+ u8 tx_sw_if_index_set = 0;
+ u8 is_add = 1;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "rx_sw_if_index %d", &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ else if (unformat (i, "tx_sw_if_index %d", &tx_sw_if_index))
+ tx_sw_if_index_set = 1;
+ else if (unformat (i, "rx"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vnet_sw_interface, vnm,
+ &rx_sw_if_index))
+ rx_sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "tx"))
+ {
+ if (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vnet_sw_interface, vnm,
+ &tx_sw_if_index))
+ tx_sw_if_index_set = 1;
+ }
+ else
+ break;
+ }
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (rx_sw_if_index_set == 0)
+ {
+ errmsg ("missing rx interface name or rx_sw_if_index");
+ return -99;
+ }
+
+ if (tx_sw_if_index_set == 0)
+ {
+ errmsg ("missing tx interface name or tx_sw_if_index");
+ return -99;
+ }
+
+ M (L2_PATCH_ADD_DEL, mp);
+
+ mp->rx_sw_if_index = ntohl (rx_sw_if_index);
+ mp->tx_sw_if_index = ntohl (tx_sw_if_index);
+ mp->is_add = is_add;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_bridge_flags_reply_t_handler (vl_api_bridge_flags_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ vam->result_ready = 1;
+ }
+}
+
+#define foreach_vtr_op \
+ _ ("disable", L2_VTR_DISABLED) \
+ _ ("push-1", L2_VTR_PUSH_1) \
+ _ ("push-2", L2_VTR_PUSH_2) \
+ _ ("pop-1", L2_VTR_POP_1) \
+ _ ("pop-2", L2_VTR_POP_2) \
+ _ ("translate-1-1", L2_VTR_TRANSLATE_1_1) \
+ _ ("translate-1-2", L2_VTR_TRANSLATE_1_2) \
+ _ ("translate-2-1", L2_VTR_TRANSLATE_2_1) \
+ _ ("translate-2-2", L2_VTR_TRANSLATE_2_2)
+
+static int
+api_l2_interface_vlan_tag_rewrite (vat_main_t *vam)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ unformat_input_t *i = vam->input;
+ vl_api_l2_interface_vlan_tag_rewrite_t *mp;
+ u32 sw_if_index;
+ u8 sw_if_index_set = 0;
+ u8 vtr_op_set = 0;
+ u32 vtr_op = 0;
+ u32 push_dot1q = 1;
+ u32 tag1 = ~0;
+ u32 tag2 = ~0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "%U", unformat_vnet_sw_interface, vnm, &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "vtr_op %d", &vtr_op))
+ vtr_op_set = 1;
+#define _(n, v) \
+ else if (unformat (i, n)) \
+ { \
+ vtr_op = v; \
+ vtr_op_set = 1; \
+ }
+ foreach_vtr_op
+#undef _
+ else if (unformat (i, "push_dot1q %d", &push_dot1q));
+ else if (unformat (i, "tag1 %d", &tag1));
+ else if (unformat (i, "tag2 %d", &tag2));
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if ((sw_if_index_set == 0) || (vtr_op_set == 0))
+ {
+ errmsg ("missing vtr operation or sw_if_index");
+ return -99;
+ }
+
+ M (L2_INTERFACE_VLAN_TAG_REWRITE, mp);
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->vtr_op = ntohl (vtr_op);
+ mp->push_dot1q = ntohl (push_dot1q);
+ mp->tag1 = ntohl (tag1);
+ mp->tag2 = ntohl (tag2);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_bridge_domain_set_learn_limit (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_bd_ip_mac_add_del (vat_main_t *vam)
+{
+ vl_api_address_t ip = VL_API_ZERO_ADDRESS;
+ vl_api_mac_address_t mac = { 0 };
+ unformat_input_t *i = vam->input;
+ vl_api_bd_ip_mac_add_del_t *mp;
+ u32 bd_id;
+ u8 is_add = 1;
+ u8 bd_id_set = 0;
+ u8 ip_set = 0;
+ u8 mac_set = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ {
+ bd_id_set++;
+ }
+ else if (unformat (i, "%U", unformat_vl_api_address, &ip))
+ {
+ ip_set++;
+ }
+ else if (unformat (i, "%U", unformat_vl_api_mac_address, &mac))
+ {
+ mac_set++;
+ }
+ else if (unformat (i, "del"))
+ is_add = 0;
+ else
+ break;
+ }
+
+ if (bd_id_set == 0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+ else if (ip_set == 0)
+ {
+ errmsg ("missing IP address");
+ return -99;
+ }
+ else if (mac_set == 0)
+ {
+ errmsg ("missing MAC address");
+ return -99;
+ }
+
+ M (BD_IP_MAC_ADD_DEL, mp);
+
+ mp->entry.bd_id = ntohl (bd_id);
+ mp->is_add = is_add;
+
+ clib_memcpy (&mp->entry.ip, &ip, sizeof (ip));
+ clib_memcpy (&mp->entry.mac, &mac, sizeof (mac));
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_bridge_domain_details_t_handler (vl_api_bridge_domain_details_t *mp)
+{
+ vat_main_t *vam = l2_test_main.vat_main;
+ u32 n_sw_ifs = ntohl (mp->n_sw_ifs);
+ int i;
+
+ print (vam->ofp, "\n%-3s %-3s %-3s %-3s %-3s %-6s %-3s", " ID", "LRN", "FWD",
+ "FLD", "BVI", "UU-FWD", "#IF");
+
+ print (vam->ofp, "%3d %3d %3d %3d %3d %6d %3d", ntohl (mp->bd_id), mp->learn,
+ mp->forward, mp->flood, ntohl (mp->bvi_sw_if_index),
+ ntohl (mp->uu_fwd_sw_if_index), n_sw_ifs);
+
+ if (n_sw_ifs)
+ {
+ vl_api_bridge_domain_sw_if_t *sw_ifs;
+ print (vam->ofp, "\n\n%s %s %s", "sw_if_index", "SHG",
+ "Interface Name");
+
+ sw_ifs = mp->sw_if_details;
+ for (i = 0; i < n_sw_ifs; i++)
+ {
+ u8 *sw_if_name = 0;
+ u32 sw_if_index;
+ hash_pair_t *p;
+
+ sw_if_index = ntohl (sw_ifs->sw_if_index);
+
+ hash_foreach_pair (p, vam->sw_if_index_by_interface_name, ({
+ if ((u32) p->value[0] == sw_if_index)
+ {
+ sw_if_name = (u8 *) (p->key);
+ break;
+ }
+ }));
+ print (vam->ofp, "%7d %3d %s", sw_if_index, sw_ifs->shg,
+ sw_if_name ? (char *) sw_if_name : "sw_if_index not found!");
+
+ sw_ifs++;
+ }
+ }
+}
+
+static int
+api_bridge_domain_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_bridge_domain_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ u32 bd_id = ~0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ ;
+ else
+ break;
+ }
+
+ M (BRIDGE_DOMAIN_DUMP, mp);
+ mp->bd_id = ntohl (bd_id);
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&l2_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_bridge_domain_set_default_learn_limit (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_bd_ip_mac_flush (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_bd_ip_mac_flush_t *mp;
+ u32 bd_id;
+ u8 bd_id_set = 0;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ {
+ bd_id_set++;
+ }
+ else
+ break;
+ }
+
+ if (bd_id_set == 0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ M (BD_IP_MAC_FLUSH, mp);
+
+ mp->bd_id = ntohl (bd_id);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_bd_ip_mac_dump (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_bd_ip_mac_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+ u32 bd_id;
+ u8 bd_id_set = 0;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ {
+ bd_id_set++;
+ }
+ else
+ break;
+ }
+
+ fformat (vam->ofp, "\n%-5s %-7s %-20s %-30s", "bd_id", "is_ipv6",
+ "mac_address", "ip_address");
+
+ /* Dump Bridge Domain Ip to Mac entries */
+ M (BD_IP_MAC_DUMP, mp);
+
+ if (bd_id_set)
+ mp->bd_id = htonl (bd_id);
+ else
+ mp->bd_id = ~0;
+
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&l2_test_main, mp_ping);
+ S (mp_ping);
+
+ W (ret);
+ return ret;
+}
+
+static int
+api_bvi_create (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_bvi_delete (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_bridge_flags (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_bridge_flags_t *mp;
+ u32 bd_id;
+ u8 bd_id_set = 0;
+ u8 is_set = 1;
+ bd_flags_t flags = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bd_id %d", &bd_id))
+ bd_id_set = 1;
+ else if (unformat (i, "learn"))
+ flags |= BRIDGE_API_FLAG_LEARN;
+ else if (unformat (i, "forward"))
+ flags |= BRIDGE_API_FLAG_FWD;
+ else if (unformat (i, "flood"))
+ flags |= BRIDGE_API_FLAG_FLOOD;
+ else if (unformat (i, "uu-flood"))
+ flags |= BRIDGE_API_FLAG_UU_FLOOD;
+ else if (unformat (i, "arp-term"))
+ flags |= BRIDGE_API_FLAG_ARP_TERM;
+ else if (unformat (i, "off"))
+ is_set = 0;
+ else if (unformat (i, "disable"))
+ is_set = 0;
+ else
+ break;
+ }
+
+ if (bd_id_set == 0)
+ {
+ errmsg ("missing bridge domain");
+ return -99;
+ }
+
+ M (BRIDGE_FLAGS, mp);
+
+ mp->bd_id = ntohl (bd_id);
+ mp->flags = ntohl (flags);
+ mp->is_set = is_set;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+#include <vnet/l2/l2.api_test.c>
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/l2/l2_uu_fwd.c b/src/vnet/l2/l2_uu_fwd.c
index fb3571d159c..4a510b658d7 100644
--- a/src/vnet/l2/l2_uu_fwd.c
+++ b/src/vnet/l2/l2_uu_fwd.c
@@ -211,7 +211,6 @@ VLIB_NODE_FN (l2_uu_fwd_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_uu_fwd_node) = {
.name = "l2-uu-fwd",
.vector_size = sizeof (u32),
@@ -228,7 +227,6 @@ VLIB_REGISTER_NODE (l2_uu_fwd_node) = {
[L2_UU_FWD_NEXT_L2_OUTPUT] = "l2-output",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_vtr.c b/src/vnet/l2/l2_vtr.c
index bfd1dcb9280..4053c0fc1cb 100644
--- a/src/vnet/l2/l2_vtr.c
+++ b/src/vnet/l2/l2_vtr.c
@@ -670,13 +670,11 @@ done:
* @cliexend
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_vtr_cli, static) = {
.path = "set interface l2 tag-rewrite",
.short_help = "set interface l2 tag-rewrite <interface> [disable | pop {1|2} | push {dot1q|dot1ad} <tag> <tag>]",
.function = int_l2_vtr,
};
-/* *INDENT-ON* */
/**
* Get pbb tag rewrite on the given interface.
@@ -816,13 +814,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (int_l2_pbb_vtr_cli, static) = {
.path = "set interface l2 pbb-tag-rewrite",
.short_help = "set interface l2 pbb-tag-rewrite <interface> [disable | pop | push | translate_pbb_stag <outer_tag> dmac <address> smac <address> s_id <nn> [b_vlanid <nn>]]",
.function = int_l2_pbb_vtr,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/l2/l2_xcrw.c b/src/vnet/l2/l2_xcrw.c
index c2c325a796f..9edd8b6ba57 100644
--- a/src/vnet/l2/l2_xcrw.c
+++ b/src/vnet/l2/l2_xcrw.c
@@ -238,7 +238,6 @@ VLIB_NODE_FN (l2_xcrw_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_xcrw_node) = {
.name = "l2-xcrw",
.vector_size = sizeof (u32),
@@ -255,7 +254,6 @@ VLIB_REGISTER_NODE (l2_xcrw_node) = {
[L2_XCRW_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
clib_error_t *
@@ -279,18 +277,17 @@ format_xcrw_name (u8 * s, va_list * args)
return format (s, "xcrw%d", dev_instance);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (xcrw_device_class,static) = {
.name = "Xcrw",
.format_device_name = format_xcrw_name,
};
-/* *INDENT-ON* */
/* Create a sham tunnel interface and return its sw_if_index */
static u32
create_xcrw_interface (vlib_main_t * vm)
{
vnet_main_t *vnm = vnet_get_main ();
+ vnet_eth_interface_registration_t eir = {};
static u32 instance;
u8 address[6];
u32 hw_if_index;
@@ -301,10 +298,9 @@ create_xcrw_interface (vlib_main_t * vm)
clib_memset (address, 0, sizeof (address));
address[2] = 0x12;
- /* can returns error iff phy != 0 */
- (void) ethernet_register_interface
- (vnm, xcrw_device_class.index, instance++, address, &hw_if_index,
- /* flag change */ 0);
+ eir.dev_class_index = xcrw_device_class.index;
+ eir.dev_instance = instance++, eir.address = address;
+ hw_if_index = vnet_eth_register_interface (vnm, &eir);
hi = vnet_get_hw_interface (vnm, hw_if_index);
sw_if_index = hi->sw_if_index;
@@ -496,7 +492,6 @@ done:
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = {
.path = "set interface l2 xcrw",
.short_help =
@@ -504,7 +499,6 @@ VLIB_CLI_COMMAND (set_l2_xcrw_command, static) = {
" [del] [tx-fib-id <id>] [ipv6] rw <hex-bytes>",
.function = set_l2_xcrw_command_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
@@ -568,12 +562,10 @@ show_l2xcrw_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "%U", format_l2xcrw, 0, 0);
- /* *INDENT-OFF* */
pool_foreach (t, xcm->tunnels)
{
vlib_cli_output (vm, "%U", format_l2xcrw, vnm, t);
}
- /* *INDENT-ON* */
return 0;
}
@@ -585,13 +577,11 @@ show_l2xcrw_command_fn (vlib_main_t * vm,
* @todo This is incomplete. This needs a detailed description and a
* practical example.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_l2xcrw_command, static) = {
.path = "show l2xcrw",
.short_help = "show l2xcrw",
.function = show_l2xcrw_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/lawful-intercept/lawful_intercept.c b/src/vnet/lawful-intercept/lawful_intercept.c
deleted file mode 100644
index fff44fc3a67..00000000000
--- a/src/vnet/lawful-intercept/lawful_intercept.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/lawful-intercept/lawful_intercept.h>
-
-li_main_t li_main;
-
-static clib_error_t *
-set_li_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- li_main_t *lm = &li_main;
- ip4_address_t collector;
- u8 collector_set = 0;
- ip4_address_t src;
- u8 src_set = 0;
- u32 tmp;
- u16 udp_port = 0;
- u8 is_add = 1;
- int i;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "collector %U", unformat_ip4_address, &collector))
- collector_set = 1;
- if (unformat (input, "src %U", unformat_ip4_address, &src))
- src_set = 1;
- else if (unformat (input, "udp-port %d", &tmp))
- udp_port = tmp;
- else if (unformat (input, "del"))
- is_add = 0;
- else
- break;
- }
-
- if (collector_set == 0)
- return clib_error_return (0, "collector must be set...");
- if (src_set == 0)
- return clib_error_return (0, "src must be set...");
- if (udp_port == 0)
- return clib_error_return (0, "udp-port must be set...");
-
- if (is_add == 1)
- {
- for (i = 0; i < vec_len (lm->collectors); i++)
- {
- if (lm->collectors[i].as_u32 == collector.as_u32)
- {
- if (lm->ports[i] == udp_port)
- return clib_error_return (
- 0, "collector %U:%d already configured", format_ip4_address,
- &collector, udp_port);
- else
- return clib_error_return (
- 0, "collector %U already configured with port %d",
- format_ip4_address, &collector, (int) (lm->ports[i]));
- }
- }
- vec_add1 (lm->collectors, collector);
- vec_add1 (lm->ports, udp_port);
- vec_add1 (lm->src_addrs, src);
- return 0;
- }
- else
- {
- for (i = 0; i < vec_len (lm->collectors); i++)
- {
- if ((lm->collectors[i].as_u32 == collector.as_u32)
- && lm->ports[i] == udp_port)
- {
- vec_delete (lm->collectors, 1, i);
- vec_delete (lm->ports, 1, i);
- vec_delete (lm->src_addrs, 1, i);
- return 0;
- }
- }
- return clib_error_return (0, "collector %U:%d not configured",
- &collector, udp_port);
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_li_command, static) = {
- .path = "set li",
- .short_help =
- "set li src <ip4-address> collector <ip4-address> udp-port <nnnn>",
- .function = set_li_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-li_init (vlib_main_t * vm)
-{
- li_main_t *lm = &li_main;
-
- lm->vlib_main = vm;
- lm->vnet_main = vnet_get_main ();
- lm->hit_node_index = li_hit_node.index;
- return 0;
-}
-
-VLIB_INIT_FUNCTION (li_init);
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/lawful-intercept/node.c b/src/vnet/lawful-intercept/node.c
deleted file mode 100644
index c5328e672d0..00000000000
--- a/src/vnet/lawful-intercept/node.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vppinfra/error.h>
-
-#include <vnet/lawful-intercept/lawful_intercept.h>
-
-#include <vppinfra/error.h>
-#include <vppinfra/elog.h>
-
-extern vlib_node_registration_t li_hit_node;
-
-typedef struct
-{
- u32 next_index;
-} li_hit_trace_t;
-
-/* packet trace format function */
-static u8 *
-format_li_hit_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- li_hit_trace_t *t = va_arg (*args, li_hit_trace_t *);
-
- s = format (s, "LI_HIT: next index %d", t->next_index);
-
- return s;
-}
-
-#define foreach_li_hit_error \
-_(HITS, "LI packets processed") \
-_(NO_COLLECTOR, "No collector configured") \
-_(BUFFER_ALLOCATION_FAILURE, "Buffer allocation failure")
-
-typedef enum
-{
-#define _(sym,str) LI_HIT_ERROR_##sym,
- foreach_li_hit_error
-#undef _
- LI_HIT_N_ERROR,
-} li_hit_error_t;
-
-static char *li_hit_error_strings[] = {
-#define _(sym,string) string,
- foreach_li_hit_error
-#undef _
-};
-
-typedef enum
-{
- LI_HIT_NEXT_ETHERNET,
- LI_HIT_N_NEXT,
-} li_hit_next_t;
-
-VLIB_NODE_FN (li_hit_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
-{
- u32 n_left_from, *from, *to_next;
- li_hit_next_t next_index;
- vlib_frame_t *int_frame = 0;
- u32 *to_int_next = 0;
- li_main_t *lm = &li_main;
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- if (PREDICT_FALSE (vec_len (lm->collectors) == 0))
- {
- vlib_node_increment_counter (vm, li_hit_node.index,
- LI_HIT_ERROR_NO_COLLECTOR, n_left_from);
- }
- else
- {
- /* The intercept frame... */
- int_frame = vlib_get_frame_to_node (vm, ip4_lookup_node.index);
- to_int_next = vlib_frame_vector_args (int_frame);
- }
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-#if 0
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- u32 next0 = LI_HIT_NEXT_INTERFACE_OUTPUT;
- u32 next1 = LI_HIT_NEXT_INTERFACE_OUTPUT;
- u32 sw_if_index0, sw_if_index1;
- u8 tmp0[6], tmp1[6];
- ethernet_header_t *en0, *en1;
- u32 bi0, bi1;
- vlib_buffer_t *b0, *b1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- clib_prefetch_store (p2->data);
- clib_prefetch_store (p3->data);
- }
-
- /* speculatively enqueue b0 and b1 to the current next frame */
- to_next[0] = bi0 = from[0];
- to_next[1] = bi1 = from[1];
- from += 2;
- to_next += 2;
- n_left_from -= 2;
- n_left_to_next -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
- ASSERT (b0->current_data == 0);
- ASSERT (b1->current_data == 0);
-
- en0 = vlib_buffer_get_current (b0);
- en1 = vlib_buffer_get_current (b1);
-
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
-
- /* Send pkt back out the RX interface */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = sw_if_index0;
- vnet_buffer (b1)->sw_if_index[VLIB_TX] = sw_if_index1;
-
- /* $$$$$ End of processing 2 x packets $$$$$ */
-
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
- {
- if (b0->flags & VLIB_BUFFER_IS_TRACED)
- {
- li_hit_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
- t->sw_if_index = sw_if_index0;
- t->next_index = next0;
- }
- if (b1->flags & VLIB_BUFFER_IS_TRACED)
- {
- li_hit_trace_t *t =
- vlib_add_trace (vm, node, b1, sizeof (*t));
- t->sw_if_index = sw_if_index1;
- t->next_index = next1;
- }
- }
-
- /* verify speculative enqueues, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-#endif /* $$$ dual-loop off */
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- vlib_buffer_t *c0;
- ip4_udp_header_t *iu0;
- ip4_header_t *ip0;
- udp_header_t *udp0;
- u32 next0 = LI_HIT_NEXT_ETHERNET;
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- if (PREDICT_TRUE (to_int_next != 0))
- {
- /* Make an intercept copy. This can fail. */
- c0 = vlib_buffer_copy (vm, b0);
-
- if (PREDICT_FALSE (c0 == 0))
- {
- vlib_node_increment_counter
- (vm, node->node_index,
- LI_HIT_ERROR_BUFFER_ALLOCATION_FAILURE, 1);
- goto skip;
- }
-
- vlib_buffer_advance (c0, -sizeof (*iu0));
-
- iu0 = vlib_buffer_get_current (c0);
- ip0 = &iu0->ip4;
-
- ip0->ip_version_and_header_length = 0x45;
- ip0->ttl = 254;
- ip0->protocol = IP_PROTOCOL_UDP;
-
- ip0->src_address.as_u32 = lm->src_addrs[0].as_u32;
- ip0->dst_address.as_u32 = lm->collectors[0].as_u32;
- ip0->length = vlib_buffer_length_in_chain (vm, c0);
- ip0->checksum = ip4_header_checksum (ip0);
-
- udp0 = &iu0->udp;
- udp0->src_port = udp0->dst_port =
- clib_host_to_net_u16 (lm->ports[0]);
- udp0->checksum = 0;
- udp0->length =
- clib_net_to_host_u16 (vlib_buffer_length_in_chain (vm, b0));
-
- to_int_next[0] = vlib_get_buffer_index (vm, c0);
- to_int_next++;
- }
-
- skip:
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- li_hit_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (*t));
- t->next_index = next0;
- }
-
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- if (int_frame)
- {
- int_frame->n_vectors = frame->n_vectors;
- vlib_put_frame_to_node (vm, ip4_lookup_node.index, int_frame);
- }
-
- vlib_node_increment_counter (vm, li_hit_node.index,
- LI_HIT_ERROR_HITS, frame->n_vectors);
- return frame->n_vectors;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (li_hit_node) = {
- .name = "li-hit",
- .vector_size = sizeof (u32),
- .format_trace = format_li_hit_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(li_hit_error_strings),
- .error_strings = li_hit_error_strings,
-
- .n_next_nodes = LI_HIT_N_NEXT,
-
- /* edit / add dispositions here */
- .next_nodes = {
- [LI_HIT_NEXT_ETHERNET] = "ethernet-input-not-l2",
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/llc/llc.c b/src/vnet/llc/llc.c
index 4a7fdf9d9ba..4cbf17d48df 100644
--- a/src/vnet/llc/llc.c
+++ b/src/vnet/llc/llc.c
@@ -181,14 +181,12 @@ llc_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (llc_hw_interface_class) = {
.name = "LLC",
.format_header = format_llc_header_with_length,
.unformat_header = unformat_llc_header,
.build_rewrite = llc_build_rewrite,
};
-/* *INDENT-ON* */
static void
add_protocol (llc_main_t * pm, llc_protocol_t protocol, char *protocol_name)
diff --git a/src/vnet/llc/node.c b/src/vnet/llc/node.c
index 086925bd305..d1ee6948269 100644
--- a/src/vnet/llc/node.c
+++ b/src/vnet/llc/node.c
@@ -246,7 +246,6 @@ static char *llc_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (llc_input_node) = {
.function = llc_input,
.name = "llc-input",
@@ -267,7 +266,6 @@ VLIB_REGISTER_NODE (llc_input_node) = {
.format_trace = format_llc_input_trace,
.unformat_buffer = unformat_llc_header,
};
-/* *INDENT-ON* */
static void
llc_setup_node (vlib_main_t *vm, u32 node_index)
diff --git a/src/vnet/mfib/.clang-format b/src/vnet/mfib/.clang-format
new file mode 100644
index 00000000000..9d159247d51
--- /dev/null
+++ b/src/vnet/mfib/.clang-format
@@ -0,0 +1,2 @@
+DisableFormat: true
+SortIncludes: false
diff --git a/src/vnet/mfib/ip4_mfib.c b/src/vnet/mfib/ip4_mfib.c
index 9c64c4ae92c..2ad873f82bf 100644
--- a/src/vnet/mfib/ip4_mfib.c
+++ b/src/vnet/mfib/ip4_mfib.c
@@ -42,6 +42,18 @@ static const mfib_prefix_t ip4_specials[] =
.fp_proto = FIB_PROTOCOL_IP4,
},
};
+static const fib_route_path_t ip4_special_path =
+ {
+ .frp_proto = DPO_PROTO_IP4,
+ .frp_addr = {
+ .ip4.data_u32 = 0x0,
+ },
+ .frp_sw_if_index = ~0,
+ .frp_fib_index = ~0,
+ .frp_weight = 1,
+ .frp_flags = FIB_ROUTE_PATH_LOCAL,
+ .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
+ };
static u32
ip4_create_mfib_with_table_id (u32 table_id,
@@ -76,15 +88,6 @@ ip4_create_mfib_with_table_id (u32 table_id,
MFIB_RPF_ID_NONE,
MFIB_ENTRY_FLAG_DROP);
- const fib_route_path_t path = {
- .frp_proto = DPO_PROTO_IP4,
- .frp_addr = zero_addr,
- .frp_sw_if_index = ~0,
- .frp_fib_index = ~0,
- .frp_weight = 1,
- .frp_flags = FIB_ROUTE_PATH_LOCAL,
- .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
- };
int ii;
for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
@@ -92,7 +95,8 @@ ip4_create_mfib_with_table_id (u32 table_id,
mfib_table_entry_path_update(mfib_table->mft_index,
&ip4_specials[ii],
MFIB_SOURCE_SPECIAL,
- &path);
+ MFIB_ENTRY_FLAG_NONE,
+ &ip4_special_path);
}
return (mfib_table->mft_index);
@@ -112,11 +116,12 @@ ip4_mfib_table_destroy (ip4_mfib_t *mfib)
MFIB_SOURCE_DEFAULT_ROUTE);
for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
- {
- mfib_table_entry_delete(mfib_table->mft_index,
- &ip4_specials[ii],
- MFIB_SOURCE_SPECIAL);
- }
+ {
+ mfib_table_entry_path_remove(mfib_table->mft_index,
+ &ip4_specials[ii],
+ MFIB_SOURCE_SPECIAL,
+ &ip4_special_path);
+ }
/*
* validate no more routes.
@@ -124,6 +129,8 @@ ip4_mfib_table_destroy (ip4_mfib_t *mfib)
ASSERT(0 == mfib_table->mft_total_route_counts);
ASSERT(~0 != mfib_table->mft_table_id);
+ for (u32 i = 0; i < ARRAY_LEN (mfib->fib_entry_by_dst_address); i++)
+ hash_free (mfib->fib_entry_by_dst_address[i]);
hash_unset (ip4_main.mfib_index_by_table_id, mfib_table->mft_table_id);
pool_put(ip4_main.mfibs, mfib_table);
}
@@ -142,7 +149,6 @@ ip4_mfib_interface_enable_disable (u32 sw_if_index, int is_enable)
u32 mfib_index;
int ii;
- vec_validate (ip4_main.mfib_index_by_sw_if_index, sw_if_index);
mfib_index = ip4_mfib_table_get_index_for_sw_if_index(sw_if_index);
for (ii = 0; ii < ARRAY_LEN(ip4_specials); ii++)
@@ -152,6 +158,7 @@ ip4_mfib_interface_enable_disable (u32 sw_if_index, int is_enable)
mfib_table_entry_path_update(mfib_index,
&ip4_specials[ii],
MFIB_SOURCE_SPECIAL,
+ MFIB_ENTRY_FLAG_NONE,
&path);
}
else
@@ -591,12 +598,13 @@ ip4_show_mfib (vlib_main_t * vm,
return 0;
}
+/* clang-format off */
/*?
* This command displays the IPv4 MulticasrFIB Tables (VRF Tables) and
* the route entries for each table.
*
* @note This command will run for a long time when the FIB tables are
- * comprised of millions of entries. For those senarios, consider displaying
+ * comprised of millions of entries. For those scenarios, consider displaying
* a single table or summary mode.
*
* @cliexpar
@@ -633,10 +641,9 @@ ip4_show_mfib (vlib_main_t * vm,
* 32 4
* @cliexend
?*/
-/* *INDENT-OFF* */
+/* clang-format on */
VLIB_CLI_COMMAND (ip4_show_mfib_command, static) = {
.path = "show ip mfib",
.short_help = "show ip mfib [summary] [table <table-id>] [index <fib-id>] [<grp-addr>[/<mask>]] [<grp-addr>] [<src-addr> <grp-addr>]",
.function = ip4_show_mfib,
};
-/* *INDENT-ON* */
diff --git a/src/vnet/mfib/ip6_mfib.c b/src/vnet/mfib/ip6_mfib.c
index bbab7ae2214..ac0dd8275f3 100644
--- a/src/vnet/mfib/ip6_mfib.c
+++ b/src/vnet/mfib/ip6_mfib.c
@@ -183,6 +183,7 @@ ip6_create_mfib_with_table_id (u32 table_id,
mfib_table_entry_path_update(mfib_table->mft_index,
&pfx,
MFIB_SOURCE_SPECIAL,
+ MFIB_ENTRY_FLAG_NONE,
&path_for_us);
}));
@@ -246,7 +247,6 @@ ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable)
};
u32 mfib_index;
- vec_validate (ip6_main.mfib_index_by_sw_if_index, sw_if_index);
mfib_index = ip6_mfib_table_get_index_for_sw_if_index(sw_if_index);
if (is_enable)
@@ -256,6 +256,7 @@ ip6_mfib_interface_enable_disable (u32 sw_if_index, int is_enable)
mfib_table_entry_path_update(mfib_index,
&pfx,
MFIB_SOURCE_SPECIAL,
+ MFIB_ENTRY_FLAG_NONE,
&path);
});
}
@@ -731,12 +732,13 @@ ip6_show_mfib (vlib_main_t * vm,
return 0;
}
-/*
+/* clang-format off */
+/*?
* This command displays the IPv6 MulticasrFIB Tables (VRF Tables) and
* the route entries for each table.
*
* @note This command will run for a long time when the FIB tables are
- * comprised of millions of entries. For those senarios, consider displaying
+ * comprised of millions of entries. For those scenarios, consider displaying
* a single table or summary mode.
*
* @cliexpar
@@ -772,14 +774,13 @@ ip6_show_mfib (vlib_main_t * vm,
* 24 2
* 32 4
* @cliexend
- */
-/* *INDENT-OFF* */
+ ?*/
+/* clang-format on */
VLIB_CLI_COMMAND (ip6_show_fib_command, static) = {
.path = "show ip6 mfib",
.short_help = "show ip mfib [summary] [table <table-id>] [index <fib-id>] [<grp-addr>[/<mask>]] [<grp-addr>] [<src-addr> <grp-addr>]",
.function = ip6_show_mfib,
};
-/* *INDENT-ON* */
static clib_error_t *
ip6_mfib_init (vlib_main_t * vm)
diff --git a/src/vnet/mfib/mfib_entry.c b/src/vnet/mfib/mfib_entry.c
index 2d47c3cdd31..244dd4fb206 100644
--- a/src/vnet/mfib/mfib_entry.c
+++ b/src/vnet/mfib/mfib_entry.c
@@ -412,6 +412,8 @@ mfib_entry_src_flush (mfib_entry_src_t *msrc)
}));
hash_free(msrc->mfes_itfs);
msrc->mfes_itfs = NULL;
+ hash_free(msrc->mfes_exts);
+ msrc->mfes_exts = NULL;
fib_path_list_unlock(msrc->mfes_pl);
}
@@ -497,7 +499,7 @@ mfib_entry_alloc (u32 fib_index,
}
static inline mfib_path_ext_t *
-mfib_entry_path_ext_find (mfib_path_ext_t *exts,
+mfib_entry_path_ext_find (uword *exts,
fib_node_index_t path_index)
{
uword *p;
@@ -547,6 +549,7 @@ typedef struct mfib_entry_collect_forwarding_ctx_t_
load_balance_path_t * next_hops;
fib_forward_chain_type_t fct;
mfib_entry_src_t *msrc;
+ dpo_proto_t payload_proto;
} mfib_entry_collect_forwarding_ctx_t;
static fib_path_list_walk_rc_t
@@ -592,7 +595,8 @@ mfib_entry_src_collect_forwarding (fib_node_index_t pl_index,
nh->path_index = path_index;
nh->path_weight = fib_path_get_weight(path_index);
- fib_path_contribute_forwarding(path_index, ctx->fct, &nh->path_dpo);
+ fib_path_contribute_forwarding(path_index, ctx->fct,
+ ctx->payload_proto, &nh->path_dpo);
break;
case FIB_FORW_CHAIN_TYPE_UNICAST_IP4:
@@ -632,6 +636,7 @@ mfib_entry_stack (mfib_entry_t *mfib_entry,
.next_hops = NULL,
.fct = mfib_entry_get_default_chain_type(mfib_entry),
.msrc = msrc,
+ .payload_proto = fib_proto_to_dpo(mfib_entry->mfe_prefix.fp_proto),
};
/*
@@ -1659,8 +1664,8 @@ show_mfib_entry_command (vlib_main_t * vm,
}
/*?
- * This commnad displays an entry, or all entries, in the mfib tables indexed by their unique
- * numerical indentifier.
+ * This command displays an entry, or all entries, in the mfib tables indexed
+ * by their unique numerical identifier.
?*/
VLIB_CLI_COMMAND (show_mfib_entry, static) = {
.path = "show mfib entry",
diff --git a/src/vnet/mfib/mfib_entry_src.h b/src/vnet/mfib/mfib_entry_src.h
index b85c010779c..ab3cb3ebda7 100644
--- a/src/vnet/mfib/mfib_entry_src.h
+++ b/src/vnet/mfib/mfib_entry_src.h
@@ -109,7 +109,7 @@ typedef struct mfib_entry_src_t_
/**
* Hash table of path extensions
*/
- mfib_path_ext_t *mfes_exts;
+ uword *mfes_exts;
/**
* Covering entry (if needed)
diff --git a/src/vnet/mfib/mfib_entry_src_rr.c b/src/vnet/mfib/mfib_entry_src_rr.c
index a6a1e0d8aa5..5f697a5fad1 100644
--- a/src/vnet/mfib/mfib_entry_src_rr.c
+++ b/src/vnet/mfib/mfib_entry_src_rr.c
@@ -20,8 +20,8 @@
#include <vnet/fib/fib_path_list.h>
static void
-mfib_entry_src_rr_deactiviate (mfib_entry_t *mfib_entry,
- mfib_entry_src_t *msrc)
+mfib_entry_src_rr_deactivate (mfib_entry_t *mfib_entry,
+ mfib_entry_src_t *msrc)
{
mfib_entry_t *cover;
@@ -42,8 +42,8 @@ mfib_entry_src_rr_deactiviate (mfib_entry_t *mfib_entry,
}
static void
-mfib_entry_src_rr_activiate (mfib_entry_t *mfib_entry,
- mfib_entry_src_t *msrc)
+mfib_entry_src_rr_activate (mfib_entry_t *mfib_entry,
+ mfib_entry_src_t *msrc)
{
mfib_entry_src_t *csrc;
mfib_entry_t *cover;
@@ -72,8 +72,8 @@ static mfib_src_res_t
mfib_entry_src_rr_cover_change (mfib_entry_t *mfib_entry,
mfib_entry_src_t *msrc)
{
- mfib_entry_src_rr_deactiviate(mfib_entry, msrc);
- mfib_entry_src_rr_activiate(mfib_entry, msrc);
+ mfib_entry_src_rr_deactivate(mfib_entry, msrc);
+ mfib_entry_src_rr_activate(mfib_entry, msrc);
return (MFIB_SRC_REEVALUATE);
}
@@ -87,6 +87,7 @@ mfib_entry_src_rr_cover_update (mfib_entry_t *mfib_entry,
* so there's no need to check for a new one. but we do need to
* copy down any new flags and input interfaces
*/
+ mfib_entry_src_t *csrc;
mfib_entry_t *cover;
cover = mfib_entry_get(msrc->mfes_cover);
@@ -95,6 +96,13 @@ mfib_entry_src_rr_cover_update (mfib_entry_t *mfib_entry,
msrc->mfes_itfs = cover->mfe_itfs;
msrc->mfes_rpf_id = cover->mfe_rpf_id;
+ /* The update to the cover could have removed the extensions.
+ * When a cover is removed from the table, the covereds see it first
+ * updated (to have no forwarding) and then changed
+ */
+ csrc = mfib_entry_get_best_src(cover);
+ msrc->mfes_exts = (csrc ? csrc->mfes_exts : NULL);
+
return (MFIB_SRC_REEVALUATE);
}
@@ -102,8 +110,8 @@ void
mfib_entry_src_rr_module_init (void)
{
mfib_entry_src_vft mvft = {
- .mev_activate = mfib_entry_src_rr_activiate,
- .mev_deactivate = mfib_entry_src_rr_deactiviate,
+ .mev_activate = mfib_entry_src_rr_activate,
+ .mev_deactivate = mfib_entry_src_rr_deactivate,
.mev_cover_change = mfib_entry_src_rr_cover_change,
.mev_cover_update = mfib_entry_src_rr_cover_update,
};
diff --git a/src/vnet/mfib/mfib_forward.c b/src/vnet/mfib/mfib_forward.c
index a7b7a6bed4b..3befce041bb 100644
--- a/src/vnet/mfib/mfib_forward.c
+++ b/src/vnet/mfib/mfib_forward.c
@@ -74,7 +74,7 @@ mfib_forward_lookup_trace (vlib_main_t * vm,
t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
t0->entry_index = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
t0->fib_index = vec_elt (im->mfib_index_by_sw_if_index,
- vnet_buffer(b1)->sw_if_index[VLIB_RX]);
+ vnet_buffer(b0)->sw_if_index[VLIB_RX]);
}
if (b1->flags & VLIB_BUFFER_IS_TRACED)
{
@@ -443,33 +443,33 @@ mfib_forward_rpf (vlib_main_t * vm,
else
{
next0 = MFIB_FORWARD_RPF_NEXT_DROP;
- error0 = IP4_ERROR_RPF_FAILURE;
- }
-
- b0->error = error0 ? error_node->errors[error0] : 0;
+ error0 =
+ (is_v4 ? IP4_ERROR_RPF_FAILURE : IP6_ERROR_RPF_FAILURE);
+ }
- if (b0->flags & VLIB_BUFFER_IS_TRACED)
- {
- mfib_forward_rpf_trace_t *t0;
+ b0->error = error0 ? error_node->errors[error0] : 0;
- t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
- t0->entry_index = mfei0;
- t0->itf_flags = iflags0;
- if (NULL == mfi0)
- {
- t0->sw_if_index = ~0;
- }
- else
- {
- t0->sw_if_index = mfi0->mfi_sw_if_index;
- }
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next,
- to_next, n_left_to_next,
- pi0, next0);
- }
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ mfib_forward_rpf_trace_t *t0;
+
+ t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+ t0->entry_index = mfei0;
+ t0->itf_flags = iflags0;
+ if (NULL == mfi0)
+ {
+ t0->sw_if_index = ~0;
+ }
+ else
+ {
+ t0->sw_if_index = mfi0->mfi_sw_if_index;
+ }
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next,
+ n_left_to_next, pi0, next0);
+ }
- vlib_put_next_frame(vm, node, next, n_left_to_next);
+ vlib_put_next_frame (vm, node, next, n_left_to_next);
}
return frame->n_vectors;
diff --git a/src/vnet/mfib/mfib_itf.c b/src/vnet/mfib/mfib_itf.c
index 99408f79f89..e65a6d733cf 100644
--- a/src/vnet/mfib/mfib_itf.c
+++ b/src/vnet/mfib/mfib_itf.c
@@ -206,10 +206,8 @@ format_mfib_itf (u8 * s, va_list * args)
if (~0 != mfib_itf->mfi_sw_if_index)
{
return (format(s, " %U: %U",
- format_vnet_sw_interface_name,
- vnm,
- vnet_get_sw_interface(vnm,
- mfib_itf->mfi_sw_if_index),
+ format_vnet_sw_if_index_name,
+ vnm, mfib_itf->mfi_sw_if_index,
format_mfib_itf_flags, mfib_itf->mfi_flags));
}
else
@@ -261,8 +259,8 @@ show_mfib_itf_command (vlib_main_t * vm,
}
/*?
- * This commnad displays an MFIB interface, or all interfaces, indexed by their unique
- * numerical indentifier.
+ * This command displays an MFIB interface, or all interfaces, indexed by their
+ * unique numerical identifier.
?*/
VLIB_CLI_COMMAND (show_mfib_itf, static) = {
.path = "show mfib interface",
diff --git a/src/vnet/mfib/mfib_table.c b/src/vnet/mfib/mfib_table.c
index a6a82774794..bbb9b05674a 100644
--- a/src/vnet/mfib/mfib_table.c
+++ b/src/vnet/mfib/mfib_table.c
@@ -292,6 +292,7 @@ static fib_node_index_t
mfib_table_entry_paths_update_i (u32 fib_index,
const mfib_prefix_t *prefix,
mfib_source_t source,
+ mfib_entry_flags_t entry_flags,
const fib_route_path_t *rpaths)
{
fib_node_index_t mfib_entry_index;
@@ -306,7 +307,7 @@ mfib_table_entry_paths_update_i (u32 fib_index,
source,
prefix,
MFIB_RPF_ID_NONE,
- MFIB_ENTRY_FLAG_NONE,
+ entry_flags,
INDEX_INVALID);
mfib_entry_path_update(mfib_entry_index, source, rpaths);
@@ -325,6 +326,7 @@ fib_node_index_t
mfib_table_entry_path_update (u32 fib_index,
const mfib_prefix_t *prefix,
mfib_source_t source,
+ mfib_entry_flags_t entry_flags,
const fib_route_path_t *rpath)
{
fib_node_index_t mfib_entry_index;
@@ -333,7 +335,8 @@ mfib_table_entry_path_update (u32 fib_index,
vec_add1(rpaths, *rpath);
mfib_entry_index = mfib_table_entry_paths_update_i(fib_index, prefix,
- source, rpaths);
+ source, entry_flags,
+ rpaths);
vec_free(rpaths);
return (mfib_entry_index);
@@ -343,10 +346,11 @@ fib_node_index_t
mfib_table_entry_paths_update (u32 fib_index,
const mfib_prefix_t *prefix,
mfib_source_t source,
+ mfib_entry_flags_t entry_flags,
const fib_route_path_t *rpaths)
{
return (mfib_table_entry_paths_update_i(fib_index, prefix,
- source, rpaths));
+ source, entry_flags, rpaths));
}
static void
diff --git a/src/vnet/mfib/mfib_table.h b/src/vnet/mfib/mfib_table.h
index 9a682b53a67..e3441342aaa 100644
--- a/src/vnet/mfib/mfib_table.h
+++ b/src/vnet/mfib/mfib_table.h
@@ -210,10 +210,12 @@ extern fib_node_index_t mfib_table_entry_update(u32 fib_index,
extern fib_node_index_t mfib_table_entry_path_update(u32 fib_index,
const mfib_prefix_t *prefix,
mfib_source_t source,
+ mfib_entry_flags_t entry_flags,
const fib_route_path_t *rpath);
extern fib_node_index_t mfib_table_entry_paths_update(u32 fib_index,
const mfib_prefix_t *prefix,
mfib_source_t source,
+ mfib_entry_flags_t entry_flags,
const fib_route_path_t *rpath);
/**
diff --git a/src/vnet/mfib/mfib_types.c b/src/vnet/mfib/mfib_types.c
index 19583ea18f4..755f656a7b2 100644
--- a/src/vnet/mfib/mfib_types.c
+++ b/src/vnet/mfib/mfib_types.c
@@ -253,7 +253,6 @@ mfib_show_route_flags (vlib_main_t * vm,
/*?
* This command displays the set of supported flags applicable to an MFIB route
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mfib_route_flags_command, static) =
{
.path = "show mfib route flags",
@@ -261,7 +260,6 @@ VLIB_CLI_COMMAND (mfib_route_flags_command, static) =
.function = mfib_show_route_flags,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
clib_error_t *
mfib_show_itf_flags (vlib_main_t * vm,
@@ -282,7 +280,6 @@ mfib_show_itf_flags (vlib_main_t * vm,
/*?
* This command displays the set of supported flags applicable to an MFIB interface
*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mfib_itf_flags_command, static) =
{
.path = "show mfib itf flags",
@@ -290,4 +287,3 @@ VLIB_CLI_COMMAND (mfib_itf_flags_command, static) =
.function = mfib_show_itf_flags,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
diff --git a/src/vnet/mfib/mfib_types.h b/src/vnet/mfib/mfib_types.h
index edc25fe5b99..34e8f6b928d 100644
--- a/src/vnet/mfib/mfib_types.h
+++ b/src/vnet/mfib/mfib_types.h
@@ -160,23 +160,25 @@ typedef enum mfib_itf_flags_t_
*/
typedef enum mfib_source_t_
{
- MFIB_SOURCE_SPECIAL,
- MFIB_SOURCE_6RD,
- MFIB_SOURCE_API,
- MFIB_SOURCE_CLI,
- MFIB_SOURCE_VXLAN,
- MFIB_SOURCE_DHCP,
- MFIB_SOURCE_SRv6,
- MFIB_SOURCE_GTPU,
- MFIB_SOURCE_VXLAN_GPE,
- MFIB_SOURCE_GENEVE,
- MFIB_SOURCE_IGMP,
- MFIB_SOURCE_VXLAN_GBP,
- MFIB_SOURCE_PLUGIN_LOW,
- MFIB_SOURCE_RR,
- MFIB_SOURCE_DEFAULT_ROUTE,
+ MFIB_SOURCE_SPECIAL,
+ MFIB_SOURCE_6RD,
+ MFIB_SOURCE_API,
+ MFIB_SOURCE_CLI,
+ MFIB_SOURCE_VXLAN,
+ MFIB_SOURCE_DHCP,
+ MFIB_SOURCE_SRv6,
+ MFIB_SOURCE_GTPU,
+ MFIB_SOURCE_VXLAN_GPE,
+ MFIB_SOURCE_GENEVE,
+ MFIB_SOURCE_IGMP,
+ MFIB_SOURCE_VXLAN_GBP,
+ MFIB_SOURCE_PLUGIN_LOW,
+ MFIB_SOURCE_RR,
+ MFIB_SOURCE_INTERFACE, /* used exclusively for mfib locks */
+ MFIB_SOURCE_DEFAULT_ROUTE,
} mfib_source_t;
+/* clang-format off */
#define MFIB_SOURCE_NAMES { \
[MFIB_SOURCE_SPECIAL] = "Special", \
[MFIB_SOURCE_6RD] = "6RD", \
@@ -192,8 +194,10 @@ typedef enum mfib_source_t_
[MFIB_SOURCE_VXLAN_GBP] = "VXLAN-GBP", \
[MFIB_SOURCE_PLUGIN_LOW] = "plugin-low", \
[MFIB_SOURCE_RR] = "Recursive-resolution", \
+ [MFIB_SOURCE_INTERFACE] = "Interface", \
[MFIB_SOURCE_DEFAULT_ROUTE] = "Default Route", \
}
+/* clang-format on */
#define FOREACH_MFIB_SOURCE(_ms) \
for (_ms = MFIB_SOURCE_SPECIAL; \
diff --git a/src/vnet/misc.c b/src/vnet/misc.c
index 18d4651cff3..ea816615a50 100644
--- a/src/vnet/misc.c
+++ b/src/vnet/misc.c
@@ -56,18 +56,14 @@ vnet_local_interface_tx (vlib_main_t * vm,
return f->n_vectors;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vnet_local_interface_device_class) = {
.name = "local",
.tx_function = vnet_local_interface_tx,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (vnet_local_interface_hw_class,static) = {
.name = "local",
};
-/* *INDENT-ON* */
clib_error_t *
vnet_main_init (vlib_main_t * vm)
@@ -86,10 +82,12 @@ vnet_main_init (vlib_main_t * vm)
vnm->local_interface_hw_if_index = hw_if_index;
vnm->local_interface_sw_if_index = hw->sw_if_index;
+ vnm->pcap.current_filter_function =
+ vlib_is_packet_traced_default_function ();
+
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (vnet_main_init)=
{
.init_order = VLIB_INITS("vnet_interface_init",
@@ -102,7 +100,6 @@ VLIB_INIT_FUNCTION (vnet_main_init)=
"mpls_init",
"vnet_main_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/mpls/error.def b/src/vnet/mpls/error.def
deleted file mode 100644
index 9941b18baf4..00000000000
--- a/src/vnet/mpls/error.def
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * mpls_error.def: mpls errors
- *
- * Copyright (c) 2012 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-mpls_error (NONE, "no error")
-mpls_error (UNKNOWN_PROTOCOL, "unknown protocol")
-mpls_error (UNSUPPORTED_VERSION, "unsupported version")
-mpls_error (PKTS_DECAP, "MPLS input packets decapsulated")
-mpls_error (PKTS_ENCAP, "MPLS output packets encapsulated")
-mpls_error (PKTS_NEED_FRAG, "MPLS output packets needs fragmentation")
-mpls_error (NO_LABEL, "MPLS no label for fib/dst")
-mpls_error (TTL_EXPIRED, "MPLS ttl expired")
-mpls_error (S_NOT_SET, "MPLS s-bit not set")
-mpls_error (BAD_LABEL, "invalid FIB id in label")
-mpls_error (NOT_IP4, "non-ip4 packets dropped")
-mpls_error (DISALLOWED_FIB, "disallowed FIB id")
-mpls_error (NOT_ENABLED, "MPLS not enabled")
-mpls_error (DROP, "MPLS DROP DPO")
-mpls_error (PUNT, "MPLS PUNT DPO")
diff --git a/src/vnet/mpls/interface.c b/src/vnet/mpls/interface.c
index fd075c92d3d..fd654dca891 100644
--- a/src/vnet/mpls/interface.c
+++ b/src/vnet/mpls/interface.c
@@ -22,6 +22,14 @@
#include <vnet/adj/adj_midchain.h>
#include <vnet/dpo/classify_dpo.h>
+typedef struct
+{
+ mpls_interface_state_change_function_t *function;
+ uword function_opaque;
+} mpls_interface_state_change_callback_t;
+
+/** Functions to call when interface becomes MPLS enabled/disabled. */
+static mpls_interface_state_change_callback_t *state_change_callbacks;
u8
mpls_sw_interface_is_enabled (u32 sw_if_index)
@@ -34,11 +42,20 @@ mpls_sw_interface_is_enabled (u32 sw_if_index)
return (mm->mpls_enabled_by_sw_if_index[sw_if_index]);
}
+void
+mpls_interface_state_change_add_callback (
+ mpls_interface_state_change_function_t *function, uword opaque)
+{
+ mpls_interface_state_change_callback_t cb = {
+ .function = function,
+ .function_opaque = opaque,
+ };
+ vec_add1 (state_change_callbacks, cb);
+}
+
int
-mpls_sw_interface_enable_disable (mpls_main_t * mm,
- u32 sw_if_index,
- u8 is_enable,
- u8 is_api)
+mpls_sw_interface_enable_disable (mpls_main_t *mm, u32 sw_if_index,
+ u8 is_enable)
{
fib_node_index_t lfib_index;
vnet_main_t *vnm = vnet_get_main ();
@@ -60,8 +77,7 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm,
if (1 != ++mm->mpls_enabled_by_sw_if_index[sw_if_index])
return (0);
- fib_table_lock(lfib_index, FIB_PROTOCOL_MPLS,
- (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI));
+ fib_table_lock (lfib_index, FIB_PROTOCOL_MPLS, FIB_SOURCE_INTERFACE);
vec_validate(mm->fib_index_by_sw_if_index, sw_if_index);
mm->fib_index_by_sw_if_index[sw_if_index] = lfib_index;
@@ -72,9 +88,8 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm,
if (0 != --mm->mpls_enabled_by_sw_if_index[sw_if_index])
return (0);
- fib_table_unlock(mm->fib_index_by_sw_if_index[sw_if_index],
- FIB_PROTOCOL_MPLS,
- (is_api? FIB_SOURCE_API: FIB_SOURCE_CLI));
+ fib_table_unlock (mm->fib_index_by_sw_if_index[sw_if_index],
+ FIB_PROTOCOL_MPLS, FIB_SOURCE_INTERFACE);
}
vnet_feature_enable_disable ("mpls-input", "mpls-not-enabled",
@@ -85,6 +100,12 @@ mpls_sw_interface_enable_disable (mpls_main_t * mm,
else if (hi->l3_if_count)
hi->l3_if_count--;
+ {
+ mpls_interface_state_change_callback_t *cb;
+ vec_foreach (cb, state_change_callbacks)
+ cb->function (mm, cb->function_opaque, sw_if_index, is_enable);
+ }
+
return (0);
}
@@ -118,7 +139,7 @@ mpls_interface_enable_disable (vlib_main_t * vm,
goto done;
}
- rv = mpls_sw_interface_enable_disable(&mpls_main, sw_if_index, enable, 0);
+ rv = mpls_sw_interface_enable_disable (&mpls_main, sw_if_index, enable);
if (VNET_API_ERROR_NO_SUCH_FIB == rv)
error = clib_error_return (0, "default MPLS table must be created first");
@@ -128,7 +149,7 @@ mpls_interface_enable_disable (vlib_main_t * vm,
}
/*?
- * This command enables an interface to accpet MPLS packets
+ * This command enables an interface to accept MPLS packets
*
* @cliexpar
* @cliexstart{set interface mpls}
diff --git a/src/vnet/mpls/mpls.api b/src/vnet/mpls/mpls.api
index 9d4ec0bf7bf..5d775dafdfc 100644
--- a/src/vnet/mpls/mpls.api
+++ b/src/vnet/mpls/mpls.api
@@ -92,6 +92,26 @@ define mpls_tunnel_details
vl_api_mpls_tunnel_t mt_tunnel;
};
+/** \brief Dump mpls enabled interface(s)
+ @param client_index - opaque cookie to identify the sender
+ @param sw_if_index - sw_if_index of a specific interface, or -1 (default)
+ to return all MPLS enabled interfaces
+*/
+define mpls_interface_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+};
+
+/** \brief mpls enabled interface details
+*/
+define mpls_interface_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
+
/** \brief MPLS Route Add / del route
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -212,6 +232,108 @@ autoreply define sw_interface_set_mpls_enable
bool enable [default=true];
};
+counters mpls {
+ none {
+ severity info;
+ type counter64;
+ units "packets";
+ description "no error";
+ };
+ unknown_protocol {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unknown protocol";
+ };
+ unsupported_version {
+ severity error;
+ type counter64;
+ units "packets";
+ description "unsupported version";
+ };
+ pkts_decap {
+ severity info;
+ type counter64;
+ units "packets";
+ description "MPLS input packets decapsulated";
+ };
+ pkts_encap {
+ severity info;
+ type counter64;
+ units "packets";
+ description "MPLS output packets encapsulated";
+ };
+ pkts_need_frag {
+ severity info;
+ type counter64;
+ units "packets";
+ description "MPLS output packets needs fragmentation";
+ };
+ no_label {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS no label for fib/dst";
+ };
+ ttl_expired {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS ttl expired";
+ };
+ s_not_set {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS s-bit not set";
+ };
+ bad_label {
+ severity error;
+ type counter64;
+ units "packets";
+ description "invalid FIB id in label";
+ };
+ not_ip4 {
+ severity error;
+ type counter64;
+ units "packets";
+ description "non-ip4 packets dropped";
+ };
+ disallowed_fib {
+ severity error;
+ type counter64;
+ units "packets";
+ description "disallowed FIB id";
+ };
+ not_enabled {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS not enabled";
+ };
+ drop {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS DROP DPO";
+ };
+ punt {
+ severity error;
+ type counter64;
+ units "packets";
+ description "MPLS PUNT DPO";
+ };
+};
+
+paths {
+ "/err/mpls-input" "mpls";
+ "/err/mpls-output" "mpls";
+ "/err/mpls-lookup" "mpls";
+ "/err/mpls-midchain" "mpls";
+ "/err/mpls-adj-incomplete" "mpls";
+ "/err/mpls-frag" "mpls";
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/mpls/mpls.c b/src/vnet/mpls/mpls.c
index 4076a8980a9..7d922b003cc 100644
--- a/src/vnet/mpls/mpls.c
+++ b/src/vnet/mpls/mpls.c
@@ -370,7 +370,13 @@ done:
VLIB_CLI_COMMAND (mpls_local_label_command, static) = {
.path = "mpls local-label",
.function = vnet_mpls_local_label,
- .short_help = "mpls local-label [add|del] <label-value> [eos|non-eos] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-attached] [rx-ip4 <interface>] [out-labels <value value value>]",
+ .short_help =
+ "mpls local-label [add|del] <label-value> [eos|non-eos] via "
+ "[next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight "
+ "<value>] [preference <value>] [udp-encap-id <value>] "
+ "[ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] "
+ "[mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-attached] "
+ "[rx-ip4|rx-ip6 <interface>] [out-labels <value value value>]",
};
clib_error_t *
@@ -425,17 +431,16 @@ vnet_mpls_table_cmd (vlib_main_t * vm,
}
done:
- unformat_free (line_input);
- return error;
+ vec_free (name);
+ unformat_free (line_input);
+ return error;
}
-/* *INDENT-ON* */
/*?
* This command is used to add or delete MPLS Tables. All
* Tables must be explicitly added before that can be used,
* Including the default table.
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (mpls_table_command, static) = {
.path = "mpls table",
.short_help = "mpls table [add|del] <table-id>",
diff --git a/src/vnet/mpls/mpls.h b/src/vnet/mpls/mpls.h
index 00b493f4576..6baaaad95ba 100644
--- a/src/vnet/mpls/mpls.h
+++ b/src/vnet/mpls/mpls.h
@@ -23,22 +23,18 @@
#include <vnet/fib/fib_node.h>
#include <vnet/adj/adj.h>
-typedef enum
-{
-#define mpls_error(n,s) MPLS_ERROR_##n,
-#include <vnet/mpls/error.def>
-#undef mpls_error
- MPLS_N_ERROR,
-} mpls_error_t;
+struct mpls_main_t;
/**
* @brief Definition of a callback for receiving MPLS interface state change
* notifications
*/
-typedef void (*mpls_interface_state_change_callback_t) (u32 sw_if_index,
- u32 is_enable);
+typedef void (mpls_interface_state_change_function_t) (struct mpls_main_t *mm,
+ uword opaque,
+ u32 sw_if_index,
+ u32 is_enable);
-typedef struct
+typedef struct mpls_main_t
{
/* MPLS FIB index for each software interface */
u32 *fib_index_by_sw_if_index;
@@ -85,12 +81,14 @@ unformat_function_t unformat_mpls_unicast_label;
unformat_function_t unformat_mpls_header;
unformat_function_t unformat_pg_mpls_header;
-int mpls_sw_interface_enable_disable (mpls_main_t * mm,
- u32 sw_if_index,
- u8 is_enable, u8 is_api);
-
u8 mpls_sw_interface_is_enabled (u32 sw_if_index);
+void mpls_interface_state_change_add_callback (
+ mpls_interface_state_change_function_t *function, uword opaque);
+
+int mpls_sw_interface_enable_disable (mpls_main_t *mm, u32 sw_if_index,
+ u8 is_enable);
+
int mpls_dest_cmp (void *a1, void *a2);
int mpls_fib_index_cmp (void *a1, void *a2);
diff --git a/src/vnet/mpls/mpls_api.c b/src/vnet/mpls/mpls_api.c
index e89732f0d10..58998a6576c 100644
--- a/src/vnet/mpls/mpls_api.c
+++ b/src/vnet/mpls/mpls_api.c
@@ -199,19 +199,15 @@ vl_api_mpls_route_add_del_t_handler (vl_api_mpls_route_add_del_t * mp)
rv = mpls_route_add_del_t_handler (vnm, mp, &stats_index);
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_MPLS_ROUTE_ADD_DEL_REPLY,
({
rmp->stats_index = htonl (stats_index);
}));
- /* *INDENT-ON* */
}
void
mpls_table_create (u32 table_id, u8 is_api, const u8 * name)
{
- u32 fib_index;
-
/*
* The MPLS defult table must also be explicitly created via the API.
* So in contrast to IP, it gets no special treatment here.
@@ -222,16 +218,11 @@ mpls_table_create (u32 table_id, u8 is_api, const u8 * name)
* i.e. it can be added many times via the API but needs to be
* deleted only once.
*/
- fib_index = fib_table_find (FIB_PROTOCOL_MPLS, table_id);
-
- if (~0 == fib_index)
- {
fib_table_find_or_create_and_lock_w_name (FIB_PROTOCOL_MPLS,
table_id,
(is_api ?
FIB_SOURCE_API :
FIB_SOURCE_CLI), name);
- }
}
static void
@@ -277,13 +268,11 @@ vl_api_mpls_tunnel_add_del_t_handler (vl_api_mpls_tunnel_add_del_t * mp)
vec_free (rpaths);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_MPLS_TUNNEL_ADD_DEL_REPLY,
({
rmp->sw_if_index = ntohl(tunnel_sw_if_index);
rmp->tunnel_index = ntohl(tunnel_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -295,9 +284,8 @@ static void
VALIDATE_SW_IF_INDEX (mp);
- rv = mpls_sw_interface_enable_disable (&mpls_main,
- ntohl (mp->sw_if_index),
- mp->enable, 1);
+ rv = mpls_sw_interface_enable_disable (&mpls_main, ntohl (mp->sw_if_index),
+ mp->enable);
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_SW_INTERFACE_SET_MPLS_ENABLE_REPLY);
@@ -409,12 +397,58 @@ vl_api_mpls_table_dump_t_handler (vl_api_mpls_table_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (fib_table, mm->fibs)
{
send_mpls_table_details(am, reg, mp->context, fib_table);
}
- /* *INDENT-ON* */
+}
+
+static void
+send_mpls_interface_details (vpe_api_main_t *am, vl_api_registration_t *reg,
+ u32 context, const u32 sw_if_index)
+{
+ vl_api_mpls_interface_details_t *mp;
+
+ mp = vl_msg_api_alloc_zero (sizeof (*mp));
+ mp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_MPLS_INTERFACE_DETAILS);
+ mp->context = context;
+
+ mp->sw_if_index = htonl (sw_if_index);
+ vl_api_send_msg (reg, (u8 *) mp);
+}
+
+static void
+vl_api_mpls_interface_dump_t_handler (vl_api_mpls_interface_dump_t *mp)
+{
+ vpe_api_main_t *am = &vpe_api_main;
+ vl_api_registration_t *reg;
+ vnet_interface_main_t *im = &vnet_main.interface_main;
+ vnet_sw_interface_t *si;
+ u32 sw_if_index = ~0;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (sw_if_index == ~0)
+ {
+ pool_foreach (si, im->sw_interfaces)
+ {
+ if (mpls_sw_interface_is_enabled (si->sw_if_index))
+ {
+ send_mpls_interface_details (am, reg, mp->context,
+ si->sw_if_index);
+ }
+ }
+ }
+ else
+ {
+ if (mpls_sw_interface_is_enabled (sw_if_index))
+ {
+ send_mpls_interface_details (am, reg, mp->context, sw_if_index);
+ }
+ }
}
static void
@@ -516,7 +550,8 @@ mpls_api_hookup (vlib_main_t * vm)
/*
* Trace space for 8 MPLS encap labels
*/
- am->api_trace_cfg[VL_API_MPLS_TUNNEL_ADD_DEL].size += 8 * sizeof (u32);
+ vl_api_increase_msg_trace_size (am, VL_API_MPLS_TUNNEL_ADD_DEL,
+ 8 * sizeof (u32));
/*
* Set up the (msg_name, crc, message-id) table
diff --git a/src/vnet/mpls/mpls_features.c b/src/vnet/mpls/mpls_features.c
index 070f90a1cc6..3b535032908 100644
--- a/src/vnet/mpls/mpls_features.c
+++ b/src/vnet/mpls/mpls_features.c
@@ -16,6 +16,7 @@
*/
#include <vnet/mpls/mpls.h>
+#include <vnet/mpls/mpls.api_enum.h>
static u8 *
format_mpls_drop_trace (u8 * s, va_list * args)
diff --git a/src/vnet/mpls/mpls_input.c b/src/vnet/mpls/mpls_input.c
index 37fa1aead12..0505d9a1829 100644
--- a/src/vnet/mpls/mpls_input.c
+++ b/src/vnet/mpls/mpls_input.c
@@ -19,6 +19,7 @@
#include <vnet/pg/pg.h>
#include <vnet/mpls/mpls.h>
#include <vnet/feature/feature.h>
+#include <vnet/mpls/mpls.api_enum.h>
typedef struct {
u32 next_index;
@@ -236,12 +237,6 @@ VLIB_NODE_FN (mpls_input_node) (vlib_main_t * vm,
return mpls_input_inline (vm, node, from_frame);
}
-static char * mpls_error_strings[] = {
-#define mpls_error(n,s) s,
-#include "error.def"
-#undef mpls_error
-};
-
VLIB_REGISTER_NODE (mpls_input_node) = {
.name = "mpls-input",
/* Takes a vector of packets. */
@@ -250,7 +245,7 @@ VLIB_REGISTER_NODE (mpls_input_node) = {
.runtime_data_bytes = sizeof(mpls_input_runtime_t),
.n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
+ .error_counters = mpls_error_counters,
.n_next_nodes = MPLS_INPUT_N_NEXT,
.next_nodes = {
@@ -283,10 +278,8 @@ static clib_error_t * mpls_input_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (mpls_input_init) =
{
.runs_after = VLIB_INITS("mpls_init"),
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
diff --git a/src/vnet/mpls/mpls_lookup.c b/src/vnet/mpls/mpls_lookup.c
index 07c5cc47198..a5ac56534a5 100644
--- a/src/vnet/mpls/mpls_lookup.c
+++ b/src/vnet/mpls/mpls_lookup.c
@@ -20,6 +20,7 @@
#include <vnet/fib/mpls_fib.h>
#include <vnet/dpo/load_balance_map.h>
#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/mpls/mpls.api_enum.h>
/**
* The arc/edge from the MPLS lookup node to the MPLS replicate node
@@ -43,13 +44,13 @@ format_mpls_lookup_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_lookup_trace_t * t = va_arg (*args, mpls_lookup_trace_t *);
- s = format (s, "MPLS: next [%d], lookup fib index %d, LB index %d hash %x "
- "label %d eos %d",
- t->next_index, t->lfib_index, t->lb_index, t->hash,
- vnet_mpls_uc_get_label(
- clib_net_to_host_u32(t->label_net_byte_order)),
- vnet_mpls_uc_get_s(
- clib_net_to_host_u32(t->label_net_byte_order)));
+ s = format (
+ s,
+ "MPLS: next [%d], lookup fib index %d, LB index %d hash 0x%08x "
+ "label %d eos %d",
+ t->next_index, t->lfib_index, t->lb_index, t->hash,
+ vnet_mpls_uc_get_label (clib_net_to_host_u32 (t->label_net_byte_order)),
+ vnet_mpls_uc_get_s (clib_net_to_host_u32 (t->label_net_byte_order)));
return s;
}
@@ -454,18 +455,12 @@ VLIB_NODE_FN (mpls_lookup_node) (vlib_main_t * vm,
return from_frame->n_vectors;
}
-static char * mpls_error_strings[] = {
-#define mpls_error(n,s) s,
-#include "error.def"
-#undef mpls_error
-};
-
VLIB_REGISTER_NODE (mpls_lookup_node) = {
.name = "mpls-lookup",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
+ .error_counters = mpls_error_counters,
.sibling_of = "mpls-load-balance",
@@ -487,8 +482,8 @@ format_mpls_load_balance_trace (u8 * s, va_list * args)
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
mpls_load_balance_trace_t * t = va_arg (*args, mpls_load_balance_trace_t *);
- s = format (s, "MPLS: next [%d], LB index %d hash %d",
- t->next_index, t->lb_index, t->hash);
+ s = format (s, "MPLS: next [%d], LB index %d hash 0x%08x", t->next_index,
+ t->lb_index, t->hash);
return s;
}
@@ -558,75 +553,77 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm,
* We don't want to use the same hash value at each level in the recursion
* graph as that would lead to polarisation
*/
- hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
- hc1 = vnet_buffer (p1)->ip.flow_hash = 0;
-
- if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
- }
- else
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
- }
- dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
- }
- else
- {
- dpo0 = load_balance_get_bucket_i (lb0, 0);
- }
- if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p1)->ip.flow_hash))
- {
- hc1 = vnet_buffer(p1)->ip.flow_hash = vnet_buffer(p1)->ip.flow_hash >> 1;
- }
- else
- {
- hc1 = vnet_buffer(p1)->ip.flow_hash = mpls_compute_flow_hash(mpls1, hc1);
- }
- dpo1 = load_balance_get_fwd_bucket(lb1, (hc1 & lb1->lb_n_buckets_minus_1));
- }
- else
- {
- dpo1 = load_balance_get_bucket_i (lb1, 0);
- }
-
- next0 = dpo0->dpoi_next_node;
- next1 = dpo1->dpoi_next_node;
-
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
- vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
-
- vlib_increment_combined_counter
- (cm, thread_index, lbi0, 1,
- vlib_buffer_length_in_chain (vm, p0));
- vlib_increment_combined_counter
- (cm, thread_index, lbi1, 1,
- vlib_buffer_length_in_chain (vm, p1));
-
- if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p0, sizeof (*tr));
- tr->next_index = next0;
- tr->lb_index = lbi0;
- tr->hash = hc0;
- }
- if (PREDICT_FALSE(p1->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p1, sizeof (*tr));
- tr->next_index = next1;
- tr->lb_index = lbi1;
- tr->hash = hc1;
- }
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next,
- to_next, n_left_to_next,
- pi0, pi1, next0, next1);
+ hc0 = hc1 = 0;
+
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket (
+ lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+ if (PREDICT_FALSE (lb1->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p1)->ip.flow_hash))
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ vnet_buffer (p1)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc1 = vnet_buffer (p1)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls1, lb1->lb_hash_config);
+ }
+ dpo1 = load_balance_get_fwd_bucket (
+ lb1, (hc1 & lb1->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo1 = load_balance_get_bucket_i (lb1, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ next1 = dpo1->dpoi_next_node;
+
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+ vnet_buffer (p1)->ip.adj_index[VLIB_TX] = dpo1->dpoi_index;
+
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi1, 1, vlib_buffer_length_in_chain (vm, p1));
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->hash = hc0;
+ }
+ if (PREDICT_FALSE (p1->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p1, sizeof (*tr));
+ tr->next_index = next1;
+ tr->lb_index = lbi1;
+ tr->hash = hc1;
+ }
+
+ vlib_validate_buffer_enqueue_x2 (
+ vm, node, next, to_next, n_left_to_next, pi0, pi1, next0, next1);
}
while (n_left_from > 0 && n_left_to_next > 0)
@@ -651,44 +648,45 @@ VLIB_NODE_FN (mpls_load_balance_node) (vlib_main_t * vm,
lb0 = load_balance_get(lbi0);
- hc0 = vnet_buffer (p0)->ip.flow_hash = 0;
- if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
- {
- if (PREDICT_TRUE (vnet_buffer(p0)->ip.flow_hash))
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = vnet_buffer(p0)->ip.flow_hash >> 1;
- }
- else
- {
- hc0 = vnet_buffer(p0)->ip.flow_hash = mpls_compute_flow_hash(mpls0, hc0);
- }
- dpo0 = load_balance_get_fwd_bucket(lb0, (hc0 & lb0->lb_n_buckets_minus_1));
- }
- else
- {
- dpo0 = load_balance_get_bucket_i (lb0, 0);
- }
-
- next0 = dpo0->dpoi_next_node;
- vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
-
- if (PREDICT_FALSE(p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_load_balance_trace_t *tr = vlib_add_trace (vm, node,
- p0, sizeof (*tr));
- tr->next_index = next0;
- tr->lb_index = lbi0;
- tr->hash = hc0;
- }
-
- vlib_increment_combined_counter
- (cm, thread_index, lbi0, 1,
- vlib_buffer_length_in_chain (vm, p0));
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next,
- to_next, n_left_to_next,
- pi0, next0);
- }
+ hc0 = 0;
+ if (PREDICT_FALSE (lb0->lb_n_buckets > 1))
+ {
+ if (PREDICT_TRUE (vnet_buffer (p0)->ip.flow_hash))
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ vnet_buffer (p0)->ip.flow_hash >> 1;
+ }
+ else
+ {
+ hc0 = vnet_buffer (p0)->ip.flow_hash =
+ mpls_compute_flow_hash (mpls0, lb0->lb_hash_config);
+ }
+ dpo0 = load_balance_get_fwd_bucket (
+ lb0, (hc0 & lb0->lb_n_buckets_minus_1));
+ }
+ else
+ {
+ dpo0 = load_balance_get_bucket_i (lb0, 0);
+ }
+
+ next0 = dpo0->dpoi_next_node;
+ vnet_buffer (p0)->ip.adj_index[VLIB_TX] = dpo0->dpoi_index;
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_load_balance_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->next_index = next0;
+ tr->lb_index = lbi0;
+ tr->hash = hc0;
+ }
+
+ vlib_increment_combined_counter (
+ cm, thread_index, lbi0, 1, vlib_buffer_length_in_chain (vm, p0));
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next, to_next,
+ n_left_to_next, pi0, next0);
+ }
vlib_put_next_frame (vm, node, next, n_left_to_next);
}
diff --git a/src/vnet/mpls/mpls_output.c b/src/vnet/mpls/mpls_output.c
index a1d2d3baa88..9c1d7316db7 100644
--- a/src/vnet/mpls/mpls_output.c
+++ b/src/vnet/mpls/mpls_output.c
@@ -20,6 +20,7 @@
#include <vnet/mpls/mpls.h>
#include <vnet/ip/ip_frag.h>
#include <vnet/adj/adj_dp.h>
+#include <vnet/mpls/mpls.api_enum.h>
typedef struct {
/* Adjacency taken. */
@@ -317,12 +318,6 @@ mpls_output_inline (vlib_main_t * vm,
return from_frame->n_vectors;
}
-static char * mpls_error_strings[] = {
-#define mpls_error(n,s) s,
-#include "error.def"
-#undef mpls_error
-};
-
VLIB_NODE_FN (mpls_output_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
@@ -335,7 +330,7 @@ VLIB_REGISTER_NODE (mpls_output_node) = {
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
+ .error_counters = mpls_error_counters,
.n_next_nodes = MPLS_OUTPUT_N_NEXT,
.next_nodes = {
@@ -357,18 +352,12 @@ VLIB_REGISTER_NODE (mpls_midchain_node) = {
.vector_size = sizeof (u32),
.n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
+ .error_counters = mpls_error_counters,
.sibling_of = "mpls-output",
.format_trace = format_mpls_output_trace,
};
-static char *mpls_frag_error_strings[] = {
-#define _(sym,string) string,
- foreach_ip_frag_error
-#undef _
-};
-
typedef struct mpls_frag_trace_t_
{
u16 pkt_size;
@@ -377,11 +366,12 @@ typedef struct mpls_frag_trace_t_
typedef enum
{
- MPLS_FRAG_NEXT_REWRITE,
- MPLS_FRAG_NEXT_REWRITE_MIDCHAIN,
- MPLS_FRAG_NEXT_ICMP_ERROR,
- MPLS_FRAG_NEXT_DROP,
- MPLS_FRAG_N_NEXT,
+ MPLS_FRAG_NEXT_REWRITE,
+ MPLS_FRAG_NEXT_REWRITE_MIDCHAIN,
+ MPLS_FRAG_NEXT_ICMP4_ERROR,
+ MPLS_FRAG_NEXT_ICMP6_ERROR,
+ MPLS_FRAG_NEXT_DROP,
+ MPLS_FRAG_N_NEXT,
} mpls_frag_next_t;
static uword
@@ -390,9 +380,7 @@ mpls_frag (vlib_main_t * vm,
vlib_frame_t * frame)
{
u32 n_left_from, next_index, * from, * to_next, n_left_to_next, *frags;
- vlib_node_runtime_t * error_node;
- error_node = vlib_node_get_runtime (vm, mpls_output_node.index);
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
@@ -410,91 +398,111 @@ mpls_frag (vlib_main_t * vm,
mpls_frag_next_t next0;
u32 pi0, adj_index0;
ip_frag_error_t error0 = IP_FRAG_ERROR_NONE;
- i16 encap_size;
- u8 is_ip4;
-
- pi0 = to_next[0] = from[0];
- p0 = vlib_get_buffer (vm, pi0);
- from += 1;
- n_left_from -= 1;
- is_ip4 = vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4;
-
- adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
- adj0 = adj_get(adj_index0);
-
- /* the size of the MPLS stack */
- encap_size = vnet_buffer(p0)->l3_hdr_offset - p0->current_data;
-
- /* IP fragmentation */
- if (is_ip4)
- error0 = ip4_frag_do_fragment (vm, pi0,
- adj0->rewrite_header.max_l3_packet_bytes,
- encap_size, &frags);
- else
- error0 = ip6_frag_do_fragment (vm, pi0,
- adj0->rewrite_header.max_l3_packet_bytes,
- encap_size, &frags);
-
- if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
- {
- mpls_frag_trace_t *tr =
- vlib_add_trace (vm, node, p0, sizeof (*tr));
- tr->mtu = adj0->rewrite_header.max_l3_packet_bytes;
- tr->pkt_size = vlib_buffer_length_in_chain(vm, p0);
- }
-
- if (PREDICT_TRUE(error0 == IP_FRAG_ERROR_NONE))
- {
- /* Free original buffer chain */
- vlib_buffer_free_one (vm, pi0); /* Free original packet */
- next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ?
- MPLS_FRAG_NEXT_REWRITE_MIDCHAIN :
- MPLS_FRAG_NEXT_REWRITE);
- }
- else if (is_ip4 && error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
- {
- icmp4_error_set_vnet_buffer (
- p0, ICMP4_destination_unreachable,
- ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
- vnet_buffer (p0)->ip_frag.mtu);
- next0 = MPLS_FRAG_NEXT_ICMP_ERROR;
- }
- else
- {
- vlib_error_count (vm, mpls_output_node.index, error0, 1);
- vec_add1 (frags, pi0); /* Get rid of the original buffer */
- next0 = MPLS_FRAG_NEXT_DROP;
- }
-
- /* Send fragments that were added in the frame */
- u32 *frag_from, frag_left;
-
- frag_from = frags;
- frag_left = vec_len (frags);
-
- while (frag_left > 0)
- {
- while (frag_left > 0 && n_left_to_next > 0)
- {
- u32 i;
- i = to_next[0] = frag_from[0];
- frag_from += 1;
- frag_left -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- p0 = vlib_get_buffer (vm, i);
- p0->error = error_node->errors[error0];
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next, i,
- next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- vlib_get_next_frame (vm, node, next_index, to_next,
- n_left_to_next);
- }
- vec_reset_length (frags);
+ i16 encap_size, mtu;
+ u8 is_ip4;
+
+ pi0 = to_next[0] = from[0];
+ p0 = vlib_get_buffer (vm, pi0);
+ from += 1;
+ n_left_from -= 1;
+ is_ip4 = vnet_buffer (p0)->mpls.pyld_proto == DPO_PROTO_IP4;
+
+ adj_index0 = vnet_buffer (p0)->ip.adj_index[VLIB_TX];
+ adj0 = adj_get (adj_index0);
+
+ /* the size of the MPLS stack */
+ encap_size = vnet_buffer (p0)->l3_hdr_offset - p0->current_data;
+ mtu = adj0->rewrite_header.max_l3_packet_bytes - encap_size;
+
+ /* IP fragmentation */
+ if (is_ip4)
+ error0 = ip4_frag_do_fragment (vm, pi0, mtu, encap_size, &frags);
+ else
+ {
+ if (!(p0->flags & VNET_BUFFER_F_LOCALLY_ORIGINATED))
+ {
+ /* only fragment locally generated IPv6 */
+ error0 = IP_FRAG_ERROR_DONT_FRAGMENT_SET;
+ }
+ else
+ {
+ error0 =
+ ip6_frag_do_fragment (vm, pi0, mtu, encap_size, &frags);
+ }
+ }
+
+ if (PREDICT_FALSE (p0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ mpls_frag_trace_t *tr =
+ vlib_add_trace (vm, node, p0, sizeof (*tr));
+ tr->mtu = mtu;
+ tr->pkt_size = vlib_buffer_length_in_chain (vm, p0);
+ }
+
+ if (PREDICT_TRUE (error0 == IP_FRAG_ERROR_NONE))
+ {
+ /* Free original buffer chain */
+ vlib_buffer_free_one (vm, pi0);
+ next0 = (IP_LOOKUP_NEXT_MIDCHAIN == adj0->lookup_next_index ?
+ MPLS_FRAG_NEXT_REWRITE_MIDCHAIN :
+ MPLS_FRAG_NEXT_REWRITE);
+ }
+ else
+ {
+ vlib_error_count (vm, node->node_index, error0, 1);
+
+ if (error0 == IP_FRAG_ERROR_DONT_FRAGMENT_SET)
+ {
+ vlib_buffer_advance (p0, encap_size);
+ if (is_ip4)
+ {
+ icmp4_error_set_vnet_buffer (
+ p0, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_fragmentation_needed_and_dont_fragment_set,
+ mtu);
+ next0 = MPLS_FRAG_NEXT_ICMP4_ERROR;
+ }
+ else
+ {
+ icmp6_error_set_vnet_buffer (p0, ICMP6_packet_too_big,
+ 0, mtu);
+ next0 = MPLS_FRAG_NEXT_ICMP6_ERROR;
+ }
+ }
+ else
+ {
+ next0 = MPLS_FRAG_NEXT_DROP;
+ }
+
+ /* Get rid of the original buffer */
+ vec_add1 (frags, pi0);
+ }
+
+ /* Send fragments that were added in the frame */
+ u32 *frag_from, frag_left;
+
+ frag_from = frags;
+ frag_left = vec_len (frags);
+
+ while (frag_left > 0)
+ {
+ while (frag_left > 0 && n_left_to_next > 0)
+ {
+ u32 i;
+ i = to_next[0] = frag_from[0];
+ frag_from += 1;
+ frag_left -= 1;
+ to_next += 1;
+ n_left_to_next -= 1;
+
+ vlib_validate_buffer_enqueue_x1 (
+ vm, node, next_index, to_next, n_left_to_next, i, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ vlib_get_next_frame (vm, node, next_index, to_next,
+ n_left_to_next);
+ }
+ vec_reset_length (frags);
}
vlib_put_next_frame (vm, node, next_index, n_left_to_next);
}
@@ -515,22 +523,21 @@ format_mpls_frag_trace (u8 * s, va_list * args)
}
VLIB_REGISTER_NODE (mpls_frag_node) = {
- .function = mpls_frag,
- .name = "mpls-frag",
- .vector_size = sizeof (u32),
- .format_trace = format_mpls_frag_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = IP_FRAG_N_ERROR,
- .error_strings = mpls_frag_error_strings,
-
- .n_next_nodes = MPLS_FRAG_N_NEXT,
- .next_nodes = {
- [MPLS_FRAG_NEXT_REWRITE] = "mpls-output",
- [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain",
- [MPLS_FRAG_NEXT_ICMP_ERROR] = "ip4-icmp-error",
- [MPLS_FRAG_NEXT_DROP] = "mpls-drop"
- },
+ .function = mpls_frag,
+ .name = "mpls-frag",
+ .vector_size = sizeof (u32),
+ .format_trace = format_mpls_frag_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+
+ .n_errors = IP_FRAG_N_ERROR,
+ .error_counters = ip_frag_error_counters,
+
+ .n_next_nodes = MPLS_FRAG_N_NEXT,
+ .next_nodes = { [MPLS_FRAG_NEXT_REWRITE] = "mpls-output",
+ [MPLS_FRAG_NEXT_REWRITE_MIDCHAIN] = "mpls-midchain",
+ [MPLS_FRAG_NEXT_ICMP4_ERROR] = "ip4-icmp-error",
+ [MPLS_FRAG_NEXT_ICMP6_ERROR] = "ip6-icmp-error",
+ [MPLS_FRAG_NEXT_DROP] = "mpls-drop" },
};
/*
@@ -649,7 +656,7 @@ VLIB_REGISTER_NODE (mpls_adj_incomplete_node) = {
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = MPLS_N_ERROR,
- .error_strings = mpls_error_strings,
+ .error_counters = mpls_error_counters,
.n_next_nodes = MPLS_ADJ_INCOMPLETE_N_NEXT,
.next_nodes = {
diff --git a/src/vnet/mpls/mpls_tunnel.c b/src/vnet/mpls/mpls_tunnel.c
index 54458eacdf8..b03a4a57f68 100644
--- a/src/vnet/mpls/mpls_tunnel.c
+++ b/src/vnet/mpls/mpls_tunnel.c
@@ -265,10 +265,8 @@ mpls_tunnel_collect_forwarding (fib_node_index_t pl_index,
* found a matching extension. stack it to obtain the forwarding
* info for this path.
*/
- ctx->next_hops = fib_path_ext_stack(path_ext,
- ctx->fct,
- ctx->fct,
- ctx->next_hops);
+ ctx->next_hops =
+ fib_path_ext_stack (path_ext, DPO_PROTO_MPLS, ctx->fct, ctx->next_hops);
return (FIB_PATH_LIST_WALK_CONTINUE);
}
@@ -638,6 +636,7 @@ vnet_mpls_tunnel_del (u32 sw_if_index)
mt->mt_sibling_index);
dpo_reset(&mt->mt_l2_lb);
+ vnet_reset_interface_l3_output_node (vlib_get_main (), mt->mt_sw_if_index);
vnet_delete_hw_interface (vnet_get_main(), mt->mt_hw_if_index);
pool_put(mpls_tunnel_pool, mt);
@@ -685,6 +684,9 @@ vnet_mpls_tunnel_create (u8 l2_only,
if (mt->mt_flags & MPLS_TUNNEL_FLAG_L2)
vnet_set_interface_output_node (vnm, mt->mt_hw_if_index,
mpls_tunnel_tx.index);
+ else
+ vnet_set_interface_l3_output_node (vnm->vlib_main, hi->sw_if_index,
+ (u8 *) "tunnel-output");
/* Standard default MPLS tunnel MTU. */
vnet_sw_interface_set_mtu (vnm, hi->sw_if_index, 9000);
@@ -930,7 +932,12 @@ done:
VLIB_CLI_COMMAND (create_mpls_tunnel_command, static) = {
.path = "mpls tunnel",
.short_help =
- "mpls tunnel [multicast] [l2-only] via [next-hop-address] [next-hop-interface] [next-hop-table <value>] [weight <value>] [preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table <value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] [resolve-via-host] [resolve-via-connected] [rx-ip4 <interface>] [out-labels <value value value>]",
+ "mpls tunnel [multicast] [l2-only] via [next-hop-address] "
+ "[next-hop-interface] [next-hop-table <value>] [weight <value>] "
+ "[preference <value>] [udp-encap-id <value>] [ip4-lookup-in-table "
+ "<value>] [ip6-lookup-in-table <value>] [mpls-lookup-in-table <value>] "
+ "[resolve-via-host] [resolve-via-connected] [rx-ip4|rx-ip6 <interface>] "
+ "[out-labels <value value value>]",
.function = vnet_create_mpls_tunnel_command_fn,
};
diff --git a/src/vnet/mtu.rst b/src/vnet/mtu.rst
new file mode 100644
index 00000000000..c7e92523c7f
--- /dev/null
+++ b/src/vnet/mtu.rst
@@ -0,0 +1,108 @@
+.. _mtu_doc:
+
+MTU in VPP
+==========
+
+Maximum Transmission Unit is a term used to describe the maximum sized
+“thingy” that can be sent out an interface. It can refer to the maximum
+frame size that a NIC can send. On Ethernet that would include the
+Ethernet header but typically not the IGF. It can refer to the maximum
+packet size, that is, on Ethernet an MTU of 1500, would allow an IPv4
+packet of 1500 bytes, that would result in an Ethernet frame of 1518
+bytes.
+
+
+VPP allows setting of the physical payload MTU. I.e. not including L2
+overhead. Setting the hardware MTU will program the NIC. This MTU will
+be inherited by all software interfaces.
+
+VPP also allows setting of the payload MTU for software interfaces.
+Independently of the MTU set on the hardware. If the software payload
+MTU is set higher than the capability of the NIC, the packet will be
+dropped.
+
+In addition VPP supports setting the MTU of individual network layer
+protocols. IPv4, IPv6 or MPLS. For example an IPv4 MTU of 1500 (includes
+the IPv4 header) will fit in a hardware payload MTU of 1500.
+
+*Note we might consider changing the hardware payload MTU to hardware
+MTU*. That is, the MTU includes all L2 framing. Then the payload MTU can
+be calculated based on the interface’s configuration. E.g. 802.1q tags
+etc.
+
+There are currently no checks or warnings if e.g. the user configures a
+per-protocol MTU larger than the underlying payload MTU. If that happens
+packets will be fragmented or dropped.
+
+Data structures
+^^^^^^^^^^^^^^^
+
+The hardware payload MTU is stored in the max_packet_bytes variable in
+the vnet_hw_interface_t structure.
+
+The software MTU (previously max_l3_packet_bytes) is in
+vnet_sw_interface_t->in mtu[VNET_N_MTU].
+
+MTU API
+-------
+
+Set physical MTU
+^^^^^^^^^^^^^^^^
+
+This API message is used to set the physical MTU. It is currently
+limited to Ethernet interfaces. Note, this programs the NIC.
+
+::
+
+ autoreply define hw_interface_set_mtu
+ {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ u16 mtu;
+ };
+
+Set the L2 payload MTU
+^^^^^^^^^^^^^^^^^^^^^^
+
+:: note
+ (not including the L2 header) and per-protocol MTUs
+
+This API message sets the L3 payload MTU. E.g. on Ethernet it is the
+maximum size of the Ethernet payload. If a value is left as 0, then the
+default is picked from VNET_MTU_L3.
+
+::
+
+ autoreply define sw_interface_set_mtu
+ {
+ u32 client_index;
+ u32 context;
+ u32 sw_if_index;
+ /* $$$$ Replace with enum */
+ u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+ };
+
+Get interface MTU
+^^^^^^^^^^^^^^^^^
+
+The various MTUs on an interface can be queried with the
+sw_interface_dump/sw_interface_details calls.
+
+::
+
+ define sw_interface_details
+ {
+ /* MTU */
+ u16 link_mtu;
+
+ /* Per protocol MTUs */
+ u32 mtu[4]; /* 0 - L3, 1 - IP4, 2 - IP6, 3 - MPLS */
+ };
+
+MTU CLI
+-------
+
+::
+
+ set interface mtu [packet|ip4|ip6|mpls] <value> <interface>
diff --git a/src/vnet/osi/node.c b/src/vnet/osi/node.c
index 4eb3e461139..9edc354cda7 100644
--- a/src/vnet/osi/node.c
+++ b/src/vnet/osi/node.c
@@ -239,7 +239,6 @@ static char *osi_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (osi_input_node) = {
.function = osi_input,
.name = "osi-input",
@@ -260,7 +259,6 @@ VLIB_REGISTER_NODE (osi_input_node) = {
.format_trace = format_osi_input_trace,
.unformat_buffer = unformat_osi_header,
};
-/* *INDENT-ON* */
static void
osi_setup_node (vlib_main_t *vm, u32 node_index)
diff --git a/src/vnet/pg/cli.c b/src/vnet/pg/cli.c
index e57e72573f3..3f2de2604b2 100644
--- a/src/vnet/pg/cli.c
+++ b/src/vnet/pg/cli.c
@@ -47,12 +47,10 @@
/* Root of all packet generator cli commands. */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_pg_command, static) = {
.path = "packet-generator",
.short_help = "Packet generator commands",
};
-/* *INDENT-ON* */
void
pg_enable_disable (u32 stream_index, int is_enable)
@@ -63,11 +61,9 @@ pg_enable_disable (u32 stream_index, int is_enable)
if (stream_index == ~0)
{
/* No stream specified: enable/disable all streams. */
- /* *INDENT-OFF* */
pool_foreach (s, pg->streams) {
pg_stream_enable_disable (pg, s, is_enable);
}
- /* *INDENT-ON* */
}
else
{
@@ -138,23 +134,19 @@ doit:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_streams_cli, static) = {
.path = "packet-generator enable-stream",
.short_help = "Enable packet generator streams",
.function = enable_disable_stream,
.function_arg = 1, /* is_enable */
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (disable_streams_cli, static) = {
.path = "packet-generator disable-stream",
.short_help = "Disable packet generator streams",
.function = enable_disable_stream,
.function_arg = 0, /* is_enable */
};
-/* *INDENT-ON* */
static u8 *
format_pg_edit_group (u8 * s, va_list * va)
@@ -210,12 +202,10 @@ format_pg_stream (u8 * s, va_list * va)
if (verbose)
{
pg_edit_group_t *g;
- /* *INDENT-OFF* */
vec_foreach (g, t->edit_groups)
{
s = format (s, "\n%U%U", format_white_space, indent, format_pg_edit_group, g);
}
- /* *INDENT-ON* */
}
return s;
@@ -244,23 +234,19 @@ show_streams (vlib_main_t * vm,
}
vlib_cli_output (vm, "%U", format_pg_stream, 0, 0);
- /* *INDENT-OFF* */
pool_foreach (s, pg->streams) {
vlib_cli_output (vm, "%U", format_pg_stream, s, verbose);
}
- /* *INDENT-ON* */
done:
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_streams_cli, static) = {
.path = "show packet-generator ",
.short_help = "show packet-generator [verbose]",
.function = show_streams,
};
-/* *INDENT-ON* */
static clib_error_t *
pg_pcap_read (pg_stream_t * s, char *file_name)
@@ -446,8 +432,6 @@ new_stream (vlib_main_t * vm,
{
vlib_node_t *n;
- ASSERT (s.if_id != ~0);
-
if (s.if_id != ~0)
n = vlib_get_node_by_name (vm, (u8 *) pg_interface_get_input_node (
&pg->interfaces[s.if_id]));
@@ -507,7 +491,6 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (new_stream_cli, static) = {
.path = "packet-generator new",
.function = new_stream,
@@ -525,7 +508,6 @@ VLIB_CLI_COMMAND (new_stream_cli, static) = {
"rate PPS rate to transfer packet data\n"
"maxframe NPKTS maximum number of packets per frame\n",
};
-/* *INDENT-ON* */
static clib_error_t *
del_stream (vlib_main_t * vm,
@@ -543,13 +525,11 @@ del_stream (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (del_stream_cli, static) = {
.path = "packet-generator delete",
.function = del_stream,
.short_help = "Delete stream with given name",
};
-/* *INDENT-ON* */
static clib_error_t *
change_stream_parameters (vlib_main_t * vm,
@@ -590,13 +570,11 @@ change_stream_parameters (vlib_main_t * vm,
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (change_stream_parameters_cli, static) = {
.path = "packet-generator configure",
.short_help = "Change packet generator stream parameters",
.function = change_stream_parameters,
};
-/* *INDENT-ON* */
static clib_error_t *
pg_capture_cmd_fn (vlib_main_t * vm,
@@ -673,13 +651,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (pg_capture_cmd, static) = {
.path = "packet-generator capture",
.short_help = "packet-generator capture <interface name> pcap <filename> [count <n>]",
.function = pg_capture_cmd_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
create_pg_if_cmd_fn (vlib_main_t * vm,
@@ -687,8 +663,9 @@ create_pg_if_cmd_fn (vlib_main_t * vm,
{
pg_main_t *pg = &pg_main;
unformat_input_t _line_input, *line_input = &_line_input;
- u32 if_id, gso_enabled = 0, gso_size = 0, coalesce_enabled = 0;
+ u32 if_id = ~0, gso_enabled = 0, gso_size = 0, coalesce_enabled = 0;
clib_error_t *error = NULL;
+ pg_interface_mode_t mode = PG_MODE_ETHERNET;
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -710,6 +687,10 @@ create_pg_if_cmd_fn (vlib_main_t * vm,
goto done;
}
}
+ else if (unformat (line_input, "mode ip4"))
+ mode = PG_MODE_IP4;
+ else if (unformat (line_input, "mode ip6"))
+ mode = PG_MODE_IP6;
else
{
error = clib_error_create ("unknown input `%U'",
@@ -719,7 +700,7 @@ create_pg_if_cmd_fn (vlib_main_t * vm,
}
pg_interface_add_or_get (pg, if_id, gso_enabled, gso_size, coalesce_enabled,
- PG_MODE_ETHERNET);
+ mode);
done:
unformat_free (line_input);
@@ -727,14 +708,13 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_pg_if_cmd, static) = {
.path = "create packet-generator",
.short_help = "create packet-generator interface <interface name>"
- " [gso-enabled gso-size <size> [coalesce-enabled]]",
+ " [gso-enabled gso-size <size> [coalesce-enabled]]"
+ " [mode <ethernet | ip4 | ip6>]",
.function = create_pg_if_cmd_fn,
};
-/* *INDENT-ON* */
/* Dummy init function so that we can be linked in. */
static clib_error_t *
diff --git a/src/vnet/pg/example.script b/src/vnet/pg/example.script
index 0e29b9ecae6..662088657bf 100644
--- a/src/vnet/pg/example.script
+++ b/src/vnet/pg/example.script
@@ -1,6 +1,6 @@
-packet-generator new {
- name x
- limit 1
- node ethernet-input
- data { IP: 1.2.3 -> 4.5.6 incrementing 100 }
+packet-generator new { \
+ name x \
+ limit 1 \
+ node ethernet-input \
+ data { IP: 1.2.3 -> 4.5.6 incrementing 100 } \
}
diff --git a/src/vnet/pg/input.c b/src/vnet/pg/input.c
index 98db46abd6a..321472c4d85 100644
--- a/src/vnet/pg/input.c
+++ b/src/vnet/pg/input.c
@@ -78,7 +78,7 @@ validate_buffer_data2 (vlib_buffer_t * b, pg_stream_t * s,
if (i >= n_bytes)
return 1;
- clib_warning ("buffer %U", format_vnet_buffer, b);
+ clib_warning ("buffer %U", format_vnet_buffer_no_chain, b);
clib_warning ("differ at index %d", i);
clib_warning ("is %U", format_hex_bytes, bd, n_bytes);
clib_warning ("mask %U", format_hex_bytes, pm, n_bytes);
@@ -965,7 +965,7 @@ pg_generate_fix_multi_buffer_lengths (pg_main_t * pg,
if (vec_len (unused_buffers) > 0)
{
vlib_buffer_free_no_next (vm, unused_buffers, vec_len (unused_buffers));
- _vec_len (unused_buffers) = 0;
+ vec_set_len (unused_buffers, 0);
}
}
@@ -1435,8 +1435,8 @@ format_pg_input_trace (u8 * s, va_list * va)
s = format (s, ", %d bytes", t->packet_length);
s = format (s, ", sw_if_index %d", t->sw_if_index);
- s = format (s, "\n%U%U",
- format_white_space, indent, format_vnet_buffer, &t->buffer);
+ s = format (s, "\n%U%U", format_white_space, indent,
+ format_vnet_buffer_no_chain, &t->buffer);
s = format (s, "\n%U", format_white_space, indent);
@@ -1578,7 +1578,7 @@ fill_buffer_offload_flags (vlib_main_t *vm, u32 *buffers, u32 n_buffers,
(VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L2_HDR_OFFSET_VALID |
VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
VNET_BUFFER_F_L4_HDR_OFFSET_VALID);
- if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM)
+ if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_IP_CKSUM || gso_enabled)
oflags |= VNET_BUFFER_OFFLOAD_F_IP_CKSUM;
}
else if (PREDICT_TRUE (ethertype == ETHERNET_TYPE_IP6))
@@ -1596,7 +1596,7 @@ fill_buffer_offload_flags (vlib_main_t *vm, u32 *buffers, u32 n_buffers,
if (l4_proto == IP_PROTOCOL_TCP)
{
- if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM)
+ if (buffer_oflags & VNET_BUFFER_OFFLOAD_F_TCP_CKSUM || gso_enabled)
oflags |= VNET_BUFFER_OFFLOAD_F_TCP_CKSUM;
/* only set GSO flag for chained buffers */
@@ -1639,8 +1639,8 @@ pg_generate_packets (vlib_node_runtime_t * node,
pg_interface_t *pi;
int i;
- pi = pool_elt_at_index (pg->interfaces,
- pg->if_id_by_sw_if_index[s->sw_if_index[VLIB_RX]]);
+ pi = pool_elt_at_index (
+ pg->interfaces, pg->if_index_by_sw_if_index[s->sw_if_index[VLIB_RX]]);
bi0 = s->buffer_indices;
n_packets_in_fifo = pg_stream_fill (pg, s, n_packets_to_generate);
@@ -1657,7 +1657,11 @@ pg_generate_packets (vlib_node_runtime_t * node,
}
if (PREDICT_FALSE (pi->coalesce_enabled))
- vnet_gro_flow_table_schedule_node_on_dispatcher (vm, pi->flow_table);
+ {
+ vnet_hw_if_tx_queue_t txq = { 0 };
+ vnet_gro_flow_table_schedule_node_on_dispatcher (vm, &txq,
+ pi->flow_table);
+ }
while (n_packets_to_generate > 0)
{
@@ -1812,17 +1816,14 @@ pg_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
if (vlib_num_workers ())
worker_index = vlib_get_current_worker_index ();
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, pg->enabled_streams[worker_index]) {
pg_stream_t *s = vec_elt_at_index (pg->streams, i);
n_packets += pg_input_stream (node, pg, s);
}
- /* *INDENT-ON* */
return n_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (pg_input_node) = {
.function = pg_input,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -1835,7 +1836,6 @@ VLIB_REGISTER_NODE (pg_input_node) = {
/* Input node will be left disabled until a stream is active. */
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -1860,9 +1860,9 @@ VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm,
pg_interface_t *pi;
mac_address_t in;
- pi = pool_elt_at_index
- (pg->interfaces,
- pg->if_id_by_sw_if_index[vnet_buffer (b[0])->sw_if_index[VLIB_RX]]);
+ pi = pool_elt_at_index (
+ pg->interfaces,
+ pg->if_index_by_sw_if_index[vnet_buffer (b[0])->sw_if_index[VLIB_RX]]);
eth = vlib_buffer_get_current (b[0]);
mac_address_from_bytes (&in, eth->dst_address);
@@ -1894,7 +1894,6 @@ VLIB_NODE_FN (pg_input_mac_filter) (vlib_main_t * vm,
return (frame->n_vectors);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (pg_input_mac_filter) = {
.name = "pg-input-mac-filter",
.vector_size = sizeof (u32),
@@ -1908,7 +1907,6 @@ VNET_FEATURE_INIT (pg_input_mac_filter_feat, static) = {
.arc_name = "device-input",
.node_name = "pg-input-mac-filter",
};
-/* *INDENT-ON* */
static clib_error_t *
pg_input_mac_filter_cfg (vlib_main_t * vm,
@@ -1946,13 +1944,11 @@ pg_input_mac_filter_cfg (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (enable_streams_cli, static) = {
.path = "packet-generator mac-filter",
.short_help = "packet-generator mac-filter <INTERFACE> <on|off>",
.function = pg_input_mac_filter_cfg,
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/pg/pg.api b/src/vnet/pg/pg.api
index 3630e0c2f0d..4f531fb1f5e 100644
--- a/src/vnet/pg/pg.api
+++ b/src/vnet/pg/pg.api
@@ -38,6 +38,8 @@ enum pg_interface_mode : u8
*/
define pg_create_interface
{
+ option deprecated;
+
u32 client_index;
u32 context;
vl_api_interface_index_t interface_id;
@@ -60,6 +62,8 @@ define pg_create_interface_v2
*/
define pg_create_interface_reply
{
+ option deprecated;
+
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
diff --git a/src/vnet/pg/pg.h b/src/vnet/pg/pg.h
index 963d23a8e01..6d5b25ba25a 100644
--- a/src/vnet/pg/pg.h
+++ b/src/vnet/pg/pg.h
@@ -296,7 +296,7 @@ pg_free_edit_group (pg_stream_t * s)
pg_edit_group_free (g);
clib_memset (g, 0, sizeof (g[0]));
- _vec_len (s->edit_groups) = i;
+ vec_set_len (s->edit_groups, i);
}
typedef enum pg_interface_mode_t_
@@ -349,7 +349,7 @@ typedef struct pg_main_t
/* Pool of interfaces. */
pg_interface_t *interfaces;
uword *if_index_by_if_id;
- uword *if_id_by_sw_if_index;
+ uword *if_index_by_sw_if_index;
/* Vector of buffer indices for use in pg_stream_fill_replay, per thread */
u32 **replay_buffers_by_thread;
@@ -383,7 +383,7 @@ void pg_interface_enable_disable_coalesce (pg_interface_t * pi, u8 enable,
u32 tx_node_index);
/* Find/create free packet-generator interface index. */
-u32 pg_interface_add_or_get (pg_main_t *pg, uword stream_index, u8 gso_enabled,
+u32 pg_interface_add_or_get (pg_main_t *pg, u32 stream_index, u8 gso_enabled,
u32 gso_size, u8 coalesce_enabled,
pg_interface_mode_t mode);
diff --git a/src/vnet/pg/pg_api.c b/src/vnet/pg/pg_api.c
index 468c88ee8bb..e5d0a08a527 100644
--- a/src/vnet/pg/pg_api.c
+++ b/src/vnet/pg/pg_api.c
@@ -40,12 +40,10 @@ vl_api_pg_create_interface_t_handler (vl_api_pg_create_interface_t * mp)
ntohl (mp->gso_size), 0, PG_MODE_ETHERNET);
pg_interface_t *pi = pool_elt_at_index (pg->interfaces, pg_if_id);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_PG_CREATE_INTERFACE_REPLY,
({
rmp->sw_if_index = ntohl(pi->sw_if_index);
}));
- /* *INDENT-ON* */
}
static void
diff --git a/src/vnet/pg/stream.c b/src/vnet/pg/stream.c
index 686627b8d9a..cf3d37d5e9e 100644
--- a/src/vnet/pg/stream.c
+++ b/src/vnet/pg/stream.c
@@ -102,9 +102,8 @@ format_pg_output_trace (u8 * s, va_list * va)
pg_output_trace_t *t = va_arg (*va, pg_output_trace_t *);
u32 indent = format_get_indent (s);
- s = format (s, "%Ubuffer 0x%x: %U",
- format_white_space, indent,
- t->buffer_index, format_vnet_buffer, &t->buffer);
+ s = format (s, "%Ubuffer 0x%x: %U", format_white_space, indent,
+ t->buffer_index, format_vnet_buffer_no_chain, &t->buffer);
s = format (s, "\n%U%U", format_white_space, indent,
format_ethernet_header_with_length, t->buffer.pre_data,
@@ -172,7 +171,6 @@ pg_add_del_mac_address (vnet_hw_interface_t * hi,
return (NULL);
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (pg_dev_class) = {
.name = "pg",
.tx_function = pg_output,
@@ -181,7 +179,6 @@ VNET_DEVICE_CLASS (pg_dev_class) = {
.admin_up_down_function = pg_interface_admin_up_down,
.mac_addr_add_del_function = pg_add_del_mac_address,
};
-/* *INDENT-ON* */
static u8 *
pg_build_rewrite (vnet_main_t * vnm,
@@ -198,12 +195,10 @@ pg_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (pg_interface_class,static) = {
.name = "Packet generator",
.build_rewrite = pg_build_rewrite,
};
-/* *INDENT-ON* */
static u32
pg_eth_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
@@ -246,10 +241,11 @@ VNET_HW_INTERFACE_CLASS (pg_tun_hw_interface_class) = {
.build_rewrite = NULL,
//.update_adjacency = gre_update_adj,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
+ .tx_hash_fn_type = VNET_HASH_FN_TYPE_IP,
};
u32
-pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
+pg_interface_add_or_get (pg_main_t *pg, u32 if_id, u8 gso_enabled,
u32 gso_size, u8 coalesce_enabled,
pg_interface_mode_t mode)
{
@@ -268,6 +264,7 @@ pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
}
else
{
+ vnet_eth_interface_registration_t eir = {};
u8 hw_addr[6];
f64 now = vlib_time_now (vm);
u32 rnd;
@@ -287,8 +284,11 @@ pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
switch (pi->mode)
{
case PG_MODE_ETHERNET:
- ethernet_register_interface (vnm, pg_dev_class.index, i, hw_addr,
- &pi->hw_if_index, pg_eth_flag_change);
+ eir.dev_class_index = pg_dev_class.index;
+ eir.dev_instance = i;
+ eir.address = hw_addr;
+ eir.cb.flag_change = pg_eth_flag_change;
+ pi->hw_if_index = vnet_eth_register_interface (vnm, &eir);
break;
case PG_MODE_IP4:
case PG_MODE_IP6:
@@ -299,7 +299,7 @@ pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
hi = vnet_get_hw_interface (vnm, pi->hw_if_index);
if (gso_enabled)
{
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO;
+ vnet_hw_if_set_caps (vnm, pi->hw_if_index, VNET_HW_IF_CAP_TCP_GSO);
pi->gso_enabled = 1;
pi->gso_size = gso_size;
if (coalesce_enabled)
@@ -311,8 +311,8 @@ pg_interface_add_or_get (pg_main_t *pg, uword if_id, u8 gso_enabled,
hash_set (pg->if_index_by_if_id, if_id, i);
- vec_validate (pg->if_id_by_sw_if_index, hi->sw_if_index);
- pg->if_id_by_sw_if_index[hi->sw_if_index] = i;
+ vec_validate (pg->if_index_by_sw_if_index, hi->sw_if_index);
+ pg->if_index_by_sw_if_index[hi->sw_if_index] = i;
if (vlib_num_workers ())
{
@@ -556,6 +556,11 @@ pg_stream_add (pg_main_t * pg, pg_stream_t * s_init)
*/
s->sw_if_index[VLIB_RX] = pi->sw_if_index;
}
+ else if (vec_len (pg->if_index_by_sw_if_index) <= s->sw_if_index[VLIB_RX])
+ {
+ vec_validate (pg->if_index_by_sw_if_index, s->sw_if_index[VLIB_RX]);
+ pg->if_index_by_sw_if_index[s->sw_if_index[VLIB_RX]] = s->pg_if_index;
+ }
/* Connect the graph. */
s->next_index = vlib_node_add_next (vm, device_input_node.index,
diff --git a/src/vnet/policer/node_funcs.c b/src/vnet/policer/node_funcs.c
index 21b9393a222..2d2252d247a 100644
--- a/src/vnet/policer/node_funcs.c
+++ b/src/vnet/policer/node_funcs.c
@@ -68,7 +68,7 @@ static char *vnet_policer_error_strings[] = {
static inline uword
vnet_policer_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
- vlib_frame_t *frame)
+ vlib_frame_t *frame, vlib_dir_t dir)
{
u32 n_left_from, *from, *to_next;
vnet_policer_next_t next_index;
@@ -120,11 +120,11 @@ vnet_policer_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
- sw_if_index1 = vnet_buffer (b1)->sw_if_index[VLIB_RX];
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[dir];
+ sw_if_index1 = vnet_buffer (b1)->sw_if_index[dir];
- pi0 = pm->policer_index_by_sw_if_index[sw_if_index0];
- pi1 = pm->policer_index_by_sw_if_index[sw_if_index1];
+ pi0 = pm->policer_index_by_sw_if_index[dir][sw_if_index0];
+ pi1 = pm->policer_index_by_sw_if_index[dir][sw_if_index1];
act0 = vnet_policer_police (vm, b0, pi0, time_in_policer_periods,
POLICE_CONFORM /* no chaining */, true);
@@ -206,9 +206,8 @@ vnet_policer_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
b0 = vlib_get_buffer (vm, bi0);
- sw_if_index0 = vnet_buffer (b0)->sw_if_index[VLIB_RX];
-
- pi0 = pm->policer_index_by_sw_if_index[sw_if_index0];
+ sw_if_index0 = vnet_buffer (b0)->sw_if_index[dir];
+ pi0 = pm->policer_index_by_sw_if_index[dir][sw_if_index0];
act0 = vnet_policer_police (vm, b0, pi0, time_in_policer_periods,
POLICE_CONFORM /* no chaining */, true);
@@ -256,7 +255,7 @@ vnet_policer_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
VLIB_NODE_FN (policer_input_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return vnet_policer_inline (vm, node, frame);
+ return vnet_policer_inline (vm, node, frame, VLIB_RX);
}
VLIB_REGISTER_NODE (policer_input_node) = {
@@ -279,12 +278,43 @@ VNET_FEATURE_INIT (policer_input_node, static) = {
.runs_before = VNET_FEATURES ("ethernet-input"),
};
+VLIB_NODE_FN (policer_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return vnet_policer_inline (vm, node, frame, VLIB_TX);
+}
+
+VLIB_REGISTER_NODE (policer_output_node) = {
+ .name = "policer-output",
+ .vector_size = sizeof (u32),
+ .format_trace = format_policer_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(vnet_policer_error_strings),
+ .error_strings = vnet_policer_error_strings,
+ .n_next_nodes = VNET_POLICER_N_NEXT,
+ .next_nodes = {
+ [VNET_POLICER_NEXT_DROP] = "error-drop",
+ [VNET_POLICER_NEXT_HANDOFF] = "policer-output-handoff",
+ },
+};
+
+VNET_FEATURE_INIT (policer_output_node, static) = {
+ .arc_name = "ip4-output",
+ .node_name = "policer-output",
+};
+
+VNET_FEATURE_INIT (policer6_output_node, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "policer-output",
+};
+
static char *policer_input_handoff_error_strings[] = { "congestion drop" };
VLIB_NODE_FN (policer_input_handoff_node)
(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return policer_handoff (vm, node, frame, vnet_policer_main.fq_index, ~0);
+ return policer_handoff (vm, node, frame, vnet_policer_main.fq_index[VLIB_RX],
+ ~0);
}
VLIB_REGISTER_NODE (policer_input_handoff_node) = {
@@ -301,6 +331,26 @@ VLIB_REGISTER_NODE (policer_input_handoff_node) = {
},
};
+VLIB_NODE_FN (policer_output_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return policer_handoff (vm, node, frame, vnet_policer_main.fq_index[VLIB_TX],
+ ~0);
+}
+
+VLIB_REGISTER_NODE (policer_output_handoff_node) = {
+ .name = "policer-output-handoff",
+ .vector_size = sizeof (u32),
+ .format_trace = format_policer_handoff_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = ARRAY_LEN(policer_input_handoff_error_strings),
+ .error_strings = policer_input_handoff_error_strings,
+
+ .n_next_nodes = 1,
+ .next_nodes = {
+ [0] = "error-drop",
+ },
+};
typedef struct
{
u32 sw_if_index;
@@ -477,7 +527,7 @@ policer_classify_inline (vlib_main_t * vm,
u32 table_index0;
vnet_classify_table_t *t0;
vnet_classify_entry_t *e0;
- u64 hash0;
+ u32 hash0;
u8 *h0;
u8 act0;
@@ -487,7 +537,7 @@ policer_classify_inline (vlib_main_t * vm,
vlib_buffer_t *p1 = vlib_get_buffer (vm, from[3]);
vnet_classify_table_t *tp1;
u32 table_index1;
- u64 phash1;
+ u32 phash1;
table_index1 = vnet_buffer (p1)->l2_classify.table_index;
@@ -620,7 +670,6 @@ VLIB_NODE_FN (ip4_policer_classify_node) (vlib_main_t * vm,
POLICER_CLASSIFY_TABLE_IP4);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_policer_classify_node) = {
.name = "ip4-policer-classify",
.vector_size = sizeof (u32),
@@ -632,7 +681,6 @@ VLIB_REGISTER_NODE (ip4_policer_classify_node) = {
[POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (ip6_policer_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -642,7 +690,6 @@ VLIB_NODE_FN (ip6_policer_classify_node) (vlib_main_t * vm,
POLICER_CLASSIFY_TABLE_IP6);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_policer_classify_node) = {
.name = "ip6-policer-classify",
.vector_size = sizeof (u32),
@@ -654,7 +701,6 @@ VLIB_REGISTER_NODE (ip6_policer_classify_node) = {
[POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
VLIB_NODE_FN (l2_policer_classify_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -663,7 +709,6 @@ VLIB_NODE_FN (l2_policer_classify_node) (vlib_main_t * vm,
return policer_classify_inline (vm, node, frame, POLICER_CLASSIFY_TABLE_L2);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (l2_policer_classify_node) = {
.name = "l2-policer-classify",
.vector_size = sizeof (u32),
@@ -675,7 +720,6 @@ VLIB_REGISTER_NODE (l2_policer_classify_node) = {
[POLICER_CLASSIFY_NEXT_INDEX_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
static clib_error_t *
diff --git a/src/vnet/policer/police.h b/src/vnet/policer/police.h
index 5ad249ef40e..8f126e22175 100644
--- a/src/vnet/policer/police.h
+++ b/src/vnet/policer/police.h
@@ -73,8 +73,6 @@ typedef enum
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- u32 lock; // for exclusive access to the struct
-
u32 single_rate; // 1 = single rate policer, 0 = two rate policer
u32 color_aware; // for hierarchical policing
u32 scale; // power-of-2 shift amount for lower rates
@@ -93,11 +91,9 @@ typedef struct
u32 current_bucket; // MOD
u32 extended_limit;
u32 extended_bucket; // MOD
-
- u64 last_update_time; // MOD
u32 thread_index; // Tie policer to a thread, rather than lock
- u32 pad32;
-
+ u64 last_update_time; // MOD
+ u8 *name;
} policer_t;
STATIC_ASSERT_SIZEOF (policer_t, CLIB_CACHE_LINE_BYTES);
diff --git a/src/vnet/policer/police_inlines.h b/src/vnet/policer/police_inlines.h
index 6b0c0ecf725..08000b9a303 100644
--- a/src/vnet/policer/police_inlines.h
+++ b/src/vnet/policer/police_inlines.h
@@ -123,7 +123,7 @@ policer_handoff (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 n_enq, n_left_from, *from;
vnet_policer_main_t *pm;
policer_t *policer;
- u32 this_thread, policer_thread;
+ u32 this_thread, policer_thread = 0;
bool single_policer_node = (policer_index != ~0);
pm = &vnet_policer_main;
diff --git a/src/vnet/policer/policer.api b/src/vnet/policer/policer.api
index a664ab0be76..a5a60b35c6b 100644
--- a/src/vnet/policer/policer.api
+++ b/src/vnet/policer/policer.api
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-option version = "2.0.0";
+option version = "3.0.0";
import "vnet/interface_types.api";
import "vnet/policer/policer_types.api";
@@ -35,6 +35,16 @@ autoreply define policer_bind
bool bind_enable;
};
+autoreply define policer_bind_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ u32 worker_index;
+ bool bind_enable;
+};
+
/** \brief policer input: Apply policer as an input feature.
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -52,6 +62,43 @@ autoreply define policer_input
bool apply;
};
+autoreply define policer_input_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ vl_api_interface_index_t sw_if_index;
+ bool apply;
+};
+
+/** \brief policer output: Apply policer as an output feature.
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param name - policer name
+ @param sw_if_index - interface to apply the policer
+ @param apply - Apply/remove
+*/
+autoreply define policer_output
+{
+ u32 client_index;
+ u32 context;
+
+ string name[64];
+ vl_api_interface_index_t sw_if_index;
+ bool apply;
+};
+
+autoreply define policer_output_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ vl_api_interface_index_t sw_if_index;
+ bool apply;
+};
+
/** \brief Add/del policer
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -89,6 +136,40 @@ define policer_add_del
vl_api_sse2_qos_action_t violate_action;
};
+define policer_add
+{
+ u32 client_index;
+ u32 context;
+
+ string name[64];
+ vl_api_policer_config_t infos;
+};
+
+autoreply define policer_del
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+};
+
+autoreply define policer_update
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+ vl_api_policer_config_t infos;
+};
+
+autoreply define policer_reset
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+};
+
/** \brief Add/del policer response
@param context - sender context, to match reply w/ request
@param retval - return value for request
@@ -101,6 +182,13 @@ define policer_add_del_reply
u32 policer_index;
};
+define policer_add_reply
+{
+ u32 context;
+ i32 retval;
+ u32 policer_index;
+};
+
/** \brief Get list of policers
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -116,6 +204,23 @@ define policer_dump
string match_name[64];
};
+/** \brief Get list of policers
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param policer_index - index of policer in the pool, ~0 to request all
+*/
+define policer_dump_v2
+{
+ u32 client_index;
+ u32 context;
+
+ u32 policer_index;
+};
+
+service {
+ rpc policer_dump_v2 returns stream policer_details;
+};
+
/** \brief Policer operational state response.
@param context - sender context, to match reply w/ request
@param name - policer name
diff --git a/src/vnet/policer/policer.c b/src/vnet/policer/policer.c
index 516a029dcee..eb7d40a340a 100644
--- a/src/vnet/policer/policer.c
+++ b/src/vnet/policer/policer.c
@@ -49,105 +49,161 @@ vlib_combined_counter_main_t policer_counters[] = {
},
};
-clib_error_t *
-policer_add_del (vlib_main_t *vm, u8 *name, qos_pol_cfg_params_st *cfg,
- u32 *policer_index, u8 is_add)
+int
+policer_add (vlib_main_t *vm, const u8 *name, const qos_pol_cfg_params_st *cfg,
+ u32 *policer_index)
{
vnet_policer_main_t *pm = &vnet_policer_main;
policer_t test_policer;
policer_t *policer;
+ policer_t *pp;
+ qos_pol_cfg_params_st *cp;
uword *p;
u32 pi;
int rv;
+ int i;
p = hash_get_mem (pm->policer_config_by_name, name);
- if (is_add == 0)
- {
- /* free policer config and template */
- if (p == 0)
- {
- vec_free (name);
- return clib_error_return (0, "No such policer configuration");
- }
- pool_put_index (pm->configs, p[0]);
- pool_put_index (pm->policer_templates, p[0]);
- hash_unset_mem (pm->policer_config_by_name, name);
+ if (p != NULL)
+ return VNET_API_ERROR_VALUE_EXIST;
- /* free policer */
- p = hash_get_mem (pm->policer_index_by_name, name);
- if (p == 0)
- {
- vec_free (name);
- return clib_error_return (0, "No such policer");
- }
- pool_put_index (pm->policers, p[0]);
- hash_unset_mem (pm->policer_index_by_name, name);
+ /* Vet the configuration before adding it to the table */
+ rv = pol_logical_2_physical (cfg, &test_policer);
- vec_free (name);
- return 0;
- }
+ if (rv != 0)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ pool_get (pm->configs, cp);
+ pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES);
- if (p != 0)
+ clib_memcpy (cp, cfg, sizeof (*cp));
+ clib_memcpy (policer, &test_policer, sizeof (*pp));
+
+ policer->name = format (0, "%s%c", name, 0);
+ pi = policer - pm->policers;
+
+ hash_set_mem (pm->policer_config_by_name, policer->name, cp - pm->configs);
+ hash_set_mem (pm->policer_index_by_name, policer->name, pi);
+ *policer_index = pi;
+ policer->thread_index = ~0;
+
+ for (i = 0; i < NUM_POLICE_RESULTS; i++)
{
- vec_free (name);
- return clib_error_return (0, "Policer already exists");
+ vlib_validate_combined_counter (&policer_counters[i], pi);
+ vlib_zero_combined_counter (&policer_counters[i], pi);
}
- /* Vet the configuration before adding it to the table */
- rv = pol_logical_2_physical (cfg, &test_policer);
+ return 0;
+}
+
+int
+policer_del (vlib_main_t *vm, u32 policer_index)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ policer_t *policer;
+ uword *p;
- if (rv == 0)
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ policer = &pm->policers[policer_index];
+
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
+
+ /* free policer config */
+ if (p != NULL)
{
- policer_t *pp;
- qos_pol_cfg_params_st *cp;
- int i;
+ pool_put_index (pm->configs, p[0]);
+ hash_unset_mem (pm->policer_config_by_name, policer->name);
+ }
- pool_get (pm->configs, cp);
- pool_get (pm->policer_templates, pp);
+ /* free policer */
+ hash_unset_mem (pm->policer_index_by_name, policer->name);
+ vec_free (policer->name);
+ pool_put_index (pm->policers, policer_index);
- ASSERT (cp - pm->configs == pp - pm->policer_templates);
+ return 0;
+}
- clib_memcpy (cp, cfg, sizeof (*cp));
- clib_memcpy (pp, &test_policer, sizeof (*pp));
+int
+policer_update (vlib_main_t *vm, u32 policer_index,
+ const qos_pol_cfg_params_st *cfg)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ policer_t test_policer;
+ policer_t *policer;
+ qos_pol_cfg_params_st *cp;
+ uword *p;
+ u8 *name;
+ int rv;
+ int i;
- hash_set_mem (pm->policer_config_by_name, name, cp - pm->configs);
- pool_get_aligned (pm->policers, policer, CLIB_CACHE_LINE_BYTES);
- policer[0] = pp[0];
- pi = policer - pm->policers;
- hash_set_mem (pm->policer_index_by_name, name, pi);
- *policer_index = pi;
- policer->thread_index = ~0;
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
- for (i = 0; i < NUM_POLICE_RESULTS; i++)
- {
- vlib_validate_combined_counter (&policer_counters[i], pi);
- vlib_zero_combined_counter (&policer_counters[i], pi);
- }
+ policer = &pm->policers[policer_index];
+
+ /* Vet the configuration before adding it to the table */
+ rv = pol_logical_2_physical (cfg, &test_policer);
+ if (rv != 0)
+ return VNET_API_ERROR_INVALID_VALUE;
+
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
+
+ if (PREDICT_TRUE (p != NULL))
+ {
+ cp = &pm->configs[p[0]];
}
else
{
- vec_free (name);
- return clib_error_return (0, "Config failed sanity check");
+ /* recover from a missing configuration */
+ pool_get (pm->configs, cp);
+ hash_set_mem (pm->policer_config_by_name, policer->name,
+ cp - pm->configs);
}
+ name = policer->name;
+
+ clib_memcpy (cp, cfg, sizeof (*cp));
+ clib_memcpy (policer, &test_policer, sizeof (*policer));
+
+ policer->name = name;
+ policer->thread_index = ~0;
+
+ for (i = 0; i < NUM_POLICE_RESULTS; i++)
+ vlib_zero_combined_counter (&policer_counters[i], policer_index);
+
return 0;
}
int
-policer_bind_worker (u8 *name, u32 worker, bool bind)
+policer_reset (vlib_main_t *vm, u32 policer_index)
{
vnet_policer_main_t *pm = &vnet_policer_main;
policer_t *policer;
- uword *p;
- p = hash_get_mem (pm->policer_index_by_name, name);
- if (p == 0)
- {
- return VNET_API_ERROR_NO_SUCH_ENTRY;
- }
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
- policer = &pm->policers[p[0]];
+ policer = &pm->policers[policer_index];
+
+ policer->current_bucket = policer->current_limit;
+ policer->extended_bucket = policer->extended_limit;
+
+ return 0;
+}
+
+int
+policer_bind_worker (u32 policer_index, u32 worker, bool bind)
+{
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ policer_t *policer;
+
+ if (pool_is_free_index (pm->policers, policer_index))
+ return VNET_API_ERROR_NO_SUCH_ENTRY;
+
+ policer = &pm->policers[policer_index];
if (bind)
{
@@ -166,54 +222,53 @@ policer_bind_worker (u8 *name, u32 worker, bool bind)
}
int
-policer_input (u8 *name, u32 sw_if_index, bool apply)
+policer_input (u32 policer_index, u32 sw_if_index, vlib_dir_t dir, bool apply)
{
vnet_policer_main_t *pm = &vnet_policer_main;
- policer_t *policer;
- u32 policer_index;
- uword *p;
- p = hash_get_mem (pm->policer_index_by_name, name);
- if (p == 0)
+ if (apply)
{
- return VNET_API_ERROR_NO_SUCH_ENTRY;
+ vec_validate (pm->policer_index_by_sw_if_index[dir], sw_if_index);
+ pm->policer_index_by_sw_if_index[dir][sw_if_index] = policer_index;
+ }
+ else
+ {
+ pm->policer_index_by_sw_if_index[dir][sw_if_index] = ~0;
}
- policer = &pm->policers[p[0]];
- policer_index = policer - pm->policers;
-
- if (apply)
+ if (dir == VLIB_RX)
{
- vec_validate (pm->policer_index_by_sw_if_index, sw_if_index);
- pm->policer_index_by_sw_if_index[sw_if_index] = policer_index;
+ vnet_feature_enable_disable ("device-input", "policer-input",
+ sw_if_index, apply, 0, 0);
}
else
{
- pm->policer_index_by_sw_if_index[sw_if_index] = ~0;
+ vnet_feature_enable_disable ("ip4-output", "policer-output", sw_if_index,
+ apply, 0, 0);
+ vnet_feature_enable_disable ("ip6-output", "policer-output", sw_if_index,
+ apply, 0, 0);
}
-
- vnet_feature_enable_disable ("device-input", "policer-input", sw_if_index,
- apply, 0, 0);
return 0;
}
u8 *
format_policer_instance (u8 * s, va_list * va)
{
+ vnet_policer_main_t *pm = &vnet_policer_main;
policer_t *i = va_arg (*va, policer_t *);
- uword pi = va_arg (*va, uword);
+ u32 policer_index = i - pm->policers;
int result;
vlib_counter_t counts[NUM_POLICE_RESULTS];
for (result = 0; result < NUM_POLICE_RESULTS; result++)
{
- vlib_get_combined_counter (&policer_counters[result], pi,
+ vlib_get_combined_counter (&policer_counters[result], policer_index,
&counts[result]);
}
- s = format (s, "policer at %llx: %s rate, %s color-aware\n",
- i, i->single_rate ? "single" : "dual",
- i->color_aware ? "is" : "not");
+ s =
+ format (s, "Policer at index %d: %s rate, %s color-aware\n", policer_index,
+ i->single_rate ? "single" : "dual", i->color_aware ? "is" : "not");
s = format (s, "cir %u tok/period, pir %u tok/period, scale %u\n",
i->cir_tokens_per_period, i->pir_tokens_per_period, i->scale);
s = format (s, "cur lim %u, cur bkt %u, ext lim %u, ext bkt %u\n",
@@ -465,6 +520,7 @@ unformat_policer_classify_next_index (unformat_input_t * input, va_list * va)
return 0;
p = hash_get_mem (pm->policer_index_by_name, match_name);
+ vec_free (match_name);
if (p == 0)
return 0;
@@ -503,12 +559,16 @@ static clib_error_t *
policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input,
vlib_cli_command_t *cmd)
{
+ vnet_policer_main_t *pm = &vnet_policer_main;
qos_pol_cfg_params_st c;
unformat_input_t _line_input, *line_input = &_line_input;
- u8 is_add = 1;
u8 *name = 0;
+ uword *p;
u32 pi;
+ u32 policer_index = ~0;
+ int rv = 0;
clib_error_t *error = NULL;
+ u8 is_update = cmd->function_arg;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -518,9 +578,9 @@ policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "del"))
- is_add = 0;
- else if (unformat (line_input, "name %s", &name))
+ if (unformat (line_input, "name %s", &name))
+ ;
+ else if (is_update && unformat (line_input, "index %u", &policer_index))
;
else if (unformat (line_input, "color-aware"))
c.color_aware = 1;
@@ -536,10 +596,41 @@ policer_add_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
- error = policer_add_del (vm, name, &c, &pi, is_add);
+ if (is_update)
+ {
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ if (~0 != policer_index)
+ {
+ rv = policer_update (vm, policer_index, &c);
+ }
+ }
+ else
+ {
+ rv = policer_add (vm, name, &c, &pi);
+ }
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "No such policer");
+ break;
+ case VNET_API_ERROR_VALUE_EXIST:
+ error = clib_error_return (0, "Policer already exists");
+ break;
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "Config failed sanity check");
+ break;
+ }
done:
unformat_free (line_input);
+ vec_free (name);
return error;
}
@@ -550,6 +641,10 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = NULL;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ int rv;
+ u32 policer_index = ~0;
+ uword *p;
u8 *name = 0;
/* Get a line of input. */
@@ -560,6 +655,8 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "name %s", &name))
;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -568,10 +665,30 @@ policer_del_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
}
- error = policer_add_del (vm, name, NULL, NULL, 0);
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_del (vm, policer_index);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_INVALID_VALUE:
+ error = clib_error_return (0, "No such policer configuration");
+ break;
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "No such policer");
+ break;
+ }
done:
unformat_free (line_input);
+ vec_free (name);
return error;
}
@@ -582,13 +699,14 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = NULL;
- u8 bind, *name = 0;
- u32 worker;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ u8 bind = 1;
+ u8 *name = 0;
+ u32 worker = ~0;
+ u32 policer_index = ~0;
+ uword *p;
int rv;
- bind = 1;
- worker = ~0;
-
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
return 0;
@@ -597,6 +715,8 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "name %s", &name))
;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
else if (unformat (line_input, "unbind"))
bind = 0;
else if (unformat (line_input, "%d", &worker))
@@ -616,7 +736,16 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
else
{
- rv = policer_bind_worker (name, worker, bind);
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_bind_worker (policer_index, worker, bind);
if (rv)
error = clib_error_return (0, "failed: `%d'", rv);
@@ -624,6 +753,7 @@ policer_bind_command_fn (vlib_main_t *vm, unformat_input_t *input,
done:
unformat_free (line_input);
+ vec_free (name);
return error;
}
@@ -634,12 +764,14 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
unformat_input_t _line_input, *line_input = &_line_input;
clib_error_t *error = NULL;
- u8 apply, *name = 0;
- u32 sw_if_index;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ u8 apply = 1;
+ u8 *name = 0;
+ u32 sw_if_index = ~0;
+ u32 policer_index = ~0;
+ uword *p;
int rv;
-
- apply = 1;
- sw_if_index = ~0;
+ vlib_dir_t dir = cmd->function_arg;
/* Get a line of input. */
if (!unformat_user (input, unformat_line_input, line_input))
@@ -649,6 +781,8 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
{
if (unformat (line_input, "name %s", &name))
;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
else if (unformat (line_input, "unapply"))
apply = 0;
else if (unformat (line_input, "%U", unformat_vnet_sw_interface,
@@ -669,7 +803,16 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
}
else
{
- rv = policer_input (name, sw_if_index, apply);
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_input (policer_index, sw_if_index, dir, apply);
if (rv)
error = clib_error_return (0, "failed: `%d'", rv);
@@ -677,83 +820,199 @@ policer_input_command_fn (vlib_main_t *vm, unformat_input_t *input,
done:
unformat_free (line_input);
+ vec_free (name);
+
+ return error;
+}
+
+static clib_error_t *
+policer_reset_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ unformat_input_t _line_input, *line_input = &_line_input;
+ clib_error_t *error = NULL;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ int rv;
+ u32 policer_index = ~0;
+ uword *p;
+ u8 *name = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ return 0;
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &name))
+ ;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == policer_index && 0 != name)
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+ if (p != NULL)
+ policer_index = p[0];
+ }
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (~0 != policer_index)
+ rv = policer_reset (vm, policer_index);
+
+ switch (rv)
+ {
+ case VNET_API_ERROR_NO_SUCH_ENTRY:
+ error = clib_error_return (0, "No such policer");
+ break;
+ }
+
+done:
+ unformat_free (line_input);
+ vec_free (name);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (configure_policer_command, static) = {
.path = "configure policer",
- .short_help = "configure policer name <name> <params> ",
+ .short_help = "configure policer [name <name> | index <index>] [type 1r2c | "
+ "1r3c | 2r3c-2698 "
+ "| 2r3c-4115] [color-aware] [cir <cir>] [cb <cb>] [eir <eir>] "
+ "[eb <eb>] [rate kbps | pps] [round closest | up | down] "
+ "[conform-action drop | transmit | mark-and-transmit <dscp>] "
+ "[exceed-action drop | transmit | mark-and-transmit <dscp>] "
+ "[violate-action drop | transmit | mark-and-transmit <dscp>]",
.function = policer_add_command_fn,
+ .function_arg = 1
};
+
VLIB_CLI_COMMAND (policer_add_command, static) = {
.path = "policer add",
- .short_help = "policer name <name> <params> ",
+ .short_help = "policer add name <name> [type 1r2c | 1r3c | 2r3c-2698 | "
+ "2r3c-4115] [color-aware] [cir <cir>] [cb <cb>] [eir <eir>] "
+ "[eb <eb>] [rate kbps | pps] [round closest | up | down] "
+ "[conform-action drop | transmit | mark-and-transmit <dscp>] "
+ "[exceed-action drop | transmit | mark-and-transmit <dscp>] "
+ "[violate-action drop | transmit | mark-and-transmit <dscp>]",
.function = policer_add_command_fn,
+ .function_arg = 0
};
+
VLIB_CLI_COMMAND (policer_del_command, static) = {
.path = "policer del",
- .short_help = "policer del name <name> ",
+ .short_help = "policer del [name <name> | index <index>]",
.function = policer_del_command_fn,
};
+
VLIB_CLI_COMMAND (policer_bind_command, static) = {
.path = "policer bind",
- .short_help = "policer bind [unbind] name <name> <worker>",
+ .short_help = "policer bind [unbind] [name <name> | index <index>] <worker>",
.function = policer_bind_command_fn,
};
+
VLIB_CLI_COMMAND (policer_input_command, static) = {
.path = "policer input",
- .short_help = "policer input [unapply] name <name> <interfac>",
+ .short_help =
+ "policer input [unapply] [name <name> | index <index>] <interface>",
.function = policer_input_command_fn,
+ .function_arg = VLIB_RX,
+};
+
+VLIB_CLI_COMMAND (policer_output_command, static) = {
+ .path = "policer output",
+ .short_help =
+ "policer output [unapply] [name <name> | index <index>] <interface>",
+ .function = policer_input_command_fn,
+ .function_arg = VLIB_TX,
+};
+
+VLIB_CLI_COMMAND (policer_reset_command, static) = {
+ .path = "policer reset",
+ .short_help = "policer reset [name <name> | index <index>]",
+ .function = policer_reset_command_fn
};
-/* *INDENT-ON* */
static clib_error_t *
show_policer_command_fn (vlib_main_t * vm,
unformat_input_t * input, vlib_cli_command_t * cmd)
{
vnet_policer_main_t *pm = &vnet_policer_main;
- hash_pair_t *p;
- u32 pool_index;
- u8 *match_name = 0;
- u8 *name;
- uword *pi;
+ unformat_input_t _line_input, *line_input = &_line_input;
+ policer_t *policer;
+ u32 policer_index = ~0;
+ u8 *name = 0;
+ uword *ci, *pi;
qos_pol_cfg_params_st *config;
- policer_t *templ;
-
- (void) unformat (input, "name %s", &match_name);
-
- /* *INDENT-OFF* */
- hash_foreach_pair (p, pm->policer_config_by_name,
- ({
- name = (u8 *) p->key;
- if (match_name == 0 || !strcmp((char *) name, (char *) match_name))
- {
- pi = hash_get_mem (pm->policer_index_by_name, name);
-
- pool_index = p->value[0];
- config = pool_elt_at_index (pm->configs, pool_index);
- templ = pool_elt_at_index (pm->policer_templates, pool_index);
- vlib_cli_output (vm, "Name \"%s\" %U ", name, format_policer_config,
- config);
- vlib_cli_output (vm, "Template %U", format_policer_instance, templ,
- pi[0]);
- vlib_cli_output (vm, "-----------");
- }
- }));
- /* *INDENT-ON* */
- return 0;
+ clib_error_t *error = 0;
+
+ /* Get a line of input. */
+ if (!unformat_user (input, unformat_line_input, line_input))
+ {
+ pool_foreach (policer, pm->policers)
+ {
+ ci = hash_get_mem (pm->policer_config_by_name, policer->name);
+ config = pool_elt_at_index (pm->configs, ci[0]);
+
+ vlib_cli_output (vm, "Name \"%s\" %U ", policer->name,
+ format_policer_config, config);
+ vlib_cli_output (vm, "%U", format_policer_instance, policer);
+ vlib_cli_output (vm, "-----------");
+ }
+ return 0;
+ }
+
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (line_input, "name %s", &name))
+ ;
+ else if (unformat (line_input, "index %u", &policer_index))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, line_input);
+ goto done;
+ }
+ }
+
+ if (~0 == policer_index && 0 != name)
+ {
+ pi = hash_get_mem (pm->policer_index_by_name, name);
+ if (pi != NULL)
+ policer_index = pi[0];
+ }
+
+ if (~0 == policer_index || pool_is_free_index (pm->policers, policer_index))
+ goto done;
+
+ policer = &pm->policers[policer_index];
+ ci = hash_get_mem (pm->policer_config_by_name, policer->name);
+ config = pool_elt_at_index (pm->configs, ci[0]);
+ vlib_cli_output (vm, "Name \"%s\" %U ", policer->name, format_policer_config,
+ config);
+ vlib_cli_output (vm, "%U", format_policer_instance, policer);
+ vlib_cli_output (vm, "-----------");
+
+done:
+ unformat_free (line_input);
+ vec_free (name);
+
+ return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_policer_command, static) = {
- .path = "show policer",
- .short_help = "show policer [name]",
- .function = show_policer_command_fn,
+ .path = "show policer",
+ .short_help = "show policer [name <name> | index <index>]",
+ .function = show_policer_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_policer_pools_command_fn (vlib_main_t * vm,
@@ -762,19 +1021,15 @@ show_policer_pools_command_fn (vlib_main_t * vm,
{
vnet_policer_main_t *pm = &vnet_policer_main;
- vlib_cli_output (vm, "pool sizes: configs=%d templates=%d policers=%d",
- pool_elts (pm->configs),
- pool_elts (pm->policer_templates),
- pool_elts (pm->policers));
+ vlib_cli_output (vm, "pool sizes: configs=%d policers=%d",
+ pool_elts (pm->configs), pool_elts (pm->policers));
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_policer_pools_command, static) = {
.path = "show policer pools",
.short_help = "show policer pools",
.function = show_policer_pools_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
policer_init (vlib_main_t * vm)
@@ -784,7 +1039,10 @@ policer_init (vlib_main_t * vm)
pm->vlib_main = vm;
pm->vnet_main = vnet_get_main ();
pm->log_class = vlib_log_register_class ("policer", 0);
- pm->fq_index = vlib_frame_queue_main_init (policer_input_node.index, 0);
+ pm->fq_index[VLIB_RX] =
+ vlib_frame_queue_main_init (policer_input_node.index, 0);
+ pm->fq_index[VLIB_TX] =
+ vlib_frame_queue_main_init (policer_output_node.index, 0);
pm->policer_config_by_name = hash_create_string (0, sizeof (uword));
pm->policer_index_by_name = hash_create_string (0, sizeof (uword));
diff --git a/src/vnet/policer/policer.h b/src/vnet/policer/policer.h
index 2687064bf0d..7ce7fc79d47 100644
--- a/src/vnet/policer/policer.h
+++ b/src/vnet/policer/policer.h
@@ -32,14 +32,14 @@ typedef struct
qos_pol_cfg_params_st *configs;
policer_t *policer_templates;
- /* Config by name hash */
+ /* Config by policer name hash */
uword *policer_config_by_name;
/* Policer by name hash */
uword *policer_index_by_name;
/* Policer by sw_if_index vector */
- u32 *policer_index_by_sw_if_index;
+ u32 *policer_index_by_sw_if_index[VLIB_N_RX_TX];
/* convenience */
vlib_main_t *vlib_main;
@@ -48,7 +48,7 @@ typedef struct
vlib_log_class_t log_class;
/* frame queue for thread handoff */
- u32 fq_index;
+ u32 fq_index[VLIB_N_RX_TX];
u16 msg_id_base;
} vnet_policer_main_t;
@@ -58,6 +58,7 @@ extern vnet_policer_main_t vnet_policer_main;
extern vlib_combined_counter_main_t policer_counters[];
extern vlib_node_registration_t policer_input_node;
+extern vlib_node_registration_t policer_output_node;
typedef enum
{
@@ -67,11 +68,16 @@ typedef enum
} vnet_policer_next_t;
u8 *format_policer_instance (u8 * s, va_list * va);
-clib_error_t *policer_add_del (vlib_main_t *vm, u8 *name,
- qos_pol_cfg_params_st *cfg, u32 *policer_index,
- u8 is_add);
-int policer_bind_worker (u8 *name, u32 worker, bool bind);
-int policer_input (u8 *name, u32 sw_if_index, bool apply);
+int policer_add (vlib_main_t *vm, const u8 *name,
+ const qos_pol_cfg_params_st *cfg, u32 *policer_index);
+
+int policer_update (vlib_main_t *vm, u32 policer_index,
+ const qos_pol_cfg_params_st *cfg);
+int policer_del (vlib_main_t *vm, u32 policer_index);
+int policer_reset (vlib_main_t *vm, u32 policer_index);
+int policer_bind_worker (u32 policer_index, u32 worker, bool bind);
+int policer_input (u32 policer_index, u32 sw_if_index, vlib_dir_t dir,
+ bool apply);
#endif /* __included_policer_h__ */
diff --git a/src/vnet/policer/policer.rst b/src/vnet/policer/policer.rst
new file mode 100644
index 00000000000..0e7369e373b
--- /dev/null
+++ b/src/vnet/policer/policer.rst
@@ -0,0 +1,217 @@
+.. _policer:
+
+Policing
+========
+
+VPP implements several policer types, that don't always conform
+to the related RFCs [#rfc2697]_ [#rfc2698]_ [#rfc4115]_.
+Only policers implemented in VPP will be presented, along with
+the differences they have compared to RFCs.
+
+.. contents:: :local:
+ :depth: 1
+
+
+1 rate 2 color (1r2c)
+---------------------
+
+This is the most straightforward policer. There is no RFC describing it,
+however we can found its description in many documentation [#juniper]_ [#cisco]_ .
+
+A 1r2c policer is great to classify incoming packets into two categories:
+conforming packets (said green), and violating ones (said red).
+
+Parameters
+~~~~~~~~~~
+
+To set-up such a policer, only two parameters are needed:
+
+Committed Information Rate (CIR)
+ Given in bytes per second, this parameter is the average
+ throughput allowed by the policer.
+
+ It sets the limit between conforming arriving packets (those making the
+ traffic fall below the CIR), and violating arriving packets
+ (those making the traffic exceed the CIR).
+
+Committed Burst Size (CBS)
+ It represents the size (in bytes) of a token bucket used to allow
+ some burstiness from the incoming traffic.
+
+.. figure:: /_images/policer-1r2c-bucket.png
+ :align: center
+ :scale: 25%
+
+ Figure 1: 1r2c bucket filling logic
+
+The committed token bucket (C) is filling up at CIR tokens (bytes)
+per second, up to CBS tokens. All overflowing tokens are lost.
+
+Color-Blind algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-1r2c-blind.png
+ :align: center
+ :scale: 75%
+
+|
+
+Color-Aware algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+In online documentation, there is no trace of a color-aware 1r2c policer.
+However, VPP implementation allows such a thing.
+
+.. image:: /_images/policer-1r2c-aware.png
+ :align: center
+ :scale: 75%
+
+|
+
+
+1 rate 3 color (1r3c) RFC 2697 [#rfc2697]_
+------------------------------------------
+
+As for the `1 rate 2 color (1r2c)`_ policer, only one rate parameters is required
+to setup a 1r3c policer. However, such a policer adds another kind of packet category:
+exceeding ones (said yellow).
+
+Parameters
+~~~~~~~~~~
+
+To set-up such a policer, three parameters are needed:
+
+Committed Information Rate (CIR)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Committed Burst Size (CBS)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Excess Burst Size (EBS)
+ It represents the size (in bytes) of a second token bucket used
+ to allow an additional burstiness from the incoming traffic, when
+ traffic as been below the CIR for some time.
+
+.. figure:: /_images/policer-1r3c-bucket.png
+ :align: center
+ :scale: 25%
+
+ Figure 2: 1r3c buckets filling logic
+
+The committed token bucket (C) is filling up at CIR tokens (bytes)
+per second, up to CBS tokens. When C is full, tokens are overflowing
+into the excess token bucket (E), up to EBS tokens. Only overflowing
+tokens from E are lost.
+
+Color-Blind algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-1r3c-blind.png
+ :align: center
+ :scale: 75%
+
+|
+
+Color-Aware algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-1r3c-aware.png
+ :align: center
+ :scale: 75%
+
+|
+
+Notes
+~~~~~
+
+In the RFC 2697 [#rfc2697]_ describing the 1r3c policer, conforming (green) packets
+only consume tokens from the token bucket C. Whereas, in VPP, they also consume tokens from E.
+
+One way to stick to the RFC is then to set the EBS parameter to be superior to CBS, so that
+EBS - CBS corresponds to the EBS from the RFC.
+
+However, VPP does not enforce setting EBS > CBS, which could result in undesired behavior.
+
+2 rate 3 color (2r3c) RFC 2698 [#rfc2698]_
+------------------------------------------
+
+Instead of setting the limit between yellow and red packets in terms of bursts,
+as it is done by `1 rate 3 color (1r3c) RFC 2697`_ policers, two rate policers introduce
+another rate parameter to discriminate between those two kinds of packets.
+
+Parameters
+~~~~~~~~~~
+
+To set-up such a policer, four parameters are needed:
+
+Committed Information Rate (CIR)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Committed Burst Size (CBS)
+ As in the `1 rate 2 color (1r2c)`_ policer.
+
+Peak Information Rate (PIR)
+ Given in bytes per second, this parameter is the average
+ throughput allowed by the policer when there is a peak in
+ traffic.
+
+ It sets a second limit between exceeding arriving packets
+ (those making the traffic fall below the PIR, but above CIR),
+ and violating arriving packets (those making the traffic exceed the PIR).
+
+Peak Burst Size (PBS)
+ It represents the size (in bytes) of a second token bucket used
+ to allow an additional peak traffic.
+
+.. figure:: /_images/policer-2r3c-bucket.png
+ :align: center
+ :scale: 25%
+
+ Figure 2: 2r3c-rfc2698 buckets filling logic
+
+The committed token bucket (C) is filling up at CIR tokens (bytes)
+per second, up to CBS tokens. In the meantime, the peak token bucket (P)
+is filling up at PIR tokens per second, up to PBS. All overflowing tokens
+from C and P are lost.
+
+Color-Blind algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-2r3c-blind.png
+ :align: center
+ :scale: 75%
+
+|
+
+Color-Aware algorithm
+~~~~~~~~~~~~~~~~~~~~~
+
+.. image:: /_images/policer-2r3c-aware.png
+ :align: center
+ :scale: 50%
+
+|
+
+Notes
+~~~~~
+
+To have a working policer, the condition PIR >= CIR needs to hold.
+Indeed, since we assume that peak traffic should have a greater
+rate than committed ones.
+
+
+2 rate 3 color (2r3c) RFC 4115 [#rfc4115]_
+------------------------------------------
+
+The 2r3c-RFC4115 is an allowed choice by VPP. However, there is currently
+no implementation of such a policer. Hence, the only two rate policer VPP
+implements is the `2 rate 3 color (2r3c) RFC 2698`_ policer.
+
+
+.. rubric:: References:
+
+.. [#juniper] https://www.juniper.net/documentation/us/en/software/junos/traffic-mgmt-nfx/routing-policy/topics/concept/tcm-overview-cos-qfx-series-understanding.html
+.. [#cisco] https://www.cisco.com/c/en/us/td/docs/ios-xml/ios/qos_mqc/configuration/xe-16-8/qos-mqc-xe-16-8-book/qos-pkt-policing.html
+.. [#rfc2697] https://www.rfc-editor.org/rfc/rfc2697.html
+.. [#rfc2698] https://www.rfc-editor.org/rfc/rfc2698.html
+.. [#rfc4115] https://www.rfc-editor.org/rfc/rfc4115.html
diff --git a/src/vnet/policer/policer_api.c b/src/vnet/policer/policer_api.c
index 1382d17e2de..df35b472a89 100644
--- a/src/vnet/policer/policer_api.c
+++ b/src/vnet/policer/policer_api.c
@@ -35,99 +35,293 @@ static void
vl_api_policer_add_del_t_handler (vl_api_policer_add_del_t * mp)
{
vlib_main_t *vm = vlib_get_main ();
+ vnet_policer_main_t *pm = &vnet_policer_main;
vl_api_policer_add_del_reply_t *rmp;
int rv = 0;
- u8 *name = NULL;
+ uword *p;
+ char name[sizeof (mp->name) + 1];
+ qos_pol_cfg_params_st cfg;
+ u32 policer_index;
+
+ snprintf (name, sizeof (name), "%s", mp->name);
+
+ if (mp->is_add)
+ {
+ clib_memset (&cfg, 0, sizeof (cfg));
+ cfg.rfc = (qos_policer_type_en) mp->type;
+ cfg.rnd_type = (qos_round_type_en) mp->round_type;
+ cfg.rate_type = (qos_rate_type_en) mp->rate_type;
+ cfg.rb.kbps.cir_kbps = ntohl (mp->cir);
+ cfg.rb.kbps.eir_kbps = ntohl (mp->eir);
+ cfg.rb.kbps.cb_bytes = clib_net_to_host_u64 (mp->cb);
+ cfg.rb.kbps.eb_bytes = clib_net_to_host_u64 (mp->eb);
+ cfg.conform_action.action_type =
+ (qos_action_type_en) mp->conform_action.type;
+ cfg.conform_action.dscp = mp->conform_action.dscp;
+ cfg.exceed_action.action_type =
+ (qos_action_type_en) mp->exceed_action.type;
+ cfg.exceed_action.dscp = mp->exceed_action.dscp;
+ cfg.violate_action.action_type =
+ (qos_action_type_en) mp->violate_action.type;
+ cfg.violate_action.dscp = mp->violate_action.dscp;
+ cfg.color_aware = mp->color_aware;
+
+ rv = policer_add (vm, (u8 *) name, &cfg, &policer_index);
+ }
+ else
+ {
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_del (vm, p[0]);
+ }
+
+ REPLY_MACRO2 (VL_API_POLICER_ADD_DEL_REPLY, ({
+ if (rv == 0 && mp->is_add)
+ rmp->policer_index = htonl (policer_index);
+ else
+ rmp->policer_index = ~0;
+ }));
+}
+
+static_always_inline void
+policer_set_configuration (qos_pol_cfg_params_st *cfg,
+ vl_api_policer_config_t *infos)
+{
+ clib_memset (cfg, 0, sizeof (*cfg));
+ cfg->rfc = (qos_policer_type_en) infos->type;
+ cfg->rnd_type = (qos_round_type_en) infos->round_type;
+ cfg->rate_type = (qos_rate_type_en) infos->rate_type;
+ cfg->rb.kbps.cir_kbps = ntohl (infos->cir);
+ cfg->rb.kbps.eir_kbps = ntohl (infos->eir);
+ cfg->rb.kbps.cb_bytes = clib_net_to_host_u64 (infos->cb);
+ cfg->rb.kbps.eb_bytes = clib_net_to_host_u64 (infos->eb);
+ cfg->conform_action.action_type =
+ (qos_action_type_en) infos->conform_action.type;
+ cfg->conform_action.dscp = infos->conform_action.dscp;
+ cfg->exceed_action.action_type =
+ (qos_action_type_en) infos->exceed_action.type;
+ cfg->exceed_action.dscp = infos->exceed_action.dscp;
+ cfg->violate_action.action_type =
+ (qos_action_type_en) infos->violate_action.type;
+ cfg->violate_action.dscp = infos->violate_action.dscp;
+ cfg->color_aware = infos->color_aware;
+}
+
+static void
+vl_api_policer_add_t_handler (vl_api_policer_add_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_add_reply_t *rmp;
+ int rv = 0;
+ char name[sizeof (mp->name) + 1];
+ qos_pol_cfg_params_st cfg;
+ u32 policer_index;
+
+ snprintf (name, sizeof (name), "%s", mp->name);
+
+ policer_set_configuration (&cfg, &mp->infos);
+
+ rv = policer_add (vm, (u8 *) name, &cfg, &policer_index);
+
+ REPLY_MACRO2 (VL_API_POLICER_ADD_REPLY, ({
+ if (rv == 0)
+ rmp->policer_index = htonl (policer_index);
+ else
+ rmp->policer_index = ~0;
+ }));
+}
+
+static void
+vl_api_policer_del_t_handler (vl_api_policer_del_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_del_reply_t *rmp;
+ u32 policer_index;
+ int rv = 0;
+
+ policer_index = ntohl (mp->policer_index);
+ rv = policer_del (vm, policer_index);
+
+ REPLY_MACRO (VL_API_POLICER_DEL_REPLY);
+}
+
+static void
+vl_api_policer_update_t_handler (vl_api_policer_update_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_update_reply_t *rmp;
+ int rv = 0;
qos_pol_cfg_params_st cfg;
- clib_error_t *error;
u32 policer_index;
- name = format (0, "%s", mp->name);
- vec_terminate_c_string (name);
-
- clib_memset (&cfg, 0, sizeof (cfg));
- cfg.rfc = (qos_policer_type_en) mp->type;
- cfg.rnd_type = (qos_round_type_en) mp->round_type;
- cfg.rate_type = (qos_rate_type_en) mp->rate_type;
- cfg.rb.kbps.cir_kbps = ntohl (mp->cir);
- cfg.rb.kbps.eir_kbps = ntohl (mp->eir);
- cfg.rb.kbps.cb_bytes = clib_net_to_host_u64 (mp->cb);
- cfg.rb.kbps.eb_bytes = clib_net_to_host_u64 (mp->eb);
- cfg.conform_action.action_type =
- (qos_action_type_en) mp->conform_action.type;
- cfg.conform_action.dscp = mp->conform_action.dscp;
- cfg.exceed_action.action_type = (qos_action_type_en) mp->exceed_action.type;
- cfg.exceed_action.dscp = mp->exceed_action.dscp;
- cfg.violate_action.action_type =
- (qos_action_type_en) mp->violate_action.type;
- cfg.violate_action.dscp = mp->violate_action.dscp;
-
- cfg.color_aware = mp->color_aware;
-
- error = policer_add_del (vm, name, &cfg, &policer_index, mp->is_add);
-
- if (error)
- rv = VNET_API_ERROR_UNSPECIFIED;
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_POLICER_ADD_DEL_REPLY,
- ({
- if (rv == 0 && mp->is_add)
- rmp->policer_index = ntohl(policer_index);
- else
- rmp->policer_index = ~0;
- }));
- /* *INDENT-ON* */
+ policer_set_configuration (&cfg, &mp->infos);
+
+ policer_index = ntohl (mp->policer_index);
+ rv = policer_update (vm, policer_index, &cfg);
+
+ REPLY_MACRO (VL_API_POLICER_UPDATE_REPLY);
+}
+
+static void
+vl_api_policer_reset_t_handler (vl_api_policer_reset_t *mp)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ vl_api_policer_reset_reply_t *rmp;
+ u32 policer_index;
+ int rv = 0;
+
+ policer_index = ntohl (mp->policer_index);
+ rv = policer_reset (vm, policer_index);
+
+ REPLY_MACRO (VL_API_POLICER_RESET_REPLY);
}
static void
vl_api_policer_bind_t_handler (vl_api_policer_bind_t *mp)
{
vl_api_policer_bind_reply_t *rmp;
- u8 *name;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ char name[sizeof (mp->name) + 1];
+ uword *p;
u32 worker_index;
u8 bind_enable;
int rv;
- name = format (0, "%s", mp->name);
- vec_terminate_c_string (name);
+ snprintf (name, sizeof (name), "%s", mp->name);
worker_index = ntohl (mp->worker_index);
bind_enable = mp->bind_enable;
- rv = policer_bind_worker (name, worker_index, bind_enable);
- vec_free (name);
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_bind_worker (p[0], worker_index, bind_enable);
+
REPLY_MACRO (VL_API_POLICER_BIND_REPLY);
}
static void
+vl_api_policer_bind_v2_t_handler (vl_api_policer_bind_v2_t *mp)
+{
+ vl_api_policer_bind_v2_reply_t *rmp;
+ u32 policer_index;
+ u32 worker_index;
+ u8 bind_enable;
+ int rv;
+
+ policer_index = ntohl (mp->policer_index);
+ worker_index = ntohl (mp->worker_index);
+ bind_enable = mp->bind_enable;
+
+ rv = policer_bind_worker (policer_index, worker_index, bind_enable);
+
+ REPLY_MACRO (VL_API_POLICER_BIND_V2_REPLY);
+}
+
+static void
vl_api_policer_input_t_handler (vl_api_policer_input_t *mp)
{
- vl_api_policer_bind_reply_t *rmp;
- u8 *name;
+ vl_api_policer_input_reply_t *rmp;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ char name[sizeof (mp->name) + 1];
+ uword *p;
u32 sw_if_index;
u8 apply;
int rv;
VALIDATE_SW_IF_INDEX (mp);
- name = format (0, "%s", mp->name);
- vec_terminate_c_string (name);
+ snprintf (name, sizeof (name), "%s", mp->name);
sw_if_index = ntohl (mp->sw_if_index);
apply = mp->apply;
- rv = policer_input (name, sw_if_index, apply);
- vec_free (name);
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_input (p[0], sw_if_index, VLIB_RX, apply);
BAD_SW_IF_INDEX_LABEL;
REPLY_MACRO (VL_API_POLICER_INPUT_REPLY);
}
static void
-send_policer_details (u8 *name, qos_pol_cfg_params_st *config,
- policer_t *templ, vl_api_registration_t *reg,
- u32 context)
+vl_api_policer_input_v2_t_handler (vl_api_policer_input_v2_t *mp)
+{
+ vl_api_policer_input_v2_reply_t *rmp;
+ u32 policer_index;
+ u32 sw_if_index;
+ u8 apply;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ policer_index = ntohl (mp->policer_index);
+ sw_if_index = ntohl (mp->sw_if_index);
+ apply = mp->apply;
+
+ rv = policer_input (policer_index, sw_if_index, VLIB_RX, apply);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_POLICER_INPUT_REPLY);
+}
+
+static void
+vl_api_policer_output_t_handler (vl_api_policer_output_t *mp)
+{
+ vl_api_policer_output_reply_t *rmp;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ char name[sizeof (mp->name) + 1];
+ uword *p;
+ u32 sw_if_index;
+ u8 apply;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ snprintf (name, sizeof (name), "%s", mp->name);
+
+ sw_if_index = ntohl (mp->sw_if_index);
+ apply = mp->apply;
+
+ p = hash_get_mem (pm->policer_index_by_name, name);
+
+ rv = VNET_API_ERROR_NO_SUCH_ENTRY;
+ if (p != NULL)
+ rv = policer_input (p[0], sw_if_index, VLIB_TX, apply);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_POLICER_OUTPUT_REPLY);
+}
+
+static void
+vl_api_policer_output_v2_t_handler (vl_api_policer_output_v2_t *mp)
+{
+ vl_api_policer_output_reply_t *rmp;
+ u32 policer_index;
+ u32 sw_if_index;
+ u8 apply;
+ int rv;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ policer_index = ntohl (mp->policer_index);
+ sw_if_index = ntohl (mp->sw_if_index);
+ apply = mp->apply;
+
+ rv = policer_input (policer_index, sw_if_index, VLIB_TX, apply);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_POLICER_OUTPUT_REPLY);
+}
+
+static void
+send_policer_details (qos_pol_cfg_params_st *config, policer_t *policer,
+ vl_api_registration_t *reg, u32 context)
{
vl_api_policer_details_t *mp;
@@ -143,26 +337,27 @@ send_policer_details (u8 *name, qos_pol_cfg_params_st *config,
mp->round_type = (vl_api_sse2_qos_round_type_t) config->rnd_type;
mp->type = (vl_api_sse2_qos_policer_type_t) config->rfc;
mp->conform_action.type =
- (vl_api_sse2_qos_action_type_t) config->conform_action.action_type;
- mp->conform_action.dscp = config->conform_action.dscp;
+ (vl_api_sse2_qos_action_type_t) policer->action[POLICE_CONFORM];
+ mp->conform_action.dscp = policer->mark_dscp[POLICE_CONFORM];
mp->exceed_action.type =
- (vl_api_sse2_qos_action_type_t) config->exceed_action.action_type;
- mp->exceed_action.dscp = config->exceed_action.dscp;
+ (vl_api_sse2_qos_action_type_t) policer->action[POLICE_EXCEED];
+ mp->exceed_action.dscp = policer->mark_dscp[POLICE_EXCEED];
mp->violate_action.type =
- (vl_api_sse2_qos_action_type_t) config->violate_action.action_type;
- mp->violate_action.dscp = config->violate_action.dscp;
- mp->single_rate = templ->single_rate ? 1 : 0;
- mp->color_aware = templ->color_aware ? 1 : 0;
- mp->scale = htonl (templ->scale);
- mp->cir_tokens_per_period = htonl (templ->cir_tokens_per_period);
- mp->pir_tokens_per_period = htonl (templ->pir_tokens_per_period);
- mp->current_limit = htonl (templ->current_limit);
- mp->current_bucket = htonl (templ->current_bucket);
- mp->extended_limit = htonl (templ->extended_limit);
- mp->extended_bucket = htonl (templ->extended_bucket);
- mp->last_update_time = clib_host_to_net_u64 (templ->last_update_time);
-
- strncpy ((char *) mp->name, (char *) name, ARRAY_LEN (mp->name) - 1);
+ (vl_api_sse2_qos_action_type_t) policer->action[POLICE_VIOLATE];
+ mp->violate_action.dscp = policer->mark_dscp[POLICE_VIOLATE];
+ mp->single_rate = policer->single_rate ? 1 : 0;
+ mp->color_aware = policer->color_aware ? 1 : 0;
+ mp->scale = htonl (policer->scale);
+ mp->cir_tokens_per_period = htonl (policer->cir_tokens_per_period);
+ mp->pir_tokens_per_period = htonl (policer->pir_tokens_per_period);
+ mp->current_limit = htonl (policer->current_limit);
+ mp->current_bucket = htonl (policer->current_bucket);
+ mp->extended_limit = htonl (policer->extended_limit);
+ mp->extended_bucket = htonl (policer->extended_bucket);
+ mp->last_update_time = clib_host_to_net_u64 (policer->last_update_time);
+
+ strncpy ((char *) mp->name, (char *) policer->name,
+ ARRAY_LEN (mp->name) - 1);
vl_api_send_msg (reg, (u8 *) mp);
}
@@ -172,13 +367,11 @@ vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp)
{
vl_api_registration_t *reg;
vnet_policer_main_t *pm = &vnet_policer_main;
- hash_pair_t *hp;
- uword *p;
- u32 pool_index;
+ uword *p, *pi;
+ u32 pool_index, policer_index;
u8 *match_name = 0;
- u8 *name;
qos_pol_cfg_params_st *config;
- policer_t *templ;
+ policer_t *policer;
reg = vl_api_client_index_to_registration (mp->client_index);
if (!reg)
@@ -193,26 +386,67 @@ vl_api_policer_dump_t_handler (vl_api_policer_dump_t * mp)
if (mp->match_name_valid)
{
p = hash_get_mem (pm->policer_config_by_name, match_name);
- if (p)
+ pi = hash_get_mem (pm->policer_index_by_name, match_name);
+ if (0 == p || 0 == pi)
+ return;
+
+ pool_index = p[0];
+ policer_index = pi[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ policer = pool_elt_at_index (pm->policers, policer_index);
+ send_policer_details (config, policer, reg, mp->context);
+ }
+ else
+ {
+ pool_foreach (policer, pm->policers)
+ {
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
+ if (0 == p)
+ continue;
+
+ pool_index = p[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ send_policer_details (config, policer, reg, mp->context);
+ };
+ }
+}
+
+static void
+vl_api_policer_dump_v2_t_handler (vl_api_policer_dump_v2_t *mp)
+{
+ vl_api_registration_t *reg;
+ vnet_policer_main_t *pm = &vnet_policer_main;
+ qos_pol_cfg_params_st *config;
+ u32 policer_index, pool_index;
+ policer_t *policer;
+ uword *p;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ policer_index = ntohl (mp->policer_index);
+
+ if (~0 == policer_index)
+ {
+ pool_foreach (policer, pm->policers)
{
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
pool_index = p[0];
config = pool_elt_at_index (pm->configs, pool_index);
- templ = pool_elt_at_index (pm->policer_templates, pool_index);
- send_policer_details (match_name, config, templ, reg, mp->context);
- }
+ send_policer_details (config, policer, reg, mp->context);
+ };
}
else
{
- /* *INDENT-OFF* */
- hash_foreach_pair (hp, pm->policer_config_by_name,
- ({
- name = (u8 *) hp->key;
- pool_index = hp->value[0];
- config = pool_elt_at_index (pm->configs, pool_index);
- templ = pool_elt_at_index (pm->policer_templates, pool_index);
- send_policer_details(name, config, templ, reg, mp->context);
- }));
- /* *INDENT-ON* */
+ if (pool_is_free_index (pm->policers, policer_index))
+ return;
+
+ policer = &pm->policers[policer_index];
+ p = hash_get_mem (pm->policer_config_by_name, policer->name);
+ pool_index = p[0];
+ config = pool_elt_at_index (pm->configs, pool_index);
+ send_policer_details (config, policer, reg, mp->context);
}
}
diff --git a/src/vnet/policer/policer_types.api b/src/vnet/policer/policer_types.api
index 3e21b7d707c..9d4c6447f69 100644
--- a/src/vnet/policer/policer_types.api
+++ b/src/vnet/policer/policer_types.api
@@ -56,6 +56,34 @@ typedef sse2_qos_action
u8 dscp;
};
+/** \brief Policer configuration
+ @param cir - CIR
+ @param eir - EIR
+ @param cb - Committed Burst
+ @param eb - Excess or Peak Burst
+ @param rate_type - rate type
+ @param round_type - rounding type
+ @param type - policer algorithm
+ @param color_aware - 0=color-blind, 1=color-aware
+ @param conform_action - conform action
+ @param exceed_action - exceed action type
+ @param violate_action - violate action type
+*/
+typedef policer_config
+{
+ u32 cir;
+ u32 eir;
+ u64 cb;
+ u64 eb;
+ vl_api_sse2_qos_rate_type_t rate_type;
+ vl_api_sse2_qos_round_type_t round_type;
+ vl_api_sse2_qos_policer_type_t type;
+ bool color_aware;
+ vl_api_sse2_qos_action_t conform_action;
+ vl_api_sse2_qos_action_t exceed_action;
+ vl_api_sse2_qos_action_t violate_action;
+};
+
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vnet/policer/xlate.c b/src/vnet/policer/xlate.c
index 9c4d76fd990..bffd208716d 100644
--- a/src/vnet/policer/xlate.c
+++ b/src/vnet/policer/xlate.c
@@ -1058,7 +1058,7 @@ x86_pol_compute_hw_params (qos_pol_cfg_params_st *cfg, policer_t *hw)
* Return: Status, success or failure code.
*/
int
-pol_logical_2_physical (qos_pol_cfg_params_st *cfg, policer_t *phys)
+pol_logical_2_physical (const qos_pol_cfg_params_st *cfg, policer_t *phys)
{
int rc;
qos_pol_cfg_params_st kbps_cfg;
diff --git a/src/vnet/policer/xlate.h b/src/vnet/policer/xlate.h
index 722ac2fb777..7f6ebe7b65d 100644
--- a/src/vnet/policer/xlate.h
+++ b/src/vnet/policer/xlate.h
@@ -158,7 +158,7 @@ typedef struct qos_pol_hw_params_st_
u32 extd_bkt;
} qos_pol_hw_params_st;
-int pol_logical_2_physical (qos_pol_cfg_params_st *cfg, policer_t *phys);
+int pol_logical_2_physical (const qos_pol_cfg_params_st *cfg, policer_t *phys);
#endif /* __included_xlate_h__ */
diff --git a/src/vnet/ppp/node.c b/src/vnet/ppp/node.c
index eead2b2f0c1..fa056bfb99f 100644
--- a/src/vnet/ppp/node.c
+++ b/src/vnet/ppp/node.c
@@ -265,7 +265,6 @@ static char *ppp_error_strings[] = {
#undef ppp_error
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ppp_input_node) = {
.function = ppp_input,
.name = "ppp-input",
@@ -288,7 +287,6 @@ VLIB_REGISTER_NODE (ppp_input_node) = {
.format_trace = format_ppp_input_trace,
.unformat_buffer = unformat_ppp_header,
};
-/* *INDENT-ON* */
static clib_error_t *
ppp_input_runtime_init (vlib_main_t * vm)
diff --git a/src/vnet/ppp/ppp.c b/src/vnet/ppp/ppp.c
index b1fafa13145..8aa8504fcdd 100644
--- a/src/vnet/ppp/ppp.c
+++ b/src/vnet/ppp/ppp.c
@@ -197,7 +197,6 @@ ppp_build_rewrite (vnet_main_t * vnm,
return (rewrite);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = {
.name = "PPP",
.format_header = format_ppp_header_with_length,
@@ -205,7 +204,6 @@ VNET_HW_INTERFACE_CLASS (ppp_hw_interface_class) = {
.build_rewrite = ppp_build_rewrite,
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
static void
add_protocol (ppp_main_t * pm, ppp_protocol_t protocol, char *protocol_name)
diff --git a/src/vnet/qos/FEATURE.yaml b/src/vnet/qos/FEATURE.yaml
index 47ccf0ac86b..40ab774123c 100644
--- a/src/vnet/qos/FEATURE.yaml
+++ b/src/vnet/qos/FEATURE.yaml
@@ -7,6 +7,6 @@ features:
- Mark - write [mapped] QoS bits into packet headers
- Store - write in packet metadata a fixed QoS value
-description: "An implentation of Quality of Service (QoS)"
+description: "An implementation of Quality of Service (QoS)"
state: production
properties: [API, CLI, MULTITHREAD]
diff --git a/src/vnet/qos/qos.api b/src/vnet/qos/qos.api
index d655165cef2..84addf0e449 100644
--- a/src/vnet/qos/qos.api
+++ b/src/vnet/qos/qos.api
@@ -57,7 +57,7 @@ typedef qos_store
/**
* Enable/Disable QoS storing
* The QoS bits from the packet at the specified input layer are copied
- * into the packet. Storeing should be used in conjunction with marking
+ * into the packet. Storing should be used in conjunction with marking
* @param enable - enable=1 or disable the feature
* @param store - Store configuration
*/
diff --git a/src/vnet/qos/qos_egress_map.c b/src/vnet/qos/qos_egress_map.c
index 7985579d3cf..43c0c55df07 100644
--- a/src/vnet/qos/qos_egress_map.c
+++ b/src/vnet/qos/qos_egress_map.c
@@ -47,13 +47,11 @@ qos_egress_map_get_id (index_t qemi)
qos_egress_map_id_t qid;
index_t qmi;
- /* *INDENT-OFF* */
hash_foreach(qid, qmi, qem_db,
({
if (qmi == qemi)
return (qid);
}));
- /* *INDENT-OFF* */
return (~0);
}
@@ -129,12 +127,10 @@ qos_egress_map_walk (qos_egress_map_walk_cb_t fn, void *c)
qos_egress_map_id_t qid;
index_t qmi;
- /* *INDENT-OFF* */
hash_foreach(qid, qmi, qem_db,
({
fn(qid, pool_elt_at_index(qem_pool, qmi), c);
}));
- /* *INDENT-OFF* */
}
static clib_error_t *
@@ -181,14 +177,12 @@ qos_egress_map_update_cli (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{qos egress map id 0 [ip][4]=4}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
.path = "qos egress map",
.short_help = "qos egress map id %d [delete] {[SOURCE][INPUT]=OUTPUT}",
.function = qos_egress_map_update_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
u8 *format_qos_egress_map (u8 * s, va_list * args)
{
@@ -239,7 +233,6 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
{
index_t qemi;
- /* *INDENT-OFF* */
hash_foreach(map_id, qemi, qem_db,
({
vlib_cli_output (vm, " Map-ID:%d\n%U",
@@ -247,7 +240,6 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
format_qos_egress_map,
pool_elt_at_index(qem_pool, qemi), 2);
}));
- /* *INDENT-ON* */
}
else
{
@@ -274,14 +266,12 @@ VLIB_CLI_COMMAND (qos_egress_map_update_command, static) = {
* @cliexpar
* @cliexcmd{show qos egress map}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_egress_map_show_command, static) = {
.path = "show qos egress map",
.short_help = "show qos egress map id %d",
.function = qos_egress_map_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_mark.c b/src/vnet/qos/qos_mark.c
index 44bb34bd010..3817c89a009 100644
--- a/src/vnet/qos/qos_mark.c
+++ b/src/vnet/qos/qos_mark.c
@@ -187,14 +187,12 @@ qos_mark_cli (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{qos egress interface GigEthernet0/9/0 id 0 output ip}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_egress_map_interface_command, static) = {
.path = "qos mark",
.short_help = "qos mark <SOURCE> <INTERFACE> id <MAP>",
.function = qos_mark_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static void
qos_mark_show_one_interface (vlib_main_t * vm, u32 sw_if_index)
@@ -271,14 +269,12 @@ qos_mark_show (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{show qos egress map}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_mark_show_command, static) = {
.path = "show qos mark",
.short_help = "show qos mark [interface]",
.function = qos_mark_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_mark_node.c b/src/vnet/qos/qos_mark_node.c
index f12e66b4fa0..16a487aede8 100644
--- a/src/vnet/qos/qos_mark_node.c
+++ b/src/vnet/qos/qos_mark_node.c
@@ -212,7 +212,6 @@ VLIB_NODE_FN (vlan_ip6_qos_mark_node) (vlib_main_t * vm,
return (qos_mark_inline (vm, node, frame, QOS_SOURCE_VLAN, 0));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_qos_mark_node) = {
.name = "ip4-qos-mark",
.vector_size = sizeof (u32),
@@ -330,7 +329,6 @@ VNET_FEATURE_INIT (vlan_mpls_qos_mark_node, static) = {
.runs_after = VNET_FEATURES ("mpls-qos-mark"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_record.c b/src/vnet/qos/qos_record.c
index d52c1442d8d..fdf79766471 100644
--- a/src/vnet/qos/qos_record.c
+++ b/src/vnet/qos/qos_record.c
@@ -203,14 +203,12 @@ qos_record_cli (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{qos record ip GigEthernet0/1/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_record_command, static) = {
.path = "qos record",
.short_help = "qos record <record-source> <INTERFACE> [disable]",
.function = qos_record_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static void
qos_record_show_one_interface (vlib_main_t * vm, u32 sw_if_index)
@@ -285,14 +283,12 @@ qos_record_show (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{show qos egress map}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_record_show_command, static) = {
.path = "show qos record",
.short_help = "show qos record [interface]",
.function = qos_record_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_record_node.c b/src/vnet/qos/qos_record_node.c
index 75e1421dc08..1a34891f85d 100644
--- a/src/vnet/qos/qos_record_node.c
+++ b/src/vnet/qos/qos_record_node.c
@@ -222,7 +222,6 @@ VLIB_NODE_FN (l2_ip_qos_record_node) (vlib_main_t * vm,
return (qos_record_inline (vm, node, frame, QOS_SOURCE_VLAN, 0, 1));
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_qos_record_node) = {
.name = "ip4-qos-record",
.vector_size = sizeof (u32),
@@ -372,7 +371,6 @@ VLIB_REGISTER_NODE (l2_ip_qos_record_node) = {
[0] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_store.c b/src/vnet/qos/qos_store.c
index 06336434e5d..3424a914e35 100644
--- a/src/vnet/qos/qos_store.c
+++ b/src/vnet/qos/qos_store.c
@@ -55,7 +55,7 @@ qos_store_feature_config (u32 sw_if_index,
case QOS_SOURCE_MPLS:
case QOS_SOURCE_VLAN:
case QOS_SOURCE_EXT:
- /* not a valid option for storeing */
+ /* not a valid option for storing */
break;
}
}
@@ -127,7 +127,7 @@ qos_store_walk (qos_store_walk_cb_t fn, void *c)
}
/*
- * Disable storeing feature for all protocols when the interface
+ * Disable storing feature for all protocols when the interface
* is deleted
*/
static clib_error_t *
@@ -203,7 +203,7 @@ qos_store_cli (vlib_main_t * vm,
}
/*?
- * Enable QoS bit storeing on an interface using the packet's input DSCP bits
+ * Enable QoS bit storing on an interface using the packet's input DSCP bits
* Which input QoS bits to use are either; IP, MPLS or VLAN. If more than
* one protocol is chosen (which is foolish) the higher layers override the
* lower.
@@ -211,14 +211,12 @@ qos_store_cli (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{qos store ip GigEthernet0/1/0}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_store_command, static) = {
.path = "qos store",
.short_help = "qos store <store-source> <INTERFACE> [disable]",
.function = qos_store_cli,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
static void
qos_store_show_one_interface (vlib_main_t * vm, u32 sw_if_index)
@@ -295,14 +293,12 @@ qos_store_show (vlib_main_t * vm,
* @cliexpar
* @cliexcmd{show qos egress map}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (qos_store_show_command, static) = {
.path = "show qos store",
.short_help = "show qos store [interface]",
.function = qos_store_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/qos/qos_store_node.c b/src/vnet/qos/qos_store_node.c
index 2273b2eac77..6a5ad24453d 100644
--- a/src/vnet/qos/qos_store_node.c
+++ b/src/vnet/qos/qos_store_node.c
@@ -121,7 +121,6 @@ VLIB_NODE_FN (ip6_qos_store_node) (vlib_main_t * vm,
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_qos_store_node) = {
.name = "ip4-qos-store",
.vector_size = sizeof (u32),
@@ -168,7 +167,6 @@ VNET_FEATURE_INIT (ip6m_qos_store_node, static) = {
.node_name = "ip6-qos-store",
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c
index 7fe81885725..c66548507e5 100644
--- a/src/vnet/session/application.c
+++ b/src/vnet/session/application.c
@@ -31,10 +31,12 @@ static app_main_t app_main;
static app_listener_t *
app_listener_alloc (application_t * app)
{
+ app_main_t *am = &app_main;
app_listener_t *app_listener;
- pool_get (app->listeners, app_listener);
+
+ pool_get (am->listeners, app_listener);
clib_memset (app_listener, 0, sizeof (*app_listener));
- app_listener->al_index = app_listener - app->listeners;
+ app_listener->al_index = app_listener - am->listeners;
app_listener->app_index = app->app_index;
app_listener->session_index = SESSION_INVALID_INDEX;
app_listener->local_index = SESSION_INVALID_INDEX;
@@ -43,18 +45,23 @@ app_listener_alloc (application_t * app)
}
app_listener_t *
-app_listener_get (application_t * app, u32 app_listener_index)
+app_listener_get (u32 app_listener_index)
{
- return pool_elt_at_index (app->listeners, app_listener_index);
+ app_main_t *am = &app_main;
+
+ return pool_elt_at_index (am->listeners, app_listener_index);
}
static void
app_listener_free (application_t * app, app_listener_t * app_listener)
{
+ app_main_t *am = &app_main;
+
clib_bitmap_free (app_listener->workers);
+ vec_free (app_listener->cl_listeners);
if (CLIB_DEBUG)
clib_memset (app_listener, 0xfa, sizeof (*app_listener));
- pool_put (app->listeners, app_listener);
+ pool_put (am->listeners, app_listener);
}
session_handle_t
@@ -63,24 +70,14 @@ app_listener_handle (app_listener_t * al)
return al->ls_handle;
}
-app_listener_t *
-app_listener_get_w_session (session_t * ls)
-{
- application_t *app;
-
- app = application_get_if_valid (ls->app_index);
- if (!app)
- return 0;
- return app_listener_get (app, ls->al_index);
-}
-
session_handle_t
app_listen_session_handle (session_t * ls)
{
app_listener_t *al;
- al = app_listener_get_w_session (ls);
- if (!al)
+ /* TODO(fcoras): quic session handles */
+ if (ls->al_index == SESSION_INVALID_INDEX)
return listen_session_get_handle (ls);
+ al = app_listener_get (ls->al_index);
return al->ls_handle;
}
@@ -91,7 +88,7 @@ app_listener_get_w_handle (session_handle_t handle)
ls = session_get_from_handle_if_valid (handle);
if (!ls)
return 0;
- return app_listener_get_w_session (ls);
+ return app_listener_get (ls->al_index);
}
app_listener_t *
@@ -112,7 +109,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session (ls);
+ return app_listener_get (ls->al_index);
}
}
@@ -122,7 +119,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session ((session_t *) ls);
+ return app_listener_get (ls->al_index);
}
/*
@@ -144,7 +141,7 @@ app_listener_lookup (application_t * app, session_endpoint_cfg_t * sep_ext)
if (handle != SESSION_INVALID_HANDLE)
{
ls = listen_session_get_from_handle (handle);
- return app_listener_get_w_session ((session_t *) ls);
+ return app_listener_get (ls->al_index);
}
}
}
@@ -181,7 +178,6 @@ app_listener_alloc_and_init (application_t * app,
local_st = session_type_from_proto_and_ip (TRANSPORT_PROTO_NONE,
sep->is_ip4);
ls = listen_session_alloc (0, local_st);
- ls->app_index = app->app_index;
ls->app_wrk_index = sep->app_wrk_index;
lh = session_handle (ls);
@@ -189,11 +185,12 @@ app_listener_alloc_and_init (application_t * app,
{
ls = session_get_from_handle (lh);
session_free (ls);
+ app_listener_free (app, app_listener);
return rv;
}
ls = session_get_from_handle (lh);
- app_listener = app_listener_get (app, al_index);
+ app_listener = app_listener_get (al_index);
app_listener->local_index = ls->session_index;
app_listener->ls_handle = lh;
ls->al_index = al_index;
@@ -212,7 +209,6 @@ app_listener_alloc_and_init (application_t * app,
* build it's own specific listening connection.
*/
ls = listen_session_alloc (0, st);
- ls->app_index = app->app_index;
ls->app_wrk_index = sep->app_wrk_index;
/* Listen pool can be reallocated if the transport is
@@ -223,10 +219,11 @@ app_listener_alloc_and_init (application_t * app,
{
ls = listen_session_get_from_handle (lh);
session_free (ls);
+ app_listener_free (app, app_listener);
return rv;
}
ls = listen_session_get_from_handle (lh);
- app_listener = app_listener_get (app, al_index);
+ app_listener = app_listener_get (al_index);
app_listener->session_index = ls->session_index;
app_listener->ls_handle = lh;
ls->al_index = al_index;
@@ -288,8 +285,9 @@ app_listener_cleanup (app_listener_t * al)
}
static app_worker_t *
-app_listener_select_worker (application_t * app, app_listener_t * al)
+app_listener_select_worker (app_listener_t *al)
{
+ application_t *app;
u32 wrk_index;
app = application_get (al->app_index);
@@ -319,6 +317,13 @@ app_listener_get_local_session (app_listener_t * al)
return listen_session_get (al->local_index);
}
+session_t *
+app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_map_index)
+{
+ u32 si = vec_elt (al->cl_listeners, wrk_map_index);
+ return session_get (si, 0 /* listener thread */);
+}
+
static app_worker_map_t *
app_worker_map_alloc (application_t * app)
{
@@ -642,7 +647,7 @@ app_rx_mqs_alloc (application_t *app)
cfg->ring_cfgs = rc;
eqs->ssvm.ssvm_size = svm_msg_q_size_to_alloc (cfg) * n_mqs + (1 << 20);
- eqs->ssvm.name = format (0, "%s-rx-mqs-seg%c", app->name, 0);
+ eqs->ssvm.name = format (0, "%v-rx-mqs-seg%c", app->name, 0);
if (ssvm_server_init (&eqs->ssvm, SSVM_SEGMENT_MEMFD))
{
@@ -684,7 +689,7 @@ application_get_rx_mqs_segment (application_t *app)
{
if (application_use_private_rx_mqs ())
return &app->rx_mqs_segment;
- return session_main_get_evt_q_segment ();
+ return session_main_get_wrk_mqs_segment ();
}
void
@@ -723,6 +728,12 @@ application_get_if_valid (u32 app_index)
return pool_elt_at_index (app_main.app_pool, app_index);
}
+static int
+_null_app_tx_callback (session_t *s)
+{
+ return 0;
+}
+
static void
application_verify_cb_fns (session_cb_vft_t * cb_fns)
{
@@ -734,6 +745,8 @@ application_verify_cb_fns (session_cb_vft_t * cb_fns)
clib_warning ("No session disconnect callback function provided");
if (cb_fns->session_reset_callback == 0)
clib_warning ("No session reset callback function provided");
+ if (!cb_fns->builtin_app_tx_callback)
+ cb_fns->builtin_app_tx_callback = _null_app_tx_callback;
}
/**
@@ -747,14 +760,14 @@ application_verify_cfg (ssvm_segment_type_t st)
u8 is_valid;
if (st == SSVM_SEGMENT_MEMFD)
{
- is_valid = (session_main_get_evt_q_segment () != 0);
+ is_valid = (session_main_get_wrk_mqs_segment () != 0);
if (!is_valid)
clib_warning ("memfd seg: vpp's event qs IN binary api svm region");
return is_valid;
}
else if (st == SSVM_SEGMENT_SHM)
{
- is_valid = (session_main_get_evt_q_segment () == 0);
+ is_valid = (session_main_get_wrk_mqs_segment () == 0);
if (!is_valid)
clib_warning ("shm seg: vpp's event qs NOT IN binary api svm region");
return is_valid;
@@ -763,8 +776,8 @@ application_verify_cfg (ssvm_segment_type_t st)
return 1;
}
-static int
-application_alloc_and_init (app_init_args_t * a)
+static session_error_t
+application_alloc_and_init (app_init_args_t *a)
{
ssvm_segment_type_t seg_type = SSVM_SEGMENT_MEMFD;
segment_manager_props_t *props;
@@ -785,15 +798,15 @@ application_alloc_and_init (app_init_args_t * a)
{
clib_warning ("mq eventfds can only be used if socket transport is "
"used for binary api");
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
}
if (!application_verify_cfg (seg_type))
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
if (opts[APP_OPTIONS_PREALLOC_FIFO_PAIRS] &&
opts[APP_OPTIONS_PREALLOC_FIFO_HDRS])
- return VNET_API_ERROR_APP_UNSUPPORTED_CFG;
+ return SESSION_E_NOSUPPORT;
/* Check that the obvious things are properly set up */
application_verify_cb_fns (a->session_cb_vft);
@@ -819,6 +832,8 @@ application_alloc_and_init (app_init_args_t * a)
props->add_segment_size = opts[APP_OPTIONS_ADD_SEGMENT_SIZE];
props->add_segment = 1;
}
+ if (opts[APP_OPTIONS_FLAGS] & APP_OPTIONS_FLAGS_USE_HUGE_PAGE)
+ props->huge_page = 1;
if (opts[APP_OPTIONS_RX_FIFO_SIZE])
props->rx_fifo_size = opts[APP_OPTIONS_RX_FIFO_SIZE];
if (opts[APP_OPTIONS_TX_FIFO_SIZE])
@@ -872,12 +887,10 @@ application_free (application_t * app)
* Free workers
*/
- /* *INDENT-OFF* */
pool_flush (wrk_map, app->worker_maps, ({
app_wrk = app_worker_get (wrk_map->wrk_index);
app_worker_free (app_wrk);
}));
- /* *INDENT-ON* */
pool_free (app->worker_maps);
/*
@@ -920,13 +933,11 @@ application_detach_process (application_t * app, u32 api_client_index)
APP_DBG ("Detaching for app %v index %u api client index %u", app->name,
app->app_index, api_client_index);
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
if (app_wrk->api_client_index == api_client_index)
vec_add1 (wrks, app_wrk->wrk_index);
}
- /* *INDENT-ON* */
if (!vec_len (wrks))
{
@@ -947,6 +958,31 @@ application_detach_process (application_t * app, u32 api_client_index)
vec_free (wrks);
}
+void
+application_namespace_cleanup (app_namespace_t *app_ns)
+{
+ u32 *app_indices = 0, *app_index;
+ application_t *app;
+ u32 ns_index;
+
+ ns_index = app_namespace_index (app_ns);
+ pool_foreach (app, app_main.app_pool)
+ if (app->ns_index == ns_index)
+ vec_add1 (app_indices, app->ns_index);
+
+ vec_foreach (app_index, app_indices)
+ {
+ app = application_get (*app_index);
+
+ if (application_is_proxy (app))
+ application_remove_proxy (app);
+ app->flags &= ~APP_OPTIONS_FLAGS_IS_PROXY;
+
+ application_free (app);
+ }
+ vec_free (app_indices);
+}
+
app_worker_t *
application_get_worker (application_t * app, u32 wrk_map_index)
{
@@ -972,12 +1008,55 @@ application_n_workers (application_t * app)
app_worker_t *
application_listener_select_worker (session_t * ls)
{
- application_t *app;
app_listener_t *al;
- app = application_get (ls->app_index);
- al = app_listener_get (app, ls->al_index);
- return app_listener_select_worker (app, al);
+ al = app_listener_get (ls->al_index);
+ return app_listener_select_worker (al);
+}
+
+always_inline u32
+app_listener_cl_flow_hash (session_dgram_hdr_t *hdr)
+{
+ u32 hash = 0;
+
+ if (hdr->is_ip4)
+ {
+ hash = clib_crc32c_u32 (hash, hdr->rmt_ip.ip4.as_u32);
+ hash = clib_crc32c_u32 (hash, hdr->lcl_ip.ip4.as_u32);
+ hash = clib_crc32c_u16 (hash, hdr->rmt_port);
+ hash = clib_crc32c_u16 (hash, hdr->lcl_port);
+ }
+ else
+ {
+ hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[0]);
+ hash = clib_crc32c_u64 (hash, hdr->rmt_ip.ip6.as_u64[1]);
+ hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[0]);
+ hash = clib_crc32c_u64 (hash, hdr->lcl_ip.ip6.as_u64[1]);
+ hash = clib_crc32c_u16 (hash, hdr->rmt_port);
+ hash = clib_crc32c_u16 (hash, hdr->lcl_port);
+ }
+
+ return hash;
+}
+
+session_t *
+app_listener_select_wrk_cl_session (session_t *ls, session_dgram_hdr_t *hdr)
+{
+ u32 wrk_map_index = 0;
+ app_listener_t *al;
+
+ al = app_listener_get (ls->al_index);
+ /* Crude test to check if only worker 0 is set */
+ if (al->workers[0] != 1)
+ {
+ u32 hash = app_listener_cl_flow_hash (hdr);
+ hash %= vec_len (al->workers) * sizeof (uword);
+ wrk_map_index = clib_bitmap_next_set (al->workers, hash);
+ if (wrk_map_index == ~0)
+ wrk_map_index = clib_bitmap_first_set (al->workers);
+ }
+
+ return app_listener_get_wrk_cl_session (al, wrk_map_index);
}
int
@@ -1019,8 +1098,8 @@ application_alloc_worker_and_init (application_t * app, app_worker_t ** wrk)
return 0;
}
-int
-vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
+session_error_t
+vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a)
{
fifo_segment_t *fs;
app_worker_map_t *wrk_map;
@@ -1031,7 +1110,7 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
app = application_get (a->app_index);
if (!app)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (a->is_add)
{
@@ -1054,13 +1133,15 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
{
wrk_map = app_worker_map_get (app, a->wrk_map_index);
if (!wrk_map)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
app_wrk = app_worker_get (wrk_map->wrk_index);
if (!app_wrk)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
application_api_table_del (app_wrk->api_client_index);
+ if (appns_sapi_enabled ())
+ sapi_socket_close_w_handle (app_wrk->api_client_index);
app_worker_free (app_wrk);
app_worker_map_free (app, wrk_map);
if (application_n_workers (app) == 0)
@@ -1069,8 +1150,8 @@ vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a)
return 0;
}
-static int
-app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index)
+static session_error_t
+app_validate_namespace (u8 *namespace_id, u64 secret, u32 *app_ns_index)
{
app_namespace_t *app_ns;
if (vec_len (namespace_id) == 0)
@@ -1082,12 +1163,12 @@ app_validate_namespace (u8 * namespace_id, u64 secret, u32 * app_ns_index)
*app_ns_index = app_namespace_index_from_id (namespace_id);
if (*app_ns_index == APP_NAMESPACE_INVALID_INDEX)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
app_ns = app_namespace_get (*app_ns_index);
if (!app_ns)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
if (app_ns->ns_secret != secret)
- return VNET_API_ERROR_APP_WRONG_NS_SECRET;
+ return SESSION_E_WRONG_NS_SECRET;
return 0;
}
@@ -1111,8 +1192,8 @@ app_name_from_api_index (u32 api_client_index)
* to external app and a segment manager for shared memory fifo based
* communication with the external app.
*/
-int
-vnet_application_attach (vnet_app_attach_args_t * a)
+session_error_t
+vnet_application_attach (vnet_app_attach_args_t *a)
{
fifo_segment_t *fs;
application_t *app = 0;
@@ -1121,17 +1202,17 @@ vnet_application_attach (vnet_app_attach_args_t * a)
u32 app_ns_index = 0;
u8 *app_name = 0;
u64 secret;
- int rv;
+ session_error_t rv;
if (a->api_client_index != APP_INVALID_INDEX)
app = application_lookup (a->api_client_index);
else if (a->name)
app = application_lookup_name (a->name);
else
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (app)
- return VNET_API_ERROR_APP_ALREADY_ATTACHED;
+ return SESSION_E_APP_ATTACHED;
/* Socket api sets the name and validates namespace prior to attach */
if (!a->use_sock_api)
@@ -1185,8 +1266,8 @@ vnet_application_attach (vnet_app_attach_args_t * a)
/**
* Detach application from vpp
*/
-int
-vnet_application_detach (vnet_app_detach_args_t * a)
+session_error_t
+vnet_application_detach (vnet_app_detach_args_t *a)
{
application_t *app;
@@ -1194,7 +1275,7 @@ vnet_application_detach (vnet_app_detach_args_t * a)
if (!app)
{
clib_warning ("app not attached");
- return VNET_API_ERROR_APPLICATION_NOT_ATTACHED;
+ return SESSION_E_NOAPP;
}
app_interface_check_thread_and_barrier (vnet_application_detach, a);
@@ -1202,11 +1283,15 @@ vnet_application_detach (vnet_app_detach_args_t * a)
return 0;
}
-
static u8
-session_endpoint_in_ns (session_endpoint_t * sep)
+session_endpoint_in_ns (session_endpoint_cfg_t *sep)
{
- u8 is_lep = session_endpoint_is_local (sep);
+ u8 is_lep;
+
+ if (sep->flags & SESSION_ENDPT_CFG_F_PROXY_LISTEN)
+ return 1;
+
+ is_lep = session_endpoint_is_local ((session_endpoint_t *) sep);
if (!is_lep && sep->sw_if_index != ENDPOINT_INVALID_INDEX
&& !ip_interface_has_address (sep->sw_if_index, &sep->ip, sep->is_ip4))
{
@@ -1215,6 +1300,7 @@ session_endpoint_in_ns (session_endpoint_t * sep)
sep->is_ip4);
return 0;
}
+
return (is_lep || ip_is_local (sep->fib_index, &sep->ip, sep->is_ip4));
}
@@ -1263,8 +1349,8 @@ session_endpoint_update_for_app (session_endpoint_cfg_t * sep,
}
}
-int
-vnet_listen (vnet_listen_args_t * a)
+session_error_t
+vnet_listen (vnet_listen_args_t *a)
{
app_listener_t *app_listener;
app_worker_t *app_wrk;
@@ -1284,7 +1370,7 @@ vnet_listen (vnet_listen_args_t * a)
a->sep_ext.app_wrk_index = app_wrk->wrk_index;
session_endpoint_update_for_app (&a->sep_ext, app, 0 /* is_connect */ );
- if (!session_endpoint_in_ns (&a->sep))
+ if (!session_endpoint_in_ns (&a->sep_ext))
return SESSION_E_INVALID_NS;
/*
@@ -1317,13 +1403,13 @@ vnet_listen (vnet_listen_args_t * a)
return 0;
}
-int
-vnet_connect (vnet_connect_args_t * a)
+session_error_t
+vnet_connect (vnet_connect_args_t *a)
{
app_worker_t *client_wrk;
application_t *client;
- ASSERT (vlib_thread_is_main_w_barrier ());
+ ASSERT (session_vlib_thread_is_cl_thread ());
if (session_endpoint_is_zero (&a->sep))
return SESSION_E_INVALID_RMT_IP;
@@ -1341,7 +1427,7 @@ vnet_connect (vnet_connect_args_t * a)
*/
if (application_has_local_scope (client))
{
- int rv;
+ session_error_t rv;
a->sep_ext.original_tp = a->sep_ext.transport_proto;
a->sep_ext.transport_proto = TRANSPORT_PROTO_NONE;
@@ -1356,8 +1442,8 @@ vnet_connect (vnet_connect_args_t * a)
return app_worker_connect_session (client_wrk, &a->sep_ext, &a->sh);
}
-int
-vnet_unlisten (vnet_unlisten_args_t * a)
+session_error_t
+vnet_unlisten (vnet_unlisten_args_t *a)
{
app_worker_t *app_wrk;
app_listener_t *al;
@@ -1387,7 +1473,7 @@ vnet_unlisten (vnet_unlisten_args_t * a)
return app_worker_stop_listen (app_wrk, al);
}
-int
+session_error_t
vnet_shutdown_session (vnet_shutdown_args_t *a)
{
app_worker_t *app_wrk;
@@ -1408,8 +1494,8 @@ vnet_shutdown_session (vnet_shutdown_args_t *a)
return 0;
}
-int
-vnet_disconnect_session (vnet_disconnect_args_t * a)
+session_error_t
+vnet_disconnect_session (vnet_disconnect_args_t *a)
{
app_worker_t *app_wrk;
session_t *s;
@@ -1449,7 +1535,7 @@ application_change_listener_owner (session_t * s, app_worker_t * app_wrk)
if (!app)
return SESSION_E_NOAPP;
- app_listener = app_listener_get (app, s->al_index);
+ app_listener = app_listener_get (s->al_index);
/* Only remove from lb for now */
app_listener->workers = clib_bitmap_set (app_listener->workers,
@@ -1493,6 +1579,12 @@ application_has_global_scope (application_t * app)
return app->flags & APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
}
+int
+application_original_dst_is_enabled (application_t *app)
+{
+ return app->flags & APP_OPTIONS_FLAGS_GET_ORIGINAL_DST;
+}
+
static clib_error_t *
application_start_stop_proxy_fib_proto (application_t * app, u8 fib_proto,
u8 transport_proto, u8 is_start)
@@ -1609,12 +1701,8 @@ application_setup_proxy (application_t * app)
ASSERT (application_is_proxy (app));
- /* *INDENT-OFF* */
- transport_proto_foreach (tp, ({
- if (transports & (1 << tp))
- application_start_stop_proxy (app, tp, 1);
- }));
- /* *INDENT-ON* */
+ transport_proto_foreach (tp, transports)
+ application_start_stop_proxy (app, tp, 1);
}
void
@@ -1625,12 +1713,8 @@ application_remove_proxy (application_t * app)
ASSERT (application_is_proxy (app));
- /* *INDENT-OFF* */
- transport_proto_foreach (tp, ({
- if (transports & (1 << tp))
- application_start_stop_proxy (app, tp, 0);
- }));
- /* *INDENT-ON* */
+ transport_proto_foreach (tp, transports)
+ application_start_stop_proxy (app, tp, 0);
}
segment_manager_props_t *
@@ -1657,12 +1741,11 @@ application_format_listeners (application_t * app, int verbose)
if (!app)
{
- vlib_cli_output (vm, "%U", format_app_worker_listener, 0 /* header */ ,
+ vlib_cli_output (vm, "%U", format_app_worker_listener, NULL /* header */,
0, 0, verbose);
return;
}
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
if (hash_elts (app_wrk->listeners_table) == 0)
@@ -1672,7 +1755,6 @@ application_format_listeners (application_t * app, int verbose)
handle, sm_index, verbose);
}));
}
- /* *INDENT-ON* */
}
static void
@@ -1687,12 +1769,10 @@ application_format_connects (application_t * app, int verbose)
return;
}
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
app_worker_format_connects (app_wrk, verbose);
}
- /* *INDENT-ON* */
}
u8 *
@@ -1793,12 +1873,10 @@ format_application (u8 * s, va_list * args)
format_memory_size, props->rx_fifo_size,
format_memory_size, props->tx_fifo_size);
- /* *INDENT-OFF* */
pool_foreach (wrk_map, app->worker_maps) {
app_wrk = app_worker_get (wrk_map->wrk_index);
s = format (s, "%U", format_app_worker, app_wrk);
}
- /* *INDENT-ON* */
return s;
}
@@ -1816,11 +1894,9 @@ application_format_all_listeners (vlib_main_t * vm, int verbose)
application_format_listeners (0, verbose);
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
application_format_listeners (app, verbose);
}
- /* *INDENT-ON* */
}
void
@@ -1836,11 +1912,9 @@ application_format_all_clients (vlib_main_t * vm, int verbose)
application_format_connects (0, verbose);
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
application_format_connects (app, verbose);
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -1850,11 +1924,9 @@ show_certificate_command_fn (vlib_main_t * vm, unformat_input_t * input,
app_cert_key_pair_t *ckpair;
session_cli_return_if_not_enabled ();
- /* *INDENT-OFF* */
pool_foreach (ckpair, app_main.cert_key_pair_store) {
vlib_cli_output (vm, "%U", format_cert_key_pair, ckpair);
}
- /* *INDENT-ON* */
return 0;
}
@@ -1865,14 +1937,12 @@ appliction_format_app_mq (vlib_main_t * vm, application_t * app)
app_worker_t *wrk;
int i;
- /* *INDENT-OFF* */
pool_foreach (map, app->worker_maps) {
wrk = app_worker_get (map->wrk_index);
vlib_cli_output (vm, "[A%d][%d]%U", app->app_index,
map->wrk_index, format_svm_msg_q,
wrk->event_queue);
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (app->rx_mqs); i++)
vlib_cli_output (vm, "[A%d][R%d]%U", app->app_index, i, format_svm_msg_q,
@@ -1893,11 +1963,9 @@ appliction_format_all_app_mq (vlib_main_t * vm)
session_main_get_vpp_event_queue (i));
}
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
appliction_format_app_mq (vm, app);
}
- /* *INDENT-ON* */
return 0;
}
@@ -1905,10 +1973,11 @@ static clib_error_t *
show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- int do_server = 0, do_client = 0, do_mq = 0;
+ int do_server = 0, do_client = 0, do_mq = 0, do_transports = 0;
application_t *app;
u32 app_index = ~0;
int verbose = 0;
+ u8 is_ta;
session_cli_return_if_not_enabled ();
@@ -1918,6 +1987,8 @@ show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
do_server = 1;
else if (unformat (input, "client"))
do_client = 1;
+ else if (unformat (input, "transports"))
+ do_transports = 1;
else if (unformat (input, "mq"))
do_mq = 1;
else if (unformat (input, "%u", &app_index))
@@ -1971,11 +2042,11 @@ show_app_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (!do_server && !do_client)
{
vlib_cli_output (vm, "%U", format_application, 0, 0);
- /* *INDENT-OFF* */
pool_foreach (app, app_main.app_pool) {
- vlib_cli_output (vm, "%U", format_application, app, 0);
+ is_ta = app->flags & APP_OPTIONS_FLAGS_IS_TRANSPORT_APP;
+ if ((!do_transports && !is_ta) || (do_transports && is_ta))
+ vlib_cli_output (vm, "%U", format_application, app, 0);
}
- /* *INDENT-ON* */
}
return 0;
@@ -2045,7 +2116,7 @@ vnet_app_del_cert_key_pair (u32 index)
u32 *app_index;
if (!(ckpair = app_cert_key_pair_get_if_valid (index)))
- return (VNET_API_ERROR_INVALID_VALUE);
+ return SESSION_E_INVALID;
vec_foreach (app_index, ckpair->app_interests)
{
@@ -2078,23 +2149,20 @@ application_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (application_init);
-VLIB_CLI_COMMAND (show_app_command, static) =
-{
+VLIB_CLI_COMMAND (show_app_command, static) = {
.path = "show app",
- .short_help = "show app [app_id] [server|client] [mq] [verbose]",
+ .short_help = "show app [index] [server|client] [mq] [verbose] "
+ "[transports]",
.function = show_app_command_fn,
};
-VLIB_CLI_COMMAND (show_certificate_command, static) =
-{
+VLIB_CLI_COMMAND (show_certificate_command, static) = {
.path = "show app certificate",
.short_help = "list app certs and keys present in store",
.function = show_certificate_command_fn,
};
-/* *INDENT-ON* */
crypto_engine_type_t
app_crypto_engine_type_add (void)
diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h
index 5ddf1d21fe3..c68a911230f 100644
--- a/src/vnet/session/application.h
+++ b/src/vnet/session/application.h
@@ -29,6 +29,16 @@
#define APP_DBG(_fmt, _args...)
#endif
+typedef struct app_wrk_postponed_msg_
+{
+ u32 len;
+ u8 event_type;
+ u8 ring;
+ u8 is_sapi;
+ int fd;
+ u8 data[SESSION_CTRL_MSG_TX_MAX_SIZE];
+} app_wrk_postponed_msg_t;
+
typedef struct app_worker_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -59,11 +69,20 @@ typedef struct app_worker_
/** API index for the worker. Needed for multi-process apps */
u32 api_client_index;
+ /** Set if mq is congested */
+ u8 mq_congested;
+
u8 app_is_builtin;
/** Pool of half-open session handles. Tracked in case worker detaches */
session_handle_t *half_open_table;
+ /* Per vpp worker fifos of events for app worker */
+ session_event_t **wrk_evts;
+
+ /* Vector of vpp workers mq congestion flags */
+ u8 *wrk_mq_congested;
+
/** Protects detached seg managers */
clib_spinlock_t detached_seg_managers_lock;
@@ -87,6 +106,8 @@ typedef struct app_listener_
session_handle_t ls_handle; /**< session handle of the local or global
listening session that also identifies
the app listener */
+ u32 *cl_listeners; /**< vector that maps app workers to their
+ cl sessions with fifos */
} app_listener_t;
typedef enum app_rx_mq_flags_
@@ -130,9 +151,6 @@ typedef struct application_
u16 proxied_transports;
- /** Pool of listeners for the app */
- app_listener_t *listeners;
-
/** Preferred tls engine */
u8 tls_engine;
@@ -179,6 +197,9 @@ typedef struct app_main_
*/
application_t *app_pool;
+ /** Pool of app listeners */
+ app_listener_t *listeners;
+
/**
* Hash table of apps by api client index
*/
@@ -227,7 +248,7 @@ typedef struct _vnet_app_worker_add_del_args
#define APP_NS_INVALID_INDEX ((u32)~0)
#define APP_INVALID_SEGMENT_MANAGER_INDEX ((u32) ~0)
-app_listener_t *app_listener_get (application_t * app, u32 al_index);
+app_listener_t *app_listener_get (u32 al_index);
int app_listener_alloc_and_init (application_t * app,
session_endpoint_cfg_t * sep,
app_listener_t ** listener);
@@ -235,6 +256,8 @@ void app_listener_cleanup (app_listener_t * app_listener);
session_handle_t app_listener_handle (app_listener_t * app_listener);
app_listener_t *app_listener_lookup (application_t * app,
session_endpoint_cfg_t * sep);
+session_t *app_listener_select_wrk_cl_session (session_t *ls,
+ session_dgram_hdr_t *hdr);
/**
* Get app listener handle for listening session
@@ -258,9 +281,9 @@ session_handle_t app_listen_session_handle (session_t * ls);
* @return pointer to app listener or 0
*/
app_listener_t *app_listener_get_w_handle (session_handle_t handle);
-app_listener_t *app_listener_get_w_session (session_t * ls);
session_t *app_listener_get_session (app_listener_t * al);
session_t *app_listener_get_local_session (app_listener_t * al);
+session_t *app_listener_get_wrk_cl_session (app_listener_t *al, u32 wrk_index);
application_t *application_get (u32 index);
application_t *application_get_if_valid (u32 index);
@@ -280,6 +303,8 @@ u8 application_has_local_scope (application_t * app);
u8 application_has_global_scope (application_t * app);
void application_setup_proxy (application_t * app);
void application_remove_proxy (application_t * app);
+void application_namespace_cleanup (app_namespace_t *app_ns);
+int application_original_dst_is_enabled (application_t *app);
segment_manager_props_t *application_get_segment_manager_properties (u32
app_index);
@@ -296,6 +321,12 @@ void application_enable_rx_mqs_nodes (u8 is_en);
* App worker
*/
+always_inline u8
+app_worker_mq_is_congested (app_worker_t *app_wrk)
+{
+ return app_wrk->mq_congested > 0;
+}
+
app_worker_t *app_worker_alloc (application_t * app);
int application_alloc_worker_and_init (application_t * app,
app_worker_t ** wrk);
@@ -306,9 +337,14 @@ int app_worker_own_session (app_worker_t * app_wrk, session_t * s);
void app_worker_free (app_worker_t * app_wrk);
int app_worker_connect_session (app_worker_t *app, session_endpoint_cfg_t *sep,
session_handle_t *rsh);
-int app_worker_start_listen (app_worker_t * app_wrk, app_listener_t * lstnr);
+session_error_t app_worker_start_listen (app_worker_t *app_wrk,
+ app_listener_t *lstnr);
int app_worker_stop_listen (app_worker_t * app_wrk, app_listener_t * al);
int app_worker_init_accepted (session_t * s);
+int app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh,
+ u32 opaque, session_error_t err);
+int app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh,
+ u32 opaque, session_error_t err);
int app_worker_accept_notify (app_worker_t * app_wrk, session_t * s);
int app_worker_init_connected (app_worker_t * app_wrk, session_t * s);
int app_worker_connect_notify (app_worker_t * app_wrk, session_t * s,
@@ -321,13 +357,21 @@ int app_worker_transport_closed_notify (app_worker_t * app_wrk,
int app_worker_reset_notify (app_worker_t * app_wrk, session_t * s);
int app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s,
session_cleanup_ntf_t ntf);
+int app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s,
+ session_cleanup_ntf_t ntf,
+ void (*cleanup_cb) (session_t *s));
int app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s,
session_handle_t new_sh);
-int app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s);
-int app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s);
+int app_worker_rx_notify (app_worker_t *app_wrk, session_t *s);
int app_worker_session_fifo_tuning (app_worker_t * app_wrk, session_t * s,
svm_fifo_t * f,
session_ft_action_t act, u32 len);
+void app_worker_add_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t evt_type);
+void app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index,
+ session_event_t *evt);
+int app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_del_all_events (app_worker_t *app_wrk);
segment_manager_t *app_worker_get_listen_segment_manager (app_worker_t *,
session_t *);
segment_manager_t *app_worker_get_connect_segment_manager (app_worker_t *);
@@ -338,9 +382,14 @@ int app_worker_del_segment_notify (app_worker_t * app_wrk,
u32 app_worker_n_listeners (app_worker_t * app);
session_t *app_worker_first_listener (app_worker_t * app,
u8 fib_proto, u8 transport_proto);
-int app_worker_send_event (app_worker_t * app, session_t * s, u8 evt);
-int app_worker_lock_and_send_event (app_worker_t * app, session_t * s,
- u8 evt_type);
+void app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len, int fd);
+void app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len);
+u8 app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index);
+void app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk,
+ u32 thread_index);
session_t *app_worker_proxy_listener (app_worker_t * app, u8 fib_proto,
u8 transport_proto);
void app_worker_del_detached_sm (app_worker_t * app_wrk, u32 sm_index);
@@ -349,7 +398,7 @@ u8 *format_app_worker_listener (u8 * s, va_list * args);
u8 *format_crypto_engine (u8 * s, va_list * args);
u8 *format_crypto_context (u8 * s, va_list * args);
void app_worker_format_connects (app_worker_t * app_wrk, int verbose);
-int vnet_app_worker_add_del (vnet_app_worker_add_del_args_t * a);
+session_error_t vnet_app_worker_add_del (vnet_app_worker_add_del_args_t *a);
uword unformat_application_proto (unformat_input_t * input, va_list * args);
@@ -357,17 +406,17 @@ app_cert_key_pair_t *app_cert_key_pair_get (u32 index);
app_cert_key_pair_t *app_cert_key_pair_get_if_valid (u32 index);
app_cert_key_pair_t *app_cert_key_pair_get_default ();
-/* Needed while we support both bapi and mq ctrl messages */
-int mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
- session_handle_t handle, int rv);
-int mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
- session_t * s, session_error_t err);
-void mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh,
- u32 context, int rv);
+void sapi_socket_close_w_handle (u32 api_handle);
crypto_engine_type_t app_crypto_engine_type_add (void);
u8 app_crypto_engine_n_types (void);
+static inline u8
+app_worker_application_is_builtin (app_worker_t *app_wrk)
+{
+ return app_wrk->app_is_builtin;
+}
+
#endif /* SRC_VNET_SESSION_APPLICATION_H_ */
/*
diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c
index 74f456a1eab..a62f914d43a 100644
--- a/src/vnet/session/application_interface.c
+++ b/src/vnet/session/application_interface.c
@@ -73,8 +73,8 @@ unformat_vnet_uri (unformat_input_t * input, va_list * args)
static u8 *cache_uri;
static session_endpoint_cfg_t *cache_sep;
-int
-parse_uri (char *uri, session_endpoint_cfg_t * sep)
+session_error_t
+parse_uri (char *uri, session_endpoint_cfg_t *sep)
{
unformat_input_t _input, *input = &_input;
@@ -92,7 +92,7 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep)
if (!unformat (input, "%U", unformat_vnet_uri, sep))
{
unformat_free (input);
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
}
unformat_free (input);
@@ -106,8 +106,8 @@ parse_uri (char *uri, session_endpoint_cfg_t * sep)
return 0;
}
-int
-vnet_bind_uri (vnet_listen_args_t * a)
+session_error_t
+vnet_bind_uri (vnet_listen_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
int rv;
@@ -120,36 +120,36 @@ vnet_bind_uri (vnet_listen_args_t * a)
return vnet_listen (a);
}
-int
-vnet_unbind_uri (vnet_unlisten_args_t * a)
+session_error_t
+vnet_unbind_uri (vnet_unlisten_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
application_t *app;
session_t *listener;
u32 table_index;
- int rv;
+ session_error_t rv;
if ((rv = parse_uri (a->uri, &sep)))
return rv;
app = application_get (a->app_index);
if (!app)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
table_index = application_session_table (app, fib_ip_proto (!sep.is_ip4));
listener = session_lookup_listener (table_index,
(session_endpoint_t *) & sep);
if (!listener)
- return VNET_API_ERROR_ADDRESS_NOT_IN_USE;
+ return SESSION_E_ADDR_NOT_IN_USE;
a->handle = listen_session_get_handle (listener);
return vnet_unlisten (a);
}
-int
-vnet_connect_uri (vnet_connect_args_t * a)
+session_error_t
+vnet_connect_uri (vnet_connect_args_t *a)
{
session_endpoint_cfg_t sep = SESSION_ENDPOINT_CFG_NULL;
- int rv;
+ session_error_t rv;
if ((rv = parse_uri (a->uri, &sep)))
return rv;
diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h
index b10dd6c150d..f175e4a58c6 100644
--- a/src/vnet/session/application_interface.h
+++ b/src/vnet/session/application_interface.h
@@ -62,6 +62,13 @@ typedef struct session_cb_vft_
/** Notify app that session pool migration happened */
void (*session_migrate_callback) (session_t * s, session_handle_t new_sh);
+ /** Notify app (external only) that listen was processed */
+ int (*session_listened_callback) (u32 app_wrk_index, u32 api_context,
+ session_handle_t handle, int rv);
+ /** Notify app (external only) that unlisten was processed */
+ void (*session_unlistened_callback) (u32 app_wrk_index, session_handle_t sh,
+ u32 context, int rv);
+
/** Direct RX callback for built-in application */
int (*builtin_app_rx_callback) (session_t * session);
@@ -74,6 +81,8 @@ typedef struct session_cb_vft_
/** Delegate fifo-tuning-logic to application */
int (*fifo_tuning_callback) (session_t * s, svm_fifo_t * f,
session_ft_action_t act, u32 bytes);
+ /** Custom fifo allocation for proxy */
+ int (*proxy_alloc_session_fifos) (session_t *s);
} session_cb_vft_t;
@@ -117,7 +126,7 @@ typedef struct _vnet_bind_args_t
/*
* Results
*/
- u64 handle;
+ session_handle_t handle;
} vnet_listen_args_t;
typedef struct _vnet_unlisten_args_t
@@ -125,7 +134,7 @@ typedef struct _vnet_unlisten_args_t
union
{
char *uri;
- u64 handle; /**< Session handle */
+ session_handle_t handle; /**< Session handle */
};
u32 app_index; /**< Owning application index */
u32 wrk_map_index; /**< App's local pool worker index */
@@ -232,7 +241,9 @@ typedef enum
_ (USE_GLOBAL_SCOPE, "App can use global session scope") \
_ (USE_LOCAL_SCOPE, "App can use local session scope") \
_ (EVT_MQ_USE_EVENTFD, "Use eventfds for signaling") \
- _ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs")
+ _ (MEMFD_FOR_BUILTIN, "Use memfd for builtin app segs") \
+ _ (USE_HUGE_PAGE, "Use huge page for FIFO") \
+ _ (GET_ORIGINAL_DST, "Get original dst enabled")
typedef enum _app_options
{
@@ -269,24 +280,26 @@ typedef enum session_fd_flag_
#undef _
} session_fd_flag_t;
-int parse_uri (char *uri, session_endpoint_cfg_t * sep);
-int vnet_bind_uri (vnet_listen_args_t *);
-int vnet_unbind_uri (vnet_unlisten_args_t * a);
-int vnet_connect_uri (vnet_connect_args_t * a);
+session_error_t parse_uri (char *uri, session_endpoint_cfg_t *sep);
+session_error_t vnet_bind_uri (vnet_listen_args_t *);
+session_error_t vnet_unbind_uri (vnet_unlisten_args_t *a);
+session_error_t vnet_connect_uri (vnet_connect_args_t *a);
-int vnet_application_attach (vnet_app_attach_args_t * a);
-int vnet_application_detach (vnet_app_detach_args_t * a);
-int vnet_listen (vnet_listen_args_t * a);
-int vnet_connect (vnet_connect_args_t * a);
-int vnet_unlisten (vnet_unlisten_args_t * a);
-int vnet_shutdown_session (vnet_shutdown_args_t *a);
-int vnet_disconnect_session (vnet_disconnect_args_t * a);
+session_error_t vnet_application_attach (vnet_app_attach_args_t *a);
+session_error_t vnet_application_detach (vnet_app_detach_args_t *a);
+session_error_t vnet_listen (vnet_listen_args_t *a);
+session_error_t vnet_connect (vnet_connect_args_t *a);
+session_error_t vnet_unlisten (vnet_unlisten_args_t *a);
+session_error_t vnet_shutdown_session (vnet_shutdown_args_t *a);
+session_error_t vnet_disconnect_session (vnet_disconnect_args_t *a);
int vnet_app_add_cert_key_pair (vnet_app_add_cert_key_pair_args_t * a);
int vnet_app_del_cert_key_pair (u32 index);
/** Ask for app cb on pair deletion */
int vnet_app_add_cert_key_interest (u32 index, u32 app_index);
+uword unformat_vnet_uri (unformat_input_t *input, va_list *args);
+
typedef struct app_session_transport_
{
ip46_address_t rmt_ip; /**< remote ip */
@@ -296,15 +309,15 @@ typedef struct app_session_transport_
u8 is_ip4; /**< set if uses ip4 networking */
} app_session_transport_t;
-#define foreach_app_session_field \
- _(svm_fifo_t, *rx_fifo) /**< rx fifo */ \
- _(svm_fifo_t, *tx_fifo) /**< tx fifo */ \
- _(session_type_t, session_type) /**< session type */ \
- _(volatile u8, session_state) /**< session state */ \
- _(u32, session_index) /**< index in owning pool */ \
- _(app_session_transport_t, transport) /**< transport info */ \
- _(svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \
- _(u8, is_dgram) /**< flag for dgram mode */ \
+#define foreach_app_session_field \
+ _ (svm_fifo_t, *rx_fifo) /**< rx fifo */ \
+ _ (svm_fifo_t, *tx_fifo) /**< tx fifo */ \
+ _ (session_type_t, session_type) /**< session type */ \
+ _ (volatile u8, session_state) /**< session state */ \
+ _ (u32, session_index) /**< index in owning pool */ \
+ _ (app_session_transport_t, transport) /**< transport info */ \
+ _ (svm_msg_q_t, *vpp_evt_q) /**< vpp event queue */ \
+ _ (u8, is_dgram) /**< flag for dgram mode */
typedef struct
{
@@ -343,7 +356,7 @@ STATIC_ASSERT (sizeof (session_listen_uri_msg_t) <= SESSION_CTRL_MSG_MAX_SIZE,
typedef struct session_bound_msg_
{
u32 context;
- u64 handle;
+ session_handle_t handle;
i32 retval;
u8 lcl_is_ip4;
u8 lcl_ip[16];
@@ -366,15 +379,15 @@ typedef struct session_unlisten_msg_
typedef struct session_unlisten_reply_msg_
{
u32 context;
- u64 handle;
+ session_handle_t handle;
i32 retval;
} __clib_packed session_unlisten_reply_msg_t;
typedef struct session_accepted_msg_
{
u32 context;
- u64 listener_handle;
- u64 handle;
+ session_handle_t listener_handle;
+ session_handle_t handle;
uword server_rx_fifo;
uword server_tx_fifo;
u64 segment_handle;
@@ -383,13 +396,15 @@ typedef struct session_accepted_msg_
transport_endpoint_t lcl;
transport_endpoint_t rmt;
u8 flags;
+ u32 original_dst_ip4;
+ u16 original_dst_port;
} __clib_packed session_accepted_msg_t;
typedef struct session_accepted_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_accepted_reply_msg_t;
typedef struct session_connect_msg_
@@ -408,6 +423,7 @@ typedef struct session_connect_msg_
u32 ckpair_index;
u8 crypto_engine;
u8 flags;
+ u8 dscp;
uword ext_config;
} __clib_packed session_connect_msg_t;
@@ -428,7 +444,7 @@ typedef struct session_connected_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
uword server_rx_fifo;
uword server_tx_fifo;
u64 segment_handle;
@@ -458,33 +474,33 @@ typedef struct session_disconnected_msg_
{
u32 client_index;
u32 context;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_disconnected_msg_t;
typedef struct session_disconnected_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_disconnected_reply_msg_t;
typedef struct session_reset_msg_
{
u32 client_index;
u32 context;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_reset_msg_t;
typedef struct session_reset_reply_msg_
{
u32 context;
i32 retval;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_reset_reply_msg_t;
typedef struct session_req_worker_update_msg_
{
- u64 session_handle;
+ session_handle_t session_handle;
} __clib_packed session_req_worker_update_msg_t;
/* NOTE: using u16 for wrk indices because message needs to fit in 18B */
@@ -493,12 +509,12 @@ typedef struct session_worker_update_msg_
u32 client_index;
u16 wrk_index;
u16 req_wrk_index;
- u64 handle;
+ session_handle_t handle;
} __clib_packed session_worker_update_msg_t;
typedef struct session_worker_update_reply_msg_
{
- u64 handle;
+ session_handle_t handle;
uword rx_fifo;
uword tx_fifo;
u64 segment_handle;
@@ -612,8 +628,8 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type,
{
if (svm_msg_q_try_lock (mq))
return -1;
- if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)
- || svm_msg_q_is_full (mq)))
+ if (PREDICT_FALSE (
+ svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
{
svm_msg_q_unlock (mq);
return -2;
@@ -628,9 +644,8 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type,
else
{
svm_msg_q_lock (mq);
- while (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)
- || svm_msg_q_is_full (mq))
- svm_msg_q_wait_prod (mq);
+ while (svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING))
+ svm_msg_q_or_ring_wait_prod (mq, SESSION_MQ_IO_EVT_RING);
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->session_index = session_index;
@@ -640,14 +655,18 @@ app_send_io_evt_to_vpp (svm_msg_q_t * mq, u32 session_index, u8 evt_type,
}
}
+#define app_send_dgram_raw(f, at, vpp_evt_q, data, len, evt_type, do_evt, \
+ noblock) \
+ app_send_dgram_raw_gso (f, at, vpp_evt_q, data, len, 0, evt_type, do_evt, \
+ noblock)
+
always_inline int
-app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at,
- svm_msg_q_t * vpp_evt_q, u8 * data, u32 len, u8 evt_type,
- u8 do_evt, u8 noblock)
+app_send_dgram_raw_gso (svm_fifo_t *f, app_session_transport_t *at,
+ svm_msg_q_t *vpp_evt_q, u8 *data, u32 len,
+ u16 gso_size, u8 evt_type, u8 do_evt, u8 noblock)
{
session_dgram_hdr_t hdr;
int rv;
-
if (svm_fifo_max_enqueue_prod (f) < (sizeof (session_dgram_hdr_t) + len))
return 0;
@@ -658,10 +677,8 @@ app_send_dgram_raw (svm_fifo_t * f, app_session_transport_t * at,
hdr.rmt_port = at->rmt_port;
clib_memcpy_fast (&hdr.lcl_ip, &at->lcl_ip, sizeof (ip46_address_t));
hdr.lcl_port = at->lcl_port;
-
- /* *INDENT-OFF* */
+ hdr.gso_size = gso_size;
svm_fifo_seg_t segs[2] = {{ (u8 *) &hdr, sizeof (hdr) }, { data, len }};
- /* *INDENT-ON* */
rv = svm_fifo_enqueue_segments (f, segs, 2, 0 /* allow partial */ );
if (PREDICT_FALSE (rv < 0))
@@ -786,13 +803,11 @@ app_recv (app_session_t * s, u8 * data, u32 len)
return app_recv_stream (s, data, len);
}
-/* *INDENT-OFF* */
static char *session_error_str[] = {
#define _(sym, str) str,
foreach_session_error
#undef _
};
-/* *INDENT-ON* */
static inline u8 *
format_session_error (u8 * s, va_list * args)
@@ -817,6 +832,8 @@ typedef enum app_sapi_msg_type
APP_SAPI_MSG_TYPE_ADD_DEL_WORKER,
APP_SAPI_MSG_TYPE_ADD_DEL_WORKER_REPLY,
APP_SAPI_MSG_TYPE_SEND_FDS,
+ APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY,
+ APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY,
} __clib_packed app_sapi_msg_type_e;
typedef struct app_sapi_attach_msg_
@@ -861,6 +878,22 @@ typedef struct app_sapi_worker_add_del_reply_msg_
u8 is_add;
} __clib_packed app_sapi_worker_add_del_reply_msg_t;
+typedef struct app_sapi_cert_key_add_del_msg_
+{
+ u32 context;
+ u32 index;
+ u16 cert_len;
+ u16 certkey_len;
+ u8 is_add;
+} __clib_packed app_sapi_cert_key_add_del_msg_t;
+
+typedef struct app_sapi_cert_key_add_del_reply_msg_
+{
+ u32 context;
+ i32 retval;
+ u32 index;
+} __clib_packed app_sapi_cert_key_add_del_reply_msg_t;
+
typedef struct app_sapi_msg_
{
app_sapi_msg_type_e type;
@@ -870,6 +903,8 @@ typedef struct app_sapi_msg_
app_sapi_attach_reply_msg_t attach_reply;
app_sapi_worker_add_del_msg_t worker_add_del;
app_sapi_worker_add_del_reply_msg_t worker_add_del_reply;
+ app_sapi_cert_key_add_del_msg_t cert_key_add_del;
+ app_sapi_cert_key_add_del_reply_msg_t cert_key_add_del_reply;
};
} __clib_packed app_sapi_msg_t;
diff --git a/src/vnet/session/application_local.c b/src/vnet/session/application_local.c
index 3c62dade0f5..3cb743d10e0 100644
--- a/src/vnet/session/application_local.c
+++ b/src/vnet/session/application_local.c
@@ -41,9 +41,25 @@ typedef struct ct_segments_
ct_segment_t *segments;
} ct_segments_ctx_t;
+typedef struct ct_cleanup_req_
+{
+ u32 ct_index;
+} ct_cleanup_req_t;
+
+typedef struct ct_worker_
+{
+ ct_connection_t *connections; /**< Per-worker connection pools */
+ u32 *pending_connects; /**< Fifo of pending ho indices */
+ ct_cleanup_req_t *pending_cleanups; /**< Fifo of pending indices */
+ u8 have_connects; /**< Set if connect rpc pending */
+ u8 have_cleanups; /**< Set if cleanup rpc pending */
+ clib_spinlock_t pending_connects_lock; /**< Lock for pending connects */
+ u32 *new_connects; /**< Burst of connects to be done */
+} ct_worker_t;
+
typedef struct ct_main_
{
- ct_connection_t **connections; /**< Per-worker connection pools */
+ ct_worker_t *wrk; /**< Per-worker state */
u32 n_workers; /**< Number of vpp workers */
u32 n_sessions; /**< Cumulative sessions counter */
u32 *ho_reusable; /**< Vector of reusable ho indices */
@@ -51,17 +67,28 @@ typedef struct ct_main_
clib_rwlock_t app_segs_lock; /**< RW lock for seg contexts */
uword *app_segs_ctxs_table; /**< App handle to segment pool map */
ct_segments_ctx_t *app_seg_ctxs; /**< Pool of ct segment contexts */
+ u32 **fwrk_pending_connects; /**< First wrk pending half-opens */
+ u32 fwrk_thread; /**< First worker thread */
+ u8 fwrk_have_flush; /**< Flag for connect flush rpc */
} ct_main_t;
static ct_main_t ct_main;
+static inline ct_worker_t *
+ct_worker_get (u32 thread_index)
+{
+ return &ct_main.wrk[thread_index];
+}
+
static ct_connection_t *
ct_connection_alloc (u32 thread_index)
{
+ ct_worker_t *wrk = ct_worker_get (thread_index);
ct_connection_t *ct;
- pool_get_zero (ct_main.connections[thread_index], ct);
- ct->c_c_index = ct - ct_main.connections[thread_index];
+ pool_get_aligned_safe (wrk->connections, ct, CLIB_CACHE_LINE_BYTES);
+ clib_memset (ct, 0, sizeof (*ct));
+ ct->c_c_index = ct - wrk->connections;
ct->c_thread_index = thread_index;
ct->client_wrk = ~0;
ct->server_wrk = ~0;
@@ -73,22 +100,25 @@ ct_connection_alloc (u32 thread_index)
static ct_connection_t *
ct_connection_get (u32 ct_index, u32 thread_index)
{
- if (pool_is_free_index (ct_main.connections[thread_index], ct_index))
+ ct_worker_t *wrk = ct_worker_get (thread_index);
+
+ if (pool_is_free_index (wrk->connections, ct_index))
return 0;
- return pool_elt_at_index (ct_main.connections[thread_index], ct_index);
+ return pool_elt_at_index (wrk->connections, ct_index);
}
static void
ct_connection_free (ct_connection_t * ct)
{
+ ct_worker_t *wrk = ct_worker_get (ct->c_thread_index);
+
if (CLIB_DEBUG)
{
- u32 thread_index = ct->c_thread_index;
- memset (ct, 0xfc, sizeof (*ct));
- pool_put (ct_main.connections[thread_index], ct);
+ clib_memset (ct, 0xfc, sizeof (*ct));
+ pool_put (wrk->connections, ct);
return;
}
- pool_put (ct_main.connections[ct->c_thread_index], ct);
+ pool_put (wrk->connections, ct);
}
static ct_connection_t *
@@ -99,11 +129,18 @@ ct_half_open_alloc (void)
clib_spinlock_lock (&cm->ho_reuseable_lock);
vec_foreach (hip, cm->ho_reusable)
- pool_put_index (cm->connections[0], *hip);
+ pool_put_index (cm->wrk[cm->fwrk_thread].connections, *hip);
vec_reset_length (cm->ho_reusable);
clib_spinlock_unlock (&cm->ho_reuseable_lock);
- return ct_connection_alloc (0);
+ return ct_connection_alloc (cm->fwrk_thread);
+}
+
+static ct_connection_t *
+ct_half_open_get (u32 ho_index)
+{
+ ct_main_t *cm = &ct_main;
+ return ct_connection_get (ho_index, cm->fwrk_thread);
}
void
@@ -137,6 +174,33 @@ ct_session_endpoint (session_t * ll, session_endpoint_t * sep)
}
static void
+ct_set_invalid_app_wrk (ct_connection_t *ct, u8 is_client)
+{
+ ct_connection_t *peer_ct;
+
+ peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index);
+
+ if (is_client)
+ {
+ ct->client_wrk = APP_INVALID_INDEX;
+ if (peer_ct)
+ ct->client_wrk = APP_INVALID_INDEX;
+ }
+ else
+ {
+ ct->server_wrk = APP_INVALID_INDEX;
+ if (peer_ct)
+ ct->server_wrk = APP_INVALID_INDEX;
+ }
+}
+
+static inline u64
+ct_client_seg_handle (u64 server_sh, u32 client_wrk_index)
+{
+ return (((u64) client_wrk_index << 56) | server_sh);
+}
+
+static void
ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo,
svm_fifo_t *tx_fifo)
{
@@ -146,8 +210,8 @@ ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo,
app_worker_t *app_wrk;
ct_segment_t *ct_seg;
fifo_segment_t *fs;
- u8 del_segment = 0;
u32 seg_index;
+ session_t *s;
int cnt;
/*
@@ -202,77 +266,82 @@ ct_session_dealloc_fifos (ct_connection_t *ct, svm_fifo_t *rx_fifo,
if (ct->flags & CT_CONN_F_CLIENT)
{
cnt = ct_seg->client_n_sessions;
- if (!cnt)
- ct_seg->flags |= CT_SEGMENT_F_CLIENT_DETACHED;
+ if (cnt)
+ goto done;
+ ct_seg->flags |= CT_SEGMENT_F_CLIENT_DETACHED;
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (s->app_wrk_index == APP_INVALID_INDEX)
+ ct_set_invalid_app_wrk (ct, 1 /* is_client */);
}
else
{
cnt = ct_seg->server_n_sessions;
- if (!cnt)
- ct_seg->flags |= CT_SEGMENT_F_SERVER_DETACHED;
+ if (cnt)
+ goto done;
+ ct_seg->flags |= CT_SEGMENT_F_SERVER_DETACHED;
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (s->app_wrk_index == APP_INVALID_INDEX)
+ ct_set_invalid_app_wrk (ct, 0 /* is_client */);
}
+ if (!(ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED) ||
+ !(ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED))
+ goto done;
+
/*
* Remove segment context because both client and server detached
*/
- if (!cnt && (ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED) &&
- (ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED))
- {
- pool_put_index (seg_ctx->segments, ct->ct_seg_index);
+ pool_put_index (seg_ctx->segments, ct->ct_seg_index);
- /*
- * No more segment indices left, remove the segments context
- */
- if (!pool_elts (seg_ctx->segments))
- {
- u64 table_handle = seg_ctx->client_wrk << 16 | seg_ctx->server_wrk;
- table_handle = (u64) seg_ctx->sm_index << 32 | table_handle;
- hash_unset (cm->app_segs_ctxs_table, table_handle);
- pool_free (seg_ctx->segments);
- pool_put_index (cm->app_seg_ctxs, ct->seg_ctx_index);
- }
- del_segment = 1;
+ /*
+ * No more segment indices left, remove the segments context
+ */
+ if (!pool_elts (seg_ctx->segments))
+ {
+ u64 table_handle = seg_ctx->client_wrk << 16 | seg_ctx->server_wrk;
+ table_handle = (u64) seg_ctx->sm_index << 32 | table_handle;
+ hash_unset (cm->app_segs_ctxs_table, table_handle);
+ pool_free (seg_ctx->segments);
+ pool_put_index (cm->app_seg_ctxs, ct->seg_ctx_index);
}
- clib_rwlock_writer_unlock (&cm->app_segs_lock);
-
/*
- * Session counter went to zero, notify the app that detached
+ * Segment to be removed so notify both apps
*/
- if (cnt)
- return;
- if (ct->flags & CT_CONN_F_CLIENT)
- {
- app_wrk = app_worker_get_if_valid (ct->client_wrk);
- /* Determine if client app still needs notification, i.e., if it is
- * still attached. If client detached and this is the last ct session
- * on this segment, then its connects segment manager should also be
- * detached, so do not send notification */
- if (app_wrk)
- {
- segment_manager_t *csm;
- csm = app_worker_get_connect_segment_manager (app_wrk);
- if (!segment_manager_app_detached (csm))
- app_worker_del_segment_notify (app_wrk, ct->segment_handle);
- }
- }
- else if (!segment_manager_app_detached (sm))
+ app_wrk = app_worker_get_if_valid (ct->client_wrk);
+ /* Determine if client app still needs notification, i.e., if it is
+ * still attached. If client detached and this is the last ct session
+ * on this segment, then its connects segment manager should also be
+ * detached, so do not send notification */
+ if (app_wrk)
{
- app_wrk = app_worker_get (ct->server_wrk);
- app_worker_del_segment_notify (app_wrk, ct->segment_handle);
+ segment_manager_t *csm;
+ csm = app_worker_get_connect_segment_manager (app_wrk);
+ if (!segment_manager_app_detached (csm))
+ app_worker_del_segment_notify (
+ app_wrk, ct_client_seg_handle (ct->segment_handle, ct->client_wrk));
}
- if (!del_segment)
- return;
-
+ /* Notify server app and free segment */
segment_manager_lock_and_del_segment (sm, seg_index);
/* Cleanup segment manager if needed. If server detaches there's a chance
* the client's sessions will hold up segment removal */
if (segment_manager_app_detached (sm) && !segment_manager_has_fifos (sm))
segment_manager_free_safe (sm);
+
+done:
+
+ clib_rwlock_writer_unlock (&cm->app_segs_lock);
+}
+
+static void
+ct_session_force_disconnect_server (ct_connection_t *sct)
+{
+ sct->peer_index = ~0;
+ session_transport_closing_notify (&sct->connection);
}
int
@@ -294,9 +363,7 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
/* Client closed while waiting for reply from server */
if (PREDICT_FALSE (!cct))
{
- session_transport_closing_notify (&sct->connection);
- session_transport_delete_notify (&sct->connection);
- ct_connection_free (sct);
+ ct_session_force_disconnect_server (sct);
return 0;
}
@@ -307,16 +374,19 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
goto connect_error;
/*
- * Alloc client session
+ * Alloc client session, server session assumed to be established
*/
+ ASSERT (ss->session_state >= SESSION_STATE_READY);
+
cs = session_alloc (thread_index);
ss = session_get (ss_index, thread_index);
cs->session_type = ss->session_type;
cs->listener_handle = SESSION_INVALID_HANDLE;
- cs->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (cs, SESSION_STATE_CONNECTING);
cs->app_wrk_index = client_wrk->wrk_index;
cs->connection_index = cct->c_c_index;
+ cs->opaque = opaque;
cct->c_s_index = cs->session_index;
/* This will allocate fifos for the session. They won't be used for
@@ -325,23 +395,23 @@ ct_session_connect_notify (session_t *ss, session_error_t err)
if ((err = app_worker_init_connected (client_wrk, cs)))
{
session_free (cs);
- session_close (ss);
+ ct_session_force_disconnect_server (sct);
err = SESSION_E_ALLOC;
goto connect_error;
}
- cs->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (cs, SESSION_STATE_CONNECTING);
if (app_worker_connect_notify (client_wrk, cs, 0, opaque))
{
segment_manager_dealloc_fifos (cs->rx_fifo, cs->tx_fifo);
session_free (cs);
- session_close (ss);
+ ct_session_force_disconnect_server (sct);
goto cleanup_client;
}
cs = session_get (cct->c_s_index, cct->c_thread_index);
- cs->session_state = SESSION_STATE_READY;
+ session_set_state (cs, SESSION_STATE_READY);
return 0;
@@ -373,9 +443,6 @@ ct_lookup_free_segment (ct_main_t *cm, segment_manager_t *sm,
pool_foreach (ct_seg, seg_ctx->segments)
{
/* Client or server has detached so segment cannot be used */
- if ((ct_seg->flags & CT_SEGMENT_F_SERVER_DETACHED) ||
- (ct_seg->flags & CT_SEGMENT_F_CLIENT_DETACHED))
- continue;
fs = segment_manager_get_segment (sm, ct_seg->segment_index);
free_bytes = fifo_segment_available_bytes (fs);
max_fifos = fifo_segment_size (fs) / seg_ctx->fifo_pair_bytes;
@@ -395,11 +462,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle,
segment_manager_t *sm, u32 client_wrk_index)
{
u32 seg_ctx_index = ~0, sm_index, pair_bytes;
+ u64 seg_size, seg_handle, client_seg_handle;
segment_manager_props_t *props;
const u32 margin = 16 << 10;
ct_segments_ctx_t *seg_ctx;
app_worker_t *client_wrk;
- u64 seg_size, seg_handle;
application_t *server;
ct_segment_t *ct_seg;
uword *spp;
@@ -461,7 +528,11 @@ ct_alloc_segment (ct_main_t *cm, app_worker_t *server_wrk, u64 table_handle,
goto error;
client_wrk = app_worker_get (client_wrk_index);
- if (app_worker_add_segment_notify (client_wrk, seg_handle))
+ /* Make sure client workers do not have overlapping segment handles.
+ * Ideally, we should attach fs to client worker segment manager and
+ * create a new handle but that's not currently possible. */
+ client_seg_handle = ct_client_seg_handle (seg_handle, client_wrk_index);
+ if (app_worker_add_segment_notify (client_wrk, client_seg_handle))
{
app_worker_del_segment_notify (server_wrk, seg_handle);
goto error;
@@ -515,6 +586,8 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct,
ct->seg_ctx_index = ct_seg->seg_ctx_index;
ct->ct_seg_index = ct_seg->ct_seg_index;
fs_index = ct_seg->segment_index;
+ ct_seg->flags &=
+ ~(CT_SEGMENT_F_SERVER_DETACHED | CT_SEGMENT_F_CLIENT_DETACHED);
__atomic_add_fetch (&ct_seg->server_n_sessions, 1, __ATOMIC_RELAXED);
__atomic_add_fetch (&ct_seg->client_n_sessions, 1, __ATOMIC_RELAXED);
}
@@ -573,10 +646,6 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct,
ls->tx_fifo->shr->master_session_index = ls->session_index;
ls->rx_fifo->master_thread_index = ls->thread_index;
ls->tx_fifo->master_thread_index = ls->thread_index;
- ls->rx_fifo->segment_manager = sm_index;
- ls->tx_fifo->segment_manager = sm_index;
- ls->rx_fifo->segment_index = fs_index;
- ls->tx_fifo->segment_index = fs_index;
seg_handle = segment_manager_segment_handle (sm, fs);
segment_manager_segment_reader_unlock (sm);
@@ -587,23 +656,21 @@ ct_init_accepted_session (app_worker_t *server_wrk, ct_connection_t *ct,
}
static void
-ct_accept_rpc_wrk_handler (void *accept_args)
+ct_accept_one (u32 thread_index, u32 ho_index)
{
- u32 cct_index, ho_index, thread_index, ll_index;
ct_connection_t *sct, *cct, *ho;
transport_connection_t *ll_ct;
app_worker_t *server_wrk;
+ u32 cct_index, ll_index;
session_t *ss, *ll;
/*
* Alloc client ct and initialize from ho
*/
- thread_index = vlib_get_thread_index ();
cct = ct_connection_alloc (thread_index);
cct_index = cct->c_c_index;
- ho_index = pointer_to_uword (accept_args);
- ho = ct_connection_get (ho_index, 0);
+ ho = ct_half_open_get (ho_index);
/* Unlikely but half-open session and transport could have been freed */
if (PREDICT_FALSE (!ho))
@@ -640,7 +707,7 @@ ct_accept_rpc_wrk_handler (void *accept_args)
sct->c_rmt_port = 0;
sct->c_lcl_port = ll_ct->lcl_port;
sct->c_is_ip4 = cct->c_is_ip4;
- clib_memcpy (&sct->c_lcl_ip, &ll_ct->lcl_ip, sizeof (ll_ct->lcl_ip));
+ clib_memcpy (&sct->c_lcl_ip, &cct->c_rmt_ip, sizeof (cct->c_rmt_ip));
sct->client_wrk = cct->client_wrk;
sct->c_proto = TRANSPORT_PROTO_NONE;
sct->client_opaque = cct->client_opaque;
@@ -659,7 +726,7 @@ ct_accept_rpc_wrk_handler (void *accept_args)
sct->c_is_ip4);
ss->connection_index = sct->c_c_index;
ss->listener_handle = listen_session_get_handle (ll);
- ss->session_state = SESSION_STATE_CREATED;
+ session_set_state (ss, SESSION_STATE_CREATED);
server_wrk = application_listener_select_worker (ll);
ss->app_wrk_index = server_wrk->wrk_index;
@@ -675,15 +742,17 @@ ct_accept_rpc_wrk_handler (void *accept_args)
return;
}
+ cct->server_wrk = sct->server_wrk;
cct->seg_ctx_index = sct->seg_ctx_index;
cct->ct_seg_index = sct->ct_seg_index;
cct->client_rx_fifo = ss->tx_fifo;
cct->client_tx_fifo = ss->rx_fifo;
cct->client_rx_fifo->refcnt++;
cct->client_tx_fifo->refcnt++;
- cct->segment_handle = sct->segment_handle;
+ cct->segment_handle =
+ ct_client_seg_handle (sct->segment_handle, cct->client_wrk);
- ss->session_state = SESSION_STATE_ACCEPTING;
+ session_set_state (ss, SESSION_STATE_ACCEPTING);
if (app_worker_accept_notify (server_wrk, ss))
{
ct_session_connect_notify (ss, SESSION_E_REFUSED);
@@ -693,13 +762,93 @@ ct_accept_rpc_wrk_handler (void *accept_args)
}
}
-static int
-ct_connect (app_worker_t * client_wrk, session_t * ll,
- session_endpoint_cfg_t * sep)
+static void
+ct_accept_rpc_wrk_handler (void *rpc_args)
{
- u32 thread_index, ho_index;
+ u32 thread_index, n_connects, i, n_pending;
+ const u32 max_connects = 32;
+ ct_worker_t *wrk;
+ u8 need_rpc = 0;
+
+ thread_index = pointer_to_uword (rpc_args);
+ wrk = ct_worker_get (thread_index);
+
+ /* Connects could be handled without worker barrier so grab lock */
+ clib_spinlock_lock (&wrk->pending_connects_lock);
+
+ n_pending = clib_fifo_elts (wrk->pending_connects);
+ n_connects = clib_min (n_pending, max_connects);
+ vec_validate (wrk->new_connects, n_connects);
+
+ for (i = 0; i < n_connects; i++)
+ clib_fifo_sub1 (wrk->pending_connects, wrk->new_connects[i]);
+
+ if (n_pending == n_connects)
+ wrk->have_connects = 0;
+ else
+ need_rpc = 1;
+
+ clib_spinlock_unlock (&wrk->pending_connects_lock);
+
+ for (i = 0; i < n_connects; i++)
+ ct_accept_one (thread_index, wrk->new_connects[i]);
+
+ if (need_rpc)
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_accept_rpc_wrk_handler,
+ uword_to_pointer (thread_index, void *));
+}
+
+static void
+ct_fwrk_flush_connects (void *rpc_args)
+{
+ u32 thread_index, fwrk_index, n_workers;
ct_main_t *cm = &ct_main;
- ct_connection_t *ho;
+ ct_worker_t *wrk;
+ u8 need_rpc;
+
+ fwrk_index = cm->fwrk_thread;
+ n_workers = vec_len (cm->fwrk_pending_connects);
+
+ for (thread_index = fwrk_index; thread_index < n_workers; thread_index++)
+ {
+ if (!vec_len (cm->fwrk_pending_connects[thread_index]))
+ continue;
+
+ wrk = ct_worker_get (thread_index);
+
+ /* Connects can be done without worker barrier, grab dst worker lock */
+ if (thread_index != fwrk_index)
+ clib_spinlock_lock (&wrk->pending_connects_lock);
+
+ clib_fifo_add (wrk->pending_connects,
+ cm->fwrk_pending_connects[thread_index],
+ vec_len (cm->fwrk_pending_connects[thread_index]));
+ if (!wrk->have_connects)
+ {
+ wrk->have_connects = 1;
+ need_rpc = 1;
+ }
+
+ if (thread_index != fwrk_index)
+ clib_spinlock_unlock (&wrk->pending_connects_lock);
+
+ vec_reset_length (cm->fwrk_pending_connects[thread_index]);
+
+ if (need_rpc)
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_accept_rpc_wrk_handler,
+ uword_to_pointer (thread_index, void *));
+ }
+
+ cm->fwrk_have_flush = 0;
+}
+
+static void
+ct_program_connect_to_wrk (u32 ho_index)
+{
+ ct_main_t *cm = &ct_main;
+ u32 thread_index;
/* Simple round-robin policy for spreading sessions over workers. We skip
* thread index 0, i.e., offset the index by 1, when we have workers as it
@@ -708,6 +857,25 @@ ct_connect (app_worker_t * client_wrk, session_t * ll,
cm->n_sessions += 1;
thread_index = cm->n_workers ? (cm->n_sessions % cm->n_workers) + 1 : 0;
+ /* Pospone flushing of connect request to dst worker until after session
+ * layer fully initializes the half-open session. */
+ vec_add1 (cm->fwrk_pending_connects[thread_index], ho_index);
+ if (!cm->fwrk_have_flush)
+ {
+ session_send_rpc_evt_to_thread_force (
+ cm->fwrk_thread, ct_fwrk_flush_connects,
+ uword_to_pointer (thread_index, void *));
+ cm->fwrk_have_flush = 1;
+ }
+}
+
+static int
+ct_connect (app_worker_t *client_wrk, session_t *ll,
+ session_endpoint_cfg_t *sep)
+{
+ ct_connection_t *ho;
+ u32 ho_index;
+
/*
* Alloc and init client half-open transport
*/
@@ -725,22 +893,19 @@ ct_connect (app_worker_t * client_wrk, session_t * ll,
clib_memcpy (&ho->c_rmt_ip, &sep->ip, sizeof (sep->ip));
ho->flags |= CT_CONN_F_CLIENT;
ho->c_s_index = ~0;
- ho->actual_tp = sep->transport_proto;
+ ho->actual_tp = sep->original_tp;
/*
- * Accept connection on thread selected above. Connected reply comes
+ * Program connect on a worker, connected reply comes
* after server accepts the connection.
*/
-
- session_send_rpc_evt_to_thread_force (thread_index,
- ct_accept_rpc_wrk_handler,
- uword_to_pointer (ho_index, void *));
+ ct_program_connect_to_wrk (ho_index);
return ho_index;
}
static u32
-ct_start_listen (u32 app_listener_index, transport_endpoint_t * tep)
+ct_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
{
session_endpoint_cfg_t *sep;
ct_connection_t *ct;
@@ -772,9 +937,9 @@ ct_listener_get (u32 ct_index)
}
static transport_connection_t *
-ct_half_open_get (u32 ct_index)
+ct_session_half_open_get (u32 ct_index)
{
- return (transport_connection_t *) ct_connection_get (ct_index, 0);
+ return (transport_connection_t *) ct_half_open_get (ct_index);
}
static void
@@ -796,7 +961,10 @@ ct_session_cleanup (u32 conn_index, u32 thread_index)
static void
ct_cleanup_ho (u32 ho_index)
{
- ct_connection_free (ct_connection_get (ho_index, 0));
+ ct_connection_t *ho;
+
+ ho = ct_half_open_get (ho_index);
+ ct_connection_free (ho);
}
static int
@@ -827,7 +995,7 @@ ct_session_connect (transport_endpoint_cfg_t * tep)
goto global_scope;
ll = listen_session_get_from_handle (lh);
- al = app_listener_get_w_session (ll);
+ al = app_listener_get (ll->al_index);
/*
* Break loop if rule in local table points to connecting app. This
@@ -856,58 +1024,189 @@ global_scope:
ll = session_lookup_listener_wildcard (table_index, sep);
/* Avoid connecting app to own listener */
- if (ll && ll->app_index != app->app_index)
- return ct_connect (app_wrk, ll, sep_ext);
+ if (ll)
+ {
+ al = app_listener_get (ll->al_index);
+ if (al->app_index != app->app_index)
+ return ct_connect (app_wrk, ll, sep_ext);
+ }
/* Failed to connect but no error */
return SESSION_E_LOCAL_CONNECT;
}
+static inline int
+ct_close_is_reset (ct_connection_t *ct, session_t *s)
+{
+ if (ct->flags & CT_CONN_F_RESET)
+ return 1;
+ if (ct->flags & CT_CONN_F_CLIENT)
+ return (svm_fifo_max_dequeue (ct->client_rx_fifo) > 0);
+ else
+ return (svm_fifo_max_dequeue (s->rx_fifo) > 0);
+}
+
static void
-ct_session_close (u32 ct_index, u32 thread_index)
+ct_session_cleanup_server_session (session_t *s)
{
- ct_connection_t *ct, *peer_ct;
+ ct_connection_t *ct;
+
+ ct = (ct_connection_t *) session_get_transport (s);
+ ct_session_dealloc_fifos (ct, s->rx_fifo, s->tx_fifo);
+ session_free (s);
+ ct_connection_free (ct);
+}
+
+static void
+ct_session_postponed_cleanup (ct_connection_t *ct)
+{
+ ct_connection_t *peer_ct;
app_worker_t *app_wrk;
session_t *s;
- ct = ct_connection_get (ct_index, thread_index);
s = session_get (ct->c_s_index, ct->c_thread_index);
- peer_ct = ct_connection_get (ct->peer_index, thread_index);
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+
+ peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index);
if (peer_ct)
{
- peer_ct->peer_index = ~0;
- /* Make sure session was allocated */
- if (peer_ct->flags & CT_CONN_F_HALF_OPEN)
- {
- ct_session_connect_notify (s, SESSION_E_REFUSED);
- }
- else if (peer_ct->c_s_index != ~0)
- session_transport_closing_notify (&peer_ct->connection);
+ if (ct_close_is_reset (ct, s))
+ session_transport_reset_notify (&peer_ct->connection);
else
- ct_connection_free (peer_ct);
+ session_transport_closing_notify (&peer_ct->connection);
}
+ session_transport_closed_notify (&ct->connection);
+
+ /* It would be cleaner to call session_transport_delete_notify
+ * but then we can't control session cleanup lower */
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
+ if (app_wrk)
+ app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_TRANSPORT);
if (ct->flags & CT_CONN_F_CLIENT)
{
/* Normal free for client session as the fifos are allocated through
* the connects segment manager in a segment that's not shared with
* the server */
- session_free_w_fifos (s);
ct_session_dealloc_fifos (ct, ct->client_rx_fifo, ct->client_tx_fifo);
+ session_program_cleanup (s);
+ ct_connection_free (ct);
}
else
{
/* Manual session and fifo segment cleanup to avoid implicit
* segment manager cleanups and notifications */
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
if (app_wrk)
- app_worker_cleanup_notify (app_wrk, s, SESSION_CLEANUP_SESSION);
+ {
+ /* Remove custom cleanup notify infra when/if switching to normal
+ * session cleanup. Note that ct is freed in the cb function */
+ app_worker_cleanup_notify_custom (app_wrk, s,
+ SESSION_CLEANUP_SESSION,
+ ct_session_cleanup_server_session);
+ }
+ else
+ {
+ ct_connection_free (ct);
+ }
+ }
+}
+
+static void
+ct_handle_cleanups (void *args)
+{
+ uword thread_index = pointer_to_uword (args);
+ const u32 max_cleanups = 100;
+ ct_cleanup_req_t *req;
+ ct_connection_t *ct;
+ u32 n_to_handle = 0;
+ ct_worker_t *wrk;
+ session_t *s;
+
+ wrk = ct_worker_get (thread_index);
+ wrk->have_cleanups = 0;
+ n_to_handle = clib_fifo_elts (wrk->pending_cleanups);
+ n_to_handle = clib_min (n_to_handle, max_cleanups);
+
+ while (n_to_handle)
+ {
+ clib_fifo_sub2 (wrk->pending_cleanups, req);
+ ct = ct_connection_get (req->ct_index, thread_index);
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (svm_fifo_has_event (s->tx_fifo) || (s->flags & SESSION_F_RX_EVT))
+ clib_fifo_add1 (wrk->pending_cleanups, *req);
+ else
+ ct_session_postponed_cleanup (ct);
+ n_to_handle -= 1;
+ }
- ct_session_dealloc_fifos (ct, s->rx_fifo, s->tx_fifo);
- session_free (s);
+ if (clib_fifo_elts (wrk->pending_cleanups))
+ {
+ wrk->have_cleanups = 1;
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_handle_cleanups,
+ uword_to_pointer (thread_index, void *));
}
+}
- ct_connection_free (ct);
+static void
+ct_program_cleanup (ct_connection_t *ct)
+{
+ ct_cleanup_req_t *req;
+ uword thread_index;
+ ct_worker_t *wrk;
+
+ thread_index = ct->c_thread_index;
+ wrk = ct_worker_get (ct->c_thread_index);
+
+ clib_fifo_add2 (wrk->pending_cleanups, req);
+ req->ct_index = ct->c_c_index;
+
+ if (wrk->have_cleanups)
+ return;
+
+ wrk->have_cleanups = 1;
+ session_send_rpc_evt_to_thread_force (
+ thread_index, ct_handle_cleanups, uword_to_pointer (thread_index, void *));
+}
+
+static void
+ct_session_close (u32 ct_index, u32 thread_index)
+{
+ ct_connection_t *ct, *peer_ct;
+ session_t *s;
+
+ ct = ct_connection_get (ct_index, thread_index);
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ peer_ct = ct_connection_get (ct->peer_index, thread_index);
+ if (peer_ct)
+ {
+ peer_ct->peer_index = ~0;
+ /* Make sure session was allocated */
+ if (peer_ct->flags & CT_CONN_F_HALF_OPEN)
+ {
+ ct_session_connect_notify (s, SESSION_E_REFUSED);
+ ct->peer_index = ~0;
+ }
+ else if (peer_ct->c_s_index == ~0)
+ {
+ /* should not happen */
+ clib_warning ("ct peer without session");
+ ct_connection_free (peer_ct);
+ }
+ }
+
+ /* Do not send closed notify to make sure pending tx events are
+ * still delivered and program cleanup */
+ ct_program_cleanup (ct);
+}
+
+static void
+ct_session_reset (u32 ct_index, u32 thread_index)
+{
+ ct_connection_t *ct;
+ ct = ct_connection_get (ct_index, thread_index);
+ ct->flags |= CT_CONN_F_RESET;
+ ct_session_close (ct_index, thread_index);
}
static transport_connection_t *
@@ -966,12 +1265,17 @@ static int
ct_app_rx_evt (transport_connection_t * tc)
{
ct_connection_t *ct = (ct_connection_t *) tc, *peer_ct;
- session_t *ps;
+ session_t *ps, *s;
+ s = session_get (ct->c_s_index, ct->c_thread_index);
+ if (session_has_transport (s) || s->session_state < SESSION_STATE_READY)
+ return -1;
peer_ct = ct_connection_get (ct->peer_index, tc->thread_index);
- if (!peer_ct)
+ if (!peer_ct || (peer_ct->flags & CT_CONN_F_HALF_OPEN))
return -1;
ps = session_get (peer_ct->c_s_index, peer_ct->c_thread_index);
+ if (ps->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ return -1;
return session_dequeue_notify (ps);
}
@@ -993,7 +1297,7 @@ format_ct_half_open (u8 *s, va_list *args)
{
u32 ho_index = va_arg (*args, u32);
u32 verbose = va_arg (*args, u32);
- ct_connection_t *ct = ct_connection_get (ho_index, 0);
+ ct_connection_t *ct = ct_half_open_get (ho_index);
s = format (s, "%-" SESSION_CLI_ID_LEN "U", format_ct_connection_id, ct);
if (verbose)
s = format (s, "%-" SESSION_CLI_STATE_LEN "s", "HALF-OPEN");
@@ -1042,27 +1346,33 @@ format_ct_session (u8 * s, va_list * args)
clib_error_t *
ct_enable_disable (vlib_main_t * vm, u8 is_en)
{
+ vlib_thread_main_t *vtm = &vlib_thread_main;
ct_main_t *cm = &ct_main;
+ ct_worker_t *wrk;
cm->n_workers = vlib_num_workers ();
- vec_validate (cm->connections, cm->n_workers);
+ cm->fwrk_thread = transport_cl_thread ();
+ vec_validate (cm->wrk, vtm->n_vlib_mains);
+ vec_foreach (wrk, cm->wrk)
+ clib_spinlock_init (&wrk->pending_connects_lock);
clib_spinlock_init (&cm->ho_reuseable_lock);
clib_rwlock_init (&cm->app_segs_lock);
+ vec_validate (cm->fwrk_pending_connects, cm->n_workers);
return 0;
}
-/* *INDENT-OFF* */
static const transport_proto_vft_t cut_thru_proto = {
.enable = ct_enable_disable,
.start_listen = ct_start_listen,
.stop_listen = ct_stop_listen,
.get_connection = ct_session_get,
.get_listener = ct_listener_get,
- .get_half_open = ct_half_open_get,
+ .get_half_open = ct_session_half_open_get,
.cleanup = ct_session_cleanup,
.cleanup_ho = ct_cleanup_ho,
.connect = ct_session_connect,
.close = ct_session_close,
+ .reset = ct_session_reset,
.custom_tx = ct_custom_tx,
.app_rx_evt = ct_app_rx_evt,
.format_listener = format_ct_listener,
@@ -1075,7 +1385,14 @@ static const transport_proto_vft_t cut_thru_proto = {
.service_type = TRANSPORT_SERVICE_VC,
},
};
-/* *INDENT-ON* */
+
+static inline int
+ct_session_can_tx (session_t *s)
+{
+ return (s->session_state == SESSION_STATE_READY ||
+ s->session_state == SESSION_STATE_CLOSING ||
+ s->session_state == SESSION_STATE_APP_CLOSED);
+}
int
ct_session_tx (session_t * s)
@@ -1083,6 +1400,8 @@ ct_session_tx (session_t * s)
ct_connection_t *ct, *peer_ct;
session_t *peer_s;
+ if (!ct_session_can_tx (s))
+ return 0;
ct = (ct_connection_t *) session_get_transport (s);
peer_ct = ct_connection_get (ct->peer_index, ct->c_thread_index);
if (!peer_ct)
@@ -1090,6 +1409,7 @@ ct_session_tx (session_t * s)
peer_s = session_get (peer_ct->c_s_index, peer_ct->c_thread_index);
if (peer_s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
return 0;
+ peer_s->flags |= SESSION_F_RX_EVT;
return session_enqueue_notify (peer_s);
}
diff --git a/src/vnet/session/application_local.h b/src/vnet/session/application_local.h
index 86edf243b22..fd2804c7baf 100644
--- a/src/vnet/session/application_local.h
+++ b/src/vnet/session/application_local.h
@@ -22,7 +22,8 @@
#define foreach_ct_flags \
_ (CLIENT, "client") \
- _ (HALF_OPEN, "half-open")
+ _ (HALF_OPEN, "half-open") \
+ _ (RESET, "reset")
enum
{
diff --git a/src/vnet/session/application_namespace.c b/src/vnet/session/application_namespace.c
index 6d91fc362b2..f547dcfc031 100644
--- a/src/vnet/session/application_namespace.c
+++ b/src/vnet/session/application_namespace.c
@@ -19,6 +19,7 @@
#include <vnet/session/session.h>
#include <vnet/fib/fib_table.h>
#include <vppinfra/file.h>
+#include <vppinfra/format_table.h>
#include <vlib/unix/unix.h>
/**
@@ -40,7 +41,7 @@ app_namespace_get (u32 index)
}
app_namespace_t *
-app_namespace_get_from_id (const u8 * ns_id)
+app_namespace_get_from_id (const u8 *ns_id)
{
u32 index = app_namespace_index_from_id (ns_id);
if (index == APP_NAMESPACE_INVALID_INDEX)
@@ -54,31 +55,46 @@ app_namespace_index (app_namespace_t * app_ns)
return (app_ns - app_namespace_pool);
}
+void
+app_namespace_free (app_namespace_t *app_ns)
+{
+ hash_unset_mem (app_namespace_lookup_table, app_ns->ns_id);
+ vec_free (app_ns->ns_id);
+
+ pool_put (app_namespace_pool, app_ns);
+}
+
app_namespace_t *
-app_namespace_alloc (u8 * ns_id)
+app_namespace_alloc (const u8 *ns_id)
{
app_namespace_t *app_ns;
+
pool_get (app_namespace_pool, app_ns);
clib_memset (app_ns, 0, sizeof (*app_ns));
- app_ns->ns_id = vec_dup (ns_id);
+
+ app_ns->ns_id = vec_dup ((u8 *) ns_id);
+ vec_terminate_c_string (app_ns->ns_id);
+
hash_set_mem (app_namespace_lookup_table, app_ns->ns_id,
app_ns - app_namespace_pool);
+
return app_ns;
}
-int
-vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
+session_error_t
+vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a)
{
app_namespace_t *app_ns;
session_table_t *st;
+ u32 ns_index;
+ session_error_t rv;
if (a->is_add)
{
if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX
&& !vnet_get_sw_interface_or_null (vnet_get_main (),
a->sw_if_index))
- return VNET_API_ERROR_INVALID_SW_IF_INDEX;
-
+ return SESSION_E_INVALID;
if (a->sw_if_index != APP_NAMESPACE_INVALID_INDEX)
{
@@ -91,7 +107,7 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
}
if (a->sw_if_index == APP_NAMESPACE_INVALID_INDEX
&& a->ip4_fib_id == APP_NAMESPACE_INVALID_INDEX)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
app_ns = app_namespace_get_from_id (a->ns_id);
if (!app_ns)
@@ -102,9 +118,23 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
st->is_local = 1;
st->appns_index = app_namespace_index (app_ns);
app_ns->local_table_index = session_table_index (st);
+ if (a->sock_name)
+ {
+ app_ns->sock_name = vec_dup (a->sock_name);
+ vec_terminate_c_string (app_ns->sock_name);
+ }
+
+ /* Add socket for namespace,
+ * only at creation time */
+ if (app_sapi_enabled)
+ {
+ rv = appns_sapi_add_ns_socket (app_ns);
+ if (rv)
+ return rv;
+ }
}
+
app_ns->ns_secret = a->secret;
- app_ns->netns = a->netns ? vec_dup (a->netns) : 0;
app_ns->sw_if_index = a->sw_if_index;
app_ns->ip4_fib_index =
fib_table_find (FIB_PROTOCOL_IP4, a->ip4_fib_id);
@@ -112,14 +142,31 @@ vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a)
fib_table_find (FIB_PROTOCOL_IP6, a->ip6_fib_id);
session_lookup_set_tables_appns (app_ns);
- /* Add socket for namespace */
- if (app_sapi_enabled)
- appns_sapi_add_ns_socket (app_ns);
}
else
{
- return VNET_API_ERROR_UNIMPLEMENTED;
+ ns_index = app_namespace_index_from_id (a->ns_id);
+ if (ns_index == APP_NAMESPACE_INVALID_INDEX)
+ return SESSION_E_INVALID;
+
+ app_ns = app_namespace_get (ns_index);
+ if (!app_ns)
+ return SESSION_E_INVALID;
+
+ application_namespace_cleanup (app_ns);
+
+ if (app_sapi_enabled)
+ appns_sapi_del_ns_socket (app_ns);
+
+ st = session_table_get (app_ns->local_table_index);
+
+ session_table_free (st, FIB_PROTOCOL_MAX);
+ if (app_ns->sock_name)
+ vec_free (app_ns->sock_name);
+
+ app_namespace_free (app_ns);
}
+
return 0;
}
@@ -133,7 +180,13 @@ u32
app_namespace_index_from_id (const u8 * ns_id)
{
uword *indexp;
- indexp = hash_get_mem (app_namespace_lookup_table, ns_id);
+ u8 *key;
+
+ key = vec_dup ((u8 *) ns_id);
+ vec_terminate_c_string (key);
+
+ indexp = hash_get_mem (app_namespace_lookup_table, key);
+ vec_free (key);
if (!indexp)
return APP_NAMESPACE_INVALID_INDEX;
return *indexp;
@@ -161,10 +214,15 @@ app_namespace_get_local_table (app_namespace_t * app_ns)
return session_table_get (app_ns->local_table_index);
}
-void
-appns_sapi_enable (void)
+int
+appns_sapi_enable_disable (int is_enable)
{
- app_sapi_enabled = 1;
+ /* This cannot be called with active sockets */
+ if (pool_elts (app_namespace_pool))
+ return -1;
+
+ app_sapi_enabled = is_enable;
+ return 0;
}
u8
@@ -189,7 +247,7 @@ app_namespaces_init (void)
/* clang-format off */
vnet_app_namespace_add_del_args_t a = {
.ns_id = ns_id,
- .netns = 0,
+ .sock_name = 0,
.secret = 0,
.sw_if_index = APP_NAMESPACE_INVALID_INDEX,
.is_add = 1
@@ -204,9 +262,11 @@ static clib_error_t *
app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- u8 is_add = 0, *ns_id = 0, secret_set = 0, sw_if_index_set = 0, *netns = 0;
+ u8 is_add = 0, *ns_id = 0, secret_set = 0, sw_if_index_set = 0;
+ u8 *sock_name = 0;
unformat_input_t _line_input, *line_input = &_line_input;
u32 sw_if_index, fib_id = APP_NAMESPACE_INVALID_INDEX;
+ vnet_main_t *vnm = vnet_get_main ();
u64 secret;
clib_error_t *error = 0;
int rv;
@@ -220,15 +280,20 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
{
if (unformat (line_input, "add"))
is_add = 1;
+ else if (unformat (line_input, "del"))
+ is_add = 0;
else if (unformat (line_input, "id %_%v%_", &ns_id))
;
else if (unformat (line_input, "secret %lu", &secret))
secret_set = 1;
else if (unformat (line_input, "sw_if_index %u", &sw_if_index))
sw_if_index_set = 1;
+ else if (unformat (line_input, "if %U", unformat_vnet_sw_interface, vnm,
+ &sw_if_index))
+ sw_if_index_set = 1;
else if (unformat (line_input, "fib_id", &fib_id))
;
- else if (unformat (line_input, "netns %_%v%_", &netns))
+ else if (unformat (line_input, "sock-name %_%v%_", &sock_name))
;
else
{
@@ -238,57 +303,62 @@ app_ns_fn (vlib_main_t * vm, unformat_input_t * input,
}
}
- if (!ns_id || !secret_set || !sw_if_index_set)
+ if (!ns_id)
{
- vlib_cli_output (vm, "namespace-id, secret and sw_if_index must be "
- "provided");
+ vlib_cli_output (vm, "namespace-id must be provided");
goto done;
}
- if (is_add)
+ if (is_add && (!secret_set || !sw_if_index_set))
{
- /* clang-format off */
- vnet_app_namespace_add_del_args_t args = {
- .ns_id = ns_id,
- .netns = netns,
- .secret = secret,
- .sw_if_index = sw_if_index,
- .ip4_fib_id = fib_id,
- .is_add = 1
- };
- /* clang-format on */
-
- if ((rv = vnet_app_namespace_add_del (&args)))
- error = clib_error_return (0, "app namespace add del returned %d", rv);
+ vlib_cli_output (vm, "secret and interface must be provided");
+ goto done;
}
+ /* clang-format off */
+ vnet_app_namespace_add_del_args_t args = {
+ .ns_id = ns_id,
+ .secret = secret,
+ .sw_if_index = sw_if_index,
+ .sock_name = sock_name,
+ .ip4_fib_id = fib_id,
+ .is_add = is_add,
+ };
+ /* clang-format on */
+
+ if ((rv = vnet_app_namespace_add_del (&args)))
+ error = clib_error_return (0, "app namespace add del returned %d", rv);
+
done:
vec_free (ns_id);
- vec_free (netns);
+ vec_free (sock_name);
unformat_free (line_input);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (app_ns_command, static) = {
.path = "app ns",
- .short_help = "app ns [add] id <namespace-id> secret <secret> "
- "sw_if_index <sw_if_index> [netns <ns>]",
+ .short_help = "app ns [add|del] id <namespace-id> secret <secret> "
+ "sw_if_index <sw_if_index> if <interface>",
.function = app_ns_fn,
};
-/* *INDENT-ON* */
u8 *
format_app_namespace (u8 * s, va_list * args)
{
app_namespace_t *app_ns = va_arg (*args, app_namespace_t *);
+ vnet_main_t *vnm = vnet_get_main ();
+
+ s = format (s, "Application namespace [%u]\nid: %s\nsecret: %lu",
+ app_namespace_index (app_ns), app_ns->ns_id, app_ns->ns_secret);
+ if (app_ns->sw_if_index != (u32) ~0)
+ s = format (s, "\nInterface: %U", format_vnet_sw_if_index_name, vnm,
+ app_ns->sw_if_index);
+ if (app_ns->sock_name)
+ s = format (s, "\nSocket: %s", app_ns->sock_name);
- s =
- format (s, "%-10u%-10lu%-15d%-15v%-15v%-40v", app_namespace_index (app_ns),
- app_ns->ns_secret, app_ns->sw_if_index, app_ns->ns_id,
- app_ns->netns, app_ns->sock_name);
return s;
}
@@ -314,7 +384,6 @@ app_namespace_show_api (vlib_main_t * vm, app_namespace_t * app_ns)
vlib_cli_output (vm, "%12s%12s%5s", "app index", "wrk index", "fd");
- /* *INDENT-OFF* */
pool_foreach (cs, app_ns->app_sockets) {
handle = (app_ns_api_handle_t *) &cs->private_data;
cf = clib_file_get (&file_main, handle->aah_file_index);
@@ -327,7 +396,6 @@ app_namespace_show_api (vlib_main_t * vm, app_namespace_t * app_ns)
vlib_cli_output (vm, "%12d%12d%5u", app_wrk->app_index,
app_wrk->wrk_map_index, cf->file_descriptor);
}
- /* *INDENT-ON* */
}
static clib_error_t *
@@ -335,9 +403,11 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input,
vlib_cli_command_t * cmd)
{
unformat_input_t _line_input, *line_input = &_line_input;
- u8 *ns_id, do_table = 0, had_input = 1, do_api = 0;
+ u8 *ns_id = 0, do_table = 0, had_input = 1, do_api = 0;
app_namespace_t *app_ns;
+ vnet_main_t *vnm = vnet_get_main ();
session_table_t *st;
+ table_t table = {}, *t = &table;
session_cli_return_if_not_enabled ();
@@ -349,7 +419,7 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input,
while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "table %_%v%_", &ns_id))
+ if (unformat (line_input, "id %_%v%_", &ns_id))
do_table = 1;
else if (unformat (line_input, "api-clients"))
do_api = 1;
@@ -386,20 +456,32 @@ show_app_ns_fn (vlib_main_t * vm, unformat_input_t * main_input,
vlib_cli_output (vm, "table for ns %v could not be found", ns_id);
goto done;
}
+ vlib_cli_output (vm, "%U", format_app_namespace, app_ns);
session_lookup_show_table_entries (vm, st, 0, 1);
vec_free (ns_id);
goto done;
}
do_ns_list:
- vlib_cli_output (vm, "%-10s%-10s%-15s%-15s%-15s%-40s", "Index", "Secret",
- "sw_if_index", "Id", "netns", "Socket");
+ table_add_header_col (t, 5, "Index", "Secret", "Interface", "Id", "Socket");
+ int i = 0;
+ pool_foreach (app_ns, app_namespace_pool)
+ {
+ int j = 0;
+ table_format_cell (t, i, j++, "%u", app_namespace_index (app_ns));
+ table_format_cell (t, i, j++, "%lu", app_ns->ns_secret);
+ table_format_cell (t, i, j++, "%U", format_vnet_sw_if_index_name, vnm,
+ app_ns->sw_if_index);
+ table_format_cell (t, i, j++, "%s", app_ns->ns_id);
+ table_format_cell (t, i++, j++, "%s", app_ns->sock_name);
+ }
- /* *INDENT-OFF* */
- pool_foreach (app_ns, app_namespace_pool) {
- vlib_cli_output (vm, "%U", format_app_namespace, app_ns);
- }
- /* *INDENT-ON* */
+ t->default_body.align = TTAA_LEFT;
+ t->default_header_col.align = TTAA_LEFT;
+ t->default_header_col.fg_color = TTAC_YELLOW;
+ t->default_header_col.flags = TTAF_FG_COLOR_SET;
+ vlib_cli_output (vm, "%U", format_table, t);
+ table_free (t);
done:
if (had_input)
@@ -407,14 +489,11 @@ done:
return 0;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_app_ns_command, static) =
-{
+VLIB_CLI_COMMAND (show_app_ns_command, static) = {
.path = "show app ns",
- .short_help = "show app ns [table <id> [api-clients]]",
+ .short_help = "show app ns [id <id> [api-clients]]",
.function = show_app_ns_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/application_namespace.h b/src/vnet/session/application_namespace.h
index 313b2d0e63d..261325cbe0e 100644
--- a/src/vnet/session/application_namespace.h
+++ b/src/vnet/session/application_namespace.h
@@ -51,11 +51,6 @@ typedef struct _app_namespace
u8 *ns_id;
/**
- * Linux netns if one was provided
- */
- u8 *netns;
-
- /**
* Name of socket applications can use to attach to session layer
*/
u8 *sock_name;
@@ -69,7 +64,7 @@ typedef struct _app_namespace
typedef struct _vnet_app_namespace_add_del_args
{
u8 *ns_id;
- u8 *netns;
+ u8 *sock_name;
u64 secret;
u32 sw_if_index;
u32 ip4_fib_id;
@@ -79,15 +74,16 @@ typedef struct _vnet_app_namespace_add_del_args
#define APP_NAMESPACE_INVALID_INDEX ((u32)~0)
-app_namespace_t *app_namespace_alloc (u8 * ns_id);
+app_namespace_t *app_namespace_alloc (const u8 *ns_id);
app_namespace_t *app_namespace_get (u32 index);
-app_namespace_t *app_namespace_get_from_id (const u8 * ns_id);
+app_namespace_t *app_namespace_get_from_id (const u8 *ns_id);
u32 app_namespace_index (app_namespace_t * app_ns);
const u8 *app_namespace_id (app_namespace_t * app_ns);
const u8 *app_namespace_id_from_index (u32 index);
-u32 app_namespace_index_from_id (const u8 * ns_id);
+u32 app_namespace_index_from_id (const u8 *ns_id);
void app_namespaces_init (void);
-int vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t * a);
+session_error_t
+vnet_app_namespace_add_del (vnet_app_namespace_add_del_args_t *a);
u32 app_namespace_get_fib_index (app_namespace_t * app_ns, u8 fib_proto);
session_table_t *app_namespace_get_local_table (app_namespace_t * app_ns);
@@ -159,8 +155,9 @@ appns_sapi_handle_sock_index (u32 sapi_sock_handle)
}
int appns_sapi_add_ns_socket (app_namespace_t * app_ns);
+void appns_sapi_del_ns_socket (app_namespace_t *app_ns);
u8 appns_sapi_enabled (void);
-void appns_sapi_enable (void);
+int appns_sapi_enable_disable (int is_enable);
#endif /* SRC_VNET_SESSION_APPLICATION_NAMESPACE_H_ */
diff --git a/src/vnet/session/application_worker.c b/src/vnet/session/application_worker.c
index be8a9e86bd5..befdb7c7002 100644
--- a/src/vnet/session/application_worker.c
+++ b/src/vnet/session/application_worker.c
@@ -26,6 +26,7 @@ app_worker_t *
app_worker_alloc (application_t * app)
{
app_worker_t *app_wrk;
+
pool_get (app_workers, app_wrk);
clib_memset (app_wrk, 0, sizeof (*app_wrk));
app_wrk->wrk_index = app_wrk - app_workers;
@@ -33,6 +34,8 @@ app_worker_alloc (application_t * app)
app_wrk->wrk_map_index = ~0;
app_wrk->connects_seg_manager = APP_INVALID_SEGMENT_MANAGER_INDEX;
clib_spinlock_init (&app_wrk->detached_seg_managers_lock);
+ vec_validate (app_wrk->wrk_evts, vlib_num_workers ());
+ vec_validate (app_wrk->wrk_mq_congested, vlib_num_workers ());
APP_DBG ("New app %v worker %u", app->name, app_wrk->wrk_index);
return app_wrk;
}
@@ -55,26 +58,34 @@ void
app_worker_free (app_worker_t * app_wrk)
{
application_t *app = application_get (app_wrk->app_index);
+ session_handle_t handle, *handles = 0, *sh;
vnet_unlisten_args_t _a, *a = &_a;
- u64 handle, *handles = 0, *sm_indices = 0;
segment_manager_t *sm;
- session_handle_t *sh;
+ u64 *sm_indices = 0;
session_t *ls;
u32 sm_index;
int i;
/*
+ * Cleanup vpp wrk events
+ */
+ app_worker_del_all_events (app_wrk);
+ for (i = 0; i < vec_len (app_wrk->wrk_evts); i++)
+ clib_fifo_free (app_wrk->wrk_evts[i]);
+
+ vec_free (app_wrk->wrk_evts);
+ vec_free (app_wrk->wrk_mq_congested);
+
+ /*
* Listener cleanup
*/
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
ls = listen_session_get_from_handle (handle);
vec_add1 (handles, app_listen_session_handle (ls));
vec_add1 (sm_indices, sm_index);
sm = segment_manager_get (sm_index);
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (handles); i++)
{
@@ -91,7 +102,7 @@ app_worker_free (app_worker_t * app_wrk)
segment_manager_init_free (sm);
}
}
- vec_reset_length (handles);
+ vec_free (handles);
vec_free (sm_indices);
hash_free (app_wrk->listeners_table);
@@ -175,31 +186,85 @@ app_worker_alloc_session_fifos (segment_manager_t * sm, session_t * s)
}
int
+app_worker_alloc_wrk_cl_session (app_worker_t *app_wrk, session_t *ls)
+{
+ svm_fifo_t *rx_fifo = 0, *tx_fifo = 0;
+ segment_manager_t *sm;
+ session_handle_t lsh;
+ app_listener_t *al;
+ session_t *s;
+
+ al = app_listener_get (ls->al_index);
+ sm = app_worker_get_listen_segment_manager (app_wrk, ls);
+ lsh = session_handle (ls);
+
+ s = session_alloc (0 /* listener on main worker */);
+ session_set_state (s, SESSION_STATE_LISTENING);
+ s->flags |= SESSION_F_IS_CLESS;
+ s->app_wrk_index = app_wrk->wrk_index;
+ ls = session_get_from_handle (lsh);
+ s->session_type = ls->session_type;
+ s->connection_index = ls->connection_index;
+
+ segment_manager_alloc_session_fifos (sm, s->thread_index, &rx_fifo,
+ &tx_fifo);
+
+ rx_fifo->shr->master_session_index = s->session_index;
+ rx_fifo->master_thread_index = s->thread_index;
+
+ tx_fifo->shr->master_session_index = s->session_index;
+ tx_fifo->master_thread_index = s->thread_index;
+
+ s->rx_fifo = rx_fifo;
+ s->tx_fifo = tx_fifo;
+
+ vec_validate (al->cl_listeners, app_wrk->wrk_map_index);
+ al->cl_listeners[app_wrk->wrk_map_index] = s->session_index;
+
+ return 0;
+}
+
+void
+app_worker_free_wrk_cl_session (app_worker_t *app_wrk, session_t *ls)
+{
+ app_listener_t *al;
+ session_t *s;
+
+ al = app_listener_get (ls->al_index);
+
+ s = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index);
+ segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
+ session_free (s);
+
+ al->cl_listeners[app_wrk->wrk_map_index] = SESSION_INVALID_INDEX;
+}
+
+int
app_worker_init_listener (app_worker_t * app_wrk, session_t * ls)
{
segment_manager_t *sm;
/* Allocate segment manager. All sessions derived out of a listen session
- * have fifos allocated by the same segment manager. */
+ * have fifos allocated by the same segment manager.
+ * TODO(fcoras): limit memory consumption by cless listeners */
if (!(sm = app_worker_alloc_segment_manager (app_wrk)))
return SESSION_E_ALLOC;
+ /* Once the first segment is mapped, don't remove it until unlisten */
+ sm->first_is_protected = 1;
+
/* Keep track of the segment manager for the listener or this worker */
hash_set (app_wrk->listeners_table, listen_session_get_handle (ls),
segment_manager_index (sm));
- if (transport_connection_is_cless (session_get_transport (ls)))
- {
- if (ls->rx_fifo)
- return SESSION_E_NOSUPPORT;
- return app_worker_alloc_session_fifos (sm, ls);
- }
+ if (ls->flags & SESSION_F_IS_CLESS)
+ return app_worker_alloc_wrk_cl_session (app_wrk, ls);
+
return 0;
}
-int
-app_worker_start_listen (app_worker_t * app_wrk,
- app_listener_t * app_listener)
+session_error_t
+app_worker_start_listen (app_worker_t *app_wrk, app_listener_t *app_listener)
{
session_t *ls;
int rv;
@@ -263,17 +328,14 @@ app_worker_stop_listen_session (app_worker_t * app_wrk, session_t * ls)
if (PREDICT_FALSE (!sm_indexp))
return;
- /* Dealloc fifos, if any (dgram listeners) */
- if (ls->rx_fifo)
- {
- segment_manager_dealloc_fifos (ls->rx_fifo, ls->tx_fifo);
- ls->tx_fifo = ls->rx_fifo = 0;
- }
+ if (ls->flags & SESSION_F_IS_CLESS)
+ app_worker_free_wrk_cl_session (app_wrk, ls);
/* Try to cleanup segment manager */
sm = segment_manager_get (*sm_indexp);
if (sm)
{
+ sm->first_is_protected = 0;
segment_manager_app_detach (sm);
if (!segment_manager_has_fifos (sm))
{
@@ -334,8 +396,10 @@ app_worker_init_accepted (session_t * s)
listener = listen_session_get_from_handle (s->listener_handle);
app_wrk = application_listener_select_worker (listener);
- s->app_wrk_index = app_wrk->wrk_index;
+ if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk)))
+ return -1;
+ s->app_wrk_index = app_wrk->wrk_index;
app = application_get (app_wrk->app_index);
if (app->cb_fns.fifo_tuning_callback)
s->flags |= SESSION_F_CUSTOM_FIFO_TUNING;
@@ -348,10 +412,35 @@ app_worker_init_accepted (session_t * s)
}
int
+app_worker_listened_notify (app_worker_t *app_wrk, session_handle_t alsh,
+ u32 opaque, session_error_t err)
+{
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_BOUND,
+ .as_u64[0] = alsh,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+
+ app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt);
+
+ return 0;
+}
+
+int
+app_worker_unlisten_reply (app_worker_t *app_wrk, session_handle_t sh,
+ u32 opaque, session_error_t err)
+{
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_UNLISTEN_REPLY,
+ .as_u64[0] = sh,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+
+ app_worker_add_event_custom (app_wrk, 0 /* thread index */, &evt);
+ return 0;
+}
+
+int
app_worker_accept_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.session_accept_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_ACCEPTED);
+ return 0;
}
int
@@ -365,7 +454,7 @@ app_worker_init_connected (app_worker_t * app_wrk, session_t * s)
/* Allocate fifos for session, unless the app is a builtin proxy */
if (application_is_builtin_proxy (app))
- return 0;
+ return app->cb_fns.proxy_alloc_session_fifos (s);
sm = app_worker_get_connect_segment_manager (app_wrk);
return app_worker_alloc_session_fifos (sm, s);
@@ -375,9 +464,13 @@ int
app_worker_connect_notify (app_worker_t * app_wrk, session_t * s,
session_error_t err, u32 opaque)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.session_connected_callback (app_wrk->wrk_index, opaque,
- s, err);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CONNECTED,
+ .as_u64[0] = s ? s->session_index : ~0,
+ .as_u64[1] = (u64) opaque << 32 | (u32) err };
+ u32 thread_index = s ? s->thread_index : vlib_get_thread_index ();
+
+ app_worker_add_event_custom (app_wrk, thread_index, &evt);
+ return 0;
}
int
@@ -385,7 +478,7 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh)
{
session_handle_t *shp;
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
pool_get (app_wrk->half_open_table, shp);
*shp = sh;
@@ -395,36 +488,28 @@ app_worker_add_half_open (app_worker_t *app_wrk, session_handle_t sh)
int
app_worker_del_half_open (app_worker_t *app_wrk, session_t *s)
{
- application_t *app = application_get (app_wrk->app_index);
- ASSERT (vlib_get_thread_index () <= 1);
- pool_put_index (app_wrk->half_open_table, s->ho_index);
- if (app->cb_fns.half_open_cleanup_callback)
- app->cb_fns.half_open_cleanup_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_HALF_CLEANUP);
return 0;
}
int
app_worker_close_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_disconnect_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_DISCONNECTED);
return 0;
}
int
app_worker_transport_closed_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- if (app->cb_fns.session_transport_closed_callback)
- app->cb_fns.session_transport_closed_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_TRANSPORT_CLOSED);
return 0;
}
int
app_worker_reset_notify (app_worker_t * app_wrk, session_t * s)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_reset_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_CTRL_EVT_RESET);
return 0;
}
@@ -432,29 +517,33 @@ int
app_worker_cleanup_notify (app_worker_t * app_wrk, session_t * s,
session_cleanup_ntf_t ntf)
{
- application_t *app = application_get (app_wrk->app_index);
- if (app->cb_fns.session_cleanup_callback)
- app->cb_fns.session_cleanup_callback (s, ntf);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP,
+ .as_u64[0] = (u64) ntf << 32 | s->session_index,
+ .as_u64[1] = pointer_to_uword (session_cleanup) };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
+
return 0;
}
int
-app_worker_builtin_rx (app_worker_t * app_wrk, session_t * s)
+app_worker_cleanup_notify_custom (app_worker_t *app_wrk, session_t *s,
+ session_cleanup_ntf_t ntf,
+ void (*cleanup_cb) (session_t *s))
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.builtin_app_rx_callback (s);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_CLEANUP,
+ .as_u64[0] = (u64) ntf << 32 | s->session_index,
+ .as_u64[1] = pointer_to_uword (cleanup_cb) };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
+
return 0;
}
int
-app_worker_builtin_tx (app_worker_t * app_wrk, session_t * s)
+app_worker_rx_notify (app_worker_t *app_wrk, session_t *s)
{
- application_t *app = application_get (app_wrk->app_index);
-
- if (!app->cb_fns.builtin_app_tx_callback)
- return 0;
-
- app->cb_fns.builtin_app_tx_callback (s);
+ app_worker_add_event (app_wrk, s, SESSION_IO_EVT_RX);
return 0;
}
@@ -462,8 +551,11 @@ int
app_worker_migrate_notify (app_worker_t * app_wrk, session_t * s,
session_handle_t new_sh)
{
- application_t *app = application_get (app_wrk->app_index);
- app->cb_fns.session_migrate_callback (s, new_sh);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_MIGRATED,
+ .as_u64[0] = s->session_index,
+ .as_u64[1] = new_sh };
+
+ app_worker_add_event_custom (app_wrk, s->thread_index, &evt);
return 0;
}
@@ -472,6 +564,7 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s)
{
segment_manager_t *sm;
svm_fifo_t *rxf, *txf;
+ int rv;
if (s->session_state == SESSION_STATE_LISTENING)
return application_change_listener_owner (s, app_wrk);
@@ -488,8 +581,8 @@ app_worker_own_session (app_worker_t * app_wrk, session_t * s)
s->tx_fifo = 0;
sm = app_worker_get_connect_segment_manager (app_wrk);
- if (app_worker_alloc_session_fifos (sm, s))
- return -1;
+ if ((rv = app_worker_alloc_session_fifos (sm, s)))
+ return rv;
if (!svm_fifo_is_empty_cons (rxf))
svm_fifo_clone (s->rx_fifo, rxf);
@@ -506,6 +599,9 @@ int
app_worker_connect_session (app_worker_t *app_wrk, session_endpoint_cfg_t *sep,
session_handle_t *rsh)
{
+ if (PREDICT_FALSE (app_worker_mq_is_congested (app_wrk)))
+ return SESSION_E_REFUSED;
+
sep->app_wrk_index = app_wrk->wrk_index;
return session_open (sep, rsh);
@@ -549,14 +645,12 @@ app_worker_first_listener (app_worker_t * app_wrk, u8 fib_proto,
sst = session_type_from_proto_and_ip (transport_proto,
fib_proto == FIB_PROTOCOL_IP4);
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
listener = listen_session_get_from_handle (handle);
if (listener->session_type == sst
&& !(listener->flags & SESSION_F_PROXY))
return listener;
}));
- /* *INDENT-ON* */
return 0;
}
@@ -573,13 +667,11 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto,
sst = session_type_from_proto_and_ip (transport_proto,
fib_proto == FIB_PROTOCOL_IP4);
- /* *INDENT-OFF* */
hash_foreach (handle, sm_index, app_wrk->listeners_table, ({
listener = listen_session_get_from_handle (handle);
if (listener->session_type == sst && (listener->flags & SESSION_F_PROXY))
return listener;
}));
- /* *INDENT-ON* */
return 0;
}
@@ -590,130 +682,178 @@ app_worker_proxy_listener (app_worker_t * app_wrk, u8 fib_proto,
int
app_worker_add_segment_notify (app_worker_t * app_wrk, u64 segment_handle)
{
- application_t *app = application_get (app_wrk->app_index);
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT,
+ .as_u64[1] = segment_handle };
- return app->cb_fns.add_segment_callback (app_wrk->wrk_index,
- segment_handle);
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+
+ return 0;
}
int
app_worker_del_segment_notify (app_worker_t * app_wrk, u64 segment_handle)
{
- application_t *app = application_get (app_wrk->app_index);
- return app->cb_fns.del_segment_callback (app_wrk->wrk_index,
- segment_handle);
-}
+ session_event_t evt = { .event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT,
+ .as_u64[1] = segment_handle };
-static inline u8
-app_worker_application_is_builtin (app_worker_t * app_wrk)
-{
- return app_wrk->app_is_builtin;
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+
+ return 0;
}
-static inline int
-app_send_io_evt_rx (app_worker_t * app_wrk, session_t * s)
+static int
+app_wrk_send_fd (app_worker_t *app_wrk, int fd)
{
- session_event_t *evt;
- svm_msg_q_msg_t msg;
- svm_msg_q_t *mq;
+ if (!appns_sapi_enabled ())
+ {
+ vl_api_registration_t *reg;
+ clib_error_t *error;
- if (app_worker_application_is_builtin (app_wrk))
- return app_worker_builtin_rx (app_wrk, s);
+ reg =
+ vl_mem_api_client_index_to_registration (app_wrk->api_client_index);
+ if (!reg)
+ {
+ clib_warning ("no api registration for client: %u",
+ app_wrk->api_client_index);
+ return -1;
+ }
- if (svm_fifo_has_event (s->rx_fifo))
- return 0;
+ if (vl_api_registration_file_index (reg) == VL_API_INVALID_FI)
+ return -1;
- mq = app_wrk->event_queue;
- svm_msg_q_lock (mq);
+ error = vl_api_send_fd_msg (reg, &fd, 1);
+ if (error)
+ {
+ clib_error_report (error);
+ return -1;
+ }
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
- {
- clib_warning ("evt q full");
- svm_msg_q_unlock (mq);
- return -1;
+ return 0;
}
- if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
+ app_sapi_msg_t smsg = { 0 };
+ app_namespace_t *app_ns;
+ clib_error_t *error;
+ application_t *app;
+ clib_socket_t *cs;
+ u32 cs_index;
+
+ app = application_get (app_wrk->app_index);
+ app_ns = app_namespace_get (app->ns_index);
+ cs_index = appns_sapi_handle_sock_index (app_wrk->api_client_index);
+ cs = appns_sapi_get_socket (app_ns, cs_index);
+ if (PREDICT_FALSE (!cs))
+ return -1;
+
+ /* There's no payload for the message only the type */
+ smsg.type = APP_SAPI_MSG_TYPE_SEND_FDS;
+ error = clib_socket_sendmsg (cs, &smsg, sizeof (smsg), &fd, 1);
+ if (error)
{
- clib_warning ("evt q rings full");
- svm_msg_q_unlock (mq);
+ clib_error_report (error);
return -1;
}
- msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
- evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
- evt->session_index = s->rx_fifo->shr->client_session_index;
- evt->event_type = SESSION_IO_EVT_RX;
-
- (void) svm_fifo_set_event (s->rx_fifo);
- svm_msg_q_add_and_unlock (mq, &msg);
-
return 0;
}
-static inline int
-app_send_io_evt_tx (app_worker_t * app_wrk, session_t * s)
+void
+app_worker_add_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t evt_type)
{
- svm_msg_q_t *mq;
session_event_t *evt;
- svm_msg_q_msg_t msg;
- if (app_worker_application_is_builtin (app_wrk))
- return app_worker_builtin_tx (app_wrk, s);
+ ASSERT (s->thread_index == vlib_get_thread_index ());
+ clib_fifo_add2 (app_wrk->wrk_evts[s->thread_index], evt);
+ evt->session_index = s->session_index;
+ evt->event_type = evt_type;
+ evt->postponed = 0;
- mq = app_wrk->event_queue;
- svm_msg_q_lock (mq);
-
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)))
+ /* First event for this app_wrk. Schedule it for handling in session input */
+ if (clib_fifo_elts (app_wrk->wrk_evts[s->thread_index]) == 1)
{
- clib_warning ("evt q full");
- svm_msg_q_unlock (mq);
- return -1;
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index);
}
+}
+
+void
+app_worker_add_event_custom (app_worker_t *app_wrk, u32 thread_index,
+ session_event_t *evt)
+{
+ clib_fifo_add1 (app_wrk->wrk_evts[thread_index], *evt);
- if (PREDICT_FALSE (svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
+ /* First event for this app_wrk. Schedule it for handling in session input */
+ if (clib_fifo_elts (app_wrk->wrk_evts[thread_index]) == 1)
{
- clib_warning ("evt q rings full");
- svm_msg_q_unlock (mq);
- return -1;
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ session_wrk_program_app_wrk_evts (wrk, app_wrk->wrk_index);
}
+}
- msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
- evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
- evt->event_type = SESSION_IO_EVT_TX;
- evt->session_index = s->tx_fifo->shr->client_session_index;
+always_inline void
+app_wrk_send_ctrl_evt_inline (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len, int fd)
+{
+ svm_msg_q_msg_t _mq_msg, *mq_msg = &_mq_msg;
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ session_event_t *evt;
- svm_msg_q_add_and_unlock (mq, &msg);
- return 0;
+ ASSERT (!svm_msg_q_or_ring_is_full (mq, SESSION_MQ_CTRL_EVT_RING));
+ *mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_CTRL_EVT_RING);
+
+ evt = svm_msg_q_msg_data (mq, mq_msg);
+ clib_memset (evt, 0, sizeof (*evt));
+ evt->event_type = evt_type;
+ clib_memcpy_fast (evt->data, msg, msg_len);
+
+ if (fd != -1)
+ app_wrk_send_fd (app_wrk, fd);
+
+ svm_msg_q_add_raw (mq, mq_msg);
+}
+
+void
+app_wrk_send_ctrl_evt_fd (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len, int fd)
+{
+ app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, fd);
}
-/* *INDENT-OFF* */
-typedef int (app_send_evt_handler_fn) (app_worker_t *app,
- session_t *s);
-static app_send_evt_handler_fn * const app_send_evt_handler_fns[2] = {
- app_send_io_evt_rx,
- app_send_io_evt_tx,
-};
-/* *INDENT-ON* */
+void
+app_wrk_send_ctrl_evt (app_worker_t *app_wrk, u8 evt_type, void *msg,
+ u32 msg_len)
+{
+ app_wrk_send_ctrl_evt_inline (app_wrk, evt_type, msg, msg_len, -1);
+}
-/**
- * Send event to application
- *
- * Logic from queue perspective is blocking. However, if queue is full,
- * we return.
- */
-int
-app_worker_lock_and_send_event (app_worker_t * app, session_t * s,
- u8 evt_type)
+u8
+app_worker_mq_wrk_is_congested (app_worker_t *app_wrk, u32 thread_index)
+{
+ return app_wrk->wrk_mq_congested[thread_index] > 0;
+}
+
+void
+app_worker_set_mq_wrk_congested (app_worker_t *app_wrk, u32 thread_index)
+{
+ clib_atomic_fetch_add_relax (&app_wrk->mq_congested, 1);
+ ASSERT (thread_index == vlib_get_thread_index ());
+ app_wrk->wrk_mq_congested[thread_index] = 1;
+}
+
+void
+app_worker_unset_wrk_mq_congested (app_worker_t *app_wrk, u32 thread_index)
{
- return app_send_evt_handler_fns[evt_type] (app, s);
+ clib_atomic_fetch_sub_relax (&app_wrk->mq_congested, 1);
+ ASSERT (thread_index == vlib_get_thread_index ());
+ app_wrk->wrk_mq_congested[thread_index] = 0;
}
u8 *
format_app_worker_listener (u8 * s, va_list * args)
{
app_worker_t *app_wrk = va_arg (*args, app_worker_t *);
- u64 handle = va_arg (*args, u64);
+ session_handle_t handle = va_arg (*args, u64);
u32 sm_index = va_arg (*args, u32);
int verbose = va_arg (*args, int);
session_t *listener;
@@ -760,10 +900,12 @@ format_app_worker (u8 * s, va_list * args)
app_worker_t *app_wrk = va_arg (*args, app_worker_t *);
u32 indent = 1;
- s = format (s, "%U wrk-index %u app-index %u map-index %u "
- "api-client-index %d\n", format_white_space, indent,
- app_wrk->wrk_index, app_wrk->app_index, app_wrk->wrk_map_index,
- app_wrk->api_client_index);
+ s = format (s,
+ "%U wrk-index %u app-index %u map-index %u "
+ "api-client-index %d mq-cong %u\n",
+ format_white_space, indent, app_wrk->wrk_index,
+ app_wrk->app_index, app_wrk->wrk_map_index,
+ app_wrk->api_client_index, app_wrk->mq_congested);
return s;
}
diff --git a/src/vnet/session/mma_template.c b/src/vnet/session/mma_template.c
index ae730e5dbea..4b2770bb756 100644
--- a/src/vnet/session/mma_template.c
+++ b/src/vnet/session/mma_template.c
@@ -65,6 +65,11 @@ RT (mma_rule_free) (RTT (mma_rules_table) * srt, RTT (mma_rule) * rule)
return rule;
}
+void RT (mma_rules_table_free) (RTT (mma_rules_table) * srt)
+{
+ pool_free (srt->rules);
+}
+
RTT (mma_rule) *
RT (mma_rules_table_get_rule) (RTT (mma_rules_table) * srt, u32 srt_index)
{
diff --git a/src/vnet/session/mma_template.h b/src/vnet/session/mma_template.h
index dc3545a4ffe..2c0230c2869 100644
--- a/src/vnet/session/mma_template.h
+++ b/src/vnet/session/mma_template.h
@@ -41,11 +41,9 @@ typedef struct
{
u32 action_index;
u32 *next_indices;
- /* *INDENT-OFF* */
RTT (mma_mask_or_match) mask;
RTT (mma_mask_or_match) match;
RTT (mma_mask_or_match) max_match;
- /* *INDENT-ON* */
} RTT (mma_rule);
typedef int (*RTT (rule_cmp_fn)) (RTT (mma_rule) * rule1,
diff --git a/src/vnet/session/segment_manager.c b/src/vnet/session/segment_manager.c
index c7a06d8b636..80bebdca9b5 100644
--- a/src/vnet/session/segment_manager.c
+++ b/src/vnet/session/segment_manager.c
@@ -89,28 +89,30 @@ segment_manager_segment_index (segment_manager_t * sm, fifo_segment_t * seg)
*/
static inline int
segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
- u8 notify_app, u8 flags)
+ u8 notify_app, u8 flags, u8 need_lock)
{
segment_manager_main_t *smm = &sm_main;
segment_manager_props_t *props;
+ app_worker_t *app_wrk;
fifo_segment_t *fs;
u32 fs_index = ~0;
u8 *seg_name;
int rv;
props = segment_manager_properties_get (sm);
+ app_wrk = app_worker_get (sm->app_wrk_index);
/* Not configured for addition of new segments and not first */
if (!props->add_segment && !segment_size)
{
- clib_warning ("cannot allocate new segment");
- return VNET_API_ERROR_INVALID_VALUE;
+ SESSION_DBG ("cannot allocate new segment");
+ return SESSION_E_INVALID;
}
/*
* Allocate fifo segment and grab lock if needed
*/
- if (vlib_num_workers ())
+ if (need_lock)
clib_rwlock_writer_lock (&sm->segments_rwlock);
pool_get_zero (sm->segments, fs);
@@ -119,18 +121,24 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
* Allocate ssvm segment
*/
segment_size = segment_size ? segment_size : props->add_segment_size;
- segment_size = round_pow2 (segment_size, clib_mem_get_page_size ());
-
- if (props->segment_type != SSVM_SEGMENT_PRIVATE)
+ /* add overhead to ensure the result segment size is at least
+ * of that requested */
+ segment_size +=
+ sizeof (fifo_segment_header_t) +
+ vlib_thread_main.n_vlib_mains * sizeof (fifo_segment_slice_t) +
+ FIFO_SEGMENT_ALLOC_OVERHEAD;
+
+ if (props->huge_page)
{
- seg_name = format (0, "%d-%d%c", getpid (), smm->seg_name_counter++, 0);
+ uword hugepage_size = clib_mem_get_default_hugepage_size ();
+ segment_size = round_pow2 (segment_size, hugepage_size);
+ fs->ssvm.huge_page = 1;
}
else
- {
- app_worker_t *app_wrk = app_worker_get (sm->app_wrk_index);
- application_t *app = application_get (app_wrk->app_index);
- seg_name = format (0, "%v segment%c", app->name, 0);
- }
+ segment_size = round_pow2 (segment_size, clib_mem_get_page_size ());
+
+ seg_name = format (0, "seg-%u-%u-%u%c", app_wrk->app_index,
+ app_wrk->wrk_index, smm->seg_name_counter++, 0);
fs->ssvm.ssvm_size = segment_size;
fs->ssvm.name = seg_name;
@@ -154,15 +162,17 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
* Save segment index before dropping lock, if any held
*/
fs_index = fs - sm->segments;
+ fs->fs_index = fs_index;
+ fs->sm_index = segment_manager_index (sm);
/*
* Set watermarks in segment
*/
- fs->h->high_watermark = sm->high_watermark;
- fs->h->low_watermark = sm->low_watermark;
+ fs->high_watermark = sm->high_watermark;
+ fs->low_watermark = sm->low_watermark;
+ fs->flags = flags;
+ fs->flags &= ~FIFO_SEGMENT_F_MEM_LIMIT;
fs->h->pct_first_alloc = props->pct_first_alloc;
- fs->h->flags = flags;
- fs->h->flags &= ~FIFO_SEGMENT_F_MEM_LIMIT;
if (notify_app)
{
@@ -172,11 +182,14 @@ segment_manager_add_segment_inline (segment_manager_t *sm, uword segment_size,
app_wrk = app_worker_get (sm->app_wrk_index);
rv = app_worker_add_segment_notify (app_wrk, fs_handle);
if (rv)
- return rv;
+ {
+ fs_index = rv;
+ goto done;
+ }
}
done:
- if (vlib_num_workers ())
+ if (need_lock)
clib_rwlock_writer_unlock (&sm->segments_rwlock);
return fs_index;
@@ -186,14 +199,16 @@ int
segment_manager_add_segment (segment_manager_t *sm, uword segment_size,
u8 notify_app)
{
- return segment_manager_add_segment_inline (sm, segment_size, notify_app, 0);
+ return segment_manager_add_segment_inline (sm, segment_size, notify_app,
+ 0 /* flags */, 0 /* need_lock */);
}
int
segment_manager_add_segment2 (segment_manager_t *sm, uword segment_size,
u8 flags)
{
- return segment_manager_add_segment_inline (sm, segment_size, 0, flags);
+ return segment_manager_add_segment_inline (sm, segment_size, 0, flags,
+ vlib_num_workers ());
}
/**
@@ -235,7 +250,8 @@ segment_manager_get_segment_if_valid (segment_manager_t * sm,
* Removes segment after acquiring writer lock
*/
static inline void
-sm_lock_and_del_segment_inline (segment_manager_t * sm, u32 fs_index)
+sm_lock_and_del_segment_inline (segment_manager_t *sm, u32 fs_index,
+ u8 check_if_empty)
{
fifo_segment_t *fs;
u8 is_prealloc;
@@ -246,6 +262,9 @@ sm_lock_and_del_segment_inline (segment_manager_t * sm, u32 fs_index)
if (!fs)
goto done;
+ if (check_if_empty && fifo_segment_has_fifos (fs))
+ goto done;
+
is_prealloc = fifo_segment_flags (fs) & FIFO_SEGMENT_F_IS_PREALLOCATED;
if (is_prealloc && !segment_manager_app_detached (sm))
goto done;
@@ -259,7 +278,7 @@ done:
void
segment_manager_lock_and_del_segment (segment_manager_t * sm, u32 fs_index)
{
- sm_lock_and_del_segment_inline (sm, fs_index);
+ sm_lock_and_del_segment_inline (sm, fs_index, 0 /* check_if_empty */);
}
/**
@@ -326,12 +345,6 @@ segment_manager_segment_reader_unlock (segment_manager_t * sm)
clib_rwlock_reader_unlock (&sm->segments_rwlock);
}
-void
-segment_manager_segment_writer_unlock (segment_manager_t * sm)
-{
- clib_rwlock_writer_unlock (&sm->segments_rwlock);
-}
-
segment_manager_t *
segment_manager_alloc (void)
{
@@ -405,7 +418,7 @@ segment_manager_init_first (segment_manager_t * sm)
fs_index = segment_manager_add_segment (sm, max_seg_size, 0);
if (fs_index < 0)
{
- clib_warning ("Failed to preallocate segment %d", i);
+ SESSION_DBG ("Failed to preallocate segment %d", i);
return fs_index;
}
@@ -427,7 +440,7 @@ segment_manager_init_first (segment_manager_t * sm)
fs_index = segment_manager_add_segment (sm, first_seg_size, 0);
if (fs_index < 0)
{
- clib_warning ("Failed to allocate segment");
+ SESSION_DBG ("Failed to allocate segment");
return fs_index;
}
@@ -445,7 +458,7 @@ segment_manager_init_first (segment_manager_t * sm)
for (; i < fs->n_slices; i++)
{
if (fifo_segment_prealloc_fifo_hdrs (fs, i, hdrs_per_slice))
- return VNET_API_ERROR_SVM_SEGMENT_CREATE_FAIL;
+ return SESSION_E_SEG_CREATE;
}
}
@@ -486,11 +499,9 @@ segment_manager_free (segment_manager_t * sm)
* the manager is explicitly deleted/detached by the app. */
clib_rwlock_writer_lock (&sm->segments_rwlock);
- /* *INDENT-OFF* */
pool_foreach (fifo_segment, sm->segments) {
segment_manager_del_segment (sm, fifo_segment);
}
- /* *INDENT-ON* */
pool_free (sm->segments);
clib_rwlock_writer_unlock (&sm->segments_rwlock);
@@ -569,7 +580,6 @@ segment_manager_has_fifos (segment_manager_t * sm)
fifo_segment_t *seg;
u8 first = 1;
- /* *INDENT-OFF* */
segment_manager_foreach_segment_w_lock (seg, sm, ({
if (CLIB_DEBUG && !first && !fifo_segment_has_fifos (seg)
&& !(fifo_segment_flags (seg) & FIFO_SEGMENT_F_IS_PREALLOCATED))
@@ -584,7 +594,6 @@ segment_manager_has_fifos (segment_manager_t * sm)
return 1;
}
}));
- /* *INDENT-ON* */
return 0;
}
@@ -604,7 +613,6 @@ segment_manager_del_sessions (segment_manager_t * sm)
ASSERT (pool_elts (sm->segments) != 0);
/* Across all fifo segments used by the server */
- /* *INDENT-OFF* */
segment_manager_foreach_segment_w_lock (fs, sm, ({
for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
{
@@ -629,7 +637,6 @@ segment_manager_del_sessions (segment_manager_t * sm)
* sessions if the segment can be removed.
*/
}));
- /* *INDENT-ON* */
vec_foreach (handle, handles)
{
@@ -695,19 +702,16 @@ segment_manager_del_sessions_filter (segment_manager_t *sm,
}
int
-segment_manager_try_alloc_fifos (fifo_segment_t * fifo_segment,
- u32 thread_index,
+segment_manager_try_alloc_fifos (fifo_segment_t *fs, u32 thread_index,
u32 rx_fifo_size, u32 tx_fifo_size,
- svm_fifo_t ** rx_fifo, svm_fifo_t ** tx_fifo)
+ svm_fifo_t **rx_fifo, svm_fifo_t **tx_fifo)
{
rx_fifo_size = clib_max (rx_fifo_size, sm_main.default_fifo_size);
- *rx_fifo = fifo_segment_alloc_fifo_w_slice (fifo_segment, thread_index,
- rx_fifo_size,
+ *rx_fifo = fifo_segment_alloc_fifo_w_slice (fs, thread_index, rx_fifo_size,
FIFO_SEGMENT_RX_FIFO);
tx_fifo_size = clib_max (tx_fifo_size, sm_main.default_fifo_size);
- *tx_fifo = fifo_segment_alloc_fifo_w_slice (fifo_segment, thread_index,
- tx_fifo_size,
+ *tx_fifo = fifo_segment_alloc_fifo_w_slice (fs, thread_index, tx_fifo_size,
FIFO_SEGMENT_TX_FIFO);
if (*rx_fifo == 0)
@@ -715,45 +719,37 @@ segment_manager_try_alloc_fifos (fifo_segment_t * fifo_segment,
/* This would be very odd, but handle it... */
if (*tx_fifo != 0)
{
- fifo_segment_free_fifo (fifo_segment, *tx_fifo);
+ fifo_segment_free_fifo (fs, *tx_fifo);
*tx_fifo = 0;
}
- return -1;
+ return SESSION_E_SEG_NO_SPACE;
}
if (*tx_fifo == 0)
{
if (*rx_fifo != 0)
{
- fifo_segment_free_fifo (fifo_segment, *rx_fifo);
+ fifo_segment_free_fifo (fs, *rx_fifo);
*rx_fifo = 0;
}
- return -1;
+ return SESSION_E_SEG_NO_SPACE;
}
return 0;
}
-int
-segment_manager_alloc_session_fifos (segment_manager_t * sm,
- u32 thread_index,
- svm_fifo_t ** rx_fifo,
- svm_fifo_t ** tx_fifo)
+static inline int
+sm_lookup_segment_and_alloc_fifos (segment_manager_t *sm,
+ segment_manager_props_t *props,
+ u32 thread_index, svm_fifo_t **rx_fifo,
+ svm_fifo_t **tx_fifo)
{
- int alloc_fail = 1, rv = 0, new_fs_index;
- uword free_bytes, max_free_bytes = 0;
- segment_manager_props_t *props;
- fifo_segment_t *fs = 0, *cur;
- u32 sm_index, fs_index;
-
- props = segment_manager_properties_get (sm);
-
- /*
- * Find the first free segment to allocate the fifos in
- */
+ uword free_bytes, max_free_bytes;
+ fifo_segment_t *cur, *fs = 0;
- segment_manager_segment_reader_lock (sm);
+ max_free_bytes = props->rx_fifo_size + props->tx_fifo_size - 1;
- pool_foreach (cur, sm->segments) {
+ pool_foreach (cur, sm->segments)
+ {
if (fifo_segment_flags (cur) & FIFO_SEGMENT_F_CUSTOM_USE)
continue;
free_bytes = fifo_segment_available_bytes (cur);
@@ -762,63 +758,93 @@ segment_manager_alloc_session_fifos (segment_manager_t * sm,
max_free_bytes = free_bytes;
fs = cur;
}
- }
-
- if (fs)
- {
- alloc_fail = segment_manager_try_alloc_fifos (fs, thread_index,
- props->rx_fifo_size,
- props->tx_fifo_size,
- rx_fifo, tx_fifo);
- /* On success, keep lock until fifos are initialized */
- if (!alloc_fail)
- goto alloc_success;
}
- segment_manager_segment_reader_unlock (sm);
+ if (PREDICT_FALSE (!fs))
+ return SESSION_E_SEG_NO_SPACE;
- /*
- * Allocation failed, see if we can add a new segment
- */
- if (props->add_segment)
+ return segment_manager_try_alloc_fifos (
+ fs, thread_index, props->rx_fifo_size, props->tx_fifo_size, rx_fifo,
+ tx_fifo);
+}
+
+static int
+sm_lock_and_alloc_segment_and_fifos (segment_manager_t *sm,
+ segment_manager_props_t *props,
+ u32 thread_index, svm_fifo_t **rx_fifo,
+ svm_fifo_t **tx_fifo)
+{
+ int new_fs_index, rv;
+ fifo_segment_t *fs;
+
+ if (!props->add_segment)
+ return SESSION_E_SEG_NO_SPACE;
+
+ clib_rwlock_writer_lock (&sm->segments_rwlock);
+
+ /* Make sure there really is no free space. Another worker might've freed
+ * some fifos or allocated a segment */
+ rv = sm_lookup_segment_and_alloc_fifos (sm, props, thread_index, rx_fifo,
+ tx_fifo);
+ if (!rv)
+ goto done;
+
+ new_fs_index =
+ segment_manager_add_segment (sm, 0 /* segment_size*/, 1 /* notify_app */);
+ if (new_fs_index < 0)
{
- if ((new_fs_index = segment_manager_add_segment (sm, 0, 1)) < 0)
- {
- clib_warning ("Failed to add new segment");
- return SESSION_E_SEG_CREATE;
- }
- fs = segment_manager_get_segment_w_lock (sm, new_fs_index);
- alloc_fail = segment_manager_try_alloc_fifos (fs, thread_index,
- props->rx_fifo_size,
- props->tx_fifo_size,
- rx_fifo, tx_fifo);
- if (alloc_fail)
- {
- clib_warning ("Added a segment, still can't allocate a fifo");
- segment_manager_segment_reader_unlock (sm);
- return SESSION_E_SEG_NO_SPACE2;
- }
+ rv = SESSION_E_SEG_CREATE;
+ goto done;
}
- else
+ fs = segment_manager_get_segment (sm, new_fs_index);
+ rv = segment_manager_try_alloc_fifos (fs, thread_index, props->rx_fifo_size,
+ props->tx_fifo_size, rx_fifo, tx_fifo);
+ if (rv)
{
- SESSION_DBG ("Can't add new seg and no space to allocate fifos!");
- return SESSION_E_SEG_NO_SPACE;
+ SESSION_DBG ("Added a segment, still can't allocate a fifo");
+ rv = SESSION_E_SEG_NO_SPACE2;
+ goto done;
}
-alloc_success:
- ASSERT (rx_fifo && tx_fifo);
+done:
+
+ clib_rwlock_writer_unlock (&sm->segments_rwlock);
+
+ return rv;
+}
+
+int
+segment_manager_alloc_session_fifos (segment_manager_t * sm,
+ u32 thread_index,
+ svm_fifo_t ** rx_fifo,
+ svm_fifo_t ** tx_fifo)
+{
+ segment_manager_props_t *props;
+ int rv;
+
+ props = segment_manager_properties_get (sm);
- sm_index = segment_manager_index (sm);
- fs_index = segment_manager_segment_index (sm, fs);
- (*tx_fifo)->segment_manager = sm_index;
- (*rx_fifo)->segment_manager = sm_index;
- (*tx_fifo)->segment_index = fs_index;
- (*rx_fifo)->segment_index = fs_index;
+ /*
+ * Fast path: find the first segment with enough free space and
+ * try to allocate the fifos. Done with reader lock
+ */
+
+ segment_manager_segment_reader_lock (sm);
+
+ rv = sm_lookup_segment_and_alloc_fifos (sm, props, thread_index, rx_fifo,
+ tx_fifo);
- /* Drop the lock after app is notified */
segment_manager_segment_reader_unlock (sm);
- return rv;
+ /*
+ * Slow path: if no fifo segment or alloc fail grab writer lock and try
+ * to allocate new segment
+ */
+ if (PREDICT_FALSE (rv < 0))
+ return sm_lock_and_alloc_segment_and_fifos (sm, props, thread_index,
+ rx_fifo, tx_fifo);
+
+ return 0;
}
void
@@ -827,10 +853,15 @@ segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo)
segment_manager_t *sm;
fifo_segment_t *fs;
u32 segment_index;
+ u8 try_delete = 0;
if (!rx_fifo || !tx_fifo)
return;
+ /* Thread that allocated the fifos must be the one to clean them up */
+ ASSERT (rx_fifo->master_thread_index == vlib_get_thread_index () ||
+ rx_fifo->refcnt > 1 || vlib_thread_is_main_w_barrier ());
+
/* It's possible to have no segment manager if the session was removed
* as result of a detach. */
if (!(sm = segment_manager_get_if_valid (rx_fifo->segment_manager)))
@@ -842,26 +873,30 @@ segment_manager_dealloc_fifos (svm_fifo_t * rx_fifo, svm_fifo_t * tx_fifo)
fifo_segment_free_fifo (fs, tx_fifo);
/*
- * Try to remove svm segment if it has no fifos. This can be done only if
+ * Try to remove fifo segment if it has no fifos. This can be done only if
* the segment is not the first in the segment manager or if it is first
* and it is not protected. Moreover, if the segment is first and the app
* has detached from the segment manager, remove the segment manager.
*/
if (!fifo_segment_has_fifos (fs))
{
- segment_manager_segment_reader_unlock (sm);
+ /* If first, remove only if not protected */
+ try_delete = segment_index != 0 || !sm->first_is_protected;
+ }
+
+ segment_manager_segment_reader_unlock (sm);
- /* Remove segment if it holds no fifos or first but not protected */
- if (segment_index != 0 || !sm->first_is_protected)
- sm_lock_and_del_segment_inline (sm, segment_index);
+ if (PREDICT_FALSE (try_delete))
+ {
+ /* Only remove if empty after writer lock acquired */
+ sm_lock_and_del_segment_inline (sm, segment_index,
+ 1 /* check_if_empty */);
/* Remove segment manager if no sessions and detached from app */
if (segment_manager_app_detached (sm)
&& !segment_manager_has_fifos (sm))
segment_manager_free_safe (sm);
}
- else
- segment_manager_segment_reader_unlock (sm);
}
void
@@ -920,12 +955,10 @@ segment_manager_alloc_queue (fifo_segment_t * segment,
fifo_evt_size = sizeof (session_event_t);
notif_q_size = clib_max (16, props->evt_q_size >> 4);
- /* *INDENT-OFF* */
svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = {
{props->evt_q_size, fifo_evt_size, 0},
{notif_q_size, session_evt_size, 0}
};
- /* *INDENT-ON* */
cfg->consumer_pid = 0;
cfg->n_rings = 2;
cfg->q_nitems = props->evt_q_size;
@@ -984,79 +1017,111 @@ segment_manager_main_init (void)
sm->default_low_watermark = 50;
}
+static u8 *
+format_segment_manager (u8 *s, va_list *args)
+{
+ segment_manager_t *sm = va_arg (*args, segment_manager_t *);
+ int verbose = va_arg (*args, int);
+ app_worker_t *app_wrk;
+ uword max_fifo_size;
+ fifo_segment_t *seg;
+ application_t *app;
+ u8 custom_logic;
+
+ app_wrk = app_worker_get_if_valid (sm->app_wrk_index);
+ app = app_wrk ? application_get (app_wrk->app_index) : 0;
+ custom_logic = (app && (app->cb_fns.fifo_tuning_callback)) ? 1 : 0;
+ max_fifo_size = sm->max_fifo_size;
+
+ s = format (s,
+ "[%u] %v app-wrk: %u segs: %u max-fifo-sz: %U "
+ "wmarks: %u %u %s flags: 0x%x",
+ segment_manager_index (sm), app ? app->name : 0,
+ sm->app_wrk_index, pool_elts (sm->segments), format_memory_size,
+ max_fifo_size, sm->high_watermark, sm->low_watermark,
+ custom_logic ? "custom-tuning" : "no-tuning", sm->flags);
+
+ if (!verbose || !pool_elts (sm->segments))
+ return s;
+
+ s = format (s, "\n\n");
+
+ segment_manager_foreach_segment_w_lock (
+ seg, sm, ({ s = format (s, " *%U", format_fifo_segment, seg, verbose); }));
+
+ return s;
+}
+
static clib_error_t *
segment_manager_show_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
+ unformat_input_t _line_input, *line_input = &_line_input;
segment_manager_main_t *smm = &sm_main;
u8 show_segments = 0, verbose = 0;
- uword max_fifo_size;
segment_manager_t *sm;
- fifo_segment_t *seg;
- app_worker_t *app_wrk;
- application_t *app;
- u8 custom_logic;
+ u32 sm_index = ~0;
+
+ if (!unformat_user (input, unformat_line_input, line_input))
+ {
+ vlib_cli_output (vm, "%d segment managers allocated",
+ pool_elts (smm->segment_managers));
+ return 0;
+ }
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "segments"))
+ if (unformat (line_input, "segments"))
show_segments = 1;
- else if (unformat (input, "verbose"))
+ else if (unformat (line_input, "verbose"))
verbose = 1;
+ else if (unformat (line_input, "index %u", &sm_index))
+ ;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ vlib_cli_output (vm, "unknown input [%U]", format_unformat_error,
+ line_input);
+ goto done;
+ }
}
- vlib_cli_output (vm, "%d segment managers allocated",
- pool_elts (smm->segment_managers));
- if (verbose && pool_elts (smm->segment_managers))
+
+ if (!pool_elts (smm->segment_managers))
+ goto done;
+
+ if (sm_index != ~0)
{
- vlib_cli_output (vm, "%-6s%=10s%=10s%=13s%=11s%=11s%=12s",
- "Index", "AppIndex", "Segments", "MaxFifoSize",
- "HighWater", "LowWater", "FifoTuning");
+ sm = segment_manager_get_if_valid (sm_index);
+ if (!sm)
+ {
+ vlib_cli_output (vm, "segment manager %u not allocated", sm_index);
+ goto done;
+ }
+ vlib_cli_output (vm, "%U", format_segment_manager, sm, 1 /* verbose */);
+ goto done;
+ }
- /* *INDENT-OFF* */
+ if (verbose || show_segments)
+ {
pool_foreach (sm, smm->segment_managers) {
- app_wrk = app_worker_get_if_valid (sm->app_wrk_index);
- app = app_wrk ? application_get (app_wrk->app_index) : 0;
- custom_logic = (app && (app->cb_fns.fifo_tuning_callback)) ? 1 : 0;
- max_fifo_size = sm->max_fifo_size;
-
- vlib_cli_output (vm, "%-6d%=10d%=10d%=13U%=11d%=11d%=12s",
- segment_manager_index (sm),
- sm->app_wrk_index, pool_elts (sm->segments),
- format_memory_size, max_fifo_size,
- sm->high_watermark, sm->low_watermark,
- custom_logic ? "custom" : "none");
+ vlib_cli_output (vm, "%U", format_segment_manager, sm,
+ show_segments);
}
- /* *INDENT-ON* */
vlib_cli_output (vm, "\n");
}
- if (show_segments)
- {
- vlib_cli_output (vm, "%U", format_fifo_segment, 0, verbose);
- /* *INDENT-OFF* */
- pool_foreach (sm, smm->segment_managers) {
- segment_manager_foreach_segment_w_lock (seg, sm, ({
- vlib_cli_output (vm, "%U", format_fifo_segment, seg, verbose);
- }));
- }
- /* *INDENT-ON* */
+done:
+
+ unformat_free (line_input);
- }
return 0;
}
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (segment_manager_show_command, static) =
-{
+VLIB_CLI_COMMAND (segment_manager_show_command, static) = {
.path = "show segment-manager",
- .short_help = "show segment-manager [segments][verbose]",
+ .short_help = "show segment-manager [segments][verbose][index <nn>]",
.function = segment_manager_show_fn,
};
-/* *INDENT-ON* */
void
segment_manager_format_sessions (segment_manager_t * sm, int verbose)
@@ -1085,7 +1150,6 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose)
clib_rwlock_reader_lock (&sm->segments_rwlock);
- /* *INDENT-OFF* */
pool_foreach (fs, sm->segments) {
for (slice_index = 0; slice_index < fs->n_slices; slice_index++)
{
@@ -1117,7 +1181,6 @@ segment_manager_format_sessions (segment_manager_t * sm, int verbose)
vec_free (s);
}
}
- /* *INDENT-ON* */
clib_rwlock_reader_unlock (&sm->segments_rwlock);
}
diff --git a/src/vnet/session/segment_manager.h b/src/vnet/session/segment_manager.h
index 5a3d772ff02..1e99c4605a6 100644
--- a/src/vnet/session/segment_manager.h
+++ b/src/vnet/session/segment_manager.h
@@ -40,6 +40,7 @@ typedef struct _segment_manager_props
u8 high_watermark; /**< memory usage high watermark % */
u8 low_watermark; /**< memory usage low watermark % */
u8 pct_first_alloc; /**< pct of fifo size to alloc */
+ u8 huge_page; /**< use hugepage */
} segment_manager_props_t;
typedef enum seg_manager_flag_
@@ -102,8 +103,23 @@ segment_manager_t *segment_manager_get (u32 index);
segment_manager_t *segment_manager_get_if_valid (u32 index);
u32 segment_manager_index (segment_manager_t * sm);
+/**
+ * Add segment without lock
+ *
+ * @param sm Segment manager
+ * @param segment_size Size of segment to be added
+ * @param notify_app Flag set if app notification requested
+ */
int segment_manager_add_segment (segment_manager_t *sm, uword segment_size,
u8 notify_app);
+
+/**
+ * Add segment with lock
+ *
+ * @param sm Segment manager
+ * @param segment_size Size of segment to be added
+ * @param flags Flags to be set on segment
+ */
int segment_manager_add_segment2 (segment_manager_t *sm, uword segment_size,
u8 flags);
void segment_manager_del_segment (segment_manager_t * sm,
@@ -122,7 +138,6 @@ u64 segment_manager_make_segment_handle (u32 segment_manager_index,
u64 segment_manager_segment_handle (segment_manager_t * sm,
fifo_segment_t * segment);
void segment_manager_segment_reader_unlock (segment_manager_t * sm);
-void segment_manager_segment_writer_unlock (segment_manager_t * sm);
int segment_manager_alloc_session_fifos (segment_manager_t * sm,
u32 thread_index,
@@ -175,7 +190,9 @@ static inline void
segment_manager_parse_segment_handle (u64 segment_handle, u32 * sm_index,
u32 * segment_index)
{
- *sm_index = segment_handle >> 32;
+ /* Upper 8 bits zeroed out as they may be used for cut-through segments.
+ * See @ref ct_alloc_segment */
+ *sm_index = (segment_handle >> 32) & 0xFFFFFF;
*segment_index = segment_handle & 0xFFFFFFFF;
}
diff --git a/src/vnet/session/session.api b/src/vnet/session/session.api
index 43bde1afbbd..6affae4112d 100644
--- a/src/vnet/session/session.api
+++ b/src/vnet/session/session.api
@@ -117,38 +117,6 @@ autoreply define app_del_cert_key_pair {
u32 index;
};
-/** \brief Application add TLS certificate
- ### WILL BE DEPRECATED POST 20.01 ###
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param cert_len - certificate length
- @param cert - certificate as a string
-*/
-autoreply define application_tls_cert_add {
- option deprecated="to be removed post 21.06";
- u32 client_index;
- u32 context;
- u32 app_index;
- u16 cert_len;
- u8 cert[cert_len];
-};
-
-/** \brief Application add TLS key
- ### WILL BE DEPRECATED POST 20.01 ###
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param key_len - certificate length
- @param key - PEM encoded key as a string
-*/
-autoreply define application_tls_key_add {
- option deprecated="to be removed post 21.06";
- u32 client_index;
- u32 context;
- u32 app_index;
- u16 key_len;
- u8 key[key_len];
-};
-
/** \brief add/del application worker
@param client_index - opaque cookie to identify the sender
client to vpp direction only
@@ -203,6 +171,18 @@ autoreply define session_enable_disable {
bool is_enable [default=true];
};
+/** \brief enable/disable session layer socket api
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param is_enable - disable session layer if 0, enable otherwise
+*/
+autoreply define session_sapi_enable_disable {
+ u32 client_index;
+ u32 context;
+ bool is_enable [default=true];
+};
+
/** \brief add/del application namespace
@param client_index - opaque cookie to identify the sender
client to vpp direction only
@@ -239,17 +219,86 @@ define app_namespace_add_del {
@param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored
if sw_if_index set.
@param namespace_id - namespace id
+ @param sock_name - socket name (path, abstract socket name)
+*/
+define app_namespace_add_del_v4 {
+ option deprecated;
+ u32 client_index;
+ u32 context;
+ u64 secret;
+ bool is_add [default=true];
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+ u32 ip4_fib_id;
+ u32 ip6_fib_id;
+ string namespace_id[64];
+ string sock_name[];
+};
+
+/** \brief Reply for app namespace add/del
+ @param context - returned sender context, to match reply w/ request
+ @param retval - return code
+ @param appns_index - app namespace index
+*/
+define app_namespace_add_del_v4_reply
+{
+ u32 context;
+ i32 retval;
+ u32 appns_index;
+};
+
+/** \brief add/del application namespace
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param secret - secret shared between app and vpp
+ @param sw_if_index - local interface that "supports" namespace. Set to
+ ~0 if no preference
+ @param ip4_fib_id - id of ip4 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param namespace_id - namespace id
@param netns - linux net namespace
*/
define app_namespace_add_del_v2 {
+ option deprecated;
+ u32 client_index;
+ u32 context;
+ u64 secret;
+ vl_api_interface_index_t sw_if_index [default=0xffffffff];
+ u32 ip4_fib_id;
+ u32 ip6_fib_id;
+ string namespace_id[64];
+ string netns[64];
+};
+
+/** \brief add/del application namespace
+ @param client_index - opaque cookie to identify the sender
+ client to vpp direction only
+ @param context - sender context, to match reply w/ request
+ @param secret - secret shared between app and vpp
+ @param sw_if_index - local interface that "supports" namespace. Set to
+ ~0 if no preference
+ @param ip4_fib_id - id of ip4 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param ip6_fib_id - id of ip6 fib that "supports" the namespace. Ignored
+ if sw_if_index set.
+ @param namespace_id - namespace id
+ @param netns - linux net namespace
+ @param sock_name - socket name (path, abstract socket name)
+*/
+define app_namespace_add_del_v3 {
+ option deprecated;
u32 client_index;
u32 context;
u64 secret;
+ bool is_add [default=true];
vl_api_interface_index_t sw_if_index [default=0xffffffff];
u32 ip4_fib_id;
u32 ip6_fib_id;
string namespace_id[64];
string netns[64];
+ string sock_name[];
};
/** \brief Reply for app namespace add/del
@@ -272,6 +321,15 @@ define app_namespace_add_del_reply
*/
define app_namespace_add_del_v2_reply
{
+ option deprecated;
+ u32 context;
+ i32 retval;
+ u32 appns_index;
+};
+
+define app_namespace_add_del_v3_reply
+{
+ option deprecated;
u32 context;
i32 retval;
u32 appns_index;
diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c
index 05712181ab0..67e7ee39001 100644
--- a/src/vnet/session/session.c
+++ b/src/vnet/session/session.c
@@ -17,10 +17,13 @@
* @brief Session and session manager
*/
+#include <vnet/plugin/plugin.h>
#include <vnet/session/session.h>
#include <vnet/session/application.h>
#include <vnet/dpo/load_balance.h>
#include <vnet/fib/ip4_fib.h>
+#include <vlib/stats/stats.h>
+#include <vlib/dma/dma.h>
session_main_t session_main;
@@ -36,8 +39,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index,
mq = wrk->vpp_event_queue;
if (PREDICT_FALSE (svm_msg_q_lock (mq)))
return -1;
- if (PREDICT_FALSE (svm_msg_q_is_full (mq)
- || svm_msg_q_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
+ if (PREDICT_FALSE (svm_msg_q_or_ring_is_full (mq, SESSION_MQ_IO_EVT_RING)))
{
svm_msg_q_unlock (mq);
return -2;
@@ -58,7 +60,7 @@ session_send_evt_to_thread (void *data, void *args, u32 thread_index,
evt = (session_event_t *) svm_msg_q_msg_data (mq, &msg);
evt->session_index = *(u32 *) data;
break;
- case SESSION_IO_EVT_BUILTIN_TX:
+ case SESSION_IO_EVT_TX_MAIN:
case SESSION_CTRL_EVT_CLOSE:
case SESSION_CTRL_EVT_RESET:
msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
@@ -95,6 +97,13 @@ session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
}
int
+session_program_tx_io_evt (session_handle_tu_t sh, session_evt_type_t evt_type)
+{
+ return session_send_evt_to_thread ((void *) &sh.session_index, 0,
+ (u32) sh.thread_index, evt_type);
+}
+
+int
session_send_ctrl_evt_to_thread (session_t * s, session_evt_type_t evt_type)
{
/* only events supported are disconnect, shutdown and reset */
@@ -202,39 +211,25 @@ session_alloc (u32 thread_index)
{
session_worker_t *wrk = &session_main.wrk[thread_index];
session_t *s;
- u8 will_expand = 0;
- pool_get_aligned_will_expand (wrk->sessions, will_expand,
- CLIB_CACHE_LINE_BYTES);
- /* If we have peekers, let them finish */
- if (PREDICT_FALSE (will_expand && vlib_num_workers ()))
- {
- clib_rwlock_writer_lock (&wrk->peekers_rw_locks);
- pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES);
- clib_rwlock_writer_unlock (&wrk->peekers_rw_locks);
- }
- else
- {
- pool_get_aligned (wrk->sessions, s, CLIB_CACHE_LINE_BYTES);
- }
+
+ pool_get_aligned_safe (wrk->sessions, s, CLIB_CACHE_LINE_BYTES);
clib_memset (s, 0, sizeof (*s));
s->session_index = s - wrk->sessions;
s->thread_index = thread_index;
- s->app_index = APP_INVALID_INDEX;
+ s->al_index = APP_INVALID_INDEX;
+
return s;
}
void
session_free (session_t * s)
{
- if (CLIB_DEBUG)
- {
- u8 thread_index = s->thread_index;
- clib_memset (s, 0xFA, sizeof (*s));
- pool_put (session_main.wrk[thread_index].sessions, s);
- return;
- }
+ session_worker_t *wrk = &session_main.wrk[s->thread_index];
+
SESSION_EVT (SESSION_EVT_FREE, s);
- pool_put (session_main.wrk[s->thread_index].sessions, s);
+ if (CLIB_DEBUG)
+ clib_memset (s, 0xFA, sizeof (*s));
+ pool_put (wrk->sessions, s);
}
u8
@@ -252,35 +247,48 @@ session_is_valid (u32 si, u8 thread_index)
|| s->session_state <= SESSION_STATE_LISTENING)
return 1;
- if (s->session_state == SESSION_STATE_CONNECTING &&
+ if ((s->session_state == SESSION_STATE_CONNECTING ||
+ s->session_state == SESSION_STATE_TRANSPORT_CLOSED) &&
(s->flags & SESSION_F_HALF_OPEN))
return 1;
tc = session_get_transport (s);
- if (s->connection_index != tc->c_index
- || s->thread_index != tc->thread_index || tc->s_index != si)
+ if (s->connection_index != tc->c_index ||
+ s->thread_index != tc->thread_index || tc->s_index != si)
return 0;
return 1;
}
+void
+session_cleanup (session_t *s)
+{
+ segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
+ session_free (s);
+}
+
static void
session_cleanup_notify (session_t * s, session_cleanup_ntf_t ntf)
{
app_worker_t *app_wrk;
app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (!app_wrk)
- return;
+ if (PREDICT_FALSE (!app_wrk))
+ {
+ if (ntf == SESSION_CLEANUP_TRANSPORT)
+ return;
+
+ session_cleanup (s);
+ return;
+ }
app_worker_cleanup_notify (app_wrk, s, ntf);
}
void
-session_free_w_fifos (session_t * s)
+session_program_cleanup (session_t *s)
{
+ ASSERT (s->session_state == SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
- segment_manager_dealloc_fifos (s->rx_fifo, s->tx_fifo);
- session_free (s);
}
/**
@@ -297,7 +305,7 @@ session_delete (session_t * s)
if ((rv = session_lookup_del_session (s)))
clib_warning ("session %u hash delete rv %d", s->session_index, rv);
- session_free_w_fifos (s);
+ session_program_cleanup (s);
}
void
@@ -312,16 +320,27 @@ session_cleanup_half_open (session_handle_t ho_handle)
* session should be removed. */
if (ho->connection_index == ~0)
{
- ho->session_state = SESSION_STATE_CLOSED;
+ session_set_state (ho, SESSION_STATE_CLOSED);
return;
}
/* Migrated transports are no longer half-opens */
transport_cleanup (session_get_transport_proto (ho),
- ho->connection_index, ho->app_index /* overloaded */);
+ ho->connection_index, ho->al_index /* overloaded */);
+ }
+ else if (ho->session_state != SESSION_STATE_TRANSPORT_DELETED)
+ {
+ /* Cleanup half-open session lookup table if need be */
+ if (ho->session_state != SESSION_STATE_TRANSPORT_CLOSED)
+ {
+ transport_connection_t *tc;
+ tc = transport_get_half_open (session_get_transport_proto (ho),
+ ho->connection_index);
+ if (tc && !(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
+ session_lookup_del_half_open (tc);
+ }
+ transport_cleanup_half_open (session_get_transport_proto (ho),
+ ho->connection_index);
}
- else
- transport_cleanup_half_open (session_get_transport_proto (ho),
- ho->connection_index);
session_free (ho);
}
@@ -330,10 +349,12 @@ session_half_open_free (session_t *ho)
{
app_worker_t *app_wrk;
- ASSERT (vlib_get_thread_index () <= 1);
- app_wrk = app_worker_get (ho->app_wrk_index);
- app_worker_del_half_open (app_wrk, ho);
- session_free (ho);
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
+ app_wrk = app_worker_get_if_valid (ho->app_wrk_index);
+ if (app_wrk)
+ app_worker_del_half_open (app_wrk, ho);
+ else
+ session_free (ho);
}
static void
@@ -346,16 +367,26 @@ session_half_open_free_rpc (void *args)
void
session_half_open_delete_notify (transport_connection_t *tc)
{
+ session_t *ho = ho_session_get (tc->s_index);
+
+ /* Cleanup half-open lookup table if need be */
+ if (ho->session_state != SESSION_STATE_TRANSPORT_CLOSED)
+ {
+ if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
+ session_lookup_del_half_open (tc);
+ }
+ session_set_state (ho, SESSION_STATE_TRANSPORT_DELETED);
+
/* Notification from ctrl thread accepted without rpc */
- if (!tc->thread_index)
+ if (tc->thread_index == transport_cl_thread ())
{
- session_half_open_free (ho_session_get (tc->s_index));
+ session_half_open_free (ho);
}
else
{
void *args = uword_to_pointer ((uword) tc->s_index, void *);
- session_send_rpc_evt_to_thread_force (0, session_half_open_free_rpc,
- args);
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ session_half_open_free_rpc, args);
}
}
@@ -364,6 +395,9 @@ session_half_open_migrate_notify (transport_connection_t *tc)
{
session_t *ho;
+ /* Support half-open migrations only for transports with no lookup */
+ ASSERT (tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP);
+
ho = ho_session_get (tc->s_index);
ho->flags |= SESSION_F_IS_MIGRATING;
ho->connection_index = ~0;
@@ -383,8 +417,8 @@ session_half_open_migrated_notify (transport_connection_t *tc)
return -1;
}
ho->connection_index = tc->c_index;
- /* Overload app index for half-open with new thread */
- ho->app_index = tc->thread_index;
+ /* Overload al_index for half-open with new thread */
+ ho->al_index = tc->thread_index;
return 0;
}
@@ -399,7 +433,7 @@ session_alloc_for_connection (transport_connection_t * tc)
s = session_alloc (thread_index);
s->session_type = session_type_from_proto_and_ip (tc->proto, tc->is_ip4);
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
/* Attach transport to session and vice versa */
s->connection_index = tc->c_index;
@@ -546,10 +580,162 @@ session_fifo_tuning (session_t * s, svm_fifo_t * f,
}
}
+void
+session_wrk_program_app_wrk_evts (session_worker_t *wrk, u32 app_wrk_index)
+{
+ u8 need_interrupt;
+
+ ASSERT ((wrk - session_main.wrk) == vlib_get_thread_index ());
+ need_interrupt = clib_bitmap_is_zero (wrk->app_wrks_pending_ntf);
+ wrk->app_wrks_pending_ntf =
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk_index, 1);
+
+ if (need_interrupt)
+ vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index);
+}
+
+always_inline void
+session_program_io_event (app_worker_t *app_wrk, session_t *s,
+ session_evt_type_t et, u8 is_cl)
+{
+ if (is_cl)
+ {
+ /* Special events for connectionless sessions */
+ et += SESSION_IO_EVT_BUILTIN_RX - SESSION_IO_EVT_RX;
+
+ ASSERT (s->thread_index == 0 || et == SESSION_IO_EVT_TX_MAIN);
+ session_event_t evt = {
+ .event_type = et,
+ .session_handle = session_handle (s),
+ };
+
+ app_worker_add_event_custom (app_wrk, vlib_get_thread_index (), &evt);
+ }
+ else
+ {
+ app_worker_add_event (app_wrk, s, et);
+ }
+}
+
+static inline int
+session_notify_subscribers (u32 app_index, session_t *s, svm_fifo_t *f,
+ session_evt_type_t evt_type)
+{
+ app_worker_t *app_wrk;
+ application_t *app;
+ u8 is_cl;
+ int i;
+
+ app = application_get (app_index);
+ if (!app)
+ return -1;
+
+ is_cl = s->thread_index != vlib_get_thread_index ();
+ for (i = 0; i < f->shr->n_subscribers; i++)
+ {
+ app_wrk = application_get_worker (app, f->shr->subscribers[i]);
+ if (!app_wrk)
+ continue;
+ session_program_io_event (app_wrk, s, evt_type, is_cl ? 1 : 0);
+ }
+
+ return 0;
+}
+
+always_inline int
+session_enqueue_notify_inline (session_t *s, u8 is_cl)
+{
+ app_worker_t *app_wrk;
+
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (PREDICT_FALSE (!app_wrk))
+ return -1;
+
+ session_program_io_event (app_wrk, s, SESSION_IO_EVT_RX, is_cl);
+
+ if (PREDICT_FALSE (svm_fifo_n_subscribers (s->rx_fifo)))
+ return session_notify_subscribers (app_wrk->app_index, s, s->rx_fifo,
+ SESSION_IO_EVT_RX);
+
+ return 0;
+}
+
+int
+session_enqueue_notify (session_t *s)
+{
+ return session_enqueue_notify_inline (s, 0 /* is_cl */);
+}
+
+int
+session_enqueue_notify_cl (session_t *s)
+{
+ return session_enqueue_notify_inline (s, 1 /* is_cl */);
+}
+
+int
+session_dequeue_notify (session_t *s)
+{
+ app_worker_t *app_wrk;
+ u8 is_cl;
+
+ /* Unset as soon as event is requested */
+ svm_fifo_clear_deq_ntf (s->tx_fifo);
+
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (PREDICT_FALSE (!app_wrk))
+ return -1;
+
+ is_cl = s->session_state == SESSION_STATE_LISTENING ||
+ s->session_state == SESSION_STATE_OPENED;
+ session_program_io_event (app_wrk, s, SESSION_IO_EVT_TX, is_cl ? 1 : 0);
+
+ if (PREDICT_FALSE (svm_fifo_n_subscribers (s->tx_fifo)))
+ return session_notify_subscribers (app_wrk->app_index, s, s->tx_fifo,
+ SESSION_IO_EVT_TX);
+
+ return 0;
+}
+
+/**
+ * Flushes queue of sessions that are to be notified of new data
+ * enqueued events.
+ *
+ * @param transport_proto transport protocol for which queue to be flushed
+ * @param thread_index Thread index for which the flush is to be performed.
+ * @return 0 on success or a positive number indicating the number of
+ * failures due to API queue being full.
+ */
+void
+session_main_flush_enqueue_events (transport_proto_t transport_proto,
+ u32 thread_index)
+{
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ session_handle_t *handles;
+ session_t *s;
+ u32 i, is_cl;
+
+ handles = wrk->session_to_enqueue[transport_proto];
+
+ for (i = 0; i < vec_len (handles); i++)
+ {
+ s = session_get_from_handle (handles[i]);
+ session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED,
+ 0 /* TODO/not needed */);
+ is_cl =
+ s->thread_index != thread_index || (s->flags & SESSION_F_IS_CLESS);
+ if (!is_cl)
+ session_enqueue_notify_inline (s, 0);
+ else
+ session_enqueue_notify_inline (s, 1);
+ }
+
+ vec_reset_length (handles);
+ wrk->session_to_enqueue[transport_proto] = handles;
+}
+
/*
- * Enqueue data for delivery to session peer. Does not notify peer of enqueue
- * event but on request can queue notification events for later delivery by
- * calling stream_server_flush_enqueue_events().
+ * Enqueue data for delivery to app. If requested, it queues app notification
+ * event for later delivery.
*
* @param tc Transport connection which is to be enqueued data
* @param b Buffer to be enqueued
@@ -598,15 +784,14 @@ session_enqueue_stream_connection (transport_connection_t * tc,
if (queue_event)
{
- /* Queue RX event on this fifo. Eventually these will need to be flushed
- * by calling stream_server_flush_enqueue_events () */
- session_worker_t *wrk;
-
- wrk = session_main_get_worker (s->thread_index);
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
if (!(s->flags & SESSION_F_RX_EVT))
{
+ session_worker_t *wrk = session_main_get_worker (s->thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index ());
s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[tc->proto], s->session_index);
+ vec_add1 (wrk->session_to_enqueue[tc->proto], session_handle (s));
}
session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
@@ -615,10 +800,11 @@ session_enqueue_stream_connection (transport_connection_t * tc,
return enqueued;
}
-int
-session_enqueue_dgram_connection (session_t * s,
- session_dgram_hdr_t * hdr,
- vlib_buffer_t * b, u8 proto, u8 queue_event)
+always_inline int
+session_enqueue_dgram_connection_inline (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event, u32 is_cl)
{
int rv;
@@ -627,12 +813,10 @@ session_enqueue_dgram_connection (session_t * s,
if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
{
- /* *INDENT-OFF* */
svm_fifo_seg_t segs[2] = {
{ (u8 *) hdr, sizeof (*hdr) },
{ vlib_buffer_get_current (b), b->current_length }
};
- /* *INDENT-ON* */
rv = svm_fifo_enqueue_segments (s->rx_fifo, segs, 2,
0 /* allow_partial */ );
@@ -664,15 +848,16 @@ session_enqueue_dgram_connection (session_t * s,
if (queue_event && rv > 0)
{
- /* Queue RX event on this fifo. Eventually these will need to be flushed
- * by calling stream_server_flush_enqueue_events () */
- session_worker_t *wrk;
-
- wrk = session_main_get_worker (s->thread_index);
+ /* Queue RX event on this fifo. Eventually these will need to be
+ * flushed by calling @ref session_main_flush_enqueue_events () */
if (!(s->flags & SESSION_F_RX_EVT))
{
+ u32 thread_index =
+ is_cl ? vlib_get_thread_index () : s->thread_index;
+ session_worker_t *wrk = session_main_get_worker (thread_index);
+ ASSERT (s->thread_index == vlib_get_thread_index () || is_cl);
s->flags |= SESSION_F_RX_EVT;
- vec_add1 (wrk->session_to_enqueue[proto], s->session_index);
+ vec_add1 (wrk->session_to_enqueue[proto], session_handle (s));
}
session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED, 0);
@@ -681,6 +866,34 @@ session_enqueue_dgram_connection (session_t * s,
}
int
+session_enqueue_dgram_connection (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 0 /* is_cl */);
+}
+
+int
+session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto, u8 queue_event)
+{
+ return session_enqueue_dgram_connection_inline (s, hdr, b, proto,
+ queue_event, 1 /* is_cl */);
+}
+
+int
+session_enqueue_dgram_connection_cl (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event)
+{
+ session_t *awls;
+
+ awls = app_listener_select_wrk_cl_session (s, hdr);
+ return session_enqueue_dgram_connection_inline (awls, hdr, b, proto,
+ queue_event, 1 /* is_cl */);
+}
+
+int
session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer,
u32 offset, u32 max_bytes)
{
@@ -703,187 +916,6 @@ session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes)
return rv;
}
-static inline int
-session_notify_subscribers (u32 app_index, session_t * s,
- svm_fifo_t * f, session_evt_type_t evt_type)
-{
- app_worker_t *app_wrk;
- application_t *app;
- int i;
-
- app = application_get (app_index);
- if (!app)
- return -1;
-
- for (i = 0; i < f->shr->n_subscribers; i++)
- {
- app_wrk = application_get_worker (app, f->shr->subscribers[i]);
- if (!app_wrk)
- continue;
- if (app_worker_lock_and_send_event (app_wrk, s, evt_type))
- return -1;
- }
-
- return 0;
-}
-
-/**
- * Notify session peer that new data has been enqueued.
- *
- * @param s Stream session for which the event is to be generated.
- * @param lock Flag to indicate if call should lock message queue.
- *
- * @return 0 on success or negative number if failed to send notification.
- */
-static inline int
-session_enqueue_notify_inline (session_t * s)
-{
- app_worker_t *app_wrk;
- u32 session_index;
- u8 n_subscribers;
-
- session_index = s->session_index;
- n_subscribers = svm_fifo_n_subscribers (s->rx_fifo);
-
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (PREDICT_FALSE (!app_wrk))
- {
- SESSION_DBG ("invalid s->app_index = %d", s->app_wrk_index);
- return 0;
- }
-
- SESSION_EVT (SESSION_EVT_ENQ, s, svm_fifo_max_dequeue_prod (s->rx_fifo));
-
- s->flags &= ~SESSION_F_RX_EVT;
-
- /* Application didn't confirm accept yet */
- if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING))
- return 0;
-
- if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s,
- SESSION_IO_EVT_RX)))
- return -1;
-
- if (PREDICT_FALSE (n_subscribers))
- {
- s = session_get (session_index, vlib_get_thread_index ());
- return session_notify_subscribers (app_wrk->app_index, s,
- s->rx_fifo, SESSION_IO_EVT_RX);
- }
-
- return 0;
-}
-
-int
-session_enqueue_notify (session_t * s)
-{
- return session_enqueue_notify_inline (s);
-}
-
-static void
-session_enqueue_notify_rpc (void *arg)
-{
- u32 session_index = pointer_to_uword (arg);
- session_t *s;
-
- s = session_get_if_valid (session_index, vlib_get_thread_index ());
- if (!s)
- return;
-
- session_enqueue_notify (s);
-}
-
-/**
- * Like session_enqueue_notify, but can be called from a thread that does not
- * own the session.
- */
-void
-session_enqueue_notify_thread (session_handle_t sh)
-{
- u32 thread_index = session_thread_from_handle (sh);
- u32 session_index = session_index_from_handle (sh);
-
- /*
- * Pass session index (u32) as opposed to handle (u64) in case pointers
- * are not 64-bit.
- */
- session_send_rpc_evt_to_thread (thread_index,
- session_enqueue_notify_rpc,
- uword_to_pointer (session_index, void *));
-}
-
-int
-session_dequeue_notify (session_t * s)
-{
- app_worker_t *app_wrk;
-
- svm_fifo_clear_deq_ntf (s->tx_fifo);
-
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (PREDICT_FALSE (!app_wrk))
- return -1;
-
- if (PREDICT_FALSE (app_worker_lock_and_send_event (app_wrk, s,
- SESSION_IO_EVT_TX)))
- return -1;
-
- if (PREDICT_FALSE (s->tx_fifo->shr->n_subscribers))
- return session_notify_subscribers (app_wrk->app_index, s,
- s->tx_fifo, SESSION_IO_EVT_TX);
-
- return 0;
-}
-
-/**
- * Flushes queue of sessions that are to be notified of new data
- * enqueued events.
- *
- * @param thread_index Thread index for which the flush is to be performed.
- * @return 0 on success or a positive number indicating the number of
- * failures due to API queue being full.
- */
-int
-session_main_flush_enqueue_events (u8 transport_proto, u32 thread_index)
-{
- session_worker_t *wrk = session_main_get_worker (thread_index);
- session_t *s;
- int i, errors = 0;
- u32 *indices;
-
- indices = wrk->session_to_enqueue[transport_proto];
-
- for (i = 0; i < vec_len (indices); i++)
- {
- s = session_get_if_valid (indices[i], thread_index);
- if (PREDICT_FALSE (!s))
- {
- errors++;
- continue;
- }
-
- session_fifo_tuning (s, s->rx_fifo, SESSION_FT_ACTION_ENQUEUED,
- 0 /* TODO/not needed */ );
-
- if (PREDICT_FALSE (session_enqueue_notify_inline (s)))
- errors++;
- }
-
- vec_reset_length (indices);
- wrk->session_to_enqueue[transport_proto] = indices;
-
- return errors;
-}
-
-int
-session_main_flush_all_enqueue_events (u8 transport_proto)
-{
- vlib_thread_main_t *vtm = vlib_get_thread_main ();
- int i, errors = 0;
- for (i = 0; i < 1 + vtm->n_threads; i++)
- errors += session_main_flush_enqueue_events (transport_proto, i);
- return errors;
-}
-
int
session_stream_connect_notify (transport_connection_t * tc,
session_error_t err)
@@ -898,6 +930,7 @@ session_stream_connect_notify (transport_connection_t * tc,
session_lookup_del_half_open (tc);
ho = ho_session_get (tc->s_index);
+ session_set_state (ho, SESSION_STATE_TRANSPORT_CLOSED);
opaque = ho->opaque;
app_wrk = app_worker_get_if_valid (ho->app_wrk_index);
if (!app_wrk)
@@ -907,8 +940,9 @@ session_stream_connect_notify (transport_connection_t * tc,
return app_worker_connect_notify (app_wrk, s, err, opaque);
s = session_alloc_for_connection (tc);
- s->session_state = SESSION_STATE_CONNECTING;
+ session_set_state (s, SESSION_STATE_CONNECTING);
s->app_wrk_index = app_wrk->wrk_index;
+ s->opaque = opaque;
new_si = s->session_index;
new_ti = s->thread_index;
@@ -920,7 +954,7 @@ session_stream_connect_notify (transport_connection_t * tc,
}
s = session_get (new_si, new_ti);
- s->session_state = SESSION_STATE_READY;
+ session_set_state (s, SESSION_STATE_READY);
session_lookup_add_connection (tc, session_handle (s));
if (app_worker_connect_notify (app_wrk, s, SESSION_E_NONE, opaque))
@@ -937,17 +971,19 @@ session_stream_connect_notify (transport_connection_t * tc,
}
static void
-session_switch_pool_reply (void *arg)
+session_switch_pool_closed_rpc (void *arg)
{
- u32 session_index = pointer_to_uword (arg);
+ session_handle_t sh;
session_t *s;
- s = session_get_if_valid (session_index, vlib_get_thread_index ());
+ sh = pointer_to_uword (arg);
+ s = session_get_from_handle_if_valid (sh);
if (!s)
return;
- /* Notify app that it has data on the new session */
- session_enqueue_notify (s);
+ transport_cleanup (session_get_transport_proto (s), s->connection_index,
+ s->thread_index);
+ session_cleanup (s);
}
typedef struct _session_switch_pool_args
@@ -965,39 +1001,40 @@ static void
session_switch_pool (void *cb_args)
{
session_switch_pool_args_t *args = (session_switch_pool_args_t *) cb_args;
- session_handle_t new_sh;
+ session_handle_t sh, new_sh;
segment_manager_t *sm;
app_worker_t *app_wrk;
session_t *s;
- void *rargs;
ASSERT (args->thread_index == vlib_get_thread_index ());
s = session_get (args->session_index, args->thread_index);
- transport_cleanup (session_get_transport_proto (s), s->connection_index,
- s->thread_index);
+ app_wrk = app_worker_get_if_valid (s->app_wrk_index);
+ if (!app_wrk)
+ goto app_closed;
- new_sh = session_make_handle (args->new_session_index,
- args->new_thread_index);
+ /* Cleanup fifo segment slice state for fifos */
+ sm = app_worker_get_connect_segment_manager (app_wrk);
+ segment_manager_detach_fifo (sm, &s->rx_fifo);
+ segment_manager_detach_fifo (sm, &s->tx_fifo);
- app_wrk = app_worker_get_if_valid (s->app_wrk_index);
- if (app_wrk)
- {
- /* Cleanup fifo segment slice state for fifos */
- sm = app_worker_get_connect_segment_manager (app_wrk);
- segment_manager_detach_fifo (sm, &s->rx_fifo);
- segment_manager_detach_fifo (sm, &s->tx_fifo);
+ /* Check if session closed during migration */
+ if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ goto app_closed;
- /* Notify app, using old session, about the migration event */
- app_worker_migrate_notify (app_wrk, s, new_sh);
- }
+ new_sh =
+ session_make_handle (args->new_session_index, args->new_thread_index);
+ app_worker_migrate_notify (app_wrk, s, new_sh);
- /* Trigger app read and fifo updates on the new thread */
- rargs = uword_to_pointer (args->new_session_index, void *);
- session_send_rpc_evt_to_thread (args->new_thread_index,
- session_switch_pool_reply, rargs);
+ clib_mem_free (cb_args);
+ return;
- session_free (s);
+app_closed:
+ /* Session closed during migration. Clean everything up */
+ sh = session_handle (s);
+ session_send_rpc_evt_to_thread (args->new_thread_index,
+ session_switch_pool_closed_rpc,
+ uword_to_pointer (sh, void *));
clib_mem_free (cb_args);
}
@@ -1018,7 +1055,7 @@ session_dgram_connect_notify (transport_connection_t * tc,
*/
new_s = session_clone_safe (tc->s_index, old_thread_index);
new_s->connection_index = tc->c_index;
- new_s->session_state = SESSION_STATE_READY;
+ session_set_state (new_s, SESSION_STATE_READY);
new_s->flags |= SESSION_F_IS_MIGRATING;
if (!(tc->flags & TRANSPORT_CONNECTION_F_NO_LOOKUP))
@@ -1067,7 +1104,16 @@ session_transport_closing_notify (transport_connection_t * tc)
s = session_get (tc->s_index, tc->thread_index);
if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
return;
- s->session_state = SESSION_STATE_TRANSPORT_CLOSING;
+
+ /* Wait for reply from app before sending notification as the
+ * accept might be rejected */
+ if (s->session_state == SESSION_STATE_ACCEPTING)
+ {
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
+ return;
+ }
+
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
app_wrk = app_worker_get (s->app_wrk_index);
app_worker_close_notify (app_wrk, s);
}
@@ -1108,7 +1154,7 @@ session_transport_delete_notify (transport_connection_t * tc)
* because transport will soon be closed and closed sessions
* are assumed to have been removed from the lookup table */
session_lookup_del_session (s);
- s->session_state = SESSION_STATE_TRANSPORT_DELETED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
svm_fifo_dequeue_drop_all (s->tx_fifo);
break;
@@ -1119,7 +1165,7 @@ session_transport_delete_notify (transport_connection_t * tc)
* session is just removed because both transport and app have
* confirmed the close*/
session_lookup_del_session (s);
- s->session_state = SESSION_STATE_TRANSPORT_DELETED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
svm_fifo_dequeue_drop_all (s->tx_fifo);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE);
@@ -1128,6 +1174,7 @@ session_transport_delete_notify (transport_connection_t * tc)
break;
case SESSION_STATE_CLOSED:
session_cleanup_notify (s, SESSION_CLEANUP_TRANSPORT);
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
session_delete (s);
break;
default:
@@ -1155,6 +1202,9 @@ session_transport_closed_notify (transport_connection_t * tc)
if (!(s = session_get_if_valid (tc->s_index, tc->thread_index)))
return;
+ if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)
+ return;
+
/* Transport thinks that app requested close but it actually didn't.
* Can happen for tcp:
* 1)if fin and rst are received in close succession.
@@ -1163,17 +1213,15 @@ session_transport_closed_notify (transport_connection_t * tc)
{
session_transport_closing_notify (tc);
svm_fifo_dequeue_drop_all (s->tx_fifo);
- s->session_state = SESSION_STATE_TRANSPORT_CLOSED;
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED);
}
/* If app close has not been received or has not yet resulted in
* a transport close, only mark the session transport as closed */
else if (s->session_state <= SESSION_STATE_CLOSING)
- {
- s->session_state = SESSION_STATE_TRANSPORT_CLOSED;
- }
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSED);
/* If app also closed, switch to closed */
else if (s->session_state == SESSION_STATE_APP_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
app_wrk = app_worker_get_if_valid (s->app_wrk_index);
if (app_wrk)
@@ -1193,7 +1241,12 @@ session_transport_reset_notify (transport_connection_t * tc)
svm_fifo_dequeue_drop_all (s->tx_fifo);
if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
return;
- s->session_state = SESSION_STATE_TRANSPORT_CLOSING;
+ if (s->session_state == SESSION_STATE_ACCEPTING)
+ {
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
+ return;
+ }
+ session_set_state (s, SESSION_STATE_TRANSPORT_CLOSING);
app_wrk = app_worker_get (s->app_wrk_index);
app_worker_reset_notify (app_wrk, s);
}
@@ -1210,12 +1263,12 @@ session_stream_accept_notify (transport_connection_t * tc)
return -1;
if (s->session_state != SESSION_STATE_CREATED)
return 0;
- s->session_state = SESSION_STATE_ACCEPTING;
+ session_set_state (s, SESSION_STATE_ACCEPTING);
if (app_worker_accept_notify (app_wrk, s))
{
/* On transport delete, no notifications should be sent. Unless, the
* accept is retried and successful. */
- s->session_state = SESSION_STATE_CREATED;
+ session_set_state (s, SESSION_STATE_CREATED);
return -1;
}
return 0;
@@ -1233,7 +1286,7 @@ session_stream_accept (transport_connection_t * tc, u32 listener_index,
s = session_alloc_for_connection (tc);
s->listener_handle = ((u64) thread_index << 32) | (u64) listener_index;
- s->session_state = SESSION_STATE_CREATED;
+ session_set_state (s, SESSION_STATE_CREATED);
if ((rv = app_worker_init_accepted (s)))
{
@@ -1277,6 +1330,7 @@ session_dgram_accept (transport_connection_t * tc, u32 listener_index,
}
session_lookup_add_connection (tc, session_handle (s));
+ session_set_state (s, SESSION_STATE_ACCEPTING);
app_wrk = app_worker_get (s->app_wrk_index);
if ((rv = app_worker_accept_notify (app_wrk, s)))
@@ -1314,7 +1368,10 @@ session_open_cl (session_endpoint_cfg_t *rmt, session_handle_t *rsh)
app_wrk = app_worker_get (rmt->app_wrk_index);
s = session_alloc_for_connection (tc);
s->app_wrk_index = app_wrk->wrk_index;
- s->session_state = SESSION_STATE_OPENED;
+ s->opaque = rmt->opaque;
+ session_set_state (s, SESSION_STATE_OPENED);
+ if (transport_connection_is_cless (tc))
+ s->flags |= SESSION_F_IS_CLESS;
if (app_worker_init_connected (app_wrk, s))
{
session_free (s);
@@ -1382,13 +1439,11 @@ session_open_app (session_endpoint_cfg_t *rmt, session_handle_t *rsh)
typedef int (*session_open_service_fn) (session_endpoint_cfg_t *,
session_handle_t *);
-/* *INDENT-OFF* */
static session_open_service_fn session_open_srv_fns[TRANSPORT_N_SERVICES] = {
session_open_vc,
session_open_cl,
session_open_app,
};
-/* *INDENT-ON* */
/**
* Ask transport to open connection to remote transport endpoint.
@@ -1422,12 +1477,12 @@ session_open (session_endpoint_cfg_t *rmt, session_handle_t *rsh)
int
session_listen (session_t * ls, session_endpoint_cfg_t * sep)
{
- transport_endpoint_t *tep;
+ transport_endpoint_cfg_t *tep;
int tc_index;
u32 s_index;
/* Transport bind/listen */
- tep = session_endpoint_to_transport (sep);
+ tep = session_endpoint_to_transport_cfg (sep);
s_index = ls->session_index;
tc_index = transport_start_listen (session_get_transport_proto (ls),
s_index, tep);
@@ -1439,6 +1494,9 @@ session_listen (session_t * ls, session_endpoint_cfg_t * sep)
* worker because local tables (for ct sessions) are not backed by a fib */
ls = listen_session_get (s_index);
ls->connection_index = tc_index;
+ ls->opaque = sep->opaque;
+ if (transport_connection_is_cless (session_get_transport (ls)))
+ ls->flags |= SESSION_F_IS_CLESS;
return 0;
}
@@ -1493,9 +1551,15 @@ session_half_close (session_t *s)
void
session_close (session_t * s)
{
- if (!s)
+ if (!s || (s->flags & SESSION_F_APP_CLOSED))
return;
+ /* Transports can close and delete their state independent of app closes
+ * and transport initiated state transitions can hide app closes. Instead
+ * of extending the state machine to support separate tracking of app and
+ * transport initiated closes, use a flag. */
+ s->flags |= SESSION_F_APP_CLOSED;
+
if (s->session_state >= SESSION_STATE_CLOSING)
{
/* Session will only be removed once both app and transport
@@ -1506,7 +1570,12 @@ session_close (session_t * s)
return;
}
- s->session_state = SESSION_STATE_CLOSING;
+ /* App closed so stop propagating dequeue notifications.
+ * App might disconnect session before connected, in this case,
+ * tx_fifo may not be setup yet, so clear only it's inited. */
+ if (s->tx_fifo)
+ svm_fifo_clear_deq_ntf (s->tx_fifo);
+ session_set_state (s, SESSION_STATE_CLOSING);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_CLOSE);
}
@@ -1518,12 +1587,46 @@ session_reset (session_t * s)
{
if (s->session_state >= SESSION_STATE_CLOSING)
return;
- /* Drop all outstanding tx data */
- svm_fifo_dequeue_drop_all (s->tx_fifo);
- s->session_state = SESSION_STATE_CLOSING;
+ /* Drop all outstanding tx data
+ * App might disconnect session before connected, in this case,
+ * tx_fifo may not be setup yet, so clear only it's inited. */
+ if (s->tx_fifo)
+ svm_fifo_dequeue_drop_all (s->tx_fifo);
+ session_set_state (s, SESSION_STATE_CLOSING);
session_program_transport_ctrl_evt (s, SESSION_CTRL_EVT_RESET);
}
+void
+session_detach_app (session_t *s)
+{
+ if (s->session_state < SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_close (s);
+ }
+ else if (s->session_state < SESSION_STATE_TRANSPORT_DELETED)
+ {
+ transport_connection_t *tc;
+
+ /* Transport is closing but it's not yet deleted. Confirm close and
+ * subsequently detach transport from session and enqueue a session
+ * cleanup notification. Transport closed and cleanup notifications are
+ * going to be dropped by session layer apis */
+ transport_close (session_get_transport_proto (s), s->connection_index,
+ s->thread_index);
+ tc = session_get_transport (s);
+ tc->s_index = SESSION_INVALID_INDEX;
+ session_set_state (s, SESSION_STATE_TRANSPORT_DELETED);
+ session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
+ }
+ else
+ {
+ session_cleanup_notify (s, SESSION_CLEANUP_SESSION);
+ }
+
+ s->flags |= SESSION_F_APP_CLOSED;
+ s->app_wrk_index = APP_INVALID_INDEX;
+}
+
/**
* Notify transport the session can be half-disconnected.
*
@@ -1555,10 +1658,10 @@ session_transport_close (session_t * s)
if (s->session_state >= SESSION_STATE_APP_CLOSED)
{
if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
/* If transport is already deleted, just free the session */
else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
- session_free_w_fifos (s);
+ session_program_cleanup (s);
return;
}
@@ -1568,7 +1671,7 @@ session_transport_close (session_t * s)
* delete notify. This will finally lead to the complete cleanup of the
* session.
*/
- s->session_state = SESSION_STATE_APP_CLOSED;
+ session_set_state (s, SESSION_STATE_APP_CLOSED);
transport_close (session_get_transport_proto (s), s->connection_index,
s->thread_index);
@@ -1583,13 +1686,13 @@ session_transport_reset (session_t * s)
if (s->session_state >= SESSION_STATE_APP_CLOSED)
{
if (s->session_state == SESSION_STATE_TRANSPORT_CLOSED)
- s->session_state = SESSION_STATE_CLOSED;
+ session_set_state (s, SESSION_STATE_CLOSED);
else if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
- session_free_w_fifos (s);
+ session_program_cleanup (s);
return;
}
- s->session_state = SESSION_STATE_APP_CLOSED;
+ session_set_state (s, SESSION_STATE_APP_CLOSED);
transport_reset (session_get_transport_proto (s), s->connection_index,
s->thread_index);
}
@@ -1616,64 +1719,63 @@ session_transport_cleanup (session_t * s)
}
/**
- * Allocate event queues in the shared-memory segment
+ * Allocate worker mqs in share-able segment
*
- * That can only be a newly created memfd segment, that must be
- * mapped by all apps/stack users.
+ * That can only be a newly created memfd segment, that must be mapped
+ * by all apps/stack users unless private rx mqs are enabled.
*/
void
-session_vpp_event_queues_allocate (session_main_t * smm)
+session_vpp_wrk_mqs_alloc (session_main_t *smm)
{
- u32 evt_q_length = 2048, evt_size = sizeof (session_event_t);
- fifo_segment_t *eqs = &smm->evt_qs_segment;
- uword eqs_size = 64 << 20;
- pid_t vpp_pid = getpid ();
+ u32 mq_q_length = 2048, evt_size = sizeof (session_event_t);
+ fifo_segment_t *mqs_seg = &smm->wrk_mqs_segment;
+ svm_msg_q_cfg_t _cfg, *cfg = &_cfg;
+ uword mqs_seg_size;
int i;
- if (smm->configured_event_queue_length)
- evt_q_length = smm->configured_event_queue_length;
+ mq_q_length = clib_max (mq_q_length, smm->configured_wrk_mq_length);
- if (smm->evt_qs_segment_size)
- eqs_size = smm->evt_qs_segment_size;
+ svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = {
+ { mq_q_length, evt_size, 0 }, { mq_q_length >> 1, 256, 0 }
+ };
+ cfg->consumer_pid = 0;
+ cfg->n_rings = 2;
+ cfg->q_nitems = mq_q_length;
+ cfg->ring_cfgs = rc;
+
+ /*
+ * Compute mqs segment size based on rings config and leave space
+ * for passing extended configuration messages, i.e., data allocated
+ * outside of the rings. If provided with a config value, accept it
+ * if larger than minimum size.
+ */
+ mqs_seg_size = svm_msg_q_size_to_alloc (cfg) * vec_len (smm->wrk);
+ mqs_seg_size = mqs_seg_size + (1 << 20);
+ mqs_seg_size = clib_max (mqs_seg_size, smm->wrk_mqs_segment_size);
- eqs->ssvm.ssvm_size = eqs_size;
- eqs->ssvm.my_pid = vpp_pid;
- eqs->ssvm.name = format (0, "%s%c", "session: evt-qs-segment", 0);
- /* clib_mem_vm_map_shared consumes first page before requested_va */
- eqs->ssvm.requested_va = smm->session_baseva + clib_mem_get_page_size ();
+ mqs_seg->ssvm.ssvm_size = mqs_seg_size;
+ mqs_seg->ssvm.my_pid = getpid ();
+ mqs_seg->ssvm.name = format (0, "%s%c", "session: wrk-mqs-segment", 0);
- if (ssvm_server_init (&eqs->ssvm, SSVM_SEGMENT_MEMFD))
+ if (ssvm_server_init (&mqs_seg->ssvm, SSVM_SEGMENT_MEMFD))
{
clib_warning ("failed to initialize queue segment");
return;
}
- fifo_segment_init (eqs);
+ fifo_segment_init (mqs_seg);
/* Special fifo segment that's filled only with mqs */
- eqs->h->n_mqs = vec_len (smm->wrk);
+ mqs_seg->h->n_mqs = vec_len (smm->wrk);
for (i = 0; i < vec_len (smm->wrk); i++)
- {
- svm_msg_q_cfg_t _cfg, *cfg = &_cfg;
- svm_msg_q_ring_cfg_t rc[SESSION_MQ_N_RINGS] = {
- {evt_q_length, evt_size, 0}
- ,
- {evt_q_length >> 1, 256, 0}
- };
- cfg->consumer_pid = 0;
- cfg->n_rings = 2;
- cfg->q_nitems = evt_q_length;
- cfg->ring_cfgs = rc;
-
- smm->wrk[i].vpp_event_queue = fifo_segment_msg_q_alloc (eqs, i, cfg);
- }
+ smm->wrk[i].vpp_event_queue = fifo_segment_msg_q_alloc (mqs_seg, i, cfg);
}
fifo_segment_t *
-session_main_get_evt_q_segment (void)
+session_main_get_wrk_mqs_segment (void)
{
- return &session_main.evt_qs_segment;
+ return &session_main.wrk_mqs_segment;
}
u64
@@ -1689,14 +1791,28 @@ session_segment_handle (session_t * s)
f->segment_index);
}
-/* *INDENT-OFF* */
+void
+session_get_original_dst (transport_endpoint_t *i2o_src,
+ transport_endpoint_t *i2o_dst,
+ transport_proto_t transport_proto, u32 *original_dst,
+ u16 *original_dst_port)
+{
+ session_main_t *smm = vnet_get_session_main ();
+ ip_protocol_t proto =
+ (transport_proto == TRANSPORT_PROTO_TCP ? IPPROTO_TCP : IPPROTO_UDP);
+ if (!smm->original_dst_lookup || !i2o_dst->is_ip4)
+ return;
+ smm->original_dst_lookup (&i2o_src->ip.ip4, i2o_src->port, &i2o_dst->ip.ip4,
+ i2o_dst->port, proto, original_dst,
+ original_dst_port);
+}
+
static session_fifo_rx_fn *session_tx_fns[TRANSPORT_TX_N_FNS] = {
session_tx_fifo_peek_and_snd,
session_tx_fifo_dequeue_and_snd,
session_tx_fifo_dequeue_internal,
session_tx_fifo_dequeue_and_snd
};
-/* *INDENT-ON* */
void
session_register_transport (transport_proto_t transport_proto,
@@ -1721,6 +1837,39 @@ session_register_transport (transport_proto_t transport_proto,
session_tx_fns[vft->transport_options.tx_type];
}
+void
+session_register_update_time_fn (session_update_time_fn fn, u8 is_add)
+{
+ session_main_t *smm = &session_main;
+ session_update_time_fn *fi;
+ u32 fi_pos = ~0;
+ u8 found = 0;
+
+ vec_foreach (fi, smm->update_time_fns)
+ {
+ if (*fi == fn)
+ {
+ fi_pos = fi - smm->update_time_fns;
+ found = 1;
+ break;
+ }
+ }
+
+ if (is_add)
+ {
+ if (found)
+ {
+ clib_warning ("update time fn %p already registered", fn);
+ return;
+ }
+ vec_add1 (smm->update_time_fns, fn);
+ }
+ else
+ {
+ vec_del1 (smm->update_time_fns, fi_pos);
+ }
+}
+
transport_proto_t
session_add_transport_proto (void)
{
@@ -1788,6 +1937,44 @@ session_queue_run_on_main_thread (vlib_main_t * vm)
vlib_node_set_interrupt_pending (vm, session_queue_node.index);
}
+static void
+session_stats_collector_fn (vlib_stats_collector_data_t *d)
+{
+ u32 i, n_workers, n_wrk_sessions, n_sessions = 0;
+ session_main_t *smm = &session_main;
+ session_worker_t *wrk;
+ counter_t **counters;
+ counter_t *cb;
+
+ n_workers = vec_len (smm->wrk);
+ vlib_stats_validate (d->entry_index, 0, n_workers - 1);
+ counters = d->entry->data;
+ cb = counters[0];
+
+ for (i = 0; i < vec_len (smm->wrk); i++)
+ {
+ wrk = session_main_get_worker (i);
+ n_wrk_sessions = pool_elts (wrk->sessions);
+ cb[i] = n_wrk_sessions;
+ n_sessions += n_wrk_sessions;
+ }
+
+ vlib_stats_set_gauge (d->private_data, n_sessions);
+}
+
+static void
+session_stats_collector_init (void)
+{
+ vlib_stats_collector_reg_t reg = {};
+
+ reg.entry_index =
+ vlib_stats_add_counter_vector ("/sys/session/sessions_per_worker");
+ reg.private_data = vlib_stats_add_gauge ("/sys/session/sessions_total");
+ reg.collect_fn = session_stats_collector_fn;
+ vlib_stats_register_collector_fn (&reg);
+ vlib_stats_validate (reg.entry_index, 0, vlib_get_n_threads ());
+}
+
static clib_error_t *
session_manager_main_enable (vlib_main_t * vm)
{
@@ -1808,6 +1995,7 @@ session_manager_main_enable (vlib_main_t * vm)
/* Allocate cache line aligned worker contexts */
vec_validate_aligned (smm->wrk, num_threads - 1, CLIB_CACHE_LINE_BYTES);
+ clib_spinlock_init (&session_main.pool_realloc_lock);
for (i = 0; i < num_threads; i++)
{
@@ -1816,21 +2004,20 @@ session_manager_main_enable (vlib_main_t * vm)
wrk->new_head = clib_llist_make_head (wrk->event_elts, evt_list);
wrk->old_head = clib_llist_make_head (wrk->event_elts, evt_list);
wrk->pending_connects = clib_llist_make_head (wrk->event_elts, evt_list);
+ wrk->evts_pending_main =
+ clib_llist_make_head (wrk->event_elts, evt_list);
wrk->vm = vlib_get_main_by_index (i);
wrk->last_vlib_time = vlib_time_now (vm);
wrk->last_vlib_us_time = wrk->last_vlib_time * CLIB_US_TIME_FREQ;
wrk->timerfd = -1;
vec_validate (wrk->session_to_enqueue, smm->last_transport_proto_type);
- if (num_threads > 1)
- clib_rwlock_init (&smm->wrk[i].peekers_rw_locks);
-
if (!smm->no_adaptive && smm->use_private_rx_mqs)
session_wrk_enable_adaptive_mode (wrk);
}
/* Allocate vpp event queues segment and queue */
- session_vpp_event_queues_allocate (smm);
+ session_vpp_wrk_mqs_alloc (smm);
/* Initialize segment manager properties */
segment_manager_main_init ();
@@ -1860,6 +2047,7 @@ session_manager_main_enable (vlib_main_t * vm)
session_lookup_init ();
app_namespaces_init ();
transport_init ();
+ session_stats_collector_init ();
smm->is_initialized = 1;
done:
@@ -1879,6 +2067,87 @@ session_manager_main_disable (vlib_main_t * vm)
transport_enable_disable (vm, 0 /* is_en */ );
}
+/* in this new callback, cookie hint the index */
+void
+session_dma_completion_cb (vlib_main_t *vm, struct vlib_dma_batch *batch)
+{
+ session_worker_t *wrk;
+ wrk = session_main_get_worker (vm->thread_index);
+ session_dma_transfer *dma_transfer;
+
+ dma_transfer = &wrk->dma_trans[wrk->trans_head];
+ vec_add (wrk->pending_tx_buffers, dma_transfer->pending_tx_buffers,
+ vec_len (dma_transfer->pending_tx_buffers));
+ vec_add (wrk->pending_tx_nexts, dma_transfer->pending_tx_nexts,
+ vec_len (dma_transfer->pending_tx_nexts));
+ vec_reset_length (dma_transfer->pending_tx_buffers);
+ vec_reset_length (dma_transfer->pending_tx_nexts);
+ wrk->trans_head++;
+ if (wrk->trans_head == wrk->trans_size)
+ wrk->trans_head = 0;
+ return;
+}
+
+static void
+session_prepare_dma_args (vlib_dma_config_t *args)
+{
+ args->max_batches = 16;
+ args->max_transfers = DMA_TRANS_SIZE;
+ args->max_transfer_size = 65536;
+ args->features = 0;
+ args->sw_fallback = 1;
+ args->barrier_before_last = 1;
+ args->callback_fn = session_dma_completion_cb;
+}
+
+static void
+session_node_enable_dma (u8 is_en, int n_vlibs)
+{
+ vlib_dma_config_t args;
+ session_prepare_dma_args (&args);
+ session_worker_t *wrk;
+ vlib_main_t *vm;
+
+ int config_index = -1;
+
+ if (is_en)
+ {
+ vm = vlib_get_main_by_index (0);
+ config_index = vlib_dma_config_add (vm, &args);
+ }
+ else
+ {
+ vm = vlib_get_main_by_index (0);
+ wrk = session_main_get_worker (0);
+ if (wrk->config_index >= 0)
+ vlib_dma_config_del (vm, wrk->config_index);
+ }
+ int i;
+ for (i = 0; i < n_vlibs; i++)
+ {
+ vm = vlib_get_main_by_index (i);
+ wrk = session_main_get_worker (vm->thread_index);
+ wrk->config_index = config_index;
+ if (is_en)
+ {
+ if (config_index >= 0)
+ wrk->dma_enabled = true;
+ wrk->dma_trans = (session_dma_transfer *) clib_mem_alloc (
+ sizeof (session_dma_transfer) * DMA_TRANS_SIZE);
+ bzero (wrk->dma_trans,
+ sizeof (session_dma_transfer) * DMA_TRANS_SIZE);
+ }
+ else
+ {
+ if (wrk->dma_trans)
+ clib_mem_free (wrk->dma_trans);
+ }
+ wrk->trans_head = 0;
+ wrk->trans_tail = 0;
+ wrk->trans_size = DMA_TRANS_SIZE;
+ }
+}
+
void
session_node_enable_disable (u8 is_en)
{
@@ -1914,11 +2183,15 @@ session_node_enable_disable (u8 is_en)
if (!sm->poll_main)
continue;
}
+ vlib_node_set_state (vm, session_input_node.index, mstate);
vlib_node_set_state (vm, session_queue_node.index, state);
}
if (sm->use_private_rx_mqs)
application_enable_rx_mqs_nodes (is_en);
+
+ if (sm->dma_enabled)
+ session_node_enable_dma (is_en, n_vlibs);
}
clib_error_t *
@@ -1953,17 +2226,9 @@ session_main_init (vlib_main_t * vm)
smm->poll_main = 0;
smm->use_private_rx_mqs = 0;
smm->no_adaptive = 0;
- smm->session_baseva = HIGH_SEGMENT_BASEVA;
-
-#if (HIGH_SEGMENT_BASEVA > (4ULL << 30))
- smm->session_va_space_size = 128ULL << 30;
- smm->evt_qs_segment_size = 64 << 20;
-#else
- smm->session_va_space_size = 128 << 20;
- smm->evt_qs_segment_size = 1 << 20;
-#endif
-
- smm->last_transport_proto_type = TRANSPORT_PROTO_SRTP;
+ smm->last_transport_proto_type = TRANSPORT_PROTO_HTTP;
+ smm->port_allocator_min_src_port = 1024;
+ smm->port_allocator_max_src_port = 65535;
return 0;
}
@@ -1993,13 +2258,16 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input)
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (input, "event-queue-length %d", &nitems))
+ if (unformat (input, "wrk-mq-length %d", &nitems))
{
if (nitems >= 2048)
- smm->configured_event_queue_length = nitems;
+ smm->configured_wrk_mq_length = nitems;
else
clib_warning ("event queue length %d too small, ignored", nitems);
}
+ else if (unformat (input, "wrk-mqs-segment-size %U",
+ unformat_memory_size, &smm->wrk_mqs_segment_size))
+ ;
else if (unformat (input, "preallocated-sessions %d",
&smm->preallocated_sessions))
;
@@ -2058,24 +2326,44 @@ session_config_fn (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "local-endpoints-table-buckets %d",
&smm->local_endpoints_table_buckets))
;
- /* Deprecated but maintained for compatibility */
- else if (unformat (input, "evt_qs_memfd_seg"))
- ;
- else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size,
- &smm->evt_qs_segment_size))
- ;
+ else if (unformat (input, "min-src-port %d", &tmp))
+ smm->port_allocator_min_src_port = tmp;
+ else if (unformat (input, "max-src-port %d", &tmp))
+ smm->port_allocator_max_src_port = tmp;
else if (unformat (input, "enable"))
smm->session_enable_asap = 1;
- else if (unformat (input, "segment-baseva 0x%lx", &smm->session_baseva))
- ;
else if (unformat (input, "use-app-socket-api"))
- appns_sapi_enable ();
+ (void) appns_sapi_enable_disable (1 /* is_enable */);
else if (unformat (input, "poll-main"))
smm->poll_main = 1;
else if (unformat (input, "use-private-rx-mqs"))
smm->use_private_rx_mqs = 1;
else if (unformat (input, "no-adaptive"))
smm->no_adaptive = 1;
+ else if (unformat (input, "use-dma"))
+ smm->dma_enabled = 1;
+ else if (unformat (input, "nat44-original-dst-enable"))
+ {
+ smm->original_dst_lookup = vlib_get_plugin_symbol (
+ "nat_plugin.so", "nat44_original_dst_lookup");
+ }
+ /*
+ * Deprecated but maintained for compatibility
+ */
+ else if (unformat (input, "evt_qs_memfd_seg"))
+ ;
+ else if (unformat (input, "segment-baseva 0x%lx", &tmp))
+ ;
+ else if (unformat (input, "evt_qs_seg_size %U", unformat_memory_size,
+ &smm->wrk_mqs_segment_size))
+ ;
+ else if (unformat (input, "event-queue-length %d", &nitems))
+ {
+ if (nitems >= 2048)
+ smm->configured_wrk_mq_length = nitems;
+ else
+ clib_warning ("event queue length %d too small, ignored", nitems);
+ }
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h
index 2d01eb6a67a..a5604bf8725 100644
--- a/src/vnet/session/session.h
+++ b/src/vnet/session/session.h
@@ -21,23 +21,12 @@
#include <vnet/session/session_debug.h>
#include <svm/message_queue.h>
#include <svm/fifo_segment.h>
+#include <vlib/dma/dma.h>
-#define foreach_session_input_error \
-_(NO_SESSION, "No session drops") \
-_(NO_LISTENER, "No listener for dst port drops") \
-_(ENQUEUED, "Packets pushed into rx fifo") \
-_(NOT_READY, "Session not ready packets") \
-_(FIFO_FULL, "Packets dropped for lack of rx fifo space") \
-_(EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") \
-_(API_QUEUE_FULL, "Sessions not created for lack of API queue space") \
-
-typedef enum
+typedef struct session_wrk_stats_
{
-#define _(sym,str) SESSION_ERROR_##sym,
- foreach_session_input_error
-#undef _
- SESSION_N_ERROR,
-} session_input_error_t;
+ u32 errors[SESSION_N_ERRORS];
+} session_wrk_stats_t;
typedef struct session_tx_context_
{
@@ -59,6 +48,7 @@ typedef struct session_tx_context_
/** Vector of tx buffer free lists */
u32 *tx_buffers;
+ vlib_buffer_t **transport_pending_bufs;
} session_tx_context_t;
typedef struct session_evt_elt
@@ -84,6 +74,13 @@ typedef enum session_wrk_flags_
SESSION_WRK_F_ADAPTIVE = 1 << 0,
} __clib_packed session_wrk_flag_t;
+#define DMA_TRANS_SIZE 1024
+typedef struct
+{
+ u32 *pending_tx_buffers;
+ u16 *pending_tx_nexts;
+} session_dma_transfer;
+
typedef struct session_worker_
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -103,8 +100,8 @@ typedef struct session_worker_
/** Convenience pointer to this worker's vlib_main */
vlib_main_t *vm;
- /** Per-proto vector of sessions to enqueue */
- u32 **session_to_enqueue;
+ /** Per-proto vector of session handles to enqueue */
+ session_handle_t **session_to_enqueue;
/** Timerfd used to periodically signal wrk session queue node */
int timerfd;
@@ -133,9 +130,6 @@ typedef struct session_worker_
/** Head of list of pending events */
clib_llist_index_t old_head;
- /** Peekers rw lock */
- clib_rwlock_t peekers_rw_locks;
-
/** Vector of buffers to be sent */
u32 *pending_tx_buffers;
@@ -151,8 +145,22 @@ typedef struct session_worker_
/** Flag that is set if main thread signaled to handle connects */
u32 n_pending_connects;
- /** Main thread loops in poll mode without a connect */
- u32 no_connect_loops;
+ /** List head for first worker evts pending handling on main */
+ clib_llist_index_t evts_pending_main;
+
+ /** Per-app-worker bitmap of pending notifications */
+ uword *app_wrks_pending_ntf;
+
+ int config_index;
+ u8 dma_enabled;
+ session_dma_transfer *dma_trans;
+ u16 trans_head;
+ u16 trans_tail;
+ u16 trans_size;
+ u16 batch_num;
+ vlib_dma_batch_t *batch;
+
+ session_wrk_stats_t stats;
#if SESSION_DEBUG
/** last event poll time by thread */
@@ -170,13 +178,22 @@ extern session_fifo_rx_fn session_tx_fifo_dequeue_internal;
u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e);
+typedef void (*session_update_time_fn) (f64 time_now, u8 thread_index);
+typedef void (*nat44_original_dst_lookup_fn) (
+ ip4_address_t *i2o_src, u16 i2o_src_port, ip4_address_t *i2o_dst,
+ u16 i2o_dst_port, ip_protocol_t proto, u32 *original_dst,
+ u16 *original_dst_port);
+
typedef struct session_main_
{
/** Worker contexts */
session_worker_t *wrk;
+ /** Vector of transport update time functions */
+ session_update_time_fn *update_time_fns;
+
/** Event queues memfd segment */
- fifo_segment_t evt_qs_segment;
+ fifo_segment_t wrk_mqs_segment;
/** Unique segment name counter */
u32 unique_segment_name_counter;
@@ -189,11 +206,22 @@ typedef struct session_main_
* Trade memory for speed, for now */
u32 *session_type_to_next;
- /** Thread for cl and ho that rely on cl allocs */
+ /** Thread used for allocating active open connections, i.e., half-opens
+ * for transports like tcp, and sessions that will be migrated for cl
+ * transports like udp. If vpp has workers, this will be first worker. */
u32 transport_cl_thread;
transport_proto_t last_transport_proto_type;
+ /** Number of workers at pool realloc barrier */
+ volatile u32 pool_realloc_at_barrier;
+
+ /** Number of workers doing reallocs */
+ volatile u32 pool_realloc_doing_work;
+
+ /** Lock to synchronize parallel forced reallocs */
+ clib_spinlock_t pool_realloc_lock;
+
/*
* Config parameters
*/
@@ -217,12 +245,13 @@ typedef struct session_main_
u8 no_adaptive;
/** vpp fifo event queue configured length */
- u32 configured_event_queue_length;
+ u32 configured_wrk_mq_length;
/** Session ssvm segment configs*/
- uword session_baseva;
- uword session_va_space_size;
- uword evt_qs_segment_size;
+ uword wrk_mqs_segment_size;
+
+ /** Session enable dma*/
+ u8 dma_enabled;
/** Session table size parameters */
u32 configured_v4_session_table_buckets;
@@ -238,14 +267,22 @@ typedef struct session_main_
u32 local_endpoints_table_memory;
u32 local_endpoints_table_buckets;
+ /** Transport source port allocation range */
+ u16 port_allocator_min_src_port;
+ u16 port_allocator_max_src_port;
+
/** Preallocate session config parameter */
u32 preallocated_sessions;
u16 msg_id_base;
+
+ /** Query nat44-ed session to get original dst ip4 & dst port. */
+ nat44_original_dst_lookup_fn original_dst_lookup;
} session_main_t;
extern session_main_t session_main;
extern vlib_node_registration_t session_queue_node;
+extern vlib_node_registration_t session_input_node;
extern vlib_node_registration_t session_queue_process_node;
extern vlib_node_registration_t session_queue_pre_input_node;
@@ -301,7 +338,7 @@ session_evt_ctrl_data (session_worker_t * wrk, session_evt_elt_t * elt)
static inline void
session_evt_ctrl_data_free (session_worker_t * wrk, session_evt_elt_t * elt)
{
- ASSERT (elt->evt.event_type > SESSION_IO_EVT_BUILTIN_TX);
+ ASSERT (elt->evt.event_type >= SESSION_CTRL_EVT_RPC);
pool_put_index (wrk->ctrl_evts_data, elt->evt.ctrl_data_index);
}
@@ -329,7 +366,8 @@ int session_wrk_handle_mq (session_worker_t *wrk, svm_msg_q_t *mq);
session_t *session_alloc (u32 thread_index);
void session_free (session_t * s);
-void session_free_w_fifos (session_t * s);
+void session_cleanup (session_t *s);
+void session_program_cleanup (session_t *s);
void session_cleanup_half_open (session_handle_t ho_handle);
u8 session_is_valid (u32 si, u8 thread_index);
@@ -354,100 +392,53 @@ session_get_if_valid (u64 si, u32 thread_index)
}
always_inline session_t *
-session_get_from_handle (session_handle_t handle)
+session_get_from_handle (session_handle_tu_t handle)
{
session_main_t *smm = &session_main;
- u32 session_index, thread_index;
- session_parse_handle (handle, &session_index, &thread_index);
- return pool_elt_at_index (smm->wrk[thread_index].sessions, session_index);
+ return pool_elt_at_index (smm->wrk[handle.thread_index].sessions,
+ handle.session_index);
}
always_inline session_t *
-session_get_from_handle_if_valid (session_handle_t handle)
+session_get_from_handle_if_valid (session_handle_tu_t handle)
{
- u32 session_index, thread_index;
- session_parse_handle (handle, &session_index, &thread_index);
- return session_get_if_valid (session_index, thread_index);
+ return session_get_if_valid (handle.session_index, handle.thread_index);
}
-u64 session_segment_handle (session_t * s);
-
/**
- * Acquires a lock that blocks a session pool from expanding.
+ * Get session from handle and avoid pool validation if no same thread
*
- * This is typically used for safely peeking into other threads'
- * pools in order to clone elements. Lock should be dropped as soon
- * as possible by calling @ref session_pool_remove_peeker.
- *
- * NOTE: Avoid using pool_elt_at_index while the lock is held because
- * it may lead to free elt bitmap expansion/contraction!
- */
-always_inline void
-session_pool_add_peeker (u32 thread_index)
-{
- session_worker_t *wrk = &session_main.wrk[thread_index];
- if (thread_index == vlib_get_thread_index ())
- return;
- clib_rwlock_reader_lock (&wrk->peekers_rw_locks);
-}
-
-always_inline void
-session_pool_remove_peeker (u32 thread_index)
-{
- session_worker_t *wrk = &session_main.wrk[thread_index];
- if (thread_index == vlib_get_thread_index ())
- return;
- clib_rwlock_reader_unlock (&wrk->peekers_rw_locks);
-}
-
-/**
- * Get session from handle and 'lock' pool resize if not in same thread
- *
- * Caller should drop the peek 'lock' as soon as possible.
+ * Peekers are fine because pool grows with barrier (see @ref session_alloc)
*/
always_inline session_t *
-session_get_from_handle_safe (u64 handle)
+session_get_from_handle_safe (session_handle_tu_t handle)
{
- u32 thread_index = session_thread_from_handle (handle);
- session_worker_t *wrk = &session_main.wrk[thread_index];
+ session_worker_t *wrk = &session_main.wrk[handle.thread_index];
- if (thread_index == vlib_get_thread_index ())
+ if (handle.thread_index == vlib_get_thread_index ())
{
- return pool_elt_at_index (wrk->sessions,
- session_index_from_handle (handle));
+ return pool_elt_at_index (wrk->sessions, handle.session_index);
}
else
{
- session_pool_add_peeker (thread_index);
- /* Don't use pool_elt_at index. See @ref session_pool_add_peeker */
- return wrk->sessions + session_index_from_handle (handle);
+ /* Don't use pool_elt_at index to avoid pool bitmap reallocs */
+ return wrk->sessions + handle.session_index;
}
}
-always_inline u32
-session_get_index (session_t * s)
-{
- return (s - session_main.wrk[s->thread_index].sessions);
-}
-
always_inline session_t *
session_clone_safe (u32 session_index, u32 thread_index)
{
+ u32 current_thread_index = vlib_get_thread_index (), new_index;
session_t *old_s, *new_s;
- u32 current_thread_index = vlib_get_thread_index ();
- /* If during the memcpy pool is reallocated AND the memory allocator
- * decides to give the old chunk of memory to somebody in a hurry to
- * scribble something on it, we have a problem. So add this thread as
- * a session pool peeker.
- */
- session_pool_add_peeker (thread_index);
new_s = session_alloc (current_thread_index);
+ new_index = new_s->session_index;
+ /* Session pools are reallocated with barrier (see @ref session_alloc) */
old_s = session_main.wrk[thread_index].sessions + session_index;
clib_memcpy_fast (new_s, old_s, sizeof (*new_s));
- session_pool_remove_peeker (thread_index);
new_s->thread_index = current_thread_index;
- new_s->session_index = session_get_index (new_s);
+ new_s->session_index = new_index;
return new_s;
}
@@ -457,16 +448,19 @@ int session_stop_listen (session_t * s);
void session_half_close (session_t *s);
void session_close (session_t * s);
void session_reset (session_t * s);
+void session_detach_app (session_t *s);
void session_transport_half_close (session_t *s);
void session_transport_close (session_t * s);
void session_transport_reset (session_t * s);
void session_transport_cleanup (session_t * s);
-int session_send_io_evt_to_thread (svm_fifo_t * f,
- session_evt_type_t evt_type);
-int session_enqueue_notify (session_t * s);
+int session_enqueue_notify (session_t *s);
int session_dequeue_notify (session_t * s);
+int session_enqueue_notify_cl (session_t *s);
+int session_send_io_evt_to_thread (svm_fifo_t *f, session_evt_type_t evt_type);
int session_send_io_evt_to_thread_custom (void *data, u32 thread_index,
session_evt_type_t evt_type);
+int session_program_tx_io_evt (session_handle_tu_t sh,
+ session_evt_type_t evt_type);
void session_send_rpc_evt_to_thread (u32 thread_index, void *fp,
void *rpc_args);
void session_send_rpc_evt_to_thread_force (u32 thread_index, void *fp,
@@ -479,6 +473,7 @@ void session_get_endpoint (session_t * s, transport_endpoint_t * tep,
u8 is_lcl);
int session_transport_attribute (session_t *s, u8 is_get,
transport_endpt_attr_t *attr);
+u64 session_segment_handle (session_t *s);
u8 *format_session (u8 * s, va_list * args);
uword unformat_session (unformat_input_t * input, va_list * args);
@@ -496,6 +491,13 @@ int session_enqueue_dgram_connection (session_t * s,
session_dgram_hdr_t * hdr,
vlib_buffer_t * b, u8 proto,
u8 queue_event);
+int session_enqueue_dgram_connection2 (session_t *s, session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event);
+int session_enqueue_dgram_connection_cl (session_t *s,
+ session_dgram_hdr_t *hdr,
+ vlib_buffer_t *b, u8 proto,
+ u8 queue_event);
int session_stream_connect_notify (transport_connection_t * tc,
session_error_t err);
int session_dgram_connect_notify (transport_connection_t * tc,
@@ -513,6 +515,7 @@ int session_stream_accept (transport_connection_t * tc, u32 listener_index,
u32 thread_index, u8 notify);
int session_dgram_accept (transport_connection_t * tc, u32 listener_index,
u32 thread_index);
+
/**
* Initialize session layer for given transport proto and ip version
*
@@ -529,10 +532,18 @@ void session_register_transport (transport_proto_t transport_proto,
const transport_proto_vft_t * vft, u8 is_ip4,
u32 output_node);
transport_proto_t session_add_transport_proto (void);
+void session_register_update_time_fn (session_update_time_fn fn, u8 is_add);
int session_tx_fifo_peek_bytes (transport_connection_t * tc, u8 * buffer,
u32 offset, u32 max_bytes);
u32 session_tx_fifo_dequeue_drop (transport_connection_t * tc, u32 max_bytes);
+always_inline void
+session_set_state (session_t *s, session_state_t session_state)
+{
+ s->session_state = session_state;
+ SESSION_EVT (SESSION_EVT_STATE_CHANGE, s);
+}
+
always_inline u32
transport_max_rx_enqueue (transport_connection_t * tc)
{
@@ -575,6 +586,19 @@ transport_rx_fifo_has_ooo_data (transport_connection_t * tc)
return svm_fifo_has_ooo_data (s->rx_fifo);
}
+always_inline u32
+transport_tx_fifo_has_dgram (transport_connection_t *tc)
+{
+ session_t *s = session_get (tc->s_index, tc->thread_index);
+ u32 max_deq = svm_fifo_max_dequeue_cons (s->tx_fifo);
+ session_dgram_pre_hdr_t phdr;
+
+ if (max_deq <= sizeof (session_dgram_hdr_t))
+ return 0;
+ svm_fifo_peek (s->tx_fifo, 0, sizeof (phdr), (u8 *) &phdr);
+ return max_deq >= phdr.data_length + sizeof (session_dgram_hdr_t);
+}
+
always_inline void
transport_rx_fifo_req_deq_ntf (transport_connection_t *tc)
{
@@ -615,12 +639,19 @@ transport_cl_thread (void)
return session_main.transport_cl_thread;
}
+always_inline u32
+session_vlib_thread_is_cl_thread (void)
+{
+ return (vlib_get_thread_index () == transport_cl_thread () ||
+ vlib_thread_is_main_w_barrier ());
+}
+
/*
* Listen sessions
*/
-always_inline u64
-listen_session_get_handle (session_t * s)
+always_inline session_handle_t
+listen_session_get_handle (session_t *s)
{
ASSERT (s->session_state == SESSION_STATE_LISTENING ||
session_get_transport_proto (s) == TRANSPORT_PROTO_QUIC);
@@ -667,8 +698,8 @@ always_inline session_t *
ho_session_alloc (void)
{
session_t *s;
- ASSERT (vlib_get_thread_index () == 0);
- s = session_alloc (0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
+ s = session_alloc (transport_cl_thread ());
s->session_state = SESSION_STATE_CONNECTING;
s->flags |= SESSION_F_HALF_OPEN;
return s;
@@ -677,7 +708,7 @@ ho_session_alloc (void)
always_inline session_t *
ho_session_get (u32 ho_index)
{
- return session_get (ho_index, 0 /* half-open thread */);
+ return session_get (ho_index, transport_cl_thread ());
}
always_inline void
@@ -702,7 +733,7 @@ vnet_get_session_main ()
always_inline session_worker_t *
session_main_get_worker (u32 thread_index)
{
- return &session_main.wrk[thread_index];
+ return vec_elt_at_index (session_main.wrk, thread_index);
}
static inline session_worker_t *
@@ -710,13 +741,13 @@ session_main_get_worker_if_valid (u32 thread_index)
{
if (thread_index > vec_len (session_main.wrk))
return 0;
- return &session_main.wrk[thread_index];
+ return session_main_get_worker (thread_index);
}
always_inline svm_msg_q_t *
session_main_get_vpp_event_queue (u32 thread_index)
{
- return session_main.wrk[thread_index].vpp_event_queue;
+ return session_main_get_worker (thread_index)->vpp_event_queue;
}
always_inline u8
@@ -725,14 +756,31 @@ session_main_is_enabled ()
return session_main.is_enabled == 1;
}
+always_inline void
+session_worker_stat_error_inc (session_worker_t *wrk, int error, int value)
+{
+ if ((-(error) >= 0 && -(error) < SESSION_N_ERRORS))
+ wrk->stats.errors[-error] += value;
+ else
+ SESSION_DBG ("unknown session counter");
+}
+
+always_inline void
+session_stat_error_inc (int error, int value)
+{
+ session_worker_t *wrk;
+ wrk = session_main_get_worker (vlib_get_thread_index ());
+ session_worker_stat_error_inc (wrk, error, value);
+}
+
#define session_cli_return_if_not_enabled() \
do { \
if (!session_main.is_enabled) \
return clib_error_return (0, "session layer is not enabled"); \
} while (0)
-int session_main_flush_enqueue_events (u8 proto, u32 thread_index);
-int session_main_flush_all_enqueue_events (u8 transport_proto);
+void session_main_flush_enqueue_events (transport_proto_t transport_proto,
+ u32 thread_index);
void session_queue_run_on_main_thread (vlib_main_t * vm);
/**
@@ -761,12 +809,116 @@ session_wrk_update_time (session_worker_t *wrk, f64 now)
}
void session_wrk_enable_adaptive_mode (session_worker_t *wrk);
-fifo_segment_t *session_main_get_evt_q_segment (void);
+fifo_segment_t *session_main_get_wrk_mqs_segment (void);
void session_node_enable_disable (u8 is_en);
clib_error_t *vnet_session_enable_disable (vlib_main_t * vm, u8 is_en);
+void session_wrk_handle_evts_main_rpc (void *);
+void session_wrk_program_app_wrk_evts (session_worker_t *wrk,
+ u32 app_wrk_index);
session_t *session_alloc_for_connection (transport_connection_t * tc);
session_t *session_alloc_for_half_open (transport_connection_t *tc);
+void session_get_original_dst (transport_endpoint_t *i2o_src,
+ transport_endpoint_t *i2o_dst,
+ transport_proto_t transport_proto,
+ u32 *original_dst, u16 *original_dst_port);
+
+typedef void (pool_safe_realloc_rpc_fn) (void *rpc_args);
+
+typedef struct
+{
+ u8 ph[STRUCT_OFFSET_OF (pool_header_t, max_elts) + 4];
+ u32 flag;
+} pool_safe_realloc_header_t;
+
+STATIC_ASSERT_SIZEOF (pool_safe_realloc_header_t, sizeof (pool_header_t));
+
+#define POOL_REALLOC_SAFE_ELT_THRESH 32
+
+#define pool_realloc_flag(PH) \
+ ((pool_safe_realloc_header_t *) pool_header (PH))->flag
+
+typedef struct pool_realloc_rpc_args_
+{
+ void **pool;
+ uword elt_size;
+ uword align;
+} pool_realloc_rpc_args_t;
+
+always_inline void
+pool_program_safe_realloc_rpc (void *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ u32 free_elts, max_elts, n_alloc;
+ pool_realloc_rpc_args_t *pra;
+
+ ASSERT (vlib_get_thread_index () == 0);
+ pra = (pool_realloc_rpc_args_t *) args;
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ free_elts = _pool_free_elts (*pra->pool, pra->elt_size);
+ if (free_elts < POOL_REALLOC_SAFE_ELT_THRESH)
+ {
+ max_elts = _vec_max_len (*pra->pool, pra->elt_size);
+ n_alloc = clib_max (2 * max_elts, POOL_REALLOC_SAFE_ELT_THRESH);
+ _pool_alloc (pra->pool, n_alloc, pra->align, 0, pra->elt_size);
+ }
+ pool_realloc_flag (*pra->pool) = 0;
+ clib_mem_free (args);
+
+ vlib_worker_thread_barrier_release (vm);
+}
+
+always_inline void
+pool_program_safe_realloc (void **p, u32 elt_size, u32 align)
+{
+ pool_realloc_rpc_args_t *pra;
+
+ /* Reuse pad as a realloc flag */
+ if (pool_realloc_flag (*p))
+ return;
+
+ pra = clib_mem_alloc (sizeof (*pra));
+ pra->pool = p;
+ pra->elt_size = elt_size;
+ pra->align = align;
+ pool_realloc_flag (*p) = 1;
+
+ session_send_rpc_evt_to_thread (0 /* thread index */,
+ pool_program_safe_realloc_rpc, pra);
+}
+
+#define pool_needs_realloc(P) \
+ ((!P) || \
+ (vec_len (pool_header (P)->free_indices) < POOL_REALLOC_SAFE_ELT_THRESH && \
+ pool_free_elts (P) < POOL_REALLOC_SAFE_ELT_THRESH))
+
+#define pool_get_aligned_safe(P, E, align) \
+ do \
+ { \
+ if (PREDICT_FALSE (pool_needs_realloc (P))) \
+ { \
+ if (PREDICT_FALSE (!(P))) \
+ { \
+ pool_alloc_aligned (P, POOL_REALLOC_SAFE_ELT_THRESH, align); \
+ } \
+ else if (PREDICT_FALSE (!pool_free_elts (P))) \
+ { \
+ vlib_workers_sync (); \
+ pool_alloc_aligned (P, pool_max_len (P), align); \
+ vlib_workers_continue (); \
+ ALWAYS_ASSERT (pool_free_elts (P) > 0); \
+ } \
+ else \
+ { \
+ pool_program_safe_realloc ((void **) &(P), sizeof ((P)[0]), \
+ _vec_align (P, align)); \
+ } \
+ } \
+ pool_get_aligned (P, E, align); \
+ } \
+ while (0)
#endif /* __included_session_h__ */
diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c
index 00e67dcd2d0..48eb932a2c9 100644
--- a/src/vnet/session/session_api.c
+++ b/src/vnet/session/session_api.c
@@ -82,40 +82,12 @@ session_send_fds (vl_api_registration_t * reg, int fds[], int n_fds)
}
static int
-mq_try_lock_and_alloc_msg (svm_msg_q_t * app_mq, svm_msg_q_msg_t * msg)
-{
- int rv;
- u8 try = 0;
- while (try < 100)
- {
- rv = svm_msg_q_lock_and_alloc_msg_w_ring (app_mq,
- SESSION_MQ_CTRL_EVT_RING,
- SVM_Q_NOWAIT, msg);
- if (!rv)
- return 0;
- /*
- * Break the loop if mq is full, usually this is because the
- * app has crashed or is hanging on somewhere.
- */
- if (rv != -1)
- break;
- try++;
- usleep (1);
- }
- clib_warning ("failed to alloc msg");
- return -1;
-}
-
-static int
mq_send_session_accepted_cb (session_t * s)
{
app_worker_t *app_wrk = app_worker_get (s->app_wrk_index);
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_accepted_msg_t m = { 0 };
- svm_msg_q_t *app_mq;
fifo_segment_t *eq_seg;
session_t *listener;
- session_event_t *evt;
application_t *app;
app = application_get (app_wrk->app_index);
@@ -164,15 +136,14 @@ mq_send_session_accepted_cb (session_t * s)
m.mq_index = s->thread_index;
}
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return SESSION_E_MQ_MSG_ALLOC;
+ if (application_original_dst_is_enabled (app))
+ {
+ session_get_original_dst (&m.lcl, &m.rmt,
+ session_get_transport_proto (s),
+ &m.original_dst_ip4, &m.original_dst_port);
+ }
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_ACCEPTED;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_ACCEPTED, &m, sizeof (m));
return 0;
}
@@ -181,21 +152,12 @@ static inline void
mq_send_session_close_evt (app_worker_t * app_wrk, session_handle_t sh,
session_evt_type_t evt_type)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_disconnected_msg_t *mp;
- svm_msg_q_t *app_mq;
- session_event_t *evt;
+ session_disconnected_msg_t m = { 0 };
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = evt_type;
- mp = (session_disconnected_msg_t *) evt->data;
- mp->handle = sh;
- mp->context = app_wrk->api_client_index;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ m.handle = sh;
+ m.context = app_wrk->api_client_index;
+
+ app_wrk_send_ctrl_evt (app_wrk, evt_type, &m, sizeof (m));
}
static inline void
@@ -249,13 +211,9 @@ int
mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
session_t * s, session_error_t err)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_connected_msg_t m = { 0 };
- svm_msg_q_t *app_mq;
- transport_connection_t *tc;
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
- session_event_t *evt;
application_t *app;
app_wrk = app_worker_get (app_wrk_index);
@@ -271,14 +229,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
if (session_has_transport (s))
{
- tc = session_get_transport (s);
- if (!tc)
- {
- clib_warning ("failed to retrieve transport!");
- m.retval = SESSION_E_REFUSED;
- goto snd_msg;
- }
-
m.handle = session_handle (s);
m.vpp_event_queue_address =
fifo_segment_msg_q_offset (eq_seg, s->thread_index);
@@ -293,7 +243,6 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
else
{
ct_connection_t *cct;
- session_t *ss;
cct = (ct_connection_t *) session_get_transport (s);
m.handle = session_handle (s);
@@ -304,11 +253,10 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
m.server_rx_fifo = fifo_segment_fifo_offset (s->rx_fifo);
m.server_tx_fifo = fifo_segment_fifo_offset (s->tx_fifo);
m.segment_handle = session_segment_handle (s);
- ss = ct_session_get_peer (s);
- m.ct_rx_fifo = fifo_segment_fifo_offset (ss->tx_fifo);
- m.ct_tx_fifo = fifo_segment_fifo_offset (ss->rx_fifo);
- m.ct_segment_handle = session_segment_handle (ss);
m.mq_index = s->thread_index;
+ m.ct_rx_fifo = fifo_segment_fifo_offset (cct->client_rx_fifo);
+ m.ct_tx_fifo = fifo_segment_fifo_offset (cct->client_tx_fifo);
+ m.ct_segment_handle = cct->segment_handle;
}
/* Setup client session index in advance, in case data arrives
@@ -318,31 +266,19 @@ mq_send_session_connected_cb (u32 app_wrk_index, u32 api_context,
snd_msg:
- app_mq = app_wrk->event_queue;
-
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return SESSION_E_MQ_MSG_ALLOC;
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_CONNECTED, &m, sizeof (m));
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_CONNECTED;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
-
- svm_msg_q_add_and_unlock (app_mq, msg);
return 0;
}
-int
+static int
mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
session_handle_t handle, int rv)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_bound_msg_t m = { 0 };
- svm_msg_q_t *app_mq;
- transport_endpoint_t tep;
+ transport_connection_t *ltc;
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
- session_event_t *evt;
application_t *app;
app_listener_t *al;
session_t *ls = 0;
@@ -362,77 +298,60 @@ mq_send_session_bound_cb (u32 app_wrk_index, u32 api_context,
else
ls = app_listener_get_local_session (al);
- session_get_endpoint (ls, &tep, 1 /* is_lcl */);
- m.lcl_port = tep.port;
- m.lcl_is_ip4 = tep.is_ip4;
- clib_memcpy_fast (m.lcl_ip, &tep.ip, sizeof (tep.ip));
+ ltc = session_get_transport (ls);
+ m.lcl_port = ltc->lcl_port;
+ m.lcl_is_ip4 = ltc->is_ip4;
+ clib_memcpy_fast (m.lcl_ip, &ltc->lcl_ip, sizeof (m.lcl_ip));
app = application_get (app_wrk->app_index);
eq_seg = application_get_rx_mqs_segment (app);
m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, ls->thread_index);
m.mq_index = ls->thread_index;
- if (session_transport_service_type (ls) == TRANSPORT_SERVICE_CL &&
- ls->rx_fifo)
+ if (transport_connection_is_cless (ltc))
{
- m.rx_fifo = fifo_segment_fifo_offset (ls->rx_fifo);
- m.tx_fifo = fifo_segment_fifo_offset (ls->tx_fifo);
- m.segment_handle = session_segment_handle (ls);
+ session_t *wrk_ls;
+ m.mq_index = transport_cl_thread ();
+ m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, m.mq_index);
+ wrk_ls = app_listener_get_wrk_cl_session (al, app_wrk->wrk_map_index);
+ m.rx_fifo = fifo_segment_fifo_offset (wrk_ls->rx_fifo);
+ m.tx_fifo = fifo_segment_fifo_offset (wrk_ls->tx_fifo);
+ m.segment_handle = session_segment_handle (wrk_ls);
}
snd_msg:
- app_mq = app_wrk->event_queue;
-
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return SESSION_E_MQ_MSG_ALLOC;
-
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_BOUND;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_BOUND, &m, sizeof (m));
- svm_msg_q_add_and_unlock (app_mq, msg);
return 0;
}
-void
-mq_send_unlisten_reply (app_worker_t * app_wrk, session_handle_t sh,
- u32 context, int rv)
+static void
+mq_send_unlisten_cb (u32 app_wrk_index, session_handle_t sh, u32 context,
+ int rv)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_unlisten_reply_msg_t *ump;
- svm_msg_q_t *app_mq;
- session_event_t *evt;
+ session_unlisten_reply_msg_t m = { 0 };
+ app_worker_t *app_wrk;
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
+ app_wrk = app_worker_get (app_wrk_index);
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_UNLISTEN_REPLY;
- ump = (session_unlisten_reply_msg_t *) evt->data;
- ump->context = context;
- ump->handle = sh;
- ump->retval = rv;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ m.context = context;
+ m.handle = sh;
+ m.retval = rv;
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_UNLISTEN_REPLY, &m,
+ sizeof (m));
}
static void
mq_send_session_migrate_cb (session_t * s, session_handle_t new_sh)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
session_migrated_msg_t m = { 0 };
fifo_segment_t *eq_seg;
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
application_t *app;
u32 thread_index;
thread_index = session_thread_from_handle (new_sh);
app_wrk = app_worker_get (s->app_wrk_index);
- app_mq = app_wrk->event_queue;
app = application_get (app_wrk->app_index);
eq_seg = application_get_rx_mqs_segment (app);
@@ -442,27 +361,15 @@ mq_send_session_migrate_cb (session_t * s, session_handle_t new_sh)
m.vpp_evt_q = fifo_segment_msg_q_offset (eq_seg, thread_index);
m.segment_handle = SESSION_INVALID_HANDLE;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
-
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_MIGRATED;
- clib_memcpy_fast (evt->data, &m, sizeof (m));
-
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_MIGRATED, &m, sizeof (m));
}
static int
mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle)
{
- int fds[SESSION_N_FD_TYPE], n_fds = 0;
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_add_segment_msg_t *mp;
+ session_app_add_segment_msg_t m = { 0 };
vl_api_registration_t *reg;
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
fifo_segment_t *fs;
ssvm_private_t *sp;
u8 fd_flags = 0;
@@ -488,29 +395,16 @@ mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle)
}
fd_flags |= SESSION_FD_F_MEMFD_SEGMENT;
- fds[n_fds] = sp->fd;
- n_fds += 1;
}
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
-
- if (n_fds)
- session_send_fds (reg, fds, n_fds);
-
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT;
- mp = (session_app_add_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_size = sp->ssvm_size;
- mp->fd_flags = fd_flags;
- mp->segment_handle = segment_handle;
- strncpy ((char *) mp->segment_name, (char *) sp->name,
- sizeof (mp->segment_name) - 1);
+ m.segment_size = sp->ssvm_size;
+ m.fd_flags = fd_flags;
+ m.segment_handle = segment_handle;
+ strncpy ((char *) m.segment_name, (char *) sp->name,
+ sizeof (m.segment_name) - 1);
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt_fd (app_wrk, SESSION_CTRL_EVT_APP_ADD_SEGMENT, &m,
+ sizeof (m), sp->fd);
return 0;
}
@@ -518,12 +412,9 @@ mq_send_add_segment_cb (u32 app_wrk_index, u64 segment_handle)
static int
mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_del_segment_msg_t *mp;
+ session_app_del_segment_msg_t m = { 0 };
vl_api_registration_t *reg;
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
app_wrk = app_worker_get (app_wrk_index);
reg = vl_mem_api_client_index_to_registration (app_wrk->api_client_index);
@@ -533,17 +424,10 @@ mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle)
return -1;
}
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
+ m.segment_handle = segment_handle;
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT;
- mp = (session_app_del_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_handle = segment_handle;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_APP_DEL_SEGMENT, &m,
+ sizeof (m));
return 0;
}
@@ -551,10 +435,7 @@ mq_send_del_segment_cb (u32 app_wrk_index, u64 segment_handle)
static void
mq_send_session_cleanup_cb (session_t * s, session_cleanup_ntf_t ntf)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_cleanup_msg_t *mp;
- svm_msg_q_t *app_mq;
- session_event_t *evt;
+ session_cleanup_msg_t m = { 0 };
app_worker_t *app_wrk;
/* Propagate transport cleanup notifications only if app didn't close */
@@ -566,17 +447,56 @@ mq_send_session_cleanup_cb (session_t * s, session_cleanup_ntf_t ntf)
if (!app_wrk)
return;
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return;
+ m.handle = session_handle (s);
+ m.type = ntf;
+
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_CLEANUP, &m, sizeof (m));
+}
+
+static int
+mq_send_io_rx_event (session_t *s)
+{
+ session_event_t *mq_evt;
+ svm_msg_q_msg_t mq_msg;
+ app_worker_t *app_wrk;
+ svm_msg_q_t *mq;
+
+ if (svm_fifo_has_event (s->rx_fifo))
+ return 0;
+
+ app_wrk = app_worker_get (s->app_wrk_index);
+ mq = app_wrk->event_queue;
+
+ mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
+ mq_evt = svm_msg_q_msg_data (mq, &mq_msg);
+
+ mq_evt->event_type = SESSION_IO_EVT_RX;
+ mq_evt->session_index = s->rx_fifo->shr->client_session_index;
+
+ (void) svm_fifo_set_event (s->rx_fifo);
+
+ svm_msg_q_add_raw (mq, &mq_msg);
+
+ return 0;
+}
+
+static int
+mq_send_io_tx_event (session_t *s)
+{
+ app_worker_t *app_wrk = app_worker_get (s->app_wrk_index);
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ session_event_t *mq_evt;
+ svm_msg_q_msg_t mq_msg;
+
+ mq_msg = svm_msg_q_alloc_msg_w_ring (mq, SESSION_MQ_IO_EVT_RING);
+ mq_evt = svm_msg_q_msg_data (mq, &mq_msg);
+
+ mq_evt->event_type = SESSION_IO_EVT_TX;
+ mq_evt->session_index = s->tx_fifo->shr->client_session_index;
+
+ svm_msg_q_add_raw (mq, &mq_msg);
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_CLEANUP;
- mp = (session_cleanup_msg_t *) evt->data;
- mp->handle = session_handle (s);
- mp->type = ntf;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ return 0;
}
static session_cb_vft_t session_mq_cb_vft = {
@@ -586,8 +506,12 @@ static session_cb_vft_t session_mq_cb_vft = {
.session_reset_callback = mq_send_session_reset_cb,
.session_migrate_callback = mq_send_session_migrate_cb,
.session_cleanup_callback = mq_send_session_cleanup_cb,
+ .session_listened_callback = mq_send_session_bound_cb,
+ .session_unlistened_callback = mq_send_unlisten_cb,
.add_segment_callback = mq_send_add_segment_cb,
.del_segment_callback = mq_send_del_segment_cb,
+ .builtin_app_rx_callback = mq_send_io_rx_event,
+ .builtin_app_tx_callback = mq_send_io_tx_event,
};
static void
@@ -602,6 +526,17 @@ vl_api_session_enable_disable_t_handler (vl_api_session_enable_disable_t * mp)
}
static void
+vl_api_session_sapi_enable_disable_t_handler (
+ vl_api_session_sapi_enable_disable_t *mp)
+{
+ vl_api_session_sapi_enable_disable_reply_t *rmp;
+ int rv = 0;
+
+ rv = appns_sapi_enable_disable (mp->is_enable);
+ REPLY_MACRO (VL_API_SESSION_SAPI_ENABLE_DISABLE_REPLY);
+}
+
+static void
vl_api_app_attach_t_handler (vl_api_app_attach_t * mp)
{
int rv = 0, *fds = 0, n_fds = 0, n_workers, i;
@@ -642,7 +577,8 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp)
if ((rv = vnet_application_attach (a)))
{
- clib_warning ("attach returned: %d", rv);
+ clib_warning ("attach returned: %U", format_session_error, rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
vec_free (a->namespace_id);
goto done;
}
@@ -684,27 +620,28 @@ vl_api_app_attach_t_handler (vl_api_app_attach_t * mp)
}
done:
- /* *INDENT-OFF* */
- REPLY_MACRO2 (VL_API_APP_ATTACH_REPLY, ({
- if (!rv)
- {
- ctrl_thread = n_workers ? 1 : 0;
- segp = (fifo_segment_t *) a->segment;
- rmp->app_index = clib_host_to_net_u32 (a->app_index);
- rmp->app_mq = fifo_segment_msg_q_offset (segp, 0);
- rmp->vpp_ctrl_mq = fifo_segment_msg_q_offset (rx_mqs_seg, ctrl_thread);
- rmp->vpp_ctrl_mq_thread = ctrl_thread;
- rmp->n_fds = n_fds;
- rmp->fd_flags = fd_flags;
- if (vec_len (segp->ssvm.name))
- {
- vl_api_vec_to_api_string (segp->ssvm.name, &rmp->segment_name);
- }
- rmp->segment_size = segp->ssvm.ssvm_size;
- rmp->segment_handle = clib_host_to_net_u64 (a->segment_handle);
- }
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO3 (
+ VL_API_APP_ATTACH_REPLY,
+ ((!rv) ? vec_len (((fifo_segment_t *) a->segment)->ssvm.name) : 0), ({
+ if (!rv)
+ {
+ ctrl_thread = n_workers ? 1 : 0;
+ segp = (fifo_segment_t *) a->segment;
+ rmp->app_index = clib_host_to_net_u32 (a->app_index);
+ rmp->app_mq = fifo_segment_msg_q_offset (segp, 0);
+ rmp->vpp_ctrl_mq =
+ fifo_segment_msg_q_offset (rx_mqs_seg, ctrl_thread);
+ rmp->vpp_ctrl_mq_thread = ctrl_thread;
+ rmp->n_fds = n_fds;
+ rmp->fd_flags = fd_flags;
+ if (vec_len (segp->ssvm.name))
+ {
+ vl_api_vec_to_api_string (segp->ssvm.name, &rmp->segment_name);
+ }
+ rmp->segment_size = segp->ssvm.ssvm_size;
+ rmp->segment_handle = clib_host_to_net_u64 (a->segment_handle);
+ }
+ }));
if (n_fds)
session_send_fds (reg, fds, n_fds);
@@ -746,7 +683,9 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp)
rv = vnet_app_worker_add_del (&args);
if (rv)
{
- clib_warning ("app worker add/del returned: %d", rv);
+ clib_warning ("app worker add/del returned: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
goto done;
}
@@ -767,25 +706,27 @@ vl_api_app_worker_add_del_t_handler (vl_api_app_worker_add_del_t * mp)
n_fds += 1;
}
- /* *INDENT-OFF* */
done:
- REPLY_MACRO2 (VL_API_APP_WORKER_ADD_DEL_REPLY, ({
- rmp->is_add = mp->is_add;
- rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index);
- rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle);
- if (!rv && mp->is_add)
- {
- rmp->app_event_queue_address =
- fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0);
- rmp->n_fds = n_fds;
- rmp->fd_flags = fd_flags;
- if (vec_len (args.segment->name))
- {
- vl_api_vec_to_api_string (args.segment->name, &rmp->segment_name);
- }
- }
- }));
- /* *INDENT-ON* */
+ REPLY_MACRO3 (
+ VL_API_APP_WORKER_ADD_DEL_REPLY,
+ ((!rv && mp->is_add) ? vec_len (args.segment->name) : 0), ({
+ rmp->is_add = mp->is_add;
+ rmp->wrk_index = mp->wrk_index;
+ if (!rv && mp->is_add)
+ {
+ rmp->wrk_index = clib_host_to_net_u32 (args.wrk_map_index);
+ rmp->segment_handle = clib_host_to_net_u64 (args.segment_handle);
+ rmp->app_event_queue_address =
+ fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0);
+ rmp->n_fds = n_fds;
+ rmp->fd_flags = fd_flags;
+ if (vec_len (args.segment->name))
+ {
+ vl_api_vec_to_api_string (args.segment->name,
+ &rmp->segment_name);
+ }
+ }
+ }));
if (n_fds)
session_send_fds (reg, fds, n_fds);
@@ -811,6 +752,12 @@ vl_api_application_detach_t_handler (vl_api_application_detach_t * mp)
a->app_index = app->app_index;
a->api_client_index = mp->client_index;
rv = vnet_application_detach (a);
+ if (rv)
+ {
+ clib_warning ("vnet_application_detach: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
}
done:
@@ -834,6 +781,7 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp)
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
+ .sock_name = 0,
.secret = clib_net_to_host_u64 (mp->secret),
.sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
.ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id),
@@ -852,13 +800,11 @@ vl_api_app_namespace_add_del_t_handler (vl_api_app_namespace_add_del_t * mp)
}
vec_free (ns_id);
- /* *INDENT-OFF* */
done:
REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_REPLY, ({
if (!rv)
rmp->appns_index = clib_host_to_net_u32 (appns_index);
}));
- /* *INDENT-ON* */
}
static void
@@ -866,7 +812,7 @@ vl_api_app_namespace_add_del_v2_t_handler (
vl_api_app_namespace_add_del_v2_t *mp)
{
vl_api_app_namespace_add_del_v2_reply_t *rmp;
- u8 *ns_id = 0, *netns = 0;
+ u8 *ns_id = 0;
u32 appns_index = 0;
int rv = 0;
@@ -877,13 +823,11 @@ vl_api_app_namespace_add_del_v2_t_handler (
}
mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
- mp->netns[sizeof (mp->netns) - 1] = 0;
ns_id = format (0, "%s", &mp->namespace_id);
- netns = format (0, "%s", &mp->netns);
vnet_app_namespace_add_del_args_t args = {
.ns_id = ns_id,
- .netns = netns,
+ .sock_name = 0,
.secret = clib_net_to_host_u64 (mp->secret),
.sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
.ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id),
@@ -896,12 +840,11 @@ vl_api_app_namespace_add_del_v2_t_handler (
appns_index = app_namespace_index_from_id (ns_id);
if (appns_index == APP_NAMESPACE_INVALID_INDEX)
{
- clib_warning ("app ns lookup failed");
+ clib_warning ("app ns lookup failed id:%s", ns_id);
rv = VNET_API_ERROR_UNSPECIFIED;
}
}
vec_free (ns_id);
- vec_free (netns);
done:
REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V2_REPLY, ({
@@ -911,6 +854,107 @@ done:
}
static void
+vl_api_app_namespace_add_del_v4_t_handler (
+ vl_api_app_namespace_add_del_v4_t *mp)
+{
+ vl_api_app_namespace_add_del_v4_reply_t *rmp;
+ u8 *ns_id = 0, *sock_name = 0;
+ u32 appns_index = 0;
+ int rv = 0;
+ if (session_main_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+ mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
+ ns_id = format (0, "%s", &mp->namespace_id);
+ sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name);
+ vnet_app_namespace_add_del_args_t args = {
+ .ns_id = ns_id,
+ .sock_name = sock_name,
+ .secret = clib_net_to_host_u64 (mp->secret),
+ .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
+ .ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id),
+ .ip6_fib_id = clib_net_to_host_u32 (mp->ip6_fib_id),
+ .is_add = mp->is_add,
+ };
+ rv = vnet_app_namespace_add_del (&args);
+ if (!rv && mp->is_add)
+ {
+ appns_index = app_namespace_index_from_id (ns_id);
+ if (appns_index == APP_NAMESPACE_INVALID_INDEX)
+ {
+ clib_warning ("app ns lookup failed id:%s", ns_id);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
+ }
+ vec_free (ns_id);
+ vec_free (sock_name);
+done:
+ REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V4_REPLY, ({
+ if (!rv)
+ rmp->appns_index = clib_host_to_net_u32 (appns_index);
+ }));
+}
+
+static void
+vl_api_app_namespace_add_del_v3_t_handler (
+ vl_api_app_namespace_add_del_v3_t *mp)
+{
+ vl_api_app_namespace_add_del_v3_reply_t *rmp;
+ u8 *ns_id = 0, *sock_name = 0, *api_sock_name = 0;
+ u32 appns_index = 0;
+ int rv = 0;
+ if (session_main_is_enabled () == 0)
+ {
+ rv = VNET_API_ERROR_FEATURE_DISABLED;
+ goto done;
+ }
+ mp->namespace_id[sizeof (mp->namespace_id) - 1] = 0;
+ ns_id = format (0, "%s", &mp->namespace_id);
+ api_sock_name = vl_api_from_api_to_new_vec (mp, &mp->sock_name);
+ mp->netns[sizeof (mp->netns) - 1] = 0;
+ if (strlen ((char *) mp->netns) != 0)
+ {
+ sock_name =
+ format (0, "abstract:%v,netns_name=%s", api_sock_name, &mp->netns);
+ }
+ else
+ {
+ sock_name = api_sock_name;
+ api_sock_name = 0; // for vec_free
+ }
+
+ vnet_app_namespace_add_del_args_t args = {
+ .ns_id = ns_id,
+ .sock_name = sock_name,
+ .secret = clib_net_to_host_u64 (mp->secret),
+ .sw_if_index = clib_net_to_host_u32 (mp->sw_if_index),
+ .ip4_fib_id = clib_net_to_host_u32 (mp->ip4_fib_id),
+ .ip6_fib_id = clib_net_to_host_u32 (mp->ip6_fib_id),
+ .is_add = mp->is_add,
+ };
+ rv = vnet_app_namespace_add_del (&args);
+ if (!rv && mp->is_add)
+ {
+ appns_index = app_namespace_index_from_id (ns_id);
+ if (appns_index == APP_NAMESPACE_INVALID_INDEX)
+ {
+ clib_warning ("app ns lookup failed id:%s", ns_id);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
+ }
+ vec_free (ns_id);
+ vec_free (sock_name);
+ vec_free (api_sock_name);
+done:
+ REPLY_MACRO2 (VL_API_APP_NAMESPACE_ADD_DEL_V3_REPLY, ({
+ if (!rv)
+ rmp->appns_index = clib_host_to_net_u32 (appns_index);
+ }));
+}
+
+static void
vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp)
{
vl_api_session_rule_add_del_reply_t *rmp;
@@ -937,7 +981,10 @@ vl_api_session_rule_add_del_t_handler (vl_api_session_rule_add_del_t * mp)
rv = vnet_session_rule_add_del (&args);
if (rv)
- clib_warning ("rule add del returned: %d", rv);
+ {
+ clib_warning ("rule add del returned: %U", format_session_error, rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
vec_free (table_args->tag);
REPLY_MACRO (VL_API_SESSION_RULE_ADD_DEL_REPLY);
}
@@ -1040,7 +1087,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
if (is_local || fib_proto == FIB_PROTOCOL_IP4)
{
u8 *tag = 0;
- /* *INDENT-OFF* */
srt16 = &srt->session_rules_tables_16;
pool_foreach (rule16, srt16->rules) {
ri = mma_rules_table_rule_index_16 (srt16, rule16);
@@ -1048,12 +1094,10 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
send_session_rule_details4 (rule16, is_local, tp, appns_index, tag,
reg, context);
}
- /* *INDENT-ON* */
}
if (is_local || fib_proto == FIB_PROTOCOL_IP6)
{
u8 *tag = 0;
- /* *INDENT-OFF* */
srt40 = &srt->session_rules_tables_40;
pool_foreach (rule40, srt40->rules) {
ri = mma_rules_table_rule_index_40 (srt40, rule40);
@@ -1061,7 +1105,6 @@ send_session_rules_table_details (session_rules_table_t * srt, u8 fib_proto,
send_session_rule_details6 (rule40, is_local, tp, appns_index, tag,
reg, context);
}
- /* *INDENT-ON* */
}
}
@@ -1076,7 +1119,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
session_table_foreach (st, ({
for (tp = 0; tp < TRANSPORT_N_PROTOS; tp++)
{
@@ -1086,7 +1128,6 @@ vl_api_session_rules_dump_t_handler (vl_api_session_rules_dump_t * mp)
mp->context);
}
}));
- /* *INDENT-ON* */
}
static void
@@ -1131,12 +1172,10 @@ vl_api_app_add_cert_key_pair_t_handler (vl_api_app_add_cert_key_pair_t * mp)
rv = vnet_app_add_cert_key_pair (a);
done:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_APP_ADD_CERT_KEY_PAIR_REPLY, ({
if (!rv)
rmp->index = clib_host_to_net_u32 (a->index);
}));
- /* *INDENT-ON* */
}
static void
@@ -1152,6 +1191,12 @@ vl_api_app_del_cert_key_pair_t_handler (vl_api_app_del_cert_key_pair_t * mp)
}
ckpair_index = clib_net_to_host_u32 (mp->index);
rv = vnet_app_del_cert_key_pair (ckpair_index);
+ if (rv)
+ {
+ clib_warning ("vnet_app_del_cert_key_pair: %U", format_session_error,
+ rv);
+ rv = VNET_API_ERROR_UNSPECIFIED;
+ }
done:
REPLY_MACRO (VL_API_APP_DEL_CERT_KEY_PAIR_REPLY);
@@ -1177,36 +1222,11 @@ VL_MSG_API_REAPER_FUNCTION (application_reaper_cb);
* Socket api functions
*/
-static void
-sapi_send_fds (app_worker_t * app_wrk, int *fds, int n_fds)
-{
- app_sapi_msg_t smsg = { 0 };
- app_namespace_t *app_ns;
- application_t *app;
- clib_socket_t *cs;
- u32 cs_index;
-
- app = application_get (app_wrk->app_index);
- app_ns = app_namespace_get (app->ns_index);
- cs_index = appns_sapi_handle_sock_index (app_wrk->api_client_index);
- cs = appns_sapi_get_socket (app_ns, cs_index);
- if (PREDICT_FALSE (!cs))
- return;
-
- /* There's no payload for the message only the type */
- smsg.type = APP_SAPI_MSG_TYPE_SEND_FDS;
- clib_socket_sendmsg (cs, &smsg, sizeof (smsg), fds, n_fds);
-}
-
static int
mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
{
- int fds[SESSION_N_FD_TYPE], n_fds = 0;
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_add_segment_msg_t *mp;
+ session_app_add_segment_msg_t m = { 0 };
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
fifo_segment_t *fs;
ssvm_private_t *sp;
u8 fd_flags = 0;
@@ -1218,33 +1238,15 @@ mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
ASSERT (ssvm_type (sp) == SSVM_SEGMENT_MEMFD);
fd_flags |= SESSION_FD_F_MEMFD_SEGMENT;
- fds[n_fds] = sp->fd;
- n_fds += 1;
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
+ m.segment_size = sp->ssvm_size;
+ m.fd_flags = fd_flags;
+ m.segment_handle = segment_handle;
+ strncpy ((char *) m.segment_name, (char *) sp->name,
+ sizeof (m.segment_name) - 1);
- /*
- * Send the fd over api socket
- */
- sapi_send_fds (app_wrk, fds, n_fds);
-
- /*
- * Send the actual message over mq
- */
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_ADD_SEGMENT;
- mp = (session_app_add_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_size = sp->ssvm_size;
- mp->fd_flags = fd_flags;
- mp->segment_handle = segment_handle;
- strncpy ((char *) mp->segment_name, (char *) sp->name,
- sizeof (mp->segment_name) - 1);
-
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt_fd (app_wrk, SESSION_CTRL_EVT_APP_ADD_SEGMENT, &m,
+ sizeof (m), sp->fd);
return 0;
}
@@ -1252,25 +1254,15 @@ mq_send_add_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
static int
mq_send_del_segment_sapi_cb (u32 app_wrk_index, u64 segment_handle)
{
- svm_msg_q_msg_t _msg, *msg = &_msg;
- session_app_del_segment_msg_t *mp;
+ session_app_del_segment_msg_t m = { 0 };
app_worker_t *app_wrk;
- session_event_t *evt;
- svm_msg_q_t *app_mq;
app_wrk = app_worker_get (app_wrk_index);
- app_mq = app_wrk->event_queue;
- if (mq_try_lock_and_alloc_msg (app_mq, msg))
- return -1;
+ m.segment_handle = segment_handle;
- evt = svm_msg_q_msg_data (app_mq, msg);
- clib_memset (evt, 0, sizeof (*evt));
- evt->event_type = SESSION_CTRL_EVT_APP_DEL_SEGMENT;
- mp = (session_app_del_segment_msg_t *) evt->data;
- clib_memset (mp, 0, sizeof (*mp));
- mp->segment_handle = segment_handle;
- svm_msg_q_add_and_unlock (app_mq, msg);
+ app_wrk_send_ctrl_evt (app_wrk, SESSION_CTRL_EVT_APP_DEL_SEGMENT, &m,
+ sizeof (m));
return 0;
}
@@ -1282,8 +1274,12 @@ static session_cb_vft_t session_mq_sapi_cb_vft = {
.session_reset_callback = mq_send_session_reset_cb,
.session_migrate_callback = mq_send_session_migrate_cb,
.session_cleanup_callback = mq_send_session_cleanup_cb,
+ .session_listened_callback = mq_send_session_bound_cb,
+ .session_unlistened_callback = mq_send_unlisten_cb,
.add_segment_callback = mq_send_add_segment_sapi_cb,
.del_segment_callback = mq_send_del_segment_sapi_cb,
+ .builtin_app_rx_callback = mq_send_io_rx_event,
+ .builtin_app_tx_callback = mq_send_io_tx_event,
};
static void
@@ -1385,7 +1381,7 @@ done:
vec_free (fds);
}
-static void
+void
sapi_socket_close_w_handle (u32 api_handle)
{
app_namespace_t *app_ns = app_namespace_get (api_handle >> 16);
@@ -1423,7 +1419,7 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
app = application_get_if_valid (mp->app_index);
if (!app)
{
- rv = VNET_API_ERROR_INVALID_VALUE;
+ rv = SESSION_E_INVALID;
goto done;
}
@@ -1438,15 +1434,13 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
rv = vnet_app_worker_add_del (&args);
if (rv)
{
- clib_warning ("app worker add/del returned: %d", rv);
+ clib_warning ("app worker add/del returned: %U", format_session_error,
+ rv);
goto done;
}
if (!mp->is_add)
- {
- sapi_socket_close_w_handle (sapi_handle);
- goto done;
- }
+ goto done;
/* Send fifo segment fd if needed */
if (ssvm_type (args.segment) == SSVM_SEGMENT_MEMFD)
@@ -1464,15 +1458,20 @@ sapi_add_del_worker_handler (app_namespace_t * app_ns,
done:
+ /* With app sock api socket expected to be closed, no reply */
+ if (!mp->is_add && appns_sapi_enabled ())
+ return;
+
msg.type = APP_SAPI_MSG_TYPE_ADD_DEL_WORKER_REPLY;
rmp = &msg.worker_add_del_reply;
rmp->retval = rv;
rmp->is_add = mp->is_add;
+ rmp->wrk_index = mp->wrk_index;
rmp->api_client_handle = sapi_handle;
- rmp->wrk_index = args.wrk_map_index;
- rmp->segment_handle = args.segment_handle;
if (!rv && mp->is_add)
{
+ rmp->wrk_index = args.wrk_map_index;
+ rmp->segment_handle = args.segment_handle;
/* No segment name and size. This supports only memfds */
rmp->app_event_queue_address =
fifo_segment_msg_q_offset ((fifo_segment_t *) args.segment, 0);
@@ -1488,6 +1487,108 @@ done:
clib_socket_sendmsg (cs, &msg, sizeof (msg), fds, n_fds);
}
+/* This is a workaround for the case when session layer starts reading
+ * the socket before the client actualy sends the data
+ */
+static clib_error_t *
+sapi_socket_receive_wait (clib_socket_t *cs, u8 *msg, u32 msg_len)
+{
+ clib_error_t *err;
+ int n_tries = 5;
+
+ while (1)
+ {
+ err = clib_socket_recvmsg (cs, msg, msg_len, 0, 0);
+ if (!err)
+ break;
+
+ if (!n_tries)
+ return err;
+
+ n_tries--;
+ usleep (1);
+ }
+
+ return err;
+}
+
+static void
+sapi_add_del_cert_key_handler (app_namespace_t *app_ns, clib_socket_t *cs,
+ app_sapi_cert_key_add_del_msg_t *mp)
+{
+ vnet_app_add_cert_key_pair_args_t _a, *a = &_a;
+ app_sapi_cert_key_add_del_reply_msg_t *rmp;
+ app_sapi_msg_t msg = { 0 };
+ int rv = 0;
+
+ if (mp->is_add)
+ {
+ const u32 max_certkey_len = 2e4, max_cert_len = 1e4, max_key_len = 1e4;
+ clib_error_t *err;
+ u8 *certkey = 0;
+ u32 key_len;
+
+ if (mp->certkey_len > max_certkey_len)
+ {
+ rv = SESSION_E_INVALID;
+ goto send_reply;
+ }
+
+ vec_validate (certkey, mp->certkey_len - 1);
+
+ err = sapi_socket_receive_wait (cs, certkey, mp->certkey_len);
+ if (err)
+ {
+ clib_error_report (err);
+ rv = SESSION_E_INVALID;
+ goto send_reply;
+ }
+
+ if (mp->cert_len > max_cert_len)
+ {
+ rv = SESSION_E_INVALID;
+ goto send_reply;
+ }
+
+ if (mp->certkey_len < mp->cert_len)
+ {
+ rv = SESSION_E_INVALID;
+ goto send_reply;
+ }
+
+ key_len = mp->certkey_len - mp->cert_len;
+ if (key_len > max_key_len)
+ {
+ rv = SESSION_E_INVALID;
+ goto send_reply;
+ }
+
+ clib_memset (a, 0, sizeof (*a));
+ a->cert = certkey;
+ a->key = certkey + mp->cert_len;
+ a->cert_len = mp->cert_len;
+ a->key_len = key_len;
+ rv = vnet_app_add_cert_key_pair (a);
+
+ vec_free (certkey);
+ }
+ else
+ {
+ rv = vnet_app_del_cert_key_pair (mp->index);
+ }
+
+send_reply:
+
+ msg.type = APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY_REPLY;
+ rmp = &msg.cert_key_add_del_reply;
+ rmp->retval = rv;
+ rmp->context = mp->context;
+ if (!rv && mp->is_add)
+ rmp->index = a->index;
+
+ clib_socket_sendmsg (cs, &msg, sizeof (msg), 0, 0);
+}
+
static void
sapi_socket_detach (app_namespace_t * app_ns, clib_socket_t * cs)
{
@@ -1496,11 +1597,12 @@ sapi_socket_detach (app_namespace_t * app_ns, clib_socket_t * cs)
u32 api_client_handle;
api_client_handle = appns_sapi_socket_handle (app_ns, cs);
- sapi_socket_close_w_handle (api_client_handle);
/* Cleanup everything because app worker closed socket or crashed */
handle = (app_ns_api_handle_t *) & cs->private_data;
- app_wrk = app_worker_get (handle->aah_app_wrk_index);
+ app_wrk = app_worker_get_if_valid (handle->aah_app_wrk_index);
+ if (!app_wrk)
+ return;
vnet_app_worker_add_del_args_t args = {
.app_index = app_wrk->app_index,
@@ -1548,6 +1650,9 @@ sapi_sock_read_ready (clib_file_t * cf)
case APP_SAPI_MSG_TYPE_ADD_DEL_WORKER:
sapi_add_del_worker_handler (app_ns, cs, &msg.worker_add_del);
break;
+ case APP_SAPI_MSG_TYPE_ADD_DEL_CERT_KEY:
+ sapi_add_del_cert_key_handler (app_ns, cs, &msg.cert_key_add_del);
+ break;
default:
clib_warning ("app wrk %u unknown message type: %u",
handle->aah_app_wrk_index, msg.type);
@@ -1635,6 +1740,23 @@ error:
return err;
}
+void
+appns_sapi_del_ns_socket (app_namespace_t *app_ns)
+{
+ app_ns_api_handle_t *handle;
+ clib_socket_t *cs;
+
+ pool_foreach (cs, app_ns->app_sockets)
+ {
+ handle = (app_ns_api_handle_t *) &cs->private_data;
+ clib_file_del_by_index (&file_main, handle->aah_file_index);
+
+ clib_socket_close (cs);
+ clib_socket_free (cs);
+ }
+ pool_free (app_ns->app_sockets);
+}
+
int
appns_sapi_add_ns_socket (app_namespace_t * app_ns)
{
@@ -1644,49 +1766,42 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns)
struct stat file_stat;
clib_error_t *err;
clib_socket_t *cs;
- u8 *dir = 0;
- int rv = 0;
+ char dir[4096];
- vec_add (dir, vlib_unix_get_runtime_dir (),
- strlen (vlib_unix_get_runtime_dir ()));
- vec_add (dir, (u8 *) subdir, strlen (subdir));
+ snprintf (dir, sizeof (dir), "%s%s", vlib_unix_get_runtime_dir (), subdir);
- err = vlib_unix_recursive_mkdir ((char *) dir);
- if (err)
- {
- clib_error_report (err);
- rv = -1;
- goto error;
- }
-
- /* Use abstract sockets if a netns was provided */
- if (app_ns->netns)
- app_ns->sock_name = format (0, "@vpp/session/%v%c", app_ns->ns_id, 0);
- else
- app_ns->sock_name = format (0, "%v%v%c", dir, app_ns->ns_id, 0);
+ if (!app_ns->sock_name)
+ app_ns->sock_name = format (0, "%s%v%c", dir, app_ns->ns_id, 0);
/*
* Create and initialize socket to listen on
*/
cs = appns_sapi_alloc_socket (app_ns);
- cs->config = (char *) app_ns->sock_name;
+ cs->config = (char *) vec_dup (app_ns->sock_name);
cs->flags = CLIB_SOCKET_F_IS_SERVER |
CLIB_SOCKET_F_ALLOW_GROUP_WRITE |
CLIB_SOCKET_F_SEQPACKET | CLIB_SOCKET_F_PASSCRED;
- if ((err = clib_socket_init_netns (cs, app_ns->netns)))
+ if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX)
{
- clib_error_report (err);
- rv = -1;
- goto error;
+ err = vlib_unix_recursive_mkdir ((char *) dir);
+ if (err)
+ {
+ clib_error_report (err);
+ return SESSION_E_SYSCALL;
+ }
}
- if (!app_ns->netns && stat ((char *) app_ns->sock_name, &file_stat) == -1)
+ if ((err = clib_socket_init (cs)))
{
- rv = -1;
- goto error;
+ clib_error_report (err);
+ return -1;
}
+ if (clib_socket_prefix_get_type (cs->config) == CLIB_SOCKET_TYPE_UNIX &&
+ stat ((char *) app_ns->sock_name, &file_stat) == -1)
+ return -1;
+
/*
* Start polling it
*/
@@ -1703,22 +1818,7 @@ appns_sapi_add_ns_socket (app_namespace_t * app_ns)
handle->aah_file_index = clib_file_add (&file_main, &cf);
handle->aah_app_wrk_index = APP_INVALID_INDEX;
-error:
- vec_free (dir);
- return rv;
-}
-
-static void
-vl_api_application_tls_cert_add_t_handler (
- vl_api_application_tls_cert_add_t *mp)
-{
- /* deprecated */
-}
-
-static void
-vl_api_application_tls_key_add_t_handler (vl_api_application_tls_key_add_t *mp)
-{
- /* deprecated */
+ return 0;
}
#include <vnet/session/session.api.c>
diff --git a/src/vnet/session/session_cli.c b/src/vnet/session/session_cli.c
index 24d8cfb1e24..569a77bccc1 100644
--- a/src/vnet/session/session_cli.c
+++ b/src/vnet/session/session_cli.c
@@ -145,8 +145,11 @@ format_session (u8 * s, va_list * args)
else if (ss->session_state == SESSION_STATE_CONNECTING)
{
if (ss->flags & SESSION_F_HALF_OPEN)
- s = format (s, "%U%v", format_transport_half_open_connection, tp,
- ss->connection_index, ss->thread_index, verbose, str);
+ {
+ s = format (s, "%U", format_transport_half_open_connection, tp,
+ ss->connection_index, ss->thread_index, verbose);
+ s = format (s, "%v", str);
+ }
else
s = format (s, "%U", format_transport_connection, tp,
ss->connection_index, ss->thread_index, verbose);
@@ -259,7 +262,6 @@ unformat_session (unformat_input_t * input, va_list * args)
if (s)
{
*result = s;
- session_pool_remove_peeker (s->thread_index);
return 1;
}
return 0;
@@ -340,7 +342,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose)
n_closed = 0;
- /* *INDENT-OFF* */
pool_foreach (s, pool) {
if (s->session_state >= SESSION_STATE_TRANSPORT_DELETED)
{
@@ -349,7 +350,6 @@ session_cli_show_all_sessions (vlib_main_t * vm, int verbose)
}
vlib_cli_output (vm, "%U", format_session, s, verbose);
}
- /* *INDENT-ON* */
if (!n_closed)
vlib_cli_output (vm, "Thread %d: active sessions %u", thread_index,
@@ -488,7 +488,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
u8 one_session = 0, do_listeners = 0, sst, do_elog = 0, do_filter = 0;
u32 track_index, thread_index = 0, start = 0, end = ~0, session_index;
- unformat_input_t _line_input, *line_input = &_line_input;
transport_proto_t transport_proto = TRANSPORT_PROTO_INVALID;
session_state_t state = SESSION_N_STATES, *states = 0;
session_main_t *smm = &session_main;
@@ -502,26 +501,20 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
session_cli_return_if_not_enabled ();
- if (!unformat_user (input, unformat_line_input, line_input))
- {
- session_cli_show_all_sessions (vm, 0);
- return 0;
- }
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "verbose %d", &verbose))
+ if (unformat (input, "verbose %d", &verbose))
;
- else if (unformat (line_input, "verbose"))
+ else if (unformat (input, "verbose"))
verbose = 1;
- else if (unformat (line_input, "listeners %U", unformat_transport_proto,
+ else if (unformat (input, "listeners %U", unformat_transport_proto,
&transport_proto))
do_listeners = 1;
- else if (unformat (line_input, "%U", unformat_session, &s))
+ else if (unformat (input, "%U", unformat_session, &s))
{
one_session = 1;
}
- else if (unformat (line_input, "thread %u index %u", &thread_index,
+ else if (unformat (input, "thread %u index %u", &thread_index,
&session_index))
{
s = session_get_if_valid (session_index, thread_index);
@@ -532,19 +525,17 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
one_session = 1;
}
- else if (unformat (line_input, "thread %u", &thread_index))
+ else if (unformat (input, "thread %u", &thread_index))
{
do_filter = 1;
}
- else
- if (unformat (line_input, "state %U", unformat_session_state, &state))
+ else if (unformat (input, "state %U", unformat_session_state, &state))
{
vec_add1 (states, state);
do_filter = 1;
}
- else if (unformat (line_input, "proto %U index %u",
- unformat_transport_proto, &transport_proto,
- &transport_index))
+ else if (unformat (input, "proto %U index %u", unformat_transport_proto,
+ &transport_proto, &transport_index))
{
transport_connection_t *tc;
tc = transport_get_connection (transport_proto, transport_index,
@@ -565,34 +556,34 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
}
one_session = 1;
}
- else if (unformat (line_input, "proto %U", unformat_transport_proto,
+ else if (unformat (input, "proto %U", unformat_transport_proto,
&transport_proto))
do_filter = 1;
- else if (unformat (line_input, "range %u %u", &start, &end))
+ else if (unformat (input, "range %u %u", &start, &end))
do_filter = 1;
- else if (unformat (line_input, "range %u", &start))
+ else if (unformat (input, "range %u", &start))
{
end = start + 50;
do_filter = 1;
}
- else if (unformat (line_input, "elog"))
+ else if (unformat (input, "elog"))
do_elog = 1;
- else if (unformat (line_input, "protos"))
+ else if (unformat (input, "protos"))
{
vlib_cli_output (vm, "%U", format_transport_protos);
goto done;
}
- else if (unformat (line_input, "states"))
+ else if (unformat (input, "states"))
{
session_cli_print_session_states (vm);
goto done;
}
- else if (unformat (line_input, "events"))
+ else if (unformat (input, "events"))
do_events = 1;
else
{
error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
+ format_unformat_error, input);
goto done;
}
}
@@ -625,7 +616,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%-" SESSION_CLI_ID_LEN "s%-24s", "Listener",
"App");
- /* *INDENT-OFF* */
pool_foreach (s, smm->wrk[0].sessions) {
if (s->session_state != SESSION_STATE_LISTENING
|| s->session_type != sst)
@@ -635,7 +625,6 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "%U%-25v%", format_session, s, 0,
app_name);
}
- /* *INDENT-ON* */
goto done;
}
@@ -661,12 +650,10 @@ show_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
session_cli_show_all_sessions (vm, verbose);
done:
- unformat_free (line_input);
vec_free (states);
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (vlib_cli_show_session_command) =
{
.path = "show session",
@@ -676,7 +663,6 @@ VLIB_CLI_COMMAND (vlib_cli_show_session_command) =
"[protos] [states] ",
.function = show_session_command_fn,
};
-/* *INDENT-ON* */
static int
clear_session (session_t * s)
@@ -728,27 +714,23 @@ clear_session_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (clear_all)
{
- /* *INDENT-OFF* */
vec_foreach (wrk, smm->wrk)
{
pool_foreach (session, wrk->sessions) {
clear_session (session);
}
};
- /* *INDENT-ON* */
}
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_session_command, static) =
{
.path = "clear session",
.short_help = "clear session thread <thread> session <index>",
.function = clear_session_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_session_fifo_trace_command_fn (vlib_main_t * vm,
@@ -791,14 +773,12 @@ show_session_fifo_trace_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_fifo_trace_command, static) =
{
.path = "show session fifo trace",
.short_help = "show session fifo trace <session>",
.function = show_session_fifo_trace_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -838,53 +818,98 @@ session_replay_fifo_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_replay_fifo_trace_command, static) =
{
.path = "session replay fifo",
.short_help = "session replay fifo <session>",
.function = session_replay_fifo_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
session_enable_disable_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
- unformat_input_t _line_input, *line_input = &_line_input;
- u8 is_en = 1;
- clib_error_t *error;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return clib_error_return (0, "expected enable | disable");
+ u8 is_en = 2;
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
- if (unformat (line_input, "enable"))
+ if (unformat (input, "enable"))
is_en = 1;
- else if (unformat (line_input, "disable"))
+ else if (unformat (input, "disable"))
is_en = 0;
else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, line_input);
- unformat_free (line_input);
- return error;
- }
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
}
- unformat_free (line_input);
+ if (is_en > 1)
+ return clib_error_return (0, "expected enable | disable");
+
return vnet_session_enable_disable (vm, is_en);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_enable_disable_command, static) =
{
.path = "session",
.short_help = "session [enable|disable]",
.function = session_enable_disable_fn,
};
-/* *INDENT-ON* */
+
+static clib_error_t *
+show_session_stats_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_main_t *smm = &session_main;
+ session_worker_t *wrk;
+ unsigned int *e;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+
+ vec_foreach (wrk, smm->wrk)
+ {
+ vlib_cli_output (vm, "Thread %u:\n", wrk - smm->wrk);
+ e = wrk->stats.errors;
+#define _(name, str) \
+ if (e[SESSION_EP_##name]) \
+ vlib_cli_output (vm, " %lu %s", e[SESSION_EP_##name], str);
+ foreach_session_error
+#undef _
+ }
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_session_stats_command, static) = {
+ .path = "show session stats",
+ .short_help = "show session stats",
+ .function = show_session_stats_fn,
+};
+
+static clib_error_t *
+clear_session_stats_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_main_t *smm = &session_main;
+ session_worker_t *wrk;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+ input);
+
+ vec_foreach (wrk, smm->wrk)
+ {
+ clib_memset (&wrk->stats, 0, sizeof (wrk->stats));
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (clear_session_stats_command, static) = {
+ .path = "clear session stats",
+ .short_help = "clear session stats",
+ .function = clear_session_stats_fn,
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/session_debug.c b/src/vnet/session/session_debug.c
index 349d1ec9b46..2a50adac5dd 100644
--- a/src/vnet/session/session_debug.c
+++ b/src/vnet/session/session_debug.c
@@ -52,15 +52,20 @@ show_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_dbg_clock_cycles_command, static) =
{
.path = "show session dbg clock_cycles",
.short_help = "show session dbg clock_cycles",
.function = show_session_dbg_clock_cycles_fn,
};
-/* *INDENT-ON* */
+static_always_inline f64
+session_dbg_time_now (u32 thread)
+{
+ vlib_main_t *vm = vlib_get_main_by_index (thread);
+
+ return clib_time_now (&vm->clib_time) + vm->time_offset;
+}
static clib_error_t *
clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -77,7 +82,7 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
{
sde = &session_dbg_main.wrk[thread];
clib_memset (sde, 0, sizeof (session_dbg_evts_t));
- sde->last_time = vlib_time_now (vlib_mains[thread]);
+ sde->last_time = session_dbg_time_now (thread);
sde->start_time = sde->last_time;
}
@@ -85,14 +90,12 @@ clear_session_dbg_clock_cycles_fn (vlib_main_t * vm, unformat_input_t * input,
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_session_clock_cycles_command, static) =
{
.path = "clear session dbg clock_cycles",
.short_help = "clear session dbg clock_cycles",
.function = clear_session_dbg_clock_cycles_fn,
};
-/* *INDENT-ON* */
void
session_debug_init (void)
@@ -107,15 +110,99 @@ session_debug_init (void)
for (thread = 0; thread < num_threads; thread++)
{
clib_memset (&sdm->wrk[thread], 0, sizeof (session_dbg_evts_t));
- sdm->wrk[thread].start_time = vlib_time_now (vlib_mains[thread]);
+ sdm->wrk[thread].start_time = session_dbg_time_now (thread);
+ }
+}
+
+static const char *session_evt_grp_str[] = {
+#define _(sym, str) str,
+ foreach_session_evt_grp
+#undef _
+};
+
+static void
+session_debug_show_groups (vlib_main_t *vm)
+{
+ session_dbg_main_t *sdm = &session_dbg_main;
+ int i = 0;
+
+ vlib_cli_output (vm, "%-10s%-30s%-10s", "Index", "Group", "Level");
+
+ for (i = 0; i < SESSION_EVT_N_GRP; i++)
+ vlib_cli_output (vm, "%-10d%-30s%-10d", i, session_evt_grp_str[i],
+ sdm->grp_dbg_lvl[i]);
+}
+
+static clib_error_t *
+session_debug_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_dbg_main_t *sdm = &session_dbg_main;
+ u32 group, level = ~0;
+ clib_error_t *error = 0;
+ u8 is_show = 0;
+ uword *bitmap = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "show"))
+ is_show = 1;
+ else if (unformat (input, "group %U", unformat_bitmap_list, &bitmap))
+ ;
+ else if (unformat (input, "level %d", &level))
+ ;
+ else
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ if (is_show)
+ {
+ session_debug_show_groups (vm);
+ goto done;
+ }
+ if (level == ~0)
+ {
+ vlib_cli_output (vm, "level must be entered");
+ goto done;
+ }
+
+ group = clib_bitmap_last_set (bitmap);
+ if (group == ~0)
+ {
+ vlib_cli_output (vm, "group must be entered");
+ goto done;
+ }
+ if (group >= SESSION_EVT_N_GRP)
+ {
+ vlib_cli_output (vm, "group out of bounds");
+ goto done;
}
+ clib_bitmap_foreach (group, bitmap)
+ sdm->grp_dbg_lvl[group] = level;
+
+done:
+
+ clib_bitmap_free (bitmap);
+ return error;
}
+
+VLIB_CLI_COMMAND (session_debug_command, static) = {
+ .path = "session debug",
+ .short_help = "session debug {show | debug group <list> level <n>}",
+ .function = session_debug_fn,
+ .is_mp_safe = 1,
+};
+
#else
void
session_debug_init (void)
{
}
-#endif
+#endif /* SESSION_DEBUG */
void
dump_thread_0_event_queue (void)
@@ -144,6 +231,8 @@ dump_thread_0_event_queue (void)
{
case SESSION_IO_EVT_TX:
s0 = session_get_if_valid (e->session_index, my_thread_index);
+ if (!s0)
+ break;
fformat (stdout, "[%04d] TX session %d\n", i, s0->session_index);
break;
@@ -155,6 +244,8 @@ dump_thread_0_event_queue (void)
case SESSION_IO_EVT_BUILTIN_RX:
s0 = session_get_if_valid (e->session_index, my_thread_index);
+ if (!s0)
+ break;
fformat (stdout, "[%04d] builtin_rx %d\n", i, s0->session_index);
break;
@@ -180,28 +271,18 @@ dump_thread_0_event_queue (void)
static u8
session_node_cmp_event (session_event_t * e, svm_fifo_t * f)
{
- session_t *s;
switch (e->event_type)
{
case SESSION_IO_EVT_RX:
case SESSION_IO_EVT_TX:
case SESSION_IO_EVT_BUILTIN_RX:
- case SESSION_IO_EVT_BUILTIN_TX:
+ case SESSION_IO_EVT_TX_MAIN:
case SESSION_IO_EVT_TX_FLUSH:
if (e->session_index == f->shr->master_session_index)
return 1;
break;
case SESSION_CTRL_EVT_CLOSE:
- break;
case SESSION_CTRL_EVT_RPC:
- s = session_get_from_handle (e->session_handle);
- if (!s)
- {
- clib_warning ("session has event but doesn't exist!");
- break;
- }
- if (s->rx_fifo == f || s->tx_fifo == f)
- return 1;
break;
default:
break;
@@ -217,7 +298,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
session_worker_t *wrk;
int i, index, found = 0;
svm_msg_q_msg_t *msg;
- svm_msg_q_ring_t *ring;
svm_msg_q_t *mq;
u8 thread_index;
@@ -234,8 +314,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
for (i = 0; i < sq->cursize; i++)
{
msg = (svm_msg_q_msg_t *) (&sq->data[0] + sq->elsize * index);
- ring = svm_msg_q_ring (mq, msg->ring_index);
- clib_memcpy_fast (e, svm_msg_q_msg_data (mq, msg), ring->elsize);
+ clib_memcpy_fast (e, svm_msg_q_msg_data (mq, msg), sizeof (*e));
found = session_node_cmp_event (e, f);
if (found)
return 1;
@@ -245,7 +324,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
* Search pending events vector
*/
- /* *INDENT-OFF* */
clib_llist_foreach (wrk->event_elts, evt_list,
pool_elt_at_index (wrk->event_elts, wrk->new_head),
elt, ({
@@ -256,9 +334,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
goto done;
}
}));
- /* *INDENT-ON* */
- /* *INDENT-OFF* */
clib_llist_foreach (wrk->event_elts, evt_list,
pool_elt_at_index (wrk->event_elts, wrk->old_head),
elt, ({
@@ -269,7 +345,6 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_event_t * e)
goto done;
}
}));
- /* *INDENT-ON* */
done:
return found;
diff --git a/src/vnet/session/session_debug.h b/src/vnet/session/session_debug.h
index 9e49a35dbe6..d433ef47fb1 100644
--- a/src/vnet/session/session_debug.h
+++ b/src/vnet/session/session_debug.h
@@ -17,49 +17,81 @@
#include <vnet/session/transport.h>
#include <vlib/vlib.h>
-
-#define foreach_session_dbg_evt \
- _(ENQ, "enqueue") \
- _(DEQ, "dequeue") \
- _(DEQ_NODE, "dequeue") \
- _(POLL_GAP_TRACK, "poll gap track") \
- _(POLL_DISPATCH_TIME, "dispatch time") \
- _(DISPATCH_START, "dispatch start") \
- _(DISPATCH_END, "dispatch end") \
- _(FREE, "session free") \
- _(DSP_CNTRS, "dispatch counters") \
- _(IO_EVT_COUNTS, "io evt counts") \
- _(EVT_COUNTS, "ctrl evt counts") \
+#include <vpp/vnet/config.h>
+
+#define foreach_session_dbg_evt \
+ _ (ENQ, DEQ_EVTS, 1, "enqueue") \
+ _ (DEQ, DEQ_EVTS, 1, "dequeue") \
+ _ (DEQ_NODE, DISPATCH_DBG, 1, "dequeue") \
+ _ (POLL_GAP_TRACK, EVT_POLL_DBG, 1, "poll gap track") \
+ _ (POLL_DISPATCH_TIME, EVT_POLL_DBG, 1, "dispatch time") \
+ _ (DISPATCH_START, CLOCKS_EVT_DBG, 1, "dispatch start") \
+ _ (DISPATCH_END, CLOCKS_EVT_DBG, 1, "dispatch end") \
+ _ (DSP_CNTRS, CLOCKS_EVT_DBG, 1, "dispatch counters") \
+ _ (STATE_CHANGE, SM, 1, "session state change") \
+ _ (FREE, SM, 1, "session free") \
+ _ (IO_EVT_COUNTS, COUNTS_EVT_DBG, 1, "io evt counts") \
+ _ (COUNTS, COUNTS_EVT_DBG, 1, "ctrl evt counts")
typedef enum _session_evt_dbg
{
-#define _(sym, str) SESSION_EVT_##sym,
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym,
foreach_session_dbg_evt
#undef _
} session_evt_dbg_e;
-#define foreach_session_events \
-_(CLK_UPDATE_TIME, 1, 1, "Time Update Time") \
-_(CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \
-_(CLK_CTRL_EVTS, 1, 1, "Time Ctrl Events") \
-_(CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \
-_(CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \
-_(CLK_TOTAL, 1, 1, "Time Total in Node") \
-_(CLK_START, 1, 1, "Time Since Last Reset") \
- \
-_(CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed" ) \
-_(CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed" ) \
-_(CNT_NEW_EVTS, 1, 0, "# of New Events Processed" ) \
-_(CNT_OLD_EVTS, 1, 0, "# of Old Events Processed" ) \
-_(CNT_IO_EVTS, 1, 0, "# of Events Processed" ) \
-_(CNT_NODE_CALL, 1, 0, "# of Node Calls") \
- \
-_(BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \
-_(SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \
-_(SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \
-_(SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \
-_(SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \
-_(SESSION_IO_EVT_BUILTIN_TX, 1, 0, "# of IO Event BuiltIn TX") \
+typedef enum session_evt_lvl_
+{
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_LVL = lvl,
+ foreach_session_dbg_evt
+#undef _
+} session_evt_lvl_e;
+
+#define foreach_session_evt_grp \
+ _ (DEQ_EVTS, "dequeue/enqueue events") \
+ _ (DISPATCH_DBG, "dispatch") \
+ _ (EVT_POLL_DBG, "event poll") \
+ _ (SM, "state machine") \
+ _ (CLOCKS_EVT_DBG, "clocks events") \
+ _ (COUNTS_EVT_DBG, "counts events")
+
+typedef enum session_evt_grp_
+{
+#define _(sym, str) SESSION_EVT_GRP_##sym,
+ foreach_session_evt_grp
+#undef _
+ SESSION_EVT_N_GRP
+} session_evt_grp_e;
+
+typedef enum session_evt_to_grp_
+{
+#define _(sym, grp, lvl, str) SESSION_EVT_##sym##_GRP = SESSION_EVT_GRP_##grp,
+ foreach_session_dbg_evt
+#undef _
+} session_evt_to_grp_e;
+
+#define foreach_session_events \
+ _ (CLK_UPDATE_TIME, 1, 1, "Time Update Time") \
+ _ (CLK_MQ_DEQ, 1, 1, "Time MQ Dequeue") \
+ _ (CLK_CTRL_EVTS, 1, 1, "Time Ctrl Events") \
+ _ (CLK_NEW_IO_EVTS, 1, 1, "Time New IO Events") \
+ _ (CLK_OLD_IO_EVTS, 1, 1, "Time Old IO Events") \
+ _ (CLK_TOTAL, 1, 1, "Time Total in Node") \
+ _ (CLK_START, 1, 1, "Time Since Last Reset") \
+ \
+ _ (CNT_MQ_EVTS, 1, 0, "# of MQ Events Processed") \
+ _ (CNT_CTRL_EVTS, 1, 0, "# of Ctrl Events Processed") \
+ _ (CNT_NEW_EVTS, 1, 0, "# of New Events Processed") \
+ _ (CNT_OLD_EVTS, 1, 0, "# of Old Events Processed") \
+ _ (CNT_IO_EVTS, 1, 0, "# of Events Processed") \
+ _ (CNT_NODE_CALL, 1, 0, "# of Node Calls") \
+ \
+ _ (BASE_OFFSET_IO_EVTS, 0, 0, "NULL") \
+ _ (SESSION_IO_EVT_RX, 1, 0, "# of IO Event RX") \
+ _ (SESSION_IO_EVT_TX, 1, 0, "# of IO Event TX") \
+ _ (SESSION_IO_EVT_TX_FLUSH, 1, 0, "# of IO Event TX Flush") \
+ _ (SESSION_IO_EVT_BUILTIN_RX, 1, 0, "# of IO Event BuiltIn RX") \
+ _ (SESSION_IO_EVT_TX_MAIN, 1, 0, "# of IO Event TX Main")
typedef enum
{
@@ -90,17 +122,28 @@ typedef struct session_dbg_evts_t
typedef struct session_dbg_main_
{
session_dbg_evts_t *wrk;
+ u8 grp_dbg_lvl[SESSION_EVT_N_GRP];
} session_dbg_main_t;
extern session_dbg_main_t session_dbg_main;
-#define SESSION_DEBUG 0 * (TRANSPORT_DEBUG > 0)
-#define SESSION_DEQ_EVTS (0)
-#define SESSION_DISPATCH_DBG (0)
-#define SESSION_EVT_POLL_DBG (0)
-#define SESSION_SM (0)
+#if defined VPP_SESSION_DEBUG && (TRANSPORT_DEBUG > 0)
+#define SESSION_DEBUG (1)
+#define SESSION_DEQ_EVTS (1)
+#define SESSION_DISPATCH_DBG (1)
+#define SESSION_EVT_POLL_DBG (1)
+#define SESSION_SM (1)
+#define SESSION_CLOCKS_EVT_DBG (1)
+#define SESSION_COUNTS_EVT_DBG (1)
+#else
+#define SESSION_DEBUG (0)
+#define SESSION_DEQ_EVTS (0)
+#define SESSION_DISPATCH_DBG (0)
+#define SESSION_EVT_POLL_DBG (0)
+#define SESSION_SM (0)
#define SESSION_CLOCKS_EVT_DBG (0)
#define SESSION_COUNTS_EVT_DBG (0)
+#endif
#if SESSION_DEBUG
@@ -123,17 +166,43 @@ extern session_dbg_main_t session_dbg_main;
ed = ELOG_DATA (&vlib_global_main.elog_main, _e)
#if SESSION_SM
-#define SESSION_EVT_FREE_HANDLER(_s) \
-{ \
- ELOG_TYPE_DECLARE (_e) = \
- { \
- .format = "free: idx %u", \
- .format_args = "i4", \
- }; \
- DEC_SESSION_ETD(_s, _e, 1); \
- ed->data[0] = _s->session_index; \
-}
+#define SESSION_EVT_STATE_CHANGE_HANDLER(_s) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = { \
+ .format = "%s: idx %u", \
+ .format_args = "t4i4", \
+ .n_enum_strings = 12, \
+ .enum_strings = { \
+ "created", \
+ "listening", \
+ "connecting", \
+ "accepting", \
+ "ready", \
+ "opened", \
+ "transport closing", \
+ "closing", \
+ "app closed", \
+ "transport closed", \
+ "closed", \
+ "transport deleted", \
+ }, \
+ }; \
+ DEC_SESSION_ETD (_s, _e, 2); \
+ ed->data[0] = _s->session_state; \
+ ed->data[1] = _s->session_index; \
+ }
+
+#define SESSION_EVT_FREE_HANDLER(_s) \
+ { \
+ ELOG_TYPE_DECLARE (_e) = { \
+ .format = "free: idx %u", \
+ .format_args = "i4", \
+ }; \
+ DEC_SESSION_ED (_e, 1); \
+ ed->data[0] = _s->session_index; \
+ }
#else
+#define SESSION_EVT_STATE_CHANGE_HANDLER(_s)
#define SESSION_EVT_FREE_HANDLER(_s)
#endif
@@ -282,17 +351,17 @@ extern session_dbg_main_t session_dbg_main;
counters[SESS_Q_##_node_evt].u64 += _cnt; \
}
-#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \
-{ \
- u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \
- session_dbg_evts_t *sde; \
- sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \
- sde->counters[type].u64 += _cnt; \
- sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt ; \
-}
+#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk) \
+ { \
+ u8 type = SESS_Q_BASE_OFFSET_IO_EVTS + _node_evt + 1; \
+ session_dbg_evts_t *sde; \
+ sde = &session_dbg_main.wrk[_wrk->vm->thread_index]; \
+ sde->counters[type].u64 += _cnt; \
+ sde->counters[SESS_Q_CNT_IO_EVTS].u64 += _cnt; \
+ }
#else
#define SESSION_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
-#define SESSION_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
+#define SESSION_EVT_IO_EVT_COUNTS_HANDLER(_node_evt, _cnt, _wrk)
#endif /*SESSION_COUNTS_EVT_DBG */
@@ -322,8 +391,18 @@ extern session_dbg_main_t session_dbg_main;
#define CONCAT_HELPER(_a, _b) _a##_b
#define CC(_a, _b) CONCAT_HELPER(_a, _b)
-#define SESSION_EVT(_evt, _args...) CC(_evt, _HANDLER)(_args)
-
+#define session_evt_lvl(_evt) CC (_evt, _LVL)
+#define session_evt_grp(_evt) CC (_evt, _GRP)
+#define session_evt_grp_dbg_lvl(_evt) \
+ session_dbg_main.grp_dbg_lvl[session_evt_grp (_evt)]
+#define SESSION_EVT(_evt, _args...) \
+ do \
+ { \
+ if (PREDICT_FALSE (session_evt_grp_dbg_lvl (_evt) >= \
+ session_evt_lvl (_evt))) \
+ CC (_evt, _HANDLER) (_args); \
+ } \
+ while (0)
#else
#define SESSION_EVT(_evt, _args...)
#define SESSION_DBG(_fmt, _args...)
diff --git a/src/vnet/session/session_input.c b/src/vnet/session/session_input.c
new file mode 100644
index 00000000000..73b777127fd
--- /dev/null
+++ b/src/vnet/session/session_input.c
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vnet/session/session.h>
+#include <vnet/session/application.h>
+
+static inline int
+mq_try_lock (svm_msg_q_t *mq)
+{
+ int rv, n_try = 0;
+
+ while (n_try < 100)
+ {
+ rv = svm_msg_q_try_lock (mq);
+ if (!rv)
+ return 0;
+ n_try += 1;
+ usleep (1);
+ }
+
+ return -1;
+}
+
+always_inline u8
+mq_event_ring_index (session_evt_type_t et)
+{
+ return (et >= SESSION_CTRL_EVT_RPC ? SESSION_MQ_CTRL_EVT_RING :
+ SESSION_MQ_IO_EVT_RING);
+}
+
+void
+app_worker_del_all_events (app_worker_t *app_wrk)
+{
+ session_worker_t *wrk;
+ session_event_t *evt;
+ u32 thread_index;
+ session_t *s;
+
+ for (thread_index = 0; thread_index < vec_len (app_wrk->wrk_evts);
+ thread_index++)
+ {
+ while (clib_fifo_elts (app_wrk->wrk_evts[thread_index]))
+ {
+ clib_fifo_sub2 (app_wrk->wrk_evts[thread_index], evt);
+ switch (evt->event_type)
+ {
+ case SESSION_CTRL_EVT_MIGRATED:
+ s = session_get (evt->session_index, thread_index);
+ transport_cleanup (session_get_transport_proto (s),
+ s->connection_index, s->thread_index);
+ session_free (s);
+ break;
+ case SESSION_CTRL_EVT_CLEANUP:
+ s = session_get (evt->as_u64[0] & 0xffffffff, thread_index);
+ if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION)
+ break;
+ uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s);
+ break;
+ case SESSION_CTRL_EVT_HALF_CLEANUP:
+ s = ho_session_get (evt->session_index);
+ pool_put_index (app_wrk->half_open_table, s->ho_index);
+ session_free (s);
+ break;
+ default:
+ break;
+ }
+ }
+ wrk = session_main_get_worker (thread_index);
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0);
+ }
+}
+
+always_inline int
+app_worker_flush_events_inline (app_worker_t *app_wrk, u32 thread_index,
+ u8 is_builtin)
+{
+ application_t *app = application_get (app_wrk->app_index);
+ svm_msg_q_t *mq = app_wrk->event_queue;
+ u8 ring_index, mq_is_cong;
+ session_state_t old_state;
+ session_event_t *evt;
+ u32 n_evts = 128, i;
+ session_t *s;
+ int rv;
+
+ n_evts = clib_min (n_evts, clib_fifo_elts (app_wrk->wrk_evts[thread_index]));
+
+ if (!is_builtin)
+ {
+ mq_is_cong = app_worker_mq_is_congested (app_wrk);
+ if (mq_try_lock (mq))
+ {
+ app_worker_set_mq_wrk_congested (app_wrk, thread_index);
+ return 0;
+ }
+ }
+
+ for (i = 0; i < n_evts; i++)
+ {
+ evt = clib_fifo_head (app_wrk->wrk_evts[thread_index]);
+ if (!is_builtin)
+ {
+ ring_index = mq_event_ring_index (evt->event_type);
+ if (svm_msg_q_or_ring_is_full (mq, ring_index))
+ {
+ app_worker_set_mq_wrk_congested (app_wrk, thread_index);
+ break;
+ }
+ }
+
+ switch (evt->event_type)
+ {
+ case SESSION_IO_EVT_RX:
+ s = session_get (evt->session_index, thread_index);
+ s->flags &= ~SESSION_F_RX_EVT;
+ /* Application didn't confirm accept yet */
+ if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING ||
+ s->session_state == SESSION_STATE_CONNECTING))
+ break;
+ app->cb_fns.builtin_app_rx_callback (s);
+ break;
+ /* Handle sessions that might not be on current thread */
+ case SESSION_IO_EVT_BUILTIN_RX:
+ s = session_get_from_handle_if_valid (evt->session_handle);
+ if (!s)
+ break;
+ s->flags &= ~SESSION_F_RX_EVT;
+ if (PREDICT_FALSE (s->session_state == SESSION_STATE_ACCEPTING ||
+ s->session_state == SESSION_STATE_CONNECTING))
+ break;
+ app->cb_fns.builtin_app_rx_callback (s);
+ break;
+ case SESSION_IO_EVT_TX:
+ s = session_get (evt->session_index, thread_index);
+ app->cb_fns.builtin_app_tx_callback (s);
+ break;
+ case SESSION_IO_EVT_TX_MAIN:
+ s = session_get_from_handle_if_valid (evt->session_handle);
+ if (!s)
+ break;
+ app->cb_fns.builtin_app_tx_callback (s);
+ break;
+ case SESSION_CTRL_EVT_BOUND:
+ /* No app cb function currently */
+ if (is_builtin)
+ break;
+ app->cb_fns.session_listened_callback (
+ app_wrk->wrk_index, evt->as_u64[1] >> 32, evt->session_handle,
+ evt->as_u64[1] & 0xffffffff);
+ break;
+ case SESSION_CTRL_EVT_ACCEPTED:
+ s = session_get (evt->session_index, thread_index);
+ old_state = s->session_state;
+ if (app->cb_fns.session_accept_callback (s))
+ {
+ session_detach_app (s);
+ break;
+ }
+ if (is_builtin)
+ {
+ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_set_state (s,
+ clib_max (old_state, s->session_state));
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ }
+ }
+ break;
+ case SESSION_CTRL_EVT_CONNECTED:
+ if (!(evt->as_u64[1] & 0xffffffff))
+ {
+ s = session_get (evt->session_index, thread_index);
+ old_state = s->session_state;
+ }
+ else
+ s = 0;
+ rv = app->cb_fns.session_connected_callback (
+ app_wrk->wrk_index, evt->as_u64[1] >> 32, s,
+ evt->as_u64[1] & 0xffffffff);
+ if (!s)
+ break;
+ if (rv)
+ {
+ session_detach_app (s);
+ break;
+ }
+ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ session_set_state (s, clib_max (old_state, s->session_state));
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ }
+ break;
+ case SESSION_CTRL_EVT_DISCONNECTED:
+ s = session_get (evt->session_index, thread_index);
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_disconnect_callback (s);
+ break;
+ case SESSION_CTRL_EVT_RESET:
+ s = session_get (evt->session_index, thread_index);
+ if (!(s->flags & SESSION_F_APP_CLOSED))
+ app->cb_fns.session_reset_callback (s);
+ break;
+ case SESSION_CTRL_EVT_UNLISTEN_REPLY:
+ if (is_builtin)
+ break;
+ app->cb_fns.session_unlistened_callback (
+ app_wrk->wrk_index, evt->session_handle, evt->as_u64[1] >> 32,
+ evt->as_u64[1] & 0xffffffff);
+ break;
+ case SESSION_CTRL_EVT_MIGRATED:
+ s = session_get (evt->session_index, thread_index);
+ app->cb_fns.session_migrate_callback (s, evt->as_u64[1]);
+ transport_cleanup (session_get_transport_proto (s),
+ s->connection_index, s->thread_index);
+ session_free (s);
+ /* Notify app that it has data on the new session */
+ s = session_get_from_handle (evt->as_u64[1]);
+ session_send_io_evt_to_thread (s->rx_fifo,
+ SESSION_IO_EVT_BUILTIN_RX);
+ break;
+ case SESSION_CTRL_EVT_TRANSPORT_CLOSED:
+ s = session_get (evt->session_index, thread_index);
+ /* Notification enqueued before session was refused by app */
+ if (PREDICT_FALSE (s->app_wrk_index == APP_INVALID_INDEX))
+ break;
+ if (app->cb_fns.session_transport_closed_callback)
+ app->cb_fns.session_transport_closed_callback (s);
+ break;
+ case SESSION_CTRL_EVT_CLEANUP:
+ s = session_get (evt->as_u64[0] & 0xffffffff, thread_index);
+ /* Notification enqueued before session was refused by app */
+ if (PREDICT_TRUE (s->app_wrk_index != APP_INVALID_INDEX))
+ {
+ if (app->cb_fns.session_cleanup_callback)
+ app->cb_fns.session_cleanup_callback (s, evt->as_u64[0] >> 32);
+ }
+ if (evt->as_u64[0] >> 32 != SESSION_CLEANUP_SESSION)
+ break;
+ uword_to_pointer (evt->as_u64[1], void (*) (session_t * s)) (s);
+ break;
+ case SESSION_CTRL_EVT_HALF_CLEANUP:
+ s = ho_session_get (evt->session_index);
+ ASSERT (session_vlib_thread_is_cl_thread ());
+ if (app->cb_fns.half_open_cleanup_callback)
+ app->cb_fns.half_open_cleanup_callback (s);
+ pool_put_index (app_wrk->half_open_table, s->ho_index);
+ session_free (s);
+ break;
+ case SESSION_CTRL_EVT_APP_ADD_SEGMENT:
+ app->cb_fns.add_segment_callback (app_wrk->wrk_index,
+ evt->as_u64[1]);
+ break;
+ case SESSION_CTRL_EVT_APP_DEL_SEGMENT:
+ app->cb_fns.del_segment_callback (app_wrk->wrk_index,
+ evt->as_u64[1]);
+ break;
+ default:
+ clib_warning ("unexpected event: %u", evt->event_type);
+ ASSERT (0);
+ break;
+ }
+ clib_fifo_advance_head (app_wrk->wrk_evts[thread_index], 1);
+ }
+
+ if (!is_builtin)
+ {
+ svm_msg_q_unlock (mq);
+ if (mq_is_cong && i == n_evts)
+ app_worker_unset_wrk_mq_congested (app_wrk, thread_index);
+ }
+
+ return 0;
+}
+
+int
+app_wrk_flush_wrk_events (app_worker_t *app_wrk, u32 thread_index)
+{
+ if (app_worker_application_is_builtin (app_wrk))
+ return app_worker_flush_events_inline (app_wrk, thread_index,
+ 1 /* is_builtin */);
+ else
+ return app_worker_flush_events_inline (app_wrk, thread_index,
+ 0 /* is_builtin */);
+}
+
+static inline int
+session_wrk_flush_events (session_worker_t *wrk)
+{
+ app_worker_t *app_wrk;
+ uword app_wrk_index;
+ u32 thread_index;
+
+ thread_index = wrk->vm->thread_index;
+ app_wrk_index = clib_bitmap_first_set (wrk->app_wrks_pending_ntf);
+
+ while (app_wrk_index != ~0)
+ {
+ app_wrk = app_worker_get_if_valid (app_wrk_index);
+ /* app_wrk events are flushed on free, so should be valid here */
+ ASSERT (app_wrk != 0);
+ app_wrk_flush_wrk_events (app_wrk, thread_index);
+
+ if (!clib_fifo_elts (app_wrk->wrk_evts[thread_index]))
+ clib_bitmap_set (wrk->app_wrks_pending_ntf, app_wrk->wrk_index, 0);
+
+ app_wrk_index =
+ clib_bitmap_next_set (wrk->app_wrks_pending_ntf, app_wrk_index + 1);
+ }
+
+ if (!clib_bitmap_is_zero (wrk->app_wrks_pending_ntf))
+ vlib_node_set_interrupt_pending (wrk->vm, session_input_node.index);
+
+ return 0;
+}
+
+VLIB_NODE_FN (session_input_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ u32 thread_index = vm->thread_index;
+ session_worker_t *wrk;
+
+ wrk = session_main_get_worker (thread_index);
+ session_wrk_flush_events (wrk);
+
+ return 0;
+}
+
+VLIB_REGISTER_NODE (session_input_node) = {
+ .name = "session-input",
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */ \ No newline at end of file
diff --git a/src/vnet/session/session_lookup.c b/src/vnet/session/session_lookup.c
index 6e060cb119d..9d028dbb28c 100644
--- a/src/vnet/session/session_lookup.c
+++ b/src/vnet/session/session_lookup.c
@@ -29,13 +29,14 @@
#include <vnet/session/session.h>
#include <vnet/session/application.h>
+static session_lookup_main_t sl_main;
+
/**
* Network namespace index (i.e., fib index) to session lookup table. We
* should have one per network protocol type but for now we only support IP4/6
*/
static u32 *fib_index_to_table_index[2];
-/* *INDENT-OFF* */
/* 16 octets */
typedef CLIB_PACKED (struct {
union
@@ -72,7 +73,6 @@ typedef CLIB_PACKED (struct {
u64 as_u64[6];
};
}) v6_connection_key_t;
-/* *INDENT-ON* */
typedef clib_bihash_kv_16_8_t session_kv4_t;
typedef clib_bihash_kv_48_8_t session_kv6_t;
@@ -155,29 +155,70 @@ make_v6_ss_kv_from_tc (session_kv6_t * kv, transport_connection_t * tc)
tc->rmt_port, tc->proto);
}
+static inline u8
+session_table_alloc_needs_sync (void)
+{
+ return !vlib_thread_is_main_w_barrier () && (vlib_num_workers () > 1);
+}
+
+static_always_inline u8
+session_table_is_alloced (u8 fib_proto, u32 fib_index)
+{
+ return (vec_len (fib_index_to_table_index[fib_proto]) > fib_index &&
+ fib_index_to_table_index[fib_proto][fib_index] != ~0);
+}
+
static session_table_t *
session_table_get_or_alloc (u8 fib_proto, u32 fib_index)
{
session_table_t *st;
u32 table_index;
+
ASSERT (fib_index != ~0);
- if (vec_len (fib_index_to_table_index[fib_proto]) > fib_index &&
- fib_index_to_table_index[fib_proto][fib_index] != ~0)
+
+ if (session_table_is_alloced (fib_proto, fib_index))
{
table_index = fib_index_to_table_index[fib_proto][fib_index];
return session_table_get (table_index);
}
+
+ u8 needs_sync = session_table_alloc_needs_sync ();
+ session_lookup_main_t *slm = &sl_main;
+
+ /* Stop workers, otherwise consumers might be affected. This is
+ * acceptable because new tables should seldom be allocated */
+ if (needs_sync)
+ {
+ vlib_workers_sync ();
+
+ /* We might have a race, only one worker allowed at once */
+ clib_spinlock_lock (&slm->st_alloc_lock);
+ }
+
+ /* Another worker just allocated this table */
+ if (session_table_is_alloced (fib_proto, fib_index))
+ {
+ table_index = fib_index_to_table_index[fib_proto][fib_index];
+ st = session_table_get (table_index);
+ }
else
{
st = session_table_alloc ();
- table_index = session_table_index (st);
+ st->active_fib_proto = fib_proto;
+ session_table_init (st, fib_proto);
vec_validate_init_empty (fib_index_to_table_index[fib_proto], fib_index,
~0);
+ table_index = session_table_index (st);
fib_index_to_table_index[fib_proto][fib_index] = table_index;
- st->active_fib_proto = fib_proto;
- session_table_init (st, fib_proto);
- return st;
}
+
+ if (needs_sync)
+ {
+ clib_spinlock_unlock (&slm->st_alloc_lock);
+ vlib_workers_continue ();
+ }
+
+ return st;
}
static session_table_t *
@@ -1046,9 +1087,7 @@ session_lookup_connection4 (u32 fib_index, ip4_address_t * lcl,
/**
* Lookup session with ip4 and transport layer information
*
- * Important note: this may look into another thread's pool table and
- * register as 'peeker'. Caller should call @ref session_pool_remove_peeker as
- * if needed as soon as possible.
+ * Important note: this may look into another thread's pool table
*
* Lookup logic is similar to that of @ref session_lookup_connection_wt4 but
* this returns a session as opposed to a transport connection and it does not
@@ -1313,8 +1352,8 @@ session_lookup_connection (u32 fib_index, ip46_address_t * lcl,
lcl_port, rmt_port, proto);
}
-int
-vnet_session_rule_add_del (session_rule_add_del_args_t * args)
+session_error_t
+vnet_session_rule_add_del (session_rule_add_del_args_t *args)
{
app_namespace_t *app_ns = app_namespace_get (args->appns_index);
session_rules_table_t *srt;
@@ -1324,14 +1363,14 @@ vnet_session_rule_add_del (session_rule_add_del_args_t * args)
int rv = 0;
if (!app_ns)
- return VNET_API_ERROR_APP_INVALID_NS;
+ return SESSION_E_INVALID_NS;
if (args->scope > 3)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (args->transport_proto != TRANSPORT_PROTO_TCP
&& args->transport_proto != TRANSPORT_PROTO_UDP)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if ((args->scope & SESSION_RULE_SCOPE_GLOBAL) || args->scope == 0)
{
@@ -1452,6 +1491,7 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_command_t * cmd)
{
u32 proto = ~0, lcl_port, rmt_port, action = 0, lcl_plen = 0, rmt_plen = 0;
+ clib_error_t *error = 0;
u32 appns_index, scope = 0;
ip46_address_t lcl_ip, rmt_ip;
u8 is_ip4 = 1, conn_set = 0;
@@ -1501,29 +1541,32 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (input, "tag %_%v%_", &tag))
;
else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
+ {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
}
if (proto == ~0)
{
vlib_cli_output (vm, "proto must be set");
- return 0;
+ goto done;
}
if (is_add && !conn_set && action == ~0)
{
vlib_cli_output (vm, "connection and action must be set for add");
- return 0;
+ goto done;
}
if (!is_add && !tag && !conn_set)
{
vlib_cli_output (vm, "connection or tag must be set for delete");
- return 0;
+ goto done;
}
if (vec_len (tag) > SESSION_RULE_TAG_MAX_LEN)
{
vlib_cli_output (vm, "tag too long (max u64)");
- return 0;
+ goto done;
}
if (ns_id)
@@ -1532,7 +1575,7 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (!app_ns)
{
vlib_cli_output (vm, "namespace %v does not exist", ns_id);
- return 0;
+ goto done;
}
}
else
@@ -1559,13 +1602,14 @@ session_rule_command_fn (vlib_main_t * vm, unformat_input_t * input,
.scope = scope,
};
if ((rv = vnet_session_rule_add_del (&args)))
- return clib_error_return (0, "rule add del returned %u", rv);
+ error = clib_error_return (0, "rule add del returned %u", rv);
+done:
+ vec_free (ns_id);
vec_free (tag);
- return 0;
+ return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (session_rule_command, static) =
{
.path = "session rule",
@@ -1573,7 +1617,6 @@ VLIB_CLI_COMMAND (session_rule_command, static) =
"<lcl-ip/plen> <lcl-port> <rmt-ip/plen> <rmt-port> action <action>",
.function = session_rule_command_fn,
};
-/* *INDENT-ON* */
void
session_lookup_dump_rules_table (u32 fib_index, u8 fib_proto,
@@ -1696,7 +1739,6 @@ show_session_rules_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_session_rules_command, static) =
{
.path = "show session rules",
@@ -1704,11 +1746,93 @@ VLIB_CLI_COMMAND (show_session_rules_command, static) =
"<lcl-port> <rmt-ip/plen> <rmt-port> scope <scope>]",
.function = show_session_rules_command_fn,
};
-/* *INDENT-ON* */
+
+u8 *
+format_session_lookup_tables (u8 *s, va_list *args)
+{
+ u32 fib_proto = va_arg (*args, u32);
+ u32 *fibs, num_fibs = 0, fib_index, indent;
+ session_table_t *st;
+ u64 total_mem = 0;
+
+ fibs = fib_index_to_table_index[fib_proto];
+
+ for (fib_index = 0; fib_index < vec_len (fibs); fib_index++)
+ {
+ if (fibs[fib_index] == ~0)
+ continue;
+
+ num_fibs += 1;
+ st = session_table_get (fibs[fib_index]);
+ total_mem += session_table_memory_size (st);
+ }
+
+ indent = format_get_indent (s);
+ s = format (s, "active fibs:\t%u\n", num_fibs);
+ s = format (s, "%Umax fib-index:\t%u\n", format_white_space, indent,
+ vec_len (fibs) - 1);
+ s = format (s, "%Utable memory:\t%U\n", format_white_space, indent,
+ format_memory_size, total_mem);
+ s = format (s, "%Uvec memory:\t%U\n", format_white_space, indent,
+ format_memory_size, vec_mem_size (fibs));
+
+ return s;
+}
+
+static clib_error_t *
+show_session_lookup_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ session_table_t *st;
+ u32 fib_index = ~0;
+
+ session_cli_return_if_not_enabled ();
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "table %u", &fib_index))
+ ;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (fib_index != ~0)
+ {
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP4, fib_index);
+ if (st)
+ vlib_cli_output (vm, "%U", format_session_table, st);
+ else
+ vlib_cli_output (vm, "no ip4 table for fib-index %u", fib_index);
+ st = session_table_get_for_fib_index (FIB_PROTOCOL_IP6, fib_index);
+ if (st)
+ vlib_cli_output (vm, "%U", format_session_table, st);
+ else
+ vlib_cli_output (vm, "no ip6 table for fib-index %u", fib_index);
+ goto done;
+ }
+
+ vlib_cli_output (vm, "ip4 fib lookup tables:\n %U",
+ format_session_lookup_tables, FIB_PROTOCOL_IP4);
+ vlib_cli_output (vm, "ip6 fib lookup tables:\n %U",
+ format_session_lookup_tables, FIB_PROTOCOL_IP6);
+
+done:
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_session_lookup_command, static) = {
+ .path = "show session lookup",
+ .short_help = "show session lookup [table <fib-index>]",
+ .function = show_session_lookup_command_fn,
+};
void
session_lookup_init (void)
{
+ session_lookup_main_t *slm = &sl_main;
+
+ clib_spinlock_init (&slm->st_alloc_lock);
+
/*
* Allocate default table and map it to fib_index 0
*/
diff --git a/src/vnet/session/session_lookup.h b/src/vnet/session/session_lookup.h
index c1037dff8c9..f9ffc15165a 100644
--- a/src/vnet/session/session_lookup.h
+++ b/src/vnet/session/session_lookup.h
@@ -29,6 +29,11 @@ typedef enum session_lookup_result_
SESSION_LOOKUP_RESULT_FILTERED
} session_lookup_result_t;
+typedef struct session_lookup_main_
+{
+ clib_spinlock_t st_alloc_lock;
+} session_lookup_main_t;
+
session_t *session_lookup_safe4 (u32 fib_index, ip4_address_t * lcl,
ip4_address_t * rmt, u16 lcl_port,
u16 rmt_port, u8 proto);
@@ -130,7 +135,7 @@ typedef struct _session_rule_add_del_args
u8 transport_proto;
} session_rule_add_del_args_t;
-int vnet_session_rule_add_del (session_rule_add_del_args_t * args);
+session_error_t vnet_session_rule_add_del (session_rule_add_del_args_t *args);
void session_lookup_set_tables_appns (app_namespace_t * app_ns);
void session_lookup_init (void);
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index b8b5ce2d8de..0ec158fb429 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -26,12 +26,28 @@
#include <svm/queue.h>
#include <sys/timerfd.h>
-#define app_check_thread_and_barrier(_fn, _arg) \
- if (!vlib_thread_is_main_w_barrier ()) \
- { \
- vlib_rpc_call_main_thread (_fn, (u8 *) _arg, sizeof(*_arg)); \
- return; \
- }
+static inline void
+session_wrk_send_evt_to_main (session_worker_t *wrk, session_evt_elt_t *elt)
+{
+ session_evt_elt_t *he;
+ uword thread_index;
+ u8 is_empty;
+
+ thread_index = wrk->vm->thread_index;
+ he = clib_llist_elt (wrk->event_elts, wrk->evts_pending_main);
+ is_empty = clib_llist_is_empty (wrk->event_elts, evt_list, he);
+ clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
+ if (is_empty)
+ session_send_rpc_evt_to_thread (0, session_wrk_handle_evts_main_rpc,
+ uword_to_pointer (thread_index, void *));
+}
+
+#define app_check_thread_and_barrier(_wrk, _elt) \
+ if (!vlib_thread_is_main_w_barrier ()) \
+ { \
+ session_wrk_send_evt_to_main (wrk, elt); \
+ return; \
+ }
static void
session_wrk_timerfd_update (session_worker_t *wrk, u64 time_ns)
@@ -93,16 +109,17 @@ session_mq_free_ext_config (application_t *app, uword offset)
}
static void
-session_mq_listen_handler (void *data)
+session_mq_listen_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_listen_msg_t *mp = (session_listen_msg_t *) data;
vnet_listen_args_t _a, *a = &_a;
+ session_listen_msg_t *mp;
app_worker_t *app_wrk;
application_t *app;
int rv;
- app_check_thread_and_barrier (session_mq_listen_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -122,26 +139,31 @@ session_mq_listen_handler (void *data)
a->sep_ext.ext_cfg = session_mq_get_ext_config (app, mp->ext_config);
if ((rv = vnet_listen (a)))
- clib_warning ("listen returned: %U", format_session_error, rv);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, mp->wrk_index);
- mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv);
+ app_worker_listened_notify (app_wrk, a->handle, mp->context, rv);
if (mp->ext_config)
session_mq_free_ext_config (app, mp->ext_config);
+
+ /* Make sure events are flushed before releasing barrier, to avoid
+ * potential race with accept. */
+ app_wrk_flush_wrk_events (app_wrk, 0);
}
static void
-session_mq_listen_uri_handler (void *data)
+session_mq_listen_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_listen_uri_msg_t *mp = (session_listen_uri_msg_t *) data;
vnet_listen_args_t _a, *a = &_a;
+ session_listen_uri_msg_t *mp;
app_worker_t *app_wrk;
application_t *app;
int rv;
- app_check_thread_and_barrier (session_mq_listen_uri_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -152,7 +174,8 @@ session_mq_listen_uri_handler (void *data)
rv = vnet_bind_uri (a);
app_wrk = application_get_worker (app, 0);
- mq_send_session_bound_cb (app_wrk->wrk_index, mp->context, a->handle, rv);
+ app_worker_listened_notify (app_wrk, a->handle, mp->context, rv);
+ app_wrk_flush_wrk_events (app_wrk, 0);
}
static void
@@ -160,6 +183,7 @@ session_mq_connect_one (session_connect_msg_t *mp)
{
vnet_connect_args_t _a, *a = &_a;
app_worker_t *app_wrk;
+ session_worker_t *wrk;
application_t *app;
int rv;
@@ -173,6 +197,7 @@ session_mq_connect_one (session_connect_msg_t *mp)
a->sep.port = mp->port;
a->sep.transport_proto = mp->proto;
a->sep.peer.fib_index = mp->vrf;
+ a->sep.dscp = mp->dscp;
clib_memcpy_fast (&a->sep.peer.ip, &mp->lcl_ip, sizeof (mp->lcl_ip));
if (mp->is_ip4)
{
@@ -192,9 +217,10 @@ session_mq_connect_one (session_connect_msg_t *mp)
if ((rv = vnet_connect (a)))
{
- clib_warning ("connect returned: %U", format_session_error, rv);
+ wrk = session_main_get_worker (vlib_get_thread_index ());
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, mp->wrk_index);
- mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv);
+ app_worker_connect_notify (app_wrk, 0, rv, mp->context);
}
if (mp->ext_config)
@@ -205,23 +231,20 @@ static void
session_mq_handle_connects_rpc (void *arg)
{
u32 max_connects = 32, n_connects = 0;
- vlib_main_t *vm = vlib_get_main ();
session_evt_elt_t *he, *elt, *next;
- session_worker_t *fwrk, *wrk;
+ session_worker_t *fwrk;
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (session_vlib_thread_is_cl_thread ());
/* Pending connects on linked list pertaining to first worker */
- fwrk = session_main_get_worker (1);
+ fwrk = session_main_get_worker (transport_cl_thread ());
if (!fwrk->n_pending_connects)
- goto update_state;
-
- vlib_worker_thread_barrier_sync (vm);
+ return;
he = clib_llist_elt (fwrk->event_elts, fwrk->pending_connects);
elt = clib_llist_next (fwrk->event_elts, evt_list, he);
- /* Avoid holding the barrier for too long */
+ /* Avoid holding the worker for too long */
while (n_connects < max_connects && elt != he)
{
next = clib_llist_next (fwrk->event_elts, evt_list, elt);
@@ -235,45 +258,10 @@ session_mq_handle_connects_rpc (void *arg)
/* Decrement with worker barrier */
fwrk->n_pending_connects -= n_connects;
-
- vlib_worker_thread_barrier_release (vm);
-
-update_state:
-
- /* Switch worker to poll mode if it was in interrupt mode and had work or
- * back to interrupt if threshold of loops without a connect is passed.
- * While in poll mode, reprogram connects rpc */
- wrk = session_main_get_worker (0);
- if (wrk->state != SESSION_WRK_POLLING)
+ if (fwrk->n_pending_connects > 0)
{
- if (n_connects)
- {
- session_wrk_set_state (wrk, SESSION_WRK_POLLING);
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_POLLING);
- wrk->no_connect_loops = 0;
- }
- }
- else
- {
- if (!n_connects)
- {
- if (++wrk->no_connect_loops > 1e5)
- {
- session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT);
- vlib_node_set_state (vm, session_queue_node.index,
- VLIB_NODE_STATE_INTERRUPT);
- }
- }
- else
- wrk->no_connect_loops = 0;
- }
-
- if (wrk->state == SESSION_WRK_POLLING)
- {
- elt = session_evt_alloc_ctrl (wrk);
- elt->evt.event_type = SESSION_CTRL_EVT_RPC;
- elt->evt.rpc_args.fp = session_mq_handle_connects_rpc;
+ session_send_rpc_evt_to_thread_force (fwrk->vm->thread_index,
+ session_mq_handle_connects_rpc, 0);
}
}
@@ -283,20 +271,28 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt)
u32 thread_index = wrk - session_main.wrk;
session_evt_elt_t *he;
- /* No workers, so just deal with the connect now */
- if (PREDICT_FALSE (!thread_index))
+ if (PREDICT_FALSE (thread_index > transport_cl_thread ()))
{
- session_mq_connect_one (session_evt_ctrl_data (wrk, elt));
+ clib_warning ("Connect on wrong thread. Dropping");
return;
}
- if (PREDICT_FALSE (thread_index != 1))
+ /* If on worker, check if main has any pending messages. Avoids reordering
+ * with other control messages that need to be handled by main
+ */
+ if (thread_index)
{
- clib_warning ("Connect on wrong thread. Dropping");
- return;
+ he = clib_llist_elt (wrk->event_elts, wrk->evts_pending_main);
+
+ /* Events pending on main, postpone to avoid reordering */
+ if (!clib_llist_is_empty (wrk->event_elts, evt_list, he))
+ {
+ clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
+ return;
+ }
}
- /* Add to pending list to be handled by main thread */
+ /* Add to pending list to be handled by first worker */
he = clib_llist_elt (wrk->event_elts, wrk->pending_connects);
clib_llist_add_tail (wrk->event_elts, evt_list, elt, he);
@@ -304,23 +300,23 @@ session_mq_connect_handler (session_worker_t *wrk, session_evt_elt_t *elt)
wrk->n_pending_connects += 1;
if (wrk->n_pending_connects == 1)
{
- vlib_node_set_interrupt_pending (vlib_get_main_by_index (0),
- session_queue_node.index);
- session_send_rpc_evt_to_thread (0, session_mq_handle_connects_rpc, 0);
+ session_send_rpc_evt_to_thread_force (thread_index,
+ session_mq_handle_connects_rpc, 0);
}
}
static void
-session_mq_connect_uri_handler (void *data)
+session_mq_connect_uri_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_connect_uri_msg_t *mp = (session_connect_uri_msg_t *) data;
vnet_connect_args_t _a, *a = &_a;
+ session_connect_uri_msg_t *mp;
app_worker_t *app_wrk;
application_t *app;
int rv;
- app_check_thread_and_barrier (session_mq_connect_uri_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -331,9 +327,9 @@ session_mq_connect_uri_handler (void *data)
a->app_index = app->app_index;
if ((rv = vnet_connect_uri (a)))
{
- clib_warning ("connect_uri returned: %d", rv);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, 0 /* default wrk only */ );
- mq_send_session_connected_cb (app_wrk->wrk_index, mp->context, 0, rv);
+ app_worker_connect_notify (app_wrk, 0, rv, mp->context);
}
}
@@ -370,14 +366,15 @@ session_mq_disconnect_handler (void *data)
}
static void
-app_mq_detach_handler (void *data)
+app_mq_detach_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- session_app_detach_msg_t *mp = (session_app_detach_msg_t *) data;
vnet_app_detach_args_t _a, *a = &_a;
+ session_app_detach_msg_t *mp;
application_t *app;
- app_check_thread_and_barrier (app_mq_detach_handler, mp);
+ app_check_thread_and_barrier (wrk, elt);
+ mp = session_evt_ctrl_data (wrk, elt);
app = application_lookup (mp->client_index);
if (!app)
return;
@@ -388,18 +385,19 @@ app_mq_detach_handler (void *data)
}
static void
-session_mq_unlisten_rpc (session_unlisten_msg_t *mp)
+session_mq_unlisten_handler (session_worker_t *wrk, session_evt_elt_t *elt)
{
- vlib_main_t *vm = vlib_get_main ();
vnet_unlisten_args_t _a, *a = &_a;
+ session_unlisten_msg_t *mp;
app_worker_t *app_wrk;
session_handle_t sh;
application_t *app;
- u32 context;
int rv;
+ app_check_thread_and_barrier (wrk, elt);
+
+ mp = session_evt_ctrl_data (wrk, elt);
sh = mp->handle;
- context = mp->context;
app = application_lookup (mp->client_index);
if (!app)
@@ -410,65 +408,34 @@ session_mq_unlisten_rpc (session_unlisten_msg_t *mp)
a->handle = sh;
a->wrk_map_index = mp->wrk_index;
- vlib_worker_thread_barrier_sync (vm);
-
if ((rv = vnet_unlisten (a)))
- clib_warning ("unlisten returned: %d", rv);
-
- vlib_worker_thread_barrier_release (vm);
+ session_worker_stat_error_inc (wrk, rv, 1);
app_wrk = application_get_worker (app, a->wrk_map_index);
if (!app_wrk)
return;
- mq_send_unlisten_reply (app_wrk, sh, context, rv);
- clib_mem_free (mp);
-}
-
-static void
-session_mq_unlisten_handler (session_worker_t *wrk, session_evt_elt_t *elt)
-{
- u32 thread_index = wrk - session_main.wrk;
- session_unlisten_msg_t *mp, *arg;
-
- mp = session_evt_ctrl_data (wrk, elt);
- arg = clib_mem_alloc (sizeof (session_unlisten_msg_t));
- clib_memcpy_fast (arg, mp, sizeof (*arg));
-
- if (PREDICT_FALSE (!thread_index))
- {
- session_mq_unlisten_rpc (arg);
- return;
- }
-
- session_send_rpc_evt_to_thread_force (0, session_mq_unlisten_rpc, arg);
+ app_worker_unlisten_reply (app_wrk, sh, mp->context, rv);
}
static void
-session_mq_accepted_reply_handler (void *data)
+session_mq_accepted_reply_handler (session_worker_t *wrk,
+ session_evt_elt_t *elt)
{
- session_accepted_reply_msg_t *mp = (session_accepted_reply_msg_t *) data;
vnet_disconnect_args_t _a = { 0 }, *a = &_a;
+ session_accepted_reply_msg_t *mp;
session_state_t old_state;
app_worker_t *app_wrk;
session_t *s;
- /* Server isn't interested, kill the session */
- if (mp->retval)
- {
- a->app_index = mp->context;
- a->handle = mp->handle;
- vnet_disconnect_session (a);
- return;
- }
+ mp = session_evt_ctrl_data (wrk, elt);
/* Mail this back from the main thread. We're not polling in main
* thread so we're using other workers for notifications. */
- if (vlib_num_workers () && vlib_get_thread_index () != 0
- && session_thread_from_handle (mp->handle) == 0)
+ if (session_thread_from_handle (mp->handle) == 0 && vlib_num_workers () &&
+ vlib_get_thread_index () != 0)
{
- vlib_rpc_call_main_thread (session_mq_accepted_reply_handler,
- (u8 *) mp, sizeof (*mp));
+ session_wrk_send_evt_to_main (wrk, elt);
return;
}
@@ -483,27 +450,36 @@ session_mq_accepted_reply_handler (void *data)
return;
}
- if (!session_has_transport (s))
+ /* Server isn't interested, disconnect the session */
+ if (mp->retval)
{
- s->session_state = SESSION_STATE_READY;
- if (ct_session_connect_notify (s, SESSION_E_NONE))
- return;
+ a->app_index = mp->context;
+ a->handle = mp->handle;
+ vnet_disconnect_session (a);
+ s->app_wrk_index = SESSION_INVALID_INDEX;
+ return;
}
- else
+
+ /* Special handling for cut-through sessions */
+ if (!session_has_transport (s))
{
- old_state = s->session_state;
- s->session_state = SESSION_STATE_READY;
+ session_set_state (s, SESSION_STATE_READY);
+ ct_session_connect_notify (s, SESSION_E_NONE);
+ return;
+ }
- if (!svm_fifo_is_empty_prod (s->rx_fifo))
- app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX);
+ old_state = s->session_state;
+ session_set_state (s, SESSION_STATE_READY);
- /* Closed while waiting for app to reply. Resend disconnect */
- if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
- {
- app_worker_close_notify (app_wrk, s);
- s->session_state = old_state;
- return;
- }
+ if (!svm_fifo_is_empty_prod (s->rx_fifo))
+ app_worker_rx_notify (app_wrk, s);
+
+ /* Closed while waiting for app to reply. Resend disconnect */
+ if (old_state >= SESSION_STATE_TRANSPORT_CLOSING)
+ {
+ app_worker_close_notify (app_wrk, s);
+ session_set_state (s, old_state);
+ return;
}
}
@@ -515,15 +491,13 @@ session_mq_reset_reply_handler (void *data)
app_worker_t *app_wrk;
session_t *s;
application_t *app;
- u32 index, thread_index;
mp = (session_reset_reply_msg_t *) data;
app = application_lookup (mp->context);
if (!app)
return;
- session_parse_handle (mp->handle, &index, &thread_index);
- s = session_get_if_valid (index, thread_index);
+ s = session_get_from_handle_if_valid (mp->handle);
/* No session or not the right session */
if (!s || s->session_state < SESSION_STATE_TRANSPORT_CLOSING)
@@ -633,6 +607,7 @@ session_mq_worker_update_handler (void *data)
session_event_t *evt;
session_t *s;
application_t *app;
+ int rv;
app = application_lookup (mp->client_index);
if (!app)
@@ -669,7 +644,9 @@ session_mq_worker_update_handler (void *data)
return;
}
- app_worker_own_session (app_wrk, s);
+ rv = app_worker_own_session (app_wrk, s);
+ if (rv)
+ session_stat_error_inc (rv, 1);
/*
* Send reply
@@ -696,7 +673,7 @@ session_mq_worker_update_handler (void *data)
session_send_io_evt_to_thread (s->tx_fifo, SESSION_IO_EVT_TX);
if (s->rx_fifo && !svm_fifo_is_empty (s->rx_fifo))
- app_worker_lock_and_send_event (app_wrk, s, SESSION_IO_EVT_RX);
+ app_worker_rx_notify (app_wrk, s);
if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSING)
app_worker_close_notify (app_wrk, s);
@@ -774,6 +751,67 @@ session_mq_transport_attr_handler (void *data)
svm_msg_q_add_and_unlock (app_wrk->event_queue, msg);
}
+void
+session_wrk_handle_evts_main_rpc (void *args)
+{
+ vlib_main_t *vm = vlib_get_main ();
+ clib_llist_index_t ei, next_ei;
+ session_evt_elt_t *he, *elt;
+ session_worker_t *fwrk;
+ u32 thread_index;
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ thread_index = pointer_to_uword (args);
+ fwrk = session_main_get_worker (thread_index);
+
+ he = clib_llist_elt (fwrk->event_elts, fwrk->evts_pending_main);
+ ei = clib_llist_next_index (he, evt_list);
+
+ while (ei != fwrk->evts_pending_main)
+ {
+ elt = clib_llist_elt (fwrk->event_elts, ei);
+ next_ei = clib_llist_next_index (elt, evt_list);
+ clib_llist_remove (fwrk->event_elts, evt_list, elt);
+ switch (elt->evt.event_type)
+ {
+ case SESSION_CTRL_EVT_LISTEN:
+ session_mq_listen_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_UNLISTEN:
+ session_mq_unlisten_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_APP_DETACH:
+ app_mq_detach_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_CONNECT_URI:
+ session_mq_connect_uri_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_ACCEPTED_REPLY:
+ session_mq_accepted_reply_handler (fwrk, elt);
+ break;
+ case SESSION_CTRL_EVT_CONNECT:
+ session_mq_connect_handler (fwrk, elt);
+ break;
+ default:
+ clib_warning ("unhandled %u", elt->evt.event_type);
+ ALWAYS_ASSERT (0);
+ break;
+ }
+
+ /* Regrab element in case pool moved */
+ elt = clib_llist_elt (fwrk->event_elts, ei);
+ if (!clib_llist_elt_is_linked (elt, evt_list))
+ {
+ session_evt_ctrl_data_free (fwrk, elt);
+ clib_llist_put (fwrk->event_elts, elt);
+ }
+ ei = next_ei;
+ }
+
+ vlib_worker_thread_barrier_release (vm);
+}
+
vlib_node_registration_t session_queue_node;
typedef struct
@@ -795,21 +833,21 @@ format_session_queue_trace (u8 * s, va_list * args)
return s;
}
-#define foreach_session_queue_error \
-_(TX, "Packets transmitted") \
-_(TIMER, "Timer events") \
-_(NO_BUFFER, "Out of buffers")
+#define foreach_session_queue_error \
+ _ (TX, tx, INFO, "Packets transmitted") \
+ _ (TIMER, timer, INFO, "Timer events") \
+ _ (NO_BUFFER, no_buffer, ERROR, "Out of buffers")
typedef enum
{
-#define _(sym,str) SESSION_QUEUE_ERROR_##sym,
+#define _(f, n, s, d) SESSION_QUEUE_ERROR_##f,
foreach_session_queue_error
#undef _
SESSION_QUEUE_N_ERROR,
} session_queue_error_t;
-static char *session_queue_error_strings[] = {
-#define _(sym,string) string,
+static vlib_error_desc_t session_error_counters[] = {
+#define _(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
foreach_session_queue_error
#undef _
};
@@ -822,36 +860,134 @@ enum
};
static void
-session_tx_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
- u32 next_index, u32 * to_next, u16 n_segs,
- session_t * s, u32 n_trace)
+session_tx_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 next_index, vlib_buffer_t **bufs, u16 n_segs,
+ session_t *s, u32 n_trace)
{
+ vlib_buffer_t **b = bufs;
+
while (n_trace && n_segs)
{
- vlib_buffer_t *b = vlib_get_buffer (vm, to_next[0]);
- if (PREDICT_TRUE
- (vlib_trace_buffer
- (vm, node, next_index, b, 1 /* follow_chain */ )))
+ if (PREDICT_TRUE (vlib_trace_buffer (vm, node, next_index, b[0],
+ 1 /* follow_chain */)))
{
session_queue_trace_t *t =
- vlib_add_trace (vm, node, b, sizeof (*t));
+ vlib_add_trace (vm, node, b[0], sizeof (*t));
t->session_index = s->session_index;
t->server_thread_index = s->thread_index;
n_trace--;
}
- to_next++;
+ b++;
n_segs--;
}
vlib_set_trace_count (vm, node, n_trace);
}
+always_inline int
+session_tx_fill_dma_transfers (session_worker_t *wrk,
+ session_tx_context_t *ctx, vlib_buffer_t *b)
+{
+ vlib_main_t *vm = wrk->vm;
+ u32 len_to_deq;
+ u8 *data0 = NULL;
+ int n_bytes_read, len_write;
+ svm_fifo_seg_t data_fs[2];
+
+ u32 n_segs = 2;
+ u16 n_transfers = 0;
+ /*
+ * Start with the first buffer in chain
+ */
+ b->error = 0;
+ b->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ b->current_data = 0;
+ data0 = vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
+ len_to_deq = clib_min (ctx->left_to_snd, ctx->deq_per_first_buf);
+
+ n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset,
+ data_fs, &n_segs, len_to_deq);
+
+ len_write = n_bytes_read;
+ ASSERT (n_bytes_read == len_to_deq);
+
+ while (n_bytes_read)
+ {
+ wrk->batch_num++;
+ vlib_dma_batch_add (vm, wrk->batch, data0, data_fs[n_transfers].data,
+ data_fs[n_transfers].len);
+ data0 += data_fs[n_transfers].len;
+ n_bytes_read -= data_fs[n_transfers].len;
+ n_transfers++;
+ }
+ return len_write;
+}
+
+always_inline int
+session_tx_fill_dma_transfers_tail (session_worker_t *wrk,
+ session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data)
+{
+ vlib_main_t *vm = wrk->vm;
+ int n_bytes_read, len_write;
+ svm_fifo_seg_t data_fs[2];
+ u32 n_segs = 2;
+ u16 n_transfers = 0;
+
+ n_bytes_read = svm_fifo_segments (ctx->s->tx_fifo, ctx->sp.tx_offset,
+ data_fs, &n_segs, len_to_deq);
+
+ len_write = n_bytes_read;
+
+ ASSERT (n_bytes_read == len_to_deq);
+
+ while (n_bytes_read)
+ {
+ wrk->batch_num++;
+ vlib_dma_batch_add (vm, wrk->batch, data, data_fs[n_transfers].data,
+ data_fs[n_transfers].len);
+ data += data_fs[n_transfers].len;
+ n_bytes_read -= data_fs[n_transfers].len;
+ n_transfers++;
+ }
+
+ return len_write;
+}
+
+always_inline int
+session_tx_copy_data (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data0)
+{
+ int n_bytes_read;
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ n_bytes_read =
+ svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data0);
+ else
+ n_bytes_read = session_tx_fill_dma_transfers (wrk, ctx, b);
+ return n_bytes_read;
+}
+
+always_inline int
+session_tx_copy_data_tail (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u32 len_to_deq, u8 *data)
+{
+ int n_bytes_read;
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ n_bytes_read =
+ svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset, len_to_deq, data);
+ else
+ n_bytes_read =
+ session_tx_fill_dma_transfers_tail (wrk, ctx, b, len_to_deq, data);
+ return n_bytes_read;
+}
+
always_inline void
-session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx,
- vlib_buffer_t * b, u16 * n_bufs, u8 peek_data)
+session_tx_fifo_chain_tail (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u16 *n_bufs, u8 peek_data)
{
+ vlib_main_t *vm = wrk->vm;
vlib_buffer_t *chain_b, *prev_b;
u32 chain_bi0, to_deq, left_from_seg;
- u16 len_to_deq, n_bytes_read;
+ int len_to_deq, n_bytes_read;
u8 *data, j;
b->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
@@ -873,8 +1009,8 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx,
data = vlib_buffer_get_current (chain_b);
if (peek_data)
{
- n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo,
- ctx->sp.tx_offset, len_to_deq, data);
+ n_bytes_read =
+ session_tx_copy_data_tail (wrk, ctx, b, len_to_deq, data);
ctx->sp.tx_offset += n_bytes_read;
}
else
@@ -931,13 +1067,12 @@ session_tx_fifo_chain_tail (vlib_main_t * vm, session_tx_context_t * ctx,
}
always_inline void
-session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
- vlib_buffer_t * b, u16 * n_bufs, u8 peek_data)
+session_tx_fill_buffer (session_worker_t *wrk, session_tx_context_t *ctx,
+ vlib_buffer_t *b, u16 *n_bufs, u8 peek_data)
{
u32 len_to_deq;
u8 *data0;
int n_bytes_read;
-
/*
* Start with the first buffer in chain
*/
@@ -950,8 +1085,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
if (peek_data)
{
- n_bytes_read = svm_fifo_peek (ctx->s->tx_fifo, ctx->sp.tx_offset,
- len_to_deq, data0);
+ n_bytes_read = session_tx_copy_data (wrk, ctx, b, len_to_deq, data0);
ASSERT (n_bytes_read > 0);
/* Keep track of progress locally, transport is also supposed to
* increment it independently when pushing the header */
@@ -973,10 +1107,10 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
n_bytes_read = svm_fifo_peek (f, offset, deq_now, data0);
ASSERT (n_bytes_read > 0);
- if (ctx->s->session_state == SESSION_STATE_LISTENING)
+ if (transport_connection_is_cless (ctx->tc))
{
- ip_copy (&ctx->tc->rmt_ip, &hdr->rmt_ip, ctx->tc->is_ip4);
- ctx->tc->rmt_port = hdr->rmt_port;
+ clib_memcpy_fast (data0 - sizeof (session_dgram_hdr_t), hdr,
+ sizeof (*hdr));
}
hdr->data_offset += n_bytes_read;
if (hdr->data_offset == hdr->data_length)
@@ -998,6 +1132,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
ASSERT (n_bytes_read > 0);
}
}
+
b->current_length = n_bytes_read;
ctx->left_to_snd -= n_bytes_read;
@@ -1005,7 +1140,7 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx,
* Fill in the remaining buffers in the chain, if any
*/
if (PREDICT_FALSE (ctx->n_bufs_per_seg > 1 && ctx->left_to_snd))
- session_tx_fifo_chain_tail (vm, ctx, b, n_bufs, peek_data);
+ session_tx_fifo_chain_tail (wrk, ctx, b, n_bufs, peek_data);
}
always_inline u8
@@ -1018,7 +1153,15 @@ session_tx_not_ready (session_t * s, u8 peek_data)
/* Can retransmit for closed sessions but can't send new data if
* session is not ready or closed */
else if (s->session_state < SESSION_STATE_READY)
- return 1;
+ {
+ /* Allow accepting session to send custom packets.
+ * For instance, tcp want to send acks in established, but
+ * the app has not called accept() yet */
+ if (s->session_state == SESSION_STATE_ACCEPTING &&
+ (s->flags & SESSION_F_CUSTOM_TX))
+ return 0;
+ return 1;
+ }
else if (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)
{
/* Allow closed transports to still send custom packets.
@@ -1029,6 +1172,11 @@ session_tx_not_ready (session_t * s, u8 peek_data)
return 2;
}
}
+ else
+ {
+ if (s->session_state == SESSION_STATE_TRANSPORT_DELETED)
+ return 2;
+ }
return 0;
}
@@ -1085,9 +1233,28 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx,
svm_fifo_peek (ctx->s->tx_fifo, 0, sizeof (ctx->hdr),
(u8 *) & ctx->hdr);
+ /* Zero length dgrams not supported */
+ if (PREDICT_FALSE (ctx->hdr.data_length == 0))
+ {
+ svm_fifo_dequeue_drop (ctx->s->tx_fifo, sizeof (ctx->hdr));
+ ctx->max_len_to_snd = 0;
+ return;
+ }
+ /* We cannot be sure apps have not enqueued incomplete dgrams */
+ if (PREDICT_FALSE (ctx->max_dequeue <
+ ctx->hdr.data_length + sizeof (ctx->hdr)))
+ {
+ ctx->max_len_to_snd = 0;
+ return;
+ }
ASSERT (ctx->hdr.data_length > ctx->hdr.data_offset);
len = ctx->hdr.data_length - ctx->hdr.data_offset;
+ if (ctx->hdr.gso_size)
+ {
+ ctx->sp.snd_mss = clib_min (ctx->sp.snd_mss, ctx->hdr.gso_size);
+ }
+
/* Process multiple dgrams if smaller than min (buf_space, mss).
* This avoids handling multiple dgrams if they require buffer
* chains */
@@ -1107,11 +1274,13 @@ session_tx_set_dequeue_params (vlib_main_t * vm, session_tx_context_t * ctx,
{
svm_fifo_peek (ctx->s->tx_fifo, offset, sizeof (ctx->hdr),
(u8 *) & hdr);
- ASSERT (hdr.data_length > hdr.data_offset);
dgram_len = hdr.data_length - hdr.data_offset;
- if (len + dgram_len > ctx->max_dequeue
- || first_dgram_len != dgram_len)
+ if (offset + sizeof (hdr) + hdr.data_length >
+ ctx->max_dequeue ||
+ first_dgram_len != dgram_len)
break;
+ /* Assert here to allow test above with zero length dgrams */
+ ASSERT (hdr.data_length > hdr.data_offset);
len += dgram_len;
offset += sizeof (hdr) + hdr.data_length;
}
@@ -1180,8 +1349,30 @@ session_tx_maybe_reschedule (session_worker_t * wrk,
svm_fifo_unset_event (s->tx_fifo);
if (svm_fifo_max_dequeue_cons (s->tx_fifo) > ctx->sp.tx_offset)
- if (svm_fifo_set_event (s->tx_fifo))
- session_evt_add_head_old (wrk, elt);
+ {
+ if (svm_fifo_set_event (s->tx_fifo))
+ session_evt_add_head_old (wrk, elt);
+ }
+ else
+ {
+ transport_connection_deschedule (ctx->tc);
+ }
+}
+
+always_inline void
+session_tx_add_pending_buffer (session_worker_t *wrk, u32 bi, u32 next_index)
+{
+ if (PREDICT_TRUE (!wrk->dma_enabled))
+ {
+ vec_add1 (wrk->pending_tx_buffers, bi);
+ vec_add1 (wrk->pending_tx_nexts, next_index);
+ }
+ else
+ {
+ session_dma_transfer *dma_transfer = &wrk->dma_trans[wrk->trans_tail];
+ vec_add1 (dma_transfer->pending_tx_buffers, bi);
+ vec_add1 (dma_transfer->pending_tx_nexts, next_index);
+ }
}
always_inline int
@@ -1227,9 +1418,12 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
ctx->sp.max_burst_size = max_burst;
n_custom_tx = ctx->transport_vft->custom_tx (ctx->tc, &ctx->sp);
*n_tx_packets += n_custom_tx;
- if (PREDICT_FALSE
- (ctx->s->session_state >= SESSION_STATE_TRANSPORT_CLOSED))
- return SESSION_TX_OK;
+ if (PREDICT_FALSE (ctx->s->session_state >=
+ SESSION_STATE_TRANSPORT_CLOSED))
+ {
+ svm_fifo_unset_event (ctx->s->tx_fifo);
+ return SESSION_TX_OK;
+ }
max_burst -= n_custom_tx;
if (!max_burst || (ctx->s->flags & SESSION_F_CUSTOM_TX))
{
@@ -1238,6 +1432,11 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
}
}
+ /* Connection previously descheduled because it had no data to send.
+ * Clear descheduled flag and reset pacer if in use */
+ if (transport_connection_is_descheduled (ctx->tc))
+ transport_connection_clear_descheduled (ctx->tc);
+
transport_connection_snd_params (ctx->tc, &ctx->sp);
if (!ctx->sp.snd_space)
@@ -1300,6 +1499,8 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
ctx->left_to_snd = ctx->max_len_to_snd;
n_left = ctx->n_segs_per_evt;
+ vec_validate (ctx->transport_pending_bufs, n_left);
+
while (n_left >= 4)
{
vlib_buffer_t *b0, *b1;
@@ -1318,18 +1519,15 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
b0 = vlib_get_buffer (vm, bi0);
b1 = vlib_get_buffer (vm, bi1);
- session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data);
- session_tx_fill_buffer (vm, ctx, b1, &n_bufs, peek_data);
-
- ctx->transport_vft->push_header (ctx->tc, b0);
- ctx->transport_vft->push_header (ctx->tc, b1);
+ session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data);
+ session_tx_fill_buffer (wrk, ctx, b1, &n_bufs, peek_data);
+ ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0;
+ ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left + 1] = b1;
n_left -= 2;
- vec_add1 (wrk->pending_tx_buffers, bi0);
- vec_add1 (wrk->pending_tx_buffers, bi1);
- vec_add1 (wrk->pending_tx_nexts, next_index);
- vec_add1 (wrk->pending_tx_nexts, next_index);
+ session_tx_add_pending_buffer (wrk, bi0, next_index);
+ session_tx_add_pending_buffer (wrk, bi1, next_index);
}
while (n_left)
{
@@ -1345,20 +1543,20 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
bi0 = ctx->tx_buffers[--n_bufs];
b0 = vlib_get_buffer (vm, bi0);
- session_tx_fill_buffer (vm, ctx, b0, &n_bufs, peek_data);
-
- /* Ask transport to push header after current_length and
- * total_length_not_including_first_buffer are updated */
- ctx->transport_vft->push_header (ctx->tc, b0);
+ session_tx_fill_buffer (wrk, ctx, b0, &n_bufs, peek_data);
+ ctx->transport_pending_bufs[ctx->n_segs_per_evt - n_left] = b0;
n_left -= 1;
- vec_add1 (wrk->pending_tx_buffers, bi0);
- vec_add1 (wrk->pending_tx_nexts, next_index);
+ session_tx_add_pending_buffer (wrk, bi0, next_index);
}
+ /* Ask transport to push headers */
+ ctx->transport_vft->push_header (ctx->tc, ctx->transport_pending_bufs,
+ ctx->n_segs_per_evt);
+
if (PREDICT_FALSE ((n_trace = vlib_get_trace_count (vm, node)) > 0))
- session_tx_trace_frame (vm, node, next_index, wrk->pending_tx_buffers,
+ session_tx_trace_frame (vm, node, next_index, ctx->transport_pending_bufs,
ctx->n_segs_per_evt, ctx->s, n_trace);
if (PREDICT_FALSE (n_bufs))
@@ -1367,7 +1565,7 @@ session_tx_fifo_read_and_snd_i (session_worker_t * wrk,
*n_tx_packets += ctx->n_segs_per_evt;
SESSION_EVT (SESSION_EVT_DEQ, ctx->s, ctx->max_len_to_snd, ctx->max_dequeue,
- ctx->s->tx_fifo->has_event, wrk->last_vlib_time);
+ ctx->s->tx_fifo->shr->has_event, wrk->last_vlib_time);
ASSERT (ctx->left_to_snd == 0);
@@ -1412,20 +1610,30 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk,
{
transport_send_params_t *sp = &wrk->ctx.sp;
session_t *s = wrk->ctx.s;
+ clib_llist_index_t ei;
u32 n_packets;
- if (PREDICT_FALSE (s->session_state >= SESSION_STATE_TRANSPORT_CLOSED))
+ if (PREDICT_FALSE ((s->session_state >= SESSION_STATE_TRANSPORT_CLOSED) ||
+ (s->session_state == SESSION_STATE_CONNECTING &&
+ (s->flags & SESSION_F_HALF_OPEN))))
return 0;
/* Clear custom-tx flag used to request reschedule for tx */
s->flags &= ~SESSION_F_CUSTOM_TX;
+ sp->flags = 0;
+ sp->bytes_dequeued = 0;
sp->max_burst_size = clib_min (SESSION_NODE_FRAME_SIZE - *n_tx_packets,
TRANSPORT_PACER_MAX_BURST_PKTS);
+ /* Grab elt index since app transports can enqueue events on tx */
+ ei = clib_llist_entry_index (wrk->event_elts, elt);
+
n_packets = transport_custom_tx (session_get_transport_proto (s), s, sp);
*n_tx_packets += n_packets;
+ elt = clib_llist_elt (wrk->event_elts, ei);
+
if (s->flags & SESSION_F_CUSTOM_TX)
{
session_evt_add_old (wrk, elt);
@@ -1438,8 +1646,8 @@ session_tx_fifo_dequeue_internal (session_worker_t * wrk,
session_evt_add_head_old (wrk, elt);
}
- if (sp->max_burst_size &&
- svm_fifo_needs_deq_ntf (s->tx_fifo, sp->max_burst_size))
+ if (sp->bytes_dequeued &&
+ svm_fifo_needs_deq_ntf (s->tx_fifo, sp->bytes_dequeued))
session_dequeue_notify (s);
return n_packets;
@@ -1491,10 +1699,10 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_transport_reset (s);
break;
case SESSION_CTRL_EVT_LISTEN:
- session_mq_listen_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_listen_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_LISTEN_URI:
- session_mq_listen_uri_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_listen_uri_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_UNLISTEN:
session_mq_unlisten_handler (wrk, elt);
@@ -1503,7 +1711,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_mq_connect_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_CONNECT_URI:
- session_mq_connect_uri_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_connect_uri_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_SHUTDOWN:
session_mq_shutdown_handler (session_evt_ctrl_data (wrk, elt));
@@ -1515,7 +1723,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_mq_disconnected_handler (session_evt_ctrl_data (wrk, elt));
break;
case SESSION_CTRL_EVT_ACCEPTED_REPLY:
- session_mq_accepted_reply_handler (session_evt_ctrl_data (wrk, elt));
+ session_mq_accepted_reply_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_DISCONNECTED_REPLY:
session_mq_disconnected_reply_handler (session_evt_ctrl_data (wrk,
@@ -1528,7 +1736,7 @@ session_event_dispatch_ctrl (session_worker_t * wrk, session_evt_elt_t * elt)
session_mq_worker_update_handler (session_evt_ctrl_data (wrk, elt));
break;
case SESSION_CTRL_EVT_APP_DETACH:
- app_mq_detach_handler (session_evt_ctrl_data (wrk, elt));
+ app_mq_detach_handler (wrk, elt);
break;
case SESSION_CTRL_EVT_APP_WRK_RPC:
session_mq_app_wrk_rpc_handler (session_evt_ctrl_data (wrk, elt));
@@ -1572,7 +1780,7 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
s = session_event_get_session (wrk, e);
if (PREDICT_FALSE (!s))
break;
- CLIB_PREFETCH (s->tx_fifo, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (s->tx_fifo, sizeof (*(s->tx_fifo)), LOAD);
wrk->ctx.s = s;
/* Spray packets in per session type frames, since they go to
* different nodes */
@@ -1580,7 +1788,7 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
break;
case SESSION_IO_EVT_RX:
s = session_event_get_session (wrk, e);
- if (!s)
+ if (!s || s->session_state >= SESSION_STATE_TRANSPORT_CLOSED)
break;
transport_app_rx_evt (session_get_transport_proto (s),
s->connection_index, s->thread_index);
@@ -1591,19 +1799,21 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
break;
svm_fifo_unset_event (s->rx_fifo);
app_wrk = app_worker_get (s->app_wrk_index);
- app_worker_builtin_rx (app_wrk, s);
+ app_worker_rx_notify (app_wrk, s);
break;
- case SESSION_IO_EVT_BUILTIN_TX:
- s = session_get_from_handle_if_valid (e->session_handle);
+ case SESSION_IO_EVT_TX_MAIN:
+ s = session_get_if_valid (e->session_index, 0 /* main thread */);
+ if (PREDICT_FALSE (!s))
+ break;
wrk->ctx.s = s;
if (PREDICT_TRUE (s != 0))
- session_tx_fifo_dequeue_internal (wrk, node, elt, n_tx_packets);
+ (smm->session_tx_fns[s->session_type]) (wrk, node, elt, n_tx_packets);
break;
default:
clib_warning ("unhandled event type %d", e->event_type);
}
- SESSION_EVT (SESSION_IO_EVT_COUNTS, e->event_type, 1, wrk);
+ SESSION_EVT (SESSION_EVT_IO_EVT_COUNTS, e->event_type, 1, wrk);
/* Regrab elements in case pool moved */
elt = clib_llist_elt (wrk->event_elts, ei);
@@ -1611,14 +1821,22 @@ session_event_dispatch_io (session_worker_t * wrk, vlib_node_runtime_t * node,
clib_llist_put (wrk->event_elts, elt);
}
-/* *INDENT-OFF* */
static const u32 session_evt_msg_sizes[] = {
#define _(symc, sym) \
[SESSION_CTRL_EVT_ ## symc] = sizeof (session_ ## sym ##_msg_t),
foreach_session_ctrl_evt
#undef _
};
-/* *INDENT-ON* */
+
+always_inline void
+session_update_time_subscribers (session_main_t *smm, clib_time_type_t now,
+ u32 thread_index)
+{
+ session_update_time_fn *fn;
+
+ vec_foreach (fn, smm->update_time_fns)
+ (*fn) (now, thread_index);
+}
always_inline void
session_evt_add_to_list (session_worker_t * wrk, session_event_t * evt)
@@ -1652,9 +1870,9 @@ static void
session_flush_pending_tx_buffers (session_worker_t * wrk,
vlib_node_runtime_t * node)
{
- vlib_buffer_enqueue_to_next (wrk->vm, node, wrk->pending_tx_buffers,
- wrk->pending_tx_nexts,
- vec_len (wrk->pending_tx_nexts));
+ vlib_buffer_enqueue_to_next_vec (wrk->vm, node, &wrk->pending_tx_buffers,
+ &wrk->pending_tx_nexts,
+ vec_len (wrk->pending_tx_nexts));
vec_reset_length (wrk->pending_tx_buffers);
vec_reset_length (wrk->pending_tx_nexts);
}
@@ -1685,7 +1903,7 @@ session_wrk_update_state (session_worker_t *wrk)
if (wrk->state == SESSION_WRK_POLLING)
{
- if (clib_llist_elts (wrk->event_elts) == 4 &&
+ if (clib_llist_elts (wrk->event_elts) == 5 &&
vlib_last_vectors_per_main_loop (vm) < 1)
{
session_wrk_set_state (wrk, SESSION_WRK_INTERRUPT);
@@ -1695,7 +1913,7 @@ session_wrk_update_state (session_worker_t *wrk)
}
else if (wrk->state == SESSION_WRK_INTERRUPT)
{
- if (clib_llist_elts (wrk->event_elts) > 4 ||
+ if (clib_llist_elts (wrk->event_elts) > 5 ||
vlib_last_vectors_per_main_loop (vm) > 1)
{
session_wrk_set_state (wrk, SESSION_WRK_POLLING);
@@ -1734,10 +1952,19 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
/*
* Update transport time
*/
- transport_update_time (wrk->last_vlib_time, thread_index);
+ session_update_time_subscribers (smm, wrk->last_vlib_time, thread_index);
n_tx_packets = vec_len (wrk->pending_tx_buffers);
SESSION_EVT (SESSION_EVT_DSP_CNTRS, UPDATE_TIME, wrk);
+ if (PREDICT_FALSE (wrk->dma_enabled))
+ {
+ if (wrk->trans_head == ((wrk->trans_tail + 1) & (wrk->trans_size - 1)))
+ return 0;
+ wrk->batch = vlib_dma_batch_new (vm, wrk->config_index);
+ if (!wrk->batch)
+ return 0;
+ }
+
/*
* Dequeue new internal mq events
*/
@@ -1807,6 +2034,20 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
};
}
+ if (PREDICT_FALSE (wrk->dma_enabled))
+ {
+ if (wrk->batch_num)
+ {
+ vlib_dma_batch_set_cookie (vm, wrk->batch, wrk->trans_tail);
+ wrk->batch_num = 0;
+ wrk->trans_tail++;
+ if (wrk->trans_tail == wrk->trans_size)
+ wrk->trans_tail = 0;
+ }
+
+ vlib_dma_batch_submit (vm, wrk->batch);
+ }
+
SESSION_EVT (SESSION_EVT_DSP_CNTRS, OLD_IO_EVTS, wrk);
if (vec_len (wrk->pending_tx_buffers))
@@ -1823,19 +2064,16 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return n_tx_packets;
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (session_queue_node) =
-{
+VLIB_REGISTER_NODE (session_queue_node) = {
.function = session_queue_node_fn,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
.name = "session-queue",
.format_trace = format_session_queue_trace,
.type = VLIB_NODE_TYPE_INPUT,
- .n_errors = ARRAY_LEN (session_queue_error_strings),
- .error_strings = session_queue_error_strings,
+ .n_errors = SESSION_QUEUE_N_ERROR,
+ .error_counters = session_error_counters,
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
static clib_error_t *
session_wrk_tfd_read_ready (clib_file_t *cf)
@@ -1939,7 +2177,6 @@ session_queue_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
return 0;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (session_queue_process_node) =
{
.function = session_queue_process,
@@ -1947,7 +2184,6 @@ VLIB_REGISTER_NODE (session_queue_process_node) =
.name = "session-queue-process",
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
static_always_inline uword
session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -1960,7 +2196,6 @@ session_queue_pre_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
return session_queue_node_fn (vm, node, frame);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (session_queue_pre_input_node) =
{
.function = session_queue_pre_input_inline,
@@ -1968,7 +2203,6 @@ VLIB_REGISTER_NODE (session_queue_pre_input_node) =
.name = "session-queue-main",
.state = VLIB_NODE_STATE_DISABLED,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/session_rules_table.c b/src/vnet/session/session_rules_table.c
index 34bd6a38676..70a702cf55c 100644
--- a/src/vnet/session/session_rules_table.c
+++ b/src/vnet/session/session_rules_table.c
@@ -386,11 +386,11 @@ session_rules_table_lookup6 (session_rules_table_t * srt,
* @param srt table where rule should be added
* @param args rule arguments
*
- * @return 0 if success, clib_error_t error otherwise
+ * @return 0 if success, session_error_t error otherwise
*/
-int
-session_rules_table_add_del (session_rules_table_t * srt,
- session_rule_table_add_del_args_t * args)
+session_error_t
+session_rules_table_add_del (session_rules_table_t *srt,
+ session_rule_table_add_del_args_t *args)
{
u8 fib_proto = args->rmt.fp_proto, *rt;
u32 ri_from_tag, ri;
@@ -398,7 +398,7 @@ session_rules_table_add_del (session_rules_table_t * srt,
ri_from_tag = session_rules_table_rule_for_tag (srt, args->tag);
if (args->is_add && ri_from_tag != SESSION_RULES_TABLE_INVALID_INDEX)
- return VNET_API_ERROR_INVALID_VALUE;
+ return SESSION_E_INVALID;
if (fib_proto == FIB_PROTOCOL_IP4)
{
@@ -509,11 +509,18 @@ session_rules_table_add_del (session_rules_table_t * srt,
}
}
else
- return VNET_API_ERROR_INVALID_VALUE_2;
+ return SESSION_E_INVALID;
return 0;
}
void
+session_rules_table_free (session_rules_table_t *srt)
+{
+ mma_rules_table_free_16 (&srt->session_rules_tables_16);
+ mma_rules_table_free_40 (&srt->session_rules_tables_40);
+}
+
+void
session_rules_table_init (session_rules_table_t * srt)
{
mma_rules_table_16_t *srt4;
@@ -598,11 +605,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt,
srt4 = &srt->session_rules_tables_16;
vlib_cli_output (vm, "IP4 rules");
- /* *INDENT-OFF* */
pool_foreach (sr4, srt4->rules) {
vlib_cli_output (vm, "%U", format_session_rule4, srt, sr4);
}
- /* *INDENT-ON* */
}
else if (fib_proto == FIB_PROTOCOL_IP6)
@@ -612,11 +617,9 @@ session_rules_table_cli_dump (vlib_main_t * vm, session_rules_table_t * srt,
srt6 = &srt->session_rules_tables_40;
vlib_cli_output (vm, "IP6 rules");
- /* *INDENT-OFF* */
pool_foreach (sr6, srt6->rules) {
vlib_cli_output (vm, "%U", format_session_rule6, srt, sr6);
}
- /* *INDENT-ON* */
}
}
diff --git a/src/vnet/session/session_rules_table.h b/src/vnet/session/session_rules_table.h
index 8679cb8a0c7..010d50a6398 100644
--- a/src/vnet/session/session_rules_table.h
+++ b/src/vnet/session/session_rules_table.h
@@ -18,11 +18,11 @@
#include <vnet/vnet.h>
#include <vnet/fib/fib.h>
+#include <vnet/session/session_types.h>
#include <vnet/session/transport.h>
#include <vnet/session/mma_16.h>
#include <vnet/session/mma_40.h>
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct
{
union
@@ -52,7 +52,6 @@ typedef CLIB_PACKED (struct
u64 as_u64[5];
};
}) session_mask_or_match_6_t;
-/* *INDENT-ON* */
#define SESSION_RULE_TAG_MAX_LEN 64
#define SESSION_RULES_TABLE_INVALID_INDEX MMA_TABLE_INVALID_INDEX
@@ -111,11 +110,13 @@ void session_rules_table_show_rule (vlib_main_t * vm,
ip46_address_t * lcl_ip, u16 lcl_port,
ip46_address_t * rmt_ip, u16 rmt_port,
u8 is_ip4);
-int session_rules_table_add_del (session_rules_table_t * srt,
- session_rule_table_add_del_args_t * args);
+session_error_t
+session_rules_table_add_del (session_rules_table_t *srt,
+ session_rule_table_add_del_args_t *args);
u8 *session_rules_table_rule_tag (session_rules_table_t * srt, u32 ri,
u8 is_ip4);
void session_rules_table_init (session_rules_table_t * srt);
+void session_rules_table_free (session_rules_table_t *srt);
#endif /* SRC_VNET_SESSION_SESSION_RULES_TABLE_H_ */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/session/session_table.c b/src/vnet/session/session_table.c
index d0b576fda7b..dbbe771979c 100644
--- a/src/vnet/session/session_table.c
+++ b/src/vnet/session/session_table.c
@@ -60,6 +60,31 @@ session_table_get (u32 table_index)
_(v6,halfopen,buckets,20000) \
_(v6,halfopen,memory,(64<<20))
+void
+session_table_free (session_table_t *slt, u8 fib_proto)
+{
+ u8 all = fib_proto > FIB_PROTOCOL_IP6 ? 1 : 0;
+ int i;
+
+ for (i = 0; i < TRANSPORT_N_PROTOS; i++)
+ session_rules_table_free (&slt->session_rules[i]);
+
+ vec_free (slt->session_rules);
+
+ if (fib_proto == FIB_PROTOCOL_IP4 || all)
+ {
+ clib_bihash_free_16_8 (&slt->v4_session_hash);
+ clib_bihash_free_16_8 (&slt->v4_half_open_hash);
+ }
+ if (fib_proto == FIB_PROTOCOL_IP6 || all)
+ {
+ clib_bihash_free_48_8 (&slt->v6_session_hash);
+ clib_bihash_free_48_8 (&slt->v6_half_open_hash);
+ }
+
+ pool_put (lookup_tables, slt);
+}
+
/**
* Initialize session table hash tables
*
@@ -160,7 +185,66 @@ ip4_session_table_walk (clib_bihash_16_8_t * hash,
&ctx);
}
-/* *INDENT-ON* */
+u32
+session_table_memory_size (session_table_t *st)
+{
+ u64 total_size = 0;
+
+ if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash))
+ {
+ clib_bihash_alloc_chunk_16_8_t *c = st->v4_session_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ c = st->v4_half_open_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ }
+
+ if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash))
+ {
+ clib_bihash_alloc_chunk_48_8_t *c = st->v6_session_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ c = st->v6_half_open_hash.chunks;
+ while (c)
+ {
+ total_size += c->size;
+ c = c->next;
+ }
+ }
+
+ return total_size;
+}
+
+u8 *
+format_session_table (u8 *s, va_list *args)
+{
+ session_table_t *st = va_arg (*args, session_table_t *);
+
+ if (clib_bihash_is_initialised_16_8 (&st->v4_session_hash))
+ {
+ s = format (s, "%U", format_bihash_16_8, &st->v4_session_hash, 0);
+ s = format (s, "%U", format_bihash_16_8, &st->v4_half_open_hash, 0);
+ }
+
+ if (clib_bihash_is_initialised_48_8 (&st->v6_session_hash))
+ {
+ s = format (s, "%U", format_bihash_48_8, &st->v6_session_hash, 0);
+ s = format (s, "%U", format_bihash_48_8, &st->v6_half_open_hash, 0);
+ }
+
+ return s;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/session/session_table.h b/src/vnet/session/session_table.h
index ead3c302681..636b8d77bee 100644
--- a/src/vnet/session/session_table.h
+++ b/src/vnet/session/session_table.h
@@ -67,6 +67,10 @@ session_table_t *session_table_alloc (void);
session_table_t *session_table_get (u32 table_index);
u32 session_table_index (session_table_t * slt);
void session_table_init (session_table_t * slt, u8 fib_proto);
+void session_table_free (session_table_t *slt, u8 fib_proto);
+
+u32 session_table_memory_size (session_table_t *st);
+u8 *format_session_table (u8 *s, va_list *args);
/* Internal, try not to use it! */
session_table_t *_get_session_tables ();
@@ -75,7 +79,6 @@ session_table_t *_get_session_tables ();
pool_foreach (VAR, _get_session_tables ()) BODY
#endif /* SRC_VNET_SESSION_SESSION_TABLE_H_ */
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/session/session_test.c b/src/vnet/session/session_test.c
new file mode 100644
index 00000000000..770e7263024
--- /dev/null
+++ b/src/vnet/session/session_test.c
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vpp/api/types.h>
+
+#include <vnet/ip/ip_types_api.h>
+
+#define __plugin_msg_base session_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+#include <vlibmemory/vlib.api_enum.h>
+#include <vlibmemory/vlib.api_types.h>
+
+/* Declare message IDs */
+#include <vnet/format_fns.h>
+#include <vnet/session/session.api_enum.h>
+#include <vnet/session/session.api_types.h>
+
+#define vl_endianfun /* define message structures */
+#include <vnet/session/session.api.h>
+#undef vl_endianfun
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ u32 ping_id;
+ vat_main_t *vat_main;
+} session_test_main_t;
+
+static session_test_main_t session_test_main;
+
+static int
+api_session_rule_add_del (vat_main_t *vam)
+{
+ vl_api_session_rule_add_del_t *mp;
+ unformat_input_t *i = vam->input;
+ u32 proto = ~0, lcl_port, rmt_port, action = 0, lcl_plen, rmt_plen;
+ u32 appns_index = 0, scope = 0;
+ ip4_address_t lcl_ip4, rmt_ip4;
+ ip6_address_t lcl_ip6, rmt_ip6;
+ u8 is_ip4 = 1, conn_set = 0;
+ u8 is_add = 1, *tag = 0;
+ int ret;
+ fib_prefix_t lcl, rmt;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_add = 0;
+ else if (unformat (i, "add"))
+ ;
+ else if (unformat (i, "proto tcp"))
+ proto = 0;
+ else if (unformat (i, "proto udp"))
+ proto = 1;
+ else if (unformat (i, "appns %d", &appns_index))
+ ;
+ else if (unformat (i, "scope %d", &scope))
+ ;
+ else if (unformat (i, "tag %_%v%_", &tag))
+ ;
+ else if (unformat (i, "%U/%d %d %U/%d %d", unformat_ip4_address,
+ &lcl_ip4, &lcl_plen, &lcl_port, unformat_ip4_address,
+ &rmt_ip4, &rmt_plen, &rmt_port))
+ {
+ is_ip4 = 1;
+ conn_set = 1;
+ }
+ else if (unformat (i, "%U/%d %d %U/%d %d", unformat_ip6_address,
+ &lcl_ip6, &lcl_plen, &lcl_port, unformat_ip6_address,
+ &rmt_ip6, &rmt_plen, &rmt_port))
+ {
+ is_ip4 = 0;
+ conn_set = 1;
+ }
+ else if (unformat (i, "action %d", &action))
+ ;
+ else
+ break;
+ }
+ if (proto == ~0 || !conn_set || action == ~0)
+ {
+ errmsg ("transport proto, connection and action must be set");
+ return -99;
+ }
+
+ if (scope > 3)
+ {
+ errmsg ("scope should be 0-3");
+ return -99;
+ }
+
+ M (SESSION_RULE_ADD_DEL, mp);
+
+ clib_memset (&lcl, 0, sizeof (lcl));
+ clib_memset (&rmt, 0, sizeof (rmt));
+ if (is_ip4)
+ {
+ ip_set (&lcl.fp_addr, &lcl_ip4, 1);
+ ip_set (&rmt.fp_addr, &rmt_ip4, 1);
+ lcl.fp_len = lcl_plen;
+ rmt.fp_len = rmt_plen;
+ }
+ else
+ {
+ ip_set (&lcl.fp_addr, &lcl_ip6, 0);
+ ip_set (&rmt.fp_addr, &rmt_ip6, 0);
+ lcl.fp_len = lcl_plen;
+ rmt.fp_len = rmt_plen;
+ }
+
+ ip_prefix_encode (&lcl, &mp->lcl);
+ ip_prefix_encode (&rmt, &mp->rmt);
+ mp->lcl_port = clib_host_to_net_u16 ((u16) lcl_port);
+ mp->rmt_port = clib_host_to_net_u16 ((u16) rmt_port);
+ mp->transport_proto =
+ proto ? TRANSPORT_PROTO_API_UDP : TRANSPORT_PROTO_API_TCP;
+ mp->action_index = clib_host_to_net_u32 (action);
+ mp->appns_index = clib_host_to_net_u32 (appns_index);
+ mp->scope = scope;
+ mp->is_add = is_add;
+ if (tag)
+ {
+ clib_memcpy (mp->tag, tag, vec_len (tag));
+ vec_free (tag);
+ }
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_app_attach_reply_t_handler (vl_api_app_attach_reply_t *mp)
+{
+}
+
+static void
+vl_api_app_add_cert_key_pair_reply_t_handler (
+ vl_api_app_add_cert_key_pair_reply_t *mp)
+{
+}
+
+static int
+api_app_attach (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_application_detach (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_app_del_cert_key_pair (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_app_add_cert_key_pair (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_session_rules_dump (vat_main_t *vam)
+{
+ vl_api_session_rules_dump_t *mp;
+ vl_api_control_ping_t *mp_ping;
+ int ret;
+
+ if (!vam->json_output)
+ {
+ print (vam->ofp, "%=20s", "Session Rules");
+ }
+
+ M (SESSION_RULES_DUMP, mp);
+ /* send it... */
+ S (mp);
+
+ /* Use a control ping for synchronization */
+ PING (&session_test_main, mp_ping);
+ S (mp_ping);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_session_rules_details_t_handler (vl_api_session_rules_details_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ fib_prefix_t lcl, rmt;
+
+ ip_prefix_decode (&mp->lcl, &lcl);
+ ip_prefix_decode (&mp->rmt, &rmt);
+
+ if (lcl.fp_proto == FIB_PROTOCOL_IP4)
+ {
+ print (vam->ofp,
+ "appns %u tp %u scope %d %U/%d %d %U/%d %d action: %d tag: %s",
+ clib_net_to_host_u32 (mp->appns_index), mp->transport_proto,
+ mp->scope, format_ip4_address, &lcl.fp_addr.ip4, lcl.fp_len,
+ clib_net_to_host_u16 (mp->lcl_port), format_ip4_address,
+ &rmt.fp_addr.ip4, rmt.fp_len, clib_net_to_host_u16 (mp->rmt_port),
+ clib_net_to_host_u32 (mp->action_index), mp->tag);
+ }
+ else
+ {
+ print (vam->ofp,
+ "appns %u tp %u scope %d %U/%d %d %U/%d %d action: %d tag: %s",
+ clib_net_to_host_u32 (mp->appns_index), mp->transport_proto,
+ mp->scope, format_ip6_address, &lcl.fp_addr.ip6, lcl.fp_len,
+ clib_net_to_host_u16 (mp->lcl_port), format_ip6_address,
+ &rmt.fp_addr.ip6, rmt.fp_len, clib_net_to_host_u16 (mp->rmt_port),
+ clib_net_to_host_u32 (mp->action_index), mp->tag);
+ }
+}
+
+static void
+vl_api_app_namespace_add_del_reply_t_handler (
+ vl_api_app_namespace_add_del_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+ if (vam->async_mode)
+ {
+ vam->async_errors += (retval < 0);
+ }
+ else
+ {
+ vam->retval = retval;
+ if (retval == 0)
+ errmsg ("app ns index %d\n", ntohl (mp->appns_index));
+ vam->result_ready = 1;
+ }
+}
+
+static void
+vl_api_app_namespace_add_del_v2_reply_t_handler (
+ vl_api_app_namespace_add_del_v2_reply_t *vat)
+{
+}
+
+static void
+vl_api_app_worker_add_del_reply_t_handler (
+ vl_api_app_worker_add_del_reply_t *vat)
+{
+}
+
+static int
+api_app_namespace_add_del_v2 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_session_enable_disable (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_app_worker_add_del (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_app_namespace_add_del (vat_main_t *vam)
+{
+ vl_api_app_namespace_add_del_t *mp;
+ unformat_input_t *i = vam->input;
+ u8 *ns_id = 0, secret_set = 0, sw_if_index_set = 0;
+ u32 sw_if_index, ip4_fib_id, ip6_fib_id;
+ u64 secret;
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "id %_%v%_", &ns_id))
+ ;
+ else if (unformat (i, "secret %lu", &secret))
+ secret_set = 1;
+ else if (unformat (i, "sw_if_index %d", &sw_if_index))
+ sw_if_index_set = 1;
+ else if (unformat (i, "ip4_fib_id %d", &ip4_fib_id))
+ ;
+ else if (unformat (i, "ip6_fib_id %d", &ip6_fib_id))
+ ;
+ else
+ break;
+ }
+ if (!ns_id || !secret_set || !sw_if_index_set)
+ {
+ errmsg ("namespace id, secret and sw_if_index must be set");
+ return -99;
+ }
+ if (vec_len (ns_id) > 64)
+ {
+ errmsg ("namespace id too long");
+ return -99;
+ }
+ M (APP_NAMESPACE_ADD_DEL, mp);
+
+ vl_api_vec_to_api_string (ns_id, &mp->namespace_id);
+ mp->secret = clib_host_to_net_u64 (secret);
+ mp->sw_if_index = clib_host_to_net_u32 (sw_if_index);
+ mp->ip4_fib_id = clib_host_to_net_u32 (ip4_fib_id);
+ mp->ip6_fib_id = clib_host_to_net_u32 (ip6_fib_id);
+ vec_free (ns_id);
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static void
+vl_api_app_namespace_add_del_v4_reply_t_handler (
+ vl_api_app_namespace_add_del_v4_reply_t *mp)
+{
+}
+
+static int
+api_app_namespace_add_del_v4 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static void
+vl_api_app_namespace_add_del_v3_reply_t_handler (
+ vl_api_app_namespace_add_del_v3_reply_t *mp)
+{
+}
+
+static int
+api_app_namespace_add_del_v3 (vat_main_t *vat)
+{
+ return -1;
+}
+
+static int
+api_session_sapi_enable_disable (vat_main_t *vat)
+{
+ return -1;
+}
+
+#include <vnet/session/session.api_test.c>
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/session/session_types.h b/src/vnet/session/session_types.h
index 246978e0ac3..5e650727d61 100644
--- a/src/vnet/session/session_types.h
+++ b/src/vnet/session/session_types.h
@@ -22,8 +22,22 @@
#define SESSION_INVALID_INDEX ((u32)~0)
#define SESSION_INVALID_HANDLE ((u64)~0)
#define SESSION_CTRL_MSG_MAX_SIZE 86
+#define SESSION_CTRL_MSG_TX_MAX_SIZE 160
#define SESSION_NODE_FRAME_SIZE 128
+typedef u8 session_type_t;
+typedef u64 session_handle_t;
+
+typedef union session_handle_tu_
+{
+ session_handle_t handle;
+ struct
+ {
+ u32 session_index;
+ u32 thread_index;
+ };
+} __attribute__ ((__transparent_union__)) session_handle_tu_t;
+
#define foreach_session_endpoint_fields \
foreach_transport_endpoint_cfg_fields \
_(u8, transport_proto) \
@@ -35,6 +49,23 @@ typedef struct _session_endpoint
#undef _
} session_endpoint_t;
+#define foreach_session_endpoint_cfg_flags _ (PROXY_LISTEN, "proxy listener")
+
+typedef enum session_endpoint_cfg_flags_bits_
+{
+#define _(sym, str) SESSION_ENDPT_CFG_F_BIT_##sym,
+ foreach_session_endpoint_cfg_flags
+#undef _
+} __clib_packed session_endpoint_cfg_flags_bits_t;
+
+typedef enum session_endpoint_cfg_flags_
+{
+#define _(sym, str) \
+ SESSION_ENDPT_CFG_F_##sym = 1 << SESSION_ENDPT_CFG_F_BIT_##sym,
+ foreach_session_endpoint_cfg_flags
+#undef _
+} __clib_packed session_endpoint_cfg_flags_t;
+
typedef struct _session_endpoint_cfg
{
#define _(type, name) type name;
@@ -45,7 +76,7 @@ typedef struct _session_endpoint_cfg
u32 ns_index;
u8 original_tp;
u64 parent_handle;
- u8 flags;
+ session_endpoint_cfg_flags_t flags;
transport_endpt_ext_cfg_t *ext_cfg;
} session_endpoint_cfg_t;
@@ -107,9 +138,6 @@ session_endpoint_is_zero (session_endpoint_t * sep)
return ip_is_zero (&sep->ip, sep->is_ip4);
}
-typedef u8 session_type_t;
-typedef u64 session_handle_t;
-
typedef enum
{
SESSION_CLEANUP_TRANSPORT,
@@ -126,19 +154,19 @@ typedef enum session_ft_action_
/*
* Session states
*/
-#define foreach_session_state \
- _(CREATED, "created") \
- _(LISTENING, "listening") \
- _(CONNECTING, "connecting") \
- _(ACCEPTING, "accepting") \
- _(READY, "ready") \
- _(OPENED, "opened") \
- _(TRANSPORT_CLOSING, "transport-closing") \
- _(CLOSING, "closing") \
- _(APP_CLOSED, "app-closed") \
- _(TRANSPORT_CLOSED, "transport-closed") \
- _(CLOSED, "closed") \
- _(TRANSPORT_DELETED, "transport-deleted") \
+#define foreach_session_state \
+ _ (CREATED, "created") \
+ _ (LISTENING, "listening") \
+ _ (CONNECTING, "connecting") \
+ _ (ACCEPTING, "accepting") \
+ _ (READY, "ready") \
+ _ (OPENED, "opened") \
+ _ (TRANSPORT_CLOSING, "transport-closing") \
+ _ (CLOSING, "closing") \
+ _ (APP_CLOSED, "app-closed") \
+ _ (TRANSPORT_CLOSED, "transport-closed") \
+ _ (CLOSED, "closed") \
+ _ (TRANSPORT_DELETED, "transport-deleted")
typedef enum
{
@@ -146,7 +174,7 @@ typedef enum
foreach_session_state
#undef _
SESSION_N_STATES,
-} session_state_t;
+} __clib_packed session_state_t;
#define foreach_session_flag \
_ (RX_EVT, "rx-event") \
@@ -155,7 +183,9 @@ typedef enum
_ (IS_MIGRATING, "migrating") \
_ (UNIDIRECTIONAL, "unidirectional") \
_ (CUSTOM_FIFO_TUNING, "custom-fifo-tuning") \
- _ (HALF_OPEN, "half-open")
+ _ (HALF_OPEN, "half-open") \
+ _ (APP_CLOSED, "app-closed") \
+ _ (IS_CLESS, "connectionless")
typedef enum session_flags_bits_
{
@@ -178,38 +208,42 @@ typedef struct session_
svm_fifo_t *rx_fifo;
svm_fifo_t *tx_fifo;
+ union
+ {
+ session_handle_t handle;
+ struct
+ {
+ /** Index in thread pool where session was allocated */
+ u32 session_index;
+
+ /** Index of the thread that allocated the session */
+ u32 thread_index;
+ };
+ };
+
/** Type built from transport and network protocol types */
session_type_t session_type;
/** State in session layer state machine. See @ref session_state_t */
- volatile u8 session_state;
-
- /** Index in thread pool where session was allocated */
- u32 session_index;
+ volatile session_state_t session_state;
/** Index of the app worker that owns the session */
u32 app_wrk_index;
- /** Index of the thread that allocated the session */
- u8 thread_index;
-
/** Session flags. See @ref session_flags_t */
- u32 flags;
+ session_flags_t flags;
/** Index of the transport connection associated to the session */
u32 connection_index;
- /** Index of application that owns the listener. Set only if a listener */
- u32 app_index;
+ /** App listener index in app's listener pool if a listener */
+ u32 al_index;
union
{
/** Parent listener session index if the result of an accept */
session_handle_t listener_handle;
- /** App listener index in app's listener pool if a listener */
- u32 al_index;
-
/** Index in app worker's half-open table if a half-open */
u32 ho_index;
};
@@ -282,45 +316,35 @@ session_tx_is_dgram (session_t * s)
always_inline session_handle_t
session_handle (session_t * s)
{
- return ((u64) s->thread_index << 32) | (u64) s->session_index;
+ return s->handle;
}
always_inline u32
-session_index_from_handle (session_handle_t handle)
+session_index_from_handle (session_handle_tu_t handle)
{
- return handle & 0xFFFFFFFF;
+ return handle.session_index;
}
always_inline u32
-session_thread_from_handle (session_handle_t handle)
+session_thread_from_handle (session_handle_tu_t handle)
{
- return handle >> 32;
+ return handle.thread_index;
}
always_inline void
-session_parse_handle (session_handle_t handle, u32 * index,
- u32 * thread_index)
+session_parse_handle (session_handle_tu_t handle, u32 *index,
+ u32 *thread_index)
{
- *index = session_index_from_handle (handle);
- *thread_index = session_thread_from_handle (handle);
+ *index = handle.session_index;
+ *thread_index = handle.thread_index;
}
static inline session_handle_t
session_make_handle (u32 session_index, u32 data)
{
- return (((u64) data << 32) | (u64) session_index);
-}
-
-always_inline u32
-session_handle_index (session_handle_t ho_handle)
-{
- return (ho_handle & 0xffffffff);
-}
-
-always_inline u32
-session_handle_data (session_handle_t ho_handle)
-{
- return (ho_handle >> 32);
+ return ((session_handle_tu_t){ .session_index = session_index,
+ .thread_index = data })
+ .handle;
}
typedef enum
@@ -329,7 +353,7 @@ typedef enum
SESSION_IO_EVT_TX,
SESSION_IO_EVT_TX_FLUSH,
SESSION_IO_EVT_BUILTIN_RX,
- SESSION_IO_EVT_BUILTIN_TX,
+ SESSION_IO_EVT_TX_MAIN,
SESSION_CTRL_EVT_RPC,
SESSION_CTRL_EVT_HALF_CLOSE,
SESSION_CTRL_EVT_CLOSE,
@@ -360,6 +384,8 @@ typedef enum
SESSION_CTRL_EVT_APP_WRK_RPC,
SESSION_CTRL_EVT_TRANSPORT_ATTR,
SESSION_CTRL_EVT_TRANSPORT_ATTR_REPLY,
+ SESSION_CTRL_EVT_TRANSPORT_CLOSED,
+ SESSION_CTRL_EVT_HALF_CLEANUP,
} session_evt_type_t;
#define foreach_session_ctrl_evt \
@@ -394,7 +420,6 @@ typedef enum
#define FIFO_EVENT_APP_TX SESSION_IO_EVT_TX
#define FIFO_EVENT_DISCONNECT SESSION_CTRL_EVT_CLOSE
#define FIFO_EVENT_BUILTIN_RX SESSION_IO_EVT_BUILTIN_RX
-#define FIFO_EVENT_BUILTIN_TX SESSION_IO_EVT_BUILTIN_TX
typedef enum
{
@@ -419,6 +444,7 @@ typedef struct
session_handle_t session_handle;
session_rpc_args_t rpc_args;
u32 ctrl_data_index;
+ u64 as_u64[2];
struct
{
u8 data[0];
@@ -443,12 +469,12 @@ typedef struct session_dgram_header_
u16 rmt_port;
u16 lcl_port;
u8 is_ip4;
+ u16 gso_size;
} __clib_packed session_dgram_hdr_t;
#define SESSION_CONN_ID_LEN 37
-#define SESSION_CONN_HDR_LEN 45
-
-STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
+#define SESSION_CONN_HDR_LEN 47
+STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 10),
"session conn id wrong length");
#define foreach_session_error \
@@ -466,9 +492,12 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
_ (NOLISTEN, "not listening") \
_ (NOSESSION, "session does not exist") \
_ (NOAPP, "app not attached") \
+ _ (APP_ATTACHED, "app already attached") \
_ (PORTINUSE, "lcl port in use") \
_ (IPINUSE, "ip in use") \
_ (ALREADY_LISTENING, "ip port pair already listened on") \
+ _ (ADDR_NOT_IN_USE, "address not in use") \
+ _ (INVALID, "invalid value") \
_ (INVALID_RMT_IP, "invalid remote ip") \
_ (INVALID_APPWRK, "invalid app worker") \
_ (INVALID_NS, "invalid namespace") \
@@ -486,7 +515,10 @@ STATIC_ASSERT (sizeof (session_dgram_hdr_t) == (SESSION_CONN_ID_LEN + 8),
_ (NOEXTCFG, "no extended transport config") \
_ (NOCRYPTOENG, "no crypto engine") \
_ (NOCRYPTOCKP, "cert key pair not found ") \
- _ (LOCAL_CONNECT, "could not connect with local scope")
+ _ (LOCAL_CONNECT, "could not connect with local scope") \
+ _ (WRONG_NS_SECRET, "wrong ns secret") \
+ _ (SYSCALL, "system call error") \
+ _ (TRANSPORT_NO_REG, "transport was not registered")
typedef enum session_error_p_
{
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
index 526f1a2da15..1c2a9261d3c 100644
--- a/src/vnet/session/transport.c
+++ b/src/vnet/session/transport.c
@@ -17,36 +17,31 @@
#include <vnet/session/session.h>
#include <vnet/fib/fib.h>
-typedef struct local_endpoint_
-{
- transport_endpoint_t ep;
- int refcnt;
-} local_endpoint_t;
-
/**
* Per-type vector of transport protocol virtual function tables
*/
transport_proto_vft_t *tp_vfts;
-/*
- * Port allocator seed
- */
-static u32 port_allocator_seed;
-
-/*
- * Local endpoints table
- */
-static transport_endpoint_table_t local_endpoints_table;
+typedef struct local_endpoint_
+{
+ transport_endpoint_t ep;
+ transport_proto_t proto;
+ int refcnt;
+} local_endpoint_t;
-/*
- * Pool of local endpoints
- */
-static local_endpoint_t *local_endpoints;
+typedef struct transport_main_
+{
+ transport_endpoint_table_t local_endpoints_table;
+ local_endpoint_t *local_endpoints;
+ u32 *lcl_endpts_freelist;
+ u32 port_allocator_seed;
+ u16 port_allocator_min_src_port;
+ u16 port_allocator_max_src_port;
+ u8 lcl_endpts_cleanup_pending;
+ clib_spinlock_t local_endpoints_lock;
+} transport_main_t;
-/*
- * Local endpoints pool lock
- */
-static clib_spinlock_t local_endpoints_lock;
+static transport_main_t tp_main;
u8 *
format_transport_proto (u8 * s, va_list * args)
@@ -76,6 +71,35 @@ format_transport_proto_short (u8 * s, va_list * args)
return s;
}
+const char *transport_flags_str[] = {
+#define _(sym, str) str,
+ foreach_transport_connection_flag
+#undef _
+};
+
+u8 *
+format_transport_flags (u8 *s, va_list *args)
+{
+ transport_connection_flags_t flags;
+ int i, last = -1;
+
+ flags = va_arg (*args, transport_connection_flags_t);
+
+ for (i = 0; i < TRANSPORT_CONNECTION_N_FLAGS; i++)
+ if (flags & (1 << i))
+ last = i;
+
+ for (i = 0; i < last; i++)
+ {
+ if (flags & (1 << i))
+ s = format (s, "%s, ", transport_flags_str[i]);
+ }
+ if (last >= 0)
+ s = format (s, "%s", transport_flags_str[last]);
+
+ return s;
+}
+
u8 *
format_transport_connection (u8 * s, va_list * args)
{
@@ -100,8 +124,8 @@ format_transport_connection (u8 * s, va_list * args)
if (transport_connection_is_tx_paced (tc))
s = format (s, "%Upacer: %U\n", format_white_space, indent,
format_transport_pacer, &tc->pacer, tc->thread_index);
- s = format (s, "%Utransport: flags 0x%x\n", format_white_space, indent,
- tc->flags);
+ s = format (s, "%Utransport: flags: %U\n", format_white_space, indent,
+ format_transport_flags, tc->flags);
}
return s;
}
@@ -124,14 +148,13 @@ u8 *
format_transport_half_open_connection (u8 * s, va_list * args)
{
u32 transport_proto = va_arg (*args, u32);
- u32 ho_index = va_arg (*args, u32);
transport_proto_vft_t *tp_vft;
tp_vft = transport_protocol_get_vft (transport_proto);
if (!tp_vft)
return s;
- s = format (s, "%U", tp_vft->format_half_open, ho_index);
+ s = (tp_vft->format_half_open) (s, args);
return s;
}
@@ -314,6 +337,8 @@ transport_cleanup_half_open (transport_proto_t tp, u32 conn_index)
int
transport_connect (transport_proto_t tp, transport_endpoint_cfg_t * tep)
{
+ if (PREDICT_FALSE (!tp_vfts[tp].connect))
+ return SESSION_E_TRANSPORT_NO_REG;
return tp_vfts[tp].connect (tep);
}
@@ -341,8 +366,10 @@ transport_reset (transport_proto_t tp, u32 conn_index, u8 thread_index)
u32
transport_start_listen (transport_proto_t tp, u32 session_index,
- transport_endpoint_t * tep)
+ transport_endpoint_cfg_t *tep)
{
+ if (PREDICT_FALSE (!tp_vfts[tp].start_listen))
+ return SESSION_E_TRANSPORT_NO_REG;
return tp_vfts[tp].start_listen (session_index, tep);
}
@@ -420,67 +447,148 @@ transport_connection_attribute (transport_proto_t tp, u32 conn_index,
#define PORT_MASK ((1 << 16)- 1)
void
-transport_endpoint_del (u32 tepi)
+transport_endpoint_free (u32 tepi)
{
- clib_spinlock_lock_if_init (&local_endpoints_lock);
- pool_put_index (local_endpoints, tepi);
- clib_spinlock_unlock_if_init (&local_endpoints_lock);
+ transport_main_t *tm = &tp_main;
+ pool_put_index (tm->local_endpoints, tepi);
}
always_inline local_endpoint_t *
-transport_endpoint_new (void)
+transport_endpoint_alloc (void)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
- pool_get_zero (local_endpoints, lep);
+
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
+
+ pool_get_aligned_safe (tm->local_endpoints, lep, 0);
return lep;
}
+static void
+transport_cleanup_freelist (void)
+{
+ transport_main_t *tm = &tp_main;
+ local_endpoint_t *lep;
+ u32 *lep_indexp;
+
+ clib_spinlock_lock (&tm->local_endpoints_lock);
+
+ vec_foreach (lep_indexp, tm->lcl_endpts_freelist)
+ {
+ lep = pool_elt_at_index (tm->local_endpoints, *lep_indexp);
+
+ /* Port re-shared after attempt to cleanup */
+ if (lep->refcnt > 0)
+ continue;
+
+ transport_endpoint_table_del (&tm->local_endpoints_table, lep->proto,
+ &lep->ep);
+ transport_endpoint_free (*lep_indexp);
+ }
+
+ vec_reset_length (tm->lcl_endpts_freelist);
+
+ tm->lcl_endpts_cleanup_pending = 0;
+
+ clib_spinlock_unlock (&tm->local_endpoints_lock);
+}
+
void
-transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port)
+transport_program_endpoint_cleanup (u32 lepi)
+{
+ transport_main_t *tm = &tp_main;
+ u8 flush_fl = 0;
+
+ /* All workers can free connections. Synchronize access to freelist */
+ clib_spinlock_lock (&tm->local_endpoints_lock);
+
+ vec_add1 (tm->lcl_endpts_freelist, lepi);
+
+ /* Avoid accumulating lots of endpoints for cleanup */
+ if (!tm->lcl_endpts_cleanup_pending &&
+ vec_len (tm->lcl_endpts_freelist) > 32)
+ {
+ tm->lcl_endpts_cleanup_pending = 1;
+ flush_fl = 1;
+ }
+
+ clib_spinlock_unlock (&tm->local_endpoints_lock);
+
+ if (flush_fl)
+ session_send_rpc_evt_to_thread_force (transport_cl_thread (),
+ transport_cleanup_freelist, 0);
+}
+
+int
+transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
u32 lepi;
- /* Cleanup local endpoint if this was an active connect */
- lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip,
- clib_net_to_host_u16 (port));
- if (lepi != ENDPOINT_INVALID_INDEX)
+ lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip,
+ port);
+ if (lepi == ENDPOINT_INVALID_INDEX)
+ return -1;
+
+ /* First worker may be cleaning up ports so avoid touching free bitmap */
+ lep = &tm->local_endpoints[lepi];
+ ASSERT (lep->refcnt >= 1);
+
+ /* Local endpoint no longer in use, program cleanup */
+ if (!clib_atomic_sub_fetch (&lep->refcnt, 1))
{
- lep = pool_elt_at_index (local_endpoints, lepi);
- if (!clib_atomic_sub_fetch (&lep->refcnt, 1))
- {
- transport_endpoint_table_del (&local_endpoints_table, proto,
- &lep->ep);
- transport_endpoint_del (lepi);
- }
+ transport_program_endpoint_cleanup (lepi);
+ return 0;
}
+
+ /* Not an error, just in idication that endpoint was not cleaned up */
+ return -1;
}
-static void
-transport_endpoint_mark_used (u8 proto, ip46_address_t * ip, u16 port)
+static int
+transport_endpoint_mark_used (u8 proto, ip46_address_t *ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
- clib_spinlock_lock_if_init (&local_endpoints_lock);
- lep = transport_endpoint_new ();
+ u32 tei;
+
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
+
+ tei =
+ transport_endpoint_lookup (&tm->local_endpoints_table, proto, ip, port);
+ if (tei != ENDPOINT_INVALID_INDEX)
+ return SESSION_E_PORTINUSE;
+
+ /* Pool reallocs with worker barrier */
+ lep = transport_endpoint_alloc ();
clib_memcpy_fast (&lep->ep.ip, ip, sizeof (*ip));
lep->ep.port = port;
+ lep->proto = proto;
lep->refcnt = 1;
- transport_endpoint_table_add (&local_endpoints_table, proto, &lep->ep,
- lep - local_endpoints);
- clib_spinlock_unlock_if_init (&local_endpoints_lock);
+
+ transport_endpoint_table_add (&tm->local_endpoints_table, proto, &lep->ep,
+ lep - tm->local_endpoints);
+
+ return 0;
}
void
transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port)
{
+ transport_main_t *tm = &tp_main;
local_endpoint_t *lep;
u32 lepi;
- lepi = transport_endpoint_lookup (&local_endpoints_table, proto, lcl_ip,
- clib_net_to_host_u16 (port));
+ /* Active opens should call this only from a control thread, which are also
+ * used to allocate and free ports. So, pool has only one writer and
+ * potentially many readers. Listeners are allocated with barrier */
+ lepi = transport_endpoint_lookup (&tm->local_endpoints_table, proto, lcl_ip,
+ port);
if (lepi != ENDPOINT_INVALID_INDEX)
{
- lep = pool_elt_at_index (local_endpoints, lepi);
+ lep = pool_elt_at_index (tm->local_endpoints, lepi);
clib_atomic_add_fetch (&lep->refcnt, 1);
}
}
@@ -488,18 +596,22 @@ transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip, u16 port)
/**
* Allocate local port and add if successful add entry to local endpoint
* table to mark the pair as used.
+ *
+ * @return port in net order or -1 if port cannot be allocated
*/
int
-transport_alloc_local_port (u8 proto, ip46_address_t * ip)
+transport_alloc_local_port (u8 proto, ip46_address_t *lcl_addr,
+ transport_endpoint_cfg_t *rmt)
{
- u16 min = 1024, max = 65535; /* XXX configurable ? */
+ transport_main_t *tm = &tp_main;
+ u16 min = tm->port_allocator_min_src_port;
+ u16 max = tm->port_allocator_max_src_port;
int tries, limit;
- u32 tei;
limit = max - min;
- /* Only support active opens from thread 0 */
- ASSERT (vlib_get_thread_index () == 0);
+ /* Only support active opens from one of ctrl threads */
+ ASSERT (vlib_get_thread_index () <= transport_cl_thread ());
/* Search for first free slot */
for (tries = 0; tries < limit; tries++)
@@ -509,19 +621,26 @@ transport_alloc_local_port (u8 proto, ip46_address_t * ip)
/* Find a port in the specified range */
while (1)
{
- port = random_u32 (&port_allocator_seed) & PORT_MASK;
+ port = random_u32 (&tm->port_allocator_seed) & PORT_MASK;
if (PREDICT_TRUE (port >= min && port < max))
- break;
+ {
+ port = clib_host_to_net_u16 (port);
+ break;
+ }
}
- /* Look it up. If not found, we're done */
- tei = transport_endpoint_lookup (&local_endpoints_table, proto, ip,
- port);
- if (tei == ENDPOINT_INVALID_INDEX)
- {
- transport_endpoint_mark_used (proto, ip, port);
- return port;
- }
+ if (!transport_endpoint_mark_used (proto, lcl_addr, port))
+ return port;
+
+ /* IP:port pair already in use, check if 6-tuple available */
+ if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip, port,
+ rmt->port, proto, rmt->is_ip4))
+ continue;
+
+ /* 6-tuple is available so increment lcl endpoint refcount */
+ transport_share_local_endpoint (proto, lcl_addr, port);
+
+ return port;
}
return -1;
}
@@ -549,14 +668,14 @@ transport_get_interface_ip (u32 sw_if_index, u8 is_ip4, ip46_address_t * addr)
}
static session_error_t
-transport_find_local_ip_for_remote (u32 sw_if_index,
- transport_endpoint_t * rmt,
- ip46_address_t * lcl_addr)
+transport_find_local_ip_for_remote (u32 *sw_if_index,
+ transport_endpoint_t *rmt,
+ ip46_address_t *lcl_addr)
{
fib_node_index_t fei;
fib_prefix_t prefix;
- if (sw_if_index == ENDPOINT_INVALID_INDEX)
+ if (*sw_if_index == ENDPOINT_INVALID_INDEX)
{
/* Find a FIB path to the destination */
clib_memcpy_fast (&prefix.fp_addr, &rmt->ip, sizeof (rmt->ip));
@@ -570,13 +689,13 @@ transport_find_local_ip_for_remote (u32 sw_if_index,
if (fei == FIB_NODE_INDEX_INVALID)
return SESSION_E_NOROUTE;
- sw_if_index = fib_entry_get_resolving_interface (fei);
- if (sw_if_index == ENDPOINT_INVALID_INDEX)
+ *sw_if_index = fib_entry_get_resolving_interface (fei);
+ if (*sw_if_index == ENDPOINT_INVALID_INDEX)
return SESSION_E_NOINTF;
}
clib_memset (lcl_addr, 0, sizeof (*lcl_addr));
- return transport_get_interface_ip (sw_if_index, rmt->is_ip4, lcl_addr);
+ return transport_get_interface_ip (*sw_if_index, rmt->is_ip4, lcl_addr);
}
int
@@ -584,16 +703,16 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg,
ip46_address_t * lcl_addr, u16 * lcl_port)
{
transport_endpoint_t *rmt = (transport_endpoint_t *) rmt_cfg;
+ transport_main_t *tm = &tp_main;
session_error_t error;
int port;
- u32 tei;
/*
* Find the local address
*/
if (ip_is_zero (&rmt_cfg->peer.ip, rmt_cfg->peer.is_ip4))
{
- error = transport_find_local_ip_for_remote (rmt_cfg->peer.sw_if_index,
+ error = transport_find_local_ip_for_remote (&rmt_cfg->peer.sw_if_index,
rmt, lcl_addr);
if (error)
return error;
@@ -605,26 +724,37 @@ transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt_cfg,
sizeof (rmt_cfg->peer.ip));
}
+ /* Cleanup freelist if need be */
+ if (vec_len (tm->lcl_endpts_freelist))
+ transport_cleanup_freelist ();
+
/*
* Allocate source port
*/
if (rmt_cfg->peer.port == 0)
{
- port = transport_alloc_local_port (proto, lcl_addr);
+ port = transport_alloc_local_port (proto, lcl_addr, rmt_cfg);
if (port < 1)
return SESSION_E_NOPORT;
*lcl_port = port;
}
else
{
- port = clib_net_to_host_u16 (rmt_cfg->peer.port);
- *lcl_port = port;
- tei = transport_endpoint_lookup (&local_endpoints_table, proto,
- lcl_addr, port);
- if (tei != ENDPOINT_INVALID_INDEX)
+ *lcl_port = rmt_cfg->peer.port;
+
+ if (!transport_endpoint_mark_used (proto, lcl_addr, rmt_cfg->peer.port))
+ return 0;
+
+ /* IP:port pair already in use, check if 6-tuple available */
+ if (session_lookup_connection (rmt->fib_index, lcl_addr, &rmt->ip,
+ rmt_cfg->peer.port, rmt->port, proto,
+ rmt->is_ip4))
return SESSION_E_PORTINUSE;
- transport_endpoint_mark_used (proto, lcl_addr, port);
+ /* 6-tuple is available so increment lcl endpoint refcount */
+ transport_share_local_endpoint (proto, lcl_addr, rmt_cfg->peer.port);
+
+ return 0;
}
return 0;
@@ -660,15 +790,15 @@ static inline u32
spacer_max_burst (spacer_t * pacer, clib_us_time_t time_now)
{
u64 n_periods = (time_now - pacer->last_update);
- u64 inc;
+ i64 inc;
if ((inc = (f32) n_periods * pacer->tokens_per_period) > 10)
{
pacer->last_update = time_now;
- pacer->bucket = clib_min (pacer->bucket + inc, pacer->max_burst);
+ pacer->bucket = clib_min (pacer->bucket + inc, (i64) pacer->max_burst);
}
- return pacer->bucket > 0 ? pacer->max_burst : 0;
+ return pacer->bucket >= 0 ? pacer->max_burst : 0;
}
static inline void
@@ -790,7 +920,7 @@ void
transport_connection_reschedule (transport_connection_t * tc)
{
tc->flags &= ~TRANSPORT_CONNECTION_F_DESCHED;
- transport_connection_tx_pacer_reset_bucket (tc, TRANSPORT_PACER_MIN_BURST);
+ transport_connection_tx_pacer_reset_bucket (tc, 0 /* bucket */);
if (transport_max_tx_dequeue (tc))
sesssion_reschedule_tx (tc);
else
@@ -830,6 +960,9 @@ transport_enable_disable (vlib_main_t * vm, u8 is_en)
{
if (vft->enable)
(vft->enable) (vm, is_en);
+
+ if (vft->update_time)
+ session_register_update_time_fn (vft->update_time, is_en);
}
}
@@ -838,6 +971,7 @@ transport_init (void)
{
vlib_thread_main_t *vtm = vlib_get_thread_main ();
session_main_t *smm = vnet_get_session_main ();
+ transport_main_t *tm = &tp_main;
u32 num_threads;
if (smm->local_endpoints_table_buckets == 0)
@@ -846,15 +980,18 @@ transport_init (void)
smm->local_endpoints_table_memory = 512 << 20;
/* Initialize [port-allocator] random number seed */
- port_allocator_seed = (u32) clib_cpu_time_now ();
+ tm->port_allocator_seed = (u32) clib_cpu_time_now ();
+ tm->port_allocator_min_src_port = smm->port_allocator_min_src_port;
+ tm->port_allocator_max_src_port = smm->port_allocator_max_src_port;
- clib_bihash_init_24_8 (&local_endpoints_table, "local endpoints table",
+ clib_bihash_init_24_8 (&tm->local_endpoints_table, "local endpoints table",
smm->local_endpoints_table_buckets,
smm->local_endpoints_table_memory);
+ clib_spinlock_init (&tm->local_endpoints_lock);
+
num_threads = 1 /* main thread */ + vtm->n_threads;
if (num_threads > 1)
{
- clib_spinlock_init (&local_endpoints_lock);
/* Main not polled if there are workers */
smm->transport_cl_thread = 1;
}
diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h
index 447552c539e..e6ba1ecbc5f 100644
--- a/src/vnet/session/transport.h
+++ b/src/vnet/session/transport.h
@@ -57,6 +57,7 @@ typedef struct transport_send_params_
struct
{
u32 max_burst_size;
+ u32 bytes_dequeued;
};
};
transport_snd_flags_t flags;
@@ -65,13 +66,12 @@ typedef struct transport_send_params_
/*
* Transport protocol virtual function table
*/
-/* *INDENT-OFF* */
typedef struct _transport_proto_vft
{
/*
* Setup
*/
- u32 (*start_listen) (u32 session_index, transport_endpoint_t * lcl);
+ u32 (*start_listen) (u32 session_index, transport_endpoint_cfg_t *lcl);
u32 (*stop_listen) (u32 conn_index);
int (*connect) (transport_endpoint_cfg_t * rmt);
void (*half_close) (u32 conn_index, u32 thread_index);
@@ -85,7 +85,8 @@ typedef struct _transport_proto_vft
* Transmission
*/
- u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b);
+ u32 (*push_header) (transport_connection_t *tconn, vlib_buffer_t **b,
+ u32 n_bufs);
int (*send_params) (transport_connection_t * tconn,
transport_send_params_t *sp);
void (*update_time) (f64 time_now, u8 thread_index);
@@ -123,16 +124,13 @@ typedef struct _transport_proto_vft
*/
transport_options_t transport_options;
} transport_proto_vft_t;
-/* *INDENT-ON* */
extern transport_proto_vft_t *tp_vfts;
-#define transport_proto_foreach(VAR, BODY) \
-do { \
- for (VAR = 0; VAR < vec_len (tp_vfts); VAR++) \
- if (tp_vfts[VAR].push_header != 0) \
- do { BODY; } while (0); \
-} while (0)
+#define transport_proto_foreach(VAR, VAR_ALLOW_BM) \
+ for (VAR = 0; VAR < vec_len (tp_vfts); VAR++) \
+ if (tp_vfts[VAR].push_header != 0) \
+ if (VAR_ALLOW_BM & (1 << VAR))
int transport_connect (transport_proto_t tp, transport_endpoint_cfg_t * tep);
void transport_half_close (transport_proto_t tp, u32 conn_index,
@@ -140,7 +138,7 @@ void transport_half_close (transport_proto_t tp, u32 conn_index,
void transport_close (transport_proto_t tp, u32 conn_index, u8 thread_index);
void transport_reset (transport_proto_t tp, u32 conn_index, u8 thread_index);
u32 transport_start_listen (transport_proto_t tp, u32 session_index,
- transport_endpoint_t * tep);
+ transport_endpoint_cfg_t *tep);
u32 transport_stop_listen (transport_proto_t tp, u32 conn_index);
void transport_cleanup (transport_proto_t tp, u32 conn_index,
u8 thread_index);
@@ -246,13 +244,14 @@ transport_register_new_protocol (const transport_proto_vft_t * vft,
transport_proto_vft_t *transport_protocol_get_vft (transport_proto_t tp);
void transport_update_time (clib_time_type_t time_now, u8 thread_index);
-int transport_alloc_local_port (u8 proto, ip46_address_t * ip);
-int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t * rmt,
- ip46_address_t * lcl_addr,
- u16 * lcl_port);
+int transport_alloc_local_port (u8 proto, ip46_address_t *ip,
+ transport_endpoint_cfg_t *rmt);
+int transport_alloc_local_endpoint (u8 proto, transport_endpoint_cfg_t *rmt,
+ ip46_address_t *lcl_addr, u16 *lcl_port);
void transport_share_local_endpoint (u8 proto, ip46_address_t * lcl_ip,
u16 port);
-void transport_endpoint_cleanup (u8 proto, ip46_address_t * lcl_ip, u16 port);
+int transport_release_local_endpoint (u8 proto, ip46_address_t *lcl_ip,
+ u16 port);
void transport_enable_disable (vlib_main_t * vm, u8 is_en);
void transport_init (void);
@@ -329,6 +328,19 @@ transport_connection_is_tx_paced (transport_connection_t * tc)
return (tc->flags & TRANSPORT_CONNECTION_F_IS_TX_PACED);
}
+/**
+ * Clear descheduled flag and update pacer if needed
+ *
+ * To add session to scheduler use @ref transport_connection_reschedule
+ */
+always_inline void
+transport_connection_clear_descheduled (transport_connection_t *tc)
+{
+ tc->flags &= ~TRANSPORT_CONNECTION_F_DESCHED;
+ if (transport_connection_is_tx_paced (tc))
+ transport_connection_tx_pacer_reset_bucket (tc, 0 /* bucket */);
+}
+
u8 *format_transport_pacer (u8 * s, va_list * args);
/**
diff --git a/src/vnet/session/transport_types.h b/src/vnet/session/transport_types.h
index 9ea1f2102b4..b3469fa9fdb 100644
--- a/src/vnet/session/transport_types.h
+++ b/src/vnet/session/transport_types.h
@@ -21,10 +21,8 @@
#include <vnet/tcp/tcp_debug.h>
#include <vppinfra/bihash_24_8.h>
-
#define TRANSPORT_MAX_HDRS_LEN 140 /* Max number of bytes for headers */
-
typedef enum transport_dequeue_type_
{
TRANSPORT_TX_PEEK, /**< reliable transport protos */
@@ -42,24 +40,35 @@ typedef enum transport_service_type_
TRANSPORT_N_SERVICES
} transport_service_type_t;
+/*
+ * IS_TX_PACED : Connection sending is paced
+ * NO_LOOKUP: Don't register connection in lookup. Does not apply to local
+ * apps and transports using the network layer (udp/tcp)
+ * DESCHED: Connection descheduled by the session layer
+ * CLESS: Connection is "connection less". Some important implications of that
+ * are that connections are not pinned to workers and listeners will
+ * have fifos associated to them
+ */
+#define foreach_transport_connection_flag \
+ _ (IS_TX_PACED, "tx_paced") \
+ _ (NO_LOOKUP, "no_lookup") \
+ _ (DESCHED, "descheduled") \
+ _ (CLESS, "connectionless")
+
+typedef enum transport_connection_flags_bits_
+{
+#define _(sym, str) TRANSPORT_CONNECTION_F_BIT_##sym,
+ foreach_transport_connection_flag
+#undef _
+ TRANSPORT_CONNECTION_N_FLAGS
+} transport_connection_flags_bits_t;
+
typedef enum transport_connection_flags_
{
- TRANSPORT_CONNECTION_F_IS_TX_PACED = 1 << 0,
- /**
- * Don't register connection in lookup. Does not apply to local apps
- * and transports using the network layer (udp/tcp)
- */
- TRANSPORT_CONNECTION_F_NO_LOOKUP = 1 << 1,
- /**
- * Connection descheduled by the session layer.
- */
- TRANSPORT_CONNECTION_F_DESCHED = 1 << 2,
- /**
- * Connection is "connection less". Some important implications of that
- * are that connections are not pinned to workers and listeners will
- * have fifos associated to them
- */
- TRANSPORT_CONNECTION_F_CLESS = 1 << 3,
+#define _(sym, str) \
+ TRANSPORT_CONNECTION_F_##sym = 1 << TRANSPORT_CONNECTION_F_BIT_##sym,
+ foreach_transport_connection_flag
+#undef _
} transport_connection_flags_t;
typedef struct _spacer
@@ -106,6 +115,7 @@ typedef struct _transport_connection
u32 c_index; /**< Connection index in transport pool */
u32 thread_index; /**< Worker-thread index */
u8 flags; /**< Transport specific flags */
+ u8 dscp; /**< Differentiated Services Code Point */
/*fib_node_index_t rmt_fei;
dpo_id_t rmt_dpo; */
@@ -114,7 +124,7 @@ typedef struct _transport_connection
#if TRANSPORT_DEBUG
elog_track_t elog_track; /**< Event logging */
- u32 cc_stat_tstamp; /**< CC stats timestamp */
+ f64 cc_stat_tstamp; /**< CC stats timestamp */
#endif
/**
@@ -146,6 +156,7 @@ typedef struct _transport_connection
#define c_stats connection.stats
#define c_pacer connection.pacer
#define c_flags connection.flags
+#define c_dscp connection.dscp
#define s_ho_handle pacer.bytes_per_sec
} transport_connection_t;
@@ -164,7 +175,8 @@ STATIC_ASSERT (sizeof (transport_connection_t) <= 128,
_ (TLS, "tls", "J") \
_ (QUIC, "quic", "Q") \
_ (DTLS, "dtls", "D") \
- _ (SRTP, "srtp", "R")
+ _ (SRTP, "srtp", "R") \
+ _ (HTTP, "http", "H")
typedef enum _transport_proto
{
@@ -175,6 +187,7 @@ typedef enum _transport_proto
u8 *format_transport_proto (u8 * s, va_list * args);
u8 *format_transport_proto_short (u8 * s, va_list * args);
+u8 *format_transport_flags (u8 *s, va_list *args);
u8 *format_transport_connection (u8 * s, va_list * args);
u8 *format_transport_listen_connection (u8 * s, va_list * args);
u8 *format_transport_half_open_connection (u8 * s, va_list * args);
@@ -209,6 +222,7 @@ typedef enum transport_endpt_cfg_flags_
_ (u32, next_node_index) \
_ (u32, next_node_opaque) \
_ (u16, mss) \
+ _ (u8, dscp) \
_ (u8, transport_flags) \
/* clang-format on */
diff --git a/src/vnet/snap/node.c b/src/vnet/snap/node.c
index 2a42907321c..ad88b2b3a90 100644
--- a/src/vnet/snap/node.c
+++ b/src/vnet/snap/node.c
@@ -261,7 +261,6 @@ static char *snap_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (snap_input_node) = {
.function = snap_input,
.name = "snap-input",
@@ -282,7 +281,6 @@ VLIB_REGISTER_NODE (snap_input_node) = {
.format_trace = format_snap_input_trace,
.unformat_buffer = unformat_snap_header,
};
-/* *INDENT-ON* */
static void
snap_setup_node (vlib_main_t *vm, u32 node_index)
diff --git a/src/vnet/snap/snap.h b/src/vnet/snap/snap.h
index f6b3be1847f..028df4ede66 100644
--- a/src/vnet/snap/snap.h
+++ b/src/vnet/snap/snap.h
@@ -75,7 +75,6 @@ typedef enum
typedef union
{
- /* *INDENT-OFF* */
CLIB_PACKED (struct {
/* OUI: organization unique identifier. */
u8 oui[3];
@@ -83,7 +82,6 @@ typedef union
/* Per-OUI protocol. */
u16 protocol;
});
- /* *INDENT-ON* */
u8 as_u8[5];
} snap_header_t;
diff --git a/src/vnet/span/node.c b/src/vnet/span/node.c
index ca5ea68ae90..56977b58dc2 100644
--- a/src/vnet/span/node.c
+++ b/src/vnet/span/node.c
@@ -84,7 +84,6 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0,
if (PREDICT_FALSE (b0->flags & VNET_BUFFER_F_SPAN_CLONE))
return;
- /* *INDENT-OFF* */
clib_bitmap_foreach (i, sm0->mirror_ports)
{
if (mirror_frames[i] == 0)
@@ -122,7 +121,6 @@ span_mirror (vlib_main_t * vm, vlib_node_runtime_t * node, u32 sw_if_index0,
}
}
}
- /* *INDENT-ON* */
}
static_always_inline uword
@@ -304,7 +302,6 @@ VLIB_NODE_FN (span_l2_output_node) (vlib_main_t * vm,
[0] = "error-drop" \
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (span_input_node) = {
span_node_defs,
.name = "span-input",
@@ -349,7 +346,6 @@ clib_error_t *span_init (vlib_main_t * vm)
}
VLIB_INIT_FUNCTION (span_init);
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
#undef span_node_defs
diff --git a/src/vnet/span/span.c b/src/vnet/span/span.c
index ec47920504a..bf5e20f4d14 100644
--- a/src/vnet/span/span.c
+++ b/src/vnet/span/span.c
@@ -87,6 +87,9 @@ span_add_delete_entry (vlib_main_t * vm,
if (enable_rx || disable_rx)
vnet_feature_enable_disable ("device-input", "span-input",
src_sw_if_index, rx, 0, 0);
+ if (enable_rx || disable_rx)
+ vnet_feature_enable_disable ("port-rx-eth", "span-input",
+ src_sw_if_index, rx, 0, 0);
if (enable_tx || disable_tx)
vnet_feature_enable_disable ("interface-output", "span-output",
src_sw_if_index, tx, 0, 0);
@@ -163,13 +166,11 @@ set_interface_span_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_span_command, static) = {
.path = "set interface span",
.short_help = "set interface span <if-name> [l2] {disable | destination <if-name> [both|rx|tx]}",
.function = set_interface_span_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_interfaces_span_command_fn (vlib_main_t * vm,
@@ -188,7 +189,6 @@ show_interfaces_span_command_fn (vlib_main_t * vm,
};
u8 *s = 0;
- /* *INDENT-OFF* */
vec_foreach (si, sm->interfaces)
{
span_mirror_t * drxm = &si->mirror_rxtx[SPAN_FEAT_DEVICE][VLIB_RX];
@@ -229,18 +229,15 @@ show_interfaces_span_command_fn (vlib_main_t * vm,
clib_bitmap_free (d);
}
}
- /* *INDENT-ON* */
vec_free (s);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_interfaces_span_command, static) = {
.path = "show interface span",
.short_help = "Shows SPAN mirror table",
.function = show_interfaces_span_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/span/span_api.c b/src/vnet/span/span_api.c
index 300f619934e..f5b24bdf214 100644
--- a/src/vnet/span/span_api.c
+++ b/src/vnet/span/span_api.c
@@ -61,7 +61,6 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp)
return;
span_feat_t sf = mp->is_l2 ? SPAN_FEAT_L2 : SPAN_FEAT_DEVICE;
- /* *INDENT-OFF* */
vec_foreach (si, sm->interfaces)
{
span_mirror_t * rxm = &si->mirror_rxtx[sf][VLIB_RX];
@@ -90,7 +89,6 @@ vl_api_sw_interface_span_dump_t_handler (vl_api_sw_interface_span_dump_t * mp)
clib_bitmap_free (b);
}
}
- /* *INDENT-ON* */
}
#include <vnet/span/span.api.c>
diff --git a/src/vnet/span/span_doc.md b/src/vnet/span/span_doc.md
deleted file mode 100644
index 9f1db0a6c90..00000000000
--- a/src/vnet/span/span_doc.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# VPP SPAN implementation {#span_doc}
-
-This is a memo intended to contain documentation of the VPP SPAN implementation.
-Everything that is not directly obvious should come here.
-
-
-## Switched Port Analyzer (SPAN)
-Port mirroring is used on a network switch to send a copy of network packets seen on one switch port to a network monitoring connection on another switch port.
-Can be used by network engineers or administrators to measure performance, analyze and debug data or diagnose errors on a network.
-
-### RX traffic node
-There is one static node to mirror incoming packets.
-* span-input: Creates a copy of incoming buffer due to incoming buffers can be reused internally.
-
-Chaining: dpdk-input -> span-input ->
-* original buffer is sent to ethernet-input for processing
-* buffer copy is sent to interface-output
-
-### Configuration
-SPAN supports the following CLI configuration commands:
-
-#### Enable/Disable SPAN (CLI)
- set interface span <if-name> [disable | destination <if-name>]
-
-<if-name>: mirrored interface name
-destination <if-name>: monitoring interface name
-disable: delete mirroring
-
-#### Enable/Disable SPAN (API)
-SPAN supports the following API configuration command:
- sw_interface_span_enable_disable src GigabitEthernet0/8/0 dst GigabitEthernet0/9/0
- sw_interface_span_enable_disable src_sw_if_index 1 dst_sw_if_index 2
-
-src/src_sw_if_index: mirrored interface name
-dst/dst_sw_if_index: monitoring interface name
-
-#### Remove SPAN entry (API)
-SPAN supports the following API configuration command:
- sw_interface_span_enable_disable src_sw_if_index 1 dst_sw_if_index 2 disable
-
-src_sw_if_index: mirrored interface name
-dst_sw_if_index: monitoring interface name
-
-### Configuration example
-
-Mirror all packets on interface GigabitEthernet0/10/0 to interface GigabitEthernet0/11/0.
-
-Configure IPv4 addresses on mirrored interface:
-set interface ip address GigabitEthernet0/10/0 192.168.1.13/24
-set interface state GigabitEthernet0/10/0 up
-
-Configure IPv4 addresses on monitoring interface:
-set interface ip address GigabitEthernet0/11/0 192.168.2.13/24
-set interface state GigabitEthernet0/11/0 up
-
-Configure SPAN
-set span src GigabitEthernet0/10/0 dst GigabitEthernet0/11/0
-
-### Operational data
-
-Active SPAN mirroring CLI show command:
- show interfaces span
-
-Active SPAN mirroring API dump command:
- sw_interface_span_dump
diff --git a/src/vnet/span/span_doc.rst b/src/vnet/span/span_doc.rst
new file mode 100644
index 00000000000..f529fb36eb4
--- /dev/null
+++ b/src/vnet/span/span_doc.rst
@@ -0,0 +1,84 @@
+.. _span_doc:
+
+Switched Port Analyzer
+======================
+
+This is a memo intended to contain documentation of the VPP SPAN
+implementation. Everything that is not directly obvious should come
+here.
+
+Port mirroring is used on a network switch to send a copy of network
+packets seen on one switch port to a network monitoring connection on
+another switch port. Can be used by network engineers or administrators
+to measure performance, analyze and debug data or diagnose errors on a
+network.
+
+RX traffic node
+~~~~~~~~~~~~~~~
+
+There is one static node to mirror incoming packets. \* span-input:
+Creates a copy of incoming buffer due to incoming buffers can be reused
+internally.
+
+Chaining: dpdk-input -> span-input -> \* original buffer is sent to
+ethernet-input for processing \* buffer copy is sent to interface-output
+
+Configuration
+~~~~~~~~~~~~~
+
+SPAN supports the following CLI configuration commands:
+
+Enable/Disable SPAN (CLI)
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+ set interface span <if-name> [disable | destination <if-name>]
+
+: mirrored interface name destination : monitoring interface name
+disable: delete mirroring
+
+Enable/Disable SPAN (API)
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+SPAN supports the following API configuration command:
+sw_interface_span_enable_disable src GigabitEthernet0/8/0 dst
+GigabitEthernet0/9/0 sw_interface_span_enable_disable src_sw_if_index 1
+dst_sw_if_index 2
+
+src/src_sw_if_index: mirrored interface name dst/dst_sw_if_index:
+monitoring interface name
+
+Remove SPAN entry (API)
+^^^^^^^^^^^^^^^^^^^^^^^
+
+SPAN supports the following API configuration command:
+sw_interface_span_enable_disable src_sw_if_index 1 dst_sw_if_index 2
+disable
+
+src_sw_if_index: mirrored interface name dst_sw_if_index: monitoring
+interface name
+
+Configuration example
+~~~~~~~~~~~~~~~~~~~~~
+
+Mirror all packets on interface GigabitEthernet0/10/0 to interface
+GigabitEthernet0/11/0.
+
+Configure IPv4 addresses on mirrored interface: set interface ip address
+GigabitEthernet0/10/0 192.168.1.13/24 set interface state
+GigabitEthernet0/10/0 up
+
+Configure IPv4 addresses on monitoring interface: set interface ip
+address GigabitEthernet0/11/0 192.168.2.13/24 set interface state
+GigabitEthernet0/11/0 up
+
+Configure SPAN set span src GigabitEthernet0/10/0 dst
+GigabitEthernet0/11/0
+
+Operational data
+~~~~~~~~~~~~~~~~
+
+Active SPAN mirroring CLI show command: show interfaces span
+
+Active SPAN mirroring API dump command: sw_interface_span_dump
diff --git a/src/vnet/srmpls/dir.dox b/src/vnet/srmpls/dir.dox
index 76ec1d6a41b..76ec1d6a41b 100755..100644
--- a/src/vnet/srmpls/dir.dox
+++ b/src/vnet/srmpls/dir.dox
diff --git a/src/vnet/srmpls/sr_doc.md b/src/vnet/srmpls/sr_doc.md
deleted file mode 100644
index 29110ec8c41..00000000000
--- a/src/vnet/srmpls/sr_doc.md
+++ /dev/null
@@ -1,121 +0,0 @@
-# SR-MPLS: Segment Routing for MPLS {#srmpls_doc}
-
-This is a memo intended to contain documentation of the VPP SR-MPLS implementation.
-Everything that is not directly obvious should come here.
-For any feedback on content that should be explained please mailto:pcamaril@cisco.com
-
-## Segment Routing
-
-Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior.
-
-Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era.
-
-Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements.
-
-Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design.
-
-Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
-
-**The implementation of Segment Routing in VPP covers both the IPv6 data plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page contains the SR-MPLS documentation.**
-
-## Segment Routing terminology
-
-* SegmentID (SID): is an MPLS label.
-* Segment List (SL) (SID List): is the sequence of SIDs that the packet will traverse.
-* SR Policy: is a set of candidate paths (SID list+weight). An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. In case several SID lists are defined, traffic steered into the policy is unevenly load-balanced among them according to their respective weights.
-* BindingSID: a BindingSID is a SID (only one) associated one-one with an SR Policy. If a packet arrives with MPLS label corresponding to a BindingSID, then the SR policy will be applied to such packet. (BindingSID is popped first.)
-
-## SR-MPLS features in VPP
-
-The SR-MPLS implementation is focused on the SR policies, as well on its steering. Others SR-MPLS features, such as for example AdjSIDs, can be achieved using the regular VPP MPLS implementation.
-
-The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-segment-routing-policy/">Segment Routing Policy (*draft-filsfils-spring-segment-routing-policy*)</a> defines SR Policies.
-
-## Creating a SR Policy
-
-An SR Policy is defined by a Binding SID and a weighted set of Segment Lists.
-
-A new SR policy is created with a first SID list using:
-
- sr mpls policy add bsid 40001 next 16001 next 16002 next 16003 (weight 5)
-
-* The weight parameter is only used if more than one SID list is associated with the policy.
-
-An SR policy is deleted with:
-
- sr mpls policy del bsid 40001
-
-The existing SR policies are listed with:
-
- show sr mpls policies
-
-### Adding/Removing SID Lists from an SR policy
-
-An additional SID list is associated with an existing SR policy with:
-
- sr mpls policy mod bsid 40001 add sl next 16001 next 16002 next 16003 (weight 3)
-
-Conversely, a SID list can be removed from an SR policy with:
-
- sr mpls policy mod bsid 4001 del sl index 1
-
-Note that this CLI cannot be used to remove the last SID list of a policy. Instead the SR policy delete CLI must be used.
-
-The weight of a SID list can also be modified with:
-
- sr mpls policy mod bsid 40001 mod sl index 1 weight 4
-
-### SR Policies: Spray policies
-
-Spray policies are a specific type of SR policies where the packet is replicated on all the SID lists, rather than load-balanced among them.
-
-SID list weights are ignored with this type of policies.
-
-A Spray policy is instantiated by appending the keyword **spray** to a regular SR-MPLS policy command, as in:
-
- sr mpls policy add bsid 40002 next 16001 next 16002 next 16003 spray
-
-Spray policies are used for removing multicast state from a network core domain, and instead send a linear unicast copy to every access node. The last SID in each list accesses the multicast tree within the access node.
-
-## Steering packets into a SR Policy
-
-Segment Routing supports three methos of steering traffic into an SR policy.
-
-### Local steering
-
-In this variant incoming packets match a routing policy which directs them on a local SR policy.
-
-In order to achieve this behavior the user needs to create an 'sr steering policy via sr policy bsid'.
-
- sr mpls steer l3 2001::/64 via sr policy bsid 40001
- sr mpls steer l3 2001::/64 via sr policy bsid 40001 fib-table 3
- sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001
- sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001 vpn-label 500
-
-### Remote steering
-
-In this variant incoming packets have an active SID matching a local BSID at the head-end.
-
-In order to achieve this behavior the packets should simply arrive with an active SID equal to the Binding SID of a locally instantiated SR policy.
-
-### Automated steering
-
-In this variant incoming packets match a BGP/Service route which recurses on the BSID of a local policy.
-
-In order to achieve this behavior the user first needs to color the SR policies. He can do so by using the CLI:
-
- sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234
-
-Notice that an SR policy can have a single endpoint and a single color. Notice that the *endpoint* value is an IP46 address and the color a u32.
-
-
-Then, for any BGP/Service route the user has to use the API to steer prefixes:
-
- sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2
- sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500
-
-Notice that *co* refers to the CO-bits (values [0|1|2|3]).
-
-Notice also that a given prefix might be steered over several colors (same next-hop and same co-bit value). In order to add new colors just execute the API several times (or with the del parameter to delete the color).
-
-This variant is meant to be used in conjunction with a control plane agent that uses the underlying binary API bindings of *sr_mpls_steering_policy_add*/*sr_mpls_steering_policy_del* for any BGP service route received. \ No newline at end of file
diff --git a/src/vnet/srmpls/sr_doc.rst b/src/vnet/srmpls/sr_doc.rst
new file mode 100644
index 00000000000..ed847fa0d42
--- /dev/null
+++ b/src/vnet/srmpls/sr_doc.rst
@@ -0,0 +1,215 @@
+.. _srmpls_doc:
+
+SR-MPLS: Segment Routing for MPLS
+=================================
+
+This is a memo intended to contain documentation of the VPP SR-MPLS
+implementation. Everything that is not directly obvious should come
+here. For any feedback on content that should be explained please
+mailto:pcamaril@cisco.com
+
+Segment Routing
+---------------
+
+Segment routing is a network technology focused on addressing the
+limitations of existing IP and Multiprotocol Label Switching (MPLS)
+networks in terms of simplicity, scale, and ease of operation. It is a
+foundation for application engineered routing as it prepares the
+networks for new business models where applications can control the
+network behavior.
+
+Segment routing seeks the right balance between distributed intelligence
+and centralized optimization and programming. It was built for the
+software-defined networking (SDN) era.
+
+Segment routing enhances packet forwarding behavior by enabling a
+network to transport unicast packets through a specific forwarding path,
+different from the normal path that a packet usually takes (IGP shortest
+path or BGP best path). This capability benefits many use cases, and one
+can build those specific paths based on application requirements.
+
+Segment routing uses the source routing paradigm. A node, usually a
+router but also a switch, a trusted server, or a virtual forwarder
+running on a hypervisor, steers a packet through an ordered list of
+instructions, called segments. A segment can represent any instruction,
+topological or service-based. A segment can have a local semantic to a
+segment-routing node or global within a segment-routing network. Segment
+routing allows an operator to enforce a flow through any topological
+path and service chain while maintaining per-flow state only at the
+ingress node to the segment-routing network. Segment routing also
+supports equal-cost multipath (ECMP) by design.
+
+Segment routing can operate with either an MPLS or an IPv6 data plane.
+All the currently available MPLS services, such as Layer 3 VPN (L3VPN),
+L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services
+[VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet
+VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
+
+**The implementation of Segment Routing in VPP covers both the IPv6 data
+plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page
+contains the SR-MPLS documentation.**
+
+Segment Routing terminology
+---------------------------
+
+- SegmentID (SID): is an MPLS label.
+- Segment List (SL) (SID List): is the sequence of SIDs that the packet
+ will traverse.
+- SR Policy: is a set of candidate paths (SID list+weight). An SR
+ policy is uniquely identified by its Binding SID and associated with
+ a weighted set of Segment Lists. In case several SID lists are
+ defined, traffic steered into the policy is unevenly load-balanced
+ among them according to their respective weights.
+- BindingSID: a BindingSID is a SID (only one) associated one-one with
+ an SR Policy. If a packet arrives with MPLS label corresponding to a
+ BindingSID, then the SR policy will be applied to such packet.
+ (BindingSID is popped first.)
+
+SR-MPLS features in VPP
+-----------------------
+
+The SR-MPLS implementation is focused on the SR policies, as well on its
+steering. Others SR-MPLS features, such as for example AdjSIDs, can be
+achieved using the regular VPP MPLS implementation.
+
+The Segment Routing Policy
+(*draft-filsfils-spring-segment-routing-policy*) defines SR Policies.
+
+Creating a SR Policy
+--------------------
+
+An SR Policy is defined by a Binding SID and a weighted set of Segment
+Lists.
+
+A new SR policy is created with a first SID list using:
+
+::
+
+ sr mpls policy add bsid 40001 next 16001 next 16002 next 16003 (weight 5)
+
+- The weight parameter is only used if more than one SID list is
+ associated with the policy.
+
+An SR policy is deleted with:
+
+::
+
+ sr mpls policy del bsid 40001
+
+The existing SR policies are listed with:
+
+::
+
+ show sr mpls policies
+
+Adding/Removing SID Lists from an SR policy
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+An additional SID list is associated with an existing SR policy with:
+
+::
+
+ sr mpls policy mod bsid 40001 add sl next 16001 next 16002 next 16003 (weight 3)
+
+Conversely, a SID list can be removed from an SR policy with:
+
+::
+
+ sr mpls policy mod bsid 4001 del sl index 1
+
+Note that this CLI cannot be used to remove the last SID list of a
+policy. Instead the SR policy delete CLI must be used.
+
+The weight of a SID list can also be modified with:
+
+::
+
+ sr mpls policy mod bsid 40001 mod sl index 1 weight 4
+
+SR Policies: Spray policies
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Spray policies are a specific type of SR policies where the packet is
+replicated on all the SID lists, rather than load-balanced among them.
+
+SID list weights are ignored with this type of policies.
+
+A Spray policy is instantiated by appending the keyword **spray** to a
+regular SR-MPLS policy command, as in:
+
+::
+
+ sr mpls policy add bsid 40002 next 16001 next 16002 next 16003 spray
+
+Spray policies are used for removing multicast state from a network core
+domain, and instead send a linear unicast copy to every access node. The
+last SID in each list accesses the multicast tree within the access
+node.
+
+Steering packets into a SR Policy
+---------------------------------
+
+Segment Routing supports three methods of steering traffic into an SR
+policy.
+
+Local steering
+~~~~~~~~~~~~~~
+
+In this variant incoming packets match a routing policy which directs
+them on a local SR policy.
+
+In order to achieve this behavior the user needs to create an ‘sr
+steering policy via sr policy bsid’.
+
+::
+
+ sr mpls steer l3 2001::/64 via sr policy bsid 40001
+ sr mpls steer l3 2001::/64 via sr policy bsid 40001 fib-table 3
+ sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001
+ sr mpls steer l3 10.0.0.0/16 via sr policy bsid 40001 vpn-label 500
+
+Remote steering
+~~~~~~~~~~~~~~~
+
+In this variant incoming packets have an active SID matching a local
+BSID at the head-end.
+
+In order to achieve this behavior the packets should simply arrive with
+an active SID equal to the Binding SID of a locally instantiated SR
+policy.
+
+Automated steering
+~~~~~~~~~~~~~~~~~~
+
+In this variant incoming packets match a BGP/Service route which
+recurses on the BSID of a local policy.
+
+In order to achieve this behavior the user first needs to color the SR
+policies. He can do so by using the CLI:
+
+::
+
+ sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234
+
+Notice that an SR policy can have a single endpoint and a single color.
+Notice that the *endpoint* value is an IP46 address and the color a u32.
+
+Then, for any BGP/Service route the user has to use the API to steer
+prefixes:
+
+::
+
+ sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2
+ sr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500
+
+Notice that *co* refers to the CO-bits (values [0|1|2|3]).
+
+Notice also that a given prefix might be steered over several colors
+(same next-hop and same co-bit value). In order to add new colors just
+execute the API several times (or with the del parameter to delete the
+color).
+
+This variant is meant to be used in conjunction with a control plane
+agent that uses the underlying binary API bindings of
+*sr_mpls_steering_policy_add*/*sr_mpls_steering_policy_del* for any BGP
+service route received.
diff --git a/src/vnet/srmpls/sr_mpls.h b/src/vnet/srmpls/sr_mpls.h
index 5b04f76b7a7..a8f9494428f 100644
--- a/src/vnet/srmpls/sr_mpls.h
+++ b/src/vnet/srmpls/sr_mpls.h
@@ -67,7 +67,7 @@ typedef struct
u8 type; /**< Type (default is 0) */
/* SR Policy specific DPO */
- /* IF Type = DEFAULT Then Load Balancer DPO among SID lists */
+ /* IF Type = DEFAULT Then Load-Balancer DPO among SID lists */
/* IF Type = SPRAY then Spray DPO with all SID lists */
ip46_address_t endpoint; /**< Optional NH for SR TE */
diff --git a/src/vnet/srmpls/sr_mpls_api.c b/src/vnet/srmpls/sr_mpls_api.c
index d6216c68391..920856acff6 100644
--- a/src/vnet/srmpls/sr_mpls_api.c
+++ b/src/vnet/srmpls/sr_mpls_api.c
@@ -29,7 +29,6 @@
#include <vnet/srmpls/sr_mpls.api_enum.h>
#include <vnet/srmpls/sr_mpls.api_types.h>
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
#define vl_api_version(n, v) static u32 api_version = v;
#include <vnet/srmpls/sr_mpls.api.h>
@@ -39,6 +38,10 @@
#include <vnet/srmpls/sr_mpls.api.h>
#undef vl_endianfun
+#define vl_calcsizefun
+#include <vnet/srmpls/sr_mpls.api.h>
+#undef vl_calcsizefun
+
#define vl_printfun
#include <vnet/srmpls/sr_mpls.api.h>
#undef vl_printfun
@@ -190,10 +193,18 @@ sr_mpls_api_hookup (vlib_main_t * vm)
vec_free (name);
#define _(N, n) \
- vl_msg_api_set_handlers (REPLY_MSG_ID_BASE + VL_API_##N, #n, \
- vl_api_##n##_t_handler, vl_noop_handler, \
- vl_api_##n##_t_endian, vl_api_##n##_t_print, \
- sizeof (vl_api_##n##_t), 1);
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){ \
+ .id = REPLY_MSG_ID_BASE + VL_API_##N, \
+ .name = #n, \
+ .handler = vl_api_##n##_t_handler, \
+ .endian = vl_api_##n##_t_endian, \
+ .format_fn = vl_api_##n##_t_format, \
+ .size = sizeof (vl_api_##n##_t), \
+ .traced = 1, \
+ .tojson = vl_api_##n##_t_tojson, \
+ .fromjson = vl_api_##n##_t_fromjson, \
+ .calc_size = vl_api_##n##_t_calc_size, \
+ });
foreach_vpe_api_msg;
#undef _
@@ -201,21 +212,34 @@ sr_mpls_api_hookup (vlib_main_t * vm)
* Manually register the sr policy add msg, so we trace enough bytes
* to capture a typical segment list
*/
- vl_msg_api_set_handlers (REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_ADD,
- "sr_mpls_policy_add",
- vl_api_sr_mpls_policy_add_t_handler,
- vl_noop_handler, vl_api_sr_mpls_policy_add_t_endian,
- vl_api_sr_mpls_policy_add_t_print, 256, 1);
-
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){
+ .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_ADD,
+ .name = "sr_mpls_policy_add",
+ .handler = vl_api_sr_mpls_policy_add_t_handler,
+ .endian = vl_api_sr_mpls_policy_add_t_endian,
+ .format_fn = vl_api_sr_mpls_policy_add_t_format,
+ .size = 256,
+ .traced = 1,
+ .tojson = vl_api_sr_mpls_policy_add_t_tojson,
+ .fromjson = vl_api_sr_mpls_policy_add_t_fromjson,
+ .calc_size = vl_api_sr_mpls_policy_add_t_calc_size,
+ });
/*
* Manually register the sr policy mod msg, so we trace enough bytes
* to capture a typical segment list
*/
- vl_msg_api_set_handlers (REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_MOD,
- "sr_mpls_policy_mod",
- vl_api_sr_mpls_policy_mod_t_handler,
- vl_noop_handler, vl_api_sr_mpls_policy_mod_t_endian,
- vl_api_sr_mpls_policy_mod_t_print, 256, 1);
+ vl_msg_api_config (&(vl_msg_api_msg_config_t){
+ .id = REPLY_MSG_ID_BASE + VL_API_SR_MPLS_POLICY_MOD,
+ .name = "sr_mpls_policy_mod",
+ .handler = vl_api_sr_mpls_policy_mod_t_handler,
+ .endian = vl_api_sr_mpls_policy_mod_t_endian,
+ .format_fn = vl_api_sr_mpls_policy_mod_t_format,
+ .size = 256,
+ .traced = 1,
+ .tojson = vl_api_sr_mpls_policy_mod_t_tojson,
+ .fromjson = vl_api_sr_mpls_policy_mod_t_fromjson,
+ .calc_size = vl_api_sr_mpls_policy_mod_t_calc_size,
+ });
/*
* Set up the (msg_name, crc, message-id) table
diff --git a/src/vnet/srmpls/sr_mpls_policy.c b/src/vnet/srmpls/sr_mpls_policy.c
index 8f0804850f1..41cb71601e9 100644
--- a/src/vnet/srmpls/sr_mpls_policy.c
+++ b/src/vnet/srmpls/sr_mpls_policy.c
@@ -108,7 +108,6 @@ create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight)
fib_route_path_t *paths = NULL;
vec_add1 (paths, path);
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_len = 21,
.fp_proto = FIB_PROTOCOL_MPLS,
@@ -116,7 +115,6 @@ create_sl (mpls_sr_policy_t * sr_policy, mpls_label_t * sl, u32 weight)
.fp_eos = eos,
.fp_payload_proto = DPO_PROTO_MPLS,
};
- /* *INDENT-ON* */
fib_table_entry_path_add2 (0,
&pfx,
@@ -245,7 +243,6 @@ sr_mpls_policy_del (mpls_label_t bsid)
/* remove each of the MPLS routes */
FOR_EACH_MPLS_EOS_BIT (eos)
{
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_len = 21,
.fp_proto = FIB_PROTOCOL_MPLS,
@@ -253,7 +250,6 @@ sr_mpls_policy_del (mpls_label_t bsid)
.fp_eos = eos,
.fp_payload_proto = DPO_PROTO_MPLS,
};
- /* *INDENT-ON* */
fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
}
@@ -359,7 +355,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
FOR_EACH_MPLS_EOS_BIT (eos)
{
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_len = 21,
.fp_proto = FIB_PROTOCOL_MPLS,
@@ -367,7 +362,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
.fp_eos = eos,
.fp_payload_proto = DPO_PROTO_MPLS,
};
- /* *INDENT-ON* */
fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
}
@@ -411,7 +405,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
FOR_EACH_MPLS_EOS_BIT (eos)
{
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_len = 21,
.fp_proto = FIB_PROTOCOL_MPLS,
@@ -419,7 +412,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
.fp_eos = eos,
.fp_payload_proto = DPO_PROTO_MPLS,
};
- /* *INDENT-ON* */
fib_table_entry_path_remove2 (0, &pfx, FIB_SOURCE_SR, paths);
}
@@ -434,7 +426,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
FOR_EACH_MPLS_EOS_BIT (eos)
{
- /* *INDENT-OFF* */
fib_prefix_t pfx = {
.fp_len = 21,
.fp_proto = FIB_PROTOCOL_MPLS,
@@ -442,7 +433,6 @@ sr_mpls_policy_mod (mpls_label_t bsid, u8 operation,
.fp_eos = eos,
.fp_payload_proto = DPO_PROTO_MPLS,
};
- /* *INDENT-ON* */
fib_table_entry_path_add2 (0,
&pfx,
@@ -568,7 +558,6 @@ sr_mpls_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(sr_mpls_policy_command, static)=
{
.path = "sr mpls policy",
@@ -577,7 +566,6 @@ VLIB_CLI_COMMAND(sr_mpls_policy_command, static)=
.long_help = "TBD.\n",
.function = sr_mpls_policy_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI to display onscreen all the SR MPLS policies
@@ -597,11 +585,9 @@ show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "SR MPLS policies:");
- /* *INDENT-OFF* */
pool_foreach (sr_policy, sm->sr_policies) {
vec_add1(vec_policies, sr_policy);
}
- /* *INDENT-ON* */
vec_foreach_index (i, vec_policies)
{
@@ -647,14 +633,12 @@ show_sr_mpls_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(show_sr_mpls_policies_command, static)=
{
.path = "show sr mpls policies",
.short_help = "show sr mpls policies",
.function = show_sr_mpls_policies_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief Update the Endpoint,Color tuple of an SR policy
@@ -888,14 +872,12 @@ cli_sr_mpls_policy_ec_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(cli_sr_mpls_policy_ec_command, static)=
{
.path = "sr mpls policy te",
.short_help = "sr mpls policy te bsid xxxxx endpoint x.x.x.x color 12341234",
.function = cli_sr_mpls_policy_ec_command_fn,
};
-/* *INDENT-ON* */
/********************* SR MPLS Policy initialization ***********************/
/**
diff --git a/src/vnet/srmpls/sr_mpls_steering.c b/src/vnet/srmpls/sr_mpls_steering.c
index b12e78d2755..e8920df542b 100644
--- a/src/vnet/srmpls/sr_mpls_steering.c
+++ b/src/vnet/srmpls/sr_mpls_steering.c
@@ -770,7 +770,6 @@ sr_mpls_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(sr_mpls_steer_policy_command, static)=
{
.path = "sr mpls steer",
@@ -785,7 +784,6 @@ VLIB_CLI_COMMAND(sr_mpls_steer_policy_command, static)=
"\t\tsr steer l3 2001::/64 via next-hop 2001::1 color 1234 co 2 vpn-label 500\n",
.function = sr_mpls_steer_policy_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm,
@@ -799,11 +797,9 @@ show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm,
int i;
vlib_cli_output (vm, "SR MPLS steering policies:");
- /* *INDENT-OFF* */
pool_foreach (steer_pl, sm->steer_policies) {
vec_add1(steer_policies, steer_pl);
}
- /* *INDENT-ON* */
for (i = 0; i < vec_len (steer_policies); i++)
{
vlib_cli_output (vm, "==========================");
@@ -871,14 +867,12 @@ show_sr_mpls_steering_policies_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND(show_sr_mpls_steering_policies_command, static)=
{
.path = "show sr mpls steering policies",
.short_help = "show sr mpls steering policies",
.function = show_sr_mpls_steering_policies_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
sr_mpls_steering_init (vlib_main_t * vm)
@@ -894,9 +888,7 @@ sr_mpls_steering_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION(sr_mpls_steering_init);
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/srmpls/sr_mpls_test.c b/src/vnet/srmpls/sr_mpls_test.c
new file mode 100644
index 00000000000..e5d68462443
--- /dev/null
+++ b/src/vnet/srmpls/sr_mpls_test.c
@@ -0,0 +1,174 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vpp/api/types.h>
+
+#define __plugin_msg_base sr_mpls_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <vnet/format_fns.h>
+#include <vnet/srmpls/sr_mpls.api_enum.h>
+#include <vnet/srmpls/sr_mpls.api_types.h>
+
+#define vl_endianfun /* define message structures */
+#include <vnet/srmpls/sr_mpls.api.h>
+#undef vl_endianfun
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ u32 ping_id;
+ vat_main_t *vat_main;
+} sr_mpls_test_main_t;
+
+static sr_mpls_test_main_t sr_mpls_test_main;
+
+static int
+api_sr_mpls_policy_mod (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_mpls_steering_add_del (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_mpls_policy_assign_endpoint_color (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_mpls_policy_add (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sr_mpls_policy_add_t *mp;
+ u32 bsid = 0;
+ u32 weight = 1;
+ u8 type = 0;
+ u8 n_segments = 0;
+ u32 sid;
+ u32 *segments = NULL;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bsid %d", &bsid))
+ ;
+ else if (unformat (i, "weight %d", &weight))
+ ;
+ else if (unformat (i, "spray"))
+ type = 1;
+ else if (unformat (i, "next %d", &sid))
+ {
+ n_segments += 1;
+ vec_add1 (segments, htonl (sid));
+ }
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (bsid == 0)
+ {
+ errmsg ("bsid not set");
+ return -99;
+ }
+
+ if (n_segments == 0)
+ {
+ errmsg ("no sid in segment stack");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M2 (SR_MPLS_POLICY_ADD, mp, sizeof (u32) * n_segments);
+
+ mp->bsid = htonl (bsid);
+ mp->weight = htonl (weight);
+ mp->is_spray = type;
+ mp->n_segments = n_segments;
+ memcpy (mp->segments, segments, sizeof (u32) * n_segments);
+ vec_free (segments);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+static int
+api_sr_mpls_policy_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sr_mpls_policy_del_t *mp;
+ u32 bsid = 0;
+ int ret;
+
+ /* Parse args required to build the message */
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "bsid %d", &bsid))
+ ;
+ else
+ {
+ clib_warning ("parse error '%U'", format_unformat_error, i);
+ return -99;
+ }
+ }
+
+ if (bsid == 0)
+ {
+ errmsg ("bsid not set");
+ return -99;
+ }
+
+ /* Construct the API message */
+ M (SR_MPLS_POLICY_DEL, mp);
+
+ mp->bsid = htonl (bsid);
+
+ /* send it... */
+ S (mp);
+
+ /* Wait for a reply... */
+ W (ret);
+ return ret;
+}
+
+#include <vnet/srmpls/sr_mpls.api_test.c>
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/srp/node.c b/src/vnet/srp/node.c
index 12c14012b61..26c3f0b8c1f 100644
--- a/src/vnet/srp/node.c
+++ b/src/vnet/srp/node.c
@@ -878,9 +878,11 @@ static clib_error_t * srp_init (vlib_main_t * vm)
sm->default_data_ttl = 255;
sm->vlib_main = vm;
- vlib_register_node (vm, &srp_ips_process_node);
- vlib_register_node (vm, &srp_input_node);
- vlib_register_node (vm, &srp_control_input_node);
+ vlib_register_node (vm, &srp_ips_process_node, "%s",
+ srp_ips_process_node.name);
+ vlib_register_node (vm, &srp_input_node, "%s", srp_input_node.name);
+ vlib_register_node (vm, &srp_control_input_node, "%s",
+ srp_control_input_node.name);
srp_setup_node (vm, srp_input_node.index);
return 0;
diff --git a/src/vnet/srp/packet.h b/src/vnet/srp/packet.h
index 96dab648b32..38296ac6ec8 100644
--- a/src/vnet/srp/packet.h
+++ b/src/vnet/srp/packet.h
@@ -40,8 +40,7 @@
#ifndef included_srp_packet_h
#define included_srp_packet_h
-#include <vppinfra/byte_order.h>
-#include <vppinfra/bitops.h>
+#include <vppinfra/clib.h>
#include <vnet/ethernet/packet.h>
/* SRP version 2. */
diff --git a/src/vnet/srv6/dir.dox b/src/vnet/srv6/dir.dox
index 3f539a58ef1..3f539a58ef1 100755..100644
--- a/src/vnet/srv6/dir.dox
+++ b/src/vnet/srv6/dir.dox
diff --git a/src/vnet/srv6/sr.api b/src/vnet/srv6/sr.api
index 6190a8c7ff5..4766ce3ba11 100644
--- a/src/vnet/srv6/sr.api
+++ b/src/vnet/srv6/sr.api
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-option version = "2.0.0";
+option version = "2.1.0";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
@@ -109,6 +109,65 @@ autoreply define sr_policy_mod
vl_api_srv6_sid_list_t sids;
};
+enum sr_policy_type : u8
+{
+ SR_API_POLICY_TYPE_DEFAULT = 0,
+ SR_API_POLICY_TYPE_SPRAY = 1,
+ SR_API_POLICY_TYPE_TEF = 2,
+};
+
+/** \brief IPv6 SR policy add
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param weight is the weight of the sid list. optional.
+ @param is_encap is the behavior of the SR policy. (0.SRH insert // 1.Encapsulation)
+ @param type is the SR policy param. (0.Default // 1.Spray // 2.Tef)
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param sids is a srv6_sid_list object
+ @param encap_src is a encaps IPv6 source addr. optional.
+*/
+autoreply define sr_policy_add_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ip6_address_t bsid_addr;
+ u32 weight;
+ bool is_encap;
+ vl_api_sr_policy_type_t type [default=0x0];
+ u32 fib_table;
+ vl_api_srv6_sid_list_t sids;
+ vl_api_ip6_address_t encap_src;
+ option status="in_progress";
+};
+
+/** \brief IPv6 SR policy modification
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param bsid is the bindingSID of the SR Policy
+ @param sr_policy_index is the index of the SR policy
+ @param fib_table is the VRF where to install the FIB entry for the BSID
+ @param operation is the operation to perform (among the top ones)
+ @param sl_index is the index of the Segment List to modify/delete
+ @param weight is the weight of the sid list. optional.
+ @param sids is a srv6_sid_list object
+ @param encap_src is a encaps IPv6 source addr. optional.
+*/
+autoreply define sr_policy_mod_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_ip6_address_t bsid_addr;
+ u32 sr_policy_index;
+ u32 fib_table;
+ vl_api_sr_policy_op_t operation;
+ u32 sl_index;
+ u32 weight;
+ vl_api_srv6_sid_list_t sids;
+ vl_api_ip6_address_t encap_src;
+ option status="in_progress";
+};
+
/** \brief IPv6 SR policy deletion
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -195,12 +254,45 @@ define sr_localsids_details
u32 xconnect_iface_or_vrf_table;
};
+
+/** \brief Dump the list of SR LocalSIDs along with packet statistics
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define sr_localsids_with_packet_stats_dump
+{
+ u32 client_index;
+ u32 context;
+ option status="in_progress";
+};
+
+define sr_localsids_with_packet_stats_details
+{
+ u32 context;
+ vl_api_ip6_address_t addr;
+ bool end_psp;
+ vl_api_sr_behavior_t behavior;
+ u32 fib_table;
+ u32 vlan_index;
+ vl_api_address_t xconnect_nh_addr;
+ u32 xconnect_iface_or_vrf_table;
+ u64 good_traffic_bytes;
+ u64 good_traffic_pkt_count;
+ u64 bad_traffic_bytes;
+ u64 bad_traffic_pkt_count;
+ option status="in_progress";
+};
+
+
+
/** \brief Dump the list of SR policies
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
*/
define sr_policies_dump
{
+ option deprecated;
+
u32 client_index;
u32 context;
};
@@ -217,6 +309,28 @@ define sr_policies_details
vl_api_srv6_sid_list_t sid_lists[num_sid_lists];
};
+/** \brief Dump the list of SR policies v2
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+*/
+define sr_policies_v2_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+define sr_policies_v2_details
+{
+ u32 context;
+ vl_api_ip6_address_t bsid;
+ vl_api_ip6_address_t encap_src;
+ vl_api_sr_policy_type_t type;
+ bool is_encap;
+ u32 fib_table;
+ u8 num_sid_lists;
+ vl_api_srv6_sid_list_t sid_lists[num_sid_lists];
+};
+
/** \brief Dump the list of SR policies along with actual segment list index on VPP
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
diff --git a/src/vnet/srv6/sr.h b/src/vnet/srv6/sr.h
index d47c2132ee4..c2867eb7508 100644
--- a/src/vnet/srv6/sr.h
+++ b/src/vnet/srv6/sr.h
@@ -56,13 +56,11 @@
#define SR_SEGMENT_LIST_WEIGHT_DEFAULT 1
-/* *INDENT-OFF* */
typedef struct
{
ip6_header_t ip;
ip6_sr_header_t sr;
} __attribute__ ((packed)) ip6srv_combo_header_t;
-/* *INDENT-ON* */
/**
* @brief SR Segment List (SID list)
@@ -75,6 +73,7 @@ typedef struct
u8 *rewrite; /**< Precomputed rewrite header */
u8 *rewrite_bsid; /**< Precomputed rewrite header for bindingSID */
+ u8 policy_type;
u32 egress_fib_table; /**< Egress FIB table for encap packet */
@@ -89,6 +88,7 @@ typedef struct
/* SR policy types */
#define SR_POLICY_TYPE_DEFAULT 0
#define SR_POLICY_TYPE_SPRAY 1
+#define SR_POLICY_TYPE_TEF 2
/**
* @brief SR Policy
*/
@@ -100,7 +100,7 @@ typedef struct
u8 type; /**< Type (default is 0) */
/* SR Policy specific DPO */
- /* IF Type = DEFAULT Then Load Balancer DPO among SID lists */
+ /* IF Type = DEFAULT Then Load-Balancer DPO among SID lists */
/* IF Type = SPRAY then Spray DPO with all SID lists */
dpo_id_t bsid_dpo; /**< SR Policy specific DPO - BSID */
dpo_id_t ip4_dpo; /**< SR Policy specific DPO - IPv6 */
@@ -110,6 +110,8 @@ typedef struct
u8 is_encap; /**< Mode (0 is SRH insert, 1 Encaps) */
+ ip6_address_t encap_src;
+
u16 plugin;
void *plugin_mem;
} ip6_sr_policy_t;
@@ -127,7 +129,7 @@ typedef struct
char end_psp; /**< Combined with End.PSP? */
- u16 behavior; /**< Behavior associated to this localsid */
+ u8 behavior; /**< Behavior associated to this localsid */
union
{
@@ -342,13 +344,13 @@ sr_policy_register_function (vlib_main_t * vm, u8 * fn_name,
sr_p_plugin_callback_t * creation_fn,
sr_p_plugin_callback_t * removal_fn);
-extern int
-sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
- u32 weight, u8 behavior, u32 fib_table, u8 is_encap,
- u16 plugin, void *plugin_mem);
-extern int sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
- u8 operation, ip6_address_t * segments,
- u32 sl_index, u32 weight);
+extern int sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ ip6_address_t *encap_src, u32 weight, u8 type,
+ u32 fib_table, u8 is_encap, u16 plugin,
+ void *plugin_mem);
+extern int sr_policy_mod (ip6_address_t *bsid, u32 index, u32 fib_table,
+ u8 operation, ip6_address_t *segments,
+ ip6_address_t *encap_src, u32 sl_index, u32 weight);
extern int sr_policy_del (ip6_address_t * bsid, u32 index);
extern int
diff --git a/src/vnet/srv6/sr_api.c b/src/vnet/srv6/sr_api.c
index c68b355922b..a44c3098112 100644
--- a/src/vnet/srv6/sr_api.c
+++ b/src/vnet/srv6/sr_api.c
@@ -82,17 +82,16 @@ vl_api_sr_policy_add_t_handler (vl_api_sr_policy_add_t * mp)
ip6_address_decode (mp->bsid_addr, &bsid_addr);
-/*
- * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
- * u32 weight, u8 behavior, u32 fib_table, u8 is_encap,
- * u16 behavior, void *plugin_mem)
- */
+ /*
+ * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ * ip6_address_t *encap_src,
+ * u32 weight, u8 behavior, u32 fib_table, u8 is_encap,
+ * u16 behavior, void *plugin_mem)
+ */
int rv = 0;
- rv = sr_policy_add (&bsid_addr,
- segments,
- ntohl (mp->sids.weight),
- mp->is_spray, ntohl (mp->fib_table), mp->is_encap, 0,
- NULL);
+ rv =
+ sr_policy_add (&bsid_addr, segments, NULL, ntohl (mp->sids.weight),
+ mp->is_spray, ntohl (mp->fib_table), mp->is_encap, 0, NULL);
vec_free (segments);
REPLY_MACRO (VL_API_SR_POLICY_ADD_REPLY);
@@ -115,18 +114,93 @@ vl_api_sr_policy_mod_t_handler (vl_api_sr_policy_mod_t * mp)
ip6_address_decode (mp->bsid_addr, &bsid_addr);
int rv = 0;
-/*
- * int
- * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
- * u8 operation, ip6_address_t *segments, u32 sl_index,
- * u32 weight, u8 is_encap)
- */
- rv = sr_policy_mod (&bsid_addr,
- ntohl (mp->sr_policy_index),
- ntohl (mp->fib_table),
- mp->operation,
- segments, ntohl (mp->sl_index),
- ntohl (mp->sids.weight));
+ /*
+ * int
+ * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
+ * u8 operation, ip6_address_t *segments,
+ * ip6_address_t *encap_src, u32 sl_index,
+ * u32 weight, u8 is_encap)
+ */
+ rv = sr_policy_mod (&bsid_addr, ntohl (mp->sr_policy_index),
+ ntohl (mp->fib_table), mp->operation, segments, NULL,
+ ntohl (mp->sl_index), ntohl (mp->sids.weight));
+ vec_free (segments);
+
+ REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY);
+}
+
+static void
+vl_api_sr_policy_add_v2_t_handler (vl_api_sr_policy_add_v2_t *mp)
+{
+ vl_api_sr_policy_add_v2_reply_t *rmp;
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t bsid_addr;
+ ip6_address_t encap_src;
+
+ int i;
+ for (i = 0; i < mp->sids.num_sids; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ ip6_address_decode (mp->sids.sids[i], seg);
+ }
+
+ ip6_address_decode (mp->bsid_addr, &bsid_addr);
+ ip6_address_decode (mp->encap_src, &encap_src);
+
+ if (ip6_address_is_zero (&encap_src))
+ {
+ encap_src = *sr_get_encaps_source ();
+ }
+ /*
+ * sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ * ip6_address_t *encap_src,
+ * u32 weight, u8 behavior, u32 fib_table, u8 is_encap,
+ * u16 behavior, void *plugin_mem)
+ */
+ int rv = 0;
+ rv =
+ sr_policy_add (&bsid_addr, segments, &encap_src, ntohl (mp->sids.weight),
+ mp->type, ntohl (mp->fib_table), mp->is_encap, 0, NULL);
+ vec_free (segments);
+
+ REPLY_MACRO (VL_API_SR_POLICY_ADD_V2_REPLY);
+}
+
+static void
+vl_api_sr_policy_mod_v2_t_handler (vl_api_sr_policy_mod_v2_t *mp)
+{
+ vl_api_sr_policy_mod_v2_reply_t *rmp;
+ ip6_address_t *segments = 0, *seg;
+ ip6_address_t bsid_addr;
+ ip6_address_t encap_src;
+
+ int i;
+ for (i = 0; i < mp->sids.num_sids; i++)
+ {
+ vec_add2 (segments, seg, 1);
+ ip6_address_decode (mp->sids.sids[i], seg);
+ }
+
+ ip6_address_decode (mp->bsid_addr, &bsid_addr);
+ ip6_address_decode (mp->encap_src, &encap_src);
+
+ if (ip6_address_is_zero (&encap_src))
+ {
+ encap_src = *sr_get_encaps_source ();
+ }
+
+ int rv = 0;
+ /*
+ * int
+ * sr_policy_mod(ip6_address_t *bsid, u32 index, u32 fib_table,
+ * u8 operation, ip6_address_t *segments,
+ * ip6_address_t *encap_src, u32 sl_index,
+ * u32 weight, u8 is_encap)
+ */
+ rv =
+ sr_policy_mod (&bsid_addr, ntohl (mp->sr_policy_index),
+ ntohl (mp->fib_table), mp->operation, segments, &encap_src,
+ ntohl (mp->sl_index), ntohl (mp->sids.weight));
vec_free (segments);
REPLY_MACRO (VL_API_SR_POLICY_MOD_REPLY);
@@ -217,7 +291,7 @@ static void send_sr_localsid_details
rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SR_LOCALSIDS_DETAILS);
ip6_address_encode (&t->localsid, rmp->addr);
rmp->end_psp = t->end_psp;
- rmp->behavior = htons (t->behavior);
+ rmp->behavior = t->behavior;
rmp->fib_table = htonl (t->fib_table);
rmp->vlan_index = htonl (t->vlan_index);
ip_address_encode (&t->next_hop, IP46_TYPE_ANY, &rmp->xconnect_nh_addr);
@@ -247,12 +321,77 @@ static void vl_api_sr_localsids_dump_t_handler
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->localsids)
{
send_sr_localsid_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
+}
+
+static void
+send_sr_localsid_with_packet_stats_details (int local_sid_index,
+ ip6_sr_localsid_t *t,
+ vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_sr_localsids_with_packet_stats_details_t *rmp;
+ vlib_counter_t good_traffic, bad_traffic;
+ ip6_sr_main_t *sm = &sr_main;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_SR_LOCALSIDS_WITH_PACKET_STATS_DETAILS);
+ ip6_address_encode (&t->localsid, rmp->addr);
+ rmp->end_psp = t->end_psp;
+ rmp->behavior = t->behavior;
+ rmp->fib_table = htonl (t->fib_table);
+ rmp->vlan_index = htonl (t->vlan_index);
+ ip_address_encode (&t->next_hop, IP46_TYPE_ANY, &rmp->xconnect_nh_addr);
+
+ if (t->behavior == SR_BEHAVIOR_T || t->behavior == SR_BEHAVIOR_DT6)
+ rmp->xconnect_iface_or_vrf_table =
+ htonl (fib_table_get_table_id (t->sw_if_index, FIB_PROTOCOL_IP6));
+ else if (t->behavior == SR_BEHAVIOR_DT4)
+ rmp->xconnect_iface_or_vrf_table =
+ htonl (fib_table_get_table_id (t->sw_if_index, FIB_PROTOCOL_IP4));
+ else
+ rmp->xconnect_iface_or_vrf_table = htonl (t->sw_if_index);
+
+ rmp->context = context;
+ vlib_get_combined_counter (&(sm->sr_ls_valid_counters), local_sid_index,
+ &good_traffic);
+ vlib_get_combined_counter (&(sm->sr_ls_invalid_counters), local_sid_index,
+ &bad_traffic);
+ rmp->good_traffic_bytes = clib_host_to_net_u64 (good_traffic.bytes);
+ rmp->good_traffic_pkt_count = clib_host_to_net_u64 (good_traffic.packets);
+ rmp->bad_traffic_bytes = clib_host_to_net_u64 (bad_traffic.bytes);
+ rmp->bad_traffic_pkt_count = clib_host_to_net_u64 (bad_traffic.packets);
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_sr_localsids_with_packet_stats_dump_t_handler (
+ vl_api_sr_localsids_with_packet_stats_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_localsid_t **localsid_list = 0;
+ ip6_sr_localsid_t *t;
+ int i;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ pool_foreach (t, sm->localsids)
+ {
+ vec_add1 (localsid_list, t);
+ }
+ for (i = 0; i < vec_len (localsid_list); i++)
+ {
+ t = localsid_list[i];
+ send_sr_localsid_with_packet_stats_details (i, t, reg, mp->context);
+ }
}
static void send_sr_policies_details
@@ -312,15 +451,74 @@ vl_api_sr_policies_dump_t_handler (vl_api_sr_policies_dump_t * mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->sr_policies)
{
send_sr_policies_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
+static void
+send_sr_policies_v2_details (ip6_sr_policy_t *t, vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_sr_policies_v2_details_t *rmp;
+ ip6_sr_main_t *sm = &sr_main;
+
+ u32 *sl_index, slidx = 0;
+ ip6_sr_sl_t *segment_list = 0;
+ ip6_address_t *segment;
+ vl_api_srv6_sid_list_t *api_sid_list;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp) + vec_len (t->segments_lists) *
+ sizeof (vl_api_srv6_sid_list_t));
+ clib_memset (rmp, 0,
+ (sizeof (*rmp) + vec_len (t->segments_lists) *
+ sizeof (vl_api_srv6_sid_list_t)));
+
+ rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SR_POLICIES_V2_DETAILS);
+ ip6_address_encode (&t->bsid, rmp->bsid);
+ ip6_address_encode (&t->encap_src, rmp->encap_src);
+ rmp->is_encap = t->is_encap;
+ rmp->type = t->type;
+ rmp->fib_table = htonl (t->fib_table);
+ rmp->num_sid_lists = vec_len (t->segments_lists);
+
+ /* Fill in all the segments lists */
+ vec_foreach (sl_index, t->segments_lists)
+ {
+ segment_list = pool_elt_at_index (sm->sid_lists, *sl_index);
+
+ api_sid_list = &rmp->sid_lists[sl_index - t->segments_lists];
+
+ api_sid_list->num_sids = vec_len (segment_list->segments);
+ api_sid_list->weight = htonl (segment_list->weight);
+ slidx = 0;
+ vec_foreach (segment, segment_list->segments)
+ {
+ ip6_address_encode (segment, api_sid_list->sids[slidx++]);
+ }
+ }
+
+ rmp->context = context;
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_sr_policies_v2_dump_t_handler (vl_api_sr_policies_v2_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ ip6_sr_main_t *sm = &sr_main;
+ ip6_sr_policy_t *t;
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ pool_foreach (t, sm->sr_policies)
+ {
+ send_sr_policies_v2_details (t, reg, mp->context);
+ }
+}
static void send_sr_policies_details_with_sl_index
(ip6_sr_policy_t * t, vl_api_registration_t * reg, u32 context)
@@ -381,12 +579,10 @@ static void
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->sr_policies)
{
send_sr_policies_details_with_sl_index(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void send_sr_steering_pol_details
@@ -428,12 +624,10 @@ static void vl_api_sr_steering_pol_dump_t_handler
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (t, sm->steer_policies)
{
send_sr_steering_pol_details(t, reg, mp->context);
}
- /* *INDENT-ON* */
}
#include <vnet/srv6/sr.api.c>
diff --git a/src/vnet/srv6/sr_doc.md b/src/vnet/srv6/sr_doc.md
deleted file mode 100644
index c80a0fc18f7..00000000000
--- a/src/vnet/srv6/sr_doc.md
+++ /dev/null
@@ -1,63 +0,0 @@
-# SRv6: Segment Routing for IPv6 {#srv6_doc}
-
-This is a memo intended to contain documentation of the VPP SRv6 implementation.
-Everything that is not directly obvious should come here.
-For any feedback on content that should be explained please mailto:pcamaril@cisco.com
-
-## Segment Routing
-
-Segment routing is a network technology focused on addressing the limitations of existing IP and Multiprotocol Label Switching (MPLS) networks in terms of simplicity, scale, and ease of operation. It is a foundation for application engineered routing as it prepares the networks for new business models where applications can control the network behavior.
-
-Segment routing seeks the right balance between distributed intelligence and centralized optimization and programming. It was built for the software-defined networking (SDN) era.
-
-Segment routing enhances packet forwarding behavior by enabling a network to transport unicast packets through a specific forwarding path, different from the normal path that a packet usually takes (IGP shortest path or BGP best path). This capability benefits many use cases, and one can build those specific paths based on application requirements.
-
-Segment routing uses the source routing paradigm. A node, usually a router but also a switch, a trusted server, or a virtual forwarder running on a hypervisor, steers a packet through an ordered list of instructions, called segments. A segment can represent any instruction, topological or service-based. A segment can have a local semantic to a segment-routing node or global within a segment-routing network. Segment routing allows an operator to enforce a flow through any topological path and service chain while maintaining per-flow state only at the ingress node to the segment-routing network. Segment routing also supports equal-cost multipath (ECMP) by design.
-
-Segment routing can operate with either an MPLS or an IPv6 data plane. All the currently available MPLS services, such as Layer 3 VPN (L3VPN), L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services [VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
-
-**The implementation of Segment Routing in VPP covers both the IPv6 data plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page contains the SRv6 documentation.**
-
-## Segment Routing terminology
-
-* Segment Routing Header (SRH): IPv6 routing extension header of type 'Segment Routing'. (draft-ietf-6man-segment-routing-header-05)
-* SegmentID (SID): is an IPv6 address.
-* Segment List (SL) (SID List): is the sequence of SIDs that the packet will traverse.
-* SR Policy: defines the SRH that will be applied to a packet. A packet steered into an SR policy may either receive the SRH by IPv6 header encapsulation (as recommended in draft-ietf-6man-rfc2460bis) or it could be inserted within an existing IPv6 header. An SR policy is uniquely identified by its Binding SID and associated with a weighted set of Segment Lists. In case several SID lists are defined, traffic steered into the policy is unevenly load-balanced among them according to their respective weights.
-* Local SID: is a SID associated with a processing function on the local node, which may go from advancing to the next SID in the SRH, to complex user-defined behaviors. When a FIB lookup, either in the main FIB or in a specific VRF, returns a match on a local SID, the associated function is performed.
-* BindingSID: a BindingSID is a SID (only one) associated one-one with an SR Policy. If a packet arrives with an IPv6 DA corresponding to a BindingSID, then the SR policy will be applied to such packet.
-
-## SRv6 Features in VPP
-
-The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-srv6-network-programming/">SRv6 Network Programming (*draft-filsfils-spring-srv6-network-programming*)</a> defines the SRv6 architecture.
-
-VPP supports the following SRv6 LocalSID functions: End, End.X, End.DX6, End.DT6, End.DX4, End.DT4, End.DX2, End.B6, End.B6.Encaps.
-
-For further information and how to configure each specific function: @subpage srv6_localsid_doc
-
-
-The <a href="https://datatracker.ietf.org/doc/draft-filsfils-spring-segment-routing-policy/">Segment Routing Policy (*draft-filsfils-spring-segment-routing-policy*)</a> defines SR Policies.
-
-VPP supports SRv6 Policies with T.Insert and T.Encaps behaviors.
-
-For further information on how to create SR Policies: @subpage srv6_policy_doc
-
-For further information on how to steer traffic into SR Policies: @subpage srv6_steering_doc
-
-## SRv6 LocalSID development framework
-
-One of the *'key'* concepts about SRv6 is network programmability. This is why an SRv6 LocalSID is associated with an specific function.
-
-However, the trully way to enable network programmability is allowing any developer **easily** create his own SRv6 LocalSID function. That is the reason why we have added some API calls such that any developer can code his own SRv6 LocalSID behaviors as plugins an add them to the running SRv6 code.
-
-The principle is that the developer only codes the behavior -the graph node-. However all the FIB handling, SR LocalSID instantiation and so on are done by the VPP SRv6 code.
-
-For more information please refer to: @subpage srv6_plugin_doc
-
-Available SRv6 plugins include:
-
-- @subpage srv6_as_plugin_doc
-- @subpage srv6_ad_plugin_doc
-- @subpage srv6_am_plugin_doc
-- @subpage srv6_mobile_plugin_doc
-
diff --git a/src/vnet/srv6/sr_doc.rst b/src/vnet/srv6/sr_doc.rst
new file mode 100644
index 00000000000..24501832b85
--- /dev/null
+++ b/src/vnet/srv6/sr_doc.rst
@@ -0,0 +1,123 @@
+.. _srv6_doc:
+
+SRv6: Segment Routing for IPv6
+==============================
+
+This is a memo intended to contain documentation of the VPP SRv6
+implementation. Everything that is not directly obvious should come
+here. For any feedback on content that should be explained please
+mailto:pcamaril@cisco.com
+
+Segment Routing
+---------------
+
+Segment routing is a network technology focused on addressing the
+limitations of existing IP and Multiprotocol Label Switching (MPLS)
+networks in terms of simplicity, scale, and ease of operation. It is a
+foundation for application engineered routing as it prepares the
+networks for new business models where applications can control the
+network behavior.
+
+Segment routing seeks the right balance between distributed intelligence
+and centralized optimization and programming. It was built for the
+software-defined networking (SDN) era.
+
+Segment routing enhances packet forwarding behavior by enabling a
+network to transport unicast packets through a specific forwarding path,
+different from the normal path that a packet usually takes (IGP shortest
+path or BGP best path). This capability benefits many use cases, and one
+can build those specific paths based on application requirements.
+
+Segment routing uses the source routing paradigm. A node, usually a
+router but also a switch, a trusted server, or a virtual forwarder
+running on a hypervisor, steers a packet through an ordered list of
+instructions, called segments. A segment can represent any instruction,
+topological or service-based. A segment can have a local semantic to a
+segment-routing node or global within a segment-routing network. Segment
+routing allows an operator to enforce a flow through any topological
+path and service chain while maintaining per-flow state only at the
+ingress node to the segment-routing network. Segment routing also
+supports equal-cost multipath (ECMP) by design.
+
+Segment routing can operate with either an MPLS or an IPv6 data plane.
+All the currently available MPLS services, such as Layer 3 VPN (L3VPN),
+L2VPN (Virtual Private Wire Service [VPWS], Virtual Private LAN Services
+[VPLS], Ethernet VPN [E-VPN], and Provider Backbone Bridging Ethernet
+VPN [PBB-EVPN]), can run on top of a segment-routing transport network.
+
+**The implementation of Segment Routing in VPP covers both the IPv6 data
+plane (SRv6) as well as the MPLS data plane (SR-MPLS). This page
+contains the SRv6 documentation.**
+
+Segment Routing terminology
+---------------------------
+
+- Segment Routing Header (SRH): IPv6 routing extension header of type
+ ‘Segment Routing’. (draft-ietf-6man-segment-routing-header-05)
+- SegmentID (SID): is an IPv6 address.
+- Segment List (SL) (SID List): is the sequence of SIDs that the packet
+ will traverse.
+- SR Policy: defines the SRH that will be applied to a packet. A packet
+ steered into an SR policy may either receive the SRH by IPv6 header
+ encapsulation (as recommended in draft-ietf-6man-rfc2460bis) or it
+ could be inserted within an existing IPv6 header. An SR policy is
+ uniquely identified by its Binding SID and associated with a weighted
+ set of Segment Lists. In case several SID lists are defined, traffic
+ steered into the policy is unevenly load-balanced among them
+ according to their respective weights.
+- Local SID: is a SID associated with a processing function on the
+ local node, which may go from advancing to the next SID in the SRH,
+ to complex user-defined behaviors. When a FIB lookup, either in the
+ main FIB or in a specific VRF, returns a match on a local SID, the
+ associated function is performed.
+- BindingSID: a BindingSID is a SID (only one) associated one-one with
+ an SR Policy. If a packet arrives with an IPv6 DA corresponding to a
+ BindingSID, then the SR policy will be applied to such packet.
+
+SRv6 Features in VPP
+--------------------
+
+The SRv6 Network Programming
+(*draft-filsfils-spring-srv6-network-programming*) defines the SRv6
+architecture.
+
+VPP supports the following SRv6 LocalSID functions: End, End.X, End.DX6,
+End.DT6, End.DX4, End.DT4, End.DX2, End.B6, End.B6.Encaps.
+
+For further information and how to configure each specific function:
+:ref:`srv6_localsid_doc`
+
+The Segment Routing Policy
+(*draft-filsfils-spring-segment-routing-policy*) defines SR Policies.
+
+VPP supports SRv6 Policies with T.Insert and T.Encaps behaviors.
+
+For further information on how to create SR Policies: :ref:`srv6_policy_doc`
+
+For further information on how to steer traffic into SR Policies:
+:ref:`srv6_steering_doc`
+
+SRv6 LocalSID development framework
+-----------------------------------
+
+One of the *‘key’* concepts about SRv6 is network programmability. This
+is why an SRv6 LocalSID is associated with an specific function.
+
+However, the true way to enable network programmability is allowing
+any developer **easily** create his own SRv6 LocalSID function. That is
+the reason why we have added some API calls such that any developer can
+code his own SRv6 LocalSID behaviors as plugins an add them to the
+running SRv6 code.
+
+The principle is that the developer only codes the behavior -the graph
+node-. However all the FIB handling, SR LocalSID instantiation and so on
+are done by the VPP SRv6 code.
+
+For more information please refer to: :ref:`srv6_plugin_doc`
+
+Available SRv6 plugins include:
+
+- :ref:`srv6_as_plugin_doc`
+- :ref:`srv6_ad_plugin_doc`
+- :ref:`srv6_am_plugin_doc`
+- :ref:`srv6_mobile_plugin_doc`
diff --git a/src/vnet/srv6/sr_localsid.c b/src/vnet/srv6/sr_localsid.c
index a055c923be9..12349bb95e8 100644
--- a/src/vnet/srv6/sr_localsid.c
+++ b/src/vnet/srv6/sr_localsid.c
@@ -396,12 +396,10 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
sr_localsid_fn_registration_t **plugin_it = 0;
/* Create a vector out of the plugin pool as recommended */
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->plugin_functions)
{
vec_add1 (vec_plugins, plugin);
}
- /* *INDENT-ON* */
vec_foreach (plugin_it, vec_plugins)
{
@@ -506,7 +504,6 @@ sr_cli_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_localsid_command, static) = {
.path = "sr localsid",
.short_help = "sr localsid (del) address XX:XX::YY:YY"
@@ -534,7 +531,6 @@ VLIB_CLI_COMMAND (sr_localsid_command, static) = {
"\t\tParameters: '<ip4_fib_table>'\n",
.function = sr_cli_localsid_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI function to 'show' all SR LocalSIDs on console.
@@ -551,9 +547,7 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "SRv6 - My LocalSID Table:");
vlib_cli_output (vm, "=========================");
- /* *INDENT-OFF* */
pool_foreach (ls, sm->localsids) { vec_add1 (localsid_list, ls); }
- /* *INDENT-ON* */
for (i = 0; i < vec_len (localsid_list); i++)
{
ls = localsid_list[i];
@@ -676,13 +670,11 @@ show_sr_localsid_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_localsid_command, static) = {
.path = "show sr localsids",
.short_help = "show sr localsids",
.function = show_sr_localsid_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief Function to 'clear' ALL SR localsid counters
@@ -700,13 +692,11 @@ clear_sr_localsid_counters_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_sr_localsid_counters_command, static) = {
.path = "clear sr localsid-counters",
.short_help = "clear sr localsid-counters",
.function = clear_sr_localsid_counters_command_fn,
};
-/* *INDENT-ON* */
/************************ SR LocalSID graphs node ****************************/
/**
@@ -1438,7 +1428,6 @@ sr_localsid_d_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_d_node) = {
.function = sr_localsid_d_fn,
.name = "sr-localsid-d",
@@ -1454,7 +1443,6 @@ VLIB_REGISTER_NODE (sr_localsid_d_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief SR LocalSID graph node. Supports all default SR Endpoint without decaps
@@ -1748,7 +1736,6 @@ sr_localsid_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_node) = {
.function = sr_localsid_fn,
.name = "sr-localsid",
@@ -1764,7 +1751,6 @@ VLIB_REGISTER_NODE (sr_localsid_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief SR LocalSID uN graph node. Supports all default SR Endpoint without decaps
@@ -2058,7 +2044,6 @@ sr_localsid_un_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_un_node) = {
.function = sr_localsid_un_fn,
.name = "sr-localsid-un",
@@ -2074,7 +2059,6 @@ VLIB_REGISTER_NODE (sr_localsid_un_node) = {
#undef _
},
};
-/* *INDENT-ON* */
static uword
sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -2270,7 +2254,6 @@ sr_localsid_un_perf_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_localsid_un_perf_node) = {
.function = sr_localsid_un_perf_fn,
.name = "sr-localsid-un-perf",
@@ -2286,7 +2269,6 @@ VLIB_REGISTER_NODE (sr_localsid_un_perf_node) = {
#undef _
},
};
-/* *INDENT-ON* */
static u8 *
format_sr_dpo (u8 * s, va_list * args)
@@ -2406,10 +2388,8 @@ show_sr_localsid_behaviors_command_fn (vlib_main_t * vm,
vlib_cli_output (vm,
"SR LocalSIDs behaviors:\n-----------------------\n\n");
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->plugin_functions)
{ vec_add1 (plugins_vec, plugin); }
- /* *INDENT-ON* */
/* Print static behaviors */
vlib_cli_output (vm, "Default behaviors:\n"
@@ -2439,13 +2419,11 @@ show_sr_localsid_behaviors_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_localsid_behaviors_command, static) = {
.path = "show sr localsids behaviors",
.short_help = "show sr localsids behaviors",
.function = show_sr_localsid_behaviors_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief SR LocalSID initialization
diff --git a/src/vnet/srv6/sr_localsid.md b/src/vnet/srv6/sr_localsid.md
deleted file mode 100644
index fbc7ef827e6..00000000000
--- a/src/vnet/srv6/sr_localsid.md
+++ /dev/null
@@ -1,58 +0,0 @@
-# SR LocalSIDs {#srv6_localsid_doc}
-
-A local SID is associated to a Segment Routing behavior -or function- on the current node.
-
-The most basic behavior is called END. It simply activates the next SID in the current packet, by decrementing the Segments Left value and updating the IPv6 DA.
-
-A local END SID is instantiated using the following CLI:
-
- sr localsid (del) address XX::YY behavior end
-
-This creates a new entry in the main FIB for IPv6 address XX::YY. All packets whose IPv6 DA matches this FIB entry are redirected to the sr-localsid node, where they are processed as described above.
-
-Other examples of local SIDs are the following:
-
- sr localsid (del) address XX::YY behavior end
- sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a
- sr localsid (del) address XX::YY behavior end.dx6 GE0/1/0 2001::a
- sr localsid (del) address XX::YY behavior end.dx4 GE0/1/0 10.0.0.1
- sr localsid (del) address XX::YY behavior end.dx2 GigabitE0/11/0
- sr localsid (del) address XX::YY behavior end.dt6 5
- sr localsid (del) address XX::YY behavior end.dt6 5
-
-Note that all of these behaviors match the definitions of the SRv6 architecture (*draft-filsfils-spring-srv6-network-programming*). Please refer to this document for a detailed description of each behavior.
-
-Note also that you can configure the PSP flavor of the End and End.X behaviors by typing:
-
- sr localsid (del) address XX::YY behavior end psp
- sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a psp
-
-Help on the available local SID behaviors and their usage can be obtained with:
-
- help sr localsid
-
-Alternatively they can be obtained using.
-
- show sr localsids behavior
-
-The difference in between those two commands is that the first one will only display the SR LocalSID behaviors that are built-in VPP, while the latter will display those behaviors plus the ones added with the SR LocalSID Development Framework.
-
-
-VPP keeps a 'My LocalSID Table' where it stores all the SR local SIDs instantiated as well as their parameters. Every time a new local SID is instantiated, a new entry is added to this table. In addition, counters for correctly and incorrectly processed traffic are maintained for each local SID. The counters store both the number of packets and bytes.
-
-The contents of the 'My LocalSID Table' is shown with:
-
- vpp# show sr localsid
- SRv6 - My LocalSID Table:
- =========================
- Address: c3::1
- Behavior: DX6 (Endpoint with decapsulation and IPv6 cross-connect)
- Iface: GigabitEthernet0/5/0
- Next hop: b:c3::b
- Good traffic: [51277 packets : 5332808 bytes]
- Bad traffic: [0 packets : 0 bytes]
- --------------------
-
-The traffic counters can be reset with:
-
- vpp# clear sr localsid-counters
diff --git a/src/vnet/srv6/sr_localsid.rst b/src/vnet/srv6/sr_localsid.rst
new file mode 100644
index 00000000000..cf042a847b4
--- /dev/null
+++ b/src/vnet/srv6/sr_localsid.rst
@@ -0,0 +1,90 @@
+.. _srv6_localsid_doc:
+
+SR LocalSIDs
+============
+
+A local SID is associated to a Segment Routing behavior -or function- on
+the current node.
+
+The most basic behavior is called END. It simply activates the next SID
+in the current packet, by decrementing the Segments Left value and
+updating the IPv6 DA.
+
+A local END SID is instantiated using the following CLI:
+
+::
+
+ sr localsid (del) address XX::YY behavior end
+
+This creates a new entry in the main FIB for IPv6 address XX::YY. All
+packets whose IPv6 DA matches this FIB entry are redirected to the
+sr-localsid node, where they are processed as described above.
+
+Other examples of local SIDs are the following:
+
+::
+
+ sr localsid (del) address XX::YY behavior end
+ sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a
+ sr localsid (del) address XX::YY behavior end.dx6 GE0/1/0 2001::a
+ sr localsid (del) address XX::YY behavior end.dx4 GE0/1/0 10.0.0.1
+ sr localsid (del) address XX::YY behavior end.dx2 GigabitE0/11/0
+ sr localsid (del) address XX::YY behavior end.dt6 5
+ sr localsid (del) address XX::YY behavior end.dt6 5
+
+Note that all of these behaviors match the definitions of the SRv6
+architecture (*draft-filsfils-spring-srv6-network-programming*). Please
+refer to this document for a detailed description of each behavior.
+
+Note also that you can configure the PSP flavor of the End and End.X
+behaviors by typing:
+
+::
+
+ sr localsid (del) address XX::YY behavior end psp
+ sr localsid (del) address XX::YY behavior end.x GE0/1/0 2001::a psp
+
+Help on the available local SID behaviors and their usage can be
+obtained with:
+
+::
+
+ help sr localsid
+
+Alternatively they can be obtained using.
+
+::
+
+ show sr localsids behavior
+
+The difference in between those two commands is that the first one will
+only display the SR LocalSID behaviors that are built-in VPP, while the
+latter will display those behaviors plus the ones added with the SR
+LocalSID Development Framework.
+
+VPP keeps a ‘My LocalSID Table’ where it stores all the SR local SIDs
+instantiated as well as their parameters. Every time a new local SID is
+instantiated, a new entry is added to this table. In addition, counters
+for correctly and incorrectly processed traffic are maintained for each
+local SID. The counters store both the number of packets and bytes.
+
+The contents of the ‘My LocalSID Table’ is shown with:
+
+::
+
+ vpp# show sr localsid
+ SRv6 - My LocalSID Table:
+ =========================
+ Address: c3::1
+ Behavior: DX6 (Endpoint with decapsulation and IPv6 cross-connect)
+ Iface: GigabitEthernet0/5/0
+ Next hop: b:c3::b
+ Good traffic: [51277 packets : 5332808 bytes]
+ Bad traffic: [0 packets : 0 bytes]
+ --------------------
+
+The traffic counters can be reset with:
+
+::
+
+ vpp# clear sr localsid-counters
diff --git a/src/vnet/srv6/sr_packet.h b/src/vnet/srv6/sr_packet.h
index dda776b4037..cf9fcb70bcc 100644
--- a/src/vnet/srv6/sr_packet.h
+++ b/src/vnet/srv6/sr_packet.h
@@ -116,6 +116,9 @@
#define ROUTING_HEADER_TYPE_SR 4
+#define IP6_SRH_PT_TLV_TYPE 128
+#define IP6_SRH_PT_TLV_LEN 14
+
typedef struct
{
/* Protocol for next header. */
@@ -156,6 +159,21 @@ typedef struct
u8 value[0];
} __attribute__ ((packed)) ip6_sr_tlv_t;
+typedef struct
+{
+ u32 sec;
+ u32 nsec;
+} __attribute__ ((packed)) timestamp_64_t;
+
+typedef struct
+{
+ u8 type;
+ u8 length;
+ u16 id_ld;
+ timestamp_64_t t64;
+ u16 session_id;
+ u16 seq_num;
+} __attribute__ ((packed)) ip6_sr_pt_tlv_t;
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vnet/srv6/sr_policy.md b/src/vnet/srv6/sr_policy.md
deleted file mode 100644
index 2a7eb4c9870..00000000000
--- a/src/vnet/srv6/sr_policy.md
+++ /dev/null
@@ -1,60 +0,0 @@
-# Creating a SR Policy {#srv6_policy_doc}
-
-An SR Policy is defined by a Binding SID and a weighted set of Segment Lists.
-
-A new SR policy is created with a first SID list using:
-
- sr policy add bsid 2001::1 next A1:: next B1:: next C1:: (weight 5) (fib-table 3)
-
-* The weight parameter is only used if more than one SID list is associated with the policy.
-* The fib-table parameter specifies in which table (VRF) the Binding SID is to be installed.
-
-An SR policy is deleted with:
-
- sr policy del bsid 2001::1
- sr policy del index 1
-
-The existing SR policies are listed with:
-
- show sr policies
-
-## Adding/Removing SID Lists from an SR policy
-
-An additional SID list is associated with an existing SR policy with:
-
- sr policy mod bsid 2001::1 add sl next A2:: next B2:: next C2:: (weight 3)
- sr policy mod index 3 add sl next A2:: next B2:: next C2:: (weight 3)
-
-Conversely, a SID list can be removed from an SR policy with:
-
- sr policy mod bsid 2001::1 del sl index 1
- sr policy mod index 3 del sl index 1
-
-Note that this cannot be used to remove the last SID list of a policy.
-
-The weight of a SID list can also be modified with:
-
- sr policy mod bsid 2001::1 mod sl index 1 weight 4
- sr policy mod index 3 mod sl index 1 weight 4
-
-## SR Policies: Spray policies
-
-Spray policies are a specific type of SR policies where the packet is replicated on all the SID lists, rather than load-balanced among them.
-
-SID list weights are ignored with this type of policies.
-
-A Spray policy is instantiated by appending the keyword **spray** to a regular SR policy command, as in:
-
- sr policy add bsid 2001::1 next A1:: next B1:: next C1:: spray
-
-Spray policies are used for removing multicast state from a network core domain, and instead send a linear unicast copy to every access node. The last SID in each list accesses the multicast tree within the access node.
-
-## Encapsulation SR policies
-
-In case the user decides to create an SR policy an IPv6 Source Address must be specified for the encapsulated traffic. In order to do so the user might use the following command:
-
- set sr encaps source addr XXXX::YYYY
-
-Default hop-limit for the encapsulating IPv6 header is 64. It is possible to specify custom hop-limit value from 1 to 255 using this command:
-
- set sr encaps hop-limit N
diff --git a/src/vnet/srv6/sr_policy.rst b/src/vnet/srv6/sr_policy.rst
new file mode 100644
index 00000000000..50cc19bfb14
--- /dev/null
+++ b/src/vnet/srv6/sr_policy.rst
@@ -0,0 +1,96 @@
+.. _srv6_policy_doc:
+
+Creating a SR Policy
+====================
+
+An SR Policy is defined by a Binding SID and a weighted set of Segment
+Lists.
+
+A new SR policy is created with a first SID list using:
+
+::
+
+ sr policy add bsid 2001::1 next A1:: next B1:: next C1:: (weight 5) (fib-table 3)
+
+- The weight parameter is only used if more than one SID list is
+ associated with the policy.
+- The fib-table parameter specifies in which table (VRF) the Binding
+ SID is to be installed.
+
+An SR policy is deleted with:
+
+::
+
+ sr policy del bsid 2001::1
+ sr policy del index 1
+
+The existing SR policies are listed with:
+
+::
+
+ show sr policies
+
+Adding/Removing SID Lists from an SR policy
+-------------------------------------------
+
+An additional SID list is associated with an existing SR policy with:
+
+::
+
+ sr policy mod bsid 2001::1 add sl next A2:: next B2:: next C2:: (weight 3)
+ sr policy mod index 3 add sl next A2:: next B2:: next C2:: (weight 3)
+
+Conversely, a SID list can be removed from an SR policy with:
+
+::
+
+ sr policy mod bsid 2001::1 del sl index 1
+ sr policy mod index 3 del sl index 1
+
+Note that this cannot be used to remove the last SID list of a policy.
+
+The weight of a SID list can also be modified with:
+
+::
+
+ sr policy mod bsid 2001::1 mod sl index 1 weight 4
+ sr policy mod index 3 mod sl index 1 weight 4
+
+SR Policies: Spray policies
+---------------------------
+
+Spray policies are a specific type of SR policies where the packet is
+replicated on all the SID lists, rather than load-balanced among them.
+
+SID list weights are ignored with this type of policies.
+
+A Spray policy is instantiated by appending the keyword **spray** to a
+regular SR policy command, as in:
+
+::
+
+ sr policy add bsid 2001::1 next A1:: next B1:: next C1:: spray
+
+Spray policies are used for removing multicast state from a network core
+domain, and instead send a linear unicast copy to every access node. The
+last SID in each list accesses the multicast tree within the access
+node.
+
+Encapsulation SR policies
+-------------------------
+
+In case the user decides to create an SR policy an IPv6 Source Address
+must be specified for the encapsulated traffic. In order to do so the
+user might use the following command:
+
+::
+
+ set sr encaps source addr XXXX::YYYY
+
+Default hop-limit for the encapsulating IPv6 header is 64. It is
+possible to specify custom hop-limit value from 1 to 255 using this
+command:
+
+::
+
+ set sr encaps hop-limit N
diff --git a/src/vnet/srv6/sr_policy_rewrite.c b/src/vnet/srv6/sr_policy_rewrite.c
index 79de7792061..0aa88cc273e 100644
--- a/src/vnet/srv6/sr_policy_rewrite.c
+++ b/src/vnet/srv6/sr_policy_rewrite.c
@@ -33,7 +33,7 @@
* Traffic input usually is IPv6 packets. However it is possible to have
* IPv4 packets or L2 frames. (that are encapsulated into IPv6 with SRH)
*
- * This file provides the appropiates VPP graph nodes to do any of these
+ * This file provides the appropriate VPP graph nodes to do any of these
* methods.
*
*/
@@ -47,7 +47,9 @@
#include <vnet/fib/ip6_fib.h>
#include <vnet/dpo/dpo.h>
#include <vnet/dpo/replicate_dpo.h>
+#include <vnet/srv6/sr_pt.h>
+#include <vppinfra/byte_order.h>
#include <vppinfra/error.h>
#include <vppinfra/elog.h>
@@ -140,13 +142,11 @@ set_sr_src_command_fn (vlib_main_t * vm, unformat_input_t * input,
return clib_error_return (0, "No address specified");
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_sr_src_command, static) = {
.path = "set sr encaps source",
.short_help = "set sr encaps source addr <ip6_addr>",
.function = set_sr_src_command_fn,
};
-/* *INDENT-ON* */
/******************** SR rewrite set encaps IPv6 hop-limit ********************/
@@ -178,34 +178,40 @@ set_sr_hop_limit_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_sr_hop_limit_command, static) = {
.path = "set sr encaps hop-limit",
.short_help = "set sr encaps hop-limit <value>",
.function = set_sr_hop_limit_command_fn,
};
-/* *INDENT-ON* */
/*********************** SR rewrite string computation ************************/
/**
* @brief SR rewrite string computation for IPv6 encapsulation (inline)
*
* @param sl is a vector of IPv6 addresses composing the Segment List
+ * @param src_v6addr is a encaps IPv6 source addr
*
* @return precomputed rewrite string for encapsulation
*/
static inline u8 *
-compute_rewrite_encaps (ip6_address_t * sl)
+compute_rewrite_encaps (ip6_address_t *sl, ip6_address_t *src_v6addr, u8 type)
{
ip6_header_t *iph;
ip6_sr_header_t *srh;
+ ip6_sr_pt_tlv_t *srh_pt_tlv;
ip6_address_t *addrp, *this_address;
u32 header_length = 0;
u8 *rs = NULL;
header_length = 0;
header_length += IPv6_DEFAULT_HEADER_LENGTH;
- if (vec_len (sl) > 1)
+ if (type == SR_POLICY_TYPE_TEF)
+ {
+ header_length += sizeof (ip6_sr_header_t);
+ header_length += vec_len (sl) * sizeof (ip6_address_t);
+ header_length += sizeof (ip6_sr_pt_tlv_t);
+ }
+ else if (vec_len (sl) > 1)
{
header_length += sizeof (ip6_sr_header_t);
header_length += vec_len (sl) * sizeof (ip6_address_t);
@@ -216,13 +222,39 @@ compute_rewrite_encaps (ip6_address_t * sl)
iph = (ip6_header_t *) rs;
iph->ip_version_traffic_class_and_flow_label =
clib_host_to_net_u32 (0 | ((6 & 0xF) << 28));
- iph->src_address.as_u64[0] = sr_pr_encaps_src.as_u64[0];
- iph->src_address.as_u64[1] = sr_pr_encaps_src.as_u64[1];
+ iph->src_address.as_u64[0] = src_v6addr->as_u64[0];
+ iph->src_address.as_u64[1] = src_v6addr->as_u64[1];
iph->payload_length = header_length - IPv6_DEFAULT_HEADER_LENGTH;
iph->protocol = IP_PROTOCOL_IPV6;
iph->hop_limit = sr_pr_encaps_hop_limit;
- if (vec_len (sl) > 1)
+ if (type == SR_POLICY_TYPE_TEF)
+ {
+ srh = (ip6_sr_header_t *) (iph + 1);
+ iph->protocol = IP_PROTOCOL_IPV6_ROUTE;
+ srh->protocol = IP_PROTOCOL_IPV6;
+ srh->type = ROUTING_HEADER_TYPE_SR;
+ srh->flags = 0x00;
+ srh->tag = 0x0000;
+ srh->segments_left = vec_len (sl) - 1;
+ srh->last_entry = vec_len (sl) - 1;
+ srh->length =
+ ((sizeof (ip6_sr_header_t) + (vec_len (sl) * sizeof (ip6_address_t)) +
+ sizeof (ip6_sr_pt_tlv_t)) /
+ 8) -
+ 1;
+ addrp = srh->segments + vec_len (sl) - 1;
+ vec_foreach (this_address, sl)
+ {
+ clib_memcpy_fast (addrp->as_u8, this_address->as_u8,
+ sizeof (ip6_address_t));
+ addrp--;
+ }
+ srh_pt_tlv = (ip6_sr_pt_tlv_t *) (srh->segments + vec_len (sl));
+ srh_pt_tlv->type = IP6_SRH_PT_TLV_TYPE;
+ srh_pt_tlv->length = IP6_SRH_PT_TLV_LEN;
+ }
+ else if (vec_len (sl) > 1)
{
srh = (ip6_sr_header_t *) (iph + 1);
iph->protocol = IP_PROTOCOL_IPV6_ROUTE;
@@ -255,7 +287,7 @@ compute_rewrite_encaps (ip6_address_t * sl)
* @return precomputed rewrite string for SRH insertion
*/
static inline u8 *
-compute_rewrite_insert (ip6_address_t * sl)
+compute_rewrite_insert (ip6_address_t *sl, u8 type)
{
ip6_sr_header_t *srh;
ip6_address_t *addrp, *this_address;
@@ -335,18 +367,20 @@ compute_rewrite_bsid (ip6_address_t * sl)
*
* @param sr_policy is the SR policy where the SL will be added
* @param sl is a vector of IPv6 addresses composing the Segment List
+ * @param encap_src is a encaps IPv6 source addr. optional.
* @param weight is the weight of the SegmentList (for load-balancing purposes)
* @param is_encap represents the mode (SRH insertion vs Encapsulation)
*
* @return pointer to the just created segment list
*/
static inline ip6_sr_sl_t *
-create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight,
- u8 is_encap)
+create_sl (ip6_sr_policy_t *sr_policy, ip6_address_t *sl,
+ ip6_address_t *encap_src, u32 weight, u8 is_encap)
{
ip6_sr_main_t *sm = &sr_main;
ip6_sr_sl_t *segment_list;
sr_policy_fn_registration_t *plugin = 0;
+ ip6_address_t encap_srcv6 = sr_pr_encaps_src;
pool_get (sm->sid_lists, segment_list);
clib_memset (segment_list, 0, sizeof (*segment_list));
@@ -358,18 +392,25 @@ create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight,
(weight != (u32) ~ 0 ? weight : SR_SEGMENT_LIST_WEIGHT_DEFAULT);
segment_list->segments = vec_dup (sl);
+ segment_list->policy_type = sr_policy->type;
segment_list->egress_fib_table =
ip6_fib_index_from_table_id (sr_policy->fib_table);
if (is_encap)
{
- segment_list->rewrite = compute_rewrite_encaps (sl);
+ if (encap_src)
+ {
+ clib_memcpy_fast (&encap_srcv6, encap_src, sizeof (ip6_address_t));
+ }
+ segment_list->rewrite =
+ compute_rewrite_encaps (sl, &encap_srcv6, sr_policy->type);
segment_list->rewrite_bsid = segment_list->rewrite;
+ sr_policy->encap_src = encap_srcv6;
}
else
{
- segment_list->rewrite = compute_rewrite_insert (sl);
+ segment_list->rewrite = compute_rewrite_insert (sl, sr_policy->type);
segment_list->rewrite_bsid = compute_rewrite_bsid (sl);
}
@@ -433,7 +474,7 @@ create_sl (ip6_sr_policy_t * sr_policy, ip6_address_t * sl, u32 weight,
}
/**
- * @brief Updates the Load Balancer after an SR Policy change
+ * @brief Updates the Load-Balancer after an SR Policy change
*
* @param sr_policy is the modified SR Policy
*/
@@ -624,17 +665,19 @@ update_replicate (ip6_sr_policy_t * sr_policy)
*
* @param bsid is the bindingSID of the SR Policy
* @param segments is a vector of IPv6 address composing the segment list
+ * @param encap_src is a encaps IPv6 source addr. optional.
* @param weight is the weight of the sid list. optional.
* @param behavior is the behavior of the SR policy. (default//spray)
* @param fib_table is the VRF where to install the FIB entry for the BSID
- * @param is_encap (bool) whether SR policy should behave as Encap/SRH Insertion
+ * @param is_encap (bool) whether SR policy should behave as Encap/SRH
+ * Insertion
*
* @return 0 if correct, else error
*/
int
-sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
- u32 weight, u8 behavior, u32 fib_table, u8 is_encap,
- u16 plugin, void *ls_plugin_mem)
+sr_policy_add (ip6_address_t *bsid, ip6_address_t *segments,
+ ip6_address_t *encap_src, u32 weight, u8 type, u32 fib_table,
+ u8 is_encap, u16 plugin, void *ls_plugin_mem)
{
ip6_sr_main_t *sm = &sr_main;
ip6_sr_policy_t *sr_policy = 0;
@@ -675,7 +718,7 @@ sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
pool_get (sm->sr_policies, sr_policy);
clib_memset (sr_policy, 0, sizeof (*sr_policy));
clib_memcpy_fast (&sr_policy->bsid, bsid, sizeof (ip6_address_t));
- sr_policy->type = behavior;
+ sr_policy->type = type;
sr_policy->fib_table = (fib_table != (u32) ~ 0 ? fib_table : 0); //Is default FIB 0 ?
sr_policy->is_encap = is_encap;
@@ -690,7 +733,7 @@ sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
NULL);
/* Create a segment list and add the index to the SR policy */
- create_sl (sr_policy, segments, weight, is_encap);
+ create_sl (sr_policy, segments, encap_src, weight, is_encap);
/* If FIB doesnt exist, create them */
if (sm->fib_table_ip6 == (u32) ~ 0)
@@ -704,7 +747,8 @@ sr_policy_add (ip6_address_t * bsid, ip6_address_t * segments,
}
/* Create IPv6 FIB for the BindingSID attached to the DPO of the only SL */
- if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
+ if (sr_policy->type == SR_POLICY_TYPE_DEFAULT ||
+ sr_policy->type == SR_POLICY_TYPE_TEF)
update_lb (sr_policy);
else if (sr_policy->type == SR_POLICY_TYPE_SPRAY)
update_replicate (sr_policy);
@@ -739,8 +783,6 @@ sr_policy_del (ip6_address_t * bsid, u32 index)
else
{
sr_policy = pool_elt_at_index (sm->sr_policies, index);
- if (!sr_policy)
- return -1;
}
/* Remove BindingSID FIB entry */
@@ -821,6 +863,7 @@ sr_policy_del (ip6_address_t * bsid, u32 index)
* @param fib_table is the VRF where to install the FIB entry for the BSID
* @param operation is the operation to perform (among the top ones)
* @param segments is a vector of IPv6 address composing the segment list
+ * @param encap_src is a encaps IPv6 source addr. optional.
* @param sl_index is the index of the Segment List to modify/delete
* @param weight is the weight of the sid list. optional.
* @param is_encap Mode. Encapsulation or SRH insertion.
@@ -828,8 +871,8 @@ sr_policy_del (ip6_address_t * bsid, u32 index)
* @return 0 if correct, else error
*/
int
-sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
- u8 operation, ip6_address_t * segments, u32 sl_index,
+sr_policy_mod (ip6_address_t *bsid, u32 index, u32 fib_table, u8 operation,
+ ip6_address_t *segments, ip6_address_t *encap_src, u32 sl_index,
u32 weight)
{
ip6_sr_main_t *sm = &sr_main;
@@ -849,15 +892,13 @@ sr_policy_mod (ip6_address_t * bsid, u32 index, u32 fib_table,
else
{
sr_policy = pool_elt_at_index (sm->sr_policies, index);
- if (!sr_policy)
- return -1;
}
if (operation == 1) /* Add SR List to an existing SR policy */
{
/* Create the new SL */
- segment_list =
- create_sl (sr_policy, segments, weight, sr_policy->is_encap);
+ segment_list = create_sl (sr_policy, segments, encap_src, weight,
+ sr_policy->is_encap);
/* Create a new LB DPO */
if (sr_policy->type == SR_POLICY_TYPE_DEFAULT)
@@ -930,15 +971,16 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
int rv = -1;
char is_del = 0, is_add = 0, is_mod = 0;
char policy_set = 0;
- ip6_address_t bsid, next_address;
+ ip6_address_t bsid, next_address, src_v6addr;
u32 sr_policy_index = (u32) ~ 0, sl_index = (u32) ~ 0;
u32 weight = (u32) ~ 0, fib_table = (u32) ~ 0;
ip6_address_t *segments = 0, *this_seg;
u8 operation = 0;
char is_encap = 1;
- char is_spray = 0;
+ u8 type = SR_POLICY_TYPE_DEFAULT;
u16 behavior = 0;
void *ls_plugin_mem = 0;
+ ip6_address_t *encap_src = 0;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -962,6 +1004,10 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
clib_memcpy_fast (this_seg->as_u8, next_address.as_u8,
sizeof (*this_seg));
}
+ else if (unformat (input, "v6src %U", unformat_ip6_address, &src_v6addr))
+ {
+ encap_src = &src_v6addr;
+ }
else if (unformat (input, "add sl"))
operation = 1;
else if (unformat (input, "del sl index %d", &sl_index))
@@ -975,18 +1021,18 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
else if (unformat (input, "insert"))
is_encap = 0;
else if (unformat (input, "spray"))
- is_spray = 1;
+ type = SR_POLICY_TYPE_SPRAY;
+ else if (unformat (input, "tef"))
+ type = SR_POLICY_TYPE_TEF;
else if (!behavior && unformat (input, "behavior"))
{
sr_policy_fn_registration_t *plugin = 0, **vec_plugins = 0;
sr_policy_fn_registration_t **plugin_it = 0;
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->policy_plugin_functions)
{
vec_add1 (vec_plugins, plugin);
}
- /* *INDENT-ON* */
vec_foreach (plugin_it, vec_plugins)
{
@@ -1024,10 +1070,8 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (vec_len (segments) == 0)
return clib_error_return (0, "No Segment List specified");
- rv = sr_policy_add (&bsid, segments, weight,
- (is_spray ? SR_POLICY_TYPE_SPRAY :
- SR_POLICY_TYPE_DEFAULT), fib_table, is_encap,
- behavior, ls_plugin_mem);
+ rv = sr_policy_add (&bsid, segments, encap_src, weight, type, fib_table,
+ is_encap, behavior, ls_plugin_mem);
vec_free (segments);
}
@@ -1045,9 +1089,9 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
if (operation == 3 && weight == (u32) ~ 0)
return clib_error_return (0, "No new weight for the SL specified");
- rv = sr_policy_mod ((sr_policy_index != (u32) ~ 0 ? NULL : &bsid),
+ rv = sr_policy_mod ((sr_policy_index != (u32) ~0 ? NULL : &bsid),
sr_policy_index, fib_table, operation, segments,
- sl_index, weight);
+ encap_src, sl_index, weight);
if (segments)
vec_free (segments);
@@ -1083,7 +1127,6 @@ sr_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_policy_command, static) = {
.path = "sr policy",
.short_help = "sr policy [add||del||mod] [bsid 2001::1||index 5] "
@@ -1103,7 +1146,6 @@ VLIB_CLI_COMMAND (sr_policy_command, static) = {
"SID lists.\n",
.function = sr_policy_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI to display onscreen all the SR policies
@@ -1123,10 +1165,8 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "SR policies:");
- /* *INDENT-OFF* */
pool_foreach (sr_policy, sm->sr_policies)
{vec_add1 (vec_policies, sr_policy); }
- /* *INDENT-ON* */
vec_foreach_index (i, vec_policies)
{
@@ -1137,9 +1177,24 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
vlib_cli_output (vm, "\tBehavior: %s",
(sr_policy->is_encap ? "Encapsulation" :
"SRH insertion"));
- vlib_cli_output (vm, "\tType: %s",
- (sr_policy->type ==
- SR_POLICY_TYPE_DEFAULT ? "Default" : "Spray"));
+ if (sr_policy->is_encap)
+ {
+ vlib_cli_output (vm, "\tEncapSrcIP: %U", format_ip6_address,
+ &sr_policy->encap_src);
+ }
+ switch (sr_policy->type)
+ {
+ case SR_POLICY_TYPE_SPRAY:
+ vlib_cli_output (vm, "\tType: %s", "Spray");
+ break;
+ case SR_POLICY_TYPE_TEF:
+ vlib_cli_output (vm, "\tType: %s",
+ "TEF (Timestamp, Encapsulate, and Forward)");
+ break;
+ default:
+ vlib_cli_output (vm, "\tType: %s", "Default");
+ break;
+ }
vlib_cli_output (vm, "\tFIB table: %u",
(sr_policy->fib_table !=
(u32) ~ 0 ? sr_policy->fib_table : 0));
@@ -1163,13 +1218,11 @@ show_sr_policies_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_policies_command, static) = {
.path = "show sr policies",
.short_help = "show sr policies",
.function = show_sr_policies_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI to display onscreen the SR encaps source addr
@@ -1184,13 +1237,11 @@ show_sr_encaps_source_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_encaps_source_command, static) = {
.path = "show sr encaps source addr",
.short_help = "show sr encaps source addr",
.function = show_sr_encaps_source_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI to display onscreen the hop-limit value used for SRv6 encapsulation
@@ -1205,13 +1256,11 @@ show_sr_encaps_hop_limit_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_encaps_hop_limit_command, static) = {
.path = "show sr encaps hop-limit",
.short_help = "show sr encaps hop-limit",
.function = show_sr_encaps_hop_limit_command_fn,
};
-/* *INDENT-ON* */
/*************************** SR rewrite graph node ****************************/
/**
@@ -1231,14 +1280,44 @@ format_sr_policy_rewrite_trace (u8 * s, va_list * args)
return s;
}
+/**
+ * @brief SRv6 TEF (Timestamp, Encapsulate, and Forward) behavior
+ */
+static_always_inline void
+srv6_tef_behavior (vlib_node_runtime_t *node, vlib_buffer_t *b0,
+ ip6_header_t *ip0)
+{
+ ip6_sr_header_t *srh;
+ ip6_sr_pt_tlv_t *srh_pt_tlv;
+ timestamp_64_t ts;
+ sr_pt_iface_t *ls = 0;
+ u16 id_ld = 0;
+ srh = (ip6_sr_header_t *) (ip0 + 1);
+
+ srh_pt_tlv =
+ (ip6_sr_pt_tlv_t *) ((u8 *) ip0 + sizeof (ip6_header_t) +
+ sizeof (ip6_sr_header_t) +
+ sizeof (ip6_address_t) * (srh->last_entry + 1));
+
+ unix_time_now_nsec_fraction (&ts.sec, &ts.nsec);
+ srh_pt_tlv->t64.sec = clib_host_to_net_u32 (ts.sec);
+ srh_pt_tlv->t64.nsec = clib_host_to_net_u32 (ts.nsec);
+ ls = sr_pt_find_iface (vnet_buffer (b0)->sw_if_index[VLIB_RX]);
+ if (ls)
+ {
+ id_ld = ls->id << 4;
+ id_ld |= ls->ingress_load;
+ srh_pt_tlv->id_ld = clib_host_to_net_u16 (id_ld);
+ }
+}
/**
* @brief IPv6 encapsulation processing as per RFC2473
*/
static_always_inline void
-encaps_processing_v6 (vlib_node_runtime_t * node,
- vlib_buffer_t * b0,
- ip6_header_t * ip0, ip6_header_t * ip0_encap)
+encaps_processing_v6 (vlib_node_runtime_t *node, vlib_buffer_t *b0,
+ ip6_header_t *ip0, ip6_header_t *ip0_encap,
+ u8 policy_type)
{
u32 new_l0;
u32 flow_label;
@@ -1256,6 +1335,8 @@ encaps_processing_v6 (vlib_node_runtime_t * node,
ip0_encap->ip_version_traffic_class_and_flow_label) &
0xfff00000) |
(flow_label & 0x0000ffff));
+ if (policy_type == SR_POLICY_TYPE_TEF)
+ srv6_tef_behavior (node, b0, ip0);
}
/**
@@ -1373,10 +1454,10 @@ sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
ip2 = vlib_buffer_get_current (b2);
ip3 = vlib_buffer_get_current (b3);
- encaps_processing_v6 (node, b0, ip0, ip0_encap);
- encaps_processing_v6 (node, b1, ip1, ip1_encap);
- encaps_processing_v6 (node, b2, ip2, ip2_encap);
- encaps_processing_v6 (node, b3, ip3, ip3_encap);
+ encaps_processing_v6 (node, b0, ip0, ip0_encap, sl0->policy_type);
+ encaps_processing_v6 (node, b1, ip1, ip1_encap, sl1->policy_type);
+ encaps_processing_v6 (node, b2, ip2, ip2_encap, sl2->policy_type);
+ encaps_processing_v6 (node, b3, ip3, ip3_encap, sl3->policy_type);
vnet_buffer (b0)->sw_if_index[VLIB_TX] = sl0->egress_fib_table;
vnet_buffer (b1)->sw_if_index[VLIB_TX] = sl1->egress_fib_table;
@@ -1463,7 +1544,7 @@ sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
ip0 = vlib_buffer_get_current (b0);
- encaps_processing_v6 (node, b0, ip0, ip0_encap);
+ encaps_processing_v6 (node, b0, ip0, ip0_encap, sl0->policy_type);
vnet_buffer (b0)->sw_if_index[VLIB_TX] = sl0->egress_fib_table;
@@ -1497,7 +1578,6 @@ sr_policy_rewrite_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = {
.function = sr_policy_rewrite_encaps,
.name = "sr-pl-rewrite-encaps",
@@ -1513,7 +1593,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief IPv4 encapsulation processing as per RFC2473
@@ -1790,7 +1869,6 @@ sr_policy_rewrite_encaps_v4 (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = {
.function = sr_policy_rewrite_encaps_v4,
.name = "sr-pl-rewrite-encaps-v4",
@@ -1806,7 +1884,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_v4_node) = {
#undef _
},
};
-/* *INDENT-ON* */
always_inline u32
ip_flow_hash (void *data)
@@ -2232,7 +2309,6 @@ sr_policy_rewrite_encaps_l2 (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = {
.function = sr_policy_rewrite_encaps_l2,
.name = "sr-pl-rewrite-encaps-l2",
@@ -2248,7 +2324,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_encaps_l2_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief Graph node for applying a SR policy into a packet. SRH insertion.
@@ -2654,7 +2729,6 @@ sr_policy_rewrite_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = {
.function = sr_policy_rewrite_insert,
.name = "sr-pl-rewrite-insert",
@@ -2670,7 +2744,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_insert_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief Graph node for applying a SR policy into a packet. BSID - SRH insertion.
@@ -3065,7 +3138,6 @@ sr_policy_rewrite_b_insert (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = {
.function = sr_policy_rewrite_b_insert,
.name = "sr-pl-rewrite-b-insert",
@@ -3081,16 +3153,14 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_b_insert_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/**
* @brief Function BSID encapsulation
*/
static_always_inline void
-end_bsid_encaps_srh_processing (vlib_node_runtime_t * node,
- vlib_buffer_t * b0,
- ip6_header_t * ip0,
- ip6_sr_header_t * sr0, u32 * next0)
+end_bsid_encaps_srh_processing (vlib_node_runtime_t *node, vlib_buffer_t *b0,
+ ip6_header_t *ip0, ip6_sr_header_t *sr0,
+ u32 *next0, u8 policy_type)
{
ip6_address_t *new_dst0;
@@ -3108,6 +3178,8 @@ end_bsid_encaps_srh_processing (vlib_node_runtime_t * node,
ip0->dst_address.as_u64[1] = new_dst0->as_u64[1];
return;
}
+ else if (sr0->segments_left == 0 && policy_type == SR_POLICY_TYPE_TEF)
+ return;
}
error_bsid_encaps:
@@ -3224,10 +3296,14 @@ sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
ip6_ext_header_find (vm, b3, ip3_encap, IP_PROTOCOL_IPV6_ROUTE,
NULL);
- end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0);
- end_bsid_encaps_srh_processing (node, b1, ip1_encap, sr1, &next1);
- end_bsid_encaps_srh_processing (node, b2, ip2_encap, sr2, &next2);
- end_bsid_encaps_srh_processing (node, b3, ip3_encap, sr3, &next3);
+ end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0,
+ sl0->policy_type);
+ end_bsid_encaps_srh_processing (node, b1, ip1_encap, sr1, &next1,
+ sl1->policy_type);
+ end_bsid_encaps_srh_processing (node, b2, ip2_encap, sr2, &next2,
+ sl2->policy_type);
+ end_bsid_encaps_srh_processing (node, b3, ip3_encap, sr3, &next3,
+ sl3->policy_type);
clib_memcpy_fast (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
sl0->rewrite, vec_len (sl0->rewrite));
@@ -3248,10 +3324,10 @@ sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
ip2 = vlib_buffer_get_current (b2);
ip3 = vlib_buffer_get_current (b3);
- encaps_processing_v6 (node, b0, ip0, ip0_encap);
- encaps_processing_v6 (node, b1, ip1, ip1_encap);
- encaps_processing_v6 (node, b2, ip2, ip2_encap);
- encaps_processing_v6 (node, b3, ip3, ip3_encap);
+ encaps_processing_v6 (node, b0, ip0, ip0_encap, sl0->policy_type);
+ encaps_processing_v6 (node, b1, ip1, ip1_encap, sl1->policy_type);
+ encaps_processing_v6 (node, b2, ip2, ip2_encap, sl2->policy_type);
+ encaps_processing_v6 (node, b3, ip3, ip3_encap, sl3->policy_type);
if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
{
@@ -3330,7 +3406,8 @@ sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
sr0 =
ip6_ext_header_find (vm, b0, ip0_encap, IP_PROTOCOL_IPV6_ROUTE,
NULL);
- end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0);
+ end_bsid_encaps_srh_processing (node, b0, ip0_encap, sr0, &next0,
+ sl0->policy_type);
clib_memcpy_fast (((u8 *) ip0_encap) - vec_len (sl0->rewrite),
sl0->rewrite, vec_len (sl0->rewrite));
@@ -3338,7 +3415,7 @@ sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
ip0 = vlib_buffer_get_current (b0);
- encaps_processing_v6 (node, b0, ip0, ip0_encap);
+ encaps_processing_v6 (node, b0, ip0, ip0_encap, sl0->policy_type);
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE) &&
PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -3370,7 +3447,6 @@ sr_policy_rewrite_b_encaps (vlib_main_t * vm, vlib_node_runtime_t * node,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = {
.function = sr_policy_rewrite_b_encaps,
.name = "sr-pl-rewrite-b-encaps",
@@ -3386,7 +3462,6 @@ VLIB_REGISTER_NODE (sr_policy_rewrite_b_encaps_node) = {
#undef _
},
};
-/* *INDENT-ON* */
/*************************** SR Policy plugins ******************************/
/**
@@ -3454,10 +3529,8 @@ show_sr_policy_behaviors_command_fn (vlib_main_t * vm,
vlib_cli_output (vm, "SR Policy behaviors:\n-----------------------\n\n");
- /* *INDENT-OFF* */
pool_foreach (plugin, sm->policy_plugin_functions)
{ vec_add1 (plugins_vec, plugin); }
- /* *INDENT-ON* */
vlib_cli_output (vm, "Plugin behaviors:\n");
for (i = 0; i < vec_len (plugins_vec); i++)
@@ -3470,13 +3543,11 @@ show_sr_policy_behaviors_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_policy_behaviors_command, static) = {
.path = "show sr policy behaviors",
.short_help = "show sr policy behaviors",
.function = show_sr_policy_behaviors_command_fn,
};
-/* *INDENT-ON* */
/*************************** SR Segment Lists DPOs ****************************/
static u8 *
diff --git a/src/vnet/srv6/sr_pt.api b/src/vnet/srv6/sr_pt.api
new file mode 100644
index 00000000000..e86359b421f
--- /dev/null
+++ b/src/vnet/srv6/sr_pt.api
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+option version = "1.0.0";
+
+import "vnet/interface_types.api";
+
+/** \brief SR PT iface dump request
+ @param client_index - opaque cookie to identifty the sender
+ @param context - sender context, to match reply w/ request
+*/
+define sr_pt_iface_dump
+{
+ u32 client_index;
+ u32 context;
+};
+
+define sr_pt_iface_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u16 id;
+ u8 ingress_load;
+ u8 egress_load;
+ u8 tts_template;
+};
+
+/** \brief SR PT iface add request
+ @param client_index - opaque cookie to identifty the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to add to SR PT
+ @param id - SR PT interface id
+ @param ingress_load - incoming interface load
+ @param egress_load - outgoing interface load
+ @param tts_template - truncated timestamp template to use
+*/
+autoreply define sr_pt_iface_add
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ u16 id;
+ u8 ingress_load;
+ u8 egress_load;
+ u8 tts_template;
+};
+
+/** \brief SR PT iface del request
+ @param client_index - opaque cookie to identifty the sender
+ @param context - sender context, to match reply w/ request
+ @param sw_if_index - index of the interface to delete from SR PT
+*/
+autoreply define sr_pt_iface_del
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+}; \ No newline at end of file
diff --git a/src/vnet/srv6/sr_pt.c b/src/vnet/srv6/sr_pt.c
new file mode 100644
index 00000000000..6299faa84ab
--- /dev/null
+++ b/src/vnet/srv6/sr_pt.c
@@ -0,0 +1,281 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+/**
+ * @file
+ * @brief SR Path Tracing (PT)
+ *
+ * SR PT CLI
+ *
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/ip/ip.h>
+#include <vnet/srv6/sr_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/dpo.h>
+#include <vnet/adj/adj.h>
+#include <vnet/srv6/sr_pt.h>
+
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+sr_pt_main_t sr_pt_main;
+
+void *
+sr_pt_find_iface (u32 iface)
+{
+ sr_pt_main_t *sr_pt = &sr_pt_main;
+ uword *p;
+
+ /* Search for the item */
+ p = mhash_get (&sr_pt->sr_pt_iface_index_hash, &iface);
+ if (p)
+ {
+ /* Retrieve sr_pt_iface */
+ return pool_elt_at_index (sr_pt->sr_pt_iface, p[0]);
+ }
+ return NULL;
+}
+
+int
+sr_pt_add_iface (u32 iface, u16 id, u8 ingress_load, u8 egress_load,
+ u8 tts_template)
+{
+ sr_pt_main_t *sr_pt = &sr_pt_main;
+ uword *p;
+
+ sr_pt_iface_t *ls = 0;
+
+ if (iface == (u32) ~0)
+ return SR_PT_ERR_IFACE_INVALID;
+
+ /* Search for the item */
+ p = mhash_get (&sr_pt->sr_pt_iface_index_hash, &iface);
+
+ if (p)
+ return SR_PT_ERR_EXIST;
+
+ if (id > SR_PT_ID_MAX)
+ return SR_PT_ERR_ID_INVALID;
+
+ if (ingress_load > SR_PT_LOAD_MAX || egress_load > SR_PT_LOAD_MAX)
+ return SR_PT_ERR_LOAD_INVALID;
+
+ if (tts_template > SR_PT_TTS_TEMPLATE_MAX)
+ return SR_PT_ERR_TTS_TEMPLATE_INVALID;
+
+ vnet_feature_enable_disable ("ip6-output", "pt", iface, 1, 0, 0);
+
+ /* Create a new sr_pt_iface */
+ pool_get_zero (sr_pt->sr_pt_iface, ls);
+ ls->iface = iface;
+ ls->id = id;
+ ls->ingress_load = ingress_load;
+ ls->egress_load = egress_load;
+ ls->tts_template = tts_template;
+
+ /* Set hash key for searching sr_pt_iface by iface */
+ mhash_set (&sr_pt->sr_pt_iface_index_hash, &iface, ls - sr_pt->sr_pt_iface,
+ NULL);
+ return 0;
+}
+
+int
+sr_pt_del_iface (u32 iface)
+{
+ sr_pt_main_t *sr_pt = &sr_pt_main;
+ uword *p;
+
+ sr_pt_iface_t *ls = 0;
+
+ if (iface == (u32) ~0)
+ return SR_PT_ERR_IFACE_INVALID;
+
+ /* Search for the item */
+ p = mhash_get (&sr_pt->sr_pt_iface_index_hash, &iface);
+
+ if (p)
+ {
+ /* Retrieve sr_pt_iface */
+ ls = pool_elt_at_index (sr_pt->sr_pt_iface, p[0]);
+ vnet_feature_enable_disable ("ip6-output", "pt", iface, 0, 0, 0);
+ /* Delete sr_pt_iface */
+ pool_put (sr_pt->sr_pt_iface, ls);
+ mhash_unset (&sr_pt->sr_pt_iface_index_hash, &iface, NULL);
+ }
+ else
+ {
+ return SR_PT_ERR_NOENT;
+ }
+ return 0;
+}
+
+/**
+ * @brief "sr pt add iface" CLI function.
+ *
+ * @see sr_pt_add_iface
+ */
+static clib_error_t *
+sr_pt_add_iface_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 iface = (u32) ~0;
+ u32 id = (u32) ~0;
+ u32 ingress_load = 0;
+ u32 egress_load = 0;
+ u32 tts_template = SR_PT_TTS_TEMPLATE_DEFAULT;
+
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &iface))
+ ;
+ else if (unformat (input, "id %u", &id))
+ ;
+ else if (unformat (input, "ingress-load %u", &ingress_load))
+ ;
+ else if (unformat (input, "egress-load %u", &egress_load))
+ ;
+ else if (unformat (input, "tts-template %u", &tts_template))
+ ;
+ else
+ break;
+ }
+
+ rv = sr_pt_add_iface (iface, id, ingress_load, egress_load, tts_template);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case SR_PT_ERR_EXIST:
+ return clib_error_return (0, "Error: Identical iface already exists.");
+ case SR_PT_ERR_IFACE_INVALID:
+ return clib_error_return (0, "Error: The iface name invalid.");
+ case SR_PT_ERR_ID_INVALID:
+ return clib_error_return (0, "Error: The iface id value invalid.");
+ case SR_PT_ERR_LOAD_INVALID:
+ return clib_error_return (
+ 0, "Error: The iface ingress or egress load value invalid.");
+ case SR_PT_ERR_TTS_TEMPLATE_INVALID:
+ return clib_error_return (
+ 0, "Error: The iface TTS Template value invalid.");
+ default:
+ return clib_error_return (0, "Error: unknown error.");
+ }
+ return 0;
+}
+
+/**
+ * @brief "sr pt del iface" CLI function.
+ *
+ * @see sr_pt_del_iface
+ */
+static clib_error_t *
+sr_pt_del_iface_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ u32 iface = (u32) ~0;
+
+ int rv;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%U", unformat_vnet_sw_interface, vnm, &iface))
+ ;
+ else
+ break;
+ }
+
+ rv = sr_pt_del_iface (iface);
+
+ switch (rv)
+ {
+ case 0:
+ break;
+ case SR_PT_ERR_NOENT:
+ return clib_error_return (0, "Error: No such iface.");
+ case SR_PT_ERR_IFACE_INVALID:
+ return clib_error_return (0, "Error: The iface name is not valid.");
+ default:
+ return clib_error_return (0, "Error: unknown error.");
+ }
+ return 0;
+}
+
+/**
+ * @brief CLI function to show all SR PT interfcaes
+ */
+static clib_error_t *
+sr_pt_show_iface_command_fn (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ sr_pt_main_t *sr_pt = &sr_pt_main;
+ sr_pt_iface_t **sr_pt_iface_list = 0;
+ sr_pt_iface_t *ls;
+ int i;
+
+ vlib_cli_output (vm, "SR PT Interfaces");
+ vlib_cli_output (vm, "==================================");
+
+ pool_foreach (ls, sr_pt->sr_pt_iface)
+ {
+ vec_add1 (sr_pt_iface_list, ls);
+ };
+
+ for (i = 0; i < vec_len (sr_pt_iface_list); i++)
+ {
+ ls = sr_pt_iface_list[i];
+ vlib_cli_output (
+ vm,
+ "\tiface : \t%U\n\tid : \t%d\n\tingress-load: "
+ "\t%d\n\tegress-load : \t%d\n\ttts-template: \t%d ",
+ format_vnet_sw_if_index_name, vnm, ls->iface, ls->id, ls->ingress_load,
+ ls->egress_load, ls->tts_template);
+ vlib_cli_output (vm, "--------------------------------");
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (sr_pt_add_iface_command, static) = {
+ .path = "sr pt add iface",
+ .short_help = "sr pt add iface <iface-name> id <pt-iface-id> ingress-load "
+ "<ingress-load-value> egress-load <egress-load-value> "
+ "tts-template <tts-template-value>",
+ .function = sr_pt_add_iface_command_fn,
+};
+
+VLIB_CLI_COMMAND (sr_pt_del_iface_command, static) = {
+ .path = "sr pt del iface",
+ .short_help = "sr pt del iface <iface-name>",
+ .function = sr_pt_del_iface_command_fn,
+};
+
+VLIB_CLI_COMMAND (sr_pt_show_iface_command, static) = {
+ .path = "sr pt show iface",
+ .short_help = "sr pt show iface",
+ .function = sr_pt_show_iface_command_fn,
+};
+
+/**
+ * * @brief SR PT initialization
+ * */
+clib_error_t *
+sr_pt_init (vlib_main_t *vm)
+{
+ sr_pt_main_t *pt = &sr_pt_main;
+ mhash_init (&pt->sr_pt_iface_index_hash, sizeof (uword), sizeof (u32));
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (sr_pt_init); \ No newline at end of file
diff --git a/src/vnet/srv6/sr_pt.h b/src/vnet/srv6/sr_pt.h
new file mode 100644
index 00000000000..53001e10ac7
--- /dev/null
+++ b/src/vnet/srv6/sr_pt.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+/**
+ * @file
+ * @brief SR Path Tracing data structures definitions
+ *
+ */
+
+#ifndef included_vnet_sr_pt_h
+#define included_vnet_sr_pt_h
+
+#define IP6_HBH_PT_TYPE 50
+
+/*SR PT error codes*/
+#define SR_PT_ERR_NOENT -1 /* No such entry*/
+#define SR_PT_ERR_EXIST -2 /* Entry exists */
+#define SR_PT_ERR_IFACE_INVALID -3 /* IFACE invalid */
+#define SR_PT_ERR_ID_INVALID -4 /* ID invalid */
+#define SR_PT_ERR_LOAD_INVALID -5 /* LOAD invalid*/
+#define SR_PT_ERR_TTS_TEMPLATE_INVALID -6 /* TTS Template invalid */
+
+/*SR PT paramters max values*/
+#define SR_PT_ID_MAX 4095
+#define SR_PT_LOAD_MAX 15
+#define SR_PT_TTS_TEMPLATE_MAX 3
+
+/*SR PT TTS Templates*/
+#define SR_PT_TTS_TEMPLATE_0 0
+#define SR_PT_TTS_TEMPLATE_1 1
+#define SR_PT_TTS_TEMPLATE_2 2
+#define SR_PT_TTS_TEMPLATE_3 3
+#define SR_PT_TTS_TEMPLATE_DEFAULT 2
+
+/*SR PT TTS Template shift value*/
+#define SR_PT_TTS_SHIFT_TEMPLATE_0 8
+#define SR_PT_TTS_SHIFT_TEMPLATE_1 12
+#define SR_PT_TTS_SHIFT_TEMPLATE_2 16
+#define SR_PT_TTS_SHIFT_TEMPLATE_3 20
+
+/*PT node behaviors*/
+#define PT_BEHAVIOR_SRC 0
+#define PT_BEHAVIOR_MID 1
+#define PT_BEHAVIOR_SNK 2
+
+typedef struct
+{
+ u32 iface; /**< Interface */
+ u16 id; /**< Interface ID */
+ u8 ingress_load; /**< Interface Ingress Load */
+ u8 egress_load; /**< Interface Egress Load */
+ u8 tts_template; /**< Interface TTS Template */
+} sr_pt_iface_t;
+
+typedef struct
+{
+ u16 oif_oil;
+ u8 tts;
+} __clib_packed sr_pt_cmd_t;
+
+typedef struct
+{
+ sr_pt_cmd_t cmd_stack[12];
+} __clib_packed ip6_hop_by_hop_option_pt_t;
+
+/**
+ * @brief SR Path Tracing main datastructure
+ */
+typedef struct
+{
+ /* Pool of sr_pt_iface instances */
+ sr_pt_iface_t *sr_pt_iface;
+
+ /* Hash table for sr_pt_iface parameters */
+ mhash_t sr_pt_iface_index_hash;
+
+ /* convenience */
+ u16 msg_id_base;
+} sr_pt_main_t;
+
+extern sr_pt_main_t sr_pt_main;
+extern vlib_node_registration_t sr_pt_node;
+extern int sr_pt_add_iface (u32 iface, u16 id, u8 ingress_load, u8 egress_load,
+ u8 tts_template);
+extern int sr_pt_del_iface (u32 iface);
+extern void *sr_pt_find_iface (u32 iface);
+
+#endif /* included_vnet_sr_pt_h */ \ No newline at end of file
diff --git a/src/vnet/srv6/sr_pt_api.c b/src/vnet/srv6/sr_pt_api.c
new file mode 100644
index 00000000000..b0b67a210fb
--- /dev/null
+++ b/src/vnet/srv6/sr_pt_api.c
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+#include <vnet/srv6/sr_pt.h>
+
+#include <vnet/interface.h>
+#include <vnet/api_errno.h>
+
+#include <vnet/srv6/sr_pt.api_enum.h>
+#include <vnet/srv6/sr_pt.api_types.h>
+
+#define REPLY_MSG_ID_BASE sr_pt_main.msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+static void
+send_sr_pt_iface_details (sr_pt_iface_t *t, vl_api_registration_t *reg,
+ u32 context)
+{
+ vl_api_sr_pt_iface_details_t *rmp;
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id = ntohs (REPLY_MSG_ID_BASE + VL_API_SR_PT_IFACE_DETAILS);
+
+ rmp->sw_if_index = ntohl (t->iface);
+ rmp->id = ntohs (t->id);
+ rmp->ingress_load = t->ingress_load;
+ rmp->egress_load = t->egress_load;
+ rmp->tts_template = t->tts_template;
+
+ rmp->context = context;
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_sr_pt_iface_dump_t_handler (vl_api_sr_pt_iface_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ sr_pt_main_t *pt = &sr_pt_main;
+ sr_pt_iface_t *t;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ pool_foreach (t, pt->sr_pt_iface)
+ {
+ send_sr_pt_iface_details (t, reg, mp->context);
+ }
+}
+
+static void
+vl_api_sr_pt_iface_add_t_handler (vl_api_sr_pt_iface_add_t *mp)
+{
+ vl_api_sr_pt_iface_add_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = sr_pt_add_iface (ntohl (mp->sw_if_index), ntohs (mp->id),
+ mp->ingress_load, mp->egress_load, mp->tts_template);
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SR_PT_IFACE_ADD_REPLY);
+}
+
+static void
+vl_api_sr_pt_iface_del_t_handler (vl_api_sr_pt_iface_del_t *mp)
+{
+ vl_api_sr_pt_iface_del_reply_t *rmp;
+ int rv = 0;
+
+ VALIDATE_SW_IF_INDEX (mp);
+
+ rv = sr_pt_del_iface (ntohl (mp->sw_if_index));
+
+ BAD_SW_IF_INDEX_LABEL;
+ REPLY_MACRO (VL_API_SR_PT_IFACE_DEL_REPLY);
+}
+
+#include <vnet/srv6/sr_pt.api.c>
+static clib_error_t *
+sr_pt_api_hookup (vlib_main_t *vm)
+{
+ /*
+ * Set up the (msg_name, crc, message-id) table
+ */
+ REPLY_MSG_ID_BASE = setup_message_id_table ();
+
+ return 0;
+}
+
+VLIB_API_INIT_FUNCTION (sr_pt_api_hookup); \ No newline at end of file
diff --git a/src/vnet/srv6/sr_pt_node.c b/src/vnet/srv6/sr_pt_node.c
new file mode 100644
index 00000000000..fa8b1f69b57
--- /dev/null
+++ b/src/vnet/srv6/sr_pt_node.c
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/fib/ip6_fib.h>
+#include <vnet/dpo/load_balance.h>
+#include <vnet/l2/feat_bitmap.h>
+#include <vnet/fib/fib_table.h>
+#include <vnet/srv6/sr.h>
+#include <vnet/srv6/sr_pt.h>
+
+/**
+ * @brief PT node trace
+ */
+typedef struct
+{
+ u32 iface;
+ u16 id;
+ u8 load;
+ timestamp_64_t t64;
+ u8 tts_template;
+ u8 tts;
+ u8 behavior;
+} pt_trace_t;
+
+static u8 *
+format_pt_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ pt_trace_t *t = va_arg (*args, pt_trace_t *);
+ switch (t->behavior)
+ {
+ case PT_BEHAVIOR_MID:
+ s = format (
+ s,
+ "Behavior Midpoint, outgoing interface %U, outgoing interface id %u, "
+ "outgoing interface load %u, t64_sec %u, t64_nsec %u, tts_template "
+ "%u, tts %u",
+ format_vnet_sw_if_index_name, vnet_get_main (), t->iface, t->id,
+ t->load, clib_host_to_net_u32 (t->t64.sec),
+ clib_host_to_net_u32 (t->t64.nsec), t->tts_template, t->tts);
+ break;
+ default:
+ break;
+ }
+ return s;
+}
+
+static_always_inline void
+pt_midpoint_processing (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t *b0, ip6_header_t *ip0,
+ sr_pt_iface_t *ls, timestamp_64_t t64)
+{
+ ip6_hop_by_hop_header_t *hbh;
+ ip6_hop_by_hop_option_t *hbh_opt;
+ ip6_hop_by_hop_option_pt_t *hbh_opt_pt;
+
+ if (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)
+ {
+ hbh = (void *) (ip0 + 1);
+ hbh_opt = (void *) (hbh + 1);
+ if (hbh_opt->type == IP6_HBH_PT_TYPE)
+ {
+ hbh_opt_pt = (void *) (hbh_opt + 1);
+ clib_memcpy_fast (&hbh_opt_pt->cmd_stack[1],
+ &hbh_opt_pt->cmd_stack[0], 33);
+ hbh_opt_pt->cmd_stack[0].oif_oil =
+ clib_host_to_net_u16 (ls->id << 4);
+ hbh_opt_pt->cmd_stack[0].oif_oil |= ls->egress_load;
+ switch (ls->tts_template)
+ {
+ case SR_PT_TTS_TEMPLATE_0:
+ hbh_opt_pt->cmd_stack[0].tts =
+ t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_0;
+ break;
+ case SR_PT_TTS_TEMPLATE_1:
+ hbh_opt_pt->cmd_stack[0].tts =
+ t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_1;
+ break;
+ case SR_PT_TTS_TEMPLATE_2:
+ hbh_opt_pt->cmd_stack[0].tts =
+ t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_2;
+ break;
+ case SR_PT_TTS_TEMPLATE_3:
+ hbh_opt_pt->cmd_stack[0].tts =
+ t64.nsec >> SR_PT_TTS_SHIFT_TEMPLATE_0;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ return;
+}
+
+VLIB_NODE_FN (sr_pt_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ u32 n_left_from, next_index, *from, *to_next;
+ from = vlib_frame_vector_args (from_frame);
+ n_left_from = from_frame->n_vectors;
+ next_index = node->cached_next_index;
+ u8 pt_behavior = ~(u8) 0;
+ sr_pt_iface_t *ls = 0;
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ // Getting the timestamp (one for each batch of packets)
+ timestamp_64_t t64 = {};
+ unix_time_now_nsec_fraction (&t64.sec, &t64.nsec);
+
+ // Single loop for potentially the last three packets
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ u32 bi0;
+ u32 iface;
+ vlib_buffer_t *b0;
+ u32 next0 = 0;
+ ethernet_header_t *en0;
+ ip6_header_t *ip0 = 0;
+ bi0 = from[0];
+ to_next[0] = bi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ iface = vnet_buffer (b0)->sw_if_index[VLIB_TX];
+ ls = sr_pt_find_iface (iface);
+ if (ls)
+ {
+ en0 = vlib_buffer_get_current (b0);
+ ip0 = (void *) (en0 + 1);
+ pt_midpoint_processing (vm, node, b0, ip0, ls, t64);
+ pt_behavior = PT_BEHAVIOR_MID;
+ }
+ if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+ {
+ pt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
+ tr->iface = iface;
+ tr->id = ls->id;
+ tr->load = ls->egress_load;
+ tr->tts_template = ls->tts_template;
+ tr->t64.sec = t64.sec;
+ tr->t64.nsec = t64.nsec;
+ tr->tts = t64.nsec >> 20;
+ tr->behavior = pt_behavior;
+ }
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+ n_left_to_next, bi0, next0);
+ }
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return from_frame->n_vectors;
+}
+
+VLIB_REGISTER_NODE (sr_pt_node) = {
+ .name = "pt",
+ .vector_size = sizeof (u32),
+ .format_trace = format_pt_trace,
+ .type = VLIB_NODE_TYPE_INTERNAL,
+ .n_errors = 0,
+ .n_next_nodes = 1,
+ .next_nodes = { [0] = "interface-output" },
+};
+
+VNET_FEATURE_INIT (sr_pt_node, static) = {
+ .arc_name = "ip6-output",
+ .node_name = "pt",
+}; \ No newline at end of file
diff --git a/src/vnet/srv6/sr_steering.c b/src/vnet/srv6/sr_steering.c
index cb1d81742eb..94c3d67a27a 100644
--- a/src/vnet/srv6/sr_steering.c
+++ b/src/vnet/srv6/sr_steering.c
@@ -184,9 +184,6 @@ sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
else
sr_policy = pool_elt_at_index (sm->sr_policies, sr_policy_index);
- if (!sr_policy)
- return -2;
-
steer_pl->sr_policy = sr_policy - sm->sr_policies;
/* Remove old FIB/hw redirection and create a new one */
@@ -459,7 +456,6 @@ sr_steer_policy_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_steer_policy_command, static) = {
.path = "sr steer",
.short_help = "sr steer (del) [l3 <ip_addr/mask>|l2 <sf_if>] "
@@ -474,7 +470,6 @@ VLIB_CLI_COMMAND (sr_steer_policy_command, static) = {
"\t\tsr steer del l3 2001::/64 via sr_policy index 5\n",
.function = sr_steer_policy_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_sr_steering_policies_command_fn (vlib_main_t * vm,
@@ -491,9 +486,7 @@ show_sr_steering_policies_command_fn (vlib_main_t * vm,
int i;
vlib_cli_output (vm, "SR steering policies:");
- /* *INDENT-OFF* */
pool_foreach (steer_pl, sm->steer_policies) {vec_add1(steer_policies, steer_pl);}
- /* *INDENT-ON* */
vlib_cli_output (vm, "Traffic\t\tSR policy BSID");
for (i = 0; i < vec_len (steer_policies); i++)
{
@@ -526,13 +519,11 @@ show_sr_steering_policies_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_sr_steering_policies_command, static) = {
.path = "show sr steering-policies",
.short_help = "show sr steering-policies",
.function = show_sr_steering_policies_command_fn,
};
-/* *INDENT-ON* */
clib_error_t *
sr_steering_init (vlib_main_t * vm)
@@ -550,18 +541,14 @@ sr_steering_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (sr_steering_init);
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (sr_pl_rewrite_encaps_l2, static) =
{
.arc_name = "device-input",
.node_name = "sr-pl-rewrite-encaps-l2",
.runs_before = VNET_FEATURES ("ethernet-input"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/srv6/sr_steering.md b/src/vnet/srv6/sr_steering.md
deleted file mode 100644
index ca5cc7b6c7a..00000000000
--- a/src/vnet/srv6/sr_steering.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# Steering packets into a SR Policy {#srv6_steering_doc}
-
-## steer packets uging the sr steering policy
-
-To steer packets in Transit into an SR policy (T.Insert, T.Encaps and T.Encaps.L2 behaviors), the user needs to create an 'sr steering policy'.
-
- sr steer l3 2001::/64 via index 1
- sr steer l3 2001::/64 via bsid cafe::1
- sr steer l3 2001::/64 via bsid cafe::1 fib-table 3
- sr steer l3 10.0.0.0/16 via bsid cafe::1
- sr steer l2 TenGE0/1/0 via bsid cafe::1
-
-Disclaimer: The T.Encaps.L2 will steer L2 frames into an SR Policy. Notice that creating an SR steering policy for L2 frames will actually automatically *put the interface into promiscous mode*.
-
-## steer packets using the classifier
-
-Another way to steer packet is to use the classifier.
-
-First the user need to manually add the source routing node to the list of the
-ip6-inacl next nodes.
-Using the python api this can be donne with:
-
- # jsonfiles = get list of json api files
- vpp = VPP(jsonfiles)
- vpp.add_node_next(node_name='ip6-inacl', next_name='sr-pl-rewrite-insert')
-
-Below is a classifier mask filtering all the packets from the interface
-TenGigabitEthernet5/0/0 on ip version and moving all ipv6 packets to the
-sr-pl-rewrite-insert node (dropping the others) and applying the source routing
-index 2.
-In essence, this means "apply this sr policy to all the packets from this interface)
-
- vpp# classify table miss-next 0 current-data-flag 1 mask hex f000000000000000 skip 0
- vpp# classify session acl-hit-next 1 table-index 0 match hex 6000000000000000 action set-sr-policy-index 2
- vpp# set interface input acl intfc TenGigabitEthernet5/0/0 ip6-table 0
diff --git a/src/vnet/srv6/sr_steering.rst b/src/vnet/srv6/sr_steering.rst
new file mode 100644
index 00000000000..b8a82e57550
--- /dev/null
+++ b/src/vnet/srv6/sr_steering.rst
@@ -0,0 +1,50 @@
+.. _srv6_steering_doc:
+
+Steering packets into a SR Policy
+=================================
+
+steer packets using the sr steering policy
+------------------------------------------
+
+To steer packets in Transit into an SR policy (T.Insert, T.Encaps and
+T.Encaps.L2 behaviors), the user needs to create an ‘sr steering
+policy’.
+
+::
+
+ sr steer l3 2001::/64 via index 1
+ sr steer l3 2001::/64 via bsid cafe::1
+ sr steer l3 2001::/64 via bsid cafe::1 fib-table 3
+ sr steer l3 10.0.0.0/16 via bsid cafe::1
+ sr steer l2 TenGE0/1/0 via bsid cafe::1
+
+Disclaimer: The T.Encaps.L2 will steer L2 frames into an SR Policy.
+Notice that creating an SR steering policy for L2 frames will actually
+automatically *put the interface into promiscous mode*.
+
+steer packets using the classifier
+----------------------------------
+
+Another way to steer packet is to use the classifier.
+
+First the user need to manually add the source routing node to the list
+of the ip6-inacl next nodes. Using the python api this can be done
+with:
+
+::
+
+ # jsonfiles = get list of json api files
+ vpp = VPP(jsonfiles)
+ vpp.add_node_next(node_name='ip6-inacl', next_name='sr-pl-rewrite-insert')
+
+Below is a classifier mask filtering all the packets from the interface
+TenGigabitEthernet5/0/0 on ip version and moving all ipv6 packets to the
+sr-pl-rewrite-insert node (dropping the others) and applying the source
+routing index 2. In essence, this means “apply this sr policy to all the
+packets from this interface)
+
+::
+
+ vpp# classify table miss-next 0 current-data-flag 1 mask hex f000000000000000 skip 0
+ vpp# classify session acl-hit-next 1 table-index 0 match hex 6000000000000000 action set-sr-policy-index 2
+ vpp# set interface input acl intfc TenGigabitEthernet5/0/0 ip6-table 0
diff --git a/src/vnet/srv6/sr_test.c b/src/vnet/srv6/sr_test.c
new file mode 100644
index 00000000000..be898599e96
--- /dev/null
+++ b/src/vnet/srv6/sr_test.c
@@ -0,0 +1,218 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vpp/api/types.h>
+#include <vnet/ip/ip_types_api.h>
+
+#define __plugin_msg_base sr_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <vnet/format_fns.h>
+#include <vnet/srv6/sr.api_enum.h>
+#include <vnet/srv6/sr.api_types.h>
+
+#define vl_endianfun /* define message structures */
+#include <vnet/srv6/sr.api.h>
+#undef vl_endianfun
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ u32 ping_id;
+ vat_main_t *vat_main;
+} sr_test_main_t;
+
+static sr_test_main_t sr_test_main;
+
+static int
+api_sr_steering_add_del (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_set_encap_hop_limit (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_set_encap_source (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_policy_del (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_policy_mod (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_policy_add (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_policy_mod_v2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_policy_add_v2 (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_localsids_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_policies_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_policies_v2_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_policies_with_sl_index_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static int
+api_sr_steering_pol_dump (vat_main_t *vam)
+{
+ return -1;
+}
+
+static void
+vl_api_sr_policies_details_t_handler (vl_api_sr_policies_details_t *mp)
+{
+}
+
+static void
+vl_api_sr_policies_v2_details_t_handler (vl_api_sr_policies_v2_details_t *mp)
+{
+}
+
+static void
+vl_api_sr_localsids_details_t_handler (vl_api_sr_localsids_details_t *mp)
+{
+}
+
+static void
+vl_api_sr_policies_with_sl_index_details_t_handler (
+ vl_api_sr_policies_with_sl_index_details_t *mp)
+{
+}
+
+static void
+vl_api_sr_steering_pol_details_t_handler (vl_api_sr_steering_pol_details_t *mp)
+{
+}
+
+static int
+api_sr_localsid_add_del (vat_main_t *vam)
+{
+ unformat_input_t *i = vam->input;
+ vl_api_sr_localsid_add_del_t *mp;
+
+ u8 is_del;
+ ip6_address_t localsid;
+ u8 end_psp = 0;
+ u8 behavior = ~0;
+ u32 sw_if_index;
+ u32 fib_table = ~(u32) 0;
+ ip46_address_t nh_addr;
+ clib_memset (&nh_addr, 0, sizeof (ip46_address_t));
+
+ bool nexthop_set = 0;
+
+ int ret;
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "del"))
+ is_del = 1;
+ else if (unformat (i, "address %U", unformat_ip6_address, &localsid))
+ ;
+ else if (unformat (i, "next-hop %U", unformat_ip46_address, &nh_addr))
+ nexthop_set = 1;
+ else if (unformat (i, "behavior %u", &behavior))
+ ;
+ else if (unformat (i, "sw_if_index %u", &sw_if_index))
+ ;
+ else if (unformat (i, "fib-table %u", &fib_table))
+ ;
+ else if (unformat (i, "end.psp %u", &behavior))
+ ;
+ else
+ break;
+ }
+
+ M (SR_LOCALSID_ADD_DEL, mp);
+
+ clib_memcpy (mp->localsid, &localsid, sizeof (mp->localsid));
+
+ if (nexthop_set)
+ {
+ clib_memcpy (&mp->nh_addr.un, &nh_addr, sizeof (mp->nh_addr.un));
+ }
+ mp->behavior = behavior;
+ mp->sw_if_index = ntohl (sw_if_index);
+ mp->fib_table = ntohl (fib_table);
+ mp->end_psp = end_psp;
+ mp->is_del = is_del;
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+#include <vnet/srv6/sr.api_test.c>
+
+VAT_REGISTER_FEATURE_FUNCTION (vat_sr_plugin_register);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/syslog/sylog_doc.md b/src/vnet/syslog/sylog_doc.md
deleted file mode 100644
index 0b48d4db573..00000000000
--- a/src/vnet/syslog/sylog_doc.md
+++ /dev/null
@@ -1,65 +0,0 @@
-# Syslog protocol support {#syslog_doc}
-
-VPP provides [RFC5424](https://tools.ietf.org/html/rfc5424) syslog protocol
-logging, which is used to transport event messages across network. VPP
-currently suports UDP transport based on
-[RFC5426](https://tools.ietf.org/html/rfc5426).
-
-The syslog message has the following format:
-* header
-* structured data
-* free-form message
-
-The header contains, priority, version, timestamp, hostname, application,
-process id and message id. It is followed by structured data, which provides
-a mechanism to express event data in easily parsable format. Structured data
-can contain zero, one or multiple structured data elements. Structured data
-element contains name-value pairs. Structured data can by followed by free-form
-message.
-
-Following example explains how to use the internal APIs to genrate syslog
-message:
-```{.c}
- #include <vnet/syslog/syslog.h>
-
- ...
-
- syslog_msg_t syslog_msg;
-
- /* Check if syslog logging is enabled */
- if (!syslog_is_enabled ())
- return;
-
- /* Severity filer test */
- if (syslog_severity_filter_block (severity))
- return;
-
- /* Initialize syslog message header */
- syslog_msg_init (&syslog_msg, facility, severity, "NAT", "SADD");
-
- /* Create structured data element */
- syslog_msg_sd_init (&syslog_msg, "nsess");
- /* Add structured data element parameters (name-value pairs) */
- syslog_msg_add_sd_param (&syslog_msg, "SSUBIX", "%d", ssubix);
- syslog_msg_add_sd_param (&syslog_msg, "SVLAN", "%d", svlan);
- syslog_msg_add_sd_param (&syslog_msg, "IATYP", "IPv4");
- syslog_msg_add_sd_param (&syslog_msg, "ISADDR", "%U",
- format_ip4_address, isaddr);
- syslog_msg_add_sd_param (&syslog_msg, "ISPORT", "%d", isport);
- syslog_msg_add_sd_param (&syslog_msg, "XATYP", "IPv4");
- syslog_msg_add_sd_param (&syslog_msg, "XSADDR", "%U",
- format_ip4_address, xsaddr);
- syslog_msg_add_sd_param (&syslog_msg, "XSPORT", "%d", xsport);
- syslog_msg_add_sd_param (&syslog_msg, "PROTO", "%d", proto);
-
- /* Send syslog message */
- syslog_msg_send (&syslog_msg);
-```
-
-Example above produces following syslog message:
- <134>1 2018-11-12T11:25:30.252715Z 172.16.4.1 NAT 5901 SADD [nsess SSUBIX="0" SVLAN="0" IATYP="IPv4" ISADDR="172.16.1.2" ISPORT="6303" XATYP="IPv4" XSADDR="10.0.0.3" XSPORT="16253" PROTO="6"]
-
-To add free-form message use:
-```{.c}
- syslog_msg_add_msg (&syslog_msg, "event log entry");
-```
diff --git a/src/vnet/syslog/sylog_doc.rst b/src/vnet/syslog/sylog_doc.rst
new file mode 100644
index 00000000000..f39c9c490dc
--- /dev/null
+++ b/src/vnet/syslog/sylog_doc.rst
@@ -0,0 +1,70 @@
+.. _syslog_doc:
+
+Syslog protocol support
+=======================
+
+VPP provides `RFC5424 <https://tools.ietf.org/html/rfc5424>`__ syslog
+protocol logging, which is used to transport event messages across
+network. VPP currently supports UDP transport based on
+`RFC5426 <https://tools.ietf.org/html/rfc5426>`__.
+
+The syslog message has the following format: \* header \* structured
+data \* free-form message
+
+The header contains, priority, version, timestamp, hostname,
+application, process id and message id. It is followed by structured
+data, which provides a mechanism to express event data in easily
+parsable format. Structured data can contain zero, one or multiple
+structured data elements. Structured data element contains name-value
+pairs. Structured data can by followed by free-form message.
+
+Following example explains how to use the internal APIs to generate
+syslog message:
+
+.. code:: c
+
+ #include <vnet/syslog/syslog.h>
+
+ ...
+
+ syslog_msg_t syslog_msg;
+
+ /* Check if syslog logging is enabled */
+ if (!syslog_is_enabled ())
+ return;
+
+ /* Severity filer test */
+ if (syslog_severity_filter_block (severity))
+ return;
+
+ /* Initialize syslog message header */
+ syslog_msg_init (&syslog_msg, facility, severity, "NAT", "SADD");
+
+ /* Create structured data element */
+ syslog_msg_sd_init (&syslog_msg, "nsess");
+ /* Add structured data element parameters (name-value pairs) */
+ syslog_msg_add_sd_param (&syslog_msg, "SSUBIX", "%d", ssubix);
+ syslog_msg_add_sd_param (&syslog_msg, "SVLAN", "%d", svlan);
+ syslog_msg_add_sd_param (&syslog_msg, "IATYP", "IPv4");
+ syslog_msg_add_sd_param (&syslog_msg, "ISADDR", "%U",
+ format_ip4_address, isaddr);
+ syslog_msg_add_sd_param (&syslog_msg, "ISPORT", "%d", isport);
+ syslog_msg_add_sd_param (&syslog_msg, "XATYP", "IPv4");
+ syslog_msg_add_sd_param (&syslog_msg, "XSADDR", "%U",
+ format_ip4_address, xsaddr);
+ syslog_msg_add_sd_param (&syslog_msg, "XSPORT", "%d", xsport);
+ syslog_msg_add_sd_param (&syslog_msg, "PROTO", "%d", proto);
+
+ /* Send syslog message */
+ syslog_msg_send (&syslog_msg);
+
+Example above produces following syslog message: <134>1
+2018-11-12T11:25:30.252715Z 172.16.4.1 NAT 5901 SADD [nsess SSUBIX=“0”
+SVLAN=“0” IATYP=“IPv4” ISADDR=“172.16.1.2” ISPORT=“6303” XATYP=“IPv4”
+XSADDR=“10.0.0.3” XSPORT=“16253” PROTO=“6”]
+
+To add free-form message use:
+
+.. code:: c
+
+ syslog_msg_add_msg (&syslog_msg, "event log entry");
diff --git a/src/vnet/syslog/syslog.c b/src/vnet/syslog/syslog.c
index 8f3313950e8..caa55830eb3 100644
--- a/src/vnet/syslog/syslog.c
+++ b/src/vnet/syslog/syslog.c
@@ -506,7 +506,6 @@ show_syslog_filter_command_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
/*?
* Set syslog sender configuration.
*
@@ -599,7 +598,6 @@ VLIB_CLI_COMMAND (show_syslog_filter_command, static) = {
.short_help = "show syslog filter",
.function = show_syslog_filter_command_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
syslog_init (vlib_main_t * vm)
diff --git a/src/vnet/syslog/syslog_api.c b/src/vnet/syslog/syslog_api.c
index 21e79c6e2bd..195a6e52eef 100644
--- a/src/vnet/syslog/syslog_api.c
+++ b/src/vnet/syslog/syslog_api.c
@@ -128,7 +128,6 @@ vl_api_syslog_get_sender_t_handler (vl_api_syslog_get_sender_t * mp)
syslog_main_t *sm = &syslog_main;
u32 vrf_id;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SYSLOG_GET_SENDER_REPLY,
({
clib_memcpy (&rmp->collector_address, &(sm->collector),
@@ -143,7 +142,6 @@ vl_api_syslog_get_sender_t_handler (vl_api_syslog_get_sender_t * mp)
rmp->vrf_id = vrf_id;
rmp->max_msg_size = htonl (sm->max_msg_size);
}))
- /* *INDENT-ON* */
}
static void
@@ -171,12 +169,10 @@ vl_api_syslog_get_filter_t_handler (vl_api_syslog_get_filter_t * mp)
vl_api_syslog_get_filter_reply_t *rmp;
syslog_main_t *sm = &syslog_main;
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_SYSLOG_GET_FILTER_REPLY,
({
rv = syslog_severity_encode (sm->severity_filter, &rmp->severity);
}))
- /* *INDENT-ON* */
}
#include <vnet/syslog/syslog.api.c>
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 4b1dd8e5cf5..efc72a227e8 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -25,6 +25,8 @@
#include <vnet/dpo/load_balance.h>
#include <math.h>
+#include <vlib/stats/stats.h>
+
tcp_main_t tcp_main;
typedef struct
@@ -71,6 +73,10 @@ tcp_add_del_adjacency (tcp_connection_t * tc, u8 is_add)
static void
tcp_cc_init (tcp_connection_t * tc)
{
+ /* As per RFC 6582 initialize "recover" to iss */
+ if (tcp_opts_sack_permitted (&tc->rcv_opts))
+ tc->snd_congestion = tc->iss;
+
tc->cc_algo->init (tc);
}
@@ -108,7 +114,7 @@ tcp_cc_algo_new_type (const tcp_cc_algorithm_t * vft)
}
static u32
-tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl)
+tcp_connection_bind (u32 session_index, transport_endpoint_cfg_t *lcl)
{
tcp_main_t *tm = &tcp_main;
tcp_connection_t *listener;
@@ -143,7 +149,7 @@ tcp_connection_bind (u32 session_index, transport_endpoint_t * lcl)
}
static u32
-tcp_session_bind (u32 session_index, transport_endpoint_t * tep)
+tcp_session_bind (u32 session_index, transport_endpoint_cfg_t *tep)
{
return tcp_connection_bind (session_index, tep);
}
@@ -184,8 +190,7 @@ tcp_session_get_listener (u32 listener_index)
static tcp_connection_t *
tcp_half_open_connection_alloc (void)
{
- ASSERT (vlib_get_thread_index () == 0);
- return tcp_connection_alloc (0);
+ return tcp_connection_alloc (transport_cl_thread ());
}
/**
@@ -195,7 +200,8 @@ tcp_half_open_connection_alloc (void)
static void
tcp_half_open_connection_free (tcp_connection_t * tc)
{
- ASSERT (vlib_get_thread_index () == 0);
+ ASSERT (vlib_get_thread_index () == tc->c_thread_index ||
+ vlib_thread_is_main_w_barrier ());
return tcp_connection_free (tc);
}
@@ -236,8 +242,8 @@ tcp_connection_cleanup (tcp_connection_t * tc)
/* Cleanup local endpoint if this was an active connect */
if (!(tc->cfg_flags & TCP_CFG_F_NO_ENDPOINT))
- transport_endpoint_cleanup (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip,
- tc->c_lcl_port);
+ transport_release_local_endpoint (TRANSPORT_PROTO_TCP, &tc->c_lcl_ip,
+ tc->c_lcl_port);
/* Check if connection is not yet fully established */
if (tc->state == TCP_STATE_SYN_SENT)
@@ -289,7 +295,7 @@ tcp_connection_alloc (u8 thread_index)
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
tcp_connection_t *tc;
- pool_get (wrk->connections, tc);
+ pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES);
clib_memset (tc, 0, sizeof (*tc));
tc->c_c_index = tc - wrk->connections;
tc->c_thread_index = thread_index;
@@ -306,12 +312,12 @@ tcp_connection_alloc_w_base (u8 thread_index, tcp_connection_t **base)
if ((*base)->c_thread_index == thread_index)
{
u32 base_index = (*base)->c_c_index;
- pool_get (wrk->connections, tc);
+ pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES);
*base = tcp_connection_get (base_index, thread_index);
}
else
{
- pool_get (wrk->connections, tc);
+ pool_get_aligned_safe (wrk->connections, tc, CLIB_CACHE_LINE_BYTES);
}
clib_memcpy_fast (tc, *base, sizeof (*tc));
tc->c_c_index = tc - wrk->connections;
@@ -404,8 +410,8 @@ tcp_connection_close (tcp_connection_t * tc)
case TCP_STATE_CLOSE_WAIT:
if (!transport_max_tx_dequeue (&tc->connection))
{
- tcp_send_fin (tc);
tcp_connection_timers_reset (tc);
+ tcp_send_fin (tc);
tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
tcp_cfg.lastack_time);
@@ -485,6 +491,14 @@ tcp_session_reset (u32 conn_index, u32 thread_index)
{
tcp_connection_t *tc;
tc = tcp_connection_get (conn_index, thread_index);
+
+ /* For half-opens just cleanup */
+ if (tc->state == TCP_STATE_SYN_SENT)
+ {
+ tcp_connection_cleanup (tc);
+ return;
+ }
+
tcp_send_reset (tc);
tcp_connection_timers_reset (tc);
tcp_cong_recovery_off (tc);
@@ -702,7 +716,13 @@ tcp_init_snd_vars (tcp_connection_t * tc)
tcp_update_time_now (tcp_get_worker (vlib_get_thread_index ()));
tcp_init_rcv_mss (tc);
- tc->iss = tcp_generate_random_iss (tc);
+ /*
+ * In special case of early-kill of timewait socket, the iss will already
+ * be initialized to ensure it is greater than the last incarnation of the
+ * connection. see syn_during_timewait() for more details.
+ */
+ if (!tc->iss)
+ tc->iss = tcp_generate_random_iss (tc);
tc->snd_una = tc->iss;
tc->snd_nxt = tc->iss + 1;
tc->srtt = 0.1 * THZ; /* 100 ms */
@@ -754,15 +774,18 @@ tcp_connection_init_vars (tcp_connection_t * tc)
}
static int
-tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr,
- u16 * lcl_port, u8 is_ip4)
+tcp_alloc_custom_local_endpoint (ip46_address_t *lcl_addr, u16 *lcl_port,
+ transport_endpoint_cfg_t *rmt)
{
+ tcp_main_t *tm = vnet_get_tcp_main ();
int index, port;
- if (is_ip4)
+
+ if (rmt->is_ip4)
{
index = tm->last_v4_addr_rotor++;
if (tm->last_v4_addr_rotor >= vec_len (tcp_cfg.ip4_src_addrs))
tm->last_v4_addr_rotor = 0;
+ clib_memset (lcl_addr, 0, sizeof (*lcl_addr));
lcl_addr->ip4.as_u32 = tcp_cfg.ip4_src_addrs[index].as_u32;
}
else
@@ -773,7 +796,7 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr,
clib_memcpy_fast (&lcl_addr->ip6, &tcp_cfg.ip6_src_addrs[index],
sizeof (ip6_address_t));
}
- port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr);
+ port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr, rmt);
if (port < 1)
return SESSION_E_NOPORT;
*lcl_port = port;
@@ -783,7 +806,6 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr,
static int
tcp_session_open (transport_endpoint_cfg_t * rmt)
{
- tcp_main_t *tm = vnet_get_tcp_main ();
tcp_connection_t *tc;
ip46_address_t lcl_addr;
u16 lcl_port;
@@ -794,27 +816,13 @@ tcp_session_open (transport_endpoint_cfg_t * rmt)
*/
if ((rmt->is_ip4 && vec_len (tcp_cfg.ip4_src_addrs))
|| (!rmt->is_ip4 && vec_len (tcp_cfg.ip6_src_addrs)))
- rv = tcp_alloc_custom_local_endpoint (tm, &lcl_addr, &lcl_port,
- rmt->is_ip4);
+ rv = tcp_alloc_custom_local_endpoint (&lcl_addr, &lcl_port, rmt);
else
- rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_TCP,
- rmt, &lcl_addr, &lcl_port);
+ rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_TCP, rmt, &lcl_addr,
+ &lcl_port);
if (rv)
- {
- if (rv != SESSION_E_PORTINUSE)
- return rv;
-
- if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip,
- lcl_port, rmt->port, TRANSPORT_PROTO_TCP,
- rmt->is_ip4))
- return SESSION_E_PORTINUSE;
-
- /* 5-tuple is available so increase lcl endpoint refcount and proceed
- * with connection allocation */
- transport_share_local_endpoint (TRANSPORT_PROTO_TCP, &lcl_addr,
- lcl_port);
- }
+ return rv;
/*
* Create connection and send SYN
@@ -823,7 +831,7 @@ tcp_session_open (transport_endpoint_cfg_t * rmt)
ip_copy (&tc->c_rmt_ip, &rmt->ip, rmt->is_ip4);
ip_copy (&tc->c_lcl_ip, &lcl_addr, rmt->is_ip4);
tc->c_rmt_port = rmt->port;
- tc->c_lcl_port = clib_host_to_net_u16 (lcl_port);
+ tc->c_lcl_port = lcl_port;
tc->c_is_ip4 = rmt->is_ip4;
tc->c_proto = TRANSPORT_PROTO_TCP;
tc->c_fib_index = rmt->fib_index;
@@ -831,6 +839,8 @@ tcp_session_open (transport_endpoint_cfg_t * rmt)
/* The other connection vars will be initialized after SYN ACK */
tcp_connection_timers_init (tc);
tc->mss = rmt->mss;
+ if (rmt->peer.sw_if_index != ENDPOINT_INVALID_INDEX)
+ tc->sw_if_index = rmt->peer.sw_if_index;
tc->next_node_index = rmt->next_node_index;
tc->next_node_opaque = rmt->next_node_opaque;
@@ -1213,7 +1223,6 @@ tcp_timer_waitclose_handler (tcp_connection_t * tc)
}
}
-/* *INDENT-OFF* */
static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
{
tcp_timer_retransmit_handler,
@@ -1221,7 +1230,6 @@ static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] =
tcp_timer_waitclose_handler,
tcp_timer_retransmit_syn_handler,
};
-/* *INDENT-ON* */
static void
tcp_dispatch_pending_timers (tcp_worker_ctx_t * wrk)
@@ -1329,7 +1337,6 @@ tcp_session_app_rx_evt (transport_connection_t *conn)
return 0;
}
-/* *INDENT-OFF* */
const static transport_proto_vft_t tcp_proto = {
.enable = vnet_tcp_enable_disable,
.start_listen = tcp_session_bind,
@@ -1360,7 +1367,6 @@ const static transport_proto_vft_t tcp_proto = {
.service_type = TRANSPORT_SERVICE_VC,
},
};
-/* *INDENT-ON* */
void
tcp_connection_tx_pacer_update (tcp_connection_t * tc)
@@ -1429,7 +1435,8 @@ tcp_expired_timers_dispatch (u32 * expired_timers)
clib_fifo_add (wrk->pending_timers, expired_timers, n_expired);
- max_loops = clib_max (1, 0.5 * TCP_TIMER_TICK * wrk->vm->loops_per_second);
+ max_loops =
+ clib_max ((u32) 0.5 * TCP_TIMER_TICK * wrk->vm->loops_per_second, 1);
max_per_loop = clib_max ((n_left + n_expired) / max_loops, 10);
max_per_loop = clib_min (max_per_loop, VLIB_FRAME_SIZE);
wrk->max_timers_per_loop = clib_max (n_left ? wrk->max_timers_per_loop : 0,
@@ -1449,6 +1456,51 @@ tcp_initialize_iss_seed (tcp_main_t * tm)
tm->iss_seed.second = random_u64 (&time_now);
}
+static void
+tcp_stats_collector_fn (vlib_stats_collector_data_t *d)
+{
+ tcp_main_t *tm = vnet_get_tcp_main ();
+ counter_t **counters = d->entry->data;
+ counter_t *cb = counters[0];
+ tcp_wrk_stats_t acc = {};
+ tcp_worker_ctx_t *wrk;
+
+ vec_foreach (wrk, tm->wrk_ctx)
+ {
+#define _(name, type, str) acc.name += wrk->stats.name;
+ foreach_tcp_wrk_stat
+#undef _
+ }
+
+#define _(name, type, str) cb[TCP_STAT_##name] = acc.name;
+ foreach_tcp_wrk_stat
+#undef _
+}
+
+static void
+tcp_counters_init (tcp_main_t *tm)
+{
+ vlib_stats_collector_reg_t r = {};
+ u32 idx;
+
+ if (tm->counters_init)
+ return;
+
+ r.entry_index = idx = vlib_stats_add_counter_vector ("/sys/tcp");
+ r.collect_fn = tcp_stats_collector_fn;
+ vlib_stats_validate (idx, 0, TCP_STAT_no_buffer);
+
+#define _(name, type, str) \
+ vlib_stats_add_symlink (idx, TCP_STAT_##name, "/sys/tcp/%s", \
+ CLIB_STRING_MACRO (name));
+ foreach_tcp_wrk_stat
+#undef _
+
+ vlib_stats_register_collector_fn (&r);
+
+ tm->counters_init = 1;
+}
+
static clib_error_t *
tcp_main_enable (vlib_main_t * vm)
{
@@ -1525,10 +1577,8 @@ tcp_main_enable (vlib_main_t * vm)
tm->bytes_per_buffer = vlib_buffer_get_default_data_size (vm);
tm->cc_last_type = TCP_CC_LAST;
- tm->ipl_next_node[0] = vlib_node_get_next (vm, session_queue_node.index,
- ip4_lookup_node.index);
- tm->ipl_next_node[1] = vlib_node_get_next (vm, session_queue_node.index,
- ip6_lookup_node.index);
+ tcp_counters_init (tm);
+
return error;
}
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index ca650b7fa29..2362a8bb857 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -34,7 +34,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
typedef enum _tcp_error
{
-#define tcp_error(n,s) TCP_ERROR_##n,
+#define tcp_error(f, n, s, d) TCP_ERROR_##f,
#include <vnet/tcp/tcp_error.def>
#undef tcp_error
TCP_N_ERROR,
@@ -66,6 +66,13 @@ typedef struct tcp_wrk_stats_
#undef _
} tcp_wrk_stats_t;
+typedef enum
+{
+#define _(name, type, str) TCP_STAT_##name,
+ foreach_tcp_wrk_stat
+#undef _
+} tcp_wrk_stats_e;
+
typedef struct tcp_free_req_
{
clib_time_type_t free_time;
@@ -215,9 +222,6 @@ typedef struct _tcp_main
/** vlib buffer size */
u32 bytes_per_buffer;
- /** Session layer edge indices to ip lookup (syns, rst) */
- u32 ipl_next_node[2];
-
/** Dispatch table by state and flags */
tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64];
@@ -236,6 +240,9 @@ typedef struct _tcp_main
/** Flag that indicates if stack is on or off */
u8 is_enabled;
+ /** Set if counters on stats segment initialized */
+ u8 counters_init;
+
/** Flag that indicates if v4 punting is enabled */
u8 punt_unknown4;
@@ -268,6 +275,10 @@ extern vlib_node_registration_t tcp4_rcv_process_node;
extern vlib_node_registration_t tcp6_rcv_process_node;
extern vlib_node_registration_t tcp4_listen_node;
extern vlib_node_registration_t tcp6_listen_node;
+extern vlib_node_registration_t tcp4_input_nolookup_node;
+extern vlib_node_registration_t tcp6_input_nolookup_node;
+extern vlib_node_registration_t tcp4_drop_node;
+extern vlib_node_registration_t tcp6_drop_node;
#define tcp_cfg tcp_main.cfg
#define tcp_node_index(node_id, is_ip4) \
@@ -313,8 +324,8 @@ u32 tcp_snd_space (tcp_connection_t * tc);
int tcp_fastrecovery_prr_snd_space (tcp_connection_t * tc);
void tcp_reschedule (tcp_connection_t * tc);
fib_node_index_t tcp_lookup_rmt_in_fib (tcp_connection_t * tc);
-u32 tcp_session_push_header (transport_connection_t * tconn,
- vlib_buffer_t * b);
+u32 tcp_session_push_header (transport_connection_t *tconn, vlib_buffer_t **b,
+ u32 n_bufs);
int tcp_session_custom_tx (void *conn, transport_send_params_t * sp);
void tcp_connection_timers_init (tcp_connection_t * tc);
@@ -327,6 +338,7 @@ void tcp_connection_tx_pacer_reset (tcp_connection_t * tc, u32 window,
void tcp_program_cleanup (tcp_worker_ctx_t * wrk, tcp_connection_t * tc);
void tcp_check_gso (tcp_connection_t *tc);
+int tcp_buffer_make_reset (vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4);
void tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
int tcp_configure_v4_source_address_range (vlib_main_t * vm,
ip4_address_t * start,
diff --git a/src/vnet/tcp/tcp_bt.c b/src/vnet/tcp/tcp_bt.c
index 67e9a14ceda..3cb57a550de 100644
--- a/src/vnet/tcp/tcp_bt.c
+++ b/src/vnet/tcp/tcp_bt.c
@@ -638,11 +638,9 @@ tcp_bt_flush_samples (tcp_connection_t * tc)
vec_validate (samples, pool_elts (bt->samples) - 1);
vec_reset_length (samples);
- /* *INDENT-OFF* */
pool_foreach (bts, bt->samples) {
vec_add1 (samples, bts - bt->samples);
}
- /* *INDENT-ON* */
vec_foreach (si, samples)
{
diff --git a/src/vnet/tcp/tcp_cli.c b/src/vnet/tcp/tcp_cli.c
index e602f114a74..b04c0bdc0cf 100644
--- a/src/vnet/tcp/tcp_cli.c
+++ b/src/vnet/tcp/tcp_cli.c
@@ -208,8 +208,9 @@ format_tcp_vars (u8 * s, va_list * args)
tc->mrtt_us * 1e3, tc->rttvar / 1000.0);
s = format (s, " rtt_ts %.4f rtt_seq %u\n", tc->rtt_ts,
tc->rtt_seq - tc->iss);
- s = format (s, " next_node %u opaque 0x%x fib_index %u\n",
- tc->next_node_index, tc->next_node_opaque, tc->c_fib_index);
+ s = format (s, " next_node %u opaque 0x%x fib_index %u sw_if_index %d\n",
+ tc->next_node_index, tc->next_node_opaque, tc->c_fib_index,
+ tc->sw_if_index);
s = format (s, " cong: %U", format_tcp_congestion, tc);
if (tc->state >= TCP_STATE_ESTABLISHED)
@@ -410,6 +411,8 @@ tcp_configure_v4_source_address_range (vlib_main_t * vm,
return VNET_API_ERROR_NEXT_HOP_NOT_IN_FIB;
sw_if_index = fib_entry_get_resolving_interface (fei);
+ if (sw_if_index == (u32) ~0)
+ return VNET_API_ERROR_NO_MATCHING_INTERFACE;
/* Configure proxy arp across the range */
rv = ip4_neighbor_proxy_add (fib_index, start, end);
@@ -430,7 +433,7 @@ tcp_configure_v4_source_address_range (vlib_main_t * vm,
/* Add local adjacencies for the range */
- receive_dpo_add_or_lock (DPO_PROTO_IP4, ~0 /* sw_if_index */ ,
+ receive_dpo_add_or_lock (DPO_PROTO_IP4, sw_if_index /* sw_if_index */,
NULL, &dpo);
prefix.fp_len = 32;
prefix.fp_proto = FIB_PROTOCOL_IP4;
@@ -505,7 +508,7 @@ tcp_configure_v6_source_address_range (vlib_main_t * vm,
ip6_neighbor_proxy_add (sw_if_index, start);
/* Add a receive adjacency for this address */
- receive_dpo_add_or_lock (DPO_PROTO_IP6, ~0 /* sw_if_index */ ,
+ receive_dpo_add_or_lock (DPO_PROTO_IP6, sw_if_index /* sw_if_index */,
NULL, &dpo);
fib_table_entry_special_dpo_update (fib_index,
@@ -610,14 +613,12 @@ tcp_src_address_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_src_address_command, static) =
{
.path = "tcp src-address",
.short_help = "tcp src-address <ip-addr> [- <ip-addr>] add src address range",
.function = tcp_src_address_fn,
};
-/* *INDENT-ON* */
static u8 *
tcp_scoreboard_dump_trace (u8 * s, sack_scoreboard_t * sb)
@@ -673,14 +674,12 @@ tcp_show_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_show_scoreboard_trace_command, static) =
{
.path = "show tcp scoreboard trace",
.short_help = "show tcp scoreboard trace <connection>",
.function = tcp_show_scoreboard_trace_fn,
};
-/* *INDENT-ON* */
u8 *
tcp_scoreboard_replay (u8 * s, tcp_connection_t * tc, u8 verbose)
@@ -798,14 +797,12 @@ tcp_scoreboard_trace_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_replay_scoreboard_command, static) =
{
.path = "tcp replay scoreboard",
.short_help = "tcp replay scoreboard <connection>",
.function = tcp_scoreboard_trace_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_tcp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -821,14 +818,12 @@ show_tcp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
tm->punt_unknown6 ? "enabled" : "disabled");
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_tcp_punt_command, static) =
{
.path = "show tcp punt",
.short_help = "show tcp punt",
.function = show_tcp_punt_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -860,14 +855,12 @@ show_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_tcp_stats_command, static) =
{
.path = "show tcp stats",
.short_help = "show tcp stats",
.function = show_tcp_stats_fn,
};
-/* *INDENT-ON* */
static clib_error_t *
clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
@@ -890,14 +883,12 @@ clear_tcp_stats_fn (vlib_main_t * vm, unformat_input_t * input,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (clear_tcp_stats_command, static) =
{
.path = "clear tcp stats",
.short_help = "clear tcp stats",
.function = clear_tcp_stats_fn,
};
-/* *INDENT-ON* */
uword
unformat_tcp_cc_algo (unformat_input_t * input, va_list * va)
diff --git a/src/vnet/tcp/tcp_cubic.c b/src/vnet/tcp/tcp_cubic.c
index cc2ffeae9f0..cf2b9a17d18 100644
--- a/src/vnet/tcp/tcp_cubic.c
+++ b/src/vnet/tcp/tcp_cubic.c
@@ -141,7 +141,7 @@ cubic_cwnd_accumulate (tcp_connection_t * tc, u32 thresh, u32 bytes_acked)
tc->cwnd_acc_bytes = 0;
}
- tcp_cwnd_accumulate (tc, thresh, tc->bytes_acked);
+ tcp_cwnd_accumulate (tc, thresh, bytes_acked);
}
static void
@@ -158,7 +158,7 @@ cubic_rcv_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs)
if (tcp_in_slowstart (tc))
{
- tc->cwnd += tc->bytes_acked;
+ tc->cwnd += rs->delivered;
return;
}
@@ -169,7 +169,7 @@ cubic_rcv_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs)
w_aimd = (u64) W_est (cd, t, rtt_sec) * tc->snd_mss;
if (w_cubic < w_aimd)
{
- cubic_cwnd_accumulate (tc, tc->cwnd, tc->bytes_acked);
+ cubic_cwnd_accumulate (tc, tc->cwnd, rs->delivered);
}
else
{
@@ -195,7 +195,7 @@ cubic_rcv_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs)
/* Practically we can't increment so just inflate threshold */
thresh = 50 * tc->cwnd;
}
- cubic_cwnd_accumulate (tc, thresh, tc->bytes_acked);
+ cubic_cwnd_accumulate (tc, thresh, rs->delivered);
}
}
@@ -232,6 +232,23 @@ cubic_unformat_config (unformat_input_t * input)
return 1;
}
+void
+cubic_event (tcp_connection_t *tc, tcp_cc_event_t evt)
+{
+ cubic_data_t *cd;
+ f64 now;
+
+ if (evt != TCP_CC_EVT_START_TX)
+ return;
+
+ /* App was idle so update t_start to avoid artificially
+ * inflating cwnd if nothing recently sent and acked */
+ cd = (cubic_data_t *) tcp_cc_data (tc);
+ now = cubic_time (tc->c_thread_index);
+ if (now > tc->mrtt_us + 1)
+ cd->t_start = now;
+}
+
const static tcp_cc_algorithm_t tcp_cubic = {
.name = "cubic",
.unformat_cfg = cubic_unformat_config,
@@ -240,6 +257,7 @@ const static tcp_cc_algorithm_t tcp_cubic = {
.recovered = cubic_recovered,
.rcv_ack = cubic_rcv_ack,
.rcv_cong_ack = newreno_rcv_cong_ack,
+ .event = cubic_event,
.init = cubic_conn_init,
};
diff --git a/src/vnet/tcp/tcp_debug.c b/src/vnet/tcp/tcp_debug.c
index e3d7452b591..ab466f30efb 100644
--- a/src/vnet/tcp/tcp_debug.c
+++ b/src/vnet/tcp/tcp_debug.c
@@ -26,7 +26,7 @@ tcp_evt_track_register (elog_track_t * et)
if (fl_len)
{
track_index = tdm->free_track_indices[fl_len - 1];
- _vec_len (tdm->free_track_indices) -= 1;
+ vec_dec_len (tdm->free_track_indices, 1);
et->track_index_plus_one = track_index + 1;
}
else
@@ -134,14 +134,12 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (tcp_debug_command, static) =
{
.path = "tcp debug",
.short_help = "tcp [show] [debug group <N> level <N>]",
.function = tcp_debug_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 1202f7f44d3..04e921cd601 100644
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -17,13 +17,18 @@
#define SRC_VNET_TCP_TCP_DEBUG_H_
#include <vlib/vlib.h>
+#include <vpp/vnet/config.h>
/**
* Build debugging infra unconditionally. Debug components controlled via
* debug configuration. Comes with some overhead so it's not recommended for
* production/performance scenarios. Takes priority over TCP_DEBUG_ENABLE.
*/
+#ifdef VPP_TCP_DEBUG_ALWAYS
+#define TCP_DEBUG_ALWAYS (1)
+#else
#define TCP_DEBUG_ALWAYS (0)
+#endif
/**
* Build debugging infra only if enabled. Debug components controlled via
* macros that follow.
@@ -867,11 +872,12 @@ if (TCP_DEBUG_CC > 1) \
*/
#if TCP_DEBUG_CS || TCP_DEBUG_ALWAYS
-#define STATS_INTERVAL 1
+#define STATS_INTERVAL 0.001
-#define tcp_cc_time_to_print_stats(_tc) \
- _tc->c_cc_stat_tstamp + STATS_INTERVAL < tcp_time_now() \
- || tcp_in_fastrecovery (_tc) \
+#define tcp_cc_time_to_print_stats(_tc) \
+ _tc->c_cc_stat_tstamp + STATS_INTERVAL < \
+ tcp_time_now_us (_tc->c_thread_index) || \
+ tcp_in_fastrecovery (_tc)
#define TCP_EVT_CC_RTO_STAT_PRINT(_tc) \
{ \
@@ -887,14 +893,14 @@ if (TCP_DEBUG_CC > 1) \
ed->data[3] = _tc->rttvar; \
}
-#define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...) \
-{ \
-if (tcp_cc_time_to_print_stats (_tc)) \
-{ \
- TCP_EVT_CC_RTO_STAT_PRINT (_tc); \
- _tc->c_cc_stat_tstamp = tcp_time_now (); \
-} \
-}
+#define TCP_EVT_CC_RTO_STAT_HANDLER(_tc, ...) \
+ { \
+ if (tcp_cc_time_to_print_stats (_tc)) \
+ { \
+ TCP_EVT_CC_RTO_STAT_PRINT (_tc); \
+ _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \
+ } \
+ }
#define TCP_EVT_CC_SND_STAT_PRINT(_tc) \
{ \
@@ -911,14 +917,14 @@ if (tcp_cc_time_to_print_stats (_tc)) \
ed->data[3] = _tc->snd_rxt_bytes; \
}
-#define TCP_EVT_CC_SND_STAT_HANDLER(_tc, ...) \
-{ \
-if (tcp_cc_time_to_print_stats (_tc)) \
-{ \
- TCP_EVT_CC_SND_STAT_PRINT(_tc); \
- _tc->c_cc_stat_tstamp = tcp_time_now (); \
-} \
-}
+#define TCP_EVT_CC_SND_STAT_HANDLER(_tc, ...) \
+ { \
+ if (tcp_cc_time_to_print_stats (_tc)) \
+ { \
+ TCP_EVT_CC_SND_STAT_PRINT (_tc); \
+ _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \
+ } \
+ }
#define TCP_EVT_CC_STAT_PRINT(_tc) \
{ \
@@ -937,14 +943,14 @@ if (tcp_cc_time_to_print_stats (_tc)) \
TCP_EVT_CC_SND_STAT_PRINT (_tc); \
}
-#define TCP_EVT_CC_STAT_HANDLER(_tc, ...) \
-{ \
-if (tcp_cc_time_to_print_stats (_tc)) \
-{ \
- TCP_EVT_CC_STAT_PRINT (_tc); \
- _tc->c_cc_stat_tstamp = tcp_time_now(); \
-} \
-}
+#define TCP_EVT_CC_STAT_HANDLER(_tc, ...) \
+ { \
+ if (tcp_cc_time_to_print_stats (_tc)) \
+ { \
+ TCP_EVT_CC_STAT_PRINT (_tc); \
+ _tc->c_cc_stat_tstamp = tcp_time_now_us (_tc->c_thread_index); \
+ } \
+ }
#else
#define TCP_EVT_CC_STAT_HANDLER(_tc, ...)
#define TCP_EVT_CC_STAT_PRINT(_tc)
diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def
index d51e7ebf756..87fdcc02615 100644
--- a/src/vnet/tcp/tcp_error.def
+++ b/src/vnet/tcp/tcp_error.def
@@ -12,40 +12,41 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-tcp_error (NONE, "no error")
-tcp_error (WRONG_THREAD, "Wrong worker thread")
-tcp_error (FILTERED, "Packets filtered")
-tcp_error (LENGTH, "inconsistent ip/tcp lengths")
-tcp_error (NO_LISTENER, "no listener for dst port")
-tcp_error (LOOKUP_DROPS, "lookup drops")
-tcp_error (DISPATCH, "Dispatch error")
-tcp_error (ENQUEUED, "Packets pushed into rx fifo")
-tcp_error (ENQUEUED_OOO, "OOO packets pushed into rx fifo")
-tcp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space")
-tcp_error (PARTIALLY_ENQUEUED, "Packets partially pushed into rx fifo")
-tcp_error (SEGMENT_OLD, "Old segment")
-tcp_error (SEGMENT_INVALID, "Invalid segments")
-tcp_error (SYNS_RCVD, "SYNs received")
-tcp_error (SPURIOUS_SYN, "Spurious SYNs received")
-tcp_error (SYN_ACKS_RCVD, "SYN-ACKs received")
-tcp_error (SPURIOUS_SYN_ACK, "Spurious SYN-ACKs received")
-tcp_error (MSG_QUEUE_FULL, "Events not sent for lack of msg queue space")
-tcp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated")
-tcp_error (ACK_OK, "Pure ACKs received")
-tcp_error (ACK_INVALID, "Invalid ACK")
-tcp_error (ACK_DUP, "Duplicate ACK")
-tcp_error (ACK_OLD, "Old ACK")
-tcp_error (ACK_FUTURE, "Future ACK")
-tcp_error (PKTS_SENT, "Packets sent")
-tcp_error (RST_SENT, "Resets sent")
-tcp_error (RST_RCVD, "Resets received")
-tcp_error (INVALID_CONNECTION, "Invalid connection")
-tcp_error (CONNECTION_CLOSED, "Connection closed")
-tcp_error (CREATE_EXISTS, "Connection already exists")
-tcp_error (PUNT, "Packets punted")
-tcp_error (OPTIONS, "Could not parse options")
-tcp_error (PAWS, "PAWS check failed")
-tcp_error (RCV_WND, "Segment not in receive window")
-tcp_error (FIN_RCVD, "FINs received")
-tcp_error (LINK_LOCAL_RW, "No rewrite for link local connection")
-tcp_error (ZERO_RWND, "Zero receive window") \ No newline at end of file
+tcp_error (NONE, none, INFO, "no error")
+tcp_error (WRONG_THREAD, wrong_thread, ERROR, "Wrong worker thread")
+tcp_error (FILTERED, filtered, INFO, "Packets filtered")
+tcp_error (LENGTH, length, ERROR, "inconsistent ip/tcp lengths")
+tcp_error (NO_LISTENER, no_listener, ERROR, "no listener for dst port")
+tcp_error (LOOKUP_DROPS, lookup_drops, ERROR, "lookup drops")
+tcp_error (DISPATCH, dispatch, ERROR, "Dispatch error")
+tcp_error (ENQUEUED, enqueued, INFO, "Packets pushed into rx fifo")
+tcp_error (ENQUEUED_OOO, enqueued_ooo, WARN, "OOO packets pushed into rx fifo")
+tcp_error (FIFO_FULL, fifo_full, ERROR, "Packets dropped for lack of rx fifo space")
+tcp_error (PARTIALLY_ENQUEUED, partially_enqueued, WARN, "Packets partially pushed into rx fifo")
+tcp_error (SEGMENT_OLD, segment_old, WARN, "Old segment")
+tcp_error (SEGMENT_INVALID, segment_invalid, ERROR, "Invalid segments")
+tcp_error (SYNS_RCVD, syns_rcvd, INFO, "SYNs received")
+tcp_error (SPURIOUS_SYN, spurious_syn, WARN, "Spurious SYNs received")
+tcp_error (SYN_ACKS_RCVD, syn_acks_rcvd, INFO, "SYN-ACKs received")
+tcp_error (SPURIOUS_SYN_ACK, spurious_syn_ack, WARN, "Spurious SYN-ACKs received")
+tcp_error (MSG_QUEUE_FULL, msg_queue_full, ERROR, "Events not sent for lack of msg queue space")
+tcp_error (CREATE_SESSION_FAIL, create_session_fail, ERROR, "Sessions couldn't be allocated")
+tcp_error (ACK_OK, ack_ok, INFO, "Pure ACKs received")
+tcp_error (ACK_INVALID, ack_invalid, ERROR, "Invalid ACK")
+tcp_error (ACK_DUP, ack_dup, WARN, "Duplicate ACK")
+tcp_error (ACK_OLD, ack_old, WARN, "Old ACK")
+tcp_error (ACK_FUTURE, ack_future, WARN, "Future ACK")
+tcp_error (PKTS_SENT, pkts_sent, INFO, "Packets sent")
+tcp_error (RST_SENT, rst_sent, INFO, "Resets sent")
+tcp_error (RST_RCVD, rst_rcvd, INFO, "Resets received")
+tcp_error (INVALID_CONNECTION, invalid_connection, ERROR, "Invalid connection")
+tcp_error (CONNECTION_CLOSED, connection_closed, WARN, "Connection closed")
+tcp_error (CREATE_EXISTS, create_exists, ERROR, "Connection already exists")
+tcp_error (PUNT, punt, INFO, "Packets punted")
+tcp_error (OPTIONS, options, ERROR, "Could not parse options")
+tcp_error (PAWS, paws, ERROR, "PAWS check failed")
+tcp_error (RCV_WND, rcv_wnd, WARN, "Segment not in receive window")
+tcp_error (FIN_RCVD, fin_rcvd, INFO, "FINs received")
+tcp_error (LINK_LOCAL_RW, link_local_rw, ERROR, "No rewrite for link local connection")
+tcp_error (ZERO_RWND, zero_rwnd, WARN, "Zero receive window")
+tcp_error (CONN_ACCEPTED, conn_accepted, INFO, "Connections accepted") \ No newline at end of file
diff --git a/src/vnet/tcp/tcp_format.c b/src/vnet/tcp/tcp_format.c
index a3245f2046a..4674f2cbaed 100644
--- a/src/vnet/tcp/tcp_format.c
+++ b/src/vnet/tcp/tcp_format.c
@@ -52,12 +52,68 @@ format_tcp_flags (u8 * s, va_list * args)
return s;
}
+u8 *
+format_tcp_options (u8 *s, va_list *args)
+{
+ tcp_options_t *opts = va_arg (*args, tcp_options_t *);
+ u32 indent, n_opts = 0;
+ int i;
+
+ if (!opts->flags)
+ return s;
+
+ indent = format_get_indent (s);
+ indent += 2;
+
+ s = format (s, "options:\n%U", format_white_space, indent);
+
+ if (tcp_opts_mss (opts))
+ {
+ s = format (s, "mss %d", opts->mss);
+ n_opts++;
+ }
+ if (tcp_opts_wscale (opts))
+ {
+ s = format (s, "%swindow scale %d", n_opts > 0 ? ", " : "",
+ format_white_space, indent, opts->wscale);
+ n_opts++;
+ }
+ if (tcp_opts_tstamp (opts))
+ {
+ s = format (s, "%stimestamp %d, echo/reflected timestamp",
+ n_opts > 0 ? ", " : "", format_white_space, indent,
+ opts->tsval, opts->tsecr);
+ n_opts++;
+ }
+ if (tcp_opts_sack_permitted (opts))
+ {
+ s = format (s, "%ssack permitted", n_opts > 0 ? ", " : "",
+ format_white_space, indent);
+ n_opts++;
+ }
+ if (tcp_opts_sack (opts))
+ {
+ s = format (s, "%ssacks:", n_opts > 0 ? ", " : "", format_white_space,
+ indent);
+ for (i = 0; i < opts->n_sack_blocks; ++i)
+ {
+ s = format (s, "\n%Ublock %d: start %d, end %d", format_white_space,
+ indent + 2, i + 1, opts->sacks[i].start,
+ opts->sacks[i].end);
+ }
+ n_opts++;
+ }
+
+ return s;
+}
+
/* Format TCP header. */
u8 *
format_tcp_header (u8 * s, va_list * args)
{
tcp_header_t *tcp = va_arg (*args, tcp_header_t *);
u32 max_header_bytes = va_arg (*args, u32);
+ tcp_options_t opts = { .flags = 0 };
u32 header_bytes;
u32 indent;
@@ -83,32 +139,13 @@ format_tcp_header (u8 * s, va_list * args)
clib_net_to_host_u16 (tcp->window),
clib_net_to_host_u16 (tcp->checksum));
-
-#if 0
- /* Format TCP options. */
- {
- u8 *o;
- u8 *option_start = (void *) (tcp + 1);
- u8 *option_end = (void *) tcp + header_bytes;
-
- for (o = option_start; o < option_end;)
- {
- u32 length = o[1];
- switch (o[0])
- {
- case TCP_OPTION_END:
- length = 1;
- o = option_end;
- break;
-
- case TCP_OPTION_NOOP:
- length = 1;
- break;
-
- }
- }
- }
-#endif
+ if (header_bytes > max_header_bytes)
+ s = format (s, "\n%Uoptions: truncated", format_white_space, indent);
+ else if (tcp_options_parse (tcp, &opts, tcp_is_syn (tcp)) < 0)
+ s = format (s, "\n%Uoptions: parsing failed", format_white_space, indent);
+ else
+ s = format (s, "\n%U%U", format_white_space, indent, format_tcp_options,
+ &opts);
/* Recurse into next protocol layer. */
if (max_header_bytes != 0 && header_bytes < max_header_bytes)
diff --git a/src/vnet/tcp/tcp_inlines.h b/src/vnet/tcp/tcp_inlines.h
index dfdf801d0ab..ccd0e3fe3ee 100644
--- a/src/vnet/tcp/tcp_inlines.h
+++ b/src/vnet/tcp/tcp_inlines.h
@@ -18,6 +18,35 @@
#include <vnet/tcp/tcp.h>
+always_inline void
+tcp_node_inc_counter_i (vlib_main_t *vm, u32 tcp4_node, u32 tcp6_node,
+ u8 is_ip4, u32 evt, u32 val)
+{
+ if (is_ip4)
+ vlib_node_increment_counter (vm, tcp4_node, evt, val);
+ else
+ vlib_node_increment_counter (vm, tcp6_node, evt, val);
+}
+
+#define tcp_inc_counter(node_id, err, count) \
+ tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \
+ tcp6_##node_id##_node.index, is_ip4, err, count)
+#define tcp_maybe_inc_err_counter(cnts, err) \
+ { \
+ cnts[err] += (next0 != tcp_next_drop (is_ip4)); \
+ }
+#define tcp_inc_err_counter(cnts, err, val) \
+ { \
+ cnts[err] += val; \
+ }
+#define tcp_store_err_counters(node_id, cnts) \
+ { \
+ int i; \
+ for (i = 0; i < TCP_N_ERROR; i++) \
+ if (cnts[i]) \
+ tcp_inc_counter (node_id, i, cnts[i]); \
+ }
+
always_inline tcp_header_t *
tcp_buffer_hdr (vlib_buffer_t * b)
{
@@ -66,7 +95,7 @@ tcp_listener_get (u32 tli)
always_inline tcp_connection_t *
tcp_half_open_connection_get (u32 conn_index)
{
- return tcp_connection_get (conn_index, 0);
+ return tcp_connection_get (conn_index, transport_cl_thread ());
}
/**
@@ -293,7 +322,7 @@ tcp_input_lookup_buffer (vlib_buffer_t * b, u8 thread_index, u32 * error,
{
ip6_main_t *im = &ip6_main;
fib_index = vec_elt (im->fib_index_by_sw_if_index,
- vnet_buffer (b)->sw_if_index[VLIB_RX]);
+ vnet_buffer (b)->ip.rx_sw_if_index);
}
tc = session_lookup_connection_wt6 (fib_index, &ip6->dst_address,
@@ -304,6 +333,10 @@ tcp_input_lookup_buffer (vlib_buffer_t * b, u8 thread_index, u32 * error,
}
}
+ /* Set the sw_if_index[VLIB_RX] to the interface we received
+ * the connection on (the local interface) */
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->ip.rx_sw_if_index;
+
if (is_nolookup)
tc =
(transport_connection_t *) tcp_connection_get (vnet_buffer (b)->
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 1e27b7dcb2e..70b5d28e0cc 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -21,65 +21,23 @@
#include <vnet/session/session.h>
#include <math.h>
-static char *tcp_error_strings[] = {
-#define tcp_error(n,s) s,
+static vlib_error_desc_t tcp_input_error_counters[] = {
+#define tcp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
#include <vnet/tcp/tcp_error.def>
#undef tcp_error
};
-/* All TCP nodes have the same outgoing arcs */
-#define foreach_tcp_state_next \
- _ (DROP4, "ip4-drop") \
- _ (DROP6, "ip6-drop") \
- _ (TCP4_OUTPUT, "tcp4-output") \
- _ (TCP6_OUTPUT, "tcp6-output")
-
-typedef enum _tcp_established_next
-{
-#define _(s,n) TCP_ESTABLISHED_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_ESTABLISHED_N_NEXT,
-} tcp_established_next_t;
-
-typedef enum _tcp_rcv_process_next
-{
-#define _(s,n) TCP_RCV_PROCESS_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_RCV_PROCESS_N_NEXT,
-} tcp_rcv_process_next_t;
-
-typedef enum _tcp_syn_sent_next
-{
-#define _(s,n) TCP_SYN_SENT_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_SYN_SENT_N_NEXT,
-} tcp_syn_sent_next_t;
-
-typedef enum _tcp_listen_next
-{
-#define _(s,n) TCP_LISTEN_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_LISTEN_N_NEXT,
-} tcp_listen_next_t;
-
-/* Generic, state independent indices */
-typedef enum _tcp_state_next
+typedef enum _tcp_input_next
{
-#define _(s,n) TCP_NEXT_##s,
- foreach_tcp_state_next
-#undef _
- TCP_STATE_N_NEXT,
-} tcp_state_next_t;
-
-#define tcp_next_output(is_ip4) (is_ip4 ? TCP_NEXT_TCP4_OUTPUT \
- : TCP_NEXT_TCP6_OUTPUT)
-
-#define tcp_next_drop(is_ip4) (is_ip4 ? TCP_NEXT_DROP4 \
- : TCP_NEXT_DROP6)
+ TCP_INPUT_NEXT_DROP,
+ TCP_INPUT_NEXT_LISTEN,
+ TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_INPUT_NEXT_SYN_SENT,
+ TCP_INPUT_NEXT_ESTABLISHED,
+ TCP_INPUT_NEXT_RESET,
+ TCP_INPUT_NEXT_PUNT,
+ TCP_INPUT_N_NEXT
+} tcp_input_next_t;
/**
* Validate segment sequence number. As per RFC793:
@@ -404,17 +362,10 @@ tcp_rcv_ack_no_cc (tcp_connection_t * tc, vlib_buffer_t * b, u32 * error)
if (!(seq_leq (tc->snd_una, vnet_buffer (b)->tcp.ack_number)
&& seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
{
- if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)
- && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
- {
- tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
- goto acceptable;
- }
*error = TCP_ERROR_ACK_INVALID;
return -1;
}
-acceptable:
tc->bytes_acked = vnet_buffer (b)->tcp.ack_number - tc->snd_una;
tc->snd_una = vnet_buffer (b)->tcp.ack_number;
*error = TCP_ERROR_ACK_OK;
@@ -594,7 +545,7 @@ tcp_handle_postponed_dequeues (tcp_worker_ctx_t * wrk)
tc->burst_acked = 0;
}
- _vec_len (wrk->pending_deq_acked) = 0;
+ vec_set_len (wrk->pending_deq_acked, 0);
}
static void
@@ -629,11 +580,15 @@ tcp_update_snd_wnd (tcp_connection_t * tc, u32 seq, u32 ack, u32 snd_wnd)
if (PREDICT_FALSE (tc->snd_wnd < tc->snd_mss))
{
- /* Set persist timer if not set and we just got 0 wnd */
- if (!tcp_timer_is_active (tc, TCP_TIMER_PERSIST)
- && !tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT))
+ if (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT))
{
tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
+
+ /* Set persist timer if we just got 0 wnd. If already set,
+ * update it because some data sent with snd_wnd < snd_mss was
+ * acked. */
+ if (tcp_timer_is_active (tc, TCP_TIMER_PERSIST))
+ tcp_persist_timer_reset (&wrk->timer_wheel, tc);
tcp_persist_timer_set (&wrk->timer_wheel, tc);
}
}
@@ -742,7 +697,7 @@ tcp_should_fastrecover (tcp_connection_t * tc, u8 has_sack)
}
static int
-tcp_cc_recover (tcp_connection_t * tc)
+tcp_cc_try_recover (tcp_connection_t *tc)
{
sack_scoreboard_hole_t *hole;
u8 is_spurious = 0;
@@ -757,14 +712,14 @@ tcp_cc_recover (tcp_connection_t * tc)
tcp_connection_tx_pacer_reset (tc, tc->cwnd, 0 /* start bucket */ );
tc->rcv_dupacks = 0;
+ tcp_recovery_off (tc);
/* Previous recovery left us congested. Continue sending as part
* of the current recovery event with an updated snd_congestion */
- if (tc->sack_sb.sacked_bytes)
+ if (tc->sack_sb.sacked_bytes && tcp_in_fastrecovery (tc))
{
tc->snd_congestion = tc->snd_nxt;
- tcp_program_retransmit (tc);
- return is_spurious;
+ return -1;
}
tc->rxt_delivered = 0;
@@ -778,19 +733,18 @@ tcp_cc_recover (tcp_connection_t * tc)
if (hole && hole->start == tc->snd_una && hole->end == tc->snd_nxt)
scoreboard_clear (&tc->sack_sb);
- if (!tcp_in_recovery (tc) && !is_spurious)
+ if (tcp_in_fastrecovery (tc) && !is_spurious)
tcp_cc_recovered (tc);
tcp_fastrecovery_off (tc);
tcp_fastrecovery_first_off (tc);
- tcp_recovery_off (tc);
TCP_EVT (TCP_EVT_CC_EVT, tc, 3);
ASSERT (tc->rto_boff == 0);
ASSERT (!tcp_in_cong_recovery (tc));
ASSERT (tcp_scoreboard_is_sane_post_recovery (tc));
- return is_spurious;
+ return 0;
}
static void
@@ -803,15 +757,6 @@ tcp_cc_update (tcp_connection_t * tc, tcp_rate_sample_t * rs)
/* If a cumulative ack, make sure dupacks is 0 */
tc->rcv_dupacks = 0;
-
- /* When dupacks hits the threshold we only enter fast retransmit if
- * cumulative ack covers more than snd_congestion. Should snd_una
- * wrap this test may fail under otherwise valid circumstances.
- * Therefore, proactively update snd_congestion when wrap detected. */
- if (PREDICT_FALSE
- (seq_leq (tc->snd_congestion, tc->snd_una - tc->bytes_acked)
- && seq_gt (tc->snd_congestion, tc->snd_una)))
- tc->snd_congestion = tc->snd_una - 1;
}
/**
@@ -857,6 +802,20 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs,
*/
/*
+ * See if we can exit and stop retransmitting
+ */
+ if (seq_geq (tc->snd_una, tc->snd_congestion))
+ {
+ /* If successfully recovered, treat ack as congestion avoidance ack
+ * and return. Otherwise, we're still congested so process feedback */
+ if (!tcp_cc_try_recover (tc))
+ {
+ tcp_cc_rcv_ack (tc, rs);
+ return;
+ }
+ }
+
+ /*
* Process (re)transmit feedback. Output path uses this to decide how much
* more data to release into the network
*/
@@ -866,8 +825,7 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs,
tcp_fastrecovery_first_on (tc);
tc->rxt_delivered += tc->sack_sb.rxt_sacked;
- tc->prr_delivered += tc->bytes_acked + tc->sack_sb.last_sacked_bytes
- - tc->sack_sb.last_bytes_delivered;
+ tc->prr_delivered += rs->delivered;
}
else
{
@@ -891,23 +849,6 @@ tcp_cc_handle_event (tcp_connection_t * tc, tcp_rate_sample_t * rs,
tcp_fastrecovery_first_on (tc);
}
- /*
- * See if we can exit and stop retransmitting
- */
- if (seq_geq (tc->snd_una, tc->snd_congestion))
- {
- /* If spurious return, we've already updated everything */
- if (tcp_cc_recover (tc))
- {
- tc->tsecr_last_ack = tc->rcv_opts.tsecr;
- return;
- }
-
- /* Treat as congestion avoidance ack */
- tcp_cc_rcv_ack (tc, rs);
- return;
- }
-
tcp_program_retransmit (tc);
/*
@@ -991,15 +932,6 @@ tcp_rcv_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
/* If the ACK acks something not yet sent (SEG.ACK > SND.NXT) */
if (PREDICT_FALSE (seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)))
{
- /* We've probably entered recovery and the peer still has some
- * of the data we've sent. Update snd_nxt and accept the ack */
- if (seq_leq (vnet_buffer (b)->tcp.ack_number, tc->snd_nxt)
- && seq_gt (vnet_buffer (b)->tcp.ack_number, tc->snd_una))
- {
- tc->snd_nxt = vnet_buffer (b)->tcp.ack_number;
- goto process_ack;
- }
-
tc->errors.above_ack_wnd += 1;
*error = TCP_ERROR_ACK_FUTURE;
TCP_EVT (TCP_EVT_ACK_RCV_ERR, tc, 0, vnet_buffer (b)->tcp.ack_number);
@@ -1022,8 +954,6 @@ tcp_rcv_ack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
return 0;
}
-process_ack:
-
/*
* Looks okay, process feedback
*/
@@ -1042,6 +972,9 @@ process_ack:
if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tcp_bt_sample_delivery_rate (tc, &rs);
+ else
+ rs.delivered = tc->bytes_acked + tc->sack_sb.last_sacked_bytes -
+ tc->sack_sb.last_bytes_delivered;
if (tc->bytes_acked + tc->sack_sb.last_sacked_bytes)
{
@@ -1106,7 +1039,7 @@ tcp_handle_disconnects (tcp_worker_ctx_t * wrk)
tcp_disconnect_pending_off (tc);
session_transport_closing_notify (&tc->connection);
}
- _vec_len (wrk->pending_disconnects) = 0;
+ vec_set_len (wrk->pending_disconnects, 0);
}
if (vec_len (wrk->pending_resets))
@@ -1119,7 +1052,7 @@ tcp_handle_disconnects (tcp_worker_ctx_t * wrk)
tcp_disconnect_pending_off (tc);
tcp_handle_rst (tc);
}
- _vec_len (wrk->pending_resets) = 0;
+ vec_set_len (wrk->pending_resets, 0);
}
}
@@ -1156,7 +1089,6 @@ tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b,
ASSERT (data_len);
written = session_enqueue_stream_connection (&tc->connection, b, 0,
1 /* queue event */ , 1);
- tc->bytes_in += written;
TCP_EVT (TCP_EVT_INPUT, tc, 0, data_len, written);
@@ -1164,17 +1096,20 @@ tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b,
if (PREDICT_TRUE (written == data_len))
{
tc->rcv_nxt += written;
+ tc->bytes_in += written;
}
/* If more data written than expected, account for out-of-order bytes. */
else if (written > data_len)
{
tc->rcv_nxt += written;
+ tc->bytes_in += data_len;
TCP_EVT (TCP_EVT_CC_INPUT, tc, data_len, written);
}
else if (written > 0)
{
/* We've written something but FIFO is probably full now */
tc->rcv_nxt += written;
+ tc->bytes_in += written;
error = TCP_ERROR_PARTIALLY_ENQUEUED;
}
else
@@ -1361,9 +1296,13 @@ format_tcp_rx_trace (u8 * s, va_list * args)
tcp_connection_t *tc = &t->tcp_connection;
u32 indent = format_get_indent (s);
- s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
- format_tcp_state, tc->state, format_white_space, indent,
- format_tcp_header, &t->tcp_header, 128);
+ if (!tc->c_lcl_port)
+ s = format (s, "no tcp connection\n%U%U", format_white_space, indent,
+ format_tcp_header, &t->tcp_header, 128);
+ else
+ s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
+ format_tcp_state, tc->state, format_white_space, indent,
+ format_tcp_header, &t->tcp_header, 128);
return s;
}
@@ -1433,53 +1372,14 @@ tcp_established_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
}
}
-always_inline void
-tcp_node_inc_counter_i (vlib_main_t * vm, u32 tcp4_node, u32 tcp6_node,
- u8 is_ip4, u32 evt, u32 val)
-{
- if (is_ip4)
- vlib_node_increment_counter (vm, tcp4_node, evt, val);
- else
- vlib_node_increment_counter (vm, tcp6_node, evt, val);
-}
-
-#define tcp_maybe_inc_counter(node_id, err, count) \
-{ \
- if (next0 != tcp_next_drop (is_ip4)) \
- tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \
- tcp6_##node_id##_node.index, is_ip4, err, \
- 1); \
-}
-#define tcp_inc_counter(node_id, err, count) \
- tcp_node_inc_counter_i (vm, tcp4_##node_id##_node.index, \
- tcp6_##node_id##_node.index, is_ip4, \
- err, count)
-#define tcp_maybe_inc_err_counter(cnts, err) \
-{ \
- cnts[err] += (next0 != tcp_next_drop (is_ip4)); \
-}
-#define tcp_inc_err_counter(cnts, err, val) \
-{ \
- cnts[err] += val; \
-}
-#define tcp_store_err_counters(node_id, cnts) \
-{ \
- int i; \
- for (i = 0; i < TCP_N_ERROR; i++) \
- if (cnts[i]) \
- tcp_inc_counter(node_id, i, cnts[i]); \
-}
-
-
always_inline uword
tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, int is_ip4)
{
- u32 thread_index = vm->thread_index, errors = 0;
+ u32 thread_index = vm->thread_index, n_left_from, *from;
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 err_counters[TCP_N_ERROR] = { 0 };
- u32 n_left_from, *from;
if (node->flags & VLIB_NODE_FLAG_TRACE)
tcp_established_trace_frame (vm, node, frame, is_ip4);
@@ -1543,9 +1443,7 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
b += 1;
}
- errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP,
- thread_index);
- err_counters[TCP_ERROR_MSG_QUEUE_FULL] = errors;
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
tcp_store_err_counters (established, err_counters);
tcp_handle_postponed_dequeues (wrk);
tcp_handle_disconnects (wrk);
@@ -1568,43 +1466,23 @@ VLIB_NODE_FN (tcp6_established_node) (vlib_main_t * vm,
return tcp46_established_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp4_established_node) =
-{
+VLIB_REGISTER_NODE (tcp4_established_node) = {
.name = "tcp4-established",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
- .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
+ .error_counters = tcp_input_error_counters,
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp6_established_node) =
-{
+VLIB_REGISTER_NODE (tcp6_established_node) = {
.name = "tcp6-established",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
- .n_next_nodes = TCP_ESTABLISHED_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_ESTABLISHED_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
+ .error_counters = tcp_input_error_counters,
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
static u8
@@ -1796,15 +1674,54 @@ tcp_check_tx_offload (tcp_connection_t * tc, int is_ipv4)
return;
hw_if = vnet_get_sup_hw_interface (vnm, sw_if_idx);
- if (hw_if->caps & VNET_HW_INTERFACE_CAP_SUPPORTS_TCP_GSO)
+ if (hw_if->caps & VNET_HW_IF_CAP_TCP_GSO)
tc->cfg_flags |= TCP_CFG_F_TSO;
}
+static void
+tcp_input_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t **bs, u16 *nexts, u32 n_bufs, u8 is_ip4)
+{
+ tcp_connection_t *tc;
+ tcp_header_t *tcp;
+ tcp_rx_trace_t *t;
+ u8 flags;
+ int i;
+
+ for (i = 0; i < n_bufs; i++)
+ {
+ if (!(bs[i]->flags & VLIB_BUFFER_IS_TRACED))
+ continue;
+
+ t = vlib_add_trace (vm, node, bs[i], sizeof (*t));
+ if (nexts[i] == TCP_INPUT_NEXT_DROP || nexts[i] == TCP_INPUT_NEXT_PUNT ||
+ nexts[i] == TCP_INPUT_NEXT_RESET)
+ {
+ tc = 0;
+ }
+ else
+ {
+ flags = vnet_buffer (bs[i])->tcp.flags;
+
+ if (flags == TCP_STATE_LISTEN)
+ tc = tcp_listener_get (vnet_buffer (bs[i])->tcp.connection_index);
+ else if (flags == TCP_STATE_SYN_SENT)
+ tc = tcp_half_open_connection_get (
+ vnet_buffer (bs[i])->tcp.connection_index);
+ else
+ tc = tcp_connection_get (vnet_buffer (bs[i])->tcp.connection_index,
+ vm->thread_index);
+ }
+ tcp = tcp_buffer_hdr (bs[i]);
+ tcp_set_rx_trace_data (t, tc, tcp, bs[i], is_ip4);
+ }
+}
+
always_inline uword
tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, int is_ip4)
{
- u32 n_left_from, *from, thread_index = vm->thread_index, errors = 0;
+ u32 n_left_from, *from, thread_index = vm->thread_index;
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
@@ -1933,7 +1850,6 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
new_tc->rcv_nxt = vnet_buffer (b[0])->tcp.seq_end;
new_tc->irs = seq;
new_tc->timers[TCP_TIMER_RETRANSMIT_SYN] = TCP_TIMER_HANDLE_INVALID;
- new_tc->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
if (tcp_opts_tstamp (&new_tc->rcv_opts))
{
@@ -1971,7 +1887,9 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
SESSION_E_NONE))
{
tcp_send_reset_w_pkt (new_tc, b[0], thread_index, is_ip4);
- tcp_connection_cleanup (new_tc);
+ tcp_program_cleanup (wrk, new_tc);
+ new_tc->state = TCP_STATE_CLOSED;
+ new_tc->c_s_index = ~0;
error = TCP_ERROR_CREATE_SESSION_FAIL;
goto cleanup_ho;
}
@@ -1992,8 +1910,10 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (session_stream_connect_notify (&new_tc->connection,
SESSION_E_NONE))
{
- tcp_connection_cleanup (new_tc);
tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4);
+ tcp_program_cleanup (wrk, new_tc);
+ new_tc->state = TCP_STATE_CLOSED;
+ new_tc->c_s_index = ~0;
TCP_EVT (TCP_EVT_RST_SENT, tc);
error = TCP_ERROR_CREATE_SESSION_FAIL;
goto cleanup_ho;
@@ -2040,9 +1960,7 @@ tcp46_syn_sent_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_inc_counter (syn_sent, error, 1);
}
- errors =
- session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
- tcp_inc_counter (syn_sent, TCP_ERROR_MSG_QUEUE_FULL, errors);
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
vlib_buffer_free (vm, from, frame->n_vectors);
tcp_handle_disconnects (wrk);
@@ -2063,43 +1981,25 @@ VLIB_NODE_FN (tcp6_syn_sent_node) (vlib_main_t * vm,
return tcp46_syn_sent_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_syn_sent_node) =
{
.name = "tcp4-syn-sent",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
- .n_next_nodes = TCP_SYN_SENT_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
+ .error_counters = tcp_input_error_counters,
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_syn_sent_node) =
{
.name = "tcp6-syn-sent",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
- .n_next_nodes = TCP_SYN_SENT_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_SYN_SENT_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
+ .error_counters = tcp_input_error_counters,
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
static void
tcp46_rcv_process_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
@@ -2131,7 +2031,7 @@ always_inline uword
tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
vlib_frame_t *frame, int is_ip4)
{
- u32 thread_index = vm->thread_index, errors, n_left_from, *from, max_deq;
+ u32 thread_index = vm->thread_index, n_left_from, *from, max_deq;
tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
@@ -2199,15 +2099,6 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
switch (tc->state)
{
case TCP_STATE_SYN_RCVD:
-
- /* Make sure the segment is exactly right */
- if (tc->rcv_nxt != vnet_buffer (b[0])->tcp.seq_number || is_fin)
- {
- tcp_send_reset_w_pkt (tc, b[0], thread_index, is_ip4);
- error = TCP_ERROR_SEGMENT_INVALID;
- goto drop;
- }
-
/*
* If the segment acknowledgment is not acceptable, form a
* reset segment,
@@ -2221,6 +2112,10 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
goto drop;
}
+ /* Avoid notifying app if connection is about to be closed */
+ if (PREDICT_FALSE (is_fin))
+ break;
+
/* Update rtt and rto */
tcp_estimate_initial_rtt (tc);
tcp_connection_tx_pacer_update (tc);
@@ -2249,7 +2144,7 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_connection_cleanup (tc);
goto drop;
}
- error = TCP_ERROR_ACK_OK;
+ error = TCP_ERROR_CONN_ACCEPTED;
break;
case TCP_STATE_ESTABLISHED:
/* We can get packets in established state here because they
@@ -2328,8 +2223,8 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (max_deq > tc->burst_acked)
break;
- tcp_send_fin (tc);
tcp_connection_timers_reset (tc);
+ tcp_send_fin (tc);
tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
tcp_cfg.lastack_time);
@@ -2441,15 +2336,15 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_cfg.closewait_time);
break;
case TCP_STATE_SYN_RCVD:
- /* Send FIN-ACK, enter LAST-ACK and because the app was not
- * notified yet, set a cleanup timer instead of relying on
- * disconnect notify and the implicit close call. */
+ /* Send FIN-ACK and enter TIME-WAIT, as opposed to LAST-ACK,
+ * because the app was not notified yet and we want to avoid
+ * session state transitions to ensure cleanup does not
+ * propagate to app. */
tcp_connection_timers_reset (tc);
tc->rcv_nxt += 1;
tcp_send_fin (tc);
- tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
- tcp_timer_set (&wrk->timer_wheel, tc, TCP_TIMER_WAITCLOSE,
- tcp_cfg.lastack_time);
+ tcp_connection_set_state (tc, TCP_STATE_TIME_WAIT);
+ tcp_program_cleanup (wrk, tc);
break;
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_CLOSING:
@@ -2504,9 +2399,7 @@ tcp46_rcv_process_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_inc_counter (rcv_process, error, 1);
}
- errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP,
- thread_index);
- tcp_inc_counter (rcv_process, TCP_ERROR_MSG_QUEUE_FULL, errors);
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_TCP, thread_index);
tcp_handle_postponed_dequeues (wrk);
tcp_handle_disconnects (wrk);
vlib_buffer_free (vm, from, frame->n_vectors);
@@ -2528,43 +2421,23 @@ VLIB_NODE_FN (tcp6_rcv_process_node) (vlib_main_t * vm,
return tcp46_rcv_process_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp4_rcv_process_node) =
-{
+VLIB_REGISTER_NODE (tcp4_rcv_process_node) = {
.name = "tcp4-rcv-process",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
- .n_next_nodes = TCP_RCV_PROCESS_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
+ .error_counters = tcp_input_error_counters,
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp6_rcv_process_node) =
-{
+VLIB_REGISTER_NODE (tcp6_rcv_process_node) = {
.name = "tcp6-rcv-process",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
- .n_next_nodes = TCP_RCV_PROCESS_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_RCV_PROCESS_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
+ .error_counters = tcp_input_error_counters,
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
static void
tcp46_listen_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
@@ -2588,6 +2461,61 @@ tcp46_listen_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
}
/**
+ * SYN received in TIME-WAIT state.
+ *
+ * RFC 1122:
+ * "When a connection is [...] on TIME-WAIT state [...]
+ * [a TCP] MAY accept a new SYN from the remote TCP to
+ * reopen the connection directly, if it:
+ *
+ * (1) assigns its initial sequence number for the new
+ * connection to be larger than the largest sequence
+ * number it used on the previous connection incarnation,
+ * and
+ *
+ * (2) returns to TIME-WAIT state if the SYN turns out
+ * to be an old duplicate".
+ *
+ * The function returns true if the syn can be accepted during
+ * connection time-wait (port reuse). In this case the function
+ * also calculates what the iss should be for the new connection.
+ */
+always_inline int
+syn_during_timewait (tcp_connection_t *tc, vlib_buffer_t *b, u32 *iss)
+{
+ int paws_reject = tcp_segment_check_paws (tc);
+ u32 tw_iss;
+
+ *iss = 0;
+ /* Check that the SYN arrived out of window. We accept it */
+ if (!paws_reject &&
+ (seq_geq (vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt) ||
+ (tcp_opts_tstamp (&tc->rcv_opts) &&
+ timestamp_lt (tc->tsval_recent, tc->rcv_opts.tsval))))
+ {
+ /* Set the iss of the new connection to be the largest sequence number
+ * the old peer would have accepted and add some random number
+ */
+ tw_iss = tc->snd_nxt + tcp_available_snd_wnd (tc) +
+ (uword) (tcp_time_now_us (tc->c_thread_index) * 1e6) % 65535;
+ if (tw_iss == 0)
+ tw_iss++;
+ *iss = tw_iss;
+
+ return 1;
+ }
+ else
+ {
+ TCP_DBG (
+ "ERROR not accepting SYN in timewait,paws_reject=%d, seq_num =%ld, "
+ "rcv_nxt=%ld, tstamp_present=%d, tsval_recent = %d, tsval = %d\n",
+ paws_reject, vnet_buffer (b)->tcp.seq_number, tc->rcv_nxt,
+ tcp_opts_tstamp (&tc->rcv_opts), tc->tsval_recent, tc->rcv_opts.tsval);
+ return 0;
+ }
+}
+
+/**
* LISTEN state processing as per RFC 793 p. 65
*/
always_inline uword
@@ -2597,6 +2525,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
u32 n_left_from, *from, n_syns = 0;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u32 thread_index = vm->thread_index;
+ u32 tw_iss = 0;
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -2609,7 +2538,6 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
while (n_left_from > 0)
{
- u32 error = TCP_ERROR_NONE;
tcp_connection_t *lc, *child;
/* Flags initialized with connection state after lookup */
@@ -2617,6 +2545,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
{
lc = tcp_listener_get (vnet_buffer (b[0])->tcp.connection_index);
}
+ /* Probably we are in time-wait or closed state */
else
{
tcp_connection_t *tc;
@@ -2624,16 +2553,24 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
thread_index);
if (tc->state != TCP_STATE_TIME_WAIT)
{
- error = TCP_ERROR_CREATE_EXISTS;
+ tcp_inc_counter (listen, TCP_ERROR_CREATE_EXISTS, 1);
+ goto done;
+ }
+
+ if (PREDICT_FALSE (!syn_during_timewait (tc, b[0], &tw_iss)))
+ {
+ /* This SYN can't be accepted */
+ tcp_inc_counter (listen, TCP_ERROR_CREATE_EXISTS, 1);
goto done;
}
+
lc = tcp_lookup_listener (b[0], tc->c_fib_index, is_ip4);
/* clean up the old session */
tcp_connection_del (tc);
/* listener was cleaned up */
if (!lc)
{
- error = TCP_ERROR_NO_LISTENER;
+ tcp_inc_counter (listen, TCP_ERROR_NO_LISTENER, 1);
goto done;
}
}
@@ -2643,7 +2580,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
tcp_lookup_connection (lc->c_fib_index, b[0], thread_index, is_ip4);
if (PREDICT_FALSE (child->state != TCP_STATE_LISTEN))
{
- error = TCP_ERROR_CREATE_EXISTS;
+ tcp_inc_counter (listen, TCP_ERROR_CREATE_EXISTS, 1);
goto done;
}
@@ -2660,7 +2597,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
if (tcp_options_parse (tcp_buffer_hdr (b[0]), &child->rcv_opts, 1))
{
- error = TCP_ERROR_OPTIONS;
+ tcp_inc_counter (listen, TCP_ERROR_OPTIONS, 1);
tcp_connection_free (child);
goto done;
}
@@ -2670,6 +2607,12 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
child->state = TCP_STATE_SYN_RCVD;
child->c_fib_index = lc->c_fib_index;
child->cc_algo = lc->cc_algo;
+
+ /* In the regular case, the tw_iss will be zero, but
+ * in the special case of syn arriving in time_wait state, the value
+ * will be set according to rfc 1122
+ */
+ child->iss = tw_iss;
tcp_connection_init_vars (child);
child->rto = TCP_RTO_MIN;
@@ -2684,7 +2627,7 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
lc->c_thread_index, 0 /* notify */ ))
{
tcp_connection_cleanup (child);
- error = TCP_ERROR_CREATE_SESSION_FAIL;
+ tcp_inc_counter (listen, TCP_ERROR_CREATE_SESSION_FAIL, 1);
goto done;
}
@@ -2692,12 +2635,11 @@ tcp46_listen_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
child->tx_fifo_size = transport_tx_fifo_size (&child->connection);
tcp_send_synack (child);
+ n_syns += 1;
done:
-
b += 1;
n_left_from -= 1;
- n_syns += (error == TCP_ERROR_NONE);
}
tcp_inc_counter (listen, TCP_ERROR_SYNS_RCVD, n_syns);
@@ -2718,98 +2660,82 @@ VLIB_NODE_FN (tcp6_listen_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return tcp46_listen_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp4_listen_node) =
-{
+VLIB_REGISTER_NODE (tcp4_listen_node) = {
.name = "tcp4-listen",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
- .n_next_nodes = TCP_LISTEN_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_LISTEN_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
+ .error_counters = tcp_input_error_counters,
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (tcp6_listen_node) =
-{
+VLIB_REGISTER_NODE (tcp6_listen_node) = {
.name = "tcp6-listen",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
- .n_next_nodes = TCP_LISTEN_N_NEXT,
- .next_nodes =
- {
-#define _(s,n) [TCP_LISTEN_NEXT_##s] = n,
- foreach_tcp_state_next
-#undef _
- },
+ .error_counters = tcp_input_error_counters,
.format_trace = format_tcp_rx_trace_short,
};
-/* *INDENT-ON* */
-typedef enum _tcp_input_next
+always_inline uword
+tcp46_drop_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int is_ip4)
{
- TCP_INPUT_NEXT_DROP,
- TCP_INPUT_NEXT_LISTEN,
- TCP_INPUT_NEXT_RCV_PROCESS,
- TCP_INPUT_NEXT_SYN_SENT,
- TCP_INPUT_NEXT_ESTABLISHED,
- TCP_INPUT_NEXT_RESET,
- TCP_INPUT_NEXT_PUNT,
- TCP_INPUT_N_NEXT
-} tcp_input_next_t;
-
-#define foreach_tcp4_input_next \
- _ (DROP, "ip4-drop") \
- _ (LISTEN, "tcp4-listen") \
- _ (RCV_PROCESS, "tcp4-rcv-process") \
- _ (SYN_SENT, "tcp4-syn-sent") \
- _ (ESTABLISHED, "tcp4-established") \
- _ (RESET, "tcp4-reset") \
- _ (PUNT, "ip4-punt")
+ u32 *from = vlib_frame_vector_args (frame);
-#define foreach_tcp6_input_next \
- _ (DROP, "ip6-drop") \
- _ (LISTEN, "tcp6-listen") \
- _ (RCV_PROCESS, "tcp6-rcv-process") \
- _ (SYN_SENT, "tcp6-syn-sent") \
- _ (ESTABLISHED, "tcp6-established") \
- _ (RESET, "tcp6-reset") \
- _ (PUNT, "ip6-punt")
+ /* Error counters must be incremented by previous nodes */
+ vlib_buffer_free (vm, from, frame->n_vectors);
-#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN)
+ return frame->n_vectors;
+}
-static void
-tcp_input_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_buffer_t ** bs, u32 n_bufs, u8 is_ip4)
+VLIB_NODE_FN (tcp4_drop_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
{
- tcp_connection_t *tc;
- tcp_header_t *tcp;
- tcp_rx_trace_t *t;
- int i;
+ return tcp46_drop_inline (vm, node, from_frame, 1 /* is_ip4 */);
+}
- for (i = 0; i < n_bufs; i++)
- {
- if (bs[i]->flags & VLIB_BUFFER_IS_TRACED)
- {
- t = vlib_add_trace (vm, node, bs[i], sizeof (*t));
- tc = tcp_connection_get (vnet_buffer (bs[i])->tcp.connection_index,
- vm->thread_index);
- tcp = vlib_buffer_get_current (bs[i]);
- tcp_set_rx_trace_data (t, tc, tcp, bs[i], is_ip4);
- }
- }
+VLIB_NODE_FN (tcp6_drop_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return tcp46_drop_inline (vm, node, from_frame, 0 /* is_ip4 */);
}
+VLIB_REGISTER_NODE (tcp4_drop_node) = {
+ .name = "tcp4-drop",
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_counters = tcp_input_error_counters,
+};
+
+VLIB_REGISTER_NODE (tcp6_drop_node) = {
+ .name = "tcp6-drop",
+ .vector_size = sizeof (u32),
+ .n_errors = TCP_N_ERROR,
+ .error_counters = tcp_input_error_counters,
+};
+
+#define foreach_tcp4_input_next \
+ _ (DROP, "tcp4-drop") \
+ _ (LISTEN, "tcp4-listen") \
+ _ (RCV_PROCESS, "tcp4-rcv-process") \
+ _ (SYN_SENT, "tcp4-syn-sent") \
+ _ (ESTABLISHED, "tcp4-established") \
+ _ (RESET, "tcp4-reset") \
+ _ (PUNT, "ip4-punt")
+
+#define foreach_tcp6_input_next \
+ _ (DROP, "tcp6-drop") \
+ _ (LISTEN, "tcp6-listen") \
+ _ (RCV_PROCESS, "tcp6-rcv-process") \
+ _ (SYN_SENT, "tcp6-syn-sent") \
+ _ (ESTABLISHED, "tcp6-established") \
+ _ (RESET, "tcp6-reset") \
+ _ (PUNT, "ip6-punt")
+
+#define filter_flags (TCP_FLAG_SYN|TCP_FLAG_ACK|TCP_FLAG_RST|TCP_FLAG_FIN)
+
static void
tcp_input_set_error_next (tcp_main_t * tm, u16 * next, u32 * error, u8 is_ip4)
{
@@ -2830,9 +2756,8 @@ tcp_input_set_error_next (tcp_main_t * tm, u16 * next, u32 * error, u8 is_ip4)
}
static inline void
-tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc,
- vlib_buffer_t * b, u16 * next,
- vlib_node_runtime_t * error_node)
+tcp_input_dispatch_buffer (tcp_main_t *tm, tcp_connection_t *tc,
+ vlib_buffer_t *b, u16 *next, u16 *err_counters)
{
tcp_header_t *tcp;
u32 error;
@@ -2844,13 +2769,17 @@ tcp_input_dispatch_buffer (tcp_main_t * tm, tcp_connection_t * tc,
error = tm->dispatch_table[tc->state][flags].error;
tc->segs_in += 1;
- /* Track connection state when packet was received. It helps
- * @ref tcp46_listen_inline detect port reuse */
+ /* Track connection state when packet was received. It is required
+ * for @ref tcp46_listen_inline to detect whether we reached
+ * the node as a result of a SYN packet received while in time-wait
+ * state. In this case the connection_index in vnet buffer will point
+ * to the existing tcp connection and not the listener
+ */
vnet_buffer (b)->tcp.flags = tc->state;
if (PREDICT_FALSE (error != TCP_ERROR_NONE))
{
- b->error = error_node->errors[error];
+ tcp_inc_err_counter (err_counters, error, 1);
if (error == TCP_ERROR_DISPATCH)
clib_warning ("tcp conn %u disp error state %U flags %U",
tc->c_c_index, format_tcp_state, tc->state,
@@ -2866,6 +2795,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp_main_t *tm = vnet_get_tcp_main ();
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next;
+ u16 err_counters[TCP_N_ERROR] = { 0 };
tcp_update_time_now (tcp_get_worker (thread_index));
@@ -2904,8 +2834,8 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node);
- tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], node);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], err_counters);
+ tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], err_counters);
}
else
{
@@ -2913,24 +2843,26 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0])));
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0],
+ err_counters);
}
else
{
tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
- b[0]->error = node->errors[error0];
+ tcp_inc_err_counter (err_counters, error0, 1);
}
if (PREDICT_TRUE (tc1 != 0))
{
ASSERT (tcp_lookup_is_valid (tc1, b[1], tcp_buffer_hdr (b[1])));
vnet_buffer (b[1])->tcp.connection_index = tc1->c_c_index;
- tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1], node);
+ tcp_input_dispatch_buffer (tm, tc1, b[1], &next[1],
+ err_counters);
}
else
{
tcp_input_set_error_next (tm, &next[1], &error1, is_ip4);
- b[1]->error = node->errors[error1];
+ tcp_inc_err_counter (err_counters, error1, 1);
}
}
@@ -2956,12 +2888,12 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
{
ASSERT (tcp_lookup_is_valid (tc0, b[0], tcp_buffer_hdr (b[0])));
vnet_buffer (b[0])->tcp.connection_index = tc0->c_c_index;
- tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], node);
+ tcp_input_dispatch_buffer (tm, tc0, b[0], &next[0], err_counters);
}
else
{
tcp_input_set_error_next (tm, &next[0], &error0, is_ip4);
- b[0]->error = node->errors[error0];
+ tcp_inc_err_counter (err_counters, error0, 1);
}
b += 1;
@@ -2970,8 +2902,9 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
- tcp_input_trace_frame (vm, node, bufs, frame->n_vectors, is_ip4);
+ tcp_input_trace_frame (vm, node, bufs, nexts, frame->n_vectors, is_ip4);
+ tcp_store_err_counters (input, err_counters);
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
return frame->n_vectors;
}
@@ -2992,14 +2925,13 @@ VLIB_NODE_FN (tcp6_input_nolookup_node) (vlib_main_t * vm,
1 /* is_nolookup */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_input_nolookup_node) =
{
.name = "tcp4-input-nolookup",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
+ .error_counters = tcp_input_error_counters,
.n_next_nodes = TCP_INPUT_N_NEXT,
.next_nodes =
{
@@ -3010,16 +2942,14 @@ VLIB_REGISTER_NODE (tcp4_input_nolookup_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_input_nolookup_node) =
{
.name = "tcp6-input-nolookup",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
+ .error_counters = tcp_input_error_counters,
.n_next_nodes = TCP_INPUT_N_NEXT,
.next_nodes =
{
@@ -3030,7 +2960,6 @@ VLIB_REGISTER_NODE (tcp6_input_nolookup_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
VLIB_NODE_FN (tcp4_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
@@ -3046,14 +2975,13 @@ VLIB_NODE_FN (tcp6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
0 /* is_nolookup */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_input_node) =
{
.name = "tcp4-input",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
+ .error_counters = tcp_input_error_counters,
.n_next_nodes = TCP_INPUT_N_NEXT,
.next_nodes =
{
@@ -3064,16 +2992,14 @@ VLIB_REGISTER_NODE (tcp4_input_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_input_node) =
{
.name = "tcp6-input",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
+ .error_counters = tcp_input_error_counters,
.n_next_nodes = TCP_INPUT_N_NEXT,
.next_nodes =
{
@@ -3084,7 +3010,6 @@ VLIB_REGISTER_NODE (tcp6_input_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_rx_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
@@ -3270,6 +3195,8 @@ do { \
_(FIN_WAIT_2, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
_(FIN_WAIT_2, TCP_FLAG_SYN, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
+ _ (FIN_WAIT_2, TCP_FLAG_SYN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
+ TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE);
_(CLOSE_WAIT, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS,
TCP_ERROR_NONE);
@@ -3319,7 +3246,7 @@ do { \
_(CLOSED, TCP_FLAG_RST | TCP_FLAG_ACK, TCP_INPUT_NEXT_DROP,
TCP_ERROR_CONNECTION_CLOSED);
_(CLOSED, TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
- _(CLOSED, TCP_FLAG_SYN, TCP_INPUT_NEXT_RESET, TCP_ERROR_CONNECTION_CLOSED);
+ _ (CLOSED, TCP_FLAG_SYN, TCP_INPUT_NEXT_LISTEN, TCP_ERROR_NONE);
_(CLOSED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_RESET,
TCP_ERROR_CONNECTION_CLOSED);
#undef _
diff --git a/src/vnet/tcp/tcp_newreno.c b/src/vnet/tcp/tcp_newreno.c
index c5ffc2a4109..8c7e77fc974 100644
--- a/src/vnet/tcp/tcp_newreno.c
+++ b/src/vnet/tcp/tcp_newreno.c
@@ -49,12 +49,12 @@ newreno_rcv_ack (tcp_connection_t * tc, tcp_rate_sample_t * rs)
{
if (tcp_in_slowstart (tc))
{
- tc->cwnd += clib_min (tc->snd_mss, tc->bytes_acked);
+ tc->cwnd += clib_min (tc->snd_mss, rs->delivered);
}
else
{
/* tc->cwnd += clib_max ((tc->snd_mss * tc->snd_mss) / tc->cwnd, 1); */
- tcp_cwnd_accumulate (tc, tc->cwnd, tc->bytes_acked);
+ tcp_cwnd_accumulate (tc, tc->cwnd, rs->delivered);
}
}
@@ -62,30 +62,31 @@ void
newreno_rcv_cong_ack (tcp_connection_t * tc, tcp_cc_ack_t ack_type,
tcp_rate_sample_t * rs)
{
+ /* With sacks prr controls the data in flight post congestion */
+ if (PREDICT_TRUE (tcp_opts_sack_permitted (tc)))
+ return;
+
if (ack_type == TCP_CC_DUPACK)
{
- if (!tcp_opts_sack_permitted (tc))
- tc->cwnd += tc->snd_mss;
+ tc->cwnd += tc->snd_mss;
}
else if (ack_type == TCP_CC_PARTIALACK)
{
- /* RFC 6582 Sec. 3.2 */
- if (!tcp_opts_sack_permitted (&tc->rcv_opts))
- {
- /* Deflate the congestion window by the amount of new data
- * acknowledged by the Cumulative Acknowledgment field.
- * If the partial ACK acknowledges at least one SMSS of new data,
- * then add back SMSS bytes to the congestion window. This
- * artificially inflates the congestion window in order to reflect
- * the additional segment that has left the network. This "partial
- * window deflation" attempts to ensure that, when fast recovery
- * eventually ends, approximately ssthresh amount of data will be
- * outstanding in the network.*/
- tc->cwnd = (tc->cwnd > tc->bytes_acked + tc->snd_mss) ?
- tc->cwnd - tc->bytes_acked : tc->snd_mss;
- if (tc->bytes_acked > tc->snd_mss)
- tc->cwnd += tc->snd_mss;
- }
+ /* RFC 6582 Sec. 3.2
+ * Deflate the congestion window by the amount of new data
+ * acknowledged by the Cumulative Acknowledgment field.
+ * If the partial ACK acknowledges at least one SMSS of new data,
+ * then add back SMSS bytes to the congestion window. This
+ * artificially inflates the congestion window in order to reflect
+ * the additional segment that has left the network. This "partial
+ * window deflation" attempts to ensure that, when fast recovery
+ * eventually ends, approximately ssthresh amount of data will be
+ * outstanding in the network. */
+ tc->cwnd = (tc->cwnd > tc->bytes_acked + tc->snd_mss) ?
+ tc->cwnd - tc->bytes_acked :
+ tc->snd_mss;
+ if (tc->bytes_acked > tc->snd_mss)
+ tc->cwnd += tc->snd_mss;
}
}
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 82694995840..78148cd5695 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -40,8 +40,8 @@ typedef enum _tcp_output_next
_ (IP_REWRITE, "ip6-rewrite") \
_ (IP_ARP, "ip6-discover-neighbor")
-static char *tcp_error_strings[] = {
-#define tcp_error(n,s) s,
+static vlib_error_desc_t tcp_output_error_counters[] = {
+#define tcp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
#include <vnet/tcp/tcp_error.def>
#undef tcp_error
};
@@ -321,7 +321,6 @@ tcp_update_burst_snd_vars (tcp_connection_t * tc)
if (tc->snd_una == tc->snd_nxt)
{
tcp_cc_event (tc, TCP_CC_EVT_START_TX);
- tcp_connection_tx_pacer_reset (tc, tc->cwnd, TRANSPORT_PACER_MIN_BURST);
}
if (tc->flags & TCP_CONN_PSH_PENDING)
@@ -332,25 +331,6 @@ tcp_update_burst_snd_vars (tcp_connection_t * tc)
}
}
-#endif /* CLIB_MARCH_VARIANT */
-
-static void *
-tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b)
-{
- if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
- vlib_buffer_free_one (vm, b->next_buffer);
- /* Zero all flags but free list index and trace flag */
- b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
- b->current_data = 0;
- b->current_length = 0;
- b->total_length_not_including_first_buffer = 0;
- vnet_buffer (b)->tcp.flags = 0;
- VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
- /* Leave enough space for headers */
- return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
-}
-
-#ifndef CLIB_MARCH_VARIANT
static void *
tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
{
@@ -363,7 +343,6 @@ tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
}
-
/* Compute TCP checksum in software when offloading is disabled for a connection */
u16
ip6_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
@@ -441,7 +420,7 @@ static inline void
tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state,
u8 flags)
{
- tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
+ tcp_options_t _snd_opts = {}, *snd_opts = &_snd_opts;
u8 tcp_opts_len, tcp_hdr_opts_len;
tcp_header_t *th;
u16 wnd;
@@ -568,24 +547,24 @@ tcp_enqueue_to_output (tcp_worker_ctx_t * wrk, vlib_buffer_t * b, u32 bi,
wrk->tco_next_node[!is_ip4]);
}
-#endif /* CLIB_MARCH_VARIANT */
-
-static int
-tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b, u8 is_ip4)
+int
+tcp_buffer_make_reset (vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4)
{
- ip4_header_t *ih4;
- ip6_header_t *ih6;
- tcp_header_t *th;
- ip4_address_t src_ip4, dst_ip4;
+ ip4_address_t src_ip4 = {}, dst_ip4 = {};
ip6_address_t src_ip6, dst_ip6;
u16 src_port, dst_port;
u32 tmp, len, seq, ack;
+ ip4_header_t *ih4;
+ ip6_header_t *ih6;
+ tcp_header_t *th;
u8 flags;
- /* Find IP and TCP headers */
+ /*
+ * Find IP and TCP headers and glean information from them. Assumes
+ * buffer was parsed by something like @ref tcp_input_lookup_buffer
+ */
th = tcp_buffer_hdr (b);
- /* Save src and dst ip */
if (is_ip4)
{
ih4 = vlib_buffer_get_current (b);
@@ -625,7 +604,23 @@ tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b, u8 is_ip4)
seq = 0;
}
- tcp_reuse_buffer (vm, b);
+ /*
+ * Clear and reuse current buffer for reset
+ */
+ if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ vlib_buffer_free_one (vm, b->next_buffer);
+
+ /* Zero all flags but free list index and trace flag */
+ b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
+ /* Make sure new tcp header comes after current ip */
+ b->current_data = ((u8 *) th - b->data) + sizeof (tcp_header_t);
+ b->current_length = 0;
+ b->total_length_not_including_first_buffer = 0;
+ vnet_buffer (b)->tcp.flags = 0;
+
+ /*
+ * Add TCP and IP headers
+ */
th = vlib_buffer_push_tcp_net_order (b, dst_port, src_port, seq, ack,
sizeof (tcp_header_t), flags, 0);
@@ -646,7 +641,6 @@ tcp_make_reset_in_place (vlib_main_t * vm, vlib_buffer_t * b, u8 is_ip4)
return 0;
}
-#ifndef CLIB_MARCH_VARIANT
/**
* Send reset without reusing existing buffer
*
@@ -662,8 +656,8 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
u8 tcp_hdr_len, flags = 0;
tcp_header_t *th, *pkt_th;
u32 seq, ack, bi;
- ip4_header_t *ih4, *pkt_ih4;
- ip6_header_t *ih6, *pkt_ih6;
+ ip4_header_t *pkt_ih4;
+ ip6_header_t *pkt_ih6;
if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
{
@@ -673,6 +667,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
b = vlib_get_buffer (vm, bi);
tcp_init_buffer (vm, b);
+ vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
/* Make and write options */
tcp_hdr_len = sizeof (tcp_header_t);
@@ -693,6 +688,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
flags = TCP_FLAG_RST;
seq = pkt_th->ack_number;
ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
+ ack = clib_host_to_net_u32 (ack);
}
else
{
@@ -703,28 +699,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
seq, ack, tcp_hdr_len, flags, 0);
-
- /* Swap src and dst ip */
- if (is_ip4)
- {
- ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
- ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
- &pkt_ih4->src_address, IP_PROTOCOL_TCP,
- tcp_csum_offload (tc));
- th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
- }
- else
- {
- int bogus = ~0;
- ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
- 0x60);
- ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address,
- &pkt_ih6->src_address,
- IP_PROTOCOL_TCP,
- tc->ipv6_flow_label);
- th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
- ASSERT (!bogus);
- }
+ th->checksum = tcp_compute_checksum (tc, b);
tcp_enqueue_half_open (wrk, tc, b, bi);
TCP_EVT (TCP_EVT_RST_SENT, tc);
@@ -792,7 +767,7 @@ tcp_send_syn (tcp_connection_t * tc)
* such that we can return if we've ran out.
*/
tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
- tc->rto * TCP_TO_TIMER_TICK);
+ (u32) tc->rto * TCP_TO_TIMER_TICK);
if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
{
@@ -863,10 +838,9 @@ tcp_send_fin (tcp_connection_t * tc)
/* Out of buffers so program fin retransmit ASAP */
tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
tcp_cfg.alloc_err_timeout);
- if (fin_snt)
- tc->snd_nxt += 1;
- else
- /* Make sure retransmit retries a fin not data */
+ tc->snd_nxt += 1;
+ /* Make sure retransmit retries a fin not data with right snd_nxt */
+ if (!fin_snt)
tc->flags |= TCP_CONN_FINSNT;
tcp_worker_stats_inc (wrk, no_buffer, 1);
return;
@@ -968,11 +942,9 @@ tcp_buffer_len (vlib_buffer_t * b)
return data_len;
}
-u32
-tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
+always_inline u32
+tcp_push_one_header (tcp_connection_t *tc, vlib_buffer_t *b)
{
- tcp_connection_t *tc = (tcp_connection_t *) tconn;
-
if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tcp_bt_track_tx (tc, tcp_buffer_len (b));
@@ -980,6 +952,37 @@ tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
/* update_snd_nxt */ 1);
tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
+ return 0;
+}
+
+u32
+tcp_session_push_header (transport_connection_t *tconn, vlib_buffer_t **bs,
+ u32 n_bufs)
+{
+ tcp_connection_t *tc = (tcp_connection_t *) tconn;
+
+ while (n_bufs >= 4)
+ {
+ vlib_prefetch_buffer_header (bs[2], STORE);
+ vlib_prefetch_buffer_header (bs[3], STORE);
+
+ tcp_push_one_header (tc, bs[0]);
+ tcp_push_one_header (tc, bs[1]);
+
+ n_bufs -= 2;
+ bs += 2;
+ }
+ while (n_bufs)
+ {
+ if (n_bufs > 1)
+ vlib_prefetch_buffer_header (bs[1], STORE);
+
+ tcp_push_one_header (tc, bs[0]);
+
+ n_bufs -= 1;
+ bs += 1;
+ }
+
/* If not tracking an ACK, start tracking */
if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
{
@@ -1113,7 +1116,7 @@ tcp_prepare_segment (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
data = tcp_init_buffer (vm, *b);
n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
max_deq_bytes);
- ASSERT (n_bytes == max_deq_bytes);
+ ASSERT (n_bytes > 0);
b[0]->current_length = n_bytes;
tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
/* burst */ 0, /* update_snd_nxt */ 0);
@@ -1275,6 +1278,7 @@ tcp_cc_init_rxt_timeout (tcp_connection_t * tc)
tc->cwnd_acc_bytes = 0;
tc->tr_occurences += 1;
tc->sack_sb.reorder = TCP_DUPACK_THRESHOLD;
+ tc->sack_sb.rescue_rxt = tc->snd_una - 1;
tcp_recovery_on (tc);
}
@@ -1341,7 +1345,10 @@ tcp_timer_retransmit_handler (tcp_connection_t * tc)
}
if (tcp_opts_sack_permitted (&tc->rcv_opts))
- tcp_check_sack_reneging (tc);
+ {
+ tcp_check_sack_reneging (tc);
+ scoreboard_rxt_mark_lost (&tc->sack_sb, tc->snd_una, tc->snd_nxt);
+ }
/* Update send congestion to make sure that rxt has data to send */
tc->snd_congestion = tc->snd_nxt;
@@ -1482,7 +1489,7 @@ tcp_timer_retransmit_syn_handler (tcp_connection_t * tc)
tcp_enqueue_half_open (wrk, tc, b, bi);
tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
- tc->rto * TCP_TO_TIMER_TICK);
+ (u32) tc->rto * TCP_TO_TIMER_TICK);
}
/**
@@ -1538,8 +1545,10 @@ tcp_timer_persist_handler (tcp_connection_t * tc)
tcp_validate_txf_size (tc, offset);
tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
- max_snd_bytes = clib_min (tc->snd_mss,
+ max_snd_bytes = clib_min (clib_min (tc->snd_mss, available_bytes),
tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
+ if (tc->snd_wnd > 0)
+ max_snd_bytes = clib_min (tc->snd_wnd, max_snd_bytes);
n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
max_snd_bytes);
b->current_length = n_bytes;
@@ -1720,7 +1729,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
&& tc->rxt_head != tc->snd_una
&& tcp_retransmit_should_retry_head (tc, sb))
{
- max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);
+ max_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b);
if (!n_written)
{
@@ -1752,7 +1761,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
if (!hole)
{
/* We are out of lost holes to retransmit so send some new data. */
- if (max_deq > tc->snd_mss)
+ if (max_deq)
{
u32 n_segs_new;
int av_wnd;
@@ -1762,7 +1771,10 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
snd_space = clib_min (snd_space, av_wnd);
- snd_space = clib_min (max_deq, snd_space);
+ /* Low bound max_deq to mss to be able to send a segment even
+ * when it is less than mss */
+ snd_space =
+ clib_min (clib_max (max_deq, tc->snd_mss), snd_space);
burst_size = clib_min (burst_size - n_segs,
snd_space / tc->snd_mss);
burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST);
@@ -1774,8 +1786,7 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
goto done;
}
- if (tcp_in_recovery (tc) || !can_rescue
- || scoreboard_rescue_rxt_valid (sb, tc))
+ if (!can_rescue || scoreboard_rescue_rxt_valid (sb, tc))
break;
/* If rescue rxt undefined or less than snd_una then one segment of
@@ -1799,7 +1810,11 @@ tcp_retransmit_sack (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
break;
}
- max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
+ max_bytes = hole->end - sb->high_rxt;
+ /* Avoid retransmitting segment less than mss if possible */
+ if (snd_space < tc->snd_mss && max_bytes > snd_space)
+ break;
+ max_bytes = clib_min (max_bytes, snd_space);
max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
if (max_bytes == 0)
break;
@@ -2136,7 +2151,7 @@ tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0,
}
vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = tc0->sw_if_index;
if (!is_ip4)
{
@@ -2162,6 +2177,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
u32 n_left_from, *from, thread_index = vm->thread_index;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 nexts[VLIB_FRAME_SIZE], *next;
+ u16 err_counters[TCP_N_ERROR] = { 0 };
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -2212,7 +2228,8 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
+ tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION,
+ 1);
next[0] = TCP_OUTPUT_NEXT_DROP;
}
if (tc1 != 0)
@@ -2223,7 +2240,8 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- b[1]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
+ tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION,
+ 1);
next[1] = TCP_OUTPUT_NEXT_DROP;
}
}
@@ -2253,7 +2271,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
else
{
- b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
+ tcp_inc_err_counter (err_counters, TCP_ERROR_INVALID_CONNECTION, 1);
next[0] = TCP_OUTPUT_NEXT_DROP;
}
@@ -2262,6 +2280,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
n_left_from -= 1;
}
+ tcp_store_err_counters (output, err_counters);
vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4),
TCP_ERROR_PKTS_SENT, frame->n_vectors);
@@ -2280,7 +2299,6 @@ VLIB_NODE_FN (tcp6_output_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_output_node) =
{
.name = "tcp4-output",
@@ -2288,7 +2306,7 @@ VLIB_REGISTER_NODE (tcp4_output_node) =
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
- .error_strings = tcp_error_strings,
+ .error_counters = tcp_output_error_counters,
.n_next_nodes = TCP_OUTPUT_N_NEXT,
.next_nodes = {
#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
@@ -2298,9 +2316,7 @@ VLIB_REGISTER_NODE (tcp4_output_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_output_node) =
{
.name = "tcp6-output",
@@ -2308,7 +2324,7 @@ VLIB_REGISTER_NODE (tcp6_output_node) =
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
.protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
- .error_strings = tcp_error_strings,
+ .error_counters = tcp_output_error_counters,
.n_next_nodes = TCP_OUTPUT_N_NEXT,
.next_nodes = {
#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
@@ -2318,7 +2334,6 @@ VLIB_REGISTER_NODE (tcp6_output_node) =
.format_buffer = format_tcp_header,
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
typedef enum _tcp_reset_next
{
@@ -2335,84 +2350,105 @@ typedef enum _tcp_reset_next
_(DROP, "error-drop") \
_(IP_LOOKUP, "ip6-lookup")
+static void
+tcp_reset_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_buffer_t **bs, u32 n_bufs, u8 is_ip4)
+{
+ tcp_header_t *tcp;
+ tcp_tx_trace_t *t;
+ int i;
+
+ for (i = 0; i < n_bufs; i++)
+ {
+ if (bs[i]->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ tcp = vlib_buffer_get_current (bs[i]);
+ t = vlib_add_trace (vm, node, bs[i], sizeof (*t));
+
+ if (is_ip4)
+ {
+ ip4_header_t *ih4 = vlib_buffer_get_current (bs[i]);
+ tcp = ip4_next_header (ih4);
+ t->tcp_connection.c_lcl_ip.ip4 = ih4->dst_address;
+ t->tcp_connection.c_rmt_ip.ip4 = ih4->src_address;
+ t->tcp_connection.c_is_ip4 = 1;
+ }
+ else
+ {
+ ip6_header_t *ih6 = vlib_buffer_get_current (bs[i]);
+ tcp = ip6_next_header (ih6);
+ t->tcp_connection.c_lcl_ip.ip6 = ih6->dst_address;
+ t->tcp_connection.c_rmt_ip.ip6 = ih6->src_address;
+ }
+ t->tcp_connection.c_lcl_port = tcp->dst_port;
+ t->tcp_connection.c_rmt_port = tcp->src_port;
+ t->tcp_connection.c_proto = TRANSPORT_PROTO_TCP;
+ clib_memcpy_fast (&t->tcp_header, tcp, sizeof (t->tcp_header));
+ }
+ }
+}
+
static uword
-tcp46_send_reset_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip4)
+tcp46_reset_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, u8 is_ip4)
{
- u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
- u32 n_left_from, next_index, *from, *to_next;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u16 nexts[VLIB_FRAME_SIZE], *next;
+ u32 n_left_from, *from;
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
- next_index = node->cached_next_index;
+ b = bufs;
+ next = nexts;
while (n_left_from > 0)
{
- u32 n_left_to_next;
+ tcp_buffer_make_reset (vm, b[0], is_ip4);
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+ /* IP lookup in fib where it was received. Previous value
+ * was overwritten by tcp-input */
+ vnet_buffer (b[0])->sw_if_index[VLIB_TX] =
+ vec_elt (ip4_main.fib_index_by_sw_if_index,
+ vnet_buffer (b[0])->sw_if_index[VLIB_RX]);
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *b0;
- tcp_tx_trace_t *t0;
- tcp_header_t *th0;
- u32 bi0;
-
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- tcp_make_reset_in_place (vm, b0, is_ip4);
-
- /* Prepare to send to IP lookup */
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
-
- b0->error = node->errors[error0];
- b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- th0 = vlib_buffer_get_current (b0);
- if (is_ip4)
- th0 = ip4_next_header ((ip4_header_t *) th0);
- else
- th0 = ip6_next_header ((ip6_header_t *) th0);
- t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
- clib_memcpy_fast (&t0->tcp_header, th0,
- sizeof (t0->tcp_header));
- }
+ b[0]->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ next[0] = TCP_RESET_NEXT_IP_LOOKUP;
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ b += 1;
+ next += 1;
+ n_left_from -= 1;
}
- return from_frame->n_vectors;
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ tcp_reset_trace_frame (vm, node, bufs, frame->n_vectors, is_ip4);
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+
+ vlib_node_increment_counter (vm, node->node_index, TCP_ERROR_RST_SENT,
+ frame->n_vectors);
+
+ return frame->n_vectors;
}
VLIB_NODE_FN (tcp4_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
- return tcp46_send_reset_inline (vm, node, from_frame, 1);
+ return tcp46_reset_inline (vm, node, from_frame, 1);
}
VLIB_NODE_FN (tcp6_reset_node) (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
{
- return tcp46_send_reset_inline (vm, node, from_frame, 0);
+ return tcp46_reset_inline (vm, node, from_frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp4_reset_node) = {
.name = "tcp4-reset",
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
+ .error_counters = tcp_output_error_counters,
.n_next_nodes = TCP_RESET_N_NEXT,
.next_nodes = {
#define _(s,n) [TCP_RESET_NEXT_##s] = n,
@@ -2421,14 +2457,12 @@ VLIB_REGISTER_NODE (tcp4_reset_node) = {
},
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tcp6_reset_node) = {
.name = "tcp6-reset",
.vector_size = sizeof (u32),
.n_errors = TCP_N_ERROR,
- .error_strings = tcp_error_strings,
+ .error_counters = tcp_output_error_counters,
.n_next_nodes = TCP_RESET_N_NEXT,
.next_nodes = {
#define _(s,n) [TCP_RESET_NEXT_##s] = n,
@@ -2437,7 +2471,6 @@ VLIB_REGISTER_NODE (tcp6_reset_node) = {
},
.format_trace = format_tcp_tx_trace,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tcp/tcp_packet.h b/src/vnet/tcp/tcp_packet.h
index b0636d871d5..c137ea68108 100644
--- a/src/vnet/tcp/tcp_packet.h
+++ b/src/vnet/tcp/tcp_packet.h
@@ -16,7 +16,8 @@
#ifndef included_tcp_packet_h
#define included_tcp_packet_h
-#include <vnet/vnet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
/* TCP flags bit 0 first. */
#define foreach_tcp_flag \
@@ -185,6 +186,100 @@ typedef struct
#define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0)
#define timestamp_leq(_t1, _t2) ((i32)((_t1)-(_t2)) <= 0)
+always_inline void
+ip4_tcp_reply_x1 (ip4_header_t *ip0, tcp_header_t *tcp0)
+{
+ u32 src0, dst0;
+
+ src0 = ip0->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip0->dst_address.data_u32 = src0;
+
+ src0 = tcp0->src;
+ dst0 = tcp0->dst;
+ tcp0->src = dst0;
+ tcp0->dst = src0;
+}
+
+always_inline void
+ip4_tcp_reply_x2 (ip4_header_t *ip0, ip4_header_t *ip1, tcp_header_t *tcp0,
+ tcp_header_t *tcp1)
+{
+ u32 src0, dst0, src1, dst1;
+
+ src0 = ip0->src_address.data_u32;
+ src1 = ip1->src_address.data_u32;
+ dst0 = ip0->dst_address.data_u32;
+ dst1 = ip1->dst_address.data_u32;
+ ip0->src_address.data_u32 = dst0;
+ ip1->src_address.data_u32 = dst1;
+ ip0->dst_address.data_u32 = src0;
+ ip1->dst_address.data_u32 = src1;
+
+ src0 = tcp0->src;
+ src1 = tcp1->src;
+ dst0 = tcp0->dst;
+ dst1 = tcp1->dst;
+ tcp0->src = dst0;
+ tcp1->src = dst1;
+ tcp0->dst = src0;
+ tcp1->dst = src1;
+}
+
+always_inline void
+ip6_tcp_reply_x1 (ip6_header_t *ip0, tcp_header_t *tcp0)
+{
+ {
+ ip6_address_t src0, dst0;
+
+ src0 = ip0->src_address;
+ dst0 = ip0->dst_address;
+ ip0->src_address = dst0;
+ ip0->dst_address = src0;
+ }
+
+ {
+ u16 src0, dst0;
+
+ src0 = tcp0->src;
+ dst0 = tcp0->dst;
+ tcp0->src = dst0;
+ tcp0->dst = src0;
+ }
+}
+
+always_inline void
+ip6_tcp_reply_x2 (ip6_header_t *ip0, ip6_header_t *ip1, tcp_header_t *tcp0,
+ tcp_header_t *tcp1)
+{
+ {
+ ip6_address_t src0, dst0, src1, dst1;
+
+ src0 = ip0->src_address;
+ src1 = ip1->src_address;
+ dst0 = ip0->dst_address;
+ dst1 = ip1->dst_address;
+ ip0->src_address = dst0;
+ ip1->src_address = dst1;
+ ip0->dst_address = src0;
+ ip1->dst_address = src1;
+ }
+
+ {
+ u16 src0, dst0, src1, dst1;
+
+ src0 = tcp0->src;
+ src1 = tcp1->src;
+ dst0 = tcp0->dst;
+ dst1 = tcp1->dst;
+ tcp0->src = dst0;
+ tcp1->src = dst1;
+ tcp0->dst = src0;
+ tcp1->dst = src1;
+ }
+}
+
/**
* Parse TCP header options.
*
diff --git a/src/vnet/tcp/tcp_pg.c b/src/vnet/tcp/tcp_pg.c
index 07bdb113fd0..9b98e3d8ee4 100644
--- a/src/vnet/tcp/tcp_pg.c
+++ b/src/vnet/tcp/tcp_pg.c
@@ -51,6 +51,13 @@
_ (ECE) \
_ (CWR)
+#define foreach_tcp_options \
+ _ (mss, TCP_OPTION_MSS, TCP_OPTION_LEN_MSS, 1) \
+ _ (timestamp, TCP_OPTION_TIMESTAMP, TCP_OPTION_LEN_TIMESTAMP, 2) \
+ _ (winscale, TCP_OPTION_WINDOW_SCALE, TCP_OPTION_LEN_WINDOW_SCALE, 1) \
+ _ (sackperm, TCP_OPTION_SACK_PERMITTED, TCP_OPTION_LEN_SACK_PERMITTED, 0) \
+ _ (sack, TCP_OPTION_SACK_BLOCK, TCP_OPTION_LEN_SACK_BLOCK, 0)
+
static void
tcp_pg_edit_function (pg_main_t * pg,
pg_stream_t * s,
@@ -150,82 +157,192 @@ uword
unformat_pg_tcp_header (unformat_input_t * input, va_list * args)
{
pg_stream_t *s = va_arg (*args, pg_stream_t *);
- pg_tcp_header_t *p;
- u32 group_index;
+ pg_tcp_header_t *pth;
+ u32 header_group_index, opt_group_index = ~0, noop_len, opts_len = 0;
- p = pg_create_edit_group (s, sizeof (p[0]), sizeof (tcp_header_t),
- &group_index);
- pg_tcp_header_init (p);
+ pth = pg_create_edit_group (s, sizeof (pth[0]), sizeof (tcp_header_t),
+ &header_group_index);
+ pg_tcp_header_init (pth);
/* Defaults. */
- pg_edit_set_fixed (&p->seq_number, 0);
- pg_edit_set_fixed (&p->ack_number, 0);
-
- pg_edit_set_fixed (&p->data_offset_and_reserved,
- sizeof (tcp_header_t) / sizeof (u32));
+ pg_edit_set_fixed (&pth->seq_number, 0);
+ pg_edit_set_fixed (&pth->ack_number, 0);
- pg_edit_set_fixed (&p->window, 4096);
- pg_edit_set_fixed (&p->urgent_pointer, 0);
+ pg_edit_set_fixed (&pth->window, 4096);
+ pg_edit_set_fixed (&pth->urgent_pointer, 0);
-#define _(f) pg_edit_set_fixed (&p->f##_flag, 0);
+#define _(f) pg_edit_set_fixed (&pth->f##_flag, 0);
foreach_tcp_flag
#undef _
- p->checksum.type = PG_EDIT_UNSPECIFIED;
+ pth->checksum.type = PG_EDIT_UNSPECIFIED;
- if (!unformat (input, "TCP: %U -> %U",
- unformat_pg_edit,
- unformat_tcp_udp_port, &p->src,
- unformat_pg_edit, unformat_tcp_udp_port, &p->dst))
+ if (!unformat (input, "TCP: %U -> %U", unformat_pg_edit,
+ unformat_tcp_udp_port, &pth->src, unformat_pg_edit,
+ unformat_tcp_udp_port, &pth->dst))
goto error;
/* Parse options. */
while (1)
{
- if (unformat (input, "window %U",
- unformat_pg_edit, unformat_pg_number, &p->window))
+ if (unformat (input, "window %U", unformat_pg_edit, unformat_pg_number,
+ &pth->window))
;
- else if (unformat (input, "checksum %U",
- unformat_pg_edit, unformat_pg_number, &p->checksum))
+ else if (unformat (input, "checksum %U", unformat_pg_edit,
+ unformat_pg_number, &pth->checksum))
;
else if (unformat (input, "seqnum %U", unformat_pg_edit,
- unformat_pg_number, &p->seq_number))
+ unformat_pg_number, &pth->seq_number))
;
else if (unformat (input, "acknum %U", unformat_pg_edit,
- unformat_pg_number, &p->ack_number))
+ unformat_pg_number, &pth->ack_number))
;
/* Flags. */
-#define _(f) else if (unformat (input, #f)) pg_edit_set_fixed (&p->f##_flag, 1);
+#define _(f) \
+ else if (unformat (input, #f)) pg_edit_set_fixed (&pth->f##_flag, 1);
foreach_tcp_flag
#undef _
- /* Can't parse input: try next protocol level. */
+ /* Can't parse input: try TCP options and next protocol level. */
+ else break;
+ }
+
+ while (unformat (input, "opt"))
+ {
+ int i;
+ pg_edit_t *opt_header, *opt_values;
+ u8 type, opt_len, n_values;
+
+ /* first allocate a new edit group for options */
+ if (opt_group_index == ~0)
+ (void) pg_create_edit_group (s, 0, 0, &opt_group_index);
+
+ if (false)
+ {
+ }
+#define _(n, t, l, k) \
+ else if (unformat (input, #n)) \
+ { \
+ type = (t); \
+ opt_len = (l); \
+ n_values = (k); \
+ }
+ foreach_tcp_options
+#undef _
else
+ {
+ /* unknown TCP option */
break;
+ }
+
+#define pg_tcp_option_init(e, o, b) \
+ do \
+ { \
+ *(o) += (b); \
+ (e)->lsb_bit_offset = *(o) > 0 ? (*(o) -1) * BITS (u8) : 0; \
+ (e)->n_bits = (b) *BITS (u8); \
+ } \
+ while (0);
+
+ /* if we don't know how many values to read, just ask */
+ if (n_values == 0 &&
+ unformat (input, "nvalues %D", sizeof (n_values), &n_values))
+ {
+ switch (type)
+ {
+ case TCP_OPTION_SACK_BLOCK:
+ /* each sack block is composed of 2 32-bits values */
+ n_values *= 2;
+ /*
+ opt_len contains the length of a single sack block,
+ it needs to be updated to contains the final number of bytes
+ for the sack options
+ */
+ opt_len = 2 + 2 * opt_len;
+ break;
+ default:
+ /* unknown variable options */
+ continue;
+ }
+ }
+
+ opt_header = pg_add_edits (s, sizeof (pg_edit_t) * (2 + n_values),
+ opt_len, opt_group_index);
+ pg_tcp_option_init (opt_header, &opts_len, 1);
+ pg_tcp_option_init (opt_header + 1, &opts_len, 1);
+ pg_edit_set_fixed (opt_header, type);
+ pg_edit_set_fixed (opt_header + 1, opt_len);
+ opt_values = opt_header + 2;
+
+ switch (type)
+ {
+ case TCP_OPTION_MSS:
+ pg_tcp_option_init (opt_values, &opts_len, 2);
+ break;
+ case TCP_OPTION_WINDOW_SCALE:
+ pg_tcp_option_init (opt_values, &opts_len, 1);
+ break;
+ case TCP_OPTION_TIMESTAMP:
+ case TCP_OPTION_SACK_BLOCK:
+ for (i = 0; i < n_values; ++i)
+ pg_tcp_option_init (opt_values + i, &opts_len, 4);
+ break;
+ default:
+ break;
+ }
+
+ for (i = 0; i < n_values; ++i)
+ {
+ if (!unformat (input, "%U", unformat_pg_edit, unformat_pg_number,
+ opt_values + i))
+ goto error;
+ }
}
+ /* add TCP NO-OP options to fill options up to a 4-bytes boundary */
+ noop_len = (TCP_OPTS_ALIGN - opts_len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
+ if (noop_len > 0)
+ {
+ pg_edit_t *noop_edit;
+ u8 *noops = 0;
+
+ vec_validate (noops, noop_len - 1);
+ clib_memset (noops, 1, noop_len);
+
+ noop_edit =
+ pg_add_edits (s, sizeof (noop_edit[0]), noop_len, opt_group_index);
+ pg_tcp_option_init (noop_edit, &opts_len, noop_len);
+ noop_edit->type = PG_EDIT_FIXED;
+ noop_edit->values[PG_EDIT_LO] = noops;
+ }
+#undef pg_tcp_option_init
+
+ /* set the data offset according to options */
+ pg_edit_set_fixed (&pth->data_offset_and_reserved,
+ (sizeof (tcp_header_t) + opts_len) / sizeof (u32));
+
{
ip_main_t *im = &ip_main;
u16 dst_port;
tcp_udp_port_info_t *pi;
pi = 0;
- if (p->dst.type == PG_EDIT_FIXED)
+ if (pth->dst.type == PG_EDIT_FIXED)
{
- dst_port = pg_edit_get_value (&p->dst, PG_EDIT_LO);
+ dst_port = pg_edit_get_value (&pth->dst, PG_EDIT_LO);
pi = ip_get_tcp_udp_port_info (im, dst_port);
}
- if (pi && pi->unformat_pg_edit
- && unformat_user (input, pi->unformat_pg_edit, s))
+ if (pi && pi->unformat_pg_edit &&
+ unformat_user (input, pi->unformat_pg_edit, s))
;
else if (!unformat_user (input, unformat_pg_payload, s))
goto error;
- if (p->checksum.type == PG_EDIT_UNSPECIFIED)
+ if (pth->checksum.type == PG_EDIT_UNSPECIFIED)
{
- pg_edit_group_t *g = pg_stream_get_group (s, group_index);
+ pg_edit_group_t *g = pg_stream_get_group (s, header_group_index);
g->edit_function = tcp_pg_edit_function;
g->edit_function_opaque = 0;
}
diff --git a/src/vnet/tcp/tcp_sack.c b/src/vnet/tcp/tcp_sack.c
index 8f51b517361..63af07b50cd 100644
--- a/src/vnet/tcp/tcp_sack.c
+++ b/src/vnet/tcp/tcp_sack.c
@@ -265,6 +265,27 @@ scoreboard_init_rxt (sack_scoreboard_t * sb, u32 snd_una)
}
void
+scoreboard_rxt_mark_lost (sack_scoreboard_t *sb, u32 snd_una, u32 snd_nxt)
+{
+ sack_scoreboard_hole_t *hole;
+
+ hole = scoreboard_first_hole (sb);
+ if (!hole)
+ {
+ hole = scoreboard_insert_hole (sb, TCP_INVALID_SACK_HOLE_INDEX, snd_una,
+ snd_nxt);
+ sb->tail = scoreboard_hole_index (sb, hole);
+ sb->high_sacked = snd_una;
+ }
+
+ if (hole->is_lost)
+ return;
+
+ hole->is_lost = 1;
+ sb->lost_bytes += scoreboard_hole_bytes (hole);
+}
+
+void
scoreboard_init (sack_scoreboard_t * sb)
{
sb->head = TCP_INVALID_SACK_HOLE_INDEX;
diff --git a/src/vnet/tcp/tcp_sack.h b/src/vnet/tcp/tcp_sack.h
index 1c3fa95510b..bb206b92dbb 100644
--- a/src/vnet/tcp/tcp_sack.h
+++ b/src/vnet/tcp/tcp_sack.h
@@ -105,6 +105,8 @@ void scoreboard_clear (sack_scoreboard_t * sb);
void scoreboard_clear_reneging (sack_scoreboard_t * sb, u32 start, u32 end);
void scoreboard_init (sack_scoreboard_t * sb);
void scoreboard_init_rxt (sack_scoreboard_t * sb, u32 snd_una);
+void scoreboard_rxt_mark_lost (sack_scoreboard_t *sb, u32 snd_una,
+ u32 snd_nxt);
format_function_t format_tcp_scoreboard;
diff --git a/src/vnet/tcp/tcp_syn_filter4.c b/src/vnet/tcp/tcp_syn_filter4.c
index ef7a3280c03..6e867240ad6 100644
--- a/src/vnet/tcp/tcp_syn_filter4.c
+++ b/src/vnet/tcp/tcp_syn_filter4.c
@@ -54,20 +54,20 @@ format_syn_filter4_trace (u8 * s, va_list * args)
extern vlib_node_registration_t syn_filter4_node;
-#define foreach_syn_filter_error \
-_(THROTTLED, "TCP SYN packet throttle drops") \
-_(OK, "TCP SYN packets passed")
+#define foreach_syn_filter_error \
+ _ (THROTTLED, throttled, ERROR, "TCP SYN packet throttle drops") \
+ _ (OK, ok, INFO, "TCP SYN packets passed")
typedef enum
{
-#define _(sym,str) SYN_FILTER_ERROR_##sym,
+#define _(f, n, s, d) SYN_FILTER_ERROR_##f,
foreach_syn_filter_error
#undef _
SYN_FILTER_N_ERROR,
} syn_filter_error_t;
-static char *syn_filter4_error_strings[] = {
-#define _(sym,string) string,
+static vlib_error_desc_t tcp_syn_error_counters[] = {
+#define _(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
foreach_syn_filter_error
#undef _
};
@@ -399,7 +399,6 @@ VLIB_NODE_FN (syn_filter4_node) (vlib_main_t * vm,
return frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (syn_filter4_node) =
{
.name = "syn-filter-4",
@@ -408,8 +407,8 @@ VLIB_REGISTER_NODE (syn_filter4_node) =
.type = VLIB_NODE_TYPE_INTERNAL,
.runtime_data_bytes = sizeof (syn_filter4_runtime_t),
- .n_errors = ARRAY_LEN(syn_filter4_error_strings),
- .error_strings = syn_filter4_error_strings,
+ .n_errors = SYN_FILTER_N_ERROR,
+ .error_counters = tcp_syn_error_counters,
.n_next_nodes = SYN_FILTER_N_NEXT,
@@ -418,16 +417,13 @@ VLIB_REGISTER_NODE (syn_filter4_node) =
[SYN_FILTER_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (syn_filter_4, static) =
{
.arc_name = "ip4-local",
.node_name = "syn-filter-4",
.runs_before = VNET_FEATURES("ip4-local-end-of-arc"),
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
int
@@ -525,14 +521,12 @@ syn_filter_enable_disable_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (sr_content_command, static) =
{
.path = "ip syn filter",
.short_help = "ip syn filter <interface-name> [disable]",
.function = syn_filter_enable_disable_command_fn,
};
-/* *INDENT-ON* */
#endif /* CLIB_MARCH_VARIANT */
/*
diff --git a/src/vnet/tcp/tcp_timer.h b/src/vnet/tcp/tcp_timer.h
index 4668c79cabf..c0907cae1cc 100644
--- a/src/vnet/tcp/tcp_timer.h
+++ b/src/vnet/tcp/tcp_timer.h
@@ -17,11 +17,18 @@
#include <vnet/tcp/tcp_types.h>
+static inline u8
+tcp_timer_thread_is_valid (tcp_connection_t *tc)
+{
+ return ((tc->c_thread_index == vlib_get_thread_index ()) ||
+ vlib_thread_is_main_w_barrier ());
+}
+
always_inline void
-tcp_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id,
+tcp_timer_set (tcp_timer_wheel_t *tw, tcp_connection_t *tc, u8 timer_id,
u32 interval)
{
- ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ ASSERT (tcp_timer_thread_is_valid (tc));
ASSERT (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID);
tc->timers[timer_id] = tw_timer_start_tcp_twsl (tw, tc->c_c_index,
timer_id, interval);
@@ -30,7 +37,7 @@ tcp_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id,
always_inline void
tcp_timer_reset (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id)
{
- ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ ASSERT (tcp_timer_thread_is_valid (tc));
tc->pending_timers &= ~(1 << timer_id);
if (tc->timers[timer_id] == TCP_TIMER_HANDLE_INVALID)
return;
@@ -43,7 +50,7 @@ always_inline void
tcp_timer_update (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id,
u32 interval)
{
- ASSERT (tc->c_thread_index == vlib_get_thread_index ());
+ ASSERT (tcp_timer_thread_is_valid (tc));
if (tc->timers[timer_id] != TCP_TIMER_HANDLE_INVALID)
tw_timer_update_tcp_twsl (tw, tc->timers[timer_id], interval);
else
@@ -51,12 +58,19 @@ tcp_timer_update (tcp_timer_wheel_t * tw, tcp_connection_t * tc, u8 timer_id,
timer_id, interval);
}
+always_inline u8
+tcp_timer_is_active (tcp_connection_t *tc, tcp_timers_e timer)
+{
+ return tc->timers[timer] != TCP_TIMER_HANDLE_INVALID ||
+ (tc->pending_timers & (1 << timer));
+}
+
always_inline void
tcp_retransmit_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc)
{
ASSERT (tc->snd_una != tc->snd_nxt);
tcp_timer_set (tw, tc, TCP_TIMER_RETRANSMIT,
- clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
+ clib_max ((u32) tc->rto * TCP_TO_TIMER_TICK, 1));
}
always_inline void
@@ -70,20 +84,7 @@ tcp_persist_timer_set (tcp_timer_wheel_t * tw, tcp_connection_t * tc)
{
/* Reuse RTO. It's backed off in handler */
tcp_timer_set (tw, tc, TCP_TIMER_PERSIST,
- clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
-}
-
-always_inline void
-tcp_persist_timer_update (tcp_timer_wheel_t * tw, tcp_connection_t * tc)
-{
- u32 interval;
-
- if (seq_leq (tc->snd_una, tc->snd_congestion + tc->burst_acked))
- interval = 1;
- else
- interval = clib_max (tc->rto * TCP_TO_TIMER_TICK, 1);
-
- tcp_timer_update (tw, tc, TCP_TIMER_PERSIST, interval);
+ clib_max ((u32) tc->rto * TCP_TO_TIMER_TICK, 1));
}
always_inline void
@@ -98,19 +99,13 @@ tcp_retransmit_timer_update (tcp_timer_wheel_t * tw, tcp_connection_t * tc)
if (tc->snd_una == tc->snd_nxt)
{
tcp_retransmit_timer_reset (tw, tc);
- if (tc->snd_wnd < tc->snd_mss)
- tcp_persist_timer_update (tw, tc);
+ if (tc->snd_wnd < tc->snd_mss &&
+ !tcp_timer_is_active (tc, TCP_TIMER_PERSIST))
+ tcp_persist_timer_set (tw, tc);
}
else
tcp_timer_update (tw, tc, TCP_TIMER_RETRANSMIT,
- clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
-}
-
-always_inline u8
-tcp_timer_is_active (tcp_connection_t * tc, tcp_timers_e timer)
-{
- return tc->timers[timer] != TCP_TIMER_HANDLE_INVALID
- || (tc->pending_timers & (1 << timer));
+ clib_max ((u32) tc->rto * TCP_TO_TIMER_TICK, 1));
}
always_inline void
diff --git a/src/vnet/tcp/tcp_types.h b/src/vnet/tcp/tcp_types.h
index aacfd8f2fd4..f9a9ff9a4da 100644
--- a/src/vnet/tcp/tcp_types.h
+++ b/src/vnet/tcp/tcp_types.h
@@ -389,7 +389,6 @@ typedef struct _tcp_connection
#define rst_state snd_wl1
} tcp_connection_t;
-/* *INDENT-OFF* */
struct _tcp_cc_algorithm
{
const char *name;
@@ -406,7 +405,6 @@ struct _tcp_cc_algorithm
void (*event) (tcp_connection_t *tc, tcp_cc_event_t evt);
u64 (*get_pacing_rate) (tcp_connection_t *tc);
};
-/* *INDENT-ON* */
#define tcp_fastrecovery_on(tc) (tc)->flags |= TCP_CONN_FAST_RECOVERY
#define tcp_fastrecovery_off(tc) (tc)->flags &= ~TCP_CONN_FAST_RECOVERY
diff --git a/src/vnet/teib/teib.c b/src/vnet/teib/teib.c
index dc0c99b1dbe..a9234bbeb5e 100644
--- a/src/vnet/teib/teib.c
+++ b/src/vnet/teib/teib.c
@@ -34,7 +34,7 @@ struct teib_entry_t_
{
teib_key_t *te_key;
fib_prefix_t te_nh;
- u32 te_fib_index;
+ u32 te_nh_fib_index;
};
typedef struct teib_db_t_
@@ -83,7 +83,7 @@ teib_entry_get_af (const teib_entry_t * te)
u32
teib_entry_get_fib_index (const teib_entry_t * te)
{
- return (te->te_fib_index);
+ return (te->te_nh_fib_index);
}
const ip_address_t *
@@ -101,7 +101,7 @@ teib_entry_get_nh (const teib_entry_t * te)
void
teib_entry_adj_stack (const teib_entry_t * te, adj_index_t ai)
{
- adj_midchain_delegate_stack (ai, te->te_fib_index, &te->te_nh);
+ adj_midchain_delegate_stack (ai, te->te_nh_fib_index, &te->te_nh);
}
teib_entry_t *
@@ -139,7 +139,7 @@ teib_entry_find_46 (u32 sw_if_index,
}
static void
-teib_adj_fib_add (const ip_address_t * ip, u32 sw_if_index, u32 fib_index)
+teib_adj_fib_add (const ip_address_t *ip, u32 sw_if_index, u32 peer_fib_index)
{
if (AF_IP6 == ip_addr_version (ip) &&
ip6_address_is_link_local_unicast (&ip_addr_v6 (ip)))
@@ -155,21 +155,18 @@ teib_adj_fib_add (const ip_address_t * ip, u32 sw_if_index, u32 fib_index)
fib_prefix_t pfx;
ip_address_to_fib_prefix (ip, &pfx);
- fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
- FIB_ENTRY_FLAG_ATTACHED,
- fib_proto_to_dpo (pfx.fp_proto),
- &pfx.fp_addr,
- sw_if_index,
- ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
-
+ fib_table_entry_path_add (
+ peer_fib_index, &pfx, FIB_SOURCE_ADJ, FIB_ENTRY_FLAG_ATTACHED,
+ fib_proto_to_dpo (pfx.fp_proto), &pfx.fp_addr, sw_if_index, ~0, 1,
+ NULL, FIB_ROUTE_PATH_FLAG_NONE);
if (0 == teib_db.td_n_entries[ip_addr_version (ip)]++)
- fib_table_lock (fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
+ fib_table_lock (peer_fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
}
}
static void
-teib_adj_fib_remove (ip_address_t * ip, u32 sw_if_index, u32 fib_index)
+teib_adj_fib_remove (ip_address_t *ip, u32 sw_if_index, u32 peer_fib_index)
{
if (AF_IP6 == ip_addr_version (ip) &&
ip6_address_is_link_local_unicast (&ip_addr_v6 (ip)))
@@ -185,14 +182,12 @@ teib_adj_fib_remove (ip_address_t * ip, u32 sw_if_index, u32 fib_index)
fib_prefix_t pfx;
ip_address_to_fib_prefix (ip, &pfx);
- fib_table_entry_path_remove (fib_index, &pfx, FIB_SOURCE_ADJ,
- fib_proto_to_dpo (pfx.fp_proto),
- &pfx.fp_addr,
- sw_if_index,
- ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
+ fib_table_entry_path_remove (
+ peer_fib_index, &pfx, FIB_SOURCE_ADJ, fib_proto_to_dpo (pfx.fp_proto),
+ &pfx.fp_addr, sw_if_index, ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
if (0 == --teib_db.td_n_entries[ip_addr_version (ip)])
- fib_table_unlock (fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
+ fib_table_unlock (peer_fib_index, pfx.fp_proto, FIB_SOURCE_ADJ);
}
}
@@ -203,15 +198,17 @@ teib_entry_add (u32 sw_if_index,
{
fib_protocol_t nh_proto;
teib_entry_t *te;
- u32 fib_index;
+ u32 nh_fib_index, peer_fib_index;
index_t tei;
nh_proto = (AF_IP4 == ip_addr_version (nh) ?
FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6);
- fib_index = fib_table_find (nh_proto, nh_table_id);
+ peer_fib_index = fib_table_get_index_for_sw_if_index (
+ ip_address_family_to_fib_proto (peer->version), sw_if_index);
+ nh_fib_index = fib_table_find (nh_proto, nh_table_id);
- if (~0 == fib_index)
+ if (~0 == nh_fib_index)
{
return (VNET_API_ERROR_NO_SUCH_FIB);
}
@@ -225,9 +222,6 @@ teib_entry_add (u32 sw_if_index,
.tk_sw_if_index = sw_if_index,
};
teib_entry_t *te;
- u32 fib_index;
-
- fib_index = fib_table_get_index_for_sw_if_index (nh_proto, sw_if_index);
pool_get_zero (teib_pool, te);
@@ -236,12 +230,12 @@ teib_entry_add (u32 sw_if_index,
clib_memcpy (te->te_key, &nk, sizeof (*te->te_key));
ip_address_to_fib_prefix (nh, &te->te_nh);
- te->te_fib_index = fib_index;
+ te->te_nh_fib_index = nh_fib_index;
hash_set_mem (teib_db.td_db, te->te_key, tei);
/* we how have a /32 in the overlay, add an adj-fib */
- teib_adj_fib_add (&te->te_key->tk_peer, sw_if_index, fib_index);
+ teib_adj_fib_add (&te->te_key->tk_peer, sw_if_index, peer_fib_index);
TEIB_NOTIFY (te, nv_added);
TEIB_TE_INFO (te, "created");
@@ -265,13 +259,12 @@ teib_entry_del (u32 sw_if_index, const ip_address_t * peer)
{
TEIB_TE_INFO (te, "removed");
- u32 fib_index;
+ u32 peer_fib_index;
- fib_index = fib_table_get_index_for_sw_if_index
- (ip_address_family_to_fib_proto (ip_addr_version (peer)),
- sw_if_index);
+ peer_fib_index = fib_table_get_index_for_sw_if_index (
+ ip_address_family_to_fib_proto (peer->version), sw_if_index);
- teib_adj_fib_remove (&te->te_key->tk_peer, sw_if_index, fib_index);
+ teib_adj_fib_remove (&te->te_key->tk_peer, sw_if_index, peer_fib_index);
hash_unset_mem (teib_db.td_db, te->te_key);
@@ -282,8 +275,7 @@ teib_entry_del (u32 sw_if_index, const ip_address_t * peer)
}
else
{
- TEIB_INFO ("no such entry: %U, %U, %U",
- format_vnet_sw_if_index_name,
+ TEIB_INFO ("no such entry: %U, %U", format_vnet_sw_if_index_name,
vnet_get_main (), sw_if_index, format_ip_address, peer);
return (VNET_API_ERROR_NO_SUCH_ENTRY);
}
@@ -305,7 +297,7 @@ format_teib_entry (u8 * s, va_list * args)
s = format (s, "%U", format_ip_address,
&te->te_key->tk_peer, IP46_TYPE_ANY);
s = format (s, " via [%d]:%U",
- fib_table_get_table_id (te->te_fib_index, te->te_nh.fp_proto),
+ fib_table_get_table_id (te->te_nh_fib_index, te->te_nh.fp_proto),
format_fib_prefix, &te->te_nh);
return (s);
@@ -316,12 +308,10 @@ teib_walk (teib_walk_cb_t fn, void *ctx)
{
index_t tei;
- /* *INDENT-OFF* */
pool_foreach_index (tei, teib_pool)
{
fn(tei, ctx);
}
- /* *INDENT-ON* */
}
void
@@ -329,13 +319,11 @@ teib_walk_itf (u32 sw_if_index, teib_walk_cb_t fn, void *ctx)
{
index_t tei;
- /* *INDENT-OFF* */
pool_foreach_index (tei, teib_pool)
{
if (sw_if_index == teib_entry_get_sw_if_index(teib_entry_get(tei)))
fn(tei, ctx);
}
- /* *INDENT-ON* */
}
static void
@@ -344,20 +332,18 @@ teib_walk_itf_proto (u32 sw_if_index,
{
index_t tei;
- /* *INDENT-OFF* */
pool_foreach_index (tei, teib_pool)
{
if (sw_if_index == teib_entry_get_sw_if_index(teib_entry_get(tei)) &&
af == teib_entry_get_af(teib_entry_get(tei)))
fn(tei, ctx);
}
- /* *INDENT-ON* */
}
typedef struct teib_table_bind_ctx_t_
{
- u32 new_fib_index;
- u32 old_fib_index;
+ u32 new_peer_fib_index;
+ u32 old_peer_fib_index;
} teib_table_bind_ctx_t;
static walk_rc_t
@@ -368,12 +354,13 @@ teib_walk_table_bind (index_t tei, void *arg)
te = teib_entry_get (tei);
- TEIB_TE_INFO (te, "bind: %d -> %d", ctx->old_fib_index, ctx->new_fib_index);
+ TEIB_TE_INFO (te, "bind: %d -> %d", ctx->old_peer_fib_index,
+ ctx->new_peer_fib_index);
- teib_adj_fib_remove (&te->te_key->tk_peer,
- te->te_key->tk_sw_if_index, ctx->old_fib_index);
- teib_adj_fib_add (&te->te_key->tk_peer,
- te->te_key->tk_sw_if_index, ctx->new_fib_index);
+ teib_adj_fib_remove (&te->te_key->tk_peer, te->te_key->tk_sw_if_index,
+ ctx->old_peer_fib_index);
+ teib_adj_fib_add (&te->te_key->tk_peer, te->te_key->tk_sw_if_index,
+ ctx->new_peer_fib_index);
return (WALK_CONTINUE);
}
@@ -384,8 +371,8 @@ teib_table_bind_v4 (ip4_main_t * im,
u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
{
teib_table_bind_ctx_t ctx = {
- .old_fib_index = old_fib_index,
- .new_fib_index = new_fib_index,
+ .old_peer_fib_index = old_fib_index,
+ .new_peer_fib_index = new_fib_index,
};
teib_walk_itf_proto (sw_if_index, AF_IP4, teib_walk_table_bind, &ctx);
@@ -397,8 +384,8 @@ teib_table_bind_v6 (ip6_main_t * im,
u32 sw_if_index, u32 new_fib_index, u32 old_fib_index)
{
teib_table_bind_ctx_t ctx = {
- .old_fib_index = old_fib_index,
- .new_fib_index = new_fib_index,
+ .old_peer_fib_index = old_fib_index,
+ .new_peer_fib_index = new_fib_index,
};
teib_walk_itf_proto (sw_if_index, AF_IP6, teib_walk_table_bind, &ctx);
diff --git a/src/vnet/teib/teib_cli.c b/src/vnet/teib/teib_cli.c
index a23902e0f60..03cec15c7a1 100644
--- a/src/vnet/teib/teib_cli.c
+++ b/src/vnet/teib/teib_cli.c
@@ -85,13 +85,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (teib_create_command, static) = {
.path = "create teib",
.short_help = "create teib <interface> peer <addr> nh <addr> [nh-table-id <ID>]",
.function = teib_add,
};
-/* *INDENT-ON* */
static clib_error_t *
teib_del (vlib_main_t * vm,
@@ -150,13 +148,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (teib_delete_command, static) = {
.path = "delete teib",
.short_help = "delete teib <interface> peer <addr>",
.function = teib_del,
};
-/* *INDENT-ON* */
static walk_rc_t
teib_show_one (index_t nei, void *ctx)
@@ -175,13 +171,11 @@ teib_show (vlib_main_t * vm,
return (NULL);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (teib_show_command, static) = {
.path = "show teib",
.short_help = "show teib",
.function = teib_show,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/tls/tls.c b/src/vnet/tls/tls.c
index 0787c22f763..5f00e6e302d 100644
--- a/src/vnet/tls/tls.c
+++ b/src/vnet/tls/tls.c
@@ -61,8 +61,7 @@ tls_add_vpp_q_rx_evt (session_t * s)
int
tls_add_vpp_q_builtin_rx_evt (session_t * s)
{
- if (svm_fifo_set_event (s->rx_fifo))
- session_send_io_evt_to_thread (s->rx_fifo, SESSION_IO_EVT_BUILTIN_RX);
+ session_enqueue_notify (s);
return 0;
}
@@ -75,9 +74,10 @@ tls_add_vpp_q_tx_evt (session_t * s)
}
static inline int
-tls_add_app_q_evt (app_worker_t * app, session_t * app_session)
+tls_add_app_q_evt (app_worker_t *app_wrk, session_t *app_session)
{
- return app_worker_lock_and_send_event (app, app_session, SESSION_IO_EVT_RX);
+ app_worker_add_event (app_wrk, app_session, SESSION_IO_EVT_RX);
+ return 0;
}
u32
@@ -115,58 +115,74 @@ u32
tls_ctx_half_open_alloc (void)
{
tls_main_t *tm = &tls_main;
- u8 will_expand = 0;
tls_ctx_t *ctx;
- u32 ctx_index;
- pool_get_aligned_will_expand (tm->half_open_ctx_pool, will_expand, 0);
- if (PREDICT_FALSE (will_expand && vlib_num_workers ()))
- {
- clib_rwlock_writer_lock (&tm->half_open_rwlock);
- pool_get_zero (tm->half_open_ctx_pool, ctx);
- ctx->c_c_index = ctx - tm->half_open_ctx_pool;
- ctx_index = ctx->c_c_index;
- clib_rwlock_writer_unlock (&tm->half_open_rwlock);
- }
- else
- {
- /* reader lock assumption: only main thread will call pool_get */
- clib_rwlock_reader_lock (&tm->half_open_rwlock);
- pool_get_zero (tm->half_open_ctx_pool, ctx);
- ctx->c_c_index = ctx - tm->half_open_ctx_pool;
- ctx_index = ctx->c_c_index;
- clib_rwlock_reader_unlock (&tm->half_open_rwlock);
- }
- return ctx_index;
+ if (vec_len (tm->postponed_ho_free))
+ tls_flush_postponed_ho_cleanups ();
+
+ pool_get_aligned_safe (tm->half_open_ctx_pool, ctx, CLIB_CACHE_LINE_BYTES);
+
+ clib_memset (ctx, 0, sizeof (*ctx));
+ ctx->c_c_index = ctx - tm->half_open_ctx_pool;
+ ctx->c_thread_index = transport_cl_thread ();
+
+ return ctx->c_c_index;
}
void
tls_ctx_half_open_free (u32 ho_index)
{
- tls_main_t *tm = &tls_main;
- clib_rwlock_writer_lock (&tm->half_open_rwlock);
pool_put_index (tls_main.half_open_ctx_pool, ho_index);
- clib_rwlock_writer_unlock (&tm->half_open_rwlock);
}
tls_ctx_t *
tls_ctx_half_open_get (u32 ctx_index)
{
tls_main_t *tm = &tls_main;
- clib_rwlock_reader_lock (&tm->half_open_rwlock);
return pool_elt_at_index (tm->half_open_ctx_pool, ctx_index);
}
void
-tls_ctx_half_open_reader_unlock ()
+tls_add_postponed_ho_cleanups (u32 ho_index)
{
- clib_rwlock_reader_unlock (&tls_main.half_open_rwlock);
+ tls_main_t *tm = &tls_main;
+ vec_add1 (tm->postponed_ho_free, ho_index);
}
-u32
-tls_ctx_half_open_index (tls_ctx_t * ctx)
+static void
+tls_ctx_ho_try_free (u32 ho_index)
+{
+ tls_ctx_t *ctx;
+
+ ctx = tls_ctx_half_open_get (ho_index);
+ /* Probably tcp connected just before tcp establish timeout and
+ * worker that owns established session has not yet received
+ * @ref tls_session_connected_cb */
+ if (!(ctx->flags & TLS_CONN_F_HO_DONE))
+ {
+ ctx->tls_session_handle = SESSION_INVALID_HANDLE;
+ tls_add_postponed_ho_cleanups (ho_index);
+ return;
+ }
+ if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION))
+ session_half_open_delete_notify (&ctx->connection);
+ tls_ctx_half_open_free (ho_index);
+}
+
+void
+tls_flush_postponed_ho_cleanups ()
{
- return (ctx - tls_main.half_open_ctx_pool);
+ tls_main_t *tm = &tls_main;
+ u32 *ho_indexp, *tmp;
+
+ tmp = tm->postponed_ho_free;
+ tm->postponed_ho_free = tm->ho_free_list;
+ tm->ho_free_list = tmp;
+
+ vec_foreach (ho_indexp, tm->ho_free_list)
+ tls_ctx_ho_try_free (*ho_indexp);
+
+ vec_reset_length (tm->ho_free_list);
}
void
@@ -189,17 +205,19 @@ tls_notify_app_accept (tls_ctx_t * ctx)
lctx = tls_listener_ctx_get (ctx->listener_ctx_index);
app_listener = listen_session_get_from_handle (lctx->app_session_handle);
- app_session = session_get (ctx->c_s_index, ctx->c_thread_index);
- app_session->app_wrk_index = ctx->parent_app_wrk_index;
- app_session->connection_index = ctx->tls_ctx_handle;
+ app_session = session_alloc (ctx->c_thread_index);
+ app_session->session_state = SESSION_STATE_ACCEPTING;
app_session->session_type = app_listener->session_type;
app_session->listener_handle = listen_session_get_handle (app_listener);
- app_session->session_state = SESSION_STATE_ACCEPTING;
+ app_session->app_wrk_index = ctx->parent_app_wrk_index;
+ app_session->connection_index = ctx->tls_ctx_handle;
+ ctx->c_s_index = app_session->session_index;
if ((rv = app_worker_init_accepted (app_session)))
{
TLS_DBG (1, "failed to allocate fifos");
session_free (app_session);
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
return rv;
}
ctx->app_session_handle = session_handle (app_session);
@@ -211,67 +229,67 @@ tls_notify_app_accept (tls_ctx_t * ctx)
int
tls_notify_app_connected (tls_ctx_t * ctx, session_error_t err)
{
+ u32 parent_app_api_ctx;
session_t *app_session;
app_worker_t *app_wrk;
app_wrk = app_worker_get_if_valid (ctx->parent_app_wrk_index);
if (!app_wrk)
{
- tls_disconnect_transport (ctx);
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
return -1;
}
if (err)
{
- /* Free app session pre-allocated when transport was established */
- if (ctx->tls_type == TRANSPORT_PROTO_TLS)
- session_free (session_get (ctx->c_s_index, ctx->c_thread_index));
- ctx->no_app_session = 1;
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
goto send_reply;
}
- /* For DTLS the app session is not preallocated because the underlying udp
- * session might migrate to a different worker during the handshake */
+ app_session = session_alloc (ctx->c_thread_index);
+ app_session->session_state = SESSION_STATE_CREATED;
+ app_session->connection_index = ctx->tls_ctx_handle;
+
if (ctx->tls_type == TRANSPORT_PROTO_DTLS)
{
- session_type_t st;
/* Cleanup half-open session as we don't get notification from udp */
session_half_open_delete_notify (&ctx->connection);
- app_session = session_alloc (ctx->c_thread_index);
- app_session->session_state = SESSION_STATE_CREATED;
- ctx->c_s_index = app_session->session_index;
- st =
+ app_session->session_type =
session_type_from_proto_and_ip (TRANSPORT_PROTO_DTLS, ctx->tcp_is_ip4);
- app_session->session_type = st;
- app_session->connection_index = ctx->tls_ctx_handle;
}
else
{
- app_session = session_get (ctx->c_s_index, ctx->c_thread_index);
+ app_session->session_type =
+ session_type_from_proto_and_ip (TRANSPORT_PROTO_TLS, ctx->tcp_is_ip4);
}
app_session->app_wrk_index = ctx->parent_app_wrk_index;
+ app_session->opaque = ctx->parent_app_api_context;
+ ctx->c_s_index = app_session->session_index;
if ((err = app_worker_init_connected (app_wrk, app_session)))
- goto failed;
+ {
+ app_worker_connect_notify (app_wrk, 0, err, ctx->parent_app_api_context);
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
+ session_free (app_session);
+ return -1;
+ }
app_session->session_state = SESSION_STATE_READY;
- if (app_worker_connect_notify (app_wrk, app_session,
- SESSION_E_NONE, ctx->parent_app_api_context))
+ parent_app_api_ctx = ctx->parent_app_api_context;
+ ctx->app_session_handle = session_handle (app_session);
+
+ if (app_worker_connect_notify (app_wrk, app_session, SESSION_E_NONE,
+ parent_app_api_ctx))
{
TLS_DBG (1, "failed to notify app");
- app_session->session_state = SESSION_STATE_CONNECTING;
- tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ());
+ session_free (session_get (ctx->c_s_index, ctx->c_thread_index));
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
return -1;
}
- ctx->app_session_handle = session_handle (app_session);
-
return 0;
-failed:
- ctx->no_app_session = 1;
- tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ());
send_reply:
return app_worker_connect_notify (app_wrk, 0, err,
ctx->parent_app_api_context);
@@ -365,7 +383,7 @@ tls_ctx_write (tls_ctx_t * ctx, session_t * app_session,
sp->max_burst_size = sp->max_burst_size * TRANSPORT_PACER_MIN_MSS;
n_wrote = tls_vfts[ctx->tls_ctx_engine].ctx_write (ctx, app_session, sp);
- sp->max_burst_size = n_wrote;
+ sp->bytes_dequeued = n_wrote;
return n_wrote > 0 ? clib_max (n_wrote / TRANSPORT_PACER_MIN_MSS, 1) : 0;
}
@@ -382,6 +400,12 @@ tls_ctx_transport_close (tls_ctx_t * ctx)
}
static inline int
+tls_ctx_transport_reset (tls_ctx_t *ctx)
+{
+ return tls_vfts[ctx->tls_ctx_engine].ctx_transport_reset (ctx);
+}
+
+static inline int
tls_ctx_app_close (tls_ctx_t * ctx)
{
return tls_vfts[ctx->tls_ctx_engine].ctx_app_close (ctx);
@@ -399,44 +423,37 @@ tls_ctx_handshake_is_over (tls_ctx_t * ctx)
return tls_vfts[ctx->tls_ctx_engine].ctx_handshake_is_over (ctx);
}
+int
+tls_reinit_ca_chain (crypto_engine_type_t tls_engine_id)
+{
+ return tls_vfts[tls_engine_id].ctx_reinit_cachain ();
+}
+
void
-tls_session_reset_callback (session_t * s)
+tls_notify_app_io_error (tls_ctx_t *ctx)
+{
+ ASSERT (tls_ctx_handshake_is_over (ctx));
+
+ session_transport_reset_notify (&ctx->connection);
+ session_transport_closed_notify (&ctx->connection);
+ tls_disconnect_transport (ctx);
+}
+
+void
+tls_session_reset_callback (session_t *ts)
{
tls_ctx_t *ctx;
- transport_connection_t *tc;
- session_t *app_session;
- ctx = tls_ctx_get (s->opaque);
- ctx->is_passive_close = 1;
- tc = &ctx->connection;
- if (tls_ctx_handshake_is_over (ctx))
- {
- session_transport_reset_notify (tc);
- session_transport_closed_notify (tc);
- tls_disconnect_transport (ctx);
- }
- else
- if ((app_session =
- session_get_if_valid (ctx->c_s_index, ctx->c_thread_index)))
- {
- session_free (app_session);
- ctx->c_s_index = SESSION_INVALID_INDEX;
- tls_disconnect_transport (ctx);
- }
+ ctx = tls_ctx_get_w_thread (ts->opaque, ts->thread_index);
+ ctx->flags |= TLS_CONN_F_PASSIVE_CLOSE;
+ tls_ctx_transport_reset (ctx);
}
static void
tls_session_cleanup_ho (session_t *s)
{
- tls_ctx_t *ctx;
- u32 ho_index;
-
/* session opaque stores the opaque passed on connect */
- ho_index = s->opaque;
- ctx = tls_ctx_half_open_get (ho_index);
- session_half_open_delete_notify (&ctx->connection);
- tls_ctx_half_open_reader_unlock ();
- tls_ctx_half_open_free (ho_index);
+ tls_ctx_ho_try_free (s->opaque);
}
int
@@ -464,56 +481,69 @@ tls_session_disconnect_callback (session_t * tls_session)
|| vlib_thread_is_main_w_barrier ());
ctx = tls_ctx_get_w_thread (tls_session->opaque, tls_session->thread_index);
- ctx->is_passive_close = 1;
+ ctx->flags |= TLS_CONN_F_PASSIVE_CLOSE;
tls_ctx_transport_close (ctx);
}
int
-tls_session_accept_callback (session_t * tls_session)
+tls_session_accept_callback (session_t *ts)
{
- session_t *tls_listener, *app_session;
+ session_t *tls_listener;
tls_ctx_t *lctx, *ctx;
u32 ctx_handle;
- tls_listener =
- listen_session_get_from_handle (tls_session->listener_handle);
+ tls_listener = listen_session_get_from_handle (ts->listener_handle);
lctx = tls_listener_ctx_get (tls_listener->opaque);
ctx_handle = tls_ctx_alloc (lctx->tls_ctx_engine);
ctx = tls_ctx_get (ctx_handle);
- memcpy (ctx, lctx, sizeof (*lctx));
- ctx->c_thread_index = vlib_get_thread_index ();
+ clib_memcpy (ctx, lctx, sizeof (*lctx));
+ ctx->c_s_index = SESSION_INVALID_INDEX;
+ ctx->c_thread_index = ts->thread_index;
ctx->tls_ctx_handle = ctx_handle;
- tls_session->session_state = SESSION_STATE_READY;
- tls_session->opaque = ctx_handle;
- ctx->tls_session_handle = session_handle (tls_session);
+ ts->opaque = ctx_handle;
+ ctx->tls_session_handle = session_handle (ts);
ctx->listener_ctx_index = tls_listener->opaque;
ctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
ctx->ckpair_index = lctx->ckpair_index;
- /* Preallocate app session. Avoids allocating a session post handshake
- * on tls_session rx and potentially invalidating the session pool */
- app_session = session_alloc (ctx->c_thread_index);
- app_session->session_state = SESSION_STATE_CREATED;
- ctx->c_s_index = app_session->session_index;
-
TLS_DBG (1, "Accept on listener %u new connection [%u]%x",
tls_listener->opaque, vlib_get_thread_index (), ctx_handle);
- return tls_ctx_init_server (ctx);
+ if (tls_ctx_init_server (ctx))
+ {
+ /* Do not free ctx yet, in case we have pending rx events */
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
+ tls_disconnect_transport (ctx);
+ }
+
+ if (ts->session_state < SESSION_STATE_READY)
+ ts->session_state = SESSION_STATE_READY;
+
+ return 0;
}
int
-tls_app_rx_callback (session_t * tls_session)
+tls_app_rx_callback (session_t *ts)
{
tls_ctx_t *ctx;
/* DTLS session migrating, wait for next notification */
- if (PREDICT_FALSE (tls_session->flags & SESSION_F_IS_MIGRATING))
+ if (PREDICT_FALSE (ts->flags & SESSION_F_IS_MIGRATING))
return 0;
- ctx = tls_ctx_get (tls_session->opaque);
- tls_ctx_read (ctx, tls_session);
+ /* Read rescheduled but underlying transport deleted now */
+ if (PREDICT_FALSE ((ts->session_state == SESSION_STATE_TRANSPORT_DELETED)))
+ return 0;
+
+ ctx = tls_ctx_get (ts->opaque);
+ if (PREDICT_FALSE ((ctx->flags & TLS_CONN_F_NO_APP_SESSION) ||
+ (ctx->flags & TLS_CONN_F_APP_CLOSED)))
+ {
+ TLS_DBG (1, "Local App closed");
+ return 0;
+ }
+ tls_ctx_read (ctx, ts);
return 0;
}
@@ -532,9 +562,7 @@ int
tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index,
session_t *tls_session, session_error_t err)
{
- session_t *app_session;
tls_ctx_t *ho_ctx, *ctx;
- session_type_t st;
u32 ctx_handle;
ho_ctx = tls_ctx_half_open_get (ho_ctx_index);
@@ -542,8 +570,9 @@ tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index,
ctx_handle = tls_ctx_alloc (ho_ctx->tls_ctx_engine);
ctx = tls_ctx_get (ctx_handle);
clib_memcpy_fast (ctx, ho_ctx, sizeof (*ctx));
+
/* Half-open freed on tcp half-open cleanup notification */
- tls_ctx_half_open_reader_unlock ();
+ __atomic_fetch_or (&ho_ctx->flags, TLS_CONN_F_HO_DONE, __ATOMIC_RELEASE);
ctx->c_thread_index = vlib_get_thread_index ();
ctx->tls_ctx_handle = ctx_handle;
@@ -555,18 +584,17 @@ tls_session_connected_cb (u32 tls_app_index, u32 ho_ctx_index,
ctx->tls_session_handle = session_handle (tls_session);
tls_session->opaque = ctx_handle;
- tls_session->session_state = SESSION_STATE_READY;
- /* Preallocate app session. Avoids allocating a session post handshake
- * on tls_session rx and potentially invalidating the session pool */
- app_session = session_alloc (ctx->c_thread_index);
- app_session->session_state = SESSION_STATE_CREATED;
- ctx->c_s_index = app_session->session_index;
- st = session_type_from_proto_and_ip (TRANSPORT_PROTO_TLS, ctx->tcp_is_ip4);
- app_session->session_type = st;
- app_session->connection_index = ctx->tls_ctx_handle;
+ if (tls_ctx_init_client (ctx))
+ {
+ tls_notify_app_connected (ctx, SESSION_E_TLS_HANDSHAKE);
+ tls_disconnect_transport (ctx);
+ }
- return tls_ctx_init_client (ctx);
+ if (tls_session->session_state < SESSION_STATE_READY)
+ tls_session->session_state = SESSION_STATE_READY;
+
+ return 0;
}
int
@@ -598,13 +626,13 @@ tls_session_connected_callback (u32 tls_app_index, u32 ho_ctx_index,
u32 api_context;
ho_ctx = tls_ctx_half_open_get (ho_ctx_index);
+ ho_ctx->flags |= TLS_CONN_F_HO_DONE;
app_wrk = app_worker_get_if_valid (ho_ctx->parent_app_wrk_index);
if (app_wrk)
{
api_context = ho_ctx->parent_app_api_context;
app_worker_connect_notify (app_wrk, 0, err, api_context);
}
- tls_ctx_half_open_reader_unlock ();
return 0;
}
@@ -631,7 +659,7 @@ tls_app_session_cleanup (session_t * s, session_cleanup_ntf_t ntf)
}
ctx = tls_ctx_get (s->opaque);
- if (!ctx->no_app_session)
+ if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION))
session_transport_delete_notify (&ctx->connection);
tls_ctx_free (ctx);
}
@@ -657,7 +685,7 @@ dtls_migrate_ctx (void *arg)
/* Probably the app detached while the session was migrating. Cleanup */
if (session_half_open_migrated_notify (&ctx->connection))
{
- ctx->no_app_session = 1;
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
tls_disconnect (ctx->tls_ctx_handle, vlib_get_thread_index ());
return;
}
@@ -676,7 +704,7 @@ dtls_session_migrate_callback (session_t *us, session_handle_t new_sh)
ctx = tls_ctx_get_w_thread (us->opaque, us->thread_index);
ctx->tls_session_handle = new_sh;
cloned_ctx = tls_ctx_detach (ctx);
- ctx->is_migrated = 1;
+ ctx->flags |= TLS_CONN_F_MIGRATED;
session_half_open_migrate_notify (&ctx->connection);
session_send_rpc_evt_to_thread (new_thread, dtls_migrate_ctx,
@@ -685,11 +713,22 @@ dtls_session_migrate_callback (session_t *us, session_handle_t new_sh)
tls_ctx_free (ctx);
}
+static void
+tls_session_transport_closed_callback (session_t *ts)
+{
+ tls_ctx_t *ctx;
+
+ ctx = tls_ctx_get_w_thread (ts->opaque, ts->thread_index);
+ if (!(ctx->flags & TLS_CONN_F_NO_APP_SESSION))
+ session_transport_closed_notify (&ctx->connection);
+}
+
static session_cb_vft_t tls_app_cb_vft = {
.session_accept_callback = tls_session_accept_callback,
.session_disconnect_callback = tls_session_disconnect_callback,
.session_connected_callback = tls_session_connected_callback,
.session_reset_callback = tls_session_reset_callback,
+ .session_transport_closed_callback = tls_session_transport_closed_callback,
.half_open_cleanup_callback = tls_session_cleanup_ho,
.add_segment_callback = tls_add_segment_callback,
.del_segment_callback = tls_del_segment_callback,
@@ -742,7 +781,6 @@ tls_connect (transport_endpoint_cfg_t * tep)
ctx->srv_hostname = format (0, "%s", ccfg->hostname);
vec_terminate_c_string (ctx->srv_hostname);
}
- tls_ctx_half_open_reader_unlock ();
ctx->tls_ctx_engine = engine_type;
@@ -752,7 +790,10 @@ tls_connect (transport_endpoint_cfg_t * tep)
cargs->api_context = ctx_index;
cargs->sep_ext.ns_index = app->ns_index;
if ((rv = vnet_connect (cargs)))
- return rv;
+ {
+ tls_ctx_half_open_free (ctx_index);
+ return rv;
+ }
/* Track half-open tcp session in case we need to clean it up */
ctx->tls_session_handle = cargs->sh;
@@ -769,11 +810,12 @@ tls_disconnect (u32 ctx_handle, u32 thread_index)
TLS_DBG (1, "Disconnecting %x", ctx_handle);
ctx = tls_ctx_get (ctx_handle);
+ ctx->flags |= TLS_CONN_F_APP_CLOSED;
tls_ctx_app_close (ctx);
}
u32
-tls_start_listen (u32 app_listener_index, transport_endpoint_t * tep)
+tls_start_listen (u32 app_listener_index, transport_endpoint_cfg_t *tep)
{
vnet_listen_args_t _bargs, *args = &_bargs;
transport_endpt_crypto_cfg_t *ccfg;
@@ -834,6 +876,8 @@ tls_start_listen (u32 app_listener_index, transport_endpoint_t * tep)
lctx->tls_ctx_engine = engine_type;
lctx->tls_type = sep->transport_proto;
lctx->ckpair_index = ccfg->ckpair_index;
+ lctx->c_s_index = app_listener_index;
+ lctx->c_flags |= TRANSPORT_CONNECTION_F_NO_LOOKUP;
if (tls_vfts[engine_type].ctx_start_listen (lctx))
{
@@ -910,40 +954,53 @@ tls_listener_get (u32 listener_index)
static transport_connection_t *
tls_half_open_get (u32 ho_index)
{
- tls_main_t *tm = &tls_main;
tls_ctx_t *ctx;
ctx = tls_ctx_half_open_get (ho_index);
- clib_rwlock_reader_unlock (&tm->half_open_rwlock);
return &ctx->connection;
}
static void
tls_cleanup_ho (u32 ho_index)
{
- tls_main_t *tm = &tls_main;
- session_handle_t tcp_sh;
tls_ctx_t *ctx;
+ session_t *s;
ctx = tls_ctx_half_open_get (ho_index);
- tcp_sh = ctx->tls_session_handle;
- clib_rwlock_reader_unlock (&tm->half_open_rwlock);
- session_cleanup_half_open (tcp_sh);
- tls_ctx_half_open_free (ho_index);
+ /* Already pending cleanup */
+ if (ctx->tls_session_handle == SESSION_INVALID_HANDLE)
+ {
+ ASSERT (ctx->flags & TLS_CONN_F_HO_DONE);
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
+ return;
+ }
+
+ s = session_get_from_handle (ctx->tls_session_handle);
+ /* If no pending cleanup notification, force cleanup now. Otherwise,
+ * wait for cleanup notification and set no app session on ctx */
+ if (s->session_state != SESSION_STATE_TRANSPORT_DELETED)
+ {
+ session_cleanup_half_open (ctx->tls_session_handle);
+ tls_ctx_half_open_free (ho_index);
+ }
+ else
+ ctx->flags |= TLS_CONN_F_NO_APP_SESSION;
}
int
tls_custom_tx_callback (void *session, transport_send_params_t * sp)
{
- session_t *app_session = (session_t *) session;
+ session_t *as = (session_t *) session;
tls_ctx_t *ctx;
- if (PREDICT_FALSE (app_session->session_state
- >= SESSION_STATE_TRANSPORT_CLOSED))
- return 0;
+ if (PREDICT_FALSE (as->session_state >= SESSION_STATE_TRANSPORT_CLOSED ||
+ as->session_state <= SESSION_STATE_ACCEPTING))
+ {
+ sp->flags |= TRANSPORT_SND_F_DESCHED;
+ return 0;
+ }
- sp->flags = 0;
- ctx = tls_ctx_get (app_session->connection_index);
- return tls_ctx_write (ctx, app_session, sp);
+ ctx = tls_ctx_get (as->connection_index);
+ return tls_ctx_write (ctx, as, sp);
}
u8 *
@@ -1054,6 +1111,7 @@ format_tls_half_open (u8 * s, va_list * args)
{
u32 ho_index = va_arg (*args, u32);
u32 __clib_unused thread_index = va_arg (*args, u32);
+ u32 __clib_unused verbose = va_arg (*args, u32);
session_t *tcp_ho;
tls_ctx_t *ho_ctx;
@@ -1065,7 +1123,6 @@ format_tls_half_open (u8 * s, va_list * args)
ho_ctx->parent_app_wrk_index, ho_ctx->tls_ctx_engine,
tcp_ho->thread_index, tcp_ho->session_index);
- tls_ctx_half_open_reader_unlock ();
return s;
}
@@ -1074,10 +1131,11 @@ tls_transport_endpoint_get (u32 ctx_handle, u32 thread_index,
transport_endpoint_t * tep, u8 is_lcl)
{
tls_ctx_t *ctx = tls_ctx_get_w_thread (ctx_handle, thread_index);
- session_t *tcp_session;
+ session_t *ts;
- tcp_session = session_get_from_handle (ctx->tls_session_handle);
- session_get_endpoint (tcp_session, tep, is_lcl);
+ ts = session_get_from_handle (ctx->tls_session_handle);
+ if (ts && ts->session_state < SESSION_STATE_TRANSPORT_DELETED)
+ session_get_endpoint (ts, tep, is_lcl);
}
static void
@@ -1096,12 +1154,11 @@ tls_transport_listener_endpoint_get (u32 ctx_handle,
static clib_error_t *
tls_enable (vlib_main_t * vm, u8 is_en)
{
- u32 add_segment_size = 256 << 20, first_seg_size = 32 << 20;
vnet_app_detach_args_t _da, *da = &_da;
vnet_app_attach_args_t _a, *a = &_a;
u64 options[APP_OPTIONS_N_OPTIONS];
tls_main_t *tm = &tls_main;
- u32 fifo_size = 128 << 12;
+ u32 fifo_size = 512 << 10;
if (!is_en)
{
@@ -1111,7 +1168,6 @@ tls_enable (vlib_main_t * vm, u8 is_en)
return 0;
}
- first_seg_size = tm->first_seg_size ? tm->first_seg_size : first_seg_size;
fifo_size = tm->fifo_size ? tm->fifo_size : fifo_size;
clib_memset (a, 0, sizeof (*a));
@@ -1121,8 +1177,8 @@ tls_enable (vlib_main_t * vm, u8 is_en)
a->api_client_index = APP_INVALID_INDEX;
a->options = options;
a->name = format (0, "tls");
- a->options[APP_OPTIONS_SEGMENT_SIZE] = first_seg_size;
- a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = add_segment_size;
+ a->options[APP_OPTIONS_SEGMENT_SIZE] = tm->first_seg_size;
+ a->options[APP_OPTIONS_ADD_SEGMENT_SIZE] = tm->add_seg_size;
a->options[APP_OPTIONS_RX_FIFO_SIZE] = fifo_size;
a->options[APP_OPTIONS_TX_FIFO_SIZE] = fifo_size;
a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
@@ -1311,11 +1367,12 @@ tls_init (vlib_main_t * vm)
if (!tm->ca_cert_path)
tm->ca_cert_path = TLS_CA_CERT_PATH;
- clib_rwlock_init (&tm->half_open_rwlock);
-
vec_validate (tm->rx_bufs, num_threads - 1);
vec_validate (tm->tx_bufs, num_threads - 1);
+ tm->first_seg_size = 32 << 20;
+ tm->add_seg_size = 256 << 20;
+
transport_register_protocol (TRANSPORT_PROTO_TLS, &tls_proto,
FIB_PROTOCOL_IP4, ~0);
transport_register_protocol (TRANSPORT_PROTO_TLS, &tls_proto,
@@ -1344,6 +1401,9 @@ tls_config_fn (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "first-segment-size %U", unformat_memory_size,
&tm->first_seg_size))
;
+ else if (unformat (input, "add-segment-size %U", unformat_memory_size,
+ &tm->add_seg_size))
+ ;
else if (unformat (input, "fifo-size %U", unformat_memory_size, &tmp))
{
if (tmp >= 0x100000000ULL)
@@ -1360,7 +1420,7 @@ tls_config_fn (vlib_main_t * vm, unformat_input_t * input)
return 0;
}
-VLIB_EARLY_CONFIG_FUNCTION (tls_config_fn, "tls");
+VLIB_CONFIG_FUNCTION (tls_config_fn, "tls");
tls_main_t *
vnet_tls_get_main (void)
diff --git a/src/vnet/tls/tls.h b/src/vnet/tls/tls.h
index eba70c0a8bb..6bd1371b984 100644
--- a/src/vnet/tls/tls.h
+++ b/src/vnet/tls/tls.h
@@ -36,26 +36,48 @@
#define TLS_DBG(_lvl, _fmt, _args...)
#endif
-/* *INDENT-OFF* */
typedef struct tls_cxt_id_
{
- union {
- session_handle_t app_session_handle;
- u32 parent_app_api_ctx;
- };
+ session_handle_t app_session_handle;
session_handle_t tls_session_handle;
void *migrate_ctx;
u32 parent_app_wrk_index;
u32 ssl_ctx;
- u32 listener_ctx_index;
+ union
+ {
+ u32 listener_ctx_index;
+ u32 parent_app_api_ctx;
+ };
u8 tcp_is_ip4;
u8 tls_engine_id;
} tls_ctx_id_t;
-/* *INDENT-ON* */
STATIC_ASSERT (sizeof (tls_ctx_id_t) <= TRANSPORT_CONN_ID_LEN,
"ctx id must be less than TRANSPORT_CONN_ID_LEN");
+#define foreach_tls_conn_flags \
+ _ (HO_DONE, "ho-done") \
+ _ (PASSIVE_CLOSE, "passive-close") \
+ _ (APP_CLOSED, "app-closed") \
+ _ (MIGRATED, "migrated") \
+ _ (NO_APP_SESSION, "no-app-session") \
+ _ (RESUME, "resume") \
+ _ (HS_DONE, "handshake-done")
+
+typedef enum tls_conn_flags_bit_
+{
+#define _(sym, str) TLS_CONN_F_BIT_##sym,
+ foreach_tls_conn_flags
+#undef _
+} tls_conn_flags_bit_t;
+
+typedef enum tls_conn_flags_
+{
+#define _(sym, str) TLS_CONN_F_##sym = 1 << TLS_CONN_F_BIT_##sym,
+ foreach_tls_conn_flags
+#undef _
+} __clib_packed tls_conn_flags_t;
+
typedef struct tls_ctx_
{
union
@@ -76,11 +98,7 @@ typedef struct tls_ctx_
#define parent_app_api_context c_tls_ctx_id.parent_app_api_ctx
#define migration_ctx c_tls_ctx_id.migrate_ctx
- u8 is_passive_close;
- u8 resume;
- u8 app_closed;
- u8 no_app_session;
- u8 is_migrated;
+ tls_conn_flags_t flags;
u8 *srv_hostname;
u32 evt_index;
u32 ckpair_index;
@@ -92,7 +110,8 @@ typedef struct tls_main_
u32 app_index;
tls_ctx_t *listener_ctx_pool;
tls_ctx_t *half_open_ctx_pool;
- clib_rwlock_t half_open_rwlock;
+ u32 *postponed_ho_free;
+ u32 *ho_free_list;
u8 **rx_bufs;
u8 **tx_bufs;
@@ -102,6 +121,7 @@ typedef struct tls_main_
u8 use_test_cert_in_ca;
char *ca_cert_path;
u64 first_seg_size;
+ u64 add_seg_size;
u32 fifo_size;
} tls_main_t;
@@ -123,7 +143,9 @@ typedef struct tls_engine_vft_
int (*ctx_start_listen) (tls_ctx_t * ctx);
int (*ctx_stop_listen) (tls_ctx_t * ctx);
int (*ctx_transport_close) (tls_ctx_t * ctx);
+ int (*ctx_transport_reset) (tls_ctx_t *ctx);
int (*ctx_app_close) (tls_ctx_t * ctx);
+ int (*ctx_reinit_cachain) (void);
} tls_engine_vft_t;
tls_main_t *vnet_tls_get_main (void);
@@ -136,7 +158,13 @@ int tls_add_vpp_q_builtin_rx_evt (session_t * s);
int tls_notify_app_accept (tls_ctx_t * ctx);
int tls_notify_app_connected (tls_ctx_t * ctx, session_error_t err);
void tls_notify_app_enqueue (tls_ctx_t * ctx, session_t * app_session);
+void tls_notify_app_io_error (tls_ctx_t *ctx);
void tls_disconnect_transport (tls_ctx_t * ctx);
+int tls_reinit_ca_chain (crypto_engine_type_t tls_engine_id);
+
+void tls_add_postponed_ho_cleanups (u32 ho_index);
+void tls_flush_postponed_ho_cleanups ();
+
#endif /* SRC_VNET_TLS_TLS_H_ */
/*
diff --git a/src/vnet/tunnel/tunnel.c b/src/vnet/tunnel/tunnel.c
index d45a46205d8..0d27ad82538 100644
--- a/src/vnet/tunnel/tunnel.c
+++ b/src/vnet/tunnel/tunnel.c
@@ -66,16 +66,19 @@ unformat_tunnel_mode (unformat_input_t * input, va_list * args)
u8 *
format_tunnel_encap_decap_flags (u8 * s, va_list * args)
{
- tunnel_encap_decap_flags_t f = va_arg (*args, int);
+ tunnel_encap_decap_flags_t f = va_arg (*args, u32);
if (f == TUNNEL_ENCAP_DECAP_FLAG_NONE)
s = format (s, "none");
-
+ else
+ {
#define _(a, b, c) \
- else if (f & TUNNEL_ENCAP_DECAP_FLAG_##a) s = format (s, "%s ", b);
- foreach_tunnel_encap_decap_flag
+ if (f & TUNNEL_ENCAP_DECAP_FLAG_##a) \
+ s = format (s, "%s ", b);
+ foreach_tunnel_encap_decap_flag
#undef _
- return (s);
+ }
+ return (s);
}
uword
@@ -95,15 +98,19 @@ unformat_tunnel_encap_decap_flags (unformat_input_t * input, va_list * args)
u8 *
format_tunnel_flags (u8 *s, va_list *args)
{
- tunnel_flags_t f = va_arg (*args, int);
+ tunnel_flags_t f = va_arg (*args, u32);
if (f == TUNNEL_FLAG_NONE)
s = format (s, "none");
-
-#define _(a, b, c) else if (f & TUNNEL_FLAG_##a) s = format (s, "%s ", c);
- foreach_tunnel_flag
+ else
+ {
+#define _(a, b, c) \
+ if (f & TUNNEL_FLAG_##a) \
+ s = format (s, "%s ", c);
+ foreach_tunnel_flag
#undef _
- return (s);
+ }
+ return (s);
}
uword
diff --git a/src/vnet/tunnel/tunnel_types_api.c b/src/vnet/tunnel/tunnel_types_api.c
index 894eecb8407..247c13cd416 100644
--- a/src/vnet/tunnel/tunnel_types_api.c
+++ b/src/vnet/tunnel/tunnel_types_api.c
@@ -60,9 +60,14 @@ tunnel_flags_decode (vl_api_tunnel_flags_t f, tunnel_flags_t *o)
}
vl_api_tunnel_flags_t
-tunnel_flags_encode (tunnel_flags_t f)
+tunnel_flags_encode (tunnel_flags_t in)
{
- return ((vl_api_tunnel_flags_t) f);
+ vl_api_tunnel_flags_t out = 0;
+
+ if (in & TUNNEL_FLAG_TRACK_MTU)
+ out |= TUNNEL_API_FLAG_TRACK_MTU;
+
+ return (out);
}
int
diff --git a/src/vnet/udp/udp.api b/src/vnet/udp/udp.api
index 02176be7c2b..6b468be461a 100644
--- a/src/vnet/udp/udp.api
+++ b/src/vnet/udp/udp.api
@@ -32,7 +32,7 @@ import "vnet/ip/ip_types.api";
* @param dst_ip - Encap destination address
* @param src_ip - Encap source address
* @param dst_port - Encap destination port
- * @param src_port - Encap source port
+ * @param src_port - Encap source port, 0 for entopy per rfc7510
* @param id - VPP assigned id; ignored in add message, set in dump
*/
typedef udp_encap
diff --git a/src/vnet/udp/udp.c b/src/vnet/udp/udp.c
index 40e0053bb96..b3c02510232 100644
--- a/src/vnet/udp/udp.c
+++ b/src/vnet/udp/udp.c
@@ -23,97 +23,63 @@
udp_main_t udp_main;
static void
-udp_connection_register_port (vlib_main_t * vm, u16 lcl_port, u8 is_ip4)
+udp_connection_register_port (u16 lcl_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
u16 *n;
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- if (!pi)
- {
- udp_add_dst_port (um, lcl_port, 0, is_ip4);
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- pi->n_connections = 1;
- }
- else
- {
- pi->n_connections += 1;
- /* Do not return. The fact that the pi is valid does not mean
- * it's up to date */
- }
-
- pi->node_index = is_ip4 ? udp4_input_node.index : udp6_input_node.index;
- pi->next_index = um->local_to_input_edge[is_ip4];
+ /* Setup udp protocol -> next index sparse vector mapping. Do not setup
+ * udp_dst_port_info_t as that is used to distinguish between external
+ * and transport consumed ports */
- /* Setup udp protocol -> next index sparse vector mapping. */
if (is_ip4)
- n = sparse_vec_validate (um->next_by_dst_port4,
- clib_host_to_net_u16 (lcl_port));
+ n = sparse_vec_validate (um->next_by_dst_port4, lcl_port);
else
- n = sparse_vec_validate (um->next_by_dst_port6,
- clib_host_to_net_u16 (lcl_port));
+ n = sparse_vec_validate (um->next_by_dst_port6, lcl_port);
- n[0] = pi->next_index;
+ n[0] = um->local_to_input_edge[is_ip4];
+
+ __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED);
+}
+
+void
+udp_connection_share_port (u16 lcl_port, u8 is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ __atomic_add_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED);
}
static void
udp_connection_unregister_port (u16 lcl_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
+ u16 *n;
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- if (!pi)
+ /* Needed because listeners are not tracked as local endpoints */
+ if (__atomic_sub_fetch (&um->transport_ports_refcnt[is_ip4][lcl_port], 1,
+ __ATOMIC_RELAXED))
return;
- if (!pi->n_connections)
- {
- clib_warning ("no connections using port %u", lcl_port);
- return;
- }
-
- if (!clib_atomic_sub_fetch (&pi->n_connections, 1))
- udp_unregister_dst_port (0, lcl_port, is_ip4);
-}
-
-void
-udp_connection_share_port (u16 lcl_port, u8 is_ip4)
-{
- udp_main_t *um = &udp_main;
- udp_dst_port_info_t *pi;
+ if (is_ip4)
+ n = sparse_vec_validate (um->next_by_dst_port4, lcl_port);
+ else
+ n = sparse_vec_validate (um->next_by_dst_port6, lcl_port);
- /* Done without a lock but the operation is atomic. Writers to pi hash
- * table and vector should be guarded by a barrier sync */
- pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- clib_atomic_fetch_add_rel (&pi->n_connections, 1);
+ n[0] = UDP_NO_NODE_SET;
}
udp_connection_t *
udp_connection_alloc (u32 thread_index)
{
- udp_main_t *um = &udp_main;
+ udp_worker_t *wrk = udp_worker_get (thread_index);
udp_connection_t *uc;
- u32 will_expand = 0;
- pool_get_aligned_will_expand (um->connections[thread_index], will_expand,
- CLIB_CACHE_LINE_BYTES);
- if (PREDICT_FALSE (will_expand))
- {
- clib_spinlock_lock_if_init (&udp_main.peekers_write_locks
- [thread_index]);
- pool_get_aligned (udp_main.connections[thread_index], uc,
- CLIB_CACHE_LINE_BYTES);
- clib_spinlock_unlock_if_init (&udp_main.peekers_write_locks
- [thread_index]);
- }
- else
- {
- pool_get_aligned (um->connections[thread_index], uc,
- CLIB_CACHE_LINE_BYTES);
- }
+ pool_get_aligned_safe (wrk->connections, uc, CLIB_CACHE_LINE_BYTES);
+
clib_memset (uc, 0, sizeof (*uc));
- uc->c_c_index = uc - um->connections[thread_index];
+ uc->c_c_index = uc - wrk->connections;
uc->c_thread_index = thread_index;
uc->c_proto = TRANSPORT_PROTO_UDP;
return uc;
@@ -122,20 +88,20 @@ udp_connection_alloc (u32 thread_index)
void
udp_connection_free (udp_connection_t * uc)
{
- u32 thread_index = uc->c_thread_index;
+ udp_worker_t *wrk = udp_worker_get (uc->c_thread_index);
+
clib_spinlock_free (&uc->rx_lock);
if (CLIB_DEBUG)
clib_memset (uc, 0xFA, sizeof (*uc));
- pool_put (udp_main.connections[thread_index], uc);
+ pool_put (wrk->connections, uc);
}
static void
udp_connection_cleanup (udp_connection_t * uc)
{
- transport_endpoint_cleanup (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip,
- uc->c_lcl_port);
- udp_connection_unregister_port (clib_net_to_host_u16 (uc->c_lcl_port),
- uc->c_is_ip4);
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &uc->c_lcl_ip,
+ uc->c_lcl_port);
+ udp_connection_unregister_port (uc->c_lcl_port, uc->c_is_ip4);
udp_connection_free (uc);
}
@@ -146,6 +112,38 @@ udp_connection_delete (udp_connection_t * uc)
udp_connection_cleanup (uc);
}
+static void
+udp_handle_cleanups (void *args)
+{
+ u32 thread_index = (u32) pointer_to_uword (args);
+ udp_connection_t *uc;
+ udp_worker_t *wrk;
+ u32 *uc_index;
+
+ wrk = udp_worker_get (thread_index);
+ vec_foreach (uc_index, wrk->pending_cleanups)
+ {
+ uc = udp_connection_get (*uc_index, thread_index);
+ udp_connection_delete (uc);
+ }
+ vec_reset_length (wrk->pending_cleanups);
+}
+
+static void
+udp_connection_program_cleanup (udp_connection_t *uc)
+{
+ uword thread_index = uc->c_thread_index;
+ udp_worker_t *wrk;
+
+ wrk = udp_worker_get (uc->c_thread_index);
+ vec_add1 (wrk->pending_cleanups, uc->c_c_index);
+
+ if (vec_len (wrk->pending_cleanups) == 1)
+ session_send_rpc_evt_to_thread_force (
+ thread_index, udp_handle_cleanups,
+ uword_to_pointer (thread_index, void *));
+}
+
static u8
udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4)
{
@@ -153,8 +151,7 @@ udp_connection_port_used_extern (u16 lcl_port, u8 is_ip4)
udp_dst_port_info_t *pi;
pi = udp_get_dst_port_info (um, lcl_port, is_ip4);
- return (pi && !pi->n_connections
- && udp_is_valid_dst_port (lcl_port, is_ip4));
+ return (pi && udp_is_valid_dst_port (lcl_port, is_ip4));
}
static u16
@@ -165,18 +162,15 @@ udp_default_mtu (udp_main_t * um, u8 is_ip4)
}
static u32
-udp_session_bind (u32 session_index, transport_endpoint_t * lcl)
+udp_session_bind (u32 session_index, transport_endpoint_cfg_t *lcl)
{
udp_main_t *um = vnet_get_udp_main ();
- vlib_main_t *vm = vlib_get_main ();
transport_endpoint_cfg_t *lcl_ext;
udp_connection_t *listener;
- u16 lcl_port_ho;
void *iface_ip;
- lcl_port_ho = clib_net_to_host_u16 (lcl->port);
-
- if (udp_connection_port_used_extern (lcl_port_ho, lcl->is_ip4))
+ if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl->port),
+ lcl->is_ip4))
{
clib_warning ("port already used");
return SESSION_E_PORTINUSE;
@@ -200,7 +194,8 @@ udp_session_bind (u32 session_index, transport_endpoint_t * lcl)
listener->c_proto = TRANSPORT_PROTO_UDP;
listener->c_s_index = session_index;
listener->c_fib_index = lcl->fib_index;
- listener->mss = udp_default_mtu (um, listener->c_is_ip4);
+ listener->mss =
+ lcl->mss ? lcl->mss : udp_default_mtu (um, listener->c_is_ip4);
listener->flags |= UDP_CONN_F_OWNS_PORT | UDP_CONN_F_LISTEN;
lcl_ext = (transport_endpoint_cfg_t *) lcl;
if (lcl_ext->transport_flags & TRANSPORT_CFG_F_CONNECTED)
@@ -208,8 +203,10 @@ udp_session_bind (u32 session_index, transport_endpoint_t * lcl)
else
listener->c_flags |= TRANSPORT_CONNECTION_F_CLESS;
clib_spinlock_init (&listener->rx_lock);
+ if (!um->csum_offload)
+ listener->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD;
- udp_connection_register_port (vm, lcl_port_ho, lcl->is_ip4);
+ udp_connection_register_port (listener->c_lcl_port, lcl->is_ip4);
return listener->c_c_index;
}
@@ -220,8 +217,7 @@ udp_session_unbind (u32 listener_index)
udp_connection_t *listener;
listener = udp_listener_get (listener_index);
- udp_connection_unregister_port (clib_net_to_host_u16 (listener->c_lcl_port),
- listener->c_is_ip4);
+ udp_connection_unregister_port (listener->c_lcl_port, listener->c_is_ip4);
clib_spinlock_free (&listener->rx_lock);
pool_put (um->listener_pool, listener);
return 0;
@@ -236,30 +232,100 @@ udp_session_get_listener (u32 listener_index)
return &us->connection;
}
+always_inline u32
+udp_push_one_header (vlib_main_t *vm, udp_connection_t *uc, vlib_buffer_t *b,
+ u8 is_cless)
+{
+ b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ /* reuse tcp medatada for now */
+ vnet_buffer (b)->tcp.connection_index = uc->c_c_index;
+
+ if (!is_cless)
+ {
+ vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port,
+ udp_csum_offload (uc));
+
+ if (uc->c_is_ip4)
+ vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4,
+ IP_PROTOCOL_UDP, udp_csum_offload (uc),
+ 0 /* is_df */, uc->c_dscp);
+ else
+ vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6,
+ IP_PROTOCOL_UDP);
+
+ vnet_buffer (b)->tcp.flags = 0;
+ }
+ else
+ {
+ u8 *data = vlib_buffer_get_current (b);
+ session_dgram_hdr_t hdr;
+
+ hdr = *(session_dgram_hdr_t *) (data - sizeof (hdr));
+
+ /* Local port assumed to be bound, not overwriting it */
+ vlib_buffer_push_udp (b, uc->c_lcl_port, hdr.rmt_port,
+ udp_csum_offload (uc));
+
+ if (uc->c_is_ip4)
+ vlib_buffer_push_ip4_custom (vm, b, &hdr.lcl_ip.ip4, &hdr.rmt_ip.ip4,
+ IP_PROTOCOL_UDP, udp_csum_offload (uc),
+ 0 /* is_df */, uc->c_dscp);
+ else
+ vlib_buffer_push_ip6 (vm, b, &hdr.lcl_ip.ip6, &hdr.rmt_ip.ip6,
+ IP_PROTOCOL_UDP);
+
+ /* Not connected udp session. Mark buffer for custom handling in
+ * udp_output */
+ vnet_buffer (b)->tcp.flags |= UDP_CONN_F_LISTEN;
+ }
+
+ return 0;
+}
+
+always_inline void
+udp_push_header_batch (udp_connection_t *uc, vlib_buffer_t **bs, u32 n_bufs,
+ u8 is_cless)
+{
+ vlib_main_t *vm = vlib_get_main ();
+
+ while (n_bufs >= 4)
+ {
+ vlib_prefetch_buffer_header (bs[2], STORE);
+ vlib_prefetch_buffer_header (bs[3], STORE);
+
+ udp_push_one_header (vm, uc, bs[0], is_cless);
+ udp_push_one_header (vm, uc, bs[1], is_cless);
+
+ n_bufs -= 2;
+ bs += 2;
+ }
+ while (n_bufs)
+ {
+ if (n_bufs > 1)
+ vlib_prefetch_buffer_header (bs[1], STORE);
+
+ udp_push_one_header (vm, uc, bs[0], is_cless);
+
+ n_bufs -= 1;
+ bs += 1;
+ }
+}
+
static u32
-udp_push_header (transport_connection_t * tc, vlib_buffer_t * b)
+udp_push_header (transport_connection_t *tc, vlib_buffer_t **bs, u32 n_bufs)
{
udp_connection_t *uc;
- vlib_main_t *vm = vlib_get_main ();
uc = udp_connection_from_transport (tc);
-
- vlib_buffer_push_udp (b, uc->c_lcl_port, uc->c_rmt_port, 1);
- if (tc->is_ip4)
- vlib_buffer_push_ip4_custom (vm, b, &uc->c_lcl_ip4, &uc->c_rmt_ip4,
- IP_PROTOCOL_UDP, 1 /* csum offload */ ,
- 0 /* is_df */ );
+ if (uc->flags & UDP_CONN_F_CONNECTED)
+ udp_push_header_batch (uc, bs, n_bufs, 0 /* is_cless */);
else
- vlib_buffer_push_ip6 (vm, b, &uc->c_lcl_ip6, &uc->c_rmt_ip6,
- IP_PROTOCOL_UDP);
- vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
- vnet_buffer (b)->sw_if_index[VLIB_TX] = uc->c_fib_index;
- b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+ udp_push_header_batch (uc, bs, n_bufs, 1 /* is_cless */);
if (PREDICT_FALSE (uc->flags & UDP_CONN_F_CLOSING))
{
- if (!transport_max_tx_dequeue (&uc->connection))
- udp_connection_delete (uc);
+ if (!transport_tx_fifo_has_dgram (&uc->connection))
+ udp_connection_program_cleanup (uc);
}
return 0;
@@ -281,11 +347,11 @@ udp_session_close (u32 connection_index, u32 thread_index)
udp_connection_t *uc;
uc = udp_connection_get (connection_index, thread_index);
- if (!uc)
+ if (!uc || (uc->flags & UDP_CONN_F_MIGRATED))
return;
- if (!transport_max_tx_dequeue (&uc->connection))
- udp_connection_delete (uc);
+ if (!transport_tx_fifo_has_dgram (&uc->connection))
+ udp_connection_program_cleanup (uc);
else
uc->flags |= UDP_CONN_F_CLOSING;
}
@@ -323,57 +389,42 @@ udp_session_send_params (transport_connection_t * tconn,
static int
udp_open_connection (transport_endpoint_cfg_t * rmt)
{
- vlib_main_t *vm = vlib_get_main ();
- u32 thread_index = vm->thread_index;
udp_main_t *um = &udp_main;
ip46_address_t lcl_addr;
udp_connection_t *uc;
+ u32 thread_index;
u16 lcl_port;
int rv;
rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_UDP, rmt, &lcl_addr,
&lcl_port);
if (rv)
- {
- if (rv != SESSION_E_PORTINUSE)
- return rv;
-
- if (udp_connection_port_used_extern (lcl_port, rmt->is_ip4))
- return SESSION_E_PORTINUSE;
-
- /* If port in use, check if 5-tuple is also in use */
- if (session_lookup_connection (rmt->fib_index, &lcl_addr, &rmt->ip,
- lcl_port, rmt->port, TRANSPORT_PROTO_UDP,
- rmt->is_ip4))
- return SESSION_E_PORTINUSE;
-
- /* 5-tuple is available so increase lcl endpoint refcount and proceed
- * with connection allocation */
- transport_share_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
- lcl_port);
- goto conn_alloc;
- }
+ return rv;
- if (udp_is_valid_dst_port (lcl_port, rmt->is_ip4))
+ if (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port),
+ rmt->is_ip4))
{
/* If specific source port was requested abort */
if (rmt->peer.port)
- return SESSION_E_PORTINUSE;
+ {
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
+ lcl_port);
+ return SESSION_E_PORTINUSE;
+ }
/* Try to find a port that's not used */
- while (udp_is_valid_dst_port (lcl_port, rmt->is_ip4))
+ while (udp_connection_port_used_extern (clib_net_to_host_u16 (lcl_port),
+ rmt->is_ip4))
{
- lcl_port = transport_alloc_local_port (TRANSPORT_PROTO_UDP,
- &lcl_addr);
- if (lcl_port < 1)
+ transport_release_local_endpoint (TRANSPORT_PROTO_UDP, &lcl_addr,
+ lcl_port);
+ lcl_port =
+ transport_alloc_local_port (TRANSPORT_PROTO_UDP, &lcl_addr, rmt);
+ if ((int) lcl_port < 1)
return SESSION_E_PORTINUSE;
}
}
-conn_alloc:
-
- udp_connection_register_port (vm, lcl_port, rmt->is_ip4);
-
/* We don't poll main thread if we have workers */
thread_index = transport_cl_thread ();
@@ -381,11 +432,14 @@ conn_alloc:
ip_copy (&uc->c_rmt_ip, &rmt->ip, rmt->is_ip4);
ip_copy (&uc->c_lcl_ip, &lcl_addr, rmt->is_ip4);
uc->c_rmt_port = rmt->port;
- uc->c_lcl_port = clib_host_to_net_u16 (lcl_port);
+ uc->c_lcl_port = lcl_port;
uc->c_is_ip4 = rmt->is_ip4;
uc->c_proto = TRANSPORT_PROTO_UDP;
uc->c_fib_index = rmt->fib_index;
+ uc->c_dscp = rmt->dscp;
uc->mss = rmt->mss ? rmt->mss : udp_default_mtu (um, uc->c_is_ip4);
+ if (rmt->peer.sw_if_index != ENDPOINT_INVALID_INDEX)
+ uc->sw_if_index = rmt->peer.sw_if_index;
uc->flags |= UDP_CONN_F_OWNS_PORT;
if (rmt->transport_flags & TRANSPORT_CFG_F_CONNECTED)
{
@@ -396,6 +450,12 @@ conn_alloc:
clib_spinlock_init (&uc->rx_lock);
uc->c_flags |= TRANSPORT_CONNECTION_F_CLESS;
}
+ if (!um->csum_offload)
+ uc->cfg_flags |= UDP_CFG_F_NO_CSUM_OFFLOAD;
+ uc->next_node_index = rmt->next_node_index;
+ uc->next_node_opaque = rmt->next_node_opaque;
+
+ udp_connection_register_port (uc->c_lcl_port, rmt->is_ip4);
return uc->c_c_index;
}
@@ -445,8 +505,90 @@ format_udp_listener_session (u8 * s, va_list * args)
return format (s, "%U", format_udp_connection, uc, verbose);
}
-/* *INDENT-OFF* */
+static void
+udp_realloc_ports_sv (u16 **ports_nh_svp)
+{
+ u16 port, port_no, *ports_nh_sv, *mc;
+ u32 *ports = 0, *nh = 0, msum, i;
+ sparse_vec_header_t *h;
+ uword sv_index, *mb;
+
+ ports_nh_sv = *ports_nh_svp;
+
+ for (port = 1; port < 65535; port++)
+ {
+ port_no = clib_host_to_net_u16 (port);
+
+ sv_index = sparse_vec_index (ports_nh_sv, port_no);
+ if (sv_index != SPARSE_VEC_INVALID_INDEX)
+ {
+ vec_add1 (ports, port_no);
+ vec_add1 (nh, ports_nh_sv[sv_index]);
+ }
+ }
+
+ sparse_vec_free (ports_nh_sv);
+
+ ports_nh_sv =
+ sparse_vec_new (/* elt bytes */ sizeof (ports_nh_sv[0]),
+ /* bits in index */ BITS (((udp_header_t *) 0)->dst_port));
+
+ vec_resize (ports_nh_sv, 65535);
+
+ for (port = 1; port < 65535; port++)
+ ports_nh_sv[port] = UDP_NO_NODE_SET;
+
+ for (i = 0; i < vec_len (ports); i++)
+ ports_nh_sv[ports[i]] = nh[i];
+
+ h = sparse_vec_header (ports_nh_sv);
+ vec_foreach (mb, h->is_member_bitmap)
+ *mb = (uword) ~0;
+
+ msum = 0;
+ vec_foreach (mc, h->member_counts)
+ {
+ *mc = msum;
+ msum += msum == 0 ? 63 : 64;
+ }
+
+ vec_free (ports);
+ vec_free (nh);
+
+ *ports_nh_svp = ports_nh_sv;
+}
+
+static clib_error_t *
+udp_enable_disable (vlib_main_t *vm, u8 is_en)
+{
+ udp_main_t *um = &udp_main;
+
+ /* Not ideal. The sparse vector used to map ports to next nodes assumes
+ * only a few ports are ever used. When udp transport is enabled this does
+ * not hold and, to make matters worse, ports are consumed in a random
+ * order.
+ *
+ * This can lead to a lot of slow updates to internal data structures
+ * which in turn can slow udp connection allocations until all ports are
+ * eventually consumed.
+ *
+ * Consequently, reallocate sparse vector, preallocate all ports and have
+ * them point to UDP_NO_NODE_SET. We could consider switching the sparse
+ * vector to a preallocated vector but that would increase memory
+ * consumption for vpp deployments that do not rely on host stack.
+ */
+
+ udp_realloc_ports_sv (&um->next_by_dst_port4);
+ udp_realloc_ports_sv (&um->next_by_dst_port6);
+
+ vec_validate (um->transport_ports_refcnt[0], 65535);
+ vec_validate (um->transport_ports_refcnt[1], 65535);
+
+ return 0;
+}
+
static const transport_proto_vft_t udp_proto = {
+ .enable = udp_enable_disable,
.start_listen = udp_session_bind,
.connect = udp_open_connection,
.stop_listen = udp_session_unbind,
@@ -467,7 +609,6 @@ static const transport_proto_vft_t udp_proto = {
.service_type = TRANSPORT_SERVICE_CL,
},
};
-/* *INDENT-ON* */
static clib_error_t *
udp_init (vlib_main_t * vm)
@@ -477,7 +618,6 @@ udp_init (vlib_main_t * vm)
vlib_thread_main_t *tm = vlib_get_thread_main ();
u32 num_threads;
ip_protocol_info_t *pi;
- int i;
/*
* Registrations
@@ -490,28 +630,18 @@ udp_init (vlib_main_t * vm)
pi->format_header = format_udp_header;
pi->unformat_pg_edit = unformat_pg_udp_header;
- /* Register as transport with URI */
+ /* Register as transport with session layer */
transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto,
- FIB_PROTOCOL_IP4, ip4_lookup_node.index);
+ FIB_PROTOCOL_IP4, udp4_output_node.index);
transport_register_protocol (TRANSPORT_PROTO_UDP, &udp_proto,
- FIB_PROTOCOL_IP6, ip6_lookup_node.index);
+ FIB_PROTOCOL_IP6, udp6_output_node.index);
/*
* Initialize data structures
*/
num_threads = 1 /* main thread */ + tm->n_threads;
- vec_validate (um->connections, num_threads - 1);
- vec_validate (um->connection_peekers, num_threads - 1);
- vec_validate (um->peekers_readers_locks, num_threads - 1);
- vec_validate (um->peekers_write_locks, num_threads - 1);
-
- if (num_threads > 1)
- for (i = 0; i < num_threads; i++)
- {
- clib_spinlock_init (&um->peekers_readers_locks[i]);
- clib_spinlock_init (&um->peekers_write_locks[i]);
- }
+ vec_validate (um->wrk, num_threads - 1);
um->local_to_input_edge[UDP_IP4] =
vlib_node_add_next (vm, udp4_local_node.index, udp4_input_node.index);
@@ -519,16 +649,15 @@ udp_init (vlib_main_t * vm)
vlib_node_add_next (vm, udp6_local_node.index, udp6_input_node.index);
um->default_mtu = 1500;
+ um->csum_offload = 1;
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (udp_init) =
{
.runs_after = VLIB_INITS("ip_main_init", "ip4_lookup_init",
"ip6_lookup_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/udp/udp.h b/src/vnet/udp/udp.h
index dc3f8f5bfa1..8e4e87f85a8 100644
--- a/src/vnet/udp/udp.h
+++ b/src/vnet/udp/udp.h
@@ -25,9 +25,11 @@
#include <vnet/ip/ip.h>
#include <vnet/session/transport.h>
+#define UDP_NO_NODE_SET ((u16) ~0)
+
typedef enum
{
-#define udp_error(n,s) UDP_ERROR_##n,
+#define udp_error(f, n, s, d) UDP_ERROR_##f,
#include <vnet/udp/udp_error.def>
#undef udp_error
UDP_N_ERROR,
@@ -55,6 +57,24 @@ typedef enum udp_conn_flags_
#undef _
} udp_conn_flags_t;
+#define foreach_udp_cfg_flag _ (NO_CSUM_OFFLOAD, "no-csum-offload")
+
+typedef enum udp_cfg_flag_bits_
+{
+#define _(sym, str) UDP_CFG_F_##sym##_BIT,
+ foreach_udp_cfg_flag
+#undef _
+ UDP_CFG_N_FLAG_BITS
+} udp_cfg_flag_bits_e;
+
+typedef enum udp_cfg_flag_
+{
+#define _(sym, str) UDP_CFG_F_##sym = 1 << UDP_CFG_F_##sym##_BIT,
+ foreach_udp_cfg_flag
+#undef _
+ UDP_CFG_N_FLAGS
+} __clib_packed udp_cfg_flags_t;
+
typedef struct
{
/** Required for pool_get_aligned */
@@ -62,9 +82,15 @@ typedef struct
transport_connection_t connection; /**< must be first */
clib_spinlock_t rx_lock; /**< rx fifo lock */
u8 flags; /**< connection flags */
+ udp_cfg_flags_t cfg_flags; /**< configuration flags */
u16 mss; /**< connection mss */
+ u32 sw_if_index; /**< connection sw_if_index */
+ u32 next_node_index; /**< Can be used to control next node in output */
+ u32 next_node_opaque; /**< Opaque to pass to next node */
} udp_connection_t;
+#define udp_csum_offload(uc) (!((uc)->cfg_flags & UDP_CFG_F_NO_CSUM_OFFLOAD))
+
typedef struct
{
/* Name (a c string). */
@@ -79,9 +105,6 @@ typedef struct
/* Next index for this type. */
u32 next_index;
- /* UDP sessions refcount (not tunnels) */
- u32 n_connections;
-
/* Parser for packet generator edits for this protocol */
unformat_function_t *unformat_pg_edit;
} udp_dst_port_info_t;
@@ -93,6 +116,12 @@ typedef enum
N_UDP_AF,
} udp_af_t;
+typedef struct udp_worker_
+{
+ udp_connection_t *connections;
+ u32 *pending_cleanups;
+} udp_worker_t;
+
typedef struct
{
udp_dst_port_info_t *dst_port_infos[N_UDP_AF];
@@ -112,16 +141,21 @@ typedef struct
u32 local_to_input_edge[N_UDP_AF];
/*
- * Per-worker thread udp connection pools used with session layer
+ * UDP transport layer per-thread context
*/
- udp_connection_t **connections;
- u32 *connection_peekers;
- clib_spinlock_t *peekers_readers_locks;
- clib_spinlock_t *peekers_write_locks;
+
+ udp_worker_t *wrk;
udp_connection_t *listener_pool;
+ /* Refcounts for ports consumed by udp transports to handle
+ * both passive and active opens using the same port */
+ u16 *transport_ports_refcnt[N_UDP_AF];
+
u16 default_mtu;
u16 msg_id_base;
+ u8 csum_offload;
+
+ u8 icmp_send_unreachable_disabled;
} udp_main_t;
extern udp_main_t udp_main;
@@ -129,16 +163,26 @@ extern vlib_node_registration_t udp4_input_node;
extern vlib_node_registration_t udp6_input_node;
extern vlib_node_registration_t udp4_local_node;
extern vlib_node_registration_t udp6_local_node;
+extern vlib_node_registration_t udp4_output_node;
+extern vlib_node_registration_t udp6_output_node;
void udp_add_dst_port (udp_main_t * um, udp_dst_port_t dst_port,
char *dst_port_name, u8 is_ip4);
+always_inline udp_worker_t *
+udp_worker_get (u32 thread_index)
+{
+ return vec_elt_at_index (udp_main.wrk, thread_index);
+}
+
always_inline udp_connection_t *
udp_connection_get (u32 conn_index, u32 thread_index)
{
- if (pool_is_free_index (udp_main.connections[thread_index], conn_index))
+ udp_worker_t *wrk = udp_worker_get (thread_index);
+
+ if (pool_is_free_index (wrk->connections, conn_index))
return 0;
- return pool_elt_at_index (udp_main.connections[thread_index], conn_index);
+ return pool_elt_at_index (wrk->connections, conn_index);
}
always_inline udp_connection_t *
@@ -159,65 +203,24 @@ udp_connection_from_transport (transport_connection_t * tc)
return ((udp_connection_t *) tc);
}
-always_inline u32
-udp_connection_index (udp_connection_t * uc)
-{
- return (uc - udp_main.connections[uc->c_thread_index]);
-}
-
void udp_connection_free (udp_connection_t * uc);
udp_connection_t *udp_connection_alloc (u32 thread_index);
-
-/**
- * Acquires a lock that blocks a connection pool from expanding.
- */
-always_inline void
-udp_pool_add_peeker (u32 thread_index)
-{
- if (thread_index != vlib_get_thread_index ())
- return;
- clib_spinlock_lock_if_init (&udp_main.peekers_readers_locks[thread_index]);
- udp_main.connection_peekers[thread_index] += 1;
- if (udp_main.connection_peekers[thread_index] == 1)
- clib_spinlock_lock_if_init (&udp_main.peekers_write_locks[thread_index]);
- clib_spinlock_unlock_if_init (&udp_main.peekers_readers_locks
- [thread_index]);
-}
-
-always_inline void
-udp_pool_remove_peeker (u32 thread_index)
-{
- if (thread_index != vlib_get_thread_index ())
- return;
- ASSERT (udp_main.connection_peekers[thread_index] > 0);
- clib_spinlock_lock_if_init (&udp_main.peekers_readers_locks[thread_index]);
- udp_main.connection_peekers[thread_index] -= 1;
- if (udp_main.connection_peekers[thread_index] == 0)
- clib_spinlock_unlock_if_init (&udp_main.peekers_write_locks
- [thread_index]);
- clib_spinlock_unlock_if_init (&udp_main.peekers_readers_locks
- [thread_index]);
-}
+void udp_connection_share_port (u16 lcl_port, u8 is_ip4);
always_inline udp_connection_t *
udp_connection_clone_safe (u32 connection_index, u32 thread_index)
{
+ u32 current_thread_index = vlib_get_thread_index (), new_index;
udp_connection_t *old_c, *new_c;
- u32 current_thread_index = vlib_get_thread_index ();
- new_c = udp_connection_alloc (current_thread_index);
- /* If during the memcpy pool is reallocated AND the memory allocator
- * decides to give the old chunk of memory to somebody in a hurry to
- * scribble something on it, we have a problem. So add this thread as
- * a session pool peeker.
- */
- udp_pool_add_peeker (thread_index);
- old_c = udp_main.connections[thread_index] + connection_index;
+ new_c = udp_connection_alloc (current_thread_index);
+ new_index = new_c->c_c_index;
+ /* Connection pool always realloced with barrier */
+ old_c = udp_main.wrk[thread_index].connections + connection_index;
clib_memcpy_fast (new_c, old_c, sizeof (*new_c));
old_c->flags |= UDP_CONN_F_MIGRATED;
- udp_pool_remove_peeker (thread_index);
new_c->c_thread_index = current_thread_index;
- new_c->c_c_index = udp_connection_index (new_c);
+ new_c->c_c_index = new_index;
new_c->c_fib_index = old_c->c_fib_index;
/* Assume cloned sessions don't need lock */
new_c->rx_lock = 0;
@@ -237,8 +240,6 @@ format_function_t format_udp_connection;
unformat_function_t unformat_udp_header;
unformat_function_t unformat_udp_port;
-void udp_connection_share_port (u16 lcl_port, u8 is_ip4);
-
void udp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
/*
diff --git a/src/vnet/udp/udp_api.c b/src/vnet/udp/udp_api.c
index 0f2d014946f..1f952aa36ea 100644
--- a/src/vnet/udp/udp_api.c
+++ b/src/vnet/udp/udp_api.c
@@ -86,12 +86,10 @@ vl_api_udp_encap_dump_t_handler (vl_api_udp_encap_dump_t *mp)
if (!reg)
return;
- /* *INDENT-OFF* */
pool_foreach (ue, udp_encap_pool)
{
send_udp_encap_details(ue, reg, mp->context);
}
- /* *INDENT-ON* */
}
static void
@@ -99,6 +97,7 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp)
{
vl_api_udp_encap_add_reply_t *rmp;
ip46_address_t src_ip, dst_ip;
+ udp_encap_fixup_flags_t flags;
u32 fib_index, table_id;
fib_protocol_t fproto;
ip46_type_t itype;
@@ -119,19 +118,19 @@ vl_api_udp_encap_add_t_handler (vl_api_udp_encap_add_t *mp)
goto done;
}
- uei = udp_encap_add_and_lock (fproto, fib_index,
- &src_ip, &dst_ip,
+ flags = UDP_ENCAP_FIXUP_NONE;
+ if (mp->udp_encap.src_port == 0)
+ flags |= UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY;
+
+ uei = udp_encap_add_and_lock (fproto, fib_index, &src_ip, &dst_ip,
ntohs (mp->udp_encap.src_port),
- ntohs (mp->udp_encap.dst_port),
- UDP_ENCAP_FIXUP_NONE);
+ ntohs (mp->udp_encap.dst_port), flags);
done:
- /* *INDENT-OFF* */
REPLY_MACRO2 (VL_API_UDP_ENCAP_ADD_REPLY,
({
rmp->id = ntohl (uei);
}));
- /* *INDENT-ON* */
}
@@ -189,11 +188,19 @@ vl_api_udp_decap_add_del_t_handler (vl_api_udp_decap_add_del_t *mp)
static clib_error_t *
udp_api_hookup (vlib_main_t * vm)
{
+ api_main_t *am = vlibapi_get_main ();
+
/*
* Set up the (msg_name, crc, message-id) table
*/
REPLY_MSG_ID_BASE = setup_message_id_table ();
+ /* Mark these APIs as mp safe */
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_ADD, 1);
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DEL, 1);
+ vl_api_set_msg_thread_safe (am, REPLY_MSG_ID_BASE + VL_API_UDP_ENCAP_DUMP,
+ 1);
+
return 0;
}
diff --git a/src/vnet/udp/udp_cli.c b/src/vnet/udp/udp_cli.c
index 09e3a8a3d7f..6c8992cd0de 100644
--- a/src/vnet/udp/udp_cli.c
+++ b/src/vnet/udp/udp_cli.c
@@ -13,6 +13,9 @@
* limitations under the License.
*/
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/format_table.h>
#include <vnet/udp/udp.h>
#include <vnet/session/session_types.h>
@@ -35,6 +38,33 @@ format_udp_connection_id (u8 * s, va_list * args)
return s;
}
+static const char *udp_cfg_flags_str[] = {
+#define _(sym, str) str,
+ foreach_udp_cfg_flag
+#undef _
+};
+
+static u8 *
+format_udp_cfg_flags (u8 *s, va_list *args)
+{
+ udp_connection_t *tc = va_arg (*args, udp_connection_t *);
+ int i, last = -1;
+
+ for (i = 0; i < UDP_CFG_N_FLAG_BITS; i++)
+ if (tc->cfg_flags & (1 << i))
+ last = i;
+ if (last >= 0)
+ s = format (s, " cfg: ");
+ for (i = 0; i < last; i++)
+ {
+ if (tc->cfg_flags & (1 << i))
+ s = format (s, "%s, ", udp_cfg_flags_str[i]);
+ }
+ if (last >= 0)
+ s = format (s, "%s", udp_cfg_flags_str[last]);
+ return s;
+}
+
static const char *udp_connection_flags_str[] = {
#define _(sym, str) str,
foreach_udp_connection_flag
@@ -64,11 +94,15 @@ static u8 *
format_udp_vars (u8 * s, va_list * args)
{
udp_connection_t *uc = va_arg (*args, udp_connection_t *);
- s = format (s, " index %u flags: %U", uc->c_c_index,
- format_udp_connection_flags, uc);
+ s = format (s, " index %u%U flags: %U\n", uc->c_c_index,
+ format_udp_cfg_flags, uc, format_udp_connection_flags, uc);
+ s = format (s, " fib_index: %u next_node: %u opaque: %u ", uc->c_fib_index);
if (!(uc->flags & UDP_CONN_F_LISTEN))
+ s = format (s, " sw_if_index: %d mss: %u\n", uc->sw_if_index, uc->mss);
+ else
s = format (s, "\n");
+
return s;
}
@@ -100,6 +134,10 @@ udp_config_fn (vlib_main_t * vm, unformat_input_t * input)
{
if (unformat (input, "mtu %u", &tmp))
um->default_mtu = tmp;
+ else if (unformat (input, "icmp-unreachable-disabled"))
+ um->icmp_send_unreachable_disabled = 1;
+ else if (unformat (input, "no-csum-offload"))
+ um->csum_offload = 0;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
@@ -149,7 +187,7 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
u8 *s = NULL;
vec_foreach (port_info, um->dst_port_infos[UDP_IP6])
{
- if (udp_is_valid_dst_port (port_info->dst_port, 01))
+ if (udp_is_valid_dst_port (port_info->dst_port, 0))
{
s = format (s, (!s) ? "%d" : ", %d", port_info->dst_port);
}
@@ -160,14 +198,199 @@ show_udp_punt_fn (vlib_main_t * vm, unformat_input_t * input,
return (error);
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_tcp_punt_command, static) =
{
.path = "show udp punt",
.short_help = "show udp punt [ipv4|ipv6]",
.function = show_udp_punt_fn,
};
-/* *INDENT-ON* */
+
+static void
+table_format_udp_port_ (vlib_main_t *vm, udp_main_t *um, table_t *t, int *c,
+ int port, int bind, int is_ip4)
+{
+ const udp_dst_port_info_t *pi;
+
+ if (bind && !udp_is_valid_dst_port (port, is_ip4))
+ return;
+
+ pi = udp_get_dst_port_info (um, port, is_ip4);
+ if (!pi)
+ return;
+
+ table_format_cell (t, *c, 0, "%d", pi->dst_port);
+ table_format_cell (t, *c, 1, is_ip4 ? "ip4" : "ip6");
+ table_format_cell (t, *c, 2, ~0 == pi->node_index ? "none" : "%U",
+ format_vlib_node_name, vm, pi->node_index);
+ table_format_cell (t, *c, 3, "%s", pi->name);
+
+ (*c)++;
+}
+
+static void
+table_format_udp_port (vlib_main_t *vm, udp_main_t *um, table_t *t, int *c,
+ int port, int bind, int ip4, int ip6)
+{
+ if (ip4)
+ table_format_udp_port_ (vm, um, t, c, port, bind, 1 /* is_ip4 */);
+ if (ip6)
+ table_format_udp_port_ (vm, um, t, c, port, bind, 0 /* is_ip4 */);
+}
+
+static clib_error_t *
+show_udp_ports (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ table_t table = {}, *t = &table;
+ udp_main_t *um = &udp_main;
+ clib_error_t *err = 0;
+ int ip4 = 1, ip6 = 1;
+ int port = -1;
+ int bind = 1;
+ int c = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "ip4"))
+ ip6 = 0;
+ else if (unformat (input, "ip6"))
+ ip4 = 0;
+ else if (unformat (input, "bind"))
+ bind = 1;
+ else if (unformat (input, "all"))
+ bind = 0;
+ else if (unformat (input, "%d", &port))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ table_add_header_col (t, 4, "port", "proto", "node", "desc");
+
+ if (port > 65535)
+ {
+ err = clib_error_return (0, "wrong port %d", port);
+ goto out;
+ }
+ else if (port < 0)
+ {
+ for (port = 0; port < 65536; port++)
+ table_format_udp_port (vm, um, t, &c, port, bind, ip4, ip6);
+ }
+ else
+ {
+ table_format_udp_port (vm, um, t, &c, port, bind, ip4, ip6);
+ }
+
+ vlib_cli_output (vm, "%U", format_table, t);
+
+out:
+ table_free (t);
+ return err;
+}
+
+VLIB_CLI_COMMAND (show_udp_ports_cmd, static) = {
+ .path = "show udp ports",
+ .function = show_udp_ports,
+ .short_help = "show udp ports [ip4|ip6] [bind|all|<port>]",
+ .is_mp_safe = 1,
+};
+
+static void
+table_format_udp_transport_port_ (vlib_main_t *vm, table_t *t, int *c,
+ int port, int is_ip4)
+{
+ udp_main_t *um = &udp_main;
+ u32 refcnt;
+ u16 port_ne;
+
+ port_ne = clib_host_to_net_u16 (port);
+ refcnt = um->transport_ports_refcnt[is_ip4][port_ne];
+ if (!refcnt)
+ return;
+
+ if (!udp_is_valid_dst_port (port, is_ip4))
+ {
+ clib_warning ("Port %u is not registered refcnt %u!", port, refcnt);
+ return;
+ }
+
+ table_format_cell (t, *c, 0, "%d", port);
+ table_format_cell (t, *c, 1, is_ip4 ? "ip4" : "ip6");
+ table_format_cell (t, *c, 2, "%d", refcnt);
+
+ (*c)++;
+}
+
+static void
+table_format_udp_transport_port (vlib_main_t *vm, table_t *t, int *c, int port,
+ int ipv)
+{
+ if (ipv == -1 || ipv == 0)
+ table_format_udp_transport_port_ (vm, t, c, port, 1 /* is_ip4 */);
+ if (ipv == -1 || ipv == 1)
+ table_format_udp_transport_port_ (vm, t, c, port, 0 /* is_ip4 */);
+}
+
+static clib_error_t *
+show_udp_transport_ports (vlib_main_t *vm, unformat_input_t *input,
+ vlib_cli_command_t *cmd)
+{
+ table_t table = {}, *t = &table;
+ int ipv = -1, port = -1, c = 0;
+ clib_error_t *err = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "ip4"))
+ ipv = 0;
+ else if (unformat (input, "ip6"))
+ ipv = 1;
+ else if (unformat (input, "%d", &port))
+ ;
+ else
+ {
+ err = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ goto out;
+ }
+ }
+
+ table_add_header_col (t, 3, "port", "proto", "ref-cnt");
+
+ if (port > 65535)
+ {
+ err = clib_error_return (0, "wrong port %d", port);
+ goto out;
+ }
+
+ if (port < 0)
+ {
+ for (port = 0; port < 65536; port++)
+ table_format_udp_transport_port (vm, t, &c, port, ipv);
+ }
+ else
+ {
+ table_format_udp_transport_port (vm, t, &c, port, ipv);
+ }
+
+ vlib_cli_output (vm, "%U\n", format_table, t);
+
+out:
+ table_free (t);
+ return err;
+}
+
+VLIB_CLI_COMMAND (show_udp_transport_ports_cmd, static) = {
+ .path = "show udp transport ports",
+ .function = show_udp_transport_ports,
+ .short_help = "show udp transport ports [ip4|ip6] [<port>]",
+ .is_mp_safe = 1,
+};
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/udp/udp_encap.c b/src/vnet/udp/udp_encap.c
index cb93adb8d39..e4e5271da63 100644
--- a/src/vnet/udp/udp_encap.c
+++ b/src/vnet/udp/udp_encap.c
@@ -47,8 +47,7 @@ static void
udp_encap_restack (udp_encap_t * ue)
{
dpo_stack (udp_encap_dpo_types[ue->ue_ip_proto],
- fib_proto_to_dpo (ue->ue_ip_proto),
- &ue->ue_dpo,
+ fib_proto_to_dpo (ue->ue_ip_proto), &ue->ue_dpo,
fib_entry_contribute_ip_forwarding (ue->ue_fib_entry_index));
}
@@ -196,6 +195,20 @@ udp_encap_dpo_unlock (dpo_id_t * dpo)
fib_node_unlock (&ue->ue_fib_node);
}
+u8 *
+format_udp_encap_fixup_flags (u8 *s, va_list *args)
+{
+ udp_encap_fixup_flags_t flags = va_arg (*args, udp_encap_fixup_flags_t);
+
+ if (flags == UDP_ENCAP_FIXUP_NONE)
+ return format (s, "none");
+
+ if (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY)
+ s = format (s, "%s", "src-port-is-entropy");
+
+ return (s);
+}
+
static u8 *
format_udp_encap_i (u8 * s, va_list * args)
{
@@ -211,23 +224,21 @@ format_udp_encap_i (u8 * s, va_list * args)
s = format (s, "udp-encap:[%d]: ip-fib-index:%d ", uei, ue->ue_fib_index);
if (FIB_PROTOCOL_IP4 == ue->ue_ip_proto)
{
- s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d]",
- format_ip4_address,
- &ue->ue_hdrs.ip4.ue_ip4.src_address,
- format_ip4_address,
- &ue->ue_hdrs.ip4.ue_ip4.dst_address,
+ s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d, dst:%d] flags:%U",
+ format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.src_address,
+ format_ip4_address, &ue->ue_hdrs.ip4.ue_ip4.dst_address,
clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.src_port),
- clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port));
+ clib_net_to_host_u16 (ue->ue_hdrs.ip4.ue_udp.dst_port),
+ format_udp_encap_fixup_flags, ue->ue_flags);
}
else
{
- s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d]",
- format_ip6_address,
- &ue->ue_hdrs.ip6.ue_ip6.src_address,
- format_ip6_address,
- &ue->ue_hdrs.ip6.ue_ip6.dst_address,
+ s = format (s, "ip:[src:%U, dst:%U] udp:[src:%d dst:%d] flags:%U",
+ format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.src_address,
+ format_ip6_address, &ue->ue_hdrs.ip6.ue_ip6.dst_address,
clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.src_port),
- clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port));
+ clib_net_to_host_u16 (ue->ue_hdrs.ip6.ue_udp.dst_port),
+ format_udp_encap_fixup_flags, ue->ue_flags);
}
vlib_get_combined_counter (&(udp_encap_counters), uei, &to);
s = format (s, " to:[%Ld:%Ld]]", to.packets, to.bytes);
@@ -325,12 +336,12 @@ udp_encap_fib_last_lock_gone (fib_node_t * node)
}
const static char *const udp4_encap_ip4_nodes[] = {
- "udp4-encap",
+ "udp4o4-encap",
NULL,
};
const static char *const udp4_encap_ip6_nodes[] = {
- "udp4-encap",
+ "udp6o4-encap",
NULL,
};
@@ -345,12 +356,12 @@ const static char *const udp4_encap_bier_nodes[] = {
};
const static char *const udp6_encap_ip4_nodes[] = {
- "udp6-encap",
+ "udp4o6-encap",
NULL,
};
const static char *const udp6_encap_ip6_nodes[] = {
- "udp6-encap",
+ "udp6o6-encap",
NULL,
};
@@ -507,13 +518,11 @@ udp_encap_walk (udp_encap_walk_cb_t cb, void *ctx)
{
index_t uei;
- /* *INDENT-OFF* */
pool_foreach_index (uei, udp_encap_pool)
{
if (WALK_STOP == cb(uei, ctx))
break;
}
- /* *INDENT-ON* */
}
clib_error_t *
@@ -536,12 +545,10 @@ udp_encap_show (vlib_main_t * vm,
if (INDEX_INVALID == uei)
{
- /* *INDENT-OFF* */
pool_foreach_index (uei, udp_encap_pool)
{
vlib_cli_output(vm, "%U", format_udp_encap, uei, 0);
}
- /* *INDENT-ON* */
}
else
{
@@ -551,20 +558,20 @@ udp_encap_show (vlib_main_t * vm,
return NULL;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (udp_encap_add_command, static) = {
.path = "udp encap",
- .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] <dst-port> [src-port-is-entropy] [table-id <table>]",
+ .short_help = "udp encap [add|del] <id ID> <src-ip> <dst-ip> [<src-port>] "
+ "<dst-port> [src-port-is-entropy] [table-id <table>]",
.function = udp_encap_cli,
.is_mp_safe = 1,
};
+
VLIB_CLI_COMMAND (udp_encap_show_command, static) = {
.path = "show udp encap",
.short_help = "show udp encap [ID]",
.function = udp_encap_show,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/udp/udp_encap.h b/src/vnet/udp/udp_encap.h
index b096e0f5c09..c8b42ffa92c 100644
--- a/src/vnet/udp/udp_encap.h
+++ b/src/vnet/udp/udp_encap.h
@@ -85,7 +85,7 @@ typedef struct udp_encap_t_
/**
* The second cacheline contains control-plane data
*/
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
/**
* linkage into the FIB graph
@@ -115,6 +115,7 @@ extern index_t udp_encap_add_and_lock (fib_protocol_t proto,
extern void udp_encap_lock (index_t uei);
extern void udp_encap_unlock (index_t uei);
extern u8 *format_udp_encap (u8 * s, va_list * args);
+extern u8 *format_udp_encap_fixup_flags (u8 *s, va_list *args);
extern void udp_encap_contribute_forwarding (index_t uei,
dpo_proto_t proto,
dpo_id_t * dpo);
diff --git a/src/vnet/udp/udp_encap_node.c b/src/vnet/udp/udp_encap_node.c
index 5b9fc0bf34b..a86614f5475 100644
--- a/src/vnet/udp/udp_encap_node.c
+++ b/src/vnet/udp/udp_encap_node.c
@@ -20,12 +20,16 @@ typedef struct udp4_encap_trace_t_
{
udp_header_t udp;
ip4_header_t ip;
+ u32 flow_hash;
+ udp_encap_fixup_flags_t flags;
} udp4_encap_trace_t;
typedef struct udp6_encap_trace_t_
{
udp_header_t udp;
ip6_header_t ip;
+ u32 flow_hash;
+ udp_encap_fixup_flags_t flags;
} udp6_encap_trace_t;
extern vlib_combined_counter_main_t udp_encap_counters;
@@ -35,13 +39,16 @@ format_udp4_encap_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ u32 indent = format_get_indent (s);
udp4_encap_trace_t *t;
t = va_arg (*args, udp4_encap_trace_t *);
- s = format (s, "%U\n %U",
- format_ip4_header, &t->ip, sizeof (t->ip),
- format_udp_header, &t->udp, sizeof (t->udp));
+ s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U",
+ format_udp_encap_fixup_flags, t->flags, t->flow_hash,
+ format_white_space, indent, format_ip4_header, &t->ip,
+ sizeof (t->ip), format_white_space, indent, format_udp_header,
+ &t->udp, sizeof (t->udp));
return (s);
}
@@ -50,20 +57,23 @@ format_udp6_encap_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ u32 indent = format_get_indent (s);
udp6_encap_trace_t *t;
t = va_arg (*args, udp6_encap_trace_t *);
- s = format (s, "%U\n %U",
- format_ip6_header, &t->ip, sizeof (t->ip),
- format_udp_header, &t->udp, sizeof (t->udp));
+ s = format (s, "flags: %U, flow hash: 0x%08x\n%U%U\n%U%U",
+ format_udp_encap_fixup_flags, t->flags, t->flow_hash,
+ format_white_space, indent, format_ip6_header, &t->ip,
+ sizeof (t->ip), format_white_space, indent, format_udp_header,
+ &t->udp, sizeof (t->udp));
return (s);
}
always_inline uword
-udp_encap_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, int is_encap_v6)
+udp_encap_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, ip_address_family_t encap_family,
+ ip_address_family_t payload_family)
{
vlib_combined_counter_main_t *cm = &udp_encap_counters;
u32 *from = vlib_frame_vector_args (frame);
@@ -121,18 +131,22 @@ udp_encap_inline (vlib_main_t * vm,
ue1 = udp_encap_get (uei1);
/* Paint */
- if (is_encap_v6)
+ if (encap_family == AF_IP6)
{
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip6_header_t);
- ip_udp_encap_two (vm, b0, b1, (u8 *) & ue0->ue_hdrs,
- (u8 *) & ue1->ue_hdrs, n_bytes, 0);
+ ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs,
+ (u8 *) &ue1->ue_hdrs, n_bytes, encap_family,
+ payload_family, ue0->ue_flags, ue1->ue_flags);
+
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
udp6_encap_trace_t *tr =
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip6.ue_udp;
tr->ip = ue0->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -140,6 +154,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b1, sizeof (*tr));
tr->udp = ue1->ue_hdrs.ip6.ue_udp;
tr->ip = ue1->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue1->ue_flags;
+ tr->flow_hash = vnet_buffer (b1)->ip.flow_hash;
}
}
else
@@ -147,9 +163,9 @@ udp_encap_inline (vlib_main_t * vm,
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip4_header_t);
- ip_udp_encap_two (vm, b0, b1,
- (u8 *) & ue0->ue_hdrs,
- (u8 *) & ue1->ue_hdrs, n_bytes, 1);
+ ip_udp_encap_two (vm, b0, b1, (u8 *) &ue0->ue_hdrs,
+ (u8 *) &ue1->ue_hdrs, n_bytes, encap_family,
+ payload_family, ue0->ue_flags, ue1->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -157,6 +173,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip4.ue_udp;
tr->ip = ue0->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -164,6 +182,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b1, sizeof (*tr));
tr->udp = ue1->ue_hdrs.ip4.ue_udp;
tr->ip = ue1->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue1->ue_flags;
+ tr->flow_hash = vnet_buffer (b1)->ip.flow_hash;
}
}
@@ -202,12 +222,12 @@ udp_encap_inline (vlib_main_t * vm,
b0));
/* Paint */
- if (is_encap_v6)
+ if (encap_family == AF_IP6)
{
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip6_header_t);
- ip_udp_encap_one (vm, b0, (u8 *) & ue0->ue_hdrs.ip6, n_bytes,
- 0);
+ ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip6, n_bytes,
+ encap_family, payload_family, ue0->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -215,6 +235,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip6.ue_udp;
tr->ip = ue0->ue_hdrs.ip6.ue_ip6;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
}
else
@@ -222,8 +244,8 @@ udp_encap_inline (vlib_main_t * vm,
const u8 n_bytes =
sizeof (udp_header_t) + sizeof (ip4_header_t);
- ip_udp_encap_one (vm, b0, (u8 *) & ue0->ue_hdrs.ip4, n_bytes,
- 1);
+ ip_udp_encap_one (vm, b0, (u8 *) &ue0->ue_hdrs.ip4, n_bytes,
+ encap_family, payload_family, ue0->ue_flags);
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
{
@@ -231,6 +253,8 @@ udp_encap_inline (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->udp = ue0->ue_hdrs.ip4.ue_udp;
tr->ip = ue0->ue_hdrs.ip4.ue_ip4;
+ tr->flags = ue0->ue_flags;
+ tr->flow_hash = vnet_buffer (b0)->ip.flow_hash;
}
}
@@ -248,39 +272,87 @@ udp_encap_inline (vlib_main_t * vm,
return frame->n_vectors;
}
-VLIB_NODE_FN (udp4_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (udp4o4_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP4, AF_IP4);
+}
+
+VLIB_NODE_FN (udp6o4_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP4, AF_IP6);
+}
+
+VLIB_NODE_FN (udp4_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP4, N_AF);
+}
+
+VLIB_NODE_FN (udp6o6_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return udp_encap_inline (vm, node, frame, 0);
+ return udp_encap_inline (vm, node, frame, AF_IP6, AF_IP6);
}
-VLIB_NODE_FN (udp6_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
+VLIB_NODE_FN (udp4o6_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
{
- return udp_encap_inline (vm, node, frame, 1);
+ return udp_encap_inline (vm, node, frame, AF_IP6, AF_IP4);
}
-/* *INDENT-OFF* */
+VLIB_NODE_FN (udp6_encap_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+ return udp_encap_inline (vm, node, frame, AF_IP6, N_AF);
+}
+
+VLIB_REGISTER_NODE (udp4o4_encap_node) = {
+ .name = "udp4o4-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp4_encap_trace,
+ .n_next_nodes = 0,
+};
+
+VLIB_REGISTER_NODE (udp6o4_encap_node) = {
+ .name = "udp6o4-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp4_encap_trace,
+ .n_next_nodes = 0,
+ .sibling_of = "udp4o4-encap",
+};
+
VLIB_REGISTER_NODE (udp4_encap_node) = {
.name = "udp4-encap",
.vector_size = sizeof (u32),
-
.format_trace = format_udp4_encap_trace,
+ .n_next_nodes = 0,
+ .sibling_of = "udp4o4-encap",
+};
+VLIB_REGISTER_NODE (udp6o6_encap_node) = {
+ .name = "udp6o6-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp6_encap_trace,
+ .n_next_nodes = 0,
+};
+
+VLIB_REGISTER_NODE (udp4o6_encap_node) = {
+ .name = "udp4o6-encap",
+ .vector_size = sizeof (u32),
+ .format_trace = format_udp6_encap_trace,
.n_next_nodes = 0,
+ .sibling_of = "udp6o6-encap",
};
VLIB_REGISTER_NODE (udp6_encap_node) = {
.name = "udp6-encap",
.vector_size = sizeof (u32),
-
.format_trace = format_udp6_encap_trace,
-
.n_next_nodes = 0,
+ .sibling_of = "udp6o6-encap",
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/udp/udp_error.def b/src/vnet/udp/udp_error.def
index 776d94a8ec1..ef19970ce72 100644
--- a/src/vnet/udp/udp_error.def
+++ b/src/vnet/udp/udp_error.def
@@ -15,13 +15,16 @@
* limitations under the License.
*/
-udp_error (NONE, "No error")
-udp_error (NO_LISTENER, "No listener for dst port")
-udp_error (LENGTH_ERROR, "Packets with length errors")
-udp_error (PUNT, "No listener punt")
-udp_error (ENQUEUED, "Packets enqueued")
-udp_error (FIFO_FULL, "Fifo full")
-udp_error (NOT_READY, "Connection not ready")
-udp_error (ACCEPT, "Accepted session")
-udp_error (CREATE_SESSION, "Failed to create session")
-udp_error (MQ_FULL, "Application msg queue full")
+udp_error (NONE, none, INFO, "No error")
+udp_error (NO_LISTENER, no_listener, ERROR, "No listener for dst port")
+udp_error (LENGTH_ERROR, length_error, ERROR, "Packets with length errors")
+udp_error (PUNT, punt, ERROR, "No listener punt")
+udp_error (ENQUEUED, enqueued, INFO, "Packets enqueued")
+udp_error (FIFO_FULL, fifo_full, ERROR, "Fifo full")
+udp_error (FIFO_NOMEM, fifo_nomem, ERROR, "Fifo no mem")
+udp_error (NOT_READY, not_ready, ERROR, "Connection not ready")
+udp_error (ACCEPT, accept, INFO, "Accepted session")
+udp_error (CREATE_SESSION, create_session, ERROR, "Failed to create session")
+udp_error (MQ_FULL, mq_full, ERROR, "Application msg queue full")
+udp_error (INVALID_CONNECTION, invalid_connection, ERROR, "Invalid connection")
+udp_error (PKTS_SENT, pkts_sent, INFO, "Packets sent")
diff --git a/src/vnet/udp/udp_inlines.h b/src/vnet/udp/udp_inlines.h
index e4eb0c88e83..f0dd44f48b5 100644
--- a/src/vnet/udp/udp_inlines.h
+++ b/src/vnet/udp/udp_inlines.h
@@ -21,6 +21,9 @@
#include <vnet/ip/ip6.h>
#include <vnet/udp/udp_packet.h>
#include <vnet/interface_output.h>
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
+#include <vnet/udp/udp_encap.h>
always_inline void *
vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum)
@@ -42,8 +45,39 @@ vlib_buffer_push_udp (vlib_buffer_t * b, u16 sp, u16 dp, u8 offload_csum)
return uh;
}
+/*
+ * Encode udp source port entropy value per
+ * https://datatracker.ietf.org/doc/html/rfc7510#section-3
+ */
+always_inline u16
+ip_udp_sport_entropy (vlib_buffer_t *b0)
+{
+ u16 port = clib_host_to_net_u16 (0x03 << 14);
+ port |= vnet_buffer (b0)->ip.flow_hash & 0xffff;
+ return port;
+}
+
+always_inline u32
+ip_udp_compute_flow_hash (vlib_buffer_t *b0, u8 is_ip4)
+{
+ ip4_header_t *ip4;
+ ip6_header_t *ip6;
+
+ if (is_ip4)
+ {
+ ip4 = (ip4_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ return ip4_compute_flow_hash (ip4, IP_FLOW_HASH_DEFAULT);
+ }
+ else
+ {
+ ip6 = (ip6_header_t *) (b0->data + vnet_buffer (b0)->l3_hdr_offset);
+ return ip6_compute_flow_hash (ip6, IP_FLOW_HASH_DEFAULT);
+ }
+}
+
always_inline void
-ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
+ip_udp_fixup_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 is_ip4,
+ u8 sport_entropy)
{
u16 new_l0;
udp_header_t *udp0;
@@ -71,6 +105,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
new_l0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0)
- sizeof (*ip0));
udp0->length = new_l0;
+
+ if (sport_entropy)
+ udp0->src_port = ip_udp_sport_entropy (b0);
}
else
{
@@ -87,6 +124,9 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
udp0 = (udp_header_t *) (ip0 + 1);
udp0->length = new_l0;
+ if (sport_entropy)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+
udp0->checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
ASSERT (bogus0 == 0);
@@ -97,14 +137,27 @@ ip_udp_fixup_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 is_ip4)
}
always_inline void
-ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
- u8 is_ip4)
+ip_udp_encap_one (vlib_main_t *vm, vlib_buffer_t *b0, u8 *ec0, word ec_len,
+ ip_address_family_t encap_family,
+ ip_address_family_t payload_family,
+ udp_encap_fixup_flags_t flags)
{
- vnet_calc_checksums_inline (vm, b0, is_ip4, !is_ip4);
+ u8 sport_entropy = (flags & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
+
+ if (payload_family < N_AF)
+ {
+ vnet_calc_checksums_inline (vm, b0, payload_family == AF_IP4,
+ payload_family == AF_IP6);
+
+ /* Сalculate flow hash to be used for entropy */
+ if (sport_entropy && 0 == vnet_buffer (b0)->ip.flow_hash)
+ vnet_buffer (b0)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b0, payload_family == AF_IP4);
+ }
vlib_buffer_advance (b0, -ec_len);
- if (is_ip4)
+ if (encap_family == AF_IP4)
{
ip4_header_t *ip0;
@@ -112,7 +165,7 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
/* Apply the encap string. */
clib_memcpy_fast (ip0, ec0, ec_len);
- ip_udp_fixup_one (vm, b0, 1);
+ ip_udp_fixup_one (vm, b0, 1, sport_entropy);
}
else
{
@@ -122,26 +175,42 @@ ip_udp_encap_one (vlib_main_t * vm, vlib_buffer_t * b0, u8 * ec0, word ec_len,
/* Apply the encap string. */
clib_memcpy_fast (ip0, ec0, ec_len);
- ip_udp_fixup_one (vm, b0, 0);
+ ip_udp_fixup_one (vm, b0, 0, sport_entropy);
}
}
always_inline void
-ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
- u8 * ec0, u8 * ec1, word ec_len, u8 is_v4)
+ip_udp_encap_two (vlib_main_t *vm, vlib_buffer_t *b0, vlib_buffer_t *b1,
+ u8 *ec0, u8 *ec1, word ec_len,
+ ip_address_family_t encap_family,
+ ip_address_family_t payload_family,
+ udp_encap_fixup_flags_t flags0,
+ udp_encap_fixup_flags_t flags1)
{
u16 new_l0, new_l1;
udp_header_t *udp0, *udp1;
+ int payload_ip4 = (payload_family == AF_IP4);
+ int sport_entropy0 = (flags0 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
+ int sport_entropy1 = (flags1 & UDP_ENCAP_FIXUP_UDP_SRC_PORT_ENTROPY) != 0;
- ASSERT (_vec_len (ec0) == _vec_len (ec1));
-
- vnet_calc_checksums_inline (vm, b0, is_v4, !is_v4);
- vnet_calc_checksums_inline (vm, b1, is_v4, !is_v4);
+ if (payload_family < N_AF)
+ {
+ vnet_calc_checksums_inline (vm, b0, payload_ip4, !payload_ip4);
+ vnet_calc_checksums_inline (vm, b1, payload_ip4, !payload_ip4);
+
+ /* Сalculate flow hash to be used for entropy */
+ if (sport_entropy0 && 0 == vnet_buffer (b0)->ip.flow_hash)
+ vnet_buffer (b0)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b0, payload_ip4);
+ if (sport_entropy1 && 0 == vnet_buffer (b1)->ip.flow_hash)
+ vnet_buffer (b1)->ip.flow_hash =
+ ip_udp_compute_flow_hash (b1, payload_ip4);
+ }
vlib_buffer_advance (b0, -ec_len);
vlib_buffer_advance (b1, -ec_len);
- if (is_v4)
+ if (encap_family == AF_IP4)
{
ip4_header_t *ip0, *ip1;
ip_csum_t sum0, sum1;
@@ -185,6 +254,11 @@ ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
sizeof (*ip1));
udp0->length = new_l0;
udp1->length = new_l1;
+
+ if (sport_entropy0)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+ if (sport_entropy1)
+ udp1->src_port = ip_udp_sport_entropy (b1);
}
else
{
@@ -212,6 +286,11 @@ ip_udp_encap_two (vlib_main_t * vm, vlib_buffer_t * b0, vlib_buffer_t * b1,
udp0->length = new_l0;
udp1->length = new_l1;
+ if (sport_entropy0)
+ udp0->src_port = ip_udp_sport_entropy (b0);
+ if (sport_entropy1)
+ udp1->src_port = ip_udp_sport_entropy (b1);
+
udp0->checksum =
ip6_tcp_udp_icmp_compute_checksum (vm, b0, ip0, &bogus0);
udp1->checksum =
diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c
index c76c1b796bd..a90461186c1 100644
--- a/src/vnet/udp/udp_input.c
+++ b/src/vnet/udp/udp_input.c
@@ -26,8 +26,8 @@
#include <vnet/udp/udp_packet.h>
#include <vnet/session/session.h>
-static char *udp_error_strings[] = {
-#define udp_error(n,s) s,
+static vlib_error_desc_t udp_error_counters[] = {
+#define udp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
#include "udp_error.def"
#undef udp_error
};
@@ -115,6 +115,7 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
uc->c_fib_index = listener->c_fib_index;
uc->mss = listener->mss;
uc->flags |= UDP_CONN_F_CONNECTED;
+ uc->cfg_flags = listener->cfg_flags;
if (session_dgram_accept (&uc->connection, listener->c_s_index,
listener->c_thread_index))
@@ -122,8 +123,8 @@ udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
udp_connection_free (uc);
return 0;
}
- udp_connection_share_port (clib_net_to_host_u16
- (uc->c_lcl_port), uc->c_is_ip4);
+
+ udp_connection_share_port (uc->c_lcl_port, uc->c_is_ip4);
return uc;
}
@@ -135,37 +136,46 @@ udp_connection_enqueue (udp_connection_t * uc0, session_t * s0,
int wrote0;
if (!(uc0->flags & UDP_CONN_F_CONNECTED))
- clib_spinlock_lock (&uc0->rx_lock);
+ {
+ clib_spinlock_lock (&uc0->rx_lock);
+
+ wrote0 = session_enqueue_dgram_connection_cl (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event);
+
+ clib_spinlock_unlock (&uc0->rx_lock);
+
+ /* Expect cl udp enqueue to fail because fifo enqueue */
+ if (PREDICT_FALSE (wrote0 == 0))
+ *error0 = UDP_ERROR_FIFO_FULL;
+
+ return;
+ }
if (svm_fifo_max_enqueue_prod (s0->rx_fifo)
< hdr0->data_length + sizeof (session_dgram_hdr_t))
{
*error0 = UDP_ERROR_FIFO_FULL;
- goto unlock_rx_lock;
+ return;
}
/* If session is owned by another thread and rx event needed,
* enqueue event now while we still have the peeker lock */
if (s0->thread_index != thread_index)
{
- wrote0 = session_enqueue_dgram_connection (s0, hdr0, b,
- TRANSPORT_PROTO_UDP,
- /* queue event */ 0);
- if (queue_event && !svm_fifo_has_event (s0->rx_fifo))
- session_enqueue_notify (s0);
+ wrote0 = session_enqueue_dgram_connection2 (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP,
+ queue_event && !svm_fifo_has_event (s0->rx_fifo));
}
else
{
- wrote0 = session_enqueue_dgram_connection (s0, hdr0, b,
- TRANSPORT_PROTO_UDP,
- queue_event);
+ wrote0 = session_enqueue_dgram_connection (
+ s0, hdr0, b, TRANSPORT_PROTO_UDP, queue_event);
}
- ASSERT (wrote0 > 0);
-
-unlock_rx_lock:
- if (!(uc0->flags & UDP_CONN_F_CONNECTED))
- clib_spinlock_unlock (&uc0->rx_lock);
+ /* In some rare cases, session_enqueue_dgram_connection can fail because a
+ * chunk cannot be allocated in the RX FIFO */
+ if (PREDICT_FALSE (wrote0 == 0))
+ *error0 = UDP_ERROR_FIFO_NOMEM;
}
always_inline session_t *
@@ -184,6 +194,7 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr,
hdr->lcl_port = udp->dst_port;
hdr->rmt_port = udp->src_port;
hdr->is_ip4 = is_ip4;
+ hdr->gso_size = 0;
if (is_ip4)
{
@@ -213,6 +224,10 @@ udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr,
udp->src_port, TRANSPORT_PROTO_UDP);
}
+ /* Set the sw_if_index[VLIB_RX] to the interface we received
+ * the connection on (the local interface) */
+ vnet_buffer (b)->sw_if_index[VLIB_RX] = vnet_buffer (b)->ip.rx_sw_if_index;
+
if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
b->current_length = hdr->data_length;
else
@@ -226,10 +241,9 @@ always_inline uword
udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * frame, u8 is_ip4)
{
- u32 n_left_from, *from, errors, *first_buffer;
+ u32 thread_index = vm->thread_index, n_left_from, *from, *first_buffer;
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
u16 err_counters[UDP_N_ERROR] = { 0 };
- u32 thread_index = vm->thread_index;
from = first_buffer = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
@@ -251,15 +265,11 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
goto done;
}
- /*
- * If session exists pool peeker lock is taken at this point unless
- * the session is already on the right thread or is a listener
- */
-
if (s0->session_state == SESSION_STATE_OPENED)
{
u8 queue_event = 1;
uc0 = udp_connection_from_transport (session_get_transport (s0));
+ uc0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
if (uc0->flags & UDP_CONN_F_CONNECTED)
{
if (s0->thread_index != thread_index)
@@ -273,10 +283,8 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
ASSERT (s0->session_index == uc0->c_s_index);
/*
- * Drop the peeker lock on pool resize and ask session
- * layer for a new session.
+ * Ask session layer for a new session.
*/
- session_pool_remove_peeker (s0->thread_index);
session_dgram_connect_notify (&uc0->connection,
s0->thread_index, &s0);
queue_event = 0;
@@ -286,9 +294,9 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0],
queue_event, &error0);
- session_pool_remove_peeker (s0->thread_index);
}
- else if (s0->session_state == SESSION_STATE_READY)
+ else if (s0->session_state == SESSION_STATE_READY ||
+ s0->session_state == SESSION_STATE_ACCEPTING)
{
uc0 = udp_connection_from_transport (session_get_transport (s0));
udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
@@ -306,6 +314,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
goto done;
}
s0 = session_get (uc0->c_s_index, uc0->c_thread_index);
+ uc0->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
error0 = UDP_ERROR_ACCEPT;
}
udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
@@ -314,7 +323,6 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
else
{
error0 = UDP_ERROR_NOT_READY;
- session_pool_remove_peeker (s0->thread_index);
}
done:
@@ -328,9 +336,7 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
}
vlib_buffer_free (vm, first_buffer, frame->n_vectors);
- errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP,
- thread_index);
- err_counters[UDP_ERROR_MQ_FULL] = errors;
+ session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP, thread_index);
udp_store_err_counters (vm, is_ip4, err_counters);
return frame->n_vectors;
}
@@ -342,7 +348,6 @@ udp4_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return udp46_input_inline (vm, node, frame, 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_input_node) =
{
.function = udp4_input,
@@ -350,8 +355,8 @@ VLIB_REGISTER_NODE (udp4_input_node) =
.vector_size = sizeof (u32),
.format_trace = format_udp_input_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (udp_error_strings),
- .error_strings = udp_error_strings,
+ .n_errors = UDP_N_ERROR,
+ .error_counters = udp_error_counters,
.n_next_nodes = UDP_INPUT_N_NEXT,
.next_nodes = {
#define _(s, n) [UDP_INPUT_NEXT_##s] = n,
@@ -359,7 +364,6 @@ VLIB_REGISTER_NODE (udp4_input_node) =
#undef _
},
};
-/* *INDENT-ON* */
static uword
udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
@@ -368,7 +372,6 @@ udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
return udp46_input_inline (vm, node, frame, 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp6_input_node) =
{
.function = udp6_input,
@@ -376,8 +379,8 @@ VLIB_REGISTER_NODE (udp6_input_node) =
.vector_size = sizeof (u32),
.format_trace = format_udp_input_trace,
.type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (udp_error_strings),
- .error_strings = udp_error_strings,
+ .n_errors = UDP_N_ERROR,
+ .error_counters = udp_error_counters,
.n_next_nodes = UDP_INPUT_N_NEXT,
.next_nodes = {
#define _(s, n) [UDP_INPUT_NEXT_##s] = n,
@@ -385,7 +388,6 @@ VLIB_REGISTER_NODE (udp6_input_node) =
#undef _
},
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/udp/udp_local.c b/src/vnet/udp/udp_local.c
index 61aafaf8896..6531b73cd11 100644
--- a/src/vnet/udp/udp_local.c
+++ b/src/vnet/udp/udp_local.c
@@ -36,7 +36,11 @@ typedef struct
u8 bound;
} udp_local_rx_trace_t;
-#define UDP_NO_NODE_SET ((u16) ~0)
+static vlib_error_desc_t udp_error_counters[] = {
+#define udp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
+#include "udp_error.def"
+#undef udp_error
+};
#ifndef CLIB_MARCH_VARIANT
u8 *
@@ -54,6 +58,48 @@ format_udp_rx_trace (u8 * s, va_list * args)
}
#endif /* CLIB_MARCH_VARIANT */
+always_inline void
+udp_dispatch_error (vlib_node_runtime_t *node, vlib_buffer_t *b, u32 advance,
+ u8 is_ip4, u32 *next)
+{
+ udp_main_t *um = &udp_main;
+ u8 punt_unknown = is_ip4 ? um->punt_unknown4 : um->punt_unknown6;
+
+ if (PREDICT_FALSE (punt_unknown))
+ {
+ vlib_buffer_advance (b, -(word) advance);
+ b->error = node->errors[UDP_ERROR_PUNT];
+ *next = UDP_LOCAL_NEXT_PUNT;
+ }
+ else if (um->icmp_send_unreachable_disabled)
+ {
+ *next = UDP_LOCAL_NEXT_DROP;
+ b->error = node->errors[UDP_ERROR_NO_LISTENER];
+ }
+ else
+ {
+ /* move the pointer back so icmp-error can find the ip packet header */
+ vlib_buffer_advance (b, -(word) advance);
+
+ if (is_ip4)
+ {
+ icmp4_error_set_vnet_buffer (
+ b, ICMP4_destination_unreachable,
+ ICMP4_destination_unreachable_port_unreachable, 0);
+ b->error = node->errors[UDP_ERROR_NO_LISTENER];
+ *next = UDP_LOCAL_NEXT_ICMP;
+ }
+ else
+ {
+ icmp6_error_set_vnet_buffer (
+ b, ICMP6_destination_unreachable,
+ ICMP6_destination_unreachable_port_unreachable, 0);
+ b->error = node->errors[UDP_ERROR_NO_LISTENER];
+ *next = UDP_LOCAL_NEXT_ICMP;
+ }
+ }
+}
+
always_inline uword
udp46_local_inline (vlib_main_t * vm,
vlib_node_runtime_t * node,
@@ -61,7 +107,6 @@ udp46_local_inline (vlib_main_t * vm,
{
udp_main_t *um = &udp_main;
__attribute__ ((unused)) u32 n_left_from, next_index, *from, *to_next;
- u8 punt_unknown = is_ip4 ? um->punt_unknown4 : um->punt_unknown6;
u16 *next_by_dst_port = (is_ip4 ?
um->next_by_dst_port4 : um->next_by_dst_port6);
from = vlib_frame_vector_args (from_frame);
@@ -80,9 +125,8 @@ udp46_local_inline (vlib_main_t * vm,
u32 bi0, bi1;
vlib_buffer_t *b0, *b1;
udp_header_t *h0 = 0, *h1 = 0;
- u32 i0, i1, dst_port0, dst_port1;
+ u32 i0, i1, next0, next1;
u32 advance0, advance1;
- u32 error0, next0, error1, next1;
/* Prefetch next iteration. */
{
@@ -124,124 +168,106 @@ udp46_local_inline (vlib_main_t * vm,
if (PREDICT_FALSE (b0->current_length < advance0 + sizeof (*h0)))
{
- error0 = UDP_ERROR_LENGTH_ERROR;
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next0 = UDP_LOCAL_NEXT_DROP;
}
else
{
vlib_buffer_advance (b0, advance0);
h0 = vlib_buffer_get_current (b0);
- error0 = UDP_ERROR_NONE;
next0 = UDP_LOCAL_NEXT_PUNT;
if (PREDICT_FALSE (clib_net_to_host_u16 (h0->length) >
vlib_buffer_length_in_chain (vm, b0)))
{
- error0 = UDP_ERROR_LENGTH_ERROR;
+ b0->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next0 = UDP_LOCAL_NEXT_DROP;
}
}
if (PREDICT_FALSE (b1->current_length < advance1 + sizeof (*h1)))
{
- error1 = UDP_ERROR_LENGTH_ERROR;
+ b1->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next1 = UDP_LOCAL_NEXT_DROP;
}
else
{
vlib_buffer_advance (b1, advance1);
h1 = vlib_buffer_get_current (b1);
- error1 = UDP_ERROR_NONE;
next1 = UDP_LOCAL_NEXT_PUNT;
if (PREDICT_FALSE (clib_net_to_host_u16 (h1->length) >
vlib_buffer_length_in_chain (vm, b1)))
{
- error1 = UDP_ERROR_LENGTH_ERROR;
+ b1->error = node->errors[UDP_ERROR_LENGTH_ERROR];
next1 = UDP_LOCAL_NEXT_DROP;
}
}
/* Index sparse array with network byte order. */
- dst_port0 = (error0 == 0) ? h0->dst_port : 0;
- dst_port1 = (error1 == 0) ? h1->dst_port : 0;
- sparse_vec_index2 (next_by_dst_port, dst_port0, dst_port1, &i0,
- &i1);
- next0 = (error0 == 0) ? vec_elt (next_by_dst_port, i0) : next0;
- next1 = (error1 == 0) ? vec_elt (next_by_dst_port, i1) : next1;
-
- if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
- next0 == UDP_NO_NODE_SET))
+ if (PREDICT_TRUE (next0 == UDP_LOCAL_NEXT_PUNT &&
+ next1 == UDP_LOCAL_NEXT_PUNT))
{
- // move the pointer back so icmp-error can find the
- // ip packet header
- vlib_buffer_advance (b0, -(word) advance0);
+ sparse_vec_index2 (next_by_dst_port, h0->dst_port, h1->dst_port,
+ &i0, &i1);
+ next0 = vec_elt (next_by_dst_port, i0);
+ next1 = vec_elt (next_by_dst_port, i1);
- if (PREDICT_FALSE (punt_unknown))
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
+ next0 == UDP_NO_NODE_SET))
{
- b0->error = node->errors[UDP_ERROR_PUNT];
- next0 = UDP_LOCAL_NEXT_PUNT;
+ udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
}
- else if (is_ip4)
+ else
+ {
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
+ }
+
+ if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
+ next1 == UDP_NO_NODE_SET))
{
- icmp4_error_set_vnet_buffer (b0,
- ICMP4_destination_unreachable,
- ICMP4_destination_unreachable_port_unreachable,
- 0);
- b0->error = node->errors[UDP_ERROR_NO_LISTENER];
- next0 = UDP_LOCAL_NEXT_ICMP;
+ udp_dispatch_error (node, b1, advance1, is_ip4, &next1);
}
else
{
- icmp6_error_set_vnet_buffer (b0,
- ICMP6_destination_unreachable,
- ICMP6_destination_unreachable_port_unreachable,
- 0);
- b0->error = node->errors[UDP_ERROR_NO_LISTENER];
- next0 = UDP_LOCAL_NEXT_ICMP;
+ b1->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b1, sizeof (*h1));
}
}
- else
+ else if (next0 == UDP_LOCAL_NEXT_PUNT)
{
- b0->error = node->errors[UDP_ERROR_NONE];
- // advance to the payload
- vlib_buffer_advance (b0, sizeof (*h0));
- }
-
- if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
- next1 == UDP_NO_NODE_SET))
- {
- // move the pointer back so icmp-error can find the
- // ip packet header
- vlib_buffer_advance (b1, -(word) advance1);
+ i0 = sparse_vec_index (next_by_dst_port, h0->dst_port);
+ next0 = vec_elt (next_by_dst_port, i0);
- if (PREDICT_FALSE (punt_unknown))
- {
- b1->error = node->errors[UDP_ERROR_PUNT];
- next1 = UDP_LOCAL_NEXT_PUNT;
- }
- else if (is_ip4)
+ if (PREDICT_FALSE (i0 == SPARSE_VEC_INVALID_INDEX ||
+ next0 == UDP_NO_NODE_SET))
{
- icmp4_error_set_vnet_buffer (b1,
- ICMP4_destination_unreachable,
- ICMP4_destination_unreachable_port_unreachable,
- 0);
- b1->error = node->errors[UDP_ERROR_NO_LISTENER];
- next1 = UDP_LOCAL_NEXT_ICMP;
+ udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
}
else
{
- icmp6_error_set_vnet_buffer (b1,
- ICMP6_destination_unreachable,
- ICMP6_destination_unreachable_port_unreachable,
- 0);
- b1->error = node->errors[UDP_ERROR_NO_LISTENER];
- next1 = UDP_LOCAL_NEXT_ICMP;
+ b0->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b0, sizeof (*h0));
}
}
- else
+ else if (next1 == UDP_LOCAL_NEXT_PUNT)
{
- b1->error = node->errors[UDP_ERROR_NONE];
- // advance to the payload
- vlib_buffer_advance (b1, sizeof (*h1));
+ i1 = sparse_vec_index (next_by_dst_port, h1->dst_port);
+ next1 = vec_elt (next_by_dst_port, i1);
+
+ if (PREDICT_FALSE (i1 == SPARSE_VEC_INVALID_INDEX ||
+ next1 == UDP_NO_NODE_SET))
+ {
+ udp_dispatch_error (node, b1, advance1, is_ip4, &next1);
+ }
+ else
+ {
+ b1->error = node->errors[UDP_ERROR_NONE];
+ // advance to the payload
+ vlib_buffer_advance (b1, sizeof (*h1));
+ }
}
if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
@@ -315,33 +341,7 @@ udp46_local_inline (vlib_main_t * vm,
if (PREDICT_FALSE ((i0 == SPARSE_VEC_INVALID_INDEX) ||
next0 == UDP_NO_NODE_SET))
{
- // move the pointer back so icmp-error can find the
- // ip packet header
- vlib_buffer_advance (b0, -(word) advance0);
-
- if (PREDICT_FALSE (punt_unknown))
- {
- b0->error = node->errors[UDP_ERROR_PUNT];
- next0 = UDP_LOCAL_NEXT_PUNT;
- }
- else if (is_ip4)
- {
- icmp4_error_set_vnet_buffer (b0,
- ICMP4_destination_unreachable,
- ICMP4_destination_unreachable_port_unreachable,
- 0);
- b0->error = node->errors[UDP_ERROR_NO_LISTENER];
- next0 = UDP_LOCAL_NEXT_ICMP;
- }
- else
- {
- icmp6_error_set_vnet_buffer (b0,
- ICMP6_destination_unreachable,
- ICMP6_destination_unreachable_port_unreachable,
- 0);
- b0->error = node->errors[UDP_ERROR_NO_LISTENER];
- next0 = UDP_LOCAL_NEXT_ICMP;
- }
+ udp_dispatch_error (node, b0, advance0, is_ip4, &next0);
}
else
{
@@ -379,12 +379,6 @@ udp46_local_inline (vlib_main_t * vm,
return from_frame->n_vectors;
}
-static char *udp_error_strings[] = {
-#define udp_error(n,s) s,
-#include "udp_error.def"
-#undef udp_error
-};
-
VLIB_NODE_FN (udp4_local_node) (vlib_main_t * vm,
vlib_node_runtime_t * node,
vlib_frame_t * from_frame)
@@ -399,14 +393,13 @@ VLIB_NODE_FN (udp6_local_node) (vlib_main_t * vm,
return udp46_local_inline (vm, node, from_frame, 0 /* is_ip4 */ );
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp4_local_node) = {
.name = "ip4-udp-lookup",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = UDP_N_ERROR,
- .error_strings = udp_error_strings,
+ .error_counters = udp_error_counters,
.n_next_nodes = UDP_LOCAL_N_NEXT,
.next_nodes = {
@@ -419,16 +412,14 @@ VLIB_REGISTER_NODE (udp4_local_node) = {
.format_trace = format_udp_rx_trace,
.unformat_buffer = unformat_udp_header,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (udp6_local_node) = {
.name = "ip6-udp-lookup",
/* Takes a vector of packets. */
.vector_size = sizeof (u32),
.n_errors = UDP_N_ERROR,
- .error_strings = udp_error_strings,
+ .error_counters = udp_error_counters,
.n_next_nodes = UDP_LOCAL_N_NEXT,
.next_nodes = {
@@ -441,7 +432,6 @@ VLIB_REGISTER_NODE (udp6_local_node) = {
.format_trace = format_udp_rx_trace,
.unformat_buffer = unformat_udp_header,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
void
@@ -529,16 +519,12 @@ u8
udp_is_valid_dst_port (udp_dst_port_t dst_port, u8 is_ip4)
{
udp_main_t *um = &udp_main;
- u16 *n;
-
- if (is_ip4)
- n = sparse_vec_validate (um->next_by_dst_port4,
- clib_host_to_net_u16 (dst_port));
- else
- n = sparse_vec_validate (um->next_by_dst_port6,
- clib_host_to_net_u16 (dst_port));
-
- return (n[0] != SPARSE_VEC_INVALID_INDEX && n[0] != UDP_NO_NODE_SET);
+ u16 *next_by_dst_port =
+ is_ip4 ? um->next_by_dst_port4 : um->next_by_dst_port6;
+ uword index =
+ sparse_vec_index (next_by_dst_port, clib_host_to_net_u16 (dst_port));
+ return (index != SPARSE_VEC_INVALID_INDEX &&
+ vec_elt (next_by_dst_port, index) != UDP_NO_NODE_SET);
}
void
diff --git a/src/vnet/udp/udp_output.c b/src/vnet/udp/udp_output.c
new file mode 100644
index 00000000000..22b94141365
--- /dev/null
+++ b/src/vnet/udp/udp_output.c
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vnet/udp/udp.h>
+#include <vnet/ip/ip4_inlines.h>
+#include <vnet/ip/ip6_inlines.h>
+
+#define udp_node_index(node_id, is_ip4) \
+ ((is_ip4) ? udp4_##node_id##_node.index : udp6_##node_id##_node.index)
+
+typedef enum udp_output_next_
+{
+ UDP_OUTPUT_NEXT_DROP,
+ UDP_OUTPUT_NEXT_IP_LOOKUP,
+ UDP_OUTPUT_N_NEXT
+} udp_output_next_t;
+
+#define foreach_udp4_output_next \
+ _ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip4-lookup")
+
+#define foreach_udp6_output_next \
+ _ (DROP, "error-drop") \
+ _ (IP_LOOKUP, "ip6-lookup")
+
+static vlib_error_desc_t udp_output_error_counters[] = {
+#define udp_error(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
+#include <vnet/udp/udp_error.def>
+#undef udp_error
+};
+
+typedef struct udp_tx_trace_
+{
+ udp_header_t udp_header;
+ udp_connection_t udp_connection;
+} udp_tx_trace_t;
+
+static u8 *
+format_udp_tx_trace (u8 *s, va_list *args)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+ CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+ udp_tx_trace_t *t = va_arg (*args, udp_tx_trace_t *);
+ udp_connection_t *uc = &t->udp_connection;
+ u32 indent = format_get_indent (s);
+
+ s = format (s, "%U\n%U%U", format_udp_connection, uc, 1, format_white_space,
+ indent, format_udp_header, &t->udp_header, 128);
+
+ return s;
+}
+
+always_inline udp_connection_t *
+udp_output_get_connection (vlib_buffer_t *b, u32 thread_index)
+{
+ if (PREDICT_FALSE (vnet_buffer (b)->tcp.flags & UDP_CONN_F_LISTEN))
+ return udp_listener_get (vnet_buffer (b)->tcp.connection_index);
+
+ return udp_connection_get (vnet_buffer (b)->tcp.connection_index,
+ thread_index);
+}
+
+static void
+udp46_output_trace_frame (vlib_main_t *vm, vlib_node_runtime_t *node,
+ u32 *to_next, u32 n_bufs)
+{
+ udp_connection_t *uc;
+ udp_tx_trace_t *t;
+ vlib_buffer_t *b;
+ udp_header_t *uh;
+ int i;
+
+ for (i = 0; i < n_bufs; i++)
+ {
+ b = vlib_get_buffer (vm, to_next[i]);
+ if (!(b->flags & VLIB_BUFFER_IS_TRACED))
+ continue;
+ uh = vlib_buffer_get_current (b);
+ uc = udp_output_get_connection (b, vm->thread_index);
+ t = vlib_add_trace (vm, node, b, sizeof (*t));
+ clib_memcpy_fast (&t->udp_header, uh, sizeof (t->udp_header));
+ clib_memcpy_fast (&t->udp_connection, uc, sizeof (t->udp_connection));
+ }
+}
+
+always_inline void
+udp_output_handle_packet (udp_connection_t *uc0, vlib_buffer_t *b0,
+ vlib_node_runtime_t *error_node, u16 *next0,
+ u8 is_ip4)
+{
+ /* If next_index is not drop use it */
+ if (uc0->next_node_index)
+ {
+ *next0 = uc0->next_node_index;
+ vnet_buffer (b0)->tcp.next_node_opaque = uc0->next_node_opaque;
+ }
+ else
+ {
+ *next0 = UDP_OUTPUT_NEXT_IP_LOOKUP;
+ }
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = uc0->c_fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = uc0->sw_if_index;
+}
+
+always_inline uword
+udp46_output_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+ vlib_frame_t *frame, int is_ip4)
+{
+ u32 n_left_from, *from, thread_index = vm->thread_index;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u16 nexts[VLIB_FRAME_SIZE], *next;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ udp46_output_trace_frame (vm, node, from, n_left_from);
+
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+ b = bufs;
+ next = nexts;
+
+ while (n_left_from >= 4)
+ {
+ udp_connection_t *uc0, *uc1;
+
+ vlib_prefetch_buffer_header (b[2], STORE);
+ CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+
+ vlib_prefetch_buffer_header (b[3], STORE);
+ CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+
+ uc0 = udp_output_get_connection (b[0], thread_index);
+ uc1 = udp_output_get_connection (b[1], thread_index);
+
+ if (PREDICT_TRUE (!uc0 + !uc1 == 0))
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4);
+ }
+ else
+ {
+ if (uc0 != 0)
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ }
+ else
+ {
+ b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[0] = UDP_OUTPUT_NEXT_DROP;
+ }
+ if (uc1 != 0)
+ {
+ udp_output_handle_packet (uc1, b[1], node, &next[1], is_ip4);
+ }
+ else
+ {
+ b[1]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[1] = UDP_OUTPUT_NEXT_DROP;
+ }
+ }
+
+ b += 2;
+ next += 2;
+ n_left_from -= 2;
+ }
+ while (n_left_from > 0)
+ {
+ udp_connection_t *uc0;
+
+ if (n_left_from > 1)
+ {
+ vlib_prefetch_buffer_header (b[1], STORE);
+ CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ }
+
+ uc0 = udp_output_get_connection (b[0], thread_index);
+
+ if (PREDICT_TRUE (uc0 != 0))
+ {
+ udp_output_handle_packet (uc0, b[0], node, &next[0], is_ip4);
+ }
+ else
+ {
+ b[0]->error = node->errors[UDP_ERROR_INVALID_CONNECTION];
+ next[0] = UDP_OUTPUT_NEXT_DROP;
+ }
+
+ b += 1;
+ next += 1;
+ n_left_from -= 1;
+ }
+
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+ vlib_node_increment_counter (vm, udp_node_index (output, is_ip4),
+ UDP_ERROR_PKTS_SENT, frame->n_vectors);
+ return frame->n_vectors;
+}
+
+VLIB_NODE_FN (udp4_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return udp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */);
+}
+
+VLIB_NODE_FN (udp6_output_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
+{
+ return udp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */);
+}
+
+VLIB_REGISTER_NODE (udp4_output_node) =
+{
+ .name = "udp4-output",
+ .vector_size = sizeof (u32),
+ .n_errors = UDP_N_ERROR,
+ .protocol_hint = VLIB_NODE_PROTO_HINT_UDP,
+ .error_counters = udp_output_error_counters,
+ .n_next_nodes = UDP_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n,
+ foreach_udp4_output_next
+#undef _
+ },
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_tx_trace,
+};
+
+VLIB_REGISTER_NODE (udp6_output_node) =
+{
+ .name = "udp6-output",
+ .vector_size = sizeof (u32),
+ .n_errors = UDP_N_ERROR,
+ .protocol_hint = VLIB_NODE_PROTO_HINT_UDP,
+ .error_counters = udp_output_error_counters,
+ .n_next_nodes = UDP_OUTPUT_N_NEXT,
+ .next_nodes = {
+#define _(s, n) [UDP_OUTPUT_NEXT_##s] = n,
+ foreach_udp6_output_next
+#undef _
+ },
+ .format_buffer = format_udp_header,
+ .format_trace = format_udp_tx_trace,
+};
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/unix/gdb_funcs.c b/src/vnet/unix/gdb_funcs.c
index 886d849c173..d6fdc985bd9 100644
--- a/src/vnet/unix/gdb_funcs.c
+++ b/src/vnet/unix/gdb_funcs.c
@@ -318,13 +318,11 @@ show_gdb_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_gdb_funcs_command, static) = {
.path = "show gdb",
.short_help = "Describe functions which can be called from gdb",
.function = show_gdb_command_fn,
};
-/* *INDENT-ON* */
vlib_buffer_t *
vgb (u32 bi)
@@ -421,6 +419,12 @@ gdb_dump_trajectory_trace (u32 bi)
#endif
}
+void
+gdb_dump_buffer (vlib_buffer_t *b)
+{
+ fformat (stderr, "%U\n", format_vnet_buffer, b);
+}
+
/* Cafeteria plan, maybe you don't want these functions */
clib_error_t *
gdb_func_init (vlib_main_t * vm)
diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c
index 4a848349ae1..f1102dc321e 100644
--- a/src/vnet/unix/tuntap.c
+++ b/src/vnet/unix/tuntap.c
@@ -172,7 +172,7 @@ tuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
/* Re-set iovecs if present. */
if (tm->threads[thread_index].iovecs)
- _vec_len (tm->threads[thread_index].iovecs) = 0;
+ vec_set_len (tm->threads[thread_index].iovecs, 0);
/** VLIB buffer chain -> Unix iovec(s). */
vec_add2 (tm->threads[thread_index].iovecs, iov, 1);
@@ -217,14 +217,12 @@ tuntap_tx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
return n_packets;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tuntap_tx_node,static) = {
.function = tuntap_tx,
.name = "tuntap-tx",
.type = VLIB_NODE_TYPE_INTERNAL,
.vector_size = 4,
};
-/* *INDENT-ON* */
/**
* @brief TUNTAP receive node
@@ -260,7 +258,7 @@ tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
vlib_buffer_alloc (vm,
tm->threads[thread_index].rx_buffers + n_left,
VLIB_FRAME_SIZE - n_left);
- _vec_len (tm->threads[thread_index].rx_buffers) = n_left + n_alloc;
+ vec_set_len (tm->threads[thread_index].rx_buffers, n_left + n_alloc);
}
}
@@ -324,7 +322,7 @@ tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
+ VNET_INTERFACE_COUNTER_RX,
thread_index, tm->sw_if_index, 1, n_bytes_in_packet);
- _vec_len (tm->threads[thread_index].rx_buffers) = i_rx;
+ vec_set_len (tm->threads[thread_index].rx_buffers, i_rx);
}
b = vlib_get_buffer (vm, bi);
@@ -366,7 +364,7 @@ tuntap_rx (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)
next_index = VNET_DEVICE_INPUT_NEXT_DROP;
}
- vnet_feature_start_device_input_x1 (tm->sw_if_index, &next_index, b);
+ vnet_feature_start_device_input (tm->sw_if_index, &next_index, b);
vlib_set_next_frame_buffer (vm, node, next_index, bi);
@@ -385,7 +383,6 @@ static char *tuntap_rx_error_strings[] = {
"unknown packet type",
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (tuntap_rx_node,static) = {
.function = tuntap_rx,
.flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
@@ -397,7 +394,6 @@ VLIB_REGISTER_NODE (tuntap_rx_node,static) = {
.n_errors = 1,
.error_strings = tuntap_rx_error_strings,
};
-/* *INDENT-ON* */
/**
* @brief Gets called when file descriptor is ready from epoll.
@@ -624,12 +620,12 @@ tuntap_config (vlib_main_t * vm, unformat_input_t * input)
if (have_normal_interface)
{
vnet_main_t *vnm = vnet_get_main ();
- error = ethernet_register_interface
- (vnm, tuntap_dev_class.index, 0 /* device instance */ ,
- tm->ether_dst_mac /* ethernet address */ ,
- &tm->hw_if_index, 0 /* flag change */ );
- if (error)
- clib_error_report (error);
+ vnet_eth_interface_registration_t eir = {};
+
+ eir.dev_class_index = tuntap_dev_class.index;
+ eir.address = tm->ether_dst_mac;
+ tm->hw_if_index = vnet_eth_register_interface (vnm, &eir);
+
tm->sw_if_index = tm->hw_if_index;
vm->os_punt_frame = tuntap_nopunt_frame;
}
@@ -912,7 +908,7 @@ tuntap_punt_frame (vlib_main_t * vm,
vlib_node_runtime_t * node, vlib_frame_t * frame)
{
tuntap_tx (vm, node, frame);
- vlib_frame_free (vm, node, frame);
+ vlib_frame_free (vm, frame);
}
/**
@@ -930,15 +926,13 @@ tuntap_nopunt_frame (vlib_main_t * vm,
u32 *buffers = vlib_frame_vector_args (frame);
uword n_packets = frame->n_vectors;
vlib_buffer_free (vm, buffers, n_packets);
- vlib_frame_free (vm, node, frame);
+ vlib_frame_free (vm, frame);
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (tuntap_interface_class,static) = {
.name = "tuntap",
.flags = VNET_HW_INTERFACE_CLASS_FLAG_P2P,
};
-/* *INDENT-ON* */
/**
* @brief Format tun/tap interface name
@@ -984,13 +978,11 @@ tuntap_intfc_tx (vlib_main_t * vm,
return n_buffers;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (tuntap_dev_class,static) = {
.name = "tuntap",
.tx_function = tuntap_intfc_tx,
.format_device_name = format_tuntap_interface_name,
};
-/* *INDENT-ON* */
/**
* @brief tun/tap node init
@@ -1025,12 +1017,10 @@ tuntap_init (vlib_main_t * vm)
return 0;
}
-/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (tuntap_init) =
{
.runs_after = VLIB_INITS("ip4_init"),
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vnet/util/throttle.c b/src/vnet/util/throttle.c
index 0985b4a81a3..8b8e030bf53 100644
--- a/src/vnet/util/throttle.c
+++ b/src/vnet/util/throttle.c
@@ -16,17 +16,18 @@
#include <vnet/util/throttle.h>
void
-throttle_init (throttle_t * t, u32 n_threads, f64 time)
+throttle_init (throttle_t *t, u32 n_threads, u32 buckets, f64 time)
{
u32 i;
t->time = time;
+ t->buckets = 1 << max_log2 (buckets);
vec_validate (t->bitmaps, n_threads);
vec_validate (t->seeds, n_threads);
vec_validate (t->last_seed_change_time, n_threads);
for (i = 0; i < n_threads; i++)
- vec_validate (t->bitmaps[i], (THROTTLE_BITS / BITS (uword)) - 1);
+ clib_bitmap_alloc (t->bitmaps[i], t->buckets);
}
/*
diff --git a/src/vnet/util/throttle.h b/src/vnet/util/throttle.h
index 38ace280131..53435c4a359 100644
--- a/src/vnet/util/throttle.h
+++ b/src/vnet/util/throttle.h
@@ -31,11 +31,13 @@ typedef struct throttle_t_
uword **bitmaps;
u64 *seeds;
f64 *last_seed_change_time;
+ u32 buckets;
} throttle_t;
#define THROTTLE_BITS (512)
-extern void throttle_init (throttle_t * t, u32 n_threads, f64 time);
+extern void throttle_init (throttle_t *t, u32 n_threads, u32 buckets,
+ f64 time);
always_inline u64
throttle_seed (throttle_t * t, u32 thread_index, f64 time_now)
@@ -43,7 +45,7 @@ throttle_seed (throttle_t * t, u32 thread_index, f64 time_now)
if (time_now - t->last_seed_change_time[thread_index] > t->time)
{
(void) random_u64 (&t->seeds[thread_index]);
- clib_memset (t->bitmaps[thread_index], 0, THROTTLE_BITS / BITS (u8));
+ clib_bitmap_zero (t->bitmaps[thread_index]);
t->last_seed_change_time[thread_index] = time_now;
}
@@ -53,21 +55,14 @@ throttle_seed (throttle_t * t, u32 thread_index, f64 time_now)
always_inline int
throttle_check (throttle_t * t, u32 thread_index, u64 hash, u64 seed)
{
- int drop;
- uword m;
- u32 w;
+ ASSERT (is_pow2 (t->buckets));
hash = clib_xxhash (hash ^ seed);
/* Select bit number */
- hash &= THROTTLE_BITS - 1;
- w = hash / BITS (uword);
- m = (uword) 1 << (hash % BITS (uword));
+ hash &= t->buckets - 1;
- drop = (t->bitmaps[thread_index][w] & m) != 0;
- t->bitmaps[thread_index][w] |= m;
-
- return (drop);
+ return clib_bitmap_set_no_check (t->bitmaps[thread_index], hash, 1);
}
#endif
diff --git a/src/vnet/vnet.h b/src/vnet/vnet.h
index 24afe633af2..54988aec667 100644
--- a/src/vnet/vnet.h
+++ b/src/vnet/vnet.h
@@ -45,6 +45,7 @@
#include <vppinfra/types.h>
#include <vppinfra/pcap.h>
+#include <vnet/error.h>
#include <vnet/buffer.h>
#include <vnet/config.h>
#include <vnet/interface.h>
@@ -70,6 +71,7 @@ typedef struct
u32 pcap_sw_if_index;
pcap_main_t pcap_main;
u32 filter_classify_table_index;
+ vlib_is_packet_traced_fn_t *current_filter_function;
vlib_error_t pcap_error_index;
} vnet_pcap_t;
diff --git a/src/vnet/vxlan-gbp/decap.c b/src/vnet/vxlan-gbp/decap.c
deleted file mode 100644
index 927c778b211..00000000000
--- a/src/vnet/vxlan-gbp/decap.c
+++ /dev/null
@@ -1,1050 +0,0 @@
-/*
- * decap.c: vxlan gbp tunnel decap packet processing
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vlib/vlib.h>
-
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-
-typedef struct
-{
- u32 next_index;
- u32 tunnel_index;
- u32 error;
- u32 vni;
- u16 sclass;
- u8 flags;
-} vxlan_gbp_rx_trace_t;
-
-static u8 *
-format_vxlan_gbp_rx_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- vxlan_gbp_rx_trace_t *t = va_arg (*args, vxlan_gbp_rx_trace_t *);
-
- if (t->tunnel_index == ~0)
- return format (s,
- "VXLAN_GBP decap error - tunnel for vni %d does not exist",
- t->vni);
- return format (s,
- "VXLAN_GBP decap from vxlan_gbp_tunnel%d vni %d sclass %d"
- " flags %U next %d error %d",
- t->tunnel_index, t->vni, t->sclass,
- format_vxlan_gbp_header_gpflags, t->flags,
- t->next_index, t->error);
-}
-
-always_inline u32
-buf_fib_index (vlib_buffer_t * b, u32 is_ip4)
-{
- u32 sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_TX];
- if (sw_if_index != (u32) ~ 0)
- return sw_if_index;
-
- u32 *fib_index_by_sw_if_index = is_ip4 ?
- ip4_main.fib_index_by_sw_if_index : ip6_main.fib_index_by_sw_if_index;
- sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX];
-
- return vec_elt (fib_index_by_sw_if_index, sw_if_index);
-}
-
-typedef vxlan4_gbp_tunnel_key_t last_tunnel_cache4;
-
-always_inline vxlan_gbp_tunnel_t *
-vxlan4_gbp_find_tunnel (vxlan_gbp_main_t * vxm, last_tunnel_cache4 * cache,
- u32 fib_index, ip4_header_t * ip4_0,
- vxlan_gbp_header_t * vxlan_gbp0)
-{
- /*
- * Check unicast first since that's where most of the traffic comes from
- * Make sure VXLAN_GBP tunnel exist according to packet SIP, DIP and VNI
- */
- vxlan4_gbp_tunnel_key_t key4;
- int rv;
-
- key4.key[1] = (((u64) fib_index << 32) |
- (vxlan_gbp0->vni_reserved &
- clib_host_to_net_u32 (0xffffff00)));
- key4.key[0] =
- (((u64) ip4_0->dst_address.as_u32 << 32) | ip4_0->src_address.as_u32);
-
- if (PREDICT_FALSE (key4.key[0] != cache->key[0] ||
- key4.key[1] != cache->key[1]))
- {
- rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_gbp_tunnel_by_key,
- &key4);
- if (PREDICT_FALSE (rv == 0))
- {
- *cache = key4;
- return (pool_elt_at_index (vxm->tunnels, cache->value));
- }
- }
- else
- {
- return (pool_elt_at_index (vxm->tunnels, cache->value));
- }
-
- /* No unicast match - try multicast */
- if (PREDICT_TRUE (!ip4_address_is_multicast (&ip4_0->dst_address)))
- return (NULL);
-
- key4.key[0] = ip4_0->dst_address.as_u32;
- /* Make sure mcast VXLAN_GBP tunnel exist by packet DIP and VNI */
- rv = clib_bihash_search_inline_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, &key4);
-
- if (PREDICT_FALSE (rv != 0))
- return (NULL);
-
- return (pool_elt_at_index (vxm->tunnels, key4.value));
-}
-
-typedef vxlan6_gbp_tunnel_key_t last_tunnel_cache6;
-
-always_inline vxlan_gbp_tunnel_t *
-vxlan6_gbp_find_tunnel (vxlan_gbp_main_t * vxm, last_tunnel_cache6 * cache,
- u32 fib_index, ip6_header_t * ip6_0,
- vxlan_gbp_header_t * vxlan_gbp0)
-{
- /* Make sure VXLAN_GBP tunnel exist according to packet SIP and VNI */
- vxlan6_gbp_tunnel_key_t key6 = {
- .key = {
- [0] = ip6_0->src_address.as_u64[0],
- [1] = ip6_0->src_address.as_u64[1],
- [2] = ((((u64) fib_index) << 32) |
- (vxlan_gbp0->vni_reserved &
- clib_host_to_net_u32 (0xffffff00))),
- }
- };
- int rv;
-
- if (PREDICT_FALSE
- (clib_bihash_key_compare_24_8 (key6.key, cache->key) == 0))
- {
- rv = clib_bihash_search_inline_24_8 (&vxm->vxlan6_gbp_tunnel_by_key,
- &key6);
- if (PREDICT_FALSE (rv != 0))
- return NULL;
-
- *cache = key6;
- }
- vxlan_gbp_tunnel_t *t0 = pool_elt_at_index (vxm->tunnels, cache->value);
-
- /* Validate VXLAN_GBP tunnel SIP against packet DIP */
- if (PREDICT_FALSE
- (!ip6_address_is_equal (&ip6_0->dst_address, &t0->src.ip6)))
- {
- /* try multicast */
- if (PREDICT_TRUE (!ip6_address_is_multicast (&ip6_0->dst_address)))
- return 0;
-
- /* Make sure mcast VXLAN_GBP tunnel exist by packet DIP and VNI */
- key6.key[0] = ip6_0->dst_address.as_u64[0];
- key6.key[1] = ip6_0->dst_address.as_u64[1];
- rv = clib_bihash_search_inline_24_8 (&vxm->vxlan6_gbp_tunnel_by_key,
- &key6);
- if (PREDICT_FALSE (rv != 0))
- return 0;
-
- }
-
- return t0;
-}
-
-always_inline vxlan_gbp_input_next_t
-vxlan_gbp_tunnel_get_next (const vxlan_gbp_tunnel_t * t, vlib_buffer_t * b0)
-{
- if (VXLAN_GBP_TUNNEL_MODE_L2 == t->mode)
- return (VXLAN_GBP_INPUT_NEXT_L2_INPUT);
- else
- {
- ethernet_header_t *e0;
- u16 type0;
-
- e0 = vlib_buffer_get_current (b0);
- vlib_buffer_advance (b0, sizeof (*e0));
- type0 = clib_net_to_host_u16 (e0->type);
- switch (type0)
- {
- case ETHERNET_TYPE_IP4:
- return (VXLAN_GBP_INPUT_NEXT_IP4_INPUT);
- case ETHERNET_TYPE_IP6:
- return (VXLAN_GBP_INPUT_NEXT_IP6_INPUT);
- }
- }
- return (VXLAN_GBP_INPUT_NEXT_DROP);
-}
-
-always_inline uword
-vxlan_gbp_input (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip4)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vnet_main_t *vnm = vxm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- vlib_combined_counter_main_t *rx_counter =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX;
- vlib_combined_counter_main_t *drop_counter =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_DROP;
- last_tunnel_cache4 last4;
- last_tunnel_cache6 last6;
- u32 pkts_decapsulated = 0;
- u32 thread_index = vlib_get_thread_index ();
-
- if (is_ip4)
- clib_memset (&last4, 0xff, sizeof last4);
- else
- clib_memset (&last6, 0xff, sizeof last6);
-
- u32 next_index = node->cached_next_index;
-
- u32 *from = vlib_frame_vector_args (from_frame);
- u32 n_left_from = from_frame->n_vectors;
-
- while (n_left_from > 0)
- {
- u32 *to_next, n_left_to_next;
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- u32 bi0 = to_next[0] = from[0];
- u32 bi1 = to_next[1] = from[1];
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- vlib_buffer_t *b0, *b1;
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
-
- /* udp leaves current_data pointing at the vxlan_gbp header */
- void *cur0 = vlib_buffer_get_current (b0);
- void *cur1 = vlib_buffer_get_current (b1);
- vxlan_gbp_header_t *vxlan_gbp0 = cur0;
- vxlan_gbp_header_t *vxlan_gbp1 = cur1;
-
- ip4_header_t *ip4_0, *ip4_1;
- ip6_header_t *ip6_0, *ip6_1;
- if (is_ip4)
- {
- ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- ip4_1 = cur1 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- }
- else
- {
- ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t);
- ip6_1 = cur1 - sizeof (udp_header_t) - sizeof (ip6_header_t);
- }
-
- u32 fi0 = buf_fib_index (b0, is_ip4);
- u32 fi1 = buf_fib_index (b1, is_ip4);
-
- vxlan_gbp_tunnel_t *t0, *t1;
- if (is_ip4)
- {
- t0 =
- vxlan4_gbp_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan_gbp0);
- t1 =
- vxlan4_gbp_find_tunnel (vxm, &last4, fi1, ip4_1, vxlan_gbp1);
- }
- else
- {
- t0 =
- vxlan6_gbp_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan_gbp0);
- t1 =
- vxlan6_gbp_find_tunnel (vxm, &last6, fi1, ip6_1, vxlan_gbp1);
- }
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b0);
- u32 len1 = vlib_buffer_length_in_chain (vm, b1);
-
- vxlan_gbp_input_next_t next0, next1;
- u8 error0 = 0, error1 = 0;
- u8 flags0 = vxlan_gbp_get_flags (vxlan_gbp0);
- u8 flags1 = vxlan_gbp_get_flags (vxlan_gbp1);
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- /* pop vxlan_gbp */
- vlib_buffer_advance (b0, sizeof *vxlan_gbp0);
- vlib_buffer_advance (b1, sizeof *vxlan_gbp1);
-
- u8 i_and_g0 = ((flags0 & VXLAN_GBP_FLAGS_GI) == VXLAN_GBP_FLAGS_GI);
- u8 i_and_g1 = ((flags1 & VXLAN_GBP_FLAGS_GI) == VXLAN_GBP_FLAGS_GI);
-
- /* Validate VXLAN_GBP tunnel encap-fib index against packet */
- if (PREDICT_FALSE (t0 == NULL || !i_and_g0))
- {
- if (t0 != NULL && !i_and_g0)
- {
- error0 = VXLAN_GBP_ERROR_BAD_FLAGS;
- vlib_increment_combined_counter
- (drop_counter, thread_index, t0->sw_if_index, 1, len0);
- next0 = VXLAN_GBP_INPUT_NEXT_DROP;
- }
- else
- {
- error0 = VXLAN_GBP_ERROR_NO_SUCH_TUNNEL;
- next0 = VXLAN_GBP_INPUT_NEXT_PUNT;
- if (is_ip4)
- b0->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP4];
- else
- b0->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP6];
- }
- b0->error = node->errors[error0];
- }
- else
- {
- next0 = vxlan_gbp_tunnel_get_next (t0, b0);
-
- /* Set packet input sw_if_index to unicast VXLAN tunnel for learning */
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;
- vlib_increment_combined_counter
- (rx_counter, thread_index, t0->sw_if_index, 1, len0);
- pkts_decapsulated++;
- }
-
- vnet_buffer2 (b0)->gbp.flags = (vxlan_gbp_get_gpflags (vxlan_gbp0) |
- VXLAN_GBP_GPFLAGS_R);
- vnet_buffer2 (b0)->gbp.sclass = vxlan_gbp_get_sclass (vxlan_gbp0);
-
-
- if (PREDICT_FALSE (t1 == NULL || !i_and_g1))
- {
- if (t1 != NULL && !i_and_g1)
- {
- error1 = VXLAN_GBP_ERROR_BAD_FLAGS;
- vlib_increment_combined_counter
- (drop_counter, thread_index, t1->sw_if_index, 1, len1);
- next1 = VXLAN_GBP_INPUT_NEXT_DROP;
- }
- else
- {
- error1 = VXLAN_GBP_ERROR_NO_SUCH_TUNNEL;
- next1 = VXLAN_GBP_INPUT_NEXT_PUNT;
- if (is_ip4)
- b1->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP4];
- else
- b1->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP6];
- }
- b1->error = node->errors[error1];
- }
- else
- {
- next1 = vxlan_gbp_tunnel_get_next (t1, b1);
-
- /* Set packet input sw_if_index to unicast VXLAN_GBP tunnel for learning */
- vnet_buffer (b1)->sw_if_index[VLIB_RX] = t1->sw_if_index;
- pkts_decapsulated++;
-
- vlib_increment_combined_counter
- (rx_counter, thread_index, t1->sw_if_index, 1, len1);
- }
-
- vnet_buffer2 (b1)->gbp.flags = (vxlan_gbp_get_gpflags (vxlan_gbp1) |
- VXLAN_GBP_GPFLAGS_R);
-
- vnet_buffer2 (b1)->gbp.sclass = vxlan_gbp_get_sclass (vxlan_gbp1);
-
- vnet_update_l2_len (b0);
- vnet_update_l2_len (b1);
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_rx_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->next_index = next0;
- tr->error = error0;
- tr->tunnel_index = t0 == 0 ? ~0 : t0 - vxm->tunnels;
- tr->vni = vxlan_gbp_get_vni (vxlan_gbp0);
- tr->sclass = vxlan_gbp_get_sclass (vxlan_gbp0);
- tr->flags = vxlan_gbp_get_gpflags (vxlan_gbp0);
- }
- if (PREDICT_FALSE (b1->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_rx_trace_t *tr =
- vlib_add_trace (vm, node, b1, sizeof (*tr));
- tr->next_index = next1;
- tr->error = error1;
- tr->tunnel_index = t1 == 0 ? ~0 : t1 - vxm->tunnels;
- tr->vni = vxlan_gbp_get_vni (vxlan_gbp1);
- tr->sclass = vxlan_gbp_get_sclass (vxlan_gbp1);
- tr->flags = vxlan_gbp_get_gpflags (vxlan_gbp1);
- }
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0 = to_next[0] = from[0];
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
-
- /* udp leaves current_data pointing at the vxlan_gbp header */
- void *cur0 = vlib_buffer_get_current (b0);
- vxlan_gbp_header_t *vxlan_gbp0 = cur0;
- ip4_header_t *ip4_0;
- ip6_header_t *ip6_0;
- if (is_ip4)
- ip4_0 = cur0 - sizeof (udp_header_t) - sizeof (ip4_header_t);
- else
- ip6_0 = cur0 - sizeof (udp_header_t) - sizeof (ip6_header_t);
-
- u32 fi0 = buf_fib_index (b0, is_ip4);
-
- vxlan_gbp_tunnel_t *t0;
- if (is_ip4)
- t0 = vxlan4_gbp_find_tunnel (vxm, &last4, fi0, ip4_0, vxlan_gbp0);
- else
- t0 = vxlan6_gbp_find_tunnel (vxm, &last6, fi0, ip6_0, vxlan_gbp0);
-
- uword len0 = vlib_buffer_length_in_chain (vm, b0);
-
- vxlan_gbp_input_next_t next0;
- u8 error0 = 0;
- u8 flags0 = vxlan_gbp_get_flags (vxlan_gbp0);
-
- /* pop (ip, udp, vxlan_gbp) */
- vlib_buffer_advance (b0, sizeof (*vxlan_gbp0));
-
- u8 i_and_g0 = ((flags0 & VXLAN_GBP_FLAGS_GI) == VXLAN_GBP_FLAGS_GI);
-
- /* Validate VXLAN_GBP tunnel encap-fib index against packet */
- if (PREDICT_FALSE (t0 == NULL || !i_and_g0))
- {
- if (t0 != NULL && !i_and_g0)
- {
- error0 = VXLAN_GBP_ERROR_BAD_FLAGS;
- vlib_increment_combined_counter
- (drop_counter, thread_index, t0->sw_if_index, 1, len0);
- next0 = VXLAN_GBP_INPUT_NEXT_DROP;
- }
- else
- {
- error0 = VXLAN_GBP_ERROR_NO_SUCH_TUNNEL;
- next0 = VXLAN_GBP_INPUT_NEXT_PUNT;
- if (is_ip4)
- b0->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP4];
- else
- b0->punt_reason =
- vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP6];
- }
- b0->error = node->errors[error0];
- }
- else
- {
- next0 = vxlan_gbp_tunnel_get_next (t0, b0);
- /* Set packet input sw_if_index to unicast VXLAN_GBP tunnel for learning */
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = t0->sw_if_index;
- pkts_decapsulated++;
-
- vlib_increment_combined_counter
- (rx_counter, thread_index, t0->sw_if_index, 1, len0);
- }
- vnet_buffer2 (b0)->gbp.flags = (vxlan_gbp_get_gpflags (vxlan_gbp0) |
- VXLAN_GBP_GPFLAGS_R);
-
- vnet_buffer2 (b0)->gbp.sclass = vxlan_gbp_get_sclass (vxlan_gbp0);
-
- /* Required to make the l2 tag push / pop code work on l2 subifs */
- vnet_update_l2_len (b0);
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_rx_trace_t *tr
- = vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->next_index = next0;
- tr->error = error0;
- tr->tunnel_index = t0 == 0 ? ~0 : t0 - vxm->tunnels;
- tr->vni = vxlan_gbp_get_vni (vxlan_gbp0);
- tr->sclass = vxlan_gbp_get_sclass (vxlan_gbp0);
- tr->flags = vxlan_gbp_get_gpflags (vxlan_gbp0);
- }
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
- /* Do we still need this now that tunnel tx stats is kept? */
- u32 node_idx =
- is_ip4 ? vxlan4_gbp_input_node.index : vxlan6_gbp_input_node.index;
- vlib_node_increment_counter (vm, node_idx, VXLAN_GBP_ERROR_DECAPSULATED,
- pkts_decapsulated);
-
- return from_frame->n_vectors;
-}
-
-VLIB_NODE_FN (vxlan4_gbp_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return vxlan_gbp_input (vm, node, from_frame, /* is_ip4 */ 1);
-}
-
-VLIB_NODE_FN (vxlan6_gbp_input_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- return vxlan_gbp_input (vm, node, from_frame, /* is_ip4 */ 0);
-}
-
-static char *vxlan_gbp_error_strings[] = {
-#define vxlan_gbp_error(n,s) s,
-#include <vnet/vxlan-gbp/vxlan_gbp_error.def>
-#undef vxlan_gbp_error
-#undef _
-};
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vxlan4_gbp_input_node) =
-{
- .name = "vxlan4-gbp-input",
- .vector_size = sizeof (u32),
- .n_errors = VXLAN_GBP_N_ERROR,
- .error_strings = vxlan_gbp_error_strings,
- .n_next_nodes = VXLAN_GBP_INPUT_N_NEXT,
- .format_trace = format_vxlan_gbp_rx_trace,
- .next_nodes = {
-#define _(s,n) [VXLAN_GBP_INPUT_NEXT_##s] = n,
- foreach_vxlan_gbp_input_next
-#undef _
- },
-};
-
-VLIB_REGISTER_NODE (vxlan6_gbp_input_node) =
-{
- .name = "vxlan6-gbp-input",
- .vector_size = sizeof (u32),
- .n_errors = VXLAN_GBP_N_ERROR,
- .error_strings = vxlan_gbp_error_strings,
- .n_next_nodes = VXLAN_GBP_INPUT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [VXLAN_GBP_INPUT_NEXT_##s] = n,
- foreach_vxlan_gbp_input_next
-#undef _
- },
- .format_trace = format_vxlan_gbp_rx_trace,
-};
-/* *INDENT-ON* */
-
-typedef enum
-{
- IP_VXLAN_GBP_BYPASS_NEXT_DROP,
- IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP,
- IP_VXLAN_GBP_BYPASS_N_NEXT,
-} ip_vxlan_gbp_bypass_next_t;
-
-always_inline uword
-ip_vxlan_gbp_bypass_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame, u32 is_ip4)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- u32 *from, *to_next, n_left_from, n_left_to_next, next_index;
- vlib_node_runtime_t *error_node =
- vlib_node_get_runtime (vm, ip4_input_node.index);
- ip4_address_t addr4; /* last IPv4 address matching a local VTEP address */
- ip6_address_t addr6; /* last IPv6 address matching a local VTEP address */
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
-
- if (node->flags & VLIB_NODE_FLAG_TRACE)
- ip4_forward_next_trace (vm, node, frame, VLIB_TX);
-
- if (is_ip4)
- addr4.data_u32 = ~0;
- else
- ip6_address_set_zero (&addr6);
-
- while (n_left_from > 0)
- {
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- vlib_buffer_t *b0, *b1;
- ip4_header_t *ip40, *ip41;
- ip6_header_t *ip60, *ip61;
- udp_header_t *udp0, *udp1;
- u32 bi0, ip_len0, udp_len0, flags0, next0;
- u32 bi1, ip_len1, udp_len1, flags1, next1;
- i32 len_diff0, len_diff1;
- u8 error0, good_udp0, proto0;
- u8 error1, good_udp1, proto1;
-
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- bi0 = to_next[0] = from[0];
- bi1 = to_next[1] = from[1];
- from += 2;
- n_left_from -= 2;
- to_next += 2;
- n_left_to_next -= 2;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
- if (is_ip4)
- {
- ip40 = vlib_buffer_get_current (b0);
- ip41 = vlib_buffer_get_current (b1);
- }
- else
- {
- ip60 = vlib_buffer_get_current (b0);
- ip61 = vlib_buffer_get_current (b1);
- }
-
- /* Setup packet for next IP feature */
- vnet_feature_next (&next0, b0);
- vnet_feature_next (&next1, b1);
-
- if (is_ip4)
- {
- /* Treat IP frag packets as "experimental" protocol for now
- until support of IP frag reassembly is implemented */
- proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
- proto1 = ip4_is_fragment (ip41) ? 0xfe : ip41->protocol;
- }
- else
- {
- proto0 = ip60->protocol;
- proto1 = ip61->protocol;
- }
-
- /* Process packet 0 */
- if (proto0 != IP_PROTOCOL_UDP)
- goto exit0; /* not UDP packet */
-
- if (is_ip4)
- udp0 = ip4_next_header (ip40);
- else
- udp0 = ip6_next_header (ip60);
-
- if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp))
- goto exit0; /* not VXLAN_GBP packet */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
- if (addr4.as_u32 != ip40->dst_address.as_u32)
- {
- if (!hash_get (vxm->vtep4, ip40->dst_address.as_u32))
- goto exit0; /* no local VTEP for VXLAN_GBP packet */
- addr4 = ip40->dst_address;
- }
- }
- else
- {
- if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
- {
- if (!hash_get_mem (vxm->vtep6, &ip60->dst_address))
- goto exit0; /* no local VTEP for VXLAN_GBP packet */
- addr6 = ip60->dst_address;
- }
- }
-
- flags0 = b0->flags;
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp0 |= udp0->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len0 = clib_net_to_host_u16 (ip40->length);
- else
- ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
- len_diff0 = ip_len0 - udp_len0;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp0))
- {
- if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
- {
- if (is_ip4)
- flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
- else
- flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
- good_udp0 =
- (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
- }
-
- if (is_ip4)
- {
- error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
- }
-
- next0 = error0 ?
- IP_VXLAN_GBP_BYPASS_NEXT_DROP :
- IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP;
- b0->error = error0 ? error_node->errors[error0] : 0;
-
- /* vxlan-gbp-input node expect current at VXLAN_GBP header */
- if (is_ip4)
- vlib_buffer_advance (b0,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b0,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit0:
- /* Process packet 1 */
- if (proto1 != IP_PROTOCOL_UDP)
- goto exit1; /* not UDP packet */
-
- if (is_ip4)
- udp1 = ip4_next_header (ip41);
- else
- udp1 = ip6_next_header (ip61);
-
- if (udp1->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp))
- goto exit1; /* not VXLAN_GBP packet */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
- if (addr4.as_u32 != ip41->dst_address.as_u32)
- {
- if (!hash_get (vxm->vtep4, ip41->dst_address.as_u32))
- goto exit1; /* no local VTEP for VXLAN_GBP packet */
- addr4 = ip41->dst_address;
- }
- }
- else
- {
- if (!ip6_address_is_equal (&addr6, &ip61->dst_address))
- {
- if (!hash_get_mem (vxm->vtep6, &ip61->dst_address))
- goto exit1; /* no local VTEP for VXLAN_GBP packet */
- addr6 = ip61->dst_address;
- }
- }
-
- flags1 = b1->flags;
- good_udp1 = (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp1 |= udp1->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len1 = clib_net_to_host_u16 (ip41->length);
- else
- ip_len1 = clib_net_to_host_u16 (ip61->payload_length);
- udp_len1 = clib_net_to_host_u16 (udp1->length);
- len_diff1 = ip_len1 - udp_len1;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp1))
- {
- if ((flags1 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
- {
- if (is_ip4)
- flags1 = ip4_tcp_udp_validate_checksum (vm, b1);
- else
- flags1 = ip6_tcp_udp_icmp_validate_checksum (vm, b1);
- good_udp1 =
- (flags1 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
- }
-
- if (is_ip4)
- {
- error1 = good_udp1 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error1 = (len_diff1 >= 0) ? error1 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error1 = good_udp1 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error1 = (len_diff1 >= 0) ? error1 : IP6_ERROR_UDP_LENGTH;
- }
-
- next1 = error1 ?
- IP_VXLAN_GBP_BYPASS_NEXT_DROP :
- IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP;
- b1->error = error1 ? error_node->errors[error1] : 0;
-
- /* vxlan_gbp-input node expect current at VXLAN_GBP header */
- if (is_ip4)
- vlib_buffer_advance (b1,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b1,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit1:
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- vlib_buffer_t *b0;
- ip4_header_t *ip40;
- ip6_header_t *ip60;
- udp_header_t *udp0;
- u32 bi0, ip_len0, udp_len0, flags0, next0;
- i32 len_diff0;
- u8 error0, good_udp0, proto0;
-
- bi0 = to_next[0] = from[0];
- from += 1;
- n_left_from -= 1;
- to_next += 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- if (is_ip4)
- ip40 = vlib_buffer_get_current (b0);
- else
- ip60 = vlib_buffer_get_current (b0);
-
- /* Setup packet for next IP feature */
- vnet_feature_next (&next0, b0);
-
- if (is_ip4)
- /* Treat IP4 frag packets as "experimental" protocol for now
- until support of IP frag reassembly is implemented */
- proto0 = ip4_is_fragment (ip40) ? 0xfe : ip40->protocol;
- else
- proto0 = ip60->protocol;
-
- if (proto0 != IP_PROTOCOL_UDP)
- goto exit; /* not UDP packet */
-
- if (is_ip4)
- udp0 = ip4_next_header (ip40);
- else
- udp0 = ip6_next_header (ip60);
-
- if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp))
- goto exit; /* not VXLAN_GBP packet */
-
- /* Validate DIP against VTEPs */
- if (is_ip4)
- {
- if (addr4.as_u32 != ip40->dst_address.as_u32)
- {
- if (!hash_get (vxm->vtep4, ip40->dst_address.as_u32))
- goto exit; /* no local VTEP for VXLAN_GBP packet */
- addr4 = ip40->dst_address;
- }
- }
- else
- {
- if (!ip6_address_is_equal (&addr6, &ip60->dst_address))
- {
- if (!hash_get_mem (vxm->vtep6, &ip60->dst_address))
- goto exit; /* no local VTEP for VXLAN_GBP packet */
- addr6 = ip60->dst_address;
- }
- }
-
- flags0 = b0->flags;
- good_udp0 = (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
-
- /* Don't verify UDP checksum for packets with explicit zero checksum. */
- good_udp0 |= udp0->checksum == 0;
-
- /* Verify UDP length */
- if (is_ip4)
- ip_len0 = clib_net_to_host_u16 (ip40->length);
- else
- ip_len0 = clib_net_to_host_u16 (ip60->payload_length);
- udp_len0 = clib_net_to_host_u16 (udp0->length);
- len_diff0 = ip_len0 - udp_len0;
-
- /* Verify UDP checksum */
- if (PREDICT_FALSE (!good_udp0))
- {
- if ((flags0 & VNET_BUFFER_F_L4_CHECKSUM_COMPUTED) == 0)
- {
- if (is_ip4)
- flags0 = ip4_tcp_udp_validate_checksum (vm, b0);
- else
- flags0 = ip6_tcp_udp_icmp_validate_checksum (vm, b0);
- good_udp0 =
- (flags0 & VNET_BUFFER_F_L4_CHECKSUM_CORRECT) != 0;
- }
- }
-
- if (is_ip4)
- {
- error0 = good_udp0 ? 0 : IP4_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP4_ERROR_UDP_LENGTH;
- }
- else
- {
- error0 = good_udp0 ? 0 : IP6_ERROR_UDP_CHECKSUM;
- error0 = (len_diff0 >= 0) ? error0 : IP6_ERROR_UDP_LENGTH;
- }
-
- next0 = error0 ?
- IP_VXLAN_GBP_BYPASS_NEXT_DROP :
- IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP;
- b0->error = error0 ? error_node->errors[error0] : 0;
-
- /* vxlan_gbp-input node expect current at VXLAN_GBP header */
- if (is_ip4)
- vlib_buffer_advance (b0,
- sizeof (ip4_header_t) +
- sizeof (udp_header_t));
- else
- vlib_buffer_advance (b0,
- sizeof (ip6_header_t) +
- sizeof (udp_header_t));
-
- exit:
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- return frame->n_vectors;
-}
-
-VLIB_NODE_FN (ip4_vxlan_gbp_bypass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip_vxlan_gbp_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip4_vxlan_gbp_bypass_node) =
-{
- .name = "ip4-vxlan-gbp-bypass",
- .vector_size = sizeof (u32),
- .n_next_nodes = IP_VXLAN_GBP_BYPASS_N_NEXT,
- .next_nodes = {
- [IP_VXLAN_GBP_BYPASS_NEXT_DROP] = "error-drop",
- [IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP] = "vxlan4-gbp-input",
- },
- .format_buffer = format_ip4_header,
- .format_trace = format_ip4_forward_next_trace,
-};
-/* *INDENT-ON* */
-
-#ifndef CLIB_MARCH_VARIANT
-/* Dummy init function to get us linked in. */
-clib_error_t *
-ip4_vxlan_gbp_bypass_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (ip4_vxlan_gbp_bypass_init);
-#endif /* CLIB_MARCH_VARIANT */
-
-VLIB_NODE_FN (ip6_vxlan_gbp_bypass_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- return ip_vxlan_gbp_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (ip6_vxlan_gbp_bypass_node) =
-{
- .name = "ip6-vxlan-gbp-bypass",
- .vector_size = sizeof (u32),
- .n_next_nodes = IP_VXLAN_GBP_BYPASS_N_NEXT,
- .next_nodes = {
- [IP_VXLAN_GBP_BYPASS_NEXT_DROP] = "error-drop",
- [IP_VXLAN_GBP_BYPASS_NEXT_VXLAN_GBP] = "vxlan6-gbp-input",
- },
- .format_buffer = format_ip6_header,
- .format_trace = format_ip6_forward_next_trace,
-};
-/* *INDENT-ON* */
-
-#ifndef CLIB_MARCH_VARIANT
-/* Dummy init function to get us linked in. */
-clib_error_t *
-ip6_vxlan_gbp_bypass_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (ip6_vxlan_gbp_bypass_init);
-#endif /* CLIB_MARCH_VARIANT */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gbp/encap.c b/src/vnet/vxlan-gbp/encap.c
deleted file mode 100644
index 2a4e8a8e312..00000000000
--- a/src/vnet/vxlan-gbp/encap.c
+++ /dev/null
@@ -1,601 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vppinfra/error.h>
-#include <vppinfra/hash.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/interface_output.h>
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-#include <vnet/qos/qos_types.h>
-#include <vnet/adj/rewrite.h>
-
-/* Statistics (not all errors) */
-#define foreach_vxlan_gbp_encap_error \
-_(ENCAPSULATED, "good packets encapsulated")
-
-static char *vxlan_gbp_encap_error_strings[] = {
-#define _(sym,string) string,
- foreach_vxlan_gbp_encap_error
-#undef _
-};
-
-typedef enum
-{
-#define _(sym,str) VXLAN_GBP_ENCAP_ERROR_##sym,
- foreach_vxlan_gbp_encap_error
-#undef _
- VXLAN_GBP_ENCAP_N_ERROR,
-} vxlan_gbp_encap_error_t;
-
-typedef enum
-{
- VXLAN_GBP_ENCAP_NEXT_DROP,
- VXLAN_GBP_ENCAP_N_NEXT,
-} vxlan_gbp_encap_next_t;
-
-typedef struct
-{
- u32 tunnel_index;
- u32 vni;
- u16 sclass;
- u8 flags;
-} vxlan_gbp_encap_trace_t;
-
-#ifndef CLIB_MARCH_VARIANT
-u8 *
-format_vxlan_gbp_encap_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- vxlan_gbp_encap_trace_t *t = va_arg (*args, vxlan_gbp_encap_trace_t *);
-
- s =
- format (s,
- "VXLAN_GBP encap to vxlan_gbp_tunnel%d vni %d sclass %d flags %U",
- t->tunnel_index, t->vni, t->sclass,
- format_vxlan_gbp_header_gpflags, t->flags);
- return s;
-}
-#endif /* CLIB_MARCH_VARIANT */
-
-always_inline uword
-vxlan_gbp_encap_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, u8 is_ip4, u8 csum_offload)
-{
- u32 n_left_from, next_index, *from, *to_next;
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vnet_main_t *vnm = vxm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- vlib_combined_counter_main_t *tx_counter =
- im->combined_sw_if_counters + VNET_INTERFACE_COUNTER_TX;
- u32 pkts_encapsulated = 0;
- u32 thread_index = vlib_get_thread_index ();
- u32 sw_if_index0 = 0, sw_if_index1 = 0;
- u32 next0 = 0, next1 = 0;
- vxlan_gbp_tunnel_t *t0 = NULL, *t1 = NULL;
- index_t dpoi_idx0 = INDEX_INVALID, dpoi_idx1 = INDEX_INVALID;
- vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
- vlib_get_buffers (vm, from, bufs, n_left_from);
-
- next_index = node->cached_next_index;
-
- STATIC_ASSERT_SIZEOF (ip6_vxlan_gbp_header_t, 56);
- STATIC_ASSERT_SIZEOF (ip4_vxlan_gbp_header_t, 36);
-
- u8 const underlay_hdr_len = is_ip4 ?
- sizeof (ip4_vxlan_gbp_header_t) : sizeof (ip6_vxlan_gbp_header_t);
- u16 const l3_len = is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
- u32 const csum_flags =
- is_ip4 ? VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID :
- VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
- u32 const outer_packet_csum_offload_flags =
- is_ip4 ? VNET_BUFFER_OFFLOAD_F_IP_CKSUM | VNET_BUFFER_OFFLOAD_F_UDP_CKSUM :
- VNET_BUFFER_OFFLOAD_F_UDP_CKSUM;
- u32 const inner_packet_removed_flags =
- VNET_BUFFER_F_IS_IP4 | VNET_BUFFER_F_IS_IP6 |
- VNET_BUFFER_F_L2_HDR_OFFSET_VALID | VNET_BUFFER_F_L3_HDR_OFFSET_VALID |
- VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from >= 4 && n_left_to_next >= 2)
- {
- /* Prefetch next iteration. */
- {
- vlib_buffer_t *p2, *p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
-
- CLIB_PREFETCH (b[2]->data - CLIB_CACHE_LINE_BYTES,
- 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (b[3]->data - CLIB_CACHE_LINE_BYTES,
- 2 * CLIB_CACHE_LINE_BYTES, LOAD);
- }
-
- u32 bi0 = to_next[0] = from[0];
- u32 bi1 = to_next[1] = from[1];
- from += 2;
- to_next += 2;
- n_left_to_next -= 2;
- n_left_from -= 2;
-
- u32 or_flags = b[0]->flags | b[1]->flags;
- if (csum_offload && (or_flags & VNET_BUFFER_F_OFFLOAD))
- {
- /* Only calculate the non-GSO packet csum offload */
- if ((b[0]->flags & VNET_BUFFER_F_GSO) == 0)
- {
- vnet_calc_checksums_inline (vm, b[0],
- b[0]->flags &
- VNET_BUFFER_F_IS_IP4,
- b[0]->flags &
- VNET_BUFFER_F_IS_IP6);
- b[0]->flags &= ~inner_packet_removed_flags;
- }
- if ((b[1]->flags & VNET_BUFFER_F_GSO) == 0)
- {
- vnet_calc_checksums_inline (vm, b[1],
- b[1]->flags &
- VNET_BUFFER_F_IS_IP4,
- b[1]->flags &
- VNET_BUFFER_F_IS_IP6);
- b[1]->flags &= ~inner_packet_removed_flags;
- }
- }
-
- u32 flow_hash0 = vnet_l2_compute_flow_hash (b[0]);
- u32 flow_hash1 = vnet_l2_compute_flow_hash (b[1]);
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
- {
- sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi0 =
- vnet_get_sup_hw_interface (vnm, sw_if_index0);
- t0 = &vxm->tunnels[hi0->dev_instance];
- /* Note: change to always set next0 if it may set to drop */
- next0 = t0->next_dpo.dpoi_next_node;
- dpoi_idx0 = t0->next_dpo.dpoi_index;
- }
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index1 != vnet_buffer (b[1])->sw_if_index[VLIB_TX])
- {
- if (sw_if_index0 == vnet_buffer (b[1])->sw_if_index[VLIB_TX])
- {
- sw_if_index1 = sw_if_index0;
- t1 = t0;
- next1 = next0;
- dpoi_idx1 = dpoi_idx0;
- }
- else
- {
- sw_if_index1 = vnet_buffer (b[1])->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi1 =
- vnet_get_sup_hw_interface (vnm, sw_if_index1);
- t1 = &vxm->tunnels[hi1->dev_instance];
- /* Note: change to always set next1 if it may set to drop */
- next1 = t1->next_dpo.dpoi_next_node;
- dpoi_idx1 = t1->next_dpo.dpoi_index;
- }
- }
-
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpoi_idx0;
- vnet_buffer (b[1])->ip.adj_index[VLIB_TX] = dpoi_idx1;
-
- ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
- ASSERT (t1->rewrite_header.data_bytes == underlay_hdr_len);
- vnet_rewrite_two_headers (*t0, *t1, vlib_buffer_get_current (b[0]),
- vlib_buffer_get_current (b[1]),
- underlay_hdr_len);
-
- vlib_buffer_advance (b[0], -underlay_hdr_len);
- vlib_buffer_advance (b[1], -underlay_hdr_len);
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b[0]);
- u32 len1 = vlib_buffer_length_in_chain (vm, b[1]);
- u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
- u16 payload_l1 = clib_host_to_net_u16 (len1 - l3_len);
-
- void *underlay0 = vlib_buffer_get_current (b[0]);
- void *underlay1 = vlib_buffer_get_current (b[1]);
-
- ip4_header_t *ip4_0, *ip4_1;
- qos_bits_t ip4_0_tos = 0, ip4_1_tos = 0;
- ip6_header_t *ip6_0, *ip6_1;
- udp_header_t *udp0, *udp1;
- vxlan_gbp_header_t *vxlan_gbp0, *vxlan_gbp1;
- u8 *l3_0, *l3_1;
- if (is_ip4)
- {
- ip4_vxlan_gbp_header_t *hdr0 = underlay0;
- ip4_vxlan_gbp_header_t *hdr1 = underlay1;
-
- /* Fix the IP4 checksum and length */
- ip4_0 = &hdr0->ip4;
- ip4_1 = &hdr1->ip4;
- ip4_0->length = clib_host_to_net_u16 (len0);
- ip4_1->length = clib_host_to_net_u16 (len1);
-
- if (PREDICT_FALSE (b[0]->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_0_tos = vnet_buffer2 (b[0])->qos.bits;
- ip4_0->tos = ip4_0_tos;
- }
- if (PREDICT_FALSE (b[1]->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_1_tos = vnet_buffer2 (b[1])->qos.bits;
- ip4_1->tos = ip4_1_tos;
- }
-
- l3_0 = (u8 *) ip4_0;
- l3_1 = (u8 *) ip4_1;
- udp0 = &hdr0->udp;
- udp1 = &hdr1->udp;
- vxlan_gbp0 = &hdr0->vxlan_gbp;
- vxlan_gbp1 = &hdr1->vxlan_gbp;
- }
- else /* ipv6 */
- {
- ip6_vxlan_gbp_header_t *hdr0 = underlay0;
- ip6_vxlan_gbp_header_t *hdr1 = underlay1;
-
- /* Fix IP6 payload length */
- ip6_0 = &hdr0->ip6;
- ip6_1 = &hdr1->ip6;
- ip6_0->payload_length = payload_l0;
- ip6_1->payload_length = payload_l1;
-
- l3_0 = (u8 *) ip6_0;
- l3_1 = (u8 *) ip6_1;
- udp0 = &hdr0->udp;
- udp1 = &hdr1->udp;
- vxlan_gbp0 = &hdr0->vxlan_gbp;
- vxlan_gbp1 = &hdr1->vxlan_gbp;
- }
-
- /* Fix UDP length and set source port */
- udp0->length = payload_l0;
- udp0->src_port = flow_hash0;
- udp1->length = payload_l1;
- udp1->src_port = flow_hash1;
-
- /* set source class and gpflags */
- vxlan_gbp0->gpflags = vnet_buffer2 (b[0])->gbp.flags;
- vxlan_gbp1->gpflags = vnet_buffer2 (b[1])->gbp.flags;
- vxlan_gbp0->sclass =
- clib_host_to_net_u16 (vnet_buffer2 (b[0])->gbp.sclass);
- vxlan_gbp1->sclass =
- clib_host_to_net_u16 (vnet_buffer2 (b[1])->gbp.sclass);
-
- if (csum_offload)
- {
- b[0]->flags |= csum_flags;
- vnet_buffer (b[0])->l3_hdr_offset = l3_0 - b[0]->data;
- vnet_buffer (b[0])->l4_hdr_offset = (u8 *) udp0 - b[0]->data;
- vnet_buffer_offload_flags_set (b[0],
- outer_packet_csum_offload_flags);
- b[1]->flags |= csum_flags;
- vnet_buffer (b[1])->l3_hdr_offset = l3_1 - b[1]->data;
- vnet_buffer (b[1])->l4_hdr_offset = (u8 *) udp1 - b[1]->data;
- vnet_buffer_offload_flags_set (b[1],
- outer_packet_csum_offload_flags);
- }
- /* IPv4 UDP checksum only if checksum offload is used */
- else if (is_ip4)
- {
- ip_csum_t sum0 = ip4_0->checksum;
- sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
- length /* changed member */ );
- if (PREDICT_FALSE (ip4_0_tos))
- {
- sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
- tos /* changed member */ );
- }
- ip4_0->checksum = ip_csum_fold (sum0);
- ip_csum_t sum1 = ip4_1->checksum;
- sum1 = ip_csum_update (sum1, 0, ip4_1->length, ip4_header_t,
- length /* changed member */ );
- if (PREDICT_FALSE (ip4_1_tos))
- {
- sum1 = ip_csum_update (sum1, 0, ip4_1_tos, ip4_header_t,
- tos /* changed member */ );
- }
- ip4_1->checksum = ip_csum_fold (sum1);
- }
- /* IPv6 UDP checksum is mandatory */
- else
- {
- int bogus = 0;
-
- udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
- (vm, b[0], ip6_0, &bogus);
- ASSERT (bogus == 0);
- if (udp0->checksum == 0)
- udp0->checksum = 0xffff;
- udp1->checksum = ip6_tcp_udp_icmp_compute_checksum
- (vm, b[1], ip6_1, &bogus);
- ASSERT (bogus == 0);
- if (udp1->checksum == 0)
- udp1->checksum = 0xffff;
- }
-
- /* save inner packet flow_hash for load-balance node */
- vnet_buffer (b[0])->ip.flow_hash = flow_hash0;
- vnet_buffer (b[1])->ip.flow_hash = flow_hash1;
-
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index0, 1, len0);
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index1, 1, len1);
- pkts_encapsulated += 2;
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_encap_trace_t *tr =
- vlib_add_trace (vm, node, b[0], sizeof (*tr));
- tr->tunnel_index = t0 - vxm->tunnels;
- tr->vni = t0->vni;
- tr->sclass = vnet_buffer2 (b[0])->gbp.sclass;
- tr->flags = vnet_buffer2 (b[0])->gbp.flags;
- }
-
- if (PREDICT_FALSE (b[1]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_encap_trace_t *tr =
- vlib_add_trace (vm, node, b[1], sizeof (*tr));
- tr->tunnel_index = t1 - vxm->tunnels;
- tr->vni = t1->vni;
- tr->sclass = vnet_buffer2 (b[1])->gbp.sclass;
- tr->flags = vnet_buffer2 (b[1])->gbp.flags;
- }
- b += 2;
-
- vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, next0, next1);
- }
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0 = to_next[0] = from[0];
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- if (csum_offload && (b[0]->flags & VNET_BUFFER_F_OFFLOAD))
- {
- /* Only calculate the non-GSO packet csum offload */
- if ((b[0]->flags & VNET_BUFFER_F_GSO) == 0)
- {
- vnet_calc_checksums_inline (vm, b[0],
- b[0]->flags &
- VNET_BUFFER_F_IS_IP4,
- b[0]->flags &
- VNET_BUFFER_F_IS_IP6);
- b[0]->flags &= ~inner_packet_removed_flags;
- }
- }
-
- u32 flow_hash0 = vnet_l2_compute_flow_hash (b[0]);
-
- /* Get next node index and adj index from tunnel next_dpo */
- if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
- {
- sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
- vnet_hw_interface_t *hi0 =
- vnet_get_sup_hw_interface (vnm, sw_if_index0);
- t0 = &vxm->tunnels[hi0->dev_instance];
- /* Note: change to always set next0 if it may be set to drop */
- next0 = t0->next_dpo.dpoi_next_node;
- dpoi_idx0 = t0->next_dpo.dpoi_index;
- }
- vnet_buffer (b[0])->ip.adj_index[VLIB_TX] = dpoi_idx0;
-
- ASSERT (t0->rewrite_header.data_bytes == underlay_hdr_len);
- vnet_rewrite_one_header (*t0, vlib_buffer_get_current (b[0]),
- underlay_hdr_len);
-
- vlib_buffer_advance (b[0], -underlay_hdr_len);
- void *underlay0 = vlib_buffer_get_current (b[0]);
-
- u32 len0 = vlib_buffer_length_in_chain (vm, b[0]);
- u16 payload_l0 = clib_host_to_net_u16 (len0 - l3_len);
-
- vxlan_gbp_header_t *vxlan_gbp0;
- udp_header_t *udp0;
- ip4_header_t *ip4_0;
- qos_bits_t ip4_0_tos = 0;
- ip6_header_t *ip6_0;
- u8 *l3_0;
- if (is_ip4)
- {
- ip4_vxlan_gbp_header_t *hdr = underlay0;
-
- /* Fix the IP4 checksum and length */
- ip4_0 = &hdr->ip4;
- ip4_0->length = clib_host_to_net_u16 (len0);
-
- if (PREDICT_FALSE (b[0]->flags & VNET_BUFFER_F_QOS_DATA_VALID))
- {
- ip4_0_tos = vnet_buffer2 (b[0])->qos.bits;
- ip4_0->tos = ip4_0_tos;
- }
-
- l3_0 = (u8 *) ip4_0;
- udp0 = &hdr->udp;
- vxlan_gbp0 = &hdr->vxlan_gbp;
- }
- else /* ip6 path */
- {
- ip6_vxlan_gbp_header_t *hdr = underlay0;
-
- /* Fix IP6 payload length */
- ip6_0 = &hdr->ip6;
- ip6_0->payload_length = payload_l0;
-
- l3_0 = (u8 *) ip6_0;
- udp0 = &hdr->udp;
- vxlan_gbp0 = &hdr->vxlan_gbp;
- }
-
- /* Fix UDP length and set source port */
- udp0->length = payload_l0;
- udp0->src_port = flow_hash0;
-
- /* set source class and gpflags */
- vxlan_gbp0->gpflags = vnet_buffer2 (b[0])->gbp.flags;
- vxlan_gbp0->sclass =
- clib_host_to_net_u16 (vnet_buffer2 (b[0])->gbp.sclass);
-
- if (csum_offload)
- {
- b[0]->flags |= csum_flags;
- vnet_buffer (b[0])->l3_hdr_offset = l3_0 - b[0]->data;
- vnet_buffer (b[0])->l4_hdr_offset = (u8 *) udp0 - b[0]->data;
- vnet_buffer_offload_flags_set (b[0],
- outer_packet_csum_offload_flags);
- }
- /* IPv4 UDP checksum only if checksum offload is used */
- else if (is_ip4)
- {
- ip_csum_t sum0 = ip4_0->checksum;
- sum0 = ip_csum_update (sum0, 0, ip4_0->length, ip4_header_t,
- length /* changed member */ );
- if (PREDICT_FALSE (ip4_0_tos))
- {
- sum0 = ip_csum_update (sum0, 0, ip4_0_tos, ip4_header_t,
- tos /* changed member */ );
- }
- ip4_0->checksum = ip_csum_fold (sum0);
- }
- /* IPv6 UDP checksum is mandatory */
- else
- {
- int bogus = 0;
-
- udp0->checksum = ip6_tcp_udp_icmp_compute_checksum
- (vm, b[0], ip6_0, &bogus);
- ASSERT (bogus == 0);
- if (udp0->checksum == 0)
- udp0->checksum = 0xffff;
- }
-
- /* save inner packet flow_hash for load-balance node */
- vnet_buffer (b[0])->ip.flow_hash = flow_hash0;
-
- vlib_increment_combined_counter (tx_counter, thread_index,
- sw_if_index0, 1, len0);
- pkts_encapsulated++;
-
- if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED))
- {
- vxlan_gbp_encap_trace_t *tr =
- vlib_add_trace (vm, node, b[0], sizeof (*tr));
- tr->tunnel_index = t0 - vxm->tunnels;
- tr->vni = t0->vni;
- tr->sclass = vnet_buffer2 (b[0])->gbp.sclass;
- tr->flags = vnet_buffer2 (b[0])->gbp.flags;
- }
- b += 1;
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
-
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- /* Do we still need this now that tunnel tx stats is kept? */
- vlib_node_increment_counter (vm, node->node_index,
- VXLAN_GBP_ENCAP_ERROR_ENCAPSULATED,
- pkts_encapsulated);
-
- return from_frame->n_vectors;
-}
-
-VLIB_NODE_FN (vxlan4_gbp_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- /* Disable chksum offload as setup overhead in tx node is not worthwhile
- for ip4 header checksum only, unless udp checksum is also required */
- return vxlan_gbp_encap_inline (vm, node, from_frame, /* is_ip4 */ 1,
- /* csum_offload */ 0);
-}
-
-VLIB_NODE_FN (vxlan6_gbp_encap_node) (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- /* Enable checksum offload for ip6 as udp checksum is mandatory, */
- return vxlan_gbp_encap_inline (vm, node, from_frame, /* is_ip4 */ 0,
- /* csum_offload */ 1);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (vxlan4_gbp_encap_node) =
-{
- .name = "vxlan4-gbp-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_vxlan_gbp_encap_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (vxlan_gbp_encap_error_strings),
- .error_strings = vxlan_gbp_encap_error_strings,
- .n_next_nodes = VXLAN_GBP_ENCAP_N_NEXT,
- .next_nodes = {
- [VXLAN_GBP_ENCAP_NEXT_DROP] = "error-drop",
- },
-};
-
-VLIB_REGISTER_NODE (vxlan6_gbp_encap_node) =
-{
- .name = "vxlan6-gbp-encap",
- .vector_size = sizeof (u32),
- .format_trace = format_vxlan_gbp_encap_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
- .n_errors = ARRAY_LEN (vxlan_gbp_encap_error_strings),
- .error_strings = vxlan_gbp_encap_error_strings,
- .n_next_nodes = VXLAN_GBP_ENCAP_N_NEXT,
- .next_nodes = {
- [VXLAN_GBP_ENCAP_NEXT_DROP] = "error-drop",
- },
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp.api b/src/vnet/vxlan-gbp/vxlan_gbp.api
deleted file mode 100644
index 68566697000..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp.api
+++ /dev/null
@@ -1,100 +0,0 @@
-/* Hey Emacs use -*- mode: C -*- */
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-option version = "1.1.1";
-import "vnet/ip/ip_types.api";
-import "vnet/interface_types.api";
-
-enum vxlan_gbp_api_tunnel_mode
-{
- VXLAN_GBP_API_TUNNEL_MODE_L2,
- VXLAN_GBP_API_TUNNEL_MODE_L3,
-};
-
-/** \brief Definition of a VXLAN GBP tunnel
- @param instance - optional unique custom device instance, else ~0.
- @param src - Source IP address
- @param dst - Destination IP address, can be multicast
- @param mcast_sw_if_index - Interface for multicast destination
- @param encap_table_id - Encap route table
- @param vni - The VXLAN Network Identifier, uint24
- @param sw_ifindex - Ignored in add message, set in details
-*/
-typedef vxlan_gbp_tunnel
-{
- u32 instance;
- vl_api_address_t src;
- vl_api_address_t dst;
- vl_api_interface_index_t mcast_sw_if_index;
- u32 encap_table_id;
- u32 vni;
- vl_api_interface_index_t sw_if_index;
- vl_api_vxlan_gbp_api_tunnel_mode_t mode;
-};
-
-/** \brief Create or delete a VXLAN-GBP tunnel
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param is_add - Use 1 to create the tunnel, 0 to remove it
-*/
-define vxlan_gbp_tunnel_add_del
-{
- u32 client_index;
- u32 context;
- bool is_add [default=true];
- vl_api_vxlan_gbp_tunnel_t tunnel;
- option in_progress;
-};
-
-define vxlan_gbp_tunnel_add_del_reply
-{
- u32 context;
- i32 retval;
- vl_api_interface_index_t sw_if_index;
- option in_progress;
-};
-
-define vxlan_gbp_tunnel_dump
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index [default=0xffffffff];
- option in_progress;
-};
-
-define vxlan_gbp_tunnel_details
-{
- u32 context;
- vl_api_vxlan_gbp_tunnel_t tunnel;
- option in_progress;
-};
-
-/** \brief Interface set vxlan-bypass request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param sw_if_index - interface used to reach neighbor
- @param is_ipv6 - if non-zero, enable ipv6-vxlan-bypass, else ipv4-vxlan-bypass
- @param enable - if non-zero enable, else disable
-*/
-autoreply define sw_interface_set_vxlan_gbp_bypass
-{
- u32 client_index;
- u32 context;
- vl_api_interface_index_t sw_if_index;
- bool is_ipv6;
- bool enable [default=true];
- option in_progress;
-};
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp.c b/src/vnet/vxlan-gbp/vxlan_gbp.c
deleted file mode 100644
index 37cfd728121..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp.c
+++ /dev/null
@@ -1,1192 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-#include <vnet/ip/format.h>
-#include <vnet/ip/punt.h>
-#include <vnet/fib/fib_entry.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/fib/fib_entry_track.h>
-#include <vnet/mfib/mfib_table.h>
-#include <vnet/adj/adj_mcast.h>
-#include <vnet/adj/rewrite.h>
-#include <vnet/interface.h>
-#include <vlib/vlib.h>
-
-/**
- * @file
- * @brief VXLAN GBP.
- *
- * VXLAN GBP provides the features of vxlan and carry group policy id.
- */
-static vlib_punt_hdl_t punt_hdl;
-
-vxlan_gbp_main_t vxlan_gbp_main;
-
-u8 *
-format_vxlan_gbp_tunnel_mode (u8 * s, va_list * args)
-{
- vxlan_gbp_tunnel_mode_t mode = va_arg (*args, vxlan_gbp_tunnel_mode_t);
-
- switch (mode)
- {
- case VXLAN_GBP_TUNNEL_MODE_L2:
- s = format (s, "L2");
- break;
- case VXLAN_GBP_TUNNEL_MODE_L3:
- s = format (s, "L3");
- break;
- }
- return (s);
-}
-
-u8 *
-format_vxlan_gbp_tunnel (u8 * s, va_list * args)
-{
- vxlan_gbp_tunnel_t *t = va_arg (*args, vxlan_gbp_tunnel_t *);
-
- s = format (s,
- "[%d] instance %d src %U dst %U vni %d fib-idx %d"
- " sw-if-idx %d mode %U ",
- t->dev_instance, t->user_instance,
- format_ip46_address, &t->src, IP46_TYPE_ANY,
- format_ip46_address, &t->dst, IP46_TYPE_ANY,
- t->vni, t->encap_fib_index, t->sw_if_index,
- format_vxlan_gbp_tunnel_mode, t->mode);
-
- s = format (s, "encap-dpo-idx %d ", t->next_dpo.dpoi_index);
-
- if (PREDICT_FALSE (ip46_address_is_multicast (&t->dst)))
- s = format (s, "mcast-sw-if-idx %d ", t->mcast_sw_if_index);
-
- return s;
-}
-
-static u8 *
-format_vxlan_gbp_name (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vxlan_gbp_tunnel_t *t;
-
- if (dev_instance == ~0)
- return format (s, "<cached-unused>");
-
- if (dev_instance >= vec_len (vxm->tunnels))
- return format (s, "<improperly-referenced>");
-
- t = pool_elt_at_index (vxm->tunnels, dev_instance);
-
- return format (s, "vxlan_gbp_tunnel%d", t->user_instance);
-}
-
-static clib_error_t *
-vxlan_gbp_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
- u32 flags)
-{
- u32 hw_flags = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) ?
- VNET_HW_INTERFACE_FLAG_LINK_UP : 0;
- vnet_hw_interface_set_flags (vnm, hw_if_index, hw_flags);
-
- return /* no error */ 0;
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (vxlan_gbp_device_class, static) = {
- .name = "VXLAN-GBP",
- .format_device_name = format_vxlan_gbp_name,
- .format_tx_trace = format_vxlan_gbp_encap_trace,
- .admin_up_down_function = vxlan_gbp_interface_admin_up_down,
-};
-/* *INDENT-ON* */
-
-static u8 *
-format_vxlan_gbp_header_with_length (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- s = format (s, "unimplemented dev %u", dev_instance);
- return s;
-}
-
-/* *INDENT-OFF* */
-VNET_HW_INTERFACE_CLASS (vxlan_gbp_hw_class) = {
- .name = "VXLAN-GBP",
- .format_header = format_vxlan_gbp_header_with_length,
- .build_rewrite = default_build_rewrite,
-};
-/* *INDENT-ON* */
-
-static void
-vxlan_gbp_tunnel_restack_dpo (vxlan_gbp_tunnel_t * t)
-{
- u8 is_ip4 = ip46_address_is_ip4 (&t->dst);
- dpo_id_t dpo = DPO_INVALID;
- fib_forward_chain_type_t forw_type = is_ip4 ?
- FIB_FORW_CHAIN_TYPE_UNICAST_IP4 : FIB_FORW_CHAIN_TYPE_UNICAST_IP6;
-
- fib_entry_contribute_forwarding (t->fib_entry_index, forw_type, &dpo);
-
- /* vxlan_gbp uses the payload hash as the udp source port
- * hence the packet's hash is unknown
- * skip single bucket load balance dpo's */
- while (DPO_LOAD_BALANCE == dpo.dpoi_type)
- {
- load_balance_t *lb = load_balance_get (dpo.dpoi_index);
- if (lb->lb_n_buckets > 1)
- break;
-
- dpo_copy (&dpo, load_balance_get_bucket_i (lb, 0));
- }
-
- u32 encap_index = is_ip4 ?
- vxlan4_gbp_encap_node.index : vxlan6_gbp_encap_node.index;
- dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset (&dpo);
-}
-
-static vxlan_gbp_tunnel_t *
-vxlan_gbp_tunnel_from_fib_node (fib_node_t * node)
-{
- ASSERT (FIB_NODE_TYPE_VXLAN_GBP_TUNNEL == node->fn_type);
- return ((vxlan_gbp_tunnel_t *) (((char *) node) -
- STRUCT_OFFSET_OF (vxlan_gbp_tunnel_t,
- node)));
-}
-
-/**
- * Function definition to backwalk a FIB node -
- * Here we will restack the new dpo of VXLAN DIP to encap node.
- */
-static fib_node_back_walk_rc_t
-vxlan_gbp_tunnel_back_walk (fib_node_t * node, fib_node_back_walk_ctx_t * ctx)
-{
- vxlan_gbp_tunnel_restack_dpo (vxlan_gbp_tunnel_from_fib_node (node));
- return (FIB_NODE_BACK_WALK_CONTINUE);
-}
-
-/**
- * Function definition to get a FIB node from its index
- */
-static fib_node_t *
-vxlan_gbp_tunnel_fib_node_get (fib_node_index_t index)
-{
- vxlan_gbp_tunnel_t *t;
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- t = pool_elt_at_index (vxm->tunnels, index);
-
- return (&t->node);
-}
-
-/**
- * Function definition to inform the FIB node that its last lock has gone.
- */
-static void
-vxlan_gbp_tunnel_last_lock_gone (fib_node_t * node)
-{
- /*
- * The VXLAN GBP tunnel is a root of the graph. As such
- * it never has children and thus is never locked.
- */
- ASSERT (0);
-}
-
-/*
- * Virtual function table registered by VXLAN GBP tunnels
- * for participation in the FIB object graph.
- */
-const static fib_node_vft_t vxlan_gbp_vft = {
- .fnv_get = vxlan_gbp_tunnel_fib_node_get,
- .fnv_last_lock = vxlan_gbp_tunnel_last_lock_gone,
- .fnv_back_walk = vxlan_gbp_tunnel_back_walk,
-};
-
-
-#define foreach_copy_field \
-_(vni) \
-_(mode) \
-_(mcast_sw_if_index) \
-_(encap_fib_index) \
-_(src) \
-_(dst)
-
-static void
-vxlan_gbp_rewrite (vxlan_gbp_tunnel_t * t, bool is_ip6)
-{
- union
- {
- ip4_vxlan_gbp_header_t h4;
- ip6_vxlan_gbp_header_t h6;
- } h;
- int len = is_ip6 ? sizeof h.h6 : sizeof h.h4;
-
- udp_header_t *udp;
- vxlan_gbp_header_t *vxlan_gbp;
- /* Fixed portion of the (outer) ip header */
-
- clib_memset (&h, 0, sizeof (h));
- if (!is_ip6)
- {
- ip4_header_t *ip = &h.h4.ip4;
- udp = &h.h4.udp, vxlan_gbp = &h.h4.vxlan_gbp;
- ip->ip_version_and_header_length = 0x45;
- ip->ttl = 254;
- ip->protocol = IP_PROTOCOL_UDP;
-
- ip->src_address = t->src.ip4;
- ip->dst_address = t->dst.ip4;
-
- /* we fix up the ip4 header length and checksum after-the-fact */
- ip->checksum = ip4_header_checksum (ip);
- }
- else
- {
- ip6_header_t *ip = &h.h6.ip6;
- udp = &h.h6.udp, vxlan_gbp = &h.h6.vxlan_gbp;
- ip->ip_version_traffic_class_and_flow_label =
- clib_host_to_net_u32 (6 << 28);
- ip->hop_limit = 255;
- ip->protocol = IP_PROTOCOL_UDP;
-
- ip->src_address = t->src.ip6;
- ip->dst_address = t->dst.ip6;
- }
-
- /* UDP header, randomize src port on something, maybe? */
- udp->src_port = clib_host_to_net_u16 (47789);
- udp->dst_port = clib_host_to_net_u16 (UDP_DST_PORT_vxlan_gbp);
-
- /* VXLAN header */
- vxlan_gbp_set_header (vxlan_gbp, t->vni);
- vnet_rewrite_set_data (*t, &h, len);
-}
-
-static uword
-vtep_addr_ref (ip46_address_t * ip)
-{
- uword *vtep = ip46_address_is_ip4 (ip) ?
- hash_get (vxlan_gbp_main.vtep4, ip->ip4.as_u32) :
- hash_get_mem (vxlan_gbp_main.vtep6, &ip->ip6);
- if (vtep)
- return ++(*vtep);
- ip46_address_is_ip4 (ip) ?
- hash_set (vxlan_gbp_main.vtep4, ip->ip4.as_u32, 1) :
- hash_set_mem_alloc (&vxlan_gbp_main.vtep6, &ip->ip6, 1);
- return 1;
-}
-
-static uword
-vtep_addr_unref (ip46_address_t * ip)
-{
- uword *vtep = ip46_address_is_ip4 (ip) ?
- hash_get (vxlan_gbp_main.vtep4, ip->ip4.as_u32) :
- hash_get_mem (vxlan_gbp_main.vtep6, &ip->ip6);
- ALWAYS_ASSERT (vtep);
- if (--(*vtep) != 0)
- return *vtep;
- ip46_address_is_ip4 (ip) ?
- hash_unset (vxlan_gbp_main.vtep4, ip->ip4.as_u32) :
- hash_unset_mem_free (&vxlan_gbp_main.vtep6, &ip->ip6);
- return 0;
-}
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED(union
-{
- struct
- {
- fib_node_index_t mfib_entry_index;
- adj_index_t mcast_adj_index;
- };
- u64 as_u64;
-}) mcast_shared_t;
-/* *INDENT-ON* */
-
-static inline mcast_shared_t
-mcast_shared_get (ip46_address_t * ip)
-{
- ASSERT (ip46_address_is_multicast (ip));
- uword *p = hash_get_mem (vxlan_gbp_main.mcast_shared, ip);
- ALWAYS_ASSERT (p);
- mcast_shared_t ret = {.as_u64 = *p };
- return ret;
-}
-
-static inline void
-mcast_shared_add (ip46_address_t * dst, fib_node_index_t mfei, adj_index_t ai)
-{
- mcast_shared_t new_ep = {
- .mcast_adj_index = ai,
- .mfib_entry_index = mfei,
- };
-
- hash_set_mem_alloc (&vxlan_gbp_main.mcast_shared, dst, new_ep.as_u64);
-}
-
-static inline void
-mcast_shared_remove (ip46_address_t * dst)
-{
- mcast_shared_t ep = mcast_shared_get (dst);
-
- adj_unlock (ep.mcast_adj_index);
- mfib_table_entry_delete_index (ep.mfib_entry_index, MFIB_SOURCE_VXLAN_GBP);
-
- hash_unset_mem_free (&vxlan_gbp_main.mcast_shared, dst);
-}
-
-inline void
-vxlan_gbp_register_udp_ports (void)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- if (vxm->udp_ports_registered == 0)
- {
- udp_register_dst_port (vxm->vlib_main, UDP_DST_PORT_vxlan_gbp,
- vxlan4_gbp_input_node.index, /* is_ip4 */ 1);
- udp_register_dst_port (vxm->vlib_main, UDP_DST_PORT_vxlan6_gbp,
- vxlan6_gbp_input_node.index, /* is_ip4 */ 0);
- }
- /*
- * Counts the number of vxlan_gbp tunnels
- */
- vxm->udp_ports_registered += 1;
-}
-
-inline void
-vxlan_gbp_unregister_udp_ports (void)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- ASSERT (vxm->udp_ports_registered != 0);
-
- if (vxm->udp_ports_registered == 1)
- {
- udp_unregister_dst_port (vxm->vlib_main, UDP_DST_PORT_vxlan_gbp,
- /* is_ip4 */ 1);
- udp_unregister_dst_port (vxm->vlib_main, UDP_DST_PORT_vxlan6_gbp,
- /* is_ip4 */ 0);
- }
-
- vxm->udp_ports_registered -= 1;
-}
-
-int vnet_vxlan_gbp_tunnel_add_del
- (vnet_vxlan_gbp_tunnel_add_del_args_t * a, u32 * sw_if_indexp)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vxlan_gbp_tunnel_t *t = 0;
- vnet_main_t *vnm = vxm->vnet_main;
- u64 *p;
- u32 sw_if_index = ~0;
- vxlan4_gbp_tunnel_key_t key4;
- vxlan6_gbp_tunnel_key_t key6;
- u32 is_ip6 = a->is_ip6;
-
- int not_found;
- if (!is_ip6)
- {
- key4.key[0] = ip46_address_is_multicast (&a->dst) ?
- a->dst.ip4.as_u32 :
- a->dst.ip4.as_u32 | (((u64) a->src.ip4.as_u32) << 32);
- key4.key[1] = (((u64) a->encap_fib_index) << 32)
- | clib_host_to_net_u32 (a->vni << 8);
- not_found =
- clib_bihash_search_inline_16_8 (&vxm->vxlan4_gbp_tunnel_by_key,
- &key4);
- p = &key4.value;
- }
- else
- {
- key6.key[0] = a->dst.ip6.as_u64[0];
- key6.key[1] = a->dst.ip6.as_u64[1];
- key6.key[2] = (((u64) a->encap_fib_index) << 32)
- | clib_host_to_net_u32 (a->vni << 8);
- not_found =
- clib_bihash_search_inline_24_8 (&vxm->vxlan6_gbp_tunnel_by_key,
- &key6);
- p = &key6.value;
- }
-
- if (not_found)
- p = 0;
-
- if (a->is_add)
- {
- l2input_main_t *l2im = &l2input_main;
- u32 dev_instance; /* real dev instance tunnel index */
- u32 user_instance; /* request and actual instance number */
-
- /* adding a tunnel: tunnel must not already exist */
- if (p)
- {
- t = pool_elt_at_index (vxm->tunnels, *p);
- *sw_if_indexp = t->sw_if_index;
- return VNET_API_ERROR_TUNNEL_EXIST;
- }
- pool_get_aligned (vxm->tunnels, t, CLIB_CACHE_LINE_BYTES);
- clib_memset (t, 0, sizeof (*t));
- dev_instance = t - vxm->tunnels;
-
- /* copy from arg structure */
-#define _(x) t->x = a->x;
- foreach_copy_field;
-#undef _
-
- vxlan_gbp_rewrite (t, is_ip6);
- /*
- * Reconcile the real dev_instance and a possible requested instance.
- */
- user_instance = a->instance;
- if (user_instance == ~0)
- user_instance = dev_instance;
- if (hash_get (vxm->instance_used, user_instance))
- {
- pool_put (vxm->tunnels, t);
- return VNET_API_ERROR_INSTANCE_IN_USE;
- }
- hash_set (vxm->instance_used, user_instance, 1);
-
- t->dev_instance = dev_instance; /* actual */
- t->user_instance = user_instance; /* name */
-
- /* copy the key */
- int add_failed;
- if (is_ip6)
- {
- key6.value = (u64) dev_instance;
- add_failed =
- clib_bihash_add_del_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, &key6,
- 1 /*add */ );
- }
- else
- {
- key4.value = (u64) dev_instance;
- add_failed =
- clib_bihash_add_del_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, &key4,
- 1 /*add */ );
- }
-
- if (add_failed)
- {
- pool_put (vxm->tunnels, t);
- return VNET_API_ERROR_INVALID_REGISTRATION;
- }
-
- vxlan_gbp_register_udp_ports ();
-
- t->hw_if_index = vnet_register_interface
- (vnm, vxlan_gbp_device_class.index, dev_instance,
- vxlan_gbp_hw_class.index, dev_instance);
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, t->hw_if_index);
-
- /* Set vxlan_gbp tunnel output node */
- u32 encap_index = !is_ip6 ?
- vxlan4_gbp_encap_node.index : vxlan6_gbp_encap_node.index;
- vnet_set_interface_output_node (vnm, t->hw_if_index, encap_index);
-
- t->sw_if_index = sw_if_index = hi->sw_if_index;
-
- if (VXLAN_GBP_TUNNEL_MODE_L3 == t->mode)
- {
- ip4_sw_interface_enable_disable (t->sw_if_index, 1);
- ip6_sw_interface_enable_disable (t->sw_if_index, 1);
- }
-
- vec_validate_init_empty (vxm->tunnel_index_by_sw_if_index, sw_if_index,
- ~0);
- vxm->tunnel_index_by_sw_if_index[sw_if_index] = dev_instance;
-
- /* setup l2 input config with l2 feature and bd 0 to drop packet */
- vec_validate (l2im->configs, sw_if_index);
- l2im->configs[sw_if_index].feature_bitmap = L2INPUT_FEAT_DROP;
- l2im->configs[sw_if_index].bd_index = 0;
-
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
- si->flags &= ~VNET_SW_INTERFACE_FLAG_HIDDEN;
- vnet_sw_interface_set_flags (vnm, sw_if_index,
- VNET_SW_INTERFACE_FLAG_ADMIN_UP);
-
- fib_node_init (&t->node, FIB_NODE_TYPE_VXLAN_GBP_TUNNEL);
- fib_prefix_t tun_dst_pfx;
- vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
-
- fib_prefix_from_ip46_addr (&t->dst, &tun_dst_pfx);
- if (!ip46_address_is_multicast (&t->dst))
- {
- /* Unicast tunnel -
- * source the FIB entry for the tunnel's destination
- * and become a child thereof. The tunnel will then get poked
- * when the forwarding for the entry updates, and the tunnel can
- * re-stack accordingly
- */
- vtep_addr_ref (&t->src);
- t->fib_entry_index = fib_entry_track (t->encap_fib_index,
- &tun_dst_pfx,
- FIB_NODE_TYPE_VXLAN_GBP_TUNNEL,
- dev_instance,
- &t->sibling_index);
- vxlan_gbp_tunnel_restack_dpo (t);
- }
- else
- {
- /* Multicast tunnel -
- * as the same mcast group can be used for multiple mcast tunnels
- * with different VNIs, create the output fib adjacency only if
- * it does not already exist
- */
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
- if (vtep_addr_ref (&t->dst) == 1)
- {
- fib_node_index_t mfei;
- adj_index_t ai;
- fib_route_path_t path = {
- .frp_proto = fib_proto_to_dpo (fp),
- .frp_addr = zero_addr,
- .frp_sw_if_index = 0xffffffff,
- .frp_fib_index = ~0,
- .frp_weight = 0,
- .frp_flags = FIB_ROUTE_PATH_LOCAL,
- .frp_mitf_flags = MFIB_ITF_FLAG_FORWARD,
- };
- const mfib_prefix_t mpfx = {
- .fp_proto = fp,
- .fp_len = (is_ip6 ? 128 : 32),
- .fp_grp_addr = tun_dst_pfx.fp_addr,
- };
-
- /*
- * Setup the (*,G) to receive traffic on the mcast group
- * - the forwarding interface is for-us
- * - the accepting interface is that from the API
- */
- mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx,
- MFIB_SOURCE_VXLAN_GBP, &path);
-
- path.frp_sw_if_index = a->mcast_sw_if_index;
- path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
- path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei = mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx,
- MFIB_SOURCE_VXLAN_GBP,
- &path);
-
- /*
- * Create the mcast adjacency to send traffic to the group
- */
- ai = adj_mcast_add_or_lock (fp,
- fib_proto_to_link (fp),
- a->mcast_sw_if_index);
-
- /*
- * create a new end-point
- */
- mcast_shared_add (&t->dst, mfei, ai);
- }
-
- dpo_id_t dpo = DPO_INVALID;
- mcast_shared_t ep = mcast_shared_get (&t->dst);
-
- /* Stack shared mcast dst mac addr rewrite on encap */
- dpo_set (&dpo, DPO_ADJACENCY_MCAST,
- fib_proto_to_dpo (fp), ep.mcast_adj_index);
-
- dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
- dpo_reset (&dpo);
- flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
- }
-
- vnet_get_sw_interface (vnet_get_main (), sw_if_index)->flood_class =
- flood_class;
- }
- else
- {
- /* deleting a tunnel: tunnel must exist */
- if (!p)
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- u32 instance = p[0];
- t = pool_elt_at_index (vxm->tunnels, instance);
-
- sw_if_index = t->sw_if_index;
- vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
-
- if (VXLAN_GBP_TUNNEL_MODE_L3 == t->mode)
- {
- ip4_sw_interface_enable_disable (t->sw_if_index, 0);
- ip6_sw_interface_enable_disable (t->sw_if_index, 0);
- }
-
- vxm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
-
- if (!is_ip6)
- clib_bihash_add_del_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, &key4,
- 0 /*del */ );
- else
- clib_bihash_add_del_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, &key6,
- 0 /*del */ );
-
- if (!ip46_address_is_multicast (&t->dst))
- {
- vtep_addr_unref (&t->src);
- fib_entry_untrack (t->fib_entry_index, t->sibling_index);
- }
- else if (vtep_addr_unref (&t->dst) == 0)
- {
- mcast_shared_remove (&t->dst);
- }
-
- vxlan_gbp_unregister_udp_ports ();
- vnet_delete_hw_interface (vnm, t->hw_if_index);
- hash_unset (vxm->instance_used, t->user_instance);
-
- fib_node_deinit (&t->node);
- pool_put (vxm->tunnels, t);
- }
-
- if (sw_if_indexp)
- *sw_if_indexp = sw_if_index;
-
- return 0;
-}
-
-int
-vnet_vxlan_gbp_tunnel_del (u32 sw_if_index)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vxlan_gbp_tunnel_t *t = 0;
- u32 ti;
-
- if (sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index))
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-
- ti = vxm->tunnel_index_by_sw_if_index[sw_if_index];
- if (~0 != ti)
- {
- t = pool_elt_at_index (vxm->tunnels, ti);
-
- vnet_vxlan_gbp_tunnel_add_del_args_t args = {
- .is_add = 0,
- .is_ip6 = !ip46_address_is_ip4 (&t->src),
- .vni = t->vni,
- .src = t->src,
- .dst = t->dst,
- .instance = ~0,
- };
-
- return (vnet_vxlan_gbp_tunnel_add_del (&args, NULL));
- }
-
- return VNET_API_ERROR_NO_SUCH_ENTRY;
-}
-
-static uword
-get_decap_next_for_node (u32 node_index, u32 ipv4_set)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vlib_main_t *vm = vxm->vlib_main;
- uword input_node = (ipv4_set) ? vxlan4_gbp_input_node.index :
- vxlan6_gbp_input_node.index;
-
- return vlib_node_add_next (vm, input_node, node_index);
-}
-
-static uword
-unformat_decap_next (unformat_input_t * input, va_list * args)
-{
- u32 *result = va_arg (*args, u32 *);
- u32 ipv4_set = va_arg (*args, int);
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vlib_main_t *vm = vxm->vlib_main;
- u32 node_index;
- u32 tmp;
-
- if (unformat (input, "l2"))
- *result = VXLAN_GBP_INPUT_NEXT_L2_INPUT;
- else if (unformat (input, "node %U", unformat_vlib_node, vm, &node_index))
- *result = get_decap_next_for_node (node_index, ipv4_set);
- else if (unformat (input, "%d", &tmp))
- *result = tmp;
- else
- return 0;
- return 1;
-}
-
-static clib_error_t *
-vxlan_gbp_tunnel_add_del_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- ip46_address_t src = ip46_address_initializer, dst =
- ip46_address_initializer;
- vxlan_gbp_tunnel_mode_t mode = VXLAN_GBP_TUNNEL_MODE_L2;
- u8 is_add = 1;
- u8 src_set = 0;
- u8 dst_set = 0;
- u8 grp_set = 0;
- u8 ipv4_set = 0;
- u8 ipv6_set = 0;
- u32 instance = ~0;
- u32 encap_fib_index = 0;
- u32 mcast_sw_if_index = ~0;
- u32 decap_next_index = VXLAN_GBP_INPUT_NEXT_L2_INPUT;
- u32 vni = 0;
- u32 table_id;
- clib_error_t *parse_error = NULL;
-
- /* Get a line of input. */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "del"))
- {
- is_add = 0;
- }
- else if (unformat (line_input, "instance %d", &instance))
- ;
- else if (unformat (line_input, "src %U",
- unformat_ip46_address, &src, IP46_TYPE_ANY))
- {
- src_set = 1;
- ip46_address_is_ip4 (&src) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "dst %U",
- unformat_ip46_address, &dst, IP46_TYPE_ANY))
- {
- dst_set = 1;
- ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "group %U %U",
- unformat_ip46_address, &dst, IP46_TYPE_ANY,
- unformat_vnet_sw_interface,
- vnet_get_main (), &mcast_sw_if_index))
- {
- grp_set = dst_set = 1;
- ip46_address_is_ip4 (&dst) ? (ipv4_set = 1) : (ipv6_set = 1);
- }
- else if (unformat (line_input, "encap-vrf-id %d", &table_id))
- {
- encap_fib_index =
- fib_table_find (fib_ip_proto (ipv6_set), table_id);
- }
- else if (unformat (line_input, "decap-next %U", unformat_decap_next,
- &decap_next_index, ipv4_set))
- ;
- else if (unformat (line_input, "vni %d", &vni))
- ;
- else
- {
- parse_error = clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- break;
- }
- }
-
- unformat_free (line_input);
-
- if (parse_error)
- return parse_error;
-
- if (encap_fib_index == ~0)
- return clib_error_return (0, "nonexistent encap-vrf-id %d", table_id);
-
- if (src_set == 0)
- return clib_error_return (0, "tunnel src address not specified");
-
- if (dst_set == 0)
- return clib_error_return (0, "tunnel dst address not specified");
-
- if (grp_set && !ip46_address_is_multicast (&dst))
- return clib_error_return (0, "tunnel group address not multicast");
-
- if (grp_set == 0 && ip46_address_is_multicast (&dst))
- return clib_error_return (0, "dst address must be unicast");
-
- if (grp_set && mcast_sw_if_index == ~0)
- return clib_error_return (0, "tunnel nonexistent multicast device");
-
- if (ipv4_set && ipv6_set)
- return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
-
- if (ip46_address_cmp (&src, &dst) == 0)
- return clib_error_return (0, "src and dst addresses are identical");
-
- if (decap_next_index == ~0)
- return clib_error_return (0, "next node not found");
-
- if (vni == 0)
- return clib_error_return (0, "vni not specified");
-
- if (vni >> 24)
- return clib_error_return (0, "vni %d out of range", vni);
-
- vnet_vxlan_gbp_tunnel_add_del_args_t a = {
- .is_add = is_add,
- .is_ip6 = ipv6_set,
- .instance = instance,
-#define _(x) .x = x,
- foreach_copy_field
-#undef _
- };
-
- u32 tunnel_sw_if_index;
- int rv = vnet_vxlan_gbp_tunnel_add_del (&a, &tunnel_sw_if_index);
-
- switch (rv)
- {
- case 0:
- if (is_add)
- vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
- vnet_get_main (), tunnel_sw_if_index);
- break;
-
- case VNET_API_ERROR_TUNNEL_EXIST:
- return clib_error_return (0, "tunnel already exists...");
-
- case VNET_API_ERROR_NO_SUCH_ENTRY:
- return clib_error_return (0, "tunnel does not exist...");
-
- case VNET_API_ERROR_INSTANCE_IN_USE:
- return clib_error_return (0, "Instance is in use");
-
- default:
- return clib_error_return
- (0, "vnet_vxlan_gbp_tunnel_add_del returned %d", rv);
- }
-
- return 0;
-}
-
-/*?
- * Add or delete a VXLAN Tunnel.
- *
- * VXLAN provides the features needed to allow L2 bridge domains (BDs)
- * to span multiple servers. This is done by building an L2 overlay on
- * top of an L3 network underlay using VXLAN tunnels.
- *
- * This makes it possible for servers to be co-located in the same data
- * center or be separated geographically as long as they are reachable
- * through the underlay L3 network.
- *
- * You can refer to this kind of L2 overlay bridge domain as a VXLAN
- * (Virtual eXtensible VLAN) segment.
- *
- * @cliexpar
- * Example of how to create a VXLAN Tunnel:
- * @cliexcmd{create vxlan_gbp tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 encap-vrf-id 7}
- * Example of how to create a VXLAN Tunnel with a known name, vxlan_gbp_tunnel42:
- * @cliexcmd{create vxlan_gbp tunnel src 10.0.3.1 dst 10.0.3.3 instance 42}
- * Example of how to create a multicast VXLAN Tunnel with a known name, vxlan_gbp_tunnel23:
- * @cliexcmd{create vxlan_gbp tunnel src 10.0.3.1 group 239.1.1.1 GigabitEthernet0/8/0 instance 23}
- * Example of how to delete a VXLAN Tunnel:
- * @cliexcmd{create vxlan_gbp tunnel src 10.0.3.1 dst 10.0.3.3 vni 13 del}
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (create_vxlan_gbp_tunnel_command, static) = {
- .path = "create vxlan-gbp tunnel",
- .short_help =
- "create vxlan-gbp tunnel src <local-vtep-addr>"
- " {dst <remote-vtep-addr>|group <mcast-vtep-addr> <intf-name>} vni <nn>"
- " [instance <id>]"
- " [encap-vrf-id <nn>] [decap-next [l2|node <name>]] [del]",
- .function = vxlan_gbp_tunnel_add_del_command_fn,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_vxlan_gbp_tunnel_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vxlan_gbp_tunnel_t *t;
- int raw = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "raw"))
- raw = 1;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, input);
- }
-
- if (pool_elts (vxm->tunnels) == 0)
- vlib_cli_output (vm, "No vxlan-gbp tunnels configured...");
-
-/* *INDENT-OFF* */
- pool_foreach (t, vxm->tunnels)
- {
- vlib_cli_output (vm, "%U", format_vxlan_gbp_tunnel, t);
- }
-/* *INDENT-ON* */
-
- if (raw)
- {
- vlib_cli_output (vm, "Raw IPv4 Hash Table:\n%U\n",
- format_bihash_16_8, &vxm->vxlan4_gbp_tunnel_by_key,
- 1 /* verbose */ );
- vlib_cli_output (vm, "Raw IPv6 Hash Table:\n%U\n",
- format_bihash_24_8, &vxm->vxlan6_gbp_tunnel_by_key,
- 1 /* verbose */ );
- }
-
- return 0;
-}
-
-/*?
- * Display all the VXLAN Tunnel entries.
- *
- * @cliexpar
- * Example of how to display the VXLAN Tunnel entries:
- * @cliexstart{show vxlan_gbp tunnel}
- * [0] src 10.0.3.1 dst 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
- * @cliexend
- ?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_vxlan_gbp_tunnel_command, static) = {
- .path = "show vxlan-gbp tunnel",
- .short_help = "show vxlan-gbp tunnel [raw]",
- .function = show_vxlan_gbp_tunnel_command_fn,
-};
-/* *INDENT-ON* */
-
-
-void
-vnet_int_vxlan_gbp_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
-{
- if (is_ip6)
- vnet_feature_enable_disable ("ip6-unicast", "ip6-vxlan-gbp-bypass",
- sw_if_index, is_enable, 0, 0);
- else
- vnet_feature_enable_disable ("ip4-unicast", "ip4-vxlan-gbp-bypass",
- sw_if_index, is_enable, 0, 0);
-}
-
-
-static clib_error_t *
-set_ip_vxlan_gbp_bypass (u32 is_ip6,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vnet_main_t *vnm = vnet_get_main ();
- clib_error_t *error = 0;
- u32 sw_if_index, is_enable;
-
- sw_if_index = ~0;
- is_enable = 1;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat_user
- (line_input, unformat_vnet_sw_interface, vnm, &sw_if_index))
- ;
- else if (unformat (line_input, "del"))
- is_enable = 0;
- else
- {
- error = unformat_parse_error (line_input);
- goto done;
- }
- }
-
- if (~0 == sw_if_index)
- {
- error = clib_error_return (0, "unknown interface `%U'",
- format_unformat_error, line_input);
- goto done;
- }
-
- vnet_int_vxlan_gbp_bypass_mode (sw_if_index, is_ip6, is_enable);
-
-done:
- unformat_free (line_input);
-
- return error;
-}
-
-static clib_error_t *
-set_ip4_vxlan_gbp_bypass (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- return set_ip_vxlan_gbp_bypass (0, input, cmd);
-}
-
-/*?
- * This command adds the 'ip4-vxlan-gbp-bypass' graph node for a given interface.
- * By adding the IPv4 vxlan_gbp-bypass graph node to an interface, the node checks
- * for and validate input vxlan_gbp packet and bypass ip4-lookup, ip4-local,
- * ip4-udp-lookup nodes to speedup vxlan_gbp packet forwarding. This node will
- * cause extra overhead to for non-vxlan_gbp packets which is kept at a minimum.
- *
- * @cliexpar
- * @parblock
- * Example of graph node before ip4-vxlan_gbp-bypass is enabled:
- * @cliexstart{show vlib graph ip4-vxlan_gbp-bypass}
- * Name Next Previous
- * ip4-vxlan-gbp-bypass error-drop [0]
- * vxlan4-gbp-input [1]
- * ip4-lookup [2]
- * @cliexend
- *
- * Example of how to enable ip4-vxlan-gbp-bypass on an interface:
- * @cliexcmd{set interface ip vxlan-gbp-bypass GigabitEthernet2/0/0}
- *
- * Example of graph node after ip4-vxlan-gbp-bypass is enabled:
- * @cliexstart{show vlib graph ip4-vxlan-gbp-bypass}
- * Name Next Previous
- * ip4-vxlan-gbp-bypass error-drop [0] ip4-input
- * vxlan4-gbp-input [1] ip4-input-no-checksum
- * ip4-lookup [2]
- * @cliexend
- *
- * Example of how to display the feature enabled on an interface:
- * @cliexstart{show ip interface features GigabitEthernet2/0/0}
- * IP feature paths configured on GigabitEthernet2/0/0...
- * ...
- * ipv4 unicast:
- * ip4-vxlan-gbp-bypass
- * ip4-lookup
- * ...
- * @cliexend
- *
- * Example of how to disable ip4-vxlan-gbp-bypass on an interface:
- * @cliexcmd{set interface ip vxlan-gbp-bypass GigabitEthernet2/0/0 del}
- * @endparblock
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_interface_ip_vxlan_gbp_bypass_command, static) = {
- .path = "set interface ip vxlan-gbp-bypass",
- .function = set_ip4_vxlan_gbp_bypass,
- .short_help = "set interface ip vxlan-gbp-bypass <interface> [del]",
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-set_ip6_vxlan_gbp_bypass (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- return set_ip_vxlan_gbp_bypass (1, input, cmd);
-}
-
-/*?
- * This command adds the 'ip6-vxlan-gbp-bypass' graph node for a given interface.
- * By adding the IPv6 vxlan-gbp-bypass graph node to an interface, the node checks
- * for and validate input vxlan_gbp packet and bypass ip6-lookup, ip6-local,
- * ip6-udp-lookup nodes to speedup vxlan_gbp packet forwarding. This node will
- * cause extra overhead to for non-vxlan packets which is kept at a minimum.
- *
- * @cliexpar
- * @parblock
- * Example of graph node before ip6-vxlan-gbp-bypass is enabled:
- * @cliexstart{show vlib graph ip6-vxlan-gbp-bypass}
- * Name Next Previous
- * ip6-vxlan-gbp-bypass error-drop [0]
- * vxlan6-gbp-input [1]
- * ip6-lookup [2]
- * @cliexend
- *
- * Example of how to enable ip6-vxlan-gbp-bypass on an interface:
- * @cliexcmd{set interface ip6 vxlan-gbp-bypass GigabitEthernet2/0/0}
- *
- * Example of graph node after ip6-vxlan-gbp-bypass is enabled:
- * @cliexstart{show vlib graph ip6-vxlan-gbp-bypass}
- * Name Next Previous
- * ip6-vxlan-gbp-bypass error-drop [0] ip6-input
- * vxlan6-gbp-input [1] ip4-input-no-checksum
- * ip6-lookup [2]
- * @cliexend
- *
- * Example of how to display the feature enabled on an interface:
- * @cliexstart{show ip interface features GigabitEthernet2/0/0}
- * IP feature paths configured on GigabitEthernet2/0/0...
- * ...
- * ipv6 unicast:
- * ip6-vxlan-gbp-bypass
- * ip6-lookup
- * ...
- * @cliexend
- *
- * Example of how to disable ip6-vxlan-gbp-bypass on an interface:
- * @cliexcmd{set interface ip6 vxlan-gbp-bypass GigabitEthernet2/0/0 del}
- * @endparblock
-?*/
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (set_interface_ip6_vxlan_gbp_bypass_command, static) = {
- .path = "set interface ip6 vxlan-gbp-bypass",
- .function = set_ip6_vxlan_gbp_bypass,
- .short_help = "set interface ip6 vxlan-gbp-bypass <interface> [del]",
-};
-/* *INDENT-ON* */
-
-#define VXLAN_GBP_HASH_NUM_BUCKETS (2 * 1024)
-#define VXLAN_GBP_HASH_MEMORY_SIZE (1 << 20)
-
-clib_error_t *
-vxlan_gbp_init (vlib_main_t * vm)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- vxm->vnet_main = vnet_get_main ();
- vxm->vlib_main = vm;
-
- /* initialize the ip6 hash */
- clib_bihash_init_16_8 (&vxm->vxlan4_gbp_tunnel_by_key, "vxlan4-gbp",
- VXLAN_GBP_HASH_NUM_BUCKETS,
- VXLAN_GBP_HASH_MEMORY_SIZE);
- clib_bihash_init_24_8 (&vxm->vxlan6_gbp_tunnel_by_key, "vxlan6-gbp",
- VXLAN_GBP_HASH_NUM_BUCKETS,
- VXLAN_GBP_HASH_MEMORY_SIZE);
- vxm->vtep6 = hash_create_mem (0, sizeof (ip6_address_t), sizeof (uword));
- vxm->mcast_shared = hash_create_mem (0,
- sizeof (ip46_address_t),
- sizeof (mcast_shared_t));
-
- fib_node_register_type (FIB_NODE_TYPE_VXLAN_GBP_TUNNEL, &vxlan_gbp_vft);
-
- punt_hdl = vlib_punt_client_register ("vxlan-gbp");
-
- vlib_punt_reason_alloc (punt_hdl, "VXLAN-GBP-no-such-v4-tunnel", NULL, NULL,
- &vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP4],
- VNET_PUNT_REASON_F_IP4_PACKET,
- format_vnet_punt_reason_flags);
- vlib_punt_reason_alloc (punt_hdl, "VXLAN-GBP-no-such-v6-tunnel", NULL, NULL,
- &vxm->punt_no_such_tunnel[FIB_PROTOCOL_IP6],
- VNET_PUNT_REASON_F_IP6_PACKET,
- format_vnet_punt_reason_flags);
-
- return (0);
-}
-
-/* *INDENT-OFF* */
-VLIB_INIT_FUNCTION (vxlan_gbp_init) =
-{
- .runs_after = VLIB_INITS("punt_init"),
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp.h b/src/vnet/vxlan-gbp/vxlan_gbp.h
deleted file mode 100644
index fe93587cb00..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp.h
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef included_vnet_vxlan_gbp_h
-#define included_vnet_vxlan_gbp_h
-
-#include <vppinfra/error.h>
-#include <vppinfra/hash.h>
-#include <vppinfra/bihash_16_8.h>
-#include <vppinfra/bihash_24_8.h>
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/l2/l2_input.h>
-#include <vnet/l2/l2_output.h>
-#include <vnet/l2/l2_bd.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-#include <vnet/ip/ip4_packet.h>
-#include <vnet/ip/ip6_packet.h>
-#include <vnet/udp/udp_local.h>
-#include <vnet/udp/udp_packet.h>
-#include <vnet/dpo/dpo.h>
-#include <vnet/adj/adj_types.h>
-
-/* *INDENT-OFF* */
-typedef CLIB_PACKED (struct {
- ip4_header_t ip4; /* 20 bytes */
- udp_header_t udp; /* 8 bytes */
- vxlan_gbp_header_t vxlan_gbp; /* 8 bytes */
-}) ip4_vxlan_gbp_header_t;
-
-typedef CLIB_PACKED (struct {
- ip6_header_t ip6; /* 40 bytes */
- udp_header_t udp; /* 8 bytes */
- vxlan_gbp_header_t vxlan_gbp; /* 8 bytes */
-}) ip6_vxlan_gbp_header_t;
-/* *INDENT-ON* */
-
-/*
-* Key fields: remote ip, vni on incoming VXLAN packet
-* all fields in NET byte order
-*/
-typedef clib_bihash_kv_16_8_t vxlan4_gbp_tunnel_key_t;
-
-/*
-* Key fields: remote ip, vni and fib index on incoming VXLAN packet
-* ip, vni fields in NET byte order
-* fib index field in host byte order
-*/
-typedef clib_bihash_kv_24_8_t vxlan6_gbp_tunnel_key_t;
-
-typedef enum vxlan_gbp_tunnel_mode_t_
-{
- VXLAN_GBP_TUNNEL_MODE_L2,
- VXLAN_GBP_TUNNEL_MODE_L3,
-} vxlan_gbp_tunnel_mode_t;
-
-extern u8 *format_vxlan_gbp_tunnel_mode (u8 * s, va_list * args);
-
-typedef struct
-{
- /* Required for pool_get_aligned */
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /* FIB DPO for IP forwarding of VXLAN encap packet */
- dpo_id_t next_dpo;
-
- /* flags */
- u16 flags;
-
- /* vxlan VNI in HOST byte order */
- u32 vni;
-
- /* tunnel src and dst addresses */
- ip46_address_t src;
- ip46_address_t dst;
-
- /* mcast packet output intfc index (used only if dst is mcast) */
- u32 mcast_sw_if_index;
-
- /* The FIB index for src/dst addresses */
- u32 encap_fib_index;
-
- /* vnet intfc index */
- u32 sw_if_index;
- u32 hw_if_index;
-
- /** Next node after VxLAN-GBP encap */
- uword encap_next_node;
-
- /**
- * Tunnel mode.
- * L2 tunnels decap to L2 path, L3 tunnels to the L3 path
- */
- vxlan_gbp_tunnel_mode_t mode;
-
- /**
- * Linkage into the FIB object graph
- */
- fib_node_t node;
-
- /*
- * The FIB entry for (depending on VXLAN-GBP tunnel is unicast or mcast)
- * sending unicast VXLAN-GBP encap packets or receiving mcast VXLAN-GBP packets
- */
- fib_node_index_t fib_entry_index;
- adj_index_t mcast_adj_index;
-
- /**
- * The tunnel is a child of the FIB entry for its destination. This is
- * so it receives updates when the forwarding information for that entry
- * changes.
- * The tunnels sibling index on the FIB entry's dependency list.
- */
- u32 sibling_index;
-
- u32 dev_instance; /* Real device instance in tunnel vector */
- u32 user_instance; /* Instance name being shown to user */
-
-
- VNET_DECLARE_REWRITE;
-} vxlan_gbp_tunnel_t;
-
-#define foreach_vxlan_gbp_input_next \
- _(DROP, "error-drop") \
- _(PUNT, "punt-dispatch") \
- _(L2_INPUT, "l2-input") \
- _(IP4_INPUT, "ip4-input") \
- _(IP6_INPUT, "ip6-input")
-
-typedef enum
-{
-#define _(s,n) VXLAN_GBP_INPUT_NEXT_##s,
- foreach_vxlan_gbp_input_next
-#undef _
- VXLAN_GBP_INPUT_N_NEXT,
-} vxlan_gbp_input_next_t;
-
-typedef enum
-{
-#define vxlan_gbp_error(n,s) VXLAN_GBP_ERROR_##n,
-#include <vnet/vxlan-gbp/vxlan_gbp_error.def>
-#undef vxlan_gbp_error
- VXLAN_GBP_N_ERROR,
-} vxlan_gbp_input_error_t;
-
-/**
- * Call back function packets that do not match a configured tunnel
- */
-typedef vxlan_gbp_input_next_t (*vxlan_bgp_no_tunnel_t) (vlib_buffer_t * b,
- u32 thread_index,
- u8 is_ip6);
-
-typedef struct
-{
- /* vector of encap tunnel instances */
- vxlan_gbp_tunnel_t *tunnels;
-
- /* lookup tunnel by key */
- clib_bihash_16_8_t vxlan4_gbp_tunnel_by_key; /* keyed on ipv4.dst + fib + vni */
- clib_bihash_24_8_t vxlan6_gbp_tunnel_by_key; /* keyed on ipv6.dst + fib + vni */
-
- /* local VTEP IPs ref count used by vxlan-bypass node to check if
- received VXLAN packet DIP matches any local VTEP address */
- uword *vtep4; /* local ip4 VTEPs keyed on their ip4 addr */
- uword *vtep6; /* local ip6 VTEPs keyed on their ip6 addr */
-
- /* mcast shared info */
- uword *mcast_shared; /* keyed on mcast ip46 addr */
-
- /* Mapping from sw_if_index to tunnel index */
- u32 *tunnel_index_by_sw_if_index;
-
- /* On demand udp port registration */
- u32 udp_ports_registered;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
-
- /* Record used instances */
- uword *instance_used;
-
- /**
- * Punt reasons for no such tunnel
- */
- vlib_punt_reason_t punt_no_such_tunnel[FIB_PROTOCOL_IP_MAX];
-} vxlan_gbp_main_t;
-
-extern vxlan_gbp_main_t vxlan_gbp_main;
-
-extern vlib_node_registration_t vxlan4_gbp_input_node;
-extern vlib_node_registration_t vxlan6_gbp_input_node;
-extern vlib_node_registration_t vxlan4_gbp_encap_node;
-extern vlib_node_registration_t vxlan6_gbp_encap_node;
-extern void vxlan_gbp_register_udp_ports (void);
-extern void vxlan_gbp_unregister_udp_ports (void);
-
-u8 *format_vxlan_gbp_encap_trace (u8 * s, va_list * args);
-
-typedef struct
-{
- u8 is_add;
- u8 is_ip6;
- u32 instance;
- vxlan_gbp_tunnel_mode_t mode;
- ip46_address_t src, dst;
- u32 mcast_sw_if_index;
- u32 encap_fib_index;
- u32 vni;
-} vnet_vxlan_gbp_tunnel_add_del_args_t;
-
-int vnet_vxlan_gbp_tunnel_add_del
- (vnet_vxlan_gbp_tunnel_add_del_args_t * a, u32 * sw_if_indexp);
-int vnet_vxlan_gbp_tunnel_del (u32 sw_if_indexp);
-
-void vnet_int_vxlan_gbp_bypass_mode (u32 sw_if_index, u8 is_ip6,
- u8 is_enable);
-
-always_inline u32
-vxlan_gbp_tunnel_by_sw_if_index (u32 sw_if_index)
-{
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
-
- if (sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index))
- return ~0;
-
- return (vxm->tunnel_index_by_sw_if_index[sw_if_index]);
-}
-
-#endif /* included_vnet_vxlan_gbp_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp_api.c b/src/vnet/vxlan-gbp/vxlan_gbp_api.c
deleted file mode 100644
index a3f2246f463..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp_api.c
+++ /dev/null
@@ -1,217 +0,0 @@
-/*
- *------------------------------------------------------------------
- * vxlan_gbp_api.c - vxlan gbp api
- *
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vnet/vnet.h>
-#include <vlibmemory/api.h>
-
-#include <vnet/interface.h>
-#include <vnet/api_errno.h>
-#include <vnet/feature/feature.h>
-#include <vnet/vxlan-gbp/vxlan_gbp.h>
-#include <vnet/fib/fib_table.h>
-#include <vnet/ip/ip_types_api.h>
-#include <vnet/format_fns.h>
-
-#include <vxlan-gbp/vxlan_gbp.api_enum.h>
-#include <vxlan-gbp/vxlan_gbp.api_types.h>
-
-#define REPLY_MSG_ID_BASE msg_id_base
-#include <vlibapi/api_helper_macros.h>
-
-static u16 msg_id_base;
-
-static void
- vl_api_sw_interface_set_vxlan_gbp_bypass_t_handler
- (vl_api_sw_interface_set_vxlan_gbp_bypass_t * mp)
-{
- vl_api_sw_interface_set_vxlan_gbp_bypass_reply_t *rmp;
- int rv = 0;
- u32 sw_if_index = ntohl (mp->sw_if_index);
-
- VALIDATE_SW_IF_INDEX (mp);
-
- vnet_int_vxlan_gbp_bypass_mode (sw_if_index, mp->is_ipv6, mp->enable);
- BAD_SW_IF_INDEX_LABEL;
-
- REPLY_MACRO (VL_API_SW_INTERFACE_SET_VXLAN_GBP_BYPASS_REPLY);
-}
-
-static int
-vxlan_gbp_tunnel_mode_decode (vl_api_vxlan_gbp_api_tunnel_mode_t in,
- vxlan_gbp_tunnel_mode_t * out)
-{
- in = clib_net_to_host_u32 (in);
-
- switch (in)
- {
- case VXLAN_GBP_API_TUNNEL_MODE_L2:
- *out = VXLAN_GBP_TUNNEL_MODE_L2;
- return (0);
- case VXLAN_GBP_API_TUNNEL_MODE_L3:
- *out = VXLAN_GBP_TUNNEL_MODE_L3;
- return (0);
- }
- return (VNET_API_ERROR_INVALID_VALUE);
-}
-
-static void vl_api_vxlan_gbp_tunnel_add_del_t_handler
- (vl_api_vxlan_gbp_tunnel_add_del_t * mp)
-{
- vl_api_vxlan_gbp_tunnel_add_del_reply_t *rmp;
- vxlan_gbp_tunnel_mode_t mode;
- ip46_address_t src, dst;
- ip46_type_t itype;
- int rv = 0;
- u32 sw_if_index = ~0;
- u32 fib_index;
-
- itype = ip_address_decode (&mp->tunnel.src, &src);
- itype = ip_address_decode (&mp->tunnel.dst, &dst);
-
- fib_index = fib_table_find (fib_proto_from_ip46 (itype),
- ntohl (mp->tunnel.encap_table_id));
- if (fib_index == ~0)
- {
- rv = VNET_API_ERROR_NO_SUCH_FIB;
- goto out;
- }
-
- rv = vxlan_gbp_tunnel_mode_decode (mp->tunnel.mode, &mode);
-
- if (rv)
- goto out;
-
- vnet_vxlan_gbp_tunnel_add_del_args_t a = {
- .is_add = mp->is_add,
- .is_ip6 = (itype == IP46_TYPE_IP6),
- .instance = ntohl (mp->tunnel.instance),
- .mcast_sw_if_index = ntohl (mp->tunnel.mcast_sw_if_index),
- .encap_fib_index = fib_index,
- .vni = ntohl (mp->tunnel.vni),
- .dst = dst,
- .src = src,
- .mode = mode,
- };
-
- /* Check src & dst are different */
- if (ip46_address_cmp (&a.dst, &a.src) == 0)
- {
- rv = VNET_API_ERROR_SAME_SRC_DST;
- goto out;
- }
- if (ip46_address_is_multicast (&a.dst) &&
- !vnet_sw_if_index_is_api_valid (a.mcast_sw_if_index))
- {
- rv = VNET_API_ERROR_INVALID_SW_IF_INDEX;
- goto out;
- }
-
- rv = vnet_vxlan_gbp_tunnel_add_del (&a, &sw_if_index);
-
-out:
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_VXLAN_GBP_TUNNEL_ADD_DEL_REPLY,
- ({
- rmp->sw_if_index = ntohl (sw_if_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void send_vxlan_gbp_tunnel_details
- (vxlan_gbp_tunnel_t * t, vl_api_registration_t * reg, u32 context)
-{
- vl_api_vxlan_gbp_tunnel_details_t *rmp;
- ip46_type_t itype = (ip46_address_is_ip4 (&t->dst) ?
- IP46_TYPE_IP4 : IP46_TYPE_IP6);
-
- rmp = vl_msg_api_alloc (sizeof (*rmp));
- clib_memset (rmp, 0, sizeof (*rmp));
- rmp->_vl_msg_id =
- ntohs (VL_API_VXLAN_GBP_TUNNEL_DETAILS + REPLY_MSG_ID_BASE);
-
- ip_address_encode (&t->src, itype, &rmp->tunnel.src);
- ip_address_encode (&t->dst, itype, &rmp->tunnel.dst);
- rmp->tunnel.encap_table_id =
- fib_table_get_table_id (t->encap_fib_index, fib_proto_from_ip46 (itype));
-
- rmp->tunnel.instance = htonl (t->user_instance);
- rmp->tunnel.mcast_sw_if_index = htonl (t->mcast_sw_if_index);
- rmp->tunnel.vni = htonl (t->vni);
- rmp->tunnel.sw_if_index = htonl (t->sw_if_index);
- rmp->context = context;
-
- vl_api_send_msg (reg, (u8 *) rmp);
-}
-
-static void vl_api_vxlan_gbp_tunnel_dump_t_handler
- (vl_api_vxlan_gbp_tunnel_dump_t * mp)
-{
- vl_api_registration_t *reg;
- vxlan_gbp_main_t *vxm = &vxlan_gbp_main;
- vxlan_gbp_tunnel_t *t;
- u32 sw_if_index;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- sw_if_index = ntohl (mp->sw_if_index);
-
- if (~0 == sw_if_index)
- {
- /* *INDENT-OFF* */
- pool_foreach (t, vxm->tunnels)
- {
- send_vxlan_gbp_tunnel_details(t, reg, mp->context);
- }
- /* *INDENT-ON* */
- }
- else
- {
- if ((sw_if_index >= vec_len (vxm->tunnel_index_by_sw_if_index)) ||
- (~0 == vxm->tunnel_index_by_sw_if_index[sw_if_index]))
- {
- return;
- }
- t = &vxm->tunnels[vxm->tunnel_index_by_sw_if_index[sw_if_index]];
- send_vxlan_gbp_tunnel_details (t, reg, mp->context);
- }
-}
-
-#include <vxlan-gbp/vxlan_gbp.api.c>
-static clib_error_t *
-vxlan_gbp_api_hookup (vlib_main_t * vm)
-{
- /*
- * Set up the (msg_name, crc, message-id) table
- */
- msg_id_base = setup_message_id_table ();
-
- return 0;
-}
-
-VLIB_API_INIT_FUNCTION (vxlan_gbp_api_hookup);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp_packet.c b/src/vnet/vxlan-gbp/vxlan_gbp_packet.c
deleted file mode 100644
index 01c7a19bfb9..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp_packet.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
-
-u8 *
-format_vxlan_gbp_header_flags (u8 * s, va_list * args)
-{
- vxlan_gbp_flags_t flags = va_arg (*args, int);
-
- if (VXLAN_GBP_FLAGS_NONE == flags)
- {
- s = format (s, "None");
- }
-#define _(n,f) { \
- if (VXLAN_GBP_FLAGS_##f & flags) \
- s = format (s, #f); \
- }
- foreach_vxlan_gbp_flags
-#undef _
- return (s);
-}
-
-u8 *
-format_vxlan_gbp_header_gpflags (u8 * s, va_list * args)
-{
- vxlan_gbp_gpflags_t flags = va_arg (*args, int);
-
- if (VXLAN_GBP_GPFLAGS_NONE == flags)
- {
- s = format (s, "None");
- }
-#define _(n,f) { \
- if (VXLAN_GBP_GPFLAGS_##f & flags) \
- s = format (s, #f); \
- }
- foreach_vxlan_gbp_gpflags
-#undef _
- return (s);
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gbp/vxlan_gbp_packet.h b/src/vnet/vxlan-gbp/vxlan_gbp_packet.h
deleted file mode 100644
index e655b333b89..00000000000
--- a/src/vnet/vxlan-gbp/vxlan_gbp_packet.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __included_vxlan_gbp_packet_h__
-#define __included_vxlan_gbp_packet_h__ 1
-
-#include <vlib/vlib.h>
-
-/*
- * From draft-smith-vxlan-group-policy-04.txt
- *
- * 0 1 2 3
- * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * |G|R|R|R|I|R|R|R|R|D|E|S|A|R|R|R| Group Policy ID |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- * | VXLAN Network Identifier (VNI) | Reserved |
- * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- *
- * G bit: Bit 0 of the initial word is defined as the G (Group Based
- * Policy Extension) bit.
- *
- * I bit: where the I flag MUST be set to 1 for a valid
- * VXLAN Network ID (VNI).
- *
- * D bit: Bit 9 of the initial word is defined as the Don't Learn bit.
- * When set, this bit indicates that the egress VTEP MUST NOT learn the
- * source address of the encapsulated frame.
- *
- * E bit: Bit 10 of the initial word is defined as the bounce packet.
- * When set, this bit indicates that packet is bounced and must be
- * dropped.
- *
- * S bit: Bit 11 of the initial word is defined as the source policy
- * applied bit.
- *
- * A bit: Bit 12 of the initial word is defined as the A (Policy
- * Applied) bit. This bit is only defined as the A bit when the G bit
- * is set to 1.
- *
- * A = 1 indicates that the group policy has already been applied to
- * this packet. Policies MUST NOT be applied by devices when the A
- * bit is set.
- *
- * A = 0 indicates that the group policy has not been applied to this
- * packet. Group policies MUST be applied by devices when the A bit
- * is set to 0 and the destination Group has been determined.
- * Devices that apply the Group policy MUST set the A bit to 1 after
- * the policy has been applied.
- *
- * Group Policy ID: 16 bit identifier that indicates the source TSI
- * Group membership being encapsulated by VXLAN. Its value is source
- * class id.
- *
- * FOR INTERNAL USE ONLY
- * R bit: Bit 12 of the initial word is defined as the reflection bit
- * Set on packet rx checked on tx and dropped if set. this prevents
- * packets recieved on an iVXLAN tunnel being reflected back to
- * another.
- */
-
-typedef struct
-{
- union
- {
- struct
- {
- union
- {
- struct
- {
- u8 flag_g_i;
- u8 gpflags;
- };
- u16 flags;
- };
- u16 sclass;
- };
- u32 flags_sclass_as_u32;
- };
- u32 vni_reserved;
-} vxlan_gbp_header_t;
-
-#define foreach_vxlan_gbp_flags \
- _ (0x80, G) \
- _ (0x08, I)
-
-typedef enum
-{
- VXLAN_GBP_FLAGS_NONE = 0,
-#define _(n,f) VXLAN_GBP_FLAGS_##f = n,
- foreach_vxlan_gbp_flags
-#undef _
-} __attribute__ ((packed)) vxlan_gbp_flags_t;
-
-#define VXLAN_GBP_FLAGS_GI (VXLAN_GBP_FLAGS_G|VXLAN_GBP_FLAGS_I)
-
-#define foreach_vxlan_gbp_gpflags \
-_ (0x40, D) \
-_ (0x20, E) \
-_ (0x10, S) \
-_ (0x08, A) \
-_ (0x04, R)
-
-typedef enum
-{
- VXLAN_GBP_GPFLAGS_NONE = 0,
-#define _(n,f) VXLAN_GBP_GPFLAGS_##f = n,
- foreach_vxlan_gbp_gpflags
-#undef _
-} __attribute__ ((packed)) vxlan_gbp_gpflags_t;
-
-static inline u32
-vxlan_gbp_get_vni (vxlan_gbp_header_t * h)
-{
- u32 vni_reserved_host_byte_order;
-
- vni_reserved_host_byte_order = clib_net_to_host_u32 (h->vni_reserved);
- return vni_reserved_host_byte_order >> 8;
-}
-
-static inline u16
-vxlan_gbp_get_sclass (vxlan_gbp_header_t * h)
-{
- u16 sclass_host_byte_order;
-
- sclass_host_byte_order = clib_net_to_host_u16 (h->sclass);
- return sclass_host_byte_order;
-}
-
-static inline vxlan_gbp_gpflags_t
-vxlan_gbp_get_gpflags (vxlan_gbp_header_t * h)
-{
- return h->gpflags;
-}
-
-static inline vxlan_gbp_flags_t
-vxlan_gbp_get_flags (vxlan_gbp_header_t * h)
-{
- return h->flag_g_i;
-}
-
-static inline void
-vxlan_gbp_set_header (vxlan_gbp_header_t * h, u32 vni)
-{
- h->vni_reserved = clib_host_to_net_u32 (vni << 8);
- h->flags_sclass_as_u32 = 0;
- h->flag_g_i = VXLAN_GBP_FLAGS_I | VXLAN_GBP_FLAGS_G;
-}
-
-extern u8 *format_vxlan_gbp_header_flags (u8 * s, va_list * args);
-extern u8 *format_vxlan_gbp_header_gpflags (u8 * s, va_list * args);
-
-#endif /* __included_vxlan_gbp_packet_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vnet/vxlan-gpe/decap.c b/src/vnet/vxlan-gpe/decap.c
index 035e8a3fd6a..d4c7424630d 100644
--- a/src/vnet/vxlan-gpe/decap.c
+++ b/src/vnet/vxlan-gpe/decap.c
@@ -79,10 +79,106 @@ format_vxlan_gpe_with_length (u8 * s, va_list * args)
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-
return s;
}
+typedef struct
+{
+ vxlan4_gpe_tunnel_key_t key;
+ vxlan_gpe_decap_info_t val;
+} vxlan4_gpe_tunnel_cache_t;
+
+static const vxlan_gpe_decap_info_t decap_not_found = {
+ .tunnel_index = ~0,
+ .next_index = VXLAN_GPE_INPUT_NEXT_DROP,
+ .error = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL
+};
+
+always_inline vxlan_gpe_decap_info_t
+vxlan4_gpe_find_tunnel (vxlan_gpe_main_t *nngm,
+ vxlan4_gpe_tunnel_cache_t *cache,
+ ip4_vxlan_gpe_header_t *iuvn4_0)
+{
+ /* Make sure VXLAN GPE tunnel exist according to packet S/D IP, UDP port and
+ * VNI */
+ vxlan4_gpe_tunnel_key_t key4 = {
+ .local = iuvn4_0->ip4.dst_address.as_u32,
+ .remote = iuvn4_0->ip4.src_address.as_u32,
+ .vni = iuvn4_0->vxlan.vni_res,
+ .port = (u32) iuvn4_0->udp.dst_port,
+ };
+
+ if (PREDICT_TRUE (key4.as_u64[0] == cache->key.as_u64[0] &&
+ key4.as_u64[1] == cache->key.as_u64[1]))
+ {
+ /* cache hit */
+ return cache->val;
+ }
+
+ uword *p = hash_get_mem (nngm->vxlan4_gpe_tunnel_by_key, &key4);
+ if (PREDICT_TRUE (p != 0))
+ {
+ u32 next = (iuvn4_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
+ nngm->decap_next_node_list[iuvn4_0->vxlan.protocol] :
+ VXLAN_GPE_INPUT_NEXT_DROP;
+
+ cache->key.as_u64[0] = key4.as_u64[0];
+ cache->key.as_u64[1] = key4.as_u64[1];
+
+ cache->val.error = 0;
+ cache->val.tunnel_index = p[0];
+ cache->val.next_index = next;
+
+ return cache->val;
+ }
+
+ return decap_not_found;
+}
+
+typedef struct
+{
+ vxlan6_gpe_tunnel_key_t key;
+ vxlan_gpe_decap_info_t val;
+} vxlan6_gpe_tunnel_cache_t;
+
+always_inline vxlan_gpe_decap_info_t
+vxlan6_gpe_find_tunnel (vxlan_gpe_main_t *nngm,
+ vxlan6_gpe_tunnel_cache_t *cache,
+ ip6_vxlan_gpe_header_t *iuvn6_0)
+{
+ /* Make sure VXLAN GPE tunnel exist according to packet S/D IP, UDP port and
+ * VNI */
+ vxlan6_gpe_tunnel_key_t key6;
+
+ ip6_address_copy (&key6.local, &iuvn6_0->ip6.dst_address);
+ ip6_address_copy (&key6.remote, &iuvn6_0->ip6.src_address);
+ key6.vni = iuvn6_0->vxlan.vni_res;
+ key6.port = iuvn6_0->udp.dst_port;
+
+ if (PREDICT_TRUE (memcmp (&key6, &cache->key, sizeof (cache->key)) == 0))
+ {
+ /* cache hit */
+ return cache->val;
+ }
+
+ uword *p = hash_get_mem (nngm->vxlan6_gpe_tunnel_by_key, &key6);
+ if (PREDICT_TRUE (p != 0))
+ {
+ u32 next = (iuvn6_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
+ nngm->decap_next_node_list[iuvn6_0->vxlan.protocol] :
+ VXLAN_GPE_INPUT_NEXT_DROP;
+
+ clib_memcpy_fast (&cache->key, &key6, sizeof (key6));
+ cache->val.error = 0;
+ cache->val.tunnel_index = p[0];
+ cache->val.next_index = next;
+
+ return cache->val;
+ }
+
+ return decap_not_found;
+}
+
/**
* @brief Common processing for IPv4 and IPv6 VXLAN GPE decap dispatch functions
*
@@ -111,17 +207,16 @@ vxlan_gpe_input (vlib_main_t * vm,
vxlan_gpe_main_t *nngm = &vxlan_gpe_main;
vnet_main_t *vnm = nngm->vnet_main;
vnet_interface_main_t *im = &vnm->interface_main;
- u32 last_tunnel_index = ~0;
- vxlan4_gpe_tunnel_key_t last_key4;
- vxlan6_gpe_tunnel_key_t last_key6;
+ vxlan4_gpe_tunnel_cache_t last4;
+ vxlan6_gpe_tunnel_cache_t last6;
u32 pkts_decapsulated = 0;
u32 thread_index = vm->thread_index;
u32 stats_sw_if_index, stats_n_packets, stats_n_bytes;
if (is_ip4)
- clib_memset (&last_key4, 0xff, sizeof (last_key4));
+ clib_memset (&last4, 0xff, sizeof (last4));
else
- clib_memset (&last_key6, 0xff, sizeof (last_key6));
+ clib_memset (&last6, 0xff, sizeof (last6));
from = vlib_frame_vector_args (from_frame);
n_left_from = from_frame->n_vectors;
@@ -143,11 +238,8 @@ vxlan_gpe_input (vlib_main_t * vm,
u32 next0, next1;
ip4_vxlan_gpe_header_t *iuvn4_0, *iuvn4_1;
ip6_vxlan_gpe_header_t *iuvn6_0, *iuvn6_1;
- uword *p0, *p1;
- u32 tunnel_index0, tunnel_index1;
+ vxlan_gpe_decap_info_t di0, di1;
vxlan_gpe_tunnel_t *t0, *t1;
- vxlan4_gpe_tunnel_key_t key4_0, key4_1;
- vxlan6_gpe_tunnel_key_t key6_0, key6_1;
u32 error0, error1;
u32 sw_if_index0, sw_if_index1, len0, len1;
@@ -193,6 +285,9 @@ vxlan_gpe_input (vlib_main_t * vm,
/* pop (ip, udp, vxlan) */
vlib_buffer_advance (b0, sizeof (*iuvn4_0));
vlib_buffer_advance (b1, sizeof (*iuvn4_1));
+
+ di0 = vxlan4_gpe_find_tunnel (nngm, &last4, iuvn4_0);
+ di1 = vxlan4_gpe_find_tunnel (nngm, &last4, iuvn4_1);
}
else
{
@@ -210,125 +305,20 @@ vxlan_gpe_input (vlib_main_t * vm,
/* pop (ip, udp, vxlan) */
vlib_buffer_advance (b0, sizeof (*iuvn6_0));
vlib_buffer_advance (b1, sizeof (*iuvn6_1));
- }
- tunnel_index0 = ~0;
- tunnel_index1 = ~0;
- error0 = 0;
- error1 = 0;
-
- if (is_ip4)
- {
- next0 =
- (iuvn4_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
- nngm->decap_next_node_list[iuvn4_0->vxlan.protocol] :
- VXLAN_GPE_INPUT_NEXT_DROP;
- next1 =
- (iuvn4_1->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
- nngm->decap_next_node_list[iuvn4_1->vxlan.protocol] :
- VXLAN_GPE_INPUT_NEXT_DROP;
-
- key4_0.local = iuvn4_0->ip4.dst_address.as_u32;
- key4_1.local = iuvn4_1->ip4.dst_address.as_u32;
-
- key4_0.remote = iuvn4_0->ip4.src_address.as_u32;
- key4_1.remote = iuvn4_1->ip4.src_address.as_u32;
-
- key4_0.vni = iuvn4_0->vxlan.vni_res;
- key4_1.vni = iuvn4_1->vxlan.vni_res;
-
- key4_0.pad = 0;
- key4_1.pad = 0;
+ di0 = vxlan6_gpe_find_tunnel (nngm, &last6, iuvn6_0);
+ di1 = vxlan6_gpe_find_tunnel (nngm, &last6, iuvn6_1);
}
- else /* is_ip6 */
- {
- next0 = (iuvn6_0->vxlan.protocol < node->n_next_nodes) ?
- iuvn6_0->vxlan.protocol : VXLAN_GPE_INPUT_NEXT_DROP;
- next1 = (iuvn6_1->vxlan.protocol < node->n_next_nodes) ?
- iuvn6_1->vxlan.protocol : VXLAN_GPE_INPUT_NEXT_DROP;
-
- key6_0.local.as_u64[0] = iuvn6_0->ip6.dst_address.as_u64[0];
- key6_0.local.as_u64[1] = iuvn6_0->ip6.dst_address.as_u64[1];
- key6_1.local.as_u64[0] = iuvn6_1->ip6.dst_address.as_u64[0];
- key6_1.local.as_u64[1] = iuvn6_1->ip6.dst_address.as_u64[1];
-
- key6_0.remote.as_u64[0] = iuvn6_0->ip6.src_address.as_u64[0];
- key6_0.remote.as_u64[1] = iuvn6_0->ip6.src_address.as_u64[1];
- key6_1.remote.as_u64[0] = iuvn6_1->ip6.src_address.as_u64[0];
- key6_1.remote.as_u64[1] = iuvn6_1->ip6.src_address.as_u64[1];
-
- key6_0.vni = iuvn6_0->vxlan.vni_res;
- key6_1.vni = iuvn6_1->vxlan.vni_res;
- }
-
- /* Processing packet 0 */
- if (is_ip4)
- {
- /* Processing for key4_0 */
- if (PREDICT_FALSE ((key4_0.as_u64[0] != last_key4.as_u64[0])
- || (key4_0.as_u64[1] !=
- last_key4.as_u64[1])))
- {
- p0 = hash_get_mem (nngm->vxlan4_gpe_tunnel_by_key, &key4_0);
-
- if (p0 == 0)
- {
- error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
- goto trace0;
- }
- last_key4.as_u64[0] = key4_0.as_u64[0];
- last_key4.as_u64[1] = key4_0.as_u64[1];
- tunnel_index0 = last_tunnel_index = p0[0];
- }
- else
- tunnel_index0 = last_tunnel_index;
- }
- else /* is_ip6 */
+ /* Process packet 0 */
+ next0 = di0.next_index;
+ error0 = di0.error;
+ if (error0 != 0)
{
- next0 =
- (iuvn6_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
- nngm->decap_next_node_list[iuvn6_0->vxlan.protocol] :
- VXLAN_GPE_INPUT_NEXT_DROP;
- next1 =
- (iuvn6_1->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
- nngm->decap_next_node_list[iuvn6_1->vxlan.protocol] :
- VXLAN_GPE_INPUT_NEXT_DROP;
-
- key6_0.local.as_u64[0] = iuvn6_0->ip6.dst_address.as_u64[0];
- key6_0.local.as_u64[1] = iuvn6_0->ip6.dst_address.as_u64[1];
- key6_1.local.as_u64[0] = iuvn6_1->ip6.dst_address.as_u64[0];
- key6_1.local.as_u64[1] = iuvn6_1->ip6.dst_address.as_u64[1];
-
- key6_0.remote.as_u64[0] = iuvn6_0->ip6.src_address.as_u64[0];
- key6_0.remote.as_u64[1] = iuvn6_0->ip6.src_address.as_u64[1];
- key6_1.remote.as_u64[0] = iuvn6_1->ip6.src_address.as_u64[0];
- key6_1.remote.as_u64[1] = iuvn6_1->ip6.src_address.as_u64[1];
-
- key6_0.vni = iuvn6_0->vxlan.vni_res;
- key6_1.vni = iuvn6_1->vxlan.vni_res;
-
- /* Processing for key6_0 */
- if (PREDICT_FALSE
- (memcmp (&key6_0, &last_key6, sizeof (last_key6)) != 0))
- {
- p0 = hash_get_mem (nngm->vxlan6_gpe_tunnel_by_key, &key6_0);
-
- if (p0 == 0)
- {
- error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
- goto trace0;
- }
-
- memcpy (&last_key6, &key6_0, sizeof (key6_0));
- tunnel_index0 = last_tunnel_index = p0[0];
- }
- else
- tunnel_index0 = last_tunnel_index;
+ goto trace0;
}
- t0 = pool_elt_at_index (nngm->tunnels, tunnel_index0);
-
+ t0 = pool_elt_at_index (nngm->tunnels, di0.tunnel_index);
sw_if_index0 = t0->sw_if_index;
len0 = vlib_buffer_length_in_chain (vm, b0);
@@ -372,54 +362,18 @@ vxlan_gpe_input (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->next_index = next0;
tr->error = error0;
- tr->tunnel_index = tunnel_index0;
+ tr->tunnel_index = di0.tunnel_index;
}
/* Process packet 1 */
- if (is_ip4)
+ next1 = di1.next_index;
+ error1 = di1.error;
+ if (error1 != 0)
{
- /* Processing for key4_1 */
- if (PREDICT_FALSE ((key4_1.as_u64[0] != last_key4.as_u64[0])
- || (key4_1.as_u64[1] !=
- last_key4.as_u64[1])))
- {
- p1 = hash_get_mem (nngm->vxlan4_gpe_tunnel_by_key, &key4_1);
-
- if (p1 == 0)
- {
- error1 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
- goto trace1;
- }
-
- last_key4.as_u64[0] = key4_1.as_u64[0];
- last_key4.as_u64[1] = key4_1.as_u64[1];
- tunnel_index1 = last_tunnel_index = p1[0];
- }
- else
- tunnel_index1 = last_tunnel_index;
+ goto trace1;
}
- else /* is_ip6 */
- {
- /* Processing for key6_1 */
- if (PREDICT_FALSE
- (memcmp (&key6_1, &last_key6, sizeof (last_key6)) != 0))
- {
- p1 = hash_get_mem (nngm->vxlan6_gpe_tunnel_by_key, &key6_1);
- if (p1 == 0)
- {
- error1 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
- goto trace1;
- }
-
- memcpy (&last_key6, &key6_1, sizeof (key6_1));
- tunnel_index1 = last_tunnel_index = p1[0];
- }
- else
- tunnel_index1 = last_tunnel_index;
- }
-
- t1 = pool_elt_at_index (nngm->tunnels, tunnel_index1);
+ t1 = pool_elt_at_index (nngm->tunnels, di1.tunnel_index);
sw_if_index1 = t1->sw_if_index;
len1 = vlib_buffer_length_in_chain (vm, b1);
@@ -466,7 +420,7 @@ vxlan_gpe_input (vlib_main_t * vm,
vlib_add_trace (vm, node, b1, sizeof (*tr));
tr->next_index = next1;
tr->error = error1;
- tr->tunnel_index = tunnel_index1;
+ tr->tunnel_index = di1.tunnel_index;
}
vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next,
@@ -481,11 +435,8 @@ vxlan_gpe_input (vlib_main_t * vm,
u32 next0;
ip4_vxlan_gpe_header_t *iuvn4_0;
ip6_vxlan_gpe_header_t *iuvn6_0;
- uword *p0;
- u32 tunnel_index0;
+ vxlan_gpe_decap_info_t di0;
vxlan_gpe_tunnel_t *t0;
- vxlan4_gpe_tunnel_key_t key4_0;
- vxlan6_gpe_tunnel_key_t key6_0;
u32 error0;
u32 sw_if_index0, len0;
@@ -509,6 +460,8 @@ vxlan_gpe_input (vlib_main_t * vm,
/* pop (ip, udp, vxlan) */
vlib_buffer_advance (b0, sizeof (*iuvn4_0));
+
+ di0 = vxlan4_gpe_find_tunnel (nngm, &last4, iuvn4_0);
}
else
{
@@ -521,77 +474,18 @@ vxlan_gpe_input (vlib_main_t * vm,
/* pop (ip, udp, vxlan) */
vlib_buffer_advance (b0, sizeof (*iuvn6_0));
- }
-
- tunnel_index0 = ~0;
- error0 = 0;
- if (is_ip4)
- {
- next0 =
- (iuvn4_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
- nngm->decap_next_node_list[iuvn4_0->vxlan.protocol] :
- VXLAN_GPE_INPUT_NEXT_DROP;
-
- key4_0.local = iuvn4_0->ip4.dst_address.as_u32;
- key4_0.remote = iuvn4_0->ip4.src_address.as_u32;
- key4_0.vni = iuvn4_0->vxlan.vni_res;
- key4_0.pad = 0;
-
- /* Processing for key4_0 */
- if (PREDICT_FALSE ((key4_0.as_u64[0] != last_key4.as_u64[0])
- || (key4_0.as_u64[1] !=
- last_key4.as_u64[1])))
- {
- p0 = hash_get_mem (nngm->vxlan4_gpe_tunnel_by_key, &key4_0);
-
- if (p0 == 0)
- {
- error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
- goto trace00;
- }
-
- last_key4.as_u64[0] = key4_0.as_u64[0];
- last_key4.as_u64[1] = key4_0.as_u64[1];
- tunnel_index0 = last_tunnel_index = p0[0];
+ di0 = vxlan6_gpe_find_tunnel (nngm, &last6, iuvn6_0);
}
- else
- tunnel_index0 = last_tunnel_index;
- }
- else /* is_ip6 */
- {
- next0 =
- (iuvn6_0->vxlan.protocol < VXLAN_GPE_PROTOCOL_MAX) ?
- nngm->decap_next_node_list[iuvn6_0->vxlan.protocol] :
- VXLAN_GPE_INPUT_NEXT_DROP;
-
- key6_0.local.as_u64[0] = iuvn6_0->ip6.dst_address.as_u64[0];
- key6_0.local.as_u64[1] = iuvn6_0->ip6.dst_address.as_u64[1];
- key6_0.remote.as_u64[0] = iuvn6_0->ip6.src_address.as_u64[0];
- key6_0.remote.as_u64[1] = iuvn6_0->ip6.src_address.as_u64[1];
- key6_0.vni = iuvn6_0->vxlan.vni_res;
-
- /* Processing for key6_0 */
- if (PREDICT_FALSE
- (memcmp (&key6_0, &last_key6, sizeof (last_key6)) != 0))
- {
- p0 = hash_get_mem (nngm->vxlan6_gpe_tunnel_by_key, &key6_0);
-
- if (p0 == 0)
- {
- error0 = VXLAN_GPE_ERROR_NO_SUCH_TUNNEL;
- goto trace00;
- }
- memcpy (&last_key6, &key6_0, sizeof (key6_0));
- tunnel_index0 = last_tunnel_index = p0[0];
- }
- else
- tunnel_index0 = last_tunnel_index;
+ next0 = di0.next_index;
+ error0 = di0.error;
+ if (error0 != 0)
+ {
+ goto trace00;
}
- t0 = pool_elt_at_index (nngm->tunnels, tunnel_index0);
-
+ t0 = pool_elt_at_index (nngm->tunnels, di0.tunnel_index);
sw_if_index0 = t0->sw_if_index;
len0 = vlib_buffer_length_in_chain (vm, b0);
@@ -637,7 +531,7 @@ vxlan_gpe_input (vlib_main_t * vm,
vlib_add_trace (vm, node, b0, sizeof (*tr));
tr->next_index = next0;
tr->error = error0;
- tr->tunnel_index = tunnel_index0;
+ tr->tunnel_index = di0.tunnel_index;
}
vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
n_left_to_next, bi0, next0);
@@ -728,7 +622,6 @@ static char *vxlan_gpe_error_strings[] = {
#undef _
};
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = {
.name = "vxlan4-gpe-input",
/* Takes a vector of packets. */
@@ -748,9 +641,7 @@ VLIB_REGISTER_NODE (vxlan4_gpe_input_node) = {
.format_trace = format_vxlan_gpe_rx_trace,
// $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = {
.name = "vxlan6-gpe-input",
/* Takes a vector of packets. */
@@ -770,7 +661,6 @@ VLIB_REGISTER_NODE (vxlan6_gpe_input_node) = {
.format_trace = format_vxlan_gpe_rx_trace,
// $$$$ .unformat_buffer = unformat_vxlan_gpe_header,
};
-/* *INDENT-ON* */
typedef enum
{
@@ -794,6 +684,9 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm,
matching a local VTEP address */
vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ vxlan4_gpe_tunnel_cache_t last4;
+ vxlan6_gpe_tunnel_cache_t last6;
+
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
@@ -804,9 +697,15 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm,
ip4_forward_next_trace (vm, node, frame, VLIB_TX);
if (is_ip4)
- vtep4_key_init (&last_vtep4);
+ {
+ vtep4_key_init (&last_vtep4);
+ clib_memset (&last4, 0xff, sizeof last4);
+ }
else
- vtep6_key_init (&last_vtep6);
+ {
+ vtep6_key_init (&last_vtep6);
+ clib_memset (&last6, 0xff, sizeof last6);
+ }
while (n_left_from > 0)
{
@@ -818,6 +717,9 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm,
ip4_header_t *ip40, *ip41;
ip6_header_t *ip60, *ip61;
udp_header_t *udp0, *udp1;
+ ip4_vxlan_gpe_header_t *iuvn4_0, *iuvn4_1;
+ ip6_vxlan_gpe_header_t *iuvn6_0, *iuvn6_1;
+ vxlan_gpe_decap_info_t di0, di1;
u32 bi0, ip_len0, udp_len0, flags0, next0;
u32 bi1, ip_len1, udp_len1, flags1, next1;
i32 len_diff0, len_diff1;
@@ -874,12 +776,20 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm,
goto exit0; /* not UDP packet */
if (is_ip4)
- udp0 = ip4_next_header (ip40);
+ {
+ udp0 = ip4_next_header (ip40);
+ iuvn4_0 = vlib_buffer_get_current (b0);
+ di0 = vxlan4_gpe_find_tunnel (ngm, &last4, iuvn4_0);
+ }
else
- udp0 = ip6_next_header (ip60);
+ {
+ udp0 = ip6_next_header (ip60);
+ iuvn6_0 = vlib_buffer_get_current (b0);
+ di0 = vxlan6_gpe_find_tunnel (ngm, &last6, iuvn6_0);
+ }
- if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE))
- goto exit0; /* not VXLAN packet */
+ if (PREDICT_FALSE (di0.tunnel_index == ~0))
+ goto exit0; /* unknown interface */
/* Validate DIP against VTEPs */
if (is_ip4)
@@ -957,12 +867,20 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm,
goto exit1; /* not UDP packet */
if (is_ip4)
- udp1 = ip4_next_header (ip41);
+ {
+ udp1 = ip4_next_header (ip41);
+ iuvn4_1 = vlib_buffer_get_current (b1);
+ di1 = vxlan4_gpe_find_tunnel (ngm, &last4, iuvn4_1);
+ }
else
- udp1 = ip6_next_header (ip61);
+ {
+ udp1 = ip6_next_header (ip61);
+ iuvn6_1 = vlib_buffer_get_current (b1);
+ di1 = vxlan6_gpe_find_tunnel (ngm, &last6, iuvn6_1);
+ }
- if (udp1->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE))
- goto exit1; /* not VXLAN packet */
+ if (PREDICT_FALSE (di1.tunnel_index == ~0))
+ goto exit1; /* unknown interface */
/* Validate DIP against VTEPs */
if (is_ip4)
@@ -1046,6 +964,9 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm,
ip4_header_t *ip40;
ip6_header_t *ip60;
udp_header_t *udp0;
+ ip4_vxlan_gpe_header_t *iuvn4_0;
+ ip6_vxlan_gpe_header_t *iuvn6_0;
+ vxlan_gpe_decap_info_t di0;
u32 bi0, ip_len0, udp_len0, flags0, next0;
i32 len_diff0;
u8 error0, good_udp0, proto0;
@@ -1075,12 +996,20 @@ ip_vxlan_gpe_bypass_inline (vlib_main_t * vm,
goto exit; /* not UDP packet */
if (is_ip4)
- udp0 = ip4_next_header (ip40);
+ {
+ udp0 = ip4_next_header (ip40);
+ iuvn4_0 = vlib_buffer_get_current (b0);
+ di0 = vxlan4_gpe_find_tunnel (ngm, &last4, iuvn4_0);
+ }
else
- udp0 = ip6_next_header (ip60);
+ {
+ udp0 = ip6_next_header (ip60);
+ iuvn6_0 = vlib_buffer_get_current (b0);
+ di0 = vxlan6_gpe_find_tunnel (ngm, &last6, iuvn6_0);
+ }
- if (udp0->dst_port != clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE))
- goto exit; /* not VXLAN packet */
+ if (PREDICT_FALSE (di0.tunnel_index == ~0))
+ goto exit; /* unknown interface */
/* Validate DIP against VTEPs */
@@ -1172,7 +1101,6 @@ VLIB_NODE_FN (ip4_vxlan_gpe_bypass_node) (vlib_main_t * vm,
return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 1);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = {
.name = "ip4-vxlan-gpe-bypass",
.vector_size = sizeof (u32),
@@ -1186,7 +1114,6 @@ VLIB_REGISTER_NODE (ip4_vxlan_gpe_bypass_node) = {
.format_buffer = format_ip4_header,
.format_trace = format_ip4_forward_next_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/* Dummy init function to get us linked in. */
@@ -1206,7 +1133,6 @@ VLIB_NODE_FN (ip6_vxlan_gpe_bypass_node) (vlib_main_t * vm,
return ip_vxlan_gpe_bypass_inline (vm, node, frame, /* is_ip4 */ 0);
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = {
.name = "ip6-vxlan-gpe-bypass",
.vector_size = sizeof (u32),
@@ -1220,7 +1146,6 @@ VLIB_REGISTER_NODE (ip6_vxlan_gpe_bypass_node) = {
.format_buffer = format_ip6_header,
.format_trace = format_ip6_forward_next_trace,
};
-/* *INDENT-ON* */
#ifndef CLIB_MARCH_VARIANT
/* Dummy init function to get us linked in. */
diff --git a/src/vnet/vxlan-gpe/encap.c b/src/vnet/vxlan-gpe/encap.c
index daa0381c4bb..a769861577d 100644
--- a/src/vnet/vxlan-gpe/encap.c
+++ b/src/vnet/vxlan-gpe/encap.c
@@ -88,13 +88,15 @@ format_vxlan_gpe_encap_trace (u8 * s, va_list * args)
*
*/
always_inline void
-vxlan_gpe_encap_one_inline (vxlan_gpe_main_t * ngm, vlib_buffer_t * b0,
- vxlan_gpe_tunnel_t * t0, u32 * next0, u8 is_v4)
+vxlan_gpe_encap_one_inline (vxlan_gpe_main_t *ngm, vlib_buffer_t *b0,
+ vxlan_gpe_tunnel_t *t0, u32 *next0,
+ ip_address_family_t af)
{
ASSERT (sizeof (ip4_vxlan_gpe_header_t) == 36);
ASSERT (sizeof (ip6_vxlan_gpe_header_t) == 56);
- ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, is_v4);
+ ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, af,
+ N_AF, UDP_ENCAP_FIXUP_NONE);
next0[0] = t0->encap_next_node;
}
@@ -112,16 +114,18 @@ vxlan_gpe_encap_one_inline (vxlan_gpe_main_t * ngm, vlib_buffer_t * b0,
*
*/
always_inline void
-vxlan_gpe_encap_two_inline (vxlan_gpe_main_t * ngm, vlib_buffer_t * b0,
- vlib_buffer_t * b1, vxlan_gpe_tunnel_t * t0,
- vxlan_gpe_tunnel_t * t1, u32 * next0,
- u32 * next1, u8 is_v4)
+vxlan_gpe_encap_two_inline (vxlan_gpe_main_t *ngm, vlib_buffer_t *b0,
+ vlib_buffer_t *b1, vxlan_gpe_tunnel_t *t0,
+ vxlan_gpe_tunnel_t *t1, u32 *next0, u32 *next1,
+ ip_address_family_t af)
{
ASSERT (sizeof (ip4_vxlan_gpe_header_t) == 36);
ASSERT (sizeof (ip6_vxlan_gpe_header_t) == 56);
- ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, is_v4);
- ip_udp_encap_one (ngm->vlib_main, b1, t1->rewrite, t1->rewrite_size, is_v4);
+ ip_udp_encap_one (ngm->vlib_main, b0, t0->rewrite, t0->rewrite_size, af,
+ N_AF, UDP_ENCAP_FIXUP_NONE);
+ ip_udp_encap_one (ngm->vlib_main, b1, t1->rewrite, t1->rewrite_size, af,
+ N_AF, UDP_ENCAP_FIXUP_NONE);
next0[0] = next1[0] = t0->encap_next_node;
}
@@ -170,7 +174,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
u32 sw_if_index0 = ~0, sw_if_index1 = ~0, len0, len1;
vnet_hw_interface_t *hi0, *hi1;
vxlan_gpe_tunnel_t *t0 = NULL, *t1 = NULL;
- u8 is_ip4_0 = 0, is_ip4_1 = 0;
+ ip_address_family_t af_0 = AF_IP4, af_1 = AF_IP4;
vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
@@ -201,7 +205,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
n_left_to_next -= 2;
n_left_from -= 2;
- /* get the flag "is_ip4" */
+ /* get "af_0" */
if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
{
sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
@@ -210,10 +214,10 @@ vxlan_gpe_encap (vlib_main_t * vm,
vnet_buffer (b[0])->sw_if_index
[VLIB_TX]);
t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
- is_ip4_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+ af_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4 ? AF_IP4 : AF_IP6);
}
- /* get the flag "is_ip4" */
+ /* get "af_1" */
if (sw_if_index1 != vnet_buffer (b[1])->sw_if_index[VLIB_TX])
{
if (sw_if_index0 == vnet_buffer (b[1])->sw_if_index[VLIB_TX])
@@ -221,7 +225,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
sw_if_index1 = sw_if_index0;
hi1 = hi0;
t1 = t0;
- is_ip4_1 = is_ip4_0;
+ af_1 = af_0;
}
else
{
@@ -231,19 +235,20 @@ vxlan_gpe_encap (vlib_main_t * vm,
vnet_buffer (b[1])->sw_if_index
[VLIB_TX]);
t1 = pool_elt_at_index (ngm->tunnels, hi1->dev_instance);
- is_ip4_1 = (t1->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+ af_1 =
+ (t1->flags & VXLAN_GPE_TUNNEL_IS_IPV4 ? AF_IP4 : AF_IP6);
}
}
- if (PREDICT_TRUE (is_ip4_0 == is_ip4_1))
+ if (PREDICT_TRUE (af_0 == af_1))
{
vxlan_gpe_encap_two_inline (ngm, b[0], b[1], t0, t1, &next0,
- &next1, is_ip4_0);
+ &next1, af_0);
}
else
{
- vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, is_ip4_0);
- vxlan_gpe_encap_one_inline (ngm, b[1], t1, &next1, is_ip4_1);
+ vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, af_0);
+ vxlan_gpe_encap_one_inline (ngm, b[1], t1, &next1, af_1);
}
/* Reset to look up tunnel partner in the configured FIB */
@@ -325,7 +330,7 @@ vxlan_gpe_encap (vlib_main_t * vm,
n_left_from -= 1;
n_left_to_next -= 1;
- /* get the flag "is_ip4" */
+ /* get "af_0" */
if (sw_if_index0 != vnet_buffer (b[0])->sw_if_index[VLIB_TX])
{
sw_if_index0 = vnet_buffer (b[0])->sw_if_index[VLIB_TX];
@@ -336,10 +341,10 @@ vxlan_gpe_encap (vlib_main_t * vm,
t0 = pool_elt_at_index (ngm->tunnels, hi0->dev_instance);
- is_ip4_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+ af_0 = (t0->flags & VXLAN_GPE_TUNNEL_IS_IPV4 ? AF_IP4 : AF_IP6);
}
- vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, is_ip4_0);
+ vxlan_gpe_encap_one_inline (ngm, b[0], t0, &next0, af_0);
/* Reset to look up tunnel partner in the configured FIB */
vnet_buffer (b[0])->sw_if_index[VLIB_TX] = t0->encap_fib_index;
@@ -399,7 +404,6 @@ vxlan_gpe_encap (vlib_main_t * vm,
return from_frame->n_vectors;
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = {
.function = vxlan_gpe_encap,
.name = "vxlan-gpe-encap",
@@ -418,7 +422,6 @@ VLIB_REGISTER_NODE (vxlan_gpe_encap_node) = {
[VXLAN_GPE_ENCAP_NEXT_DROP] = "error-drop",
},
};
-/* *INDENT-ON* */
/*
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.api b/src/vnet/vxlan-gpe/vxlan_gpe.api
index 35d8c642192..3cbd7ab7f71 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe.api
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.api
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-option version = "2.0.0";
+option version = "2.1.0";
import "vnet/interface_types.api";
import "vnet/ip/ip_types.api";
@@ -32,12 +32,48 @@ define vxlan_gpe_add_del_tunnel
bool is_add [default=true];
};
+/** \brief Create or delete a VXLAN-GPE tunnel
+ @param client_index - opaque cookie to identify the sender
+ @param context - sender context, to match reply w/ request
+ @param local - Source IP address
+ @param remote - Destination IP address, can be multicast
+ @param local_port - Source UDP port. It is not included in sent packets. Used only for port registration
+ @param remote_port - Destination UDP port
+ @param mcast_sw_if_index - Interface for multicast destination
+ @param encap_vrf_id - Encap route table FIB index
+ @param decap_vrf_id - Decap route table FIB index
+ @param protocol - Encapsulated protocol
+ @param vni - The VXLAN Network Identifier, uint24
+ @param is_add - Use 1 to create the tunnel, 0 to remove it
+*/
+define vxlan_gpe_add_del_tunnel_v2
+{
+ u32 client_index;
+ u32 context;
+ vl_api_address_t local;
+ vl_api_address_t remote;
+ u16 local_port;
+ u16 remote_port;
+ vl_api_interface_index_t mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_vrf_id;
+ vl_api_ip_proto_t protocol;
+ u32 vni;
+ bool is_add [default=true];
+};
+
define vxlan_gpe_add_del_tunnel_reply
{
u32 context;
i32 retval;
vl_api_interface_index_t sw_if_index;
};
+define vxlan_gpe_add_del_tunnel_v2_reply
+{
+ u32 context;
+ i32 retval;
+ vl_api_interface_index_t sw_if_index;
+};
define vxlan_gpe_tunnel_dump
{
@@ -45,6 +81,12 @@ define vxlan_gpe_tunnel_dump
u32 context;
vl_api_interface_index_t sw_if_index;
};
+define vxlan_gpe_tunnel_v2_dump
+{
+ u32 client_index;
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+};
define vxlan_gpe_tunnel_details
{
@@ -59,6 +101,21 @@ define vxlan_gpe_tunnel_details
u32 decap_vrf_id;
bool is_ipv6;
};
+define vxlan_gpe_tunnel_v2_details
+{
+ u32 context;
+ vl_api_interface_index_t sw_if_index;
+ vl_api_address_t local;
+ vl_api_address_t remote;
+ u16 local_port;
+ u16 remote_port;
+ u32 vni;
+ vl_api_ip_proto_t protocol;
+ vl_api_interface_index_t mcast_sw_if_index;
+ u32 encap_vrf_id;
+ u32 decap_vrf_id;
+ bool is_ipv6;
+};
/** \brief Interface set vxlan-gpe-bypass request
@param client_index - opaque cookie to identify the sender
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.c b/src/vnet/vxlan-gpe/vxlan_gpe.c
index e5ca4ec769e..5a5262ea9db 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe.c
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.c
@@ -87,11 +87,12 @@ format_vxlan_gpe_tunnel (u8 * s, va_list * args)
vxlan_gpe_tunnel_t *t = va_arg (*args, vxlan_gpe_tunnel_t *);
vxlan_gpe_main_t *ngm = &vxlan_gpe_main;
- s = format (s, "[%d] lcl %U rmt %U vni %d fib-idx %d sw-if-idx %d ",
- t - ngm->tunnels,
- format_ip46_address, &t->local, IP46_TYPE_ANY,
- format_ip46_address, &t->remote, IP46_TYPE_ANY,
- t->vni, t->encap_fib_index, t->sw_if_index);
+ s = format (s,
+ "[%d] lcl %U rmt %U lcl_port %d rmt_port %d vni %d "
+ "fib-idx %d sw-if-idx %d ",
+ t - ngm->tunnels, format_ip46_address, &t->local, IP46_TYPE_ANY,
+ format_ip46_address, &t->remote, IP46_TYPE_ANY, t->local_port,
+ t->remote_port, t->vni, t->encap_fib_index, t->sw_if_index);
#if 0
/* next_dpo not yet used by vxlan-gpe-encap node */
@@ -143,14 +144,12 @@ vxlan_gpe_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index,
return 0;
}
-/* *INDENT-OFF* */
VNET_DEVICE_CLASS (vxlan_gpe_device_class,static) = {
.name = "VXLAN_GPE",
.format_device_name = format_vxlan_gpe_name,
.format_tx_trace = format_vxlan_gpe_encap_trace,
.admin_up_down_function = vxlan_gpe_interface_admin_up_down,
};
-/* *INDENT-ON* */
/**
@@ -170,13 +169,11 @@ format_vxlan_gpe_header_with_length (u8 * s, va_list * args)
return s;
}
-/* *INDENT-OFF* */
VNET_HW_INTERFACE_CLASS (vxlan_gpe_hw_class) = {
.name = "VXLAN_GPE",
.format_header = format_vxlan_gpe_header_with_length,
.build_rewrite = default_build_rewrite,
};
-/* *INDENT-ON* */
static void
vxlan_gpe_tunnel_restack_dpo (vxlan_gpe_tunnel_t * t)
@@ -248,12 +245,14 @@ const static fib_node_vft_t vxlan_gpe_vft = {
.fnv_back_walk = vxlan_gpe_tunnel_back_walk,
};
-#define foreach_gpe_copy_field \
-_(vni) \
-_(protocol) \
-_(mcast_sw_if_index) \
-_(encap_fib_index) \
-_(decap_fib_index)
+#define foreach_gpe_copy_field \
+ _ (vni) \
+ _ (protocol) \
+ _ (mcast_sw_if_index) \
+ _ (encap_fib_index) \
+ _ (decap_fib_index) \
+ _ (local_port) \
+ _ (remote_port)
#define foreach_copy_ipv4 { \
_(local.ip4.as_u32) \
@@ -304,8 +303,8 @@ vxlan4_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
ip0->checksum = ip4_header_checksum (ip0);
/* UDP header, randomize src port on something, maybe? */
- h0->udp.src_port = clib_host_to_net_u16 (4790);
- h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
+ h0->udp.src_port = clib_host_to_net_u16 (t->local_port);
+ h0->udp.dst_port = clib_host_to_net_u16 (t->remote_port);
/* VXLAN header. Are we having fun yet? */
h0->vxlan.flags = VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P;
@@ -363,8 +362,8 @@ vxlan6_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
ip0->dst_address.as_u64[1] = t->remote.ip6.as_u64[1];
/* UDP header, randomize src port on something, maybe? */
- h0->udp.src_port = clib_host_to_net_u16 (4790);
- h0->udp.dst_port = clib_host_to_net_u16 (UDP_DST_PORT_VXLAN_GPE);
+ h0->udp.src_port = clib_host_to_net_u16 (t->local_port);
+ h0->udp.dst_port = clib_host_to_net_u16 (t->remote_port);
/* VXLAN header. Are we having fun yet? */
h0->vxlan.flags = VXLAN_GPE_FLAGS_I | VXLAN_GPE_FLAGS_P;
@@ -385,7 +384,6 @@ vxlan6_gpe_rewrite (vxlan_gpe_tunnel_t * t, u32 extension_size,
return (0);
}
-/* *INDENT-OFF* */
typedef CLIB_PACKED(union {
struct {
fib_node_index_t mfib_entry_index;
@@ -393,7 +391,6 @@ typedef CLIB_PACKED(union {
};
u64 as_u64;
}) mcast_shared_t;
-/* *INDENT-ON* */
static inline mcast_shared_t
mcast_shared_get (ip46_address_t * ip)
@@ -453,12 +450,19 @@ int vnet_vxlan_gpe_add_del_tunnel
vxlan6_gpe_tunnel_key_t key6, *key6_copy;
u32 is_ip6 = a->is_ip6;
+ /* Set udp-ports */
+ if (a->local_port == 0)
+ a->local_port = is_ip6 ? UDP_DST_PORT_VXLAN6_GPE : UDP_DST_PORT_VXLAN_GPE;
+
+ if (a->remote_port == 0)
+ a->remote_port = is_ip6 ? UDP_DST_PORT_VXLAN6_GPE : UDP_DST_PORT_VXLAN_GPE;
+
if (!is_ip6)
{
key4.local = a->local.ip4.as_u32;
key4.remote = a->remote.ip4.as_u32;
key4.vni = clib_host_to_net_u32 (a->vni << 8);
- key4.pad = 0;
+ key4.port = (u32) clib_host_to_net_u16 (a->local_port);
p = hash_get_mem (ngm->vxlan4_gpe_tunnel_by_key, &key4);
}
@@ -469,6 +473,7 @@ int vnet_vxlan_gpe_add_del_tunnel
key6.remote.as_u64[0] = a->remote.ip6.as_u64[0];
key6.remote.as_u64[1] = a->remote.ip6.as_u64[1];
key6.vni = clib_host_to_net_u32 (a->vni << 8);
+ key6.port = (u32) clib_host_to_net_u16 (a->local_port);
p = hash_get_mem (ngm->vxlan6_gpe_tunnel_by_key, &key6);
}
@@ -485,7 +490,6 @@ int vnet_vxlan_gpe_add_del_tunnel
clib_memset (t, 0, sizeof (*t));
/* copy from arg structure */
-/* *INDENT-OFF* */
#define _(x) t->x = a->x;
foreach_gpe_copy_field;
if (!a->is_ip6)
@@ -493,7 +497,6 @@ int vnet_vxlan_gpe_add_del_tunnel
else
foreach_copy_ipv6
#undef _
-/* *INDENT-ON* */
if (!a->is_ip6)
t->flags |= VXLAN_GPE_TUNNEL_IS_IPV4;
@@ -533,7 +536,7 @@ int vnet_vxlan_gpe_add_del_tunnel
vnet_interface_main_t *im = &vnm->interface_main;
hw_if_index = ngm->free_vxlan_gpe_tunnel_hw_if_indices
[vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices) - 1];
- _vec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices) -= 1;
+ vec_dec_len (ngm->free_vxlan_gpe_tunnel_hw_if_indices, 1);
hi = vnet_get_hw_interface (vnm, hw_if_index);
hi->dev_instance = t - ngm->tunnels;
@@ -583,7 +586,8 @@ int vnet_vxlan_gpe_add_del_tunnel
fib_prefix_t tun_remote_pfx;
vnet_flood_class_t flood_class = VNET_FLOOD_CLASS_TUNNEL_NORMAL;
- fib_prefix_from_ip46_addr (&t->remote, &tun_remote_pfx);
+ fib_protocol_t fp = fib_ip_proto (is_ip6);
+ fib_prefix_from_ip46_addr (fp, &t->remote, &tun_remote_pfx);
if (!ip46_address_is_multicast (&t->remote))
{
/* Unicast tunnel -
@@ -607,8 +611,6 @@ int vnet_vxlan_gpe_add_del_tunnel
* with different VNIs, create the output fib adjacency only if
* it does not already exist
*/
- fib_protocol_t fp = fib_ip_proto (is_ip6);
-
if (vtep_addr_ref (&ngm->vtep_table,
t->encap_fib_index, &t->remote) == 1)
{
@@ -634,17 +636,16 @@ int vnet_vxlan_gpe_add_del_tunnel
* - the forwarding interface is for-us
* - the accepting interface is that from the API
*/
- mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx,
- MFIB_SOURCE_VXLAN_GPE, &path);
+ mfib_table_entry_path_update (t->encap_fib_index, &mpfx,
+ MFIB_SOURCE_VXLAN_GPE,
+ MFIB_ENTRY_FLAG_NONE, &path);
path.frp_sw_if_index = a->mcast_sw_if_index;
path.frp_flags = FIB_ROUTE_PATH_FLAG_NONE;
path.frp_mitf_flags = MFIB_ITF_FLAG_ACCEPT;
- mfei = mfib_table_entry_path_update (t->encap_fib_index,
- &mpfx,
- MFIB_SOURCE_VXLAN_GPE,
- &path);
+ mfei = mfib_table_entry_path_update (
+ t->encap_fib_index, &mpfx, MFIB_SOURCE_VXLAN_GPE,
+ MFIB_ENTRY_FLAG_NONE, &path);
/*
* Create the mcast adjacency to send traffic to the group
@@ -719,12 +720,12 @@ int vnet_vxlan_gpe_add_del_tunnel
if (a->is_add)
{
/* register udp ports */
- if (!is_ip6 && !udp_is_valid_dst_port (UDP_DST_PORT_VXLAN_GPE, 1))
- udp_register_dst_port (ngm->vlib_main, UDP_DST_PORT_VXLAN_GPE,
- vxlan4_gpe_input_node.index, 1 /* is_ip4 */ );
- if (is_ip6 && !udp_is_valid_dst_port (UDP_DST_PORT_VXLAN6_GPE, 0))
- udp_register_dst_port (ngm->vlib_main, UDP_DST_PORT_VXLAN6_GPE,
- vxlan6_gpe_input_node.index, 0 /* is_ip4 */ );
+ if (!is_ip6 && !udp_is_valid_dst_port (a->local_port, 1))
+ udp_register_dst_port (ngm->vlib_main, a->local_port,
+ vxlan4_gpe_input_node.index, 1 /* is_ip4 */);
+ if (is_ip6 && !udp_is_valid_dst_port (a->remote_port, 0))
+ udp_register_dst_port (ngm->vlib_main, a->remote_port,
+ vxlan6_gpe_input_node.index, 0 /* is_ip4 */);
}
return 0;
@@ -749,6 +750,8 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
u8 protocol = VXLAN_GPE_PROTOCOL_IP4;
u32 vni;
u8 vni_set = 0;
+ u32 local_port = 0;
+ u32 remote_port = 0;
int rv;
u32 tmp;
vnet_vxlan_gpe_add_del_tunnel_args_t _a, *a = &_a;
@@ -833,6 +836,10 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
}
else if (unformat (line_input, "vni %d", &vni))
vni_set = 1;
+ else if (unformat (line_input, "local_port %d", &local_port))
+ ;
+ else if (unformat (line_input, "remote_port %d", &remote_port))
+ ;
else if (unformat (line_input, "next-ip4"))
protocol = VXLAN_GPE_PROTOCOL_IP4;
else if (unformat (line_input, "next-ip6"))
@@ -903,7 +910,6 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
a->is_add = is_add;
a->is_ip6 = ipv6_set;
-/* *INDENT-OFF* */
#define _(x) a->x = x;
foreach_gpe_copy_field;
if (ipv4_set)
@@ -911,7 +917,6 @@ vxlan_gpe_add_del_tunnel_command_fn (vlib_main_t * vm,
else
foreach_copy_ipv6
#undef _
-/* *INDENT-ON* */
rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
@@ -964,7 +969,6 @@ done:
* Example of how to delete a VXLAN-GPE Tunnel:
* @cliexcmd{create vxlan-gpe tunnel local 10.0.3.1 remote 10.0.3.3 vni 13 del}
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = {
.path = "create vxlan-gpe tunnel",
.short_help =
@@ -974,7 +978,6 @@ VLIB_CLI_COMMAND (create_vxlan_gpe_tunnel_command, static) = {
" [encap-vrf-id <nn>] [decap-vrf-id <nn>] [del]\n",
.function = vxlan_gpe_add_del_tunnel_command_fn,
};
-/* *INDENT-ON* */
/**
* @brief CLI function for showing VXLAN GPE tunnels
@@ -997,12 +1000,10 @@ show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm,
if (pool_elts (ngm->tunnels) == 0)
vlib_cli_output (vm, "No vxlan-gpe tunnels configured.");
- /* *INDENT-OFF* */
pool_foreach (t, ngm->tunnels)
{
vlib_cli_output (vm, "%U", format_vxlan_gpe_tunnel, t);
}
- /* *INDENT-ON* */
return 0;
}
@@ -1016,12 +1017,10 @@ show_vxlan_gpe_tunnel_command_fn (vlib_main_t * vm,
* [0] local 10.0.3.1 remote 10.0.3.3 vni 13 encap_fib_index 0 sw_if_index 5 decap_next l2
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vxlan_gpe_tunnel_command, static) = {
.path = "show vxlan-gpe",
.function = show_vxlan_gpe_tunnel_command_fn,
};
-/* *INDENT-ON* */
void
vnet_int_vxlan_gpe_bypass_mode (u32 sw_if_index, u8 is_ip6, u8 is_enable)
@@ -1087,11 +1086,12 @@ set_ip4_vxlan_gpe_bypass (vlib_main_t * vm,
}
/*?
- * This command adds the 'ip4-vxlan-gpe-bypass' graph node for a given interface.
- * By adding the IPv4 vxlan-gpe-bypass graph node to an interface, the node checks
- * for and validate input vxlan_gpe packet and bypass ip4-lookup, ip4-local,
- * ip4-udp-lookup nodes to speedup vxlan_gpe packet forwarding. This node will
- * cause extra overhead to for non-vxlan_gpe packets which is kept at a minimum.
+ * This command adds the 'ip4-vxlan-gpe-bypass' graph node for a given
+ * interface. By adding the IPv4 vxlan-gpe-bypass graph node to an interface,
+ * the node checks for and validate input vxlan_gpe packet and bypass
+ * ip4-lookup, ip4-local, ip4-udp-lookup nodes to speedup vxlan_gpe packet
+ * forwarding. This node will cause extra overhead to for non-vxlan_gpe
+ * packets which is kept at a minimum.
*
* @cliexpar
* @parblock
@@ -1108,10 +1108,10 @@ set_ip4_vxlan_gpe_bypass (vlib_main_t * vm,
*
* Example of graph node after ip4-vxlan-gpe-bypass is enabled:
* @cliexstart{show vlib graph ip4-vxlan-gpe-bypass}
- * Name Next Previous
- * ip4-vxlan-gpe-bypass error-drop [0] ip4-input
- * vxlan4-gpe-input [1] ip4-input-no-checksum
- * ip4-lookup [2]
+ * Name Next Previous
+ * ip4-vxlan-gpe-bypass error-drop [0] ip4-input
+ * vxlan4-gpe-input [1] ip4-input-no-checksum
+ * ip4-lookup [2]
* @cliexend
*
* Example of how to display the feature enabled on an interface:
@@ -1128,13 +1128,11 @@ set_ip4_vxlan_gpe_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip vxlan-gpe-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip_vxlan_gpe_bypass_command, static) = {
.path = "set interface ip vxlan-gpe-bypass",
.function = set_ip4_vxlan_gpe_bypass,
.short_help = "set interface ip vxlan-gpe-bypass <interface> [del]",
};
-/* *INDENT-ON* */
static clib_error_t *
set_ip6_vxlan_gpe_bypass (vlib_main_t * vm,
@@ -1144,11 +1142,12 @@ set_ip6_vxlan_gpe_bypass (vlib_main_t * vm,
}
/*?
- * This command adds the 'ip6-vxlan-gpe-bypass' graph node for a given interface.
- * By adding the IPv6 vxlan-gpe-bypass graph node to an interface, the node checks
- * for and validate input vxlan_gpe packet and bypass ip6-lookup, ip6-local,
- * ip6-udp-lookup nodes to speedup vxlan_gpe packet forwarding. This node will
- * cause extra overhead to for non-vxlan_gpe packets which is kept at a minimum.
+ * This command adds the 'ip6-vxlan-gpe-bypass' graph node for a given
+ * interface. By adding the IPv6 vxlan-gpe-bypass graph node to an interface,
+ * the node checks for and validate input vxlan_gpe packet and bypass
+ * ip6-lookup, ip6-local, ip6-udp-lookup nodes to speedup vxlan_gpe packet
+ * forwarding. This node will cause extra overhead to for non-vxlan_gpe packets
+ * which is kept at a minimum.
*
* @cliexpar
* @parblock
@@ -1165,10 +1164,10 @@ set_ip6_vxlan_gpe_bypass (vlib_main_t * vm,
*
* Example of graph node after ip6-vxlan-gpe-bypass is enabled:
* @cliexstart{show vlib graph ip6-vxlan-gpe-bypass}
- * Name Next Previous
- * ip6-vxlan-gpe-bypass error-drop [0] ip6-input
- * vxlan6-gpe-input [1] ip4-input-no-checksum
- * ip6-lookup [2]
+ * Name Next Previous
+ * ip6-vxlan-gpe-bypass error-drop [0] ip6-input
+ * vxlan6-gpe-input [1] ip4-input-no-checksum
+ * ip6-lookup [2]
* @cliexend
*
* Example of how to display the feature enabled on an interface:
@@ -1185,15 +1184,12 @@ set_ip6_vxlan_gpe_bypass (vlib_main_t * vm,
* @cliexcmd{set interface ip6 vxlan-gpe-bypass GigabitEthernet2/0/0 del}
* @endparblock
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_interface_ip6_vxlan_gpe_bypass_command, static) = {
.path = "set interface ip6 vxlan-gpe-bypass",
.function = set_ip6_vxlan_gpe_bypass,
.short_help = "set interface ip6 vxlan-gpe-bypass <interface> [del]",
};
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
VNET_FEATURE_INIT (ip4_vxlan_gpe_bypass, static) =
{
.arc_name = "ip4-unicast",
@@ -1207,7 +1203,6 @@ VNET_FEATURE_INIT (ip6_vxlan_gpe_bypass, static) =
.node_name = "ip6-vxlan-gpe-bypass",
.runs_before = VNET_FEATURES ("ip6-lookup"),
};
-/* *INDENT-ON* */
/**
* @brief Feature init function for VXLAN GPE
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe.h b/src/vnet/vxlan-gpe/vxlan_gpe.h
index 0f8250a1788..aabaafeee6f 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe.h
+++ b/src/vnet/vxlan-gpe/vxlan_gpe.h
@@ -40,7 +40,6 @@
* @brief VXLAN GPE header struct
*
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/** 20 bytes */
ip4_header_t ip4;
@@ -49,9 +48,7 @@ typedef CLIB_PACKED (struct {
/** 8 bytes */
vxlan_gpe_header_t vxlan;
}) ip4_vxlan_gpe_header_t;
-/* *INDENT-ON* */
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/** 40 bytes */
ip6_header_t ip6;
@@ -60,15 +57,13 @@ typedef CLIB_PACKED (struct {
/** 8 bytes */
vxlan_gpe_header_t vxlan;
}) ip6_vxlan_gpe_header_t;
-/* *INDENT-ON* */
/**
* @brief Key struct for IPv4 VXLAN GPE tunnel.
- * Key fields: local remote, vni
+ * Key fields: local remote, vni, udp-port
* all fields in NET byte order
* VNI shifted 8 bits
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
union {
struct {
@@ -76,26 +71,35 @@ typedef CLIB_PACKED(struct {
u32 remote;
u32 vni;
- u32 pad;
+ u32 port;
};
u64 as_u64[2];
};
}) vxlan4_gpe_tunnel_key_t;
-/* *INDENT-ON* */
/**
* @brief Key struct for IPv6 VXLAN GPE tunnel.
- * Key fields: local remote, vni
+ * Key fields: local remote, vni, udp-port
* all fields in NET byte order
* VNI shifted 8 bits
*/
-/* *INDENT-OFF* */
typedef CLIB_PACKED(struct {
ip6_address_t local;
ip6_address_t remote;
u32 vni;
+ u32 port;
}) vxlan6_gpe_tunnel_key_t;
-/* *INDENT-ON* */
+
+typedef union
+{
+ struct
+ {
+ u32 tunnel_index;
+ u16 next_index;
+ u8 error;
+ };
+ u64 as_u64;
+} vxlan_gpe_decap_info_t;
/**
* @brief Struct for VXLAN GPE tunnel
@@ -117,6 +121,10 @@ typedef struct
ip46_address_t local;
/** tunnel remote address */
ip46_address_t remote;
+ /** local udp-port **/
+ u16 local_port;
+ /** remote udp-port **/
+ u16 remote_port;
/* mcast packet output intfc index (used only if dst is mcast) */
u32 mcast_sw_if_index;
@@ -221,9 +229,7 @@ typedef struct
vnet_main_t *vnet_main;
/* cache for last 8 vxlan_gpe tunnel */
-#ifdef CLIB_HAVE_VEC512
vtep4_cache_t vtep4_u512;
-#endif
/** List of next nodes for the decap indexed on protocol */
uword decap_next_node_list[VXLAN_GPE_PROTOCOL_MAX];
@@ -248,6 +254,8 @@ typedef struct
u32 encap_fib_index;
u32 decap_fib_index;
u32 vni;
+ u16 local_port;
+ u16 remote_port;
} vnet_vxlan_gpe_add_del_tunnel_args_t;
diff --git a/src/vnet/vxlan-gpe/vxlan_gpe_api.c b/src/vnet/vxlan-gpe/vxlan_gpe_api.c
index 243ddfb7fe1..cc74e1f58d4 100644
--- a/src/vnet/vxlan-gpe/vxlan_gpe_api.c
+++ b/src/vnet/vxlan-gpe/vxlan_gpe_api.c
@@ -114,12 +114,77 @@ static void
rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
out:
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_REPLY,
({
rmp->sw_if_index = ntohl (sw_if_index);
}));
- /* *INDENT-ON* */
+}
+
+static void
+vl_api_vxlan_gpe_add_del_tunnel_v2_t_handler (
+ vl_api_vxlan_gpe_add_del_tunnel_v2_t *mp)
+{
+ vl_api_vxlan_gpe_add_del_tunnel_v2_reply_t *rmp;
+ int rv = 0;
+ vnet_vxlan_gpe_add_del_tunnel_args_t _a, *a = &_a;
+ u32 encap_fib_index, decap_fib_index;
+ u8 protocol;
+ uword *p;
+ ip4_main_t *im = &ip4_main;
+ u32 sw_if_index = ~0;
+
+ p = hash_get (im->fib_index_by_table_id, ntohl (mp->encap_vrf_id));
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_FIB;
+ goto out;
+ }
+ encap_fib_index = p[0];
+
+ protocol = mp->protocol;
+
+ /* Interpret decap_vrf_id as an opaque if sending to other-than-ip4-input */
+ if (protocol == VXLAN_GPE_INPUT_NEXT_IP4_INPUT)
+ {
+ p = hash_get (im->fib_index_by_table_id, ntohl (mp->decap_vrf_id));
+ if (!p)
+ {
+ rv = VNET_API_ERROR_NO_SUCH_INNER_FIB;
+ goto out;
+ }
+ decap_fib_index = p[0];
+ }
+ else
+ {
+ decap_fib_index = ntohl (mp->decap_vrf_id);
+ }
+
+ clib_memset (a, 0, sizeof (*a));
+
+ a->is_add = mp->is_add;
+ ip_address_decode (&mp->local, &a->local);
+ ip_address_decode (&mp->remote, &a->remote);
+
+ /* Check src & dst are different */
+ if (ip46_address_is_equal (&a->local, &a->remote))
+ {
+ rv = VNET_API_ERROR_SAME_SRC_DST;
+ goto out;
+ }
+
+ a->local_port = ntohs (mp->local_port);
+ a->remote_port = ntohs (mp->remote_port);
+ a->is_ip6 = !ip46_address_is_ip4 (&a->local);
+ a->mcast_sw_if_index = ntohl (mp->mcast_sw_if_index);
+ a->encap_fib_index = encap_fib_index;
+ a->decap_fib_index = decap_fib_index;
+ a->protocol = protocol;
+ a->vni = ntohl (mp->vni);
+ rv = vnet_vxlan_gpe_add_del_tunnel (a, &sw_if_index);
+
+out:
+ REPLY_MACRO2 (VL_API_VXLAN_GPE_ADD_DEL_TUNNEL_V2_REPLY,
+ ({ rmp->sw_if_index = ntohl (sw_if_index); }));
}
static void send_vxlan_gpe_tunnel_details
@@ -175,12 +240,10 @@ static void vl_api_vxlan_gpe_tunnel_dump_t_handler
if (~0 == sw_if_index)
{
- /* *INDENT-OFF* */
pool_foreach (t, vgm->tunnels)
- {
- send_vxlan_gpe_tunnel_details(t, reg, mp->context);
- }
- /* *INDENT-ON* */
+ {
+ send_vxlan_gpe_tunnel_details (t, reg, mp->context);
+ }
}
else
{
@@ -194,6 +257,80 @@ static void vl_api_vxlan_gpe_tunnel_dump_t_handler
}
}
+static void
+send_vxlan_gpe_tunnel_v2_details (vxlan_gpe_tunnel_t *t,
+ vl_api_registration_t *reg, u32 context)
+{
+ vl_api_vxlan_gpe_tunnel_v2_details_t *rmp;
+ ip4_main_t *im4 = &ip4_main;
+ ip6_main_t *im6 = &ip6_main;
+ u8 is_ipv6 = !(t->flags & VXLAN_GPE_TUNNEL_IS_IPV4);
+
+ rmp = vl_msg_api_alloc (sizeof (*rmp));
+ clib_memset (rmp, 0, sizeof (*rmp));
+ rmp->_vl_msg_id =
+ ntohs (REPLY_MSG_ID_BASE + VL_API_VXLAN_GPE_TUNNEL_V2_DETAILS);
+
+ ip_address_encode (&t->local, is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &rmp->local);
+ ip_address_encode (&t->remote, is_ipv6 ? IP46_TYPE_IP6 : IP46_TYPE_IP4,
+ &rmp->remote);
+ rmp->local_port = htons (t->local_port);
+ rmp->remote_port = htons (t->remote_port);
+
+ if (ip46_address_is_ip4 (&t->local))
+ {
+ rmp->encap_vrf_id = htonl (im4->fibs[t->encap_fib_index].ft_table_id);
+ rmp->decap_vrf_id = htonl (im4->fibs[t->decap_fib_index].ft_table_id);
+ }
+ else
+ {
+ rmp->encap_vrf_id = htonl (im6->fibs[t->encap_fib_index].ft_table_id);
+ rmp->decap_vrf_id = htonl (im6->fibs[t->decap_fib_index].ft_table_id);
+ }
+ rmp->mcast_sw_if_index = htonl (t->mcast_sw_if_index);
+ rmp->vni = htonl (t->vni);
+ rmp->protocol = t->protocol;
+ rmp->sw_if_index = htonl (t->sw_if_index);
+ rmp->context = context;
+
+ vl_api_send_msg (reg, (u8 *) rmp);
+}
+
+static void
+vl_api_vxlan_gpe_tunnel_v2_dump_t_handler (
+ vl_api_vxlan_gpe_tunnel_v2_dump_t *mp)
+{
+ vl_api_registration_t *reg;
+ vxlan_gpe_main_t *vgm = &vxlan_gpe_main;
+ vxlan_gpe_tunnel_t *t;
+ u32 sw_if_index;
+
+ reg = vl_api_client_index_to_registration (mp->client_index);
+ if (!reg)
+ return;
+
+ sw_if_index = ntohl (mp->sw_if_index);
+
+ if (~0 == sw_if_index)
+ {
+ pool_foreach (t, vgm->tunnels)
+ {
+ send_vxlan_gpe_tunnel_v2_details (t, reg, mp->context);
+ }
+ }
+ else
+ {
+ if ((sw_if_index >= vec_len (vgm->tunnel_index_by_sw_if_index)) ||
+ (~0 == vgm->tunnel_index_by_sw_if_index[sw_if_index]))
+ {
+ return;
+ }
+ t = &vgm->tunnels[vgm->tunnel_index_by_sw_if_index[sw_if_index]];
+ send_vxlan_gpe_tunnel_v2_details (t, reg, mp->context);
+ }
+}
+
#include <vxlan-gpe/vxlan_gpe.api.c>
static clib_error_t *
@@ -201,8 +338,8 @@ vxlan_gpe_api_hookup (vlib_main_t * vm)
{
api_main_t *am = vlibapi_get_main ();
- am->api_trace_cfg[VL_API_VXLAN_GPE_ADD_DEL_TUNNEL].size +=
- 17 * sizeof (u32);
+ vl_api_increase_msg_trace_size (am, VL_API_VXLAN_GPE_ADD_DEL_TUNNEL,
+ 17 * sizeof (u32));
/*
* Set up the (msg_name, crc, message-id) table
diff --git a/src/vpp-api/client/client.c b/src/vpp-api/client/client.c
index 902ed3bd625..d59273ed6cb 100644
--- a/src/vpp-api/client/client.c
+++ b/src/vpp-api/client/client.c
@@ -30,7 +30,8 @@
#include <vlibapi/api.h>
#include <vlibmemory/api.h>
-#include <vpp/api/vpe_msg_enum.h>
+#include <vlibmemory/memclnt.api_enum.h>
+#include <vlibmemory/memclnt.api_types.h>
#include "vppapiclient.h"
@@ -48,14 +49,6 @@ bool rx_thread_done;
* vac_read() -> resumes RX thread
*/
-#define vl_typedefs /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_typedefs
-
-#define vl_endianfun /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_endianfun
-
typedef struct {
u8 connected_to_vlib;
pthread_t rx_thread_handle;
@@ -108,14 +101,6 @@ cleanup (void)
clib_memset(pm, 0, sizeof(*pm));
}
-/*
- * Satisfy external references when -lvlib is not available.
- */
-void vlib_cli_output (struct vlib_main_t * vm, char * fmt, ...)
-{
- clib_warning ("vlib_cli_output called...");
-}
-
void
vac_free (void * msg)
{
@@ -320,6 +305,8 @@ vac_connect (char * name, char * chroot_prefix, vac_callback_t cb,
}
/* Start read timeout thread */
+ timeout_in_progress = false;
+ timeout_thread_cancelled = false;
rv = pthread_create(&pm->timeout_thread_handle, NULL,
vac_timeout_thread_fn, 0);
if (rv) {
@@ -497,10 +484,11 @@ vac_read (char **p, int *l, u16 timeout)
/*
* XXX: Makes the assumption that client_index is the first member
*/
-typedef VL_API_PACKED(struct _vl_api_header {
+typedef struct _vl_api_header
+{
u16 _vl_msg_id;
u32 client_index;
-}) vl_api_header_t;
+} __attribute__ ((packed)) vl_api_header_t;
static u32
vac_client_index (void)
diff --git a/src/vpp-api/client/stat_client.c b/src/vpp-api/client/stat_client.c
index 2c30be62326..359813f8d57 100644
--- a/src/vpp-api/client/stat_client.c
+++ b/src/vpp-api/client/stat_client.c
@@ -29,7 +29,8 @@
#include <vppinfra/vec.h>
#include <vppinfra/lock.h>
#include <stdatomic.h>
-#include <vpp/stats/stat_segment.h>
+#include <vlib/vlib.h>
+#include <vlib/stats/stats.h>
#include <vpp-api/client/stat_client.h>
stat_client_main_t stat_client_main;
@@ -81,8 +82,8 @@ recv_fd (int sock)
return fd;
}
-static stat_segment_directory_entry_t *
-get_stat_vector_r (stat_client_main_t * sm)
+static vlib_stats_entry_t *
+get_stat_vector_r (stat_client_main_t *sm)
{
ASSERT (sm->shared_header);
return stat_segment_adjust (sm,
@@ -172,7 +173,7 @@ double
stat_segment_heartbeat_r (stat_client_main_t * sm)
{
stat_segment_access_t sa;
- stat_segment_directory_entry_t *ep;
+ vlib_stats_entry_t *ep;
/* Has directory been updated? */
if (sm->shared_header->epoch != sm->current_epoch)
@@ -223,18 +224,18 @@ stat_vec_combined_init (vlib_counter_t c)
* threads), otherwise copy out all values.
*/
static stat_segment_data_t
-copy_data (stat_segment_directory_entry_t *ep, u32 index2, char *name,
- stat_client_main_t *sm)
+copy_data (vlib_stats_entry_t *ep, u32 index2, char *name,
+ stat_client_main_t *sm, bool via_symlink)
{
stat_segment_data_t result = { 0 };
int i;
vlib_counter_t **combined_c; /* Combined counter */
counter_t **simple_c; /* Simple counter */
- uint64_t *error_vector;
assert (sm->shared_header);
result.type = ep->type;
+ result.via_symlink = via_symlink;
result.name = strdup (name ? name : ep->name);
switch (ep->type)
@@ -270,18 +271,6 @@ copy_data (stat_segment_directory_entry_t *ep, u32 index2, char *name,
}
break;
- case STAT_DIR_TYPE_ERROR_INDEX:
- /* Gather errors from all threads into a vector */
- error_vector =
- stat_segment_adjust (sm, (void *) sm->shared_header->error_vector);
- vec_validate (result.error_vector, vec_len (error_vector) - 1);
- for (i = 0; i < vec_len (error_vector); i++)
- {
- counter_t *cb = stat_segment_adjust (sm, (void *) error_vector[i]);
- result.error_vector[i] = cb[ep->index];
- }
- break;
-
case STAT_DIR_TYPE_NAME_VECTOR:
{
uint8_t **name_vector = stat_segment_adjust (sm, ep->data);
@@ -297,9 +286,11 @@ copy_data (stat_segment_directory_entry_t *ep, u32 index2, char *name,
case STAT_DIR_TYPE_SYMLINK:
/* Gather info from all threads into a vector */
{
- stat_segment_directory_entry_t *ep2;
+ vlib_stats_entry_t *ep2;
ep2 = vec_elt_at_index (sm->directory_vector, ep->index1);
- return copy_data (ep2, ep->index2, ep->name, sm);
+ /* We do not intend to return the "result", avoid a leak */
+ free (result.name);
+ return copy_data (ep2, ep->index2, ep->name, sm, true);
}
case STAT_DIR_TYPE_EMPTY:
@@ -334,10 +325,8 @@ stat_segment_data_free (stat_segment_data_t * res)
vec_free (res[i].name_vector[j]);
vec_free (res[i].name_vector);
break;
- case STAT_DIR_TYPE_ERROR_INDEX:
- vec_free (res[i].error_vector);
- break;
case STAT_DIR_TYPE_SCALAR_INDEX:
+ case STAT_DIR_TYPE_EMPTY:
break;
default:
assert (0);
@@ -369,7 +358,7 @@ stat_segment_ls_r (uint8_t ** patterns, stat_client_main_t * sm)
if (stat_segment_access_start (&sa, sm))
return 0;
- stat_segment_directory_entry_t *counter_vec = get_stat_vector_r (sm);
+ vlib_stats_entry_t *counter_vec = get_stat_vector_r (sm);
for (j = 0; j < vec_len (counter_vec); j++)
{
for (i = 0; i < vec_len (patterns); i++)
@@ -412,7 +401,7 @@ stat_segment_data_t *
stat_segment_dump_r (uint32_t * stats, stat_client_main_t * sm)
{
int i;
- stat_segment_directory_entry_t *ep;
+ vlib_stats_entry_t *ep;
stat_segment_data_t *res = 0;
stat_segment_access_t sa;
@@ -423,11 +412,20 @@ stat_segment_dump_r (uint32_t * stats, stat_client_main_t * sm)
if (stat_segment_access_start (&sa, sm))
return 0;
+ /* preallocate the elements.
+ * This takes care of a special case where
+ * the vec_len(stats) == 0,
+ * such that we return a vector of
+ * length 0, rather than a null pointer
+ * (since null pointer is an error)
+ */
+ vec_alloc (res, vec_len (stats));
+
for (i = 0; i < vec_len (stats); i++)
{
/* Collect counter */
ep = vec_elt_at_index (sm->directory_vector, stats[i]);
- vec_add1 (res, copy_data (ep, ~0, 0, sm));
+ vec_add1 (res, copy_data (ep, ~0, 0, sm, false));
}
if (stat_segment_access_end (&sa, sm))
@@ -435,6 +433,8 @@ stat_segment_dump_r (uint32_t * stats, stat_client_main_t * sm)
fprintf (stderr, "Epoch changed while reading, invalid results\n");
// TODO increase counter
+ if (res)
+ stat_segment_data_free (res);
return 0;
}
@@ -473,7 +473,7 @@ stat_segment_string_vector (uint8_t ** string_vector, const char *string)
stat_segment_data_t *
stat_segment_dump_entry_r (uint32_t index, stat_client_main_t * sm)
{
- stat_segment_directory_entry_t *ep;
+ vlib_stats_entry_t *ep;
stat_segment_data_t *res = 0;
stat_segment_access_t sa;
@@ -486,7 +486,7 @@ stat_segment_dump_entry_r (uint32_t index, stat_client_main_t * sm)
/* Collect counter */
ep = vec_elt_at_index (sm->directory_vector, index);
- vec_add1 (res, copy_data (ep, ~0, 0, sm));
+ vec_add1 (res, copy_data (ep, ~0, 0, sm, false));
if (stat_segment_access_end (&sa, sm))
return res;
@@ -503,9 +503,9 @@ stat_segment_dump_entry (uint32_t index)
char *
stat_segment_index_to_name_r (uint32_t index, stat_client_main_t * sm)
{
- stat_segment_directory_entry_t *ep;
+ vlib_stats_entry_t *ep;
stat_segment_access_t sa;
- stat_segment_directory_entry_t *vec;
+ vlib_stats_entry_t *vec;
/* Has directory been update? */
if (sm->shared_header->epoch != sm->current_epoch)
@@ -514,6 +514,11 @@ stat_segment_index_to_name_r (uint32_t index, stat_client_main_t * sm)
return 0;
vec = get_stat_vector_r (sm);
ep = vec_elt_at_index (vec, index);
+ if (ep->type == STAT_DIR_TYPE_EMPTY)
+ {
+ stat_segment_access_end (&sa, sm);
+ return 0;
+ }
if (!stat_segment_access_end (&sa, sm))
return 0;
return strdup (ep->name);
diff --git a/src/vpp-api/client/stat_client.h b/src/vpp-api/client/stat_client.h
index 730badd1728..d9671c69ff2 100644
--- a/src/vpp-api/client/stat_client.h
+++ b/src/vpp-api/client/stat_client.h
@@ -25,7 +25,7 @@
#include <vlib/counter_types.h>
#include <time.h>
#include <stdbool.h>
-#include <vpp/stats/stat_segment_shared.h>
+#include <vlib/stats/shared.h>
/* Default socket to exchange segment fd */
/* TODO: Get from runtime directory */
@@ -36,6 +36,7 @@ typedef struct
{
char *name;
stat_directory_type_t type;
+ bool via_symlink;
union
{
double scalar_value;
@@ -49,8 +50,8 @@ typedef struct
typedef struct
{
uint64_t current_epoch;
- stat_segment_shared_header_t *shared_header;
- stat_segment_directory_entry_t *directory_vector;
+ vlib_stats_shared_header_t *shared_header;
+ vlib_stats_entry_t *directory_vector;
ssize_t memory_size;
uint64_t timeout;
} stat_client_main_t;
@@ -115,7 +116,7 @@ static inline int
stat_segment_access_start (stat_segment_access_t * sa,
stat_client_main_t * sm)
{
- stat_segment_shared_header_t *shared_header = sm->shared_header;
+ vlib_stats_shared_header_t *shared_header = sm->shared_header;
uint64_t max_time;
sa->epoch = shared_header->epoch;
@@ -130,10 +131,8 @@ stat_segment_access_start (stat_segment_access_t * sa,
while (shared_header->in_progress != 0)
;
}
- sm->directory_vector =
- (stat_segment_directory_entry_t *) stat_segment_adjust (sm,
- (void *)
- sm->shared_header->directory_vector);
+ sm->directory_vector = (vlib_stats_entry_t *) stat_segment_adjust (
+ sm, (void *) sm->shared_header->directory_vector);
if (sm->timeout)
return _time_now_nsec () < max_time ? 0 : -1;
return 0;
@@ -164,7 +163,7 @@ stat_segment_set_timeout (uint64_t timeout)
static inline bool
stat_segment_access_end (stat_segment_access_t * sa, stat_client_main_t * sm)
{
- stat_segment_shared_header_t *shared_header = sm->shared_header;
+ vlib_stats_shared_header_t *shared_header = sm->shared_header;
if (shared_header->epoch != sa->epoch || shared_header->in_progress)
return false;
diff --git a/src/vpp-api/client/test.c b/src/vpp-api/client/test.c
index 9bfed996e1b..c242e6611a4 100644
--- a/src/vpp-api/client/test.c
+++ b/src/vpp-api/client/test.c
@@ -32,14 +32,17 @@
#include <vlib/unix/unix.h>
#include <vlibapi/api.h>
#include <vppinfra/time.h>
-#include <vpp/api/vpe_msg_enum.h>
#include <signal.h>
#include "vppapiclient.h"
#include "stat_client.h"
-#define vl_typedefs /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_typedefs
+#include <vlibmemory/vlib.api_enum.h>
+#include <vlibmemory/vlib.api_types.h>
+#include <vlibmemory/memclnt.api_enum.h>
+#include <vlibmemory/memclnt.api_types.h>
+
+#include <vpp/api/vpe.api_enum.h>
+#include <vpp/api/vpe.api_types.h>
volatile int sigterm_received = 0;
volatile u32 result_ready;
@@ -67,7 +70,6 @@ wrap_vac_callback (unsigned char *data, int len)
static void
test_connect ()
{
- static int i;
int rv = vac_connect("vac_client", NULL, wrap_vac_callback, 32 /* rx queue-length*/);
if (rv != 0) {
printf("Connect failed: %d\n", rv);
@@ -75,7 +77,6 @@ test_connect ()
}
printf(".");
vac_disconnect();
- i++;
}
static void
@@ -139,7 +140,7 @@ test_stats (void)
assert(rv == 0);
u32 *dir;
- int i, j, k;
+ int i, k;
stat_segment_data_t *res;
u8 **pattern = 0;
vec_add1(pattern, (u8 *)"/if/names");
@@ -158,11 +159,6 @@ test_stats (void)
fformat (stdout, "[%d]: %s %s\n", k, res[i].name_vector[k],
res[i].name);
break;
- case STAT_DIR_TYPE_ERROR_INDEX:
- for (j = 0; j < vec_len (res[i].error_vector); j++)
- fformat (stdout, "%llu %s\n", res[i].error_vector[j],
- res[i].name);
- break;
default:
assert(0);
}
diff --git a/src/vpp-api/python/CMakeLists.txt b/src/vpp-api/python/CMakeLists.txt
index 6450fd92f2d..3059619ff21 100644
--- a/src/vpp-api/python/CMakeLists.txt
+++ b/src/vpp-api/python/CMakeLists.txt
@@ -11,26 +11,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-if (CMAKE_VERSION VERSION_LESS 3.12)
- find_package(PythonInterp 2.7)
-else()
- find_package(Python3 COMPONENTS Interpreter)
-endif()
+find_package(Python3 REQUIRED COMPONENTS Interpreter)
+set(PYTHONINTERP_FOUND ${Python3_Interpreter_FOUND})
+set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE})
-if(PYTHONINTERP_FOUND)
- install(
- CODE "
- execute_process(
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
- COMMAND ${PYTHON_EXECUTABLE} ./setup.py
- install
- --root /
- --prefix=${CMAKE_INSTALL_PREFIX}
- --single-version-externally-managed
- bdist_egg
- --dist-dir=${CMAKE_INSTALL_PREFIX}
- OUTPUT_QUIET
- )"
- COMPONENT vpp-api-python
- )
-endif()
+install(
+ CODE "
+ execute_process(
+ WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+ COMMAND ${PYTHON_EXECUTABLE} ./setup.py
+ install
+ --root=\$ENV{DESTDIR}/
+ --prefix=${CMAKE_INSTALL_PREFIX}
+ --single-version-externally-managed
+ bdist_egg
+ OUTPUT_QUIET
+ )"
+ COMPONENT vpp-api-python
+)
diff --git a/src/vpp-api/python/README.rst b/src/vpp-api/python/README.rst
deleted file mode 100644
index e69de29bb2d..00000000000
--- a/src/vpp-api/python/README.rst
+++ /dev/null
diff --git a/src/vpp-api/python/setup.py b/src/vpp-api/python/setup.py
index 8bf6def2227..784013fc606 100644
--- a/src/vpp-api/python/setup.py
+++ b/src/vpp-api/python/setup.py
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-import sys
try:
from setuptools import setup, find_packages
@@ -21,15 +20,17 @@ except ImportError:
requirements = []
setup(
- name='vpp_papi',
- version='2.0.0',
- description='VPP Python binding',
- author='Ole Troan',
- author_email='ot@cisco.com',
- url='https://wiki.fd.io/view/VPP/Python_API',
- license='Apache-2.0',
- test_suite='vpp_papi.tests',
+ name="vpp_papi",
+ version="2.1.0",
+ description="VPP Python binding",
+ author="Ole Troan",
+ author_email="ot@cisco.com",
+ url="https://wiki.fd.io/view/VPP/Python_API",
+ license="Apache-2.0",
+ test_suite="vpp_papi.tests",
install_requires=requirements,
packages=find_packages(),
- long_description='''VPP Python language binding.''',
- zip_safe=True)
+ package_data={"vpp_papi": ["data/*.json"]},
+ long_description="""VPP Python language binding.""",
+ zip_safe=True,
+)
diff --git a/src/vpp-api/python/vpp_papi/__init__.py b/src/vpp-api/python/vpp_papi/__init__.py
index b2b4fc78fc1..dc58c1e18cb 100644
--- a/src/vpp-api/python/vpp_papi/__init__.py
+++ b/src/vpp-api/python/vpp_papi/__init__.py
@@ -3,7 +3,7 @@ from .vpp_papi import VppEnum, VppEnumType, VppEnumFlag # noqa: F401
from .vpp_papi import VPPIOError, VPPRuntimeError, VPPValueError # noqa: F401
from .vpp_papi import VPPApiClient # noqa: F401
from .vpp_papi import VPPApiJSONFiles # noqa: F401
-from . macaddress import MACAddress, mac_pton, mac_ntop # noqa: F401
+from .macaddress import MACAddress, mac_pton, mac_ntop # noqa: F401
# sorted lexicographically
from .vpp_serializer import BaseTypes # noqa: F401
@@ -11,7 +11,8 @@ from .vpp_serializer import VPPEnumType, VPPType, VPPTypeAlias # noqa: F401
from .vpp_serializer import VPPMessage, VPPUnionType # noqa: F401
import pkg_resources # part of setuptools
+
try:
__version__ = pkg_resources.get_distribution("vpp_papi").version
-except (pkg_resources.DistributionNotFound):
+except pkg_resources.DistributionNotFound:
"""Can't find vpp_papi via setuptools"""
diff --git a/src/vpp-api/python/vpp_papi/data/memclnt.api.json b/src/vpp-api/python/vpp_papi/data/memclnt.api.json
new file mode 100644
index 00000000000..1734cf12ab0
--- /dev/null
+++ b/src/vpp-api/python/vpp_papi/data/memclnt.api.json
@@ -0,0 +1,809 @@
+{
+ "types": [
+ [
+ "module_version",
+ [
+ "u32",
+ "major"
+ ],
+ [
+ "u32",
+ "minor"
+ ],
+ [
+ "u32",
+ "patch"
+ ],
+ [
+ "string",
+ "name",
+ 64
+ ]
+ ],
+ [
+ "message_table_entry",
+ [
+ "u16",
+ "index"
+ ],
+ [
+ "string",
+ "name",
+ 64
+ ]
+ ]
+ ],
+ "messages": [
+ [
+ "memclnt_create",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "ctx_quota"
+ ],
+ [
+ "u64",
+ "input_queue"
+ ],
+ [
+ "string",
+ "name",
+ 64
+ ],
+ [
+ "u32",
+ "api_versions",
+ 8
+ ],
+ {
+ "crc": "0x9c5e1c2f",
+ "options": {
+ "deprecated": null
+ },
+ "comment": "/*\n * Create a client registration\n */"
+ }
+ ],
+ [
+ "memclnt_create_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "response"
+ ],
+ [
+ "u64",
+ "handle"
+ ],
+ [
+ "u32",
+ "index"
+ ],
+ [
+ "u64",
+ "message_table"
+ ],
+ {
+ "crc": "0x42ec4560",
+ "options": {
+ "deprecated": null
+ }
+ }
+ ],
+ [
+ "memclnt_delete",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "index"
+ ],
+ [
+ "u64",
+ "handle"
+ ],
+ [
+ "bool",
+ "do_cleanup"
+ ],
+ {
+ "crc": "0x7e1c04e3",
+ "options": {},
+ "comment": "/*\n * Delete a client registration\n */"
+ }
+ ],
+ [
+ "memclnt_delete_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "i32",
+ "response"
+ ],
+ [
+ "u64",
+ "handle"
+ ],
+ {
+ "crc": "0x3d3b6312",
+ "options": {}
+ }
+ ],
+ [
+ "rx_thread_exit",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u8",
+ "dummy"
+ ],
+ {
+ "crc": "0xc3a3a452",
+ "options": {},
+ "comment": "/*\n * Client RX thread exit\n */"
+ }
+ ],
+ [
+ "memclnt_rx_thread_suspend",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u8",
+ "dummy"
+ ],
+ {
+ "crc": "0xc3a3a452",
+ "options": {},
+ "comment": "/*\n * Client RX thread suspend\n */"
+ }
+ ],
+ [
+ "memclnt_read_timeout",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u8",
+ "dummy"
+ ],
+ {
+ "crc": "0xc3a3a452",
+ "options": {},
+ "comment": "/*\n * Client read timeout\n */"
+ }
+ ],
+ [
+ "rpc_call",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "client_index"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "u64",
+ "function"
+ ],
+ [
+ "u8",
+ "multicast"
+ ],
+ [
+ "u8",
+ "need_barrier_sync"
+ ],
+ [
+ "u8",
+ "send_reply"
+ ],
+ [
+ "u32",
+ "data_len"
+ ],
+ [
+ "u8",
+ "data",
+ 0,
+ "data_len"
+ ],
+ {
+ "crc": "0x7e8a2c95",
+ "options": {},
+ "comment": "/*\n * RPC\n */"
+ }
+ ],
+ [
+ "rpc_call_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "retval"
+ ],
+ {
+ "crc": "0xe8d4e804",
+ "options": {}
+ }
+ ],
+ [
+ "get_first_msg_id",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "client_index"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "string",
+ "name",
+ 64
+ ],
+ {
+ "crc": "0xebf79a66",
+ "options": {},
+ "comment": "/*\n * Lookup message-ID base by name\n */"
+ }
+ ],
+ [
+ "get_first_msg_id_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "retval"
+ ],
+ [
+ "u16",
+ "first_msg_id"
+ ],
+ {
+ "crc": "0x7d337472",
+ "options": {}
+ }
+ ],
+ [
+ "api_versions",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "client_index"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ {
+ "crc": "0x51077d14",
+ "options": {},
+ "comment": "/*\n * Get API version table (includes built-in and plugins)\n */"
+ }
+ ],
+ [
+ "api_versions_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "retval"
+ ],
+ [
+ "u32",
+ "count"
+ ],
+ [
+ "vl_api_module_version_t",
+ "api_versions",
+ 0,
+ "count"
+ ],
+ {
+ "crc": "0x5f0d99d6",
+ "options": {}
+ }
+ ],
+ [
+ "trace_plugin_msg_ids",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "client_index"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "string",
+ "plugin_name",
+ 128
+ ],
+ [
+ "u16",
+ "first_msg_id"
+ ],
+ [
+ "u16",
+ "last_msg_id"
+ ],
+ {
+ "crc": "0xf476d3ce",
+ "options": {},
+ "comment": "/*\n * Trace the plugin message-id allocator\n * so we stand a chance of dealing with different sets of plugins\n * at api trace replay time\n */"
+ }
+ ],
+ [
+ "sockclnt_create",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "string",
+ "name",
+ 64
+ ],
+ {
+ "crc": "0x455fb9c4",
+ "options": {},
+ "comment": "/*\n * Create a socket client registration.\n */"
+ }
+ ],
+ [
+ "sockclnt_create_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "client_index"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "response"
+ ],
+ [
+ "u32",
+ "index"
+ ],
+ [
+ "u16",
+ "count"
+ ],
+ [
+ "vl_api_message_table_entry_t",
+ "message_table",
+ 0,
+ "count"
+ ],
+ {
+ "crc": "0x35166268",
+ "options": {}
+ }
+ ],
+ [
+ "sockclnt_delete",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "client_index"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "u32",
+ "index"
+ ],
+ {
+ "crc": "0x8ac76db6",
+ "options": {},
+ "comment": "/*\n * Delete a client registration\n */"
+ }
+ ],
+ [
+ "sockclnt_delete_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "response"
+ ],
+ {
+ "crc": "0x8f38b1ee",
+ "options": {}
+ }
+ ],
+ [
+ "sock_init_shm",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "client_index"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "u32",
+ "requested_size"
+ ],
+ [
+ "u8",
+ "nitems"
+ ],
+ [
+ "u64",
+ "configs",
+ 0,
+ "nitems"
+ ],
+ {
+ "crc": "0x51646d92",
+ "options": {},
+ "comment": "/*\n * Initialize shm api over socket api\n */"
+ }
+ ],
+ [
+ "sock_init_shm_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "retval"
+ ],
+ {
+ "crc": "0xe8d4e804",
+ "options": {}
+ }
+ ],
+ [
+ "memclnt_keepalive",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "client_index"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ {
+ "crc": "0x51077d14",
+ "options": {},
+ "comment": "/*\n * Memory client ping / response\n * Only sent on inactive connections\n */"
+ }
+ ],
+ [
+ "memclnt_keepalive_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "retval"
+ ],
+ {
+ "crc": "0xe8d4e804",
+ "options": {}
+ }
+ ],
+ [
+ "control_ping",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "client_index"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ {
+ "crc": "0x51077d14",
+ "options": {},
+ "comment": "/** \\brief Control ping from client to api server request\n @param client_index - opaque cookie to identify the sender\n @param context - sender context, to match reply w/ request\n*/"
+ }
+ ],
+ [
+ "control_ping_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "retval"
+ ],
+ [
+ "u32",
+ "client_index"
+ ],
+ [
+ "u32",
+ "vpe_pid"
+ ],
+ {
+ "crc": "0xf6b0b8ca",
+ "options": {},
+ "comment": "/** \\brief Control ping from the client to the server response\n @param client_index - opaque cookie to identify the sender\n @param context - sender context, to match reply w/ request\n @param retval - return code for the request\n @param vpe_pid - the pid of the vpe, returned by the server\n*/"
+ }
+ ],
+ [
+ "memclnt_create_v2",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "ctx_quota"
+ ],
+ [
+ "u64",
+ "input_queue"
+ ],
+ [
+ "string",
+ "name",
+ 64
+ ],
+ [
+ "u32",
+ "api_versions",
+ 8
+ ],
+ [
+ "bool",
+ "keepalive",
+ {
+ "default": "true"
+ }
+ ],
+ {
+ "crc": "0xc4bd4882",
+ "options": {}
+ }
+ ],
+ [
+ "memclnt_create_v2_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "response"
+ ],
+ [
+ "u64",
+ "handle"
+ ],
+ [
+ "u32",
+ "index"
+ ],
+ [
+ "u64",
+ "message_table"
+ ],
+ {
+ "crc": "0x42ec4560",
+ "options": {}
+ }
+ ],
+ [
+ "get_api_json",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "client_index"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ {
+ "crc": "0x51077d14",
+ "options": {}
+ }
+ ],
+ [
+ "get_api_json_reply",
+ [
+ "u16",
+ "_vl_msg_id"
+ ],
+ [
+ "u32",
+ "context"
+ ],
+ [
+ "i32",
+ "retval"
+ ],
+ [
+ "string",
+ "json",
+ 0
+ ],
+ {
+ "crc": "0xea715b59",
+ "options": {}
+ }
+ ]
+ ],
+ "unions": [],
+ "enums": [],
+ "enumflags": [],
+ "services": {
+ "memclnt_rx_thread_suspend": {
+ "reply": "null"
+ },
+ "memclnt_read_timeout": {
+ "reply": "null"
+ },
+ "rx_thread_exit": {
+ "reply": "null"
+ },
+ "trace_plugin_msg_ids": {
+ "reply": "null"
+ },
+ "memclnt_create": {
+ "reply": "memclnt_create_reply"
+ },
+ "memclnt_delete": {
+ "reply": "memclnt_delete_reply"
+ },
+ "rpc_call": {
+ "reply": "rpc_call_reply"
+ },
+ "get_first_msg_id": {
+ "reply": "get_first_msg_id_reply"
+ },
+ "api_versions": {
+ "reply": "api_versions_reply"
+ },
+ "sockclnt_create": {
+ "reply": "sockclnt_create_reply"
+ },
+ "sockclnt_delete": {
+ "reply": "sockclnt_delete_reply"
+ },
+ "sock_init_shm": {
+ "reply": "sock_init_shm_reply"
+ },
+ "memclnt_keepalive": {
+ "reply": "memclnt_keepalive_reply"
+ },
+ "control_ping": {
+ "reply": "control_ping_reply"
+ },
+ "memclnt_create_v2": {
+ "reply": "memclnt_create_v2_reply"
+ },
+ "get_api_json": {
+ "reply": "get_api_json_reply"
+ }
+ },
+ "options": {
+ "version": "2.1.0"
+ },
+ "aliases": {},
+ "vl_api_version": "0xb197c551",
+ "imports": [],
+ "counters": [],
+ "paths": []
+}
diff --git a/src/vpp-api/python/vpp_papi/macaddress.py b/src/vpp-api/python/vpp_papi/macaddress.py
index c3b10a3c11e..66349a3c19a 100644
--- a/src/vpp-api/python/vpp_papi/macaddress.py
+++ b/src/vpp-api/python/vpp_papi/macaddress.py
@@ -18,20 +18,19 @@ import binascii
def mac_pton(s):
- '''Convert MAC address as text to binary'''
- return binascii.unhexlify(s.replace(':', ''))
+ """Convert MAC address as text to binary"""
+ return binascii.unhexlify(s.replace(":", ""))
def mac_ntop(binary):
- '''Convert MAC address as binary to text'''
- x = b':'.join(binascii.hexlify(binary)[i:i + 2]
- for i in range(0, 12, 2))
- return str(x.decode('ascii'))
+ """Convert MAC address as binary to text"""
+ x = b":".join(binascii.hexlify(binary)[i : i + 2] for i in range(0, 12, 2))
+ return str(x.decode("ascii"))
-class MACAddress():
+class MACAddress:
def __init__(self, mac):
- '''MAC Address as a text-string (aa:bb:cc:dd:ee:ff) or 6 bytes'''
+ """MAC Address as a text-string (aa:bb:cc:dd:ee:ff) or 6 bytes"""
# Of course Python 2 doesn't distinguish str from bytes
if type(mac) is bytes and len(mac) == 6:
self.mac_binary = mac
@@ -51,10 +50,9 @@ class MACAddress():
return self.mac_string
def __repr__(self):
- return '%s(%s)' % (self.__class__.__name__, self.mac_string)
+ return "%s(%s)" % (self.__class__.__name__, self.mac_string)
def __eq__(self, other):
-
if not isinstance(other, MACAddress):
try:
# if it looks like a mac address, we'll take it.
diff --git a/src/vpp-api/python/vpp_papi/tests/test_macaddress.py b/src/vpp-api/python/vpp_papi/tests/test_macaddress.py
index 08e365afd92..e86ec75c76e 100644
--- a/src/vpp-api/python/vpp_papi/tests/test_macaddress.py
+++ b/src/vpp-api/python/vpp_papi/tests/test_macaddress.py
@@ -3,8 +3,6 @@ from vpp_papi import MACAddress
class TestMacAddress(unittest.TestCase):
-
def test_eq(self):
- mac = '11:22:33:44:55:66'
- self.assertEqual(MACAddress(mac),
- MACAddress(mac))
+ mac = "11:22:33:44:55:66"
+ self.assertEqual(MACAddress(mac), MACAddress(mac))
diff --git a/src/vpp-api/python/vpp_papi/tests/test_vpp_format.py b/src/vpp-api/python/vpp_papi/tests/test_vpp_format.py
index 5c179c02e0a..ae4d2c5126d 100644
--- a/src/vpp-api/python/vpp_papi/tests/test_vpp_format.py
+++ b/src/vpp-api/python/vpp_papi/tests/test_vpp_format.py
@@ -25,57 +25,64 @@ from vpp_papi import vpp_format
from parameterized import parameterized
-ip4_addr = '1.2.3.4'
-ip4_addrn = b'\x01\x02\x03\x04'
+ip4_addr = "1.2.3.4"
+ip4_addrn = b"\x01\x02\x03\x04"
ip4_prefix_len = 32
-ip4_prefix = '%s/%s' % (ip4_addr, ip4_prefix_len)
+ip4_prefix = "%s/%s" % (ip4_addr, ip4_prefix_len)
ipv4_network = ipaddress.IPv4Network(text_type(ip4_prefix))
-ip4_addr_format_vl_api_address_t = {'un': {'ip4': b'\x01\x02\x03\x04'},
- 'af': 0}
-ip4_addr_format_vl_api_prefix_t = {'address': # noqa: E127,E501
- {'un': {'ip4': b'\x01\x02\x03\x04'},
- 'af': 0},
- 'len': ip4_prefix_len}
-ip4_addr_format_vl_api_prefix_packed_t = {'address': b'\x01\x02\x03\x04',
- 'len': ip4_prefix_len}
-
-ip6_addr = 'dead::'
-ip6_addrn = b'\xde\xad\x00\x00\x00\x00\x00\x00' \
- b'\x00\x00\x00\x00\x00\x00\x00\x00'
+ip4_addr_format_vl_api_address_t = {"un": {"ip4": b"\x01\x02\x03\x04"}, "af": 0}
+ip4_addr_format_vl_api_prefix_t = {
+ "address": {"un": {"ip4": b"\x01\x02\x03\x04"}, "af": 0}, # noqa: E127,E501
+ "len": ip4_prefix_len,
+}
+ip4_addr_format_vl_api_prefix_packed_t = {
+ "address": b"\x01\x02\x03\x04",
+ "len": ip4_prefix_len,
+}
+
+ip6_addr = "dead::"
+ip6_addrn = b"\xde\xad\x00\x00\x00\x00\x00\x00" b"\x00\x00\x00\x00\x00\x00\x00\x00"
ip6_prefix_len = 127
-ip6_prefix = '%s/%s' % (ip6_addr, ip6_prefix_len)
+ip6_prefix = "%s/%s" % (ip6_addr, ip6_prefix_len)
ipv6_network = ipaddress.IPv6Network(text_type(ip6_prefix))
-ip6_addr_format_vl_api_address_t = {'un': {'ip6': b'\xde\xad\x00\x00'
- b'\x00\x00\x00\x00'
- b'\x00\x00\x00\x00'
- b'\x00\x00\x00\x00'},
- 'af': 1}
-ip6_addr_format_vl_api_prefix_t = {'address': # noqa: E127
- {'af': 1,
- 'un': {
- 'ip6': b'\xde\xad\x00\x00'
- b'\x00\x00\x00\x00'
- b'\x00\x00\x00\x00'
- b'\x00\x00\x00\x00'}},
- 'len': ip6_prefix_len}
-ip6_addr_format_vl_api_prefix_packed_t = {'address': b'\xde\xad\x00\x00' # noqa: E127,E501
- b'\x00\x00\x00\x00'
- b'\x00\x00\x00\x00'
- b'\x00\x00\x00\x00',
- 'len': ip6_prefix_len}
+ip6_addr_format_vl_api_address_t = {
+ "un": {
+ "ip6": b"\xde\xad\x00\x00"
+ b"\x00\x00\x00\x00"
+ b"\x00\x00\x00\x00"
+ b"\x00\x00\x00\x00"
+ },
+ "af": 1,
+}
+ip6_addr_format_vl_api_prefix_t = {
+ "address": { # noqa: E127
+ "af": 1,
+ "un": {
+ "ip6": b"\xde\xad\x00\x00"
+ b"\x00\x00\x00\x00"
+ b"\x00\x00\x00\x00"
+ b"\x00\x00\x00\x00"
+ },
+ },
+ "len": ip6_prefix_len,
+}
+ip6_addr_format_vl_api_prefix_packed_t = {
+ "address": b"\xde\xad\x00\x00" # noqa: E127,E501
+ b"\x00\x00\x00\x00"
+ b"\x00\x00\x00\x00"
+ b"\x00\x00\x00\x00",
+ "len": ip6_prefix_len,
+}
class TestVppFormat(unittest.TestCase):
-
def test_format_vl_api_address_t(self):
res = vpp_format.format_vl_api_address_t(ip4_addr)
self.assertEqual(res, ip4_addr_format_vl_api_address_t)
# PY2: raises socket.error
# PY3: raises OSError
- with self.assertRaises((TypeError,
- socket.error,
- OSError)):
+ with self.assertRaises((TypeError, socket.error, OSError)):
res = vpp_format.format_vl_api_address_t(ip4_addrn)
res = vpp_format.format_vl_api_address_t(ip6_addr)
@@ -84,19 +91,14 @@ class TestVppFormat(unittest.TestCase):
with self.assertRaises(TypeError):
es = vpp_format.format_vl_api_address_t(ip6_addrn)
- @parameterized.expand([('ip4 prefix',
- ip4_prefix,
- ip4_addr_format_vl_api_prefix_t),
- ('ip6 prefix',
- ip6_prefix,
- ip6_addr_format_vl_api_prefix_t),
- ('IPv4Network',
- ipv4_network,
- ip4_addr_format_vl_api_prefix_t),
- ('IPv6Network',
- ipv6_network,
- ip6_addr_format_vl_api_prefix_t),
- ])
+ @parameterized.expand(
+ [
+ ("ip4 prefix", ip4_prefix, ip4_addr_format_vl_api_prefix_t),
+ ("ip6 prefix", ip6_prefix, ip6_addr_format_vl_api_prefix_t),
+ ("IPv4Network", ipv4_network, ip4_addr_format_vl_api_prefix_t),
+ ("IPv6Network", ipv6_network, ip6_addr_format_vl_api_prefix_t),
+ ]
+ )
def test_format_vl_api_prefix_t(self, _, arg, expected):
res = vpp_format.format_vl_api_prefix_t(arg)
self.assertEqual(res, expected)
diff --git a/src/vpp-api/python/vpp_papi/tests/test_vpp_papi.py b/src/vpp-api/python/vpp_papi/tests/test_vpp_papi.py
index 99acb7c7469..51c024aa3ab 100644
--- a/src/vpp-api/python/vpp_papi/tests/test_vpp_papi.py
+++ b/src/vpp-api/python/vpp_papi/tests/test_vpp_papi.py
@@ -24,8 +24,7 @@ from vpp_papi import vpp_transport_shmem
class TestVppPapiVPPApiClient(unittest.TestCase):
def test_getcontext(self):
- vpp_papi.VPPApiClient.apidir = '.'
- c = vpp_papi.VPPApiClient(testmode=True, use_socket=True)
+ c = vpp_papi.VPPApiClient(apidir=".", testmode=True, use_socket=True)
# reset initialization at module load time.
c.get_context.context = mp.Value(ctypes.c_uint, 0)
@@ -39,8 +38,7 @@ class TestVppPapiVPPApiClientMp(unittest.TestCase):
# run_tests.py (eg. make test TEST_JOBS=10)
def test_get_context_mp(self):
- vpp_papi.VPPApiClient.apidir = '.'
- c = vpp_papi.VPPApiClient(testmode=True, use_socket=True)
+ c = vpp_papi.VPPApiClient(apidir=".", testmode=True, use_socket=True)
# reset initialization at module load time.
c.get_context.context = mp.Value(ctypes.c_uint, 0)
@@ -243,11 +241,11 @@ class TestVppPapiLogging(unittest.TestCase):
pass
client = Vpp
- with self.assertLogs('vpp_papi', level='DEBUG') as cm:
+ with self.assertLogs("vpp_papi", level="DEBUG") as cm:
vpp_papi.vpp_atexit(client)
- self.assertEqual(cm.output, ['DEBUG:vpp_papi:Cleaning up VPP on exit'])
+ self.assertEqual(cm.output, ["DEBUG:vpp_papi:Cleaning up VPP on exit"])
with self.assertRaises(AssertionError):
- with self.assertLogs('vpp_papi.serializer', level='DEBUG') as cm:
+ with self.assertLogs("vpp_papi.serializer", level="DEBUG") as cm:
vpp_papi.vpp_atexit(client)
self.assertEqual(cm.output, [])
diff --git a/src/vpp-api/python/vpp_papi/tests/test_vpp_serializer.py b/src/vpp-api/python/vpp_papi/tests/test_vpp_serializer.py
index c9b3d672d6a..f0d2846214a 100755
--- a/src/vpp-api/python/vpp_papi/tests/test_vpp_serializer.py
+++ b/src/vpp-api/python/vpp_papi/tests/test_vpp_serializer.py
@@ -13,61 +13,57 @@ from ipaddress import *
class TestLimits(unittest.TestCase):
def test_string(self):
- fixed_string = VPPType('fixed_string',
- [['string', 'name', 16]])
+ fixed_string = VPPType("fixed_string", [["string", "name", 16]])
- b = fixed_string.pack({'name': 'foobar'})
+ b = fixed_string.pack({"name": "foobar"})
self.assertEqual(len(b), 16)
# Ensure string is nul terminated
- self.assertEqual(b.decode('ascii')[6], '\x00')
+ self.assertEqual(b.decode("ascii")[6], "\x00")
nt, size = fixed_string.unpack(b)
self.assertEqual(size, 16)
- self.assertEqual(nt.name, 'foobar')
+ self.assertEqual(nt.name, "foobar")
# Empty string
- b = fixed_string.pack({'name': ''})
+ b = fixed_string.pack({"name": ""})
self.assertEqual(len(b), 16)
nt, size = fixed_string.unpack(b)
self.assertEqual(size, 16)
- self.assertEqual(nt.name, '')
+ self.assertEqual(nt.name, "")
# String too long
with self.assertRaises(VPPSerializerValueError):
- b = fixed_string.pack({'name': 'foobarfoobar1234'})
+ b = fixed_string.pack({"name": "foobarfoobar1234"})
- variable_string = VPPType('variable_string',
- [['string', 'name', 0]])
- b = variable_string.pack({'name': 'foobar'})
- self.assertEqual(len(b), 4 + len('foobar'))
+ variable_string = VPPType("variable_string", [["string", "name", 0]])
+ b = variable_string.pack({"name": "foobar"})
+ self.assertEqual(len(b), 4 + len("foobar"))
nt, size = variable_string.unpack(b)
- self.assertEqual(size, 4 + len('foobar'))
- self.assertEqual(nt.name, 'foobar')
- self.assertEqual(len(nt.name), len('foobar'))
+ self.assertEqual(size, 4 + len("foobar"))
+ self.assertEqual(nt.name, "foobar")
+ self.assertEqual(len(nt.name), len("foobar"))
def test_limit(self):
- limited_type = VPPType('limited_type_t',
- [['string', 'name', 0, {'limit': 16}]])
- unlimited_type = VPPType('limited_type_t',
- [['string', 'name', 0]])
+ limited_type = VPPType("limited_type_t", [["string", "name", 0, {"limit": 16}]])
+ unlimited_type = VPPType("limited_type_t", [["string", "name", 0]])
- b = limited_type.pack({'name': 'foobar'})
+ b = limited_type.pack({"name": "foobar"})
self.assertEqual(len(b), 10)
- b = unlimited_type.pack({'name': 'foobar'})
+ b = unlimited_type.pack({"name": "foobar"})
self.assertEqual(len(b), 10)
with self.assertRaises(VPPSerializerValueError):
- b = limited_type.pack({'name': 'foobar'*3})
+ b = limited_type.pack({"name": "foobar" * 3})
class TestDefaults(unittest.TestCase):
def test_defaults(self):
- default_type = VPPType('default_type_t',
- [['u16', 'mtu', {'default': 1500, 'limit': 0}]])
- without_default_type = VPPType('without_default_type_t',
- [['u16', 'mtu']])
+ default_type = VPPType(
+ "default_type_t", [["u16", "mtu", {"default": 1500, "limit": 0}]]
+ )
+ without_default_type = VPPType("without_default_type_t", [["u16", "mtu"]])
b = default_type.pack({})
self.assertEqual(len(b), 2)
@@ -76,7 +72,7 @@ class TestDefaults(unittest.TestCase):
self.assertEqual(nt.mtu, 1500)
# distinguish between parameter 0 and parameter not passed
- b = default_type.pack({'mtu': 0})
+ b = default_type.pack({"mtu": 0})
self.assertEqual(len(b), 2)
nt, size = default_type.unpack(b)
self.assertEqual(len(b), size)
@@ -90,13 +86,15 @@ class TestDefaults(unittest.TestCase):
self.assertEqual(nt.mtu, 0)
# default enum type
- VPPEnumType('vl_api_enum_t', [["ADDRESS_IP4", 0],
- ["ADDRESS_IP6", 1],
- {"enumtype": "u32"}])
+ VPPEnumType(
+ "vl_api_enum_t",
+ [["ADDRESS_IP4", 0], ["ADDRESS_IP6", 1], {"enumtype": "u32"}],
+ )
- default_with_enum = VPPType('default_enum_type_t',
- [['u16', 'mtu'], ['vl_api_enum_t',
- 'e', {'default': 1}]])
+ default_with_enum = VPPType(
+ "default_enum_type_t",
+ [["u16", "mtu"], ["vl_api_enum_t", "e", {"default": 1}]],
+ )
b = default_with_enum.pack({})
self.assertEqual(len(b), 6)
@@ -106,275 +104,275 @@ class TestDefaults(unittest.TestCase):
class TestAddType(unittest.TestCase):
-
def test_union(self):
- un = VPPUnionType('test_union',
- [['u8', 'is_bool'],
- ['u32', 'is_int']])
+ un = VPPUnionType("test_union", [["u8", "is_bool"], ["u32", "is_int"]])
- b = un.pack({'is_int': 0x12345678})
+ b = un.pack({"is_int": 0x12345678})
nt, size = un.unpack(b)
self.assertEqual(len(b), size)
self.assertEqual(nt.is_bool, 0x12)
self.assertEqual(nt.is_int, 0x12345678)
def test_address(self):
- af = VPPEnumType('vl_api_address_family_t', [["ADDRESS_IP4", 0],
- ["ADDRESS_IP6", 1],
- {"enumtype": "u32"}])
- aff = VPPEnumFlagType('vl_api_address_family_flag_t', [["ADDRESS_IP4", 0],
- ["ADDRESS_IP6", 1],
- {"enumtype": "u32"}])
- ip4 = VPPTypeAlias('vl_api_ip4_address_t', {'type': 'u8',
- 'length': 4})
- ip6 = VPPTypeAlias('vl_api_ip6_address_t', {'type': 'u8',
- 'length': 16})
- VPPUnionType('vl_api_address_union_t',
- [["vl_api_ip4_address_t", "ip4"],
- ["vl_api_ip6_address_t", "ip6"]])
-
- address = VPPType('vl_api_address_t',
- [['vl_api_address_family_t', 'af'],
- ['vl_api_address_union_t', 'un']])
-
- prefix = VPPType('vl_api_prefix_t',
- [['vl_api_address_t', 'address'],
- ['u8', 'len']])
-
- va_address_list = VPPType('list_addresses',
- [['u8', 'count'],
- ['vl_api_address_t', 'addresses',
- 0, 'count']])
-
- message_with_va_address_list = VPPType('msg_with_vla',
- [['list_addresses',
- 'vla_address'],
- ['u8', 'is_cool']])
-
- b = ip4.pack(inet_pton(AF_INET, '1.1.1.1'))
+ af = VPPEnumType(
+ "vl_api_address_family_t",
+ [["ADDRESS_IP4", 0], ["ADDRESS_IP6", 1], {"enumtype": "u32"}],
+ )
+ aff = VPPEnumFlagType(
+ "vl_api_address_family_flag_t",
+ [["ADDRESS_IP4", 0], ["ADDRESS_IP6", 1], {"enumtype": "u32"}],
+ )
+ ip4 = VPPTypeAlias("vl_api_ip4_address_t", {"type": "u8", "length": 4})
+ ip6 = VPPTypeAlias("vl_api_ip6_address_t", {"type": "u8", "length": 16})
+ VPPUnionType(
+ "vl_api_address_union_t",
+ [["vl_api_ip4_address_t", "ip4"], ["vl_api_ip6_address_t", "ip6"]],
+ )
+
+ address = VPPType(
+ "vl_api_address_t",
+ [["vl_api_address_family_t", "af"], ["vl_api_address_union_t", "un"]],
+ )
+
+ prefix = VPPType(
+ "vl_api_prefix_t", [["vl_api_address_t", "address"], ["u8", "len"]]
+ )
+
+ va_address_list = VPPType(
+ "list_addresses",
+ [["u8", "count"], ["vl_api_address_t", "addresses", 0, "count"]],
+ )
+
+ message_with_va_address_list = VPPType(
+ "msg_with_vla", [["list_addresses", "vla_address"], ["u8", "is_cool"]]
+ )
+
+ b = ip4.pack(inet_pton(AF_INET, "1.1.1.1"))
self.assertEqual(len(b), 4)
nt, size = ip4.unpack(b)
- self.assertEqual(str(nt), '1.1.1.1')
+ self.assertEqual(str(nt), "1.1.1.1")
- b = ip6.pack(inet_pton(AF_INET6, '1::1'))
+ b = ip6.pack(inet_pton(AF_INET6, "1::1"))
self.assertEqual(len(b), 16)
- b = address.pack({'af': af.ADDRESS_IP4,
- 'un':
- {'ip4': inet_pton(AF_INET, '2.2.2.2')}})
+ b = address.pack(
+ {"af": af.ADDRESS_IP4, "un": {"ip4": inet_pton(AF_INET, "2.2.2.2")}}
+ )
self.assertEqual(len(b), 20)
nt, size = address.unpack(b)
- self.assertEqual(str(nt), '2.2.2.2')
+ self.assertEqual(str(nt), "2.2.2.2")
# List of addresses
address_list = []
for i in range(4):
- address_list.append({'af': af.ADDRESS_IP4,
- 'un':
- {'ip4': inet_pton(AF_INET, '2.2.2.2')}})
- b = va_address_list.pack({'count': len(address_list),
- 'addresses': address_list})
+ address_list.append(
+ {"af": af.ADDRESS_IP4, "un": {"ip4": inet_pton(AF_INET, "2.2.2.2")}}
+ )
+ b = va_address_list.pack(
+ {"count": len(address_list), "addresses": address_list}
+ )
self.assertEqual(len(b), 81)
nt, size = va_address_list.unpack(b)
- self.assertEqual(str(nt.addresses[0]), '2.2.2.2')
-
- b = message_with_va_address_list.pack({'vla_address':
- {'count': len(address_list),
- 'addresses': address_list},
- 'is_cool': 100})
+ self.assertEqual(str(nt.addresses[0]), "2.2.2.2")
+
+ b = message_with_va_address_list.pack(
+ {
+ "vla_address": {"count": len(address_list), "addresses": address_list},
+ "is_cool": 100,
+ }
+ )
self.assertEqual(len(b), 82)
nt, size = message_with_va_address_list.unpack(b)
self.assertEqual(nt.is_cool, 100)
def test_address_with_prefix(self):
- af = VPPEnumType('vl_api_address_family_t', [["ADDRESS_IP4", 0],
- ["ADDRESS_IP6", 1],
- {"enumtype": "u32"}])
- ip4 = VPPTypeAlias('vl_api_ip4_address_t', {'type': 'u8',
- 'length': 4})
- ip6 = VPPTypeAlias('vl_api_ip6_address_t', {'type': 'u8',
- 'length': 16})
- VPPUnionType('vl_api_address_union_t',
- [["vl_api_ip4_address_t", "ip4"],
- ["vl_api_ip6_address_t", "ip6"]])
-
- address = VPPType('vl_api_address_t',
- [['vl_api_address_family_t', 'af'],
- ['vl_api_address_union_t', 'un']])
-
- prefix = VPPType('vl_api_prefix_t',
- [['vl_api_address_t', 'address'],
- ['u8', 'len']])
- prefix4 = VPPType('vl_api_ip4_prefix_t',
- [['vl_api_ip4_address_t', 'address'],
- ['u8', 'len']])
- prefix6 = VPPType('vl_api_ip6_prefix_t',
- [['vl_api_ip6_address_t', 'address'],
- ['u8', 'len']])
-
- address_with_prefix = VPPTypeAlias('vl_api_address_with_prefix_t', {'type': 'vl_api_prefix_t' })
- address4_with_prefix = VPPTypeAlias('vl_api_ip4_address_with_prefix_t',
- {'type': 'vl_api_ip4_prefix_t' })
- address6_with_prefix = VPPTypeAlias('vl_api_ip6_address_with_prefix_t',
- {'type': 'vl_api_ip6_prefix_t' })
-
- awp_type = VPPType('foobar_t',
- [['vl_api_address_with_prefix_t', 'address']])
+ af = VPPEnumType(
+ "vl_api_address_family_t",
+ [["ADDRESS_IP4", 0], ["ADDRESS_IP6", 1], {"enumtype": "u32"}],
+ )
+ ip4 = VPPTypeAlias("vl_api_ip4_address_t", {"type": "u8", "length": 4})
+ ip6 = VPPTypeAlias("vl_api_ip6_address_t", {"type": "u8", "length": 16})
+ VPPUnionType(
+ "vl_api_address_union_t",
+ [["vl_api_ip4_address_t", "ip4"], ["vl_api_ip6_address_t", "ip6"]],
+ )
+
+ address = VPPType(
+ "vl_api_address_t",
+ [["vl_api_address_family_t", "af"], ["vl_api_address_union_t", "un"]],
+ )
+
+ prefix = VPPType(
+ "vl_api_prefix_t", [["vl_api_address_t", "address"], ["u8", "len"]]
+ )
+ prefix4 = VPPType(
+ "vl_api_ip4_prefix_t", [["vl_api_ip4_address_t", "address"], ["u8", "len"]]
+ )
+ prefix6 = VPPType(
+ "vl_api_ip6_prefix_t", [["vl_api_ip6_address_t", "address"], ["u8", "len"]]
+ )
+
+ address_with_prefix = VPPTypeAlias(
+ "vl_api_address_with_prefix_t", {"type": "vl_api_prefix_t"}
+ )
+ address4_with_prefix = VPPTypeAlias(
+ "vl_api_ip4_address_with_prefix_t", {"type": "vl_api_ip4_prefix_t"}
+ )
+ address6_with_prefix = VPPTypeAlias(
+ "vl_api_ip6_address_with_prefix_t", {"type": "vl_api_ip6_prefix_t"}
+ )
+
+ awp_type = VPPType("foobar_t", [["vl_api_address_with_prefix_t", "address"]])
# address with prefix
- b = address_with_prefix.pack(IPv4Interface('2.2.2.2/24'))
+ b = address_with_prefix.pack(IPv4Interface("2.2.2.2/24"))
self.assertEqual(len(b), 21)
nt, size = address_with_prefix.unpack(b)
self.assertTrue(isinstance(nt, IPv4Interface))
- self.assertEqual(str(nt), '2.2.2.2/24')
+ self.assertEqual(str(nt), "2.2.2.2/24")
- b = address_with_prefix.pack(IPv6Interface('2::2/64'))
+ b = address_with_prefix.pack(IPv6Interface("2::2/64"))
self.assertEqual(len(b), 21)
nt, size = address_with_prefix.unpack(b)
self.assertTrue(isinstance(nt, IPv6Interface))
- self.assertEqual(str(nt), '2::2/64')
+ self.assertEqual(str(nt), "2::2/64")
- b = address_with_prefix.pack(IPv4Network('2.2.2.2/24', strict=False))
+ b = address_with_prefix.pack(IPv4Network("2.2.2.2/24", strict=False))
self.assertEqual(len(b), 21)
nt, size = address_with_prefix.unpack(b)
self.assertTrue(isinstance(nt, IPv4Interface))
- self.assertEqual(str(nt), '2.2.2.0/24')
+ self.assertEqual(str(nt), "2.2.2.0/24")
- b = address4_with_prefix.pack('2.2.2.2/24')
+ b = address4_with_prefix.pack("2.2.2.2/24")
self.assertEqual(len(b), 5)
nt, size = address4_with_prefix.unpack(b)
self.assertTrue(isinstance(nt, IPv4Interface))
- self.assertEqual(str(nt), '2.2.2.2/24')
- b = address4_with_prefix.pack(IPv4Interface('2.2.2.2/24'))
+ self.assertEqual(str(nt), "2.2.2.2/24")
+ b = address4_with_prefix.pack(IPv4Interface("2.2.2.2/24"))
self.assertEqual(len(b), 5)
- b = address6_with_prefix.pack('2::2/64')
+ b = address6_with_prefix.pack("2::2/64")
self.assertEqual(len(b), 17)
nt, size = address6_with_prefix.unpack(b)
self.assertTrue(isinstance(nt, IPv6Interface))
- self.assertEqual(str(nt), '2::2/64')
- b = address6_with_prefix.pack(IPv6Interface('2::2/64'))
+ self.assertEqual(str(nt), "2::2/64")
+ b = address6_with_prefix.pack(IPv6Interface("2::2/64"))
self.assertEqual(len(b), 17)
- b = prefix.pack('192.168.10.0/24')
+ b = prefix.pack("192.168.10.0/24")
self.assertEqual(len(b), 21)
nt, size = prefix.unpack(b)
self.assertTrue(isinstance(nt, IPv4Network))
- self.assertEqual(str(nt), '192.168.10.0/24')
+ self.assertEqual(str(nt), "192.168.10.0/24")
- b = awp_type.pack({'address': '1.2.3.4/24'})
+ b = awp_type.pack({"address": "1.2.3.4/24"})
self.assertEqual(len(b), 21)
nt, size = awp_type.unpack(b)
self.assertTrue(isinstance(nt.address, IPv4Interface))
- self.assertEqual(str(nt.address), '1.2.3.4/24')
+ self.assertEqual(str(nt.address), "1.2.3.4/24")
- b = awp_type.pack({'address': IPv4Interface('1.2.3.4/24')})
+ b = awp_type.pack({"address": IPv4Interface("1.2.3.4/24")})
self.assertEqual(len(b), 21)
nt, size = awp_type.unpack(b)
self.assertTrue(isinstance(nt.address, IPv4Interface))
- self.assertEqual(str(nt.address), '1.2.3.4/24')
+ self.assertEqual(str(nt.address), "1.2.3.4/24")
def test_recursive_address(self):
- af = VPPEnumType('vl_api_address_family_t', [["ADDRESS_IP4", 0],
- ["ADDRESS_IP6", 1],
- {"enumtype": "u32"}])
- ip4 = VPPTypeAlias('vl_api_ip4_address_t', {'type': 'u8',
- 'length': 4})
- b = ip4.pack('1.1.1.1')
+ af = VPPEnumType(
+ "vl_api_address_family_t",
+ [["ADDRESS_IP4", 0], ["ADDRESS_IP6", 1], {"enumtype": "u32"}],
+ )
+ ip4 = VPPTypeAlias("vl_api_ip4_address_t", {"type": "u8", "length": 4})
+ b = ip4.pack("1.1.1.1")
self.assertEqual(len(b), 4)
nt, size = ip4.unpack(b)
- self.assertEqual(str(nt), '1.1.1.1')
+ self.assertEqual(str(nt), "1.1.1.1")
- ip6 = VPPTypeAlias('vl_api_ip6_address_t', {'type': 'u8',
- 'length': 16})
- VPPUnionType('vl_api_address_union_t',
- [["vl_api_ip4_address_t", "ip4"],
- ["vl_api_ip6_address_t", "ip6"]])
+ ip6 = VPPTypeAlias("vl_api_ip6_address_t", {"type": "u8", "length": 16})
+ VPPUnionType(
+ "vl_api_address_union_t",
+ [["vl_api_ip4_address_t", "ip4"], ["vl_api_ip6_address_t", "ip6"]],
+ )
- address = VPPType('vl_api_address_t',
- [['vl_api_address_family_t', 'af'],
- ['vl_api_address_union_t', 'un']])
+ address = VPPType(
+ "vl_api_address_t",
+ [["vl_api_address_family_t", "af"], ["vl_api_address_union_t", "un"]],
+ )
- prefix = VPPType('vl_api_prefix_t',
- [['vl_api_address_t', 'address'],
- ['u8', 'len']])
- message = VPPMessage('svs',
- [['vl_api_prefix_t', 'prefix']])
- message_addr = VPPMessage('svs_address',
- [['vl_api_address_t', 'address']])
+ prefix = VPPType(
+ "vl_api_prefix_t", [["vl_api_address_t", "address"], ["u8", "len"]]
+ )
+ message = VPPMessage("svs", [["vl_api_prefix_t", "prefix"]])
+ message_addr = VPPMessage("svs_address", [["vl_api_address_t", "address"]])
- b = message_addr.pack({'address': "1::1"})
+ b = message_addr.pack({"address": "1::1"})
self.assertEqual(len(b), 20)
nt, size = message_addr.unpack(b)
self.assertEqual("1::1", str(nt.address))
- b = message_addr.pack({'address': "1.1.1.1"})
+ b = message_addr.pack({"address": "1.1.1.1"})
self.assertEqual(len(b), 20)
nt, size = message_addr.unpack(b)
self.assertEqual("1.1.1.1", str(nt.address))
- b = message.pack({'prefix': "1.1.1.0/24"})
+ b = message.pack({"prefix": "1.1.1.0/24"})
self.assertEqual(len(b), 21)
nt, size = message.unpack(b)
self.assertEqual("1.1.1.0/24", str(nt.prefix))
- message_array = VPPMessage('address_array',
- [['vl_api_ip6_address_t',
- 'addresses', 2]])
- b = message_array.pack({'addresses': [IPv6Address(u"1::1"), "2::2"]})
+ message_array = VPPMessage(
+ "address_array", [["vl_api_ip6_address_t", "addresses", 2]]
+ )
+ b = message_array.pack({"addresses": [IPv6Address("1::1"), "2::2"]})
self.assertEqual(len(b), 32)
- message_array_vla = VPPMessage('address_array_vla',
- [['u32', 'num'],
- ['vl_api_ip6_address_t',
- 'addresses', 0, 'num']])
- b = message_array_vla.pack({'addresses': ["1::1", "2::2"], 'num': 2})
+ message_array_vla = VPPMessage(
+ "address_array_vla",
+ [["u32", "num"], ["vl_api_ip6_address_t", "addresses", 0, "num"]],
+ )
+ b = message_array_vla.pack({"addresses": ["1::1", "2::2"], "num": 2})
self.assertEqual(len(b), 36)
- message_array4 = VPPMessage('address_array4',
- [['vl_api_ip4_address_t',
- 'addresses', 2]])
- b = message_array4.pack({'addresses': ["1.1.1.1", "2.2.2.2"]})
+ message_array4 = VPPMessage(
+ "address_array4", [["vl_api_ip4_address_t", "addresses", 2]]
+ )
+ b = message_array4.pack({"addresses": ["1.1.1.1", "2.2.2.2"]})
self.assertEqual(len(b), 8)
- b = message_array4.pack({'addresses': [IPv4Address(u"1.1.1.1"),
- "2.2.2.2"]})
+ b = message_array4.pack({"addresses": [IPv4Address("1.1.1.1"), "2.2.2.2"]})
self.assertEqual(len(b), 8)
- message = VPPMessage('address', [['vl_api_address_t', 'address']])
- b = message.pack({'address': '1::1'})
+ message = VPPMessage("address", [["vl_api_address_t", "address"]])
+ b = message.pack({"address": "1::1"})
self.assertEqual(len(b), 20)
- b = message.pack({'address': '1.1.1.1'})
+ b = message.pack({"address": "1.1.1.1"})
self.assertEqual(len(b), 20)
- message = VPPMessage('prefix', [['vl_api_prefix_t', 'prefix']])
- b = message.pack({'prefix': '1::1/130'})
+ message = VPPMessage("prefix", [["vl_api_prefix_t", "prefix"]])
+ b = message.pack({"prefix": "1::1/130"})
self.assertEqual(len(b), 21)
- b = message.pack({'prefix': IPv6Network(u'1::/119')})
+ b = message.pack({"prefix": IPv6Network("1::/119")})
self.assertEqual(len(b), 21)
- b = message.pack({'prefix': IPv4Network(u'1.1.0.0/16')})
+ b = message.pack({"prefix": IPv4Network("1.1.0.0/16")})
self.assertEqual(len(b), 21)
def test_zero_vla(self):
- '''Default zero'ed out for VLAs'''
- list = VPPType('vl_api_list_t',
- [['u8', 'count', 10]])
+ """Default zero'ed out for VLAs"""
+ list = VPPType("vl_api_list_t", [["u8", "count", 10]])
# Define an embedded VLA type
- valist = VPPType('vl_api_valist_t',
- [['u8', 'count'],
- ['u8', 'string', 0, 'count']])
+ valist = VPPType(
+ "vl_api_valist_t", [["u8", "count"], ["u8", "string", 0, "count"]]
+ )
# Define a message
- vamessage = VPPMessage('vamsg',
- [['vl_api_valist_t', 'valist'],
- ['u8', 'is_something']])
+ vamessage = VPPMessage(
+ "vamsg", [["vl_api_valist_t", "valist"], ["u8", "is_something"]]
+ )
- message = VPPMessage('msg',
- [['vl_api_list_t', 'list'],
- ['u8', 'is_something']])
+ message = VPPMessage("msg", [["vl_api_list_t", "list"], ["u8", "is_something"]])
# Pack message without VLA specified
- b = message.pack({'is_something': 1})
- b = vamessage.pack({'is_something': 1})
+ b = message.pack({"is_something": 1})
+ b = vamessage.pack({"is_something": 1})
def test_arrays(self):
# Test cases
@@ -382,254 +380,299 @@ class TestAddType(unittest.TestCase):
# 2. Fixed list of variable length sub type
# 3. Variable length type
#
- s = VPPType('str', [['u32', 'length'],
- ['u8', 'string', 0, 'length']])
+ s = VPPType("str", [["u32", "length"], ["u8", "string", 0, "length"]])
- ip4 = VPPType('ip4_address', [['u8', 'address', 4]])
- listip4 = VPPType('list_ip4_t', [['ip4_address', 'addresses', 4]])
- valistip4 = VPPType('list_ip4_t',
- [['u8', 'count'],
- ['ip4_address', 'addresses', 0, 'count']])
+ ip4 = VPPType("ip4_address", [["u8", "address", 4]])
+ listip4 = VPPType("list_ip4_t", [["ip4_address", "addresses", 4]])
+ valistip4 = VPPType(
+ "list_ip4_t", [["u8", "count"], ["ip4_address", "addresses", 0, "count"]]
+ )
- valistip4_legacy = VPPType('list_ip4_t',
- [['u8', 'foo'],
- ['ip4_address', 'addresses', 0]])
+ valistip4_legacy = VPPType(
+ "list_ip4_t", [["u8", "foo"], ["ip4_address", "addresses", 0]]
+ )
addresses = []
for i in range(4):
- addresses.append({'address': inet_pton(AF_INET, '2.2.2.2')})
- b = listip4.pack({'addresses': addresses})
+ addresses.append({"address": inet_pton(AF_INET, "2.2.2.2")})
+ b = listip4.pack({"addresses": addresses})
self.assertEqual(len(b), 16)
nt, size = listip4.unpack(b)
- self.assertEqual(nt.addresses[0].address,
- inet_pton(AF_INET, '2.2.2.2'))
+ self.assertEqual(nt.addresses[0].address, inet_pton(AF_INET, "2.2.2.2"))
- b = valistip4.pack({'count': len(addresses), 'addresses': addresses})
+ b = valistip4.pack({"count": len(addresses), "addresses": addresses})
self.assertEqual(len(b), 17)
nt, size = valistip4.unpack(b)
self.assertEqual(nt.count, 4)
- self.assertEqual(nt.addresses[0].address,
- inet_pton(AF_INET, '2.2.2.2'))
+ self.assertEqual(nt.addresses[0].address, inet_pton(AF_INET, "2.2.2.2"))
- b = valistip4_legacy.pack({'foo': 1, 'addresses': addresses})
+ b = valistip4_legacy.pack({"foo": 1, "addresses": addresses})
self.assertEqual(len(b), 17)
nt, size = valistip4_legacy.unpack(b)
self.assertEqual(len(nt.addresses), 4)
- self.assertEqual(nt.addresses[0].address,
- inet_pton(AF_INET, '2.2.2.2'))
+ self.assertEqual(nt.addresses[0].address, inet_pton(AF_INET, "2.2.2.2"))
- string = 'foobar foobar'
- b = s.pack({'length': len(string), 'string': string.encode('utf-8')})
+ string = "foobar foobar"
+ b = s.pack({"length": len(string), "string": string.encode("utf-8")})
nt, size = s.unpack(b)
self.assertEqual(len(b), size)
def test_string(self):
- s = VPPType('str', [['u32', 'length'],
- ['u8', 'string', 0, 'length']])
+ s = VPPType("str", [["u32", "length"], ["u8", "string", 0, "length"]])
- string = ''
- b = s.pack({'length': len(string), 'string': string.encode('utf-8')})
+ string = ""
+ b = s.pack({"length": len(string), "string": string.encode("utf-8")})
nt, size = s.unpack(b)
self.assertEqual(len(b), size)
+ # Try same with VLA u8
+ byte_array = [b"\0"] * (10)
+ vla_u8 = VPPType("vla_u8", [["u8", "length"], ["u8", "data", 0, "length"]])
+ b = vla_u8.pack({"length": len(byte_array), "data": byte_array})
+ nt, size = vla_u8.unpack(b)
+
+ # VLA Array of fixed length strings
+ fixed_string = VPPType("fixed_string", [["string", "data", 32]])
+ s = VPPType(
+ "string_vla", [["u32", "length"], ["fixed_string", "services", 0, "length"]]
+ )
+
+ string_list = [{"data": "foobar1"}, {"data": "foobar2"}]
+ b = s.pack({"length": 2, "services": string_list})
+ nt, size = s.unpack(b)
+
+ # Try same with u8
+ fixed_u8 = VPPType("fixed_u8", [["u8", "data", 32]])
+ s = VPPType(
+ "u8_vla", [["u32", "length"], ["fixed_string", "services", 0, "length"]]
+ )
+
+ u8_list = [{"data": "foobar1"}, {"data": "foobar2"}]
+ b = s.pack({"length": 2, "services": u8_list})
+ nt, size = s.unpack(b)
+
def test_message(self):
- foo = VPPMessage('foo', [['u16', '_vl_msg_id'],
- ['u8', 'client_index'],
- ['u8', 'something'],
- {"crc": "0x559b9f3c"}])
- b = foo.pack({'_vl_msg_id': 1, 'client_index': 5,
- 'something': 200})
+ foo = VPPMessage(
+ "foo",
+ [
+ ["u16", "_vl_msg_id"],
+ ["u8", "client_index"],
+ ["u8", "something"],
+ {"crc": "0x559b9f3c"},
+ ],
+ )
+ b = foo.pack({"_vl_msg_id": 1, "client_index": 5, "something": 200})
nt, size = foo.unpack(b)
self.assertEqual(len(b), size)
self.assertEqual(nt.something, 200)
def test_abf(self):
+ fib_mpls_label = VPPType(
+ "vl_api_fib_mpls_label_t",
+ [["u8", "is_uniform"], ["u32", "label"], ["u8", "ttl"], ["u8", "exp"]],
+ )
- fib_mpls_label = VPPType('vl_api_fib_mpls_label_t',
- [['u8', 'is_uniform'],
- ['u32', 'label'],
- ['u8', 'ttl'],
- ['u8', 'exp']])
-
- label_stack = {'is_uniform': 0,
- 'label': 0,
- 'ttl': 0,
- 'exp': 0}
+ label_stack = {"is_uniform": 0, "label": 0, "ttl": 0, "exp": 0}
b = fib_mpls_label.pack(label_stack)
self.assertEqual(len(b), 7)
- fib_path = VPPType('vl_api_fib_path_t',
- [['u32', 'sw_if_index'],
- ['u32', 'table_id'],
- ['u8', 'weight'],
- ['u8', 'preference'],
- ['u8', 'is_local'],
- ['u8', 'is_drop'],
- ['u8', 'is_udp_encap'],
- ['u8', 'is_unreach'],
- ['u8', 'is_prohibit'],
- ['u8', 'is_resolve_host'],
- ['u8', 'is_resolve_attached'],
- ['u8', 'is_dvr'],
- ['u8', 'is_source_lookup'],
- ['u8', 'afi'],
- ['u8', 'next_hop', 16],
- ['u32', 'next_hop_id'],
- ['u32', 'rpf_id'],
- ['u32', 'via_label'],
- ['u8', 'n_labels'],
- ['vl_api_fib_mpls_label_t', 'label_stack', 16]])
+ fib_path = VPPType(
+ "vl_api_fib_path_t",
+ [
+ ["u32", "sw_if_index"],
+ ["u32", "table_id"],
+ ["u8", "weight"],
+ ["u8", "preference"],
+ ["u8", "is_local"],
+ ["u8", "is_drop"],
+ ["u8", "is_udp_encap"],
+ ["u8", "is_unreach"],
+ ["u8", "is_prohibit"],
+ ["u8", "is_resolve_host"],
+ ["u8", "is_resolve_attached"],
+ ["u8", "is_dvr"],
+ ["u8", "is_source_lookup"],
+ ["u8", "afi"],
+ ["u8", "next_hop", 16],
+ ["u32", "next_hop_id"],
+ ["u32", "rpf_id"],
+ ["u32", "via_label"],
+ ["u8", "n_labels"],
+ ["vl_api_fib_mpls_label_t", "label_stack", 16],
+ ],
+ )
label_stack_list = []
for i in range(16):
label_stack_list.append(label_stack)
- paths = {'is_udp_encap': 0,
- 'next_hop': b'\x10\x02\x02\xac',
- 'table_id': 0,
- 'afi': 0,
- 'weight': 1,
- 'next_hop_id': 4294967295,
- 'label_stack': label_stack_list,
- 'n_labels': 0,
- 'sw_if_index': 4294967295,
- 'preference': 0}
+ paths = {
+ "is_udp_encap": 0,
+ "next_hop": b"\x10\x02\x02\xac",
+ "table_id": 0,
+ "afi": 0,
+ "weight": 1,
+ "next_hop_id": 4294967295,
+ "label_stack": label_stack_list,
+ "n_labels": 0,
+ "sw_if_index": 4294967295,
+ "preference": 0,
+ }
b = fib_path.pack(paths)
- self.assertEqual(len(b), (7*16) + 49)
+ self.assertEqual(len(b), (7 * 16) + 49)
- abf_policy = VPPType('vl_api_abf_policy_t',
- [['u32', 'policy_id'],
- ['u32', 'acl_index'],
- ['u8', 'n_paths'],
- ['vl_api_fib_path_t', 'paths', 0, 'n_paths']])
+ abf_policy = VPPType(
+ "vl_api_abf_policy_t",
+ [
+ ["u32", "policy_id"],
+ ["u32", "acl_index"],
+ ["u8", "n_paths"],
+ ["vl_api_fib_path_t", "paths", 0, "n_paths"],
+ ],
+ )
- policy = {
- 'n_paths': 1,
- 'paths': [paths],
- 'acl_index': 0,
- 'policy_id': 10}
+ policy = {"n_paths": 1, "paths": [paths], "acl_index": 0, "policy_id": 10}
b = abf_policy.pack(policy)
- self.assertEqual(len(b), (7*16) + 49 + 9)
-
- abf_policy_add_del = VPPMessage('abf_policy_add_del',
- [['u16', '_vl_msg_id'],
- ['u32', 'client_index'],
- ['u32', 'context'],
- ['u8', 'is_add'],
- ['vl_api_abf_policy_t', 'policy']])
-
- b = abf_policy_add_del.pack({'is_add': 1,
- 'context': 66,
- '_vl_msg_id': 1066,
- 'policy': policy})
+ self.assertEqual(len(b), (7 * 16) + 49 + 9)
+
+ abf_policy_add_del = VPPMessage(
+ "abf_policy_add_del",
+ [
+ ["u16", "_vl_msg_id"],
+ ["u32", "client_index"],
+ ["u32", "context"],
+ ["u8", "is_add"],
+ ["vl_api_abf_policy_t", "policy"],
+ ],
+ )
+
+ b = abf_policy_add_del.pack(
+ {"is_add": 1, "context": 66, "_vl_msg_id": 1066, "policy": policy}
+ )
nt, size = abf_policy_add_del.unpack(b)
- self.assertEqual(nt.policy.paths[0].next_hop,
- b'\x10\x02\x02\xac\x00\x00\x00\x00'
- b'\x00\x00\x00\x00\x00\x00\x00\x00')
+ self.assertEqual(
+ nt.policy.paths[0].next_hop,
+ b"\x10\x02\x02\xac\x00\x00\x00\x00" b"\x00\x00\x00\x00\x00\x00\x00\x00",
+ )
def test_bier(self):
-
- bier_table_id = VPPType('vl_api_bier_table_id_t',
- [['u8', 'bt_set'],
- ['u8', 'bt_sub_domain'],
- ['u8', 'bt_hdr_len_id']])
-
- bier_imp_add = VPPMessage('bier_imp_add',
- [['u32', 'client_index'],
- ['u32', 'context'],
- ['vl_api_bier_table_id_t', 'bi_tbl_id'],
- ['u16', 'bi_src'],
- ['u8', 'bi_n_bytes'],
- ['u8', 'bi_bytes', 0, 'bi_n_bytes']])
-
- table_id = {'bt_set': 0,
- 'bt_sub_domain': 0,
- 'bt_hdr_len_id': 0}
-
- bibytes = b'foobar'
-
- b = bier_imp_add.pack({'bi_tbl_id': table_id,
- 'bi_n_bytes': len(bibytes),
- 'bi_bytes': bibytes})
+ bier_table_id = VPPType(
+ "vl_api_bier_table_id_t",
+ [["u8", "bt_set"], ["u8", "bt_sub_domain"], ["u8", "bt_hdr_len_id"]],
+ )
+
+ bier_imp_add = VPPMessage(
+ "bier_imp_add",
+ [
+ ["u32", "client_index"],
+ ["u32", "context"],
+ ["vl_api_bier_table_id_t", "bi_tbl_id"],
+ ["u16", "bi_src"],
+ ["u8", "bi_n_bytes"],
+ ["u8", "bi_bytes", 0, "bi_n_bytes"],
+ ],
+ )
+
+ table_id = {"bt_set": 0, "bt_sub_domain": 0, "bt_hdr_len_id": 0}
+
+ bibytes = b"foobar"
+
+ b = bier_imp_add.pack(
+ {"bi_tbl_id": table_id, "bi_n_bytes": len(bibytes), "bi_bytes": bibytes}
+ )
self.assertEqual(len(b), 20)
def test_lisp(self):
- VPPEnumType('vl_api_eid_type_t',
- [["EID_TYPE_API_PREFIX", 0],
- ["EID_TYPE_API_MAC", 1],
- ["EID_TYPE_API_NSH", 2],
- {"enumtype": "u32"}])
-
- VPPTypeAlias('vl_api_mac_address_t', {'type': 'u8',
- 'length': 6})
-
- VPPType('vl_api_nsh_t',
- [["u32", "spi"],
- ["u8", "si"]])
-
- VPPEnumType('vl_api_address_family_t', [["ADDRESS_IP4", 0],
- ["ADDRESS_IP6", 1],
- {"enumtype": "u32"}])
- VPPTypeAlias('vl_api_ip4_address_t', {'type': 'u8',
- 'length': 4})
- VPPTypeAlias('vl_api_ip6_address_t', {'type': 'u8',
- 'length': 16})
- VPPUnionType('vl_api_address_union_t',
- [["vl_api_ip4_address_t", "ip4"],
- ["vl_api_ip6_address_t", "ip6"]])
-
- VPPType('vl_api_address_t',
- [['vl_api_address_family_t', 'af'],
- ['vl_api_address_union_t', 'un']])
-
- VPPType('vl_api_prefix_t',
- [['vl_api_address_t', 'address'],
- ['u8', 'len']])
-
- VPPUnionType('vl_api_eid_address_t',
- [["vl_api_prefix_t", "prefix"],
- ["vl_api_mac_address_t", "mac"],
- ["vl_api_nsh_t", "nsh"]])
-
- eid = VPPType('vl_api_eid_t',
- [["vl_api_eid_type_t", "type"],
- ["vl_api_eid_address_t", "address"]])
-
- b = eid.pack({'type':1,
- 'address': {
- 'mac': MACAddress('aa:bb:cc:dd:ee:ff')}})
+ VPPEnumType(
+ "vl_api_eid_type_t",
+ [
+ ["EID_TYPE_API_PREFIX", 0],
+ ["EID_TYPE_API_MAC", 1],
+ ["EID_TYPE_API_NSH", 2],
+ {"enumtype": "u32"},
+ ],
+ )
+
+ VPPTypeAlias("vl_api_mac_address_t", {"type": "u8", "length": 6})
+
+ VPPType("vl_api_nsh_t", [["u32", "spi"], ["u8", "si"]])
+
+ VPPEnumType(
+ "vl_api_address_family_t",
+ [["ADDRESS_IP4", 0], ["ADDRESS_IP6", 1], {"enumtype": "u32"}],
+ )
+ VPPTypeAlias("vl_api_ip4_address_t", {"type": "u8", "length": 4})
+ VPPTypeAlias("vl_api_ip6_address_t", {"type": "u8", "length": 16})
+ VPPUnionType(
+ "vl_api_address_union_t",
+ [["vl_api_ip4_address_t", "ip4"], ["vl_api_ip6_address_t", "ip6"]],
+ )
+
+ VPPType(
+ "vl_api_address_t",
+ [["vl_api_address_family_t", "af"], ["vl_api_address_union_t", "un"]],
+ )
+
+ VPPType("vl_api_prefix_t", [["vl_api_address_t", "address"], ["u8", "len"]])
+
+ VPPUnionType(
+ "vl_api_eid_address_t",
+ [
+ ["vl_api_prefix_t", "prefix"],
+ ["vl_api_mac_address_t", "mac"],
+ ["vl_api_nsh_t", "nsh"],
+ ],
+ )
+
+ eid = VPPType(
+ "vl_api_eid_t",
+ [["vl_api_eid_type_t", "type"], ["vl_api_eid_address_t", "address"]],
+ )
+
+ b = eid.pack({"type": 1, "address": {"mac": MACAddress("aa:bb:cc:dd:ee:ff")}})
self.assertEqual(len(b), 25)
nt, size = eid.unpack(b)
- self.assertEqual(str(nt.address.mac), 'aa:bb:cc:dd:ee:ff')
+ self.assertEqual(str(nt.address.mac), "aa:bb:cc:dd:ee:ff")
self.assertIsNone(nt.address.prefix)
class TestVppSerializerLogging(unittest.TestCase):
-
def test_logger(self):
# test logger name 'vpp_papi.serializer'
with self.assertRaises(VPPSerializerValueError) as ctx:
- with self.assertLogs('vpp_papi.serializer', level='DEBUG') as cm:
- u = VPPUnionType('vl_api_eid_address_t',
- [["vl_api_prefix_t", "prefix"],
- ["vl_api_mac_address_t", "mac"],
- ["vl_api_nsh_t", "nsh"]])
- self.assertEqual(cm.output, ["DEBUG:vpp_papi.serializer:Unknown union type vl_api_prefix_t"])
+ with self.assertLogs("vpp_papi.serializer", level="DEBUG") as cm:
+ u = VPPUnionType(
+ "vl_api_eid_address_t",
+ [
+ ["vl_api_prefix_t", "prefix"],
+ ["vl_api_mac_address_t", "mac"],
+ ["vl_api_nsh_t", "nsh"],
+ ],
+ )
+ self.assertEqual(
+ cm.output, ["DEBUG:vpp_papi.serializer:Unknown union type vl_api_prefix_t"]
+ )
# test parent logger name 'vpp_papi'
with self.assertRaises(VPPSerializerValueError) as ctx:
- with self.assertLogs('vpp_papi', level='DEBUG') as cm:
- u = VPPUnionType('vl_api_eid_address_t',
- [["vl_api_prefix_t", "prefix"],
- ["vl_api_mac_address_t", "mac"],
- ["vl_api_nsh_t", "nsh"]])
- self.assertEqual(cm.output, ["DEBUG:vpp_papi.serializer:Unknown union type vl_api_prefix_t"])
-
-
-if __name__ == '__main__':
+ with self.assertLogs("vpp_papi", level="DEBUG") as cm:
+ u = VPPUnionType(
+ "vl_api_eid_address_t",
+ [
+ ["vl_api_prefix_t", "prefix"],
+ ["vl_api_mac_address_t", "mac"],
+ ["vl_api_nsh_t", "nsh"],
+ ],
+ )
+ self.assertEqual(
+ cm.output, ["DEBUG:vpp_papi.serializer:Unknown union type vl_api_prefix_t"]
+ )
+
+
+if __name__ == "__main__":
unittest.main()
diff --git a/src/vpp-api/python/vpp_papi/vpp_format.py b/src/vpp-api/python/vpp_papi/vpp_format.py
index 0b85eb4fcb6..f80a781c753 100644
--- a/src/vpp-api/python/vpp_papi/vpp_format.py
+++ b/src/vpp-api/python/vpp_papi/vpp_format.py
@@ -25,8 +25,8 @@ ADDRESS_IP6 = 1
def verify_enum_hint(e):
- return (e.ADDRESS_IP4.value == ADDRESS_IP4) and\
- (e.ADDRESS_IP6.value == ADDRESS_IP6)
+ return (e.ADDRESS_IP4.value == ADDRESS_IP4) and (e.ADDRESS_IP6.value == ADDRESS_IP6)
+
#
# Type conversion for input arguments and return values
@@ -35,146 +35,128 @@ def verify_enum_hint(e):
def format_vl_api_address_t(args):
try:
- return {'un': {'ip6': inet_pton(AF_INET6, args)},
- 'af': ADDRESS_IP6}
+ return {"un": {"ip6": inet_pton(AF_INET6, args)}, "af": ADDRESS_IP6}
# PY2: raises socket.error
# PY3: raises OSError
except (socket.error, OSError):
- return {'un': {'ip4': inet_pton(AF_INET, args)},
- 'af': ADDRESS_IP4}
+ return {"un": {"ip4": inet_pton(AF_INET, args)}, "af": ADDRESS_IP4}
def format_vl_api_prefix_t(args):
if isinstance(args, (ipaddress.IPv4Network, ipaddress.IPv6Network)):
- return {'address': format_vl_api_address_t(
- str(args.network_address)),
- 'len': int(args.prefixlen)}
- p, length = args.split('/')
- return {'address': format_vl_api_address_t(p),
- 'len': int(length)}
+ return {
+ "address": format_vl_api_address_t(str(args.network_address)),
+ "len": int(args.prefixlen),
+ }
+ p, length = args.split("/")
+ return {"address": format_vl_api_address_t(p), "len": int(length)}
def format_vl_api_address_with_prefix_t(args):
if isinstance(args, (ipaddress.IPv4Interface, ipaddress.IPv6Interface)):
- return {'address': format_vl_api_address_t(
- str(args.network_address)),
- 'len': int(args.prefixlen)}
- p, length = args.split('/')
- return {'address': format_vl_api_address_t(p),
- 'len': int(length)}
+ return {
+ "address": format_vl_api_address_t(str(args.network_address)),
+ "len": int(args.prefixlen),
+ }
+ p, length = args.split("/")
+ return {"address": format_vl_api_address_t(p), "len": int(length)}
def format_vl_api_ip6_prefix_t(args):
if isinstance(args, ipaddress.IPv6Network):
- return {'address': args.network_address.packed,
- 'len': int(args.prefixlen)}
- p, length = args.split('/')
- return {'address': inet_pton(AF_INET6, p),
- 'len': int(length)}
+ return {"address": args.network_address.packed, "len": int(args.prefixlen)}
+ p, length = args.split("/")
+ return {"address": inet_pton(AF_INET6, p), "len": int(length)}
def format_vl_api_ip6_address_with_prefix_t(args):
if isinstance(args, ipaddress.IPv6Interface):
- return {'address': args.network_address.packed,
- 'len': int(args.prefixlen)}
- p, length = args.split('/')
- return {'address': inet_pton(AF_INET6, p),
- 'len': int(length)}
+ return {"address": args.network_address.packed, "len": int(args.prefixlen)}
+ p, length = args.split("/")
+ return {"address": inet_pton(AF_INET6, p), "len": int(length)}
def format_vl_api_ip4_prefix_t(args):
if isinstance(args, ipaddress.IPv4Network):
- return {'address': args.network_address.packed,
- 'len': int(args.prefixlen)}
- p, length = args.split('/')
- return {'address': inet_pton(AF_INET, p),
- 'len': int(length)}
+ return {"address": args.network_address.packed, "len": int(args.prefixlen)}
+ p, length = args.split("/")
+ return {"address": inet_pton(AF_INET, p), "len": int(length)}
def format_vl_api_ip4_address_with_prefix_t(args):
if isinstance(args, ipaddress.IPv4Interface):
- return {'address': args.network_address.packed,
- 'len': int(args.prefixlen)}
- p, length = args.split('/')
- return {'address': inet_pton(AF_INET, p),
- 'len': int(length)}
+ return {"address": args.network_address.packed, "len": int(args.prefixlen)}
+ p, length = args.split("/")
+ return {"address": inet_pton(AF_INET, p), "len": int(length)}
conversion_table = {
- 'vl_api_ip6_address_t':
- {
- 'IPv6Address': lambda o: o.packed,
- 'str': lambda s: inet_pton(AF_INET6, s)
+ "vl_api_ip6_address_t": {
+ "IPv6Address": lambda o: o.packed,
+ "str": lambda s: inet_pton(AF_INET6, s),
+ },
+ "vl_api_ip4_address_t": {
+ "IPv4Address": lambda o: o.packed,
+ "str": lambda s: inet_pton(AF_INET, s),
},
- 'vl_api_ip4_address_t':
- {
- 'IPv4Address': lambda o: o.packed,
- 'str': lambda s: inet_pton(AF_INET, s)
+ "vl_api_ip6_prefix_t": {
+ "IPv6Network": lambda o: {
+ "address": o.network_address.packed,
+ "len": o.prefixlen,
+ },
+ "str": lambda s: format_vl_api_ip6_prefix_t(s),
},
- 'vl_api_ip6_prefix_t':
- {
- 'IPv6Network': lambda o: {'address': o.network_address.packed,
- 'len': o.prefixlen},
- 'str': lambda s: format_vl_api_ip6_prefix_t(s)
+ "vl_api_ip4_prefix_t": {
+ "IPv4Network": lambda o: {
+ "address": o.network_address.packed,
+ "len": o.prefixlen,
+ },
+ "str": lambda s: format_vl_api_ip4_prefix_t(s),
},
- 'vl_api_ip4_prefix_t':
- {
- 'IPv4Network': lambda o: {'address': o.network_address.packed,
- 'len': o.prefixlen},
- 'str': lambda s: format_vl_api_ip4_prefix_t(s)
+ "vl_api_address_t": {
+ "IPv4Address": lambda o: {"af": ADDRESS_IP4, "un": {"ip4": o.packed}},
+ "IPv6Address": lambda o: {"af": ADDRESS_IP6, "un": {"ip6": o.packed}},
+ "str": lambda s: format_vl_api_address_t(s),
},
- 'vl_api_address_t':
- {
- 'IPv4Address': lambda o: {'af': ADDRESS_IP4, 'un': {'ip4': o.packed}},
- 'IPv6Address': lambda o: {'af': ADDRESS_IP6, 'un': {'ip6': o.packed}},
- 'str': lambda s: format_vl_api_address_t(s)
+ "vl_api_prefix_t": {
+ "IPv4Network": lambda o: {
+ "address": {"af": ADDRESS_IP4, "un": {"ip4": o.network_address.packed}},
+ "len": o.prefixlen,
+ },
+ "IPv6Network": lambda o: {
+ "address": {"af": ADDRESS_IP6, "un": {"ip6": o.network_address.packed}},
+ "len": o.prefixlen,
+ },
+ "str": lambda s: format_vl_api_prefix_t(s),
},
- 'vl_api_prefix_t':
- {
- 'IPv4Network': lambda o: {'address':
- {'af': ADDRESS_IP4, 'un':
- {'ip4': o.network_address.packed}},
- 'len': o.prefixlen},
- 'IPv6Network': lambda o: {'address':
- {'af': ADDRESS_IP6, 'un':
- {'ip6': o.network_address.packed}},
- 'len': o.prefixlen},
- 'str': lambda s: format_vl_api_prefix_t(s)
+ "vl_api_address_with_prefix_t": {
+ "IPv4Interface": lambda o: {
+ "address": {"af": ADDRESS_IP4, "un": {"ip4": o.packed}},
+ "len": o.network.prefixlen,
+ },
+ "IPv6Interface": lambda o: {
+ "address": {"af": ADDRESS_IP6, "un": {"ip6": o.packed}},
+ "len": o.network.prefixlen,
+ },
+ "str": lambda s: format_vl_api_address_with_prefix_t(s),
},
- 'vl_api_address_with_prefix_t':
- {
- 'IPv4Interface': lambda o: {'address':
- {'af': ADDRESS_IP4, 'un':
- {'ip4': o.packed}},
- 'len': o.network.prefixlen},
- 'IPv6Interface': lambda o: {'address':
- {'af': ADDRESS_IP6, 'un':
- {'ip6': o.packed}},
- 'len': o.network.prefixlen},
- 'str': lambda s: format_vl_api_address_with_prefix_t(s)
+ "vl_api_ip4_address_with_prefix_t": {
+ "IPv4Interface": lambda o: {"address": o.packed, "len": o.network.prefixlen},
+ "str": lambda s: format_vl_api_ip4_address_with_prefix_t(s),
},
- 'vl_api_ip4_address_with_prefix_t':
- {
- 'IPv4Interface': lambda o: {'address': o.packed,
- 'len': o.network.prefixlen},
- 'str': lambda s: format_vl_api_ip4_address_with_prefix_t(s)
+ "vl_api_ip6_address_with_prefix_t": {
+ "IPv6Interface": lambda o: {"address": o.packed, "len": o.network.prefixlen},
+ "str": lambda s: format_vl_api_ip6_address_with_prefix_t(s),
},
- 'vl_api_ip6_address_with_prefix_t':
- {
- 'IPv6Interface': lambda o: {'address': o.packed,
- 'len': o.network.prefixlen},
- 'str': lambda s: format_vl_api_ip6_address_with_prefix_t(s)
+ "vl_api_mac_address_t": {
+ "MACAddress": lambda o: o.packed,
+ "str": lambda s: macaddress.mac_pton(s),
},
- 'vl_api_mac_address_t':
- {
- 'MACAddress': lambda o: o.packed,
- 'str': lambda s: macaddress.mac_pton(s)
+ "vl_api_timestamp_t": {
+ "datetime.datetime": lambda o: (
+ o - datetime.datetime(1970, 1, 1)
+ ).total_seconds()
},
- 'vl_api_timestamp_t':
- {
- 'datetime.datetime': lambda o:
- (o - datetime.datetime(1970, 1, 1)).total_seconds()
- }
}
@@ -197,7 +179,7 @@ def unformat_api_prefix_t(o):
return ipaddress.IPv4Network((o.address, o.len), False)
if isinstance(o.address, ipaddress.IPv6Address):
return ipaddress.IPv6Network((o.address, o.len), False)
- raise ValueError('Unknown instance {}', format(o))
+ raise ValueError("Unknown instance {}", format(o))
def unformat_api_address_with_prefix_t(o):
@@ -217,16 +199,20 @@ def unformat_api_ip6_address_with_prefix_t(o):
conversion_unpacker_table = {
- 'vl_api_ip6_address_t': lambda o: ipaddress.IPv6Address(o),
- 'vl_api_ip6_prefix_t': lambda o: ipaddress.IPv6Network((o.address, o.len)),
- 'vl_api_ip4_address_t': lambda o: ipaddress.IPv4Address(o),
- 'vl_api_ip4_prefix_t': lambda o: ipaddress.IPv4Network((o.address, o.len)),
- 'vl_api_address_t': lambda o: unformat_api_address_t(o),
- 'vl_api_prefix_t': lambda o: unformat_api_prefix_t(o),
- 'vl_api_address_with_prefix_t': lambda o: unformat_api_address_with_prefix_t(o),
- 'vl_api_ip4_address_with_prefix_t': lambda o: unformat_api_ip4_address_with_prefix_t(o),
- 'vl_api_ip6_address_with_prefix_t': lambda o: unformat_api_ip6_address_with_prefix_t(o),
- 'vl_api_mac_address_t': lambda o: macaddress.MACAddress(o),
- 'vl_api_timestamp_t': lambda o: datetime.datetime.fromtimestamp(o),
- 'vl_api_timedelta_t': lambda o: datetime.timedelta(seconds=o),
+ "vl_api_ip6_address_t": lambda o: ipaddress.IPv6Address(o),
+ "vl_api_ip6_prefix_t": lambda o: ipaddress.IPv6Network((o.address, o.len)),
+ "vl_api_ip4_address_t": lambda o: ipaddress.IPv4Address(o),
+ "vl_api_ip4_prefix_t": lambda o: ipaddress.IPv4Network((o.address, o.len)),
+ "vl_api_address_t": lambda o: unformat_api_address_t(o),
+ "vl_api_prefix_t": lambda o: unformat_api_prefix_t(o),
+ "vl_api_address_with_prefix_t": lambda o: unformat_api_address_with_prefix_t(o),
+ "vl_api_ip4_address_with_prefix_t": lambda o: unformat_api_ip4_address_with_prefix_t(
+ o
+ ),
+ "vl_api_ip6_address_with_prefix_t": lambda o: unformat_api_ip6_address_with_prefix_t(
+ o
+ ),
+ "vl_api_mac_address_t": lambda o: macaddress.MACAddress(o),
+ "vl_api_timestamp_t": lambda o: datetime.datetime.fromtimestamp(o),
+ "vl_api_timedelta_t": lambda o: datetime.timedelta(seconds=o),
}
diff --git a/src/vpp-api/python/vpp_papi/vpp_papi.py b/src/vpp-api/python/vpp_papi/vpp_papi.py
index 3465f503e9e..30c00cd8dd3 100644
--- a/src/vpp-api/python/vpp_papi/vpp_papi.py
+++ b/src/vpp-api/python/vpp_papi/vpp_papi.py
@@ -18,7 +18,6 @@ from __future__ import print_function
from __future__ import absolute_import
import ctypes
import ipaddress
-import sys
import multiprocessing as mp
import os
import queue
@@ -30,13 +29,15 @@ import fnmatch
import weakref
import atexit
import time
-from . vpp_format import verify_enum_hint
-from . vpp_serializer import VPPType, VPPEnumType, VPPEnumFlagType, VPPUnionType
-from . vpp_serializer import VPPMessage, vpp_get_type, VPPTypeAlias
+import pkg_resources
+from .vpp_format import verify_enum_hint
+from .vpp_serializer import VPPType, VPPEnumType, VPPEnumFlagType, VPPUnionType
+from .vpp_serializer import VPPMessage, vpp_get_type, VPPTypeAlias
try:
import VppTransport
except ModuleNotFoundError:
+
class V:
"""placeholder for VppTransport as the implementation is dependent on
VPPAPIClient's initialization values
@@ -44,15 +45,22 @@ except ModuleNotFoundError:
VppTransport = V
-from . vpp_transport_socket import VppTransport
+from .vpp_transport_socket import VppTransport
-logger = logging.getLogger('vpp_papi')
+logger = logging.getLogger("vpp_papi")
logger.addHandler(logging.NullHandler())
-__all__ = ('FuncWrapper', 'VppApiDynamicMethodHolder',
- 'VppEnum', 'VppEnumType', 'VppEnumFlag',
- 'VPPIOError', 'VPPRuntimeError', 'VPPValueError',
- 'VPPApiClient', )
+__all__ = (
+ "FuncWrapper",
+ "VppApiDynamicMethodHolder",
+ "VppEnum",
+ "VppEnumType",
+ "VppEnumFlag",
+ "VPPIOError",
+ "VPPRuntimeError",
+ "VPPValueError",
+ "VPPApiClient",
+)
def metaclass(metaclass):
@@ -83,7 +91,7 @@ def vpp_atexit(vpp_weakref):
"""Clean up VPP connection on shutdown."""
vpp_instance = vpp_weakref()
if vpp_instance and vpp_instance.transport.connected:
- logger.debug('Cleaning up VPP on exit')
+ logger.debug("Cleaning up VPP on exit")
vpp_instance.disconnect()
@@ -98,9 +106,9 @@ def add_convenience_methods():
def _vapi_af_name(self):
if 6 == self._version:
- return 'ip6'
+ return "ip6"
if 4 == self._version:
- return 'ip4'
+ return "ip4"
raise ValueError("Invalid _version.")
ipaddress._IPAddressBase.vapi_af = property(_vapi_af)
@@ -121,7 +129,7 @@ class FuncWrapper:
return self._func(**kwargs)
def __repr__(self):
- return '<FuncWrapper(func=<%s(%s)>)>' % (self.__name__, self.__doc__)
+ return "<FuncWrapper(func=<%s(%s)>)>" % (self.__name__, self.__doc__)
class VPPApiError(Exception):
@@ -146,7 +154,7 @@ class VPPValueError(ValueError):
class VPPApiJSONFiles:
@classmethod
- def find_api_dir(cls, dirs):
+ def find_api_dir(cls, dirs=[]):
"""Attempt to find the best directory in which API definition
files may reside. If the value VPP_API_DIR exists in the environment
then it is first on the search list. If we're inside a recognized
@@ -161,7 +169,11 @@ class VPPApiJSONFiles:
# perhaps we're in the 'src/scripts' or 'src/vpp-api/python' dir;
# in which case, plot a course to likely places in the src tree
import __main__ as main
- if hasattr(main, '__file__'):
+
+ if os.getenv("VPP_API_DIR"):
+ dirs.append(os.getenv("VPP_API_DIR"))
+
+ if hasattr(main, "__file__"):
# get the path of the calling script
localdir = os.path.dirname(os.path.realpath(main.__file__))
else:
@@ -171,7 +183,7 @@ class VPPApiJSONFiles:
def dmatch(dir):
"""Match dir against right-hand components of the script dir"""
- d = dir.split('/') # param 'dir' assumes a / separator
+ d = dir.split("/") # param 'dir' assumes a / separator
length = len(d)
return len(localdir_s) > length and localdir_s[-length:] == d
@@ -180,43 +192,45 @@ class VPPApiJSONFiles:
'variant' (typically '' or '_debug')"""
# Since 'core' and 'plugin' files are staged
# in separate directories, we target the parent dir.
- return os.path.sep.join((
- srcdir,
- 'build-root',
- 'install-vpp%s-native' % variant,
- 'vpp',
- 'share',
- 'vpp',
- 'api',
- ))
+ return os.path.sep.join(
+ (
+ srcdir,
+ "build-root",
+ "install-vpp%s-native" % variant,
+ "vpp",
+ "share",
+ "vpp",
+ "api",
+ )
+ )
srcdir = None
- if dmatch('src/scripts'):
+ if dmatch("src/scripts"):
srcdir = os.path.sep.join(localdir_s[:-2])
- elif dmatch('src/vpp-api/python'):
+ elif dmatch("src/vpp-api/python"):
srcdir = os.path.sep.join(localdir_s[:-3])
- elif dmatch('test'):
+ elif dmatch("test"):
# we're apparently running tests
srcdir = os.path.sep.join(localdir_s[:-1])
if srcdir:
# we're in the source tree, try both the debug and release
# variants.
- dirs.append(sdir(srcdir, '_debug'))
- dirs.append(sdir(srcdir, ''))
+ dirs.append(sdir(srcdir, "_debug"))
+ dirs.append(sdir(srcdir, ""))
# Test for staged copies of the scripts
# For these, since we explicitly know if we're running a debug versus
# release variant, target only the relevant directory
- if dmatch('build-root/install-vpp_debug-native/vpp/bin'):
+ if dmatch("build-root/install-vpp_debug-native/vpp/bin"):
srcdir = os.path.sep.join(localdir_s[:-4])
- dirs.append(sdir(srcdir, '_debug'))
- if dmatch('build-root/install-vpp-native/vpp/bin'):
+ dirs.append(sdir(srcdir, "_debug"))
+ if dmatch("build-root/install-vpp-native/vpp/bin"):
srcdir = os.path.sep.join(localdir_s[:-4])
- dirs.append(sdir(srcdir, ''))
+ dirs.append(sdir(srcdir, ""))
# finally, try the location system packages typically install into
- dirs.append(os.path.sep.join(('', 'usr', 'share', 'vpp', 'api')))
+ dirs.append(os.path.sep.join(("", "usr", "share", "vpp", "api")))
# check the directories for existence; first one wins
for dir in dirs:
@@ -226,7 +240,7 @@ class VPPApiJSONFiles:
return None
@classmethod
- def find_api_files(cls, api_dir=None, patterns='*'): # -> list
+ def find_api_files(cls, api_dir=None, patterns="*"): # -> list
"""Find API definition files from the given directory tree with the
given pattern. If no directory is given then find_api_dir() is used
to locate one. If no pattern is given then all definition files found
@@ -252,9 +266,9 @@ class VPPApiJSONFiles:
raise VPPApiError("api_dir cannot be located")
if isinstance(patterns, list) or isinstance(patterns, tuple):
- patterns = [p.strip() + '.api.json' for p in patterns]
+ patterns = [p.strip() + ".api.json" for p in patterns]
else:
- patterns = [p.strip() + '.api.json' for p in patterns.split(",")]
+ patterns = [p.strip() + ".api.json" for p in patterns.split(",")]
api_files = []
for root, dirnames, files in os.walk(api_dir):
@@ -275,45 +289,57 @@ class VPPApiJSONFiles:
api = json.loads(json_str)
return self._process_json(api)
+ @classmethod
+ def process_json_array_str(self, json_str):
+ services = {}
+ messages = {}
+
+ apis = json.loads(json_str)
+ for a in apis:
+ m, s = self._process_json(a)
+ messages.update(m)
+ services.update(s)
+ return messages, services
+
@staticmethod
def _process_json(api): # -> Tuple[Dict, Dict]
types = {}
services = {}
messages = {}
try:
- for t in api['enums']:
- t[0] = 'vl_api_' + t[0] + '_t'
- types[t[0]] = {'type': 'enum', 'data': t}
+ for t in api["enums"]:
+ t[0] = "vl_api_" + t[0] + "_t"
+ types[t[0]] = {"type": "enum", "data": t}
except KeyError:
pass
try:
- for t in api['enumflags']:
- t[0] = 'vl_api_' + t[0] + '_t'
- types[t[0]] = {'type': 'enum', 'data': t}
+ for t in api["enumflags"]:
+ t[0] = "vl_api_" + t[0] + "_t"
+ types[t[0]] = {"type": "enum", "data": t}
except KeyError:
pass
try:
- for t in api['unions']:
- t[0] = 'vl_api_' + t[0] + '_t'
- types[t[0]] = {'type': 'union', 'data': t}
+ for t in api["unions"]:
+ t[0] = "vl_api_" + t[0] + "_t"
+ types[t[0]] = {"type": "union", "data": t}
except KeyError:
pass
try:
- for t in api['types']:
- t[0] = 'vl_api_' + t[0] + '_t'
- types[t[0]] = {'type': 'type', 'data': t}
+ for t in api["types"]:
+ t[0] = "vl_api_" + t[0] + "_t"
+ types[t[0]] = {"type": "type", "data": t}
except KeyError:
pass
try:
- for t, v in api['aliases'].items():
- types['vl_api_' + t + '_t'] = {'type': 'alias', 'data': v}
+ for t, v in api["aliases"].items():
+ types["vl_api_" + t + "_t"] = {"type": "alias", "data": v}
except KeyError:
pass
try:
- services.update(api['services'])
+ services.update(api["services"])
except KeyError:
pass
@@ -321,30 +347,30 @@ class VPPApiJSONFiles:
while True:
unresolved = {}
for k, v in types.items():
- t = v['data']
+ t = v["data"]
if not vpp_get_type(k):
- if v['type'] == 'enum':
+ if v["type"] == "enum":
try:
VPPEnumType(t[0], t[1:])
except ValueError:
unresolved[k] = v
if not vpp_get_type(k):
- if v['type'] == 'enumflag':
+ if v["type"] == "enumflag":
try:
VPPEnumFlagType(t[0], t[1:])
except ValueError:
unresolved[k] = v
- elif v['type'] == 'union':
+ elif v["type"] == "union":
try:
VPPUnionType(t[0], t[1:])
except ValueError:
unresolved[k] = v
- elif v['type'] == 'type':
+ elif v["type"] == "type":
try:
VPPType(t[0], t[1:])
except ValueError:
unresolved[k] = v
- elif v['type'] == 'alias':
+ elif v["type"] == "alias":
try:
VPPTypeAlias(k, t)
except ValueError:
@@ -352,21 +378,43 @@ class VPPApiJSONFiles:
if len(unresolved) == 0:
break
if i > 3:
- raise VPPValueError('Unresolved type definitions {}'
- .format(unresolved))
+ raise VPPValueError("Unresolved type definitions {}".format(unresolved))
types = unresolved
i += 1
try:
- for m in api['messages']:
+ for m in api["messages"]:
try:
messages[m[0]] = VPPMessage(m[0], m[1:])
except VPPNotImplementedError:
- ### OLE FIXME
- logger.error('Not implemented error for {}'.format(m[0]))
+ logger.error("Not implemented error for {}".format(m[0]))
except KeyError:
pass
return messages, services
+ @staticmethod
+ def load_api(apifiles=None, apidir=None):
+ messages = {}
+ services = {}
+ if not apifiles:
+ # Pick up API definitions from default directory
+ try:
+ if isinstance(apidir, list):
+ apifiles = []
+ for d in apidir:
+ apifiles += VPPApiJSONFiles.find_api_files(d)
+ else:
+ apifiles = VPPApiJSONFiles.find_api_files(apidir)
+ except (RuntimeError, VPPApiError):
+ raise VPPRuntimeError
+
+ for file in apifiles:
+ with open(file) as apidef_file:
+ m, s = VPPApiJSONFiles.process_json_file(apidef_file)
+ messages.update(m)
+ services.update(s)
+
+ return apifiles, messages, services
+
class VPPApiClient:
"""VPP interface.
@@ -380,18 +428,27 @@ class VPPApiClient:
provides a means to register a callback function to receive
these messages in a background thread.
"""
- apidir = None
+
VPPApiError = VPPApiError
VPPRuntimeError = VPPRuntimeError
VPPValueError = VPPValueError
VPPNotImplementedError = VPPNotImplementedError
VPPIOError = VPPIOError
-
- def __init__(self, *, apifiles=None, testmode=False, async_thread=True,
- logger=None, loglevel=None,
- read_timeout=5, use_socket=True,
- server_address='/run/vpp/api.sock'):
+ def __init__(
+ self,
+ *,
+ apifiles=None,
+ apidir=None,
+ testmode=False,
+ async_thread=True,
+ logger=None,
+ loglevel=None,
+ read_timeout=5,
+ use_socket=True,
+ server_address="/run/vpp/api.sock",
+ bootstrapapi=False,
+ ):
"""Create a VPP API object.
apifiles is a list of files containing API
@@ -406,7 +463,8 @@ class VPPApiClient:
"""
if logger is None:
logger = logging.getLogger(
- "{}.{}".format(__name__, self.__class__.__name__))
+ "{}.{}".format(__name__, self.__class__.__name__)
+ )
if loglevel is not None:
logger.setLevel(loglevel)
self.logger = logger
@@ -415,9 +473,9 @@ class VPPApiClient:
self.services = {}
self.id_names = []
self.id_msgdef = []
- self.header = VPPType('header', [['u16', 'msgid'],
- ['u32', 'client_index']])
+ self.header = VPPType("header", [["u16", "msgid"], ["u32", "client_index"]])
self.apifiles = []
+ self.apidir = apidir
self.event_callback = None
self.message_queue = queue.Queue()
self.read_timeout = read_timeout
@@ -427,35 +485,41 @@ class VPPApiClient:
self.server_address = server_address
self._apifiles = apifiles
self.stats = {}
+ self.bootstrapapi = bootstrapapi
- if not apifiles:
- # Pick up API definitions from default directory
+ if not bootstrapapi:
+ if self.apidir is None and hasattr(self.__class__, "apidir"):
+ # Keep supporting the old style of providing apidir.
+ self.apidir = self.__class__.apidir
try:
- apifiles = VPPApiJSONFiles.find_api_files(self.apidir)
- except (RuntimeError, VPPApiError):
- # In test mode we don't care that we can't find the API files
+ self.apifiles, self.messages, self.services = VPPApiJSONFiles.load_api(
+ apifiles, self.apidir
+ )
+ except VPPRuntimeError as e:
if testmode:
- apifiles = []
+ self.apifiles = []
else:
- raise VPPRuntimeError
-
- for file in apifiles:
- with open(file) as apidef_file:
- m, s = VPPApiJSONFiles.process_json_file(apidef_file)
- self.messages.update(m)
- self.services.update(s)
-
- self.apifiles = apifiles
+ raise e
+ else:
+ # Bootstrap the API (memclnt.api bundled with VPP PAPI)
+ resource_path = "/".join(("data", "memclnt.api.json"))
+ file_content = pkg_resources.resource_string(__name__, resource_path)
+ self.messages, self.services = VPPApiJSONFiles.process_json_str(
+ file_content
+ )
# Basic sanity check
if len(self.messages) == 0 and not testmode:
- raise VPPValueError(1, 'Missing JSON message definitions')
- if not(verify_enum_hint(VppEnum.vl_api_address_family_t)):
- raise VPPRuntimeError("Invalid address family hints. "
- "Cannot continue.")
-
- self.transport = VppTransport(self, read_timeout=read_timeout,
- server_address=server_address)
+ raise VPPValueError(1, "Missing JSON message definitions")
+ if not bootstrapapi:
+ if not (verify_enum_hint(VppEnum.vl_api_address_family_t)):
+ raise VPPRuntimeError(
+ "Invalid address family hints. " "Cannot continue."
+ )
+
+ self.transport = VppTransport(
+ self, read_timeout=read_timeout, server_address=server_address
+ )
# Make sure we allow VPP to clean up the message rings.
atexit.register(vpp_atexit, weakref.ref(self))
@@ -466,6 +530,7 @@ class VPPApiClient:
class ContextId:
"""Multiprocessing-safe provider of unique context IDs."""
+
def __init__(self):
self.context = mp.Value(ctypes.c_uint, 0)
self.lock = mp.Lock()
@@ -475,6 +540,7 @@ class VPPApiClient:
with self.lock:
self.context.value += 1
return self.context.value
+
get_context = ContextId()
def get_type(self, name):
@@ -487,27 +553,37 @@ class VPPApiClient:
return self._api
def make_function(self, msg, i, multipart, do_async):
- if (do_async):
+ if do_async:
+
def f(**kwargs):
return self._call_vpp_async(i, msg, **kwargs)
+
else:
+
def f(**kwargs):
return self._call_vpp(i, msg, multipart, **kwargs)
f.__name__ = str(msg.name)
- f.__doc__ = ", ".join(["%s %s" %
- (msg.fieldtypes[j], k)
- for j, k in enumerate(msg.fields)])
+ f.__doc__ = ", ".join(
+ ["%s %s" % (msg.fieldtypes[j], k) for j, k in enumerate(msg.fields)]
+ )
f.msg = msg
return f
+ def make_pack_function(self, msg, i, multipart):
+ def f(**kwargs):
+ return self._call_vpp_pack(i, msg, **kwargs)
+
+ f.msg = msg
+ return f
+
def _register_functions(self, do_async=False):
self.id_names = [None] * (self.vpp_dictionary_maxid + 1)
self.id_msgdef = [None] * (self.vpp_dictionary_maxid + 1)
self._api = VppApiDynamicMethodHolder()
for name, msg in self.messages.items():
- n = name + '_' + msg.crc[2:]
+ n = name + "_" + msg.crc[2:]
i = self.transport.get_msg_index(n)
if i > 0:
self.id_msgdef[i] = msg
@@ -516,30 +592,49 @@ class VPPApiClient:
# Create function for client side messages.
if name in self.services:
f = self.make_function(msg, i, self.services[name], do_async)
+ f_pack = self.make_pack_function(msg, i, self.services[name])
setattr(self._api, name, FuncWrapper(f))
+ setattr(self._api, name + "_pack", FuncWrapper(f_pack))
else:
- self.logger.debug(
- 'No such message type or failed CRC checksum: %s', n)
+ self.logger.debug("No such message type or failed CRC checksum: %s", n)
+
+ def get_api_definitions(self):
+ """get_api_definition. Bootstrap from the embedded memclnt.api.json file."""
+
+ # Bootstrap so we can call the get_api_json function
+ self._register_functions(do_async=False)
+
+ r = self.api.get_api_json()
+ if r.retval != 0:
+ raise VPPApiError("Failed to load API definitions from VPP")
- def connect_internal(self, name, msg_handler, chroot_prefix, rx_qlen,
- do_async):
- pfx = chroot_prefix.encode('utf-8') if chroot_prefix else None
+ # Process JSON
+ m, s = VPPApiJSONFiles.process_json_array_str(r.json)
+ self.messages.update(m)
+ self.services.update(s)
- rv = self.transport.connect(name, pfx,
- msg_handler, rx_qlen)
+ def connect_internal(self, name, msg_handler, chroot_prefix, rx_qlen, do_async):
+ pfx = chroot_prefix.encode("utf-8") if chroot_prefix else None
+
+ rv = self.transport.connect(name, pfx, msg_handler, rx_qlen, do_async)
if rv != 0:
- raise VPPIOError(2, 'Connect failed')
+ raise VPPIOError(2, "Connect failed")
self.vpp_dictionary_maxid = self.transport.msg_table_max_index()
+
+ # Register functions
+ if self.bootstrapapi:
+ self.get_api_definitions()
self._register_functions(do_async=do_async)
# Initialise control ping
- crc = self.messages['control_ping'].crc
+ crc = self.messages["control_ping"].crc
self.control_ping_index = self.transport.get_msg_index(
- ('control_ping' + '_' + crc[2:]))
- self.control_ping_msgdef = self.messages['control_ping']
+ ("control_ping" + "_" + crc[2:])
+ )
+ self.control_ping_msgdef = self.messages["control_ping"]
+
if self.async_thread:
- self.event_thread = threading.Thread(
- target=self.thread_msg_handler)
+ self.event_thread = threading.Thread(target=self.thread_msg_handler)
self.event_thread.daemon = True
self.event_thread.start()
else:
@@ -556,8 +651,9 @@ class VPPApiClient:
client and server.
"""
msg_handler = self.transport.get_callback(do_async)
- return self.connect_internal(name, msg_handler, chroot_prefix, rx_qlen,
- do_async)
+ return self.connect_internal(
+ name, msg_handler, chroot_prefix, rx_qlen, do_async
+ )
def connect_sync(self, name, chroot_prefix=None, rx_qlen=32):
"""Attach to VPP in synchronous mode. Application must poll for events.
@@ -568,8 +664,7 @@ class VPPApiClient:
client and server.
"""
- return self.connect_internal(name, None, chroot_prefix, rx_qlen,
- do_async=False)
+ return self.connect_internal(name, None, chroot_prefix, rx_qlen, do_async=False)
def disconnect(self):
"""Detach from VPP."""
@@ -590,42 +685,44 @@ class VPPApiClient:
# If we have a context, then use the context to find any
# request waiting for a reply
context = 0
- if hasattr(r, 'context') and r.context > 0:
+ if hasattr(r, "context") and r.context > 0:
context = r.context
if context == 0:
# No context -> async notification that we feed to the callback
self.message_queue.put_nowait(r)
else:
- raise VPPIOError(2, 'RPC reply message received in event handler')
+ raise VPPIOError(2, "RPC reply message received in event handler")
def has_context(self, msg):
if len(msg) < 10:
return False
- header = VPPType('header_with_context', [['u16', 'msgid'],
- ['u32', 'client_index'],
- ['u32', 'context']])
+ header = VPPType(
+ "header_with_context",
+ [["u16", "msgid"], ["u32", "client_index"], ["u32", "context"]],
+ )
(i, ci, context), size = header.unpack(msg, 0)
- if self.id_names[i] == 'rx_thread_exit':
+
+ if self.id_names[i] == "rx_thread_exit":
return
#
# Decode message and returns a tuple.
#
msgobj = self.id_msgdef[i]
- if 'context' in msgobj.field_by_name and context >= 0:
+ if "context" in msgobj.field_by_name and context >= 0:
return True
return False
def decode_incoming_msg(self, msg, no_type_conversion=False):
if not msg:
- logger.warning('vpp_api.read failed')
+ logger.warning("vpp_api.read failed")
return
(i, ci), size = self.header.unpack(msg, 0)
- if self.id_names[i] == 'rx_thread_exit':
+ if self.id_names[i] == "rx_thread_exit":
return
#
@@ -633,7 +730,7 @@ class VPPApiClient:
#
msgobj = self.id_msgdef[i]
if not msgobj:
- raise VPPIOError(2, 'Reply message undefined')
+ raise VPPIOError(2, "Reply message undefined")
r, size = msgobj.unpack(msg, ntc=no_type_conversion)
return r
@@ -654,41 +751,39 @@ class VPPApiClient:
def _control_ping(self, context):
"""Send a ping command."""
- self._call_vpp_async(self.control_ping_index,
- self.control_ping_msgdef,
- context=context)
+ self._call_vpp_async(
+ self.control_ping_index, self.control_ping_msgdef, context=context
+ )
def validate_args(self, msg, kwargs):
d = set(kwargs.keys()) - set(msg.field_by_name.keys())
if d:
- raise VPPValueError('Invalid argument {} to {}'
- .format(list(d), msg.name))
+ raise VPPValueError("Invalid argument {} to {}".format(list(d), msg.name))
def _add_stat(self, name, ms):
if not name in self.stats:
- self.stats[name] = {'max': ms, 'count': 1, 'avg': ms}
+ self.stats[name] = {"max": ms, "count": 1, "avg": ms}
else:
- if ms > self.stats[name]['max']:
- self.stats[name]['max'] = ms
- self.stats[name]['count'] += 1
- n = self.stats[name]['count']
- self.stats[name]['avg'] = self.stats[name]['avg'] * (n - 1) / n + ms / n
+ if ms > self.stats[name]["max"]:
+ self.stats[name]["max"] = ms
+ self.stats[name]["count"] += 1
+ n = self.stats[name]["count"]
+ self.stats[name]["avg"] = self.stats[name]["avg"] * (n - 1) / n + ms / n
def get_stats(self):
- s = '\n=== API PAPI STATISTICS ===\n'
- s += '{:<30} {:>4} {:>6} {:>6}\n'.format('message', 'cnt', 'avg', 'max')
- for n in sorted(self.stats.items(), key=lambda v: v[1]['avg'], reverse=True):
- s += '{:<30} {:>4} {:>6.2f} {:>6.2f}\n'.format(n[0], n[1]['count'],
- n[1]['avg'], n[1]['max'])
+ s = "\n=== API PAPI STATISTICS ===\n"
+ s += "{:<30} {:>4} {:>6} {:>6}\n".format("message", "cnt", "avg", "max")
+ for n in sorted(self.stats.items(), key=lambda v: v[1]["avg"], reverse=True):
+ s += "{:<30} {:>4} {:>6.2f} {:>6.2f}\n".format(
+ n[0], n[1]["count"], n[1]["avg"], n[1]["max"]
+ )
return s
def get_field_options(self, msg, fld_name):
# when there is an option, the msgdef has 3 elements.
# ['u32', 'ring_size', {'default': 1024}]
for _def in self.messages[msg].msgdef:
- if isinstance(_def, list) and \
- len(_def) == 3 and \
- _def[1] == fld_name:
+ if isinstance(_def, list) and len(_def) == 3 and _def[1] == fld_name:
return _def[2]
def _call_vpp(self, i, msgdef, service, **kwargs):
@@ -707,25 +802,26 @@ class VPPApiClient:
no response within the timeout window.
"""
ts = time.time()
- if 'context' not in kwargs:
+ if "context" not in kwargs:
context = self.get_context()
- kwargs['context'] = context
+ kwargs["context"] = context
else:
- context = kwargs['context']
- kwargs['_vl_msg_id'] = i
+ context = kwargs["context"]
+ kwargs["_vl_msg_id"] = i
- no_type_conversion = kwargs.pop('_no_type_conversion', False)
- timeout = kwargs.pop('_timeout', None)
+ no_type_conversion = kwargs.pop("_no_type_conversion", False)
+ timeout = kwargs.pop("_timeout", None)
try:
if self.transport.socket_index:
- kwargs['client_index'] = self.transport.socket_index
+ kwargs["client_index"] = self.transport.socket_index
except AttributeError:
pass
self.validate_args(msgdef, kwargs)
- s = 'Calling {}({})'.format(msgdef.name,
- ','.join(['{!r}:{!r}'.format(k, v) for k, v in kwargs.items()]))
+ s = "Calling {}({})".format(
+ msgdef.name, ",".join(["{!r}:{!r}".format(k, v) for k, v in kwargs.items()])
+ )
self.logger.debug(s)
b = msgdef.pack(kwargs)
@@ -733,17 +829,17 @@ class VPPApiClient:
self.transport.write(b)
- msgreply = service['reply']
- stream = True if 'stream' in service else False
+ msgreply = service["reply"]
+ stream = True if "stream" in service else False
if stream:
- if 'stream_msg' in service:
+ if "stream_msg" in service:
# New service['reply'] = _reply and service['stream_message'] = _details
- stream_message = service['stream_msg']
- modern =True
+ stream_message = service["stream_msg"]
+ modern = True
else:
# Old service['reply'] = _details
stream_message = msgreply
- msgreply = 'control_ping_reply'
+ msgreply = "control_ping_reply"
modern = False
# Send a ping after the request - we use its response
# to detect that we have seen all results.
@@ -751,22 +847,22 @@ class VPPApiClient:
# Block until we get a reply.
rl = []
- while (True):
+ while True:
r = self.read_blocking(no_type_conversion, timeout)
if r is None:
- raise VPPIOError(2, 'VPP API client: read failed')
+ raise VPPIOError(2, "VPP API client: read failed")
msgname = type(r).__name__
if context not in r or r.context == 0 or context != r.context:
# Message being queued
self.message_queue.put_nowait(r)
continue
if msgname != msgreply and (stream and (msgname != stream_message)):
- print('REPLY MISMATCH', msgreply, msgname, stream_message, stream)
+ print("REPLY MISMATCH", msgreply, msgname, stream_message, stream)
if not stream:
rl = r
break
if msgname == msgreply:
- if modern: # Return both reply and list
+ if modern: # Return both reply and list
rl = r, rl
break
@@ -774,7 +870,7 @@ class VPPApiClient:
self.transport.resume()
- s = 'Return value: {!r}'.format(r)
+ s = "Return value: {!r}".format(r)
if len(s) > 80:
s = s[:80] + "..."
self.logger.debug(s)
@@ -795,22 +891,29 @@ class VPPApiClient:
The returned context will help with assigning which call
the reply belongs to.
"""
- if 'context' not in kwargs:
+ if "context" not in kwargs:
context = self.get_context()
- kwargs['context'] = context
+ kwargs["context"] = context
else:
- context = kwargs['context']
+ context = kwargs["context"]
try:
if self.transport.socket_index:
- kwargs['client_index'] = self.transport.socket_index
+ kwargs["client_index"] = self.transport.socket_index
except AttributeError:
- kwargs['client_index'] = 0
- kwargs['_vl_msg_id'] = i
+ kwargs["client_index"] = 0
+ kwargs["_vl_msg_id"] = i
b = msg.pack(kwargs)
self.transport.write(b)
return context
+ def _call_vpp_pack(self, i, msg, **kwargs):
+ """Given a message, return the binary representation."""
+ kwargs["_vl_msg_id"] = i
+ kwargs["client_index"] = 0
+ kwargs["context"] = 0
+ return msg.pack(kwargs)
+
def read_blocking(self, no_type_conversion=False, timeout=None):
"""Get next received message from transport within timeout, decoded.
@@ -891,26 +994,34 @@ class VPPApiClient:
"""Return VPPs API message table as name_crc dictionary,
filtered by message name list."""
- replies = [self.services[n]['reply'] for n in msglist]
+ replies = [self.services[n]["reply"] for n in msglist]
message_table_filtered = {}
for name in msglist + replies:
- for k,v in self.transport.message_table.items():
+ for k, v in self.transport.message_table.items():
if k.startswith(name):
message_table_filtered[k] = v
break
return message_table_filtered
def __repr__(self):
- return "<VPPApiClient apifiles=%s, testmode=%s, async_thread=%s, " \
- "logger=%s, read_timeout=%s, " \
- "server_address='%s'>" % (
- self._apifiles, self.testmode, self.async_thread,
- self.logger, self.read_timeout, self.server_address)
+ return (
+ "<VPPApiClient apifiles=%s, testmode=%s, async_thread=%s, "
+ "logger=%s, read_timeout=%s, "
+ "server_address='%s'>"
+ % (
+ self._apifiles,
+ self.testmode,
+ self.async_thread,
+ self.logger,
+ self.read_timeout,
+ self.server_address,
+ )
+ )
def details_iter(self, f, **kwargs):
cursor = 0
while True:
- kwargs['cursor'] = cursor
+ kwargs["cursor"] = cursor
rv, details = f(**kwargs)
for d in details:
yield d
diff --git a/src/vpp-api/python/vpp_papi/vpp_serializer.py b/src/vpp-api/python/vpp_papi/vpp_serializer.py
index 644aeac65c6..d724cb33ce9 100644
--- a/src/vpp-api/python/vpp_papi/vpp_serializer.py
+++ b/src/vpp-api/python/vpp_papi/vpp_serializer.py
@@ -27,7 +27,7 @@ from . import vpp_format
# logger = logging.getLogger('vpp_serializer')
# logger.setLevel(logging.DEBUG)
#
-logger = logging.getLogger('vpp_papi.serializer')
+logger = logging.getLogger("vpp_papi.serializer")
def check(d):
@@ -46,8 +46,7 @@ def conversion_required(data, field_type):
def conversion_packer(data, field_type):
t = type(data).__name__
- return types[field_type].pack(vpp_format.
- conversion_table[field_type][t](data))
+ return types[field_type].pack(vpp_format.conversion_table[field_type][t](data))
def conversion_unpacker(data, field_type):
@@ -77,30 +76,33 @@ class Packer:
return c._get_packer_with_options(f_type, options)
except IndexError:
raise VPPSerializerValueError(
- "Options not supported for {}{} ({})".
- format(f_type, types[f_type].__class__,
- options))
+ "Options not supported for {}{} ({})".format(
+ f_type, types[f_type].__class__, options
+ )
+ )
class BaseTypes(Packer):
def __init__(self, type, elements=0, options=None):
self._type = type
self._elements = elements
- base_types = {'u8': '>B',
- 'i8': '>b',
- 'string': '>s',
- 'u16': '>H',
- 'i16': '>h',
- 'u32': '>I',
- 'i32': '>i',
- 'u64': '>Q',
- 'i64': '>q',
- 'f64': '=d',
- 'bool': '>?',
- 'header': '>HI'}
-
- if elements > 0 and (type == 'u8' or type == 'string'):
- self.packer = struct.Struct('>%ss' % elements)
+ base_types = {
+ "u8": ">B",
+ "i8": ">b",
+ "string": ">s",
+ "u16": ">H",
+ "i16": ">h",
+ "u32": ">I",
+ "i32": ">i",
+ "u64": ">Q",
+ "i64": ">q",
+ "f64": "=d",
+ "bool": ">?",
+ "header": ">HI",
+ }
+
+ if elements > 0 and (type == "u8" or type == "string"):
+ self.packer = struct.Struct(">%ss" % elements)
else:
self.packer = struct.Struct(base_types[type])
self.size = self.packer.size
@@ -108,8 +110,8 @@ class BaseTypes(Packer):
def pack(self, data, kwargs=None):
if data is None: # Default to zero if not specified
- if self.options and 'default' in self.options:
- data = self.options['default']
+ if self.options and "default" in self.options:
+ data = self.options["default"]
else:
data = 0
return self.packer.pack(data)
@@ -122,23 +124,27 @@ class BaseTypes(Packer):
return BaseTypes(f_type, options=options)
def __repr__(self):
- return "BaseTypes(type=%s, elements=%s, options=%s)" % (self._type,
- self._elements,
- self.options)
+ return "BaseTypes(type=%s, elements=%s, options=%s)" % (
+ self._type,
+ self._elements,
+ self.options,
+ )
class String(Packer):
def __init__(self, name, num, options):
self.name = name
self.num = num
- self.size = 1
- self.length_field_packer = BaseTypes('u32')
- self.limit = options['limit'] if 'limit' in options else num
+ self.size = num if num else 1
+ self.length_field_packer = BaseTypes("u32")
+ self.limit = options["limit"] if "limit" in options else num
self.fixed = True if num else False
if self.fixed and not self.limit:
raise VPPSerializerValueError(
- "Invalid combination for: {}, {} fixed:{} limit:{}".
- format(name, options, self.fixed, self.limit))
+ "Invalid combination for: {}, {} fixed:{} limit:{}".format(
+ name, options, self.fixed, self.limit
+ )
+ )
def pack(self, list, kwargs=None):
if not list:
@@ -147,34 +153,42 @@ class String(Packer):
return self.length_field_packer.pack(0) + b""
if self.limit and len(list) > self.limit - 1:
raise VPPSerializerValueError(
- "Invalid argument length for: {}, {} maximum {}".
- format(list, len(list), self.limit - 1))
+ "Invalid argument length for: {}, {} maximum {}".format(
+ list, len(list), self.limit - 1
+ )
+ )
if self.fixed:
- return list.encode('ascii').ljust(self.limit, b'\x00')
- return self.length_field_packer.pack(len(list)) + list.encode('ascii')
+ return list.encode("ascii").ljust(self.limit, b"\x00")
+ return self.length_field_packer.pack(len(list)) + list.encode("ascii")
def unpack(self, data, offset=0, result=None, ntc=False):
if self.fixed:
- p = BaseTypes('u8', self.num)
+ p = BaseTypes("u8", self.num)
s = p.unpack(data, offset)
- s2 = s[0].split(b'\0', 1)[0]
- return (s2.decode('ascii'), self.num)
+ s2 = s[0].split(b"\0", 1)[0]
+ return (s2.decode("ascii"), self.num)
- length, length_field_size = self.length_field_packer.unpack(data,
- offset)
+ length, length_field_size = self.length_field_packer.unpack(data, offset)
if length == 0:
- return '', 0
- p = BaseTypes('u8', length)
+ return "", 0
+ p = BaseTypes("u8", length)
x, size = p.unpack(data, offset + length_field_size)
- return (x.decode('ascii', errors='replace'), size + length_field_size)
-
-
-types = {'u8': BaseTypes('u8'), 'i8': BaseTypes('i8'),
- 'u16': BaseTypes('u16'), 'i16': BaseTypes('i16'),
- 'u32': BaseTypes('u32'), 'i32': BaseTypes('i32'),
- 'u64': BaseTypes('u64'), 'i64': BaseTypes('i64'),
- 'f64': BaseTypes('f64'),
- 'bool': BaseTypes('bool'), 'string': String}
+ return (x.decode("ascii", errors="replace"), size + length_field_size)
+
+
+types = {
+ "u8": BaseTypes("u8"),
+ "i8": BaseTypes("i8"),
+ "u16": BaseTypes("u16"),
+ "i16": BaseTypes("i16"),
+ "u32": BaseTypes("u32"),
+ "i32": BaseTypes("i32"),
+ "u64": BaseTypes("u64"),
+ "i64": BaseTypes("i64"),
+ "f64": BaseTypes("f64"),
+ "bool": BaseTypes("bool"),
+ "string": String,
+}
class_types = {}
@@ -202,32 +216,34 @@ class FixedList_u8(Packer):
"""Packs a fixed length bytestring. Left-pads with zeros
if input data is too short."""
if not data:
- return b'\x00' * self.size
+ return b"\x00" * self.size
if len(data) > self.num:
raise VPPSerializerValueError(
'Fixed list length error for "{}", got: {}'
- ' expected: {}'
- .format(self.name, len(data), self.num))
+ " expected: {}".format(self.name, len(data), self.num)
+ )
try:
return self.packer.pack(data)
except struct.error:
raise VPPSerializerValueError(
- 'Packing failed for "{}" {}'
- .format(self.name, kwargs))
+ 'Packing failed for "{}" {}'.format(self.name, kwargs)
+ )
def unpack(self, data, offset=0, result=None, ntc=False):
if len(data[offset:]) < self.num:
raise VPPSerializerValueError(
'Invalid array length for "{}" got {}'
- ' expected {}'
- .format(self.name, len(data[offset:]), self.num))
+ " expected {}".format(self.name, len(data[offset:]), self.num)
+ )
return self.packer.unpack(data, offset)
def __repr__(self):
return "FixedList_u8(name=%s, field_type=%s, num=%s)" % (
- self.name, self.field_type, self.num
+ self.name,
+ self.field_type,
+ self.num,
)
@@ -242,12 +258,14 @@ class FixedList(Packer):
def pack(self, list, kwargs):
if len(list) != self.num:
raise VPPSerializerValueError(
- 'Fixed list length error, got: {} expected: {}'
- .format(len(list), self.num))
- b = bytes()
+ "Fixed list length error, got: {} expected: {}".format(
+ len(list), self.num
+ )
+ )
+ b = bytearray()
for e in list:
b += self.packer.pack(e)
- return b
+ return bytes(b)
def unpack(self, data, offset=0, result=None, ntc=False):
# Return a list of arguments
@@ -262,7 +280,10 @@ class FixedList(Packer):
def __repr__(self):
return "FixedList(name=%s, field_type=%s, num=%s)" % (
- self.name, self.field_type, self.num)
+ self.name,
+ self.field_type,
+ self.num,
+ )
class VLAList(Packer):
@@ -279,29 +300,30 @@ class VLAList(Packer):
return b""
if len(lst) != kwargs[self.length_field]:
raise VPPSerializerValueError(
- 'Variable length error, got: {} expected: {}'
- .format(len(lst), kwargs[self.length_field]))
-
+ "Variable length error, got: {} expected: {}".format(
+ len(lst), kwargs[self.length_field]
+ )
+ )
# u8 array
- if self.packer.size == 1:
+ if self.packer.size == 1 and self.field_type == "u8":
if isinstance(lst, list):
- return b''.join(lst)
+ return b"".join(lst)
return bytes(lst)
- b = bytes()
+ b = bytearray()
for e in lst:
b += self.packer.pack(e)
- return b
+ return bytes(b)
def unpack(self, data, offset=0, result=None, ntc=False):
# Return a list of arguments
total = 0
# u8 array
- if self.packer.size == 1:
+ if self.packer.size == 1 and self.field_type == "u8":
if result[self.index] == 0:
- return b'', 0
- p = BaseTypes('u8', result[self.index])
+ return b"", 0
+ p = BaseTypes("u8", result[self.index])
return p.unpack(data, offset, ntc=ntc)
r = []
@@ -313,10 +335,12 @@ class VLAList(Packer):
return r, total
def __repr__(self):
- return "VLAList(name=%s, field_type=%s, " \
- "len_field_name=%s, index=%s)" % (
- self.name, self.field_type, self.length_field, self.index
- )
+ return "VLAList(name=%s, field_type=%s, " "len_field_name=%s, index=%s)" % (
+ self.name,
+ self.field_type,
+ self.length_field,
+ self.index,
+ )
class VLAList_legacy(Packer):
@@ -330,17 +354,18 @@ class VLAList_legacy(Packer):
if self.packer.size == 1:
return bytes(list)
- b = bytes()
+ b = bytearray()
for e in list:
b += self.packer.pack(e)
- return b
+ return bytes(b)
def unpack(self, data, offset=0, result=None, ntc=False):
total = 0
# Return a list of arguments
if (len(data) - offset) % self.packer.size:
raise VPPSerializerValueError(
- 'Legacy Variable Length Array length mismatch.')
+ "Legacy Variable Length Array length mismatch."
+ )
elements = int((len(data) - offset) / self.packer.size)
r = []
for e in range(elements):
@@ -351,9 +376,7 @@ class VLAList_legacy(Packer):
return r, total
def __repr__(self):
- return "VLAList_legacy(name=%s, field_type=%s)" % (
- self.name, self.field_type
- )
+ return "VLAList_legacy(name=%s, field_type=%s)" % (self.name, self.field_type)
# Will change to IntEnum after 21.04 release
@@ -361,16 +384,16 @@ class VPPEnumType(Packer):
output_class = IntFlag
def __init__(self, name, msgdef, options=None):
- self.size = types['u32'].size
+ self.size = types["u32"].size
self.name = name
- self.enumtype = 'u32'
+ self.enumtype = "u32"
self.msgdef = msgdef
e_hash = {}
for f in msgdef:
- if type(f) is dict and 'enumtype' in f:
- if f['enumtype'] != 'u32':
- self.size = types[f['enumtype']].size
- self.enumtype = f['enumtype']
+ if type(f) is dict and "enumtype" in f:
+ if f["enumtype"] != "u32":
+ self.size = types[f["enumtype"]].size
+ self.enumtype = f["enumtype"]
continue
ename, evalue = f
e_hash[ename] = evalue
@@ -387,8 +410,8 @@ class VPPEnumType(Packer):
def pack(self, data, kwargs=None):
if data is None: # Default to zero if not specified
- if self.options and 'default' in self.options:
- data = self.options['default']
+ if self.options and "default" in self.options:
+ data = self.options["default"]
else:
data = 0
@@ -404,7 +427,10 @@ class VPPEnumType(Packer):
def __repr__(self):
return "%s(name=%s, msgdef=%s, options=%s)" % (
- self.__class__.__name__, self.name, self.msgdef, self.options
+ self.__class__.__name__,
+ self.name,
+ self.msgdef,
+ self.options,
)
@@ -424,14 +450,13 @@ class VPPUnionType(Packer):
fields = []
self.packers = collections.OrderedDict()
for i, f in enumerate(msgdef):
- if type(f) is dict and 'crc' in f:
- self.crc = f['crc']
+ if type(f) is dict and "crc" in f:
+ self.crc = f["crc"]
continue
f_type, f_name = f
if f_type not in types:
- logger.debug('Unknown union type {}'.format(f_type))
- raise VPPSerializerValueError(
- 'Unknown message type {}'.format(f_type))
+ logger.debug("Unknown union type {}".format(f_type))
+ raise VPPSerializerValueError("Unknown message type {}".format(f_type))
fields.append(f_name)
size = types[f_type].size
self.packers[f_name] = types[f_type]
@@ -445,14 +470,14 @@ class VPPUnionType(Packer):
# Union of variable length?
def pack(self, data, kwargs=None):
if not data:
- return b'\x00' * self.size
+ return b"\x00" * self.size
for k, v in data.items():
logger.debug("Key: {} Value: {}".format(k, v))
b = self.packers[k].pack(v, kwargs)
break
r = bytearray(self.size)
- r[:len(b)] = b
+ r[: len(b)] = b
return r
def unpack(self, data, offset=0, result=None, ntc=False):
@@ -466,25 +491,24 @@ class VPPUnionType(Packer):
return self.tuple._make(r), maxsize
def __repr__(self):
- return"VPPUnionType(name=%s, msgdef=%r)" % (self.name, self.msgdef)
+ return "VPPUnionType(name=%s, msgdef=%r)" % (self.name, self.msgdef)
class VPPTypeAlias(Packer):
def __init__(self, name, msgdef, options=None):
self.name = name
self.msgdef = msgdef
- t = vpp_get_type(msgdef['type'])
+ t = vpp_get_type(msgdef["type"])
if not t:
- raise ValueError('No such type: {}'.format(msgdef['type']))
- if 'length' in msgdef:
- if msgdef['length'] == 0:
+ raise ValueError("No such type: {}".format(msgdef["type"]))
+ if "length" in msgdef:
+ if msgdef["length"] == 0:
raise ValueError()
- if msgdef['type'] == 'u8':
- self.packer = FixedList_u8(name, msgdef['type'],
- msgdef['length'])
+ if msgdef["type"] == "u8":
+ self.packer = FixedList_u8(name, msgdef["type"], msgdef["length"])
self.size = self.packer.size
else:
- self.packer = FixedList(name, msgdef['type'], msgdef['length'])
+ self.packer = FixedList(name, msgdef["type"], msgdef["length"])
else:
self.packer = t
self.size = t.size
@@ -498,11 +522,11 @@ class VPPTypeAlias(Packer):
try:
return conversion_packer(data, self.name)
# Python 2 and 3 raises different exceptions from inet_pton
- except(OSError, socket.error, TypeError):
+ except (OSError, socket.error, TypeError):
pass
if data is None: # Default to zero if not specified
- if self.options and 'default' in self.options:
- data = self.options['default']
+ if self.options and "default" in self.options:
+ data = self.options["default"]
else:
data = 0
@@ -525,7 +549,10 @@ class VPPTypeAlias(Packer):
def __repr__(self):
return "VPPTypeAlias(name=%s, msgdef=%s, options=%s)" % (
- self.name, self.msgdef, self.options)
+ self.name,
+ self.msgdef,
+ self.options,
+ )
class VPPType(Packer):
@@ -539,17 +566,16 @@ class VPPType(Packer):
self.field_by_name = {}
size = 0
for i, f in enumerate(msgdef):
- if type(f) is dict and 'crc' in f:
- self.crc = f['crc']
+ if type(f) is dict and "crc" in f:
+ self.crc = f["crc"]
continue
f_type, f_name = f[:2]
self.fields.append(f_name)
self.field_by_name[f_name] = None
self.fieldtypes.append(f_type)
if f_type not in types:
- logger.debug('Unknown type {}'.format(f_type))
- raise VPPSerializerValueError(
- 'Unknown message type {}'.format(f_type))
+ logger.debug("Unknown type {}".format(f_type))
+ raise VPPSerializerValueError("Unknown message type {}".format(f_type))
fieldlen = len(f)
options = [x for x in f if type(x) is dict]
@@ -561,16 +587,16 @@ class VPPType(Packer):
if fieldlen == 3: # list
list_elements = f[2]
if list_elements == 0:
- if f_type == 'string':
+ if f_type == "string":
p = String(f_name, 0, self.options)
else:
p = VLAList_legacy(f_name, f_type)
self.packers.append(p)
- elif f_type == 'u8':
+ elif f_type == "u8":
p = FixedList_u8(f_name, f_type, list_elements)
self.packers.append(p)
size += p.size
- elif f_type == 'string':
+ elif f_type == "string":
p = String(f_name, list_elements, self.options)
self.packers.append(p)
size += p.size
@@ -584,14 +610,13 @@ class VPPType(Packer):
self.packers.append(p)
else:
# default support for types that decay to basetype
- if 'default' in self.options:
+ if "default" in self.options:
p = self.get_packer_with_options(f_type, self.options)
else:
p = types[f_type]
self.packers.append(p)
size += p.size
-
self.size = size
self.tuple = collections.namedtuple(name, self.fields, rename=True)
types[name] = self
@@ -600,7 +625,7 @@ class VPPType(Packer):
def pack(self, data, kwargs=None):
if not kwargs:
kwargs = data
- b = bytes()
+ b = bytearray()
# Try one of the format functions
if data and conversion_required(data, self.name):
@@ -609,8 +634,8 @@ class VPPType(Packer):
for i, a in enumerate(self.fields):
if data and type(data) is not dict and a not in data:
raise VPPSerializerValueError(
- "Invalid argument: {} expected {}.{}".
- format(data, self.name, a))
+ "Invalid argument: {} expected {}.{}".format(data, self.name, a)
+ )
# Defaulting to zero.
if not data or a not in data: # Default to 0
@@ -624,7 +649,7 @@ class VPPType(Packer):
else:
b += self.packers[i].pack(arg, kwargs)
- return b
+ return bytes(b)
def unpack(self, data, offset=0, result=None, ntc=False):
# Return a list of arguments
@@ -651,7 +676,9 @@ class VPPType(Packer):
def __repr__(self):
return "%s(name=%s, msgdef=%s)" % (
- self.__class__.__name__, self.name, self.msgdef
+ self.__class__.__name__,
+ self.name,
+ self.msgdef,
)
diff --git a/src/vpp-api/python/vpp_papi/vpp_stats.py b/src/vpp-api/python/vpp_papi/vpp_stats.py
index b9b23b52d66..aa9ff85b3c7 100755
--- a/src/vpp-api/python/vpp_papi/vpp_stats.py
+++ b/src/vpp-api/python/vpp_papi/vpp_stats.py
@@ -14,7 +14,7 @@
# limitations under the License.
#
-'''
+"""
This module implement Python access to the VPP statistics segment. It
accesses the data structures directly in shared memory.
VPP uses optimistic locking, so data structures may change underneath
@@ -39,7 +39,7 @@ stat['/if/rx'][:, 1].sum_packets() - returns the sum of packet counters for
interface 1 on all threads
stat['/if/rx-miss'][:, 1].sum() - returns the sum of packet counters for
interface 1 on all threads for simple counters
-'''
+"""
import os
import socket
@@ -50,31 +50,36 @@ import time
import unittest
import re
+
def recv_fd(sock):
- '''Get file descriptor for memory map'''
- fds = array.array("i") # Array of ints
- _, ancdata, _, _ = sock.recvmsg(0, socket.CMSG_LEN(4))
+ """Get file descriptor for memory map"""
+ fds = array.array("i") # Array of ints
+ _, ancdata, _, _ = sock.recvmsg(0, socket.CMSG_SPACE(4))
for cmsg_level, cmsg_type, cmsg_data in ancdata:
if cmsg_level == socket.SOL_SOCKET and cmsg_type == socket.SCM_RIGHTS:
- fds.frombytes(cmsg_data[:len(cmsg_data) - (len(cmsg_data) % fds.itemsize)])
+ fds.frombytes(cmsg_data[: len(cmsg_data) - (len(cmsg_data) % fds.itemsize)])
return list(fds)[0]
-VEC_LEN_FMT = Struct('I')
+
+VEC_LEN_FMT = Struct("I")
+
+
def get_vec_len(stats, vector_offset):
- '''Equivalent to VPP vec_len()'''
+ """Equivalent to VPP vec_len()"""
return VEC_LEN_FMT.unpack_from(stats.statseg, vector_offset - 8)[0]
+
def get_string(stats, ptr):
- '''Get a string from a VPP vector'''
+ """Get a string from a VPP vector"""
namevector = ptr - stats.base
namevectorlen = get_vec_len(stats, namevector)
if namevector + namevectorlen >= stats.size:
- raise IOError('String overruns stats segment')
- return stats.statseg[namevector:namevector+namevectorlen-1].decode('ascii')
+ raise IOError("String overruns stats segment")
+ return stats.statseg[namevector : namevector + namevectorlen - 1].decode("ascii")
class StatsVector:
- '''A class representing a VPP vector'''
+ """A class representing a VPP vector"""
def __init__(self, stats, ptr, fmt):
self.vec_start = ptr - stats.base
@@ -86,28 +91,35 @@ class StatsVector:
self.stats = stats
if self.vec_start + self.vec_len * self.elementsize >= stats.size:
- raise IOError('Vector overruns stats segment')
+ raise IOError("Vector overruns stats segment")
def __iter__(self):
with self.stats.lock:
- return self.struct.iter_unpack(self.statseg[self.vec_start:self.vec_start +
- self.elementsize*self.vec_len])
+ return self.struct.iter_unpack(
+ self.statseg[
+ self.vec_start : self.vec_start + self.elementsize * self.vec_len
+ ]
+ )
def __getitem__(self, index):
if index > self.vec_len:
- raise IOError('Index beyond end of vector')
+ raise IOError("Index beyond end of vector")
with self.stats.lock:
if self.fmtlen == 1:
- return self.struct.unpack_from(self.statseg, self.vec_start +
- (index * self.elementsize))[0]
- return self.struct.unpack_from(self.statseg, self.vec_start +
- (index * self.elementsize))
+ return self.struct.unpack_from(
+ self.statseg, self.vec_start + (index * self.elementsize)
+ )[0]
+ return self.struct.unpack_from(
+ self.statseg, self.vec_start + (index * self.elementsize)
+ )
+
+
+class VPPStats:
+ """Main class implementing Python access to the VPP statistics segment"""
-class VPPStats():
- '''Main class implementing Python access to the VPP statistics segment'''
# pylint: disable=too-many-instance-attributes
- shared_headerfmt = Struct('QPQQPP')
- default_socketname = '/run/vpp/stats.sock'
+ shared_headerfmt = Struct("QPQQPP")
+ default_socketname = "/run/vpp/stats.sock"
def __init__(self, socketname=default_socketname, timeout=10):
self.socketname = socketname
@@ -117,89 +129,87 @@ class VPPStats():
self.connected = False
self.size = 0
self.last_epoch = 0
- self.error_vectors = 0
self.statseg = 0
def connect(self):
- '''Connect to stats segment'''
+ """Connect to stats segment"""
if self.connected:
return
sock = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET)
+
+ # Our connect races the corresponding recv_fds call in VPP, if we beat
+ # VPP then we will try (unsuccessfully) to receive file descriptors and
+ # will have gone away before VPP can respond to our connect. A short
+ # timeout here stops this error occurring.
+ sock.settimeout(1)
sock.connect(self.socketname)
mfd = recv_fd(sock)
sock.close()
stat_result = os.fstat(mfd)
- self.statseg = mmap.mmap(mfd, stat_result.st_size, mmap.PROT_READ, mmap.MAP_SHARED)
+ self.statseg = mmap.mmap(
+ mfd, stat_result.st_size, mmap.PROT_READ, mmap.MAP_SHARED
+ )
os.close(mfd)
self.size = stat_result.st_size
if self.version != 2:
- raise Exception('Incompatbile stat segment version {}'
- .format(self.version))
+ raise Exception("Incompatbile stat segment version {}".format(self.version))
self.refresh()
self.connected = True
def disconnect(self):
- '''Disconnect from stats segment'''
+ """Disconnect from stats segment"""
if self.connected:
self.statseg.close()
self.connected = False
@property
def version(self):
- '''Get version of stats segment'''
+ """Get version of stats segment"""
return self.shared_headerfmt.unpack_from(self.statseg)[0]
@property
def base(self):
- '''Get base pointer of stats segment'''
+ """Get base pointer of stats segment"""
return self.shared_headerfmt.unpack_from(self.statseg)[1]
@property
def epoch(self):
- '''Get current epoch value from stats segment'''
+ """Get current epoch value from stats segment"""
return self.shared_headerfmt.unpack_from(self.statseg)[2]
@property
def in_progress(self):
- '''Get value of in_progress from stats segment'''
+ """Get value of in_progress from stats segment"""
return self.shared_headerfmt.unpack_from(self.statseg)[3]
@property
def directory_vector(self):
- '''Get pointer of directory vector'''
+ """Get pointer of directory vector"""
return self.shared_headerfmt.unpack_from(self.statseg)[4]
- @property
- def error_vector(self):
- '''Get pointer of error vector'''
- return self.shared_headerfmt.unpack_from(self.statseg)[5]
-
- elementfmt = 'IQ128s'
+ elementfmt = "IQ128s"
def refresh(self, blocking=True):
- '''Refresh directory vector cache (epoch changed)'''
+ """Refresh directory vector cache (epoch changed)"""
directory = {}
directory_by_idx = {}
while True:
try:
with self.lock:
self.last_epoch = self.epoch
- for i, direntry in enumerate(StatsVector(self, self.directory_vector, self.elementfmt)):
- path_raw = direntry[2].find(b'\x00')
- path = direntry[2][:path_raw].decode('ascii')
+ for i, direntry in enumerate(
+ StatsVector(self, self.directory_vector, self.elementfmt)
+ ):
+ path_raw = direntry[2].find(b"\x00")
+ path = direntry[2][:path_raw].decode("ascii")
directory[path] = StatsEntry(direntry[0], direntry[1])
directory_by_idx[i] = path
self.directory = directory
self.directory_by_idx = directory_by_idx
-
- # Cache the error index vectors
- self.error_vectors = []
- for threads in StatsVector(self, self.error_vector, 'P'):
- self.error_vectors.append(StatsVector(self, threads[0], 'Q'))
return
except IOError:
if not blocking:
@@ -222,78 +232,66 @@ class VPPStats():
return iter(self.directory.items())
def set_errors(self, blocking=True):
- '''Return dictionary of error counters > 0'''
+ """Return dictionary of error counters > 0"""
if not self.connected:
self.connect()
- errors = {k:v for k, v in self.directory.items() if k.startswith("/err/")}
+ errors = {k: v for k, v in self.directory.items() if k.startswith("/err/")}
result = {}
- while True:
+ for k in errors:
try:
- if self.last_epoch != self.epoch:
- self.refresh(blocking)
- with self.lock:
- for k, entry in errors.items():
- total = 0
- i = entry.value
- for per_thread in self.error_vectors:
- total += per_thread[i]
- if total:
- result[k] = total
- return result
- except IOError:
- if not blocking:
- raise
+ total = self[k].sum()
+ if total:
+ result[k] = total
+ except KeyError:
+ pass
+ return result
def set_errors_str(self, blocking=True):
- '''Return all errors counters > 0 pretty printed'''
- error_string = ['ERRORS:']
+ """Return all errors counters > 0 pretty printed"""
+ error_string = ["ERRORS:"]
error_counters = self.set_errors(blocking)
for k in sorted(error_counters):
- error_string.append('{:<60}{:>10}'.format(k, error_counters[k]))
- return '%s\n' % '\n'.join(error_string)
+ error_string.append("{:<60}{:>10}".format(k, error_counters[k]))
+ return "%s\n" % "\n".join(error_string)
def get_counter(self, name, blocking=True):
- '''Alternative call to __getitem__'''
+ """Alternative call to __getitem__"""
return self.__getitem__(name, blocking)
def get_err_counter(self, name, blocking=True):
- '''Return a single value (sum of all threads)'''
- if not self.connected:
- self.connect()
- if name.startswith("/err/"):
- while True:
- try:
- if self.last_epoch != self.epoch:
- self.refresh(blocking)
- with self.lock:
- return sum(self.directory[name].get_counter(self))
- except IOError:
- if not blocking:
- raise
+ """Alternative call to __getitem__"""
+ return self.__getitem__(name, blocking).sum()
def ls(self, patterns):
- '''Returns list of counters matching pattern'''
+ """Returns list of counters matching pattern"""
# pylint: disable=invalid-name
if not self.connected:
self.connect()
if not isinstance(patterns, list):
patterns = [patterns]
regex = [re.compile(i) for i in patterns]
- return [k for k, v in self.directory.items()
- if any(re.match(pattern, k) for pattern in regex)]
+ if self.last_epoch != self.epoch:
+ self.refresh()
+
+ return [
+ k
+ for k, v in self.directory.items()
+ if any(re.match(pattern, k) for pattern in regex)
+ ]
def dump(self, counters, blocking=True):
- '''Given a list of counters return a dictionary of results'''
+ """Given a list of counters return a dictionary of results"""
if not self.connected:
self.connect()
result = {}
for cnt in counters:
- result[cnt] = self.__getitem__(cnt,blocking)
+ result[cnt] = self.__getitem__(cnt, blocking)
return result
-class StatsLock():
- '''Stat segment optimistic locking'''
+
+class StatsLock:
+ """Stat segment optimistic locking"""
def __init__(self, stats):
self.stats = stats
@@ -308,7 +306,7 @@ class StatsLock():
self.release()
def acquire(self, blocking=True, timeout=-1):
- '''Acquire the lock. Await in progress to go false. Record epoch.'''
+ """Acquire the lock. Await in progress to go false. Record epoch."""
self.epoch = self.stats.epoch
if timeout > 0:
start = time.monotonic()
@@ -321,46 +319,49 @@ class StatsLock():
return True
def release(self):
- '''Check if data read while locked is valid'''
+ """Check if data read while locked is valid"""
if self.stats.in_progress or self.stats.epoch != self.epoch:
- raise IOError('Optimistic lock failed, retry')
+ raise IOError("Optimistic lock failed, retry")
def locked(self):
- '''Not used'''
+ """Not used"""
class StatsCombinedList(list):
- '''Column slicing for Combined counters list'''
+ """Column slicing for Combined counters list"""
def __getitem__(self, item):
- '''Supports partial numpy style 2d support. Slice by column [:,1]'''
+ """Supports partial numpy style 2d support. Slice by column [:,1]"""
if isinstance(item, int):
return list.__getitem__(self, item)
return CombinedList([row[item[1]] for row in self])
+
class CombinedList(list):
- '''Combined Counters 2-dimensional by thread by index of packets/octets'''
+ """Combined Counters 2-dimensional by thread by index of packets/octets"""
def packets(self):
- '''Return column (2nd dimension). Packets for all threads'''
+ """Return column (2nd dimension). Packets for all threads"""
return [pair[0] for pair in self]
def octets(self):
- '''Return column (2nd dimension). Octets for all threads'''
+ """Return column (2nd dimension). Octets for all threads"""
return [pair[1] for pair in self]
def sum_packets(self):
- '''Return column (2nd dimension). Sum of all packets for all threads'''
+ """Return column (2nd dimension). Sum of all packets for all threads"""
return sum(self.packets())
def sum_octets(self):
- '''Return column (2nd dimension). Sum of all octets for all threads'''
+ """Return column (2nd dimension). Sum of all octets for all threads"""
return sum(self.octets())
+
class StatsTuple(tuple):
- '''A Combined vector tuple (packets, octets)'''
+ """A Combined vector tuple (packets, octets)"""
+
def __init__(self, data):
- self.dictionary = {'packets': data[0], 'bytes': data[1]}
+ self.dictionary = {"packets": data[0], "bytes": data[1]}
super().__init__()
def __repr__(self):
@@ -369,28 +370,32 @@ class StatsTuple(tuple):
def __getitem__(self, item):
if isinstance(item, int):
return tuple.__getitem__(self, item)
- if item == 'packets':
+ if item == "packets":
return tuple.__getitem__(self, 0)
return tuple.__getitem__(self, 1)
+
class StatsSimpleList(list):
- '''Simple Counters 2-dimensional by thread by index of packets'''
+ """Simple Counters 2-dimensional by thread by index of packets"""
def __getitem__(self, item):
- '''Supports partial numpy style 2d support. Slice by column [:,1]'''
+ """Supports partial numpy style 2d support. Slice by column [:,1]"""
if isinstance(item, int):
return list.__getitem__(self, item)
return SimpleList([row[item[1]] for row in self])
+
class SimpleList(list):
- '''Simple counter'''
+ """Simple counter"""
def sum(self):
- '''Sum the vector'''
+ """Sum the vector"""
return sum(self)
-class StatsEntry():
- '''An individual stats entry'''
+
+class StatsEntry:
+ """An individual stats entry"""
+
# pylint: disable=unused-argument,no-self-use
def __init__(self, stattype, statvalue):
@@ -404,140 +409,135 @@ class StatsEntry():
elif stattype == 3:
self.function = self.combined
elif stattype == 4:
- self.function = self.error
- elif stattype == 5:
self.function = self.name
- elif stattype == 7:
+ elif stattype == 6:
self.function = self.symlink
else:
self.function = self.illegal
def illegal(self, stats):
- '''Invalid or unknown counter type'''
+ """Invalid or unknown counter type"""
return None
def scalar(self, stats):
- '''Scalar counter'''
+ """Scalar counter"""
return self.value
def simple(self, stats):
- '''Simple counter'''
+ """Simple counter"""
counter = StatsSimpleList()
- for threads in StatsVector(stats, self.value, 'P'):
- clist = [v[0] for v in StatsVector(stats, threads[0], 'Q')]
+ for threads in StatsVector(stats, self.value, "P"):
+ clist = [v[0] for v in StatsVector(stats, threads[0], "Q")]
counter.append(clist)
return counter
def combined(self, stats):
- '''Combined counter'''
+ """Combined counter"""
counter = StatsCombinedList()
- for threads in StatsVector(stats, self.value, 'P'):
- clist = [StatsTuple(cnt) for cnt in StatsVector(stats, threads[0], 'QQ')]
+ for threads in StatsVector(stats, self.value, "P"):
+ clist = [StatsTuple(cnt) for cnt in StatsVector(stats, threads[0], "QQ")]
counter.append(clist)
return counter
- def error(self, stats):
- '''Error counter'''
- counter = SimpleList()
- for clist in stats.error_vectors:
- counter.append(clist[self.value])
- return counter
-
def name(self, stats):
- '''Name counter'''
+ """Name counter"""
counter = []
- for name in StatsVector(stats, self.value, 'P'):
+ for name in StatsVector(stats, self.value, "P"):
if name[0]:
counter.append(get_string(stats, name[0]))
return counter
- SYMLINK_FMT1 = Struct('II')
- SYMLINK_FMT2 = Struct('Q')
+ SYMLINK_FMT1 = Struct("II")
+ SYMLINK_FMT2 = Struct("Q")
+
def symlink(self, stats):
- '''Symlink counter'''
+ """Symlink counter"""
b = self.SYMLINK_FMT2.pack(self.value)
index1, index2 = self.SYMLINK_FMT1.unpack(b)
name = stats.directory_by_idx[index1]
- return stats[name][:,index2]
+ return stats[name][:, index2]
def get_counter(self, stats):
- '''Return a list of counters'''
+ """Return a list of counters"""
if stats:
return self.function(stats)
+
class TestStats(unittest.TestCase):
- '''Basic statseg tests'''
+ """Basic statseg tests"""
def setUp(self):
- '''Connect to statseg'''
+ """Connect to statseg"""
self.stat = VPPStats()
self.stat.connect()
self.profile = cProfile.Profile()
self.profile.enable()
def tearDown(self):
- '''Disconnect from statseg'''
+ """Disconnect from statseg"""
self.stat.disconnect()
profile = Stats(self.profile)
profile.strip_dirs()
- profile.sort_stats('cumtime')
+ profile.sort_stats("cumtime")
profile.print_stats()
print("\n--->>>")
def test_counters(self):
- '''Test access to statseg'''
-
- print('/err/abf-input-ip4/missed', self.stat['/err/abf-input-ip4/missed'])
- print('/sys/heartbeat', self.stat['/sys/heartbeat'])
- print('/if/names', self.stat['/if/names'])
- print('/if/rx-miss', self.stat['/if/rx-miss'])
- print('/if/rx-miss', self.stat['/if/rx-miss'][1])
- print('/nat44-ed/out2in/slowpath/drops', self.stat['/nat44-ed/out2in/slowpath/drops'])
- print('Set Errors', self.stat.set_errors())
+ """Test access to statseg"""
+
+ print("/err/abf-input-ip4/missed", self.stat["/err/abf-input-ip4/missed"])
+ print("/sys/heartbeat", self.stat["/sys/heartbeat"])
+ print("/if/names", self.stat["/if/names"])
+ print("/if/rx-miss", self.stat["/if/rx-miss"])
+ print("/if/rx-miss", self.stat["/if/rx-miss"][1])
+ print(
+ "/nat44-ed/out2in/slowpath/drops",
+ self.stat["/nat44-ed/out2in/slowpath/drops"],
+ )
with self.assertRaises(KeyError):
- print('NO SUCH COUNTER', self.stat['foobar'])
- print('/if/rx', self.stat.get_counter('/if/rx'))
- print('/err/ethernet-input/no error',
- self.stat.get_err_counter('/err/ethernet-input/no error'))
+ print("NO SUCH COUNTER", self.stat["foobar"])
+ print("/if/rx", self.stat.get_counter("/if/rx"))
+ print(
+ "/err/ethernet-input/no_error",
+ self.stat.get_counter("/err/ethernet-input/no_error"),
+ )
def test_column(self):
- '''Test column slicing'''
-
- print('/if/rx-miss', self.stat['/if/rx-miss'])
- print('/if/rx', self.stat['/if/rx']) # All interfaces for thread #1
- print('/if/rx thread #1', self.stat['/if/rx'][0]) # All interfaces for thread #1
- print('/if/rx thread #1, interface #1',
- self.stat['/if/rx'][0][1]) # All interfaces for thread #1
- print('/if/rx if_index #1', self.stat['/if/rx'][:, 1])
- print('/if/rx if_index #1 packets', self.stat['/if/rx'][:, 1].packets())
- print('/if/rx if_index #1 packets', self.stat['/if/rx'][:, 1].sum_packets())
- print('/if/rx if_index #1 packets', self.stat['/if/rx'][:, 1].octets())
- print('/if/rx-miss', self.stat['/if/rx-miss'])
- print('/if/rx-miss if_index #1 packets', self.stat['/if/rx-miss'][:, 1].sum())
- print('/if/rx if_index #1 packets', self.stat['/if/rx'][0][1]['packets'])
-
- def test_error(self):
- '''Test the error vector'''
-
- print('/err/ethernet-input', self.stat['/err/ethernet-input/no error'])
- print('/err/nat44-ei-ha/pkts-processed', self.stat['/err/nat44-ei-ha/pkts-processed'])
- print('/err/ethernet-input', self.stat.get_err_counter('/err/ethernet-input/no error'))
- print('/err/ethernet-input', self.stat['/err/ethernet-input/no error'].sum())
+ """Test column slicing"""
+
+ print("/if/rx-miss", self.stat["/if/rx-miss"])
+ print("/if/rx", self.stat["/if/rx"]) # All interfaces for thread #1
+ print(
+ "/if/rx thread #1", self.stat["/if/rx"][0]
+ ) # All interfaces for thread #1
+ print(
+ "/if/rx thread #1, interface #1", self.stat["/if/rx"][0][1]
+ ) # All interfaces for thread #1
+ print("/if/rx if_index #1", self.stat["/if/rx"][:, 1])
+ print("/if/rx if_index #1 packets", self.stat["/if/rx"][:, 1].packets())
+ print("/if/rx if_index #1 packets", self.stat["/if/rx"][:, 1].sum_packets())
+ print("/if/rx if_index #1 packets", self.stat["/if/rx"][:, 1].octets())
+ print("/if/rx-miss", self.stat["/if/rx-miss"])
+ print("/if/rx-miss if_index #1 packets", self.stat["/if/rx-miss"][:, 1].sum())
+ print("/if/rx if_index #1 packets", self.stat["/if/rx"][0][1]["packets"])
def test_nat44(self):
- '''Test the nat counters'''
+ """Test the nat counters"""
- print('/nat44-ei/ha/del-event-recv', self.stat['/nat44-ei/ha/del-event-recv'])
- print('/err/nat44-ei-ha/pkts-processed', self.stat['/err/nat44-ei-ha/pkts-processed'].sum())
+ print("/nat44-ei/ha/del-event-recv", self.stat["/nat44-ei/ha/del-event-recv"])
+ print(
+ "/err/nat44-ei-ha/pkts-processed",
+ self.stat["/err/nat44-ei-ha/pkts-processed"].sum(),
+ )
def test_legacy(self):
- '''Legacy interface'''
+ """Legacy interface"""
directory = self.stat.ls(["^/if", "/err/ip4-input", "/sys/node/ip4-input"])
data = self.stat.dump(directory)
print(data)
- print('Looking up sys node')
+ print("Looking up sys node")
directory = self.stat.ls(["^/sys/node"])
- print('Dumping sys node')
+ print("Dumping sys node")
data = self.stat.dump(directory)
print(data)
directory = self.stat.ls(["^/foobar"])
@@ -545,18 +545,19 @@ class TestStats(unittest.TestCase):
print(data)
def test_sys_nodes(self):
- '''Test /sys/nodes'''
- counters = self.stat.ls('^/sys/node')
- print('COUNTERS:', counters)
- print('/sys/node', self.stat.dump(counters))
- print('/net/route/to', self.stat['/net/route/to'])
+ """Test /sys/nodes"""
+ counters = self.stat.ls("^/sys/node")
+ print("COUNTERS:", counters)
+ print("/sys/node", self.stat.dump(counters))
+ print("/net/route/to", self.stat["/net/route/to"])
def test_symlink(self):
- '''Symbolic links'''
- print('/interface/local0/rx', self.stat['/interfaces/local0/rx'])
- print('/sys/nodes/unix-epoll-input', self.stat['/nodes/unix-epoll-input/calls'])
+ """Symbolic links"""
+ print("/interface/local0/rx", self.stat["/interfaces/local0/rx"])
+ print("/sys/nodes/unix-epoll-input", self.stat["/nodes/unix-epoll-input/calls"])
+
-if __name__ == '__main__':
+if __name__ == "__main__":
import cProfile
from pstats import Stats
diff --git a/src/vpp-api/python/vpp_papi/vpp_transport_socket.py b/src/vpp-api/python/vpp_papi/vpp_transport_socket.py
index c82b8c365a1..174ab74d0b8 100644
--- a/src/vpp-api/python/vpp_papi/vpp_transport_socket.py
+++ b/src/vpp-api/python/vpp_papi/vpp_transport_socket.py
@@ -9,7 +9,7 @@ import multiprocessing
import queue
import logging
-logger = logging.getLogger('vpp_papi.transport')
+logger = logging.getLogger("vpp_papi.transport")
logger.addHandler(logging.NullHandler())
@@ -26,7 +26,7 @@ class VppTransport:
self.read_timeout = read_timeout if read_timeout > 0 else None
self.parent = parent
self.server_address = server_address
- self.header = struct.Struct('>QII')
+ self.header = struct.Struct(">QII")
self.message_table = {}
# These queues can be accessed async.
# They are always up, but replaced on connect.
@@ -41,11 +41,10 @@ class VppTransport:
def msg_thread_func(self):
while True:
try:
- rlist, _, _ = select.select([self.socket,
- self.sque._reader], [], [])
- except socket.error:
+ rlist, _, _ = select.select([self.socket, self.sque._reader], [], [])
+ except (socket.error, ValueError):
# Terminate thread
- logging.error('select failed')
+ logging.error("select failed")
self.q.put(None)
return
@@ -66,21 +65,21 @@ class VppTransport:
return
# Put either to local queue or if context == 0
# callback queue
- if self.parent.has_context(msg):
+ if not self.do_async and self.parent.has_context(msg):
self.q.put(msg)
else:
self.parent.msg_handler_async(msg)
else:
- raise VppTransportSocketIOError(
- 2, 'Unknown response from select')
+ raise VppTransportSocketIOError(2, "Unknown response from select")
- def connect(self, name, pfx, msg_handler, rx_qlen):
+ def connect(self, name, pfx, msg_handler, rx_qlen, do_async=False):
# TODO: Reorder the actions and add "roll-backs",
# to restore clean disconnect state when failure happens durng connect.
if self.message_thread is not None:
raise VppTransportSocketIOError(
- 1, "PAPI socket transport connect: Need to disconnect first.")
+ 1, "PAPI socket transport connect: Need to disconnect first."
+ )
# Create a UDS socket
self.socket = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
@@ -107,19 +106,17 @@ class VppTransport:
self.message_thread = threading.Thread(target=self.msg_thread_func)
# Initialise sockclnt_create
- sockclnt_create = self.parent.messages['sockclnt_create']
- sockclnt_create_reply = self.parent.messages['sockclnt_create_reply']
+ sockclnt_create = self.parent.messages["sockclnt_create"]
+ sockclnt_create_reply = self.parent.messages["sockclnt_create_reply"]
- args = {'_vl_msg_id': 15,
- 'name': name,
- 'context': 124}
+ args = {"_vl_msg_id": 15, "name": name, "context": 124}
b = sockclnt_create.pack(args)
self.write(b)
msg = self._read()
hdr, length = self.parent.header.unpack(msg, 0)
if hdr.msgid != 16:
# TODO: Add first numeric argument.
- raise VppTransportSocketIOError('Invalid reply message')
+ raise VppTransportSocketIOError("Invalid reply message")
r, length = sockclnt_create_reply.unpack(msg)
self.socket_index = r.index
@@ -128,6 +125,7 @@ class VppTransport:
self.message_table[n] = m.index
self.message_thread.daemon = True
+ self.do_async = do_async
self.message_thread.start()
return 0
@@ -184,7 +182,7 @@ class VppTransport:
def write(self, buf):
"""Send a binary-packed message to VPP."""
if not self.connected:
- raise VppTransportSocketIOError(1, 'Not connected')
+ raise VppTransportSocketIOError(1, "Not connected")
# Send header
header = self.header.pack(0, len(buf), 0)
@@ -192,8 +190,7 @@ class VppTransport:
self.socket.sendall(header)
self.socket.sendall(buf)
except socket.error as err:
- raise VppTransportSocketIOError(1, 'Sendall error: {err!r}'.format(
- err=err))
+ raise VppTransportSocketIOError(1, "Sendall error: {err!r}".format(err=err))
def _read_fixed(self, size):
"""Repeat receive until fixed size is read. Return empty on error."""
@@ -223,11 +220,11 @@ class VppTransport:
msg = self._read_fixed(hdrlen)
if hdrlen == len(msg):
return msg
- raise VppTransportSocketIOError(1, 'Unknown socket read error')
+ raise VppTransportSocketIOError(1, "Unknown socket read error")
def read(self, timeout=None):
if not self.connected:
- raise VppTransportSocketIOError(1, 'Not connected')
+ raise VppTransportSocketIOError(1, "Not connected")
if timeout is None:
timeout = self.read_timeout
try:
diff --git a/src/vpp-api/vapi/CMakeLists.txt b/src/vpp-api/vapi/CMakeLists.txt
index 53034bd27b8..e53d3e8b238 100644
--- a/src/vpp-api/vapi/CMakeLists.txt
+++ b/src/vpp-api/vapi/CMakeLists.txt
@@ -34,7 +34,7 @@ install(
vapi.hpp
vapi_internal.h
DESTINATION
- include/vapi
+ ${CMAKE_INSTALL_INCLUDEDIR}/vapi
COMPONENT
vpp-dev
)
@@ -45,7 +45,7 @@ install(
vapi_json_parser.py
vapi_cpp_gen.py
DESTINATION
- share/vpp
+ ${CMAKE_INSTALL_DATADIR}/vpp
COMPONENT
vpp-dev
)
@@ -94,6 +94,7 @@ if(SUBUNIT_INCLUDE_DIR AND SUBUNIT_LIB)
vapi_c_test.c
DEPENDS fake_api_vapi_h
LINK_LIBRARIES ${libs}
+ NO_INSTALL
)
enable_language(CXX)
@@ -102,6 +103,7 @@ if(SUBUNIT_INCLUDE_DIR AND SUBUNIT_LIB)
vapi_cpp_test.cpp
DEPENDS fake_api_vapi_hpp
LINK_LIBRARIES ${libs}
+ NO_INSTALL
)
else()
diff --git a/src/vpp-api/vapi/fake.api.json b/src/vpp-api/vapi/fake.api.json
index 24c9f4dbfa1..f7238c468fa 100644
--- a/src/vpp-api/vapi/fake.api.json
+++ b/src/vpp-api/vapi/fake.api.json
@@ -10,6 +10,8 @@
},
"enums" : [
],
+ "enumflags" : [
+ ],
"unions" : [
],
"types" : [
diff --git a/src/vpp-api/vapi/vapi.c b/src/vpp-api/vapi/vapi.c
index ec87e7b7b72..022f023aeb0 100644
--- a/src/vpp-api/vapi/vapi.c
+++ b/src/vpp-api/vapi/vapi.c
@@ -30,8 +30,17 @@
#include <vlib/vlib.h>
#include <vlibapi/api_common.h>
#include <vlibmemory/memory_client.h>
+#include <vlibmemory/memory_api.h>
+#include <vlibmemory/api.h>
#include <vapi/memclnt.api.vapi.h>
+#include <vapi/vlib.api.vapi.h>
+
+#include <vlibmemory/vl_memory_msg_enum.h>
+
+#define vl_typedefs /* define message structures */
+#include <vlibmemory/vl_memory_api_h.h>
+#undef vl_typedefs
/* we need to use control pings for some stuff and because we're forced to put
* the code in headers, we need a way to be able to grab the ids of these
@@ -40,7 +49,7 @@ vapi_msg_id_t vapi_msg_id_control_ping = 0;
vapi_msg_id_t vapi_msg_id_control_ping_reply = 0;
DEFINE_VAPI_MSG_IDS_MEMCLNT_API_JSON;
-DEFINE_VAPI_MSG_IDS_VPE_API_JSON;
+DEFINE_VAPI_MSG_IDS_VLIB_API_JSON;
struct
{
@@ -54,7 +63,8 @@ typedef struct
u32 context;
vapi_cb_t callback;
void *callback_ctx;
- bool is_dump;
+ vapi_msg_id_t response_id;
+ enum vapi_request_type type;
} vapi_req_t;
static const u32 context_counter_mask = (1 << 31);
@@ -88,6 +98,14 @@ struct vapi_ctx_s
bool connected;
bool handle_keepalives;
pthread_mutex_t requests_mutex;
+ bool use_uds;
+
+ svm_queue_t *vl_input_queue;
+ clib_socket_t client_socket;
+ clib_time_t time;
+ u32 my_client_index;
+ /** client message index hash table */
+ uword *msg_index_by_name_and_crc;
};
u32
@@ -123,15 +141,17 @@ vapi_requests_end (vapi_ctx_t ctx)
}
void
-vapi_store_request (vapi_ctx_t ctx, u32 context, bool is_dump,
- vapi_cb_t callback, void *callback_ctx)
+vapi_store_request (vapi_ctx_t ctx, u32 context, vapi_msg_id_t response_id,
+ enum vapi_request_type request_type, vapi_cb_t callback,
+ void *callback_ctx)
{
assert (!vapi_requests_full (ctx));
/* if the mutex is not held, bad things will happen */
assert (0 != pthread_mutex_trylock (&ctx->requests_mutex));
const int requests_end = vapi_requests_end (ctx);
vapi_req_t *slot = &ctx->requests[requests_end];
- slot->is_dump = is_dump;
+ slot->type = request_type;
+ slot->response_id = response_id;
slot->context = context;
slot->callback = callback;
slot->callback_ctx = callback_ctx;
@@ -213,14 +233,14 @@ vapi_to_be_freed_validate ()
#endif
-void *
-vapi_msg_alloc (vapi_ctx_t ctx, size_t size)
+static void *
+vapi_shm_msg_alloc (vapi_ctx_t ctx, size_t size)
{
if (!ctx->connected)
{
return NULL;
}
- void *rv = vl_msg_api_alloc_or_null (size);
+ void *rv = vl_msg_api_alloc_as_if_client_or_null (size);
if (rv)
{
clib_memset (rv, 0, size);
@@ -228,6 +248,23 @@ vapi_msg_alloc (vapi_ctx_t ctx, size_t size)
return rv;
}
+static void *
+vapi_sock_msg_alloc (size_t size)
+{
+ u8 *rv = 0;
+ vec_validate_init_empty (rv, size - 1, 0);
+ return rv;
+}
+
+void *
+vapi_msg_alloc (vapi_ctx_t ctx, size_t size)
+{
+ if (ctx->use_uds)
+ return vapi_sock_msg_alloc (size);
+
+ return vapi_shm_msg_alloc (ctx, size);
+}
+
void
vapi_msg_free (vapi_ctx_t ctx, void *msg)
{
@@ -235,10 +272,19 @@ vapi_msg_free (vapi_ctx_t ctx, void *msg)
{
return;
}
+
#if VAPI_DEBUG_ALLOC
vapi_trace_free (msg);
#endif
- vl_msg_api_free (msg);
+
+ if (ctx->use_uds)
+ {
+ vec_free (msg);
+ }
+ else
+ {
+ vl_msg_api_free (msg);
+ }
}
vapi_msg_id_t
@@ -277,6 +323,7 @@ vapi_ctx_alloc (vapi_ctx_t * result)
}
pthread_mutex_init (&ctx->requests_mutex, NULL);
*result = ctx;
+ clib_time_init (&ctx->time);
return VAPI_OK;
fail:
vapi_ctx_free (ctx);
@@ -301,21 +348,630 @@ vapi_is_msg_available (vapi_ctx_t ctx, vapi_msg_id_t id)
return vapi_lookup_vl_msg_id (ctx, id) != UINT16_MAX;
}
+/* Cut and paste to avoid adding dependency to client library */
+__clib_nosanitize_addr static void
+VL_API_VEC_UNPOISON (const void *v)
+{
+ const vec_header_t *vh = &((vec_header_t *) v)[-1];
+ clib_mem_unpoison (vh, sizeof (*vh) + vec_len (v));
+}
+
+static void
+vapi_api_name_and_crc_free (vapi_ctx_t ctx)
+{
+ int i;
+ u8 **keys = 0;
+ hash_pair_t *hp;
+
+ if (!ctx->msg_index_by_name_and_crc)
+ return;
+ hash_foreach_pair (hp, ctx->msg_index_by_name_and_crc,
+ ({ vec_add1 (keys, (u8 *) hp->key); }));
+ for (i = 0; i < vec_len (keys); i++)
+ vec_free (keys[i]);
+ vec_free (keys);
+ hash_free (ctx->msg_index_by_name_and_crc);
+}
+
+static vapi_error_e
+vapi_sock_get_errno (int err)
+{
+ switch (err)
+ {
+ case ENOTSOCK:
+ return VAPI_ENOTSOCK;
+ case EACCES:
+ return VAPI_EACCES;
+ case ECONNRESET:
+ return VAPI_ECONNRESET;
+ default:
+ break;
+ }
+ return VAPI_ESOCK_FAILURE;
+}
+
+static vapi_error_e
+vapi_sock_send (vapi_ctx_t ctx, u8 *msg)
+{
+ size_t n;
+ struct msghdr hdr;
+
+ const size_t len = vec_len (msg);
+ const size_t total_len = len + sizeof (msgbuf_t);
+
+ msgbuf_t msgbuf1 = {
+ .q = 0,
+ .gc_mark_timestamp = 0,
+ .data_len = htonl (len),
+ };
+
+ struct iovec bufs[2] = {
+ [0] = { .iov_base = &msgbuf1, .iov_len = sizeof (msgbuf1) },
+ [1] = { .iov_base = msg, .iov_len = len },
+ };
+
+ clib_memset (&hdr, 0, sizeof (hdr));
+ hdr.msg_iov = bufs;
+ hdr.msg_iovlen = 2;
+
+ n = sendmsg (ctx->client_socket.fd, &hdr, 0);
+ if (n < 0)
+ {
+ return vapi_sock_get_errno (errno);
+ }
+
+ if (n < total_len)
+ {
+ return VAPI_EAGAIN;
+ }
+
+ vec_free (msg);
+
+ return VAPI_OK;
+}
+
+static vapi_error_e
+vapi_sock_send2 (vapi_ctx_t ctx, u8 *msg1, u8 *msg2)
+{
+ size_t n;
+ struct msghdr hdr;
+
+ const size_t len1 = vec_len (msg1);
+ const size_t len2 = vec_len (msg2);
+ const size_t total_len = len1 + len2 + 2 * sizeof (msgbuf_t);
+
+ msgbuf_t msgbuf1 = {
+ .q = 0,
+ .gc_mark_timestamp = 0,
+ .data_len = htonl (len1),
+ };
+
+ msgbuf_t msgbuf2 = {
+ .q = 0,
+ .gc_mark_timestamp = 0,
+ .data_len = htonl (len2),
+ };
+
+ struct iovec bufs[4] = {
+ [0] = { .iov_base = &msgbuf1, .iov_len = sizeof (msgbuf1) },
+ [1] = { .iov_base = msg1, .iov_len = len1 },
+ [2] = { .iov_base = &msgbuf2, .iov_len = sizeof (msgbuf2) },
+ [3] = { .iov_base = msg2, .iov_len = len2 },
+ };
+
+ clib_memset (&hdr, 0, sizeof (hdr));
+ hdr.msg_iov = bufs;
+ hdr.msg_iovlen = 4;
+
+ n = sendmsg (ctx->client_socket.fd, &hdr, 0);
+ if (n < 0)
+ {
+ return vapi_sock_get_errno (errno);
+ }
+
+ if (n < total_len)
+ {
+ return VAPI_EAGAIN;
+ }
+
+ vec_free (msg1);
+ vec_free (msg2);
+
+ return VAPI_OK;
+}
+
+static vapi_error_e
+vapi_sock_recv_internal (vapi_ctx_t ctx, u8 **vec_msg, u32 timeout)
+{
+ clib_socket_t *sock = &ctx->client_socket;
+ u32 data_len = 0, msg_size;
+ msgbuf_t *mbp = 0;
+ ssize_t n, current_rx_index;
+ f64 deadline;
+ vapi_error_e rv = VAPI_EAGAIN;
+
+ if (ctx->client_socket.fd == 0)
+ return VAPI_ENOTSOCK;
+
+ deadline = clib_time_now (&ctx->time) + timeout;
+
+ while (1)
+ {
+ current_rx_index = vec_len (sock->rx_buffer);
+ while (current_rx_index < sizeof (*mbp))
+ {
+ vec_validate (sock->rx_buffer, sizeof (*mbp) - 1);
+ n = recv (sock->fd, sock->rx_buffer + current_rx_index,
+ sizeof (*mbp) - current_rx_index, MSG_DONTWAIT);
+ if (n < 0)
+ {
+ if (errno == EAGAIN && clib_time_now (&ctx->time) >= deadline)
+ return VAPI_EAGAIN;
+
+ if (errno == EAGAIN)
+ continue;
+
+ clib_unix_warning ("socket_read");
+ vec_set_len (sock->rx_buffer, current_rx_index);
+ return vapi_sock_get_errno (errno);
+ }
+ current_rx_index += n;
+ }
+ vec_set_len (sock->rx_buffer, current_rx_index);
+
+ mbp = (msgbuf_t *) (sock->rx_buffer);
+ data_len = ntohl (mbp->data_len);
+ current_rx_index = vec_len (sock->rx_buffer);
+ vec_validate (sock->rx_buffer, current_rx_index + data_len);
+ mbp = (msgbuf_t *) (sock->rx_buffer);
+ msg_size = data_len + sizeof (*mbp);
+
+ while (current_rx_index < msg_size)
+ {
+ n = recv (sock->fd, sock->rx_buffer + current_rx_index,
+ msg_size - current_rx_index, MSG_DONTWAIT);
+ if (n < 0)
+ {
+ if (errno == EAGAIN && clib_time_now (&ctx->time) >= deadline)
+ return VAPI_EAGAIN;
+
+ if (errno == EAGAIN)
+ continue;
+
+ clib_unix_warning ("socket_read");
+ vec_set_len (sock->rx_buffer, current_rx_index);
+ return vapi_sock_get_errno (errno);
+ }
+ current_rx_index += n;
+ }
+ vec_set_len (sock->rx_buffer, current_rx_index);
+
+ if (vec_len (sock->rx_buffer) >= data_len + sizeof (*mbp))
+ {
+ if (data_len)
+ {
+ vec_add (*vec_msg, mbp->data, data_len);
+ rv = VAPI_OK;
+ }
+ else
+ {
+ *vec_msg = 0;
+ }
+
+ if (vec_len (sock->rx_buffer) == data_len + sizeof (*mbp))
+ vec_set_len (sock->rx_buffer, 0);
+ else
+ vec_delete (sock->rx_buffer, data_len + sizeof (*mbp), 0);
+ mbp = 0;
+
+ /* Quit if we're out of data, and not expecting a ping reply */
+ if (vec_len (sock->rx_buffer) == 0)
+ break;
+ }
+ }
+ return rv;
+}
+
+static void
+vapi_memclnt_create_v2_reply_t_handler (vapi_ctx_t ctx,
+ vl_api_memclnt_create_v2_reply_t *mp)
+{
+ serialize_main_t _sm, *sm = &_sm;
+ u8 *tblv;
+ u32 nmsgs;
+ int i;
+ u8 *name_and_crc;
+ u32 msg_index;
+
+ ctx->my_client_index = mp->index;
+
+ /* Clean out any previous hash table (unlikely) */
+ vapi_api_name_and_crc_free (ctx);
+
+ ctx->msg_index_by_name_and_crc = hash_create_string (0, sizeof (uword));
+
+ /* Recreate the vnet-side API message handler table */
+ tblv = uword_to_pointer (mp->message_table, u8 *);
+ unserialize_open_data (sm, tblv, vec_len (tblv));
+ unserialize_integer (sm, &nmsgs, sizeof (u32));
+
+ VL_API_VEC_UNPOISON (tblv);
+
+ for (i = 0; i < nmsgs; i++)
+ {
+ msg_index = unserialize_likely_small_unsigned_integer (sm);
+ unserialize_cstring (sm, (char **) &name_and_crc);
+ hash_set_mem (ctx->msg_index_by_name_and_crc, name_and_crc, msg_index);
+ }
+}
+
+static void
+vapi_sockclnt_create_reply_t_handler (vapi_ctx_t ctx,
+ vl_api_sockclnt_create_reply_t *mp)
+{
+ int i;
+ u8 *name_and_crc;
+
+ ctx->my_client_index = mp->index;
+
+ /* Clean out any previous hash table (unlikely) */
+ vapi_api_name_and_crc_free (ctx);
+
+ ctx->msg_index_by_name_and_crc = hash_create_string (0, sizeof (uword));
+
+ for (i = 0; i < be16toh (mp->count); i++)
+ {
+ name_and_crc = format (0, "%s%c", mp->message_table[i].name, 0);
+ hash_set_mem (ctx->msg_index_by_name_and_crc, name_and_crc,
+ be16toh (mp->message_table[i].index));
+ }
+}
+
+static void
+vapi_memclnt_delete_reply_t_handler (vapi_ctx_t ctx,
+ vl_api_memclnt_delete_reply_t *mp)
+{
+ void *oldheap;
+ oldheap = vl_msg_push_heap ();
+ svm_queue_free (ctx->vl_input_queue);
+ vl_msg_pop_heap (oldheap);
+
+ ctx->my_client_index = ~0;
+ ctx->vl_input_queue = 0;
+}
+
+static void
+vapi_sockclnt_delete_reply_t_handler (vapi_ctx_t ctx,
+ vl_api_sockclnt_delete_reply_t *mp)
+{
+ ctx->my_client_index = ~0;
+ ctx->vl_input_queue = 0;
+}
+
+static int
+vapi_shm_client_connect (vapi_ctx_t ctx, const char *name, int ctx_quota,
+ int input_queue_size, bool keepalive)
+{
+ vl_api_memclnt_create_v2_t *mp;
+ vl_api_memclnt_create_v2_reply_t *rp;
+ svm_queue_t *vl_input_queue;
+ vl_shmem_hdr_t *shmem_hdr;
+ int rv = 0;
+ void *oldheap;
+ api_main_t *am = vlibapi_get_main ();
+
+ shmem_hdr = am->shmem_hdr;
+
+ if (shmem_hdr == 0 || shmem_hdr->vl_input_queue == 0)
+ {
+ clib_warning ("shmem_hdr / input queue NULL");
+ return VAPI_ECON_FAIL;
+ }
+
+ clib_mem_unpoison (shmem_hdr, sizeof (*shmem_hdr));
+ VL_MSG_API_SVM_QUEUE_UNPOISON (shmem_hdr->vl_input_queue);
+
+ oldheap = vl_msg_push_heap ();
+ vl_input_queue =
+ svm_queue_alloc_and_init (input_queue_size, sizeof (uword), getpid ());
+ vl_msg_pop_heap (oldheap);
+
+ ctx->my_client_index = ~0;
+ ctx->vl_input_queue = vl_input_queue;
+
+ mp = vl_msg_api_alloc_as_if_client (sizeof (vl_api_memclnt_create_v2_t));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_CREATE_V2);
+ mp->ctx_quota = ctx_quota;
+ mp->input_queue = (uword) vl_input_queue;
+ strncpy ((char *) mp->name, name, sizeof (mp->name) - 1);
+ mp->keepalive = keepalive;
+
+ vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) &mp);
+
+ while (1)
+ {
+ int qstatus;
+ struct timespec ts, tsrem;
+ int i;
+
+ /* Wait up to 10 seconds */
+ for (i = 0; i < 1000; i++)
+ {
+ qstatus =
+ svm_queue_sub (vl_input_queue, (u8 *) &rp, SVM_Q_NOWAIT, 0);
+ if (qstatus == 0)
+ goto read_one_msg;
+ ts.tv_sec = 0;
+ ts.tv_nsec = 10000 * 1000; /* 10 ms */
+ while (nanosleep (&ts, &tsrem) < 0)
+ ts = tsrem;
+ }
+ /* Timeout... */
+ return VAPI_ECON_FAIL;
+
+ read_one_msg:
+ VL_MSG_API_UNPOISON (rp);
+ if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_CREATE_V2_REPLY)
+ {
+ clib_warning ("unexpected reply: id %d", ntohs (rp->_vl_msg_id));
+ continue;
+ }
+ rv = clib_net_to_host_u32 (rp->response);
+ vapi_memclnt_create_v2_reply_t_handler (ctx, rp);
+ break;
+ }
+ return (rv);
+}
+
+static int
+vapi_sock_client_connect (vapi_ctx_t ctx, char *path, const char *name)
+{
+ clib_error_t *error;
+ clib_socket_t *sock;
+ vl_api_sockclnt_create_t *mp;
+ vl_api_sockclnt_create_reply_t *rp;
+ int rv = 0;
+ u8 *msg = 0;
+
+ ctx->my_client_index = ~0;
+
+ if (ctx->client_socket.fd)
+ return VAPI_EINVAL;
+
+ if (name == 0)
+ return VAPI_EINVAL;
+
+ sock = &ctx->client_socket;
+ sock->config = path ? path : API_SOCKET_FILE;
+ sock->flags = CLIB_SOCKET_F_IS_CLIENT;
+
+ if ((error = clib_socket_init (sock)))
+ {
+ clib_error_report (error);
+ return VAPI_ECON_FAIL;
+ }
+
+ mp = vapi_sock_msg_alloc (sizeof (vl_api_sockclnt_create_t));
+ mp->_vl_msg_id = ntohs (VL_API_SOCKCLNT_CREATE);
+ strncpy ((char *) mp->name, name, sizeof (mp->name) - 1);
+
+ if (vapi_sock_send (ctx, (void *) mp) != VAPI_OK)
+ {
+ return VAPI_ECON_FAIL;
+ }
+
+ while (1)
+ {
+ int qstatus;
+ struct timespec ts, tsrem;
+ int i;
+
+ /* Wait up to 10 seconds */
+ for (i = 0; i < 1000; i++)
+ {
+ qstatus = vapi_sock_recv_internal (ctx, &msg, 0);
+
+ if (qstatus == 0)
+ goto read_one_msg;
+ ts.tv_sec = 0;
+ ts.tv_nsec = 10000 * 1000; /* 10 ms */
+ while (nanosleep (&ts, &tsrem) < 0)
+ ts = tsrem;
+ }
+ /* Timeout... */
+ return -1;
+
+ read_one_msg:
+ if (vec_len (msg) == 0)
+ continue;
+
+ rp = (void *) msg;
+ if (ntohs (rp->_vl_msg_id) != VL_API_SOCKCLNT_CREATE_REPLY)
+ {
+ clib_warning ("unexpected reply: id %d", ntohs (rp->_vl_msg_id));
+ continue;
+ }
+ rv = clib_net_to_host_u32 (rp->response);
+ vapi_sockclnt_create_reply_t_handler (ctx, rp);
+ break;
+ }
+ return (rv);
+}
+
+static void
+vapi_shm_client_send_disconnect (vapi_ctx_t ctx, u8 do_cleanup)
+{
+ vl_api_memclnt_delete_t *mp;
+ vl_shmem_hdr_t *shmem_hdr;
+ api_main_t *am = vlibapi_get_main ();
+
+ ASSERT (am->vlib_rp);
+ shmem_hdr = am->shmem_hdr;
+ ASSERT (shmem_hdr && shmem_hdr->vl_input_queue);
+
+ mp = vl_msg_api_alloc (sizeof (vl_api_memclnt_delete_t));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_MEMCLNT_DELETE);
+ mp->index = ctx->my_client_index;
+ mp->do_cleanup = do_cleanup;
+
+ vl_msg_api_send_shmem (shmem_hdr->vl_input_queue, (u8 *) &mp);
+}
+
+static vapi_error_e
+vapi_sock_client_send_disconnect (vapi_ctx_t ctx)
+{
+ vl_api_sockclnt_delete_t *mp;
+
+ mp = vapi_msg_alloc (ctx, sizeof (vl_api_sockclnt_delete_t));
+ clib_memset (mp, 0, sizeof (*mp));
+ mp->_vl_msg_id = ntohs (VL_API_SOCKCLNT_DELETE);
+ mp->client_index = ctx->my_client_index;
+
+ return vapi_sock_send (ctx, (void *) mp);
+}
+
+static int
+vapi_shm_client_disconnect (vapi_ctx_t ctx)
+{
+ vl_api_memclnt_delete_reply_t *rp;
+ svm_queue_t *vl_input_queue;
+ time_t begin;
+ msgbuf_t *msgbuf;
+
+ vl_input_queue = ctx->vl_input_queue;
+ vapi_shm_client_send_disconnect (ctx, 0 /* wait for reply */);
+
+ /*
+ * Have to be careful here, in case the client is disconnecting
+ * because e.g. the vlib process died, or is unresponsive.
+ */
+ begin = time (0);
+ while (1)
+ {
+ time_t now;
+
+ now = time (0);
+
+ if (now >= (begin + 2))
+ {
+ clib_warning ("peer unresponsive, give up");
+ ctx->my_client_index = ~0;
+ return VAPI_ENORESP;
+ }
+ if (svm_queue_sub (vl_input_queue, (u8 *) &rp, SVM_Q_NOWAIT, 0) < 0)
+ continue;
+
+ VL_MSG_API_UNPOISON (rp);
+
+ /* drain the queue */
+ if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_DELETE_REPLY)
+ {
+ clib_warning ("queue drain: %d", ntohs (rp->_vl_msg_id));
+ msgbuf = (msgbuf_t *) ((u8 *) rp - offsetof (msgbuf_t, data));
+ vl_msg_api_handler ((void *) rp, ntohl (msgbuf->data_len));
+ continue;
+ }
+ msgbuf = (msgbuf_t *) ((u8 *) rp - offsetof (msgbuf_t, data));
+ vl_msg_api_handler ((void *) rp, ntohl (msgbuf->data_len));
+ break;
+ }
+
+ vapi_api_name_and_crc_free (ctx);
+ return 0;
+}
+
+static vapi_error_e
+vapi_sock_client_disconnect (vapi_ctx_t ctx)
+{
+ vl_api_sockclnt_delete_reply_t *rp;
+ u8 *msg = 0;
+ msgbuf_t *msgbuf;
+ int rv;
+ f64 deadline;
+
+ deadline = clib_time_now (&ctx->time) + 2;
+
+ do
+ {
+ rv = vapi_sock_client_send_disconnect (ctx);
+ }
+ while (clib_time_now (&ctx->time) < deadline && rv != VAPI_OK);
+
+ while (1)
+ {
+ if (clib_time_now (&ctx->time) >= deadline)
+ {
+ clib_warning ("peer unresponsive, give up");
+ ctx->my_client_index = ~0;
+ return VAPI_ENORESP;
+ }
+
+ if (vapi_sock_recv_internal (ctx, &msg, 0) != VAPI_OK)
+ continue;
+
+ msgbuf = (void *) msg;
+ rp = (void *) msgbuf->data;
+ /* drain the queue */
+ if (ntohs (rp->_vl_msg_id) != VL_API_SOCKCLNT_DELETE_REPLY)
+ {
+ clib_warning ("queue drain: %d", ntohs (rp->_vl_msg_id));
+ msgbuf = (msgbuf_t *) ((u8 *) rp - offsetof (msgbuf_t, data));
+ vl_msg_api_handler ((void *) rp, ntohl (msgbuf->data_len));
+ continue;
+ }
+ msgbuf = (msgbuf_t *) ((u8 *) rp - offsetof (msgbuf_t, data));
+ vl_msg_api_handler ((void *) rp, ntohl (msgbuf->data_len));
+ break;
+ }
+
+ clib_socket_close (&ctx->client_socket);
+ vapi_api_name_and_crc_free (ctx);
+ return VAPI_OK;
+}
+
+int
+vapi_client_disconnect (vapi_ctx_t ctx)
+{
+ if (ctx->use_uds)
+ {
+ return vapi_sock_client_disconnect (ctx);
+ }
+ return vapi_shm_client_disconnect (ctx);
+}
+
+u32
+vapi_api_get_msg_index (vapi_ctx_t ctx, u8 *name_and_crc)
+{
+ uword *p;
+
+ if (ctx->msg_index_by_name_and_crc)
+ {
+ p = hash_get_mem (ctx->msg_index_by_name_and_crc, name_and_crc);
+ if (p)
+ return p[0];
+ }
+ return ~0;
+}
+
vapi_error_e
-vapi_connect (vapi_ctx_t ctx, const char *name,
- const char *chroot_prefix,
- int max_outstanding_requests,
- int response_queue_size, vapi_mode_e mode,
- bool handle_keepalives)
+vapi_connect_ex (vapi_ctx_t ctx, const char *name, const char *path,
+ int max_outstanding_requests, int response_queue_size,
+ vapi_mode_e mode, bool handle_keepalives, bool use_uds)
{
+ int rv;
+
if (response_queue_size <= 0 || max_outstanding_requests <= 0)
{
return VAPI_EINVAL;
}
- if (!clib_mem_get_per_cpu_heap () && !clib_mem_init (0, 1024 * 1024 * 32))
+
+ if (!clib_mem_get_per_cpu_heap () && !clib_mem_init (0, 1024L * 1024 * 32))
{
return VAPI_ENOMEM;
}
+
ctx->requests_size = max_outstanding_requests;
const size_t size = ctx->requests_size * sizeof (*ctx->requests);
void *tmp = realloc (ctx->requests, size);
@@ -327,34 +983,48 @@ vapi_connect (vapi_ctx_t ctx, const char *name,
clib_memset (ctx->requests, 0, size);
/* coverity[MISSING_LOCK] - 177211 requests_mutex is not needed here */
ctx->requests_start = ctx->requests_count = 0;
- if (chroot_prefix)
- {
- VAPI_DBG ("set memory root path `%s'", chroot_prefix);
- vl_set_memory_root_path ((char *) chroot_prefix);
- }
- static char api_map[] = "/vpe-api";
- VAPI_DBG ("client api map `%s'", api_map);
- if ((vl_client_api_map (api_map)) < 0)
+ ctx->use_uds = use_uds;
+
+ if (use_uds)
{
- return VAPI_EMAP_FAIL;
+ if (vapi_sock_client_connect (ctx, (char *) path, name) < 0)
+ {
+ return VAPI_ECON_FAIL;
+ }
}
- VAPI_DBG ("connect client `%s'", name);
- if (vl_client_connect ((char *) name, 0, response_queue_size) < 0)
+ else
{
- vl_client_api_unmap ();
- return VAPI_ECON_FAIL;
- }
+ if (path)
+ {
+ VAPI_DBG ("set memory root path `%s'", path);
+ vl_set_memory_root_path ((char *) path);
+ }
+ static char api_map[] = "/vpe-api";
+ VAPI_DBG ("client api map `%s'", api_map);
+ if ((rv = vl_map_shmem (api_map, 0 /* is_vlib */)) < 0)
+ {
+ return VAPI_EMAP_FAIL;
+ }
+ VAPI_DBG ("connect client `%s'", name);
+ if (vapi_shm_client_connect (ctx, (char *) name, 0, response_queue_size,
+ true) < 0)
+ {
+ vl_client_api_unmap ();
+ return VAPI_ECON_FAIL;
+ }
#if VAPI_DEBUG_CONNECT
VAPI_DBG ("start probing messages");
#endif
- int rv;
+ }
+
int i;
for (i = 0; i < __vapi_metadata.count; ++i)
{
vapi_message_desc_t *m = __vapi_metadata.msgs[i];
u8 scratch[m->name_with_crc_len + 1];
memcpy (scratch, m->name_with_crc, m->name_with_crc_len + 1);
- u32 id = vl_msg_api_get_msg_index (scratch);
+ u32 id = vapi_api_get_msg_index (ctx, scratch);
+
if (VAPI_INVALID_MSG_ID != id)
{
if (id > UINT16_MAX)
@@ -366,10 +1036,9 @@ vapi_connect (vapi_ctx_t ctx, const char *name,
}
if (id > ctx->vl_msg_id_max)
{
- vapi_msg_id_t *tmp = realloc (ctx->vl_msg_id_to_vapi_msg_t,
- sizeof
- (*ctx->vl_msg_id_to_vapi_msg_t) *
- (id + 1));
+ vapi_msg_id_t *tmp =
+ realloc (ctx->vl_msg_id_to_vapi_msg_t,
+ sizeof (*ctx->vl_msg_id_to_vapi_msg_t) * (id + 1));
if (!tmp)
{
rv = VAPI_ENOMEM;
@@ -397,8 +1066,8 @@ vapi_connect (vapi_ctx_t ctx, const char *name,
if (!vapi_is_msg_available (ctx, vapi_msg_id_control_ping) ||
!vapi_is_msg_available (ctx, vapi_msg_id_control_ping_reply))
{
- VAPI_ERR
- ("control ping or control ping reply not available, cannot connect");
+ VAPI_ERR (
+ "control ping or control ping reply not available, cannot connect");
rv = VAPI_EINCOMPATIBLE;
goto fail;
}
@@ -414,111 +1083,393 @@ vapi_connect (vapi_ctx_t ctx, const char *name,
}
return VAPI_OK;
fail:
- vl_client_disconnect ();
+ vapi_client_disconnect (ctx);
vl_client_api_unmap ();
return rv;
}
vapi_error_e
-vapi_disconnect (vapi_ctx_t ctx)
+vapi_connect (vapi_ctx_t ctx, const char *name, const char *chroot_prefix,
+ int max_outstanding_requests, int response_queue_size,
+ vapi_mode_e mode, bool handle_keepalives)
+{
+ return vapi_connect_ex (ctx, name, chroot_prefix, max_outstanding_requests,
+ response_queue_size, mode, handle_keepalives, false);
+}
+
+/*
+ * API client running in the same process as VPP
+ */
+vapi_error_e
+vapi_connect_from_vpp (vapi_ctx_t ctx, const char *name,
+ int max_outstanding_requests, int response_queue_size,
+ vapi_mode_e mode, bool handle_keepalives)
+{
+ int rv;
+
+ if (ctx->use_uds)
+ {
+ return VAPI_ENOTSUP;
+ }
+
+ if (response_queue_size <= 0 || max_outstanding_requests <= 0)
+ {
+ return VAPI_EINVAL;
+ }
+
+ ctx->requests_size = max_outstanding_requests;
+ const size_t size = ctx->requests_size * sizeof (*ctx->requests);
+ void *tmp = realloc (ctx->requests, size);
+ if (!tmp)
+ {
+ return VAPI_ENOMEM;
+ }
+ ctx->requests = tmp;
+ clib_memset (ctx->requests, 0, size);
+ /* coverity[MISSING_LOCK] - 177211 requests_mutex is not needed here */
+ ctx->requests_start = ctx->requests_count = 0;
+
+ VAPI_DBG ("connect client `%s'", name);
+ if (vapi_shm_client_connect (ctx, (char *) name, 0, response_queue_size,
+ handle_keepalives) < 0)
+ {
+ return VAPI_ECON_FAIL;
+ }
+
+ int i;
+ for (i = 0; i < __vapi_metadata.count; ++i)
+ {
+ vapi_message_desc_t *m = __vapi_metadata.msgs[i];
+ u8 scratch[m->name_with_crc_len + 1];
+ memcpy (scratch, m->name_with_crc, m->name_with_crc_len + 1);
+ u32 id = vapi_api_get_msg_index (ctx, scratch);
+ if (VAPI_INVALID_MSG_ID != id)
+ {
+ if (id > UINT16_MAX)
+ {
+ VAPI_ERR ("Returned vl_msg_id `%u' > UINT16MAX `%u'!", id,
+ UINT16_MAX);
+ rv = VAPI_EINVAL;
+ goto fail;
+ }
+ if (id > ctx->vl_msg_id_max)
+ {
+ vapi_msg_id_t *tmp =
+ realloc (ctx->vl_msg_id_to_vapi_msg_t,
+ sizeof (*ctx->vl_msg_id_to_vapi_msg_t) * (id + 1));
+ if (!tmp)
+ {
+ rv = VAPI_ENOMEM;
+ goto fail;
+ }
+ ctx->vl_msg_id_to_vapi_msg_t = tmp;
+ ctx->vl_msg_id_max = id;
+ }
+ ctx->vl_msg_id_to_vapi_msg_t[id] = m->id;
+ ctx->vapi_msg_id_t_to_vl_msg_id[m->id] = id;
+ }
+ else
+ {
+ ctx->vapi_msg_id_t_to_vl_msg_id[m->id] = UINT16_MAX;
+ VAPI_DBG ("Message `%s' not available", m->name_with_crc);
+ }
+ }
+ if (!vapi_is_msg_available (ctx, vapi_msg_id_control_ping) ||
+ !vapi_is_msg_available (ctx, vapi_msg_id_control_ping_reply))
+ {
+ VAPI_ERR (
+ "control ping or control ping reply not available, cannot connect");
+ rv = VAPI_EINCOMPATIBLE;
+ goto fail;
+ }
+ ctx->mode = mode;
+ ctx->connected = true;
+ if (vapi_is_msg_available (ctx, vapi_msg_id_memclnt_keepalive))
+ {
+ ctx->handle_keepalives = handle_keepalives;
+ }
+ else
+ {
+ ctx->handle_keepalives = false;
+ }
+ return VAPI_OK;
+fail:
+ vapi_client_disconnect (ctx);
+ return rv;
+}
+
+vapi_error_e
+vapi_disconnect_from_vpp (vapi_ctx_t ctx)
{
if (!ctx->connected)
{
return VAPI_EINVAL;
}
- vl_client_disconnect ();
+
+ if (ctx->use_uds)
+ {
+ return VAPI_ENOTSUP;
+ }
+
+ vl_api_memclnt_delete_reply_t *rp;
+ svm_queue_t *vl_input_queue;
+ time_t begin;
+ vl_input_queue = ctx->vl_input_queue;
+ vapi_shm_client_send_disconnect (ctx, 0 /* wait for reply */);
+
+ /*
+ * Have to be careful here, in case the client is disconnecting
+ * because e.g. the vlib process died, or is unresponsive.
+ */
+ begin = time (0);
+ vapi_error_e rv = VAPI_OK;
+ while (1)
+ {
+ time_t now;
+
+ now = time (0);
+
+ if (now >= (begin + 2))
+ {
+ clib_warning ("peer unresponsive, give up");
+ ctx->my_client_index = ~0;
+ rv = VAPI_ENORESP;
+ goto fail;
+ }
+ if (svm_queue_sub (vl_input_queue, (u8 *) &rp, SVM_Q_NOWAIT, 0) < 0)
+ continue;
+
+ VL_MSG_API_UNPOISON (rp);
+
+ /* drain the queue */
+ if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_DELETE_REPLY)
+ {
+ clib_warning ("queue drain: %d", ntohs (rp->_vl_msg_id));
+ vl_msg_api_free (rp);
+ continue;
+ }
+ vapi_memclnt_delete_reply_t_handler (
+ ctx, (void *) rp /*, ntohl (msgbuf->data_len)*/);
+ break;
+ }
+fail:
+ vapi_api_name_and_crc_free (ctx);
+
+ ctx->connected = false;
+ return rv;
+}
+
+static vapi_error_e
+vapi_shm_disconnect (vapi_ctx_t ctx)
+{
+ vl_api_memclnt_delete_reply_t *rp;
+ svm_queue_t *vl_input_queue;
+ time_t begin;
+ vl_input_queue = ctx->vl_input_queue;
+ vapi_shm_client_send_disconnect (ctx, 0 /* wait for reply */);
+
+ /*
+ * Have to be careful here, in case the client is disconnecting
+ * because e.g. the vlib process died, or is unresponsive.
+ */
+ begin = time (0);
+ vapi_error_e rv = VAPI_OK;
+ while (1)
+ {
+ time_t now;
+
+ now = time (0);
+
+ if (now >= (begin + 2))
+ {
+ clib_warning ("peer unresponsive, give up");
+ ctx->my_client_index = ~0;
+ rv = VAPI_ENORESP;
+ goto fail;
+ }
+ if (svm_queue_sub (vl_input_queue, (u8 *) &rp, SVM_Q_NOWAIT, 0) < 0)
+ continue;
+
+ VL_MSG_API_UNPOISON (rp);
+
+ /* drain the queue */
+ if (ntohs (rp->_vl_msg_id) != VL_API_MEMCLNT_DELETE_REPLY)
+ {
+ clib_warning ("queue drain: %d", ntohs (rp->_vl_msg_id));
+ vl_msg_api_free (rp);
+ continue;
+ }
+ vapi_memclnt_delete_reply_t_handler (
+ ctx, (void *) rp /*, ntohl (msgbuf->data_len)*/);
+ break;
+ }
+fail:
+ vapi_api_name_and_crc_free (ctx);
+
vl_client_api_unmap ();
#if VAPI_DEBUG_ALLOC
vapi_to_be_freed_validate ();
#endif
ctx->connected = false;
- return VAPI_OK;
+ return rv;
+}
+
+static vapi_error_e
+vapi_sock_disconnect (vapi_ctx_t ctx)
+{
+ vl_api_sockclnt_delete_reply_t *rp;
+ time_t begin;
+ u8 *msg = 0;
+
+ vapi_sock_client_send_disconnect (ctx);
+
+ begin = time (0);
+ vapi_error_e rv = VAPI_OK;
+ while (1)
+ {
+ time_t now;
+
+ now = time (0);
+
+ if (now >= (begin + 2))
+ {
+ clib_warning ("peer unresponsive, give up");
+ ctx->my_client_index = ~0;
+ rv = VAPI_ENORESP;
+ goto fail;
+ }
+ if (vapi_sock_recv_internal (ctx, &msg, 0) < 0)
+ continue;
+
+ if (vec_len (msg) == 0)
+ continue;
+
+ rp = (void *) msg;
+
+ /* drain the queue */
+ if (ntohs (rp->_vl_msg_id) != VL_API_SOCKCLNT_DELETE_REPLY)
+ {
+ clib_warning ("queue drain: %d", ntohs (rp->_vl_msg_id));
+ continue;
+ }
+ vapi_sockclnt_delete_reply_t_handler (
+ ctx, (void *) rp /*, ntohl (msgbuf->data_len)*/);
+ break;
+ }
+fail:
+ clib_socket_close (&ctx->client_socket);
+ vapi_api_name_and_crc_free (ctx);
+
+ ctx->connected = false;
+ return rv;
}
vapi_error_e
-vapi_get_fd (vapi_ctx_t ctx, int *fd)
+vapi_disconnect (vapi_ctx_t ctx)
{
- return VAPI_ENOTSUP;
+ if (!ctx->connected)
+ {
+ return VAPI_EINVAL;
+ }
+
+ if (ctx->use_uds)
+ {
+ return vapi_sock_disconnect (ctx);
+ }
+ return vapi_shm_disconnect (ctx);
}
vapi_error_e
-vapi_send (vapi_ctx_t ctx, void *msg)
+vapi_get_fd (vapi_ctx_t ctx, int *fd)
{
- vapi_error_e rv = VAPI_OK;
- if (!ctx || !msg || !ctx->connected)
+ if (ctx->use_uds && fd)
{
- rv = VAPI_EINVAL;
- goto out;
+ *fd = ctx->client_socket.fd;
+ return VAPI_OK;
}
- int tmp;
- svm_queue_t *q = vlibapi_get_main ()->shmem_hdr->vl_input_queue;
+ return VAPI_ENOTSUP;
+}
+
#if VAPI_DEBUG
+static void
+vapi_debug_log (vapi_ctx_t ctx, void *msg, const char *fun)
+{
unsigned msgid = be16toh (*(u16 *) msg);
if (msgid <= ctx->vl_msg_id_max)
{
vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[msgid];
if (id < __vapi_metadata.count)
{
- VAPI_DBG ("send msg@%p:%u[%s]", msg, msgid,
+ VAPI_DBG ("%s msg@%p:%u[%s]", fun, msg, msgid,
__vapi_metadata.msgs[id]->name);
}
else
{
- VAPI_DBG ("send msg@%p:%u[UNKNOWN]", msg, msgid);
+ VAPI_DBG ("%s msg@%p:%u[UNKNOWN]", fun, msg, msgid);
}
}
else
{
- VAPI_DBG ("send msg@%p:%u[UNKNOWN]", msg, msgid);
+ VAPI_DBG ("%s msg@%p:%u[UNKNOWN]", fun, msg, msgid);
}
+}
#endif
- tmp = svm_queue_add (q, (u8 *) & msg,
- VAPI_MODE_BLOCKING == ctx->mode ? 0 : 1);
+
+static vapi_error_e
+vapi_shm_send (vapi_ctx_t ctx, void *msg)
+{
+ int rv = VAPI_OK;
+ int tmp;
+ svm_queue_t *q = vlibapi_get_main ()->shmem_hdr->vl_input_queue;
+#if VAPI_DEBUG
+ vapi_debug_log (ctx, msg, "send");
+#endif
+ tmp =
+ svm_queue_add (q, (u8 *) &msg, VAPI_MODE_BLOCKING == ctx->mode ? 0 : 1);
if (tmp < 0)
{
rv = VAPI_EAGAIN;
}
else
VL_MSG_API_POISON (msg);
-out:
- VAPI_DBG ("vapi_send() rv = %d", rv);
+
return rv;
}
vapi_error_e
-vapi_send2 (vapi_ctx_t ctx, void *msg1, void *msg2)
+vapi_send (vapi_ctx_t ctx, void *msg)
{
vapi_error_e rv = VAPI_OK;
- if (!ctx || !msg1 || !msg2 || !ctx->connected)
+ if (!ctx || !msg || !ctx->connected)
{
rv = VAPI_EINVAL;
goto out;
}
- svm_queue_t *q = vlibapi_get_main ()->shmem_hdr->vl_input_queue;
-#if VAPI_DEBUG
- unsigned msgid1 = be16toh (*(u16 *) msg1);
- unsigned msgid2 = be16toh (*(u16 *) msg2);
- const char *name1 = "UNKNOWN";
- const char *name2 = "UNKNOWN";
- if (msgid1 <= ctx->vl_msg_id_max)
+
+ if (ctx->use_uds)
{
- vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[msgid1];
- if (id < __vapi_metadata.count)
- {
- name1 = __vapi_metadata.msgs[id]->name;
- }
+ rv = vapi_sock_send (ctx, msg);
}
- if (msgid2 <= ctx->vl_msg_id_max)
+ else
{
- vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[msgid2];
- if (id < __vapi_metadata.count)
- {
- name2 = __vapi_metadata.msgs[id]->name;
- }
+ rv = vapi_shm_send (ctx, msg);
}
- VAPI_DBG ("send two: %u[%s], %u[%s]", msgid1, name1, msgid2, name2);
+
+out:
+ VAPI_DBG ("vapi_send() rv = %d", rv);
+ return rv;
+}
+
+static vapi_error_e
+vapi_shm_send2 (vapi_ctx_t ctx, void *msg1, void *msg2)
+{
+ vapi_error_e rv = VAPI_OK;
+ svm_queue_t *q = vlibapi_get_main ()->shmem_hdr->vl_input_queue;
+#if VAPI_DEBUG
+ vapi_debug_log (ctx, msg1, "send2");
+ vapi_debug_log (ctx, msg2, "send2");
#endif
- int tmp = svm_queue_add2 (q, (u8 *) & msg1, (u8 *) & msg2,
+ int tmp = svm_queue_add2 (q, (u8 *) &msg1, (u8 *) &msg2,
VAPI_MODE_BLOCKING == ctx->mode ? 0 : 1);
if (tmp < 0)
{
@@ -526,36 +1477,52 @@ vapi_send2 (vapi_ctx_t ctx, void *msg1, void *msg2)
}
else
VL_MSG_API_POISON (msg1);
-out:
- VAPI_DBG ("vapi_send() rv = %d", rv);
+
return rv;
}
vapi_error_e
-vapi_recv (vapi_ctx_t ctx, void **msg, size_t * msg_size,
- svm_q_conditional_wait_t cond, u32 time)
+vapi_send2 (vapi_ctx_t ctx, void *msg1, void *msg2)
{
- if (!ctx || !ctx->connected || !msg || !msg_size)
+ vapi_error_e rv = VAPI_OK;
+ if (!ctx || !msg1 || !msg2 || !ctx->connected)
{
- return VAPI_EINVAL;
+ rv = VAPI_EINVAL;
+ goto out;
}
- vapi_error_e rv = VAPI_OK;
- api_main_t *am = vlibapi_get_main ();
- uword data;
- if (am->our_pid == 0)
+ if (ctx->use_uds)
{
- return VAPI_EINVAL;
+ rv = vapi_sock_send2 (ctx, msg1, msg2);
+ }
+ else
+ {
+ rv = vapi_shm_send2 (ctx, msg1, msg2);
}
- svm_queue_t *q = am->vl_input_queue;
-again:
+out:
+ VAPI_DBG ("vapi_send() rv = %d", rv);
+ return rv;
+}
+
+static vapi_error_e
+vapi_shm_recv (vapi_ctx_t ctx, void **msg, size_t *msg_size,
+ svm_q_conditional_wait_t cond, u32 time)
+{
+ vapi_error_e rv = VAPI_OK;
+ uword data;
+
+ svm_queue_t *q = ctx->vl_input_queue;
+
VAPI_DBG ("doing shm queue sub");
int tmp = svm_queue_sub (q, (u8 *) & data, cond, time);
- if (tmp == 0)
+ if (tmp != 0)
{
+ return VAPI_EAGAIN;
+ }
+
VL_MSG_API_UNPOISON ((void *) data);
#if VAPI_DEBUG_ALLOC
vapi_add_to_be_freed ((void *) data);
@@ -569,62 +1536,99 @@ again:
}
*msg = (u8 *) data;
*msg_size = ntohl (msgbuf->data_len);
+
#if VAPI_DEBUG
- unsigned msgid = be16toh (*(u16 *) * msg);
- if (msgid <= ctx->vl_msg_id_max)
- {
- vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[msgid];
- if (id < __vapi_metadata.count)
- {
- VAPI_DBG ("recv msg@%p:%u[%s]", *msg, msgid,
- __vapi_metadata.msgs[id]->name);
- }
- else
- {
- VAPI_DBG ("recv msg@%p:%u[UNKNOWN]", *msg, msgid);
- }
- }
- else
- {
- VAPI_DBG ("recv msg@%p:%u[UNKNOWN]", *msg, msgid);
- }
+ vapi_debug_log (ctx, msg, "recv");
+#endif
+
+ return rv;
+}
+
+static vapi_error_e
+vapi_sock_recv (vapi_ctx_t ctx, void **msg, size_t *msg_size, u32 time)
+{
+ vapi_error_e rv = VAPI_OK;
+ u8 *data = 0;
+ if (time == 0 && ctx->mode == VAPI_MODE_BLOCKING)
+ time = 1;
+
+ rv = vapi_sock_recv_internal (ctx, &data, time);
+
+ if (rv != VAPI_OK)
+ {
+ return rv;
+ }
+
+ *msg = data;
+ *msg_size = vec_len (data);
+
+#if VAPI_DEBUG
+ vapi_debug_log (ctx, msg, "recv");
#endif
- if (ctx->handle_keepalives)
+
+ return rv;
+}
+
+vapi_error_e
+vapi_recv (vapi_ctx_t ctx, void **msg, size_t *msg_size,
+ svm_q_conditional_wait_t cond, u32 time)
+{
+ if (!ctx || !ctx->connected || !msg || !msg_size)
+ {
+ return VAPI_EINVAL;
+ }
+ vapi_error_e rv = VAPI_OK;
+
+again:
+ if (ctx->use_uds)
+ {
+ rv = vapi_sock_recv (ctx, msg, msg_size, time);
+ }
+ else
+ {
+ rv = vapi_shm_recv (ctx, msg, msg_size, cond, time);
+ }
+
+ if (rv != VAPI_OK)
+ return rv;
+
+ if (ctx->handle_keepalives)
+ {
+ unsigned msgid = be16toh (*(u16 *) *msg);
+ if (msgid == vapi_lookup_vl_msg_id (ctx, vapi_msg_id_memclnt_keepalive))
{
- unsigned msgid = be16toh (*(u16 *) * msg);
- if (msgid ==
- vapi_lookup_vl_msg_id (ctx, vapi_msg_id_memclnt_keepalive))
+ vapi_msg_memclnt_keepalive_reply *reply = NULL;
+ do
{
- vapi_msg_memclnt_keepalive_reply *reply = NULL;
- do
- {
- reply = vapi_msg_alloc (ctx, sizeof (*reply));
- }
- while (!reply);
- reply->header.context = vapi_get_client_index (ctx);
- reply->header._vl_msg_id =
- vapi_lookup_vl_msg_id (ctx,
- vapi_msg_id_memclnt_keepalive_reply);
- reply->payload.retval = 0;
- vapi_msg_memclnt_keepalive_reply_hton (reply);
- while (VAPI_EAGAIN == vapi_send (ctx, reply));
- vapi_msg_free (ctx, *msg);
- VAPI_DBG ("autohandled memclnt_keepalive");
- goto again;
+ reply = vapi_msg_alloc (ctx, sizeof (*reply));
}
+ while (!reply);
+ reply->header.context = vapi_get_client_index (ctx);
+ reply->header._vl_msg_id =
+ vapi_lookup_vl_msg_id (ctx, vapi_msg_id_memclnt_keepalive_reply);
+ reply->payload.retval = 0;
+ vapi_msg_memclnt_keepalive_reply_hton (reply);
+ while (VAPI_EAGAIN == vapi_send (ctx, reply))
+ ;
+ vapi_msg_free (ctx, *msg);
+ goto again;
}
}
- else
- {
- rv = VAPI_EAGAIN;
- }
+
return rv;
}
vapi_error_e
-vapi_wait (vapi_ctx_t ctx, vapi_wait_mode_e mode)
+vapi_wait (vapi_ctx_t ctx)
{
- return VAPI_ENOTSUP;
+ if (ctx->use_uds)
+ return VAPI_ENOTSUP;
+
+ svm_queue_lock (ctx->vl_input_queue);
+ svm_queue_wait (ctx->vl_input_queue);
+ svm_queue_unlock (ctx->vl_input_queue);
+
+ return VAPI_OK;
}
static vapi_error_e
@@ -675,8 +1679,34 @@ vapi_dispatch_response (vapi_ctx_t ctx, vapi_msg_id_t id,
int payload_offset = vapi_get_payload_offset (id);
void *payload = ((u8 *) msg) + payload_offset;
bool is_last = true;
- if (ctx->requests[tmp].is_dump)
+ switch (ctx->requests[tmp].type)
{
+ case VAPI_REQUEST_STREAM:
+ if (ctx->requests[tmp].response_id == id)
+ {
+ is_last = false;
+ }
+ else
+ {
+ VAPI_DBG ("Stream response ID doesn't match current ID, move to "
+ "next ID");
+ clib_memset (&ctx->requests[tmp], 0,
+ sizeof (ctx->requests[tmp]));
+ ++ctx->requests_start;
+ --ctx->requests_count;
+ if (ctx->requests_start == ctx->requests_size)
+ {
+ ctx->requests_start = 0;
+ }
+ tmp = ctx->requests_start;
+ if (ctx->requests[tmp].context != context)
+ {
+ VAPI_ERR ("Unexpected context %u, expected context %u!",
+ ctx->requests[tmp].context, context);
+ }
+ }
+ break;
+ case VAPI_REQUEST_DUMP:
if (vapi_msg_id_control_ping_reply == id)
{
payload = NULL;
@@ -685,12 +1715,14 @@ vapi_dispatch_response (vapi_ctx_t ctx, vapi_msg_id_t id,
{
is_last = false;
}
+ break;
+ case VAPI_REQUEST_REG:
+ break;
}
if (payload_offset != -1)
{
- rv =
- ctx->requests[tmp].callback (ctx, ctx->requests[tmp].callback_ctx,
- VAPI_OK, is_last, payload);
+ rv = ctx->requests[tmp].callback (
+ ctx, ctx->requests[tmp].callback_ctx, VAPI_OK, is_last, payload);
}
else
{
@@ -752,13 +1784,22 @@ vapi_msg_is_with_context (vapi_msg_id_t id)
return __vapi_metadata.msgs[id]->has_context;
}
+static int
+vapi_verify_msg_size (vapi_msg_id_t id, void *buf, uword buf_size)
+{
+ assert (id < __vapi_metadata.count);
+ return __vapi_metadata.msgs[id]->verify_msg_size (buf, buf_size);
+}
+
vapi_error_e
vapi_dispatch_one (vapi_ctx_t ctx)
{
VAPI_DBG ("vapi_dispatch_one()");
void *msg;
- size_t size;
- vapi_error_e rv = vapi_recv (ctx, &msg, &size, SVM_Q_WAIT, 0);
+ uword size;
+ svm_q_conditional_wait_t cond =
+ vapi_is_nonblocking (ctx) ? SVM_Q_NOWAIT : SVM_Q_WAIT;
+ vapi_error_e rv = vapi_recv (ctx, &msg, &size, cond, 0);
if (VAPI_OK != rv)
{
VAPI_DBG ("vapi_recv failed with rv=%d", rv);
@@ -780,17 +1821,13 @@ vapi_dispatch_one (vapi_ctx_t ctx)
return VAPI_EINVAL;
}
const vapi_msg_id_t id = ctx->vl_msg_id_to_vapi_msg_t[vpp_id];
- const size_t expect_size = vapi_get_message_size (id);
- if (size < expect_size)
+ vapi_get_swap_to_host_func (id) (msg);
+ if (vapi_verify_msg_size (id, msg, size))
{
- VAPI_ERR
- ("Invalid msg received, unexpected size `%zu' < expected min `%zu'",
- size, expect_size);
vapi_msg_free (ctx, msg);
return VAPI_EINVAL;
}
u32 context;
- vapi_get_swap_to_host_func (id) (msg);
if (vapi_msg_is_with_context (id))
{
context = *(u32 *) (((u8 *) msg) + vapi_get_context_offset (id));
@@ -864,7 +1901,7 @@ vapi_lookup_vl_msg_id (vapi_ctx_t ctx, vapi_msg_id_t id)
int
vapi_get_client_index (vapi_ctx_t ctx)
{
- return vlibapi_get_main ()->my_client_index;
+ return ctx->my_client_index;
}
bool
@@ -899,13 +1936,6 @@ void (*vapi_get_swap_to_be_func (vapi_msg_id_t id)) (void *msg)
}
size_t
-vapi_get_message_size (vapi_msg_id_t id)
-{
- assert (id < __vapi_metadata.count);
- return __vapi_metadata.msgs[id]->size;
-}
-
-size_t
vapi_get_context_offset (vapi_msg_id_t id)
{
assert (id < __vapi_metadata.count);
@@ -982,6 +2012,16 @@ vapi_get_msg_name (vapi_msg_id_t id)
return __vapi_metadata.msgs[id]->name;
}
+void
+vapi_stop_rx_thread (vapi_ctx_t ctx)
+{
+ if (!ctx || !ctx->connected || !ctx->vl_input_queue)
+ {
+ return;
+ }
+
+ vl_client_stop_rx_thread (ctx->vl_input_queue);
+}
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vpp-api/vapi/vapi.h b/src/vpp-api/vapi/vapi.h
index 08d016b0dd7..970c5080667 100644
--- a/src/vpp-api/vapi/vapi.h
+++ b/src/vpp-api/vapi/vapi.h
@@ -44,7 +44,7 @@ extern "C"
* process). It's not recommended to mix the higher and lower level APIs. Due
* to version issues, the higher-level APIs are not part of the shared library.
*/
- typedef struct vapi_ctx_s *vapi_ctx_t;
+typedef struct vapi_ctx_s *vapi_ctx_t;
/**
* @brief allocate vapi message of given size
@@ -56,7 +56,7 @@ extern "C"
*
* @return pointer to message or NULL if out of memory
*/
- void *vapi_msg_alloc (vapi_ctx_t ctx, size_t size);
+void *vapi_msg_alloc (vapi_ctx_t ctx, size_t size);
/**
* @brief free a vapi message
@@ -66,7 +66,7 @@ extern "C"
* @param ctx opaque vapi context
* @param msg message to be freed
*/
- void vapi_msg_free (vapi_ctx_t ctx, void *msg);
+void vapi_msg_free (vapi_ctx_t ctx, void *msg);
/**
* @brief allocate vapi context
@@ -75,18 +75,18 @@ extern "C"
*
* @return VAPI_OK on success, other error code on error
*/
- vapi_error_e vapi_ctx_alloc (vapi_ctx_t * result);
+vapi_error_e vapi_ctx_alloc (vapi_ctx_t *result);
/**
* @brief free vapi context
*/
- void vapi_ctx_free (vapi_ctx_t ctx);
+void vapi_ctx_free (vapi_ctx_t ctx);
/**
* @brief check if message identified by it's message id is known by the vpp to
* which the connection is open
*/
- bool vapi_is_msg_available (vapi_ctx_t ctx, vapi_msg_id_t type);
+bool vapi_is_msg_available (vapi_ctx_t ctx, vapi_msg_id_t type);
/**
* @brief connect to vpp
@@ -101,11 +101,49 @@ extern "C"
*
* @return VAPI_OK on success, other error code on error
*/
- vapi_error_e vapi_connect (vapi_ctx_t ctx, const char *name,
- const char *chroot_prefix,
- int max_outstanding_requests,
- int response_queue_size, vapi_mode_e mode,
- bool handle_keepalives);
+vapi_error_e vapi_connect (vapi_ctx_t ctx, const char *name,
+ const char *chroot_prefix,
+ int max_outstanding_requests,
+ int response_queue_size, vapi_mode_e mode,
+ bool handle_keepalives);
+
+/**
+ * @brief connect to vpp
+ *
+ * @param ctx opaque vapi context, must be allocated using vapi_ctx_alloc first
+ * @param name application name
+ * @param path shared memory prefix or path to unix socket
+ * @param max_outstanding_requests max number of outstanding requests queued
+ * @param response_queue_size size of the response queue
+ * @param mode mode of operation - blocking or nonblocking
+ * @param handle_keepalives - if true, automatically handle memclnt_keepalive
+ * @param use_uds - if true, use unix domain socket transport
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+vapi_error_e vapi_connect_ex (vapi_ctx_t ctx, const char *name,
+ const char *path, int max_outstanding_requests,
+ int response_queue_size, vapi_mode_e mode,
+ bool handle_keepalives, bool use_uds);
+
+/**
+ * @brief connect to vpp from a client in same process
+ * @remark This MUST be called from a separate thread. If called
+ * from the main thread, it will deadlock.
+ *
+ * @param ctx opaque vapi context, must be allocated using vapi_ctx_alloc first
+ * @param name application name
+ * @param max_outstanding_requests max number of outstanding requests queued
+ * @param response_queue_size size of the response queue
+ * @param mode mode of operation - blocking or nonblocking
+ * @param handle_keepalives - if true, automatically handle memclnt_keepalive
+ *
+ * @return VAPI_OK on success, other error code on error
+ */
+vapi_error_e vapi_connect_from_vpp (vapi_ctx_t ctx, const char *name,
+ int max_outstanding_requests,
+ int response_queue_size, vapi_mode_e mode,
+ bool handle_keepalives);
/**
* @brief disconnect from vpp
@@ -114,7 +152,8 @@ extern "C"
*
* @return VAPI_OK on success, other error code on error
*/
- vapi_error_e vapi_disconnect (vapi_ctx_t ctx);
+vapi_error_e vapi_disconnect (vapi_ctx_t ctx);
+vapi_error_e vapi_disconnect_from_vpp (vapi_ctx_t ctx);
/**
* @brief get event file descriptor
@@ -127,7 +166,7 @@ extern "C"
*
* @return VAPI_OK on success, other error code on error
*/
- vapi_error_e vapi_get_fd (vapi_ctx_t ctx, int *fd);
+vapi_error_e vapi_get_fd (vapi_ctx_t ctx, int *fd);
/**
* @brief low-level api for sending messages to vpp
@@ -140,7 +179,7 @@ extern "C"
*
* @return VAPI_OK on success, other error code on error
*/
- vapi_error_e vapi_send (vapi_ctx_t ctx, void *msg);
+vapi_error_e vapi_send (vapi_ctx_t ctx, void *msg);
/**
* @brief low-level api for atomically sending two messages to vpp - either
@@ -155,7 +194,7 @@ extern "C"
*
* @return VAPI_OK on success, other error code on error
*/
- vapi_error_e vapi_send2 (vapi_ctx_t ctx, void *msg1, void *msg2);
+vapi_error_e vapi_send2 (vapi_ctx_t ctx, void *msg1, void *msg2);
/**
* @brief low-level api for reading messages from vpp
@@ -171,25 +210,24 @@ extern "C"
*
* @return VAPI_OK on success, other error code on error
*/
- vapi_error_e vapi_recv (vapi_ctx_t ctx, void **msg, size_t * msg_size,
- svm_q_conditional_wait_t cond, u32 time);
+vapi_error_e vapi_recv (vapi_ctx_t ctx, void **msg, size_t *msg_size,
+ svm_q_conditional_wait_t cond, u32 time);
/**
- * @brief wait for connection to become readable or writable
+ * @brief wait for connection to become readable
*
* @param ctx opaque vapi context
- * @param mode type of property to wait for - readability, writability or both
*
* @return VAPI_OK on success, other error code on error
*/
- vapi_error_e vapi_wait (vapi_ctx_t ctx, vapi_wait_mode_e mode);
+vapi_error_e vapi_wait (vapi_ctx_t ctx);
/**
* @brief pick next message sent by vpp and call the appropriate callback
*
* @return VAPI_OK on success, other error code on error
*/
- vapi_error_e vapi_dispatch_one (vapi_ctx_t ctx);
+vapi_error_e vapi_dispatch_one (vapi_ctx_t ctx);
/**
* @brief loop vapi_dispatch_one until responses to all currently outstanding
@@ -205,11 +243,11 @@ extern "C"
*
* @return VAPI_OK on success, other error code on error
*/
- vapi_error_e vapi_dispatch (vapi_ctx_t ctx);
+vapi_error_e vapi_dispatch (vapi_ctx_t ctx);
/** generic vapi event callback */
- typedef vapi_error_e (*vapi_event_cb) (vapi_ctx_t ctx, void *callback_ctx,
- void *payload);
+typedef vapi_error_e (*vapi_event_cb) (vapi_ctx_t ctx, void *callback_ctx,
+ void *payload);
/**
* @brief set event callback to call when message with given id is dispatched
@@ -219,8 +257,8 @@ extern "C"
* @param callback callback
* @param callback_ctx context pointer stored and passed to callback
*/
- void vapi_set_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id,
- vapi_event_cb callback, void *callback_ctx);
+void vapi_set_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id,
+ vapi_event_cb callback, void *callback_ctx);
/**
* @brief clear event callback for given message id
@@ -228,12 +266,12 @@ extern "C"
* @param ctx opaque vapi context
* @param id message id
*/
- void vapi_clear_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id);
+void vapi_clear_event_cb (vapi_ctx_t ctx, vapi_msg_id_t id);
/** generic vapi event callback */
- typedef vapi_error_e (*vapi_generic_event_cb) (vapi_ctx_t ctx,
- void *callback_ctx,
- vapi_msg_id_t id, void *msg);
+typedef vapi_error_e (*vapi_generic_event_cb) (vapi_ctx_t ctx,
+ void *callback_ctx,
+ vapi_msg_id_t id, void *msg);
/**
* @brief set generic event callback
*
@@ -244,16 +282,29 @@ extern "C"
* @param callback callback
* @param callback_ctx context pointer stored and passed to callback
*/
- void vapi_set_generic_event_cb (vapi_ctx_t ctx,
- vapi_generic_event_cb callback,
- void *callback_ctx);
+void vapi_set_generic_event_cb (vapi_ctx_t ctx, vapi_generic_event_cb callback,
+ void *callback_ctx);
/**
* @brief clear generic event callback
*
* @param ctx opaque vapi context
*/
- void vapi_clear_generic_event_cb (vapi_ctx_t ctx);
+void vapi_clear_generic_event_cb (vapi_ctx_t ctx);
+
+/**
+ * @brief signal RX thread to exit
+ *
+ * @note This adds a message to the client input queue that indicates that
+ * an RX thread should stop processing incoming messages and exit. If an
+ * application has an RX thread which sleeps while waiting for incoming
+ * messages using vapi_wait(), this call will allow the application to
+ * wake up from the vapi_wait() call and figure out that it should stop
+ * running.
+ *
+ * @param ctx opaque vapi context
+ */
+void vapi_stop_rx_thread (vapi_ctx_t ctx);
#ifdef __cplusplus
}
diff --git a/src/vpp-api/vapi/vapi.hpp b/src/vpp-api/vapi/vapi.hpp
index a1e33a93fd4..34d8f97ad89 100644
--- a/src/vpp-api/vapi/vapi.hpp
+++ b/src/vpp-api/vapi/vapi.hpp
@@ -140,6 +140,10 @@ private:
template <typename Req, typename Resp, typename... Args> friend class Dump;
+ template <typename Req, typename Resp, typename StreamMessage,
+ typename... Args>
+ friend class Stream;
+
template <typename M> friend class Event_registration;
};
@@ -199,13 +203,14 @@ public:
*
* @return VAPI_OK on success, other error code on error
*/
- vapi_error_e connect (const char *name, const char *chroot_prefix,
- int max_outstanding_requests, int response_queue_size,
- bool handle_keepalives = true)
+ vapi_error_e
+ connect (const char *name, const char *chroot_prefix,
+ int max_outstanding_requests, int response_queue_size,
+ bool handle_keepalives = true, bool use_uds = false)
{
- return vapi_connect (vapi_ctx, name, chroot_prefix,
- max_outstanding_requests, response_queue_size,
- VAPI_MODE_BLOCKING, handle_keepalives);
+ return vapi_connect_ex (vapi_ctx, name, chroot_prefix,
+ max_outstanding_requests, response_queue_size,
+ VAPI_MODE_BLOCKING, handle_keepalives, use_uds);
}
/**
@@ -417,7 +422,7 @@ private:
void unregister_request (Common_req *request)
{
std::lock_guard<std::recursive_mutex> lock (requests_mutex);
- std::remove (requests.begin (), requests.end (), request);
+ requests.erase (std::remove (requests.begin (), requests.end (), request));
}
template <typename M> void register_event (Event_registration<M> *event)
@@ -451,6 +456,10 @@ private:
template <typename Req, typename Resp, typename... Args> friend class Dump;
+ template <typename Req, typename Resp, typename StreamMessage,
+ typename... Args>
+ friend class Stream;
+
template <typename M> friend class Result_set;
template <typename M> friend class Event_registration;
@@ -497,6 +506,10 @@ template <typename Req, typename Resp, typename... Args> class Request;
template <typename Req, typename Resp, typename... Args> class Dump;
+template <typename Req, typename Resp, typename StreamMessage,
+ typename... Args>
+class Stream;
+
template <class, class = void> struct vapi_has_payload_trait : std::false_type
{
};
@@ -627,6 +640,10 @@ private:
template <typename Req, typename Resp, typename... Args> friend class Dump;
+ template <typename Req, typename Resp, typename StreamMessage,
+ typename... Args>
+ friend class Stream;
+
template <typename X> friend class Event_registration;
template <typename X> friend class Result_set;
@@ -644,10 +661,11 @@ class Request : public Common_req
{
public:
Request (Connection &con, Args... args,
- std::function<vapi_error_e (Request<Req, Resp, Args...> &)>
- callback = nullptr)
- : Common_req{con}, callback{callback},
- request{con, vapi_alloc<Req> (con, args...)}, response{con, nullptr}
+ std::function<vapi_error_e (Request<Req, Resp, Args...> &)>
+ callback = nullptr)
+ : Common_req{ con }, callback{ std::move (callback) },
+ request{ con, vapi_alloc<Req> (con, args...) }, response{ con,
+ nullptr }
{
}
@@ -772,12 +790,96 @@ private:
bool complete;
std::vector<Msg<M>, typename Msg<M>::Msg_allocator> set;
+ template <typename Req, typename Resp, typename StreamMessage,
+ typename... Args>
+ friend class Stream;
+
template <typename Req, typename Resp, typename... Args> friend class Dump;
template <typename X> friend class Event_registration;
};
/**
+ * Class representing a RPC request - zero or more identical responses to a
+ * single request message with a response
+ */
+template <typename Req, typename Resp, typename StreamMessage,
+ typename... Args>
+class Stream : public Common_req
+{
+public:
+ Stream (
+ Connection &con, Args... args,
+ std::function<vapi_error_e (Stream<Req, Resp, StreamMessage, Args...> &)>
+ cb = nullptr)
+ : Common_req{ con }, request{ con, vapi_alloc<Req> (con, args...) },
+ response{ con, nullptr }, result_set{ con }, callback{ std::move (cb) }
+ {
+ }
+
+ Stream (const Stream &) = delete;
+
+ virtual ~Stream () {}
+
+ virtual std::tuple<vapi_error_e, bool>
+ assign_response (vapi_msg_id_t id, void *shm_data)
+ {
+ if (id == response.get_msg_id ())
+ {
+ response.assign_response (id, shm_data);
+ result_set.mark_complete ();
+ set_response_state (RESPONSE_READY);
+ if (nullptr != callback)
+ {
+ return std::make_pair (callback (*this), true);
+ }
+ return std::make_pair (VAPI_OK, true);
+ }
+ else
+ {
+ result_set.assign_response (id, shm_data);
+ }
+ return std::make_pair (VAPI_OK, false);
+ }
+
+ vapi_error_e
+ execute ()
+ {
+ return con.send (this);
+ }
+
+ const Msg<Req> &
+ get_request (void)
+ {
+ return request;
+ }
+
+ const Msg<Resp> &
+ get_response (void)
+ {
+ return response;
+ }
+
+ using resp_type = typename Msg<StreamMessage>::shm_data_type;
+
+ const Result_set<StreamMessage> &
+ get_result_set (void) const
+ {
+ return result_set;
+ }
+
+private:
+ Msg<Req> request;
+ Msg<Resp> response;
+ Result_set<StreamMessage> result_set;
+ std::function<vapi_error_e (Stream<Req, Resp, StreamMessage, Args...> &)>
+ callback;
+
+ friend class Connection;
+ friend class Result_set<StreamMessage>;
+};
+
+/**
* Class representing a dump request - zero or more identical responses to a
* single request message
*/
@@ -786,10 +888,10 @@ class Dump : public Common_req
{
public:
Dump (Connection &con, Args... args,
- std::function<vapi_error_e (Dump<Req, Resp, Args...> &)> callback =
- nullptr)
- : Common_req{con}, request{con, vapi_alloc<Req> (con, args...)},
- result_set{con}, callback{callback}
+ std::function<vapi_error_e (Dump<Req, Resp, Args...> &)> callback =
+ nullptr)
+ : Common_req{ con }, request{ con, vapi_alloc<Req> (con, args...) },
+ result_set{ con }, callback{ std::move (callback) }
{
}
@@ -853,9 +955,9 @@ template <typename M> class Event_registration : public Common_req
{
public:
Event_registration (
- Connection &con,
- std::function<vapi_error_e (Event_registration<M> &)> callback = nullptr)
- : Common_req{con}, result_set{con}, callback{callback}
+ Connection &con,
+ std::function<vapi_error_e (Event_registration<M> &)> callback = nullptr)
+ : Common_req{ con }, result_set{ con }, callback{ std::move (callback) }
{
if (!con.is_msg_available (M::get_msg_id ()))
{
diff --git a/src/vpp-api/vapi/vapi_c_gen.py b/src/vpp-api/vapi/vapi_c_gen.py
index f0a284ccbc1..9d1efb5e438 100755
--- a/src/vpp-api/vapi/vapi_c_gen.py
+++ b/src/vpp-api/vapi/vapi_c_gen.py
@@ -1,11 +1,21 @@
#!/usr/bin/env python3
import argparse
+import inspect
import os
import sys
import logging
-from vapi_json_parser import Field, Struct, Enum, Union, Message, JsonParser,\
- SimpleType, StructType, Alias
+from vapi_json_parser import (
+ Field,
+ Struct,
+ Enum,
+ Union,
+ Message,
+ JsonParser,
+ SimpleType,
+ StructType,
+ Alias,
+)
class CField(Field):
@@ -13,7 +23,7 @@ class CField(Field):
return "vapi_type_%s" % self.name
def get_c_def(self):
- if self.type.get_c_name() == 'vl_api_string_t':
+ if self.type.get_c_name() == "string":
if self.len:
return "u8 %s[%d];" % (self.name, self.len)
else:
@@ -27,50 +37,63 @@ class CField(Field):
def get_swap_to_be_code(self, struct, var):
if self.len is not None and type(self.len) != dict:
if self.len > 0:
- return "do { unsigned i; for (i = 0; i < %d; ++i) { %s } }"\
- " while(0);" % (
- self.len,
- self.type.get_swap_to_be_code(struct, "%s[i]" % var))
+ return (
+ "do { unsigned i; for (i = 0; i < %d; ++i) { %s } }"
+ " while(0);"
+ % (self.len, self.type.get_swap_to_be_code(struct, "%s[i]" % var))
+ )
else:
if self.nelem_field.needs_byte_swap():
nelem_field = "%s(%s%s)" % (
self.nelem_field.type.get_swap_to_host_func_name(),
- struct, self.nelem_field.name)
+ struct,
+ self.nelem_field.name,
+ )
else:
nelem_field = "%s%s" % (struct, self.nelem_field.name)
return (
"do { unsigned i; for (i = 0; i < %s; ++i) { %s } }"
- " while(0);" %
- (nelem_field, self.type.get_swap_to_be_code(
- struct, "%s[i]" % var)))
+ " while(0);"
+ % (
+ nelem_field,
+ self.type.get_swap_to_be_code(struct, "%s[i]" % var),
+ )
+ )
return self.type.get_swap_to_be_code(struct, "%s" % var)
def get_swap_to_host_code(self, struct, var):
if self.len is not None and type(self.len) != dict:
if self.len > 0:
- return "do { unsigned i; for (i = 0; i < %d; ++i) { %s } }"\
- " while(0);" % (
- self.len,
- self.type.get_swap_to_host_code(struct, "%s[i]" % var))
+ return (
+ "do { unsigned i; for (i = 0; i < %d; ++i) { %s } }"
+ " while(0);"
+ % (self.len, self.type.get_swap_to_host_code(struct, "%s[i]" % var))
+ )
else:
# nelem_field already swapped to host here...
return (
"do { unsigned i; for (i = 0; i < %s%s; ++i) { %s } }"
- " while(0);" %
- (struct, self.nelem_field.name,
- self.type.get_swap_to_host_code(
- struct, "%s[i]" % var)))
+ " while(0);"
+ % (
+ struct,
+ self.nelem_field.name,
+ self.type.get_swap_to_host_code(struct, "%s[i]" % var),
+ )
+ )
return self.type.get_swap_to_host_code(struct, "%s" % var)
def needs_byte_swap(self):
return self.type.needs_byte_swap()
- def get_vla_field_length_name(self, path):
+ def get_vla_parameter_name(self, path):
return "%s_%s_array_size" % ("_".join(path), self.name)
+ def get_vla_field_name(self, path):
+ return ".".join(path + [self.nelem_field.name])
+
def get_alloc_vla_param_names(self, path):
if self.is_vla():
- result = [self.get_vla_field_length_name(path)]
+ result = [self.get_vla_parameter_name(path)]
else:
result = []
if self.type.has_vla():
@@ -78,26 +101,38 @@ class CField(Field):
result.extend(t)
return result
- def get_vla_calc_size_code(self, prefix, path):
+ def get_vla_calc_size_code(self, prefix, path, is_alloc):
if self.is_vla():
- result = ["sizeof(%s.%s[0]) * %s" % (
- ".".join([prefix] + path),
- self.name,
- self.get_vla_field_length_name(path))]
+ result = [
+ "sizeof(%s.%s[0]) * %s"
+ % (
+ ".".join([prefix] + path),
+ self.name,
+ (
+ self.get_vla_parameter_name(path)
+ if is_alloc
+ else "%s.%s" % (prefix, self.get_vla_field_name(path))
+ ),
+ )
+ ]
else:
result = []
if self.type.has_vla():
- t = self.type.get_vla_calc_size_code(prefix, path + [self.name])
+ t = self.type.get_vla_calc_size_code(prefix, path + [self.name], is_alloc)
result.extend(t)
return result
def get_vla_assign_code(self, prefix, path):
result = []
if self.is_vla():
- result.append("%s.%s = %s" % (
- ".".join([prefix] + path),
- self.nelem_field.name,
- self.get_vla_field_length_name(path)))
+ result.append(
+ "%s.%s = %s"
+ % (
+ ".".join([prefix] + path),
+ self.nelem_field.name,
+ self.get_vla_parameter_name(path),
+ )
+ )
if self.type.has_vla():
t = self.type.get_vla_assign_code(prefix, path + [self.name])
result.extend(t)
@@ -111,52 +146,74 @@ class CAlias(CField):
def get_c_def(self):
if self.len is not None:
return "typedef %s vapi_type_%s[%d];" % (
- self.type.get_c_name(), self.name, self.len)
+ self.type.get_c_name(),
+ self.name,
+ self.len,
+ )
else:
- return "typedef %s vapi_type_%s;" % (
- self.type.get_c_name(), self.name)
+ return "typedef %s vapi_type_%s;" % (self.type.get_c_name(), self.name)
class CStruct(Struct):
def get_c_def(self):
- return "\n".join([
- "typedef struct __attribute__((__packed__)) {\n%s" % (
- "\n".join([" %s" % x.get_c_def()
- for x in self.fields])),
- "} %s;" % self.get_c_name()])
+ return "\n".join(
+ [
+ "typedef struct __attribute__((__packed__)) {\n%s"
+ % ("\n".join([" %s" % x.get_c_def() for x in self.fields])),
+ "} %s;" % self.get_c_name(),
+ ]
+ )
def get_vla_assign_code(self, prefix, path):
- return [x for f in self.fields if f.has_vla()
- for x in f.get_vla_assign_code(prefix, path)]
+ return [
+ x
+ for f in self.fields
+ if f.has_vla()
+ for x in f.get_vla_assign_code(prefix, path)
+ ]
def get_alloc_vla_param_names(self, path):
- return [x for f in self.fields
- if f.has_vla()
- for x in f.get_alloc_vla_param_names(path)]
-
- def get_vla_calc_size_code(self, prefix, path):
- return [x for f in self.fields if f.has_vla()
- for x in f.get_vla_calc_size_code(prefix, path)]
+ return [
+ x
+ for f in self.fields
+ if f.has_vla()
+ for x in f.get_alloc_vla_param_names(path)
+ ]
+ def get_vla_calc_size_code(self, prefix, path, is_alloc):
+ return [
+ x
+ for f in self.fields
+ if f.has_vla()
+ for x in f.get_vla_calc_size_code(prefix, path, is_alloc)
+ ]
-class CSimpleType (SimpleType):
+class CSimpleType(SimpleType):
swap_to_be_dict = {
- 'i16': 'htobe16', 'u16': 'htobe16',
- 'i32': 'htobe32', 'u32': 'htobe32',
- 'i64': 'htobe64', 'u64': 'htobe64',
+ "i16": "htobe16",
+ "u16": "htobe16",
+ "i32": "htobe32",
+ "u32": "htobe32",
+ "i64": "htobe64",
+ "u64": "htobe64",
}
swap_to_host_dict = {
- 'i16': 'be16toh', 'u16': 'be16toh',
- 'i32': 'be32toh', 'u32': 'be32toh',
- 'i64': 'be64toh', 'u64': 'be64toh',
+ "i16": "be16toh",
+ "u16": "be16toh",
+ "i32": "be32toh",
+ "u32": "be32toh",
+ "i64": "be64toh",
+ "u64": "be64toh",
}
__packed = "__attribute__((packed))"
pack_dict = {
- 'i8': __packed, 'u8': __packed,
- 'i16': __packed, 'u16': __packed,
+ "i8": __packed,
+ "u8": __packed,
+ "i16": __packed,
+ "u16": __packed,
}
def get_c_name(self):
@@ -173,15 +230,21 @@ class CSimpleType (SimpleType):
def get_swap_to_be_code(self, struct, var, cast=None):
x = "%s%s" % (struct, var)
- return "%s = %s%s(%s);" % (x,
- "(%s)" % cast if cast else "",
- self.get_swap_to_be_func_name(), x)
+ return "%s = %s%s(%s);" % (
+ x,
+ "(%s)" % cast if cast else "",
+ self.get_swap_to_be_func_name(),
+ x,
+ )
def get_swap_to_host_code(self, struct, var, cast=None):
x = "%s%s" % (struct, var)
- return "%s = %s%s(%s);" % (x,
- "(%s)" % cast if cast else "",
- self.get_swap_to_host_func_name(), x)
+ return "%s = %s%s(%s);" % (
+ x,
+ "(%s)" % cast if cast else "",
+ self.get_swap_to_host_func_name(),
+ x,
+ )
def needs_byte_swap(self):
try:
@@ -203,7 +266,7 @@ class CEnum(Enum):
return "typedef enum {\n%s\n} %s %s;" % (
"\n".join([" %s = %s," % (i, j) for i, j in self.value_pairs]),
self.type.get_packed(),
- self.get_c_name()
+ self.get_c_name(),
)
def needs_byte_swap(self):
@@ -222,17 +285,18 @@ class CUnion(Union):
def get_c_def(self):
return "typedef union {\n%s\n} %s;" % (
- "\n".join([" %s %s;" % (i.get_c_name(), j)
- for i, j in self.type_pairs]),
- self.get_c_name()
+ "\n".join([" %s %s;" % (i.get_c_name(), j) for i, j in self.type_pairs]),
+ self.get_c_name(),
)
def needs_byte_swap(self):
return False
-class CStructType (StructType, CStruct):
+class CStructType(StructType, CStruct):
def get_c_name(self):
+ if self.name == "vl_api_string_t":
+ return "vl_api_string_t"
return "vapi_type_%s" % self.name
def get_swap_to_be_func_name(self):
@@ -242,27 +306,36 @@ class CStructType (StructType, CStruct):
return "%s_ntoh" % self.get_c_name()
def get_swap_to_be_func_decl(self):
- return "void %s(%s *msg)" % (
- self.get_swap_to_be_func_name(), self.get_c_name())
+ return "void %s(%s *msg)" % (self.get_swap_to_be_func_name(), self.get_c_name())
def get_swap_to_be_func_def(self):
return "%s\n{\n%s\n}" % (
self.get_swap_to_be_func_decl(),
- "\n".join([
- " %s" % p.get_swap_to_be_code("msg->", "%s" % p.name)
- for p in self.fields if p.needs_byte_swap()]),
+ "\n".join(
+ [
+ " %s" % p.get_swap_to_be_code("msg->", "%s" % p.name)
+ for p in self.fields
+ if p.needs_byte_swap()
+ ]
+ ),
)
def get_swap_to_host_func_decl(self):
return "void %s(%s *msg)" % (
- self.get_swap_to_host_func_name(), self.get_c_name())
+ self.get_swap_to_host_func_name(),
+ self.get_c_name(),
+ )
def get_swap_to_host_func_def(self):
return "%s\n{\n%s\n}" % (
self.get_swap_to_host_func_decl(),
- "\n".join([
- " %s" % p.get_swap_to_host_code("msg->", "%s" % p.name)
- for p in self.fields if p.needs_byte_swap()]),
+ "\n".join(
+ [
+ " %s" % p.get_swap_to_host_code("msg->", "%s" % p.name)
+ for p in self.fields
+ if p.needs_byte_swap()
+ ]
+ ),
)
def get_swap_to_be_code(self, struct, var):
@@ -278,13 +351,11 @@ class CStructType (StructType, CStruct):
return False
-class CMessage (Message):
+class CMessage(Message):
def __init__(self, logger, definition, json_parser):
super(CMessage, self).__init__(logger, definition, json_parser)
self.payload_members = [
- " %s" % p.get_c_def()
- for p in self.fields
- if p.type != self.header
+ " %s" % p.get_c_def() for p in self.fields if p.type != self.header
]
def has_payload(self):
@@ -303,46 +374,67 @@ class CMessage (Message):
return "vapi_alloc_%s" % self.name
def get_alloc_vla_param_names(self):
- return [x for f in self.fields
- if f.has_vla()
- for x in f.get_alloc_vla_param_names([])]
+ return [
+ x
+ for f in self.fields
+ if f.has_vla()
+ for x in f.get_alloc_vla_param_names([])
+ ]
def get_alloc_func_decl(self):
return "%s* %s(struct vapi_ctx_s *ctx%s)" % (
self.get_c_name(),
self.get_alloc_func_name(),
- "".join([", size_t %s" % n for n in
- self.get_alloc_vla_param_names()]))
+ "".join([", size_t %s" % n for n in self.get_alloc_vla_param_names()]),
+ )
def get_alloc_func_def(self):
extra = []
- if self.header.has_field('client_index'):
- extra.append(
- " msg->header.client_index = vapi_get_client_index(ctx);")
- if self.header.has_field('context'):
+ if self.header.has_field("client_index"):
+ extra.append(" msg->header.client_index = vapi_get_client_index(ctx);")
+ if self.header.has_field("context"):
extra.append(" msg->header.context = 0;")
- return "\n".join([
- "%s" % self.get_alloc_func_decl(),
- "{",
- " %s *msg = NULL;" % self.get_c_name(),
- " const size_t size = sizeof(%s)%s;" % (
- self.get_c_name(),
- "".join([" + %s" % x for f in self.fields if f.has_vla()
- for x in f.get_vla_calc_size_code("msg->payload",
- [])])),
- " /* cast here required to play nicely with C++ world ... */",
- " msg = (%s*)vapi_msg_alloc(ctx, size);" % self.get_c_name(),
- " if (!msg) {",
- " return NULL;",
- " }",
- ] + extra + [
- " msg->header._vl_msg_id = vapi_lookup_vl_msg_id(ctx, %s);" %
- self.get_msg_id_name(),
- "".join([" %s;\n" % line
- for f in self.fields if f.has_vla()
- for line in f.get_vla_assign_code("msg->payload", [])]),
- " return msg;",
- "}"])
+ return "\n".join(
+ [
+ "%s" % self.get_alloc_func_decl(),
+ "{",
+ " %s *msg = NULL;" % self.get_c_name(),
+ " const size_t size = sizeof(%s)%s;"
+ % (
+ self.get_c_name(),
+ "".join(
+ [
+ " + %s" % x
+ for f in self.fields
+ if f.has_vla()
+ for x in f.get_vla_calc_size_code(
+ "msg->payload", [], is_alloc=True
+ )
+ ]
+ ),
+ ),
+ " /* cast here required to play nicely with C++ world ... */",
+ " msg = (%s*)vapi_msg_alloc(ctx, size);" % self.get_c_name(),
+ " if (!msg) {",
+ " return NULL;",
+ " }",
+ ]
+ + extra
+ + [
+ " msg->header._vl_msg_id = vapi_lookup_vl_msg_id(ctx, %s);"
+ % self.get_msg_id_name(),
+ "".join(
+ [
+ " %s;\n" % line
+ for f in self.fields
+ if f.has_vla()
+ for line in f.get_vla_assign_code("msg->payload", [])
+ ]
+ ),
+ " return msg;",
+ "}",
+ ]
+ )
def get_calc_msg_size_func_name(self):
return "vapi_calc_%s_msg_size" % self.name
@@ -350,43 +442,92 @@ class CMessage (Message):
def get_calc_msg_size_func_decl(self):
return "uword %s(%s *msg)" % (
self.get_calc_msg_size_func_name(),
- self.get_c_name())
+ self.get_c_name(),
+ )
def get_calc_msg_size_func_def(self):
- return "\n".join([
- "%s" % self.get_calc_msg_size_func_decl(),
- "{",
- " return sizeof(*msg)%s;" %
- "".join(["+ msg->payload.%s * sizeof(msg->payload.%s[0])" % (
- f.nelem_field.name,
- f.name)
- for f in self.fields
- if f.nelem_field is not None
- ]),
- "}",
- ])
+ return "\n".join(
+ [
+ "%s" % self.get_calc_msg_size_func_decl(),
+ "{",
+ " return sizeof(*msg)%s;"
+ % "".join(
+ [
+ " + %s" % x
+ for f in self.fields
+ if f.has_vla()
+ for x in f.get_vla_calc_size_code(
+ "msg->payload", [], is_alloc=False
+ )
+ ]
+ ),
+ "}",
+ ]
+ )
+
+ def get_verify_msg_size_func_name(self):
+ return f"vapi_verify_{self.name}_msg_size"
+
+ def get_verify_msg_size_func_decl(self):
+ return "int %s(%s *msg, uword buf_size)" % (
+ self.get_verify_msg_size_func_name(),
+ self.get_c_name(),
+ )
+
+ def get_verify_msg_size_func_def(self):
+ return inspect.cleandoc(
+ f"""
+ {self.get_verify_msg_size_func_decl()}
+ {{
+ if (sizeof({self.get_c_name()}) > buf_size)
+ {{
+ VAPI_ERR("Truncated '{self.name}' msg received, received %lu"
+ "bytes, expected %lu bytes.", buf_size,
+ sizeof({self.get_c_name()}));
+ return -1;
+ }}
+ if ({self.get_calc_msg_size_func_name()}(msg) > buf_size)
+ {{
+ VAPI_ERR("Truncated '{self.name}' msg received, received %lu"
+ "bytes, expected %lu bytes.", buf_size,
+ {self.get_calc_msg_size_func_name()}(msg));
+ return -1;
+ }}
+ return 0;
+ }}
+ """
+ )
def get_c_def(self):
if self.has_payload():
- return "\n".join([
- "typedef struct __attribute__ ((__packed__)) {",
- "%s " %
- "\n".join(self.payload_members),
- "} %s;" % self.get_payload_struct_name(),
- "",
- "typedef struct __attribute__ ((__packed__)) {",
- (" %s %s;" % (self.header.get_c_name(),
- self.fields[0].name)
- if self.header is not None else ""),
- " %s payload;" % self.get_payload_struct_name(),
- "} %s;" % self.get_c_name(), ])
+ return "\n".join(
+ [
+ "typedef struct __attribute__ ((__packed__)) {",
+ "%s " % "\n".join(self.payload_members),
+ "} %s;" % self.get_payload_struct_name(),
+ "",
+ "typedef struct __attribute__ ((__packed__)) {",
+ (
+ " %s %s;" % (self.header.get_c_name(), self.fields[0].name)
+ if self.header is not None
+ else ""
+ ),
+ " %s payload;" % self.get_payload_struct_name(),
+ "} %s;" % self.get_c_name(),
+ ]
+ )
else:
- return "\n".join([
- "typedef struct __attribute__ ((__packed__)) {",
- (" %s %s;" % (self.header.get_c_name(),
- self.fields[0].name)
- if self.header is not None else ""),
- "} %s;" % self.get_c_name(), ])
+ return "\n".join(
+ [
+ "typedef struct __attribute__ ((__packed__)) {",
+ (
+ " %s %s;" % (self.header.get_c_name(), self.fields[0].name)
+ if self.header is not None
+ else ""
+ ),
+ "} %s;" % self.get_c_name(),
+ ]
+ )
def get_swap_payload_to_host_func_name(self):
return "%s_payload_ntoh" % self.get_c_name()
@@ -397,29 +538,37 @@ class CMessage (Message):
def get_swap_payload_to_host_func_decl(self):
return "void %s(%s *payload)" % (
self.get_swap_payload_to_host_func_name(),
- self.get_payload_struct_name())
+ self.get_payload_struct_name(),
+ )
def get_swap_payload_to_be_func_decl(self):
return "void %s(%s *payload)" % (
self.get_swap_payload_to_be_func_name(),
- self.get_payload_struct_name())
+ self.get_payload_struct_name(),
+ )
def get_swap_payload_to_be_func_def(self):
return "%s\n{\n%s\n}" % (
self.get_swap_payload_to_be_func_decl(),
- "\n".join([
- " %s" % p.get_swap_to_be_code("payload->", "%s" % p.name)
- for p in self.fields
- if p.needs_byte_swap() and p.type != self.header]),
+ "\n".join(
+ [
+ " %s" % p.get_swap_to_be_code("payload->", "%s" % p.name)
+ for p in self.fields
+ if p.needs_byte_swap() and p.type != self.header
+ ]
+ ),
)
def get_swap_payload_to_host_func_def(self):
return "%s\n{\n%s\n}" % (
self.get_swap_payload_to_host_func_decl(),
- "\n".join([
- " %s" % p.get_swap_to_host_code("payload->", "%s" % p.name)
- for p in self.fields
- if p.needs_byte_swap() and p.type != self.header]),
+ "\n".join(
+ [
+ " %s" % p.get_swap_to_host_code("payload->", "%s" % p.name)
+ for p in self.fields
+ if p.needs_byte_swap() and p.type != self.header
+ ]
+ ),
)
def get_swap_to_host_func_name(self):
@@ -430,150 +579,205 @@ class CMessage (Message):
def get_swap_to_host_func_decl(self):
return "void %s(%s *msg)" % (
- self.get_swap_to_host_func_name(), self.get_c_name())
+ self.get_swap_to_host_func_name(),
+ self.get_c_name(),
+ )
def get_swap_to_be_func_decl(self):
- return "void %s(%s *msg)" % (
- self.get_swap_to_be_func_name(), self.get_c_name())
+ return "void %s(%s *msg)" % (self.get_swap_to_be_func_name(), self.get_c_name())
def get_swap_to_be_func_def(self):
- return "\n".join([
- "%s" % self.get_swap_to_be_func_decl(),
- "{",
- (" VAPI_DBG(\"Swapping `%s'@%%p to big endian\", msg);" %
- self.get_c_name()),
- " %s(&msg->header);" % self.header.get_swap_to_be_func_name()
- if self.header is not None else "",
- " %s(&msg->payload);" % self.get_swap_payload_to_be_func_name()
- if self.has_payload() else "",
- "}",
- ])
+ return "\n".join(
+ [
+ "%s" % self.get_swap_to_be_func_decl(),
+ "{",
+ (
+ ' VAPI_DBG("Swapping `%s\'@%%p to big endian", msg);'
+ % self.get_c_name()
+ ),
+ (
+ " %s(&msg->header);" % self.header.get_swap_to_be_func_name()
+ if self.header is not None
+ else ""
+ ),
+ (
+ " %s(&msg->payload);" % self.get_swap_payload_to_be_func_name()
+ if self.has_payload()
+ else ""
+ ),
+ "}",
+ ]
+ )
def get_swap_to_host_func_def(self):
- return "\n".join([
- "%s" % self.get_swap_to_host_func_decl(),
- "{",
- (" VAPI_DBG(\"Swapping `%s'@%%p to host byte order\", msg);" %
- self.get_c_name()),
- " %s(&msg->header);" % self.header.get_swap_to_host_func_name()
- if self.header is not None else "",
- " %s(&msg->payload);" % self.get_swap_payload_to_host_func_name()
- if self.has_payload() else "",
- "}",
- ])
+ return "\n".join(
+ [
+ "%s" % self.get_swap_to_host_func_decl(),
+ "{",
+ (
+ ' VAPI_DBG("Swapping `%s\'@%%p to host byte order", msg);'
+ % self.get_c_name()
+ ),
+ (
+ " %s(&msg->header);" % self.header.get_swap_to_host_func_name()
+ if self.header is not None
+ else ""
+ ),
+ (
+ " %s(&msg->payload);" % self.get_swap_payload_to_host_func_name()
+ if self.has_payload()
+ else ""
+ ),
+ "}",
+ ]
+ )
def get_op_func_name(self):
return "vapi_%s" % self.name
def get_op_func_decl(self):
- if self.reply.has_payload():
- return "vapi_error_e %s(%s)" % (
- self.get_op_func_name(),
- ",\n ".join([
- 'struct vapi_ctx_s *ctx',
- '%s *msg' % self.get_c_name(),
- 'vapi_error_e (*callback)(struct vapi_ctx_s *ctx',
- ' void *callback_ctx',
- ' vapi_error_e rv',
- ' bool is_last',
- ' %s *reply)' %
- self.reply.get_payload_struct_name(),
- 'void *callback_ctx',
- ])
- )
- else:
- return "vapi_error_e %s(%s)" % (
- self.get_op_func_name(),
- ",\n ".join([
- 'struct vapi_ctx_s *ctx',
- '%s *msg' % self.get_c_name(),
- 'vapi_error_e (*callback)(struct vapi_ctx_s *ctx',
- ' void *callback_ctx',
- ' vapi_error_e rv',
- ' bool is_last)',
- 'void *callback_ctx',
- ])
- )
+ stream_param_lines = []
+ if self.has_stream_msg:
+ stream_param_lines = [
+ "vapi_error_e (*details_callback)(struct vapi_ctx_s *ctx",
+ " void *callback_ctx",
+ " vapi_error_e rv",
+ " bool is_last",
+ " %s *details)"
+ % self.stream_msg.get_payload_struct_name(),
+ "void *details_callback_ctx",
+ ]
+
+ return "vapi_error_e %s(%s)" % (
+ self.get_op_func_name(),
+ ",\n ".join(
+ [
+ "struct vapi_ctx_s *ctx",
+ "%s *msg" % self.get_c_name(),
+ "vapi_error_e (*reply_callback)(struct vapi_ctx_s *ctx",
+ " void *callback_ctx",
+ " vapi_error_e rv",
+ " bool is_last",
+ " %s *reply)"
+ % self.reply.get_payload_struct_name(),
+ ]
+ + [
+ "void *reply_callback_ctx",
+ ]
+ + stream_param_lines
+ ),
+ )
def get_op_func_def(self):
- return "\n".join([
- "%s" % self.get_op_func_decl(),
- "{",
- " if (!msg || !callback) {",
- " return VAPI_EINVAL;",
- " }",
- " if (vapi_is_nonblocking(ctx) && vapi_requests_full(ctx)) {",
- " return VAPI_EAGAIN;",
- " }",
- " vapi_error_e rv;",
- " if (VAPI_OK != (rv = vapi_producer_lock (ctx))) {",
- " return rv;",
- " }",
- " u32 req_context = vapi_gen_req_context(ctx);",
- " msg->header.context = req_context;",
- " %s(msg);" % self.get_swap_to_be_func_name(),
- (" if (VAPI_OK == (rv = vapi_send_with_control_ping "
- "(ctx, msg, req_context))) {"
- if self.reply_is_stream else
- " if (VAPI_OK == (rv = vapi_send (ctx, msg))) {"
- ),
- (" vapi_store_request(ctx, req_context, %s, "
- "(vapi_cb_t)callback, callback_ctx);" %
- ("true" if self.reply_is_stream else "false")),
- " if (VAPI_OK != vapi_producer_unlock (ctx)) {",
- " abort (); /* this really shouldn't happen */",
- " }",
- " if (vapi_is_nonblocking(ctx)) {",
- " rv = VAPI_OK;",
- " } else {",
- " rv = vapi_dispatch(ctx);",
- " }",
- " } else {",
- " %s(msg);" % self.get_swap_to_host_func_name(),
- " if (VAPI_OK != vapi_producer_unlock (ctx)) {",
- " abort (); /* this really shouldn't happen */",
- " }",
- " }",
- " return rv;",
- "}",
- "",
- ])
+ param_check_lines = [" if (!msg || !reply_callback) {"]
+ store_request_lines = [
+ " vapi_store_request(ctx, req_context, %s, %s, "
+ % (
+ self.reply.get_msg_id_name(),
+ "VAPI_REQUEST_DUMP" if self.reply_is_stream else "VAPI_REQUEST_REG",
+ ),
+ " (vapi_cb_t)reply_callback, reply_callback_ctx);",
+ ]
+ if self.has_stream_msg:
+ param_check_lines = [
+ " if (!msg || !reply_callback || !details_callback) {"
+ ]
+ store_request_lines = [
+ f" vapi_store_request(ctx, req_context, {self.stream_msg.get_msg_id_name()}, VAPI_REQUEST_STREAM, ",
+ " (vapi_cb_t)details_callback, details_callback_ctx);",
+ f" vapi_store_request(ctx, req_context, {self.reply.get_msg_id_name()}, VAPI_REQUEST_REG, ",
+ " (vapi_cb_t)reply_callback, reply_callback_ctx);",
+ ]
+
+ return "\n".join(
+ [
+ "%s" % self.get_op_func_decl(),
+ "{",
+ ]
+ + param_check_lines
+ + [
+ " return VAPI_EINVAL;",
+ " }",
+ " if (vapi_is_nonblocking(ctx) && vapi_requests_full(ctx)) {",
+ " return VAPI_EAGAIN;",
+ " }",
+ " vapi_error_e rv;",
+ " if (VAPI_OK != (rv = vapi_producer_lock (ctx))) {",
+ " return rv;",
+ " }",
+ " u32 req_context = vapi_gen_req_context(ctx);",
+ " msg->header.context = req_context;",
+ " %s(msg);" % self.get_swap_to_be_func_name(),
+ (
+ " if (VAPI_OK == (rv = vapi_send_with_control_ping "
+ "(ctx, msg, req_context))) {"
+ if (self.reply_is_stream and not self.has_stream_msg)
+ else " if (VAPI_OK == (rv = vapi_send (ctx, msg))) {"
+ ),
+ ]
+ + store_request_lines
+ + [
+ " if (VAPI_OK != vapi_producer_unlock (ctx)) {",
+ " abort (); /* this really shouldn't happen */",
+ " }",
+ " if (vapi_is_nonblocking(ctx)) {",
+ " rv = VAPI_OK;",
+ " } else {",
+ " rv = vapi_dispatch(ctx);",
+ " }",
+ " } else {",
+ " %s(msg);" % self.get_swap_to_host_func_name(),
+ " if (VAPI_OK != vapi_producer_unlock (ctx)) {",
+ " abort (); /* this really shouldn't happen */",
+ " }",
+ " }",
+ " return rv;",
+ "}",
+ "",
+ ]
+ )
def get_event_cb_func_decl(self):
if not self.is_reply and not self.is_event:
- raise Exception(
- "Cannot register event callback for non-reply message")
+ raise Exception("Cannot register event callback for non-reply message")
if self.has_payload():
- return "\n".join([
- "void vapi_set_%s_event_cb (" %
- self.get_c_name(),
- " struct vapi_ctx_s *ctx, ",
- (" vapi_error_e (*callback)(struct vapi_ctx_s *ctx, "
- "void *callback_ctx, %s *payload)," %
- self.get_payload_struct_name()),
- " void *callback_ctx)",
- ])
+ return "\n".join(
+ [
+ "void vapi_set_%s_event_cb (" % self.get_c_name(),
+ " struct vapi_ctx_s *ctx, ",
+ (
+ " vapi_error_e (*callback)(struct vapi_ctx_s *ctx, "
+ "void *callback_ctx, %s *payload),"
+ % self.get_payload_struct_name()
+ ),
+ " void *callback_ctx)",
+ ]
+ )
else:
- return "\n".join([
- "void vapi_set_%s_event_cb (" %
- self.get_c_name(),
- " struct vapi_ctx_s *ctx, ",
- " vapi_error_e (*callback)(struct vapi_ctx_s *ctx, "
- "void *callback_ctx),",
- " void *callback_ctx)",
- ])
+ return "\n".join(
+ [
+ "void vapi_set_%s_event_cb (" % self.get_c_name(),
+ " struct vapi_ctx_s *ctx, ",
+ " vapi_error_e (*callback)(struct vapi_ctx_s *ctx, "
+ "void *callback_ctx),",
+ " void *callback_ctx)",
+ ]
+ )
def get_event_cb_func_def(self):
if not self.is_reply and not self.is_event:
- raise Exception(
- "Cannot register event callback for non-reply function")
- return "\n".join([
- "%s" % self.get_event_cb_func_decl(),
- "{",
- (" vapi_set_event_cb(ctx, %s, (vapi_event_cb)callback, "
- "callback_ctx);" %
- self.get_msg_id_name()),
- "}"])
+ raise Exception("Cannot register event callback for non-reply function")
+ return "\n".join(
+ [
+ "%s" % self.get_event_cb_func_decl(),
+ "{",
+ (
+ " vapi_set_event_cb(ctx, %s, (vapi_event_cb)callback, "
+ "callback_ctx);" % self.get_msg_id_name()
+ ),
+ "}",
+ ]
+ )
def get_c_metadata_struct_name(self):
return "__vapi_metadata_%s" % self.name
@@ -581,53 +785,45 @@ class CMessage (Message):
def get_c_constructor(self):
has_context = False
if self.header is not None:
- has_context = self.header.has_field('context')
- return '\n'.join([
- 'static void __attribute__((constructor)) __vapi_constructor_%s()'
- % self.name,
- '{',
- ' static const char name[] = "%s";' % self.name,
- ' static const char name_with_crc[] = "%s_%s";'
- % (self.name, self.crc[2:]),
- ' static vapi_message_desc_t %s = {' %
- self.get_c_metadata_struct_name(),
- ' name,',
- ' sizeof(name) - 1,',
- ' name_with_crc,',
- ' sizeof(name_with_crc) - 1,',
- ' true,' if has_context else ' false,',
- ' offsetof(%s, context),' % self.header.get_c_name()
- if has_context else ' 0,',
- (' offsetof(%s, payload),' % self.get_c_name())
- if self.has_payload() else ' VAPI_INVALID_MSG_ID,',
- ' sizeof(%s),' % self.get_c_name(),
- ' (generic_swap_fn_t)%s,' % self.get_swap_to_be_func_name(),
- ' (generic_swap_fn_t)%s,' % self.get_swap_to_host_func_name(),
- ' VAPI_INVALID_MSG_ID,',
- ' };',
- '',
- ' %s = vapi_register_msg(&%s);' %
- (self.get_msg_id_name(), self.get_c_metadata_struct_name()),
- ' VAPI_DBG("Assigned msg id %%d to %s", %s);' %
- (self.name, self.get_msg_id_name()),
- '}',
- ])
-
-
-vapi_send_with_control_ping = """
-static inline vapi_error_e
-vapi_send_with_control_ping (vapi_ctx_t ctx, void *msg, u32 context)
-{
- vapi_msg_control_ping *ping = vapi_alloc_control_ping (ctx);
- if (!ping)
- {
- return VAPI_ENOMEM;
- }
- ping->header.context = context;
- vapi_msg_control_ping_hton (ping);
- return vapi_send2 (ctx, msg, ping);
-}
-"""
+ has_context = self.header.has_field("context")
+ return "\n".join(
+ [
+ "static void __attribute__((constructor)) __vapi_constructor_%s()"
+ % self.name,
+ "{",
+ ' static const char name[] = "%s";' % self.name,
+ ' static const char name_with_crc[] = "%s_%s";'
+ % (self.name, self.crc[2:]),
+ " static vapi_message_desc_t %s = {"
+ % self.get_c_metadata_struct_name(),
+ " name,",
+ " sizeof(name) - 1,",
+ " name_with_crc,",
+ " sizeof(name_with_crc) - 1,",
+ " true," if has_context else " false,",
+ (
+ " offsetof(%s, context)," % self.header.get_c_name()
+ if has_context
+ else " 0,"
+ ),
+ (
+ (" offsetof(%s, payload)," % self.get_c_name())
+ if self.has_payload()
+ else " VAPI_INVALID_MSG_ID,"
+ ),
+ " (verify_msg_size_fn_t)%s," % self.get_verify_msg_size_func_name(),
+ " (generic_swap_fn_t)%s," % self.get_swap_to_be_func_name(),
+ " (generic_swap_fn_t)%s," % self.get_swap_to_host_func_name(),
+ " VAPI_INVALID_MSG_ID,",
+ " };",
+ "",
+ " %s = vapi_register_msg(&%s);"
+ % (self.get_msg_id_name(), self.get_c_metadata_struct_name()),
+ ' VAPI_DBG("Assigned msg id %%d to %s", %s);'
+ % (self.name, self.get_msg_id_name()),
+ "}",
+ ]
+ )
def emit_definition(parser, json_file, emitted, o):
@@ -640,12 +836,16 @@ def emit_definition(parser, json_file, emitted, o):
emit_definition(parser, json_file, emitted, x)
if hasattr(o, "reply"):
emit_definition(parser, json_file, emitted, o.reply)
+ if hasattr(o, "stream_msg"):
+ emit_definition(parser, json_file, emitted, o.stream_msg)
if hasattr(o, "get_c_def"):
- if (o not in parser.enums_by_json[json_file] and
- o not in parser.types_by_json[json_file] and
- o not in parser.unions_by_json[json_file] and
- o.name not in parser.messages_by_json[json_file] and
- o not in parser.aliases_by_json[json_file]):
+ if (
+ o not in parser.enums_by_json[json_file]
+ and o not in parser.types_by_json[json_file]
+ and o not in parser.unions_by_json[json_file]
+ and o.name not in parser.messages_by_json[json_file]
+ and o not in parser.aliases_by_json[json_file]
+ ):
return
guard = "defined_%s" % o.get_c_name()
print("#ifndef %s" % guard)
@@ -655,25 +855,25 @@ def emit_definition(parser, json_file, emitted, o):
function_attrs = "static inline "
if o.name in parser.messages_by_json[json_file]:
if o.has_payload():
- print("%s%s" % (function_attrs,
- o.get_swap_payload_to_be_func_def()))
+ print("%s%s" % (function_attrs, o.get_swap_payload_to_be_func_def()))
print("")
- print("%s%s" % (function_attrs,
- o.get_swap_payload_to_host_func_def()))
+ print("%s%s" % (function_attrs, o.get_swap_payload_to_host_func_def()))
print("")
print("%s%s" % (function_attrs, o.get_swap_to_be_func_def()))
print("")
print("%s%s" % (function_attrs, o.get_swap_to_host_func_def()))
print("")
print("%s%s" % (function_attrs, o.get_calc_msg_size_func_def()))
- if not o.is_reply and not o.is_event:
+ print("")
+ print("%s%s" % (function_attrs, o.get_verify_msg_size_func_def()))
+ if not o.is_reply and not o.is_event and not o.is_stream:
print("")
print("%s%s" % (function_attrs, o.get_alloc_func_def()))
print("")
print("%s%s" % (function_attrs, o.get_op_func_def()))
print("")
print("%s" % o.get_c_constructor())
- if o.is_reply or o.is_event:
+ if (o.is_reply or o.is_event) and not o.is_stream:
print("")
print("%s%s;" % (function_attrs, o.get_event_cb_func_def()))
elif hasattr(o, "get_swap_to_be_func_def"):
@@ -691,7 +891,12 @@ def gen_json_unified_header(parser, logger, j, io, name):
orig_stdout = sys.stdout
sys.stdout = io
include_guard = "__included_%s" % (
- j.replace(".", "_").replace("/", "_").replace("-", "_").replace("+", "_"))
+ j.replace(".", "_")
+ .replace("/", "_")
+ .replace("-", "_")
+ .replace("+", "_")
+ .replace("@", "_")
+ )
print("#ifndef %s" % include_guard)
print("#define %s" % include_guard)
print("")
@@ -703,24 +908,48 @@ def gen_json_unified_header(parser, logger, j, io, name):
print("#include <vapi/vapi_dbg.h>")
print("")
print("#ifdef __cplusplus")
- print("extern \"C\" {")
+ print('extern "C" {')
print("#endif")
- if name == "vpe.api.vapi.h":
+
+ print("#ifndef __vl_api_string_swap_fns_defined__")
+ print("#define __vl_api_string_swap_fns_defined__")
+ print("")
+ print("#include <vlibapi/api_types.h>")
+ print("")
+ function_attrs = "static inline "
+ o = parser.types["vl_api_string_t"]
+ print("%s%s" % (function_attrs, o.get_swap_to_be_func_def()))
+ print("")
+ print("%s%s" % (function_attrs, o.get_swap_to_host_func_def()))
+ print("")
+ print("#endif //__vl_api_string_swap_fns_defined__")
+
+ if name == "memclnt.api.vapi.h":
print("")
- print("static inline vapi_error_e vapi_send_with_control_ping "
- "(vapi_ctx_t ctx, void * msg, u32 context);")
+ print(
+ "static inline vapi_error_e vapi_send_with_control_ping "
+ "(vapi_ctx_t ctx, void * msg, u32 context);"
+ )
+ elif name == "vlib.api.vapi.h":
+ print("#include <vapi/memclnt.api.vapi.h>")
else:
- print("#include <vapi/vpe.api.vapi.h>")
+ print("#include <vapi/vlib.api.vapi.h>")
print("")
for m in parser.messages_by_json[j].values():
print("extern vapi_msg_id_t %s;" % m.get_msg_id_name())
print("")
- print("#define DEFINE_VAPI_MSG_IDS_%s\\" %
- f.replace(".", "_").replace("/", "_").replace("-", "_").upper())
- print("\\\n".join([
- " vapi_msg_id_t %s;" % m.get_msg_id_name()
- for m in parser.messages_by_json[j].values()
- ]))
+ print(
+ "#define DEFINE_VAPI_MSG_IDS_%s\\"
+ % f.replace(".", "_").replace("/", "_").replace("-", "_").upper()
+ )
+ print(
+ "\\\n".join(
+ [
+ " vapi_msg_id_t %s;" % m.get_msg_id_name()
+ for m in parser.messages_by_json[j].values()
+ ]
+ )
+ )
print("")
print("")
emitted = []
@@ -737,8 +966,22 @@ def gen_json_unified_header(parser, logger, j, io, name):
print("")
- if name == "vpe.api.vapi.h":
- print("%s" % vapi_send_with_control_ping)
+ if name == "vlib.api.vapi.h":
+ vapi_send_with_control_ping_function = """
+static inline vapi_error_e
+vapi_send_with_control_ping (vapi_ctx_t ctx, void *msg, u32 context)
+{
+ vapi_msg_control_ping *ping = vapi_alloc_control_ping (ctx);
+ if (!ping)
+ {
+ return VAPI_ENOMEM;
+ }
+ ping->header.context = context;
+ vapi_msg_control_ping_hton (ping);
+ return vapi_send2 (ctx, msg, ping);
+}
+"""
+ print("%s" % vapi_send_with_control_ping_function)
print("")
print("#ifdef __cplusplus")
@@ -765,12 +1008,11 @@ def gen_c_unified_headers(parser, logger, prefix, remove_path):
d, f = os.path.split(j)
else:
f = j
- with open('%s%s' % (prefix, json_to_c_header_name(f)), "w") as io:
- gen_json_unified_header(
- parser, logger, j, io, json_to_c_header_name(f))
+ with open("%s%s" % (prefix, json_to_c_header_name(f)), "w") as io:
+ gen_json_unified_header(parser, logger, j, io, json_to_c_header_name(f))
-if __name__ == '__main__':
+if __name__ == "__main__":
try:
verbose = int(os.getenv("V", 0))
except:
@@ -788,23 +1030,30 @@ if __name__ == '__main__':
logger.setLevel(log_level)
argparser = argparse.ArgumentParser(description="VPP C API generator")
- argparser.add_argument('files', metavar='api-file', action='append',
- type=str, help='json api file'
- '(may be specified multiple times)')
- argparser.add_argument('--prefix', action='store', default=None,
- help='path prefix')
- argparser.add_argument('--remove-path', action='store_true',
- help='remove path from filename')
+ argparser.add_argument(
+ "files",
+ metavar="api-file",
+ action="append",
+ type=str,
+ help="json api file" "(may be specified multiple times)",
+ )
+ argparser.add_argument("--prefix", action="store", default=None, help="path prefix")
+ argparser.add_argument(
+ "--remove-path", action="store_true", help="remove path from filename"
+ )
args = argparser.parse_args()
- jsonparser = JsonParser(logger, args.files,
- simple_type_class=CSimpleType,
- enum_class=CEnum,
- union_class=CUnion,
- struct_type_class=CStructType,
- field_class=CField,
- message_class=CMessage,
- alias_class=CAlias)
+ jsonparser = JsonParser(
+ logger,
+ args.files,
+ simple_type_class=CSimpleType,
+ enum_class=CEnum,
+ union_class=CUnion,
+ struct_type_class=CStructType,
+ field_class=CField,
+ message_class=CMessage,
+ alias_class=CAlias,
+ )
# not using the model of having separate generated header and code files
# with generated symbols present in shared library (per discussion with
diff --git a/src/vpp-api/vapi/vapi_c_test.c b/src/vpp-api/vapi/vapi_c_test.c
index efa6a735611..7a0e462e40a 100644
--- a/src/vpp-api/vapi/vapi_c_test.c
+++ b/src/vpp-api/vapi/vapi_c_test.c
@@ -24,8 +24,11 @@
#include <check.h>
#include <vppinfra/string.h>
#include <vapi/vapi.h>
+#include <vapi/memclnt.api.vapi.h>
+#include <vapi/vlib.api.vapi.h>
#include <vapi/vpe.api.vapi.h>
#include <vapi/interface.api.vapi.h>
+#include <vapi/mss_clamp.api.vapi.h>
#include <vapi/l2.api.vapi.h>
#include <fake.api.vapi.h>
@@ -34,11 +37,13 @@
DEFINE_VAPI_MSG_IDS_VPE_API_JSON;
DEFINE_VAPI_MSG_IDS_INTERFACE_API_JSON;
+DEFINE_VAPI_MSG_IDS_MSS_CLAMP_API_JSON;
DEFINE_VAPI_MSG_IDS_L2_API_JSON;
DEFINE_VAPI_MSG_IDS_FAKE_API_JSON;
static char *app_name = NULL;
static char *api_prefix = NULL;
+static bool use_uds = false;
static const int max_outstanding_requests = 64;
static const int response_queue_size = 32;
@@ -61,8 +66,9 @@ START_TEST (test_invalid_values)
ck_assert_ptr_eq (NULL, sv);
rv = vapi_send (ctx, sv);
ck_assert_int_eq (VAPI_EINVAL, rv);
- rv = vapi_connect (ctx, app_name, api_prefix, max_outstanding_requests,
- response_queue_size, VAPI_MODE_BLOCKING, true);
+ rv =
+ vapi_connect_ex (ctx, app_name, api_prefix, max_outstanding_requests,
+ response_queue_size, VAPI_MODE_BLOCKING, true, use_uds);
ck_assert_int_eq (VAPI_OK, rv);
rv = vapi_send (ctx, NULL);
ck_assert_int_eq (VAPI_EINVAL, rv);
@@ -363,8 +369,9 @@ START_TEST (test_connect)
vapi_ctx_t ctx;
vapi_error_e rv = vapi_ctx_alloc (&ctx);
ck_assert_int_eq (VAPI_OK, rv);
- rv = vapi_connect (ctx, app_name, api_prefix, max_outstanding_requests,
- response_queue_size, VAPI_MODE_BLOCKING, true);
+ rv =
+ vapi_connect_ex (ctx, app_name, api_prefix, max_outstanding_requests,
+ response_queue_size, VAPI_MODE_BLOCKING, true, use_uds);
ck_assert_int_eq (VAPI_OK, rv);
rv = vapi_disconnect (ctx);
ck_assert_int_eq (VAPI_OK, rv);
@@ -380,8 +387,9 @@ setup_blocking (void)
{
vapi_error_e rv = vapi_ctx_alloc (&ctx);
ck_assert_int_eq (VAPI_OK, rv);
- rv = vapi_connect (ctx, app_name, api_prefix, max_outstanding_requests,
- response_queue_size, VAPI_MODE_BLOCKING, true);
+ rv =
+ vapi_connect_ex (ctx, app_name, api_prefix, max_outstanding_requests,
+ response_queue_size, VAPI_MODE_BLOCKING, true, use_uds);
ck_assert_int_eq (VAPI_OK, rv);
}
@@ -390,8 +398,9 @@ setup_nonblocking (void)
{
vapi_error_e rv = vapi_ctx_alloc (&ctx);
ck_assert_int_eq (VAPI_OK, rv);
- rv = vapi_connect (ctx, app_name, api_prefix, max_outstanding_requests,
- response_queue_size, VAPI_MODE_NONBLOCKING, true);
+ rv = vapi_connect_ex (ctx, app_name, api_prefix, max_outstanding_requests,
+ response_queue_size, VAPI_MODE_NONBLOCKING, true,
+ use_uds);
ck_assert_int_eq (VAPI_OK, rv);
}
@@ -479,6 +488,48 @@ sw_interface_dump_cb (struct vapi_ctx_s *ctx, void *callback_ctx,
return VAPI_OK;
}
+vapi_error_e
+vapi_mss_clamp_enable_disable_reply_cb (
+ struct vapi_ctx_s *ctx, void *callback_ctx, vapi_error_e rv, bool is_last,
+ vapi_payload_mss_clamp_enable_disable_reply *reply)
+{
+ bool *x = callback_ctx;
+ *x = true;
+ return VAPI_OK;
+}
+
+vapi_error_e
+vapi_mss_clamp_get_reply_cb (struct vapi_ctx_s *ctx, void *callback_ctx,
+ vapi_error_e rv, bool is_last,
+ vapi_payload_mss_clamp_get_reply *reply)
+{
+ int *counter = callback_ctx;
+ ck_assert_int_gt (*counter, 0); // make sure details were called first
+ ++*counter;
+ ck_assert_int_eq (is_last, true);
+ printf ("Got mss clamp reply error %d\n", rv);
+ ck_assert_int_eq (rv, VAPI_OK);
+ printf ("counter is %d", *counter);
+ return VAPI_OK;
+}
+
+vapi_error_e
+vapi_mss_clamp_get_details_cb (struct vapi_ctx_s *ctx, void *callback_ctx,
+ vapi_error_e rv, bool is_last,
+ vapi_payload_mss_clamp_details *details)
+{
+ int *counter = callback_ctx;
+ ++*counter;
+ if (!is_last)
+ {
+ printf ("Got ipv4 mss clamp to %u for sw_if_index %u\n",
+ details->ipv4_mss, details->sw_if_index);
+ ck_assert_int_eq (details->ipv4_mss, 1000 + details->sw_if_index);
+ }
+ printf ("counter is %d", *counter);
+ return VAPI_OK;
+}
+
START_TEST (test_loopbacks_1)
{
printf ("--- Create/delete loopbacks using blocking API ---\n");
@@ -501,8 +552,11 @@ START_TEST (test_loopbacks_1)
for (i = 0; i < num_ifs; ++i)
{
vapi_msg_create_loopback *cl = vapi_alloc_create_loopback (ctx);
- memcpy (cl->payload.mac_address, mac_addresses[i],
- sizeof (cl->payload.mac_address));
+ int j;
+ for (j = 0; j < 6; ++j)
+ {
+ cl->payload.mac_address[j] = mac_addresses[i][j];
+ }
vapi_error_e rv =
vapi_create_loopback (ctx, cl, loopback_create_cb, &clcs[i]);
ck_assert_int_eq (VAPI_OK, rv);
@@ -516,6 +570,37 @@ START_TEST (test_loopbacks_1)
mac_addresses[i][3], mac_addresses[i][4], mac_addresses[i][5],
sw_if_indexes[i]);
}
+
+ { // new context
+ for (int i = 0; i < num_ifs; ++i)
+ {
+ vapi_msg_mss_clamp_enable_disable *mc =
+ vapi_alloc_mss_clamp_enable_disable (ctx);
+ mc->payload.sw_if_index = sw_if_indexes[i];
+ mc->payload.ipv4_mss = 1000 + sw_if_indexes[i];
+ mc->payload.ipv4_direction = MSS_CLAMP_DIR_RX;
+ bool reply_ctx = false;
+ printf ("Set ipv4 mss clamp to %u for sw_if_index %u\n",
+ mc->payload.ipv4_mss, mc->payload.sw_if_index);
+ vapi_error_e rv = vapi_mss_clamp_enable_disable (
+ ctx, mc, vapi_mss_clamp_enable_disable_reply_cb, &reply_ctx);
+ ck_assert_int_eq (VAPI_OK, rv);
+ ck_assert_int_eq (reply_ctx, true);
+ }
+ }
+
+ { // new context
+ int counter = 0;
+ vapi_msg_mss_clamp_get *msg = vapi_alloc_mss_clamp_get (ctx);
+ msg->payload.sw_if_index = ~0;
+ vapi_error_e rv =
+ vapi_mss_clamp_get (ctx, msg, vapi_mss_clamp_get_reply_cb, &counter,
+ vapi_mss_clamp_get_details_cb, &counter);
+ printf ("counter is %d", counter);
+ ck_assert_int_eq (VAPI_OK, rv);
+ ck_assert_int_eq (counter, num_ifs + 1);
+ }
+
bool seen[num_ifs];
sw_interface_dump_ctx dctx = { false, num_ifs, sw_if_indexes, seen, 0 };
vapi_msg_sw_interface_dump *dump;
@@ -525,7 +610,7 @@ START_TEST (test_loopbacks_1)
{
dctx.last_called = false;
clib_memset (&seen, 0, sizeof (seen));
- dump = vapi_alloc_sw_interface_dump (ctx);
+ dump = vapi_alloc_sw_interface_dump (ctx, 0);
while (VAPI_EAGAIN ==
(rv =
vapi_sw_interface_dump (ctx, dump, sw_interface_dump_cb,
@@ -554,7 +639,7 @@ START_TEST (test_loopbacks_1)
}
dctx.last_called = false;
clib_memset (&seen, 0, sizeof (seen));
- dump = vapi_alloc_sw_interface_dump (ctx);
+ dump = vapi_alloc_sw_interface_dump (ctx, 0);
while (VAPI_EAGAIN ==
(rv =
vapi_sw_interface_dump (ctx, dump, sw_interface_dump_cb, &dctx)))
@@ -580,7 +665,8 @@ START_TEST (test_show_version_3)
;
ck_assert_int_eq (VAPI_OK, rv);
ck_assert_int_eq (0, called);
- rv = vapi_dispatch (ctx);
+ while (VAPI_EAGAIN == (rv = vapi_dispatch (ctx)))
+ ;
ck_assert_int_eq (VAPI_OK, rv);
ck_assert_int_eq (1, called);
called = 0;
@@ -614,14 +700,16 @@ START_TEST (test_show_version_4)
ck_assert_int_eq (0, contexts[j]);
}
}
- rv = vapi_dispatch (ctx);
+ while (VAPI_EAGAIN == (rv = vapi_dispatch (ctx)))
+ ;
ck_assert_int_eq (VAPI_OK, rv);
for (i = 0; i < num_req; ++i)
{
ck_assert_int_eq (1, contexts[i]);
}
clib_memset (contexts, 0, sizeof (contexts));
- rv = vapi_dispatch (ctx);
+ while (VAPI_EAGAIN == (rv = vapi_dispatch (ctx)))
+ ;
ck_assert_int_eq (VAPI_OK, rv);
for (i = 0; i < num_req; ++i)
{
@@ -654,15 +742,19 @@ START_TEST (test_loopbacks_2)
for (i = 0; i < num_ifs; ++i)
{
vapi_msg_create_loopback *cl = vapi_alloc_create_loopback (ctx);
- memcpy (cl->payload.mac_address, mac_addresses[i],
- sizeof (cl->payload.mac_address));
+ int j;
+ for (j = 0; j < 6; ++j)
+ {
+ cl->payload.mac_address[j] = mac_addresses[i][j];
+ }
while (VAPI_EAGAIN ==
(rv =
vapi_create_loopback (ctx, cl, loopback_create_cb, &clcs[i])))
;
ck_assert_int_eq (VAPI_OK, rv);
}
- rv = vapi_dispatch (ctx);
+ while (VAPI_EAGAIN == (rv = vapi_dispatch (ctx)))
+ ;
ck_assert_int_eq (VAPI_OK, rv);
for (i = 0; i < num_ifs; ++i)
{
@@ -676,7 +768,7 @@ START_TEST (test_loopbacks_2)
bool seen[num_ifs];
clib_memset (&seen, 0, sizeof (seen));
sw_interface_dump_ctx dctx = { false, num_ifs, sw_if_indexes, seen, 0 };
- vapi_msg_sw_interface_dump *dump = vapi_alloc_sw_interface_dump (ctx);
+ vapi_msg_sw_interface_dump *dump = vapi_alloc_sw_interface_dump (ctx, 0);
while (VAPI_EAGAIN ==
(rv =
vapi_sw_interface_dump (ctx, dump, sw_interface_dump_cb, &dctx)))
@@ -687,7 +779,8 @@ START_TEST (test_loopbacks_2)
}
clib_memset (&seen, 0, sizeof (seen));
ck_assert_int_eq (false, dctx.last_called);
- rv = vapi_dispatch (ctx);
+ while (VAPI_EAGAIN == (rv = vapi_dispatch (ctx)))
+ ;
ck_assert_int_eq (VAPI_OK, rv);
for (i = 0; i < num_ifs; ++i)
{
@@ -705,7 +798,8 @@ START_TEST (test_loopbacks_2)
;
ck_assert_int_eq (VAPI_OK, rv);
}
- rv = vapi_dispatch (ctx);
+ while (VAPI_EAGAIN == (rv = vapi_dispatch (ctx)))
+ ;
ck_assert_int_eq (VAPI_OK, rv);
for (i = 0; i < num_ifs; ++i)
{
@@ -714,12 +808,13 @@ START_TEST (test_loopbacks_2)
}
clib_memset (&seen, 0, sizeof (seen));
dctx.last_called = false;
- dump = vapi_alloc_sw_interface_dump (ctx);
+ dump = vapi_alloc_sw_interface_dump (ctx, 0);
while (VAPI_EAGAIN ==
(rv =
vapi_sw_interface_dump (ctx, dump, sw_interface_dump_cb, &dctx)))
;
- rv = vapi_dispatch (ctx);
+ while (VAPI_EAGAIN == (rv = vapi_dispatch (ctx)))
+ ;
ck_assert_int_eq (VAPI_OK, rv);
for (i = 0; i < num_ifs; ++i)
{
@@ -758,7 +853,8 @@ START_TEST (test_show_version_5)
int called = 0;
vapi_set_generic_event_cb (ctx, generic_cb, &called);
ck_assert_int_eq (VAPI_OK, rv);
- rv = vapi_dispatch_one (ctx);
+ while (VAPI_EAGAIN == (rv = vapi_dispatch_one (ctx)))
+ ;
ck_assert_int_eq (VAPI_OK, rv);
ck_assert_int_eq (1, called);
sv = vapi_alloc_show_version (ctx);
@@ -768,7 +864,8 @@ START_TEST (test_show_version_5)
;
ck_assert_int_eq (VAPI_OK, rv);
vapi_clear_generic_event_cb (ctx);
- rv = vapi_dispatch_one (ctx);
+ while (VAPI_EAGAIN == (rv = vapi_dispatch_one (ctx)))
+ ;
ck_assert_int_eq (VAPI_OK, rv);
ck_assert_int_eq (1, called); /* needs to remain unchanged */
}
@@ -805,7 +902,8 @@ START_TEST (test_no_response_1)
(rv = vapi_show_version (ctx, sv, show_version_cb, &called)))
;
ck_assert_int_eq (VAPI_OK, rv);
- rv = vapi_dispatch (ctx);
+ while (VAPI_EAGAIN == (rv = vapi_dispatch (ctx)))
+ ;
ck_assert_int_eq (VAPI_OK, rv);
ck_assert_int_eq (2, called);
}
@@ -829,14 +927,15 @@ START_TEST (test_no_response_2)
{
printf ("--- Simulate no response to dump message ---\n");
vapi_error_e rv;
- vapi_msg_sw_interface_dump *dump = vapi_alloc_sw_interface_dump (ctx);
+ vapi_msg_sw_interface_dump *dump = vapi_alloc_sw_interface_dump (ctx, 0);
dump->header._vl_msg_id = ~0; /* malformed ID causes vpp to drop the msg */
int no_called = 0;
while (VAPI_EAGAIN ==
(rv = vapi_sw_interface_dump (ctx, dump, no_msg_cb, &no_called)))
;
ck_assert_int_eq (VAPI_OK, rv);
- rv = vapi_dispatch (ctx);
+ while (VAPI_EAGAIN == (rv = vapi_dispatch (ctx)))
+ ;
ck_assert_int_eq (VAPI_OK, rv);
ck_assert_int_eq (1, no_called);
}
@@ -910,6 +1009,7 @@ START_TEST (test_api_strings)
/* Assert nul terminator NOT present */
ck_assert_int_eq (vec_len (vstr), strlen (str));
vec_free (vstr);
+ free (dump);
}
END_TEST;
@@ -970,13 +1070,23 @@ test_suite (void)
int
main (int argc, char *argv[])
{
- if (3 != argc)
+ if (4 != argc)
{
printf ("Invalid argc==`%d'\n", argc);
return EXIT_FAILURE;
}
app_name = argv[1];
api_prefix = argv[2];
+ if (!strcmp (argv[3], "shm"))
+ use_uds = 0;
+ else if (!strcmp (argv[3], "uds"))
+ use_uds = 1;
+ else
+ {
+ printf ("Unrecognised required argument '%s', expected 'uds' or 'shm'.",
+ argv[3]);
+ return EXIT_FAILURE;
+ }
printf ("App name: `%s', API prefix: `%s'\n", app_name, api_prefix);
int number_failed;
diff --git a/src/vpp-api/vapi/vapi_common.h b/src/vpp-api/vapi/vapi_common.h
index 7157f0a8e0d..69b9b788b51 100644
--- a/src/vpp-api/vapi/vapi_common.h
+++ b/src/vpp-api/vapi/vapi_common.h
@@ -22,37 +22,34 @@
extern "C" {
#endif
-typedef enum
-{
- VAPI_OK = 0, /**< success */
- VAPI_EINVAL, /**< invalid value encountered */
- VAPI_EAGAIN, /**< operation would block */
- VAPI_ENOTSUP, /**< operation not supported */
- VAPI_ENOMEM, /**< out of memory */
- VAPI_ENORESP, /**< no response to request */
- VAPI_EMAP_FAIL, /**< failure while mapping api */
- VAPI_ECON_FAIL, /**< failure while connecting to vpp */
- VAPI_EINCOMPATIBLE, /**< fundamental incompatibility while connecting to vpp
- (control ping/control ping reply mismatch) */
- VAPI_MUTEX_FAILURE, /**< failure manipulating internal mutex(es) */
- VAPI_EUSER, /**< user error used for breaking dispatch,
- never used by VAPI */
-} vapi_error_e;
-
-typedef enum
-{
- VAPI_MODE_BLOCKING = 1, /**< operations block until response received */
- VAPI_MODE_NONBLOCKING = 2, /**< operations never block */
-} vapi_mode_e;
-
-typedef enum
-{
- VAPI_WAIT_FOR_READ, /**< wait until some message is readable */
- VAPI_WAIT_FOR_WRITE, /**< wait until a message can be written */
- VAPI_WAIT_FOR_READ_WRITE, /**< wait until a read or write can be done */
-} vapi_wait_mode_e;
-
-typedef unsigned int vapi_msg_id_t;
+ typedef enum
+ {
+ VAPI_OK = 0, /**< success */
+ VAPI_EINVAL, /**< invalid value encountered */
+ VAPI_EAGAIN, /**< operation would block */
+ VAPI_ENOTSUP, /**< operation not supported */
+ VAPI_ENOMEM, /**< out of memory */
+ VAPI_ENORESP, /**< no response to request */
+ VAPI_EMAP_FAIL, /**< failure while mapping api */
+ VAPI_ECON_FAIL, /**< failure while connecting to vpp */
+ VAPI_EINCOMPATIBLE, /**< fundamental incompatibility while connecting to
+ vpp (control ping/control ping reply mismatch) */
+ VAPI_MUTEX_FAILURE, /**< failure manipulating internal mutex(es) */
+ VAPI_EUSER, /**< user error used for breaking dispatch,
+ never used by VAPI */
+ VAPI_ENOTSOCK, /**< vapi socket doesn't refer to a socket */
+ VAPI_EACCES, /**< no write permission for socket */
+ VAPI_ECONNRESET, /**< connection reset by peer*/
+ VAPI_ESOCK_FAILURE, /**< generic socket failure, check errno */
+ } vapi_error_e;
+
+ typedef enum
+ {
+ VAPI_MODE_BLOCKING = 1, /**< operations block until response received */
+ VAPI_MODE_NONBLOCKING = 2, /**< operations never block */
+ } vapi_mode_e;
+
+ typedef unsigned int vapi_msg_id_t;
#define VAPI_INVALID_MSG_ID ((vapi_msg_id_t)(~0))
diff --git a/src/vpp-api/vapi/vapi_cpp_gen.py b/src/vpp-api/vapi/vapi_cpp_gen.py
index c6aa009cbb9..165730ca4b8 100755
--- a/src/vpp-api/vapi/vapi_cpp_gen.py
+++ b/src/vpp-api/vapi/vapi_cpp_gen.py
@@ -4,8 +4,16 @@ import argparse
import os
import sys
import logging
-from vapi_c_gen import CField, CEnum, CStruct, CSimpleType, CStructType,\
- CMessage, json_to_c_header_name, CAlias
+from vapi_c_gen import (
+ CField,
+ CEnum,
+ CStruct,
+ CSimpleType,
+ CStructType,
+ CMessage,
+ json_to_c_header_name,
+ CAlias,
+)
from vapi_json_parser import JsonParser
@@ -25,58 +33,76 @@ class CppAlias(CAlias):
pass
-class CppSimpleType (CSimpleType):
+class CppSimpleType(CSimpleType):
pass
-class CppStructType (CStructType, CppStruct):
+class CppStructType(CStructType, CppStruct):
pass
-class CppMessage (CMessage):
+class CppMessage(CMessage):
def get_swap_to_be_template_instantiation(self):
- return "\n".join([
- "template <> inline void vapi_swap_to_be<%s>(%s *msg)" %
- (self.get_c_name(), self.get_c_name()),
- "{",
- " %s(msg);" % self.get_swap_to_be_func_name(),
- "}",
- ])
+ return "\n".join(
+ [
+ "template <> inline void vapi_swap_to_be<%s>(%s *msg)"
+ % (self.get_c_name(), self.get_c_name()),
+ "{",
+ " %s(msg);" % self.get_swap_to_be_func_name(),
+ "}",
+ ]
+ )
def get_swap_to_host_template_instantiation(self):
- return "\n".join([
- "template <> inline void vapi_swap_to_host<%s>(%s *msg)" %
- (self.get_c_name(), self.get_c_name()),
- "{",
- " %s(msg);" % self.get_swap_to_host_func_name(),
- "}",
- ])
+ return "\n".join(
+ [
+ "template <> inline void vapi_swap_to_host<%s>(%s *msg)"
+ % (self.get_c_name(), self.get_c_name()),
+ "{",
+ " %s(msg);" % self.get_swap_to_host_func_name(),
+ "}",
+ ]
+ )
def get_alloc_template_instantiation(self):
- return "\n".join([
- "template <> inline %s* vapi_alloc<%s%s>"
- "(Connection &con%s)" %
- (self.get_c_name(), self.get_c_name(),
- ", size_t" * len(self.get_alloc_vla_param_names()),
- "".join([", size_t %s" % n for n in
- self.get_alloc_vla_param_names()])
- ),
- "{",
- " %s* result = %s(con.vapi_ctx%s);" %
- (self.get_c_name(), self.get_alloc_func_name(),
- "".join([", %s" % n
- for n in self.get_alloc_vla_param_names()])),
- "#if VAPI_CPP_DEBUG_LEAKS",
- " con.on_shm_data_alloc(result);",
- "#endif",
- " return result;",
- "}",
- ])
+ return "\n".join(
+ [
+ "template <> inline %s* vapi_alloc<%s%s>"
+ "(Connection &con%s)"
+ % (
+ self.get_c_name(),
+ self.get_c_name(),
+ ", size_t" * len(self.get_alloc_vla_param_names()),
+ "".join(
+ [", size_t %s" % n for n in self.get_alloc_vla_param_names()]
+ ),
+ ),
+ "{",
+ " %s* result = %s(con.vapi_ctx%s);"
+ % (
+ self.get_c_name(),
+ self.get_alloc_func_name(),
+ "".join([", %s" % n for n in self.get_alloc_vla_param_names()]),
+ ),
+ "#if VAPI_CPP_DEBUG_LEAKS",
+ " con.on_shm_data_alloc(result);",
+ "#endif",
+ " return result;",
+ "}",
+ ]
+ )
def get_cpp_name(self):
return "%s%s" % (self.name[0].upper(), self.name[1:])
def get_req_template_name(self):
+ if self.has_stream_msg:
+ return "Stream<%s, %s, %s>" % (
+ self.get_c_name(),
+ self.reply.get_c_name(),
+ self.stream_msg.get_c_name(),
+ )
+
if self.reply_is_stream:
template = "Dump"
else:
@@ -86,51 +112,60 @@ class CppMessage (CMessage):
template,
self.get_c_name(),
self.reply.get_c_name(),
- "".join([", size_t"] * len(self.get_alloc_vla_param_names()))
+ "".join([", size_t"] * len(self.get_alloc_vla_param_names())),
)
def get_req_template_instantiation(self):
return "template class %s;" % self.get_req_template_name()
def get_type_alias(self):
- return "using %s = %s;" % (
- self.get_cpp_name(), self.get_req_template_name())
+ return "using %s = %s;" % (self.get_cpp_name(), self.get_req_template_name())
def get_reply_template_name(self):
return "Msg<%s>" % (self.get_c_name())
def get_reply_type_alias(self):
- return "using %s = %s;" % (
- self.get_cpp_name(), self.get_reply_template_name())
+ return "using %s = %s;" % (self.get_cpp_name(), self.get_reply_template_name())
def get_msg_class_instantiation(self):
return "template class Msg<%s>;" % self.get_c_name()
def get_get_msg_id_t_instantiation(self):
- return "\n".join([
- ("template <> inline vapi_msg_id_t vapi_get_msg_id_t<%s>()"
- % self.get_c_name()),
- "{",
- " return ::%s; " % self.get_msg_id_name(),
- "}",
- "",
- ("template <> inline vapi_msg_id_t "
- "vapi_get_msg_id_t<Msg<%s>>()" % self.get_c_name()),
- "{",
- " return ::%s; " % self.get_msg_id_name(),
- "}",
- ])
+ return "\n".join(
+ [
+ (
+ "template <> inline vapi_msg_id_t vapi_get_msg_id_t<%s>()"
+ % self.get_c_name()
+ ),
+ "{",
+ " return ::%s; " % self.get_msg_id_name(),
+ "}",
+ "",
+ (
+ "template <> inline vapi_msg_id_t "
+ "vapi_get_msg_id_t<Msg<%s>>()" % self.get_c_name()
+ ),
+ "{",
+ " return ::%s; " % self.get_msg_id_name(),
+ "}",
+ ]
+ )
def get_cpp_constructor(self):
- return '\n'.join([
- ('static void __attribute__((constructor)) '
- '__vapi_cpp_constructor_%s()'
- % self.name),
- '{',
- (' vapi::vapi_msg_set_msg_id<%s>(%s);' % (
- self.get_c_name(), self.get_msg_id_name())),
- '}',
- ])
+ return "\n".join(
+ [
+ (
+ "static void __attribute__((constructor)) "
+ "__vapi_cpp_constructor_%s()" % self.name
+ ),
+ "{",
+ (
+ " vapi::vapi_msg_set_msg_id<%s>(%s);"
+ % (self.get_c_name(), self.get_msg_id_name())
+ ),
+ "}",
+ ]
+ )
def gen_json_header(parser, logger, j, io, gen_h_prefix, add_debug_comments):
@@ -139,7 +174,8 @@ def gen_json_header(parser, logger, j, io, gen_h_prefix, add_debug_comments):
sys.stdout = io
d, f = os.path.split(j)
include_guard = "__included_hpp_%s" % (
- f.replace(".", "_").replace("/", "_").replace("-", "_"))
+ f.replace(".", "_").replace("/", "_").replace("-", "_").replace("@", "_")
+ )
print("#ifndef %s" % include_guard)
print("#define %s" % include_guard)
print("")
@@ -167,7 +203,7 @@ def gen_json_header(parser, logger, j, io, gen_h_prefix, add_debug_comments):
print("/* m.get_cpp_constructor() */")
print("%s" % m.get_cpp_constructor())
print("")
- if not m.is_reply and not m.is_event:
+ if not m.is_reply and not m.is_event and not m.is_stream:
if add_debug_comments:
print("/* m.get_alloc_template_instantiation() */")
print("%s" % m.get_alloc_template_instantiation())
@@ -181,6 +217,8 @@ def gen_json_header(parser, logger, j, io, gen_h_prefix, add_debug_comments):
print("/* m.get_reply_type_alias() */")
print("%s" % m.get_reply_type_alias())
continue
+ if m.is_stream:
+ continue
if add_debug_comments:
print("/* m.get_req_template_instantiation() */")
print("%s" % m.get_req_template_instantiation())
@@ -201,8 +239,9 @@ def json_to_cpp_header_name(json_name):
raise Exception("Unexpected json name `%s'!" % json_name)
-def gen_cpp_headers(parser, logger, prefix, gen_h_prefix, remove_path,
- add_debug_comments=False):
+def gen_cpp_headers(
+ parser, logger, prefix, gen_h_prefix, remove_path, add_debug_comments=False
+):
if prefix == "" or prefix is None:
prefix = ""
else:
@@ -216,12 +255,11 @@ def gen_cpp_headers(parser, logger, prefix, gen_h_prefix, remove_path,
d, f = os.path.split(j)
else:
f = j
- with open('%s%s' % (prefix, json_to_cpp_header_name(f)), "w") as io:
- gen_json_header(parser, logger, j, io,
- gen_h_prefix, add_debug_comments)
+ with open("%s%s" % (prefix, json_to_cpp_header_name(f)), "w") as io:
+ gen_json_header(parser, logger, j, io, gen_h_prefix, add_debug_comments)
-if __name__ == '__main__':
+if __name__ == "__main__":
try:
verbose = int(os.getenv("V", 0))
except:
@@ -239,27 +277,36 @@ if __name__ == '__main__':
logger.setLevel(log_level)
argparser = argparse.ArgumentParser(description="VPP C++ API generator")
- argparser.add_argument('files', metavar='api-file', action='append',
- type=str, help='json api file'
- '(may be specified multiple times)')
- argparser.add_argument('--prefix', action='store', default=None,
- help='path prefix')
- argparser.add_argument('--gen-h-prefix', action='store', default=None,
- help='generated C header prefix')
- argparser.add_argument('--remove-path', action='store_true',
- help='remove path from filename')
+ argparser.add_argument(
+ "files",
+ metavar="api-file",
+ action="append",
+ type=str,
+ help="json api file" "(may be specified multiple times)",
+ )
+ argparser.add_argument("--prefix", action="store", default=None, help="path prefix")
+ argparser.add_argument(
+ "--gen-h-prefix", action="store", default=None, help="generated C header prefix"
+ )
+ argparser.add_argument(
+ "--remove-path", action="store_true", help="remove path from filename"
+ )
args = argparser.parse_args()
- jsonparser = JsonParser(logger, args.files,
- simple_type_class=CppSimpleType,
- struct_type_class=CppStructType,
- field_class=CppField,
- enum_class=CppEnum,
- message_class=CppMessage,
- alias_class=CppAlias)
-
- gen_cpp_headers(jsonparser, logger, args.prefix, args.gen_h_prefix,
- args.remove_path)
+ jsonparser = JsonParser(
+ logger,
+ args.files,
+ simple_type_class=CppSimpleType,
+ struct_type_class=CppStructType,
+ field_class=CppField,
+ enum_class=CppEnum,
+ message_class=CppMessage,
+ alias_class=CppAlias,
+ )
+
+ gen_cpp_headers(
+ jsonparser, logger, args.prefix, args.gen_h_prefix, args.remove_path
+ )
for e in jsonparser.exceptions:
logger.warning(e)
diff --git a/src/vpp-api/vapi/vapi_cpp_test.cpp b/src/vpp-api/vapi/vapi_cpp_test.cpp
index 1dd58f85484..918c7590b60 100644
--- a/src/vpp-api/vapi/vapi_cpp_test.cpp
+++ b/src/vpp-api/vapi/vapi_cpp_test.cpp
@@ -21,17 +21,21 @@
#include <assert.h>
#include <setjmp.h>
#include <check.h>
+#include <vapi/memclnt.api.vapi.h>
#include <vapi/vapi.hpp>
#include <vapi/vpe.api.vapi.hpp>
#include <vapi/interface.api.vapi.hpp>
+#include <vapi/mss_clamp.api.vapi.hpp>
#include <fake.api.vapi.hpp>
DEFINE_VAPI_MSG_IDS_VPE_API_JSON;
DEFINE_VAPI_MSG_IDS_INTERFACE_API_JSON;
+DEFINE_VAPI_MSG_IDS_MSS_CLAMP_API_JSON;
DEFINE_VAPI_MSG_IDS_FAKE_API_JSON;
static char *app_name = nullptr;
static char *api_prefix = nullptr;
+static bool use_uds = false;
static const int max_outstanding_requests = 32;
static const int response_queue_size = 32;
@@ -57,8 +61,9 @@ Connection con;
void setup (void)
{
- vapi_error_e rv = con.connect (
- app_name, api_prefix, max_outstanding_requests, response_queue_size);
+ vapi_error_e rv =
+ con.connect (app_name, api_prefix, max_outstanding_requests,
+ response_queue_size, true, use_uds);
ck_assert_int_eq (VAPI_OK, rv);
}
@@ -144,9 +149,53 @@ START_TEST (test_loopbacks_1)
}
{ // new context
+ for (int i = 0; i < num_ifs; ++i)
+ {
+ Mss_clamp_enable_disable d (con);
+ auto &req = d.get_request ().get_payload ();
+ req.sw_if_index = sw_if_indexes[i];
+ req.ipv4_mss = 1420;
+ req.ipv4_direction = vapi_enum_mss_clamp_dir::MSS_CLAMP_DIR_RX;
+ auto rv = d.execute ();
+ ck_assert_int_eq (VAPI_OK, rv);
+ WAIT_FOR_RESPONSE (d, rv);
+ ck_assert_int_eq (VAPI_OK, rv);
+ }
+ }
+
+ { // new context
+ bool seen[num_ifs] = { 0 };
+ Mss_clamp_get d (con);
+ d.get_request ().get_payload ().sw_if_index = ~0;
+ auto rv = d.execute ();
+ ck_assert_int_eq (VAPI_OK, rv);
+ WAIT_FOR_RESPONSE (d, rv);
+ ck_assert_int_eq (VAPI_OK, rv);
+ auto &rs = d.get_result_set ();
+ for (auto &r : rs)
+ {
+ auto &p = r.get_payload ();
+ ck_assert_int_eq (p.ipv4_mss, 1420);
+ printf ("tcp-clamp: sw_if_idx %u ip4-mss %d dir %d\n", p.sw_if_index,
+ p.ipv4_mss, p.ipv4_direction);
+ for (int i = 0; i < num_ifs; ++i)
+ {
+ if (sw_if_indexes[i] == p.sw_if_index)
+ {
+ ck_assert_int_eq (0, seen[i]);
+ seen[i] = true;
+ }
+ }
+ }
+ for (int i = 0; i < num_ifs; ++i)
+ {
+ ck_assert_int_eq (1, seen[i]);
+ }
+ }
+
+ { // new context
bool seen[num_ifs] = {0};
- Sw_interface_dump d (con);
- d.get_request ().get_payload ();
+ Sw_interface_dump d (con, 0);
auto rv = d.execute ();
ck_assert_int_eq (VAPI_OK, rv);
WAIT_FOR_RESPONSE (d, rv);
@@ -185,8 +234,7 @@ START_TEST (test_loopbacks_1)
}
{ // new context
- Sw_interface_dump d (con);
- d.get_request ().get_payload ();
+ Sw_interface_dump d (con, 0);
auto rv = d.execute ();
ck_assert_int_eq (VAPI_OK, rv);
WAIT_FOR_RESPONSE (d, rv);
@@ -207,7 +255,7 @@ END_TEST;
struct Create_loopback_cb
{
- Create_loopback_cb () : called{0}, sw_if_index{0} {};
+ Create_loopback_cb () : called{ 0 }, sw_if_index{ 0 }, seen{ false } {};
int called;
u32 sw_if_index;
bool seen;
@@ -222,7 +270,7 @@ struct Create_loopback_cb
struct Delete_loopback_cb
{
- Delete_loopback_cb () : called{0}, sw_if_index{0} {};
+ Delete_loopback_cb () : called{ 0 }, sw_if_index{ 0 }, seen{ false } {};
int called;
u32 sw_if_index;
bool seen;
@@ -301,8 +349,7 @@ START_TEST (test_loopbacks_2)
}
Sw_interface_dump_cb<num_ifs> swdcb (ccbs);
- Sw_interface_dump d (con, std::ref (swdcb));
- d.get_request ().get_payload ();
+ Sw_interface_dump d (con, 0, std::ref (swdcb));
auto rv = d.execute ();
ck_assert_int_eq (VAPI_OK, rv);
WAIT_FOR_RESPONSE (d, rv);
@@ -328,8 +375,7 @@ START_TEST (test_loopbacks_2)
}
{ // new context
- Sw_interface_dump d (con);
- d.get_request ().get_payload ();
+ Sw_interface_dump d (con, 0);
auto rv = d.execute ();
ck_assert_int_eq (VAPI_OK, rv);
WAIT_FOR_RESPONSE (d, rv);
@@ -408,14 +454,25 @@ Suite *test_suite (void)
int main (int argc, char *argv[])
{
- if (3 != argc)
+ if (4 != argc)
{
printf ("Invalid argc==`%d'\n", argc);
return EXIT_FAILURE;
}
app_name = argv[1];
api_prefix = argv[2];
- printf ("App name: `%s', API prefix: `%s'\n", app_name, api_prefix);
+ if (!strcmp (argv[3], "shm"))
+ use_uds = 0;
+ else if (!strcmp (argv[3], "uds"))
+ use_uds = 1;
+ else
+ {
+ printf ("Unrecognised required argument '%s', expected 'uds' or 'shm'.",
+ argv[3]);
+ return EXIT_FAILURE;
+ }
+ printf ("App name: `%s', API prefix: `%s', use unix sockets %d\n", app_name,
+ api_prefix, use_uds);
int number_failed;
Suite *s;
diff --git a/src/vpp-api/vapi/vapi_doc.md b/src/vpp-api/vapi/vapi_doc.md
deleted file mode 100644
index 0e7e29dde01..00000000000
--- a/src/vpp-api/vapi/vapi_doc.md
+++ /dev/null
@@ -1,155 +0,0 @@
-# VPP API module {#vapi_doc}
-
-## Overview
-
-VPP API module allows communicating with VPP over shared memory interface.
-The API consists of 3 parts:
-
-* common code - low-level API
-* generated code - high-level API
-* code generator - to generate your own high-level API e.g. for custom plugins
-
-### Common code
-
-#### C common code
-
-C common code represents the basic, low-level API, providing functions to
-connect/disconnect, perform message discovery and send/receive messages.
-The C variant is in vapi.h.
-
-#### C++ common code
-
-C++ is provided by vapi.hpp and contains high-level API templates,
-which are specialized by generated code.
-
-### Generated code
-
-Each API file present in the source tree is automatically translated to JSON
-file, which the code generator parses and generates either C (`vapi_c_gen.py`)
-or C++ (`vapi_cpp_gen.py`) code.
-
-This can then be included in the client application and provides convenient way
-to interact with VPP. This includes:
-
-* automatic byte-swapping
-* automatic request-response matching based on context
-* automatic casts to appropriate types (type-safety) when calling callbacks
-* automatic sending of control-pings for dump messages
-
-The API supports two modes of operation:
-
-* blocking
-* non-blocking
-
-In blocking mode, whenever an operation is initiated, the code waits until it
-can finish. This means that when sending a message, the call blocks until
-the message can be written to shared memory. Similarly, receiving a message
-blocks until a message becomes available. On higher level, this also means that
-when doing a request (e.g. `show_version`), the call blocks until a response
-comes back (e.g. `show_version_reply`).
-
-In non-blocking mode, these are decoupled, the API returns VAPI_EAGAIN whenever
-an operation cannot be performed and after sending a request, it's up to
-the client to wait for and process a response.
-
-### Code generator
-
-Python code generator comes in two flavors - C and C++ and generates high-level
-API headers. All the code is stored in the headers.
-
-## Usage
-
-### Low-level API
-
-Refer to inline API documentation in doxygen format in `vapi.h` header
-for description of functions. It's recommened to use the safer, high-level
-API provided by specialized headers (e.g. `vpe.api.vapi.h`
-or `vpe.api.vapi.hpp`).
-
-#### C high-level API
-
-##### Callbacks
-
-The C high-level API is strictly callback-based for maximum efficiency.
-Whenever an operation is initiated a callback with a callback context is part
-of that operation. The callback is then invoked when the response (or multiple
-responses) arrive which are tied to the request. Also, callbacks are invoked
-whenever an event arrives, if such callback is registered. All the pointers
-to responses/events point to shared memory and are immediately freed after
-callback finishes so the client needs to extract/copy any data in which it
-is interested in.
-
-#### Blocking mode
-
-In simple blocking mode, the whole operation (being a simple request or a dump)
-is finished and it's callback is called (potentially multiple times for dumps)
-during function call.
-
-Example pseudo-code for a simple request in this mode:
-
-`
-vapi_show_version(message, callback, callback_context)
-
-1. generate unique internal context and assign it to message.header.context
-2. byteswap the message to network byte order
-3. send message to vpp (message is now consumed and vpp will free it)
-4. create internal "outstanding request context" which stores the callback,
- callback context and the internal context value
-5. call dispatch, which in this mode receives and processes responses until
- the internal "outstanding requests" queue is empty. In blocking mode, this
- queue always contains at most one item.
-`
-
-**Note**: it's possible for different - unrelated callbacks to be called before
-the response callbacks is called in cases where e.g. events are stored
-in shared memory queue.
-
-#### Non-blocking mode
-
-In non-blocking mode, all the requests are only byte-swapped and the context
-information along with callbacks is stored locally (so in the above example,
-only steps 1-4 are executed and step 5 is skipped). Calling dispatch is up to
-the client application. This allows to alternate between sending/receiving
-messages or have a dedicated thread which calls dispatch.
-
-### C++ high level API
-
-#### Callbacks
-
-In C++ API, the response is automatically tied to the corresponding `Request`,
-`Dump` or `Event_registration` object. Optionally a callback might be specified,
-which then gets called when the response is received.
-
-**Note**: responses take up shared memory space and should be freed either
-manually (in case of result sets) or automatically (by destroying the object
-owning them) when no longer needed. Once a Request or Dump object was executed,
-it cannot be re-sent, since the request itself (stores in shared memory)
-is consumed by vpp and inaccessible (set to nullptr) anymore.
-
-#### Usage
-
-#### Requests & dumps
-
-0. Create on object of `Connection` type and call `connect()` to connect to vpp.
-1. Create an object of `Request` or `Dump` type using it's typedef (e.g.
- `Show_version`)
-2. Use `get_request()` to obtain and manipulate the underlying request if
- required.
-3. Issue `execute()` to send the request.
-4. Use either `wait_for_response()` or `dispatch()` to wait for the response.
-5. Use `get_response_state()` to get the state and `get_response()` to read
- the response.
-
-#### Events
-
-0. Create a `Connection` and execute the appropriate `Request` to subscribe to
- events (e.g. `Want_stats`)
-1. Create an `Event_registration` with a template argument being the type of
- event you are insterested in.
-2. Call `dispatch()` or `wait_for_response()` to wait for the event. A callback
- will be called when an event occurs (if passed to `Event_registration()`
- constructor). Alternatively, read the result set.
-
-**Note**: events stored in the result set take up space in shared memory
-and should be freed regularly (e.g. in the callback, once the event is
-processed).
diff --git a/src/vpp-api/vapi/vapi_doc.rst b/src/vpp-api/vapi/vapi_doc.rst
new file mode 100644
index 00000000000..4efbf2d9988
--- /dev/null
+++ b/src/vpp-api/vapi/vapi_doc.rst
@@ -0,0 +1,191 @@
+.. _vapi_doc:
+
+VPP API module
+==============
+
+Overview
+--------
+
+VPP API module allows communicating with VPP over shared memory
+interface. The API consists of 3 parts:
+
+- common code - low-level API
+- generated code - high-level API
+- code generator - to generate your own high-level API e.g. for custom
+ plugins
+
+Common code
+~~~~~~~~~~~
+
+C common code
+^^^^^^^^^^^^^
+
+C common code represents the basic, low-level API, providing functions
+to connect/disconnect, perform message discovery and send/receive
+messages. The C variant is in vapi.h.
+
+.. _c-common-code-1:
+
+C++ common code
+^^^^^^^^^^^^^^^
+
+C++ is provided by vapi.hpp and contains high-level API templates, which
+are specialized by generated code.
+
+Generated code
+~~~~~~~~~~~~~~
+
+Each API file present in the source tree is automatically translated to
+JSON file, which the code generator parses and generates either C
+(``vapi_c_gen.py``) or C++ (``vapi_cpp_gen.py``) code.
+
+This can then be included in the client application and provides
+convenient way to interact with VPP. This includes:
+
+- automatic byte-swapping
+- automatic request-response matching based on context
+- automatic casts to appropriate types (type-safety) when calling
+ callbacks
+- automatic sending of control-pings for dump messages
+
+The API supports two modes of operation:
+
+- blocking
+- non-blocking
+
+In blocking mode, whenever an operation is initiated, the code waits
+until it can finish. This means that when sending a message, the call
+blocks until the message can be written to shared memory. Similarly,
+receiving a message blocks until a message becomes available. On higher
+level, this also means that when doing a request
+(e.g. ``show_version``), the call blocks until a response comes back
+(e.g. ``show_version_reply``).
+
+In non-blocking mode, these are decoupled, the API returns VAPI_EAGAIN
+whenever an operation cannot be performed and after sending a request,
+it’s up to the client to wait for and process a response.
+
+Code generator
+~~~~~~~~~~~~~~
+
+Python code generator comes in two flavors - C and C++ and generates
+high-level API headers. All the code is stored in the headers.
+
+Usage
+-----
+
+Low-level API
+~~~~~~~~~~~~~
+
+Refer to inline API documentation in doxygen format in ``vapi.h`` header
+for description of functions. It’s recommended to use the safer,
+high-level API provided by specialized headers (e.g. ``vpe.api.vapi.h``
+or ``vpe.api.vapi.hpp``).
+
+C high-level API
+^^^^^^^^^^^^^^^^
+
+Callbacks
+'''''''''
+
+The C high-level API is strictly callback-based for maximum efficiency.
+Whenever an operation is initiated a callback with a callback context is
+part of that operation. The callback is then invoked when the response
+(or multiple responses) arrive which are tied to the request. Also,
+callbacks are invoked whenever an event arrives, if such callback is
+registered. All the pointers to responses/events point to shared memory
+and are immediately freed after callback finishes so the client needs to
+extract/copy any data in which it is interested in.
+
+Blocking mode
+^^^^^^^^^^^^^
+
+In simple blocking mode, the whole operation (being a simple request or
+a dump) is finished and it’s callback is called (potentially multiple
+times for dumps) during function call.
+
+Example pseudo-code for a simple request in this mode:
+
+\` vapi_show_version(message, callback, callback_context)
+
+1. generate unique internal context and assign it to
+ message.header.context
+2. byteswap the message to network byte order
+3. send message to vpp (message is now consumed and vpp will free it)
+4. create internal “outstanding request context” which stores the
+ callback, callback context and the internal context value
+5. call dispatch, which in this mode receives and processes responses
+ until the internal “outstanding requests” queue is empty. In blocking
+ mode, this queue always contains at most one item. \`
+
+**Note**: it’s possible for different - unrelated callbacks to be called
+before the response callbacks is called in cases where e.g. events are
+stored in shared memory queue.
+
+Non-blocking mode
+^^^^^^^^^^^^^^^^^
+
+In non-blocking mode, all the requests are only byte-swapped and the
+context information along with callbacks is stored locally (so in the
+above example, only steps 1-4 are executed and step 5 is skipped).
+Calling dispatch is up to the client application. This allows to
+alternate between sending/receiving messages or have a dedicated thread
+which calls dispatch.
+
+.. _c-high-level-api-1:
+
+C++ high level API
+~~~~~~~~~~~~~~~~~~
+
+.. _callbacks-1:
+
+Callbacks
+^^^^^^^^^
+
+In C++ API, the response is automatically tied to the corresponding
+``Request``, ``Dump`` or ``Event_registration`` object. Optionally a
+callback might be specified, which then gets called when the response is
+received.
+
+**Note**: responses take up shared memory space and should be freed
+either manually (in case of result sets) or automatically (by destroying
+the object owning them) when no longer needed. Once a Request or Dump
+object was executed, it cannot be re-sent, since the request itself
+(stores in shared memory) is consumed by vpp and inaccessible (set to
+nullptr) anymore.
+
+.. _usage-1:
+
+Usage
+^^^^^
+
+Requests & dumps
+^^^^^^^^^^^^^^^^
+
+0. Create on object of ``Connection`` type and call ``connect()`` to
+ connect to vpp.
+1. Create an object of ``Request`` or ``Dump`` type using it’s typedef
+ (e.g. ``Show_version``)
+2. Use ``get_request()`` to obtain and manipulate the underlying request
+ if required.
+3. Issue ``execute()`` to send the request.
+4. Use either ``wait_for_response()`` or ``dispatch()`` to wait for the
+ response.
+5. Use ``get_response_state()`` to get the state and ``get_response()``
+ to read the response.
+
+Events
+^^^^^^
+
+0. Create a ``Connection`` and execute the appropriate ``Request`` to
+ subscribe to events (e.g. ``Want_stats``)
+1. Create an ``Event_registration`` with a template argument being the
+ type of event you are interested in.
+2. Call ``dispatch()`` or ``wait_for_response()`` to wait for the event.
+ A callback will be called when an event occurs (if passed to
+ ``Event_registration()`` constructor). Alternatively, read the result
+ set.
+
+**Note**: events stored in the result set take up space in shared memory
+and should be freed regularly (e.g. in the callback, once the event is
+processed).
diff --git a/src/vpp-api/vapi/vapi_internal.h b/src/vpp-api/vapi/vapi_internal.h
index e9a9726d86e..ca47dd10459 100644
--- a/src/vpp-api/vapi/vapi_internal.h
+++ b/src/vpp-api/vapi/vapi_internal.h
@@ -82,6 +82,7 @@ typedef vapi_error_e (*vapi_cb_t) (struct vapi_ctx_s *, void *, vapi_error_e,
bool, void *);
typedef void (*generic_swap_fn_t) (void *payload);
+typedef int (*verify_msg_size_fn_t) (void *msg, uword buf_size);
typedef struct
{
@@ -92,7 +93,7 @@ typedef struct
bool has_context;
unsigned int context_offset;
unsigned int payload_offset;
- size_t size;
+ verify_msg_size_fn_t verify_msg_size;
generic_swap_fn_t swap_to_be;
generic_swap_fn_t swap_to_host;
vapi_msg_id_t id; /* assigned at run-time */
@@ -117,12 +118,21 @@ bool vapi_requests_full (vapi_ctx_t ctx);
size_t vapi_get_request_count (vapi_ctx_t ctx);
size_t vapi_get_max_request_count (vapi_ctx_t ctx);
u32 vapi_gen_req_context (vapi_ctx_t ctx);
-void vapi_store_request (vapi_ctx_t ctx, u32 context, bool is_dump,
- vapi_cb_t callback, void *callback_ctx);
+
+enum vapi_request_type
+{
+ VAPI_REQUEST_REG = 0,
+ VAPI_REQUEST_DUMP = 1,
+ VAPI_REQUEST_STREAM = 2,
+};
+
+void vapi_store_request (vapi_ctx_t ctx, u32 context,
+ vapi_msg_id_t response_id,
+ enum vapi_request_type type, vapi_cb_t callback,
+ void *callback_ctx);
int vapi_get_payload_offset (vapi_msg_id_t id);
void (*vapi_get_swap_to_host_func (vapi_msg_id_t id)) (void *payload);
void (*vapi_get_swap_to_be_func (vapi_msg_id_t id)) (void *payload);
-size_t vapi_get_message_size (vapi_msg_id_t id);
size_t vapi_get_context_offset (vapi_msg_id_t id);
bool vapi_msg_is_with_context (vapi_msg_id_t id);
size_t vapi_get_message_count();
diff --git a/src/vpp-api/vapi/vapi_json_parser.py b/src/vpp-api/vapi/vapi_json_parser.py
index 1383d456bf1..c06cb8cf77b 100644
--- a/src/vpp-api/vapi/vapi_json_parser.py
+++ b/src/vpp-api/vapi/vapi_json_parser.py
@@ -3,7 +3,7 @@
import json
-class ParseError (Exception):
+class ParseError(Exception):
pass
@@ -13,14 +13,12 @@ magic_suffix = "_t"
def remove_magic(what):
if what.startswith(magic_prefix) and what.endswith(magic_suffix):
- return what[len(magic_prefix): - len(magic_suffix)]
+ return what[len(magic_prefix) : -len(magic_suffix)]
return what
class Field(object):
-
- def __init__(self, field_name, field_type, array_len=None,
- nelem_field=None):
+ def __init__(self, field_name, field_type, array_len=None, nelem_field=None):
self.name = field_name
self.type = field_type
self.len = array_len
@@ -30,17 +28,23 @@ class Field(object):
if self.len is None:
return "Field(name: %s, type: %s)" % (self.name, self.type)
elif type(self.len) == dict:
- return "Field(name: %s, type: %s, length: %s)" % (self.name,
- self.type,
- self.len)
+ return "Field(name: %s, type: %s, length: %s)" % (
+ self.name,
+ self.type,
+ self.len,
+ )
elif self.len > 0:
- return "Field(name: %s, type: %s, length: %s)" % (self.name,
- self.type,
- self.len)
+ return "Field(name: %s, type: %s, length: %s)" % (
+ self.name,
+ self.type,
+ self.len,
+ )
else:
- return (
- "Field(name: %s, type: %s, variable length stored in: %s)" %
- (self.name, self.type, self.nelem_field))
+ return "Field(name: %s, type: %s, variable length stored in: %s)" % (
+ self.name,
+ self.type,
+ self.nelem_field,
+ )
def is_vla(self):
return self.nelem_field is not None
@@ -61,32 +65,38 @@ class Type(object):
return self.name
-class SimpleType (Type):
-
+class SimpleType(Type):
def has_vla(self):
return False
def get_msg_header_defs(struct_type_class, field_class, json_parser, logger):
return [
- struct_type_class(['msg_header1_t',
- ['u16', '_vl_msg_id'],
- ['u32', 'context'],
- ],
- json_parser, field_class, logger
- ),
- struct_type_class(['msg_header2_t',
- ['u16', '_vl_msg_id'],
- ['u32', 'client_index'],
- ['u32', 'context'],
- ],
- json_parser, field_class, logger
- ),
+ struct_type_class(
+ [
+ "msg_header1_t",
+ ["u16", "_vl_msg_id"],
+ ["u32", "context"],
+ ],
+ json_parser,
+ field_class,
+ logger,
+ ),
+ struct_type_class(
+ [
+ "msg_header2_t",
+ ["u16", "_vl_msg_id"],
+ ["u32", "client_index"],
+ ["u32", "context"],
+ ],
+ json_parser,
+ field_class,
+ logger,
+ ),
]
class Struct(object):
-
def __init__(self, name, fields):
self.name = name
self.fields = fields
@@ -112,7 +122,7 @@ class Enum(SimpleType):
def __str__(self):
return "Enum(%s, [%s])" % (
self.name,
- "], [" .join(["%s => %s" % (i, j) for i, j in self.value_pairs])
+ "], [".join(["%s => %s" % (i, j) for i, j in self.value_pairs]),
)
@@ -126,7 +136,7 @@ class Union(Type):
def __str__(self):
return "Union(%s, [%s])" % (
self.name,
- "], [" .join(["%s %s" % (i, j) for i, j in self.type_pairs])
+ "], [".join(["%s %s" % (i, j) for i, j in self.type_pairs]),
)
def has_vla(self):
@@ -134,7 +144,6 @@ class Union(Type):
class Message(object):
-
def __init__(self, logger, definition, json_parser):
struct_type_class = json_parser.struct_type_class
field_class = json_parser.field_class
@@ -149,23 +158,26 @@ class Message(object):
self.header = None
self.is_reply = json_parser.is_reply(self.name)
self.is_event = json_parser.is_event(self.name)
+ self.is_stream = json_parser.is_stream(self.name)
fields = []
- for header in get_msg_header_defs(struct_type_class, field_class,
- json_parser, logger):
+ for header in get_msg_header_defs(
+ struct_type_class, field_class, json_parser, logger
+ ):
logger.debug("Probing header `%s'" % header.name)
if header.is_part_of_def(m[1:]):
self.header = header
logger.debug("Found header `%s'" % header.name)
- fields.append(field_class(field_name='header',
- field_type=self.header))
+ fields.append(field_class(field_name="header", field_type=self.header))
ignore = False
break
if ignore and not self.is_event and not self.is_reply:
- raise ParseError("While parsing message `%s': could not find all "
- "common header fields" % name)
+ raise ParseError(
+ "While parsing message `%s': could not find all "
+ "common header fields" % name
+ )
for field in m[1:]:
- if isinstance(field, dict) and 'crc' in field:
- self.crc = field['crc']
+ if isinstance(field, dict) and "crc" in field:
+ self.crc = field["crc"]
logger.debug("Found CRC `%s'" % self.crc)
continue
else:
@@ -175,25 +187,28 @@ class Message(object):
if any(type(n) is dict for n in field):
l -= 1
if l == 2:
- if self.header is not None and\
- self.header.has_field(field[1]):
+ if self.header is not None and self.header.has_field(field[1]):
continue
- p = field_class(field_name=field[1],
- field_type=field_type)
+ p = field_class(field_name=field[1], field_type=field_type)
elif l == 3:
- if field[2] == 0 and field[0] != 'string':
+ if field[2] == 0 and field[0] != "string":
raise ParseError(
"While parsing message `%s': variable length "
"array `%s' doesn't have reference to member "
- "containing the actual length" % (
- name, field[1]))
- if field[0] == 'string' and field[2] > 0:
- field_type = json_parser.lookup_type_like_id('u8')
-
- p = field_class(
- field_name=field[1],
- field_type=field_type,
- array_len=field[2])
+ "containing the actual length" % (name, field[1])
+ )
+ if field[0] == "string" and field[2] == 0:
+ field_type = json_parser.lookup_type_like_id("vl_api_string_t")
+ p = field_class(field_name=field[1], field_type=field_type)
+ else:
+ if field[0] == "string" and field[2] > 0:
+ field_type = json_parser.lookup_type_like_id("u8")
+
+ p = field_class(
+ field_name=field[1],
+ field_type=field_type,
+ array_len=field[2],
+ )
elif l == 4:
nelem_field = None
for f in fields:
@@ -203,17 +218,19 @@ class Message(object):
raise ParseError(
"While parsing message `%s': couldn't find "
"variable length array `%s' member containing "
- "the actual length `%s'" % (
- name, field[1], field[3]))
+ "the actual length `%s'" % (name, field[1], field[3])
+ )
p = field_class(
field_name=field[1],
field_type=field_type,
array_len=field[2],
- nelem_field=nelem_field)
+ nelem_field=nelem_field,
+ )
else:
- raise Exception("Don't know how to parse message "
- "definition for message `%s': `%s'" %
- (m, m[1:]))
+ raise Exception(
+ "Don't know how to parse message "
+ "definition for message `%s': `%s'" % (m, m[1:])
+ )
logger.debug("Parsed field `%s'" % p)
fields.append(p)
self.fields = fields
@@ -221,35 +238,54 @@ class Message(object):
logger.debug("Parsed message: %s" % self)
def __str__(self):
- return "Message(%s, [%s], {crc: %s}" % \
- (self.name,
- "], [".join([str(f) for f in self.fields]),
- self.crc)
-
+ return "Message(%s, [%s], {crc: %s}" % (
+ self.name,
+ "], [".join([str(f) for f in self.fields]),
+ self.crc,
+ )
-class StructType (Type, Struct):
+class StructType(Type, Struct):
def __init__(self, definition, json_parser, field_class, logger):
t = definition
logger.debug("Parsing struct definition `%s'" % t)
name = t[0]
fields = []
for field in t[1:]:
- if len(field) == 1 and 'crc' in field:
- self.crc = field['crc']
+ if len(field) == 1 and "crc" in field:
+ self.crc = field["crc"]
continue
field_type = json_parser.lookup_type_like_id(field[0])
logger.debug("Parsing type field `%s'" % field)
if len(field) == 2:
- p = field_class(field_name=field[1],
- field_type=field_type)
+ p = field_class(field_name=field[1], field_type=field_type)
elif len(field) == 3:
if field[2] == 0:
- raise ParseError("While parsing type `%s': array `%s' has "
- "variable length" % (name, field[1]))
- p = field_class(field_name=field[1],
- field_type=field_type,
- array_len=field[2])
+ if name == "vl_api_string_t":
+ p = None
+ for f in fields:
+ if f.name == "length":
+ nelem_field = f
+ p = field_class(
+ field_name=field[1],
+ field_type=field_type,
+ array_len=field[2],
+ nelem_field=nelem_field,
+ )
+ break
+ if p is None:
+ raise ParseError(
+ "While parsing type `%s': missing `length'" % name
+ )
+ else:
+ raise ParseError(
+ "While parsing type `%s': array `%s' has "
+ "variable length" % (name, field[1])
+ )
+ else:
+ p = field_class(
+ field_name=field[1], field_type=field_type, array_len=field[2]
+ )
elif len(field) == 4:
nelem_field = None
for f in fields:
@@ -259,23 +295,25 @@ class StructType (Type, Struct):
raise ParseError(
"While parsing message `%s': couldn't find "
"variable length array `%s' member containing "
- "the actual length `%s'" % (
- name, field[1], field[3]))
- p = field_class(field_name=field[1],
- field_type=field_type,
- array_len=field[2],
- nelem_field=nelem_field)
+ "the actual length `%s'" % (name, field[1], field[3])
+ )
+ p = field_class(
+ field_name=field[1],
+ field_type=field_type,
+ array_len=field[2],
+ nelem_field=nelem_field,
+ )
else:
raise ParseError(
"Don't know how to parse field `%s' of type definition "
- "for type `%s'" % (field, t))
+ "for type `%s'" % (field, t)
+ )
fields.append(p)
Type.__init__(self, name)
Struct.__init__(self, name, fields)
def __str__(self):
- return "StructType(%s, %s)" % (Type.__str__(self),
- Struct.__str__(self))
+ return "StructType(%s, %s)" % (Type.__str__(self), Struct.__str__(self))
def has_field(self, name):
return name in self.field_names
@@ -289,32 +327,57 @@ class StructType (Type, Struct):
if field[0] != p.type.name:
raise ParseError(
"Unexpected field type `%s' (should be `%s'), "
- "while parsing msg/def/field `%s/%s/%s'" %
- (field[0], p.type, p.name, definition, field))
+ "while parsing msg/def/field `%s/%s/%s'"
+ % (field[0], p.type, p.name, definition, field)
+ )
return True
class JsonParser(object):
- def __init__(self, logger, files, simple_type_class=SimpleType,
- enum_class=Enum, union_class=Union,
- struct_type_class=StructType, field_class=Field,
- message_class=Message, alias_class=Alias):
+ def __init__(
+ self,
+ logger,
+ files,
+ simple_type_class=SimpleType,
+ enum_class=Enum,
+ union_class=Union,
+ struct_type_class=StructType,
+ field_class=Field,
+ message_class=Message,
+ alias_class=Alias,
+ ):
self.services = {}
self.messages = {}
self.enums = {}
+ self.enumflags = {}
self.unions = {}
self.aliases = {}
self.types = {
- x: simple_type_class(x) for x in [
- 'i8', 'i16', 'i32', 'i64',
- 'u8', 'u16', 'u32', 'u64',
- 'f64', 'bool'
+ x: simple_type_class(x)
+ for x in [
+ "i8",
+ "i16",
+ "i32",
+ "i64",
+ "u8",
+ "u16",
+ "u32",
+ "u64",
+ "f64",
+ "bool",
]
}
- self.types['string'] = simple_type_class('vl_api_string_t')
+ self.types["string"] = simple_type_class("u8")
+ self.types["vl_api_string_t"] = struct_type_class(
+ ["vl_api_string_t", ["u32", "length"], ["u8", "buf", 0]],
+ self,
+ field_class,
+ logger,
+ )
self.replies = set()
self.events = set()
+ self.streams = set()
self.simple_type_class = simple_type_class
self.enum_class = enum_class
self.union_class = union_class
@@ -345,15 +408,17 @@ class JsonParser(object):
self.messages_by_json[path] = {}
with open(path) as f:
j = json.load(f)
- for k in j['services']:
+ for k in j["services"]:
if k in self.services:
raise ParseError("Duplicate service `%s'" % k)
- self.services[k] = j['services'][k]
+ self.services[k] = j["services"][k]
self.replies.add(self.services[k]["reply"])
if "events" in self.services[k]:
for x in self.services[k]["events"]:
self.events.add(x)
- for e in j['enums']:
+ if "stream_msg" in self.services[k]:
+ self.streams.add(self.services[k]["stream_msg"])
+ for e in j["enums"]:
name = e[0]
value_pairs = e[1:-1]
enumtype = self.types[e[-1]["enumtype"]]
@@ -361,18 +426,27 @@ class JsonParser(object):
self.enums[enum.name] = enum
self.logger.debug("Parsed enum: %s" % enum)
self.enums_by_json[path].append(enum)
+ for e in j["enumflags"]:
+ name = e[0]
+ value_pairs = e[1:-1]
+ enumtype = self.types[e[-1]["enumtype"]]
+ enum = self.enum_class(name, value_pairs, enumtype)
+ self.enums[enum.name] = enum
+ self.logger.debug("Parsed enumflag: %s" % enum)
+ self.enums_by_json[path].append(enum)
exceptions = []
progress = 0
last_progress = 0
while True:
- for u in j['unions']:
+ for u in j["unions"]:
name = u[0]
if name in self.unions:
progress = progress + 1
continue
try:
- type_pairs = [[self.lookup_type_like_id(t), n]
- for t, n in u[1:]]
+ type_pairs = [
+ [self.lookup_type_like_id(t), n] for t, n in u[1:]
+ ]
union = self.union_class(name, type_pairs, 0)
progress = progress + 1
except ParseError as e:
@@ -381,17 +455,16 @@ class JsonParser(object):
self.unions[union.name] = union
self.logger.debug("Parsed union: %s" % union)
self.unions_by_json[path].append(union)
- for t in j['types']:
+ for t in j["types"]:
if t[0] in self.types:
progress = progress + 1
continue
try:
- type_ = self.struct_type_class(t, self,
- self.field_class,
- self.logger)
+ type_ = self.struct_type_class(
+ t, self, self.field_class, self.logger
+ )
if type_.name in self.types:
- raise ParseError(
- "Duplicate type `%s'" % type_.name)
+ raise ParseError("Duplicate type `%s'" % type_.name)
progress = progress + 1
except ParseError as e:
exceptions.append(e)
@@ -399,16 +472,16 @@ class JsonParser(object):
self.types[type_.name] = type_
self.types_by_json[path].append(type_)
self.logger.debug("Parsed type: %s" % type_)
- for name, body in j['aliases'].items():
+ for name, body in j["aliases"].items():
if name in self.aliases:
progress = progress + 1
continue
- if 'length' in body:
- array_len = body['length']
+ if "length" in body:
+ array_len = body["length"]
else:
array_len = None
try:
- t = self.lookup_type_like_id(body['type'])
+ t = self.lookup_type_like_id(body["type"])
except ParseError as e:
exceptions.append(e)
continue
@@ -430,14 +503,13 @@ class JsonParser(object):
processed = []
while True:
exceptions = []
- for m in j['messages']:
+ for m in j["messages"]:
if m in processed:
continue
try:
msg = self.message_class(self.logger, m, self)
if msg.name in self.messages:
- raise ParseError(
- "Duplicate message `%s'" % msg.name)
+ raise ParseError("Duplicate message `%s'" % msg.name)
except ParseError as e:
exceptions.append(e)
continue
@@ -456,6 +528,8 @@ class JsonParser(object):
return self.types[name]
elif name in self.enums:
return self.enums[name]
+ elif name in self.enumflags:
+ return self.enumflags[name]
elif name in self.unions:
return self.unions[name]
elif name in self.aliases:
@@ -464,13 +538,16 @@ class JsonParser(object):
return self.types[mundane_name]
elif mundane_name in self.enums:
return self.enums[mundane_name]
+ elif mundane_name in self.enumflags:
+ return self.enumflags[mundane_name]
elif mundane_name in self.unions:
return self.unions[mundane_name]
elif mundane_name in self.aliases:
return self.aliases[mundane_name]
raise ParseError(
"Could not find type, enum or union by magic name `%s' nor by "
- "mundane name `%s'" % (name, mundane_name))
+ "mundane name `%s'" % (name, mundane_name)
+ )
def is_reply(self, message):
return message in self.replies
@@ -478,8 +555,22 @@ class JsonParser(object):
def is_event(self, message):
return message in self.events
+ def is_stream(self, message):
+ return message in self.streams
+
+ def has_stream_msg(self, message):
+ return (
+ message.name in self.services
+ and "stream_msg" in self.services[message.name]
+ )
+
+ def get_stream_msg(self, message):
+ if not self.has_stream_msg(message):
+ return None
+ return self.messages[self.services[message.name]["stream_msg"]]
+
def get_reply(self, message):
- return self.messages[self.services[message]['reply']]
+ return self.messages[self.services[message]["reply"]]
def finalize_parsing(self):
if len(self.messages) == 0:
@@ -489,21 +580,20 @@ class JsonParser(object):
remove = []
for n, m in j.items():
try:
- if not m.is_reply and not m.is_event:
+ if not m.is_reply and not m.is_event and not m.is_stream:
try:
m.reply = self.get_reply(n)
+ m.reply_is_stream = False
+ m.has_stream_msg = self.has_stream_msg(m)
if "stream" in self.services[m.name]:
- m.reply_is_stream = \
- self.services[m.name]["stream"]
- else:
- m.reply_is_stream = False
+ m.reply_is_stream = self.services[m.name]["stream"]
+ if m.has_stream_msg:
+ m.stream_msg = self.get_stream_msg(m)
m.reply.request = m
except:
- raise ParseError(
- "Cannot find reply to message `%s'" % n)
+ raise ParseError("Cannot find reply to message `%s'" % n)
except ParseError as e:
self.exceptions.append(e)
remove.append(n)
- self.messages_by_json[jn] = {
- k: v for k, v in j.items() if k not in remove}
+ self.messages_by_json[jn] = {k: v for k, v in j.items() if k not in remove}
diff --git a/src/vpp/CMakeLists.txt b/src/vpp/CMakeLists.txt
index 6cd2838f0e5..84144e4d059 100644
--- a/src/vpp/CMakeLists.txt
+++ b/src/vpp/CMakeLists.txt
@@ -28,6 +28,12 @@ add_custom_target(vpp_version_h
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/app/version.h
)
+install(
+ FILES ${CMAKE_CURRENT_BINARY_DIR}/app/version.h
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vpp/app
+ COMPONENT vpp-dev
+)
+
##############################################################################
# vpp binary
##############################################################################
@@ -38,6 +44,12 @@ configure_file(
${CMAKE_CURRENT_BINARY_DIR}/vnet/config.h
)
+install(
+ FILES ${CMAKE_CURRENT_BINARY_DIR}/vnet/config.h
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vpp/vnet
+ COMPONENT vpp-dev
+)
+
set(VPP_API_FILES
api/vpe_types.api
api/vpe.api
@@ -51,7 +63,7 @@ foreach(file ${VPP_API_FILES})
FILES ${CMAKE_CURRENT_BINARY_DIR}/${file}.h
${CMAKE_CURRENT_BINARY_DIR}/${file}_enum.h
${CMAKE_CURRENT_BINARY_DIR}/${file}_types.h
- DESTINATION include/vpp/${dir}
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vpp/${dir}
COMPONENT vpp-dev
)
endforeach()
@@ -60,8 +72,6 @@ set(VPP_SOURCES
vnet/main.c
app/vpe_cli.c
app/version.c
- stats/stat_segment.c
- stats/stat_segment_provider.c
api/api.c
api/json_format.c
api/types.c
@@ -73,7 +83,6 @@ if(VPP_API_TEST_BUILTIN)
api/api_main.c
api/plugin.c
api/types.c
- ../vnet/arp/arp_test.c
)
add_definitions(-DVPP_API_TEST_BUILTIN=1)
endif()
@@ -81,23 +90,16 @@ endif()
add_vpp_executable(vpp
ENABLE_EXPORTS
SOURCES ${VPP_SOURCES}
- LINK_LIBRARIES svm vlib vppinfra vlibmemory vnet Threads::Threads ${CMAKE_DL_LIBS}
+ LINK_LIBRARIES svm vlib vppinfra vlibmemory vnet Threads::Threads ${CMAKE_DL_LIBS} ${EPOLL_LIB}
DEPENDS vpp_version_h api_headers
)
-add_vpp_headers(vpp
- api/vpe_msg_enum.h
- api/vpe_all_api_h.h
- stats/stat_segment.h
- stats/stat_segment_shared.h
-)
-
##############################################################################
# vppctl binary
##############################################################################
add_vpp_executable(vppctl
SOURCES app/vppctl.c
- LINK_LIBRARIES vppinfra
+ LINK_LIBRARIES vppinfra ${EPOLL_LIB}
)
##############################################################################
@@ -120,7 +122,7 @@ add_vpp_executable(vpp_get_stats
add_vpp_executable(vpp_prometheus_export
SOURCES app/vpp_prometheus_export.c
- LINK_LIBRARIES vppapiclient vppinfra svm vlibmemoryclient
+ LINK_LIBRARIES vppapiclient vppinfra svm vlibmemoryclient ${EPOLL_LIB}
DEPENDS api_headers
)
@@ -132,8 +134,8 @@ add_vpp_library(vppmem_preload
LINK_LIBRARIES vppinfra
)
-install(FILES conf/startup.conf DESTINATION etc/vpp COMPONENT vpp)
-install(FILES conf/80-vpp.conf DESTINATION etc/sysctl.d COMPONENT vpp)
+install(FILES conf/startup.conf DESTINATION ${CMAKE_INSTALL_SYSCONFDIR}/vpp COMPONENT vpp)
+install(FILES conf/80-vpp.conf DESTINATION ${CMAKE_INSTALL_SYSCONFDIR}/sysctl.d COMPONENT vpp)
##############################################################################
# VAT2 plugins
@@ -142,6 +144,10 @@ add_vpp_test_library(vpp
${VPP_API_FILES}
)
+add_vat_test_library(vpp
+ api/api_test.c
+)
+
##############################################################################
# minimal interactive startup.conf - only if not present
##############################################################################
diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c
index b935c002228..6041b066578 100644
--- a/src/vpp/api/api.c
+++ b/src/vpp/api/api.c
@@ -58,38 +58,14 @@
#include <vnet/ip/format.h>
-#include <vpp/api/vpe_msg_enum.h>
#include <vpp/api/types.h>
-#include <vnet/classify/classify.api_enum.h>
-#include <vnet/ip/ip.api_enum.h>
-
-#define vl_typedefs /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_typedefs
-#define vl_endianfun /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_endianfun
-/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define vl_printfun
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_printfun
-#include <vlibapi/api_helper_macros.h>
-#define foreach_vpe_api_msg \
-_(CONTROL_PING, control_ping) \
-_(CLI, cli) \
-_(CLI_INBAND, cli_inband) \
-_(GET_NODE_INDEX, get_node_index) \
-_(ADD_NODE_NEXT, add_node_next) \
-_(SHOW_VERSION, show_version) \
-_(SHOW_THREADS, show_threads) \
-_(GET_NODE_GRAPH, get_node_graph) \
-_(GET_NEXT_INDEX, get_next_index) \
-_(LOG_DUMP, log_dump) \
-_(SHOW_VPE_SYSTEM_TIME, show_vpe_system_time) \
-_(GET_F64_ENDIAN_VALUE, get_f64_endian_value) \
-_(GET_F64_INCREMENT_BY_ONE, get_f64_increment_by_one) \
+#include <vpp/api/vpe.api_enum.h>
+#include <vpp/api/vpe.api_types.h>
+
+static u16 msg_id_base;
+#define REPLY_MSG_ID_BASE msg_id_base
+#include <vlibapi/api_helper_macros.h>
#define QUOTE_(x) #x
#define QUOTE(x) QUOTE_(x)
@@ -125,122 +101,6 @@ memclnt_delete_callback (u32 client_index)
VL_MSG_API_REAPER_FUNCTION (memclnt_delete_callback);
static void
-vl_api_control_ping_t_handler (vl_api_control_ping_t * mp)
-{
- vl_api_control_ping_reply_t *rmp;
- int rv = 0;
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_CONTROL_PING_REPLY,
- ({
- rmp->vpe_pid = ntohl (getpid());
- }));
- /* *INDENT-ON* */
-}
-
-static void
-shmem_cli_output (uword arg, u8 * buffer, uword buffer_bytes)
-{
- u8 **shmem_vecp = (u8 **) arg;
- u8 *shmem_vec;
- void *oldheap;
- u32 offset;
-
- shmem_vec = *shmem_vecp;
-
- offset = vec_len (shmem_vec);
-
- oldheap = vl_msg_push_heap ();
-
- vec_validate (shmem_vec, offset + buffer_bytes - 1);
-
- clib_memcpy (shmem_vec + offset, buffer, buffer_bytes);
-
- vl_msg_pop_heap (oldheap);
-
- *shmem_vecp = shmem_vec;
-}
-
-
-static void
-vl_api_cli_t_handler (vl_api_cli_t * mp)
-{
- vl_api_cli_reply_t *rp;
- vl_api_registration_t *reg;
- vlib_main_t *vm = vlib_get_main ();
- unformat_input_t input;
- u8 *shmem_vec = 0;
- void *oldheap;
-
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;;
-
- rp = vl_msg_api_alloc (sizeof (*rp));
- rp->_vl_msg_id = ntohs (VL_API_CLI_REPLY);
- rp->context = mp->context;
-
- unformat_init_vector (&input, (u8 *) (uword) mp->cmd_in_shmem);
-
- vlib_cli_input (vm, &input, shmem_cli_output, (uword) & shmem_vec);
-
- oldheap = vl_msg_push_heap ();
- vec_add1 (shmem_vec, 0);
- vl_msg_pop_heap (oldheap);
-
- rp->reply_in_shmem = (uword) shmem_vec;
-
- vl_api_send_msg (reg, (u8 *) rp);
-}
-
-static void
-inband_cli_output (uword arg, u8 * buffer, uword buffer_bytes)
-{
- u8 **mem_vecp = (u8 **) arg;
- u8 *mem_vec = *mem_vecp;
- u32 offset = vec_len (mem_vec);
-
- vec_validate (mem_vec, offset + buffer_bytes - 1);
- clib_memcpy (mem_vec + offset, buffer, buffer_bytes);
- *mem_vecp = mem_vec;
-}
-
-static void
-vl_api_cli_inband_t_handler (vl_api_cli_inband_t * mp)
-{
- vl_api_cli_inband_reply_t *rmp;
- int rv = 0;
- vlib_main_t *vm = vlib_get_main ();
- unformat_input_t input;
- u8 *out_vec = 0;
- u8 *cmd_vec = 0;
-
- if (vl_msg_api_get_msg_length (mp) <
- vl_api_string_len (&mp->cmd) + sizeof (*mp))
- {
- rv = -1;
- goto error;
- }
-
- cmd_vec = vl_api_from_api_to_new_vec (mp, &mp->cmd);
-
- unformat_init_string (&input, (char *) cmd_vec,
- vl_api_string_len (&mp->cmd));
- rv = vlib_cli_input (vm, &input, inband_cli_output, (uword) & out_vec);
- unformat_free (&input);
-
-error:
- /* *INDENT-OFF* */
- REPLY_MACRO3(VL_API_CLI_INBAND_REPLY, vec_len (out_vec),
- ({
- vl_api_vec_to_api_string(out_vec, &rmp->reply);
- }));
- /* *INDENT-ON* */
- vec_free (out_vec);
- vec_free (cmd_vec);
-}
-
-static void
vl_api_show_version_t_handler (vl_api_show_version_t * mp)
{
vl_api_show_version_reply_t *rmp;
@@ -249,7 +109,6 @@ vl_api_show_version_t_handler (vl_api_show_version_t * mp)
char *vpe_api_get_version (void);
char *vpe_api_get_build_date (void);
- /* *INDENT-OFF* */
REPLY_MACRO2(VL_API_SHOW_VERSION_REPLY,
({
strncpy ((char *) rmp->program, "vpe", ARRAY_LEN(rmp->program)-1);
@@ -260,209 +119,16 @@ vl_api_show_version_t_handler (vl_api_show_version_t * mp)
strncpy ((char *) rmp->build_date, vpe_api_get_build_date(),
ARRAY_LEN(rmp->build_date)-1);
}));
- /* *INDENT-ON* */
-}
-
-static void
-get_thread_data (vl_api_thread_data_t * td, int index)
-{
- vlib_worker_thread_t *w = vlib_worker_threads + index;
- td->id = htonl (index);
- if (w->name)
- strncpy ((char *) td->name, (char *) w->name, ARRAY_LEN (td->name) - 1);
- if (w->registration)
- strncpy ((char *) td->type, (char *) w->registration->name,
- ARRAY_LEN (td->type) - 1);
- td->pid = htonl (w->lwp);
- td->cpu_id = htonl (w->cpu_id);
- td->core = htonl (w->core_id);
- td->cpu_socket = htonl (w->numa_id);
-}
-
-static void
-vl_api_show_threads_t_handler (vl_api_show_threads_t * mp)
-{
- int count = 0;
-
-#if !defined(__powerpc64__)
- vl_api_registration_t *reg;
- vl_api_show_threads_reply_t *rmp;
- vl_api_thread_data_t *td;
- int i, msg_size = 0;
- count = vec_len (vlib_worker_threads);
- if (!count)
- return;
-
- msg_size = sizeof (*rmp) + sizeof (rmp->thread_data[0]) * count;
- reg = vl_api_client_index_to_registration (mp->client_index);
- if (!reg)
- return;
-
- rmp = vl_msg_api_alloc (msg_size);
- clib_memset (rmp, 0, msg_size);
- rmp->_vl_msg_id = htons (VL_API_SHOW_THREADS_REPLY);
- rmp->context = mp->context;
- rmp->count = htonl (count);
- td = rmp->thread_data;
-
- for (i = 0; i < count; i++)
- {
- get_thread_data (&td[i], i);
- }
-
- vl_api_send_msg (reg, (u8 *) rmp);
-#else
-
- /* unimplemented support */
- rv = -9;
- clib_warning ("power pc does not support show threads api");
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_SHOW_THREADS_REPLY,
- ({
- rmp->count = htonl(count);
- }));
- /* *INDENT-ON* */
-#endif
-}
-
-static void
-vl_api_get_node_index_t_handler (vl_api_get_node_index_t * mp)
-{
- vlib_main_t *vm = vlib_get_main ();
- vl_api_get_node_index_reply_t *rmp;
- vlib_node_t *n;
- int rv = 0;
- u32 node_index = ~0;
-
- n = vlib_get_node_by_name (vm, mp->node_name);
-
- if (n == 0)
- rv = VNET_API_ERROR_NO_SUCH_NODE;
- else
- node_index = n->index;
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_GET_NODE_INDEX_REPLY,
- ({
- rmp->node_index = htonl(node_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_get_next_index_t_handler (vl_api_get_next_index_t * mp)
-{
- vlib_main_t *vm = vlib_get_main ();
- vl_api_get_next_index_reply_t *rmp;
- vlib_node_t *node, *next_node;
- int rv = 0;
- u32 next_node_index = ~0, next_index = ~0;
- uword *p;
-
- node = vlib_get_node_by_name (vm, mp->node_name);
-
- if (node == 0)
- {
- rv = VNET_API_ERROR_NO_SUCH_NODE;
- goto out;
- }
-
- next_node = vlib_get_node_by_name (vm, mp->next_name);
-
- if (next_node == 0)
- {
- rv = VNET_API_ERROR_NO_SUCH_NODE2;
- goto out;
- }
- else
- next_node_index = next_node->index;
-
- p = hash_get (node->next_slot_by_node, next_node_index);
-
- if (p == 0)
- {
- rv = VNET_API_ERROR_NO_SUCH_ENTRY;
- goto out;
- }
- else
- next_index = p[0];
-
-out:
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_GET_NEXT_INDEX_REPLY,
- ({
- rmp->next_index = htonl(next_index);
- }));
- /* *INDENT-ON* */
}
static void
-vl_api_add_node_next_t_handler (vl_api_add_node_next_t * mp)
+vl_api_show_vpe_system_time_t_handler (vl_api_show_vpe_system_time_t *mp)
{
- vlib_main_t *vm = vlib_get_main ();
- vl_api_add_node_next_reply_t *rmp;
- vlib_node_t *n, *next;
int rv = 0;
- u32 next_index = ~0;
-
- n = vlib_get_node_by_name (vm, mp->node_name);
-
- if (n == 0)
- {
- rv = VNET_API_ERROR_NO_SUCH_NODE;
- goto out;
- }
-
- next = vlib_get_node_by_name (vm, mp->next_name);
-
- if (next == 0)
- rv = VNET_API_ERROR_NO_SUCH_NODE2;
- else
- next_index = vlib_node_add_next (vm, n->index, next->index);
-
-out:
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_ADD_NODE_NEXT_REPLY,
- ({
- rmp->next_index = htonl(next_index);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_get_node_graph_t_handler (vl_api_get_node_graph_t * mp)
-{
- int rv = 0;
- u8 *vector = 0;
- vlib_main_t *vm = vlib_get_main ();
- void *oldheap;
- vl_api_get_node_graph_reply_t *rmp;
- static vlib_node_t ***node_dups;
- static vlib_main_t **stat_vms;
-
- oldheap = vl_msg_push_heap ();
-
- /*
- * Keep the number of memcpy ops to a minimum (e.g. 1).
- */
- vec_validate (vector, 16384);
- vec_reset_length (vector);
-
- vlib_node_get_nodes (vm, 0 /* main threads */ ,
- 0 /* include stats */ ,
- 1 /* barrier sync */ ,
- &node_dups, &stat_vms);
- vector = vlib_node_serialize (vm, node_dups, vector, 1 /* include nexts */ ,
- 1 /* include stats */ );
-
- vl_msg_pop_heap (oldheap);
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_GET_NODE_GRAPH_REPLY,
- ({
- rmp->reply_in_shmem = (uword) vector;
- }));
- /* *INDENT-ON* */
+ vl_api_show_vpe_system_time_reply_t *rmp;
+ REPLY_MACRO2 (
+ VL_API_SHOW_VPE_SYSTEM_TIME_REPLY,
+ ({ rmp->vpe_system_time = clib_host_to_net_f64 (unix_time_now ()); }));
}
static void
@@ -481,7 +147,7 @@ show_log_details (vl_api_registration_t * reg, u32 context,
rmp = vl_msg_api_alloc (msg_size);
clib_memset (rmp, 0, msg_size);
- rmp->_vl_msg_id = ntohs (VL_API_LOG_DETAILS);
+ rmp->_vl_msg_id = ntohs (VL_API_LOG_DETAILS + msg_id_base);
rmp->context = context;
rmp->timestamp = clib_host_to_net_f64 (timestamp);
@@ -530,51 +196,6 @@ vl_api_log_dump_t_handler (vl_api_log_dump_t * mp)
}
-static void
-vl_api_show_vpe_system_time_t_handler (vl_api_show_vpe_system_time_t * mp)
-{
- int rv = 0;
- vl_api_show_vpe_system_time_reply_t *rmp;
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_SHOW_VPE_SYSTEM_TIME_REPLY,
- ({
- rmp->vpe_system_time = clib_host_to_net_f64 (unix_time_now ());
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_get_f64_endian_value_t_handler (vl_api_get_f64_endian_value_t * mp)
-{
- int rv = 0;
- f64 one = 1.0;
- vl_api_get_f64_endian_value_reply_t *rmp;
- if (1.0 != clib_net_to_host_f64 (mp->f64_one))
- rv = VNET_API_ERROR_API_ENDIAN_FAILED;
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_GET_F64_ENDIAN_VALUE_REPLY,
- ({
- rmp->f64_one_result = clib_host_to_net_f64 (one);
- }));
- /* *INDENT-ON* */
-}
-
-static void
-vl_api_get_f64_increment_by_one_t_handler (vl_api_get_f64_increment_by_one_t *
- mp)
-{
- int rv = 0;
- vl_api_get_f64_increment_by_one_reply_t *rmp;
-
- /* *INDENT-OFF* */
- REPLY_MACRO2(VL_API_GET_F64_INCREMENT_BY_ONE_REPLY,
- ({
- rmp->f64_value = clib_host_to_net_f64 (clib_net_to_host_f64(mp->f64_value) + 1.0);
- }));
- /* *INDENT-ON* */
-}
-
#define BOUNCE_HANDLER(nn) \
static void vl_api_##nn##_t_handler ( \
vl_api_##nn##_t *mp) \
@@ -605,8 +226,6 @@ static void vl_api_##nn##_t_handler ( \
vl_msg_api_free (mp); \
}
-static void setup_message_id_table (api_main_t * am);
-
/*
* vpe_api_hookup
* Add vpe's API message handlers to the table.
@@ -614,40 +233,21 @@ static void setup_message_id_table (api_main_t * am);
* added the client registration handlers.
* See .../open-repo/vlib/memclnt_vlib.c:memclnt_process()
*/
+#include <vpp/api/vpe.api.c>
static clib_error_t *
vpe_api_hookup (vlib_main_t * vm)
{
api_main_t *am = vlibapi_get_main ();
-#define _(N,n) \
- vl_msg_api_set_handlers(VL_API_##N, #n, \
- vl_api_##n##_t_handler, \
- vl_noop_handler, \
- vl_api_##n##_t_endian, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 1);
- foreach_vpe_api_msg;
-#undef _
-
- /*
- * Trace space for classifier mask+match
- */
- am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_TABLE].size += 5 * sizeof (u32x4);
- am->api_trace_cfg[VL_API_CLASSIFY_ADD_DEL_SESSION].size +=
- 5 * sizeof (u32x4);
-
- /*
- * Thread-safe API messages
- */
- am->is_mp_safe[VL_API_CONTROL_PING] = 1;
- am->is_mp_safe[VL_API_CONTROL_PING_REPLY] = 1;
- am->is_mp_safe[VL_API_IP_ROUTE_ADD_DEL] = 1;
- am->is_mp_safe[VL_API_GET_NODE_GRAPH] = 1;
-
/*
* Set up the (msg_name, crc, message-id) table
*/
- setup_message_id_table (am);
+ msg_id_base = setup_message_id_table ();
+
+ /* Mark messages as mp safe */
+ vl_api_set_msg_thread_safe (am, msg_id_base + VL_API_SHOW_VERSION, 1);
+ vl_api_set_msg_thread_safe (am, msg_id_base + VL_API_SHOW_VPE_SYSTEM_TIME,
+ 1);
return 0;
}
@@ -797,25 +397,6 @@ get_unformat_vnet_sw_interface (void)
return (void *) &unformat_vnet_sw_interface;
}
-#define vl_msg_name_crc_list
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_msg_name_crc_list
-
-static void
-setup_message_id_table (api_main_t * am)
-{
-#define _(id,n,crc) vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id);
- foreach_vl_msg_name_crc_memclnt;
- foreach_vl_msg_name_crc_vpe;
-#undef _
-
-#define vl_api_version_tuple(n,mj, mi, p) \
- vl_msg_api_add_version (am, #n, mj, mi, p);
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_api_version_tuple
-}
-
-
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vpp/api/api_main.c b/src/vpp/api/api_main.c
index 97dec9e1542..f2516752b9d 100644
--- a/src/vpp/api/api_main.c
+++ b/src/vpp/api/api_main.c
@@ -1,9 +1,11 @@
#include "vat.h"
+#include <dlfcn.h>
+#include <vat/plugin.h>
vat_main_t vat_main;
-void
-vat_suspend (vlib_main_t * vm, f64 interval)
+void __clib_no_tail_calls
+vat_suspend (vlib_main_t *vm, f64 interval)
{
vlib_process_suspend (vm, interval);
}
@@ -94,6 +96,30 @@ vat_plugin_hash_create (void)
}
static void
+vat_register_interface_dump (vat_main_t *vam)
+{
+ void *handle;
+ plugin_info_t *pi;
+
+ vec_foreach (pi, vat_plugin_main.plugin_info)
+ {
+ handle = dlsym (pi->handle, "api_sw_interface_dump");
+ if (handle)
+ {
+ vam->api_sw_interface_dump = handle;
+ break;
+ }
+ }
+
+ if (!vam->api_sw_interface_dump)
+ {
+ fformat (stderr,
+ "sw_interface_dump not found in interface_test plugin!\n");
+ exit (1);
+ }
+}
+
+static void
maybe_register_api_client (vat_main_t * vam)
{
vl_api_registration_t **regpp;
@@ -130,7 +156,8 @@ maybe_register_api_client (vat_main_t * vam)
am->shmem_hdr->application_restarts);
vam->vl_input_queue = am->shmem_hdr->vl_input_queue;
- api_sw_interface_dump (vam);
+ vat_register_interface_dump (vam);
+ vam->api_sw_interface_dump (vam);
}
static clib_error_t *
@@ -149,7 +176,7 @@ api_command_fn (vlib_main_t * vm,
maybe_register_api_client (vam);
/* vec_validated in the init routine */
- _vec_len (vam->inbuf) = 0;
+ vec_set_len (vam->inbuf, 0);
vam->input = &_input;
@@ -234,13 +261,12 @@ api_command_fn (vlib_main_t * vm,
if (vam->regenerate_interface_table)
{
vam->regenerate_interface_table = 0;
- api_sw_interface_dump (vam);
+ vam->api_sw_interface_dump (vam);
}
unformat_free (vam->input);
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (api_command, static) =
{
.path = "binary-api",
@@ -248,7 +274,6 @@ VLIB_CLI_COMMAND (api_command, static) =
.function = api_command_fn,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
void
api_cli_output (void *notused, const char *fmt, ...)
diff --git a/src/vpp/api/api_test.c b/src/vpp/api/api_test.c
new file mode 100644
index 00000000000..0893d485607
--- /dev/null
+++ b/src/vpp/api/api_test.c
@@ -0,0 +1,99 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2021 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vppinfra/error.h>
+#include <vpp/api/types.h>
+
+#include <vpp/api/vpe.api_enum.h>
+#include <vpp/api/vpe.api_types.h>
+
+#define __plugin_msg_base vpe_test_main.msg_id_base
+#include <vlibapi/vat_helper_macros.h>
+
+/* Declare message IDs */
+#include <vnet/format_fns.h>
+
+typedef struct
+{
+ /* API message ID base */
+ u16 msg_id_base;
+ vat_main_t *vat_main;
+} vpe_test_main_t;
+
+vpe_test_main_t vpe_test_main;
+
+static int
+api_show_version (vat_main_t *vam)
+{
+ vl_api_show_version_t *mp;
+ int ret;
+
+ M (SHOW_VERSION, mp);
+
+ S (mp);
+ W (ret);
+ return ret;
+}
+
+static int
+api_log_dump (vat_main_t *vam)
+{
+ /* Not yet implemented */
+ return -1;
+}
+
+static int
+api_show_vpe_system_time (vat_main_t *vam)
+{
+ /* Not yet implemented */
+ return -1;
+}
+
+static void
+vl_api_show_version_reply_t_handler (vl_api_show_version_reply_t *mp)
+{
+ vat_main_t *vam = &vat_main;
+ i32 retval = ntohl (mp->retval);
+
+ if (retval >= 0)
+ {
+ errmsg (" program: %s", mp->program);
+ errmsg (" version: %s", mp->version);
+ errmsg (" build date: %s", mp->build_date);
+ errmsg ("build directory: %s", mp->build_directory);
+ }
+ vam->retval = retval;
+ vam->result_ready = 1;
+}
+
+static void
+vl_api_log_details_t_handler (vl_api_log_details_t *mp)
+{
+ /* Not yet implemented */
+}
+
+static void
+vl_api_show_vpe_system_time_reply_t_handler (
+ vl_api_show_vpe_system_time_reply_t *mp)
+{
+ /* Not yet implemented */
+}
+
+#include <vpp/api/vpe.api_test.c>
diff --git a/src/vpp/api/gmon.c b/src/vpp/api/gmon.c
index ff561efbc07..e4ac1543cfc 100644
--- a/src/vpp/api/gmon.c
+++ b/src/vpp/api/gmon.c
@@ -73,7 +73,6 @@ get_significant_errors (gmon_main_t * gm)
int vm_index;
u64 significant_errors = 0;
- /* *INDENT-OFF* */
clib_bitmap_foreach (code, gm->sig_error_bitmap)
{
for (vm_index = 0; vm_index < vec_len (gm->my_vlib_mains); vm_index++)
@@ -85,7 +84,6 @@ get_significant_errors (gmon_main_t * gm)
em->counters_last_clear[code] : 0);
}
}
- /* *INDENT-ON* */
return (significant_errors);
}
@@ -145,13 +143,11 @@ gmon_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
return 0; /* not so much */
}
-/* *INDENT-OFF* */
VLIB_REGISTER_NODE (gmon_process_node,static) = {
.function = gmon_process,
.type = VLIB_NODE_TYPE_PROCESS,
.name = "gmon-process",
};
-/* *INDENT-ON* */
static clib_error_t *
gmon_init (vlib_main_t * vm)
@@ -288,13 +284,11 @@ set_significant_error_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (set_significant_error_command, static) = {
.path = "set significant error",
.short_help = "set significant error <counter-index-nnn> [disable]",
.function = set_significant_error_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vpp/api/json_format.h b/src/vpp/api/json_format.h
index 63217979ccd..3cfb87032e0 100644
--- a/src/vpp/api/json_format.h
+++ b/src/vpp/api/json_format.h
@@ -22,6 +22,11 @@
#include <vppinfra/clib.h>
#include <vppinfra/format.h>
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#endif /* __FreeBSD__ */
#include <netinet/ip.h>
/* JSON value type */
diff --git a/src/vpp/api/plugin.c b/src/vpp/api/plugin.c
index 410d497a0aa..e7ca874dbc9 100644
--- a/src/vpp/api/plugin.c
+++ b/src/vpp/api/plugin.c
@@ -161,6 +161,7 @@ vat_load_new_plugins (plugin_main_t * pm)
if (p == 0)
{
vec_add2 (pm->plugin_info, pi, 1);
+ clib_memset (pi, 0, sizeof (*pi));
pi->name = plugin_name;
pi->filename = file_name;
pi->file_info = statb;
@@ -169,10 +170,9 @@ vat_load_new_plugins (plugin_main_t * pm)
{
vec_free (file_name);
vec_free (plugin_name);
- _vec_len (pm->plugin_info) = vec_len (pm->plugin_info) - 1;
+ vec_set_len (pm->plugin_info, vec_len (pm->plugin_info) - 1);
continue;
}
- clib_memset (pi, 0, sizeof (*pi));
hash_set_mem (pm->plugin_by_name_hash, plugin_name,
pi - pm->plugin_info);
}
diff --git a/src/vpp/api/test_client.c b/src/vpp/api/test_client.c
deleted file mode 100644
index 38afefa8601..00000000000
--- a/src/vpp/api/test_client.c
+++ /dev/null
@@ -1,1538 +0,0 @@
-/*
- *------------------------------------------------------------------
- * api.c - message handler registration
- *
- * Copyright (c) 2010 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <netinet/in.h>
-#include <signal.h>
-#include <pthread.h>
-#include <unistd.h>
-#include <time.h>
-#include <fcntl.h>
-#include <string.h>
-#include <vppinfra/clib.h>
-#include <vppinfra/vec.h>
-#include <vppinfra/hash.h>
-#include <vppinfra/bitmap.h>
-#include <vppinfra/fifo.h>
-#include <vppinfra/time.h>
-#include <vppinfra/heap.h>
-#include <vppinfra/pool.h>
-#include <vppinfra/format.h>
-#include <vppinfra/error.h>
-
-#include <vnet/vnet.h>
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-
-#include <vpp/api/vpe_msg_enum.h>
-
-#include <vnet/ip/ip.h>
-#include <vnet/interface.h>
-
-#define f64_endian(a)
-#define f64_print(a,b)
-
-#define vl_typedefs /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_typedefs
-
-#define vl_endianfun /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_endianfun
-
-/* instantiate all the print functions we know about */
-#define vl_print(handle, ...)
-#define vl_printfun
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_printfun
-
-vl_shmem_hdr_t *shmem_hdr;
-
-typedef struct
-{
- int link_events_on;
- int stats_on;
- int oam_events_on;
-
- /* convenience */
- svm_queue_t *vl_input_queue;
- u32 my_client_index;
-} test_main_t;
-
-test_main_t test_main;
-
-/*
- * Satisfy external references when -lvlib is not available.
- */
-
-void
-vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...)
-{
- clib_warning ("vlib_cli_output called...");
-}
-
-u8 *
-format_ethernet_address (u8 * s, va_list * args)
-{
- u8 *a = va_arg (*args, u8 *);
-
- return format (s, "%02x:%02x:%02x:%02x:%02x:%02x",
- a[0], a[1], a[2], a[3], a[4], a[5]);
-}
-
-static void
-vl_api_sw_interface_details_t_handler (vl_api_sw_interface_details_t * mp)
-{
- char *duplex, *speed;
-
- switch (mp->link_duplex << VNET_HW_INTERFACE_FLAG_DUPLEX_SHIFT)
- {
- case VNET_HW_INTERFACE_FLAG_HALF_DUPLEX:
- duplex = "half";
- break;
- case VNET_HW_INTERFACE_FLAG_FULL_DUPLEX:
- duplex = "full";
- break;
- default:
- duplex = "bogus";
- break;
- }
- switch (mp->link_speed << VNET_HW_INTERFACE_FLAG_SPEED_SHIFT)
- {
- case VNET_HW_INTERFACE_FLAG_SPEED_10M:
- speed = "10Mbps";
- break;
- case VNET_HW_INTERFACE_FLAG_SPEED_100M:
- speed = "100Mbps";
- break;
- case VNET_HW_INTERFACE_FLAG_SPEED_1G:
- speed = "1Gbps";
- break;
- case VNET_HW_INTERFACE_FLAG_SPEED_2_5G:
- speed = "2.5Gbps";
- break;
- case VNET_HW_INTERFACE_FLAG_SPEED_5G:
- speed = "5Gbps";
- break;
- case VNET_HW_INTERFACE_FLAG_SPEED_10G:
- speed = "10Gbps";
- break;
- case VNET_HW_INTERFACE_FLAG_SPEED_20G:
- speed = "20Gbps";
- break;
- case VNET_HW_INTERFACE_FLAG_SPEED_25G:
- speed = "25Gbps";
- break;
- case VNET_HW_INTERFACE_FLAG_SPEED_40G:
- speed = "40Gbps";
- break;
- case VNET_HW_INTERFACE_FLAG_SPEED_50G:
- speed = "50Gbps";
- break;
- case VNET_HW_INTERFACE_FLAG_SPEED_56G:
- speed = "56Gbps";
- break;
- case VNET_HW_INTERFACE_FLAG_SPEED_100G:
- speed = "100Gbps";
- break;
- default:
- speed = "bogus";
- break;
- }
- fformat (stdout,
- "details: %s device_type %s sw_if_index %d sup_sw_if_index %d "
- "link_duplex %s link_speed %s", mp->interface_name,
- mp->interface_dev_type, ntohl (mp->sw_if_index),
- ntohl (mp->sup_sw_if_index), duplex, speed);
-
- if (mp->l2_address_length)
- fformat (stdout, " l2 address: %U\n",
- format_ethernet_address, mp->l2_address);
- else
- fformat (stdout, "\n");
-}
-
-static void
-vl_api_sw_interface_set_flags_t_handler (vl_api_sw_interface_set_flags_t * mp)
-{
- fformat (stdout, "set flags: sw_if_index %d, admin %s\n",
- ntohl (mp->sw_if_index), mp->admin_up_down ? "up" : "down");
-}
-
-static void
- vl_api_sw_interface_set_flags_reply_t_handler
- (vl_api_sw_interface_set_flags_reply_t * mp)
-{
- fformat (stdout, "set flags reply: reply %d\n", ntohl (mp->retval));
-}
-
-static void
- vl_api_want_interface_events_reply_t_handler
- (vl_api_want_interface_events_reply_t * mp)
-{
-}
-
-static void
-vl_api_want_stats_reply_t_handler (vl_api_want_stats_reply_t * mp)
-{
- fformat (stdout, "want stats reply %d\n", ntohl (mp->retval));
-}
-
-static void
-vl_api_want_oam_events_reply_t_handler (vl_api_want_oam_events_reply_t * mp)
-{
- fformat (stdout, "want oam reply %d\n", ntohl (mp->retval));
-}
-
-static void
-vl_api_ip_add_del_route_reply_t_handler (vl_api_ip_add_del_route_reply_t * mp)
-{
- fformat (stdout, "add_route reply %d\n", ntohl (mp->retval));
-}
-
-static void
- vl_api_sw_interface_add_del_address_reply_t_handler
- (vl_api_sw_interface_add_del_address_reply_t * mp)
-{
- fformat (stdout, "add_del_address reply %d\n", ntohl (mp->retval));
-}
-
-static void
- vl_api_sw_interface_set_table_reply_t_handler
- (vl_api_sw_interface_set_table_reply_t * mp)
-{
- fformat (stdout, "set_table reply %d\n", ntohl (mp->retval));
-}
-
-static void
-vl_api_tap_connect_reply_t_handler (vl_api_tap_connect_reply_t * mp)
-{
- fformat (stdout, "tap connect reply %d, sw_if_index %d\n",
- ntohl (mp->retval), ntohl (mp->sw_if_index));
-}
-
-static void
-vl_api_create_vlan_subif_reply_t_handler (vl_api_create_vlan_subif_reply_t *
- mp)
-{
- fformat (stdout, "create vlan subif reply %d, sw_if_index %d\n",
- ntohl (mp->retval), ntohl (mp->sw_if_index));
-}
-
-static void vl_api_proxy_arp_add_del_reply_t_handler
- (vl_api_proxy_arp_add_del_reply_t * mp)
-{
- fformat (stdout, "add del proxy arp reply %d\n", ntohl (mp->retval));
-}
-
-static void vl_api_proxy_arp_intfc_enable_disable_reply_t_handler
- (vl_api_proxy_arp_intfc_enable_disable_reply_t * mp)
-{
- fformat (stdout, "proxy arp intfc ena/dis reply %d\n", ntohl (mp->retval));
-}
-
-static void vl_api_ip_neighbor_add_del_reply_t_handler
- (vl_api_ip_neighbor_add_del_reply_t * mp)
-{
- fformat (stdout, "ip neighbor add del reply %d\n", ntohl (mp->retval));
-}
-
-#if 0
-static void
-vl_api_vnet_interface_counters_t_handler (vl_api_vnet_interface_counters_t *
- mp)
-{
- char *counter_name;
- u32 count, sw_if_index;
- int i;
-
- count = ntohl (mp->count);
- sw_if_index = ntohl (mp->first_sw_if_index);
- if (mp->is_combined == 0)
- {
- u64 *vp, v;
- vp = (u64 *) mp->data;
-
- switch (mp->vnet_counter_type)
- {
- case VNET_INTERFACE_COUNTER_DROP:
- counter_name = "drop";
- break;
- case VNET_INTERFACE_COUNTER_PUNT:
- counter_name = "punt";
- break;
- case VNET_INTERFACE_COUNTER_IP4:
- counter_name = "ip4";
- break;
- case VNET_INTERFACE_COUNTER_IP6:
- counter_name = "ip6";
- break;
- case VNET_INTERFACE_COUNTER_RX_NO_BUF:
- counter_name = "rx-no-buf";
- break;
- case VNET_INTERFACE_COUNTER_RX_MISS:
- counter_name = "rx-miss";
- break;
- case VNET_INTERFACE_COUNTER_RX_ERROR:
- counter_name = "rx-error";
- break;
- case VNET_INTERFACE_COUNTER_TX_ERROR:
- counter_name = "tx-error (fifo-full)";
- break;
- default:
- counter_name = "bogus";
- break;
- }
- for (i = 0; i < count; i++)
- {
- v = clib_mem_unaligned (vp, u64);
- v = clib_net_to_host_u64 (v);
- vp++;
- fformat (stdout, "%d.%s %lld\n", sw_if_index, counter_name, v);
- sw_if_index++;
- }
- }
- else
- {
- vlib_counter_t *vp;
- u64 packets, bytes;
- vp = (vlib_counter_t *) mp->data;
-
- switch (mp->vnet_counter_type)
- {
- case VNET_INTERFACE_COUNTER_RX:
- counter_name = "rx";
- break;
- case VNET_INTERFACE_COUNTER_TX:
- counter_name = "tx";
- break;
- default:
- counter_name = "bogus";
- break;
- }
- for (i = 0; i < count; i++)
- {
- packets = clib_mem_unaligned (&vp->packets, u64);
- packets = clib_net_to_host_u64 (packets);
- bytes = clib_mem_unaligned (&vp->bytes, u64);
- bytes = clib_net_to_host_u64 (bytes);
- vp++;
- fformat (stdout, "%d.%s.packets %lld\n",
- sw_if_index, counter_name, packets);
- fformat (stdout, "%d.%s.bytes %lld\n",
- sw_if_index, counter_name, bytes);
- sw_if_index++;
- }
- }
-}
-#endif
-
-/* Format an IP4 address. */
-u8 *
-format_ip4_address (u8 * s, va_list * args)
-{
- u8 *a = va_arg (*args, u8 *);
- return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
-}
-
-/* Format an IP4 route destination and length. */
-u8 *
-format_ip4_address_and_length (u8 * s, va_list * args)
-{
- u8 *a = va_arg (*args, u8 *);
- u8 l = va_arg (*args, u32);
- return format (s, "%U/%d", format_ip4_address, a, l);
-}
-
-static void
-vl_api_vnet_ip4_fib_counters_t_handler (vl_api_vnet_ip4_fib_counters_t * mp)
-{
- int i;
- vl_api_ip4_fib_counter_t *ctrp;
- u32 count;
-
- count = ntohl (mp->count);
-
- fformat (stdout, "fib id %d, count this msg %d\n",
- ntohl (mp->vrf_id), count);
-
- ctrp = mp->c;
- for (i = 0; i < count; i++)
- {
- fformat (stdout, "%U: %lld packets, %lld bytes\n",
- format_ip4_address_and_length, &ctrp->address,
- (u32) ctrp->address_length,
- clib_net_to_host_u64 (ctrp->packets),
- clib_net_to_host_u64 (ctrp->bytes));
- ctrp++;
- }
-}
-
-/* Format an IP6 address. */
-u8 *
-format_ip6_address (u8 * s, va_list * args)
-{
- ip6_address_t *a = va_arg (*args, ip6_address_t *);
- u32 i, i_max_n_zero, max_n_zeros, i_first_zero, n_zeros, last_double_colon;
-
- i_max_n_zero = ARRAY_LEN (a->as_u16);
- max_n_zeros = 0;
- i_first_zero = i_max_n_zero;
- n_zeros = 0;
- for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
- {
- u32 is_zero = a->as_u16[i] == 0;
- if (is_zero && i_first_zero >= ARRAY_LEN (a->as_u16))
- {
- i_first_zero = i;
- n_zeros = 0;
- }
- n_zeros += is_zero;
- if ((!is_zero && n_zeros > max_n_zeros)
- || (i + 1 >= ARRAY_LEN (a->as_u16) && n_zeros > max_n_zeros))
- {
- i_max_n_zero = i_first_zero;
- max_n_zeros = n_zeros;
- i_first_zero = ARRAY_LEN (a->as_u16);
- n_zeros = 0;
- }
- }
-
- last_double_colon = 0;
- for (i = 0; i < ARRAY_LEN (a->as_u16); i++)
- {
- if (i == i_max_n_zero && max_n_zeros > 1)
- {
- s = format (s, "::");
- i += max_n_zeros - 1;
- last_double_colon = 1;
- }
- else
- {
- s = format (s, "%s%x",
- (last_double_colon || i == 0) ? "" : ":",
- clib_net_to_host_u16 (a->as_u16[i]));
- last_double_colon = 0;
- }
- }
-
- return s;
-}
-
-/* Format an IP6 route destination and length. */
-u8 *
-format_ip6_address_and_length (u8 * s, va_list * args)
-{
- ip6_address_t *a = va_arg (*args, ip6_address_t *);
- u8 l = va_arg (*args, u32);
- return format (s, "%U/%d", format_ip6_address, a, l);
-}
-
-static void
-vl_api_vnet_ip6_fib_counters_t_handler (vl_api_vnet_ip6_fib_counters_t * mp)
-{
- int i;
- vl_api_ip6_fib_counter_t *ctrp;
- u32 count;
-
- count = ntohl (mp->count);
-
- fformat (stdout, "fib id %d, count this msg %d\n",
- ntohl (mp->vrf_id), count);
-
- ctrp = mp->c;
- for (i = 0; i < count; i++)
- {
- fformat (stdout, "%U: %lld packets, %lld bytes\n",
- format_ip6_address_and_length, &ctrp->address,
- (u32) ctrp->address_length,
- clib_net_to_host_u64 (ctrp->packets),
- clib_net_to_host_u64 (ctrp->bytes));
- ctrp++;
- }
-}
-
-static void
-vl_api_oam_event_t_handler (vl_api_oam_event_t * mp)
-{
- fformat (stdout, "OAM: %U now %s\n",
- format_ip4_address, &mp->dst_address,
- mp->state == 1 ? "alive" : "dead");
-}
-
-static void
-vl_api_oam_add_del_reply_t_handler (vl_api_oam_add_del_reply_t * mp)
-{
- fformat (stdout, "oam add del reply %d\n", ntohl (mp->retval));
-}
-
-static void
-vl_api_reset_fib_reply_t_handler (vl_api_reset_fib_reply_t * mp)
-{
- fformat (stdout, "fib reset reply %d\n", ntohl (mp->retval));
-}
-
-static void
-vl_api_dhcp_proxy_set_vss_reply_t_handler (vl_api_dhcp_proxy_set_vss_reply_t *
- mp)
-{
- fformat (stdout, "dhcp proxy set vss reply %d\n", ntohl (mp->retval));
-}
-
-static void
-vl_api_dhcp_proxy_config_reply_t_handler (vl_api_dhcp_proxy_config_reply_t *
- mp)
-{
- fformat (stdout, "dhcp proxy config reply %d\n", ntohl (mp->retval));
-}
-
-static void
-vl_api_set_ip_flow_hash_reply_t_handler (vl_api_set_ip_flow_hash_reply_t * mp)
-{
- fformat (stdout, "set ip flow hash reply %d\n", ntohl (mp->retval));
-}
-
-static void
- vl_api_sw_interface_ip6nd_ra_config_reply_t_handler
- (vl_api_sw_interface_ip6nd_ra_config_reply_t * mp)
-{
- fformat (stdout, "ip6 nd ra-config reply %d\n", ntohl (mp->retval));
-}
-
-static void
- vl_api_sw_interface_ip6nd_ra_prefix_reply_t_handler
- (vl_api_sw_interface_ip6nd_ra_prefix_reply_t * mp)
-{
- fformat (stdout, "ip6 nd ra-prefix reply %d\n", ntohl (mp->retval));
-}
-
-static void
- vl_api_sw_interface_ip6_enable_disable_reply_t_handler
- (vl_api_sw_interface_ip6_enable_disable_reply_t * mp)
-{
- fformat (stdout, "ip6 enable/disable reply %d\n", ntohl (mp->retval));
-}
-
-static void
- vl_api_sw_interface_ip6_set_link_local_address_reply_t_handler
- (vl_api_sw_interface_ip6_set_link_local_address_reply_t * mp)
-{
- fformat (stdout, "ip6 set link-local address reply %d\n",
- ntohl (mp->retval));
-}
-
-static void vl_api_create_loopback_reply_t_handler
- (vl_api_create_loopback_reply_t * mp)
-{
- fformat (stdout, "create loopback status %d, sw_if_index %d\n",
- ntohl (mp->retval), ntohl (mp->sw_if_index));
-}
-
-static void vl_api_create_loopback_instance_reply_t_handler
- (vl_api_create_loopback_instance_reply_t * mp)
-{
- fformat (stdout, "create loopback status %d, sw_if_index %d\n",
- ntohl (mp->retval), ntohl (mp->sw_if_index));
-}
-
-static void vl_api_l2_patch_add_del_reply_t_handler
- (vl_api_l2_patch_add_del_reply_t * mp)
-{
- fformat (stdout, "l2 patch reply %d\n", ntohl (mp->retval));
-}
-
-static void vl_api_sw_interface_set_l2_xconnect_reply_t_handler
- (vl_api_sw_interface_set_l2_xconnect_reply_t * mp)
-{
- fformat (stdout, "l2_xconnect reply %d\n", ntohl (mp->retval));
-}
-
-static void vl_api_sw_interface_set_l2_bridge_reply_t_handler
- (vl_api_sw_interface_set_l2_bridge_reply_t * mp)
-{
- fformat (stdout, "l2_bridge reply %d\n", ntohl (mp->retval));
-}
-
-static void
-noop_handler (void *notused)
-{
-}
-
-#define vl_api_vnet_ip4_fib_counters_t_endian noop_handler
-#define vl_api_vnet_ip4_fib_counters_t_print noop_handler
-#define vl_api_vnet_ip6_fib_counters_t_endian noop_handler
-#define vl_api_vnet_ip6_fib_counters_t_print noop_handler
-
-#define foreach_api_msg \
-_(SW_INTERFACE_DETAILS, sw_interface_details) \
-_(SW_INTERFACE_SET_FLAGS, sw_interface_set_flags) \
-_(SW_INTERFACE_SET_FLAGS_REPLY, sw_interface_set_flags_reply) \
-_(WANT_INTERFACE_EVENTS_REPLY, want_interface_events_reply) \
-_(WANT_STATS_REPLY, want_stats_reply) \
-_(WANT_OAM_EVENTS_REPLY, want_oam_events_reply) \
-_(OAM_EVENT, oam_event) \
-_(OAM_ADD_DEL_REPLY, oam_add_del_reply) \
-_(VNET_IP4_FIB_COUNTERS, vnet_ip4_fib_counters) \
-_(VNET_IP6_FIB_COUNTERS, vnet_ip6_fib_counters) \
-_(IP_ADD_DEL_ROUTE_REPLY, ip_add_del_route_reply) \
-_(SW_INTERFACE_ADD_DEL_ADDRESS_REPLY, sw_interface_add_del_address_reply) \
-_(SW_INTERFACE_SET_TABLE_REPLY, sw_interface_set_table_reply) \
-_(TAP_CONNECT_REPLY, tap_connect_reply) \
-_(CREATE_VLAN_SUBIF_REPLY, create_vlan_subif_reply) \
-_(PROXY_ARP_ADD_DEL_REPLY, proxy_arp_add_del_reply) \
-_(PROXY_ARP_INTFC_ENABLE_DISABLE_REPLY, proxy_arp_intfc_enable_disable_reply) \
-_(IP_NEIGHBOR_ADD_DEL_REPLY, ip_neighbor_add_del_reply) \
-_(RESET_FIB_REPLY, reset_fib_reply) \
-_(DHCP_PROXY_CONFIG_REPLY, dhcp_proxy_config_reply) \
-_(DHCP_PROXY_SET_VSS_REPLY, dhcp_proxy_set_vss_reply) \
-_(SET_IP_FLOW_HASH_REPLY, set_ip_flow_hash_reply) \
-_(SW_INTERFACE_IP6ND_RA_CONFIG_REPLY, sw_interface_ip6nd_ra_config_reply) \
-_(SW_INTERFACE_IP6ND_RA_PREFIX_REPLY, sw_interface_ip6nd_ra_prefix_reply) \
-_(SW_INTERFACE_IP6_ENABLE_DISABLE_REPLY, sw_interface_ip6_enable_disable_reply) \
-_(SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS_REPLY, sw_interface_ip6_set_link_local_address_reply) \
- _(CREATE_LOOPBACK_REPLY, create_loopback_reply) \
- _(CREATE_LOOPBACK_INSTANCE_REPLY, create_loopback_instance_reply) \
-_(L2_PATCH_ADD_DEL_REPLY, l2_patch_add_del_reply) \
-_(SW_INTERFACE_SET_L2_XCONNECT_REPLY, sw_interface_set_l2_xconnect_reply) \
-_(SW_INTERFACE_SET_L2_BRIDGE_REPLY, sw_interface_set_l2_bridge_reply)
-
-int
-connect_to_vpe (char *name)
-{
- int rv = 0;
-
- rv = vl_client_connect_to_vlib ("/vpe-api", name, 32);
-
-#define _(N,n) \
- vl_msg_api_set_handlers(VL_API_##N, #n, \
- vl_api_##n##_t_handler, \
- noop_handler, \
- vl_api_##n##_t_endian, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 1);
- foreach_api_msg;
-#undef _
-
- shmem_hdr = api_main.shmem_hdr;
-
- return rv;
-}
-
-int
-disconnect_from_vpe (void)
-{
- vl_client_disconnect_from_vlib ();
- return 0;
-}
-
-void
-link_up_down_enable_disable (test_main_t * tm, int enable)
-{
- vl_api_want_interface_events_t *mp;
-
- /* Request admin / link up down messages */
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_WANT_INTERFACE_EVENTS);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->enable_disable = enable;
- mp->pid = getpid ();
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
- tm->link_events_on = enable;
-}
-
-void
-stats_enable_disable (test_main_t * tm, int enable)
-{
- vl_api_want_stats_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_WANT_STATS);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->enable_disable = enable;
- mp->pid = getpid ();
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
- tm->stats_on = enable;
-}
-
-void
-oam_events_enable_disable (test_main_t * tm, int enable)
-{
- vl_api_want_oam_events_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_WANT_OAM_EVENTS);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->enable_disable = enable;
- mp->pid = getpid ();
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
- tm->oam_events_on = enable;
-}
-
-void
-oam_add_del (test_main_t * tm, int is_add)
-{
- vl_api_oam_add_del_t *mp;
- ip4_address_t tmp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_OAM_ADD_DEL);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->is_add = is_add;
-
- tmp.as_u32 = ntohl (0xc0a80101); /* 192.168.1.1 */
- clib_memcpy (mp->src_address, tmp.as_u8, 4);
-
- tmp.as_u32 = ntohl (0xc0a80103); /* 192.168.1.3 */
- clib_memcpy (mp->dst_address, tmp.as_u8, 4);
-
- mp->vrf_id = 0;
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-dump (test_main_t * tm)
-{
- vl_api_sw_interface_dump_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_DUMP);
- mp->client_index = tm->my_client_index;
- mp->name_filter_valid = 1;
- strncpy ((char *) mp->name_filter, "eth", sizeof (mp->name_filter) - 1);
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-add_del_ip4_route (test_main_t * tm, int enable_disable)
-{
- vl_api_ip_add_del_route_t *mp;
- u32 tmp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_IP_ADD_DEL_ROUTE);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->table_id = ntohl (0);
-
- mp->next_hop_sw_if_index = ntohl (5);
- mp->is_add = enable_disable;
- mp->next_hop_weight = 1;
-
- /* Next hop: 6.0.0.1 */
- tmp = ntohl (0x06000001);
- clib_memcpy (mp->next_hop_address, &tmp, sizeof (tmp));
-
- /* Destination: 10.0.0.1/32 */
- tmp = ntohl (0x0);
- clib_memcpy (mp->dst_address, &tmp, sizeof (tmp));
- mp->dst_address_length = 0;
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-add_del_ip6_route (test_main_t * tm, int enable_disable)
-{
- vl_api_ip_add_del_route_t *mp;
- u64 tmp[2];
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_IP_ADD_DEL_ROUTE);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->next_hop_sw_if_index = ntohl (5);
- mp->is_add = enable_disable;
- mp->is_ipv6 = 1;
- mp->next_hop_weight = 1;
- mp->dst_address_length = 64;
-
- /* add/del dabe::/64 via db01::11 */
-
- tmp[0] = clib_host_to_net_u64 (0xdabe000000000000ULL);
- tmp[1] = clib_host_to_net_u64 (0x0ULL);
- clib_memcpy (mp->dst_address, &tmp[0], 8);
- clib_memcpy (&mp->dst_address[8], &tmp[1], 8);
-
- tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL);
- tmp[1] = clib_host_to_net_u64 (0x11ULL);
- clib_memcpy (mp->next_hop_address, &tmp[0], 8);
- clib_memcpy (&mp->next_hop_address[8], &tmp[1], 8);
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-add_del_interface_address (test_main_t * tm, int enable_disable)
-{
- vl_api_sw_interface_add_del_address_t *mp;
- u32 tmp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_ADD_DEL_ADDRESS);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (5);
- mp->is_add = enable_disable;
- mp->address_length = 8;
-
- tmp = ntohl (0x01020304);
- clib_memcpy (mp->address, &tmp, 4);
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-add_del_v6_interface_address (test_main_t * tm, int enable_disable)
-{
- vl_api_sw_interface_add_del_address_t *mp;
- u64 tmp[2];
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_ADD_DEL_ADDRESS);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->is_ipv6 = 1;
- mp->sw_if_index = ntohl (5);
- mp->is_add = enable_disable;
- mp->address_length = 64;
-
- tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL);
- tmp[1] = clib_host_to_net_u64 (0x11ULL);
-
- clib_memcpy (mp->address, &tmp[0], 8);
- clib_memcpy (&mp->address[8], &tmp[1], 8);
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-del_all_interface_addresses (test_main_t * tm)
-{
- vl_api_sw_interface_add_del_address_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_ADD_DEL_ADDRESS);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (5);
- mp->del_all = 1;
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-set_interface_table (test_main_t * tm, int is_ipv6, u32 vrf_id)
-{
- vl_api_sw_interface_set_table_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_TABLE);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (5);
- mp->is_ipv6 = is_ipv6;
- mp->vrf_id = ntohl (vrf_id);
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-connect_unix_tap (test_main_t * tm, char *name)
-{
- vl_api_tap_connect_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_TAP_CONNECT);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- strncpy ((char *) mp->tap_name, name, sizeof (mp->tap_name) - 1);
- mp->use_random_mac = 1;
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-create_vlan_subif (test_main_t * tm, u32 vlan_id)
-{
- vl_api_create_vlan_subif_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_CREATE_VLAN_SUBIF);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (5);
- mp->vlan_id = ntohl (vlan_id);
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-add_del_proxy_arp (test_main_t * tm, int is_add)
-{
- vl_api_proxy_arp_add_del_t *mp;
- u32 tmp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_PROXY_ARP_ADD_DEL);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->proxy.vrf_id = ntohl (11);
- mp->is_add = is_add;
-
- /* proxy fib 11, 1.1.1.1 -> 1.1.1.10 */
- tmp = ntohl (0x01010101);
- clib_memcpy (mp->proxy.low_address, &tmp, 4);
-
- tmp = ntohl (0x0101010a);
- clib_memcpy (mp->proxy.hi_address, &tmp, 4);
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-proxy_arp_intfc_enable_disable (test_main_t * tm, int enable_disable)
-{
- vl_api_proxy_arp_intfc_enable_disable_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_PROXY_ARP_INTFC_ENABLE_DISABLE);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (6);
- mp->enable_disable = enable_disable;
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-add_ip4_neighbor (test_main_t * tm, int add_del)
-{
- vl_api_ip_neighbor_add_del_t *mp;
- u32 tmp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_ADD_DEL);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (6);
- mp->is_add = add_del;
-
- clib_memset (mp->mac_address, 0xbe, sizeof (mp->mac_address));
-
- tmp = ntohl (0x0101010a);
- clib_memcpy (mp->dst_address, &tmp, 4);
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-add_ip6_neighbor (test_main_t * tm, int add_del)
-{
- vl_api_ip_neighbor_add_del_t *mp;
- u64 tmp[2];
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_IP_NEIGHBOR_ADD_DEL);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (6);
- mp->is_add = add_del;
- mp->is_ipv6 = 1;
-
- clib_memset (mp->mac_address, 0xbe, sizeof (mp->mac_address));
-
- tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL);
- tmp[1] = clib_host_to_net_u64 (0x11ULL);
-
- clib_memcpy (mp->dst_address, &tmp[0], 8);
- clib_memcpy (&mp->dst_address[8], &tmp[1], 8);
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-reset_fib (test_main_t * tm, u8 is_ip6)
-{
- vl_api_reset_fib_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_RESET_FIB);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->vrf_id = ntohl (11);
- mp->is_ipv6 = is_ip6;
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-dhcpv6_set_vss (test_main_t * tm)
-{
- vl_api_dhcp_proxy_set_vss_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_SET_VSS);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->oui = ntohl (6);
- mp->tbl_id = ntohl (60);
- mp->is_add = 1;
- mp->is_ipv6 = 1;
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-dhcpv4_set_vss (test_main_t * tm)
-{
- vl_api_dhcp_proxy_set_vss_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_SET_VSS);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->oui = ntohl (4);
- mp->tbl_id = ntohl (40);
- mp->is_add = 1;
- mp->is_ipv6 = 0;
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-dhcp_set_vss (test_main_t * tm)
-{
- dhcpv4_set_vss (tm);
- dhcpv6_set_vss (tm);
-}
-
-void
-dhcp_set_proxy (test_main_t * tm, int ipv6)
-{
- vl_api_dhcp_proxy_config_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_DHCP_PROXY_CONFIG);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->is_ipv6 = ipv6;
- mp->is_add = 1;
- mp->dhcp_server[0] = 0x20;
- mp->dhcp_server[1] = 0x01;
- mp->dhcp_server[2] = 0xab;
- mp->dhcp_server[3] = 0xcd;
- mp->dhcp_server[4] = 0x12;
- mp->dhcp_server[5] = 0x34;
- mp->dhcp_server[6] = 0xfe;
- mp->dhcp_server[7] = 0xdc;
- mp->dhcp_server[14] = 0;
- mp->dhcp_server[15] = 0x2;
-
- mp->dhcp_src_address[0] = 0x20;
- mp->dhcp_src_address[1] = 0x01;
- mp->dhcp_src_address[2] = 0xab;
- mp->dhcp_src_address[3] = 0xcd;
- mp->dhcp_src_address[4] = 0x12;
- mp->dhcp_src_address[5] = 0x34;
- mp->dhcp_src_address[6] = 0x56;
- mp->dhcp_src_address[7] = 0x78;
- mp->dhcp_src_address[14] = 0;
- mp->dhcp_src_address[15] = 0x2;
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-set_ip_flow_hash (test_main_t * tm, u8 is_ip6)
-{
- vl_api_set_ip_flow_hash_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_SET_IP_FLOW_HASH);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->vrf_id = 0;
- mp->is_ipv6 = is_ip6;
- mp->dst = 1;
- mp->reverse = 1;
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-ip6nd_ra_config (test_main_t * tm, int is_no)
-{
- vl_api_sw_interface_ip6nd_ra_config_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
-
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (5);
- mp->is_no = is_no;
-
- mp->suppress = 1;
-
-
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6ND_RA_CONFIG);
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-ip6nd_ra_prefix (test_main_t * tm, int is_no)
-{
- vl_api_sw_interface_ip6nd_ra_prefix_t *mp;
- u64 tmp[2];
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
-
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (5);
- mp->is_no = is_no;
-
- mp->use_default = 1;
-
-
- tmp[0] = clib_host_to_net_u64 (0xdb01000000000000ULL);
- tmp[1] = clib_host_to_net_u64 (0x11ULL);
-
-
- clib_memcpy (mp->address, &tmp[0], 8);
- clib_memcpy (&mp->address[8], &tmp[1], 8);
-
- mp->address_length = 64;
-
-
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6ND_RA_PREFIX);
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-ip6_enable_disable (test_main_t * tm, int enable)
-{
- vl_api_sw_interface_ip6_enable_disable_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
-
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (5);
- mp->enable = (enable == 1);;
-
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6_ENABLE_DISABLE);
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-loop_create (test_main_t * tm)
-{
- vl_api_create_loopback_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
-
- mp->_vl_msg_id = ntohs (VL_API_CREATE_LOOPBACK);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-ip6_set_link_local_address (test_main_t * tm)
-{
- vl_api_sw_interface_ip6_set_link_local_address_t *mp;
- u64 tmp[2];
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
-
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (5);
-
- tmp[0] = clib_host_to_net_u64 (0xfe80000000000000ULL);
- tmp[1] = clib_host_to_net_u64 (0x11ULL);
-
- ip6_address_encode ((ip6_address_encode *) & tmp, mp->address);
-
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_IP6_SET_LINK_LOCAL_ADDRESS);
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-
-void
-set_flags (test_main_t * tm, int up_down)
-{
- vl_api_sw_interface_set_flags_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
-
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_FLAGS);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->sw_if_index = ntohl (5);
- mp->admin_up_down = up_down;
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-
-}
-
-void
-l2_patch_add_del (test_main_t * tm, int is_add)
-{
- vl_api_l2_patch_add_del_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_L2_PATCH_ADD_DEL);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->is_add = is_add;
- mp->rx_sw_if_index = ntohl (1);
- mp->tx_sw_if_index = ntohl (2);
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-l2_xconnect (test_main_t * tm)
-{
- vl_api_sw_interface_set_l2_xconnect_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_L2_XCONNECT);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->rx_sw_if_index = ntohl (5);
- mp->tx_sw_if_index = ntohl (6);
- mp->enable = 1;
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-void
-l2_bridge (test_main_t * tm)
-{
- vl_api_sw_interface_set_l2_bridge_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_SW_INTERFACE_SET_L2_BRIDGE);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
- mp->rx_sw_if_index = ntohl (5);
- mp->bd_id = ntohl (6);
- mp->bvi = ntohl (1);
- mp->shg = ntohl (0);
- mp->enable = 1;
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-int
-main (int argc, char **argv)
-{
- api_main_t *am = vlibapi_get_main ();
- test_main_t *tm = &test_main;
- int ch;
-
- connect_to_vpe ("test_client");
-
- tm->vl_input_queue = shmem_hdr->vl_input_queue;
- tm->my_client_index = am->my_client_index;
-
- fformat (stdout, "Type 'h' for help, 'q' to quit...\n");
-
- while (1)
- {
- ch = getchar ();
- switch (ch)
- {
- case 'q':
- goto done;
- case 'd':
- dump (tm);
- break;
- case 'L':
- link_up_down_enable_disable (tm, 1 /* enable_disable */ );
- break;
- case 'l':
- link_up_down_enable_disable (tm, 0 /* enable_disable */ );
- break;
- case 'S':
- stats_enable_disable (tm, 1 /* enable_disable */ );
- break;
- case 's':
- stats_enable_disable (tm, 0 /* enable_disable */ );
- break;
- case '3':
- add_del_ip4_route (tm, 0 /* add */ );
- break;
- case '4':
- add_del_ip4_route (tm, 1 /* add */ );
- break;
- case '5':
- add_del_ip6_route (tm, 0 /* add */ );
- break;
- case '6':
- add_del_ip6_route (tm, 1 /* add */ );
- break;
- case 'A':
- add_del_interface_address (tm, 1 /* add */ );
- break;
- case 'a':
- add_del_interface_address (tm, 0 /* add */ );
- break;
- case 'B':
- add_del_v6_interface_address (tm, 1 /* add */ );
- break;
- case 'b':
- add_del_v6_interface_address (tm, 0 /* add */ );
- break;
- case 'E':
- l2_patch_add_del (tm, 1 /* is_add */ );
- break;
- case 'e':
- l2_patch_add_del (tm, 0 /* is_add */ );
- break;
- case 'z':
- del_all_interface_addresses (tm);
- break;
- case 't':
- set_interface_table (tm, 0 /* is_ipv6 */ ,
- 11 /* my amp goes to 11 */ );
- break;
- case 'T':
- set_interface_table (tm, 1 /* is_ipv6 */ ,
- 12 /* my amp goes to 12 */ );
- break;
-
- case 'u':
- create_vlan_subif (tm, 123);
- break;
-
- case 'c':
- connect_unix_tap (tm, "foo");
- break;
-
- case 'n':
- add_ip4_neighbor (tm, 1 /* is_add */ );
- add_ip6_neighbor (tm, 1 /* is_add */ );
- break;
-
- case 'N':
- add_ip4_neighbor (tm, 0 /* is_add */ );
- add_ip6_neighbor (tm, 0 /* is_add */ );
- break;
-
- case 'p':
- add_del_proxy_arp (tm, 1 /* add */ );
- break;
-
- case 'i':
- proxy_arp_intfc_enable_disable (tm, 1 /* enable */ );
- break;
-
- case 'O':
- oam_events_enable_disable (tm, 0 /* enable */ );
- break;
-
- case 'o':
- oam_events_enable_disable (tm, 1 /* enable */ );
- break;
-
- case '0':
- oam_add_del (tm, 0 /* is_add */ );
- break;
-
- case '1':
- oam_add_del (tm, 1 /* is_add */ );
- break;
-
- case 'r':
- reset_fib (tm, 0 /* is_ip6 */ );
- break;
-
- case 'R':
- reset_fib (tm, 1 /* is_ip6 */ );
- break;
-
- case 'j':
- dhcp_set_vss (tm);
- break;
-
- case 'k':
- dhcp_set_proxy (tm, 0);
- break;
-
- case 'K':
- dhcp_set_proxy (tm, 1 /*ipv6 */ );
- break;
-
- case 'v':
- set_ip_flow_hash (tm, 0 /* is_ip6 */ );
- break;
-
- case 'V':
- ip6_set_link_local_address (tm);
- break;
-
- case 'w':
- ip6_enable_disable (tm, 1 /* enable */ );
- break;
-
- case 'W':
- ip6_enable_disable (tm, 0 /* disable */ );
- break;
-
- case 'x':
- ip6nd_ra_config (tm, 0 /* is_no */ );
- break;
- case 'X':
- ip6nd_ra_config (tm, 1 /* is_no */ );
- break;
- case 'y':
- ip6nd_ra_prefix (tm, 0 /* is_no */ );
- break;
- case 'Y':
- ip6nd_ra_prefix (tm, 1 /* is_no */ );
- break;
-
- case '7':
- loop_create (tm);
- break;
-
- case 'F':
- set_flags (tm, 1 /* up_down */ );
- break;
-
- case 'f':
- set_flags (tm, 0 /* up_down */ );
- break;
-
- case '@':
- l2_xconnect (tm);
- break;
-
- case '#':
- l2_bridge (tm);
- break;
-
- case 'h':
- fformat (stdout, "q=quit,d=dump,L=link evts on,l=link evts off\n");
- fformat (stdout, "S=stats on,s=stats off\n");
- fformat (stdout, "4=add v4 route, 3=del v4 route\n");
- fformat (stdout, "6=add v6 route, 5=del v6 route\n");
- fformat (stdout, "A=add v4 intfc route, a=del v4 intfc route\n");
- fformat (stdout, "B=add v6 intfc route, b=del v6 intfc route\n");
- fformat (stdout, "z=del all intfc routes\n");
- fformat (stdout, "t=set v4 intfc table, T=set v6 intfc table\n");
- fformat (stdout, "c=connect unix tap\n");
- fformat (stdout,
- "j=set dhcpv4 and v6 link-address/option-82 params\n");
- fformat (stdout, "k=set dhcpv4 relay agent params\n");
- fformat (stdout, "K=set dhcpv6 relay agent params\n");
- fformat (stdout, "E=add l2 patch, e=del l2 patch\n");
- fformat (stdout, "V=ip6 set link-local address \n");
- fformat (stdout, "w=ip6 enable \n");
- fformat (stdout, "W=ip6 disable \n");
- fformat (stdout, "x=ip6 nd config \n");
- fformat (stdout, "X=no ip6 nd config\n");
- fformat (stdout, "y=ip6 nd prefix \n");
- fformat (stdout, "Y=no ip6 nd prefix\n");
- fformat (stdout, "@=l2 xconnect\n");
- fformat (stdout, "#=l2 bridge\n");
-
- default:
- break;
- }
-
- }
-
-done:
-
- if (tm->link_events_on)
- link_up_down_enable_disable (tm, 0 /* enable */ );
- if (tm->stats_on)
- stats_enable_disable (tm, 0 /* enable */ );
- if (tm->oam_events_on)
- oam_events_enable_disable (tm, 0 /* enable */ );
-
- disconnect_from_vpe ();
- exit (0);
-}
-
-#undef vl_api_version
-#define vl_api_version(n,v) static u32 vpe_api_version = v;
-#include <vpp/api/vpe.api.h>
-#undef vl_api_version
-
-void
-vl_client_add_api_signatures (vl_api_memclnt_create_t * mp)
-{
- /*
- * Send the main API signature in slot 0. This bit of code must
- * match the checks in ../vpe/api/api.c: vl_msg_api_version_check().
- */
- mp->api_versions[0] = clib_host_to_net_u32 (vpe_api_version);
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vpp/api/test_ha.c b/src/vpp/api/test_ha.c
deleted file mode 100644
index 0cc1074031e..00000000000
--- a/src/vpp/api/test_ha.c
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- *------------------------------------------------------------------
- * api.c - message handler registration
- *
- * Copyright (c) 2010 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <netinet/in.h>
-#include <signal.h>
-#include <pthread.h>
-#include <unistd.h>
-#include <time.h>
-#include <fcntl.h>
-#include <string.h>
-#include <vppinfra/clib.h>
-#include <vppinfra/vec.h>
-#include <vppinfra/hash.h>
-#include <vppinfra/bitmap.h>
-#include <vppinfra/fifo.h>
-#include <vppinfra/time.h>
-#include <vppinfra/heap.h>
-#include <vppinfra/pool.h>
-#include <vppinfra/format.h>
-#include <vppinfra/error.h>
-
-#include <vnet/vnet.h>
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include <vlibapi/api.h>
-#include <vlibmemory/api.h>
-#include <svm/svm.h>
-#include <svm/svmdb.h>
-
-#include <vpp/api/vpe_msg_enum.h>
-
-#include <vnet/ip/ip.h>
-
-#define f64_endian(a)
-#define f64_print(a,b)
-
-#define vl_typedefs /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_typedefs
-
-#define vl_endianfun /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_endianfun
-
-/* instantiate all the print functions we know about */
-#define vl_print(handle, ...)
-#define vl_printfun
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_printfun
-
-vl_shmem_hdr_t *shmem_hdr;
-
-typedef struct
-{
- u32 pings_sent;
- u32 pings_replied;
- volatile u32 signal_received;
-
- /* convenience */
- svm_queue_t *vl_input_queue;
- u32 my_client_index;
- svmdb_client_t *svmdb_client;
-} test_main_t;
-
-test_main_t test_main;
-
-static void vl_api_control_ping_reply_t_handler
- (vl_api_control_ping_reply_t * mp)
-{
- test_main_t *tm = &test_main;
-
- fformat (stdout, "control ping reply from pid %d\n", ntohl (mp->vpe_pid));
- tm->pings_replied++;
-}
-
-void
-vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...)
-{
- clib_warning ("BUG: vlib_cli_output called...");
-}
-
-#define foreach_api_msg \
-_(CONTROL_PING_REPLY,control_ping_reply)
-
-void
-ping (test_main_t * tm)
-{
- vl_api_control_ping_t *mp;
-
- mp = vl_msg_api_alloc (sizeof (*mp));
- clib_memset (mp, 0, sizeof (*mp));
- mp->_vl_msg_id = ntohs (VL_API_CONTROL_PING);
- mp->client_index = tm->my_client_index;
- mp->context = 0xdeadbeef;
-
- vl_msg_api_send_shmem (tm->vl_input_queue, (u8 *) & mp);
-}
-
-static void
-noop_handler (void *notused)
-{
-}
-
-int
-connect_to_vpe (char *name)
-{
- int rv = 0;
- test_main_t *tm = &test_main;
- api_main_t *am = vlibapi_get_main ();
-
- rv = vl_client_connect_to_vlib ("/vpe-api", name, 32);
- if (rv < 0)
- return rv;
-
-#define _(N,n) \
- vl_msg_api_set_handlers(VL_API_##N, #n, \
- vl_api_##n##_t_handler, \
- noop_handler, \
- vl_api_##n##_t_endian, \
- vl_api_##n##_t_print, \
- sizeof(vl_api_##n##_t), 1);
- foreach_api_msg;
-#undef _
-
- shmem_hdr = api_main.shmem_hdr;
- tm->vl_input_queue = shmem_hdr->vl_input_queue;
- tm->my_client_index = am->my_client_index;
- return 0;
-}
-
-int
-disconnect_from_vpe (void)
-{
- vl_client_disconnect_from_vlib ();
-
- return 0;
-}
-
-void
-signal_handler (int signo)
-{
- test_main_t *tm = &test_main;
-
- tm->signal_received = 1;
-}
-
-
-int
-main (int argc, char **argv)
-{
- test_main_t *tm = &test_main;
- api_main_t *am = vlibapi_get_main ();
- u32 swt_pid = 0;
- int connected = 0;
-
- signal (SIGINT, signal_handler);
-
- while (1)
- {
- if (tm->signal_received)
- break;
-
- if (am->shmem_hdr)
- swt_pid = am->shmem_hdr->vl_pid;
-
- /* If kill returns 0, the vpe-f process is alive */
- if (kill (swt_pid, 0) == 0)
- {
- /* Try to connect */
- if (connected == 0)
- {
- fformat (stdout, "Connect to VPE-f\n");
- if (connect_to_vpe ("test_ha_client") >= 0)
- {
- tm->pings_sent = 0;
- tm->pings_replied = 0;
- connected = 1;
- }
- else
- {
- fformat (stdout, "Connect failed, sleep and retry...\n");
- sleep (1);
- continue;
- }
- }
- tm->pings_sent++;
- ping (tm);
-
- sleep (1);
-
- /* havent heard back in 3 seconds, disco / reco */
- if ((tm->pings_replied + 3) <= tm->pings_sent)
- {
- fformat (stdout, "VPE-f pid %d not responding\n", swt_pid);
- swt_pid = 0;
- disconnect_from_vpe ();
- connected = 0;
- }
- }
- else
- {
- if (connected)
- {
- fformat (stdout, "VPE-f pid %d died\n", swt_pid);
- swt_pid = 0;
- disconnect_from_vpe ();
- connected = 0;
- }
- sleep (1);
- }
- }
-
- fformat (stdout, "Signal received, graceful exit\n");
- disconnect_from_vpe ();
- exit (0);
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vpp/api/types.c b/src/vpp/api/types.c
index a30736fbb9f..92bbdb30ac9 100644
--- a/src/vpp/api/types.c
+++ b/src/vpp/api/types.c
@@ -88,8 +88,7 @@ format_vl_api_prefix (u8 * s, va_list * args)
{
const vl_api_prefix_t *pfx = va_arg (*args, vl_api_prefix_t *);
- s = format (s, "%U/%d", format_vl_api_address,
- &pfx->address, pfx->len);
+ s = format (s, "%U/%u", format_vl_api_address, &pfx->address, pfx->len);
return s;
}
@@ -106,7 +105,7 @@ u8 *
format_vl_api_version (u8 * s, va_list * args)
{
vl_api_version_t *ver = va_arg (*args, vl_api_version_t *);
- s = format(s, "%d.%d.%d", ver->major, ver->minor, ver->patch);
+ s = format (s, "%u.%u.%u", ver->major, ver->minor, ver->patch);
if (ver->pre_release[0] != 0)
{
s = format(s, "-%v", ver->pre_release);
@@ -176,13 +175,14 @@ unformat_vl_api_ip6_address (unformat_input_t * input, va_list * args)
}
uword
-unformat_vl_api_prefix (unformat_input_t * input, va_list * args)
+unformat_vl_api_prefix (unformat_input_t *input, va_list *args)
{
- vl_api_prefix_t *pfx = va_arg (*args, vl_api_prefix_t *);
+ vl_api_prefix_t *pfx = va_arg (*args, vl_api_prefix_t *);
+
+ if (unformat (input, "%U/%U", unformat_vl_api_address, &pfx->address,
+ unformat_u8, &pfx->len))
+ return (1);
- if (unformat (input, "%U/%d", unformat_vl_api_address, &pfx->address,
- &pfx->len))
- return (1);
return (0);
}
@@ -191,14 +191,14 @@ unformat_vl_api_mprefix (unformat_input_t * input, va_list * args)
{
vl_api_mprefix_t *pfx = va_arg (*args, vl_api_mprefix_t *);
- if (unformat (input, "%U/%d",
- unformat_vl_api_ip4_address, &pfx->grp_address.ip4,
- &pfx->grp_address_length))
- pfx->af = ADDRESS_IP4;
- else if (unformat (input, "%U/%d",
- unformat_vl_api_ip6_address, &pfx->grp_address.ip6,
- &pfx->grp_address_length))
- pfx->af = ADDRESS_IP6;
+ if (unformat (input, "%U/%U", unformat_vl_api_ip4_address,
+ &pfx->grp_address.ip4, unformat_u16,
+ &pfx->grp_address_length))
+ pfx->af = ADDRESS_IP4;
+ else if (unformat (input, "%U/%U", unformat_vl_api_ip6_address,
+ &pfx->grp_address.ip6, unformat_u16,
+ &pfx->grp_address_length))
+ pfx->af = ADDRESS_IP6;
else if (unformat (input, "%U %U",
unformat_vl_api_ip4_address, &pfx->src_address.ip4,
unformat_vl_api_ip4_address, &pfx->grp_address.ip4))
@@ -235,17 +235,14 @@ unformat_vl_api_mprefix (unformat_input_t * input, va_list * args)
uword unformat_vl_api_version (unformat_input_t * input, va_list * args)
{
-vl_api_version_t *ver = va_arg (*args, vl_api_version_t *);
+ vl_api_version_t *ver = va_arg (*args, vl_api_version_t *);
-if (unformat (input, "%d.%d.%d-%s+%s", ver->major, ver->minor, ver->patch, ver->pre_release, ver->build_metadata
- ))
- return (1);
-else if (unformat (input, "%d.%d.%d-%s", ver->major, ver->minor, ver->patch, ver->pre_release
- ))
- return (1);
-else if (unformat (input, "%d.%d.%d", ver->major, ver->minor, ver->patch
- ))
- return (1);
+ if (unformat (input, "%u.%u.%u-%s+%s", ver->major, ver->minor, ver->patch,
+ ver->pre_release, ver->build_metadata) ||
+ unformat (input, "%u.%u.%u-%s", ver->major, ver->minor, ver->patch,
+ ver->pre_release) ||
+ unformat (input, "%u.%u.%u", ver->major, ver->minor, ver->patch))
+ return (1);
return (0);
}
diff --git a/src/vpp/api/types.h b/src/vpp/api/types.h
index feafe8f5cb3..9864e8c38a8 100644
--- a/src/vpp/api/types.h
+++ b/src/vpp/api/types.h
@@ -21,9 +21,8 @@
#include <vnet/ip/ip_types.api_types.h>
#include <vnet/ethernet/ethernet_types.api_types.h>
-#define vl_typedefs /* define message structures */
-#include <vpp/api/vpe_all_api_h.h>
-#undef vl_typedefs
+
+#include <vpp/api/vpe_types.api_types.h>
extern const vl_api_mac_address_t VL_API_ZERO_MAC_ADDRESS;
extern const vl_api_address_t VL_API_ZERO_ADDRESS;
diff --git a/src/vpp/api/vpe.api b/src/vpp/api/vpe.api
index 9047d0e1aa9..5976f3d99a9 100644
--- a/src/vpp/api/vpe.api
+++ b/src/vpp/api/vpe.api
@@ -19,7 +19,7 @@
called through a shared memory interface.
*/
-option version = "1.6.1";
+option version = "1.7.0";
import "vpp/api/vpe_types.api";
@@ -53,116 +53,6 @@ import "vpp/api/vpe_types.api";
* flow APIs: see .../vnet/vnet/flow/{flow.api, flow_api.c}
*/
-/** \brief Control ping from client to api server request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-define control_ping
-{
- u32 client_index;
- u32 context;
-};
-
-/** \brief Control ping from the client to the server response
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param vpe_pid - the pid of the vpe, returned by the server
-*/
-define control_ping_reply
-{
- u32 context;
- i32 retval;
- u32 client_index;
- u32 vpe_pid;
-};
-
-/** \brief Process a vpe parser cli string request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param cmd_in_shmem - pointer to cli command string
-*/
-define cli
-{
- u32 client_index;
- u32 context;
- u64 cmd_in_shmem;
-};
-define cli_inband
-{
- u32 client_index;
- u32 context;
- string cmd[];
-};
-
-/** \brief vpe parser cli string response
- @param context - sender context, to match reply w/ request
- @param retval - return code for request
- @param reply_in_shmem - Reply string from cli processing if any
-*/
-define cli_reply
-{
- u32 context;
- i32 retval;
- u64 reply_in_shmem;
-};
-define cli_inband_reply
-{
- u32 context;
- i32 retval;
- string reply[];
-};
-
-/** \brief Get node index using name request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param node_name[] - name of the node
-*/
-define get_node_index
-{
- u32 client_index;
- u32 context;
- string node_name[64];
-};
-
-/** \brief Get node index using name request
- @param context - sender context, to match reply w/ request
- @param retval - return code for the request
- @param node_index - index of the desired node if found, else ~0
-*/
-define get_node_index_reply
-{
- u32 context;
- i32 retval;
- u32 node_index;
-};
-
-/** \brief Set the next node for a given node request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param node_name[] - node to add the next node to
- @param next_name[] - node to add as the next node
-*/
-define add_node_next
-{
- u32 client_index;
- u32 context;
- string node_name[64];
- string next_name[64];
-};
-
-/** \brief IP Set the next node for a given node response
- @param context - sender context, to match reply w/ request
- @param retval - return code for the add next node request
- @param next_index - the index of the next node if success, else ~0
-*/
-define add_node_next_reply
-{
- u32 context;
- i32 retval;
- u32 next_index;
-};
-
/** \brief show version
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
@@ -190,99 +80,26 @@ define show_version_reply
string build_directory[256];
};
-
-/** \brief show_threads display the information about vpp
- threads running on system along with their process id,
- cpu id, physical core and cpu socket.
-*/
-define show_threads
-{
- u32 client_index;
- u32 context;
-};
-
-/** \brief thread data
- @param id - thread index
- @param name - thread name i.e. vpp_main or vpp_wk_0
- @param type - thread type i.e. workers or stats
- @param pid - thread Process Id
- @param cpu_id - thread pinned to cpu.
- "CPUs or Logical cores are the number of physical cores times
- the number of threads that can run on each core through
- the use of hyperthreading." (from unix.stackexchange.com)
- @param core - thread pinned to actual physical core.
- @param cpu_socket - thread is running on which cpu socket.
-*/
-typedef thread_data
-{
- u32 id;
- string name[64];
- string type[64];
- u32 pid;
- u32 cpu_id;
- u32 core;
- u32 cpu_socket;
-};
-
-/** \brief show_threads_reply
- @param context - returned sender context, to match reply w/ request
- @param retval - return code
- @param count - number of threads in thread_data array
- @param thread_data - array of thread data
-*/
-define show_threads_reply
-{
- u32 context;
- i32 retval;
- u32 count;
- vl_api_thread_data_t thread_data[count];
-};
-
-define get_node_graph
-{
- u32 client_index;
- u32 context;
-};
-
-/** \brief get_node_graph_reply
- @param context - returned sender context, to match reply w/ request
- @param retval - return code
- @param reply_in_shmem - result from vlib_node_serialize, in shared
- memory. Process with vlib_node_unserialize, remember to switch
- heaps and free the result.
-*/
-
-define get_node_graph_reply
-{
- u32 context;
- i32 retval;
- u64 reply_in_shmem;
-};
-
-/** \brief Query relative index via node names
+/** \brief Show the current system timestamp.
@param client_index - opaque cookie to identify the sender
@param context - sender context, to match reply w/ request
- @param node_name - name of node to find relative index from
- @param next_name - next node from node_name to find relative index of
*/
-define get_next_index
+define show_vpe_system_time
{
u32 client_index;
u32 context;
- string node_name[64];
- string next_name[64];
};
-/** \brief Reply for get next node index
+/** \brief Reply for show vpe system time.
@param context - sender context which was passed in the request
@param retval - return value
- @param next_index - index of the next_node
+ @param vpe_system_time - the time in seconds since epoch of the host system.
*/
-define get_next_index_reply
+define show_vpe_system_time_reply
{
u32 context;
i32 retval;
- u32 next_index;
+ vl_api_timestamp_t vpe_system_time;
};
define log_dump {
@@ -299,76 +116,6 @@ define log_details {
string message[256];
};
-/** \brief Show the current system timestamp.
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
-*/
-define show_vpe_system_time
-{
- u32 client_index;
- u32 context;
-};
-
-/** \brief Reply for show vpe system time.
- @param context - sender context which was passed in the request
- @param retval - return value
- @param vpe_system_time - the time in seconds since epoch of the host system.
-*/
-define show_vpe_system_time_reply
-{
- u32 context;
- i32 retval;
- vl_api_timestamp_t vpe_system_time;
-};
-
-/** \brief f64 types are not standardized across the wire. Sense wire format in each direction by sending the f64 value 1.0.
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param f64_one - The constant of 1.0. If you send a different value, expect an rv=VNET_API_ERROR_API_ENDIAN_FAILED.
-*/
-define get_f64_endian_value
-{
- u32 client_index;
- u32 context;
- f64 f64_one [default=1.0];
-};
-
-/** \brief get_f64_endian_value reply message
- @param context - sender context which was passed in the request
- @param retval - return value - VNET_API_ERROR_API_ENDIAN_FAILED if f64_one != 1.0
- @param f64_one_result - The value of 'f64 1.0'
-*/
-define get_f64_endian_value_reply
-{
- u32 context;
- u32 retval;
- f64 f64_one_result;
-};
-
-/** \brief Verify f64 wire format by sending a value and receiving the value + 1.0
- @param client_index - opaque cookie to identify the sender.
- @param context - sender context, to match reply w/ request.
- @param f64_value - The value you want to test. Default: 1.0.
-*/
-define get_f64_increment_by_one
-{
- u32 client_index;
- u32 context;
- f64 f64_value [default=1.0];
-};
-
-/** \brief get_f64_increment_by_one reply
- @param client_index - opaque cookie to identify the sender.
- @param context - sender context, to match reply w/ request.
- @param f64_value - The input f64_value incremented by 1.0.
-*/
-define get_f64_increment_by_one_reply
-{
- u32 context;
- u32 retval;
- f64 f64_value;
-};
-
/*
* Local Variables:
* eval: (c-set-style "gnu")
diff --git a/src/vpp/api/vpp_get_metrics.c b/src/vpp/api/vpp_get_metrics.c
index 41d2393cc96..a3860ab56b0 100644
--- a/src/vpp/api/vpp_get_metrics.c
+++ b/src/vpp/api/vpp_get_metrics.c
@@ -77,6 +77,7 @@ setup_signal_handlers (void)
/* these signals take the default action */
case SIGABRT:
case SIGKILL:
+ case SIGCONT:
case SIGSTOP:
case SIGUSR1:
case SIGUSR2:
diff --git a/src/vpp/app/version.c b/src/vpp/app/version.c
index db13c863965..1446777b74f 100644
--- a/src/vpp/app/version.c
+++ b/src/vpp/app/version.c
@@ -59,7 +59,8 @@ show_vpe_version_command_fn (vlib_main_t * vm,
int verbose = 0;
int cmdline = 0;
int indent = 2;
- char **argv = (char **) vm->argv;
+ vlib_global_main_t *vgm = vlib_get_global_main ();
+ char **argv = (char **) vgm->argv;
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
@@ -133,14 +134,12 @@ show_vpe_version_command_fn (vlib_main_t * vm,
* @cliexend
?*/
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_vpe_version_command, static) = {
.path = "show version",
.short_help = "show version [verbose] [cmdline]",
.function = show_vpe_version_command_fn,
.is_mp_safe = 1,
};
-/* *INDENT-ON* */
/** Return the image build directory name */
char *
diff --git a/src/vpp/app/vpe_cli.c b/src/vpp/app/vpe_cli.c
index fc623b101f0..6ad194992e9 100644
--- a/src/vpp/app/vpe_cli.c
+++ b/src/vpp/app/vpe_cli.c
@@ -119,13 +119,11 @@ done:
return error;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (virtual_ip_cmd_fn_command, static) = {
.path = "ip virtual",
.short_help = "ip virtual <addr> <interface> [mac <Mi>]+ [next-hop <ip4_address>]+",
.function = virtual_ip_cmd_fn_command_fn,
};
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vpp/app/vpp_get_stats.c b/src/vpp/app/vpp_get_stats.c
index d13e4d9b2b2..1c3b9d9538d 100644
--- a/src/vpp/app/vpp_get_stats.c
+++ b/src/vpp/app/vpp_get_stats.c
@@ -19,6 +19,7 @@
#include <vpp-api/client/stat_client.h>
#include <vlib/vlib.h>
+#include <vpp/vnet/config.h>
static int
stat_poll_loop (u8 ** patterns)
@@ -79,12 +80,6 @@ stat_poll_loop (u8 ** patterns)
res[i].name);
break;
- case STAT_DIR_TYPE_ERROR_INDEX:
- for (j = 0; j < vec_len (res[i].error_vector); j++)
- fformat (stdout, "%llu %s\n", res[i].error_vector[j],
- res[i].name);
- break;
-
case STAT_DIR_TYPE_SCALAR_INDEX:
fformat (stdout, "%.2f %s\n", res[i].scalar_value, res[i].name);
break;
@@ -116,6 +111,15 @@ enum stat_client_cmd_e
STAT_CLIENT_CMD_TIGHTPOLL,
};
+#ifdef CLIB_SANITIZE_ADDR
+/* default options for Address Sanitizer */
+const char *
+__asan_default_options (void)
+{
+ return VPP_SANITIZE_ADDR_OPTIONS;
+}
+#endif /* CLIB_SANITIZE_ADDR */
+
int
main (int argc, char **argv)
{
@@ -185,6 +189,8 @@ reconnect:
for (i = 0; i < vec_len (dir); i++)
{
char *n = stat_segment_index_to_name (dir[i]);
+ if (!n)
+ continue;
printf ("%s\n", n);
free (n);
}
@@ -217,12 +223,6 @@ reconnect:
res[i].name);
break;
- case STAT_DIR_TYPE_ERROR_INDEX:
- for (j = 0; j < vec_len (res[i].error_vector); j++)
- fformat (stdout, "[@%d] %llu %s\n", j, res[i].error_vector[j],
- res[i].name);
- break;
-
case STAT_DIR_TYPE_SCALAR_INDEX:
fformat (stdout, "%.2f %s\n", res[i].scalar_value, res[i].name);
break;
diff --git a/src/vpp/app/vpp_prometheus_export.c b/src/vpp/app/vpp_prometheus_export.c
index 99944917766..c6000a8a008 100644
--- a/src/vpp/app/vpp_prometheus_export.c
+++ b/src/vpp/app/vpp_prometheus_export.c
@@ -27,6 +27,11 @@
#include <errno.h>
#include <unistd.h>
#include <netdb.h>
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#endif /* __FreeBSD__ */
#include <sys/socket.h>
#include <vpp-api/client/stat_client.h>
#include <vlib/vlib.h>
@@ -35,6 +40,8 @@
/* https://github.com/prometheus/prometheus/wiki/Default-port-allocations */
#define SERVER_PORT 9482
+#define MAX_TOKENS 10
+
static char *
prom_string (char *s)
{
@@ -49,79 +56,269 @@ prom_string (char *s)
}
static void
-dump_metrics (FILE * stream, u8 ** patterns)
+print_metric_v1 (FILE *stream, stat_segment_data_t *res)
{
- stat_segment_data_t *res;
- int i, j, k;
- static u32 *stats = 0;
+ int j, k;
-retry:
- res = stat_segment_dump (stats);
- if (res == 0)
- { /* Memory layout has changed */
- if (stats)
- vec_free (stats);
- stats = stat_segment_ls (patterns);
- goto retry;
+ switch (res->type)
+ {
+ case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE:
+ fformat (stream, "# TYPE %s counter\n", prom_string (res->name));
+ for (k = 0; k < vec_len (res->simple_counter_vec); k++)
+ for (j = 0; j < vec_len (res->simple_counter_vec[k]); j++)
+ fformat (stream, "%s{thread=\"%d\",interface=\"%d\"} %lld\n",
+ prom_string (res->name), k, j,
+ res->simple_counter_vec[k][j]);
+ break;
+
+ case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED:
+ fformat (stream, "# TYPE %s_packets counter\n", prom_string (res->name));
+ fformat (stream, "# TYPE %s_bytes counter\n", prom_string (res->name));
+ for (k = 0; k < vec_len (res->simple_counter_vec); k++)
+ for (j = 0; j < vec_len (res->combined_counter_vec[k]); j++)
+ {
+ fformat (stream,
+ "%s_packets{thread=\"%d\",interface=\"%d\"} %lld\n",
+ prom_string (res->name), k, j,
+ res->combined_counter_vec[k][j].packets);
+ fformat (stream, "%s_bytes{thread=\"%d\",interface=\"%d\"} %lld\n",
+ prom_string (res->name), k, j,
+ res->combined_counter_vec[k][j].bytes);
+ }
+ break;
+ case STAT_DIR_TYPE_SCALAR_INDEX:
+ fformat (stream, "# TYPE %s counter\n", prom_string (res->name));
+ fformat (stream, "%s %.2f\n", prom_string (res->name),
+ res->scalar_value);
+ break;
+
+ case STAT_DIR_TYPE_NAME_VECTOR:
+ fformat (stream, "# TYPE %s_info gauge\n", prom_string (res->name));
+ for (k = 0; k < vec_len (res->name_vector); k++)
+ if (res->name_vector[k])
+ fformat (stream, "%s_info{index=\"%d\",name=\"%s\"} 1\n",
+ prom_string (res->name), k, res->name_vector[k]);
+ break;
+
+ case STAT_DIR_TYPE_EMPTY:
+ break;
+
+ default:
+ fformat (stderr, "Unknown value %d\n", res->type);
+ ;
}
+}
- for (i = 0; i < vec_len (res); i++)
+static void
+sanitize (char *str, int len)
+{
+ for (int i = 0; i < len; i++)
{
- switch (res[i].type)
+ if (!isalnum (str[i]))
+ str[i] = '_';
+ }
+}
+
+static int
+tokenize (const char *name, char **tokens, int *lengths, int max_tokens)
+{
+ char *p = (char *) name;
+ char *savep = p;
+
+ int i = 0;
+ while (*p && i < max_tokens - 1)
+ {
+ if (*p == '/')
{
- case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE:
- fformat (stream, "# TYPE %s counter\n", prom_string (res[i].name));
- for (k = 0; k < vec_len (res[i].simple_counter_vec); k++)
- for (j = 0; j < vec_len (res[i].simple_counter_vec[k]); j++)
- fformat (stream, "%s{thread=\"%d\",interface=\"%d\"} %lld\n",
- prom_string (res[i].name), k, j,
- res[i].simple_counter_vec[k][j]);
- break;
+ tokens[i] = (char *) savep;
+ lengths[i] = (int) (p - savep);
+ i++;
+ p++;
+ savep = p;
+ }
+ else
+ {
+ p++;
+ }
+ }
+ tokens[i] = (char *) savep;
+ lengths[i] = (int) (p - savep);
+
+ i++;
+ return i;
+}
- case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED:
- fformat (stream, "# TYPE %s_packets counter\n",
- prom_string (res[i].name));
- fformat (stream, "# TYPE %s_bytes counter\n",
- prom_string (res[i].name));
- for (k = 0; k < vec_len (res[i].simple_counter_vec); k++)
- for (j = 0; j < vec_len (res[i].combined_counter_vec[k]); j++)
+static void
+print_metric_v2 (FILE *stream, stat_segment_data_t *res)
+{
+ int num_tokens = 0;
+ char *tokens[MAX_TOKENS];
+ int lengths[MAX_TOKENS];
+ int j, k;
+
+ num_tokens = tokenize (res->name, tokens, lengths, MAX_TOKENS);
+ switch (res->type)
+ {
+ case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE:
+ if (res->simple_counter_vec == 0)
+ return;
+ for (k = 0; k < vec_len (res->simple_counter_vec); k++)
+ for (j = 0; j < vec_len (res->simple_counter_vec[k]); j++)
+ {
+ if ((num_tokens == 4) &&
+ (!strncmp (tokens[1], "nodes", lengths[1]) ||
+ !strncmp (tokens[1], "interfaces", lengths[1])))
+ {
+ sanitize (tokens[1], lengths[1]);
+ sanitize (tokens[3], lengths[3]);
+ fformat (
+ stream,
+ "%.*s_%.*s{%.*s=\"%.*s\",index=\"%d\",thread=\"%d\"} %lu\n",
+ lengths[1], tokens[1], lengths[3], tokens[3], lengths[1] - 1,
+ tokens[1], lengths[2], tokens[2], j, k,
+ res->simple_counter_vec[k][j]);
+ }
+ else if ((num_tokens == 3) &&
+ !strncmp (tokens[1], "sys", lengths[1]))
+ {
+ sanitize (tokens[1], lengths[1]);
+ fformat (stream, "%.*s_%.*s{index=\"%d\",thread=\"%d\"} %lu\n",
+ lengths[1], tokens[1], lengths[2], tokens[2], j, k,
+ res->simple_counter_vec[k][j]);
+ }
+ else if (!strncmp (tokens[1], "mem", lengths[1]))
+ {
+ if (num_tokens == 3)
+ {
+ fformat (
+ stream,
+ "%.*s{heap=\"%.*s\",index=\"%d\",thread=\"%d\"} %lu\n",
+ lengths[1], tokens[1], lengths[2], tokens[2], j, k,
+ res->simple_counter_vec[k][j]);
+ }
+ else if (num_tokens == 4)
+ {
+ fformat (stream,
+ "%.*s_%.*s{heap=\"%.*s\",index=\"%d\",thread=\"%"
+ "d\"} %lu\n",
+ lengths[1], tokens[1], lengths[3], tokens[3],
+ lengths[2], tokens[2], j, k,
+ res->simple_counter_vec[k][j]);
+ }
+ else
+ {
+ print_metric_v1 (stream, res);
+ }
+ }
+ else if (!strncmp (tokens[1], "err", lengths[1]))
{
+ // NOTE: the error is in token3, but it may contain '/'.
+ // Considering this is the last token, it is safe to print
+ // token3 until the end of res->name
fformat (stream,
- "%s_packets{thread=\"%d\",interface=\"%d\"} %lld\n",
- prom_string (res[i].name), k, j,
- res[i].combined_counter_vec[k][j].packets);
+ "%.*s{node=\"%.*s\",error=\"%s\",index=\"%d\",thread="
+ "\"%d\"} %lu\n",
+ lengths[1], tokens[1], lengths[2], tokens[2],
+ tokens[3], j, k, res->simple_counter_vec[k][j]);
+ }
+ else
+ {
+ print_metric_v1 (stream, res);
+ }
+ }
+ break;
+
+ case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED:
+ if (res->combined_counter_vec == 0)
+ return;
+ for (k = 0; k < vec_len (res->combined_counter_vec); k++)
+ for (j = 0; j < vec_len (res->combined_counter_vec[k]); j++)
+ {
+ if ((num_tokens == 4) &&
+ !strncmp (tokens[1], "interfaces", lengths[1]))
+ {
+ sanitize (tokens[1], lengths[1]);
+ sanitize (tokens[3], lengths[3]);
+ fformat (stream,
+ "%.*s_%.*s_packets{interface=\"%.*s\",index=\"%d\","
+ "thread=\"%d\"} %lu\n",
+ lengths[1], tokens[1], lengths[3], tokens[3],
+ lengths[2], tokens[2], j, k,
+ res->combined_counter_vec[k][j].packets);
fformat (stream,
- "%s_bytes{thread=\"%d\",interface=\"%d\"} %lld\n",
- prom_string (res[i].name), k, j,
- res[i].combined_counter_vec[k][j].bytes);
+ "%.*s_%.*s_bytes{interface=\"%.*s\",index=\"%d\","
+ "thread=\"%d\"} %lu\n",
+ lengths[1], tokens[1], lengths[3], tokens[3],
+ lengths[2], tokens[2], j, k,
+ res->combined_counter_vec[k][j].bytes);
}
- break;
- case STAT_DIR_TYPE_ERROR_INDEX:
- for (j = 0; j < vec_len (res[i].error_vector); j++)
+ else
+ {
+ print_metric_v1 (stream, res);
+ }
+ }
+ break;
+
+ case STAT_DIR_TYPE_SCALAR_INDEX:
+ if ((num_tokens == 4) &&
+ !strncmp (tokens[1], "buffer-pools", lengths[1]))
+ {
+ sanitize (tokens[1], lengths[1]);
+ sanitize (tokens[3], lengths[3]);
+ fformat (stream, "%.*s_%.*s{pool=\"%.*s\"} %.2f\n", lengths[1],
+ tokens[1], lengths[3], tokens[3], lengths[2], tokens[2],
+ res->scalar_value);
+ }
+ else if ((num_tokens == 3) && !strncmp (tokens[1], "sys", lengths[1]))
+ {
+ sanitize (tokens[1], lengths[1]);
+ sanitize (tokens[2], lengths[2]);
+ fformat (stream, "%.*s_%.*s %.2f\n", lengths[1], tokens[1],
+ lengths[2], tokens[2], res->scalar_value);
+ if (!strncmp (tokens[2], "boottime", lengths[2]))
{
- fformat (stream, "# TYPE %s counter\n",
- prom_string (res[i].name));
- fformat (stream, "%s{thread=\"%d\"} %lld\n",
- prom_string (res[i].name), j, res[i].error_vector[j]);
+ struct timeval tv;
+ gettimeofday (&tv, NULL);
+ fformat (stream, "sys_uptime %.2f\n",
+ tv.tv_sec - res->scalar_value);
}
- break;
+ }
+ else
+ {
+ print_metric_v1 (stream, res);
+ }
+ break;
- case STAT_DIR_TYPE_SCALAR_INDEX:
- fformat (stream, "# TYPE %s counter\n", prom_string (res[i].name));
- fformat (stream, "%s %.2f\n", prom_string (res[i].name),
- res[i].scalar_value);
- break;
+ default:;
+ fformat (stderr, "Unhandled type %d name %s\n", res->type, res->name);
+ }
+}
- case STAT_DIR_TYPE_EMPTY:
- break;
+static void
+dump_metrics (FILE *stream, u8 **patterns, u8 v2)
+{
+ stat_segment_data_t *res;
+ int i;
+ static u32 *stats = 0;
- default:
- fformat (stderr, "Unknown value %d\n", res[i].type);
- ;
- }
+retry:
+ res = stat_segment_dump (stats);
+ if (res == 0)
+ { /* Memory layout has changed */
+ if (stats)
+ vec_free (stats);
+ stats = stat_segment_ls (patterns);
+ goto retry;
}
- stat_segment_data_free (res);
+ for (i = 0; i < vec_len (res); i++)
+ {
+ if (v2)
+ print_metric_v2 (stream, &res[i]);
+ else
+ print_metric_v1 (stream, &res[i]);
+ }
+ stat_segment_data_free (res);
}
@@ -129,7 +326,7 @@ retry:
#define NOT_FOUND_ERROR "<html><head><title>Document not found</title></head><body><h1>404 - Document not found</h1></body></html>"
static void
-http_handler (FILE * stream, u8 ** patterns)
+http_handler (FILE *stream, u8 **patterns, u8 v2)
{
char status[80] = { 0 };
if (fgets (status, sizeof (status) - 1, stream) == 0)
@@ -181,7 +378,7 @@ http_handler (FILE * stream, u8 ** patterns)
return;
}
fputs ("HTTP/1.0 200 OK\r\nContent-Type: text/plain\r\n\r\n", stream);
- dump_metrics (stream, patterns);
+ dump_metrics (stream, patterns, v2);
}
static int
@@ -243,10 +440,14 @@ main (int argc, char **argv)
{
unformat_input_t _argv, *a = &_argv;
u8 *stat_segment_name, *pattern = 0, **patterns = 0;
+ u16 port = SERVER_PORT;
+ char *usage =
+ "%s: usage [socket-name <name>] [port <0 - 65535>] [v2] <patterns> ...\n";
int rv;
+ u8 v2 = 0;
- /* Allocating 32MB heap */
- clib_mem_init (0, 32 << 20);
+ /* Allocating 256MB heap */
+ clib_mem_init (0, 256 << 20);
unformat_init_command_line (a, argv);
@@ -256,23 +457,24 @@ main (int argc, char **argv)
{
if (unformat (a, "socket-name %s", &stat_segment_name))
;
+ if (unformat (a, "v2"))
+ v2 = 1;
+ else if (unformat (a, "port %d", &port))
+ ;
else if (unformat (a, "%s", &pattern))
{
vec_add1 (patterns, pattern);
}
else
{
- fformat (stderr,
- "%s: usage [socket-name <name>] <patterns> ...\n",
- argv[0]);
+ fformat (stderr, usage, argv[0]);
exit (1);
}
}
if (vec_len (patterns) == 0)
{
- fformat (stderr,
- "%s: usage [socket-name <name>] <patterns> ...\n", argv[0]);
+ fformat (stderr, usage, argv[0]);
exit (1);
}
@@ -284,7 +486,7 @@ main (int argc, char **argv)
exit (1);
}
- int fd = start_listen (SERVER_PORT);
+ int fd = start_listen (port);
if (fd < 0)
{
exit (1);
@@ -319,7 +521,7 @@ main (int argc, char **argv)
continue;
}
/* Single reader at the moment */
- http_handler (stream, patterns);
+ http_handler (stream, patterns, v2);
fclose (stream);
}
diff --git a/src/vpp/app/vppctl.c b/src/vpp/app/vppctl.c
index 31c9e43a770..f1d69c3ca84 100644
--- a/src/vpp/app/vppctl.c
+++ b/src/vpp/app/vppctl.c
@@ -21,6 +21,9 @@
#include <termios.h>
#include <unistd.h>
#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
#define DEBUG 0
@@ -29,11 +32,11 @@
#define TELOPTS
#endif
+#include <vppinfra/clib.h>
#include <arpa/telnet.h>
-
-#include <vppinfra/mem.h>
-#include <vppinfra/format.h>
-#include <vppinfra/socket.h>
+#ifndef STATIC_VPPCTL
+#include <vpp/vnet/config.h>
+#endif
#define SOCKET_FILE "/run/vpp/cli.sock"
@@ -41,35 +44,54 @@ volatile int window_resized = 0;
struct termios orig_tio;
static void
-send_ttype (clib_socket_t * s, int is_interactive)
+send_ttype (int sock_fd, int is_interactive)
{
char *term;
+ static char buf[2048];
+
+ /* wipe the buffer so there is no potential
+ * for inter-invocation leakage */
+ memset (buf, 0, sizeof (buf));
term = is_interactive ? getenv ("TERM") : "vppctl";
if (term == NULL)
term = "dumb";
- clib_socket_tx_add_formatted (s, "%c%c%c" "%c%s" "%c%c",
- IAC, SB, TELOPT_TTYPE, 0, term, IAC, SE);
- clib_socket_tx (s);
+ int len = snprintf (buf, sizeof (buf),
+ "%c%c%c"
+ "%c%s"
+ "%c%c",
+ IAC, SB, TELOPT_TTYPE, 0, term, IAC, SE);
+ if (send (sock_fd, buf, len, 0) < 0)
+ {
+ perror ("send_ttype");
+ }
}
static void
-send_naws (clib_socket_t * s)
+send_naws (int sock_fd)
{
struct winsize ws;
+ static char buf[2048];
+ memset (buf, 0, sizeof (buf));
if (ioctl (STDIN_FILENO, TIOCGWINSZ, &ws) < 0)
{
- clib_unix_warning ("ioctl(TIOCGWINSZ)");
+ fprintf (stderr, "ioctl(TIOCGWINSZ)");
return;
}
- clib_socket_tx_add_formatted (s, "%c%c%c" "%c%c%c%c" "%c%c",
- IAC, SB, TELOPT_NAWS,
- ws.ws_col >> 8, ws.ws_col & 0xff,
- ws.ws_row >> 8, ws.ws_row & 0xff, IAC, SE);
- clib_socket_tx (s);
+ int len = snprintf (buf, sizeof (buf),
+ "%c%c%c"
+ "%c%c%c%c"
+ "%c%c",
+ IAC, SB, TELOPT_NAWS, ws.ws_col >> 8, ws.ws_col & 0xff,
+ ws.ws_row >> 8, ws.ws_row & 0xff, IAC, SE);
+ int n_written = write (sock_fd, buf, len);
+ if (n_written < len)
+ {
+ perror ("send_naws");
+ }
}
static void
@@ -84,74 +106,90 @@ signal_handler_term (int signum)
tcsetattr (STDIN_FILENO, TCSAFLUSH, &orig_tio);
}
-static u8 *
-process_input (u8 * str, clib_socket_t * s, int is_interactive,
- int *sent_ttype)
+static int
+process_input (int sock_fd, unsigned char *rx_buf, int rx_buf_len,
+ int is_interactive, int *sent_ttype)
{
int i = 0;
+ int j = 0;
- while (i < vec_len (s->rx_buffer))
+ while (i < rx_buf_len)
{
- if (s->rx_buffer[i] == IAC)
+ if (rx_buf[i] == IAC)
{
- if (s->rx_buffer[i + 1] == SB)
+ if (rx_buf[i + 1] == SB)
{
- u8 *sb = 0;
- char opt = s->rx_buffer[i + 2];
+ char opt = rx_buf[i + 2];
i += 3;
- while (s->rx_buffer[i] != IAC)
- vec_add1 (sb, s->rx_buffer[i++]);
-
#if DEBUG
- clib_warning ("SB %s\n %U", TELOPT (opt),
- format_hexdump, sb, vec_len (sb));
+ if (rx_buf[i] != IAC)
+ {
+ fprintf (stderr, "SB ");
+ }
+ while (rx_buf[i] != IAC && i < rx_buf_len)
+ fprintf (stderr, "%02x ", rx_buf[i++]);
+ fprintf (stderr, "\n");
+#else
+ while (rx_buf[i] != IAC && i < rx_buf_len)
+ {
+ i++;
+ }
#endif
- vec_free (sb);
i += 2;
if (opt == TELOPT_TTYPE)
{
- send_ttype (s, is_interactive);
+ send_ttype (sock_fd, is_interactive);
*sent_ttype = 1;
}
else if (is_interactive && opt == TELOPT_NAWS)
- send_naws (s);
+ send_naws (sock_fd);
}
else
{
#if DEBUG
- clib_warning ("IAC at %d, IAC %s %s", i,
- TELCMD (s->rx_buffer[i + 1]),
- TELOPT (s->rx_buffer[i + 2]));
+ fprintf (stderr, "IAC at %d, IAC %s %s", i,
+ TELCMD (rx_buf[i + 1]), TELOPT (rx_buf[i + 2]));
#endif
i += 3;
}
}
else
- vec_add1 (str, s->rx_buffer[i++]);
+ {
+ /* i is always the same or ahead of j, so at worst this is a no-op */
+ rx_buf[j] = rx_buf[i];
+ i++;
+ j++;
+ }
}
- vec_reset_length (s->rx_buffer);
- return str;
+ return j;
}
+#if !defined(STATIC_VPPCTL) && defined(CLIB_SANITIZE_ADDR)
+/* default options for Address Sanitizer */
+const char *
+__asan_default_options (void)
+{
+ return VPP_SANITIZE_ADDR_OPTIONS;
+}
+#endif /* CLIB_SANITIZE_ADDR */
int
main (int argc, char *argv[])
{
- clib_socket_t _s = { 0 }, *s = &_s;
- clib_error_t *error = 0;
struct epoll_event event;
struct sigaction sa;
struct termios tio;
int efd = -1;
- u8 *str = 0;
- u8 *cmd = 0;
+ char *cmd = 0;
+ unsigned long cmd_len = 0;
int do_quit = 0;
int is_interactive = 0;
int acked = 1; /* counts messages from VPP; starts at 1 */
int sent_ttype = 0;
-
-
- clib_mem_init (0, 64ULL << 10);
+ char *sock_fname = SOCKET_FILE;
+ int sock_fd = -1;
+ int error = 0;
+ int arg = 0;
/* process command line */
argc--;
@@ -159,32 +197,74 @@ main (int argc, char *argv[])
if (argc > 1 && strncmp (argv[0], "-s", 2) == 0)
{
- s->config = argv[1];
+ sock_fname = argv[1];
argc -= 2;
argv += 2;
}
- else
- s->config = SOCKET_FILE;
- while (argc--)
- cmd = format (cmd, "%s%c", (argv++)[0], argc ? ' ' : 0);
+ struct sockaddr_un saddr = { 0 };
+ saddr.sun_family = AF_UNIX;
- s->flags = CLIB_SOCKET_F_IS_CLIENT;
+ if (strlen (sock_fname) > sizeof (saddr.sun_path) - 1)
+ {
+ perror ("socket path too long");
+ exit (1);
+ }
- error = clib_socket_init (s);
- if (error)
- goto done;
+ strncpy (saddr.sun_path, sock_fname, sizeof (saddr.sun_path) - 1);
+
+ sock_fd = socket (AF_UNIX, SOCK_STREAM, 0);
+ if (sock_fd < 0)
+ {
+ perror ("socket");
+ exit (1);
+ }
+
+ if (connect (sock_fd, (struct sockaddr *) &saddr, sizeof (saddr)) < 0)
+ {
+ perror ("connect");
+ exit (1);
+ }
+
+ for (arg = 0; arg < argc; arg++)
+ {
+ cmd_len += strlen (argv[arg]) + 1;
+ }
+ if (cmd_len > 0)
+ {
+ cmd_len++; // account for 0 at end
+ cmd = malloc (cmd_len);
+ if (!cmd)
+ {
+ error = errno;
+ perror ("malloc failed");
+ goto done;
+ }
+ memset (cmd, 0, cmd_len);
+ unsigned long space_left = cmd_len - 1; // reserve space for 0 at end
+ while (argc--)
+ {
+ strncat (cmd, *argv, space_left);
+ space_left -= strlen (*argv);
+ ++argv;
+ strncat (cmd, " ", space_left);
+ --space_left;
+ }
+ cmd[cmd_len - 2] = '\n';
+ cmd[cmd_len - 1] = 0;
+ }
is_interactive = isatty (STDIN_FILENO) && cmd == 0;
if (is_interactive)
{
/* Capture terminal resize events */
- clib_memset (&sa, 0, sizeof (struct sigaction));
+ memset (&sa, 0, sizeof (struct sigaction));
sa.sa_handler = signal_handler_winch;
if (sigaction (SIGWINCH, &sa, 0) < 0)
{
- error = clib_error_return_unix (0, "sigaction");
+ error = errno;
+ perror ("sigaction for SIGWINCH");
goto done;
}
@@ -192,14 +272,16 @@ main (int argc, char *argv[])
sa.sa_handler = signal_handler_term;
if (sigaction (SIGTERM, &sa, 0) < 0)
{
- error = clib_error_return_unix (0, "sigaction");
+ error = errno;
+ perror ("sigaction for SIGTERM");
goto done;
}
/* Save the original tty state so we can restore it later */
if (tcgetattr (STDIN_FILENO, &orig_tio) < 0)
{
- error = clib_error_return_unix (0, "tcgetattr");
+ error = errno;
+ perror ("tcgetattr");
goto done;
}
@@ -212,7 +294,8 @@ main (int argc, char *argv[])
if (tcsetattr (STDIN_FILENO, TCSAFLUSH, &tio) < 0)
{
- error = clib_error_return_unix (0, "tcsetattr");
+ error = errno;
+ perror ("tcsetattr");
goto done;
}
}
@@ -220,35 +303,43 @@ main (int argc, char *argv[])
efd = epoll_create1 (0);
/* register STDIN */
- event.events = EPOLLIN | EPOLLPRI | EPOLLERR;
- event.data.fd = STDIN_FILENO;
- if (epoll_ctl (efd, EPOLL_CTL_ADD, STDIN_FILENO, &event) != 0)
+ if (cmd == 0)
{
- /* ignore EPERM; it means stdin is something like /dev/null */
- if (errno != EPERM)
+ event.events = EPOLLIN | EPOLLPRI | EPOLLERR;
+ event.data.fd = STDIN_FILENO;
+ if (epoll_ctl (efd, EPOLL_CTL_ADD, STDIN_FILENO, &event) != 0)
{
- error = clib_error_return_unix (0, "epoll_ctl[%d]", STDIN_FILENO);
- goto done;
+ /* ignore EPERM; it means stdin is something like /dev/null */
+ if (errno != EPERM)
+ {
+ error = errno;
+ fprintf (stderr, "epoll_ctl[%d]", STDIN_FILENO);
+ perror (0);
+ goto done;
+ }
}
}
/* register socket */
event.events = EPOLLIN | EPOLLPRI | EPOLLERR;
- event.data.fd = s->fd;
- if (epoll_ctl (efd, EPOLL_CTL_ADD, s->fd, &event) != 0)
+ event.data.fd = sock_fd;
+ if (epoll_ctl (efd, EPOLL_CTL_ADD, sock_fd, &event) != 0)
{
- error = clib_error_return_unix (0, "epoll_ctl[%d]", s->fd);
+ error = errno;
+ fprintf (stderr, "epoll_ctl[%d]", sock_fd);
+ perror (0);
goto done;
}
while (1)
{
int n;
+ static int sent_cmd = 0;
if (window_resized)
{
window_resized = 0;
- send_naws (s);
+ send_naws (sock_fd);
}
if ((n = epoll_wait (efd, &event, 1, -1)) < 0)
@@ -257,14 +348,15 @@ main (int argc, char *argv[])
if (errno == EINTR)
continue;
- error = clib_error_return_unix (0, "epoll_wait");
+ error = errno;
+ perror ("epoll_wait");
goto done;
}
if (n == 0)
continue;
- if (event.data.fd == STDIN_FILENO)
+ if (event.data.fd == STDIN_FILENO && cmd == 0)
{
int n;
char c[100];
@@ -275,31 +367,37 @@ main (int argc, char *argv[])
n = read (STDIN_FILENO, c, sizeof (c));
if (n > 0)
{
- memcpy (clib_socket_tx_add (s, n), c, n);
- error = clib_socket_tx (s);
+ int n_written = write (sock_fd, c, n);
+ if (n_written < n)
+ error = errno;
if (error)
goto done;
}
else if (n < 0)
- clib_warning ("read rv=%d", n);
- else /* EOF */
+ fprintf (stderr, "read rv=%d", n);
+ else /* EOF */
do_quit = 1;
}
- else if (event.data.fd == s->fd)
+ else if (event.data.fd == sock_fd)
{
- error = clib_socket_rx (s, 100);
+ unsigned char rx_buf[100];
+ memset (rx_buf, 0, sizeof (rx_buf));
+ int nread = recv (sock_fd, rx_buf, sizeof (rx_buf), 0);
+
+ if (nread < 0)
+ error = errno;
if (error)
break;
- if (clib_socket_rx_end_of_file (s))
+ if (nread == 0)
break;
- str = process_input (str, s, is_interactive, &sent_ttype);
+ int len = process_input (sock_fd, rx_buf, nread, is_interactive,
+ &sent_ttype);
- if (vec_len (str) > 0)
+ if (len > 0)
{
- int len = vec_len (str);
- u8 *p = str, *q = str;
+ unsigned char *p = rx_buf, *q = rx_buf;
while (len)
{
@@ -310,7 +408,8 @@ main (int argc, char *argv[])
n = write (STDOUT_FILENO, p, q - p);
if (n < 0)
{
- error = clib_error_return_unix (0, "write");
+ error = errno;
+ perror ("write");
goto done;
}
@@ -322,42 +421,50 @@ main (int argc, char *argv[])
len -= q - p;
p = q;
}
-
- vec_reset_length (str);
}
if (do_quit && do_quit < acked)
{
/* Ask the other end to close the connection */
- clib_socket_tx_add_formatted (s, "quit\n");
- clib_socket_tx (s);
+ char quit_str[] = "quit\n";
+ int n = write (sock_fd, quit_str, strlen (quit_str));
+ if (n < strlen (quit_str))
+ {
+ error = errno;
+ perror ("write quit");
+ }
do_quit = 0;
}
- if (cmd && sent_ttype)
+ if (cmd && sent_ttype && !sent_cmd)
{
/* We wait until after the TELNET TTYPE option has been sent.
* That is to make sure the session at the VPP end has switched
* to line-by-line mode, and thus avoid prompts and echoing.
* Note that it does also disable further TELNET option processing.
*/
- clib_socket_tx_add_formatted (s, "%s\n", cmd);
- clib_socket_tx (s);
- vec_free (cmd);
+ int n_written = write (sock_fd, cmd, strlen (cmd) + 1);
+ sent_cmd = 1;
+ if (n_written < strlen (cmd))
+ {
+ error = errno;
+ perror ("write command");
+ goto done;
+ }
do_quit = acked; /* quit after the next response */
}
}
else
{
- error = clib_error_return (0, "unknown fd");
+ error = errno;
+ perror ("unknown fd");
goto done;
}
}
- error = clib_socket_close (s);
+ close (sock_fd);
done:
- vec_free (cmd);
- vec_free (str);
+ free (cmd);
if (efd > -1)
close (efd);
@@ -366,14 +473,12 @@ done:
if (error)
{
- clib_error_report (error);
return 1;
}
return 0;
}
-/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf
index d96a4307231..929106a4c8a 100644
--- a/src/vpp/conf/startup.conf
+++ b/src/vpp/conf/startup.conf
@@ -51,6 +51,8 @@ socksvr {
## special keyword 'default-hugepage' will use system default hugepage
## size
# main-heap-page-size 1G
+ ## Set the default huge page size.
+ # default-hugepage-size 1G
#}
cpu {
@@ -106,6 +108,12 @@ cpu {
# page-size default-hugepage
# }
+# dsa {
+ ## DSA work queue address
+ # dev wq0.0
+ # dev wq0.1
+# }
+
# dpdk {
## Change default settings for all interfaces
# dev default {
@@ -123,10 +131,6 @@ cpu {
# num-rx-desc 512
# num-tx-desc 512
- ## VLAN strip offload mode for interface
- ## Default is off
- # vlan-strip-offload on
-
## TCP Segment Offload
## Default is off
## To enable TSO, 'enable-tcp-udp-checksum' must be set
@@ -199,6 +203,8 @@ cpu {
# plugins {
## Adjusting the plugin path depending on where the VPP plugins are
# path /ws/vpp/build-root/install-vpp-native/vpp/lib/vpp_plugins
+ ## Add additional directory to the plugin path
+ # add-path /tmp/vpp_plugins
## Disable all plugins by default and then selectively enable specific plugins
# plugin default { disable }
diff --git a/src/vpp/mem/mem.c b/src/vpp/mem/mem.c
index d438c970447..9383da9bdea 100644
--- a/src/vpp/mem/mem.c
+++ b/src/vpp/mem/mem.c
@@ -69,7 +69,7 @@ realloc(void *p, size_t size)
if (!check_vpp_heap ())
return __libc_realloc (p, size);
- return clib_mem_realloc (p, size, clib_mem_size (p));
+ return clib_mem_realloc (p, size);
}
int
diff --git a/src/vpp/mem/mem.md b/src/vpp/mem/mem.md
deleted file mode 100644
index 84ab820e5e5..00000000000
--- a/src/vpp/mem/mem.md
+++ /dev/null
@@ -1,21 +0,0 @@
-# VPP mem preload {#mempreload_doc}
-
-Internal VPP memory allocations rely on VPP main-heap, however when using
-external libraries, esp. in plugins (eg. OpenSSL library used by the IKEv2
-plugin), those external libraries usually manages memory using the standard
-libc `malloc()`/`free()`/... calls. This, in turn, makes use of the default
-libc heap.
-
-VPP has no knowledge of this heap and tools such as memory traces cannot be
-used.
-
-In order to enable the use of standard VPP debugging tools, this library
-replaces standard libc memory management calls with version using VPP
-main-heap.
-
-To use it, you need to use the `LD_PRELOAD` mechanism, eg.
-```
-~# LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libvppmem_preload.so /usr/bin/vpp -c /etc/vpp/startup.conf
-```
-
-You can then use tools such as memory traces as usual.
diff --git a/src/vpp/mem/mem.rst b/src/vpp/mem/mem.rst
new file mode 100644
index 00000000000..82ae2ff35df
--- /dev/null
+++ b/src/vpp/mem/mem.rst
@@ -0,0 +1,25 @@
+.. _mempreload_doc:
+
+VPP mem preload
+===============
+
+Internal VPP memory allocations rely on VPP main-heap, however when
+using external libraries, esp. in plugins (e.g. OpenSSL library used by
+the IKEv2 plugin), those external libraries usually manages memory using
+the standard libc ``malloc()``/``free()``/… calls. This, in turn, makes
+use of the default libc heap.
+
+VPP has no knowledge of this heap and tools such as memory traces cannot
+be used.
+
+In order to enable the use of standard VPP debugging tools, this library
+replaces standard libc memory management calls with version using VPP
+main-heap.
+
+To use it, you need to use the ``LD_PRELOAD`` mechanism, e.g.
+
+::
+
+ ~# LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libvppmem_preload.so /usr/bin/vpp -c /etc/vpp/startup.conf
+
+You can then use tools such as memory traces as usual.
diff --git a/src/vpp/stats/stat_segment.c b/src/vpp/stats/stat_segment.c
deleted file mode 100644
index 1ca9f5bab0d..00000000000
--- a/src/vpp/stats/stat_segment.c
+++ /dev/null
@@ -1,1115 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vppinfra/mem.h>
-#include <vlib/vlib.h>
-#include <vlib/unix/unix.h>
-#include "stat_segment.h"
-#include <vnet/vnet.h>
-#include <vnet/devices/devices.h> /* vnet_get_aggregate_rx_packets */
-#include <vpp-api/client/stat_client.h>
-
-stat_segment_main_t stat_segment_main;
-
-/*
- * Used only by VPP writers
- */
-void
-vlib_stat_segment_lock (void)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- clib_spinlock_lock (sm->stat_segment_lockp);
- sm->shared_header->in_progress = 1;
-}
-
-void
-vlib_stat_segment_unlock (void)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- sm->shared_header->epoch++;
- sm->shared_header->in_progress = 0;
- clib_spinlock_unlock (sm->stat_segment_lockp);
-}
-
-/*
- * Change heap to the stats shared memory segment
- */
-void *
-vlib_stats_push_heap (void *old)
-{
- stat_segment_main_t *sm = &stat_segment_main;
-
- sm->last = old;
- ASSERT (sm && sm->shared_header);
- return clib_mem_set_heap (sm->heap);
-}
-
-static u32
-lookup_hash_index (u8 * name)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- u32 index = STAT_SEGMENT_INDEX_INVALID;
- hash_pair_t *hp;
-
- /* Must be called in the context of the main heap */
- ASSERT (clib_mem_get_heap () != sm->heap);
-
- hp = hash_get_pair (sm->directory_vector_by_name, name);
- if (hp)
- {
- index = hp->value[0];
- }
-
- return index;
-}
-
-static void
-create_hash_index (u8 * name, u32 index)
-{
- stat_segment_main_t *sm = &stat_segment_main;
-
- /* Must be called in the context of the main heap */
- ASSERT (clib_mem_get_heap () != sm->heap);
-
- hash_set (sm->directory_vector_by_name, format (0, "%s%c", name, 0), index);
-}
-
-static u32
-vlib_stats_get_next_vector_index ()
-{
- stat_segment_main_t *sm = &stat_segment_main;
- u32 next_vector_index = vec_len (sm->directory_vector);
-
- ssize_t i;
- vec_foreach_index_backwards (i, sm->directory_vector)
- {
- if (sm->directory_vector[i].type == STAT_DIR_TYPE_EMPTY)
- {
- next_vector_index = i;
- break;
- }
- }
-
- return next_vector_index;
-}
-
-static u32
-vlib_stats_create_counter (stat_segment_directory_entry_t * e, void *oldheap)
-{
- stat_segment_main_t *sm = &stat_segment_main;
-
- ASSERT (clib_mem_get_heap () == sm->heap);
-
- u32 index = vlib_stats_get_next_vector_index ();
-
- clib_mem_set_heap (oldheap);
- create_hash_index ((u8 *) e->name, index);
- clib_mem_set_heap (sm->heap);
-
- vec_validate (sm->directory_vector, index);
- sm->directory_vector[index] = *e;
-
- return index;
-}
-
-static void
-vlib_stats_delete_counter (u32 index, void *oldheap)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_directory_entry_t *e;
-
- ASSERT (clib_mem_get_heap () == sm->heap);
-
- if (index > vec_len (sm->directory_vector))
- return;
-
- e = &sm->directory_vector[index];
-
- clib_mem_set_heap (oldheap);
- hash_unset (sm->directory_vector_by_name, &e->name);
- clib_mem_set_heap (sm->heap);
-
- memset (e, 0, sizeof (*e));
- e->type = STAT_DIR_TYPE_EMPTY;
-}
-
-/*
- * Called from main heap
- */
-void
-vlib_stats_delete_cm (void *cm_arg)
-{
- vlib_simple_counter_main_t *cm = (vlib_simple_counter_main_t *) cm_arg;
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_directory_entry_t *e;
-
- /* Not all counters have names / hash-table entries */
- if (!cm->name && !cm->stat_segment_name)
- {
- return;
- }
- vlib_stat_segment_lock ();
-
- /* Lookup hash-table is on the main heap */
- char *stat_segment_name =
- cm->stat_segment_name ? cm->stat_segment_name : cm->name;
- u32 index = lookup_hash_index ((u8 *) stat_segment_name);
-
- e = &sm->directory_vector[index];
- hash_unset (sm->directory_vector_by_name, &e->name);
-
- void *oldheap = clib_mem_set_heap (sm->heap); /* Enter stats segment */
- clib_mem_set_heap (oldheap); /* Exit stats segment */
-
- memset (e, 0, sizeof (*e));
- e->type = STAT_DIR_TYPE_EMPTY;
-
- vlib_stat_segment_unlock ();
-}
-
-void
-vlib_stats_pop_heap (void *cm_arg, void *oldheap, u32 cindex,
- stat_directory_type_t type)
-{
- vlib_simple_counter_main_t *cm = (vlib_simple_counter_main_t *) cm_arg;
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_shared_header_t *shared_header = sm->shared_header;
- char *stat_segment_name;
- stat_segment_directory_entry_t e = { 0 };
-
- /* Not all counters have names / hash-table entries */
- if (!cm->name && !cm->stat_segment_name)
- {
- clib_mem_set_heap (oldheap);
- return;
- }
-
- ASSERT (shared_header);
-
- vlib_stat_segment_lock ();
-
- /* Lookup hash-table is on the main heap */
- stat_segment_name =
- cm->stat_segment_name ? cm->stat_segment_name : cm->name;
-
- clib_mem_set_heap (oldheap); /* Exit stats segment */
- u32 vector_index = lookup_hash_index ((u8 *) stat_segment_name);
- /* Back to stats segment */
- clib_mem_set_heap (sm->heap); /* Re-enter stat segment */
-
-
- /* Update the vector */
- if (vector_index == STAT_SEGMENT_INDEX_INVALID)
- { /* New */
- strncpy (e.name, stat_segment_name, 128 - 1);
- e.type = type;
- vector_index = vlib_stats_create_counter (&e, oldheap);
- }
-
- stat_segment_directory_entry_t *ep = &sm->directory_vector[vector_index];
- ep->data = cm->counters;
-
- /* Reset the client hash table pointer, since it WILL change! */
- shared_header->directory_vector = sm->directory_vector;
-
- vlib_stat_segment_unlock ();
- clib_mem_set_heap (oldheap);
-}
-
-u8 *
-format_vlib_stats_symlink (u8 *s, va_list *args)
-{
- char *input = va_arg (*args, char *);
- char *modified_input = vec_dup (input);
- int i;
- u8 *result;
-
- for (i = 0; i < strlen (modified_input); i++)
- if (modified_input[i] == '/')
- modified_input[i] = '_';
-
- result = format (s, "%s", modified_input);
- vec_free (modified_input);
- return result;
-}
-
-void
-vlib_stats_register_symlink (void *oldheap, u8 *name, u32 index1, u32 index2,
- u8 lock)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_shared_header_t *shared_header = sm->shared_header;
- stat_segment_directory_entry_t e;
-
- ASSERT (shared_header);
-
- if (lock)
- vlib_stat_segment_lock ();
- clib_mem_set_heap (oldheap); /* Exit stats segment */
- u32 vector_index = lookup_hash_index (name);
- /* Back to stats segment */
- clib_mem_set_heap (sm->heap); /* Re-enter stat segment */
-
- if (vector_index == STAT_SEGMENT_INDEX_INVALID)
- {
- memcpy (e.name, name, vec_len (name));
- e.name[vec_len (name)] = '\0';
- e.type = STAT_DIR_TYPE_SYMLINK;
- e.index1 = index1;
- e.index2 = index2;
- vector_index = vlib_stats_create_counter (&e, oldheap);
-
- /* Warn clients to refresh any pointers they might be holding */
- shared_header->directory_vector = sm->directory_vector;
- }
-
- if (lock)
- vlib_stat_segment_unlock ();
-}
-
-void
-vlib_stats_rename_symlink (void *oldheap, u64 index, u8 *new_name)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_directory_entry_t *e;
-
- ASSERT (clib_mem_get_heap () == sm->heap);
-
- if (index > vec_len (sm->directory_vector))
- return;
-
- e = &sm->directory_vector[index];
-
- clib_mem_set_heap (oldheap);
- hash_unset (sm->directory_vector_by_name, &e->name);
- clib_mem_set_heap (sm->heap);
-
- strncpy (e->name, (char *) new_name, 128 - 1);
- clib_mem_set_heap (oldheap);
- hash_set (sm->directory_vector_by_name, &e->name, index);
- clib_mem_set_heap (sm->heap);
-}
-
-void
-vlib_stats_register_error_index (void *oldheap, u8 * name, u64 * em_vec,
- u64 index)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_shared_header_t *shared_header = sm->shared_header;
- stat_segment_directory_entry_t e;
-
- ASSERT (shared_header);
-
- vlib_stat_segment_lock ();
- clib_mem_set_heap (oldheap); /* Exit stats segment */
- u32 vector_index = lookup_hash_index (name);
- /* Back to stats segment */
- clib_mem_set_heap (sm->heap); /* Re-enter stat segment */
-
- if (vector_index == STAT_SEGMENT_INDEX_INVALID)
- {
- memcpy (e.name, name, vec_len (name));
- e.name[vec_len (name)] = '\0';
- e.type = STAT_DIR_TYPE_ERROR_INDEX;
- e.index = index;
- vector_index = vlib_stats_create_counter (&e, oldheap);
-
- /* Warn clients to refresh any pointers they might be holding */
- shared_header->directory_vector = sm->directory_vector;
- }
-
- vlib_stat_segment_unlock ();
-}
-
-/*
- * Creates a two dimensional vector with the maximum valid index specified in
- * both dimensions as arguments.
- * Must be called on the stat segment heap.
- */
-static void
-stat_validate_counter_vector2 (stat_segment_directory_entry_t *ep, u32 max1,
- u32 max2)
-{
- counter_t **counters = ep->data;
- int i;
- vec_validate_aligned (counters, max1, CLIB_CACHE_LINE_BYTES);
- for (i = 0; i <= max1; i++)
- vec_validate_aligned (counters[i], max2, CLIB_CACHE_LINE_BYTES);
-
- ep->data = counters;
-}
-
-static void
-stat_validate_counter_vector (stat_segment_directory_entry_t *ep, u32 max)
-{
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- ASSERT (tm->n_vlib_mains > 0);
- stat_validate_counter_vector2 (ep, tm->n_vlib_mains, max);
-}
-
-void
-vlib_stats_pop_heap2 (u64 * error_vector, u32 thread_index, void *oldheap,
- int lock)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_shared_header_t *shared_header = sm->shared_header;
-
- ASSERT (shared_header);
-
- if (lock)
- vlib_stat_segment_lock ();
-
- /* Reset the client hash table pointer, since it WILL change! */
- vec_validate (sm->error_vector, thread_index);
- sm->error_vector[thread_index] = error_vector;
-
- shared_header->error_vector = sm->error_vector;
- shared_header->directory_vector = sm->directory_vector;
-
- if (lock)
- vlib_stat_segment_unlock ();
- clib_mem_set_heap (oldheap);
-}
-
-/*
- * Create a new entry and add name to directory hash.
- * Returns ~0 if name exists.
- * Called from main heap.
- * The name is either C-string or nul-terminated vector
- */
-u32
-stat_segment_new_entry (u8 *name, stat_directory_type_t t)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_shared_header_t *shared_header = sm->shared_header;
- void *oldheap;
- stat_segment_directory_entry_t e;
-
- ASSERT (shared_header);
-
- u32 vector_index = lookup_hash_index (name);
- if (vector_index != STAT_SEGMENT_INDEX_INVALID) /* Already registered */
- return ~0;
-
- memset (&e, 0, sizeof (e));
- e.type = t;
- strcpy_s (e.name, sizeof (e.name), (char *) name);
-
- oldheap = vlib_stats_push_heap (NULL);
- vlib_stat_segment_lock ();
- vector_index = vlib_stats_create_counter (&e, oldheap);
-
- shared_header->directory_vector = sm->directory_vector;
-
- vlib_stat_segment_unlock ();
- clib_mem_set_heap (oldheap);
-
- return vector_index;
-}
-
-clib_error_t *
-vlib_map_stat_segment_init (void)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_shared_header_t *shared_header;
- void *oldheap;
- uword memory_size, sys_page_sz;
- int mfd;
- char *mem_name = "stat segment";
- void *heap, *memaddr;
-
- memory_size = sm->memory_size;
- if (memory_size == 0)
- memory_size = STAT_SEGMENT_DEFAULT_SIZE;
-
- if (sm->log2_page_sz == CLIB_MEM_PAGE_SZ_UNKNOWN)
- sm->log2_page_sz = CLIB_MEM_PAGE_SZ_DEFAULT;
-
- mfd = clib_mem_vm_create_fd (sm->log2_page_sz, mem_name);
-
- if (mfd == -1)
- return clib_error_return (0, "stat segment memory fd failure: %U",
- format_clib_error, clib_mem_get_last_error ());
- /* Set size */
- if ((ftruncate (mfd, memory_size)) == -1)
- {
- close (mfd);
- return clib_error_return (0, "stat segment ftruncate failure");
- }
-
- memaddr = clib_mem_vm_map_shared (0, memory_size, mfd, 0, mem_name);
-
- if (memaddr == CLIB_MEM_VM_MAP_FAILED)
- return clib_error_return (0, "stat segment mmap failure");
-
- sys_page_sz = clib_mem_get_page_size ();
-
- heap =
- clib_mem_create_heap (((u8 *) memaddr) + sys_page_sz,
- memory_size - sys_page_sz, 1 /* locked */, mem_name);
- sm->heap = heap;
- sm->memfd = mfd;
-
- sm->directory_vector_by_name = hash_create_string (0, sizeof (uword));
- sm->shared_header = shared_header = memaddr;
-
- shared_header->version = STAT_SEGMENT_VERSION;
- shared_header->base = memaddr;
-
- sm->stat_segment_lockp = clib_mem_alloc (sizeof (clib_spinlock_t));
- clib_spinlock_init (sm->stat_segment_lockp);
-
- oldheap = clib_mem_set_heap (sm->heap);
-
- /* Set up the name to counter-vector hash table */
- sm->directory_vector = 0;
-
- shared_header->epoch = 1;
-
- /* Scalar stats and node counters */
- vec_validate (sm->directory_vector, STAT_COUNTERS - 1);
-#define _(E,t,n,p) \
- strcpy(sm->directory_vector[STAT_COUNTER_##E].name, #p "/" #n); \
- sm->directory_vector[STAT_COUNTER_##E].type = STAT_DIR_TYPE_##t;
- foreach_stat_segment_counter_name
-#undef _
- /* Save the vector in the shared segment, for clients */
- shared_header->directory_vector = sm->directory_vector;
-
- clib_mem_set_heap (oldheap);
-
- vlib_stats_register_mem_heap (heap);
-
- return 0;
-}
-
-static int
-name_sort_cmp (void *a1, void *a2)
-{
- stat_segment_directory_entry_t *n1 = a1;
- stat_segment_directory_entry_t *n2 = a2;
-
- return strcmp ((char *) n1->name, (char *) n2->name);
-}
-
-static u8 *
-format_stat_dir_entry (u8 * s, va_list * args)
-{
- stat_segment_directory_entry_t *ep =
- va_arg (*args, stat_segment_directory_entry_t *);
- char *type_name;
- char *format_string;
-
- format_string = "%-74s %-10s %10lld";
-
- switch (ep->type)
- {
- case STAT_DIR_TYPE_SCALAR_INDEX:
- type_name = "ScalarPtr";
- break;
-
- case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE:
- case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED:
- type_name = "CMainPtr";
- break;
-
- case STAT_DIR_TYPE_ERROR_INDEX:
- type_name = "ErrIndex";
- break;
-
- case STAT_DIR_TYPE_NAME_VECTOR:
- type_name = "NameVector";
- break;
-
- case STAT_DIR_TYPE_EMPTY:
- type_name = "empty";
- break;
-
- case STAT_DIR_TYPE_SYMLINK:
- type_name = "Symlink";
- break;
-
- default:
- type_name = "illegal!";
- break;
- }
-
- return format (s, format_string, ep->name, type_name, 0);
-}
-
-static clib_error_t *
-show_stat_segment_command_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_directory_entry_t *show_data;
- int i;
-
- int verbose = 0;
-
- if (unformat (input, "verbose"))
- verbose = 1;
-
- /* Lock even as reader, as this command doesn't handle epoch changes */
- vlib_stat_segment_lock ();
- show_data = vec_dup (sm->directory_vector);
- vlib_stat_segment_unlock ();
-
- vec_sort_with_function (show_data, name_sort_cmp);
-
- vlib_cli_output (vm, "%-74s %10s %10s", "Name", "Type", "Value");
-
- for (i = 0; i < vec_len (show_data); i++)
- {
- stat_segment_directory_entry_t *ep = vec_elt_at_index (show_data, i);
-
- if (ep->type == STAT_DIR_TYPE_EMPTY)
- continue;
-
- vlib_cli_output (vm, "%-100U", format_stat_dir_entry,
- vec_elt_at_index (show_data, i));
- }
-
- if (verbose)
- {
- ASSERT (sm->heap);
- vlib_cli_output (vm, "%U", format_clib_mem_heap, sm->heap,
- 0 /* verbose */ );
- }
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (show_stat_segment_command, static) =
-{
- .path = "show statistics segment",
- .short_help = "show statistics segment [verbose]",
- .function = show_stat_segment_command_fn,
-};
-/* *INDENT-ON* */
-
-/*
- * Node performance counters:
- * total_calls [threads][node-index]
- * total_vectors
- * total_calls
- * total suspends
- */
-
-static inline void
-update_node_counters (stat_segment_main_t * sm)
-{
- vlib_main_t **stat_vms = 0;
- vlib_node_t ***node_dups = 0;
- int i, j;
- static u32 no_max_nodes = 0;
-
- vlib_node_get_nodes (0 /* vm, for barrier sync */,
- (u32) ~0 /* all threads */, 1 /* include stats */,
- 0 /* barrier sync */, &node_dups, &stat_vms);
-
- u32 l = vec_len (node_dups[0]);
- u8 *symlink_name = 0;
-
- /*
- * Extend performance nodes if necessary
- */
- if (l > no_max_nodes)
- {
- void *oldheap = clib_mem_set_heap (sm->heap);
- vlib_stat_segment_lock ();
-
- stat_validate_counter_vector (
- &sm->directory_vector[STAT_COUNTER_NODE_CLOCKS], l - 1);
- stat_validate_counter_vector (
- &sm->directory_vector[STAT_COUNTER_NODE_VECTORS], l - 1);
- stat_validate_counter_vector (
- &sm->directory_vector[STAT_COUNTER_NODE_CALLS], l - 1);
- stat_validate_counter_vector (
- &sm->directory_vector[STAT_COUNTER_NODE_SUSPENDS], l - 1);
-
- vec_validate (sm->nodes, l - 1);
- stat_segment_directory_entry_t *ep;
- ep = &sm->directory_vector[STAT_COUNTER_NODE_NAMES];
- ep->data = sm->nodes;
-
- /* Update names dictionary */
- vlib_node_t **nodes = node_dups[0];
- int i;
- for (i = 0; i < vec_len (nodes); i++)
- {
- vlib_node_t *n = nodes[i];
- u8 *s = 0;
- s = format (s, "%v%c", n->name, 0);
- if (sm->nodes[n->index])
- vec_free (sm->nodes[n->index]);
- sm->nodes[n->index] = s;
-
-#define _(E, t, name, p) \
- vec_reset_length (symlink_name); \
- symlink_name = format (symlink_name, "/nodes/%U/" #name "%c", \
- format_vlib_stats_symlink, s, 0); \
- vlib_stats_register_symlink (oldheap, symlink_name, STAT_COUNTER_##E, \
- n->index, 0 /* don't lock */);
- foreach_stat_segment_node_counter_name
-#undef _
- }
- vec_free (symlink_name);
- vlib_stat_segment_unlock ();
- clib_mem_set_heap (oldheap);
- no_max_nodes = l;
- }
-
- for (j = 0; j < vec_len (node_dups); j++)
- {
- vlib_node_t **nodes = node_dups[j];
-
- for (i = 0; i < vec_len (nodes); i++)
- {
- counter_t **counters;
- counter_t *c;
- vlib_node_t *n = nodes[i];
-
- if (j == 0)
- {
- if (strncmp ((char *) sm->nodes[n->index], (char *) n->name,
- strlen ((char *) sm->nodes[n->index])))
- {
- u8 *s = 0;
- u32 vector_index;
- u8 *symlink_new_name = 0;
- void *oldheap = clib_mem_set_heap (sm->heap);
- vlib_stat_segment_lock ();
- s = format (s, "%v%c", n->name, 0);
-#define _(E, t, name, p) \
- vec_reset_length (symlink_name); \
- symlink_name = format (symlink_name, "/nodes/%U/" #name "%c", \
- format_vlib_stats_symlink, sm->nodes[n->index], 0); \
- clib_mem_set_heap (oldheap); /* Exit stats segment */ \
- vector_index = lookup_hash_index ((u8 *) symlink_name); \
- clib_mem_set_heap (sm->heap); /* Re-enter stat segment */ \
- vec_reset_length (symlink_new_name); \
- symlink_new_name = format (symlink_new_name, "/nodes/%U/" #name "%c", \
- format_vlib_stats_symlink, s, 0); \
- vlib_stats_rename_symlink (oldheap, vector_index, symlink_new_name);
- foreach_stat_segment_node_counter_name
-#undef _
- vec_free (symlink_name);
- vec_free (symlink_new_name);
- vec_free (sm->nodes[n->index]);
- sm->nodes[n->index] = s;
- vlib_stat_segment_unlock ();
- clib_mem_set_heap (oldheap);
- }
- }
-
- counters = sm->directory_vector[STAT_COUNTER_NODE_CLOCKS].data;
- c = counters[j];
- c[n->index] = n->stats_total.clocks - n->stats_last_clear.clocks;
-
- counters = sm->directory_vector[STAT_COUNTER_NODE_VECTORS].data;
- c = counters[j];
- c[n->index] = n->stats_total.vectors - n->stats_last_clear.vectors;
-
- counters = sm->directory_vector[STAT_COUNTER_NODE_CALLS].data;
- c = counters[j];
- c[n->index] = n->stats_total.calls - n->stats_last_clear.calls;
-
- counters = sm->directory_vector[STAT_COUNTER_NODE_SUSPENDS].data;
- c = counters[j];
- c[n->index] = n->stats_total.suspends - n->stats_last_clear.suspends;
- }
- vec_free (node_dups[j]);
- }
- vec_free (node_dups);
- vec_free (stat_vms);
-}
-
-static void
-do_stat_segment_updates (vlib_main_t *vm, stat_segment_main_t *sm)
-{
- u64 input_packets;
- f64 dt, now;
- static int num_worker_threads_set;
-
- /*
- * Set once at the beginning of time.
- * Can't do this from the init routine, which happens before
- * start_workers sets up vlib_mains...
- */
- if (PREDICT_FALSE (num_worker_threads_set == 0))
- {
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- ASSERT (tm->n_vlib_mains > 0);
- stat_provider_register_vector_rate (tm->n_vlib_mains - 1);
- sm->directory_vector[STAT_COUNTER_NUM_WORKER_THREADS].value =
- tm->n_vlib_mains - 1;
- num_worker_threads_set = 1;
- }
-
- /*
- * Compute the aggregate input rate
- */
- now = vlib_time_now (vm);
- dt = now - sm->directory_vector[STAT_COUNTER_LAST_UPDATE].value;
- input_packets = vnet_get_aggregate_rx_packets ();
- sm->directory_vector[STAT_COUNTER_INPUT_RATE].value =
- (f64) (input_packets - sm->last_input_packets) / dt;
- sm->directory_vector[STAT_COUNTER_LAST_UPDATE].value = now;
- sm->last_input_packets = input_packets;
- sm->directory_vector[STAT_COUNTER_LAST_STATS_CLEAR].value =
- vm->node_main.time_last_runtime_stats_clear;
-
- if (sm->node_counters_enabled)
- update_node_counters (sm);
-
- /* *INDENT-OFF* */
- stat_segment_gauges_pool_t *g;
- pool_foreach (g, sm->gauges)
- {
- g->fn(&sm->directory_vector[g->directory_index], g->caller_index);
- }
- /* *INDENT-ON* */
-
- /* Heartbeat, so clients detect we're still here */
- sm->directory_vector[STAT_COUNTER_HEARTBEAT].value++;
-}
-
-/*
- * Accept connection on the socket and exchange the fd for the shared
- * memory segment.
- */
-static clib_error_t *
-stats_socket_accept_ready (clib_file_t * uf)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- clib_error_t *err;
- clib_socket_t client = { 0 };
-
- err = clib_socket_accept (sm->socket, &client);
- if (err)
- {
- clib_error_report (err);
- return err;
- }
-
- /* Send the fd across and close */
- err = clib_socket_sendmsg (&client, 0, 0, &sm->memfd, 1);
- if (err)
- clib_error_report (err);
- clib_socket_close (&client);
-
- return 0;
-}
-
-static clib_error_t *
-stats_segment_socket_init (void)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- clib_error_t *error;
- clib_socket_t *s = clib_mem_alloc (sizeof (clib_socket_t));
-
- memset (s, 0, sizeof (clib_socket_t));
- s->config = (char *) sm->socket_name;
- s->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_SEQPACKET |
- CLIB_SOCKET_F_ALLOW_GROUP_WRITE | CLIB_SOCKET_F_PASSCRED;
-
- if ((error = clib_socket_init (s)))
- return error;
-
- clib_file_t template = { 0 };
- template.read_function = stats_socket_accept_ready;
- template.file_descriptor = s->fd;
- template.description = format (0, "stats segment listener %s", s->config);
- clib_file_add (&file_main, &template);
-
- sm->socket = s;
-
- return 0;
-}
-
-static clib_error_t *
-stats_segment_socket_exit (vlib_main_t * vm)
-{
- /*
- * cleanup the listener socket on exit.
- */
- stat_segment_main_t *sm = &stat_segment_main;
- unlink ((char *) sm->socket_name);
- return 0;
-}
-
-VLIB_MAIN_LOOP_EXIT_FUNCTION (stats_segment_socket_exit);
-
-/* Overrides weak reference in vlib:node_cli.c */
-f64
-vlib_get_stat_segment_update_rate (void)
-{
- return stat_segment_main.update_interval;
-}
-
-static uword
-stat_segment_collector_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
- vlib_frame_t * f)
-{
- stat_segment_main_t *sm = &stat_segment_main;
-
- while (1)
- {
- do_stat_segment_updates (vm, sm);
- vlib_process_suspend (vm, sm->update_interval);
- }
- return 0; /* or not */
-}
-
-/*
- * Add a data provider (via callback) for a given stats entry.
- * TODO: Add support for per-provider interval.
- */
-void
-stat_segment_poll_add (u32 vector_index, stat_segment_update_fn update_fn,
- u32 caller_index, u32 interval)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_gauges_pool_t *gauge;
-
- pool_get (sm->gauges, gauge);
- gauge->fn = update_fn;
- gauge->caller_index = caller_index;
- gauge->directory_index = vector_index;
-
- return;
-}
-
-/*
- * Create an scalar entry with a data provider.
- * Deprecated, replace with stat_segment_new_entry + stat_segment_pool_add
- */
-clib_error_t *
-stat_segment_register_gauge (u8 * name, stat_segment_update_fn update_fn,
- u32 caller_index)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_gauges_pool_t *gauge;
-
- u32 vector_index = stat_segment_new_entry (name, STAT_DIR_TYPE_SCALAR_INDEX);
- if (vector_index == ~0) /* Already registered */
- return clib_error_return (0, "%v is already registered", name);
-
- pool_get (sm->gauges, gauge);
- gauge->fn = update_fn;
- gauge->caller_index = caller_index;
- gauge->directory_index = vector_index;
-
- return NULL;
-}
-
-clib_error_t *
-stat_segment_register_state_counter (u8 * name, u32 * index)
-{
- ASSERT (vlib_get_thread_index () == 0);
-
- u32 vector_index = stat_segment_new_entry (name, STAT_DIR_TYPE_SCALAR_INDEX);
- if (vector_index == ~0) /* Already registered */
- return clib_error_return (0, "%v is already registered", name);
- *index = vector_index;
- return 0;
-}
-
-clib_error_t *
-stat_segment_deregister_state_counter (u32 index)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- stat_segment_shared_header_t *shared_header = sm->shared_header;
- stat_segment_directory_entry_t *e;
- void *oldheap;
-
- ASSERT (shared_header);
-
- if (index > vec_len (sm->directory_vector))
- return clib_error_return (0, "%u index does not exist", index);
-
- e = &sm->directory_vector[index];
- if (e->type != STAT_DIR_TYPE_SCALAR_INDEX)
- return clib_error_return (0, "%u index cannot be deleted", index);
-
- oldheap = vlib_stats_push_heap (NULL);
- vlib_stat_segment_lock ();
-
- vlib_stats_delete_counter (index, oldheap);
-
- vlib_stat_segment_unlock ();
- clib_mem_set_heap (oldheap);
-
- return 0;
-}
-
-void
-stat_segment_set_state_counter (u32 index, u64 value)
-{
- stat_segment_main_t *sm = &stat_segment_main;
-
- ASSERT (index < vec_len (sm->directory_vector));
- sm->directory_vector[index].index = value;
-}
-
-static clib_error_t *
-statseg_config (vlib_main_t * vm, unformat_input_t * input)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- sm->update_interval = 10.0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "socket-name %s", &sm->socket_name))
- ;
- /* DEPRECATE: default (does nothing) */
- else if (unformat (input, "default"))
- ;
- else if (unformat (input, "size %U",
- unformat_memory_size, &sm->memory_size))
- ;
- else if (unformat (input, "page-size %U",
- unformat_log2_page_size, &sm->log2_page_sz))
- ;
- else if (unformat (input, "per-node-counters on"))
- sm->node_counters_enabled = 1;
- else if (unformat (input, "per-node-counters off"))
- sm->node_counters_enabled = 0;
- else if (unformat (input, "update-interval %f", &sm->update_interval))
- ;
- else
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
-
- /*
- * NULL-terminate socket name string
- * clib_socket_init()->socket_config() use C str*
- */
- if (vec_len (sm->socket_name))
- vec_terminate_c_string (sm->socket_name);
-
- return 0;
-}
-
-VLIB_EARLY_CONFIG_FUNCTION (statseg_config, "statseg");
-
-static clib_error_t *
-statseg_sw_interface_add_del (vnet_main_t * vnm, u32 sw_if_index, u32 is_add)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- vnet_sw_interface_t *si = vnet_get_sw_interface (vnm, sw_if_index);
- vnet_sw_interface_t *si_sup =
- vnet_get_sup_sw_interface (vnm, si->sw_if_index);
- vnet_hw_interface_t *hi_sup;
- u8 *s = 0;
- u8 *symlink_name = 0;
- u32 vector_index;
-
- void *oldheap = vlib_stats_push_heap (sm->interfaces);
- vlib_stat_segment_lock ();
-
- vec_validate (sm->interfaces, sw_if_index);
-
- ASSERT (si_sup->type == VNET_SW_INTERFACE_TYPE_HARDWARE);
- hi_sup = vnet_get_hw_interface (vnm, si_sup->hw_if_index);
-
- s = format (s, "%v", hi_sup->name);
- if (si->type != VNET_SW_INTERFACE_TYPE_HARDWARE)
- s = format (s, ".%d", si->sub.id);
- s = format (s, "%c", 0);
-
- if (is_add)
- {
- sm->interfaces[sw_if_index] = s;
-#define _(E, n, p) \
- clib_mem_set_heap (oldheap); /* Exit stats segment */ \
- vector_index = lookup_hash_index ((u8 *) "/" #p "/" #n); \
- clib_mem_set_heap (sm->heap); /* Re-enter stat segment */ \
- vec_reset_length (symlink_name); \
- symlink_name = format (symlink_name, "/interfaces/%U/" #n "%c", \
- format_vlib_stats_symlink, s, 0); \
- vlib_stats_register_symlink (oldheap, symlink_name, vector_index, \
- sw_if_index, 0 /* don't lock */);
- foreach_simple_interface_counter_name
- foreach_combined_interface_counter_name
-#undef _
-
- vec_free (symlink_name);
- }
- else
- {
- vec_free (sm->interfaces[sw_if_index]);
- sm->interfaces[sw_if_index] = 0;
-#define _(E, n, p) \
- vec_reset_length (symlink_name); \
- symlink_name = format (symlink_name, "/interfaces/%U/" #n "%c", \
- format_vlib_stats_symlink, s, 0); \
- clib_mem_set_heap (oldheap); /* Exit stats segment */ \
- vector_index = lookup_hash_index ((u8 *) symlink_name); \
- clib_mem_set_heap (sm->heap); /* Re-enter stat segment */ \
- vlib_stats_delete_counter (vector_index, oldheap);
- foreach_simple_interface_counter_name
- foreach_combined_interface_counter_name
-#undef _
-
- vec_free (symlink_name);
- }
-
- stat_segment_directory_entry_t *ep;
- ep = &sm->directory_vector[STAT_COUNTER_INTERFACE_NAMES];
- ep->data = sm->interfaces;
-
- vlib_stat_segment_unlock ();
- clib_mem_set_heap (oldheap);
-
- return 0;
-}
-
-VNET_SW_INTERFACE_ADD_DEL_FUNCTION (statseg_sw_interface_add_del);
-
-VLIB_REGISTER_NODE (stat_segment_collector, static) =
-{
-.function = stat_segment_collector_process,
-.name = "statseg-collector-process",
-.type = VLIB_NODE_TYPE_PROCESS,
-};
-
-static clib_error_t *
-statseg_init (vlib_main_t *vm)
-{
- stat_segment_main_t *sm = &stat_segment_main;
-
- /* set default socket file name when statseg config stanza is empty. */
- if (!vec_len (sm->socket_name))
- sm->socket_name = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (),
- STAT_SEGMENT_SOCKET_FILENAME, 0);
- return stats_segment_socket_init ();
-}
-
-VLIB_INIT_FUNCTION (statseg_init) = {
- .runs_after = VLIB_INITS ("unix_input_init"),
-};
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vpp/stats/stat_segment.h b/src/vpp/stats/stat_segment.h
deleted file mode 100644
index f5862a684e4..00000000000
--- a/src/vpp/stats/stat_segment.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_stat_segment_h
-#define included_stat_segment_h
-
-#include <vlib/vlib.h>
-#include <vppinfra/socket.h>
-#include <vpp/stats/stat_segment_shared.h>
-
-typedef enum
-{
- STAT_COUNTER_NUM_WORKER_THREADS = 0,
- STAT_COUNTER_INPUT_RATE,
- STAT_COUNTER_LAST_UPDATE,
- STAT_COUNTER_LAST_STATS_CLEAR,
- STAT_COUNTER_HEARTBEAT,
- STAT_COUNTER_NODE_CLOCKS,
- STAT_COUNTER_NODE_VECTORS,
- STAT_COUNTER_NODE_CALLS,
- STAT_COUNTER_NODE_SUSPENDS,
- STAT_COUNTER_INTERFACE_NAMES,
- STAT_COUNTER_NODE_NAMES,
- STAT_COUNTERS
-} stat_segment_counter_t;
-
-/* clang-format off */
-#define foreach_stat_segment_node_counter_name \
- _ (NODE_CLOCKS, COUNTER_VECTOR_SIMPLE, clocks, /sys/node) \
- _ (NODE_VECTORS, COUNTER_VECTOR_SIMPLE, vectors, /sys/node) \
- _ (NODE_CALLS, COUNTER_VECTOR_SIMPLE, calls, /sys/node) \
- _ (NODE_SUSPENDS, COUNTER_VECTOR_SIMPLE, suspends, /sys/node)
-
-#define foreach_stat_segment_counter_name \
- _ (NUM_WORKER_THREADS, SCALAR_INDEX, num_worker_threads, /sys) \
- _ (INPUT_RATE, SCALAR_INDEX, input_rate, /sys) \
- _ (LAST_UPDATE, SCALAR_INDEX, last_update, /sys) \
- _ (LAST_STATS_CLEAR, SCALAR_INDEX, last_stats_clear, /sys) \
- _ (HEARTBEAT, SCALAR_INDEX, heartbeat, /sys) \
- _ (INTERFACE_NAMES, NAME_VECTOR, names, /if) \
- _ (NODE_NAMES, NAME_VECTOR, names, /sys/node) \
- foreach_stat_segment_node_counter_name
-/* clang-format on */
-
-/* Default stat segment 32m */
-#define STAT_SEGMENT_DEFAULT_SIZE (32<<20)
-
-/* Shared segment memory layout version */
-#define STAT_SEGMENT_VERSION 2
-
-#define STAT_SEGMENT_INDEX_INVALID UINT32_MAX
-
-typedef void (*stat_segment_update_fn)(stat_segment_directory_entry_t * e, u32 i);
-
-typedef struct {
- u32 directory_index;
- stat_segment_update_fn fn;
- u32 caller_index;
-} stat_segment_gauges_pool_t;
-
-typedef struct
-{
- /* internal, does not point to shared memory */
- stat_segment_gauges_pool_t *gauges;
-
- /* statistics segment */
- uword *directory_vector_by_name;
- stat_segment_directory_entry_t *directory_vector;
- volatile u64 **error_vector;
- u8 **interfaces;
- u8 **nodes;
-
- /* Update interval */
- f64 update_interval;
-
- clib_spinlock_t *stat_segment_lockp;
- clib_socket_t *socket;
- u8 *socket_name;
- ssize_t memory_size;
- clib_mem_page_sz_t log2_page_sz;
- u8 node_counters_enabled;
- void *last;
- void *heap;
- stat_segment_shared_header_t *shared_header; /* pointer to shared memory segment */
- int memfd;
-
- u64 last_input_packets; // OLE REMOVE?
-} stat_segment_main_t;
-
-extern stat_segment_main_t stat_segment_main;
-
-clib_error_t *
-stat_segment_register_gauge (u8 *names, stat_segment_update_fn update_fn, u32 index);
-clib_error_t *
-stat_segment_register_state_counter(u8 *name, u32 *index);
-clib_error_t *
-stat_segment_deregister_state_counter(u32 index);
-void stat_segment_set_state_counter (u32 index, u64 value);
-void stat_segment_poll_add (u32 vector_index, stat_segment_update_fn update_fn,
- u32 caller_index, u32 interval);
-
-u32 stat_segment_new_entry (u8 *name, stat_directory_type_t t);
-void vlib_stats_register_mem_heap (clib_mem_heap_t *heap);
-void vlib_stat_segment_lock (void);
-void vlib_stat_segment_unlock (void);
-void vlib_stats_register_symlink (void *oldheap, u8 *name, u32 index1,
- u32 index2, u8 lock);
-
-void stat_provider_register_vector_rate (u32 num_workers);
-
-#endif
diff --git a/src/vpp/stats/stat_segment_provider.c b/src/vpp/stats/stat_segment_provider.c
deleted file mode 100644
index 766261ce0e0..00000000000
--- a/src/vpp/stats/stat_segment_provider.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Copyright (c) 2021 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * Counters handled by the stats module directly.
- */
-
-#include <stdbool.h>
-#include <vppinfra/mem.h>
-#include <vppinfra/vec.h>
-#include <vlib/vlib.h>
-#include <vlib/counter.h>
-#include "stat_segment.h"
-
-clib_mem_heap_t **memory_heaps_vec;
-u32 mem_vector_index;
-bool initialized = false;
-
-enum
-{
- STAT_MEM_TOTAL = 0,
- STAT_MEM_USED,
- STAT_MEM_FREE,
- STAT_MEM_USED_MMAP,
- STAT_MEM_TOTAL_ALLOC,
- STAT_MEM_FREE_CHUNKS,
- STAT_MEM_RELEASABLE,
-} stat_mem_usage_e;
-
-/*
- * Called from the stats periodic process to update memory counters.
- */
-static void
-stat_provider_mem_usage_update_fn (stat_segment_directory_entry_t *e,
- u32 index)
-{
- clib_mem_usage_t usage;
- clib_mem_heap_t *heap;
- counter_t **counters = e->data;
- counter_t *cb;
-
- heap = vec_elt (memory_heaps_vec, index);
- clib_mem_get_heap_usage (heap, &usage);
- cb = counters[0];
- cb[STAT_MEM_TOTAL] = usage.bytes_total;
- cb[STAT_MEM_USED] = usage.bytes_used;
- cb[STAT_MEM_FREE] = usage.bytes_free;
- cb[STAT_MEM_USED_MMAP] = usage.bytes_used_mmap;
- cb[STAT_MEM_TOTAL_ALLOC] = usage.bytes_max;
- cb[STAT_MEM_FREE_CHUNKS] = usage.bytes_free_reclaimed;
- cb[STAT_MEM_RELEASABLE] = usage.bytes_overhead;
-}
-
-static counter_t **
-stat_validate_counter_vector3 (counter_t **counters, u32 max1, u32 max2)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- int i;
- void *oldheap = clib_mem_set_heap (sm->heap);
- vec_validate_aligned (counters, max1, CLIB_CACHE_LINE_BYTES);
- for (i = 0; i <= max1; i++)
- vec_validate_aligned (counters[i], max2, CLIB_CACHE_LINE_BYTES);
- clib_mem_set_heap (oldheap);
- return counters;
-}
-
-/*
- * Provide memory heap counters.
- * Two dimensional array of heap index and per-heap gauges.
- */
-void
-vlib_stats_register_mem_heap (clib_mem_heap_t *heap)
-{
- stat_segment_main_t *sm = &stat_segment_main;
- vec_add1 (memory_heaps_vec, heap);
- u32 heap_index = vec_len (memory_heaps_vec) - 1;
-
- /* Memory counters provider */
- u8 *s = format (0, "/mem/%s%c", heap->name, 0);
- u8 *s_used = format (0, "/mem/%s/used%c", heap->name, 0);
- u8 *s_total = format (0, "/mem/%s/total%c", heap->name, 0);
- u8 *s_free = format (0, "/mem/%s/free%c", heap->name, 0);
- mem_vector_index =
- stat_segment_new_entry (s, STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE);
- vec_free (s);
- if (mem_vector_index == ~0)
- ASSERT (0);
-
- vlib_stat_segment_lock ();
- stat_segment_directory_entry_t *ep = &sm->directory_vector[mem_vector_index];
- ep->data = stat_validate_counter_vector3 (ep->data, 0, STAT_MEM_RELEASABLE);
-
- /* Create symlink */
- void *oldheap = clib_mem_set_heap (sm->heap);
- vlib_stats_register_symlink (oldheap, s_total, mem_vector_index,
- STAT_MEM_TOTAL, 0);
- vlib_stats_register_symlink (oldheap, s_used, mem_vector_index,
- STAT_MEM_USED, 0);
- vlib_stats_register_symlink (oldheap, s_free, mem_vector_index,
- STAT_MEM_FREE, 0);
- vlib_stat_segment_unlock ();
- clib_mem_set_heap (oldheap);
- vec_free (s_used);
- vec_free (s_total);
- vec_free (s_free);
-
- stat_segment_poll_add (mem_vector_index, stat_provider_mem_usage_update_fn,
- heap_index, 10);
-}
-
-static void
-stat_provider_vector_rate_per_thread_update_fn (
- stat_segment_directory_entry_t *e, u32 index)
-{
- vlib_main_t *this_vlib_main;
- int i;
- ASSERT (e->data);
- counter_t **counters = e->data;
-
- for (i = 0; i < vlib_get_n_threads (); i++)
- {
-
- f64 this_vector_rate;
-
- this_vlib_main = vlib_get_main_by_index (i);
-
- this_vector_rate = vlib_internal_node_vector_rate (this_vlib_main);
- vlib_clear_internal_node_vector_rate (this_vlib_main);
- /* Set the per-worker rate */
- counter_t *cb = counters[i];
- cb[0] = this_vector_rate;
- }
-}
-
-static void
-stat_provider_vector_rate_update_fn (stat_segment_directory_entry_t *e,
- u32 index)
-{
- vlib_main_t *this_vlib_main;
- int i;
- f64 vector_rate = 0.0;
- for (i = 0; i < vlib_get_n_threads (); i++)
- {
-
- f64 this_vector_rate;
-
- this_vlib_main = vlib_get_main_by_index (i);
-
- this_vector_rate = vlib_internal_node_vector_rate (this_vlib_main);
- vlib_clear_internal_node_vector_rate (this_vlib_main);
-
- vector_rate += this_vector_rate;
- }
-
- /* And set the system average rate */
- vector_rate /= (f64) (i > 1 ? i - 1 : 1);
- e->value = vector_rate;
-}
-
-void
-stat_provider_register_vector_rate (u32 num_workers)
-{
- int i;
-
- u8 *s = format (0, "/sys/vector_rate%c", 0);
-
- i = stat_segment_new_entry (s, STAT_DIR_TYPE_SCALAR_INDEX);
- if (i == ~0)
- ASSERT (0);
- vec_free (s);
- stat_segment_poll_add (i, stat_provider_vector_rate_update_fn, ~0, 10);
-
- s = format (0, "/sys/vector_rate_per_worker%c", 0);
- i = stat_segment_new_entry (s, STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE);
- if (i == ~0)
- ASSERT (0);
- vec_free (s);
- stat_segment_poll_add (i, stat_provider_vector_rate_per_thread_update_fn, ~0,
- 10);
-
- stat_segment_main_t *sm = &stat_segment_main;
- vlib_stat_segment_lock ();
- stat_segment_directory_entry_t *ep = &sm->directory_vector[i];
- ep->data = stat_validate_counter_vector3 (ep->data, num_workers, 0);
- vlib_stat_segment_unlock ();
-}
diff --git a/src/vpp/stats/stat_segment_shared.h b/src/vpp/stats/stat_segment_shared.h
deleted file mode 100644
index 6f26d4b9fdf..00000000000
--- a/src/vpp/stats/stat_segment_shared.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2018 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_stat_segment_shared_h
-#define included_stat_segment_shared_h
-
-typedef enum
-{
- STAT_DIR_TYPE_ILLEGAL = 0,
- STAT_DIR_TYPE_SCALAR_INDEX,
- STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE,
- STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED,
- STAT_DIR_TYPE_ERROR_INDEX,
- STAT_DIR_TYPE_NAME_VECTOR,
- STAT_DIR_TYPE_EMPTY,
- STAT_DIR_TYPE_SYMLINK,
-} stat_directory_type_t;
-
-typedef struct
-{
- stat_directory_type_t type;
- union {
- struct
- {
- uint32_t index1;
- uint32_t index2;
- };
- uint64_t index;
- uint64_t value;
- void *data;
- };
- char name[128]; // TODO change this to pointer to "somewhere"
-} stat_segment_directory_entry_t;
-
-/*
- * Shared header first in the shared memory segment.
- */
-typedef struct
-{
- uint64_t version;
- void *base;
- volatile uint64_t epoch;
- volatile uint64_t in_progress;
- volatile stat_segment_directory_entry_t *directory_vector;
- volatile uint64_t **error_vector;
-} stat_segment_shared_header_t;
-
-static inline void *
-stat_segment_pointer (void *start, uint64_t offset)
-{
- return ((char *) start + offset);
-}
-
-#endif /* included_stat_segment_shared_h */
diff --git a/src/vpp/stats/stats.md b/src/vpp/stats/stats.md
deleted file mode 100644
index 8671f56e4a5..00000000000
--- a/src/vpp/stats/stats.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# Statistics {#stats_doc}
-
-In VPP most things are measured and counted. There are counters for interface statistics, like RX, TX counters, packet drops, and so on. Every node has a set of per-node counters, one set of error counters, like TTL exceeded, or packet to big or out-of-buffers. And a set of performance counters, like number of clocks, vectors, calls and suspends.
-
-There is also a set of system counters and performance counters, e.g. memory utilization per heap, buffer utilisation and so on.
-
-## VPP Counter Architecture
-
-Counters are exposed directly via shared memory. These are the actual counters in VPP, no sampling or aggregation is done by the statistics infrastructure. With the exception of per node performance data under /sys/node and a few system counters.
-
-
-Clients mount the shared memory segment read-only, using a optimistic concurrency algorithm.
-
-Directory structure as an index.
-
-### Memory layout
-
-The memory segment consists of a shared header, containing atomics for the optimistic concurrency mechanism, and offsets into memory for the directory vectors. The only data structure used is the VPP vectors. All pointers are converted to offsets so that client applications can map the shared memory wherever it pleases.
-
-### Directory layout
-
-### Optimistic concurrency
-
-```
-/*
- * Shared header first in the shared memory segment.
- */
-typedef struct {
- atomic_int_fast64_t epoch;
- atomic_int_fast64_t in_progress;
- atomic_int_fast64_t directory_offset;
- atomic_int_fast64_t error_offset;
- atomic_int_fast64_t stats_offset;
-} stat_segment_shared_header_t;
-
-```
-
-#### Writer
-On the VPP side there is a single writer (controlled by a spinlock). When the writer starts it sets in_progress=1, continues with the update of the data-structures, and when done, bumps epoch++ and sets in_progress=0.
-
-#### Readers
-If in_progress=1, there is no point continuing, so reader sits spinning on the in_progress flag until it is 0. Then it sets start_epoch = epoch and continues copying out the counter data it is interested in, while doing strict boundary checks on all offsets / pointers. When the reader is done, it checks if in_progress=1 or if epoch != start_epoch. If either of those are true is discards the data read.
-
-## How are counters exposed out of VPP?
-
-## Types of Counters
-
-All counters under /err and /if are the directly exposed VPP counters.
-
-* Gauges
-* u64 / float
-* Interface Counters
- * Simple counters, counter_t array of threads of an array of interfaces
- * Combined counters, vlib_counter_t array of threads of an array of interfaces.
-
-
-## Client libraries
-### Writing a new client library
-A new client library can either wrap the C library (libvppapiclient.so) or it can integrate directly with the shared memory. That involves exchanging a file descriptor over the VPP stats Unix domain socket, and opening the memory mapped segment.
-
-### Python
-
-```
-#!/usr/bin/env python3
-from vpp_papi.vpp_stats import VPPStats
-stats = VPPStats('/run/vpp/stats.sock')
-dir = stats.ls(['^/if', '/err/ip4-input', '/sys/node/ip4-input'])
-counters = stats.dump(dir)
-
-# Print the RX counters for the first interface on the first worker core
-print ('RX interface core 0, sw_if_index 0', counters['/if/rx'][0][0])
-
-```
-### C
-```
-#include <vpp-api/client/stat_client.h>
-#include <vppinfra/vec.h>
-
-int main (int argc, char **argv) {
- uint8_t *patterns = 0;
-
- vec_add1(patterns, "^/if");
- vec_add1(patterns, "ip4-input");
-
- int rv = stat_segment_connect(STAT_SEGMENT_SOCKET_FILE);
- uint32_t *dir = stat_segment_ls(patterns);
- stat_segment_data_t *res = stat_segment_dump(dir);
-
- for (int i = 0; i < vec_len(res); i++) {
- switch (res[i].type) {
- case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE:
- for (k = 0; k < vec_len (res[i].simple_counter_vec) - 1; k++)
- for (j = 0; j < vec_len (res[i].simple_counter_vec[k]); j++)
- fformat (stdout, "[%d @ %d]: %llu packets %s\n",
- j, k, res[i].simple_counter_vec[k][j],
- res[i].name);
- break;
-
- case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED:
- for (k = 0; k < vec_len (res[i].combined_counter_vec); k++)
- for (j = 0; j < vec_len (res[i].combined_counter_vec[k]); j++)
- fformat (stdout, "[%d @ %d]: %llu packets, %llu bytes %s\n",
- j, k, res[i].combined_counter_vec[k][j].packets,
- res[i].combined_counter_vec[k][j].bytes,
- res[i].name);
- break;
-
- case STAT_DIR_TYPE_ERROR_INDEX:
- for (j = 0; j < vec_len (res[i].error_vector); j++)
- fformat (stdout, "[@%d] %llu %s\n", j, res[i].error_vector[j], res[i].name);
- break;
-
- case STAT_DIR_TYPE_SCALAR_INDEX:
- fformat (stdout, "%.2f %s\n", res[i].scalar_value, res[i].name);
- break;
-
- default:
- ;
- }
- }
- stat_segment_data_free (res);
-}
-```
-
-## Integrations
-* CLI command. vpp_get_stats [ls | dump | poll]
-* Prometheus
-
-## Future evolution
-* Deprecate the stats over binary API calls that are based on want_stats
diff --git a/src/vpp/stats/stats.rst b/src/vpp/stats/stats.rst
new file mode 100644
index 00000000000..26e4db8c0db
--- /dev/null
+++ b/src/vpp/stats/stats.rst
@@ -0,0 +1,178 @@
+.. _stats_doc:
+
+Statistics
+==========
+
+In VPP most things are measured and counted. There are counters for
+interface statistics, like RX, TX counters, packet drops, and so on.
+Every node has a set of per-node counters, one set of error counters,
+like TTL exceeded, or packet to big or out-of-buffers. And a set of
+performance counters, like number of clocks, vectors, calls and
+suspends.
+
+There is also a set of system counters and performance counters,
+e.g. memory utilization per heap, buffer utilisation and so on.
+
+VPP Counter Architecture
+------------------------
+
+Counters are exposed directly via shared memory. These are the actual
+counters in VPP, no sampling or aggregation is done by the statistics
+infrastructure. With the exception of per node performance data under
+/sys/node and a few system counters.
+
+Clients mount the shared memory segment read-only, using a optimistic
+concurrency algorithm.
+
+Directory structure as an index.
+
+Memory layout
+~~~~~~~~~~~~~
+
+The memory segment consists of a shared header, containing atomics for
+the optimistic concurrency mechanism, and offsets into memory for the
+directory vectors. The only data structure used is the VPP vectors. All
+pointers are converted to offsets so that client applications can map
+the shared memory wherever it pleases.
+
+Directory layout
+~~~~~~~~~~~~~~~~
+
+Optimistic concurrency
+~~~~~~~~~~~~~~~~~~~~~~
+
+::
+
+ /*
+ * Shared header first in the shared memory segment.
+ */
+ typedef struct {
+ atomic_int_fast64_t epoch;
+ atomic_int_fast64_t in_progress;
+ atomic_int_fast64_t directory_offset;
+ atomic_int_fast64_t error_offset;
+ atomic_int_fast64_t stats_offset;
+ } stat_segment_shared_header_t;
+
+Writer
+^^^^^^
+
+On the VPP side there is a single writer (controlled by a spinlock).
+When the writer starts it sets in_progress=1, continues with the update
+of the data-structures, and when done, bumps epoch++ and sets
+in_progress=0.
+
+Readers
+^^^^^^^
+
+If in_progress=1, there is no point continuing, so reader sits spinning
+on the in_progress flag until it is 0. Then it sets start_epoch = epoch
+and continues copying out the counter data it is interested in, while
+doing strict boundary checks on all offsets / pointers. When the reader
+is done, it checks if in_progress=1 or if epoch != start_epoch. If
+either of those are true is discards the data read.
+
+How are counters exposed out of VPP?
+------------------------------------
+
+Types of Counters
+-----------------
+
+All counters under /err and /if are the directly exposed VPP counters.
+
+- Gauges
+- u64 / float
+- Interface Counters
+- Simple counters, counter_t array of threads of an array of interfaces
+- Combined counters, vlib_counter_t array of threads of an array of
+ interfaces.
+
+Client libraries
+----------------
+
+Writing a new client library
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A new client library can either wrap the C library (libvppapiclient.so)
+or it can integrate directly with the shared memory. That involves
+exchanging a file descriptor over the VPP stats Unix domain socket, and
+opening the memory mapped segment.
+
+Python
+~~~~~~
+
+::
+
+ #!/usr/bin/env python3
+ from vpp_papi.vpp_stats import VPPStats
+ stats = VPPStats('/run/vpp/stats.sock')
+ dir = stats.ls(['^/if', '/err/ip4-input', '/sys/node/ip4-input'])
+ counters = stats.dump(dir)
+
+ # Print the RX counters for the first interface on the first worker core
+ print ('RX interface core 0, sw_if_index 0', counters['/if/rx'][0][0])
+
+C
+~
+
+::
+
+ #include <vpp-api/client/stat_client.h>
+ #include <vppinfra/vec.h>
+
+ int main (int argc, char **argv) {
+ uint8_t *patterns = 0;
+
+ vec_add1(patterns, "^/if");
+ vec_add1(patterns, "ip4-input");
+
+ int rv = stat_segment_connect(STAT_SEGMENT_SOCKET_FILE);
+ uint32_t *dir = stat_segment_ls(patterns);
+ stat_segment_data_t *res = stat_segment_dump(dir);
+
+ for (int i = 0; i < vec_len(res); i++) {
+ switch (res[i].type) {
+ case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE:
+ for (k = 0; k < vec_len (res[i].simple_counter_vec) - 1; k++)
+ for (j = 0; j < vec_len (res[i].simple_counter_vec[k]); j++)
+ fformat (stdout, "[%d @ %d]: %llu packets %s\n",
+ j, k, res[i].simple_counter_vec[k][j],
+ res[i].name);
+ break;
+
+ case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED:
+ for (k = 0; k < vec_len (res[i].combined_counter_vec); k++)
+ for (j = 0; j < vec_len (res[i].combined_counter_vec[k]); j++)
+ fformat (stdout, "[%d @ %d]: %llu packets, %llu bytes %s\n",
+ j, k, res[i].combined_counter_vec[k][j].packets,
+ res[i].combined_counter_vec[k][j].bytes,
+ res[i].name);
+ break;
+
+ case STAT_DIR_TYPE_ERROR_INDEX:
+ for (j = 0; j < vec_len (res[i].error_vector); j++)
+ fformat (stdout, "[@%d] %llu %s\n", j, res[i].error_vector[j], res[i].name);
+ break;
+
+ case STAT_DIR_TYPE_SCALAR_INDEX:
+ fformat (stdout, "%.2f %s\n", res[i].scalar_value, res[i].name);
+ break;
+
+ default:
+ ;
+ }
+ }
+ stat_segment_data_free (res);
+ }
+
+Integrations
+------------
+
+- CLI command. vpp_get_stats [ls \| dump \| poll]
+- Prometheus
+
+Future evolution
+----------------
+
+- Deprecate the stats over binary API calls that are based on
+ want_stats
diff --git a/src/vpp/vnet/config.h.in b/src/vpp/vnet/config.h.in
index 8eee6da4913..e8548c9b492 100644
--- a/src/vpp/vnet/config.h.in
+++ b/src/vpp/vnet/config.h.in
@@ -17,5 +17,8 @@
#define included_vpp_vnet_config_h
#define VPP_SANITIZE_ADDR_OPTIONS "@VPP_SANITIZE_ADDR_OPTIONS@"
+#cmakedefine VPP_IP_FIB_MTRIE_16
+#cmakedefine VPP_TCP_DEBUG_ALWAYS
+#cmakedefine VPP_SESSION_DEBUG
#endif
diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c
index bf1eb7a1d1d..c57efd59a62 100644
--- a/src/vpp/vnet/main.c
+++ b/src/vpp/vnet/main.c
@@ -15,9 +15,15 @@
#define _GNU_SOURCE
#include <pthread.h>
+#ifdef __FreeBSD__
+#include <pthread_np.h>
+#endif /* __FreeBSD__ */
#include <sched.h>
+#include <vppinfra/clib.h>
#include <vppinfra/cpu.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/unix.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
#include <vlib/threads.h>
@@ -25,7 +31,7 @@
#include <vnet/ethernet/ethernet.h>
#include <vpp/app/version.h>
#include <vpp/vnet/config.h>
-#include <vpp/api/vpe_msg_enum.h>
+#include <vlibmemory/memclnt.api_enum.h> /* To get the last static message id */
#include <limits.h>
/*
@@ -39,36 +45,38 @@ static void
vpp_find_plugin_path ()
{
extern char *vat_plugin_path;
- char *p, path[PATH_MAX];
- int rv;
- u8 *s;
+ char *p;
+ u8 *s, *path;
/* find executable path */
- if ((rv = readlink ("/proc/self/exe", path, PATH_MAX - 1)) == -1)
+ path = os_get_exec_path ();
+
+ if (!path)
return;
- /* readlink doesn't provide null termination */
- path[rv] = 0;
+ /* add null termination */
+ vec_add1 (path, 0);
/* strip filename */
- if ((p = strrchr (path, '/')) == 0)
- return;
+ if ((p = strrchr ((char *) path, '/')) == 0)
+ goto done;
*p = 0;
/* strip bin/ */
- if ((p = strrchr (path, '/')) == 0)
- return;
+ if ((p = strrchr ((char *) path, '/')) == 0)
+ goto done;
*p = 0;
- s = format (0, "%s/lib/" CLIB_TARGET_TRIPLET "/vpp_plugins:"
- "%s/lib/vpp_plugins", path, path);
+ s = format (0, "%s/" CLIB_LIB_DIR "/vpp_plugins", path, path);
vec_add1 (s, 0);
vlib_plugin_path = (char *) s;
- s = format (0, "%s/lib/" CLIB_TARGET_TRIPLET "/vpp_api_test_plugins:"
- "%s/lib/vpp_api_test_plugins", path, path);
+ s = format (0, "%s/" CLIB_LIB_DIR "/vpp_api_test_plugins", path, path);
vec_add1 (s, 0);
vat_plugin_path = (char *) s;
+
+done:
+ vec_free (path);
}
static void
@@ -111,9 +119,10 @@ main (int argc, char *argv[])
u8 *sizep;
u32 size;
clib_mem_page_sz_t main_heap_log2_page_sz = CLIB_MEM_PAGE_SZ_DEFAULT;
+ clib_mem_page_sz_t default_log2_hugepage_sz = CLIB_MEM_PAGE_SZ_UNKNOWN;
unformat_input_t input, sub_input;
u8 *s = 0, *v = 0;
- int main_core = 1;
+ int main_core = ~0;
cpu_set_t cpuset;
void *main_heap;
@@ -269,6 +278,10 @@ main (int argc, char *argv[])
main_core = x;
}
}
+ else if (!strncmp (argv[i], "interactive", 11))
+ unix_main.flags |= UNIX_FLAG_INTERACTIVE;
+ else if (!strncmp (argv[i], "nosyslog", 8))
+ unix_main.flags |= UNIX_FLAG_NOSYSLOG;
}
defaulted:
@@ -291,6 +304,10 @@ defaulted:
unformat_log2_page_size,
&main_heap_log2_page_sz))
;
+ else if (unformat (&sub_input, "default-hugepage-size %U",
+ unformat_log2_page_size,
+ &default_log2_hugepage_sz))
+ ;
else
{
fformat (stderr, "unknown 'memory' config input '%U'\n",
@@ -313,12 +330,21 @@ defaulted:
unformat_free (&input);
/* set process affinity for main thread */
- CPU_ZERO (&cpuset);
- CPU_SET (main_core, &cpuset);
- pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset);
+ if (main_core != ~0)
+ {
+ CPU_ZERO (&cpuset);
+ CPU_SET (main_core, &cpuset);
+ if (pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t),
+ &cpuset))
+ {
+ clib_unix_error (
+ "pthread_setaffinity_np() on cpu %d failed for main thread",
+ main_core);
+ }
+ }
/* Set up the plugin message ID allocator right now... */
- vl_msg_api_set_first_available_msg_id (VL_MSG_FIRST_AVAILABLE);
+ vl_msg_api_set_first_available_msg_id (VL_MSG_MEMCLNT_LAST + 1);
/* destroy temporary heap and create main one */
clib_mem_destroy ();
@@ -329,6 +355,9 @@ defaulted:
/* Figure out which numa runs the main thread */
__os_numa_index = clib_get_current_numa_node ();
+ if (default_log2_hugepage_sz != CLIB_MEM_PAGE_SZ_UNKNOWN)
+ clib_mem_set_log2_default_hugepage_size (default_log2_hugepage_sz);
+
/* and use the main heap as that numa's numa heap */
clib_mem_set_per_numa_heap (main_heap);
vlib_main_init ();
@@ -489,14 +518,12 @@ show_bihash_command_fn (vlib_main_t * vm,
return 0;
}
-/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_bihash_command, static) =
{
.path = "show bihash",
.short_help = "show bihash",
.function = show_bihash_command_fn,
};
-/* *INDENT-ON* */
#ifdef CLIB_SANITIZE_ADDR
/* default options for Address Sanitizer */
diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt
index c682d70f6f1..5878f0612f0 100644
--- a/src/vppinfra/CMakeLists.txt
+++ b/src/vppinfra/CMakeLists.txt
@@ -32,14 +32,14 @@ configure_file(
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h
- DESTINATION include/vppinfra
+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vppinfra
COMPONENT vpp-dev
)
add_definitions(-fvisibility=hidden)
# Ensure symbols from cJSON are exported
-set_source_files_properties( cJSON.c PROPERTIES
+set_source_files_properties( cJSON.c jsonformat.c PROPERTIES
COMPILE_DEFINITIONS " CJSON_API_VISIBILITY " )
@@ -57,10 +57,11 @@ set(VPPINFRA_SRCS
error.c
fifo.c
format.c
- graph.c
+ format_table.c
hash.c
heap.c
interrupt.c
+ jsonformat.c
longjmp.S
macros.c
maplog.c
@@ -77,7 +78,6 @@ set(VPPINFRA_SRCS
random.c
random_isaac.c
rbtree.c
- sanitizer.c
serialize.c
socket.c
std-formats.c
@@ -97,11 +97,12 @@ set(VPPINFRA_SRCS
valloc.c
vec.c
vector.c
+ vector/toeplitz.c
cJSON.c
)
set(VPPINFRA_HEADERS
- sanitizer.h
+ bihash_12_4.h
bihash_16_8.h
bihash_24_8.h
bihash_32_8.h
@@ -124,6 +125,13 @@ set(VPPINFRA_HEADERS
clib.h
cpu.h
crc32.h
+ crypto/sha2.h
+ crypto/ghash.h
+ crypto/aes.h
+ crypto/aes_cbc.h
+ crypto/aes_ctr.h
+ crypto/aes_gcm.h
+ crypto/poly1305.h
dlist.h
dlmalloc.h
elf_clib.h
@@ -134,10 +142,11 @@ set(VPPINFRA_HEADERS
fifo.h
file.h
format.h
- graph.h
+ format_table.h
hash.h
heap.h
interrupt.h
+ jsonformat.h
lb_hash_hash.h
llist.h
lock.h
@@ -146,15 +155,15 @@ set(VPPINFRA_HEADERS
maplog.h
math.h
memcpy.h
- memcpy_avx2.h
- memcpy_avx512.h
- memcpy_sse3.h
+ memcpy_x86_64.h
mem.h
mhash.h
mpcap.h
os.h
pcap.h
pcap_funcs.h
+ pcg.h
+ perfmon/perfmon.h
pmalloc.h
pool.h
ptclosure.h
@@ -163,7 +172,6 @@ set(VPPINFRA_HEADERS
random_isaac.h
rbtree.h
serialize.h
- sha2.h
smp.h
socket.h
sparse_vec.h
@@ -188,9 +196,13 @@ set(VPPINFRA_HEADERS
vector_altivec.h
vector_avx2.h
vector_avx512.h
- vector/mask_compare.h
- vector/compress.h
vector/array_mask.h
+ vector/compress.h
+ vector/count_equal.h
+ vector/index_to_ptr.h
+ vector/ip_csum.h
+ vector/mask_compare.h
+ vector/toeplitz.h
vector.h
vector_neon.h
vector_sse42.h
@@ -205,10 +217,24 @@ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
linux/mem.c
linux/sysfs.c
linux/netns.c
+# TODO: Temporarily don't build perfmon on non-Linux
+ perfmon/bundle_default.c
+ perfmon/bundle_core_power.c
+ perfmon/perfmon.c
)
+elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
+ list(APPEND VPPINFRA_SRCS
+ elf_clib.c
+ freebsd/mem.c
+ )
+endif()
+
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
+ option(VPP_USE_EXTERNAL_LIBEXECINFO "Use external libexecinfo (useful for non-glibc targets)." ON)
+else()
+ option(VPP_USE_EXTERNAL_LIBEXECINFO "Use external libexecinfo (useful for non-glibc targets)." OFF)
endif()
-option(VPP_USE_EXTERNAL_LIBEXECINFO "Use external libexecinfo (useful for non-glibc targets)." OFF)
if(VPP_USE_EXTERNAL_LIBEXECINFO)
set(EXECINFO_LIB execinfo)
endif()
@@ -235,10 +261,12 @@ if(VPP_BUILD_VPPINFRA_TESTS)
fpool
hash
heap
+ interrupt
longjmp
macros
maplog
pmalloc
+ pool_alloc
pool_iterate
ptclosure
random
@@ -265,23 +293,35 @@ if(VPP_BUILD_VPPINFRA_TESTS)
LINK_LIBRARIES vppinfra Threads::Threads
)
endforeach()
+endif(VPP_BUILD_VPPINFRA_TESTS)
set(test_files
- vector/test/compress.c
- vector/test/mask_compare.c
- vector/test/array_mask.c
+ test/aes_cbc.c
+ test/aes_ctr.c
+ test/aes_gcm.c
+ test/poly1305.c
+ test/array_mask.c
+ test/compress.c
+ test/count_equal.c
+ test/crc32c.c
+ test/index_to_ptr.c
+ test/ip_csum.c
+ test/mask_compare.c
+ test/memcpy_x86_64.c
+ test/sha2.c
+ test/toeplitz.c
)
-add_vpp_executable(test_vector_funcs
+add_vpp_executable(test_infra
SOURCES
- vector/test/test.c
+ test/test.c
${test_files}
LINK_LIBRARIES vppinfra
+ NO_INSTALL
)
-vpp_library_set_multiarch_sources(test_vector_funcs
+vpp_library_set_multiarch_sources(test_infra
SOURCES
${test_files}
)
-endif(VPP_BUILD_VPPINFRA_TESTS)
diff --git a/src/vppinfra/atomics.h b/src/vppinfra/atomics.h
index 5d3c5f8d601..92c45610391 100644
--- a/src/vppinfra/atomics.h
+++ b/src/vppinfra/atomics.h
@@ -52,6 +52,8 @@
#define clib_atomic_store_rel_n(a, b) __atomic_store_n ((a), (b), __ATOMIC_RELEASE)
#define clib_atomic_store_seq_cst(a, b) \
__atomic_store_n ((a), (b), __ATOMIC_SEQ_CST)
+#define clib_atomic_store_relax_n(a, b) \
+ __atomic_store_n ((a), (b), __ATOMIC_RELAXED)
#define clib_atomic_load_seq_cst(a) __atomic_load_n ((a), __ATOMIC_SEQ_CST)
#define clib_atomic_swap_acq_n(a, b) __atomic_exchange_n ((a), (b), __ATOMIC_ACQUIRE)
diff --git a/src/vppinfra/bihash_12_4.h b/src/vppinfra/bihash_12_4.h
new file mode 100644
index 00000000000..3fdf1847861
--- /dev/null
+++ b/src/vppinfra/bihash_12_4.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+#undef BIHASH_TYPE
+#undef BIHASH_KVP_PER_PAGE
+#undef BIHASH_32_64_SVM
+#undef BIHASH_ENABLE_STATS
+#undef BIHASH_KVP_AT_BUCKET_LEVEL
+#undef BIHASH_LAZY_INSTANTIATE
+#undef BIHASH_BUCKET_PREFETCH_CACHE_LINES
+#undef BIHASH_USE_HEAP
+
+#define BIHASH_TYPE _12_4
+#define BIHASH_KVP_PER_PAGE 4
+#define BIHASH_KVP_AT_BUCKET_LEVEL 0
+#define BIHASH_LAZY_INSTANTIATE 1
+#define BIHASH_BUCKET_PREFETCH_CACHE_LINES 1
+#define BIHASH_USE_HEAP 1
+
+#ifndef __included_bihash_12_4_h__
+#define __included_bihash_12_4_h__
+
+#include <vppinfra/crc32.h>
+#include <vppinfra/heap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/xxhash.h>
+
+typedef union
+{
+ struct
+ {
+ u32 key[3];
+ u32 value;
+ };
+ u64 as_u64[2];
+} clib_bihash_kv_12_4_t;
+
+static inline void
+clib_bihash_mark_free_12_4 (clib_bihash_kv_12_4_t *v)
+{
+ v->value = 0xFEEDFACE;
+}
+
+static inline int
+clib_bihash_is_free_12_4 (const clib_bihash_kv_12_4_t *v)
+{
+ if (v->value == 0xFEEDFACE)
+ return 1;
+ return 0;
+}
+
+static inline u64
+clib_bihash_hash_12_4 (const clib_bihash_kv_12_4_t *v)
+{
+#ifdef clib_crc32c_uses_intrinsics
+ return clib_crc32c ((u8 *) v->key, 12);
+#else
+ u64 tmp = v->as_u64[0] ^ v->key[2];
+ return clib_xxhash (tmp);
+#endif
+}
+
+static inline u8 *
+format_bihash_kvp_12_4 (u8 *s, va_list *args)
+{
+ clib_bihash_kv_12_4_t *v = va_arg (*args, clib_bihash_kv_12_4_t *);
+
+ s = format (s, "key %u %u %u value %u", v->key[0], v->key[1], v->key[2],
+ v->value);
+ return s;
+}
+
+static inline int
+clib_bihash_key_compare_12_4 (u32 *a, u32 *b)
+{
+#if defined(CLIB_HAVE_VEC128)
+ u32x4 v = (*(u32x4u *) a) ^ (*(u32x4u *) b);
+ v[3] = 0;
+ return u32x4_is_all_zero (v);
+#else
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0;
+#endif
+}
+
+#undef __included_bihash_template_h__
+#include <vppinfra/bihash_template.h>
+
+#endif /* __included_bihash_12_4_h__ */
diff --git a/src/vppinfra/bihash_16_8.h b/src/vppinfra/bihash_16_8.h
index 6b116bcf3e4..67aa678efa9 100644
--- a/src/vppinfra/bihash_16_8.h
+++ b/src/vppinfra/bihash_16_8.h
@@ -43,11 +43,16 @@ typedef struct
u64 value;
} clib_bihash_kv_16_8_t;
+static inline void
+clib_bihash_mark_free_16_8 (clib_bihash_kv_16_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_16_8 (clib_bihash_kv_16_8_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
diff --git a/src/vppinfra/bihash_16_8_32.h b/src/vppinfra/bihash_16_8_32.h
index 9453f88ace7..d899253302c 100644
--- a/src/vppinfra/bihash_16_8_32.h
+++ b/src/vppinfra/bihash_16_8_32.h
@@ -43,11 +43,16 @@ typedef struct
u64 value;
} clib_bihash_kv_16_8_32_t;
+static inline void
+clib_bihash_mark_free_16_8_32 (clib_bihash_kv_16_8_32_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_16_8_32 (clib_bihash_kv_16_8_32_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
diff --git a/src/vppinfra/bihash_24_16.h b/src/vppinfra/bihash_24_16.h
index 4e979b49410..b421ab12edc 100644
--- a/src/vppinfra/bihash_24_16.h
+++ b/src/vppinfra/bihash_24_16.h
@@ -43,11 +43,16 @@ typedef struct
u64 value[2];
} clib_bihash_kv_24_16_t;
+static inline void
+clib_bihash_mark_free_24_16 (clib_bihash_kv_24_16_t *v)
+{
+ v->value[0] = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_24_16 (const clib_bihash_kv_24_16_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value[0] == ~0ULL && v->value[1] == ~0ULL)
+ if (v->value[0] == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -77,8 +82,8 @@ static inline int
clib_bihash_key_compare_24_16 (u64 * a, u64 * b)
{
#if defined (CLIB_HAVE_VEC512)
- u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
- return (u64x8_is_zero_mask (v) & 0x7) == 0;
+ return u64x8_is_equal (u64x8_mask_load_zero (a, 0x7),
+ u64x8_mask_load_zero (b, 0x7));
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
u64x2 v = { a[2] ^ b[2], 0 };
v |= u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b);
diff --git a/src/vppinfra/bihash_24_8.h b/src/vppinfra/bihash_24_8.h
index 2d667ad9aa3..14e5225ccfd 100644
--- a/src/vppinfra/bihash_24_8.h
+++ b/src/vppinfra/bihash_24_8.h
@@ -43,11 +43,16 @@ typedef struct
u64 value;
} clib_bihash_kv_24_8_t;
+static inline void
+clib_bihash_mark_free_24_8 (clib_bihash_kv_24_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_24_8 (const clib_bihash_kv_24_8_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -77,8 +82,8 @@ static inline int
clib_bihash_key_compare_24_8 (u64 * a, u64 * b)
{
#if defined (CLIB_HAVE_VEC512)
- u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
- return (u64x8_is_zero_mask (v) & 0x7) == 0;
+ return u64x8_is_equal (u64x8_mask_load_zero (a, 0x7),
+ u64x8_mask_load_zero (b, 0x7));
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
u64x2 v = { a[2] ^ b[2], 0 };
v |= u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b);
diff --git a/src/vppinfra/bihash_32_8.h b/src/vppinfra/bihash_32_8.h
index 0935fcce184..8139a0eab62 100644
--- a/src/vppinfra/bihash_32_8.h
+++ b/src/vppinfra/bihash_32_8.h
@@ -43,11 +43,16 @@ typedef struct
u64 value;
} clib_bihash_kv_32_8_t;
+static inline void
+clib_bihash_mark_free_32_8 (clib_bihash_kv_32_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_32_8 (const clib_bihash_kv_32_8_t *v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -77,8 +82,8 @@ static inline int
clib_bihash_key_compare_32_8 (u64 *a, u64 *b)
{
#if defined(CLIB_HAVE_VEC512)
- u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
- return (u64x8_is_zero_mask (v) & 0xf) == 0;
+ return u64x8_is_equal (u64x8_mask_load_zero (a, 0xf),
+ u64x8_mask_load_zero (b, 0xf));
#elif defined(CLIB_HAVE_VEC256)
u64x4 v = u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b);
return u64x4_is_all_zero (v);
diff --git a/src/vppinfra/bihash_40_8.h b/src/vppinfra/bihash_40_8.h
index 1fb344fdeeb..27207a3a69c 100644
--- a/src/vppinfra/bihash_40_8.h
+++ b/src/vppinfra/bihash_40_8.h
@@ -44,11 +44,16 @@ typedef struct
u64 value;
} clib_bihash_kv_40_8_t;
+static inline void
+clib_bihash_mark_free_40_8 (clib_bihash_kv_40_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_40_8 (const clib_bihash_kv_40_8_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -78,9 +83,8 @@ static inline int
clib_bihash_key_compare_40_8 (u64 * a, u64 * b)
{
#if defined (CLIB_HAVE_VEC512)
- u64x8 v;
- v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
- return (u64x8_is_zero_mask (v) & 0x1f) == 0;
+ return u64x8_is_equal (u64x8_mask_load_zero (a, 0x1f),
+ u64x8_mask_load_zero (b, 0x1f));
#elif defined (CLIB_HAVE_VEC256)
u64x4 v = { a[4] ^ b[4], 0, 0, 0 };
v |= u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b);
diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h
index 54fd7090e81..dbc92c3df1d 100644
--- a/src/vppinfra/bihash_48_8.h
+++ b/src/vppinfra/bihash_48_8.h
@@ -42,11 +42,16 @@ typedef struct
u64 value;
} clib_bihash_kv_48_8_t;
+static inline void
+clib_bihash_mark_free_48_8 (clib_bihash_kv_48_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
static inline int
clib_bihash_is_free_48_8 (const clib_bihash_kv_48_8_t * v)
{
- /* Free values are clib_memset to 0xff, check a bit... */
- if (v->key[0] == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -78,8 +83,8 @@ static inline int
clib_bihash_key_compare_48_8 (u64 * a, u64 * b)
{
#if defined (CLIB_HAVE_VEC512)
- u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
- return (u64x8_is_zero_mask (v) & 0x3f) == 0;
+ return u64x8_is_equal (u64x8_mask_load_zero (a, 0x3f),
+ u64x8_mask_load_zero (b, 0x3f));
#elif defined (CLIB_HAVE_VEC256)
u64x4 v = { 0 };
v = u64x4_insert_lo (v, u64x2_load_unaligned (a + 4) ^
diff --git a/src/vppinfra/bihash_8_16.h b/src/vppinfra/bihash_8_16.h
index b42b32c33d2..36ddda7149b 100644
--- a/src/vppinfra/bihash_8_16.h
+++ b/src/vppinfra/bihash_8_16.h
@@ -44,13 +44,19 @@ typedef struct
u64 value[2]; /**< the value */
} clib_bihash_kv_8_16_t;
+static inline void
+clib_bihash_mark_free_8_16 (clib_bihash_kv_8_16_t *v)
+{
+ v->value[0] = 0xFEEDFACE8BADF00DULL;
+}
+
/** Decide if a clib_bihash_kv_8_16_t instance is free
@param v- pointer to the (key,value) pair
*/
static inline int
clib_bihash_is_free_8_16 (clib_bihash_kv_8_16_t * v)
{
- if (v->key == ~0ULL && v->value[0] == ~0ULL && v->value[1] == ~0ULL)
+ if (v->value[0] == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
@@ -80,8 +86,7 @@ format_bihash_kvp_8_16 (u8 * s, va_list * args)
clib_bihash_kv_8_16_t *v = va_arg (*args, clib_bihash_kv_8_16_t *);
s =
- format (s, "key %llu value [%ll,%llx]u", v->key, v->value[0],
- v->value[1]);
+ format (s, "key %llx value [%llx,%llx]", v->key, v->value[0], v->value[1]);
return s;
}
diff --git a/src/vppinfra/bihash_8_8.h b/src/vppinfra/bihash_8_8.h
index 2fdd2ed7aef..2471871fc81 100644
--- a/src/vppinfra/bihash_8_8.h
+++ b/src/vppinfra/bihash_8_8.h
@@ -44,13 +44,19 @@ typedef struct
u64 value; /**< the value */
} clib_bihash_kv_8_8_t;
+static inline void
+clib_bihash_mark_free_8_8 (clib_bihash_kv_8_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
/** Decide if a clib_bihash_kv_8_8_t instance is free
@param v- pointer to the (key,value) pair
*/
static inline int
clib_bihash_is_free_8_8 (clib_bihash_kv_8_8_t * v)
{
- if (v->key == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
diff --git a/src/vppinfra/bihash_8_8_stats.h b/src/vppinfra/bihash_8_8_stats.h
index 2237a0d624f..14702dfd782 100644
--- a/src/vppinfra/bihash_8_8_stats.h
+++ b/src/vppinfra/bihash_8_8_stats.h
@@ -45,13 +45,19 @@ typedef struct
u64 value; /**< the value */
} clib_bihash_kv_8_8_stats_t;
+static inline void
+clib_bihash_mark_free_8_8_stats (clib_bihash_kv_8_8_stats_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
/** Decide if a clib_bihash_kv_8_8_t instance is free
@param v- pointer to the (key,value) pair
*/
static inline int
clib_bihash_is_free_8_8_stats (clib_bihash_kv_8_8_stats_t * v)
{
- if (v->key == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
diff --git a/src/vppinfra/bihash_doc.h b/src/vppinfra/bihash_doc.h
index 7c7e5179961..f6d32ce0b56 100644
--- a/src/vppinfra/bihash_doc.h
+++ b/src/vppinfra/bihash_doc.h
@@ -90,83 +90,172 @@ static inline void *clib_bihash_get_value (clib_bihash * h, uword offset);
/** Get clib mheap offset given a pointer */
static inline uword clib_bihash_get_offset (clib_bihash * h, void *v);
-/** initialize a bounded index extensible hash table
-
- @param h - the bi-hash table to initialize
- @param name - name of the hash table
- @param nbuckets - the number of buckets, will be rounded up to
-a power of two
- @param memory_size - clib mheap size, in bytes
-*/
-
+/**
+ * initialize a bounded index extensible hash table
+ *
+ * @param h - the bi-hash table to initialize
+ * @param name - name of the hash table
+ * @param nbuckets - the number of buckets, will be rounded up to
+ * a power of two
+ * @param memory_size - clib mheap size, in bytes
+ */
void clib_bihash_init
(clib_bihash * h, char *name, u32 nbuckets, uword memory_size);
-/** Destroy a bounded index extensible hash table
- @param h - the bi-hash table to free
-*/
+/**
+ * initialize a bounded index extensible hash table with arguments passed as
+ * a struct
+ *
+ * @param a - initialization parameters
+ * h - the bi-hash table to initialize;
+ * name - name of the hash table
+ * nbuckets - the number of buckets, will be rounded up to a power of two
+ * memory_size - clib mheap size, in bytes
+ * format_function_t - format function for the bihash kv pairs
+ * instantiate_immediately - allocate memory right away
+ * dont_add_to_all_bihash_list - dont mention in 'show bihash'
+ */
+void BV (clib_bihash_init2) (BVT (clib_bihash_init2_args) * a);
-void clib_bihash_free (clib_bihash * h);
+/**
+ * Set the formating function for the bihash
+ *
+ * @param h - the bi-hash table
+ * @param kvp_fmt_fn - the format function
+ */
+void BV (clib_bihash_set_kvp_format_fn) (BVT (clib_bihash) * h,
+ format_function_t *kvp_fmt_fn);
-/** Add or delete a (key,value) pair from a bi-hash table
+/**
+ * Destroy a bounded index extensible hash table
+ *
+ * @param h - the bi-hash table to free
+ */
+void clib_bihash_free (clib_bihash *h);
- @param h - the bi-hash table to search
- @param add_v - the (key,value) pair to add
- @param is_add - add=1 (BIHASH_ADD), delete=0 (BIHASH_DEL)
- @returns 0 on success, < 0 on error
- @note This function will replace an existing (key,value) pair if the
- new key matches an existing key
-*/
+/**
+ * Add or delete a (key,value) pair from a bi-hash table
+ *
+ * @param h - the bi-hash table to search
+ * @param add_v - the (key,value) pair to add
+ * @param is_add - add=1 (BIHASH_ADD), delete=0 (BIHASH_DEL)
+ * @returns 0 on success, < 0 on error
+ * @note This function will replace an existing (key,value) pair if the
+ * new key matches an existing key
+ */
int clib_bihash_add_del (clib_bihash * h, clib_bihash_kv * add_v, int is_add);
+/**
+ * Add or delete a (key,value) pair from a bi-hash table, using a pre-computed
+ * hash
+ *
+ * @param h - the bi-hash table to search
+ * @param add_v - the (key,value) pair to add
+ * @param hash - the precomputed hash of the key
+ * @param is_add - add=1 (BIHASH_ADD), delete=0 (BIHASH_DEL)
+ * @returns 0 on success, < 0 on error
+ * @note This function will replace an existing (key,value) pair if the
+ * new key matches an existing key
+ */
+int BV (clib_bihash_add_del_with_hash) (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * add_v, u64 hash,
+ int is_add);
-/** Search a bi-hash table, use supplied hash code
+/**
+ * Add a (key,value) pair to a bi-hash table, and tries to free stale entries
+ * on collisions with passed filter.
+ *
+ * @param h - the bi-hash table to search
+ * @param add_v - the (key,value) pair to add
+ * @param is_stale_cb - callback receiving a kv pair, returning 1 if the kv is
+ * stale and can be overwriten. This will be called on adding a kv in a full
+ * page before trying to split & rehash its bucket.
+ * @param arg - opaque arguement passed to is_stale_cb
+ * @returns 0 on success, < 0 on error
+ * @note This function will replace an existing (key,value) pair if the
+ * new key matches an existing key
+ */
+int BV (clib_bihash_add_or_overwrite_stale) (
+ BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
+ int (*is_stale_cb) (BVT (clib_bihash_kv) *, void *), void *arg);
- @param h - the bi-hash table to search
- @param hash - the hash code
- @param in_out_kv - (key,value) pair containing the search key
- @returns 0 on success (with in_out_kv set), < 0 on error
-*/
-int clib_bihash_search_inline_with_hash
- (clib_bihash * h, u64 hash, clib_bihash_kv * in_out_kv);
+/**
+ * Add a (key,value) pair to a bi-hash table, calling a callback on overwrite
+ * with the bucket lock held.
+ *
+ * @param h - the bi-hash table to search
+ * @param add_v - the (key,value) pair to add
+ * @param overwrite_cb - callback called when overwriting a key, allowing
+ * you to cleanup the value with the bucket lock held.
+ * @param arg - opaque arguement passed to overwrite_cb
+ * @returns 0 on success, < 0 on error
+ * @note This function will replace an existing (key,value) pair if the
+ * new key matches an existing key
+ */
+int BV (clib_bihash_add_with_overwrite_cb) (
+ BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
+ void (*overwrite_cb) (BVT (clib_bihash_kv) *, void *), void *arg);
-/** Search a bi-hash table
+/**
+ * Tells if the bihash was initialised (i.e. mem allocated by first add)
+ *
+ * @param h - the bi-hash table to search
+ */
+int BV (clib_bihash_is_initialised) (const BVT (clib_bihash) * h);
- @param h - the bi-hash table to search
- @param in_out_kv - (key,value) pair containing the search key
- @returns 0 on success (with in_out_kv set), < 0 on error
-*/
-int clib_bihash_search_inline (clib_bihash * h, clib_bihash_kv * in_out_kv);
+/**
+ * Search a bi-hash table, use supplied hash code
+ *
+ * @param h - the bi-hash table to search
+ * @param hash - the hash code
+ * @param in_out_kv - (key,value) pair containing the search key
+ * @returns 0 on success (with in_out_kv set), < 0 on error
+ */
+int clib_bihash_search_inline_with_hash (clib_bihash *h, u64 hash,
+ clib_bihash_kv *in_out_kv);
-/** Prefetch a bi-hash bucket given a hash code
+/**
+ * Search a bi-hash table
+ *
+ * @param h - the bi-hash table to search
+ * @param in_out_kv - (key,value) pair containing the search key
+ * @returns 0 on success (with in_out_kv set), < 0 on error
+ */
+int clib_bihash_search_inline (clib_bihash *h, clib_bihash_kv *in_out_kv);
- @param h - the bi-hash table to search
- @param hash - the hash code
- @note see also clib_bihash_hash to compute the code
-*/
+/**
+ * Prefetch a bi-hash bucket given a hash code
+ *
+ * @param h - the bi-hash table to search
+ * @param hash - the hash code
+ * @note see also clib_bihash_hash to compute the code
+ */
void clib_bihash_prefetch_bucket (clib_bihash * h, u64 hash);
-/** Prefetch bi-hash (key,value) data given a hash code
-
- @param h - the bi-hash table to search
- @param hash - the hash code
- @note assumes that the bucket has been prefetched, see
- clib_bihash_prefetch_bucket
-*/
+/**
+ * Prefetch bi-hash (key,value) data given a hash code
+ *
+ * @param h - the bi-hash table to search
+ * @param hash - the hash code
+ * @note assumes that the bucket has been prefetched, see
+ * clib_bihash_prefetch_bucket
+ */
void clib_bihash_prefetch_data (clib_bihash * h, u64 hash);
-/** Search a bi-hash table
-
- @param h - the bi-hash table to search
- @param search_key - (key,value) pair containing the search key
- @param valuep - (key,value) set to search result
- @returns 0 on success (with valuep set), < 0 on error
- @note used in situations where key modification is not desired
-*/
+/**
+ * Search a bi-hash table
+ *
+ * @param h - the bi-hash table to search
+ * @param search_key - (key,value) pair containing the search key
+ * @param valuep - (key,value) set to search result
+ * @returns 0 on success (with valuep set), < 0 on error
+ * @note used in situations where key modification is not desired
+ */
int clib_bihash_search_inline_2
(clib_bihash * h, clib_bihash_kv * search_key, clib_bihash_kv * valuep);
-/* Calback function for walking a bihash table
+/**
+ * Calback function for walking a bihash table
*
* @param kv - KV pair visited
* @param ctx - Context passed to the walk
@@ -175,13 +264,14 @@ int clib_bihash_search_inline_2
typedef int (*clib_bihash_foreach_key_value_pair_cb) (clib_bihash_kv * kv,
void *ctx);
-/** Visit active (key,value) pairs in a bi-hash table
-
- @param h - the bi-hash table to search
- @param callback - function to call with each active (key,value) pair
- @param arg - arbitrary second argument passed to the callback function
- First argument is the (key,value) pair to visit
-*/
+/**
+ * Visit active (key,value) pairs in a bi-hash table
+ *
+ * @param h - the bi-hash table to search
+ * @param callback - function to call with each active (key,value) pair
+ * @param arg - arbitrary second argument passed to the callback function
+ * First argument is the (key,value) pair to visit
+ */
void clib_bihash_foreach_key_value_pair (clib_bihash * h,
clib_bihash_foreach_key_value_pair_cb
* callback, void *arg);
diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c
index ddaccbdb126..d488b1a659c 100644
--- a/src/vppinfra/bihash_template.c
+++ b/src/vppinfra/bihash_template.c
@@ -106,8 +106,10 @@ static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes)
void *base, *rv;
uword alloc = alloc_arena_next (h) - alloc_arena_mapped (h);
int mmap_flags = MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS;
+#if __linux__
int mmap_flags_huge = (mmap_flags | MAP_HUGETLB | MAP_LOCKED |
BIHASH_LOG2_HUGEPAGE_SIZE << MAP_HUGE_SHIFT);
+#endif /* __linux__ */
/* new allocation is 25% of existing one */
if (alloc_arena_mapped (h) >> 2 > alloc)
@@ -118,7 +120,11 @@ static inline void *BV (alloc_aligned) (BVT (clib_bihash) * h, uword nbytes)
base = (void *) (uword) (alloc_arena (h) + alloc_arena_mapped (h));
+#if __linux__
rv = mmap (base, alloc, PROT_READ | PROT_WRITE, mmap_flags_huge, -1, 0);
+#elif __FreeBSD__
+ rv = MAP_FAILED;
+#endif /* __linux__ */
/* fallback - maybe we are still able to allocate normal pages */
if (rv == MAP_FAILED || mlock (base, alloc) != 0)
@@ -165,19 +171,23 @@ static void BV (clib_bihash_instantiate) (BVT (clib_bihash) * h)
if (BIHASH_KVP_AT_BUCKET_LEVEL)
{
- int i;
+ int i, j;
BVT (clib_bihash_bucket) * b;
b = h->buckets;
for (i = 0; i < h->nbuckets; i++)
{
+ BVT (clib_bihash_kv) * v;
b->offset = BV (clib_bihash_get_offset) (h, (void *) (b + 1));
b->refcnt = 1;
/* Mark all elements free */
- clib_memset_u8 ((b + 1), 0xff, BIHASH_KVP_PER_PAGE *
- sizeof (BVT (clib_bihash_kv)));
-
+ v = (void *) (b + 1);
+ for (j = 0; j < BIHASH_KVP_PER_PAGE; j++)
+ {
+ BV (clib_bihash_mark_free) (v);
+ v++;
+ }
/* Compute next bucket start address */
b = (void *) (((uword) b) + sizeof (*b) +
(BIHASH_KVP_PER_PAGE *
@@ -201,6 +211,7 @@ void BV (clib_bihash_init2) (BVT (clib_bihash_init2_args) * a)
h->log2_nbuckets = max_log2 (a->nbuckets);
h->memory_size = BIHASH_USE_HEAP ? 0 : a->memory_size;
h->instantiated = 0;
+ h->dont_add_to_all_bihash_list = a->dont_add_to_all_bihash_list;
h->fmt_fn = BV (format_bihash);
h->kvp_fmt_fn = a->kvp_fmt_fn;
@@ -425,6 +436,7 @@ void BV (clib_bihash_free) (BVT (clib_bihash) * h)
vec_free (h->working_copies);
vec_free (h->working_copy_lengths);
+ clib_mem_free ((void *) h->alloc_lock);
#if BIHASH_32_64_SVM == 0
vec_free (h->freelists);
#else
@@ -435,6 +447,11 @@ void BV (clib_bihash_free) (BVT (clib_bihash) * h)
clib_mem_vm_free ((void *) (uword) (alloc_arena (h)),
alloc_arena_size (h));
never_initialized:
+ if (h->dont_add_to_all_bihash_list)
+ {
+ clib_memset_u8 (h, 0, sizeof (*h));
+ return;
+ }
clib_memset_u8 (h, 0, sizeof (*h));
for (i = 0; i < vec_len (clib_all_bihashes); i++)
{
@@ -452,6 +469,7 @@ static
BVT (clib_bihash_value) *
BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages)
{
+ int i;
BVT (clib_bihash_value) * rv = 0;
ASSERT (h->alloc_lock[0]);
@@ -471,12 +489,15 @@ BV (value_alloc) (BVT (clib_bihash) * h, u32 log2_pages)
initialize:
ASSERT (rv);
- /*
- * Latest gcc complains that the length arg is zero
- * if we replace (1<<log2_pages) with vec_len(rv).
- * No clue.
- */
- clib_memset_u8 (rv, 0xff, sizeof (*rv) * (1 << log2_pages));
+
+ BVT (clib_bihash_kv) * v;
+ v = (BVT (clib_bihash_kv) *) rv;
+
+ for (i = 0; i < BIHASH_KVP_PER_PAGE * (1 << log2_pages); i++)
+ {
+ BV (clib_bihash_mark_free) (v);
+ v++;
+ }
return rv;
}
@@ -665,9 +686,10 @@ BV (split_and_rehash_linear)
return new_values;
}
-static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
- (BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, u64 hash, int is_add,
- int (*is_stale_cb) (BVT (clib_bihash_kv) *, void *), void *arg)
+static_always_inline int BV (clib_bihash_add_del_inline_with_hash) (
+ BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v, u64 hash, int is_add,
+ int (*is_stale_cb) (BVT (clib_bihash_kv) *, void *), void *is_stale_arg,
+ void (*overwrite_cb) (BVT (clib_bihash_kv) *, void *), void *overwrite_arg)
{
BVT (clib_bihash_bucket) * b, tmp_b;
BVT (clib_bihash_value) * v, *new_v, *save_new_v, *working_copy;
@@ -678,12 +700,10 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
int mark_bucket_linear;
int resplit_once;
- /* *INDENT-OFF* */
static const BVT (clib_bihash_bucket) mask = {
.linear_search = 1,
.log2_pages = -1
};
- /* *INDENT-ON* */
#if BIHASH_LAZY_INSTANTIATE
/*
@@ -705,6 +725,12 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
ASSERT (h->instantiated != 0);
#endif
+ /*
+ * Debug image: make sure that an item being added doesn't accidentally
+ * look like a free item.
+ */
+ ASSERT ((is_add && BV (clib_bihash_is_free) (add_v)) == 0);
+
b = BV (clib_bihash_get_bucket) (h, hash);
BV (clib_bihash_lock_bucket) (b);
@@ -761,6 +787,8 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
*/
for (i = 0; i < limit; i++)
{
+ if (BV (clib_bihash_is_free) (&(v->kvp[i])))
+ continue;
if (BV (clib_bihash_key_compare) (v->kvp[i].key, add_v->key))
{
/* Add but do not overwrite? */
@@ -769,7 +797,8 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
BV (clib_bihash_unlock_bucket) (b);
return (-2);
}
-
+ if (overwrite_cb)
+ overwrite_cb (&(v->kvp[i]), overwrite_arg);
clib_memcpy_fast (&(v->kvp[i].value),
&add_v->value, sizeof (add_v->value));
BV (clib_bihash_unlock_bucket) (b);
@@ -805,7 +834,7 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
{
for (i = 0; i < limit; i++)
{
- if (is_stale_cb (&(v->kvp[i]), arg))
+ if (is_stale_cb (&(v->kvp[i]), is_stale_arg))
{
clib_memcpy_fast (&(v->kvp[i]), add_v, sizeof (*add_v));
CLIB_MEMORY_STORE_BARRIER ();
@@ -821,10 +850,13 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
{
for (i = 0; i < limit; i++)
{
+ /* no sense even looking at this one */
+ if (BV (clib_bihash_is_free) (&(v->kvp[i])))
+ continue;
/* Found the key? Kill it... */
if (BV (clib_bihash_key_compare) (v->kvp[i].key, add_v->key))
{
- clib_memset_u8 (&(v->kvp[i]), 0xff, sizeof (*(add_v)));
+ BV (clib_bihash_mark_free) (&(v->kvp[i]));
/* Is the bucket empty? */
if (PREDICT_TRUE (b->refcnt > 1))
{
@@ -839,8 +871,13 @@ static_always_inline int BV (clib_bihash_add_del_inline_with_hash)
b->linear_search = 0;
b->log2_pages = 0;
/* Clean up the bucket-level kvp array */
- clib_memset_u8 ((b + 1), 0xff, BIHASH_KVP_PER_PAGE *
- sizeof (BVT (clib_bihash_kv)));
+ BVT (clib_bihash_kv) *v = (void *) (b + 1);
+ int j;
+ for (j = 0; j < BIHASH_KVP_PER_PAGE; j++)
+ {
+ BV (clib_bihash_mark_free) (v);
+ v++;
+ }
CLIB_MEMORY_STORE_BARRIER ();
BV (clib_bihash_unlock_bucket) (b);
BV (clib_bihash_increment_stat) (h, BIHASH_STAT_del, 1);
@@ -987,7 +1024,15 @@ static_always_inline int BV (clib_bihash_add_del_inline)
{
u64 hash = BV (clib_bihash_hash) (add_v);
return BV (clib_bihash_add_del_inline_with_hash) (h, add_v, hash, is_add,
- is_stale_cb, arg);
+ is_stale_cb, arg, 0, 0);
+}
+
+int BV (clib_bihash_add_del_with_hash) (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * add_v, u64 hash,
+ int is_add)
+{
+ return BV (clib_bihash_add_del_inline_with_hash) (h, add_v, hash, is_add, 0,
+ 0, 0, 0);
}
int BV (clib_bihash_add_del)
@@ -1003,6 +1048,15 @@ int BV (clib_bihash_add_or_overwrite_stale)
return BV (clib_bihash_add_del_inline) (h, add_v, 1, stale_callback, arg);
}
+int BV (clib_bihash_add_with_overwrite_cb) (
+ BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
+ void (overwrite_cb) (BVT (clib_bihash_kv) *, void *), void *arg)
+{
+ u64 hash = BV (clib_bihash_hash) (add_v);
+ return BV (clib_bihash_add_del_inline_with_hash) (h, add_v, hash, 1, 0, 0,
+ overwrite_cb, arg);
+}
+
int BV (clib_bihash_search)
(BVT (clib_bihash) * h,
BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h
index da2f684b685..8f5879b4634 100644
--- a/src/vppinfra/bihash_template.h
+++ b/src/vppinfra/bihash_template.h
@@ -99,7 +99,6 @@ typedef struct
STATIC_ASSERT_SIZEOF (BVT (clib_bihash_bucket), sizeof (u64));
-/* *INDENT-OFF* */
typedef CLIB_PACKED (struct {
/*
* Backing store allocation. Since bihash manages its own
@@ -118,7 +117,6 @@ typedef CLIB_PACKED (struct {
volatile u32 ready;
u64 pad[1];
}) BVT (clib_bihash_shared_header);
-/* *INDENT-ON* */
STATIC_ASSERT_SIZEOF (BVT (clib_bihash_shared_header), 8 * sizeof (u64));
@@ -170,6 +168,7 @@ BVS (clib_bihash)
u64 alloc_arena; /* Base of the allocation arena */
volatile u8 instantiated;
+ u8 dont_add_to_all_bihash_list;
/**
* A custom format function to print the Key and Value of bihash_key instead of default hexdump
@@ -281,9 +280,7 @@ static inline void BV (clib_bihash_alloc_unlock) (BVT (clib_bihash) * h)
static inline void BV (clib_bihash_lock_bucket) (BVT (clib_bihash_bucket) * b)
{
- /* *INDENT-OFF* */
BVT (clib_bihash_bucket) mask = { .lock = 1 };
- /* *INDENT-ON* */
u64 old;
try_again:
@@ -355,12 +352,19 @@ void BV (clib_bihash_free) (BVT (clib_bihash) * h);
int BV (clib_bihash_add_del) (BVT (clib_bihash) * h,
BVT (clib_bihash_kv) * add_v, int is_add);
+
+int BV (clib_bihash_add_del_with_hash) (BVT (clib_bihash) * h,
+ BVT (clib_bihash_kv) * add_v, u64 hash,
+ int is_add);
int BV (clib_bihash_add_or_overwrite_stale) (BVT (clib_bihash) * h,
BVT (clib_bihash_kv) * add_v,
int (*is_stale_cb) (BVT
(clib_bihash_kv)
*, void *),
void *arg);
+int BV (clib_bihash_add_with_overwrite_cb) (
+ BVT (clib_bihash) * h, BVT (clib_bihash_kv) * add_v,
+ void (*overwrite_cb) (BVT (clib_bihash_kv) *, void *), void *arg);
int BV (clib_bihash_search) (BVT (clib_bihash) * h,
BVT (clib_bihash_kv) * search_v,
BVT (clib_bihash_kv) * return_v);
@@ -402,16 +406,15 @@ BV (clib_bihash_get_bucket) (BVT (clib_bihash) * h, u64 hash)
static inline int BV (clib_bihash_search_inline_with_hash)
(BVT (clib_bihash) * h, u64 hash, BVT (clib_bihash_kv) * key_result)
{
+ BVT (clib_bihash_kv) rv;
BVT (clib_bihash_value) * v;
BVT (clib_bihash_bucket) * b;
int i, limit;
- /* *INDENT-OFF* */
static const BVT (clib_bihash_bucket) mask = {
.linear_search = 1,
.log2_pages = -1
};
- /* *INDENT-ON* */
#if BIHASH_LAZY_INSTANTIATE
if (PREDICT_FALSE (h->instantiated == 0))
@@ -447,7 +450,10 @@ static inline int BV (clib_bihash_search_inline_with_hash)
{
if (BV (clib_bihash_key_compare) (v->kvp[i].key, key_result->key))
{
- *key_result = v->kvp[i];
+ rv = v->kvp[i];
+ if (BV (clib_bihash_is_free) (&rv))
+ return -1;
+ *key_result = rv;
return 0;
}
}
@@ -501,16 +507,15 @@ static inline int BV (clib_bihash_search_inline_2_with_hash)
(BVT (clib_bihash) * h,
u64 hash, BVT (clib_bihash_kv) * search_key, BVT (clib_bihash_kv) * valuep)
{
+ BVT (clib_bihash_kv) rv;
BVT (clib_bihash_value) * v;
BVT (clib_bihash_bucket) * b;
int i, limit;
-/* *INDENT-OFF* */
static const BVT (clib_bihash_bucket) mask = {
.linear_search = 1,
.log2_pages = -1
};
-/* *INDENT-ON* */
ASSERT (valuep);
@@ -548,7 +553,10 @@ static inline int BV (clib_bihash_search_inline_2_with_hash)
{
if (BV (clib_bihash_key_compare) (v->kvp[i].key, search_key->key))
{
- *valuep = v->kvp[i];
+ rv = v->kvp[i];
+ if (BV (clib_bihash_is_free) (&rv))
+ return -1;
+ *valuep = rv;
return 0;
}
}
diff --git a/src/vppinfra/bihash_vec8_8.h b/src/vppinfra/bihash_vec8_8.h
index 15c6d8cebff..822f1bcc51f 100644
--- a/src/vppinfra/bihash_vec8_8.h
+++ b/src/vppinfra/bihash_vec8_8.h
@@ -42,13 +42,19 @@ typedef struct
u64 value; /**< the value */
} clib_bihash_kv_vec8_8_t;
+static inline void
+clib_bihash_mark_free_vec8_8 (clib_bihash_kv_vec8_8_t *v)
+{
+ v->value = 0xFEEDFACE8BADF00DULL;
+}
+
/** Decide if a clib_bihash_kv_vec8_8_t instance is free
@param v- pointer to the (key,value) pair
*/
static inline int
clib_bihash_is_free_vec8_8 (clib_bihash_kv_vec8_8_t * v)
{
- if (v->key == ~0ULL && v->value == ~0ULL)
+ if (v->value == 0xFEEDFACE8BADF00DULL)
return 1;
return 0;
}
diff --git a/src/vppinfra/bitmap.h b/src/vppinfra/bitmap.h
index 92205bfc8e8..4ab7bcf7a7c 100644
--- a/src/vppinfra/bitmap.h
+++ b/src/vppinfra/bitmap.h
@@ -45,7 +45,6 @@
#include <vppinfra/vec.h>
#include <vppinfra/random.h>
#include <vppinfra/error.h>
-#include <vppinfra/bitops.h> /* for count_set_bits */
typedef uword clib_bitmap_t;
@@ -115,6 +114,24 @@ clib_bitmap_is_equal (uword * a, uword * b)
#define clib_bitmap_validate(v,n_bits) \
clib_bitmap_vec_validate ((v), ((n_bits) - 1) / BITS (uword))
+/** Copy a bitmap
+ @param dst - copy to
+ @param src - copy from
+*/
+static_always_inline void
+clib_bitmap_copy (clib_bitmap_t **dst, const clib_bitmap_t *src)
+{
+ if (vec_len (src))
+ {
+ clib_bitmap_vec_validate (*dst, vec_len (src) - 1);
+ vec_copy (*dst, src);
+ }
+ else
+ {
+ vec_reset_length (*dst);
+ }
+}
+
/* low-level routine to remove trailing zeros from a bitmap */
always_inline uword *
_clib_bitmap_remove_trailing_zeros (uword * a)
@@ -125,7 +142,7 @@ _clib_bitmap_remove_trailing_zeros (uword * a)
for (i = _vec_len (a) - 1; i >= 0; i--)
if (a[i] != 0)
break;
- _vec_len (a) = i + 1;
+ vec_set_len (a, i + 1);
}
return a;
}
@@ -161,7 +178,7 @@ clib_bitmap_set_no_check (uword * a, uword i, uword new_value)
@param ai - pointer to the bitmap
@param i - the bit position to interrogate
@param value - new value for the bit
- @returns the old value of the bit
+ @returns the (possibly reallocated) bitmap object pointer
*/
always_inline uword *
clib_bitmap_set (uword * ai, uword i, uword value)
@@ -188,6 +205,12 @@ clib_bitmap_set (uword * ai, uword i, uword value)
return ai;
}
+always_inline u8
+clib_bitmap_will_expand (uword *ai, uword i)
+{
+ return (i / BITS (ai[0])) >= vec_max_len (ai);
+}
+
/** Gets the ith bit value from a bitmap
@param ai - pointer to the bitmap
@param i - the bit position to interrogate
@@ -222,7 +245,7 @@ clib_bitmap_get_multiple_no_check (uword * ai, uword i, uword n_bits)
uword i0 = i / BITS (ai[0]);
uword i1 = i % BITS (ai[0]);
ASSERT (i1 + n_bits <= BITS (uword));
- return 0 != ((ai[i0] >> i1) & pow2_mask (n_bits));
+ return ((ai[i0] >> i1) & pow2_mask (n_bits));
}
/** Gets the ith through ith + n_bits bit values from a bitmap
@@ -282,7 +305,7 @@ clib_bitmap_set_multiple (uword * bitmap, uword i, uword value, uword n_bits)
i1 = i % BITS (bitmap[0]);
/* Allocate bitmap. */
- clib_bitmap_vec_validate (bitmap, (i + n_bits) / BITS (bitmap[0]));
+ clib_bitmap_vec_validate (bitmap, (i + n_bits - 1) / BITS (bitmap[0]));
l = vec_len (bitmap);
m = ~0;
@@ -316,14 +339,15 @@ clib_bitmap_set_multiple (uword * bitmap, uword i, uword value, uword n_bits)
always_inline uword *
clib_bitmap_set_region (uword * bitmap, uword i, uword value, uword n_bits)
{
- uword a0, a1, b0;
+ uword a0, a1, b0, b1;
uword i_end, mask;
a0 = i / BITS (bitmap[0]);
a1 = i % BITS (bitmap[0]);
- i_end = i + n_bits;
+ i_end = i + n_bits - 1;
b0 = i_end / BITS (bitmap[0]);
+ b1 = i_end % BITS (bitmap[0]);
clib_bitmap_vec_validate (bitmap, b0);
@@ -341,8 +365,7 @@ clib_bitmap_set_region (uword * bitmap, uword i, uword value, uword n_bits)
if (a0 == b0)
{
- word n_bits_left = n_bits - (BITS (bitmap[0]) - a1);
- mask = pow2_mask (n_bits_left);
+ mask = (uword) ~0 >> (BITS (bitmap[0]) - b1 - 1);
if (value)
bitmap[a0] |= mask;
else
@@ -495,37 +518,38 @@ always_inline uword *clib_bitmap_or (uword * ai, uword * bi);
always_inline uword *clib_bitmap_xor (uword * ai, uword * bi);
/* ALU function definition macro for functions taking two bitmaps. */
-#define _(name, body, check_zero) \
-always_inline uword * \
-clib_bitmap_##name (uword * ai, uword * bi) \
-{ \
- uword i, a, b, bi_len, n_trailing_zeros; \
- \
- n_trailing_zeros = 0; \
- bi_len = vec_len (bi); \
- if (bi_len > 0) \
- clib_bitmap_vec_validate (ai, bi_len - 1); \
- for (i = 0; i < vec_len (ai); i++) \
- { \
- a = ai[i]; \
- b = i < bi_len ? bi[i] : 0; \
- do { body; } while (0); \
- ai[i] = a; \
- if (check_zero) \
- n_trailing_zeros = a ? 0 : (n_trailing_zeros + 1); \
- } \
- if (check_zero) \
- _vec_len (ai) -= n_trailing_zeros; \
- return ai; \
-}
+#define _(name, body, check_zero) \
+ always_inline uword *clib_bitmap_##name (uword *ai, uword *bi) \
+ { \
+ uword i, a, b, bi_len, n_trailing_zeros; \
+ \
+ n_trailing_zeros = 0; \
+ bi_len = vec_len (bi); \
+ if (bi_len > 0) \
+ clib_bitmap_vec_validate (ai, bi_len - 1); \
+ for (i = 0; i < vec_len (ai); i++) \
+ { \
+ a = ai[i]; \
+ b = i < bi_len ? bi[i] : 0; \
+ do \
+ { \
+ body; \
+ } \
+ while (0); \
+ ai[i] = a; \
+ if (check_zero) \
+ n_trailing_zeros = a ? 0 : (n_trailing_zeros + 1); \
+ } \
+ if (check_zero) \
+ vec_dec_len (ai, n_trailing_zeros); \
+ return ai; \
+ }
/* ALU functions: */
-/* *INDENT-OFF* */
_(and, a = a & b, 1)
_(andnot, a = a & ~b, 1)
_(or, a = a | b, 0)
_(xor, a = a ^ b, 1)
-/* *INDENT-ON* */
#undef _
/** Logical operator across two bitmaps which duplicates the first bitmap
@@ -564,12 +588,10 @@ always_inline uword *clib_bitmap_dup_xor (uword * ai, uword * bi);
clib_bitmap_dup_##name (uword * ai, uword * bi) \
{ return clib_bitmap_##name (clib_bitmap_dup (ai), bi); }
-/* *INDENT-OFF* */
_(and);
_(andnot);
_(or);
_(xor);
-/* *INDENT-ON* */
#undef _
/* ALU function definition macro for functions taking one bitmap and an
@@ -592,12 +614,10 @@ clib_bitmap_##name (uword * ai, uword i) \
}
/* ALU functions immediate: */
-/* *INDENT-OFF* */
_(andi, a = a & b, 1)
_(andnoti, a = a & ~b, 1)
_(ori, a = a | b, 0)
_(xori, a = a ^ b, 1)
-/* *INDENT-ON* */
#undef _
/* ALU function definition macro for functions taking one bitmap and an
@@ -618,13 +638,11 @@ clib_bitmap_##name##_notrim (uword * ai, uword i) \
}
/* ALU functions immediate: */
-/* *INDENT-OFF* */
_(andi, a = a & b)
_(andnoti, a = a & ~b)
_(ori, a = a | b)
_(xori, a = a ^ b)
#undef _
-/* *INDENT-ON* */
/** Return a random bitmap of the requested length
@param ai - pointer to the destination bitmap
@@ -716,8 +734,7 @@ clib_bitmap_next_clear (uword * ai, uword i)
return log2_first_set (t) + i0 * BITS (ai[0]);
}
- /* no clear bit left in bitmap, return bit just beyond bitmap */
- return (i0 * BITS (ai[0])) + 1;
+ return i0 * BITS (ai[0]);
}
return i;
}
diff --git a/src/vppinfra/bitops.h b/src/vppinfra/bitops.h
index 17ad49ffb46..c1122f59ff6 100644
--- a/src/vppinfra/bitops.h
+++ b/src/vppinfra/bitops.h
@@ -38,18 +38,41 @@
#ifndef included_clib_bitops_h
#define included_clib_bitops_h
-#include <vppinfra/clib.h>
+#define SET_BIT(i) (1 << i)
+#define GET_BIT(n, i) (n >> i) & 1U
+
+static_always_inline uword
+clear_lowest_set_bit (uword x)
+{
+#ifdef __BMI__
+ return uword_bits > 32 ? _blsr_u64 (x) : _blsr_u32 (x);
+#else
+ return x & (x - 1);
+#endif
+}
+
+static_always_inline uword
+get_lowest_set_bit (uword x)
+{
+#ifdef __BMI__
+ return uword_bits > 32 ? _blsi_u64 (x) : _blsi_u32 (x);
+#else
+ return x & -x;
+#endif
+}
+
+static_always_inline u8
+get_lowest_set_bit_index (uword x)
+{
+ return uword_bits > 32 ? __builtin_ctzll (x) : __builtin_ctz (x);
+}
/* Population count from Hacker's Delight. */
always_inline uword
count_set_bits (uword x)
{
#ifdef __POPCNT__
-#if uword_bits == 64
- return __builtin_popcountll (x);
-#else
- return __builtin_popcount (x);
-#endif
+ return uword_bits > 32 ? __builtin_popcountll (x) : __builtin_popcount (x);
#else
#if uword_bits == 64
const uword c1 = 0x5555555555555555;
@@ -81,6 +104,15 @@ count_set_bits (uword x)
#endif
}
+#if uword_bits == 64
+#define count_leading_zeros(x) __builtin_clzll (x)
+#else
+#define count_leading_zeros(x) __builtin_clzll (x)
+#endif
+
+#define count_trailing_zeros(x) get_lowest_set_bit_index (x)
+#define log2_first_set(x) get_lowest_set_bit_index (x)
+
/* Based on "Hacker's Delight" code from GLS. */
typedef struct
{
@@ -163,19 +195,158 @@ next_with_same_number_of_set_bits (uword x)
return ripple | ones;
}
-#define foreach_set_bit(var,mask,body) \
-do { \
- uword _foreach_set_bit_m_##var = (mask); \
- uword _foreach_set_bit_f_##var; \
- while (_foreach_set_bit_m_##var != 0) \
- { \
- _foreach_set_bit_f_##var = first_set (_foreach_set_bit_m_##var); \
- _foreach_set_bit_m_##var ^= _foreach_set_bit_f_##var; \
- (var) = min_log2 (_foreach_set_bit_f_##var); \
- do { body; } while (0); \
- } \
-} while (0)
+#define foreach_set_bit_index(i, v) \
+ for (uword _tmp = (v) + 0 * (uword) (i = get_lowest_set_bit_index (v)); \
+ _tmp; \
+ i = get_lowest_set_bit_index (_tmp = clear_lowest_set_bit (_tmp)))
+
+static_always_inline uword
+uword_bitmap_count_set_bits (uword *bmp, uword n_uwords)
+{
+ uword count = 0;
+ while (n_uwords--)
+ count += count_set_bits (bmp++[0]);
+ return count;
+}
+
+static_always_inline uword
+uword_bitmap_is_bit_set (uword *bmp, uword bit_index)
+{
+ bmp += bit_index / uword_bits;
+ bit_index %= uword_bits;
+ return (bmp[0] >> bit_index) & 1;
+}
+
+static_always_inline void
+uword_bitmap_set_bits_at_index (uword *bmp, uword bit_index, uword n_bits)
+{
+ bmp += bit_index / uword_bits;
+ bit_index %= uword_bits;
+ uword max_bits = uword_bits - bit_index;
+
+ if (n_bits < max_bits)
+ {
+ bmp[0] |= pow2_mask (n_bits) << bit_index;
+ return;
+ }
+
+ bmp++[0] |= pow2_mask (max_bits) << bit_index;
+ n_bits -= max_bits;
+
+ for (; n_bits >= uword_bits; bmp++, n_bits -= uword_bits)
+ bmp[0] = ~0ULL;
+
+ if (n_bits)
+ bmp[0] |= pow2_mask (n_bits);
+}
+
+static_always_inline void
+uword_bitmap_clear_bits_at_index (uword *bmp, uword bit_index, uword n_bits)
+{
+ bmp += bit_index / uword_bits;
+ bit_index %= uword_bits;
+ uword max_bits = uword_bits - bit_index;
+
+ if (n_bits < max_bits)
+ {
+ bmp[0] &= ~(pow2_mask (n_bits) << bit_index);
+ return;
+ }
+
+ bmp++[0] &= ~(pow2_mask (max_bits) << bit_index);
+ n_bits -= max_bits;
+
+ for (; n_bits >= uword_bits; bmp++, n_bits -= uword_bits)
+ bmp[0] = 0ULL;
+
+ if (n_bits)
+ bmp[0] &= ~pow2_mask (n_bits);
+}
+
+static_always_inline int
+uword_bitmap_find_first_set (uword *bmp)
+{
+ uword *b = bmp;
+ while (b[0] == 0)
+ b++;
+
+ return (b - bmp) * uword_bits + get_lowest_set_bit_index (b[0]);
+}
+
+static_always_inline u32
+bit_extract_u32 (u32 v, u32 mask)
+{
+#ifdef __BMI2__
+ return _pext_u32 (v, mask);
+#else
+ u32 rv = 0;
+ u32 bit = 1;
+
+ while (mask)
+ {
+ u32 lowest_mask_bit = get_lowest_set_bit (mask);
+ mask ^= lowest_mask_bit;
+ rv |= (v & lowest_mask_bit) ? bit : 0;
+ bit <<= 1;
+ }
+
+ return rv;
+#endif
+}
+
+static_always_inline u64
+bit_extract_u64 (u64 v, u64 mask)
+{
+#ifdef __BMI2__
+ return _pext_u64 (v, mask);
+#else
+ u64 rv = 0;
+ u64 bit = 1;
+
+ while (mask)
+ {
+ u64 lowest_mask_bit = get_lowest_set_bit (mask);
+ mask ^= lowest_mask_bit;
+ rv |= (v & lowest_mask_bit) ? bit : 0;
+ bit <<= 1;
+ }
+
+ return rv;
+#endif
+}
+
+static_always_inline void
+u64_bit_set (u64 *p, u8 bit_index, u8 is_one)
+{
+ u64 val = *p;
+ val &= ~(1ULL << bit_index);
+ val |= 1ULL << bit_index;
+ *p = val;
+}
+
+static_always_inline void
+u32_bit_set (u32 *p, u8 bit_index, u8 is_one)
+{
+ u32 val = *p;
+ val &= ~(1U << bit_index);
+ val |= 1U << bit_index;
+ *p = val;
+}
+
+static_always_inline int
+u64_is_bit_set (u64 v, u8 bit_index)
+{
+ return (v & 1ULL << bit_index) != 0;
+}
+
+static_always_inline int
+u32_is_bit_set (u32 v, u8 bit_index)
+{
+ return (v & 1U << bit_index) != 0;
+}
+#else
+#warning "already included"
#endif /* included_clib_bitops_h */
/*
diff --git a/src/vppinfra/byte_order.h b/src/vppinfra/byte_order.h
index 9beb4470634..7bc26002a2f 100644
--- a/src/vppinfra/byte_order.h
+++ b/src/vppinfra/byte_order.h
@@ -173,12 +173,10 @@ _(i64);
#undef _
/* Dummy endian swap functions for IEEE floating-point numbers */
-/* *INDENT-OFF* */
always_inline f64 clib_net_to_host_f64 (f64 x) { return x; }
always_inline f64 clib_host_to_net_f64 (f64 x) { return x; }
always_inline f32 clib_net_to_host_f32 (f32 x) { return x; }
always_inline f32 clib_host_to_net_f32 (f32 x) { return x; }
-/* *INDENT-ON* */
#endif /* included_clib_byte_order_h */
diff --git a/src/vppinfra/cJSON.c b/src/vppinfra/cJSON.c
index 5b26a4be4e1..448435de4dc 100644
--- a/src/vppinfra/cJSON.c
+++ b/src/vppinfra/cJSON.c
@@ -157,7 +157,7 @@ typedef struct internal_hooks
{
void *(CJSON_CDECL *allocate)(size_t size);
void (CJSON_CDECL *deallocate)(void *pointer);
- void *(CJSON_CDECL *reallocate)(void *pointer, size_t size);
+ void *(CJSON_CDECL *reallocate)(void *pointer, size_t new_size, size_t old_size);
} internal_hooks;
#if defined(_MSC_VER)
@@ -170,16 +170,20 @@ static void CJSON_CDECL internal_free(void *pointer)
{
free(pointer);
}
-static void * CJSON_CDECL internal_realloc(void *pointer, size_t size)
-{
- return realloc(pointer, size);
-}
#else
#define internal_malloc malloc
#define internal_free free
-#define internal_realloc realloc
#endif
+static void * CJSON_CDECL internal_realloc(void *pointer, size_t new_size,
+ size_t old_size)
+{
+ return realloc(pointer, new_size);
+}
+
+static void *
+cjson_realloc_internal (void *ptr, size_t new_size, size_t old_size);
+
/* strlen of character literals resolved at compile time */
#define static_strlen(string_literal) (sizeof(string_literal) - sizeof(""))
@@ -213,7 +217,7 @@ CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks)
/* Reset hooks */
global_hooks.allocate = malloc;
global_hooks.deallocate = free;
- global_hooks.reallocate = realloc;
+ global_hooks.reallocate = internal_realloc;
return;
}
@@ -233,7 +237,11 @@ CJSON_PUBLIC(void) cJSON_InitHooks(cJSON_Hooks* hooks)
global_hooks.reallocate = NULL;
if ((global_hooks.allocate == malloc) && (global_hooks.deallocate == free))
{
- global_hooks.reallocate = realloc;
+ global_hooks.reallocate = internal_realloc;
+ }
+ else
+ {
+ global_hooks.reallocate = cjson_realloc_internal;
}
}
@@ -435,6 +443,27 @@ typedef struct
internal_hooks hooks;
} printbuffer;
+static void *
+cjson_realloc_internal (void *ptr, size_t new_size, size_t old_size)
+{
+ size_t copy_size;
+ if (old_size < new_size)
+ copy_size = old_size;
+ else
+ copy_size = new_size;
+
+ unsigned char *newbuffer = global_hooks.allocate(new_size);
+ if (!newbuffer)
+ {
+ global_hooks.deallocate(ptr);
+ return NULL;
+ }
+
+ memcpy (newbuffer, ptr, copy_size);
+ global_hooks.deallocate (ptr);
+ return newbuffer;
+}
+
/* realloc printbuffer if necessary to have at least "needed" bytes more */
static unsigned char* ensure(printbuffer * const p, size_t needed)
{
@@ -486,34 +515,13 @@ static unsigned char* ensure(printbuffer * const p, size_t needed)
newsize = needed * 2;
}
- if (p->hooks.reallocate != NULL)
+ newbuffer = p->hooks.reallocate (p->buffer, newsize, p->length);
+ if (newbuffer == NULL)
{
- /* reallocate with realloc if available */
- newbuffer = (unsigned char*)p->hooks.reallocate(p->buffer, newsize);
- if (newbuffer == NULL)
- {
- p->hooks.deallocate(p->buffer);
- p->length = 0;
- p->buffer = NULL;
-
- return NULL;
- }
- }
- else
- {
- /* otherwise reallocate manually */
- newbuffer = (unsigned char*)p->hooks.allocate(newsize);
- if (!newbuffer)
- {
- p->hooks.deallocate(p->buffer);
- p->length = 0;
- p->buffer = NULL;
-
- return NULL;
- }
-
- memcpy (newbuffer, p->buffer, p->offset + 1);
- p->hooks.deallocate (p->buffer);
+ p->hooks.deallocate(p->buffer);
+ p->length = 0;
+ p->buffer = NULL;
+ return NULL;
}
p->length = newsize;
p->buffer = newbuffer;
@@ -1208,7 +1216,7 @@ static unsigned char *print(const cJSON * const item, cJSON_bool format, const i
/* check if reallocate is available */
if (hooks->reallocate != NULL)
{
- printed = (unsigned char*) hooks->reallocate(buffer->buffer, buffer->offset + 1);
+ printed = (unsigned char*) hooks->reallocate(buffer->buffer, buffer->offset + 1, default_buffer_size);
if (printed == NULL) {
goto fail;
}
@@ -3112,3 +3120,8 @@ CJSON_PUBLIC(void) cJSON_free(void *object)
{
global_hooks.deallocate(object);
}
+
+CJSON_PUBLIC(void *) cJSON_realloc(void *object, size_t new_size, size_t old_size)
+{
+ return global_hooks.reallocate(object, new_size, old_size);
+}
diff --git a/src/vppinfra/cJSON.h b/src/vppinfra/cJSON.h
index e97e5f4cdc4..1474c4e5c49 100644
--- a/src/vppinfra/cJSON.h
+++ b/src/vppinfra/cJSON.h
@@ -127,6 +127,8 @@ typedef struct cJSON_Hooks
/* malloc/free are CDECL on Windows regardless of the default calling convention of the compiler, so ensure the hooks allow passing those functions directly. */
void *(CJSON_CDECL *malloc_fn)(size_t sz);
void (CJSON_CDECL *free_fn)(void *ptr);
+ void *(CJSON_CDECL *realloc_fn) (void *ptr, size_t new_size,
+ size_t old_size);
} cJSON_Hooks;
typedef int cJSON_bool;
@@ -285,6 +287,8 @@ CJSON_PUBLIC(char*) cJSON_SetValuestring(cJSON *object, const char *valuestring)
/* malloc/free objects using the malloc/free functions that have been set with cJSON_InitHooks */
CJSON_PUBLIC(void *) cJSON_malloc(size_t size);
CJSON_PUBLIC(void) cJSON_free(void *object);
+CJSON_PUBLIC (void *)
+cJSON_realloc (void *object, size_t new_size, size_t old_size);
#ifdef __cplusplus
}
diff --git a/src/vppinfra/cache.h b/src/vppinfra/cache.h
index 04f91e00061..4229a068486 100644
--- a/src/vppinfra/cache.h
+++ b/src/vppinfra/cache.h
@@ -40,66 +40,64 @@
#include <vppinfra/error_bootstrap.h>
-/*
- * Allow CFLAGS to override the configured / deduced cache line size
- */
-#ifndef CLIB_LOG2_CACHE_LINE_BYTES
-
/* Default cache line size of 64 bytes. */
#ifndef CLIB_LOG2_CACHE_LINE_BYTES
#define CLIB_LOG2_CACHE_LINE_BYTES 6
#endif
-#endif /* CLIB_LOG2_CACHE_LINE_BYTES defined */
-
-#if (CLIB_LOG2_CACHE_LINE_BYTES >= 9)
-#error Cache line size 512 bytes or greater
+/* How much data prefetch instruction prefetches */
+#ifndef CLIB_LOG2_CACHE_PREFETCH_BYTES
+#define CLIB_LOG2_CACHE_PREFETCH_BYTES CLIB_LOG2_CACHE_LINE_BYTES
#endif
-#define CLIB_CACHE_LINE_BYTES (1 << CLIB_LOG2_CACHE_LINE_BYTES)
-#define CLIB_CACHE_LINE_ALIGN_MARK(mark) u8 mark[0] __attribute__((aligned(CLIB_CACHE_LINE_BYTES)))
-#define CLIB_CACHE_LINE_ROUND(x) ((x + CLIB_CACHE_LINE_BYTES - 1) & ~(CLIB_CACHE_LINE_BYTES - 1))
-
/* Default cache line fill buffers. */
#ifndef CLIB_N_PREFETCHES
#define CLIB_N_PREFETCHES 16
#endif
+#define CLIB_CACHE_LINE_BYTES (1 << CLIB_LOG2_CACHE_LINE_BYTES)
+#define CLIB_CACHE_PREFETCH_BYTES (1 << CLIB_LOG2_CACHE_PREFETCH_BYTES)
+#define CLIB_CACHE_LINE_ALIGN_MARK(mark) \
+ u8 mark[0] __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES)))
+#define CLIB_CACHE_LINE_ROUND(x) \
+ ((x + CLIB_CACHE_LINE_BYTES - 1) & ~(CLIB_CACHE_LINE_BYTES - 1))
+
/* Read/write arguments to __builtin_prefetch. */
#define CLIB_PREFETCH_READ 0
#define CLIB_PREFETCH_LOAD 0 /* alias for read */
#define CLIB_PREFETCH_WRITE 1
#define CLIB_PREFETCH_STORE 1 /* alias for write */
-#define _CLIB_PREFETCH(n,size,type) \
- if ((size) > (n)*CLIB_CACHE_LINE_BYTES) \
- __builtin_prefetch (_addr + (n)*CLIB_CACHE_LINE_BYTES, \
- CLIB_PREFETCH_##type, \
- /* locality */ 3);
-
-#define CLIB_PREFETCH(addr,size,type) \
-do { \
- void * _addr = (addr); \
- \
- ASSERT ((size) <= 4*CLIB_CACHE_LINE_BYTES); \
- _CLIB_PREFETCH (0, size, type); \
- _CLIB_PREFETCH (1, size, type); \
- _CLIB_PREFETCH (2, size, type); \
- _CLIB_PREFETCH (3, size, type); \
-} while (0)
+#define _CLIB_PREFETCH(n, size, type) \
+ if ((size) > (n) *CLIB_CACHE_PREFETCH_BYTES) \
+ __builtin_prefetch (_addr + (n) *CLIB_CACHE_PREFETCH_BYTES, \
+ CLIB_PREFETCH_##type, /* locality */ 3);
+
+#define CLIB_PREFETCH(addr, size, type) \
+ do \
+ { \
+ void *_addr = (addr); \
+ \
+ ASSERT ((size) <= 4 * CLIB_CACHE_PREFETCH_BYTES); \
+ _CLIB_PREFETCH (0, size, type); \
+ _CLIB_PREFETCH (1, size, type); \
+ _CLIB_PREFETCH (2, size, type); \
+ _CLIB_PREFETCH (3, size, type); \
+ } \
+ while (0)
#undef _
static_always_inline void
clib_prefetch_load (void *p)
{
- CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, LOAD);
+ __builtin_prefetch (p, /* rw */ 0, /* locality */ 3);
}
static_always_inline void
clib_prefetch_store (void *p)
{
- CLIB_PREFETCH (p, CLIB_CACHE_LINE_BYTES, STORE);
+ __builtin_prefetch (p, /* rw */ 1, /* locality */ 3);
}
#endif /* included_clib_cache_h */
diff --git a/src/vppinfra/clib.h b/src/vppinfra/clib.h
index ade7e5fa4bb..d14582492d6 100644
--- a/src/vppinfra/clib.h
+++ b/src/vppinfra/clib.h
@@ -53,6 +53,12 @@
#define CLIB_UNIX
#endif
+#ifdef __linux__
+#define CLIB_LINUX 1
+#else
+#define CLIB_LINUX 0
+#endif
+
#include <vppinfra/types.h>
#include <vppinfra/atomics.h>
@@ -68,6 +74,8 @@
#define BITS(x) (8*sizeof(x))
#define ARRAY_LEN(x) (sizeof (x)/sizeof (x[0]))
+#define FOREACH_ARRAY_ELT(a, b) \
+ for (typeof ((b)[0]) *(a) = (b); (a) - (b) < ARRAY_LEN (b); (a)++)
#define _STRUCT_FIELD(t,f) (((t *) 0)->f)
#define STRUCT_OFFSET_OF(t,f) offsetof(t, f)
@@ -95,15 +103,45 @@
/* Make a string from the macro's argument */
#define CLIB_STRING_MACRO(x) #x
+#define CLIB_STRING_ARRAY(...) \
+ (char *[]) { __VA_ARGS__, 0 }
+
+/* sanitizers */
+#ifdef __has_feature
+#if __has_feature(address_sanitizer)
+#define CLIB_SANITIZE_ADDR 1
+#endif
+#elif defined(__SANITIZE_ADDRESS__)
+#define CLIB_SANITIZE_ADDR 1
+#endif
+
#define __clib_unused __attribute__ ((unused))
#define __clib_weak __attribute__ ((weak))
#define __clib_packed __attribute__ ((packed))
+#define __clib_flatten __attribute__ ((flatten))
#define __clib_constructor __attribute__ ((constructor))
#define __clib_noinline __attribute__ ((noinline))
+#ifdef __clang__
+#define __clib_noclone
+#else
+#define __clib_noclone __attribute__ ((noclone))
+#endif
#define __clib_aligned(x) __attribute__ ((aligned(x)))
#define __clib_section(s) __attribute__ ((section(s)))
#define __clib_warn_unused_result __attribute__ ((warn_unused_result))
#define __clib_export __attribute__ ((visibility("default")))
+#ifdef __clang__
+#define __clib_no_tail_calls __attribute__ ((disable_tail_calls))
+#else
+#define __clib_no_tail_calls \
+ __attribute__ ((optimize ("no-optimize-sibling-calls")))
+#endif
+
+#ifdef CLIB_SANITIZE_ADDR
+#define __clib_nosanitize_addr __attribute__ ((no_sanitize_address))
+#else
+#define __clib_nosanitize_addr
+#endif
#define never_inline __attribute__ ((__noinline__))
@@ -124,10 +162,17 @@
#define PREDICT_FALSE(x) __builtin_expect((x),0)
#define PREDICT_TRUE(x) __builtin_expect((x),1)
#define COMPILE_TIME_CONST(x) __builtin_constant_p (x)
+#define CLIB_ASSUME(x) \
+ do \
+ { \
+ if (!(x)) \
+ __builtin_unreachable (); \
+ } \
+ while (0)
/*
* Compiler barrier
- * prevent compiler to reorder memory access accross this boundary
+ * prevent compiler to reorder memory access across this boundary
* prevent compiler to cache values in register (force reload)
* Not to be confused with CPU memory barrier below
*/
@@ -136,7 +181,7 @@
/* Full memory barrier (read and write). */
#define CLIB_MEMORY_BARRIER() __sync_synchronize ()
-#if __x86_64__
+#if __SSE__
#define CLIB_MEMORY_STORE_BARRIER() __builtin_ia32_sfence ()
#else
#define CLIB_MEMORY_STORE_BARRIER() __sync_synchronize ()
@@ -152,26 +197,17 @@
decl __attribute ((destructor)); \
decl
-/* Use __builtin_clz if available. */
-#if uword_bits == 64
-#define count_leading_zeros(x) __builtin_clzll (x)
-#define count_trailing_zeros(x) __builtin_ctzll (x)
-#else
-#define count_leading_zeros(x) __builtin_clzl (x)
-#define count_trailing_zeros(x) __builtin_ctzl (x)
-#endif
-
-#if defined (count_leading_zeros)
always_inline uword
-clear_lowest_set_bit (uword x)
+pow2_mask (uword x)
{
#ifdef __BMI2__
- return _blsr_u64 (x);
-#else
- return x ^ (1ULL << count_trailing_zeros (x));
+ return _bzhi_u64 (-1ULL, x);
#endif
+ return ((uword) 1 << x) - (uword) 1;
}
+#include <vppinfra/bitops.h>
+
always_inline uword
min_log2 (uword x)
{
@@ -179,45 +215,6 @@ min_log2 (uword x)
n = count_leading_zeros (x);
return BITS (uword) - n - 1;
}
-#else
-always_inline uword
-min_log2 (uword x)
-{
- uword a = x, b = BITS (uword) / 2, c = 0, r = 0;
-
- /* Reduce x to 4 bit result. */
-#define _ \
-{ \
- c = a >> b; \
- if (c) a = c; \
- if (c) r += b; \
- b /= 2; \
-}
-
- if (BITS (uword) > 32)
- _;
- _;
- _;
- _;
-#undef _
-
- /* Do table lookup on 4 bit partial. */
- if (BITS (uword) > 32)
- {
- const u64 table = 0x3333333322221104LL;
- uword t = (table >> (4 * a)) & 0xf;
- r = t < 4 ? r + t : ~0;
- }
- else
- {
- const u32 table = 0x22221104;
- uword t = (a & 8) ? 3 : ((table >> (4 * a)) & 0xf);
- r = t < 4 ? r + t : ~0;
- }
-
- return r;
-}
-#endif
always_inline uword
max_log2 (uword x)
@@ -249,12 +246,6 @@ min_log2_u64 (u64 x)
}
always_inline uword
-pow2_mask (uword x)
-{
- return ((uword) 1 << x) - (uword) 1;
-}
-
-always_inline uword
max_pow2 (uword x)
{
word y = (word) 1 << min_log2 (x);
@@ -293,18 +284,6 @@ first_set (uword x)
return x & -x;
}
-always_inline uword
-log2_first_set (uword x)
-{
- uword result;
-#ifdef count_trailing_zeros
- result = count_trailing_zeros (x);
-#else
- result = min_log2 (first_set (x));
-#endif
- return result;
-}
-
always_inline f64
flt_round_down (f64 x)
{
@@ -360,6 +339,44 @@ extract_bits (uword x, int start, int count)
_x < 0 ? -_x : _x; \
})
+static_always_inline u64
+u64_add_with_carry (u64 *carry, u64 a, u64 b)
+{
+#if defined(__x86_64__)
+ unsigned long long v;
+ *carry = _addcarry_u64 (*carry, a, b, &v);
+ return (u64) v;
+#elif defined(__clang__)
+ unsigned long long c;
+ u64 rv = __builtin_addcll (a, b, *carry, &c);
+ *carry = c;
+ return rv;
+#else
+ u64 rv = a + b + *carry;
+ *carry = rv < a;
+ return rv;
+#endif
+}
+
+static_always_inline u64
+u64_sub_with_borrow (u64 *borrow, u64 x, u64 y)
+{
+#if defined(__x86_64__)
+ unsigned long long v;
+ *borrow = _subborrow_u64 (*borrow, x, y, &v);
+ return (u64) v;
+#elif defined(__clang__)
+ unsigned long long b;
+ u64 rv = __builtin_subcll (x, y, *borrow, &b);
+ *borrow = b;
+ return rv;
+#else
+ unsigned long long rv = x - (y + *borrow);
+ *borrow = rv >= x;
+ return rv;
+#endif
+}
+
/* Standard standalone-only function declarations. */
#ifndef CLIB_UNIX
void clib_standalone_init (void *memory, uword memory_bytes);
diff --git a/src/vppinfra/config.h.in b/src/vppinfra/config.h.in
index 7aad027a8b6..c2b804cd590 100644
--- a/src/vppinfra/config.h.in
+++ b/src/vppinfra/config.h.in
@@ -20,7 +20,7 @@
#define CLIB_LOG2_CACHE_LINE_BYTES @LOG2_CACHE_LINE_BYTES@
#endif
-#define CLIB_TARGET_TRIPLET "@CMAKE_C_COMPILER_TARGET@"
+#define CLIB_LIB_DIR "@VPP_LIBRARY_DIR@"
#define CLIB_VECTOR_GROW_BY_ONE @VECTOR_GROW_BY_ONE@
#endif
diff --git a/src/vppinfra/cpu.c b/src/vppinfra/cpu.c
index 045d1f727f4..385a4e25408 100644
--- a/src/vppinfra/cpu.c
+++ b/src/vppinfra/cpu.c
@@ -17,70 +17,76 @@
#include <vppinfra/format.h>
#include <vppinfra/cpu.h>
-#define foreach_x86_cpu_uarch \
- _(0x06, 0x9e, "Kaby Lake", "Kaby Lake DT/H/S/X") \
- _(0x06, 0x8e, "Kaby Lake", "Kaby Lake Y/U") \
- _(0x06, 0x8c, "Tiger Lake", "Tiger Lake U") \
- _(0x06, 0x86, "Tremont", "Elkhart Lake") \
- _(0x06, 0x85, "Knights Mill", "Knights Mill") \
- _(0x06, 0x7e, "Ice Lake", "Ice Lake U") \
- _(0x06, 0x7d, "Ice Lake", "Ice Lake Y") \
- _(0x06, 0x7a, "Goldmont Plus", "Gemini Lake") \
- _(0x06, 0x6c, "Ice Lake", "Ice Lake SP") \
- _(0x06, 0x6a, "Ice Lake", "Ice Lake DE") \
- _(0x06, 0x66, "Cannon Lake", "Cannon Lake U") \
- _(0x06, 0x5f, "Goldmont", "Denverton") \
- _(0x06, 0x5e, "Skylake", "Skylake DT/H/S") \
- _(0x06, 0x5c, "Goldmont", "Apollo Lake") \
- _(0x06, 0x5a, "Silvermont", "Moorefield") \
- _(0x06, 0x57, "Knights Landing", "Knights Landing") \
- _(0x06, 0x56, "Broadwell", "Broadwell DE") \
- _(0x06, 0x55, "Skylake", "Skylake X/SP") \
- _(0x06, 0x4f, "Broadwell", "Broadwell EP/EX") \
- _(0x06, 0x4e, "Skylake", "Skylake Y/U") \
- _(0x06, 0x4d, "Silvermont", "Rangeley") \
- _(0x06, 0x4c, "Airmont", "Braswell") \
- _(0x06, 0x47, "Broadwell", "Broadwell H") \
- _(0x06, 0x46, "Haswell", "Crystalwell") \
- _(0x06, 0x45, "Haswell", "Haswell ULT") \
- _(0x06, 0x3f, "Haswell", "Haswell E") \
- _(0x06, 0x3e, "Ivy Bridge", "Ivy Bridge E/EN/EP") \
- _(0x06, 0x3d, "Broadwell", "Broadwell U") \
- _(0x06, 0x3c, "Haswell", "Haswell") \
- _(0x06, 0x3a, "Ivy Bridge", "IvyBridge") \
- _(0x06, 0x37, "Silvermont", "BayTrail") \
- _(0x06, 0x36, "Saltwell", "Cedarview,Centerton") \
- _(0x06, 0x35, "Saltwell", "Cloverview") \
- _(0x06, 0x2f, "Westmere", "Westmere EX") \
- _(0x06, 0x2e, "Nehalem", "Nehalem EX") \
- _(0x06, 0x2d, "Sandy Bridge", "SandyBridge E/EN/EP") \
- _(0x06, 0x2c, "Westmere", "Westmere EP/EX,Gulftown") \
- _(0x06, 0x2a, "Sandy Bridge", "Sandy Bridge") \
- _(0x06, 0x27, "Saltwell", "Medfield") \
- _(0x06, 0x26, "Bonnell", "Tunnel Creek") \
- _(0x06, 0x25, "Westmere", "Arrandale,Clarksdale") \
- _(0x06, 0x1e, "Nehalem", "Clarksfield,Lynnfield,Jasper Forest") \
- _(0x06, 0x1d, "Penryn", "Dunnington") \
- _(0x06, 0x1c, "Bonnell", "Pineview,Silverthorne") \
- _(0x06, 0x1a, "Nehalem", "Nehalem EP,Bloomfield)") \
- _(0x06, 0x17, "Penryn", "Yorkfield,Wolfdale,Penryn,Harpertown")
+#define foreach_x86_cpu_uarch \
+ _ (0x06, 0x9e, "Kaby Lake", "Kaby Lake DT/H/S/X") \
+ _ (0x06, 0x9c, "Tremont", "Jasper Lake") \
+ _ (0x06, 0x9a, "Alder Lake", "Alder Lake L") \
+ _ (0x06, 0x97, "Alder Lake", "Alder Lake") \
+ _ (0x06, 0x96, "Tremont", "Elkhart Lake") \
+ _ (0x06, 0x8f, "Sapphire Rapids", "Sapphire Rapids X") \
+ _ (0x06, 0x8e, "Kaby Lake", "Kaby Lake Y/U") \
+ _ (0x06, 0x8c, "Tiger Lake", "Tiger Lake U") \
+ _ (0x06, 0x86, "Tremont", "Jacobsville") \
+ _ (0x06, 0x85, "Knights Mill", "Knights Mill") \
+ _ (0x06, 0x7e, "Ice Lake", "Ice Lake U") \
+ _ (0x06, 0x7d, "Ice Lake", "Ice Lake Y") \
+ _ (0x06, 0x7a, "Goldmont Plus", "Gemini Lake") \
+ _ (0x06, 0x6c, "Ice Lake", "Ice Lake SP") \
+ _ (0x06, 0x6a, "Ice Lake", "Ice Lake DE") \
+ _ (0x06, 0x66, "Cannon Lake", "Cannon Lake U") \
+ _ (0x06, 0x5f, "Goldmont", "Denverton") \
+ _ (0x06, 0x5e, "Skylake", "Skylake DT/H/S") \
+ _ (0x06, 0x5c, "Goldmont", "Apollo Lake") \
+ _ (0x06, 0x5a, "Silvermont", "Moorefield") \
+ _ (0x06, 0x57, "Knights Landing", "Knights Landing") \
+ _ (0x06, 0x56, "Broadwell", "Broadwell DE") \
+ _ (0x06, 0x55, "Skylake", "Skylake X/SP") \
+ _ (0x06, 0x4f, "Broadwell", "Broadwell EP/EX") \
+ _ (0x06, 0x4e, "Skylake", "Skylake Y/U") \
+ _ (0x06, 0x4d, "Silvermont", "Rangeley") \
+ _ (0x06, 0x4c, "Airmont", "Braswell") \
+ _ (0x06, 0x47, "Broadwell", "Broadwell H") \
+ _ (0x06, 0x46, "Haswell", "Crystalwell") \
+ _ (0x06, 0x45, "Haswell", "Haswell ULT") \
+ _ (0x06, 0x3f, "Haswell", "Haswell E") \
+ _ (0x06, 0x3e, "Ivy Bridge", "Ivy Bridge E/EN/EP") \
+ _ (0x06, 0x3d, "Broadwell", "Broadwell U") \
+ _ (0x06, 0x3c, "Haswell", "Haswell") \
+ _ (0x06, 0x3a, "Ivy Bridge", "IvyBridge") \
+ _ (0x06, 0x37, "Silvermont", "BayTrail") \
+ _ (0x06, 0x36, "Saltwell", "Cedarview,Centerton") \
+ _ (0x06, 0x35, "Saltwell", "Cloverview") \
+ _ (0x06, 0x2f, "Westmere", "Westmere EX") \
+ _ (0x06, 0x2e, "Nehalem", "Nehalem EX") \
+ _ (0x06, 0x2d, "Sandy Bridge", "SandyBridge E/EN/EP") \
+ _ (0x06, 0x2c, "Westmere", "Westmere EP/EX,Gulftown") \
+ _ (0x06, 0x2a, "Sandy Bridge", "Sandy Bridge") \
+ _ (0x06, 0x27, "Saltwell", "Medfield") \
+ _ (0x06, 0x26, "Bonnell", "Tunnel Creek") \
+ _ (0x06, 0x25, "Westmere", "Arrandale,Clarksdale") \
+ _ (0x06, 0x1e, "Nehalem", "Clarksfield,Lynnfield,Jasper Forest") \
+ _ (0x06, 0x1d, "Penryn", "Dunnington") \
+ _ (0x06, 0x1c, "Bonnell", "Pineview,Silverthorne") \
+ _ (0x06, 0x1a, "Nehalem", "Nehalem EP,Bloomfield)") \
+ _ (0x06, 0x17, "Penryn", "Yorkfield,Wolfdale,Penryn,Harpertown")
/* _(implementor-id, part-id, vendor-name, cpu-name, show CPU pass as string) */
-#define foreach_aarch64_cpu_uarch \
- _(0x41, 0xd03, "ARM", "Cortex-A53", 0) \
- _(0x41, 0xd07, "ARM", "Cortex-A57", 0) \
- _(0x41, 0xd08, "ARM", "Cortex-A72", 0) \
- _(0x41, 0xd09, "ARM", "Cortex-A73", 0) \
- _(0x41, 0xd0a, "ARM", "Cortex-A75", 0) \
- _(0x41, 0xd0b, "ARM", "Cortex-A76", 0) \
- _(0x41, 0xd0c, "ARM", "Neoverse-N1", 0) \
- _(0x41, 0xd4a, "ARM", "Neoverse-E1", 0) \
- _(0x43, 0x0a1, "Marvell", "THUNDERX CN88XX", 0) \
- _(0x43, 0x0a2, "Marvell", "OCTEON TX CN81XX", 0) \
- _(0x43, 0x0a3, "Marvell", "OCTEON TX CN83XX", 0) \
- _(0x43, 0x0af, "Marvell", "THUNDERX2 CN99XX", 1) \
- _(0x43, 0x0b1, "Marvell", "OCTEON TX2 CN98XX", 1) \
- _(0x43, 0x0b2, "Marvell", "OCTEON TX2 CN96XX", 1)
+#define foreach_aarch64_cpu_uarch \
+ _ (0x41, 0xd03, "ARM", "Cortex-A53", 0) \
+ _ (0x41, 0xd07, "ARM", "Cortex-A57", 0) \
+ _ (0x41, 0xd08, "ARM", "Cortex-A72", 0) \
+ _ (0x41, 0xd09, "ARM", "Cortex-A73", 0) \
+ _ (0x41, 0xd0a, "ARM", "Cortex-A75", 0) \
+ _ (0x41, 0xd0b, "ARM", "Cortex-A76", 0) \
+ _ (0x41, 0xd0c, "ARM", "Neoverse-N1", 0) \
+ _ (0x41, 0xd49, "ARM", "Neoverse-N2", 0) \
+ _ (0x41, 0xd4a, "ARM", "Neoverse-E1", 0) \
+ _ (0x43, 0x0a1, "Marvell", "THUNDERX CN88XX", 0) \
+ _ (0x43, 0x0a2, "Marvell", "OCTEON TX CN81XX", 0) \
+ _ (0x43, 0x0a3, "Marvell", "OCTEON TX CN83XX", 0) \
+ _ (0x43, 0x0af, "Marvell", "THUNDERX2 CN99XX", 1) \
+ _ (0x43, 0x0b1, "Marvell", "OCTEON TX2 CN98XX", 1) \
+ _ (0x43, 0x0b2, "Marvell", "OCTEON TX2 CN96XX", 1)
__clib_export u8 *
format_cpu_uarch (u8 * s, va_list * args)
@@ -88,13 +94,34 @@ format_cpu_uarch (u8 * s, va_list * args)
#if __x86_64__
u32 __attribute__ ((unused)) eax, ebx, ecx, edx;
u8 model, family, stepping;
+ u8 amd_vendor = 0;
+
+ if (__get_cpuid (0, &eax, &ebx, &ecx, &edx) == 0)
+ return format (s, "unknown (missing cpuid)");
+
+ if (amd_vendor (ebx, ecx, edx))
+ amd_vendor = 1;
if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0)
return format (s, "unknown (missing cpuid)");
- model = ((eax >> 4) & 0x0f) | ((eax >> 12) & 0xf0);
- family = (eax >> 8) & 0x0f;
stepping = eax & 0x0f;
+ if (amd_vendor)
+ {
+ family = ((eax >> 8) & 0x0f);
+ model = ((eax >> 4) & 0x0f);
+ if (family >= 0xf)
+ {
+ family = family + ((eax >> 20) & 0xf);
+ model = (model | ((eax >> 12) & 0xf0));
+ }
+ return format (s, "Zen (family 0x%02x model 0x%02x)", family, model);
+ }
+ else
+ {
+ model = ((eax >> 4) & 0x0f) | ((eax >> 12) & 0xf0);
+ family = (eax >> 8) & 0x0f;
+ }
#define _(f,m,a,c) if ((model == m) && (family == f)) return \
format(s, "[0x%x] %s ([0x%02x] %s) stepping 0x%x", f, a, m, c, stepping);
@@ -103,30 +130,28 @@ format(s, "[0x%x] %s ([0x%02x] %s) stepping 0x%x", f, a, m, c, stepping);
return format (s, "unknown (family 0x%02x model 0x%02x)", family, model);
#elif __aarch64__
- int fd;
unformat_input_t input;
u32 implementer, primary_part_number, variant, revision;
- fd = open ("/proc/cpuinfo", 0);
- if (fd < 0)
- return format (s, "unknown");
-
- unformat_init_clib_file (&input, fd);
- while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ if (unformat_init_file (&input, "/proc/cpuinfo"))
{
- if (unformat (&input, "CPU implementer%_: 0x%x", &implementer))
- ;
- else if (unformat (&input, "CPU part%_: 0x%x", &primary_part_number))
- ;
- else if (unformat (&input, "CPU variant%_: 0x%x", &variant))
- ;
- else if (unformat (&input, "CPU revision%_: %u", &revision))
- ;
- else
- unformat_skip_line (&input);
+ while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&input, "CPU implementer%_: 0x%x", &implementer))
+ ;
+ else if (unformat (&input, "CPU part%_: 0x%x", &primary_part_number))
+ ;
+ else if (unformat (&input, "CPU variant%_: 0x%x", &variant))
+ ;
+ else if (unformat (&input, "CPU revision%_: %u", &revision))
+ ;
+ else
+ unformat_skip_line (&input);
+ }
+ unformat_free (&input);
}
- unformat_free (&input);
- close (fd);
+ else
+ return format (s, "unknown");
#define _(i,p,a,c,_format) if ((implementer == i) && (primary_part_number == p)){ \
if (_format)\
@@ -193,7 +218,7 @@ format_cpu_model_name (u8 * s, va_list * args)
#endif
}
-
+#if defined(__x86_64__) || defined(__aarch64__)
static inline char const *
flag_skip_prefix (char const *flag, const char *pfx, int len)
{
@@ -201,20 +226,22 @@ flag_skip_prefix (char const *flag, const char *pfx, int len)
return flag + len - 1;
return flag;
}
+#endif
__clib_export u8 *
-format_cpu_flags (u8 * s, va_list * args)
+format_cpu_flags (u8 *s, va_list *args)
{
#if defined(__x86_64__)
-#define _(flag, func, reg, bit) \
- if (clib_cpu_supports_ ## flag()) \
- s = format (s, "%s ", flag_skip_prefix(#flag, "x86_", sizeof("x86_")));
+#define _(flag, func, reg, bit) \
+ if (clib_cpu_supports_##flag ()) \
+ s = format (s, "%s ", flag_skip_prefix (#flag, "x86_", sizeof ("x86_")));
foreach_x86_64_flags return s;
#undef _
#elif defined(__aarch64__)
-#define _(flag, bit) \
- if (clib_cpu_supports_ ## flag()) \
- s = format (s, "%s ", flag_skip_prefix(#flag, "aarch64_", sizeof("aarch64_")));
+#define _(flag, bit) \
+ if (clib_cpu_supports_##flag ()) \
+ s = format (s, "%s ", \
+ flag_skip_prefix (#flag, "aarch64_", sizeof ("aarch64_")));
foreach_aarch64_flags return s;
#undef _
#else /* ! ! __x86_64__ && ! __aarch64__ */
@@ -225,17 +252,25 @@ format_cpu_flags (u8 * s, va_list * args)
__clib_export u32
clib_get_current_cpu_id ()
{
+#ifdef __linux__
unsigned cpu, node;
syscall (__NR_getcpu, &cpu, &node, 0);
return cpu;
+#else
+ return 0;
+#endif /* __linux__ */
}
__clib_export u32
clib_get_current_numa_node ()
{
+#ifdef __linux__
unsigned cpu, node;
syscall (__NR_getcpu, &cpu, &node, 0);
return node;
+#else
+ return 0;
+#endif /* __linux__ */
}
__clib_export u8 *
@@ -250,10 +285,39 @@ format_march_variant (u8 *s, va_list *args)
return format (s, "%s", variants[t]);
}
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+#ifdef __aarch64__
+
+__clib_export const clib_cpu_info_t *
+clib_get_cpu_info ()
+{
+ static int first_run = 1;
+ static clib_cpu_info_t info = {};
+ if (first_run)
+ {
+ FILE *fp = fopen ("/proc/cpuinfo", "r");
+ char buf[128];
+
+ if (!fp)
+ return 0;
+
+ while (!feof (fp))
+ {
+ if (!fgets (buf, sizeof (buf), fp))
+ break;
+ buf[127] = '\0';
+ if (strstr (buf, "CPU part"))
+ info.aarch64.part_num =
+ strtol (memchr (buf, ':', 128) + 2, NULL, 0);
+
+ if (strstr (buf, "CPU implementer"))
+ info.aarch64.implementer =
+ strtol (memchr (buf, ':', 128) + 2, NULL, 0);
+ }
+ fclose (fp);
+
+ first_run = 0;
+ }
+ return &info;
+}
+
+#endif
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index c1f2e9e8248..7a1b75fcf7d 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -21,21 +21,31 @@
#if defined(__x86_64__)
#define foreach_march_variant \
+ _ (scalar, "Generic (SIMD disabled)") \
_ (hsw, "Intel Haswell") \
_ (trm, "Intel Tremont") \
_ (skx, "Intel Skylake (server) / Cascade Lake") \
- _ (icl, "Intel Ice Lake")
+ _ (icl, "Intel Ice Lake") \
+ _ (adl, "Intel Alder Lake") \
+ _ (spr, "Intel Sapphire Rapids") \
+ _ (znver3, "AMD Milan") \
+ _ (znver4, "AMD Genoa")
#elif defined(__aarch64__)
#define foreach_march_variant \
_ (octeontx2, "Marvell Octeon TX2") \
_ (thunderx2t99, "Marvell ThunderX2 T99") \
_ (qdf24xx, "Qualcomm CentriqTM 2400") \
_ (cortexa72, "ARM Cortex-A72") \
- _ (neoversen1, "ARM Neoverse N1")
+ _ (neoversen1, "ARM Neoverse N1") \
+ _ (neoversen2, "ARM Neoverse N2")
#else
#define foreach_march_variant
#endif
+#define amd_vendor(t1, t2, t3) \
+ ((t1 == 0x68747541) && /* htuA */ \
+ (t2 == 0x444d4163) && /* DMAc */ \
+ (t3 == 0x69746e65)) /* itne */
typedef enum
{
CLIB_MARCH_VARIANT_TYPE = 0,
@@ -84,6 +94,9 @@ clib_march_select_fn_ptr (clib_march_fn_registration * r)
#define CLIB_MARCH_FN_POINTER(fn) \
(__typeof__ (fn) *) clib_march_select_fn_ptr (fn##_march_fn_registrations);
+#define CLIB_MARCH_FN_VOID_POINTER(fn) \
+ clib_march_select_fn_ptr (fn##_march_fn_registrations);
+
#define _CLIB_MARCH_FN_REGISTRATION(fn) \
static clib_march_fn_registration \
CLIB_MARCH_SFX(fn##_march_fn_registration) = \
@@ -120,6 +133,7 @@ _CLIB_MARCH_FN_REGISTRATION(fn)
_ (avx, 1, ecx, 28) \
_ (rdrand, 1, ecx, 30) \
_ (avx2, 7, ebx, 5) \
+ _ (bmi2, 7, ebx, 8) \
_ (rtm, 7, ebx, 11) \
_ (pqm, 7, ebx, 12) \
_ (pqe, 7, ebx, 15) \
@@ -134,7 +148,10 @@ _CLIB_MARCH_FN_REGISTRATION(fn)
_ (avx512_vpopcntdq, 7, ecx, 14) \
_ (movdiri, 7, ecx, 27) \
_ (movdir64b, 7, ecx, 28) \
- _ (invariant_tsc, 0x80000007, edx, 8)
+ _ (enqcmd, 7, ecx, 29) \
+ _ (avx512_fp16, 7, edx, 23) \
+ _ (invariant_tsc, 0x80000007, edx, 8) \
+ _ (monitorx, 0x80000001, ecx, 29)
#define foreach_aarch64_flags \
_ (fp, 0) \
@@ -161,8 +178,10 @@ _ (asimddp, 20) \
_ (sha512, 21) \
_ (sve, 22)
-u32 clib_get_current_cpu_id ();
-u32 clib_get_current_numa_node ();
+u32 clib_get_current_cpu_id (void);
+u32 clib_get_current_numa_node (void);
+
+typedef int (*clib_cpu_supports_func_t) (void);
#if defined(__x86_64__)
#include "cpuid.h"
@@ -179,8 +198,6 @@ clib_get_cpuid (const u32 lev, u32 * eax, u32 * ebx, u32 * ecx, u32 * edx)
return 1;
}
-typedef int (*clib_cpu_supports_func_t) ();
-
#define _(flag, func, reg, bit) \
static inline int \
clib_cpu_supports_ ## flag() \
@@ -234,6 +251,20 @@ clib_cpu_supports_aes ()
}
static inline int
+clib_cpu_march_priority_scalar ()
+{
+ return 1;
+}
+
+static inline int
+clib_cpu_march_priority_spr ()
+{
+ if (clib_cpu_supports_enqcmd ())
+ return 300;
+ return -1;
+}
+
+static inline int
clib_cpu_march_priority_icl ()
{
if (clib_cpu_supports_avx512_bitalg ())
@@ -242,6 +273,14 @@ clib_cpu_march_priority_icl ()
}
static inline int
+clib_cpu_march_priority_adl ()
+{
+ if (clib_cpu_supports_movdiri () && clib_cpu_supports_avx2 ())
+ return 150;
+ return -1;
+}
+
+static inline int
clib_cpu_march_priority_skx ()
{
if (clib_cpu_supports_avx512f ())
@@ -253,7 +292,7 @@ static inline int
clib_cpu_march_priority_trm ()
{
if (clib_cpu_supports_movdiri ())
- return 60;
+ return 40;
return -1;
}
@@ -265,116 +304,149 @@ clib_cpu_march_priority_hsw ()
return -1;
}
-static inline u32
-clib_cpu_implementer ()
+static inline int
+clib_cpu_march_priority_znver4 ()
{
- char buf[128];
- static u32 implementer = -1;
-
- if (-1 != implementer)
- return implementer;
-
- FILE *fp = fopen ("/proc/cpuinfo", "r");
- if (!fp)
- return implementer;
-
- while (!feof (fp))
- {
- if (!fgets (buf, sizeof (buf), fp))
- break;
- buf[127] = '\0';
- if (strstr (buf, "CPU implementer"))
- implementer = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0);
- if (-1 != implementer)
- break;
- }
- fclose (fp);
-
- return implementer;
+ if (clib_cpu_supports_avx512_bitalg () && clib_cpu_supports_monitorx ())
+ return 250;
+ return -1;
}
-static inline u32
-clib_cpu_part ()
+static inline int
+clib_cpu_march_priority_znver3 ()
{
- char buf[128];
- static u32 part = -1;
+ if (clib_cpu_supports_avx2 () && clib_cpu_supports_monitorx ())
+ return 70;
+ return -1;
+}
- if (-1 != part)
- return part;
+#define X86_CPU_ARCH_PERF_FUNC 0xA
- FILE *fp = fopen ("/proc/cpuinfo", "r");
- if (!fp)
- return part;
+static inline int
+clib_get_pmu_counter_count (u8 *fixed, u8 *general)
+{
+#if defined(__x86_64__)
+ u32 __clib_unused eax = 0, ebx = 0, ecx = 0, edx = 0;
+ clib_get_cpuid (X86_CPU_ARCH_PERF_FUNC, &eax, &ebx, &ecx, &edx);
- while (!feof (fp))
- {
- if (!fgets (buf, sizeof (buf), fp))
- break;
- buf[127] = '\0';
- if (strstr (buf, "CPU part"))
- part = (u32) strtol (memchr (buf, ':', 128) + 2, NULL, 0);
- if (-1 != part)
- break;
- }
- fclose (fp);
+ *general = (eax & 0xFF00) >> 8;
+ *fixed = (edx & 0xF);
- return part;
+ return 1;
+#else
+ return 0;
+#endif
}
+typedef struct
+{
+ struct
+ {
+ u8 implementer;
+ u16 part_num;
+ } aarch64;
+} clib_cpu_info_t;
+
+const clib_cpu_info_t *clib_get_cpu_info ();
+
+/* ARM */
+#define AARCH64_CPU_IMPLEMENTER_ARM 0x41
+#define AARCH64_CPU_PART_CORTEXA72 0xd08
+#define AARCH64_CPU_PART_NEOVERSEN1 0xd0c
+#define AARCH64_CPU_PART_NEOVERSEN2 0xd49
+
+/*cavium */
#define AARCH64_CPU_IMPLEMENTER_CAVIUM 0x43
#define AARCH64_CPU_PART_THUNDERX2 0x0af
#define AARCH64_CPU_PART_OCTEONTX2T96 0x0b2
#define AARCH64_CPU_PART_OCTEONTX2T98 0x0b1
-#define AARCH64_CPU_IMPLEMENTER_QDF24XX 0x51
+
+/* Qualcomm */
+#define AARCH64_CPU_IMPLEMENTER_QUALCOMM 0x51
#define AARCH64_CPU_PART_QDF24XX 0xc00
-#define AARCH64_CPU_IMPLEMENTER_CORTEXA72 0x41
-#define AARCH64_CPU_PART_CORTEXA72 0xd08
-#define AARCH64_CPU_IMPLEMENTER_NEOVERSEN1 0x41
-#define AARCH64_CPU_PART_NEOVERSEN1 0xd0c
static inline int
clib_cpu_march_priority_octeontx2 ()
{
- if ((AARCH64_CPU_IMPLEMENTER_CAVIUM == clib_cpu_implementer ()) &&
- ((AARCH64_CPU_PART_OCTEONTX2T96 == clib_cpu_part ())
- || AARCH64_CPU_PART_OCTEONTX2T98 == clib_cpu_part ()))
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_CAVIUM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_OCTEONTX2T96 ||
+ info->aarch64.part_num == AARCH64_CPU_PART_OCTEONTX2T98)
return 20;
+
return -1;
}
static inline int
clib_cpu_march_priority_thunderx2t99 ()
{
- if ((AARCH64_CPU_IMPLEMENTER_CAVIUM == clib_cpu_implementer ()) &&
- (AARCH64_CPU_PART_THUNDERX2 == clib_cpu_part ()))
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_CAVIUM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_THUNDERX2)
return 20;
+
return -1;
}
static inline int
clib_cpu_march_priority_qdf24xx ()
{
- if ((AARCH64_CPU_IMPLEMENTER_QDF24XX == clib_cpu_implementer ()) &&
- (AARCH64_CPU_PART_QDF24XX == clib_cpu_part ()))
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_QUALCOMM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_QDF24XX)
return 20;
+
return -1;
}
static inline int
clib_cpu_march_priority_cortexa72 ()
{
- if ((AARCH64_CPU_IMPLEMENTER_CORTEXA72 == clib_cpu_implementer ()) &&
- (AARCH64_CPU_PART_CORTEXA72 == clib_cpu_part ()))
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_CORTEXA72)
return 10;
+
return -1;
}
static inline int
clib_cpu_march_priority_neoversen1 ()
{
- if ((AARCH64_CPU_IMPLEMENTER_NEOVERSEN1 == clib_cpu_implementer ()) &&
- (AARCH64_CPU_PART_NEOVERSEN1 == clib_cpu_part ()))
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_NEOVERSEN1)
+ return 10;
+
+ return -1;
+}
+
+static inline int
+clib_cpu_march_priority_neoversen2 ()
+{
+ const clib_cpu_info_t *info = clib_get_cpu_info ();
+
+ if (!info || info->aarch64.implementer != AARCH64_CPU_IMPLEMENTER_ARM)
+ return -1;
+
+ if (info->aarch64.part_num == AARCH64_CPU_PART_NEOVERSEN2)
return 10;
+
return -1;
}
diff --git a/src/vppinfra/crc32.h b/src/vppinfra/crc32.h
index fec67cd9757..5c5e548401a 100644
--- a/src/vppinfra/crc32.h
+++ b/src/vppinfra/crc32.h
@@ -21,67 +21,156 @@
#if __SSE4_2__
#define clib_crc32c_uses_intrinsics
#include <x86intrin.h>
-
-#define crc32_u64 _mm_crc32_u64
-#define crc32_u32 _mm_crc32_u32
-
static_always_inline u32
-clib_crc32c (u8 * s, int len)
+clib_crc32c_u8 (u32 last, u8 data)
{
- u32 v = 0;
-
-#if defined(__x86_64__)
- for (; len >= 8; len -= 8, s += 8)
- v = _mm_crc32_u64 (v, *((u64 *) s));
-#else
- /* workaround weird GCC bug when using _mm_crc32_u32
- which happens with -O2 optimization */
-#if !defined (__i686__)
- asm volatile ("":::"memory");
-#endif
-#endif
-
- for (; len >= 4; len -= 4, s += 4)
- v = _mm_crc32_u32 (v, *((u32 *) s));
+ return _mm_crc32_u8 (last, data);
+}
- for (; len >= 2; len -= 2, s += 2)
- v = _mm_crc32_u16 (v, *((u16 *) s));
+static_always_inline u32
+clib_crc32c_u16 (u32 last, u16 data)
+{
+ return _mm_crc32_u16 (last, data);
+}
- for (; len >= 1; len -= 1, s += 1)
- v = _mm_crc32_u8 (v, *((u16 *) s));
+static_always_inline u32
+clib_crc32c_u32 (u32 last, u32 data)
+{
+ return _mm_crc32_u32 (last, data);
+}
- return v;
+static_always_inline u32
+clib_crc32c_u64 (u32 last, u64 data)
+{
+ return _mm_crc32_u64 (last, data);
}
+#endif
-#elif __ARM_FEATURE_CRC32
+#if __ARM_FEATURE_CRC32
#define clib_crc32c_uses_intrinsics
#include <arm_acle.h>
+static_always_inline u32
+clib_crc32c_u8 (u32 last, u8 data)
+{
+ return __crc32cb (last, data);
+}
+static_always_inline u32
+clib_crc32c_u16 (u32 last, u16 data)
+{
+ return __crc32ch (last, data);
+}
-#define crc32_u64 __crc32cd
-#define crc32_u32 __crc32cw
+static_always_inline u32
+clib_crc32c_u32 (u32 last, u32 data)
+{
+ return __crc32cw (last, data);
+}
static_always_inline u32
-clib_crc32c (u8 * s, int len)
+clib_crc32c_u64 (u32 last, u64 data)
{
- u32 v = 0;
+ return __crc32cd (last, data);
+}
+#endif
+#ifdef clib_crc32c_uses_intrinsics
+static_always_inline u32
+clib_crc32c_with_init (u8 *s, int len, u32 last)
+{
for (; len >= 8; len -= 8, s += 8)
- v = __crc32cd (v, *((u64 *) s));
+ last = clib_crc32c_u64 (last, *((u64u *) s));
for (; len >= 4; len -= 4, s += 4)
- v = __crc32cw (v, *((u32 *) s));
+ last = clib_crc32c_u32 (last, *((u32u *) s));
for (; len >= 2; len -= 2, s += 2)
- v = __crc32ch (v, *((u16 *) s));
+ last = clib_crc32c_u16 (last, *((u16u *) s));
for (; len >= 1; len -= 1, s += 1)
- v = __crc32cb (v, *((u8 *) s));
+ last = clib_crc32c_u8 (last, *((u8 *) s));
- return v;
+ return last;
}
+static_always_inline u32
+clib_crc32c (u8 *s, int len)
+{
+ return clib_crc32c_with_init (s, len, 0);
+}
+#else
+
+static_always_inline u32
+_clib_crc32c (u32 crc, const u8 *p, uword len)
+{
+ static const u32 clib_crc32c_table[256] = {
+ 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, 0xC79A971FL,
+ 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, 0x8AD958CFL, 0x78B2DBCCL,
+ 0x6BE22838L, 0x9989AB3BL, 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L,
+ 0x5E133C24L, 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+ 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, 0x9A879FA0L,
+ 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, 0x5D1D08BFL, 0xAF768BBCL,
+ 0xBC267848L, 0x4E4DFB4BL, 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L,
+ 0x33ED7D2AL, 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+ 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, 0x6DFE410EL,
+ 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, 0x30E349B1L, 0xC288CAB2L,
+ 0xD1D83946L, 0x23B3BA45L, 0xF779DEAEL, 0x05125DADL, 0x1642AE59L,
+ 0xE4292D5AL, 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+ 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, 0x417B1DBCL,
+ 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, 0x86E18AA3L, 0x748A09A0L,
+ 0x67DAFA54L, 0x95B17957L, 0xCBA24573L, 0x39C9C670L, 0x2A993584L,
+ 0xD8F2B687L, 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+ 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, 0x96BF4DCCL,
+ 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, 0xDBFC821CL, 0x2997011FL,
+ 0x3AC7F2EBL, 0xC8AC71E8L, 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L,
+ 0x0F36E6F7L, 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+ 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L, 0xEB1FCBADL,
+ 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, 0x2C855CB2L, 0xDEEEDFB1L,
+ 0xCDBE2C45L, 0x3FD5AF46L, 0x7198540DL, 0x83F3D70EL, 0x90A324FAL,
+ 0x62C8A7F9L, 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+ 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, 0x3CDB9BDDL,
+ 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, 0x82F63B78L, 0x709DB87BL,
+ 0x63CD4B8FL, 0x91A6C88CL, 0x456CAC67L, 0xB7072F64L, 0xA457DC90L,
+ 0x563C5F93L, 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+ 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, 0x92A8FC17L,
+ 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, 0x55326B08L, 0xA759E80BL,
+ 0xB4091BFFL, 0x466298FCL, 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL,
+ 0x0B21572CL, 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+ 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, 0x65D122B9L,
+ 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, 0x2892ED69L, 0xDAF96E6AL,
+ 0xC9A99D9EL, 0x3BC21E9DL, 0xEF087A76L, 0x1D63F975L, 0x0E330A81L,
+ 0xFC588982L, 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+ 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, 0x38CC2A06L,
+ 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, 0xFF56BD19L, 0x0D3D3E1AL,
+ 0x1E6DCDEEL, 0xEC064EEDL, 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L,
+ 0xD0DDD530L, 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+ 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, 0x8ECEE914L,
+ 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, 0xD3D3E1ABL, 0x21B862A8L,
+ 0x32E8915CL, 0xC083125FL, 0x144976B4L, 0xE622F5B7L, 0xF5720643L,
+ 0x07198540L, 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+ 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, 0xE330A81AL,
+ 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, 0x24AA3F05L, 0xD6C1BC06L,
+ 0xC5914FF2L, 0x37FACCF1L, 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L,
+ 0x7AB90321L, 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+ 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, 0x34F4F86AL,
+ 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, 0x79B737BAL, 0x8BDCB4B9L,
+ 0x988C474DL, 0x6AE7C44EL, 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L,
+ 0xAD7D5351L
+ };
+
+ while (len--)
+ crc = (crc >> 8) ^ clib_crc32c_table[(u8) crc ^ p++[0]];
+
+ return crc;
+}
+
+static_always_inline u32
+clib_crc32c (const u8 *p, uword len)
+{
+ return _clib_crc32c (0, p, len);
+}
#endif
+
#endif /* __included_crc32_h__ */
/*
diff --git a/src/plugins/crypto_native/aes.h b/src/vppinfra/crypto/aes.h
index 762d528d064..9e80e3b0318 100644
--- a/src/plugins/crypto_native/aes.h
+++ b/src/vppinfra/crypto/aes.h
@@ -15,8 +15,8 @@
*------------------------------------------------------------------
*/
-#ifndef __aesni_h__
-#define __aesni_h__
+#ifndef __aes_h__
+#define __aes_h__
typedef enum
{
@@ -28,10 +28,6 @@ typedef enum
#define AES_KEY_ROUNDS(x) (10 + x * 2)
#define AES_KEY_BYTES(x) (16 + x * 8)
-static const u8x16 byte_mask_scale = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
-};
-
static_always_inline u8x16
aes_block_load (u8 * p)
{
@@ -39,7 +35,7 @@ aes_block_load (u8 * p)
}
static_always_inline u8x16
-aes_enc_round (u8x16 a, u8x16 k)
+aes_enc_round_x1 (u8x16 a, u8x16 k)
{
#if defined (__AES__)
return (u8x16) _mm_aesenc_si128 ((__m128i) a, (__m128i) k);
@@ -48,7 +44,7 @@ aes_enc_round (u8x16 a, u8x16 k)
#endif
}
-#if defined (__VAES__)
+#if defined(__VAES__) && defined(__AVX512F__)
static_always_inline u8x64
aes_enc_round_x4 (u8x64 a, u8x64 k)
{
@@ -74,8 +70,34 @@ aes_dec_last_round_x4 (u8x64 a, u8x64 k)
}
#endif
+#ifdef __VAES__
+static_always_inline u8x32
+aes_enc_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesenc_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_enc_last_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesenclast_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_dec_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesdec_epi128 ((__m256i) a, (__m256i) k);
+}
+
+static_always_inline u8x32
+aes_dec_last_round_x2 (u8x32 a, u8x32 k)
+{
+ return (u8x32) _mm256_aesdeclast_epi128 ((__m256i) a, (__m256i) k);
+}
+#endif
+
static_always_inline u8x16
-aes_enc_last_round (u8x16 a, u8x16 k)
+aes_enc_last_round_x1 (u8x16 a, u8x16 k)
{
#if defined (__AES__)
return (u8x16) _mm_aesenclast_si128 ((__m128i) a, (__m128i) k);
@@ -87,13 +109,13 @@ aes_enc_last_round (u8x16 a, u8x16 k)
#ifdef __x86_64__
static_always_inline u8x16
-aes_dec_round (u8x16 a, u8x16 k)
+aes_dec_round_x1 (u8x16 a, u8x16 k)
{
return (u8x16) _mm_aesdec_si128 ((__m128i) a, (__m128i) k);
}
static_always_inline u8x16
-aes_dec_last_round (u8x16 a, u8x16 k)
+aes_dec_last_round_x1 (u8x16 a, u8x16 k)
{
return (u8x16) _mm_aesdeclast_si128 ((__m128i) a, (__m128i) k);
}
@@ -106,47 +128,13 @@ aes_block_store (u8 * p, u8x16 r)
}
static_always_inline u8x16
-aes_byte_mask (u8x16 x, u8 n_bytes)
-{
- return x & u8x16_is_greater (u8x16_splat (n_bytes), byte_mask_scale);
-}
-
-static_always_inline u8x16
-aes_load_partial (u8x16u * p, int n_bytes)
-{
- ASSERT (n_bytes <= 16);
-#ifdef __AVX512F__
- __m128i zero = { };
- return (u8x16) _mm_mask_loadu_epi8 (zero, (1 << n_bytes) - 1, p);
-#else
- return aes_byte_mask (CLIB_MEM_OVERFLOW_LOAD (*, p), n_bytes);
-#endif
-}
-
-static_always_inline void
-aes_store_partial (void *p, u8x16 r, int n_bytes)
-{
-#if __aarch64__
- clib_memcpy_fast (p, &r, n_bytes);
-#else
-#ifdef __AVX512F__
- _mm_mask_storeu_epi8 (p, (1 << n_bytes) - 1, (__m128i) r);
-#else
- u8x16 mask = u8x16_is_greater (u8x16_splat (n_bytes), byte_mask_scale);
- _mm_maskmoveu_si128 ((__m128i) r, (__m128i) mask, p);
-#endif
-#endif
-}
-
-
-static_always_inline u8x16
aes_encrypt_block (u8x16 block, const u8x16 * round_keys, aes_key_size_t ks)
{
int rounds = AES_KEY_ROUNDS (ks);
block ^= round_keys[0];
for (int i = 1; i < rounds; i += 1)
- block = aes_enc_round (block, round_keys[i]);
- return aes_enc_last_round (block, round_keys[rounds]);
+ block = aes_enc_round_x1 (block, round_keys[i]);
+ return aes_enc_last_round_x1 (block, round_keys[rounds]);
}
static_always_inline u8x16
@@ -178,7 +166,7 @@ aes128_key_assist (u8x16 * rk, u8x16 r)
}
static_always_inline void
-aes128_key_expand (u8x16 * rk, u8x16 const *k)
+aes128_key_expand (u8x16 *rk, u8x16u const *k)
{
rk[0] = k[0];
aes128_key_assist (rk + 1, aes_keygen_assist (rk[0], 0x01));
@@ -211,9 +199,7 @@ aes192_key_expand (u8x16 * rk, u8x16u const *k)
u8x16 r1, r2;
rk[0] = r1 = k[0];
- /* *INDENT-OFF* */
rk[1] = r2 = (u8x16) (u64x2) { *(u64 *) (k + 1), 0 };
- /* *INDENT-ON* */
aes192_key_assist (&r1, &r2, aes_keygen_assist (r2, 0x1));
rk[1] = (u8x16) _mm_shuffle_pd ((__m128d) rk[1], (__m128d) r1, 0);
@@ -308,7 +294,7 @@ aes128_key_expand_round_neon (u8x16 * rk, u32 rcon)
}
static_always_inline void
-aes128_key_expand (u8x16 * rk, const u8x16 * k)
+aes128_key_expand (u8x16 *rk, u8x16u const *k)
{
rk[0] = k[0];
aes128_key_expand_round_neon (rk + 1, 0x01);
@@ -385,7 +371,7 @@ aes256_key_expand_round_neon (u8x16 * rk, u32 rcon)
}
static_always_inline void
-aes256_key_expand (u8x16 * rk, u8x16 const *k)
+aes256_key_expand (u8x16 *rk, u8x16u const *k)
{
rk[0] = k[0];
rk[1] = k[1];
@@ -439,13 +425,67 @@ aes_key_enc_to_dec (u8x16 * ke, u8x16 * kd, aes_key_size_t ks)
kd[rounds / 2] = aes_inv_mix_column (ke[rounds / 2]);
}
+#if defined(__VAES__) && defined(__AVX512F__)
+#define N_AES_LANES 4
+#define aes_load_partial(p, n) u8x64_load_partial ((u8 *) (p), n)
+#define aes_store_partial(v, p, n) u8x64_store_partial (v, (u8 *) (p), n)
+#define aes_reflect(r) u8x64_reflect_u8x16 (r)
+typedef u8x64 aes_data_t;
+typedef u8x64u aes_mem_t;
+typedef u32x16 aes_counter_t;
+#elif defined(__VAES__)
+#define N_AES_LANES 2
+#define aes_load_partial(p, n) u8x32_load_partial ((u8 *) (p), n)
+#define aes_store_partial(v, p, n) u8x32_store_partial (v, (u8 *) (p), n)
+#define aes_reflect(r) u8x32_reflect_u8x16 (r)
+typedef u8x32 aes_data_t;
+typedef u8x32u aes_mem_t;
+typedef u32x8 aes_counter_t;
+#else
+#define N_AES_LANES 1
+#define aes_load_partial(p, n) u8x16_load_partial ((u8 *) (p), n)
+#define aes_store_partial(v, p, n) u8x16_store_partial (v, (u8 *) (p), n)
+#define aes_reflect(r) u8x16_reflect (r)
+typedef u8x16 aes_data_t;
+typedef u8x16u aes_mem_t;
+typedef u32x4 aes_counter_t;
+#endif
-#endif /* __aesni_h__ */
+#define N_AES_BYTES (N_AES_LANES * 16)
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+typedef union
+{
+ u8x16 x1;
+ u8x32 x2;
+ u8x64 x4;
+ u8x16 lanes[4];
+} aes_expaned_key_t;
+
+static_always_inline void
+aes_enc_round (aes_data_t *r, const aes_expaned_key_t *ek, uword n_blocks)
+{
+ for (int i = 0; i < n_blocks; i++)
+#if N_AES_LANES == 4
+ r[i] = aes_enc_round_x4 (r[i], ek->x4);
+#elif N_AES_LANES == 2
+ r[i] = aes_enc_round_x2 (r[i], ek->x2);
+#else
+ r[i] = aes_enc_round_x1 (r[i], ek->x1);
+#endif
+}
+
+static_always_inline void
+aes_enc_last_round (aes_data_t *r, aes_data_t *d, const aes_expaned_key_t *ek,
+ uword n_blocks)
+{
+ for (int i = 0; i < n_blocks; i++)
+#if N_AES_LANES == 4
+ d[i] ^= r[i] = aes_enc_last_round_x4 (r[i], ek->x4);
+#elif N_AES_LANES == 2
+ d[i] ^= r[i] = aes_enc_last_round_x2 (r[i], ek->x2);
+#else
+ d[i] ^= r[i] = aes_enc_last_round_x1 (r[i], ek->x1);
+#endif
+}
+
+#endif /* __aes_h__ */
diff --git a/src/vppinfra/crypto/aes_cbc.h b/src/vppinfra/crypto/aes_cbc.h
new file mode 100644
index 00000000000..cb3d0784051
--- /dev/null
+++ b/src/vppinfra/crypto/aes_cbc.h
@@ -0,0 +1,542 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef __crypto_aes_cbc_h__
+#define __crypto_aes_cbc_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/crypto/aes.h>
+
+typedef struct
+{
+ const u8x16 encrypt_key[15];
+ const u8x16 decrypt_key[15];
+} aes_cbc_key_data_t;
+
+static_always_inline void
+clib_aes_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *src, uword len,
+ const u8 *iv, aes_key_size_t ks, u8 *dst)
+{
+ int rounds = AES_KEY_ROUNDS (ks);
+ u8x16 r, *k = (u8x16 *) kd->encrypt_key;
+
+ r = *(u8x16u *) iv;
+
+ for (int i = 0; i < len; i += 16)
+ {
+ int j;
+ r = u8x16_xor3 (r, *(u8x16u *) (src + i), k[0]);
+ for (j = 1; j < rounds; j++)
+ r = aes_enc_round_x1 (r, k[j]);
+ r = aes_enc_last_round_x1 (r, k[rounds]);
+ *(u8x16u *) (dst + i) = r;
+ }
+}
+
+static_always_inline void
+clib_aes128_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext,
+ uword len, const u8 *iv, u8 *ciphertext)
+{
+ clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_128, ciphertext);
+}
+
+static_always_inline void
+clib_aes192_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext,
+ uword len, const u8 *iv, u8 *ciphertext)
+{
+ clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_192, ciphertext);
+}
+
+static_always_inline void
+clib_aes256_cbc_encrypt (const aes_cbc_key_data_t *kd, const u8 *plaintext,
+ uword len, const u8 *iv, u8 *ciphertext)
+{
+ clib_aes_cbc_encrypt (kd, plaintext, len, iv, AES_KEY_256, ciphertext);
+}
+
+static_always_inline void __clib_unused
+aes_cbc_dec (const u8x16 *k, u8x16u *src, u8x16u *dst, u8x16u *iv, int count,
+ int rounds)
+{
+ u8x16 r[4], c[4], f;
+
+ f = iv[0];
+ while (count >= 64)
+ {
+ c[0] = r[0] = src[0];
+ c[1] = r[1] = src[1];
+ c[2] = r[2] = src[2];
+ c[3] = r[3] = src[3];
+
+#if __x86_64__
+ r[0] ^= k[0];
+ r[1] ^= k[0];
+ r[2] ^= k[0];
+ r[3] ^= k[0];
+
+ for (int i = 1; i < rounds; i++)
+ {
+ r[0] = aes_dec_round_x1 (r[0], k[i]);
+ r[1] = aes_dec_round_x1 (r[1], k[i]);
+ r[2] = aes_dec_round_x1 (r[2], k[i]);
+ r[3] = aes_dec_round_x1 (r[3], k[i]);
+ }
+
+ r[0] = aes_dec_last_round_x1 (r[0], k[rounds]);
+ r[1] = aes_dec_last_round_x1 (r[1], k[rounds]);
+ r[2] = aes_dec_last_round_x1 (r[2], k[rounds]);
+ r[3] = aes_dec_last_round_x1 (r[3], k[rounds]);
+#else
+ for (int i = 0; i < rounds - 1; i++)
+ {
+ r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
+ r[1] = vaesimcq_u8 (vaesdq_u8 (r[1], k[i]));
+ r[2] = vaesimcq_u8 (vaesdq_u8 (r[2], k[i]));
+ r[3] = vaesimcq_u8 (vaesdq_u8 (r[3], k[i]));
+ }
+ r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
+ r[1] = vaesdq_u8 (r[1], k[rounds - 1]) ^ k[rounds];
+ r[2] = vaesdq_u8 (r[2], k[rounds - 1]) ^ k[rounds];
+ r[3] = vaesdq_u8 (r[3], k[rounds - 1]) ^ k[rounds];
+#endif
+ dst[0] = r[0] ^ f;
+ dst[1] = r[1] ^ c[0];
+ dst[2] = r[2] ^ c[1];
+ dst[3] = r[3] ^ c[2];
+ f = c[3];
+
+ count -= 64;
+ src += 4;
+ dst += 4;
+ }
+
+ while (count > 0)
+ {
+ c[0] = r[0] = src[0];
+#if __x86_64__
+ r[0] ^= k[0];
+ for (int i = 1; i < rounds; i++)
+ r[0] = aes_dec_round_x1 (r[0], k[i]);
+ r[0] = aes_dec_last_round_x1 (r[0], k[rounds]);
+#else
+ c[0] = r[0] = src[0];
+ for (int i = 0; i < rounds - 1; i++)
+ r[0] = vaesimcq_u8 (vaesdq_u8 (r[0], k[i]));
+ r[0] = vaesdq_u8 (r[0], k[rounds - 1]) ^ k[rounds];
+#endif
+ dst[0] = r[0] ^ f;
+ f = c[0];
+
+ count -= 16;
+ src += 1;
+ dst += 1;
+ }
+}
+
+#if __x86_64__
+#if defined(__VAES__) && defined(__AVX512F__)
+
+static_always_inline u8x64
+aes_block_load_x4 (u8 *src[], int i)
+{
+ u8x64 r = {};
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[0] + i), 0);
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[1] + i), 1);
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[2] + i), 2);
+ r = u8x64_insert_u8x16 (r, aes_block_load (src[3] + i), 3);
+ return r;
+}
+
+static_always_inline void
+aes_block_store_x4 (u8 *dst[], int i, u8x64 r)
+{
+ aes_block_store (dst[0] + i, u8x64_extract_u8x16 (r, 0));
+ aes_block_store (dst[1] + i, u8x64_extract_u8x16 (r, 1));
+ aes_block_store (dst[2] + i, u8x64_extract_u8x16 (r, 2));
+ aes_block_store (dst[3] + i, u8x64_extract_u8x16 (r, 3));
+}
+
+static_always_inline u8x64
+aes4_cbc_dec_permute (u8x64 a, u8x64 b)
+{
+ return (u8x64) u64x8_shuffle2 (a, b, 6, 7, 8, 9, 10, 11, 12, 13);
+}
+
+static_always_inline void
+aes4_cbc_dec (const u8x16 *k, u8x64u *src, u8x64u *dst, u8x16u *iv, int count,
+ aes_key_size_t rounds)
+{
+ u8x64 f, k4, r[4], c[4] = {};
+ __mmask8 m;
+ int i, n_blocks = count >> 4;
+
+ f = u8x64_insert_u8x16 (u8x64_zero (), *iv, 3);
+
+ while (n_blocks >= 16)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+ c[3] = src[3];
+
+ r[0] = c[0] ^ k4;
+ r[1] = c[1] ^ k4;
+ r[2] = c[2] ^ k4;
+ r[3] = c[3] ^ k4;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x4 (r[0], k4);
+ r[1] = aes_dec_round_x4 (r[1], k4);
+ r[2] = aes_dec_round_x4 (r[2], k4);
+ r[3] = aes_dec_round_x4 (r[3], k4);
+ }
+
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x4 (r[0], k4);
+ r[1] = aes_dec_last_round_x4 (r[1], k4);
+ r[2] = aes_dec_last_round_x4 (r[2], k4);
+ r[3] = aes_dec_last_round_x4 (r[3], k4);
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes4_cbc_dec_permute (c[1], c[2]);
+ dst[3] = r[3] ^= aes4_cbc_dec_permute (c[2], c[3]);
+ f = c[3];
+
+ n_blocks -= 16;
+ src += 4;
+ dst += 4;
+ }
+
+ if (n_blocks >= 12)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+
+ r[0] = c[0] ^ k4;
+ r[1] = c[1] ^ k4;
+ r[2] = c[2] ^ k4;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x4 (r[0], k4);
+ r[1] = aes_dec_round_x4 (r[1], k4);
+ r[2] = aes_dec_round_x4 (r[2], k4);
+ }
+
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x4 (r[0], k4);
+ r[1] = aes_dec_last_round_x4 (r[1], k4);
+ r[2] = aes_dec_last_round_x4 (r[2], k4);
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes4_cbc_dec_permute (c[1], c[2]);
+ f = c[2];
+
+ n_blocks -= 12;
+ src += 3;
+ dst += 3;
+ }
+ else if (n_blocks >= 8)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+
+ r[0] = c[0] ^ k4;
+ r[1] = c[1] ^ k4;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x4 (r[0], k4);
+ r[1] = aes_dec_round_x4 (r[1], k4);
+ }
+
+ k4 = u8x64_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x4 (r[0], k4);
+ r[1] = aes_dec_last_round_x4 (r[1], k4);
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes4_cbc_dec_permute (c[0], c[1]);
+ f = c[1];
+
+ n_blocks -= 8;
+ src += 2;
+ dst += 2;
+ }
+ else if (n_blocks >= 4)
+ {
+ c[0] = src[0];
+
+ r[0] = c[0] ^ u8x64_splat_u8x16 (k[0]);
+
+ for (i = 1; i < rounds; i++)
+ r[0] = aes_dec_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+
+ r[0] = aes_dec_last_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+
+ dst[0] = r[0] ^= aes4_cbc_dec_permute (f, c[0]);
+ f = c[0];
+
+ n_blocks -= 4;
+ src += 1;
+ dst += 1;
+ }
+
+ if (n_blocks > 0)
+ {
+ k4 = u8x64_splat_u8x16 (k[0]);
+ m = (1 << (n_blocks * 2)) - 1;
+ c[0] =
+ (u8x64) _mm512_mask_loadu_epi64 ((__m512i) c[0], m, (__m512i *) src);
+ f = aes4_cbc_dec_permute (f, c[0]);
+ r[0] = c[0] ^ k4;
+ for (i = 1; i < rounds; i++)
+ r[0] = aes_dec_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+ r[0] = aes_dec_last_round_x4 (r[0], u8x64_splat_u8x16 (k[i]));
+ _mm512_mask_storeu_epi64 ((__m512i *) dst, m, (__m512i) (r[0] ^ f));
+ }
+}
+#elif defined(__VAES__)
+
+static_always_inline u8x32
+aes_block_load_x2 (u8 *src[], int i)
+{
+ u8x32 r = {};
+ r = u8x32_insert_lo (r, aes_block_load (src[0] + i));
+ r = u8x32_insert_hi (r, aes_block_load (src[1] + i));
+ return r;
+}
+
+static_always_inline void
+aes_block_store_x2 (u8 *dst[], int i, u8x32 r)
+{
+ aes_block_store (dst[0] + i, u8x32_extract_lo (r));
+ aes_block_store (dst[1] + i, u8x32_extract_hi (r));
+}
+
+static_always_inline u8x32
+aes2_cbc_dec_permute (u8x32 a, u8x32 b)
+{
+ return (u8x32) u64x4_shuffle2 ((u64x4) a, (u64x4) b, 2, 3, 4, 5);
+}
+
+static_always_inline void
+aes2_cbc_dec (const u8x16 *k, u8x32u *src, u8x32u *dst, u8x16u *iv, int count,
+ aes_key_size_t rounds)
+{
+ u8x32 k2, f = {}, r[4], c[4] = {};
+ int i, n_blocks = count >> 4;
+
+ f = u8x32_insert_hi (f, *iv);
+
+ while (n_blocks >= 8)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+ c[3] = src[3];
+
+ r[0] = c[0] ^ k2;
+ r[1] = c[1] ^ k2;
+ r[2] = c[2] ^ k2;
+ r[3] = c[3] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x2 (r[0], k2);
+ r[1] = aes_dec_round_x2 (r[1], k2);
+ r[2] = aes_dec_round_x2 (r[2], k2);
+ r[3] = aes_dec_round_x2 (r[3], k2);
+ }
+
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x2 (r[0], k2);
+ r[1] = aes_dec_last_round_x2 (r[1], k2);
+ r[2] = aes_dec_last_round_x2 (r[2], k2);
+ r[3] = aes_dec_last_round_x2 (r[3], k2);
+
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes2_cbc_dec_permute (c[1], c[2]);
+ dst[3] = r[3] ^= aes2_cbc_dec_permute (c[2], c[3]);
+ f = c[3];
+
+ n_blocks -= 8;
+ src += 4;
+ dst += 4;
+ }
+
+ if (n_blocks >= 6)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+ c[2] = src[2];
+
+ r[0] = c[0] ^ k2;
+ r[1] = c[1] ^ k2;
+ r[2] = c[2] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x2 (r[0], k2);
+ r[1] = aes_dec_round_x2 (r[1], k2);
+ r[2] = aes_dec_round_x2 (r[2], k2);
+ }
+
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x2 (r[0], k2);
+ r[1] = aes_dec_last_round_x2 (r[1], k2);
+ r[2] = aes_dec_last_round_x2 (r[2], k2);
+
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]);
+ dst[2] = r[2] ^= aes2_cbc_dec_permute (c[1], c[2]);
+ f = c[2];
+
+ n_blocks -= 6;
+ src += 3;
+ dst += 3;
+ }
+ else if (n_blocks >= 4)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ c[1] = src[1];
+
+ r[0] = c[0] ^ k2;
+ r[1] = c[1] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ {
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_round_x2 (r[0], k2);
+ r[1] = aes_dec_round_x2 (r[1], k2);
+ }
+
+ k2 = u8x32_splat_u8x16 (k[i]);
+ r[0] = aes_dec_last_round_x2 (r[0], k2);
+ r[1] = aes_dec_last_round_x2 (r[1], k2);
+
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ dst[1] = r[1] ^= aes2_cbc_dec_permute (c[0], c[1]);
+ f = c[1];
+
+ n_blocks -= 4;
+ src += 2;
+ dst += 2;
+ }
+ else if (n_blocks >= 2)
+ {
+ k2 = u8x32_splat_u8x16 (k[0]);
+ c[0] = src[0];
+ r[0] = c[0] ^ k2;
+
+ for (i = 1; i < rounds; i++)
+ r[0] = aes_dec_round_x2 (r[0], u8x32_splat_u8x16 (k[i]));
+
+ r[0] = aes_dec_last_round_x2 (r[0], u8x32_splat_u8x16 (k[i]));
+ dst[0] = r[0] ^= aes2_cbc_dec_permute (f, c[0]);
+ f = c[0];
+
+ n_blocks -= 2;
+ src += 1;
+ dst += 1;
+ }
+
+ if (n_blocks > 0)
+ {
+ u8x16 rl = *(u8x16u *) src ^ k[0];
+ for (i = 1; i < rounds; i++)
+ rl = aes_dec_round_x1 (rl, k[i]);
+ rl = aes_dec_last_round_x1 (rl, k[i]);
+ *(u8x16u *) dst = rl ^ u8x32_extract_hi (f);
+ }
+}
+#endif
+#endif
+
+static_always_inline void
+clib_aes_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key,
+ aes_key_size_t ks)
+{
+ u8x16 e[15], d[15];
+ aes_key_expand (e, key, ks);
+ aes_key_enc_to_dec (e, d, ks);
+ for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
+ {
+ ((u8x16 *) kd->decrypt_key)[i] = d[i];
+ ((u8x16 *) kd->encrypt_key)[i] = e[i];
+ }
+}
+
+static_always_inline void
+clib_aes128_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key)
+{
+ clib_aes_cbc_key_expand (kd, key, AES_KEY_128);
+}
+static_always_inline void
+clib_aes192_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key)
+{
+ clib_aes_cbc_key_expand (kd, key, AES_KEY_192);
+}
+static_always_inline void
+clib_aes256_cbc_key_expand (aes_cbc_key_data_t *kd, const u8 *key)
+{
+ clib_aes_cbc_key_expand (kd, key, AES_KEY_256);
+}
+
+static_always_inline void
+clib_aes_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, aes_key_size_t ks,
+ u8 *plaintext)
+{
+ int rounds = AES_KEY_ROUNDS (ks);
+#if defined(__VAES__) && defined(__AVX512F__)
+ aes4_cbc_dec (kd->decrypt_key, (u8x64u *) ciphertext, (u8x64u *) plaintext,
+ (u8x16u *) iv, (int) len, rounds);
+#elif defined(__VAES__)
+ aes2_cbc_dec (kd->decrypt_key, (u8x32u *) ciphertext, (u8x32u *) plaintext,
+ (u8x16u *) iv, (int) len, rounds);
+#else
+ aes_cbc_dec (kd->decrypt_key, (u8x16u *) ciphertext, (u8x16u *) plaintext,
+ (u8x16u *) iv, (int) len, rounds);
+#endif
+}
+
+static_always_inline void
+clib_aes128_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, u8 *plaintext)
+{
+ clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_128, plaintext);
+}
+
+static_always_inline void
+clib_aes192_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, u8 *plaintext)
+{
+ clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_192, plaintext);
+}
+
+static_always_inline void
+clib_aes256_cbc_decrypt (const aes_cbc_key_data_t *kd, const u8 *ciphertext,
+ uword len, const u8 *iv, u8 *plaintext)
+{
+ clib_aes_cbc_decrypt (kd, ciphertext, len, iv, AES_KEY_256, plaintext);
+}
+
+#endif /* __crypto_aes_cbc_h__ */
diff --git a/src/vppinfra/crypto/aes_ctr.h b/src/vppinfra/crypto/aes_ctr.h
new file mode 100644
index 00000000000..74a9f96d90d
--- /dev/null
+++ b/src/vppinfra/crypto/aes_ctr.h
@@ -0,0 +1,190 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#ifndef __crypto_aes_ctr_h__
+#define __crypto_aes_ctr_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/string.h>
+#include <vppinfra/crypto/aes.h>
+
+typedef struct
+{
+ const aes_expaned_key_t exp_key[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+} aes_ctr_key_data_t;
+
+typedef struct
+{
+ const aes_expaned_key_t exp_key[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+ aes_counter_t ctr; /* counter (reflected) */
+ u8 keystream_bytes[N_AES_BYTES]; /* keystream leftovers */
+ u32 n_keystream_bytes; /* number of keystream leftovers */
+} aes_ctr_ctx_t;
+
+static_always_inline aes_counter_t
+aes_ctr_one_block (aes_ctr_ctx_t *ctx, aes_counter_t ctr, const u8 *src,
+ u8 *dst, u32 n_parallel, u32 n_bytes, int rounds, int last)
+{
+ u32 __clib_aligned (N_AES_BYTES)
+ inc[] = { N_AES_LANES, 0, 0, 0, N_AES_LANES, 0, 0, 0,
+ N_AES_LANES, 0, 0, 0, N_AES_LANES, 0, 0, 0 };
+ const aes_expaned_key_t *k = ctx->exp_key;
+ const aes_mem_t *sv = (aes_mem_t *) src;
+ aes_mem_t *dv = (aes_mem_t *) dst;
+ aes_data_t d[4], t[4];
+ u32 r;
+
+ n_bytes -= (n_parallel - 1) * N_AES_BYTES;
+
+ /* AES First Round */
+ for (int i = 0; i < n_parallel; i++)
+ {
+#if N_AES_LANES == 4
+ t[i] = k[0].x4 ^ (u8x64) aes_reflect ((u8x64) ctr);
+#elif N_AES_LANES == 2
+ t[i] = k[0].x2 ^ (u8x32) aes_reflect ((u8x32) ctr);
+#else
+ t[i] = k[0].x1 ^ (u8x16) aes_reflect ((u8x16) ctr);
+#endif
+ ctr += *(aes_counter_t *) inc;
+ }
+
+ /* Load Data */
+ for (int i = 0; i < n_parallel - last; i++)
+ d[i] = sv[i];
+
+ if (last)
+ d[n_parallel - 1] =
+ aes_load_partial ((u8 *) (sv + n_parallel - 1), n_bytes);
+
+ /* AES Intermediate Rounds */
+ for (r = 1; r < rounds; r++)
+ aes_enc_round (t, k + r, n_parallel);
+
+ /* AES Last Round */
+ aes_enc_last_round (t, d, k + r, n_parallel);
+
+ /* Store Data */
+ for (int i = 0; i < n_parallel - last; i++)
+ dv[i] = d[i];
+
+ if (last)
+ {
+ aes_store_partial (d[n_parallel - 1], dv + n_parallel - 1, n_bytes);
+ *(aes_data_t *) ctx->keystream_bytes = t[n_parallel - 1];
+ ctx->n_keystream_bytes = N_AES_BYTES - n_bytes;
+ }
+
+ return ctr;
+}
+
+static_always_inline void
+clib_aes_ctr_init (aes_ctr_ctx_t *ctx, const aes_ctr_key_data_t *kd,
+ const u8 *iv, aes_key_size_t ks)
+{
+ u32x4 ctr = (u32x4) u8x16_reflect (*(u8x16u *) iv);
+#if N_AES_LANES == 4
+ ctx->ctr = (aes_counter_t) u32x16_splat_u32x4 (ctr) +
+ (u32x16){ 0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0 };
+#elif N_AES_LANES == 2
+ ctx->ctr = (aes_counter_t) u32x8_splat_u32x4 (ctr) +
+ (u32x8){ 0, 0, 0, 0, 1, 0, 0, 0 };
+#else
+ ctx->ctr = ctr;
+#endif
+ for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
+ ((aes_expaned_key_t *) ctx->exp_key)[i] = kd->exp_key[i];
+ ctx->n_keystream_bytes = 0;
+}
+
+static_always_inline void
+clib_aes_ctr_transform (aes_ctr_ctx_t *ctx, const u8 *src, u8 *dst,
+ u32 n_bytes, aes_key_size_t ks)
+{
+ int r = AES_KEY_ROUNDS (ks);
+ aes_counter_t ctr = ctx->ctr;
+
+ if (ctx->n_keystream_bytes)
+ {
+ u8 *ks = ctx->keystream_bytes + N_AES_BYTES - ctx->n_keystream_bytes;
+
+ if (ctx->n_keystream_bytes >= n_bytes)
+ {
+ for (int i = 0; i < n_bytes; i++)
+ dst[i] = src[i] ^ ks[i];
+ ctx->n_keystream_bytes -= n_bytes;
+ return;
+ }
+
+ for (int i = 0; i < ctx->n_keystream_bytes; i++)
+ dst++[0] = src++[0] ^ ks[i];
+
+ n_bytes -= ctx->n_keystream_bytes;
+ ctx->n_keystream_bytes = 0;
+ }
+
+ /* main loop */
+ for (int n = 4 * N_AES_BYTES; n_bytes >= n; n_bytes -= n, dst += n, src += n)
+ ctr = aes_ctr_one_block (ctx, ctr, src, dst, 4, n, r, 0);
+
+ if (n_bytes)
+ {
+ if (n_bytes > 3 * N_AES_BYTES)
+ ctr = aes_ctr_one_block (ctx, ctr, src, dst, 4, n_bytes, r, 1);
+ else if (n_bytes > 2 * N_AES_BYTES)
+ ctr = aes_ctr_one_block (ctx, ctr, src, dst, 3, n_bytes, r, 1);
+ else if (n_bytes > N_AES_BYTES)
+ ctr = aes_ctr_one_block (ctx, ctr, src, dst, 2, n_bytes, r, 1);
+ else
+ ctr = aes_ctr_one_block (ctx, ctr, src, dst, 1, n_bytes, r, 1);
+ }
+ else
+ ctx->n_keystream_bytes = 0;
+
+ ctx->ctr = ctr;
+}
+
+static_always_inline void
+clib_aes_ctr_key_expand (aes_ctr_key_data_t *kd, const u8 *key,
+ aes_key_size_t ks)
+{
+ u8x16 ek[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+ aes_expaned_key_t *k = (aes_expaned_key_t *) kd->exp_key;
+
+ /* expand AES key */
+ aes_key_expand (ek, key, ks);
+ for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
+ k[i].lanes[0] = k[i].lanes[1] = k[i].lanes[2] = k[i].lanes[3] = ek[i];
+}
+
+static_always_inline void
+clib_aes128_ctr (const aes_ctr_key_data_t *kd, const u8 *src, u32 n_bytes,
+ const u8 *iv, u8 *dst)
+{
+ aes_ctr_ctx_t ctx;
+ clib_aes_ctr_init (&ctx, kd, iv, AES_KEY_128);
+ clib_aes_ctr_transform (&ctx, src, dst, n_bytes, AES_KEY_128);
+}
+
+static_always_inline void
+clib_aes192_ctr (const aes_ctr_key_data_t *kd, const u8 *src, u32 n_bytes,
+ const u8 *iv, u8 *dst)
+{
+ aes_ctr_ctx_t ctx;
+ clib_aes_ctr_init (&ctx, kd, iv, AES_KEY_192);
+ clib_aes_ctr_transform (&ctx, src, dst, n_bytes, AES_KEY_192);
+}
+
+static_always_inline void
+clib_aes256_ctr (const aes_ctr_key_data_t *kd, const u8 *src, u32 n_bytes,
+ const u8 *iv, u8 *dst)
+{
+ aes_ctr_ctx_t ctx;
+ clib_aes_ctr_init (&ctx, kd, iv, AES_KEY_256);
+ clib_aes_ctr_transform (&ctx, src, dst, n_bytes, AES_KEY_256);
+}
+
+#endif /* __crypto_aes_ctr_h__ */
diff --git a/src/vppinfra/crypto/aes_gcm.h b/src/vppinfra/crypto/aes_gcm.h
new file mode 100644
index 00000000000..5b628c87745
--- /dev/null
+++ b/src/vppinfra/crypto/aes_gcm.h
@@ -0,0 +1,944 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef __crypto_aes_gcm_h__
+#define __crypto_aes_gcm_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/string.h>
+#include <vppinfra/crypto/aes.h>
+#include <vppinfra/crypto/ghash.h>
+
+#define NUM_HI 36
+#if N_AES_LANES == 4
+typedef u8x64u aes_ghash_t;
+#define aes_gcm_splat(v) u8x64_splat (v)
+#define aes_gcm_ghash_reduce(c) ghash4_reduce (&(c)->gd)
+#define aes_gcm_ghash_reduce2(c) ghash4_reduce2 (&(c)->gd)
+#define aes_gcm_ghash_final(c) (c)->T = ghash4_final (&(c)->gd)
+#elif N_AES_LANES == 2
+typedef u8x32u aes_ghash_t;
+#define aes_gcm_splat(v) u8x32_splat (v)
+#define aes_gcm_ghash_reduce(c) ghash2_reduce (&(c)->gd)
+#define aes_gcm_ghash_reduce2(c) ghash2_reduce2 (&(c)->gd)
+#define aes_gcm_ghash_final(c) (c)->T = ghash2_final (&(c)->gd)
+#else
+typedef u8x16 aes_ghash_t;
+#define aes_gcm_splat(v) u8x16_splat (v)
+#define aes_gcm_ghash_reduce(c) ghash_reduce (&(c)->gd)
+#define aes_gcm_ghash_reduce2(c) ghash_reduce2 (&(c)->gd)
+#define aes_gcm_ghash_final(c) (c)->T = ghash_final (&(c)->gd)
+#endif
+
+typedef enum
+{
+ AES_GCM_OP_UNKNONW = 0,
+ AES_GCM_OP_ENCRYPT,
+ AES_GCM_OP_DECRYPT,
+ AES_GCM_OP_GMAC
+} aes_gcm_op_t;
+
+typedef struct
+{
+ /* pre-calculated hash key values */
+ const u8x16 Hi[NUM_HI];
+ /* extracted AES key */
+ const aes_expaned_key_t Ke[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+} aes_gcm_key_data_t;
+
+typedef struct
+{
+ aes_gcm_op_t operation;
+ int last;
+ u8 rounds;
+ uword data_bytes;
+ uword aad_bytes;
+
+ u8x16 T;
+
+ /* hash */
+ const u8x16 *Hi;
+ const aes_ghash_t *next_Hi;
+
+ /* expaded keys */
+ const aes_expaned_key_t *Ke;
+
+ /* counter */
+ u32 counter;
+ u8x16 EY0;
+ aes_counter_t Y;
+
+ /* ghash */
+ ghash_ctx_t gd;
+} aes_gcm_ctx_t;
+
+static_always_inline u8x16
+aes_gcm_final_block (aes_gcm_ctx_t *ctx)
+{
+ return (u8x16) ((u64x2){ ctx->data_bytes, ctx->aad_bytes } << 3);
+}
+
+static_always_inline void
+aes_gcm_ghash_mul_first (aes_gcm_ctx_t *ctx, aes_data_t data, u32 n_lanes)
+{
+ uword hash_offset = NUM_HI - n_lanes;
+ ctx->next_Hi = (aes_ghash_t *) (ctx->Hi + hash_offset);
+#if N_AES_LANES == 4
+ u8x64 tag4 = {};
+ tag4 = u8x64_insert_u8x16 (tag4, ctx->T, 0);
+ ghash4_mul_first (&ctx->gd, aes_reflect (data) ^ tag4, *ctx->next_Hi++);
+#elif N_AES_LANES == 2
+ u8x32 tag2 = {};
+ tag2 = u8x32_insert_lo (tag2, ctx->T);
+ ghash2_mul_first (&ctx->gd, aes_reflect (data) ^ tag2, *ctx->next_Hi++);
+#else
+ ghash_mul_first (&ctx->gd, aes_reflect (data) ^ ctx->T, *ctx->next_Hi++);
+#endif
+}
+
+static_always_inline void
+aes_gcm_ghash_mul_next (aes_gcm_ctx_t *ctx, aes_data_t data)
+{
+#if N_AES_LANES == 4
+ ghash4_mul_next (&ctx->gd, aes_reflect (data), *ctx->next_Hi++);
+#elif N_AES_LANES == 2
+ ghash2_mul_next (&ctx->gd, aes_reflect (data), *ctx->next_Hi++);
+#else
+ ghash_mul_next (&ctx->gd, aes_reflect (data), *ctx->next_Hi++);
+#endif
+}
+
+static_always_inline void
+aes_gcm_ghash_mul_final_block (aes_gcm_ctx_t *ctx)
+{
+#if N_AES_LANES == 4
+ u8x64 h = u8x64_insert_u8x16 (u8x64_zero (), ctx->Hi[NUM_HI - 1], 0);
+ u8x64 r4 = u8x64_insert_u8x16 (u8x64_zero (), aes_gcm_final_block (ctx), 0);
+ ghash4_mul_next (&ctx->gd, r4, h);
+#elif N_AES_LANES == 2
+ u8x32 h = u8x32_insert_lo (u8x32_zero (), ctx->Hi[NUM_HI - 1]);
+ u8x32 r2 = u8x32_insert_lo (u8x32_zero (), aes_gcm_final_block (ctx));
+ ghash2_mul_next (&ctx->gd, r2, h);
+#else
+ ghash_mul_next (&ctx->gd, aes_gcm_final_block (ctx), ctx->Hi[NUM_HI - 1]);
+#endif
+}
+
+static_always_inline void
+aes_gcm_enc_ctr0_round (aes_gcm_ctx_t *ctx, int aes_round)
+{
+ if (aes_round == 0)
+ ctx->EY0 ^= ctx->Ke[0].x1;
+ else if (aes_round == ctx->rounds)
+ ctx->EY0 = aes_enc_last_round_x1 (ctx->EY0, ctx->Ke[aes_round].x1);
+ else
+ ctx->EY0 = aes_enc_round_x1 (ctx->EY0, ctx->Ke[aes_round].x1);
+}
+
+static_always_inline void
+aes_gcm_ghash (aes_gcm_ctx_t *ctx, u8 *data, u32 n_left)
+{
+ uword i;
+ aes_data_t r = {};
+ const aes_mem_t *d = (aes_mem_t *) data;
+
+ for (int n = 8 * N_AES_BYTES; n_left >= n; n_left -= n, d += 8)
+ {
+ if (ctx->operation == AES_GCM_OP_GMAC && n_left == n)
+ {
+ aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_AES_LANES + 1);
+ for (i = 1; i < 8; i++)
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_mul_final_block (ctx);
+ aes_gcm_ghash_reduce (ctx);
+ aes_gcm_ghash_reduce2 (ctx);
+ aes_gcm_ghash_final (ctx);
+ goto done;
+ }
+
+ aes_gcm_ghash_mul_first (ctx, d[0], 8 * N_AES_LANES);
+ for (i = 1; i < 8; i++)
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_reduce (ctx);
+ aes_gcm_ghash_reduce2 (ctx);
+ aes_gcm_ghash_final (ctx);
+ }
+
+ if (n_left > 0)
+ {
+ int n_lanes = (n_left + 15) / 16;
+
+ if (ctx->operation == AES_GCM_OP_GMAC)
+ n_lanes++;
+
+ if (n_left < N_AES_BYTES)
+ {
+ clib_memcpy_fast (&r, d, n_left);
+ aes_gcm_ghash_mul_first (ctx, r, n_lanes);
+ }
+ else
+ {
+ aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
+ n_left -= N_AES_BYTES;
+ i = 1;
+
+ if (n_left >= 4 * N_AES_BYTES)
+ {
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 1]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 2]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 3]);
+ n_left -= 4 * N_AES_BYTES;
+ i += 4;
+ }
+ if (n_left >= 2 * N_AES_BYTES)
+ {
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ aes_gcm_ghash_mul_next (ctx, d[i + 1]);
+ n_left -= 2 * N_AES_BYTES;
+ i += 2;
+ }
+
+ if (n_left >= N_AES_BYTES)
+ {
+ aes_gcm_ghash_mul_next (ctx, d[i]);
+ n_left -= N_AES_BYTES;
+ i += 1;
+ }
+
+ if (n_left)
+ {
+ clib_memcpy_fast (&r, d + i, n_left);
+ aes_gcm_ghash_mul_next (ctx, r);
+ }
+ }
+
+ if (ctx->operation == AES_GCM_OP_GMAC)
+ aes_gcm_ghash_mul_final_block (ctx);
+ aes_gcm_ghash_reduce (ctx);
+ aes_gcm_ghash_reduce2 (ctx);
+ aes_gcm_ghash_final (ctx);
+ }
+ else if (ctx->operation == AES_GCM_OP_GMAC)
+ ctx->T =
+ ghash_mul (aes_gcm_final_block (ctx) ^ ctx->T, ctx->Hi[NUM_HI - 1]);
+
+done:
+ /* encrypt counter 0 E(Y0, k) */
+ if (ctx->operation == AES_GCM_OP_GMAC)
+ for (int i = 0; i < ctx->rounds + 1; i += 1)
+ aes_gcm_enc_ctr0_round (ctx, i);
+}
+
+static_always_inline void
+aes_gcm_enc_first_round (aes_gcm_ctx_t *ctx, aes_data_t *r, uword n_blocks)
+{
+ const aes_expaned_key_t Ke0 = ctx->Ke[0];
+ uword i = 0;
+
+ /* As counter is stored in network byte order for performance reasons we
+ are incrementing least significant byte only except in case where we
+ overlow. As we are processing four 128, 256 or 512-blocks in parallel
+ except the last round, overflow can happen only when n_blocks == 4 */
+
+#if N_AES_LANES == 4
+ const u32x16 ctr_inv_4444 = { 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24,
+ 0, 0, 0, 4 << 24, 0, 0, 0, 4 << 24 };
+
+ const u32x16 ctr_4444 = {
+ 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0,
+ };
+
+ if (n_blocks == 4)
+ for (; i < 2; i++)
+ {
+ r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_4444;
+ }
+
+ if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 242))
+ {
+ u32x16 Yr = (u32x16) aes_reflect ((u8x64) ctx->Y);
+
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
+ Yr += ctr_4444;
+ ctx->Y = (u32x16) aes_reflect ((u8x64) Yr);
+ }
+ }
+ else
+ {
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x4 ^ (u8x64) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_4444;
+ }
+ }
+ ctx->counter += n_blocks * 4;
+#elif N_AES_LANES == 2
+ const u32x8 ctr_inv_22 = { 0, 0, 0, 2 << 24, 0, 0, 0, 2 << 24 };
+ const u32x8 ctr_22 = { 2, 0, 0, 0, 2, 0, 0, 0 };
+
+ if (n_blocks == 4)
+ for (; i < 2; i++)
+ {
+ r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_22;
+ }
+
+ if (n_blocks == 4 && PREDICT_FALSE ((u8) ctx->counter == 250))
+ {
+ u32x8 Yr = (u32x8) aes_reflect ((u8x32) ctx->Y);
+
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
+ Yr += ctr_22;
+ ctx->Y = (u32x8) aes_reflect ((u8x32) Yr);
+ }
+ }
+ else
+ {
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x2 ^ (u8x32) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_22;
+ }
+ }
+ ctx->counter += n_blocks * 2;
+#else
+ const u32x4 ctr_inv_1 = { 0, 0, 0, 1 << 24 };
+
+ if (PREDICT_TRUE ((u8) ctx->counter < 0xfe) || n_blocks < 3)
+ {
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_1;
+ }
+ ctx->counter += n_blocks;
+ }
+ else
+ {
+ r[i++] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
+ ctx->Y += ctr_inv_1;
+ ctx->counter += 1;
+
+ for (; i < n_blocks; i++)
+ {
+ r[i] = Ke0.x1 ^ (u8x16) ctx->Y; /* Initial AES round */
+ ctx->counter++;
+ ctx->Y[3] = clib_host_to_net_u32 (ctx->counter);
+ }
+ }
+#endif
+}
+
+static_always_inline void
+aes_gcm_enc_last_round (aes_gcm_ctx_t *ctx, aes_data_t *r, aes_data_t *d,
+ const aes_expaned_key_t *Ke, uword n_blocks)
+{
+ /* additional ronuds for AES-192 and AES-256 */
+ for (int i = 10; i < ctx->rounds; i++)
+ aes_enc_round (r, Ke + i, n_blocks);
+
+ aes_enc_last_round (r, d, Ke + ctx->rounds, n_blocks);
+}
+
+static_always_inline void
+aes_gcm_calc (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst, u32 n,
+ u32 n_bytes, int with_ghash)
+{
+ const aes_expaned_key_t *k = ctx->Ke;
+ const aes_mem_t *sv = (aes_mem_t *) src;
+ aes_mem_t *dv = (aes_mem_t *) dst;
+ uword ghash_blocks, gc = 1;
+ aes_data_t r[4];
+ u32 i, n_lanes;
+
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ {
+ ghash_blocks = 4;
+ n_lanes = N_AES_LANES * 4;
+ }
+ else
+ {
+ ghash_blocks = n;
+ n_lanes = n * N_AES_LANES;
+#if N_AES_LANES != 1
+ if (ctx->last)
+ n_lanes = (n_bytes + 15) / 16;
+#endif
+ }
+
+ n_bytes -= (n - 1) * N_AES_BYTES;
+
+ /* AES rounds 0 and 1 */
+ aes_gcm_enc_first_round (ctx, r, n);
+ aes_enc_round (r, k + 1, n);
+
+ /* load data - decrypt round */
+ if (ctx->operation == AES_GCM_OP_DECRYPT)
+ {
+ for (i = 0; i < n - ctx->last; i++)
+ d[i] = sv[i];
+
+ if (ctx->last)
+ d[n - 1] = aes_load_partial ((u8 *) (sv + n - 1), n_bytes);
+ }
+
+ /* GHASH multiply block 0 */
+ if (with_ghash)
+ aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
+
+ /* AES rounds 2 and 3 */
+ aes_enc_round (r, k + 2, n);
+ aes_enc_round (r, k + 3, n);
+
+ /* GHASH multiply block 1 */
+ if (with_ghash && gc++ < ghash_blocks)
+ aes_gcm_ghash_mul_next (ctx, (d[1]));
+
+ /* AES rounds 4 and 5 */
+ aes_enc_round (r, k + 4, n);
+ aes_enc_round (r, k + 5, n);
+
+ /* GHASH multiply block 2 */
+ if (with_ghash && gc++ < ghash_blocks)
+ aes_gcm_ghash_mul_next (ctx, (d[2]));
+
+ /* AES rounds 6 and 7 */
+ aes_enc_round (r, k + 6, n);
+ aes_enc_round (r, k + 7, n);
+
+ /* GHASH multiply block 3 */
+ if (with_ghash && gc++ < ghash_blocks)
+ aes_gcm_ghash_mul_next (ctx, (d[3]));
+
+ /* load 4 blocks of data - decrypt round */
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ {
+ for (i = 0; i < n - ctx->last; i++)
+ d[i] = sv[i];
+
+ if (ctx->last)
+ d[n - 1] = aes_load_partial (sv + n - 1, n_bytes);
+ }
+
+ /* AES rounds 8 and 9 */
+ aes_enc_round (r, k + 8, n);
+ aes_enc_round (r, k + 9, n);
+
+ /* AES last round(s) */
+ aes_gcm_enc_last_round (ctx, r, d, k, n);
+
+ /* store data */
+ for (i = 0; i < n - ctx->last; i++)
+ dv[i] = d[i];
+
+ if (ctx->last)
+ aes_store_partial (d[n - 1], dv + n - 1, n_bytes);
+
+ /* GHASH reduce 1st step */
+ aes_gcm_ghash_reduce (ctx);
+
+ /* GHASH reduce 2nd step */
+ if (with_ghash)
+ aes_gcm_ghash_reduce2 (ctx);
+
+ /* GHASH final step */
+ if (with_ghash)
+ aes_gcm_ghash_final (ctx);
+}
+
+static_always_inline void
+aes_gcm_calc_double (aes_gcm_ctx_t *ctx, aes_data_t *d, const u8 *src, u8 *dst)
+{
+ const aes_expaned_key_t *k = ctx->Ke;
+ const aes_mem_t *sv = (aes_mem_t *) src;
+ aes_mem_t *dv = (aes_mem_t *) dst;
+ aes_data_t r[4];
+
+ /* AES rounds 0 and 1 */
+ aes_gcm_enc_first_round (ctx, r, 4);
+ aes_enc_round (r, k + 1, 4);
+
+ /* load 4 blocks of data - decrypt round */
+ if (ctx->operation == AES_GCM_OP_DECRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i];
+
+ /* GHASH multiply block 0 */
+ aes_gcm_ghash_mul_first (ctx, d[0], N_AES_LANES * 8);
+
+ /* AES rounds 2 and 3 */
+ aes_enc_round (r, k + 2, 4);
+ aes_enc_round (r, k + 3, 4);
+
+ /* GHASH multiply block 1 */
+ aes_gcm_ghash_mul_next (ctx, (d[1]));
+
+ /* AES rounds 4 and 5 */
+ aes_enc_round (r, k + 4, 4);
+ aes_enc_round (r, k + 5, 4);
+
+ /* GHASH multiply block 2 */
+ aes_gcm_ghash_mul_next (ctx, (d[2]));
+
+ /* AES rounds 6 and 7 */
+ aes_enc_round (r, k + 6, 4);
+ aes_enc_round (r, k + 7, 4);
+
+ /* GHASH multiply block 3 */
+ aes_gcm_ghash_mul_next (ctx, (d[3]));
+
+ /* AES rounds 8 and 9 */
+ aes_enc_round (r, k + 8, 4);
+ aes_enc_round (r, k + 9, 4);
+
+ /* load 4 blocks of data - encrypt round */
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i];
+
+ /* AES last round(s) */
+ aes_gcm_enc_last_round (ctx, r, d, k, 4);
+
+ /* store 4 blocks of data */
+ for (int i = 0; i < 4; i++)
+ dv[i] = d[i];
+
+ /* load next 4 blocks of data data - decrypt round */
+ if (ctx->operation == AES_GCM_OP_DECRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i + 4];
+
+ /* GHASH multiply block 4 */
+ aes_gcm_ghash_mul_next (ctx, (d[0]));
+
+ /* AES rounds 0 and 1 */
+ aes_gcm_enc_first_round (ctx, r, 4);
+ aes_enc_round (r, k + 1, 4);
+
+ /* GHASH multiply block 5 */
+ aes_gcm_ghash_mul_next (ctx, (d[1]));
+
+ /* AES rounds 2 and 3 */
+ aes_enc_round (r, k + 2, 4);
+ aes_enc_round (r, k + 3, 4);
+
+ /* GHASH multiply block 6 */
+ aes_gcm_ghash_mul_next (ctx, (d[2]));
+
+ /* AES rounds 4 and 5 */
+ aes_enc_round (r, k + 4, 4);
+ aes_enc_round (r, k + 5, 4);
+
+ /* GHASH multiply block 7 */
+ aes_gcm_ghash_mul_next (ctx, (d[3]));
+
+ /* AES rounds 6 and 7 */
+ aes_enc_round (r, k + 6, 4);
+ aes_enc_round (r, k + 7, 4);
+
+ /* GHASH reduce 1st step */
+ aes_gcm_ghash_reduce (ctx);
+
+ /* AES rounds 8 and 9 */
+ aes_enc_round (r, k + 8, 4);
+ aes_enc_round (r, k + 9, 4);
+
+ /* GHASH reduce 2nd step */
+ aes_gcm_ghash_reduce2 (ctx);
+
+ /* load 4 blocks of data - encrypt round */
+ if (ctx->operation == AES_GCM_OP_ENCRYPT)
+ for (int i = 0; i < 4; i++)
+ d[i] = sv[i + 4];
+
+ /* AES last round(s) */
+ aes_gcm_enc_last_round (ctx, r, d, k, 4);
+
+ /* store data */
+ for (int i = 0; i < 4; i++)
+ dv[i + 4] = d[i];
+
+ /* GHASH final step */
+ aes_gcm_ghash_final (ctx);
+}
+
+static_always_inline void
+aes_gcm_mask_bytes (aes_data_t *d, uword n_bytes)
+{
+ const union
+ {
+ u8 b[64];
+ aes_data_t r;
+ } scale = {
+ .b = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 },
+ };
+
+ d[0] &= (aes_gcm_splat (n_bytes) > scale.r);
+}
+
+static_always_inline void
+aes_gcm_calc_last (aes_gcm_ctx_t *ctx, aes_data_t *d, int n_blocks,
+ u32 n_bytes)
+{
+ int n_lanes = (N_AES_LANES == 1 ? n_blocks : (n_bytes + 15) / 16) + 1;
+ n_bytes -= (n_blocks - 1) * N_AES_BYTES;
+ int i;
+
+ aes_gcm_enc_ctr0_round (ctx, 0);
+ aes_gcm_enc_ctr0_round (ctx, 1);
+
+ if (n_bytes != N_AES_BYTES)
+ aes_gcm_mask_bytes (d + n_blocks - 1, n_bytes);
+
+ aes_gcm_ghash_mul_first (ctx, d[0], n_lanes);
+
+ aes_gcm_enc_ctr0_round (ctx, 2);
+ aes_gcm_enc_ctr0_round (ctx, 3);
+
+ if (n_blocks > 1)
+ aes_gcm_ghash_mul_next (ctx, d[1]);
+
+ aes_gcm_enc_ctr0_round (ctx, 4);
+ aes_gcm_enc_ctr0_round (ctx, 5);
+
+ if (n_blocks > 2)
+ aes_gcm_ghash_mul_next (ctx, d[2]);
+
+ aes_gcm_enc_ctr0_round (ctx, 6);
+ aes_gcm_enc_ctr0_round (ctx, 7);
+
+ if (n_blocks > 3)
+ aes_gcm_ghash_mul_next (ctx, d[3]);
+
+ aes_gcm_enc_ctr0_round (ctx, 8);
+ aes_gcm_enc_ctr0_round (ctx, 9);
+
+ aes_gcm_ghash_mul_final_block (ctx);
+ aes_gcm_ghash_reduce (ctx);
+
+ for (i = 10; i < ctx->rounds; i++)
+ aes_gcm_enc_ctr0_round (ctx, i);
+
+ aes_gcm_ghash_reduce2 (ctx);
+
+ aes_gcm_ghash_final (ctx);
+
+ aes_gcm_enc_ctr0_round (ctx, i);
+}
+
+static_always_inline void
+aes_gcm_enc (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, u32 n_left)
+{
+ aes_data_t d[4];
+
+ if (PREDICT_FALSE (n_left == 0))
+ {
+ int i;
+ for (i = 0; i < ctx->rounds + 1; i++)
+ aes_gcm_enc_ctr0_round (ctx, i);
+ return;
+ }
+
+ if (n_left < 4 * N_AES_BYTES)
+ {
+ ctx->last = 1;
+ if (n_left > 3 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 4, n_left);
+ }
+ else if (n_left > 2 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 3, n_left);
+ }
+ else if (n_left > N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 2, n_left);
+ }
+ else
+ {
+ aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 0);
+ aes_gcm_calc_last (ctx, d, 1, n_left);
+ }
+ return;
+ }
+
+ aes_gcm_calc (ctx, d, src, dst, 4, 4 * N_AES_BYTES, /* with_ghash */ 0);
+
+ /* next */
+ n_left -= 4 * N_AES_BYTES;
+ dst += 4 * N_AES_BYTES;
+ src += 4 * N_AES_BYTES;
+
+ for (int n = 8 * N_AES_BYTES; n_left >= n; n_left -= n, src += n, dst += n)
+ aes_gcm_calc_double (ctx, d, src, dst);
+
+ if (n_left >= 4 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, 4 * N_AES_BYTES, /* with_ghash */ 1);
+
+ /* next */
+ n_left -= 4 * N_AES_BYTES;
+ dst += 4 * N_AES_BYTES;
+ src += 4 * N_AES_BYTES;
+ }
+
+ if (n_left == 0)
+ {
+ aes_gcm_calc_last (ctx, d, 4, 4 * N_AES_BYTES);
+ return;
+ }
+
+ ctx->last = 1;
+
+ if (n_left > 3 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 4, n_left);
+ }
+ else if (n_left > 2 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 3, n_left);
+ }
+ else if (n_left > N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 2, n_left);
+ }
+ else
+ {
+ aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
+ aes_gcm_calc_last (ctx, d, 1, n_left);
+ }
+}
+
+static_always_inline void
+aes_gcm_dec (aes_gcm_ctx_t *ctx, const u8 *src, u8 *dst, uword n_left)
+{
+ aes_data_t d[4] = {};
+ ghash_ctx_t gd;
+
+ /* main encryption loop */
+ for (int n = 8 * N_AES_BYTES; n_left >= n; n_left -= n, dst += n, src += n)
+ aes_gcm_calc_double (ctx, d, src, dst);
+
+ if (n_left >= 4 * N_AES_BYTES)
+ {
+ aes_gcm_calc (ctx, d, src, dst, 4, 4 * N_AES_BYTES, /* with_ghash */ 1);
+
+ /* next */
+ n_left -= 4 * N_AES_BYTES;
+ dst += N_AES_BYTES * 4;
+ src += N_AES_BYTES * 4;
+ }
+
+ if (n_left)
+ {
+ ctx->last = 1;
+
+ if (n_left > 3 * N_AES_BYTES)
+ aes_gcm_calc (ctx, d, src, dst, 4, n_left, /* with_ghash */ 1);
+ else if (n_left > 2 * N_AES_BYTES)
+ aes_gcm_calc (ctx, d, src, dst, 3, n_left, /* with_ghash */ 1);
+ else if (n_left > N_AES_BYTES)
+ aes_gcm_calc (ctx, d, src, dst, 2, n_left, /* with_ghash */ 1);
+ else
+ aes_gcm_calc (ctx, d, src, dst, 1, n_left, /* with_ghash */ 1);
+ }
+
+ /* interleaved counter 0 encryption E(Y0, k) and ghash of final GCM
+ * (bit length) block */
+
+ aes_gcm_enc_ctr0_round (ctx, 0);
+ aes_gcm_enc_ctr0_round (ctx, 1);
+
+ ghash_mul_first (&gd, aes_gcm_final_block (ctx) ^ ctx->T,
+ ctx->Hi[NUM_HI - 1]);
+
+ aes_gcm_enc_ctr0_round (ctx, 2);
+ aes_gcm_enc_ctr0_round (ctx, 3);
+
+ ghash_reduce (&gd);
+
+ aes_gcm_enc_ctr0_round (ctx, 4);
+ aes_gcm_enc_ctr0_round (ctx, 5);
+
+ ghash_reduce2 (&gd);
+
+ aes_gcm_enc_ctr0_round (ctx, 6);
+ aes_gcm_enc_ctr0_round (ctx, 7);
+
+ ctx->T = ghash_final (&gd);
+
+ aes_gcm_enc_ctr0_round (ctx, 8);
+ aes_gcm_enc_ctr0_round (ctx, 9);
+
+ for (int i = 10; i < ctx->rounds + 1; i += 1)
+ aes_gcm_enc_ctr0_round (ctx, i);
+}
+
+static_always_inline int
+aes_gcm (const u8 *src, u8 *dst, const u8 *aad, u8 *ivp, u8 *tag,
+ u32 data_bytes, u32 aad_bytes, u8 tag_len,
+ const aes_gcm_key_data_t *kd, int aes_rounds, aes_gcm_op_t op)
+{
+ u8 *addt = (u8 *) aad;
+ u32x4 Y0;
+
+ aes_gcm_ctx_t _ctx = { .counter = 2,
+ .rounds = aes_rounds,
+ .operation = op,
+ .data_bytes = data_bytes,
+ .aad_bytes = aad_bytes,
+ .Ke = kd->Ke,
+ .Hi = kd->Hi },
+ *ctx = &_ctx;
+
+ /* initalize counter */
+ Y0 = (u32x4) (u64x2){ *(u64u *) ivp, 0 };
+ Y0[2] = *(u32u *) (ivp + 8);
+ Y0[3] = 1 << 24;
+ ctx->EY0 = (u8x16) Y0;
+
+#if N_AES_LANES == 4
+ ctx->Y = u32x16_splat_u32x4 (Y0) + (u32x16){
+ 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24, 0, 0, 0, 3 << 24, 0, 0, 0, 4 << 24,
+ };
+#elif N_AES_LANES == 2
+ ctx->Y =
+ u32x8_splat_u32x4 (Y0) + (u32x8){ 0, 0, 0, 1 << 24, 0, 0, 0, 2 << 24 };
+#else
+ ctx->Y = Y0 + (u32x4){ 0, 0, 0, 1 << 24 };
+#endif
+
+ /* calculate ghash for AAD */
+ aes_gcm_ghash (ctx, addt, aad_bytes);
+
+ /* ghash and encrypt/edcrypt */
+ if (op == AES_GCM_OP_ENCRYPT)
+ aes_gcm_enc (ctx, src, dst, data_bytes);
+ else if (op == AES_GCM_OP_DECRYPT)
+ aes_gcm_dec (ctx, src, dst, data_bytes);
+
+ /* final tag is */
+ ctx->T = u8x16_reflect (ctx->T) ^ ctx->EY0;
+
+ /* tag_len 16 -> 0 */
+ tag_len &= 0xf;
+
+ if (op == AES_GCM_OP_ENCRYPT || op == AES_GCM_OP_GMAC)
+ {
+ /* store tag */
+ if (tag_len)
+ u8x16_store_partial (ctx->T, tag, tag_len);
+ else
+ ((u8x16u *) tag)[0] = ctx->T;
+ }
+ else
+ {
+ /* check tag */
+ if (tag_len)
+ {
+ u16 mask = pow2_mask (tag_len);
+ u8x16 expected = u8x16_load_partial (tag, tag_len);
+ if ((u8x16_msb_mask (expected == ctx->T) & mask) == mask)
+ return 1;
+ }
+ else
+ {
+ if (u8x16_is_equal (ctx->T, *(u8x16u *) tag))
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static_always_inline void
+clib_aes_gcm_key_expand (aes_gcm_key_data_t *kd, const u8 *key,
+ aes_key_size_t ks)
+{
+ u8x16 H;
+ u8x16 ek[AES_KEY_ROUNDS (AES_KEY_256) + 1];
+ aes_expaned_key_t *Ke = (aes_expaned_key_t *) kd->Ke;
+
+ /* expand AES key */
+ aes_key_expand (ek, key, ks);
+ for (int i = 0; i < AES_KEY_ROUNDS (ks) + 1; i++)
+ Ke[i].lanes[0] = Ke[i].lanes[1] = Ke[i].lanes[2] = Ke[i].lanes[3] = ek[i];
+
+ /* pre-calculate H */
+ H = aes_encrypt_block (u8x16_zero (), ek, ks);
+ H = u8x16_reflect (H);
+ ghash_precompute (H, (u8x16 *) kd->Hi, ARRAY_LEN (kd->Hi));
+}
+
+static_always_inline void
+clib_aes128_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag)
+{
+ aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
+ tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_ENCRYPT);
+}
+
+static_always_inline void
+clib_aes256_gcm_enc (const aes_gcm_key_data_t *kd, const u8 *plaintext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *cyphertext, u8 *tag)
+{
+ aes_gcm (plaintext, cyphertext, aad, (u8 *) iv, tag, data_bytes, aad_bytes,
+ tag_bytes, kd, AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_ENCRYPT);
+}
+
+static_always_inline int
+clib_aes128_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
+{
+ return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
+ data_bytes, aad_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_DECRYPT);
+}
+
+static_always_inline int
+clib_aes256_gcm_dec (const aes_gcm_key_data_t *kd, const u8 *cyphertext,
+ u32 data_bytes, const u8 *aad, u32 aad_bytes,
+ const u8 *iv, const u8 *tag, u32 tag_bytes, u8 *plaintext)
+{
+ return aes_gcm (cyphertext, plaintext, aad, (u8 *) iv, (u8 *) tag,
+ data_bytes, aad_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_DECRYPT);
+}
+
+static_always_inline void
+clib_aes128_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *tag)
+{
+ aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_128), AES_GCM_OP_GMAC);
+}
+
+static_always_inline void
+clib_aes256_gmac (const aes_gcm_key_data_t *kd, const u8 *data, u32 data_bytes,
+ const u8 *iv, u32 tag_bytes, u8 *tag)
+{
+ aes_gcm (0, 0, data, (u8 *) iv, tag, 0, data_bytes, tag_bytes, kd,
+ AES_KEY_ROUNDS (AES_KEY_256), AES_GCM_OP_GMAC);
+}
+
+#endif /* __crypto_aes_gcm_h__ */
diff --git a/src/plugins/crypto_native/ghash.h b/src/vppinfra/crypto/ghash.h
index f389d11cfe7..66e3f6a673a 100644
--- a/src/plugins/crypto_native/ghash.h
+++ b/src/vppinfra/crypto/ghash.h
@@ -86,10 +86,10 @@
* This allows us to improve performance by deferring reduction. For example
* to caclulate ghash of 4 128-bit blocks of data (b0, b1, b2, b3), we can do:
*
- * __i128 Hi[4];
+ * u8x16 Hi[4];
* ghash_precompute (H, Hi, 4);
*
- * ghash_data_t _gd, *gd = &_gd;
+ * ghash_ctx_t _gd, *gd = &_gd;
* ghash_mul_first (gd, GH ^ b0, Hi[3]);
* ghash_mul_next (gd, b1, Hi[2]);
* ghash_mul_next (gd, b2, Hi[1]);
@@ -151,8 +151,10 @@ gmul_hi_hi (u8x16 a, u8x16 b)
typedef struct
{
u8x16 mid, hi, lo, tmp_lo, tmp_hi;
+ u8x32 hi2, lo2, mid2, tmp_lo2, tmp_hi2;
+ u8x64 hi4, lo4, mid4, tmp_lo4, tmp_hi4;
int pending;
-} ghash_data_t;
+} ghash_ctx_t;
static const u8x16 ghash_poly = {
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -165,14 +167,14 @@ static const u8x16 ghash_poly2 = {
};
static_always_inline void
-ghash_mul_first (ghash_data_t * gd, u8x16 a, u8x16 b)
+ghash_mul_first (ghash_ctx_t *gd, u8x16 a, u8x16 b)
{
/* a1 * b1 */
gd->hi = gmul_hi_hi (a, b);
/* a0 * b0 */
gd->lo = gmul_lo_lo (a, b);
/* a0 * b1 ^ a1 * b0 */
- gd->mid = (gmul_hi_lo (a, b) ^ gmul_lo_hi (a, b));
+ gd->mid = gmul_hi_lo (a, b) ^ gmul_lo_hi (a, b);
/* set gd->pending to 0 so next invocation of ghash_mul_next(...) knows that
there is no pending data in tmp_lo and tmp_hi */
@@ -180,7 +182,7 @@ ghash_mul_first (ghash_data_t * gd, u8x16 a, u8x16 b)
}
static_always_inline void
-ghash_mul_next (ghash_data_t * gd, u8x16 a, u8x16 b)
+ghash_mul_next (ghash_ctx_t *gd, u8x16 a, u8x16 b)
{
/* a1 * b1 */
u8x16 hi = gmul_hi_hi (a, b);
@@ -209,7 +211,7 @@ ghash_mul_next (ghash_data_t * gd, u8x16 a, u8x16 b)
}
static_always_inline void
-ghash_reduce (ghash_data_t * gd)
+ghash_reduce (ghash_ctx_t *gd)
{
u8x16 r;
@@ -234,14 +236,14 @@ ghash_reduce (ghash_data_t * gd)
}
static_always_inline void
-ghash_reduce2 (ghash_data_t * gd)
+ghash_reduce2 (ghash_ctx_t *gd)
{
gd->tmp_lo = gmul_lo_lo (ghash_poly2, gd->lo);
gd->tmp_hi = gmul_lo_hi (ghash_poly2, gd->lo);
}
static_always_inline u8x16
-ghash_final (ghash_data_t * gd)
+ghash_final (ghash_ctx_t *gd)
{
return u8x16_xor3 (gd->hi, u8x16_word_shift_right (gd->tmp_lo, 4),
u8x16_word_shift_left (gd->tmp_hi, 4));
@@ -250,14 +252,14 @@ ghash_final (ghash_data_t * gd)
static_always_inline u8x16
ghash_mul (u8x16 a, u8x16 b)
{
- ghash_data_t _gd, *gd = &_gd;
+ ghash_ctx_t _gd, *gd = &_gd;
ghash_mul_first (gd, a, b);
ghash_reduce (gd);
ghash_reduce2 (gd);
return ghash_final (gd);
}
-#ifdef __VPCLMULQDQ__
+#if defined(__VPCLMULQDQ__) && defined(__AVX512F__)
static const u8x64 ghash4_poly2 = {
0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00,
@@ -270,12 +272,6 @@ static const u8x64 ghash4_poly2 = {
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
};
-typedef struct
-{
- u8x64 hi, lo, mid, tmp_lo, tmp_hi;
- int pending;
-} ghash4_data_t;
-
static_always_inline u8x64
gmul4_lo_lo (u8x64 a, u8x64 b)
{
@@ -300,18 +296,17 @@ gmul4_hi_hi (u8x64 a, u8x64 b)
return (u8x64) _mm512_clmulepi64_epi128 ((__m512i) a, (__m512i) b, 0x11);
}
-
static_always_inline void
-ghash4_mul_first (ghash4_data_t * gd, u8x64 a, u8x64 b)
+ghash4_mul_first (ghash_ctx_t *gd, u8x64 a, u8x64 b)
{
- gd->hi = gmul4_hi_hi (a, b);
- gd->lo = gmul4_lo_lo (a, b);
- gd->mid = (gmul4_hi_lo (a, b) ^ gmul4_lo_hi (a, b));
+ gd->hi4 = gmul4_hi_hi (a, b);
+ gd->lo4 = gmul4_lo_lo (a, b);
+ gd->mid4 = gmul4_hi_lo (a, b) ^ gmul4_lo_hi (a, b);
gd->pending = 0;
}
static_always_inline void
-ghash4_mul_next (ghash4_data_t * gd, u8x64 a, u8x64 b)
+ghash4_mul_next (ghash_ctx_t *gd, u8x64 a, u8x64 b)
{
u8x64 hi = gmul4_hi_hi (a, b);
u8x64 lo = gmul4_lo_lo (a, b);
@@ -319,63 +314,62 @@ ghash4_mul_next (ghash4_data_t * gd, u8x64 a, u8x64 b)
if (gd->pending)
{
/* there is peding data from previous invocation so we can XOR */
- gd->hi = u8x64_xor3 (gd->hi, gd->tmp_hi, hi);
- gd->lo = u8x64_xor3 (gd->lo, gd->tmp_lo, lo);
+ gd->hi4 = u8x64_xor3 (gd->hi4, gd->tmp_hi4, hi);
+ gd->lo4 = u8x64_xor3 (gd->lo4, gd->tmp_lo4, lo);
gd->pending = 0;
}
else
{
/* there is no peding data from previous invocation so we postpone XOR */
- gd->tmp_hi = hi;
- gd->tmp_lo = lo;
+ gd->tmp_hi4 = hi;
+ gd->tmp_lo4 = lo;
gd->pending = 1;
}
- gd->mid = u8x64_xor3 (gd->mid, gmul4_hi_lo (a, b), gmul4_lo_hi (a, b));
+ gd->mid4 = u8x64_xor3 (gd->mid4, gmul4_hi_lo (a, b), gmul4_lo_hi (a, b));
}
static_always_inline void
-ghash4_reduce (ghash4_data_t * gd)
+ghash4_reduce (ghash_ctx_t *gd)
{
u8x64 r;
/* Final combination:
- gd->lo ^= gd->mid << 64
- gd->hi ^= gd->mid >> 64 */
+ gd->lo4 ^= gd->mid4 << 64
+ gd->hi4 ^= gd->mid4 >> 64 */
- u8x64 midl = u8x64_word_shift_left (gd->mid, 8);
- u8x64 midr = u8x64_word_shift_right (gd->mid, 8);
+ u8x64 midl = u8x64_word_shift_left (gd->mid4, 8);
+ u8x64 midr = u8x64_word_shift_right (gd->mid4, 8);
if (gd->pending)
{
- gd->lo = u8x64_xor3 (gd->lo, gd->tmp_lo, midl);
- gd->hi = u8x64_xor3 (gd->hi, gd->tmp_hi, midr);
+ gd->lo4 = u8x64_xor3 (gd->lo4, gd->tmp_lo4, midl);
+ gd->hi4 = u8x64_xor3 (gd->hi4, gd->tmp_hi4, midr);
}
else
{
- gd->lo ^= midl;
- gd->hi ^= midr;
+ gd->lo4 ^= midl;
+ gd->hi4 ^= midr;
}
- r = gmul4_hi_lo (ghash4_poly2, gd->lo);
- gd->lo ^= u8x64_word_shift_left (r, 8);
-
+ r = gmul4_hi_lo (ghash4_poly2, gd->lo4);
+ gd->lo4 ^= u8x64_word_shift_left (r, 8);
}
static_always_inline void
-ghash4_reduce2 (ghash4_data_t * gd)
+ghash4_reduce2 (ghash_ctx_t *gd)
{
- gd->tmp_lo = gmul4_lo_lo (ghash4_poly2, gd->lo);
- gd->tmp_hi = gmul4_lo_hi (ghash4_poly2, gd->lo);
+ gd->tmp_lo4 = gmul4_lo_lo (ghash4_poly2, gd->lo4);
+ gd->tmp_hi4 = gmul4_lo_hi (ghash4_poly2, gd->lo4);
}
static_always_inline u8x16
-ghash4_final (ghash4_data_t * gd)
+ghash4_final (ghash_ctx_t *gd)
{
u8x64 r;
u8x32 t;
- r = u8x64_xor3 (gd->hi, u8x64_word_shift_right (gd->tmp_lo, 4),
- u8x64_word_shift_left (gd->tmp_hi, 4));
+ r = u8x64_xor3 (gd->hi4, u8x64_word_shift_right (gd->tmp_lo4, 4),
+ u8x64_word_shift_left (gd->tmp_hi4, 4));
/* horizontal XOR of 4 128-bit lanes */
t = u8x64_extract_lo (r) ^ u8x64_extract_hi (r);
@@ -383,6 +377,117 @@ ghash4_final (ghash4_data_t * gd)
}
#endif
+#if defined(__VPCLMULQDQ__)
+
+static const u8x32 ghash2_poly2 = {
+ 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0xc2, 0x00, 0x00, 0x00, 0xc2, 0x01, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2,
+};
+
+static_always_inline u8x32
+gmul2_lo_lo (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x00);
+}
+
+static_always_inline u8x32
+gmul2_hi_lo (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x01);
+}
+
+static_always_inline u8x32
+gmul2_lo_hi (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x10);
+}
+
+static_always_inline u8x32
+gmul2_hi_hi (u8x32 a, u8x32 b)
+{
+ return (u8x32) _mm256_clmulepi64_epi128 ((__m256i) a, (__m256i) b, 0x11);
+}
+
+static_always_inline void
+ghash2_mul_first (ghash_ctx_t *gd, u8x32 a, u8x32 b)
+{
+ gd->hi2 = gmul2_hi_hi (a, b);
+ gd->lo2 = gmul2_lo_lo (a, b);
+ gd->mid2 = gmul2_hi_lo (a, b) ^ gmul2_lo_hi (a, b);
+ gd->pending = 0;
+}
+
+static_always_inline void
+ghash2_mul_next (ghash_ctx_t *gd, u8x32 a, u8x32 b)
+{
+ u8x32 hi = gmul2_hi_hi (a, b);
+ u8x32 lo = gmul2_lo_lo (a, b);
+
+ if (gd->pending)
+ {
+ /* there is peding data from previous invocation so we can XOR */
+ gd->hi2 = u8x32_xor3 (gd->hi2, gd->tmp_hi2, hi);
+ gd->lo2 = u8x32_xor3 (gd->lo2, gd->tmp_lo2, lo);
+ gd->pending = 0;
+ }
+ else
+ {
+ /* there is no peding data from previous invocation so we postpone XOR */
+ gd->tmp_hi2 = hi;
+ gd->tmp_lo2 = lo;
+ gd->pending = 1;
+ }
+ gd->mid2 = u8x32_xor3 (gd->mid2, gmul2_hi_lo (a, b), gmul2_lo_hi (a, b));
+}
+
+static_always_inline void
+ghash2_reduce (ghash_ctx_t *gd)
+{
+ u8x32 r;
+
+ /* Final combination:
+ gd->lo2 ^= gd->mid2 << 64
+ gd->hi2 ^= gd->mid2 >> 64 */
+
+ u8x32 midl = u8x32_word_shift_left (gd->mid2, 8);
+ u8x32 midr = u8x32_word_shift_right (gd->mid2, 8);
+
+ if (gd->pending)
+ {
+ gd->lo2 = u8x32_xor3 (gd->lo2, gd->tmp_lo2, midl);
+ gd->hi2 = u8x32_xor3 (gd->hi2, gd->tmp_hi2, midr);
+ }
+ else
+ {
+ gd->lo2 ^= midl;
+ gd->hi2 ^= midr;
+ }
+
+ r = gmul2_hi_lo (ghash2_poly2, gd->lo2);
+ gd->lo2 ^= u8x32_word_shift_left (r, 8);
+}
+
+static_always_inline void
+ghash2_reduce2 (ghash_ctx_t *gd)
+{
+ gd->tmp_lo2 = gmul2_lo_lo (ghash2_poly2, gd->lo2);
+ gd->tmp_hi2 = gmul2_lo_hi (ghash2_poly2, gd->lo2);
+}
+
+static_always_inline u8x16
+ghash2_final (ghash_ctx_t *gd)
+{
+ u8x32 r;
+
+ r = u8x32_xor3 (gd->hi2, u8x32_word_shift_right (gd->tmp_lo2, 4),
+ u8x32_word_shift_left (gd->tmp_hi2, 4));
+
+ /* horizontal XOR of 2 128-bit lanes */
+ return u8x32_extract_hi (r) ^ u8x32_extract_lo (r);
+}
+#endif
+
static_always_inline void
ghash_precompute (u8x16 H, u8x16 * Hi, int n)
{
@@ -398,9 +503,7 @@ ghash_precompute (u8x16 H, u8x16 * Hi, int n)
#else
r32[3] = r32[0];
#endif
- /* *INDENT-OFF* */
r32 = r32 == (u32x4) {1, 0, 0, 1};
- /* *INDENT-ON* */
Hi[n - 1] = H = H ^ ((u8x16) r32 & ghash_poly);
/* calculate H^(i + 1) */
@@ -410,10 +513,3 @@ ghash_precompute (u8x16 H, u8x16 * Hi, int n)
#endif /* __ghash_h__ */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/crypto/poly1305.h b/src/vppinfra/crypto/poly1305.h
new file mode 100644
index 00000000000..cd6ea60cdf7
--- /dev/null
+++ b/src/vppinfra/crypto/poly1305.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#ifndef __clib_poly1305_h__
+#define __clib_poly1305_h__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/string.h>
+
+/* implementation of DJB's poly1305 using 64-bit arithmetrics */
+
+typedef struct
+{
+ const u64 r[3], s[2];
+ u64 h[3];
+
+ /* partial data */
+ union
+ {
+ u8 as_u8[16];
+ u64 as_u64[2];
+ } partial;
+
+ size_t n_partial_bytes;
+} clib_poly1305_ctx;
+
+static_always_inline void
+clib_poly1305_init (clib_poly1305_ctx *ctx, const u8 key[32])
+{
+ u64u *k = (u64u *) key;
+ u64 *h = (u64 *) ctx->h;
+ u64 *r = (u64 *) ctx->r;
+ u64 *s = (u64 *) ctx->s;
+
+ /* initialize accumulator */
+ h[0] = h[1] = h[2] = 0;
+
+ /* clamp 1st half of the key and store it into r[] */
+ r[0] = k[0] & 0x0ffffffc0fffffff;
+ r[1] = k[1] & 0x0ffffffc0ffffffc;
+ s[0] = k[2];
+ s[1] = k[3];
+
+ /* precompute (r[1] >> 2) * 5 */
+ r[2] = r[1] + (r[1] >> 2);
+
+ ctx->n_partial_bytes = 0;
+}
+
+static_always_inline void
+_clib_poly1305_multiply_and_reduce (u64 h[3], const u64 r[3])
+{
+ union
+ {
+ struct
+ {
+ u64 lo, hi;
+ };
+ u128 n;
+ } l0, l1, l2;
+ u64 c;
+
+ /*
+ h2 h1 h0
+ x r1 r0
+ ---------------------------------------
+ r0 x h2 r0 x h1 r0 × h0
+ + r1 x h2 r1 x h1 r1 x h0
+ ---------------------------------------
+
+ for p = 2^130-5, following applies:
+ (r * 2^130) mod p == (r * 5) mod p
+
+ bits above 130 can be shifted right (divided by 2^130)
+ and multiplied by 5 per equation above
+
+ h2 h1 h0
+ x r1 r0
+ ----------------------------------------------
+ r0 x h2 r0 x h1 r0 × h0
+ + r1 x h0
+ + 5x (r1 >>2) x h2 5x (r1 >>2) x h1
+ ----------------------------------------------
+ [0:l2.lo] [l1.hi:l1.lo] [l0.hi:l0.lo]
+ */
+
+ l0.n = l1.n = l2.n = 0;
+ /* u64 x u64 = u128 multiplications */
+ l0.n += (u128) h[0] * r[0];
+ l0.n += (u128) h[1] * r[2]; /* r[2] holds precomputed (r[1] >> 2) * 5 */
+ l1.n += (u128) h[0] * r[1];
+ l1.n += (u128) h[1] * r[0];
+
+ /* u64 x u64 = u64 multiplications, as h[2] may have only lower 2 bits set
+ * and r[1] have clamped bits 60-63 */
+ l1.n += (u128) (h[2] * r[2]);
+ l2.n += (u128) (h[2] * r[0]);
+
+ /* propagate upper 64 bits to higher limb */
+ c = 0;
+ l1.lo = u64_add_with_carry (&c, l1.lo, l0.hi);
+ l2.lo = u64_add_with_carry (&c, l2.lo, l1.hi);
+
+ l2.hi = l2.lo;
+ /* keep bits [128:129] */
+ l2.lo &= 3;
+
+ /* bits 130 and above multiply with 5 and store to l2.hi */
+ l2.hi -= l2.lo;
+ l2.hi += l2.hi >> 2;
+
+ /* add l2.hi to l0.lo with carry propagation and store result to h2:h1:h0 */
+ c = 0;
+ h[0] = u64_add_with_carry (&c, l0.lo, l2.hi);
+ h[1] = u64_add_with_carry (&c, l1.lo, 0);
+ h[2] = u64_add_with_carry (&c, l2.lo, 0);
+}
+
+static_always_inline u32
+_clib_poly1305_add_blocks (clib_poly1305_ctx *ctx, const u8 *msg,
+ uword n_bytes, const u32 bit17)
+{
+ u64 r[3], h[3];
+
+ for (int i = 0; i < 3; i++)
+ {
+ h[i] = ctx->h[i];
+ r[i] = ctx->r[i];
+ }
+
+ for (const u64u *m = (u64u *) msg; n_bytes >= 16; n_bytes -= 16, m += 2)
+ {
+ u64 c = 0;
+
+ /* h += m */
+ h[0] = u64_add_with_carry (&c, h[0], m[0]);
+ h[1] = u64_add_with_carry (&c, h[1], m[1]);
+ h[2] = u64_add_with_carry (&c, h[2], bit17 ? 1 : 0);
+
+ /* h = (h * r) mod p */
+ _clib_poly1305_multiply_and_reduce (h, r);
+ }
+
+ for (int i = 0; i < 3; i++)
+ ctx->h[i] = h[i];
+
+ return n_bytes;
+}
+
+static_always_inline void
+clib_poly1305_update (clib_poly1305_ctx *ctx, const u8 *msg, uword len)
+{
+ uword n_left = len;
+
+ if (n_left == 0)
+ return;
+
+ if (ctx->n_partial_bytes)
+ {
+ u16 missing_bytes = 16 - ctx->n_partial_bytes;
+ if (PREDICT_FALSE (n_left < missing_bytes))
+ {
+ clib_memcpy_fast (ctx->partial.as_u8 + ctx->n_partial_bytes, msg,
+ n_left);
+ ctx->n_partial_bytes += n_left;
+ return;
+ }
+
+ clib_memcpy_fast (ctx->partial.as_u8 + ctx->n_partial_bytes, msg,
+ missing_bytes);
+ _clib_poly1305_add_blocks (ctx, ctx->partial.as_u8, 16, 1);
+ ctx->n_partial_bytes = 0;
+ n_left -= missing_bytes;
+ msg += missing_bytes;
+ }
+
+ n_left = _clib_poly1305_add_blocks (ctx, msg, n_left, 1);
+
+ if (n_left)
+ {
+ ctx->partial.as_u64[0] = ctx->partial.as_u64[1] = 0;
+ clib_memcpy_fast (ctx->partial.as_u8, msg + len - n_left, n_left);
+ ctx->n_partial_bytes = n_left;
+ }
+}
+
+static_always_inline void
+clib_poly1305_final (clib_poly1305_ctx *ctx, u8 *out)
+{
+ const u64 p[] = { 0xFFFFFFFFFFFFFFFB, 0xFFFFFFFFFFFFFFFF, 3 }; /* 2^128-5 */
+ const u64 *s = ctx->s;
+ u64u *t = (u64u *) out;
+ u64 h0, h1, t0, t1;
+ u64 c;
+
+ if (ctx->n_partial_bytes)
+ {
+ ctx->partial.as_u8[ctx->n_partial_bytes] = 1;
+ _clib_poly1305_add_blocks (ctx, ctx->partial.as_u8, 16, 0);
+ }
+
+ h0 = ctx->h[0];
+ h1 = ctx->h[1];
+
+ /* h may not be fully reduced, try to subtract 2^128-5 */
+ c = 0;
+ t0 = u64_sub_with_borrow (&c, h0, p[0]);
+ t1 = u64_sub_with_borrow (&c, h1, p[1]);
+ u64_sub_with_borrow (&c, ctx->h[2], p[2]);
+
+ if (!c)
+ {
+ h0 = t0;
+ h1 = t1;
+ }
+
+ c = 0;
+ t[0] = u64_add_with_carry (&c, h0, s[0]);
+ t[1] = u64_add_with_carry (&c, h1, s[1]);
+}
+
+static_always_inline void
+clib_poly1305 (const u8 *key, const u8 *msg, uword len, u8 *out)
+{
+ clib_poly1305_ctx ctx;
+ clib_poly1305_init (&ctx, key);
+ clib_poly1305_update (&ctx, msg, len);
+ clib_poly1305_final (&ctx, out);
+}
+
+#endif /* __clib_poly1305_h__ */
diff --git a/src/vppinfra/crypto/sha2.h b/src/vppinfra/crypto/sha2.h
new file mode 100644
index 00000000000..69a24a2d087
--- /dev/null
+++ b/src/vppinfra/crypto/sha2.h
@@ -0,0 +1,715 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#ifndef included_sha2_h
+#define included_sha2_h
+
+#include <vppinfra/clib.h>
+#include <vppinfra/vector.h>
+#include <vppinfra/string.h>
+
+#define SHA256_ROTR(x, y) ((x >> y) | (x << (32 - y)))
+#define SHA256_CH(a, b, c) ((a & b) ^ (~a & c))
+#define SHA256_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c))
+#define SHA256_CSIGMA0(x) \
+ (SHA256_ROTR (x, 2) ^ SHA256_ROTR (x, 13) ^ SHA256_ROTR (x, 22));
+#define SHA256_CSIGMA1(x) \
+ (SHA256_ROTR (x, 6) ^ SHA256_ROTR (x, 11) ^ SHA256_ROTR (x, 25));
+#define SHA256_SSIGMA0(x) (SHA256_ROTR (x, 7) ^ SHA256_ROTR (x, 18) ^ (x >> 3))
+#define SHA256_SSIGMA1(x) \
+ (SHA256_ROTR (x, 17) ^ SHA256_ROTR (x, 19) ^ (x >> 10))
+
+#define SHA256_MSG_SCHED(w, j) \
+ { \
+ w[j] = w[j - 7] + w[j - 16]; \
+ w[j] += SHA256_SSIGMA0 (w[j - 15]); \
+ w[j] += SHA256_SSIGMA1 (w[j - 2]); \
+ }
+
+#define SHA256_TRANSFORM(s, w, i, k) \
+ { \
+ __typeof__ (s[0]) t1, t2; \
+ t1 = k + w[i] + s[7]; \
+ t1 += SHA256_CSIGMA1 (s[4]); \
+ t1 += SHA256_CH (s[4], s[5], s[6]); \
+ t2 = SHA256_CSIGMA0 (s[0]); \
+ t2 += SHA256_MAJ (s[0], s[1], s[2]); \
+ s[7] = s[6]; \
+ s[6] = s[5]; \
+ s[5] = s[4]; \
+ s[4] = s[3] + t1; \
+ s[3] = s[2]; \
+ s[2] = s[1]; \
+ s[1] = s[0]; \
+ s[0] = t1 + t2; \
+ }
+
+#define SHA512_ROTR(x, y) ((x >> y) | (x << (64 - y)))
+#define SHA512_CH(a, b, c) ((a & b) ^ (~a & c))
+#define SHA512_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c))
+#define SHA512_CSIGMA0(x) \
+ (SHA512_ROTR (x, 28) ^ SHA512_ROTR (x, 34) ^ SHA512_ROTR (x, 39))
+#define SHA512_CSIGMA1(x) \
+ (SHA512_ROTR (x, 14) ^ SHA512_ROTR (x, 18) ^ SHA512_ROTR (x, 41))
+#define SHA512_SSIGMA0(x) (SHA512_ROTR (x, 1) ^ SHA512_ROTR (x, 8) ^ (x >> 7))
+#define SHA512_SSIGMA1(x) \
+ (SHA512_ROTR (x, 19) ^ SHA512_ROTR (x, 61) ^ (x >> 6))
+
+#define SHA512_MSG_SCHED(w, j) \
+ { \
+ w[j] = w[j - 7] + w[j - 16]; \
+ w[j] += SHA512_SSIGMA0 (w[j - 15]); \
+ w[j] += SHA512_SSIGMA1 (w[j - 2]); \
+ }
+
+#define SHA512_TRANSFORM(s, w, i, k) \
+ { \
+ __typeof__ (s[0]) t1, t2; \
+ t1 = k + w[i] + s[7]; \
+ t1 += SHA512_CSIGMA1 (s[4]); \
+ t1 += SHA512_CH (s[4], s[5], s[6]); \
+ t2 = SHA512_CSIGMA0 (s[0]); \
+ t2 += SHA512_MAJ (s[0], s[1], s[2]); \
+ s[7] = s[6]; \
+ s[6] = s[5]; \
+ s[5] = s[4]; \
+ s[4] = s[3] + t1; \
+ s[3] = s[2]; \
+ s[2] = s[1]; \
+ s[1] = s[0]; \
+ s[0] = t1 + t2; \
+ }
+
+#if defined(__SHA__) && defined(__x86_64__)
+#define CLIB_SHA256_ISA_INTEL
+#define CLIB_SHA256_ISA
+#endif
+
+#ifdef __ARM_FEATURE_SHA2
+#define CLIB_SHA256_ISA_ARM
+#define CLIB_SHA256_ISA
+#endif
+
+static const u32 sha224_h[8] = { 0xc1059ed8, 0x367cd507, 0x3070dd17,
+ 0xf70e5939, 0xffc00b31, 0x68581511,
+ 0x64f98fa7, 0xbefa4fa4 };
+
+static const u32 sha256_h[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372,
+ 0xa54ff53a, 0x510e527f, 0x9b05688c,
+ 0x1f83d9ab, 0x5be0cd19 };
+
+static const u32 clib_sha2_256_k[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
+ 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
+ 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
+ 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
+ 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
+ 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+static const u64 sha384_h[8] = { 0xcbbb9d5dc1059ed8, 0x629a292a367cd507,
+ 0x9159015a3070dd17, 0x152fecd8f70e5939,
+ 0x67332667ffc00b31, 0x8eb44a8768581511,
+ 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4 };
+
+static const u64 sha512_h[8] = { 0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
+ 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
+ 0x510e527fade682d1, 0x9b05688c2b3e6c1f,
+ 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179 };
+
+static const u64 sha512_224_h[8] = { 0x8c3d37c819544da2, 0x73e1996689dcd4d6,
+ 0x1dfab7ae32ff9c82, 0x679dd514582f9fcf,
+ 0x0f6d2b697bd44da8, 0x77e36f7304c48942,
+ 0x3f9d85a86a1d36c8, 0x1112e6ad91d692a1 };
+
+static const u64 sha512_256_h[8] = { 0x22312194fc2bf72c, 0x9f555fa3c84c64c2,
+ 0x2393b86b6f53b151, 0x963877195940eabd,
+ 0x96283ee2a88effe3, 0xbe5e1e2553863992,
+ 0x2b0199fc2c85b8aa, 0x0eb72ddc81c52ca2 };
+
+static const u64 clib_sha2_512_k[80] = {
+ 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f,
+ 0xe9b5dba58189dbbc, 0x3956c25bf348b538, 0x59f111f1b605d019,
+ 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, 0xd807aa98a3030242,
+ 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
+ 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235,
+ 0xc19bf174cf692694, 0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
+ 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, 0x2de92c6f592b0275,
+ 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
+ 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f,
+ 0xbf597fc7beef0ee4, 0xc6e00bf33da88fc2, 0xd5a79147930aa725,
+ 0x06ca6351e003826f, 0x142929670a0e6e70, 0x27b70a8546d22ffc,
+ 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
+ 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6,
+ 0x92722c851482353b, 0xa2bfe8a14cf10364, 0xa81a664bbc423001,
+ 0xc24b8b70d0f89791, 0xc76c51a30654be30, 0xd192e819d6ef5218,
+ 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
+ 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99,
+ 0x34b0bcb5e19b48a8, 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
+ 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, 0x748f82ee5defb2fc,
+ 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
+ 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915,
+ 0xc67178f2e372532b, 0xca273eceea26619c, 0xd186b8c721c0c207,
+ 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, 0x06f067aa72176fba,
+ 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
+ 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc,
+ 0x431d67c49c100d4c, 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
+ 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
+};
+
+typedef enum
+{
+ CLIB_SHA2_224,
+ CLIB_SHA2_256,
+ CLIB_SHA2_384,
+ CLIB_SHA2_512,
+ CLIB_SHA2_512_224,
+ CLIB_SHA2_512_256,
+} clib_sha2_type_t;
+
+#define CLIB_SHA2_256_BLOCK_SIZE 64
+#define CLIB_SHA2_512_BLOCK_SIZE 128
+#define SHA2_MAX_BLOCK_SIZE CLIB_SHA2_512_BLOCK_SIZE
+#define SHA2_MAX_DIGEST_SIZE 64
+
+static const struct
+{
+ u8 block_size;
+ u8 digest_size;
+ const u32 *h32;
+ const u64 *h64;
+} clib_sha2_variants[] = {
+ [CLIB_SHA2_224] = {
+ .block_size = CLIB_SHA2_256_BLOCK_SIZE,
+ .digest_size = 28,
+ .h32 = sha224_h,
+ },
+ [CLIB_SHA2_256] = {
+ .block_size = CLIB_SHA2_256_BLOCK_SIZE,
+ .digest_size = 32,
+ .h32 = sha256_h,
+ },
+ [CLIB_SHA2_384] = {
+ .block_size = CLIB_SHA2_512_BLOCK_SIZE,
+ .digest_size = 48,
+ .h64 = sha384_h,
+ },
+ [CLIB_SHA2_512] = {
+ .block_size = CLIB_SHA2_512_BLOCK_SIZE,
+ .digest_size = 64,
+ .h64 = sha512_h,
+ },
+ [CLIB_SHA2_512_224] = {
+ .block_size = CLIB_SHA2_512_BLOCK_SIZE,
+ .digest_size = 28,
+ .h64 = sha512_224_h,
+ },
+ [CLIB_SHA2_512_256] = {
+ .block_size = CLIB_SHA2_512_BLOCK_SIZE,
+ .digest_size = 32,
+ .h64 = sha512_256_h,
+ },
+};
+
+typedef union
+{
+ u32 h32[8];
+ u64 h64[8];
+#ifdef CLIB_SHA256_ISA
+ u32x4 h32x4[2];
+#endif
+} clib_sha2_h_t;
+
+typedef struct
+{
+ u64 total_bytes;
+ u16 n_pending;
+ clib_sha2_h_t h;
+ union
+ {
+ u8 as_u8[SHA2_MAX_BLOCK_SIZE];
+ u64 as_u64[SHA2_MAX_BLOCK_SIZE / sizeof (u64)];
+ uword as_uword[SHA2_MAX_BLOCK_SIZE / sizeof (uword)];
+ } pending;
+} clib_sha2_state_t;
+
+typedef struct
+{
+ clib_sha2_type_t type;
+ u8 block_size;
+ u8 digest_size;
+ clib_sha2_state_t state;
+} clib_sha2_ctx_t;
+
+static_always_inline void
+clib_sha2_state_init (clib_sha2_state_t *state, clib_sha2_type_t type)
+{
+ clib_sha2_state_t st = {};
+
+ if (clib_sha2_variants[type].block_size == CLIB_SHA2_256_BLOCK_SIZE)
+ for (int i = 0; i < 8; i++)
+ st.h.h32[i] = clib_sha2_variants[type].h32[i];
+ else
+ for (int i = 0; i < 8; i++)
+ st.h.h64[i] = clib_sha2_variants[type].h64[i];
+
+ *state = st;
+}
+
+static_always_inline void
+clib_sha2_init (clib_sha2_ctx_t *ctx, clib_sha2_type_t type)
+{
+ clib_sha2_state_init (&ctx->state, type);
+ ctx->block_size = clib_sha2_variants[type].block_size;
+ ctx->digest_size = clib_sha2_variants[type].digest_size;
+ ctx->type = type;
+}
+
+#ifdef CLIB_SHA256_ISA
+static inline void
+clib_sha256_vec_cycle_w (u32x4 w[], u8 i)
+{
+ u8 j = (i + 1) % 4;
+ u8 k = (i + 2) % 4;
+ u8 l = (i + 3) % 4;
+#ifdef CLIB_SHA256_ISA_INTEL
+ w[i] = (u32x4) _mm_sha256msg1_epu32 ((__m128i) w[i], (__m128i) w[j]);
+ w[i] += (u32x4) _mm_alignr_epi8 ((__m128i) w[l], (__m128i) w[k], 4);
+ w[i] = (u32x4) _mm_sha256msg2_epu32 ((__m128i) w[i], (__m128i) w[l]);
+#elif defined(CLIB_SHA256_ISA_ARM)
+ w[i] = vsha256su1q_u32 (vsha256su0q_u32 (w[i], w[j]), w[k], w[l]);
+#endif
+}
+
+static inline void
+clib_sha256_vec_4_rounds (u32x4 w, u8 n, u32x4 s[])
+{
+#ifdef CLIB_SHA256_ISA_INTEL
+ u32x4 r = *(u32x4 *) (clib_sha2_256_k + 4 * n) + w;
+ s[0] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[0], (__m128i) s[1],
+ (__m128i) r);
+ r = (u32x4) u64x2_interleave_hi ((u64x2) r, (u64x2) r);
+ s[1] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[1], (__m128i) s[0],
+ (__m128i) r);
+#elif defined(CLIB_SHA256_ISA_ARM)
+ u32x4 r0, s0;
+ const u32x4u *k = (u32x4u *) clib_sha2_256_k;
+
+ r0 = w + k[n];
+ s0 = s[0];
+ s[0] = vsha256hq_u32 (s[0], s[1], r0);
+ s[1] = vsha256h2q_u32 (s[1], s0, r0);
+#endif
+}
+#endif
+
+#if defined(CLIB_SHA256_ISA)
+static inline u32x4
+clib_sha256_vec_load (u32x4 r)
+{
+#if defined(CLIB_SHA256_ISA_INTEL)
+ return u32x4_byte_swap (r);
+#elif defined(CLIB_SHA256_ISA_ARM)
+ return vreinterpretq_u32_u8 (vrev32q_u8 (vreinterpretq_u8_u32 (r)));
+#endif
+}
+
+static inline void
+clib_sha256_vec_shuffle (u32x4 d[2])
+{
+#if defined(CLIB_SHA256_ISA_INTEL)
+ /* {0, 1, 2, 3}, {4, 5, 6, 7} -> {7, 6, 3, 2}, {5, 4, 1, 0} */
+ u32x4 r;
+ r = (u32x4) _mm_shuffle_ps ((__m128) d[1], (__m128) d[0], 0xbb);
+ d[1] = (u32x4) _mm_shuffle_ps ((__m128) d[1], (__m128) d[0], 0x11);
+ d[0] = r;
+#endif
+}
+#endif
+
+static inline void
+clib_sha256_block (clib_sha2_state_t *st, const u8 *msg, uword n_blocks)
+{
+#if defined(CLIB_SHA256_ISA)
+ u32x4 h[2];
+ u32x4u *m = (u32x4u *) msg;
+
+ h[0] = st->h.h32x4[0];
+ h[1] = st->h.h32x4[1];
+
+ clib_sha256_vec_shuffle (h);
+
+ for (; n_blocks; m += 4, n_blocks--)
+ {
+ u32x4 s[2], w[4];
+
+ s[0] = h[0];
+ s[1] = h[1];
+
+ w[0] = clib_sha256_vec_load (m[0]);
+ w[1] = clib_sha256_vec_load (m[1]);
+ w[2] = clib_sha256_vec_load (m[2]);
+ w[3] = clib_sha256_vec_load (m[3]);
+
+ clib_sha256_vec_4_rounds (w[0], 0, s);
+ clib_sha256_vec_4_rounds (w[1], 1, s);
+ clib_sha256_vec_4_rounds (w[2], 2, s);
+ clib_sha256_vec_4_rounds (w[3], 3, s);
+
+ clib_sha256_vec_cycle_w (w, 0);
+ clib_sha256_vec_4_rounds (w[0], 4, s);
+ clib_sha256_vec_cycle_w (w, 1);
+ clib_sha256_vec_4_rounds (w[1], 5, s);
+ clib_sha256_vec_cycle_w (w, 2);
+ clib_sha256_vec_4_rounds (w[2], 6, s);
+ clib_sha256_vec_cycle_w (w, 3);
+ clib_sha256_vec_4_rounds (w[3], 7, s);
+
+ clib_sha256_vec_cycle_w (w, 0);
+ clib_sha256_vec_4_rounds (w[0], 8, s);
+ clib_sha256_vec_cycle_w (w, 1);
+ clib_sha256_vec_4_rounds (w[1], 9, s);
+ clib_sha256_vec_cycle_w (w, 2);
+ clib_sha256_vec_4_rounds (w[2], 10, s);
+ clib_sha256_vec_cycle_w (w, 3);
+ clib_sha256_vec_4_rounds (w[3], 11, s);
+
+ clib_sha256_vec_cycle_w (w, 0);
+ clib_sha256_vec_4_rounds (w[0], 12, s);
+ clib_sha256_vec_cycle_w (w, 1);
+ clib_sha256_vec_4_rounds (w[1], 13, s);
+ clib_sha256_vec_cycle_w (w, 2);
+ clib_sha256_vec_4_rounds (w[2], 14, s);
+ clib_sha256_vec_cycle_w (w, 3);
+ clib_sha256_vec_4_rounds (w[3], 15, s);
+
+ h[0] += s[0];
+ h[1] += s[1];
+ }
+
+ clib_sha256_vec_shuffle (h);
+
+ st->h.h32x4[0] = h[0];
+ st->h.h32x4[1] = h[1];
+#else
+ u32 w[64], s[8], i;
+ clib_sha2_h_t h;
+
+ h = st->h;
+
+ for (; n_blocks; msg += CLIB_SHA2_256_BLOCK_SIZE, n_blocks--)
+ {
+ for (i = 0; i < 8; i++)
+ s[i] = h.h32[i];
+
+ for (i = 0; i < 16; i++)
+ {
+ w[i] = clib_net_to_host_u32 ((((u32u *) msg)[i]));
+ SHA256_TRANSFORM (s, w, i, clib_sha2_256_k[i]);
+ }
+
+ for (i = 16; i < 64; i++)
+ {
+ SHA256_MSG_SCHED (w, i);
+ SHA256_TRANSFORM (s, w, i, clib_sha2_256_k[i]);
+ }
+
+ for (i = 0; i < 8; i++)
+ h.h32[i] += s[i];
+ }
+
+ st->h = h;
+#endif
+}
+
+static_always_inline void
+clib_sha512_block (clib_sha2_state_t *st, const u8 *msg, uword n_blocks)
+{
+ u64 w[80], s[8], i;
+ clib_sha2_h_t h;
+
+ h = st->h;
+
+ for (; n_blocks; msg += CLIB_SHA2_512_BLOCK_SIZE, n_blocks--)
+ {
+ for (i = 0; i < 8; i++)
+ s[i] = h.h64[i];
+
+ for (i = 0; i < 16; i++)
+ {
+ w[i] = clib_net_to_host_u64 ((((u64u *) msg)[i]));
+ SHA512_TRANSFORM (s, w, i, clib_sha2_512_k[i]);
+ }
+
+ for (i = 16; i < 80; i++)
+ {
+ SHA512_MSG_SCHED (w, i);
+ SHA512_TRANSFORM (s, w, i, clib_sha2_512_k[i]);
+ }
+
+ for (i = 0; i < 8; i++)
+ h.h64[i] += s[i];
+ }
+
+ st->h = h;
+}
+
+static_always_inline void
+clib_sha2_update_internal (clib_sha2_state_t *st, u8 block_size, const u8 *msg,
+ uword n_bytes)
+{
+ uword n_blocks;
+ if (st->n_pending)
+ {
+ uword n_left = block_size - st->n_pending;
+ if (n_bytes < n_left)
+ {
+ clib_memcpy_fast (st->pending.as_u8 + st->n_pending, msg, n_bytes);
+ st->n_pending += n_bytes;
+ return;
+ }
+ else
+ {
+ clib_memcpy_fast (st->pending.as_u8 + st->n_pending, msg, n_left);
+ if (block_size == CLIB_SHA2_512_BLOCK_SIZE)
+ clib_sha512_block (st, st->pending.as_u8, 1);
+ else
+ clib_sha256_block (st, st->pending.as_u8, 1);
+ st->n_pending = 0;
+ st->total_bytes += block_size;
+ n_bytes -= n_left;
+ msg += n_left;
+ }
+ }
+
+ if ((n_blocks = n_bytes / block_size))
+ {
+ if (block_size == CLIB_SHA2_512_BLOCK_SIZE)
+ clib_sha512_block (st, msg, n_blocks);
+ else
+ clib_sha256_block (st, msg, n_blocks);
+ n_bytes -= n_blocks * block_size;
+ msg += n_blocks * block_size;
+ st->total_bytes += n_blocks * block_size;
+ }
+
+ if (n_bytes)
+ {
+ clib_memset_u8 (st->pending.as_u8, 0, block_size);
+ clib_memcpy_fast (st->pending.as_u8, msg, n_bytes);
+ st->n_pending = n_bytes;
+ }
+ else
+ st->n_pending = 0;
+}
+
+static_always_inline void
+clib_sha2_update (clib_sha2_ctx_t *ctx, const u8 *msg, uword n_bytes)
+{
+ clib_sha2_update_internal (&ctx->state, ctx->block_size, msg, n_bytes);
+}
+
+static_always_inline void
+clib_sha2_final_internal (clib_sha2_state_t *st, u8 block_size, u8 digest_size,
+ u8 *digest)
+{
+ int i;
+
+ st->total_bytes += st->n_pending;
+ if (st->n_pending == 0)
+ {
+ clib_memset (st->pending.as_u8, 0, block_size);
+ st->pending.as_u8[0] = 0x80;
+ }
+ else if (st->n_pending + sizeof (u64) + sizeof (u8) > block_size)
+ {
+ st->pending.as_u8[st->n_pending] = 0x80;
+ if (block_size == CLIB_SHA2_512_BLOCK_SIZE)
+ clib_sha512_block (st, st->pending.as_u8, 1);
+ else
+ clib_sha256_block (st, st->pending.as_u8, 1);
+ clib_memset (st->pending.as_u8, 0, block_size);
+ }
+ else
+ st->pending.as_u8[st->n_pending] = 0x80;
+
+ st->pending.as_u64[block_size / 8 - 1] =
+ clib_net_to_host_u64 (st->total_bytes * 8);
+
+ if (block_size == CLIB_SHA2_512_BLOCK_SIZE)
+ {
+ clib_sha512_block (st, st->pending.as_u8, 1);
+ for (i = 0; i < digest_size / sizeof (u64); i++)
+ ((u64 *) digest)[i] = clib_net_to_host_u64 (st->h.h64[i]);
+
+ /* sha512-224 case - write half of u64 */
+ if (i * sizeof (u64) < digest_size)
+ ((u32 *) digest)[2 * i] = clib_net_to_host_u32 (st->h.h64[i] >> 32);
+ }
+ else
+ {
+ clib_sha256_block (st, st->pending.as_u8, 1);
+ for (i = 0; i < digest_size / sizeof (u32); i++)
+ *((u32 *) digest + i) = clib_net_to_host_u32 (st->h.h32[i]);
+ }
+}
+
+static_always_inline void
+clib_sha2_final (clib_sha2_ctx_t *ctx, u8 *digest)
+{
+ clib_sha2_final_internal (&ctx->state, ctx->block_size, ctx->digest_size,
+ digest);
+}
+
+static_always_inline void
+clib_sha2 (clib_sha2_type_t type, const u8 *msg, uword len, u8 *digest)
+{
+ clib_sha2_ctx_t ctx;
+ clib_sha2_init (&ctx, type);
+ clib_sha2_update (&ctx, msg, len);
+ clib_sha2_final (&ctx, digest);
+}
+
+#define clib_sha224(...) clib_sha2 (CLIB_SHA2_224, __VA_ARGS__)
+#define clib_sha256(...) clib_sha2 (CLIB_SHA2_256, __VA_ARGS__)
+#define clib_sha384(...) clib_sha2 (CLIB_SHA2_384, __VA_ARGS__)
+#define clib_sha512(...) clib_sha2 (CLIB_SHA2_512, __VA_ARGS__)
+#define clib_sha512_224(...) clib_sha2 (CLIB_SHA2_512_224, __VA_ARGS__)
+#define clib_sha512_256(...) clib_sha2 (CLIB_SHA2_512_256, __VA_ARGS__)
+
+/*
+ * HMAC
+ */
+
+typedef struct
+{
+ clib_sha2_h_t ipad_h;
+ clib_sha2_h_t opad_h;
+} clib_sha2_hmac_key_data_t;
+
+typedef struct
+{
+ clib_sha2_type_t type;
+ u8 block_size;
+ u8 digest_size;
+ clib_sha2_state_t ipad_state;
+ clib_sha2_state_t opad_state;
+} clib_sha2_hmac_ctx_t;
+
+static_always_inline void
+clib_sha2_hmac_key_data (clib_sha2_type_t type, const u8 *key, uword key_len,
+ clib_sha2_hmac_key_data_t *kd)
+{
+ u8 block_size = clib_sha2_variants[type].block_size;
+ u8 data[SHA2_MAX_BLOCK_SIZE] = {};
+ u8 ikey[SHA2_MAX_BLOCK_SIZE];
+ u8 okey[SHA2_MAX_BLOCK_SIZE];
+ clib_sha2_state_t ipad_state;
+ clib_sha2_state_t opad_state;
+
+ /* key */
+ if (key_len > block_size)
+ {
+ /* key is longer than block, calculate hash of key */
+ clib_sha2_ctx_t ctx;
+ clib_sha2_init (&ctx, type);
+ clib_sha2_update (&ctx, key, key_len);
+ clib_sha2_final (&ctx, (u8 *) data);
+ }
+ else
+ clib_memcpy_fast (data, key, key_len);
+
+ for (int i = 0, w = 0; w < block_size; w += sizeof (uword), i++)
+ {
+ ((uwordu *) ikey)[i] = ((uwordu *) data)[i] ^ 0x3636363636363636UL;
+ ((uwordu *) okey)[i] = ((uwordu *) data)[i] ^ 0x5c5c5c5c5c5c5c5cUL;
+ }
+
+ clib_sha2_state_init (&ipad_state, type);
+ clib_sha2_state_init (&opad_state, type);
+
+ if (block_size == CLIB_SHA2_512_BLOCK_SIZE)
+ {
+ clib_sha512_block (&ipad_state, ikey, 1);
+ clib_sha512_block (&opad_state, okey, 1);
+ }
+ else
+ {
+ clib_sha256_block (&ipad_state, ikey, 1);
+ clib_sha256_block (&opad_state, okey, 1);
+ }
+
+ kd->ipad_h = ipad_state.h;
+ kd->opad_h = opad_state.h;
+}
+
+static_always_inline void
+clib_sha2_hmac_init (clib_sha2_hmac_ctx_t *ctx, clib_sha2_type_t type,
+ clib_sha2_hmac_key_data_t *kd)
+{
+ u8 block_size = clib_sha2_variants[type].block_size;
+ u8 digest_size = clib_sha2_variants[type].digest_size;
+
+ *ctx = (clib_sha2_hmac_ctx_t) {
+ .type = type,
+ .block_size = block_size,
+ .digest_size = digest_size,
+ .ipad_state = {
+ .h = kd->ipad_h,
+ .total_bytes = block_size,
+ },
+ .opad_state = {
+ .h = kd->opad_h,
+ .total_bytes = block_size,
+ },
+ };
+}
+
+static_always_inline void
+clib_sha2_hmac_update (clib_sha2_hmac_ctx_t *ctx, const u8 *msg, uword len)
+{
+ clib_sha2_update_internal (&ctx->ipad_state, ctx->block_size, msg, len);
+}
+
+static_always_inline void
+clib_sha2_hmac_final (clib_sha2_hmac_ctx_t *ctx, u8 *digest)
+{
+ u8 i_digest[SHA2_MAX_DIGEST_SIZE];
+
+ clib_sha2_final_internal (&ctx->ipad_state, ctx->block_size,
+ ctx->digest_size, i_digest);
+ clib_sha2_update_internal (&ctx->opad_state, ctx->block_size, i_digest,
+ ctx->digest_size);
+ clib_sha2_final_internal (&ctx->opad_state, ctx->block_size,
+ ctx->digest_size, digest);
+}
+
+static_always_inline void
+clib_sha2_hmac (clib_sha2_type_t type, const u8 *key, uword key_len,
+ const u8 *msg, uword len, u8 *digest)
+{
+ clib_sha2_hmac_ctx_t _ctx, *ctx = &_ctx;
+ clib_sha2_hmac_key_data_t kd;
+
+ clib_sha2_hmac_key_data (type, key, key_len, &kd);
+ clib_sha2_hmac_init (ctx, type, &kd);
+ clib_sha2_hmac_update (ctx, msg, len);
+ clib_sha2_hmac_final (ctx, digest);
+}
+
+#define clib_hmac_sha224(...) clib_sha2_hmac (CLIB_SHA2_224, __VA_ARGS__)
+#define clib_hmac_sha256(...) clib_sha2_hmac (CLIB_SHA2_256, __VA_ARGS__)
+#define clib_hmac_sha384(...) clib_sha2_hmac (CLIB_SHA2_384, __VA_ARGS__)
+#define clib_hmac_sha512(...) clib_sha2_hmac (CLIB_SHA2_512, __VA_ARGS__)
+#define clib_hmac_sha512_224(...) \
+ clib_sha2_hmac (CLIB_SHA2_512_224, __VA_ARGS__)
+#define clib_hmac_sha512_256(...) \
+ clib_sha2_hmac (CLIB_SHA2_512_256, __VA_ARGS__)
+
+#endif /* included_sha2_h */
diff --git a/src/vppinfra/dlmalloc.c b/src/vppinfra/dlmalloc.c
index 36c80b09b87..5cdc6f6cc13 100644
--- a/src/vppinfra/dlmalloc.c
+++ b/src/vppinfra/dlmalloc.c
@@ -5,8 +5,8 @@
comments, complaints, performance data, etc to dl@cs.oswego.edu
*/
+#include <vppinfra/clib.h>
#include <vppinfra/dlmalloc.h>
-#include <vppinfra/sanitizer.h>
/*------------------------------ internal #includes ---------------------- */
@@ -460,7 +460,7 @@ static FORCEINLINE void x86_clear_lock(int* sl) {
#if !defined(USE_RECURSIVE_LOCKS) || USE_RECURSIVE_LOCKS == 0
/* Plain spin locks use single word (embedded in malloc_states) */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static int spin_acquire_lock(int *sl) {
int spins = 0;
while (*(volatile int *)sl != 0 || CAS_LOCK(sl)) {
@@ -1286,7 +1286,7 @@ static struct malloc_state _gm_;
((char*)(A) >= S->base && (char*)(A) < S->base + S->size)
/* Return segment holding given address */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static msegmentptr segment_holding(mstate m, char* addr) {
msegmentptr sp = &m->seg;
for (;;) {
@@ -1298,7 +1298,7 @@ static msegmentptr segment_holding(mstate m, char* addr) {
}
/* Return true if segment contains a segment link */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static int has_segment_link(mstate m, msegmentptr ss) {
msegmentptr sp = &m->seg;
for (;;) {
@@ -1616,7 +1616,7 @@ static size_t traverse_and_check(mstate m);
#if (FOOTERS && !INSECURE)
/* Check if (alleged) mstate m has expected magic field */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static inline int
ok_magic (const mstate m)
{
@@ -2083,7 +2083,7 @@ static void do_check_malloc_state(mstate m) {
/* ----------------------------- statistics ------------------------------ */
#if !NO_MALLINFO
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static struct dlmallinfo internal_mallinfo(mstate m) {
struct dlmallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
ensure_initialization();
@@ -2493,7 +2493,7 @@ static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb, int flags) {
/* -------------------------- mspace management -------------------------- */
/* Initialize top chunk and its size */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void init_top(mstate m, mchunkptr p, size_t psize) {
/* Ensure alignment */
size_t offset = align_offset(chunk2mem(p));
@@ -2538,7 +2538,7 @@ static void reset_on_error(mstate m) {
#endif /* PROCEED_ON_ERROR */
/* Allocate chunk and prepend remainder with chunk in successor base. */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
size_t nb) {
mchunkptr p = align_as_chunk(newbase);
@@ -2581,7 +2581,7 @@ static void* prepend_alloc(mstate m, char* newbase, char* oldbase,
}
/* Add a segment to hold a new noncontiguous region */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
/* Determine locations and sizes of segment, fenceposts, old top */
char* old_top = (char*)m->top;
@@ -2596,7 +2596,7 @@ static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
msegmentptr ss = (msegmentptr)(chunk2mem(sp));
mchunkptr tnext = chunk_plus_offset(sp, ssize);
mchunkptr p = tnext;
- int nfences = 0;
+ int __attribute__((unused)) nfences = 0;
/* reset top to new space */
init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);
@@ -2637,7 +2637,7 @@ static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
/* -------------------------- System allocation -------------------------- */
/* Get memory from system using MORECORE or MMAP */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void* sys_alloc(mstate m, size_t nb) {
char* tbase = CMFAIL;
size_t tsize = 0;
@@ -2852,7 +2852,7 @@ static void* sys_alloc(mstate m, size_t nb) {
/* ----------------------- system deallocation -------------------------- */
/* Unmap and unlink any mmapped segments that don't contain used chunks */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static size_t release_unused_segments(mstate m) {
size_t released = 0;
int nsegs = 0;
@@ -2900,7 +2900,7 @@ static size_t release_unused_segments(mstate m) {
return released;
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static int sys_trim(mstate m, size_t pad) {
size_t released = 0;
ensure_initialization();
@@ -2969,7 +2969,7 @@ static int sys_trim(mstate m, size_t pad) {
/* Consolidate and bin a chunk. Differs from exported versions
of free mainly in that the chunk need not be marked as inuse.
*/
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void dispose_chunk(mstate m, mchunkptr p, size_t psize) {
mchunkptr next = chunk_plus_offset(p, psize);
if (!pinuse(p)) {
@@ -3041,7 +3041,7 @@ static void dispose_chunk(mstate m, mchunkptr p, size_t psize) {
/* ---------------------------- malloc --------------------------- */
/* allocate a large request from the best fitting chunk in a treebin */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void* tmalloc_large(mstate m, size_t nb) {
tchunkptr v = 0;
size_t rsize = -nb; /* Unsigned negation */
@@ -3113,7 +3113,7 @@ static void* tmalloc_large(mstate m, size_t nb) {
}
/* allocate a small request from the best fitting chunk in a treebin */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void* tmalloc_small(mstate m, size_t nb) {
tchunkptr t, v;
size_t rsize;
@@ -3420,7 +3420,7 @@ void* dlcalloc(size_t n_elements, size_t elem_size) {
/* ------------ Internal support for realloc, memalign, etc -------------- */
/* Try to realloc; only in-place unless can_move true */
-static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb,
+static __clib_nosanitize_addr mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb,
int can_move) {
mchunkptr newp = 0;
size_t oldsize = chunksize(p);
@@ -3499,7 +3499,7 @@ static mchunkptr try_realloc_chunk(mstate m, mchunkptr p, size_t nb,
return newp;
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
static void* internal_memalign(mstate m, size_t alignment, size_t bytes) {
void* mem = 0;
if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */
@@ -4082,7 +4082,7 @@ int mspace_track_large_chunks(mspace msp, int enable) {
return ret;
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
size_t destroy_mspace(mspace msp) {
size_t freed = 0;
mstate ms = (mstate)msp;
@@ -4118,7 +4118,7 @@ void mspace_get_address_and_size (mspace msp, char **addrp, size_t *sizep)
*sizep = this_seg->size;
}
-CLIB_NOSANITIZE_ADDR __clib_export
+__clib_nosanitize_addr
int mspace_is_heap_object (mspace msp, void *p)
{
msegment *this_seg;
@@ -4144,7 +4144,7 @@ int mspace_is_heap_object (mspace msp, void *p)
return 0;
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
void *mspace_least_addr (mspace msp)
{
mstate ms = (mstate) msp;
@@ -4158,7 +4158,7 @@ void mspace_disable_expand (mspace msp)
disable_expand (ms);
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
int mspace_enable_disable_trace (mspace msp, int enable)
{
mstate ms = (mstate)msp;
@@ -4175,7 +4175,7 @@ int mspace_enable_disable_trace (mspace msp, int enable)
return (was_enabled);
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
int mspace_is_traced (mspace msp)
{
mstate ms = (mstate)msp;
@@ -4185,7 +4185,7 @@ int mspace_is_traced (mspace msp)
return 0;
}
-CLIB_NOSANITIZE_ADDR __clib_export
+__clib_nosanitize_addr
void* mspace_get_aligned (mspace msp,
unsigned long n_user_data_bytes,
unsigned long align,
@@ -4265,7 +4265,7 @@ void* mspace_get_aligned (mspace msp,
return (void *) searchp;
}
-CLIB_NOSANITIZE_ADDR __clib_export
+__clib_nosanitize_addr
void mspace_put (mspace msp, void *p_arg)
{
char *object_header;
@@ -4315,7 +4315,7 @@ void mspace_put_no_offset (mspace msp, void *p_arg)
mspace_free (msp, p_arg);
}
-CLIB_NOSANITIZE_ADDR __clib_export
+__clib_nosanitize_addr
size_t mspace_usable_size_with_delta (const void *p)
{
size_t usable_size;
@@ -4341,7 +4341,7 @@ size_t mspace_usable_size_with_delta (const void *p)
versions. This is not so nice but better than the alternatives.
*/
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
void* mspace_malloc(mspace msp, size_t bytes) {
mstate ms = (mstate)msp;
if (!ok_magic(ms)) {
@@ -4456,7 +4456,7 @@ void* mspace_malloc(mspace msp, size_t bytes) {
return 0;
}
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
void mspace_free(mspace msp, void* mem) {
if (mem != 0) {
mchunkptr p = mem2chunk(mem);
@@ -4623,6 +4623,7 @@ void* mspace_realloc(mspace msp, void* oldmem, size_t bytes) {
return mem;
}
+__clib_nosanitize_addr
void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) {
void* mem = 0;
if (oldmem != 0) {
@@ -4655,6 +4656,7 @@ void* mspace_realloc_in_place(mspace msp, void* oldmem, size_t bytes) {
return mem;
}
+__clib_nosanitize_addr
void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) {
mstate ms = (mstate)msp;
if (!ok_magic(ms)) {
@@ -4794,7 +4796,7 @@ size_t mspace_set_footprint_limit(mspace msp, size_t bytes) {
}
#if !NO_MALLINFO
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
struct dlmallinfo mspace_mallinfo(mspace msp) {
mstate ms = (mstate)msp;
if (!ok_magic(ms)) {
@@ -4804,7 +4806,7 @@ struct dlmallinfo mspace_mallinfo(mspace msp) {
}
#endif /* NO_MALLINFO */
-CLIB_NOSANITIZE_ADDR
+__clib_nosanitize_addr
size_t mspace_usable_size(const void* mem) {
if (mem != 0) {
mchunkptr p = mem2chunk(mem);
diff --git a/src/vppinfra/dlmalloc.h b/src/vppinfra/dlmalloc.h
index b8adf74831d..5fcaf7c30ca 100644
--- a/src/vppinfra/dlmalloc.h
+++ b/src/vppinfra/dlmalloc.h
@@ -1447,6 +1447,8 @@ DLMALLOC_EXPORT int mspace_trim(mspace msp, size_t pad);
*/
DLMALLOC_EXPORT int mspace_mallopt(int, int);
+DLMALLOC_EXPORT void* mspace_realloc_in_place (mspace msp, void *oldmem, size_t bytes);
+
DLMALLOC_EXPORT void* mspace_get_aligned (mspace msp,
unsigned long n_user_data_bytes,
unsigned long align,
diff --git a/src/vppinfra/elf.c b/src/vppinfra/elf.c
index 11fac45b27e..f660195e101 100644
--- a/src/vppinfra/elf.c
+++ b/src/vppinfra/elf.c
@@ -1357,7 +1357,7 @@ elf_read_file (elf_main_t * em, char *file_name)
goto done;
}
- CLIB_MEM_UNPOISON (data, mmap_length);
+ clib_mem_unpoison (data, mmap_length);
em->file_name = file_name;
@@ -1455,7 +1455,6 @@ static void
layout_sections (elf_main_t * em)
{
elf_section_t *s;
- u32 n_sections_with_changed_exec_address = 0;
u32 *deferred_symbol_and_string_sections = 0;
u32 n_deleted_sections = 0;
/* note: rebuild is always zero. Intent lost in the sands of time */
@@ -1614,7 +1613,6 @@ layout_sections (elf_main_t * em)
if (s->header.flags & ELF_SECTION_FLAG_ALLOC)
{
s->exec_address_change = exec_address - s->header.exec_address;
- n_sections_with_changed_exec_address += s->exec_address_change != 0;
s->header.exec_address = exec_address;
}
@@ -1704,7 +1702,6 @@ layout_sections (elf_main_t * em)
continue;
s_lo = s_hi = 0;
- /* *INDENT-OFF* */
clib_bitmap_foreach (si, g->section_index_bitmap) {
u64 lo, hi;
@@ -1729,7 +1726,6 @@ layout_sections (elf_main_t * em)
s_hi = hi;
}
}
- /* *INDENT-ON* */
if (n_sections == 0)
continue;
@@ -1977,7 +1973,7 @@ elf_create_section_with_contents (elf_main_t * em,
if ((p = hash_get_mem (em->section_by_name, section_name)))
{
s = vec_elt_at_index (em->sections, p[0]);
- _vec_len (s->contents) = 0;
+ vec_set_len (s->contents, 0);
c = s->contents;
}
else
diff --git a/src/vppinfra/elf.h b/src/vppinfra/elf.h
index cceb13e256b..56869f1b9c7 100644
--- a/src/vppinfra/elf.h
+++ b/src/vppinfra/elf.h
@@ -966,12 +966,10 @@ elf_get_section_contents (elf_main_t * em,
result = 0;
if (vec_len (s->contents) > 0)
{
+ vec_attr_t va = { .elt_sz = elt_size };
/* Make vector copy of contents with given element size. */
- result = _vec_resize (result,
- vec_len (s->contents) / elt_size,
- vec_len (s->contents),
- /* header_bytes */ 0,
- /* align */ 0);
+ result =
+ _vec_realloc_internal (result, vec_len (s->contents) / elt_size, &va);
clib_memcpy (result, s->contents, vec_len (s->contents));
}
diff --git a/src/vppinfra/elf_clib.c b/src/vppinfra/elf_clib.c
index d2865f800e3..d4d511e0fba 100644
--- a/src/vppinfra/elf_clib.c
+++ b/src/vppinfra/elf_clib.c
@@ -319,20 +319,33 @@ symbol_by_address_or_name (char *by_name,
return 0;
}
-uword
-clib_elf_symbol_by_name (char *by_name, clib_elf_symbol_t * s)
+__clib_export uword
+clib_elf_symbol_by_name (char *by_name, clib_elf_symbol_t *s)
{
return symbol_by_address_or_name (by_name, /* by_address */ 0, s);
}
-uword
-clib_elf_symbol_by_address (uword by_address, clib_elf_symbol_t * s)
+__clib_export uword
+clib_elf_symbol_by_address (uword by_address, clib_elf_symbol_t *s)
{
return symbol_by_address_or_name ( /* by_name */ 0, by_address, s);
}
-u8 *
-format_clib_elf_symbol (u8 * s, va_list * args)
+__clib_export const char *
+clib_elf_symbol_name (clib_elf_symbol_t *s)
+{
+ clib_elf_main_t *cem = &clib_elf_main;
+ elf_main_t *em;
+ elf_symbol_table_t *t;
+
+ em = vec_elt_at_index (cem->elf_mains, s->elf_main_index);
+ t = vec_elt_at_index (em->symbol_tables, s->symbol_table_index);
+
+ return (const char *) elf_symbol_name (t, &s->symbol);
+}
+
+__clib_export u8 *
+format_clib_elf_symbol (u8 *s, va_list *args)
{
clib_elf_main_t *cem = &clib_elf_main;
clib_elf_symbol_t *sym = va_arg (*args, clib_elf_symbol_t *);
diff --git a/src/vppinfra/elf_clib.h b/src/vppinfra/elf_clib.h
index 25b928c22a5..4e5d4d72165 100644
--- a/src/vppinfra/elf_clib.h
+++ b/src/vppinfra/elf_clib.h
@@ -131,6 +131,8 @@ typedef struct
uword clib_elf_symbol_by_name (char *name, clib_elf_symbol_t * result);
uword clib_elf_symbol_by_address (uword address, clib_elf_symbol_t * result);
+const char *clib_elf_symbol_name (clib_elf_symbol_t *result);
+
format_function_t format_clib_elf_symbol, format_clib_elf_symbol_with_address;
#endif /* included_clib_elf_self_h */
diff --git a/src/vppinfra/elog.c b/src/vppinfra/elog.c
index 8ae752eb6af..a0e5712aaab 100644
--- a/src/vppinfra/elog.c
+++ b/src/vppinfra/elog.c
@@ -494,7 +494,7 @@ elog_alloc_internal (elog_main_t * em, u32 n_events, int free_ring)
em->event_ring_size = n_events = max_pow2 (n_events);
vec_validate_aligned (em->event_ring, n_events, CLIB_CACHE_LINE_BYTES);
- _vec_len (em->event_ring) = n_events;
+ vec_set_len (em->event_ring, n_events);
}
__clib_export void
@@ -1198,7 +1198,7 @@ elog_write_file_not_inline (elog_main_t * em, char *clib_file, int flush_ring)
__clib_export clib_error_t *
elog_read_file_not_inline (elog_main_t * em, char *clib_file)
{
- serialize_main_t m;
+ serialize_main_t m = { 0 };
clib_error_t *error;
error = unserialize_open_clib_file (&m, clib_file);
diff --git a/src/vppinfra/error.c b/src/vppinfra/error.c
index b2b1a83e552..374b8b5256a 100644
--- a/src/vppinfra/error.c
+++ b/src/vppinfra/error.c
@@ -109,8 +109,8 @@ dispatch_message (u8 * msg)
}
__clib_export void
-_clib_error (int how_to_die,
- char *function_name, uword line_number, char *fmt, ...)
+_clib_error (int how_to_die, const char *function_name, uword line_number,
+ const char *fmt, ...)
{
u8 *msg = 0;
va_list va;
@@ -146,8 +146,8 @@ _clib_error (int how_to_die,
}
__clib_export clib_error_t *
-_clib_error_return (clib_error_t * errors,
- any code, uword flags, char *where, char *fmt, ...)
+_clib_error_return (clib_error_t *errors, any code, uword flags,
+ const char *where, const char *fmt, ...)
{
clib_error_t *e;
va_list va;
diff --git a/src/vppinfra/error.h b/src/vppinfra/error.h
index e0e2d4726b2..9eae8ea6818 100644
--- a/src/vppinfra/error.h
+++ b/src/vppinfra/error.h
@@ -85,10 +85,9 @@ extern void *clib_error_free_vector (clib_error_t * errors);
#define clib_error_free(e) e = clib_error_free_vector(e)
-extern clib_error_t *_clib_error_return (clib_error_t * errors,
- any code,
- uword flags,
- char *where, char *fmt, ...);
+extern clib_error_t *_clib_error_return (clib_error_t *errors, any code,
+ uword flags, const char *where,
+ const char *fmt, ...);
#define clib_error_return_code(e,code,flags,args...) \
_clib_error_return((e),(code),(flags),(char *)clib_error_function,args)
diff --git a/src/vppinfra/error_bootstrap.h b/src/vppinfra/error_bootstrap.h
index 185f4c6c4af..ae23d1bcca8 100644
--- a/src/vppinfra/error_bootstrap.h
+++ b/src/vppinfra/error_bootstrap.h
@@ -62,9 +62,8 @@ enum
/* Low level error reporting function.
Code specifies whether to call exit, abort or nothing at
all (for non-fatal warnings). */
-extern void _clib_error (int code,
- char *function_name,
- uword line_number, char *format, ...);
+extern void _clib_error (int code, const char *function_name,
+ uword line_number, const char *format, ...);
#define ASSERT(truth) \
do { \
diff --git a/src/vppinfra/fifo.c b/src/vppinfra/fifo.c
index 52d65ae1f37..2b1cfea6fe0 100644
--- a/src/vppinfra/fifo.c
+++ b/src/vppinfra/fifo.c
@@ -77,12 +77,16 @@
*/
__clib_export void *
-_clib_fifo_resize (void *v_old, uword n_new_elts, uword elt_bytes)
+_clib_fifo_resize (void *v_old, uword n_new_elts, uword align, uword elt_bytes)
{
- void *v_new, *end, *head;
- uword n_old_elts, header_bytes;
+ void *end, *head;
+ u8 *v_new = 0;
+ uword n_old_elts;
uword n_copy_bytes, n_zero_bytes;
clib_fifo_header_t *f_new, *f_old;
+ vec_attr_t va = { .elt_sz = elt_bytes,
+ .hdr_sz = sizeof (clib_fifo_header_t),
+ .align = align };
n_old_elts = clib_fifo_elts (v_old);
n_new_elts += n_old_elts;
@@ -91,15 +95,10 @@ _clib_fifo_resize (void *v_old, uword n_new_elts, uword elt_bytes)
else
n_new_elts = max_pow2 (n_new_elts);
- header_bytes = vec_header_bytes (sizeof (clib_fifo_header_t));
-
- v_new = clib_mem_alloc_no_fail (n_new_elts * elt_bytes + header_bytes);
- v_new += header_bytes;
-
+ v_new = _vec_alloc_internal (n_new_elts, &va);
f_new = clib_fifo_header (v_new);
f_new->head_index = 0;
f_new->tail_index = n_old_elts;
- _vec_len (v_new) = n_new_elts;
/* Copy old -> new. */
n_copy_bytes = n_old_elts * elt_bytes;
diff --git a/src/vppinfra/fifo.h b/src/vppinfra/fifo.h
index 5dc1b4512cf..b6a8b8f5c3b 100644
--- a/src/vppinfra/fifo.h
+++ b/src/vppinfra/fifo.h
@@ -54,7 +54,7 @@ typedef struct
always_inline clib_fifo_header_t *
clib_fifo_header (void *f)
{
- return vec_header (f, sizeof (clib_fifo_header_t));
+ return vec_header (f);
}
/* Aliases. */
@@ -91,31 +91,31 @@ clib_fifo_reset (void *v)
if (v)
{
f->head_index = f->tail_index = 0;
- _vec_len (v) = 0;
+ vec_set_len (v, 0);
}
}
/* External resize function. */
-void *_clib_fifo_resize (void *v, uword n_elts, uword elt_bytes);
+void *_clib_fifo_resize (void *v, uword n_elts, uword align, uword elt_bytes);
-#define clib_fifo_resize(f,n_elts) \
- f = _clib_fifo_resize ((f), (n_elts), sizeof ((f)[0]))
+#define clib_fifo_resize(f, n_elts) \
+ f = _clib_fifo_resize ((f), (n_elts), _vec_align (f, 0), _vec_elt_sz (f))
always_inline void *
-_clib_fifo_validate (void *v, uword n_elts, uword elt_bytes)
+_clib_fifo_validate (void *v, uword n_elts, uword align, uword elt_bytes)
{
if (clib_fifo_free_elts (v) < n_elts)
- v = _clib_fifo_resize (v, n_elts, elt_bytes);
+ v = _clib_fifo_resize (v, n_elts, align, elt_bytes);
return v;
}
-#define clib_fifo_validate(f,n_elts) \
- f = _clib_fifo_validate ((f), (n_elts), sizeof (f[0]))
+#define clib_fifo_validate(f, n_elts) \
+ f = _clib_fifo_validate ((f), (n_elts), _vec_align (f, 0), _vec_elt_sz (f))
/* Advance tail pointer by N_ELTS which can be either positive or negative. */
always_inline void *
-_clib_fifo_advance_tail (void *v, word n_elts, uword elt_bytes,
- uword * tail_return)
+_clib_fifo_advance_tail (void *v, word n_elts, uword align, uword elt_bytes,
+ uword *tail_return)
{
word i, l, n_free;
clib_fifo_header_t *f;
@@ -123,7 +123,7 @@ _clib_fifo_advance_tail (void *v, word n_elts, uword elt_bytes,
n_free = clib_fifo_free_elts (v);
if (n_free < n_elts)
{
- v = _clib_fifo_resize (v, n_elts, elt_bytes);
+ v = _clib_fifo_resize (v, n_elts, align, elt_bytes);
n_free = clib_fifo_free_elts (v);
}
@@ -158,12 +158,13 @@ _clib_fifo_advance_tail (void *v, word n_elts, uword elt_bytes,
return v;
}
-#define clib_fifo_advance_tail(f,n_elts) \
-({ \
- uword _i; \
- (f) = _clib_fifo_advance_tail ((f), (n_elts), sizeof ((f)[0]), &_i); \
- (f) + _i; \
-})
+#define clib_fifo_advance_tail(f, n_elts) \
+ ({ \
+ uword _i; \
+ (f) = _clib_fifo_advance_tail ((f), (n_elts), _vec_align (f, 0), \
+ _vec_elt_sz (f), &_i); \
+ (f) + _i; \
+ })
always_inline uword
clib_fifo_advance_head (void *v, uword n_elts)
@@ -189,36 +190,46 @@ clib_fifo_advance_head (void *v, uword n_elts)
}
/* Add given element to fifo. */
-#define clib_fifo_add1(f,e) \
-do { \
- uword _i; \
- (f) = _clib_fifo_advance_tail ((f), 1, sizeof ((f)[0]), &_i); \
- (f)[_i] = (e); \
-} while (0)
+#define clib_fifo_add1(f, e) \
+ do \
+ { \
+ uword _i; \
+ (f) = _clib_fifo_advance_tail ((f), 1, _vec_align (f, 0), \
+ _vec_elt_sz (f), &_i); \
+ (f)[_i] = (e); \
+ } \
+ while (0)
/* Add element to fifo; return pointer to new element. */
-#define clib_fifo_add2(f,p) \
-do { \
- uword _i; \
- (f) = _clib_fifo_advance_tail ((f), 1, sizeof ((f)[0]), &_i); \
- (p) = (f) + _i; \
-} while (0)
+#define clib_fifo_add2(f, p) \
+ do \
+ { \
+ uword _i; \
+ (f) = _clib_fifo_advance_tail ((f), 1, _vec_align (f, 0), \
+ _vec_elt_sz (f), &_i); \
+ (p) = (f) + _i; \
+ } \
+ while (0)
/* Add several elements to fifo. */
-#define clib_fifo_add(f,e,n) \
-do { \
- uword _i, _l; word _n0, _n1; \
- \
- _n0 = (n); \
- (f) = _clib_fifo_advance_tail ((f), _n0, sizeof ((f)[0]), &_i); \
- _l = clib_fifo_len (f); \
- _n1 = _i + _n0 - _l; \
- _n1 = _n1 < 0 ? 0 : _n1; \
- _n0 -= _n1; \
- clib_memcpy_fast ((f) + _i, (e), _n0 * sizeof ((f)[0])); \
- if (_n1) \
- clib_memcpy_fast ((f) + 0, (e) + _n0, _n1 * sizeof ((f)[0])); \
-} while (0)
+#define clib_fifo_add(f, e, n) \
+ do \
+ { \
+ uword _i, _l; \
+ word _n0, _n1; \
+ \
+ _n0 = (n); \
+ (f) = _clib_fifo_advance_tail ((f), _n0, _vec_align (f, 0), \
+ _vec_elt_sz (f), &_i); \
+ _l = clib_fifo_len (f); \
+ _n1 = _i + _n0 - _l; \
+ _n1 = _n1 < 0 ? 0 : _n1; \
+ _n0 -= _n1; \
+ clib_memcpy_fast ((f) + _i, (e), _n0 * sizeof ((f)[0])); \
+ if (_n1) \
+ clib_memcpy_fast ((f) + 0, (e) + _n0, _n1 * sizeof ((f)[0])); \
+ } \
+ while (0)
/* Subtract element from fifo. */
#define clib_fifo_sub1(f,e) \
@@ -254,7 +265,7 @@ clib_fifo_tail_index (void *v)
#define clib_fifo_head(v) ((v) + clib_fifo_head_index (v))
#define clib_fifo_tail(v) ((v) + clib_fifo_tail_index (v))
-#define clib_fifo_free(f) vec_free_h((f),sizeof(clib_fifo_header_t))
+#define clib_fifo_free(f) vec_free ((f))
always_inline uword
clib_fifo_elt_index (void *v, uword i)
@@ -274,7 +285,7 @@ clib_fifo_elt_index (void *v, uword i)
return result;
}
-#define clib_fifo_elt_at_index(v,i) ((v) + clib_fifo_elt_index (v, (i)))
+#define clib_fifo_elt_at_index(v, i) ((v) + (i))
#define clib_fifo_foreach(v,f,body) \
do { \
diff --git a/src/vppinfra/file.h b/src/vppinfra/file.h
index 09dd2fd0496..71956137665 100644
--- a/src/vppinfra/file.h
+++ b/src/vppinfra/file.h
@@ -163,6 +163,8 @@ clib_file_write (clib_file_t * f)
return f->write_function (f);
}
+u8 *clib_file_get_resolved_basename (char *fmt, ...);
+
#endif /* included_clib_file_h */
/*
diff --git a/src/vppinfra/format.c b/src/vppinfra/format.c
index ccd999e582f..cf17b8a1acb 100644
--- a/src/vppinfra/format.c
+++ b/src/vppinfra/format.c
@@ -114,7 +114,7 @@ justify (u8 * s, format_info_t * fi, uword s_len_orig)
l0 = l1;
if (l1 > l0)
- _vec_len (s) = l0;
+ vec_set_len (s, l0);
else if (l0 > l1)
{
uword n = l0 - l1;
diff --git a/src/vppinfra/format.h b/src/vppinfra/format.h
index 70882adac99..a1a70a2d64f 100644
--- a/src/vppinfra/format.h
+++ b/src/vppinfra/format.h
@@ -98,6 +98,7 @@ _(format_hex_bytes_no_wrap);
_(format_white_space);
_(format_f64);
_(format_time_interval);
+_ (format_duration);
#ifdef CLIB_UNIX
/* Unix specific formats. */
@@ -132,8 +133,11 @@ typedef struct _unformat_input_t
(and argument). */
uword (*fill_buffer) (struct _unformat_input_t * i);
- /* Return values for fill buffer function which indicate whether not
- input has been exhausted. */
+ /* User's function to be called on input_free */
+ void (*free) (struct _unformat_input_t *i);
+
+ /* Return values for fill buffer function which indicate whether not
+ input has been exhausted. */
#define UNFORMAT_END_OF_INPUT (~0)
#define UNFORMAT_MORE_INPUT 0
@@ -154,6 +158,8 @@ unformat_init (unformat_input_t * i,
always_inline void
unformat_free (unformat_input_t * i)
{
+ if (i->free)
+ i->free (i);
vec_free (i->buffer);
vec_free (i->buffer_marks);
clib_memset (i, 0, sizeof (i[0]));
@@ -199,6 +205,22 @@ unformat_put_input (unformat_input_t * input)
input->index -= 1;
}
+always_inline uword
+is_white_space (uword c)
+{
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
/* Peek current input character without advancing. */
always_inline uword
unformat_peek_input (unformat_input_t * input)
@@ -242,8 +264,8 @@ uword va_unformat (unformat_input_t * i, const char *fmt, va_list * args);
void unformat_init_command_line (unformat_input_t * input, char *argv[]);
/* Setup for unformat of given string. */
-void unformat_init_string (unformat_input_t * input,
- char *string, int string_len);
+void unformat_init_string (unformat_input_t *input, const char *string,
+ int string_len);
always_inline void
unformat_init_cstring (unformat_input_t * input, char *string)
@@ -254,6 +276,12 @@ unformat_init_cstring (unformat_input_t * input, char *string)
/* Setup for unformat of given vector string; vector will be freed by unformat_string. */
void unformat_init_vector (unformat_input_t * input, u8 * vector_string);
+/* Unformat u8 */
+unformat_function_t unformat_u8;
+
+/* Unformat u16 */
+unformat_function_t unformat_u16;
+
/* Format function for unformat input usable when an unformat error
has occurred. */
u8 *format_unformat_error (u8 * s, va_list * va);
@@ -287,6 +315,16 @@ unformat_function_t unformat_eof;
/* Parse memory size e.g. 100, 100k, 100m, 100g. */
unformat_function_t unformat_memory_size;
+/* Unformat C string array, takes array length as 2nd argument */
+unformat_function_t unformat_c_string_array;
+
+/* Unformat sigle and double quoted string */
+unformat_function_t unformat_single_quoted_string;
+unformat_function_t unformat_double_quoted_string;
+
+/* Format base 10 e.g. 100, 100K, 100M, 100G */
+u8 *format_base10 (u8 *s, va_list *va);
+
/* Unparse memory size e.g. 100, 100k, 100m, 100g. */
u8 *format_memory_size (u8 * s, va_list * va);
@@ -301,12 +339,21 @@ u8 *format_c_identifier (u8 * s, va_list * va);
/* Format hexdump with both hex and printable chars - compatible with text2pcap */
u8 *format_hexdump (u8 * s, va_list * va);
+u8 *format_hexdump_u16 (u8 *s, va_list *va);
+u8 *format_hexdump_u32 (u8 *s, va_list *va);
+u8 *format_hexdump_u64 (u8 *s, va_list *va);
+
+/* Format bitmap of array of uword numbers */
+u8 *format_uword_bitmap (u8 *s, va_list *va);
/* Unix specific formats. */
#ifdef CLIB_UNIX
/* Setup input from Unix file. */
void unformat_init_clib_file (unformat_input_t * input, int file_descriptor);
+/* Setup input from flesystem path. */
+uword unformat_init_file (unformat_input_t *input, char *fmt, ...);
+
/* Take input from Unix environment variable; returns
1 if variable exists zero otherwise. */
uword unformat_init_unix_env (unformat_input_t * input, char *var);
diff --git a/src/plugins/perfmon/table.c b/src/vppinfra/format_table.c
index e3fc0982bb0..dd92e417acd 100644
--- a/src/plugins/perfmon/table.c
+++ b/src/vppinfra/format_table.c
@@ -22,7 +22,7 @@
*/
#include <vppinfra/format.h>
-#include "table.h"
+#include <vppinfra/format_table.h>
static table_text_attr_t default_title = {
.flags = TTAF_FG_COLOR_SET | TTAF_BOLD,
@@ -52,7 +52,7 @@ format_text_cell (table_t *t, u8 *s, table_cell_t *c, table_text_attr_t *def,
{
table_text_attr_t _a = {}, *a = &_a;
- if (a == 0)
+ if (c == 0)
return format (s, t->no_ansi ? "" : "\x1b[0m");
clib_memcpy (a, def, sizeof (table_text_attr_t));
@@ -64,12 +64,14 @@ format_text_cell (table_t *t, u8 *s, table_cell_t *c, table_text_attr_t *def,
{
a->fg_color = c->attr.fg_color;
a->flags |= TTAF_FG_COLOR_SET;
+ a->flags |= c->attr.flags & TTAF_FG_COLOR_BRIGHT;
}
if (c->attr.flags & TTAF_BG_COLOR_SET)
{
a->bg_color = c->attr.bg_color;
a->flags |= TTAF_BG_COLOR_SET;
+ a->flags |= c->attr.flags & TTAF_BG_COLOR_BRIGHT;
}
if (a->flags & TTAF_RESET)
@@ -123,20 +125,29 @@ format_table (u8 *s, va_list *args)
table_t *t = va_arg (*args, table_t *);
table_cell_t title_cell = { .text = t->title };
int table_width = 0;
+ u32 indent = format_get_indent (s);
for (int i = 0; i < vec_len (t->row_sizes); i++)
table_width += t->row_sizes[i];
- s = format_text_cell (t, s, &title_cell, &default_title, table_width);
- s = format (s, "\n");
+ if (t->title)
+ {
+ table_text_attr_t *title_default;
+ title_default =
+ t->default_title.as_u32 ? &t->default_title : &default_title;
+ s = format_text_cell (t, s, &title_cell, title_default, table_width);
+ s = format (s, "\n%U", format_white_space, indent);
+ }
for (int c = 0; c < vec_len (t->cells); c++)
{
table_text_attr_t *col_default;
if (c < t->n_header_cols)
- col_default = &default_header_col;
+ col_default = t->default_header_col.as_u32 ? &t->default_header_col :
+ &default_header_col;
else
- col_default = &default_body;
+ col_default =
+ t->default_body.as_u32 ? &t->default_body : &default_body;
for (int r = 0; r < vec_len (t->cells[c]); r++)
{
@@ -144,11 +155,14 @@ format_table (u8 *s, va_list *args)
if (r)
s = format (s, " ");
if (r < t->n_header_rows && c >= t->n_header_cols)
- row_default = &default_header_row;
+ row_default = t->default_header_row.as_u32 ?
+ &t->default_header_row :
+ &default_header_row;
s = format_text_cell (t, s, &t->cells[c][r], row_default,
t->row_sizes[r]);
}
- s = format (s, "\n");
+ if (c + 1 < vec_len (t->cells))
+ s = format (s, "\n%U", format_white_space, indent);
}
return s;
@@ -205,16 +219,24 @@ void
table_set_cell_fg_color (table_t *t, int c, int r, table_text_attr_color_t v)
{
table_cell_t *cell = table_get_cell (t, c, r);
- cell->attr.fg_color = v;
+ cell->attr.fg_color = v & 0x7;
cell->attr.flags |= TTAF_FG_COLOR_SET;
+ if (v & 8)
+ cell->attr.flags |= TTAF_FG_COLOR_BRIGHT;
+ else
+ cell->attr.flags &= ~TTAF_FG_COLOR_BRIGHT;
}
void
table_set_cell_bg_color (table_t *t, int c, int r, table_text_attr_color_t v)
{
table_cell_t *cell = table_get_cell (t, c, r);
- cell->attr.bg_color = v;
+ cell->attr.bg_color = v & 0x7;
cell->attr.flags |= TTAF_BG_COLOR_SET;
+ if (v & 8)
+ cell->attr.flags |= TTAF_BG_COLOR_BRIGHT;
+ else
+ cell->attr.flags &= ~TTAF_BG_COLOR_BRIGHT;
}
void
diff --git a/src/plugins/perfmon/table.h b/src/vppinfra/format_table.h
index 93102a033f0..4f4a7724b0e 100644
--- a/src/plugins/perfmon/table.h
+++ b/src/vppinfra/format_table.h
@@ -21,8 +21,8 @@
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#ifndef __table_h__
-#define __table_h__
+#ifndef __format_table_h__
+#define __format_table_h__
typedef enum
{
@@ -46,6 +46,14 @@ typedef enum
TTAC_MAGENTA = 5,
TTAC_CYAN = 6,
TTAC_WHITE = 7,
+ TTAC_BRIGHT_BLACK = 8,
+ TTAC_BRIGHT_RED = 9,
+ TTAC_BRIGHT_GREEN = 10,
+ TTAC_BRIGHT_YELLOW = 11,
+ TTAC_BRIGHT_BLUE = 12,
+ TTAC_BRIGHT_MAGENTA = 13,
+ TTAC_BRIGHT_CYAN = 14,
+ TTAC_BRIGHT_WHITE = 15,
} table_text_attr_color_t;
typedef enum
@@ -58,10 +66,17 @@ typedef enum
typedef struct
{
- table_text_attr_flags_t flags : 16;
- table_text_attr_color_t fg_color : 4;
- table_text_attr_color_t bg_color : 4;
- table_text_attr_align_t align : 4;
+ union
+ {
+ struct
+ {
+ table_text_attr_flags_t flags : 16;
+ table_text_attr_color_t fg_color : 4;
+ table_text_attr_color_t bg_color : 4;
+ table_text_attr_align_t align : 4;
+ };
+ u32 as_u32;
+ };
} table_text_attr_t;
typedef struct
@@ -79,20 +94,25 @@ typedef struct
int n_header_cols;
int n_header_rows;
int n_footer_cols;
+ table_text_attr_t default_title;
+ table_text_attr_t default_body;
+ table_text_attr_t default_header_col;
+ table_text_attr_t default_header_row;
} table_t;
-format_function_t format_table;
+__clib_export format_function_t format_table;
-void table_format_title (table_t *t, char *fmt, ...);
-void table_format_cell (table_t *t, int c, int r, char *fmt, ...);
-void table_set_cell_align (table_t *t, int c, int r,
- table_text_attr_align_t a);
-void table_set_cell_fg_color (table_t *t, int c, int r,
- table_text_attr_color_t v);
-void table_set_cell_bg_color (table_t *t, int c, int r,
- table_text_attr_color_t v);
-void table_free (table_t *t);
-void table_add_header_col (table_t *t, int n_strings, ...);
-void table_add_header_row (table_t *t, int n_strings, ...);
+__clib_export void table_format_title (table_t *t, char *fmt, ...);
+__clib_export void table_format_cell (table_t *t, int c, int r, char *fmt,
+ ...);
+__clib_export void table_set_cell_align (table_t *t, int c, int r,
+ table_text_attr_align_t a);
+__clib_export void table_set_cell_fg_color (table_t *t, int c, int r,
+ table_text_attr_color_t v);
+__clib_export void table_set_cell_bg_color (table_t *t, int c, int r,
+ table_text_attr_color_t v);
+__clib_export void table_free (table_t *t);
+__clib_export void table_add_header_col (table_t *t, int n_strings, ...);
+__clib_export void table_add_header_row (table_t *t, int n_strings, ...);
#endif
diff --git a/src/vppinfra/freebsd/mem.c b/src/vppinfra/freebsd/mem.c
new file mode 100644
index 00000000000..7d27a0dc169
--- /dev/null
+++ b/src/vppinfra/freebsd/mem.c
@@ -0,0 +1,471 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ * Copyright(c) 2024 Tom Jones <thj@freebsd.org>
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/memrange.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <vppinfra/clib.h>
+#include <vppinfra/mem.h>
+#include <vppinfra/lock.h>
+#include <vppinfra/time.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/format.h>
+#include <vppinfra/clib_error.h>
+
+#ifndef F_FBSD_SPECIFIC_BASE
+#define F_FBSD_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_ADD_SEALS
+#define F_ADD_SEALS (F_FBSD_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_FBSD_SPECIFIC_BASE + 10)
+
+#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+#define F_SEAL_GROW 0x0004 /* prevent file from growing */
+#define F_SEAL_WRITE 0x0008 /* prevent writes */
+#endif
+
+#ifndef MFD_HUGETLB
+#define MFD_HUGETLB 0x0004U
+#endif
+
+#ifndef MAP_HUGE_SHIFT
+#define MAP_HUGE_SHIFT 26
+#endif
+
+#ifndef MFD_HUGE_SHIFT
+#define MFD_HUGE_SHIFT 26
+#endif
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE MAP_FIXED
+#endif
+
+static void
+map_lock ()
+{
+ while (clib_atomic_test_and_set (&clib_mem_main.map_lock))
+ CLIB_PAUSE ();
+}
+
+static void
+map_unlock ()
+{
+ clib_atomic_release (&clib_mem_main.map_lock);
+}
+
+void
+clib_mem_main_init (void)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ long sysconf_page_size;
+ uword page_size;
+ void *va;
+
+ if (mm->log2_page_sz != CLIB_MEM_PAGE_SZ_UNKNOWN)
+ return;
+
+ /* system page size */
+ sysconf_page_size = sysconf (_SC_PAGESIZE);
+ if (sysconf_page_size < 0)
+ {
+ clib_panic ("Could not determine the page size");
+ }
+ page_size = sysconf_page_size;
+ mm->log2_page_sz = min_log2 (page_size);
+
+ mm->log2_default_hugepage_sz = min_log2 (page_size);
+ mm->log2_sys_default_hugepage_sz = mm->log2_default_hugepage_sz;
+
+ /* numa nodes */
+ va = mmap (0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
+ -1, 0);
+ if (va == MAP_FAILED)
+ return;
+
+ if (mlock (va, page_size))
+ goto done;
+
+ /*
+ * TODO: In linux/mem.c we can move pages to numa domains, this isn't an
+ * option in FreeBSD yet.
+ */
+
+done:
+ munmap (va, page_size);
+}
+
+__clib_export u64
+clib_mem_get_fd_page_size (int fd)
+{
+ struct stat st = { 0 };
+ if (fstat (fd, &st) == -1)
+ return 0;
+ return st.st_blksize;
+}
+
+__clib_export clib_mem_page_sz_t
+clib_mem_get_fd_log2_page_size (int fd)
+{
+ uword page_size = clib_mem_get_fd_page_size (fd);
+ return page_size ? min_log2 (page_size) : CLIB_MEM_PAGE_SZ_UNKNOWN;
+}
+
+__clib_export void
+clib_mem_vm_randomize_va (uword *requested_va,
+ clib_mem_page_sz_t log2_page_size)
+{
+ /* TODO: Not yet implemented */
+}
+
+__clib_export int
+clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ int fd;
+ unsigned int memfd_flags;
+ va_list va;
+ u8 *s = 0;
+
+ if (log2_page_size == mm->log2_page_sz)
+ log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT;
+ else if (log2_page_size == mm->log2_sys_default_hugepage_sz)
+ log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT_HUGE;
+
+ switch (log2_page_size)
+ {
+ case CLIB_MEM_PAGE_SZ_UNKNOWN:
+ return CLIB_MEM_ERROR;
+ case CLIB_MEM_PAGE_SZ_DEFAULT:
+ memfd_flags = MFD_ALLOW_SEALING;
+ break;
+ case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
+ memfd_flags = MFD_HUGETLB;
+ break;
+ default:
+ memfd_flags = MFD_HUGETLB | log2_page_size << MFD_HUGE_SHIFT;
+ }
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ /* memfd_create maximum string size is 249 chars without trailing zero */
+ if (vec_len (s) > 249)
+ vec_set_len (s, 249);
+ vec_add1 (s, 0);
+
+ fd = memfd_create ((char *) s, memfd_flags);
+ if (fd == -1)
+ {
+ vec_reset_length (mm->error);
+ mm->error = clib_error_return_unix (mm->error, "memfd_create");
+ vec_free (s);
+ return CLIB_MEM_ERROR;
+ }
+
+ vec_free (s);
+
+ if ((memfd_flags & MFD_ALLOW_SEALING) &&
+ ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1))
+ {
+ vec_reset_length (mm->error);
+ mm->error = clib_error_return_unix (mm->error, "fcntl (F_ADD_SEALS)");
+ close (fd);
+ return CLIB_MEM_ERROR;
+ }
+
+ return fd;
+}
+
+uword
+clib_mem_vm_reserve (uword start, uword size, clib_mem_page_sz_t log2_page_sz)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ uword pagesize = 1ULL << log2_page_sz;
+ uword sys_page_sz = 1ULL << mm->log2_page_sz;
+ uword n_bytes;
+ void *base = 0, *p;
+
+ size = round_pow2 (size, pagesize);
+
+ /* in adition of requested reservation, we also rserve one system page
+ * (typically 4K) adjacent to the start off reservation */
+
+ if (start)
+ {
+ /* start address is provided, so we just need to make sure we are not
+ * replacing existing map */
+ if (start & pow2_mask (log2_page_sz))
+ return ~0;
+ base = (void *) start - sys_page_sz;
+ base = mmap (base, size + sys_page_sz, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
+
+ return (base == MAP_FAILED) ? ~0 : start;
+ }
+
+ /* to make sure that we get reservation aligned to page_size we need to
+ * request one additional page as mmap will return us address which is
+ * aligned only to system page size */
+ base =
+ mmap (0, size + pagesize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (base == MAP_FAILED)
+ return ~0;
+
+ /* return additional space at the end of allocation */
+ p = base + size + pagesize;
+ n_bytes = (uword) p & pow2_mask (log2_page_sz);
+ if (n_bytes)
+ {
+ p -= n_bytes;
+ munmap (p, n_bytes);
+ }
+
+ /* return additional space at the start of allocation */
+ n_bytes = pagesize - sys_page_sz - n_bytes;
+ if (n_bytes)
+ {
+ munmap (base, n_bytes);
+ base += n_bytes;
+ }
+
+ return (uword) base + sys_page_sz;
+}
+
+__clib_export clib_mem_vm_map_hdr_t *
+clib_mem_vm_get_next_map_hdr (clib_mem_vm_map_hdr_t *hdr)
+{
+ /* TODO: Not yet implemented */
+ return NULL;
+}
+
+void *
+clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
+ uword size, int fd, uword offset, char *name)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ clib_mem_vm_map_hdr_t *hdr;
+ uword sys_page_sz = 1ULL << mm->log2_page_sz;
+ int mmap_flags = MAP_FIXED, is_huge = 0;
+
+ if (fd != -1)
+ {
+ mmap_flags |= MAP_SHARED;
+ log2_page_sz = clib_mem_get_fd_log2_page_size (fd);
+ if (log2_page_sz > mm->log2_page_sz)
+ is_huge = 1;
+ }
+ else
+ {
+ mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+
+ if (log2_page_sz == mm->log2_page_sz)
+ log2_page_sz = CLIB_MEM_PAGE_SZ_DEFAULT;
+
+ switch (log2_page_sz)
+ {
+ case CLIB_MEM_PAGE_SZ_UNKNOWN:
+ /* will fail later */
+ break;
+ case CLIB_MEM_PAGE_SZ_DEFAULT:
+ log2_page_sz = mm->log2_page_sz;
+ break;
+ case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
+ /* We shouldn't be selecting HUGETLB on FreeBSD */
+ log2_page_sz = CLIB_MEM_PAGE_SZ_UNKNOWN;
+ break;
+ default:
+ log2_page_sz = mm->log2_page_sz;
+ break;
+ }
+ }
+
+ size = round_pow2 (size, 1ULL << log2_page_sz);
+
+ base = (void *) clib_mem_vm_reserve ((uword) base, size, log2_page_sz);
+
+ if (base == (void *) ~0)
+ return CLIB_MEM_VM_MAP_FAILED;
+
+ base = mmap (base, size, PROT_READ | PROT_WRITE, mmap_flags, fd, offset);
+
+ if (base == MAP_FAILED)
+ return CLIB_MEM_VM_MAP_FAILED;
+
+ if (is_huge && (mlock (base, size) != 0))
+ {
+ munmap (base, size);
+ return CLIB_MEM_VM_MAP_FAILED;
+ }
+
+ hdr = mmap (base - sys_page_sz, sys_page_sz, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+
+ if (hdr != base - sys_page_sz)
+ {
+ munmap (base, size);
+ return CLIB_MEM_VM_MAP_FAILED;
+ }
+
+ map_lock ();
+
+ if (mm->last_map)
+ {
+ mprotect (mm->last_map, sys_page_sz, PROT_READ | PROT_WRITE);
+ mm->last_map->next = hdr;
+ mprotect (mm->last_map, sys_page_sz, PROT_NONE);
+ }
+ else
+ mm->first_map = hdr;
+
+ clib_mem_unpoison (hdr, sys_page_sz);
+ hdr->next = 0;
+ hdr->prev = mm->last_map;
+ snprintf (hdr->name, CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1, "%s", (char *) name);
+ mm->last_map = hdr;
+
+ hdr->base_addr = (uword) base;
+ hdr->log2_page_sz = log2_page_sz;
+ hdr->num_pages = size >> log2_page_sz;
+ hdr->fd = fd;
+ hdr->name[CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1] = 0;
+ mprotect (hdr, sys_page_sz, PROT_NONE);
+
+ map_unlock ();
+
+ clib_mem_unpoison (base, size);
+ return base;
+}
+
+__clib_export int
+clib_mem_vm_unmap (void *base)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ uword size, sys_page_sz = 1ULL << mm->log2_page_sz;
+ clib_mem_vm_map_hdr_t *hdr = base - sys_page_sz;
+ ;
+
+ map_lock ();
+ if (mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0)
+ goto out;
+
+ size = hdr->num_pages << hdr->log2_page_sz;
+ if (munmap ((void *) hdr->base_addr, size) != 0)
+ goto out;
+
+ if (hdr->next)
+ {
+ mprotect (hdr->next, sys_page_sz, PROT_READ | PROT_WRITE);
+ hdr->next->prev = hdr->prev;
+ mprotect (hdr->next, sys_page_sz, PROT_NONE);
+ }
+ else
+ mm->last_map = hdr->prev;
+
+ if (hdr->prev)
+ {
+ mprotect (hdr->prev, sys_page_sz, PROT_READ | PROT_WRITE);
+ hdr->prev->next = hdr->next;
+ mprotect (hdr->prev, sys_page_sz, PROT_NONE);
+ }
+ else
+ mm->first_map = hdr->next;
+
+ map_unlock ();
+
+ if (munmap (hdr, sys_page_sz) != 0)
+ return CLIB_MEM_ERROR;
+
+ return 0;
+out:
+ map_unlock ();
+ return CLIB_MEM_ERROR;
+}
+
+__clib_export void
+clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
+ uword n_pages, clib_mem_page_stats_t *stats)
+{
+ int i, *status = 0;
+ void **ptr = 0;
+
+ log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
+
+ vec_validate (status, n_pages - 1);
+ vec_validate (ptr, n_pages - 1);
+
+ for (i = 0; i < n_pages; i++)
+ ptr[i] = start + (i << log2_page_size);
+
+ clib_memset (stats, 0, sizeof (clib_mem_page_stats_t));
+ stats->total = n_pages;
+ stats->log2_page_sz = log2_page_size;
+
+ /*
+ * TODO: Until FreeBSD has support for tracking pages in NUMA domains just
+ * return that all are unknown for the statsistics.
+ */
+ stats->unknown = n_pages;
+
+ vec_free (status);
+ vec_free (ptr);
+}
+
+__clib_export u64 *
+clib_mem_vm_get_paddr (void *mem, clib_mem_page_sz_t log2_page_size,
+ int n_pages)
+{
+ struct mem_extract meme;
+ int pagesize = sysconf (_SC_PAGESIZE);
+ int fd;
+ int i;
+ u64 *r = 0;
+
+ log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
+
+ if ((fd = open ((char *) "/dev/mem", O_RDONLY)) == -1)
+ return 0;
+
+ for (i = 0; i < n_pages; i++)
+ {
+ meme.me_vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size);
+
+ if (ioctl (fd, MEM_EXTRACT_PADDR, &meme) == -1)
+ goto done;
+ vec_add1 (r, meme.me_paddr * pagesize);
+ }
+
+done:
+ close (fd);
+ if (vec_len (r) != n_pages)
+ {
+ vec_free (r);
+ return 0;
+ }
+ return r;
+}
+
+__clib_export int
+clib_mem_set_numa_affinity (u8 numa_node, int force)
+{
+ /* TODO: Not yet implemented */
+ return CLIB_MEM_ERROR;
+}
+
+__clib_export int
+clib_mem_set_default_numa_affinity ()
+{
+ /* TODO: Not yet implemented */
+ return 0;
+}
diff --git a/src/vppinfra/graph.c b/src/vppinfra/graph.c
deleted file mode 100644
index 4c92f8ef45f..00000000000
--- a/src/vppinfra/graph.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vppinfra/graph.h>
-
-/* Set link distance, creating link if not found. */
-u32
-graph_set_link (graph_t * g, u32 src, u32 dst, u32 distance)
-{
- graph_node_t *src_node, *dst_node;
- graph_link_t *l;
- u32 old_distance;
-
- /* The following validate will not work if src or dst are on the
- pool free list. */
- if (src < vec_len (g->nodes))
- ASSERT (!pool_is_free_index (g->nodes, src));
- if (dst < vec_len (g->nodes))
- ASSERT (!pool_is_free_index (g->nodes, dst));
-
- /* Make new (empty) nodes to make src and dst valid. */
- pool_validate_index (g->nodes, clib_max (src, dst));
-
- src_node = pool_elt_at_index (g->nodes, src);
- dst_node = pool_elt_at_index (g->nodes, dst);
-
- l = graph_dir_get_link_to_node (&src_node->next, dst);
- if (l)
- {
- old_distance = l->distance;
- l->distance = distance;
-
- l = graph_dir_get_link_to_node (&dst_node->prev, src);
- l->distance = distance;
- }
- else
- {
- uword li_next, li_prev;
-
- old_distance = ~0;
-
- li_next = graph_dir_add_link (&src_node->next, dst, distance);
- li_prev = graph_dir_add_link (&dst_node->prev, src, distance);
-
- l = vec_elt_at_index (src_node->next.links, li_next);
- l->link_to_self_index = li_prev;
-
- l = vec_elt_at_index (dst_node->prev.links, li_prev);
- l->link_to_self_index = li_next;
- }
-
- return old_distance;
-}
-
-void
-graph_del_link (graph_t * g, u32 src, u32 dst)
-{
- graph_node_t *src_node, *dst_node;
-
- src_node = pool_elt_at_index (g->nodes, src);
- dst_node = pool_elt_at_index (g->nodes, dst);
-
- graph_dir_del_link (&src_node->next, dst);
- graph_dir_del_link (&dst_node->next, src);
-}
-
-/* Delete source node and all links from other nodes from/to source. */
-uword
-graph_del_node (graph_t * g, u32 src)
-{
- graph_node_t *src_node, *n;
- uword index;
- graph_link_t *l;
-
- src_node = pool_elt_at_index (g->nodes, src);
-
- vec_foreach (l, src_node->next.links)
- {
- n = pool_elt_at_index (g->nodes, l->node_index);
- graph_dir_del_link (&n->prev, src);
- }
-
- vec_foreach (l, src_node->prev.links)
- {
- n = pool_elt_at_index (g->nodes, l->node_index);
- graph_dir_del_link (&n->next, src);
- }
-
- graph_dir_free (&src_node->next);
- graph_dir_free (&src_node->prev);
-
- index = src_node - g->nodes;
- pool_put (g->nodes, src_node);
- clib_memset (src_node, ~0, sizeof (src_node[0]));
-
- return index;
-}
-
-uword
-unformat_graph (unformat_input_t * input, va_list * args)
-{
- graph_t *g = va_arg (*args, graph_t *);
- typedef struct
- {
- u32 src, dst, distance;
- } T;
- T *links = 0, *l;
- uword result;
-
- while (1)
- {
- vec_add2 (links, l, 1);
- if (!unformat (input, "%d%d%d", &l->src, &l->dst, &l->distance))
- break;
- }
- _vec_len (links) -= 1;
- result = vec_len (links) > 0;
- vec_foreach (l, links)
- {
- graph_set_link (g, l->src, l->dst, l->distance);
- graph_set_link (g, l->dst, l->src, l->distance);
- }
-
- vec_free (links);
- return result;
-}
-
-u8 *
-format_graph_node (u8 * s, va_list * args)
-{
- graph_t *g = va_arg (*args, graph_t *);
- u32 node_index = va_arg (*args, u32);
-
- if (g->format_node)
- s = format (s, "%U", g->format_node, g, node_index);
- else
- s = format (s, "%d", node_index);
-
- return s;
-}
-
-u8 *
-format_graph (u8 * s, va_list * args)
-{
- graph_t *g = va_arg (*args, graph_t *);
- graph_node_t *n;
- graph_link_t *l;
- u32 indent = format_get_indent (s);
-
- s = format (s, "graph %d nodes", pool_elts (g->nodes));
- /* *INDENT-OFF* */
- pool_foreach (n, g->nodes) {
- s = format (s, "\n%U", format_white_space, indent + 2);
- s = format (s, "%U -> ", format_graph_node, g, n - g->nodes);
- vec_foreach (l, n->next.links)
- s = format (s, "%U (%d), ",
- format_graph_node, g, l->node_index,
- l->distance);
- }
- /* *INDENT-ON* */
-
- return s;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/graph.h b/src/vppinfra/graph.h
deleted file mode 100644
index 1c26118f76c..00000000000
--- a/src/vppinfra/graph.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef included_clib_graph_h
-#define included_clib_graph_h
-
-#include <vppinfra/format.h>
-#include <vppinfra/hash.h>
-#include <vppinfra/pool.h>
-
-/* Generic graphs. */
-typedef struct
-{
- /* Next node along this link. */
- u32 node_index;
-
- /* Other direction link index to reach back to current node. */
- u32 link_to_self_index;
-
- /* Distance to next node. */
- u32 distance;
-} graph_link_t;
-
-/* Direction on graph: either next or previous. */
-typedef struct
-{
- /* Vector of links. */
- graph_link_t *links;
-
- /* Hash mapping node index to link which visits this node. */
- uword *link_index_by_node_index;
-} graph_dir_t;
-
-always_inline void
-graph_dir_free (graph_dir_t * d)
-{
- vec_free (d->links);
- hash_free (d->link_index_by_node_index);
-}
-
-always_inline graph_link_t *
-graph_dir_get_link_to_node (graph_dir_t * d, u32 node_index)
-{
- uword *p = hash_get (d->link_index_by_node_index, node_index);
- return p ? vec_elt_at_index (d->links, p[0]) : 0;
-}
-
-always_inline uword
-graph_dir_add_link (graph_dir_t * d, u32 node_index, u32 distance)
-{
- graph_link_t *l;
- ASSERT (!graph_dir_get_link_to_node (d, node_index));
- vec_add2 (d->links, l, 1);
- l->node_index = node_index;
- l->distance = distance;
- hash_set (d->link_index_by_node_index, node_index, l - d->links);
- return l - d->links;
-}
-
-always_inline void
-graph_dir_del_link (graph_dir_t * d, u32 node_index)
-{
- graph_link_t *l = graph_dir_get_link_to_node (d, node_index);
- uword li = l - d->links;
- uword n_links = vec_len (d->links);
-
- ASSERT (l != 0);
- hash_unset (d->link_index_by_node_index, node_index);
- n_links -= 1;
- if (li < n_links)
- d->links[li] = d->links[n_links];
- _vec_len (d->links) = n_links;
-}
-
-typedef struct
-{
- /* Nodes we are connected to plus distances. */
- graph_dir_t next, prev;
-} graph_node_t;
-
-typedef struct
-{
- /* Pool of nodes. */
- graph_node_t *nodes;
-
- void *opaque;
-
- format_function_t *format_node;
-} graph_t;
-
-/* Set link distance, creating link if not found. */
-u32 graph_set_link (graph_t * g, u32 src, u32 dst, u32 distance);
-
-always_inline void
-graph_set_bidirectional_link (graph_t * g, u32 src, u32 dst, u32 distance)
-{
- graph_set_link (g, src, dst, distance);
- graph_set_link (g, dst, src, distance);
-}
-
-void graph_del_link (graph_t * g, u32 src, u32 dst);
-uword graph_del_node (graph_t * g, u32 src);
-
-unformat_function_t unformat_graph;
-format_function_t format_graph;
-format_function_t format_graph_node;
-
-#endif /* included_clib_graph_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/hash.c b/src/vppinfra/hash.c
index fc6c4518048..0e650e67a90 100644
--- a/src/vppinfra/hash.c
+++ b/src/vppinfra/hash.c
@@ -77,237 +77,53 @@ set_is_user (void *v, uword i, uword is_user)
static u8 *hash_format_pair_default (u8 * s, va_list * args);
-#if uword_bits == 64
-
-static inline u64
-zap64 (u64 x, word n)
-{
-#define _(n) (((u64) 1 << (u64) (8*(n))) - (u64) 1)
- static u64 masks_little_endian[] = {
- 0, _(1), _(2), _(3), _(4), _(5), _(6), _(7),
- };
- static u64 masks_big_endian[] = {
- 0, ~_(7), ~_(6), ~_(5), ~_(4), ~_(3), ~_(2), ~_(1),
- };
-#undef _
- if (clib_arch_is_big_endian)
- return x & masks_big_endian[n];
- else
- return x & masks_little_endian[n];
-}
-
-/**
- * make address-sanitizer skip this:
- * clib_mem_unaligned + zap64 casts its input as u64, computes a mask
- * according to the input length, and returns the casted maked value.
- * Therefore all the 8 Bytes of the u64 are systematically read, which
- * rightfully causes address-sanitizer to raise an error on smaller inputs.
- *
- * However the invalid Bytes are discarded within zap64(), which is why
- * this can be silenced safely.
- *
- * The above is true *unless* the extra bytes cross a page boundary
- * into unmapped or no-access space, hence the boundary crossing check.
- */
-static inline u64
-hash_memory64 (void *p, word n_bytes, u64 state)
+__clib_export uword
+hash_memory (void *p, word n_bytes, uword state)
{
- u64 *q = p;
+ uword last[3] = {};
+ uwordu *q = p;
u64 a, b, c, n;
- int page_boundary_crossing;
- u64 start_addr, end_addr;
- union
- {
- u8 as_u8[8];
- u64 as_u64;
- } tmp;
-
- /*
- * If the request crosses a 4k boundary, it's not OK to assume
- * that the zap64 game is safe. 4k is the minimum known page size.
- */
- start_addr = (u64) p;
- end_addr = start_addr + n_bytes + 7;
- page_boundary_crossing = (start_addr >> 12) != (end_addr >> 12);
-
- a = b = 0x9e3779b97f4a7c13LL;
- c = state;
- n = n_bytes;
-
- while (n >= 3 * sizeof (u64))
- {
- a += clib_mem_unaligned (q + 0, u64);
- b += clib_mem_unaligned (q + 1, u64);
- c += clib_mem_unaligned (q + 2, u64);
- hash_mix64 (a, b, c);
- n -= 3 * sizeof (u64);
- q += 3;
- }
-
- c += n_bytes;
- switch (n / sizeof (u64))
- {
- case 2:
- a += clib_mem_unaligned (q + 0, u64);
- b += clib_mem_unaligned (q + 1, u64);
- if (n % sizeof (u64))
- {
- if (PREDICT_TRUE (page_boundary_crossing == 0))
- c +=
- zap64 (CLIB_MEM_OVERFLOW
- (clib_mem_unaligned (q + 2, u64), q + 2, sizeof (u64)),
- n % sizeof (u64)) << 8;
- else
- {
- clib_memcpy_fast (tmp.as_u8, q + 2, n % sizeof (u64));
- c += zap64 (tmp.as_u64, n % sizeof (u64)) << 8;
- }
- }
- break;
-
- case 1:
- a += clib_mem_unaligned (q + 0, u64);
- if (n % sizeof (u64))
- {
- if (PREDICT_TRUE (page_boundary_crossing == 0))
- b +=
- zap64 (CLIB_MEM_OVERFLOW
- (clib_mem_unaligned (q + 1, u64), q + 1, sizeof (u64)),
- n % sizeof (u64));
- else
- {
- clib_memcpy_fast (tmp.as_u8, q + 1, n % sizeof (u64));
- b += zap64 (tmp.as_u64, n % sizeof (u64));
- }
- }
- break;
- case 0:
- if (n % sizeof (u64))
- {
- if (PREDICT_TRUE (page_boundary_crossing == 0))
- a +=
- zap64 (CLIB_MEM_OVERFLOW
- (clib_mem_unaligned (q + 0, u64), q + 0, sizeof (u64)),
- n % sizeof (u64));
- else
- {
- clib_memcpy_fast (tmp.as_u8, q, n % sizeof (u64));
- a += zap64 (tmp.as_u64, n % sizeof (u64));
- }
- }
- break;
- }
-
- hash_mix64 (a, b, c);
-
- return c;
-}
-
-#else /* if uword_bits == 64 */
-
-static inline u32
-zap32 (u32 x, word n)
-{
-#define _(n) (((u32) 1 << (u32) (8*(n))) - (u32) 1)
- static u32 masks_little_endian[] = {
- 0, _(1), _(2), _(3),
- };
- static u32 masks_big_endian[] = {
- 0, ~_(3), ~_(2), ~_(1),
- };
-#undef _
- if (clib_arch_is_big_endian)
- return x & masks_big_endian[n];
- else
- return x & masks_little_endian[n];
-}
-
-static inline u32
-hash_memory32 (void *p, word n_bytes, u32 state)
-{
- u32 *q = p;
- u32 a, b, c, n;
-
- a = b = 0x9e3779b9;
+ a = b = (uword_bits == 64) ? 0x9e3779b97f4a7c13LL : 0x9e3779b9;
c = state;
n = n_bytes;
- while (n >= 3 * sizeof (u32))
+ while (n >= 3 * sizeof (uword))
{
- a += clib_mem_unaligned (q + 0, u32);
- b += clib_mem_unaligned (q + 1, u32);
- c += clib_mem_unaligned (q + 2, u32);
- hash_mix32 (a, b, c);
- n -= 3 * sizeof (u32);
+ a += q[0];
+ b += q[1];
+ c += q[2];
+ hash_mix (a, b, c);
+ n -= 3 * sizeof (uword);
q += 3;
}
c += n_bytes;
- switch (n / sizeof (u32))
- {
- case 2:
- a += clib_mem_unaligned (q + 0, u32);
- b += clib_mem_unaligned (q + 1, u32);
- if (n % sizeof (u32))
- c += zap32 (clib_mem_unaligned (q + 2, u32), n % sizeof (u32)) << 8;
- break;
-
- case 1:
- a += clib_mem_unaligned (q + 0, u32);
- if (n % sizeof (u32))
- b += zap32 (clib_mem_unaligned (q + 1, u32), n % sizeof (u32));
- break;
- case 0:
- if (n % sizeof (u32))
- a += zap32 (clib_mem_unaligned (q + 0, u32), n % sizeof (u32));
- break;
+ if (n > 0)
+ {
+ clib_memcpy_fast (&last, q, n);
+ a += last[0];
+ b += last[1];
+ c += last[2];
}
- hash_mix32 (a, b, c);
+ hash_mix (a, b, c);
return c;
}
-#endif
-
-__clib_export uword
-hash_memory (void *p, word n_bytes, uword state)
-{
- uword *q = p;
-
-#if uword_bits == 64
- return hash_memory64 (q, n_bytes, state);
-#else
- return hash_memory32 (q, n_bytes, state);
-#endif
-}
-#if uword_bits == 64
always_inline uword
hash_uword (uword x)
{
- u64 a, b, c;
+ uword a, b, c;
- a = b = 0x9e3779b97f4a7c13LL;
+ a = b = (uword_bits == 64) ? 0x9e3779b97f4a7c13LL : 0x9e3779b9;
c = 0;
a += x;
- hash_mix64 (a, b, c);
+ hash_mix (a, b, c);
return c;
}
-#else
-always_inline uword
-hash_uword (uword x)
-{
- u32 a, b, c;
-
- a = b = 0x9e3779b9;
- c = 0;
- a += x;
- hash_mix32 (a, b, c);
- return c;
-}
-#endif
/* Call sum function. Hash code will be sum function value
modulo the prime length of the hash table. */
@@ -469,9 +285,7 @@ set_indirect (void *v, hash_pair_indirect_t * pi, uword key,
new_len = len + 1;
if (new_len * hash_pair_bytes (h) > (1ULL << log2_bytes))
{
- pi->pairs = clib_mem_realloc (pi->pairs,
- 1ULL << (log2_bytes + 1),
- 1ULL << log2_bytes);
+ pi->pairs = clib_mem_realloc (pi->pairs, 1ULL << (log2_bytes + 1));
log2_bytes++;
}
@@ -528,7 +342,7 @@ unset_indirect (void *v, uword i, hash_pair_t * q)
else
zero_pair (h, q);
if (is_vec)
- _vec_len (pi->pairs) -= 1;
+ vec_dec_len (pi->pairs, 1);
else
indirect_pair_set (pi, indirect_pair_get_log2_bytes (pi), len - 1);
}
@@ -734,6 +548,7 @@ _hash_create (uword elts, hash_t * h_user)
hash_t *h;
uword log2_pair_size;
void *v;
+ vec_attr_t va = { .hdr_sz = sizeof (h[0]), .align = sizeof (hash_pair_t) };
/* Size of hash is power of 2 >= ELTS and larger than
number of bits in is_user bitmap elements. */
@@ -744,19 +559,19 @@ _hash_create (uword elts, hash_t * h_user)
if (h_user)
log2_pair_size = h_user->log2_pair_size;
- v = _vec_resize ((void *) 0,
- /* vec len: */ elts,
- /* data bytes: */
- (elts << log2_pair_size) * sizeof (hash_pair_t),
- /* header bytes: */
- sizeof (h[0]) +
- (elts / BITS (h->is_user[0])) * sizeof (h->is_user[0]),
- /* alignment */ sizeof (hash_pair_t));
+ va.elt_sz = (1 << log2_pair_size) * sizeof (hash_pair_t),
+ v = _vec_alloc_internal (elts, &va);
h = hash_header (v);
if (h_user)
- h[0] = h_user[0];
+ {
+ h[0] = h_user[0];
+ h->is_user = 0;
+ }
+ vec_validate_aligned (
+ h->is_user, ((elts / BITS (h->is_user[0])) * sizeof (h->is_user[0])) - 1,
+ CLIB_CACHE_LINE_BYTES);
h->log2_pair_size = log2_pair_size;
h->elts = 0;
@@ -796,6 +611,7 @@ _hash_free (void *v)
clib_mem_free (p->indirect.pairs);
}
+ vec_free (h->is_user);
vec_free_header (h);
return 0;
@@ -812,11 +628,9 @@ hash_resize_internal (void *old, uword new_size, uword free_old)
{
hash_t *h = old ? hash_header (old) : 0;
new = _hash_create (new_size, h);
- /* *INDENT-OFF* */
hash_foreach_pair (p, old, {
new = _hash_set3 (new, p->key, &p->value[0], 0);
});
- /* *INDENT-ON* */
}
if (free_old)
@@ -824,7 +638,7 @@ hash_resize_internal (void *old, uword new_size, uword free_old)
return new;
}
-void *
+__clib_export void *
hash_resize (void *old, uword new_size)
{
return hash_resize_internal (old, new_size, 1);
@@ -999,7 +813,7 @@ hash_bytes (void *v)
if (!v)
return 0;
- bytes = vec_capacity (v, hash_header_bytes (v));
+ bytes = vec_mem_size (v);
for (i = 0; i < hash_capacity (v); i++)
{
@@ -1009,7 +823,7 @@ hash_bytes (void *v)
if (h->log2_pair_size > 0)
bytes += 1 << indirect_pair_get_log2_bytes (&p->indirect);
else
- bytes += vec_capacity (p->indirect.pairs, 0);
+ bytes += vec_mem_size (p->indirect.pairs);
}
}
return bytes;
@@ -1068,11 +882,9 @@ format_hash (u8 *s, va_list *va)
if (verbose)
{
- /* *INDENT-OFF* */
hash_foreach_pair (p, v, {
s = format (s, " %U\n", h->format_pair, h->format_pair_arg, v, p);
});
- /* *INDENT-ON* */
}
return s;
diff --git a/src/vppinfra/hash.h b/src/vppinfra/hash.h
index e4a65d21e65..3c754c8e29f 100644
--- a/src/vppinfra/hash.h
+++ b/src/vppinfra/hash.h
@@ -93,24 +93,14 @@ typedef struct hash_header
/* Bit i is set if pair i is a user object (as opposed to being
either zero or an indirect array of pairs). */
- uword is_user[0];
+ uword *is_user;
} hash_t;
-/* Hash header size in bytes */
-always_inline uword
-hash_header_bytes (void *v)
-{
- hash_t *h;
- uword is_user_bytes =
- (sizeof (h->is_user[0]) * vec_len (v)) / BITS (h->is_user[0]);
- return sizeof (h[0]) + is_user_bytes;
-}
-
/* Returns a pointer to the hash header given the vector pointer */
always_inline hash_t *
hash_header (void *v)
{
- return vec_header (v, hash_header_bytes (v));
+ return vec_header (v);
}
/* Number of elements in the hash table */
@@ -133,8 +123,9 @@ always_inline uword
hash_is_user (void *v, uword i)
{
hash_t *h = hash_header (v);
- uword i0 = i / BITS (h->is_user[0]);
- uword i1 = i % BITS (h->is_user[0]);
+ uword bits = BITS (h->is_user[0]);
+ uword i0 = i / bits;
+ uword i1 = i % bits;
return (h->is_user[i0] & ((uword) 1 << i1)) != 0;
}
@@ -278,9 +269,20 @@ uword hash_bytes (void *v);
always_inline void
hash_set_mem_alloc (uword ** h, const void *key, uword v)
{
+ int objsize = __builtin_object_size (key, 0);
size_t ksz = hash_header (*h)->user;
- void *copy = clib_mem_alloc (ksz);
- clib_memcpy_fast (copy, key, ksz);
+ void *copy;
+ if (objsize > 0)
+ {
+ ASSERT (objsize == ksz);
+ copy = clib_mem_alloc (objsize);
+ clib_memcpy_fast (copy, key, objsize);
+ }
+ else
+ {
+ copy = clib_mem_alloc (ksz);
+ clib_memcpy_fast (copy, key, ksz);
+ }
hash_set_mem (*h, copy, v);
}
@@ -526,6 +528,12 @@ do { \
#define hash_mix64_step_3(a,b,c) hash_mix_step(a,b,c,35,49,11)
#define hash_mix64_step_4(a,b,c) hash_mix_step(a,b,c,12,18,22)
+#if uword_bits == 64
+#define hash_mix(a, b, c) hash_mix64 (a, b, c)
+#else
+#define hash_mix(a, b, c) hash_mix32 (a, b, c)
+#endif
+
/* Hash function based on that of Bob Jenkins (bob_jenkins@compuserve.com).
Thanks, Bob. */
#define hash_mix64(a0,b0,c0) \
diff --git a/src/vppinfra/heap.c b/src/vppinfra/heap.c
index bc22da1d8f6..7db814200f8 100644
--- a/src/vppinfra/heap.c
+++ b/src/vppinfra/heap.c
@@ -139,7 +139,7 @@ elt_delete (heap_header_t * h, heap_elt_t * e)
if (e < l)
vec_add1 (h->free_elts, e - h->elts);
else
- _vec_len (h->elts)--;
+ vec_dec_len (h->elts, 1);
}
/*
@@ -200,7 +200,7 @@ elt_new (heap_header_t * h)
if ((l = vec_len (h->free_elts)) > 0)
{
e = elt_at (h, h->free_elts[l - 1]);
- _vec_len (h->free_elts) -= 1;
+ vec_dec_len (h->free_elts, 1);
}
else
vec_add2 (h->elts, e, 1);
@@ -276,7 +276,7 @@ remove_free_block (void *v, uword b, uword i)
h->free_lists[b][i] = t;
set_free_elt (v, elt_at (h, t), i);
}
- _vec_len (h->free_lists[b]) = l - 1;
+ vec_set_len (h->free_lists[b], l - 1);
}
static heap_elt_t *
@@ -413,6 +413,9 @@ _heap_alloc (void *v,
if (!e)
{
uword max_len;
+ vec_attr_t va = { .elt_sz = elt_bytes,
+ .hdr_sz = sizeof (h[0]),
+ .align = HEAP_DATA_ALIGN };
offset = vec_len (v);
max_len = heap_get_max_len (v);
@@ -422,12 +425,9 @@ _heap_alloc (void *v,
h = heap_header (v);
if (!v || !(h->flags & HEAP_IS_STATIC))
- v = _vec_resize (v,
- align_size,
- (offset + align_size) * elt_bytes,
- sizeof (h[0]), HEAP_DATA_ALIGN);
+ v = _vec_realloc_internal (v, offset + align_size, &va);
else
- _vec_len (v) += align_size;
+ vec_inc_len (v, align_size);
if (offset == 0)
{
@@ -624,7 +624,7 @@ _heap_free (void *v)
vec_free (h->free_elts);
vec_free (h->small_free_elt_free_index);
if (!(h->flags & HEAP_IS_STATIC))
- vec_free_h (v, sizeof (h[0]));
+ vec_free (v);
return v;
}
@@ -640,10 +640,10 @@ heap_bytes (void *v)
bytes = sizeof (h[0]);
bytes += vec_len (v) * sizeof (h->elt_bytes);
for (b = 0; b < vec_len (h->free_lists); b++)
- bytes += vec_capacity (h->free_lists[b], 0);
+ bytes += vec_mem_size (h->free_lists[b]);
bytes += vec_bytes (h->free_lists);
- bytes += vec_capacity (h->elts, 0);
- bytes += vec_capacity (h->free_elts, 0);
+ bytes += vec_mem_size (h->elts);
+ bytes += vec_mem_size (h->free_elts);
bytes += vec_bytes (h->used_elt_bitmap);
return bytes;
diff --git a/src/vppinfra/heap.h b/src/vppinfra/heap.h
index 22fc335c072..45f3131a45b 100644
--- a/src/vppinfra/heap.h
+++ b/src/vppinfra/heap.h
@@ -160,13 +160,7 @@ typedef struct
always_inline heap_header_t *
heap_header (void *v)
{
- return vec_header (v, sizeof (heap_header_t));
-}
-
-always_inline uword
-heap_header_bytes ()
-{
- return vec_header_bytes (sizeof (heap_header_t));
+ return vec_header (v);
}
always_inline void
@@ -191,6 +185,9 @@ always_inline void *
_heap_dup (void *v_old, uword v_bytes)
{
heap_header_t *h_old, *h_new;
+ vec_attr_t va = { .align = HEAP_DATA_ALIGN,
+ .hdr_sz = sizeof (heap_header_t),
+ .elt_sz = 1 };
void *v_new;
h_old = heap_header (v_old);
@@ -198,10 +195,7 @@ _heap_dup (void *v_old, uword v_bytes)
if (!v_old)
return v_old;
- v_new = 0;
- v_new =
- _vec_resize (v_new, _vec_len (v_old), v_bytes, sizeof (heap_header_t),
- HEAP_DATA_ALIGN);
+ v_new = _vec_alloc_internal (_vec_len (v_old), &va);
h_new = heap_header (v_new);
heap_dup_header (h_old, h_new);
clib_memcpy_fast (v_new, v_old, v_bytes);
@@ -220,9 +214,10 @@ uword heap_bytes (void *v);
always_inline void *
_heap_new (u32 len, u32 n_elt_bytes)
{
- void *v = _vec_resize ((void *) 0, len, (uword) len * n_elt_bytes,
- sizeof (heap_header_t),
- HEAP_DATA_ALIGN);
+ vec_attr_t va = { .align = HEAP_DATA_ALIGN,
+ .hdr_sz = sizeof (heap_header_t),
+ .elt_sz = n_elt_bytes };
+ void *v = _vec_alloc_internal (len, &va);
heap_header (v)->elt_bytes = n_elt_bytes;
return v;
}
@@ -249,27 +244,6 @@ heap_get_max_len (void *v)
return v ? heap_header (v)->max_len : 0;
}
-/* Create fixed size heap with given block of memory. */
-always_inline void *
-heap_create_from_memory (void *memory, uword max_len, uword elt_bytes)
-{
- heap_header_t *h;
- void *v;
-
- if (max_len * elt_bytes < sizeof (h[0]))
- return 0;
-
- h = memory;
- clib_memset (h, 0, sizeof (h[0]));
- h->max_len = max_len;
- h->elt_bytes = elt_bytes;
- h->flags = HEAP_IS_STATIC;
-
- v = (void *) (memory + heap_header_bytes ());
- _vec_len (v) = 0;
- return v;
-}
-
/* Execute BODY for each allocated heap element. */
#define heap_foreach(var,len,heap,body) \
do { \
diff --git a/src/vppinfra/interrupt.c b/src/vppinfra/interrupt.c
index 20b7450ceed..c9f0078c5e4 100644
--- a/src/vppinfra/interrupt.c
+++ b/src/vppinfra/interrupt.c
@@ -1,43 +1,33 @@
-
-/*
- * Copyright (c) 2020 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
*/
#include <vppinfra/clib.h>
-#include <vppinfra/bitops.h> /* for count_set_bits */
-#include <vppinfra/vec.h>
#include <vppinfra/interrupt.h>
-#include <vppinfra/format.h>
__clib_export void
-clib_interrupt_init (void **data, uword n_int)
+clib_interrupt_init (void **data, u32 n_int)
{
clib_interrupt_header_t *h;
- uword sz = sizeof (clib_interrupt_header_t);
- uword data_size = round_pow2 (n_int, CLIB_CACHE_LINE_BYTES * 8) / 8;
+ const u32 bits_in_cl = 8 << CLIB_LOG2_CACHE_LINE_BYTES;
+ u32 sz = sizeof (clib_interrupt_header_t);
+ u32 n_cl = round_pow2 (n_int, bits_in_cl) / bits_in_cl;
- sz += 2 * data_size;
+ sz += 2 * n_cl * CLIB_CACHE_LINE_BYTES;
h = data[0] = clib_mem_alloc_aligned (sz, CLIB_CACHE_LINE_BYTES);
clib_memset (data[0], 0, sz);
h->n_int = n_int;
- h->n_uword_alloc = (data_size * 8) >> log2_uword_bits;
+ h->uwords_allocated = n_cl * bits_in_cl / uword_bits;
+ h->uwords_used = round_pow2 (n_int, uword_bits) / uword_bits;
+ h->local = (uword *) (h + 1);
+ h->remote = h->local + h->uwords_allocated;
}
__clib_export void
-clib_interrupt_resize (void **data, uword n_int)
+clib_interrupt_resize (void **data, u32 n_int)
{
clib_interrupt_header_t *h = data[0];
+ u32 new_n_uwords, i;
if (data[0] == 0)
{
@@ -45,48 +35,37 @@ clib_interrupt_resize (void **data, uword n_int)
return;
}
- if (n_int < h->n_int)
+ if (n_int == h->n_int)
+ return;
+
+ new_n_uwords = round_pow2 (n_int, uword_bits) / uword_bits;
+
+ if (new_n_uwords > h->uwords_allocated)
{
- uword *old_bmp, *old_abp, v;
- old_bmp = clib_interrupt_get_bitmap (data[0]);
- old_abp = clib_interrupt_get_atomic_bitmap (data[0]);
- for (uword i = 0; i < h->n_uword_alloc; i++)
- {
- v = old_abp[i];
- old_abp[i] = 0;
- if (n_int > ((i + 1) * uword_bits))
- old_bmp[i] |= v;
- else if (n_int > (i * uword_bits))
- old_bmp[i] = (old_bmp[i] | v) & pow2_mask (n_int - i * uword_bits);
- else
- old_bmp[i] = 0;
- }
+ clib_interrupt_header_t *nh;
+ clib_interrupt_init ((void **) &nh, n_int);
+ for (int i = 0; i < h->uwords_used; i++)
+ nh->local[i] = h->local[i] | h->remote[i];
+ clib_mem_free (data[0]);
+ data[0] = nh;
+ return;
}
- else if (n_int > h->n_uword_alloc * uword_bits)
- {
- void *old = data[0];
- uword *old_bmp, *old_abp, *new_bmp;
- uword n_uwords = round_pow2 (h->n_int, uword_bits) / uword_bits;
- clib_interrupt_init (data, n_int);
- h = data[0];
+ h->n_int = n_int;
+ h->uwords_used = new_n_uwords;
- new_bmp = clib_interrupt_get_bitmap (data[0]);
- old_bmp = clib_interrupt_get_bitmap (old);
- old_abp = clib_interrupt_get_atomic_bitmap (old);
+ for (i = 0; i < new_n_uwords; i++)
+ h->local[i] |= h->remote[i];
- for (uword i = 0; i < n_uwords; i++)
- new_bmp[i] = old_bmp[i] | old_abp[i];
+ for (i = 0; i < h->uwords_allocated; i++)
+ h->remote[i] = 0;
- clib_mem_free (old);
- }
- h->n_int = n_int;
+ for (i = new_n_uwords; i < h->uwords_allocated; i++)
+ h->local[i] = 0;
+
+ n_int &= pow2_mask (log2_uword_bits);
+
+ if (n_int)
+ h->local[n_int >> log2_uword_bits] &= pow2_mask (n_int);
}
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/interrupt.h b/src/vppinfra/interrupt.h
index 60c01fa0248..b0d7dde272a 100644
--- a/src/vppinfra/interrupt.h
+++ b/src/vppinfra/interrupt.h
@@ -1,34 +1,25 @@
-/*
- * Copyright (c) 2020 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2023 Cisco Systems, Inc.
*/
#ifndef included_clib_interrupt_h
#define included_clib_interrupt_h
#include <vppinfra/clib.h>
-#include <vppinfra/bitops.h> /* for count_set_bits */
#include <vppinfra/vec.h>
typedef struct
{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- int n_int;
- uword n_uword_alloc;
+ u32 n_int;
+ u32 uwords_allocated;
+ u32 uwords_used;
+ uword *local;
+ uword *remote;
} clib_interrupt_header_t;
-void clib_interrupt_init (void **data, uword n_interrupts);
-void clib_interrupt_resize (void **data, uword n_interrupts);
+void clib_interrupt_init (void **data, u32 n_interrupts);
+void clib_interrupt_resize (void **data, u32 n_interrupts);
static_always_inline void
clib_interrupt_free (void **data)
@@ -49,94 +40,98 @@ clib_interrupt_get_n_int (void *d)
return 0;
}
-static_always_inline uword *
-clib_interrupt_get_bitmap (void *d)
-{
- return d + sizeof (clib_interrupt_header_t);
-}
-
-static_always_inline uword *
-clib_interrupt_get_atomic_bitmap (void *d)
-{
- clib_interrupt_header_t *h = d;
- return clib_interrupt_get_bitmap (d) + h->n_uword_alloc;
-}
-
static_always_inline void
clib_interrupt_set (void *in, int int_num)
{
- uword *bmp = clib_interrupt_get_bitmap (in);
- uword mask = 1ULL << (int_num & (uword_bits - 1));
- bmp += int_num >> log2_uword_bits;
+ clib_interrupt_header_t *h = in;
+ u32 off = int_num >> log2_uword_bits;
+ uword bit = 1ULL << (int_num & pow2_mask (log2_uword_bits));
- ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int);
+ ASSERT (int_num < h->n_int);
- *bmp |= mask;
+ h->local[off] |= bit;
}
static_always_inline void
clib_interrupt_set_atomic (void *in, int int_num)
{
- uword *bmp = clib_interrupt_get_atomic_bitmap (in);
- uword mask = 1ULL << (int_num & (uword_bits - 1));
- bmp += int_num >> log2_uword_bits;
+ clib_interrupt_header_t *h = in;
+ u32 off = int_num >> log2_uword_bits;
+ uword bit = 1ULL << (int_num & pow2_mask (log2_uword_bits));
- ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int);
+ ASSERT (int_num < h->n_int);
- __atomic_fetch_or (bmp, mask, __ATOMIC_RELAXED);
+ __atomic_fetch_or (h->remote + off, bit, __ATOMIC_RELAXED);
}
static_always_inline void
clib_interrupt_clear (void *in, int int_num)
{
- uword *bmp = clib_interrupt_get_bitmap (in);
- uword *abm = clib_interrupt_get_atomic_bitmap (in);
- uword mask = 1ULL << (int_num & (uword_bits - 1));
- uword off = int_num >> log2_uword_bits;
+ clib_interrupt_header_t *h = in;
+ u32 off = int_num >> log2_uword_bits;
+ uword bit = 1ULL << (int_num & pow2_mask (log2_uword_bits));
+ uword *loc = h->local;
+ uword *rem = h->remote;
+ uword v;
- ASSERT (int_num < ((clib_interrupt_header_t *) in)->n_int);
+ ASSERT (int_num < h->n_int);
- bmp[off] |= __atomic_exchange_n (abm + off, 0, __ATOMIC_SEQ_CST);
- bmp[off] &= ~mask;
+ v = loc[off] | __atomic_exchange_n (rem + off, 0, __ATOMIC_SEQ_CST);
+ loc[off] = v & ~bit;
}
static_always_inline int
-clib_interrupt_get_next (void *in, int last)
+clib_interrupt_get_next_and_clear (void *in, int last)
{
- uword *bmp = clib_interrupt_get_bitmap (in);
- uword *abm = clib_interrupt_get_atomic_bitmap (in);
clib_interrupt_header_t *h = in;
- uword bmp_uword, off;
+ uword bit, v;
+ uword *loc = h->local;
+ uword *rem = h->remote;
+ u32 off, n_uwords = h->uwords_used;
- ASSERT (last >= -1 && last < h->n_int);
+ ASSERT (last >= -1 && last < (int) h->n_int);
off = (last + 1) >> log2_uword_bits;
- last -= off << log2_uword_bits;
- bmp[off] |= __atomic_exchange_n (abm + off, 0, __ATOMIC_SEQ_CST);
- bmp_uword = bmp[off] & ~pow2_mask (last + 1);
+ if (off >= n_uwords)
+ return -1;
-next:
- if (bmp_uword)
- return (off << log2_uword_bits) + count_trailing_zeros (bmp_uword);
+ v = loc[off] | __atomic_exchange_n (rem + off, 0, __ATOMIC_SEQ_CST);
+ loc[off] = v;
- off++;
+ v &= ~pow2_mask ((last + 1) & pow2_mask (log2_uword_bits));
- if (off > h->n_int >> log2_uword_bits)
- return -1;
+ while (v == 0)
+ {
+ if (++off == n_uwords)
+ return -1;
- bmp[off] |= __atomic_exchange_n (abm + off, 0, __ATOMIC_SEQ_CST);
- bmp_uword = bmp[off];
+ v = loc[off] | __atomic_exchange_n (rem + off, 0, __ATOMIC_SEQ_CST);
+ loc[off] = v;
+ }
- goto next;
+ bit = get_lowest_set_bit (v);
+ loc[off] &= ~bit;
+ return get_lowest_set_bit_index (bit) + (int) (off << log2_uword_bits);
}
-#endif /* included_clib_interrupt_h */
+static_always_inline int
+clib_interrupt_is_any_pending (void *in)
+{
+ clib_interrupt_header_t *h = in;
+ u32 n_uwords = h->uwords_used;
+ uword *loc = h->local;
+ uword *rem = h->remote;
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+ for (u32 i = 0; i < n_uwords; i++)
+ if (loc[i])
+ return 1;
+
+ for (u32 i = 0; i < n_uwords; i++)
+ if (rem[i])
+ return 1;
+
+ return 0;
+}
+
+#endif /* included_clib_interrupt_h */
diff --git a/src/vat2/jsonconvert.c b/src/vppinfra/jsonformat.c
index 645f7d4ca20..1aa3864be04 100644
--- a/src/vat2/jsonconvert.c
+++ b/src/vppinfra/jsonformat.c
@@ -18,19 +18,21 @@
#include <vnet/ip/ip6_packet.h>
#include <vnet/ip/ip_format_fns.h>
#include <vpp/api/types.h>
-#include "jsonconvert.h"
-
-#define _(T) \
-int vl_api_ ##T## _fromjson(cJSON *o, T *d) \
-{ \
- if (!cJSON_IsNumber(o)) return -1; \
- memcpy(d, &o->valueint, sizeof(T)); \
- return 0; \
-}
- foreach_vat2_fromjson
+#include "jsonformat.h"
+
+#define _(T) \
+ int vl_api_##T##_fromjson (cJSON *o, T *d) \
+ { \
+ if (!cJSON_IsNumber (o)) \
+ return -1; \
+ d[0] = (T) cJSON_GetNumberValue (o); \
+ return 0; \
+ }
+foreach_type_fromjson
#undef _
-int vl_api_bool_fromjson(cJSON *o, bool *d)
+ int
+ vl_api_bool_fromjson (cJSON *o, bool *d)
{
if (!cJSON_IsBool(o)) return -1;
*d = o->valueint ? true : false;
@@ -426,24 +428,6 @@ format_ip4_address (u8 * s, va_list * args)
return format (s, "%d.%d.%d.%d", a[0], a[1], a[2], a[3]);
}
-int
-vl_api_c_string_to_api_string (const char *buf, vl_api_string_t * str)
-{
- /* copy without nul terminator */
- u32 len = strlen (buf);
- if (len > 0)
- clib_memcpy_fast (str->buf, buf, len);
- str->length = htonl (len);
- return len + sizeof (u32);
-}
-
-u8 *
-format_vl_api_interface_index_t (u8 *s, va_list *args)
-{
- u32 *a = va_arg (*args, u32 *);
- return format (s, "%u", *a);
-}
-
void
vl_api_string_cJSON_AddToObject(cJSON * const object, const char * const name, vl_api_string_t *astr)
{
@@ -490,10 +474,6 @@ unformat_vl_api_timestamp_t(unformat_input_t * input, va_list * args)
{
return 0;
}
-u8 *format_vl_api_gbp_scope_t(u8 * s, va_list * args)
-{
- return 0;
-}
uword unformat_vl_api_gbp_scope_t(unformat_input_t * input, va_list * args)
{
return 0;
@@ -501,24 +481,15 @@ uword unformat_vl_api_gbp_scope_t(unformat_input_t * input, va_list * args)
cJSON *
vl_api_ip4_address_with_prefix_t_tojson (vl_api_ip4_prefix_t *a) {
- u8 *s = format(0, "%U", format_vl_api_ip4_address_t, a);
- cJSON *o = cJSON_CreateString((char *)s);
- vec_free(s);
- return o;
+ return vl_api_ip4_prefix_t_tojson (a);
}
cJSON *
vl_api_ip6_address_with_prefix_t_tojson (vl_api_ip6_prefix_t *a) {
- u8 *s = format(0, "%U", format_vl_api_ip6_address_t, a);
- cJSON *o = cJSON_CreateString((char *)s);
- vec_free(s);
- return o;
+ return vl_api_ip6_prefix_t_tojson (a);
}
cJSON *
vl_api_address_with_prefix_t_tojson (vl_api_prefix_t *a) {
- u8 *s = format(0, "%U", format_vl_api_address_t, a);
- cJSON *o = cJSON_CreateString((char *)s);
- vec_free(s);
- return o;
+ return vl_api_prefix_t_tojson (a);
}
u8 *
format_vl_api_mac_address_t (u8 * s, va_list * args)
@@ -536,5 +507,5 @@ format_vl_api_mac_address_t (u8 * s, va_list * args)
vec_free(s); \
return o; \
}
-foreach_vat2_tojson
+foreach_type_tojson
#undef _
diff --git a/src/vppinfra/jsonformat.h b/src/vppinfra/jsonformat.h
new file mode 100644
index 00000000000..062e4e188ef
--- /dev/null
+++ b/src/vppinfra/jsonformat.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2020 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_json_convert_h
+#define included_json_convert_h
+
+#include <stdbool.h>
+#include <vppinfra/cJSON.h>
+#include <vnet/ethernet/mac_address.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/ip_types.api_types.h>
+#include <vnet/ethernet/ethernet_types.api_types.h>
+
+#define foreach_type_fromjson \
+ _ (i8) \
+ _ (u8) \
+ _ (i16) \
+ _ (u16) \
+ _ (i32) \
+ _ (u32) \
+ _ (u64) \
+ _ (f64)
+
+#define _(T) CJSON_PUBLIC (int) vl_api_##T##_fromjson (cJSON *o, T *d);
+foreach_type_fromjson
+#undef _
+
+/* Prototypes */
+CJSON_PUBLIC (int) vl_api_bool_fromjson (cJSON *o, bool *d);
+CJSON_PUBLIC (int)
+vl_api_ip4_address_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip4_address_t *a);
+CJSON_PUBLIC (int)
+vl_api_ip4_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip4_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_ip4_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip4_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_ip6_address_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip6_address_t *a);
+CJSON_PUBLIC (int)
+vl_api_ip6_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip6_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_ip6_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_ip6_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_address_t_fromjson (void **mp, int *len, cJSON *o, vl_api_address_t *a);
+CJSON_PUBLIC (int)
+vl_api_prefix_t_fromjson (void **mp, int *len, cJSON *o, vl_api_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_address_with_prefix_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_prefix_t *a);
+CJSON_PUBLIC (int)
+vl_api_mac_address_t_fromjson (void **mp, int *len, cJSON *o,
+ vl_api_mac_address_t *a);
+
+CJSON_PUBLIC (uword)
+unformat_ip4_address (unformat_input_t *input, va_list *args);
+CJSON_PUBLIC (uword)
+unformat_ip6_address (unformat_input_t *input, va_list *args);
+CJSON_PUBLIC (u8 *) format_ip6_address (u8 *s, va_list *args);
+CJSON_PUBLIC (uword)
+unformat_mac_address (unformat_input_t *input, va_list *args);
+CJSON_PUBLIC (u8 *) format_ip4_address (u8 *s, va_list *args);
+CJSON_PUBLIC (uword)
+unformat_vl_api_timedelta_t (unformat_input_t *input, va_list *args);
+CJSON_PUBLIC (uword)
+unformat_vl_api_timestamp_t (unformat_input_t *input, va_list *args);
+CJSON_PUBLIC (uword)
+unformat_vl_api_gbp_scope_t (unformat_input_t *input, va_list *args);
+
+CJSON_PUBLIC (void)
+vl_api_string_cJSON_AddToObject (cJSON *const object, const char *const name,
+ vl_api_string_t *astr);
+
+CJSON_PUBLIC (u8 *) u8string_fromjson (cJSON *o, char *fieldname);
+CJSON_PUBLIC (int) u8string_fromjson2 (cJSON *o, char *fieldname, u8 *data);
+CJSON_PUBLIC (int) vl_api_u8_string_fromjson (cJSON *o, u8 *s, int len);
+
+#define foreach_type_tojson \
+ _ (ip4_address) \
+ _ (ip4_prefix) \
+ _ (ip6_address) \
+ _ (ip6_prefix) \
+ _ (address) \
+ _ (prefix) \
+ _ (mac_address)
+
+#define _(T) CJSON_PUBLIC (cJSON *) vl_api_##T##_t_tojson (vl_api_##T##_t *);
+foreach_type_tojson
+#undef _
+
+CJSON_PUBLIC (cJSON *)
+ vl_api_ip4_address_with_prefix_t_tojson (vl_api_ip4_prefix_t *a);
+CJSON_PUBLIC (cJSON *)
+vl_api_ip6_address_with_prefix_t_tojson (vl_api_ip6_prefix_t *a);
+CJSON_PUBLIC (cJSON *)
+vl_api_address_with_prefix_t_tojson (vl_api_prefix_t *a);
+
+#endif
diff --git a/src/vppinfra/lb_hash_hash.h b/src/vppinfra/lb_hash_hash.h
index fb251591eeb..f355515bce4 100644
--- a/src/vppinfra/lb_hash_hash.h
+++ b/src/vppinfra/lb_hash_hash.h
@@ -24,11 +24,11 @@ static_always_inline u32
lb_hash_hash (u64 k0, u64 k1, u64 k2, u64 k3, u64 k4)
{
u64 val = 0;
- val = crc32_u64 (val, k0);
- val = crc32_u64 (val, k1);
- val = crc32_u64 (val, k2);
- val = crc32_u64 (val, k3);
- val = crc32_u64 (val, k4);
+ val = clib_crc32c_u64 (val, k0);
+ val = clib_crc32c_u64 (val, k1);
+ val = clib_crc32c_u64 (val, k2);
+ val = clib_crc32c_u64 (val, k3);
+ val = clib_crc32c_u64 (val, k4);
return (u32) val;
}
@@ -37,8 +37,8 @@ static_always_inline u32
lb_hash_hash_2_tuples (u64 k0, u32 k1)
{
u64 val = 0;
- val = crc32_u64 (val, k0);
- val = crc32_u32 (val, k1);
+ val = clib_crc32c_u64 (val, k0);
+ val = clib_crc32c_u32 (val, k1);
return (u32) val;
}
#else
diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c
index 036890f9c8d..734f5c4788c 100644
--- a/src/vppinfra/linux/mem.c
+++ b/src/vppinfra/linux/mem.c
@@ -28,9 +28,9 @@
#include <vppinfra/mem.h>
#include <vppinfra/lock.h>
#include <vppinfra/time.h>
+#include <vppinfra/bitmap.h>
#include <vppinfra/format.h>
#include <vppinfra/clib_error.h>
-#include <vppinfra/linux/sysfs.h>
#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE 1024
@@ -75,40 +75,6 @@ map_unlock ()
clib_atomic_release (&clib_mem_main.map_lock);
}
-__clib_export uword
-clib_mem_get_default_hugepage_size (void)
-{
- unformat_input_t input;
- static u32 size = 0;
- int fd;
-
- if (size)
- goto done;
-
- /*
- * If the kernel doesn't support hugepages, /proc/meminfo won't
- * say anything about it. Use the regular page size as a default.
- */
- size = clib_mem_get_page_size () / 1024;
-
- if ((fd = open ("/proc/meminfo", 0)) == -1)
- return 0;
-
- unformat_init_clib_file (&input, fd);
-
- while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (&input, "Hugepagesize:%_%u kB", &size))
- ;
- else
- unformat_skip_line (&input);
- }
- unformat_free (&input);
- close (fd);
-done:
- return 1024ULL * size;
-}
-
static clib_mem_page_sz_t
legacy_get_log2_default_hugepage_size (void)
{
@@ -133,9 +99,10 @@ legacy_get_log2_default_hugepage_size (void)
}
void
-clib_mem_main_init ()
+clib_mem_main_init (void)
{
clib_mem_main_t *mm = &clib_mem_main;
+ long sysconf_page_size;
uword page_size;
void *va;
int fd;
@@ -144,7 +111,12 @@ clib_mem_main_init ()
return;
/* system page size */
- page_size = sysconf (_SC_PAGESIZE);
+ sysconf_page_size = sysconf (_SC_PAGESIZE);
+ if (sysconf_page_size < 0)
+ {
+ clib_panic ("Could not determine the page size");
+ }
+ page_size = sysconf_page_size;
mm->log2_page_sz = min_log2 (page_size);
/* default system hugeppage size */
@@ -156,6 +128,8 @@ clib_mem_main_init ()
else /* likely kernel older than 4.14 */
mm->log2_default_hugepage_sz = legacy_get_log2_default_hugepage_size ();
+ mm->log2_sys_default_hugepage_sz = mm->log2_default_hugepage_sz;
+
/* numa nodes */
va = mmap (0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
MAP_ANONYMOUS, -1, 0);
@@ -270,7 +244,7 @@ clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
if (log2_page_size == mm->log2_page_sz)
log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT;
- else if (log2_page_size == mm->log2_default_hugepage_sz)
+ else if (log2_page_size == mm->log2_sys_default_hugepage_sz)
log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT_HUGE;
switch (log2_page_size)
@@ -293,7 +267,7 @@ clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
/* memfd_create maximum string size is 249 chars without trailing zero */
if (vec_len (s) > 249)
- _vec_len (s) = 249;
+ vec_set_len (s, 249);
vec_add1 (s, 0);
/* memfd_create introduced in kernel 3.17, we don't support older kernels */
@@ -487,14 +461,12 @@ clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
else
mm->first_map = hdr;
- CLIB_MEM_UNPOISON (hdr, sys_page_sz);
+ clib_mem_unpoison (hdr, sys_page_sz);
hdr->next = 0;
hdr->prev = mm->last_map;
snprintf (hdr->name, CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1, "%s", (char *) name);
mm->last_map = hdr;
- map_unlock ();
-
hdr->base_addr = (uword) base;
hdr->log2_page_sz = log2_page_sz;
hdr->num_pages = size >> log2_page_sz;
@@ -502,7 +474,9 @@ clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
hdr->name[CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1] = 0;
mprotect (hdr, sys_page_sz, PROT_NONE);
- CLIB_MEM_UNPOISON (base, size);
+ map_unlock ();
+
+ clib_mem_unpoison (base, size);
return base;
}
@@ -640,8 +614,8 @@ __clib_export int
clib_mem_set_numa_affinity (u8 numa_node, int force)
{
clib_mem_main_t *mm = &clib_mem_main;
- long unsigned int mask[16] = { 0 };
- int mask_len = sizeof (mask) * 8 + 1;
+ clib_bitmap_t *bmp = 0;
+ int rv;
/* no numa support */
if (mm->numa_node_bitmap == 0)
@@ -657,19 +631,21 @@ clib_mem_set_numa_affinity (u8 numa_node, int force)
return 0;
}
- mask[0] = 1 << numa_node;
+ bmp = clib_bitmap_set (bmp, numa_node, 1);
- if (syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, mask,
- mask_len))
- goto error;
+ rv = syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, bmp,
+ vec_len (bmp) * sizeof (bmp[0]) * 8 + 1);
+ clib_bitmap_free (bmp);
vec_reset_length (mm->error);
- return 0;
-error:
- vec_reset_length (mm->error);
- mm->error = clib_error_return_unix (mm->error, (char *) __func__);
- return CLIB_MEM_ERROR;
+ if (rv)
+ {
+ mm->error = clib_error_return_unix (mm->error, (char *) __func__);
+ return CLIB_MEM_ERROR;
+ }
+
+ return 0;
}
__clib_export int
diff --git a/src/vppinfra/linux/sysfs.c b/src/vppinfra/linux/sysfs.c
index 758eaa1a86c..61ee6378c8c 100644
--- a/src/vppinfra/linux/sysfs.c
+++ b/src/vppinfra/linux/sysfs.c
@@ -70,7 +70,7 @@ clib_sysfs_read (char *file_name, char *fmt, ...)
return clib_error_return_unix (0, "read `%s'", file_name);
}
- _vec_len (s) = sz;
+ vec_set_len (s, sz);
unformat_init_vector (&input, s);
va_list va;
@@ -87,32 +87,6 @@ clib_sysfs_read (char *file_name, char *fmt, ...)
return 0;
}
-__clib_export u8 *
-clib_sysfs_link_to_name (char *link)
-{
- char *p, buffer[64];
- unformat_input_t in;
- u8 *s = 0;
- int r;
-
- r = readlink (link, buffer, sizeof (buffer) - 1);
-
- if (r < 0)
- return 0;
-
- buffer[r] = 0;
- p = strrchr (buffer, '/');
-
- if (!p)
- return 0;
-
- unformat_init_string (&in, p + 1, strlen (p + 1));
- if (unformat (&in, "%s", &s) != 1)
- clib_unix_warning ("no string?");
- unformat_free (&in);
-
- return s;
-}
clib_error_t *
clib_sysfs_set_nr_hugepages (int numa_node, int log2_page_size, int nr)
@@ -154,7 +128,7 @@ clib_sysfs_set_nr_hugepages (int numa_node, int log2_page_size, int nr)
goto done;
}
- _vec_len (p) -= 1;
+ vec_dec_len (p, 1);
p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0);
clib_sysfs_write ((char *) p, "%d", nr);
@@ -207,7 +181,7 @@ clib_sysfs_get_xxx_hugepages (char *type, int numa_node,
goto done;
}
- _vec_len (p) -= 1;
+ vec_dec_len (p, 1);
p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size,
type, 0);
error = clib_sysfs_read ((char *) p, "%d", val);
@@ -263,13 +237,21 @@ clib_sysfs_prealloc_hugepages (int numa_node, int log2_page_size, int nr)
return clib_sysfs_set_nr_hugepages (numa_node, log2_page_size, n + needed);
}
-__clib_export uword *
-clib_sysfs_list_to_bitmap (char *filename)
+__clib_export clib_bitmap_t *
+clib_sysfs_read_bitmap (char *fmt, ...)
{
FILE *fp;
uword *r = 0;
+ va_list va;
+ u8 *filename;
+
+ va_start (va, fmt);
+ filename = va_format (0, fmt, &va);
+ va_end (va);
+ vec_add1 (filename, 0);
- fp = fopen (filename, "r");
+ fp = fopen ((char *) filename, "r");
+ vec_free (filename);
if (fp != NULL)
{
diff --git a/src/vppinfra/linux/sysfs.h b/src/vppinfra/linux/sysfs.h
index 9cbc34823dd..f2f822d9741 100644
--- a/src/vppinfra/linux/sysfs.h
+++ b/src/vppinfra/linux/sysfs.h
@@ -17,13 +17,12 @@
#define included_linux_sysfs_h
#include <vppinfra/error.h>
+#include <vppinfra/bitmap.h>
clib_error_t *clib_sysfs_write (char *file_name, char *fmt, ...);
clib_error_t *clib_sysfs_read (char *file_name, char *fmt, ...);
-u8 *clib_sysfs_link_to_name (char *link);
-
clib_error_t *clib_sysfs_set_nr_hugepages (int numa_node,
int log2_page_size, int nr);
clib_error_t *clib_sysfs_get_nr_hugepages (int numa_node,
@@ -35,7 +34,7 @@ clib_error_t *clib_sysfs_get_surplus_hugepages (int numa_node,
clib_error_t *clib_sysfs_prealloc_hugepages (int numa_node,
int log2_page_size, int nr);
-uword *clib_sysfs_list_to_bitmap (char *filename);
+uword *clib_sysfs_read_bitmap (char *fmt, ...);
#endif /* included_linux_sysfs_h */
diff --git a/src/vppinfra/longjmp.S b/src/vppinfra/longjmp.S
index a3435ccb969..c5090877fd7 100644
--- a/src/vppinfra/longjmp.S
+++ b/src/vppinfra/longjmp.S
@@ -816,6 +816,56 @@ cdecl(clib_calljmp):
mov sp, x3
ret
.size cdecl(clib_calljmp), .-cdecl(clib_calljmp)
+#elif defined(__riscv)
+#define foreach_0_to_11 _(0) _(1) _(2) _(3) _(4) _(5) _(6) _(7) _(8) _(9) _(10) _(11)
+ .global cdecl(clib_setjmp)
+ .align 1
+ .type cdecl(clib_setjmp), @function
+cdecl(clib_setjmp):
+ sd ra, 0*8(a0)
+ sd sp, 1*8(a0)
+#define _(x) sd s##x, (x + 2)*8(a0);
+ foreach_0_to_11
+#undef _
+#define _(x) fsd fs##x, (x + 14)*8(a0);
+ foreach_0_to_11
+#undef _
+ mv a0,a1
+ ret
+ .size cdecl(clib_setjmp), .-cdecl(clib_setjmp)
+
+ .global cdecl(clib_longjmp)
+ .align 1
+ .type cdecl(clib_longjmp), @function
+cdecl(clib_longjmp):
+ ld ra, 0*8(a0)
+ ld sp, 1*8(a0)
+#define _(x) ld s##x, (x + 2)*8(a0);
+ foreach_0_to_11
+#undef _
+#define _(x) fld fs##x, (x + 14)*8(a0);
+ foreach_0_to_11
+#undef _
+ mv a0,a1
+ ret
+ .size cdecl(clib_longjmp), .-cdecl(clib_longjmp)
+
+ .global cdecl(clib_calljmp)
+ .align 1
+ .type cdecl(clib_calljmp), @function
+cdecl(clib_calljmp):
+ andi a2,a2, -16 /* Make sure stack is 16-byte aligned. */
+ addi a2, a2, -16 /* allocate space on the new stack */
+ sd ra, 8(a2) /* store return address */
+ sd sp, 0(a2) /* store existing stack pointer */
+ mv sp, a2 /* change stack */
+ mv a2, a0 /* functon pointer to a2 */
+ mv a0, a1 /* 2nd argument becomes 1st one */
+ jalr a2 /* function call */
+ ld ra, 8(sp) /* restore old return address */
+ ld sp, 0(sp) /* restore old stack pointer */
+ ret
+ .size cdecl(clib_calljmp), .-cdecl(clib_calljmp)
#else
#error "unknown machine"
#endif
diff --git a/src/vppinfra/longjmp.h b/src/vppinfra/longjmp.h
index 67c650a6174..62daaad59bd 100644
--- a/src/vppinfra/longjmp.h
+++ b/src/vppinfra/longjmp.h
@@ -95,6 +95,9 @@
#define CLIB_ARCH_LONGJMP_REGS (22)
#elif defined(_mips) && __mips == 64
#define CLIB_ARCH_LONGJMP_REGS (12)
+#elif defined(__riscv)
+/* ra, sp, s0-s11, fs0-fs11 */
+#define CLIB_ARCH_LONGJMP_REGS (26)
#else
#error "unknown machine"
#endif
diff --git a/src/vppinfra/macros.c b/src/vppinfra/macros.c
index b8a8e1744aa..b8644b2738e 100644
--- a/src/vppinfra/macros.c
+++ b/src/vppinfra/macros.c
@@ -175,7 +175,7 @@ clib_macro_eval (clib_macro_main_t * mm, i8 * s, i32 complain, u16 level,
/* add results to answer */
vec_append (rv, ts);
/* Remove NULL termination or the results are sad */
- _vec_len (rv) = vec_len (rv) - 1;
+ vec_set_len (rv, vec_len (rv) - 1);
vec_free (ts);
}
else
@@ -183,8 +183,7 @@ clib_macro_eval (clib_macro_main_t * mm, i8 * s, i32 complain, u16 level,
if (complain)
clib_warning ("Undefined Variable Reference: %s\n", varname);
vec_append (rv, format (0, "UNSET "));
- _vec_len (rv) = vec_len (rv) - 1;
-
+ vec_set_len (rv, vec_len (rv) - 1);
}
vec_free (varname);
}
@@ -252,13 +251,11 @@ clib_macro_free (clib_macro_main_t * mm)
hash_free (mm->the_builtin_eval_hash);
- /* *INDENT-OFF* */
hash_foreach_pair (p, mm->the_value_table_hash,
({
vec_add1 (strings_to_free, (u8 *) (p->key));
vec_add1 (strings_to_free, (u8 *) (p->value[0]));
}));
- /* *INDENT-ON* */
for (i = 0; i < vec_len (strings_to_free); i++)
vec_free (strings_to_free[i]);
@@ -291,14 +288,12 @@ format_clib_macro_main (u8 * s, va_list * args)
name_sort_t *nses = 0, *ns;
int i;
- /* *INDENT-OFF* */
hash_foreach_pair (p, mm->the_value_table_hash,
({
vec_add2 (nses, ns, 1);
ns->name = (u8 *)(p->key);
ns->value = (u8 *)(p->value[0]);
}));
- /* *INDENT-ON* */
if (vec_len (nses) == 0)
return s;
diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h
index 1cab0ae7252..75015d59a4a 100644
--- a/src/vppinfra/mem.h
+++ b/src/vppinfra/mem.h
@@ -47,12 +47,16 @@
#include <vppinfra/os.h>
#include <vppinfra/string.h> /* memcpy, clib_memset */
-#include <vppinfra/sanitizer.h>
+#ifdef CLIB_SANITIZE_ADDR
+#include <sanitizer/asan_interface.h>
+#endif
#define CLIB_MAX_MHEAPS 256
#define CLIB_MAX_NUMAS 16
#define CLIB_MEM_VM_MAP_FAILED ((void *) ~0)
#define CLIB_MEM_ERROR (-1)
+#define CLIB_MEM_LOG2_MIN_ALIGN (3)
+#define CLIB_MEM_MIN_ALIGN (1 << CLIB_MEM_LOG2_MIN_ALIGN)
typedef enum
{
@@ -93,9 +97,10 @@ typedef struct _clib_mem_vm_map_hdr
struct _clib_mem_vm_map_hdr *prev, *next;
} clib_mem_vm_map_hdr_t;
-#define foreach_clib_mem_heap_flag \
- _(0, LOCKED, "locked") \
- _(1, UNMAP_ON_DESTROY, "unmap-on-destroy")
+#define foreach_clib_mem_heap_flag \
+ _ (0, LOCKED, "locked") \
+ _ (1, UNMAP_ON_DESTROY, "unmap-on-destroy") \
+ _ (2, TRACED, "traced")
typedef enum
{
@@ -130,9 +135,12 @@ typedef struct
/* log2 system page size */
clib_mem_page_sz_t log2_page_sz;
- /* log2 system default hugepage size */
+ /* log2 default hugepage size */
clib_mem_page_sz_t log2_default_hugepage_sz;
+ /* log2 system default hugepage size */
+ clib_mem_page_sz_t log2_sys_default_hugepage_sz;
+
/* bitmap of available numa nodes */
u32 numa_node_bitmap;
@@ -157,6 +165,22 @@ extern clib_mem_main_t clib_mem_main;
/* Unspecified NUMA socket */
#define VEC_NUMA_UNSPECIFIED (0xFF)
+static_always_inline void
+clib_mem_poison (const void volatile *p, uword s)
+{
+#ifdef CLIB_SANITIZE_ADDR
+ ASAN_POISON_MEMORY_REGION (p, s);
+#endif
+}
+
+static_always_inline void
+clib_mem_unpoison (const void volatile *p, uword s)
+{
+#ifdef CLIB_SANITIZE_ADDR
+ ASAN_UNPOISON_MEMORY_REGION (p, s);
+#endif
+}
+
always_inline clib_mem_heap_t *
clib_mem_get_per_cpu_heap (void)
{
@@ -210,77 +234,29 @@ clib_mem_set_thread_index (void)
ASSERT (__os_thread_index > 0);
}
-always_inline uword
-clib_mem_size_nocheck (void *p)
-{
- size_t mspace_usable_size_with_delta (const void *p);
- return mspace_usable_size_with_delta (p);
-}
-
-/* Memory allocator which may call os_out_of_memory() if it fails */
-always_inline void *
-clib_mem_alloc_aligned_at_offset (uword size, uword align, uword align_offset,
- int os_out_of_memory_on_failure)
-{
- void *mspace_get_aligned (void *msp, unsigned long n_user_data_bytes,
- unsigned long align, unsigned long align_offset);
- clib_mem_heap_t *h = clib_mem_get_per_cpu_heap ();
- void *p;
-
- if (align_offset > align)
- {
- if (align > 0)
- align_offset %= align;
- else
- align_offset = align;
- }
-
- p = mspace_get_aligned (h->mspace, size, align, align_offset);
-
- if (PREDICT_FALSE (0 == p))
- {
- if (os_out_of_memory_on_failure)
- os_out_of_memory ();
- return 0;
- }
-
- CLIB_MEM_UNPOISON (p, size);
- return p;
-}
-
/* Memory allocator which calls os_out_of_memory() when it fails */
-always_inline void *
-clib_mem_alloc (uword size)
-{
- return clib_mem_alloc_aligned_at_offset (size, /* align */ 1,
- /* align_offset */ 0,
- /* os_out_of_memory */ 1);
-}
-
-always_inline void *
-clib_mem_alloc_aligned (uword size, uword align)
-{
- return clib_mem_alloc_aligned_at_offset (size, align, /* align_offset */ 0,
- /* os_out_of_memory */ 1);
-}
-
-/* Memory allocator which calls os_out_of_memory() when it fails */
-always_inline void *
-clib_mem_alloc_or_null (uword size)
-{
- return clib_mem_alloc_aligned_at_offset (size, /* align */ 1,
- /* align_offset */ 0,
- /* os_out_of_memory */ 0);
-}
-
-always_inline void *
-clib_mem_alloc_aligned_or_null (uword size, uword align)
-{
- return clib_mem_alloc_aligned_at_offset (size, align, /* align_offset */ 0,
- /* os_out_of_memory */ 0);
-}
-
-
+void *clib_mem_alloc (uword size);
+void *clib_mem_alloc_aligned (uword size, uword align);
+void *clib_mem_alloc_or_null (uword size);
+void *clib_mem_alloc_aligned_or_null (uword size, uword align);
+void *clib_mem_realloc (void *p, uword new_size);
+void *clib_mem_realloc_aligned (void *p, uword new_size, uword align);
+uword clib_mem_is_heap_object (void *p);
+void clib_mem_free (void *p);
+
+void *clib_mem_heap_alloc (void *heap, uword size);
+void *clib_mem_heap_alloc_aligned (void *heap, uword size, uword align);
+void *clib_mem_heap_alloc_or_null (void *heap, uword size);
+void *clib_mem_heap_alloc_aligned_or_null (void *heap, uword size,
+ uword align);
+void *clib_mem_heap_realloc (void *heap, void *p, uword new_size);
+void *clib_mem_heap_realloc_aligned (void *heap, void *p, uword new_size,
+ uword align);
+uword clib_mem_heap_is_heap_object (void *heap, void *p);
+void clib_mem_heap_free (void *heap, void *p);
+
+uword clib_mem_size (void *p);
+void clib_mem_free_s (void *p);
/* Memory allocator which panics when it fails.
Use macro so that clib_panic macro can expand __FUNCTION__ and __LINE__. */
@@ -299,62 +275,6 @@ clib_mem_alloc_aligned_or_null (uword size, uword align)
/* Alias to stack allocator for naming consistency. */
#define clib_mem_alloc_stack(bytes) __builtin_alloca(bytes)
-always_inline uword
-clib_mem_is_heap_object (void *p)
-{
- int mspace_is_heap_object (void *msp, void *p);
- clib_mem_heap_t *h = clib_mem_get_per_cpu_heap ();
- return mspace_is_heap_object (h->mspace, p);
-}
-
-always_inline void
-clib_mem_free (void *p)
-{
- void mspace_put (void *msp, void *p_arg);
- clib_mem_heap_t *h = clib_mem_get_per_cpu_heap ();
-
- /* Make sure object is in the correct heap. */
- ASSERT (clib_mem_is_heap_object (p));
-
- CLIB_MEM_POISON (p, clib_mem_size_nocheck (p));
-
- mspace_put (h->mspace, p);
-}
-
-always_inline void *
-clib_mem_realloc (void *p, uword new_size, uword old_size)
-{
- /* By default use alloc, copy and free to emulate realloc. */
- void *q = clib_mem_alloc (new_size);
- if (q)
- {
- uword copy_size;
- if (old_size < new_size)
- copy_size = old_size;
- else
- copy_size = new_size;
- clib_memcpy_fast (q, p, copy_size);
- clib_mem_free (p);
- }
- return q;
-}
-
-always_inline uword
-clib_mem_size (void *p)
-{
- ASSERT (clib_mem_is_heap_object (p));
- return clib_mem_size_nocheck (p);
-}
-
-always_inline void
-clib_mem_free_s (void *p)
-{
- uword size = clib_mem_size (p);
- CLIB_MEM_UNPOISON (p, size);
- memset_s_inline (p, size, 0, size);
- clib_mem_free (p);
-}
-
always_inline clib_mem_heap_t *
clib_mem_get_heap (void)
{
@@ -434,7 +354,7 @@ clib_mem_vm_alloc (uword size)
if (mmap_addr == (void *) -1)
mmap_addr = 0;
else
- CLIB_MEM_UNPOISON (mmap_addr, size);
+ clib_mem_unpoison (mmap_addr, size);
return mmap_addr;
}
@@ -470,15 +390,26 @@ clib_mem_get_page_size (void)
return 1ULL << clib_mem_main.log2_page_sz;
}
+static_always_inline void
+clib_mem_set_log2_default_hugepage_size (clib_mem_page_sz_t log2_page_sz)
+{
+ clib_mem_main.log2_default_hugepage_sz = log2_page_sz;
+}
+
static_always_inline clib_mem_page_sz_t
clib_mem_get_log2_default_hugepage_size ()
{
return clib_mem_main.log2_default_hugepage_sz;
}
+static_always_inline uword
+clib_mem_get_default_hugepage_size (void)
+{
+ return 1ULL << clib_mem_main.log2_default_hugepage_sz;
+}
+
int clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...);
uword clib_mem_get_fd_page_size (int fd);
-uword clib_mem_get_default_hugepage_size (void);
clib_mem_page_sz_t clib_mem_get_fd_log2_page_size (int fd);
uword clib_mem_vm_reserve (uword start, uword size,
clib_mem_page_sz_t log2_page_sz);
diff --git a/src/vppinfra/mem_bulk.c b/src/vppinfra/mem_bulk.c
index ba8b2e94909..4dd6a168217 100644
--- a/src/vppinfra/mem_bulk.c
+++ b/src/vppinfra/mem_bulk.c
@@ -66,7 +66,7 @@ clib_mem_bulk_init (u32 elt_sz, u32 align, u32 min_elts_per_chunk)
if (min_elts_per_chunk == 0)
min_elts_per_chunk = CLIB_MEM_BULK_DEFAULT_MIN_ELTS_PER_CHUNK;
- CLIB_MEM_UNPOISON (b, sizeof (clib_mem_bulk_t));
+ clib_mem_unpoison (b, sizeof (clib_mem_bulk_t));
clib_memset (b, 0, sizeof (clib_mem_bulk_t));
b->mspace = heap->mspace;
b->align = align;
@@ -92,7 +92,7 @@ again:
while (c)
{
next = c->next;
- CLIB_MEM_POISON (c, bulk_chunk_size (b));
+ clib_mem_poison (c, bulk_chunk_size (b));
mspace_free (ms, c);
c = next;
}
@@ -104,7 +104,7 @@ again:
goto again;
}
- CLIB_MEM_POISON (b, sizeof (clib_mem_bulk_t));
+ clib_mem_poison (b, sizeof (clib_mem_bulk_t));
mspace_free (ms, b);
}
@@ -148,7 +148,7 @@ clib_mem_bulk_alloc (clib_mem_bulk_handle_t h)
{
u32 i, sz = bulk_chunk_size (b);
c = mspace_memalign (b->mspace, b->chunk_align, sz);
- CLIB_MEM_UNPOISON (c, sz);
+ clib_mem_unpoison (c, sz);
clib_memset (c, 0, sizeof (clib_mem_bulk_chunk_hdr_t));
b->avail_chunks = c;
c->n_free = b->elts_per_chunk;
@@ -192,7 +192,7 @@ clib_mem_bulk_free (clib_mem_bulk_handle_t h, void *p)
{
/* chunk is empty - give it back */
remove_from_chunk_list (&b->avail_chunks, c);
- CLIB_MEM_POISON (c, bulk_chunk_size (b));
+ clib_mem_poison (c, bulk_chunk_size (b));
mspace_free (b->mspace, c);
return;
}
diff --git a/src/vppinfra/mem_dlmalloc.c b/src/vppinfra/mem_dlmalloc.c
index e2a0f71e084..a188164a7ba 100644
--- a/src/vppinfra/mem_dlmalloc.c
+++ b/src/vppinfra/mem_dlmalloc.c
@@ -19,7 +19,6 @@
#include <vppinfra/lock.h>
#include <vppinfra/hash.h>
#include <vppinfra/elf_clib.h>
-#include <vppinfra/sanitizer.h>
typedef struct
{
@@ -39,7 +38,6 @@ typedef struct
typedef struct
{
clib_spinlock_t lock;
- uword enabled;
mheap_trace_t *traces;
@@ -53,22 +51,24 @@ typedef struct
uword *trace_index_by_offset;
/* So we can easily shut off current segment trace, if any */
- void *current_traced_mheap;
+ const clib_mem_heap_t *current_traced_mheap;
} mheap_trace_main_t;
mheap_trace_main_t mheap_trace_main;
-void
-mheap_get_trace (uword offset, uword size)
+static __thread int mheap_trace_thread_disable;
+
+static void
+mheap_get_trace_internal (const clib_mem_heap_t *heap, uword offset,
+ uword size)
{
mheap_trace_main_t *tm = &mheap_trace_main;
mheap_trace_t *t;
uword i, n_callers, trace_index, *p;
mheap_trace_t trace;
- uword save_enabled;
- if (tm->enabled == 0 || (clib_mem_get_heap () != tm->current_traced_mheap))
+ if (heap != tm->current_traced_mheap || mheap_trace_thread_disable)
return;
/* Spurious Coverity warnings be gone. */
@@ -76,9 +76,12 @@ mheap_get_trace (uword offset, uword size)
clib_spinlock_lock (&tm->lock);
- /* Turn off tracing to avoid embarrassment... */
- save_enabled = tm->enabled;
- tm->enabled = 0;
+ /* heap could have changed while we were waiting on the lock */
+ if (heap != tm->current_traced_mheap)
+ goto out;
+
+ /* Turn off tracing for this thread to avoid embarrassment... */
+ mheap_trace_thread_disable = 1;
/* Skip our frame and mspace_get_aligned's frame */
n_callers = clib_backtrace (trace.callers, ARRAY_LEN (trace.callers), 2);
@@ -101,7 +104,7 @@ mheap_get_trace (uword offset, uword size)
if (i > 0)
{
trace_index = tm->trace_free_list[i - 1];
- _vec_len (tm->trace_free_list) = i - 1;
+ vec_set_len (tm->trace_free_list, i - 1);
}
else
{
@@ -114,14 +117,12 @@ mheap_get_trace (uword offset, uword size)
{
hash_pair_t *p;
mheap_trace_t *q;
- /* *INDENT-OFF* */
hash_foreach_pair (p, tm->trace_by_callers,
({
q = uword_to_pointer (p->key, mheap_trace_t *);
ASSERT (q >= old_start && q < old_end);
p->key = pointer_to_uword (tm->traces + (q - old_start));
}));
- /* *INDENT-ON* */
}
trace_index = t - tm->traces;
}
@@ -139,34 +140,33 @@ mheap_get_trace (uword offset, uword size)
hash_set (tm->trace_index_by_offset, offset, t - tm->traces);
out:
- tm->enabled = save_enabled;
+ mheap_trace_thread_disable = 0;
clib_spinlock_unlock (&tm->lock);
}
-void
-mheap_put_trace (uword offset, uword size)
+static void
+mheap_put_trace_internal (const clib_mem_heap_t *heap, uword offset,
+ uword size)
{
mheap_trace_t *t;
uword trace_index, *p;
mheap_trace_main_t *tm = &mheap_trace_main;
- uword save_enabled;
- if (tm->enabled == 0)
+ if (heap != tm->current_traced_mheap || mheap_trace_thread_disable)
return;
clib_spinlock_lock (&tm->lock);
- /* Turn off tracing for a moment */
- save_enabled = tm->enabled;
- tm->enabled = 0;
+ /* heap could have changed while we were waiting on the lock */
+ if (heap != tm->current_traced_mheap)
+ goto out;
+
+ /* Turn off tracing for this thread for a moment */
+ mheap_trace_thread_disable = 1;
p = hash_get (tm->trace_index_by_offset, offset);
if (!p)
- {
- tm->enabled = save_enabled;
- clib_spinlock_unlock (&tm->lock);
- return;
- }
+ goto out;
trace_index = p[0];
hash_unset (tm->trace_index_by_offset, offset);
@@ -183,17 +183,34 @@ mheap_put_trace (uword offset, uword size)
vec_add1 (tm->trace_free_list, trace_index);
clib_memset (t, 0, sizeof (t[0]));
}
- tm->enabled = save_enabled;
+
+out:
+ mheap_trace_thread_disable = 0;
clib_spinlock_unlock (&tm->lock);
}
+void
+mheap_get_trace (uword offset, uword size)
+{
+ mheap_get_trace_internal (clib_mem_get_heap (), offset, size);
+}
+
+void
+mheap_put_trace (uword offset, uword size)
+{
+ mheap_put_trace_internal (clib_mem_get_heap (), offset, size);
+}
+
always_inline void
mheap_trace_main_free (mheap_trace_main_t * tm)
{
+ CLIB_SPINLOCK_ASSERT_LOCKED (&tm->lock);
+ tm->current_traced_mheap = 0;
vec_free (tm->traces);
vec_free (tm->trace_free_list);
hash_free (tm->trace_by_callers);
hash_free (tm->trace_index_by_offset);
+ mheap_trace_thread_disable = 0;
}
static clib_mem_heap_t *
@@ -235,7 +252,7 @@ clib_mem_create_heap_internal (void *base, uword size,
mspace_disable_expand (h->mspace);
- CLIB_MEM_POISON (mspace_least_addr (h->mspace),
+ clib_mem_poison (mspace_least_addr (h->mspace),
mspace_footprint (h->mspace));
return h;
@@ -257,7 +274,14 @@ clib_mem_init_internal (void *base, uword size,
clib_mem_set_heap (h);
if (mheap_trace_main.lock == 0)
- clib_spinlock_init (&mheap_trace_main.lock);
+ {
+ /* clib_spinlock_init() dynamically allocates the spinlock in the current
+ * per-cpu heap, but it is used for all traces accross all heaps and
+ * hence we can't really allocate it in the current per-cpu heap as it
+ * could be destroyed later */
+ static struct clib_spinlock_s mheap_trace_main_lock = {};
+ mheap_trace_main.lock = &mheap_trace_main_lock;
+ }
return h;
}
@@ -288,13 +312,12 @@ clib_mem_destroy (void)
{
mheap_trace_main_t *tm = &mheap_trace_main;
clib_mem_heap_t *heap = clib_mem_get_heap ();
- void *base = mspace_least_addr (heap->mspace);
- if (tm->enabled && heap->mspace == tm->current_traced_mheap)
- tm->enabled = 0;
+ if (heap->mspace == tm->current_traced_mheap)
+ mheap_trace (heap, 0);
destroy_mspace (heap->mspace);
- clib_mem_vm_unmap (base);
+ clib_mem_vm_unmap (heap);
}
__clib_export u8 *
@@ -357,6 +380,7 @@ format_mheap_trace (u8 * s, va_list * va)
int verbose = va_arg (*va, int);
int have_traces = 0;
int i;
+ int n = 0;
clib_spinlock_lock (&tm->lock);
if (vec_len (tm->traces) > 0 &&
@@ -383,9 +407,10 @@ format_mheap_trace (u8 * s, va_list * va)
total_objects_traced += t->n_allocations;
- /* When not verbose only report allocations of more than 1k. */
- if (!verbose && t->n_bytes < 1024)
+ /* When not verbose only report the 50 biggest allocations */
+ if (!verbose && n >= 50)
continue;
+ n++;
if (t == traces_copy)
s = format (s, "%=9s%=9s %=10s Traceback\n", "Bytes", "Count",
@@ -464,13 +489,13 @@ format_clib_mem_heap (u8 * s, va_list * va)
format_white_space, indent + 2, format_msize, mi.usmblks);
}
- if (mspace_is_traced (heap->mspace))
+ if (heap->flags & CLIB_MEM_HEAP_F_TRACED)
s = format (s, "\n%U", format_mheap_trace, tm, verbose);
return s;
}
-__clib_export void
-clib_mem_get_heap_usage (clib_mem_heap_t * heap, clib_mem_usage_t * usage)
+__clib_export __clib_flatten void
+clib_mem_get_heap_usage (clib_mem_heap_t *heap, clib_mem_usage_t *usage)
{
struct dlmallinfo mi = mspace_mallinfo (heap->mspace);
@@ -493,42 +518,50 @@ uword clib_mem_validate_serial = 0;
__clib_export void
mheap_trace (clib_mem_heap_t * h, int enable)
{
- (void) mspace_enable_disable_trace (h->mspace, enable);
+ mheap_trace_main_t *tm = &mheap_trace_main;
+
+ clib_spinlock_lock (&tm->lock);
+
+ if (tm->current_traced_mheap != 0 && tm->current_traced_mheap != h)
+ {
+ clib_warning ("tracing already enabled for another heap, ignoring");
+ goto out;
+ }
- if (enable == 0)
- mheap_trace_main_free (&mheap_trace_main);
+ if (enable)
+ {
+ h->flags |= CLIB_MEM_HEAP_F_TRACED;
+ tm->current_traced_mheap = h;
+ }
+ else
+ {
+ h->flags &= ~CLIB_MEM_HEAP_F_TRACED;
+ mheap_trace_main_free (&mheap_trace_main);
+ }
+
+out:
+ clib_spinlock_unlock (&tm->lock);
}
__clib_export void
clib_mem_trace (int enable)
{
- mheap_trace_main_t *tm = &mheap_trace_main;
void *current_heap = clib_mem_get_heap ();
-
- tm->enabled = enable;
mheap_trace (current_heap, enable);
-
- if (enable)
- tm->current_traced_mheap = current_heap;
- else
- tm->current_traced_mheap = 0;
}
int
clib_mem_is_traced (void)
{
clib_mem_heap_t *h = clib_mem_get_heap ();
- return mspace_is_traced (h->mspace);
+ return (h->flags &= CLIB_MEM_HEAP_F_TRACED) != 0;
}
__clib_export uword
clib_mem_trace_enable_disable (uword enable)
{
- uword rv;
- mheap_trace_main_t *tm = &mheap_trace_main;
-
- rv = tm->enabled;
- tm->enabled = enable;
+ uword rv = !mheap_trace_thread_disable;
+ mheap_trace_thread_disable = !enable;
return rv;
}
@@ -567,37 +600,224 @@ clib_mem_destroy_heap (clib_mem_heap_t * h)
{
mheap_trace_main_t *tm = &mheap_trace_main;
- if (tm->enabled && h->mspace == tm->current_traced_mheap)
- tm->enabled = 0;
+ if (h->mspace == tm->current_traced_mheap)
+ mheap_trace (h, 0);
destroy_mspace (h->mspace);
if (h->flags & CLIB_MEM_HEAP_F_UNMAP_ON_DESTROY)
clib_mem_vm_unmap (h->base);
}
-__clib_export uword
-clib_mem_get_heap_free_space (clib_mem_heap_t * h)
+__clib_export __clib_flatten uword
+clib_mem_get_heap_free_space (clib_mem_heap_t *h)
{
struct dlmallinfo dlminfo = mspace_mallinfo (h->mspace);
return dlminfo.fordblks;
}
-__clib_export void *
-clib_mem_get_heap_base (clib_mem_heap_t * h)
+__clib_export __clib_flatten void *
+clib_mem_get_heap_base (clib_mem_heap_t *h)
{
return h->base;
}
-__clib_export uword
-clib_mem_get_heap_size (clib_mem_heap_t * heap)
+__clib_export __clib_flatten uword
+clib_mem_get_heap_size (clib_mem_heap_t *heap)
{
return heap->size;
}
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
+/* Memory allocator which may call os_out_of_memory() if it fails */
+static inline void *
+clib_mem_heap_alloc_inline (void *heap, uword size, uword align,
+ int os_out_of_memory_on_failure)
+{
+ clib_mem_heap_t *h = heap ? heap : clib_mem_get_per_cpu_heap ();
+ void *p;
+
+ align = clib_max (CLIB_MEM_MIN_ALIGN, align);
+
+ p = mspace_memalign (h->mspace, align, size);
+
+ if (PREDICT_FALSE (0 == p))
+ {
+ if (os_out_of_memory_on_failure)
+ os_out_of_memory ();
+ return 0;
+ }
+
+ if (PREDICT_FALSE (h->flags & CLIB_MEM_HEAP_F_TRACED))
+ mheap_get_trace_internal (h, pointer_to_uword (p), clib_mem_size (p));
+
+ clib_mem_unpoison (p, size);
+ return p;
+}
+
+/* Memory allocator which calls os_out_of_memory() when it fails */
+__clib_export __clib_flatten void *
+clib_mem_alloc (uword size)
+{
+ return clib_mem_heap_alloc_inline (0, size, CLIB_MEM_MIN_ALIGN,
+ /* os_out_of_memory */ 1);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_alloc_aligned (uword size, uword align)
+{
+ return clib_mem_heap_alloc_inline (0, size, align,
+ /* os_out_of_memory */ 1);
+}
+
+/* Memory allocator which calls os_out_of_memory() when it fails */
+__clib_export __clib_flatten void *
+clib_mem_alloc_or_null (uword size)
+{
+ return clib_mem_heap_alloc_inline (0, size, CLIB_MEM_MIN_ALIGN,
+ /* os_out_of_memory */ 0);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_alloc_aligned_or_null (uword size, uword align)
+{
+ return clib_mem_heap_alloc_inline (0, size, align,
+ /* os_out_of_memory */ 0);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_alloc (void *heap, uword size)
+{
+ return clib_mem_heap_alloc_inline (heap, size, CLIB_MEM_MIN_ALIGN,
+ /* os_out_of_memory */ 1);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_alloc_aligned (void *heap, uword size, uword align)
+{
+ return clib_mem_heap_alloc_inline (heap, size, align,
+ /* os_out_of_memory */ 1);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_alloc_or_null (void *heap, uword size)
+{
+ return clib_mem_heap_alloc_inline (heap, size, CLIB_MEM_MIN_ALIGN,
+ /* os_out_of_memory */ 0);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_alloc_aligned_or_null (void *heap, uword size, uword align)
+{
+ return clib_mem_heap_alloc_inline (heap, size, align,
+ /* os_out_of_memory */ 0);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_realloc_aligned (void *heap, void *p, uword new_size,
+ uword align)
+{
+ uword old_alloc_size;
+ clib_mem_heap_t *h = heap ? heap : clib_mem_get_per_cpu_heap ();
+ void *new;
+
+ ASSERT (count_set_bits (align) == 1);
+
+ old_alloc_size = p ? mspace_usable_size (p) : 0;
+
+ if (new_size == old_alloc_size)
+ return p;
+
+ if (p && pointer_is_aligned (p, align) &&
+ mspace_realloc_in_place (h->mspace, p, new_size))
+ {
+ clib_mem_unpoison (p, new_size);
+ if (PREDICT_FALSE (h->flags & CLIB_MEM_HEAP_F_TRACED))
+ {
+ mheap_put_trace_internal (h, pointer_to_uword (p), old_alloc_size);
+ mheap_get_trace_internal (h, pointer_to_uword (p),
+ clib_mem_size (p));
+ }
+ }
+ else
+ {
+ new = clib_mem_heap_alloc_inline (h, new_size, align, 1);
+
+ clib_mem_unpoison (new, new_size);
+ if (old_alloc_size)
+ {
+ clib_mem_unpoison (p, old_alloc_size);
+ clib_memcpy_fast (new, p, clib_min (new_size, old_alloc_size));
+ clib_mem_heap_free (h, p);
+ }
+ p = new;
+ }
+
+ return p;
+}
+
+__clib_export __clib_flatten void *
+clib_mem_heap_realloc (void *heap, void *p, uword new_size)
+{
+ return clib_mem_heap_realloc_aligned (heap, p, new_size, CLIB_MEM_MIN_ALIGN);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_realloc_aligned (void *p, uword new_size, uword align)
+{
+ return clib_mem_heap_realloc_aligned (0, p, new_size, align);
+}
+
+__clib_export __clib_flatten void *
+clib_mem_realloc (void *p, uword new_size)
+{
+ return clib_mem_heap_realloc_aligned (0, p, new_size, CLIB_MEM_MIN_ALIGN);
+}
+
+__clib_export __clib_flatten uword
+clib_mem_heap_is_heap_object (void *heap, void *p)
+{
+ clib_mem_heap_t *h = heap ? heap : clib_mem_get_per_cpu_heap ();
+ return mspace_is_heap_object (h->mspace, p);
+}
+
+__clib_export __clib_flatten uword
+clib_mem_is_heap_object (void *p)
+{
+ return clib_mem_heap_is_heap_object (0, p);
+}
+
+__clib_export __clib_flatten void
+clib_mem_heap_free (void *heap, void *p)
+{
+ clib_mem_heap_t *h = heap ? heap : clib_mem_get_per_cpu_heap ();
+ uword size = clib_mem_size (p);
+
+ /* Make sure object is in the correct heap. */
+ ASSERT (clib_mem_heap_is_heap_object (h, p));
+
+ if (PREDICT_FALSE (h->flags & CLIB_MEM_HEAP_F_TRACED))
+ mheap_put_trace_internal (h, pointer_to_uword (p), size);
+ clib_mem_poison (p, clib_mem_size (p));
+
+ mspace_free (h->mspace, p);
+}
+
+__clib_export __clib_flatten void
+clib_mem_free (void *p)
+{
+ clib_mem_heap_free (0, p);
+}
+
+__clib_export __clib_flatten uword
+clib_mem_size (void *p)
+{
+ return mspace_usable_size (p);
+}
+
+__clib_export void
+clib_mem_free_s (void *p)
+{
+ uword size = clib_mem_size (p);
+ clib_mem_unpoison (p, size);
+ memset_s_inline (p, size, 0, size);
+ clib_mem_free (p);
+}
diff --git a/src/vppinfra/memcpy.h b/src/vppinfra/memcpy.h
index f3adc78d53d..e895cbf7485 100644
--- a/src/vppinfra/memcpy.h
+++ b/src/vppinfra/memcpy.h
@@ -6,6 +6,49 @@
#ifndef included_memcpy_h
#define included_memcpy_h
+static_always_inline void
+clib_memcpy_may_overrun (void *dst, void *src, u32 n_bytes)
+{
+ word n_left = n_bytes;
+#if defined(CLIB_HAVE_VEC512)
+ u8x64u *sv = (u8x64u *) src;
+ u8x64u *dv = (u8x64u *) dst;
+#elif defined(CLIB_HAVE_VEC256)
+ u8x32u *sv = (u8x32u *) src;
+ u8x32u *dv = (u8x32u *) dst;
+#elif defined(CLIB_HAVE_VEC128)
+ u8x16u *sv = (u8x16u *) src;
+ u8x16u *dv = (u8x16u *) dst;
+#else
+ u64u *sv = (u64u *) src;
+ u64u *dv = (u64u *) dst;
+#endif
+
+ while (n_left >= 4 * sizeof (sv[0]))
+ {
+ __typeof__ (*sv) v0, v1, v2, v3;
+ v0 = sv[0];
+ v1 = sv[1];
+ v2 = sv[2];
+ v3 = sv[3];
+ sv += 4;
+ n_left -= 4 * sizeof (sv[0]);
+ dv[0] = v0;
+ dv[1] = v1;
+ dv[2] = v2;
+ dv[3] = v3;
+ dv += 4;
+ }
+
+ while (n_left > 0)
+ {
+ dv[0] = sv[0];
+ sv += 1;
+ dv += 1;
+ n_left -= sizeof (sv[0]);
+ }
+}
+
#ifndef __COVERITY__
static_always_inline void
diff --git a/src/vppinfra/memcpy_avx2.h b/src/vppinfra/memcpy_avx2.h
deleted file mode 100644
index f7a36f0e5cb..00000000000
--- a/src/vppinfra/memcpy_avx2.h
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef included_clib_memcpy_avx2_h
-#define included_clib_memcpy_avx2_h
-
-#include <stdint.h>
-#include <x86intrin.h>
-#include <vppinfra/warnings.h>
-
-/* *INDENT-OFF* */
-WARN_OFF (stringop-overflow)
-/* *INDENT-ON* */
-
-static inline void
-clib_mov16 (u8 * dst, const u8 * src)
-{
- __m128i xmm0;
-
- xmm0 = _mm_loadu_si128 ((const __m128i *) src);
- _mm_storeu_si128 ((__m128i *) dst, xmm0);
-}
-
-static inline void
-clib_mov32 (u8 * dst, const u8 * src)
-{
- __m256i ymm0;
-
- ymm0 = _mm256_loadu_si256 ((const __m256i *) src);
- _mm256_storeu_si256 ((__m256i *) dst, ymm0);
-}
-
-static inline void
-clib_mov64 (u8 * dst, const u8 * src)
-{
- clib_mov32 ((u8 *) dst + 0 * 32, (const u8 *) src + 0 * 32);
- clib_mov32 ((u8 *) dst + 1 * 32, (const u8 *) src + 1 * 32);
-}
-
-static inline void
-clib_mov128 (u8 * dst, const u8 * src)
-{
- clib_mov64 ((u8 *) dst + 0 * 64, (const u8 *) src + 0 * 64);
- clib_mov64 ((u8 *) dst + 1 * 64, (const u8 *) src + 1 * 64);
-}
-
-static inline void
-clib_mov128blocks (u8 * dst, const u8 * src, size_t n)
-{
- __m256i ymm0, ymm1, ymm2, ymm3;
-
- while (n >= 128)
- {
- ymm0 =
- _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 0 * 32));
- n -= 128;
- ymm1 =
- _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 1 * 32));
- ymm2 =
- _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 2 * 32));
- ymm3 =
- _mm256_loadu_si256 ((const __m256i *) ((const u8 *) src + 3 * 32));
- src = (const u8 *) src + 128;
- _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 0 * 32), ymm0);
- _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 1 * 32), ymm1);
- _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 2 * 32), ymm2);
- _mm256_storeu_si256 ((__m256i *) ((u8 *) dst + 3 * 32), ymm3);
- dst = (u8 *) dst + 128;
- }
-}
-
-static inline void *
-clib_memcpy_fast_avx2 (void *dst, const void *src, size_t n)
-{
- uword dstu = (uword) dst;
- uword srcu = (uword) src;
- void *ret = dst;
- size_t dstofss;
- size_t bits;
-
- /**
- * Copy less than 16 bytes
- */
- if (n < 16)
- {
- if (n & 0x01)
- {
- *(u8 *) dstu = *(const u8 *) srcu;
- srcu = (uword) ((const u8 *) srcu + 1);
- dstu = (uword) ((u8 *) dstu + 1);
- }
- if (n & 0x02)
- {
- *(u16 *) dstu = *(const u16 *) srcu;
- srcu = (uword) ((const u16 *) srcu + 1);
- dstu = (uword) ((u16 *) dstu + 1);
- }
- if (n & 0x04)
- {
- *(u32 *) dstu = *(const u32 *) srcu;
- srcu = (uword) ((const u32 *) srcu + 1);
- dstu = (uword) ((u32 *) dstu + 1);
- }
- if (n & 0x08)
- {
- *(u64 *) dstu = *(const u64 *) srcu;
- }
- return ret;
- }
-
- /**
- * Fast way when copy size doesn't exceed 512 bytes
- */
- if (n <= 32)
- {
- clib_mov16 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 48)
- {
- clib_mov16 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst + 16, (const u8 *) src + 16);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 64)
- {
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- clib_mov32 ((u8 *) dst - 32 + n, (const u8 *) src - 32 + n);
- return ret;
- }
- if (n <= 256)
- {
- if (n >= 128)
- {
- n -= 128;
- clib_mov128 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 128;
- dst = (u8 *) dst + 128;
- }
- COPY_BLOCK_128_BACK31:
- if (n >= 64)
- {
- n -= 64;
- clib_mov64 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 64;
- dst = (u8 *) dst + 64;
- }
- if (n > 32)
- {
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- clib_mov32 ((u8 *) dst - 32 + n, (const u8 *) src - 32 + n);
- return ret;
- }
- if (n > 0)
- {
- clib_mov32 ((u8 *) dst - 32 + n, (const u8 *) src - 32 + n);
- }
- return ret;
- }
-
- /**
- * Make store aligned when copy size exceeds 256 bytes
- */
- dstofss = (uword) dst & 0x1F;
- if (dstofss > 0)
- {
- dstofss = 32 - dstofss;
- n -= dstofss;
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + dstofss;
- dst = (u8 *) dst + dstofss;
- }
-
- /**
- * Copy 128-byte blocks.
- */
- clib_mov128blocks ((u8 *) dst, (const u8 *) src, n);
- bits = n;
- n = n & 127;
- bits -= n;
- src = (const u8 *) src + bits;
- dst = (u8 *) dst + bits;
-
- /**
- * Copy whatever left
- */
- goto COPY_BLOCK_128_BACK31;
-}
-
-/* *INDENT-OFF* */
-WARN_ON (stringop-overflow)
-/* *INDENT-ON* */
-
-#endif /* included_clib_memcpy_avx2_h */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/memcpy_avx512.h b/src/vppinfra/memcpy_avx512.h
deleted file mode 100644
index 98dac75beac..00000000000
--- a/src/vppinfra/memcpy_avx512.h
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef included_clib_memcpy_avx512_h
-#define included_clib_memcpy_avx512_h
-
-#include <stdint.h>
-#include <x86intrin.h>
-#include <vppinfra/warnings.h>
-
-/* *INDENT-OFF* */
-WARN_OFF (stringop-overflow)
-/* *INDENT-ON* */
-
-static inline void
-clib_mov16 (u8 * dst, const u8 * src)
-{
- __m128i xmm0;
-
- xmm0 = _mm_loadu_si128 ((const __m128i *) src);
- _mm_storeu_si128 ((__m128i *) dst, xmm0);
-}
-
-static inline void
-clib_mov32 (u8 * dst, const u8 * src)
-{
- __m256i ymm0;
-
- ymm0 = _mm256_loadu_si256 ((const __m256i *) src);
- _mm256_storeu_si256 ((__m256i *) dst, ymm0);
-}
-
-static inline void
-clib_mov64 (u8 * dst, const u8 * src)
-{
- __m512i zmm0;
-
- zmm0 = _mm512_loadu_si512 ((const void *) src);
- _mm512_storeu_si512 ((void *) dst, zmm0);
-}
-
-static inline void
-clib_mov128 (u8 * dst, const u8 * src)
-{
- clib_mov64 (dst + 0 * 64, src + 0 * 64);
- clib_mov64 (dst + 1 * 64, src + 1 * 64);
-}
-
-static inline void
-clib_mov256 (u8 * dst, const u8 * src)
-{
- clib_mov128 (dst + 0 * 128, src + 0 * 128);
- clib_mov128 (dst + 1 * 128, src + 1 * 128);
-}
-
-static inline void
-clib_mov128blocks (u8 * dst, const u8 * src, size_t n)
-{
- __m512i zmm0, zmm1;
-
- while (n >= 128)
- {
- zmm0 = _mm512_loadu_si512 ((const void *) (src + 0 * 64));
- n -= 128;
- zmm1 = _mm512_loadu_si512 ((const void *) (src + 1 * 64));
- src = src + 128;
- _mm512_storeu_si512 ((void *) (dst + 0 * 64), zmm0);
- _mm512_storeu_si512 ((void *) (dst + 1 * 64), zmm1);
- dst = dst + 128;
- }
-}
-
-static inline void
-clib_mov512blocks (u8 * dst, const u8 * src, size_t n)
-{
- __m512i zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7;
-
- while (n >= 512)
- {
- zmm0 = _mm512_loadu_si512 ((const void *) (src + 0 * 64));
- n -= 512;
- zmm1 = _mm512_loadu_si512 ((const void *) (src + 1 * 64));
- zmm2 = _mm512_loadu_si512 ((const void *) (src + 2 * 64));
- zmm3 = _mm512_loadu_si512 ((const void *) (src + 3 * 64));
- zmm4 = _mm512_loadu_si512 ((const void *) (src + 4 * 64));
- zmm5 = _mm512_loadu_si512 ((const void *) (src + 5 * 64));
- zmm6 = _mm512_loadu_si512 ((const void *) (src + 6 * 64));
- zmm7 = _mm512_loadu_si512 ((const void *) (src + 7 * 64));
- src = src + 512;
- _mm512_storeu_si512 ((void *) (dst + 0 * 64), zmm0);
- _mm512_storeu_si512 ((void *) (dst + 1 * 64), zmm1);
- _mm512_storeu_si512 ((void *) (dst + 2 * 64), zmm2);
- _mm512_storeu_si512 ((void *) (dst + 3 * 64), zmm3);
- _mm512_storeu_si512 ((void *) (dst + 4 * 64), zmm4);
- _mm512_storeu_si512 ((void *) (dst + 5 * 64), zmm5);
- _mm512_storeu_si512 ((void *) (dst + 6 * 64), zmm6);
- _mm512_storeu_si512 ((void *) (dst + 7 * 64), zmm7);
- dst = dst + 512;
- }
-}
-
-static inline void *
-clib_memcpy_fast_avx512 (void *dst, const void *src, size_t n)
-{
- uword dstu = (uword) dst;
- uword srcu = (uword) src;
- void *ret = dst;
- size_t dstofss;
- size_t bits;
-
- /**
- * Copy less than 16 bytes
- */
- if (n < 16)
- {
- if (n & 0x01)
- {
- *(u8 *) dstu = *(const u8 *) srcu;
- srcu = (uword) ((const u8 *) srcu + 1);
- dstu = (uword) ((u8 *) dstu + 1);
- }
- if (n & 0x02)
- {
- *(u16 *) dstu = *(const u16 *) srcu;
- srcu = (uword) ((const u16 *) srcu + 1);
- dstu = (uword) ((u16 *) dstu + 1);
- }
- if (n & 0x04)
- {
- *(u32 *) dstu = *(const u32 *) srcu;
- srcu = (uword) ((const u32 *) srcu + 1);
- dstu = (uword) ((u32 *) dstu + 1);
- }
- if (n & 0x08)
- *(u64 *) dstu = *(const u64 *) srcu;
- return ret;
- }
-
- /**
- * Fast way when copy size doesn't exceed 512 bytes
- */
- if (n <= 32)
- {
- clib_mov16 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 64)
- {
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- clib_mov32 ((u8 *) dst - 32 + n, (const u8 *) src - 32 + n);
- return ret;
- }
- if (n <= 512)
- {
- if (n >= 256)
- {
- n -= 256;
- clib_mov256 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 256;
- dst = (u8 *) dst + 256;
- }
- if (n >= 128)
- {
- n -= 128;
- clib_mov128 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 128;
- dst = (u8 *) dst + 128;
- }
- COPY_BLOCK_128_BACK63:
- if (n > 64)
- {
- clib_mov64 ((u8 *) dst, (const u8 *) src);
- clib_mov64 ((u8 *) dst - 64 + n, (const u8 *) src - 64 + n);
- return ret;
- }
- if (n > 0)
- clib_mov64 ((u8 *) dst - 64 + n, (const u8 *) src - 64 + n);
- return ret;
- }
-
- /**
- * Make store aligned when copy size exceeds 512 bytes
- */
- dstofss = (uword) dst & 0x3F;
- if (dstofss > 0)
- {
- dstofss = 64 - dstofss;
- n -= dstofss;
- clib_mov64 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + dstofss;
- dst = (u8 *) dst + dstofss;
- }
-
- /**
- * Copy 512-byte blocks.
- * Use copy block function for better instruction order control,
- * which is important when load is unaligned.
- */
- clib_mov512blocks ((u8 *) dst, (const u8 *) src, n);
- bits = n;
- n = n & 511;
- bits -= n;
- src = (const u8 *) src + bits;
- dst = (u8 *) dst + bits;
-
- /**
- * Copy 128-byte blocks.
- * Use copy block function for better instruction order control,
- * which is important when load is unaligned.
- */
- if (n >= 128)
- {
- clib_mov128blocks ((u8 *) dst, (const u8 *) src, n);
- bits = n;
- n = n & 127;
- bits -= n;
- src = (const u8 *) src + bits;
- dst = (u8 *) dst + bits;
- }
-
- /**
- * Copy whatever left
- */
- goto COPY_BLOCK_128_BACK63;
-}
-
-/* *INDENT-OFF* */
-WARN_ON (stringop-overflow)
-/* *INDENT-ON* */
-
-#endif /* included_clib_memcpy_avx512_h */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/memcpy_sse3.h b/src/vppinfra/memcpy_sse3.h
deleted file mode 100644
index aea2005d95a..00000000000
--- a/src/vppinfra/memcpy_sse3.h
+++ /dev/null
@@ -1,368 +0,0 @@
-/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef included_clib_memcpy_sse3_h
-#define included_clib_memcpy_sse3_h
-
-#include <stdint.h>
-#include <x86intrin.h>
-#include <vppinfra/warnings.h>
-
-/* *INDENT-OFF* */
-WARN_OFF (stringop-overflow)
-/* *INDENT-ON* */
-
-static inline void
-clib_mov16 (u8 * dst, const u8 * src)
-{
- __m128i xmm0;
-
- xmm0 = _mm_loadu_si128 ((const __m128i *) src);
- _mm_storeu_si128 ((__m128i *) dst, xmm0);
-}
-
-static inline void
-clib_mov32 (u8 * dst, const u8 * src)
-{
- clib_mov16 ((u8 *) dst + 0 * 16, (const u8 *) src + 0 * 16);
- clib_mov16 ((u8 *) dst + 1 * 16, (const u8 *) src + 1 * 16);
-}
-
-static inline void
-clib_mov64 (u8 * dst, const u8 * src)
-{
- clib_mov32 ((u8 *) dst + 0 * 32, (const u8 *) src + 0 * 32);
- clib_mov32 ((u8 *) dst + 1 * 32, (const u8 *) src + 1 * 32);
-}
-
-static inline void
-clib_mov128 (u8 * dst, const u8 * src)
-{
- clib_mov64 ((u8 *) dst + 0 * 64, (const u8 *) src + 0 * 64);
- clib_mov64 ((u8 *) dst + 1 * 64, (const u8 *) src + 1 * 64);
-}
-
-static inline void
-clib_mov256 (u8 * dst, const u8 * src)
-{
- clib_mov128 ((u8 *) dst + 0 * 128, (const u8 *) src + 0 * 128);
- clib_mov128 ((u8 *) dst + 1 * 128, (const u8 *) src + 1 * 128);
-}
-
-/**
- * Macro for copying unaligned block from one location to another with constant load offset,
- * 47 bytes leftover maximum,
- * locations should not overlap.
- * Requirements:
- * - Store is aligned
- * - Load offset is <offset>, which must be immediate value within [1, 15]
- * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
- * - <dst>, <src>, <len> must be variables
- * - __m128i <xmm0> ~ <xmm8> must be pre-defined
- */
-#define CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, offset) \
-({ \
- int tmp; \
- while (len >= 128 + 16 - offset) { \
- xmm0 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 0 * 16)); \
- len -= 128; \
- xmm1 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 1 * 16)); \
- xmm2 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 2 * 16)); \
- xmm3 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 3 * 16)); \
- xmm4 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 4 * 16)); \
- xmm5 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 5 * 16)); \
- xmm6 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 6 * 16)); \
- xmm7 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 7 * 16)); \
- xmm8 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 8 * 16)); \
- src = (const u8 *)src + 128; \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \
- dst = (u8 *)dst + 128; \
- } \
- tmp = len; \
- len = ((len - 16 + offset) & 127) + 16 - offset; \
- tmp -= len; \
- src = (const u8 *)src + tmp; \
- dst = (u8 *)dst + tmp; \
- if (len >= 32 + 16 - offset) { \
- while (len >= 32 + 16 - offset) { \
- xmm0 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 0 * 16)); \
- len -= 32; \
- xmm1 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 1 * 16)); \
- xmm2 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 2 * 16)); \
- src = (const u8 *)src + 32; \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
- _mm_storeu_si128((__m128i *)((u8 *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
- dst = (u8 *)dst + 32; \
- } \
- tmp = len; \
- len = ((len - 16 + offset) & 31) + 16 - offset; \
- tmp -= len; \
- src = (const u8 *)src + tmp; \
- dst = (u8 *)dst + tmp; \
- } \
-})
-
-/**
- * Macro for copying unaligned block from one location to another,
- * 47 bytes leftover maximum,
- * locations should not overlap.
- * Use switch here because the aligning instruction requires immediate value for shift count.
- * Requirements:
- * - Store is aligned
- * - Load offset is <offset>, which must be within [1, 15]
- * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
- * - <dst>, <src>, <len> must be variables
- * - __m128i <xmm0> ~ <xmm8> used in CLIB_MVUNALIGN_LEFT47_IMM must be pre-defined
- */
-#define CLIB_MVUNALIGN_LEFT47(dst, src, len, offset) \
-({ \
- switch (offset) { \
- case 0x01: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x01); break; \
- case 0x02: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x02); break; \
- case 0x03: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x03); break; \
- case 0x04: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x04); break; \
- case 0x05: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x05); break; \
- case 0x06: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x06); break; \
- case 0x07: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x07); break; \
- case 0x08: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x08); break; \
- case 0x09: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x09); break; \
- case 0x0A: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0A); break; \
- case 0x0B: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0B); break; \
- case 0x0C: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0C); break; \
- case 0x0D: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0D); break; \
- case 0x0E: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0E); break; \
- case 0x0F: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0F); break; \
- default:; \
- } \
-})
-
-static inline void *
-clib_memcpy_fast_sse3 (void *dst, const void *src, size_t n)
-{
- __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
- uword dstu = (uword) dst;
- uword srcu = (uword) src;
- void *ret = dst;
- size_t dstofss;
- size_t srcofs;
-
- /**
- * Copy less than 16 bytes
- */
- if (n < 16)
- {
- if (n & 0x01)
- {
- *(u8 *) dstu = *(const u8 *) srcu;
- srcu = (uword) ((const u8 *) srcu + 1);
- dstu = (uword) ((u8 *) dstu + 1);
- }
- if (n & 0x02)
- {
- *(u16 *) dstu = *(const u16 *) srcu;
- srcu = (uword) ((const u16 *) srcu + 1);
- dstu = (uword) ((u16 *) dstu + 1);
- }
- if (n & 0x04)
- {
- *(u32 *) dstu = *(const u32 *) srcu;
- srcu = (uword) ((const u32 *) srcu + 1);
- dstu = (uword) ((u32 *) dstu + 1);
- }
- if (n & 0x08)
- {
- *(u64 *) dstu = *(const u64 *) srcu;
- }
- return ret;
- }
-
- /**
- * Fast way when copy size doesn't exceed 512 bytes
- */
- if (n <= 32)
- {
- clib_mov16 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 48)
- {
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 64)
- {
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst + 32, (const u8 *) src + 32);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n <= 128)
- {
- goto COPY_BLOCK_128_BACK15;
- }
- if (n <= 512)
- {
- if (n >= 256)
- {
- n -= 256;
- clib_mov128 ((u8 *) dst, (const u8 *) src);
- clib_mov128 ((u8 *) dst + 128, (const u8 *) src + 128);
- src = (const u8 *) src + 256;
- dst = (u8 *) dst + 256;
- }
- COPY_BLOCK_255_BACK15:
- if (n >= 128)
- {
- n -= 128;
- clib_mov128 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 128;
- dst = (u8 *) dst + 128;
- }
- COPY_BLOCK_128_BACK15:
- if (n >= 64)
- {
- n -= 64;
- clib_mov64 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 64;
- dst = (u8 *) dst + 64;
- }
- COPY_BLOCK_64_BACK15:
- if (n >= 32)
- {
- n -= 32;
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + 32;
- dst = (u8 *) dst + 32;
- }
- if (n > 16)
- {
- clib_mov16 ((u8 *) dst, (const u8 *) src);
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- return ret;
- }
- if (n > 0)
- {
- clib_mov16 ((u8 *) dst - 16 + n, (const u8 *) src - 16 + n);
- }
- return ret;
- }
-
- /**
- * Make store aligned when copy size exceeds 512 bytes,
- * and make sure the first 15 bytes are copied, because
- * unaligned copy functions require up to 15 bytes
- * backwards access.
- */
- dstofss = (uword) dst & 0x0F;
- if (dstofss > 0)
- {
- dstofss = 16 - dstofss + 16;
- n -= dstofss;
- clib_mov32 ((u8 *) dst, (const u8 *) src);
- src = (const u8 *) src + dstofss;
- dst = (u8 *) dst + dstofss;
- }
- srcofs = ((uword) src & 0x0F);
-
- /**
- * For aligned copy
- */
- if (srcofs == 0)
- {
- /**
- * Copy 256-byte blocks
- */
- for (; n >= 256; n -= 256)
- {
- clib_mov256 ((u8 *) dst, (const u8 *) src);
- dst = (u8 *) dst + 256;
- src = (const u8 *) src + 256;
- }
-
- /**
- * Copy whatever left
- */
- goto COPY_BLOCK_255_BACK15;
- }
-
- /**
- * For copy with unaligned load
- */
- CLIB_MVUNALIGN_LEFT47 (dst, src, n, srcofs);
-
- /**
- * Copy whatever left
- */
- goto COPY_BLOCK_64_BACK15;
-}
-
-/* *INDENT-OFF* */
-WARN_ON (stringop-overflow)
-/* *INDENT-ON* */
-
-#undef CLIB_MVUNALIGN_LEFT47_IMM
-#undef CLIB_MVUNALIGN_LEFT47
-
-#endif /* included_clib_memcpy_sse3_h */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/memcpy_x86_64.h b/src/vppinfra/memcpy_x86_64.h
new file mode 100644
index 00000000000..39258f19748
--- /dev/null
+++ b/src/vppinfra/memcpy_x86_64.h
@@ -0,0 +1,613 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Damjan Marion
+ */
+
+#ifndef included_clib_memcpy_x86_64_h
+#define included_clib_memcpy_x86_64_h
+#ifdef __x86_64__
+
+#include <vppinfra/clib.h>
+#include <vppinfra/warnings.h>
+#include <stdio.h>
+
+/* clang-format off */
+WARN_OFF (stringop-overflow)
+/* clang-format on */
+
+static_always_inline void
+clib_memcpy1 (void *d, void *s)
+{
+ *(u8 *) d = *(u8 *) s;
+}
+
+static_always_inline void
+clib_memcpy2 (void *d, void *s)
+{
+ *(u16u *) d = *(u16u *) s;
+}
+
+static_always_inline void
+clib_memcpy4 (void *d, void *s)
+{
+ *(u32u *) d = *(u32u *) s;
+}
+
+static_always_inline void
+clib_memcpy8 (void *d, void *s)
+{
+ *(u64u *) d = *(u64u *) s;
+}
+
+static_always_inline void
+clib_memcpy16 (void *d, void *s)
+{
+#ifdef CLIB_HAVE_VEC128
+ *(u8x16u *) d = *(u8x16u *) s;
+#else
+ clib_memcpy8 (d, s);
+ clib_memcpy8 (d + 8, s + 8);
+#endif
+}
+
+#ifdef CLIB_HAVE_VEC256
+static_always_inline void
+clib_memcpy32 (void *d, void *s)
+{
+ *(u8x32u *) d = *(u8x32u *) s;
+}
+#endif
+
+#ifdef CLIB_HAVE_VEC512
+static_always_inline void
+clib_memcpy64 (void *d, void *s)
+{
+ *(u8x64u *) d = *(u8x64u *) s;
+}
+#endif
+
+static_always_inline void
+clib_memcpy_const_le32 (u8 *dst, u8 *src, size_t n)
+{
+ switch (n)
+ {
+ case 1:
+ clib_memcpy1 (dst, src);
+ break;
+ case 2:
+ clib_memcpy2 (dst, src);
+ break;
+ case 3:
+ clib_memcpy2 (dst, src);
+ clib_memcpy1 (dst + 2, src + 2);
+ break;
+ case 4:
+ clib_memcpy4 (dst, src);
+ break;
+ case 5:
+ clib_memcpy4 (dst, src);
+ clib_memcpy1 (dst + 4, src + 4);
+ break;
+ case 6:
+ clib_memcpy4 (dst, src);
+ clib_memcpy2 (dst + 4, src + 4);
+ break;
+ case 7:
+ clib_memcpy4 (dst, src);
+ clib_memcpy4 (dst + 3, src + 3);
+ break;
+ case 8:
+ clib_memcpy8 (dst, src);
+ break;
+ case 9:
+ clib_memcpy8 (dst, src);
+ clib_memcpy1 (dst + 8, src + 8);
+ break;
+ case 10:
+ clib_memcpy8 (dst, src);
+ clib_memcpy2 (dst + 8, src + 8);
+ break;
+ case 11:
+ case 12:
+ clib_memcpy8 (dst, src);
+ clib_memcpy4 (dst + n - 4, src + n - 4);
+ break;
+ case 13:
+ case 14:
+ case 15:
+ clib_memcpy8 (dst, src);
+ clib_memcpy8 (dst + n - 8, src + n - 8);
+ break;
+ case 16:
+ clib_memcpy16 (dst, src);
+ break;
+ case 17:
+ clib_memcpy16 (dst, src);
+ clib_memcpy1 (dst + 16, src + 16);
+ break;
+ case 18:
+ clib_memcpy16 (dst, src);
+ clib_memcpy2 (dst + 16, src + 16);
+ break;
+ case 20:
+ clib_memcpy16 (dst, src);
+ clib_memcpy4 (dst + 16, src + 16);
+ break;
+ case 24:
+ clib_memcpy16 (dst, src);
+ clib_memcpy8 (dst + 16, src + 16);
+ break;
+ default:
+ clib_memcpy16 (dst, src);
+ clib_memcpy16 (dst + n - 16, src + n - 16);
+ break;
+ }
+}
+
+static_always_inline void
+clib_memcpy_const_le64 (u8 *dst, u8 *src, size_t n)
+{
+ if (n < 32)
+ {
+ clib_memcpy_const_le32 (dst, src, n);
+ return;
+ }
+
+#if defined(CLIB_HAVE_VEC256)
+ switch (n)
+ {
+ case 32:
+ clib_memcpy32 (dst, src);
+ break;
+ case 33:
+ clib_memcpy32 (dst, src);
+ clib_memcpy1 (dst + 32, src + 32);
+ break;
+ case 34:
+ clib_memcpy32 (dst, src);
+ clib_memcpy2 (dst + 32, src + 32);
+ break;
+ case 36:
+ clib_memcpy32 (dst, src);
+ clib_memcpy4 (dst + 32, src + 32);
+ break;
+ case 40:
+ clib_memcpy32 (dst, src);
+ clib_memcpy8 (dst + 32, src + 32);
+ break;
+ case 48:
+ clib_memcpy32 (dst, src);
+ clib_memcpy16 (dst + 32, src + 32);
+ break;
+ default:
+ clib_memcpy32 (dst, src);
+ clib_memcpy32 (dst + n - 32, src + n - 32);
+ break;
+ }
+#else
+ while (n > 31)
+ {
+ clib_memcpy16 (dst, src);
+ clib_memcpy16 (dst + 16, src + 16);
+ dst += 32;
+ src += 32;
+ n -= 32;
+ }
+ clib_memcpy_const_le32 (dst, src, n);
+#endif
+}
+
+static_always_inline void
+clib_memcpy_x86_64_const (u8 *dst, u8 *src, size_t n)
+{
+#if defined(CLIB_HAVE_VEC512)
+ while (n > 128)
+ {
+ clib_memcpy64 (dst, src);
+ dst += 64;
+ src += 64;
+ n -= 64;
+ }
+
+ if (n < 64)
+ {
+ clib_memcpy_const_le64 (dst, src, n);
+ return;
+ }
+
+ switch (n)
+ {
+ case 64:
+ clib_memcpy64 (dst, src);
+ break;
+ case 65:
+ clib_memcpy64 (dst, src);
+ clib_memcpy1 (dst + 64, src + 64);
+ break;
+ case 66:
+ clib_memcpy64 (dst, src);
+ clib_memcpy2 (dst + 64, src + 64);
+ break;
+ case 68:
+ clib_memcpy64 (dst, src);
+ clib_memcpy4 (dst + 64, src + 64);
+ break;
+ case 72:
+ clib_memcpy64 (dst, src);
+ clib_memcpy8 (dst + 64, src + 64);
+ break;
+ case 80:
+ clib_memcpy64 (dst, src);
+ clib_memcpy16 (dst + 64, src + 64);
+ break;
+ case 96:
+ clib_memcpy64 (dst, src);
+ clib_memcpy32 (dst + 64, src + 64);
+ break;
+ default:
+ clib_memcpy64 (dst, src);
+ clib_memcpy64 (dst + n - 64, src + n - 64);
+ break;
+ }
+#elif defined(CLIB_HAVE_VEC256)
+ while (n > 64)
+ {
+ clib_memcpy32 (dst, src);
+ dst += 32;
+ src += 32;
+ n -= 32;
+ }
+ clib_memcpy_const_le64 (dst, src, n);
+#else
+ while (n > 32)
+ {
+ clib_memcpy16 (dst, src);
+ dst += 16;
+ src += 16;
+ n -= 16;
+ }
+ clib_memcpy_const_le32 (dst, src, n);
+#endif
+}
+
+static_always_inline void *
+clib_memcpy_x86_64 (void *restrict dst, const void *restrict src, size_t n)
+{
+ u8 *d = (u8 *) dst, *s = (u8 *) src;
+
+ if (n == 0)
+ return dst;
+
+ if (COMPILE_TIME_CONST (n))
+ {
+ if (n)
+ clib_memcpy_x86_64_const (d, s, n);
+ return dst;
+ }
+
+ if (n <= 32)
+ {
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u32 mask = pow2_mask (n);
+ u8x32_mask_store (u8x32_mask_load_zero (s, mask), d, mask);
+#else
+ if (PREDICT_TRUE (n >= 16))
+ {
+ clib_memcpy16 (d, s);
+ clib_memcpy16 (d + n - 16, s + n - 16);
+ }
+ else if (PREDICT_TRUE (n >= 8))
+ {
+ clib_memcpy8 (d, s);
+ clib_memcpy8 (d + n - 8, s + n - 8);
+ }
+ else if (PREDICT_TRUE (n >= 4))
+ {
+ clib_memcpy4 (d, s);
+ clib_memcpy4 (d + n - 4, s + n - 4);
+ }
+ else if (PREDICT_TRUE (n > 1))
+ {
+ clib_memcpy2 (d, s);
+ clib_memcpy2 (d + n - 2, s + n - 2);
+ }
+ else
+ clib_memcpy1 (d, s);
+#endif
+ }
+#ifdef CLIB_HAVE_VEC512
+ else
+ {
+ u8x64 v0, v1, v2, v3;
+ u64 final_off, nr, off = 64;
+
+ if (n <= 64)
+ {
+ n -= 32;
+ u8x32_store_unaligned (u8x32_load_unaligned (s), d);
+ u8x32_store_unaligned (u8x32_load_unaligned (s + n), d + n);
+ return dst;
+ }
+
+ u8x64_store_unaligned (u8x64_load_unaligned (s), d);
+
+ if (n <= 128)
+ goto done2;
+
+ if (n <= 192)
+ goto one;
+
+ if (n <= 512 + 64)
+ {
+ nr = round_pow2 (n - 128, 64);
+ goto last;
+ }
+
+ off -= ((u64) d) & 0x3f;
+ nr = round_pow2 (n - off - 64, 64);
+ final_off = (nr & ~(u64) 0x1ff) + off;
+
+ more:
+ v0 = u8x64_load_unaligned (s + off + 0x000);
+ v1 = u8x64_load_unaligned (s + off + 0x040);
+ v2 = u8x64_load_unaligned (s + off + 0x080);
+ v3 = u8x64_load_unaligned (s + off + 0x0c0);
+ u8x64_store_unaligned (v0, d + off + 0x000);
+ u8x64_store_unaligned (v1, d + off + 0x040);
+ u8x64_store_unaligned (v2, d + off + 0x080);
+ u8x64_store_unaligned (v3, d + off + 0x0c0);
+ v0 = u8x64_load_unaligned (s + off + 0x100);
+ v1 = u8x64_load_unaligned (s + off + 0x140);
+ v2 = u8x64_load_unaligned (s + off + 0x180);
+ v3 = u8x64_load_unaligned (s + off + 0x1c0);
+ u8x64_store_unaligned (v0, d + off + 0x100);
+ u8x64_store_unaligned (v1, d + off + 0x140);
+ u8x64_store_unaligned (v2, d + off + 0x180);
+ u8x64_store_unaligned (v3, d + off + 0x1c0);
+ off += 512;
+ if (off != final_off)
+ goto more;
+
+ if ((nr & 0x1ff) == 0)
+ goto done2;
+
+ last:
+ if (PREDICT_TRUE (nr & 256))
+ {
+ v0 = u8x64_load_unaligned (s + off + 0x000);
+ v1 = u8x64_load_unaligned (s + off + 0x040);
+ v2 = u8x64_load_unaligned (s + off + 0x080);
+ v3 = u8x64_load_unaligned (s + off + 0x0c0);
+ u8x64_store_unaligned (v0, d + off + 0x000);
+ u8x64_store_unaligned (v1, d + off + 0x040);
+ u8x64_store_unaligned (v2, d + off + 0x080);
+ u8x64_store_unaligned (v3, d + off + 0x0c0);
+ off += 256;
+ }
+ if (PREDICT_TRUE (nr & 128))
+ {
+ v0 = u8x64_load_unaligned (s + off + 0x000);
+ v1 = u8x64_load_unaligned (s + off + 0x040);
+ u8x64_store_unaligned (v0, d + off + 0x000);
+ u8x64_store_unaligned (v1, d + off + 0x040);
+ off += 128;
+ }
+ if (PREDICT_TRUE (nr & 64))
+ {
+ one:
+ u8x64_store_unaligned (u8x64_load_unaligned (s + off), d + off);
+ }
+ done2:
+ u8x64_store_unaligned (u8x64_load_unaligned (s + n - 64), d + n - 64);
+ }
+ return dst;
+#elif defined(CLIB_HAVE_VEC256)
+ else
+ {
+ u8x32 v0, v1, v2, v3;
+ u64 final_off, nr, off = 32;
+
+ u8x32_store_unaligned (u8x32_load_unaligned (s), d);
+
+ if (n <= 64)
+ goto done2;
+
+ if (n <= 96)
+ goto one;
+
+ if (n <= 256 + 32)
+ {
+ nr = round_pow2 (n - 64, 32);
+ goto last;
+ }
+
+ off -= ((u64) d) & 0x1f;
+ nr = round_pow2 (n - off - 32, 32);
+ final_off = (nr & ~(u64) 0xff) + off;
+
+ more:
+ v0 = u8x32_load_unaligned (s + off + 0x00);
+ v1 = u8x32_load_unaligned (s + off + 0x20);
+ v2 = u8x32_load_unaligned (s + off + 0x40);
+ v3 = u8x32_load_unaligned (s + off + 0x60);
+ u8x32_store_unaligned (v0, d + off + 0x00);
+ u8x32_store_unaligned (v1, d + off + 0x20);
+ u8x32_store_unaligned (v2, d + off + 0x40);
+ u8x32_store_unaligned (v3, d + off + 0x60);
+ v0 = u8x32_load_unaligned (s + off + 0x80);
+ v1 = u8x32_load_unaligned (s + off + 0xa0);
+ v2 = u8x32_load_unaligned (s + off + 0xc0);
+ v3 = u8x32_load_unaligned (s + off + 0xe0);
+ u8x32_store_unaligned (v0, d + off + 0x80);
+ u8x32_store_unaligned (v1, d + off + 0xa0);
+ u8x32_store_unaligned (v2, d + off + 0xc0);
+ u8x32_store_unaligned (v3, d + off + 0xe0);
+ off += 256;
+ if (off != final_off)
+ goto more;
+
+ if ((nr & 0xff) == 0)
+ goto done2;
+
+ last:
+ if (PREDICT_TRUE (nr & 128))
+ {
+ v0 = u8x32_load_unaligned (s + off + 0x00);
+ v1 = u8x32_load_unaligned (s + off + 0x20);
+ v2 = u8x32_load_unaligned (s + off + 0x40);
+ v3 = u8x32_load_unaligned (s + off + 0x60);
+ u8x32_store_unaligned (v0, d + off + 0x00);
+ u8x32_store_unaligned (v1, d + off + 0x20);
+ u8x32_store_unaligned (v2, d + off + 0x40);
+ u8x32_store_unaligned (v3, d + off + 0x60);
+ off += 128;
+ }
+ if (PREDICT_TRUE (nr & 64))
+ {
+ v0 = u8x32_load_unaligned (s + off + 0x00);
+ v1 = u8x32_load_unaligned (s + off + 0x20);
+ u8x32_store_unaligned (v0, d + off + 0x00);
+ u8x32_store_unaligned (v1, d + off + 0x20);
+ off += 64;
+ }
+ if (PREDICT_TRUE (nr & 32))
+ {
+ one:
+ u8x32_store_unaligned (u8x32_load_unaligned (s + off), d + off);
+ }
+ done2:
+ u8x32_store_unaligned (u8x32_load_unaligned (s + n - 32), d + n - 32);
+ }
+ return dst;
+#elif defined(CLIB_HAVE_VEC128)
+ else
+ {
+ u8x16 v0, v1, v2, v3;
+ u64 final_off, nr, off = 32;
+
+ if (0 && n > 389)
+ {
+ __builtin_memcpy (d, s, n);
+ return dst;
+ }
+
+ u8x16_store_unaligned (u8x16_load_unaligned (s), d);
+ u8x16_store_unaligned (u8x16_load_unaligned (s + 16), d + 16);
+
+ if (n <= 48)
+ goto done2;
+
+ if (n <= 64)
+ goto one;
+
+ if (n <= 256 + 32)
+ {
+ nr = round_pow2 (n - 48, 16);
+ goto last;
+ }
+
+ off -= ((u64) d) & 0x0f;
+ nr = round_pow2 (n - off - 16, 16);
+ final_off = (nr & ~(u64) 0xff) + off;
+
+ more:
+ v0 = u8x16_load_unaligned (s + off + 0x00);
+ v1 = u8x16_load_unaligned (s + off + 0x10);
+ v2 = u8x16_load_unaligned (s + off + 0x20);
+ v3 = u8x16_load_unaligned (s + off + 0x30);
+ u8x16_store_unaligned (v0, d + off + 0x00);
+ u8x16_store_unaligned (v1, d + off + 0x10);
+ u8x16_store_unaligned (v2, d + off + 0x20);
+ u8x16_store_unaligned (v3, d + off + 0x30);
+ v0 = u8x16_load_unaligned (s + off + 0x40);
+ v1 = u8x16_load_unaligned (s + off + 0x50);
+ v2 = u8x16_load_unaligned (s + off + 0x60);
+ v3 = u8x16_load_unaligned (s + off + 0x70);
+ u8x16_store_unaligned (v0, d + off + 0x40);
+ u8x16_store_unaligned (v1, d + off + 0x50);
+ u8x16_store_unaligned (v2, d + off + 0x60);
+ u8x16_store_unaligned (v3, d + off + 0x70);
+ v0 = u8x16_load_unaligned (s + off + 0x80);
+ v1 = u8x16_load_unaligned (s + off + 0x90);
+ v2 = u8x16_load_unaligned (s + off + 0xa0);
+ v3 = u8x16_load_unaligned (s + off + 0xb0);
+ u8x16_store_unaligned (v0, d + off + 0x80);
+ u8x16_store_unaligned (v1, d + off + 0x90);
+ u8x16_store_unaligned (v2, d + off + 0xa0);
+ u8x16_store_unaligned (v3, d + off + 0xb0);
+ v0 = u8x16_load_unaligned (s + off + 0xc0);
+ v1 = u8x16_load_unaligned (s + off + 0xd0);
+ v2 = u8x16_load_unaligned (s + off + 0xe0);
+ v3 = u8x16_load_unaligned (s + off + 0xf0);
+ u8x16_store_unaligned (v0, d + off + 0xc0);
+ u8x16_store_unaligned (v1, d + off + 0xd0);
+ u8x16_store_unaligned (v2, d + off + 0xe0);
+ u8x16_store_unaligned (v3, d + off + 0xf0);
+ off += 256;
+ if (off != final_off)
+ goto more;
+
+ if ((nr & 0xff) == 0)
+ goto done2;
+
+ last:
+ if (PREDICT_TRUE (nr & 128))
+ {
+ v0 = u8x16_load_unaligned (s + off + 0x00);
+ v1 = u8x16_load_unaligned (s + off + 0x10);
+ v2 = u8x16_load_unaligned (s + off + 0x20);
+ v3 = u8x16_load_unaligned (s + off + 0x30);
+ u8x16_store_unaligned (v0, d + off + 0x00);
+ u8x16_store_unaligned (v1, d + off + 0x10);
+ u8x16_store_unaligned (v2, d + off + 0x20);
+ u8x16_store_unaligned (v3, d + off + 0x30);
+ v0 = u8x16_load_unaligned (s + off + 0x40);
+ v1 = u8x16_load_unaligned (s + off + 0x50);
+ v2 = u8x16_load_unaligned (s + off + 0x60);
+ v3 = u8x16_load_unaligned (s + off + 0x70);
+ u8x16_store_unaligned (v0, d + off + 0x40);
+ u8x16_store_unaligned (v1, d + off + 0x50);
+ u8x16_store_unaligned (v2, d + off + 0x60);
+ u8x16_store_unaligned (v3, d + off + 0x70);
+ off += 128;
+ }
+ if (PREDICT_TRUE (nr & 64))
+ {
+ v0 = u8x16_load_unaligned (s + off + 0x00);
+ v1 = u8x16_load_unaligned (s + off + 0x10);
+ v2 = u8x16_load_unaligned (s + off + 0x20);
+ v3 = u8x16_load_unaligned (s + off + 0x30);
+ u8x16_store_unaligned (v0, d + off + 0x00);
+ u8x16_store_unaligned (v1, d + off + 0x10);
+ u8x16_store_unaligned (v2, d + off + 0x20);
+ u8x16_store_unaligned (v3, d + off + 0x30);
+ off += 64;
+ }
+ if (PREDICT_TRUE (nr & 32))
+ {
+ v0 = u8x16_load_unaligned (s + off + 0x00);
+ v1 = u8x16_load_unaligned (s + off + 0x10);
+ u8x16_store_unaligned (v0, d + off + 0x00);
+ u8x16_store_unaligned (v1, d + off + 0x10);
+ off += 32;
+ }
+ if (PREDICT_TRUE (nr & 16))
+ {
+ one:
+ u8x16_store_unaligned (u8x16_load_unaligned (s + off), d + off);
+ }
+ done2:
+ u8x16_store_unaligned (u8x16_load_unaligned (s + n - 16), d + n - 16);
+ }
+ return dst;
+#else
+ __builtin_memcpy (dst, src, n);
+ return dst;
+#endif
+}
+
+/* clang-format off */
+WARN_ON (stringop-overflow)
+/* clang-format on */
+
+#endif
+#endif
diff --git a/src/vppinfra/mhash.c b/src/vppinfra/mhash.c
index c556312e64f..f0f1aa470d7 100644
--- a/src/vppinfra/mhash.c
+++ b/src/vppinfra/mhash.c
@@ -295,7 +295,7 @@ mhash_set_mem (mhash_t * h, void *key, uword * new_value, uword * old_value)
{
i = h->key_vector_free_indices[l - 1];
k = vec_elt_at_index (h->key_vector_or_heap, i);
- _vec_len (h->key_vector_free_indices) = l - 1;
+ vec_set_len (h->key_vector_free_indices, l - 1);
}
else
{
@@ -332,10 +332,10 @@ mhash_set_mem (mhash_t * h, void *key, uword * new_value, uword * old_value)
if (key_alloc_from_free_list)
{
h->key_vector_free_indices[l] = i;
- _vec_len (h->key_vector_free_indices) = l + 1;
+ vec_set_len (h->key_vector_free_indices, l + 1);
}
else
- _vec_len (h->key_vector_or_heap) -= h->n_key_bytes;
+ vec_dec_len (h->key_vector_or_heap, h->n_key_bytes);
}
}
diff --git a/src/vppinfra/mpcap.c b/src/vppinfra/mpcap.c
index 8389a7fd2d7..d8e36c29fbd 100644
--- a/src/vppinfra/mpcap.c
+++ b/src/vppinfra/mpcap.c
@@ -13,7 +13,7 @@
* limitations under the License.
*/
-#include <sys/fcntl.h>
+#include <fcntl.h>
#include <vppinfra/mpcap.h>
/*
diff --git a/src/vppinfra/pcap.c b/src/vppinfra/pcap.c
index 4f8b6bb429c..bdaa861db3f 100644
--- a/src/vppinfra/pcap.c
+++ b/src/vppinfra/pcap.c
@@ -37,7 +37,7 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <sys/fcntl.h>
+#include <fcntl.h>
#include <vppinfra/pcap.h>
/**
@@ -137,7 +137,7 @@ pcap_write (pcap_main_t * pm)
while (vec_len (pm->pcap_data) > pm->n_pcap_data_written)
{
- int n = vec_len (pm->pcap_data) - pm->n_pcap_data_written;
+ i64 n = vec_len (pm->pcap_data) - pm->n_pcap_data_written;
n = write (pm->file_descriptor,
vec_elt_at_index (pm->pcap_data, pm->n_pcap_data_written),
diff --git a/src/vppinfra/pcg.h b/src/vppinfra/pcg.h
new file mode 100644
index 00000000000..a7cc9201b8f
--- /dev/null
+++ b/src/vppinfra/pcg.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * PCG Random Number Generation for C.
+ *
+ * Copyright 2014-2019 Melissa O'Neill <oneill@pcg-random.org>,
+ * and the PCG Project contributors.
+ *
+ * SPDX-License-Identifier: (Apache-2.0 OR MIT)
+ *
+ * Licensed under the Apache License, Version 2.0 (provided in
+ * LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
+ * or under the MIT license (provided in LICENSE-MIT.txt and at
+ * http://opensource.org/licenses/MIT), at your option. This file may not
+ * be copied, modified, or distributed except according to those terms.
+ *
+ * Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
+ * express or implied. See your chosen license for details.
+ *
+ * For additional information about the PCG random number generation scheme,
+ * visit http://www.pcg-random.org/.
+ */
+
+/* This implements the pcg64i_random_t PCG specialized generator:
+ * https://www.pcg-random.org/using-pcg-c.html#specialized-generators
+ * This generator produces each 64-bits output exactly once, which is
+ * perfectly suited to generated non-repeating IVs. However, because of this
+ * property the entire internal state is revealed with each output.
+ * It has a 2^64 period and supports 2^63 non-overlaping streams */
+
+#define clib_pcg64i_random_r clib_pcg_setseq_64_rxs_m_xs_64_random_r
+#define clib_pcg64i_srandom_r clib_pcg_setseq_64_srandom_r
+
+typedef struct
+{
+ u64 state;
+ u64 inc;
+} clib_pcg_state_setseq_64_t;
+
+typedef clib_pcg_state_setseq_64_t clib_pcg64i_random_t;
+
+static_always_inline void
+clib_pcg_setseq_64_step_r (clib_pcg_state_setseq_64_t *rng)
+{
+ rng->state = rng->state * 6364136223846793005ULL + rng->inc;
+}
+
+static_always_inline u64
+clib_pcg_output_rxs_m_xs_64_64 (u64 state)
+{
+ u64 word =
+ ((state >> ((state >> 59u) + 5u)) ^ state) * 12605985483714917081ull;
+ return (word >> 43u) ^ word;
+}
+
+static_always_inline u64
+clib_pcg_setseq_64_rxs_m_xs_64_random_r (clib_pcg_state_setseq_64_t *rng)
+{
+ u64 oldstate = rng->state;
+ clib_pcg_setseq_64_step_r (rng);
+ return clib_pcg_output_rxs_m_xs_64_64 (oldstate);
+}
+
+static_always_inline void
+clib_pcg_setseq_64_srandom_r (clib_pcg_state_setseq_64_t *rng, u64 initstate,
+ u64 initseq)
+{
+ rng->state = 0U;
+ rng->inc = (initseq << 1u) | 1u;
+ clib_pcg_setseq_64_step_r (rng);
+ rng->state += initstate;
+ clib_pcg_setseq_64_step_r (rng);
+}
diff --git a/src/vppinfra/perfmon/bundle_core_power.c b/src/vppinfra/perfmon/bundle_core_power.c
new file mode 100644
index 00000000000..6a30cdfdde4
--- /dev/null
+++ b/src/vppinfra/perfmon/bundle_core_power.c
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#ifdef __x86_64__
+
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/perfmon/perfmon.h>
+
+static u8 *
+format_perfmon_bundle_core_power (u8 *s, va_list *args)
+{
+ clib_perfmon_ctx_t __clib_unused *ctx = va_arg (*args, clib_perfmon_ctx_t *);
+ clib_perfmon_capture_t *c = va_arg (*args, clib_perfmon_capture_t *);
+ u32 col = va_arg (*args, int);
+ u64 *d = c->data;
+
+ switch (col)
+ {
+ case 0:
+ return format (s, "%7.1f %%", (f64) 100 * d[1] / d[0]);
+ case 1:
+ return format (s, "%7.1f %%", (f64) 100 * d[2] / d[0]);
+ case 2:
+ return format (s, "%7.1f %%", (f64) 100 * d[3] / d[0]);
+ default:
+ return s;
+ }
+}
+
+#define PERF_INTEL_CODE(event, umask) ((event) | (umask) << 8)
+
+CLIB_PERFMON_BUNDLE (core_power) = {
+ .name = "core-power",
+ .desc =
+ "Core cycles where the core was running under specific turbo schedule.",
+ .type = PERF_TYPE_RAW,
+ .config[0] = PERF_INTEL_CODE (0x3c, 0x00),
+ .config[1] = PERF_INTEL_CODE (0x28, 0x07),
+ .config[2] = PERF_INTEL_CODE (0x28, 0x18),
+ .config[3] = PERF_INTEL_CODE (0x28, 0x20),
+ .n_events = 4,
+ .format_fn = format_perfmon_bundle_core_power,
+ .column_headers = CLIB_STRING_ARRAY ("Level 0", "Level 1", "Level 2"),
+};
+
+#endif
diff --git a/src/vppinfra/perfmon/bundle_default.c b/src/vppinfra/perfmon/bundle_default.c
new file mode 100644
index 00000000000..c2118aed974
--- /dev/null
+++ b/src/vppinfra/perfmon/bundle_default.c
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/perfmon/perfmon.h>
+
+static u8 *
+format_perfmon_bundle_default (u8 *s, va_list *args)
+{
+ clib_perfmon_ctx_t *ctx = va_arg (*args, clib_perfmon_ctx_t *);
+ clib_perfmon_capture_t *c = va_arg (*args, clib_perfmon_capture_t *);
+ u32 col = va_arg (*args, int);
+ u64 *d = c->data;
+
+ switch (col)
+ {
+ case 0:
+ if (ctx->ref_clock > 0)
+ return format (s, "%8.1f", (f64) d[0] / d[1] * (ctx->ref_clock / 1e9));
+ else
+ return s;
+ case 1:
+ return format (s, "%5.2f", (f64) d[2] / d[0]);
+ case 2:
+ return format (s, "%8u", d[0]);
+ case 3:
+ return format (s, "%8.2f", (f64) d[0] / c->n_ops);
+ case 4:
+ return format (s, "%8u", d[2]);
+ case 5:
+ return format (s, "%8.2f", (f64) d[2] / c->n_ops);
+ case 6:
+ return format (s, "%9u", d[3]);
+ case 7:
+ return format (s, "%9.2f", (f64) d[3] / c->n_ops);
+ case 8:
+ return format (s, "%10u", d[4]);
+ case 9:
+ return format (s, "%10.2f", (f64) d[4] / c->n_ops);
+ default:
+ return s;
+ }
+}
+
+CLIB_PERFMON_BUNDLE (default) = {
+ .name = "default",
+ .desc = "IPC, Clocks/Operatiom, Instr/Operation, Branch Total & Miss",
+ .type = PERF_TYPE_HARDWARE,
+ .config[0] = PERF_COUNT_HW_CPU_CYCLES,
+ .config[1] = PERF_COUNT_HW_REF_CPU_CYCLES,
+ .config[2] = PERF_COUNT_HW_INSTRUCTIONS,
+ .config[3] = PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+ .config[4] = PERF_COUNT_HW_BRANCH_MISSES,
+ .n_events = 5,
+ .format_fn = format_perfmon_bundle_default,
+ .column_headers = CLIB_STRING_ARRAY ("Freq", "IPC", "Clks", "Clks/Op",
+ "Inst", "Inst/Op", "Brnch", "Brnch/Op",
+ "BrMiss", "BrMiss/Op"),
+};
diff --git a/src/vppinfra/perfmon/perfmon.c b/src/vppinfra/perfmon/perfmon.c
new file mode 100644
index 00000000000..f44f225a86c
--- /dev/null
+++ b/src/vppinfra/perfmon/perfmon.c
@@ -0,0 +1,230 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/perfmon/perfmon.h>
+#include <vppinfra/format_table.h>
+
+clib_perfmon_main_t clib_perfmon_main;
+
+__clib_export clib_error_t *
+clib_perfmon_init_by_bundle_name (clib_perfmon_ctx_t *ctx, char *fmt, ...)
+{
+ clib_perfmon_main_t *pm = &clib_perfmon_main;
+ clib_perfmon_bundle_t *b = 0;
+ int group_fd = -1;
+ clib_error_t *err = 0;
+ va_list va;
+ char *bundle_name;
+
+ struct perf_event_attr pe = {
+ .size = sizeof (struct perf_event_attr),
+ .disabled = 1,
+ .exclude_kernel = 1,
+ .exclude_hv = 1,
+ .pinned = 1,
+ .exclusive = 1,
+ .read_format = (PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING),
+ };
+
+ va_start (va, fmt);
+ bundle_name = (char *) va_format (0, fmt, &va);
+ va_end (va);
+ vec_add1 (bundle_name, 0);
+
+ for (clib_perfmon_bundle_reg_t *r = pm->bundle_regs; r; r = r->next)
+ {
+ if (strncmp (r->bundle->name, bundle_name, vec_len (bundle_name) - 1))
+ continue;
+ b = r->bundle;
+ break;
+ }
+
+ if (b == 0)
+ {
+ err = clib_error_return (0, "Unknown bundle '%s'", bundle_name);
+ goto done;
+ }
+
+ clib_memset_u8 (ctx, 0, sizeof (clib_perfmon_ctx_t));
+ vec_validate_init_empty (ctx->fds, b->n_events - 1, -1);
+ ctx->bundle = b;
+
+ for (int i = 0; i < b->n_events; i++)
+ {
+ pe.config = b->config[i];
+ pe.type = b->type;
+ int fd = syscall (__NR_perf_event_open, &pe, /* pid */ 0, /* cpu */ -1,
+ /* group_fd */ group_fd, /* flags */ 0);
+ if (fd < 0)
+ {
+ err = clib_error_return_unix (0, "perf_event_open[%u]", i);
+ goto done;
+ }
+
+ if (ctx->debug)
+ fformat (stderr, "perf event %u open, fd %d\n", i, fd);
+
+ if (group_fd == -1)
+ {
+ group_fd = fd;
+ pe.pinned = 0;
+ pe.exclusive = 0;
+ }
+
+ ctx->fds[i] = fd;
+ }
+
+ ctx->group_fd = group_fd;
+ ctx->data = vec_new (u64, 3 + b->n_events);
+ ctx->ref_clock = os_cpu_clock_frequency ();
+ vec_validate (ctx->capture_groups, 0);
+
+done:
+ if (err)
+ clib_perfmon_free (ctx);
+
+ vec_free (bundle_name);
+ return err;
+}
+
+__clib_export void
+clib_perfmon_free (clib_perfmon_ctx_t *ctx)
+{
+ clib_perfmon_clear (ctx);
+ vec_free (ctx->captures);
+ vec_free (ctx->capture_groups);
+
+ for (int i = 0; i < vec_len (ctx->fds); i++)
+ if (ctx->fds[i] > -1)
+ close (ctx->fds[i]);
+ vec_free (ctx->fds);
+ vec_free (ctx->data);
+}
+
+__clib_export void
+clib_perfmon_clear (clib_perfmon_ctx_t *ctx)
+{
+ for (int i = 0; i < vec_len (ctx->captures); i++)
+ vec_free (ctx->captures[i].desc);
+ vec_reset_length (ctx->captures);
+ for (int i = 0; i < vec_len (ctx->capture_groups); i++)
+ vec_free (ctx->capture_groups[i].name);
+ vec_reset_length (ctx->capture_groups);
+}
+
+__clib_export u64 *
+clib_perfmon_capture (clib_perfmon_ctx_t *ctx, u32 n_ops, char *fmt, ...)
+{
+ u32 read_size = (ctx->bundle->n_events + 3) * sizeof (u64);
+ clib_perfmon_capture_t *c;
+ u64 d[CLIB_PERFMON_MAX_EVENTS + 3];
+ va_list va;
+
+ if ((read (ctx->group_fd, d, read_size) != read_size))
+ {
+ if (ctx->debug)
+ fformat (stderr, "reading of %u bytes failed, %s (%d)\n", read_size,
+ strerror (errno), errno);
+ return 0;
+ }
+
+ if (ctx->debug)
+ {
+ fformat (stderr, "read events: %lu enabled: %lu running: %lu ", d[0],
+ d[1], d[2]);
+ fformat (stderr, "data: [%lu", d[3]);
+ for (int i = 1; i < ctx->bundle->n_events; i++)
+ fformat (stderr, ", %lu", d[i + 3]);
+ fformat (stderr, "]\n");
+ }
+
+ vec_add2 (ctx->captures, c, 1);
+
+ va_start (va, fmt);
+ c->desc = va_format (0, fmt, &va);
+ va_end (va);
+
+ c->n_ops = n_ops;
+ c->group = vec_len (ctx->capture_groups) - 1;
+ c->time_enabled = d[1];
+ c->time_running = d[2];
+ for (int i = 0; i < CLIB_PERFMON_MAX_EVENTS; i++)
+ c->data[i] = d[i + 3];
+
+ return ctx->data + vec_len (ctx->data) - ctx->bundle->n_events;
+}
+
+__clib_export void
+clib_perfmon_capture_group (clib_perfmon_ctx_t *ctx, char *fmt, ...)
+{
+ clib_perfmon_capture_group_t *cg;
+ va_list va;
+
+ cg = vec_end (ctx->capture_groups) - 1;
+
+ if (cg->name != 0)
+ vec_add2 (ctx->capture_groups, cg, 1);
+
+ va_start (va, fmt);
+ cg->name = va_format (0, fmt, &va);
+ va_end (va);
+ ASSERT (cg->name);
+}
+
+__clib_export void
+clib_perfmon_warmup (clib_perfmon_ctx_t *ctx)
+{
+ for (u64 i = 0; i < (u64) ctx->ref_clock; i++)
+ asm volatile("" : : "r"(i * i) : "memory");
+}
+
+__clib_export u8 *
+format_perfmon_bundle (u8 *s, va_list *args)
+{
+ clib_perfmon_ctx_t *ctx = va_arg (*args, clib_perfmon_ctx_t *);
+ clib_perfmon_capture_t *c;
+ clib_perfmon_capture_group_t *cg = 0;
+ char **hdr = ctx->bundle->column_headers;
+ table_t _t = {}, *t = &_t;
+ u32 n_row = 0, col = 0;
+
+ table_add_header_row (t, 0);
+
+ for (char **h = ctx->bundle->column_headers; h[0]; h++)
+ n_row++;
+
+ vec_foreach (c, ctx->captures)
+ {
+ if (cg != ctx->capture_groups + c->group)
+ {
+ cg = ctx->capture_groups + c->group;
+ table_format_cell (t, col, -1, "%v", cg->name);
+ table_set_cell_align (t, col, -1, TTAA_LEFT);
+ table_set_cell_fg_color (t, col, -1, TTAC_BRIGHT_RED);
+
+ table_format_cell (t, col, 0, "Ops");
+ table_set_cell_fg_color (t, col, 0, TTAC_BRIGHT_YELLOW);
+
+ for (int i = 0; i < n_row; i++)
+ {
+ table_format_cell (t, col, i + 1, "%s", hdr[i]);
+ table_set_cell_fg_color (t, col, i + 1, TTAC_BRIGHT_YELLOW);
+ }
+ col++;
+ }
+ table_format_cell (t, col, -1, "%v", c->desc);
+ table_format_cell (t, col, 0, "%7u", c->n_ops);
+ for (int i = 0; i < n_row; i++)
+ table_format_cell (t, col, i + 1, "%U", ctx->bundle->format_fn, ctx, c,
+ i);
+ col++;
+ }
+
+ s = format (s, "%U", format_table, t);
+ table_free (t);
+ return s;
+}
diff --git a/src/vppinfra/perfmon/perfmon.h b/src/vppinfra/perfmon/perfmon.h
new file mode 100644
index 00000000000..5b904a632e3
--- /dev/null
+++ b/src/vppinfra/perfmon/perfmon.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
+ */
+
+#ifndef included_perfmon_perfmon_h
+#define included_perfmon_perfmon_h
+
+#include <vppinfra/cpu.h>
+#ifdef __linux__
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#endif
+
+#define CLIB_PERFMON_MAX_EVENTS 7
+typedef struct
+{
+ char *name;
+ char *desc;
+ u64 config[CLIB_PERFMON_MAX_EVENTS];
+ u32 type;
+ u8 n_events;
+ format_function_t *format_fn;
+ char **column_headers;
+} clib_perfmon_bundle_t;
+
+typedef struct
+{
+ u64 time_enabled;
+ u64 time_running;
+ u64 data[CLIB_PERFMON_MAX_EVENTS];
+ u8 *desc;
+ u32 n_ops;
+ u32 group;
+} clib_perfmon_capture_t;
+
+typedef struct
+{
+ u8 *name;
+ u32 start;
+} clib_perfmon_capture_group_t;
+
+typedef struct
+{
+ int group_fd;
+ int *fds;
+ clib_perfmon_bundle_t *bundle;
+ u64 *data;
+ u8 debug : 1;
+ u32 n_captures;
+ clib_perfmon_capture_t *captures;
+ clib_perfmon_capture_group_t *capture_groups;
+ f64 ref_clock;
+} clib_perfmon_ctx_t;
+
+typedef struct clib_perfmon_bundle_reg
+{
+ clib_perfmon_bundle_t *bundle;
+ struct clib_perfmon_bundle_reg *next;
+} clib_perfmon_bundle_reg_t;
+
+typedef struct
+{
+ clib_perfmon_bundle_reg_t *bundle_regs;
+} clib_perfmon_main_t;
+
+extern clib_perfmon_main_t clib_perfmon_main;
+
+static_always_inline void
+clib_perfmon_ioctl (int fd, u32 req)
+{
+#ifdef __linux__
+#ifdef __x86_64__
+ asm volatile("syscall"
+ :
+ : "D"(fd), "S"(req), "a"(__NR_ioctl), "d"(PERF_IOC_FLAG_GROUP)
+ : "rcx", "r11" /* registers modified by kernel */);
+#else
+ ioctl (fd, req, PERF_IOC_FLAG_GROUP);
+#endif
+#endif /* linux */
+}
+
+clib_error_t *clib_perfmon_init_by_bundle_name (clib_perfmon_ctx_t *ctx,
+ char *fmt, ...);
+void clib_perfmon_free (clib_perfmon_ctx_t *ctx);
+void clib_perfmon_warmup (clib_perfmon_ctx_t *ctx);
+void clib_perfmon_clear (clib_perfmon_ctx_t *ctx);
+u64 *clib_perfmon_capture (clib_perfmon_ctx_t *ctx, u32 n_ops, char *fmt, ...);
+void clib_perfmon_capture_group (clib_perfmon_ctx_t *ctx, char *fmt, ...);
+format_function_t format_perfmon_bundle;
+
+#ifdef __linux__
+static_always_inline void
+clib_perfmon_reset (clib_perfmon_ctx_t *ctx)
+{
+ clib_perfmon_ioctl (ctx->group_fd, PERF_EVENT_IOC_RESET);
+}
+static_always_inline void
+clib_perfmon_enable (clib_perfmon_ctx_t *ctx)
+{
+ clib_perfmon_ioctl (ctx->group_fd, PERF_EVENT_IOC_ENABLE);
+}
+static_always_inline void
+clib_perfmon_disable (clib_perfmon_ctx_t *ctx)
+{
+ clib_perfmon_ioctl (ctx->group_fd, PERF_EVENT_IOC_DISABLE);
+}
+#elif __FreeBSD__
+static_always_inline void
+clib_perfmon_reset (clib_perfmon_ctx_t *ctx)
+{
+ /* TODO: Implement for FreeBSD */
+}
+static_always_inline void
+clib_perfmon_enable (clib_perfmon_ctx_t *ctx)
+{
+ /* TODO: Implement for FreeBSD */
+}
+static_always_inline void
+clib_perfmon_disable (clib_perfmon_ctx_t *ctx)
+{
+ /* TODO: Implement for FreeBSD */
+}
+#endif /* linux */
+
+#define CLIB_PERFMON_BUNDLE(x) \
+ static clib_perfmon_bundle_reg_t clib_perfmon_bundle_reg_##x; \
+ static clib_perfmon_bundle_t clib_perfmon_bundle_##x; \
+ static void __clib_constructor clib_perfmon_bundle_reg_fn_##x (void) \
+ { \
+ clib_perfmon_bundle_reg_##x.bundle = &clib_perfmon_bundle_##x; \
+ clib_perfmon_bundle_reg_##x.next = clib_perfmon_main.bundle_regs; \
+ clib_perfmon_main.bundle_regs = &clib_perfmon_bundle_reg_##x; \
+ } \
+ static clib_perfmon_bundle_t clib_perfmon_bundle_##x
+
+#endif
diff --git a/src/vppinfra/pmalloc.c b/src/vppinfra/pmalloc.c
index a0b1d1f1104..85b9db9d56c 100644
--- a/src/vppinfra/pmalloc.c
+++ b/src/vppinfra/pmalloc.c
@@ -17,12 +17,17 @@
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
+#ifdef __FreeBSD__
+#include <sys/memrange.h>
+#endif /* __FreeBSD__ */
#include <fcntl.h>
#include <unistd.h>
#include <sched.h>
#include <vppinfra/format.h>
+#ifdef __linux__
#include <vppinfra/linux/sysfs.h>
+#endif
#include <vppinfra/mem.h>
#include <vppinfra/hash.h>
#include <vppinfra/pmalloc.h>
@@ -182,8 +187,9 @@ next_chunk:
}
static void
-pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count)
+pmalloc_update_lookup_table (clib_pmalloc_main_t *pm, u32 first, u32 count)
{
+#ifdef __linux
uword seek, va, pa, p;
int fd;
u32 elts_per_page = 1U << (pm->def_log2_page_sz - pm->lookup_log2_page_sz);
@@ -221,6 +227,45 @@ pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count)
if (fd != -1)
close (fd);
+#elif defined(__FreeBSD__)
+ struct mem_extract meme;
+ uword p;
+ int fd;
+ u32 elts_per_page = 1U << (pm->def_log2_page_sz - pm->lookup_log2_page_sz);
+
+ vec_validate_aligned (pm->lookup_table,
+ vec_len (pm->pages) * elts_per_page - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ p = (uword) first * elts_per_page;
+ if (pm->flags & CLIB_PMALLOC_F_NO_PAGEMAP)
+ {
+ while (p < (uword) elts_per_page * count)
+ {
+ pm->lookup_table[p] =
+ pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz);
+ p++;
+ }
+ return;
+ }
+
+ fd = open ((char *) "/dev/mem", O_RDONLY);
+ if (fd == -1)
+ return;
+
+ while (p < (uword) elts_per_page * count)
+ {
+ meme.me_vaddr =
+ pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz);
+ if (ioctl (fd, MEM_EXTRACT_PADDR, &meme) == -1)
+ continue;
+ pm->lookup_table[p] = meme.me_vaddr - meme.me_paddr;
+ p++;
+ }
+ return;
+#else
+#error "Unsupported OS"
+#endif
}
static inline clib_pmalloc_page_t *
@@ -241,6 +286,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
return 0;
}
+#ifdef __linux__
if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
{
pm->error = clib_sysfs_prealloc_hugepages (numa_node,
@@ -249,6 +295,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
if (pm->error)
return 0;
}
+#endif /* __linux__ */
rv = clib_mem_set_numa_affinity (numa_node, /* force */ 1);
if (rv == CLIB_MEM_ERROR && numa_node != 0)
@@ -271,8 +318,10 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
}
else
{
+#ifdef __linux__
if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
mmap_flags |= MAP_HUGETLB;
+#endif /* __linux__ */
mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
a->fd = -1;
@@ -476,8 +525,8 @@ clib_pmalloc_alloc_aligned_on_numa (clib_pmalloc_main_t * pm, uword size,
return clib_pmalloc_alloc_inline (pm, 0, size, align, numa_node);
}
-void *
-clib_pmalloc_alloc_aligned (clib_pmalloc_main_t * pm, uword size, uword align)
+__clib_export void *
+clib_pmalloc_alloc_aligned (clib_pmalloc_main_t *pm, uword size, uword align)
{
return clib_pmalloc_alloc_inline (pm, 0, size, align,
CLIB_PMALLOC_NUMA_LOCAL);
@@ -627,7 +676,6 @@ format_pmalloc (u8 * s, va_list * va)
format_clib_error, pm->error);
- /* *INDENT-OFF* */
pool_foreach (a, pm->arenas)
{
u32 *page_index;
@@ -645,7 +693,6 @@ format_pmalloc (u8 * s, va_list * va)
format_pmalloc_page, pp, verbose);
}
}
- /* *INDENT-ON* */
return s;
}
diff --git a/src/vppinfra/pool.c b/src/vppinfra/pool.c
index 78361b5457e..1f3b96f0f0a 100644
--- a/src/vppinfra/pool.c
+++ b/src/vppinfra/pool.c
@@ -38,93 +38,39 @@
#include <vppinfra/pool.h>
__clib_export void
-_pool_init_fixed (void **pool_ptr, u32 elt_size, u32 max_elts)
+_pool_init_fixed (void **pool_ptr, uword elt_size, uword max_elts, uword align)
{
- u8 *mmap_base;
- u64 vector_size;
- u64 free_index_size;
- u64 total_size;
- u64 page_size;
- pool_header_t *fh;
- vec_header_t *vh;
+ uword *b;
+ pool_header_t *ph;
u8 *v;
- u32 *fi;
u32 i;
- u32 set_bits;
+ vec_attr_t va = { .elt_sz = elt_size,
+ .align = align,
+ .hdr_sz = sizeof (pool_header_t) };
ASSERT (elt_size);
ASSERT (max_elts);
- vector_size = pool_aligned_header_bytes + (u64) elt_size *max_elts;
- free_index_size = vec_header_bytes (0) + sizeof (u32) * max_elts;
+ v = _vec_alloc_internal (max_elts, &va);
- /* Round up to a cache line boundary */
- vector_size = (vector_size + CLIB_CACHE_LINE_BYTES - 1)
- & ~(CLIB_CACHE_LINE_BYTES - 1);
-
- free_index_size = (free_index_size + CLIB_CACHE_LINE_BYTES - 1)
- & ~(CLIB_CACHE_LINE_BYTES - 1);
-
- total_size = vector_size + free_index_size;
-
- /* Round up to an even number of pages */
- page_size = clib_mem_get_page_size ();
- total_size = (total_size + page_size - 1) & ~(page_size - 1);
-
- /* mmap demand zero memory */
-
- mmap_base = mmap (0, total_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-
- if (mmap_base == MAP_FAILED)
- {
- clib_unix_warning ("mmap");
- *pool_ptr = 0;
- }
-
- /* First comes the pool header */
- fh = (pool_header_t *) mmap_base;
- /* Find the user vector pointer */
- v = (u8 *) (mmap_base + pool_aligned_header_bytes);
- /* Finally, the vector header */
- vh = _vec_find (v);
-
- fh->free_bitmap = 0; /* No free elts (yet) */
- fh->max_elts = max_elts;
- fh->mmap_base = mmap_base;
- fh->mmap_size = total_size;
-
- vh->len = max_elts;
+ ph = pool_header (v);
+ ph->max_elts = max_elts;
/* Build the free-index vector */
- vh = (vec_header_t *) (v + vector_size);
- vh->len = max_elts;
- fi = (u32 *) (vh + 1);
-
- fh->free_indices = fi;
+ vec_validate_aligned (ph->free_indices, max_elts - 1, CLIB_CACHE_LINE_BYTES);
+ for (i = 0; i < max_elts; i++)
+ ph->free_indices[i] = (max_elts - 1) - i;
/* Set the entire free bitmap */
- clib_bitmap_alloc (fh->free_bitmap, max_elts);
- clib_memset (fh->free_bitmap, 0xff,
- vec_len (fh->free_bitmap) * sizeof (uword));
-
- /* Clear any extraneous set bits */
- set_bits = vec_len (fh->free_bitmap) * BITS (uword);
+ clib_bitmap_alloc (ph->free_bitmap, max_elts);
- for (i = max_elts; i < set_bits; i++)
- fh->free_bitmap = clib_bitmap_set (fh->free_bitmap, i, 0);
+ for (b = ph->free_bitmap, i = max_elts; i >= uword_bits;
+ i -= uword_bits, b++)
+ b[0] = ~0ULL;
- /* Create the initial free vector */
- for (i = 0; i < max_elts; i++)
- fi[i] = (max_elts - 1) - i;
+ if (i)
+ b[0] = pow2_mask (i);
*pool_ptr = v;
}
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/pool.h b/src/vppinfra/pool.h
index 6f16e617cd2..07c9269c6d8 100644
--- a/src/vppinfra/pool.h
+++ b/src/vppinfra/pool.h
@@ -61,31 +61,21 @@ typedef struct
/** Maximum size of the pool, in elements */
u32 max_elts;
- /** mmap segment info: base + length */
- u8 *mmap_base;
- u64 mmap_size;
-
} pool_header_t;
-/** Align pool header so that pointers are naturally aligned. */
-#define pool_aligned_header_bytes \
- vec_aligned_header_bytes (sizeof (pool_header_t), sizeof (void *))
-
/** Get pool header from user pool pointer */
always_inline pool_header_t *
pool_header (void *v)
{
- return vec_aligned_header (v, sizeof (pool_header_t), sizeof (void *));
+ return vec_header (v);
}
-extern void _pool_init_fixed (void **, u32, u32);
-extern void fpool_free (void *);
+void _pool_init_fixed (void **pool_ptr, uword elt_sz, uword max_elts,
+ uword align);
/** initialize a fixed-size, preallocated pool */
-#define pool_init_fixed(pool,max_elts) \
-{ \
- _pool_init_fixed((void **)&(pool),sizeof(pool[0]),max_elts); \
-}
+#define pool_init_fixed(P, E) \
+ _pool_init_fixed ((void **) &(P), _vec_elt_sz (P), E, _vec_align (P, 0));
/** Validate a pool */
always_inline void
@@ -103,23 +93,6 @@ pool_validate (void *v)
ASSERT (clib_bitmap_get (p->free_bitmap, p->free_indices[i]) == 1);
}
-always_inline void
-pool_header_validate_index (void *v, uword index)
-{
- pool_header_t *p = pool_header (v);
-
- if (v)
- vec_validate (p->free_bitmap, index / BITS (uword));
-}
-
-#define pool_validate_index(v,i) \
-do { \
- uword __pool_validate_index = (i); \
- vec_validate_ha ((v), __pool_validate_index, \
- pool_aligned_header_bytes, /* align */ 0); \
- pool_header_validate_index ((v), __pool_validate_index); \
-} while (0)
-
/** Number of active elements in a pool.
* @return Number of active elements in a pool
*/
@@ -162,88 +135,87 @@ pool_header_bytes (void *v)
/** Local variable naming macro. */
#define _pool_var(v) _pool_##v
-/** Queries whether pool has at least N_FREE free elements. */
-always_inline uword
-pool_free_elts (void *v)
+/** Number of elements that can fit into pool with current allocation */
+#define pool_max_len(P) vec_max_len (P)
+
+/** Number of free elements in pool */
+static_always_inline uword
+_pool_free_elts (void *p, uword elt_sz)
{
- pool_header_t *p = pool_header (v);
- uword n_free = 0;
+ pool_header_t *ph;
+ uword n_free;
- if (v)
- {
- n_free += vec_len (p->free_indices);
-
- /*
- * Space left at end of vector?
- * Fixed-size pools have max_elts set non-zero,
- */
- if (p->max_elts == 0)
- n_free += vec_capacity (v, sizeof (p[0])) - vec_len (v);
- }
+ if (p == 0)
+ return 0;
+
+ ph = pool_header (p);
+
+ n_free = vec_len (ph->free_indices);
+
+ /* Fixed-size pools have max_elts set non-zero */
+ if (ph->max_elts == 0)
+ n_free += _vec_max_len (p, elt_sz) - vec_len (p);
return n_free;
}
+#define pool_free_elts(P) _pool_free_elts ((void *) (P), _vec_elt_sz (P))
+
/** Allocate an object E from a pool P (general version).
First search free list. If nothing is free extend vector of objects.
*/
-#define _pool_get_aligned_internal_numa(P,E,A,Z,N) \
-do { \
- pool_header_t * _pool_var (p) = pool_header (P); \
- uword _pool_var (l); \
- \
- STATIC_ASSERT(A==0 || ((A % sizeof(P[0]))==0) \
- || ((sizeof(P[0]) % A) == 0), \
- "Pool aligned alloc of incorrectly sized object"); \
- _pool_var (l) = 0; \
- if (P) \
- _pool_var (l) = vec_len (_pool_var (p)->free_indices); \
- \
- if (_pool_var (l) > 0) \
- { \
- /* Return free element from free list. */ \
- uword _pool_var (i) = \
- _pool_var (p)->free_indices[_pool_var (l) - 1]; \
- (E) = (P) + _pool_var (i); \
- _pool_var (p)->free_bitmap = \
- clib_bitmap_andnoti_notrim (_pool_var (p)->free_bitmap, \
- _pool_var (i)); \
- _vec_len (_pool_var (p)->free_indices) = _pool_var (l) - 1; \
- CLIB_MEM_UNPOISON((E), sizeof((E)[0])); \
- } \
- else \
- { \
- /* fixed-size, preallocated pools cannot expand */ \
- if ((P) && _pool_var(p)->max_elts) \
- { \
- clib_warning ("can't expand fixed-size pool"); \
- os_out_of_memory(); \
- } \
- /* Nothing on free list, make a new element and return it. */ \
- P = _vec_resize_numa (P, \
- /* length_increment */ 1, \
- /* new size */ (vec_len (P) + 1) * sizeof (P[0]), \
- pool_aligned_header_bytes, \
- /* align */ (A), \
- /* numa */ (N)); \
- E = vec_end (P) - 1; \
- } \
- if (Z) \
- memset(E, 0, sizeof(*E)); \
-} while (0)
-#define pool_get_aligned_zero_numa(P,E,A,Z,S) \
- _pool_get_aligned_internal_numa(P,E,A,Z,S)
+static_always_inline void
+_pool_get (void **pp, void **ep, uword align, int zero, uword elt_sz)
+{
+ uword len = 0;
+ void *p = pp[0];
+ void *e;
+ vec_attr_t va = { .hdr_sz = sizeof (pool_header_t),
+ .elt_sz = elt_sz,
+ .align = align };
+
+ if (p)
+ {
+ pool_header_t *ph = pool_header (p);
+ uword n_free = vec_len (ph->free_indices);
+
+ if (n_free)
+ {
+ uword index = ph->free_indices[n_free - 1];
+ e = p + index * elt_sz;
+ ph->free_bitmap =
+ clib_bitmap_andnoti_notrim (ph->free_bitmap, index);
+ vec_set_len (ph->free_indices, n_free - 1);
+ clib_mem_unpoison (e, elt_sz);
+ goto done;
+ }
+
+ if (ph->max_elts)
+ {
+ clib_warning ("can't expand fixed-size pool");
+ os_out_of_memory ();
+ }
+ }
-#define pool_get_aligned_numa(P,E,A,S) \
- _pool_get_aligned_internal_numa(P,E,A,0/*zero*/,S)
+ len = vec_len (p);
-#define pool_get_numa(P,E,S) \
- _pool_get_aligned_internal_numa(P,E,0/*align*/,0/*zero*/,S)
+ /* Nothing on free list, make a new element and return it. */
+ p = _vec_realloc_internal (p, len + 1, &va);
+ e = p + len * elt_sz;
-#define _pool_get_aligned_internal(P,E,A,Z) \
- _pool_get_aligned_internal_numa(P,E,A,Z,VEC_NUMA_UNSPECIFIED)
+ _vec_update_pointer (pp, p);
+
+done:
+ ep[0] = e;
+ if (zero)
+ clib_memset_u8 (e, 0, elt_sz);
+}
+
+#define _pool_get_aligned_internal(P, E, A, Z) \
+ _pool_get ((void **) &(P), (void **) &(E), _vec_align (P, A), Z, \
+ _vec_elt_sz (P))
/** Allocate an object E from a pool P with alignment A */
#define pool_get_aligned(P,E,A) _pool_get_aligned_internal(P,E,A,0)
@@ -257,114 +229,162 @@ do { \
/** Allocate an object E from a pool P and zero it */
#define pool_get_zero(P,E) pool_get_aligned_zero(P,E,0)
-/** See if pool_get will expand the pool or not */
-#define pool_get_aligned_will_expand(P,YESNO,A) \
-do { \
- pool_header_t * _pool_var (p) = pool_header (P); \
- uword _pool_var (l); \
- \
- _pool_var (l) = 0; \
- if (P) \
- { \
- if (_pool_var (p)->max_elts) \
- _pool_var (l) = _pool_var (p)->max_elts; \
- else \
- _pool_var (l) = vec_len (_pool_var (p)->free_indices); \
- } \
- \
- /* Free elements, certainly won't expand */ \
- if (_pool_var (l) > 0) \
- YESNO=0; \
- else \
- { \
- /* Nothing on free list, make a new element and return it. */ \
- YESNO = _vec_resize_will_expand \
- (P, \
- /* length_increment */ 1, \
- /* new size */ (vec_len (P) + 1) * sizeof (P[0]), \
- pool_aligned_header_bytes, \
- /* align */ (A)); \
- } \
-} while (0)
+always_inline int
+_pool_get_will_expand (void *p, uword elt_sz)
+{
+ pool_header_t *ph;
+ uword len;
+
+ if (p == 0)
+ return 1;
+
+ ph = pool_header (p);
+
+ if (ph->max_elts)
+ len = ph->max_elts;
+ else
+ len = vec_len (ph->free_indices);
+
+ /* Free elements, certainly won't expand */
+ if (len > 0)
+ return 0;
+
+ return _vec_resize_will_expand (p, 1, elt_sz);
+}
+
+#define pool_get_will_expand(P) _pool_get_will_expand (P, sizeof ((P)[0]))
+
+always_inline int
+_pool_put_will_expand (void *p, uword index, uword elt_sz)
+{
+ pool_header_t *ph = pool_header (p);
+
+ if (clib_bitmap_will_expand (ph->free_bitmap, index))
+ return 1;
-/** Tell the caller if pool get will expand the pool */
-#define pool_get_will_expand(P,YESNO) pool_get_aligned_will_expand(P,YESNO,0)
+ if (vec_resize_will_expand (ph->free_indices, 1))
+ return 1;
+
+ return 0;
+}
+
+#define pool_put_will_expand(P, E) \
+ _pool_put_will_expand (P, (E) - (P), sizeof ((P)[0]))
/** Use free bitmap to query whether given element is free. */
-#define pool_is_free(P,E) \
-({ \
- pool_header_t * _pool_var (p) = pool_header (P); \
- uword _pool_var (i) = (E) - (P); \
- (_pool_var (i) < vec_len (P)) ? clib_bitmap_get (_pool_var (p)->free_bitmap, _pool_i) : 1; \
-})
+static_always_inline int
+pool_is_free_index (void *p, uword index)
+{
+ pool_header_t *ph = pool_header (p);
+ return index < vec_len (p) ? clib_bitmap_get (ph->free_bitmap, index) : 1;
+}
-/** Use free bitmap to query whether given index is free */
-#define pool_is_free_index(P,I) pool_is_free((P),(P)+(I))
+#define pool_is_free(P, E) pool_is_free_index ((void *) (P), (E) - (P))
/** Free an object E in pool P. */
-#define pool_put(P, E) \
- do \
- { \
- typeof (P) _pool_var (p__) = (P); \
- typeof (E) _pool_var (e__) = (E); \
- pool_header_t *_pool_var (p) = pool_header (_pool_var (p__)); \
- uword _pool_var (l) = _pool_var (e__) - _pool_var (p__); \
- if (_pool_var (p)->max_elts == 0) \
- ASSERT (vec_is_member (_pool_var (p__), _pool_var (e__))); \
- ASSERT (!pool_is_free (_pool_var (p__), _pool_var (e__))); \
- \
- /* Add element to free bitmap and to free list. */ \
- _pool_var (p)->free_bitmap = \
- clib_bitmap_ori_notrim (_pool_var (p)->free_bitmap, _pool_var (l)); \
- \
- /* Preallocated pool? */ \
- if (_pool_var (p)->max_elts) \
- { \
- ASSERT (_pool_var (l) < _pool_var (p)->max_elts); \
- _pool_var (p) \
- ->free_indices[_vec_len (_pool_var (p)->free_indices)] = \
- _pool_var (l); \
- _vec_len (_pool_var (p)->free_indices) += 1; \
- } \
- else \
- vec_add1 (_pool_var (p)->free_indices, _pool_var (l)); \
- \
- CLIB_MEM_POISON (_pool_var (e__), sizeof (_pool_var (e__)[0])); \
- } \
- while (0)
-
-/** Free pool element with given index. */
-#define pool_put_index(p,i) \
-do { \
- typeof (p) _e = (p) + (i); \
- pool_put (p, _e); \
-} while (0)
+static_always_inline void
+_pool_put_index (void *p, uword index, uword elt_sz)
+{
+ pool_header_t *ph = pool_header (p);
+
+ ASSERT (index < ph->max_elts ? ph->max_elts : vec_len (p));
+ ASSERT (!pool_is_free_index (p, index));
+
+ /* Add element to free bitmap and to free list. */
+ ph->free_bitmap = clib_bitmap_ori_notrim (ph->free_bitmap, index);
+
+ /* Preallocated pool? */
+ if (ph->max_elts)
+ {
+ u32 len = _vec_len (ph->free_indices);
+ vec_set_len (ph->free_indices, len + 1);
+ ph->free_indices[len] = index;
+ }
+ else
+ vec_add1 (ph->free_indices, index);
+
+ clib_mem_poison (p + index * elt_sz, elt_sz);
+}
+
+#define pool_put_index(P, I) _pool_put_index ((void *) (P), I, _vec_elt_sz (P))
+#define pool_put(P, E) pool_put_index (P, (E) - (P))
/** Allocate N more free elements to pool (general version). */
-#define pool_alloc_aligned(P,N,A) \
-do { \
- pool_header_t * _p; \
- \
- if ((P)) \
- { \
- _p = pool_header (P); \
- if (_p->max_elts) \
- { \
- clib_warning ("Can't expand fixed-size pool"); \
- os_out_of_memory(); \
- } \
- } \
- \
- (P) = _vec_resize ((P), 0, (vec_len (P) + (N)) * sizeof (P[0]), \
- pool_aligned_header_bytes, \
- (A)); \
- _p = pool_header (P); \
- vec_resize (_p->free_indices, (N)); \
- _vec_len (_p->free_indices) -= (N); \
-} while (0)
-/** Allocate N more free elements to pool (unspecified alignment). */
-#define pool_alloc(P,N) pool_alloc_aligned(P,N,0)
+static_always_inline void
+_pool_alloc (void **pp, uword n_elts, uword align, void *heap, uword elt_sz)
+{
+ pool_header_t *ph = pool_header (pp[0]);
+ uword len = vec_len (pp[0]);
+ const vec_attr_t va = { .hdr_sz = sizeof (pool_header_t),
+ .elt_sz = elt_sz,
+ .align = align,
+ .heap = heap };
+
+ if (ph && ph->max_elts)
+ {
+ clib_warning ("Can't expand fixed-size pool");
+ os_out_of_memory ();
+ }
+
+ pp[0] = _vec_resize_internal (pp[0], len + n_elts, &va);
+ _vec_set_len (pp[0], len, elt_sz);
+ clib_mem_poison (pp[0] + len * elt_sz, n_elts * elt_sz);
+
+ ph = pool_header (pp[0]);
+ vec_resize (ph->free_indices, n_elts);
+ vec_dec_len (ph->free_indices, n_elts);
+ clib_bitmap_validate (ph->free_bitmap, (len + n_elts) ?: 1);
+}
+
+#define pool_alloc_aligned_heap(P, N, A, H) \
+ _pool_alloc ((void **) &(P), N, _vec_align (P, A), H, _vec_elt_sz (P))
+
+#define pool_alloc_heap(P, N, H) pool_alloc_aligned_heap (P, N, 0, H)
+#define pool_alloc_aligned(P, N, A) pool_alloc_aligned_heap (P, N, A, 0)
+#define pool_alloc(P, N) pool_alloc_aligned_heap (P, N, 0, 0)
+
+static_always_inline void *
+_pool_dup (void *p, uword align, uword elt_sz)
+{
+ pool_header_t *nph, *ph = pool_header (p);
+ uword len = vec_len (p);
+ const vec_attr_t va = { .hdr_sz = sizeof (pool_header_t),
+ .elt_sz = elt_sz,
+ .align = align };
+ void *n;
+
+ if (ph && ph->max_elts)
+ {
+ clib_warning ("Can't expand fixed-size pool");
+ os_out_of_memory ();
+ }
+
+ n = _vec_alloc_internal (len, &va);
+ nph = pool_header (n);
+ clib_memset_u8 (nph, 0, sizeof (vec_header_t));
+
+ if (len)
+ {
+ u32 *fi;
+ vec_foreach (fi, ph->free_indices)
+ clib_mem_unpoison (p + elt_sz * fi[0], elt_sz);
+
+ clib_memcpy_fast (n, p, len * elt_sz);
+
+ nph->free_bitmap = clib_bitmap_dup (ph->free_bitmap);
+ nph->free_indices = vec_dup (ph->free_indices);
+
+ vec_foreach (fi, ph->free_indices)
+ {
+ uword offset = elt_sz * fi[0];
+ clib_mem_poison (p + offset, elt_sz);
+ clib_mem_poison (n + offset, elt_sz);
+ }
+ }
+
+ return n;
+}
/**
* Return copy of pool with alignment
@@ -373,28 +393,9 @@ do { \
* @param A alignment (may be zero)
* @return copy of pool
*/
-#define pool_dup_aligned(P,A) \
-({ \
- typeof (P) _pool_var (new) = 0; \
- pool_header_t * _pool_var (ph), * _pool_var (new_ph); \
- u32 _pool_var (n) = pool_len (P); \
- if ((P)) \
- { \
- _pool_var (new) = _vec_resize (_pool_var (new), _pool_var (n), \
- _pool_var (n) * sizeof ((P)[0]), \
- pool_aligned_header_bytes, (A)); \
- clib_memcpy_fast (_pool_var (new), (P), \
- _pool_var (n) * sizeof ((P)[0])); \
- _pool_var (ph) = pool_header (P); \
- _pool_var (new_ph) = pool_header (_pool_var (new)); \
- _pool_var (new_ph)->free_bitmap = \
- clib_bitmap_dup (_pool_var (ph)->free_bitmap); \
- _pool_var (new_ph)->free_indices = \
- vec_dup (_pool_var (ph)->free_indices); \
- _pool_var (new_ph)->max_elts = _pool_var (ph)->max_elts; \
- } \
- _pool_var (new); \
-})
+
+#define pool_dup_aligned(P, A) \
+ _pool_dup (P, _vec_align (P, A), _vec_elt_sz (P))
/**
* Return copy of pool without alignment
@@ -405,29 +406,19 @@ do { \
#define pool_dup(P) pool_dup_aligned(P,0)
/** Low-level free pool operator (do not call directly). */
-always_inline void *
-_pool_free (void *v)
+always_inline void
+_pool_free (void **v)
{
- pool_header_t *p = pool_header (v);
- if (!v)
- return v;
- clib_bitmap_free (p->free_bitmap);
+ pool_header_t *p = pool_header (v[0]);
+ if (!p)
+ return;
- if (p->max_elts)
- {
- int rv;
+ clib_bitmap_free (p->free_bitmap);
- rv = munmap (p->mmap_base, p->mmap_size);
- if (rv)
- clib_unix_warning ("munmap");
- }
- else
- {
- vec_free (p->free_indices);
- vec_free_h (v, pool_aligned_header_bytes);
- }
- return 0;
+ vec_free (p->free_indices);
+ _vec_free (v);
}
+#define pool_free(p) _pool_free ((void **) &(p))
static_always_inline uword
pool_get_first_index (void *pool)
@@ -443,9 +434,6 @@ pool_get_next_index (void *pool, uword last)
return clib_bitmap_next_clear (h->free_bitmap, last + 1);
}
-/** Free a pool. */
-#define pool_free(p) (p) = _pool_free(p)
-
/** Optimized iteration through pool.
@param LO pointer to first element in chunk
@@ -569,11 +557,25 @@ do { \
_pool_var(rv); \
})
-#define pool_foreach_index(i,v) \
- if (v) \
- for (i = pool_get_first_index (v); \
- i < vec_len (v); \
- i = pool_get_next_index (v, i)) \
+#define pool_foreach_index(i, v) \
+ if (v) \
+ for (i = pool_get_first_index (v); i < vec_len (v); \
+ i = pool_get_next_index (v, i))
+
+/* Iterate pool by index from s to e */
+#define pool_foreach_stepping_index(i, s, e, v) \
+ for ((i) = \
+ (pool_is_free_index ((v), (s)) ? pool_get_next_index ((v), (s)) : \
+ (s)); \
+ (i) < (e); (i) = pool_get_next_index ((v), (i)))
+
+/* works only for pool of pointers, e is declared inside macro */
+#define pool_foreach_pointer(e, p) \
+ if (p) \
+ for (typeof ((p)[0]) *_t = (p) + pool_get_first_index (p), (e) = *_t, \
+ *_end = vec_end (p); \
+ _t < _end; _t = (p) + pool_get_next_index (p, _t - (p)), \
+ (e) = _t < _end ? *_t : (e))
/**
* @brief Remove all elements from a pool in a safe way
diff --git a/src/vppinfra/random_buffer.h b/src/vppinfra/random_buffer.h
index 320394d1862..12343c10535 100644
--- a/src/vppinfra/random_buffer.h
+++ b/src/vppinfra/random_buffer.h
@@ -42,9 +42,7 @@
#include <vppinfra/random_isaac.h>
#include <vppinfra/warnings.h>
-/* *INDENT-OFF* */
WARN_OFF(array-bounds)
-/* *INDENT-ON* */
typedef struct
{
@@ -54,6 +52,9 @@ typedef struct
/* Random buffer. */
uword *buffer;
+ /* An actual length to be applied before using the buffer. */
+ uword next_read_len;
+
/* Cache up to 1 word worth of bytes for random data
less than one word at a time. */
uword n_cached_bytes;
@@ -84,11 +85,16 @@ clib_random_buffer_get_data (clib_random_buffer_t * b, uword n_bytes)
{
uword n_words, i, l;
+ if (b->buffer)
+ vec_set_len (b->buffer, b->next_read_len);
+ else
+ ASSERT (b->next_read_len == 0);
+
l = b->n_cached_bytes;
if (n_bytes <= l)
{
b->n_cached_bytes = l - n_bytes;
- return &b->cached_bytes[l];
+ return &b->cached_bytes[l - n_bytes];
}
n_words = n_bytes / sizeof (uword);
@@ -100,21 +106,19 @@ clib_random_buffer_get_data (clib_random_buffer_t * b, uword n_bytes)
clib_random_buffer_fill (b, n_words);
i = vec_len (b->buffer) - n_words;
- _vec_len (b->buffer) = i;
+ b->next_read_len = i;
if (n_bytes < sizeof (uword))
{
b->cached_word = b->buffer[i];
b->n_cached_bytes = sizeof (uword) - n_bytes;
- return b->cached_bytes;
+ return &b->cached_bytes[sizeof (uword) - n_bytes];
}
else
return b->buffer + i;
}
-/* *INDENT-OFF* */
WARN_ON(array-bounds)
-/* *INDENT-ON* */
#endif /* included_clib_random_buffer_h */
diff --git a/src/vppinfra/ring.h b/src/vppinfra/ring.h
index be61dc44978..ae25e40b5c0 100644
--- a/src/vppinfra/ring.h
+++ b/src/vppinfra/ring.h
@@ -29,24 +29,28 @@ typedef struct
always_inline clib_ring_header_t *
clib_ring_header (void *v)
{
- return vec_aligned_header (v, sizeof (clib_ring_header_t), sizeof (void *));
+ return vec_header (v);
+}
+
+always_inline void
+clib_ring_reset (void *v)
+{
+ clib_ring_header_t *h = clib_ring_header (v);
+ h->next = 0;
+ h->n_enq = 0;
}
always_inline void
clib_ring_new_inline (void **p, u32 elt_bytes, u32 size, u32 align)
{
void *v;
- clib_ring_header_t *h;
+ vec_attr_t va = { .elt_sz = elt_bytes,
+ .hdr_sz = sizeof (clib_ring_header_t),
+ .align = align };
- v = _vec_resize ((void *) 0,
- /* length increment */ size,
- /* data bytes */ elt_bytes * size,
- /* header bytes */ sizeof (h[0]),
- /* data align */ align);
+ v = _vec_alloc_internal (size, &va);
- h = clib_ring_header (v);
- h->next = 0;
- h->n_enq = 0;
+ clib_ring_reset (v);
p[0] = v;
}
@@ -56,7 +60,7 @@ clib_ring_new_inline (void **p, u32 elt_bytes, u32 size, u32 align)
#define clib_ring_new(ring, size) \
{ clib_ring_new_inline ((void **)&(ring), sizeof(ring[0]), size, 0);}
-#define clib_ring_free(f) vec_free_h((f), sizeof(clib_ring_header_t))
+#define clib_ring_free(f) vec_free ((f))
always_inline u32
clib_ring_n_enq (void *v)
diff --git a/src/vppinfra/sanitizer.c b/src/vppinfra/sanitizer.c
deleted file mode 100644
index fab1cdca136..00000000000
--- a/src/vppinfra/sanitizer.c
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifdef CLIB_SANITIZE_ADDR
-
-#include <vppinfra/sanitizer.h>
-
-__clib_export clib_sanitizer_main_t sanitizer_main = { .shadow_scale = ~0 };
-
-#endif /* CLIB_SANITIZE_ADDR */
diff --git a/src/vppinfra/sanitizer.h b/src/vppinfra/sanitizer.h
deleted file mode 100644
index 658d8281230..00000000000
--- a/src/vppinfra/sanitizer.h
+++ /dev/null
@@ -1,141 +0,0 @@
-#ifndef _included_clib_sanitizer_h
-#define _included_clib_sanitizer_h
-
-#ifdef CLIB_SANITIZE_ADDR
-
-#include <sanitizer/asan_interface.h>
-#include <vppinfra/clib.h>
-#include <vppinfra/error_bootstrap.h>
-
-typedef struct
-{
- size_t shadow_scale;
- size_t shadow_offset;
-} clib_sanitizer_main_t;
-
-extern clib_sanitizer_main_t sanitizer_main;
-
-#define CLIB_NOSANITIZE_ADDR __attribute__((no_sanitize_address))
-#define CLIB_MEM_POISON(a, s) ASAN_POISON_MEMORY_REGION((a), (s))
-#define CLIB_MEM_UNPOISON(a, s) ASAN_UNPOISON_MEMORY_REGION((a), (s))
-
-#define CLIB_MEM_OVERFLOW_MAX 64
-
-static_always_inline void
-sanitizer_unpoison__ (u64 *restrict *shadow_ptr, size_t *shadow_len,
- const void *ptr, size_t len)
-{
- size_t scale, off;
-
- if (PREDICT_FALSE (~0 == sanitizer_main.shadow_scale))
- __asan_get_shadow_mapping (&sanitizer_main.shadow_scale,
- &sanitizer_main.shadow_offset);
-
- scale = sanitizer_main.shadow_scale;
- off = sanitizer_main.shadow_offset;
-
- /* compute the shadow address and length */
- *shadow_len = len >> scale;
- ASSERT (*shadow_len <= CLIB_MEM_OVERFLOW_MAX);
- *shadow_ptr = (void *) (((clib_address_t) ptr >> scale) + off);
-}
-
-static_always_inline CLIB_NOSANITIZE_ADDR void
-sanitizer_unpoison_push__ (u64 *restrict shadow, const void *ptr, size_t len)
-{
- u64 *restrict shadow_ptr;
- size_t shadow_len;
- int i;
-
- sanitizer_unpoison__ (&shadow_ptr, &shadow_len, ptr, len);
-
- /* save the shadow area */
- for (i = 0; i < shadow_len; i++)
- shadow[i] = shadow_ptr[i];
-
- /* unpoison */
- for (i = 0; i < shadow_len; i++)
- shadow_ptr[i] = 0;
-}
-
-static_always_inline CLIB_NOSANITIZE_ADDR void
-sanitizer_unpoison_pop__ (const u64 *restrict shadow, const void *ptr,
- size_t len)
-{
- u64 *restrict shadow_ptr;
- size_t shadow_len;
- int i;
-
- sanitizer_unpoison__ (&shadow_ptr, &shadow_len, ptr, len);
-
- /* restore the shadow area */
- for (i = 0; i < shadow_len; i++)
- {
- ASSERT (0 == shadow_ptr[i]);
- shadow_ptr[i] = shadow[i];
- }
-}
-
-#define CLIB_MEM_OVERFLOW(f, src, n) \
- ({ \
- typeof (f) clib_mem_overflow_ret__; \
- const void *clib_mem_overflow_src__ = (src); \
- size_t clib_mem_overflow_n__ = (n); \
- u64 clib_mem_overflow_shadow__[CLIB_MEM_OVERFLOW_MAX]; \
- sanitizer_unpoison_push__ (clib_mem_overflow_shadow__, \
- clib_mem_overflow_src__, \
- clib_mem_overflow_n__); \
- clib_mem_overflow_ret__ = f; \
- sanitizer_unpoison_pop__ (clib_mem_overflow_shadow__, \
- clib_mem_overflow_src__, \
- clib_mem_overflow_n__); \
- clib_mem_overflow_ret__; \
- })
-
-#define CLIB_MEM_OVERFLOW_LOAD(f, src) \
- ({ \
- typeof(src) clib_mem_overflow_load_src__ = (src); \
- CLIB_MEM_OVERFLOW(f(clib_mem_overflow_load_src__), clib_mem_overflow_load_src__, sizeof(typeof(f(clib_mem_overflow_load_src__)))); \
- })
-
-static_always_inline void
-CLIB_MEM_POISON_LEN (void *src, size_t oldlen, size_t newlen)
-{
- if (oldlen > newlen)
- CLIB_MEM_POISON (src + newlen, oldlen - newlen);
- else if (newlen > oldlen)
- CLIB_MEM_UNPOISON (src + oldlen, newlen - oldlen);
-}
-
-#else /* CLIB_SANITIZE_ADDR */
-
-#define CLIB_NOSANITIZE_ADDR
-#define CLIB_MEM_POISON(a, s) (void)(a)
-#define CLIB_MEM_UNPOISON(a, s) (void)(a)
-#define CLIB_MEM_OVERFLOW(a, b, c) a
-#define CLIB_MEM_OVERFLOW_LOAD(f, src) f(src)
-#define CLIB_MEM_POISON_LEN(a, b, c)
-
-#endif /* CLIB_SANITIZE_ADDR */
-
-/*
- * clang tends to force alignment of all sections when compiling for address
- * sanitizer. This confuse VPP plugin infra, prevent clang to do that
- * On the contrary, GCC does not support this kind of attribute on sections
- * sigh.
- */
-#ifdef __clang__
-#define CLIB_NOSANITIZE_PLUGIN_REG_SECTION CLIB_NOSANITIZE_ADDR
-#else
-#define CLIB_NOSANITIZE_PLUGIN_REG_SECTION
-#endif
-
-#endif /* _included_clib_sanitizer_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/serialize.c b/src/vppinfra/serialize.c
index 64509254b5d..ceda617f872 100644
--- a/src/vppinfra/serialize.c
+++ b/src/vppinfra/serialize.c
@@ -308,13 +308,16 @@ unserialize_vector_ha (serialize_main_t * m,
{
void *v, *p;
u32 l;
+ vec_attr_t va = { .align = align,
+ .elt_sz = elt_bytes,
+ .hdr_sz = header_bytes };
unserialize_integer (m, &l, sizeof (l));
if (l > max_length)
serialize_error (&m->header,
clib_error_create ("bad vector length %d", l));
- p = v = _vec_resize ((void *) 0, l, (uword) l * elt_bytes, header_bytes,
- /* align */ align);
+
+ p = v = _vec_alloc_internal (l, &va);
while (l != 0)
{
@@ -437,6 +440,9 @@ unserialize_pool_helper (serialize_main_t * m,
void *v;
u32 i, l, lo, hi;
pool_header_t *p;
+ vec_attr_t va = { .align = align,
+ .elt_sz = elt_bytes,
+ .hdr_sz = sizeof (pool_header_t) };
unserialize_integer (m, &l, sizeof (l));
if (l == 0)
@@ -444,8 +450,7 @@ unserialize_pool_helper (serialize_main_t * m,
return 0;
}
- v = _vec_resize ((void *) 0, l, (uword) l * elt_bytes, sizeof (p[0]),
- align);
+ v = _vec_alloc_internal (l, &va);
p = pool_header (v);
vec_unserialize (m, &p->free_indices, unserialize_vec_32);
@@ -715,7 +720,7 @@ serialize_write_not_inline (serialize_main_header_t * m,
n_left_b -= n;
n_left_o -= n;
if (n_left_o == 0)
- _vec_len (s->overflow_buffer) = 0;
+ vec_set_len (s->overflow_buffer, 0);
else
vec_delete (s->overflow_buffer, n, 0);
}
@@ -736,6 +741,7 @@ serialize_write_not_inline (serialize_main_header_t * m,
if (n_left_o > 0 || n_left_b < n_bytes_to_write)
{
u8 *r;
+ s->current_buffer_index = cur_bi;
vec_add2 (s->overflow_buffer, r, n_bytes_to_write);
return r;
}
@@ -772,7 +778,7 @@ serialize_read_not_inline (serialize_main_header_t * m,
if (n_left_o == 0 && s->overflow_buffer)
{
s->current_overflow_index = 0;
- _vec_len (s->overflow_buffer) = 0;
+ vec_set_len (s->overflow_buffer, 0);
}
n_left_to_read = n_bytes_to_read;
@@ -924,7 +930,7 @@ serialize_close_vector (serialize_main_t * m)
serialize_close (m); /* frees overflow buffer */
if (s->buffer)
- _vec_len (s->buffer) = s->current_buffer_index;
+ vec_set_len (s->buffer, s->current_buffer_index);
result = s->buffer;
clib_memset (m, 0, sizeof (m[0]));
return result;
@@ -1162,7 +1168,7 @@ clib_file_write (serialize_main_header_t * m, serialize_stream_t * s)
serialize_error (m, clib_error_return_unix (0, "write"));
}
if (n == s->current_buffer_index)
- _vec_len (s->buffer) = 0;
+ vec_set_len (s->buffer, 0);
else
vec_delete (s->buffer, n, 0);
s->current_buffer_index = vec_len (s->buffer);
@@ -1198,7 +1204,7 @@ serialize_open_clib_file_descriptor_helper (serialize_main_t * m, int fd,
if (!is_read)
{
m->stream.n_buffer_bytes = vec_len (m->stream.buffer);
- _vec_len (m->stream.buffer) = 0;
+ vec_set_len (m->stream.buffer, 0);
}
m->header.data_function = is_read ? clib_file_read : clib_file_write;
diff --git a/src/vppinfra/sha2.h b/src/vppinfra/sha2.h
deleted file mode 100644
index b6caf59ac7f..00000000000
--- a/src/vppinfra/sha2.h
+++ /dev/null
@@ -1,637 +0,0 @@
-/*
- * Copyright (c) 2019 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_sha2_h
-#define included_sha2_h
-
-#include <vppinfra/clib.h>
-
-#define SHA224_DIGEST_SIZE 28
-#define SHA224_BLOCK_SIZE 64
-
-#define SHA256_DIGEST_SIZE 32
-#define SHA256_BLOCK_SIZE 64
-#define SHA256_ROTR(x, y) ((x >> y) | (x << (32 - y)))
-#define SHA256_CH(a, b, c) ((a & b) ^ (~a & c))
-#define SHA256_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c))
-#define SHA256_CSIGMA0(x) (SHA256_ROTR(x, 2) ^ \
- SHA256_ROTR(x, 13) ^ \
- SHA256_ROTR(x, 22));
-#define SHA256_CSIGMA1(x) (SHA256_ROTR(x, 6) ^ \
- SHA256_ROTR(x, 11) ^ \
- SHA256_ROTR(x, 25));
-#define SHA256_SSIGMA0(x) (SHA256_ROTR (x, 7) ^ \
- SHA256_ROTR (x, 18) ^ \
- (x >> 3))
-#define SHA256_SSIGMA1(x) (SHA256_ROTR (x, 17) ^ \
- SHA256_ROTR (x, 19) ^ \
- (x >> 10))
-
-#define SHA256_MSG_SCHED(w, j) \
-{ \
- w[j] = w[j - 7] + w[j - 16]; \
- w[j] += SHA256_SSIGMA0 (w[j - 15]); \
- w[j] += SHA256_SSIGMA1 (w[j - 2]); \
-}
-
-#define SHA256_TRANSFORM(s, w, i, k) \
-{ \
- __typeof__(s[0]) t1, t2; \
- t1 = k + w[i] + s[7]; \
- t1 += SHA256_CSIGMA1 (s[4]); \
- t1 += SHA256_CH (s[4], s[5], s[6]); \
- t2 = SHA256_CSIGMA0 (s[0]); \
- t2 += SHA256_MAJ (s[0], s[1], s[2]); \
- s[7] = s[6]; \
- s[6] = s[5]; \
- s[5] = s[4]; \
- s[4] = s[3] + t1; \
- s[3] = s[2]; \
- s[2] = s[1]; \
- s[1] = s[0]; \
- s[0] = t1 + t2; \
-}
-
-#define SHA512_224_DIGEST_SIZE 28
-#define SHA512_224_BLOCK_SIZE 128
-
-#define SHA512_256_DIGEST_SIZE 32
-#define SHA512_256_BLOCK_SIZE 128
-
-#define SHA384_DIGEST_SIZE 48
-#define SHA384_BLOCK_SIZE 128
-
-#define SHA512_DIGEST_SIZE 64
-#define SHA512_BLOCK_SIZE 128
-#define SHA512_ROTR(x, y) ((x >> y) | (x << (64 - y)))
-#define SHA512_CH(a, b, c) ((a & b) ^ (~a & c))
-#define SHA512_MAJ(a, b, c) ((a & b) ^ (a & c) ^ (b & c))
-#define SHA512_CSIGMA0(x) (SHA512_ROTR (x, 28) ^ \
- SHA512_ROTR (x, 34) ^ \
- SHA512_ROTR (x, 39))
-#define SHA512_CSIGMA1(x) (SHA512_ROTR (x, 14) ^ \
- SHA512_ROTR (x, 18) ^ \
- SHA512_ROTR (x, 41))
-#define SHA512_SSIGMA0(x) (SHA512_ROTR (x, 1) ^ \
- SHA512_ROTR (x, 8) ^ \
- (x >> 7))
-#define SHA512_SSIGMA1(x) (SHA512_ROTR (x, 19) ^ \
- SHA512_ROTR (x, 61) ^ \
- (x >> 6))
-
-#define SHA512_MSG_SCHED(w, j) \
-{ \
- w[j] = w[j - 7] + w[j - 16]; \
- w[j] += SHA512_SSIGMA0 (w[j - 15]); \
- w[j] += SHA512_SSIGMA1 (w[j - 2]); \
-}
-
-#define SHA512_TRANSFORM(s, w, i, k) \
-{ \
- __typeof__(s[0]) t1, t2; \
- t1 = k + w[i] + s[7]; \
- t1 += SHA512_CSIGMA1 (s[4]); \
- t1 += SHA512_CH (s[4], s[5], s[6]); \
- t2 = SHA512_CSIGMA0 (s[0]); \
- t2 += SHA512_MAJ (s[0], s[1], s[2]); \
- s[7] = s[6]; \
- s[6] = s[5]; \
- s[5] = s[4]; \
- s[4] = s[3] + t1; \
- s[3] = s[2]; \
- s[2] = s[1]; \
- s[1] = s[0]; \
- s[0] = t1 + t2; \
-}
-
-static const u32 sha224_h[8] = {
- 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
- 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4
-};
-
-static const u32 sha256_h[8] = {
- 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
- 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
-};
-
-static const u32 sha256_k[64] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-static const u64 sha384_h[8] = {
- 0xcbbb9d5dc1059ed8, 0x629a292a367cd507,
- 0x9159015a3070dd17, 0x152fecd8f70e5939,
- 0x67332667ffc00b31, 0x8eb44a8768581511,
- 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4
-};
-
-static const u64 sha512_h[8] = {
- 0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
- 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
- 0x510e527fade682d1, 0x9b05688c2b3e6c1f,
- 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
-};
-
-static const u64 sha512_224_h[8] = {
- 0x8c3d37c819544da2, 0x73e1996689dcd4d6,
- 0x1dfab7ae32ff9c82, 0x679dd514582f9fcf,
- 0x0f6d2b697bd44da8, 0x77e36f7304c48942,
- 0x3f9d85a86a1d36c8, 0x1112e6ad91d692a1
-};
-
-static const u64 sha512_256_h[8] = {
- 0x22312194fc2bf72c, 0x9f555fa3c84c64c2,
- 0x2393b86b6f53b151, 0x963877195940eabd,
- 0x96283ee2a88effe3, 0xbe5e1e2553863992,
- 0x2b0199fc2c85b8aa, 0x0eb72ddc81c52ca2
-};
-
-static const u64 sha512_k[80] = {
- 0x428a2f98d728ae22, 0x7137449123ef65cd,
- 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
- 0x3956c25bf348b538, 0x59f111f1b605d019,
- 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
- 0xd807aa98a3030242, 0x12835b0145706fbe,
- 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
- 0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
- 0x9bdc06a725c71235, 0xc19bf174cf692694,
- 0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
- 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
- 0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
- 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
- 0x983e5152ee66dfab, 0xa831c66d2db43210,
- 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
- 0xc6e00bf33da88fc2, 0xd5a79147930aa725,
- 0x06ca6351e003826f, 0x142929670a0e6e70,
- 0x27b70a8546d22ffc, 0x2e1b21385c26c926,
- 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
- 0x650a73548baf63de, 0x766a0abb3c77b2a8,
- 0x81c2c92e47edaee6, 0x92722c851482353b,
- 0xa2bfe8a14cf10364, 0xa81a664bbc423001,
- 0xc24b8b70d0f89791, 0xc76c51a30654be30,
- 0xd192e819d6ef5218, 0xd69906245565a910,
- 0xf40e35855771202a, 0x106aa07032bbd1b8,
- 0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
- 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
- 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
- 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
- 0x748f82ee5defb2fc, 0x78a5636f43172f60,
- 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
- 0x90befffa23631e28, 0xa4506cebde82bde9,
- 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
- 0xca273eceea26619c, 0xd186b8c721c0c207,
- 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
- 0x06f067aa72176fba, 0x0a637dc5a2c898a6,
- 0x113f9804bef90dae, 0x1b710b35131c471b,
- 0x28db77f523047d84, 0x32caab7b40c72493,
- 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
- 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
- 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
-};
-
-typedef enum
-{
- CLIB_SHA2_224,
- CLIB_SHA2_256,
- CLIB_SHA2_384,
- CLIB_SHA2_512,
- CLIB_SHA2_512_224,
- CLIB_SHA2_512_256,
-} clib_sha2_type_t;
-
-#define SHA2_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE
-#define SHA2_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
-
-typedef struct
-{
- u64 total_bytes;
- u16 n_pending;
- u8 block_size;
- u8 digest_size;
- union
- {
- u32 h32[8];
- u64 h64[8];
-#if defined(__SHA__) && defined (__x86_64__)
- u32x4 h32x4[2];
-#endif
- };
- union
- {
- u8 as_u8[SHA2_MAX_BLOCK_SIZE];
- u64 as_u64[SHA2_MAX_BLOCK_SIZE / sizeof (u64)];
- uword as_uword[SHA2_MAX_BLOCK_SIZE / sizeof (uword)];
- }
- pending;
-}
-clib_sha2_ctx_t;
-
-static_always_inline void
-clib_sha2_init (clib_sha2_ctx_t * ctx, clib_sha2_type_t type)
-{
- const u32 *h32 = 0;
- const u64 *h64 = 0;
-
- ctx->total_bytes = 0;
- ctx->n_pending = 0;
-
- switch (type)
- {
- case CLIB_SHA2_224:
- h32 = sha224_h;
- ctx->block_size = SHA224_BLOCK_SIZE;
- ctx->digest_size = SHA224_DIGEST_SIZE;
- break;
- case CLIB_SHA2_256:
- h32 = sha256_h;
- ctx->block_size = SHA256_BLOCK_SIZE;
- ctx->digest_size = SHA256_DIGEST_SIZE;
- break;
- case CLIB_SHA2_384:
- h64 = sha384_h;
- ctx->block_size = SHA384_BLOCK_SIZE;
- ctx->digest_size = SHA384_DIGEST_SIZE;
- break;
- case CLIB_SHA2_512:
- h64 = sha512_h;
- ctx->block_size = SHA512_BLOCK_SIZE;
- ctx->digest_size = SHA512_DIGEST_SIZE;
- break;
- case CLIB_SHA2_512_224:
- h64 = sha512_224_h;
- ctx->block_size = SHA512_224_BLOCK_SIZE;
- ctx->digest_size = SHA512_224_DIGEST_SIZE;
- break;
- case CLIB_SHA2_512_256:
- h64 = sha512_256_h;
- ctx->block_size = SHA512_256_BLOCK_SIZE;
- ctx->digest_size = SHA512_256_DIGEST_SIZE;
- break;
- }
- if (h32)
- for (int i = 0; i < 8; i++)
- ctx->h32[i] = h32[i];
-
- if (h64)
- for (int i = 0; i < 8; i++)
- ctx->h64[i] = h64[i];
-}
-
-#if defined(__SHA__) && defined (__x86_64__)
-static inline void
-shani_sha256_cycle_w (u32x4 cw[], u8 a, u8 b, u8 c, u8 d)
-{
- cw[a] = (u32x4) _mm_sha256msg1_epu32 ((__m128i) cw[a], (__m128i) cw[b]);
- cw[a] += (u32x4) _mm_alignr_epi8 ((__m128i) cw[d], (__m128i) cw[c], 4);
- cw[a] = (u32x4) _mm_sha256msg2_epu32 ((__m128i) cw[a], (__m128i) cw[d]);
-}
-
-static inline void
-shani_sha256_4_rounds (u32x4 cw, u8 n, u32x4 s[])
-{
- u32x4 r = *(u32x4 *) (sha256_k + 4 * n) + cw;
- s[0] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[0], (__m128i) s[1],
- (__m128i) r);
- r = (u32x4) u64x2_interleave_hi ((u64x2) r, (u64x2) r);
- s[1] = (u32x4) _mm_sha256rnds2_epu32 ((__m128i) s[1], (__m128i) s[0],
- (__m128i) r);
-}
-
-static inline void
-shani_sha256_shuffle (u32x4 d[2], u32x4 s[2])
-{
- /* {0, 1, 2, 3}, {4, 5, 6, 7} -> {7, 6, 3, 2}, {5, 4, 1, 0} */
- d[0] = (u32x4) _mm_shuffle_ps ((__m128) s[1], (__m128) s[0], 0xbb);
- d[1] = (u32x4) _mm_shuffle_ps ((__m128) s[1], (__m128) s[0], 0x11);
-}
-#endif
-
-void
-clib_sha256_block (clib_sha2_ctx_t * ctx, const u8 * msg, uword n_blocks)
-{
-#if defined(__SHA__) && defined (__x86_64__)
- u32x4 h[2], s[2], w[4];
-
- shani_sha256_shuffle (h, ctx->h32x4);
-
- while (n_blocks)
- {
- w[0] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 0));
- w[1] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 16));
- w[2] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 32));
- w[3] = u32x4_byte_swap (u32x4_load_unaligned ((u8 *) msg + 48));
-
- s[0] = h[0];
- s[1] = h[1];
-
- shani_sha256_4_rounds (w[0], 0, s);
- shani_sha256_4_rounds (w[1], 1, s);
- shani_sha256_4_rounds (w[2], 2, s);
- shani_sha256_4_rounds (w[3], 3, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 4, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 5, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 6, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 7, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 8, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 9, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 10, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 11, s);
-
- shani_sha256_cycle_w (w, 0, 1, 2, 3);
- shani_sha256_4_rounds (w[0], 12, s);
- shani_sha256_cycle_w (w, 1, 2, 3, 0);
- shani_sha256_4_rounds (w[1], 13, s);
- shani_sha256_cycle_w (w, 2, 3, 0, 1);
- shani_sha256_4_rounds (w[2], 14, s);
- shani_sha256_cycle_w (w, 3, 0, 1, 2);
- shani_sha256_4_rounds (w[3], 15, s);
-
- h[0] += s[0];
- h[1] += s[1];
-
- /* next */
- msg += SHA256_BLOCK_SIZE;
- n_blocks--;
- }
-
- shani_sha256_shuffle (ctx->h32x4, h);
-#else
- u32 w[64], s[8], i;
-
- while (n_blocks)
- {
- for (i = 0; i < 8; i++)
- s[i] = ctx->h32[i];
-
- for (i = 0; i < 16; i++)
- {
- w[i] = clib_net_to_host_u32 (*((u32 *) msg + i));
- SHA256_TRANSFORM (s, w, i, sha256_k[i]);
- }
-
- for (i = 16; i < 64; i++)
- {
- SHA256_MSG_SCHED (w, i);
- SHA256_TRANSFORM (s, w, i, sha256_k[i]);
- }
-
- for (i = 0; i < 8; i++)
- ctx->h32[i] += s[i];
-
- /* next */
- msg += SHA256_BLOCK_SIZE;
- n_blocks--;
- }
-#endif
-}
-
-static_always_inline void
-clib_sha512_block (clib_sha2_ctx_t * ctx, const u8 * msg, uword n_blocks)
-{
- u64 w[80], s[8], i;
-
- while (n_blocks)
- {
- for (i = 0; i < 8; i++)
- s[i] = ctx->h64[i];
-
- for (i = 0; i < 16; i++)
- {
- w[i] = clib_net_to_host_u64 (*((u64 *) msg + i));
- SHA512_TRANSFORM (s, w, i, sha512_k[i]);
- }
-
- for (i = 16; i < 80; i++)
- {
- SHA512_MSG_SCHED (w, i);
- SHA512_TRANSFORM (s, w, i, sha512_k[i]);
- }
-
- for (i = 0; i < 8; i++)
- ctx->h64[i] += s[i];
-
- /* next */
- msg += SHA512_BLOCK_SIZE;
- n_blocks--;
- }
-}
-
-static_always_inline void
-clib_sha2_update (clib_sha2_ctx_t * ctx, const u8 * msg, uword n_bytes)
-{
- uword n_blocks;
- if (ctx->n_pending)
- {
- uword n_left = ctx->block_size - ctx->n_pending;
- if (n_bytes < n_left)
- {
- clib_memcpy_fast (ctx->pending.as_u8 + ctx->n_pending, msg,
- n_bytes);
- ctx->n_pending += n_bytes;
- return;
- }
- else
- {
- clib_memcpy_fast (ctx->pending.as_u8 + ctx->n_pending, msg, n_left);
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- ctx->n_pending = 0;
- ctx->total_bytes += ctx->block_size;
- n_bytes -= n_left;
- msg += n_left;
- }
- }
-
- if ((n_blocks = n_bytes / ctx->block_size))
- {
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, msg, n_blocks);
- else
- clib_sha256_block (ctx, msg, n_blocks);
- n_bytes -= n_blocks * ctx->block_size;
- msg += n_blocks * ctx->block_size;
- ctx->total_bytes += n_blocks * ctx->block_size;
- }
-
- if (n_bytes)
- {
- clib_memset_u8 (ctx->pending.as_u8, 0, ctx->block_size);
- clib_memcpy_fast (ctx->pending.as_u8, msg, n_bytes);
- ctx->n_pending = n_bytes;
- }
- else
- ctx->n_pending = 0;
-}
-
-static_always_inline void
-clib_sha2_final (clib_sha2_ctx_t * ctx, u8 * digest)
-{
- int i;
-
- ctx->total_bytes += ctx->n_pending;
- if (ctx->n_pending == 0)
- {
- clib_memset (ctx->pending.as_u8, 0, ctx->block_size);
- ctx->pending.as_u8[0] = 0x80;
- }
- else if (ctx->n_pending + sizeof (u64) + sizeof (u8) > ctx->block_size)
- {
- ctx->pending.as_u8[ctx->n_pending] = 0x80;
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- clib_memset (ctx->pending.as_u8, 0, ctx->block_size);
- }
- else
- ctx->pending.as_u8[ctx->n_pending] = 0x80;
-
- ctx->pending.as_u64[ctx->block_size / 8 - 1] =
- clib_net_to_host_u64 (ctx->total_bytes * 8);
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
-
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- {
- for (i = 0; i < ctx->digest_size / sizeof (u64); i++)
- *((u64 *) digest + i) = clib_net_to_host_u64 (ctx->h64[i]);
-
- /* sha512-224 case - write half of u64 */
- if (i * sizeof (u64) < ctx->digest_size)
- *((u32 *) digest + 2 * i) = clib_net_to_host_u32 (ctx->h64[i] >> 32);
- }
- else
- for (i = 0; i < ctx->digest_size / sizeof (u32); i++)
- *((u32 *) digest + i) = clib_net_to_host_u32 (ctx->h32[i]);
-}
-
-static_always_inline void
-clib_sha2 (clib_sha2_type_t type, const u8 * msg, uword len, u8 * digest)
-{
- clib_sha2_ctx_t ctx;
- clib_sha2_init (&ctx, type);
- clib_sha2_update (&ctx, msg, len);
- clib_sha2_final (&ctx, digest);
-}
-
-#define clib_sha224(...) clib_sha2 (CLIB_SHA2_224, __VA_ARGS__)
-#define clib_sha256(...) clib_sha2 (CLIB_SHA2_256, __VA_ARGS__)
-#define clib_sha384(...) clib_sha2 (CLIB_SHA2_384, __VA_ARGS__)
-#define clib_sha512(...) clib_sha2 (CLIB_SHA2_512, __VA_ARGS__)
-#define clib_sha512_224(...) clib_sha2 (CLIB_SHA2_512_224, __VA_ARGS__)
-#define clib_sha512_256(...) clib_sha2 (CLIB_SHA2_512_256, __VA_ARGS__)
-
-static_always_inline void
-clib_hmac_sha2 (clib_sha2_type_t type, const u8 * key, uword key_len,
- const u8 * msg, uword len, u8 * digest)
-{
- clib_sha2_ctx_t _ctx, *ctx = &_ctx;
- uword key_data[SHA2_MAX_BLOCK_SIZE / sizeof (uword)];
- u8 i_digest[SHA2_MAX_DIGEST_SIZE];
- int i, n_words;
-
- clib_sha2_init (ctx, type);
- n_words = ctx->block_size / sizeof (uword);
-
- /* key */
- if (key_len > ctx->block_size)
- {
- /* key is longer than block, calculate hash of key */
- clib_sha2_update (ctx, key, key_len);
- for (i = (ctx->digest_size / sizeof (uword)) / 2; i < n_words; i++)
- key_data[i] = 0;
- clib_sha2_final (ctx, (u8 *) key_data);
- clib_sha2_init (ctx, type);
- }
- else
- {
- for (i = 0; i < n_words; i++)
- key_data[i] = 0;
- clib_memcpy_fast (key_data, key, key_len);
- }
-
- /* ipad */
- for (i = 0; i < n_words; i++)
- ctx->pending.as_uword[i] = key_data[i] ^ (uword) 0x3636363636363636;
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- ctx->total_bytes += ctx->block_size;
-
- /* message */
- clib_sha2_update (ctx, msg, len);
- clib_sha2_final (ctx, i_digest);
-
- /* opad */
- clib_sha2_init (ctx, type);
- for (i = 0; i < n_words; i++)
- ctx->pending.as_uword[i] = key_data[i] ^ (uword) 0x5c5c5c5c5c5c5c5c;
- if (ctx->block_size == SHA512_BLOCK_SIZE)
- clib_sha512_block (ctx, ctx->pending.as_u8, 1);
- else
- clib_sha256_block (ctx, ctx->pending.as_u8, 1);
- ctx->total_bytes += ctx->block_size;
-
- /* digest */
- clib_sha2_update (ctx, i_digest, ctx->digest_size);
- clib_sha2_final (ctx, digest);
-}
-
-#define clib_hmac_sha224(...) clib_hmac_sha2 (CLIB_SHA2_224, __VA_ARGS__)
-#define clib_hmac_sha256(...) clib_hmac_sha2 (CLIB_SHA2_256, __VA_ARGS__)
-#define clib_hmac_sha384(...) clib_hmac_sha2 (CLIB_SHA2_384, __VA_ARGS__)
-#define clib_hmac_sha512(...) clib_hmac_sha2 (CLIB_SHA2_512, __VA_ARGS__)
-#define clib_hmac_sha512_224(...) clib_hmac_sha2 (CLIB_SHA2_512_224, __VA_ARGS__)
-#define clib_hmac_sha512_256(...) clib_hmac_sha2 (CLIB_SHA2_512_256, __VA_ARGS__)
-
-#endif /* included_sha2_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/socket.c b/src/vppinfra/socket.c
index 26427d98fa1..2abf2b244cd 100644
--- a/src/vppinfra/socket.c
+++ b/src/vppinfra/socket.c
@@ -93,108 +93,6 @@ find_free_port (word sock)
return port < 1 << 16 ? port : -1;
}
-/* Convert a config string to a struct sockaddr and length for use
- with bind or connect. */
-static clib_error_t *
-socket_config (char *config,
- void *addr, socklen_t * addr_len, u32 ip4_default_address)
-{
- clib_error_t *error = 0;
-
- if (!config)
- config = "";
-
- /* Anything that begins with a / is a local PF_LOCAL socket. */
- if (config[0] == '/')
- {
- struct sockaddr_un *su = addr;
- su->sun_family = PF_LOCAL;
- clib_memcpy (&su->sun_path, config,
- clib_min (sizeof (su->sun_path), 1 + strlen (config)));
- *addr_len = sizeof (su[0]);
- }
-
- /* Treat everything that starts with @ as an abstract socket. */
- else if (config[0] == '@')
- {
- struct sockaddr_un *su = addr;
- su->sun_family = PF_LOCAL;
- clib_memcpy (&su->sun_path, config,
- clib_min (sizeof (su->sun_path), 1 + strlen (config)));
-
- *addr_len = sizeof (su->sun_family) + strlen (config);
- su->sun_path[0] = '\0';
- }
-
- /* Hostname or hostname:port or port. */
- else
- {
- char *host_name;
- int port = -1;
- struct sockaddr_in *sa = addr;
-
- host_name = 0;
- port = -1;
- if (config[0] != 0)
- {
- unformat_input_t i;
-
- unformat_init_string (&i, config, strlen (config));
- if (unformat (&i, "%s:%d", &host_name, &port)
- || unformat (&i, "%s:0x%x", &host_name, &port))
- ;
- else if (unformat (&i, "%s", &host_name))
- ;
- else
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, &i);
- unformat_free (&i);
-
- if (error)
- goto done;
- }
-
- sa->sin_family = PF_INET;
- *addr_len = sizeof (sa[0]);
- if (port != -1)
- sa->sin_port = htons (port);
- else
- sa->sin_port = 0;
-
- if (host_name)
- {
- struct in_addr host_addr;
-
- /* Recognize localhost to avoid host lookup in most common cast. */
- if (!strcmp (host_name, "localhost"))
- sa->sin_addr.s_addr = htonl (INADDR_LOOPBACK);
-
- else if (inet_aton (host_name, &host_addr))
- sa->sin_addr = host_addr;
-
- else if (host_name && strlen (host_name) > 0)
- {
- struct hostent *host = gethostbyname (host_name);
- if (!host)
- error = clib_error_return (0, "unknown host `%s'", config);
- else
- clib_memcpy (&sa->sin_addr.s_addr, host->h_addr_list[0],
- host->h_length);
- }
-
- else
- sa->sin_addr.s_addr = htonl (ip4_default_address);
-
- vec_free (host_name);
- if (error)
- goto done;
- }
- }
-
-done:
- return error;
-}
-
static clib_error_t *
default_socket_write (clib_socket_t * s)
{
@@ -230,7 +128,7 @@ default_socket_write (clib_socket_t * s)
else if (written > 0)
{
if (written == tx_len)
- _vec_len (s->tx_buffer) = 0;
+ vec_set_len (s->tx_buffer, 0);
else
vec_delete (s->tx_buffer, written, 0);
}
@@ -253,7 +151,7 @@ default_socket_read (clib_socket_t * sock, int n_bytes)
u8 *buf;
/* RX side of socket is down once end of file is reached. */
- if (sock->flags & CLIB_SOCKET_F_RX_END_OF_FILE)
+ if (sock->rx_end_of_file)
return 0;
fd = sock->fd;
@@ -275,10 +173,10 @@ default_socket_read (clib_socket_t * sock, int n_bytes)
/* Other side closed the socket. */
if (n_read == 0)
- sock->flags |= CLIB_SOCKET_F_RX_END_OF_FILE;
+ sock->rx_end_of_file = 1;
non_fatal:
- _vec_len (sock->rx_buffer) += n_read - n_bytes;
+ vec_inc_len (sock->rx_buffer, n_read - n_bytes);
return 0;
}
@@ -328,7 +226,7 @@ static clib_error_t *
default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
int fds[], int num_fds)
{
-#ifdef __linux__
+#if CLIB_LINUX
char ctl[CMSG_SPACE (sizeof (int) * num_fds) +
CMSG_SPACE (sizeof (struct ucred))];
struct ucred *cr = 0;
@@ -363,7 +261,7 @@ default_socket_recvmsg (clib_socket_t * s, void *msg, int msglen,
{
if (cmsg->cmsg_level == SOL_SOCKET)
{
-#ifdef __linux__
+#if CLIB_LINUX
if (cmsg->cmsg_type == SCM_CREDENTIALS)
{
cr = (struct ucred *) CMSG_DATA (cmsg);
@@ -399,190 +297,452 @@ socket_init_funcs (clib_socket_t * s)
s->recvmsg_func = default_socket_recvmsg;
}
+static const struct
+{
+ char *prefix;
+ sa_family_t family;
+ clib_socket_type_t type;
+ u16 skip_prefix : 1;
+ u16 is_local : 1;
+} clib_socket_type_data[] = {
+ { .prefix = "unix:",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_UNIX,
+ .skip_prefix = 1,
+ .is_local = 1 },
+ { .prefix = "tcp:",
+ .family = AF_INET,
+ .type = CLIB_SOCKET_TYPE_INET,
+ .skip_prefix = 1 },
+#if CLIB_LINUX
+ { .prefix = "abstract:",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_LINUX_ABSTRACT,
+ .skip_prefix = 1,
+ .is_local = 1 },
+#endif /* CLIB_LINUX */
+ { .prefix = "/",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_UNIX,
+ .skip_prefix = 0,
+ .is_local = 1 },
+ { .prefix = "",
+ .family = AF_INET,
+ .type = CLIB_SOCKET_TYPE_INET,
+ .skip_prefix = 0,
+ .is_local = 0 },
+ { .prefix = "",
+ .family = AF_UNIX,
+ .type = CLIB_SOCKET_TYPE_UNIX,
+ .skip_prefix = 0,
+ .is_local = 1 },
+};
+
+static u8 *
+_clib_socket_get_string (char **p, int is_hostname)
+{
+ u8 *s = 0;
+ while (**p)
+ {
+ switch (**p)
+ {
+ case '_':
+ if (is_hostname)
+ return s;
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case '/':
+ case '-':
+ case '.':
+ vec_add1 (s, **p);
+ (*p)++;
+ break;
+ break;
+ default:
+ return s;
+ }
+ }
+ return s;
+}
+
+__clib_export int
+clib_socket_prefix_is_valid (char *s)
+{
+ for (typeof (clib_socket_type_data[0]) *d = clib_socket_type_data;
+ d - clib_socket_type_data < ARRAY_LEN (clib_socket_type_data); d++)
+ if (d->skip_prefix && strncmp (s, d->prefix, strlen (d->prefix)) == 0)
+ return 1;
+ return 0;
+}
+
+__clib_export int
+clib_socket_prefix_get_type (char *s)
+{
+ for (typeof (clib_socket_type_data[0]) *d = clib_socket_type_data;
+ d - clib_socket_type_data < ARRAY_LEN (clib_socket_type_data); d++)
+ if (strncmp (s, d->prefix, strlen (d->prefix)) == 0)
+ return d->type;
+ return 0;
+}
+
__clib_export clib_error_t *
-clib_socket_init (clib_socket_t * s)
+clib_socket_init (clib_socket_t *s)
{
- union
- {
- struct sockaddr sa;
- struct sockaddr_un su;
- } addr;
+ struct sockaddr_un su = { .sun_family = AF_UNIX };
+ struct sockaddr_in si = { .sin_family = AF_INET };
+ struct sockaddr *sa = 0;
+ typeof (clib_socket_type_data[0]) *data = 0;
socklen_t addr_len = 0;
- int socket_type, rv;
- clib_error_t *error = 0;
- word port;
+ int rv;
+ char *p;
+ clib_error_t *err = 0;
+ u8 *name = 0;
+ u16 port = 0;
+#if CLIB_LINUX
+ int netns_fd = -1;
+#endif
- error = socket_config (s->config, &addr.sa, &addr_len,
- (s->flags & CLIB_SOCKET_F_IS_SERVER
- ? INADDR_LOOPBACK : INADDR_ANY));
- if (error)
- goto done;
+ s->fd = -1;
- socket_init_funcs (s);
+ if (!s->config)
+ s->config = "";
+
+ for (int i = 0; i < ARRAY_LEN (clib_socket_type_data); i++)
+ {
+ typeof (clib_socket_type_data[0]) *d = clib_socket_type_data + i;
+
+ if (d->is_local == 0 && s->local_only)
+ continue;
+
+ if (strncmp (s->config, d->prefix, strlen (d->prefix)) == 0)
+ {
+ data = d;
+ break;
+ }
+ }
+
+ if (data == 0)
+ return clib_error_return (0, "unsupported socket config '%s'", s->config);
+
+ s->type = data->type;
+ p = s->config + (data->skip_prefix ? strlen (data->prefix) : 0);
+
+ name = _clib_socket_get_string (&p, data->type == CLIB_SOCKET_TYPE_INET);
+ vec_add1 (name, 0);
+
+ /* parse port type for INET sockets */
+ if (data->type == CLIB_SOCKET_TYPE_INET && p[0] == ':')
+ {
+ char *old_p = p + 1;
+ long long ll = strtoll (old_p, &p, 0);
- socket_type = s->flags & CLIB_SOCKET_F_SEQPACKET ?
- SOCK_SEQPACKET : SOCK_STREAM;
+ if (p == old_p)
+ {
+ err = clib_error_return (0, "invalid port");
+ goto done;
+ }
+
+ if (ll > CLIB_U16_MAX || ll < 1)
+ {
+ err = clib_error_return (0, "port out of range");
+ goto done;
+ }
+ port = ll;
+ }
+
+ while (p[0] == ',')
+ {
+ p++;
+ if (0)
+ ;
+#if CLIB_LINUX
+ else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
+ strncmp (p, "netns_name=", 11) == 0)
+ {
+ p += 11;
+ u8 *str = _clib_socket_get_string (&p, 0);
+ u8 *pathname = 0;
+ if (str[0] == '/')
+ pathname = format (0, "%v%c", str, 0);
+ else
+ pathname = format (0, "/var/run/netns/%v%c", str, 0);
+ if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
+ err = clib_error_return_unix (0, "open('%s')", pathname);
+ vec_free (str);
+ vec_free (pathname);
+ if (err)
+ goto done;
+ }
+ else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT && netns_fd == -1 &&
+ strncmp (p, "netns_pid=", 10) == 0)
+ {
+ char *old_p = p = p + 10;
+ u32 pid = (u32) strtol (old_p, &p, 0);
+
+ if (p == old_p)
+ err = clib_error_return (0, "invalid pid");
+ else
+ {
+ u8 *pathname = format (0, "/proc/%u/ns/net%c", pid, 0);
+ if ((netns_fd = open ((char *) pathname, O_RDONLY)) < 0)
+ err = clib_error_return_unix (0, "open('%s')", pathname);
+ vec_free (pathname);
+ }
+ if (err)
+ goto done;
+ }
+#endif
+ else
+ break;
+ }
- s->fd = socket (addr.sa.sa_family, socket_type, 0);
- if (s->fd < 0)
+ if (p[0] != 0)
{
- error = clib_error_return_unix (0, "socket (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return (0, "unknown input `%s'", p);
goto done;
}
- port = 0;
- if (addr.sa.sa_family == PF_INET)
- port = ((struct sockaddr_in *) &addr)->sin_port;
+#if CLIB_LINUX
+ /* change netns if requested */
+ if (s->type != CLIB_SOCKET_TYPE_INET && netns_fd != -1)
+ {
+ int fd = open ("/proc/self/ns/net", O_RDONLY);
- if (s->flags & CLIB_SOCKET_F_IS_SERVER)
+ if (setns (netns_fd, CLONE_NEWNET) < 0)
+ {
+ close (fd);
+ err = clib_error_return_unix (0, "setns(%d)", netns_fd);
+ goto done;
+ }
+ netns_fd = fd;
+ }
+#endif
+
+ if (s->type == CLIB_SOCKET_TYPE_INET)
{
- uword need_bind = 1;
+ addr_len = sizeof (si);
+ si.sin_port = htons (port);
- if (addr.sa.sa_family == PF_INET)
+ if (name)
{
- if (port == 0)
+ struct in_addr host_addr;
+ vec_add1 (name, 0);
+
+ /* Recognize localhost to avoid host lookup in most common cast. */
+ if (!strcmp ((char *) name, "localhost"))
+ si.sin_addr.s_addr = htonl (INADDR_LOOPBACK);
+
+ else if (inet_aton ((char *) name, &host_addr))
+ si.sin_addr = host_addr;
+
+ else if (strlen ((char *) name) > 0)
{
- port = find_free_port (s->fd);
- if (port < 0)
+ struct hostent *host = gethostbyname ((char *) name);
+ if (!host)
+ err = clib_error_return (0, "unknown host `%s'", name);
+ else
+ clib_memcpy (&si.sin_addr.s_addr, host->h_addr_list[0],
+ host->h_length);
+ }
+
+ else
+ si.sin_addr.s_addr =
+ htonl (s->is_server ? INADDR_LOOPBACK : INADDR_ANY);
+
+ if (err)
+ goto done;
+ }
+ sa = (struct sockaddr *) &si;
+ }
+ else if (s->type == CLIB_SOCKET_TYPE_UNIX)
+ {
+ struct stat st = { 0 };
+ char *path = (char *) &su.sun_path;
+
+ if (vec_len (name) > sizeof (su.sun_path) - 1)
+ {
+ err = clib_error_return (0, "File path '%v' too long", name);
+ goto done;
+ }
+
+ clib_memcpy (path, s->config, vec_len (name));
+ addr_len = sizeof (su);
+ sa = (struct sockaddr *) &su;
+
+ rv = stat (path, &st);
+ if (!s->is_server && rv < 0)
+ {
+ err = clib_error_return_unix (0, "stat ('%s')", path);
+ goto done;
+ }
+
+ if (s->is_server && rv == 0)
+ {
+ if (S_ISSOCK (st.st_mode))
+ {
+ int client_fd = socket (AF_UNIX, SOCK_STREAM, 0);
+ int ret = connect (client_fd, (const struct sockaddr *) &su,
+ sizeof (su));
+ typeof (errno) connect_errno = errno;
+ close (client_fd);
+
+ if (ret == 0 || (ret < 0 && connect_errno != ECONNREFUSED))
{
- error = clib_error_return (0, "no free port (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return (0, "Active listener on '%s'", path);
+ goto done;
+ }
+
+ if (unlink (path) < 0)
+ {
+ err = clib_error_return_unix (0, "unlink ('%s')", path);
goto done;
}
- need_bind = 0;
}
- }
- if (addr.sa.sa_family == PF_LOCAL &&
- ((struct sockaddr_un *) &addr)->sun_path[0] != 0)
- unlink (((struct sockaddr_un *) &addr)->sun_path);
-
- /* Make address available for multiple users. */
- {
- int v = 1;
- if (setsockopt (s->fd, SOL_SOCKET, SO_REUSEADDR, &v, sizeof (v)) < 0)
- clib_unix_warning ("setsockopt SO_REUSEADDR fails");
- }
-
-#if __linux__
- if (addr.sa.sa_family == PF_LOCAL && s->flags & CLIB_SOCKET_F_PASSCRED)
- {
- int x = 1;
- if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &x, sizeof (x)) < 0)
+ else
{
- error = clib_error_return_unix (0, "setsockopt (SO_PASSCRED, "
- "fd %d, '%s')", s->fd,
- s->config);
+ err = clib_error_return (0, "File '%s' already exists", path);
goto done;
}
}
-#endif
-
- if (need_bind && bind (s->fd, &addr.sa, addr_len) < 0)
+ }
+#if CLIB_LINUX
+ else if (s->type == CLIB_SOCKET_TYPE_LINUX_ABSTRACT)
+ {
+ if (vec_len (name) > sizeof (su.sun_path) - 2)
{
- error = clib_error_return_unix (0, "bind (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return (0, "Socket name '%v' too long", name);
goto done;
}
- if (listen (s->fd, 5) < 0)
+ clib_memcpy (&su.sun_path[1], name, vec_len (name));
+ addr_len = sizeof (su.sun_family) + vec_len (name);
+ sa = (struct sockaddr *) &su;
+ s->allow_group_write = 0;
+ }
+#endif
+ else
+ {
+ err = clib_error_return_unix (0, "unknown socket family");
+ goto done;
+ }
+
+ socket_init_funcs (s);
+
+ if ((s->fd = socket (sa->sa_family,
+ s->is_seqpacket ? SOCK_SEQPACKET : SOCK_STREAM, 0)) < 0)
+ {
+ err =
+ clib_error_return_unix (0, "socket (fd %d, '%s')", s->fd, s->config);
+ goto done;
+ }
+
+ if (s->is_server)
+ {
+ uword need_bind = 1;
+
+ if (sa->sa_family == AF_INET && si.sin_port == 0)
{
- error = clib_error_return_unix (0, "listen (fd %d, '%s')",
- s->fd, s->config);
- goto done;
+ word port = find_free_port (s->fd);
+ if (port < 0)
+ {
+ err = clib_error_return (0, "no free port (fd %d, '%s')", s->fd,
+ s->config);
+ goto done;
+ }
+ si.sin_port = port;
+ need_bind = 0;
}
- if (addr.sa.sa_family == PF_LOCAL &&
- s->flags & CLIB_SOCKET_F_ALLOW_GROUP_WRITE &&
- ((struct sockaddr_un *) &addr)->sun_path[0] != 0)
+
+ if (setsockopt (s->fd, SOL_SOCKET, SO_REUSEADDR, &((int){ 1 }),
+ sizeof (int)) < 0)
+ clib_unix_warning ("setsockopt SO_REUSEADDR fails");
+
+#if CLIB_LINUX
+ if (sa->sa_family == AF_UNIX && s->passcred)
{
- struct stat st = { 0 };
- if (stat (((struct sockaddr_un *) &addr)->sun_path, &st) < 0)
+ if (setsockopt (s->fd, SOL_SOCKET, SO_PASSCRED, &((int){ 1 }),
+ sizeof (int)) < 0)
{
- error = clib_error_return_unix (0, "stat (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0,
+ "setsockopt (SO_PASSCRED, "
+ "fd %d, '%s')",
+ s->fd, s->config);
goto done;
}
- st.st_mode |= S_IWGRP;
- if (chmod (((struct sockaddr_un *) &addr)->sun_path, st.st_mode) <
- 0)
+ }
+#endif
+
+ if (need_bind)
+ {
+ int bind_ret;
+ if (sa->sa_family == AF_UNIX && s->allow_group_write)
+ {
+ mode_t def_restrictions = umask (S_IWOTH);
+ bind_ret = bind (s->fd, sa, addr_len);
+ umask (def_restrictions);
+ }
+ else
+ bind_ret = bind (s->fd, sa, addr_len);
+
+ if (bind_ret < 0)
{
- error =
- clib_error_return_unix (0, "chmod (fd %d, '%s', mode %o)",
- s->fd, s->config, st.st_mode);
+ err = clib_error_return_unix (0, "bind (fd %d, '%s')", s->fd,
+ s->config);
goto done;
}
}
+
+ if (listen (s->fd, 5) < 0)
+ {
+ err = clib_error_return_unix (0, "listen (fd %d, '%s')", s->fd,
+ s->config);
+ goto done;
+ }
}
else
{
- if ((s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT)
- && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
+ if (s->non_blocking_connect && fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
{
- error = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0, "fcntl NONBLOCK (fd %d, '%s')",
+ s->fd, s->config);
goto done;
}
- while ((rv = connect (s->fd, &addr.sa, addr_len)) < 0
- && errno == EAGAIN)
+ while ((rv = connect (s->fd, sa, addr_len)) < 0 && errno == EAGAIN)
;
- if (rv < 0 && !((s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT) &&
- errno == EINPROGRESS))
+ if (rv < 0 && !(s->non_blocking_connect && errno == EINPROGRESS))
{
- error = clib_error_return_unix (0, "connect (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0, "connect (fd %d, '%s')", s->fd,
+ s->config);
goto done;
}
/* Connect was blocking so set fd to non-blocking now unless
* blocking mode explicitly requested. */
- if (!(s->flags & CLIB_SOCKET_F_NON_BLOCKING_CONNECT) &&
- !(s->flags & CLIB_SOCKET_F_BLOCKING) &&
+ if (!s->non_blocking_connect && !s->is_blocking &&
fcntl (s->fd, F_SETFL, O_NONBLOCK) < 0)
{
- error = clib_error_return_unix (0, "fcntl NONBLOCK2 (fd %d, '%s')",
- s->fd, s->config);
+ err = clib_error_return_unix (0, "fcntl NONBLOCK2 (fd %d, '%s')",
+ s->fd, s->config);
goto done;
}
}
- return error;
-
done:
- if (s->fd > 0)
- close (s->fd);
- return error;
-}
-
-__clib_export clib_error_t *
-clib_socket_init_netns (clib_socket_t *s, u8 *namespace)
-{
- if (namespace == NULL || namespace[0] == 0)
- return clib_socket_init (s);
-
- clib_error_t *error;
- int old_netns_fd, nfd;
-
- old_netns_fd = clib_netns_open (NULL /* self */);
- if ((nfd = clib_netns_open (namespace)) == -1)
+ if (err && s->fd > -1)
{
- error = clib_error_return_unix (0, "clib_netns_open '%s'", namespace);
- goto done;
+ close (s->fd);
+ s->fd = -1;
}
-
- if (clib_setns (nfd) == -1)
+#if CLIB_LINUX
+ if (netns_fd != -1)
{
- error = clib_error_return_unix (0, "setns '%s'", namespace);
- goto done;
+ setns (netns_fd, CLONE_NEWNET);
+ close (netns_fd);
}
-
- error = clib_socket_init (s);
-
-done:
- if (clib_setns (old_netns_fd) == -1)
- clib_warning ("Cannot set old ns");
- close (old_netns_fd);
-
- return error;
+#endif
+ vec_free (name);
+ return err;
}
__clib_export clib_error_t *
diff --git a/src/vppinfra/socket.h b/src/vppinfra/socket.h
index fa5ef1efced..c4f0b87e3e1 100644
--- a/src/vppinfra/socket.h
+++ b/src/vppinfra/socket.h
@@ -41,11 +41,25 @@
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
+#ifdef __FreeBSD__
+#include <errno.h>
+#define EBADFD EBADF
+#endif /* __FreeBSD__ */
#include <vppinfra/clib.h>
#include <vppinfra/error.h>
#include <vppinfra/format.h>
+typedef enum
+{
+ CLIB_SOCKET_TYPE_UNKNOWN = 0,
+ CLIB_SOCKET_TYPE_INET,
+ CLIB_SOCKET_TYPE_UNIX,
+#if CLIB_LINUX
+ CLIB_SOCKET_TYPE_LINUX_ABSTRACT,
+#endif
+} clib_socket_type_t;
+
typedef struct _socket_t
{
/* File descriptor. */
@@ -54,15 +68,21 @@ typedef struct _socket_t
/* Config string for socket HOST:PORT or just HOST. */
char *config;
- u32 flags;
-#define CLIB_SOCKET_F_IS_SERVER (1 << 0)
-#define CLIB_SOCKET_F_IS_CLIENT (0 << 0)
-#define CLIB_SOCKET_F_RX_END_OF_FILE (1 << 2)
-#define CLIB_SOCKET_F_NON_BLOCKING_CONNECT (1 << 3)
-#define CLIB_SOCKET_F_ALLOW_GROUP_WRITE (1 << 4)
-#define CLIB_SOCKET_F_SEQPACKET (1 << 5)
-#define CLIB_SOCKET_F_PASSCRED (1 << 6)
-#define CLIB_SOCKET_F_BLOCKING (1 << 7)
+ union
+ {
+ struct
+ {
+ u32 is_server : 1;
+ u32 rx_end_of_file : 1;
+ u32 non_blocking_connect : 1;
+ u32 allow_group_write : 1;
+ u32 is_seqpacket : 1;
+ u32 passcred : 1;
+ u32 is_blocking : 1;
+ u32 local_only : 1;
+ };
+ u32 flags;
+ };
/* Transmit buffer. Holds data waiting to be written. */
u8 *tx_buffer;
@@ -85,23 +105,33 @@ typedef struct _socket_t
int fds[], int num_fds);
clib_error_t *(*sendmsg_func) (struct _socket_t * s, void *msg, int msglen,
int fds[], int num_fds);
+ clib_socket_type_t type;
uword private_data;
} clib_socket_t;
+#define CLIB_SOCKET_FLAG(f) (((clib_socket_t){ .f = 1 }).flags)
+#define CLIB_SOCKET_F_IS_CLIENT 0
+#define CLIB_SOCKET_F_IS_SERVER CLIB_SOCKET_FLAG (is_server)
+#define CLIB_SOCKET_F_ALLOW_GROUP_WRITE CLIB_SOCKET_FLAG (allow_group_write)
+#define CLIB_SOCKET_F_SEQPACKET CLIB_SOCKET_FLAG (is_seqpacket)
+#define CLIB_SOCKET_F_PASSCRED CLIB_SOCKET_FLAG (passcred)
+#define CLIB_SOCKET_F_BLOCKING CLIB_SOCKET_FLAG (is_blocking)
+
/* socket config format is host:port.
Unspecified port causes a free one to be chosen starting
from IPPORT_USERRESERVED (5000). */
clib_error_t *clib_socket_init (clib_socket_t * socket);
-clib_error_t *clib_socket_init_netns (clib_socket_t *socket, u8 *namespace);
-
clib_error_t *clib_socket_accept (clib_socket_t * server,
clib_socket_t * client);
+int clib_socket_prefix_is_valid (char *s);
+int clib_socket_prefix_get_type (char *s);
+
always_inline uword
clib_socket_is_server (clib_socket_t * sock)
{
- return (sock->flags & CLIB_SOCKET_F_IS_SERVER) != 0;
+ return sock->is_server;
}
always_inline uword
@@ -120,7 +150,7 @@ clib_socket_is_connected (clib_socket_t * sock)
always_inline int
clib_socket_rx_end_of_file (clib_socket_t * s)
{
- return s->flags & CLIB_SOCKET_F_RX_END_OF_FILE;
+ return s->rx_end_of_file;
}
always_inline void *
diff --git a/src/vppinfra/sparse_vec.h b/src/vppinfra/sparse_vec.h
index 54a92ce7a84..3bd440d5dbd 100644
--- a/src/vppinfra/sparse_vec.h
+++ b/src/vppinfra/sparse_vec.h
@@ -38,8 +38,8 @@
#ifndef included_sparse_vec_h
#define included_sparse_vec_h
+#include <vppinfra/clib.h>
#include <vppinfra/vec.h>
-#include <vppinfra/bitops.h>
/* Sparsely indexed vectors. Basic idea taken from Hacker's delight.
Eliot added ranges. */
@@ -59,7 +59,7 @@ typedef struct
always_inline sparse_vec_header_t *
sparse_vec_header (void *v)
{
- return vec_header (v, sizeof (sparse_vec_header_t));
+ return vec_header (v);
}
/* Index 0 is always used to mark indices that are not valid in
@@ -73,17 +73,14 @@ sparse_vec_new (uword elt_bytes, uword sparse_index_bits)
void *v;
sparse_vec_header_t *h;
word n;
+ vec_attr_t va = { .elt_sz = elt_bytes, .hdr_sz = sizeof (h[0]) };
ASSERT (sparse_index_bits <= 16);
- v = _vec_resize ((void *) 0,
- /* length increment */ 8,
- /* data bytes */ 8 * elt_bytes,
- /* header bytes */ sizeof (h[0]),
- /* data align */ 0);
+ v = _vec_alloc_internal (/* data bytes */ 8, &va);
/* Make space for invalid entry (entry 0). */
- _vec_len (v) = 1;
+ _vec_set_len (v, 1, elt_bytes);
h = sparse_vec_header (v);
@@ -223,7 +220,19 @@ sparse_vec_index2 (void *v,
*i1_return = is_member1 + d1;
}
-#define sparse_vec_free(v) vec_free(v)
+#define sparse_vec_free(V) \
+ do \
+ { \
+ if (V) \
+ { \
+ sparse_vec_header_t *_h = sparse_vec_header (V); \
+ vec_free (_h->is_member_bitmap); \
+ vec_free (_h->member_counts); \
+ clib_mem_free (_h); \
+ V = 0; \
+ } \
+ } \
+ while (0)
#define sparse_vec_elt_at_index(v,i) \
vec_elt_at_index ((v), sparse_vec_index ((v), (i)))
diff --git a/src/vppinfra/std-formats.c b/src/vppinfra/std-formats.c
index 1616001f9c5..cb2872ad24b 100644
--- a/src/vppinfra/std-formats.c
+++ b/src/vppinfra/std-formats.c
@@ -135,6 +135,52 @@ format_white_space (u8 * s, va_list * va)
}
u8 *
+format_duration (u8 *s, va_list *args)
+{
+ f64 t = va_arg (*args, f64);
+ s = format (s, "");
+
+ const f64 seconds_per_minute = 60;
+ const f64 seconds_per_hour = 60 * seconds_per_minute;
+ const f64 seconds_per_day = 24 * seconds_per_hour;
+ uword days, hours, minutes, secs, msecs, usecs;
+
+ days = t / seconds_per_day;
+ t -= days * seconds_per_day;
+
+ hours = t / seconds_per_hour;
+ t -= hours * seconds_per_hour;
+
+ minutes = t / seconds_per_minute;
+ t -= minutes * seconds_per_minute;
+
+ secs = t;
+ t -= secs;
+
+ msecs = 1e3 * t;
+
+ usecs = 1e6 * t;
+ usecs = usecs % 1000;
+
+ if (t == 0.)
+ s = format (s, "0");
+ if (days)
+ s = format (s, "%ddays ", days);
+ if (hours)
+ s = format (s, "%dh ", hours);
+ if (minutes)
+ s = format (s, "%dmin ", minutes);
+ if (secs)
+ s = format (s, "%ds ", secs);
+ if (msecs)
+ s = format (s, "%dms ", msecs);
+ if (usecs)
+ s = format (s, "%dus", usecs);
+
+ return (s);
+}
+
+u8 *
format_time_interval (u8 * s, va_list * args)
{
u8 *fmt = va_arg (*args, u8 *);
@@ -204,6 +250,24 @@ format_time_interval (u8 * s, va_list * args)
return s;
}
+/* Format base 10 e.g. 100, 100K, 100M, 100G */
+__clib_export u8 *
+format_base10 (u8 *s, va_list *va)
+{
+ u64 size = va_arg (*va, u64);
+
+ if (size < 1000)
+ s = format (s, "%d", size);
+ else if (size < 1000000)
+ s = format (s, "%.2fK", (f64) size / 1000.);
+ else if (size < 1000000000)
+ s = format (s, "%.2fM", (f64) size / 1000000.);
+ else
+ s = format (s, "%.2fG", (f64) size / 1000000000.);
+
+ return s;
+}
+
/* Unparse memory size e.g. 100, 100k, 100m, 100g. */
__clib_export u8 *
format_memory_size (u8 * s, va_list * va)
@@ -332,8 +396,6 @@ format_c_identifier (u8 * s, va_list * va)
uword i, l;
l = ~0;
- if (clib_mem_is_vec (id))
- l = vec_len (id);
if (id)
for (i = 0; i < l && id[i] != 0; i++)
@@ -352,7 +414,7 @@ __clib_export u8 *
format_hexdump (u8 * s, va_list * args)
{
u8 *data = va_arg (*args, u8 *);
- uword len = va_arg (*args, uword);
+ u32 len = va_arg (*args, u32);
int i, index = 0;
const int line_len = 16;
u8 *line_hex = 0;
@@ -393,6 +455,104 @@ format_hexdump (u8 * s, va_list * args)
return s;
}
+__clib_export u8 *
+format_hexdump_u16 (u8 *s, va_list *args)
+{
+ u16 *data = va_arg (*args, u16 *);
+ u32 len = va_arg (*args, u32);
+ u32 indent = format_get_indent (s);
+
+ if (!len)
+ return s;
+
+ for (int i = 0; i < len; i++)
+ {
+ if (i % 8 == 0)
+ {
+ s = format (s, "%s%U%05x: ", i ? "\n" : "", format_white_space,
+ i ? indent : 0, i * 2);
+ }
+ s = format (s, " %04lx", data[i]);
+ }
+ return s;
+}
+
+__clib_export u8 *
+format_hexdump_u32 (u8 *s, va_list *args)
+{
+ u32 *data = va_arg (*args, u32 *);
+ u32 len = va_arg (*args, u32);
+ u32 indent = format_get_indent (s);
+
+ if (!len)
+ return s;
+
+ for (int i = 0; i < len; i++)
+ {
+ if (i % 4 == 0)
+ {
+ s = format (s, "%s%U%05x: ", i ? "\n" : "", format_white_space,
+ i ? indent : 0, i * 4);
+ }
+ s = format (s, " %08lx", data[i]);
+ }
+ return s;
+}
+
+__clib_export u8 *
+format_hexdump_u64 (u8 *s, va_list *args)
+{
+ u64 *data = va_arg (*args, u64 *);
+ u32 len = va_arg (*args, u32);
+ u32 indent = format_get_indent (s);
+
+ if (!len)
+ return s;
+
+ for (int i = 0; i < len; i++)
+ {
+ if (i % 2 == 0)
+ {
+ s = format (s, "%s%U%05x: ", i ? "\n" : "", format_white_space,
+ i ? indent : 0, i * 8);
+ }
+ s = format (s, " %016lx", data[i]);
+ }
+ return s;
+}
+
+__clib_export u8 *
+format_uword_bitmap (u8 *s, va_list *args)
+{
+ uword *bitmap = va_arg (*args, uword *);
+ int n_uword = va_arg (*args, int);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "%6s", "");
+
+ for (int i = uword_bits - 4; i >= 0; i -= 4)
+ s = format (s, "%5d", i);
+
+ vec_add1 (s, '\n');
+
+ for (int j = n_uword - 1; j >= 0; j--)
+ {
+ s = format (s, "%U0x%04x ", format_white_space, indent,
+ j * uword_bits / 8);
+ for (int i = uword_bits - 1; i >= 0; i--)
+ {
+ vec_add1 (s, (1ULL << i) & bitmap[j] ? '1' : '.');
+ if (i % 4 == 0)
+ vec_add1 (s, ' ');
+ }
+ s = format (s, uword_bits == 64 ? "0x%016lx" : "0x%08lx", bitmap[j]);
+ if (j)
+ vec_add1 (s, '\n');
+ }
+
+ return s;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h
index 0d2c0655c50..b1ef0e4809b 100644
--- a/src/vppinfra/string.h
+++ b/src/vppinfra/string.h
@@ -47,6 +47,9 @@
#include <vppinfra/clib.h> /* for CLIB_LINUX_KERNEL */
#include <vppinfra/vector.h>
#include <vppinfra/error_bootstrap.h>
+#ifdef __SSE4_2__
+#include <vppinfra/memcpy_x86_64.h>
+#endif
#ifdef CLIB_LINUX_KERNEL
#include <linux/string.h>
@@ -67,26 +70,6 @@
/* Exchanges source and destination. */
void clib_memswap (void *_a, void *_b, uword bytes);
-/*
- * the vector unit memcpy variants confuse coverity
- * so don't let it anywhere near them.
- */
-#ifndef __COVERITY__
-#if __AVX512BITALG__
-#include <vppinfra/memcpy_avx512.h>
-#define clib_memcpy_fast_arch(a, b, c) clib_memcpy_fast_avx512 (a, b, c)
-#elif __AVX2__
-#include <vppinfra/memcpy_avx2.h>
-#define clib_memcpy_fast_arch(a, b, c) clib_memcpy_fast_avx2 (a, b, c)
-#elif __SSSE3__
-#include <vppinfra/memcpy_sse3.h>
-#define clib_memcpy_fast_arch(a, b, c) clib_memcpy_fast_sse3 (a, b, c)
-#endif /* __AVX512BITALG__ */
-#endif /* __COVERITY__ */
-
-#ifndef clib_memcpy_fast_arch
-#define clib_memcpy_fast_arch(a, b, c) memcpy (a, b, c)
-#endif /* clib_memcpy_fast_arch */
static_always_inline void *
clib_memcpy_fast (void *restrict dst, const void *restrict src, size_t n)
@@ -94,10 +77,34 @@ clib_memcpy_fast (void *restrict dst, const void *restrict src, size_t n)
ASSERT (dst && src &&
"memcpy(src, dst, n) with src == NULL or dst == NULL is undefined "
"behaviour");
- return clib_memcpy_fast_arch (dst, src, n);
+#if defined(__COVERITY__)
+ return memcpy (dst, src, n);
+#elif defined(__SSE4_2__)
+ clib_memcpy_x86_64 (dst, src, n);
+ return dst;
+#else
+ return memcpy (dst, src, n);
+#endif
}
-#undef clib_memcpy_fast_arch
+static_always_inline void *
+clib_memmove (void *dst, const void *src, size_t n)
+{
+ u8 *d = (u8 *) dst;
+ u8 *s = (u8 *) src;
+
+ if (s == d)
+ return d;
+
+ if (d > s)
+ for (uword i = n - 1; (i + 1) > 0; i--)
+ d[i] = s[i];
+ else
+ for (uword i = 0; i < n; i++)
+ d[i] = s[i];
+
+ return d;
+}
#include <vppinfra/memcpy.h>
@@ -246,14 +253,14 @@ clib_memcpy_le (u8 * dst, u8 * src, u8 len, u8 max_len)
d0 = u8x32_load_unaligned (dst);
d1 = u8x32_load_unaligned (dst + 32);
- d0 = u8x32_blend (d0, s0, u8x32_is_greater (lv, mask));
+ d0 = u8x32_blend (d0, s0, lv > mask);
u8x32_store_unaligned (d0, dst);
if (max_len <= 32)
return;
mask += add;
- d1 = u8x32_blend (d1, s1, u8x32_is_greater (lv, mask));
+ d1 = u8x32_blend (d1, s1, lv > mask);
u8x32_store_unaligned (d1, dst + 32);
#elif defined (CLIB_HAVE_VEC128)
@@ -271,25 +278,25 @@ clib_memcpy_le (u8 * dst, u8 * src, u8 len, u8 max_len)
d2 = u8x16_load_unaligned (dst + 32);
d3 = u8x16_load_unaligned (dst + 48);
- d0 = u8x16_blend (d0, s0, u8x16_is_greater (lv, mask));
+ d0 = u8x16_blend (d0, s0, lv > mask);
u8x16_store_unaligned (d0, dst);
if (max_len <= 16)
return;
mask += add;
- d1 = u8x16_blend (d1, s1, u8x16_is_greater (lv, mask));
+ d1 = u8x16_blend (d1, s1, lv > mask);
u8x16_store_unaligned (d1, dst + 16);
if (max_len <= 32)
return;
mask += add;
- d2 = u8x16_blend (d2, s2, u8x16_is_greater (lv, mask));
+ d2 = u8x16_blend (d2, s2, lv > mask);
u8x16_store_unaligned (d2, dst + 32);
mask += add;
- d3 = u8x16_blend (d3, s3, u8x16_is_greater (lv, mask));
+ d3 = u8x16_blend (d3, s3, lv > mask);
u8x16_store_unaligned (d3, dst + 48);
#else
memmove (dst, src, len);
@@ -334,9 +341,17 @@ clib_memset_u64 (void *p, u64 val, uword count)
if (count == 0)
return;
#else
+#if defined(CLIB_HAVE_VEC128)
+ u64x2 v = u64x2_splat (val);
+#endif
while (count >= 4)
{
+#if defined(CLIB_HAVE_VEC128)
+ u64x2_store_unaligned (v, ptr);
+ u64x2_store_unaligned (v, ptr + 2);
+#else
ptr[0] = ptr[1] = ptr[2] = ptr[3] = val;
+#endif
ptr += 4;
count -= 4;
}
@@ -483,239 +498,6 @@ clib_memset_u8 (void *p, u8 val, uword count)
ptr++[0] = val;
}
-static_always_inline uword
-clib_count_equal_u64 (u64 * data, uword max_count)
-{
- uword count;
- u64 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u64x4 splat = u64x4_splat (first);
- while (count + 3 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- {
- count += count_trailing_zeros (~bmp) / 8;
- return count;
- }
-
- data += 4;
- count += 4;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-static_always_inline uword
-clib_count_equal_u32 (u32 * data, uword max_count)
-{
- uword count;
- u32 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u32x8 splat = u32x8_splat (first);
- while (count + 7 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- {
- count += count_trailing_zeros (~bmp) / 4;
- return count;
- }
-
- data += 8;
- count += 8;
- }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u32x4 splat = u32x4_splat (first);
- while (count + 3 < max_count)
- {
- u64 bmp;
- bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
- if (bmp != 0xffff)
- {
- count += count_trailing_zeros (~bmp) / 4;
- return count;
- }
-
- data += 4;
- count += 4;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-static_always_inline uword
-clib_count_equal_u16 (u16 * data, uword max_count)
-{
- uword count;
- u16 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u16x16 splat = u16x16_splat (first);
- while (count + 15 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- {
- count += count_trailing_zeros (~bmp) / 2;
- return count;
- }
-
- data += 16;
- count += 16;
- }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u16x8 splat = u16x8_splat (first);
- while (count + 7 < max_count)
- {
- u64 bmp;
- bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
- if (bmp != 0xffff)
- {
- count += count_trailing_zeros (~bmp) / 2;
- return count;
- }
-
- data += 8;
- count += 8;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-static_always_inline uword
-clib_count_equal_u8 (u8 * data, uword max_count)
-{
- uword count;
- u8 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u8x32 splat = u8x32_splat (first);
- while (count + 31 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- return max_count;
-
- data += 32;
- count += 32;
- }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u8x16 splat = u8x16_splat (first);
- while (count + 15 < max_count)
- {
- u64 bmp;
- bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
- if (bmp != 0xffff)
- {
- count += count_trailing_zeros (~bmp);
- return count;
- }
-
- data += 16;
- count += 16;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
/*
* This macro is to provide smooth mapping from memcmp to memcmp_s.
@@ -926,14 +708,6 @@ strncmp_s_inline (const char *s1, rsize_t s1max, const char *s2, rsize_t n,
return EOK;
}
-/*
- * This macro is provided for smooth migration from strcpy. It is not perfect
- * because we don't know the size of the destination buffer to pass to strcpy_s.
- * We improvise dmax with CLIB_STRING_MACRO_MAX.
- * Applications are encouraged to move to the C11 strcpy_s API.
- */
-#define clib_strcpy(d,s) strcpy_s_inline(d,CLIB_STRING_MACRO_MAX,s)
-
errno_t strcpy_s (char *__restrict__ dest, rsize_t dmax,
const char *__restrict__ src);
@@ -1060,16 +834,6 @@ strncpy_s_inline (char *__restrict__ dest, rsize_t dmax,
return status;
}
-/*
- * This macro is to provide smooth migration from strcat to strcat_s.
- * Because there is no dmax in strcat, we improvise it with
- * CLIB_STRING_MACRO_MAX. Please note there may be a chance to overwrite dest
- * with too many bytes from src.
- * Applications are encouraged to use C11 API to provide the actual dmax
- * for proper checking and protection.
- */
-#define clib_strcat(d,s) strcat_s_inline(d,CLIB_STRING_MACRO_MAX,s)
-
errno_t strcat_s (char *__restrict__ dest, rsize_t dmax,
const char *__restrict__ src);
@@ -1121,16 +885,6 @@ strcat_s_inline (char *__restrict__ dest, rsize_t dmax,
return EOK;
}
-/*
- * This macro is to provide smooth migration from strncat to strncat_s.
- * The unsafe strncat does not have s1max. We improvise it with
- * CLIB_STRING_MACRO_MAX. Please note there may be a chance to overwrite
- * dest with too many bytes from src.
- * Applications are encouraged to move to C11 strncat_s which requires dmax
- * from the caller and provides checking to safeguard the memory corruption.
- */
-#define clib_strncat(d,s,n) strncat_s_inline(d,CLIB_STRING_MACRO_MAX,s,n)
-
errno_t strncat_s (char *__restrict__ dest, rsize_t dmax,
const char *__restrict__ src, rsize_t n);
@@ -1350,23 +1104,6 @@ strtok_s_inline (char *__restrict__ s1, rsize_t * __restrict__ s1max,
return (ptoken);
}
-/*
- * This macro is to provide smooth mapping from strstr to strstr_s.
- * strstr_s requires s1max and s2max which the unsafe API does not have. So
- * we have to improvise them with CLIB_STRING_MACRO_MAX which may cause us
- * to access memory beyond it is intended if s1 or s2 is unterminated.
- * For the record, strstr crashes if s1 or s2 is unterminated. But this macro
- * does not.
- * Applications are encouraged to use the cool C11 strstr_s API to avoid
- * this problem.
- */
-#define clib_strstr(s1,s2) \
- ({ char * __substring = 0; \
- strstr_s_inline (s1, CLIB_STRING_MACRO_MAX, s2, CLIB_STRING_MACRO_MAX, \
- &__substring); \
- __substring; \
- })
-
errno_t strstr_s (char *s1, rsize_t s1max, const char *s2, rsize_t s2max,
char **substring);
@@ -1395,7 +1132,7 @@ strstr_s_inline (char *s1, rsize_t s1max, const char *s2, rsize_t s2max,
clib_c11_violation ("substring NULL");
if (s1 && s1max && (s1[clib_strnlen (s1, s1max)] != '\0'))
clib_c11_violation ("s1 unterminated");
- if (s2 && s2max && (s2[clib_strnlen (s2, s1max)] != '\0'))
+ if (s2 && s2max && (s2[clib_strnlen (s2, s2max)] != '\0'))
clib_c11_violation ("s2 unterminated");
return EINVAL;
}
@@ -1424,6 +1161,13 @@ strstr_s_inline (char *s1, rsize_t s1max, const char *s2, rsize_t s2max,
return EOK;
}
+static_always_inline const char *
+clib_string_skip_prefix (const char *s, const char *prefix)
+{
+ uword len = __builtin_strlen (prefix);
+ return s + (__builtin_strncmp (s, prefix, len) ? 0 : len);
+}
+
#endif /* included_clib_string_h */
/*
diff --git a/src/vppinfra/test/aes_cbc.c b/src/vppinfra/test/aes_cbc.c
new file mode 100644
index 00000000000..be5f8fb176a
--- /dev/null
+++ b/src/vppinfra/test/aes_cbc.c
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#if defined(__AES__) || defined(__ARM_FEATURE_CRYPTO)
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/aes_cbc.h>
+
+static const u8 iv[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
+};
+
+static const u8 plaintext[] = {
+ 0x6B, 0xC1, 0xBE, 0xE2, 0x2E, 0x40, 0x9F, 0x96, 0xE9, 0x3D, 0x7E, 0x11, 0x73,
+ 0x93, 0x17, 0x2A, 0xAE, 0x2D, 0x8A, 0x57, 0x1E, 0x03, 0xAC, 0x9C, 0x9E, 0xB7,
+ 0x6F, 0xAC, 0x45, 0xAF, 0x8E, 0x51, 0x30, 0xC8, 0x1C, 0x46, 0xA3, 0x5C, 0xE4,
+ 0x11, 0xE5, 0xFB, 0xC1, 0x19, 0x1A, 0x0A, 0x52, 0xEF, 0xF6, 0x9F, 0x24, 0x45,
+ 0xDF, 0x4F, 0x9B, 0x17, 0xAD, 0x2B, 0x41, 0x7B, 0xE6, 0x6C, 0x37, 0x10,
+};
+
+static const u8 key128[] = { 0x2B, 0x7E, 0x15, 0x16, 0x28, 0xAE, 0xD2, 0xA6,
+ 0xAB, 0xF7, 0x15, 0x88, 0x09, 0xCF, 0x4F, 0x3C };
+
+static const u8 key192[24] = {
+ 0x8E, 0x73, 0xB0, 0xF7, 0xDA, 0x0E, 0x64, 0x52, 0xC8, 0x10, 0xF3, 0x2B,
+ 0x80, 0x90, 0x79, 0xE5, 0x62, 0xF8, 0xEA, 0xD2, 0x52, 0x2C, 0x6B, 0x7B,
+};
+
+static const u8 ciphertext128[] = {
+ 0x76, 0x49, 0xAB, 0xAC, 0x81, 0x19, 0xB2, 0x46, 0xCE, 0xE9, 0x8E, 0x9B, 0x12,
+ 0xE9, 0x19, 0x7D, 0x50, 0x86, 0xCB, 0x9B, 0x50, 0x72, 0x19, 0xEE, 0x95, 0xDB,
+ 0x11, 0x3A, 0x91, 0x76, 0x78, 0xB2, 0x73, 0xBE, 0xD6, 0xB8, 0xE3, 0xC1, 0x74,
+ 0x3B, 0x71, 0x16, 0xE6, 0x9E, 0x22, 0x22, 0x95, 0x16, 0x3F, 0xF1, 0xCA, 0xA1,
+ 0x68, 0x1F, 0xAC, 0x09, 0x12, 0x0E, 0xCA, 0x30, 0x75, 0x86, 0xE1, 0xA7,
+};
+
+static const u8 ciphertext192[64] = {
+ 0x4F, 0x02, 0x1D, 0xB2, 0x43, 0xBC, 0x63, 0x3D, 0x71, 0x78, 0x18, 0x3A, 0x9F,
+ 0xA0, 0x71, 0xE8, 0xB4, 0xD9, 0xAD, 0xA9, 0xAD, 0x7D, 0xED, 0xF4, 0xE5, 0xE7,
+ 0x38, 0x76, 0x3F, 0x69, 0x14, 0x5A, 0x57, 0x1B, 0x24, 0x20, 0x12, 0xFB, 0x7A,
+ 0xE0, 0x7F, 0xA9, 0xBA, 0xAC, 0x3D, 0xF1, 0x02, 0xE0, 0x08, 0xB0, 0xE2, 0x79,
+ 0x88, 0x59, 0x88, 0x81, 0xD9, 0x20, 0xA9, 0xE6, 0x4F, 0x56, 0x15, 0xCD,
+};
+
+static const u8 key256[32] = {
+ 0x60, 0x3D, 0xEB, 0x10, 0x15, 0xCA, 0x71, 0xBE, 0x2B, 0x73, 0xAE,
+ 0xF0, 0x85, 0x7D, 0x77, 0x81, 0x1F, 0x35, 0x2C, 0x07, 0x3B, 0x61,
+ 0x08, 0xD7, 0x2D, 0x98, 0x10, 0xA3, 0x09, 0x14, 0xDF, 0xF4,
+};
+
+static const u8 ciphertext256[64] = {
+ 0xF5, 0x8C, 0x4C, 0x04, 0xD6, 0xE5, 0xF1, 0xBA, 0x77, 0x9E, 0xAB, 0xFB, 0x5F,
+ 0x7B, 0xFB, 0xD6, 0x9C, 0xFC, 0x4E, 0x96, 0x7E, 0xDB, 0x80, 0x8D, 0x67, 0x9F,
+ 0x77, 0x7B, 0xC6, 0x70, 0x2C, 0x7D, 0x39, 0xF2, 0x33, 0x69, 0xA9, 0xD9, 0xBA,
+ 0xCF, 0xA5, 0x30, 0xE2, 0x63, 0x04, 0x23, 0x14, 0x61, 0xB2, 0xEB, 0x05, 0xE2,
+ 0xC3, 0x9B, 0xE9, 0xFC, 0xDA, 0x6C, 0x19, 0x07, 0x8C, 0x6A, 0x9D, 0x1B,
+};
+
+#define _(b) \
+ static clib_error_t *test_clib_aes##b##_cbc_encrypt (clib_error_t *err) \
+ { \
+ aes_cbc_key_data_t k; \
+ u8 data[512]; \
+ clib_aes##b##_cbc_key_expand (&k, key##b); \
+ clib_aes##b##_cbc_encrypt (&k, plaintext, sizeof (plaintext), iv, data); \
+ if (memcmp (ciphertext##b, data, sizeof (ciphertext##b)) != 0) \
+ err = \
+ clib_error_return (err, "encrypted data doesn't match plaintext"); \
+ return err; \
+ } \
+ void __test_perf_fn perftest_aes##b##_enc_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_cbc_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ clib_aes##b##_cbc_key_expand (kd, key##b); \
+ \
+ test_perf_event_enable (tp); \
+ clib_aes##b##_cbc_encrypt (kd, src, n, iv, dst); \
+ test_perf_event_disable (tp); \
+ }
+_ (128)
+_ (192)
+_ (256)
+#undef _
+
+REGISTER_TEST (clib_aes128_cbc_encrypt) = {
+ .name = "clib_aes128_cbc_encrypt",
+ .fn = test_clib_aes128_cbc_encrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes128_enc_var_sz }),
+};
+
+REGISTER_TEST (clib_aes192_cbc_encrypt) = {
+ .name = "clib_aes192_cbc_encrypt",
+ .fn = test_clib_aes192_cbc_encrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes192_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes192_enc_var_sz }),
+};
+
+REGISTER_TEST (clib_aes256_cbc_encrypt) = {
+ .name = "clib_aes256_cbc_encrypt",
+ .fn = test_clib_aes256_cbc_encrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes256_enc_var_sz }),
+};
+
+#define _(b) \
+ static clib_error_t *test_clib_aes##b##_cbc_decrypt (clib_error_t *err) \
+ { \
+ aes_cbc_key_data_t k; \
+ u8 data[512]; \
+ clib_aes##b##_cbc_key_expand (&k, key##b); \
+ clib_aes##b##_cbc_decrypt (&k, ciphertext##b, sizeof (ciphertext##b), iv, \
+ data); \
+ if (memcmp (plaintext, data, sizeof (plaintext)) != 0) \
+ err = \
+ clib_error_return (err, "decrypted data doesn't match plaintext"); \
+ return err; \
+ } \
+ void __test_perf_fn perftest_aes##b##_dec_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_cbc_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ clib_aes##b##_cbc_key_expand (kd, key##b); \
+ \
+ test_perf_event_enable (tp); \
+ clib_aes##b##_cbc_decrypt (kd, src, n, iv, dst); \
+ test_perf_event_disable (tp); \
+ }
+
+_ (128)
+_ (192)
+_ (256)
+#undef _
+
+REGISTER_TEST (clib_aes128_cbc_decrypt) = {
+ .name = "clib_aes128_cbc_decrypt",
+ .fn = test_clib_aes128_cbc_decrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes128_dec_var_sz }),
+};
+
+REGISTER_TEST (clib_aes192_cbc_decrypt) = {
+ .name = "clib_aes192_cbc_decrypt",
+ .fn = test_clib_aes192_cbc_decrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes192_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes192_dec_var_sz }),
+};
+
+REGISTER_TEST (clib_aes256_cbc_decrypt) = {
+ .name = "clib_aes256_cbc_decrypt",
+ .fn = test_clib_aes256_cbc_decrypt,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 9008,
+ .fn = perftest_aes256_dec_var_sz }),
+};
+
+#endif
diff --git a/src/vppinfra/test/aes_ctr.c b/src/vppinfra/test/aes_ctr.c
new file mode 100644
index 00000000000..2892700fb27
--- /dev/null
+++ b/src/vppinfra/test/aes_ctr.c
@@ -0,0 +1,481 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2024 Cisco Systems, Inc.
+ */
+
+#if defined(__AES__) || defined(__ARM_FEATURE_CRYPTO)
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/aes_ctr.h>
+
+static const struct
+{
+ char *name;
+ const u8 *pt, *key, *ct, *iv;
+ u32 data_len;
+} test_cases128[] = {
+ /* test cases */
+ { .name = "RFC3686 Test Vector #1",
+ .key = (const u8[16]){ 0xae, 0x68, 0x52, 0xf8, 0x12, 0x10, 0x67, 0xcc,
+ 0x4b, 0xf7, 0xa5, 0x76, 0x55, 0x77, 0xf3, 0x9e },
+ .iv = (const u8[16]){ 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 },
+
+ .pt = (const u8 *) "Single block msg",
+ .ct = (const u8[16]){ 0xe4, 0x09, 0x5d, 0x4f, 0xb7, 0xa7, 0xb3, 0x79, 0x2d,
+ 0x61, 0x75, 0xa3, 0x26, 0x13, 0x11, 0xb8 },
+ .data_len = 16 },
+ { .name = "RFC3686 Test Vector #2",
+ .key = (const u8[16]){ 0x7e, 0x24, 0x06, 0x78, 0x17, 0xfa, 0xe0, 0xd7,
+ 0x43, 0xd6, 0xce, 0x1f, 0x32, 0x53, 0x91, 0x63 },
+ .iv = (const u8[16]){ 0x00, 0x6c, 0xb6, 0xdb, 0xc0, 0x54, 0x3b, 0x59, 0xda,
+ 0x48, 0xd9, 0x0b, 0x00, 0x00, 0x00, 0x01 },
+ .pt = (const u8[32]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
+ .ct = (const u8[32]){ 0x51, 0x04, 0xa1, 0x06, 0x16, 0x8a, 0x72, 0xd9,
+ 0x79, 0x0d, 0x41, 0xee, 0x8e, 0xda, 0xd3, 0x88,
+ 0xeb, 0x2e, 0x1e, 0xfc, 0x46, 0xda, 0x57, 0xc8,
+ 0xfc, 0xe6, 0x30, 0xdf, 0x91, 0x41, 0xbe, 0x28 },
+ .data_len = 32 },
+ { .name = "RFC3686 Test Vector #3",
+ .key = (const u8[16]){ 0x76, 0x91, 0xbe, 0x03, 0x5e, 0x50, 0x20, 0xa8,
+ 0xac, 0x6e, 0x61, 0x85, 0x29, 0xf9, 0xa0, 0xdc },
+ .iv = (const u8[16]){ 0x00, 0xe0, 0x01, 0x7b, 0x27, 0x77, 0x7f, 0x3f, 0x4a,
+ 0x17, 0x86, 0xf0, 0x00, 0x00, 0x00, 0x01 },
+ .pt =
+ (const u8[36]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a,
+ 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23 },
+ .ct =
+ (const u8[36]){ 0xc1, 0xcf, 0x48, 0xa8, 0x9f, 0x2f, 0xfd, 0xd9, 0xcf,
+ 0x46, 0x52, 0xe9, 0xef, 0xdb, 0x72, 0xd7, 0x45, 0x40,
+ 0xa4, 0x2b, 0xde, 0x6d, 0x78, 0x36, 0xd5, 0x9a, 0x5c,
+ 0xea, 0xae, 0xf3, 0x10, 0x53, 0x25, 0xb2, 0x07, 0x2f },
+ .data_len = 36 },
+}, test_cases192[] = {
+ { .name = "RFC3686 Test Vector #4",
+ .key = (const u8[24]){ 0x16, 0xaf, 0x5b, 0x14, 0x5f, 0xc9, 0xf5, 0x79,
+ 0xc1, 0x75, 0xf9, 0x3e, 0x3b, 0xfb, 0x0e, 0xed,
+ 0x86, 0x3d, 0x06, 0xcc, 0xfd, 0xb7, 0x85, 0x15 },
+ .iv = (const u8[16]){ 0x00, 0x00, 0x00, 0x48, 0x36, 0x73, 0x3c, 0x14, 0x7d,
+ 0x6d, 0x93, 0xcb, 0x00, 0x00, 0x00, 0x01 },
+ .pt = (const u8[16]){ 0x53, 0x69, 0x6e, 0x67, 0x6c, 0x65, 0x20, 0x62, 0x6c,
+ 0x6f, 0x63, 0x6b, 0x20, 0x6d, 0x73, 0x67 },
+ .ct = (const u8[16]){ 0x4b, 0x55, 0x38, 0x4f, 0xe2, 0x59, 0xc9, 0xc8, 0x4e,
+ 0x79, 0x35, 0xa0, 0x03, 0xcb, 0xe9, 0x28 },
+ .data_len = 16 },
+ { .name = "RFC3686 Test Vector #5",
+ .key = (const u8[24]){ 0x7c, 0x5c, 0xb2, 0x40, 0x1b, 0x3d, 0xc3, 0x3c,
+ 0x19, 0xe7, 0x34, 0x08, 0x19, 0xe0, 0xf6, 0x9c,
+ 0x67, 0x8c, 0x3d, 0xb8, 0xe6, 0xf6, 0xa9, 0x1a },
+ .iv = (const u8[16]){ 0x00, 0x96, 0xb0, 0x3b, 0x02, 0x0c, 0x6e, 0xad, 0xc2,
+ 0xcb, 0x50, 0x0d, 0x00, 0x00, 0x00, 0x01 },
+ .pt = (const u8[32]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
+ .ct = (const u8[32]){ 0x45, 0x32, 0x43, 0xfc, 0x60, 0x9b, 0x23, 0x32,
+ 0x7e, 0xdf, 0xaa, 0xfa, 0x71, 0x31, 0xcd, 0x9f,
+ 0x84, 0x90, 0x70, 0x1c, 0x5a, 0xd4, 0xa7, 0x9c,
+ 0xfc, 0x1f, 0xe0, 0xff, 0x42, 0xf4, 0xfb, 0x00 },
+ .data_len = 32 },
+ { .name = "RFC3686 Test Vector #6",
+ .key = (const u8[24]){ 0x02, 0xBF, 0x39, 0x1E, 0xE8, 0xEC, 0xB1, 0x59,
+ 0xB9, 0x59, 0x61, 0x7B, 0x09, 0x65, 0x27, 0x9B,
+ 0xF5, 0x9B, 0x60, 0xA7, 0x86, 0xD3, 0xE0, 0xFE },
+ .iv = (const u8[16]){ 0x00, 0x07, 0xBD, 0xFD, 0x5C, 0xBD, 0x60, 0x27, 0x8D,
+ 0xCC, 0x09, 0x12, 0x00, 0x00, 0x00, 0x01 },
+ .pt =
+ (const u8[36]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11,
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A,
+ 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23 },
+ .ct =
+ (const u8[36]){ 0x96, 0x89, 0x3F, 0xC5, 0x5E, 0x5C, 0x72, 0x2F, 0x54,
+ 0x0B, 0x7D, 0xD1, 0xDD, 0xF7, 0xE7, 0x58, 0xD2, 0x88,
+ 0xBC, 0x95, 0xC6, 0x91, 0x65, 0x88, 0x45, 0x36, 0xC8,
+ 0x11, 0x66, 0x2F, 0x21, 0x88, 0xAB, 0xEE, 0x09, 0x35 },
+ .data_len = 36 },
+
+}, test_cases256[] = {
+ { .name = "RFC3686 Test Vector #7",
+ .key = (const u8[32]){ 0x77, 0x6b, 0xef, 0xf2, 0x85, 0x1d, 0xb0, 0x6f,
+ 0x4c, 0x8a, 0x05, 0x42, 0xc8, 0x69, 0x6f, 0x6c,
+ 0x6a, 0x81, 0xaf, 0x1e, 0xec, 0x96, 0xb4, 0xd3,
+ 0x7f, 0xc1, 0xd6, 0x89, 0xe6, 0xc1, 0xc1, 0x04 },
+ .iv = (const u8[16]){ 0x00, 0x00, 0x00, 0x60, 0xdb, 0x56, 0x72, 0xc9, 0x7a,
+ 0xa8, 0xf0, 0xb2, 0x00, 0x00, 0x00, 0x01 },
+ .pt = (const u8 *) "Single block msg",
+ .ct = (const u8[16]){ 0x14, 0x5a, 0xd0, 0x1d, 0xbf, 0x82, 0x4e, 0xc7, 0x56,
+ 0x08, 0x63, 0xdc, 0x71, 0xe3, 0xe0, 0xc0 },
+ .data_len = 16 },
+ { .name = "RFC3686 Test Vector #8",
+ .key = (const u8[32]){ 0xf6, 0xd6, 0x6d, 0x6b, 0xd5, 0x2d, 0x59, 0xbb,
+ 0x07, 0x96, 0x36, 0x58, 0x79, 0xef, 0xf8, 0x86,
+ 0xc6, 0x6d, 0xd5, 0x1a, 0x5b, 0x6a, 0x99, 0x74,
+ 0x4b, 0x50, 0x59, 0x0c, 0x87, 0xa2, 0x38, 0x84 },
+ .iv = (const u8[16]){ 0x00, 0xfa, 0xac, 0x24, 0xc1, 0x58, 0x5e, 0xf1, 0x5a,
+ 0x43, 0xd8, 0x75, 0x00, 0x00, 0x00, 0x01 },
+ .pt = (const u8[32]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f },
+ .ct = (const u8[32]){ 0xf0, 0x5e, 0x23, 0x1b, 0x38, 0x94, 0x61, 0x2c,
+ 0x49, 0xee, 0x00, 0x0b, 0x80, 0x4e, 0xb2, 0xa9,
+ 0xb8, 0x30, 0x6b, 0x50, 0x8f, 0x83, 0x9d, 0x6a,
+ 0x55, 0x30, 0x83, 0x1d, 0x93, 0x44, 0xaf, 0x1c },
+ .data_len = 32 },
+ { .name = "RFC3686 Test Vector #9",
+ .key = (const u8[32]){ 0xff, 0x7a, 0x61, 0x7c, 0xe6, 0x91, 0x48, 0xe4,
+ 0xf1, 0x72, 0x6e, 0x2f, 0x43, 0x58, 0x1d, 0xe2,
+ 0xaa, 0x62, 0xd9, 0xf8, 0x05, 0x53, 0x2e, 0xdf,
+ 0xf1, 0xee, 0xd6, 0x87, 0xfb, 0x54, 0x15, 0x3d },
+ .iv = (const u8[16]){ 0x00, 0x1c, 0xc5, 0xb7, 0x51, 0xa5, 0x1d, 0x70, 0xa1,
+ 0xc1, 0x11, 0x48, 0x00, 0x00, 0x00, 0x01 },
+ .pt =
+ (const u8[36]){ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a,
+ 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23 },
+ .ct =
+ (const u8[36]){ 0xeb, 0x6c, 0x52, 0x82, 0x1d, 0x0b, 0xbb, 0xf7, 0xce,
+ 0x75, 0x94, 0x46, 0x2a, 0xca, 0x4f, 0xaa, 0xb4, 0x07,
+ 0xdf, 0x86, 0x65, 0x69, 0xfd, 0x07, 0xf4, 0x8c, 0xc0,
+ 0xb5, 0x83, 0xd6, 0x07, 0x1f, 0x1e, 0xc0, 0xe6, 0xb8 },
+ .data_len = 36 }
+};
+
+#define MAX_TEST_DATA_LEN 256
+
+#define INC_TEST_BYTES (256 * 16 + 1)
+
+static u8 inc_key128[] = {
+ 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6,
+ 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c,
+};
+
+static u8 inc_iv[] = {
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
+};
+
+static u64 inc_ct128[] = {
+ 0xb77a659c70dd8dec, 0xebaf93e67e1cdbfa, 0x744766732f6e3a26,
+ 0xb16d4de0cc6db900, 0x6811ac5c5be10d4a, 0x6b42973b30e29d96,
+ 0xf1aec4c4ac0badd8, 0xc1955129e00b33ec, 0x49d7cf50bb054cf0,
+ 0x4deb06dcdc7a21b8, 0xa257b4190916c808, 0x44b7d421c38b934b,
+ 0x9e4dbb2d1aceb85b, 0x2d1c952f53c6000d, 0x7e25b633f3bceb0d,
+ 0xcee9f88cd3c2236d, 0x10ce6bc4a53b1d37, 0xb4783ea69ebc261d,
+ 0x7f732c19e5fdd3ea, 0xb253d0ebd5522c84, 0x7925888c44ef010d,
+ 0xba213ea62e7ec7f0, 0x239e0466520393fd, 0x8cde31681d451842,
+ 0x20b8270d3c5c1bc5, 0x3e56c37a1d573ebe, 0xc4fdb0bb491cf04e,
+ 0x29c9a4f92d7b12da, 0x50c8a51f05b6f704, 0x3cf0f4071c2098fa,
+ 0xb0842470bd8c6fdd, 0x86dd40fdc9640190, 0xe4a6184230ee4f6c,
+ 0x0e2a69261819535e, 0xbdb62571c80aaa39, 0x24a0dc5eafd33f3a,
+ 0x830599f37869c6ac, 0xf7049ae1b8e5c0dd, 0x7c9dd8d4405d2050,
+ 0x0e91382b1dace623, 0xf2b62e26f4133673, 0xa9216257693afdab,
+ 0x2a26df863fb6e980, 0x85e600421c395c83, 0xd5a521016a175cb3,
+ 0x5ef31ae51f7f2f7b, 0xc6ff491d0d6f74d4, 0x16b0e60ac13156d3,
+ 0xd49e0025d5ec1e4b, 0x987c4eff196cd64e, 0xa163915e80892b07,
+ 0x69ab0084052d574a, 0x8017caa649d22bdb, 0xf5eb130f0df2c49a,
+ 0xe2ced8f88537e9ea, 0xdaaff5e845cff681, 0xbd22ac46dd219c7a,
+ 0x1b963af4641e7cf7, 0xe70e7d5b76f88573, 0x39703f5e2db84937,
+ 0x8a1514af42bf3c96, 0x7f51d78b7d3971a6, 0x437a651ef9f08c08,
+ 0x69fd3712ccdfd843, 0xd8204939e67dad48, 0x71035fc942194251,
+ 0x703d964c7525bb2a, 0xe2166e50e1892d94, 0xbe8034b11f6a5a9f,
+ 0x954e4d74c3a9e105, 0x19e077bf00e5186a, 0x7aee46c4b5d4cbf1,
+ 0xfd7dedd15a3e7d35, 0x4ba1c4b76cb93f57, 0xb2e94cffbb82f098,
+ 0x078b04fcebc1fafc, 0x923edcc8600018b2, 0xc018169aba42ff11,
+ 0x0e4f91e46db01bf8, 0x7b5d2b322371e9fa, 0x8e94284632dd300b,
+ 0x80a3d93ce61c2f13, 0x445d2fb83ecfef73, 0xe1279d639bcd26c9,
+ 0xbd1865ba653ce114, 0x0316cfe3227bfb2a, 0xddc80c63d53878db,
+ 0xc91a2f5fedf4a51a, 0xce408a5275b0271f, 0x59a0abc34619018e,
+ 0xa215c590ad1afb21, 0xe3b096d42fc03598, 0x7cada064ab4f4997,
+ 0x699be0e57d76e47f, 0x235151411eee9cbd, 0xbbc688f0eaf896cd,
+ 0x4e78715341f9299d, 0x9f85d76bf99ef2a4, 0x15110ceff4a6040b,
+ 0x9feed36ff4566060, 0x4833ea7d66a0c572, 0x94c7edbdf2169d59,
+ 0xb413d116c6e771f1, 0x9a4b6e78167f4c66, 0x42d3f993c8aaee27,
+ 0xd16783a8c4e57558, 0xb1d7a074dd67339e, 0x97a164444f97adc2,
+ 0xc15a08d61628e5f3, 0x8767e41e04eb96a2, 0xbb28953ed0eae183,
+ 0xc0bab4e80ed8cc6e, 0x1ac34b5a5c4010f8, 0x0bc3b7d9db1775b7,
+ 0x565dead595b98969, 0x0fc03a3cfb656014, 0xdb9098b924a92926,
+ 0xe2786bc431c1f39a, 0xf8a0bf4fffb78d10, 0xd76161fe1ae71851,
+ 0xced33ea693cedbb4, 0xef13034da5529a1b, 0xd71081cadbbff0ac,
+ 0x1873eb643e857392, 0xf6f7c30284ffecb0, 0x93ded259d35eb6fe,
+ 0xf872980774f6e5ef, 0xd457c8ed22d5bc3f, 0x75d907e2a6bcced2,
+ 0xcfd3dceb8d7a79ba, 0xaeed2ff2fc0872bb, 0xb5fc72005d2eb168,
+ 0x850e0e0757274665, 0xab7e5da576c706ec, 0xf1df1ba9a972a4ca,
+ 0xe81d430b4f54adf9, 0x788f3d8655ba79bb, 0xf5990db3557bbf8c,
+ 0x1cacafc47729252c, 0x7581b4d6f3b83d9b, 0x94185dcdb0b0c4cd,
+ 0x3596e687f4b9f4ed, 0xb9462442134b804d, 0xdab188808726fec6,
+ 0xfe10831e8824d4c5, 0x000c641ed4c93be7, 0x2525ee781608b1ea,
+ 0x2b32469d51104097, 0x73a09c6ea117aea9, 0x8506dcdec8ade0be,
+ 0xf9f9fa553cac7285, 0x34b24f100086b974, 0xd42fa88547ade8e7,
+ 0xfd0bb8ce9a5f8e14, 0x15df9966c6a3e433, 0xf6696aafaae89cd6,
+ 0x3d521a9d1a0087e1, 0xe18ca6b8e94701f0, 0x8a4660e26a77965e,
+ 0xc74fcdf41bf4aa20, 0x292a356d0b670157, 0x36ff3344a9eee4ea,
+ 0xd76b051d6251a14b, 0xa9e09f1bacd1e30f, 0xae47cb95f95a8831,
+ 0x58b85ac7c98537ec, 0x9e30f1be05719bd2, 0x94772e6b56fc1380,
+ 0xbe94026a4a89b783, 0x7a7ffb61daa5ac60, 0x2f7beafcc5e9ac8a,
+ 0xfa33f37edc57e94c, 0x230c3582fb412093, 0xdeec806ecc4fa3c4,
+ 0xc7ff8876a31edd76, 0x6d0500f4ccd1bb20, 0xf1d0bef759b81b6c,
+ 0x138b1d39533379b7, 0xece52f84d9f20455, 0x3ed05e391352b9dd,
+ 0x95600f558d4dea51, 0x1d6b997966e35392, 0x0eeae16905b94e37,
+ 0x7db2acc242a56ab0, 0xaf347e5598687f51, 0xbf25013db6bddc18,
+ 0x6d4f106c35f9ee28, 0xc8e90bbe4283ab8c, 0x188cf978f1477dee,
+ 0x66376bfa3a6d8131, 0xe0ebd6632eb89b24, 0xb9e49d81e9d37f69,
+ 0xa5cfa3812d530e04, 0x717353523542a27f, 0x0d6669c916ab4d34,
+ 0x79e741ad592a7bb1, 0x63a7f35584bd3ea5, 0xc0494db2930cbc32,
+ 0x442bd29d7edd0e49, 0x52ec0bce733f61a0, 0x8bd199bf55bc2b4b,
+ 0x727ede5583bb859c, 0x9d07eda6e8220df1, 0xebdd7467d7259f15,
+ 0x8f6035a5dc5f53b1, 0x063a0935630b5f6f, 0xc6e983ec1f08ebe6,
+ 0xeedc82de2b28e651, 0xe28760013e13ae23, 0x37c078d66ad376a3,
+ 0xd54a72e88e80926b, 0x5822405e1d688eec, 0xa001e0b0d4a7447f,
+ 0xfd41f41419d8fd4d, 0x1391d37127a75095, 0x4795d7fb7ad67f17,
+ 0xa47c05c9b8400a0c, 0x28519cd5e98bba0c, 0x84a72dce8a27d050,
+ 0xcbee7b3c83d68c5f, 0xab2227b8f5203d3d, 0x3335a393d47ef9ec,
+ 0xd00b21a2a5dde597, 0xb13d50489ca79216, 0xde1cc721425dda94,
+ 0x1ddc9863b5b0b8e8, 0xb125481a01dfe1b5, 0x5b331c746c4148db,
+ 0x8d6729fe30d56f1d, 0xdc413723540aca6f, 0xf08fe55711f8f09b,
+ 0x98bcde7c09126688, 0xa38c02a0c19d08b0, 0xde8df0683372e31e,
+ 0x08b4727054d766a0, 0xc13b77c325ae45ed, 0x6e7fe05de6b28d5a,
+ 0x1794a4f149586b9a, 0x23f5881c699f81b8, 0x355c9d899c0dcfe3,
+ 0x4319acb92ca33a29, 0x4f3211554c2ecf79, 0x64741347e08aaa2f,
+ 0x32f89bf1084e0723, 0xb0d5d830b9ae58a6, 0x235170babbd5686f,
+ 0xaa711d0aff2e9830, 0x4f73229995f82ca2, 0x46565f056bb352ea,
+ 0x55283776fd729f29, 0xb027c5b67be58718, 0xfa58d8c215d52ef8,
+ 0xfa1a78f7c7db4b2f, 0x7b2badd9a5a7e810, 0x6c362d97ece0f08a,
+ 0xff8ad11e7ce377b1, 0xdf5a423e843cbfa0, 0xfa9e70edc9c12d2b,
+ 0xad745d9146b0b3d9, 0xfc2a590f1ce32b8c, 0x599b34c583449c39,
+ 0xbcab9517d2bd4eae, 0xa5a7f54890e38bc7, 0xb9700fcb336a049a,
+ 0xfcfcc2d65956af5f, 0x3887b5f3e5d238d6, 0x0b9bc00a60dd37c6,
+ 0x09f8d5b6a128fe23, 0x4b33ac26a2a59b5c, 0xfc6e3f30b4b4e108,
+ 0x1e53d6aa6266bee7, 0x9adf6b4cb3369643, 0xda38dfd6df234f48,
+ 0x845e61ddc98d3d16, 0x4a0b90d7d115d701, 0x64e1c9619aa777c3,
+ 0x9dd4b1df006c81f9, 0x71b2b88aea6c679e, 0xb39da7819be759ff,
+ 0xfdad221790b269bb, 0x741f7955b56d786c, 0x5d724fcce9250a73,
+ 0x3812aa144730905b, 0xb74986be047e24c4, 0xeebb8aa5ebdcc8a0,
+ 0x26a0ea4272d5a371, 0x2ff3733c39e92f82, 0x17880beb7b808b30,
+ 0xe298cf8aa284e39c, 0xd481ff1948d0eef0, 0xed53786d517a1f10,
+ 0x853ccfe7f1cba481, 0x9ba1707467deb6dc, 0xf1aae1c3190806b3,
+ 0xb017539bb50b55c4, 0x8809bcc37ac46808, 0x0ae0a3e6e9a6bba5,
+ 0xf7a5276c2a6df772, 0xaf095d1ceb24d931, 0xaa0f62c5eb44d3a6,
+ 0x5e9915d18cd09844, 0xcfff6a2edf6cd35f, 0x893ebc1038af747e,
+ 0xe4360da910f3853a, 0x2097129be26812d5, 0x09d1e31bd3fef181,
+ 0x37a585c49cff87c5, 0xd94d2b3b1cd97311, 0xa3a2d50de285388a,
+ 0xf627d8b7298602a0, 0x567f848218395a28, 0x9b4b416995765491,
+ 0x24388b443fd8730a, 0x5b3a3cc87e225bdb, 0x53a9881d098d520b,
+ 0xadbc31258140299f, 0x37345aad0c678a3f, 0xc0e24ea3958ef6d8,
+ 0x18ceff669a144d20, 0x3ce920ab86ab70c7, 0x430c240b5307c1cb,
+ 0x7240a314d5f7fa9c, 0x4dfaf972d1856f15, 0x76ca74db2ad10515,
+ 0x607ec82965c620f7, 0xc75f531d7eae4145, 0xe91c86c49c8d84a2,
+ 0x8becf71fe1e371a7, 0x055bb0206808c289, 0x36dbcec66eabc566,
+ 0x476f4f1b52c4c856, 0x78bdf9114304e28f, 0x206e8342087ca6e2,
+ 0xda66f574514e8795, 0x903bcf41830a763f, 0x3a8c03f8bfe8c1ae,
+ 0xc386671f05740107, 0xda3abc3b566c70ab, 0xe1072ad4ebd4a028,
+ 0xfe9a6d4c0e8a80ce, 0xeb99eb25a084c442, 0xd34f23f8f279e9f3,
+ 0xccb189048479b94d, 0xfc6f6d863f74a049, 0xa437f340bfdfed0e,
+ 0xc84ef9a7139af764, 0xbeb88737819b7d55, 0x5f06fb8f06d6372b,
+ 0x7ec01ec2f978b4a2, 0x1ad4f2fb9963b46f, 0xae4cdeee5c419652,
+ 0x51ee340ba106d1dc, 0x93544a6e274cf180, 0x0de0b1abf6e9773a,
+ 0xb55514c7be768e6a, 0x70a3ee12298c0688, 0x58943a332454b1ee,
+ 0xe9de88a863b83b29, 0xb99dbf02fc35d6c9, 0x285a09f5583ac480,
+ 0xd0bf2b79a453c915, 0xb6e140e86dcb97d5, 0x8de0ab74f93a8de1,
+ 0x70f9bb989ce46c09, 0xd7ea17d64158d923, 0x308e3f8a527d0ff7,
+ 0xa0fffd413b3a872f, 0xcd35b4b30dfb6587, 0x7ef3ab8b9bd5fbcf,
+ 0x6149f604d9f355f7, 0x130d9020814780cd, 0x45cb969837f9a147,
+ 0x88dc31c106a2345e, 0x690da693a3472e6d, 0xe1dc49aaab6d8504,
+ 0x7749dc54f0a8f838, 0x358a1197921ed6e3, 0x50ae914d7b26c811,
+ 0x6e0f79b3af64d1ad, 0xec45b7e54c408577, 0x94809242f830a52f,
+ 0x88e8c0701fd8cd25, 0x21f562f903b85ca7, 0x3f8f1d2cfd57d394,
+ 0x1f0db9fb1767b393, 0x0504a2b6a6b967d3, 0xf18209ff9dee356b,
+ 0x4e74343f94f09cff, 0x53107e4bd79b52c1, 0x9c4ab4cdba0f0c2f,
+ 0xfd085f652a3c3f14, 0xcbd20129e019e573, 0x92d2e7681d64d41b,
+ 0xfa6c6c50db35a8fd, 0x7dc5177e0cc57261, 0xae3586379eed9e9d,
+ 0x4ba340964a014d54, 0x57147f7d60a4a5ee, 0x423255e50fec612e,
+ 0x1c1158e2a2afbace, 0x5e0dd39d591b341f, 0x4e0fff62124939a6,
+ 0x12e0413146fa5c8d, 0x3a6e0c37d48699a0, 0x9774260521aa490f,
+ 0xbd0f8ecc2b447c99, 0x556d41deab48dad8, 0x08bd36a5be98bc97,
+ 0x8bf0c22eb1cb99a0, 0x959954221670e572, 0x05143412beae5a0c,
+ 0x37246cbdf96ede32, 0xeb05ce52c11ab210, 0xd4e9c130ccd17048,
+ 0x42cc9b6177b7547b, 0x96d603334e7a85c7, 0x850365d5d2f5adcb,
+ 0xcfa11346e834516c, 0xfb9e30870be0c7bb, 0xc4d137ab85224e7a,
+ 0xc7f20e98475c4ab3, 0xaf464d45151fec79, 0xe4ad336a38569bcd,
+ 0xabd20fbf84b809bd, 0xb3643ed21050862a, 0xfb29924632f30a27,
+ 0x3f4fd0809492521f, 0xcc9635ff080ba76d, 0xeb679199764753a7,
+ 0x9df2de103f532b81, 0x83784f41703f0a31, 0x70ba6c249783efba,
+ 0x93cf542badd6d441, 0x8290f3e7b7fcc9a6, 0xb55485e8fadf4677,
+ 0xf29c554f7e99c1de, 0x277a3a2d674f10e9, 0xe9a5460c4d87bd2a,
+ 0x0d8489866023402a, 0x6bd7d212c07df415, 0x8d6194cb592bebc3,
+ 0xa9747f53b4cd4192, 0x56bd4c4c6373dcb9, 0x3385c9e222966cb2,
+ 0x234bda6863a4f7fd, 0xebc79b310f06f538, 0x3b7556403468fc38,
+ 0x9ac05c55de908490, 0x381dba9f8e05fd0e, 0x5e92d1853484e36a,
+ 0x030782801735585f, 0xd8c76845c71a4482, 0xea03ea2ec2406c9b,
+ 0xe2498a52f95cd21e, 0xd4ffe046d9393212, 0x93565efec984c6c9,
+ 0x154c50d8c6e11dc9, 0x3cd889f3188c18cc, 0xb5a46a6cba1287ca,
+ 0xbc203b6c8f21bb66, 0xfedf97cba4c35dea, 0x0c82b3d9520de017,
+ 0xdb2674b14ddb4d95, 0x44c8e1ca851db784, 0x5596d3e27d211d55,
+ 0x9dbe804695d2270d, 0xbd54af74b050b82a, 0xe4ea34515f120cea,
+ 0xaa2564472972ab58, 0xf97af0d678dfd0cb, 0xdebdbc18d6c71bd1,
+ 0x78423e11438fcb21, 0xf6f749d4f30510d4, 0x68de10085ea4c2ea,
+ 0x6b3ff4773ccb4ec1, 0x33206eb82742f50e, 0x3046468ab04a0778,
+ 0xd7168cc59b78654c, 0xcb5800e03e2f90d9, 0x4f8fdaa4a3b0b5ff,
+ 0xe0eeff2c2ff94e64, 0x7f2578708dafae2e, 0x6feab0ef729b4300,
+ 0xf1de49e2796cfdf5, 0x90711a9f7886a0d0, 0xf4b39401ae61d28a,
+ 0x3f26008ddcbc47e9, 0xfab0a15c25a8511d, 0x2664fc987e7fdd17,
+ 0x51125228da560a04, 0x93a545c6207a3d67, 0x7c8e4446a408cc25,
+ 0xf9b10a00083f429e, 0x48704b0fc020d66c, 0x1e1a8c7a3d66eae0,
+ 0x9bde8e4692e41915, 0x7144aad3cf672129, 0xbab5e713e8f5b335,
+ 0x2d2c0b70c55d7d11, 0xed928a6e1b388ab0, 0xf121a4a71653448f,
+ 0x0dd175d00c20e9ed, 0xe68066507fb5dcb1, 0x92384f914830a50e,
+ 0xb4d4c84f220aed3d, 0xa13e4d6ea70cc5f0, 0xfdbe2223195bfa82,
+ 0xe97bb465c3ca2099, 0x0078ec86e8daa6c0, 0x634c3a1311b805c4,
+ 0xac04a89119ae79a7, 0x690e7049d8e8762f, 0x0000000000000086,
+ 0x0000000000000000,
+};
+
+#define perftest_aesXXX_var_sz(a) \
+ void __test_perf_fn perftest_aes##a##_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_ctr_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ u8 *key = test_mem_alloc_and_fill_inc_u8 (32, 192, 0); \
+ u8 *iv = test_mem_alloc_and_fill_inc_u8 (16, 128, 0); \
+ \
+ clib_aes_ctr_key_expand (kd, key, AES_KEY_##a); \
+ \
+ test_perf_event_enable (tp); \
+ clib_aes##a##_ctr (kd, src, n, iv, dst); \
+ test_perf_event_disable (tp); \
+ }
+
+static clib_error_t *
+test_clib_aes128_ctr (clib_error_t *err)
+{
+ aes_ctr_key_data_t kd;
+ aes_ctr_ctx_t ctx;
+ u8 pt[INC_TEST_BYTES];
+ u8 ct[INC_TEST_BYTES];
+
+ FOREACH_ARRAY_ELT (tc, test_cases128)
+ {
+ clib_aes_ctr_key_expand (&kd, tc->key, AES_KEY_128);
+ clib_aes128_ctr (&kd, tc->pt, tc->data_len, tc->iv, ct);
+
+ if (tc->data_len && memcmp (tc->ct, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_ctr_key_expand (&kd, inc_key128, AES_KEY_128);
+ clib_aes128_ctr (&kd, pt, INC_TEST_BYTES, inc_iv, ct);
+ for (int i = 0; i < sizeof (pt); i++)
+ if (((u8 *) inc_ct128)[i] != ct[i])
+ return clib_error_return (err, "incremental test failed (byte %u)", i);
+
+ clib_aes_ctr_init (&ctx, &kd, inc_iv, AES_KEY_128);
+ for (u32 off = 0, chunk_size = 1; off < INC_TEST_BYTES;
+ off += chunk_size, chunk_size = clib_min (((chunk_size + 1) * 2 - 1),
+ INC_TEST_BYTES - off))
+ clib_aes_ctr_transform (&ctx, pt + off, ct + off, chunk_size, AES_KEY_128);
+
+ for (int i = 0; i < sizeof (pt); i++)
+ if (((u8 *) inc_ct128)[i] != ct[i])
+ return clib_error_return (
+ err, "incremental multiseg test failed (byte %u)", i);
+
+ return err;
+}
+
+perftest_aesXXX_var_sz (128);
+REGISTER_TEST (clib_aes128_ctr) = {
+ .name = "clib_aes128_ctr",
+ .fn = test_clib_aes128_ctr,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes128_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes192_ctr (clib_error_t *err)
+{
+ aes_ctr_key_data_t kd;
+ u8 ct[MAX_TEST_DATA_LEN];
+
+ FOREACH_ARRAY_ELT (tc, test_cases192)
+ {
+ clib_aes_ctr_key_expand (&kd, tc->key, AES_KEY_192);
+ clib_aes192_ctr (&kd, tc->pt, tc->data_len, tc->iv, ct);
+
+ if (tc->data_len && memcmp (tc->ct, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_var_sz (192);
+REGISTER_TEST (clib_aes192_ctr) = {
+ .name = "clib_aes192_ctr",
+ .fn = test_clib_aes192_ctr,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes192_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes192_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes256_ctr (clib_error_t *err)
+{
+ aes_ctr_key_data_t kd;
+ u8 ct[MAX_TEST_DATA_LEN];
+
+ FOREACH_ARRAY_ELT (tc, test_cases256)
+ {
+ aes_ctr_ctx_t ctx;
+ u32 sz = tc->data_len / 3;
+
+ clib_aes_ctr_key_expand (&kd, tc->key, AES_KEY_256);
+ clib_aes256_ctr (&kd, tc->pt, tc->data_len, tc->iv, ct);
+
+ if (tc->data_len && memcmp (tc->ct, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ clib_memset (ct, 0, tc->data_len);
+
+ clib_aes_ctr_init (&ctx, &kd, tc->iv, AES_KEY_256);
+ clib_aes_ctr_transform (&ctx, tc->pt, ct, sz, AES_KEY_256);
+ clib_aes_ctr_transform (&ctx, tc->pt + sz, ct + sz, sz, AES_KEY_256);
+ clib_aes_ctr_transform (&ctx, tc->pt + 2 * sz, ct + 2 * sz,
+ tc->data_len - 2 * sz, AES_KEY_256);
+ if (tc->data_len && memcmp (tc->ct, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext (multiseg)",
+ tc->name);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_var_sz (256);
+REGISTER_TEST (clib_aes256_ctr) = {
+ .name = "clib_aes256_ctr",
+ .fn = test_clib_aes256_ctr,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes256_var_sz }),
+};
+
+#endif
diff --git a/src/vppinfra/test/aes_gcm.c b/src/vppinfra/test/aes_gcm.c
new file mode 100644
index 00000000000..caa36b0f710
--- /dev/null
+++ b/src/vppinfra/test/aes_gcm.c
@@ -0,0 +1,1177 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#if (defined(__AES__) && defined(__PCLMUL__)) || defined(__ARM_FEATURE_CRYPTO)
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/aes_gcm.h>
+
+static const u8 tc1_key128[16] = {
+ 0,
+};
+
+static const u8 tc1_iv[12] = {
+ 0,
+};
+
+static const u8 tc1_tag128[] = { 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e,
+ 0x30, 0x61, 0x36, 0x7f, 0x1d, 0x57,
+ 0xa4, 0xe7, 0x45, 0x5a };
+static const u8 tc1_key256[32] = {
+ 0,
+};
+
+static const u8 tc1_tag256[] = {
+ 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
+ 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b,
+};
+
+static const u8 tc2_ciphertext256[] = { 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60,
+ 0x6b, 0x6e, 0x07, 0x4e, 0xc5, 0xd3,
+ 0xba, 0xf3, 0x9d, 0x18 };
+
+static const u8 tc2_tag256[] = { 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99,
+ 0x6b, 0xf0, 0x26, 0x5b, 0x98, 0xb5,
+ 0xd4, 0x8a, 0xb9, 0x19 };
+
+static const u8 tc2_plaintext[16] = {
+ 0,
+};
+
+static const u8 tc2_tag128[] = { 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec,
+ 0x13, 0xbd, 0xf5, 0x3a, 0x67, 0xb2,
+ 0x12, 0x57, 0xbd, 0xdf };
+
+static const u8 tc2_ciphertext128[] = { 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6,
+ 0xa3, 0x92, 0xf3, 0x28, 0xc2, 0xb9,
+ 0x71, 0xb2, 0xfe, 0x78 };
+
+static const u8 tc3_key128[] = { 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65,
+ 0x73, 0x1c, 0x6d, 0x6a, 0x8f, 0x94,
+ 0x67, 0x30, 0x83, 0x08 };
+
+static const u8 tc3_iv[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce,
+ 0xdb, 0xad, 0xde, 0xca, 0xf8, 0x88 };
+
+static const u8 tc3_plaintext[] = {
+ 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, 0xa5, 0x59, 0x09, 0xc5, 0xaf,
+ 0xf5, 0x26, 0x9a, 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda, 0x2e, 0x4c,
+ 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09,
+ 0x53, 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25, 0xb1, 0x6a, 0xed, 0xf5,
+ 0xaa, 0x0d, 0xe6, 0x57, 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
+};
+
+static const u8 tc3_ciphertext128[] = {
+ 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, 0x4b, 0x72, 0x21, 0xb7, 0x84,
+ 0xd0, 0xd4, 0x9c, 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0, 0x35, 0xc1,
+ 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93,
+ 0x1c, 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05, 0x1b, 0xa3, 0x0b, 0x39,
+ 0x6a, 0x0a, 0xac, 0x97, 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
+};
+
+static const u8 tc3_tag128[] = { 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd,
+ 0x64, 0xa6, 0x2c, 0xf3, 0x5a, 0xbd,
+ 0x2b, 0xa6, 0xfa, 0xb4 };
+
+static const u8 tc3_key256[] = { 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73,
+ 0x1c, 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30,
+ 0x83, 0x08, 0xfe, 0xff, 0xe9, 0x92, 0x86,
+ 0x65, 0x73, 0x1c, 0x6d, 0x6a, 0x8f, 0x94,
+ 0x67, 0x30, 0x83, 0x08 };
+
+static const u8 tc3_ciphertext256[] = {
+ 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, 0xf4, 0x7f, 0x37, 0xa3, 0x2a,
+ 0x84, 0x42, 0x7d, 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9, 0x75, 0x98,
+ 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb,
+ 0x3d, 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38, 0xc5, 0xf6, 0x1e, 0x63,
+ 0x93, 0xba, 0x7a, 0x0a, 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
+};
+
+static const u8 tc3_tag256[] = { 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34,
+ 0x71, 0xbd, 0xec, 0x1a, 0x50, 0x22,
+ 0x70, 0xe3, 0xcc, 0x6c };
+
+static const u8 tc4_plaintext[] = {
+ 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5, 0xa5, 0x59, 0x09, 0xc5,
+ 0xaf, 0xf5, 0x26, 0x9a, 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
+ 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72, 0x1c, 0x3c, 0x0c, 0x95,
+ 0x95, 0x68, 0x09, 0x53, 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
+ 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57, 0xba, 0x63, 0x7b, 0x39,
+};
+
+static const u8 tc4_aad[] = { 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe,
+ 0xef, 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad,
+ 0xbe, 0xef, 0xab, 0xad, 0xda, 0xd2 };
+
+static const u8 tc4_ciphertext128[] = {
+ 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24, 0x4b, 0x72, 0x21, 0xb7,
+ 0x84, 0xd0, 0xd4, 0x9c, 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
+ 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e, 0x21, 0xd5, 0x14, 0xb2,
+ 0x54, 0x66, 0x93, 0x1c, 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
+ 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97, 0x3d, 0x58, 0xe0, 0x91
+};
+
+static const u8 tc4_tag128[] = { 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21,
+ 0xa5, 0xdb, 0x94, 0xfa, 0xe9, 0x5a,
+ 0xe7, 0x12, 0x1a, 0x47 };
+
+static const u8 tc4_ciphertext256[] = {
+ 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07, 0xf4, 0x7f, 0x37, 0xa3,
+ 0x2a, 0x84, 0x42, 0x7d, 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
+ 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa, 0x8c, 0xb0, 0x8e, 0x48,
+ 0x59, 0x0d, 0xbb, 0x3d, 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
+ 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a, 0xbc, 0xc9, 0xf6, 0x62
+};
+
+static const u8 tc4_tag256[] = { 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e,
+ 0x17, 0x68, 0xcd, 0xdf, 0x88, 0x53,
+ 0xbb, 0x2d, 0x55, 0x1b };
+
+static const u8 inc_key[] = { 0x97, 0x3e, 0x43, 0x70, 0x84, 0x71, 0xd4, 0xe2,
+ 0x45, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x5f, 0x3b,
+ 0x97, 0x3e, 0x43, 0x70, 0x84, 0x71, 0xd4, 0xe2,
+ 0x45, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x5f, 0x3b };
+static const u8 inc_iv[] = { 0xe2, 0xe4, 0x3f, 0x29, 0xfe, 0xd4,
+ 0xbc, 0x31, 0x56, 0xa7, 0x97, 0xf5 };
+
+static const struct
+{
+ const u16 n_bytes;
+ const u64 tag_gcm_128[2];
+ const u64 tag_gcm_256[2];
+ const u64 tag_gmac_128[2];
+ const u64 tag_gmac_256[2];
+ const u8 tag256[16];
+} inc_test_cases[] = {
+ {
+ .n_bytes = 0,
+ .tag_gcm_128 = { 0x95f4b8cc824294eb, 0xbf964ccf94b47f96 },
+ .tag_gcm_256 = { 0x206b456eaa81a3c8, 0xa308160d180e080d },
+ .tag_gmac_128 = { 0x95f4b8cc824294eb, 0xbf964ccf94b47f96 },
+ .tag_gmac_256 = { 0x206b456eaa81a3c8, 0xa308160d180e080d },
+ },
+ {
+ .n_bytes = 1,
+ .tag_gcm_128 = { 0xe89aa5be94fa1db4, 0x70d82ed02542a560 },
+ .tag_gcm_256 = { 0xcb0659b38e60d3a7, 0x9758b874959187ff },
+ .tag_gmac_128 = { 0xf9be1e7db073c565, 0x3b8a0ecc7a91f09d },
+ .tag_gmac_256 = { 0x1e302e97ab394130, 0xef29621c33bdb710 },
+ },
+ {
+ .n_bytes = 7,
+ .tag_gcm_128 = { 0xf4af7cbe57bd2078, 0x063dd60abbe51049 },
+ .tag_gcm_256 = { 0x7d231388fe8a19be, 0x59be3e7205269abd },
+ .tag_gmac_128 = { 0x27d0a47980eed1c6, 0xe6163485e73d02b3 },
+ .tag_gmac_256 = { 0x61ce281b47729f6c, 0x128a6bc0880e5d84 },
+ },
+ {
+ .n_bytes = 8,
+ .tag_gcm_128 = { 0xf45b40961422abc4, 0x0a932b98c4999694 },
+ .tag_gcm_256 = { 0xf7f945beed586ee2, 0x67239433a7bd3f23 },
+ .tag_gmac_128 = { 0x3a25d38572abe3b1, 0x220798aca96d594a },
+ .tag_gmac_256 = { 0x2e0e6d58d1ab41ca, 0x09bbc83e3b7b5e11 },
+ },
+ {
+ .n_bytes = 9,
+ .tag_gcm_128 = { 0x791b0a879d236364, 0xde9553e3ed1b763f },
+ .tag_gcm_256 = { 0x24c13ed7b46813cd, 0xe646ce24ea4b281e },
+ .tag_gmac_128 = { 0x0e521672b23a4fc7, 0x16f129224dec5fd8 },
+ .tag_gmac_256 = { 0x8b9c603789c34043, 0x0a8b626928c9fb6f },
+ },
+ {
+ .n_bytes = 15,
+ .tag_gcm_128 = { 0xb277ef05e2be1cc0, 0x2922fba5e321c81e },
+ .tag_gcm_256 = { 0xc3ca9f633fa803dc, 0x96e60b0c3347d744 },
+ .tag_gmac_128 = { 0xab99e6327c8e1493, 0x09a9a153045ba43f },
+ .tag_gmac_256 = { 0xfc9ec2d6a1ad492b, 0xf0b0ba877663732d },
+ },
+ {
+ .n_bytes = 16,
+ .tag_gcm_128 = { 0x3e3438e8f932ebe3, 0x958e270d56ae588e },
+ .tag_gcm_256 = { 0x6ac53524effc8171, 0xccab3a16a0b5813c },
+ .tag_gmac_128 = { 0x0eb4a09c6c7db16b, 0x1cdb5573a27a2e4a },
+ .tag_gmac_256 = { 0x71752018b31eae33, 0xdc4bd36d44b9fd5d },
+ },
+ {
+ .n_bytes = 31,
+ .tag_gcm_128 = { 0x1f4d4a7a056e4bca, 0x97ac76121dccb4e0 },
+ .tag_gcm_256 = { 0x609aea9aec919ab6, 0x1eba3c4998e7abb9 },
+ .tag_gmac_128 = { 0x289280f9e8879c68, 0xe6b0e36afc0d2ae1 },
+ .tag_gmac_256 = { 0x0b3f61762ba4ed43, 0x293f596a76d63b37 },
+ },
+ {
+ .n_bytes = 32,
+ .tag_gcm_128 = { 0xc4b64505d045818f, 0x72bfd499f0f983b4 },
+ .tag_gcm_256 = { 0x3f003fb179b2c480, 0x883876d4904700c2 },
+ .tag_gmac_128 = { 0x3dd10ab954d807f0, 0x5ae32ee41675051e },
+ .tag_gmac_256 = { 0x1a80ab830fc736c0, 0x51db27630adae337 },
+ },
+ {
+ .n_bytes = 47,
+ .tag_gcm_128 = { 0x3aedb0c6c14f2ea1, 0xe4626626bae641cd },
+ .tag_gcm_256 = { 0x9c91b87dfd302880, 0x05bb594dde5abb9c },
+ .tag_gmac_128 = { 0xe0fe54f2bdadeba8, 0x6f8f40edb569701f },
+ .tag_gmac_256 = { 0x26c5632c7abbdb3f, 0xc18ccc24df8bb239 },
+ },
+ {
+ .n_bytes = 48,
+ .tag_gcm_128 = { 0xdbceb2aed0dbbe27, 0xfef0013e8ebe6ef1 },
+ .tag_gcm_256 = { 0x98ad025f30b58ffa, 0xabc8a99857034e42 },
+ .tag_gmac_128 = { 0x269518e8584b7f6c, 0x1c9f41410a81799c },
+ .tag_gmac_256 = { 0x144807ce7aa8eb61, 0x611a8355b4377dc6 },
+ },
+ {
+ .n_bytes = 63,
+ .tag_gcm_128 = { 0x1769ccf523a2046e, 0x7328e18749a559b4 },
+ .tag_gcm_256 = { 0xcdf2f28efa9689ce, 0x636676f6aedea9de },
+ .tag_gmac_128 = { 0x4d47537060defce8, 0x0d4819c20ba8e889 },
+ .tag_gmac_256 = { 0x7b60615e7bfc9a7a, 0x610633296eb30b94 },
+ },
+ {
+ .n_bytes = 64,
+ .tag_gcm_128 = { 0xa5602f73865b6a77, 0x78317e461ff9b560 },
+ .tag_gcm_256 = { 0x5c17a6dcd1f23b65, 0x25331c378256a93e },
+ .tag_gmac_128 = { 0x39d941ed85d81ab0, 0xe358a61078628d63 },
+ .tag_gmac_256 = { 0x5276fbdd333f380d, 0xb0dc63e68f137e74 },
+ },
+ {
+ .n_bytes = 79,
+ .tag_gcm_128 = { 0x5d32cd75f2e82d84, 0xbc15801c1fe285bd },
+ .tag_gcm_256 = { 0xb2b2855f4b1ecf70, 0xa524adc1609c757b },
+ .tag_gmac_128 = { 0xa147493f08a8738e, 0xbf07da9f4a88944f },
+ .tag_gmac_256 = { 0xfee15e0d4b936bc7, 0x1dc88398c6b168bc },
+ },
+ {
+ .n_bytes = 80,
+ .tag_gcm_128 = { 0xa303b7247b9b00df, 0xe72d6d7063d48b72 },
+ .tag_gcm_256 = { 0x7abfffc9ecfa00ec, 0x9c5ffcd753ee4568 },
+ .tag_gmac_128 = { 0xc3e61bf9f370b40e, 0x66b1c4a6df3b19d7 },
+ .tag_gmac_256 = { 0x0cc7b09a7d602352, 0x29e8a64447a764d2 },
+ },
+ {
+ .n_bytes = 95,
+ .tag_gcm_128 = { 0xf0fb35c36eac3025, 0xa13463307fc48907 },
+ .tag_gcm_256 = { 0x283a73a05bd0e3c2, 0x794a181dd07a0fb7 },
+ .tag_gmac_128 = { 0x26f3546060d9f958, 0xc1367fca8869ab40 },
+ .tag_gmac_256 = { 0xa046e1705100c711, 0xbcf9d6a06f360260 },
+ },
+ {
+ .n_bytes = 96,
+ .tag_gcm_128 = { 0x974bb3c1c258bfb5, 0xcf057344bccb0596 },
+ .tag_gcm_256 = { 0x18920d75fcfb702e, 0x18e5f14ba429b7be },
+ .tag_gmac_128 = { 0xf43cca4837ad00b8, 0xb1a1585d51838352 },
+ .tag_gmac_256 = { 0xce3427dc5123b31f, 0xdcc6e49fa0f6587e },
+ },
+ {
+ .n_bytes = 111,
+ .tag_gcm_128 = { 0x5d73baa8eef0ced3, 0x79339e31d5d813de },
+ .tag_gcm_256 = { 0x4cefa311c9c39a86, 0xe809ee78930ef736 },
+ .tag_gmac_128 = { 0x452003e6d535a523, 0x723f08581012c62e },
+ .tag_gmac_256 = { 0x6ce2e1661db942ca, 0xccd700c9c6d03cfd },
+ },
+ {
+ .n_bytes = 112,
+ .tag_gcm_128 = { 0x189aa61ce15a0d11, 0xc907e6bccbdbb8f9 },
+ .tag_gcm_256 = { 0xa41c96c843b791b4, 0x0f9f60953f03e5fc },
+ .tag_gmac_128 = { 0x44c75b94dbf8539f, 0xcdebe3ed9c68c840 },
+ .tag_gmac_256 = { 0x21a289dd39eadd19, 0x749a038e1ea0711c },
+ },
+ {
+ .n_bytes = 127,
+ .tag_gcm_128 = { 0xc6ea87bfe82d73f6, 0x9d85dbf8072bb051 },
+ .tag_gcm_256 = { 0xd5e436b2ddfac9fa, 0x54d7d13fa214703a },
+ .tag_gmac_128 = { 0xdc5374b7d7d221c4, 0xa8cf4e11958b9dff },
+ .tag_gmac_256 = { 0xc7ad0bba9de54f6a, 0x38ed037fe0924dee },
+ },
+ {
+ .n_bytes = 128,
+ .tag_gcm_128 = { 0x357d4954b7c2b440, 0xb3b07ce0cd143149 },
+ .tag_gcm_256 = { 0x5555d09cb247322d, 0xeb9d1cea38b68951 },
+ .tag_gmac_128 = { 0x6a77579181663dde, 0xe359157bd4246d3f },
+ .tag_gmac_256 = { 0x9fe930d50d661e37, 0xba4a0f3c3a6b63cf },
+ },
+ {
+ .n_bytes = 143,
+ .tag_gcm_128 = { 0x358f897d4783966f, 0x6fa44993a9ed54c4 },
+ .tag_gcm_256 = { 0x60e91f959f2ccdbe, 0x116c56fdaa107deb },
+ .tag_gmac_128 = { 0x121d26aba8aaee0d, 0xc37cda9c43f51008 },
+ .tag_gmac_256 = { 0x06918b1cd20e0abc, 0x42938b1d8e745dcd },
+ },
+ {
+ .n_bytes = 144,
+ .tag_gcm_128 = { 0x8a9efe3df387e069, 0xc0a3f2f7547c704b },
+ .tag_gcm_256 = { 0x217d59f53bfbc314, 0x2d8f088d05532b0d },
+ .tag_gmac_128 = { 0x382949d56e0e8f05, 0x4e87fb8f83f095a7 },
+ .tag_gmac_256 = { 0x75e07060883db37d, 0x5fde7b9bda37d680 },
+ },
+ {
+ .n_bytes = 159,
+ .tag_gcm_128 = { 0x297252081cc8db1e, 0x6357143fa7f756c8 },
+ .tag_gcm_256 = { 0x7e8fca9d1b17e003, 0x7bf7dad063b9a5c9 },
+ .tag_gmac_128 = { 0x5d0524b130e97547, 0xd6befd8591072437 },
+ .tag_gmac_256 = { 0xf5f631d391b635fc, 0xe8f7b6808544f312 },
+ },
+ {
+ .n_bytes = 160,
+ .tag_gcm_128 = { 0x90e034ee0f08a871, 0x002f483eefa24ec9 },
+ .tag_gcm_256 = { 0xed24df02e455d6d3, 0x7a7d318ed132cb7f },
+ .tag_gmac_128 = { 0xc75f87215ae12a2f, 0xf264e5381d5b0412 },
+ .tag_gmac_256 = { 0x1ad3e294fd55b0a6, 0xa1a551e59fd12e2f },
+ },
+ {
+ .n_bytes = 175,
+ .tag_gcm_128 = { 0x8f663955c8e4249e, 0xd9d8d8d7352b18d9 },
+ .tag_gcm_256 = { 0xd9af34eae74a35e1, 0xc22e74b34267e5df },
+ .tag_gmac_128 = { 0xb54a2e8b186a55db, 0x980f586c6da8afce },
+ .tag_gmac_256 = { 0x9cceb31baad18ff1, 0xce97588909ece8af },
+ },
+ {
+ .n_bytes = 176,
+ .tag_gcm_128 = { 0x258ec0df82f003bd, 0x571496e92c966695 },
+ .tag_gcm_256 = { 0xa1925cda1fa1dd2c, 0x914038618faecf99 },
+ .tag_gmac_128 = { 0xfc384b412bdb05ef, 0x73968cf3b464a997 },
+ .tag_gmac_256 = { 0x50d9ce4be242e176, 0x5fb78e9404c9226d },
+ },
+ {
+ .n_bytes = 191,
+ .tag_gcm_128 = { 0x796a90a3edaab614, 0x4bf34c2c6333c736 },
+ .tag_gcm_256 = { 0x4ffd3a84b346c6d5, 0x9d4c84c7ac5a191c },
+ .tag_gmac_128 = { 0x16c11c6bfad5973e, 0xa0825b9c827137c8 },
+ .tag_gmac_256 = { 0x82c144c209c22758, 0x7428b4ac38a65c56 },
+ },
+ {
+ .n_bytes = 192,
+ .tag_gcm_128 = { 0x2a44492af2e06a75, 0xbe4eab62aacfc2d3 },
+ .tag_gcm_256 = { 0xb7d4971a8061092d, 0x94da543669369e41 },
+ .tag_gmac_128 = { 0xed462726c984b596, 0xd61b317d979f5df8 },
+ .tag_gmac_256 = { 0x554dc7f30981dbf6, 0x94447d0fbf9f2c8b },
+ },
+ {
+ .n_bytes = 207,
+ .tag_gcm_128 = { 0xcfac9f67252713c8, 0xd638cf6b74c6acf6 },
+ .tag_gcm_256 = { 0x57a4a9d299663925, 0xa802f8453e8bcc5b },
+ .tag_gmac_128 = { 0xef03f3cdcb0ea819, 0xeea8f0f7f805c306 },
+ .tag_gmac_256 = { 0x3d8cd7d92cf0a212, 0x12c1ddddab7e752c },
+ },
+ {
+ .n_bytes = 208,
+ .tag_gcm_128 = { 0x5467633795b92cf5, 0x6b45fb93e19f9341 },
+ .tag_gcm_256 = { 0xaeced4090d4d20bc, 0xd20161cd2617613e },
+ .tag_gmac_128 = { 0x02bb88dbe681ab69, 0xaf973bfd0b924144 },
+ .tag_gmac_256 = { 0x313020fc5283b45e, 0x1757616d4cf17c7f },
+ },
+ {
+ .n_bytes = 223,
+ .tag_gcm_128 = { 0x2f9c725903c07adf, 0xe01712c7d6d5055d },
+ .tag_gcm_256 = { 0xeae53a9b0d03a4f9, 0x42b2375d569d384e },
+ .tag_gmac_128 = { 0x6ea092dd400ec00d, 0x23237fa0bd0c1977 },
+ .tag_gmac_256 = { 0xa02e0f41f12f0053, 0xfba53430aa616219 },
+ },
+ {
+ .n_bytes = 224,
+ .tag_gcm_128 = { 0x73e40772334901a9, 0xddf6075b357cb307 },
+ .tag_gcm_256 = { 0x2eb3450f9462c968, 0xa9fb95f281c117e9 },
+ .tag_gmac_128 = { 0x33762525c12dfd1d, 0xcb3d8d0402c23ebf },
+ .tag_gmac_256 = { 0x30c6d05fb98c2a84, 0xaa2c9f6303383d3a },
+ },
+ {
+ .n_bytes = 239,
+ .tag_gcm_128 = { 0x184d15fd2e2c63a6, 0x3dfe238b88dd2924 },
+ .tag_gcm_256 = { 0x18deafee39975b36, 0xc07761cf4fc16c06 },
+ .tag_gmac_128 = { 0x10a48f2bc4e64f87, 0x85eec49ae83d4256 },
+ .tag_gmac_256 = { 0x5ac87f47f32770eb, 0x31706ca888dd6d44 },
+ },
+ {
+ .n_bytes = 240,
+ .tag_gcm_128 = { 0x153134f11cfa06ec, 0xd987642cc3688a34 },
+ .tag_gcm_256 = { 0x3eb66b6dc0bba824, 0x274c4648d515c844 },
+ .tag_gmac_128 = { 0x9e5afe891c7c7dcb, 0xa2b3fa1c026343e2 },
+ .tag_gmac_256 = { 0xe9120e4e9ff4b1e1, 0xb88bf68336342598 },
+ },
+ {
+ .n_bytes = 255,
+ .tag_gcm_128 = { 0x2b5e78936d1ace73, 0x15b766bfee18d348 },
+ .tag_gcm_256 = { 0xeb3741a345395c97, 0x02e11e0478e4cc5a },
+ .tag_gmac_128 = { 0xf7daf525751192df, 0x1b1641c3362905ac },
+ .tag_gmac_256 = { 0x0b16a2bb842caaca, 0x996732fedaa6b829 },
+ },
+ {
+ .n_bytes = 256,
+ .tag_gcm_128 = { 0x6d4507e0c354e80a, 0x2345eccddd0bd71e },
+ .tag_gcm_256 = { 0xa582b8122d699b63, 0xb16db944f6b073f3 },
+ .tag_gmac_128 = { 0xc58bb57544c07b40, 0x1a8dd3d8124cdf39 },
+ .tag_gmac_256 = { 0xb0f6db0da52e1dc2, 0xbd3a86a577ed208a },
+ },
+ {
+ .n_bytes = 319,
+ .tag_gcm_128 = { 0x2cd41fdf6f659a6b, 0x2486849d7666d76e },
+ .tag_gcm_256 = { 0xb7e416c8a716cb4d, 0xc7abe0d755b48845 },
+ .tag_gmac_128 = { 0xad83725394d4a36b, 0x5fdd42e941cad49b },
+ .tag_gmac_256 = { 0xbb0b73609b90f7eb, 0xe4d382b8b9b7d43e },
+ },
+ {
+ .n_bytes = 320,
+ .tag_gcm_128 = { 0x064cfe34b7d9f89c, 0xb6c7263f66c89b47 },
+ .tag_gcm_256 = { 0x1254c9ae84d8ff50, 0x9faeab423099dc9a },
+ .tag_gmac_128 = { 0xd91d60ce71d24533, 0xb1cdfd3b3200b171 },
+ .tag_gmac_256 = { 0x921de9e3d353559c, 0x3509d2775817a1de },
+ },
+ {
+ .n_bytes = 383,
+ .tag_gcm_128 = { 0x14788c7531d682e1, 0x8af79effe807a4dc },
+ .tag_gcm_256 = { 0x947754a0844b4a4d, 0x9eb3849d93d5048e },
+ .tag_gmac_128 = { 0xfa84d3a18ea6f895, 0x9a45c729797a8ac4 },
+ .tag_gmac_256 = { 0xe8e61e134e40359a, 0xe8e404d4b523607c },
+ },
+ {
+ .n_bytes = 384,
+ .tag_gcm_128 = { 0xfba3fcfd9022e9a7, 0x257ba59f12055d70 },
+ .tag_gcm_256 = { 0x7c6ca4e7fba2bc35, 0x1c590be09b3d549b },
+ .tag_gmac_128 = { 0x4ca0f087d812e48f, 0xd1d39c4338d57a04 },
+ .tag_gmac_256 = { 0xb0a2257cdec364c7, 0x6a4308976fda4e5d },
+ },
+ {
+ .n_bytes = 447,
+ .tag_gcm_128 = { 0x8fde1490c60f09bf, 0xd2932f04c202c5e4 },
+ .tag_gcm_256 = { 0x1845a80cbdcf2e62, 0xc7c49c9864bca732 },
+ .tag_gmac_128 = { 0x35aa90d2deb41b9c, 0x516ab85a3f17b71e },
+ .tag_gmac_256 = { 0x1db78f8b7b34d9e7, 0xd168177351e601fe },
+ },
+ {
+ .n_bytes = 448,
+ .tag_gcm_128 = { 0xd0a7b75f734a1a7c, 0xc7689b7c571a09bf },
+ .tag_gcm_256 = { 0xef3a9118c347118d, 0x282a7736060d7bb5 },
+ .tag_gmac_128 = { 0xce2dab9fede53934, 0x27f3d2bb2af9dd2e },
+ .tag_gmac_256 = { 0xca3b0cba7b772549, 0x3104ded0d6df7123 },
+ },
+ {
+ .n_bytes = 511,
+ .tag_gcm_128 = { 0x6fb5d366fa97b2d2, 0xed2d955fcc78e556 },
+ .tag_gcm_256 = { 0xc2bc52eca9348b7c, 0x0ec18a2eb637446f },
+ .tag_gmac_128 = { 0xe3012a4897edd5b5, 0xfe18c3ec617a7e88 },
+ .tag_gmac_256 = { 0x00e050eecf184591, 0xba24484f84867f4f },
+ },
+ {
+ .n_bytes = 512,
+ .tag_gcm_128 = { 0x25138f7fe88b54bd, 0xcc078b619c0e83a2 },
+ .tag_gcm_256 = { 0x63313c5ebe68fa92, 0xccc78784896cdcc3 },
+ .tag_gmac_128 = { 0xc688fe54c5595ec0, 0x5b8a687343c3ef03 },
+ .tag_gmac_256 = { 0x807c9f8e1c198242, 0xb1e0befc0b9b8193 },
+ },
+ {
+ .n_bytes = 575,
+ .tag_gcm_128 = { 0x0ce8e0b7332a7076, 0xe4aa7ab60dd0946a },
+ .tag_gcm_256 = { 0x585cff3cf78504d4, 0x45f3a9532ea40e8b },
+ .tag_gmac_128 = { 0xc06ca34dbad542b4, 0x840508722ff031dc },
+ .tag_gmac_256 = { 0xa46e22748f195488, 0x43817a5d4d17408a },
+ },
+ {
+ .n_bytes = 576,
+ .tag_gcm_128 = { 0x45360be81e8323bd, 0x10892d9804b75bb5 },
+ .tag_gcm_256 = { 0x66208ae5d809036e, 0x603d0af49475de88 },
+ .tag_gmac_128 = { 0xb4f2b1d05fd3a4ec, 0x6a15b7a05c3a5436 },
+ .tag_gmac_256 = { 0x8d78b8f7c7daf6ff, 0x925b2a92acb7356a },
+ },
+ {
+ .n_bytes = 577,
+ .tag_gcm_128 = { 0xc7e5cd17251fd138, 0xecfb0e05110303df },
+ .tag_gcm_256 = { 0x2939d12c85ea8cf8, 0xea063fba37c92eb5 },
+ .tag_gmac_128 = { 0x1fa02b370bec64a0, 0x8c759ca95a8cea85 },
+ .tag_gmac_256 = { 0x6a602c2b1fff6617, 0x17e06d829bd24a8d },
+ },
+ {
+ .n_bytes = 639,
+ .tag_gcm_128 = { 0xc679ef7a01e8f14c, 0x281e3b9a9f715cb9 },
+ .tag_gcm_256 = { 0x13abd2d67e162f98, 0xf637d467046af949 },
+ .tag_gmac_128 = { 0x05037392550b7ae2, 0x5095b4629ba46d40 },
+ .tag_gmac_256 = { 0xd8e8045772299aa7, 0x564d72fb58ea9808 },
+ },
+ {
+ .n_bytes = 640,
+ .tag_gcm_128 = { 0xff1a2c922cdd1336, 0xcaa02eab8691bf51 },
+ .tag_gcm_256 = { 0xd57e16f169d79da5, 0x3e2b47264f8efe9c },
+ .tag_gmac_128 = { 0xb32750b403bf66f8, 0x1b03ef08da0b9d80 },
+ .tag_gmac_256 = { 0x80ac3f38e2aacbfa, 0xd4ea7eb88213b629 },
+ },
+ {
+ .n_bytes = 703,
+ .tag_gcm_128 = { 0xefd0804f0155b8f1, 0xb1849ed867269569 },
+ .tag_gcm_256 = { 0xf66c5ecbd1a06fa4, 0x55ef36f3fdbe763a },
+ .tag_gmac_128 = { 0x725813463d977e5b, 0xd52aaabb923cfabb },
+ .tag_gmac_256 = { 0x4add8f86736adc52, 0xf6dabb4596975fd7 },
+ },
+ {
+ .n_bytes = 704,
+ .tag_gcm_128 = { 0x583b29260ea8e49f, 0xfaa93b0db98f9274 },
+ .tag_gcm_256 = { 0x0b777f2cd9e2f0ef, 0x01510fc85a99382e },
+ .tag_gmac_128 = { 0x89df280b0ec65cf3, 0xa3b3c05a87d2908b },
+ .tag_gmac_256 = { 0x9d510cb7732920fc, 0x16b672e611ae2f0a },
+ },
+ {
+ .n_bytes = 767,
+ .tag_gcm_128 = { 0x671ec58ab6d4a210, 0x0845fbe603169eff },
+ .tag_gcm_256 = { 0xb3913f7eb9bbdbbb, 0x4cb17aa290f6ab11 },
+ .tag_gmac_128 = { 0x3036046580a81443, 0xe18d34bb706e632b },
+ .tag_gmac_256 = { 0x4e82bc959349466c, 0x01210641d62bbdda },
+ },
+ {
+ .n_bytes = 768,
+ .tag_gcm_128 = { 0x66993b5de915fc6e, 0x4aaf0b8441040267 },
+ .tag_gcm_256 = { 0x958ed0a6c1bf11e0, 0xc29d9f4a8ce8bdc6 },
+ .tag_gmac_128 = { 0x02674435b179fddc, 0xe016a6a0540bb9be },
+ .tag_gmac_256 = { 0xf562c523b24bf164, 0x257cb21a7b602579 },
+ },
+ {
+ .n_bytes = 831,
+ .tag_gcm_128 = { 0x4914f7980699f93c, 0xc2e44fdba6a839e7 },
+ .tag_gcm_256 = { 0xa8fab43ecd572a25, 0x3cd465e491195b81 },
+ .tag_gmac_128 = { 0xa6d725516e956d5d, 0x630768e80ac3de3d },
+ .tag_gmac_256 = { 0xb4746cdde367c9e2, 0x3ea53280901a0375 },
+ },
+ {
+ .n_bytes = 832,
+ .tag_gcm_128 = { 0xac9a519f06fb8c70, 0xdc1a6544ed2cfcf7 },
+ .tag_gcm_256 = { 0x54877a7ccd02c592, 0x1a09a4474d903b56 },
+ .tag_gmac_128 = { 0xd24937cc8b938b05, 0x8d17d73a7909bbd7 },
+ .tag_gmac_256 = { 0x9d62f65eaba46b95, 0xef7f624f71ba7695 },
+ },
+ {
+ .n_bytes = 895,
+ .tag_gcm_128 = { 0x3d365bf4d44c1071, 0x07ac3129079f2013 },
+ .tag_gcm_256 = { 0x608543d4fe6526a1, 0xc78a987b87c8d96c },
+ .tag_gmac_128 = { 0xc71cf903f7a557c5, 0x06788583ad2122a5 },
+ .tag_gmac_256 = { 0x7cdaa511565b289a, 0xf818a4c85a8bd575 },
+ },
+ {
+ .n_bytes = 896,
+ .tag_gcm_128 = { 0x97000fafd1359a0b, 0xfc226d534866b495 },
+ .tag_gcm_256 = { 0x1850ee7af3133326, 0xf198d539eee4b1f5 },
+ .tag_gmac_128 = { 0x7138da25a1114bdf, 0x4deedee9ec8ed265 },
+ .tag_gmac_256 = { 0x249e9e7ec6d879c7, 0x7abfa88b8072fb54 },
+ },
+ {
+ .n_bytes = 959,
+ .tag_gcm_128 = { 0x17200025564902f2, 0x3f2c3b711ba4086d },
+ .tag_gcm_256 = { 0x3d0bf3e8b24e296d, 0x42fe0f54e33deb6d },
+ .tag_gmac_128 = { 0x8baae9b6f3bd797a, 0x177e0b6c577f2436 },
+ .tag_gmac_256 = { 0x853f961c965f472c, 0x8adc4113b3cf933a },
+ },
+ {
+ .n_bytes = 960,
+ .tag_gcm_128 = { 0x2a30ca7325e7a81b, 0xacbc71832bdceb63 },
+ .tag_gcm_256 = { 0x037786319dc22ed7, 0x6730acf359ec3b6e },
+ .tag_gmac_128 = { 0x702dd2fbc0ec5bd2, 0x61e7618d42914e06 },
+ .tag_gmac_256 = { 0x52b3152d961cbb82, 0x6ab088b034f6e3e7 },
+ },
+ {
+ .n_bytes = 1023,
+ .tag_gcm_128 = { 0x8e8789e6c4c90855, 0x4ec5503d7f953df6 },
+ .tag_gcm_256 = { 0xdb0afebe6c085f53, 0x4eb6f07b63b8a020 },
+ .tag_gmac_128 = { 0x6e9b48e5ad508180, 0xdc86430db2bad514 },
+ .tag_gmac_256 = { 0xbb52b4fbf236b741, 0x47ae63bc836dfba3 },
+ },
+ {
+ .n_bytes = 1024,
+ .tag_gcm_128 = { 0x94e1ccbea0f24089, 0xf51b53b600363bd2 },
+ .tag_gcm_256 = { 0x70f3eb3d562f0b34, 0xffd09e1a25d5bef3 },
+ .tag_gmac_128 = { 0x65a2b560392ecee3, 0x30079a9a9dbbd3a3 },
+ .tag_gmac_256 = { 0x4d361736c43090e6, 0x135810df49dcc981 },
+ },
+ {
+ .n_bytes = 1025,
+ .tag_gcm_128 = { 0x830a99737df5a71a, 0xd9ea6e87c63d3aae },
+ .tag_gcm_256 = { 0xa3fc30e0254a5ee2, 0x52e59adc9a75be40 },
+ .tag_gmac_128 = { 0xb217556427fc09ab, 0xc32fd72ec886730d },
+ .tag_gmac_256 = { 0xeab5a9a02cb0869e, 0xd59e51684bc2839c },
+ },
+ {
+ .n_bytes = 1039,
+ .tag_gcm_128 = { 0x238f229130e92934, 0x52752fc860bca067 },
+ .tag_gcm_256 = { 0xae2754bcaed68191, 0xe0770d1e9a7a67f3 },
+ .tag_gmac_128 = { 0xe030ad2beb01d85d, 0xf10c78b1b64c27af },
+ .tag_gmac_256 = { 0x081b45e126248e85, 0xca0789f30e1c47a1 },
+ },
+ {
+ .n_bytes = 1040,
+ .tag_gcm_128 = { 0x4eebcf7391d66c6f, 0x107d8bef4a93d9c6 },
+ .tag_gcm_256 = { 0xbeb02ae5466964f3, 0x8eb90364c5f9e4cb },
+ .tag_gmac_128 = { 0x451deb85fbf27da5, 0xe47e8c91106dadda },
+ .tag_gmac_256 = { 0x85f0a72f3497699d, 0xe6fce0193cc6c9d1 },
+ },
+ {
+ .n_bytes = 1041,
+ .tag_gcm_128 = { 0xbbddfb0304411d71, 0xe573f63553d7ede4 },
+ .tag_gcm_256 = { 0x68e42d2959af0b24, 0x35ac8e73c749e7f4 },
+ .tag_gmac_128 = { 0x98d022b9896b68f8, 0x98dfde2a17b2869b },
+ .tag_gmac_256 = { 0xb8dac6add35d0d9b, 0x1c55973c6dd769af },
+ },
+ {
+ .n_bytes = 1536,
+ .tag_gcm_128 = { 0x7d8933fd922418bd, 0xc88c2f289c5d3d83 },
+ .tag_gcm_256 = { 0x966c103eb6ee69f2, 0x2f6b070b5c0fc66f },
+ .tag_gmac_128 = { 0x3b70f6154246e758, 0xd485c0edf236b6e2 },
+ .tag_gmac_256 = { 0xfefe1832387b9768, 0xc876712098256ca3 },
+ },
+ {
+ .n_bytes = 2047,
+ .tag_gcm_128 = { 0x15c6bbcb0d835fd4, 0xc33afd1328c1deb1 },
+ .tag_gcm_256 = { 0xcde3edeea228ada6, 0x8276721a8662e708 },
+ .tag_gmac_128 = { 0xb556b0e42419759e, 0x23b0365cf956a3ad },
+ .tag_gmac_256 = { 0x8df762cbbe4b2a04, 0x6841bc61e5702419 },
+ },
+ {
+ .n_bytes = 2048,
+ .tag_gcm_128 = { 0xc5ddbeb8765e3aac, 0x1bad7349fd9f2b50 },
+ .tag_gcm_256 = { 0xa2a623dde251a98d, 0xaf905fbd16f6a7d9 },
+ .tag_gmac_128 = { 0xe20f1e533df2b3d0, 0x5d170bdbcc278a63 },
+ .tag_gmac_256 = { 0x9663185c4342cd4a, 0x82d3c5a3a4998fc6 },
+ },
+ {
+ .n_bytes = 2064,
+ .tag_gcm_128 = { 0x12b76ea0a6ee9cbc, 0xdaecfae7c815aa58 },
+ .tag_gcm_256 = { 0xb5bb2f76028713dd, 0xc8f3a1448b3bd050 },
+ .tag_gmac_128 = { 0x019445c168c42f9b, 0xdf33e251bd9a27fe },
+ .tag_gmac_256 = { 0xbbabd0cefc4d6a42, 0xb138675ca66ba54f },
+ },
+ {
+ .n_bytes = 2065,
+ .tag_gcm_128 = { 0x8758c5168ffc3fd7, 0x554f1df7cfa3b976 },
+ .tag_gcm_256 = { 0xc9808cf0fd21aede, 0xe26921f3fd308006 },
+ .tag_gmac_128 = { 0x44a57e7a32031596, 0x75476d5542faa57b },
+ .tag_gmac_256 = { 0xea0e81807fa79a4a, 0x889cca80746fb8d5 },
+ },
+ {
+ .n_bytes = 4095,
+ .tag_gcm_128 = { 0x06db87757f541dc9, 0x823c619c6b88ef80 },
+ .tag_gcm_256 = { 0xdf0861a56a7fe7b0, 0xe077a5c735cc21b2 },
+ .tag_gmac_128 = { 0x43cb482bea0449e9, 0x70d668af983c9a6c },
+ .tag_gmac_256 = { 0x5fc304ad7be1d19a, 0x81bf2f4111de0b06 },
+ },
+ {
+ .n_bytes = 4096,
+ .tag_gcm_128 = { 0xe4afdad642876152, 0xf78cfcfcb92520b6 },
+ .tag_gcm_256 = { 0x7552cda8d91bdab1, 0x4bf57b7567d59e89 },
+ .tag_gmac_128 = { 0xac5240f8e9c49cfc, 0x2a3c9d0999aded50 },
+ .tag_gmac_256 = { 0x9fb6cd8f10f7b6c5, 0x16e442c147869222 },
+ },
+ {
+ .n_bytes = 4112,
+ .tag_gcm_128 = { 0x2a34db8f06bcf0ee, 0x7a4a2456fa340c33 },
+ .tag_gcm_256 = { 0x4b6c0c5b5c943f5e, 0x6d1669e849ce061a },
+ .tag_gmac_128 = { 0x143bfc9ab07d9bb5, 0xf0aa7510a9039349 },
+ .tag_gmac_256 = { 0x8a97bdd033775ba0, 0x5901a5160739be25 },
+ },
+ {
+ .n_bytes = 4113,
+ .tag_gcm_128 = { 0x296acfcbcbf529af, 0xe3e2cfb1bc5855c8 },
+ .tag_gcm_256 = { 0x181f6f9068ea477e, 0x1e05bfd01ee3e173 },
+ .tag_gmac_128 = { 0x0d81fcb0829e3c8b, 0x68016225b5fa7745 },
+ .tag_gmac_256 = { 0xa2421ac50d65c6b5, 0x84bd16fa55486af8 },
+ },
+ {
+ .n_bytes = 16382,
+ .tag_gcm_128 = { 0xd39fd367e00a103d, 0xf873a278b32d207f },
+ .tag_gcm_256 = { 0xa8da09a851ae6c88, 0x2ef17f0da7f191f1 },
+ .tag_gmac_128 = { 0xd4a22896f44c1c14, 0x69a5d02715c90ea4 },
+ .tag_gmac_256 = { 0x64788ca5e11722b6, 0x63d74a4b24538762 },
+ },
+ {
+ .n_bytes = 16383,
+ .tag_gcm_128 = { 0x2162b91aad49eebc, 0x28c7efe93e639c75 },
+ .tag_gcm_256 = { 0xc5baee5e40004087, 0xf6b26211facc66a5 },
+ .tag_gmac_128 = { 0x3ec003d690d3d846, 0x204baef851d8ad7d },
+ .tag_gmac_256 = { 0xdb51d6f5dddf16bb, 0x529f3825cf78dbd5 },
+ },
+ {
+ .n_bytes = 16384,
+ .tag_gcm_128 = { 0x2272e778c4c5c9ef, 0x84c50021e75ddbab },
+ .tag_gcm_256 = { 0x6c32f1c5666b1f4c, 0x91142a86ae5241b2 },
+ .tag_gmac_128 = { 0x43dadd5ecee9674b, 0xa30fea9ae8091c6c },
+ .tag_gmac_256 = { 0xc360b76ac1887181, 0xcb732f29ea86edeb },
+ },
+ {
+ .n_bytes = 16385,
+ .tag_gcm_128 = { 0xe2a47837578b4056, 0xf96e7233cbeb1ce1 },
+ .tag_gcm_256 = { 0xfa3aa4ebe36fb390, 0x6a2cf1671f4f1a01 },
+ .tag_gmac_128 = { 0xfd0b7312c4975687, 0xdd3096b1c850e80a },
+ .tag_gmac_256 = { 0xaf2cae4642a5536a, 0xb27aff5cc8bd354c },
+ },
+ {
+ .n_bytes = 16386,
+ .tag_gcm_128 = { 0xe1b4c0e5825304ae, 0x48c5dd82aa114320 },
+ .tag_gcm_256 = { 0x76c3612118f47fa8, 0xdd0a47b132ecad3a },
+ .tag_gmac_128 = { 0x346bc841a7f5b642, 0x6fb1b96391c66b40 },
+ .tag_gmac_256 = { 0x2f1a1b6a000e18b2, 0xf7cba25e02551d43 },
+ },
+};
+
+#define MAX_TEST_DATA_LEN 32768
+
+static const struct
+{
+ char *name;
+ const u8 *pt, *key128, *key256, *ct128, *ct256, *tag128, *tag256, *aad, *iv;
+ u32 data_len, tag128_len, tag256_len, aad_len;
+} test_cases[] = {
+ /* test cases */
+ {
+ .name = "GCM Spec. TC1",
+ .iv = tc1_iv,
+ .key128 = tc1_key128,
+ .key256 = tc1_key256,
+ .tag128 = tc1_tag128,
+ .tag128_len = sizeof (tc1_tag128),
+ .tag256 = tc1_tag256,
+ .tag256_len = sizeof (tc1_tag256),
+ },
+ {
+ .name = "GCM Spec. TC2",
+ .pt = tc2_plaintext,
+ .data_len = sizeof (tc2_plaintext),
+ .iv = tc1_iv,
+ .key128 = tc1_key128,
+ .key256 = tc1_key256,
+ .ct128 = tc2_ciphertext128,
+ .ct256 = tc2_ciphertext256,
+ .tag128 = tc2_tag128,
+ .tag128_len = sizeof (tc2_tag128),
+ .tag256 = tc2_tag256,
+ .tag256_len = sizeof (tc2_tag256),
+ },
+ {
+ .name = "GCM Spec. TC3",
+ .pt = tc3_plaintext,
+ .data_len = sizeof (tc3_plaintext),
+ .iv = tc3_iv,
+ .key128 = tc3_key128,
+ .key256 = tc3_key256,
+ .ct128 = tc3_ciphertext128,
+ .ct256 = tc3_ciphertext256,
+ .tag128 = tc3_tag128,
+ .tag128_len = sizeof (tc3_tag128),
+ .tag256 = tc3_tag256,
+ .tag256_len = sizeof (tc3_tag256),
+ },
+ {
+ .name = "GCM Spec. TC4",
+ .pt = tc4_plaintext,
+ .data_len = sizeof (tc4_plaintext),
+ .aad = tc4_aad,
+ .aad_len = sizeof (tc4_aad),
+ .iv = tc3_iv,
+ .key128 = tc3_key128,
+ .key256 = tc3_key256,
+ .ct128 = tc4_ciphertext128,
+ .ct256 = tc4_ciphertext256,
+ .tag128 = tc4_tag128,
+ .tag128_len = sizeof (tc4_tag128),
+ .tag256 = tc4_tag256,
+ .tag256_len = sizeof (tc4_tag256),
+ }
+};
+
+#define perftest_aesXXX_enc_var_sz(a) \
+ void __test_perf_fn perftest_aes##a##_enc_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ u8 *tag = test_mem_alloc (16); \
+ u8 *key = test_mem_alloc_and_fill_inc_u8 (32, 192, 0); \
+ u8 *iv = test_mem_alloc_and_fill_inc_u8 (16, 128, 0); \
+ \
+ clib_aes_gcm_key_expand (kd, key, AES_KEY_##a); \
+ \
+ test_perf_event_enable (tp); \
+ clib_aes##a##_gcm_enc (kd, src, n, 0, 0, iv, 16, dst, tag); \
+ test_perf_event_disable (tp); \
+ }
+
+#define perftest_aesXXX_dec_var_sz(a) \
+ void __test_perf_fn perftest_aes##a##_dec_var_sz (test_perf_t *tp) \
+ { \
+ u32 n = tp->n_ops; \
+ aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (*kd)); \
+ u8 *dst = test_mem_alloc (n + 16); \
+ u8 *src = test_mem_alloc_and_fill_inc_u8 (n + 16, 0, 0); \
+ u8 *tag = test_mem_alloc (16); \
+ u8 *key = test_mem_alloc_and_fill_inc_u8 (32, 192, 0); \
+ u8 *iv = test_mem_alloc_and_fill_inc_u8 (16, 128, 0); \
+ int *rv = test_mem_alloc (16); \
+ \
+ clib_aes_gcm_key_expand (kd, key, AES_KEY_##a); \
+ \
+ test_perf_event_enable (tp); \
+ rv[0] = clib_aes##a##_gcm_dec (kd, src, n, 0, 0, iv, tag, 16, dst); \
+ test_perf_event_disable (tp); \
+ }
+
+static clib_error_t *
+test_clib_aes128_gcm_enc (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128);
+ clib_aes128_gcm_enc (&kd, tc->pt, tc->data_len, tc->aad, tc->aad_len,
+ tc->iv, tc->tag128_len, ct, tag);
+
+ if (memcmp (tc->tag128, tag, tc->tag128_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+ if (tc->data_len && memcmp (tc->ct128, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes128_gcm_enc (&kd, pt, tc->n_bytes, 0, 0, inc_iv, 16, ct, tag);
+
+ if (memcmp (tc->tag_gcm_128, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_enc_var_sz (128);
+
+REGISTER_TEST (clib_aes128_gcm_enc) = {
+ .name = "clib_aes128_gcm_enc",
+ .fn = test_clib_aes128_gcm_enc,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes128_enc_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes256_gcm_enc (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256);
+ clib_aes256_gcm_enc (&kd, tc->pt, tc->data_len, tc->aad, tc->aad_len,
+ tc->iv, tc->tag256_len, ct, tag);
+
+ if (memcmp (tc->tag256, tag, tc->tag256_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+ if (tc->data_len && memcmp (tc->ct256, ct, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_256);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes256_gcm_enc (&kd, pt, tc->n_bytes, 0, 0, inc_iv, 16, ct, tag);
+
+ if (memcmp (tc->tag_gcm_256, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_enc_var_sz (256);
+REGISTER_TEST (clib_aes256_gcm_enc) = {
+ .name = "clib_aes256_gcm_enc",
+ .fn = test_clib_aes256_gcm_enc,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_enc_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes256_enc_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes128_gcm_dec (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+ int rv;
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128);
+ rv = clib_aes128_gcm_dec (&kd, tc->ct128, tc->data_len, tc->aad,
+ tc->aad_len, tc->iv, tc->tag128,
+ tc->tag128_len, pt);
+
+ if (!rv)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+ if (tc->data_len && memcmp (tc->pt, pt, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128);
+ clib_aes128_gcm_enc (&kd, pt, sizeof (ct), 0, 0, inc_iv, 16, ct, tag);
+
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ if (!clib_aes128_gcm_dec (&kd, ct, tc->n_bytes, 0, 0, inc_iv,
+ (u8 *) tc->tag_gcm_128, 16, pt))
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_dec_var_sz (128);
+
+REGISTER_TEST (clib_aes128_gcm_dec) = {
+ .name = "clib_aes128_gcm_dec",
+ .fn = test_clib_aes128_gcm_dec,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes128_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes128_dec_var_sz }),
+};
+
+static clib_error_t *
+test_clib_aes256_gcm_dec (clib_error_t *err)
+{
+ aes_gcm_key_data_t kd;
+ u8 pt[MAX_TEST_DATA_LEN];
+ u8 ct[MAX_TEST_DATA_LEN];
+ u8 tag[16];
+ int rv;
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256);
+ rv = clib_aes256_gcm_dec (&kd, tc->ct256, tc->data_len, tc->aad,
+ tc->aad_len, tc->iv, tc->tag256,
+ tc->tag256_len, pt);
+
+ if (!rv)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+
+ if (tc->data_len && memcmp (tc->pt, pt, tc->data_len) != 0)
+ return clib_error_return (err, "%s: invalid ciphertext", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (pt); i++)
+ pt[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128);
+ clib_aes128_gcm_enc (&kd, pt, sizeof (ct), 0, 0, inc_iv, 16, ct, tag);
+
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ if (!clib_aes128_gcm_dec (&kd, ct, tc->n_bytes, 0, 0, inc_iv,
+ (u8 *) tc->tag_gcm_128, 16, pt))
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+perftest_aesXXX_dec_var_sz (256);
+REGISTER_TEST (clib_aes256_gcm_dec) = {
+ .name = "clib_aes256_gcm_dec",
+ .fn = test_clib_aes256_gcm_dec,
+ .perf_tests = PERF_TESTS ({ .name = "variable size (per byte)",
+ .n_ops = 1424,
+ .fn = perftest_aes256_dec_var_sz },
+ { .name = "variable size (per byte)",
+ .n_ops = 1 << 20,
+ .fn = perftest_aes256_dec_var_sz }),
+};
+
+static const u8 gmac1_key[] = {
+ 0x77, 0xbe, 0x63, 0x70, 0x89, 0x71, 0xc4, 0xe2,
+ 0x40, 0xd1, 0xcb, 0x79, 0xe8, 0xd7, 0x7f, 0xeb
+};
+static const u8 gmac1_iv[] = { 0xe0, 0xe0, 0x0f, 0x19, 0xfe, 0xd7,
+ 0xba, 0x01, 0x36, 0xa7, 0x97, 0xf3 };
+static const u8 gmac1_aad[] = {
+ 0x7a, 0x43, 0xec, 0x1d, 0x9c, 0x0a, 0x5a, 0x78,
+ 0xa0, 0xb1, 0x65, 0x33, 0xa6, 0x21, 0x3c, 0xab
+};
+static const u8 gmac1_tag[] = {
+ 0x20, 0x9f, 0xcc, 0x8d, 0x36, 0x75, 0xed, 0x93,
+ 0x8e, 0x9c, 0x71, 0x66, 0x70, 0x9d, 0xd9, 0x46
+};
+
+static const u8 gmac2_key[] = {
+ 0x20, 0xb5, 0xb6, 0xb8, 0x54, 0xe1, 0x87, 0xb0,
+ 0x58, 0xa8, 0x4d, 0x57, 0xbc, 0x15, 0x38, 0xb6
+};
+
+static const u8 gmac2_iv[] = { 0x94, 0xc1, 0x93, 0x5a, 0xfc, 0x06,
+ 0x1c, 0xbf, 0x25, 0x4b, 0x93, 0x6f };
+
+static const u8 gmac2_aad[] = {
+ 0xca, 0x41, 0x8e, 0x71, 0xdb, 0xf8, 0x10, 0x03, 0x81, 0x74, 0xea, 0xa3, 0x71,
+ 0x9b, 0x3f, 0xcb, 0x80, 0x53, 0x1c, 0x71, 0x10, 0xad, 0x91, 0x92, 0xd1, 0x05,
+ 0xee, 0xaa, 0xfa, 0x15, 0xb8, 0x19, 0xac, 0x00, 0x56, 0x68, 0x75, 0x2b, 0x34,
+ 0x4e, 0xd1, 0xb2, 0x2f, 0xaf, 0x77, 0x04, 0x8b, 0xaf, 0x03, 0xdb, 0xdd, 0xb3,
+ 0xb4, 0x7d, 0x6b, 0x00, 0xe9, 0x5c, 0x4f, 0x00, 0x5e, 0x0c, 0xc9, 0xb7, 0x62,
+ 0x7c, 0xca, 0xfd, 0x3f, 0x21, 0xb3, 0x31, 0x2a, 0xa8, 0xd9, 0x1d, 0x3f, 0xa0,
+ 0x89, 0x3f, 0xe5, 0xbf, 0xf7, 0xd4, 0x4c, 0xa4, 0x6f, 0x23, 0xaf, 0xe0
+};
+
+static const u8 gmac2_tag[] = {
+ 0xb3, 0x72, 0x86, 0xeb, 0xaf, 0x4a, 0x54, 0xe0,
+ 0xff, 0xc2, 0xa1, 0xde, 0xaf, 0xc9, 0xf6, 0xdb
+};
+
+static const struct
+{
+ char *name;
+ const u8 *key128, *key256, *tag128, *tag256, *aad, *iv;
+ u32 tag128_len, tag256_len, aad_len;
+} gmac_test_cases[] = {
+ /* test cases */
+ {
+ .name = "GMAC1",
+ .iv = gmac1_iv,
+ .key128 = gmac1_key,
+ .tag128 = gmac1_tag,
+ .tag128_len = sizeof (gmac1_tag),
+ .aad = gmac1_aad,
+ .aad_len = sizeof (gmac1_aad),
+ },
+ {
+ .name = "GMAC2",
+ .iv = gmac2_iv,
+ .key128 = gmac2_key,
+ .tag128 = gmac2_tag,
+ .tag128_len = sizeof (gmac2_tag),
+ .aad = gmac2_aad,
+ .aad_len = sizeof (gmac2_aad),
+ },
+};
+
+static clib_error_t *
+test_clib_aes128_gmac (clib_error_t *err)
+{
+ u8 data[MAX_TEST_DATA_LEN];
+ aes_gcm_key_data_t kd;
+ u8 tag[16];
+
+ FOREACH_ARRAY_ELT (tc, gmac_test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key128, AES_KEY_128);
+ clib_aes128_gmac (&kd, tc->aad, tc->aad_len, tc->iv, tc->tag128_len,
+ tag);
+
+ if (memcmp (tc->tag128, tag, tc->tag128_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+ }
+
+ for (int i = 0; i < sizeof (data); i++)
+ data[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_128);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes128_gmac (&kd, data, tc->n_bytes, inc_iv, 16, tag);
+
+ if (memcmp (tc->tag_gmac_128, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+void __test_perf_fn
+perftest_gmac256_fixed_512byte (test_perf_t *tp)
+{
+ uword n = tp->n_ops;
+ aes_gcm_key_data_t *kd = test_mem_alloc (sizeof (aes_gcm_key_data_t));
+ u8 *ivs = test_mem_alloc_and_fill_inc_u8 (n * 12, 0, 0);
+ u8 *tags = test_mem_alloc_and_fill_inc_u8 (8 + n * 16, 0, 0);
+ u8 *data = test_mem_alloc_and_fill_inc_u8 (512, 0, 0);
+
+ test_perf_event_enable (tp);
+ clib_aes_gcm_key_expand (kd, inc_key, AES_KEY_128);
+
+ for (int i = 0; i < n; i++)
+ clib_aes128_gmac (kd, data, 512, ivs + n * 12, 16, tags + n * 16);
+ test_perf_event_disable (tp);
+}
+
+REGISTER_TEST (clib_aes128_gmac) = {
+ .name = "clib_aes128_gmac",
+ .fn = test_clib_aes128_gmac,
+ .perf_tests = PERF_TESTS ({ .name = "fixed (512 byte)",
+ .n_ops = 256,
+ .fn = perftest_gmac256_fixed_512byte }),
+};
+
+static clib_error_t *
+test_clib_aes256_gmac (clib_error_t *err)
+{
+ u8 data[MAX_TEST_DATA_LEN];
+ aes_gcm_key_data_t kd;
+ u8 tag[16];
+
+#if 0
+ FOREACH_ARRAY_ELT (tc, gmac_test_cases)
+ {
+ clib_aes_gcm_key_expand (&kd, tc->key256, AES_KEY_256);
+ clib_aes256_gmac (&kd, tc->aad, tc->aad_len, tc->iv, tc->tag256_len,
+ tag);
+
+ if (memcmp (tc->tag256, tag, tc->tag256_len) != 0)
+ return clib_error_return (err, "%s: invalid tag", tc->name);
+ }
+#endif
+
+ for (int i = 0; i < sizeof (data); i++)
+ data[i] = i;
+
+ clib_aes_gcm_key_expand (&kd, inc_key, AES_KEY_256);
+ FOREACH_ARRAY_ELT (tc, inc_test_cases)
+ {
+ clib_aes256_gmac (&kd, data, tc->n_bytes, inc_iv, 16, tag);
+
+ if (memcmp (tc->tag_gmac_256, tag, 16) != 0)
+ return clib_error_return (err, "incremental %u bytes: invalid tag",
+ tc->n_bytes);
+ }
+
+ return err;
+}
+
+REGISTER_TEST (clib_aes256_gmac) = {
+ .name = "clib_aes256_gmac",
+ .fn = test_clib_aes256_gmac,
+};
+#endif
diff --git a/src/vppinfra/vector/test/array_mask.c b/src/vppinfra/test/array_mask.c
index 703c70abbe9..4d8fc7c59e2 100644
--- a/src/vppinfra/vector/test/array_mask.c
+++ b/src/vppinfra/test/array_mask.c
@@ -3,10 +3,10 @@
*/
#include <vppinfra/format.h>
-#include <vppinfra/vector/test/test.h>
+#include <vppinfra/test/test.h>
#include <vppinfra/vector/array_mask.h>
-__clib_test_fn void
+__test_funct_fn void
clib_array_mask_u32_wrapper (u32 *src, u32 mask, u32 n_elts)
{
clib_array_mask_u32 (src, mask, n_elts);
@@ -76,13 +76,15 @@ static array_mask_test_t tests[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } },
+ /* mask values 0x1, output array of 1, 0, 1, 0,.. */
+ { .mask = 1, .expected = { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0 } },
};
static clib_error_t *
test_clib_array_mask_u32 (clib_error_t *err)
{
- u32 i, j;
- for (i = 0; i < ARRAY_LEN (tests); i++)
+ u32 i, j, len;
+ for (i = 0; i < ARRAY_LEN (tests) - 1; i++)
{
u32 src[256];
for (j = 0; j < ARRAY_LEN (src); j++)
@@ -99,6 +101,41 @@ test_clib_array_mask_u32 (clib_error_t *err)
i, j, src[j], t->expected[j]);
}
}
+
+ for (i = 0; i < ARRAY_LEN (tests) - 1; i++)
+ {
+ for (len = 1; len <= 256; len++)
+ {
+ u32 src[len];
+ for (j = 0; j < ARRAY_LEN (src); j++)
+ src[j] = j;
+
+ array_mask_test_t *t = tests + i;
+ clib_array_mask_u32_wrapper (src, t->mask, ARRAY_LEN (src));
+ for (j = 0; j < ARRAY_LEN (src); j++)
+ {
+ if (src[j] != t->expected[j])
+ return clib_error_return (err,
+ "testcase %u failed at "
+ "(src[%u] = 0x%x, expected 0x%x)",
+ i, j, src[j], t->expected[j]);
+ }
+ }
+ }
+
+ u32 src[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+ array_mask_test_t *t = tests + i;
+
+ clib_array_mask_u32_wrapper (src, t->mask, ARRAY_LEN (src));
+ for (j = 0; j < ARRAY_LEN (src); j++)
+ {
+ if (src[j] != t->expected[j])
+ return clib_error_return (err,
+ "testcase %u failed at "
+ "(src[%u] = 0x%x, expected 0x%x)",
+ i, j, src[j], t->expected[j]);
+ }
+
return err;
}
diff --git a/src/vppinfra/test/compress.c b/src/vppinfra/test/compress.c
new file mode 100644
index 00000000000..083065f9bda
--- /dev/null
+++ b/src/vppinfra/test/compress.c
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/vector/compress.h>
+
+__test_funct_fn u32
+clib_compress_u64_wrapper (u64 *dst, u64 *src, u64 *mask, u32 n_elts)
+{
+ return clib_compress_u64 (dst, src, mask, n_elts);
+}
+
+__test_funct_fn u32
+clib_compress_u32_wrapper (u32 *dst, u32 *src, u64 *mask, u32 n_elts)
+{
+ return clib_compress_u32 (dst, src, mask, n_elts);
+}
+
+__test_funct_fn u32
+clib_compress_u16_wrapper (u16 *dst, u16 *src, u64 *mask, u32 n_elts)
+{
+ return clib_compress_u16 (dst, src, mask, n_elts);
+}
+
+__test_funct_fn u32
+clib_compress_u8_wrapper (u8 *dst, u8 *src, u64 *mask, u32 n_elts)
+{
+ return clib_compress_u8 (dst, src, mask, n_elts);
+}
+
+typedef struct
+{
+ u64 mask[10];
+ u32 n_elts;
+} compress_test_t;
+
+static compress_test_t tests[] = {
+ { .mask = { 1 }, .n_elts = 1 },
+ { .mask = { 2 }, .n_elts = 2 },
+ { .mask = { 3 }, .n_elts = 2 },
+ { .mask = { 0, 1 }, .n_elts = 66 },
+ { .mask = { 0, 2 }, .n_elts = 69 },
+ { .mask = { 0, 3 }, .n_elts = 66 },
+ { .mask = { ~0ULL, ~0ULL, ~0ULL, ~0ULL }, .n_elts = 62 },
+ { .mask = { ~0ULL, ~0ULL, ~0ULL, ~0ULL }, .n_elts = 255 },
+ { .mask = { ~0ULL, 1, 1, ~0ULL }, .n_elts = 256 },
+};
+
+static clib_error_t *
+test_clib_compress_u64 (clib_error_t *err)
+{
+ u32 i, j;
+
+ for (i = 0; i < ARRAY_LEN (tests); i++)
+ {
+ compress_test_t *t = tests + i;
+ u64 src[t->n_elts];
+#ifdef CLIB_SANITIZE_ADDR
+ u64 dst[t->n_elts];
+#else /* CLIB_SANITIZE_ADDR */
+ u64 dst[513];
+#endif /* CLIB_SANITIZE_ADDR */
+ u64 *dp = dst;
+ u32 r;
+ for (j = 0; j < t->n_elts; j++)
+ src[j] = j;
+
+ for (j = 0; j < ARRAY_LEN (dst); j++)
+ dst[j] = 0xa5a5a5a5a5a5a5a5;
+
+ r = clib_compress_u64_wrapper (dst, src, t->mask, t->n_elts);
+
+ for (j = 0; j < t->n_elts; j++)
+ {
+ if ((t->mask[j >> 6] & (1ULL << (j & 0x3f))) == 0)
+ continue;
+ if (dp[0] != src[j])
+ return clib_error_return (err,
+ "wrong data in testcase %u at "
+ "(dst[%u] = 0x%lx, src[%u] = 0x%lx)",
+ i, dp - dst, dp[0], j, src[j]);
+ dp++;
+ }
+
+#ifndef CLIB_SANITIZE_ADDR
+ if (dst[dp - dst + 1] != 0xa5a5a5a5a5a5a5a5)
+ return clib_error_return (err, "buffer overrun in testcase %u", i);
+#endif /* CLIB_SANITIZE_ADDR */
+
+ if (dp - dst != r)
+ return clib_error_return (err, "wrong number of elts in testcase %u",
+ i);
+ }
+
+ return err;
+}
+
+static clib_error_t *
+test_clib_compress_u32 (clib_error_t *err)
+{
+ u32 i, j;
+
+ for (i = 0; i < ARRAY_LEN (tests); i++)
+ {
+ compress_test_t *t = tests + i;
+ u32 src[t->n_elts];
+#ifdef CLIB_SANITIZE_ADDR
+ u32 dst[t->n_elts];
+#else /* CLIB_SANITIZE_ADDR */
+ u32 dst[513];
+#endif /* CLIB_SANITIZE_ADDR */
+ u32 *dp = dst;
+ u32 r;
+ for (j = 0; j < t->n_elts; j++)
+ src[j] = j;
+
+ for (j = 0; j < ARRAY_LEN (dst); j++)
+ dst[j] = 0xa5a5a5a5;
+
+ r = clib_compress_u32_wrapper (dst, src, t->mask, t->n_elts);
+
+ for (j = 0; j < t->n_elts; j++)
+ {
+ if ((t->mask[j >> 6] & (1ULL << (j & 0x3f))) == 0)
+ continue;
+
+ if (dp[0] != src[j])
+ return clib_error_return (err,
+ "wrong data in testcase %u at "
+ "(dst[%u] = 0x%x, src[%u] = 0x%x)",
+ i, dp - dst, dp[0], j, src[j]);
+ dp++;
+ }
+
+#ifndef CLIB_SANITIZE_ADDR
+ if (dst[dp - dst + 1] != 0xa5a5a5a5)
+ return clib_error_return (err, "buffer overrun in testcase %u", i);
+#endif /* CLIB_SANITIZE_ADDR */
+
+ if (dp - dst != r)
+ return clib_error_return (err, "wrong number of elts in testcase %u",
+ i);
+ }
+
+ return err;
+}
+
+static clib_error_t *
+test_clib_compress_u16 (clib_error_t *err)
+{
+ u32 i, j;
+
+ for (i = 0; i < ARRAY_LEN (tests); i++)
+ {
+ compress_test_t *t = tests + i;
+ u16 src[t->n_elts];
+#ifdef CLIB_SANITIZE_ADDR
+ u16 dst[t->n_elts];
+#else /* CLIB_SANITIZE_ADDR */
+ u16 dst[513];
+#endif /* CLIB_SANITIZE_ADDR */
+ u16 *dp = dst;
+ u32 r;
+ for (j = 0; j < t->n_elts; j++)
+ src[j] = j;
+
+ for (j = 0; j < ARRAY_LEN (dst); j++)
+ dst[j] = 0xa5a5;
+
+ r = clib_compress_u16_wrapper (dst, src, t->mask, t->n_elts);
+
+ for (j = 0; j < t->n_elts; j++)
+ {
+ if ((t->mask[j >> 6] & (1ULL << (j & 0x3f))) == 0)
+ continue;
+ if (dp[0] != src[j])
+ return clib_error_return (err,
+ "wrong data in testcase %u at "
+ "(dst[%u] = 0x%x, src[%u] = 0x%x)",
+ i, dp - dst, dp[0], j, src[j]);
+ dp++;
+ }
+
+#ifndef CLIB_SANITIZE_ADDR
+ if (dst[dp - dst + 1] != 0xa5a5)
+ return clib_error_return (err, "buffer overrun in testcase %u", i);
+#endif /* CLIB_SANITIZE_ADDR */
+
+ if (dp - dst != r)
+ return clib_error_return (err, "wrong number of elts in testcase %u",
+ i);
+ }
+
+ return err;
+}
+
+static clib_error_t *
+test_clib_compress_u8 (clib_error_t *err)
+{
+ u32 i, j;
+
+ for (i = 0; i < ARRAY_LEN (tests); i++)
+ {
+ compress_test_t *t = tests + i;
+ u8 src[t->n_elts];
+#ifdef CLIB_SANITIZE_ADDR
+ u8 dst[t->n_elts];
+#else /* CLIB_SANITIZE_ADDR */
+ u8 dst[513];
+#endif /* CLIB_SANITIZE_ADDR */
+ u8 *dp = dst;
+ u32 r;
+ for (j = 0; j < t->n_elts; j++)
+ src[j] = j;
+
+ for (j = 0; j < ARRAY_LEN (dst); j++)
+ dst[j] = 0xa5;
+
+ r = clib_compress_u8_wrapper (dst, src, t->mask, t->n_elts);
+
+ for (j = 0; j < t->n_elts; j++)
+ {
+ if ((t->mask[j >> 6] & (1ULL << (j & 0x3f))) == 0)
+ continue;
+ if (dp[0] != src[j])
+ return clib_error_return (err,
+ "wrong data in testcase %u at "
+ "(dst[%u] = 0x%x, src[%u] = 0x%x)",
+ i, dp - dst, dp[0], j, src[j]);
+ dp++;
+ }
+
+#ifndef CLIB_SANITIZE_ADDR
+ if (dst[dp - dst + 1] != 0xa5)
+ return clib_error_return (err, "buffer overrun in testcase %u", i);
+#endif /* CLIB_SANITIZE_ADDR */
+
+ if (dp - dst != r)
+ return clib_error_return (err, "wrong number of elts in testcase %u",
+ i);
+ }
+
+ return err;
+}
+
+REGISTER_TEST (clib_compress_u64) = {
+ .name = "clib_compress_u64",
+ .fn = test_clib_compress_u64,
+};
+
+REGISTER_TEST (clib_compress_u32) = {
+ .name = "clib_compress_u32",
+ .fn = test_clib_compress_u32,
+};
+
+REGISTER_TEST (clib_compress_u16) = {
+ .name = "clib_compress_u16",
+ .fn = test_clib_compress_u16,
+};
+
+REGISTER_TEST (clib_compress_u8) = {
+ .name = "clib_compress_u8",
+ .fn = test_clib_compress_u8,
+};
diff --git a/src/vppinfra/test/count_equal.c b/src/vppinfra/test/count_equal.c
new file mode 100644
index 00000000000..942c2203d3d
--- /dev/null
+++ b/src/vppinfra/test/count_equal.c
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/vector/count_equal.h>
+
+#define foreach_clib_count_equal(type) \
+ typedef uword (wrapper_fn_##type) (type * a, uword maxcount); \
+ \
+ __test_funct_fn uword clib_count_equal_##type##_wrapper (type *a, \
+ uword maxcount) \
+ { \
+ return clib_count_equal_##type (a, maxcount); \
+ } \
+ \
+ static wrapper_fn_##type *wfn_##type = &clib_count_equal_##type##_wrapper; \
+ static clib_error_t *test_clib_count_equal_##type (clib_error_t *err) \
+ { \
+ u32 ps = clib_mem_get_log2_page_size (); \
+ void *map; \
+ \
+ u16 lengths[] = { \
+ 1, 2, 3, 5, 7, 9, 15, 16, 17, 31, 32, 33, 255, 256, 257 \
+ }; \
+ type *data; \
+ \
+ map = clib_mem_vm_map (0, 2ULL << ps, ps, "test"); \
+ if (map == CLIB_MEM_VM_MAP_FAILED) \
+ return clib_error_return (err, "clib_mem_vm_map failed"); \
+ \
+ data = ((type *) (map + (1ULL << ps))); \
+ data[-1] = 0xfe; \
+ \
+ mprotect (data, 1ULL < ps, PROT_NONE); \
+ \
+ for (u8 d = 0; d < 255; d++) \
+ { \
+ for (int i = 1; i <= (1 << ps) / sizeof (data[0]); i++) \
+ data[-i] = d; \
+ for (int i = 0; i < ARRAY_LEN (lengths); i++) \
+ { \
+ uword rv, len = lengths[i]; \
+ \
+ if ((rv = wfn_##type (data - len, len)) != len) \
+ { \
+ err = clib_error_return ( \
+ err, "testcase 1 failed for len %u data %u(rv %u)", len, d, \
+ rv); \
+ goto done; \
+ } \
+ \
+ data[-1] = d + 1; \
+ if (len > 1 && ((rv = wfn_##type (data - len, len)) != len - 1)) \
+ { \
+ err = clib_error_return ( \
+ err, "testcase 2 failed for len %u data %u (rv %u)", len, \
+ d, rv); \
+ goto done; \
+ } \
+ data[-1] = d; \
+ \
+ data[-2] = d + 1; \
+ if (len > 2 && ((rv = wfn_##type (data - len, len)) != len - 2)) \
+ { \
+ err = clib_error_return ( \
+ err, "testcase 3 failed for len %u data %u (rv %u)", len, \
+ d, rv); \
+ goto done; \
+ } \
+ data[-2] = d; \
+ } \
+ } \
+ \
+ done: \
+ clib_mem_vm_unmap (map); \
+ return err; \
+ }
+
+foreach_clib_count_equal (u8);
+foreach_clib_count_equal (u16);
+foreach_clib_count_equal (u32);
+foreach_clib_count_equal (u64);
+
+REGISTER_TEST (clib_count_equal_u8) = {
+ .name = "clib_count_equal_u8",
+ .fn = test_clib_count_equal_u8,
+};
+
+REGISTER_TEST (clib_count_equal_u16) = {
+ .name = "clib_count_equal_u16",
+ .fn = test_clib_count_equal_u16,
+};
+
+REGISTER_TEST (clib_count_equal_u32) = {
+ .name = "clib_count_equal_u32",
+ .fn = test_clib_count_equal_u32,
+};
+
+REGISTER_TEST (clib_count_equal_u64) = {
+ .name = "clib_count_equal_u64",
+ .fn = test_clib_count_equal_u64,
+};
diff --git a/src/vppinfra/test/crc32c.c b/src/vppinfra/test/crc32c.c
new file mode 100644
index 00000000000..8c0c691e2e8
--- /dev/null
+++ b/src/vppinfra/test/crc32c.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crc32.h>
+
+#ifndef CLIB_MARCH_VARIANT
+char *crc32c_test_string =
+ "The quick brown fox jumped over the lazy dog and stumbled.";
+u32 crc32c_test_values_data[] = {
+ 0x00000000, 0x96bf4dcc, 0x65479df4, 0x60a63889, 0xda99c852, 0x3337e4e2,
+ 0x4651af18, 0x83b586a1, 0x2235e3b5, 0x7f896b6f, 0x1f17a8f3, 0x60dc68bc,
+ 0x6f95458b, 0x24c5aa40, 0xe40de8f0, 0x3e344ed8, 0x798903f4, 0x73ea05e3,
+ 0xcfc61ead, 0xe6ed33a9, 0xfaa20d87, 0x5ce246c4, 0x4022138c, 0x111b090a,
+ 0x1a6b673c, 0x298d6a78, 0x5d3485d5, 0xc6c24fec, 0x91600ac3, 0x877506df,
+ 0xd9702ff7, 0xb7de5f4b, 0xf8f8e606, 0x905bdc1c, 0xb69298ce, 0x3b748c05,
+ 0x1577ee4e, 0xc19389c7, 0x842bc1c7, 0x0db915db, 0x437d7c44, 0xa61f7901,
+ 0x54919807, 0xeb4b5a35, 0xb0f5e17e, 0xfded9015, 0xb6ff2e82, 0xaec598e4,
+ 0x8258fee0, 0xc30f7e3a, 0x390ac90e, 0x1a4376fc, 0xfa5ea3c2, 0xfca2d721,
+ 0x52d74c9f, 0xe06c4bcd, 0x28728122, 0x67f288d5, 0
+};
+u32 *crc32c_test_values = crc32c_test_values_data;
+
+#else
+extern char *crc32c_test_string;
+extern u32 *crc32c_test_values;
+#endif
+
+static clib_error_t *
+test_clib_crc32c (clib_error_t *err)
+{
+ int max_len = strlen (crc32c_test_string);
+ int i;
+ for (i = 0; i < max_len; i++)
+ {
+ u32 expected_crc32c = crc32c_test_values[i];
+ u32 calculated_crc32 = clib_crc32c ((u8 *) crc32c_test_string, i);
+ if (expected_crc32c != calculated_crc32)
+ {
+ return clib_error_return (
+ err,
+ "Bad CRC32C for test case %d: expected 0x%08x, calculated: 0x%08x",
+ i, expected_crc32c, calculated_crc32);
+ }
+ }
+ return err;
+}
+
+REGISTER_TEST (clib_crc32c) = {
+ .name = "clib_crc32c",
+ .fn = test_clib_crc32c,
+};
diff --git a/src/vppinfra/test/index_to_ptr.c b/src/vppinfra/test/index_to_ptr.c
new file mode 100644
index 00000000000..06b621c10ff
--- /dev/null
+++ b/src/vppinfra/test/index_to_ptr.c
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/vector/index_to_ptr.h>
+
+typedef void (wrapper_fn) (u32 *indices, void *base, u8 shift, void **ptrs,
+ u32 n_elts);
+
+__test_funct_fn void
+clib_index_to_ptr_u32_wrapper (u32 *indices, void *base, u8 shift, void **ptrs,
+ u32 n_elts)
+{
+ clib_index_to_ptr_u32 (indices, base, shift, ptrs, n_elts);
+}
+
+static wrapper_fn *wfn = &clib_index_to_ptr_u32_wrapper;
+
+static clib_error_t *
+test_clib_index_to_ptr_u32 (clib_error_t *err)
+{
+ void *_ptrs[512 + 128], **ptrs = _ptrs + 64;
+ u32 _indices[512 + 128], *indices = _indices + 64;
+ u16 lengths[] = { 1, 3, 5, 7, 9, 15, 16, 17, 31, 32,
+ 33, 40, 41, 42, 63, 64, 65, 511, 512 };
+
+ for (int i = 0; i < ARRAY_LEN (_indices); i++)
+ _indices[i] = i;
+
+ for (int i = 0; i < ARRAY_LEN (lengths); i++)
+ {
+ u16 len = lengths[i];
+ u8 shift = 6;
+ void *base = (void *) 0x100000000 + i;
+
+ for (int j = -64; j < len + 64; j++)
+ ptrs[j] = (void *) 0xfefefefefefefefe;
+
+ wfn (indices, base, shift, ptrs, len);
+ for (int j = 0; j < len; j++)
+ {
+ void *expected = base + ((u64) indices[j] << shift);
+ if (ptrs[j] != expected)
+ return clib_error_return (err,
+ "testcase failed for length %u "
+ "(offset %u, expected %p, found %p)",
+ len, j, expected, ptrs[j]);
+ }
+ }
+ return err;
+}
+
+REGISTER_TEST (clib_index_to_ptr_u32) = {
+ .name = "clib_index_to_ptr_u32",
+ .fn = test_clib_index_to_ptr_u32,
+};
diff --git a/src/vppinfra/test/ip_csum.c b/src/vppinfra/test/ip_csum.c
new file mode 100644
index 00000000000..b8508ee449d
--- /dev/null
+++ b/src/vppinfra/test/ip_csum.c
@@ -0,0 +1,169 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/vector/ip_csum.h>
+
+typedef struct
+{
+ struct
+ {
+ u8 *src;
+ u32 count;
+ } chunk[5];
+ u16 result;
+} ip_csum_test_t;
+
+static u8 test1[] = { 0x45, 0x00, 0x00, 0x73, 0x00, 0x00, 0x40,
+ 0x00, 0x40, 0x11, 0x00, 0x00, 0xc0, 0xa8,
+ 0x00, 0x01, 0xc0, 0xa8, 0x00, 0xc7, 0x00 };
+#define TEST_LEN(x) (ARRAY_LEN (x) - 1)
+
+static ip_csum_test_t tests[] = { {
+ .chunk[0].src = test1,
+ .chunk[0].count = TEST_LEN (test1),
+ .result = 0x61b8,
+ },
+ {
+ .chunk[0].src = test1,
+ .chunk[0].count = 1,
+ .chunk[1].src = test1 + 1,
+ .chunk[1].count = 2,
+ .chunk[2].src = test1 + 3,
+ .chunk[2].count = 3,
+ .chunk[3].src = test1 + 6,
+ .chunk[3].count = 4,
+ .chunk[4].src = test1 + 10,
+ .chunk[4].count = TEST_LEN (test1) - 10,
+ .result = 0x61b8,
+ },
+ {
+ .chunk[0].count = 1,
+ .result = 0xff0f,
+ },
+ {
+ .chunk[0].count = 2,
+ .result = 0x080f,
+ },
+ {
+ .chunk[0].count = 3,
+ .result = 0x0711,
+ },
+ {
+ .chunk[0].count = 4,
+ .result = 0x1210,
+ },
+ {
+ .chunk[0].count = 63,
+ .result = 0xda01,
+ },
+ {
+ .chunk[0].count = 64,
+ .result = 0xe100,
+ },
+ {
+ .chunk[0].count = 65,
+ .result = 0xe010,
+ },
+ {
+ .chunk[0].count = 65535,
+ .result = 0xfc84,
+ },
+ {
+ .chunk[0].count = 65536,
+ .result = 0xffff,
+ } };
+
+static clib_error_t *
+test_clib_ip_csum (clib_error_t *err)
+{
+ u8 *buf;
+ buf = test_mem_alloc (65536);
+ for (int i = 0; i < 65536; i++)
+ buf[i] = 0xf0 + ((i * 7) & 0xf);
+
+ for (int i = 0; i < ARRAY_LEN (tests); i++)
+ {
+ clib_ip_csum_t c = {};
+ ip_csum_test_t *t = tests + i;
+ u16 rv;
+
+ for (int j = 0; j < ARRAY_LEN (((ip_csum_test_t *) 0)->chunk); j++)
+ if (t->chunk[j].count > 0)
+ {
+ if (t->chunk[j].src == 0)
+ clib_ip_csum_chunk (&c, buf, t->chunk[j].count);
+ else
+ clib_ip_csum_chunk (&c, t->chunk[j].src, t->chunk[j].count);
+ }
+ rv = clib_ip_csum_fold (&c);
+
+ if (rv != tests[i].result)
+ {
+ err = clib_error_return (err,
+ "bad checksum in test case %u (expected "
+ "0x%04x, calculated 0x%04x)",
+ i, tests[i].result, rv);
+ goto done;
+ }
+ }
+done:
+ return err;
+}
+
+void __test_perf_fn
+perftest_ip4_hdr (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+ u8 *data = test_mem_alloc_and_splat (20, n, (void *) &test1);
+ u16 *res = test_mem_alloc (n * sizeof (u16));
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ res[i] = clib_ip_csum (data + i * 20, 20);
+ test_perf_event_disable (tp);
+}
+
+void __test_perf_fn
+perftest_tcp_payload (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+ volatile uword *lenp = &tp->arg0;
+ u8 *data = test_mem_alloc_and_splat (20, n, (void *) &test1);
+ u16 *res = test_mem_alloc (n * sizeof (u16));
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ res[i] = clib_ip_csum (data + i * lenp[0], lenp[0]);
+ test_perf_event_disable (tp);
+}
+
+void __test_perf_fn
+perftest_byte (test_perf_t *tp)
+{
+ volatile uword *np = &tp->n_ops;
+ u8 *data = test_mem_alloc_and_fill_inc_u8 (*np, 0, 0);
+ u16 *res = test_mem_alloc (sizeof (u16));
+
+ test_perf_event_enable (tp);
+ res[0] = clib_ip_csum (data, np[0]);
+ test_perf_event_disable (tp);
+}
+
+REGISTER_TEST (clib_ip_csum) = {
+ .name = "clib_ip_csum",
+ .fn = test_clib_ip_csum,
+ .perf_tests = PERF_TESTS (
+ { .name = "fixed size (per IPv4 Header)",
+ .n_ops = 1024,
+ .fn = perftest_ip4_hdr },
+ { .name = "fixed size (per 1460 byte block)",
+ .n_ops = 16,
+ .arg0 = 1460,
+ .fn = perftest_tcp_payload },
+ { .name = "variable size (per byte)", .n_ops = 16384, .fn = perftest_byte }
+
+ ),
+};
diff --git a/src/vppinfra/vector/test/mask_compare.c b/src/vppinfra/test/mask_compare.c
index 64df0ee084a..738b0082dd7 100644
--- a/src/vppinfra/vector/test/mask_compare.c
+++ b/src/vppinfra/test/mask_compare.c
@@ -3,21 +3,27 @@
*/
#include <vppinfra/format.h>
-#include <vppinfra/vector/test/test.h>
+#include <vppinfra/test/test.h>
#include <vppinfra/vector/mask_compare.h>
-__clib_test_fn void
+__test_funct_fn void
clib_mask_compare_u16_wrapper (u16 v, u16 *a, u64 *mask, u32 n_elts)
{
clib_mask_compare_u16 (v, a, mask, n_elts);
}
-__clib_test_fn void
+__test_funct_fn void
clib_mask_compare_u32_wrapper (u32 v, u32 *a, u64 *mask, u32 n_elts)
{
clib_mask_compare_u32 (v, a, mask, n_elts);
}
+__test_funct_fn void
+clib_mask_compare_u64_wrapper (u64 v, u64 *a, u64 *mask, u64 n_elts)
+{
+ clib_mask_compare_u64 (v, a, mask, n_elts);
+}
+
static clib_error_t *
test_clib_mask_compare_u16 (clib_error_t *err)
{
@@ -93,3 +99,41 @@ REGISTER_TEST (clib_mask_compare_u32) = {
.name = "clib_mask_compare_u32",
.fn = test_clib_mask_compare_u32,
};
+
+static clib_error_t *
+test_clib_mask_compare_u64 (clib_error_t *err)
+{
+ u64 array[513];
+ u64 mask[10];
+ u32 i, j;
+
+ for (i = 0; i < ARRAY_LEN (array); i++)
+ array[i] = i;
+
+ for (i = 0; i < ARRAY_LEN (array); i++)
+ {
+ for (j = 0; j < ARRAY_LEN (mask); j++)
+ mask[j] = 0xa5a5a5a5a5a5a5a5;
+
+ clib_mask_compare_u64_wrapper (i, array, mask, i + 1);
+
+ for (j = 0; j < (i >> 6); j++)
+ {
+ if (mask[j])
+ return clib_error_return (err, "mask at position %u not zero", j);
+ }
+ if (mask[j] != 1ULL << (i & 0x3f))
+ return clib_error_return (err,
+ "mask at position %u is %lx, expected %lx",
+ j, mask[j], 1ULL << (i % 64));
+
+ if (mask[j + 1] != 0xa5a5a5a5a5a5a5a5)
+ return clib_error_return (err, "mask overrun at position %u", j + 1);
+ }
+ return err;
+}
+
+REGISTER_TEST (clib_mask_compare_u64) = {
+ .name = "clib_mask_compare_u64",
+ .fn = test_clib_mask_compare_u64,
+};
diff --git a/src/vppinfra/test/memcpy_x86_64.c b/src/vppinfra/test/memcpy_x86_64.c
new file mode 100644
index 00000000000..41855c39241
--- /dev/null
+++ b/src/vppinfra/test/memcpy_x86_64.c
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifdef __x86_64__
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/memcpy_x86_64.h>
+
+__test_funct_fn void
+wrapper (u8 *dst, u8 *src, uword n)
+{
+ clib_memcpy_x86_64 (dst, src, n);
+}
+
+/* clang-format off */
+#define foreach_const_n \
+ _(1) _(2) _(3) _(4) _(5) _(6) _(7) _(8) _(9) _(10) _(11) _(12) _(13) _(14) \
+ _(15) _(16) _(17) _(18) _(19) _(20) _(21) _(22) _(23) _(24) _(25) _(26) \
+ _(27) _(28) _(29) _(30) _(31) _(32) _(33) _(34) _(35) _(36) _(37) _(38) \
+ _(39) _(40) _(41) _(42) _(43) _(44) _(45) _(46) _(47) _(48) _(49) _(50) \
+ _(51) _(52) _(53) _(54) _(55) _(56) _(57) _(58) _(59) _(60) _(61) _(62) \
+ _(63) _(64) _(65) _(66) _(67) _(68) _(69) _(70) _(71) _(72) _(73) _(74) \
+ _(75) _(76) _(77) _(78) _(79) _(80) _(81) _(82) _(83) _(84) _(85) _(86) \
+ _(87) _(88) _(89) _(90) _(91) _(92) _(93) _(94) _(95) _(96) _(97) _(98) \
+ _(99) _(100) _(101) _(102) _(103) _(104) _(105) _(106) _(107) _(108) \
+ _(109) _(110) _(111) _(112) _(113) _(114) _(115) _(116) _(117) _(118) \
+ _(119) _(120) _(121) _(122) _(123) _(124) _(125) _(126) _(127) _(128) \
+ _(129) _(130) _(131) _(132) _(133) _(134) _(135) _(136) _(137) _(138) \
+ _(139) _(140) _(141) _(142) _(143) _(144) _(145) _(146) _(147) _(148) \
+ _(149) _(150) _(151) _(152) _(153) _(154) _(155) _(156) _(157) _(158) \
+ _(159) _(160) _(161) _(162) _(163) _(164) _(165) _(166) _(167) _(168) \
+ _(169) _(170) _(171) _(172) _(173) _(174) _(175) _(176) _(177) _(178) \
+ _(179) _(180) _(181) _(182) _(183) _(184) _(185) _(186) _(187) _(188) \
+ _(189) _(190) _(191) _(192) _(193) _(194) _(195) _(196) _(197) _(198) \
+ _(199) _(200) _(201) _(202) _(203) _(204) _(205) _(206) _(207) _(208) \
+ _(209) _(210) _(211) _(212) _(213) _(214) _(215) _(216) _(217) _(218) \
+ _(219) _(220) _(221) _(222) _(223) _(224) _(225) _(226) _(227) _(228) \
+ _(229) _(230) _(231) _(232) _(233) _(234) _(235) _(236) _(237) _(238) \
+ _(239) _(240) _(241) _(242) _(243) _(244) _(245) _(246) _(247) _(248) \
+ _(249) _(250) _(251) _(252) _(253) _(254) _(255)
+/* clang-format on */
+
+#define _(n) \
+ static __clib_noinline void wrapper##n (u8 *dst, u8 *src) \
+ { \
+ clib_memcpy_x86_64 (dst, src, n); \
+ }
+
+foreach_const_n;
+#undef _
+
+typedef void (const_fp_t) (u8 *dst, u8 *src);
+typedef struct
+{
+ u16 len;
+ const_fp_t *fp;
+} counst_test_t;
+
+static counst_test_t const_tests[] = {
+#define _(n) { .fp = wrapper##n, .len = n },
+ foreach_const_n
+#undef _
+};
+
+#define MAX_LEN 1024
+
+static clib_error_t *
+validate_one (clib_error_t *err, u8 *d, u8 *s, u16 n, u8 off, int is_const)
+{
+ for (int i = 0; i < n; i++)
+ if (d[i] != s[i])
+ return clib_error_return (err,
+ "memcpy error at position %d "
+ "(n = %u, off = %u, expected 0x%02x "
+ "found 0x%02x%s)",
+ i, n, off, s[i], d[i],
+ is_const ? ", const" : "");
+ for (int i = -64; i < 0; i++)
+ if (d[i] != 0xfe)
+ return clib_error_return (err,
+ "buffer underrun at position %d "
+ "(n = %u, off = %u, expected 0xfe "
+ "found 0x%02x%s)",
+ i, n, off, d[i], is_const ? ", const" : "");
+ for (int i = n; i < n + 64; i++)
+ if (d[i] != 0xfe)
+ return clib_error_return (err,
+ "buffer overrun at position %d "
+ "(n = %u, off = %u, expected 0xfe "
+ "found 0x%02x%s)",
+ i, n, off, d[i], is_const ? ", const" : "");
+ return err;
+}
+
+static clib_error_t *
+test_clib_memcpy_x86_64 (clib_error_t *err)
+{
+ u8 src[MAX_LEN + 192];
+ u8 dst[MAX_LEN + 192];
+
+ for (int i = 0; i < ARRAY_LEN (src); i++)
+ src[i] = i & 0x7f;
+
+ for (int j = 0; j < ARRAY_LEN (const_tests); j++)
+ {
+ u8 *d = dst + 64;
+ u8 *s = src + 64;
+ u16 n = const_tests[j].len;
+
+ for (int i = 0; i < 128 + n; i++)
+ dst[i] = 0xfe;
+ const_tests[j].fp (d, s);
+ if ((err = validate_one (err, d, s, n, 0, /* is_const */ 1)))
+ return err;
+ }
+
+ for (u16 n = 1; n <= MAX_LEN; n++)
+ {
+ for (int off = 0; off < 64; off += 7)
+ {
+ u8 *d = dst + 64 + off;
+ u8 *s = src + 64;
+
+ for (int i = 0; i < 128 + n + off; i++)
+ dst[i] = 0xfe;
+
+ wrapper (d, s, n);
+
+ if ((err = validate_one (err, d, s, n, off, /* is_const */ 0)))
+ return err;
+ }
+ }
+ return err;
+}
+
+REGISTER_TEST (clib_memcpy_x86_64) = {
+ .name = "clib_memcpy_x86_64",
+ .fn = test_clib_memcpy_x86_64,
+};
+#endif
diff --git a/src/vppinfra/test/poly1305.c b/src/vppinfra/test/poly1305.c
new file mode 100644
index 00000000000..34551f84047
--- /dev/null
+++ b/src/vppinfra/test/poly1305.c
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/poly1305.h>
+
+static const u8 text1[375] = {
+ 0x41, 0x6e, 0x79, 0x20, 0x73, 0x75, 0x62, 0x6d, 0x69, 0x73, 0x73, 0x69, 0x6f,
+ 0x6e, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x49, 0x45, 0x54, 0x46,
+ 0x20, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x64, 0x65, 0x64, 0x20, 0x62, 0x79, 0x20,
+ 0x74, 0x68, 0x65, 0x20, 0x43, 0x6f, 0x6e, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74,
+ 0x6f, 0x72, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x63,
+ 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x20, 0x61, 0x73, 0x20, 0x61, 0x6c, 0x6c, 0x20,
+ 0x6f, 0x72, 0x20, 0x70, 0x61, 0x72, 0x74, 0x20, 0x6f, 0x66, 0x20, 0x61, 0x6e,
+ 0x20, 0x49, 0x45, 0x54, 0x46, 0x20, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x6e, 0x65,
+ 0x74, 0x2d, 0x44, 0x72, 0x61, 0x66, 0x74, 0x20, 0x6f, 0x72, 0x20, 0x52, 0x46,
+ 0x43, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x61, 0x6e, 0x79, 0x20, 0x73, 0x74, 0x61,
+ 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x20, 0x6d, 0x61, 0x64, 0x65, 0x20, 0x77,
+ 0x69, 0x74, 0x68, 0x69, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e,
+ 0x74, 0x65, 0x78, 0x74, 0x20, 0x6f, 0x66, 0x20, 0x61, 0x6e, 0x20, 0x49, 0x45,
+ 0x54, 0x46, 0x20, 0x61, 0x63, 0x74, 0x69, 0x76, 0x69, 0x74, 0x79, 0x20, 0x69,
+ 0x73, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x69, 0x64, 0x65, 0x72, 0x65, 0x64, 0x20,
+ 0x61, 0x6e, 0x20, 0x22, 0x49, 0x45, 0x54, 0x46, 0x20, 0x43, 0x6f, 0x6e, 0x74,
+ 0x72, 0x69, 0x62, 0x75, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0x2e, 0x20, 0x53, 0x75,
+ 0x63, 0x68, 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73,
+ 0x20, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x20, 0x6f, 0x72, 0x61, 0x6c,
+ 0x20, 0x73, 0x74, 0x61, 0x74, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x20, 0x69,
+ 0x6e, 0x20, 0x49, 0x45, 0x54, 0x46, 0x20, 0x73, 0x65, 0x73, 0x73, 0x69, 0x6f,
+ 0x6e, 0x73, 0x2c, 0x20, 0x61, 0x73, 0x20, 0x77, 0x65, 0x6c, 0x6c, 0x20, 0x61,
+ 0x73, 0x20, 0x77, 0x72, 0x69, 0x74, 0x74, 0x65, 0x6e, 0x20, 0x61, 0x6e, 0x64,
+ 0x20, 0x65, 0x6c, 0x65, 0x63, 0x74, 0x72, 0x6f, 0x6e, 0x69, 0x63, 0x20, 0x63,
+ 0x6f, 0x6d, 0x6d, 0x75, 0x6e, 0x69, 0x63, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73,
+ 0x20, 0x6d, 0x61, 0x64, 0x65, 0x20, 0x61, 0x74, 0x20, 0x61, 0x6e, 0x79, 0x20,
+ 0x74, 0x69, 0x6d, 0x65, 0x20, 0x6f, 0x72, 0x20, 0x70, 0x6c, 0x61, 0x63, 0x65,
+ 0x2c, 0x20, 0x77, 0x68, 0x69, 0x63, 0x68, 0x20, 0x61, 0x72, 0x65, 0x20, 0x61,
+ 0x64, 0x64, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x20, 0x74, 0x6f
+};
+
+const static struct
+{
+ char *name;
+ u32 len;
+ const u8 key[32];
+ const u8 *msg;
+ const u8 out[16];
+} test_cases[] = {
+ {
+ .name = "test1",
+ .len = 34,
+ .out = { 0xa8, 0x06, 0x1d, 0xc1, 0x30, 0x51, 0x36, 0xc6, 0xc2, 0x2b, 0x8b,
+ 0xaf, 0x0c, 0x01, 0x27, 0xa9 },
+ .key = { 0x85, 0xd6, 0xbe, 0x78, 0x57, 0x55, 0x6d, 0x33, 0x7f, 0x44, 0x52,
+ 0xfe, 0x42, 0xd5, 0x06, 0xa8, 0x01, 0x03, 0x80, 0x8a, 0xfb, 0x0d,
+ 0xb2, 0xfd, 0x4a, 0xbf, 0xf6, 0xaf, 0x41, 0x49, 0xf5, 0x1b },
+ .msg = (u8[34]){ 0x43, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x67, 0x72, 0x61,
+ 0x70, 0x68, 0x69, 0x63, 0x20, 0x46, 0x6f, 0x72, 0x75,
+ 0x6d, 0x20, 0x52, 0x65, 0x73, 0x65, 0x61, 0x72, 0x63,
+ 0x68, 0x20, 0x47, 0x72, 0x6f, 0x75, 0x70 },
+ },
+ {
+ .name = "RFC8439 A3 TV1",
+ .len = 64,
+ .out = {},
+ .key = {},
+ .msg = (u8[64]){},
+ },
+ {
+ .name = "RFC8439 A3 TV2",
+ .len = sizeof (text1),
+ .out = { 0x36, 0xe5, 0xf6, 0xb5, 0xc5, 0xe0, 0x60, 0x70, 0xf0, 0xef, 0xca,
+ 0x96, 0x22, 0x7a, 0x86, 0x3e },
+ .key = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0xe5, 0xf6, 0xb5, 0xc5, 0xe0,
+ 0x60, 0x70, 0xf0, 0xef, 0xca, 0x96, 0x22, 0x7a, 0x86, 0x3e },
+ .msg = text1,
+ },
+ {
+ .name = "RFC8439 A3 TV3",
+ .len = sizeof (text1),
+ .out = { 0xf3, 0x47, 0x7e, 0x7c, 0xd9, 0x54, 0x17, 0xaf, 0x89, 0xa6, 0xb8,
+ 0x79, 0x4c, 0x31, 0x0c, 0xf0
+
+ },
+ .key = { 0x36, 0xe5, 0xf6, 0xb5, 0xc5, 0xe0, 0x60, 0x70, 0xf0, 0xef, 0xca,
+ 0x96, 0x22, 0x7a, 0x86, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+
+ .msg = text1,
+ },
+ {
+ .name = "RFC8439 A3 TV4",
+ .len = 127,
+ .key = { 0x1c, 0x92, 0x40, 0xa5, 0xeb, 0x55, 0xd3, 0x8a, 0xf3, 0x33, 0x88,
+ 0x86, 0x04, 0xf6, 0xb5, 0xf0, 0x47, 0x39, 0x17, 0xc1, 0x40, 0x2b,
+ 0x80, 0x09, 0x9d, 0xca, 0x5c, 0xbc, 0x20, 0x70, 0x75, 0xc0 },
+ .msg =
+ (u8[127]){
+ 0x27, 0x54, 0x77, 0x61, 0x73, 0x20, 0x62, 0x72, 0x69, 0x6c, 0x6c, 0x69,
+ 0x67, 0x2c, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x73,
+ 0x6c, 0x69, 0x74, 0x68, 0x79, 0x20, 0x74, 0x6f, 0x76, 0x65, 0x73, 0x0a,
+ 0x44, 0x69, 0x64, 0x20, 0x67, 0x79, 0x72, 0x65, 0x20, 0x61, 0x6e, 0x64,
+ 0x20, 0x67, 0x69, 0x6d, 0x62, 0x6c, 0x65, 0x20, 0x69, 0x6e, 0x20, 0x74,
+ 0x68, 0x65, 0x20, 0x77, 0x61, 0x62, 0x65, 0x3a, 0x0a, 0x41, 0x6c, 0x6c,
+ 0x20, 0x6d, 0x69, 0x6d, 0x73, 0x79, 0x20, 0x77, 0x65, 0x72, 0x65, 0x20,
+ 0x74, 0x68, 0x65, 0x20, 0x62, 0x6f, 0x72, 0x6f, 0x67, 0x6f, 0x76, 0x65,
+ 0x73, 0x2c, 0x0a, 0x41, 0x6e, 0x64, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6d,
+ 0x6f, 0x6d, 0x65, 0x20, 0x72, 0x61, 0x74, 0x68, 0x73, 0x20, 0x6f, 0x75,
+ 0x74, 0x67, 0x72, 0x61, 0x62, 0x65, 0x2e },
+ .out = { 0x45, 0x41, 0x66, 0x9a, 0x7e, 0xaa, 0xee, 0x61, 0xe7, 0x08, 0xdc,
+ 0x7c, 0xbc, 0xc5, 0xeb, 0x62 },
+ },
+ {
+ /* Test Vector #5:
+ * If one uses 130-bit partial reduction, does the code handle the case
+ * where partially reduced final result is not fully reduced? */
+ .name = "RFC8439 A3 TV5",
+ .len = 16,
+ .key = { 2 },
+ .msg = (u8[16]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .out = { 3 },
+ },
+ {
+ /* Test Vector #6:
+ * What happens if addition of s overflows modulo 2^128? */
+ .name = "RFC8439 A3 TV6",
+ .len = 16,
+ .key = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .msg = (u8[16]){ 2 },
+ .out = { 3 },
+ },
+ {
+ /* Test Vector #7:
+ * What happens if data limb is all ones and there is carry from lower
+ * limb? */
+ .name = "RFC8439 A3 TV7",
+ .len = 48,
+ .key = { 1 },
+ .msg =
+ (u8[48]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .out = { 5 },
+ },
+ {
+ /* Test Vector #8:
+ * What happens if final result from polynomial part is exactly 2^130-5? */
+ .name = "RFC8439 A3 TV8",
+ .len = 48,
+ .key = { 1 },
+ .msg =
+ (u8[48]){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfb, 0xfe, 0xfe, 0xfe,
+ 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
+ 0xfe, 0xfe, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01 },
+ .out = { 0 },
+ },
+ {
+ /* Test Vector #9:
+ * What happens if final result from polynomial part is exactly 2^130-6? */
+ .name = "RFC8439 A3 TV9",
+ .len = 16,
+ .key = { 2 },
+ .msg = (u8[16]){ 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .out = { 0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff },
+ },
+ {
+ /* Test Vector #10:
+ * What happens if 5*H+L-type reduction produces 131-bit intermediate
+ * result? */
+ .name = "RFC8439 A3 TV10",
+ .len = 64,
+ .key = { [0] = 1, [8] = 4 },
+ .msg =
+ (u8[64]){ 0xE3, 0x35, 0x94, 0xD7, 0x50, 0x5E, 0x43, 0xB9, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x33, 0x94, 0xD7, 0x50,
+ 0x5E, 0x43, 0x79, 0xCD, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00 },
+ .out = { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00 },
+ },
+ {
+ /* Test Vector #11:
+ * What happens if 5*H+L-type reduction produces 131-bit final result? */
+ .name = "RFC8439 A3 TV11",
+ .len = 48,
+ .key = { [0] = 1, [8] = 4 },
+ .msg =
+ (u8[48]){ 0xE3, 0x35, 0x94, 0xD7, 0x50, 0x5E, 0x43, 0xB9, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x33, 0x94, 0xD7, 0x50,
+ 0x5E, 0x43, 0x79, 0xCD, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ .out = { 0x13 },
+ }
+};
+
+static clib_error_t *
+test_clib_poly1305 (clib_error_t *err)
+{
+ u8 out[16] = {};
+
+ FOREACH_ARRAY_ELT (tc, test_cases)
+ {
+ clib_poly1305 (tc->key, tc->msg, tc->len, out);
+ if (memcmp (out, tc->out, 16) != 0)
+ err = clib_error_return (
+ err,
+ "\ntest: %s"
+ "\nkey: %U"
+ "\ndata: %U"
+ "\nexp out: %U"
+ "\ncalc out: %U\n",
+ tc->name, format_hexdump, tc->key, 32, format_hexdump, tc->msg,
+ tc->len, format_hexdump, tc->out, 16, format_hexdump, out, 16);
+ }
+ return err;
+}
+
+void __test_perf_fn
+perftest_64byte (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+ u8 *m = test_mem_alloc_and_fill_inc_u8 (n * 64, 0, 0);
+ u8 *k = test_mem_alloc_and_fill_inc_u8 (n * 32, 0, 0);
+ u8 *t = test_mem_alloc (n * 16);
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++, t += 16, k += 32, m += 64)
+ clib_poly1305 (k, m, 64, t);
+ test_perf_event_disable (tp);
+}
+
+void __test_perf_fn
+perftest_byte (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+
+ u8 *m = test_mem_alloc_and_fill_inc_u8 (n, 0, 0);
+ u8 *k = test_mem_alloc_and_fill_inc_u8 (32, 0, 0);
+ u8 *t = test_mem_alloc (16);
+
+ test_perf_event_enable (tp);
+ clib_poly1305 (k, m, n, t);
+ test_perf_event_disable (tp);
+}
+
+REGISTER_TEST (clib_poly1305) = {
+ .name = "clib_poly1305",
+ .fn = test_clib_poly1305,
+ .perf_tests = PERF_TESTS (
+ { .name = "fixed size (64 bytes)", .n_ops = 1024, .fn = perftest_64byte },
+ { .name = "variable size (per byte)",
+ .n_ops = 16384,
+ .fn = perftest_byte }),
+};
diff --git a/src/vppinfra/test/sha2.c b/src/vppinfra/test/sha2.c
new file mode 100644
index 00000000000..d5da2b61706
--- /dev/null
+++ b/src/vppinfra/test/sha2.c
@@ -0,0 +1,322 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/crypto/sha2.h>
+
+typedef struct
+{
+ const u8 *msg;
+ const u8 *key;
+ int tc;
+ u32 msg_len;
+ u32 key_len;
+ u8 digest_224[28];
+ u8 digest_256[32];
+ u8 digest_384[48];
+ u8 digest_512[64];
+ u8 digest_224_len;
+ u8 digest_256_len;
+ u8 digest_384_len;
+ u8 digest_512_len;
+} sha2_test_t;
+
+#ifndef CLIB_MARCH_VARIANT
+static const u8 key1[20] = { 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b,
+ 0x0b, 0x0b, 0x0b, 0x0b, 0x0b, 0x0b };
+static const u8 key2[4] = "Jefe";
+static const u8 key3[20] = { 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa };
+static const u8 key4[25] = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+ 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
+ 0x16, 0x17, 0x18, 0x19 };
+static const u8 key5[20] = { 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+ 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c,
+ 0x0c, 0x0c, 0x0c, 0x0c, 0x0c, 0x0c };
+static const u8 key6[131] = {
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa,
+ 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa, 0xaa
+};
+
+static const u8 msg1[8] = "Hi There";
+static const u8 msg2[28] = "what do ya want for nothing?";
+static const u8 msg3[50] = {
+ 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd,
+ 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd,
+ 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd,
+ 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd
+};
+static const u8 msg4[50] = {
+ 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd,
+ 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd,
+ 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd,
+ 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd, 0xcd
+};
+static const u8 msg6[54] =
+ "Test Using Larger Than Block-Size Key - Hash Key First";
+static const u8 msg7[153] =
+ "This is a test using a larger than block-size key and a larger than "
+ "block-size data. The key needs to be hashed before being used by the "
+ "HMAC algorithm.";
+
+const sha2_test_t sha2_tests[] = {
+ {
+ /* RFC4231 Test Case 1 */
+ .tc = 1,
+ .key = key1,
+ .key_len = sizeof (key1),
+ .msg = msg1,
+ .msg_len = sizeof (msg1),
+ .digest_224 = { 0x89, 0x6f, 0xb1, 0x12, 0x8a, 0xbb, 0xdf, 0x19, 0x68, 0x32,
+ 0x10, 0x7c, 0xd4, 0x9d, 0xf3, 0x3f, 0x47, 0xb4, 0xb1, 0x16,
+ 0x99, 0x12, 0xba, 0x4f, 0x53, 0x68, 0x4b, 0x22 },
+ .digest_256 = { 0xb0, 0x34, 0x4c, 0x61, 0xd8, 0xdb, 0x38, 0x53,
+ 0x5c, 0xa8, 0xaf, 0xce, 0xaf, 0x0b, 0xf1, 0x2b,
+ 0x88, 0x1d, 0xc2, 0x00, 0xc9, 0x83, 0x3d, 0xa7,
+ 0x26, 0xe9, 0x37, 0x6c, 0x2e, 0x32, 0xcf, 0xf7 },
+ .digest_384 = { 0xaf, 0xd0, 0x39, 0x44, 0xd8, 0x48, 0x95, 0x62, 0x6b, 0x08,
+ 0x25, 0xf4, 0xab, 0x46, 0x90, 0x7f, 0x15, 0xf9, 0xda, 0xdb,
+ 0xe4, 0x10, 0x1e, 0xc6, 0x82, 0xaa, 0x03, 0x4c, 0x7c, 0xeb,
+ 0xc5, 0x9c, 0xfa, 0xea, 0x9e, 0xa9, 0x07, 0x6e, 0xde, 0x7f,
+ 0x4a, 0xf1, 0x52, 0xe8, 0xb2, 0xfa, 0x9c, 0xb6 },
+ .digest_512 = { 0x87, 0xaa, 0x7c, 0xde, 0xa5, 0xef, 0x61, 0x9d, 0x4f, 0xf0,
+ 0xb4, 0x24, 0x1a, 0x1d, 0x6c, 0xb0, 0x23, 0x79, 0xf4, 0xe2,
+ 0xce, 0x4e, 0xc2, 0x78, 0x7a, 0xd0, 0xb3, 0x05, 0x45, 0xe1,
+ 0x7c, 0xde, 0xda, 0xa8, 0x33, 0xb7, 0xd6, 0xb8, 0xa7, 0x02,
+ 0x03, 0x8b, 0x27, 0x4e, 0xae, 0xa3, 0xf4, 0xe4, 0xbe, 0x9d,
+ 0x91, 0x4e, 0xeb, 0x61, 0xf1, 0x70, 0x2e, 0x69, 0x6c, 0x20,
+ 0x3a, 0x12, 0x68, 0x54 },
+ },
+ {
+ /* RFC4231 Test Case 2 */
+ .tc = 2,
+ .key = key2,
+ .key_len = sizeof (key2),
+ .msg = msg2,
+ .msg_len = sizeof (msg2),
+ .digest_224 = { 0xa3, 0x0e, 0x01, 0x09, 0x8b, 0xc6, 0xdb, 0xbf, 0x45, 0x69,
+ 0x0f, 0x3a, 0x7e, 0x9e, 0x6d, 0x0f, 0x8b, 0xbe, 0xa2, 0xa3,
+ 0x9e, 0x61, 0x48, 0x00, 0x8f, 0xd0, 0x5e, 0x44 },
+ .digest_256 = { 0x5b, 0xdc, 0xc1, 0x46, 0xbf, 0x60, 0x75, 0x4e,
+ 0x6a, 0x04, 0x24, 0x26, 0x08, 0x95, 0x75, 0xc7,
+ 0x5a, 0x00, 0x3f, 0x08, 0x9d, 0x27, 0x39, 0x83,
+ 0x9d, 0xec, 0x58, 0xb9, 0x64, 0xec, 0x38, 0x43 },
+ .digest_384 = { 0xaf, 0x45, 0xd2, 0xe3, 0x76, 0x48, 0x40, 0x31, 0x61, 0x7f,
+ 0x78, 0xd2, 0xb5, 0x8a, 0x6b, 0x1b, 0x9c, 0x7e, 0xf4, 0x64,
+ 0xf5, 0xa0, 0x1b, 0x47, 0xe4, 0x2e, 0xc3, 0x73, 0x63, 0x22,
+ 0x44, 0x5e, 0x8e, 0x22, 0x40, 0xca, 0x5e, 0x69, 0xe2, 0xc7,
+ 0x8b, 0x32, 0x39, 0xec, 0xfa, 0xb2, 0x16, 0x49 },
+ .digest_512 = { 0x16, 0x4b, 0x7a, 0x7b, 0xfc, 0xf8, 0x19, 0xe2, 0xe3, 0x95,
+ 0xfb, 0xe7, 0x3b, 0x56, 0xe0, 0xa3, 0x87, 0xbd, 0x64, 0x22,
+ 0x2e, 0x83, 0x1f, 0xd6, 0x10, 0x27, 0x0c, 0xd7, 0xea, 0x25,
+ 0x05, 0x54, 0x97, 0x58, 0xbf, 0x75, 0xc0, 0x5a, 0x99, 0x4a,
+ 0x6d, 0x03, 0x4f, 0x65, 0xf8, 0xf0, 0xe6, 0xfd, 0xca, 0xea,
+ 0xb1, 0xa3, 0x4d, 0x4a, 0x6b, 0x4b, 0x63, 0x6e, 0x07, 0x0a,
+ 0x38, 0xbc, 0xe7, 0x37 },
+ },
+ { /* RFC4231 Test Case 3 */
+ .tc = 3,
+ .key = key3,
+ .key_len = sizeof (key3),
+ .msg = msg3,
+ .msg_len = sizeof (msg3),
+ .digest_224 = { 0x7f, 0xb3, 0xcb, 0x35, 0x88, 0xc6, 0xc1, 0xf6, 0xff, 0xa9,
+ 0x69, 0x4d, 0x7d, 0x6a, 0xd2, 0x64, 0x93, 0x65, 0xb0, 0xc1,
+ 0xf6, 0x5d, 0x69, 0xd1, 0xec, 0x83, 0x33, 0xea },
+ .digest_256 = { 0x77, 0x3e, 0xa9, 0x1e, 0x36, 0x80, 0x0e, 0x46,
+ 0x85, 0x4d, 0xb8, 0xeb, 0xd0, 0x91, 0x81, 0xa7,
+ 0x29, 0x59, 0x09, 0x8b, 0x3e, 0xf8, 0xc1, 0x22,
+ 0xd9, 0x63, 0x55, 0x14, 0xce, 0xd5, 0x65, 0xfe },
+ .digest_384 = { 0x88, 0x06, 0x26, 0x08, 0xd3, 0xe6, 0xad, 0x8a, 0x0a, 0xa2,
+ 0xac, 0xe0, 0x14, 0xc8, 0xa8, 0x6f, 0x0a, 0xa6, 0x35, 0xd9,
+ 0x47, 0xac, 0x9f, 0xeb, 0xe8, 0x3e, 0xf4, 0xe5, 0x59, 0x66,
+ 0x14, 0x4b, 0x2a, 0x5a, 0xb3, 0x9d, 0xc1, 0x38, 0x14, 0xb9,
+ 0x4e, 0x3a, 0xb6, 0xe1, 0x01, 0xa3, 0x4f, 0x27 },
+ .digest_512 = { 0xfa, 0x73, 0xb0, 0x08, 0x9d, 0x56, 0xa2, 0x84, 0xef, 0xb0,
+ 0xf0, 0x75, 0x6c, 0x89, 0x0b, 0xe9, 0xb1, 0xb5, 0xdb, 0xdd,
+ 0x8e, 0xe8, 0x1a, 0x36, 0x55, 0xf8, 0x3e, 0x33, 0xb2, 0x27,
+ 0x9d, 0x39, 0xbf, 0x3e, 0x84, 0x82, 0x79, 0xa7, 0x22, 0xc8,
+ 0x06, 0xb4, 0x85, 0xa4, 0x7e, 0x67, 0xc8, 0x07, 0xb9, 0x46,
+ 0xa3, 0x37, 0xbe, 0xe8, 0x94, 0x26, 0x74, 0x27, 0x88, 0x59,
+ 0xe1, 0x32, 0x92, 0xfb } },
+ {
+ /* RFC4231 Test Case 4 */
+ .tc = 4,
+ .key = key4,
+ .key_len = sizeof (key4),
+ .msg = msg4,
+ .msg_len = sizeof (msg4),
+ .digest_224 = { 0x6c, 0x11, 0x50, 0x68, 0x74, 0x01, 0x3c, 0xac, 0x6a, 0x2a,
+ 0xbc, 0x1b, 0xb3, 0x82, 0x62, 0x7c, 0xec, 0x6a, 0x90, 0xd8,
+ 0x6e, 0xfc, 0x01, 0x2d, 0xe7, 0xaf, 0xec, 0x5a },
+ .digest_256 = { 0x82, 0x55, 0x8a, 0x38, 0x9a, 0x44, 0x3c, 0x0e,
+ 0xa4, 0xcc, 0x81, 0x98, 0x99, 0xf2, 0x08, 0x3a,
+ 0x85, 0xf0, 0xfa, 0xa3, 0xe5, 0x78, 0xf8, 0x07,
+ 0x7a, 0x2e, 0x3f, 0xf4, 0x67, 0x29, 0x66, 0x5b },
+ .digest_384 = { 0x3e, 0x8a, 0x69, 0xb7, 0x78, 0x3c, 0x25, 0x85, 0x19, 0x33,
+ 0xab, 0x62, 0x90, 0xaf, 0x6c, 0xa7, 0x7a, 0x99, 0x81, 0x48,
+ 0x08, 0x50, 0x00, 0x9c, 0xc5, 0x57, 0x7c, 0x6e, 0x1f, 0x57,
+ 0x3b, 0x4e, 0x68, 0x01, 0xdd, 0x23, 0xc4, 0xa7, 0xd6, 0x79,
+ 0xcc, 0xf8, 0xa3, 0x86, 0xc6, 0x74, 0xcf, 0xfb },
+ .digest_512 = { 0xb0, 0xba, 0x46, 0x56, 0x37, 0x45, 0x8c, 0x69, 0x90, 0xe5,
+ 0xa8, 0xc5, 0xf6, 0x1d, 0x4a, 0xf7, 0xe5, 0x76, 0xd9, 0x7f,
+ 0xf9, 0x4b, 0x87, 0x2d, 0xe7, 0x6f, 0x80, 0x50, 0x36, 0x1e,
+ 0xe3, 0xdb, 0xa9, 0x1c, 0xa5, 0xc1, 0x1a, 0xa2, 0x5e, 0xb4,
+ 0xd6, 0x79, 0x27, 0x5c, 0xc5, 0x78, 0x80, 0x63, 0xa5, 0xf1,
+ 0x97, 0x41, 0x12, 0x0c, 0x4f, 0x2d, 0xe2, 0xad, 0xeb, 0xeb,
+ 0x10, 0xa2, 0x98, 0xdd },
+ },
+ {
+ /* RFC4231 Test Case 5 */
+ .tc = 5,
+ .key = key5,
+ .key_len = sizeof (key5),
+ .msg = (u8 *) "Test With Truncation",
+ .msg_len = 20,
+ .digest_224 = { 0x0e, 0x2a, 0xea, 0x68, 0xa9, 0x0c, 0x8d, 0x37, 0xc9, 0x88,
+ 0xbc, 0xdb, 0x9f, 0xca, 0x6f, 0xa8 },
+ .digest_224_len = 16,
+ .digest_256 = { 0xa3, 0xb6, 0x16, 0x74, 0x73, 0x10, 0x0e, 0xe0, 0x6e, 0x0c,
+ 0x79, 0x6c, 0x29, 0x55, 0x55, 0x2b },
+ .digest_256_len = 16,
+ .digest_384 = { 0x3a, 0xbf, 0x34, 0xc3, 0x50, 0x3b, 0x2a, 0x23, 0xa4, 0x6e,
+ 0xfc, 0x61, 0x9b, 0xae, 0xf8, 0x97 },
+ .digest_384_len = 16,
+ .digest_512 = { 0x41, 0x5f, 0xad, 0x62, 0x71, 0x58, 0x0a, 0x53, 0x1d, 0x41,
+ 0x79, 0xbc, 0x89, 0x1d, 0x87, 0xa6 },
+ .digest_512_len = 16,
+ },
+ { /* RFC4231 Test Case 6 */
+ .tc = 6,
+ .key = key6,
+ .key_len = sizeof (key6),
+ .msg = msg6,
+ .msg_len = sizeof (msg6),
+ .digest_224 = { 0x95, 0xe9, 0xa0, 0xdb, 0x96, 0x20, 0x95, 0xad, 0xae, 0xbe,
+ 0x9b, 0x2d, 0x6f, 0x0d, 0xbc, 0xe2, 0xd4, 0x99, 0xf1, 0x12,
+ 0xf2, 0xd2, 0xb7, 0x27, 0x3f, 0xa6, 0x87, 0x0e },
+ .digest_256 = { 0x60, 0xe4, 0x31, 0x59, 0x1e, 0xe0, 0xb6, 0x7f,
+ 0x0d, 0x8a, 0x26, 0xaa, 0xcb, 0xf5, 0xb7, 0x7f,
+ 0x8e, 0x0b, 0xc6, 0x21, 0x37, 0x28, 0xc5, 0x14,
+ 0x05, 0x46, 0x04, 0x0f, 0x0e, 0xe3, 0x7f, 0x54 },
+ .digest_384 = { 0x4e, 0xce, 0x08, 0x44, 0x85, 0x81, 0x3e, 0x90, 0x88, 0xd2,
+ 0xc6, 0x3a, 0x04, 0x1b, 0xc5, 0xb4, 0x4f, 0x9e, 0xf1, 0x01,
+ 0x2a, 0x2b, 0x58, 0x8f, 0x3c, 0xd1, 0x1f, 0x05, 0x03, 0x3a,
+ 0xc4, 0xc6, 0x0c, 0x2e, 0xf6, 0xab, 0x40, 0x30, 0xfe, 0x82,
+ 0x96, 0x24, 0x8d, 0xf1, 0x63, 0xf4, 0x49, 0x52 },
+ .digest_512 = { 0x80, 0xb2, 0x42, 0x63, 0xc7, 0xc1, 0xa3, 0xeb, 0xb7, 0x14,
+ 0x93, 0xc1, 0xdd, 0x7b, 0xe8, 0xb4, 0x9b, 0x46, 0xd1, 0xf4,
+ 0x1b, 0x4a, 0xee, 0xc1, 0x12, 0x1b, 0x01, 0x37, 0x83, 0xf8,
+ 0xf3, 0x52, 0x6b, 0x56, 0xd0, 0x37, 0xe0, 0x5f, 0x25, 0x98,
+ 0xbd, 0x0f, 0xd2, 0x21, 0x5d, 0x6a, 0x1e, 0x52, 0x95, 0xe6,
+ 0x4f, 0x73, 0xf6, 0x3f, 0x0a, 0xec, 0x8b, 0x91, 0x5a, 0x98,
+ 0x5d, 0x78, 0x65, 0x98 } },
+ {
+ /* RFC4231 Test Case 7 */
+ .tc = 7,
+ .key = key6,
+ .key_len = sizeof (key6),
+ .msg = msg7,
+ .msg_len = sizeof (msg7) - 1,
+ .digest_224 = { 0x3a, 0x85, 0x41, 0x66, 0xac, 0x5d, 0x9f, 0x02, 0x3f, 0x54,
+ 0xd5, 0x17, 0xd0, 0xb3, 0x9d, 0xbd, 0x94, 0x67, 0x70, 0xdb,
+ 0x9c, 0x2b, 0x95, 0xc9, 0xf6, 0xf5, 0x65, 0xd1 },
+ .digest_256 = { 0x9b, 0x09, 0xff, 0xa7, 0x1b, 0x94, 0x2f, 0xcb,
+ 0x27, 0x63, 0x5f, 0xbc, 0xd5, 0xb0, 0xe9, 0x44,
+ 0xbf, 0xdc, 0x63, 0x64, 0x4f, 0x07, 0x13, 0x93,
+ 0x8a, 0x7f, 0x51, 0x53, 0x5c, 0x3a, 0x35, 0xe2 },
+ .digest_384 = { 0x66, 0x17, 0x17, 0x8e, 0x94, 0x1f, 0x02, 0x0d, 0x35, 0x1e,
+ 0x2f, 0x25, 0x4e, 0x8f, 0xd3, 0x2c, 0x60, 0x24, 0x20, 0xfe,
+ 0xb0, 0xb8, 0xfb, 0x9a, 0xdc, 0xce, 0xbb, 0x82, 0x46, 0x1e,
+ 0x99, 0xc5, 0xa6, 0x78, 0xcc, 0x31, 0xe7, 0x99, 0x17, 0x6d,
+ 0x38, 0x60, 0xe6, 0x11, 0x0c, 0x46, 0x52, 0x3e },
+ .digest_512 = { 0xe3, 0x7b, 0x6a, 0x77, 0x5d, 0xc8, 0x7d, 0xba, 0xa4, 0xdf,
+ 0xa9, 0xf9, 0x6e, 0x5e, 0x3f, 0xfd, 0xde, 0xbd, 0x71, 0xf8,
+ 0x86, 0x72, 0x89, 0x86, 0x5d, 0xf5, 0xa3, 0x2d, 0x20, 0xcd,
+ 0xc9, 0x44, 0xb6, 0x02, 0x2c, 0xac, 0x3c, 0x49, 0x82, 0xb1,
+ 0x0d, 0x5e, 0xeb, 0x55, 0xc3, 0xe4, 0xde, 0x15, 0x13, 0x46,
+ 0x76, 0xfb, 0x6d, 0xe0, 0x44, 0x60, 0x65, 0xc9, 0x74, 0x40,
+ 0xfa, 0x8c, 0x6a, 0x58 },
+ },
+ {}
+};
+#else
+extern const sha2_test_t sha2_tests[];
+#endif
+
+static clib_error_t *
+check_digest (clib_error_t *err, int tc, u8 *calculated, const u8 *expected,
+ u8 len)
+{
+ if (memcmp (expected, calculated, len) != 0)
+ err = clib_error_return (err,
+ "Bad HMAC SHA%u digest for test case "
+ "%u:\nExpected:\n%U\nCalculated:\n%U\n",
+ len * 8, tc, format_hexdump, expected, len,
+ format_hexdump, calculated, len);
+ return err;
+}
+
+#define _(bits) \
+ static clib_error_t *test_clib_hmac_sha##bits (clib_error_t *err) \
+ { \
+ u8 digest[64]; \
+ const sha2_test_t *t = sha2_tests; \
+ \
+ while (t->key) \
+ { \
+ u8 digest_len = t->digest_##bits##_len; \
+ if (digest_len == 0) \
+ digest_len = sizeof (t->digest_##bits); \
+ clib_memset_u8 (digest, 0xfe, sizeof (digest)); \
+ clib_hmac_sha##bits (t->key, t->key_len, t->msg, t->msg_len, digest); \
+ if ((err = check_digest (err, t->tc, digest, t->digest_##bits, \
+ digest_len))) \
+ return err; \
+ t++; \
+ } \
+ \
+ return err; \
+ } \
+ \
+ void __test_perf_fn perftest_sha##bits##_byte (test_perf_t *tp) \
+ { \
+ volatile uword *np = &tp->n_ops; \
+ volatile uword *kl = &tp->arg0; \
+ ; \
+ u8 *key = test_mem_alloc_and_fill_inc_u8 (*kl, 32, 0); \
+ u8 *data = test_mem_alloc_and_fill_inc_u8 (*np, 0, 0); \
+ u8 *digest = test_mem_alloc (64); \
+ \
+ test_perf_event_enable (tp); \
+ clib_hmac_sha##bits (key, *kl, data, *np, digest); \
+ test_perf_event_disable (tp); \
+ } \
+ REGISTER_TEST (clib_hmac_sha##bits) = { \
+ .name = "clib_hmac_sha" #bits, \
+ .fn = test_clib_hmac_sha##bits, \
+ .perf_tests = PERF_TESTS ({ .name = "byte", \
+ .n_ops = 16384, \
+ .arg0 = 20, \
+ .fn = perftest_sha##bits##_byte }) \
+ }
+
+_ (224);
+_ (256);
+_ (384);
+_ (512);
+#undef _
diff --git a/src/vppinfra/test/test.c b/src/vppinfra/test/test.c
new file mode 100644
index 00000000000..55c2ae7a11f
--- /dev/null
+++ b/src/vppinfra/test/test.c
@@ -0,0 +1,259 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/error.h>
+
+test_main_t test_main;
+
+int
+test_march_supported (clib_march_variant_type_t type)
+{
+#define _(s, n) \
+ if (CLIB_MARCH_VARIANT_TYPE_##s == type) \
+ return clib_cpu_march_priority_##s ();
+ foreach_march_variant
+#undef _
+ return 0;
+}
+
+clib_error_t *
+test_funct (test_main_t *tm)
+{
+ for (int i = 0; i < CLIB_MARCH_TYPE_N_VARIANTS; i++)
+ {
+ test_registration_t *r = tm->registrations[i];
+
+ if (r == 0 || test_march_supported (i) < 0)
+ continue;
+
+ fformat (stdout, "\nMultiarch Variant: %U\n", format_march_variant, i);
+ fformat (stdout,
+ "-------------------------------------------------------\n");
+ while (r)
+ {
+ clib_error_t *err;
+ if (tm->filter && strstr (r->name, (char *) tm->filter) == 0)
+ goto next;
+ err = (r->fn) (0);
+ fformat (stdout, "%-50s %s\n", r->name, err ? "FAIL" : "PASS");
+ for (int i = 0; i < vec_len (tm->allocated_mem); i++)
+ clib_mem_free (tm->allocated_mem[i]);
+ vec_free (tm->allocated_mem);
+ if (err)
+ {
+ clib_error_report (err);
+ fformat (stdout, "\n");
+ }
+ next:
+ r = r->next;
+ }
+ }
+
+ fformat (stdout, "\n");
+ return 0;
+}
+
+#if 0
+static u8 *
+format_test_perf_bundle_core_power (u8 *s, va_list *args)
+{
+ test_perf_event_bundle_t __clib_unused *b =
+ va_arg (*args, test_perf_event_bundle_t *);
+ test_perf_t __clib_unused *tp = va_arg (*args, test_perf_t *);
+ u64 *data = va_arg (*args, u64 *);
+
+ if (data)
+ s = format (s, "%7.1f %%", (f64) 100 * data[1] / data[0]);
+ else
+ s = format (s, "%9s", "Level 0");
+
+ if (data)
+ s = format (s, "%8.1f %%", (f64) 100 * data[2] / data[0]);
+ else
+ s = format (s, "%9s", "Level 1");
+
+ if (data)
+ s = format (s, "%7.1f %%", (f64) 100 * data[3] / data[0]);
+ else
+ s = format (s, "%9s", "Level 2");
+
+ return s;
+}
+
+#ifdef __x86_64__
+#define PERF_INTEL_CODE(event, umask) ((event) | (umask) << 8)
+ ,
+ {
+ .name = "core-power",
+ .desc =
+ "Core cycles where the core was running under specific turbo schedule.",
+ .type = PERF_TYPE_RAW,
+ .config[0] = PERF_INTEL_CODE (0x3c, 0x00),
+ .config[1] = PERF_INTEL_CODE (0x28, 0x07),
+ .config[2] = PERF_INTEL_CODE (0x28, 0x18),
+ .config[3] = PERF_INTEL_CODE (0x28, 0x20),
+ .config[4] = PERF_INTEL_CODE (0x28, 0x40),
+ .n_events = 5,
+ .format_fn = format_test_perf_bundle_core_power,
+ }
+#endif
+};
+#endif
+
+#ifdef __linux__
+clib_error_t *
+test_perf (test_main_t *tm)
+{
+ clib_error_t *err = 0;
+ clib_perfmon_ctx_t _ctx, *ctx = &_ctx;
+
+ if ((err = clib_perfmon_init_by_bundle_name (
+ ctx, "%s", tm->bundle ? (char *) tm->bundle : "default")))
+ return err;
+
+ fformat (stdout, "Warming up...\n");
+ clib_perfmon_warmup (ctx);
+
+ for (int i = 0; i < CLIB_MARCH_TYPE_N_VARIANTS; i++)
+ {
+ test_registration_t *r = tm->registrations[i];
+
+ if (r == 0 || test_march_supported (i) < 0)
+ continue;
+
+ fformat (stdout, "\nMultiarch Variant: %U\n", format_march_variant, i);
+ fformat (stdout,
+ "-------------------------------------------------------\n");
+ while (r)
+ {
+ if (r->perf_tests)
+ {
+ test_perf_t *pt = r->perf_tests;
+ if (tm->filter && strstr (r->name, (char *) tm->filter) == 0)
+ goto next;
+
+ clib_perfmon_capture_group (ctx, "%s", r->name);
+ do
+ {
+ for (int i = 0; i < tm->repeat; i++)
+ {
+ pt->fd = ctx->group_fd;
+ clib_perfmon_reset (ctx);
+ pt->fn (pt);
+ clib_perfmon_capture (ctx, pt->n_ops, "%0s", pt->name);
+ for (int i = 0; i < vec_len (tm->allocated_mem); i++)
+ clib_mem_free (tm->allocated_mem[i]);
+ vec_free (tm->allocated_mem);
+ }
+ }
+ while ((++pt)->fn);
+ }
+ next:
+ r = r->next;
+ }
+ fformat (stdout, "%U\n", format_perfmon_bundle, ctx);
+ clib_perfmon_clear (ctx);
+ }
+
+ clib_perfmon_free (ctx);
+ return err;
+}
+#elif __FreeBSD__
+clib_error_t *
+test_perf (test_main_t *tm)
+{
+ return NULL;
+}
+#endif
+
+int
+main (int argc, char *argv[])
+{
+ test_main_t *tm = &test_main;
+ unformat_input_t _i = {}, *i = &_i;
+ clib_mem_init (0, 64ULL << 20);
+ clib_error_t *err;
+ int perf = 0;
+
+ /* defaults */
+ tm->repeat = 3;
+
+ unformat_init_command_line (i, argv);
+
+ while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (i, "perf"))
+ perf = 1;
+ else if (unformat (i, "filter %s", &tm->filter))
+ ;
+ else if (unformat (i, "bundle %s", &tm->bundle))
+ ;
+ else if (unformat (i, "repeat %d", &tm->repeat))
+ ;
+ else
+ {
+ clib_warning ("unknown input '%U'", format_unformat_error, i);
+ exit (1);
+ }
+ }
+
+ if (perf)
+ err = test_perf (tm);
+ else
+ err = test_funct (tm);
+
+ if (err)
+ {
+ clib_error_report (err);
+ fformat (stderr, "\n");
+ return 1;
+ }
+ return 0;
+}
+
+void *
+test_mem_alloc (uword size)
+{
+ void *rv;
+ size = round_pow2 (size, CLIB_CACHE_LINE_BYTES);
+ rv = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES);
+ clib_memset_u8 (rv, 0, size);
+ vec_add1 (test_main.allocated_mem, rv);
+ return rv;
+}
+
+void *
+test_mem_alloc_and_fill_inc_u8 (uword size, u8 start, u8 mask)
+{
+ u8 *rv;
+ mask = mask ? mask : 0xff;
+ size = round_pow2 (size, CLIB_CACHE_LINE_BYTES);
+ rv = clib_mem_alloc_aligned (size, CLIB_CACHE_LINE_BYTES);
+ for (uword i = 0; i < size; i++)
+ rv[i] = ((u8) i + start) & mask;
+ vec_add1 (test_main.allocated_mem, rv);
+ return rv;
+}
+
+void *
+test_mem_alloc_and_splat (uword elt_size, uword n_elts, void *elt)
+{
+ u8 *rv, *e;
+ uword data_size = elt_size * n_elts;
+ uword alloc_size = round_pow2 (data_size, CLIB_CACHE_LINE_BYTES);
+ e = rv = clib_mem_alloc_aligned (alloc_size, CLIB_CACHE_LINE_BYTES);
+ while (e - rv < data_size)
+ {
+ clib_memcpy_fast (e, elt, elt_size);
+ e += elt_size;
+ }
+
+ if (data_size < alloc_size)
+ clib_memset_u8 (e, 0, alloc_size - data_size);
+ vec_add1 (test_main.allocated_mem, rv);
+ return rv;
+}
+
diff --git a/src/vppinfra/test/test.h b/src/vppinfra/test/test.h
new file mode 100644
index 00000000000..8d756366163
--- /dev/null
+++ b/src/vppinfra/test/test.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_test_test_h
+#define included_test_test_h
+
+#include <vppinfra/cpu.h>
+#include <vppinfra/perfmon/perfmon.h>
+#ifdef __linux__
+#include <sys/ioctl.h>
+#include <linux/perf_event.h>
+#endif
+
+typedef clib_error_t *(test_fn_t) (clib_error_t *);
+
+struct test_perf_;
+typedef void (test_perf_fn_t) (struct test_perf_ *tp);
+
+typedef struct test_perf_
+{
+ int fd;
+ u64 n_ops;
+ union
+ {
+ u64 arg0;
+ void *ptr0;
+ };
+ union
+ {
+ u64 arg1;
+ void *ptr1;
+ };
+ union
+ {
+ u64 arg2;
+ void *ptr2;
+ };
+ char *name;
+ test_perf_fn_t *fn;
+} test_perf_t;
+
+typedef struct test_registration_
+{
+ char *name;
+ u8 multiarch : 1;
+ test_fn_t *fn;
+ test_perf_t *perf_tests;
+ u32 n_perf_tests;
+ struct test_registration_ *next;
+} test_registration_t;
+
+typedef struct
+{
+ test_registration_t *registrations[CLIB_MARCH_TYPE_N_VARIANTS];
+ u32 repeat;
+ u8 *filter;
+ u8 *bundle;
+ f64 ref_clock;
+ void **allocated_mem;
+} test_main_t;
+extern test_main_t test_main;
+
+#define __test_funct_fn \
+ static __clib_noinline __clib_noclone __clib_section (".test_func")
+#define __test_perf_fn \
+ static __clib_noinline __clib_noclone __clib_section (".test_perf")
+
+#define REGISTER_TEST(x) \
+ test_registration_t CLIB_MARCH_SFX (__test_##x); \
+ static void __clib_constructor CLIB_MARCH_SFX (__test_registration_##x) ( \
+ void) \
+ { \
+ test_registration_t *r = &CLIB_MARCH_SFX (__test_##x); \
+ r->next = \
+ test_main.registrations[CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE)]; \
+ test_main.registrations[CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE)] = r; \
+ } \
+ test_registration_t CLIB_MARCH_SFX (__test_##x)
+
+#define PERF_TESTS(...) \
+ (test_perf_t[]) \
+ { \
+ __VA_ARGS__, {} \
+ }
+
+#ifdef __linux__
+static_always_inline void
+test_perf_event_reset (test_perf_t *t)
+{
+ clib_perfmon_ioctl (t->fd, PERF_EVENT_IOC_RESET);
+}
+static_always_inline void
+test_perf_event_enable (test_perf_t *t)
+{
+ clib_perfmon_ioctl (t->fd, PERF_EVENT_IOC_ENABLE);
+}
+static_always_inline void
+test_perf_event_disable (test_perf_t *t)
+{
+ clib_perfmon_ioctl (t->fd, PERF_EVENT_IOC_DISABLE);
+}
+#elif __FreeBSD__
+static_always_inline void
+test_perf_event_reset (test_perf_t *t)
+{
+ /* TODO: Implement for FreeBSD */
+}
+static_always_inline void
+test_perf_event_enable (test_perf_t *t)
+{
+ /* TODO: Implement for FreeBSD */
+}
+static_always_inline void
+test_perf_event_disable (test_perf_t *t)
+{
+ /* TODO: Implement for FreeBSD */
+}
+#endif
+
+void *test_mem_alloc (uword size);
+void *test_mem_alloc_and_fill_inc_u8 (uword size, u8 start, u8 mask);
+void *test_mem_alloc_and_splat (uword elt_size, uword n_elts, void *elt);
+
+#endif
diff --git a/src/vppinfra/test/toeplitz.c b/src/vppinfra/test/toeplitz.c
new file mode 100644
index 00000000000..708fd0e60fc
--- /dev/null
+++ b/src/vppinfra/test/toeplitz.c
@@ -0,0 +1,514 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/format.h>
+#include <vppinfra/test/test.h>
+#include <vppinfra/vector/toeplitz.h>
+
+/* secret key and test cases taken from:
+ * https://docs.microsoft.com/en-us/windows-hardware/drivers/network/verifying-the-rss-hash-calculation
+ */
+
+typedef struct
+{
+ u32 sip, dip;
+ u16 sport, dport;
+} __clib_packed ip4_key_t;
+
+typedef struct
+{
+ ip4_key_t key;
+ u32 hash_2t, hash_4t;
+} ip4_test_t;
+
+typedef struct
+{
+ u16 sip[8], dip[8];
+ u16 sport, dport;
+} __clib_packed ip6_key_t;
+
+typedef struct
+{
+ ip6_key_t key;
+ u32 hash_2t, hash_4t;
+} ip6_test_t;
+
+#define N_IP4_TESTS 5
+#define N_IP6_TESTS 3
+#define N_LENGTH_TESTS 240
+
+#ifndef CLIB_MARCH_VARIANT
+#define _IP4(a, b, c, d) ((d) << 24 | (c) << 16 | (b) << 8 | (a))
+#define _IP6(a, b, c, d, e, f, g, h) \
+ { \
+ (u16) ((a) << 8) | (u8) ((a) >> 8), (u16) ((b) << 8) | (u8) ((b) >> 8), \
+ (u16) ((c) << 8) | (u8) ((c) >> 8), (u16) ((d) << 8) | (u8) ((d) >> 8), \
+ (u16) ((e) << 8) | (u8) ((e) >> 8), (u16) ((f) << 8) | (u8) ((f) >> 8), \
+ (u16) ((g) << 8) | (u8) ((g) >> 8), (u16) ((h) << 8) | (u8) ((h) >> 8), \
+ }
+#define _PORT(a) ((a) >> 8 | (((a) &0xff) << 8))
+
+const ip4_test_t ip4_tests[N_IP4_TESTS] = {
+ /* ipv4 tests */
+ {
+ .key.sip = _IP4 (66, 9, 149, 187),
+ .key.dip = _IP4 (161, 142, 100, 80),
+ .key.sport = _PORT (2794),
+ .key.dport = _PORT (1766),
+ .hash_2t = 0x323e8fc2,
+ .hash_4t = 0x51ccc178,
+ },
+ {
+ .key.sip = _IP4 (199, 92, 111, 2),
+ .key.dip = _IP4 (65, 69, 140, 83),
+ .key.sport = _PORT (14230),
+ .key.dport = _PORT (4739),
+ .hash_2t = 0xd718262a,
+ .hash_4t = 0xc626b0ea,
+ },
+ {
+ .key.sip = _IP4 (24, 19, 198, 95),
+ .key.dip = _IP4 (12, 22, 207, 184),
+ .key.sport = _PORT (12898),
+ .key.dport = _PORT (38024),
+ .hash_2t = 0xd2d0a5de,
+ .hash_4t = 0x5c2b394a,
+ },
+ {
+ .key.sip = _IP4 (38, 27, 205, 30),
+ .key.dip = _IP4 (209, 142, 163, 6),
+ .key.sport = _PORT (48228),
+ .key.dport = _PORT (2217),
+ .hash_2t = 0x82989176,
+ .hash_4t = 0xafc7327f,
+ },
+ {
+ .key.sip = _IP4 (153, 39, 163, 191),
+ .key.dip = _IP4 (202, 188, 127, 2),
+ .key.sport = _PORT (44251),
+ .key.dport = _PORT (1303),
+ .hash_2t = 0x5d1809c5,
+ .hash_4t = 0x10e828a2,
+ }
+};
+
+const ip6_test_t ip6_tests[N_IP6_TESTS] = {
+ {
+ .key.sip = _IP6 (0x3ffe, 0x2501, 0x200, 0x1fff, 0, 0, 0, 7),
+ .key.dip = _IP6 (0x3ffe, 0x2501, 0x200, 3, 0, 0, 0, 1),
+ .key.sport = _PORT (2794),
+ .key.dport = _PORT (1766),
+ .hash_2t = 0x2cc18cd5,
+ .hash_4t = 0x40207d3d,
+ },
+ {
+ .key.sip = _IP6 (0x3ffe, 0x501, 8, 0, 0x260, 0x97ff, 0xfe40, 0xefab),
+ .key.dip = _IP6 (0xff02, 0, 0, 0, 0, 0, 0, 1),
+ .key.sport = _PORT (14230),
+ .key.dport = _PORT (4739),
+ .hash_2t = 0x0f0c461c,
+ .hash_4t = 0xdde51bbf,
+ },
+ {
+ .key.sip = _IP6 (0x3ffe, 0x1900, 0x4545, 3, 0x200, 0xf8ff, 0xfe21, 0x67cf),
+ .key.dip = _IP6 (0xfe80, 0, 0, 0, 0x200, 0xf8ff, 0xfe21, 0x67cf),
+ .key.sport = _PORT (44251),
+ .key.dport = _PORT (38024),
+ .hash_2t = 0x4b61e985,
+ .hash_4t = 0x02d1feef,
+ }
+};
+
+const u32 length_test_hashes[N_LENGTH_TESTS] = {
+ 0x00000000, 0x00000000, 0x2b6d12ad, 0x9de4446e, 0x061f00bf, 0xad7ed8f7,
+ 0x4bc7b068, 0x231fc545, 0xdbd97a33, 0xcdab29e7, 0x2d665c0c, 0x31e28ed7,
+ 0x14e19218, 0x5aa89f0f, 0xd47de07f, 0x355ec712, 0x7e1cbfc0, 0xf84de19d,
+ 0xbcf66bd3, 0x104086c6, 0x71900b34, 0xcd2f9819, 0xeae68ebb, 0x54d63b4c,
+ 0x5f865a2c, 0x9d6ded08, 0xe00b0912, 0x3fcf07a6, 0x3bd9ca93, 0x3f4f3bbb,
+ 0xd0b82624, 0xa28a08e1, 0xa585969f, 0x0c8f4a71, 0x5dce7bdd, 0x4fcf2a6d,
+ 0x91c89ae9, 0xbef8a24d, 0x8e3d30fe, 0xc8027848, 0xc1e7e513, 0xa12bd3d9,
+ 0x46700bb4, 0xc6339dab, 0x970805ad, 0xfcb50ac8, 0xc6db4f44, 0x792e2987,
+ 0xacfb7836, 0xa25ec529, 0x957d7beb, 0x6732809a, 0x891836ed, 0xeefb83b2,
+ 0xca96b40b, 0x93fd5abd, 0x9076f922, 0x59adb4eb, 0x9705aafb, 0x282719b1,
+ 0xdda9cb8a, 0x3f499131, 0x47491130, 0x30ef0759, 0xad1cf855, 0x428aa312,
+ 0x4200240a, 0x71a72857, 0x16b30c36, 0x10cca9a3, 0x166f091e, 0x30e00560,
+ 0x8acd20ba, 0xfa633d76, 0x0fe32eb7, 0xdcc0122f, 0x20aa8ab0, 0x62b2a9af,
+ 0x7a6c80a6, 0x27e87268, 0x95b797a8, 0x25d18ccd, 0x68a7fb00, 0xc54bcdad,
+ 0x3bd0e717, 0xf0df54c9, 0x780daadf, 0x7b435605, 0x150c1e10, 0x8a892e54,
+ 0x9d27cb25, 0xe23383a5, 0x57aac408, 0x83b8abf8, 0x560f33af, 0xd5cb3307,
+ 0x79ae8edc, 0x9b127665, 0x320f18bd, 0x385d636b, 0xbd1b2dbf, 0x97679888,
+ 0x738894a4, 0xeba2afb0, 0xfa7c2d50, 0xb6741aa1, 0x28922bba, 0x7783242b,
+ 0xa694cca2, 0xa32781c0, 0x696cd670, 0xa714d72f, 0xea34d35a, 0xc5aed81e,
+ 0x0438433a, 0xc1939ab2, 0xb51c123a, 0x121426b9, 0x1add93ba, 0x50c56b6a,
+ 0x7e90902a, 0xae3abd85, 0x2f7a0088, 0xb45cf6f9, 0x80070094, 0x8bd46467,
+ 0xdfd1b762, 0x0bb25856, 0x48eefe84, 0x0989dbb9, 0xfc32472b, 0x965fec6b,
+ 0x5a256bd0, 0x6df7127a, 0x7856d0d6, 0xedc82bd3, 0x1b563b96, 0xc73eace7,
+ 0xba4c0a93, 0xdfd6dd97, 0x923c41db, 0x14926ca6, 0x22e52ab1, 0x22852a66,
+ 0x79606b9c, 0xb0f22b23, 0xb46354ba, 0x9c3cd931, 0x03a92bd6, 0x84000834,
+ 0x5425df65, 0xf4dd3fc9, 0x391cc873, 0xa560b52e, 0x828037d9, 0x31323dd5,
+ 0x5c6e3147, 0x28e21f85, 0xa431eb51, 0xf468c4a3, 0x9bea1d2e, 0x43d9109c,
+ 0x5bb9b081, 0xe0825675, 0xc9c92591, 0xd29fc812, 0x03136bc9, 0x5e005a1f,
+ 0x6d821ed8, 0x3f0bfcc4, 0x24774162, 0x893bde94, 0x6475efea, 0x6711538e,
+ 0xc4755f6d, 0x9425ebe2, 0xacf471b4, 0xb947ab0c, 0x1f78c455, 0x372b3ed7,
+ 0xb3ec24d7, 0x18c4459f, 0xa8ff3695, 0xe4aa2b85, 0x8a52ad7e, 0xe05e8177,
+ 0x7aa348ed, 0x3e4ac6aa, 0x17dcf8a5, 0x93b933b0, 0x8f7413ec, 0xc77bfe61,
+ 0xfdb72874, 0x4370f138, 0xdf3462ad, 0xc8970a59, 0xb4a9fed8, 0xa2ddc39b,
+ 0xd61db62a, 0x95c5fc1b, 0x7b22e6e0, 0x1969702c, 0x7992aebb, 0x59d7c225,
+ 0x0e16db0b, 0x9f2afc21, 0x246cf66b, 0xb3d6569d, 0x29c532d7, 0xe155747a,
+ 0xe38d7872, 0xea704969, 0xb69095b0, 0x1b198efd, 0x55daab76, 0xa2a377b6,
+ 0xb31aa2fa, 0x48b73c41, 0xf0cc501a, 0x9c9ca831, 0x1b591b99, 0xb2d8d22f,
+ 0xab4b5f69, 0x4fe00e71, 0xdf5480bd, 0x982540d7, 0x7f34ea4f, 0xd7be66e1,
+ 0x9d2ab1ba, 0x1ba62e12, 0xee3fb36c, 0xf28d7c5a, 0x756311eb, 0xc68567f2,
+ 0x7b6ea177, 0xc398d9f3
+};
+
+#else
+extern const ip4_test_t ip4_tests[N_IP4_TESTS];
+extern const ip6_test_t ip6_tests[N_IP6_TESTS];
+extern const u32 length_test_hashes[N_LENGTH_TESTS];
+#endif
+
+__test_funct_fn u32
+wrapper (clib_toeplitz_hash_key_t *k, u8 *data, u32 n_bytes)
+{
+ return clib_toeplitz_hash (k, data, n_bytes);
+}
+
+__test_funct_fn void
+wrapper_x4 (clib_toeplitz_hash_key_t *k, u8 *d0, u8 *d1, u8 *d2, u8 *d3,
+ u32 *h0, u32 *h1, u32 *h2, u32 *h3, u32 n_bytes)
+{
+ clib_toeplitz_hash_x4 (k, d0, d1, d2, d3, h0, h1, h2, h3, n_bytes);
+}
+
+static clib_error_t *
+test_clib_toeplitz_hash (clib_error_t *err)
+{
+ u32 r;
+ int n_key_copies, bigkey_len, bigdata_len;
+ u8 *bigkey, *bigdata;
+ clib_toeplitz_hash_key_t *k;
+
+ k = clib_toeplitz_hash_key_init (0, 0);
+
+ for (int i = 0; i < N_IP4_TESTS; i++)
+ {
+ r = wrapper (k, (u8 *) &ip4_tests[i].key, 8);
+ if (ip4_tests[i].hash_2t != r)
+ return clib_error_return (err,
+ "wrong IPv4 2 tuple hash for test %u, "
+ "calculated 0x%08x expected 0x%08x",
+ i, ip4_tests[i].hash_2t, r);
+
+ r = wrapper (k, (u8 *) &ip4_tests[i].key, 12);
+ if (ip4_tests[i].hash_4t != r)
+ return clib_error_return (err,
+ "wrong IPv4 4 tuple hash for test %u, "
+ "calculated 0x%08x expected 0x%08x",
+ i, ip4_tests[i].hash_4t, r);
+ }
+
+ for (int i = 0; i < N_IP6_TESTS; i++)
+ {
+ r = wrapper (k, (u8 *) &ip6_tests[i].key, 32);
+ if (ip6_tests[i].hash_2t != r)
+ return clib_error_return (err,
+ "wrong IPv6 2 tuple hash for test %u, "
+ "calculated 0x%08x expected 0x%08x",
+ i, ip6_tests[i].hash_2t, r);
+
+ r = wrapper (k, (u8 *) &ip6_tests[i].key, 36);
+ if (ip6_tests[i].hash_4t != r)
+ return clib_error_return (err,
+ "wrong IPv6 4 tuple hash for test %u, "
+ "calculated 0x%08x expected 0x%08x",
+ i, ip6_tests[i].hash_4t, r);
+ }
+
+ n_key_copies = 6;
+ bigkey_len = k->key_length * n_key_copies;
+ bigdata_len = bigkey_len - 4;
+ bigkey = test_mem_alloc_and_splat (k->key_length, n_key_copies, k->data);
+ bigdata = test_mem_alloc_and_fill_inc_u8 (bigdata_len, 0, 0);
+ u32 key_len = k->key_length;
+
+ clib_toeplitz_hash_key_free (k);
+ k = clib_toeplitz_hash_key_init (bigkey, n_key_copies * key_len);
+
+ for (int i = 0; i < N_LENGTH_TESTS - 4; i++)
+ {
+ r = wrapper (k, bigdata, i);
+ if (length_test_hashes[i] != r)
+ {
+ err = clib_error_return (err,
+ "wrong length test hash for length %u, "
+ "calculated 0x%08x expected 0x%08x "
+ "xor 0x%08x",
+ i, r, length_test_hashes[i],
+ r ^ length_test_hashes[i]);
+ goto done;
+ }
+ }
+
+done:
+ clib_toeplitz_hash_key_free (k);
+ return err;
+}
+
+void __test_perf_fn
+perftest_fixed_12byte (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+ u8 *data = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[0].key);
+ u8 *res = test_mem_alloc (4 * n);
+ clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ ((u32 *) res)[i] = clib_toeplitz_hash (k, data + i * 12, 12);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+void __test_perf_fn
+perftest_fixed_36byte (test_perf_t *tp)
+{
+ u32 n = tp->n_ops;
+ u8 *data = test_mem_alloc_and_splat (36, n, (void *) &ip6_tests[0].key);
+ u8 *res = test_mem_alloc (4 * n);
+ clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ ((u32 *) res)[i] = clib_toeplitz_hash (k, data + i * 36, 36);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+void __test_perf_fn
+perftest_variable_size (test_perf_t *tp)
+{
+ u32 key_len, n_keys, n = tp->n_ops;
+ u8 *key, *data = test_mem_alloc (n);
+ u32 *res = test_mem_alloc (sizeof (u32));
+ clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ k = clib_toeplitz_hash_key_init (0, 0);
+ key_len = k->key_length;
+ n_keys = ((n + 4) / k->key_length) + 1;
+ key = test_mem_alloc_and_splat (n_keys, key_len, k->data);
+ clib_toeplitz_hash_key_free (k);
+ k = clib_toeplitz_hash_key_init (key, key_len * n_keys);
+
+ test_perf_event_enable (tp);
+ res[0] = clib_toeplitz_hash (k, data, n);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+REGISTER_TEST (clib_toeplitz_hash) = {
+ .name = "clib_toeplitz_hash",
+ .fn = test_clib_toeplitz_hash,
+ .perf_tests = PERF_TESTS ({ .name = "fixed (per 12 byte tuple)",
+ .n_ops = 1024,
+ .fn = perftest_fixed_12byte },
+ { .name = "fixed (per 36 byte tuple)",
+ .n_ops = 1024,
+ .fn = perftest_fixed_36byte },
+ { .name = "variable size (per byte)",
+ .n_ops = 16384,
+ .fn = perftest_variable_size }),
+};
+
+static clib_error_t *
+test_clib_toeplitz_hash_x4 (clib_error_t *err)
+{
+ u32 r[4];
+ int n_key_copies, bigkey_len, bigdata_len;
+ u8 *bigkey, *bigdata0, *bigdata1, *bigdata2, *bigdata3;
+ clib_toeplitz_hash_key_t *k;
+
+ k = clib_toeplitz_hash_key_init (0, 0);
+
+ wrapper_x4 (k, (u8 *) &ip4_tests[0].key, (u8 *) &ip4_tests[1].key,
+ (u8 *) &ip4_tests[2].key, (u8 *) &ip4_tests[3].key, r, r + 1,
+ r + 2, r + 3, 8);
+
+ if (ip4_tests[0].hash_2t != r[0] || ip4_tests[1].hash_2t != r[1] ||
+ ip4_tests[2].hash_2t != r[2] || ip4_tests[3].hash_2t != r[3])
+ return clib_error_return (err,
+ "wrong IPv4 2 tuple x4 hash "
+ "calculated { 0x%08x, 0x%08x, 0x%08x, 0x%08x } "
+ "expected { 0x%08x, 0x%08x, 0x%08x, 0x%08x }",
+ ip4_tests[0].hash_2t, ip4_tests[1].hash_2t,
+ ip4_tests[2].hash_2t, ip4_tests[3].hash_2t, r[0],
+ r[1], r[2], r[3]);
+
+ wrapper_x4 (k, (u8 *) &ip4_tests[0].key, (u8 *) &ip4_tests[1].key,
+ (u8 *) &ip4_tests[2].key, (u8 *) &ip4_tests[3].key, r, r + 1,
+ r + 2, r + 3, 12);
+
+ if (ip4_tests[0].hash_4t != r[0] || ip4_tests[1].hash_4t != r[1] ||
+ ip4_tests[2].hash_4t != r[2] || ip4_tests[3].hash_4t != r[3])
+ return clib_error_return (err,
+ "wrong IPv4 4 tuple x4 hash "
+ "calculated { 0x%08x, 0x%08x, 0x%08x, 0x%08x } "
+ "expected { 0x%08x, 0x%08x, 0x%08x, 0x%08x }",
+ ip4_tests[0].hash_4t, ip4_tests[1].hash_4t,
+ ip4_tests[2].hash_4t, ip4_tests[3].hash_4t, r[0],
+ r[1], r[2], r[3]);
+
+ wrapper_x4 (k, (u8 *) &ip6_tests[0].key, (u8 *) &ip6_tests[1].key,
+ (u8 *) &ip6_tests[2].key, (u8 *) &ip6_tests[0].key, r, r + 1,
+ r + 2, r + 3, 32);
+
+ if (ip6_tests[0].hash_2t != r[0] || ip6_tests[1].hash_2t != r[1] ||
+ ip6_tests[2].hash_2t != r[2] || ip6_tests[0].hash_2t != r[3])
+ return clib_error_return (err,
+ "wrong IPv6 2 tuple x4 hash "
+ "calculated { 0x%08x, 0x%08x, 0x%08x, 0x%08x } "
+ "expected { 0x%08x, 0x%08x, 0x%08x, 0x%08x }",
+ ip6_tests[0].hash_2t, ip6_tests[1].hash_2t,
+ ip6_tests[2].hash_2t, ip6_tests[0].hash_2t, r[0],
+ r[1], r[2], r[3]);
+
+ wrapper_x4 (k, (u8 *) &ip6_tests[0].key, (u8 *) &ip6_tests[1].key,
+ (u8 *) &ip6_tests[2].key, (u8 *) &ip6_tests[0].key, r, r + 1,
+ r + 2, r + 3, 36);
+
+ if (ip6_tests[0].hash_4t != r[0] || ip6_tests[1].hash_4t != r[1] ||
+ ip6_tests[2].hash_4t != r[2] || ip6_tests[0].hash_4t != r[3])
+ return clib_error_return (err,
+ "wrong IPv6 4 tuple x4 hash "
+ "calculated { 0x%08x, 0x%08x, 0x%08x, 0x%08x } "
+ "expected { 0x%08x, 0x%08x, 0x%08x, 0x%08x }",
+ ip6_tests[0].hash_4t, ip6_tests[1].hash_4t,
+ ip6_tests[2].hash_4t, ip6_tests[0].hash_4t, r[0],
+ r[1], r[2], r[3]);
+
+ n_key_copies = 6;
+ bigkey_len = k->key_length * n_key_copies;
+ bigdata_len = bigkey_len - 4;
+ bigkey = test_mem_alloc_and_splat (k->key_length, n_key_copies, k->data);
+ bigdata0 = test_mem_alloc_and_fill_inc_u8 (bigdata_len, 0, 0);
+ bigdata1 = test_mem_alloc_and_fill_inc_u8 (bigdata_len, 0, 0);
+ bigdata2 = test_mem_alloc_and_fill_inc_u8 (bigdata_len, 0, 0);
+ bigdata3 = test_mem_alloc_and_fill_inc_u8 (bigdata_len, 0, 0);
+ u32 key_len = k->key_length;
+
+ clib_toeplitz_hash_key_free (k);
+ k = clib_toeplitz_hash_key_init (bigkey, n_key_copies * key_len);
+
+ for (int i = 0; i < N_LENGTH_TESTS - 4; i++)
+ {
+ wrapper_x4 (k, bigdata0, bigdata1, bigdata2, bigdata3, r, r + 1, r + 2,
+ r + 3, i);
+ if (length_test_hashes[i] != r[0] || length_test_hashes[i] != r[1] ||
+ length_test_hashes[i] != r[2] || length_test_hashes[i] != r[3])
+ {
+ err = clib_error_return (
+ err,
+ "wrong length test hash x4 for length %u, "
+ "calculated { 0x%08x, 0x%08x, 0x%08x, 0x%08x }, expected 0x%08x",
+ i, r[0], r[1], r[2], r[3], length_test_hashes[i]);
+ goto done;
+ }
+ }
+
+done:
+ clib_toeplitz_hash_key_free (k);
+ return err;
+}
+
+void __test_perf_fn
+perftest_fixed_12byte_x4 (test_perf_t *tp)
+{
+ u32 n = tp->n_ops / 4;
+ u8 *d0 = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[0].key);
+ u8 *d1 = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[1].key);
+ u8 *d2 = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[2].key);
+ u8 *d3 = test_mem_alloc_and_splat (12, n, (void *) &ip4_tests[3].key);
+ u32 *h0 = test_mem_alloc (4 * n);
+ u32 *h1 = test_mem_alloc (4 * n);
+ u32 *h2 = test_mem_alloc (4 * n);
+ u32 *h3 = test_mem_alloc (4 * n);
+ clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ clib_toeplitz_hash_x4 (k, d0 + i * 12, d1 + i * 12, d2 + i * 12,
+ d3 + i * 12, h0 + i, h1 + i, h2 + i, h3 + i, 12);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+void __test_perf_fn
+perftest_fixed_36byte_x4 (test_perf_t *tp)
+{
+ u32 n = tp->n_ops / 4;
+ u8 *d0 = test_mem_alloc_and_splat (36, n, (void *) &ip4_tests[0].key);
+ u8 *d1 = test_mem_alloc_and_splat (36, n, (void *) &ip4_tests[1].key);
+ u8 *d2 = test_mem_alloc_and_splat (36, n, (void *) &ip4_tests[2].key);
+ u8 *d3 = test_mem_alloc_and_splat (36, n, (void *) &ip4_tests[3].key);
+ u32 *h0 = test_mem_alloc (4 * n);
+ u32 *h1 = test_mem_alloc (4 * n);
+ u32 *h2 = test_mem_alloc (4 * n);
+ u32 *h3 = test_mem_alloc (4 * n);
+ clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ test_perf_event_enable (tp);
+ for (int i = 0; i < n; i++)
+ clib_toeplitz_hash_x4 (k, d0 + i * 36, d1 + i * 36, d2 + i * 36,
+ d3 + i * 36, h0 + i, h1 + i, h2 + i, h3 + i, 36);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+void __test_perf_fn
+perftest_variable_size_x4 (test_perf_t *tp)
+{
+ u32 key_len, n_keys, n = tp->n_ops / 4;
+ u8 *key;
+ u8 *d0 = test_mem_alloc (n);
+ u8 *d1 = test_mem_alloc (n);
+ u8 *d2 = test_mem_alloc (n);
+ u8 *d3 = test_mem_alloc (n);
+ u32 *h0 = test_mem_alloc (sizeof (u32));
+ u32 *h1 = test_mem_alloc (sizeof (u32));
+ u32 *h2 = test_mem_alloc (sizeof (u32));
+ u32 *h3 = test_mem_alloc (sizeof (u32));
+ clib_toeplitz_hash_key_t *k = clib_toeplitz_hash_key_init (0, 0);
+
+ k = clib_toeplitz_hash_key_init (0, 0);
+ key_len = k->key_length;
+ n_keys = ((n + 4) / k->key_length) + 1;
+ key = test_mem_alloc_and_splat (n_keys, key_len, k->data);
+ clib_toeplitz_hash_key_free (k);
+ k = clib_toeplitz_hash_key_init (key, key_len * n_keys);
+
+ test_perf_event_enable (tp);
+ clib_toeplitz_hash_x4 (k, d0, d1, d2, d3, h0, h1, h2, h3, n);
+ test_perf_event_disable (tp);
+
+ clib_toeplitz_hash_key_free (k);
+}
+
+REGISTER_TEST (clib_toeplitz_hash_x4) = {
+ .name = "clib_toeplitz_hash_x4",
+ .fn = test_clib_toeplitz_hash_x4,
+ .perf_tests = PERF_TESTS ({ .name = "fixed (per 12 byte tuple)",
+ .n_ops = 1024,
+ .fn = perftest_fixed_12byte_x4 },
+ { .name = "fixed (per 36 byte tuple)",
+ .n_ops = 1024,
+ .fn = perftest_fixed_36byte_x4 },
+ { .name = "variable size (per byte)",
+ .n_ops = 16384,
+ .fn = perftest_variable_size_x4 }),
+};
diff --git a/src/vppinfra/test_bihash_template.c b/src/vppinfra/test_bihash_template.c
index ffed5c73287..17cc05629ae 100644
--- a/src/vppinfra/test_bihash_template.c
+++ b/src/vppinfra/test_bihash_template.c
@@ -247,6 +247,59 @@ test_bihash_threads (test_main_t * tm)
return 0;
}
+static clib_error_t *
+test_bihash_vanilla_overwrite (test_main_t *tm)
+{
+ int i;
+ BVT (clib_bihash) * h;
+ BVT (clib_bihash_kv) kv;
+
+ h = &tm->hash;
+
+#if BIHASH_32_64_SVM
+ BV (clib_bihash_initiator_init_svm)
+ (h, "test", tm->nbuckets, 0x30000000 /* base_addr */, tm->hash_memory_size);
+#else
+ BV (clib_bihash_init) (h, "test", tm->nbuckets, tm->hash_memory_size);
+#endif
+
+ for (i = 0; i < 100; i++)
+ {
+ kv.key = 12345;
+ kv.value = i;
+
+ BV (clib_bihash_add_del) (h, &kv, 1 /* is_add */);
+ }
+
+ fformat (stdout, "End of run, should one item...\n");
+ fformat (stdout, "%U", BV (format_bihash), h, 0 /* very verbose */);
+ BV (clib_bihash_free) (h);
+ return 0;
+}
+
+static clib_error_t *
+test_bihash_value_assert (test_main_t *tm)
+{
+ BVT (clib_bihash) * h;
+ BVT (clib_bihash_kv) kv;
+
+ h = &tm->hash;
+
+#if BIHASH_32_64_SVM
+ BV (clib_bihash_initiator_init_svm)
+ (h, "test", tm->nbuckets, 0x30000000 /* base_addr */, tm->hash_memory_size);
+#else
+ BV (clib_bihash_init) (h, "test", tm->nbuckets, tm->hash_memory_size);
+#endif
+
+ kv.key = 12345;
+ kv.value = 0xFEEDFACE8BADF00DULL;
+
+ fformat (stderr, "The following add should ASSERT...\n");
+ BV (clib_bihash_add_del) (h, &kv, 1 /* is_add */);
+
+ return 0;
+}
static clib_error_t *
test_bihash (test_main_t * tm)
@@ -338,7 +391,7 @@ test_bihash (test_main_t * tm)
for (i = 0; i < tm->nitems; i++)
{
/* Prefetch buckets 8 iterations ahead */
- if (1 && (i < (tm->nitems - 8)))
+ if (1 && (i < ((i64) tm->nitems - 8)))
{
BVT (clib_bihash_kv) pref_kv;
u64 pref_hash;
@@ -422,7 +475,7 @@ test_bihash (test_main_t * tm)
for (j = 0; j < tm->nitems; j++)
{
/* Prefetch buckets 8 iterations ahead */
- if (1 && (j < (tm->nitems - 8)))
+ if (1 && (j < ((i64) tm->nitems - 8)))
{
BVT (clib_bihash_kv) pref_kv;
u64 pref_hash;
@@ -514,6 +567,10 @@ test_bihash_main (test_main_t * tm)
tm->verbose = 1;
else if (unformat (i, "stale-overwrite"))
which = 3;
+ else if (unformat (i, "overwrite"))
+ which = 4;
+ else if (unformat (i, "value-assert"))
+ which = 5;
else
return clib_error_return (0, "unknown input '%U'",
format_unformat_error, i);
@@ -522,8 +579,7 @@ test_bihash_main (test_main_t * tm)
/* Preallocate hash table, key vector */
tm->key_hash = hash_create (tm->nitems, sizeof (uword));
vec_validate (tm->keys, tm->nitems - 1);
- _vec_len (tm->keys) = 0;
-
+ vec_set_len (tm->keys, 0);
switch (which)
{
@@ -543,6 +599,14 @@ test_bihash_main (test_main_t * tm)
error = test_bihash_stale_overwrite (tm);
break;
+ case 4:
+ error = test_bihash_vanilla_overwrite (tm);
+ break;
+
+ case 5:
+ error = test_bihash_value_assert (tm);
+ break;
+
default:
return clib_error_return (0, "no such test?");
}
diff --git a/src/vppinfra/test_fifo.c b/src/vppinfra/test_fifo.c
index 45392bc35eb..2d3cad33119 100644
--- a/src/vppinfra/test_fifo.c
+++ b/src/vppinfra/test_fifo.c
@@ -105,12 +105,10 @@ test_fifo_main (unformat_input_t * input)
ASSERT (clib_fifo_elts (as) == n_added - n_removed);
j = 0;
- /* *INDENT-OFF* */
clib_fifo_foreach (a, as, {
ASSERT (A_is_valid (a, n_removed + j));
j++;
});
- /* *INDENT-ON* */
ASSERT (j == clib_fifo_elts (as));
}
diff --git a/src/vppinfra/test_fpool.c b/src/vppinfra/test_fpool.c
index e2d67f16907..02d9d219717 100644
--- a/src/vppinfra/test_fpool.c
+++ b/src/vppinfra/test_fpool.c
@@ -30,7 +30,7 @@ main (int argc, char *argv[])
clib_mem_init (0, 3ULL << 30);
vec_validate (indices, NELTS - 1);
- _vec_len (indices) = 0;
+ vec_set_len (indices, 0);
pool_init_fixed (tp, NELTS);
diff --git a/src/vppinfra/test_hash.c b/src/vppinfra/test_hash.c
index 95ced448c13..25adff3443b 100644
--- a/src/vppinfra/test_hash.c
+++ b/src/vppinfra/test_hash.c
@@ -86,14 +86,12 @@ hash_next_test (word * h)
hash_pair_t *p0, *p1;
clib_error_t *error = 0;
- /* *INDENT-OFF* */
hash_foreach_pair (p0, h, {
p1 = hash_next (h, &hn);
error = CLIB_ERROR_ASSERT (p0 == p1);
if (error)
break;
});
- /* *INDENT-ON* */
if (!error)
error = CLIB_ERROR_ASSERT (!hash_next (h, &hn));
@@ -176,12 +174,10 @@ test_word_key (hash_test_t * ht)
hash_pair_t *p;
uword ki;
- /* *INDENT-OFF* */
hash_foreach_pair (p, h, {
ki = p->value[0];
ASSERT (keys[ki] == p->key);
});
- /* *INDENT-ON* */
}
if ((error = hash_validate (h)))
diff --git a/src/vppinfra/test_heap.c b/src/vppinfra/test_heap.c
index 0fd6bf74245..da3ad24a820 100644
--- a/src/vppinfra/test_heap.c
+++ b/src/vppinfra/test_heap.c
@@ -61,14 +61,13 @@ main (int argc, char *argv[])
uword *objects = 0;
uword *handles = 0;
uword objects_used;
- uword align, fixed_size;
+ uword align;
clib_mem_init (0, 10 << 20);
n = 10;
seed = (u32) getpid ();
check_mask = 0;
- fixed_size = 0;
if (argc > 1)
{
@@ -100,13 +99,6 @@ main (int argc, char *argv[])
objects_used = 0;
- if (fixed_size)
- {
- uword max_len = 1024 * 1024;
- void *memory = clib_mem_alloc (max_len * sizeof (h[0]));
- h = heap_create_from_memory (memory, max_len, sizeof (h[0]));
- }
-
for (i = 0; i < n; i++)
{
while (1)
@@ -188,9 +180,6 @@ main (int argc, char *argv[])
vec_free (objects);
vec_free (handles);
- if (fixed_size)
- vec_free_h (h, sizeof (heap_header_t));
-
if (verbose)
fformat (stderr, "%U\n", format_clib_mem_usage, /* verbose */ 0);
diff --git a/src/vppinfra/test_interrupt.c b/src/vppinfra/test_interrupt.c
new file mode 100644
index 00000000000..133692d1bd0
--- /dev/null
+++ b/src/vppinfra/test_interrupt.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Graphiant, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/clib.h>
+#include <vppinfra/format.h>
+#include <vppinfra/error.h>
+#include <vppinfra/random.h>
+#include <vppinfra/time.h>
+#include <vppinfra/interrupt.h>
+
+#define MAX_INTS 2048
+
+int debug = 0;
+
+#define debug(format, args...) \
+ if (debug) \
+ { \
+ fformat (stdout, format, ##args); \
+ }
+
+void
+set_and_check_bits (void *interrupts, int num_ints)
+{
+ for (int step = 1; step < num_ints; step++)
+ {
+ int int_num = -1;
+ int expected = 0;
+
+ debug (" Step of %d\n", step);
+ for (int i = 0; i < num_ints; i += step)
+ {
+ debug (" Setting %d\n", i);
+ clib_interrupt_set (interrupts, i);
+ }
+
+ while ((int_num =
+ clib_interrupt_get_next_and_clear (interrupts, int_num)) != -1)
+ {
+ debug (" Got %d, expecting %d\n", int_num, expected);
+ ASSERT (int_num == expected);
+ expected += step;
+ }
+ int_num = clib_interrupt_get_next_and_clear (interrupts, -1);
+ ASSERT (int_num == -1);
+ }
+}
+
+int
+main (int argc, char *argv[])
+{
+ clib_mem_init (0, 3ULL << 30);
+
+ debug = (argc > 1);
+
+ void *interrupts = NULL;
+
+ for (int num_ints = 0; num_ints < MAX_INTS; num_ints++)
+ {
+ clib_interrupt_resize (&interrupts, num_ints);
+ debug ("Size now %d\n", num_ints);
+
+ set_and_check_bits (interrupts, num_ints);
+ }
+
+ return 0;
+}
diff --git a/src/vppinfra/test_longjmp.c b/src/vppinfra/test_longjmp.c
index 01debe2ac37..50dc24b48b0 100644
--- a/src/vppinfra/test_longjmp.c
+++ b/src/vppinfra/test_longjmp.c
@@ -82,27 +82,25 @@ test_longjmp_main (unformat_input_t * input)
static uword
f3 (uword arg)
{
- uword i, j, array[10];
-
- for (i = 0; i < ARRAY_LEN (array); i++)
- array[i] = arg + i;
-
- j = 0;
- for (i = 0; i < ARRAY_LEN (array); i++)
- j ^= array[i];
-
- return j;
+ return (uword) __builtin_frame_address (0);
}
static void
test_calljmp (unformat_input_t * input)
{
- static u8 stack[32 * 1024] __attribute__ ((aligned (16)));
- uword v;
+ u8 stack[4096] __attribute__ ((aligned (16))) = {};
+ uword start, end, v;
+
+ start = pointer_to_uword (stack);
+ end = start + ARRAY_LEN (stack);
+
+ v = f3 (0);
+ if (!(v < start || v > end))
+ clib_panic ("something went wrong in the calljmp test");
v = clib_calljmp (f3, 0, stack + sizeof (stack));
- ASSERT (v == f3 (0));
- if_verbose ("calljump ok");
+ if_verbose ("calljump %s",
+ v >= start && v < (end - sizeof (uword)) ? "ok" : "fail");
}
#ifdef CLIB_UNIX
diff --git a/src/vppinfra/test_mheap.c b/src/vppinfra/test_mheap.c
deleted file mode 100644
index ae0c58a6a74..00000000000
--- a/src/vppinfra/test_mheap.c
+++ /dev/null
@@ -1,286 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/*
- Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-#ifdef CLIB_LINUX_KERNEL
-#include <linux/unistd.h>
-#endif
-
-#ifdef CLIB_UNIX
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdio.h> /* scanf */
-#endif
-
-#include <vppinfra/format.h>
-#include <vppinfra/random.h>
-#include <vppinfra/time.h>
-
-static int verbose = 0;
-#define if_verbose(format,args...) \
- if (verbose) { clib_warning(format, ## args); }
-
-int
-test1 (void)
-{
- clib_time_t clib_time;
- void *h_mem = clib_mem_alloc (2ULL << 30);
- void *h;
- uword *objects = 0;
- int i;
- f64 before, after;
-
- clib_time_init (&clib_time);
-
- vec_validate (objects, 2000000 - 1);
-
- h = mheap_alloc (h_mem, (uword) (2 << 30));
-
- before = clib_time_now (&clib_time);
-
- for (i = 0; i < vec_len (objects); i++)
- {
- h = mheap_get_aligned (h, 24 /* size */ ,
- 64 /* align */ ,
- 16 /* align at offset */ , &objects[i]);
- }
-
- after = clib_time_now (&clib_time);
-
- fformat (stdout, "alloc: %u objects in %.2f seconds, %.2f objects/second\n",
- vec_len (objects), (after - before),
- ((f64) vec_len (objects)) / (after - before));
-
- return 0;
-}
-
-
-int
-test_mheap_main (unformat_input_t * input)
-{
- int i, j, k, n_iterations;
- void *h, *h_mem;
- uword *objects = 0;
- u32 objects_used, really_verbose, n_objects, max_object_size;
- u32 check_mask, seed, trace, use_vm;
- u32 print_every = 0;
- u32 *data;
- mheap_t *mh;
-
- /* Validation flags. */
- check_mask = 0;
-#define CHECK_VALIDITY 1
-#define CHECK_DATA 2
-#define CHECK_ALIGN 4
-#define TEST1 8
-
- n_iterations = 10;
- seed = 0;
- max_object_size = 100;
- n_objects = 1000;
- trace = 0;
- really_verbose = 0;
- use_vm = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (0 == unformat (input, "iter %d", &n_iterations)
- && 0 == unformat (input, "count %d", &n_objects)
- && 0 == unformat (input, "size %d", &max_object_size)
- && 0 == unformat (input, "seed %d", &seed)
- && 0 == unformat (input, "print %d", &print_every)
- && 0 == unformat (input, "validdata %|",
- &check_mask, CHECK_DATA | CHECK_VALIDITY)
- && 0 == unformat (input, "valid %|",
- &check_mask, CHECK_VALIDITY)
- && 0 == unformat (input, "verbose %=", &really_verbose, 1)
- && 0 == unformat (input, "trace %=", &trace, 1)
- && 0 == unformat (input, "vm %=", &use_vm, 1)
- && 0 == unformat (input, "align %|", &check_mask, CHECK_ALIGN)
- && 0 == unformat (input, "test1 %|", &check_mask, TEST1))
- {
- clib_warning ("unknown input `%U'", format_unformat_error, input);
- return 1;
- }
- }
-
- /* Zero seed means use default. */
- if (!seed)
- seed = random_default_seed ();
-
- if (check_mask & TEST1)
- {
- return test1 ();
- }
-
- if_verbose
- ("testing %d iterations, %d %saligned objects, max. size %d, seed %d",
- n_iterations, n_objects, (check_mask & CHECK_ALIGN) ? "randomly " : "un",
- max_object_size, seed);
-
- vec_resize (objects, n_objects);
- if (vec_bytes (objects) > 0) /* stupid warning be gone */
- clib_memset (objects, ~0, vec_bytes (objects));
- objects_used = 0;
-
- /* Allocate initial heap. */
- {
- uword size =
- max_pow2 (2 * n_objects * max_object_size * sizeof (data[0]));
-
- h_mem = clib_mem_alloc (size);
- if (!h_mem)
- return 0;
-
- h = mheap_alloc (h_mem, size);
- }
-
- if (trace)
- mheap_trace (h, trace);
-
- mh = mheap_header (h);
-
- if (use_vm)
- mh->flags &= ~MHEAP_FLAG_DISABLE_VM;
- else
- mh->flags |= MHEAP_FLAG_DISABLE_VM;
-
- if (check_mask & CHECK_VALIDITY)
- mh->flags |= MHEAP_FLAG_VALIDATE;
-
- for (i = 0; i < n_iterations; i++)
- {
- while (1)
- {
- j = random_u32 (&seed) % vec_len (objects);
- if (objects[j] != ~0 || i + objects_used < n_iterations)
- break;
- }
-
- if (objects[j] != ~0)
- {
- mheap_put (h, objects[j]);
- objects_used--;
- objects[j] = ~0;
- }
- else
- {
- uword size, align, align_offset;
-
- size = (random_u32 (&seed) % max_object_size) * sizeof (data[0]);
- align = align_offset = 0;
- if (check_mask & CHECK_ALIGN)
- {
- align = 1 << (random_u32 (&seed) % 10);
- align_offset = round_pow2 (random_u32 (&seed) & (align - 1),
- sizeof (u32));
- }
-
- h = mheap_get_aligned (h, size, align, align_offset, &objects[j]);
-
- if (align > 0)
- ASSERT (0 == ((objects[j] + align_offset) & (align - 1)));
-
- ASSERT (objects[j] != ~0);
- objects_used++;
-
- /* Set newly allocated object with test data. */
- if (check_mask & CHECK_DATA)
- {
- uword len;
-
- data = (void *) h + objects[j];
- len = mheap_len (h, data);
-
- ASSERT (size <= mheap_data_bytes (h, objects[j]));
-
- data[0] = len;
- for (k = 1; k < len; k++)
- data[k] = objects[j] + k;
- }
- }
-
- /* Verify that all used objects have correct test data. */
- if (check_mask & 2)
- {
- for (j = 0; j < vec_len (objects); j++)
- if (objects[j] != ~0)
- {
- u32 *data = h + objects[j];
- uword len = data[0];
- for (k = 1; k < len; k++)
- ASSERT (data[k] == objects[j] + k);
- }
- }
- if (print_every != 0 && i > 0 && (i % print_every) == 0)
- fformat (stderr, "iteration %d: %U\n", i, format_mheap, h,
- really_verbose);
- }
-
- if (verbose)
- fformat (stderr, "%U\n", format_mheap, h, really_verbose);
- mheap_free (h);
- clib_mem_free (h_mem);
- vec_free (objects);
-
- return 0;
-}
-
-#ifdef CLIB_UNIX
-int
-main (int argc, char *argv[])
-{
- unformat_input_t i;
- int ret;
-
- clib_mem_init (0, 3ULL << 30);
-
- verbose = (argc > 1);
- unformat_init_command_line (&i, argv);
- ret = test_mheap_main (&i);
- unformat_free (&i);
-
- return ret;
-}
-#endif /* CLIB_UNIX */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/test_pool_alloc.c b/src/vppinfra/test_pool_alloc.c
new file mode 100644
index 00000000000..57b78b8ad9e
--- /dev/null
+++ b/src/vppinfra/test_pool_alloc.c
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2023 Yandex LLC.
+ */
+
+#include <vppinfra/pool.h>
+
+/* can be a very large size */
+#define NELTS 1024
+
+int
+main (int argc, char *argv[])
+{
+ u32 *junk = 0;
+ int i;
+ u32 *tp = 0;
+ u32 *indices = 0;
+
+ clib_mem_init (0, 3ULL << 30);
+
+ vec_validate (indices, NELTS - 1);
+ vec_set_len (indices, 0);
+
+ /* zero size allocation is ok */
+ pool_alloc (tp, 0);
+
+ fformat (stdout, "%d pool elts of empty pool\n", pool_elts (tp));
+
+ pool_validate (tp);
+
+ pool_alloc (tp, NELTS);
+
+ for (i = 0; i < NELTS; i++)
+ {
+ pool_get (tp, junk);
+ vec_add1 (indices, junk - tp);
+ *junk = i;
+ }
+
+ for (i = 0; i < NELTS; i++)
+ {
+ junk = pool_elt_at_index (tp, indices[i]);
+ ASSERT (*junk == i);
+ }
+
+ fformat (stdout, "%d pool elts before deletes\n", pool_elts (tp));
+
+ pool_put_index (tp, indices[12]);
+ pool_put_index (tp, indices[43]);
+
+ fformat (stdout, "%d pool elts after deletes\n", pool_elts (tp));
+
+ pool_validate (tp);
+
+ pool_free (tp);
+ return 0;
+}
diff --git a/src/vppinfra/test_pool_iterate.c b/src/vppinfra/test_pool_iterate.c
index bcbd235ba71..fc4be6d6fe1 100644
--- a/src/vppinfra/test_pool_iterate.c
+++ b/src/vppinfra/test_pool_iterate.c
@@ -77,7 +77,6 @@ main (int argc, char *argv[])
}
while (next != ~0);
- /* *INDENT-OFF* */
pool_foreach (junk, tp)
{
int is_free;
@@ -94,7 +93,6 @@ main (int argc, char *argv[])
clib_warning ("oops, busy index %d reported free", i);
}
}
- /* *INDENT-ON* */
return 0;
}
diff --git a/src/vppinfra/test_serialize.c b/src/vppinfra/test_serialize.c
index 5c931b76023..0dcff031364 100644
--- a/src/vppinfra/test_serialize.c
+++ b/src/vppinfra/test_serialize.c
@@ -136,6 +136,46 @@ typedef struct
serialize_main_t unserialize_main;
} test_serialize_main_t;
+u8 *test_pattern;
+
+int
+vl (void *p)
+{
+ return vec_len (p);
+}
+
+void
+test_serialize_not_inline_double_vector_expand (void)
+{
+ serialize_main_t _m, *m = &_m;
+ u8 *serialized = 0;
+ u64 *magic;
+ void *p;
+ int i;
+
+ vec_validate (test_pattern, 1023);
+
+ for (i = 0; i < vec_len (test_pattern); i++)
+ test_pattern[i] = i & 0xff;
+
+ serialize_open_vector (m, serialized);
+ p = serialize_get (m, 61);
+ clib_memcpy_fast (p, test_pattern, 61);
+ serialize_integer (m, 0xDEADBEEFFEEDFACEULL, 8);
+ p = serialize_get (m, vec_len (test_pattern) - 62);
+ clib_memcpy_fast (p, test_pattern + 61, vec_len (test_pattern) - 62);
+ serialized = serialize_close_vector (m);
+
+ magic = (u64 *) (serialized + 61);
+
+ if (*magic != clib_net_to_host_u64 (0xDEADBEEFFEEDFACEULL))
+ {
+ fformat (stderr, "BUG!\n");
+ exit (1);
+ }
+ return;
+}
+
int
test_serialize_main (unformat_input_t * input)
{
@@ -168,6 +208,12 @@ test_serialize_main (unformat_input_t * input)
;
else if (unformat (input, "verbose %=", &tm->verbose, 1))
;
+ else if (unformat (input, "double-expand"))
+ {
+ test_serialize_not_inline_double_vector_expand ();
+ clib_warning ("serialize_not_inline double vector expand OK");
+ exit (0);
+ }
else
{
error = clib_error_create ("unknown input `%U'\n",
diff --git a/src/vppinfra/test_socket.c b/src/vppinfra/test_socket.c
index ea0ae658943..3a0e6b29ce6 100644
--- a/src/vppinfra/test_socket.c
+++ b/src/vppinfra/test_socket.c
@@ -99,7 +99,7 @@ test_socket_main (unformat_input_t * input)
break;
if_verbose ("%v", s->rx_buffer);
- _vec_len (s->rx_buffer) = 0;
+ vec_set_len (s->rx_buffer, 0);
}
error = clib_socket_close (s);
diff --git a/src/vppinfra/test_tw_timer.c b/src/vppinfra/test_tw_timer.c
index 47e5e49bf1f..e9f4251a7b1 100644
--- a/src/vppinfra/test_tw_timer.c
+++ b/src/vppinfra/test_tw_timer.c
@@ -316,7 +316,6 @@ test2_single (tw_timer_test_main_t * tm)
j = 0;
vec_reset_length (deleted_indices);
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
tw_timer_stop_2t_1w_2048sl (&tm->single_wheel, e->stop_timer_handle);
@@ -324,7 +323,6 @@ test2_single (tw_timer_test_main_t * tm)
if (++j >= tm->ntimers / 4)
goto del_and_re_add;
}
- /* *INDENT-ON* */
del_and_re_add:
for (j = 0; j < vec_len (deleted_indices); j++)
@@ -374,14 +372,12 @@ test2_single (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_2t_1w_2048sl (&tm->single_wheel);
@@ -455,7 +451,6 @@ test2_double (tw_timer_test_main_t * tm)
j = 0;
vec_reset_length (deleted_indices);
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
tw_timer_stop_16t_2w_512sl (&tm->double_wheel, e->stop_timer_handle);
@@ -463,7 +458,6 @@ test2_double (tw_timer_test_main_t * tm)
if (++j >= tm->ntimers / 4)
goto del_and_re_add;
}
- /* *INDENT-ON* */
del_and_re_add:
for (j = 0; j < vec_len (deleted_indices); j++)
@@ -512,14 +506,12 @@ test2_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
@@ -590,7 +582,6 @@ test2_double_updates (tw_timer_test_main_t * tm)
j = 0;
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
expiration_time = get_expiration_time (tm);
@@ -602,7 +593,6 @@ test2_double_updates (tw_timer_test_main_t * tm)
if (++j >= tm->ntimers / 4)
goto done;
}
- /* *INDENT-ON* */
done:
updates += j;
@@ -623,14 +613,12 @@ test2_double_updates (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
@@ -706,7 +694,6 @@ test2_triple (tw_timer_test_main_t * tm)
j = 0;
vec_reset_length (deleted_indices);
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
tw_timer_stop_4t_3w_256sl (&tm->triple_wheel, e->stop_timer_handle);
@@ -714,7 +701,6 @@ test2_triple (tw_timer_test_main_t * tm)
if (++j >= tm->ntimers / 4)
goto del_and_re_add;
}
- /* *INDENT-ON* */
del_and_re_add:
for (j = 0; j < vec_len (deleted_indices); j++)
@@ -763,14 +749,12 @@ test2_triple (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_4t_3w_256sl (&tm->triple_wheel);
@@ -846,7 +830,6 @@ test2_triple_ov (tw_timer_test_main_t * tm)
j = 0;
vec_reset_length (deleted_indices);
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
tw_timer_stop_1t_3w_1024sl_ov (&tm->triple_ov_wheel,
@@ -855,7 +838,6 @@ test2_triple_ov (tw_timer_test_main_t * tm)
if (++j >= tm->ntimers / 4)
goto del_and_re_add;
}
- /* *INDENT-ON* */
del_and_re_add:
for (j = 0; j < vec_len (deleted_indices); j++)
@@ -904,7 +886,6 @@ test2_triple_ov (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
TWT (tw_timer) * t;
@@ -915,7 +896,6 @@ test2_triple_ov (tw_timer_test_main_t * tm)
t = pool_elt_at_index (tm->triple_ov_wheel.timers, e->stop_timer_handle);
fformat (stdout, " expiration_time %lld\n", t->expiration_time);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_1t_3w_1024sl_ov (&tm->triple_ov_wheel);
@@ -972,14 +952,12 @@ test1_single (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat(stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
fformat (stdout,
"final wheel time %d, fast index %d\n",
@@ -1030,14 +1008,12 @@ test1_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat(stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
fformat (stdout,
"final wheel time %d, fast index %d\n",
@@ -1088,14 +1064,12 @@ test1_two_timer_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat(stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
fformat (stdout,
"final wheel time %d, fast index %d\n",
@@ -1168,14 +1142,12 @@ test3_triple_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_4t_3w_256sl (&tm->triple_wheel);
@@ -1252,14 +1224,12 @@ test4_double_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
@@ -1336,14 +1306,12 @@ test5_double (tw_timer_test_main_t * tm)
fformat (stdout, "Note: %d elements remain in pool\n",
pool_elts (tm->test_elts));
- /* *INDENT-OFF* */
pool_foreach (e, tm->test_elts)
{
fformat (stdout, "[%d] expected to expire %d\n",
e - tm->test_elts,
e->expected_to_expire);
}
- /* *INDENT-ON* */
pool_free (tm->test_elts);
tw_timer_wheel_free_16t_2w_512sl (&tm->double_wheel);
diff --git a/src/vppinfra/test_vec.c b/src/vppinfra/test_vec.c
index c6f97fb984d..9f336a0a095 100644
--- a/src/vppinfra/test_vec.c
+++ b/src/vppinfra/test_vec.c
@@ -207,33 +207,35 @@ dump_call_stats (uword * stats)
more sensible value later. */
#define MAX_VEC_LEN 10
-#define create_random_vec_wh(elt_type, len, hdr_bytes, seed) \
-({ \
- elt_type * _v(v) = NULL; \
- uword _v(l) = (len); \
- uword _v(h) = (hdr_bytes); \
- u8 * _v(hdr); \
- \
- if (_v(l) == 0) \
- goto __done__; \
- \
- /* ~0 means select random length between 0 and MAX_VEC_LEN. */ \
- if (_v(l) == ~0) \
- _v(l) = bounded_random_u32 (&(seed), 0, MAX_VEC_LEN); \
- \
- _v(v) = _vec_resize (NULL, _v(l), _v(l) * sizeof (elt_type), _v(h), 0); \
- fill_with_random_data (_v(v), vec_bytes (_v(v)), (seed)); \
- \
- /* Fill header with random data as well. */ \
- if (_v(h) > 0) \
- { \
- _v(hdr) = vec_header (_v(v), _v(h)); \
- fill_with_random_data (_v(hdr), _v(h), (seed)); \
- } \
- \
-__done__: \
- _v(v); \
-})
+#define create_random_vec_wh(elt_type, len, hdr_bytes, seed) \
+ ({ \
+ elt_type *_v (v) = NULL; \
+ uword _v (l) = (len); \
+ vec_attr_t _v (attr) = { .hdr_sz = (hdr_bytes), \
+ .elt_sz = sizeof (elt_type) }; \
+ uword _v (h) = (hdr_bytes); \
+ u8 *_v (hdr); \
+ \
+ if (_v (l) == 0) \
+ goto __done__; \
+ \
+ /* ~0 means select random length between 0 and MAX_VEC_LEN. */ \
+ if (_v (l) == ~0) \
+ _v (l) = bounded_random_u32 (&(seed), 0, MAX_VEC_LEN); \
+ \
+ _v (v) = _vec_alloc_internal (_v (l), &_v (attr)); \
+ fill_with_random_data (_v (v), vec_bytes (_v (v)), (seed)); \
+ \
+ /* Fill header with random data as well. */ \
+ if (_v (h) > 0) \
+ { \
+ _v (hdr) = vec_header (_v (v)); \
+ fill_with_random_data (_v (hdr), _v (h), (seed)); \
+ } \
+ \
+ __done__: \
+ _v (v); \
+ })
#define create_random_vec(elt_type, len, seed) \
create_random_vec_wh (elt_type, len, 0, seed)
@@ -258,7 +260,7 @@ validate_vec_free (elt_t * vec)
static elt_t *
validate_vec_free_h (elt_t * vec, uword hdr_bytes)
{
- vec_free_h (vec, hdr_bytes);
+ vec_free (vec);
ASSERT (vec == NULL);
return vec;
}
@@ -274,8 +276,8 @@ validate_vec_hdr (elt_t * vec, uword hdr_bytes)
return;
vh = _vec_find (vec);
- hdr = vec_header (vec, hdr_bytes);
- hdr_end = vec_header_end (hdr, hdr_bytes);
+ hdr = vec_header (vec);
+ hdr_end = vec_header_end (hdr);
ASSERT (hdr_end == (u8 *) vec);
ASSERT ((u8 *) vh - (u8 *) hdr >= hdr_bytes);
@@ -335,8 +337,7 @@ validate_vec (elt_t * vec, uword hdr_bytes)
else
{
if (hdr_bytes > 0)
- VERBOSE3 ("Header: %U\n",
- format_hex_bytes, vec_header (vec, sizeof (vec[0])),
+ VERBOSE3 ("Header: %U\n", format_hex_bytes, vec_header (vec),
sizeof (vec[0]));
VERBOSE3 ("%U\n\n",
@@ -371,7 +372,7 @@ validate_vec_resize_h (elt_t * vec, uword num_elts, uword hdr_bytes)
len1 = vec_len (vec);
if (vec)
- hdr = vec_header (vec, hdr_bytes);
+ hdr = vec_header (vec);
hash = compute_vec_hash (0, vec);
hdr_hash = compute_mem_hash (0, hdr, hdr_bytes);
@@ -391,7 +392,7 @@ validate_vec_resize_h (elt_t * vec, uword num_elts, uword hdr_bytes)
}
if (vec)
- hdr = vec_header (vec, hdr_bytes);
+ hdr = vec_header (vec);
ASSERT (compute_vec_hash (hash, vec) == 0);
ASSERT (compute_mem_hash (hdr_hash, hdr, hdr_bytes) == 0);
@@ -677,7 +678,7 @@ validate_vec_init_h (uword num_elts, uword hdr_bytes)
uword len;
elt_t *new;
- new = vec_new_ha (elt_t, num_elts, hdr_bytes, 0);
+ new = vec_new_generic (elt_t, num_elts, hdr_bytes, 0, 0);
len = vec_len (new);
ASSERT (len == num_elts);
@@ -687,7 +688,7 @@ validate_vec_init_h (uword num_elts, uword hdr_bytes)
{
if (i == 0)
{
- ptr = (u8 *) vec_header (new, hdr_bytes);
+ ptr = (u8 *) vec_header (new);
end = ptr + hdr_bytes;
}
else
@@ -799,7 +800,7 @@ run_validator_wh (uword iter)
{
case OP_IS_VEC_INIT_H:
num_elts = bounded_random_u32 (&g_seed, 0, MAX_CHANGE);
- vec_free_h (vec, sizeof (hdr_t));
+ vec_free (vec);
VERBOSE2 ("vec_init_h(), new elts %d\n", num_elts);
vec = validate_vec_init_h (num_elts, sizeof (hdr_t));
break;
@@ -840,7 +841,7 @@ run_validator_wh (uword iter)
}
validate_vec (vec, sizeof (hdr_t));
- vec_free_h (vec, sizeof (hdr_t));
+ vec_free (vec);
}
static void
diff --git a/src/vppinfra/time.c b/src/vppinfra/time.c
index 3377828bbc5..5a6aaf182e4 100644
--- a/src/vppinfra/time.c
+++ b/src/vppinfra/time.c
@@ -74,7 +74,6 @@ clock_frequency_from_proc_filesystem (void)
{
f64 cpu_freq = 1e9; /* better than 40... */
f64 ppc_timebase = 0; /* warnings be gone */
- int fd;
unformat_input_t input;
/* $$$$ aarch64 kernel doesn't report "cpu MHz" */
@@ -83,26 +82,24 @@ clock_frequency_from_proc_filesystem (void)
#endif
cpu_freq = 0;
- fd = open ("/proc/cpuinfo", 0);
- if (fd < 0)
- return cpu_freq;
-
- unformat_init_clib_file (&input, fd);
ppc_timebase = 0;
- while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ if (unformat_init_file (&input, "/proc/cpuinfo"))
{
- if (unformat (&input, "cpu MHz : %f", &cpu_freq))
- cpu_freq *= 1e6;
- else if (unformat (&input, "timebase : %f", &ppc_timebase))
- ;
- else
- unformat_skip_line (&input);
- }
-
- unformat_free (&input);
+ while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (&input, "cpu MHz : %f", &cpu_freq))
+ cpu_freq *= 1e6;
+ else if (unformat (&input, "timebase : %f", &ppc_timebase))
+ ;
+ else
+ unformat_skip_line (&input);
+ }
- close (fd);
+ unformat_free (&input);
+ }
+ else
+ return cpu_freq;
/* Override CPU frequency with time base for PPC. */
if (ppc_timebase != 0)
@@ -117,21 +114,19 @@ static f64
clock_frequency_from_sys_filesystem (void)
{
f64 cpu_freq = 0.0;
- int fd;
unformat_input_t input;
/* Time stamp always runs at max frequency. */
cpu_freq = 0;
- fd = open ("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", 0);
- if (fd < 0)
- goto done;
-
- unformat_init_clib_file (&input, fd);
- (void) unformat (&input, "%f", &cpu_freq);
- cpu_freq *= 1e3; /* measured in kHz */
- unformat_free (&input);
- close (fd);
-done:
+
+ if (unformat_init_file (
+ &input, "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"))
+ {
+ if (unformat (&input, "%f", &cpu_freq))
+ cpu_freq *= 1e3; /* measured in kHz */
+ unformat_free (&input);
+ }
+
return cpu_freq;
}
diff --git a/src/vppinfra/time.h b/src/vppinfra/time.h
index 4d8997f0a9e..761dbed3fe8 100644
--- a/src/vppinfra/time.h
+++ b/src/vppinfra/time.h
@@ -192,6 +192,15 @@ clib_cpu_time_now (void)
return result;
}
+#elif defined(__riscv)
+
+always_inline u64
+clib_cpu_time_now (void)
+{
+ u64 result;
+ asm volatile("rdcycle %0\n" : "=r"(result));
+ return result;
+}
#else
#error "don't know how to read CPU time stamp"
diff --git a/src/vppinfra/timing_wheel.c b/src/vppinfra/timing_wheel.c
index 2c46d72a2fe..830888a19c1 100644
--- a/src/vppinfra/timing_wheel.c
+++ b/src/vppinfra/timing_wheel.c
@@ -185,7 +185,7 @@ free_elt_vector (timing_wheel_t * w, timing_wheel_elt_t * ev)
/* Poison free elements so we never use them by mistake. */
if (CLIB_DEBUG > 0)
clib_memset (ev, ~0, vec_len (ev) * sizeof (ev[0]));
- _vec_len (ev) = 0;
+ vec_set_len (ev, 0);
vec_add1 (w->free_elt_vectors, ev);
}
@@ -302,23 +302,19 @@ timing_wheel_insert (timing_wheel_t * w, u64 insert_cpu_time, u32 user_data)
/* Delete elts with given user data so that stale events don't expire. */
vec_foreach (l, w->levels)
{
- /* *INDENT-OFF* */
clib_bitmap_foreach (wi, l->occupancy_bitmap) {
l->elts[wi] = delete_user_data (l->elts[wi], user_data);
if (vec_len (l->elts[wi]) == 0)
l->occupancy_bitmap = clib_bitmap_andnoti (l->occupancy_bitmap, wi);
}
- /* *INDENT-ON* */
}
{
timing_wheel_overflow_elt_t *oe;
- /* *INDENT-OFF* */
pool_foreach (oe, w->overflow_pool) {
if (oe->user_data == user_data)
pool_put (w->overflow_pool, oe);
}
- /* *INDENT-ON* */
}
hash_unset (w->deleted_user_data_hash, user_data);
@@ -397,10 +393,8 @@ timing_wheel_next_expiring_elt_time (timing_wheel_t * w)
if (min_dt != ~0)
min_t = w->cpu_time_base + min_dt;
- /* *INDENT-OFF* */
pool_foreach (oe, w->overflow_pool)
{ min_t = clib_min (min_t, oe->cpu_time); }
- /* *INDENT-ON* */
done:
return min_t;
@@ -459,7 +453,7 @@ expire_bin (timing_wheel_t * w,
/* Adjust for deleted elts. */
if (j < e_len)
- _vec_len (expired_user_data) -= e_len - j;
+ vec_dec_len (expired_user_data, e_len - j);
free_elt_vector (w, e);
@@ -485,7 +479,6 @@ advance_cpu_time_base (timing_wheel_t * w, u32 * expired_user_data)
vec_foreach (l, w->levels)
{
uword wi;
- /* *INDENT-OFF* */
clib_bitmap_foreach (wi, l->occupancy_bitmap) {
vec_foreach (e, l->elts[wi])
{
@@ -496,13 +489,11 @@ advance_cpu_time_base (timing_wheel_t * w, u32 * expired_user_data)
e->cpu_time_relative_to_base -= delta;
}
}
- /* *INDENT-ON* */
}
/* See which overflow elements fit now. */
{
timing_wheel_overflow_elt_t *oe;
- /* *INDENT-OFF* */
pool_foreach (oe, w->overflow_pool) {
/* It fits now into 32 bits. */
if (0 == ((oe->cpu_time - w->cpu_time_base) >> BITS (e->cpu_time_relative_to_base)))
@@ -521,7 +512,6 @@ advance_cpu_time_base (timing_wheel_t * w, u32 * expired_user_data)
pool_put (w->overflow_pool, oe);
}
}
- /* *INDENT-ON* */
}
return expired_user_data;
}
@@ -613,7 +603,7 @@ timing_wheel_advance (timing_wheel_t * w, u64 advance_cpu_time,
if (PREDICT_FALSE (current_ti != advance_ti))
{
if (w->unexpired_elts_pending_insert)
- _vec_len (w->unexpired_elts_pending_insert) = 0;
+ vec_set_len (w->unexpired_elts_pending_insert, 0);
level_index = 0;
while (current_ti != advance_ti)
@@ -647,12 +637,10 @@ timing_wheel_advance (timing_wheel_t * w, u64 advance_cpu_time,
break;
level = vec_elt_at_index (w->levels, level_index);
- /* *INDENT-OFF* */
clib_bitmap_foreach (wi, level->occupancy_bitmap) {
expired_user_data = expire_bin (w, level_index, wi, advance_cpu_time,
expired_user_data);
}
- /* *INDENT-ON* */
}
if (PREDICT_TRUE (level_index < vec_len (w->levels)))
@@ -684,7 +672,7 @@ timing_wheel_advance (timing_wheel_t * w, u64 advance_cpu_time,
{
timing_wheel_elt_t *e;
vec_foreach (e, w->unexpired_elts_pending_insert) insert_elt (w, e);
- _vec_len (w->unexpired_elts_pending_insert) = 0;
+ vec_set_len (w->unexpired_elts_pending_insert, 0);
}
/* Don't advance until necessary. */
diff --git a/src/vppinfra/tw_timer_template.c b/src/vppinfra/tw_timer_template.c
index 97c70b223ce..6e8a58dbfaf 100644
--- a/src/vppinfra/tw_timer_template.c
+++ b/src/vppinfra/tw_timer_template.c
@@ -424,7 +424,7 @@ TW (tw_timer_wheel_init) (TWT (tw_timer_wheel) * tw,
tw->ticks_per_second = 1.0 / timer_interval_in_seconds;
vec_validate (tw->expired_timer_handles, 0);
- _vec_len (tw->expired_timer_handles) = 0;
+ vec_set_len (tw->expired_timer_handles, 0);
for (ring = 0; ring < TW_TIMER_WHEELS; ring++)
{
@@ -536,7 +536,7 @@ static inline
if (callback_vector_arg == 0)
{
- _vec_len (tw->expired_timer_handles) = 0;
+ vec_set_len (tw->expired_timer_handles, 0);
callback_vector = tw->expired_timer_handles;
}
else
diff --git a/src/vppinfra/types.h b/src/vppinfra/types.h
index c5e7f09ef23..ad85af35ac9 100644
--- a/src/vppinfra/types.h
+++ b/src/vppinfra/types.h
@@ -57,12 +57,8 @@ typedef unsigned char u8;
typedef unsigned short u16;
#endif /* ! CLIB_LINUX_KERNEL */
-#if defined (__x86_64__)
-#ifndef __COVERITY__
-typedef signed int i128 __attribute__ ((mode (TI)));
-typedef unsigned int u128 __attribute__ ((mode (TI)));
-#endif
-#endif
+typedef signed __int128 i128;
+typedef unsigned __int128 u128;
#if (defined(i386) || (defined(_mips) && __mips != 64) || defined(powerpc) || defined (__SPU__) || defined(__sparc__) || defined(__arm__) || defined (__xtensa__) || defined(__TMS320C6X__))
typedef signed int i32;
@@ -73,7 +69,9 @@ typedef unsigned int u32;
typedef unsigned long long u64;
#endif /* CLIB_AVOID_CLASH_WITH_LINUX_TYPES */
-#elif defined(alpha) || (defined(_mips) && __mips == 64) || defined(__x86_64__) || defined (__powerpc64__) || defined (__aarch64__)
+#elif defined(alpha) || (defined(_mips) && __mips == 64) || \
+ defined(__x86_64__) || defined(__powerpc64__) || defined(__aarch64__) || \
+ (defined(__riscv) && __riscv_xlen == 64)
typedef signed int i32;
typedef signed long i64;
@@ -123,6 +121,27 @@ typedef u64 clib_address_t;
typedef u32 clib_address_t;
#endif
+#define CLIB_I8_MAX __INT8_MAX__
+#define CLIB_I16_MAX __INT16_MAX__
+#define CLIB_I32_MAX __INT32_MAX__
+#define CLIB_I64_MAX __INT64_MAX__
+
+#define CLIB_U8_MAX __UINT8_MAX__
+#define CLIB_U16_MAX __UINT16_MAX__
+#define CLIB_U32_MAX __UINT32_MAX__
+#define CLIB_U64_MAX __UINT64_MAX__
+
+#define CLIB_F64_MAX __DBL_MAX__
+#define CLIB_F32_MAX __FLT_MAX__
+
+#if clib_address_bits == 64
+#define CLIB_WORD_MAX CLIB_I64_MAX
+#define CLIB_UWORD_MAX CLIB_U64_MAX
+#else
+#define CLIB_WORD_MAX CLIB_I32_MAX
+#define CLIB_UWORD_MAX CLIB_U32_MAX
+#endif
+
/* These are needed to convert between pointers and machine words.
MIPS is currently the only machine that can have different sized
pointers and machine words (but only when compiling with 64 bit
@@ -133,6 +152,14 @@ pointer_to_uword (const void *p)
return (uword) (clib_address_t) p;
}
+static inline __attribute__ ((always_inline)) uword
+pointer_is_aligned (void *p, uword align)
+{
+ if ((pointer_to_uword (p) & (align - 1)) == 0)
+ return 1;
+ return 0;
+}
+
#define uword_to_pointer(u,type) ((type) (clib_address_t) (u))
/* Any type: can be either word or pointer. */
@@ -163,6 +190,27 @@ typedef f64 fword;
__attribute__ ((aligned (align), packed)); \
} *) (addr))->_data)
+typedef u16 u16u __attribute__ ((aligned (1), __may_alias__));
+typedef u32 u32u __attribute__ ((aligned (1), __may_alias__));
+typedef u64 u64u __attribute__ ((aligned (1), __may_alias__));
+typedef i16 i16u __attribute__ ((aligned (1), __may_alias__));
+typedef i32 i32u __attribute__ ((aligned (1), __may_alias__));
+typedef i64 i64u __attribute__ ((aligned (1), __may_alias__));
+typedef word wordu __attribute__ ((aligned (1), __may_alias__));
+typedef uword uwordu __attribute__ ((aligned (1), __may_alias__));
+
+#define foreach_int(__var, ...) \
+ for (int __int_array[] = { __VA_ARGS__, 0 }, *__int_ptr = __int_array, \
+ __var = *__int_ptr; \
+ __int_ptr - (ARRAY_LEN (__int_array) - 1) < __int_array; \
+ __var = *++__int_ptr)
+
+#define foreach_pointer(__var, ...) \
+ for (void *__ptr_array[] = { __VA_ARGS__, 0 }, **__ptr_ptr = __ptr_array, \
+ *__var = *__ptr_ptr; \
+ __ptr_ptr - (ARRAY_LEN (__ptr_array) - 1) < __ptr_array; \
+ __var = *++__ptr_ptr)
+
#endif /* included_clib_types_h */
/*
diff --git a/src/vppinfra/unformat.c b/src/vppinfra/unformat.c
index 172182f8a34..522517888c3 100644
--- a/src/vppinfra/unformat.c
+++ b/src/vppinfra/unformat.c
@@ -36,6 +36,7 @@
*/
#include <vppinfra/format.h>
+#include <fcntl.h>
/* Call user's function to fill input buffer. */
__clib_export uword
@@ -70,22 +71,6 @@ _unformat_fill_input (unformat_input_t * i)
return i->index;
}
-always_inline uword
-is_white_space (uword c)
-{
- switch (c)
- {
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- return 1;
-
- default:
- return 0;
- }
-}
-
/* Format function for dumping input stream. */
__clib_export u8 *
format_unformat_error (u8 * s, va_list * va)
@@ -968,7 +953,7 @@ parse_fail:
if (!input_matches_format)
input->index = input->buffer_marks[l - 1];
- _vec_len (input->buffer_marks) = l - 1;
+ vec_set_len (input->buffer_marks, l - 1);
}
return input_matches_format;
@@ -1003,7 +988,7 @@ unformat_user (unformat_input_t * input, unformat_function_t * func, ...)
if (!result && input->index != UNFORMAT_END_OF_INPUT)
input->index = input->buffer_marks[l];
- _vec_len (input->buffer_marks) = l;
+ vec_set_len (input->buffer_marks, l);
return result;
}
@@ -1026,7 +1011,8 @@ unformat_init_command_line (unformat_input_t * input, char *argv[])
}
__clib_export void
-unformat_init_string (unformat_input_t * input, char *string, int string_len)
+unformat_init_string (unformat_input_t *input, const char *string,
+ int string_len)
{
unformat_init (input, 0, 0);
if (string_len > 0)
@@ -1052,7 +1038,7 @@ clib_file_fill_buffer (unformat_input_t * input)
vec_resize (input->buffer, 4096);
n = read (fd, input->buffer + l, 4096);
if (n > 0)
- _vec_len (input->buffer) = l + n;
+ vec_set_len (input->buffer, l + n);
if (n <= 0)
return UNFORMAT_END_OF_INPUT;
@@ -1060,6 +1046,13 @@ clib_file_fill_buffer (unformat_input_t * input)
return input->index;
}
+static void
+unformat_close_fd (unformat_input_t *input)
+{
+ int fd = pointer_to_uword (input->fill_buffer_arg);
+ close (fd);
+}
+
__clib_export void
unformat_init_clib_file (unformat_input_t * input, int file_descriptor)
{
@@ -1067,6 +1060,31 @@ unformat_init_clib_file (unformat_input_t * input, int file_descriptor)
uword_to_pointer (file_descriptor, void *));
}
+__clib_export uword
+unformat_init_file (unformat_input_t *input, char *fmt, ...)
+{
+ va_list va;
+ u8 *path;
+ int fd;
+
+ va_start (va, fmt);
+ path = va_format (0, fmt, &va);
+ va_end (va);
+ vec_add1 (path, 0);
+
+ fd = open ((char *) path, 0);
+ vec_free (path);
+
+ if (fd >= 0)
+ {
+ unformat_init (input, clib_file_fill_buffer,
+ uword_to_pointer (fd, void *));
+ input->free = unformat_close_fd;
+ return 1;
+ }
+ return 0;
+}
+
/* Take input from Unix environment variable. */
uword
unformat_init_unix_env (unformat_input_t * input, char *var)
@@ -1101,8 +1119,97 @@ unformat_data_size (unformat_input_t * input, va_list * args)
return 1;
}
+__clib_export uword
+unformat_c_string_array (unformat_input_t *input, va_list *va)
+{
+ char *str = va_arg (*va, char *);
+ u32 array_len = va_arg (*va, u32);
+ uword c, rv = 0;
+ u8 *s = 0;
+
+ if (unformat (input, "%v", &s) == 0)
+ return 0;
+
+ c = vec_len (s);
+
+ if (c > 0 && c < array_len)
+ {
+ clib_memcpy (str, s, c);
+ str[c] = 0;
+ rv = 1;
+ }
+
+ vec_free (s);
+ return rv;
+}
+
+static uword
+__unformat_quoted_string (unformat_input_t *input, u8 **sp, char quote)
+{
+ u8 *s = 0;
+ uword c, p = 0;
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ if (!is_white_space (c))
+ break;
+
+ if (c != quote)
+ return 0;
+
+ while ((c = unformat_get_input (input)) != UNFORMAT_END_OF_INPUT)
+ {
+ if (c == quote && p != '\\')
+ {
+ *sp = s;
+ return 1;
+ }
+ vec_add1 (s, c);
+ p = c;
+ }
+ vec_free (s);
+
+ return 0;
+}
+
+__clib_export uword
+unformat_single_quoted_string (unformat_input_t *input, va_list *va)
+{
+ return __unformat_quoted_string (input, va_arg (*va, u8 **), '\'');
+}
+
+__clib_export uword
+unformat_double_quoted_string (unformat_input_t *input, va_list *va)
+{
+ return __unformat_quoted_string (input, va_arg (*va, u8 **), '"');
+}
+
#endif /* CLIB_UNIX */
+__clib_export uword
+unformat_u8 (unformat_input_t *input, va_list *args)
+{
+ u8 *d = va_arg (*args, u8 *);
+
+ u32 tmp;
+ if (!unformat (input, "%u", &tmp) || tmp > CLIB_U8_MAX)
+ return 0;
+
+ *d = tmp;
+ return 1;
+}
+
+__clib_export uword
+unformat_u16 (unformat_input_t *input, va_list *args)
+{
+ u16 *d = va_arg (*args, u16 *);
+
+ u32 tmp;
+ if (!unformat (input, "%u", &tmp) || tmp > CLIB_U16_MAX)
+ return 0;
+
+ *d = tmp;
+ return 1;
+}
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vppinfra/unix-formats.c b/src/vppinfra/unix-formats.c
index fd112675fa7..1a101e04aee 100644
--- a/src/vppinfra/unix-formats.c
+++ b/src/vppinfra/unix-formats.c
@@ -67,7 +67,9 @@
#include <linux/types.h>
#include <linux/netlink.h>
#endif
-#endif
+#elif __FreeBSD__
+#include <netlink/netlink.h>
+#endif /* __linux__ */
#endif /* ! __KERNEL__ */
@@ -91,7 +93,6 @@
# include <netinet/if_ether.h>
#endif /* __KERNEL__ */
-#include <vppinfra/bitops.h> /* foreach_set_bit */
#include <vppinfra/format.h>
#include <vppinfra/error.h>
@@ -410,7 +411,9 @@ u8 * format_signal (u8 * s, va_list * args)
_ (SIGPROF);
_ (SIGWINCH);
_ (SIGIO);
+#ifdef __linux__
_ (SIGPWR);
+#endif /* __linux */
#ifdef SIGSYS
_ (SIGSYS);
#endif
@@ -431,12 +434,15 @@ u8 * format_ucontext_pc (u8 * s, va_list * args)
uc = va_arg (*args, ucontext_t *);
+#ifdef __linux__
#if defined (powerpc)
regs = &uc->uc_mcontext.uc_regs->gregs[0];
#elif defined (powerpc64)
regs = &uc->uc_mcontext.uc_regs->gp_regs[0];
#elif defined (i386) || defined (__x86_64__)
regs = (void *) &uc->uc_mcontext.gregs[0];
+#elif defined(__aarch64__)
+ regs = (void *) &uc->uc_mcontext.pc;
#endif
#if defined (powerpc) || defined (powerpc64)
@@ -445,10 +451,19 @@ u8 * format_ucontext_pc (u8 * s, va_list * args)
reg_no = REG_EIP;
#elif defined (__x86_64__)
reg_no = REG_RIP;
+#elif defined(__aarch64__)
+ reg_no = 0;
#else
reg_no = 0;
regs = 0;
#endif
+#elif __FreeBSD__
+#if defined(__amd64__)
+ reg_no = 0;
+ regs = (void *) &uc->uc_mcontext.mc_rip;
+#else
+#endif /* __amd64__ */
+#endif /* __linux__ */
if (! regs)
return format (s, "unsupported");
diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c
index 5559a2392fe..29cbe0a557d 100644
--- a/src/vppinfra/unix-misc.c
+++ b/src/vppinfra/unix-misc.c
@@ -37,13 +37,22 @@
#include <vppinfra/error.h>
#include <vppinfra/os.h>
+#include <vppinfra/bitmap.h>
#include <vppinfra/unix.h>
+#include <vppinfra/format.h>
+#ifdef __linux__
+#include <vppinfra/linux/sysfs.h>
+#else
+#include <sys/sysctl.h>
+#endif
#include <sys/stat.h>
#include <sys/types.h>
+#include <sys/syscall.h>
#include <sys/uio.h> /* writev */
#include <fcntl.h>
#include <stdio.h> /* for sprintf */
+#include <limits.h>
__clib_export __thread uword __os_thread_index = 0;
__clib_export __thread uword __os_numa_index = 0;
@@ -131,6 +140,37 @@ clib_file_contents (char *file, u8 ** result)
return error;
}
+__clib_export u8 *
+clib_file_get_resolved_basename (char *fmt, ...)
+{
+ va_list va;
+ char *p, buffer[PATH_MAX];
+ u8 *link, *s = 0;
+ int r;
+
+ va_start (va, fmt);
+ link = va_format (0, fmt, &va);
+ va_end (va);
+ vec_add1 (link, 0);
+
+ r = readlink ((char *) link, buffer, sizeof (buffer) - 1);
+ vec_free (link);
+
+ if (r < 1)
+ return 0;
+
+ buffer[r] = 0;
+ p = buffer + r - 1;
+ while (p > buffer && p[-1] != '/')
+ p--;
+
+ while (p[0])
+ vec_add1 (s, p++[0]);
+
+ vec_add1 (s, 0);
+ return s;
+}
+
clib_error_t *
unix_proc_file_contents (char *file, u8 ** result)
{
@@ -158,7 +198,7 @@ unix_proc_file_contents (char *file, u8 ** result)
if (bytes == 0)
{
- _vec_len (rv) = pos;
+ vec_set_len (rv, pos);
break;
}
pos += bytes;
@@ -227,6 +267,121 @@ os_get_nthreads (void)
return 1;
}
+__clib_export clib_bitmap_t *
+os_get_online_cpu_core_bitmap ()
+{
+#if __linux__
+ return clib_sysfs_read_bitmap ("/sys/devices/system/cpu/online");
+#else
+ return 0;
+#endif
+}
+
+__clib_export clib_bitmap_t *
+os_get_cpu_affinity_bitmap (int pid)
+{
+#if __linux
+ int index, ret;
+ cpu_set_t cpuset;
+ uword *affinity_cpus;
+
+ clib_bitmap_alloc (affinity_cpus, sizeof (cpu_set_t));
+ clib_bitmap_zero (affinity_cpus);
+
+ __CPU_ZERO_S (sizeof (cpu_set_t), &cpuset);
+
+ ret = syscall (SYS_sched_getaffinity, 0, sizeof (cpu_set_t), &cpuset);
+
+ if (ret < 0)
+ {
+ clib_bitmap_free (affinity_cpus);
+ return 0;
+ }
+
+ for (index = 0; index < sizeof (cpu_set_t); index++)
+ if (__CPU_ISSET_S (index, sizeof (cpu_set_t), &cpuset))
+ clib_bitmap_set (affinity_cpus, index, 1);
+ return affinity_cpus;
+#else
+ return 0;
+#endif
+}
+
+__clib_export clib_bitmap_t *
+os_get_online_cpu_node_bitmap ()
+{
+#if __linux__
+ return clib_sysfs_read_bitmap ("/sys/devices/system/node/online");
+#else
+ return 0;
+#endif
+}
+__clib_export clib_bitmap_t *
+os_get_cpu_on_node_bitmap (int node)
+{
+#if __linux__
+ return clib_sysfs_read_bitmap ("/sys/devices/system/node/node%u/cpulist",
+ node);
+#else
+ return 0;
+#endif
+}
+
+__clib_export clib_bitmap_t *
+os_get_cpu_with_memory_bitmap ()
+{
+#if __linux__
+ return clib_sysfs_read_bitmap ("/sys/devices/system/node/has_memory");
+#else
+ return 0;
+#endif
+}
+
+__clib_export int
+os_get_cpu_phys_core_id (int cpu_id)
+{
+#if __linux
+ int core_id = -1;
+ clib_error_t *err;
+ u8 *p;
+
+ p =
+ format (0, "/sys/devices/system/cpu/cpu%u/topology/core_id%c", cpu_id, 0);
+ err = clib_sysfs_read ((char *) p, "%d", &core_id);
+ vec_free (p);
+ if (err)
+ {
+ clib_error_free (err);
+ return -1;
+ }
+ return core_id;
+#else
+ return -1;
+#endif
+}
+
+__clib_export u8 *
+os_get_exec_path ()
+{
+ u8 *rv = 0;
+#ifdef __linux__
+ char tmp[PATH_MAX];
+ ssize_t sz = readlink ("/proc/self/exe", tmp, sizeof (tmp));
+
+ if (sz <= 0)
+ return 0;
+#else
+ char tmp[MAXPATHLEN];
+ int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 };
+ size_t sz = MAXPATHLEN;
+
+ if (sysctl (mib, 4, tmp, &sz, NULL, 0) == -1)
+ return 0;
+#endif
+ vec_add (rv, tmp, sz);
+ return rv;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vppinfra/unix.h b/src/vppinfra/unix.h
index 5b82c23a3c0..abda21879f9 100644
--- a/src/vppinfra/unix.h
+++ b/src/vppinfra/unix.h
@@ -53,6 +53,28 @@ clib_error_t *clib_file_contents (char *file, u8 ** result);
/* As above but for /proc file system on Linux. */
clib_error_t *unix_proc_file_contents (char *file, u8 ** result);
+/* Retrieve bitmap of online cpu cures */
+clib_bitmap_t *os_get_online_cpu_core_bitmap ();
+
+/* Retrieve bitmap of cpus vpp has affinity to */
+clib_bitmap_t *os_get_cpu_affinity_bitmap (int pid);
+
+/* Retrieve bitmap of online cpu nodes (sockets) */
+clib_bitmap_t *os_get_online_cpu_node_bitmap ();
+
+/* Retrieve bitmap of cpus with memory */
+clib_bitmap_t *os_get_cpu_with_memory_bitmap ();
+
+/* Retrieve bitmap of cpus on specific node */
+clib_bitmap_t *os_get_cpu_on_node_bitmap (int node);
+
+/* Retrieve physical core id of specific cpu, -1 if not available */
+int os_get_cpu_phys_core_id (int cpu);
+
+/* Retrieve the path of the current executable as a vector (not
+ * null-terminated). */
+u8 *os_get_exec_path ();
+
#endif /* included_clib_unix_h */
/*
diff --git a/src/vppinfra/vec.c b/src/vppinfra/vec.c
index 970f7f7bfa8..dbaadad2dd5 100644
--- a/src/vppinfra/vec.c
+++ b/src/vppinfra/vec.c
@@ -1,39 +1,6 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2022 Cisco Systems, Inc.
*/
-/*
- Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
#include <vppinfra/vec.h>
#include <vppinfra/mem.h>
@@ -42,106 +9,125 @@
#define CLIB_VECTOR_GROW_BY_ONE 0
#endif
-/* Vector resize operator. Called as needed by various macros such as
- vec_add1() when we need to allocate memory. */
-__clib_export void *
-vec_resize_allocate_memory (void *v,
- word length_increment,
- uword data_bytes,
- uword header_bytes, uword data_align,
- uword numa_id)
+__clib_export uword
+vec_mem_size (void *v)
{
- vec_header_t *vh = _vec_find (v);
- uword old_alloc_bytes, new_alloc_bytes;
- void *old, *new;
- void *oldheap;
-
- header_bytes = vec_header_bytes (header_bytes);
-
- data_bytes += header_bytes;
-
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- {
- oldheap = clib_mem_get_per_cpu_heap ();
- clib_mem_set_per_cpu_heap (clib_mem_get_per_numa_heap (numa_id));
- }
+ return v ? clib_mem_size (v - vec_get_header_size (v)) : 0;
+}
- if (!v)
+__clib_export void *
+_vec_alloc_internal (uword n_elts, const vec_attr_t *const attr)
+{
+ uword req_size, alloc_size, data_offset, align;
+ uword elt_sz = attr->elt_sz;
+ void *p, *v, *heap = attr->heap;
+
+ /* alignment must be power of 2 */
+ align = clib_max (attr->align, VEC_MIN_ALIGN);
+ ASSERT (count_set_bits (align) == 1);
+
+ /* calc offset where vector data starts */
+ data_offset = attr->hdr_sz + sizeof (vec_header_t);
+ data_offset += heap ? sizeof (void *) : 0;
+ data_offset = round_pow2 (data_offset, align);
+
+ req_size = data_offset + n_elts * elt_sz;
+ p = clib_mem_heap_alloc_aligned (heap, req_size, align);
+
+ /* zero out whole alocation */
+ alloc_size = clib_mem_size (p);
+ clib_mem_unpoison (p, alloc_size);
+ clib_memset_u8 (p, 0, alloc_size);
+
+ /* fill vector header */
+ v = p + data_offset;
+ _vec_find (v)->len = n_elts;
+ _vec_find (v)->hdr_size = data_offset / VEC_MIN_ALIGN;
+ _vec_find (v)->log2_align = min_log2 (align);
+ if (heap)
{
- new = clib_mem_alloc_aligned_at_offset (data_bytes, data_align, header_bytes, 1 /* yes, call os_out_of_memory */
- );
- new_alloc_bytes = clib_mem_size (new);
- CLIB_MEM_UNPOISON (new + data_bytes, new_alloc_bytes - data_bytes);
- clib_memset (new, 0, new_alloc_bytes);
- CLIB_MEM_POISON (new + data_bytes, new_alloc_bytes - data_bytes);
- v = new + header_bytes;
- _vec_len (v) = length_increment;
- _vec_numa (v) = numa_id;
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- clib_mem_set_per_cpu_heap (oldheap);
- return v;
+ _vec_find (v)->default_heap = 0;
+ _vec_heap (v) = heap;
}
+ else
+ _vec_find (v)->default_heap = 1;
- vh->len += length_increment;
- old = v - header_bytes;
-
- /* Vector header must start heap object. */
- ASSERT (clib_mem_is_heap_object (old));
-
- old_alloc_bytes = clib_mem_size (old);
+ /* poison extra space given by allocator */
+ clib_mem_poison (p + req_size, alloc_size - req_size);
+ _vec_set_grow_elts (v, (alloc_size - req_size) / elt_sz);
+ return v;
+}
- /* Need to resize? */
- if (data_bytes <= old_alloc_bytes)
- {
- CLIB_MEM_UNPOISON (v, data_bytes);
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- clib_mem_set_per_cpu_heap (oldheap);
- return v;
- }
+static inline void
+_vec_update_len (void *v, uword n_elts, uword elt_sz, uword n_data_bytes,
+ uword unused_bytes)
+{
+ _vec_find (v)->len = n_elts;
+ _vec_set_grow_elts (v, unused_bytes / elt_sz);
+ clib_mem_unpoison (v, n_data_bytes);
+ clib_mem_poison (v + n_data_bytes, unused_bytes);
+}
-#if CLIB_VECTOR_GROW_BY_ONE > 0
- new_alloc_bytes = data_bytes;
-#else
- new_alloc_bytes = (old_alloc_bytes * 3) / 2;
- if (new_alloc_bytes < data_bytes)
- new_alloc_bytes = data_bytes;
-#endif
+__clib_export void *
+_vec_realloc_internal (void *v, uword n_elts, const vec_attr_t *const attr)
+{
+ uword old_alloc_sz, new_alloc_sz, new_data_size, n_data_bytes, data_offset;
+ uword elt_sz;
- new =
- clib_mem_alloc_aligned_at_offset (new_alloc_bytes, data_align,
- header_bytes,
- 1 /* yes, call os_out_of_memory */ );
+ if (PREDICT_FALSE (v == 0))
+ return _vec_alloc_internal (n_elts, attr);
- /* FIXME fail gracefully. */
- if (!new)
- clib_panic
- ("vec_resize fails, length increment %d, data bytes %d, alignment %d",
- length_increment, data_bytes, data_align);
+ elt_sz = attr->elt_sz;
+ n_data_bytes = n_elts * elt_sz;
+ data_offset = vec_get_header_size (v);
+ new_data_size = data_offset + n_data_bytes;
+ new_alloc_sz = old_alloc_sz = clib_mem_size (vec_header (v));
- CLIB_MEM_UNPOISON (old, old_alloc_bytes);
- clib_memcpy_fast (new, old, old_alloc_bytes);
- clib_mem_free (old);
+ /* realloc if new size cannot fit into existing allocation */
+ if (old_alloc_sz < new_data_size)
+ {
+ uword n_bytes, req_size = new_data_size;
+ void *p = v - data_offset;
- /* Allocator may give a bit of extra room. */
- new_alloc_bytes = clib_mem_size (new);
- v = new;
+ req_size += CLIB_VECTOR_GROW_BY_ONE ? 0 : n_data_bytes / 2;
- /* Zero new memory. */
- CLIB_MEM_UNPOISON (new + data_bytes, new_alloc_bytes - data_bytes);
- memset (v + old_alloc_bytes, 0, new_alloc_bytes - old_alloc_bytes);
- CLIB_MEM_POISON (new + data_bytes, new_alloc_bytes - data_bytes);
+ p = clib_mem_heap_realloc_aligned (vec_get_heap (v), p, req_size,
+ vec_get_align (v));
+ new_alloc_sz = clib_mem_size (p);
+ v = p + data_offset;
- _vec_numa ((v + header_bytes)) = numa_id;
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- clib_mem_set_per_cpu_heap (oldheap);
+ /* zero out new allocation */
+ n_bytes = new_alloc_sz - old_alloc_sz;
+ clib_mem_unpoison (p + old_alloc_sz, n_bytes);
+ clib_memset_u8 (p + old_alloc_sz, 0, n_bytes);
+ }
- return v + header_bytes;
+ _vec_update_len (v, n_elts, elt_sz, n_data_bytes,
+ new_alloc_sz - new_data_size);
+ return v;
}
-__clib_export uword
-clib_mem_is_vec_h (void *v, uword header_bytes)
+__clib_export void *
+_vec_resize_internal (void *v, uword n_elts, const vec_attr_t *const attr)
{
- return clib_mem_is_heap_object (vec_header (v, header_bytes));
+ uword elt_sz = attr->elt_sz;
+ if (PREDICT_TRUE (v != 0))
+ {
+ uword hs = _vec_find (v)->hdr_size * VEC_MIN_ALIGN;
+ uword alloc_sz = clib_mem_size (v - hs);
+ uword n_data_bytes = elt_sz * n_elts;
+ word unused_bytes = alloc_sz - (n_data_bytes + hs);
+
+ if (PREDICT_TRUE (unused_bytes >= 0))
+ {
+ _vec_update_len (v, n_elts, elt_sz, n_data_bytes, unused_bytes);
+ return v;
+ }
+ }
+
+ /* this shouled emit tail jump and likely avoid stack usasge inside this
+ * function */
+ return _vec_realloc_internal (v, n_elts, attr);
}
__clib_export u32
@@ -155,62 +141,3 @@ vec_free_not_inline (void *v)
{
vec_free (v);
}
-
-/** \cond */
-
-#ifdef TEST
-
-#include <stdio.h>
-
-void
-main (int argc, char *argv[])
-{
- word n = atoi (argv[1]);
- word i, *x = 0;
-
- typedef struct
- {
- word x, y, z;
- } FOO;
-
- FOO *foos = vec_init (FOO, 10), *f;
-
- vec_validate (foos, 100);
- foos[100].x = 99;
-
- _vec_len (foos) = 0;
- for (i = 0; i < n; i++)
- {
- vec_add1 (x, i);
- vec_add2 (foos, f, 1);
- f->x = 2 * i;
- f->y = 3 * i;
- f->z = 4 * i;
- }
-
- {
- word n = 2;
- word m = 42;
- vec_delete (foos, n, m);
- }
-
- {
- word n = 2;
- word m = 42;
- vec_insert (foos, n, m);
- }
-
- vec_free (x);
- vec_free (foos);
- exit (0);
-}
-#endif
-/** \endcond */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/vec.h b/src/vppinfra/vec.h
index d19ff998137..1a64a69a1e6 100644
--- a/src/vppinfra/vec.h
+++ b/src/vppinfra/vec.h
@@ -52,11 +52,13 @@
The memory layout looks like this:
~~~~~~~~
- user header (aligned to uword boundary)
- vector length: number of elements
+ user header (start of memory allocation)
+ padding
+ heap pointer (optional, only if default_heap == 0)
+ vector header: number of elements, header size
user's pointer-> vector element #0
- vector element #1
- ...
+ vector element #1
+ ...
~~~~~~~~
The user pointer contains the address of vector element # 0. Null
@@ -70,8 +72,9 @@
Typically, the header is not present. Headers allow for other
data structures to be built atop CLIB vectors.
- Users may specify the alignment for first data element of a vector
- via the vec_*_aligned macros.
+ While users may specify the alignment for first data element of a vector
+ via the vec_*_aligned macros that is typically not needed as alignment
+ is set based on native alignment of the data structure used.
Vector elements can be any C type e.g. (int, double, struct bar).
This is also true for data types built atop vectors (e.g. heap,
@@ -89,123 +92,130 @@
which are invariant.
*/
-/** \brief Low-level resize allocation function, usually not called directly
+/** \brief Low-level (re)allocation function, usually not called directly
@param v pointer to a vector
- @param length_increment length increment in elements
- @param data_bytes requested size in bytes
- @param header_bytes header size in bytes (may be zero)
- @param data_align alignment (may be zero)
- @param numa_id numa id (may be zero)
+ @param n_elts requested number of elements
+ @param elt_sz requested size of one element
+ @param hdr_sz header size in bytes (may be zero)
+ @param align alignment (may be zero)
@return v_prime pointer to resized vector, may or may not equal v
*/
-void *vec_resize_allocate_memory (void *v,
- word length_increment,
- uword data_bytes,
- uword header_bytes, uword data_align,
- uword numa_id);
-/** \brief Low-level vector resize function, usually not called directly
-
- @param v pointer to a vector
- @param length_increment length increment in elements
- @param data_bytes requested size in bytes
- @param header_bytes header size in bytes (may be zero)
- @param data_align alignment (may be zero)
- @param numa_id (may be ~0)
- @return v_prime pointer to resized vector, may or may not equal v
-*/
+typedef struct
+{
+ void *heap;
+ u32 elt_sz;
+ u16 hdr_sz;
+ u16 align;
+} vec_attr_t;
+
+void *_vec_alloc_internal (uword n_elts, const vec_attr_t *const attr);
+void *_vec_realloc_internal (void *v, uword n_elts,
+ const vec_attr_t *const attr);
+void *_vec_resize_internal (void *v, uword n_elts,
+ const vec_attr_t *const attr);
+
+/* calculate minimum alignment out of data natural alignment and provided
+ * value, should not be < VEC_MIN_ALIGN */
+static_always_inline uword
+__vec_align (uword data_align, uword configuered_align)
+{
+ data_align = clib_max (data_align, configuered_align);
+ ASSERT (count_set_bits (data_align) == 1);
+ return clib_max (VEC_MIN_ALIGN, data_align);
+}
-#define _vec_resize_numa(V,L,DB,HB,A,S) \
-({ \
- __typeof__ ((V)) _V; \
- _V = _vec_resize_inline((void *)V,L,DB,HB,clib_max((__alignof__((V)[0])),(A)),(S)); \
- _V; \
-})
+/* function used t o catch cases where vec_* macros on used on void * */
+static_always_inline uword
+__vec_elt_sz (uword elt_sz, int is_void)
+{
+ /* vector macro operations on void * are not allowed */
+ ASSERT (is_void == 0);
+ return elt_sz;
+}
-#define _vec_resize(V,L,DB,HB,A) \
- _vec_resize_numa(V,L,DB,HB,A,VEC_NUMA_UNSPECIFIED)
+static_always_inline void
+_vec_update_pointer (void **vp, void *v)
+{
+ /* avoid store if not needed */
+ if (v != vp[0])
+ vp[0] = v;
+}
-always_inline void *
-_vec_resize_inline (void *v,
- word length_increment,
- uword data_bytes, uword header_bytes, uword data_align,
- uword numa_id)
+static_always_inline void *
+vec_get_heap (void *v)
{
- vec_header_t *vh = _vec_find (v);
- uword new_data_bytes, aligned_header_bytes;
- void *oldheap;
+ if (v == 0 || _vec_find (v)->default_heap == 1)
+ return 0;
+ return _vec_heap (v);
+}
- aligned_header_bytes = vec_header_bytes (header_bytes);
+static_always_inline uword
+vec_get_align (void *v)
+{
+ return 1ULL << _vec_find (v)->log2_align;
+}
- new_data_bytes = data_bytes + aligned_header_bytes;
+static_always_inline void
+_vec_prealloc (void **vp, uword n_elts, uword hdr_sz, uword align, void *heap,
+ uword elt_sz)
+{
+ const vec_attr_t va = {
+ .elt_sz = elt_sz, .hdr_sz = hdr_sz, .align = align, .heap = heap
+ };
+ void *v;
- if (PREDICT_TRUE (v != 0))
- {
- void *p = v - aligned_header_bytes;
-
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- {
- oldheap = clib_mem_get_per_cpu_heap ();
- clib_mem_set_per_cpu_heap (clib_mem_get_per_numa_heap (numa_id));
- }
-
- /* Vector header must start heap object. */
- ASSERT (clib_mem_is_heap_object (p));
-
- /* Typically we'll not need to resize. */
- if (new_data_bytes <= clib_mem_size (p))
- {
- CLIB_MEM_UNPOISON (v, data_bytes);
- vh->len += length_increment;
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- clib_mem_set_per_cpu_heap (oldheap);
- return v;
- }
- if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED))
- clib_mem_set_per_cpu_heap (oldheap);
- }
+ ASSERT (vp[0] == 0);
- /* Slow path: call helper function. */
- return vec_resize_allocate_memory (v, length_increment, data_bytes,
- header_bytes,
- clib_max (sizeof (vec_header_t),
- data_align), numa_id);
+ v = _vec_alloc_internal (n_elts, &va);
+ _vec_set_len (v, 0, elt_sz);
+ _vec_update_pointer (vp, v);
}
-/** \brief Determine if vector will resize with next allocation
+/** \brief Pre-allocate a vector (generic version)
- @param v pointer to a vector
- @param length_increment length increment in elements
- @param data_bytes requested size in bytes
- @param header_bytes header size in bytes (may be zero)
- @param data_align alignment (may be zero)
- @return 1 if vector will resize 0 otherwise
+ @param V pointer to a vector
+ @param N number of elements to pre-allocate
+ @param H header size in bytes (may be zero)
+ @param A alignment (zero means default alignment of the data structure)
+ @param P heap (zero means default heap)
+ @return V (value-result macro parameter)
*/
-always_inline int
-_vec_resize_will_expand (void *v,
- word length_increment,
- uword data_bytes, uword header_bytes,
- uword data_align)
-{
- uword new_data_bytes, aligned_header_bytes;
+#define vec_prealloc_hap(V, N, H, A, P) \
+ _vec_prealloc ((void **) &(V), N, H, _vec_align (V, A), P, _vec_elt_sz (V))
- aligned_header_bytes = vec_header_bytes (header_bytes);
+/** \brief Pre-allocate a vector (simple version)
+
+ @param V pointer to a vector
+ @param N number of elements to pre-allocate
+ @return V (value-result macro parameter)
+*/
+#define vec_prealloc(V, N) vec_prealloc_hap (V, N, 0, 0, 0)
- new_data_bytes = data_bytes + aligned_header_bytes;
+/** \brief Pre-allocate a vector (heap version)
- if (PREDICT_TRUE (v != 0))
- {
- void *p = v - aligned_header_bytes;
+ @param V pointer to a vector
+ @param N number of elements to pre-allocate
+ @param P heap (zero means default heap)
+ @return V (value-result macro parameter)
+*/
+#define vec_prealloc_heap(V, N, P) vec_prealloc_hap (V, N, 0, 0, P)
- /* Vector header must start heap object. */
- ASSERT (clib_mem_is_heap_object (p));
+always_inline int
+_vec_resize_will_expand (void *v, uword n_elts, uword elt_sz)
+{
+ if (v == 0)
+ return 1;
+
+ /* Vector header must start heap object. */
+ ASSERT (clib_mem_heap_is_heap_object (vec_get_heap (v), vec_header (v)));
+
+ n_elts += _vec_len (v);
+ if ((n_elts * elt_sz) <= vec_max_bytes (v))
+ return 0;
- /* Typically we'll not need to resize. */
- if (new_data_bytes <= clib_mem_size (p))
- return 0;
- }
return 1;
}
@@ -217,34 +227,7 @@ _vec_resize_will_expand (void *v,
*/
#define vec_resize_will_expand(V, N) \
- ({ \
- word _v (n) = (N); \
- word _v (l) = vec_len (V); \
- _vec_resize_will_expand ((V), _v (n), \
- (_v (l) + _v (n)) * sizeof ((V)[0]), 0, 0); \
- })
-
-/** \brief Predicate function, says whether the supplied vector is a clib heap
- object (general version).
-
- @param v pointer to a vector
- @param header_bytes vector header size in bytes (may be zero)
- @return 0 or 1
-*/
-uword clib_mem_is_vec_h (void *v, uword header_bytes);
-
-
-/** \brief Predicate function, says whether the supplied vector is a clib heap
- object
-
- @param v pointer to a vector
- @return 0 or 1
-*/
-always_inline uword
-clib_mem_is_vec (void *v)
-{
- return clib_mem_is_vec_h (v, 0);
-}
+ _vec_resize_will_expand (V, N, _vec_elt_sz (V))
/* Local variable naming macro (prevents collisions with other macro naming). */
#define _v(var) _vec_##var
@@ -258,31 +241,36 @@ clib_mem_is_vec (void *v)
@param N number of elements to add
@param H header size in bytes (may be zero)
@param A alignment (may be zero)
- @param S numa_id (may be zero)
@return V (value-result macro parameter)
*/
-#define vec_resize_has(V,N,H,A,S) \
-do { \
- word _v(n) = (N); \
- word _v(l) = vec_len (V); \
- V = _vec_resize_numa ((V), _v(n), \
- (_v(l) + _v(n)) * sizeof ((V)[0]), \
- (H), (A),(S)); \
-} while (0)
+static_always_inline void
+_vec_resize (void **vp, uword n_add, uword hdr_sz, uword align, uword elt_sz)
+{
+ void *v = *vp;
+ if (PREDICT_FALSE (v == 0))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ *vp = _vec_alloc_internal (n_add, &va);
+ return;
+ }
-/** \brief Resize a vector (less general version).
- Add N elements to end of given vector V, return pointer to start of vector.
- Vector will have room for H header bytes and will have user's data aligned
- at alignment A (rounded to next power of 2).
+ if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ v = _vec_resize_internal (v, _vec_len (v) + n_add, &va);
+ _vec_update_pointer (vp, v);
+ }
+ else
+ _vec_set_len (v, _vec_len (v) + n_add, elt_sz);
+}
- @param V pointer to a vector
- @param N number of elements to add
- @param H header size in bytes (may be zero)
- @param A alignment (may be zero)
- @return V (value-result macro parameter)
-*/
-#define vec_resize_ha(V,N,H,A) vec_resize_has(V,N,H,A,VEC_NUMA_UNSPECIFIED)
+#define vec_resize_ha(V, N, H, A) \
+ _vec_resize ((void **) &(V), N, H, _vec_align (V, A), _vec_elt_sz (V))
/** \brief Resize a vector (no header, unspecified alignment)
Add N elements to end of given vector V, return pointer to start of vector.
@@ -317,12 +305,14 @@ do { \
@return V (value-result macro parameter)
*/
-#define vec_alloc_ha(V,N,H,A) \
-do { \
- uword _v(l) = vec_len (V); \
- vec_resize_ha (V, N, H, A); \
- _vec_len (V) = _v(l); \
-} while (0)
+#define vec_alloc_ha(V, N, H, A) \
+ do \
+ { \
+ uword _v (l) = vec_len (V); \
+ vec_resize_ha (V, N, H, A); \
+ vec_set_len (V, _v (l)); \
+ } \
+ while (0)
/** \brief Allocate space for N more elements
(no header, unspecified alignment)
@@ -347,13 +337,14 @@ do { \
@param N number of elements to add
@param H header size in bytes (may be zero)
@param A alignment (may be zero)
+ @param P heap (may be zero)
@return V new vector
*/
-#define vec_new_ha(T,N,H,A) \
-({ \
- word _v(n) = (N); \
- (T *)_vec_resize ((T *) 0, _v(n), _v(n) * sizeof (T), (H), (A)); \
-})
+#define vec_new_generic(T, N, H, A, P) \
+ _vec_alloc_internal (N, &((vec_attr_t){ .align = _vec_align ((T *) 0, A), \
+ .hdr_sz = (H), \
+ .heap = (P), \
+ .elt_sz = sizeof (T) }))
/** \brief Create new vector of given type and length
(unspecified alignment, no header).
@@ -362,7 +353,7 @@ do { \
@param N number of elements to add
@return V new vector
*/
-#define vec_new(T,N) vec_new_ha(T,N,0,0)
+#define vec_new(T, N) vec_new_generic (T, N, 0, 0, 0)
/** \brief Create new vector of given type and length
(alignment specified, no header).
@@ -371,28 +362,32 @@ do { \
@param A alignment (may be zero)
@return V new vector
*/
-#define vec_new_aligned(T,N,A) vec_new_ha(T,N,0,A)
-
-/** \brief Free vector's memory (general version)
+#define vec_new_aligned(T, N, A) vec_new_generic (T, N, 0, A, 0)
+/** \brief Create new vector of given type and length
+ (heap specified, no header).
- @param V pointer to a vector
- @param H size of header in bytes
- @return V (value-result parameter, V=0)
+ @param T type of elements in new vector
+ @param N number of elements to add
+ @param P heap (may be zero)
+ @return V new vector
*/
-#define vec_free_h(V,H) \
-do { \
- if (V) \
- { \
- clib_mem_free (vec_header ((V), (H))); \
- V = 0; \
- } \
-} while (0)
+#define vec_new_heap(T, N, P) vec_new_generic (T, N, 0, 0, P)
/** \brief Free vector's memory (no header).
@param V pointer to a vector
@return V (value-result parameter, V=0)
*/
-#define vec_free(V) vec_free_h(V,0)
+
+static_always_inline void
+_vec_free (void **vp)
+{
+ if (vp[0] == 0)
+ return;
+ clib_mem_heap_free (vec_get_heap (vp[0]), vec_header (vp[0]));
+ vp[0] = 0;
+}
+
+#define vec_free(V) _vec_free ((void **) &(V))
void vec_free_not_inline (void *v);
@@ -407,34 +402,27 @@ void vec_free_not_inline (void *v);
@param V pointer to a vector
@param H size of header in bytes
@param A alignment (may be zero)
- @param S numa (may be VEC_NUMA_UNSPECIFIED)
@return Vdup copy of vector
*/
-#define vec_dup_ha_numa(V,H,A,S) \
-({ \
- __typeof__ ((V)[0]) * _v(v) = 0; \
- uword _v(l) = vec_len (V); \
- if (_v(l) > 0) \
- { \
- vec_resize_has (_v(v), _v(l), (H), (A), (S)); \
- clib_memcpy_fast (_v(v), (V), _v(l) * sizeof ((V)[0]));\
- } \
- _v(v); \
-})
-
-/** \brief Return copy of vector (VEC_NUMA_UNSPECIFIED).
-
- @param V pointer to a vector
- @param H size of header in bytes
- @param A alignment (may be zero)
+static_always_inline void *
+_vec_dup (void *v, uword hdr_size, uword align, uword elt_sz)
+{
+ uword len = vec_len (v);
+ const vec_attr_t va = { .elt_sz = elt_sz, .align = align };
+ void *n = 0;
- @return Vdup copy of vector
-*/
-#define vec_dup_ha(V,H,A) \
- vec_dup_ha_numa(V,H,A,VEC_NUMA_UNSPECIFIED)
+ if (len)
+ {
+ n = _vec_alloc_internal (len, &va);
+ clib_memcpy_fast (n, v, len * elt_sz);
+ }
+ return n;
+}
+#define vec_dup_ha(V, H, A) \
+ _vec_dup ((void *) (V), H, _vec_align (V, A), _vec_elt_sz (V))
/** \brief Return copy of vector (no header, no alignment)
@@ -467,12 +455,16 @@ void vec_free_not_inline (void *v);
@param NEW_V pointer to new vector
@param OLD_V pointer to old vector
*/
-#define vec_clone(NEW_V,OLD_V) \
-do { \
- (NEW_V) = 0; \
- (NEW_V) = _vec_resize ((NEW_V), vec_len (OLD_V), \
- vec_len (OLD_V) * sizeof ((NEW_V)[0]), (0), (0)); \
-} while (0)
+
+static_always_inline void
+_vec_clone (void **v1p, void *v2, uword align, uword elt_sz)
+{
+ const vec_attr_t va = { .elt_sz = elt_sz, .align = align };
+ v1p[0] = _vec_alloc_internal (vec_len (v2), &va);
+}
+#define vec_clone(NEW_V, OLD_V) \
+ _vec_clone ((void **) &(NEW_V), OLD_V, _vec_align (NEW_V, 0), \
+ _vec_elt_sz (NEW_V))
/** \brief Make sure vector is long enough for given index (general version).
@@ -480,39 +472,53 @@ do { \
@param I vector index which will be valid upon return
@param H header size in bytes (may be zero)
@param A alignment (may be zero)
- @param N numa_id (may be zero)
@return V (value-result macro parameter)
*/
-#define vec_validate_han(V,I,H,A,N) \
-do { \
- void *oldheap; \
- STATIC_ASSERT(A==0 || ((A % sizeof(V[0]))==0) \
- || ((sizeof(V[0]) % A) == 0), \
- "vector validate aligned on incorrectly sized object"); \
- word _v(i) = (I); \
- word _v(l) = vec_len (V); \
- if (_v(i) >= _v(l)) \
- { \
- /* switch to the per-numa heap if directed */ \
- if (PREDICT_FALSE(N != VEC_NUMA_UNSPECIFIED)) \
- { \
- oldheap = clib_mem_get_per_cpu_heap(); \
- clib_mem_set_per_cpu_heap (clib_mem_get_per_numa_heap(N)); \
- } \
- \
- vec_resize_ha ((V), 1 + (_v(i) - _v(l)), (H), (A)); \
- /* Must zero new space since user may have previously \
- used e.g. _vec_len (v) -= 10 */ \
- clib_memset ((V) + _v(l), 0, \
- (1 + (_v(i) - _v(l))) * sizeof ((V)[0])); \
- /* Switch back to the global heap */ \
- if (PREDICT_FALSE (N != VEC_NUMA_UNSPECIFIED)) \
- clib_mem_set_per_cpu_heap (oldheap); \
- } \
-} while (0)
+always_inline void
+_vec_zero_elts (void *v, uword first, uword count, uword elt_sz)
+{
+ clib_memset_u8 (v + (first * elt_sz), 0, count * elt_sz);
+}
+#define vec_zero_elts(V, F, C) _vec_zero_elts (V, F, C, sizeof ((V)[0]))
-#define vec_validate_ha(V,I,H,A) vec_validate_han(V,I,H,A,VEC_NUMA_UNSPECIFIED)
+static_always_inline void
+_vec_validate (void **vp, uword index, uword header_size, uword align,
+ void *heap, uword elt_sz)
+{
+ void *v = *vp;
+ uword vl, n_elts = index + 1;
+
+ if (PREDICT_FALSE (v == 0))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = header_size };
+ *vp = _vec_alloc_internal (n_elts, &va);
+ return;
+ }
+
+ vl = _vec_len (v);
+
+ if (PREDICT_FALSE (index < vl))
+ return;
+
+ if (PREDICT_FALSE (index >= _vec_find (v)->grow_elts + vl))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = header_size };
+ v = _vec_resize_internal (v, n_elts, &va);
+ _vec_update_pointer (vp, v);
+ }
+ else
+ _vec_set_len (v, n_elts, elt_sz);
+
+ _vec_zero_elts (v, vl, n_elts - vl, elt_sz);
+}
+
+#define vec_validate_hap(V, I, H, A, P) \
+ _vec_validate ((void **) &(V), I, H, _vec_align (V, A), 0, sizeof ((V)[0]))
/** \brief Make sure vector is long enough for given index
(no header, unspecified alignment)
@@ -521,7 +527,7 @@ do { \
@param I vector index which will be valid upon return
@return V (value-result macro parameter)
*/
-#define vec_validate(V,I) vec_validate_ha(V,I,0,0)
+#define vec_validate(V, I) vec_validate_hap (V, I, 0, 0, 0)
/** \brief Make sure vector is long enough for given index
(no header, specified alignment)
@@ -532,7 +538,18 @@ do { \
@return V (value-result macro parameter)
*/
-#define vec_validate_aligned(V,I,A) vec_validate_ha(V,I,0,A)
+#define vec_validate_aligned(V, I, A) vec_validate_hap (V, I, 0, A, 0)
+
+/** \brief Make sure vector is long enough for given index
+ (no header, specified heap)
+
+ @param V (possibly NULL) pointer to a vector.
+ @param I vector index which will be valid upon return
+ @param H heap (may be zero)
+ @return V (value-result macro parameter)
+*/
+
+#define vec_validate_heap(V, I, P) vec_validate_hap (V, I, 0, 0, P)
/** \brief Make sure vector is long enough for given index
and initialize empty space (general version)
@@ -544,20 +561,22 @@ do { \
@param A alignment (may be zero)
@return V (value-result macro parameter)
*/
-#define vec_validate_init_empty_ha(V,I,INIT,H,A) \
-do { \
- word _v(i) = (I); \
- word _v(l) = vec_len (V); \
- if (_v(i) >= _v(l)) \
- { \
- vec_resize_ha ((V), 1 + (_v(i) - _v(l)), (H), (A)); \
- while (_v(l) <= _v(i)) \
- { \
- (V)[_v(l)] = (INIT); \
- _v(l)++; \
- } \
- } \
-} while (0)
+#define vec_validate_init_empty_ha(V, I, INIT, H, A) \
+ do \
+ { \
+ word _v (i) = (I); \
+ word _v (l) = vec_len (V); \
+ if (_v (i) >= _v (l)) \
+ { \
+ vec_resize_ha (V, 1 + (_v (i) - _v (l)), H, A); \
+ while (_v (l) <= _v (i)) \
+ { \
+ (V)[_v (l)] = (INIT); \
+ _v (l)++; \
+ } \
+ } \
+ } \
+ while (0)
/** \brief Make sure vector is long enough for given index
and initialize empty space (no header, unspecified alignment)
@@ -591,12 +610,40 @@ do { \
@param A alignment (may be zero)
@return V (value-result macro parameter)
*/
-#define vec_add1_ha(V,E,H,A) \
-do { \
- word _v(l) = vec_len (V); \
- V = _vec_resize ((V), 1, (_v(l) + 1) * sizeof ((V)[0]), (H), (A)); \
- (V)[_v(l)] = (E); \
-} while (0)
+
+static_always_inline void *
+_vec_add1 (void **vp, uword hdr_sz, uword align, uword elt_sz)
+{
+ void *v = vp[0];
+ uword len;
+
+ if (PREDICT_FALSE (v == 0))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ return *vp = _vec_alloc_internal (1, &va);
+ }
+
+ len = _vec_len (v);
+
+ if (PREDICT_FALSE (_vec_find (v)->grow_elts == 0))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ v = _vec_resize_internal (v, len + 1, &va);
+ _vec_update_pointer (vp, v);
+ }
+ else
+ _vec_set_len (v, len + 1, elt_sz);
+
+ return v + len * elt_sz;
+}
+
+#define vec_add1_ha(V, E, H, A) \
+ ((__typeof__ ((V)[0]) *) _vec_add1 ((void **) &(V), H, _vec_align (V, A), \
+ _vec_elt_sz (V)))[0] = (E)
/** \brief Add 1 element to end of vector (unspecified alignment).
@@ -625,13 +672,41 @@ do { \
@param A alignment (may be zero)
@return V and P (value-result macro parameters)
*/
-#define vec_add2_ha(V,P,N,H,A) \
-do { \
- word _v(n) = (N); \
- word _v(l) = vec_len (V); \
- V = _vec_resize ((V), _v(n), (_v(l) + _v(n)) * sizeof ((V)[0]), (H), (A)); \
- P = (V) + _v(l); \
-} while (0)
+
+static_always_inline void
+_vec_add2 (void **vp, void **pp, uword n_add, uword hdr_sz, uword align,
+ uword elt_sz)
+{
+ void *v = *vp;
+ uword len;
+
+ if (PREDICT_FALSE (v == 0))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ *vp = *pp = _vec_alloc_internal (n_add, &va);
+ return;
+ }
+
+ len = _vec_len (v);
+ if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ v = _vec_resize_internal (v, len + n_add, &va);
+ _vec_update_pointer (vp, v);
+ }
+ else
+ _vec_set_len (v, len + n_add, elt_sz);
+
+ *pp = v + len * elt_sz;
+}
+
+#define vec_add2_ha(V, P, N, H, A) \
+ _vec_add2 ((void **) &(V), (void **) &(P), N, H, _vec_align (V, A), \
+ _vec_elt_sz (V))
/** \brief Add N elements to end of vector V,
return pointer to new elements in P. (no header, unspecified alignment)
@@ -665,19 +740,47 @@ do { \
@param A alignment (may be zero)
@return V (value-result macro parameter)
*/
+static_always_inline void
+_vec_add (void **vp, void *e, word n_add, uword hdr_sz, uword align,
+ uword elt_sz)
+{
+ void *v = *vp;
+ uword len;
+
+ ASSERT (n_add >= 0);
+
+ if (n_add < 1)
+ return;
+
+ if (PREDICT_FALSE (v == 0))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ *vp = v = _vec_alloc_internal (n_add, &va);
+ clib_memcpy_fast (v, e, n_add * elt_sz);
+ return;
+ }
+
+ len = _vec_len (v);
+
+ if (PREDICT_FALSE (_vec_find (v)->grow_elts < n_add))
+ {
+ const vec_attr_t va = { .elt_sz = elt_sz,
+ .align = align,
+ .hdr_sz = hdr_sz };
+ v = _vec_resize_internal (v, len + n_add, &va);
+ _vec_update_pointer (vp, v);
+ }
+ else
+ _vec_set_len (v, len + n_add, elt_sz);
+
+ clib_memcpy_fast (v + len * elt_sz, e, n_add * elt_sz);
+}
+
#define vec_add_ha(V, E, N, H, A) \
- do \
- { \
- word _v (n) = (N); \
- if (PREDICT_TRUE (_v (n) > 0)) \
- { \
- word _v (l) = vec_len (V); \
- V = _vec_resize ((V), _v (n), (_v (l) + _v (n)) * sizeof ((V)[0]), \
- (H), (A)); \
- clib_memcpy_fast ((V) + _v (l), (E), _v (n) * sizeof ((V)[0])); \
- } \
- } \
- while (0)
+ _vec_add ((void **) &(V), (void *) (E), N, H, _vec_align (V, A), \
+ _vec_elt_sz (V))
/** \brief Add N elements to end of vector V (no header, unspecified alignment)
@@ -703,14 +806,16 @@ do { \
@param V pointer to a vector
@return E element removed from the end of the vector
*/
-#define vec_pop(V) \
-({ \
- uword _v(l) = vec_len (V); \
- ASSERT (_v(l) > 0); \
- _v(l) -= 1; \
- _vec_len (V) = _v (l); \
- (V)[_v(l)]; \
-})
+#define vec_pop(V) \
+ ({ \
+ uword _v (l) = vec_len (V); \
+ __typeof__ ((V)[0]) _v (rv); \
+ ASSERT (_v (l) > 0); \
+ _v (l) -= 1; \
+ _v (rv) = (V)[_v (l)]; \
+ vec_set_len (V, _v (l)); \
+ (_v (rv)); \
+ })
/** \brief Set E to the last element of a vector, decrement vector length
@param V pointer to a vector
@@ -737,21 +842,27 @@ do { \
@param A alignment (may be zero)
@return V (value-result macro parameter)
*/
-#define vec_insert_init_empty_ha(V,N,M,INIT,H,A) \
-do { \
- word _v(l) = vec_len (V); \
- word _v(n) = (N); \
- word _v(m) = (M); \
- V = _vec_resize ((V), \
- _v(n), \
- (_v(l) + _v(n))*sizeof((V)[0]), \
- (H), (A)); \
- ASSERT (_v(m) <= _v(l)); \
- memmove ((V) + _v(m) + _v(n), \
- (V) + _v(m), \
- (_v(l) - _v(m)) * sizeof ((V)[0])); \
- clib_memset ((V) + _v(m), INIT, _v(n) * sizeof ((V)[0])); \
-} while (0)
+
+static_always_inline void
+_vec_insert (void **vp, uword n_insert, uword ins_pt, u8 init, uword hdr_sz,
+ uword align, uword elt_sz)
+{
+ void *v = vp[0];
+ uword len = vec_len (v);
+ const vec_attr_t va = { .elt_sz = elt_sz, .align = align, .hdr_sz = hdr_sz };
+
+ ASSERT (ins_pt <= len);
+
+ v = _vec_resize_internal (v, len + n_insert, &va);
+ clib_memmove (v + va.elt_sz * (ins_pt + n_insert), v + ins_pt * elt_sz,
+ (len - ins_pt) * elt_sz);
+ _vec_zero_elts (v, ins_pt, n_insert, elt_sz);
+ _vec_update_pointer (vp, v);
+}
+
+#define vec_insert_init_empty_ha(V, N, M, INIT, H, A) \
+ _vec_insert ((void **) &(V), N, M, INIT, H, _vec_align (V, A), \
+ _vec_elt_sz (V))
/** \brief Insert N vector elements starting at element M,
initialize new elements to zero (general version)
@@ -825,23 +936,27 @@ do { \
@return V (value-result macro parameter)
*/
+static_always_inline void
+_vec_insert_elts (void **vp, void *e, uword n_insert, uword ins_pt,
+ uword hdr_sz, uword align, uword elt_sz)
+{
+ void *v = vp[0];
+ uword len = vec_len (v);
+ const vec_attr_t va = { .elt_sz = elt_sz, .align = align, .hdr_sz = hdr_sz };
+
+ ASSERT (ins_pt <= len);
+
+ v = _vec_resize_internal (v, len + n_insert, &va);
+ clib_memmove (v + elt_sz * (ins_pt + n_insert), v + ins_pt * elt_sz,
+ (len - ins_pt) * elt_sz);
+ _vec_zero_elts (v, ins_pt, n_insert, elt_sz);
+ clib_memcpy_fast (v + ins_pt * elt_sz, e, n_insert * elt_sz);
+ _vec_update_pointer (vp, v);
+}
+
#define vec_insert_elts_ha(V, E, N, M, H, A) \
- do \
- { \
- word _v (n) = (N); \
- if (PREDICT_TRUE (_v (n) > 0)) \
- { \
- word _v (l) = vec_len (V); \
- word _v (m) = (M); \
- V = _vec_resize ((V), _v (n), (_v (l) + _v (n)) * sizeof ((V)[0]), \
- (H), (A)); \
- ASSERT (_v (m) <= _v (l)); \
- memmove ((V) + _v (m) + _v (n), (V) + _v (m), \
- (_v (l) - _v (m)) * sizeof ((V)[0])); \
- clib_memcpy_fast ((V) + _v (m), (E), _v (n) * sizeof ((V)[0])); \
- } \
- } \
- while (0)
+ _vec_insert_elts ((void **) &(V), E, N, M, H, _vec_align (V, A), \
+ _vec_elt_sz (V))
/** \brief Insert N vector elements starting at element M,
insert given elements (no header, unspecified alignment)
@@ -873,57 +988,66 @@ do { \
@param M first element to delete
@return V (value-result macro parameter)
*/
-#define vec_delete(V,N,M) \
-do { \
- word _v(l) = vec_len (V); \
- word _v(n) = (N); \
- word _v(m) = (M); \
- /* Copy over deleted elements. */ \
- if (_v(l) - _v(n) - _v(m) > 0) \
- memmove ((V) + _v(m), (V) + _v(m) + _v(n), \
- (_v(l) - _v(n) - _v(m)) * sizeof ((V)[0])); \
- /* Zero empty space at end (for future re-allocation). */ \
- if (_v(n) > 0) \
- clib_memset ((V) + _v(l) - _v(n), 0, _v(n) * sizeof ((V)[0])); \
- _vec_len (V) -= _v(n); \
- CLIB_MEM_POISON(vec_end(V), _v(n) * sizeof ((V)[0])); \
-} while (0)
+
+static_always_inline void
+_vec_delete (void *v, uword n_del, uword first, uword elt_sz)
+{
+ word n_bytes_del, n_bytes_to_move, len = vec_len (v);
+ u8 *dst;
+
+ if (n_del == 0)
+ return;
+
+ ASSERT (first + n_del <= len);
+
+ n_bytes_del = n_del * elt_sz;
+ n_bytes_to_move = (len - first - n_del) * elt_sz;
+ dst = v + first * elt_sz;
+
+ if (n_bytes_to_move > 0)
+ clib_memmove (dst, dst + n_bytes_del, n_bytes_to_move);
+ clib_memset (dst + n_bytes_to_move, 0, n_bytes_del);
+
+ _vec_set_len (v, _vec_len (v) - n_del, elt_sz);
+}
+
+#define vec_delete(V, N, M) _vec_delete ((void *) (V), N, M, _vec_elt_sz (V))
/** \brief Delete the element at index I
@param V pointer to a vector
@param I index to delete
*/
-#define vec_del1(v,i) \
-do { \
- uword _vec_del_l = _vec_len (v) - 1; \
- uword _vec_del_i = (i); \
- if (_vec_del_i < _vec_del_l) \
- (v)[_vec_del_i] = (v)[_vec_del_l]; \
- _vec_len (v) = _vec_del_l; \
- CLIB_MEM_POISON(vec_end(v), sizeof ((v)[0])); \
-} while (0)
-/** \brief Append v2 after v1. Result in v1.
- @param V1 target vector
- @param V2 vector to append
-*/
+static_always_inline void
+_vec_del1 (void *v, uword index, uword elt_sz)
+{
+ uword len = _vec_len (v) - 1;
-#define vec_append(v1, v2) \
- do \
- { \
- uword _v (l1) = vec_len (v1); \
- uword _v (l2) = vec_len (v2); \
- \
- if (PREDICT_TRUE (_v (l2) > 0)) \
- { \
- v1 = _vec_resize ((v1), _v (l2), \
- (_v (l1) + _v (l2)) * sizeof ((v1)[0]), 0, 0); \
- clib_memcpy_fast ((v1) + _v (l1), (v2), \
- _v (l2) * sizeof ((v2)[0])); \
- } \
- } \
- while (0)
+ if (index < len)
+ clib_memcpy_fast (v + index * elt_sz, v + len * elt_sz, elt_sz);
+
+ _vec_set_len (v, len, elt_sz);
+}
+
+#define vec_del1(v, i) _vec_del1 ((void *) (v), i, _vec_elt_sz (v))
+
+static_always_inline void
+_vec_append (void **v1p, void *v2, uword v1_elt_sz, uword v2_elt_sz,
+ uword align)
+{
+ void *v1 = v1p[0];
+ uword len1 = vec_len (v1);
+ uword len2 = vec_len (v2);
+
+ if (PREDICT_TRUE (len2 > 0))
+ {
+ const vec_attr_t va = { .elt_sz = v2_elt_sz, .align = align };
+ v1 = _vec_resize_internal (v1, len1 + len2, &va);
+ clib_memcpy_fast (v1 + len1 * v1_elt_sz, v2, len2 * v2_elt_sz);
+ _vec_update_pointer (v1p, v1);
+ }
+}
/** \brief Append v2 after v1. Result in v1. Specified alignment.
@param V1 target vector
@@ -932,72 +1056,66 @@ do { \
*/
#define vec_append_aligned(v1, v2, align) \
- do \
- { \
- uword _v (l1) = vec_len (v1); \
- uword _v (l2) = vec_len (v2); \
- \
- if (PREDICT_TRUE (_v (l2) > 0)) \
- { \
- v1 = _vec_resize ( \
- (v1), _v (l2), (_v (l1) + _v (l2)) * sizeof ((v1)[0]), 0, align); \
- clib_memcpy_fast ((v1) + _v (l1), (v2), \
- _v (l2) * sizeof ((v2)[0])); \
- } \
- } \
- while (0)
+ _vec_append ((void **) &(v1), (void *) (v2), _vec_elt_sz (v1), \
+ _vec_elt_sz (v2), _vec_align (v1, align))
-/** \brief Prepend v2 before v1. Result in v1.
+/** \brief Append v2 after v1. Result in v1.
@param V1 target vector
- @param V2 vector to prepend
+ @param V2 vector to append
*/
-#define vec_prepend(v1, v2) \
- do \
- { \
- uword _v (l1) = vec_len (v1); \
- uword _v (l2) = vec_len (v2); \
- \
- if (PREDICT_TRUE (_v (l2) > 0)) \
- { \
- v1 = _vec_resize ((v1), _v (l2), \
- (_v (l1) + _v (l2)) * sizeof ((v1)[0]), 0, 0); \
- memmove ((v1) + _v (l2), (v1), _v (l1) * sizeof ((v1)[0])); \
- clib_memcpy_fast ((v1), (v2), _v (l2) * sizeof ((v2)[0])); \
- } \
- } \
- while (0)
+#define vec_append(v1, v2) vec_append_aligned (v1, v2, 0)
+
+static_always_inline void
+_vec_prepend (void *restrict *v1p, void *restrict v2, uword v1_elt_sz,
+ uword v2_elt_sz, uword align)
+{
+ void *restrict v1 = v1p[0];
+ uword len1 = vec_len (v1);
+ uword len2 = vec_len (v2);
+
+ if (PREDICT_TRUE (len2 > 0))
+ {
+ /* prepending vector to itself would result in use-after-free */
+ ASSERT (v1 != v2);
+ const vec_attr_t va = { .elt_sz = v2_elt_sz, .align = align };
+ v1 = _vec_resize_internal (v1, len1 + len2, &va);
+ clib_memmove (v1 + len2 * v2_elt_sz, v1, len1 * v1_elt_sz);
+ clib_memcpy_fast (v1, v2, len2 * v2_elt_sz);
+ _vec_update_pointer ((void **) v1p, v1);
+ }
+}
/** \brief Prepend v2 before v1. Result in v1. Specified alignment
@param V1 target vector
- @param V2 vector to prepend
+ @param V2 vector to prepend, V1 != V2
@param align required alignment
*/
#define vec_prepend_aligned(v1, v2, align) \
- do \
- { \
- uword _v (l1) = vec_len (v1); \
- uword _v (l2) = vec_len (v2); \
- \
- if (PREDICT_TRUE (_v (l2) > 0)) \
- { \
- v1 = _vec_resize ( \
- (v1), _v (l2), (_v (l1) + _v (l2)) * sizeof ((v1)[0]), 0, align); \
- memmove ((v1) + _v (l2), (v1), _v (l1) * sizeof ((v1)[0])); \
- clib_memcpy_fast ((v1), (v2), _v (l2) * sizeof ((v2)[0])); \
- } \
- } \
- while (0)
+ _vec_prepend ((void **) &(v1), (void *) (v2), _vec_elt_sz (v1), \
+ _vec_elt_sz (v2), _vec_align (v1, align))
+
+/** \brief Prepend v2 before v1. Result in v1.
+ @param V1 target vector
+ @param V2 vector to prepend, V1 != V2
+*/
+
+#define vec_prepend(v1, v2) vec_prepend_aligned (v1, v2, 0)
/** \brief Zero all vector elements. Null-pointer tolerant.
@param var Vector to zero
*/
-#define vec_zero(var) \
-do { \
- if (var) \
- clib_memset ((var), 0, vec_len (var) * sizeof ((var)[0])); \
-} while (0)
+static_always_inline void
+_vec_zero (void *v, uword elt_sz)
+{
+ uword len = vec_len (v);
+
+ if (len)
+ clib_memset_u8 (v, 0, len * elt_sz);
+}
+
+#define vec_zero(var) _vec_zero ((void *) (var), _vec_elt_sz (var))
/** \brief Set all vector elements to given value. Null-pointer tolerant.
@param v vector to set
@@ -1021,8 +1139,23 @@ do { \
@param v2 Pointer to a vector
@return 1 if equal, 0 if unequal
*/
-#define vec_is_equal(v1,v2) \
- (vec_len (v1) == vec_len (v2) && ! memcmp ((v1), (v2), vec_len (v1) * sizeof ((v1)[0])))
+static_always_inline int
+_vec_is_equal (void *v1, void *v2, uword v1_elt_sz, uword v2_elt_sz)
+{
+ uword vec_len_v1 = vec_len (v1);
+
+ if ((vec_len_v1 != vec_len (v2)) || (v1_elt_sz != v2_elt_sz))
+ return 0;
+
+ if ((vec_len_v1 == 0) || (memcmp (v1, v2, vec_len_v1 * v1_elt_sz) == 0))
+ return 1;
+
+ return 0;
+}
+
+#define vec_is_equal(v1, v2) \
+ _vec_is_equal ((void *) (v1), (void *) (v2), _vec_elt_sz (v1), \
+ _vec_elt_sz (v2))
/** \brief Compare two vectors (only applicable to vectors of signed numbers).
Used in qsort compare functions.
@@ -1107,15 +1240,16 @@ do { \
@param S pointer to string buffer.
@param L string length (NOT including the terminating NULL; a la strlen())
*/
-#define vec_validate_init_c_string(V, S, L) \
- do { \
- vec_reset_length (V); \
- vec_validate ((V), (L)); \
- if ((S) && (L)) \
- clib_memcpy_fast ((V), (S), (L)); \
- (V)[(L)] = 0; \
- } while (0)
-
+#define vec_validate_init_c_string(V, S, L) \
+ do \
+ { \
+ vec_reset_length (V); \
+ vec_validate (V, (L)); \
+ if ((S) && (L)) \
+ clib_memcpy_fast (V, (S), (L)); \
+ (V)[(L)] = 0; \
+ } \
+ while (0)
/** \brief Test whether a vector is a NULL terminated c-string.
@@ -1130,23 +1264,12 @@ do { \
@param V (possibly NULL) pointer to a vector.
@return V (value-result macro parameter)
*/
-#define vec_terminate_c_string(V) \
- do { \
- u32 vl = vec_len ((V)); \
- if (!vec_c_string_is_terminated(V)) \
- { \
- vec_validate ((V), vl); \
- (V)[vl] = 0; \
- } \
- } while (0)
+#define vec_terminate_c_string(V) \
+ do \
+ { \
+ if (!vec_c_string_is_terminated (V)) \
+ vec_add1 (V, 0); \
+ } \
+ while (0)
#endif /* included_vec_h */
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/vec_bootstrap.h b/src/vppinfra/vec_bootstrap.h
index 5cf5d3b76a1..5d386b1eaad 100644
--- a/src/vppinfra/vec_bootstrap.h
+++ b/src/vppinfra/vec_bootstrap.h
@@ -55,12 +55,15 @@
typedef struct
{
u32 len; /**< Number of elements in vector (NOT its allocated length). */
- u8 numa_id; /**< NUMA id */
- u8 vpad[3]; /**< pad to 8 bytes */
+ u8 hdr_size; /**< header size divided by VEC_MIN_ALIGN */
+ u8 log2_align : 7; /**< data alignment */
+ u8 default_heap : 1; /**< vector uses default heap */
+ u8 grow_elts; /**< number of elts vector can grow without realloc */
+ u8 vpad[1]; /**< pad to 8 bytes */
u8 vector_data[0]; /**< Vector data . */
} vec_header_t;
-#define VEC_NUMA_UNSPECIFIED (0xFF)
+#define VEC_MIN_ALIGN 8
/** \brief Find the vector header
@@ -71,15 +74,23 @@ typedef struct
@return pointer to the vector's vector_header_t
*/
#define _vec_find(v) ((vec_header_t *) (v) - 1)
+#define _vec_heap(v) (((void **) (_vec_find (v)))[-1])
+
+always_inline uword __vec_align (uword data_align, uword configuered_align);
+always_inline uword __vec_elt_sz (uword elt_sz, int is_void);
#define _vec_round_size(s) \
(((s) + sizeof (uword) - 1) &~ (sizeof (uword) - 1))
+#define _vec_is_void(P) \
+ __builtin_types_compatible_p (__typeof__ ((P)[0]), void)
+#define _vec_elt_sz(V) __vec_elt_sz (sizeof ((V)[0]), _vec_is_void (V))
+#define _vec_align(V, A) __vec_align (__alignof__((V)[0]), A)
-always_inline uword
-vec_header_bytes (uword header_bytes)
+always_inline __clib_nosanitize_addr uword
+vec_get_header_size (void *v)
{
- return round_pow2 (header_bytes + sizeof (vec_header_t),
- sizeof (vec_header_t));
+ uword header_size = _vec_find (v)->hdr_size * VEC_MIN_ALIGN;
+ return header_size;
}
/** \brief Find a user vector header
@@ -89,9 +100,9 @@ vec_header_bytes (uword header_bytes)
*/
always_inline void *
-vec_header (void *v, uword header_bytes)
+vec_header (void *v)
{
- return v - vec_header_bytes (header_bytes);
+ return v ? v - vec_get_header_size (v) : 0;
}
/** \brief Find the end of user vector header
@@ -101,92 +112,94 @@ vec_header (void *v, uword header_bytes)
*/
always_inline void *
-vec_header_end (void *v, uword header_bytes)
+vec_header_end (void *v)
{
- return v + vec_header_bytes (header_bytes);
+ return v + vec_get_header_size (v);
}
-always_inline uword
-vec_aligned_header_bytes (uword header_bytes, uword align)
-{
- return round_pow2 (header_bytes + sizeof (vec_header_t), align);
-}
+/** \brief Number of elements in vector (rvalue-only, NULL tolerant)
-always_inline void *
-vec_aligned_header (void *v, uword header_bytes, uword align)
-{
- return v - vec_aligned_header_bytes (header_bytes, align);
-}
+ vec_len (v) checks for NULL, but cannot be used as an lvalue.
+ If in doubt, use vec_len...
+*/
-always_inline void *
-vec_aligned_header_end (void *v, uword header_bytes, uword align)
+static_always_inline u32
+__vec_len (void *v)
{
- return v + vec_aligned_header_bytes (header_bytes, align);
+ return _vec_find (v)->len;
}
+#define _vec_len(v) __vec_len ((void *) (v))
+#define vec_len(v) ((v) ? _vec_len(v) : 0)
-/** \brief Number of elements in vector (lvalue-capable)
-
- _vec_len (v) does not check for null, but can be used as an lvalue
- (e.g. _vec_len (v) = 99).
-*/
-
-#define _vec_len(v) (_vec_find(v)->len)
+u32 vec_len_not_inline (void *v);
-/** \brief Number of elements in vector (rvalue-only, NULL tolerant)
+/** \brief Number of data bytes in vector. */
- vec_len (v) checks for NULL, but cannot be used as an lvalue.
- If in doubt, use vec_len...
-*/
+#define vec_bytes(v) (vec_len (v) * sizeof (v[0]))
-#define vec_len(v) ((v) ? _vec_len(v) : 0)
-u32 vec_len_not_inline (void *v);
+/**
+ * Return size of memory allocated for the vector
+ *
+ * @param v vector
+ * @return memory size allocated for the vector
+ */
-/** \brief Vector's NUMA id (lvalue-capable)
+uword vec_mem_size (void *v);
- _vec_numa(v) does not check for null, but can be used as an lvalue
- (e.g. _vec_numa(v) = 1).
-*/
+/**
+ * Number of elements that can fit into generic vector
+ *
+ * @param v vector
+ * @param b extra header bytes
+ * @return number of elements that can fit into vector
+ */
-#define _vec_numa(v) (_vec_find(v)->numa_id)
+always_inline uword
+vec_max_bytes (void *v)
+{
+ return v ? vec_mem_size (v) - vec_get_header_size (v) : 0;
+}
-/** \brief Return vector's NUMA ID (rvalue-only, NULL tolerant)
- vec_numa(v) checks for NULL, but cannot be used as an lvalue.
-*/
-#define vec_numa(v) ((v) ? _vec_numa(v) : 0)
+always_inline uword
+_vec_max_len (void *v, uword elt_sz)
+{
+ return vec_max_bytes (v) / elt_sz;
+}
+#define vec_max_len(v) _vec_max_len (v, _vec_elt_sz (v))
-/** \brief Number of data bytes in vector. */
+static_always_inline void
+_vec_set_grow_elts (void *v, uword n_elts)
+{
+ uword max = pow2_mask (BITS (_vec_find (0)->grow_elts));
-#define vec_bytes(v) (vec_len (v) * sizeof (v[0]))
+ if (PREDICT_FALSE (n_elts > max))
+ n_elts = max;
-/** \brief Total number of bytes that can fit in vector with current allocation. */
+ _vec_find (v)->grow_elts = n_elts;
+}
-#define vec_capacity(v,b) \
-({ \
- void * _vec_capacity_v = (void *) (v); \
- uword _vec_capacity_b = (b); \
- _vec_capacity_b = sizeof (vec_header_t) + _vec_round_size (_vec_capacity_b); \
- _vec_capacity_v ? clib_mem_size (_vec_capacity_v - _vec_capacity_b) : 0; \
-})
+always_inline void
+_vec_set_len (void *v, uword len, uword elt_sz)
+{
+ ASSERT (v);
+ ASSERT (len <= _vec_max_len (v, elt_sz));
+ uword old_len = _vec_len (v);
+ uword grow_elts = _vec_find (v)->grow_elts;
+
+ if (len > old_len)
+ clib_mem_unpoison (v + old_len * elt_sz, (len - old_len) * elt_sz);
+ else if (len < old_len)
+ clib_mem_poison (v + len * elt_sz, (old_len - len) * elt_sz);
+
+ _vec_set_grow_elts (v, old_len + grow_elts - len);
+ _vec_find (v)->len = len;
+}
-/** \brief Total number of elements that can fit into vector. */
-#define vec_max_len(v) \
- ((v) ? (vec_capacity (v,0) - vec_header_bytes (0)) / sizeof (v[0]) : 0)
-
-/** \brief Set vector length to a user-defined value */
-#ifndef __COVERITY__ /* Coverity gets confused by ASSERT() */
-#define vec_set_len(v, l) do { \
- ASSERT(v); \
- ASSERT((l) <= vec_max_len(v)); \
- CLIB_MEM_POISON_LEN((void *)(v), _vec_len(v) * sizeof((v)[0]), (l) * sizeof((v)[0])); \
- _vec_len(v) = (l); \
-} while (0)
-#else /* __COVERITY__ */
-#define vec_set_len(v, l) do { \
- _vec_len(v) = (l); \
-} while (0)
-#endif /* __COVERITY__ */
+#define vec_set_len(v, l) _vec_set_len ((void *) v, l, _vec_elt_sz (v))
+#define vec_inc_len(v, l) vec_set_len (v, _vec_len (v) + (l))
+#define vec_dec_len(v, l) vec_set_len (v, _vec_len (v) - (l))
/** \brief Reset vector length to zero
NULL-pointer tolerant
@@ -213,26 +226,22 @@ u32 vec_len_not_inline (void *v);
#define vec_foreach(var,vec) for (var = (vec); var < vec_end (vec); var++)
/** \brief Vector iterator (reverse) */
-#define vec_foreach_backwards(var,vec) \
-for (var = vec_end (vec) - 1; var >= (vec); var--)
+#define vec_foreach_backwards(var, vec) \
+ if (vec) \
+ for (var = vec_end (vec) - 1; var >= (vec); var--)
/** \brief Iterate over vector indices. */
#define vec_foreach_index(var,v) for ((var) = 0; (var) < vec_len (v); (var)++)
/** \brief Iterate over vector indices (reverse). */
-#define vec_foreach_index_backwards(var,v) \
- for ((var) = vec_len((v)) - 1; (var) >= 0; (var)--)
-
-/** \brief return the NUMA index for a vector */
-always_inline uword
-vec_get_numa (void *v)
-{
- vec_header_t *vh;
- if (v == 0)
- return 0;
- vh = _vec_find (v);
- return vh->numa_id;
-}
+#define vec_foreach_index_backwards(var, v) \
+ if (v) \
+ for ((var) = vec_len ((v)) - 1; (var) >= 0; (var)--)
+
+#define vec_foreach_pointer(e, v) \
+ if (v) \
+ for (typeof (**v) **__ep = (v), **__end = vec_end (v), *(e) = *__ep; \
+ __ep < __end; __ep++, (e) = __ep < __end ? *__ep : (e))
#endif /* included_clib_vec_bootstrap_h */
diff --git a/src/vppinfra/vector.h b/src/vppinfra/vector.h
index 6a6635b4c93..b5544c4b975 100644
--- a/src/vppinfra/vector.h
+++ b/src/vppinfra/vector.h
@@ -65,8 +65,9 @@
#define CLIB_HAVE_VEC512
#endif
-#define _vector_size(n) __attribute__ ((vector_size (n)))
-#define _vector_size_unaligned(n) __attribute__ ((vector_size (n), __aligned__ (1)))
+#define _vector_size(n) __attribute__ ((vector_size (n), __may_alias__))
+#define _vector_size_unaligned(n) \
+ __attribute__ ((vector_size (n), __aligned__ (1), __may_alias__))
#define foreach_vec64i _(i,8,8) _(i,16,4) _(i,32,2)
#define foreach_vec64u _(u,8,8) _(u,16,4) _(u,32,2)
@@ -97,22 +98,53 @@
#define foreach_vec foreach_int_vec foreach_uint_vec foreach_float_vec
-/* *INDENT-OFF* */
-
/* Type Definitions */
-#define _(t,s,c) \
-typedef t##s t##s##x##c _vector_size (s/8*c); \
-typedef t##s t##s##x##c##u _vector_size_unaligned (s/8*c); \
-typedef union { \
- t##s##x##c as_##t##s##x##c; \
- t##s as_##t##s[c]; \
-} t##s##x##c##_union_t;
+#define _(t, s, c) \
+ typedef t##s t##s##x##c _vector_size (s / 8 * c); \
+ typedef t##s t##s##x##c##u _vector_size_unaligned (s / 8 * c); \
+ typedef union \
+ { \
+ t##s##x##c as_##t##s##x##c; \
+ t##s as_##t##s[c]; \
+ } t##s##x##c##_union_t;
+/* clang-format off */
foreach_vec64i foreach_vec64u foreach_vec64f
foreach_vec128i foreach_vec128u foreach_vec128f
foreach_vec256i foreach_vec256u foreach_vec256f
foreach_vec512i foreach_vec512u foreach_vec512f
+/* clang-format on */
+#undef _
+
+ typedef union
+{
+#define _(t, s, c) t##s##x##c as_##t##s##x##c;
+ foreach_vec128i foreach_vec128u foreach_vec128f
+#undef _
+} vec128_t;
+
+typedef union
+{
+#define _(t, s, c) t##s##x##c as_##t##s##x##c;
+ foreach_vec256i foreach_vec256u foreach_vec256f
+#undef _
+#define _(t, s, c) t##s##x##c as_##t##s##x##c[2];
+ foreach_vec128i foreach_vec128u foreach_vec128f
+#undef _
+} vec256_t;
+
+typedef union
+{
+#define _(t, s, c) t##s##x##c as_##t##s##x##c;
+ foreach_vec512i foreach_vec512u foreach_vec512f
#undef _
+#define _(t, s, c) t##s##x##c as_##t##s##x##c[2];
+ foreach_vec256i foreach_vec256u foreach_vec256f
+#undef _
+#define _(t, s, c) t##s##x##c as_##t##s##x##c[4];
+ foreach_vec128i foreach_vec128u foreach_vec128f
+#undef _
+} vec512_t;
/* universal inlines */
#define _(t, s, c) \
@@ -125,6 +157,68 @@ foreach_vec
#undef _vector_size
+ /* _shuffle and _shuffle2 */
+#if defined(__GNUC__) && !defined(__clang__)
+#define __builtin_shufflevector(v1, v2, ...) \
+ __builtin_shuffle ((v1), (v2), (__typeof__ (v1)){ __VA_ARGS__ })
+#endif
+
+#define u8x16_shuffle(v1, ...) \
+ (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v1), __VA_ARGS__)
+#define u8x32_shuffle(v1, ...) \
+ (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v1), __VA_ARGS__)
+#define u8x64_shuffle(v1, ...) \
+ (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v1), __VA_ARGS__)
+
+#define u16x8_shuffle(v1, ...) \
+ (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v1), __VA_ARGS__)
+#define u16x16_shuffle(v1, ...) \
+ (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v1), __VA_ARGS__)
+#define u16x32_shuffle(v1, ...) \
+ (u16u32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v1), __VA_ARGS__);
+
+#define u32x4_shuffle(v1, ...) \
+ (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v1), __VA_ARGS__)
+#define u32x8_shuffle(v1, ...) \
+ (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v1), __VA_ARGS__)
+#define u32x16_shuffle(v1, ...) \
+ (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v1), __VA_ARGS__)
+
+#define u64x2_shuffle(v1, ...) \
+ (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v1), __VA_ARGS__)
+#define u64x4_shuffle(v1, ...) \
+ (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v1), __VA_ARGS__)
+#define u64x8_shuffle(v1, ...) \
+ (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v1), __VA_ARGS__)
+
+#define u8x16_shuffle2(v1, v2, ...) \
+ (u8x16) __builtin_shufflevector ((u8x16) (v1), (u8x16) (v2), __VA_ARGS__)
+#define u8x32_shuffle2(v1, v2, ...) \
+ (u8x32) __builtin_shufflevector ((u8x32) (v1), (u8x32) (v2), __VA_ARGS__)
+#define u8x64_shuffle2(v1, v2, ...) \
+ (u8x64) __builtin_shufflevector ((u8x64) (v1), (u8x64) (v2), __VA_ARGS__)
+
+#define u16x8_shuffle2(v1, v2, ...) \
+ (u16x8) __builtin_shufflevector ((u16x8) (v1), (u16x8) (v2), __VA_ARGS__)
+#define u16x16_shuffle2(v1, v2, ...) \
+ (u16x16) __builtin_shufflevector ((u16x16) (v1), (u16x16) (v2), __VA_ARGS__)
+#define u16x32_shuffle2(v1, v2, ...) \
+ (u16u32) __builtin_shufflevector ((u16x32) (v1), (u16x32) (v2), __VA_ARGS__);
+
+#define u32x4_shuffle2(v1, v2, ...) \
+ (u32x4) __builtin_shufflevector ((u32x4) (v1), (u32x4) (v2), __VA_ARGS__)
+#define u32x8_shuffle2(v1, v2, ...) \
+ (u32x8) __builtin_shufflevector ((u32x8) (v1), (u32x8) (v2), __VA_ARGS__)
+#define u32x16_shuffle2(v1, v2, ...) \
+ (u32x16) __builtin_shufflevector ((u32x16) (v1), (u32x16) (v2), __VA_ARGS__)
+
+#define u64x2_shuffle2(v1, v2, ...) \
+ (u64x2) __builtin_shufflevector ((u64x2) (v1), (u64x2) (v2), __VA_ARGS__)
+#define u64x4_shuffle2(v1, v2, ...) \
+ (u64x4) __builtin_shufflevector ((u64x4) (v1), (u64x4) (v2), __VA_ARGS__)
+#define u64x8_shuffle2(v1, v2, ...) \
+ (u64x8) __builtin_shufflevector ((u64x8) (v1), (u64x8) (v2), __VA_ARGS__)
+
#define VECTOR_WORD_TYPE(t) t##x
#define VECTOR_WORD_TYPE_LEN(t) (sizeof (VECTOR_WORD_TYPE(t)) / sizeof (t))
@@ -166,13 +260,4 @@ t##s##x##c##_splat (t##s x) \
#undef _
#endif
-/* *INDENT-ON* */
-
#endif /* included_clib_vector_h */
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/vector/array_mask.h b/src/vppinfra/vector/array_mask.h
index 8f2e1d7d88c..3d4a82ac01b 100644
--- a/src/vppinfra/vector/array_mask.h
+++ b/src/vppinfra/vector/array_mask.h
@@ -17,59 +17,114 @@
static_always_inline void
clib_array_mask_u32 (u32 *src, u32 mask, u32 n_elts)
{
- u32 i;
#if defined(CLIB_HAVE_VEC512)
u32x16 mask16 = u32x16_splat (mask);
-
- for (i = 0; i + 16 <= n_elts; i += 16)
- *((u32x16u *) (src + i)) &= mask16;
- n_elts -= i;
- if (n_elts)
+ if (n_elts <= 16)
{
- u16 m = pow2_mask (n_elts);
- u32x16_mask_store (u32x16_mask_load_zero (src + i, m) & mask16, src + i,
- m);
+ u32 m = pow2_mask (n_elts);
+ u32x16 r = u32x16_mask_load_zero (src, m);
+ u32x16_mask_store (r & mask16, src, m);
+ return;
}
- return;
+ for (; n_elts >= 16; n_elts -= 16, src += 16)
+ *((u32x16u *) src) &= mask16;
+ *((u32x16u *) (src + n_elts - 16)) &= mask16;
#elif defined(CLIB_HAVE_VEC256)
u32x8 mask8 = u32x8_splat (mask);
-
- for (i = 0; i + 8 <= n_elts; i += 8)
- *((u32x8u *) (src + i)) &= mask8;
- n_elts -= i;
#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
- if (n_elts)
+ if (n_elts <= 8)
{
- u8 m = pow2_mask (n_elts);
- u32x8_mask_store (u32x8_mask_load_zero (src + i, m) & mask8, src + i, m);
+ u32 m = pow2_mask (n_elts);
+ u32x8 r = u32x8_mask_load_zero (src, m);
+ u32x8_mask_store (r & mask8, src, m);
+ return;
+ }
+#else
+ if (PREDICT_FALSE (n_elts < 4))
+ {
+ if (n_elts & 2)
+ {
+ src[0] &= mask;
+ src[1] &= mask;
+ src += 2;
+ }
+ if (n_elts & 1)
+ src[0] &= mask;
+ return;
+ }
+ if (n_elts <= 8)
+ {
+ u32x4 mask4 = u32x4_splat (mask);
+ *(u32x4u *) src &= mask4;
+ *(u32x4u *) (src + n_elts - 4) &= mask4;
+ return;
}
- return;
#endif
+
+ for (; n_elts >= 8; n_elts -= 8, src += 8)
+ *((u32x8u *) src) &= mask8;
+ *((u32x8u *) (src + n_elts - 8)) &= mask8;
#elif defined(CLIB_HAVE_VEC128)
u32x4 mask4 = u32x4_splat (mask);
- for (i = 0; i + 4 <= n_elts; i += 4)
- *((u32x4u *) (src + i)) &= mask4;
- n_elts -= i;
- switch (n_elts)
+ if (PREDICT_FALSE (n_elts < 4))
{
- case 3:
- src[2] &= mask;
- case 2:
- src[1] &= mask;
- case 1:
- src[0] &= mask;
- case 0:
- default:;
+ if (n_elts & 2)
+ {
+ src[0] &= mask;
+ src[1] &= mask;
+ src += 2;
+ }
+ if (n_elts & 1)
+ src[0] &= mask;
+ return;
}
+
+ for (; n_elts >= 4; n_elts -= 4, src += 4)
+ *((u32x4u *) src) &= mask4;
+ *((u32x4u *) (src + n_elts - 4)) &= mask4;
return;
-#endif
+#else
while (n_elts > 0)
{
src[0] &= mask;
src++;
n_elts--;
}
+#endif
+}
+
+static_always_inline void
+clib_array_mask_set_u32_x64 (u32 *a, u32 v, uword bmp, int n_elts)
+{
+#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u32x16 r = u32x16_splat (v);
+ for (; n_elts > 0; n_elts -= 16, a += 16, bmp >>= 16)
+ u32x16_mask_store (r, a, bmp);
+#elif defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u32x8 r = u32x8_splat (v);
+ for (; n_elts > 0; n_elts -= 8, a += 8, bmp >>= 8)
+ u32x8_mask_store (r, a, bmp);
+#else
+ while (bmp)
+ {
+ a[get_lowest_set_bit_index (bmp)] = v;
+ bmp = clear_lowest_set_bit (bmp);
+ }
+#endif
+}
+
+static_always_inline void
+clib_array_mask_set_u32 (u32 *a, u32 v, uword *bmp, u32 n_elts)
+{
+ while (n_elts >= uword_bits)
+ {
+ clib_array_mask_set_u32_x64 (a, v, bmp++[0], uword_bits);
+ a += uword_bits;
+ n_elts -= uword_bits;
+ }
+
+ clib_array_mask_set_u32_x64 (a, v, bmp[0] & pow2_mask (n_elts), n_elts);
}
#endif
diff --git a/src/vppinfra/vector/compress.h b/src/vppinfra/vector/compress.h
index 1d5d84e77ea..5429113984b 100644
--- a/src/vppinfra/vector/compress.h
+++ b/src/vppinfra/vector/compress.h
@@ -7,6 +7,101 @@
#include <vppinfra/clib.h>
#include <vppinfra/memcpy.h>
+static_always_inline u64 *
+clib_compress_u64_x64 (u64 *dst, u64 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS)
+ u64x8u *sv = (u64x8u *) src;
+ for (int i = 0; i < 8; i++)
+ {
+ u64x8_compress_store (sv[i], mask, dst);
+ dst += _popcnt32 ((u8) mask);
+ mask >>= 8;
+ }
+#elif defined(CLIB_HAVE_VEC256_COMPRESS)
+ u64x4u *sv = (u64x4u *) src;
+ for (int i = 0; i < 16; i++)
+ {
+ u64x4_compress_store (sv[i], mask, dst);
+ dst += _popcnt32 (((u8) mask) & 0x0f);
+ mask >>= 4;
+ }
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+static_always_inline u64 *
+clib_compress_u64_x64_masked (u64 *dst, u64 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS) && \
+ defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u64x8u *sv = (u64x8u *) src;
+ for (int i = 0; i < 8; i++)
+ {
+ u64x8u s = u64x8_mask_load_zero (&sv[i], mask);
+ u64x8_compress_store (s, mask, dst);
+ dst += _popcnt32 ((u8) mask);
+ mask >>= 8;
+ }
+#elif defined(CLIB_HAVE_VEC256_COMPRESS) && \
+ defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u64x4u *sv = (u64x4u *) src;
+ for (int i = 0; i < 16; i++)
+ {
+ u64x4u s = u64x4_mask_load_zero (&sv[i], mask);
+ u64x4_compress_store (s, mask, dst);
+ dst += _popcnt32 (((u8) mask) & 0x0f);
+ mask >>= 4;
+ }
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+/** \brief Compress array of 64-bit elemments into destination array based on
+ * mask
+
+ @param dst destination array of u64 elements
+ @param src source array of u64 elements
+ @param mask array of u64 values representing compress mask
+ @param n_elts number of elements in the source array
+ @return number of elements stored in destionation array
+*/
+
+static_always_inline u32
+clib_compress_u64 (u64 *dst, u64 *src, u64 *mask, u32 n_elts)
+{
+ u64 *dst0 = dst;
+ while (n_elts >= 64)
+ {
+ if (mask[0] == ~0ULL)
+ {
+ clib_memcpy_fast (dst, src, 64 * sizeof (u64));
+ dst += 64;
+ }
+ else
+ dst = clib_compress_u64_x64 (dst, src, mask[0]);
+
+ mask++;
+ src += 64;
+ n_elts -= 64;
+ }
+
+ if (PREDICT_TRUE (n_elts == 0))
+ return dst - dst0;
+
+ return clib_compress_u64_x64_masked (dst, src,
+ mask[0] & pow2_mask (n_elts)) -
+ dst0;
+}
+
static_always_inline u32 *
clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask)
{
@@ -14,9 +109,8 @@ clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask)
u32x16u *sv = (u32x16u *) src;
for (int i = 0; i < 4; i++)
{
- int cnt = _popcnt32 ((u16) mask);
u32x16_compress_store (sv[i], mask, dst);
- dst += cnt;
+ dst += _popcnt32 ((u16) mask);
mask >>= 16;
}
@@ -24,18 +118,46 @@ clib_compress_u32_x64 (u32 *dst, u32 *src, u64 mask)
u32x8u *sv = (u32x8u *) src;
for (int i = 0; i < 8; i++)
{
- int cnt = _popcnt32 ((u8) mask);
u32x8_compress_store (sv[i], mask, dst);
- dst += cnt;
+ dst += _popcnt32 ((u8) mask);
mask >>= 8;
}
#else
- while (mask)
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+static_always_inline u32 *
+clib_compress_u32_x64_masked (u32 *dst, u32 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS) && \
+ defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u32x16u *sv = (u32x16u *) src;
+ for (int i = 0; i < 4; i++)
{
- u16 bit = count_trailing_zeros (mask);
- mask = clear_lowest_set_bit (mask);
- dst++[0] = src[bit];
+ u32x16u s = u32x16_mask_load_zero (&sv[i], mask);
+ u32x16_compress_store (s, mask, dst);
+ dst += _popcnt32 ((u16) mask);
+ mask >>= 16;
}
+
+#elif defined(CLIB_HAVE_VEC256_COMPRESS) && \
+ defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u32x8u *sv = (u32x8u *) src;
+ for (int i = 0; i < 8; i++)
+ {
+ u32x8u s = u32x8_mask_load_zero (&sv[i], mask);
+ u32x8_compress_store (s, mask, dst);
+ dst += _popcnt32 ((u8) mask);
+ mask >>= 8;
+ }
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
#endif
return dst;
}
@@ -72,7 +194,154 @@ clib_compress_u32 (u32 *dst, u32 *src, u64 *mask, u32 n_elts)
if (PREDICT_TRUE (n_elts == 0))
return dst - dst0;
- return clib_compress_u32_x64 (dst, src, mask[0] & pow2_mask (n_elts)) - dst0;
+ return clib_compress_u32_x64_masked (dst, src,
+ mask[0] & pow2_mask (n_elts)) -
+ dst0;
+}
+
+static_always_inline u16 *
+clib_compress_u16_x64 (u16 *dst, u16 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
+ u16x32u *sv = (u16x32u *) src;
+ for (int i = 0; i < 2; i++)
+ {
+ u16x32_compress_store (sv[i], mask, dst);
+ dst += _popcnt32 ((u32) mask);
+ mask >>= 32;
+ }
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+static_always_inline u16 *
+clib_compress_u16_x64_masked (u16 *dst, u16 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16) && \
+ defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u16x32u *sv = (u16x32u *) src;
+ for (int i = 0; i < 2; i++)
+ {
+ u16x32u s = u16x32_mask_load_zero (&sv[i], mask);
+ u16x32_compress_store (s, mask, dst);
+ dst += _popcnt32 ((u32) mask);
+ mask >>= 32;
+ }
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+/** \brief Compress array of 16-bit elemments into destination array based on
+ * mask
+
+ @param dst destination array of u16 elements
+ @param src source array of u16 elements
+ @param mask array of u64 values representing compress mask
+ @param n_elts number of elements in the source array
+ @return number of elements stored in destionation array
+*/
+
+static_always_inline u32
+clib_compress_u16 (u16 *dst, u16 *src, u64 *mask, u32 n_elts)
+{
+ u16 *dst0 = dst;
+ while (n_elts >= 64)
+ {
+ if (mask[0] == ~0ULL)
+ {
+ clib_memcpy_fast (dst, src, 64 * sizeof (u16));
+ dst += 64;
+ }
+ else
+ dst = clib_compress_u16_x64 (dst, src, mask[0]);
+
+ mask++;
+ src += 64;
+ n_elts -= 64;
+ }
+
+ if (PREDICT_TRUE (n_elts == 0))
+ return dst - dst0;
+
+ return clib_compress_u16_x64_masked (dst, src,
+ mask[0] & pow2_mask (n_elts)) -
+ dst0;
+}
+
+static_always_inline u8 *
+clib_compress_u8_x64 (u8 *dst, u8 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16)
+ u8x64u *sv = (u8x64u *) src;
+ u8x64_compress_store (sv[0], mask, dst);
+ dst += _popcnt64 (mask);
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+static_always_inline u8 *
+clib_compress_u8_x64_masked (u8 *dst, u8 *src, u64 mask)
+{
+#if defined(CLIB_HAVE_VEC512_COMPRESS_U8_U16) && \
+ defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u8x64u *sv = (u8x64u *) src;
+ u8x64u s = u8x64_mask_load_zero (sv, mask);
+ u8x64_compress_store (s, mask, dst);
+ dst += _popcnt64 (mask);
+#else
+ u32 i;
+ foreach_set_bit_index (i, mask)
+ dst++[0] = src[i];
+#endif
+ return dst;
+}
+
+/** \brief Compress array of 8-bit elemments into destination array based on
+ * mask
+
+ @param dst destination array of u8 elements
+ @param src source array of u8 elements
+ @param mask array of u64 values representing compress mask
+ @param n_elts number of elements in the source array
+ @return number of elements stored in destionation array
+*/
+
+static_always_inline u32
+clib_compress_u8 (u8 *dst, u8 *src, u64 *mask, u32 n_elts)
+{
+ u8 *dst0 = dst;
+ while (n_elts >= 64)
+ {
+ if (mask[0] == ~0ULL)
+ {
+ clib_memcpy_fast (dst, src, 64);
+ dst += 64;
+ }
+ else
+ dst = clib_compress_u8_x64 (dst, src, mask[0]);
+
+ mask++;
+ src += 64;
+ n_elts -= 64;
+ }
+
+ if (PREDICT_TRUE (n_elts == 0))
+ return dst - dst0;
+
+ return clib_compress_u8_x64_masked (dst, src, mask[0] & pow2_mask (n_elts)) -
+ dst0;
}
#endif
diff --git a/src/vppinfra/vector/count_equal.h b/src/vppinfra/vector/count_equal.h
new file mode 100644
index 00000000000..ca2fbb7fd39
--- /dev/null
+++ b/src/vppinfra/vector/count_equal.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vector_count_equal_h
+#define included_vector_count_equal_h
+#include <vppinfra/clib.h>
+
+static_always_inline uword
+clib_count_equal_u64 (u64 *data, uword max_count)
+{
+ uword count;
+ u64 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u64x4 splat = u64x4_splat (first);
+ while (count + 3 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 8;
+ return count;
+ }
+
+ data += 4;
+ count += 4;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u32 (u32 *data, uword max_count)
+{
+ uword count;
+ u32 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC512)
+ u32x16 splat = u32x16_splat (first);
+ while (count + 15 < max_count)
+ {
+ u32 bmp;
+ bmp = u32x16_is_equal_mask (u32x16_load_unaligned (data), splat);
+ if (bmp != pow2_mask (16))
+ return count + count_trailing_zeros (~bmp);
+
+ data += 16;
+ count += 16;
+ }
+ if (count == max_count)
+ return count;
+ else
+ {
+ u32 mask = pow2_mask (max_count - count);
+ u32 bmp =
+ u32x16_is_equal_mask (u32x16_mask_load_zero (data, mask), splat) &
+ mask;
+ return count + count_trailing_zeros (~bmp);
+ }
+#elif defined(CLIB_HAVE_VEC256)
+ u32x8 splat = u32x8_splat (first);
+ while (count + 7 < max_count)
+ {
+ u32 bmp;
+#ifdef __AVX512F__
+ bmp = u32x8_is_equal_mask (u32x8_load_unaligned (data), splat);
+ if (bmp != pow2_mask (8))
+ return count + count_trailing_zeros (~bmp);
+#else
+ bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ return count + count_trailing_zeros (~bmp) / 4;
+#endif
+
+ data += 8;
+ count += 8;
+ }
+ if (count == max_count)
+ return count;
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ else
+ {
+ u32 mask = pow2_mask (max_count - count);
+ u32 bmp =
+ u32x8_is_equal_mask (u32x8_mask_load_zero (data, mask), splat) & mask;
+ return count + count_trailing_zeros (~bmp);
+ }
+#endif
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u32x4 splat = u32x4_splat (first);
+ while (count + 3 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
+ if (bmp != pow2_mask (4 * 4))
+ {
+ count += count_trailing_zeros (~bmp) / 4;
+ return count;
+ }
+
+ data += 4;
+ count += 4;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u16 (u16 *data, uword max_count)
+{
+ uword count;
+ u16 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u16x16 splat = u16x16_splat (first);
+ while (count + 15 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 2;
+ return count;
+ }
+
+ data += 16;
+ count += 16;
+ }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u16x8 splat = u16x8_splat (first);
+ while (count + 7 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ {
+ count += count_trailing_zeros (~bmp) / 2;
+ return count;
+ }
+
+ data += 8;
+ count += 8;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u8 (u8 *data, uword max_count)
+{
+ uword count;
+ u8 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC512)
+ u8x64 splat = u8x64_splat (first);
+ while (count + 63 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x64_is_equal_mask (u8x64_load_unaligned (data), splat);
+ if (bmp != -1)
+ return count + count_trailing_zeros (~bmp);
+
+ data += 64;
+ count += 64;
+ }
+ if (count == max_count)
+ return count;
+#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ else
+ {
+ u64 mask = pow2_mask (max_count - count);
+ u64 bmp =
+ u8x64_is_equal_mask (u8x64_mask_load_zero (data, mask), splat) & mask;
+ return count + count_trailing_zeros (~bmp);
+ }
+#endif
+#elif defined(CLIB_HAVE_VEC256)
+ u8x32 splat = u8x32_splat (first);
+ while (count + 31 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ return count + count_trailing_zeros (~bmp);
+
+ data += 32;
+ count += 32;
+ }
+ if (count == max_count)
+ return count;
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ else
+ {
+ u32 mask = pow2_mask (max_count - count);
+ u64 bmp =
+ u8x32_msb_mask (u8x32_mask_load_zero (data, mask) == splat) & mask;
+ return count + count_trailing_zeros (~bmp);
+ }
+#endif
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u8x16 splat = u8x16_splat (first);
+ while (count + 15 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ return count + count_trailing_zeros (~bmp);
+
+ data += 16;
+ count += 16;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+#endif
diff --git a/src/vppinfra/vector/index_to_ptr.h b/src/vppinfra/vector/index_to_ptr.h
new file mode 100644
index 00000000000..3985b757d54
--- /dev/null
+++ b/src/vppinfra/vector/index_to_ptr.h
@@ -0,0 +1,257 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vector_index_to_ptr_h
+#define included_vector_index_to_ptr_h
+#include <vppinfra/clib.h>
+
+#ifdef CLIB_HAVE_VEC128
+static_always_inline void
+clib_index_to_ptr_u32x4 (u32 *indices, void **ptrs, i32 i, u64x2 ov, u8 shift)
+{
+ u32x4 iv4 = u32x4_load_unaligned (indices + i);
+ u64x2 pv2;
+ pv2 = u64x2_from_u32x4 (iv4);
+ u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i);
+#ifdef __aarch64__
+ pv2 = u64x2_from_u32x4_high (iv4);
+#else
+ pv2 = u64x2_from_u32x4 ((u32x4) u8x16_word_shift_right (iv4, 8));
+#endif
+ u64x2_store_unaligned ((pv2 << shift) + ov, ptrs + i + 2);
+}
+#endif
+
+/** \brief Convert array of indices to pointers with base and shift
+
+ @param indices source array of u32 indices
+ @param base base pointer
+ @param shift numbers of bits to be shifted
+ @param ptrs destinatin array of pointers
+ @param n_elts number of elements in the source array
+*/
+
+static_always_inline void
+clib_index_to_ptr_u32 (u32 *indices, void *base, u8 shift, void **ptrs,
+ u32 n_elts)
+{
+#if defined CLIB_HAVE_VEC512
+ if (n_elts >= 8)
+ {
+ u64x8 off = u64x8_splat ((u64) base);
+ u64x8 b0, b1, b2, b3, b4, b5, b6, b7;
+
+ while (n_elts >= 64)
+ {
+ b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
+ b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
+ b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
+ b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
+ b4 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 32));
+ b5 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 40));
+ b6 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 48));
+ b7 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 56));
+ u64x8_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
+ u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
+ u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
+ u64x8_store_unaligned ((b4 << shift) + off, ptrs + 32);
+ u64x8_store_unaligned ((b5 << shift) + off, ptrs + 40);
+ u64x8_store_unaligned ((b6 << shift) + off, ptrs + 48);
+ u64x8_store_unaligned ((b7 << shift) + off, ptrs + 56);
+ ptrs += 64;
+ indices += 64;
+ n_elts -= 64;
+ }
+
+ if (n_elts == 0)
+ return;
+
+ if (n_elts >= 32)
+ {
+ b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
+ b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
+ b2 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 16));
+ b3 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 24));
+ u64x8_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
+ u64x8_store_unaligned ((b2 << shift) + off, ptrs + 16);
+ u64x8_store_unaligned ((b3 << shift) + off, ptrs + 24);
+ ptrs += 32;
+ indices += 32;
+ n_elts -= 32;
+ }
+ if (n_elts >= 16)
+ {
+ b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
+ b1 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + 8));
+ u64x8_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x8_store_unaligned ((b1 << shift) + off, ptrs + 8);
+ ptrs += 16;
+ indices += 16;
+ n_elts -= 16;
+ }
+ if (n_elts >= 8)
+ {
+ b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices));
+ u64x8_store_unaligned ((b0 << shift) + off, ptrs);
+ ptrs += 8;
+ indices += 8;
+ n_elts -= 8;
+ }
+
+ if (n_elts == 0)
+ return;
+
+ b0 = u64x8_from_u32x8 (u32x8_load_unaligned (indices + n_elts - 8));
+ u64x8_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 8);
+ }
+ else
+ {
+ u32 mask = pow2_mask (n_elts);
+ u64x8 r = u64x8_from_u32x8 (u32x8_mask_load_zero (indices, mask));
+ u64x8_mask_store ((r << shift) + u64x8_splat ((u64) base), ptrs, mask);
+ return;
+ }
+#elif defined CLIB_HAVE_VEC256
+ if (n_elts >= 4)
+ {
+ u64x4 off = u64x4_splat ((u64) base);
+ u64x4 b0, b1, b2, b3, b4, b5, b6, b7;
+
+ while (n_elts >= 32)
+ {
+ b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
+ b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
+ b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
+ b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
+ b4 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 16));
+ b5 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 20));
+ b6 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 24));
+ b7 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 28));
+ u64x4_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
+ u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
+ u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
+ u64x4_store_unaligned ((b4 << shift) + off, ptrs + 16);
+ u64x4_store_unaligned ((b5 << shift) + off, ptrs + 20);
+ u64x4_store_unaligned ((b6 << shift) + off, ptrs + 24);
+ u64x4_store_unaligned ((b7 << shift) + off, ptrs + 28);
+ ptrs += 32;
+ indices += 32;
+ n_elts -= 32;
+ }
+
+ if (n_elts == 0)
+ return;
+
+ if (n_elts >= 16)
+ {
+ b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
+ b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
+ b2 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 8));
+ b3 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 12));
+ u64x4_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
+ u64x4_store_unaligned ((b2 << shift) + off, ptrs + 8);
+ u64x4_store_unaligned ((b3 << shift) + off, ptrs + 12);
+ ptrs += 16;
+ indices += 16;
+ n_elts -= 16;
+ }
+ if (n_elts >= 8)
+ {
+ b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
+ b1 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + 4));
+ u64x4_store_unaligned ((b0 << shift) + off, ptrs);
+ u64x4_store_unaligned ((b1 << shift) + off, ptrs + 4);
+ ptrs += 8;
+ indices += 8;
+ n_elts -= 8;
+ }
+ if (n_elts > 4)
+ {
+ b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices));
+ u64x4_store_unaligned ((b0 << shift) + off, ptrs);
+ ptrs += 4;
+ indices += 4;
+ n_elts -= 4;
+ }
+
+ b0 = u64x4_from_u32x4 (u32x4_load_unaligned (indices + n_elts - 4));
+ u64x4_store_unaligned ((b0 << shift) + off, ptrs + n_elts - 4);
+ return;
+ }
+#ifdef CLIB_HAVE_VEC256_MASK_LOAD_STORE
+ else
+ {
+ u32 mask = pow2_mask (n_elts);
+ u64x4 r = u64x4_from_u32x4 (u32x4_mask_load_zero (indices, mask));
+ u64x4_mask_store ((r << shift) + u64x4_splat ((u64) base), ptrs, mask);
+ return;
+ }
+#endif
+#elif defined(CLIB_HAVE_VEC128)
+ if (n_elts >= 4)
+ {
+ u64x2 ov = u64x2_splat ((u64) base);
+ u32 *i = (u32 *) indices;
+ void **p = (void **) ptrs;
+ u32 n = n_elts;
+
+ while (n >= 32)
+ {
+ clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 16, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 20, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 24, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 28, ov, shift);
+ indices += 32;
+ ptrs += 32;
+ n -= 32;
+ }
+
+ if (n == 0)
+ return;
+
+ if (n >= 16)
+ {
+ clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 8, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 12, ov, shift);
+ indices += 16;
+ ptrs += 16;
+ n -= 16;
+ }
+
+ if (n >= 8)
+ {
+ clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
+ clib_index_to_ptr_u32x4 (indices, ptrs, 4, ov, shift);
+ indices += 8;
+ ptrs += 8;
+ n -= 8;
+ }
+
+ if (n > 4)
+ clib_index_to_ptr_u32x4 (indices, ptrs, 0, ov, shift);
+
+ clib_index_to_ptr_u32x4 (i, p, n_elts - 4, ov, shift);
+ return;
+ }
+#endif
+ while (n_elts)
+ {
+ ptrs[0] = base + ((u64) indices[0] << shift);
+ ptrs += 1;
+ indices += 1;
+ n_elts -= 1;
+ }
+}
+
+#endif
diff --git a/src/vppinfra/vector/ip_csum.h b/src/vppinfra/vector/ip_csum.h
new file mode 100644
index 00000000000..2cea9b448ea
--- /dev/null
+++ b/src/vppinfra/vector/ip_csum.h
@@ -0,0 +1,339 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vector_ip_csum_h
+#define included_vector_ip_csum_h
+#include <vppinfra/clib.h>
+typedef struct
+{
+ u64 sum;
+ u8 odd;
+} clib_ip_csum_t;
+
+#if defined(CLIB_HAVE_VEC128)
+static_always_inline u64x2
+clib_ip_csum_cvt_and_add_4 (u32x4 v)
+{
+ return ((u64x2) u32x4_interleave_lo ((u32x4) v, u32x4_zero ()) +
+ (u64x2) u32x4_interleave_hi ((u32x4) v, u32x4_zero ()));
+}
+static_always_inline u64
+clib_ip_csum_hadd_2 (u64x2 v)
+{
+ return v[0] + v[1];
+}
+#endif
+
+#if defined(CLIB_HAVE_VEC256)
+static_always_inline u64x4
+clib_ip_csum_cvt_and_add_8 (u32x8 v)
+{
+ return ((u64x4) u32x8_interleave_lo ((u32x8) v, u32x8_zero ()) +
+ (u64x4) u32x8_interleave_hi ((u32x8) v, u32x8_zero ()));
+}
+static_always_inline u64
+clib_ip_csum_hadd_4 (u64x4 v)
+{
+ return clib_ip_csum_hadd_2 (u64x4_extract_lo (v) + u64x4_extract_hi (v));
+}
+#endif
+
+#if defined(CLIB_HAVE_VEC512)
+static_always_inline u64x8
+clib_ip_csum_cvt_and_add_16 (u32x16 v)
+{
+ return ((u64x8) u32x16_interleave_lo ((u32x16) v, u32x16_zero ()) +
+ (u64x8) u32x16_interleave_hi ((u32x16) v, u32x16_zero ()));
+}
+static_always_inline u64
+clib_ip_csum_hadd_8 (u64x8 v)
+{
+ return clib_ip_csum_hadd_4 (u64x8_extract_lo (v) + u64x8_extract_hi (v));
+}
+#endif
+
+static_always_inline void
+clib_ip_csum_inline (clib_ip_csum_t *c, u8 *dst, u8 *src, u16 count,
+ int is_copy)
+{
+ if (c->odd)
+ {
+ c->odd = 0;
+ c->sum += (u16) src[0] << 8;
+ count--;
+ src++;
+ if (is_copy)
+ dst++[0] = src[0];
+ }
+
+#if defined(CLIB_HAVE_VEC512)
+ u64x8 sum8 = {};
+
+ while (count >= 512)
+ {
+ u32x16u *s = (u32x16u *) src;
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[0]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[1]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[2]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[3]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[8]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[5]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[6]);
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[7]);
+ count -= 512;
+ src += 512;
+ if (is_copy)
+ {
+ u32x16u *d = (u32x16u *) dst;
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = s[3];
+ d[4] = s[4];
+ d[5] = s[5];
+ d[6] = s[6];
+ d[7] = s[7];
+ dst += 512;
+ }
+ }
+
+ while (count >= 64)
+ {
+ u32x16u *s = (u32x16u *) src;
+ sum8 += clib_ip_csum_cvt_and_add_16 (s[0]);
+ count -= 64;
+ src += 64;
+ if (is_copy)
+ {
+ u32x16u *d = (u32x16u *) dst;
+ d[0] = s[0];
+ dst += 512;
+ }
+ }
+
+#ifdef CLIB_HAVE_VEC256_MASK_LOAD_STORE
+ if (count)
+ {
+ u64 mask = pow2_mask (count);
+ u32x16 v = (u32x16) u8x64_mask_load_zero (src, mask);
+ sum8 += clib_ip_csum_cvt_and_add_16 (v);
+ c->odd = count & 1;
+ if (is_copy)
+ u32x16_mask_store (v, dst, mask);
+ }
+ c->sum += clib_ip_csum_hadd_8 (sum8);
+ return;
+#endif
+
+ c->sum += clib_ip_csum_hadd_8 (sum8);
+#elif defined(CLIB_HAVE_VEC256)
+ u64x4 sum4 = {};
+
+ while (count >= 256)
+ {
+ u32x8u *s = (u32x8u *) src;
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[0]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[1]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[2]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[3]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[4]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[5]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[6]);
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[7]);
+ count -= 256;
+ src += 256;
+ if (is_copy)
+ {
+ u32x8u *d = (u32x8u *) dst;
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = s[3];
+ d[4] = s[4];
+ d[5] = s[5];
+ d[6] = s[6];
+ d[7] = s[7];
+ dst += 256;
+ }
+ }
+
+ while (count >= 32)
+ {
+ u32x8u *s = (u32x8u *) src;
+ sum4 += clib_ip_csum_cvt_and_add_8 (s[0]);
+ count -= 32;
+ src += 32;
+ if (is_copy)
+ {
+ u32x8u *d = (u32x8u *) dst;
+ d[0] = s[0];
+ dst += 32;
+ }
+ }
+
+#ifdef CLIB_HAVE_VEC256_MASK_LOAD_STORE
+ if (count)
+ {
+ u32 mask = pow2_mask (count);
+ u32x8 v = (u32x8) u8x32_mask_load_zero (src, mask);
+ sum4 += clib_ip_csum_cvt_and_add_8 (v);
+ c->odd = count & 1;
+ if (is_copy)
+ u32x8_mask_store (v, dst, mask);
+ }
+ c->sum += clib_ip_csum_hadd_4 (sum4);
+ return;
+#endif
+
+ c->sum += clib_ip_csum_hadd_4 (sum4);
+#elif defined(CLIB_HAVE_VEC128)
+ u64x2 sum2 = {};
+
+ while (count >= 128)
+ {
+ u32x4u *s = (u32x4u *) src;
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[0]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[1]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[2]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[3]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[4]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[5]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[6]);
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[7]);
+ count -= 128;
+ src += 128;
+ if (is_copy)
+ {
+ u32x4u *d = (u32x4u *) dst;
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = s[3];
+ d[4] = s[4];
+ d[5] = s[5];
+ d[6] = s[6];
+ d[7] = s[7];
+ dst += 128;
+ }
+ }
+
+ while (count >= 16)
+ {
+ u32x4u *s = (u32x4u *) src;
+ sum2 += clib_ip_csum_cvt_and_add_4 (s[0]);
+ count -= 16;
+ src += 16;
+ if (is_copy)
+ {
+ u32x4u *d = (u32x4u *) dst;
+ d[0] = s[0];
+ dst += 16;
+ }
+ }
+ c->sum += clib_ip_csum_hadd_2 (sum2);
+#else
+ while (count >= 4)
+ {
+ u32 v = *((u32 *) src);
+ c->sum += v;
+ count -= 4;
+ src += 4;
+ if (is_copy)
+ {
+ *(u32 *) dst = v;
+ dst += 4;
+ }
+ }
+#endif
+ while (count >= 2)
+ {
+ u16 v = *((u16 *) src);
+ c->sum += v;
+ count -= 2;
+ src += 2;
+ if (is_copy)
+ {
+ *(u16 *) dst = v;
+ dst += 2;
+ }
+ }
+
+ if (count)
+ {
+ c->odd = 1;
+ c->sum += (u16) src[0];
+ if (is_copy)
+ dst[0] = src[0];
+ }
+}
+
+static_always_inline u16
+clib_ip_csum_fold (clib_ip_csum_t *c)
+{
+ u64 sum = c->sum;
+#if defined(__x86_64__) && defined(__BMI2__)
+ u64 tmp = sum;
+ asm volatile(
+ /* using ADC is much faster than mov, shift, add sequence
+ * compiler produces */
+ "shr $32, %[sum] \n\t"
+ "add %k[tmp], %k[sum] \n\t"
+ "mov $16, %k[tmp] \n\t"
+ "shrx %k[tmp], %k[sum], %k[tmp] \n\t"
+ "adc %w[tmp], %w[sum] \n\t"
+ "adc $0, %w[sum] \n\t"
+ : [ sum ] "+&r"(sum), [ tmp ] "+&r"(tmp));
+#else
+ sum = ((u32) sum) + (sum >> 32);
+ sum = ((u16) sum) + (sum >> 16);
+ sum = ((u16) sum) + (sum >> 16);
+#endif
+ return (~((u16) sum));
+}
+
+static_always_inline void
+clib_ip_csum_chunk (clib_ip_csum_t *c, u8 *src, u16 count)
+{
+ return clib_ip_csum_inline (c, 0, src, count, 0);
+}
+
+static_always_inline void
+clib_ip_csum_and_copy_chunk (clib_ip_csum_t *c, u8 *src, u8 *dst, u16 count)
+{
+ return clib_ip_csum_inline (c, dst, src, count, 1);
+}
+
+static_always_inline u16
+clib_ip_csum (u8 *src, u16 count)
+{
+ clib_ip_csum_t c = {};
+ if (COMPILE_TIME_CONST (count) && count == 12)
+ {
+ for (int i = 0; i < 3; i++)
+ c.sum += ((u32 *) src)[i];
+ }
+ else if (COMPILE_TIME_CONST (count) && count == 20)
+ {
+ for (int i = 0; i < 5; i++)
+ c.sum += ((u32 *) src)[i];
+ }
+ else if (COMPILE_TIME_CONST (count) && count == 40)
+ {
+ for (int i = 0; i < 10; i++)
+ c.sum += ((u32 *) src)[i];
+ }
+ else
+ clib_ip_csum_inline (&c, 0, src, count, 0);
+ return clib_ip_csum_fold (&c);
+}
+
+static_always_inline u16
+clib_ip_csum_and_copy (u8 *dst, u8 *src, u16 count)
+{
+ clib_ip_csum_t c = {};
+ clib_ip_csum_inline (&c, dst, src, count, 1);
+ return clib_ip_csum_fold (&c);
+}
+
+#endif
diff --git a/src/vppinfra/vector/mask_compare.h b/src/vppinfra/vector/mask_compare.h
index cac48a31f47..fc72d7dac35 100644
--- a/src/vppinfra/vector/mask_compare.h
+++ b/src/vppinfra/vector/mask_compare.h
@@ -8,7 +8,7 @@
#include <vppinfra/memcpy.h>
static_always_inline u64
-clib_mask_compare_u16_x64 (u16 v, u16 *a, u32 n_elts)
+clib_mask_compare_u16_x64 (u16 v, u16 *a)
{
u64 mask = 0;
#if defined(CLIB_HAVE_VEC512)
@@ -47,6 +47,38 @@ clib_mask_compare_u16_x64 (u16 v, u16 *a, u32 n_elts)
(u64) i8x16_msb_mask (i8x16_pack (v8 == av[4], v8 == av[5])) << 32 |
(u64) i8x16_msb_mask (i8x16_pack (v8 == av[6], v8 == av[7])) << 48);
#else
+ for (int i = 0; i < 64; i++)
+ if (a[i] == v)
+ mask |= 1ULL << i;
+#endif
+ return mask;
+}
+
+static_always_inline u64
+clib_mask_compare_u16_x64_n (u16 v, u16 *a, u32 n_elts)
+{
+ u64 mask = 0;
+ CLIB_UNUSED (u64 data_mask) = pow2_mask (n_elts);
+#if defined(CLIB_HAVE_VEC512)
+ u16x32 v32 = u16x32_splat (v);
+ u16x32u *av = (u16x32u *) a;
+ mask = ((u64) u16x32_is_equal_mask (
+ u16x32_mask_load_zero (&av[0], data_mask), v32) |
+ (u64) u16x32_is_equal_mask (
+ u16x32_mask_load_zero (&av[1], data_mask >> 32), v32)
+ << 32);
+#elif defined(CLIB_HAVE_VEC256) && defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u16x16 v16 = u16x16_splat (v);
+ u16x16u *av = (u16x16u *) a;
+ i8x32 x;
+
+ x = i8x32_pack (v16 == u16x16_mask_load_zero (&av[0], data_mask),
+ v16 == u16x16_mask_load_zero (&av[1], data_mask >> 16));
+ mask = i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3));
+ x = i8x32_pack (v16 == u16x16_mask_load_zero (&av[2], data_mask >> 32),
+ v16 == u16x16_mask_load_zero (&av[3], data_mask >> 48));
+ mask |= (u64) i8x32_msb_mask ((i8x32) u64x4_permute (x, 0, 2, 1, 3)) << 32;
+#else
for (int i = 0; i < n_elts; i++)
if (a[i] == v)
mask |= 1ULL << i;
@@ -68,7 +100,7 @@ clib_mask_compare_u16 (u16 v, u16 *a, u64 *mask, u32 n_elts)
{
while (n_elts >= 64)
{
- mask++[0] = clib_mask_compare_u16_x64 (v, a, 64);
+ mask++[0] = clib_mask_compare_u16_x64 (v, a);
n_elts -= 64;
a += 64;
}
@@ -76,11 +108,11 @@ clib_mask_compare_u16 (u16 v, u16 *a, u64 *mask, u32 n_elts)
if (PREDICT_TRUE (n_elts == 0))
return;
- mask[0] = clib_mask_compare_u16_x64 (v, a, n_elts) & pow2_mask (n_elts);
+ mask[0] = clib_mask_compare_u16_x64_n (v, a, n_elts) & pow2_mask (n_elts);
}
static_always_inline u64
-clib_mask_compare_u32_x64 (u32 v, u32 *a, u32 n_elts)
+clib_mask_compare_u32_x64 (u32 v, u32 *a)
{
u64 mask = 0;
#if defined(CLIB_HAVE_VEC512)
@@ -131,6 +163,57 @@ clib_mask_compare_u32_x64 (u32 v, u32 *a, u32 n_elts)
}
#else
+ for (int i = 0; i < 64; i++)
+ if (a[i] == v)
+ mask |= 1ULL << i;
+#endif
+ return mask;
+}
+
+static_always_inline u64
+clib_mask_compare_u32_x64_n (u32 v, u32 *a, u32 n_elts)
+{
+ u64 mask = 0;
+ CLIB_UNUSED (u64 data_mask) = pow2_mask (n_elts);
+#if defined(CLIB_HAVE_VEC512)
+ u32x16 v16 = u32x16_splat (v);
+ u32x16u *av = (u32x16u *) a;
+ mask = ((u64) u32x16_is_equal_mask (
+ u32x16_mask_load_zero (&av[0], data_mask), v16) |
+ (u64) u32x16_is_equal_mask (
+ u32x16_mask_load_zero (&av[1], data_mask >> 16), v16)
+ << 16 |
+ (u64) u32x16_is_equal_mask (
+ u32x16_mask_load_zero (&av[2], data_mask >> 32), v16)
+ << 32 |
+ (u64) u32x16_is_equal_mask (
+ u32x16_mask_load_zero (&av[3], data_mask >> 48), v16)
+ << 48);
+#elif defined(CLIB_HAVE_VEC256) && defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u32x8 v8 = u32x8_splat (v);
+ u32x8u *av = (u32x8u *) a;
+ u32x8 m = { 0, 4, 1, 5, 2, 6, 3, 7 };
+ i8x32 c;
+
+ c = i8x32_pack (
+ i16x16_pack (
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[0], data_mask)),
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[1], data_mask >> 8))),
+ i16x16_pack (
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[2], data_mask >> 16)),
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[3], data_mask >> 24))));
+ mask = i8x32_msb_mask ((i8x32) u32x8_permute ((u32x8) c, m));
+
+ c = i8x32_pack (
+ i16x16_pack (
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[4], data_mask >> 32)),
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[5], data_mask >> 40))),
+ i16x16_pack (
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[6], data_mask >> 48)),
+ (i32x8) (v8 == u32x8_mask_load_zero (&av[7], data_mask >> 56))));
+ mask |= (u64) i8x32_msb_mask ((i8x32) u32x8_permute ((u32x8) c, m)) << 32;
+ mask |= (u64) i8x32_msb_mask ((i8x32) u32x8_permute ((u32x8) c, m)) << 32;
+#else
for (int i = 0; i < n_elts; i++)
if (a[i] == v)
mask |= 1ULL << i;
@@ -152,7 +235,119 @@ clib_mask_compare_u32 (u32 v, u32 *a, u64 *bitmap, u32 n_elts)
{
while (n_elts >= 64)
{
- bitmap++[0] = clib_mask_compare_u32_x64 (v, a, 64);
+ bitmap++[0] = clib_mask_compare_u32_x64 (v, a);
+ n_elts -= 64;
+ a += 64;
+ }
+
+ if (PREDICT_TRUE (n_elts == 0))
+ return;
+
+ bitmap[0] = clib_mask_compare_u32_x64_n (v, a, n_elts) & pow2_mask (n_elts);
+}
+
+static_always_inline u64
+clib_mask_compare_u64_x64 (u64 v, u64 *a)
+{
+ u64 mask = 0;
+#if defined(CLIB_HAVE_VEC512)
+ u64x8 v8 = u64x8_splat (v);
+ u64x8u *av = (u64x8u *) a;
+ mask = ((u64) u64x8_is_equal_mask (av[0], v8) |
+ (u64) u64x8_is_equal_mask (av[1], v8) << 8 |
+ (u64) u64x8_is_equal_mask (av[2], v8) << 16 |
+ (u64) u64x8_is_equal_mask (av[3], v8) << 24 |
+ (u64) u64x8_is_equal_mask (av[4], v8) << 32 |
+ (u64) u64x8_is_equal_mask (av[5], v8) << 40 |
+ (u64) u64x8_is_equal_mask (av[6], v8) << 48 |
+ (u64) u64x8_is_equal_mask (av[7], v8) << 56);
+
+#elif defined(CLIB_HAVE_VEC256) && defined(__BMI2__)
+ u64x4 v4 = u64x4_splat (v);
+ u64x4u *av = (u64x4u *) a;
+
+ for (int i = 0; i < 16; i += 2)
+ {
+ u64 l = u8x32_msb_mask (v4 == av[i]);
+ u64 h = u8x32_msb_mask (v4 == av[i + 1]);
+ mask |= _pext_u64 (l | h << 32, 0x0101010101010101) << (i * 4);
+ }
+#else
+ for (int i = 0; i < 64; i++)
+ if (a[i] == v)
+ mask |= 1ULL << i;
+#endif
+ return mask;
+}
+
+static_always_inline u64
+clib_mask_compare_u64_x64_n (u64 v, u64 *a, u32 n_elts)
+{
+ u64 mask = 0;
+ CLIB_UNUSED (u64 data_mask) = pow2_mask (n_elts);
+#if defined(CLIB_HAVE_VEC512)
+ u64x8 v8 = u64x8_splat (v);
+ u64x8u *av = (u64x8u *) a;
+ mask =
+ ((u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[0], data_mask), v8) |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[1], data_mask >> 8),
+ v8)
+ << 8 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[2], data_mask >> 16),
+ v8)
+ << 16 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[3], data_mask >> 24),
+ v8)
+ << 24 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[4], data_mask >> 32),
+ v8)
+ << 32 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[5], data_mask >> 40),
+ v8)
+ << 40 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[6], data_mask >> 48),
+ v8)
+ << 48 |
+ (u64) u64x8_is_equal_mask (u64x8_mask_load_zero (&av[7], data_mask >> 56),
+ v8)
+ << 56);
+
+#elif defined(CLIB_HAVE_VEC256) && defined(__BMI2__) && \
+ defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u64x4 v4 = u64x4_splat (v);
+ u64x4u *av = (u64x4u *) a;
+
+ for (int i = 0; i < 16; i += 2)
+ {
+ u64 l = u8x32_msb_mask (v4 == u64x4_mask_load_zero (&av[i], data_mask));
+ u64 h = u8x32_msb_mask (
+ v4 == u64x4_mask_load_zero (&av[i + 1], data_mask >> 4));
+ mask |= _pext_u64 (l | h << 32, 0x0101010101010101) << (i * 4);
+ data_mask >>= 8;
+ }
+#else
+ for (int i = 0; i < n_elts; i++)
+ if (a[i] == v)
+ mask |= 1ULL << i;
+#endif
+ return mask;
+}
+
+/** \brief Compare 64-bit elemments with provied value and return bitmap
+
+ @param v value to compare elements with
+ @param a array of u64 elements
+ @param mask array of u64 where reuslting mask will be stored
+ @param n_elts number of elements in the array
+ @return none
+*/
+
+static_always_inline void
+clib_mask_compare_u64 (u64 v, u64 *a, u64 *bitmap, u32 n_elts)
+{
+ while (n_elts >= 64)
+ {
+ bitmap++[0] = clib_mask_compare_u64_x64 (v, a);
n_elts -= 64;
a += 64;
}
@@ -160,7 +355,7 @@ clib_mask_compare_u32 (u32 v, u32 *a, u64 *bitmap, u32 n_elts)
if (PREDICT_TRUE (n_elts == 0))
return;
- bitmap[0] = clib_mask_compare_u32_x64 (v, a, n_elts) & pow2_mask (n_elts);
+ bitmap[0] = clib_mask_compare_u64_x64_n (v, a, n_elts) & pow2_mask (n_elts);
}
#endif
diff --git a/src/vppinfra/vector/test/compress.c b/src/vppinfra/vector/test/compress.c
deleted file mode 100644
index 7e3eba9892d..00000000000
--- a/src/vppinfra/vector/test/compress.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright(c) 2021 Cisco Systems, Inc.
- */
-
-#include <vppinfra/format.h>
-#include <vppinfra/vector/test/test.h>
-#include <vppinfra/vector/compress.h>
-
-__clib_test_fn u32
-clib_compress_u32_wrapper (u32 *dst, u32 *src, u64 *mask, u32 n_elts)
-{
- return clib_compress_u32 (dst, src, mask, n_elts);
-}
-
-typedef struct
-{
- u64 mask[10];
- u32 n_elts;
-} compress_test_t;
-
-static compress_test_t tests[] = {
- { .mask = { 1 }, .n_elts = 1 },
- { .mask = { 2 }, .n_elts = 2 },
- { .mask = { 3 }, .n_elts = 2 },
- { .mask = { 0, 1 }, .n_elts = 66 },
- { .mask = { 0, 2 }, .n_elts = 69 },
- { .mask = { 0, 3 }, .n_elts = 66 },
- { .mask = { ~0ULL, ~0ULL, ~0ULL, ~0ULL }, .n_elts = 62 },
- { .mask = { ~0ULL, ~0ULL, ~0ULL, ~0ULL }, .n_elts = 255 },
- { .mask = { ~0ULL, 1, 1, ~0ULL }, .n_elts = 256 },
-};
-
-static clib_error_t *
-test_clib_compress_u32 (clib_error_t *err)
-{
- u32 src[513];
- u32 dst[513];
- u32 i, j;
-
- for (i = 0; i < ARRAY_LEN (src); i++)
- src[i] = i;
-
- for (i = 0; i < ARRAY_LEN (tests); i++)
- {
- compress_test_t *t = tests + i;
- u32 *dp = dst;
- u32 r;
-
- for (j = 0; j < ARRAY_LEN (dst); j++)
- dst[j] = 0xa5a5a5a5;
-
- r = clib_compress_u32_wrapper (dst, src, t->mask, t->n_elts);
-
- for (j = 0; j < t->n_elts; j++)
- {
- if ((t->mask[j >> 6] & (1ULL << (j & 0x3f))) == 0)
- continue;
-
- if (dp[0] != src[j])
- return clib_error_return (err,
- "wrong data in testcase %u at "
- "(dst[%u] = 0x%x, src[%u] = 0x%x)",
- i, dp - dst, dp[0], j, src[j]);
- dp++;
- }
-
- if (dst[dp - dst + 1] != 0xa5a5a5a5)
- return clib_error_return (err, "buffer overrun in testcase %u", i);
-
- if (dp - dst != r)
- return clib_error_return (err, "wrong number of elts in testcase %u",
- i);
- }
-
- return err;
-}
-
-REGISTER_TEST (clib_compress_u32) = {
- .name = "clib_compress_u32",
- .fn = test_clib_compress_u32,
-};
diff --git a/src/vppinfra/vector/test/test.c b/src/vppinfra/vector/test/test.c
deleted file mode 100644
index 1a8b9d6ea10..00000000000
--- a/src/vppinfra/vector/test/test.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright(c) 2021 Cisco Systems, Inc.
- */
-
-#include <vppinfra/format.h>
-#include <vppinfra/vector/test/test.h>
-
-test_registration_t *test_registrations[CLIB_MARCH_TYPE_N_VARIANTS] = {};
-
-int
-test_march_supported (clib_march_variant_type_t type)
-{
-#define _(s, n) \
- if (CLIB_MARCH_VARIANT_TYPE_##s == type) \
- return clib_cpu_march_priority_##s ();
- foreach_march_variant
-#undef _
- return 0;
-}
-
-int
-main (int argc, char *argv[])
-{
- clib_mem_init (0, 64ULL << 20);
-
- for (int i = 0; i < CLIB_MARCH_TYPE_N_VARIANTS; i++)
- {
- test_registration_t *r = test_registrations[i];
-
- if (r == 0 || test_march_supported (i) < 0)
- continue;
-
- fformat (stdout, "\nMultiarch Variant: %U\n", format_march_variant, i);
- fformat (stdout,
- "-------------------------------------------------------\n");
- while (r)
- {
- clib_error_t *err;
- err = (r->fn) (0);
- fformat (stdout, "%-50s %s\n", r->name, err ? "FAIL" : "PASS");
- if (err)
- {
- clib_error_report (err);
- fformat (stdout, "\n");
- }
-
- r = r->next;
- }
- }
-
- fformat (stdout, "\n");
- return 0;
-}
diff --git a/src/vppinfra/vector/test/test.h b/src/vppinfra/vector/test/test.h
deleted file mode 100644
index bc499fb24e8..00000000000
--- a/src/vppinfra/vector/test/test.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0
- * Copyright(c) 2021 Cisco Systems, Inc.
- */
-
-#ifndef included_test_test_h
-#define included_test_test_h
-
-#include <vppinfra/cpu.h>
-
-typedef clib_error_t *(test_fn_t) (clib_error_t *);
-
-typedef struct test_registration_
-{
- char *name;
- u8 multiarch : 1;
- test_fn_t *fn;
- struct test_registration_ *next;
-} test_registration_t;
-
-extern test_registration_t *test_registrations[CLIB_MARCH_TYPE_N_VARIANTS];
-
-#define __clib_test_fn static __clib_noinline __clib_section (".test_wrapper")
-
-#define REGISTER_TEST(x) \
- test_registration_t CLIB_MARCH_SFX (__test_##x); \
- static void __clib_constructor CLIB_MARCH_SFX (__test_registration_##x) ( \
- void) \
- { \
- test_registration_t *r = &CLIB_MARCH_SFX (__test_##x); \
- r->next = test_registrations[CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE)]; \
- test_registrations[CLIB_MARCH_SFX (CLIB_MARCH_VARIANT_TYPE)] = r; \
- } \
- test_registration_t CLIB_MARCH_SFX (__test_##x)
-
-#endif
diff --git a/src/vppinfra/vector/toeplitz.c b/src/vppinfra/vector/toeplitz.c
new file mode 100644
index 00000000000..fcc4b64ad19
--- /dev/null
+++ b/src/vppinfra/vector/toeplitz.c
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#include <vppinfra/clib.h>
+#include <vppinfra/mem.h>
+#include <vppinfra/vector/toeplitz.h>
+
+static u8 default_key[40] = {
+ 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
+ 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
+ 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
+ 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
+};
+
+#ifdef __x86_64__
+static_always_inline void
+clib_toeplitz_hash_key_expand_8 (u64x2 kv, u64x8u *m)
+{
+ u64x8 kv4, a, b, shift = { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+ kv4 = (u64x8){ kv[0], kv[1], kv[0], kv[1], kv[0], kv[1], kv[0], kv[1] };
+
+ /* clang-format off */
+ /* create 8 byte-swapped copies of the bytes 0 - 7 */
+ a = (u64x8) u8x64_shuffle (kv4,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0,
+ 0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0);
+ /* create 8 byte-swapped copies of the bytes 4 - 11 */
+ b = (u64x8) u8x64_shuffle (kv4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4,
+ 0xb, 0xa, 0x9, 0x8, 0x7, 0x6, 0x5, 0x4);
+ /* clang-format on */
+
+ /* shift each 64-bit element for 0 - 7 bits */
+ a <<= shift;
+ b <<= shift;
+
+ /* clang-format off */
+ /* construct eight 8x8 bit matrix used by gf2p8affine */
+ * m = (u64x8) u8x64_shuffle2 (a, b,
+ 0x07, 0x0f, 0x17, 0x1f, 0x27, 0x2f, 0x37, 0x3f,
+ 0x06, 0x0e, 0x16, 0x1e, 0x26, 0x2e, 0x36, 0x3e,
+ 0x05, 0x0d, 0x15, 0x1d, 0x25, 0x2d, 0x35, 0x3d,
+ 0x04, 0x0c, 0x14, 0x1c, 0x24, 0x2c, 0x34, 0x3c,
+ 0x47, 0x4f, 0x57, 0x5f, 0x67, 0x6f, 0x77, 0x7f,
+ 0x46, 0x4e, 0x56, 0x5e, 0x66, 0x6e, 0x76, 0x7e,
+ 0x45, 0x4d, 0x55, 0x5d, 0x65, 0x6d, 0x75, 0x7d,
+ 0x44, 0x4c, 0x54, 0x5c, 0x64, 0x6c, 0x74, 0x7c);
+ /* clang-format on */
+}
+
+void
+clib_toeplitz_hash_key_expand (u64 *matrixes, u8 *key, int size)
+{
+ u64x8u *m = (u64x8u *) matrixes;
+ u64x2 kv = {}, zero = {};
+
+ while (size >= 8)
+ {
+ kv = *(u64x2u *) key;
+ clib_toeplitz_hash_key_expand_8 (kv, m);
+ key += 8;
+ m++;
+ size -= 8;
+ }
+
+ kv = u64x2_shuffle2 (kv, zero, 1, 2);
+ clib_toeplitz_hash_key_expand_8 (kv, m);
+}
+#endif
+
+__clib_export clib_toeplitz_hash_key_t *
+clib_toeplitz_hash_key_init (u8 *key, u32 keylen)
+{
+ clib_toeplitz_hash_key_t *k;
+ u32 size, gfni_size = 0;
+
+ if (key == 0)
+ {
+ key = default_key;
+ keylen = sizeof (default_key);
+ }
+
+ size =
+ round_pow2 (sizeof (clib_toeplitz_hash_key_t) + round_pow2 (keylen, 16),
+ CLIB_CACHE_LINE_BYTES);
+#ifdef __x86_64__
+ gfni_size = round_pow2 ((keylen + 1) * 8, CLIB_CACHE_LINE_BYTES);
+#endif
+
+ k = clib_mem_alloc_aligned (size + gfni_size, CLIB_CACHE_LINE_BYTES);
+ clib_memset_u8 (k, 0, size + gfni_size);
+ k->key_length = keylen;
+ k->gfni_offset = size;
+ clib_memcpy_fast (k->data, key, keylen);
+
+#ifdef __x86_64__
+ clib_toeplitz_hash_key_expand ((u64 *) ((u8 *) k + k->gfni_offset), k->data,
+ k->key_length);
+#endif
+
+ return k;
+}
+
+__clib_export void
+clib_toeplitz_hash_key_free (clib_toeplitz_hash_key_t *k)
+{
+ clib_mem_free (k);
+}
diff --git a/src/vppinfra/vector/toeplitz.h b/src/vppinfra/vector/toeplitz.h
new file mode 100644
index 00000000000..76297f05195
--- /dev/null
+++ b/src/vppinfra/vector/toeplitz.h
@@ -0,0 +1,513 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vector_toeplitz_h
+#define included_vector_toeplitz_h
+#include <vppinfra/clib.h>
+
+typedef struct
+{
+ u16 key_length;
+ u16 gfni_offset;
+ u8 data[];
+} clib_toeplitz_hash_key_t;
+
+clib_toeplitz_hash_key_t *clib_toeplitz_hash_key_init (u8 *key, u32 keylen);
+void clib_toeplitz_hash_key_free (clib_toeplitz_hash_key_t *k);
+
+#ifdef CLIB_HAVE_VEC256
+static_always_inline u32x8
+toeplitz_hash_one_x8 (u32x8 hash, u64x4 v4, u8 data, u8 off)
+{
+ u32x8 v8 = u32x8_shuffle2 (v4 << (off * 8), v4 << (off * 8 + 4),
+ /*uppper 32 bits of each u64 in reverse order */
+ 15, 13, 11, 9, 7, 5, 3, 1);
+
+#ifdef CLIB_HAVE_VEC256_MASK_BITWISE_OPS
+ return u32x8_mask_xor (hash, v8, data);
+#else
+ static const u32x8 bits = { 1, 2, 4, 8, 16, 32, 64, 128 };
+ return hash ^ (((u32x8_splat (data) & bits) != u32x8_zero ()) & v8);
+#endif
+}
+#endif
+
+#if defined(__GFNI__) && defined(__AVX512F__)
+static const u8x64 __clib_toeplitz_hash_gfni_permute = {
+ /* clang-format off */
+ 0x00, 0x01, 0x02, 0x03, 0x40, 0x41, 0x42, 0x43,
+ 0x01, 0x02, 0x03, 0x04, 0x41, 0x42, 0x43, 0x44,
+ 0x02, 0x03, 0x04, 0x05, 0x42, 0x43, 0x44, 0x45,
+ 0x03, 0x04, 0x05, 0x06, 0x43, 0x44, 0x45, 0x46,
+ 0x04, 0x05, 0x06, 0x07, 0x44, 0x45, 0x46, 0x47,
+ 0x05, 0x06, 0x07, 0x08, 0x45, 0x46, 0x47, 0x48,
+ 0x06, 0x07, 0x08, 0x09, 0x46, 0x47, 0x48, 0x49,
+ 0x07, 0x08, 0x09, 0x0a, 0x47, 0x48, 0x49, 0x4a
+ /* clang-format on */
+};
+static_always_inline u64x8
+clib_toeplitz_hash_gfni_one (u8x64 d0, u64x8 m, int i)
+{
+
+ d0 = i == 1 ? (u8x64) u64x8_align_right (d0, d0, 1) : d0;
+ d0 = i == 2 ? (u8x64) u64x8_align_right (d0, d0, 2) : d0;
+ d0 = i == 3 ? (u8x64) u64x8_align_right (d0, d0, 3) : d0;
+ d0 = i == 4 ? (u8x64) u64x8_align_right (d0, d0, 4) : d0;
+ d0 = i == 5 ? (u8x64) u64x8_align_right (d0, d0, 5) : d0;
+ d0 = i == 6 ? (u8x64) u64x8_align_right (d0, d0, 6) : d0;
+
+ d0 = u8x64_permute (__clib_toeplitz_hash_gfni_permute, d0);
+
+ return (u64x8) _mm512_gf2p8affine_epi64_epi8 ((__m512i) d0, (__m512i) m, 0);
+}
+
+static_always_inline u64x8
+clib_toeplitz_hash_gfni_two (u8x64 d0, u8x64 d1, u64x8 m, int i)
+{
+
+ d0 = i == 1 ? (u8x64) u64x8_align_right (d0, d0, 1) : d0;
+ d1 = i == 1 ? (u8x64) u64x8_align_right (d1, d1, 1) : d1;
+ d0 = i == 2 ? (u8x64) u64x8_align_right (d0, d0, 2) : d0;
+ d1 = i == 2 ? (u8x64) u64x8_align_right (d1, d1, 2) : d1;
+ d0 = i == 3 ? (u8x64) u64x8_align_right (d0, d0, 3) : d0;
+ d1 = i == 3 ? (u8x64) u64x8_align_right (d1, d1, 3) : d1;
+ d0 = i == 4 ? (u8x64) u64x8_align_right (d0, d0, 4) : d0;
+ d1 = i == 4 ? (u8x64) u64x8_align_right (d1, d1, 4) : d1;
+ d0 = i == 5 ? (u8x64) u64x8_align_right (d0, d0, 5) : d0;
+ d1 = i == 5 ? (u8x64) u64x8_align_right (d1, d1, 5) : d1;
+ d0 = i == 6 ? (u8x64) u64x8_align_right (d0, d0, 6) : d0;
+ d1 = i == 6 ? (u8x64) u64x8_align_right (d1, d1, 6) : d1;
+
+ d0 = u8x64_permute2 (__clib_toeplitz_hash_gfni_permute, d0, d1);
+
+ return (u64x8) _mm512_gf2p8affine_epi64_epi8 ((__m512i) d0, (__m512i) m, 0);
+}
+#endif
+
+static_always_inline u32
+clib_toeplitz_hash (clib_toeplitz_hash_key_t *k, u8 *data, int n_bytes)
+{
+ u8 *key = k->data;
+ /* key must be 4 bytes longer than data */
+ ASSERT (k->key_length - n_bytes >= 4);
+
+#if defined(__GFNI__) && defined(__AVX512F__)
+ u8x64 d0;
+ u64x8 h0 = {};
+ u64x8u *m = (u64x8u *) ((u8 *) k + k->gfni_offset);
+
+ /* move data ptr backwards for 3 byte so mask load "prepends" three zeros */
+ data -= 3;
+ n_bytes += 3;
+
+ if (n_bytes < 64)
+ {
+ d0 = u8x64_mask_load_zero ((u8 *) data, pow2_mask (n_bytes - 3) << 3);
+ goto last8;
+ }
+
+ d0 = u8x64_mask_load_zero ((u8 *) data, -1ULL << 3);
+next56:
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_one (d0, m[0], 0),
+ clib_toeplitz_hash_gfni_one (d0, m[1], 1));
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_one (d0, m[2], 2),
+ clib_toeplitz_hash_gfni_one (d0, m[3], 3));
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_one (d0, m[4], 4),
+ clib_toeplitz_hash_gfni_one (d0, m[5], 5));
+ h0 ^= clib_toeplitz_hash_gfni_one (d0, m[6], 6);
+ n_bytes -= 56;
+ data += 56;
+ m += 7;
+
+ if (n_bytes >= 64)
+ {
+ d0 = *(u8x64u *) data;
+ goto next56;
+ }
+
+ if (n_bytes == 0)
+ goto done;
+
+ d0 = u8x64_mask_load_zero ((u8 *) data, pow2_mask (n_bytes));
+last8:
+ h0 ^= clib_toeplitz_hash_gfni_one (d0, m[0], 0);
+ n_bytes -= 8;
+
+ if (n_bytes > 0)
+ {
+ m += 1;
+ d0 = (u8x64) u64x8_align_right (u64x8_zero (), d0, 1);
+ goto last8;
+ }
+
+done:
+ return u64x8_hxor (h0);
+#elif defined(CLIB_HAVE_VEC256)
+ u64x4 v4, shift = { 0, 1, 2, 3 };
+ u32x8 h0 = {};
+
+ while (n_bytes >= 4)
+ {
+ v4 = u64x4_splat (clib_net_to_host_u64 (*(u64u *) key)) << shift;
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[0], 0);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[1], 1);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[2], 2);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[3], 3);
+
+ data += 4;
+ key += 4;
+ n_bytes -= 4;
+ }
+
+ if (n_bytes)
+ {
+ u64 v = (u64) clib_net_to_host_u32 ((u64) (*(u32u *) key)) << 32;
+ v |= (u64) key[4] << 24;
+
+ if (n_bytes == 3)
+ {
+ v |= (u64) key[5] << 16;
+ v |= (u64) key[6] << 8;
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[0], 0);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[1], 1);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[2], 2);
+ }
+ else if (n_bytes == 2)
+ {
+ v |= (u64) key[5] << 16;
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[0], 0);
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[1], 1);
+ }
+ else
+ {
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data[0], 0);
+ }
+ }
+
+ return u32x8_hxor (h0);
+#endif
+ u64 v, hash = 0;
+
+ while (n_bytes >= 4)
+ {
+ v = clib_net_to_host_u64 (*(u64u *) key);
+
+ for (u8 bit = 1 << 7, byte = data[0]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ for (u8 bit = 1 << 7, byte = data[1]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ for (u8 bit = 1 << 7, byte = data[2]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ for (u8 bit = 1 << 7, byte = data[3]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+
+ data += 4;
+ key += 4;
+ n_bytes -= 4;
+ }
+
+ if (n_bytes)
+ {
+ v = (u64) clib_net_to_host_u32 ((u64) (*(u32u *) key)) << 32;
+ v |= (u64) key[4] << 24;
+ for (u8 bit = 1 << 7, byte = data[0]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ if (n_bytes > 1)
+ {
+ v |= (u64) key[5] << 24;
+ for (u8 bit = 1 << 7, byte = data[1]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ }
+ if (n_bytes > 2)
+ {
+ v |= (u64) key[6] << 24;
+ for (u8 bit = 1 << 7, byte = data[2]; bit; bit >>= 1, v <<= 1)
+ hash ^= byte & bit ? v : 0;
+ }
+ }
+ return hash >> 32;
+}
+
+static_always_inline void
+clib_toeplitz_hash_x4 (clib_toeplitz_hash_key_t *k, u8 *data0, u8 *data1,
+ u8 *data2, u8 *data3, u32 *hash0, u32 *hash1,
+ u32 *hash2, u32 *hash3, int n_bytes)
+{
+ /* key must be 4 bytes longer than data */
+ ASSERT (k->key_length - n_bytes >= 4);
+#if defined(__GFNI__) && defined(__AVX512F__)
+ u64x8u *m = (u64x8u *) ((u8 *) k + k->gfni_offset);
+ u8x64 d0, d1, d2, d3;
+ u64x8 h0 = {}, h2 = {};
+ u64 h, mask;
+
+ /* move data ptr backwards for 3 byte so mask load "prepends" three zeros */
+ data0 -= 3;
+ data1 -= 3;
+ data2 -= 3;
+ data3 -= 3;
+ n_bytes += 3;
+
+ if (n_bytes < 64)
+ {
+ mask = pow2_mask (n_bytes - 3) << 3;
+ d0 = u8x64_mask_load_zero ((u8 *) data0, mask);
+ d1 = u8x64_mask_load_zero ((u8 *) data1, mask);
+ d2 = u8x64_mask_load_zero ((u8 *) data2, mask);
+ d3 = u8x64_mask_load_zero ((u8 *) data3, mask);
+ goto last8;
+ }
+
+ mask = -1ULL << 3;
+ d0 = u8x64_mask_load_zero ((u8 *) data0, mask);
+ d1 = u8x64_mask_load_zero ((u8 *) data1, mask);
+ d2 = u8x64_mask_load_zero ((u8 *) data2, mask);
+ d3 = u8x64_mask_load_zero ((u8 *) data3, mask);
+next56:
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_two (d0, d1, m[0], 0),
+ clib_toeplitz_hash_gfni_two (d0, d1, m[1], 1));
+ h2 = u64x8_xor3 (h2, clib_toeplitz_hash_gfni_two (d2, d3, m[0], 0),
+ clib_toeplitz_hash_gfni_two (d2, d3, m[1], 1));
+
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_two (d0, d1, m[2], 2),
+ clib_toeplitz_hash_gfni_two (d0, d1, m[3], 3));
+ h2 = u64x8_xor3 (h2, clib_toeplitz_hash_gfni_two (d2, d3, m[2], 2),
+ clib_toeplitz_hash_gfni_two (d2, d3, m[3], 3));
+
+ h0 = u64x8_xor3 (h0, clib_toeplitz_hash_gfni_two (d0, d1, m[4], 4),
+ clib_toeplitz_hash_gfni_two (d0, d1, m[5], 5));
+ h2 = u64x8_xor3 (h2, clib_toeplitz_hash_gfni_two (d2, d3, m[4], 4),
+ clib_toeplitz_hash_gfni_two (d2, d3, m[5], 5));
+
+ h0 ^= clib_toeplitz_hash_gfni_two (d0, d1, m[6], 6);
+ h2 ^= clib_toeplitz_hash_gfni_two (d2, d3, m[6], 6);
+
+ n_bytes -= 56;
+ data0 += 56;
+ data1 += 56;
+ data2 += 56;
+ data3 += 56;
+ m += 7;
+
+ if (n_bytes >= 64)
+ {
+ d0 = *(u8x64u *) data0;
+ d1 = *(u8x64u *) data1;
+ d2 = *(u8x64u *) data2;
+ d3 = *(u8x64u *) data3;
+ goto next56;
+ }
+
+ if (n_bytes == 0)
+ goto done;
+
+ mask = pow2_mask (n_bytes);
+ d0 = u8x64_mask_load_zero ((u8 *) data0, mask);
+ d1 = u8x64_mask_load_zero ((u8 *) data1, mask);
+ d2 = u8x64_mask_load_zero ((u8 *) data2, mask);
+ d3 = u8x64_mask_load_zero ((u8 *) data3, mask);
+last8:
+ h0 ^= clib_toeplitz_hash_gfni_two (d0, d1, m[0], 0);
+ h2 ^= clib_toeplitz_hash_gfni_two (d2, d3, m[0], 0);
+ n_bytes -= 8;
+
+ if (n_bytes > 0)
+ {
+ u64x8 zero = {};
+ m += 1;
+ d0 = (u8x64) u64x8_align_right (zero, d0, 1);
+ d1 = (u8x64) u64x8_align_right (zero, d1, 1);
+ d2 = (u8x64) u64x8_align_right (zero, d2, 1);
+ d3 = (u8x64) u64x8_align_right (zero, d3, 1);
+ goto last8;
+ }
+
+done:
+ h = u64x8_hxor (h0);
+ *hash0 = h;
+ *hash1 = h >> 32;
+ h = u64x8_hxor (h2);
+ *hash2 = h;
+ *hash3 = h >> 32;
+#elif defined(CLIB_HAVE_VEC256)
+ u8 *key = k->data;
+ u64x4 v4, shift = { 0, 1, 2, 3 };
+ u32x8 h0 = {}, h1 = {}, h2 = {}, h3 = {};
+
+ while (n_bytes >= 4)
+ {
+ v4 = u64x4_splat (clib_net_to_host_u64 (*(u64u *) key)) << shift;
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[0], 0);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[0], 0);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[0], 0);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[0], 0);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[1], 1);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[1], 1);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[1], 1);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[1], 1);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[2], 2);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[2], 2);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[2], 2);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[2], 2);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[3], 3);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[3], 3);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[3], 3);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[3], 3);
+
+ data0 += 4;
+ data1 += 4;
+ data2 += 4;
+ data3 += 4;
+ key += 4;
+ n_bytes -= 4;
+ }
+
+ if (n_bytes)
+ {
+ u64 v = (u64) clib_net_to_host_u32 ((u64) (*(u32u *) key)) << 32;
+ v |= (u64) key[4] << 24;
+
+ if (n_bytes == 3)
+ {
+ v |= (u64) key[5] << 16;
+ v |= (u64) key[6] << 8;
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[0], 0);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[0], 0);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[0], 0);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[0], 0);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[1], 1);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[1], 1);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[1], 1);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[1], 1);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[2], 2);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[2], 2);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[2], 2);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[2], 2);
+ }
+ else if (n_bytes == 2)
+ {
+ v |= (u64) key[5] << 16;
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[0], 0);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[0], 0);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[0], 0);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[0], 0);
+
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[1], 1);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[1], 1);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[1], 1);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[1], 1);
+ }
+ else
+ {
+ v4 = u64x4_splat (v) << shift;
+ h0 = toeplitz_hash_one_x8 (h0, v4, data0[0], 0);
+ h1 = toeplitz_hash_one_x8 (h1, v4, data1[0], 0);
+ h2 = toeplitz_hash_one_x8 (h2, v4, data2[0], 0);
+ h3 = toeplitz_hash_one_x8 (h3, v4, data3[0], 0);
+ }
+ }
+
+ *hash0 = u32x8_hxor (h0);
+ *hash1 = u32x8_hxor (h1);
+ *hash2 = u32x8_hxor (h2);
+ *hash3 = u32x8_hxor (h3);
+#else
+ u8 *key = k->data;
+ u64 v, h0 = 0, h1 = 0, h2 = 0, h3 = 0;
+
+ while (n_bytes >= 4)
+ {
+ v = clib_net_to_host_u64 (*(u64u *) key);
+
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[0] & bit ? v : 0;
+ h1 ^= data1[0] & bit ? v : 0;
+ h2 ^= data2[0] & bit ? v : 0;
+ h3 ^= data3[0] & bit ? v : 0;
+ }
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[1] & bit ? v : 0;
+ h1 ^= data1[1] & bit ? v : 0;
+ h2 ^= data2[1] & bit ? v : 0;
+ h3 ^= data3[1] & bit ? v : 0;
+ }
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[2] & bit ? v : 0;
+ h1 ^= data1[2] & bit ? v : 0;
+ h2 ^= data2[2] & bit ? v : 0;
+ h3 ^= data3[2] & bit ? v : 0;
+ }
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[3] & bit ? v : 0;
+ h1 ^= data1[3] & bit ? v : 0;
+ h2 ^= data2[3] & bit ? v : 0;
+ h3 ^= data3[3] & bit ? v : 0;
+ }
+
+ data0 += 4;
+ data1 += 4;
+ data2 += 4;
+ data3 += 4;
+ key += 4;
+ n_bytes -= 4;
+ }
+
+ if (n_bytes)
+ {
+ v = (u64) clib_net_to_host_u32 ((u64) (*(u32u *) key)) << 32;
+ v |= (u64) key[4] << 24;
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[0] & bit ? v : 0;
+ h1 ^= data1[0] & bit ? v : 0;
+ h2 ^= data2[0] & bit ? v : 0;
+ h3 ^= data3[0] & bit ? v : 0;
+ }
+ if (n_bytes > 1)
+ {
+ v |= (u64) key[5] << 24;
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[1] & bit ? v : 0;
+ h1 ^= data1[1] & bit ? v : 0;
+ h2 ^= data2[1] & bit ? v : 0;
+ h3 ^= data3[1] & bit ? v : 0;
+ }
+ }
+ if (n_bytes > 2)
+ {
+ v |= (u64) key[6] << 24;
+ for (u8 bit = 1 << 7; bit; bit >>= 1, v <<= 1)
+ {
+ h0 ^= data0[2] & bit ? v : 0;
+ h1 ^= data1[2] & bit ? v : 0;
+ h2 ^= data2[2] & bit ? v : 0;
+ h3 ^= data3[2] & bit ? v : 0;
+ }
+ }
+ }
+ *hash0 = h0 >> 32;
+ *hash1 = h1 >> 32;
+ *hash2 = h2 >> 32;
+ *hash3 = h3 >> 32;
+#endif
+}
+
+#endif
diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h
index f38a3bdae73..866c82fcec3 100644
--- a/src/vppinfra/vector_avx2.h
+++ b/src/vppinfra/vector_avx2.h
@@ -19,7 +19,6 @@
#include <vppinfra/clib.h>
#include <x86intrin.h>
-/* *INDENT-OFF* */
#define foreach_avx2_vec256i \
_(i,8,32,epi8) _(i,16,16,epi16) _(i,32,8,epi32) _(i,64,4,epi64)
#define foreach_avx2_vec256u \
@@ -67,7 +66,6 @@ t##s##x##c##_interleave_hi (t##s##x##c a, t##s##x##c b) \
foreach_avx2_vec256i foreach_avx2_vec256u
#undef _
-/* *INDENT-ON* */
always_inline u32x8
u32x8_permute (u32x8 v, u32x8 idx)
@@ -80,7 +78,6 @@ u32x8_permute (u32x8 v, u32x8 idx)
(__m256i) v, ((m0) | (m1) << 2 | (m2) << 4 | (m3) << 6))
/* _extract_lo, _extract_hi */
-/* *INDENT-OFF* */
#define _(t1,t2) \
always_inline t1 \
t2##_extract_lo (t2 v) \
@@ -103,7 +100,6 @@ _(u16x8, u16x16)
_(u32x4, u32x8)
_(u64x2, u64x4)
#undef _
-/* *INDENT-ON* */
/* 256 bit packs. */
#define _(f, t, fn) \
@@ -132,7 +128,6 @@ i8x32_msb_mask (i8x32 v)
}
/* _from_ */
-/* *INDENT-OFF* */
#define _(f,t,i) \
static_always_inline t \
t##_from_##f (f x) \
@@ -151,7 +146,6 @@ _ (i8x16, i16x16, epi8_epi16)
_(i8x16, i32x8, epi8_epi32)
_(i8x16, i64x4, epi8_epi64)
#undef _
-/* *INDENT-ON* */
static_always_inline u64x4
u64x4_byte_swap (u64x4 v)
@@ -183,15 +177,12 @@ u16x16_byte_swap (u16x16 v)
return (u16x16) _mm256_shuffle_epi8 ((__m256i) v, (__m256i) swap);
}
-static_always_inline u8x32
-u8x32_shuffle (u8x32 v, u8x32 m)
-{
- return (u8x32) _mm256_shuffle_epi8 ((__m256i) v, (__m256i) m);
-}
-
#define u8x32_align_right(a, b, imm) \
(u8x32) _mm256_alignr_epi8 ((__m256i) a, (__m256i) b, imm)
+#define u64x4_align_right(a, b, imm) \
+ (u64x4) _mm256_alignr_epi64 ((__m256i) a, (__m256i) b, imm)
+
static_always_inline u32
u32x8_sum_elts (u32x8 sum8)
{
@@ -206,6 +197,36 @@ u32x8_hadd (u32x8 v1, u32x8 v2)
return (u32x8) _mm256_hadd_epi32 ((__m256i) v1, (__m256i) v2);
}
+static_always_inline u32
+u32x8_hxor (u32x8 v)
+{
+ u32x4 v4;
+ v4 = u32x8_extract_lo (v) ^ u32x8_extract_hi (v);
+ v4 ^= (u32x4) u8x16_align_right (v4, v4, 8);
+ v4 ^= (u32x4) u8x16_align_right (v4, v4, 4);
+ return v4[0];
+}
+
+static_always_inline u8x32
+u8x32_xor3 (u8x32 a, u8x32 b, u8x32 c)
+{
+#if __AVX512F__
+ return (u8x32) _mm256_ternarylogic_epi32 ((__m256i) a, (__m256i) b,
+ (__m256i) c, 0x96);
+#endif
+ return a ^ b ^ c;
+}
+
+static_always_inline u8x32
+u8x32_reflect_u8x16 (u8x32 x)
+{
+ static const u8x32 mask = {
+ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+ 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
+ };
+ return (u8x32) _mm256_shuffle_epi8 ((__m256i) x, (__m256i) mask);
+}
+
static_always_inline u16x16
u16x16_mask_last (u16x16 v, u8 n_last)
{
@@ -308,11 +329,17 @@ u32x8_scatter_one (u32x8 r, int index, void *p)
*(u32 *) p = r[index];
}
-static_always_inline u8x32
-u8x32_is_greater (u8x32 v1, u8x32 v2)
-{
- return (u8x32) _mm256_cmpgt_epi8 ((__m256i) v1, (__m256i) v2);
-}
+#define u32x8_gather_u32(base, indices, scale) \
+ (u32x8) _mm256_i32gather_epi32 ((const int *) base, (__m256i) indices, scale)
+
+#ifdef __AVX512F__
+#define u32x8_scatter_u32(base, indices, v, scale) \
+ _mm256_i32scatter_epi32 (base, (__m256i) indices, (__m256i) v, scale)
+#else
+#define u32x8_scatter_u32(base, indices, v, scale) \
+ for (u32 i = 0; i < 8; i++) \
+ *((u32u *) ((u8 *) base + (scale) * (indices)[i])) = (v)[i];
+#endif
static_always_inline u8x32
u8x32_blend (u8x32 v1, u8x32 v2, u8x32 mask)
@@ -321,6 +348,11 @@ u8x32_blend (u8x32 v1, u8x32 v2, u8x32 mask)
(__m256i) mask);
}
+#define u8x32_word_shift_left(a, n) \
+ (u8x32) _mm256_bslli_epi128 ((__m256i) a, n)
+#define u8x32_word_shift_right(a, n) \
+ (u8x32) _mm256_bsrli_epi128 ((__m256i) a, n)
+
#define u32x8_permute_lanes(a, b, m) \
(u32x8) _mm256_permute2x128_si256 ((__m256i) a, (__m256i) b, m)
#define u64x4_permute_lanes(a, b, m) \
@@ -390,6 +422,58 @@ u64x4_transpose (u64x4 a[8])
a[3] = u64x4_permute_lanes (r[1], r[3], 0x31);
}
+static_always_inline u8x32
+u8x32_splat_u8x16 (u8x16 a)
+{
+ return (u8x32) _mm256_broadcastsi128_si256 ((__m128i) a);
+}
+
+static_always_inline u32x8
+u32x8_splat_u32x4 (u32x4 a)
+{
+ return (u32x8) _mm256_broadcastsi128_si256 ((__m128i) a);
+}
+
+static_always_inline u64x4
+u64x4_splat_u64x2 (u64x2 a)
+{
+ return (u64x4) _mm256_broadcastsi128_si256 ((__m128i) a);
+}
+
+static_always_inline u8x32
+u8x32_load_partial (u8 *data, uword n)
+{
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ return u8x32_mask_load_zero (data, pow2_mask (n));
+#else
+ u8x32 r = {};
+ if (n > 16)
+ {
+ r = u8x32_insert_lo (r, *(u8x16u *) data);
+ r = u8x32_insert_hi (r, u8x16_load_partial (data + 16, n - 16));
+ }
+ else
+ r = u8x32_insert_lo (r, u8x16_load_partial (data, n));
+ return r;
+#endif
+}
+
+static_always_inline void
+u8x32_store_partial (u8x32 r, u8 *data, uword n)
+{
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u8x32_mask_store (r, data, pow2_mask (n));
+#else
+ if (n > 16)
+ {
+ *(u8x16u *) data = u8x32_extract_lo (r);
+ u8x16_store_partial (u8x32_extract_hi (r), data + 16, n - 16);
+ }
+ else
+ u8x16_store_partial (u8x32_extract_lo (r), data, n);
+#endif
+}
+
#endif /* included_vector_avx2_h */
/*
diff --git a/src/vppinfra/vector_avx512.h b/src/vppinfra/vector_avx512.h
index 3a01c1ed824..699afec1212 100644
--- a/src/vppinfra/vector_avx512.h
+++ b/src/vppinfra/vector_avx512.h
@@ -19,7 +19,6 @@
#include <vppinfra/clib.h>
#include <x86intrin.h>
-/* *INDENT-OFF* */
#define foreach_avx512_vec512i \
_(i,8,64,epi8) _(i,16,32,epi16) _(i,32,16,epi32) _(i,64,8,epi64)
#define foreach_avx512_vec512u \
@@ -29,55 +28,68 @@
/* splat, load_unaligned, store_unaligned, is_all_zero, is_equal,
is_all_equal, is_zero_mask */
-#define _(t, s, c, i) \
-static_always_inline t##s##x##c \
-t##s##x##c##_splat (t##s x) \
-{ return (t##s##x##c) _mm512_set1_##i (x); } \
-\
-static_always_inline t##s##x##c \
-t##s##x##c##_load_aligned (void *p) \
-{ return (t##s##x##c) _mm512_load_si512 (p); } \
-\
-static_always_inline void \
-t##s##x##c##_store_aligned (t##s##x##c v, void *p) \
-{ _mm512_store_si512 ((__m512i *) p, (__m512i) v); } \
-\
-static_always_inline t##s##x##c \
-t##s##x##c##_load_unaligned (void *p) \
-{ return (t##s##x##c) _mm512_loadu_si512 (p); } \
-\
-static_always_inline void \
-t##s##x##c##_store_unaligned (t##s##x##c v, void *p) \
-{ _mm512_storeu_si512 ((__m512i *) p, (__m512i) v); } \
-\
-static_always_inline int \
-t##s##x##c##_is_all_zero (t##s##x##c v) \
-{ return (_mm512_test_epi64_mask ((__m512i) v, (__m512i) v) == 0); } \
-\
-static_always_inline int \
-t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
-{ return t##s##x##c##_is_all_zero (a ^ b); } \
-\
-static_always_inline int \
-t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \
-{ return t##s##x##c##_is_equal (v, t##s##x##c##_splat (x)); } \
-\
-static_always_inline u##c \
-t##s##x##c##_is_zero_mask (t##s##x##c v) \
-{ return _mm512_test_##i##_mask ((__m512i) v, (__m512i) v); } \
-\
-static_always_inline t##s##x##c \
-t##s##x##c##_interleave_lo (t##s##x##c a, t##s##x##c b) \
-{ return (t##s##x##c) _mm512_unpacklo_##i ((__m512i) a, (__m512i) b); } \
-\
-static_always_inline t##s##x##c \
-t##s##x##c##_interleave_hi (t##s##x##c a, t##s##x##c b) \
-{ return (t##s##x##c) _mm512_unpackhi_##i ((__m512i) a, (__m512i) b); } \
-
+#define _(t, s, c, i) \
+ static_always_inline t##s##x##c t##s##x##c##_splat (t##s x) \
+ { \
+ return (t##s##x##c) _mm512_set1_##i (x); \
+ } \
+ \
+ static_always_inline t##s##x##c t##s##x##c##_load_aligned (void *p) \
+ { \
+ return (t##s##x##c) _mm512_load_si512 (p); \
+ } \
+ \
+ static_always_inline void t##s##x##c##_store_aligned (t##s##x##c v, \
+ void *p) \
+ { \
+ _mm512_store_si512 ((__m512i *) p, (__m512i) v); \
+ } \
+ \
+ static_always_inline t##s##x##c t##s##x##c##_load_unaligned (void *p) \
+ { \
+ return (t##s##x##c) _mm512_loadu_si512 (p); \
+ } \
+ \
+ static_always_inline void t##s##x##c##_store_unaligned (t##s##x##c v, \
+ void *p) \
+ { \
+ _mm512_storeu_si512 ((__m512i *) p, (__m512i) v); \
+ } \
+ \
+ static_always_inline int t##s##x##c##_is_all_zero (t##s##x##c v) \
+ { \
+ return (_mm512_test_epi64_mask ((__m512i) v, (__m512i) v) == 0); \
+ } \
+ \
+ static_always_inline int t##s##x##c##_is_equal (t##s##x##c a, t##s##x##c b) \
+ { \
+ return (_mm512_cmpneq_epi64_mask ((__m512i) a, (__m512i) b) == 0); \
+ } \
+ \
+ static_always_inline int t##s##x##c##_is_all_equal (t##s##x##c v, t##s x) \
+ { \
+ return t##s##x##c##_is_equal (v, t##s##x##c##_splat (x)); \
+ } \
+ \
+ static_always_inline u##c t##s##x##c##_is_zero_mask (t##s##x##c v) \
+ { \
+ return _mm512_test_##i##_mask ((__m512i) v, (__m512i) v); \
+ } \
+ \
+ static_always_inline t##s##x##c t##s##x##c##_interleave_lo (t##s##x##c a, \
+ t##s##x##c b) \
+ { \
+ return (t##s##x##c) _mm512_unpacklo_##i ((__m512i) a, (__m512i) b); \
+ } \
+ \
+ static_always_inline t##s##x##c t##s##x##c##_interleave_hi (t##s##x##c a, \
+ t##s##x##c b) \
+ { \
+ return (t##s##x##c) _mm512_unpackhi_##i ((__m512i) a, (__m512i) b); \
+ }
foreach_avx512_vec512i foreach_avx512_vec512u
#undef _
-/* *INDENT-ON* */
static_always_inline u32
u16x32_msb_mask (u16x32 v)
@@ -85,6 +97,9 @@ u16x32_msb_mask (u16x32 v)
return (u32) _mm512_movepi16_mask ((__m512i) v);
}
+#define u64x8_i64gather(index, base, scale) \
+ (u64x8) _mm512_i64gather_epi64 ((__m512i) index, base, scale)
+
/* 512-bit packs */
#define _(f, t, fn) \
always_inline t t##_pack (f lo, f hi) \
@@ -98,6 +113,18 @@ _ (i32x16, i16x32, _mm512_packs_epi32)
_ (i32x16, u16x32, _mm512_packus_epi32)
#undef _
+static_always_inline u64x8
+u64x8_byte_swap (u64x8 v)
+{
+ u8x64 swap = {
+ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8,
+ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8,
+ };
+ return (u64x8) _mm512_shuffle_epi8 ((__m512i) v, (__m512i) swap);
+}
+
static_always_inline u32x16
u32x16_byte_swap (u32x16 v)
{
@@ -184,6 +211,13 @@ u8x64_xor3 (u8x64 a, u8x64 b, u8x64 c)
(__m512i) c, 0x96);
}
+static_always_inline u64x8
+u64x8_xor3 (u64x8 a, u64x8 b, u64x8 c)
+{
+ return (u64x8) _mm512_ternarylogic_epi32 ((__m512i) a, (__m512i) b,
+ (__m512i) c, 0x96);
+}
+
static_always_inline u8x64
u8x64_reflect_u8x16 (u8x64 x)
{
@@ -196,15 +230,12 @@ u8x64_reflect_u8x16 (u8x64 x)
return (u8x64) _mm512_shuffle_epi8 ((__m512i) x, (__m512i) mask);
}
-static_always_inline u8x64
-u8x64_shuffle (u8x64 v, u8x64 m)
-{
- return (u8x64) _mm512_shuffle_epi8 ((__m512i) v, (__m512i) m);
-}
-
#define u8x64_align_right(a, b, imm) \
(u8x64) _mm512_alignr_epi8 ((__m512i) a, (__m512i) b, imm)
+#define u64x8_align_right(a, b, imm) \
+ (u64x8) _mm512_alignr_epi64 ((__m512i) a, (__m512i) b, imm)
+
static_always_inline u32
u32x16_sum_elts (u32x16 sum16)
{
@@ -243,14 +274,42 @@ _ (u64x4, u8, _mm256, __m256i, epi64)
_ (u64x2, u8, _mm, __m128i, epi64)
#undef _
+#define _(t, m, p, i, e) \
+ static_always_inline t t##_mask_and (t a, t b, m mask) \
+ { \
+ return (t) p##_mask_and_##e ((i) a, mask, (i) a, (i) b); \
+ } \
+ static_always_inline t t##_mask_andnot (t a, t b, m mask) \
+ { \
+ return (t) p##_mask_andnot_##e ((i) a, mask, (i) a, (i) b); \
+ } \
+ static_always_inline t t##_mask_xor (t a, t b, m mask) \
+ { \
+ return (t) p##_mask_xor_##e ((i) a, mask, (i) a, (i) b); \
+ } \
+ static_always_inline t t##_mask_or (t a, t b, m mask) \
+ { \
+ return (t) p##_mask_or_##e ((i) a, mask, (i) a, (i) b); \
+ }
+_ (u32x16, u16, _mm512, __m512i, epi32)
+_ (u32x8, u8, _mm256, __m256i, epi32)
+_ (u32x4, u8, _mm, __m128i, epi32)
+_ (u64x8, u8, _mm512, __m512i, epi64)
+_ (u64x4, u8, _mm256, __m256i, epi64)
+_ (u64x2, u8, _mm, __m128i, epi64)
+#undef _
+
#ifdef CLIB_HAVE_VEC512
#define CLIB_HAVE_VEC512_MASK_LOAD_STORE
+#define CLIB_HAVE_VEC512_MASK_BITWISE_OPS
#endif
#ifdef CLIB_HAVE_VEC256
#define CLIB_HAVE_VEC256_MASK_LOAD_STORE
+#define CLIB_HAVE_VEC256_MASK_BITWISE_OPS
#endif
#ifdef CLIB_HAVE_VEC128
#define CLIB_HAVE_VEC128_MASK_LOAD_STORE
+#define CLIB_HAVE_VEC128_MASK_BITWISE_OPS
#endif
static_always_inline u8x64
@@ -265,6 +324,12 @@ u32x16_splat_u32x4 (u32x4 a)
return (u32x16) _mm512_broadcast_i64x2 ((__m128i) a);
}
+static_always_inline u64x8
+u64x8_splat_u64x2 (u64x2 a)
+{
+ return (u64x8) _mm512_broadcast_i64x2 ((__m128i) a);
+}
+
static_always_inline u32x16
u32x16_mask_blend (u32x16 a, u32x16 b, u16 mask)
{
@@ -277,6 +342,19 @@ u8x64_mask_blend (u8x64 a, u8x64 b, u64 mask)
return (u8x64) _mm512_mask_blend_epi8 (mask, (__m512i) a, (__m512i) b);
}
+static_always_inline u8x64
+u8x64_permute (u8x64 idx, u8x64 a)
+{
+ return (u8x64) _mm512_permutexvar_epi8 ((__m512i) idx, (__m512i) a);
+}
+
+static_always_inline u8x64
+u8x64_permute2 (u8x64 idx, u8x64 a, u8x64 b)
+{
+ return (u8x64) _mm512_permutex2var_epi8 ((__m512i) a, (__m512i) idx,
+ (__m512i) b);
+}
+
#define _(t, m, e, p, it) \
static_always_inline m t##_is_equal_mask (t a, t b) \
{ \
@@ -298,6 +376,27 @@ _ (u32x16, u16, epu32, _mm512, __m512i)
_ (u64x8, u8, epu64, _mm512, __m512i)
#undef _
+#define _(t, m, e, p, it) \
+ static_always_inline m t##_is_not_equal_mask (t a, t b) \
+ { \
+ return p##_cmpneq_##e##_mask ((it) a, (it) b); \
+ }
+_ (u8x16, u16, epu8, _mm, __m128i)
+_ (u16x8, u8, epu16, _mm, __m128i)
+_ (u32x4, u8, epu32, _mm, __m128i)
+_ (u64x2, u8, epu64, _mm, __m128i)
+
+_ (u8x32, u32, epu8, _mm256, __m256i)
+_ (u16x16, u16, epu16, _mm256, __m256i)
+_ (u32x8, u8, epu32, _mm256, __m256i)
+_ (u64x4, u8, epu64, _mm256, __m256i)
+
+_ (u8x64, u64, epu8, _mm512, __m512i)
+_ (u16x32, u32, epu16, _mm512, __m512i)
+_ (u32x16, u16, epu32, _mm512, __m512i)
+_ (u64x8, u8, epu64, _mm512, __m512i)
+#undef _
+
#define _(f, t, fn, it) \
static_always_inline t t##_from_##f (f x) { return (t) fn ((it) x); }
_ (u16x16, u32x16, _mm512_cvtepi16_epi32, __m256i)
@@ -338,9 +437,17 @@ _ (u8x16, u16, _mm, __m128i, epi8)
#ifdef CLIB_HAVE_VEC256
#define CLIB_HAVE_VEC256_COMPRESS
+#ifdef __AVX512VBMI2__
+#define CLIB_HAVE_VEC256_COMPRESS_U8_U16
+#endif
+
#endif
#ifdef CLIB_HAVE_VEC512
#define CLIB_HAVE_VEC512_COMPRESS
+#ifdef __AVX512VBMI2__
+#define CLIB_HAVE_VEC512_COMPRESS_U8_U16
+#endif
+
#endif
#ifndef __AVX512VBMI2__
@@ -357,17 +464,23 @@ u16x8_compress (u16x8 v, u8 mask)
}
#endif
+static_always_inline u64
+u64x8_hxor (u64x8 v)
+{
+ v ^= u64x8_align_right (v, v, 4);
+ v ^= u64x8_align_right (v, v, 2);
+ return v[0] ^ v[1];
+}
+
static_always_inline void
u32x16_transpose (u32x16 m[16])
{
__m512i r[16], a, b, c, d, x, y;
- /* *INDENT-OFF* */
__m512i pm1 = (__m512i) (u64x8) { 0, 1, 8, 9, 4, 5, 12, 13};
__m512i pm2 = (__m512i) (u64x8) { 2, 3, 10, 11, 6, 7, 14, 15};
__m512i pm3 = (__m512i) (u64x8) { 0, 1, 2, 3, 8, 9, 10, 11};
__m512i pm4 = (__m512i) (u64x8) { 4, 5, 6, 7, 12, 13, 14, 15};
- /* *INDENT-ON* */
r[0] = _mm512_unpacklo_epi32 ((__m512i) m[0], (__m512i) m[1]);
r[1] = _mm512_unpacklo_epi32 ((__m512i) m[2], (__m512i) m[3]);
@@ -447,12 +560,10 @@ u64x8_transpose (u64x8 m[8])
{
__m512i r[8], x, y;
- /* *INDENT-OFF* */
__m512i pm1 = (__m512i) (u64x8) { 0, 1, 8, 9, 4, 5, 12, 13};
__m512i pm2 = (__m512i) (u64x8) { 2, 3, 10, 11, 6, 7, 14, 15};
__m512i pm3 = (__m512i) (u64x8) { 0, 1, 2, 3, 8, 9, 10, 11};
__m512i pm4 = (__m512i) (u64x8) { 4, 5, 6, 7, 12, 13, 14, 15};
- /* *INDENT-ON* */
r[0] = _mm512_unpacklo_epi64 ((__m512i) m[0], (__m512i) m[1]);
r[1] = _mm512_unpacklo_epi64 ((__m512i) m[2], (__m512i) m[3]);
@@ -482,6 +593,18 @@ u64x8_transpose (u64x8 m[8])
m[7] = (u64x8) _mm512_permutex2var_epi64 (x, pm4, y);
}
+static_always_inline u8x64
+u8x64_load_partial (u8 *data, uword n)
+{
+ return u8x64_mask_load_zero (data, pow2_mask (n));
+}
+
+static_always_inline void
+u8x64_store_partial (u8x64 r, u8 *data, uword n)
+{
+ u8x64_mask_store (r, data, pow2_mask (n));
+}
+
#endif /* included_vector_avx512_h */
/*
* fd.io coding-style-patch-verification: ON
diff --git a/src/vppinfra/vector_neon.h b/src/vppinfra/vector_neon.h
index 70b05c60884..48644ddbd98 100644
--- a/src/vppinfra/vector_neon.h
+++ b/src/vppinfra/vector_neon.h
@@ -43,7 +43,6 @@ u8x16_compare_byte_mask (u8x16 v)
return (u32) (vgetq_lane_u64 (x64, 0) + (vgetq_lane_u64 (x64, 1) << 8));
}
-/* *INDENT-OFF* */
#define foreach_neon_vec128i \
_(i,8,16,s8) _(i,16,8,s16) _(i,32,4,s32) _(i,64,2,s64)
#define foreach_neon_vec128u \
@@ -88,12 +87,6 @@ u8x16_compare_byte_mask (u8x16 v)
return u8x16_compare_byte_mask (v); \
} \
\
- static_always_inline u##s##x##c t##s##x##c##_is_greater (t##s##x##c a, \
- t##s##x##c b) \
- { \
- return (u##s##x##c) vcgtq_##i (a, b); \
- } \
- \
static_always_inline t##s##x##c t##s##x##c##_add_saturate (t##s##x##c a, \
t##s##x##c b) \
{ \
@@ -115,7 +108,6 @@ u8x16_compare_byte_mask (u8x16 v)
foreach_neon_vec128i foreach_neon_vec128u
#undef _
-/* *INDENT-ON* */
static_always_inline u16x8
u16x8_byte_swap (u16x8 v)
@@ -129,12 +121,6 @@ u32x4_byte_swap (u32x4 v)
return (u32x4) vrev32q_u8 ((u8x16) v);
}
-static_always_inline u8x16
-u8x16_shuffle (u8x16 v, u8x16 m)
-{
- return (u8x16) vqtbl1q_u8 (v, m);
-}
-
static_always_inline u32x4
u32x4_hadd (u32x4 v1, u32x4 v2)
{
@@ -211,6 +197,18 @@ u32x4_min_scalar (u32x4 v)
#define u8x16_word_shift_left(x,n) vextq_u8(u8x16_splat (0), x, 16 - n)
#define u8x16_word_shift_right(x,n) vextq_u8(x, u8x16_splat (0), n)
+always_inline u32x4
+u32x4_interleave_hi (u32x4 a, u32x4 b)
+{
+ return (u32x4) vzip2q_u32 (a, b);
+}
+
+always_inline u32x4
+u32x4_interleave_lo (u32x4 a, u32x4 b)
+{
+ return (u32x4) vzip1q_u32 (a, b);
+}
+
static_always_inline u8x16
u8x16_reflect (u8x16 v)
{
@@ -231,6 +229,61 @@ __asm__ ("eor3 %0.16b,%1.16b,%2.16b,%3.16b": "=w" (r): "0" (a), "w" (b), "w" (c)
return a ^ b ^ c;
}
+static_always_inline u8x16
+u8x16_load_partial (u8 *data, uword n)
+{
+ u8x16 r = {};
+ if (n > 7)
+ {
+ u64x2 r;
+ r[1] = *(u64u *) (data + n - 8);
+ r >>= (16 - n) * 8;
+ r[0] = *(u64u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 3)
+ {
+ u32x4 r = {};
+ r[1] = *(u32u *) (data + n - 4);
+ r >>= (8 - n) * 8;
+ r[0] = *(u32u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 1)
+ {
+ u16x8 r = {};
+ r[1] = *(u16u *) (data + n - 2);
+ r >>= (4 - n) * 8;
+ r[0] = *(u16u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 0)
+ r[0] = *data;
+ return r;
+}
+
+static_always_inline void
+u8x16_store_partial (u8x16 r, u8 *data, uword n)
+{
+ if (n > 7)
+ {
+ *(u64u *) (data + n - 8) = ((u64x2) r)[1] << ((16 - n) * 8);
+ *(u64u *) data = ((u64x2) r)[0];
+ }
+ else if (n > 3)
+ {
+ *(u32u *) (data + n - 4) = ((u32x4) r)[1] << ((8 - n) * 8);
+ *(u32u *) data = ((u32x4) r)[0];
+ }
+ else if (n > 1)
+ {
+ *(u16u *) (data + n - 2) = ((u16x8) r)[1] << ((4 - n) * 8);
+ *(u16u *) data = ((u16x8) r)[0];
+ }
+ else if (n > 0)
+ data[0] = r[0];
+}
+
#define CLIB_HAVE_VEC128_MSB_MASK
#define CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE
diff --git a/src/vppinfra/vector_sse42.h b/src/vppinfra/vector_sse42.h
index 7e75ad28710..58d5da90125 100644
--- a/src/vppinfra/vector_sse42.h
+++ b/src/vppinfra/vector_sse42.h
@@ -41,7 +41,6 @@
#include <vppinfra/error_bootstrap.h> /* for ASSERT */
#include <x86intrin.h>
-/* *INDENT-OFF* */
#define foreach_sse42_vec128i \
_(i,8,16,epi8) _(i,16,8,epi16) _(i,32,4,epi32) _(i,64,2,epi64x)
#define foreach_sse42_vec128u \
@@ -92,7 +91,6 @@ t##s##x##c##_max (t##s##x##c a, t##s##x##c b) \
_(i,8,16,epi8) _(i,16,8,epi16) _(i,32,4,epi32) _(i,64,2,epi64)
_(u,8,16,epu8) _(u,16,8,epu16) _(u,32,4,epu32) _(u,64,2,epu64)
#undef _
-/* *INDENT-ON* */
#define CLIB_VEC128_SPLAT_DEFINED
#define CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE
@@ -411,26 +409,7 @@ u32x4_sum_elts (u32x4 sum4)
return sum4[0];
}
-static_always_inline u8x16
-u8x16_shuffle (u8x16 v, u8x16 m)
-{
- return (u8x16) _mm_shuffle_epi8 ((__m128i) v, (__m128i) m);
-}
-
-static_always_inline u32x4
-u32x4_shuffle (u32x4 v, const int a, const int b, const int c, const int d)
-{
-#if defined(__clang__) || !__OPTIMIZE__
- u32x4 r = { v[a], v[b], v[c], v[d] };
- return r;
-#else
- return (u32x4) _mm_shuffle_epi32 ((__m128i) v,
- a | b << 2 | c << 4 | d << 6);
-#endif
-}
-
/* _from_ */
-/* *INDENT-OFF* */
#define _(f,t,i) \
static_always_inline t \
t##_from_##f (f x) \
@@ -450,7 +429,6 @@ _(i16x8, i32x4, epi16_epi32)
_(i16x8, i64x2, epi16_epi64)
_(i32x4, i64x2, epi32_epi64)
#undef _
-/* *INDENT-ON* */
static_always_inline u64x2
u64x2_gather (void *p0, void *p1)
@@ -496,12 +474,6 @@ u32x4_scatter_one (u32x4 r, int index, void *p)
}
static_always_inline u8x16
-u8x16_is_greater (u8x16 v1, u8x16 v2)
-{
- return (u8x16) _mm_cmpgt_epi8 ((__m128i) v1, (__m128i) v2);
-}
-
-static_always_inline u8x16
u8x16_blend (u8x16 v1, u8x16 v2, u8x16 mask)
{
return (u8x16) _mm_blendv_epi8 ((__m128i) v1, (__m128i) v2, (__m128i) mask);
@@ -517,6 +489,68 @@ u8x16_xor3 (u8x16 a, u8x16 b, u8x16 c)
return a ^ b ^ c;
}
+static_always_inline u8x16
+u8x16_load_partial (u8 *data, uword n)
+{
+ u8x16 r = {};
+#if defined(CLIB_HAVE_VEC128_MASK_LOAD_STORE)
+ return u8x16_mask_load_zero (data, pow2_mask (n));
+#endif
+ if (n > 7)
+ {
+ u64x2 r;
+ r[1] = *(u64u *) (data + n - 8);
+ r >>= (16 - n) * 8;
+ r[0] = *(u64u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 3)
+ {
+ u32x4 r = {};
+ r[1] = *(u32u *) (data + n - 4);
+ r >>= (8 - n) * 8;
+ r[0] = *(u32u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 1)
+ {
+ u16x8 r = {};
+ r[1] = *(u16u *) (data + n - 2);
+ r >>= (4 - n) * 8;
+ r[0] = *(u16u *) data;
+ return (u8x16) r;
+ }
+ else if (n > 0)
+ r[0] = *data;
+ return r;
+}
+
+static_always_inline void
+u8x16_store_partial (u8x16 r, u8 *data, uword n)
+{
+#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u8x16_mask_store (r, data, pow2_mask (n));
+#else
+ if (n > 7)
+ {
+ *(u64u *) (data + n - 8) = ((u64x2) r)[1] << ((16 - n) * 8);
+ *(u64u *) data = ((u64x2) r)[0];
+ }
+ else if (n > 3)
+ {
+ *(u32u *) (data + n - 4) = ((u32x4) r)[1] << ((8 - n) * 8);
+ *(u32u *) data = ((u32x4) r)[0];
+ }
+ else if (n > 1)
+ {
+ *(u16u *) (data + n - 2) = ((u16x8) r)[1] << ((4 - n) * 8);
+ *(u16u *) data = ((u16x8) r)[0];
+ }
+ else if (n > 0)
+ data[0] = r[0];
+#endif
+}
+
#endif /* included_vector_sse2_h */
/*